diff --git a/bifrost/r12p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt b/bifrost/r12p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
new file mode 100755
index 0000000..1a69e1a
--- /dev/null
+++ b/bifrost/r12p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
@@ -0,0 +1,153 @@
+#
+# (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+* ARM Mali Midgard devices
+
+
+Required properties:
+
+- compatible : Should be mali<chip>, replacing digits with x from the back,
+until malit<Major>xx, ending with arm,mali-midgard, the latter not optional.
+- reg : Physical base address of the device and length of the register area.
+- interrupts : Contains the three IRQ lines required by T-6xx devices
+- interrupt-names : Contains the names of IRQ resources in the order they were
+provided in the interrupts property. Must contain: "JOB, "MMU", "GPU".
+
+Optional:
+
+- clocks : Phandle to clock for the Mali T-6xx device.
+- clock-names : Shall be "clk_mali".
+- mali-supply : Phandle to regulator for the Mali device. Refer to
+Documentation/devicetree/bindings/regulator/regulator.txt for details.
+- operating-points-v2 : Refer to Documentation/devicetree/bindings/power/opp.txt
+for details.
+- jm_config : For T860/T880. Sets job manager configuration. An array containing:
+	- 1 to override the TIMESTAMP value, 0 otherwise.
+	- 1 to override clock gate, forcing them to be always on, 0 otherwise.
+	- 1 to enable job throttle, limiting the number of cores that can be started
+	  simultaneously, 0 otherwise.
+	- Value between 0 and 63 (including). If job throttle is enabled, this is one
+	  less than the number of cores that can be started simultaneously.
+- power_model : Sets the power model parameters. Three power models are currently
+  defined which include "mali-simple-power-model", "mali-g71-power-model" and
+	  "mali-g72-power-model".
+	- mali-simple-power-model: this model derives the GPU power usage based
+	  on the GPU voltage scaled by the system temperature. Note: it was
+	  designed for the Juno platform, and may not be suitable for others.
+		- compatible: Should be "arm,mali-simple-power-model"
+		- dynamic-coefficient: Coefficient, in pW/(Hz V^2), which is
+		  multiplied by v^2*f to calculate the dynamic power consumption.
+		- static-coefficient: Coefficient, in uW/V^3, which is
+		  multiplied by v^3 to calculate the static power consumption.
+		- ts: An array containing coefficients for the temperature
+		  scaling factor. This is used to scale the static power by a
+		  factor of tsf/1000000,
+		  where tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0],
+		  and T = temperature in degrees.
+		- thermal-zone: A string identifying the thermal zone used for
+		  the GPU
+		- temp-poll-interval-ms: the interval at which the system
+		  temperature is polled
+	- mali-g71-power-model / mali-g72-power-model: these models derive
+	  the GPU power usage based on performance counters, so they are more
+	  accurate.
+		- compatible: Should be "arm,mali-g71-power-model" /
+		  "arm,mali-g72-power-model"
+		- scale: the dynamic power calculated by the power model is
+		  scaled by a factor of "scale"/1000. This value should be
+		  chosen to match a particular implementation.
+		- min_sample_cycles: Fall back to the simple power model if the
+		  number of GPU cycles for a given counter dump is less than
+		  'min_sample_cycles'. The default value of this should suffice.
+	* Note: when IPA is used, two separate power models (simple and counter-based)
+	  are used at different points so care should be taken to configure
+	  both power models in the device tree (specifically dynamic-coefficient,
+	  static-coefficient and scale) to best match the platform.
+- system-coherency : Sets the coherency protocol to be used for coherent
+		     accesses made from the GPU.
+		     If not set then no coherency is used.
+	- 0  : ACE-Lite
+	- 1  : ACE
+	- 31 : No coherency
+- ipa-model : Sets the IPA model to be used for power management. GPU probe will fail if the
+	      model is not found in the registered models list. If no model is specified here,
+	      a gpu-id based model is picked if available, otherwise the default model is used.
+	- mali-simple-power-model: Default model used on mali
+- protected-mode-switcher : Phandle to device implemented protected mode switching functionality.
+Refer to Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt for one implementation.
+
+Example for a Mali GPU:
+
+gpu@0xfc010000 {
+	compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
+	reg = <0xfc010000 0x4000>;
+	interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+	interrupt-names = "JOB", "MMU", "GPU";
+
+	clocks = <&pclk_mali>;
+	clock-names = "clk_mali";
+	mali-supply = <&vdd_mali>;
+	operating-points-v2 = <&gpu_opp_table>;
+	power_model@0 {
+		compatible = "arm,mali-simple-power-model";
+		static-coefficient = <2427750>;
+		dynamic-coefficient = <4687>;
+		ts = <20000 2000 (-20) 2>;
+		thermal-zone = "gpu";
+	};
+	power_model@1 {
+		compatible = "arm,mali-g71-power-model";
+		scale = <5>;
+	};
+};
+
+gpu_opp_table: opp_table0 {
+	compatible = "operating-points-v2";
+
+	opp@533000000 {
+		opp-hz = /bits/ 64 <533000000>;
+		opp-microvolt = <1250000>;
+	};
+	opp@450000000 {
+		opp-hz = /bits/ 64 <450000000>;
+		opp-microvolt = <1150000>;
+	};
+	opp@400000000 {
+		opp-hz = /bits/ 64 <400000000>;
+		opp-microvolt = <1125000>;
+	};
+	opp@350000000 {
+		opp-hz = /bits/ 64 <350000000>;
+		opp-microvolt = <1075000>;
+	};
+	opp@266000000 {
+		opp-hz = /bits/ 64 <266000000>;
+		opp-microvolt = <1025000>;
+	};
+	opp@160000000 {
+		opp-hz = /bits/ 64 <160000000>;
+		opp-microvolt = <925000>;
+	};
+	opp@100000000 {
+		opp-hz = /bits/ 64 <100000000>;
+		opp-microvolt = <912500>;
+	};
+};
diff --git a/bifrost/r12p0/kernel/Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt b/bifrost/r12p0/kernel/Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt
new file mode 100644
index 0000000..280358e
--- /dev/null
+++ b/bifrost/r12p0/kernel/Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+* ARM SMC protected mode switcher devices
+
+Required properties :
+
+- compatible : Must be "arm,smc-protected-mode-switcher"
+- arm,smc,protected_enable : SMC call ID to enable protected mode
+- arm,smc,protected_disable : SMC call ID to disable protected mode and reset
+			      device
+
+An example node :
+
+	gpu_switcher {
+		compatible = "arm,smc-protected-mode-switcher";
+		arm,smc,protected_enable = <0xff06>;
+		arm,smc,protected_disable = <0xff07>;
+	};
diff --git a/bifrost/r12p0/kernel/Documentation/devicetree/bindings/power/mali-opp.txt b/bifrost/r12p0/kernel/Documentation/devicetree/bindings/power/mali-opp.txt
new file mode 100644
index 0000000..17263f5
--- /dev/null
+++ b/bifrost/r12p0/kernel/Documentation/devicetree/bindings/power/mali-opp.txt
@@ -0,0 +1,169 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+* ARM Mali Midgard OPP
+
+* OPP Table Node
+
+This describes the OPPs belonging to a device. This node can have following
+properties:
+
+Required properties:
+- compatible: Allow OPPs to express their compatibility. It should be:
+  "operating-points-v2", "operating-points-v2-mali".
+
+- OPP nodes: One or more OPP nodes describing voltage-current-frequency
+  combinations. Their name isn't significant but their phandle can be used to
+  reference an OPP.
+
+* OPP Node
+
+This defines voltage-current-frequency combinations along with other related
+properties.
+
+Required properties:
+- opp-hz: Nominal frequency in Hz, expressed as a 64-bit big-endian integer.
+  This should be treated as a relative performance measurement, taking both GPU
+  frequency and core mask into account.
+
+Optional properties:
+- opp-hz-real: Real frequency in Hz, expressed as a 64-bit big-endian integer.
+  If this is not present then the nominal frequency will be used instead.
+
+- opp-core-mask: Shader core mask. If neither this or opp-core-count are present
+  then all shader cores will be used for this OPP.
+
+- opp-core-count: Number of cores to use for this OPP. If this is present then
+  the driver will build a core mask using the available core mask provided by
+  the GPU hardware.
+
+  If neither this nor opp-core-mask are present then all shader cores will be
+  used for this OPP.
+
+  If both this and opp-core-mask are present then opp-core-mask is ignored.
+
+- opp-microvolt: voltage in micro Volts.
+
+  A single regulator's voltage is specified with an array of size one or three.
+  Single entry is for target voltage and three entries are for <target min max>
+  voltages.
+
+  Entries for multiple regulators must be present in the same order as
+  regulators are specified in device's DT node.
+
+- opp-microvolt-<name>: Named opp-microvolt property. This is exactly similar to
+  the above opp-microvolt property, but allows multiple voltage ranges to be
+  provided for the same OPP. At runtime, the platform can pick a <name> and
+  matching opp-microvolt-<name> property will be enabled for all OPPs. If the
+  platform doesn't pick a specific <name> or the <name> doesn't match with any
+  opp-microvolt-<name> properties, then opp-microvolt property shall be used, if
+  present.
+
+- opp-microamp: The maximum current drawn by the device in microamperes
+  considering system specific parameters (such as transients, process, aging,
+  maximum operating temperature range etc.) as necessary. This may be used to
+  set the most efficient regulator operating mode.
+
+  Should only be set if opp-microvolt is set for the OPP.
+
+  Entries for multiple regulators must be present in the same order as
+  regulators are specified in device's DT node. If this property isn't required
+  for few regulators, then this should be marked as zero for them. If it isn't
+  required for any regulator, then this property need not be present.
+
+- opp-microamp-<name>: Named opp-microamp property. Similar to
+  opp-microvolt-<name> property, but for microamp instead.
+
+- clock-latency-ns: Specifies the maximum possible transition latency (in
+  nanoseconds) for switching to this OPP from any other OPP.
+
+- turbo-mode: Marks the OPP to be used only for turbo modes. Turbo mode is
+  available on some platforms, where the device can run over its operating
+  frequency for a short duration of time limited by the device's power, current
+  and thermal limits.
+
+- opp-suspend: Marks the OPP to be used during device suspend. Only one OPP in
+  the table should have this.
+
+- opp-supported-hw: This enables us to select only a subset of OPPs from the
+  larger OPP table, based on what version of the hardware we are running on. We
+  still can't have multiple nodes with the same opp-hz value in OPP table.
+
+  It's an user defined array containing a hierarchy of hardware version numbers,
+  supported by the OPP. For example: a platform with hierarchy of three levels
+  of versions (A, B and C), this field should be like <X Y Z>, where X
+  corresponds to Version hierarchy A, Y corresponds to version hierarchy B and Z
+  corresponds to version hierarchy C.
+
+  Each level of hierarchy is represented by a 32 bit value, and so there can be
+  only 32 different supported version per hierarchy. i.e. 1 bit per version. A
+  value of 0xFFFFFFFF will enable the OPP for all versions for that hierarchy
+  level. And a value of 0x00000000 will disable the OPP completely, and so we
+  never want that to happen.
+
+  If 32 values aren't sufficient for a version hierarchy, than that version
+  hierarchy can be contained in multiple 32 bit values. i.e. <X Y Z1 Z2> in the
+  above example, Z1 & Z2 refer to the version hierarchy Z.
+
+- status: Marks the node enabled/disabled.
+
+Example for a Juno with Mali T624:
+
+gpu_opp_table: opp_table0 {
+	compatible = "operating-points-v2", "operating-points-v2-mali";
+
+	opp@112500000 {
+		opp-hz = /bits/ 64 <112500000>;
+		opp-hz-real = /bits/ 64 <450000000>;
+		opp-microvolt = <820000>;
+		opp-core-mask = /bits/ 64 <0x1>;
+		opp-suspend;
+	};
+	opp@225000000 {
+		opp-hz = /bits/ 64 <225000000>;
+		opp-hz-real = /bits/ 64 <450000000>;
+		opp-microvolt = <820000>;
+		opp-core-count = <2>;
+	};
+	opp@450000000 {
+		opp-hz = /bits/ 64 <450000000>;
+		opp-hz-real = /bits/ 64 <450000000>;
+		opp-microvolt = <820000>;
+		opp-core-mask = /bits/ 64 <0xf>;
+	};
+	opp@487500000 {
+		opp-hz = /bits/ 64 <487500000>;
+		opp-microvolt = <825000>;
+	};
+	opp@525000000 {
+		opp-hz = /bits/ 64 <525000000>;
+		opp-microvolt = <850000>;
+	};
+	opp@562500000 {
+		opp-hz = /bits/ 64 <562500000>;
+		opp-microvolt = <875000>;
+	};
+	opp@600000000 {
+		opp-hz = /bits/ 64 <600000000>;
+		opp-microvolt = <900000>;
+	};
+};
+
diff --git a/bifrost/r12p0/kernel/Documentation/dma-buf-test-exporter.txt b/bifrost/r12p0/kernel/Documentation/dma-buf-test-exporter.txt
new file mode 100644
index 0000000..8d8cbc9
--- /dev/null
+++ b/bifrost/r12p0/kernel/Documentation/dma-buf-test-exporter.txt
@@ -0,0 +1,46 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+=====================
+dma-buf-test-exporter
+=====================
+
+Overview
+--------
+
+The dma-buf-test-exporter is a simple exporter of dma_buf objects.
+It has a private API to allocate and manipulate the buffers which are represented as dma_buf fds.
+The private API allows:
+* simple allocation of physically non-contiguous buffers
+* simple allocation of physically contiguous buffers
+* query kernel side API usage stats (number of attachments, number of mappings, mmaps)
+* failure mode configuration (fail attach, mapping, mmap)
+* kernel side memset of buffers
+
+The buffers support all of the dma_buf API, including mmap.
+
+It supports being compiled as a module both in-tree and out-of-tree.
+
+See include/linux/dma-buf-test-exporter.h for the ioctl interface.
+See Documentation/dma-buf-sharing.txt for details on dma_buf.
+
+
diff --git a/bifrost/r12p0/kernel/Mconfig b/bifrost/r12p0/kernel/Mconfig
new file mode 100644
index 0000000..181d335
--- /dev/null
+++ b/bifrost/r12p0/kernel/Mconfig
@@ -0,0 +1,11 @@
+# copyright:
+# ----------------------------------------------------------------------------
+# This confidential and proprietary software may be used only as authorized
+# by a licensing agreement from ARM Limited.
+#      (C) COPYRIGHT 2017 ARM Limited, ALL RIGHTS RESERVED
+# The entire notice above must be reproduced on all authorized copies and
+# copies may only be made to the extent permitted by a licensing agreement
+# from ARM Limited.
+# ----------------------------------------------------------------------------
+
+source "kernel/drivers/gpu/arm/midgard/Mconfig"
diff --git a/bifrost/r12p0/kernel/build.bp b/bifrost/r12p0/kernel/build.bp
new file mode 100755
index 0000000..ec22c01
--- /dev/null
+++ b/bifrost/r12p0/kernel/build.bp
@@ -0,0 +1,49 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2016-2018 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_install_group {
+    name: "IG_kernel_modules",
+    android: {
+        install_path: "$(TARGET_OUT)/{{.module_path}}",
+    },
+    linux: {
+        install_path: "{{.install_dir}}/{{.module_path}}",
+    },
+}
+
+bob_defaults {
+    name: "kernel_defaults",
+    enabled: false,
+    exclude_srcs: [
+        "*.mod.c",
+    ],
+    include_dirs: [
+        "kernel/include",
+    ],
+    build_kernel_modules: {
+        enabled: true,
+    },
+    install_group: "IG_kernel_modules",
+}
+
+bob_alias {
+    name: "kernel",
+    srcs: [
+        "dma-buf-test-exporter",
+        "mali_kbase",
+    ],
+}
+
+subdirs = [
+    "drivers/base/dma_buf_test_exporter",
+    "drivers/gpu/arm/midgard",
+]
diff --git a/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/sconscript b/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/sconscript
new file mode 100644
index 0000000..aeaa5ea
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/sconscript
@@ -0,0 +1,29 @@
+#
+# (C) COPYRIGHT 2012, 2014, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+import os
+import re
+Import('env')
+
+if env.KernelVersion() >= (3, 18, 34):
+    SConscript("src/sconscript")
+    if env["tests"] and Glob("tests/sconscript"):
+        SConscript("tests/sconscript")
diff --git a/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/Kbuild b/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
new file mode 100644
index 0000000..ddf1bb5
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-m := dma_buf_lock.o
+endif
diff --git a/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/Makefile b/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/Makefile
new file mode 100644
index 0000000..3b10406
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/Makefile
@@ -0,0 +1,38 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: dma_buf_lock
+
+dma_buf_lock:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include"
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c b/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
new file mode 100755
index 0000000..529ce71
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
@@ -0,0 +1,898 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/reservation.h>
+#include <linux/dma-buf.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+
+#include <linux/fence.h>
+
+#define dma_fence_context_alloc(a) fence_context_alloc(a)
+#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e)
+#define dma_fence_get(a) fence_get(a)
+#define dma_fence_put(a) fence_put(a)
+#define dma_fence_signal(a) fence_signal(a)
+#define dma_fence_is_signaled(a) fence_is_signaled(a)
+#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c)
+#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b)
+
+#if (KERNEL_VERSION(4, 9, 68) > LINUX_VERSION_CODE)
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0)
+#else
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0)
+#endif
+
+#else
+
+#include <linux/dma-fence.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
+#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \
+	(a)->status ?: 1 \
+	: 0)
+#endif
+
+#endif /* < 4.10.0 */
+
+#include "dma_buf_lock.h"
+
+/* Maximum number of buffers that a single handle can address */
+#define DMA_BUF_LOCK_BUF_MAX 32
+
+#define DMA_BUF_LOCK_DEBUG 1
+
+#define DMA_BUF_LOCK_INIT_BIAS  0xFF
+
+static dev_t dma_buf_lock_dev;
+static struct cdev dma_buf_lock_cdev;
+static struct class *dma_buf_lock_class;
+static char dma_buf_lock_dev_name[] = "dma_buf_lock";
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static struct file_operations dma_buf_lock_fops =
+{
+	.owner   = THIS_MODULE,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = dma_buf_lock_ioctl,
+#else
+	.ioctl   = dma_buf_lock_ioctl,
+#endif
+	.compat_ioctl   = dma_buf_lock_ioctl,
+};
+
+typedef struct dma_buf_lock_resource
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence fence;
+#else
+	struct dma_fence fence;
+#endif
+	int *list_of_dma_buf_fds;               /* List of buffers copied from userspace */
+	atomic_t locked;                        /* Status of lock */
+	struct dma_buf **dma_bufs;
+	unsigned long exclusive;                /* Exclusive access bitmap */
+	atomic_t fence_dep_count;		/* Number of dma-fence dependencies */
+	struct list_head dma_fence_callbacks;	/* list of all callbacks set up to wait on other fences */
+	wait_queue_head_t wait;
+	struct kref refcount;
+	struct list_head link;
+	struct work_struct work;
+	int count;
+} dma_buf_lock_resource;
+
+/**
+ * struct dma_buf_lock_fence_cb - Callback data struct for dma-fence
+ * @fence_cb: Callback function
+ * @fence:    Pointer to the fence object on which this callback is waiting
+ * @res:      Pointer to dma_buf_lock_resource that is waiting on this callback
+ * @node:     List head for linking this callback to the lock resource
+ */
+struct dma_buf_lock_fence_cb {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence_cb fence_cb;
+	struct fence *fence;
+#else
+	struct dma_fence_cb fence_cb;
+	struct dma_fence *fence;
+#endif
+	struct dma_buf_lock_resource *res;
+	struct list_head node;
+};
+
+static LIST_HEAD(dma_buf_lock_resource_list);
+static DEFINE_MUTEX(dma_buf_lock_mutex);
+
+static inline int is_dma_buf_lock_file(struct file *);
+static void dma_buf_lock_dounlock(struct kref *ref);
+
+
+/*** dma_buf_lock fence part ***/
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(dma_buf_lock_fence_lock);
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_get_driver_name(struct fence *fence)
+#else
+dma_buf_lock_fence_get_driver_name(struct dma_fence *fence)
+#endif
+{
+	return "dma_buf_lock";
+}
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_get_timeline_name(struct fence *fence)
+#else
+dma_buf_lock_fence_get_timeline_name(struct dma_fence *fence)
+#endif
+{
+	return "dma_buf_lock.timeline";
+}
+
+static bool
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_enable_signaling(struct fence *fence)
+#else
+dma_buf_lock_fence_enable_signaling(struct dma_fence *fence)
+#endif
+{
+	return true;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+const struct fence_ops dma_buf_lock_fence_ops = {
+	.wait = fence_default_wait,
+#else
+const struct dma_fence_ops dma_buf_lock_fence_ops = {
+	.wait = dma_fence_default_wait,
+#endif
+	.get_driver_name = dma_buf_lock_fence_get_driver_name,
+	.get_timeline_name = dma_buf_lock_fence_get_timeline_name,
+	.enable_signaling = dma_buf_lock_fence_enable_signaling,
+};
+
+static void
+dma_buf_lock_fence_init(dma_buf_lock_resource *resource)
+{
+	dma_fence_init(&resource->fence,
+		       &dma_buf_lock_fence_ops,
+		       &dma_buf_lock_fence_lock,
+		       0,
+		       0);
+}
+
+static void
+dma_buf_lock_fence_free_callbacks(dma_buf_lock_resource *resource)
+{
+	struct dma_buf_lock_fence_cb *cb, *tmp;
+
+	/* Clean up and free callbacks. */
+	list_for_each_entry_safe(cb, tmp, &resource->dma_fence_callbacks, node) {
+		/* Cancel callbacks that hasn't been called yet and release the
+		 * reference taken in dma_buf_lock_fence_add_callback().
+		 */
+		dma_fence_remove_callback(cb->fence, &cb->fence_cb);
+		dma_fence_put(cb->fence);
+		list_del(&cb->node);
+		kfree(cb);
+	}
+}
+
+static void
+dma_buf_lock_fence_work(struct work_struct *pwork)
+{
+	dma_buf_lock_resource *resource =
+		container_of(pwork, dma_buf_lock_resource, work);
+
+	WARN_ON(atomic_read(&resource->fence_dep_count));
+	WARN_ON(!atomic_read(&resource->locked));
+	WARN_ON(!resource->exclusive);
+
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+}
+
+static void
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_callback(struct fence *fence, struct fence_cb *cb)
+#else
+dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
+#endif
+{
+	struct dma_buf_lock_fence_cb *dma_buf_lock_cb = container_of(cb,
+				struct dma_buf_lock_fence_cb,
+				fence_cb);
+	dma_buf_lock_resource *resource = dma_buf_lock_cb->res;
+
+#if DMA_BUF_LOCK_DEBUG
+	printk(KERN_DEBUG "dma_buf_lock_fence_callback\n");
+#endif
+
+	/* Callback function will be invoked in atomic context. */
+
+	if (atomic_dec_and_test(&resource->fence_dep_count)) {
+		atomic_set(&resource->locked, 1);
+		wake_up(&resource->wait);
+
+		if (resource->exclusive) {
+			/* Warn if the work was already queued */
+			WARN_ON(!schedule_work(&resource->work));
+		}
+	}
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+static int
+dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
+				struct fence *fence,
+				fence_func_t callback)
+#else
+static int
+dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
+				struct dma_fence *fence,
+				dma_fence_func_t callback)
+#endif
+{
+	int err = 0;
+	struct dma_buf_lock_fence_cb *fence_cb;
+
+	if (!fence)
+		return -EINVAL;
+
+	fence_cb = kmalloc(sizeof(*fence_cb), GFP_KERNEL);
+	if (!fence_cb)
+		return -ENOMEM;
+
+	fence_cb->fence = fence;
+	fence_cb->res   = resource;
+	INIT_LIST_HEAD(&fence_cb->node);
+
+	err = dma_fence_add_callback(fence, &fence_cb->fence_cb,
+				     callback);
+
+	if (err == -ENOENT) {
+		/* Fence signaled, get the completion result */
+		err = dma_fence_get_status(fence);
+
+		/* remap success completion to err code */
+		if (err == 1)
+			err = 0;
+
+		kfree(fence_cb);
+	} else if (err) {
+		kfree(fence_cb);
+	} else {
+		/*
+		 * Get reference to fence that will be kept until callback gets
+		 * cleaned up in dma_buf_lock_fence_free_callbacks().
+		 */
+		dma_fence_get(fence);
+		atomic_inc(&resource->fence_dep_count);
+		/* Add callback to resource's list of callbacks */
+		list_add(&fence_cb->node, &resource->dma_fence_callbacks);
+	}
+
+	return err;
+}
+
+static int
+dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource,
+					    struct reservation_object *resv,
+					    bool exclusive)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+#else
+	struct dma_fence *excl_fence = NULL;
+	struct dma_fence **shared_fences = NULL;
+#endif
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = reservation_object_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = dma_buf_lock_fence_add_callback(resource,
+						      excl_fence,
+						      dma_buf_lock_fence_callback);
+
+		/* Release our reference, taken by reservation_object_get_fences_rcu(),
+		 * to the fence. We have set up our callback (if that was possible),
+		 * and it's the fence's owner is responsible for singling the fence
+		 * before allowing it to disappear.
+		 */
+		dma_fence_put(excl_fence);
+
+		if (err)
+			goto out;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = dma_buf_lock_fence_add_callback(resource,
+							      shared_fences[i],
+							      dma_buf_lock_fence_callback);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Release all our references to the shared fences, taken by
+	 * reservation_object_get_fences_rcu(). We have set up our callback (if
+	 * that was possible), and it's the fence's owner is responsible for
+	 * signaling the fence before allowing it to disappear.
+	 */
+out:
+	for (i = 0; i < shared_count; i++)
+		dma_fence_put(shared_fences[i]);
+	kfree(shared_fences);
+
+	return err;
+}
+
+static void
+dma_buf_lock_release_fence_reservation(dma_buf_lock_resource *resource,
+				       struct ww_acquire_ctx *ctx)
+{
+	unsigned int r;
+
+	for (r = 0; r < resource->count; r++)
+		ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock);
+	ww_acquire_fini(ctx);
+}
+
+static int
+dma_buf_lock_acquire_fence_reservation(dma_buf_lock_resource *resource,
+				       struct ww_acquire_ctx *ctx)
+{
+	struct reservation_object *content_resv = NULL;
+	unsigned int content_resv_idx = 0;
+	unsigned int r;
+	int err = 0;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < resource->count; r++) {
+		if (resource->dma_bufs[r]->resv == content_resv) {
+			content_resv = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&resource->dma_bufs[r]->resv->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	content_resv_idx = r;
+
+	/* Unlock the locked one ones */
+	while (r--)
+		ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock);
+
+	if (content_resv)
+		ww_mutex_unlock(&content_resv->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+#if DMA_BUF_LOCK_DEBUG
+		printk(KERN_DEBUG "deadlock at dma_buf fd %i\n",
+		       resource->list_of_dma_buf_fds[content_resv_idx]);
+#endif
+		content_resv = resource->dma_bufs[content_resv_idx]->resv;
+		ww_mutex_lock_slow(&content_resv->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
+{
+	dma_buf_lock_resource *resource;
+
+	if (!is_dma_buf_lock_file(file))
+		return -EINVAL;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_release\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return 0;
+}
+
+static unsigned int dma_buf_lock_handle_poll(struct file *file,
+                                             struct poll_table_struct *wait)
+{
+	dma_buf_lock_resource *resource;
+	unsigned int ret = 0;
+
+	if (!is_dma_buf_lock_file(file))
+		return POLLERR;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll\n");
+#endif
+	if (1 == atomic_read(&resource->locked))
+	{
+		/* Resources have been locked */
+		ret = POLLIN | POLLRDNORM;
+		if (resource->exclusive)
+		{
+			ret |=  POLLOUT | POLLWRNORM;
+		}
+	}
+	else
+	{
+		if (!poll_does_not_wait(wait)) 
+		{
+			poll_wait(file, &resource->wait, wait);
+		}
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll : return %i\n", ret);
+#endif
+	return ret;
+}
+
+static const struct file_operations dma_buf_lock_handle_fops = {
+	.owner		= THIS_MODULE,
+	.release	= dma_buf_lock_handle_release,
+	.poll		= dma_buf_lock_handle_poll,
+};
+
+/*
+ * is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock
+ */
+static inline int is_dma_buf_lock_file(struct file *file)
+{
+	return file->f_op == &dma_buf_lock_handle_fops;
+}
+
+
+
+/*
+ * Start requested lock.
+ *
+ * Allocates required memory, copies dma_buf_fd list from userspace,
+ * acquires related reservation objects, and starts the lock.
+ */
+static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
+{
+	dma_buf_lock_resource *resource;
+	struct ww_acquire_ctx ww_ctx;
+	int size;
+	int fd;
+	int i;
+	int ret;
+
+	if (NULL == request->list_of_dma_buf_fds)
+	{
+		return -EINVAL;
+	}
+	if (request->count <= 0)
+	{
+		return -EINVAL;
+	}
+	if (request->count > DMA_BUF_LOCK_BUF_MAX)
+	{
+		return -EINVAL;
+	}
+	if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE &&
+	    request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
+	{
+		return -EINVAL;
+	}
+
+	resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
+	if (NULL == resource)
+	{
+		return -ENOMEM;
+	}
+
+	atomic_set(&resource->locked, 0);
+	kref_init(&resource->refcount);
+	INIT_LIST_HEAD(&resource->link);
+	INIT_WORK(&resource->work, dma_buf_lock_fence_work);
+	resource->count = request->count;
+
+	/* Allocate space to store dma_buf_fds received from user space */
+	size = request->count * sizeof(int);
+	resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->list_of_dma_buf_fds)
+	{
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Allocate space to store dma_buf pointers associated with dma_buf_fds */
+	size = sizeof(struct dma_buf *) * request->count;
+	resource->dma_bufs = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->dma_bufs)
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Copy requested list of dma_buf_fds from user space */
+	size = request->count * sizeof(int);
+	if (0 != copy_from_user(resource->list_of_dma_buf_fds, (void __user *)request->list_of_dma_buf_fds, size))
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource->dma_bufs);
+		kfree(resource);
+		return -ENOMEM;
+	}
+#if DMA_BUF_LOCK_DEBUG
+	for (i = 0; i < request->count; i++)
+	{
+		printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
+	}
+#endif
+
+	/* Initialize the fence associated with dma_buf_lock resource */
+	dma_buf_lock_fence_init(resource);
+
+	INIT_LIST_HEAD(&resource->dma_fence_callbacks);
+
+	atomic_set(&resource->fence_dep_count, DMA_BUF_LOCK_INIT_BIAS);
+
+	/* Add resource to global list */
+	mutex_lock(&dma_buf_lock_mutex);
+
+	list_add(&resource->link, &dma_buf_lock_resource_list);
+
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	for (i = 0; i < request->count; i++)
+	{
+		/* Convert fd into dma_buf structure */
+		resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
+
+		if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i])))
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+
+		/*Check the reservation object associated with dma_buf */
+		if (NULL == resource->dma_bufs[i]->resv)
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+#if DMA_BUF_LOCK_DEBUG
+		printk(KERN_DEBUG "dma_buf_lock_dolock : dma_buf_fd %i dma_buf %p dma_fence reservation %p\n",
+		       resource->list_of_dma_buf_fds[i], resource->dma_bufs[i], resource->dma_bufs[i]->resv);
+#endif
+	}
+
+	init_waitqueue_head(&resource->wait);
+
+	kref_get(&resource->refcount);
+
+	/* Create file descriptor associated with lock request */
+	fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops,
+	                      (void *)resource, 0);
+	if (fd < 0)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+		return fd;
+	}
+
+	resource->exclusive = request->exclusive;
+
+	/* Start locking process */
+	ret = dma_buf_lock_acquire_fence_reservation(resource, &ww_ctx);
+	if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+		printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d locking reservations.\n", ret);
+#endif
+		put_unused_fd(fd);
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+		return ret;
+	}
+
+	/* Take an extra reference for exclusive access, which will be dropped
+	 * once the pre-existing fences attached to dma-buf resources, for which
+	 * we have commited for exclusive access, are signaled.
+	 * At a given time there can be only one exclusive fence attached to a
+	 * reservation object, so the new exclusive fence replaces the original
+	 * fence and the future sync is done against the new fence which is
+	 * supposed to be signaled only after the original fence was signaled.
+	 * If the new exclusive fence is signaled prematurely then the resources
+	 * would become available for new access while they are already being
+	 * written to by the original owner.
+	 */
+	if (resource->exclusive)
+		kref_get(&resource->refcount);
+
+	for (i = 0; i < request->count; i++) {
+		struct reservation_object *resv = resource->dma_bufs[i]->resv;
+
+		if (!test_bit(i, &resource->exclusive)) {
+			ret = reservation_object_reserve_shared(resv);
+			if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d reserving space for shared fence.\n", ret);
+#endif
+				break;
+			}
+
+			ret = dma_buf_lock_add_fence_reservation_callback(resource,
+									  resv,
+									  false);
+			if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret);
+#endif
+				break;
+			}
+
+			reservation_object_add_shared_fence(resv, &resource->fence);
+		} else {
+			ret = dma_buf_lock_add_fence_reservation_callback(resource,
+									  resv,
+									  true);
+			if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret);
+#endif
+				break;
+			}
+
+			reservation_object_add_excl_fence(resv, &resource->fence);
+		}
+	}
+
+	dma_buf_lock_release_fence_reservation(resource, &ww_ctx);
+
+	/* Test if the callbacks were already triggered */
+	if (!atomic_sub_return(DMA_BUF_LOCK_INIT_BIAS, &resource->fence_dep_count)) {
+		atomic_set(&resource->locked, 1);
+
+		/* Drop the extra reference taken for exclusive access */
+		if (resource->exclusive)
+			dma_buf_lock_fence_work(&resource->work);
+	}
+
+	if (IS_ERR_VALUE((unsigned long)ret))
+	{
+		put_unused_fd(fd);
+
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+
+		return ret;
+	}
+
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_dolock : complete\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return fd;
+}
+
+static void dma_buf_lock_dounlock(struct kref *ref)
+{
+	int i;
+	dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
+
+	atomic_set(&resource->locked, 0);
+
+	/* Signal the resource's fence. */
+	dma_fence_signal(&resource->fence);
+
+	dma_buf_lock_fence_free_callbacks(resource);
+
+	list_del(&resource->link);
+
+	for (i = 0; i < resource->count; i++)
+	{
+		if (resource->dma_bufs[i])
+			dma_buf_put(resource->dma_bufs[i]);
+	}
+
+	kfree(resource->dma_bufs);
+	kfree(resource->list_of_dma_buf_fds);
+	dma_fence_put(&resource->fence);
+}
+
+static int __init dma_buf_lock_init(void)
+{
+	int err;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init\n");
+#endif
+	err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
+
+	if (0 == err)
+	{
+		cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops);
+
+		err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1);
+
+		if (0 == err)
+		{
+			dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
+			if (IS_ERR(dma_buf_lock_class))
+			{
+				err = PTR_ERR(dma_buf_lock_class);
+			}
+			else
+			{
+				struct device *mdev;
+				mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev, NULL, dma_buf_lock_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return 0;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(dma_buf_lock_class);
+			}
+			cdev_del(&dma_buf_lock_cdev);
+		}
+
+		unregister_chrdev_region(dma_buf_lock_dev, 1);
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init failed\n");
+#endif
+	return err;
+}
+
+static void __exit dma_buf_lock_exit(void)
+{
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_exit\n");
+#endif
+
+	/* Unlock all outstanding references */
+	while (1)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		if (list_empty(&dma_buf_lock_resource_list))
+		{
+			mutex_unlock(&dma_buf_lock_mutex);
+			break;
+		}
+		else
+		{
+			dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next, 
+			                                             dma_buf_lock_resource, link);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+		}
+	}
+
+	device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
+
+	class_destroy(dma_buf_lock_class);
+
+	cdev_del(&dma_buf_lock_cdev);
+
+	unregister_chrdev_region(dma_buf_lock_dev, 1);
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	dma_buf_lock_k_request request;
+	int size = _IOC_SIZE(cmd);
+
+	if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
+	{
+		return -ENOTTY;
+	}
+
+	switch (cmd)
+	{
+		case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
+			if (size != sizeof(dma_buf_lock_k_request))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&request, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+#if DMA_BUF_LOCK_DEBUG
+			printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
+#endif
+			return dma_buf_lock_dolock(&request);
+	}
+
+	return -ENOTTY;
+}
+
+module_init(dma_buf_lock_init);
+module_exit(dma_buf_lock_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h b/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
new file mode 100644
index 0000000..f2ae575
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _DMA_BUF_LOCK_H
+#define _DMA_BUF_LOCK_H
+
+typedef enum dma_buf_lock_exclusive
+{
+	DMA_BUF_LOCK_NONEXCLUSIVE = 0,
+	DMA_BUF_LOCK_EXCLUSIVE = -1
+} dma_buf_lock_exclusive;
+
+typedef struct dma_buf_lock_k_request
+{
+	int count;
+	int *list_of_dma_buf_fds;
+	int timeout;
+	dma_buf_lock_exclusive exclusive;
+} dma_buf_lock_k_request;
+
+#define DMA_BUF_LOCK_IOC_MAGIC '~'
+
+#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC       _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
+
+#define DMA_BUF_LOCK_IOC_MINNR 11
+#define DMA_BUF_LOCK_IOC_MAXNR 11
+
+#endif /* _DMA_BUF_LOCK_H */
diff --git a/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/sconscript b/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/sconscript
new file mode 100644
index 0000000..2db2553
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/dma_buf_lock/src/sconscript
@@ -0,0 +1,39 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+import os
+import re
+Import('env')
+
+src = [Glob('#kernel/drivers/base/dma_buf_lock/src/*.c'), Glob('#kernel/drivers/base/dma_buf_lock/src/*.h'), Glob('#kernel/drivers/base/dma_buf_lock/src/K*')]
+
+if env.GetOption('clean') :
+	# Clean module
+	env.Execute(Action("make clean", '[CLEAN] dma_buf_lock'))
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
+else:
+	# Build module
+	makeAction=Action("cd ${SOURCE.dir} && make dma_buf_lock && cp dma_buf_lock.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [makeAction])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
diff --git a/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild b/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
new file mode 100644
index 0000000..c382b79
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma-buf-test-exporter.o
+endif
diff --git a/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig b/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
new file mode 100644
index 0000000..66ca1bc
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+config DMA_SHARED_BUFFER_TEST_EXPORTER
+	tristate "Test exporter for the dma-buf framework"
+	depends on DMA_SHARED_BUFFER
+	help
+	  This option enables the test exporter usable to help test importerts.
diff --git a/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/Makefile b/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
new file mode 100644
index 0000000..528582c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
@@ -0,0 +1,36 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/build.bp b/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/build.bp
new file mode 100644
index 0000000..45508d3
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/build.bp
@@ -0,0 +1,23 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2017 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_kernel_module {
+    name: "dma-buf-test-exporter",
+    srcs: [
+        "Kbuild",
+        "dma-buf-test-exporter.c",
+    ],
+    kbuild_options: [
+        "CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m",
+    ],
+    defaults: ["kernel_defaults"],
+}
diff --git a/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
new file mode 100644
index 0000000..f3f8ac8
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
@@ -0,0 +1,724 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/dma-buf-test-exporter.h>
+#include <linux/dma-buf.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/atomic.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif
+#include <linux/dma-mapping.h>
+#endif
+
+struct dma_buf_te_alloc {
+	/* the real alloc */
+	int nr_pages;
+	struct page **pages;
+
+	/* the debug usage tracking */
+	int nr_attached_devices;
+	int nr_device_mappings;
+	int nr_cpu_mappings;
+
+	/* failure simulation */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+
+	bool contiguous;
+	dma_addr_t contig_dma_addr;
+	void *contig_cpu_addr;
+};
+
+static struct miscdevice te_device;
+
+static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc	*alloc;
+	alloc = buf->priv;
+
+	if (alloc->fail_attach)
+		return -EFAULT;
+
+	/* dma_buf is externally locked during call */
+	alloc->nr_attached_devices++;
+	return 0;
+}
+
+static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* dma_buf is externally locked during call */
+
+	alloc->nr_attached_devices--;
+}
+
+static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction)
+{
+	struct sg_table *sg;
+	struct scatterlist *iter;
+	struct dma_buf_te_alloc	*alloc;
+	int i;
+	int ret;
+
+	alloc = attachment->dmabuf->priv;
+
+	if (alloc->fail_map)
+		return ERR_PTR(-ENOMEM);
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* if the ARCH can't chain we can't have allocs larger than a single sg can hold */
+	if (alloc->nr_pages > SG_MAX_SINGLE_ALLOC)
+		return ERR_PTR(-EINVAL);
+#endif
+
+	sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (!sg)
+		return ERR_PTR(-ENOMEM);
+
+	/* from here we access the allocation object, so lock the dmabuf pointing to it */
+	mutex_lock(&attachment->dmabuf->lock);
+
+	if (alloc->contiguous)
+		ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+	else
+		ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL);
+	if (ret) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		kfree(sg);
+		return ERR_PTR(ret);
+	}
+
+	if (alloc->contiguous) {
+		sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE;
+		sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0);
+		sg_dma_address(sg->sgl) = alloc->contig_dma_addr;
+	} else {
+		for_each_sg(sg->sgl, iter, alloc->nr_pages, i)
+			sg_set_page(iter, alloc->pages[i], PAGE_SIZE, 0);
+	}
+
+	if (!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		sg_free_table(sg);
+		kfree(sg);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	alloc->nr_device_mappings++;
+	mutex_unlock(&attachment->dmabuf->lock);
+	return sg;
+}
+
+static void dma_buf_te_unmap(struct dma_buf_attachment *attachment,
+							 struct sg_table *sg, enum dma_data_direction direction)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = attachment->dmabuf->priv;
+
+	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction);
+	sg_free_table(sg);
+	kfree(sg);
+
+	mutex_lock(&attachment->dmabuf->lock);
+	alloc->nr_device_mappings--;
+	mutex_unlock(&attachment->dmabuf->lock);
+}
+
+static void dma_buf_te_release(struct dma_buf *buf)
+{
+	int i;
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* no need for locking */
+
+	if (alloc->contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr,
+						alloc->contig_dma_addr,
+						DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++)
+			__free_page(alloc->pages[i]);
+	}
+	kfree(alloc->pages);
+	kfree(alloc);
+}
+
+
+static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings++;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	BUG_ON(alloc->nr_cpu_mappings <= 0);
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings--;
+	mutex_unlock(&dma_buf->lock);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+static int dma_buf_te_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+#else
+static int dma_buf_te_mmap_fault(struct vm_fault *vmf)
+#endif
+{
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dmabuf;
+	struct page *pageptr;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	dmabuf = vma->vm_private_data;
+#else
+	dmabuf = vmf->vma->vm_private_data;
+#endif
+	alloc = dmabuf->priv;
+
+	if (vmf->pgoff > alloc->nr_pages)
+		return VM_FAULT_SIGBUS;
+
+	pageptr = alloc->pages[vmf->pgoff];
+
+	BUG_ON(!pageptr);
+
+	get_page(pageptr);
+	vmf->page = pageptr;
+
+	return 0;
+}
+
+struct vm_operations_struct dma_buf_te_vm_ops = {
+	.open = dma_buf_te_mmap_open,
+	.close = dma_buf_te_mmap_close,
+	.fault = dma_buf_te_mmap_fault
+};
+
+static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = dmabuf->priv;
+
+	if (alloc->fail_mmap)
+		return -ENOMEM;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTEXPAND;
+#endif
+	vma->vm_ops = &dma_buf_te_vm_ops;
+	vma->vm_private_data = dmabuf;
+
+	/*  we fault in the pages on access */
+
+	/* call open to do the ref-counting */
+	dma_buf_te_vm_ops.open(vma);
+
+	return 0;
+}
+
+static void *dma_buf_te_kmap_atomic(struct dma_buf *buf, unsigned long page_num)
+{
+	/* IGNORE */
+	return NULL;
+}
+
+static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long page_num)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return NULL;
+
+	return kmap(alloc->pages[page_num]);
+}
+static void dma_buf_te_kunmap(struct dma_buf *buf,
+		unsigned long page_num, void *addr)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return;
+
+	kunmap(alloc->pages[page_num]);
+	return;
+}
+
+static struct dma_buf_ops dma_buf_te_ops = {
+	/* real handlers */
+	.attach = dma_buf_te_attach,
+	.detach = dma_buf_te_detach,
+	.map_dma_buf = dma_buf_te_map,
+	.unmap_dma_buf = dma_buf_te_unmap,
+	.release = dma_buf_te_release,
+	.mmap = dma_buf_te_mmap,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
+	.kmap = dma_buf_te_kmap,
+	.kunmap = dma_buf_te_kunmap,
+
+	/* nop handlers for mandatory functions we ignore */
+	.kmap_atomic = dma_buf_te_kmap_atomic
+#else
+	.map = dma_buf_te_kmap,
+	.unmap = dma_buf_te_kunmap,
+
+	/* nop handlers for mandatory functions we ignore */
+	.map_atomic = dma_buf_te_kmap_atomic
+#endif
+};
+
+static int do_dma_buf_te_ioctl_version(struct dma_buf_te_ioctl_version __user *buf)
+{
+	struct dma_buf_te_ioctl_version v;
+
+	if (copy_from_user(&v, buf, sizeof(v)))
+		return -EFAULT;
+
+	if (v.op != DMA_BUF_TE_ENQ)
+		return -EFAULT;
+
+	v.op = DMA_BUF_TE_ACK;
+	v.major = DMA_BUF_TE_VER_MAJOR;
+	v.minor = DMA_BUF_TE_VER_MINOR;
+
+	if (copy_to_user(buf, &v, sizeof(v)))
+		return -EFAULT;
+	else
+		return 0;
+}
+
+static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, bool contiguous)
+{
+	struct dma_buf_te_ioctl_alloc alloc_req;
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dma_buf;
+	int i = 0;
+	int fd;
+
+	if (copy_from_user(&alloc_req, buf, sizeof(alloc_req))) {
+		dev_err(te_device.this_device, "%s: couldn't get user data", __func__);
+		goto no_input;
+	}
+
+	if (!alloc_req.size) {
+		dev_err(te_device.this_device, "%s: no size specified", __func__);
+		goto invalid_size;
+	}
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* Whilst it is possible to allocate larger buffer, we won't be able to
+	 * map it during actual usage (mmap() still succeeds). We fail here so
+	 * userspace code can deal with it early than having driver failure
+	 * later on. */
+	if (alloc_req.size > SG_MAX_SINGLE_ALLOC) {
+		dev_err(te_device.this_device, "%s: buffer size of %llu pages exceeded the mapping limit of %lu pages",
+				__func__, alloc_req.size, SG_MAX_SINGLE_ALLOC);
+		goto invalid_size;
+	}
+#endif
+
+	alloc = kzalloc(sizeof(struct dma_buf_te_alloc), GFP_KERNEL);
+	if (NULL == alloc) {
+		dev_err(te_device.this_device, "%s: couldn't alloc object", __func__);
+		goto no_alloc_object;
+	}
+
+	alloc->nr_pages = alloc_req.size;
+	alloc->contiguous = contiguous;
+
+	alloc->pages = kzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL);
+	if (!alloc->pages) {
+		dev_err(te_device.this_device,
+				"%s: couldn't alloc %d page structures", __func__,
+				alloc->nr_pages);
+		goto free_alloc_object;
+	}
+
+	if (contiguous) {
+		dma_addr_t dma_aux;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO,
+				DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO, &attrs);
+#else
+		alloc->contig_cpu_addr = dma_alloc_writecombine(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO);
+#endif
+		if (!alloc->contig_cpu_addr) {
+			dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %d pages", __func__, alloc->nr_pages);
+			goto free_page_struct;
+		}
+		dma_aux = alloc->contig_dma_addr;
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = pfn_to_page(PFN_DOWN(dma_aux));
+			dma_aux += PAGE_SIZE;
+		}
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+			if (NULL == alloc->pages[i]) {
+				dev_err(te_device.this_device, "%s: couldn't alloc page", __func__);
+				goto no_page;
+			}
+		}
+	}
+
+	/* alloc ready, let's export it */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0))
+	{
+		struct dma_buf_export_info export_info = {
+			.exp_name = "dma_buf_te",
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0))
+			.owner = THIS_MODULE,
+#endif
+			.ops = &dma_buf_te_ops,
+			.size = alloc->nr_pages << PAGE_SHIFT,
+			.flags = O_CLOEXEC | O_RDWR,
+			.priv = alloc,
+		};
+
+		dma_buf = dma_buf_export(&export_info);
+	}
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0))
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR, NULL);
+#else
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR);
+#endif
+
+	if (IS_ERR_OR_NULL(dma_buf)) {
+		dev_err(te_device.this_device, "%s: couldn't export dma_buf", __func__);
+		goto no_export;
+	}
+
+	/* get fd for buf */
+	fd = dma_buf_fd(dma_buf, O_CLOEXEC);
+
+	if (fd < 0) {
+		dev_err(te_device.this_device, "%s: couldn't get fd from dma_buf", __func__);
+		goto no_fd;
+	}
+
+	return fd;
+
+no_fd:
+	dma_buf_put(dma_buf);
+no_export:
+	/* i still valid */
+no_page:
+	if (contiguous) {
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr,
+						alloc->contig_dma_addr,
+						DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		while (i-- > 0)
+			__free_page(alloc->pages[i]);
+	}
+free_page_struct:
+	kfree(alloc->pages);
+free_alloc_object:
+	kfree(alloc);
+no_alloc_object:
+invalid_size:
+no_input:
+	return -EFAULT;
+}
+
+static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg)
+{
+	struct dma_buf_te_ioctl_status status;
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&status, arg, sizeof(status)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(status.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, get the current status */
+	alloc = dmabuf->priv;
+
+	/* lock while reading status to take a snapshot */
+	mutex_lock(&dmabuf->lock);
+	status.attached_devices = alloc->nr_attached_devices;
+	status.device_mappings = alloc->nr_device_mappings;
+	status.cpu_mappings = alloc->nr_cpu_mappings;
+	mutex_unlock(&dmabuf->lock);
+
+	if (copy_to_user(arg, &status, sizeof(status)))
+		goto err_have_dmabuf;
+
+	/* All OK */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing __user *arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_set_failing f;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, set the fail modes */
+	alloc = dmabuf->priv;
+	/* lock to set the fail modes atomically */
+	mutex_lock(&dmabuf->lock);
+	alloc->fail_attach = f.fail_attach;
+	alloc->fail_map    = f.fail_map;
+	alloc->fail_mmap   = f.fail_mmap;
+	mutex_unlock(&dmabuf->lock);
+
+	/* success */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
+{
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	unsigned int count;
+	unsigned int offset = 0;
+	int ret = 0;
+	int i;
+
+	attachment = dma_buf_attach(dma_buf, te_device.this_device);
+	if (IS_ERR_OR_NULL(attachment))
+		return -EBUSY;
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto no_import;
+	}
+
+	ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+	if (ret)
+		goto no_cpu_access;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		for (i = 0; i < sg_dma_len(sg); i = i + PAGE_SIZE) {
+			void *addr;
+
+			addr = dma_buf_kmap(dma_buf, i >> PAGE_SHIFT);
+			if (!addr) {
+				/* dma_buf_kmap is unimplemented in exynos and returns NULL */
+				ret = -EPERM;
+				goto no_kmap;
+			}
+			memset(addr, value, PAGE_SIZE);
+			dma_buf_kunmap(dma_buf, i >> PAGE_SHIFT, addr);
+		}
+		offset += sg_dma_len(sg);
+	}
+
+no_kmap:
+	dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+no_cpu_access:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+no_import:
+	dma_buf_detach(dma_buf, attachment);
+	return ret;
+}
+
+static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
+{
+
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_fill f;
+	int ret;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	ret = dma_te_buf_fill(dmabuf, f.value);
+	dma_buf_put(dmabuf);
+
+	return ret;
+}
+
+static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case DMA_BUF_TE_VERSION:
+		return do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg);
+	case DMA_BUF_TE_ALLOC:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false);
+	case DMA_BUF_TE_ALLOC_CONT:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true);
+	case DMA_BUF_TE_QUERY:
+		return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg);
+	case DMA_BUF_TE_SET_FAILING:
+		return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg);
+	case DMA_BUF_TE_FILL:
+		return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations dma_buf_te_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = dma_buf_te_ioctl,
+	.compat_ioctl = dma_buf_te_ioctl,
+};
+
+static int __init dma_buf_te_init(void)
+{
+	int res;
+	te_device.minor = MISC_DYNAMIC_MINOR;
+	te_device.name = "dma_buf_te";
+	te_device.fops = &dma_buf_te_fops;
+
+	res = misc_register(&te_device);
+	if (res) {
+		printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n");
+		return res;
+	}
+	te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	dev_info(te_device.this_device, "dma_buf_te ready\n");
+	return 0;
+
+}
+
+static void __exit dma_buf_te_exit(void)
+{
+	misc_deregister(&te_device);
+}
+
+module_init(dma_buf_te_init);
+module_exit(dma_buf_te_exit);
+MODULE_LICENSE("GPL");
+
diff --git a/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/sconscript b/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
new file mode 100644
index 0000000..aa727ef
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+Import('env')
+
+mod = env.BuildKernelModule('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', Glob('*.c'))
+env.KernelObjTarget('dma-buf-test-exporter', mod)
diff --git a/bifrost/r12p0/kernel/drivers/base/sconscript b/bifrost/r12p0/kernel/drivers/base/sconscript
new file mode 100755
index 0000000..66f6847
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/sconscript
@@ -0,0 +1,40 @@
+#
+# (C) COPYRIGHT 2010-2014, 2016-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+Import( 'env' )
+
+if Glob('bus_logger/sconscript'):
+	if env['buslog'] == '1':
+		SConscript('bus_logger/sconscript')
+
+if Glob('mali_fpga_sysctl/sconscript'):
+	SConscript('mali_fpga_sysctl/sconscript')
+
+if Glob('dma_buf_lock/sconscript'):
+	SConscript('dma_buf_lock/sconscript')
+
+if Glob('dma_buf_test_exporter/sconscript'):
+	SConscript('dma_buf_test_exporter/sconscript')
+
+if Glob('smc_protected_mode_switcher/sconscript'):
+	if env['platform_config'] == 'juno_soc':
+		SConscript('smc_protected_mode_switcher/sconscript')
+
diff --git a/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/Kbuild b/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/Kbuild
new file mode 100644
index 0000000..02f6cf6
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/Kbuild
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+obj-$(CONFIG_SMC_PROTECTED_MODE_SWITCHER) := smc_protected_mode_switcher.o
+smc_protected_mode_switcher-y := protected_mode_switcher_device.o \
+				 protected_mode_switcher_smc.o
diff --git a/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/Kconfig b/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/Kconfig
new file mode 100644
index 0000000..02f90f0
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/Kconfig
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+config SMC_PROTECTED_MODE_SWITCHER
+	tristate "SMC protected mode switcher"
+	help
+	  This option enables the SMC protected mode switcher
diff --git a/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/Makefile b/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/Makefile
new file mode 100644
index 0000000..fc05ee0
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/Makefile
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include -I$(CURDIR)/../../gpu/arm/midgard -DCONFIG_SMC_PROTECTED_MODE_SWITCHER" CONFIG_SMC_PROTECTED_MODE_SWITCHER=m modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_device.c b/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_device.c
new file mode 100644
index 0000000..2051fd6
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_device.c
@@ -0,0 +1,169 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/compiler.h>
+
+#include <linux/protected_mode_switcher.h>
+
+/*
+ * Protected Mode Switch
+ */
+
+#define SMC_FAST_CALL  (1 << 31)
+#define SMC_64         (1 << 30)
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_SIP    (2 << SMC_OEN_OFFSET)
+
+struct smc_protected_mode_device {
+	u16 smc_fid_enable;
+	u16 smc_fid_disable;
+	struct device *dev;
+};
+
+asmlinkage u64 __invoke_protected_mode_switch_smc(u64, u64, u64, u64);
+
+static u64 invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return __invoke_protected_mode_switch_smc(fid, arg0, arg1, arg2);
+}
+
+static int protected_mode_enable(struct protected_mode_device *protected_dev)
+{
+	struct smc_protected_mode_device *sdev = protected_dev->data;
+
+	if (!sdev)
+		/* Not supported */
+		return -EINVAL;
+
+	return invoke_smc(SMC_OEN_SIP,
+			sdev->smc_fid_enable, false,
+			0, 0, 0);
+
+}
+
+static int protected_mode_disable(struct protected_mode_device *protected_dev)
+{
+	struct smc_protected_mode_device *sdev = protected_dev->data;
+
+	if (!sdev)
+		/* Not supported */
+		return -EINVAL;
+
+	return invoke_smc(SMC_OEN_SIP,
+			sdev->smc_fid_disable, false,
+			0, 0, 0);
+}
+
+
+static int protected_mode_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct protected_mode_device *protected_dev;
+	struct smc_protected_mode_device *sdev;
+	u32 tmp = 0;
+
+	protected_dev = devm_kzalloc(&pdev->dev, sizeof(*protected_dev),
+			GFP_KERNEL);
+	if (!protected_dev)
+		return -ENOMEM;
+
+	sdev = devm_kzalloc(&pdev->dev, sizeof(*sdev), GFP_KERNEL);
+	if (!sdev) {
+		devm_kfree(&pdev->dev, protected_dev);
+		return -ENOMEM;
+	}
+
+	protected_dev->data = sdev;
+	protected_dev->ops.protected_mode_enable = protected_mode_enable;
+	protected_dev->ops.protected_mode_disable = protected_mode_disable;
+	sdev->dev = dev;
+
+	if (!of_property_read_u32(dev->of_node, "arm,smc,protected_enable",
+			&tmp))
+		sdev->smc_fid_enable = tmp;
+
+	if (!of_property_read_u32(dev->of_node, "arm,smc,protected_disable",
+			&tmp))
+		sdev->smc_fid_disable = tmp;
+
+	/* Check older property names, for compatibility with outdated DTBs */
+	if (!of_property_read_u32(dev->of_node, "arm,smc,secure_enable", &tmp))
+		sdev->smc_fid_enable = tmp;
+
+	if (!of_property_read_u32(dev->of_node, "arm,smc,secure_disable", &tmp))
+		sdev->smc_fid_disable = tmp;
+
+	platform_set_drvdata(pdev, protected_dev);
+
+	dev_info(&pdev->dev, "Protected mode switcher %s loaded\n", pdev->name);
+	dev_info(&pdev->dev, "SMC enable: 0x%x\n", sdev->smc_fid_enable);
+	dev_info(&pdev->dev, "SMC disable: 0x%x\n", sdev->smc_fid_disable);
+
+	return 0;
+}
+
+static int protected_mode_remove(struct platform_device *pdev)
+{
+	dev_info(&pdev->dev, "Protected mode switcher %s removed\n",
+			pdev->name);
+
+	return 0;
+}
+
+static const struct of_device_id protected_mode_dt_ids[] = {
+	{ .compatible = "arm,smc-protected-mode-switcher" },
+	{ .compatible = "arm,secure-mode-switcher" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, protected_mode_dt_ids);
+
+static struct platform_driver protected_mode_driver = {
+	.probe = protected_mode_probe,
+	.remove = protected_mode_remove,
+	.driver = {
+		.name = "smc-protected-mode-switcher",
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(protected_mode_dt_ids),
+	}
+};
+
+module_platform_driver(protected_mode_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_smc.S b/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_smc.S
new file mode 100644
index 0000000..5eae328
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_smc.S
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+#include <linux/linkage.h>
+
+/* u64 invoke_protected_mode_switch_smc(u64 function_id, u64 arg0, u64 arg1,
+		u64 arg2) */
+ENTRY(__invoke_protected_mode_switch_smc)
+	smc	#0
+	ret
+ENDPROC(__invoke_protected_mode_switch_smc)
diff --git a/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/sconscript b/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/sconscript
new file mode 100644
index 0000000..17c54ab
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/base/smc_protected_mode_switcher/sconscript
@@ -0,0 +1,49 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+import os
+import re
+Import('env')
+
+if env['v'] != '1':
+	env['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}'
+
+src = [ Glob('#kernel/drivers/base/smc_protected_mode_switcher/*.c'),
+		Glob('#kernel/drivers/base/smc_protected_mode_switcher/*.S'),
+		Glob('#kernel/drivers/base/smc_protected_mode_switcher/*.h'),
+		Glob('#kernel/drivers/base/smc_protected_mode_switcher/K*'),
+		Glob('#kernel/drivers/base/smc_protected_mode_switcher/Makefile') ]
+
+env.Append( CPPPATH = '#kernel/include' )
+env['smc_protected_mode_switcher'] = 1
+
+if env.GetOption('clean') :
+	# Clean module
+	env.Execute(Action("make clean", '[CLEAN] smc_protected_mode_switcher'))
+	cmd = env.Command('$STATIC_LIB_PATH/smc_protected_mode_switcher.ko', src, [])
+	env.ProgTarget('smc_protected_mode_switcher', cmd)
+else:
+	# Build module
+	makeAction=Action("cd ${SOURCE.dir} && make && cp smc_protected_mode_switcher.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/smc_protected_mode_switcher.ko', src, [makeAction])
+	env.ProgTarget('smc_protected_mode_switcher', cmd)
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/Kbuild b/bifrost/r12p0/kernel/drivers/gpu/arm/Kbuild
new file mode 100644
index 0000000..1a6fa3c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/Kbuild
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/Kconfig b/bifrost/r12p0/kernel/drivers/gpu/arm/Kconfig
new file mode 100644
index 0000000..693b86f
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/Kconfig
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+menu "ARM GPU Configuration"
+source "drivers/gpu/arm/midgard/Kconfig"
+endmenu
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Kbuild b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Kbuild
new file mode 100755
index 0000000..9d3ab61
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Kbuild
@@ -0,0 +1,176 @@
+#
+# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r12p0-01rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_MOCK_TEST ?= 0
+MALI_COVERAGE ?= 0
+CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
+# This workaround is for what seems to be a compiler bug we observed in
+# GCC 4.7 on AOSP 4.3.  The bug caused an intermittent failure compiling
+# the "_Pragma" syntax, where an error message is returned:
+#
+# "internal compiler error: unspellable token PRAGMA"
+#
+# This regression has thus far only been seen on the GCC 4.7 compiler bundled
+# with AOSP 4.3.0.  So this makefile, intended for in-tree kernel builds
+# which are not known to be used with AOSP, is hardcoded to disable the
+# workaround, i.e. set the define to 0.
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
+	-DMALI_COVERAGE=$(MALI_COVERAGE) \
+	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+	-DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+#ldflags-y += --strip-debug
+
+# Use our defines when compiling
+ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+	mali_kbase_device.c \
+	mali_kbase_cache_policy.c \
+	mali_kbase_mem.c \
+	mali_kbase_mmu.c \
+	mali_kbase_ctx_sched.c \
+	mali_kbase_jd.c \
+	mali_kbase_jd_debugfs.c \
+	mali_kbase_jm.c \
+	mali_kbase_gpuprops.c \
+	mali_kbase_js.c \
+	mali_kbase_js_ctx_attr.c \
+	mali_kbase_event.c \
+	mali_kbase_context.c \
+	mali_kbase_pm.c \
+	mali_kbase_config.c \
+	mali_kbase_vinstr.c \
+	mali_kbase_softjobs.c \
+	mali_kbase_10969_workaround.c \
+	mali_kbase_hw.c \
+	mali_kbase_utility.c \
+	mali_kbase_debug.c \
+	mali_kbase_trace_timeline.c \
+	mali_kbase_gpu_memory_debugfs.c \
+	mali_kbase_mem_linux.c \
+	mali_kbase_core_linux.c \
+	mali_kbase_replay.c \
+	mali_kbase_mem_profile_debugfs.c \
+	mali_kbase_mmu_mode_lpae.c \
+	mali_kbase_mmu_mode_aarch64.c \
+	mali_kbase_disjoint_events.c \
+	mali_kbase_gator_api.c \
+	mali_kbase_debug_mem_view.c \
+	mali_kbase_debug_job_fault.c \
+	mali_kbase_smc.c \
+	mali_kbase_mem_pool.c \
+	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_tlstream.c \
+	mali_kbase_strings.c \
+	mali_kbase_as_fault_debugfs.c \
+	mali_kbase_regs_history_debugfs.c \
+	thirdparty/mali_kbase_mmap.c
+
+
+ifeq ($(CONFIG_MALI_JOB_DUMP),y)
+	SRC += mali_kbase_gwt.c
+endif
+
+ifeq ($(MALI_UNIT_TEST),1)
+	SRC += mali_kbase_tlstream_test.c
+endif
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+	SRC += mali_kbase_regs_dump_debugfs.c
+endif
+
+
+ccflags-y += -I$(KBASE_PATH)
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+mali_kbase-y := $(SRC:.c=.o)
+
+# Kconfig passes in the name with quotes for in-tree builds - remove them.
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_NAME))
+MALI_PLATFORM_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_DIR)
+include $(src)/$(MALI_PLATFORM_DIR)/Kbuild
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+  ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
+    include $(src)/ipa/Kbuild
+  endif
+endif
+
+mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \
+	mali_kbase_dma_fence.o \
+	mali_kbase_fence.o
+mali_kbase-$(CONFIG_SYNC) += \
+	mali_kbase_sync_android.o \
+	mali_kbase_sync_common.o
+mali_kbase-$(CONFIG_SYNC_FILE) += \
+	mali_kbase_sync_file.o \
+	mali_kbase_sync_common.o \
+	mali_kbase_fence.o
+
+ifeq ($(MALI_MOCK_TEST),1)
+# Test functionality
+mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
+endif
+
+include  $(src)/backend/gpu/Kbuild
+mali_kbase-y += $(BACKEND:.c=.o)
+
+
+ccflags-y += -I$(src)/backend/gpu
+subdir-ccflags-y += -I$(src)/backend/gpu
+
+# For kutf and mali_kutf_irq_latency_test
+obj-$(CONFIG_MALI_KUTF) += tests/
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Kconfig b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Kconfig
new file mode 100755
index 0000000..d971903
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Kconfig
@@ -0,0 +1,217 @@
+#
+# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+menuconfig MALI_MIDGARD
+	tristate "Mali Midgard series support"
+	select GPU_TRACEPOINTS if ANDROID
+	default n
+	help
+	  Enable this option to build support for a ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+	  You will need the Gator device driver already loaded before loading this driver when enabling
+	  Streamline debug support.
+	  This is a legacy interface required by older versions of Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD && PM_DEVFREQ
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if the Linux Kernel has built in
+	  support for DMA_BUF fences.
+
+config MALI_PLATFORM_NAME
+	depends on MALI_MIDGARD
+	string "Platform name"
+	default "devicetree"
+	help
+	  Enter the name of the desired platform configuration directory to
+	  include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
+	  exist.
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default n
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_CORESTACK
+	bool "Support controlling power to the GPU core stack"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enabling this feature on supported GPUs will let the driver powering
+	  on/off the GPU core stack independently without involving the Power
+	  Domain Controller. This should only be enabled on platforms which
+	  integration of the PDC to the Mali GPU is known to be problematic.
+	  This feature is currently only supported on t-Six and t-HEx GPUs.
+
+	  If unsure, say N.
+
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_FENCE_DEBUG
+	bool "Debug sync fence usage"
+	depends on MALI_MIDGARD && MALI_EXPERT && (SYNC || SYNC_FILE)
+	default y if MALI_DEBUG
+	help
+	  Select this option to enable additional checking and reporting on the
+	  use of sync fences in the Mali driver.
+
+	  This will add a 3s timeout to all sync fence waits in the Mali
+	  driver, so that when work for Mali has been waiting on a sync fence
+	  for a long time a debug message will be printed, detailing what fence
+	  is causing the block, and which dependent Mali atoms are blocked as a
+	  result of this.
+
+	  The timeout can be changed at runtime through the js_soft_timeout
+	  device attribute, where the timeout is specified in milliseconds.
+
+config MALI_NO_MALI
+	bool "No Mali"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This can be used to test the driver in a simulated environment
+	  whereby the hardware is not physically present. If the hardware is physically
+	  present it will not be used. This can be used to test the majority of the
+	  driver without needing actual hardware or for software benchmarking.
+	  All calls to the simulated hardware will complete immediately as if the hardware
+	  completed the task.
+
+config MALI_ERROR_INJECT
+	bool "Error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+	default n
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_TRACE_TIMELINE
+	bool "Timeline tracing"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enables timeline tracing through the kernel tracepoint system.
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event. This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_JOB_DUMP
+	bool "Enable system level support needed for job dumping"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system level support needed for
+	  job dumping. This is typically used for instrumentation but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_2MB_ALLOC
+	bool "Attempt to allocate 2MB pages"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Rather than allocating all GPU memory page-by-page, attempt to
+	  allocate 2MB pages from the kernel. This reduces TLB pressure and
+	  helps to prevent memory fragmentation.
+
+	  If in doubt, say N
+
+config MALI_PWRSOFT_765
+	bool "PWRSOFT-765 ticket"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged
+	  in kernel v4.10, however if backported into the kernel then this
+	  option must be manually selected.
+
+	  If using kernel >= v4.10 then say N, otherwise if devfreq cooling
+	  changes have been backported say Y to avoid compilation errors.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
+source "drivers/gpu/arm/midgard/tests/Kconfig"
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Makefile b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Makefile
new file mode 100755
index 0000000..13af9f4
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Makefile
@@ -0,0 +1,42 @@
+#
+# (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
+KBASE_PATH_RELATIVE = $(CURDIR)
+
+ifeq ($(MALI_UNIT_TEST), 1)
+	EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+
+ifeq ($(CONFIG_MALI_FPGA_BUS_LOGGER),y)
+#Add bus logger symbols
+EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
+endif
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
new file mode 100644
index 0000000..d7898cb
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Mconfig b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Mconfig
new file mode 100755
index 0000000..9ad765a
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/Mconfig
@@ -0,0 +1,209 @@
+#
+# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+menuconfig MALI_MIDGARD
+	bool "Mali Midgard series support"
+	default y
+	help
+	  Enable this option to build support for a ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD
+	default y if INSTRUMENTATION_STREAMLINE_OLD
+	default n
+	help
+	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+	  You will need the Gator device driver already loaded before loading this driver when enabling
+	  Streamline debug support.
+	  This is a legacy interface required by older versions of Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD
+	default y if PLATFORM_JUNO
+	default y if PLATFORM_CUSTOM
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if the Linux Kernel has built in
+	  support for DMA_BUF fences.
+
+config MALI_PLATFORM_NAME
+	depends on MALI_MIDGARD
+	string "Platform name"
+	default "arndale" if PLATFORM_ARNDALE
+	default "arndale_octa" if PLATFORM_ARNDALE_OCTA
+	default "rk" if PLATFORM_FIREFLY
+	default "hisilicon" if PLATFORM_HIKEY960
+	default "vexpress" if PLATFORM_VEXPRESS
+	default "devicetree"
+	help
+	  Enter the name of the desired platform configuration directory to
+	  include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
+	  exist.
+
+config MALI_MOCK_TEST
+	bool
+	depends on MALI_MIDGARD && !RELEASE
+	default y
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default y
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_CORESTACK
+	bool "Support controlling power to the GPU core stack"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enabling this feature on supported GPUs will let the driver powering
+	  on/off the GPU core stack independently without involving the Power
+	  Domain Controller. This should only be enabled on platforms which
+	  integration of the PDC to the Mali GPU is known to be problematic.
+	  This feature is currently only supported on t-Six and t-HEx GPUs.
+
+	  If unsure, say N.
+
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default y if DEBUG
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_FENCE_DEBUG
+	bool "Debug sync fence usage"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default y if MALI_DEBUG
+	help
+	  Select this option to enable additional checking and reporting on the
+	  use of sync fences in the Mali driver.
+
+	  This will add a 3s timeout to all sync fence waits in the Mali
+	  driver, so that when work for Mali has been waiting on a sync fence
+	  for a long time a debug message will be printed, detailing what fence
+	  is causing the block, and which dependent Mali atoms are blocked as a
+	  result of this.
+
+	  The timeout can be changed at runtime through the js_soft_timeout
+	  device attribute, where the timeout is specified in milliseconds.
+
+config MALI_ERROR_INJECT
+	bool "Error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI
+	default n
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_ERROR_INJECT_RANDOM
+	bool "Random error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI && MALI_ERROR_INJECT
+	default n
+	help
+	  Injected errors are random, rather than user-driven.
+
+config MALI_TRACE_TIMELINE
+	bool "Timeline tracing"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enables timeline tracing through the kernel tracepoint system.
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event.	This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_2MB_ALLOC
+	bool "Attempt to allocate 2MB pages"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Rather than allocating all GPU memory page-by-page, attempt to
+	  allocate 2MB pages from the kernel. This reduces TLB pressure and
+	  helps to prevent memory fragmentation.
+
+	  If in doubt, say N
+
+config MALI_FPGA_BUS_LOGGER
+	bool "Enable bus log integration"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+
+config MALI_PWRSOFT_765
+	bool "PWRSOFT-765 ticket"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  PWRSOFT-765 fixes devfreq cooling devices issues. However, they are
+	  not merged in mainline kernel yet. So this define helps to guard those
+	  parts of the code.
+
+source "kernel/drivers/gpu/arm/midgard/tests/Mconfig"
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
new file mode 100644
index 0000000..bdf4c5a
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -0,0 +1,66 @@
+#
+# (C) COPYRIGHT 2014,2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+BACKEND += \
+	backend/gpu/mali_kbase_cache_policy_backend.c \
+	backend/gpu/mali_kbase_device_hw.c \
+	backend/gpu/mali_kbase_gpu.c \
+	backend/gpu/mali_kbase_gpuprops_backend.c \
+	backend/gpu/mali_kbase_debug_job_fault_backend.c \
+	backend/gpu/mali_kbase_irq_linux.c \
+	backend/gpu/mali_kbase_instr_backend.c \
+	backend/gpu/mali_kbase_jm_as.c \
+	backend/gpu/mali_kbase_jm_hw.c \
+	backend/gpu/mali_kbase_jm_rb.c \
+	backend/gpu/mali_kbase_js_affinity.c \
+	backend/gpu/mali_kbase_js_backend.c \
+	backend/gpu/mali_kbase_mmu_hw_direct.c \
+	backend/gpu/mali_kbase_pm_backend.c \
+	backend/gpu/mali_kbase_pm_driver.c \
+	backend/gpu/mali_kbase_pm_metrics.c \
+	backend/gpu/mali_kbase_pm_ca.c \
+	backend/gpu/mali_kbase_pm_ca_fixed.c \
+	backend/gpu/mali_kbase_pm_always_on.c \
+	backend/gpu/mali_kbase_pm_coarse_demand.c \
+	backend/gpu/mali_kbase_pm_demand.c \
+	backend/gpu/mali_kbase_pm_policy.c \
+	backend/gpu/mali_kbase_time.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+BACKEND += \
+	backend/gpu/mali_kbase_pm_ca_random.c \
+	backend/gpu/mali_kbase_pm_demand_always_powered.c \
+	backend/gpu/mali_kbase_pm_fast_start.c
+endif
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+BACKEND += \
+	backend/gpu/mali_kbase_devfreq.c \
+	backend/gpu/mali_kbase_pm_ca_devfreq.c
+endif
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+	# Dummy model
+	BACKEND += backend/gpu/mali_kbase_model_dummy.c
+	BACKEND += backend/gpu/mali_kbase_model_linux.c
+	# HW error simulation
+	BACKEND += backend/gpu/mali_kbase_model_error_generator.c
+endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
new file mode 100644
index 0000000..196a776
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100644
index 0000000..49567f7
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode)
+{
+	kbdev->current_gpu_coherency_mode = mode;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+}
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100644
index 0000000..f78ada7
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+  * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+  *			in the GPU.
+  * @kbdev:	Device pointer
+  * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+  */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
new file mode 100644
index 0000000..c9c463e
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -0,0 +1,162 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_debug_job_fault.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+/*GPU_CONTROL_REG(r)*/
+static int gpu_control_reg_snapshot[] = {
+	GPU_ID,
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI
+};
+
+/* JOB_CONTROL_REG(r) */
+static int job_control_reg_snapshot[] = {
+	JOB_IRQ_MASK,
+	JOB_IRQ_STATUS
+};
+
+/* JOB_SLOT_REG(n,r) */
+static int job_slot_reg_snapshot[] = {
+	JS_HEAD_LO,
+	JS_HEAD_HI,
+	JS_TAIL_LO,
+	JS_TAIL_HI,
+	JS_AFFINITY_LO,
+	JS_AFFINITY_HI,
+	JS_CONFIG,
+	JS_STATUS,
+	JS_HEAD_NEXT_LO,
+	JS_HEAD_NEXT_HI,
+	JS_AFFINITY_NEXT_LO,
+	JS_AFFINITY_NEXT_HI,
+	JS_CONFIG_NEXT
+};
+
+/*MMU_REG(r)*/
+static int mmu_reg_snapshot[] = {
+	MMU_IRQ_MASK,
+	MMU_IRQ_STATUS
+};
+
+/* MMU_AS_REG(n,r) */
+static int as_reg_snapshot[] = {
+	AS_TRANSTAB_LO,
+	AS_TRANSTAB_HI,
+	AS_MEMATTR_LO,
+	AS_MEMATTR_HI,
+	AS_FAULTSTATUS,
+	AS_FAULTADDRESS_LO,
+	AS_FAULTADDRESS_HI,
+	AS_STATUS
+};
+
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range)
+{
+	int i, j;
+	int offset = 0;
+	int slot_number;
+	int as_number;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	slot_number = kctx->kbdev->gpu_props.num_job_slots;
+	as_number = kctx->kbdev->gpu_props.num_address_spaces;
+
+	/* get the GPU control registers*/
+	for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job control registers*/
+	for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				JOB_CONTROL_REG(job_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job Slot registers*/
+	for (j = 0; j < slot_number; j++)	{
+		for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+			JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	/* get the MMU registers*/
+	for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Address space registers*/
+	for (j = 0; j < as_number; j++) {
+		for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+					MMU_AS_REG(j, as_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	WARN_ON(offset >= (reg_range*2/4));
+
+	/* set the termination flag*/
+	kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
+	kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
+			offset);
+
+	return true;
+}
+
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
+{
+	int offset = 0;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
+		kctx->reg_dump[offset+1] =
+				kbase_reg_read(kctx->kbdev,
+						kctx->reg_dump[offset], NULL);
+		offset += 2;
+	}
+	return true;
+}
+
+
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
new file mode 100755
index 0000000..432c2aa
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -0,0 +1,439 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#define dev_pm_opp_find_freq_floor opp_find_freq_floor
+#endif /* Linux >= 3.13 */
+
+/**
+ * opp_translate - Translate nominal OPP frequency from devicetree into real
+ *                 frequency and core mask
+ * @kbdev:     Device pointer
+ * @freq:      Nominal frequency
+ * @core_mask: Pointer to u64 to store core mask to
+ *
+ * Return: Real target frequency
+ *
+ * This function will only perform translation if an operating-points-v2-mali
+ * table is present in devicetree. If one is not present then it will return an
+ * untranslated frequency and all cores enabled.
+ */
+static unsigned long opp_translate(struct kbase_device *kbdev,
+		unsigned long freq, u64 *core_mask)
+{
+	int i;
+
+	for (i = 0; i < kbdev->num_opps; i++) {
+		if (kbdev->opp_table[i].opp_freq == freq) {
+			*core_mask = kbdev->opp_table[i].core_mask;
+			return kbdev->opp_table[i].real_freq;
+		}
+	}
+
+	/* Failed to find OPP - return all cores enabled & nominal frequency */
+	*core_mask = kbdev->gpu_props.props.raw_props.shader_present;
+
+	return freq;
+}
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long nominal_freq;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err;
+	u64 core_mask;
+
+	freq = *target_freq;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_lock();
+#endif
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	voltage = dev_pm_opp_get_voltage(opp);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_unlock();
+#endif
+	if (IS_ERR_OR_NULL(opp)) {
+		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+	dev_pm_opp_put(opp);
+#endif
+
+	nominal_freq = freq;
+
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (kbdev->current_nominal_freq == nominal_freq) {
+		*target_freq = nominal_freq;
+		return 0;
+	}
+
+	freq = opp_translate(kbdev, nominal_freq, &core_mask);
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq < freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to increase voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(kbdev->clock, freq);
+	if (err) {
+		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+				freq, *target_freq);
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq > freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	if (kbdev->pm.backend.ca_current_policy->id ==
+			KBASE_PM_CA_POLICY_ID_DEVFREQ)
+		kbase_devfreq_set_core_mask(kbdev, core_mask);
+
+	*target_freq = nominal_freq;
+	kbdev->current_voltage = voltage;
+	kbdev->current_nominal_freq = nominal_freq;
+	kbdev->current_freq = freq;
+	kbdev->current_core_mask = core_mask;
+
+	KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)nominal_freq);
+
+	return err;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	*freq = kbdev->current_nominal_freq;
+
+	return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbasep_pm_metrics diff;
+
+	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->last_devfreq_metrics, &diff);
+
+	stat->busy_time = diff.time_busy;
+	stat->total_time = diff.time_busy + diff.time_idle;
+	stat->current_frequency = kbdev->current_nominal_freq;
+	stat->private_data = NULL;
+
+	return 0;
+}
+
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+		struct devfreq_dev_profile *dp)
+{
+	int count;
+	int i = 0;
+	unsigned long freq;
+	struct dev_pm_opp *opp;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_lock();
+#endif
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_unlock();
+#endif
+	if (count < 0)
+		return count;
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_lock();
+#endif
+	for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) {
+		opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+		dev_pm_opp_put(opp);
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) */
+
+		dp->freq_table[i] = freq;
+	}
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_unlock();
+#endif
+
+	if (count != i)
+		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
+				count, i);
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	kbase_devfreq_term_freq_table(kbdev);
+}
+
+static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
+{
+	struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node,
+			"operating-points-v2", 0);
+	struct device_node *node;
+	int i = 0;
+	int count;
+
+	if (!opp_node)
+		return 0;
+	if (!of_device_is_compatible(opp_node, "operating-points-v2-mali"))
+		return 0;
+
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	kbdev->opp_table = kmalloc_array(count,
+			sizeof(struct kbase_devfreq_opp), GFP_KERNEL);
+	if (!kbdev->opp_table)
+		return -ENOMEM;
+
+	for_each_available_child_of_node(opp_node, node) {
+		u64 core_mask;
+		u64 opp_freq, real_freq;
+		const void *core_count_p;
+
+		if (of_property_read_u64(node, "opp-hz", &opp_freq)) {
+			dev_warn(kbdev->dev, "OPP is missing required opp-hz property\n");
+			continue;
+		}
+		if (of_property_read_u64(node, "opp-hz-real", &real_freq))
+			real_freq = opp_freq;
+		if (of_property_read_u64(node, "opp-core-mask", &core_mask))
+			core_mask =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		core_count_p = of_get_property(node, "opp-core-count", NULL);
+		if (core_count_p) {
+			u64 remaining_core_mask =
+				kbdev->gpu_props.props.raw_props.shader_present;
+			int core_count = be32_to_cpup(core_count_p);
+
+			core_mask = 0;
+
+			for (; core_count > 0; core_count--) {
+				int core = ffs(remaining_core_mask);
+
+				if (!core) {
+					dev_err(kbdev->dev, "OPP has more cores than GPU\n");
+					return -ENODEV;
+				}
+
+				core_mask |= (1ull << (core-1));
+				remaining_core_mask &= ~(1ull << (core-1));
+			}
+		}
+
+		if (!core_mask) {
+			dev_err(kbdev->dev, "OPP has invalid core mask of 0\n");
+			return -ENODEV;
+		}
+
+		kbdev->opp_table[i].opp_freq = opp_freq;
+		kbdev->opp_table[i].real_freq = real_freq;
+		kbdev->opp_table[i].core_mask = core_mask;
+
+		dev_info(kbdev->dev, "OPP %d : opp_freq=%llu real_freq=%llu core_mask=%llx\n",
+				i, opp_freq, real_freq, core_mask);
+
+		i++;
+	}
+
+	kbdev->num_opps = i;
+
+	return 0;
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	int err;
+
+	if (!kbdev->clock) {
+		dev_err(kbdev->dev, "Clock not available for devfreq\n");
+		return -ENODEV;
+	}
+
+	kbdev->current_freq = clk_get_rate(kbdev->clock);
+	kbdev->current_nominal_freq = kbdev->current_freq;
+
+	dp = &kbdev->devfreq_profile;
+
+	dp->initial_freq = kbdev->current_freq;
+	dp->polling_ms = 100;
+	dp->target = kbase_devfreq_target;
+	dp->get_dev_status = kbase_devfreq_status;
+	dp->get_cur_freq = kbase_devfreq_cur_freq;
+	dp->exit = kbase_devfreq_exit;
+
+	if (kbase_devfreq_init_freq_table(kbdev, dp))
+		return -EFAULT;
+
+	if (dp->max_state > 0) {
+		/* Record the maximum frequency possible */
+		kbdev->gpu_props.props.core_props.gpu_freq_khz_max =
+			dp->freq_table[0] / 1000;
+	};
+
+	err = kbase_devfreq_init_core_mask_table(kbdev);
+	if (err)
+		return err;
+
+	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+				"simple_ondemand", NULL);
+	if (IS_ERR(kbdev->devfreq)) {
+		kfree(dp->freq_table);
+		return PTR_ERR(kbdev->devfreq);
+	}
+
+	/* devfreq_add_device only copies a few of kbdev->dev's fields, so
+	 * set drvdata explicitly so IPA models can access kbdev. */
+	dev_set_drvdata(&kbdev->devfreq->dev, kbdev);
+
+	err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to register OPP notifier (%d)\n", err);
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	err = kbase_ipa_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "IPA initialization failed\n");
+		goto cooling_failed;
+	}
+
+	kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+			kbdev->dev->of_node,
+			kbdev->devfreq,
+			&kbase_ipa_power_model_ops);
+	if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+		err = PTR_ERR(kbdev->devfreq_cooling);
+		dev_err(kbdev->dev,
+			"Failed to register cooling device (%d)\n",
+			err);
+		goto cooling_failed;
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	if (devfreq_remove_device(kbdev->devfreq))
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	return err;
+}
+
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+	int err;
+
+	dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->devfreq_cooling)
+		devfreq_cooling_unregister(kbdev->devfreq_cooling);
+
+	kbase_ipa_term(kbdev);
+#endif
+
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+	err = devfreq_remove_device(kbdev->devfreq);
+	if (err)
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	kfree(kbdev->opp_table);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
new file mode 100644
index 0000000..0634038
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+int kbase_devfreq_init(struct kbase_device *kbdev);
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _BASE_DEVFREQ_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
new file mode 100644
index 0000000..a0dfd81
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -0,0 +1,260 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ *
+ */
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+
+#ifdef CONFIG_DEBUG_FS
+
+
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
+{
+	struct kbase_io_access *old_buf;
+	struct kbase_io_access *new_buf;
+	unsigned long flags;
+
+	if (!new_size)
+		goto out_err; /* The new size must not be 0 */
+
+	new_buf = vmalloc(new_size * sizeof(*h->buf));
+	if (!new_buf)
+		goto out_err;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	old_buf = h->buf;
+
+	/* Note: we won't bother with copying the old data over. The dumping
+	 * logic wouldn't work properly as it relies on 'count' both as a
+	 * counter and as an index to the buffer which would have changed with
+	 * the new array. This is a corner case that we don't need to support.
+	 */
+	h->count = 0;
+	h->size = new_size;
+	h->buf = new_buf;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+	vfree(old_buf);
+
+	return 0;
+
+out_err:
+	return -1;
+}
+
+
+int kbase_io_history_init(struct kbase_io_history *h, u16 n)
+{
+	h->enabled = false;
+	spin_lock_init(&h->lock);
+	h->count = 0;
+	h->size = 0;
+	h->buf = NULL;
+	if (kbase_io_history_resize(h, n))
+		return -1;
+
+	return 0;
+}
+
+
+void kbase_io_history_term(struct kbase_io_history *h)
+{
+	vfree(h->buf);
+	h->buf = NULL;
+}
+
+
+/* kbase_io_history_add - add new entry to the register access history
+ *
+ * @h: Pointer to the history data structure
+ * @addr: Register address
+ * @value: The value that is either read from or written to the register
+ * @write: 1 if it's a register write, 0 if it's a read
+ */
+static void kbase_io_history_add(struct kbase_io_history *h,
+		void __iomem const *addr, u32 value, u8 write)
+{
+	struct kbase_io_access *io;
+	unsigned long flags;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	io = &h->buf[h->count % h->size];
+	io->addr = (uintptr_t)addr | write;
+	io->value = value;
+	++h->count;
+	/* If count overflows, move the index by the buffer size so the entire
+	 * buffer will still be dumped later */
+	if (unlikely(!h->count))
+		h->count = h->size;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+void kbase_io_history_dump(struct kbase_device *kbdev)
+{
+	struct kbase_io_history *const h = &kbdev->io_history;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!unlikely(h->enabled))
+		return;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	dev_err(kbdev->dev, "Register IO History:");
+	iters = (h->size > h->count) ? h->count : h->size;
+	dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	writel(value, kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				value, 1);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_WRITE, offset,
+									value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx)
+{
+	u32 val;
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	val = readl(kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				val, 0);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_READ, offset, val);
+	return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
+
+/**
+ * kbase_report_gpu_fault - Report a GPU fault.
+ * @kbdev:    Kbase device pointer
+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
+ *            was also set
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using dev_warn().
+ */
+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+	u32 status;
+	u64 address;
+
+	status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+	address = (u64) kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
+	address |= kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+
+	dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+			status & 0xFF,
+			kbase_exception_name(kbdev, status),
+			address);
+	if (multiple)
+		dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+	if (val & GPU_FAULT)
+		kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+	if (val & RESET_COMPLETED)
+		kbase_pm_reset_done(kbdev);
+
+	if (val & PRFCNT_SAMPLE_COMPLETED)
+		kbase_instr_hwcnt_sample_done(kbdev);
+
+	if (val & CLEAN_CACHES_COMPLETED)
+		kbase_clean_caches_done(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+
+	/* kbase_pm_check_transitions must be called after the IRQ has been
+	 * cleared. This is because it might trigger further power transitions
+	 * and we don't want to miss the interrupt raised to notify us that
+	 * these further transitions have finished.
+	 */
+	if (val & POWER_CHANGED_ALL)
+		kbase_pm_power_changed(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
new file mode 100644
index 0000000..729256e
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Backend-specific HW access device APIs
+ */
+
+#ifndef _KBASE_DEVICE_INTERNAL_H_
+#define _KBASE_DEVICE_INTERNAL_H_
+
+/**
+ * kbase_reg_write - write to GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @value:  Value to write
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_reg_read - read from GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ *
+ * Return: Value in desired register
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+
+/**
+ * kbase_gpu_interrupt - GPU interrupt handler
+ * @kbdev: Kbase device pointer
+ * @val:   The value of the GPU IRQ status register which triggered the call
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be
+ * handled.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+#endif /* _KBASE_DEVICE_INTERNAL_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
new file mode 100644
index 0000000..881d50c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
@@ -0,0 +1,135 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbasep_platform_device_init(kbdev);
+	if (err)
+		return err;
+
+	err = kbase_pm_runtime_init(kbdev);
+	if (err)
+		goto fail_runtime_pm;
+
+	/* Ensure we can access the GPU registers */
+	kbase_pm_register_access_enable(kbdev);
+
+	/* Find out GPU properties based on the GPU feature registers */
+	kbase_gpuprops_set(kbdev);
+
+	/* We're done accessing the GPU registers for now. */
+	kbase_pm_register_access_disable(kbdev);
+
+	err = kbase_install_interrupts(kbdev);
+	if (err)
+		goto fail_interrupts;
+
+	err = kbase_hwaccess_pm_init(kbdev);
+	if (err)
+		goto fail_pm;
+
+	return 0;
+
+fail_pm:
+	kbase_release_interrupts(kbdev);
+fail_interrupts:
+	kbase_pm_runtime_term(kbdev);
+fail_runtime_pm:
+	kbasep_platform_device_term(kbdev);
+
+	return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_term(kbdev);
+	kbase_release_interrupts(kbdev);
+	kbase_pm_runtime_term(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		return err;
+
+	err = kbase_backend_timer_init(kbdev);
+	if (err)
+		goto fail_timer;
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+		dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+		err = -EINVAL;
+		goto fail_interrupt_test;
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = kbase_job_slot_init(kbdev);
+	if (err)
+		goto fail_job_slot;
+
+	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+	return 0;
+
+fail_job_slot:
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	kbase_backend_timer_term(kbdev);
+fail_timer:
+	kbase_hwaccess_pm_halt(kbdev);
+
+	return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+	kbase_job_slot_halt(kbdev);
+	kbase_job_slot_term(kbdev);
+	kbase_backend_timer_term(kbdev);
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100755
index 0000000..8809ab0
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,117 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel property query backend APIs
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	int i;
+
+	/* Fill regdump with the content of the relevant registers */
+	regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
+
+	regdump->l2_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_FEATURES), NULL);
+	regdump->core_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(CORE_FEATURES), NULL);
+	regdump->tiler_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_FEATURES), NULL);
+	regdump->mem_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MEM_FEATURES), NULL);
+	regdump->mmu_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MMU_FEATURES), NULL);
+	regdump->as_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(AS_PRESENT), NULL);
+	regdump->js_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_PRESENT), NULL);
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		regdump->js_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		regdump->texture_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
+
+	regdump->thread_max_threads = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
+	regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
+									NULL);
+	regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
+	regdump->thread_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_FEATURES), NULL);
+	regdump->thread_tls_alloc = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_TLS_ALLOC), NULL);
+
+	regdump->shader_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
+	regdump->shader_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
+
+	regdump->tiler_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
+	regdump->tiler_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
+
+	regdump->l2_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
+	regdump->l2_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
+
+	regdump->stack_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(STACK_PRESENT_LO), NULL);
+	regdump->stack_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(STACK_PRESENT_HI), NULL);
+}
+
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
+		/* Ensure we can access the GPU registers */
+		kbase_pm_register_access_enable(kbdev);
+
+		regdump->coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+		/* We're done accessing the GPU registers for now. */
+		kbase_pm_register_access_disable(kbdev);
+	} else {
+		/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
+		regdump->coherency_features =
+				COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+	}
+}
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
new file mode 100755
index 0000000..77d71f5
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -0,0 +1,497 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * GPU backend instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+
+/**
+ * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
+ * hardware
+ *
+ * @kbdev: Kbase device
+ */
+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long pm_flags;
+	u32 irq_mask;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask | CLEAN_CACHES_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_ioctl_hwcnt_enable *enable)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	int ret;
+	u64 shader_cores_needed;
+	u32 prfcnt_config;
+
+	shader_cores_needed = kbase_pm_get_present_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* alignment failure */
+	if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1)))
+		goto out_err;
+
+	/* Override core availability policy to ensure all cores are available
+	 */
+	kbase_pm_ca_instr_enable(kbdev);
+
+	/* Request the cores early on synchronously - we'll release them on any
+	 * errors (e.g. instrumentation already active) */
+	kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		/* Instrumentation is already enabled */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		goto out_unrequest_cores;
+	}
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
+						PRFCNT_SAMPLE_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* In use, this context is the owner */
+	kbdev->hwcnt.kctx = kctx;
+	/* Remember the dump address so we can reprogram it later */
+	kbdev->hwcnt.addr = enable->dump_buffer;
+
+	/* Request the clean */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+	kbdev->hwcnt.backend.triggered = 0;
+	/* Clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+	KBASE_DEBUG_ASSERT(ret);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Wait for cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	/* Configure */
+	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	{
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+	}
+#endif
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					enable->dump_buffer & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					enable->dump_buffer >> 32,        kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+					enable->jm_bm,                    kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+					enable->shader_bm,                kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+					enable->mmu_l2_bm,                kctx);
+	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
+	 * HW counter dump. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
+									kctx);
+	else
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							enable->tiler_bm, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							enable->tiler_bm, kctx);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	err = 0;
+
+	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+	return err;
+ out_unrequest_cores:
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ out_err:
+	return err;
+}
+
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	while (1) {
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
+			/* Instrumentation is not enabled */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.kctx != kctx) {
+			/* Instrumentation has been setup for another context */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
+			break;
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+		/* Ongoing dump/setup - wait for its completion */
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Disable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+
+	/* Disable the counters */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+
+	kbdev->hwcnt.kctx = NULL;
+	kbdev->hwcnt.addr = 0ULL;
+
+	kbase_pm_ca_instr_disable(kbdev);
+
+	kbase_pm_unrequest_cores(kbdev, true,
+		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+
+	kbase_pm_release_l2_caches(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+									kctx);
+
+	err = 0;
+
+ out:
+	return err;
+}
+
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.kctx != kctx) {
+		/* The instrumentation has been setup for another context */
+		goto unlock;
+	}
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
+		/* HW counters are disabled or another dump is ongoing, or we're
+		 * resetting */
+		goto unlock;
+	}
+
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Mark that we're dumping - the PF handler can signal that we faulted
+	 */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+	/* Reconfigure the dump address */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					kbdev->hwcnt.addr >> 32, NULL);
+
+	/* Start dumping */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
+					kbdev->hwcnt.addr, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+
+	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+
+	err = 0;
+
+ unlock:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
+
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success)
+{
+	unsigned long flags;
+	bool complete = false;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+		*success = true;
+		complete = true;
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		*success = false;
+		complete = true;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
+
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(data, struct kbase_device,
+						hwcnt.backend.cache_clean_work);
+
+	mutex_lock(&kbdev->cacheclean_lock);
+	kbasep_instr_hwcnt_cacheclean(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	/* Wait for our condition, and any reset to complete */
+	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+				kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_CLEANED);
+
+	/* All finished and idle */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
+		int ret;
+		/* Always clean and invalidate the cache after a successful dump
+		 */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+		ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+		KBASE_DEBUG_ASSERT(ret);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		unsigned long flags;
+		unsigned long pm_flags;
+
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+		/* Disable interrupt */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+									NULL);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+		/* Wakeup... */
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+			/* Only wake if we weren't resetting */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+			wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+		}
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	}
+}
+
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long flags;
+	int err;
+
+	/* Wait for dump & cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		err = -EINVAL;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	} else {
+		/* Dump done */
+		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+		err = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return err;
+}
+
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* Check it's the context previously set up and we're not already
+	 * dumping */
+	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
+							KBASE_INSTR_STATE_IDLE)
+		goto out;
+
+	/* Clear the counters */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_PRFCNT_CLEAR, kctx);
+
+	err = 0;
+
+out:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+int kbase_instr_backend_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
+	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
+	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
+						kbasep_cache_clean_worker);
+	kbdev->hwcnt.backend.triggered = 0;
+
+	kbdev->hwcnt.backend.cache_clean_wq =
+			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
+	if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+void kbase_instr_backend_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+}
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
new file mode 100644
index 0000000..fb55d2d
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+	/* State where instrumentation is not active */
+	KBASE_INSTR_STATE_DISABLED = 0,
+	/* State machine is active and ready for a command. */
+	KBASE_INSTR_STATE_IDLE,
+	/* Hardware is currently dumping a frame. */
+	KBASE_INSTR_STATE_DUMPING,
+	/* We've requested a clean to occur on a workqueue */
+	KBASE_INSTR_STATE_REQUEST_CLEAN,
+	/* Hardware is currently cleaning and invalidating caches. */
+	KBASE_INSTR_STATE_CLEANING,
+	/* Cache clean completed, and either a) a dump is complete, or
+	 * b) instrumentation can now be setup. */
+	KBASE_INSTR_STATE_CLEANED,
+	/* An error has occured during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+	wait_queue_head_t wait;
+	int triggered;
+
+	enum kbase_instr_state state;
+	wait_queue_head_t cache_clean_wait;
+	struct workqueue_struct *cache_clean_wq;
+	struct work_struct  cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100644
index 0000000..608379e
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100644
index 0000000..ca3c048
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend specific IRQ APIs
+ */
+
+#ifndef _KBASE_IRQ_INTERNAL_H_
+#define _KBASE_IRQ_INTERNAL_H_
+
+int kbase_install_interrupts(struct kbase_device *kbdev);
+
+void kbase_release_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
+ *                          execution
+ * @kbdev: The kbase device
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev);
+
+#endif /* _KBASE_IRQ_INTERNAL_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
new file mode 100644
index 0000000..95bebf8
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -0,0 +1,474 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+
+#include <linux/interrupt.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static void *kbase_tag(void *ptr, u32 tag)
+{
+	return (void *)(((uintptr_t) ptr) | tag);
+}
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_job_done(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	atomic_inc(&kbdev->faults_pending);
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val) {
+		atomic_dec(&kbdev->faults_pending);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_mmu_interrupt(kbdev, val);
+
+	atomic_dec(&kbdev->faults_pending);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_gpu_interrupt(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler);
+
+static irq_handler_t kbase_handler_table[] = {
+	[JOB_IRQ_TAG] = kbase_job_irq_handler,
+	[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+	[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
+
+#ifdef CONFIG_MALI_DEBUG
+#define  JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define  MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define  GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * kbase_set_custom_irq_handler - Set a custom IRQ handler
+ * @kbdev: Device for which the handler is to be registered
+ * @custom_handler: Handler to be registered
+ * @irq_type: Interrupt type
+ *
+ * Registers given interrupt handler for requested interrupt type
+ * In the case where irq handler is not specified, the default handler shall be
+ * registered
+ *
+ * Return: 0 case success, error code otherwise
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+					irq_handler_t custom_handler,
+					int irq_type)
+{
+	int result = 0;
+	irq_handler_t requested_irq_handler = NULL;
+
+	KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
+						(GPU_IRQ_HANDLER >= irq_type));
+
+	/* Release previous handler */
+	if (kbdev->irqs[irq_type].irq)
+		free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
+
+	requested_irq_handler = (NULL != custom_handler) ? custom_handler :
+						kbase_handler_table[irq_type];
+
+	if (0 != request_irq(kbdev->irqs[irq_type].irq,
+			requested_irq_handler,
+			kbdev->irqs[irq_type].flags | IRQF_SHARED,
+			dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+		result = -EINVAL;
+		dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+					kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+		dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/* test correct interrupt assigment and reception by cpu */
+struct kbasep_irq_test {
+	struct hrtimer timer;
+	wait_queue_head_t wait;
+	int triggered;
+	u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT    500
+
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+	struct kbasep_irq_test *test_data = container_of(timer,
+						struct kbasep_irq_test, timer);
+
+	test_data->timeout = 1;
+	test_data->triggered = 1;
+	wake_up(&test_data->wait);
+	return HRTIMER_NORESTART;
+}
+
+static int kbasep_common_test_interrupt(
+				struct kbase_device * const kbdev, u32 tag)
+{
+	int err = 0;
+	irq_handler_t test_handler;
+
+	u32 old_mask_val;
+	u16 mask_offset;
+	u16 rawstat_offset;
+
+	switch (tag) {
+	case JOB_IRQ_TAG:
+		test_handler = kbase_job_irq_test_handler;
+		rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+		mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+		break;
+	case MMU_IRQ_TAG:
+		test_handler = kbase_mmu_irq_test_handler;
+		rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+		mask_offset = MMU_REG(MMU_IRQ_MASK);
+		break;
+	case GPU_IRQ_TAG:
+		/* already tested by pm_driver - bail out */
+	default:
+		return 0;
+	}
+
+	/* store old mask */
+	old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+	/* mask interrupts */
+	kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+	if (kbdev->irqs[tag].irq) {
+		/* release original handler and install test handler */
+		if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
+			err = -EINVAL;
+		} else {
+			kbasep_irq_test_data.timeout = 0;
+			hrtimer_init(&kbasep_irq_test_data.timer,
+					CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+			kbasep_irq_test_data.timer.function =
+						kbasep_test_interrupt_timeout;
+
+			/* trigger interrupt */
+			kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
+			kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+
+			hrtimer_start(&kbasep_irq_test_data.timer,
+					HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
+					HRTIMER_MODE_REL);
+
+			wait_event(kbasep_irq_test_data.wait,
+					kbasep_irq_test_data.triggered != 0);
+
+			if (kbasep_irq_test_data.timeout != 0) {
+				dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+				err = -EINVAL;
+			} else {
+				dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+			}
+
+			hrtimer_cancel(&kbasep_irq_test_data.timer);
+			kbasep_irq_test_data.triggered = 0;
+
+			/* mask interrupts */
+			kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+			/* release test handler */
+			free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+		}
+
+		/* restore original interrupt */
+		if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
+				kbdev->irqs[tag].flags | IRQF_SHARED,
+				dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+			dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
+						kbdev->irqs[tag].irq, tag);
+			err = -EINVAL;
+		}
+	}
+	/* restore old mask */
+	kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+
+	return err;
+}
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev)
+{
+	int err;
+
+	init_waitqueue_head(&kbasep_irq_test_data.wait);
+	kbasep_irq_test_data.triggered = 0;
+
+	/* A suspend won't happen during startup/insmod */
+	kbase_pm_context_active(kbdev);
+
+	err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+
+ out:
+	kbase_pm_context_idle(kbdev);
+
+	return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	int err;
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
+				kbdev->irqs[i].flags | IRQF_SHARED,
+				dev_name(kbdev->dev),
+				kbase_tag(kbdev, i));
+		if (err) {
+			dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+							kbdev->irqs[i].irq, i);
+#ifdef CONFIG_SPARSE_IRQ
+			dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+			goto release;
+		}
+	}
+
+	return 0;
+
+ release:
+	while (i-- > 0)
+		free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+
+	return err;
+}
+
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+	}
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			synchronize_irq(kbdev->irqs[i].irq);
+	}
+}
+
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
new file mode 100755
index 0000000..c8153ba
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -0,0 +1,244 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register backend context / address space management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+
+/**
+ * assign_and_activate_kctx_addr_space - Assign an AS to a context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @current_as: Address Space to assign
+ *
+ * Assign an Address Space (AS) to a context, and add the context to the Policy.
+ *
+ * This includes
+ *   setting up the global runpool_irq structure and the context on the AS,
+ *   Activating the MMU on the AS,
+ *   Allowing jobs to be submitted on the AS.
+ *
+ * Context:
+ *   kbasep_js_kctx_info.jsctx_mutex held,
+ *   kbasep_js_device_data.runpool_mutex held,
+ *   AS transaction mutex held,
+ *   Runpool IRQ lock held
+ */
+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_as *current_as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Attribute handling */
+	kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+	/* Allow it to run jobs */
+	kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+	kbase_js_runpool_inc_context_count(kbdev, kctx);
+}
+
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	int i;
+
+	if (kbdev->hwaccess.active_kctx[js] == kctx) {
+		/* Context is already active */
+		return true;
+	}
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		if (kbdev->as_to_kctx[i] == kctx) {
+			/* Context already has ASID - mark as active */
+			return true;
+		}
+	}
+
+	/* Context does not have address space assigned */
+	return false;
+}
+
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	int as_nr = kctx->as_nr;
+
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Attempting to release context without ASID\n");
+		return;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (atomic_read(&kctx->refcount) != 1) {
+		WARN(1, "Attempting to release active ASID\n");
+		return;
+	}
+
+	kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
+
+	kbase_ctx_sched_release_ctx(kctx);
+	kbase_js_runpool_dec_context_count(kbdev, kctx);
+}
+
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+}
+
+int kbase_backend_find_and_release_free_address_space(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	int i;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_kctx_info *as_js_kctx_info;
+		struct kbase_context *as_kctx;
+
+		as_kctx = kbdev->as_to_kctx[i];
+		as_js_kctx_info = &as_kctx->jctx.sched_info;
+
+		/* Don't release privileged or active contexts, or contexts with
+		 * jobs running.
+		 * Note that a context will have at least 1 reference (which
+		 * was previously taken by kbasep_js_schedule_ctx()) until
+		 * descheduled.
+		 */
+		if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) &&
+			atomic_read(&as_kctx->refcount) == 1) {
+			if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
+								as_kctx)) {
+				WARN(1, "Failed to retain active context\n");
+
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				return KBASEP_AS_NR_INVALID;
+			}
+
+			kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
+
+			/* Drop and retake locks to take the jsctx_mutex on the
+			 * context we're about to release without violating lock
+			 * ordering
+			 */
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+
+			/* Release context from address space */
+			mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+
+			kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
+
+			if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) {
+				kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
+								as_kctx,
+								true);
+
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+				return i;
+			}
+
+			/* Context was retained while locks were dropped,
+			 * continue looking for free AS */
+
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_as *new_address_space = NULL;
+	int js;
+
+	js_devdata = &kbdev->js_data;
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		if (kbdev->hwaccess.active_kctx[js] == kctx) {
+			WARN(1, "Context is already scheduled in\n");
+			return false;
+		}
+	}
+
+	new_address_space = &kbdev->as[as_nr];
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
+
+	if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+		/* We need to retain it to keep the corresponding address space
+		 */
+		kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	}
+
+	return true;
+}
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
new file mode 100755
index 0000000..b4d2ae1
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific definitions
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
+#define _KBASE_HWACCESS_GPU_DEFS_H_
+
+/* SLOT_RB_SIZE must be < 256 */
+#define SLOT_RB_SIZE 2
+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
+
+/**
+ * struct rb_entry - Ringbuffer entry
+ * @katom:	Atom associated with this entry
+ */
+struct rb_entry {
+	struct kbase_jd_atom *katom;
+};
+
+/**
+ * struct slot_rb - Slot ringbuffer
+ * @entries:		Ringbuffer entries
+ * @last_context:	The last context to submit a job on this slot
+ * @read_idx:		Current read index of buffer
+ * @write_idx:		Current write index of buffer
+ * @job_chain_flag:	Flag used to implement jobchain disambiguation
+ */
+struct slot_rb {
+	struct rb_entry entries[SLOT_RB_SIZE];
+
+	struct kbase_context *last_context;
+
+	u8 read_idx;
+	u8 write_idx;
+
+	u8 job_chain_flag;
+};
+
+/**
+ * struct kbase_backend_data - GPU backend specific data for HW access layer
+ * @slot_rb:			Slot ringbuffers
+ * @rmu_workaround_flag:	When PRLAM-8987 is present, this flag determines
+ *				whether slots 0/1 or slot 2 are currently being
+ *				pulled from
+ * @scheduling_timer:		The timer tick used for rescheduling jobs
+ * @timer_running:		Is the timer running? The runpool_mutex must be
+ *				held whilst modifying this.
+ * @suspend_timer:              Is the timer suspended? Set when a suspend
+ *                              occurs and cleared on resume. The runpool_mutex
+ *                              must be held whilst modifying this.
+ * @reset_gpu:			Set to a KBASE_RESET_xxx value (see comments)
+ * @reset_workq:		Work queue for performing the reset
+ * @reset_work:			Work item for performing the reset
+ * @reset_wait:			Wait event signalled when the reset is complete
+ * @reset_timer:		Timeout for soft-stops before the reset
+ * @timeouts_updated:           Have timeout values just been updated?
+ *
+ * The hwaccess_lock (a spinlock) must be held when accessing this structure
+ */
+struct kbase_backend_data {
+	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
+
+	bool rmu_workaround_flag;
+
+	struct hrtimer scheduling_timer;
+
+	bool timer_running;
+	bool suspend_timer;
+
+	atomic_t reset_gpu;
+
+/* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_NOT_PENDING     0
+/* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_PREPARED        1
+/* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_COMMITTED       2
+/* The GPU reset process is currently occuring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+#define KBASE_RESET_GPU_HAPPENING       3
+/* Reset the GPU silently, used when resetting the GPU as part of normal
+ * behavior (e.g. when exiting protected mode). */
+#define KBASE_RESET_GPU_SILENT          4
+	struct workqueue_struct *reset_workq;
+	struct work_struct reset_work;
+	wait_queue_head_t reset_wait;
+	struct hrtimer reset_timer;
+
+	bool timeouts_updated;
+};
+
+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
new file mode 100755
index 0000000..fd56b08
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -0,0 +1,1464 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Base kernel job manager APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_vinstr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+#define beenthere(kctx, f, a...) \
+			dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if KBASE_GPU_RESET_EN
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
+static void kbasep_reset_timeout_worker(struct work_struct *data);
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
+#endif /* KBASE_GPU_RESET_EN */
+
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
+						struct kbase_context *kctx)
+{
+	return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
+}
+
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js)
+{
+	struct kbase_context *kctx;
+	u32 cfg;
+	u64 jc_head = katom->jc;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	/* Command register must be available */
+	KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+	/* Affinity is not violating */
+	kbase_js_debug_log_current_affinities(kbdev);
+	KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
+							katom->affinity));
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
+						jc_head & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
+						jc_head >> 32, kctx);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+					katom->affinity & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+					katom->affinity >> 32, kctx);
+
+	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
+	 * start */
+	cfg = kctx->as_nr;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) &&
+			!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
+		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START))
+		cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) &&
+			!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
+		cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649))
+		cfg |= JS_CONFIG_START_MMU;
+
+	cfg |= JS_CONFIG_THREAD_PRI(8);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) &&
+		(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED))
+		cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+		if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+			cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								true;
+		} else {
+			katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								false;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
+				katom->flush_id, kctx);
+
+	/* Write an approximate start timestamp.
+	 * It's approximate because there might be a job in the HEAD register.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* GO ! */
+	dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx",
+				katom, kctx, js, jc_head, katom->affinity);
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
+							(u32) katom->affinity);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(
+				GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
+				kctx, kbase_jd_atom_id(kctx, katom));
+#endif
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head,
+			katom->affinity, cfg);
+	KBASE_TLSTREAM_TL_RET_CTX_LPU(
+		kctx,
+		&kbdev->gpu_props.props.raw_props.js_features[
+			katom->slot_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_LPU(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[js],
+			"ctx_nr,atom_nr");
+#ifdef CONFIG_GPU_TRACEPOINTS
+	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
+		/* If this is the only job on the slot, trace it as starting */
+		char js_string[16];
+
+		trace_gpu_sched_switch(
+				kbasep_make_job_slot_string(js, js_string,
+						sizeof(js_string)),
+				ktime_to_ns(katom->start_timestamp),
+				(u32)katom->kctx->id, 0, katom->work_id);
+		kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
+	}
+#endif
+	kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+						JS_COMMAND_START, katom->kctx);
+}
+
+/**
+ * kbasep_job_slot_update_head_start_timestamp - Update timestamp
+ * @kbdev: kbase device
+ * @js: job slot
+ * @end_timestamp: timestamp
+ *
+ * Update the start_timestamp of the job currently in the HEAD, based on the
+ * fact that we got an IRQ for the previous set of completed jobs.
+ *
+ * The estimate also takes into account the time the job was submitted, to
+ * work out the best estimate (which might still result in an over-estimate to
+ * the calculated time spent)
+ */
+static void kbasep_job_slot_update_head_start_timestamp(
+						struct kbase_device *kbdev,
+						int js,
+						ktime_t end_timestamp)
+{
+	if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) {
+		struct kbase_jd_atom *katom;
+		ktime_t timestamp_diff;
+		/* The atom in the HEAD */
+		katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		KBASE_DEBUG_ASSERT(katom != NULL);
+
+		timestamp_diff = ktime_sub(end_timestamp,
+				katom->start_timestamp);
+		if (ktime_to_ns(timestamp_diff) >= 0) {
+			/* Only update the timestamp if it's a better estimate
+			 * than what's currently stored. This is because our
+			 * estimate that accounts for the throttle time may be
+			 * too much of an overestimate */
+			katom->start_timestamp = end_timestamp;
+		}
+	}
+}
+
+/**
+ * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline
+ * tracepoint
+ * @kbdev: kbase device
+ * @js: job slot
+ *
+ * Make a tracepoint call to the instrumentation module informing that
+ * softstop happened on given lpu (job slot).
+ */
+static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
+					int js)
+{
+	KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(
+		&kbdev->gpu_props.props.raw_props.js_features[js]);
+}
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done)
+{
+	unsigned long flags;
+	int i;
+	u32 count = 0;
+	ktime_t end_timestamp = ktime_get();
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	while (done) {
+		u32 failed = done >> 16;
+
+		/* treat failed slots as finished slots */
+		u32 finished = (done & 0xFFFF) | failed;
+
+		/* Note: This is inherently unfair, as we always check
+		 * for lower numbered interrupts before the higher
+		 * numbered ones.*/
+		i = ffs(finished) - 1;
+		KBASE_DEBUG_ASSERT(i >= 0);
+
+		do {
+			int nr_done;
+			u32 active;
+			u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */
+			u64 job_tail = 0;
+
+			if (failed & (1u << i)) {
+				/* read out the job slot status code if the job
+				 * slot reported failure */
+				completion_code = kbase_reg_read(kbdev,
+					JOB_SLOT_REG(i, JS_STATUS), NULL);
+
+				switch (completion_code) {
+				case BASE_JD_EVENT_STOPPED:
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+					kbase_trace_mali_job_slots_event(
+						GATOR_MAKE_EVENT(
+						GATOR_JOB_SLOT_SOFT_STOPPED, i),
+								NULL, 0);
+#endif
+
+					kbasep_trace_tl_event_lpu_softstop(
+						kbdev, i);
+
+					/* Soft-stopped job - read the value of
+					 * JS<n>_TAIL so that the job chain can
+					 * be resumed */
+					job_tail = (u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_LO),
+									NULL) |
+						((u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_HI),
+								NULL) << 32);
+					break;
+				case BASE_JD_EVENT_NOT_STARTED:
+					/* PRLAM-10673 can cause a TERMINATED
+					 * job to come back as NOT_STARTED, but
+					 * the error interrupt helps us detect
+					 * it */
+					completion_code =
+						BASE_JD_EVENT_TERMINATED;
+					/* fall through */
+				default:
+					meson_gpu_data_invalid_count ++;
+					dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+							i, completion_code,
+							kbase_exception_name
+							(kbdev,
+							completion_code));
+				}
+
+				kbase_gpu_irq_evict(kbdev, i, completion_code);
+			}
+
+			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+					done & ((1 << i) | (1 << (i + 16))),
+					NULL);
+			active = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_JS_STATE),
+					NULL);
+
+			if (((active >> i) & 1) == 0 &&
+					(((done >> (i + 16)) & 1) == 0)) {
+				/* There is a potential race we must work
+				 * around:
+				 *
+				 *  1. A job slot has a job in both current and
+				 *     next registers
+				 *  2. The job in current completes
+				 *     successfully, the IRQ handler reads
+				 *     RAWSTAT and calls this function with the
+				 *     relevant bit set in "done"
+				 *  3. The job in the next registers becomes the
+				 *     current job on the GPU
+				 *  4. Sometime before the JOB_IRQ_CLEAR line
+				 *     above the job on the GPU _fails_
+				 *  5. The IRQ_CLEAR clears the done bit but not
+				 *     the failed bit. This atomically sets
+				 *     JOB_IRQ_JS_STATE. However since both jobs
+				 *     have now completed the relevant bits for
+				 *     the slot are set to 0.
+				 *
+				 * If we now did nothing then we'd incorrectly
+				 * assume that _both_ jobs had completed
+				 * successfully (since we haven't yet observed
+				 * the fail bit being set in RAWSTAT).
+				 *
+				 * So at this point if there are no active jobs
+				 * left we check to see if RAWSTAT has a failure
+				 * bit set for the job slot. If it does we know
+				 * that there has been a new failure that we
+				 * didn't previously know about, so we make sure
+				 * that we record this in active (but we wait
+				 * for the next loop to deal with it).
+				 *
+				 * If we were handling a job failure (i.e. done
+				 * has the relevant high bit set) then we know
+				 * that the value read back from
+				 * JOB_IRQ_JS_STATE is the correct number of
+				 * remaining jobs because the failed job will
+				 * have prevented any futher jobs from starting
+				 * execution.
+				 */
+				u32 rawstat = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+				if ((rawstat >> (i + 16)) & 1) {
+					/* There is a failed job that we've
+					 * missed - add it back to active */
+					active |= (1u << i);
+				}
+			}
+
+			dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+							completion_code);
+
+			nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+			nr_done -= (active >> i) & 1;
+			nr_done -= (active >> (i + 16)) & 1;
+
+			if (nr_done <= 0) {
+				dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+									i);
+
+				goto spurious;
+			}
+
+			count += nr_done;
+
+			while (nr_done) {
+				if (nr_done == 1) {
+					kbase_gpu_complete_hw(kbdev, i,
+								completion_code,
+								job_tail,
+								&end_timestamp);
+					kbase_jm_try_kick_all(kbdev);
+				} else {
+					/* More than one job has completed.
+					 * Since this is not the last job being
+					 * reported this time it must have
+					 * passed. This is because the hardware
+					 * will not allow further jobs in a job
+					 * slot to complete until the failed job
+					 * is cleared from the IRQ status.
+					 */
+					kbase_gpu_complete_hw(kbdev, i,
+							BASE_JD_EVENT_DONE,
+							0,
+							&end_timestamp);
+				}
+				nr_done--;
+			}
+ spurious:
+			done = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
+				/* Workaround for missing interrupt caused by
+				 * PRLAM-10883 */
+				if (((active >> i) & 1) && (0 ==
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(i,
+							JS_STATUS), NULL))) {
+					/* Force job slot to be processed again
+					 */
+					done |= (1u << i);
+				}
+			}
+
+			failed = done >> 16;
+			finished = (done & 0xFFFF) | failed;
+			if (done)
+				end_timestamp = ktime_get();
+		} while (finished & (1 << i));
+
+		kbasep_job_slot_update_head_start_timestamp(kbdev, i,
+								end_timestamp);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#if KBASE_GPU_RESET_EN
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_COMMITTED) {
+		/* If we're trying to reset the GPU then we might be able to do
+		 * it early (without waiting for a timeout) because some jobs
+		 * have completed
+		 */
+		kbasep_try_reset_gpu_early(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
+}
+KBASE_EXPORT_TEST_API(kbase_job_done);
+
+static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	bool soft_stops_allowed = true;
+
+	if (kbase_jd_katom_is_protected(katom)) {
+		soft_stops_allowed = false;
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+		if ((katom->core_req & BASE_JD_REQ_T) != 0)
+			soft_stops_allowed = false;
+	}
+	return soft_stops_allowed;
+}
+
+static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
+						base_jd_core_req core_reqs)
+{
+	bool hard_stops_allowed = true;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
+		if ((core_reqs & BASE_JD_REQ_T) != 0)
+			hard_stops_allowed = false;
+	}
+	return hard_stops_allowed;
+}
+
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom)
+{
+	struct kbase_context *kctx = target_katom->kctx;
+#if KBASE_TRACE_ENABLE
+	u32 status_reg_before;
+	u64 job_in_head_before;
+	u32 status_reg_after;
+
+	KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+
+	/* Check the head pointer */
+	job_in_head_before = ((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
+			| (((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_HI), NULL))
+									<< 32);
+	status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+#endif
+
+	if (action == JS_COMMAND_SOFT_STOP) {
+		bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
+								target_katom);
+
+		if (!soft_stop_allowed) {
+#ifdef CONFIG_MALI_DEBUG
+			dev_dbg(kbdev->dev,
+					"Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+
+		/* We are about to issue a soft stop, so mark the atom as having
+		 * been soft stopped */
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+
+		/* Mark the point where we issue the soft-stop command */
+		KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(target_katom);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+			int i;
+
+			for (i = 0;
+			     i < kbase_backend_nr_atoms_submitted(kbdev, js);
+			     i++) {
+				struct kbase_jd_atom *katom;
+
+				katom = kbase_gpu_inspect(kbdev, js, i);
+
+				KBASE_DEBUG_ASSERT(katom);
+
+				/* For HW_ISSUE_8316, only 'bad' jobs attacking
+				 * the system can cause this issue: normally,
+				 * all memory should be allocated in multiples
+				 * of 4 pages, and growable memory should be
+				 * changed size in multiples of 4 pages.
+				 *
+				 * Whilst such 'bad' jobs can be cleared by a
+				 * GPU reset, the locking up of a uTLB entry
+				 * caused by the bad job could also stall other
+				 * ASs, meaning that other ASs' jobs don't
+				 * complete in the 'grace' period before the
+				 * reset. We don't want to lose other ASs' jobs
+				 * when they would normally complete fine, so we
+				 * must 'poke' the MMU regularly to help other
+				 * ASs complete */
+				kbase_as_poking_timer_retain_atom(
+						kbdev, katom->kctx, katom);
+			}
+		}
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_SOFT_STOP_1 :
+				JS_COMMAND_SOFT_STOP_0;
+		}
+	} else if (action == JS_COMMAND_HARD_STOP) {
+		bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
+								core_reqs);
+
+		if (!hard_stop_allowed) {
+			/* Jobs can be hard-stopped for the following reasons:
+			 *  * CFS decides the job has been running too long (and
+			 *    soft-stop has not occurred). In this case the GPU
+			 *    will be reset by CFS if the job remains on the
+			 *    GPU.
+			 *
+			 *  * The context is destroyed, kbase_jd_zap_context
+			 *    will attempt to hard-stop the job. However it also
+			 *    has a watchdog which will cause the GPU to be
+			 *    reset if the job remains on the GPU.
+			 *
+			 *  * An (unhandled) MMU fault occurred. As long as
+			 *    BASE_HW_ISSUE_8245 is defined then the GPU will be
+			 *    reset.
+			 *
+			 * All three cases result in the GPU being reset if the
+			 * hard-stop fails, so it is safe to just return and
+			 * ignore the hard-stop request.
+			 */
+			dev_warn(kbdev->dev,
+					"Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+			return;
+		}
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_HARD_STOP_1 :
+				JS_COMMAND_HARD_STOP_0;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
+
+#if KBASE_TRACE_ENABLE
+	status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+	if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
+		struct kbase_jd_atom *head;
+		struct kbase_context *head_kctx;
+
+		head = kbase_gpu_inspect(kbdev, js, 0);
+		head_kctx = head->kctx;
+
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx,
+						head, job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+						0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	} else {
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+#endif
+}
+
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Cancel any remaining running jobs for this kctx  */
+	mutex_lock(&kctx->jctx.lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Invalidate all jobs in context, to prevent re-submitting */
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		if (!work_pending(&kctx->jctx.atoms[i].work))
+			kctx->jctx.atoms[i].event_code =
+						BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_hardstop(kctx, i, NULL);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev;
+	int js = target_katom->slot_nr;
+	int priority = target_katom->sched_priority;
+	int i;
+	bool stop_sent = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		if ((kbdev->js_ctx_scheduling_mode ==
+			KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) &&
+				(katom->kctx != kctx))
+			continue;
+
+		if (katom->sched_priority > priority) {
+			if (!stop_sent)
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(
+						target_katom);
+
+			kbase_job_slot_softstop(kbdev, js, katom);
+			stop_sent = true;
+		}
+	}
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long timeout = msecs_to_jiffies(ZAP_TIMEOUT);
+
+	timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait,
+			kctx->jctx.job_nr == 0, timeout);
+
+	if (timeout != 0)
+		timeout = wait_event_timeout(
+			kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			!kbase_ctx_flag(kctx, KCTX_SCHEDULED),
+			timeout);
+
+	/* Neither wait timed out; all done! */
+	if (timeout != 0)
+		goto exit;
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_prepare_to_reset_gpu(kbdev)) {
+		dev_err(kbdev->dev,
+			"Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+			ZAP_TIMEOUT);
+		kbase_reset_gpu(kbdev);
+	}
+
+	/* Wait for the reset to complete */
+	wait_event(kbdev->hwaccess.backend.reset_wait,
+			atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+			== KBASE_RESET_GPU_NOT_PENDING);
+#else
+	dev_warn(kbdev->dev,
+		"Jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+		ZAP_TIMEOUT);
+
+#endif
+exit:
+	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+	/* Ensure that the signallers of the waitqs have finished */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
+{
+	u32 flush_id = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.backend.gpu_powered)
+			flush_id = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return flush_id;
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (NULL == kbdev->hwaccess.backend.reset_workq)
+		return -EINVAL;
+
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+#endif
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
+ * @kbdev: kbase device pointer
+ * @kctx:  context to check against
+ * @js:	   slot to check
+ * @target_katom: An atom to check, or NULL if all atoms from @kctx on
+ *                slot @js should be checked
+ *
+ * This checks are based upon parameters that would normally be passed to
+ * kbase_job_slot_hardstop().
+ *
+ * In the event of @target_katom being NULL, this will check the last jobs that
+ * are likely to be running on the slot to see if a) they belong to kctx, and
+ * so would be stopped, and b) whether they have AFBC
+ *
+ * In that case, It's guaranteed that a job currently executing on the HW with
+ * AFBC will be detected. However, this is a conservative check because it also
+ * detects jobs that have just completed too.
+ *
+ * Return: true when hard-stop _might_ stop an afbc atom, else false.
+ */
+static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
+		struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom)
+{
+	bool ret = false;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* When we have an atom the decision can be made straight away. */
+	if (target_katom)
+		return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC);
+
+	/* Otherwise, we must chweck the hardware to see if it has atoms from
+	 * this context with AFBC. */
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		/* Ignore atoms from other contexts, they won't be stopped when
+		 * we use this for checking if we should hard-stop them */
+		if (katom->kctx != kctx)
+			continue;
+
+		/* An atom on this slot and this context: check for AFBC */
+		if (katom->core_req & BASE_JD_REQ_FS_AFBC) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
+}
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * @sw_flags:      Flags to pass in about the soft-stop
+ *
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+			struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+	KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+	kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
+			JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool stopped;
+#if KBASE_GPU_RESET_EN
+	/* We make the check for AFBC before evicting/stopping atoms.  Note
+	 * that no other thread can modify the slots whilst we have the
+	 * hwaccess_lock. */
+	int needs_workaround_for_afbc =
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
+			&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
+					 target_katom);
+#endif
+
+	stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
+							target_katom,
+							JS_COMMAND_HARD_STOP);
+#if KBASE_GPU_RESET_EN
+	if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+			needs_workaround_for_afbc)) {
+		/* MIDBASE-2916 if a fragment job with AFBC encoding is
+		 * hardstopped, ensure to do a soft reset also in order to
+		 * clear the GPU status.
+		 * Workaround for HW issue 8401 has an issue,so after
+		 * hard-stopping just reset the GPU. This will ensure that the
+		 * jobs leave the GPU.*/
+		if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+			dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue");
+			kbase_reset_gpu_locked(kbdev);
+		}
+	}
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	/* For hard-stop, don't enter if hard-stop not allowed */
+	if (hw_action == JS_COMMAND_HARD_STOP &&
+			!kbasep_hard_stop_allowed(kbdev, core_reqs))
+		return;
+
+	/* For soft-stop, don't enter if soft-stop not allowed, or isn't
+	 * causing disjoint */
+	if (hw_action == JS_COMMAND_SOFT_STOP &&
+			!(kbasep_soft_stop_allowed(kbdev, target_katom) &&
+			  (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+		return;
+
+	/* Nothing to do if already logged disjoint state on this atom */
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+		return;
+
+	target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+	kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom)
+{
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+		target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+		kbase_disjoint_state_down(kbdev);
+	}
+}
+
+
+#if KBASE_GPU_RESET_EN
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+	int i;
+
+	kbase_io_history_dump(kbdev);
+
+	dev_err(kbdev->dev, "Register state:");
+	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+	dev_err(kbdev->dev, "  JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x",
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL));
+	for (i = 0; i < 3; i++) {
+		dev_err(kbdev->dev, "  JS%d_STATUS=0x%08x      JS%d_HEAD_LO=0x%08x",
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
+					NULL),
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
+					NULL));
+	}
+	dev_err(kbdev->dev, "  MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+	dev_err(kbdev->dev, "  GPU_IRQ_MASK=0x%08x    JOB_IRQ_MASK=0x%08x     MMU_IRQ_MASK=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+	dev_err(kbdev->dev, "  PWR_OVERRIDE0=0x%08x   PWR_OVERRIDE1=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+	dev_err(kbdev->dev, "  SHADER_CONFIG=0x%08x   L2_MMU_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+	dev_err(kbdev->dev, "  TILER_CONFIG=0x%08x    JM_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG), NULL));
+}
+
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+	bool try_schedule = false;
+	bool silent = false;
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	KBASE_DEBUG_ASSERT(data);
+
+	kbdev = container_of(data, struct kbase_device,
+						hwaccess.backend.reset_work);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_SILENT)
+		silent = true;
+
+	KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	/* Make sure the timer has completed - this cannot be done from
+	 * interrupt context, so this cannot be done within
+	 * kbasep_try_reset_gpu_early. */
+	hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* This would re-activate the GPU. Since it's already idle,
+		 * there's no need to reset it */
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+		kbase_disjoint_state_down(kbdev);
+		wake_up(&kbdev->hwaccess.backend.reset_wait);
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	spin_lock(&kbdev->hwaccess_lock);
+	spin_lock(&kbdev->mmu_mask_change);
+	/* We're about to flush out the IRQs and their bottom half's */
+	kbdev->irq_reset_flush = true;
+
+	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
+	 * spinlock; this also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts_nolock(kbdev);
+
+	spin_unlock(&kbdev->mmu_mask_change);
+	spin_unlock(&kbdev->hwaccess_lock);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Ensure that any IRQ handlers have finished
+	 * Must be done without any locks IRQ handlers will take */
+	kbase_synchronize_irqs(kbdev);
+
+	/* Flush out any in-flight work items */
+	kbase_flush_mmu_wqs(kbdev);
+
+	/* The flush has completed so reset the active indicator */
+	kbdev->irq_reset_flush = false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
+		/* Ensure that L2 is not transitioning when we send the reset
+		 * command */
+		while (--max_loops && kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_L2))
+			;
+
+		WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
+	}
+
+	mutex_lock(&kbdev->pm.lock);
+	/* We hold the pm lock, so there ought to be a current policy */
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+	/* All slot have been soft-stopped and we've waited
+	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
+	 * assume that anything that is still left on the GPU is stuck there and
+	 * we'll kill it when we reset the GPU */
+
+	if (!silent)
+		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+								RESET_TIMEOUT);
+
+	/* Output the state of some interesting registers to help in the
+	 * debugging of GPU resets */
+	if (!silent)
+		kbase_debug_dump_registers(kbdev);
+
+	/* Complete any jobs that were still on the GPU */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->protected_mode = false;
+	kbase_backend_reset(kbdev, &end_timestamp);
+	kbase_pm_metrics_update(kbdev, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Reset the GPU */
+	kbase_pm_init_hw(kbdev, 0);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_restore_all_as(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_pm_enable_interrupts(kbdev);
+
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+
+	kbase_disjoint_state_down(kbdev);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	if (!silent)
+		dev_err(kbdev->dev, "Reset complete");
+
+	if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
+		try_schedule = true;
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Find out what cores are required now */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* Synchronously request and wait for those cores, because if
+	 * instrumentation is enabled it would need them immediately. */
+	kbase_pm_check_transitions_sync(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Try submitting some jobs to restart processing */
+	if (try_schedule) {
+		KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
+									0);
+		kbase_js_sched_all(kbdev);
+	}
+
+	/* Process any pending slot updates */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_context_idle(kbdev);
+
+	/* Release vinstr */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+						hwaccess.backend.reset_timer);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Reset still pending? */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+						KBASE_RESET_GPU_COMMITTED)
+		queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+	int i;
+	int pending_jobs = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Count the number of jobs */
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+	if (pending_jobs > 0) {
+		/* There are still jobs on the GPU - wait */
+		return;
+	}
+
+	/* To prevent getting incorrect registers when dumping failed job,
+	 * skip early reset.
+	 */
+	if (kbdev->job_fault_debug != false)
+		return;
+
+	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+	 * been called), and that no other thread beat this thread to starting
+	 * the reset */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+						KBASE_RESET_GPU_COMMITTED) {
+		/* Reset has already occurred */
+		return;
+	}
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_try_reset_gpu_early_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ *   The function returns a boolean which should be interpreted as follows:
+ *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
+ *   false - Another thread is performing a reset, kbase_reset_gpu should
+ *   not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_PREPARED) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return false;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_softstop(kbdev, i, NULL);
+
+	return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	bool ret;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early_locked(kbdev);
+}
+
+void kbase_reset_gpu_silent(struct kbase_device *kbdev)
+{
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_SILENT) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+			&kbdev->hwaccess.backend.reset_work);
+}
+
+bool kbase_reset_gpu_active(struct kbase_device *kbdev)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_NOT_PENDING)
+		return false;
+
+	return true;
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100644
index 0000000..d71a9ed
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,169 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev:		Device pointer
+ * @s:			Job slot
+ * @completion_code:	Completion code of job reported by GPU
+ * @job_tail:		Job tail address reported by GPU
+ * @end_timestamp:	Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+					u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static inline char *kbasep_make_job_slot_string(int js, char *js_string,
+						size_t js_size)
+{
+	snprintf(js_string, js_size, "job_slot_%i", js);
+	return js_string;
+}
+#endif
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ *						   on the specified atom
+ * @kbdev:		Device pointer
+ * @js:			Job slot to stop on
+ * @action:		The action to perform, either JSn_COMMAND_HARD_STOP or
+ *			JSn_COMMAND_SOFT_STOP
+ * @core_reqs:		Core requirements of atom to stop
+ * @target_katom:	Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ *					 slot belonging to a given context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @katom:	Specific atom to stop. May be NULL
+ * @js:		Job slot to hard stop
+ * @action:	The action to perform, either JSn_COMMAND_HARD_STOP or
+ *		JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_cacheclean - Cause a GPU cache clean & flush
+ * @kbdev: Device pointer
+ *
+ * Caller must not be in IRQ context
+ */
+void kbase_gpu_cacheclean(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
new file mode 100755
index 0000000..7f09fd2
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -0,0 +1,1954 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_10969_workaround.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Return whether the specified ringbuffer is empty. HW access lock must be
+ * held */
+#define SLOT_RB_EMPTY(rb)   (rb->write_idx == rb->read_idx)
+/* Return number of atoms currently in the specified ringbuffer. HW access lock
+ * must be held */
+#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer
+ * @kbdev: Device pointer
+ * @katom: Atom to enqueue
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr];
+
+	WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
+	rb->write_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+/**
+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once
+ * it has been completed
+ * @kbdev:         Device pointer
+ * @js:            Job slot to remove atom from
+ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in
+ *                 which case current time will be used.
+ *
+ * Context: Caller must hold the HW access lock
+ *
+ * Return: Atom removed from ringbuffer
+ */
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
+						int js,
+						ktime_t *end_timestamp)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+	struct kbase_jd_atom *katom;
+
+	if (SLOT_RB_EMPTY(rb)) {
+		WARN(1, "GPU ringbuffer unexpectedly empty\n");
+		return NULL;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
+
+	kbase_gpu_release_atom(kbdev, katom, end_timestamp);
+
+	rb->read_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
+
+	kbase_js_debug_log_current_affinities(kbdev);
+
+	return katom;
+}
+
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
+		return NULL; /* idx out of range */
+
+	return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js)
+{
+	return kbase_gpu_inspect(kbdev, js, 0);
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	if (SLOT_RB_EMPTY(rb))
+		return NULL;
+
+	return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom;
+}
+
+/**
+ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently
+ * on the GPU
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ *
+ * Return: true if there are atoms on the GPU for slot js,
+ *         false otherwise
+ */
+static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (!katom)
+			return false;
+		if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED ||
+				katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
+ * currently on the GPU
+ * @kbdev:  Device pointer
+ *
+ * Return: true if there are any atoms on the GPU, false otherwise
+ */
+static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
+{
+	int js;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+			if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+				return true;
+		}
+	}
+	return false;
+}
+
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED))
+			nr++;
+	}
+
+	return nr;
+}
+
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		if (kbase_gpu_inspect(kbdev, js, i))
+			nr++;
+	}
+
+	return nr;
+}
+
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
+				enum kbase_atom_gpu_rb_state min_rb_state)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state >= min_rb_state))
+			nr++;
+	}
+
+	return nr;
+}
+
+/**
+ * check_secure_atom - Check if the given atom is in the given secure state and
+ *                     has a ringbuffer state of at least
+ *                     KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @katom:  Atom pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if atom is in the given state, false otherwise
+ */
+static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure)
+{
+	if (katom->gpu_rb_state >=
+			KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION &&
+			((kbase_jd_katom_is_protected(katom) && secure) ||
+			(!kbase_jd_katom_is_protected(katom) && !secure)))
+		return true;
+
+	return false;
+}
+
+/**
+ * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given
+ *                                secure state in the ringbuffers of at least
+ *                                state
+ *                                KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE
+ * @kbdev:  Device pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if any atoms are in the given state, false otherwise
+ */
+static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
+		bool secure)
+{
+	int js, i;
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, i);
+
+			if (katom) {
+				if (check_secure_atom(katom, secure))
+					return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* The GPU is being reset - so prevent submission */
+		return 0;
+	}
+
+	return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
+}
+
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom);
+
+static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
+						int js,
+						struct kbase_jd_atom *katom)
+{
+	/* The most recently checked affinity. Having this at this scope allows
+	 * us to guarantee that we've checked the affinity in this function
+	 * call.
+	 */
+	u64 recently_chosen_affinity = 0;
+	bool chosen_affinity = false;
+	bool retry;
+
+	do {
+		retry = false;
+
+		/* NOTE: The following uses a number of FALLTHROUGHs to optimize
+		 * the calls to this function. Ending of the function is
+		 * indicated by BREAK OUT */
+		switch (katom->coreref_state) {
+			/* State when job is first attempted to be run */
+		case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+			KBASE_DEBUG_ASSERT(katom->affinity == 0);
+
+			/* Compute affinity */
+			if (false == kbase_js_choose_affinity(
+					&recently_chosen_affinity, kbdev, katom,
+									js)) {
+				/* No cores are currently available */
+				/* *** BREAK OUT: No state transition *** */
+				break;
+			}
+
+			chosen_affinity = true;
+
+			/* Request the cores */
+			kbase_pm_request_cores(kbdev,
+					katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+			katom->affinity = recently_chosen_affinity;
+
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+			{
+				enum kbase_pm_cores_ready cores_ready;
+
+				KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+				cores_ready = kbase_pm_register_inuse_cores(
+						kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						katom->affinity);
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Stay in this state and return, to
+					 * retry at this state later */
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: No state transition
+					 * *** */
+					break;
+				}
+				/* Proceed to next state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+			}
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+			/* Optimize out choosing the affinity twice in the same
+			 * function call */
+			if (chosen_affinity == false) {
+				/* See if the affinity changed since a previous
+				 * call. */
+				if (false == kbase_js_choose_affinity(
+						&recently_chosen_affinity,
+							kbdev, katom, js)) {
+					/* No cores are currently available */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REQUEST_ON_RECHECK_FAILED,
+						katom->kctx, katom,
+						katom->jc, js,
+						(u32) recently_chosen_affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+				chosen_affinity = true;
+			}
+
+			/* Now see if this requires a different set of cores */
+			if (recently_chosen_affinity != katom->affinity) {
+				enum kbase_pm_cores_ready cores_ready;
+
+				kbase_pm_request_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+				/* Register new cores whilst we still hold the
+				 * old ones, to minimize power transitions */
+				cores_ready =
+					kbase_pm_register_inuse_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+
+				/* Fixup the state that was reduced by
+				 * deref_cores: */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				katom->affinity = recently_chosen_affinity;
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				/* Now might be waiting for powerup again, with
+				 * a new affinity */
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Return to previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_ON_RECHECK_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+			}
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+		case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+			KBASE_DEBUG_ASSERT(katom->affinity ==
+						recently_chosen_affinity);
+
+			/* Note: this is where the caller must've taken the
+			 * hwaccess_lock */
+
+			/* Check for affinity violations - if there are any,
+			 * then we just ask the caller to requeue and try again
+			 * later */
+			if (kbase_js_affinity_would_violate(kbdev, js,
+					katom->affinity) != false) {
+				/* Return to previous state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				/* *** BREAK OUT: Transition to lower state ***
+				 */
+				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_AFFINITY_WOULD_VIOLATE,
+					katom->kctx, katom, katom->jc, js,
+					(u32) katom->affinity);
+				break;
+			}
+
+			/* No affinity violations would result, so the cores are
+			 * ready */
+			katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
+			/* *** BREAK OUT: Cores Ready *** */
+			break;
+
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false,
+					"Unhandled kbase_atom_coreref_state %d",
+							katom->coreref_state);
+			break;
+		}
+	} while (retry != false);
+
+	return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
+}
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	switch (katom->coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(katom->affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							katom->coreref_state);
+		break;
+	}
+
+	katom->affinity = 0;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+}
+
+static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							coreref_state);
+		break;
+	}
+}
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	switch (katom->gpu_rb_state) {
+	case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+		/* Should be impossible */
+		WARN(1, "Attempting to release atom not in ringbuffer\n");
+		break;
+
+	case KBASE_ATOM_GPU_RB_SUBMITTED:
+		/* Inform power management at start/finish of atom so it can
+		 * update its GPU utilisation metrics. Mark atom as not
+		 * submitted beforehand. */
+		katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		kbase_pm_metrics_update(kbdev, end_timestamp);
+
+		if (katom->core_req & BASE_JD_REQ_PERMON)
+			kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+		KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
+			&kbdev->gpu_props.props.raw_props.js_features
+				[katom->slot_nr]);
+		KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
+		KBASE_TLSTREAM_TL_NRET_CTX_LPU(kctx,
+			&kbdev->gpu_props.props.raw_props.js_features
+				[katom->slot_nr]);
+
+	case KBASE_ATOM_GPU_RB_READY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+		kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
+							katom->affinity);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+		break;
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+		if (katom->protected_state.enter !=
+				KBASE_ATOM_ENTER_PROTECTED_CHECK ||
+				katom->protected_state.exit !=
+				KBASE_ATOM_EXIT_PROTECTED_CHECK)
+			kbdev->protected_mode_transition = false;
+
+		if (kbase_jd_katom_is_protected(katom) &&
+				(katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) {
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+			/* Go back to configured model for IPA */
+			kbase_ipa_model_use_configured_locked(kbdev);
+		}
+
+
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+		break;
+	}
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+}
+
+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	kbase_gpu_release_atom(kbdev, katom, NULL);
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+}
+
+static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	bool slot_busy[3];
+
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+	slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+
+	if ((js == 2 && !(slot_busy[0] || slot_busy[1])) ||
+		(js != 2 && !slot_busy[2]))
+		return true;
+
+	/* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */
+	if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1) ||
+			backend->rmu_workaround_flag))
+		return false;
+
+	/* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */
+	if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) ||
+			!backend->rmu_workaround_flag))
+		return false;
+
+	backend->rmu_workaround_flag = !backend->rmu_workaround_flag;
+
+	return true;
+}
+
+/**
+ * other_slots_busy - Determine if any job slots other than @js are currently
+ *                    running atoms
+ * @kbdev: Device pointer
+ * @js:    Job slot
+ *
+ * Return: true if any slots other than @js are busy, false otherwise
+ */
+static inline bool other_slots_busy(struct kbase_device *kbdev, int js)
+{
+	int slot;
+
+	for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) {
+		if (slot == js)
+			continue;
+
+		if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot,
+				KBASE_ATOM_GPU_RB_SUBMITTED))
+			return true;
+	}
+
+	return false;
+}
+
+static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev)
+{
+	return kbdev->protected_mode;
+}
+
+static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot enter protected mode: protected callbacks not specified.\n");
+
+	/*
+	 * When entering into protected mode, we must ensure that the
+	 * GPU is not operating in coherent mode as well. This is to
+	 * ensure that no protected memory can be leaked.
+	 */
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE);
+
+	if (kbdev->protected_ops) {
+		/* Switch GPU to protected mode */
+		err = kbdev->protected_ops->protected_mode_enable(
+				kbdev->protected_dev);
+
+		if (err)
+			dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n",
+					err);
+		else
+			kbdev->protected_mode = true;
+	}
+
+	return err;
+}
+
+static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot exit protected mode: protected callbacks not specified.\n");
+
+	if (!kbdev->protected_ops)
+		return -EINVAL;
+
+	/* The protected mode disable callback will be called as part of reset
+	 */
+	kbase_reset_gpu_silent(kbdev);
+
+	return 0;
+}
+
+static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	switch (katom[idx]->protected_state.enter) {
+	case KBASE_ATOM_ENTER_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		kbdev->protected_mode_transition = true;
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_VINSTR;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_VINSTR:
+		if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
+			/*
+			 * We can't switch now because
+			 * the vinstr core state switch
+			 * is not done yet.
+			 */
+			return -EAGAIN;
+		}
+
+		/* Use generic model for IPA in protected mode */
+		kbase_ipa_model_use_fallback_locked(kbdev);
+
+		/* Once reaching this point GPU must be
+		 * switched to protected mode or vinstr
+		 * re-enabled. */
+
+		/*
+		 * Not in correct mode, begin protected mode switch.
+		 * Entering protected mode requires us to power down the L2,
+		 * and drop out of fully coherent mode.
+		 */
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
+		/* Avoid unnecessary waiting on non-ACE platforms. */
+		if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) {
+			if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+				/*
+				* The L2 is still powered, wait for all the users to
+				* finish with it before doing the actual reset.
+				*/
+				return -EAGAIN;
+			}
+		}
+
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_FINISHED;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
+
+		/* No jobs running, so we can switch GPU mode right now. */
+		err = kbase_gpu_protected_mode_enter(kbdev);
+
+		/*
+		 * Regardless of result, we are no longer transitioning
+		 * the GPU.
+		 */
+		kbdev->protected_mode_transition = false;
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev);
+		if (err) {
+			/*
+			 * Failed to switch into protected mode, resume
+			 * vinstr core and fail atom.
+			 */
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order. */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			/* Go back to configured model for IPA */
+			kbase_ipa_model_use_configured_locked(kbdev);
+
+			return -EINVAL;
+		}
+
+		/* Protected mode sanity checks. */
+		KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) ==
+			kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]),
+			kbase_gpu_in_protected_mode(kbdev));
+		katom[idx]->gpu_rb_state =
+			KBASE_ATOM_GPU_RB_READY;
+	}
+
+	return 0;
+}
+
+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+
+	switch (katom[idx]->protected_state.exit) {
+	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		/*
+		 * Exiting protected mode requires a reset, but first the L2
+		 * needs to be powered down to ensure it's not active when the
+		 * reset is issued.
+		 */
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
+
+		kbdev->protected_mode_transition = true;
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+	case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/*
+			 * The L2 is still powered, wait for all the users to
+			 * finish with it before doing the actual reset.
+			 */
+			return -EAGAIN;
+		}
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET:
+		/* Issue the reset to the GPU */
+		err = kbase_gpu_protected_mode_reset(kbdev);
+
+		if (err) {
+			kbdev->protected_mode_transition = false;
+
+			/* Failed to exit protected mode, fail atom */
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+			/* Use generic model for IPA in protected mode */
+			kbase_ipa_model_use_fallback_locked(kbdev);
+
+			return -EINVAL;
+		}
+
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT:
+		/* A GPU reset is issued when exiting protected mode. Once the
+		 * reset is done all atoms' state will also be reset. For this
+		 * reason, if the atom is still in this state we can safely
+		 * say that the reset has not completed i.e., we have not
+		 * finished exiting protected mode yet.
+		 */
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+void kbase_backend_slot_update(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_jd_atom *katom[2];
+		int idx;
+
+		katom[0] = kbase_gpu_inspect(kbdev, js, 0);
+		katom[1] = kbase_gpu_inspect(kbdev, js, 1);
+		WARN_ON(katom[1] && !katom[0]);
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			bool cores_ready;
+			int ret;
+
+			if (!katom[idx])
+				continue;
+
+			switch (katom[idx]->gpu_rb_state) {
+			case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+				/* Should be impossible */
+				WARN(1, "Attempting to update atom not in ringbuffer\n");
+				break;
+
+			case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+				if (katom[idx]->atom_flags &
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED)
+					break;
+
+				katom[idx]->gpu_rb_state =
+				KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+				if (kbase_gpu_check_secure_atoms(kbdev,
+						!kbase_jd_katom_is_protected(
+						katom[idx])))
+					break;
+
+				if ((idx == 1) && (kbase_jd_katom_is_protected(
+								katom[0]) !=
+						kbase_jd_katom_is_protected(
+								katom[1])))
+					break;
+
+				if (kbdev->protected_mode_transition)
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+
+				/*
+				 * Exiting protected mode must be done before
+				 * the references on the cores are taken as
+				 * a power down the L2 is required which
+				 * can't happen after the references for this
+				 * atom are taken.
+				 */
+
+				if (!kbase_gpu_in_protected_mode(kbdev) &&
+					kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition into protected mode. */
+					ret = kbase_jm_enter_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				} else if (kbase_gpu_in_protected_mode(kbdev) &&
+					!kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition out of protected mode. */
+					ret = kbase_jm_exit_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				}
+				katom[idx]->protected_state.exit =
+						KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+				/* Atom needs no protected mode transition. */
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+				if (katom[idx]->will_fail_event_code) {
+					kbase_gpu_mark_atom_for_return(kbdev,
+							katom[idx]);
+					/* Set EVENT_DONE so this atom will be
+					   completed, not unpulled. */
+					katom[idx]->event_code =
+						BASE_JD_EVENT_DONE;
+					/* Only return if head atom or previous
+					 * atom already removed - as atoms must
+					 * be returned in order. */
+					if (idx == 0 ||	katom[0]->gpu_rb_state ==
+							KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+						kbase_gpu_dequeue_atom(kbdev, js, NULL);
+						kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+					}
+					break;
+				}
+
+				cores_ready =
+					kbasep_js_job_check_ref_cores(kbdev, js,
+								katom[idx]);
+
+				if (katom[idx]->event_code ==
+						BASE_JD_EVENT_PM_EVENT) {
+					katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+					break;
+				}
+
+				if (!cores_ready)
+					break;
+
+				kbase_js_affinity_retain_slot_cores(kbdev, js,
+							katom[idx]->affinity);
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+				if (!kbase_gpu_rmu_workaround(kbdev, js))
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_READY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_READY:
+
+				if (idx == 1) {
+					/* Only submit if head atom or previous
+					 * atom already submitted */
+					if ((katom[0]->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED &&
+						katom[0]->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+						break;
+
+					/* If intra-slot serialization in use
+					 * then don't submit atom to NEXT slot
+					 */
+					if (kbdev->serialize_jobs &
+						KBASE_SERIALIZE_INTRA_SLOT)
+						break;
+				}
+
+				/* If inter-slot serialization in use then don't
+				 * submit atom if any other slots are in use */
+				if ((kbdev->serialize_jobs &
+						KBASE_SERIALIZE_INTER_SLOT) &&
+						other_slots_busy(kbdev, js))
+					break;
+
+				if ((kbdev->serialize_jobs &
+						KBASE_SERIALIZE_RESET) &&
+						kbase_reset_gpu_active(kbdev))
+					break;
+
+				/* Check if this job needs the cycle counter
+				 * enabled before submission */
+				if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+					kbase_pm_request_gpu_cycle_counter_l2_is_on(
+									kbdev);
+
+				kbase_job_hw_submit(kbdev, katom[idx], js);
+				katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_SUBMITTED;
+
+				/* Inform power management at start/finish of
+				 * atom so it can update its GPU utilisation
+				 * metrics. */
+				kbase_pm_metrics_update(kbdev,
+						&katom[idx]->start_timestamp);
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_SUBMITTED:
+				/* Atom submitted to HW, nothing else to do */
+				break;
+
+			case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+				/* Only return if head atom or previous atom
+				 * already removed - as atoms must be returned
+				 * in order */
+				if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					kbase_gpu_dequeue_atom(kbdev, js, NULL);
+					kbase_jm_return_atom_to_js(kbdev,
+								katom[idx]);
+				}
+				break;
+			}
+		}
+	}
+
+	/* Warn if PRLAM-8987 affinity restrictions are violated */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1)) &&
+			kbase_gpu_atoms_submitted(kbdev, 2));
+}
+
+
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbase_gpu_enqueue_atom(kbdev, katom);
+	kbase_backend_slot_update(kbdev);
+}
+
+#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \
+	(KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER))
+
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
+				u32 completion_code)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_atom *next_katom;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = kbase_gpu_inspect(kbdev, js, 0);
+	next_katom = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (next_katom && katom->kctx == next_katom->kctx &&
+		next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+		HAS_DEP(next_katom) &&
+		(kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+		kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL)
+									!= 0)) {
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+				JS_COMMAND_NOP, NULL);
+		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+
+		if (completion_code == BASE_JD_EVENT_STOPPED) {
+			KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
+				&kbdev->gpu_props.props.raw_props.js_features
+					[katom->slot_nr]);
+			KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as
+					[katom->kctx->as_nr]);
+			KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx,
+				&kbdev->gpu_props.props.raw_props.js_features
+					[katom->slot_nr]);
+		}
+
+		return true;
+	}
+
+	return false;
+}
+
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/*
+	 * When a hard-stop is followed close after a soft-stop, the completion
+	 * code may be set to STOPPED, even though the job is terminated
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) {
+		if (completion_code == BASE_JD_EVENT_STOPPED &&
+				(katom->atom_flags &
+				KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) {
+			completion_code = BASE_JD_EVENT_TERMINATED;
+		}
+	}
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) || (katom->core_req &
+					BASE_JD_REQ_SKIP_CACHE_END)) &&
+			completion_code != BASE_JD_EVENT_DONE &&
+			!(completion_code & BASE_JD_SW_EVENT)) {
+		/* When a job chain fails, on a T60x or when
+		 * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not
+		 * flushed. To prevent future evictions causing possible memory
+		 * corruption we need to flush the cache manually before any
+		 * affected memory gets reused. */
+		katom->need_cache_flush_cores_retained = katom->affinity;
+		kbase_pm_request_cores(kbdev, false, katom->affinity);
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
+		if (kbdev->gpu_props.num_core_groups > 1 &&
+			!(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[0].core_mask
+									) &&
+			(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[1].core_mask
+									)) {
+			dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
+			katom->need_cache_flush_cores_retained =
+								katom->affinity;
+			kbase_pm_request_cores(kbdev, false,
+							katom->affinity);
+		}
+	}
+
+	katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+	kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED) {
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		/*
+		 * Dequeue next atom from ringbuffers on same slot if required.
+		 * This atom will already have been removed from the NEXT
+		 * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
+		 * the atoms on this slot are returned in the correct order.
+		 */
+		if (next_katom && katom->kctx == next_katom->kctx &&
+				next_katom->sched_priority ==
+				katom->sched_priority) {
+			kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+			kbase_jm_return_atom_to_js(kbdev, next_katom);
+		}
+	} else if (completion_code != BASE_JD_EVENT_DONE) {
+		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+		int i;
+
+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
+		KBASE_TRACE_DUMP(kbdev);
+#endif
+		kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+		/*
+		 * Remove all atoms on the same context from ringbuffers. This
+		 * will not remove atoms that are already on the GPU, as these
+		 * are guaranteed not to have fail dependencies on the failed
+		 * atom.
+		 */
+		for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+			struct kbase_jd_atom *katom_idx0 =
+						kbase_gpu_inspect(kbdev, i, 0);
+			struct kbase_jd_atom *katom_idx1 =
+						kbase_gpu_inspect(kbdev, i, 1);
+
+			if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
+					HAS_DEP(katom_idx0) &&
+					katom_idx0->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Dequeue katom_idx0 from ringbuffer */
+				kbase_gpu_dequeue_atom(kbdev, i, end_timestamp);
+
+				if (katom_idx1 &&
+						katom_idx1->kctx == katom->kctx
+						&& HAS_DEP(katom_idx1) &&
+						katom_idx0->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+					/* Dequeue katom_idx1 from ringbuffer */
+					kbase_gpu_dequeue_atom(kbdev, i,
+							end_timestamp);
+
+					katom_idx1->event_code =
+							BASE_JD_EVENT_STOPPED;
+					kbase_jm_return_atom_to_js(kbdev,
+								katom_idx1);
+				}
+				katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+
+			} else if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					HAS_DEP(katom_idx1) &&
+					katom_idx1->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Can not dequeue this atom yet - will be
+				 * dequeued when atom at idx0 completes */
+				katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_gpu_mark_atom_for_return(kbdev,
+								katom_idx1);
+			}
+		}
+	}
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
+					js, completion_code);
+
+	if (job_tail != 0 && job_tail != katom->jc) {
+		bool was_updated = (job_tail != katom->jc);
+
+		/* Some of the job has been executed, so we update the job chain
+		 * address to where we should resume from */
+		katom->jc = job_tail;
+		if (was_updated)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+						katom, job_tail, js);
+	}
+
+	/* Only update the event code for jobs that weren't cancelled */
+	if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+		katom->event_code = (base_jd_event_code)completion_code;
+
+	kbase_device_trace_register_access(kctx, REG_WRITE,
+						JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+						1 << js);
+
+	/* Complete the job, and start new ones
+	 *
+	 * Also defer remaining work onto the workqueue:
+	 * - Re-queue Soft-stopped jobs
+	 * - For any other jobs, queue the job back into the dependency system
+	 * - Schedule out the parent context if necessary, and schedule a new
+	 *   one in.
+	 */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	{
+		/* The atom in the HEAD */
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		if (next_katom && next_katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+							js_string,
+							sizeof(js_string)),
+						ktime_to_ns(*end_timestamp),
+						(u32)next_katom->kctx->id, 0,
+						next_katom->work_id);
+			kbdev->hwaccess.backend.slot_rb[js].last_context =
+							next_katom->kctx;
+		} else {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+							js_string,
+							sizeof(js_string)),
+						ktime_to_ns(ktime_get()), 0, 0,
+						0);
+			kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
+		}
+	}
+#endif
+
+	if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)
+		kbase_reset_gpu_silent(kbdev);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED)
+		katom = kbase_jm_return_atom_to_js(kbdev, katom);
+	else
+		katom = kbase_jm_complete(kbdev, katom, end_timestamp);
+
+	if (katom) {
+		/* Cross-slot dependency has now become runnable. Try to submit
+		 * it. */
+
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		kbase_jm_try_kick(kbdev, 1 << katom->slot_nr);
+	}
+
+	/* Job completion may have unblocked other atoms. Try to update all job
+	 * slots */
+	kbase_backend_slot_update(kbdev);
+}
+
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Reset should always take the GPU out of protected mode */
+	WARN_ON(kbase_gpu_in_protected_mode(kbdev));
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int atom_idx = 0;
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, atom_idx);
+			bool keep_in_jm_rb = false;
+
+			if (!katom)
+				break;
+			if (katom->protected_state.exit ==
+			    KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) {
+				KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev);
+
+				kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+				/* protected mode sanity checks */
+				KBASE_DEBUG_ASSERT_MSG(
+					kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev),
+					"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+					kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev));
+				KBASE_DEBUG_ASSERT_MSG(
+					(kbase_jd_katom_is_protected(katom) && js == 0) ||
+					!kbase_jd_katom_is_protected(katom),
+					"Protected atom on JS%d not supported", js);
+			}
+			if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED)
+				keep_in_jm_rb = true;
+
+			kbase_gpu_release_atom(kbdev, katom, NULL);
+
+			/*
+			 * If the atom wasn't on HW when the reset was issued
+			 * then leave it in the RB and next time we're kicked
+			 * it will be processed again from the starting state.
+			 */
+			if (keep_in_jm_rb) {
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+				katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+				katom->affinity = 0;
+				katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+				/* As the atom was not removed, increment the
+				 * index so that we read the correct atom in the
+				 * next iteration. */
+				atom_idx++;
+				continue;
+			}
+
+			/*
+			 * The atom was on the HW when the reset was issued
+			 * all we can do is fail the atom.
+			 */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			kbase_jm_complete(kbdev, katom, end_timestamp);
+		}
+	}
+
+	kbdev->protected_mode_transition = false;
+}
+
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
+	kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
+							katom->core_req, katom);
+	katom->kctx->blocked_js[js][katom->sched_priority] = true;
+}
+
+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom,
+						u32 action,
+						bool disjoint)
+{
+	katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+	kbase_gpu_mark_atom_for_return(kbdev, katom);
+	katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true;
+
+	if (disjoint)
+		kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
+									katom);
+}
+
+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
+{
+	if (katom->x_post_dep) {
+		struct kbase_jd_atom *dep_atom = katom->x_post_dep;
+
+		if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB &&
+			dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_RETURN_TO_JS)
+			return dep_atom->slot_nr;
+	}
+	return -1;
+}
+
+static void kbase_job_evicted(struct kbase_jd_atom *katom)
+{
+	kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom,
+			katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
+}
+
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbase_jd_atom *katom_idx0;
+	struct kbase_jd_atom *katom_idx1;
+
+	bool katom_idx0_valid, katom_idx1_valid;
+
+	bool ret = false;
+
+	int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
+	int prio_idx0 = 0, prio_idx1 = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
+	katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (katom_idx0)
+		prio_idx0 = katom_idx0->sched_priority;
+	if (katom_idx1)
+		prio_idx1 = katom_idx1->sched_priority;
+
+	if (katom) {
+		katom_idx0_valid = (katom_idx0 == katom);
+		/* If idx0 is to be removed and idx1 is on the same context,
+		 * then idx1 must also be removed otherwise the atoms might be
+		 * returned out of order */
+		if (katom_idx1)
+			katom_idx1_valid = (katom_idx1 == katom) ||
+						(katom_idx0_valid &&
+							(katom_idx0->kctx ==
+							katom_idx1->kctx));
+		else
+			katom_idx1_valid = false;
+	} else {
+		katom_idx0_valid = (katom_idx0 &&
+				(!kctx || katom_idx0->kctx == kctx));
+		katom_idx1_valid = (katom_idx1 &&
+				(!kctx || katom_idx1->kctx == kctx) &&
+				prio_idx0 == prio_idx1);
+	}
+
+	if (katom_idx0_valid)
+		stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
+	if (katom_idx1_valid)
+		stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1);
+
+	if (katom_idx0_valid) {
+		if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Simple case - just dequeue and return */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			if (katom_idx1_valid) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				katom_idx1->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx1);
+				katom_idx1->kctx->blocked_js[js][prio_idx1] =
+						true;
+			}
+
+			katom_idx0->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+			kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+			katom_idx0->kctx->blocked_js[js][prio_idx0] = true;
+		} else {
+			/* katom_idx0 is on GPU */
+			if (katom_idx1_valid && katom_idx1->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* katom_idx0 and katom_idx1 are on GPU */
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+					/* idx0 has already completed - stop
+					 * idx1 if needed*/
+					if (katom_idx1_valid) {
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				} else {
+					/* idx1 is in NEXT registers - attempt
+					 * to remove */
+					kbase_reg_write(kbdev,
+							JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+					if (kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_HI), NULL)
+									!= 0) {
+						/* idx1 removed successfully,
+						 * will be handled in IRQ */
+						kbase_job_evicted(katom_idx1);
+						kbase_gpu_remove_atom(kbdev,
+								katom_idx1,
+								action, true);
+						stop_x_dep_idx1 =
+					should_stop_x_dep_slot(katom_idx1);
+
+						/* stop idx0 if still on GPU */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx0,
+								action);
+						ret = true;
+					} else if (katom_idx1_valid) {
+						/* idx0 has already completed,
+						 * stop idx1 if needed */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				}
+			} else if (katom_idx1_valid) {
+				/* idx1 not on GPU but must be dequeued*/
+
+				/* idx1 will be handled in IRQ */
+				kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+				/* stop idx0 */
+				/* This will be repeated for anything removed
+				 * from the next registers, since their normal
+				 * flow was also interrupted, and this function
+				 * might not enter disjoint state e.g. if we
+				 * don't actually do a hard stop on the head
+				 * atom */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			} else {
+				/* no atom in idx1 */
+				/* just stop idx0 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			}
+		}
+	} else if (katom_idx1_valid) {
+		if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Mark for return */
+			/* idx1 will be returned once idx0 completes */
+			kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+		} else {
+			/* idx1 is on GPU */
+			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+				/* idx0 has already completed - stop idx1 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx1,
+									action);
+				ret = true;
+			} else {
+				/* idx1 is in NEXT registers - attempt to
+				 * remove */
+				kbase_reg_write(kbdev, JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_LO), NULL) != 0 ||
+				    kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_HI), NULL) != 0) {
+					/* idx1 removed successfully, will be
+					 * handled in IRQ once idx0 completes */
+					kbase_job_evicted(katom_idx1);
+					kbase_gpu_remove_atom(kbdev, katom_idx1,
+									action,
+									false);
+				} else {
+					/* idx0 has already completed - stop
+					 * idx1 */
+					kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+					ret = true;
+				}
+			}
+		}
+	}
+
+
+	if (stop_x_dep_idx0 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0,
+								NULL, action);
+
+	if (stop_x_dep_idx1 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1,
+								NULL, action);
+
+	return ret;
+}
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev)
+{
+	/* Limit the number of loops to avoid a hang if the interrupt is missed
+	 */
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	mutex_lock(&kbdev->cacheclean_lock);
+
+	/* use GPU_COMMAND completion solution */
+	/* clean & invalidate the caches */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+	/* wait for cache flush to complete before continuing */
+	while (--max_loops &&
+		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+						CLEAN_CACHES_COMPLETED) == 0)
+		;
+
+	/* clear the CLEAN_CACHES_COMPLETED irq */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
+							CLEAN_CACHES_COMPLETED);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+						CLEAN_CACHES_COMPLETED, NULL);
+	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING,
+	    "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_backend_cacheclean(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->need_cache_flush_cores_retained) {
+		unsigned long flags;
+
+		kbase_gpu_cacheclean(kbdev);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_pm_unrequest_cores(kbdev, false,
+					katom->need_cache_flush_cores_retained);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		katom->need_cache_flush_cores_retained = 0;
+	}
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	/*
+	 * If cache flush required due to HW workaround then perform the flush
+	 * now
+	 */
+	kbase_backend_cacheclean(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969)            &&
+	    (katom->core_req & BASE_JD_REQ_FS)                        &&
+	    katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT       &&
+	    (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+	    !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+		dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+		if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after was soft-stopped
+			 * Due to an HW issue we try to execute the job again.
+			 */
+			dev_dbg(kbdev->dev,
+				"Clamping has been executed, try to rerun the job\n"
+			);
+			katom->event_code = BASE_JD_EVENT_STOPPED;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+		}
+	}
+
+	/* Clear the coreref_state now - while check_deref_cores() may not have
+	 * been called yet, the caller will have taken a copy of this field. If
+	 * this is not done, then if the atom is re-scheduled (following a soft
+	 * stop) then the core reference would not be retaken. */
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->affinity = 0;
+}
+
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity,
+			coreref_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!kbdev->pm.active_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		mutex_lock(&kbdev->pm.lock);
+		kbase_pm_update_active(kbdev);
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+void kbase_gpu_dump_slots(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	int js;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+									js,
+									idx);
+
+			if (katom)
+				dev_info(kbdev->dev,
+				"  js%d idx%d : katom=%p gpu_rb_state=%d\n",
+				js, idx, katom, katom->gpu_rb_state);
+			else
+				dev_info(kbdev->dev, "  js%d idx%d : empty\n",
+								js, idx);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
new file mode 100755
index 0000000..c3b9f2d
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_H_
+#define _KBASE_HWACCESS_GPU_H_
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/**
+ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot to evict from
+ * @completion_code: Event code from job that was run.
+ *
+ * Evict the atom in the NEXT slot for the specified job slot. This function is
+ * called from the job complete IRQ handler when the previous job has failed.
+ *
+ * Return: true if job evicted from NEXT registers, false otherwise
+ */
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
+				u32 completion_code);
+
+/**
+ * kbase_gpu_complete_hw - Complete an atom on job slot js
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot that has completed
+ * @completion_code: Event code from job that has completed
+ * @job_tail:        The tail address from the hardware if the job has partially
+ *                   completed
+ * @end_timestamp:   Time of completion
+ */
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
+ *
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ * @idx:    Index into ringbuffer. 0 is the job currently running on
+ *          the slot, 1 is the job waiting, all other values are invalid.
+ * Return:  The atom at that position in the ringbuffer
+ *          or NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx);
+
+/**
+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
new file mode 100644
index 0000000..c937eca
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
@@ -0,0 +1,308 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+#include "mali_kbase_hw.h"
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+									int js)
+{
+	/*
+	 * Here are the reasons for using job slot 2:
+	 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+	 * - In absence of the above, then:
+	 *  - Atoms with BASE_JD_REQ_COHERENT_GROUP
+	 *  - But, only when there aren't contexts with
+	 *  KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+	 *  all cores on slot 1 could be blocked by those using a coherent group
+	 *  on slot 2
+	 *  - And, only when you actually have 2 or more coregroups - if you
+	 *  only have 1 coregroup, then having jobs for slot 2 implies they'd
+	 *  also be for slot 1, meaning you'll get interference from them. Jobs
+	 *  able to run on slot 2 could also block jobs that can only run on
+	 *  slot 1 (tiler jobs)
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+
+	if (js != 2)
+		return true;
+
+	/* Only deal with js==2 now: */
+	if (kbdev->gpu_props.num_core_groups > 1) {
+		/* Only use slot 2 in the 2+ coregroup case */
+		if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+					KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
+								false) {
+			/* ...But only when we *don't* have atoms that run on
+			 * all cores */
+
+			/* No specific check for BASE_JD_REQ_COHERENT_GROUP
+			 * atoms - the policy will sort that out */
+			return true;
+		}
+	}
+
+	/* Above checks failed mean we shouldn't use slot 2 */
+	return false;
+}
+
+/*
+ * As long as it has been decided to have a deeper modification of
+ * what job scheduler, power manager and affinity manager will
+ * implement, this function is just an intermediate step that
+ * assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ *   and high cores in a core split will be evently distributed.
+ * - Odd combinations of core requirements have been filtered out
+ *   and do not get to this function (e.g. CS+T+NSS is not
+ *   supported here).
+ * - This function is frequently called and can be optimized,
+ *   (see notes in loops), but as the functionallity will likely
+ *   be modified, optimization has not been addressed.
+*/
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js)
+{
+	base_jd_core_req core_req = katom->core_req;
+	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+	u64 core_availability_mask;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+	/*
+	 * If no cores are currently available (core availability policy is
+	 * transitioning) then fail.
+	 */
+	if (0 == core_availability_mask) {
+		*affinity = 0;
+		return false;
+	}
+
+	KBASE_DEBUG_ASSERT(js >= 0);
+
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+								BASE_JD_REQ_T) {
+		 /* If the hardware supports XAFFINITY then we'll only enable
+		  * the tiler (which is the default so this is a no-op),
+		  * otherwise enable shader core 0. */
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = 1;
+		else
+			*affinity = 0;
+
+		return true;
+	}
+
+	if (1 == kbdev->gpu_props.num_cores) {
+		/* trivial case only one core, nothing to do */
+		*affinity = core_availability_mask &
+				kbdev->pm.debug_core_mask[js];
+	} else {
+		if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+			if (js == 0 || num_core_groups == 1) {
+				/* js[0] and single-core-group systems just get
+				 * the first core group */
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[0].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+			} else {
+				/* js[1], js[2] use core groups 0, 1 for
+				 * dual-core-group systems */
+				u32 core_group_idx = ((u32) js) - 1;
+
+				KBASE_DEBUG_ASSERT(core_group_idx <
+							num_core_groups);
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+
+				/* If the job is specifically targeting core
+				 * group 1 and the core availability policy is
+				 * keeping that core group off, then fail */
+				if (*affinity == 0 && core_group_idx == 1 &&
+						kbdev->pm.backend.cg1_disabled
+								== true)
+					katom->event_code =
+							BASE_JD_EVENT_PM_EVENT;
+			}
+		} else {
+			/* All cores are available when no core split is
+			 * required */
+			*affinity = core_availability_mask &
+					kbdev->pm.debug_core_mask[js];
+		}
+	}
+
+	/*
+	 * If no cores are currently available in the desired core group(s)
+	 * (core availability policy is transitioning) then fail.
+	 */
+	if (*affinity == 0)
+		return false;
+
+	/* Enable core 0 if tiler required for hardware without XAFFINITY
+	 * support (notes above) */
+	if (core_req & BASE_JD_REQ_T) {
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = *affinity | 1;
+	}
+
+	return true;
+}
+
+static inline bool kbase_js_affinity_is_violating(
+						struct kbase_device *kbdev,
+								u64 *affinities)
+{
+	/* This implementation checks whether the two slots involved in Generic
+	 * thread creation have intersecting affinity. This is due to micro-
+	 * architectural issues where a job in slot A targetting cores used by
+	 * slot B could prevent the job in slot B from making progress until the
+	 * job in slot A has completed.
+	 */
+	u64 affinity_set_left;
+	u64 affinity_set_right;
+	u64 intersection;
+
+	KBASE_DEBUG_ASSERT(affinities != NULL);
+
+	affinity_set_left = affinities[1];
+
+	affinity_set_right = affinities[2];
+
+	/* A violation occurs when any bit in the left_set is also in the
+	 * right_set */
+	intersection = affinity_set_left & affinity_set_right;
+
+	return (bool) (intersection != (u64) 0u);
+}
+
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
+			sizeof(js_devdata->runpool_irq.slot_affinities));
+
+	new_affinities[js] |= affinity;
+
+	return kbase_js_affinity_is_violating(kbdev, new_affinities);
+}
+
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
+								== false);
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		cnt =
+		++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (cnt == 1)
+			js_devdata->runpool_irq.slot_affinities[js] |= bit;
+
+		cores &= ~bit;
+	}
+}
+
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		KBASE_DEBUG_ASSERT(
+		js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
+
+		cnt =
+		--(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (0 == cnt)
+			js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
+
+		cores &= ~bit;
+	}
+}
+
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int slot_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	for (slot_nr = 0; slot_nr < 3; ++slot_nr)
+		KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
+							NULL, 0u, slot_nr,
+			(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
new file mode 100644
index 0000000..dbabd94
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
@@ -0,0 +1,134 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Affinity Manager internal APIs.
+ */
+
+#ifndef _KBASE_JS_AFFINITY_H_
+#define _KBASE_JS_AFFINITY_H_
+
+/**
+ * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
+ * submit a job to a particular job slot in the current status
+ *
+ * @kbdev: The kbase device structure of the device
+ * @js:    Job slot number to check for allowance
+ *
+ * Will check if submitting to the given job slot is allowed in the current
+ * status.  For example using job slot 2 while in soft-stoppable state and only
+ * having 1 coregroup is not allowed by the policy. This function should be
+ * called prior to submitting a job to a slot to make sure policy rules are not
+ * violated.
+ *
+ * The following locking conditions are made on the caller
+ * - it must hold hwaccess_lock
+ */
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_js_choose_affinity - Compute affinity for a given job.
+ *
+ * @affinity: Affinity bitmap computed
+ * @kbdev:    The kbase device structure of the device
+ * @katom:    Job chain of which affinity is going to be found
+ * @js:       Slot the job chain is being submitted
+ *
+ * Currently assumes an all-on/all-off power management policy.
+ * Also assumes there is at least one core with tiler available.
+ *
+ * Returns true if a valid affinity was chosen, false if
+ * no cores were available.
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					int js);
+
+/**
+ * kbase_js_affinity_would_violate - Determine whether a proposed affinity on
+ * job slot @js would cause a violation of affinity restrictions.
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot to test
+ * @affinity: The affinity mask to test
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ *
+ * Return: true if the affinity would violate the restrictions
+ */
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
+ *                                       a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot retaining the cores
+ * @affinity: The cores to retain
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
+ *                                        by a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       Job slot
+ * @affinity: Bit mask of core to be released
+ *
+ * Cores must be released as soon as a job is dequeued from a slot's 'submit
+ * slots', and before another job is submitted to those slots. Otherwise, the
+ * refcount could exceed the maximum number submittable to a slot,
+ * %BASE_JM_SUBMIT_SLOTS.
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_debug_log_current_affinities - log the current affinities
+ *
+ * @kbdev:  Kbase device structure
+ *
+ * Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else				/*  KBASE_TRACE_ENABLE  */
+static inline void
+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+}
+#endif				/*  KBASE_TRACE_ENABLE  */
+
+#endif				/* _KBASE_JS_AFFINITY_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100755
index 0000000..df2dd5e
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,354 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	s8 nr_running_ctxs;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* Timer must stop if we are suspending */
+	if (backend->suspend_timer)
+		return false;
+
+	/* nr_contexts_pullable is updated with the runpool_mutex. However, the
+	 * locking in the caller gives us a barrier that ensures
+	 * nr_contexts_pullable is up-to-date for reading */
+	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (kbdev->js_data.softstop_always) {
+		/* Debug support for allowing soft-stop on a single context */
+		return true;
+	}
+#endif				/* CONFIG_MALI_DEBUG */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+		/* Timeouts would have to be 4x longer (due to micro-
+		 * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there's:
+		 * - 2 or more CL contexts
+		 * - 1 or more GLES contexts
+		 *
+		 * NOTE: We will treat a context that has both Compute and Non-
+		 * Compute jobs will be treated as an OpenCL context (hence, we
+		 * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+		 */
+		{
+			s8 nr_compute_ctxs =
+				kbasep_js_ctx_attr_count_on_runpool(kbdev,
+						KBASEP_JS_CTX_ATTR_COMPUTE);
+			s8 nr_noncompute_ctxs = nr_running_ctxs -
+							nr_compute_ctxs;
+
+			return (bool) (nr_compute_ctxs >= 2 ||
+							nr_noncompute_ctxs > 0);
+		}
+	} else {
+		/* Run the timer callback whenever you have at least 1 context
+		 */
+		return (bool) (nr_running_ctxs > 0);
+	}
+}
+
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_backend_data *backend;
+	int s;
+	bool reset_needed = false;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	backend = container_of(timer, struct kbase_backend_data,
+							scheduling_timer);
+	kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
+	js_devdata = &kbdev->js_data;
+
+	/* Loop through the slots */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+		struct kbase_jd_atom *atom = NULL;
+
+		if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
+			atom = kbase_gpu_inspect(kbdev, s, 0);
+			KBASE_DEBUG_ASSERT(atom != NULL);
+		}
+
+		if (atom != NULL) {
+			/* The current version of the model doesn't support
+			 * Soft-Stop */
+			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+				u32 ticks = atom->ticks++;
+
+#ifndef CONFIG_MALI_JOB_DUMP
+				u32 soft_stop_ticks, hard_stop_ticks,
+								gpu_reset_ticks;
+				if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks_cl;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_cl;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_cl;
+				} else {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_ss;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_ss;
+				}
+
+				/* If timeouts have been changed then ensure
+				 * that atom tick count is not greater than the
+				 * new soft_stop timeout. This ensures that
+				 * atoms do not miss any of the timeouts due to
+				 * races between this worker and the thread
+				 * changing the timeouts. */
+				if (backend->timeouts_updated &&
+						ticks > soft_stop_ticks)
+					ticks = atom->ticks = soft_stop_ticks;
+
+				/* Job is Soft-Stoppable */
+				if (ticks == soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks ticks.
+					 * Soft stop the slot so we can run
+					 * other jobs.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+					int disjoint_threshold =
+		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+					u32 softstop_flags = 0u;
+
+					dev_dbg(kbdev->dev, "Soft-stop");
+					/* nr_user_contexts_running is updated
+					 * with the runpool_mutex, but we can't
+					 * take that here.
+					 *
+					 * However, if it's about to be
+					 * increased then the new context can't
+					 * run any jobs until they take the
+					 * hwaccess_lock, so it's OK to observe
+					 * the older value.
+					 *
+					 * Similarly, if it's about to be
+					 * decreased, the last job from another
+					 * context has already finished, so it's
+					 * not too bad that we observe the older
+					 * value and register a disjoint event
+					 * when we try soft-stopping */
+					if (js_devdata->nr_user_contexts_running
+							>= disjoint_threshold)
+						softstop_flags |=
+						JS_COMMAND_SW_CAUSES_DISJOINT;
+
+					kbase_job_slot_softstop_swflags(kbdev,
+						s, atom, softstop_flags);
+#endif
+				} else if (ticks == hard_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_ss ticks.
+					 * It should have been soft-stopped by
+					 * now. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks == gpu_reset_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_ss ticks.
+					 * It should have left the GPU by now.
+					 * Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#else				/* !CONFIG_MALI_JOB_DUMP */
+				/* NOTE: During CONFIG_MALI_JOB_DUMP, we use
+				 * the alternate timeouts, which makes the hard-
+				 * stop and GPU reset timeout much longer. We
+				 * also ensure that we don't soft-stop at all.
+				 */
+				if (ticks == js_devdata->soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks. We do
+					 * not soft-stop during
+					 * CONFIG_MALI_JOB_DUMP, however.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+				} else if (ticks ==
+					js_devdata->hard_stop_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_dumping
+					 * ticks. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks ==
+					js_devdata->gpu_reset_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_dumping
+					 * ticks. It should have left the GPU by
+					 * now. Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#endif				/* !CONFIG_MALI_JOB_DUMP */
+			}
+		}
+	}
+#if KBASE_GPU_RESET_EN
+	if (reset_needed) {
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* the timer is re-issued if there is contexts in the run-pool */
+
+	if (backend->timer_running)
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+			HRTIMER_MODE_REL);
+
+	backend->timeouts_updated = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	unsigned long flags;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	if (!timer_callback_should_run(kbdev)) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = false;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/* From now on, return value of timer_callback_should_run() will
+		 * also cause the timer to not requeue itself. Its return value
+		 * cannot change, because it depends on variables updated with
+		 * the runpool_mutex held, which the caller of this must also
+		 * hold */
+		hrtimer_cancel(&backend->scheduling_timer);
+	}
+
+	if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = true;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+							HRTIMER_MODE_REL);
+
+		KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
+									0u);
+	}
+}
+
+int kbase_backend_timer_init(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	backend->scheduling_timer.function = timer_callback;
+
+	backend->timer_running = false;
+
+	return 0;
+}
+
+void kbase_backend_timer_term(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_cancel(&backend->scheduling_timer);
+}
+
+void kbase_backend_timer_suspend(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = true;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timer_resume(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = false;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->timeouts_updated = true;
+}
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
new file mode 100644
index 0000000..6576e55
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
@@ -0,0 +1,74 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#ifndef _KBASE_JS_BACKEND_H_
+#define _KBASE_JS_BACKEND_H_
+
+/**
+ * kbase_backend_timer_init() - Initialise the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_term() - Terminate the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver termination
+ */
+void kbase_backend_timer_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
+ *                               timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on suspend, after the active count has reached
+ * zero. This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ *                              scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that is is not guaranteed to
+ * re-start the timer, only evalute whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100755
index 0000000..9cd2982
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,405 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+		u32 num_pages)
+{
+	u64 region;
+
+	/* can't lock a zero sized range */
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	region = pfn << PAGE_SHIFT;
+	/*
+	 * fls returns (given the ASSERT above):
+	 * 1 .. 32
+	 *
+	 * 10 + fls(num_pages)
+	 * results in the range (11 .. 42)
+	 */
+
+	/* gracefully handle num_pages being zero */
+	if (0 == num_pages) {
+		region |= 11;
+	} else {
+		u8 region_width;
+
+		region_width = 10 + fls(num_pages);
+		if (num_pages != (1ul << (region_width - 11))) {
+			/* not pow2, so must go up to the next pow2 */
+			region_width += 1;
+		}
+		KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+		KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+		region |= region_width;
+	}
+
+	return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+		unsigned int as_nr, struct kbase_context *kctx)
+{
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending. Do not log remaining register accesses. */
+	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
+
+	if (max_loops == 0) {
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		return -1;
+	}
+
+	/* If waiting in loop was performed, log last read value. */
+	if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
+		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	return 0;
+}
+
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
+		struct kbase_context *kctx)
+{
+	int status;
+
+	/* write AS_COMMAND when MMU is ready to accept another command */
+	status = wait_ready(kbdev, as_nr, kctx);
+	if (status == 0)
+		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
+									kctx);
+
+	return status;
+}
+
+static void validate_protected_page_fault(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	/* GPUs which support (native) protected mode shall not report page
+	 * fault addresses unless it has protected debug mode and protected
+	 * debug mode is turned on */
+	u32 protected_debug_mode = 0;
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+		return;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		protected_debug_mode = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS),
+				kctx) & GPU_DBGEN;
+	}
+
+	if (!protected_debug_mode) {
+		/* fault_addr should never be reported in protected mode.
+		 * However, we just continue by printing an error message */
+		dev_err(kbdev->dev, "Fault address reported in protected mode\n");
+	}
+}
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+	const int num_as = 16;
+	const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+	const int pf_shift = 0;
+	const unsigned long as_bit_mask = (1UL << num_as) - 1;
+	unsigned long flags;
+	u32 new_mask;
+	u32 tmp;
+
+	/* bus faults */
+	u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+	/* page faults (note: Ignore ASes with both pf and bf) */
+	u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	/* remember current mask */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	/* mask interrupts for now */
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+	while (bf_bits | pf_bits) {
+		struct kbase_as *as;
+		int as_no;
+		struct kbase_context *kctx;
+
+		/*
+		 * the while logic ensures we have a bit set, no need to check
+		 * for not-found here
+		 */
+		as_no = ffs(bf_bits | pf_bits) - 1;
+		as = &kbdev->as[as_no];
+
+		/*
+		 * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+		 * Bus/Page faults _should_ only occur whilst jobs are running,
+		 * and a job causing the Bus/Page fault shouldn't complete until
+		 * the MMU is updated
+		 */
+		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+
+
+		/* find faulting address */
+		as->fault_addr = kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_HI),
+						kctx);
+		as->fault_addr <<= 32;
+		as->fault_addr |= kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_LO),
+						kctx);
+
+		/* Mark the fault protected or not */
+		as->protected_mode = kbdev->protected_mode;
+
+		if (kbdev->protected_mode && as->fault_addr) {
+			/* check if address reporting is allowed */
+			validate_protected_page_fault(kbdev, kctx);
+		}
+
+		/* report the fault to debugfs */
+		kbase_as_fault_debugfs_new(kbdev, as_no);
+
+		/* record the fault status */
+		as->fault_status = kbase_reg_read(kbdev,
+						  MMU_AS_REG(as_no,
+							AS_FAULTSTATUS),
+						  kctx);
+
+		/* find the fault type */
+		as->fault_type = (bf_bits & (1 << as_no)) ?
+				KBASE_MMU_FAULT_TYPE_BUS :
+				KBASE_MMU_FAULT_TYPE_PAGE;
+
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
+			as->fault_extra_addr = kbase_reg_read(kbdev,
+					MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
+					kctx);
+			as->fault_extra_addr <<= 32;
+			as->fault_extra_addr |= kbase_reg_read(kbdev,
+					MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
+					kctx);
+		}
+
+		if (kbase_as_has_bus_fault(as)) {
+			/* Mark bus fault as handled.
+			 * Note that a bus fault is processed first in case
+			 * where both a bus fault and page fault occur.
+			 */
+			bf_bits &= ~(1UL << as_no);
+
+			/* remove the queued BF (and PF) from the mask */
+			new_mask &= ~(MMU_BUS_ERROR(as_no) |
+					MMU_PAGE_FAULT(as_no));
+		} else {
+			/* Mark page fault as handled */
+			pf_bits &= ~(1UL << as_no);
+
+			/* remove the queued PF from the mask */
+			new_mask &= ~MMU_PAGE_FAULT(as_no);
+		}
+
+		/* Process the interrupt for this address space */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_interrupt_process(kbdev, kctx, as);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+
+	/* reenable interrupts */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	new_mask |= tmp;
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx)
+{
+	struct kbase_mmu_setup *current_setup = &as->current_setup;
+	u32 transcfg = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
+		transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+
+		/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
+		/* Clear PTW_MEMATTR bits */
+		transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+		/* Enable correct PTW_MEMATTR bits */
+		transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+
+		if (kbdev->system_coherency == COHERENCY_ACE) {
+			/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
+			/* Clear PTW_SH bits */
+			transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+			/* Enable correct PTW_SH bits */
+			transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
+		}
+
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+				transcfg, kctx);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+				(current_setup->transcfg >> 32) & 0xFFFFFFFFUL,
+				kctx);
+	} else {
+		if (kbdev->system_coherency == COHERENCY_ACE)
+			current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+	}
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+			current_setup->transtab & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+			(current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+			current_setup->memattr & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+			(current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+
+	KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as,
+			current_setup->transtab,
+			current_setup->memattr,
+			transcfg);
+
+	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+}
+
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+		unsigned int handling_irq)
+{
+	int ret;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	if (op == AS_COMMAND_UNLOCK) {
+		/* Unlock doesn't require a lock first */
+		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+	} else {
+		u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+		/* Lock the region that needs to be updated */
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+				lock_addr & 0xFFFFFFFFUL, kctx);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+				(lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
+		write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+
+		/* Run the MMU operation */
+		write_cmd(kbdev, as->number, op, kctx);
+
+		/* Wait for the flush to complete */
+		ret = wait_ready(kbdev, as->number, kctx);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+			/* Issue an UNLOCK command to ensure that valid page
+			   tables are re-read by the GPU after an update.
+			   Note that, the FLUSH command should perform all the
+			   actions necessary, however the bus logs show that if
+			   multiple page faults occur within an 8 page region
+			   the MMU does not always re-read the updated page
+			   table entries for later faults or is only partially
+			   read, it subsequently raises the page fault IRQ for
+			   the same addresses, the unlock ensures that the MMU
+			   cache is flushed, so updates can be re-read.  As the
+			   region is now unlocked we need to issue 2 UNLOCK
+			   commands in order to flush the MMU/uTLB,
+			   see PRLAM-8812.
+			 */
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+		}
+	}
+
+	return ret;
+}
+
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 pf_bf_mask;
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	/* Clear the page (and bus fault IRQ as well in case one occurred) */
+	pf_bf_mask = MMU_PAGE_FAULT(as->number);
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		pf_bf_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 irq_mask;
+
+	/* Enable the page fault IRQ (and bus fault IRQ as well in case one
+	 * occurred) */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
+			MMU_PAGE_FAULT(as->number);
+
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		irq_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
new file mode 100644
index 0000000..1f76eed
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Interface file for the direct implementation for MMU hardware access
+ *
+ * Direct MMU hardware interface
+ *
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
+#define _MALI_KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * kbase_mmu_interrupt - Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the &kbase_device.
+ *
+ * @kbdev:          kbase context to clear the fault from.
+ * @irq_stat:       Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+#endif	/* _MALI_KBASE_MMU_HW_DIRECT_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
new file mode 100644
index 0000000..2ed7dfd
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static void always_on_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void always_on_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+	"always_on",			/* name */
+	always_on_init,			/* init */
+	always_on_term,			/* term */
+	always_on_get_core_mask,	/* get_core_mask */
+	always_on_get_core_active,	/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_ALWAYS_ON,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
new file mode 100644
index 0000000..d61d0d0
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -0,0 +1,82 @@
+
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * DOC:
+ * The "Always on" power management policy has the following
+ * characteristics:
+ *
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *    All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *    All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *    The Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed. The GPU itself is also kept powered, even though it is not
+ *    needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * Note:
+ *
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data
+ * @dummy: unused dummy variable
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_always_on {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
new file mode 100755
index 0000000..6069c0f
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -0,0 +1,504 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * GPU backend implementation of base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_pm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
+
+int kbase_pm_runtime_init(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+	if (callbacks) {
+		kbdev->pm.backend.callback_power_on =
+					callbacks->power_on_callback;
+		kbdev->pm.backend.callback_power_off =
+					callbacks->power_off_callback;
+		kbdev->pm.backend.callback_power_suspend =
+					callbacks->power_suspend_callback;
+		kbdev->pm.backend.callback_power_resume =
+					callbacks->power_resume_callback;
+		kbdev->pm.callback_power_runtime_init =
+					callbacks->power_runtime_init_callback;
+		kbdev->pm.callback_power_runtime_term =
+					callbacks->power_runtime_term_callback;
+		kbdev->pm.backend.callback_power_runtime_on =
+					callbacks->power_runtime_on_callback;
+		kbdev->pm.backend.callback_power_runtime_off =
+					callbacks->power_runtime_off_callback;
+		kbdev->pm.backend.callback_power_runtime_idle =
+					callbacks->power_runtime_idle_callback;
+
+		if (callbacks->power_runtime_init_callback)
+			return callbacks->power_runtime_init_callback(kbdev);
+		else
+			return 0;
+	}
+
+	kbdev->pm.backend.callback_power_on = NULL;
+	kbdev->pm.backend.callback_power_off = NULL;
+	kbdev->pm.backend.callback_power_suspend = NULL;
+	kbdev->pm.backend.callback_power_resume = NULL;
+	kbdev->pm.callback_power_runtime_init = NULL;
+	kbdev->pm.callback_power_runtime_term = NULL;
+	kbdev->pm.backend.callback_power_runtime_on = NULL;
+	kbdev->pm.backend.callback_power_runtime_off = NULL;
+	kbdev->pm.backend.callback_power_runtime_idle = NULL;
+
+	return 0;
+}
+
+void kbase_pm_runtime_term(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.callback_power_runtime_term) {
+		kbdev->pm.callback_power_runtime_term(kbdev);
+	}
+}
+
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_on_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = true;
+}
+
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_off_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = false;
+}
+
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_init(&kbdev->pm.lock);
+
+	kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!kbdev->pm.backend.gpu_poweroff_wait_wq)
+		return -ENOMEM;
+
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
+			kbase_pm_gpu_poweroff_wait_wq);
+
+	kbdev->pm.backend.gpu_powered = false;
+	kbdev->pm.suspending = false;
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->pm.backend.driver_ready_for_irqs = false;
+#endif /* CONFIG_MALI_DEBUG */
+	kbdev->pm.backend.gpu_in_desired_state = true;
+	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+
+	/* Initialise the metrics subsystem */
+	ret = kbasep_pm_metrics_init(kbdev);
+	if (ret)
+		return ret;
+
+	init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
+	kbdev->pm.backend.l2_powered = 0;
+
+	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
+	kbdev->pm.backend.reset_done = false;
+
+	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+	kbdev->pm.active_count = 0;
+
+	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+	init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
+
+	if (kbase_pm_ca_init(kbdev) != 0)
+		goto workq_fail;
+
+	if (kbase_pm_policy_init(kbdev) != 0)
+		goto pm_policy_fail;
+
+	return 0;
+
+pm_policy_fail:
+	kbase_pm_ca_term(kbdev);
+workq_fail:
+	kbasep_pm_metrics_term(kbdev);
+	return -EINVAL;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Turn clocks and interrupts on - no-op if we haven't done a previous
+	 * kbase_pm_clock_off() */
+	kbase_pm_clock_on(kbdev, is_resume);
+
+	/* Update core status as required by the policy */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+	kbase_pm_update_cores_state(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+	/* NOTE: We don't wait to reach the desired state, since running atoms
+	 * will wait for that state to be reached anyway */
+}
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+			pm.backend.gpu_poweroff_wait_work);
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+#if !PLATFORM_POWER_DOWN_ONLY
+	/* Wait for power transitions to complete. We do this with no locks held
+	 * so that we don't deadlock with any pending workqueues */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+	kbase_pm_check_transitions_sync(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+#if PLATFORM_POWER_DOWN_ONLY
+	if (kbdev->pm.backend.gpu_powered) {
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/* If L2 cache is powered then we must flush it before
+			 * we power off the GPU. Normally this would have been
+			 * handled when the L2 was powered off. */
+			kbase_gpu_cacheclean(kbdev);
+		}
+	}
+#endif /* PLATFORM_POWER_DOWN_ONLY */
+
+	if (!backend->poweron_required) {
+#if !PLATFORM_POWER_DOWN_ONLY
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		WARN_ON(kbdev->l2_available_bitmap ||
+				kbdev->shader_available_bitmap ||
+				kbdev->tiler_available_bitmap);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+
+		/* Consume any change-state events */
+		kbase_timeline_pm_check_handle_event(kbdev,
+					KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		/* Disable interrupts and turn the clock off */
+		if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
+			/*
+			 * Page/bus faults are pending, must drop locks to
+			 * process.  Interrupts are disabled so no more faults
+			 * should be generated at this point.
+			 */
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			kbase_flush_mmu_wqs(kbdev);
+			mutex_lock(&js_devdata->runpool_mutex);
+			mutex_lock(&kbdev->pm.lock);
+
+			/* Turn off clock now that fault have been handled. We
+			 * dropped locks so poweron_required may have changed -
+			 * power back on if this is the case (effectively only
+			 * re-enabling of the interrupts would be done in this
+			 * case, as the clocks to GPU were not withdrawn yet).
+			 */
+			if (backend->poweron_required)
+				kbase_pm_clock_on(kbdev, false);
+			else
+				WARN_ON(!kbase_pm_clock_off(kbdev,
+						backend->poweroff_is_suspend));
+		}
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	backend->poweroff_wait_in_progress = false;
+	if (backend->poweron_required) {
+		backend->poweron_required = false;
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_backend_slot_update(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	wake_up(&kbdev->pm.backend.poweroff_wait);
+}
+
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (!kbdev->pm.backend.poweroff_wait_in_progress) {
+		/* Force all cores off */
+		kbdev->pm.backend.desired_shader_state = 0;
+		kbdev->pm.backend.desired_tiler_state = 0;
+
+		/* Force all cores to be unavailable, in the situation where
+		 * transitions are in progress for some cores but not others,
+		 * and kbase_pm_check_transitions_nolock can not immediately
+		 * power off the cores */
+		kbdev->shader_available_bitmap = 0;
+		kbdev->tiler_available_bitmap = 0;
+		kbdev->l2_available_bitmap = 0;
+
+		kbdev->pm.backend.poweroff_wait_in_progress = true;
+		kbdev->pm.backend.poweroff_is_suspend = is_suspend;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/*Kick off wq here. Callers will have to wait*/
+		queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
+				&kbdev->pm.backend.gpu_poweroff_wait_work);
+	} else {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+}
+
+static bool is_poweroff_in_progress(struct kbase_device *kbdev)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
+{
+	wait_event_killable(kbdev->pm.backend.poweroff_wait,
+			is_poweroff_in_progress(kbdev));
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, don't enable IRQs as we are not ready to receive
+	 * them. */
+	ret = kbase_pm_init_hw(kbdev, flags);
+	if (ret) {
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		return ret;
+	}
+
+	kbasep_pm_init_core_use_bitmaps(kbdev);
+
+	kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
+			kbdev->pm.debug_core_mask[1] =
+			kbdev->pm.debug_core_mask[2] =
+			kbdev->gpu_props.props.raw_props.shader_present;
+
+	/* Pretend the GPU is active to prevent a power policy turning the GPU
+	 * cores off */
+	kbdev->pm.active_count = 1;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+	/* Ensure cycle counter is off */
+	kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+	/* We are ready to receive IRQ's now as power policy is set up, so
+	 * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+	kbdev->pm.backend.driver_ready_for_irqs = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+#endif
+	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the GPU and any cores needed by the policy */
+	kbase_pm_do_poweron(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	kbase_pm_context_idle(kbdev);
+
+	return 0;
+}
+
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
+
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
+
+	/* Free any resources the policy allocated */
+	kbase_pm_policy_term(kbdev);
+	kbase_pm_ca_term(kbdev);
+
+	/* Shut down the metrics subsystem */
+	kbasep_pm_metrics_term(kbdev);
+
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
+}
+
+void kbase_pm_power_changed(struct kbase_device *kbdev)
+{
+	bool cores_are_available;
+	unsigned long flags;
+
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
+
+	if (cores_are_available) {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		kbase_backend_slot_update(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2)
+{
+	kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
+	kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
+	kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
+	kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
+			new_core_mask_js2;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	/* Force power off the GPU and all cores (regardless of policy), only
+	 * after the PM active count reaches zero (otherwise, we risk turning it
+	 * off prematurely) */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, true);
+
+	kbase_backend_timer_suspend(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbdev->pm.suspending = false;
+	kbase_pm_do_poweron(kbdev, true);
+
+	kbase_backend_timer_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100644
index 0000000..5b369fb
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,187 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+	&kbase_pm_ca_fixed_policy_ops,
+#ifdef CONFIG_MALI_DEVFREQ
+	&kbase_pm_ca_devfreq_policy_ops,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_ca_random_policy_ops
+#endif
+};
+
+/**
+ * POLICY_COUNT - The number of policies available in the system.
+ *
+ * This is derived from the number of functions listed in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.ca_current_policy = policy_list[0];
+
+	kbdev->pm.backend.ca_current_policy->init(kbdev);
+
+	return 0;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_current_policy->term(kbdev);
+}
+
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
+
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.ca_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
+
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *new_policy)
+{
+	const struct kbase_pm_ca_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
+								new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.ca_current_policy;
+	kbdev->pm.backend.ca_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.ca_current_policy = new_policy;
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+					kbdev->shader_ready_bitmap,
+					kbdev->shader_transitioning_bitmap);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
+
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* All cores must be enabled when instrumentation is in use */
+	if (kbdev->pm.backend.instr_enabled)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	if (kbdev->pm.backend.ca_current_policy == NULL)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
+						kbdev->pm.debug_core_mask_all;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
+
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+							u64 cores_transitioning)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.ca_current_policy != NULL)
+		kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+							cores_ready,
+							cores_transitioning);
+}
+
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.instr_enabled = true;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbdev->pm.backend.instr_enabled = false;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
new file mode 100644
index 0000000..2b005c9
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+/**
+ * kbase_pm_ca_init - Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the core availability framework was successfully initialized,
+ *         -errno otherwise
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_term - Terminate core availability framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the currently available shader cores.
+ * Calls into the core availability policy
+ *
+ * Return: The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_update_core_status - Update core status
+ *
+ * @kbdev:               The kbase device structure for the device (must be
+ *                       a valid pointer)
+ * @cores_ready:         The bit mask of cores ready for job submission
+ * @cores_transitioning: The bit mask of cores that are transitioning power
+ *                       state
+ *
+ * Update core availability policy with current core power status
+ *
+ * Calls into the core availability policy
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+/**
+ * kbase_pm_ca_instr_enable - Enable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This overrides the output of the core availability policy, ensuring that all
+ * cores are available
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_instr_disable - Disable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This disables any previously enabled override, and resumes normal policy
+ * functionality
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_CA_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
new file mode 100644
index 0000000..a3dfe2a
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_DEMAND_H
+#define MALI_KBASE_PM_CA_DEMAND_H
+#include <linux/workqueue.h>
+/**
+ * struct kbasep_pm_ca_policy_demand - Private structure for demand ca policy
+ *
+ * This contains data that is private to the demand core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+#define MALI_CA_TIMER 1
+struct kbasep_pm_ca_policy_demand {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+#ifdef MALI_CA_TIMER
+	struct timer_list core_change_timer;
+#else
+	struct work_struct wq_work;
+#endif
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops;
+extern int mali_perf_set_num_pp_cores(int cores);
+
+#endif /* MALI_KBASE_PM_CA_DEMAND_H */
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c
new file mode 100644
index 0000000..4bb4c40
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c
@@ -0,0 +1,134 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * A core availability policy implementing core mask selection from devfreq OPPs
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/version.h>
+
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
+{
+	struct kbasep_pm_ca_policy_devfreq *data =
+				&kbdev->pm.backend.ca_policy_data.devfreq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	data->cores_desired = core_mask;
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+static void devfreq_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_devfreq *data =
+				&kbdev->pm.backend.ca_policy_data.devfreq;
+
+	if (kbdev->current_core_mask) {
+		data->cores_enabled = kbdev->current_core_mask;
+		data->cores_desired = kbdev->current_core_mask;
+	} else {
+		data->cores_enabled =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		data->cores_desired =
+				kbdev->gpu_props.props.raw_props.shader_present;
+	}
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void devfreq_term(struct kbase_device *kbdev)
+{
+}
+
+static u64 devfreq_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.devfreq.cores_enabled;
+}
+
+static void devfreq_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_devfreq *data =
+				&kbdev->pm.backend.ca_policy_data.devfreq;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the devfreq core availability
+ * policy.
+ *
+ * This is the static structure that defines the devfreq core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops = {
+	"devfreq",			/* name */
+	devfreq_init,			/* init */
+	devfreq_term,			/* term */
+	devfreq_get_core_mask,		/* get_core_mask */
+	devfreq_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_DEVFREQ,	/* id */
+};
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h
new file mode 100644
index 0000000..f67ec65
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h
@@ -0,0 +1,60 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * A core availability policy for use with devfreq, where core masks are
+ * associated with OPPs.
+ */
+
+#ifndef MALI_KBASE_PM_CA_DEVFREQ_H
+#define MALI_KBASE_PM_CA_DEVFREQ_H
+
+/**
+ * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy
+ *
+ * This contains data that is private to the devfreq core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ */
+struct kbasep_pm_ca_policy_devfreq {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops;
+
+/**
+ * kbase_devfreq_set_core_mask - Set core mask for policy to use
+ * @kbdev: Device pointer
+ * @core_mask: New core mask
+ *
+ * The new core mask will have immediate effect if the GPU is powered, or will
+ * take effect when it is next powered on.
+ */
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask);
+
+#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
new file mode 100644
index 0000000..1eea7e8
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static void fixed_init(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev,
+					u64 cores_ready,
+					u64 cores_transitioning)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(cores_ready);
+	CSTD_UNUSED(cores_transitioning);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the fixed power policy.
+ *
+ * This is the static structure that defines the fixed power policy's callback
+ * and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+	"fixed",			/* name */
+	fixed_init,			/* init */
+	fixed_term,			/* term */
+	fixed_get_core_mask,		/* get_core_mask */
+	fixed_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_FIXED,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
new file mode 100644
index 0000000..68a2eac
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Dummy member - no state is needed
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+	int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
new file mode 100644
index 0000000..a8c9b15
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_RANDOM_H
+#define MALI_KBASE_PM_CA_RANDOM_H
+
+/**
+ * struct kbasep_pm_ca_policy_random - Private structure for random ca policy
+ *
+ * This contains data that is private to the random core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+struct kbasep_pm_ca_policy_random {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+	struct timer_list core_change_timer;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_RANDOM_H */
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100644
index 0000000..602e175
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,75 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count == 0)
+		return 0;
+
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	"coarse_demand",			/* name */
+	coarse_demand_init,			/* init */
+	coarse_demand_term,			/* term */
+	coarse_demand_get_core_mask,		/* get_core_mask */
+	coarse_demand_get_core_active,		/* get_core_active */
+	0u,					/* flags */
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100644
index 0000000..f2b49eb
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * DOC:
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
+ *                                         policy
+ *
+ * This contains data that is private to the coarse demand power policy.
+ *
+ * @dummy: Dummy member - no state needed
+ */
+struct kbasep_pm_policy_coarse_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100755
index 0000000..417f6f8
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,533 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#include "mali_kbase_pm_ca_devfreq.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/**
+ * enum kbase_pm_core_type - The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to
+ * - kbase_pm_get_present_cores()
+ * - kbase_pm_get_active_cores()
+ * - kbase_pm_get_trans_cores()
+ * - kbase_pm_get_ready_cores().
+ *
+ * They specify which type of core should be acted on.  These values are set in
+ * a manner that allows core_type_to_reg() function to be simpler and more
+ * efficient.
+ *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ * @KBASE_PM_CORE_STACK: Core stacks
+ */
+enum kbase_pm_core_type {
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO,
+	KBASE_PM_CORE_STACK = STACK_PRESENT_LO
+};
+
+/**
+ * struct kbasep_pm_metrics - Metrics data collected for use by the power
+ *                            management framework.
+ *
+ *  @time_busy: number of ns the GPU was busy executing jobs since the
+ *          @time_period_start timestamp.
+ *  @time_idle: number of ns since time_period_start the GPU was not executing
+ *          jobs since the @time_period_start timestamp.
+ *  @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ */
+struct kbasep_pm_metrics {
+	u32 time_busy;
+	u32 time_idle;
+	u32 busy_cl[2];
+	u32 busy_gl;
+};
+
+/**
+ * struct kbasep_pm_metrics_state - State required to collect the metrics in
+ *                                  struct kbasep_pm_metrics
+ *  @time_period_start: time at which busy/idle measurements started
+ *  @gpu_active: true when the GPU is executing jobs. false when
+ *           not. Updated when the job scheduler informs us a job in submitted
+ *           or removed from a GPU slot.
+ *  @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
+ *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
+ *           GL jobs never run on slot 2 this slot is not recorded.
+ *  @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ *  @platform_data: pointer to data controlled by platform specific code
+ *  @kbdev: pointer to kbase device for which metrics are collected
+ *  @values: The current values of the power management metrics. The
+ *           kbase_pm_get_dvfs_metrics() function is used to compare these
+ *           current values with the saved values from a previous invocation.
+ *  @timer: timer to regularly make DVFS decisions based on the power
+ *           management metrics.
+ *  @timer_active: boolean indicating @timer is running
+ *  @dvfs_last: values of the PM metrics from the last DVFS tick
+ *  @dvfs_diff: different between the current and previous PM metrics.
+ */
+struct kbasep_pm_metrics_state {
+	ktime_t time_period_start;
+	bool gpu_active;
+	u32 active_cl_ctx[2];
+	u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
+	spinlock_t lock;
+
+	void *platform_data;
+	struct kbase_device *kbdev;
+
+	struct kbasep_pm_metrics values;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	struct hrtimer timer;
+	bool timer_active;
+	struct kbasep_pm_metrics dvfs_last;
+	struct kbasep_pm_metrics dvfs_diff;
+#endif
+};
+
+union kbase_pm_policy_data {
+	struct kbasep_pm_policy_always_on always_on;
+	struct kbasep_pm_policy_coarse_demand coarse_demand;
+	struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+	struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+union kbase_pm_ca_policy_data {
+	struct kbasep_pm_ca_policy_fixed fixed;
+	struct kbasep_pm_ca_policy_devfreq devfreq;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_ca_policy_random random;
+#endif
+};
+
+/**
+ * struct kbase_pm_backend_data - Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ *
+ * @ca_current_policy: The policy that is currently actively controlling core
+ *                     availability.
+ * @pm_current_policy: The policy that is currently actively controlling the
+ *                     power state.
+ * @ca_policy_data:    Private data for current CA policy
+ * @pm_policy_data:    Private data for current PM policy
+ * @ca_in_transition:  Flag indicating when core availability policy is
+ *                     transitioning cores. The core availability policy must
+ *                     set this when a change in core availability is occurring.
+ *                     power_change_lock must be held when accessing this.
+ * @reset_done:        Flag when a reset is complete
+ * @reset_done_wait:   Wait queue to wait for changes to @reset_done
+ * @l2_powered_wait:   Wait queue for whether the l2 cache has been powered as
+ *                     requested
+ * @l2_powered:        State indicating whether all the l2 caches are powered.
+ *                     Non-zero indicates they're *all* powered
+ *                     Zero indicates that some (or all) are not powered
+ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
+ *                              users
+ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
+ * @desired_shader_state: A bit mask identifying the shader cores that the
+ *                        power policy would like to be on. The current state
+ *                        of the cores may be different, but there should be
+ *                        transitions in progress that will eventually achieve
+ *                        this state (assuming that the policy doesn't change
+ *                        its mind in the mean time).
+ * @powering_on_shader_state: A bit mask indicating which shader cores are
+ *                            currently in a power-on transition
+ * @desired_tiler_state: A bit mask identifying the tiler cores that the power
+ *                       policy would like to be on. See @desired_shader_state
+ * @powering_on_tiler_state: A bit mask indicating which tiler core are
+ *                           currently in a power-on transition
+ * @powering_on_l2_state: A bit mask indicating which l2-caches are currently
+ *                        in a power-on transition
+ * @powering_on_stack_state: A bit mask indicating which core stacks are
+ *                           currently in a power-on transition
+ * @gpu_in_desired_state: This flag is set if the GPU is powered as requested
+ *                        by the desired_xxx_state variables
+ * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
+ * @gpu_powered:       Set to true when the GPU is powered and register
+ *                     accesses are possible, false otherwise
+ * @instr_enabled:     Set to true when instrumentation is enabled,
+ *                     false otherwise
+ * @cg1_disabled:      Set if the policy wants to keep the second core group
+ *                     powered off
+ * @driver_ready_for_irqs: Debug state indicating whether sufficient
+ *                         initialization of the driver has occurred to handle
+ *                         IRQs
+ * @gpu_powered_lock:  Spinlock that must be held when writing @gpu_powered or
+ *                     accessing @driver_ready_for_irqs
+ * @metrics:           Structure to hold metrics for the GPU
+ * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
+ *                        powered off
+ * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
+ *                        and/or timers are powered off
+ * @gpu_poweroff_timer: Timer for powering off GPU
+ * @gpu_poweroff_wq:   Workqueue to power off GPU on when timer fires
+ * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
+ * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
+ *                           timer callback
+ * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
+ *                          callback
+ * @poweroff_timer_needed: true if the poweroff timer is currently required,
+ *                         false otherwise
+ * @poweroff_timer_running: true if the poweroff timer is currently running,
+ *                          false otherwise
+ *                          power_change_lock should be held when accessing,
+ *                          unless there is no way the timer can be running (eg
+ *                          hrtimer_cancel() was called immediately before)
+ * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
+ *                             hwaccess_lock must be held when accessing
+ * @poweron_required: true if a GPU power on is required. Should only be set
+ *                    when poweroff_wait_in_progress is true, and therefore the
+ *                    GPU can not immediately be powered on. pm.lock must be
+ *                    held when accessing
+ * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend
+ *                       request. pm.lock must be held when accessing
+ * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off
+ * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq
+ * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete
+ * @callback_power_on: Callback when the GPU needs to be turned on. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_off: Callback when the GPU may be turned off. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to
+ *                          be turned off. See &struct kbase_pm_callback_conf
+ * @callback_power_resume: Callback when a resume occurs and the GPU needs to
+ *                          be turned on. See &struct kbase_pm_callback_conf
+ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See
+ *                             &struct kbase_pm_callback_conf
+ * @callback_power_runtime_off: Callback when the GPU may be turned off. See
+ *                              &struct kbase_pm_callback_conf
+ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
+ *                              &struct kbase_pm_callback_conf
+ *
+ * Note:
+ * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
+ * policy is being changed with kbase_pm_ca_set_policy() or
+ * kbase_pm_set_policy(). The change is protected under
+ * kbase_device.pm.power_change_lock. Direct access to this
+ * from IRQ context must therefore check for NULL. If NULL, then
+ * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
+ * functions that would have been done under IRQ.
+ */
+struct kbase_pm_backend_data {
+	const struct kbase_pm_ca_policy *ca_current_policy;
+	const struct kbase_pm_policy *pm_current_policy;
+	union kbase_pm_ca_policy_data ca_policy_data;
+	union kbase_pm_policy_data pm_policy_data;
+	bool ca_in_transition;
+	bool reset_done;
+	wait_queue_head_t reset_done_wait;
+	wait_queue_head_t l2_powered_wait;
+	int l2_powered;
+	int gpu_cycle_counter_requests;
+	spinlock_t gpu_cycle_counter_requests_lock;
+
+	u64 desired_shader_state;
+	u64 powering_on_shader_state;
+	u64 desired_tiler_state;
+	u64 powering_on_tiler_state;
+	u64 powering_on_l2_state;
+#ifdef CONFIG_MALI_CORESTACK
+	u64 powering_on_stack_state;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	bool gpu_in_desired_state;
+	wait_queue_head_t gpu_in_desired_state_wait;
+
+	bool gpu_powered;
+
+	bool instr_enabled;
+
+	bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+	bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+	spinlock_t gpu_powered_lock;
+
+
+	struct kbasep_pm_metrics_state metrics;
+
+	int gpu_poweroff_pending;
+	int shader_poweroff_pending_time;
+
+	struct hrtimer gpu_poweroff_timer;
+	struct workqueue_struct *gpu_poweroff_wq;
+	struct work_struct gpu_poweroff_work;
+
+	u64 shader_poweroff_pending;
+	u64 tiler_poweroff_pending;
+
+	bool poweroff_timer_needed;
+	bool poweroff_timer_running;
+
+	bool poweroff_wait_in_progress;
+	bool poweron_required;
+	bool poweroff_is_suspend;
+
+	struct workqueue_struct *gpu_poweroff_wait_wq;
+	struct work_struct gpu_poweroff_wait_work;
+
+	wait_queue_head_t poweroff_wait;
+
+	int (*callback_power_on)(struct kbase_device *kbdev);
+	void (*callback_power_off)(struct kbase_device *kbdev);
+	void (*callback_power_suspend)(struct kbase_device *kbdev);
+	void (*callback_power_resume)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
+};
+
+
+/* List of policy IDs */
+enum kbase_pm_policy_id {
+	KBASE_PM_POLICY_ID_DEMAND = 1,
+	KBASE_PM_POLICY_ID_ALWAYS_ON,
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
+	KBASE_PM_POLICY_ID_FAST_START
+#endif
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+/**
+ * struct kbase_pm_policy - Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core mask
+ * @get_core_active:    Function called to get the current overall GPU power
+ *                      state
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.pm_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core mask
+	 *
+	 * The returned mask should meet or exceed (kbdev->shader_needed_bitmap
+	 * | kbdev->shader_inuse_bitmap).
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: The mask of shader cores to be powered
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current overall GPU power state
+	 *
+	 * This function should consider the state of kbdev->pm.active_count. If
+	 * this count is greater than 0 then there is at least one active
+	 * context on the device and the GPU should be powered. If it is equal
+	 * to 0 then there are no active contexts and the GPU could be powered
+	 * off if desired.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: true if the GPU should be powered, false otherwise
+	 */
+	bool (*get_core_active)(struct kbase_device *kbdev);
+
+	kbase_pm_policy_flags flags;
+	enum kbase_pm_policy_id id;
+};
+
+
+enum kbase_pm_ca_policy_id {
+	KBASE_PM_CA_POLICY_ID_FIXED = 1,
+	KBASE_PM_CA_POLICY_ID_DEVFREQ,
+	KBASE_PM_CA_POLICY_ID_RANDOM
+};
+
+typedef u32 kbase_pm_ca_policy_flags;
+
+/**
+ * Maximum length of a CA policy names
+ */
+#define KBASE_PM_CA_MAX_POLICY_NAME_LEN 15
+
+/**
+ * struct kbase_pm_ca_policy - Core availability policy structure.
+ *
+ * Each core availability policy exposes a (static) instance of this structure
+ * which contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core
+ *                      availability mask
+ * @update_core_status: Function called to update the current core status
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_ca_policy {
+	char name[KBASE_PM_CA_MAX_POLICY_NAME_LEN + 1];
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.ca_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core availability mask
+	 *
+	 * When a change in core availability is occurring, the policy must set
+	 * kbdev->pm.ca_in_transition to true. This is to indicate that
+	 * reporting changes in power state cannot be optimized out, even if
+	 * kbdev->pm.desired_shader_state remains unchanged. This must be done
+	 * by any functions internal to the Core Availability Policy that change
+	 * the return value of kbase_pm_ca_policy::get_core_mask.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *              valid pointer)
+	 *
+	 * Return: The current core availability mask
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to update the current core status
+	 *
+	 * If none of the cores in core group 0 are ready or transitioning, then
+	 * the policy must ensure that the next call to get_core_mask does not
+	 * return 0 for all cores in core group 0. It is an error to disable
+	 * core group 0 through the core availability policy.
+	 *
+	 * When a change in core availability has finished, the policy must set
+	 * kbdev->pm.ca_in_transition to false. This is to indicate that
+	 * changes in power state can once again be optimized out when
+	 * kbdev->pm.desired_shader_state is unchanged.
+	 *
+	 * @kbdev:               The kbase device structure for the device
+	 *                       (must be a valid pointer)
+	 * @cores_ready:         The mask of cores currently powered and
+	 *                       ready to run jobs
+	 * @cores_transitioning: The mask of cores currently transitioning
+	 *                       power state
+	 */
+	void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+	kbase_pm_ca_policy_flags flags;
+
+	/**
+	 * Field indicating an ID for this policy. This is not necessarily the
+	 * same as its index in the list returned by kbase_pm_list_policies().
+	 * It is used purely for debugging.
+	 */
+	enum kbase_pm_ca_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
new file mode 100644
index 0000000..e0edddc
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -0,0 +1,78 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
+
+	if (0 == kbdev->pm.active_count)
+		return 0;
+
+	return desired;
+}
+
+static bool demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_get_core_active,		/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
new file mode 100644
index 0000000..5ee1824
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#ifndef MALI_KBASE_PM_DEMAND_H
+#define MALI_KBASE_PM_DEMAND_H
+
+/**
+ * DOC: Demand power management policy
+ *
+ * The demand power management policy has the following characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - The Shader Cores are not powered up
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - Only those Shader Cores are powered up
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * Note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_demand - Private structure for policy instance data
+ *
+ * @dummy: No state is needed, a dummy variable
+ *
+ * This contains data that is private to the demand power policy.
+ */
+struct kbasep_pm_policy_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_DEMAND_H */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
new file mode 100755
index 0000000..44803ab
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -0,0 +1,1687 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel Power Management hardware control
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_smc.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+
+#if MALI_MOCK_TEST
+#define MOCKABLE(function) function##_original
+#else
+#define MOCKABLE(function) function
+#endif				/* MALI_MOCK_TEST */
+
+/**
+ * enum kbasep_pm_action - Actions that can be performed on a core.
+ *
+ * This enumeration is private to the file. Its values are set to allow
+ * core_type_to_reg() function, which decodes this enumeration, to be simpler
+ * and more efficient.
+ *
+ * @ACTION_PRESENT: The cores that are present
+ * @ACTION_READY: The cores that are ready
+ * @ACTION_PWRON: Power on the cores specified
+ * @ACTION_PWROFF: Power off the cores specified
+ * @ACTION_PWRTRANS: The cores that are transitioning
+ * @ACTION_PWRACTIVE: The cores that are active
+ */
+enum kbasep_pm_action {
+	ACTION_PRESENT = 0,
+	ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO),
+	ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO),
+	ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO),
+	ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO),
+	ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO)
+};
+
+static u64 kbase_pm_get_state(
+		struct kbase_device *kbdev,
+		enum kbase_pm_core_type core_type,
+		enum kbasep_pm_action action);
+
+/**
+ * core_type_to_reg - Decode a core type and action to a register.
+ *
+ * Given a core type (defined by kbase_pm_core_type) and an action (defined
+ * by kbasep_pm_action) this function will return the register offset that
+ * will perform the action on the core type. The register returned is the _LO
+ * register and an offset must be applied to use the _HI register.
+ *
+ * @core_type: The type of core
+ * @action:    The type of action
+ *
+ * Return: The register offset of the _LO register that performs an action of
+ * type @action on a core of type @core_type.
+ */
+static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
+						enum kbasep_pm_action action)
+{
+#ifdef CONFIG_MALI_CORESTACK
+	if (core_type == KBASE_PM_CORE_STACK) {
+		switch (action) {
+		case ACTION_PRESENT:
+			return STACK_PRESENT_LO;
+		case ACTION_READY:
+			return STACK_READY_LO;
+		case ACTION_PWRON:
+			return STACK_PWRON_LO;
+		case ACTION_PWROFF:
+			return STACK_PWROFF_LO;
+		case ACTION_PWRTRANS:
+			return STACK_PWRTRANS_LO;
+		default:
+			BUG();
+		}
+	}
+#endif /* CONFIG_MALI_CORESTACK */
+
+	return (u32)core_type + (u32)action;
+}
+
+#ifdef CONFIG_ARM64
+static void mali_cci_flush_l2(struct kbase_device *kbdev)
+{
+	const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED;
+	u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+	u32 raw;
+
+	/*
+	 * Note that we don't take the cache flush mutex here since
+	 * we expect to be the last user of the L2, all other L2 users
+	 * would have dropped their references, to initiate L2 power
+	 * down, L2 power down being the only valid place for this
+	 * to be called from.
+	 */
+
+	kbase_reg_write(kbdev,
+			GPU_CONTROL_REG(GPU_COMMAND),
+			GPU_COMMAND_CLEAN_INV_CACHES,
+			NULL);
+
+	raw = kbase_reg_read(kbdev,
+		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+		NULL);
+
+	/* Wait for cache flush to complete before continuing, exit on
+	 * gpu resets or loop expiry. */
+	while (((raw & mask) == 0) && --loops) {
+		raw = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+					NULL);
+	}
+}
+#endif
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+static void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo = cores & 0xFFFFFFFF;
+	u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	if (cores) {
+		if (action == ACTION_PWRON)
+			kbase_trace_mali_pm_power_on(core_type, cores);
+		else if (action == ACTION_PWROFF)
+			kbase_trace_mali_pm_power_off(core_type, cores);
+	}
+#endif
+
+	if (cores) {
+		u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
+
+		if (action == ACTION_PWRON)
+			state |= cores;
+		else if (action == ACTION_PWROFF)
+			state &= ~cores;
+		KBASE_TLSTREAM_AUX_PM_STATE(core_type, state);
+	}
+
+	/* Tracing */
+	if (cores) {
+		if (action == ACTION_PWRON)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u,
+									lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL,
+									0u, lo);
+				break;
+			default:
+				break;
+			}
+		else if (action == ACTION_PWROFF)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL,
+									0u, lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
+									0u, lo);
+				/* disable snoops before L2 is turned off */
+				kbase_pm_cache_snoop_disable(kbdev);
+				break;
+			default:
+				break;
+			}
+	}
+
+	if (lo != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+	if (hi != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
+
+/**
+ * kbase_pm_get_state - Get information about a core set
+ *
+ * This function gets information (chosen by @action) about a set of cores of
+ * a type given by @core_type. It is a static function used by
+ * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and
+ * kbase_pm_get_ready_cores().
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the should be queried
+ * @action:    The property of the cores to query
+ *
+ * Return: A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo, hi;
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+
+	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+	return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev)
+{
+	kbdev->shader_inuse_bitmap = 0;
+	kbdev->shader_needed_bitmap = 0;
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_users_count = 0;
+	kbdev->l2_available_bitmap = 0;
+	kbdev->tiler_needed_cnt = 0;
+	kbdev->tiler_inuse_cnt = 0;
+
+	memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+/**
+ * kbase_pm_get_present_cores - Get the cores that are present
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (type) {
+	case KBASE_PM_CORE_L2:
+		return kbdev->gpu_props.props.raw_props.l2_present;
+	case KBASE_PM_CORE_SHADER:
+		return kbdev->gpu_props.props.raw_props.shader_present;
+	case KBASE_PM_CORE_TILER:
+		return kbdev->gpu_props.props.raw_props.tiler_present;
+	case KBASE_PM_CORE_STACK:
+		return kbdev->gpu_props.props.raw_props.stack_present;
+	default:
+		break;
+	}
+	KBASE_DEBUG_ASSERT(0);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * kbase_pm_get_active_cores - Get the cores that are "active"
+ *                             (busy processing work)
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are active
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between
+ *                            power states
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are transitioning
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * kbase_pm_get_ready_cores - Get the cores that are powered on
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are ready (powered on)
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	u64 result;
+
+	result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+	switch (type) {
+	case KBASE_PM_CORE_SHADER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_TILER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_L2:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * kbase_pm_transition_core_type - Perform power transitions for a particular
+ *                                 core type.
+ *
+ * This function will perform any available power transitions to make the actual
+ * hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that call cannot be made
+ * until the transition has finished. Cores which are not present in the
+ * hardware are ignored if they are specified in the desired_state bitmask,
+ * however the return value will always be 0 in this case.
+ *
+ * @kbdev:             The kbase device
+ * @type:              The core type to perform transitions for
+ * @desired_state:     A bit mask of the desired state of the cores
+ * @in_use:            A bit mask of the cores that are currently running
+ *                     jobs. These cores have to be kept powered up because
+ *                     there are jobs running (or about to run) on them.
+ * @available:         Receives a bit mask of the cores that the job
+ *                     scheduler can use to submit jobs to. May be NULL if
+ *                     this is not needed.
+ * @powering_on:       Bit mask to update with cores that are
+ *                    transitioning to a power-on state.
+ *
+ * Return: true if the desired state has been reached, false otherwise
+ */
+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type,
+						u64 desired_state,
+						u64 in_use,
+						u64 * const available,
+						u64 *powering_on)
+{
+	u64 present;
+	u64 ready;
+	u64 trans;
+	u64 powerup;
+	u64 powerdown;
+	u64 powering_on_trans;
+	u64 desired_state_in_use;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Get current state */
+	present = kbase_pm_get_present_cores(kbdev, type);
+	trans = kbase_pm_get_trans_cores(kbdev, type);
+	ready = kbase_pm_get_ready_cores(kbdev, type);
+	/* mask off ready from trans in case transitions finished between the
+	 * register reads */
+	trans &= ~ready;
+
+	if (trans) /* Do not progress if any cores are transitioning */
+		return false;
+
+	powering_on_trans = trans & *powering_on;
+	*powering_on = powering_on_trans;
+
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	/* Update desired state to include the in-use cores. These have to be
+	 * kept powered up because there are jobs running or about to run on
+	 * these cores
+	 */
+	desired_state_in_use = desired_state | in_use;
+
+	/* Update state of whether l2 caches are powered */
+	if (type == KBASE_PM_CORE_L2) {
+		if ((ready == present) && (desired_state_in_use == ready) &&
+								(trans == 0)) {
+			/* All are ready, none will be turned off, and none are
+			 * transitioning */
+			kbdev->pm.backend.l2_powered = 1;
+			/*
+			 * Ensure snoops are enabled after L2 is powered up,
+			 * note that kbase keeps track of the snoop state, so
+			 * safe to repeatedly call.
+			 */
+			kbase_pm_cache_snoop_enable(kbdev);
+			if (kbdev->l2_users_count > 0) {
+				/* Notify any registered l2 cache users
+				 * (optimized out when no users waiting) */
+				wake_up(&kbdev->pm.backend.l2_powered_wait);
+			}
+		} else
+			kbdev->pm.backend.l2_powered = 0;
+	}
+
+	if (desired_state == ready && (trans == 0))
+		return true;
+
+	/* Restrict the cores to those that are actually present */
+	powerup = desired_state_in_use & present;
+	powerdown = (~desired_state_in_use) & present;
+
+	/* Restrict to cores that are not already in the desired state */
+	powerup &= ~ready;
+	powerdown &= ready;
+
+	/* Don't transition any cores that are already transitioning, except for
+	 * Mali cores that support the following case:
+	 *
+	 * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
+	 * a core that is currently transitioning to power off, then this is
+	 * remembered and the shader core is automatically powered up again once
+	 * the original transition completes. Once the automatic power on is
+	 * complete any job scheduled on the shader core should start.
+	 */
+	powerdown &= ~trans;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
+		if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
+			trans = powering_on_trans; /* for exception cases, only
+						    * mask off cores in power on
+						    * transitions */
+
+	powerup &= ~trans;
+
+	/* Perform transitions if any */
+	kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
+#if !PLATFORM_POWER_DOWN_ONLY
+	kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
+#endif
+
+	/* Recalculate cores transitioning on, and re-evaluate our state */
+	powering_on_trans |= powerup;
+	*powering_on = powering_on_trans;
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	return false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
+
+/**
+ * get_desired_cache_status - Determine which caches should be on for a
+ *                            particular core state
+ *
+ * This function takes a bit mask of the present caches and the cores (or
+ * caches) that are attached to the caches that will be powered. It then
+ * computes which caches should be turned on to allow the cores requested to be
+ * powered up.
+ *
+ * @present:       The bit mask of present caches
+ * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
+ *                 be powered
+ * @tilers_powered: The bit mask of tilers that are desired to be powered
+ *
+ * Return: A bit mask of the caches that should be turned on
+ */
+static u64 get_desired_cache_status(u64 present, u64 cores_powered,
+		u64 tilers_powered)
+{
+	u64 desired = 0;
+
+	while (present) {
+		/* Find out which is the highest set bit */
+		u64 bit = fls64(present) - 1;
+		u64 bit_mask = 1ull << bit;
+		/* Create a mask which has all bits from 'bit' upwards set */
+
+		u64 mask = ~(bit_mask - 1);
+
+		/* If there are any cores powered at this bit or above (that
+		 * haven't previously been processed) then we need this core on
+		 */
+		if (cores_powered & mask)
+			desired |= bit_mask;
+
+		/* Remove bits from cores_powered and present */
+		cores_powered &= ~mask;
+		present &= ~bit_mask;
+	}
+
+	/* Power up the required L2(s) for the tiler */
+	if (tilers_powered)
+		desired |= 1;
+
+	return desired;
+}
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status);
+
+#ifdef CONFIG_MALI_CORESTACK
+u64 kbase_pm_core_stack_mask(u64 cores)
+{
+	u64 stack_mask = 0;
+	size_t const MAX_CORE_ID = 31;
+	size_t const NUM_CORES_PER_STACK = 4;
+	size_t i;
+
+	for (i = 0; i <= MAX_CORE_ID; ++i) {
+		if (test_bit(i, (unsigned long *)&cores)) {
+			/* Every core which ID >= 16 is filled to stacks 4-7
+			 * instead of 0-3 */
+			size_t const stack_num = (i >= 16) ?
+				(i % NUM_CORES_PER_STACK) + 4 :
+				(i % NUM_CORES_PER_STACK);
+			set_bit(stack_num, (unsigned long *)&stack_mask);
+		}
+	}
+
+	return stack_mask;
+}
+#endif /* CONFIG_MALI_CORESTACK */
+
+bool
+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+	bool cores_are_available = false;
+	bool in_desired_state = true;
+	u64 desired_l2_state;
+#ifdef CONFIG_MALI_CORESTACK
+	u64 desired_stack_state;
+	u64 stacks_powered;
+#endif /* CONFIG_MALI_CORESTACK */
+	u64 cores_powered;
+	u64 tilers_powered;
+	u64 tiler_available_bitmap;
+	u64 tiler_transitioning_bitmap;
+	u64 shader_available_bitmap;
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+	u64 l2_available_bitmap;
+	u64 prev_l2_available_bitmap;
+	u64 l2_inuse_bitmap;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+	if (kbdev->pm.backend.gpu_powered == false) {
+		spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+		if (kbdev->pm.backend.desired_shader_state == 0 &&
+				kbdev->pm.backend.desired_tiler_state == 0)
+			return true;
+		return false;
+	}
+
+	/* Trace that a change-state is being requested, and that it took
+	 * (effectively) no time to start it. This is useful for counting how
+	 * many state changes occurred, in a way that's backwards-compatible
+	 * with processing the trace data */
+	kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+	kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+
+	/* If any cores are already powered then, we must keep the caches on */
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+	cores_powered |= kbdev->pm.backend.desired_shader_state;
+
+#ifdef CONFIG_MALI_CORESTACK
+	/* Work out which core stacks want to be powered */
+	desired_stack_state = kbase_pm_core_stack_mask(cores_powered);
+	stacks_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK) |
+		desired_stack_state;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	/* Work out which tilers want to be powered */
+	tiler_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_TILER);
+	tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+	tilers_powered |= kbdev->pm.backend.desired_tiler_state;
+
+	/* If there are l2 cache users registered, keep all l2s powered even if
+	 * all other cores are off. */
+	if (kbdev->l2_users_count > 0)
+		cores_powered |= kbdev->gpu_props.props.raw_props.l2_present;
+
+	desired_l2_state = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered, tilers_powered);
+
+	l2_inuse_bitmap = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered | shader_transitioning_bitmap,
+			tilers_powered | tiler_transitioning_bitmap);
+
+#ifdef CONFIG_MALI_CORESTACK
+	if (stacks_powered)
+		desired_l2_state |= 1;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	/* If any l2 cache is on, then enable l2 #0, for use by job manager */
+	if (0 != desired_l2_state)
+		desired_l2_state |= 1;
+
+	prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+	in_desired_state &= kbase_pm_transition_core_type(kbdev,
+			KBASE_PM_CORE_L2, desired_l2_state, l2_inuse_bitmap,
+			&l2_available_bitmap,
+			&kbdev->pm.backend.powering_on_l2_state);
+
+	if (kbdev->l2_available_bitmap != l2_available_bitmap)
+		KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
+
+	kbdev->l2_available_bitmap = l2_available_bitmap;
+
+
+#ifdef CONFIG_MALI_CORESTACK
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_STACK, desired_stack_state, 0,
+				&kbdev->stack_available_bitmap,
+				&kbdev->pm.backend.powering_on_stack_state);
+	}
+#endif /* CONFIG_MALI_CORESTACK */
+
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_TILER,
+				kbdev->pm.backend.desired_tiler_state,
+				0, &tiler_available_bitmap,
+				&kbdev->pm.backend.powering_on_tiler_state);
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_SHADER,
+				kbdev->pm.backend.desired_shader_state,
+				kbdev->shader_inuse_bitmap,
+				&shader_available_bitmap,
+				&kbdev->pm.backend.powering_on_shader_state);
+
+		if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u,
+						(u32) shader_available_bitmap);
+			KBASE_TIMELINE_POWER_SHADER(kbdev,
+						shader_available_bitmap);
+		}
+
+		kbdev->shader_available_bitmap = shader_available_bitmap;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u,
+						(u32) tiler_available_bitmap);
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+		}
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+	} else if ((l2_available_bitmap &
+			kbdev->gpu_props.props.raw_props.tiler_present) !=
+			kbdev->gpu_props.props.raw_props.tiler_present) {
+		tiler_available_bitmap = 0;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+	}
+
+	/* State updated for slow-path waiters */
+	kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+
+	shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* Determine whether the cores are now available (even if the set of
+	 * available cores is empty). Note that they can be available even if
+	 * we've not finished transitioning to the desired state */
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state) {
+		cores_are_available = true;
+
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+				(u32)(kbdev->shader_available_bitmap &
+				kbdev->pm.backend.desired_shader_state));
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+				(u32)(kbdev->tiler_available_bitmap &
+				kbdev->pm.backend.desired_tiler_state));
+
+		/* Log timelining information about handling events that power
+		 * up cores, to match up either with immediate submission either
+		 * because cores already available, or from PM IRQ */
+		if (!in_desired_state)
+			kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+
+	if (in_desired_state) {
+		KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_L2));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+#endif
+
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_L2,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_L2));
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_SHADER,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_SHADER));
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_TILER,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_STACK,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+				kbdev->pm.backend.gpu_in_desired_state,
+				(u32)kbdev->pm.backend.desired_shader_state);
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+				(u32)kbdev->pm.backend.desired_tiler_state);
+
+		/* Log timelining information for synchronous waiters */
+		kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+		/* Wake slow-path waiters. Job scheduler does not use this. */
+		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+
+		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
+	}
+
+	spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+
+	/* kbase_pm_ca_update_core_status can cause one-level recursion into
+	 * this function, so it must only be called once all changes to kbdev
+	 * have been committed, and after the gpu_powered_lock has been
+	 * dropped. */
+	if (kbdev->shader_ready_bitmap != shader_ready_bitmap ||
+	    kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) {
+		kbdev->shader_ready_bitmap = shader_ready_bitmap;
+		kbdev->shader_transitioning_bitmap =
+						shader_transitioning_bitmap;
+
+		kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap,
+						shader_transitioning_bitmap);
+	}
+
+	/* The core availability policy is not allowed to keep core group 0
+	 * turned off (unless it was changing the l2 power state) */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
+		(prev_l2_available_bitmap == desired_l2_state) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+		BUG();
+
+	/* The core availability policy is allowed to keep core group 1 off,
+	 * but all jobs specifically targeting CG1 must fail */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask))
+		kbdev->pm.backend.cg1_disabled = true;
+	else
+		kbdev->pm.backend.cg1_disabled = false;
+
+	return cores_are_available;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock);
+
+/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has
+ * aborted due to a fatal signal. If the time spent waiting has exceeded this
+ * threshold then there is most likely a hardware issue. */
+#define PM_TIMEOUT (5*HZ) /* 5s */
+
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long timeout;
+	bool cores_are_available;
+	int ret;
+
+	/* Force the transition to be checked and reported - the cores may be
+	 * 'available' (for job submission) but not fully powered up. */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	timeout = jiffies + PM_TIMEOUT;
+
+	/* Wait for cores */
+	ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+			kbdev->pm.backend.gpu_in_desired_state);
+
+	if (ret < 0 && time_after(jiffies, timeout)) {
+		dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+		dev_err(kbdev->dev, "Desired state :\n");
+		dev_err(kbdev->dev, "\tShader=%016llx\n",
+				kbdev->pm.backend.desired_shader_state);
+		dev_err(kbdev->dev, "\tTiler =%016llx\n",
+				kbdev->pm.backend.desired_tiler_state);
+		dev_err(kbdev->dev, "Current state :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_LO),
+					NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_LO), NULL));
+		dev_err(kbdev->dev, "Cores transitioning :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_LO), NULL));
+#if KBASE_GPU_RESET_EN
+		dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+		if (kbase_prepare_to_reset_gpu(kbdev))
+			kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+	} else {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
+
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Clear all interrupts,
+	 * and unmask them all.
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
+									NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
+
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Mask all interrupts,
+	 * and clear them all.
+	 */
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+}
+
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_disable_interrupts_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
+
+
+/*
+ * pmu layout:
+ * 0x0000: PMU TAG (RO) (0xCAFECAFE)
+ * 0x0004: PMU VERSION ID (RO) (0x00000000)
+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE)
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
+{
+	bool reset_required = is_resume;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_powered) {
+		/* Already turned on */
+		if (kbdev->poweroff_pending)
+			kbase_pm_enable_interrupts(kbdev);
+		kbdev->poweroff_pending = false;
+		KBASE_DEBUG_ASSERT(!is_resume);
+		return;
+	}
+
+	kbdev->poweroff_pending = false;
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u);
+
+	if (is_resume && kbdev->pm.backend.callback_power_resume) {
+		kbdev->pm.backend.callback_power_resume(kbdev);
+		return;
+	} else if (kbdev->pm.backend.callback_power_on) {
+		kbdev->pm.backend.callback_power_on(kbdev);
+		/* If your platform properly keeps the GPU state you may use the
+		 * return value of the callback_power_on function to
+		 * conditionally reset the GPU on power up. Currently we are
+		 * conservative and always reset the GPU. */
+		reset_required = true;
+	}
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	kbdev->pm.backend.gpu_powered = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (reset_required) {
+		/* GPU state was lost, reset GPU to ensure it is in a
+		 * consistent state */
+		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
+	}
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_restore_all_as(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Lastly, enable the interrupts */
+	kbase_pm_enable_interrupts(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
+
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* ASSERT that the cores should now be unavailable. No lock needed. */
+	KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+
+	kbdev->poweroff_pending = true;
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* Already turned off */
+		if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+			kbdev->pm.backend.callback_power_suspend(kbdev);
+		return true;
+	}
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u);
+
+	/* Disable interrupts. This also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure that any IRQ handlers have finished */
+	kbase_synchronize_irqs(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (atomic_read(&kbdev->faults_pending)) {
+		/* Page/bus faults are still being processed. The GPU can not
+		 * be powered off until they have completed */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return false;
+	}
+
+	kbase_pm_cache_snoop_disable(kbdev);
+
+	/* The GPU power may be turned off from this point */
+	kbdev->pm.backend.gpu_powered = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+		kbdev->pm.backend.callback_power_suspend(kbdev);
+	else if (kbdev->pm.backend.callback_power_off)
+		kbdev->pm.backend.callback_power_off(kbdev);
+	return true;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_off);
+
+struct kbasep_reset_timeout_data {
+	struct hrtimer timer;
+	bool timed_out;
+	struct kbase_device *kbdev;
+};
+
+void kbase_pm_reset_done(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	kbdev->pm.backend.reset_done = true;
+	wake_up(&kbdev->pm.backend.reset_done_wait);
+}
+
+/**
+ * kbase_pm_wait_for_reset - Wait for a reset to happen
+ *
+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
+ *
+ * @kbdev: Kbase device
+ */
+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	wait_event(kbdev->pm.backend.reset_done_wait,
+						(kbdev->pm.backend.reset_done));
+	kbdev->pm.backend.reset_done = false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done);
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+	struct kbasep_reset_timeout_data *rtdata =
+		container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+	rtdata->timed_out = 1;
+
+	/* Set the wait queue to wake up kbase_pm_init_hw even though the reset
+	 * hasn't completed */
+	kbase_pm_reset_done(rtdata->kbdev);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+	u32 jm_values[4];
+	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >>
+		GPU_ID_VERSION_MAJOR_SHIFT;
+
+	kbdev->hw_quirks_sc = 0;
+
+	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443.
+	 * and needed due to MIDGLES-3539. See PRLAM-11035 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+		kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
+
+	/* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+		kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	/* Enable alternative hardware counter selection if configured. */
+	if (!GPU_ID_IS_NEW_FORMAT(prod_id))
+		kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+#endif
+
+	/* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+		kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
+
+	if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
+		if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */
+			kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
+		else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */
+			kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
+	}
+
+	if (!kbdev->hw_quirks_sc)
+		kbdev->hw_quirks_sc = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_CONFIG), NULL);
+
+	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(TILER_CONFIG), NULL);
+
+	/* Set tiler clock gate override if required */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+		kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+
+	/* Limit the GPU bus bandwidth if the platform needs this. */
+	kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(L2_MMU_CONFIG), NULL);
+
+
+	/* Limit read & write ID width for AXI */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) {
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS);
+		kbdev->hw_quirks_mmu |= (DEFAULT_3BIT_ARID_LIMIT & 0x7) <<
+				L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT;
+
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES);
+		kbdev->hw_quirks_mmu |= (DEFAULT_3BIT_AWID_LIMIT & 0x7) <<
+				L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT;
+	} else {
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS);
+		kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT;
+
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+		kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
+	}
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Allow memory configuration disparity to be ignored, we
+		 * optimize the use of shared memory and thus we expect
+		 * some disparity in the memory configuration */
+		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
+	}
+
+	kbdev->hw_quirks_jm = 0;
+	/* Only for T86x/T88x-based products after r2p0 */
+	if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) {
+
+		if (of_property_read_u32_array(np,
+					"jm_config",
+					&jm_values[0],
+					ARRAY_SIZE(jm_values))) {
+			/* Entry not in device tree, use defaults  */
+			jm_values[0] = 0;
+			jm_values[1] = 0;
+			jm_values[2] = 0;
+			jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT;
+		}
+
+		/* Limit throttle limit to 6 bits*/
+		if (jm_values[3] > JM_MAX_JOB_THROTTLE_LIMIT) {
+			dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63).");
+			jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT;
+		}
+
+		/* Aggregate to one integer. */
+		kbdev->hw_quirks_jm |= (jm_values[0] ?
+				JM_TIMESTAMP_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[1] ?
+				JM_CLOCK_GATE_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[2] ?
+				JM_JOB_THROTTLE_ENABLE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[3] <<
+				JM_JOB_THROTTLE_LIMIT_SHIFT);
+
+	} else if (GPU_ID_IS_NEW_FORMAT(prod_id) &&
+			   (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
+					   GPU_ID2_PRODUCT_TMIX)) {
+		/* Only for tMIx */
+		u32 coherency_features;
+
+		coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+		/* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
+		 * documented for tMIx so force correct value here.
+		 */
+		if (coherency_features ==
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE)) {
+			kbdev->hw_quirks_jm |=
+				(COHERENCY_ACE_LITE | COHERENCY_ACE) <<
+				JM_FORCE_COHERENCY_FEATURES_SHIFT;
+		}
+	}
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING))
+		kbdev->hw_quirks_sc |= SC_TLS_HASH_ENABLE;
+
+	if (!kbdev->hw_quirks_jm)
+		kbdev->hw_quirks_jm = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JM_CONFIG), NULL);
+
+#ifdef CONFIG_MALI_CORESTACK
+#define MANUAL_POWER_CONTROL ((u32)(1 << 8))
+	kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL;
+#endif /* CONFIG_MALI_CORESTACK */
+}
+
+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
+{
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
+			kbdev->hw_quirks_sc, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
+			kbdev->hw_quirks_tiler, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
+			kbdev->hw_quirks_mmu, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
+			kbdev->hw_quirks_jm, NULL);
+
+}
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
+{
+	if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) &&
+		!kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_enable_smc != 0)
+			kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0);
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n");
+		kbdev->cci_snoop_enabled = true;
+	}
+}
+
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
+{
+	if (kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_disable_smc != 0) {
+			mali_cci_flush_l2(kbdev);
+			kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0);
+		}
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n");
+		kbdev->cci_snoop_enabled = false;
+	}
+}
+
+static int kbase_pm_do_reset(struct kbase_device *kbdev)
+{
+	struct kbasep_reset_timeout_data rtdata;
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+
+	KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_SOFT_RESET, NULL);
+
+	/* Unmask the reset complete interrupt only */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED,
+									NULL);
+
+	/* Initialize a structure for tracking the status of the reset */
+	rtdata.kbdev = kbdev;
+	rtdata.timed_out = 0;
+
+	/* Create a timer to use as a timeout on the reset */
+	hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rtdata.timer.function = kbasep_reset_timeout;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	/* No interrupt has been received - check if the RAWSTAT register says
+	 * the reset has completed */
+	if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+							RESET_COMPLETED) {
+		/* The interrupt is set in the RAWSTAT; this suggests that the
+		 * interrupts are not getting to the CPU */
+		dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+		/* If interrupts aren't working we can't continue. */
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return -EINVAL;
+	}
+
+	/* The GPU doesn't seem to be responding to the reset so try a hard
+	 * reset */
+	dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+								RESET_TIMEOUT);
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET, NULL);
+
+	/* Restart the timer to wait for the hard reset to complete */
+	rtdata.timed_out = 0;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	destroy_hrtimer_on_stack(&rtdata.timer);
+
+	dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+								RESET_TIMEOUT);
+
+	return -EINVAL;
+}
+
+static int kbasep_protected_mode_enable(struct protected_mode_device *pdev)
+{
+	struct kbase_device *kbdev = pdev->data;
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+		GPU_COMMAND_SET_PROTECTED_MODE, NULL);
+	return 0;
+}
+
+static int kbasep_protected_mode_disable(struct protected_mode_device *pdev)
+{
+	struct kbase_device *kbdev = pdev->data;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	return kbase_pm_do_reset(kbdev);
+}
+
+struct protected_mode_ops kbase_native_protected_ops = {
+	.protected_mode_enable = kbasep_protected_mode_enable,
+	.protected_mode_disable = kbasep_protected_mode_disable
+};
+
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+{
+	unsigned long irq_flags;
+	int err;
+	bool resume_vinstr = false;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Ensure the clock is on before attempting to access the hardware */
+	if (!kbdev->pm.backend.gpu_powered) {
+		if (kbdev->pm.backend.callback_power_on)
+			kbdev->pm.backend.callback_power_on(kbdev);
+
+		spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+		kbdev->pm.backend.gpu_powered = true;
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+	}
+
+	/* Ensure interrupts are off to begin with, this also clears any
+	 * outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure cache snoops are disabled before reset. */
+	kbase_pm_cache_snoop_disable(kbdev);
+	/* Prepare for the soft-reset */
+	kbdev->pm.backend.reset_done = false;
+
+	/* The cores should be made unavailable due to the reset */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->shader_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u, (u32)0u);
+	if (kbdev->tiler_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u, (u32)0u);
+	kbdev->shader_available_bitmap = 0u;
+	kbdev->tiler_available_bitmap = 0u;
+	kbdev->l2_available_bitmap = 0u;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	/* Soft reset the GPU */
+	if (kbdev->protected_mode_support)
+		err = kbdev->protected_ops->protected_mode_disable(
+				kbdev->protected_dev);
+	else
+		err = kbase_pm_do_reset(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->protected_mode)
+		resume_vinstr = true;
+	kbdev->protected_mode = false;
+	kbase_ipa_model_use_configured_locked(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	if (err)
+		goto exit;
+
+	if (flags & PM_HW_ISSUES_DETECT)
+		kbase_pm_hw_issues_detect(kbdev);
+
+	kbase_pm_hw_issues_apply(kbdev);
+	kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
+
+	/* Sanity check protected mode was left after reset */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		u32 gpu_status = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS), NULL);
+
+		WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE);
+	}
+
+	/* If cycle counter was in use re-enable it, enable_irqs will only be
+	 * false when called from kbase_pm_powerup */
+	if (kbdev->pm.backend.gpu_cycle_counter_requests &&
+						(flags & PM_ENABLE_IRQS)) {
+		/* enable interrupts as the L2 may have to be powered on */
+		kbase_pm_enable_interrupts(kbdev);
+		kbase_pm_request_l2_caches(kbdev);
+
+		/* Re-enable the counters if we need to */
+		spin_lock_irqsave(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+		if (kbdev->pm.backend.gpu_cycle_counter_requests)
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+		spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+		kbase_pm_release_l2_caches(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+		kbase_pm_disable_interrupts(kbdev);
+	}
+
+	if (flags & PM_ENABLE_IRQS)
+		kbase_pm_enable_interrupts(kbdev);
+
+exit:
+	/* If GPU is leaving protected mode resume vinstr operation. */
+	if (kbdev->vinstr_ctx && resume_vinstr)
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	return err;
+}
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ *
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * kbase_pm_request_gpu_cycle_counter() or
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only
+ *
+ * When this function is called the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to (
+ * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() )
+ *
+ * @kbdev:     The kbase device structure of the device
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+	--kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	kbase_pm_release_l2_caches(kbdev);
+}
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100755
index 0000000..c558736
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,568 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * kbase_pm_dev_idle - The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_dev_activate - The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_get_present_cores - Get details of the cores that are present in
+ *                              the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid
+ *         pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_active_cores - Get details of the cores that are currently
+ *                             active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_trans_cores - Get details of the cores that are currently
+ *                            transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_ready_cores - Get details of the cores that are currently
+ *                            powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are powered and ready for jobs (they may
+ * or may not be currently executing jobs).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_clock_on - Turn the clock for the device on, and enable device
+ *                     interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if clock on due to resume after suspend, false otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
+ *                      device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU
+ * off. It should be modified during integration to perform the necessary
+ * actions to turn the clock off (if this is possible in the integration).
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if clock off due to suspend, false otherwise
+ *
+ * Return: true  if clock was turned off, or
+ *         false if clock can not be turned off due to pending page/bus fault
+ *               workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
+
+/**
+ * kbase_pm_enable_interrupts - Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts - Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
+ *                                      that does not take the hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * kbase_pm_reset_done - The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_pm_check_transitions_nolock - Check if there are any power transitions
+ *                                     to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state has no
+ * cores. That is: of the no cores desired, none were *un*available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation)
+ *
+ * The caller must hold kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return:      non-zero when all desired cores are available. That is,
+ *              it's worthwhile for the caller to submit a job.
+ *              false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_check_transitions_sync - Synchronous and locking variant of
+ *                                   kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device.pm.power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ *                                      where the caller must hold
+ *                                      kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ *                               the Power Policy, and begin any power
+ *                               transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware acheive the desired shader
+ * core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
+ *                                     the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required
+ *                                   and used cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init()) after calling this function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to
+ *                         update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of @buffer_updated differs from a previous call.
+ *
+ * @kbdev:          The kbase device structure for the device (must be a
+ *                  valid pointer)
+ * @buffer_updated: True if the buffer has been updated on this VSync,
+ *                  false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change
+ *                            the clock speed of the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating.
+ */
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
+ *                                      needed
+ *
+ * If the caller is the first caller then the GPU cycle counters will be enabled
+ * along with the l2 cache
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
+ *                                               needed (l2 cache already on)
+ *
+ * This is a version of the above function
+ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
+ * l2 cache is known to be on and assured to be on until the subsequent call of
+ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does
+ * not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
+ *                                      longer in use
+ *
+ * If the caller is the last caller then the GPU cycle counters will be
+ * disabled. A request must have been made before a call to this.
+ *
+ * Caller must not hold the hwaccess_lock, as it will be taken in this function.
+ * If the caller is already holding this lock then
+ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
+ *                                             that does not take hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
+ *                                       complete
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device
+ *
+ * Setup the power management callbacks and initialize/enable the runtime-pm
+ * for the Mali GPU platform device, using the callback function. This must be
+ * called before the kbase_pm_register_access_enable() function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+int kbase_pm_runtime_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_runtime_term - Disable runtime-pm for Mali GPU platform device
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_runtime_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ *                              collection is active.
+ *
+ * Note that this returns if the power management metrics collection was
+ * active at the time of calling, it is possible that after the call the metrics
+ * collection enable may have changed state.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if power on due to resume after suspend,
+ *             false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been
+ *                        requested.
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if power off due to suspend,
+ *              false otherwise
+ */
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+
+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
+			       struct kbasep_pm_metrics *last,
+			       struct kbasep_pm_metrics *diff);
+#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @kbdev:         The kbase device structure for the device (must be a
+ *                 valid pointer)
+ * @utilisation:   The current calculated utilisation by the metrics system.
+ * @util_gl_share: The current calculated gl share of utilisation.
+ * @util_cl_share: The current calculated cl share of utilisation per core
+ *                 group.
+ * Return:         Returns 0 on failure and non zero on success.
+ */
+
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2]);
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_metrics_update - Inform the metrics system that an atom is either
+ *                           about to be run or has just completed.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @now:   Pointer to the timestamp of the change, or NULL to use current time
+ *
+ * Caller must hold hwaccess_lock
+ */
+void kbase_pm_metrics_update(struct kbase_device *kbdev,
+				ktime_t *now);
+
+/**
+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called after L2 power up.
+ */
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called before L2 power off.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
new file mode 100755
index 0000000..6b9b686
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -0,0 +1,295 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_rb.h>
+#include <backend/gpu/mali_kbase_pm_defs.h>
+
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION          70
+#define KBASE_PM_VSYNC_MAX_UTILISATION          90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION       10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION       40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
+ * under 11s. Exceeding this will cause overflow */
+#define KBASE_PM_TIME_SHIFT			8
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbasep_pm_metrics_state *metrics;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	metrics = container_of(timer, struct kbasep_pm_metrics_state, timer);
+	kbase_pm_get_dvfs_action(metrics->kbdev);
+
+	spin_lock_irqsave(&metrics->lock, flags);
+
+	if (metrics->timer_active)
+		hrtimer_start(timer,
+			HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&metrics->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+int kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.metrics.kbdev = kbdev;
+
+	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+	kbdev->pm.backend.metrics.gpu_active = false;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+
+	kbdev->pm.backend.metrics.values.time_busy = 0;
+	kbdev->pm.backend.metrics.values.time_idle = 0;
+	kbdev->pm.backend.metrics.values.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.values.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.values.busy_gl = 0;
+
+	spin_lock_init(&kbdev->pm.backend.metrics.lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbdev->pm.backend.metrics.timer_active = true;
+	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
+
+	hrtimer_start(&kbdev->pm.backend.metrics.timer,
+			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
+
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbdev->pm.backend.metrics.timer_active = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	ktime_t diff;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+	if (ktime_to_ns(diff) < 0)
+		return;
+
+	if (kbdev->pm.backend.metrics.gpu_active) {
+		u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+
+		kbdev->pm.backend.metrics.values.time_busy += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[0])
+			kbdev->pm.backend.metrics.values.busy_cl[0] += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[1])
+			kbdev->pm.backend.metrics.values.busy_cl[1] += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[0])
+			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
+			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
+	} else {
+		kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff)
+							>> KBASE_PM_TIME_SHIFT);
+	}
+
+	kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
+			       struct kbasep_pm_metrics *last,
+			       struct kbasep_pm_metrics *diff)
+{
+	struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get());
+
+	memset(diff, 0, sizeof(*diff));
+	diff->time_busy = cur->time_busy - last->time_busy;
+	diff->time_idle = cur->time_idle - last->time_idle;
+	diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0];
+	diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1];
+	diff->busy_gl = cur->busy_gl - last->busy_gl;
+
+	*last = *cur;
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics);
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+	int utilisation, util_gl_share;
+	int util_cl_share[2];
+	int busy;
+	struct kbasep_pm_metrics *diff;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	diff = &kbdev->pm.backend.metrics.dvfs_diff;
+
+	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff);
+
+	utilisation = (100 * diff->time_busy) /
+			max(diff->time_busy + diff->time_idle, 1u);
+
+	busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u);
+	util_gl_share = (100 * diff->busy_gl) / busy;
+	util_cl_share[0] = (100 * diff->busy_cl[0]) / busy;
+	util_cl_share[1] = (100 * diff->busy_cl[1]) / busy;
+
+	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share);
+}
+
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+	bool isactive;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	isactive = kbdev->pm.backend.metrics.timer_active;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	return isactive;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/**
+ * kbase_pm_metrics_active_calc - Update PM active counts based on currently
+ *                                running atoms
+ * @kbdev: Device pointer
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ */
+static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		/* Head atom may have just completed, so if it isn't running
+		 * then try the next atom */
+		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
+			katom = kbase_gpu_inspect(kbdev, js, 1);
+
+		if (katom && katom->gpu_rb_state ==
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+			if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+				int device_nr = (katom->core_req &
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+						? katom->device_nr : 0;
+				if (!WARN_ON(device_nr >= 2))
+					kbdev->pm.backend.metrics.
+						active_cl_ctx[device_nr] = 1;
+			} else {
+				/* Slot 2 should not be running non-compute
+				 * atoms */
+				if (!WARN_ON(js >= 2))
+					kbdev->pm.backend.metrics.
+						active_gl_ctx[js] = 1;
+			}
+			kbdev->pm.backend.metrics.gpu_active = true;
+		}
+	}
+}
+
+/* called when job is submitted to or removed from a GPU slot */
+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
+{
+	unsigned long flags;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	if (!timestamp) {
+		now = ktime_get();
+		timestamp = &now;
+	}
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
+
+	kbase_pm_metrics_active_calc(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100755
index 0000000..1d771e6
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,984 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#else				/* CONFIG_MALI_NO_MALI */
+#if !PLATFORM_POWER_DOWN_ONLY
+	&kbase_pm_demand_policy_ops,
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+	&kbase_pm_coarse_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+#if !PLATFORM_POWER_DOWN_ONLY
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/* The number of policies available in the system.
+ * This is derived from the number of functions listed in policy_get_functions.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+	 * expect to hit it nor tend to hit it very much anyway. We can detect
+	 * whether we need more instrumentation by a difference between
+	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
+
+	/* Must be the last */
+	KBASE_PM_FUNC_ID_COUNT
+};
+
+
+/* State changes during request/unrequest/release-ing cores */
+enum {
+	KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
+	KBASE_PM_CHANGE_STATE_TILER  = (1u << 1),
+
+	/* These two must be last */
+	KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
+						KBASE_PM_CHANGE_STATE_SHADER),
+	KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
+};
+typedef u32 kbase_pm_change_state;
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+/* Timeline Trace code lookups for each function */
+static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
+					[KBASE_PM_CHANGE_STATE_COUNT] = {
+	/* kbase_pm_request_cores */
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
+
+	/* kbase_pm_release_cores */
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
+};
+
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id,
+		kbase_pm_change_state state)
+{
+	int trace_code;
+
+	KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
+	KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
+									state);
+
+	trace_code = kbase_pm_change_state_trace_code[func_id][state];
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
+}
+
+#else /* CONFIG_MALI_TRACE_TIMELINE */
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id, kbase_pm_change_state state)
+{
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+/**
+ * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
+ *                               requested shader cores
+ * @kbdev: Device pointer
+ */
+static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
+{
+	u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
+	u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->pm.backend.desired_shader_state &=
+			~kbdev->pm.backend.shader_poweroff_pending;
+	kbdev->pm.backend.desired_tiler_state &=
+			~kbdev->pm.backend.tiler_poweroff_pending;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+
+	if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
+			prev_tiler_state !=
+				kbdev->pm.backend.desired_tiler_state ||
+			kbdev->pm.backend.ca_in_transition) {
+		bool cores_are_available;
+
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+		/* Don't need 'cores_are_available',
+		 * because we don't return anything */
+		CSTD_UNUSED(cores_are_available);
+	}
+}
+
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(timer, struct kbase_device,
+						pm.backend.gpu_poweroff_timer);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* It is safe for this call to do nothing if the work item is already
+	 * queued. The worker function will read the must up-to-date state of
+	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
+	 *
+	 * If a state change occurs while the worker function is processing,
+	 * this call will succeed as a work item can be requeued once it has
+	 * started processing.
+	 */
+	if (kbdev->pm.backend.gpu_poweroff_pending)
+		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+					&kbdev->pm.backend.gpu_poweroff_work);
+
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending_time--;
+
+		KBASE_DEBUG_ASSERT(
+				kbdev->pm.backend.shader_poweroff_pending_time
+									>= 0);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending_time)
+			kbasep_pm_do_poweroff_cores(kbdev);
+	}
+
+	if (kbdev->pm.backend.poweroff_timer_needed) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+		return HRTIMER_RESTART;
+	}
+
+	kbdev->pm.backend.poweroff_timer_running = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	bool do_poweroff = false;
+
+	kbdev = container_of(data, struct kbase_device,
+						pm.backend.gpu_poweroff_work);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	kbdev->pm.backend.gpu_poweroff_pending--;
+
+	if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Only power off the GPU if a request is still pending */
+	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+		do_poweroff = true;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (do_poweroff) {
+		kbdev->pm.backend.poweroff_timer_needed = false;
+		hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+		kbdev->pm.backend.poweroff_timer_running = false;
+
+		/* Power off the GPU */
+		kbase_pm_do_poweroff(kbdev, false);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("kbase_pm_do_poweroff",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!wq)
+		return -ENOMEM;
+
+	kbdev->pm.backend.gpu_poweroff_wq = wq;
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
+			kbasep_pm_do_gpu_poweroff_wq);
+	hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	kbdev->pm.backend.gpu_poweroff_timer.function =
+			kbasep_pm_do_gpu_poweroff_callback;
+	kbdev->pm.backend.pm_current_policy = policy_list[0];
+	kbdev->pm.backend.pm_current_policy->init(kbdev);
+	kbdev->pm.gpu_poweroff_time =
+			HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+	kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+	kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
+
+	return 0;
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.pm_current_policy->term(kbdev);
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
+}
+
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	kbdev->pm.backend.poweroff_timer_needed = false;
+	hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.poweroff_timer_running = false;
+
+	/* If wq is already running but is held off by pm.lock, make sure it has
+	 * no effect */
+	kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+	kbdev->pm.backend.shader_poweroff_pending_time = 0;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+	bool active;
+
+	lockdep_assert_held(&pm->lock);
+
+	/* pm_current_policy will never be NULL while pm.lock is held */
+	KBASE_DEBUG_ASSERT(backend->pm_current_policy);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	active = backend->pm_current_policy->get_core_active(kbdev);
+
+	if (active) {
+		if (backend->gpu_poweroff_pending) {
+			/* Cancel any pending power off request */
+			backend->gpu_poweroff_pending = 0;
+
+			/* If a request was pending then the GPU was still
+			 * powered, so no need to continue */
+			if (!kbdev->poweroff_pending) {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				return;
+			}
+		}
+
+		if (!backend->poweroff_timer_running && !backend->gpu_powered &&
+				(pm->poweroff_gpu_ticks ||
+				pm->poweroff_shader_ticks)) {
+			backend->poweroff_timer_needed = true;
+			backend->poweroff_timer_running = true;
+			hrtimer_start(&backend->gpu_poweroff_timer,
+					pm->gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+
+		/* Power on the GPU and any cores requested by the policy */
+		if (pm->backend.poweroff_wait_in_progress) {
+			KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+			pm->backend.poweron_required = true;
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			kbase_pm_do_poweron(kbdev, false);
+		}
+	} else {
+		/* It is an error for the power policy to power off the GPU
+		 * when there are contexts active */
+		KBASE_DEBUG_ASSERT(pm->active_count == 0);
+
+		if (backend->shader_poweroff_pending ||
+				backend->tiler_poweroff_pending) {
+			backend->shader_poweroff_pending = 0;
+			backend->tiler_poweroff_pending = 0;
+			backend->shader_poweroff_pending_time = 0;
+		}
+
+		/* Request power off */
+		if (pm->backend.gpu_powered) {
+			if (pm->poweroff_gpu_ticks) {
+				backend->gpu_poweroff_pending =
+						pm->poweroff_gpu_ticks;
+				backend->poweroff_timer_needed = true;
+				if (!backend->poweroff_timer_running) {
+					/* Start timer if not running (eg if
+					 * power policy has been changed from
+					 * always_on to something else). This
+					 * will ensure the GPU is actually
+					 * powered off */
+					backend->poweroff_timer_running
+							= true;
+					hrtimer_start(
+						&backend->gpu_poweroff_timer,
+						pm->gpu_poweroff_time,
+						HRTIMER_MODE_REL);
+				}
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+			} else {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+
+				/* Power off the GPU immediately */
+				kbase_pm_do_poweroff(kbdev, false);
+			}
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+	}
+}
+
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+	u64 desired_bitmap;
+	u64 desired_tiler_bitmap;
+	bool cores_are_available;
+	bool do_poweroff = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.pm_current_policy == NULL)
+		return;
+	if (kbdev->pm.backend.poweroff_wait_in_progress)
+		return;
+
+	if (kbdev->protected_mode_transition &&	!kbdev->shader_needed_bitmap &&
+			!kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt) {
+		/* We are trying to change in/out of protected mode - force all
+		 * cores off so that the L2 powers down */
+		desired_bitmap = 0;
+		desired_tiler_bitmap = 0;
+	} else {
+		desired_bitmap =
+		kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
+		desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+
+		if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+			desired_tiler_bitmap = 1;
+		else
+			desired_tiler_bitmap = 0;
+
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
+			/* Unless XAFFINITY is supported, enable core 0 if tiler
+			 * required, regardless of core availability */
+			if (kbdev->tiler_needed_cnt > 0 ||
+					kbdev->tiler_inuse_cnt > 0)
+				desired_bitmap |= 1;
+		}
+	}
+
+	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
+							(u32)desired_bitmap);
+	/* Are any cores being powered on? */
+	if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
+	    ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
+	    kbdev->pm.backend.ca_in_transition) {
+		/* Check if we are powering off any cores before updating shader
+		 * state */
+		if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+			/* Start timer to power off cores */
+			kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+			kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+
+			if (kbdev->pm.poweroff_shader_ticks &&
+					!kbdev->protected_mode_transition)
+				kbdev->pm.backend.shader_poweroff_pending_time =
+						kbdev->pm.poweroff_shader_ticks;
+			else
+				do_poweroff = true;
+		}
+
+		kbdev->pm.backend.desired_shader_state = desired_bitmap;
+		kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
+
+		/* If any cores are being powered on, transition immediately */
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+		/* Start timer to power off cores */
+		kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+		if (kbdev->pm.poweroff_shader_ticks &&
+				!kbdev->protected_mode_transition)
+			kbdev->pm.backend.shader_poweroff_pending_time =
+					kbdev->pm.poweroff_shader_ticks;
+		else
+			kbasep_pm_do_poweroff_cores(kbdev);
+	} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
+			desired_tiler_bitmap != 0 &&
+			kbdev->pm.backend.poweroff_timer_needed) {
+		/* If power policy is keeping cores on despite there being no
+		 * active contexts then disable poweroff timer as it isn't
+		 * required.
+		 * Only reset poweroff_timer_needed if we're not in the middle
+		 * of the power off callback */
+		kbdev->pm.backend.poweroff_timer_needed = false;
+	}
+
+	/* Ensure timer does not power off wanted cores and make sure to power
+	 * off unwanted cores */
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending &=
+				~(kbdev->pm.backend.desired_shader_state &
+								desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending &=
+				~(kbdev->pm.backend.desired_tiler_state &
+				desired_tiler_bitmap);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending &&
+				!kbdev->pm.backend.tiler_poweroff_pending)
+			kbdev->pm.backend.shader_poweroff_pending_time = 0;
+	}
+
+	/* Shader poweroff is deferred to the end of the function, to eliminate
+	 * issues caused by the core availability policy recursing into this
+	 * function */
+	if (do_poweroff)
+		kbasep_pm_do_poweroff_cores(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+}
+
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
+
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
+
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_policy *new_policy)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	const struct kbase_pm_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.pm_current_policy;
+	kbdev->pm.backend.pm_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
+								old_policy->id);
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
+								new_policy->id);
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.pm_current_policy = new_policy;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_active(kbdev);
+	kbase_pm_update_cores_state(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
+
+/* Check whether a state change has finished, and trace it as completed */
+static void
+kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
+{
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state)
+		kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 cores;
+
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	cores = shader_cores;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+
+		/* It should be almost impossible for this to overflow. It would
+		 * require 2^32 atoms to request a particular core, which would
+		 * require 2^24 contexts to submit. This would require an amount
+		 * of memory that is impossible on a 32-bit system and extremely
+		 * unlikely on a 64-bit system. */
+		int cnt = ++kbdev->shader_needed_cnt[bitnum];
+
+		if (1 == cnt) {
+			kbdev->shader_needed_bitmap |= bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt = ++kbdev->tiler_needed_cnt;
+
+		if (1 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+							change_gpu_state);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
+
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_needed_bitmap &= ~bit;
+
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		cnt = --kbdev->tiler_needed_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* Trace that any state change effectively completes immediately
+		 * - no-one will wait on the state change */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
+
+enum kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 prev_shader_needed;	/* Just for tracing */
+	u64 prev_shader_inuse;	/* Just for tracing */
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	prev_shader_needed = kbdev->shader_needed_bitmap;
+	prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+	/* If desired_shader_state does not contain the requested cores, then
+	 * power management is not attempting to powering those cores (most
+	 * likely due to core availability policy) and a new job affinity must
+	 * be chosen */
+	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+							shader_cores) {
+		return (kbdev->pm.backend.poweroff_wait_in_progress ||
+				kbdev->pm.backend.pm_current_policy == NULL) ?
+				KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
+	}
+
+	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+	    (tiler_required && !kbdev->tiler_available_bitmap)) {
+		/* Trace ongoing core transition */
+		kbase_timeline_pm_l2_transition_start(kbdev);
+		return KBASE_CORES_NOT_READY;
+	}
+
+	/* If we started to trace a state change, then trace it has being
+	 * finished by now, at the very latest */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+	/* Trace core transition done */
+	kbase_timeline_pm_l2_transition_done(kbdev);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt)
+			kbdev->shader_needed_bitmap &= ~bit;
+
+		/* shader_inuse_cnt should not overflow because there can only
+		 * be a very limited number of jobs on the h/w at one time */
+
+		kbdev->shader_inuse_cnt[bitnum]++;
+		kbdev->shader_inuse_bitmap |= bit;
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		--kbdev->tiler_needed_cnt;
+
+		kbdev->tiler_inuse_cnt++;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+	}
+
+	if (prev_shader_needed != kbdev->shader_needed_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+	return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_inuse_bitmap &= ~bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+		cnt = --kbdev->tiler_inuse_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+							change_gpu_state);
+
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
+
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+					bool tiler_required,
+					u64 shader_cores)
+{
+	unsigned long flags;
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_check_transitions_sync(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
+
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbdev->l2_users_count++;
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
+
+	/* Check for the required L2 transitions.
+	 * Caller would block here for the L2 caches of all core groups to be
+	 * powered on, so need to inform the Hw to power up all the L2 caches.
+	 * Can't rely on the l2_users_count value being non-zero previously to
+	 * avoid checking for the transition, as the count could be non-zero
+	 * even if not all the instances of L2 cache are powered up since
+	 * currently the power status of L2 is not tracked separately for each
+	 * core group. Also if the GPU is reset while the L2 is on, L2 will be
+	 * off but the count will be non-zero.
+	 */
+	kbase_pm_check_transitions_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	wait_event(kbdev->pm.backend.l2_powered_wait,
+					kbdev->pm.backend.l2_powered == 1);
+
+	/* Trace that any state change completed immediately */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
+
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->l2_users_count++;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
+
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
+
+	--kbdev->l2_users_count;
+
+	if (!kbdev->l2_users_count) {
+		kbase_pm_check_transitions_nolock(kbdev);
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
new file mode 100644
index 0000000..852fedd
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -0,0 +1,232 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ *         initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+	KBASE_CORES_NOT_READY = 0,
+	KBASE_NEW_AFFINITY = 1,
+	KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ *                          for jobs to be submitted
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete/the cores might not be available
+ * until a Power Management IRQ.
+ *
+ * Return: 0 if the cores were successfully requested, or -errno otherwise.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ *                            jobs to be submitted.
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job is
+ * cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and if so update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ *
+ *         %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
+ *
+ *         %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+						struct kbase_device *kbdev,
+						bool tiler_required,
+						u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_register_inuse_cores() )
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
+ * may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups, power up, wait and prevent
+ * the power manager from powering down the l2 caches.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2
+ *
+ * It is the callers responsibility to ensure that the l2 is already powered up
+ * and to eventually call kbase_pm_release_l2_caches()
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power manager
+ * to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100644
index 0000000..cef0745
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,108 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush -  Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367
+ *
+ * Note : If GPU resets occur then the counters are reset to zero, the delay may
+ * not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+	u32 base_count = 0;
+
+	/*
+	 * The caller must be holding onto the kctx or the call is from
+	 * userspace.
+	 */
+	kbase_pm_context_active(kctx->kbdev);
+	kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+	while (true) {
+		u32 new_count;
+
+		new_count = kbase_reg_read(kctx->kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		/* First time around, just store the count. */
+		if (base_count == 0) {
+			base_count = new_count;
+			continue;
+		}
+
+		/* No need to handle wrapping, unsigned maths works for this. */
+		if ((new_count - base_count) > 1000)
+			break;
+	}
+
+	kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+	kbase_pm_context_idle(kctx->kbdev);
+}
+#endif				/* CONFIG_MALI_NO_MALI */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
new file mode 100644
index 0000000..e1bd263
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -0,0 +1,57 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/build.bp b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/build.bp
new file mode 100755
index 0000000..afc39ff
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/build.bp
@@ -0,0 +1,101 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2017-2018 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+/* Kernel-side tests may include mali_kbase's headers. Therefore any config
+ * options which affect the sizes of any structs (e.g. adding extra members)
+ * must be included in these defaults, so that the structs are consistent in
+ * both mali_kbase and the test modules. */
+bob_defaults {
+    name: "mali_kbase_shared_config_defaults",
+    no_mali: {
+        kbuild_options: ["CONFIG_MALI_NO_MALI=y"],
+    },
+    mali_corestack: {
+        kbuild_options: ["CONFIG_MALI_CORESTACK=y"],
+    },
+    mali_devfreq: {
+        kbuild_options: ["CONFIG_MALI_DEVFREQ=y"],
+    },
+    mali_midgard_dvfs: {
+        kbuild_options: ["CONFIG_MALI_MIDGARD_DVFS=y"],
+    },
+    mali_trace_timeline: {
+        kbuild_options: ["CONFIG_MALI_TRACE_TIMELINE=y"],
+    },
+    mali_debug: {
+        kbuild_options: ["CONFIG_MALI_DEBUG=y"],
+    },
+    mali_fpga_bus_logger: {
+        kbuild_options: ["CONFIG_MALI_FPGA_BUS_LOGGER=y"],
+    },
+    cinstr_job_dump: {
+        kbuild_options: ["CONFIG_MALI_JOB_DUMP=y"],
+    },
+    mali_gator_support: {
+        kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"],
+    },
+    mali_system_trace: {
+        kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"],
+    },
+    kbuild_options: [
+        "MALI_UNIT_TEST={{.unit_test_code}}",
+        "MALI_CUSTOMER_RELEASE={{.release}}",
+        "MALI_KERNEL_TEST_API={{.debug}}",
+    ],
+    defaults: ["kernel_defaults"],
+}
+
+bob_kernel_module {
+    name: "mali_kbase",
+    srcs: [
+        "*.c",
+        "*.h",
+        "Kbuild",
+        "backend/gpu/*.c",
+        "backend/gpu/*.h",
+        "backend/gpu/Kbuild",
+        "ipa/*.c",
+        "ipa/*.h",
+        "ipa/Kbuild",
+        "platform/*.h",
+        "platform/*/*.c",
+        "platform/*/*.h",
+        "platform/*/Kbuild",
+        "thirdparty/*.c",
+    ],
+    kbuild_options: [
+        "CONFIG_MALI_KUTF=n",
+        "CONFIG_MALI_MIDGARD=m",
+        "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
+        "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
+        "MALI_KERNEL_TEST_API={{.unit_test_code}}",
+        "MALI_MOCK_TEST={{.mali_mock_test}}",
+    ],
+    mali_error_inject: {
+        kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"],
+    },
+    mali_error_inject_random: {
+        kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"],
+    },
+    mali_prfcnt_set_secondary: {
+        kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"],
+    },
+    mali_2mb_alloc: {
+        kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
+    },
+    mali_mock_test: {
+        srcs: ["tests/internal/src/mock/mali_kbase_pm_driver_mock.c"],
+    },
+    defaults: ["mali_kbase_shared_config_defaults"],
+}
+
+optional_subdirs = ["tests"]
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
new file mode 100755
index 0000000..6498dcb
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -0,0 +1,132 @@
+#
+# (C) COPYRIGHT 2011-2013, 2015, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/drivers/gpu/arm/midgard/
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE                += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           += ../../kernel/drivers/gpu/arm/midgard/docs
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
new file mode 100644
index 0000000..a15b558
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
@@ -0,0 +1,117 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR;
+	size="12,8";
+	compound=true;
+
+	node [ shape = box ];
+
+	subgraph cluster_policy_queues {
+		low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+		queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+		rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+		label = "Policy's Queue(s)";
+	}
+
+	call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+	{
+		rank=same;
+		ordering=out;
+		call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+		call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+		call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+	}
+
+	subgraph cluster_runpool {
+
+		as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+		as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+		as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+		as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+		label = "Policy's Run Pool";
+	}
+
+	{
+		rank=same;
+		call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+		sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+	}
+
+	{
+		rank=same;
+		ordering=out;
+		sstop [ shape=ellipse label="SS-Timer expires" ]
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		irq [ label="IRQ" shape=ellipse ];
+
+		job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+	}
+
+	hstop [ shape=ellipse label="HS-Timer expires" ]
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+	low_queue:qr -> call_dequeue:w;
+	rt_queue:qr -> call_dequeue:w;
+
+	call_dequeue -> as1 [lhead=cluster_runpool];
+
+	as1->call_jdequeue         [ltail=cluster_runpool];
+	call_jdequeue->jobslots:0;
+	call_jdequeue->sstop_dotfixup [ arrowhead=none];
+	sstop_dotfixup->sstop      [label="Spawn SS-Timer"];
+	sstop->jobslots            [label="SoftStop"];
+	sstop->hstop               [label="Spawn HS-Timer"];
+	hstop->jobslots:ne            [label="HardStop"];
+
+
+	as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+	call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+	call_ctxfinish->call_ctxdone [constraint=false];
+
+	call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+	{
+	jobslots->irq   [constraint=false];
+
+	irq->job_finish [constraint=false];
+	}
+
+	irq->as2  [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
new file mode 100644
index 0000000..6b87335
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR
+	size="6,6"
+	compound=true;
+
+	node [ shape = box ];
+
+	call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+	policy_queue [ label="Policy's Queue" ];
+
+	{
+		rank=same;
+		runpool [ label="Policy's Run Pool" ];
+
+		ctx_finish [ label="ctx finished" ];
+	}
+
+	{
+		rank=same;
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		job_finish [ label="Job finished" ];
+	}
+
+
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> policy_queue;
+
+	policy_queue->runpool [label="dequeue ctx" weight=0.1];
+	runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+	runpool->ctx_finish [ style=dotted ];
+
+	runpool->jobslots  [label="dequeue job" weight=0.1];
+	jobslots->runpool  [label="requeue job" weight=0.1];
+
+	jobslots->job_finish [ style=dotted ];
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild
new file mode 100755
index 0000000..297d7f9
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+	ipa/mali_kbase_ipa_simple.o \
+	ipa/mali_kbase_ipa.o
+
+mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
+
+ifneq ($(wildcard $(src)/ipa/mali_kbase_ipa_vinstr_common.c),)
+  mali_kbase-y += \
+	ipa/mali_kbase_ipa_vinstr_g7x.o \
+	ipa/mali_kbase_ipa_vinstr_common.o
+endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
new file mode 100755
index 0000000..254c1a8
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
@@ -0,0 +1,632 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include <linux/thermal.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/of.h>
+#include "mali_kbase.h"
+#include "mali_kbase_ipa.h"
+#include "mali_kbase_ipa_debugfs.h"
+#include "mali_kbase_ipa_simple.h"
+#include "backend/gpu/mali_kbase_pm_internal.h"
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#define dev_pm_opp_find_freq_exact opp_find_freq_exact
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp opp
+#endif
+
+#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model"
+#define KBASE_IPA_G71_MODEL_NAME      "mali-g71-power-model"
+#define KBASE_IPA_G72_MODEL_NAME      "mali-g72-power-model"
+#define KBASE_IPA_TNOX_MODEL_NAME     "mali-tnox-power-model"
+
+static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
+	&kbase_simple_ipa_model_ops,
+	&kbase_g71_ipa_model_ops,
+	&kbase_g72_ipa_model_ops,
+	&kbase_tnox_ipa_model_ops
+};
+
+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
+{
+	int err = 0;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (model->ops->recalculate) {
+		err = model->ops->recalculate(model);
+		if (err) {
+			dev_err(model->kbdev->dev,
+				"recalculation of power model %s returned error %d\n",
+				model->ops->name, err);
+		}
+	}
+
+	return err;
+}
+
+static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
+							    const char *name)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) {
+		struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i];
+
+		if (!strcmp(ops->name, name))
+			return ops;
+	}
+
+	dev_err(kbdev->dev, "power model \'%s\' not found\n", name);
+
+	return NULL;
+}
+
+void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
+{
+	atomic_set(&kbdev->ipa_use_configured_model, false);
+}
+
+void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
+{
+	atomic_set(&kbdev->ipa_use_configured_model, true);
+}
+
+const char *kbase_ipa_model_name_from_id(u32 gpu_id)
+{
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+			GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(prod_id)) {
+		switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
+		case GPU_ID2_PRODUCT_TMIX:
+			return KBASE_IPA_G71_MODEL_NAME;
+		case GPU_ID2_PRODUCT_THEX:
+			return KBASE_IPA_G72_MODEL_NAME;
+		case GPU_ID2_PRODUCT_TNOX:
+			return KBASE_IPA_TNOX_MODEL_NAME;
+		case GPU_ID2_PRODUCT_TGOX:
+			if ((gpu_id & GPU_ID2_VERSION_MAJOR) ==
+					(0 << GPU_ID2_VERSION_MAJOR_SHIFT))
+				/* TGOX r0 shares a power model with TNOX */
+				return KBASE_IPA_TNOX_MODEL_NAME;
+		default:
+			return KBASE_IPA_FALLBACK_MODEL_NAME;
+		}
+	}
+
+	return KBASE_IPA_FALLBACK_MODEL_NAME;
+}
+
+static struct device_node *get_model_dt_node(struct kbase_ipa_model *model)
+{
+	struct device_node *model_dt_node;
+	char compat_string[64];
+
+	snprintf(compat_string, sizeof(compat_string), "arm,%s",
+		 model->ops->name);
+
+	/* of_find_compatible_node() will call of_node_put() on the root node,
+	 * so take a reference on it first.
+	 */
+	of_node_get(model->kbdev->dev->of_node);
+	model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node,
+						NULL, compat_string);
+	if (!model_dt_node && !model->missing_dt_node_warning) {
+		dev_warn(model->kbdev->dev,
+			 "Couldn't find power_model DT node matching \'%s\'\n",
+			 compat_string);
+		model->missing_dt_node_warning = true;
+	}
+
+	return model_dt_node;
+}
+
+int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
+				  const char *name, s32 *addr,
+				  size_t num_elems, bool dt_required)
+{
+	int err, i;
+	struct device_node *model_dt_node = get_model_dt_node(model);
+	char *origin;
+
+	err = of_property_read_u32_array(model_dt_node, name, addr, num_elems);
+	/* We're done with model_dt_node now, so drop the reference taken in
+	 * get_model_dt_node()/of_find_compatible_node().
+	 */
+	of_node_put(model_dt_node);
+
+	if (err && dt_required) {
+		memset(addr, 0, sizeof(s32) * num_elems);
+		dev_warn(model->kbdev->dev,
+			 "Error %d, no DT entry: %s.%s = %zu*[0]\n",
+			 err, model->ops->name, name, num_elems);
+		origin = "zero";
+	} else if (err && !dt_required) {
+		origin = "default";
+	} else /* !err */ {
+		origin = "DT";
+	}
+
+	/* Create a unique debugfs entry for each element */
+	for (i = 0; i < num_elems; ++i) {
+		char elem_name[32];
+
+		if (num_elems == 1)
+			snprintf(elem_name, sizeof(elem_name), "%s", name);
+		else
+			snprintf(elem_name, sizeof(elem_name), "%s.%d",
+				name, i);
+
+		dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n",
+			model->ops->name, elem_name, addr[i], origin);
+
+		err = kbase_ipa_model_param_add(model, elem_name,
+						&addr[i], sizeof(s32),
+						PARAM_TYPE_S32);
+		if (err)
+			goto exit;
+	}
+exit:
+	return err;
+}
+
+int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
+				     const char *name, char *addr,
+				     size_t size, bool dt_required)
+{
+	int err;
+	struct device_node *model_dt_node = get_model_dt_node(model);
+	const char *string_prop_value;
+	char *origin;
+
+	err = of_property_read_string(model_dt_node, name,
+				      &string_prop_value);
+
+	/* We're done with model_dt_node now, so drop the reference taken in
+	 * get_model_dt_node()/of_find_compatible_node().
+	 */
+	of_node_put(model_dt_node);
+
+	if (err && dt_required) {
+		strncpy(addr, "", size - 1);
+		dev_warn(model->kbdev->dev,
+			 "Error %d, no DT entry: %s.%s = \'%s\'\n",
+			 err, model->ops->name, name, addr);
+		err = 0;
+		origin = "zero";
+	} else if (err && !dt_required) {
+		origin = "default";
+	} else /* !err */ {
+		strncpy(addr, string_prop_value, size - 1);
+		origin = "DT";
+	}
+
+	addr[size - 1] = '\0';
+
+	dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n",
+		model->ops->name, name, string_prop_value, origin);
+
+	err = kbase_ipa_model_param_add(model, name, addr, size,
+					PARAM_TYPE_STRING);
+	return err;
+}
+
+void kbase_ipa_term_model(struct kbase_ipa_model *model)
+{
+	if (!model)
+		return;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (model->ops->term)
+		model->ops->term(model);
+
+	kbase_ipa_model_param_free_all(model);
+
+	kfree(model);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_term_model);
+
+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
+					     struct kbase_ipa_model_ops *ops)
+{
+	struct kbase_ipa_model *model;
+	int err;
+
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	if (!ops || !ops->name)
+		return NULL;
+
+	model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL);
+	if (!model)
+		return NULL;
+
+	model->kbdev = kbdev;
+	model->ops = ops;
+	INIT_LIST_HEAD(&model->params);
+
+	err = model->ops->init(model);
+	if (err) {
+		dev_err(kbdev->dev,
+			"init of power model \'%s\' returned error %d\n",
+			ops->name, err);
+		kfree(model);
+		return NULL;
+	}
+
+	err = kbase_ipa_model_recalculate(model);
+	if (err) {
+		kbase_ipa_term_model(model);
+		return NULL;
+	}
+
+	return model;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_init_model);
+
+static void kbase_ipa_term_locked(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	/* Clean up the models */
+	if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model)
+		kbase_ipa_term_model(kbdev->ipa.configured_model);
+	kbase_ipa_term_model(kbdev->ipa.fallback_model);
+
+	kbdev->ipa.configured_model = NULL;
+	kbdev->ipa.fallback_model = NULL;
+}
+
+int kbase_ipa_init(struct kbase_device *kbdev)
+{
+
+	const char *model_name;
+	struct kbase_ipa_model_ops *ops;
+	struct kbase_ipa_model *default_model = NULL;
+	int err;
+
+	mutex_init(&kbdev->ipa.lock);
+	/*
+	 * Lock during init to avoid warnings from lockdep_assert_held (there
+	 * shouldn't be any concurrent access yet).
+	 */
+	mutex_lock(&kbdev->ipa.lock);
+
+	/* The simple IPA model must *always* be present.*/
+	ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME);
+
+	default_model = kbase_ipa_init_model(kbdev, ops);
+	if (!default_model) {
+		err = -EINVAL;
+		goto end;
+	}
+
+	kbdev->ipa.fallback_model = default_model;
+	err = of_property_read_string(kbdev->dev->of_node,
+				      "ipa-model",
+				      &model_name);
+	if (err) {
+		/* Attempt to load a match from GPU-ID */
+		u32 gpu_id;
+
+		gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		model_name = kbase_ipa_model_name_from_id(gpu_id);
+		dev_dbg(kbdev->dev,
+			"Inferring model from GPU ID 0x%x: \'%s\'\n",
+			gpu_id, model_name);
+		err = 0;
+	} else {
+		dev_dbg(kbdev->dev,
+			"Using ipa-model parameter from DT: \'%s\'\n",
+			model_name);
+	}
+
+	if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) {
+		ops = kbase_ipa_model_ops_find(kbdev, model_name);
+		kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops);
+		if (!kbdev->ipa.configured_model) {
+			dev_warn(kbdev->dev,
+				"Failed to initialize ipa-model: \'%s\'\n"
+				"Falling back on default model\n",
+				model_name);
+			kbdev->ipa.configured_model = default_model;
+		}
+	} else {
+		kbdev->ipa.configured_model = default_model;
+	}
+
+	kbase_ipa_model_use_configured_locked(kbdev);
+
+end:
+	if (err)
+		kbase_ipa_term_locked(kbdev);
+	else
+		dev_info(kbdev->dev,
+			 "Using configured power model %s, and fallback %s\n",
+			 kbdev->ipa.configured_model->ops->name,
+			 kbdev->ipa.fallback_model->ops->name);
+
+	mutex_unlock(&kbdev->ipa.lock);
+	return err;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_init);
+
+void kbase_ipa_term(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->ipa.lock);
+	kbase_ipa_term_locked(kbdev);
+	mutex_unlock(&kbdev->ipa.lock);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_term);
+
+/**
+ * kbase_scale_dynamic_power() - Scale a dynamic power coefficient to an OPP
+ * @c:		Dynamic model coefficient, in pW/(Hz V^2). Should be in range
+ *		0 < c < 2^26 to prevent overflow.
+ * @freq:	Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz)
+ * @voltage:	Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
+ *
+ * Keep a record of the approximate range of each value at every stage of the
+ * calculation, to ensure we don't overflow. This makes heavy use of the
+ * approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual
+ * calculations in decimal for increased accuracy.
+ *
+ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
+ */
+static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq,
+				     const u32 voltage)
+{
+	/* Range: 2^8 < v2 < 2^16 m(V^2) */
+	const u32 v2 = (voltage * voltage) / 1000;
+
+	/* Range: 2^3 < f_MHz < 2^10 MHz */
+	const u32 f_MHz = freq / 1000000;
+
+	/* Range: 2^11 < v2f_big < 2^26 kHz V^2 */
+	const u32 v2f_big = v2 * f_MHz;
+
+	/* Range: 2^1 < v2f < 2^16 MHz V^2 */
+	const u32 v2f = v2f_big / 1000;
+
+	/* Range (working backwards from next line): 0 < v2fc < 2^23 uW.
+	 * Must be < 2^42 to avoid overflowing the return value. */
+	const u64 v2fc = (u64) c * (u64) v2f;
+
+	/* Range: 0 < v2fc / 1000 < 2^13 mW */
+	return div_u64(v2fc, 1000);
+}
+
+/**
+ * kbase_scale_static_power() - Scale a static power coefficient to an OPP
+ * @c:		Static model coefficient, in uW/V^3. Should be in range
+ *		0 < c < 2^32 to prevent overflow.
+ * @voltage:	Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
+ *
+ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
+ */
+u32 kbase_scale_static_power(const u32 c, const u32 voltage)
+{
+	/* Range: 2^8 < v2 < 2^16 m(V^2) */
+	const u32 v2 = (voltage * voltage) / 1000;
+
+	/* Range: 2^17 < v3_big < 2^29 m(V^2) mV */
+	const u32 v3_big = v2 * voltage;
+
+	/* Range: 2^7 < v3 < 2^19 m(V^3) */
+	const u32 v3 = v3_big / 1000;
+
+	/*
+	 * Range (working backwards from next line): 0 < v3c_big < 2^33 nW.
+	 * The result should be < 2^52 to avoid overflowing the return value.
+	 */
+	const u64 v3c_big = (u64) c * (u64) v3;
+
+	/* Range: 0 < v3c_big / 1000000 < 2^13 mW */
+	return div_u64(v3c_big, 1000000);
+}
+
+static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	if (atomic_read(&kbdev->ipa_use_configured_model))
+		return kbdev->ipa.configured_model;
+	else
+		return kbdev->ipa.fallback_model;
+}
+
+static u32 get_static_power_locked(struct kbase_device *kbdev,
+				   struct kbase_ipa_model *model,
+				   unsigned long voltage)
+{
+	u32 power = 0;
+	int err;
+	u32 power_coeff;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (!model->ops->get_static_coeff)
+		model = kbdev->ipa.fallback_model;
+
+	if (model->ops->get_static_coeff) {
+		err = model->ops->get_static_coeff(model, &power_coeff);
+		if (!err)
+			power = kbase_scale_static_power(power_coeff,
+							 (u32) voltage);
+	}
+
+	return power;
+}
+
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static unsigned long kbase_get_static_power(struct devfreq *df,
+					    unsigned long voltage)
+#else
+static unsigned long kbase_get_static_power(unsigned long voltage)
+#endif
+{
+	struct kbase_ipa_model *model;
+	u32 power = 0;
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+#else
+	struct kbase_device *kbdev = kbase_find_device(-1);
+#endif
+
+	mutex_lock(&kbdev->ipa.lock);
+
+	model = get_current_model(kbdev);
+	power = get_static_power_locked(kbdev, model, voltage);
+
+	mutex_unlock(&kbdev->ipa.lock);
+
+#if !(defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
+	kbase_release_device(kbdev);
+#endif
+
+	return power;
+}
+
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static unsigned long kbase_get_dynamic_power(struct devfreq *df,
+					     unsigned long freq,
+					     unsigned long voltage)
+#else
+static unsigned long kbase_get_dynamic_power(unsigned long freq,
+					     unsigned long voltage)
+#endif
+{
+	struct kbase_ipa_model *model;
+	u32 power_coeff = 0, power = 0;
+	int err = 0;
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+#else
+	struct kbase_device *kbdev = kbase_find_device(-1);
+#endif
+
+	mutex_lock(&kbdev->ipa.lock);
+
+	model = kbdev->ipa.fallback_model;
+
+	err = model->ops->get_dynamic_coeff(model, &power_coeff);
+
+	if (!err)
+		power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
+	else
+		dev_err_ratelimited(kbdev->dev,
+				    "Model %s returned error code %d\n",
+				    model->ops->name, err);
+
+	mutex_unlock(&kbdev->ipa.lock);
+
+#if !(defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
+	kbase_release_device(kbdev);
+#endif
+
+	return power;
+}
+
+int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
+				unsigned long freq,
+				unsigned long voltage)
+{
+	struct kbase_ipa_model *model;
+	u32 power_coeff = 0;
+	int err = 0;
+	struct kbasep_pm_metrics diff;
+	u64 total_time;
+
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->ipa.last_metrics, &diff);
+
+	model = get_current_model(kbdev);
+
+	err = model->ops->get_dynamic_coeff(model, &power_coeff);
+
+	/* If the counter model returns an error (e.g. switching back to
+	 * protected mode and failing to read counters, or a counter sample
+	 * with too few cycles), revert to the fallback model.
+	 */
+	if (err && model != kbdev->ipa.fallback_model) {
+		model = kbdev->ipa.fallback_model;
+		err = model->ops->get_dynamic_coeff(model, &power_coeff);
+	}
+
+	if (err)
+		return err;
+
+	*power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
+
+	/* time_busy / total_time cannot be >1, so assigning the 64-bit
+	 * result of div_u64 to *power cannot overflow.
+	 */
+	total_time = diff.time_busy + (u64) diff.time_idle;
+	*power = div_u64(*power * (u64) diff.time_busy,
+			 max(total_time, 1ull));
+
+	*power += get_static_power_locked(kbdev, model, voltage);
+
+	return err;
+}
+KBASE_EXPORT_TEST_API(kbase_get_real_power_locked);
+
+int kbase_get_real_power(struct devfreq *df, u32 *power,
+				unsigned long freq,
+				unsigned long voltage)
+{
+	int ret;
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+
+	mutex_lock(&kbdev->ipa.lock);
+	ret = kbase_get_real_power_locked(kbdev, power, freq, voltage);
+	mutex_unlock(&kbdev->ipa.lock);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_get_real_power);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+struct devfreq_cooling_ops kbase_ipa_power_model_ops = {
+#else
+struct devfreq_cooling_power kbase_ipa_power_model_ops = {
+#endif
+	.get_static_power = &kbase_get_static_power,
+	.get_dynamic_power = &kbase_get_dynamic_power,
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	.get_real_power = &kbase_get_real_power,
+#endif
+};
+KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
new file mode 100755
index 0000000..e215c2c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
@@ -0,0 +1,228 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IPA_H_
+#define _KBASE_IPA_H_
+
+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
+
+struct devfreq;
+
+/**
+ * struct kbase_ipa_model - Object describing a particular IPA model.
+ * @kbdev:                    pointer to kbase device
+ * @model_data:               opaque pointer to model specific data, accessed
+ *                            only by model specific methods.
+ * @ops:                      pointer to object containing model specific methods.
+ * @params:                   head of the list of debugfs params added for model
+ * @missing_dt_node_warning:  flag to limit the matching power model DT not found
+ *                            warning to once.
+ */
+struct kbase_ipa_model {
+	struct kbase_device *kbdev;
+	void *model_data;
+	struct kbase_ipa_model_ops *ops;
+	struct list_head params;
+	bool missing_dt_node_warning;
+};
+
+/**
+ * kbase_ipa_model_add_param_s32 - Add an integer model parameter
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @addr:	address where the value is stored
+ * @num_elems:	number of elements (1 if not an array)
+ * @dt_required: if false, a corresponding devicetree entry is not required,
+ *		 and the current value will be used. If true, a warning is
+ *		 output and the data is zeroed
+ *
+ * Return: 0 on success, or an error code
+ */
+int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
+				  const char *name, s32 *addr,
+				  size_t num_elems, bool dt_required);
+
+/**
+ * kbase_ipa_model_add_param_string - Add a string model parameter
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @addr:	address where the value is stored
+ * @size:	size, in bytes, of the value storage (so the maximum string
+ *		length is size - 1)
+ * @dt_required: if false, a corresponding devicetree entry is not required,
+ *		 and the current value will be used. If true, a warning is
+ *		 output and the data is zeroed
+ *
+ * Return: 0 on success, or an error code
+ */
+int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
+				     const char *name, char *addr,
+				     size_t size, bool dt_required);
+
+struct kbase_ipa_model_ops {
+	char *name;
+	/* The init, recalculate and term ops on the default model are always
+	 * called.  However, all the other models are only invoked if the model
+	 * is selected in the device tree. Otherwise they are never
+	 * initialized. Additional resources can be acquired by models in
+	 * init(), however they must be terminated in the term().
+	 */
+	int (*init)(struct kbase_ipa_model *model);
+	/* Called immediately after init(), or when a parameter is changed, so
+	 * that any coefficients derived from model parameters can be
+	 * recalculated. */
+	int (*recalculate)(struct kbase_ipa_model *model);
+	void (*term)(struct kbase_ipa_model *model);
+	/*
+	 * get_dynamic_coeff() - calculate dynamic power coefficient
+	 * @model:		pointer to model
+	 * @coeffp:		pointer to return value location
+	 *
+	 * Calculate a dynamic power coefficient, with units pW/(Hz V^2), which
+	 * is then scaled by the IPA framework according to the current OPP's
+	 * frequency and voltage.
+	 *
+	 * Return: 0 on success, or an error code.
+	 */
+	int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
+	/*
+	 * get_static_coeff() - calculate static power coefficient
+	 * @model:		pointer to model
+	 * @coeffp:		pointer to return value location
+	 *
+	 * Calculate a static power coefficient, with units uW/(V^3), which is
+	 * scaled by the IPA framework according to the current OPP's voltage.
+	 *
+	 * Return: 0 on success, or an error code.
+	 */
+	int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
+};
+
+/**
+ * kbase_ipa_init - Initialize the IPA feature
+ * @kbdev:      pointer to kbase device
+ *
+ * simple IPA power model is initialized as a fallback model and if that
+ * initialization fails then IPA is not used.
+ * The device tree is read for the name of ipa model to be used, by using the
+ * property string "ipa-model". If that ipa model is supported then it is
+ * initialized but if the initialization fails then simple power model is used.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_ipa_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_term - Terminate the IPA feature
+ * @kbdev:      pointer to kbase device
+ *
+ * Both simple IPA power model and model retrieved from device tree are
+ * terminated.
+ */
+void kbase_ipa_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_model_recalculate - Recalculate the model coefficients
+ * @model:      pointer to the IPA model object, already initialized
+ *
+ * It shall be called immediately after the model has been initialized
+ * or when the model parameter has changed, so that any coefficients
+ * derived from parameters can be recalculated.
+ * Its a wrapper for the module specific recalculate() method.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_init_model - Initilaize the particular IPA model
+ * @kbdev:      pointer to the IPA model object, already initialized
+ * @ops:        pointer to object containing model specific methods.
+ *
+ * Initialize the model corresponding to the @ops pointer passed.
+ * The init() method specified in @ops would be called.
+ *
+ * Return: pointer to kbase_ipa_model on success, NULL on error
+ */
+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
+					     struct kbase_ipa_model_ops *ops);
+/**
+ * kbase_ipa_term_model - Terminate the particular IPA model
+ * @model:      pointer to the IPA model object, already initialized
+ *
+ * Terminate the model, using the term() method.
+ * Module specific parameters would be freed.
+ */
+void kbase_ipa_term_model(struct kbase_ipa_model *model);
+
+/* Switch to the fallback model */
+void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev);
+
+/* Switch to the model retrieved from device tree */
+void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev);
+
+extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
+extern struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
+extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops;
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_get_real_power() - get the real power consumption of the GPU
+ * @df: dynamic voltage and frequency scaling information for the GPU.
+ * @power: where to store the power consumption, in mW.
+ * @freq: a frequency, in HZ.
+ * @voltage: a voltage, in mV.
+ *
+ * This function is only exposed for use by unit tests. The returned value
+ * incorporates both static and dynamic power consumption.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_get_real_power(struct devfreq *df, u32 *power,
+				unsigned long freq,
+				unsigned long voltage);
+
+/* Called by kbase_get_real_power() to invoke the power models.
+ * Must be called with kbdev->ipa.lock held.
+ */
+int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
+				unsigned long freq,
+				unsigned long voltage);
+#endif /* MALI_UNIT_TEST */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+extern struct devfreq_cooling_ops kbase_ipa_power_model_ops;
+#else
+extern struct devfreq_cooling_power kbase_ipa_power_model_ops;
+#endif
+
+#else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+static inline void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
+{ }
+
+static inline void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
+{ }
+
+#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
new file mode 100644
index 0000000..029023c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
@@ -0,0 +1,268 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+#include "mali_kbase.h"
+#include "mali_kbase_ipa.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0))
+#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
+#endif
+
+struct kbase_ipa_model_param {
+	char *name;
+	union {
+		void *voidp;
+		s32 *s32p;
+		char *str;
+	} addr;
+	size_t size;
+	enum kbase_ipa_model_param_type type;
+	struct kbase_ipa_model *model;
+	struct list_head link;
+};
+
+static int param_int_get(void *data, u64 *val)
+{
+	struct kbase_ipa_model_param *param = data;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	*(s64 *) val = *param->addr.s32p;
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return 0;
+}
+
+static int param_int_set(void *data, u64 val)
+{
+	struct kbase_ipa_model_param *param = data;
+	struct kbase_ipa_model *model = param->model;
+	s64 sval = (s64) val;
+	s32 old_val;
+	int err = 0;
+
+	if (sval < S32_MIN || sval > S32_MAX)
+		return -ERANGE;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	old_val = *param->addr.s32p;
+	*param->addr.s32p = val;
+	err = kbase_ipa_model_recalculate(model);
+	if (err < 0)
+		*param->addr.s32p = old_val;
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return err;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_s32, param_int_get, param_int_set, "%lld\n");
+
+static ssize_t param_string_get(struct file *file, char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct kbase_ipa_model_param *param = file->private_data;
+	ssize_t ret;
+	size_t len;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	len = strnlen(param->addr.str, param->size - 1) + 1;
+	ret = simple_read_from_buffer(user_buf, count, ppos,
+				      param->addr.str, len);
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return ret;
+}
+
+static ssize_t param_string_set(struct file *file, const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct kbase_ipa_model_param *param = file->private_data;
+	struct kbase_ipa_model *model = param->model;
+	char *old_str = NULL;
+	ssize_t ret = count;
+	size_t buf_size;
+	int err;
+
+	mutex_lock(&model->kbdev->ipa.lock);
+
+	if (count > param->size) {
+		ret = -EINVAL;
+		goto end;
+	}
+
+	old_str = kstrndup(param->addr.str, param->size, GFP_KERNEL);
+	if (!old_str) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	buf_size = min(param->size - 1, count);
+	if (copy_from_user(param->addr.str, user_buf, buf_size)) {
+		ret = -EFAULT;
+		goto end;
+	}
+
+	param->addr.str[buf_size] = '\0';
+
+	err = kbase_ipa_model_recalculate(model);
+	if (err < 0) {
+		ret = err;
+		strlcpy(param->addr.str, old_str, param->size);
+	}
+
+end:
+	kfree(old_str);
+	mutex_unlock(&model->kbdev->ipa.lock);
+
+	return ret;
+}
+
+static const struct file_operations fops_string = {
+	.read = param_string_get,
+	.write = param_string_set,
+	.open = simple_open,
+	.llseek = default_llseek,
+};
+
+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name,
+			      void *addr, size_t size,
+			      enum kbase_ipa_model_param_type type)
+{
+	struct kbase_ipa_model_param *param;
+
+	param = kzalloc(sizeof(*param), GFP_KERNEL);
+
+	if (!param)
+		return -ENOMEM;
+
+	/* 'name' is stack-allocated for array elements, so copy it into
+	 * heap-allocated storage */
+	param->name = kstrdup(name, GFP_KERNEL);
+
+	if (!param->name) {
+		kfree(param);
+		return -ENOMEM;
+	}
+
+	param->addr.voidp = addr;
+	param->size = size;
+	param->type = type;
+	param->model = model;
+
+	list_add(&param->link, &model->params);
+
+	return 0;
+}
+
+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_param *param_p, *param_n;
+
+	list_for_each_entry_safe(param_p, param_n, &model->params, link) {
+		list_del(&param_p->link);
+		kfree(param_p->name);
+		kfree(param_p);
+	}
+}
+
+static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
+{
+	struct list_head *it;
+	struct dentry *dir;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	dir = debugfs_create_dir(model->ops->name,
+				 model->kbdev->mali_debugfs_directory);
+
+	if (!dir) {
+		dev_err(model->kbdev->dev,
+			"Couldn't create mali debugfs %s directory",
+			model->ops->name);
+		return;
+	}
+
+	list_for_each(it, &model->params) {
+		struct kbase_ipa_model_param *param =
+				list_entry(it,
+					   struct kbase_ipa_model_param,
+					   link);
+		const struct file_operations *fops = NULL;
+
+		switch (param->type) {
+		case PARAM_TYPE_S32:
+			fops = &fops_s32;
+			break;
+		case PARAM_TYPE_STRING:
+			fops = &fops_string;
+			break;
+		}
+
+		if (unlikely(!fops)) {
+			dev_err(model->kbdev->dev,
+				"Type not set for %s parameter %s\n",
+				model->ops->name, param->name);
+		} else {
+			debugfs_create_file(param->name, S_IRUGO | S_IWUSR,
+					    dir, param, fops);
+		}
+	}
+}
+
+void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model,
+	const char *name, s32 val)
+{
+	struct kbase_ipa_model_param *param;
+
+	mutex_lock(&model->kbdev->ipa.lock);
+
+	list_for_each_entry(param, &model->params, link) {
+		if (!strcmp(param->name, name)) {
+			if (param->type == PARAM_TYPE_S32) {
+				*param->addr.s32p = val;
+			} else {
+				dev_err(model->kbdev->dev,
+					"Wrong type for %s parameter %s\n",
+					model->ops->name, param->name);
+			}
+			break;
+		}
+	}
+
+	mutex_unlock(&model->kbdev->ipa.lock);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_model_param_set_s32);
+
+void kbase_ipa_debugfs_init(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->ipa.lock);
+
+	if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model)
+		kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model);
+	kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model);
+
+	mutex_unlock(&kbdev->ipa.lock);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h
new file mode 100644
index 0000000..a983d9c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IPA_DEBUGFS_H_
+#define _KBASE_IPA_DEBUGFS_H_
+
+enum kbase_ipa_model_param_type {
+	PARAM_TYPE_S32 = 1,
+	PARAM_TYPE_STRING,
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+void kbase_ipa_debugfs_init(struct kbase_device *kbdev);
+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name,
+			      void *addr, size_t size,
+			      enum kbase_ipa_model_param_type type);
+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_model_param_set_s32 - Set an integer model parameter
+ *
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @val:	new value of the parameter
+ *
+ * This function is only exposed for use by unit tests running in
+ * kernel space. Normally it is expected that parameter values will
+ * instead be set via debugfs.
+ */
+void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model,
+	const char *name, s32 val);
+
+#else /* CONFIG_DEBUG_FS */
+
+static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model,
+					    const char *name, void *addr,
+					    size_t size,
+					    enum kbase_ipa_model_param_type type)
+{
+	return 0;
+}
+
+static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
+{ }
+
+#endif /* CONFIG_DEBUG_FS */
+
+#endif /* _KBASE_IPA_DEBUGFS_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
new file mode 100755
index 0000000..e684df4
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
@@ -0,0 +1,350 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <uapi/linux/thermal.h>
+#include <linux/thermal.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+#include <linux/of.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+
+#include "mali_kbase.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_ipa_simple.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+#if MALI_UNIT_TEST
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+static unsigned long dummy_temp;
+
+static int kbase_simple_power_model_get_dummy_temp(
+	struct thermal_zone_device *tz,
+	unsigned long *temp)
+{
+	*temp = READ_ONCE(dummy_temp);
+	return 0;
+}
+
+#else
+static int dummy_temp;
+
+static int kbase_simple_power_model_get_dummy_temp(
+	struct thermal_zone_device *tz,
+	int *temp)
+{
+	*temp = READ_ONCE(dummy_temp);
+	return 0;
+}
+#endif
+
+/* Intercept calls to the kernel function using a macro */
+#ifdef thermal_zone_get_temp
+#undef thermal_zone_get_temp
+#endif
+#define thermal_zone_get_temp(tz, temp) \
+	kbase_simple_power_model_get_dummy_temp(tz, temp)
+
+void kbase_simple_power_model_set_dummy_temp(int temp)
+{
+	WRITE_ONCE(dummy_temp, temp);
+}
+KBASE_EXPORT_TEST_API(kbase_simple_power_model_set_dummy_temp);
+
+#endif /* MALI_UNIT_TEST */
+
+/*
+ * This model is primarily designed for the Juno platform. It may not be
+ * suitable for other platforms. The additional resources in this model
+ * should preferably be minimal, as this model is rarely used when a dynamic
+ * model is available.
+ */
+
+/**
+ * struct kbase_ipa_model_simple_data - IPA context per device
+ * @dynamic_coefficient: dynamic coefficient of the model
+ * @static_coefficient:  static coefficient of the model
+ * @ts:                  Thermal scaling coefficients of the model
+ * @tz_name:             Thermal zone name
+ * @gpu_tz:              thermal zone device
+ * @poll_temperature_thread: Handle for temperature polling thread
+ * @current_temperature: Most recent value of polled temperature
+ * @temperature_poll_interval_ms: How often temperature should be checked, in ms
+ */
+
+struct kbase_ipa_model_simple_data {
+	u32 dynamic_coefficient;
+	u32 static_coefficient;
+	s32 ts[4];
+	char tz_name[THERMAL_NAME_LENGTH];
+	struct thermal_zone_device *gpu_tz;
+	struct task_struct *poll_temperature_thread;
+	int current_temperature;
+	int temperature_poll_interval_ms;
+};
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+/**
+ * calculate_temp_scaling_factor() - Calculate temperature scaling coefficient
+ * @ts:		Signed coefficients, in order t^0 to t^3, with units Deg^-N
+ * @t:		Temperature, in mDeg C. Range: -2^17 < t < 2^17
+ *
+ * Scale the temperature according to a cubic polynomial whose coefficients are
+ * provided in the device tree. The result is used to scale the static power
+ * coefficient, where 1000000 means no change.
+ *
+ * Return: Temperature scaling factor. Range 0 <= ret <= 10,000,000.
+ */
+static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t)
+{
+	/* Range: -2^24 < t2 < 2^24 m(Deg^2) */
+	const s64 t2 = div_s64((t * t), 1000);
+
+	/* Range: -2^31 < t3 < 2^31 m(Deg^3) */
+	const s64 t3 = div_s64((t * t2), 1000);
+
+	/*
+	 * Sum the parts. t^[1-3] are in m(Deg^N), but the coefficients are in
+	 * Deg^-N, so we need to multiply the last coefficient by 1000.
+	 * Range: -2^63 < res_big < 2^63
+	 */
+	const s64 res_big = ts[3] * t3    /* +/- 2^62 */
+			  + ts[2] * t2    /* +/- 2^55 */
+			  + ts[1] * t     /* +/- 2^48 */
+			  + ts[0] * 1000; /* +/- 2^41 */
+
+	/* Range: -2^60 < res_unclamped < 2^60 */
+	s64 res_unclamped = div_s64(res_big, 1000);
+
+	/* Clamp to range of 0x to 10x the static power */
+	return clamp(res_unclamped, (s64) 0, (s64) 10000000);
+}
+
+/* We can't call thermal_zone_get_temp() directly in model_static_coeff(),
+ * because we don't know if tz->lock is held in the same thread. So poll it in
+ * a separate thread to get around this. */
+static int poll_temperature(void *data)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *) data;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	unsigned long temp;
+#else
+	int temp;
+#endif
+
+	while (!kthread_should_stop()) {
+		struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz);
+
+		if (tz) {
+			int ret;
+
+			ret = thermal_zone_get_temp(tz, &temp);
+			if (ret) {
+				pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+						    ret);
+				temp = FALLBACK_STATIC_TEMPERATURE;
+			}
+		} else {
+			temp = FALLBACK_STATIC_TEMPERATURE;
+		}
+
+		WRITE_ONCE(model_data->current_temperature, temp);
+
+		msleep_interruptible(READ_ONCE(model_data->temperature_poll_interval_ms));
+	}
+
+	return 0;
+}
+
+static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp)
+{
+	u32 temp_scaling_factor;
+	struct kbase_ipa_model_simple_data *model_data =
+		(struct kbase_ipa_model_simple_data *) model->model_data;
+	u64 coeff_big;
+	int temp;
+
+	temp = READ_ONCE(model_data->current_temperature);
+
+	/* Range: 0 <= temp_scaling_factor < 2^24 */
+	temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts,
+							    temp);
+
+	/*
+	 * Range: 0 <= coeff_big < 2^52 to avoid overflowing *coeffp. This
+	 * means static_coefficient must be in range
+	 * 0 <= static_coefficient < 2^28.
+	 */
+	coeff_big = (u64) model_data->static_coefficient * (u64) temp_scaling_factor;
+	*coeffp = div_u64(coeff_big, 1000000);
+
+	return 0;
+}
+
+static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+		(struct kbase_ipa_model_simple_data *) model->model_data;
+
+	*coeffp = model_data->dynamic_coefficient;
+
+	return 0;
+}
+
+static int add_params(struct kbase_ipa_model *model)
+{
+	int err = 0;
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+
+	err = kbase_ipa_model_add_param_s32(model, "static-coefficient",
+					    &model_data->static_coefficient,
+					    1, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient",
+					    &model_data->dynamic_coefficient,
+					    1, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_s32(model, "ts",
+					    model_data->ts, 4, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_string(model, "thermal-zone",
+					       model_data->tz_name,
+					       sizeof(model_data->tz_name), true);
+	if (err)
+		goto end;
+
+	model_data->temperature_poll_interval_ms = 200;
+	err = kbase_ipa_model_add_param_s32(model, "temp-poll-interval-ms",
+					    &model_data->temperature_poll_interval_ms,
+					    1, false);
+
+end:
+	return err;
+}
+
+static int kbase_simple_power_model_init(struct kbase_ipa_model *model)
+{
+	int err;
+	struct kbase_ipa_model_simple_data *model_data;
+
+	model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data),
+			     GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model->model_data = (void *) model_data;
+
+	model_data->current_temperature = FALLBACK_STATIC_TEMPERATURE;
+	model_data->poll_temperature_thread = kthread_run(poll_temperature,
+							  (void *) model_data,
+							  "mali-simple-power-model-temp-poll");
+	if (IS_ERR(model_data->poll_temperature_thread)) {
+		kfree(model_data);
+		return PTR_ERR(model_data->poll_temperature_thread);
+	}
+
+	err = add_params(model);
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kthread_stop(model_data->poll_temperature_thread);
+		kfree(model_data);
+	}
+
+	return err;
+}
+
+static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+	struct thermal_zone_device *tz;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (!strnlen(model_data->tz_name, sizeof(model_data->tz_name))) {
+		model_data->gpu_tz = NULL;
+	} else {
+		char tz_name[THERMAL_NAME_LENGTH];
+
+		strlcpy(tz_name, model_data->tz_name, sizeof(tz_name));
+
+		/* Release ipa.lock so that thermal_list_lock is not acquired
+		 * with ipa.lock held, thereby avoid lock ordering violation
+		 * lockdep warning. The warning comes as a chain of locks
+		 * ipa.lock --> thermal_list_lock --> tz->lock gets formed
+		 * on registering devfreq cooling device when probe method
+		 * of mali platform driver is invoked.
+		 */
+		mutex_unlock(&model->kbdev->ipa.lock);
+		tz = thermal_zone_get_zone_by_name(tz_name);
+		mutex_lock(&model->kbdev->ipa.lock);
+
+		if (IS_ERR_OR_NULL(tz)) {
+			pr_warn_ratelimited("Error %ld getting thermal zone \'%s\', not yet ready?\n",
+					    PTR_ERR(tz), tz_name);
+			return -EPROBE_DEFER;
+		}
+
+		/* Check if another thread raced against us & updated the
+		 * thermal zone name string. Update the gpu_tz pointer only if
+		 * the name string did not change whilst we retrieved the new
+		 * thermal_zone_device pointer, otherwise model_data->tz_name &
+		 * model_data->gpu_tz would become inconsistent with each other.
+		 * The below check will succeed only for the thread which last
+		 * updated the name string.
+		 */
+		if (strncmp(tz_name, model_data->tz_name, sizeof(tz_name)) == 0)
+			model_data->gpu_tz = tz;
+	}
+
+	return 0;
+}
+
+static void kbase_simple_power_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+
+	kthread_stop(model_data->poll_temperature_thread);
+
+	kfree(model_data);
+}
+
+struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = {
+		.name = "mali-simple-power-model",
+		.init = &kbase_simple_power_model_init,
+		.recalculate = &kbase_simple_power_model_recalculate,
+		.term = &kbase_simple_power_model_term,
+		.get_dynamic_coeff = &model_dynamic_coeff,
+		.get_static_coeff = &model_static_coeff,
+};
+KBASE_EXPORT_TEST_API(kbase_simple_ipa_model_ops);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h
new file mode 100644
index 0000000..fed67d5
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IPA_SIMPLE_H_
+#define _KBASE_IPA_SIMPLE_H_
+
+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
+
+extern struct kbase_ipa_model_ops kbase_simple_ipa_model_ops;
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_simple_power_model_set_dummy_temp() - set a dummy temperature value
+ * @temp: Temperature of the thermal zone, in millidegrees celsius.
+ *
+ * This is only intended for use in unit tests, to ensure that the temperature
+ * values used by the simple power model are predictable. Deterministic
+ * behavior is necessary to allow validation of the static power values
+ * computed by this model.
+ */
+void kbase_simple_power_model_set_dummy_temp(int temp);
+#endif /* MALI_UNIT_TEST */
+
+#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+#endif /* _KBASE_IPA_SIMPLE_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
new file mode 100755
index 0000000..4019657
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
@@ -0,0 +1,338 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+#define DEFAULT_SCALING_FACTOR 5
+
+/* If the value of GPU_ACTIVE is below this, use the simple model
+ * instead, to avoid extrapolating small amounts of counter data across
+ * large sample periods.
+ */
+#define DEFAULT_MIN_SAMPLE_CYCLES 10000
+
+/**
+ * read_hwcnt() - read a counter value
+ * @model_data:		pointer to model data
+ * @offset:		offset, in bytes, into vinstr buffer
+ *
+ * Return: A 32-bit counter value. Range: 0 < value < 2^27 (worst case would be
+ * incrementing every cycle over a ~100ms sample period at a high frequency,
+ * e.g. 1 GHz: 2^30 * 0.1seconds ~= 2^27.
+ */
+static inline u32 kbase_ipa_read_hwcnt(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	u32 offset)
+{
+	u8 *p = model_data->vinstr_buffer;
+
+	return *(u32 *)&p[offset];
+}
+
+static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
+{
+	if (S64_MAX - a < b)
+		return S64_MAX;
+	return a + b;
+}
+
+s64 kbase_ipa_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+	u64 core_mask;
+	u32 base = 0;
+	s64 ret = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			/* 0 < counter_value < 2^27 */
+			u32 counter_value = kbase_ipa_read_hwcnt(model_data,
+						       base + counter);
+
+			/* 0 < ret < 2^27 * max_num_cores = 2^32 */
+			ret = kbase_ipa_add_saturate(ret, counter_value);
+		}
+		base += KBASE_IPA_NR_BYTES_PER_BLOCK;
+		core_mask >>= 1;
+	}
+
+	/* Range: -2^54 < ret * coeff < 2^54 */
+	return ret * coeff;
+}
+
+s64 kbase_ipa_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter)
+{
+	/* Range: 0 < counter_value < 2^27 */
+	const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter);
+
+	/* Range: -2^49 < ret < 2^49 */
+	return counter_value * (s64) coeff;
+}
+
+/**
+ * kbase_ipa_gpu_active - Inform IPA that GPU is now active
+ * @model_data: Pointer to model data
+ *
+ * This function may cause vinstr to become active.
+ */
+static void kbase_ipa_gpu_active(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (!kbdev->ipa.vinstr_active) {
+		kbdev->ipa.vinstr_active = true;
+		kbase_vinstr_resume_client(model_data->vinstr_cli);
+	}
+}
+
+/**
+ * kbase_ipa_gpu_idle - Inform IPA that GPU is now idle
+ * @model_data: Pointer to model data
+ *
+ * This function may cause vinstr to become idle.
+ */
+static void kbase_ipa_gpu_idle(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->ipa.vinstr_active) {
+		kbase_vinstr_suspend_client(model_data->vinstr_cli);
+		kbdev->ipa.vinstr_active = false;
+	}
+}
+
+int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+	struct kbase_ioctl_hwcnt_reader_setup setup;
+	size_t dump_size;
+
+	dump_size = kbase_vinstr_dump_size(kbdev);
+	model_data->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!model_data->vinstr_buffer) {
+		dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
+		return -1;
+	}
+
+	setup.jm_bm = ~0u;
+	setup.shader_bm = ~0u;
+	setup.tiler_bm = ~0u;
+	setup.mmu_l2_bm = ~0u;
+	model_data->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx,
+			&setup, model_data->vinstr_buffer);
+	if (!model_data->vinstr_cli) {
+		dev_err(kbdev->dev, "Failed to register IPA with vinstr core");
+		kfree(model_data->vinstr_buffer);
+		model_data->vinstr_buffer = NULL;
+		return -1;
+	}
+
+	kbase_vinstr_hwc_clear(model_data->vinstr_cli);
+
+	kbdev->ipa.gpu_active_callback = kbase_ipa_gpu_active;
+	kbdev->ipa.gpu_idle_callback = kbase_ipa_gpu_idle;
+	kbdev->ipa.model_data = model_data;
+	kbdev->ipa.vinstr_active = false;
+	/* Suspend vinstr, to ensure that the GPU is powered off until there is
+	 * something to execute.
+	 */
+	kbase_vinstr_suspend_client(model_data->vinstr_cli);
+
+	return 0;
+}
+
+void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+
+	kbdev->ipa.gpu_active_callback = NULL;
+	kbdev->ipa.gpu_idle_callback = NULL;
+	kbdev->ipa.model_data = NULL;
+	kbdev->ipa.vinstr_active = false;
+
+	if (model_data->vinstr_cli)
+		kbase_vinstr_detach_client(model_data->vinstr_cli);
+
+	model_data->vinstr_cli = NULL;
+	kfree(model_data->vinstr_buffer);
+	model_data->vinstr_buffer = NULL;
+}
+
+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
+{
+	struct kbase_ipa_model_vinstr_data *model_data =
+			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+	struct kbase_device *kbdev = model_data->kbdev;
+	s64 energy = 0;
+	size_t i;
+	u64 coeff = 0, coeff_mul = 0;
+	u32 active_cycles;
+	int err = 0;
+
+	if (!kbdev->ipa.vinstr_active)
+		goto err0; /* GPU powered off - no counters to collect */
+
+	err = kbase_vinstr_hwc_dump(model_data->vinstr_cli,
+				    BASE_HWCNT_READER_EVENT_MANUAL);
+	if (err)
+		goto err0;
+
+	/* Range: 0 (GPU not used at all), to the max sampling interval, say
+	 * 1s, * max GPU frequency (GPU 100% utilized).
+	 * 0 <= active_cycles <= 1 * ~2GHz
+	 * 0 <= active_cycles < 2^31
+	 */
+	active_cycles = model_data->get_active_cycles(model_data);
+
+	if (active_cycles < (u32) max(model_data->min_sample_cycles, 0)) {
+		err = -ENODATA;
+		goto err0;
+	}
+
+	/* Range: 1 <= active_cycles < 2^31 */
+	active_cycles = max(1u, active_cycles);
+
+	/* Range of 'energy' is +/- 2^54 * number of IPA groups (~8), so around
+	 * -2^57 < energy < 2^57
+	 */
+	for (i = 0; i < model_data->groups_def_num; i++) {
+		const struct kbase_ipa_group *group = &model_data->groups_def[i];
+		s32 coeff = model_data->group_values[i];
+		s64 group_energy = group->op(model_data, coeff,
+					     group->counter_block_offset);
+
+		energy = kbase_ipa_add_saturate(energy, group_energy);
+	}
+
+	/* Range: 0 <= coeff < 2^57 */
+	if (energy > 0)
+		coeff = energy;
+
+	/* Range: 0 <= coeff < 2^57 (because active_cycles >= 1). However, this
+	 * can be constrained further: Counter values can only be increased by
+	 * a theoretical maximum of about 64k per clock cycle. Beyond this,
+	 * we'd have to sample every 1ms to avoid them overflowing at the
+	 * lowest clock frequency (say 100MHz). Therefore, we can write the
+	 * range of 'coeff' in terms of active_cycles:
+	 *
+	 * coeff = SUM(coeffN * counterN * num_cores_for_counterN)
+	 * coeff <= SUM(coeffN * counterN) * max_num_cores
+	 * coeff <= num_IPA_groups * max_coeff * max_counter * max_num_cores
+	 *       (substitute max_counter = 2^16 * active_cycles)
+	 * coeff <= num_IPA_groups * max_coeff * 2^16 * active_cycles * max_num_cores
+	 * coeff <=    2^3         *    2^22   * 2^16 * active_cycles * 2^5
+	 * coeff <= 2^46 * active_cycles
+	 *
+	 * So after the division: 0 <= coeff <= 2^46
+	 */
+	coeff = div_u64(coeff, active_cycles);
+
+	/* Scale by user-specified factor (where unity is 1000).
+	 * Range: 0 <= coeff_mul < 2^61
+	 */
+	coeff_mul = coeff * model_data->scaling_factor;
+
+	/* Range: 0 <= coeff_mul < 2^51 */
+	coeff_mul = div_u64(coeff_mul, 1000u);
+
+err0:
+	/* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
+	*coeffp = clamp(coeff_mul, (u64) 0, (u64) 1 << 16);
+	return err;
+}
+
+int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
+				       const struct kbase_ipa_group *ipa_groups_def,
+				       size_t ipa_group_size,
+				       kbase_ipa_get_active_cycles_callback get_active_cycles)
+{
+	int err = 0;
+	size_t i;
+	struct kbase_ipa_model_vinstr_data *model_data;
+
+	if (!model || !ipa_groups_def || !ipa_group_size || !get_active_cycles)
+		return -EINVAL;
+
+	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model_data->kbdev = model->kbdev;
+	model_data->groups_def = ipa_groups_def;
+	model_data->groups_def_num = ipa_group_size;
+	model_data->get_active_cycles = get_active_cycles;
+
+	model->model_data = (void *) model_data;
+
+	for (i = 0; i < model_data->groups_def_num; ++i) {
+		const struct kbase_ipa_group *group = &model_data->groups_def[i];
+
+		model_data->group_values[i] = group->default_value;
+		err = kbase_ipa_model_add_param_s32(model, group->name,
+					&model_data->group_values[i],
+					1, false);
+		if (err)
+			goto exit;
+	}
+
+	model_data->scaling_factor = DEFAULT_SCALING_FACTOR;
+	err = kbase_ipa_model_add_param_s32(model, "scale",
+					    &model_data->scaling_factor,
+					    1, false);
+	if (err)
+		goto exit;
+
+	model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES;
+	err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles",
+					    &model_data->min_sample_cycles,
+					    1, false);
+	if (err)
+		goto exit;
+
+	err = kbase_ipa_attach_vinstr(model_data);
+
+exit:
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kfree(model_data);
+	}
+	return err;
+}
+
+void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_vinstr_data *model_data =
+			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+
+	kbase_ipa_detach_vinstr(model_data);
+	kfree(model_data);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
new file mode 100755
index 0000000..c9288e8
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
@@ -0,0 +1,189 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IPA_VINSTR_COMMON_H_
+#define _KBASE_IPA_VINSTR_COMMON_H_
+
+#include "mali_kbase.h"
+
+/* Maximum number of IPA groups for an IPA model. */
+#define KBASE_IPA_MAX_GROUP_DEF_NUM  16
+
+/* Number of bytes per hardware counter in a vinstr_buffer. */
+#define KBASE_IPA_NR_BYTES_PER_CNT    4
+
+/* Number of hardware counters per block in a vinstr_buffer. */
+#define KBASE_IPA_NR_CNT_PER_BLOCK   64
+
+/* Number of bytes per block in a vinstr_buffer. */
+#define KBASE_IPA_NR_BYTES_PER_BLOCK \
+	(KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT)
+
+struct kbase_ipa_model_vinstr_data;
+
+typedef u32 (*kbase_ipa_get_active_cycles_callback)(struct kbase_ipa_model_vinstr_data *);
+
+/**
+ * struct kbase_ipa_model_vinstr_data - IPA context per device
+ * @kbdev:               pointer to kbase device
+ * @groups_def:          Array of IPA groups.
+ * @groups_def_num:      Number of elements in the array of IPA groups.
+ * @get_active_cycles:   Callback to return number of active cycles during
+ *                       counter sample period
+ * @vinstr_cli:          vinstr client handle
+ * @vinstr_buffer:       buffer to dump hardware counters onto
+ * @scaling_factor:      user-specified power scaling factor. This is
+ *                       interpreted as a fraction where the denominator is
+ *                       1000. Range approx 0.0-32.0:
+ *                       0 < scaling_factor < 2^15
+ * @min_sample_cycles:   If the value of the GPU_ACTIVE counter (the number of
+ *                       cycles the GPU was working) is less than
+ *                       min_sample_cycles, the counter model will return an
+ *                       error, causing the IPA framework to approximate using
+ *                       the cached simple model results instead. This may be
+ *                       more accurate than extrapolating  using a very small
+ *                       counter dump.
+ */
+struct kbase_ipa_model_vinstr_data {
+	struct kbase_device *kbdev;
+	s32 group_values[KBASE_IPA_MAX_GROUP_DEF_NUM];
+	const struct kbase_ipa_group *groups_def;
+	size_t groups_def_num;
+	kbase_ipa_get_active_cycles_callback get_active_cycles;
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	s32 scaling_factor;
+	s32 min_sample_cycles;
+};
+
+/**
+ * struct ipa_group - represents a single IPA group
+ * @name:               name of the IPA group
+ * @default_value:      default value of coefficient for IPA group.
+ *                      Coefficients are interpreted as fractions where the
+ *                      denominator is 1000000.
+ * @op:                 which operation to be performed on the counter values
+ * @counter_block_offset:  block offset in bytes of the counter used to calculate energy for IPA group
+ */
+struct kbase_ipa_group {
+	const char *name;
+	s32 default_value;
+	s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32);
+	u32 counter_block_offset;
+};
+
+/**
+ * sum_all_shader_cores() - sum a counter over all cores
+ * @model_data		pointer to model data
+ * @coeff		model coefficient. Unity is ~2^20, so range approx
+ * +/- 4.0: -2^22 < coeff < 2^22
+ * @counter     offset in bytes of the counter used to calculate energy for IPA group
+ *
+ * Calculate energy estimation based on hardware counter `counter'
+ * across all shader cores.
+ *
+ * Return: Sum of counter values. Range: -2^54 < ret < 2^54
+ */
+s64 kbase_ipa_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter);
+
+/**
+ * sum_single_counter() - sum a single counter
+ * @model_data		pointer to model data
+ * @coeff		model coefficient. Unity is ~2^20, so range approx
+ * +/- 4.0: -2^22 < coeff < 2^22
+ * @counter     offset in bytes of the counter used to calculate energy for IPA group
+ *
+ * Calculate energy estimation based on hardware counter `counter'.
+ *
+ * Return: Counter value. Range: -2^49 < ret < 2^49
+ */
+s64 kbase_ipa_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter);
+
+/**
+ * attach_vinstr() - attach a vinstr_buffer to an IPA model.
+ * @model_data		pointer to model data
+ *
+ * Attach a vinstr_buffer to an IPA model. The vinstr_buffer
+ * allows access to the hardware counters used to calculate
+ * energy consumption.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
+
+/**
+ * detach_vinstr() - detach a vinstr_buffer from an IPA model.
+ * @model_data		pointer to model data
+ *
+ * Detach a vinstr_buffer from an IPA model.
+ */
+void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
+
+/**
+ * kbase_ipa_vinstr_dynamic_coeff() - calculate dynamic power based on HW counters
+ * @model:		pointer to instantiated model
+ * @coeffp:		pointer to location where calculated power, in
+ *			pW/(Hz V^2), is stored.
+ *
+ * This is a GPU-agnostic implementation of the get_dynamic_coeff()
+ * function of an IPA model. It relies on the model being populated
+ * with GPU-specific attributes at initialization time.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp);
+
+/**
+ * kbase_ipa_vinstr_common_model_init() - initialize ipa power model
+ * @model:		ipa power model to initialize
+ * @ipa_groups_def:	array of ipa groups which sets coefficients for
+ *			the corresponding counters used in the ipa model
+ * @ipa_group_size:     number of elements in the array @ipa_groups_def
+ * @get_active_cycles:  callback to return the number of cycles the GPU was
+ *			active during the counter sample period.
+ *
+ * This initialization function performs initialization steps common
+ * for ipa models based on counter values. In each call, the model
+ * passes its specific coefficient values per ipa counter group via
+ * @ipa_groups_def array.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
+				       const struct kbase_ipa_group *ipa_groups_def,
+				       size_t ipa_group_size,
+				       kbase_ipa_get_active_cycles_callback get_active_cycles);
+
+/**
+ * kbase_ipa_vinstr_common_model_term() - terminate ipa power model
+ * @model: ipa power model to terminate
+ *
+ * This function performs all necessary steps to terminate ipa power model
+ * including clean up of resources allocated to hold model data.
+ */
+void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model);
+
+#endif /* _KBASE_IPA_VINSTR_COMMON_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g71.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g71.c
new file mode 100644
index 0000000..d07fb36
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g71.c
@@ -0,0 +1,256 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include <linux/thermal.h>
+
+#include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+
+/* Performance counter blocks base offsets */
+#define JM_BASE             (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define TILER_BASE          (1 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define MEMSYS_BASE         (2 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define SC0_BASE_ONE_MEMSYS (3 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define SC0_BASE_TWO_MEMSYS (4 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+
+/* JM counter block offsets */
+#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT *  6)
+
+/* Tiler counter block offsets */
+#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45)
+
+/* MEMSYS counter block offsets */
+#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25)
+
+/* SC counter block offsets */
+#define SC_FRAG_ACTIVE      (KBASE_IPA_NR_BYTES_PER_CNT *  4)
+#define SC_EXEC_CORE_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 26)
+#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28)
+#define SC_TEX_COORD_ISSUE  (KBASE_IPA_NR_BYTES_PER_CNT * 40)
+#define SC_VARY_SLOT_32     (KBASE_IPA_NR_BYTES_PER_CNT * 50)
+#define SC_VARY_SLOT_16     (KBASE_IPA_NR_BYTES_PER_CNT * 51)
+#define SC_BEATS_RD_LSC     (KBASE_IPA_NR_BYTES_PER_CNT * 56)
+#define SC_BEATS_WR_LSC     (KBASE_IPA_NR_BYTES_PER_CNT * 61)
+#define SC_BEATS_WR_TIB     (KBASE_IPA_NR_BYTES_PER_CNT * 62)
+
+/** Maximum number of cores for which a single Memory System block of performance counters is present. */
+#define KBASE_G71_SINGLE_MEMSYS_MAX_NUM_CORES ((u8)4)
+
+
+/**
+ * get_jm_counter() - get performance counter offset inside the Job Manager block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the Job Manager block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g71_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data,
+                                                u32 counter_block_offset)
+{
+	return JM_BASE + counter_block_offset;
+}
+
+/**
+ * get_memsys_counter() - get peformance counter offset inside the Memory System block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Memory System block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g71_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data,
+                                                    u32 counter_block_offset)
+{
+	/* The base address of Memory System performance counters is always the same, although their number
+	 * may vary based on the number of cores. For the moment it's ok to return a constant.
+	 */
+	return MEMSYS_BASE + counter_block_offset;
+}
+
+/**
+ * get_sc_counter() - get performance counter offset inside the Shader Cores block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Shader Cores block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g71_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
+                                                u32 counter_block_offset)
+{
+	const u32 sc_base = model_data->kbdev->gpu_props.num_cores <= KBASE_G71_SINGLE_MEMSYS_MAX_NUM_CORES ?
+	                    SC0_BASE_ONE_MEMSYS :
+	                    SC0_BASE_TWO_MEMSYS;
+
+	return sc_base + counter_block_offset;
+}
+
+/**
+ * memsys_single_counter() - calculate energy for a single Memory System performance counter.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Memory System performance counter.
+ */
+static s64 kbase_g71_memsys_single_counter(
+    struct kbase_ipa_model_vinstr_data *model_data,
+    s32 coeff,
+    u32 counter_block_offset)
+{
+	return kbase_ipa_single_counter(model_data, coeff,
+	                                kbase_g71_power_model_get_memsys_counter(model_data, counter_block_offset));
+}
+
+/**
+ * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a Shader Cores performance counter for all cores.
+ */
+static s64 kbase_g71_sum_all_shader_cores(
+    struct kbase_ipa_model_vinstr_data *model_data,
+    s32 coeff,
+    u32 counter_block_offset)
+{
+	return kbase_ipa_sum_all_shader_cores(model_data, coeff,
+	                                      kbase_g71_power_model_get_sc_counter(model_data, counter_block_offset));
+}
+
+/**
+ * jm_single_counter() - calculate energy for a single Job Manager performance counter.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Job Manager performance counter.
+ */
+static s64 kbase_g71_jm_single_counter(
+    struct kbase_ipa_model_vinstr_data *model_data,
+    s32 coeff,
+    u32 counter_block_offset)
+{
+	return kbase_ipa_single_counter(model_data, coeff,
+	                                kbase_g71_power_model_get_jm_counter(model_data, counter_block_offset));
+}
+
+/** Table of IPA group definitions.
+ *
+ * For each IPA group, this table defines a function to access the given performance block counter (or counters,
+ * if the operation needs to be iterated on multiple blocks) and calculate energy estimation.
+ */
+static const struct kbase_ipa_group ipa_groups_def[] = {
+	{
+		.name = "l2_access",
+		.default_value = 526300,
+		.op = kbase_g71_memsys_single_counter,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 301100,
+		.op = kbase_g71_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "tex_issue",
+		.default_value = 197400,
+		.op = kbase_g71_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_COORD_ISSUE,
+	},
+	{
+		.name = "tile_wb",
+		.default_value = -156400,
+		.op = kbase_g71_sum_all_shader_cores,
+		.counter_block_offset = SC_BEATS_WR_TIB,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 115800,
+		.op = kbase_g71_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+static int kbase_g71_power_model_init(struct kbase_ipa_model *model)
+{
+	int i, err = 0;
+	struct kbase_ipa_model_vinstr_data *model_data;
+
+	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model_data->kbdev = model->kbdev;
+	model_data->groups_def = ipa_groups_def;
+	BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def) > KBASE_IPA_MAX_GROUP_DEF_NUM);
+	model_data->groups_def_num = ARRAY_SIZE(ipa_groups_def);
+
+	model->model_data = (void *) model_data;
+
+	for (i = 0; i < ARRAY_SIZE(ipa_groups_def); ++i) {
+		const struct kbase_ipa_group *group = &ipa_groups_def[i];
+
+		model_data->group_values[i] = group->default_value;
+		err = kbase_ipa_model_add_param_s32(model, group->name,
+					&model_data->group_values[i],
+					1, false);
+		if (err)
+			goto exit;
+	}
+
+	model_data->scaling_factor = 5;
+	err = kbase_ipa_model_add_param_s32(model, "scale",
+					    &model_data->scaling_factor,
+					    1, false);
+	if (err)
+		goto exit;
+
+	err = kbase_ipa_attach_vinstr(model_data);
+
+exit:
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kfree(model_data);
+	}
+	return err;
+}
+
+static void kbase_g71_power_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_vinstr_data *model_data =
+			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+
+	kbase_ipa_detach_vinstr(model_data);
+	kfree(model_data);
+}
+
+
+struct kbase_ipa_model_ops kbase_g71_ipa_model_ops = {
+		.name = "mali-g71-power-model",
+		.init = kbase_g71_power_model_init,
+		.term = kbase_g71_power_model_term,
+		.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff,
+		.do_utilization_scaling_in_framework = false,
+};
+KBASE_EXPORT_TEST_API(kbase_g71_ipa_model_ops);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c
new file mode 100755
index 0000000..7951b74
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c
@@ -0,0 +1,311 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include <linux/thermal.h>
+
+#include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+
+/* Performance counter blocks base offsets */
+#define JM_BASE             (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define TILER_BASE          (1 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define MEMSYS_BASE         (2 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define SC0_BASE_ONE_MEMSYS (3 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define SC0_BASE_TWO_MEMSYS (4 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+
+/* JM counter block offsets */
+#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT *  6)
+
+/* Tiler counter block offsets */
+#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45)
+
+/* MEMSYS counter block offsets */
+#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25)
+
+/* SC counter block offsets */
+#define SC_FRAG_ACTIVE             (KBASE_IPA_NR_BYTES_PER_CNT *  4)
+#define SC_EXEC_CORE_ACTIVE        (KBASE_IPA_NR_BYTES_PER_CNT * 26)
+#define SC_EXEC_INSTR_COUNT        (KBASE_IPA_NR_BYTES_PER_CNT * 28)
+#define SC_TEX_COORD_ISSUE         (KBASE_IPA_NR_BYTES_PER_CNT * 40)
+#define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42)
+#define SC_VARY_INSTR              (KBASE_IPA_NR_BYTES_PER_CNT * 49)
+#define SC_VARY_SLOT_32            (KBASE_IPA_NR_BYTES_PER_CNT * 50)
+#define SC_VARY_SLOT_16            (KBASE_IPA_NR_BYTES_PER_CNT * 51)
+#define SC_BEATS_RD_LSC            (KBASE_IPA_NR_BYTES_PER_CNT * 56)
+#define SC_BEATS_WR_LSC            (KBASE_IPA_NR_BYTES_PER_CNT * 61)
+#define SC_BEATS_WR_TIB            (KBASE_IPA_NR_BYTES_PER_CNT * 62)
+
+/** Maximum number of cores for which a single Memory System block of performance counters is present. */
+#define KBASE_G7x_SINGLE_MEMSYS_MAX_NUM_CORES ((u8)4)
+
+
+/**
+ * get_jm_counter() - get performance counter offset inside the Job Manager block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the Job Manager block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						u32 counter_block_offset)
+{
+	return JM_BASE + counter_block_offset;
+}
+
+/**
+ * get_memsys_counter() - get performance counter offset inside the Memory System block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Memory System block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						    u32 counter_block_offset)
+{
+	/* The base address of Memory System performance counters is always the same, although their number
+	 * may vary based on the number of cores. For the moment it's ok to return a constant.
+	 */
+	return MEMSYS_BASE + counter_block_offset;
+}
+
+/**
+ * get_sc_counter() - get performance counter offset inside the Shader Cores block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Shader Cores block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						u32 counter_block_offset)
+{
+	const u32 sc_base = model_data->kbdev->gpu_props.num_cores <= KBASE_G7x_SINGLE_MEMSYS_MAX_NUM_CORES ?
+			    SC0_BASE_ONE_MEMSYS :
+			    SC0_BASE_TWO_MEMSYS;
+
+	return sc_base + counter_block_offset;
+}
+
+/**
+ * memsys_single_counter() - calculate energy for a single Memory System performance counter.
+ * @model_data:   pointer to GPU model data.
+ * @coeff:        default value of coefficient for IPA group.
+ * @offset:       offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Memory System performance counter.
+ */
+static s64 kbase_g7x_memsys_single_counter(
+		struct kbase_ipa_model_vinstr_data *model_data,
+		s32 coeff,
+		u32 offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset);
+	return kbase_ipa_single_counter(model_data, coeff, counter);
+}
+
+/**
+ * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a Shader Cores performance counter for all cores.
+ */
+static s64 kbase_g7x_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff,
+	u32 counter_block_offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_sc_counter(model_data,
+						       counter_block_offset);
+	return kbase_ipa_sum_all_shader_cores(model_data, coeff, counter);
+}
+
+/**
+ * jm_single_counter() - calculate energy for a single Job Manager performance counter.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Job Manager performance counter.
+ */
+static s64 kbase_g7x_jm_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff,
+	u32 counter_block_offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_jm_counter(model_data,
+						     counter_block_offset);
+	return kbase_ipa_single_counter(model_data, coeff, counter);
+}
+
+/**
+ * get_active_cycles() - return the GPU_ACTIVE counter
+ * @model_data:            pointer to GPU model data.
+ *
+ * Return: the number of cycles the GPU was active during the counter sampling
+ * period.
+ */
+static u32 kbase_g7x_get_active_cycles(
+	struct kbase_ipa_model_vinstr_data *model_data)
+{
+	u32 counter = kbase_g7x_power_model_get_jm_counter(model_data, JM_GPU_ACTIVE);
+
+	/* Counters are only 32-bit, so we can safely multiply by 1 then cast
+	 * the 64-bit result back to a u32.
+	 */
+	return kbase_ipa_single_counter(model_data, 1, counter);
+}
+
+/** Table of IPA group definitions.
+ *
+ * For each IPA group, this table defines a function to access the given performance block counter (or counters,
+ * if the operation needs to be iterated on multiple blocks) and calculate energy estimation.
+ */
+
+static const struct kbase_ipa_group ipa_groups_def_g71[] = {
+	{
+		.name = "l2_access",
+		.default_value = 526300,
+		.op = kbase_g7x_memsys_single_counter,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 301100,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "tex_issue",
+		.default_value = 197400,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_COORD_ISSUE,
+	},
+	{
+		.name = "tile_wb",
+		.default_value = -156400,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_BEATS_WR_TIB,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 115800,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_g72[] = {
+	{
+		.name = "l2_access",
+		.default_value = 393000,
+		.op = kbase_g7x_memsys_single_counter,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 227000,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "tex_issue",
+		.default_value = 181900,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_COORD_ISSUE,
+	},
+	{
+		.name = "tile_wb",
+		.default_value = -120200,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_BEATS_WR_TIB,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 133100,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_tnox[] = {
+	{
+		.name = "gpu_active",
+		.default_value = 122000,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 488900,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "vary_instr",
+		.default_value = 212100,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_VARY_INSTR,
+	},
+	{
+		.name = "tex_tfch_num_operations",
+		.default_value = 288000,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS,
+	},
+	{
+		.name = "l2_access",
+		.default_value = 378100,
+		.op = kbase_g7x_memsys_single_counter,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+};
+
+#define STANDARD_POWER_MODEL(gpu) \
+	static int kbase_ ## gpu ## _power_model_init(\
+			struct kbase_ipa_model *model) \
+	{ \
+		BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_ ## gpu) > \
+				KBASE_IPA_MAX_GROUP_DEF_NUM); \
+		return kbase_ipa_vinstr_common_model_init(model, \
+				ipa_groups_def_ ## gpu, \
+				ARRAY_SIZE(ipa_groups_def_ ## gpu), \
+				kbase_g7x_get_active_cycles); \
+	} \
+	struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
+		.name = "mali-" #gpu "-power-model", \
+		.init = kbase_ ## gpu ## _power_model_init, \
+		.term = kbase_ipa_vinstr_common_model_term, \
+		.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
+	}; \
+	KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
+
+STANDARD_POWER_MODEL(g71);
+STANDARD_POWER_MODEL(g72);
+STANDARD_POWER_MODEL(tnox);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
new file mode 100755
index 0000000..10da0c5
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -0,0 +1,461 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t83x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t82x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tMIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tHEx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tSIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tDVx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tNOx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tGOx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tKAx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tTRx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tBOx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tEGx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
new file mode 100755
index 0000000..bcd6c5f
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -0,0 +1,1222 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_ISSUES_H_
+#define _BASE_HWCONFIG_ISSUES_H_
+
+enum base_hw_issue {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8879,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TNOX_1194,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_generic[] = {
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t72x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t76x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t60x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t62x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t86x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t83x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t82x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tDVx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TNOX_1194,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tNOx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TNOX_1194,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tKAx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tKAx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tBOx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tBOx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tEGx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tEGx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
new file mode 100755
index 0000000..e53528e
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,1783 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _BASE_KERNEL_H_
+#define _BASE_KERNEL_H_
+
+typedef struct base_mem_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_mem_handle;
+
+#include "mali_base_mem_priv.h"
+#include "mali_kbase_profiling_gator_api.h"
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT              256
+
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET             ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET           ((unsigned char)0)
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined CDBG_ASSERT
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined KBASE_DEBUG_ASSERT
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#error assert macro not defined!
+#endif
+
+#if defined(PAGE_MASK) && defined(PAGE_SHIFT)
+#define LOCAL_PAGE_SHIFT PAGE_SHIFT
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/**
+ * typedef base_mem_alloc_flags - Memory allocation, access/hint flags.
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD),
+ * which defines a write-only region on the CPU side, which is
+ * heavily read by the CPU...
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/* Memory allocation, access/hint flags.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* IN */
+/* Read access CPU side
+ */
+#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
+
+/* Write access CPU side
+ */
+#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
+
+/* Read access GPU side
+ */
+#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
+
+/* Write access GPU side
+ */
+#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
+
+/* Execute allowed on the GPU side
+ */
+#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
+
+	/* BASE_MEM_HINT flags have been removed, but their values are reserved
+	 * for backwards compatibility with older user-space drivers. The values
+	 * can be re-used once support for r5p0 user-space drivers is removed,
+	 * presumably in r7p0.
+	 *
+	 * RESERVED: (1U << 5)
+	 * RESERVED: (1U << 6)
+	 * RESERVED: (1U << 7)
+	 * RESERVED: (1U << 8)
+	 */
+#define BASE_MEM_RESERVED_BIT_5 ((base_mem_alloc_flags)1 << 5)
+#define BASE_MEM_RESERVED_BIT_6 ((base_mem_alloc_flags)1 << 6)
+#define BASE_MEM_RESERVED_BIT_7 ((base_mem_alloc_flags)1 << 7)
+#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
+
+/* Grow backing store on GPU Page Fault
+ */
+#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
+
+/* Page coherence Outer shareable, if available
+ */
+#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
+
+/* Page coherence Inner shareable
+ */
+#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
+
+/* Should be cached on the CPU
+ */
+#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
+
+/* IN/OUT */
+/* Must have same VA on both the GPU and the CPU
+ */
+#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
+
+/* OUT */
+/* Must call mmap to acquire a GPU address for the alloc
+ */
+#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
+
+/* IN */
+/* Page coherence Outer shareable, required.
+ */
+#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
+
+/* Secure memory
+ */
+#define BASE_MEM_SECURE ((base_mem_alloc_flags)1 << 16)
+
+/* Not needed physical memory
+ */
+#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
+
+/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
+ * addresses to be the same
+ */
+#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
+
+/**
+ * Bit 19 is reserved.
+ *
+ * Do not remove, use the next unreserved bit for new flags
+ */
+#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
+
+/**
+ * Memory starting from the end of the initial commit is aligned to 'extent'
+ * pages, where 'extent' must be a power of 2 and no more than
+ * BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ */
+#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
+
+/* Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 21
+
+/* A mask for all output bits, excluding IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/* A mask for all input bits, including IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/* A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+	 BASE_MEM_COHERENT_LOCAL)
+
+
+/* A mask of all currently reserved flags
+ */
+#define BASE_MEM_FLAGS_RESERVED \
+	(BASE_MEM_RESERVED_BIT_5 | BASE_MEM_RESERVED_BIT_6 | \
+		BASE_MEM_RESERVED_BIT_7 | BASE_MEM_RESERVED_BIT_8 | \
+		BASE_MEM_RESERVED_BIT_19)
+
+/* A mask of all the flags that can be returned via the base_mem_get_flags()
+ * interface.
+ */
+#define BASE_MEM_FLAGS_QUERYABLE \
+	(BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \
+		BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \
+		BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED))
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	/**
+	 * Import type with value 1 is deprecated.
+	 */
+	BASE_MEM_IMPORT_TYPE_UMM = 2,
+	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+} base_mem_import_type;
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:	address of imported user buffer
+ * @length:	length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+	u64 ptr;
+	u64 length;
+};
+
+/**
+ * @brief Invalid memory handle.
+ *
+ * Return value from functions returning @ref base_mem_handle on error.
+ *
+ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} })
+
+/**
+ * @brief Special write-alloc memory handle.
+ *
+ * A special handle is used to represent a region where a special page is mapped
+ * with a write-alloc cache setup, typically used when the write result of the
+ * GPU isn't needed, but the GPU must write anyway.
+ *
+ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} })
+
+#define BASEP_MEM_INVALID_HANDLE               (0ull  << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE               (1ull  << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
+/* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
+						BASE_MEM_COOKIE_BASE)
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB  0xfffff000UL
+
+/**
+ * Limit on the 'extent' parameter for an allocation with the
+ * BASE_MEM_TILER_ALIGN_TOP flag set
+ *
+ * This is the same as the maximum limit for a Buffer Descriptor's chunk size
+ */
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2 \
+		(21u - (LOCAL_PAGE_SHIFT))
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES \
+		(1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2))
+
+/* Bit mask of cookies used for for memory allocation setup */
+#define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
+
+
+/**
+ * @brief Result codes of changing the size of the backing store allocated to a tmem region
+ */
+typedef enum base_backing_threshold_status {
+	BASE_BACKING_THRESHOLD_OK = 0,			    /**< Resize successful */
+	BASE_BACKING_THRESHOLD_ERROR_OOM = -2,		    /**< Increase failed due to an out-of-memory condition */
+	BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
+} base_backing_threshold_status;
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief a basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+	struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent imported memory object.
+ * Simple opague handle to imported memory, can't be used
+ * with anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+	struct {
+		int fd;
+	} basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
+ */
+typedef struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completely unused
+ * by the Base driver. It can be used to store things such as callback
+ * function pointer, data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+	u64 blob[2];	 /**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ *         Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *         specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+	base_mem_handle handle;
+	u64 offset;
+	u64 length;
+};
+
+/**
+ * Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
+ * initial commit is aligned to 'extent' pages, where 'extent' must be a power
+ * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ */
+#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP  (1 << 0)
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extent:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ * @bin_id:                     The JIT allocation bin, used in conjunction with
+ *                              @max_allocations to limit the number of each
+ *                              type of JIT allocation.
+ * @max_allocations:            The maximum number of allocations allowed within
+ *                              the bin specified by @bin_id. Should be the same
+ *                              for all JIT allocations within the same bin.
+ * @flags:                      flags specifying the special requirements for
+ *                              the JIT allocation.
+ * @padding:                    Expansion space - should be initialised to zero
+ * @usage_id:                   A hint about which allocation should be reused.
+ *                              The kernel should attempt to use a previous
+ *                              allocation with the same usage_id
+ */
+struct base_jit_alloc_info {
+	u64 gpu_alloc_addr;
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	u8 id;
+	u8 bin_id;
+	u8 max_allocations;
+	u8 flags;
+	u8 padding[2];
+	u16 usage_id;
+};
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
+ * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without
+ * changing the structure size).
+ * When the flag is set for a particular dependency to signal that it is an ordering only dependency then
+ * errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID  (0)       /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA     (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER    (1U << 1) /**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly.  By not specifying the
+ * correct settings can/will cause an early job termination.  Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u32 base_jd_core_req;
+
+/* Requirements that come from the HW */
+
+/**
+ * No requirement, dependency only
+ */
+#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
+
+/**
+ * Requires fragment shaders
+ */
+#define BASE_JD_REQ_FS  ((base_jd_core_req)1 << 0)
+
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS  ((base_jd_core_req)1 << 1)
+#define BASE_JD_REQ_T   ((base_jd_core_req)1 << 2)   /**< Requires tiling */
+#define BASE_JD_REQ_CF  ((base_jd_core_req)1 << 3)   /**< Requires cache flushes */
+#define BASE_JD_REQ_V   ((base_jd_core_req)1 << 4)   /**< Requires value writeback */
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC  ((base_jd_core_req)1 << 13)
+
+/**
+ * SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP  ((base_jd_core_req)1 << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON               ((base_jd_core_req)1 << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced no syncsets can be bundled with the atom
+ * but should instead be part of a NULL jobs inserted into the dependency tree.
+ * The first pre_dep object must be configured for the external resouces to use,
+ * the second pre_dep object can be used to create other dependencies.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and
+ * BASE_JD_REQ_SOFT_EVENT_WAIT.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES   ((base_jd_core_req)1 << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB        ((base_jd_core_req)1 << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME      (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER          (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/**
+ * SW Only requirement : Replay job.
+ *
+ * If the preceding job fails, the replay job will cause the jobs specified in
+ * the list of base_jd_replay_payload pointed to by the jc pointer to be
+ * replayed.
+ *
+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
+ * job is failed, as well as any following dependencies.
+ *
+ * The replayed jobs will require a number of atom IDs. If there are not enough
+ * free atom IDs then the replay job will fail.
+ *
+ * If the preceding job does not fail, then the replay job is returned as
+ * completed.
+ *
+ * The replayed jobs will never be returned to userspace. The preceding failed
+ * job will be returned to userspace as failed; the status of this job should
+ * be ignored. Completion should be determined by the status of the replay soft
+ * job.
+ *
+ * In order for the jobs to be replayed, the job headers will have to be
+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type
+ * field indicates a Vertex Shader Job then it will be changed to Null Job.
+ *
+ * The replayed jobs have the following assumptions :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - Pre-dependencies are created based on job order.
+ * - Atom numbers are automatically assigned.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ */
+#define BASE_JD_REQ_SOFT_REPLAY                 (BASE_JD_REQ_SOFT_JOB | 0x4)
+/**
+ * SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ *   other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ *   possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET              (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET            (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY             (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/**
+ * SW only requirement: Just In Time allocation
+ *
+ * This job requests a JIT allocation based on the request in the
+ * @base_jit_alloc_info structure which is passed via the jc element of
+ * the atom.
+ *
+ * It should be noted that the id entry in @base_jit_alloc_info must not
+ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job to free the JIT allocation is still made.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC              (BASE_JD_REQ_SOFT_JOB | 0x9)
+/**
+ * SW only requirement: Just In Time free
+ *
+ * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC
+ * to be freed. The ID of the JIT allocation is passed via the jc element of
+ * the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE               (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/**
+ * SW only requirement: Map external resource
+ *
+ * This job requests external resource(s) are mapped once the dependencies
+ * of the job have been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP            (BASE_JD_REQ_SOFT_JOB | 0xb)
+/**
+ * SW only requirement: Unmap external resource
+ *
+ * This job requests external resource(s) are unmapped once the dependencies
+ * of the job has been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP          (BASE_JD_REQ_SOFT_JOB | 0xc)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE    ((base_jd_core_req)1 << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE   ((base_jd_core_req)1 << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job starts which does not have this bit set or a job completes
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if
+ * the CPU may have written to memory addressed by the job since the last job
+ * without this bit set was submitted.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job completes which does not have this bit set or a job starts
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if
+ * the CPU may read from or partially overwrite memory addressed by the job
+ * before the next job without this bit set completes.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
+
+/**
+ * These requirement bits are currently unused in base_jd_core_req
+ */
+#define BASEP_JD_REQ_RESERVED \
+	(~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
+	BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
+	BASE_JD_REQ_EVENT_COALESCE | \
+	BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
+	BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
+	BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END))
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASE_JD_REQ_ATOM_TYPE \
+	(BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
+	BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of a soft job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
+
+/*
+ * Returns non-zero value if core requirements passed define a soft job or
+ * a dependency only job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \
+	((core_req & BASE_JD_REQ_SOFT_JOB) || \
+	(core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
+
+/**
+ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
+ * handles retaining cores for power management and affinity management.
+ *
+ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
+ * where lots of atoms could be submitted before powerup, and each has an
+ * affinity chosen that causes other atoms to have an affinity
+ * violation. Whilst the affinity was not causing violations at the time it
+ * was chosen, it could cause violations thereafter. For example, 1000 jobs
+ * could have had their affinity chosen during the powerup time, so any of
+ * those 1000 jobs could cause an affinity violation later on.
+ *
+ * The attack would otherwise occur because other atoms/contexts have to wait for:
+ * -# the currently running atoms (which are causing the violation) to
+ * finish
+ * -# and, the atoms that had their affinity chosen during powerup to
+ * finish. These are run preferentially because they don't cause a
+ * violation, but instead continue to cause the violation in others.
+ * -# or, the attacker is scheduled out (which might not happen for just 2
+ * contexts)
+ *
+ * By re-choosing the affinity (which is designed to avoid violations at the
+ * time it's chosen), we break condition (2) of the wait, which minimizes the
+ * problem to just waiting for current jobs to finish (which can be bounded if
+ * the Job Scheduling Policy has a timer).
+ */
+enum kbase_atom_coreref_state {
+	/** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
+	KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
+	/** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
+	KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
+	/** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
+	KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
+	/** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
+	KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
+	/** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
+	KBASE_ATOM_COREREF_STATE_READY
+};
+
+/*
+ * Base Atom priority
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling between atoms of the same type within
+ * a base context, and only after the atoms have had dependencies resolved.
+ * Fragment atoms does not affect non-frament atoms with lower priorities, and
+ * the other way around. For example, a low priority atom that has had its
+ * dependencies resolved might run before a higher priority atom that has not
+ * had its dependencies resolved.
+ *
+ * The scheduling between base contexts/processes and between atoms from
+ * different base contexts/processes is unaffected by atom priority.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
+ *   resolved, and atom 'X' has a higher priority than atom 'Y'
+ * - If atom 'Y' is currently running on the HW, then it is interrupted to
+ *   allow atom 'X' to run soon after
+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ *   the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * - Any two atoms that have the same priority could run in any order with
+ *   respect to each other. That is, there is no ordering constraint between
+ *   atoms of the same priority.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM  ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_HIGH    ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW     ((base_jd_prio)2)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+enum kbase_jd_atom_state {
+	/** Atom is not used */
+	KBASE_JD_ATOM_STATE_UNUSED,
+	/** Atom is queued in JD */
+	KBASE_JD_ATOM_STATE_QUEUED,
+	/** Atom has been given to JS (is runnable/running) */
+	KBASE_JD_ATOM_STATE_IN_JS,
+	/** Atom has been completed, but not yet handed back to job dispatcher
+	 *  for dependency resolution */
+	KBASE_JD_ATOM_STATE_HW_COMPLETED,
+	/** Atom has been completed, but not yet handed back to userspace */
+	KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+	base_atom_id  atom_id;               /**< An atom number */
+	base_jd_dep_type dependency_type;    /**< Dependency type */
+};
+
+/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value.
+ * In order to keep the size of the structure same, padding field has been adjusted
+ * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines)
+ * is added at the end of the structure. Place in the structure previously occupied by u16 core_req
+ * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission
+ * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left
+ * for possible future use. */
+typedef struct base_jd_atom_v2 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	u64 extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	u16 compat_core_req;	            /**< core requirements which correspond to the legacy support for UK 10.2 */
+	struct base_dependency pre_dep[2];  /**< pre-dependencies, one need to use SETTER function to assign this field,
+	this is done in order to reduce possibility of improper assigment of a dependency field */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;                  /**< Atom priority. Refer to @ref base_jd_prio for more details */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[1];
+	base_jd_core_req core_req;          /**< core requirements */
+} base_jd_atom_v2;
+
+typedef enum base_external_resource_access {
+	BASE_EXT_RES_ACCESS_SHARED,
+	BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+	u64 ext_resource;
+} base_external_resource;
+
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+	u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	u64 address;
+	u64 size;
+	struct base_external_resource extres;
+};
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] dep          The kbase jd atom dependency to be initialized.
+ * @param     id           The atom_id to be assigned.
+ * @param     dep_type     The dep_type to be assigned.
+ *
+ */
+static inline void base_jd_atom_dep_set(struct base_dependency *dep,
+		base_atom_id id, base_jd_dep_type dep_type)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	/*
+	 * make sure we don't set not allowed combinations
+	 * of atom_id/dependency_type.
+	 */
+	LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+			(id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
+
+	dep->atom_id = id;
+	dep->dependency_type = dep_type;
+}
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] dep          The kbase jd atom dependency to be written.
+ * @param[in]     from         The dependency to make a copy from.
+ *
+ */
+static inline void base_jd_atom_dep_copy(struct base_dependency *dep,
+		const struct base_dependency *from)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type);
+}
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @ref base_jd_submit and @ref base_jd_submit
+ * has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ *
+ * @pre @p fence must reference a @ref base_fence successfully initialized by
+ *      calling @ref base_fence_init.
+ * @pre @p fence was @e not initialized by calling @ref base_fence_import, nor
+ *      is it associated with a fence-trigger job that was already submitted
+ *      by calling @ref base_jd_submit.
+ * @post @p atom can be submitted by calling @ref base_jd_submit.
+ */
+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+	LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @ref base_jd_submit and
+ * @ref base_jd_submit has returned.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ *
+ * @pre @p fence must reference a @ref base_fence successfully initialized by
+ *      calling @ref base_fence_import, or it must be associated with a
+ *      fence-trigger job that was already submitted by calling
+ *      @ref base_jd_submit.
+ * @post @p atom can be submitted by calling @ref base_jd_submit.
+ */
+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up an external resource object to reference
+ * a memory allocation and the type of access requested.
+ *
+ * @param[in] res     The resource object to initialize
+ * @param     handle  The handle to the imported memory object, must be
+ *                    obtained by calling @ref base_mem_as_import_handle().
+ * @param     access  The type of access requested
+ */
+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
+{
+	u64 address;
+
+	address = handle.basep.handle;
+
+	LOCAL_ASSERT(res != NULL);
+	LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+	LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+	res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+	BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
+	BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
+	BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */
+	BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
+	BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
+	BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
+	BASE_JD_SW_EVENT_RESERVED = (3u << 11),	/**< Reserved event type */
+	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)	    /**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0  - subtype
+ * @li 12:11 - type
+ * @li 13    - SW success (only valid if the SW bit is set)
+ * @li 14    - SW event (HW event if not set)
+ * @li 15    - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+typedef enum base_jd_event_code {
+	/* HW defined exceptions */
+
+	/** Start of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+	/* non-fatal exceptions */
+	BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
+	BASE_JD_EVENT_DONE = 0x01,
+	BASE_JD_EVENT_STOPPED = 0x03,	  /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
+	BASE_JD_EVENT_TERMINATED = 0x04,  /**< This is actually a fault status code - the job was hard stopped */
+	BASE_JD_EVENT_ACTIVE = 0x08,	  /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
+
+	/** End of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+
+	/** Start of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+	/* job exceptions */
+	BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+	BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+	BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+	BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+	BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+	BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+	BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+	BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+	BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+	BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+	BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+	BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+	BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+	BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+	BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+	BASE_JD_EVENT_STATE_FAULT = 0x5A,
+	BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+	BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+	/* GPU exceptions */
+	BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+	BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+	/* MMU exceptions */
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+	BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+	BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+	/* SW defined exceptions */
+	BASE_JD_EVENT_MEM_GROWTH_FAILED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_TIMED_OUT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+	BASE_JD_EVENT_JOB_CANCELLED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+	BASE_JD_EVENT_JOB_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+	BASE_JD_EVENT_PM_EVENT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+	BASE_JD_EVENT_FORCE_REPLAY	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
+
+	BASE_JD_EVENT_BAG_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+	/** End of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+	BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
+	BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+	/** End of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of Kernel-only status codes. Such codes are never returned to user-space */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+	BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+	/** End of Kernel-only status codes. */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. The can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (ie all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+	base_jd_event_code event_code;  /**< event code */
+	base_atom_id atom_number;       /**< the atom number that has completed */
+	struct base_jd_udata udata;     /**< user data */
+} base_jd_event_v2;
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field
+ * of @ref base_jd_atom.
+ *
+ * It must not occupy the same CPU cache line(s) as any neighboring data.
+ * This is to avoid cases where access to pages containing the structure
+ * is shared between cached and un-cached memory regions, which would
+ * cause memory corruption.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+	u64 system_time;
+	u64 cycle_counter;
+	u64 sec;
+	u32 usec;
+	u8 padding[36];
+} base_dump_cpu_gpu_counters;
+
+/** @} end group base_user_api_job_dispatch */
+
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistent guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ *  - Part of a critical inner-loop
+ *  - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU  registers themselves.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided the entire processed @ref base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algorithm can be determined either by a u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * required for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+struct mali_base_gpu_core_props {
+	/**
+	 * Product specific value.
+	 */
+	u32 product_id;
+
+	/**
+	 * Status of the GPU release.
+	 * No defined values, but starts at 0 and increases by one for each
+	 * release status (alpha, beta, EAC, etc.).
+	 * 4 bit values (0-15).
+	 */
+	u16 version_status;
+
+	/**
+	 * Minor release number of the GPU. "P" part of an "RnPn" release number.
+     * 8 bit values (0-255).
+	 */
+	u16 minor_revision;
+
+	/**
+	 * Major release number of the GPU. "R" part of an "RnPn" release number.
+     * 4 bit values (0-15).
+	 */
+	u16 major_revision;
+
+	u16 padding;
+
+	/* The maximum GPU frequency. Reported to applications by
+	 * clGetDeviceInfo()
+	 */
+	u32 gpu_freq_khz_max;
+
+	/**
+	 * Size of the shader program counter, in bits.
+	 */
+	u32 log2_program_counter_size;
+
+	/**
+	 * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+	 * bitpattern where a set bit indicates that the format is supported.
+	 *
+	 * Before using a texture format, it is recommended that the corresponding
+	 * bit be checked.
+	 */
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	/**
+	 * Theoretical maximum memory available to the GPU. It is unlikely that a
+	 * client will be able to allocate all of this memory for their own
+	 * purposes, but this at least provides an upper bound on the memory
+	 * available to the GPU.
+	 *
+	 * This is required for OpenCL's clGetDeviceInfo() call when
+	 * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+	 * client will not be expecting to allocate anywhere near this value.
+	 */
+	u64 gpu_available_memory_size;
+
+	/**
+	 * The number of execution engines.
+	 */
+	u8 num_exec_engines;
+};
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+	u8 log2_line_size;
+	u8 log2_cache_size;
+	u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+	u32 bin_size_bytes;	/* Max is 4*2^15 */
+	u32 max_active_levels;	/* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+	u32 max_threads;            /* Max. number of threads per core */
+	u32 max_workgroup_size;     /* Max. number of threads per workgroup */
+	u32 max_barrier_size;       /* Max. number of threads that can synchronize on a simple barrier */
+	u16 max_registers;          /* Total size [1..65535] of the register file available per core. */
+	u8  max_task_queue;         /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+	u8  max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+	u8  impl_tech;              /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+	u8  padding[3];
+	u32 tls_alloc;              /* Number of threads per core that TLS must
+				     * be allocated for
+				     */
+};
+
+/**
+ * @brief descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+	u64 core_mask;	       /**< Core restriction mask required for the group */
+	u16 num_cores;	       /**< Number of cores in the group */
+	u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+	u32 num_groups;
+
+	/**
+	 * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+	 *
+	 * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+	 * whether the core groups are coherent or not. Hence this member is needed
+	 * to calculate how much memory is required for dumping.
+	 *
+	 * @note Do not use it to work out how many valid elements are in the
+	 * group[] member. Use num_groups instead.
+	 */
+	u32 num_core_groups;
+
+	/**
+	 * Coherency features of the memory, accessed by @ref gpu_mem_features
+	 * methods
+	 */
+	u32 coherency;
+
+	u32 padding;
+
+	/**
+	 * Descriptors of coherent groups
+	 */
+	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+	u64 shader_present;
+	u64 tiler_present;
+	u64 l2_present;
+	u64 stack_present;
+
+	u32 l2_features;
+	u32 core_features;
+	u32 mem_features;
+	u32 mmu_features;
+
+	u32 as_present;
+
+	u32 js_present;
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 tiler_features;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	u32 gpu_id;
+
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+
+	/*
+	 * Note: This is the _selected_ coherency mode rather than the
+	 * available modes as exposed in the coherency_features register.
+	 */
+	u32 coherency_mode;
+
+	u32 thread_tls_alloc;
+};
+
+/**
+ * Return structure for base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this data structure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct base_gpu_props {
+	struct mali_base_gpu_core_props core_props;
+	struct mali_base_gpu_l2_cache_props l2_props;
+	u64 unused_1; /* keep for backwards compatibility */
+	struct mali_base_gpu_tiler_props tiler_props;
+	struct mali_base_gpu_thread_props thread_props;
+
+	/** This member is large, likely to be 128 bytes */
+	struct gpu_raw_gpu_props raw_props;
+
+	/** This must be last member of the structure */
+	struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+typedef u32 base_context_create_flags;
+
+/** No flags set */
+#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
+
+/** Base context is embedded in a cctx object (flag used for CINSTR
+ * software counter macros)
+ */
+#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
+
+/** Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled.
+ */
+#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
+	((base_context_create_flags)1 << 1)
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be
+ * passed to base_context_init()
+ */
+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
+	(((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
+	  ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be
+ * passed to the kernel
+ */
+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
+	((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
+
+/*
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+/** Private flag tracking whether job descriptor dumping is disabled */
+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31))
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C Pre-processor macros are exposed here to do with Platform
+ * Config.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revison.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/**
+ * @brief The payload for a replay job. This must be in GPU memory.
+ */
+typedef struct base_jd_replay_payload {
+	/**
+	 * Pointer to the first entry in the base_jd_replay_jc list.  These
+	 * will be replayed in @b reverse order (so that extra ones can be added
+	 * to the head in future soft jobs without affecting this soft job)
+	 */
+	u64 tiler_jc_list;
+
+	/**
+	 * Pointer to the fragment job chain.
+	 */
+	u64 fragment_jc;
+
+	/**
+	 * Pointer to the tiler heap free FBD field to be modified.
+	 */
+	u64 tiler_heap_free;
+
+	/**
+	 * Hierarchy mask for the replayed fragment jobs. May be zero.
+	 */
+	u16 fragment_hierarchy_mask;
+
+	/**
+	 * Hierarchy mask for the replayed tiler jobs. May be zero.
+	 */
+	u16 tiler_hierarchy_mask;
+
+	/**
+	 * Default weight to be used for hierarchy levels not in the original
+	 * mask.
+	 */
+	u32 hierarchy_default_weight;
+
+	/**
+	 * Core requirements for the tiler job chain
+	 */
+	base_jd_core_req tiler_core_req;
+
+	/**
+	 * Core requirements for the fragment job chain
+	 */
+	base_jd_core_req fragment_core_req;
+} base_jd_replay_payload;
+
+/**
+ * @brief An entry in the linked list of job chains to be replayed. This must
+ *        be in GPU memory.
+ */
+typedef struct base_jd_replay_jc {
+	/**
+	 * Pointer to next entry in the list. A setting of NULL indicates the
+	 * end of the list.
+	 */
+	u64 next;
+
+	/**
+	 * Pointer to the job chain.
+	 */
+	u64 jc;
+
+} base_jd_replay_jc;
+
+/* Maximum number of jobs allowed in a fragment chain in the payload of a
+ * replay job */
+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
+
+/** @} end group base_api */
+
+typedef struct base_profiling_controls {
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+} base_profiling_controls;
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
+
+/* Indicate that job dumping is enabled. This could affect certain timers
+ * to account for the performance impact. */
+#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
+
+#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
+		BASE_TLSTREAM_JOB_DUMPING_ENABLED)
+
+#endif				/* _BASE_KERNEL_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100644
index 0000000..52c8a4f
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,57 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC	(1U << 0)
+#define BASE_SYNCSET_OP_CSYNC	(1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	base_mem_handle mem_handle;
+	u64 user_addr;
+	u64 size;
+	u8 type;
+	u8 padding[7];
+};
+
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
new file mode 100644
index 0000000..5e8add8
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif	/*_BASE_VENDOR_SPEC_FUNC_H_*/
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100755
index 0000000..cd711b8
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,639 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
+#include <linux/sched/mm.h>
+#endif
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_linux.h>
+
+/*
+ * Include mali_kbase_defs.h first as this provides types needed by other local
+ * header files.
+ */
+#include "mali_kbase_defs.h"
+
+#include "mali_kbase_context.h"
+#include "mali_kbase_strings.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_trace_timeline.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_debug_job_fault.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_vinstr.h"
+
+#include "ipa/mali_kbase_ipa.h"
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+
+#ifndef u64_to_user_ptr
+/* Introduced in Linux v4.6 */
+#define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x)
+#endif
+
+/*
+ * Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: configuration attributes member of kbdev needs to have
+* been setup before calling kbase_device_init
+*/
+
+/*
+* API to acquire device list semaphore and return pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+
+/**
+ * kbase_get_unmapped_area() - get an address range which is currently
+ *                             unmapped.
+ * @filp: File operations associated with kbase device.
+ * @addr: CPU mapped address (set to 0 since MAP_FIXED mapping is not allowed
+ *        as Mali GPU driver decides about the mapping).
+ * @len: Length of the address range.
+ * @pgoff: Page offset within the GPU address space of the kbase context.
+ * @flags: Flags for the allocation.
+ *
+ * Finds the unmapped address range which satisfies requirements specific to
+ * GPU and those provided by the call parameters.
+ *
+ * 1) Requirement for allocations greater than 2MB:
+ * - alignment offset is set to 2MB and the alignment mask to 2MB decremented
+ * by 1.
+ *
+ * 2) Requirements imposed for the shader memory alignment:
+ * - alignment is decided by the number of GPU pc bits which can be read from
+ * GPU properties of the device associated with this kbase context; alignment
+ * offset is set to this value in bytes and the alignment mask to the offset
+ * decremented by 1.
+ * - allocations must not to be at 4GB boundaries. Such cases are indicated
+ * by the flag KBASE_REG_GPU_NX not being set (check the flags of the kbase
+ * region). 4GB boundaries can be checked against @ref BASE_MEM_MASK_4GB.
+ *
+ * 3) Requirements imposed for tiler memory alignment, cases indicated by
+ * the flag @ref KBASE_REG_TILER_ALIGN_TOP (check the flags of the kbase
+ * region):
+ * - alignment offset is set to the difference between the kbase region
+ * extent (converted from the original value in pages to bytes) and the kbase
+ * region initial_commit (also converted from the original value in pages to
+ * bytes); alignment mask is set to the kbase region extent in bytes and
+ * decremented by 1.
+ *
+ * Return: if successful, address of the unmapped area aligned as required;
+ *         error code (negative) in case of failure;
+ */
+unsigned long kbase_get_unmapped_area(struct file *filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+
+/**
+ * kbase_jd_submit - Submit atoms to the job dispatcher
+ *
+ * @kctx: The kbase context to submit to
+ * @user_addr: The address in user space of the struct base_jd_atom_v2 array
+ * @nr_atoms: The number of atoms in the array
+ * @stride: sizeof(struct base_jd_atom_v2)
+ * @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6)
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_jd_submit(struct kbase_context *kctx,
+		void __user *user_addr, u32 nr_atoms, u32 stride,
+		bool uk6_atom);
+
+/**
+ * kbase_jd_done_worker - Handle a job completion
+ * @data: a &struct work_struct
+ *
+ * This function requeues the job from the runpool (if it was soft-stopped or
+ * removed from NEXT registers).
+ *
+ * Removes it from the system if it finished/failed/was cancelled.
+ *
+ * Resolves dependencies to add dependent jobs to the context, potentially
+ * starting them if necessary (which may add more references to the context)
+ *
+ * Releases the reference to the context from the no-longer-running job.
+ *
+ * Handles retrying submission outside of IRQ context if it failed from within
+ * IRQ context.
+ */
+void kbase_jd_done_worker(struct work_struct *data);
+
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+		kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+bool jd_submit_atom(struct kbase_context *kctx,
+			 const struct base_jd_atom_v2 *user_atom,
+			 struct kbase_jd_atom *katom);
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ *                                               and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The hwaccess_lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom);
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom);
+#endif
+int kbase_soft_event_update(struct kbase_context *kctx,
+			    u64 event,
+			    unsigned char new_status);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+void kbasep_soft_job_timeout_worker(struct timer_list *timer);
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
+
+/* api used internally for register access. Contains validation and tracing */
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size);
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev     The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code  exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+		u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or has already been suspended
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+	return kbdev->pm.suspending;
+}
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int result;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+	result = katom - &kctx->jctx.atoms[0];
+	KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
+	return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id:   ID of atom to retrieve
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+		struct kbase_context *kctx, int id)
+{
+	return &kctx->jctx.atoms[id];
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just increment
+ * the counter during disjoint events we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down
+ * should be called
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if !defined(UINT64_MAX)
+	#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#endif
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)     \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+	kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+	kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+	trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(subcode);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/* kbase_io_history_init - initialize data struct for register access history
+ *
+ * @kbdev The register history to initialize
+ * @n The number of register accesses that the buffer could hold
+ *
+ * @return 0 if successfully initialized, failure otherwise
+ */
+int kbase_io_history_init(struct kbase_io_history *h, u16 n);
+
+/* kbase_io_history_term - uninit all resources for the register access history
+ *
+ * @h The register history to terminate
+ */
+void kbase_io_history_term(struct kbase_io_history *h);
+
+/* kbase_io_history_dump - print the register history to the kernel ring buffer
+ *
+ * @kbdev Pointer to kbase_device containing the register history to dump
+ */
+void kbase_io_history_dump(struct kbase_device *kbdev);
+
+/**
+ * kbase_io_history_resize - resize the register access history buffer.
+ *
+ * @h: Pointer to a valid register history to resize
+ * @new_size: Number of accesses the buffer could hold
+ *
+ * A successful resize will clear all recent register accesses.
+ * If resizing fails for any reason (e.g., could not allocate memory, invalid
+ * buffer size) then the original buffer will be kept intact.
+ *
+ * @return 0 if the buffer was resized, failure otherwise
+ */
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbase_io_history_init(...) ((int)0)
+
+#define kbase_io_history_term CSTD_NOP
+
+#define kbase_io_history_dump CSTD_NOP
+
+#define kbase_io_history_resize CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+#endif
+
+extern int meson_gpu_data_invalid_count;
+extern int meson_gpu_fault;
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100755
index 0000000..f3e71d1
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015,2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+	struct device *dev = katom->kctx->kbdev->dev;
+	u32   clamped = 0;
+	struct kbase_va_region *region;
+	struct tagged_addr *page_array;
+	u64 page_index;
+	u32 offset = katom->jc & (~PAGE_MASK);
+	u32 *page_1 = NULL;
+	u32 *page_2 = NULL;
+	u32   job_header[JOB_HEADER_SIZE_IN_WORDS];
+	void *dst = job_header;
+	u32 minX, minY, maxX, maxY;
+	u32 restartX, restartY;
+	struct page *p;
+	u32 copy_size;
+
+	dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+	if (!(katom->core_req & BASE_JD_REQ_FS))
+		return 0;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+			katom->jc);
+	if (!region || (region->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(region);
+	if (!page_array)
+		goto out_unlock;
+
+	page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+	p = phys_to_page(as_phys_addr_t(page_array[page_index]));
+
+	/* we need the first 10 words of the fragment shader job descriptor.
+	 * We need to check that the offset + 10 words is less that the page
+	 * size otherwise we need to load the next page.
+	 * page_size_overflow will be equal to 0 in case the whole descriptor
+	 * is within the page > 0 otherwise.
+	 */
+	copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+	page_1 = kmap_atomic(p);
+
+	/* page_1 is a u32 pointer, offset is expressed in bytes */
+	page_1 += offset>>2;
+
+	kbase_sync_single_for_cpu(katom->kctx->kbdev,
+			kbase_dma_addr(p) + offset,
+			copy_size, DMA_BIDIRECTIONAL);
+
+	memcpy(dst, page_1, copy_size);
+
+	/* The data needed overflows page the dimension,
+	 * need to map the subsequent page */
+	if (copy_size < JOB_HEADER_SIZE) {
+		p = phys_to_page(as_phys_addr_t(page_array[page_index + 1]));
+		page_2 = kmap_atomic(p);
+
+		kbase_sync_single_for_cpu(katom->kctx->kbdev,
+				kbase_dma_addr(p),
+				JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+		memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+	}
+
+	/* We managed to correctly map one or two pages (in case of overflow) */
+	/* Get Bounding Box data and restart index from fault address low word */
+	minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+	restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+	dev_warn(dev, "Before Clamping:\n"
+			"Jobstatus: %08x\n"
+			"restartIdx: %08x\n"
+			"Fault_addr_low: %08x\n"
+			"minCoordsX: %08x minCoordsY: %08x\n"
+			"maxCoordsX: %08x maxCoordsY: %08x\n",
+			job_header[JOB_DESC_STATUS_WORD],
+			job_header[JOB_DESC_RESTART_INDEX_WORD],
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+			minX, minY,
+			maxX, maxY);
+
+	/* Set the restart index to the one which generated the fault*/
+	job_header[JOB_DESC_RESTART_INDEX_WORD] =
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+	if (restartX < minX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to minimum. %08x clamped to %08x\n",
+			restartX, minX);
+		clamped =  1;
+	}
+	if (restartY < minY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to minimum. %08x clamped to %08x\n",
+			restartY, minY);
+		clamped =  1;
+	}
+	if (restartX > maxX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to maximum. %08x clamped to %08x\n",
+			restartX, maxX);
+		clamped =  1;
+	}
+	if (restartY > maxY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to maximum. %08x clamped to %08x\n",
+			restartY, maxY);
+		clamped =  1;
+	}
+
+	if (clamped) {
+		/* Reset the fault address low word
+		 * and set the job status to STOPPED */
+		job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+		job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+		dev_warn(dev, "After Clamping:\n"
+				"Jobstatus: %08x\n"
+				"restartIdx: %08x\n"
+				"Fault_addr_low: %08x\n"
+				"minCoordsX: %08x minCoordsY: %08x\n"
+				"maxCoordsX: %08x maxCoordsY: %08x\n",
+				job_header[JOB_DESC_STATUS_WORD],
+				job_header[JOB_DESC_RESTART_INDEX_WORD],
+				job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+				minX, minY,
+				maxX, maxY);
+
+		/* Flush CPU cache to update memory for future GPU reads*/
+		memcpy(page_1, dst, copy_size);
+		p = phys_to_page(as_phys_addr_t(page_array[page_index]));
+
+		kbase_sync_single_for_device(katom->kctx->kbdev,
+				kbase_dma_addr(p) + offset,
+				copy_size, DMA_TO_DEVICE);
+
+		if (copy_size < JOB_HEADER_SIZE) {
+			memcpy(page_2, dst + copy_size,
+					JOB_HEADER_SIZE - copy_size);
+			p = phys_to_page(as_phys_addr_t(page_array[page_index +
+								   1]));
+
+			kbase_sync_single_for_device(katom->kctx->kbdev,
+					kbase_dma_addr(p),
+					JOB_HEADER_SIZE - copy_size,
+					DMA_TO_DEVICE);
+		}
+	}
+	if (copy_size < JOB_HEADER_SIZE)
+		kunmap_atomic(page_2);
+
+	kunmap_atomic(page_1);
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+	return clamped;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100755
index 0000000..379a05a
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+/**
+ * kbasep_10969_workaround_clamp_coordinates - Apply the WA to clamp the restart indices
+ * @katom: atom representing the fragment job for which the WA has to be applied
+ *
+ * This workaround is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, it can happen
+ * that the restart index is out of bounds and the rerun causes a tile range
+ * fault. If this happens we try to clamp the restart index to a correct value.
+ */
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
new file mode 100755
index 0000000..2e99a4d
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
@@ -0,0 +1,111 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+
+static int kbase_as_fault_read(struct seq_file *sfile, void *data)
+{
+	uintptr_t as_no = (uintptr_t) sfile->private;
+
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+	struct kbase_device *kbdev = NULL;
+
+	kbdev_list = kbase_dev_list_get();
+
+	list_for_each(entry, kbdev_list) {
+		kbdev = list_entry(entry, struct kbase_device, entry);
+
+		if (kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) {
+
+			/* don't show this one again until another fault occors */
+			kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no);
+
+			/* output the last page fault addr */
+			seq_printf(sfile, "%llu\n",
+				   (u64) kbdev->as[as_no].fault_addr);
+		}
+
+	}
+
+	kbase_dev_list_put(kbdev_list);
+
+	return 0;
+}
+
+static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbase_as_fault_read, in->i_private);
+}
+
+static const struct file_operations as_fault_fops = {
+	.open = kbase_as_fault_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ *  Initialize debugfs entry for each address space
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	uint i;
+	char as_name[64];
+	struct dentry *debugfs_directory;
+
+	kbdev->debugfs_as_read_bitmap = 0ULL;
+
+	KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces);
+	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));
+
+	debugfs_directory = debugfs_create_dir("address_spaces",
+					       kbdev->mali_debugfs_directory);
+
+	if (debugfs_directory) {
+		for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+			snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
+			debugfs_create_file(as_name, S_IRUGO,
+					    debugfs_directory,
+					    (void *)(uintptr_t)i,
+					    &as_fault_fops);
+		}
+	} else {
+		dev_warn(kbdev->dev,
+			 "unable to create address_spaces debugfs directory");
+	}
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+	return;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
new file mode 100644
index 0000000..496d8b1
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_AS_FAULT_DEBUG_FS_H
+#define _KBASE_AS_FAULT_DEBUG_FS_H
+
+/**
+ * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults
+ *
+ * @kbdev: Pointer to kbase_device
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_as_fault_debugfs_new() - make the last fault available on debugfs
+ *
+ * @kbdev: Pointer to kbase_device
+ * @as_no: The address space the fault occurred on
+ */
+static inline void
+kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
+#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_MALI_DEBUG */
+	return;
+}
+
+#endif  /*_KBASE_AS_FAULT_DEBUG_FS_H*/
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100644
index 0000000..18444b8
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,59 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+	u32 cache_flags = 0;
+
+	CSTD_UNUSED(nr_pages);
+
+	if (flags & BASE_MEM_CACHED_CPU)
+		cache_flags |= KBASE_REG_CPU_CACHED;
+
+	return cache_flags;
+}
+
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	dma_sync_single_for_device(kbdev->dev, handle, size, dir);
+}
+
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
new file mode 100644
index 0000000..8a1e529
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * kbase_cache_enabled - Choose the cache policy for a specific region
+ * @flags:    flags describing attributes of the region
+ * @nr_pages: total number of pages (backed or not) for the region
+ *
+ * Tells whether the CPU and GPU caches should be enabled or not for a specific
+ * region.
+ * This function can be modified to customize the cache policy depending on the
+ * flags and size of the region.
+ *
+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED
+ *         depending on the cache policy
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
new file mode 100644
index 0000000..ce7070d
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
@@ -0,0 +1,48 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+int kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_init_func)
+		return platform_funcs_p->platform_init_func(kbdev);
+
+	return 0;
+}
+
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_term_func)
+		platform_funcs_p->platform_term_func(kbdev);
+}
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
new file mode 100644
index 0000000..1637fcb
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
@@ -0,0 +1,299 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <linux/mm.h>
+#include <mali_malisw.h>
+#include <mali_kbase_backend_config.h>
+#include <linux/rbtree.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/**
+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ *
+ * Specifies the functions pointers for platform specific initialization and
+ * termination. By default no functions are required. No additional platform
+ * specific control is necessary.
+ */
+struct kbase_platform_funcs_conf {
+	/**
+	 * platform_init_func - platform specific init function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Returns 0 on success, negative error code otherwise.
+	 *
+	 * Function pointer for platform specific initialization or NULL if no
+	 * initialization function is required. At the point this the GPU is
+	 * not active and its power and clocks are in unknown (platform specific
+	 * state) as kbase doesn't yet have control of power and clocks.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly initialized) in here.
+	 */
+	int (*platform_init_func)(struct kbase_device *kbdev);
+	/**
+	 * platform_term_func - platform specific termination function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Function pointer for platform specific termination or NULL if no
+	 * termination function is required. At the point this the GPU will be
+	 * idle but still powered and clocked.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly terminated) in here.
+	 */
+	void (*platform_term_func)(struct kbase_device *kbdev);
+};
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+	/** Callback for when the GPU is idle and the power to it can be switched off.
+	 *
+	 * The system integrator can decide whether to either do nothing, just switch off
+	 * the clocks to the GPU, or to completely power down the GPU.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the GPU is about to become active and power must be supplied.
+	 *
+	 * This function must not return until the GPU is powered and clocked sufficiently for register access to
+	 * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
+	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 *
+	 * The return value of the first call to this function is ignored.
+	 *
+	 * @return 1 if the GPU state may have been lost, 0 otherwise.
+	 */
+	int (*power_on_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is requesting a suspend and GPU power
+	 * must be switched off.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a preceding call to power_off_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_off_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is resuming from a suspend and GPU
+	 * power must be switched on.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a following call to power_on_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_on_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_resume_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management initialization.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * will become active from calls made to the OS from within this function.
+	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else int error code.
+	 */
+	 int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management termination.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * should no longer be called by the OS on completion of this function.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-off power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_suspend callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
+	 */
+	void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-on power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_resume callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+
+	/*
+	 * Optional callback for checking if GPU can be suspended when idle
+	 *
+	 * This callback will be called by the runtime power management core
+	 * when the reference count goes to 0 to provide notification that the
+	 * GPU now seems idle.
+	 *
+	 * If this callback finds that the GPU can't be powered off, or handles
+	 * suspend by powering off directly or queueing up a power off, a
+	 * non-zero value must be returned to prevent the runtime PM core from
+	 * also triggering a suspend.
+	 *
+	 * Returning 0 will cause the runtime PM core to conduct a regular
+	 * autosuspend.
+	 *
+	 * This callback is optional and if not provided regular autosuspend
+	 * will be triggered.
+	 *
+	 * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+	 * this feature.
+	 *
+	 * Return 0 if GPU can be suspended, positive value if it can not be
+	 * suspeneded by runtime PM, else OS error code
+	 */
+	int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
+};
+
+#ifdef CONFIG_OF
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+	u64 start;
+	u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ */
+struct kbase_io_resources {
+	u32                      job_irq_number;
+	u32                      mmu_irq_number;
+	u32                      gpu_irq_number;
+	struct kbase_io_memory_region io_memory_region;
+};
+
+struct kbase_platform_config {
+	const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init: - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes.  The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ *         Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that are outside of the Power Management
+ * callbacks.
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+#ifndef CONFIG_OF
+/**
+ * kbase_platform_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_register(void);
+
+/**
+ * kbase_platform_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_register()
+ */
+void kbase_platform_unregister(void);
+#endif
+
+	  /** @} *//* end group kbase_config */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_CONFIG_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100755
index 0000000..376a94b
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,278 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+* Boolean indicating whether the driver is configured to be secure at
+* a potential loss of performance.
+*
+* This currently affects only r0p0-15dev0 HW and earlier.
+*
+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+* performance:
+*
+* - When this is set to true, the driver remains fully secure,
+* but potentially loses performance compared with setting this to
+* false.
+* - When set to false, the driver is open to certain security
+* attacks.
+*
+* From r0p0-00rel0 and onwards, there is no security loss by setting
+* this to false, and no performance loss by setting it to
+* true.
+*/
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 */
+	KBASE_AID_32 = 0x0,
+
+	/**
+	 * Restrict GPU to a half of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_16 = 0x3,
+
+	/**
+	 * Restrict GPU to a quarter of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_8  = 0x2,
+
+	/**
+	 * Restrict GPU to an eighth of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_4  = 0x1
+};
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 * Restricting ID width will reduce performance & bus load due to GPU.
+	 */
+	KBASE_3BIT_AID_32 = 0x0,
+
+	/* Restrict GPU to 7/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_28 = 0x1,
+
+	/* Restrict GPU to 3/4 of maximum Address ID count. */
+	KBASE_3BIT_AID_24 = 0x2,
+
+	/* Restrict GPU to 5/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_20 = 0x3,
+
+	/* Restrict GPU to 1/2 of maximum Address ID count.  */
+	KBASE_3BIT_AID_16 = 0x4,
+
+	/* Restrict GPU to 3/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_12 = 0x5,
+
+	/* Restrict GPU to 1/4 of maximum Address ID count. */
+	KBASE_3BIT_AID_8  = 0x6,
+
+	/* Restrict GPU to 1/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_4  = 0x7
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_3BIT_ARID_LIMIT KBASE_3BIT_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_3BIT_AWID_LIMIT KBASE_3BIT_AID_32
+
+/**
+ * Default period for DVFS sampling
+ */
+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+
+/**
+ * Power Management poweroff tick granuality. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/**
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/**
+ * Power Manager number of ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
+
+/**
+ * Default scheduling tick granuality
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
+
+/**
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */
+
+/**
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */
+
+/**
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
+
+/**
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */
+
+/**
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
+
+/**
+ * Default timeout for some software jobs, after which the software event wait
+ * jobs will be cancelled.
+ */
+#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */
+
+/**
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
+
+/**
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */
+
+/**
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
+ */
+#define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */
+
+/**
+ * Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
+/**
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
+
+/**
+ * Perform GPU power down using only platform specific code, skipping DDK power
+ * management.
+ *
+ * If this is non-zero then kbase will avoid powering down shader cores, the
+ * tiler, and the L2 cache, instead just powering down the entire GPU through
+ * platform specific code. This may be required for certain platform
+ * integrations.
+ *
+ * Note that as this prevents kbase from powering down shader cores, this limits
+ * the available power policies to coarse_demand and always_on.
+ */
+#define PLATFORM_POWER_DOWN_ONLY (1)
+
+/**
+ * Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
+ * this isn't available, so we simply define a dummy value here. If devfreq
+ * is enabled the value will be read from there, otherwise this should be
+ * overridden by defining GPU_FREQ_KHZ_MAX in the platform file.
+ */
+#define DEFAULT_GPU_FREQ_KHZ_MAX (5000)
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
new file mode 100755
index 0000000..127d9bf
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -0,0 +1,347 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_dma_fence.h>
+#include <mali_kbase_ctx_sched.h>
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+{
+	struct kbase_context *kctx;
+	int err;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* zero-inited as lot of code assume it's zero'ed out on create */
+	kctx = vzalloc(sizeof(*kctx));
+
+	if (!kctx)
+		goto out;
+
+	/* creating a context is considered a disjoint event */
+	kbase_disjoint_event(kbdev);
+
+	kctx->kbdev = kbdev;
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+	atomic_set(&kctx->refcount, 0);
+	if (is_compat)
+		kbase_ctx_flag_set(kctx, KCTX_COMPAT);
+#if defined(CONFIG_64BIT)
+	else
+		kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA);
+#endif /* !defined(CONFIG_64BIT) */
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kctx->timeline.owner_tgid = task_tgid_nr(current);
+#endif
+	atomic_set(&kctx->setup_complete, 0);
+	atomic_set(&kctx->setup_in_progress, 0);
+	spin_lock_init(&kctx->mm_update_lock);
+	kctx->process_mm = NULL;
+	atomic_set(&kctx->nonmapped_pages, 0);
+	kctx->slots_pullable = 0;
+	kctx->tgid = current->tgid;
+	kctx->pid = current->pid;
+
+	err = kbase_mem_pool_init(&kctx->mem_pool,
+				  kbdev->mem_pool_max_size_default,
+				  KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
+				  kctx->kbdev,
+				  &kbdev->mem_pool);
+	if (err)
+		goto free_kctx;
+
+	err = kbase_mem_pool_init(&kctx->lp_mem_pool,
+				  (kbdev->mem_pool_max_size_default >> 9),
+				  KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
+				  kctx->kbdev,
+				  &kbdev->lp_mem_pool);
+	if (err)
+		goto free_mem_pool;
+
+	err = kbase_mem_evictable_init(kctx);
+	if (err)
+		goto free_both_pools;
+
+	atomic_set(&kctx->used_pages, 0);
+
+	err = kbase_jd_init(kctx);
+	if (err)
+		goto deinit_evictable;
+
+	err = kbasep_js_kctx_init(kctx);
+	if (err)
+		goto free_jd;	/* safe to call kbasep_js_kctx_term  in this case */
+
+	err = kbase_event_init(kctx);
+	if (err)
+		goto free_jd;
+
+	atomic_set(&kctx->drain_pending, 0);
+
+	mutex_init(&kctx->reg_lock);
+
+	mutex_init(&kctx->mem_partials_lock);
+	INIT_LIST_HEAD(&kctx->mem_partials);
+
+	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+	spin_lock_init(&kctx->waiting_soft_jobs_lock);
+	err = kbase_dma_fence_init(kctx);
+	if (err)
+		goto free_event;
+
+	err = kbase_mmu_init(kctx);
+	if (err)
+		goto term_dma_fence;
+
+	do {
+		err = kbase_mem_pool_grow(&kctx->mem_pool,
+				MIDGARD_MMU_BOTTOMLEVEL);
+		if (err)
+			goto pgd_no_mem;
+
+		mutex_lock(&kctx->mmu_lock);
+		kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+		mutex_unlock(&kctx->mmu_lock);
+	} while (!kctx->pgd);
+
+	p = kbase_mem_alloc_page(&kctx->mem_pool);
+	if (!p)
+		goto no_sink_page;
+	kctx->aliasing_sink_page = as_tagged(page_to_phys(p));
+
+	init_waitqueue_head(&kctx->event_queue);
+
+	kctx->cookies = KBASE_COOKIE_MASK;
+
+	/* Make sure page 0 is not used... */
+	err = kbase_region_tracker_init(kctx);
+	if (err)
+		goto no_region_tracker;
+
+	err = kbase_sticky_resource_init(kctx);
+	if (err)
+		goto no_sticky;
+
+	err = kbase_jit_init(kctx);
+	if (err)
+		goto no_jit;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_set(&kctx->jctx.work_id, 0);
+#endif
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
+#endif
+
+	kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+	mutex_init(&kctx->vinstr_cli_lock);
+
+	kbase_timer_setup(&kctx->soft_job_timeout,
+			  kbasep_soft_job_timeout_worker);
+
+	return kctx;
+
+no_jit:
+	kbase_gpu_vm_lock(kctx);
+	kbase_sticky_resource_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+no_sticky:
+	kbase_region_tracker_term(kctx);
+no_region_tracker:
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+no_sink_page:
+	/* VM lock needed for the call to kbase_mmu_free_pgd */
+	kbase_gpu_vm_lock(kctx);
+	kbase_mmu_free_pgd(kctx);
+	kbase_gpu_vm_unlock(kctx);
+pgd_no_mem:
+	kbase_mmu_term(kctx);
+term_dma_fence:
+	kbase_dma_fence_term(kctx);
+free_event:
+	kbase_event_cleanup(kctx);
+free_jd:
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+	kbase_jd_exit(kctx);
+deinit_evictable:
+	kbase_mem_evictable_deinit(kctx);
+free_both_pools:
+	kbase_mem_pool_term(&kctx->lp_mem_pool);
+free_mem_pool:
+	kbase_mem_pool_term(&kctx->mem_pool);
+free_kctx:
+	vfree(kctx);
+out:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context);
+
+static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+{
+	dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+}
+
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	int pages;
+	unsigned long pending_regions_to_clean;
+	unsigned long flags;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+	/* Ensure the core is powered up for the destroy process */
+	/* A suspend won't happen here, because we're in a syscall from a userspace
+	 * thread. */
+	kbase_pm_context_active(kbdev);
+
+	kbase_mem_pool_mark_dying(&kctx->mem_pool);
+
+	kbase_jd_zap_context(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	/* Removing the rest of the debugfs entries here as we want to keep the
+	 * atom debugfs interface alive until all atoms have completed. This
+	 * is useful for debugging hung contexts. */
+	debugfs_remove_recursive(kctx->kctx_dentry);
+#endif
+
+	kbase_event_cleanup(kctx);
+
+	/*
+	 * JIT must be terminated before the code below as it must be called
+	 * without the region lock being held.
+	 * The code above ensures no new JIT allocations can be made by
+	 * by the time we get to this point of context tear down.
+	 */
+	kbase_jit_term(kctx);
+
+	kbase_gpu_vm_lock(kctx);
+
+	kbase_sticky_resource_term(kctx);
+
+	/* MMU is disabled as part of scheduling out the context */
+	kbase_mmu_free_pgd(kctx);
+
+	/* drop the aliasing sink page now that it can't be mapped anymore */
+	p = phys_to_page(as_phys_addr_t(kctx->aliasing_sink_page));
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+
+	/* free pending region setups */
+	pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+	while (pending_regions_to_clean) {
+		unsigned int cookie = __ffs(pending_regions_to_clean);
+
+		BUG_ON(!kctx->pending_regions[cookie]);
+
+		kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+
+		kctx->pending_regions[cookie] = NULL;
+		pending_regions_to_clean &= ~(1UL << cookie);
+	}
+
+	kbase_region_tracker_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+
+	kbase_jd_exit(kctx);
+
+	kbase_dma_fence_term(kctx);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_remove_ctx(kctx);
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_mmu_term(kctx);
+
+	pages = atomic_read(&kctx->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_evictable_deinit(kctx);
+	kbase_mem_pool_term(&kctx->mem_pool);
+	kbase_mem_pool_term(&kctx->lp_mem_pool);
+	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+	vfree(kctx);
+
+	kbase_pm_context_idle(kbdev);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context);
+
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
+{
+	int err = 0;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long irq_flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Validate flags */
+	if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+
+	/* Translate the flags */
+	if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+		kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* Latch the initial attributes into the Job Scheduler */
+	kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
+
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ out:
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h
new file mode 100755
index 0000000..30b0f64
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_CONTEXT_H_
+#define _KBASE_CONTEXT_H_
+
+#include <linux/atomic.h>
+
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context.
+ *
+ * Return: new kbase context
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Calls kbase_destroy_os_context() to free OS specific structures.
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx);
+
+/**
+ * kbase_context_set_create_flags - Set creation flags on a context
+ * @kctx: Kbase context
+ * @flags: Flags to set, which shall be one of the flags of
+ *         BASE_CONTEXT_CREATE_KERNEL_FLAGS.
+ *
+ * Return: 0 on success, -EINVAL otherwise when an invalid flag is specified.
+ */
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+/**
+ * kbase_ctx_flag - Check if @flag is set on @kctx
+ * @kctx: Pointer to kbase context to check
+ * @flag: Flag to check
+ *
+ * Return: true if @flag is set on @kctx, false if not.
+ */
+static inline bool kbase_ctx_flag(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	return atomic_read(&kctx->flags) & flag;
+}
+
+/**
+ * kbase_ctx_flag_clear - Clear @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Clear the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
+					enum kbase_context_flags flag)
+{
+#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
+	/*
+	 * Earlier kernel versions doesn't have atomic_andnot() or
+	 * atomic_and(). atomic_clear_mask() was only available on some
+	 * architectures and removed on arm in v3.13 on arm and arm64.
+	 *
+	 * Use a compare-exchange loop to clear the flag on pre 4.3 kernels,
+	 * when atomic_andnot() becomes available.
+	 */
+	int old, new;
+
+	do {
+		old = atomic_read(&kctx->flags);
+		new = old & ~flag;
+
+	} while (atomic_cmpxchg(&kctx->flags, old, new) != old);
+#else
+	atomic_andnot(flag, &kctx->flags);
+#endif
+}
+
+/**
+ * kbase_ctx_flag_set - Set @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Set the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	atomic_or(flag, &kctx->flags);
+}
+#endif /* _KBASE_CONTEXT_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
new file mode 100755
index 0000000..d696b40
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -0,0 +1,4286 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_mem_linux.h>
+#ifdef CONFIG_MALI_DEVFREQ
+#include <linux/devfreq.h>
+#include <backend/gpu/mali_kbase_devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <ipa/mali_kbase_ipa_debugfs.h>
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+#ifdef CONFIG_MALI_NO_MALI
+#include "mali_kbase_model_linux.h"
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* CONFIG_MALI_NO_MALI */
+#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_debugfs.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_regs_dump_debugfs.h"
+#endif /* !MALI_CUSTOMER_RELEASE */
+#include "mali_kbase_regs_history_debugfs.h"
+#include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_ioctl.h"
+
+#ifdef CONFIG_MALI_JOB_DUMP
+#include "mali_kbase_gwt.h"
+#endif
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/compat.h>	/* is_compat_task/in_compat_syscall */
+#include <linux/mman.h>
+#include <linux/version.h>
+#include <mali_kbase_hw.h>
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/log2.h>
+
+#include <mali_kbase_config.h>
+
+
+#if (KERNEL_VERSION(3, 13, 0) <= LINUX_VERSION_CODE)
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif
+
+#include <mali_kbase_tlstream.h>
+
+#include <mali_kbase_as_fault_debugfs.h>
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static int kbase_dev_nr;
+
+static DEFINE_MUTEX(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+
+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
+
+static int kbase_api_handshake(struct kbase_context *kctx,
+			       struct kbase_ioctl_version_check *version)
+{
+	switch (version->major) {
+	case BASE_UK_VERSION_MAJOR:
+		/* set minor to be the lowest common */
+		version->minor = min_t(int, BASE_UK_VERSION_MINOR,
+				       (int)version->minor);
+		break;
+	default:
+		/* We return our actual version regardless if it
+		 * matches the version returned by userspace -
+		 * userspace can bail if it can't handle this
+		 * version
+		 */
+		version->major = BASE_UK_VERSION_MAJOR;
+		version->minor = BASE_UK_VERSION_MINOR;
+		break;
+	}
+
+	/* save the proposed version number for later use */
+	kctx->api_version = KBASE_API_VERSION(version->major, version->minor);
+
+	return 0;
+}
+
+/**
+ * enum mali_error - Mali error codes shared with userspace
+ *
+ * This is subset of those common Mali errors that can be returned to userspace.
+ * Values of matching user and kernel space enumerators MUST be the same.
+ * MALI_ERROR_NONE is guaranteed to be 0.
+ *
+ * @MALI_ERROR_NONE: Success
+ * @MALI_ERROR_OUT_OF_GPU_MEMORY: Not used in the kernel driver
+ * @MALI_ERROR_OUT_OF_MEMORY: Memory allocation failure
+ * @MALI_ERROR_FUNCTION_FAILED: Generic error code
+ */
+enum mali_error {
+	MALI_ERROR_NONE = 0,
+	MALI_ERROR_OUT_OF_GPU_MEMORY,
+	MALI_ERROR_OUT_OF_MEMORY,
+	MALI_ERROR_FUNCTION_FAILED,
+};
+
+enum {
+	inited_mem = (1u << 0),
+	inited_js = (1u << 1),
+	/* Bit number 2 was earlier assigned to the runtime-pm initialization
+	 * stage (which has been merged with the backend_early stage).
+	 */
+#ifdef CONFIG_MALI_DEVFREQ
+	inited_devfreq = (1u << 3),
+#endif /* CONFIG_MALI_DEVFREQ */
+	inited_tlstream = (1u << 4),
+	inited_backend_early = (1u << 5),
+	inited_backend_late = (1u << 6),
+	inited_device = (1u << 7),
+	inited_vinstr = (1u << 8),
+
+	inited_job_fault = (1u << 10),
+	inited_sysfs_group = (1u << 11),
+	inited_misc_register = (1u << 12),
+	inited_get_device = (1u << 13),
+	inited_dev_list = (1u << 14),
+	inited_debugfs = (1u << 15),
+	inited_gpu_device = (1u << 16),
+	inited_registers_map = (1u << 17),
+	inited_io_history = (1u << 18),
+	inited_power_control = (1u << 19),
+	inited_buslogger = (1u << 20),
+	inited_protected = (1u << 21),
+	inited_ctx_sched = (1u << 22)
+};
+
+static struct kbase_device *to_kbase_device(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
+
+static int assign_irqs(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int i;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* 3 IRQ resources */
+	for (i = 0; i < 3; i++) {
+		struct resource *irq_res;
+		int irqtag;
+
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res) {
+			dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+			return -ENOENT;
+		}
+
+#ifdef CONFIG_OF
+		if (!strncmp(irq_res->name, "JOB", 4)) {
+			irqtag = JOB_IRQ_TAG;
+		} else if (!strncmp(irq_res->name, "MMU", 4)) {
+			irqtag = MMU_IRQ_TAG;
+		} else if (!strncmp(irq_res->name, "GPU", 4)) {
+			irqtag = GPU_IRQ_TAG;
+		} else {
+			dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+				irq_res->name);
+			return -EINVAL;
+		}
+#else
+		irqtag = i;
+#endif /* CONFIG_OF */
+		kbdev->irqs[irqtag].irq = irq_res->start;
+		kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK;
+	}
+
+	return 0;
+}
+
+/*
+ * API to acquire device list mutex and
+ * return pointer to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void)
+{
+	mutex_lock(&kbase_dev_list_lock);
+	return &kbase_dev_list;
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_get);
+
+/* API to release the device list mutex */
+void kbase_dev_list_put(const struct list_head *dev_list)
+{
+	mutex_unlock(&kbase_dev_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_put);
+
+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */
+struct kbase_device *kbase_find_device(int minor)
+{
+	struct kbase_device *kbdev = NULL;
+	struct list_head *entry;
+	const struct list_head *dev_list = kbase_dev_list_get();
+
+	list_for_each(entry, dev_list) {
+		struct kbase_device *tmp;
+
+		tmp = list_entry(entry, struct kbase_device, entry);
+		if (tmp->mdev.minor == minor || minor == -1) {
+			kbdev = tmp;
+			get_device(kbdev->dev);
+			break;
+		}
+	}
+	kbase_dev_list_put(dev_list);
+
+	return kbdev;
+}
+EXPORT_SYMBOL(kbase_find_device);
+
+void kbase_release_device(struct kbase_device *kbdev)
+{
+	put_device(kbdev->dev);
+}
+EXPORT_SYMBOL(kbase_release_device);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && \
+		!(LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 28) && \
+		LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0))
+/*
+ * Older versions, before v4.6, of the kernel doesn't have
+ * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28
+ */
+static int kstrtobool_from_user(const char __user *s, size_t count, bool *res)
+{
+	char buf[32];
+
+	count = min(sizeof(buf), count);
+
+	if (copy_from_user(buf, s, count))
+		return -EFAULT;
+	buf[count] = '\0';
+
+	return strtobool(buf, res);
+}
+#endif
+
+static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	int err;
+	bool value;
+
+	err = kstrtobool_from_user(ubuf, size, &value);
+	if (err)
+		return err;
+
+	if (value)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+	else
+		kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE);
+
+	return size;
+}
+
+static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	char buf[32];
+	int count;
+	bool value;
+
+	value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE);
+
+	count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N");
+
+	return simple_read_from_buffer(ubuf, size, off, buf, count);
+}
+
+static const struct file_operations kbase_infinite_cache_fops = {
+	.open = simple_open,
+	.write = write_ctx_infinite_cache,
+	.read = read_ctx_infinite_cache,
+};
+
+static ssize_t write_ctx_force_same_va(struct file *f, const char __user *ubuf,
+		size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	int err;
+	bool value;
+
+	err = kstrtobool_from_user(ubuf, size, &value);
+	if (err)
+		return err;
+
+	if (value) {
+#if defined(CONFIG_64BIT)
+		/* 32-bit clients cannot force SAME_VA */
+		if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+			return -EINVAL;
+		kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA);
+#else /* defined(CONFIG_64BIT) */
+		/* 32-bit clients cannot force SAME_VA */
+		return -EINVAL;
+#endif /* defined(CONFIG_64BIT) */
+	} else {
+		kbase_ctx_flag_clear(kctx, KCTX_FORCE_SAME_VA);
+	}
+
+	return size;
+}
+
+static ssize_t read_ctx_force_same_va(struct file *f, char __user *ubuf,
+		size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	char buf[32];
+	int count;
+	bool value;
+
+	value = kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA);
+
+	count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N");
+
+	return simple_read_from_buffer(ubuf, size, off, buf, count);
+}
+
+static const struct file_operations kbase_force_same_va_fops = {
+	.open = simple_open,
+	.write = write_ctx_force_same_va,
+	.read = read_ctx_force_same_va,
+};
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+	struct kbase_device *kbdev = NULL;
+	struct kbase_context *kctx;
+	int ret = 0;
+#ifdef CONFIG_DEBUG_FS
+	char kctx_name[64];
+#endif
+
+	kbdev = kbase_find_device(iminor(inode));
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE)
+	kctx = kbase_create_context(kbdev, in_compat_syscall());
+#else
+	kctx = kbase_create_context(kbdev, is_compat_task());
+#endif /* (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) */
+	if (!kctx) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	init_waitqueue_head(&kctx->event_queue);
+	filp->private_data = kctx;
+	kctx->filp = filp;
+
+	if (kbdev->infinite_cache_active_default)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+
+#ifdef CONFIG_DEBUG_FS
+	snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+
+	kctx->kctx_dentry = debugfs_create_dir(kctx_name,
+			kbdev->debugfs_ctx_directory);
+
+	if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry,
+			kctx, &kbase_infinite_cache_fops);
+	debugfs_create_file("force_same_va", S_IRUSR | S_IWUSR,
+			kctx->kctx_dentry, kctx, &kbase_force_same_va_fops);
+
+	mutex_init(&kctx->mem_profile_lock);
+
+	kbasep_jd_debugfs_ctx_init(kctx);
+	kbase_debug_mem_view_init(filp);
+
+	kbase_debug_job_fault_context_init(kctx);
+
+	kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool, &kctx->lp_mem_pool);
+
+	kbase_jit_debugfs_init(kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+	dev_dbg(kbdev->dev, "created base context\n");
+
+	{
+		struct kbasep_kctx_list_element *element;
+
+		element = kzalloc(sizeof(*element), GFP_KERNEL);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			element->kctx = kctx;
+			list_add(&element->link, &kbdev->kctx_list);
+			KBASE_TLSTREAM_TL_NEW_CTX(
+					element->kctx,
+					element->kctx->id,
+					(u32)(element->kctx->tgid));
+			mutex_unlock(&kbdev->kctx_list_lock);
+		} else {
+			/* we don't treat this as a fail - just warn about it */
+			dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n");
+		}
+	}
+	return 0;
+
+ out:
+	kbase_release_device(kbdev);
+	return ret;
+}
+
+static int kbase_release(struct inode *inode, struct file *filp)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_kctx_list_element *element, *tmp;
+	bool found_element = false;
+
+	KBASE_TLSTREAM_TL_DEL_CTX(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	kbasep_mem_profile_debugfs_remove(kctx);
+	kbase_debug_job_fault_context_term(kctx);
+#endif
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found_element = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+	if (!found_element)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	filp->private_data = NULL;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	/* If this client was performing hwcnt dumping and did not explicitly
+	 * detach itself, remove it from the vinstr core now */
+	if (kctx->vinstr_cli) {
+		struct kbase_ioctl_hwcnt_enable enable;
+
+		enable.dump_buffer = 0llu;
+		kbase_vinstr_legacy_hwc_setup(
+				kbdev->vinstr_ctx, &kctx->vinstr_cli, &enable);
+	}
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	kbase_destroy_context(kctx);
+
+	dev_dbg(kbdev->dev, "deleted base context\n");
+	kbase_release_device(kbdev);
+	return 0;
+}
+
+static int kbase_api_set_flags(struct kbase_context *kctx,
+		struct kbase_ioctl_set_flags *flags)
+{
+	int err;
+
+	/* setup pending, try to signal that we'll do the setup,
+	 * if setup was already in progress, err this call
+	 */
+	if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0)
+		return -EINVAL;
+
+	err = kbase_context_set_create_flags(kctx, flags->create_flags);
+	/* if bad flags, will stay stuck in setup mode */
+	if (err)
+		return err;
+
+	atomic_set(&kctx->setup_complete, 1);
+	return 0;
+}
+
+static int kbase_api_job_submit(struct kbase_context *kctx,
+		struct kbase_ioctl_job_submit *submit)
+{
+	return kbase_jd_submit(kctx, u64_to_user_ptr(submit->addr),
+			submit->nr_atoms,
+			submit->stride, false);
+}
+
+static int kbase_api_get_gpuprops(struct kbase_context *kctx,
+		struct kbase_ioctl_get_gpuprops *get_props)
+{
+	struct kbase_gpu_props *kprops = &kctx->kbdev->gpu_props;
+	int err;
+
+	if (get_props->flags != 0) {
+		dev_err(kctx->kbdev->dev, "Unsupported flags to get_gpuprops");
+		return -EINVAL;
+	}
+
+	if (get_props->size == 0)
+		return kprops->prop_buffer_size;
+	if (get_props->size < kprops->prop_buffer_size)
+		return -EINVAL;
+
+	err = copy_to_user(u64_to_user_ptr(get_props->buffer),
+			kprops->prop_buffer,
+			kprops->prop_buffer_size);
+	if (err)
+		return -EFAULT;
+	return kprops->prop_buffer_size;
+}
+
+static int kbase_api_post_term(struct kbase_context *kctx)
+{
+	kbase_event_close(kctx);
+	return 0;
+}
+
+static int kbase_api_mem_alloc(struct kbase_context *kctx,
+		union kbase_ioctl_mem_alloc *alloc)
+{
+	struct kbase_va_region *reg;
+	u64 flags = alloc->in.flags;
+	u64 gpu_va;
+
+	if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) &&
+			kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) {
+		/* force SAME_VA if a 64-bit client */
+		flags |= BASE_MEM_SAME_VA;
+	}
+
+	reg = kbase_mem_alloc(kctx, alloc->in.va_pages,
+			alloc->in.commit_pages,
+			alloc->in.extent,
+			&flags, &gpu_va);
+
+	if (!reg)
+		return -ENOMEM;
+
+	alloc->out.flags = flags;
+	alloc->out.gpu_va = gpu_va;
+
+	return 0;
+}
+
+static int kbase_api_mem_query(struct kbase_context *kctx,
+		union kbase_ioctl_mem_query *query)
+{
+	return kbase_mem_query(kctx, query->in.gpu_addr,
+			query->in.query, &query->out.value);
+}
+
+static int kbase_api_mem_free(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_free *free)
+{
+	return kbase_mem_free(kctx, free->gpu_addr);
+}
+
+static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_reader_setup *setup)
+{
+	int ret;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	return ret;
+}
+
+static int kbase_api_hwcnt_enable(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_enable *enable)
+{
+	int ret;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_legacy_hwc_setup(kctx->kbdev->vinstr_ctx,
+			&kctx->vinstr_cli, enable);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	return ret;
+}
+
+static int kbase_api_hwcnt_dump(struct kbase_context *kctx)
+{
+	int ret;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_hwc_dump(kctx->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	return ret;
+}
+
+static int kbase_api_hwcnt_clear(struct kbase_context *kctx)
+{
+	int ret;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_hwc_clear(kctx->vinstr_cli);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	return ret;
+}
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_api_hwcnt_set(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_values *values)
+{
+	gpu_model_set_dummy_prfcnt_sample(
+			(u32 __user *)(uintptr_t)values->data,
+			values->size);
+
+	return 0;
+}
+#endif
+
+static int kbase_api_disjoint_query(struct kbase_context *kctx,
+		struct kbase_ioctl_disjoint_query *query)
+{
+	query->counter = kbase_disjoint_event_get(kctx->kbdev);
+
+	return 0;
+}
+
+static int kbase_api_get_ddk_version(struct kbase_context *kctx,
+		struct kbase_ioctl_get_ddk_version *version)
+{
+	int ret;
+	int len = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+
+	if (version->version_buffer == 0)
+		return len;
+
+	if (version->size < len)
+		return -EOVERFLOW;
+
+	ret = copy_to_user(u64_to_user_ptr(version->version_buffer),
+			KERNEL_SIDE_DDK_VERSION_STRING,
+			sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+
+	if (ret)
+		return -EFAULT;
+
+	return len;
+}
+
+/* Defaults for legacy JIT init ioctl */
+#define DEFAULT_MAX_JIT_ALLOCATIONS 255
+#define JIT_LEGACY_TRIM_LEVEL (0) /* No trimming */
+
+static int kbase_api_mem_jit_init_old(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_jit_init_old *jit_init)
+{
+	kctx->jit_version = 1;
+
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
+			DEFAULT_MAX_JIT_ALLOCATIONS,
+			JIT_LEGACY_TRIM_LEVEL);
+}
+
+static int kbase_api_mem_jit_init(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_jit_init *jit_init)
+{
+	int i;
+
+	kctx->jit_version = 2;
+
+	for (i = 0; i < sizeof(jit_init->padding); i++) {
+		/* Ensure all padding bytes are 0 for potential future
+		 * extension
+		 */
+		if (jit_init->padding[i])
+			return -EINVAL;
+	}
+
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
+			jit_init->max_allocations, jit_init->trim_level);
+}
+
+static int kbase_api_mem_sync(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_sync *sync)
+{
+	struct basep_syncset sset = {
+		.mem_handle.basep.handle = sync->handle,
+		.user_addr = sync->user_addr,
+		.size = sync->size,
+		.type = sync->type
+	};
+
+	return kbase_sync_now(kctx, &sset);
+}
+
+static int kbase_api_mem_find_cpu_offset(struct kbase_context *kctx,
+		union kbase_ioctl_mem_find_cpu_offset *find)
+{
+	return kbasep_find_enclosing_cpu_mapping_offset(
+			kctx,
+			find->in.cpu_addr,
+			find->in.size,
+			&find->out.offset);
+}
+
+static int kbase_api_mem_find_gpu_start_and_offset(struct kbase_context *kctx,
+		union kbase_ioctl_mem_find_gpu_start_and_offset *find)
+{
+	return kbasep_find_enclosing_gpu_mapping_start_and_offset(
+			kctx,
+			find->in.gpu_addr,
+			find->in.size,
+			&find->out.start,
+			&find->out.offset);
+}
+
+static int kbase_api_get_context_id(struct kbase_context *kctx,
+		struct kbase_ioctl_get_context_id *info)
+{
+	info->id = kctx->id;
+
+	return 0;
+}
+
+static int kbase_api_tlstream_acquire(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_acquire *acquire)
+{
+	return kbase_tlstream_acquire(kctx, acquire->flags);
+}
+
+static int kbase_api_tlstream_flush(struct kbase_context *kctx)
+{
+	kbase_tlstream_flush_streams();
+
+	return 0;
+}
+
+static int kbase_api_mem_commit(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_commit *commit)
+{
+	return kbase_mem_commit(kctx, commit->gpu_addr, commit->pages);
+}
+
+static int kbase_api_mem_alias(struct kbase_context *kctx,
+		union kbase_ioctl_mem_alias *alias)
+{
+	struct base_mem_aliasing_info *ai;
+	u64 flags;
+	int err;
+
+	if (alias->in.nents == 0 || alias->in.nents > 2048)
+		return -EINVAL;
+
+	if (alias->in.stride > (U64_MAX / 2048))
+		return -EINVAL;
+
+	ai = vmalloc(sizeof(*ai) * alias->in.nents);
+	if (!ai)
+		return -ENOMEM;
+
+	err = copy_from_user(ai,
+			u64_to_user_ptr(alias->in.aliasing_info),
+			sizeof(*ai) * alias->in.nents);
+	if (err) {
+		vfree(ai);
+		return -EFAULT;
+	}
+
+	flags = alias->in.flags;
+
+	alias->out.gpu_va = kbase_mem_alias(kctx, &flags,
+			alias->in.stride, alias->in.nents,
+			ai, &alias->out.va_pages);
+
+	alias->out.flags = flags;
+
+	vfree(ai);
+
+	if (alias->out.gpu_va == 0)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int kbase_api_mem_import(struct kbase_context *kctx,
+		union kbase_ioctl_mem_import *import)
+{
+	int ret;
+	u64 flags = import->in.flags;
+
+	ret = kbase_mem_import(kctx,
+			import->in.type,
+			u64_to_user_ptr(import->in.phandle),
+			import->in.padding,
+			&import->out.gpu_va,
+			&import->out.va_pages,
+			&flags);
+
+	import->out.flags = flags;
+
+	return ret;
+}
+
+static int kbase_api_mem_flags_change(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_flags_change *change)
+{
+	return kbase_mem_flags_change(kctx, change->gpu_va,
+			change->flags, change->mask);
+}
+
+static int kbase_api_stream_create(struct kbase_context *kctx,
+		struct kbase_ioctl_stream_create *stream)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	int fd, ret;
+
+	/* Name must be NULL-terminated and padded with NULLs, so check last
+	 * character is NULL
+	 */
+	if (stream->name[sizeof(stream->name)-1] != 0)
+		return -EINVAL;
+
+	ret = kbase_sync_fence_stream_create(stream->name, &fd);
+
+	if (ret)
+		return ret;
+	return fd;
+#else
+	return -ENOENT;
+#endif
+}
+
+static int kbase_api_fence_validate(struct kbase_context *kctx,
+		struct kbase_ioctl_fence_validate *validate)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	return kbase_sync_fence_validate(validate->fd);
+#else
+	return -ENOENT;
+#endif
+}
+
+static int kbase_api_get_profiling_controls(struct kbase_context *kctx,
+		struct kbase_ioctl_get_profiling_controls *controls)
+{
+	int ret;
+
+	if (controls->count > (FBDUMP_CONTROL_MAX - FBDUMP_CONTROL_MIN))
+		return -EINVAL;
+
+	ret = copy_to_user(u64_to_user_ptr(controls->buffer),
+			&kctx->kbdev->kbase_profiling_controls[
+				FBDUMP_CONTROL_MIN],
+			controls->count * sizeof(u32));
+
+	if (ret)
+		return -EFAULT;
+	return 0;
+}
+
+static int kbase_api_mem_profile_add(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_profile_add *data)
+{
+	char *buf;
+	int err;
+
+	if (data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+		dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big\n");
+		return -EINVAL;
+	}
+
+	buf = kmalloc(data->len, GFP_KERNEL);
+	if (ZERO_OR_NULL_PTR(buf))
+		return -ENOMEM;
+
+	err = copy_from_user(buf, u64_to_user_ptr(data->buffer),
+			data->len);
+	if (err) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	return kbasep_mem_profile_debugfs_insert(kctx, buf, data->len);
+}
+
+static int kbase_api_soft_event_update(struct kbase_context *kctx,
+		struct kbase_ioctl_soft_event_update *update)
+{
+	if (update->flags != 0)
+		return -EINVAL;
+
+	return kbase_soft_event_update(kctx, update->event, update->new_status);
+}
+
+static int kbase_api_sticky_resource_map(struct kbase_context *kctx,
+		struct kbase_ioctl_sticky_resource_map *map)
+{
+	int ret;
+	u64 i;
+	u64 gpu_addr[BASE_EXT_RES_COUNT_MAX];
+
+	if (!map->count || map->count > BASE_EXT_RES_COUNT_MAX)
+		return -EOVERFLOW;
+
+	ret = copy_from_user(gpu_addr, u64_to_user_ptr(map->address),
+			sizeof(u64) * map->count);
+
+	if (ret != 0)
+		return -EFAULT;
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (i = 0; i < map->count; i++) {
+		if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i])) {
+			/* Invalid resource */
+			ret = -EINVAL;
+			break;
+		}
+	}
+
+	if (ret != 0) {
+		while (i > 0) {
+			i--;
+			kbase_sticky_resource_release(kctx, NULL, gpu_addr[i]);
+		}
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return ret;
+}
+
+static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx,
+		struct kbase_ioctl_sticky_resource_unmap *unmap)
+{
+	int ret;
+	u64 i;
+	u64 gpu_addr[BASE_EXT_RES_COUNT_MAX];
+
+	if (!unmap->count || unmap->count > BASE_EXT_RES_COUNT_MAX)
+		return -EOVERFLOW;
+
+	ret = copy_from_user(gpu_addr, u64_to_user_ptr(unmap->address),
+			sizeof(u64) * unmap->count);
+
+	if (ret != 0)
+		return -EFAULT;
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (i = 0; i < unmap->count; i++) {
+		if (!kbase_sticky_resource_release(kctx, NULL, gpu_addr[i])) {
+			/* Invalid resource, but we keep going anyway */
+			ret = -EINVAL;
+		}
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return ret;
+}
+
+#if MALI_UNIT_TEST
+static int kbase_api_tlstream_test(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_test *test)
+{
+	kbase_tlstream_test(
+			test->tpw_count,
+			test->msg_delay,
+			test->msg_count,
+			test->aux_msg);
+
+	return 0;
+}
+
+static int kbase_api_tlstream_stats(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_stats *stats)
+{
+	kbase_tlstream_stats(
+			&stats->bytes_collected,
+			&stats->bytes_generated);
+
+	return 0;
+}
+#endif /* MALI_UNIT_TEST */
+
+#define KBASE_HANDLE_IOCTL(cmd, function)                          \
+	do {                                                       \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE);          \
+		return function(kctx);                             \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_IN(cmd, function, type)                 \
+	do {                                                       \
+		type param;                                        \
+		int err;                                           \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE);         \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));     \
+		err = copy_from_user(&param, uarg, sizeof(param)); \
+		if (err)                                           \
+			return -EFAULT;                            \
+		return function(kctx, &param);                     \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type)                \
+	do {                                                       \
+		type param;                                        \
+		int ret, err;                                      \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ);          \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));     \
+		ret = function(kctx, &param);                      \
+		err = copy_to_user(uarg, &param, sizeof(param));   \
+		if (err)                                           \
+			return -EFAULT;                            \
+		return ret;                                        \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type)                  \
+	do {                                                           \
+		type param;                                            \
+		int ret, err;                                          \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE|_IOC_READ)); \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));         \
+		err = copy_from_user(&param, uarg, sizeof(param));     \
+		if (err)                                               \
+			return -EFAULT;                                \
+		ret = function(kctx, &param);                          \
+		err = copy_to_user(uarg, &param, sizeof(param));       \
+		if (err)                                               \
+			return -EFAULT;                                \
+		return ret;                                            \
+	} while (0)
+
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct kbase_device *kbdev = kctx->kbdev;
+	void __user *uarg = (void __user *)arg;
+
+	/* Only these ioctls are available until setup is complete */
+	switch (cmd) {
+	case KBASE_IOCTL_VERSION_CHECK:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK,
+				kbase_api_handshake,
+				struct kbase_ioctl_version_check);
+		break;
+
+	case KBASE_IOCTL_SET_FLAGS:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS,
+				kbase_api_set_flags,
+				struct kbase_ioctl_set_flags);
+		break;
+	}
+
+	/* Block call until version handshake and setup is complete */
+	if (kctx->api_version == 0 || !atomic_read(&kctx->setup_complete))
+		return -EINVAL;
+
+	/* Normal ioctls */
+	switch (cmd) {
+	case KBASE_IOCTL_JOB_SUBMIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT,
+				kbase_api_job_submit,
+				struct kbase_ioctl_job_submit);
+		break;
+	case KBASE_IOCTL_GET_GPUPROPS:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS,
+				kbase_api_get_gpuprops,
+				struct kbase_ioctl_get_gpuprops);
+		break;
+	case KBASE_IOCTL_POST_TERM:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM,
+				kbase_api_post_term);
+		break;
+	case KBASE_IOCTL_MEM_ALLOC:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC,
+				kbase_api_mem_alloc,
+				union kbase_ioctl_mem_alloc);
+		break;
+	case KBASE_IOCTL_MEM_QUERY:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY,
+				kbase_api_mem_query,
+				union kbase_ioctl_mem_query);
+		break;
+	case KBASE_IOCTL_MEM_FREE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE,
+				kbase_api_mem_free,
+				struct kbase_ioctl_mem_free);
+		break;
+	case KBASE_IOCTL_DISJOINT_QUERY:
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY,
+				kbase_api_disjoint_query,
+				struct kbase_ioctl_disjoint_query);
+		break;
+	case KBASE_IOCTL_GET_DDK_VERSION:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION,
+				kbase_api_get_ddk_version,
+				struct kbase_ioctl_get_ddk_version);
+		break;
+	case KBASE_IOCTL_MEM_JIT_INIT_OLD:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_OLD,
+				kbase_api_mem_jit_init_old,
+				struct kbase_ioctl_mem_jit_init_old);
+		break;
+	case KBASE_IOCTL_MEM_JIT_INIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT,
+				kbase_api_mem_jit_init,
+				struct kbase_ioctl_mem_jit_init);
+		break;
+	case KBASE_IOCTL_MEM_SYNC:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC,
+				kbase_api_mem_sync,
+				struct kbase_ioctl_mem_sync);
+		break;
+	case KBASE_IOCTL_MEM_FIND_CPU_OFFSET:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET,
+				kbase_api_mem_find_cpu_offset,
+				union kbase_ioctl_mem_find_cpu_offset);
+		break;
+	case KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET,
+				kbase_api_mem_find_gpu_start_and_offset,
+				union kbase_ioctl_mem_find_gpu_start_and_offset);
+		break;
+	case KBASE_IOCTL_GET_CONTEXT_ID:
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID,
+				kbase_api_get_context_id,
+				struct kbase_ioctl_get_context_id);
+		break;
+	case KBASE_IOCTL_TLSTREAM_ACQUIRE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE,
+				kbase_api_tlstream_acquire,
+				struct kbase_ioctl_tlstream_acquire);
+		break;
+	case KBASE_IOCTL_TLSTREAM_FLUSH:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH,
+				kbase_api_tlstream_flush);
+		break;
+	case KBASE_IOCTL_MEM_COMMIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT,
+				kbase_api_mem_commit,
+				struct kbase_ioctl_mem_commit);
+		break;
+	case KBASE_IOCTL_MEM_ALIAS:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS,
+				kbase_api_mem_alias,
+				union kbase_ioctl_mem_alias);
+		break;
+	case KBASE_IOCTL_MEM_IMPORT:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT,
+				kbase_api_mem_import,
+				union kbase_ioctl_mem_import);
+		break;
+	case KBASE_IOCTL_MEM_FLAGS_CHANGE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE,
+				kbase_api_mem_flags_change,
+				struct kbase_ioctl_mem_flags_change);
+		break;
+	case KBASE_IOCTL_STREAM_CREATE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE,
+				kbase_api_stream_create,
+				struct kbase_ioctl_stream_create);
+		break;
+	case KBASE_IOCTL_FENCE_VALIDATE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE,
+				kbase_api_fence_validate,
+				struct kbase_ioctl_fence_validate);
+		break;
+	case KBASE_IOCTL_GET_PROFILING_CONTROLS:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_PROFILING_CONTROLS,
+				kbase_api_get_profiling_controls,
+				struct kbase_ioctl_get_profiling_controls);
+		break;
+	case KBASE_IOCTL_MEM_PROFILE_ADD:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD,
+				kbase_api_mem_profile_add,
+				struct kbase_ioctl_mem_profile_add);
+		break;
+	case KBASE_IOCTL_SOFT_EVENT_UPDATE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE,
+				kbase_api_soft_event_update,
+				struct kbase_ioctl_soft_event_update);
+		break;
+	case KBASE_IOCTL_STICKY_RESOURCE_MAP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP,
+				kbase_api_sticky_resource_map,
+				struct kbase_ioctl_sticky_resource_map);
+		break;
+	case KBASE_IOCTL_STICKY_RESOURCE_UNMAP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_UNMAP,
+				kbase_api_sticky_resource_unmap,
+				struct kbase_ioctl_sticky_resource_unmap);
+		break;
+
+	/* Instrumentation. */
+	case KBASE_IOCTL_HWCNT_READER_SETUP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP,
+				kbase_api_hwcnt_reader_setup,
+				struct kbase_ioctl_hwcnt_reader_setup);
+		break;
+	case KBASE_IOCTL_HWCNT_ENABLE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE,
+				kbase_api_hwcnt_enable,
+				struct kbase_ioctl_hwcnt_enable);
+		break;
+	case KBASE_IOCTL_HWCNT_DUMP:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP,
+				kbase_api_hwcnt_dump);
+		break;
+	case KBASE_IOCTL_HWCNT_CLEAR:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR,
+				kbase_api_hwcnt_clear);
+		break;
+#ifdef CONFIG_MALI_NO_MALI
+	case KBASE_IOCTL_HWCNT_SET:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET,
+				kbase_api_hwcnt_set,
+				struct kbase_ioctl_hwcnt_values);
+		break;
+#endif
+#ifdef CONFIG_MALI_JOB_DUMP
+	case KBASE_IOCTL_CINSTR_GWT_START:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START,
+				kbase_gpu_gwt_start);
+		break;
+	case KBASE_IOCTL_CINSTR_GWT_STOP:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP,
+				kbase_gpu_gwt_stop);
+		break;
+	case KBASE_IOCTL_CINSTR_GWT_DUMP:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP,
+				kbase_gpu_gwt_dump,
+				union kbase_ioctl_cinstr_gwt_dump);
+		break;
+#endif
+#if MALI_UNIT_TEST
+	case KBASE_IOCTL_TLSTREAM_TEST:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST,
+				kbase_api_tlstream_test,
+				struct kbase_ioctl_tlstream_test);
+		break;
+	case KBASE_IOCTL_TLSTREAM_STATS:
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS,
+				kbase_api_tlstream_stats,
+				struct kbase_ioctl_tlstream_stats);
+		break;
+#endif
+	}
+
+	dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd));
+
+	return -ENOIOCTLCMD;
+}
+
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct base_jd_event_v2 uevent;
+	int out_count = 0;
+
+	if (count < sizeof(uevent))
+		return -ENOBUFS;
+
+	do {
+		while (kbase_event_dequeue(kctx, &uevent)) {
+			if (out_count > 0)
+				goto out;
+
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			if (wait_event_interruptible(kctx->event_queue,
+					kbase_event_pending(kctx)) != 0)
+				return -ERESTARTSYS;
+		}
+		if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+			if (out_count == 0)
+				return -EPIPE;
+			goto out;
+		}
+
+		if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0)
+			return -EFAULT;
+
+		buf += sizeof(uevent);
+		out_count++;
+		count -= sizeof(uevent);
+	} while (count >= sizeof(uevent));
+
+ out:
+	return out_count * sizeof(uevent);
+}
+
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+	struct kbase_context *kctx = filp->private_data;
+
+	poll_wait(filp, &kctx->event_queue, wait);
+	if (kbase_event_pending(kctx))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup);
+
+static int kbase_check_flags(int flags)
+{
+	/* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
+	 * closes the file descriptor in a child process.
+	 */
+	if (0 == (flags & O_CLOEXEC))
+		return -EINVAL;
+
+	return 0;
+}
+
+static const struct file_operations kbase_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_open,
+	.release = kbase_release,
+	.read = kbase_read,
+	.poll = kbase_poll,
+	.unlocked_ioctl = kbase_ioctl,
+	.compat_ioctl = kbase_ioctl,
+	.mmap = kbase_mmap,
+	.check_flags = kbase_check_flags,
+	.get_unmapped_area = kbase_get_unmapped_area,
+};
+
+/**
+ * show_policy - Show callback for the power_policy sysfs file.
+ *
+ * This function is called to get the contents of the power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *current_policy;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_get_policy(kbdev);
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * set_policy - Store callback for the power_policy sysfs file.
+ *
+ * This function is called when the power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls kbase_pm_set_policy() to change the
+ * policy.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "power_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/*
+ * The sysfs file power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+
+/**
+ * show_ca_policy - Show callback for the core_availability_policy sysfs file.
+ *
+ * This function is called to get the contents of the core_availability_policy
+ * sysfs file. This is a list of the available policies with the currently
+ * active one surrounded by square brackets.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *current_policy;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_ca_get_policy(kbdev);
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * set_ca_policy - Store callback for the core_availability_policy sysfs file.
+ *
+ * This function is called when the core_availability_policy sysfs file is
+ * written to. It matches the requested policy against the available policies
+ * and if a matching policy is found calls kbase_pm_set_policy() to change
+ * the policy.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *new_policy = NULL;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "core_availability_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_ca_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/*
+ * The sysfs file core_availability_policy
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy);
+
+/*
+ * show_core_mask - Show callback for the core_mask sysfs file.
+ *
+ * This function is called to get the contents of the core_mask sysfs file.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS0) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[0]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS1) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[1]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS2) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[2]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Available core mask : 0x%llX\n",
+			kbdev->gpu_props.props.raw_props.shader_present);
+
+	return ret;
+}
+
+/**
+ * set_core_mask - Store callback for the core_mask sysfs file.
+ *
+ * This function is called when the core_mask sysfs file is written to.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	u64 new_core_mask[3];
+	int items;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llx %llx %llx",
+			&new_core_mask[0], &new_core_mask[1],
+			&new_core_mask[2]);
+
+	if (items == 1)
+		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+
+	if (items == 1 || items == 3) {
+		u64 shader_present =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		u64 group0_core_mask =
+				kbdev->gpu_props.props.coherency_info.group[0].
+				core_mask;
+
+		if ((new_core_mask[0] & shader_present) != new_core_mask[0] ||
+				!(new_core_mask[0] & group0_core_mask) ||
+			(new_core_mask[1] & shader_present) !=
+						new_core_mask[1] ||
+				!(new_core_mask[1] & group0_core_mask) ||
+			(new_core_mask[2] & shader_present) !=
+						new_core_mask[2] ||
+				!(new_core_mask[2] & group0_core_mask)) {
+			dev_err(dev, "power_policy: invalid core specification\n");
+			return -EINVAL;
+		}
+
+		if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
+				kbdev->pm.debug_core_mask[1] !=
+						new_core_mask[1] ||
+				kbdev->pm.debug_core_mask[2] !=
+						new_core_mask[2]) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+					new_core_mask[1], new_core_mask[2]);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n"
+		"Use format <core_mask>\n"
+		"or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+	return -EINVAL;
+}
+
+/*
+ * The sysfs file core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+
+/**
+ * set_soft_job_timeout - Store callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This allows setting the timeout for software jobs. Waiting soft event wait
+ * jobs will be cancelled after this period expires, while soft fence wait jobs
+ * will print debug information if the fence debug feature is enabled.
+ *
+ * This is expressed in milliseconds.
+ *
+ * Return: count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_soft_job_timeout(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int soft_job_timeout_ms;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) ||
+	    (soft_job_timeout_ms <= 0))
+		return -EINVAL;
+
+	atomic_set(&kbdev->js_data.soft_job_timeout_ms,
+		   soft_job_timeout_ms);
+
+	return count;
+}
+
+/**
+ * show_soft_job_timeout - Show callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * This will return the timeout for the software jobs.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_soft_job_timeout(struct device *dev,
+				       struct device_attribute *attr,
+				       char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n",
+			 atomic_read(&kbdev->js_data.soft_job_timeout_ms));
+}
+
+static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR,
+		   show_soft_job_timeout, set_soft_job_timeout);
+
+static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms,
+				int default_ticks, u32 old_ticks)
+{
+	if (timeout_ms > 0) {
+		u64 ticks = timeout_ms * 1000000ULL;
+		do_div(ticks, kbdev->js_data.scheduling_period_ns);
+		if (!ticks)
+			return 1;
+		return ticks;
+	} else if (timeout_ms < 0) {
+		return default_ticks;
+	} else {
+		return old_ticks;
+	}
+}
+
+/**
+ * set_js_timeouts - Store callback for the js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the js_timeouts sysfs
+ * file. This file contains five values separated by whitespace. The values
+ * are basically the same as %JS_SOFT_STOP_TICKS, %JS_HARD_STOP_TICKS_SS,
+ * %JS_HARD_STOP_TICKS_DUMPING, %JS_RESET_TICKS_SS, %JS_RESET_TICKS_DUMPING
+ * configuration values (in that order), with the difference that the js_timeout
+ * values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfile file allows the current values in
+ * use by the job scheduler to get override. Note that a value needs to
+ * be other than 0 for it to override the current job scheduler value.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	long js_soft_stop_ms;
+	long js_soft_stop_ms_cl;
+	long js_hard_stop_ms_ss;
+	long js_hard_stop_ms_cl;
+	long js_hard_stop_ms_dumping;
+	long js_reset_ms_ss;
+	long js_reset_ms_cl;
+	long js_reset_ms_dumping;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld",
+			&js_soft_stop_ms, &js_soft_stop_ms_cl,
+			&js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+			&js_hard_stop_ms_dumping, &js_reset_ms_ss,
+			&js_reset_ms_cl, &js_reset_ms_dumping);
+
+	if (items == 8) {
+		struct kbasep_js_device_data *js_data = &kbdev->js_data;
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\
+	js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \
+			default, js_data->ticks_name); \
+	dev_dbg(kbdev->dev, "Overriding " #ticks_name \
+			" with %lu ticks (%lu ms)\n", \
+			(unsigned long)js_data->ticks_name, \
+			ms_name); \
+	} while (0)
+
+		UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms,
+				DEFAULT_JS_SOFT_STOP_TICKS);
+		UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl,
+				DEFAULT_JS_SOFT_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_HARD_STOP_TICKS_SS_8408 :
+				DEFAULT_JS_HARD_STOP_TICKS_SS);
+		UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl,
+				DEFAULT_JS_HARD_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_dumping,
+				js_hard_stop_ms_dumping,
+				DEFAULT_JS_HARD_STOP_TICKS_DUMPING);
+		UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_RESET_TICKS_SS_8408 :
+				DEFAULT_JS_RESET_TICKS_SS);
+		UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl,
+				DEFAULT_JS_RESET_TICKS_CL);
+		UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping,
+				DEFAULT_JS_RESET_TICKS_DUMPING);
+
+		kbase_js_set_timeouts(kbdev);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n"
+			"Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n"
+			"Write 0 for no change, -1 to restore default timeout\n");
+	return -EINVAL;
+}
+
+static unsigned long get_js_timeout_in_ms(
+		u32 scheduling_period_ns,
+		u32 ticks)
+{
+	u64 ms = (u64)ticks * scheduling_period_ns;
+
+	do_div(ms, 1000000UL);
+	return ms;
+}
+
+/**
+ * show_js_timeouts - Show callback for the js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the js_timeouts sysfs
+ * file. It returns the last set values written to the js_timeouts sysfs file.
+ * If the file didn't get written yet, the values will be current setting in
+ * use.
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+	unsigned long js_soft_stop_ms;
+	unsigned long js_soft_stop_ms_cl;
+	unsigned long js_hard_stop_ms_ss;
+	unsigned long js_hard_stop_ms_cl;
+	unsigned long js_hard_stop_ms_dumping;
+	unsigned long js_reset_ms_ss;
+	unsigned long js_reset_ms_cl;
+	unsigned long js_reset_ms_dumping;
+	u32 scheduling_period_ns;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+
+#define GET_TIMEOUT(name) get_js_timeout_in_ms(\
+		scheduling_period_ns, \
+		kbdev->js_data.name)
+
+	js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks);
+	js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl);
+	js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss);
+	js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl);
+	js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping);
+	js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss);
+	js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl);
+	js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef GET_TIMEOUT
+
+	ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+			js_soft_stop_ms, js_soft_stop_ms_cl,
+			js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+			js_hard_stop_ms_dumping, js_reset_ms_ss,
+			js_reset_ms_cl, js_reset_ms_dumping);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * The sysfs file js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * JS_STOP_STOP_TICKS_SS
+ * JS_STOP_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
+
+static u32 get_new_js_timeout(
+		u32 old_period,
+		u32 old_ticks,
+		u32 new_scheduling_period_ns)
+{
+	u64 ticks = (u64)old_period * (u64)old_ticks;
+	do_div(ticks, new_scheduling_period_ns);
+	return ticks?ticks:1;
+}
+
+/**
+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ *                            file
+ * @dev:   The device the sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the js_scheduling_period sysfs file is written
+ * to. It checks the data written, and if valid updates the js_scheduling_period
+ * value
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	unsigned int js_scheduling_period;
+	u32 new_scheduling_period_ns;
+	u32 old_period;
+	struct kbasep_js_device_data *js_data;
+	unsigned long flags;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	js_data = &kbdev->js_data;
+
+	ret = kstrtouint(buf, 0, &js_scheduling_period);
+	if (ret || !js_scheduling_period) {
+		dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
+				"Use format <js_scheduling_period_ms>\n");
+		return -EINVAL;
+	}
+
+	new_scheduling_period_ns = js_scheduling_period * 1000000;
+
+	/* Update scheduling timeouts */
+	mutex_lock(&js_data->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* If no contexts have been scheduled since js_timeouts was last written
+	 * to, the new timeouts might not have been latched yet. So check if an
+	 * update is pending and use the new values if necessary. */
+
+	/* Use previous 'new' scheduling period as a base if present. */
+	old_period = js_data->scheduling_period_ns;
+
+#define SET_TIMEOUT(name) \
+		(js_data->name = get_new_js_timeout(\
+				old_period, \
+				kbdev->js_data.name, \
+				new_scheduling_period_ns))
+
+	SET_TIMEOUT(soft_stop_ticks);
+	SET_TIMEOUT(soft_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_ss);
+	SET_TIMEOUT(hard_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_dumping);
+	SET_TIMEOUT(gpu_reset_ticks_ss);
+	SET_TIMEOUT(gpu_reset_ticks_cl);
+	SET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef SET_TIMEOUT
+
+	js_data->scheduling_period_ns = new_scheduling_period_ns;
+
+	kbase_js_set_timeouts(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&js_data->runpool_mutex);
+
+	dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
+			js_scheduling_period);
+
+	return count;
+}
+
+/**
+ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs
+ *                             entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the JS scheduling
+ * period.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	u32 period;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	period = kbdev->js_data.scheduling_period_ns;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+			period / 1000000);
+
+	return ret;
+}
+
+static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
+		show_js_scheduling_period, set_js_scheduling_period);
+
+#if !MALI_CUSTOMER_RELEASE
+/**
+ * set_force_replay - Store callback for the force_replay sysfs file.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (!strncmp("limit=", buf, MIN(6, count))) {
+		int force_replay_limit;
+		int items = sscanf(buf, "limit=%u", &force_replay_limit);
+
+		if (items == 1) {
+			kbdev->force_replay_random = false;
+			kbdev->force_replay_limit = force_replay_limit;
+			kbdev->force_replay_count = 0;
+
+			return count;
+		}
+	} else if (!strncmp("random_limit", buf, MIN(12, count))) {
+		kbdev->force_replay_random = true;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("norandom_limit", buf, MIN(14, count))) {
+		kbdev->force_replay_random = false;
+		kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("core_req=", buf, MIN(9, count))) {
+		unsigned int core_req;
+		int items = sscanf(buf, "core_req=%x", &core_req);
+
+		if (items == 1) {
+			kbdev->force_replay_core_req = (base_jd_core_req)core_req;
+
+			return count;
+		}
+	}
+	dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n");
+	return -EINVAL;
+}
+
+/**
+ * show_force_replay - Show callback for the force_replay sysfs file.
+ *
+ * This function is called to get the contents of the force_replay sysfs
+ * file. It returns the last set value written to the force_replay sysfs file.
+ * If the file didn't get written yet, the values will be 0.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_force_replay(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->force_replay_random)
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=0\nrandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_core_req);
+	else
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=%u\nnorandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_limit,
+				kbdev->force_replay_core_req);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * The sysfs file force_replay.
+ */
+static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay,
+		set_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG
+static ssize_t set_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int softstop_always;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &softstop_always);
+	if (ret || ((softstop_always != 0) && (softstop_always != 1))) {
+		dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n"
+				"Use format <soft_stop_always>\n");
+		return -EINVAL;
+	}
+
+	kbdev->js_data.softstop_always = (bool) softstop_always;
+	dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n",
+			(kbdev->js_data.softstop_always) ?
+			"Enabled" : "Disabled");
+	return count;
+}
+
+static ssize_t show_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * By default, soft-stops are disabled when only a single context is present.
+ * The ability to enable soft-stop when only a single context is present can be
+ * used for debug and unit-testing purposes.
+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.)
+ */
+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG
+typedef void (kbasep_debug_command_func) (struct kbase_device *);
+
+enum kbasep_debug_command_code {
+	KBASEP_DEBUG_COMMAND_DUMPTRACE,
+
+	/* This must be the last enum */
+	KBASEP_DEBUG_COMMAND_COUNT
+};
+
+struct kbasep_debug_command {
+	char *str;
+	kbasep_debug_command_func *func;
+};
+
+/* Debug commands supported by the driver */
+static const struct kbasep_debug_command debug_commands[] = {
+	{
+	 .str = "dumptrace",
+	 .func = &kbasep_trace_dump,
+	 }
+};
+
+/**
+ * show_debug - Show callback for the debug_command sysfs file.
+ *
+ * This function is called to get the contents of the debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * issue_debug - Store callback for the debug_command sysfs file.
+ *
+ * This function is called when the debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @debug_commands to issue the command.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) {
+		if (sysfs_streq(debug_commands[i].str, buf)) {
+			debug_commands[i].func(kbdev);
+			return count;
+		}
+	}
+
+	/* Debug Command not found */
+	dev_err(dev, "debug_command: command not known\n");
+	return -EINVAL;
+}
+
+/* The sysfs file debug_command.
+ *
+ * This is used to issue general debug commands to the device driver.
+ * Reading it will produce a list of debug commands, separated by newlines.
+ * Writing to it with one of those commands will issue said command.
+ */
+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
+#endif /* CONFIG_MALI_DEBUG */
+
+/**
+ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get a description of the present Mali
+ * GPU via the gpuinfo sysfs entry.  This includes the GPU family, the
+ * number of cores, the hardware version and the raw product id.  For
+ * example
+ *
+ *    Mali-T60x MP4 r0p0 0x6956
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t kbase_show_gpuinfo(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	static const struct gpu_product_id_name {
+		unsigned id;
+		char *name;
+	} gpu_product_id_names[] = {
+		{ .id = GPU_ID_PI_T60X, .name = "Mali-T60x" },
+		{ .id = GPU_ID_PI_T62X, .name = "Mali-T62x" },
+		{ .id = GPU_ID_PI_T72X, .name = "Mali-T72x" },
+		{ .id = GPU_ID_PI_T76X, .name = "Mali-T76x" },
+		{ .id = GPU_ID_PI_T82X, .name = "Mali-T82x" },
+		{ .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
+		{ .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
+		{ .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+		{ .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G71" },
+		{ .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G72" },
+		{ .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G51" },
+		{ .id = GPU_ID2_PRODUCT_TNOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-TNOx" },
+		{ .id = GPU_ID2_PRODUCT_TDVX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G31" },
+		{ .id = GPU_ID2_PRODUCT_TGOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G52" },
+	};
+	const char *product_name = "(Unknown Mali GPU)";
+	struct kbase_device *kbdev;
+	u32 gpu_id;
+	unsigned product_id, product_id_mask;
+	unsigned i;
+	bool is_new_format;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	is_new_format = GPU_ID_IS_NEW_FORMAT(product_id);
+	product_id_mask =
+		(is_new_format ?
+			GPU_ID2_PRODUCT_MODEL :
+			GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
+		const struct gpu_product_id_name *p = &gpu_product_id_names[i];
+
+		if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) &&
+		    (p->id & product_id_mask) ==
+		    (product_id & product_id_mask)) {
+			product_name = p->name;
+			break;
+		}
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n",
+		product_name, kbdev->gpu_props.num_cores,
+		(gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT,
+		(gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT,
+		product_id);
+}
+static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
+
+/**
+ * set_dvfs_period - Store callback for the dvfs_period sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the dvfs_period sysfs file is written to. It
+ * checks the data written, and if valid updates the DVFS period variable,
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_dvfs_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int dvfs_period;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &dvfs_period);
+	if (ret || dvfs_period <= 0) {
+		dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n"
+				"Use format <dvfs_period_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.dvfs_period = dvfs_period;
+	dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period);
+
+	return count;
+}
+
+/**
+ * show_dvfs_period - Show callback for the dvfs_period sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_dvfs_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period);
+
+	return ret;
+}
+
+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
+		set_dvfs_period);
+
+/**
+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the pm_poweroff sysfs file is written to.
+ *
+ * This file contains three values separated by whitespace. The values
+ * are gpu_poweroff_time (the period of the poweroff timer, in ns),
+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle
+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer
+ * ticks before the GPU is powered off), in that order.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	s64 gpu_poweroff_time;
+	int poweroff_shader_ticks, poweroff_gpu_ticks;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time,
+			&poweroff_shader_ticks,
+			&poweroff_gpu_ticks);
+	if (items != 3) {
+		dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n"
+				"Use format <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+	kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
+	kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+
+	return count;
+}
+
+/**
+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
+			ktime_to_ns(kbdev->pm.gpu_poweroff_time),
+			kbdev->pm.poweroff_shader_ticks,
+			kbdev->pm.poweroff_gpu_ticks);
+
+	return ret;
+}
+
+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
+		set_pm_poweroff);
+
+/**
+ * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the reset_timeout sysfs file is written to. It
+ * checks the data written, and if valid updates the reset timeout.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_reset_timeout(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int reset_timeout;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &reset_timeout);
+	if (ret || reset_timeout <= 0) {
+		dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n"
+				"Use format <reset_timeout_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->reset_timeout_ms = reset_timeout;
+	dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout);
+
+	return count;
+}
+
+/**
+ * show_reset_timeout - Show callback for the reset_timeout sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current reset timeout.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_reset_timeout(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms);
+
+	return ret;
+}
+
+static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout,
+		set_reset_timeout);
+
+
+
+static ssize_t show_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_size);
+	if (err)
+		return err;
+
+	kbase_mem_pool_trim(&kbdev->mem_pool, new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size,
+		set_mem_pool_size);
+
+static ssize_t show_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_max_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_max_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_max_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
+		set_mem_pool_max_size);
+
+/**
+ * show_lp_mem_pool_size - Show size of the large memory pages pool.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the pool size.
+ *
+ * This function is called to get the number of large memory pages which currently populate the kbdev pool.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_lp_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%zu\n", kbase_mem_pool_size(&kbdev->lp_mem_pool));
+}
+
+/**
+ * set_lp_mem_pool_size - Set size of the large memory pages pool.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called to set the number of large memory pages which should populate the kbdev pool.
+ * This may cause existing pages to be removed from the pool, or new pages to be created and then added to the pool.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_lp_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	unsigned long new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, &new_size);
+	if (err)
+		return err;
+
+	kbase_mem_pool_trim(&kbdev->lp_mem_pool, new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(lp_mem_pool_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_size,
+		set_lp_mem_pool_size);
+
+/**
+ * show_lp_mem_pool_max_size - Show maximum size of the large memory pages pool.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the pool size.
+ *
+ * This function is called to get the maximum number of large memory pages that the kbdev pool can possibly contain.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_lp_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%zu\n", kbase_mem_pool_max_size(&kbdev->lp_mem_pool));
+}
+
+/**
+ * set_lp_mem_pool_max_size - Set maximum size of the large memory pages pool.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called to set the maximum number of large memory pages that the kbdev pool can possibly contain.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_lp_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	unsigned long new_max_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, &new_max_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_set_max_size(&kbdev->lp_mem_pool, new_max_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size,
+		set_lp_mem_pool_max_size);
+
+/**
+ * show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs
+ *                               entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the context scheduling mode information.
+ *
+ * This function is called to get the context scheduling mode being used by JS.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_ctx_scheduling_mode(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->js_ctx_scheduling_mode);
+}
+
+/**
+ * set_js_ctx_scheduling_mode - Set callback for js_ctx_scheduling_mode sysfs
+ *                              entry.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called when the js_ctx_scheduling_mode sysfs file is written
+ * to. It checks the data written, and if valid updates the ctx scheduling mode
+ * being by JS.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_ctx_scheduling_mode(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbasep_kctx_list_element *element;
+	u32 new_js_ctx_scheduling_mode;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	int ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtouint(buf, 0, &new_js_ctx_scheduling_mode);
+	if (ret || new_js_ctx_scheduling_mode >= KBASE_JS_PRIORITY_MODE_COUNT) {
+		dev_err(kbdev->dev, "Couldn't process js_ctx_scheduling_mode"
+				" write operation.\n"
+				"Use format <js_ctx_scheduling_mode>\n");
+		return -EINVAL;
+	}
+
+	if (new_js_ctx_scheduling_mode == kbdev->js_ctx_scheduling_mode)
+		return count;
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Update the context priority mode */
+	kbdev->js_ctx_scheduling_mode = new_js_ctx_scheduling_mode;
+
+	/* Adjust priority of all the contexts as per the new mode */
+	list_for_each_entry(element, &kbdev->kctx_list, link)
+		kbase_js_update_ctx_priority(element->kctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	dev_dbg(kbdev->dev, "JS ctx scheduling mode: %u\n", new_js_ctx_scheduling_mode);
+
+	return count;
+}
+
+static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR,
+		show_js_ctx_scheduling_mode,
+		set_js_ctx_scheduling_mode);
+#ifdef CONFIG_DEBUG_FS
+
+/* Number of entries in serialize_jobs_settings[] */
+#define NR_SERIALIZE_JOBS_SETTINGS 5
+/* Maximum string length in serialize_jobs_settings[].name */
+#define MAX_SERIALIZE_JOBS_NAME_LEN 16
+
+static struct
+{
+	char *name;
+	u8 setting;
+} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = {
+	{"none", 0},
+	{"intra-slot", KBASE_SERIALIZE_INTRA_SLOT},
+	{"inter-slot", KBASE_SERIALIZE_INTER_SLOT},
+	{"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT},
+	{"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT |
+			KBASE_SERIALIZE_RESET}
+};
+
+/**
+ * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs
+ *                                  file
+ * @sfile: seq_file pointer
+ * @data:  Private callback data
+ *
+ * This function is called to get the contents of the serialize_jobs debugfs
+ * file. This is a list of the available settings with the currently active one
+ * surrounded by square brackets.
+ *
+ * Return: 0 on success, or an error code on error
+ */
+static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_device *kbdev = sfile->private;
+	int i;
+
+	CSTD_UNUSED(data);
+
+	for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+		if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting)
+			seq_printf(sfile, "[%s] ",
+					serialize_jobs_settings[i].name);
+		else
+			seq_printf(sfile, "%s ",
+					serialize_jobs_settings[i].name);
+	}
+
+	seq_puts(sfile, "\n");
+
+	return 0;
+}
+
+/**
+ * kbasep_serialize_jobs_debugfs_write - Store callback for the serialize_jobs
+ *                                       debugfs file.
+ * @file:  File pointer
+ * @ubuf:  User buffer containing data to store
+ * @count: Number of bytes in user buffer
+ * @ppos:  File position
+ *
+ * This function is called when the serialize_jobs debugfs file is written to.
+ * It matches the requested setting against the available settings and if a
+ * matching setting is found updates kbdev->serialize_jobs.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	struct seq_file *s = file->private_data;
+	struct kbase_device *kbdev = s->private;
+	char buf[MAX_SERIALIZE_JOBS_NAME_LEN];
+	int i;
+	bool valid = false;
+
+	CSTD_UNUSED(ppos);
+
+	count = min_t(size_t, sizeof(buf) - 1, count);
+	if (copy_from_user(buf, ubuf, count))
+		return -EFAULT;
+
+	buf[count] = 0;
+
+	for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+		if (sysfs_streq(serialize_jobs_settings[i].name, buf)) {
+			kbdev->serialize_jobs =
+					serialize_jobs_settings[i].setting;
+			valid = true;
+			break;
+		}
+	}
+
+	if (!valid) {
+		dev_err(kbdev->dev, "serialize_jobs: invalid setting\n");
+		return -EINVAL;
+	}
+
+	return count;
+}
+
+/**
+ * kbasep_serialize_jobs_debugfs_open - Open callback for the serialize_jobs
+ *                                     debugfs file
+ * @in:   inode pointer
+ * @file: file pointer
+ *
+ * Return: Zero on success, error code on failure
+ */
+static int kbasep_serialize_jobs_debugfs_open(struct inode *in,
+		struct file *file)
+{
+	return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_serialize_jobs_debugfs_fops = {
+	.open = kbasep_serialize_jobs_debugfs_open,
+	.read = seq_read,
+	.write = kbasep_serialize_jobs_debugfs_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_DEBUG_FS */
+
+static int kbasep_protected_mode_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_OF
+	struct device_node *protected_node;
+	struct platform_device *pdev;
+	struct protected_mode_device *protected_dev;
+#endif
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		/* Use native protected ops */
+		kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev),
+				GFP_KERNEL);
+		if (!kbdev->protected_dev)
+			return -ENOMEM;
+		kbdev->protected_dev->data = kbdev;
+		kbdev->protected_ops = &kbase_native_protected_ops;
+		kbdev->protected_mode_support = true;
+		return 0;
+	}
+
+	kbdev->protected_mode_support = false;
+
+#ifdef CONFIG_OF
+	protected_node = of_parse_phandle(kbdev->dev->of_node,
+			"protected-mode-switcher", 0);
+
+	if (!protected_node)
+		protected_node = of_parse_phandle(kbdev->dev->of_node,
+				"secure-mode-switcher", 0);
+
+	if (!protected_node) {
+		/* If protected_node cannot be looked up then we assume
+		 * protected mode is not supported on this platform. */
+		dev_info(kbdev->dev, "Protected mode not available\n");
+		return 0;
+	}
+
+	pdev = of_find_device_by_node(protected_node);
+	if (!pdev)
+		return -EINVAL;
+
+	protected_dev = platform_get_drvdata(pdev);
+	if (!protected_dev)
+		return -EPROBE_DEFER;
+
+	kbdev->protected_ops = &protected_dev->ops;
+	kbdev->protected_dev = protected_dev;
+
+	if (kbdev->protected_ops) {
+		int err;
+
+		/* Make sure protected mode is disabled on startup */
+		mutex_lock(&kbdev->pm.lock);
+		err = kbdev->protected_ops->protected_mode_disable(
+				kbdev->protected_dev);
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* protected_mode_disable() returns -EINVAL if not supported */
+		kbdev->protected_mode_support = (err != -EINVAL);
+	}
+#endif
+	return 0;
+}
+
+static void kbasep_protected_mode_term(struct kbase_device *kbdev)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+		kfree(kbdev->protected_dev);
+}
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	int err = 0;
+
+	if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) {
+		dev_err(kbdev->dev, "Register window unavailable\n");
+		err = -EIO;
+		goto out_region;
+	}
+
+	kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+	if (!kbdev->reg) {
+		dev_err(kbdev->dev, "Can't remap register window\n");
+		err = -EINVAL;
+		goto out_ioremap;
+	}
+
+	return err;
+
+ out_ioremap:
+	release_mem_region(kbdev->reg_start, kbdev->reg_size);
+ out_region:
+	return err;
+}
+
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+	if (kbdev->reg) {
+		iounmap(kbdev->reg);
+		release_mem_region(kbdev->reg_start, kbdev->reg_size);
+		kbdev->reg = NULL;
+		kbdev->reg_start = 0;
+		kbdev->reg_size = 0;
+	}
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+static int registers_map(struct kbase_device * const kbdev)
+{
+
+		/* the first memory resource is the physical address of the GPU
+		 * registers */
+		struct platform_device *pdev = to_platform_device(kbdev->dev);
+		struct resource *reg_res;
+		int err;
+
+		reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!reg_res) {
+			dev_err(kbdev->dev, "Invalid register resource\n");
+			return -ENOENT;
+		}
+
+		kbdev->reg_start = reg_res->start;
+		kbdev->reg_size = resource_size(reg_res);
+
+		err = kbase_common_reg_map(kbdev);
+		if (err) {
+			dev_err(kbdev->dev, "Failed to map registers\n");
+			return err;
+		}
+
+	return 0;
+}
+
+static void registers_unmap(struct kbase_device *kbdev)
+{
+	kbase_common_reg_unmap(kbdev);
+}
+
+static int power_control_init(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int err = 0;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
+	if (IS_ERR_OR_NULL(kbdev->regulator)) {
+		err = PTR_ERR(kbdev->regulator);
+		kbdev->regulator = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get regulator\n");
+			return err;
+		}
+		dev_info(kbdev->dev,
+			"Continuing without Mali regulator control\n");
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+	kbdev->clock = of_clk_get(kbdev->dev->of_node, 0);
+	if (IS_ERR_OR_NULL(kbdev->clock)) {
+		err = PTR_ERR(kbdev->clock);
+		kbdev->clock = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get clock\n");
+			goto fail;
+		}
+		dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare_enable(kbdev->clock);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Failed to prepare and enable clock (%d)\n",
+				err);
+			goto fail;
+		}
+	}
+
+#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) \
+	|| defined(LSK_OPPV2_BACKPORT)
+	err = dev_pm_opp_of_add_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	err = of_init_opp_table(kbdev->dev);
+#else
+	err = 0;
+#endif /* LINUX_VERSION_CODE */
+	if (err)
+		dev_dbg(kbdev->dev, "OPP table not found\n");
+#endif /* CONFIG_OF && CONFIG_PM_OPP */
+
+	return 0;
+
+fail:
+
+if (kbdev->clock != NULL) {
+	clk_put(kbdev->clock);
+	kbdev->clock = NULL;
+}
+
+#ifdef CONFIG_REGULATOR
+	if (NULL != kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif
+
+	return err;
+}
+
+static void power_control_term(struct kbase_device *kbdev)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) || \
+		defined(LSK_OPPV2_BACKPORT)
+	dev_pm_opp_of_remove_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+	of_free_opp_table(kbdev->dev);
+#endif
+
+	if (kbdev->clock) {
+		clk_disable_unprepare(kbdev->clock);
+		clk_put(kbdev->clock);
+		kbdev->clock = NULL;
+	}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	if (kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#if KBASE_GPU_RESET_EN
+#include <mali_kbase_hwaccess_jm.h>
+
+static void trigger_quirks_reload(struct kbase_device *kbdev)
+{
+	kbase_pm_context_active(kbdev);
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	kbase_pm_context_idle(kbdev);
+}
+
+#define MAKE_QUIRK_ACCESSORS(type) \
+static int type##_quirks_set(void *data, u64 val) \
+{ \
+	struct kbase_device *kbdev; \
+	kbdev = (struct kbase_device *)data; \
+	kbdev->hw_quirks_##type = (u32)val; \
+	trigger_quirks_reload(kbdev); \
+	return 0;\
+} \
+\
+static int type##_quirks_get(void *data, u64 *val) \
+{ \
+	struct kbase_device *kbdev;\
+	kbdev = (struct kbase_device *)data;\
+	*val = kbdev->hw_quirks_##type;\
+	return 0;\
+} \
+DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
+		type##_quirks_set, "%llu\n")
+
+MAKE_QUIRK_ACCESSORS(sc);
+MAKE_QUIRK_ACCESSORS(tiler);
+MAKE_QUIRK_ACCESSORS(mmu);
+MAKE_QUIRK_ACCESSORS(jm);
+
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read
+ * @file: File object to read is for
+ * @buf:  User buffer to populate with data
+ * @len:  Length of user buffer
+ * @ppos: Offset within file object
+ *
+ * Retrieves the current status of protected debug mode
+ * (0 = disabled, 1 = enabled)
+ *
+ * Return: Number of bytes added to user buffer
+ */
+static ssize_t debugfs_protected_debug_mode_read(struct file *file,
+				char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)file->private_data;
+	u32 gpu_status;
+	ssize_t ret_val;
+
+	kbase_pm_context_active(kbdev);
+	gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL);
+	kbase_pm_context_idle(kbdev);
+
+	if (gpu_status & GPU_DBGEN)
+		ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2);
+	else
+		ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2);
+
+	return ret_val;
+}
+
+/*
+ * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops
+ *
+ * Contains the file operations for the "protected_debug_mode" debugfs file
+ */
+static const struct file_operations fops_protected_debug_mode = {
+	.open = simple_open,
+	.read = debugfs_protected_debug_mode_read,
+	.llseek = default_llseek,
+};
+
+static int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	struct dentry *debugfs_ctx_defaults_directory;
+	int err;
+
+	kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
+			NULL);
+	if (!kbdev->mali_debugfs_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
+			kbdev->mali_debugfs_directory);
+	if (!kbdev->debugfs_ctx_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
+			kbdev->debugfs_ctx_directory);
+	if (!debugfs_ctx_defaults_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+#if !MALI_CUSTOMER_RELEASE
+	kbasep_regs_dump_debugfs_init(kbdev);
+#endif /* !MALI_CUSTOMER_RELEASE */
+	kbasep_regs_history_debugfs_init(kbdev);
+
+	kbase_debug_job_fault_debugfs_init(kbdev);
+	kbasep_gpu_memory_debugfs_init(kbdev);
+	kbase_as_fault_debugfs_init(kbdev);
+#if KBASE_GPU_RESET_EN
+	/* fops_* variables created by invocations of macro
+	 * MAKE_QUIRK_ACCESSORS() above. */
+	debugfs_create_file("quirks_sc", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_sc_quirks);
+	debugfs_create_file("quirks_tiler", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_tiler_quirks);
+	debugfs_create_file("quirks_mmu", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_mmu_quirks);
+	debugfs_create_file("quirks_jm", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_jm_quirks);
+#endif /* KBASE_GPU_RESET_EN */
+
+	debugfs_create_bool("infinite_cache", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->infinite_cache_active_default);
+
+	debugfs_create_size_t("mem_pool_max_size", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->mem_pool_max_size_default);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		debugfs_create_file("protected_debug_mode", S_IRUGO,
+				kbdev->mali_debugfs_directory, kbdev,
+				&fops_protected_debug_mode);
+	}
+
+#if KBASE_TRACE_ENABLE
+	kbasep_trace_debugfs_init(kbdev);
+#endif /* KBASE_TRACE_ENABLE */
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kbasep_trace_timeline_debugfs_init(kbdev);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+#ifdef CONFIG_MALI_DEVFREQ
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->inited_subsys & inited_devfreq)
+		kbase_ipa_debugfs_init(kbdev);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+
+#ifdef CONFIG_DEBUG_FS
+	debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_serialize_jobs_debugfs_fops);
+#endif /* CONFIG_DEBUG_FS */
+
+	return 0;
+
+out:
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+	return err;
+}
+
+static void kbase_device_debugfs_term(struct kbase_device *kbdev)
+{
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+}
+
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	return 0;
+}
+
+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
+#endif /* CONFIG_DEBUG_FS */
+
+static void kbase_device_coherency_init(struct kbase_device *kbdev,
+		unsigned prod_id)
+{
+#ifdef CONFIG_OF
+	u32 supported_coherency_bitmap =
+		kbdev->gpu_props.props.raw_props.coherency_mode;
+	const void *coherency_override_dts;
+	u32 override_coherency;
+
+	/* Only for tMIx :
+	 * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
+	 * documented for tMIx so force correct value here.
+	 */
+	if (GPU_ID_IS_NEW_FORMAT(prod_id) &&
+		   (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
+				   GPU_ID2_PRODUCT_TMIX))
+		if (supported_coherency_bitmap ==
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE))
+			supported_coherency_bitmap |=
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+
+#endif /* CONFIG_OF */
+
+	kbdev->system_coherency = COHERENCY_NONE;
+
+	/* device tree may override the coherency */
+#ifdef CONFIG_OF
+	coherency_override_dts = of_get_property(kbdev->dev->of_node,
+						"system-coherency",
+						NULL);
+	if (coherency_override_dts) {
+
+		override_coherency = be32_to_cpup(coherency_override_dts);
+
+		if ((override_coherency <= COHERENCY_NONE) &&
+			(supported_coherency_bitmap &
+			 COHERENCY_FEATURE_BIT(override_coherency))) {
+
+			kbdev->system_coherency = override_coherency;
+
+			dev_info(kbdev->dev,
+				"Using coherency mode %u set from dtb",
+				override_coherency);
+		} else
+			dev_warn(kbdev->dev,
+				"Ignoring unsupported coherency mode %u set from dtb",
+				override_coherency);
+	}
+
+#endif /* CONFIG_OF */
+
+	kbdev->gpu_props.props.raw_props.coherency_mode =
+		kbdev->system_coherency;
+}
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+
+/* Callback used by the kbase bus logger client, to initiate a GPU reset
+ * when the bus log is restarted.  GPU reset is used as reference point
+ * in HW bus log analyses.
+ */
+static void kbase_logging_started_cb(void *data)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)data;
+
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	dev_info(kbdev->dev, "KBASE - Bus logger restarted\n");
+}
+#endif
+
+static struct attribute *kbase_attrs[] = {
+#ifdef CONFIG_MALI_DEBUG
+	&dev_attr_debug_command.attr,
+	&dev_attr_js_softstop_always.attr,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&dev_attr_force_replay.attr,
+#endif
+	&dev_attr_js_timeouts.attr,
+	&dev_attr_soft_job_timeout.attr,
+	&dev_attr_gpuinfo.attr,
+	&dev_attr_dvfs_period.attr,
+	&dev_attr_pm_poweroff.attr,
+	&dev_attr_reset_timeout.attr,
+	&dev_attr_js_scheduling_period.attr,
+	&dev_attr_power_policy.attr,
+	&dev_attr_core_availability_policy.attr,
+	&dev_attr_core_mask.attr,
+	&dev_attr_mem_pool_size.attr,
+	&dev_attr_mem_pool_max_size.attr,
+	&dev_attr_lp_mem_pool_size.attr,
+	&dev_attr_lp_mem_pool_max_size.attr,
+	&dev_attr_js_ctx_scheduling_mode.attr,
+	NULL
+};
+
+static const struct attribute_group kbase_attr_group = {
+	.attrs = kbase_attrs,
+};
+
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	const struct list_head *dev_list;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kfree(kbdev->gpu_props.prop_buffer);
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	if (kbdev->inited_subsys & inited_buslogger) {
+		bl_core_client_unregister(kbdev->buslogger);
+		kbdev->inited_subsys &= ~inited_buslogger;
+	}
+#endif
+
+
+	if (kbdev->inited_subsys & inited_dev_list) {
+		dev_list = kbase_dev_list_get();
+		list_del(&kbdev->entry);
+		kbase_dev_list_put(dev_list);
+		kbdev->inited_subsys &= ~inited_dev_list;
+	}
+
+	if (kbdev->inited_subsys & inited_misc_register) {
+		misc_deregister(&kbdev->mdev);
+		kbdev->inited_subsys &= ~inited_misc_register;
+	}
+
+	if (kbdev->inited_subsys & inited_sysfs_group) {
+		sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+		kbdev->inited_subsys &= ~inited_sysfs_group;
+	}
+
+	if (kbdev->inited_subsys & inited_get_device) {
+		put_device(kbdev->dev);
+		kbdev->inited_subsys &= ~inited_get_device;
+	}
+
+	if (kbdev->inited_subsys & inited_debugfs) {
+		kbase_device_debugfs_term(kbdev);
+		kbdev->inited_subsys &= ~inited_debugfs;
+	}
+
+	if (kbdev->inited_subsys & inited_job_fault) {
+		kbase_debug_job_fault_dev_term(kbdev);
+		kbdev->inited_subsys &= ~inited_job_fault;
+	}
+
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_term(kbdev);
+		kbdev->inited_subsys &= ~inited_devfreq;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_vinstr) {
+		kbase_vinstr_term(kbdev->vinstr_ctx);
+		kbdev->inited_subsys &= ~inited_vinstr;
+	}
+
+	if (kbdev->inited_subsys & inited_backend_late) {
+		kbase_backend_late_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_late;
+	}
+
+	if (kbdev->inited_subsys & inited_tlstream) {
+		kbase_tlstream_term();
+		kbdev->inited_subsys &= ~inited_tlstream;
+	}
+
+	/* Bring job and mem sys to a halt before we continue termination */
+
+	if (kbdev->inited_subsys & inited_js)
+		kbasep_js_devdata_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_mem)
+		kbase_mem_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_protected) {
+		kbasep_protected_mode_term(kbdev);
+		kbdev->inited_subsys &= ~inited_protected;
+	}
+
+	if (kbdev->inited_subsys & inited_js) {
+		kbasep_js_devdata_term(kbdev);
+		kbdev->inited_subsys &= ~inited_js;
+	}
+
+	if (kbdev->inited_subsys & inited_mem) {
+		kbase_mem_term(kbdev);
+		kbdev->inited_subsys &= ~inited_mem;
+	}
+
+	if (kbdev->inited_subsys & inited_ctx_sched) {
+		kbase_ctx_sched_term(kbdev);
+		kbdev->inited_subsys &= ~inited_ctx_sched;
+	}
+
+	if (kbdev->inited_subsys & inited_device) {
+		kbase_device_term(kbdev);
+		kbdev->inited_subsys &= ~inited_device;
+	}
+
+	if (kbdev->inited_subsys & inited_backend_early) {
+		kbase_backend_early_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_early;
+	}
+
+	if (kbdev->inited_subsys & inited_io_history) {
+		kbase_io_history_term(&kbdev->io_history);
+		kbdev->inited_subsys &= ~inited_io_history;
+	}
+
+	if (kbdev->inited_subsys & inited_power_control) {
+		power_control_term(kbdev);
+		kbdev->inited_subsys &= ~inited_power_control;
+	}
+
+	if (kbdev->inited_subsys & inited_registers_map) {
+		registers_unmap(kbdev);
+		kbdev->inited_subsys &= ~inited_registers_map;
+	}
+
+#ifdef CONFIG_MALI_NO_MALI
+	if (kbdev->inited_subsys & inited_gpu_device) {
+		gpu_device_destroy(kbdev);
+		kbdev->inited_subsys &= ~inited_gpu_device;
+	}
+#endif /* CONFIG_MALI_NO_MALI */
+
+	if (kbdev->inited_subsys != 0)
+		dev_err(kbdev->dev, "Missing sub system termination\n");
+
+	kbase_device_free(kbdev);
+
+	return 0;
+}
+
+
+/* Number of register accesses for the buffer that we allocate during
+ * initialization time. The buffer size can be changed later via debugfs. */
+#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512)
+
+static int kbase_platform_device_probe(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev;
+	struct mali_base_gpu_core_props *core_props;
+	u32 gpu_id;
+	unsigned prod_id;
+	const struct list_head *dev_list;
+	int err = 0;
+
+	kbdev = kbase_device_alloc();
+	if (!kbdev) {
+		dev_err(&pdev->dev, "Allocate device failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+
+	kbdev->dev = &pdev->dev;
+	dev_set_drvdata(kbdev->dev, kbdev);
+
+#ifdef CONFIG_MALI_NO_MALI
+	err = gpu_device_create(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Dummy model initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_gpu_device;
+#endif /* CONFIG_MALI_NO_MALI */
+
+	err = assign_irqs(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "IRQ search failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	err = registers_map(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Register map failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_registers_map;
+
+	err = power_control_init(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Power control initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_power_control;
+
+	err = kbase_io_history_init(&kbdev->io_history,
+			KBASEP_DEFAULT_REGISTER_HISTORY_SIZE);
+	if (err) {
+		dev_err(&pdev->dev, "Register access history initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+	kbdev->inited_subsys |= inited_io_history;
+
+	err = kbase_backend_early_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Early backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_early;
+
+	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+			kbase_dev_nr);
+
+	kbase_disjoint_init(kbdev);
+
+	/* obtain max configured gpu frequency, if devfreq is enabled then
+	 * this will be overridden by the highest operating point found
+	 */
+	core_props = &(kbdev->gpu_props.props.core_props);
+#ifdef GPU_FREQ_KHZ_MAX
+	core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+#else
+	core_props->gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX;
+#endif
+
+	err = kbase_device_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Device initialization failed (%d)\n", err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_device;
+
+	err = kbase_ctx_sched_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Context scheduler initialization failed (%d)\n",
+				err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_ctx_sched;
+
+	err = kbase_mem_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Memory subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_mem;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+	prod_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	kbase_device_coherency_init(kbdev, prod_id);
+
+	err = kbasep_protected_mode_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Protected mode subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_protected;
+
+	dev_list = kbase_dev_list_get();
+	list_add(&kbdev->entry, &kbase_dev_list);
+	kbase_dev_list_put(dev_list);
+	kbdev->inited_subsys |= inited_dev_list;
+
+	err = kbasep_js_devdata_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job JS devdata initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_js;
+
+	err = kbase_tlstream_init();
+	if (err) {
+		dev_err(kbdev->dev, "Timeline stream initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_tlstream;
+
+	err = kbase_backend_late_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Late backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_late;
+
+	/* Initialize the kctx list. This is used by vinstr. */
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
+
+	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+	if (!kbdev->vinstr_ctx) {
+		dev_err(kbdev->dev,
+			"Virtual instrumentation initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+	kbdev->inited_subsys |= inited_vinstr;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	/* Devfreq uses vinstr, so must be initialized after it. */
+	err = kbase_devfreq_init(kbdev);
+	if (!err)
+		kbdev->inited_subsys |= inited_devfreq;
+	else
+		dev_err(kbdev->dev, "Continuing without devfreq\n");
+#endif /* CONFIG_MALI_DEVFREQ */
+
+	err = kbase_debug_job_fault_dev_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job fault debug initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_job_fault;
+
+	err = kbase_device_debugfs_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "DebugFS initialization failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_debugfs;
+
+	kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+	kbdev->mdev.name = kbdev->devname;
+	kbdev->mdev.fops = &kbase_fops;
+	kbdev->mdev.parent = get_device(kbdev->dev);
+	kbdev->mdev.mode = 0666;
+	kbdev->inited_subsys |= inited_get_device;
+
+	/* This needs to happen before registering the device with misc_register(),
+	 * otherwise it causes a race condition between registering the device and a
+	 * uevent event being generated for userspace, causing udev rules to run
+	 * which might expect certain sysfs attributes present. As a result of the
+	 * race condition we avoid, some Mali sysfs entries may have appeared to
+	 * udev to not exist.
+
+	 * For more information, see
+	 * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the
+	 * paragraph that starts with "Word of warning", currently the second-last
+	 * paragraph.
+	 */
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+	if (err) {
+		dev_err(&pdev->dev, "SysFS group creation failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_sysfs_group;
+
+	err = misc_register(&kbdev->mdev);
+	if (err) {
+		dev_err(kbdev->dev, "Misc device registration failed for %s\n",
+			kbdev->devname);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_misc_register;
+
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	err = bl_core_client_register(kbdev->devname,
+						kbase_logging_started_cb,
+						kbdev, &kbdev->buslogger,
+						THIS_MODULE, NULL);
+	if (err == 0) {
+		kbdev->inited_subsys |= inited_buslogger;
+		bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
+	} else {
+		dev_warn(kbdev->dev, "Bus log client registration failed\n");
+		err = 0;
+	}
+#endif
+
+	err = kbase_gpuprops_populate_user_buffer(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "GPU property population failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	dev_info(kbdev->dev,
+			"Probed as %s\n", dev_name(kbdev->mdev.this_device));
+
+	kbase_dev_nr++;
+
+	return err;
+}
+
+#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE
+
+/**
+ * kbase_device_suspend - Suspend callback from the OS.
+ *
+ * This is called by Linux when the device should suspend.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+static int kbase_device_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_suspend_device(kbdev->devfreq);
+#endif
+
+	kbase_pm_suspend(kbdev);
+	return 0;
+}
+
+/**
+ * kbase_device_resume - Resume callback from the OS.
+ *
+ * This is called by Linux when the device should resume from suspension.
+ *
+ * @dev:  The device to resume
+ *
+ * Return: A standard Linux error code
+ */
+static int kbase_device_resume(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kbase_pm_resume(kbdev);
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_resume_device(kbdev->devfreq);
+#endif
+	return 0;
+}
+
+/**
+ * kbase_device_runtime_suspend - Runtime suspend callback from the OS.
+ *
+ * This is called by Linux when the device should prepare for a condition in
+ * which it will not be able to communicate with the CPU(s) and RAM due to
+ * power management.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_suspend_device(kbdev->devfreq);
+#endif
+
+	if (kbdev->pm.backend.callback_power_runtime_off) {
+		kbdev->pm.backend.callback_power_runtime_off(kbdev);
+		dev_dbg(dev, "runtime suspend\n");
+	}
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/**
+ * kbase_device_runtime_resume - Runtime resume callback from the OS.
+ *
+ * This is called by Linux when the device should go into a fully active state.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_resume(struct device *dev)
+{
+	int ret = 0;
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->pm.backend.callback_power_runtime_on) {
+		ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
+		dev_dbg(dev, "runtime resume\n");
+	}
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_resume_device(kbdev->devfreq);
+#endif
+
+	return ret;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_device_runtime_idle - Runtime idle callback from the OS.
+ * @dev: The device to suspend
+ *
+ * This is called by Linux when the device appears to be inactive and it might
+ * be placed into a low power state.
+ *
+ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend,
+ * otherwise a standard Linux error code
+ */
+static int kbase_device_runtime_idle(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* Use platform specific implementation if it exists. */
+	if (kbdev->pm.backend.callback_power_runtime_idle)
+		return kbdev->pm.backend.callback_power_runtime_idle(kbdev);
+
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/* The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+	.suspend = kbase_device_suspend,
+	.resume = kbase_device_resume,
+#ifdef KBASE_PM_RUNTIME
+	.runtime_suspend = kbase_device_runtime_suspend,
+	.runtime_resume = kbase_device_runtime_resume,
+	.runtime_idle = kbase_device_runtime_idle,
+#endif /* KBASE_PM_RUNTIME */
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+	{ .compatible = "arm,malit6xx" },
+	{ .compatible = "arm,mali-midgard" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
+
+static struct platform_driver kbase_platform_driver = {
+	.probe = kbase_platform_device_probe,
+	.remove = kbase_platform_device_remove,
+	.driver = {
+		   .name = kbase_drv_name,
+		   .owner = THIS_MODULE,
+		   .pm = &kbase_pm_ops,
+		   .of_match_table = of_match_ptr(kbase_dt_ids),
+	},
+};
+
+/*
+ * The driver will not provide a shortcut to create the Mali platform device
+ * anymore when using Device Tree.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else
+
+static int __init kbase_driver_init(void)
+{
+	int ret;
+
+	ret = kbase_platform_register();
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&kbase_platform_driver);
+
+	if (ret)
+		kbase_platform_unregister();
+
+	return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+	platform_driver_unregister(&kbase_platform_driver);
+	kbase_platform_unregister();
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
+		__stringify(BASE_UK_VERSION_MAJOR) "." \
+		__stringify(BASE_UK_VERSION_MINOR) ")");
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
+#define CREATE_TRACE_POINTS
+#endif
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Create the trace points (otherwise we just get code to call a tracepoint) */
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
+
+void kbase_trace_mali_pm_status(u32 event, u64 value)
+{
+	trace_mali_pm_status(event, value);
+}
+
+void kbase_trace_mali_pm_power_off(u32 event, u64 value)
+{
+	trace_mali_pm_power_off(event, value);
+}
+
+void kbase_trace_mali_pm_power_on(u32 event, u64 value)
+{
+	trace_mali_pm_power_on(event, value);
+}
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id)
+{
+	trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id);
+}
+
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value)
+{
+	trace_mali_page_fault_insert_pages(event, value);
+}
+
+void kbase_trace_mali_mmu_as_in_use(int event)
+{
+	trace_mali_mmu_as_in_use(event);
+}
+
+void kbase_trace_mali_mmu_as_released(int event)
+{
+	trace_mali_mmu_as_released(event);
+}
+
+void kbase_trace_mali_total_alloc_pages_change(long long int event)
+{
+	trace_mali_total_alloc_pages_change(event);
+}
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_MALI_SYSTEM_TRACE
+#include "mali_linux_kbase_trace.h"
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
new file mode 100644
index 0000000..85a6afd
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
@@ -0,0 +1,208 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_ctx_sched.h"
+
+int kbase_ctx_sched_init(struct kbase_device *kbdev)
+{
+	int as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
+
+	/* These two must be recalculated if nr_hw_address_spaces changes
+	 * (e.g. for HW workarounds) */
+	kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+		bool use_workaround;
+
+		use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
+		if (use_workaround) {
+			dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
+			kbdev->nr_user_address_spaces = 1;
+		}
+	}
+
+	kbdev->as_free = as_present; /* All ASs initially free */
+
+	memset(kbdev->as_to_kctx, 0, sizeof(kbdev->as_to_kctx));
+
+	return 0;
+}
+
+void kbase_ctx_sched_term(struct kbase_device *kbdev)
+{
+	s8 i;
+
+	/* Sanity checks */
+	for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
+		WARN_ON(kbdev->as_to_kctx[i] != NULL);
+		WARN_ON(!(kbdev->as_free & (1u << i)));
+	}
+}
+
+/* kbasep_ctx_sched_find_as_for_ctx - Find a free address space
+ *
+ * @kbdev: The context for which to find a free address space
+ *
+ * Return: A valid AS if successful, otherwise KBASEP_AS_NR_INVALID
+ *
+ * This function returns an address space available for use. It would prefer
+ * returning an AS that has been previously assigned to the context to
+ * avoid having to reprogram the MMU.
+ */
+static int kbasep_ctx_sched_find_as_for_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+	int free_as;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* First check if the previously assigned AS is available */
+	if ((kctx->as_nr != KBASEP_AS_NR_INVALID) &&
+			(kbdev->as_free & (1u << kctx->as_nr)))
+		return kctx->as_nr;
+
+	/* The previously assigned AS was taken, we'll be returning any free
+	 * AS at this point.
+	 */
+	free_as = ffs(kbdev->as_free) - 1;
+	if (free_as >= 0 && free_as < kbdev->nr_hw_address_spaces)
+		return free_as;
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(!kbdev->pm.backend.gpu_powered);
+
+	if (atomic_inc_return(&kctx->refcount) == 1) {
+		int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx);
+
+		if (free_as != KBASEP_AS_NR_INVALID) {
+			kbdev->as_free &= ~(1u << free_as);
+			/* Only program the MMU if the context has not been
+			 * assigned the same address space before.
+			 */
+			if (free_as != kctx->as_nr) {
+				struct kbase_context *const prev_kctx =
+					kbdev->as_to_kctx[free_as];
+
+				if (prev_kctx) {
+					WARN_ON(atomic_read(&prev_kctx->refcount) != 0);
+					kbase_mmu_disable(prev_kctx);
+					prev_kctx->as_nr = KBASEP_AS_NR_INVALID;
+				}
+
+				kctx->as_nr = free_as;
+				kbdev->as_to_kctx[free_as] = kctx;
+				kbase_mmu_update(kctx);
+			}
+		} else {
+			atomic_dec(&kctx->refcount);
+
+			/* Failed to find an available address space, we must
+			 * be returning an error at this point.
+			 */
+			WARN_ON(kctx->as_nr != KBASEP_AS_NR_INVALID);
+		}
+	}
+
+	return kctx->as_nr;
+}
+
+void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	WARN_ON(atomic_read(&kctx->refcount) == 0);
+	WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID);
+	WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx);
+
+	atomic_inc(&kctx->refcount);
+}
+
+void kbase_ctx_sched_release_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (atomic_dec_return(&kctx->refcount) == 0)
+		kbdev->as_free |= (1u << kctx->as_nr);
+}
+
+void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(atomic_read(&kctx->refcount) != 0);
+
+	if (kctx->as_nr != KBASEP_AS_NR_INVALID) {
+		if (kbdev->pm.backend.gpu_powered)
+			kbase_mmu_disable(kctx);
+
+		kbdev->as_to_kctx[kctx->as_nr] = NULL;
+		kctx->as_nr = KBASEP_AS_NR_INVALID;
+	}
+}
+
+void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
+{
+	s8 i;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(!kbdev->pm.backend.gpu_powered);
+
+	for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
+		struct kbase_context *kctx;
+
+		kctx = kbdev->as_to_kctx[i];
+		if (kctx) {
+			if (atomic_read(&kctx->refcount)) {
+				WARN_ON(kctx->as_nr != i);
+
+				kbase_mmu_update(kctx);
+			} else {
+				/* This context might have been assigned an
+				 * AS before, clear it.
+				 */
+				kbdev->as_to_kctx[kctx->as_nr] = NULL;
+				kctx->as_nr = KBASEP_AS_NR_INVALID;
+			}
+		} else {
+			kbase_mmu_disable_as(kbdev, i);
+		}
+	}
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h
new file mode 100755
index 0000000..ab57a0d
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h
@@ -0,0 +1,135 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_CTX_SCHED_H_
+#define _KBASE_CTX_SCHED_H_
+
+#include <mali_kbase.h>
+
+/**
+ * The Context Scheduler manages address space assignment and reference
+ * counting to kbase_context. The interface has been designed to minimise
+ * interactions between the Job Scheduler and Power Management/MMU to support
+ * the existing Job Scheduler interface.
+ *
+ * The initial implementation of the Context Scheduler does not schedule
+ * contexts. Instead it relies on the Job Scheduler to make decisions of
+ * when to schedule/evict contexts if address spaces are starved. In the
+ * future, once an interface between the CS and JS has been devised to
+ * provide enough information about how each context is consuming GPU resources,
+ * those decisions can be made in the CS itself, thereby reducing duplicated
+ * code.
+ */
+
+/**
+ * kbase_ctx_sched_init - Initialise the context scheduler
+ * @kbdev: The device for which the context scheduler needs to be initialised
+ *
+ * This must be called during device initialisation. The number of hardware
+ * address spaces must already be established before calling this function.
+ *
+ * Return: 0 for success, otherwise failure
+ */
+int kbase_ctx_sched_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ctx_sched_term - Terminate the context scheduler
+ * @kbdev: The device for which the context scheduler needs to be terminated
+ *
+ * This must be called during device termination after all contexts have been
+ * destroyed.
+ */
+void kbase_ctx_sched_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context
+ * @kctx: The context to which to retain a reference
+ *
+ * This function should be called whenever an address space should be assigned
+ * to a context and programmed onto the MMU. It should typically be called
+ * when jobs are ready to be submitted to the GPU.
+ *
+ * It can be called as many times as necessary. The address space will be
+ * assigned to the context for as long as there is a reference to said context.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ *
+ * Return: The address space that the context has been assigned to or
+ *         KBASEP_AS_NR_INVALID if no address space was available.
+ */
+int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx);
+
+/**
+ * kbase_ctx_sched_retain_ctx_refcount
+ * @kctx: The context to which to retain a reference
+ *
+ * This function only retains a reference to the context. It must be called
+ * only when the context already has a reference.
+ *
+ * This is typically called inside an atomic session where we know the context
+ * is already scheduled in but want to take an extra reference to ensure that
+ * it doesn't get descheduled.
+ *
+ * The kbase_device::hwaccess_lock must be held whilst calling this function
+ */
+void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx);
+
+/**
+ * kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context
+ * @kctx: The context from which to release a reference
+ *
+ * This function should be called whenever an address space could be unassigned
+ * from a context. When there are no more references to said context, the
+ * address space previously assigned to this context shall be reassigned to
+ * other contexts as needed.
+ *
+ * The kbase_device::hwaccess_lock must be held whilst calling this function
+ */
+void kbase_ctx_sched_release_ctx(struct kbase_context *kctx);
+
+/**
+ * kbase_ctx_sched_remove_ctx - Unassign previously assigned address space
+ * @kctx: The context to be removed
+ *
+ * This function should be called when a context is being destroyed. The
+ * context must no longer have any reference. If it has been assigned an
+ * address space before then the AS will be unprogrammed.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ */
+void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx);
+
+/**
+ * kbase_ctx_sched_restore_all_as - Reprogram all address spaces
+ * @kbdev: The device for which address spaces to be reprogrammed
+ *
+ * This function shall reprogram all address spaces previously assigned to
+ * contexts. It can be used after the GPU is reset.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ */
+void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev);
+
+#endif /* _KBASE_CTX_SCHED_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100644
index 0000000..118f787
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+	NULL,
+	NULL
+};
+
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+	kbasep_debug_assert_registered_cb.func = func;
+	kbasep_debug_assert_registered_cb.param = param;
+}
+
+void kbasep_debug_assert_call_hook(void)
+{
+	if (kbasep_debug_assert_registered_cb.func != NULL)
+		kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
new file mode 100644
index 0000000..2fdb72d
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
@@ -0,0 +1,169 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If different from 0, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+struct kbasep_debug_assert_cb {
+	kbase_debug_assert_hook *func;
+	void *param;
+};
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+		"In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION __func__
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated to the @ref KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+		do { \
+			pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+			pr_err(__VA_ARGS__);\
+			pr_err("\n");\
+		} while (false)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @ref KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+	KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+	/**
+	 * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+	 * @brief Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+	 *
+	 * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+	 *
+	 * @param expr Boolean expression
+	 * @param ...  Message to display when @a expr is false, as a format string followed by format arguments.
+	 */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+		do { \
+			if (!(expr)) { \
+				KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+				KBASE_CALL_ASSERT_HOOK();\
+				BUG();\
+			} \
+		} while (false)
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif				/* CONFIG_MALI_DEBUG */
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif				/* _KBASE_DEBUG_H */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
new file mode 100644
index 0000000..d2c09d6
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
@@ -0,0 +1,504 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <linux/spinlock.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+	bool             ret;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	ret = !list_empty(event_list);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return ret;
+}
+
+static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
+	struct base_job_fault_event *event;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		return true;
+	}
+	list_for_each_entry(event, event_list, head) {
+		if (event->katom->kctx == kctx) {
+			spin_unlock_irqrestore(&kbdev->job_fault_event_lock,
+					flags);
+			return false;
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+	return true;
+}
+
+/* wait until the fault happen and copy the event */
+static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
+		struct base_job_fault_event *event)
+{
+	struct list_head            *event_list = &kbdev->job_fault_event_list;
+	struct base_job_fault_event *event_in;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		if (wait_event_interruptible(kbdev->job_fault_wq,
+				 kbase_is_job_fault_event_pending(kbdev)))
+			return -ERESTARTSYS;
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+
+	event_in = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	event->event_code = event_in->event_code;
+	event->katom = event_in->katom;
+
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return 0;
+
+}
+
+/* remove the event from the queue */
+static struct base_job_fault_event *kbase_job_fault_event_dequeue(
+		struct kbase_device *kbdev, struct list_head *event_list)
+{
+	struct base_job_fault_event *event;
+
+	event = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	list_del(event_list->next);
+
+	return event;
+
+}
+
+/* Remove all the following atoms after the failed atom in the same context
+ * Call the postponed bottom half of job done.
+ * Then, this context could be rescheduled.
+ */
+static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
+{
+	struct list_head *event_list = &kctx->job_fault_resume_event_list;
+
+	while (!list_empty(event_list)) {
+		struct base_job_fault_event *event;
+
+		event = kbase_job_fault_event_dequeue(kctx->kbdev,
+				&kctx->job_fault_resume_event_list);
+		kbase_jd_done_worker(&event->katom->work);
+	}
+
+}
+
+/* Remove all the failed atoms that belong to different contexts
+ * Resume all the contexts that were suspend due to failed job
+ */
+static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	while (!list_empty(event_list)) {
+		kbase_job_fault_event_dequeue(kbdev, event_list);
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		wake_up(&kbdev->job_fault_resume_wq);
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+}
+
+static void kbase_job_fault_resume_worker(struct work_struct *data)
+{
+	struct base_job_fault_event *event = container_of(data,
+			struct base_job_fault_event, job_fault_work);
+	struct kbase_context *kctx;
+	struct kbase_jd_atom *katom;
+
+	katom = event->katom;
+	kctx = katom->kctx;
+
+	dev_info(kctx->kbdev->dev, "Job dumping wait\n");
+
+	/* When it was waked up, it need to check if queue is empty or the
+	 * failed atom belongs to different context. If yes, wake up. Both
+	 * of them mean the failed job has been dumped. Please note, it
+	 * should never happen that the job_fault_event_list has the two
+	 * atoms belong to the same context.
+	 */
+	wait_event(kctx->kbdev->job_fault_resume_wq,
+			 kbase_ctx_has_no_event_pending(kctx));
+
+	atomic_set(&kctx->job_fault_count, 0);
+	kbase_jd_done_worker(&katom->work);
+
+	/* In case the following atoms were scheduled during failed job dump
+	 * the job_done_worker was held. We need to rerun it after the dump
+	 * was finished
+	 */
+	kbase_job_fault_resume_event_cleanup(kctx);
+
+	dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
+}
+
+static struct base_job_fault_event *kbase_job_fault_event_queue(
+		struct list_head *event_list,
+		struct kbase_jd_atom *atom,
+		u32 completion_code)
+{
+	struct base_job_fault_event *event;
+
+	event = &atom->fault_event;
+
+	event->katom = atom;
+	event->event_code = completion_code;
+
+	list_add_tail(&event->head, event_list);
+
+	return event;
+
+}
+
+static void kbase_job_fault_event_post(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, u32 completion_code)
+{
+	struct base_job_fault_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
+				katom, completion_code);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	wake_up_interruptible(&kbdev->job_fault_wq);
+
+	INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
+	queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
+
+	dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
+			katom->kctx->tgid, katom->kctx->id);
+
+}
+
+/*
+ * This function will process the job fault
+ * Get the register copy
+ * Send the failed job dump event
+ * Create a Wait queue to wait until the job dump finish
+ */
+
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Check if dumping is in the process
+	 * only one atom of each context can be dumped at the same time
+	 * If the atom belongs to different context, it can be dumped
+	 */
+	if (atomic_read(&kctx->job_fault_count) > 0) {
+		kbase_job_fault_event_queue(
+				&kctx->job_fault_resume_event_list,
+				katom, completion_code);
+		dev_info(kctx->kbdev->dev, "queue:%d\n",
+				kbase_jd_atom_id(kctx, katom));
+		return true;
+	}
+
+	if (kctx->kbdev->job_fault_debug == true) {
+
+		if (completion_code != BASE_JD_EVENT_DONE) {
+
+			if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
+				dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
+				return false;
+			}
+
+			kbase_job_fault_event_post(kctx->kbdev, katom,
+					completion_code);
+			atomic_inc(&kctx->job_fault_count);
+			dev_info(kctx->kbdev->dev, "post:%d\n",
+					kbase_jd_atom_id(kctx, katom));
+			return true;
+
+		}
+	}
+	return false;
+
+}
+
+static int debug_job_fault_show(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+	struct kbase_context *kctx = event->katom->kctx;
+	int i;
+
+	dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
+			kctx->tgid, kctx->id, event->reg_offset);
+
+	if (kctx->reg_dump == NULL) {
+		dev_warn(kbdev->dev, "reg dump is NULL");
+		return -1;
+	}
+
+	if (kctx->reg_dump[event->reg_offset] ==
+			REGISTER_DUMP_TERMINATION_FLAG) {
+		/* Return the error here to stop the read. And the
+		 * following next() will not be called. The stop can
+		 * get the real event resource and release it
+		 */
+		return -1;
+	}
+
+	if (event->reg_offset == 0)
+		seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
+
+	for (i = 0; i < 50; i++) {
+		if (kctx->reg_dump[event->reg_offset] ==
+				REGISTER_DUMP_TERMINATION_FLAG) {
+			break;
+		}
+		seq_printf(m, "%08x: %08x\n",
+				kctx->reg_dump[event->reg_offset],
+				kctx->reg_dump[1+event->reg_offset]);
+		event->reg_offset += 2;
+
+	}
+
+
+	return 0;
+}
+static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+
+	dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
+			event->reg_offset, (int)*pos);
+
+	return event;
+}
+
+static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event;
+
+	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);
+
+	/* The condition is trick here. It needs make sure the
+	 * fault hasn't happened and the dumping hasn't been started,
+	 * or the dumping has finished
+	 */
+	if (*pos == 0) {
+		event = kmalloc(sizeof(*event), GFP_KERNEL);
+		if (!event)
+			return NULL;
+		event->reg_offset = 0;
+		if (kbase_job_fault_event_wait(kbdev, event)) {
+			kfree(event);
+			return NULL;
+		}
+
+		/* The cache flush workaround is called in bottom half of
+		 * job done but we delayed it. Now we should clean cache
+		 * earlier. Then the GPU memory dump should be correct.
+		 */
+		kbase_backend_cacheclean(kbdev, event->katom);
+	} else
+		return NULL;
+
+	return event;
+}
+
+static void debug_job_fault_stop(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+
+	/* here we wake up the kbase_jd_done_worker after stop, it needs
+	 * get the memory dump before the register dump in debug daemon,
+	 * otherwise, the memory dump may be incorrect.
+	 */
+
+	if (v != NULL) {
+		kfree(v);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 1");
+
+	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+		if (!list_empty(&kbdev->job_fault_event_list)) {
+			kbase_job_fault_event_dequeue(kbdev,
+				&kbdev->job_fault_event_list);
+			wake_up(&kbdev->job_fault_resume_wq);
+		}
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
+	}
+
+}
+
+static const struct seq_operations ops = {
+	.start = debug_job_fault_start,
+	.next = debug_job_fault_next,
+	.stop = debug_job_fault_stop,
+	.show = debug_job_fault_show,
+};
+
+static int debug_job_fault_open(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_open(file, &ops);
+
+	((struct seq_file *)file->private_data)->private = kbdev;
+	dev_info(kbdev->dev, "debug job fault seq open");
+
+	kbdev->job_fault_debug = true;
+
+	return 0;
+
+}
+
+static int debug_job_fault_release(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_release(in, file);
+
+	kbdev->job_fault_debug = false;
+
+	/* Clean the unprocessed job fault. After that, all the suspended
+	 * contexts could be rescheduled.
+	 */
+	kbase_job_fault_event_cleanup(kbdev);
+
+	dev_info(kbdev->dev, "debug job fault seq close");
+
+	return 0;
+}
+
+static const struct file_operations kbasep_debug_job_fault_fops = {
+	.open = debug_job_fault_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = debug_job_fault_release,
+};
+
+/*
+ *  Initialize debugfs entry for job fault dump
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("job_fault", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_debug_job_fault_fops);
+}
+
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+
+	INIT_LIST_HEAD(&kbdev->job_fault_event_list);
+
+	init_waitqueue_head(&(kbdev->job_fault_wq));
+	init_waitqueue_head(&(kbdev->job_fault_resume_wq));
+	spin_lock_init(&kbdev->job_fault_event_lock);
+
+	kbdev->job_fault_resume_workq = alloc_workqueue(
+			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
+	if (!kbdev->job_fault_resume_workq)
+		return -ENOMEM;
+
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+/*
+ * Release the relevant resource per device
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->job_fault_resume_workq);
+}
+
+
+/*
+ *  Initialize the relevant data structure per context
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
+{
+
+	/* We need allocate double size register range
+	 * Because this memory will keep the register address and value
+	 */
+	kctx->reg_dump = vmalloc(0x4000 * 2);
+	if (kctx->reg_dump == NULL)
+		return;
+
+	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
+		vfree(kctx->reg_dump);
+		kctx->reg_dump = NULL;
+	}
+	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
+	atomic_set(&kctx->job_fault_count, 0);
+
+}
+
+/*
+ *  release the relevant resource per context
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
+{
+	vfree(kctx->reg_dump);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
new file mode 100644
index 0000000..f5ab0a4
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
@@ -0,0 +1,101 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_DEBUG_JOB_FAULT_H
+#define _KBASE_DEBUG_JOB_FAULT_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
+
+/**
+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue
+ *		per device and initialize the required lists.
+ * @kbdev:	Device pointer
+ *
+ * Return: Zero on success or a negative error code.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_dev_term - Clean up resources created in
+ *		kbase_debug_job_fault_dev_init.
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_context_init - Initialize the relevant
+ *		data structure per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_context_term - Release the relevant
+ *		resource per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_process - Process the failed job.
+ *      It will send a event and wake up the job fault waiting queue
+ *      Then create a work queue to wait for job dump finish
+ *      This function should be called in the interrupt handler and before
+ *      jd_done that make sure the jd_done_worker will be delayed until the
+ *      job dump finish
+ * @katom: The failed atom pointer
+ * @completion_code: the job status
+ * @return true if dump is going on
+ */
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code);
+
+
+/**
+ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
+ *      address during the job fault process, the relevant registers will
+ *      be saved when a job fault happen
+ * @kctx: KBase context pointer
+ * @reg_range: Maximum register address space
+ * @return true if initializing successfully
+ */
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range);
+
+/**
+ * kbase_job_fault_get_reg_snapshot - Read the interested registers for
+ *      failed job dump
+ * @kctx: KBase context pointer
+ * @return true if getting registers successfully
+ */
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
+
+#endif  /*_KBASE_DEBUG_JOB_FAULT_H*/
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
new file mode 100755
index 0000000..857fe97
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -0,0 +1,311 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+struct debug_mem_mapping {
+	struct list_head node;
+
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long flags;
+
+	u64 start_pfn;
+	size_t nr_pages;
+};
+
+struct debug_mem_data {
+	struct list_head mapping_list;
+	struct kbase_context *kctx;
+};
+
+struct debug_mem_seq_off {
+	struct list_head *lh;
+	size_t offset;
+};
+
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data;
+	struct debug_mem_mapping *map;
+	loff_t pos = *_pos;
+
+	list_for_each_entry(map, &mem_data->mapping_list, node) {
+		if (pos >= map->nr_pages) {
+			pos -= map->nr_pages;
+		} else {
+			data = kmalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				return NULL;
+			data->lh = &map->node;
+			data->offset = pos;
+			return data;
+		}
+	}
+
+	/* Beyond the end */
+	return NULL;
+}
+
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+	kfree(v);
+}
+
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	if (data->offset < map->nr_pages - 1) {
+		data->offset++;
+		++*pos;
+		return data;
+	}
+
+	if (list_is_last(data->lh, &mem_data->mapping_list)) {
+		kfree(data);
+		return NULL;
+	}
+
+	data->lh = data->lh->next;
+	data->offset = 0;
+	++*pos;
+
+	return data;
+}
+
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+	int i, j;
+	struct page *page;
+	uint32_t *mapping;
+	pgprot_t prot = PAGE_KERNEL;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	kbase_gpu_vm_lock(mem_data->kctx);
+
+	if (data->offset >= map->alloc->nents) {
+		seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+				data->offset) << PAGE_SHIFT);
+		goto out;
+	}
+
+	if (!(map->flags & KBASE_REG_CPU_CACHED))
+		prot = pgprot_writecombine(prot);
+
+	page = phys_to_page(as_phys_addr_t(map->alloc->pages[data->offset]));
+	mapping = vmap(&page, 1, VM_MAP, prot);
+	if (!mapping)
+		goto out;
+
+	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+		seq_printf(m, "%016llx:", i + ((map->start_pfn +
+				data->offset) << PAGE_SHIFT));
+
+		for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+			seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+		seq_putc(m, '\n');
+	}
+
+	vunmap(mapping);
+
+	seq_putc(m, '\n');
+
+out:
+	kbase_gpu_vm_unlock(mem_data->kctx);
+	return 0;
+}
+
+static const struct seq_operations ops = {
+	.start = debug_mem_start,
+	.next = debug_mem_next,
+	.stop = debug_mem_stop,
+	.show = debug_mem_show,
+};
+
+static int debug_mem_zone_open(struct rb_root *rbtree,
+						struct debug_mem_data *mem_data)
+{
+	int ret = 0;
+	struct rb_node *p;
+	struct kbase_va_region *reg;
+	struct debug_mem_mapping *mapping;
+
+	for (p = rb_first(rbtree); p; p = rb_next(p)) {
+		reg = rb_entry(p, struct kbase_va_region, rblink);
+
+		if (reg->gpu_alloc == NULL)
+			/* Empty region - ignore */
+			continue;
+
+		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		mapping->start_pfn = reg->start_pfn;
+		mapping->nr_pages = reg->nr_pages;
+		mapping->flags = reg->flags;
+		list_add_tail(&mapping->node, &mem_data->mapping_list);
+	}
+
+out:
+	return ret;
+}
+
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+	struct file *kctx_file = i->i_private;
+	struct kbase_context *kctx = kctx_file->private_data;
+	struct debug_mem_data *mem_data;
+	int ret;
+
+	ret = seq_open(file, &ops);
+	if (ret)
+		return ret;
+
+	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	if (!mem_data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mem_data->kctx = kctx;
+
+	INIT_LIST_HEAD(&mem_data->mapping_list);
+
+	get_file(kctx_file);
+
+	kbase_gpu_vm_lock(kctx);
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data);
+	if (ret != 0) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	((struct seq_file *)file->private_data)->private = mem_data;
+
+	return 0;
+
+out:
+	if (mem_data) {
+		while (!list_empty(&mem_data->mapping_list)) {
+			struct debug_mem_mapping *mapping;
+
+			mapping = list_first_entry(&mem_data->mapping_list,
+					struct debug_mem_mapping, node);
+			kbase_mem_phy_alloc_put(mapping->alloc);
+			list_del(&mapping->node);
+			kfree(mapping);
+		}
+		fput(kctx_file);
+		kfree(mem_data);
+	}
+	seq_release(i, file);
+	return ret;
+}
+
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+	struct file *kctx_file = inode->i_private;
+	struct seq_file *sfile = file->private_data;
+	struct debug_mem_data *mem_data = sfile->private;
+	struct debug_mem_mapping *mapping;
+
+	seq_release(inode, file);
+
+	while (!list_empty(&mem_data->mapping_list)) {
+		mapping = list_first_entry(&mem_data->mapping_list,
+				struct debug_mem_mapping, node);
+		kbase_mem_phy_alloc_put(mapping->alloc);
+		list_del(&mapping->node);
+		kfree(mapping);
+	}
+
+	kfree(mem_data);
+
+	fput(kctx_file);
+
+	return 0;
+}
+
+static const struct file_operations kbase_debug_mem_view_fops = {
+	.open = debug_mem_open,
+	.release = debug_mem_release,
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+/**
+ * kbase_debug_mem_view_init - Initialise the mem_view sysfs file
+ * @kctx_file: The /dev/mali0 file instance for the context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct file *kctx_file)
+{
+	struct kbase_context *kctx = kctx_file->private_data;
+
+	debugfs_create_file("mem_view", S_IRUSR, kctx->kctx_dentry, kctx_file,
+			&kbase_debug_mem_view_fops);
+}
+
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
new file mode 100644
index 0000000..886ca94
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -0,0 +1,30 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+void kbase_debug_mem_view_init(struct file *kctx_file);
+
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100755
index 0000000..3df17ac
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,2265 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Defintions (types, defines, etcs) common to Kbase. They are placed here to
+ * allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_gpuprops_types.h>
+#include <protected_mode_switcher.h>
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/sizes.h>
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#include <linux/bus_logger.h>
+#endif
+
+
+#if defined(CONFIG_SYNC)
+#include <sync.h>
+#else
+#include "mali_kbase_fence_defs.h"
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif				/* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_MALI_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_MALI_DEVFREQ */
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#if defined(CONFIG_PM_RUNTIME) || \
+	(defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+#define KBASE_PM_RUNTIME 1
+#endif
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif				/* CONFIG_MALI_DEBUG */
+#endif				/* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT             1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT           500
+
+/**
+ * Prevent soft-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * Therefore, soft stop may still be disabled due to HW issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+
+/**
+ * Prevent hard-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS        3
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS              16
+
+/* mmu */
+#define MIDGARD_MMU_VA_BITS 48
+
+#define MIDGARD_MMU_LEVEL(x) (x)
+
+#define MIDGARD_MMU_TOPLEVEL    MIDGARD_MMU_LEVEL(0)
+
+#define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3)
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID     (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8	/* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_hwaccess_defs.h"
+
+#define KBASEP_FORCE_REPLAY_DISABLED 0
+
+/* Maximum force replay limit when randomization is enabled */
+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+
+/** Atom has been previously soft-stoppped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has been previously retried to execute */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps to
+ * disambiguate short-running job chains during soft/hard stopping of jobs
+ */
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has fail dependency on cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom is currently in the list of atoms blocked on cross-slot dependencies */
+#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in protected mode */
+#define KBASE_KATOM_FLAG_PROTECTED (1<<11)
+/* Atom has been stored in runnable_tree */
+#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS  (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ *  entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+		(JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+/* Serialize atoms within a slot (ie only one atom per job slot) */
+#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0)
+/* Serialize atoms between slots (ie only one job slot running at any time) */
+#define KBASE_SERIALIZE_INTER_SLOT (1 << 1)
+/* Reset the GPU after each atom completion */
+#define KBASE_SERIALIZE_RESET (1 << 2)
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_device;
+struct kbase_as;
+struct kbase_mmu_setup;
+struct kbase_ipa_model_vinstr_data;
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * struct base_job_fault_event - keeps track of the atom which faulted or which
+ *                               completed after the faulty atom but before the
+ *                               debug data for faulty atom was dumped.
+ *
+ * @event_code:     event code for the atom, should != BASE_JD_EVENT_DONE for the
+ *                  atom which faulted.
+ * @katom:          pointer to the atom for which job fault occurred or which completed
+ *                  after the faulty atom.
+ * @job_fault_work: work item, queued only for the faulty atom, which waits for
+ *                  the dumping to get completed and then does the bottom half
+ *                  of job done for the atoms which followed the faulty atom.
+ * @head:           List head used to store the atom in the global list of faulty
+ *                  atoms or context specific list of atoms which got completed
+ *                  during the dump.
+ * @reg_offset:     offset of the register to be dumped next, only applicable for
+ *                  the faulty atom.
+ */
+struct base_job_fault_event {
+
+	u32 event_code;
+	struct kbase_jd_atom *katom;
+	struct work_struct job_fault_work;
+	struct list_head head;
+	int reg_offset;
+};
+
+#endif
+
+/**
+ * struct kbase_jd_atom_dependency - Contains the dependency info for an atom.
+ * @atom:          pointer to the dependee atom.
+ * @dep_type:      type of dependency on the dependee @atom, i.e. order or data
+ *                 dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency.
+ */
+struct kbase_jd_atom_dependency {
+	struct kbase_jd_atom *atom;
+	u8 dep_type;
+};
+
+/**
+ * struct kbase_io_access - holds information about 1 register access
+ *
+ * @addr: first bit indicates r/w (r=0, w=1)
+ * @value: value written or read
+ */
+struct kbase_io_access {
+	uintptr_t addr;
+	u32 value;
+};
+
+/**
+ * struct kbase_io_history - keeps track of all recent register accesses
+ *
+ * @enabled: true if register accesses are recorded, false otherwise
+ * @lock: spinlock protecting kbase_io_access array
+ * @count: number of registers read/written
+ * @size: number of elements in kbase_io_access array
+ * @buf: array of kbase_io_access
+ */
+struct kbase_io_history {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool enabled;
+#else
+	u32 enabled;
+#endif
+
+	spinlock_t lock;
+	size_t count;
+	u16 size;
+	struct kbase_io_access *buf;
+};
+
+/**
+ * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the
+ *                           dependee atom.
+ * @dep:   pointer to the dependency info structure.
+ *
+ * Return: readonly reference to dependee atom.
+ */
+static inline const struct kbase_jd_atom *
+kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return (const struct kbase_jd_atom *)(dep->atom);
+}
+
+/**
+ * kbase_jd_katom_dep_type -  Retrieves the dependency type info
+ *
+ * @dep:   pointer to the dependency info structure.
+ *
+ * Return: the type of dependency there is on the dependee atom.
+ */
+static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return dep->dep_type;
+}
+
+/**
+ * kbase_jd_katom_dep_set - sets up the dependency info structure
+ *                          as per the values passed.
+ * @const_dep:    pointer to the dependency info structure to be setup.
+ * @a:            pointer to the dependee atom.
+ * @type:         type of dependency there is on the dependee atom.
+ */
+static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
+		struct kbase_jd_atom *a, u8 type)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = a;
+	dep->dep_type = type;
+}
+
+/**
+ * kbase_jd_katom_dep_clear - resets the dependency info structure
+ *
+ * @const_dep:    pointer to the dependency info structure to be setup.
+ */
+static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = NULL;
+	dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
+
+/**
+ * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it becomes
+ *                                runnable, with respect to job slot ringbuffer/fifo.
+ * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, which
+ *                                implies that either atom has not become runnable
+ *                                due to dependency or has completed the execution
+ *                                on GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is blocked
+ *                                due to cross slot dependency, can't be submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot fifo but
+ *                                is waiting for the completion of previously added atoms
+ *                                in current & other slots, as their protected mode
+ *                                requirements do not match with the current atom.
+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo and is
+ *                                waiting for completion of protected mode transition,
+ *                                needed before the atom is submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is waiting
+ *                                for the cores, which are needed to execute the job
+ *                                chain represented by the atom, to become available
+ * @KBASE_ATOM_GPU_RB_WAITING_AFFINITY: Atom is in slot fifo but is blocked on
+ *                                affinity due to rmu workaround for Hw issue 8987.
+ * @KBASE_ATOM_GPU_RB_READY:      Atom is in slot fifo and can be submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_SUBMITTED:  Atom is in slot fifo and has been submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some failure,
+ *                                but only after the previously added atoms in fifo
+ *                                have completed or have also been returned to JS.
+ */
+enum kbase_atom_gpu_rb_state {
+	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
+	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
+	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
+	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
+	KBASE_ATOM_GPU_RB_READY,
+	KBASE_ATOM_GPU_RB_SUBMITTED,
+	KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1
+};
+
+/**
+ * enum kbase_atom_enter_protected_state - The state of an atom with respect to the
+ *                      preparation for GPU's entry into protected mode, becomes
+ *                      pertinent only after atom's state with respect to slot
+ *                      ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @KBASE_ATOM_ENTER_PROTECTED_CHECK:  Starting state. Check if there are any atoms
+ *                      currently submitted to GPU and protected mode transition is
+ *                      not already in progress.
+ * @KBASE_ATOM_ENTER_PROTECTED_VINSTR: Wait for vinstr to suspend before entry into
+ *                      protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation
+ *                      for the coherency change. L2 shall be powered down and GPU shall
+ *                      come out of fully coherent mode before entering protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; Prepare coherency change and switch
+ *                      GPU to protected mode.
+ */
+enum kbase_atom_enter_protected_state {
+	/**
+	 * NOTE: The integer value of this must match KBASE_ATOM_EXIT_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
+	KBASE_ATOM_ENTER_PROTECTED_VINSTR,
+	KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
+	KBASE_ATOM_ENTER_PROTECTED_FINISHED,
+};
+
+/**
+ * enum kbase_atom_exit_protected_state - The state of an atom with respect to the
+ *                      preparation for GPU's exit from protected mode, becomes
+ *                      pertinent only after atom's state with respect to slot
+ *                      ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any atoms
+ *                      currently submitted to GPU and protected mode transition is
+ *                      not already in progress.
+ * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation
+ *                      for the reset, as exiting protected mode requires a reset.
+ * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from protected mode
+ * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state, Wait for the reset to complete
+ */
+enum kbase_atom_exit_protected_state {
+	/**
+	 * NOTE: The integer value of this must match KBASE_ATOM_ENTER_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
+	KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
+	KBASE_ATOM_EXIT_PROTECTED_RESET,
+	KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
+};
+
+/**
+ * struct kbase_ext_res - Contains the info for external resources referred
+ *                        by an atom, which have been mapped on GPU side.
+ * @gpu_address:          Start address of the memory region allocated for
+ *                        the resource from GPU virtual address space.
+ * @alloc:                pointer to physical pages tracking object, set on
+ *                        mapping the external resource on GPU side.
+ */
+struct kbase_ext_res {
+	u64 gpu_address;
+	struct kbase_mem_phy_alloc *alloc;
+};
+
+/**
+ * struct kbase_jd_atom  - object representing the atom, containing the complete
+ *                         state and attributes of an atom.
+ * @work:                  work item for the bottom half processing of the atom,
+ *                         by JD or JS, after it got executed on GPU or the input
+ *                         fence got signaled
+ * @start_timestamp:       time at which the atom was submitted to the GPU, by
+ *                         updating the JS_HEAD_NEXTn register.
+ * @udata:                 copy of the user data sent for the atom in base_jd_submit.
+ * @kctx:                  Pointer to the base context with which the atom is associated.
+ * @dep_head:              Array of 2 list heads, pointing to the two list of atoms
+ *                         which are blocked due to dependency on this atom.
+ * @dep_item:              Array of 2 list heads, used to store the atom in the list of
+ *                         other atoms depending on the same dependee atom.
+ * @dep:                   Array containing the dependency info for the 2 atoms on which
+ *                         the atom depends upon.
+ * @jd_item:               List head used during job dispatch job_done processing - as
+ *                         dependencies may not be entirely resolved at this point,
+ *                         we need to use a separate list head.
+ * @in_jd_list:            flag set to true if atom's @jd_item is currently on a list,
+ *                         prevents atom being processed twice.
+ * @nr_extres:             number of external resources referenced by the atom.
+ * @extres:                pointer to the location containing info about @nr_extres
+ *                         external resources referenced by the atom.
+ * @device_nr:             indicates the coregroup with which the atom is associated,
+ *                         when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified.
+ * @affinity:              bitmask of the shader cores on which the atom can execute.
+ * @jc:                    GPU address of the job-chain.
+ * @softjob_data:          Copy of data read from the user space buffer that @jc
+ *                         points to.
+ * @coreref_state:         state of the atom with respect to retention of shader
+ *                         cores for affinity & power management.
+ * @fence:                 Stores either an input or output sync fence, depending
+ *                         on soft-job type
+ * @sync_waiter:           Pointer to the sync fence waiter structure passed to the
+ *                         callback function on signaling of the input fence.
+ * @dma_fence:             object containing pointers to both input & output fences
+ *                         and other related members used for explicit sync through
+ *                         soft jobs and for the implicit synchronization required
+ *                         on access to external resources.
+ * @event_code:            Event code for the job chain represented by the atom, both
+ *                         HW and low-level SW events are represented by event codes.
+ * @core_req:              bitmask of BASE_JD_REQ_* flags specifying either Hw or Sw
+ *                         requirements for the job chain represented by the atom.
+ * @ticks:                 Number of scheduling ticks for which atom has been running
+ *                         on the GPU.
+ * @sched_priority:        Priority of the atom for Job scheduling, as per the
+ *                         KBASE_JS_ATOM_SCHED_PRIO_*.
+ * @poking:                Indicates whether poking of MMU is ongoing for the atom,
+ *                         as a WA for the issue HW_ISSUE_8316.
+ * @completed:             Wait queue to wait upon for the completion of atom.
+ * @status:                Indicates at high level at what stage the atom is in,
+ *                         as per KBASE_JD_ATOM_STATE_*, that whether it is not in
+ *                         use or its queued in JD or given to JS or submitted to Hw
+ *                         or it completed the execution on Hw.
+ * @work_id:               used for GPU tracepoints, its a snapshot of the 'work_id'
+ *                         counter in kbase_jd_context which is incremented on
+ *                         every call to base_jd_submit.
+ * @slot_nr:               Job slot chosen for the atom.
+ * @atom_flags:            bitmask of KBASE_KATOM_FLAG* flags capturing the exact
+ *                         low level state of the atom.
+ * @retry_count:           Number of times this atom has been retried. Used by replay
+ *                         soft job.
+ * @gpu_rb_state:          bitmnask of KBASE_ATOM_GPU_RB_* flags, precisely tracking
+ *                         atom's state after it has entered Job scheduler on becoming
+ *                         runnable. Atom could be blocked due to cross slot dependency
+ *                         or waiting for the shader cores to become available or
+ *                         waiting for protected mode transitions to complete.
+ * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU
+ *                         cache is needed for the atom and the shader cores used
+ *                         for atom have been kept on.
+ * @blocked:               flag indicating that atom's resubmission to GPU is
+ *                         blocked till the work item is scheduled to return the
+ *                         atom to JS.
+ * @pre_dep:               Pointer to atom that this atom has same-slot dependency on
+ * @post_dep:              Pointer to atom that has same-slot dependency on this atom
+ * @x_pre_dep:             Pointer to atom that this atom has cross-slot dependency on
+ * @x_post_dep:            Pointer to atom that has cross-slot dependency on this atom
+ * @flush_id:              The GPU's flush count recorded at the time of submission,
+ *                         used for the cache flush optimisation
+ * @fault_event:           Info for dumping the debug data on Job fault.
+ * @queue:                 List head used for 4 different purposes :
+ *                         Adds atom to the list of dma-buf fence waiting atoms.
+ *                         Adds atom to the list of atoms blocked due to cross
+ *                         slot dependency.
+ *                         Adds atom to the list of softjob atoms for which JIT
+ *                         allocation has been deferred
+ *                         Adds atom to the list of softjob atoms waiting for the
+ *                         signaling of fence.
+ * @jit_node:              Used to keep track of all JIT free/alloc jobs in submission order
+ * @jit_blocked:           Flag indicating that JIT allocation requested through
+ *                         softjob atom will be reattempted after the impending
+ *                         free of other active JIT allocations.
+ * @will_fail_event_code:  If non-zero, this indicates that the atom will fail
+ *                         with the set event_code when the atom is processed.
+ *                         Used for special handling of atoms, which have a data
+ *                         dependency on the failed atoms.
+ * @protected_state:       State of the atom, as per KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*,
+ *                         when transitioning into or out of protected mode. Atom will
+ *                         be either entering or exiting the protected mode.
+ * @runnable_tree_node:    The node added to context's job slot specific rb tree
+ *                         when the atom becomes runnable.
+ * @age:                   Age of atom relative to other atoms in the context, is
+ *                         snapshot of the age_count counter in kbase context.
+ */
+struct kbase_jd_atom {
+	struct work_struct work;
+	ktime_t start_timestamp;
+
+	struct base_jd_udata udata;
+	struct kbase_context *kctx;
+
+	struct list_head dep_head[2];
+	struct list_head dep_item[2];
+	const struct kbase_jd_atom_dependency dep[2];
+	struct list_head jd_item;
+	bool in_jd_list;
+
+	u16 nr_extres;
+	struct kbase_ext_res *extres;
+
+	u32 device_nr;
+	u64 affinity;
+	u64 jc;
+	void *softjob_data;
+	enum kbase_atom_coreref_state coreref_state;
+#if defined(CONFIG_SYNC)
+	struct sync_fence *fence;
+	struct sync_fence_waiter sync_waiter;
+#endif				/* CONFIG_SYNC */
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+	struct {
+		/* Use the functions/API defined in mali_kbase_fence.h to
+		 * when working with this sub struct */
+#if defined(CONFIG_SYNC_FILE)
+		/* Input fence */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+		struct fence *fence_in;
+#else
+		struct dma_fence *fence_in;
+#endif
+#endif
+		/* This points to the dma-buf output fence for this atom. If
+		 * this is NULL then there is no fence for this atom and the
+		 * following fields related to dma_fence may have invalid data.
+		 *
+		 * The context and seqno fields contain the details for this
+		 * fence.
+		 *
+		 * This fence is signaled when the katom is completed,
+		 * regardless of the event_code of the katom (signal also on
+		 * failure).
+		 */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+		struct fence *fence;
+#else
+		struct dma_fence *fence;
+#endif
+		/* The dma-buf fence context number for this atom. A unique
+		 * context number is allocated to each katom in the context on
+		 * context creation.
+		 */
+		unsigned int context;
+		/* The dma-buf fence sequence number for this atom. This is
+		 * increased every time this katom uses dma-buf fence.
+		 */
+		atomic_t seqno;
+		/* This contains a list of all callbacks set up to wait on
+		 * other fences.  This atom must be held back from JS until all
+		 * these callbacks have been called and dep_count have reached
+		 * 0. The initial value of dep_count must be equal to the
+		 * number of callbacks on this list.
+		 *
+		 * This list is protected by jctx.lock. Callbacks are added to
+		 * this list when the atom is built and the wait are set up.
+		 * All the callbacks then stay on the list until all callbacks
+		 * have been called and the atom is queued, or cancelled, and
+		 * then all callbacks are taken off the list and freed.
+		 */
+		struct list_head callbacks;
+		/* Atomic counter of number of outstandind dma-buf fence
+		 * dependencies for this atom. When dep_count reaches 0 the
+		 * atom may be queued.
+		 *
+		 * The special value "-1" may only be set after the count
+		 * reaches 0, while holding jctx.lock. This indicates that the
+		 * atom has been handled, either queued in JS or cancelled.
+		 *
+		 * If anyone but the dma-fence worker sets this to -1 they must
+		 * ensure that any potentially queued worker must have
+		 * completed before allowing the atom to be marked as unused.
+		 * This can be done by flushing the fence work queue:
+		 * kctx->dma_fence.wq.
+		 */
+		atomic_t dep_count;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE*/
+
+	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
+	enum base_jd_event_code event_code;
+	base_jd_core_req core_req;
+
+	u32 ticks;
+	int sched_priority;
+
+	int poking;
+
+	wait_queue_head_t completed;
+	enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	int work_id;
+#endif
+	int slot_nr;
+
+	u32 atom_flags;
+
+	int retry_count;
+
+	enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+	u64 need_cache_flush_cores_retained;
+
+	atomic_t blocked;
+
+	struct kbase_jd_atom *pre_dep;
+	struct kbase_jd_atom *post_dep;
+
+	struct kbase_jd_atom *x_pre_dep;
+	struct kbase_jd_atom *x_post_dep;
+
+	u32 flush_id;
+
+#ifdef CONFIG_DEBUG_FS
+	struct base_job_fault_event fault_event;
+#endif
+
+	struct list_head queue;
+
+	struct list_head jit_node;
+	bool jit_blocked;
+
+	enum base_jd_event_code will_fail_event_code;
+
+	union {
+		enum kbase_atom_enter_protected_state enter;
+		enum kbase_atom_exit_protected_state exit;
+	} protected_state;
+
+	struct rb_node runnable_tree_node;
+
+	u32 age;
+};
+
+static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
+{
+	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+/**
+ * struct kbase_jd_context  - per context object encapsulating all the Job dispatcher
+ *                            related state.
+ * @lock:                     lock to serialize the updates made to the Job dispatcher
+ *                            state and kbase_jd_atom objects.
+ * @sched_info:               Structure encapsulating all the Job scheduling info.
+ * @atoms:                    Array of the objects representing atoms, containing
+ *                            the complete state and attributes of an atom.
+ * @job_nr:                   Tracks the number of atoms being processed by the
+ *                            kbase. This includes atoms that are not tracked by
+ *                            scheduler: 'not ready to run' & 'dependency-only' jobs.
+ * @zero_jobs_wait:           Waitq that reflects whether there are no jobs
+ *                            (including SW-only dependency jobs). This is set
+ *                            when no jobs are present on the ctx, and clear when
+ *                            there are jobs.
+ *                            This must be updated atomically with @job_nr.
+ *                            note: Job Dispatcher knows about more jobs than the
+ *                            Job Scheduler as it is unaware of jobs that are
+ *                            blocked on dependencies and SW-only dependency jobs.
+ *                            This waitq can be waited upon to find out when the
+ *                            context jobs are all done/cancelled (including those
+ *                            that might've been blocked on dependencies) - and so,
+ *                            whether it can be terminated. However, it should only
+ *                            be terminated once it is not present in the run-pool.
+ *                            Since the waitq is only set under @lock, the waiter
+ *                            should also briefly obtain and drop @lock to guarantee
+ *                            that the setter has completed its work on the kbase_context
+ * @job_done_wq:              Workqueue to which the per atom work item is queued
+ *                            for bottom half processing when the atom completes
+ *                            execution on GPU or the input fence get signaled.
+ * @tb_lock:                  Lock to serialize the write access made to @tb to
+ *                            to store the register access trace messages.
+ * @tb:                       Pointer to the Userspace accessible buffer storing
+ *                            the trace messages for register read/write accesses
+ *                            made by the Kbase. The buffer is filled in circular
+ *                            fashion.
+ * @tb_wrap_offset:           Offset to the end location in the trace buffer, the
+ *                            write pointer is moved to the beginning on reaching
+ *                            this offset.
+ * @work_id:                  atomic variable used for GPU tracepoints, incremented
+ *                            on every call to base_jd_submit.
+ */
+struct kbase_jd_context {
+	struct mutex lock;
+	struct kbasep_js_kctx_info sched_info;
+	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+	u32 job_nr;
+
+	wait_queue_head_t zero_jobs_wait;
+
+	struct workqueue_struct *job_done_wq;
+
+	spinlock_t tb_lock;
+	u32 *tb;
+	size_t tb_wrap_offset;
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_t work_id;
+#endif
+};
+
+struct kbase_device_info {
+	u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+enum {
+	KBASE_AS_POKE_STATE_IN_FLIGHT     = 1<<0,
+	KBASE_AS_POKE_STATE_KILLING_POKE  = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+	u64	transtab;
+	u64	memattr;
+	u64	transcfg;
+};
+
+/**
+ * struct kbase_as   - object representing an address space of GPU.
+ * @number:            Index at which this address space structure is present
+ *                     in an array of address space structures embedded inside the
+ *                     struct kbase_device.
+ * @pf_wq:             Workqueue for processing work items related to Bus fault
+ *                     and Page fault handling.
+ * @work_pagefault:    Work item for the Page fault handling.
+ * @work_busfault:     Work item for the Bus fault handling.
+ * @fault_type:        Type of fault which occured for this address space,
+ *                     regular/unexpected Bus or Page fault.
+ * @protected_mode:    Flag indicating whether the fault occurred in protected
+ *                     mode or not.
+ * @fault_status:      Records the fault status as reported by Hw.
+ * @fault_addr:        Records the faulting address.
+ * @fault_extra_addr:  Records the secondary fault address.
+ * @current_setup:     Stores the MMU configuration for this address space.
+ * @poke_wq:           Workqueue to process the work items queue for poking the
+ *                     MMU as a WA for BASE_HW_ISSUE_8316.
+ * @poke_work:         Work item to do the poking of MMU for this address space.
+ * @poke_refcount:     Refcount for the need of poking MMU. While the refcount is
+ *                     non zero the poking of MMU will continue.
+ *                     Protected by hwaccess_lock.
+ * @poke_state:        State indicating whether poking is in progress or it has
+ *                     been stopped. Protected by hwaccess_lock.
+ * @poke_timer:        Timer used to schedule the poking at regular intervals.
+ */
+struct kbase_as {
+	int number;
+	struct workqueue_struct *pf_wq;
+	struct work_struct work_pagefault;
+	struct work_struct work_busfault;
+	enum kbase_mmu_fault_type fault_type;
+	bool protected_mode;
+	u32 fault_status;
+	u64 fault_addr;
+	u64 fault_extra_addr;
+	struct kbase_mmu_setup current_setup;
+	struct workqueue_struct *poke_wq;
+	struct work_struct poke_work;
+	int poke_refcount;
+	kbase_as_poke_state poke_state;
+	struct hrtimer poke_timer;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+struct kbasep_mem_device {
+	atomic_t used_pages;   /* Tracks usage of OS shared memory. Updated
+				   when OS memory is allocated/freed. */
+
+};
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+	/* Comma on its own, to extend the list */
+	,
+	/* Must be the last in the enum */
+	KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT  (((u8)1) << 1)
+
+/**
+ * struct kbase_trace - object representing a trace message added to trace buffer
+ *                      kbase_device::trace_rbuf
+ * @timestamp:          CPU timestamp at which the trace message was added.
+ * @thread_id:          id of the thread in the context of which trace message
+ *                      was added.
+ * @cpu:                indicates which CPU the @thread_id was scheduled on when
+ *                      the trace message was added.
+ * @ctx:                Pointer to the kbase context for which the trace message
+ *                      was added. Will be NULL for certain trace messages like
+ *                      for traces added corresponding to power management events.
+ *                      Will point to the appropriate context corresponding to
+ *                      job-slot & context's reference count related events.
+ * @katom:              indicates if the trace message has atom related info.
+ * @atom_number:        id of the atom for which trace message was added.
+ *                      Only valid if @katom is true.
+ * @atom_udata:         Copy of the user data sent for the atom in base_jd_submit.
+ *                      Only valid if @katom is true.
+ * @gpu_addr:           GPU address of the job-chain represented by atom. Could
+ *                      be valid even if @katom is false.
+ * @info_val:           value specific to the type of event being traced. For the
+ *                      case where @katom is true, will be set to atom's affinity,
+ *                      i.e. bitmask of shader cores chosen for atom's execution.
+ * @code:               Identifies the event, refer enum kbase_trace_code.
+ * @jobslot:            job-slot for which trace message was added, valid only for
+ *                      job-slot management events.
+ * @refcount:           reference count for the context, valid for certain events
+ *                      related to scheduler core and policy.
+ * @flags:              indicates if info related to @jobslot & @refcount is present
+ *                      in the trace message, used during dumping of the message.
+ */
+struct kbase_trace {
+	struct timespec timestamp;
+	u32 thread_id;
+	u32 cpu;
+	void *ctx;
+	bool katom;
+	int atom_number;
+	u64 atom_udata[2];
+	u64 gpu_addr;
+	unsigned long info_val;
+	u8 code;
+	u8 jobslot;
+	u8 refcount;
+	u8 flags;
+};
+
+/** Event IDs for the power management framework.
+ *
+ * Any of these events might be missed, so they should not be relied upon to
+ * find the precise state of the GPU at a particular time in the
+ * trace. Overall, we should get a high percentage of these events for
+ * statisical purposes, and so a few missing should not be a problem */
+enum kbase_timeline_pm_event {
+	/* helper for tests */
+	KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** Event reserved for backwards compatibility with 'init' events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** The power state of the device has changed.
+	 *
+	 * Specifically, the device has reached a desired or available state.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
+
+	/** The GPU is becoming active.
+	 *
+	 * This event is sent when the first context is about to use the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
+
+	/** The GPU is becoming idle.
+	 *
+	 * This event is sent when the last context has finished using the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
+
+	/** Event reserved for backwards compatibility with 'policy_change'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_4,
+
+	/** Event reserved for backwards compatibility with 'system_suspend'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_5,
+
+	/** Event reserved for backwards compatibility with 'system_resume'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_6,
+
+	/** The job scheduler is requesting to power up/down cores.
+	 *
+	 * This event is sent when:
+	 * - powered down cores are needed to complete a job
+	 * - powered up cores are not needed anymore
+	 */
+	KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+
+	KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+};
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+struct kbase_trace_kctx_timeline {
+	atomic_t jd_atoms_in_flight;
+	u32 owner_tgid;
+};
+
+struct kbase_trace_kbdev_timeline {
+	/* Note: strictly speaking, not needed, because it's in sync with
+	 * kbase_device::jm_slots[]::submitted_nr
+	 *
+	 * But it's kept as an example of how to add global timeline tracking
+	 * information
+	 *
+	 * The caller must hold hwaccess_lock when accessing this */
+	u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
+
+	/* Last UID for each PM event */
+	atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
+	/* Counter for generating PM event UIDs */
+	atomic_t pm_event_uid_counter;
+	/*
+	 * L2 transition state - true indicates that the transition is ongoing
+	 * Expected to be protected by hwaccess_lock */
+	bool l2_transitioning;
+};
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+
+struct kbasep_kctx_list_element {
+	struct list_head link;
+	struct kbase_context *kctx;
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_device_data {
+	/**
+	 * The lock protecting Power Management structures accessed outside of
+	 * IRQ.
+	 *
+	 * This lock must also be held whenever the GPU is being powered on or
+	 * off.
+	 */
+	struct mutex lock;
+
+	/** The reference count of active contexts on this device. */
+	int active_count;
+	/** Flag indicating suspending/suspended */
+	bool suspending;
+	/* Wait queue set when active_count == 0 */
+	wait_queue_head_t zero_active_count_wait;
+
+	/**
+	 * Bit masks identifying the available shader cores that are specified
+	 * via sysfs. One mask per job slot.
+	 */
+	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
+	u64 debug_core_mask_all;
+
+	/**
+	 * Callback for initializing the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 *
+	 * @return 0 on success, else error code
+	 */
+	 int (*callback_power_runtime_init)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback for terminating the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 */
+	void (*callback_power_runtime_term)(struct kbase_device *kbdev);
+
+	/* Time in milliseconds between each dvfs sample */
+	u32 dvfs_period;
+
+	/* Period of GPU poweroff timer */
+	ktime_t gpu_poweroff_time;
+
+	/* Number of ticks of GPU poweroff timer before shader is powered off */
+	int poweroff_shader_ticks;
+
+	/* Number of ticks of GPU poweroff timer before GPU is powered off */
+	int poweroff_gpu_ticks;
+
+	struct kbase_pm_backend_data backend;
+};
+
+/**
+ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
+ * @kbdev:        Kbase device where memory is used
+ * @cur_size:     Number of free pages currently in the pool (may exceed
+ *                @max_size in some corner cases)
+ * @max_size:     Maximum number of free pages in the pool
+ * @order:        order = 0 refers to a pool of 4 KB pages
+ *                order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
+ * @pool_lock:    Lock protecting the pool - must be held when modifying
+ *                @cur_size and @page_list
+ * @page_list:    List of free pages in the pool
+ * @reclaim:      Shrinker for kernel reclaim of free pages
+ * @next_pool:    Pointer to next pool where pages can be allocated when this
+ *                pool is empty. Pages will spill over to the next pool when
+ *                this pool is full. Can be NULL if there is no next pool.
+ * @dying:        true if the pool is being terminated, and any ongoing
+ *                operations should be abandoned
+ * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from
+ *                this pool, eg during a grow operation
+ */
+struct kbase_mem_pool {
+	struct kbase_device *kbdev;
+	size_t              cur_size;
+	size_t              max_size;
+	size_t		    order;
+	spinlock_t          pool_lock;
+	struct list_head    page_list;
+	struct shrinker     reclaim;
+
+	struct kbase_mem_pool *next_pool;
+
+	bool dying;
+	bool dont_reclaim;
+};
+
+/**
+ * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP
+ *                            frequency, and real frequency and core mask
+ * @opp_freq:  Nominal OPP frequency
+ * @real_freq: Real GPU frequency
+ * @core_mask: Shader core mask
+ */
+struct kbase_devfreq_opp {
+	u64 opp_freq;
+	u64 real_freq;
+	u64 core_mask;
+};
+
+struct kbase_mmu_mode {
+	void (*update)(struct kbase_context *kctx);
+	void (*get_as_setup)(struct kbase_context *kctx,
+			struct kbase_mmu_setup * const setup);
+	void (*disable_as)(struct kbase_device *kbdev, int as_nr);
+	phys_addr_t (*pte_to_phy_addr)(u64 entry);
+	int (*ate_is_valid)(u64 ate, unsigned int level);
+	int (*pte_is_valid)(u64 pte, unsigned int level);
+	void (*entry_set_ate)(u64 *entry, struct tagged_addr phy,
+			unsigned long flags, unsigned int level);
+	void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+	void (*entry_invalidate)(u64 *entry);
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
+
+
+#define DEVNAME_SIZE	16
+
+/**
+ * struct kbase_device   - Object representing an instance of GPU platform device,
+ *                         allocated from the probe method of mali driver.
+ * @hw_quirks_sc:          Configuration to be used for the shader cores as per
+ *                         the HW issues present in the GPU.
+ * @hw_quirks_tiler:       Configuration to be used for the Tiler as per the HW
+ *                         issues present in the GPU.
+ * @hw_quirks_mmu:         Configuration to be used for the MMU as per the HW
+ *                         issues present in the GPU.
+ * @hw_quirks_jm:          Configuration to be used for the Job Manager as per
+ *                         the HW issues present in the GPU.
+ * @entry:                 Links the device instance to the global list of GPU
+ *                         devices. The list would have as many entries as there
+ *                         are GPU device instances.
+ * @dev:                   Pointer to the kernel's generic/base representation
+ *                         of the GPU platform device.
+ * @mdev:                  Pointer to the miscellaneous device registered to
+ *                         provide Userspace access to kernel driver through the
+ *                         device file /dev/malixx.
+ * @reg_start:             Base address of the region in physical address space
+ *                         where GPU registers have been mapped.
+ * @reg_size:              Size of the region containing GPU registers
+ * @reg:                   Kernel virtual address of the region containing GPU
+ *                         registers, using which Driver will access the registers.
+ * @irqs:                  Array containing IRQ resource info for 3 types of
+ *                         interrupts : Job scheduling, MMU & GPU events (like
+ *                         power management, cache etc.)
+ * @clock:                 Pointer to the input clock resource (having an id of 0),
+ *                         referenced by the GPU device node.
+ * @regulator:             Pointer to the struct corresponding to the regulator
+ *                         for GPU device
+ * @devname:               string containing the name used for GPU device instance,
+ *                         miscellaneous device is registered using the same name.
+ * @model:                 Pointer, valid only when Driver is compiled to not access
+ *                         the real GPU Hw, to the dummy model which tries to mimic
+ *                         to some extent the state & behavior of GPU Hw in response
+ *                         to the register accesses made by the Driver.
+ * @irq_slab:              slab cache for allocating the work items queued when
+ *                         model mimics raising of IRQ to cause an interrupt on CPU.
+ * @irq_workq:             workqueue for processing the irq work items.
+ * @serving_job_irq:       function to execute work items queued when model mimics
+ *                         the raising of JS irq, mimics the interrupt handler
+ *                         processing JS interrupts.
+ * @serving_gpu_irq:       function to execute work items queued when model mimics
+ *                         the raising of GPU irq, mimics the interrupt handler
+ *                         processing GPU interrupts.
+ * @serving_mmu_irq:       function to execute work items queued when model mimics
+ *                         the raising of MMU irq, mimics the interrupt handler
+ *                         processing MMU interrupts.
+ * @reg_op_lock:           lock used by model to serialize the handling of register
+ *                         accesses made by the driver.
+ * @pm:                    Per device object for storing data for power management
+ *                         framework.
+ * @js_data:               Per device object encapsulating the current context of
+ *                         Job Scheduler, which is global to the device and is not
+ *                         tied to any particular struct kbase_context running on
+ *                         the device
+ * @mem_pool:              Object containing the state for global pool of 4KB size
+ *                         physical pages which can be used by all the contexts.
+ * @lp_mem_pool:           Object containing the state for global pool of 2MB size
+ *                         physical pages which can be used by all the contexts.
+ * @memdev:                keeps track of the in use physical pages allocated by
+ *                         the Driver.
+ * @mmu_mode:              Pointer to the object containing methods for programming
+ *                         the MMU, depending on the type of MMU supported by Hw.
+ * @as:                    Array of objects representing address spaces of GPU.
+ * @as_free:               Bitpattern of free/available address space lots
+ * @as_to_kctx:            Array of pointers to struct kbase_context, having
+ *                         GPU adrress spaces assigned to them.
+ * @mmu_mask_change:       Lock to serialize the access to MMU interrupt mask
+ *                         register used in the handling of Bus & Page faults.
+ * @gpu_props:             Object containing complete information about the
+ *                         configuration/properties of GPU HW device in use.
+ * @hw_issues_mask:        List of SW workarounds for HW issues
+ * @hw_features_mask:      List of available HW features.
+ * shader_inuse_bitmap:    Bitmaps of shader cores that are currently in use.
+ *                         These should be kept up to date by the job scheduler.
+ *                         The bit to be set in this bitmap should already be set
+ *                         in the @shader_needed_bitmap.
+ *                         @pm.power_change_lock should be held when accessing
+ *                         these members.
+ * @shader_inuse_cnt:      Usage count for each of the 64 shader cores
+ * @shader_needed_bitmap:  Bitmaps of cores the JS needs for jobs ready to run
+ *                         kbase_pm_check_transitions_nolock() should be called
+ *                         when the bitmap is modified to update the power
+ *                         management system and allow transitions to occur.
+ * @shader_needed_cnt:     Count for each of the 64 shader cores, incremented
+ *                         when the core is requested for use and decremented
+ *                         later when the core is known to be powered up for use.
+ * @tiler_inuse_cnt:       Usage count for the Tiler block. @tiler_needed_cnt
+ *                         should be non zero at the time of incrementing the
+ *                         usage count.
+ * @tiler_needed_cnt:      Count for the Tiler block shader cores, incremented
+ *                         when Tiler is requested for use and decremented
+ *                         later when Tiler is known to be powered up for use.
+ * @disjoint_event:        struct for keeping track of the disjoint information,
+ *                         that whether the GPU is in a disjoint state and the
+ *                         number of disjoint events that have occurred on GPU.
+ * @l2_users_count:        Refcount for tracking users of the l2 cache, e.g.
+ *                         when using hardware counter instrumentation.
+ * @shader_available_bitmap: Bitmap of shader cores that are currently available,
+ *                         powered up and the power policy is happy for jobs
+ *                         to be submitted to these cores. These are updated
+ *                         by the power management code. The job scheduler
+ *                         should avoid submitting new jobs to any cores
+ *                         that are not marked as available.
+ * @tiler_available_bitmap: Bitmap of tiler units that are currently available.
+ * @l2_available_bitmap:    Bitmap of the currently available Level 2 caches.
+ * @stack_available_bitmap: Bitmap of the currently available Core stacks.
+ * @shader_ready_bitmap:    Bitmap of shader cores that are ready (powered on)
+ * @shader_transitioning_bitmap: Bitmap of shader cores that are currently changing
+ *                         power state.
+ * @nr_hw_address_spaces:  Number of address spaces actually available in the
+ *                         GPU, remains constant after driver initialisation.
+ * @nr_user_address_spaces: Number of address spaces available to user contexts
+ * @hwcnt:                  Structure used for instrumentation and HW counters
+ *                         dumping
+ * @vinstr_ctx:            vinstr context created per device
+ * @trace_lock:            Lock to serialize the access to trace buffer.
+ * @trace_first_out:       Index/offset in the trace buffer at which the first
+ *                         unread message is present.
+ * @trace_next_in:         Index/offset in the trace buffer at which the new
+ *                         message will be written.
+ * @trace_rbuf:            Pointer to the buffer storing debug messages/prints
+ *                         tracing the various events in Driver.
+ *                         The buffer is filled in circular fashion.
+ * @reset_timeout_ms:      Number of milliseconds to wait for the soft stop to
+ *                         complete for the GPU jobs before proceeding with the
+ *                         GPU reset.
+ * @cacheclean_lock:       Lock to serialize the clean & invalidation of GPU caches,
+ *                         between Job Manager backend & Instrumentation code.
+ * @platform_context:      Platform specific private data to be accessed by
+ *                         platform specific config files only.
+ * @kctx_list:             List of kbase_contexts created for the device, including
+ *                         the kbase_context created for vinstr_ctx.
+ * @kctx_list_lock:        Lock protecting concurrent accesses to @kctx_list.
+ * @devfreq_profile:       Describes devfreq profile for the Mali GPU device, passed
+ *                         to devfreq_add_device() to add devfreq feature to Mali
+ *                         GPU device.
+ * @devfreq:               Pointer to devfreq structure for Mali GPU device,
+ *                         returned on the call to devfreq_add_device().
+ * @current_freq:          The real frequency, corresponding to @current_nominal_freq,
+ *                         at which the Mali GPU device is currently operating, as
+ *                         retrieved from @opp_table in the target callback of
+ *                         @devfreq_profile.
+ * @current_nominal_freq:  The nominal frequency currently used for the Mali GPU
+ *                         device as retrieved through devfreq_recommended_opp()
+ *                         using the freq value passed as an argument to target
+ *                         callback of @devfreq_profile
+ * @current_voltage:       The voltage corresponding to @current_nominal_freq, as
+ *                         retrieved through dev_pm_opp_get_voltage().
+ * @current_core_mask:     bitmask of shader cores that are currently desired &
+ *                         enabled, corresponding to @current_nominal_freq as
+ *                         retrieved from @opp_table in the target callback of
+ *                         @devfreq_profile.
+ * @opp_table:             Pointer to the lookup table for converting between nominal
+ *                         OPP (operating performance point) frequency, and real
+ *                         frequency and core mask. This table is constructed according
+ *                         to operating-points-v2-mali table in devicetree.
+ * @num_opps:              Number of operating performance points available for the Mali
+ *                         GPU device.
+ * @devfreq_cooling:       Pointer returned on registering devfreq cooling device
+ *                         corresponding to @devfreq.
+ * @ipa_use_configured_model: set to TRUE when configured model is used for IPA and
+ *                         FALSE when fallback model is used.
+ * @ipa:                   Top level structure for IPA, containing pointers to both
+ *                         configured & fallback models.
+ * @timeline:              Stores the global timeline tracking information.
+ * @job_fault_debug:       Flag to control the dumping of debug data for job faults,
+ *                         set when the 'job_fault' debugfs file is opened.
+ * @mali_debugfs_directory: Root directory for the debugfs files created by the driver
+ * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing
+ *                         a sub-directory for every context.
+ * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault
+ *                         has occurred.
+ * @job_fault_wq:          Waitqueue to block the job fault dumping daemon till the
+ *                         occurrence of a job fault.
+ * @job_fault_resume_wq:   Waitqueue on which every context with a faulty job wait
+ *                         for the job fault dumping to complete before they can
+ *                         do bottom half of job done for the atoms which followed
+ *                         the faulty atom.
+ * @job_fault_resume_workq: workqueue to process the work items queued for the faulty
+ *                         atoms, whereby the work item function waits for the dumping
+ *                         to get completed.
+ * @job_fault_event_list:  List of atoms, each belonging to a different context, which
+ *                         generated a job fault.
+ * @job_fault_event_lock:  Lock to protect concurrent accesses to @job_fault_event_list
+ * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs
+ *                         file "read_register".
+ * @kbase_profiling_controls: Profiling controls set by gator to control frame buffer
+ *                         dumping and s/w counter reporting.
+ * @force_replay_limit:    Number of gpu jobs, having replay atoms associated with them,
+ *                         that are run before a job is forced to fail and replay.
+ *                         Set to 0 to disable forced failures.
+ * @force_replay_count:    Count of gpu jobs, having replay atoms associated with them,
+ *                         between forced failures. Incremented on each gpu job which
+ *                         has replay atoms dependent on it. A gpu job is forced to
+ *                         fail once this is greater than or equal to @force_replay_limit
+ * @force_replay_core_req: Core requirements, set through the sysfs file, for the replay
+ *                         job atoms to consider the associated gpu job for forceful
+ *                         failure and replay. May be zero
+ * @force_replay_random:   Set to 1 to randomize the @force_replay_limit, in the
+ *                         range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+ * @ctx_num:               Total number of contexts created for the device.
+ * @io_history:            Pointer to an object keeping a track of all recent
+ *                         register accesses. The history of register accesses
+ *                         can be read through "regs_history" debugfs file.
+ * @hwaccess:              Contains a pointer to active kbase context and GPU
+ *                         backend specific data for HW access layer.
+ * @faults_pending:        Count of page/bus faults waiting for bottom half processing
+ *                         via workqueues.
+ * @poweroff_pending:      Set when power off operation for GPU is started, reset when
+ *                         power on for GPU is started.
+ * @infinite_cache_active_default: Set to enable using infinite cache for all the
+ *                         allocations of a new context.
+ * @mem_pool_max_size_default: Initial/default value for the maximum size of both
+ *                         types of pool created for a new context.
+ * @current_gpu_coherency_mode: coherency mode in use, which can be different
+ *                         from @system_coherency, when using protected mode.
+ * @system_coherency:      coherency mode as retrieved from the device tree.
+ * @cci_snoop_enabled:     Flag to track when CCI snoops have been enabled.
+ * @snoop_enable_smc:      SMC function ID to call into Trusted firmware to
+ *                         enable cache snooping. Value of 0 indicates that it
+ *                         is not used.
+ * @snoop_disable_smc:     SMC function ID to call disable cache snooping.
+ * @protected_ops:         Pointer to the methods for switching in or out of the
+ *                         protected mode, as per the @protected_dev being used.
+ * @protected_dev:         Pointer to the protected mode switcher device attached
+ *                         to the GPU device retrieved through device tree if
+ *                         GPU do not support protected mode switching natively.
+ * @protected_mode:        set to TRUE when GPU is put into protected mode
+ * @protected_mode_transition: set to TRUE when GPU is transitioning into or
+ *                         out of protected mode.
+ * @protected_mode_support: set to true if protected mode is supported.
+ * @buslogger:              Pointer to the structure required for interfacing
+ *                          with the bus logger module to set the size of buffer
+ *                          used by the module for capturing bus logs.
+ * @irq_reset_flush:        Flag to indicate that GPU reset is in-flight and flush of
+ *                          IRQ + bottom half is being done, to prevent the writes
+ *                          to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers.
+ * @inited_subsys:          Bitmap of inited sub systems at the time of device probe.
+ *                          Used during device remove or for handling error in probe.
+ * @hwaccess_lock:          Lock, which can be taken from IRQ context, to serialize
+ *                          the updates made to Job dispatcher + scheduler states.
+ * @mmu_hw_mutex:           Protects access to MMU operations and address space
+ *                          related state.
+ * @serialize_jobs:         Currently used mode for serialization of jobs, both
+ *                          intra & inter slots serialization is supported.
+ * @backup_serialize_jobs:  Copy of the original value of @serialize_jobs taken
+ *                          when GWT is enabled. Used to restore the original value
+ *                          on disabling of GWT.
+ * @js_ctx_scheduling_mode: Context scheduling mode currently being used by
+ *                          Job Scheduler
+ */
+struct kbase_device {
+	u32 hw_quirks_sc;
+	u32 hw_quirks_tiler;
+	u32 hw_quirks_mmu;
+	u32 hw_quirks_jm;
+
+	struct list_head entry;
+	struct device *dev;
+	struct miscdevice mdev;
+	u64 reg_start;
+	size_t reg_size;
+	void __iomem *reg;
+
+	struct {
+		int irq;
+		int flags;
+	} irqs[3];
+
+	struct clk *clock;
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulator;
+#endif
+	char devname[DEVNAME_SIZE];
+
+#ifdef CONFIG_MALI_NO_MALI
+	void *model;
+	struct kmem_cache *irq_slab;
+	struct workqueue_struct *irq_workq;
+	atomic_t serving_job_irq;
+	atomic_t serving_gpu_irq;
+	atomic_t serving_mmu_irq;
+	spinlock_t reg_op_lock;
+#endif	/* CONFIG_MALI_NO_MALI */
+
+	struct kbase_pm_device_data pm;
+	struct kbasep_js_device_data js_data;
+	struct kbase_mem_pool mem_pool;
+	struct kbase_mem_pool lp_mem_pool;
+	struct kbasep_mem_device memdev;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	struct kbase_as as[BASE_MAX_NR_AS];
+	u16 as_free; /* Bitpattern of free Address Spaces */
+	struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
+
+
+	spinlock_t mmu_mask_change;
+
+	struct kbase_gpu_props gpu_props;
+
+	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+
+	u64 shader_inuse_bitmap;
+
+	u32 shader_inuse_cnt[64];
+
+	u64 shader_needed_bitmap;
+
+	u32 shader_needed_cnt[64];
+
+	u32 tiler_inuse_cnt;
+
+	u32 tiler_needed_cnt;
+
+	struct {
+		atomic_t count;
+		atomic_t state;
+	} disjoint_event;
+
+	u32 l2_users_count;
+
+	u64 shader_available_bitmap;
+	u64 tiler_available_bitmap;
+	u64 l2_available_bitmap;
+	u64 stack_available_bitmap;
+
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+
+	s8 nr_hw_address_spaces;
+	s8 nr_user_address_spaces;
+
+	struct kbase_hwcnt {
+		/* The lock should be used when accessing any of the following members */
+		spinlock_t lock;
+
+		struct kbase_context *kctx;
+		u64 addr;
+
+		struct kbase_instr_backend backend;
+	} hwcnt;
+
+	struct kbase_vinstr_context *vinstr_ctx;
+
+#if KBASE_TRACE_ENABLE
+	spinlock_t              trace_lock;
+	u16                     trace_first_out;
+	u16                     trace_next_in;
+	struct kbase_trace            *trace_rbuf;
+#endif
+
+	u32 reset_timeout_ms;
+
+	struct mutex cacheclean_lock;
+
+	void *platform_context;
+
+	struct list_head        kctx_list;
+	struct mutex            kctx_list_lock;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freq;
+	unsigned long current_nominal_freq;
+	unsigned long current_voltage;
+	u64 current_core_mask;
+	struct kbase_devfreq_opp *opp_table;
+	int num_opps;
+	struct kbasep_pm_metrics last_devfreq_metrics;
+#ifdef CONFIG_DEVFREQ_THERMAL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+	struct devfreq_cooling_device *devfreq_cooling;
+#else
+	struct thermal_cooling_device *devfreq_cooling;
+#endif
+	atomic_t ipa_use_configured_model;
+	struct {
+		/* Access to this struct must be with ipa.lock held */
+		struct mutex lock;
+		struct kbase_ipa_model *configured_model;
+		struct kbase_ipa_model *fallback_model;
+
+		/* Values of the PM utilization metrics from last time the
+		 * power model was invoked. The utilization is calculated as
+		 * the difference between last_metrics and the current values.
+		 */
+		struct kbasep_pm_metrics last_metrics;
+
+		/*
+		 * gpu_active_callback - Inform IPA that GPU is now active
+		 * @model_data: Pointer to model data
+		 */
+		void (*gpu_active_callback)(
+				struct kbase_ipa_model_vinstr_data *model_data);
+
+		/*
+		 * gpu_idle_callback - Inform IPA that GPU is now idle
+		 * @model_data: Pointer to model data
+		 */
+		void (*gpu_idle_callback)(
+				struct kbase_ipa_model_vinstr_data *model_data);
+
+		/* Model data to pass to ipa_gpu_active/idle() */
+		struct kbase_ipa_model_vinstr_data *model_data;
+
+		/* true if IPA is currently using vinstr */
+		bool vinstr_active;
+	} ipa;
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kbdev_timeline timeline;
+#endif
+
+	bool job_fault_debug;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *mali_debugfs_directory;
+	struct dentry *debugfs_ctx_directory;
+
+#ifdef CONFIG_MALI_DEBUG
+	u64 debugfs_as_read_bitmap;
+#endif /* CONFIG_MALI_DEBUG */
+
+	wait_queue_head_t job_fault_wq;
+	wait_queue_head_t job_fault_resume_wq;
+	struct workqueue_struct *job_fault_resume_workq;
+	struct list_head job_fault_event_list;
+	spinlock_t job_fault_event_lock;
+
+#if !MALI_CUSTOMER_RELEASE
+	struct {
+		u16 reg_offset;
+	} regs_dump_debugfs_data;
+#endif /* !MALI_CUSTOMER_RELEASE */
+#endif /* CONFIG_DEBUG_FS */
+
+	u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
+
+
+#if MALI_CUSTOMER_RELEASE == 0
+	int force_replay_limit;
+	int force_replay_count;
+	base_jd_core_req force_replay_core_req;
+	bool force_replay_random;
+#endif
+
+	atomic_t ctx_num;
+
+#ifdef CONFIG_DEBUG_FS
+	struct kbase_io_history io_history;
+#endif /* CONFIG_DEBUG_FS */
+
+	struct kbase_hwaccess_data hwaccess;
+
+	atomic_t faults_pending;
+
+	bool poweroff_pending;
+
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active_default;
+#else
+	u32 infinite_cache_active_default;
+#endif
+	size_t mem_pool_max_size_default;
+
+	u32 current_gpu_coherency_mode;
+	u32 system_coherency;
+
+	bool cci_snoop_enabled;
+
+	u32 snoop_enable_smc;
+	u32 snoop_disable_smc;
+
+	struct protected_mode_ops *protected_ops;
+
+	struct protected_mode_device *protected_dev;
+
+	bool protected_mode;
+
+	bool protected_mode_transition;
+
+	bool protected_mode_support;
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	struct bus_logger_client *buslogger;
+#endif
+
+	bool irq_reset_flush;
+
+	u32 inited_subsys;
+
+	spinlock_t hwaccess_lock;
+
+	struct mutex mmu_hw_mutex;
+
+	/* See KBASE_SERIALIZE_* for details */
+	u8 serialize_jobs;
+
+#ifdef CONFIG_MALI_JOB_DUMP
+	u8 backup_serialize_jobs;
+#endif
+
+	/* See KBASE_JS_*_PRIORITY_MODE for details. */
+	u32 js_ctx_scheduling_mode;
+};
+
+/**
+ * struct jsctx_queue - JS context atom queue
+ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this
+ *                 job slot.
+ * @x_dep_head:    Head item of the linked list of atoms blocked on cross-slot
+ *                 dependencies. Atoms on this list will be moved to the
+ *                 runnable_tree when the blocking atom completes.
+ *
+ * hwaccess_lock must be held when accessing this structure.
+ */
+struct jsctx_queue {
+	struct rb_root runnable_tree;
+	struct list_head x_dep_head;
+};
+
+
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
+					 (((minor) & 0xFFF) << 8) | \
+					 ((0 & 0xFF) << 0))
+
+/**
+ * enum kbase_context_flags - Flags for kbase contexts
+ *
+ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
+ * process on a 64-bit kernel.
+ *
+ * @KCTX_RUNNABLE_REF: Set when context is counted in
+ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
+ *
+ * @KCTX_ACTIVE: Set when the context is active.
+ *
+ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
+ * context.
+ *
+ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
+ * initialized.
+ *
+ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
+ * allocations. Existing allocations will not change.
+ *
+ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
+ *
+ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept
+ * scheduled in.
+ *
+ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
+ * This is only ever updated whilst the jsctx_mutex is held.
+ *
+ * @KCTX_DYING: Set when the context process is in the process of being evicted.
+ *
+ * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this
+ * context, to disable use of implicit dma-buf fences. This is used to avoid
+ * potential synchronization deadlocks.
+ *
+ * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory
+ * allocations. For 64-bit clients it is enabled by default, and disabled by
+ * default on 32-bit clients. Being able to clear this flag is only used for
+ * testing purposes of the custom zone allocation on 64-bit user-space builds,
+ * where we also require more control than is available through e.g. the JIT
+ * allocation mechanism. However, the 64-bit user-space client must still
+ * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled
+ * from it for job slot 0. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled
+ * from it for job slot 1. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled
+ * from it for job slot 2. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * All members need to be separate bits. This enum is intended for use in a
+ * bitmask where multiple values get OR-ed together.
+ */
+enum kbase_context_flags {
+	KCTX_COMPAT = 1U << 0,
+	KCTX_RUNNABLE_REF = 1U << 1,
+	KCTX_ACTIVE = 1U << 2,
+	KCTX_PULLED = 1U << 3,
+	KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
+	KCTX_INFINITE_CACHE = 1U << 5,
+	KCTX_SUBMIT_DISABLED = 1U << 6,
+	KCTX_PRIVILEGED = 1U << 7,
+	KCTX_SCHEDULED = 1U << 8,
+	KCTX_DYING = 1U << 9,
+	KCTX_NO_IMPLICIT_SYNC = 1U << 10,
+	KCTX_FORCE_SAME_VA = 1U << 11,
+	KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
+	KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
+	KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
+};
+
+struct kbase_sub_alloc {
+	struct list_head link;
+	struct page *page;
+	DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K);
+};
+
+/**
+ * struct kbase_context - Object representing an entity, among which GPU is
+ *                        scheduled and gets its own GPU address space.
+ *                        Created when the device file /dev/malixx is opened.
+ * @filp:                 Pointer to the struct file corresponding to device file
+ *                        /dev/malixx instance, passed to the file's open method.
+ * @kbdev:                Pointer to the Kbase device for which the context is created.
+ * @id:                   Unique indentifier for the context, indicates the number of
+ *                        contexts which have been created for the device so far.
+ * @api_version:          contains the version number for User/kernel interface,
+ *                        used for compatibility check.
+ * @pgd:                  Physical address of the page allocated for the top level
+ *                        page table of the context, this will be used for MMU Hw
+ *                        programming as the address translation will start from
+ *                        the top level page table.
+ * @event_list:           list of posted events about completed atoms, to be sent to
+ *                        event handling thread of Userpsace.
+ * @event_coalesce_list:  list containing events corresponding to successive atoms
+ *                        which have requested deferred delivery of the completion
+ *                        events to Userspace.
+ * @event_mutex:          Lock to protect the concurrent access to @event_list &
+ *                        @event_mutex.
+ * @event_closed:         Flag set through POST_TERM ioctl, indicates that Driver
+ *                        should stop posting events and also inform event handling
+ *                        thread that context termination is in progress.
+ * @event_workq:          Workqueue for processing work items corresponding to atoms
+ *                        that do not return an event to Userspace or have to perform
+ *                        a replay job
+ * @event_count:          Count of the posted events to be consumed by Userspace.
+ * @event_coalesce_count: Count of the events present in @event_coalesce_list.
+ * @flags:                bitmap of enums from kbase_context_flags, indicating the
+ *                        state & attributes for the context.
+ * @setup_complete:       Indicates if the setup for context has completed, i.e.
+ *                        flags have been set for the context. Driver allows only
+ *                        2 ioctls until the setup is done. Valid only for
+ *                        @api_version value 0.
+ * @setup_in_progress:    Indicates if the context's setup is in progress and other
+ *                        setup calls during that shall be rejected.
+ * @mmu_teardown_pages:   Buffer of 4 Pages in size, used to cache the entries of
+ *                        top & intermediate level page tables to avoid repeated
+ *                        calls to kmap_atomic during the MMU teardown.
+ * @aliasing_sink_page:   Special page used for KBASE_MEM_TYPE_ALIAS allocations,
+ *                        which can alias number of memory regions. The page is
+ *                        represent a region where it is mapped with a write-alloc
+ *                        cache setup, typically used when the write result of the
+ *                        GPU isn't needed, but the GPU must write anyway.
+ * @mem_partials_lock:    Lock for protecting the operations done on the elements
+ *                        added to @mem_partials list.
+ * @mem_partials:         List head for the list of large pages, 2MB in size, which
+ *                        which have been split into 4 KB pages and are used
+ *                        partially for the allocations >= 2 MB in size.
+ * @mmu_lock:             Lock to serialize the accesses made to multi level GPU
+ *                        page tables, maintained for every context.
+ * @reg_lock:             Lock used for GPU virtual address space management operations,
+ *                        like adding/freeing a memory region in the address space.
+ *                        Can be converted to a rwlock ?.
+ * @reg_rbtree_same:      RB tree of the memory regions allocated from the SAME_VA
+ *                        zone of the GPU virtual address space. Used for allocations
+ *                        having the same value for GPU & CPU virtual address.
+ * @reg_rbtree_exec:      RB tree of the memory regions allocated from the EXEC
+ *                        zone of the GPU virtual address space. Used for
+ *                        allocations containing executable code for
+ *                        shader programs.
+ * @reg_rbtree_custom:    RB tree of the memory regions allocated from the CUSTOM_VA
+ *                        zone of the GPU virtual address space.
+ * @cookies:              Bitmask containing of BITS_PER_LONG bits, used mainly for
+ *                        SAME_VA allocations to defer the reservation of memory region
+ *                        (from the GPU virtual address space) from base_mem_alloc
+ *                        ioctl to mmap system call. This helps returning unique
+ *                        handles, disguised as GPU VA, to Userspace from base_mem_alloc
+ *                        and later retrieving the pointer to memory region structure
+ *                        in the mmap handler.
+ * @pending_regions:      Array containing pointers to memory region structures,
+ *                        used in conjunction with @cookies bitmask mainly for
+ *                        providing a mechansim to have the same value for CPU &
+ *                        GPU virtual address.
+ * @event_queue:          Wait queue used for blocking the thread, which consumes
+ *                        the base_jd_event corresponding to an atom, when there
+ *                        are no more posted events.
+ * @tgid:                 thread group id of the process, whose thread opened the
+ *                        device file /dev/malixx instance to create a context.
+ * @pid:                  id of the thread, corresponding to process @tgid, which
+ *                        actually which opened the device file.
+ * @jctx:                 object encapsulating all the Job dispatcher related state,
+ *                        including the array of atoms.
+ * @used_pages:           Keeps a track of the number of 4KB physical pages in use
+ *                        for the context.
+ * @nonmapped_pages:      Updated in the same way as @used_pages, except for the case
+ *                        when special tracking page is freed by userspace where it
+ *                        is reset to 0.
+ * @mem_pool:             Object containing the state for the context specific pool of
+ *                        4KB size physical pages.
+ * @lp_mem_pool:          Object containing the state for the context specific pool of
+ *                        2MB size physical pages.
+ * @reclaim:              Shrinker object registered with the kernel containing
+ *                        the pointer to callback function which is invoked under
+ *                        low memory conditions. In the callback function Driver
+ *                        frees up the memory for allocations marked as
+ *                        evictable/reclaimable.
+ * @evict_list:           List head for the list containing the allocations which
+ *                        can be evicted or freed up in the shrinker callback.
+ * @waiting_soft_jobs:    List head for the list containing softjob atoms, which
+ *                        are either waiting for the event set operation, or waiting
+ *                        for the signaling of input fence or waiting for the GPU
+ *                        device to powered on so as to dump the CPU/GPU timestamps.
+ * @waiting_soft_jobs_lock: Lock to protect @waiting_soft_jobs list from concurrent
+ *                        accesses.
+ * @dma_fence:            Object containing list head for the list of dma-buf fence
+ *                        waiting atoms and the waitqueue to process the work item
+ *                        queued for the atoms blocked on the signaling of dma-buf
+ *                        fences.
+ * @as_nr:                id of the address space being used for the scheduled in
+ *                        context. This is effectively part of the Run Pool, because
+ *                        it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst
+ *                        the context is scheduled in. The hwaccess_lock must be held
+ *                        whilst accessing this.
+ *                        If the context relating to this value of as_nr is required,
+ *                        then the context must be retained to ensure that it doesn't
+ *                        disappear whilst it is being used. Alternatively, hwaccess_lock
+ *                        can be held to ensure the context doesn't disappear (but this
+ *                        has restrictions on what other locks can be taken simutaneously).
+ * @refcount:             Keeps track of the number of users of this context. A user
+ *                        can be a job that is available for execution, instrumentation
+ *                        needing to 'pin' a context for counter collection, etc.
+ *                        If the refcount reaches 0 then this context is considered
+ *                        inactive and the previously programmed AS might be cleared
+ *                        at any point.
+ *                        Generally the reference count is incremented when the context
+ *                        is scheduled in and an atom is pulled from the context's per
+ *                        slot runnable tree.
+ * @mm_update_lock:       lock used for handling of special tracking page.
+ * @process_mm:           Pointer to the memory descriptor of the process which
+ *                        created the context. Used for accounting the physical
+ *                        pages used for GPU allocations, done for the context,
+ *                        to the memory consumed by the process.
+ * @same_va_end:          End address of the SAME_VA zone (in 4KB page units)
+ * @timeline:             Object tracking the number of atoms currently in flight for
+ *                        the context and thread group id of the process, i.e. @tgid.
+ * @mem_profile_data:     Buffer containing the profiling information provided by
+ *                        Userspace, can be read through the mem_profile debugfs file.
+ * @mem_profile_size:     Size of the @mem_profile_data.
+ * @mem_profile_lock:     Lock to serialize the operations related to mem_profile
+ *                        debugfs file.
+ * @kctx_dentry:          Pointer to the debugfs directory created for every context,
+ *                        inside kbase_device::debugfs_ctx_directory, containing
+ *                        context specific files.
+ * @reg_dump:             Buffer containing a register offset & value pair, used
+ *                        for dumping job fault debug info.
+ * @job_fault_count:      Indicates that a job fault occurred for the context and
+ *                        dumping of its debug info is in progress.
+ * @job_fault_resume_event_list: List containing atoms completed after the faulty
+ *                        atom but before the debug data for faulty atom was dumped.
+ * @jsctx_queue:          Per slot & priority arrays of object containing the root
+ *                        of RB-tree holding currently runnable atoms on the job slot
+ *                        and the head item of the linked list of atoms blocked on
+ *                        cross-slot dependencies.
+ * @atoms_pulled:         Total number of atoms currently pulled from the context.
+ * @atoms_pulled_slot:    Per slot count of the number of atoms currently pulled
+ *                        from the context.
+ * @atoms_pulled_slot_pri: Per slot & priority count of the number of atoms currently
+ *                        pulled from the context. hwaccess_lock shall be held when
+ *                        accessing it.
+ * @blocked_js:           Indicates if the context is blocked from submitting atoms
+ *                        on a slot at a given priority. This is set to true, when
+ *                        the atom corresponding to context is soft/hard stopped or
+ *                        removed from the HEAD_NEXT register in response to
+ *                        soft/hard stop.
+ * @slots_pullable:       Bitmask of slots, indicating the slots for which the
+ *                        context has pullable atoms in the runnable tree.
+ * @work:                 Work structure used for deferred ASID assignment.
+ * @vinstr_cli:           Pointer to the legacy userspace vinstr client, there can
+ *                        be only such client per kbase context.
+ * @vinstr_cli_lock:      Lock used for the vinstr ioctl calls made for @vinstr_cli.
+ * @completed_jobs:       List containing completed atoms for which base_jd_event is
+ *                        to be posted.
+ * @work_count:           Number of work items, corresponding to atoms, currently
+ *                        pending on job_done workqueue of @jctx.
+ * @soft_job_timeout:     Timer object used for failing/cancelling the waiting
+ *                        soft-jobs which have been blocked for more than the
+ *                        timeout value used for the soft-jobs
+ * @jit_alloc:            Array of 256 pointers to GPU memory regions, used for
+ *                        for JIT allocations.
+ * @jit_max_allocations:  Maximum number of JIT allocations allowed at once.
+ * @jit_current_allocations: Current number of in-flight JIT allocations.
+ * @jit_current_allocations_per_bin: Current number of in-flight JIT allocations per bin
+ * @jit_version:          version number indicating whether userspace is using
+ *                        old or new version of interface for JIT allocations
+ *	                  1 -> client used KBASE_IOCTL_MEM_JIT_INIT_OLD
+ *	                  2 -> client used KBASE_IOCTL_MEM_JIT_INIT
+ * @jit_active_head:      List containing the JIT allocations which are in use.
+ * @jit_pool_head:        List containing the JIT allocations which have been
+ *                        freed up by userpsace and so not being used by them.
+ *                        Driver caches them to quickly fulfill requests for new
+ *                        JIT allocations. They are released in case of memory
+ *                        pressure as they are put on the @evict_list when they
+ *                        are freed up by userspace.
+ * @jit_destroy_head:     List containing the JIT allocations which were moved to it
+ *                        from @jit_pool_head, in the shrinker callback, after freeing
+ *                        their backing physical pages.
+ * @jit_evict_lock:       Lock used for operations done on JIT allocations and also
+ *                        for accessing @evict_list.
+ * @jit_work:             Work item queued to defer the freeing of memory region when
+ *                        JIT allocation is moved to @jit_destroy_head.
+ * @jit_atoms_head:       A list of the JIT soft-jobs, both alloc & free, in submission
+ *                        order, protected by kbase_jd_context.lock.
+ * @jit_pending_alloc:    A list of JIT alloc soft-jobs for which allocation will be
+ *                        reattempted after the impending free of other active JIT
+ *                        allocations.
+ * @ext_res_meta_head:    A list of sticky external resources which were requested to
+ *                        be mapped on GPU side, through a softjob atom of type
+ *                        EXT_RES_MAP or STICKY_RESOURCE_MAP ioctl.
+ * @drain_pending:        Used to record that a flush/invalidate of the GPU caches was
+ *                        requested from atomic context, so that the next flush request
+ *                        can wait for the flush of GPU writes.
+ * @age_count:            Counter incremented on every call to jd_submit_atom,
+ *                        atom is assigned the snapshot of this counter, which
+ *                        is used to determine the atom's age when it is added to
+ *                        the runnable RB-tree.
+ * @trim_level:           Level of JIT allocation trimming to perform on free (0-100%)
+ * @gwt_enabled:          Indicates if tracking of GPU writes is enabled, protected by
+ *                        kbase_context.reg_lock.
+ * @gwt_was_enabled:      Simple sticky bit flag to know if GWT was ever enabled.
+ * @gwt_current_list:     A list of addresses for which GPU has generated write faults,
+ *                        after the last snapshot of it was sent to userspace.
+ * @gwt_snapshot_list:    Snapshot of the @gwt_current_list for sending to user space.
+ * @priority:             Indicates the context priority. Used along with @atoms_count
+ *                        for context scheduling, protected by hwaccess_lock.
+ * @atoms_count:          Number of gpu atoms currently in use, per priority
+ */
+struct kbase_context {
+	struct file *filp;
+	struct kbase_device *kbdev;
+	u32 id;
+	unsigned long api_version;
+	phys_addr_t pgd;
+	struct list_head event_list;
+	struct list_head event_coalesce_list;
+	struct mutex event_mutex;
+	atomic_t event_closed;
+	struct workqueue_struct *event_workq;
+	atomic_t event_count;
+	int event_coalesce_count;
+
+	atomic_t flags;
+
+	atomic_t                setup_complete;
+	atomic_t                setup_in_progress;
+
+	u64 *mmu_teardown_pages;
+
+	struct tagged_addr aliasing_sink_page;
+
+	struct mutex            mem_partials_lock;
+	struct list_head        mem_partials;
+
+	struct mutex            mmu_lock;
+	struct mutex            reg_lock;
+	struct rb_root reg_rbtree_same;
+	struct rb_root reg_rbtree_exec;
+	struct rb_root reg_rbtree_custom;
+
+	unsigned long    cookies;
+	struct kbase_va_region *pending_regions[BITS_PER_LONG];
+
+	wait_queue_head_t event_queue;
+	pid_t tgid;
+	pid_t pid;
+
+	struct kbase_jd_context jctx;
+	atomic_t used_pages;
+	atomic_t         nonmapped_pages;
+
+	struct kbase_mem_pool mem_pool;
+	struct kbase_mem_pool lp_mem_pool;
+
+	struct shrinker         reclaim;
+	struct list_head        evict_list;
+
+	struct list_head waiting_soft_jobs;
+	spinlock_t waiting_soft_jobs_lock;
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		struct list_head waiting_resource;
+		struct workqueue_struct *wq;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	int as_nr;
+
+	atomic_t refcount;
+
+	/* NOTE:
+	 *
+	 * Flags are in jctx.sched_info.ctx.flags
+	 * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
+	 *
+	 * All other flags must be added there */
+	spinlock_t         mm_update_lock;
+	struct mm_struct *process_mm;
+	u64 same_va_end;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kctx_timeline timeline;
+#endif
+#ifdef CONFIG_DEBUG_FS
+	char *mem_profile_data;
+	size_t mem_profile_size;
+	struct mutex mem_profile_lock;
+	struct dentry *kctx_dentry;
+
+	unsigned int *reg_dump;
+	atomic_t job_fault_count;
+	struct list_head job_fault_resume_event_list;
+
+#endif /* CONFIG_DEBUG_FS */
+
+	struct jsctx_queue jsctx_queue
+		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+
+	atomic_t atoms_pulled;
+	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+	int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][
+			KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	u32 slots_pullable;
+
+	struct work_struct work;
+
+	struct kbase_vinstr_client *vinstr_cli;
+	struct mutex vinstr_cli_lock;
+
+	struct list_head completed_jobs;
+	atomic_t work_count;
+
+	struct timer_list soft_job_timeout;
+
+	struct kbase_va_region *jit_alloc[256];
+	u8 jit_max_allocations;
+	u8 jit_current_allocations;
+	u8 jit_current_allocations_per_bin[256];
+	u8 jit_version;
+	struct list_head jit_active_head;
+	struct list_head jit_pool_head;
+	struct list_head jit_destroy_head;
+	struct mutex jit_evict_lock;
+	struct work_struct jit_work;
+
+	struct list_head jit_atoms_head;
+	struct list_head jit_pending_alloc;
+
+	struct list_head ext_res_meta_head;
+
+	atomic_t drain_pending;
+
+	u32 age_count;
+
+	u8 trim_level;
+
+#ifdef CONFIG_MALI_JOB_DUMP
+	bool gwt_enabled;
+
+	bool gwt_was_enabled;
+
+	struct list_head gwt_current_list;
+
+	struct list_head gwt_snapshot_list;
+#endif
+
+	int priority;
+	s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+};
+
+#ifdef CONFIG_MALI_JOB_DUMP
+/**
+ * struct kbasep_gwt_list_element - Structure used to collect GPU
+ *                                  write faults.
+ * @link:                           List head for adding write faults.
+ * @region:                         Details of the region where we have the
+ *                                  faulting page address.
+ * @page_addr:                      Page address where GPU write fault occurred.
+ * @num_pages:                      The number of pages modified.
+ *
+ * Using this structure all GPU write faults are stored in a list.
+ */
+struct kbasep_gwt_list_element {
+	struct list_head link;
+	struct kbase_va_region *region;
+	u64 page_addr;
+	u64 num_pages;
+};
+
+#endif
+
+/**
+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
+ *                                 to a @kbase_context.
+ * @ext_res_node:                  List head for adding the metadata to a
+ *                                 @kbase_context.
+ * @alloc:                         The physical memory allocation structure
+ *                                 which is mapped.
+ * @gpu_addr:                      The GPU virtual address the resource is
+ *                                 mapped to.
+ *
+ * External resources can be mapped into multiple contexts as well as the same
+ * context multiple times.
+ * As kbase_va_region itself isn't refcounted we can't attach our extra
+ * information to it as it could be removed under our feet leaving external
+ * resources pinned.
+ * This metadata structure binds a single external resource to a single
+ * context, ensuring that per context mapping is tracked separately so it can
+ * be overridden when needed and abuses by the application (freeing the resource
+ * multiple times) don't effect the refcount of the physical allocation.
+ */
+struct kbase_ctx_ext_res_meta {
+	struct list_head ext_res_node;
+	struct kbase_mem_phy_alloc *alloc;
+	u64 gpu_addr;
+};
+
+enum kbase_reg_access_type {
+	REG_READ,
+	REG_WRITE
+};
+
+enum kbase_share_attr_bits {
+	/* (1ULL << 8) bit is reserved */
+	SHARE_BOTH_BITS = (2ULL << 8),	/* inner and outer shareable coherency */
+	SHARE_INNER_BITS = (3ULL << 8)	/* inner shareable coherency */
+};
+
+/**
+ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
+ * @kbdev: kbase device
+ *
+ * Return: true if the device access are coherent, false if not.
+ */
+static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
+			(kbdev->system_coherency == COHERENCY_ACE))
+		return true;
+
+	return false;
+}
+
+/* Conversion helpers for setting up high resolution timers */
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
+
+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
+#define KBASE_CLEAN_CACHE_MAX_LOOPS     100000
+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
+#define KBASE_AS_INACTIVE_MAX_LOOPS     100000
+
+/* Maximum number of times a job can be replayed */
+#define BASEP_JD_REPLAY_LIMIT 15
+
+/* JobDescriptorHeader - taken from the architecture specifications, the layout
+ * is currently identical for all GPU archs. */
+struct job_descriptor_header {
+	u32 exception_status;
+	u32 first_incomplete_task;
+	u64 fault_pointer;
+	u8 job_descriptor_size : 1;
+	u8 job_type : 7;
+	u8 job_barrier : 1;
+	u8 _reserved_01 : 1;
+	u8 _reserved_1 : 1;
+	u8 _reserved_02 : 1;
+	u8 _reserved_03 : 1;
+	u8 _reserved_2 : 1;
+	u8 _reserved_04 : 1;
+	u8 _reserved_05 : 1;
+	u16 job_index;
+	u16 job_dependency_index_1;
+	u16 job_dependency_index_2;
+	union {
+		u64 _64;
+		u32 _32;
+	} next_job;
+};
+
+#endif				/* _KBASE_DEFS_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
new file mode 100755
index 0000000..e58e27c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -0,0 +1,674 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_profiling_gator_api.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports tracing feature provided in the base module.
+ * Please keep it in sync with the value of base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if KBASE_TRACE_ENABLE
+static const char *kbasep_trace_code_string[] = {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+static int kbasep_trace_init(struct kbase_device *kbdev);
+static void kbasep_trace_term(struct kbase_device *kbdev);
+static void kbasep_trace_hook_wrapper(void *param);
+
+struct kbase_device *kbase_device_alloc(void)
+{
+	return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+	const char format[] = "mali_mmu%d";
+	char name[sizeof(format)];
+	const char poke_format[] = "mali_mmu%d_poker";
+	char poke_name[sizeof(poke_format)];
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		snprintf(poke_name, sizeof(poke_name), poke_format, i);
+
+	snprintf(name, sizeof(name), format, i);
+
+	kbdev->as[i].number = i;
+	kbdev->as[i].fault_addr = 0ULL;
+
+	kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+	if (!kbdev->as[i].pf_wq)
+		return -EINVAL;
+
+	INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+	INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+		struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
+		struct work_struct *poke_work = &kbdev->as[i].poke_work;
+
+		kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+		if (!kbdev->as[i].poke_wq) {
+			destroy_workqueue(kbdev->as[i].pf_wq);
+			return -EINVAL;
+		}
+		INIT_WORK(poke_work, kbasep_as_do_poke);
+
+		hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+		poke_timer->function = kbasep_as_poke_timer_callback;
+
+		kbdev->as[i].poke_refcount = 0;
+		kbdev->as[i].poke_state = 0u;
+	}
+
+	return 0;
+}
+
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+	destroy_workqueue(kbdev->as[i].pf_wq);
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		destroy_workqueue(kbdev->as[i].poke_wq);
+}
+
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+	int i, err;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		err = kbase_device_as_init(kbdev, i);
+		if (err)
+			goto free_workqs;
+	}
+
+	return 0;
+
+free_workqs:
+	for (; i > 0; i--)
+		kbase_device_as_term(kbdev, i);
+
+	return err;
+}
+
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+		kbase_device_as_term(kbdev, i);
+}
+
+int kbase_device_init(struct kbase_device * const kbdev)
+{
+	int i, err;
+#ifdef CONFIG_ARM64
+	struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
+
+	spin_lock_init(&kbdev->mmu_mask_change);
+	mutex_init(&kbdev->mmu_hw_mutex);
+#ifdef CONFIG_ARM64
+	kbdev->cci_snoop_enabled = false;
+	np = kbdev->dev->of_node;
+	if (np != NULL) {
+		if (of_property_read_u32(np, "snoop_enable_smc",
+					&kbdev->snoop_enable_smc))
+			kbdev->snoop_enable_smc = 0;
+		if (of_property_read_u32(np, "snoop_disable_smc",
+					&kbdev->snoop_disable_smc))
+			kbdev->snoop_disable_smc = 0;
+		/* Either both or none of the calls should be provided. */
+		if (!((kbdev->snoop_disable_smc == 0
+			&& kbdev->snoop_enable_smc == 0)
+			|| (kbdev->snoop_disable_smc != 0
+			&& kbdev->snoop_enable_smc != 0))) {
+			WARN_ON(1);
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+#endif /* CONFIG_ARM64 */
+	/* Get the list of workarounds for issues on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	err = kbase_hw_set_issues_mask(kbdev);
+	if (err)
+		goto fail;
+
+	/* Set the list of features available on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	kbase_hw_set_features_mask(kbdev);
+
+	kbase_gpuprops_set_features(kbdev);
+
+	/* On Linux 4.0+, dma coherency is determined from device tree */
+#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+	set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif
+
+	/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+	 * device structure was created by device-tree
+	 */
+	if (!kbdev->dev->dma_mask)
+		kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+	err = dma_set_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	err = dma_set_coherent_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+	err = kbase_device_all_as_init(kbdev);
+	if (err)
+		goto as_init_failed;
+
+	spin_lock_init(&kbdev->hwcnt.lock);
+
+	err = kbasep_trace_init(kbdev);
+	if (err)
+		goto term_as;
+
+	mutex_init(&kbdev->cacheclean_lock);
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		kbdev->timeline.slot_atoms_submitted[i] = 0;
+
+	for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
+		atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+	/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
+	for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
+		kbdev->kbase_profiling_controls[i] = 0;
+
+	kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+	atomic_set(&kbdev->ctx_num, 0);
+
+	err = kbase_instr_backend_init(kbdev);
+	if (err)
+		goto term_trace;
+
+	kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+		kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+	else
+		kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+
+	return 0;
+term_trace:
+	kbasep_trace_term(kbdev);
+term_as:
+	kbase_device_all_as_term(kbdev);
+as_init_failed:
+dma_set_mask_failed:
+fail:
+	return err;
+}
+
+void kbase_device_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+#if KBASE_TRACE_ENABLE
+	kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+	kbase_instr_backend_term(kbdev);
+
+	kbasep_trace_term(kbdev);
+
+	kbase_device_all_as_term(kbdev);
+}
+
+void kbase_device_free(struct kbase_device *kbdev)
+{
+	kfree(kbdev);
+}
+
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(tb);
+
+	/* Interface uses 16-bit value to track last accessed entry. Each entry
+	 * is composed of two 32-bit words.
+	 * This limits the size that can be handled without an overflow. */
+	if (0xFFFF * (2 * sizeof(u32)) < size)
+		return -EINVAL;
+
+	/* set up the header */
+	/* magic number in the first 4 bytes */
+	tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
+	/* Store (write offset = 0, wrap counter = 0, transaction active = no)
+	 * write offset 0 means never written.
+	 * Offsets 1 to (wrap_offset - 1) used to store values when trace started
+	 */
+	tb[1] = 0;
+
+	/* install trace buffer */
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb_wrap_offset = size / 8;
+	kctx->jctx.tb = tb;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+
+	return 0;
+}
+
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb = NULL;
+	kctx->jctx.tb_wrap_offset = 0;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	if (kctx->jctx.tb) {
+		u16 wrap_count;
+		u16 write_offset;
+		u32 *tb = kctx->jctx.tb;
+		u32 header_word;
+
+		header_word = tb[1];
+		KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
+
+		wrap_count = (header_word >> 1) & 0x7FFF;
+		write_offset = (header_word >> 16) & 0xFFFF;
+
+		/* mark as transaction in progress */
+		tb[1] |= 0x1;
+		mb();
+
+		/* calculate new offset */
+		write_offset++;
+		if (write_offset == kctx->jctx.tb_wrap_offset) {
+			/* wrap */
+			write_offset = 1;
+			wrap_count++;
+			wrap_count &= 0x7FFF;	/* 15bit wrap counter */
+		}
+
+		/* store the trace entry at the selected offset */
+		tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
+		tb[write_offset * 2 + 1] = reg_value;
+		mb();
+
+		/* new header word */
+		header_word = (write_offset << 16) | (wrap_count << 1) | 0x0;	/* transaction complete */
+		tb[1] = header_word;
+	}
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	struct kbase_trace *rbuf;
+
+	rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+	if (!rbuf)
+		return -EINVAL;
+
+	kbdev->trace_rbuf = rbuf;
+	spin_lock_init(&kbdev->trace_lock);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	kfree(kbdev->trace_rbuf);
+}
+
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+	s32 written = 0;
+
+	/* Initial part of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
+
+	if (trace_msg->katom)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
+
+	/* NOTE: Could add function callbacks to handle different message types */
+	/* Jobslot present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Refcount present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Rest of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
+}
+
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	dev_dbg(kbdev->dev, "%s", buffer);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+	unsigned long irqflags;
+	struct kbase_trace *trace_msg;
+
+	spin_lock_irqsave(&kbdev->trace_lock, irqflags);
+
+	trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+	/* Fill the message */
+	trace_msg->thread_id = task_pid_nr(current);
+	trace_msg->cpu = task_cpu(current);
+
+	getnstimeofday(&trace_msg->timestamp);
+
+	trace_msg->code = code;
+	trace_msg->ctx = ctx;
+
+	if (NULL == katom) {
+		trace_msg->katom = false;
+	} else {
+		trace_msg->katom = true;
+		trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+		trace_msg->atom_udata[0] = katom->udata.blob[0];
+		trace_msg->atom_udata[1] = katom->udata.blob[1];
+	}
+
+	trace_msg->gpu_addr = gpu_addr;
+	trace_msg->jobslot = jobslot;
+	trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
+	trace_msg->info_val = info_val;
+	trace_msg->flags = flags;
+
+	/* Update the ringbuffer indices */
+	kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+	if (kbdev->trace_next_in == kbdev->trace_first_out)
+		kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+	/* Done */
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	kbdev->trace_first_out = kbdev->trace_next_in;
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 start;
+	u32 end;
+
+	dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	start = kbdev->trace_first_out;
+	end = kbdev->trace_next_in;
+
+	while (start != end) {
+		struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
+
+		kbasep_trace_dump_msg(kbdev, trace_msg);
+
+		start = (start + 1) & KBASE_TRACE_MASK;
+	}
+	dev_dbg(kbdev->dev, "TRACE_END");
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	KBASE_TRACE_CLEAR(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)param;
+
+	kbasep_trace_dump(kbdev);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct trace_seq_state {
+	struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+	u32 start;
+	u32 end;
+};
+
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	if (*pos > KBASE_TRACE_SIZE)
+		return NULL;
+	i = state->start + *pos;
+	if ((state->end >= state->start && i >= state->end) ||
+			i >= state->end + KBASE_TRACE_SIZE)
+		return NULL;
+
+	i &= KBASE_TRACE_MASK;
+
+	return &state->trace_buf[i];
+}
+
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	(*pos)++;
+
+	i = (state->start + *pos) & KBASE_TRACE_MASK;
+	if (i == state->end)
+		return NULL;
+
+	return &state->trace_buf[i];
+}
+
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace *trace_msg = data;
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	seq_printf(s, "%s\n", buffer);
+	return 0;
+}
+
+static const struct seq_operations kbasep_trace_seq_ops = {
+	.start = kbasep_trace_seq_start,
+	.next = kbasep_trace_seq_next,
+	.stop = kbasep_trace_seq_stop,
+	.show = kbasep_trace_seq_show,
+};
+
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct kbase_device *kbdev = inode->i_private;
+	unsigned long flags;
+
+	struct trace_seq_state *state;
+
+	state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
+	if (!state)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	state->start = kbdev->trace_first_out;
+	state->end = kbdev->trace_next_in;
+	memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	return 0;
+}
+
+static const struct file_operations kbasep_trace_debugfs_fops = {
+	.open = kbasep_trace_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_trace", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_trace_debugfs_fops);
+}
+
+#else
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_DEBUG_FS */
+
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	CSTD_UNUSED(param);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
+{
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		kbdev->kbase_profiling_controls[control] = value;
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+}
+
+/*
+ * Called by gator to control the production of
+ * profiling information at runtime
+ * */
+
+void _mali_profiling_control(u32 action, u32 value)
+{
+	struct kbase_device *kbdev = NULL;
+
+	/* find the first i.e. call with -1 */
+	kbdev = kbase_find_device(-1);
+
+	if (NULL != kbdev)
+		kbase_set_profiling_control(kbdev, action, value);
+}
+KBASE_EXPORT_SYMBOL(_mali_profiling_control);
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
new file mode 100644
index 0000000..68eb4ed
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_set(&kbdev->disjoint_event.count, 0);
+	atomic_set(&kbdev->disjoint_event.state, 0);
+}
+
+/* increment the disjoint event count */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* increment the state and the event counter */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.state);
+
+	kbase_disjoint_event(kbdev);
+}
+
+/* decrement the state */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+	kbase_disjoint_event(kbdev);
+
+	atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* increments the count only if the state is > 0 */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	if (atomic_read(&kbdev->disjoint_event.state))
+		kbase_disjoint_event(kbdev);
+}
+
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
new file mode 100644
index 0000000..6a95900
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
@@ -0,0 +1,454 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as
+ * it will be set there.
+ */
+#include "mali_kbase_dma_fence.h"
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/reservation.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/ww_mutex.h>
+
+#include <mali_kbase.h>
+
+static void
+kbase_dma_fence_work(struct work_struct *pwork);
+
+static void
+kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource);
+}
+
+static void
+kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom)
+{
+	list_del(&katom->queue);
+}
+
+static int
+kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
+				  struct ww_acquire_ctx *ctx)
+{
+	struct reservation_object *content_res = NULL;
+	unsigned int content_res_idx = 0;
+	unsigned int r;
+	int err = 0;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < info->dma_fence_resv_count; r++) {
+		if (info->resv_objs[r] == content_res) {
+			content_res = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	content_res_idx = r;
+
+	/* Unlock the locked one ones */
+	while (r--)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+
+	if (content_res)
+		ww_mutex_unlock(&content_res->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+		content_res = info->resv_objs[content_res_idx];
+		ww_mutex_lock_slow(&content_res->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+static void
+kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
+				    struct ww_acquire_ctx *ctx)
+{
+	unsigned int r;
+
+	for (r = 0; r < info->dma_fence_resv_count; r++)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+	ww_acquire_fini(ctx);
+}
+
+/**
+ * kbase_dma_fence_queue_work() - Queue work to handle @katom
+ * @katom: Pointer to atom for which to queue work
+ *
+ * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and
+ * submit the atom.
+ */
+static void
+kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	bool ret;
+
+	INIT_WORK(&katom->work, kbase_dma_fence_work);
+	ret = queue_work(kctx->dma_fence.wq, &katom->work);
+	/* Warn if work was already queued, that should not happen. */
+	WARN_ON(!ret);
+}
+
+/**
+ * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom
+ * @katom:	Katom to cancel
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Cancel callbacks and clean up. */
+	kbase_fence_free_callbacks(katom);
+
+	/* Mark the atom as handled in case all fences signaled just before
+	 * canceling the callbacks and the worker was queued.
+	 */
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 * there is a race between job completion and cancellation.
+	 */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+
+/**
+ * kbase_dma_fence_work() - Worker thread called when a fence is signaled
+ * @pwork:	work_struct containing a pointer to a katom
+ *
+ * This function will clean and mark all dependencies as satisfied
+ */
+static void
+kbase_dma_fence_work(struct work_struct *pwork)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = container_of(pwork, struct kbase_jd_atom, work);
+	ctx = &katom->kctx->jctx;
+
+	mutex_lock(&ctx->lock);
+	if (kbase_fence_dep_count_read(katom) != 0)
+		goto out;
+
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Remove atom from list of dma-fence waiting atoms. */
+	kbase_dma_fence_waiters_remove(katom);
+	/* Cleanup callbacks. */
+	kbase_fence_free_callbacks(katom);
+	/*
+	 * Queue atom on GPU, unless it has already completed due to a failing
+	 * dependency. Run jd_done_nolock() on the katom if it is completed.
+	 */
+	if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
+		jd_done_nolock(katom, NULL);
+	else
+		kbase_jd_dep_clear_locked(katom);
+
+out:
+	mutex_unlock(&ctx->lock);
+}
+
+static void
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
+#else
+kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+#endif
+{
+	struct kbase_fence_cb *kcb = container_of(cb,
+				struct kbase_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+
+	/* If the atom is zapped dep_count will be forced to a negative number
+	 * preventing this callback from ever scheduling work. Which in turn
+	 * would reschedule the atom.
+	 */
+
+	if (kbase_fence_dep_count_dec_and_test(katom))
+		kbase_dma_fence_queue_work(katom);
+}
+
+static int
+kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
+					 struct reservation_object *resv,
+					 bool exclusive)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+#else
+	struct dma_fence *excl_fence = NULL;
+	struct dma_fence **shared_fences = NULL;
+#endif
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = reservation_object_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = kbase_fence_add_callback(katom,
+						excl_fence,
+						kbase_dma_fence_cb);
+
+		/* Release our reference, taken by reservation_object_get_fences_rcu(),
+		 * to the fence. We have set up our callback (if that was possible),
+		 * and it's the fence's owner is responsible for singling the fence
+		 * before allowing it to disappear.
+		 */
+		dma_fence_put(excl_fence);
+
+		if (err)
+			goto out;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = kbase_fence_add_callback(katom,
+							shared_fences[i],
+							kbase_dma_fence_cb);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Release all our references to the shared fences, taken by
+	 * reservation_object_get_fences_rcu(). We have set up our callback (if
+	 * that was possible), and it's the fence's owner is responsible for
+	 * signaling the fence before allowing it to disappear.
+	 */
+out:
+	for (i = 0; i < shared_count; i++)
+		dma_fence_put(shared_fences[i]);
+	kfree(shared_fences);
+
+	if (err) {
+		/*
+		 * On error, cancel and clean up all callbacks that was set up
+		 * before the error.
+		 */
+		kbase_fence_free_callbacks(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive)
+{
+	unsigned int i;
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		/* Duplicate resource, ignore */
+		if (info->resv_objs[i] == resv)
+			return;
+	}
+
+	info->resv_objs[info->dma_fence_resv_count] = resv;
+	if (exclusive)
+		set_bit(info->dma_fence_resv_count,
+			info->dma_fence_excl_bitmap);
+	(info->dma_fence_resv_count)++;
+}
+
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info)
+{
+	int err, i;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+	struct ww_acquire_ctx ww_ctx;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	fence = kbase_fence_out_new(katom);
+	if (!fence) {
+		err = -ENOMEM;
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d creating fence.\n", err);
+		return err;
+	}
+
+	kbase_fence_dep_count_set(katom, 1);
+
+	err = kbase_dma_fence_lock_reservations(info, &ww_ctx);
+	if (err) {
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d locking reservations.\n", err);
+		kbase_fence_dep_count_set(katom, -1);
+		kbase_fence_out_remove(katom);
+		return err;
+	}
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		struct reservation_object *obj = info->resv_objs[i];
+
+		if (!test_bit(i, info->dma_fence_excl_bitmap)) {
+			err = reservation_object_reserve_shared(obj);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d reserving space for shared fence.\n", err);
+				goto end;
+			}
+
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, false);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_shared_fence(obj, fence);
+		} else {
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, true);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_excl_fence(obj, fence);
+		}
+	}
+
+end:
+	kbase_dma_fence_unlock_reservations(info, &ww_ctx);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (kbase_fence_dep_count_dec_and_test(katom)) {
+			kbase_fence_dep_count_set(katom, -1);
+			kbase_fence_free_callbacks(katom);
+		} else {
+			/* Add katom to the list of dma-buf fence waiting atoms
+			 * only if it is still waiting.
+			 */
+			kbase_dma_fence_waiters_add(katom);
+		}
+	} else {
+		/* There was an error, cancel callbacks, set dep_count to -1 to
+		 * indicate that the atom has been handled (the caller will
+		 * kill it for us), signal the fence, free callbacks and the
+		 * fence.
+		 */
+		kbase_fence_free_callbacks(katom);
+		kbase_fence_dep_count_set(katom, -1);
+		kbase_dma_fence_signal(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
+{
+	struct list_head *list = &kctx->dma_fence.waiting_resource;
+
+	while (!list_empty(list)) {
+		struct kbase_jd_atom *katom;
+
+		katom = list_first_entry(list, struct kbase_jd_atom, queue);
+		kbase_dma_fence_waiters_remove(katom);
+		kbase_dma_fence_cancel_atom(katom);
+	}
+}
+
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
+{
+	/* Cancel callbacks and clean up. */
+	if (kbase_fence_free_callbacks(katom))
+		kbase_dma_fence_queue_work(katom);
+}
+
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
+{
+	if (!katom->dma_fence.fence)
+		return;
+
+	/* Signal the atom's fence. */
+	dma_fence_signal(katom->dma_fence.fence);
+
+	kbase_fence_out_remove(katom);
+
+	kbase_fence_free_callbacks(katom);
+}
+
+void kbase_dma_fence_term(struct kbase_context *kctx)
+{
+	destroy_workqueue(kctx->dma_fence.wq);
+	kctx->dma_fence.wq = NULL;
+}
+
+int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);
+
+	kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d",
+					     WQ_UNBOUND, 1, kctx->pid);
+	if (!kctx->dma_fence.wq)
+		return -ENOMEM;
+
+	return 0;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
new file mode 100644
index 0000000..2a4d6fc
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
@@ -0,0 +1,136 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_DMA_FENCE_H_
+#define _KBASE_DMA_FENCE_H_
+
+#ifdef CONFIG_MALI_DMA_FENCE
+
+#include <linux/list.h>
+#include <linux/reservation.h>
+#include <mali_kbase_fence.h>
+
+
+/* Forward declaration from mali_kbase_defs.h */
+struct kbase_jd_atom;
+struct kbase_context;
+
+/**
+ * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
+ * @resv_objs:             Array of reservation objects to attach the
+ *                         new fence to.
+ * @dma_fence_resv_count:  Number of reservation objects in the array.
+ * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive.
+ *
+ * This is used by some functions to pass around a collection of data about
+ * reservation objects.
+ */
+struct kbase_dma_fence_resv_info {
+	struct reservation_object **resv_objs;
+	unsigned int dma_fence_resv_count;
+	unsigned long *dma_fence_excl_bitmap;
+};
+
+/**
+ * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
+ * @resv:      Reservation object to add to the array.
+ * @info:      Pointer to struct with current reservation info
+ * @exclusive: Boolean indicating if exclusive access is needed
+ *
+ * The function adds a new reservation_object to an existing array of
+ * reservation_objects. At the same time keeps track of which objects require
+ * exclusive access in dma_fence_excl_bitmap.
+ */
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive);
+
+/**
+ * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
+ * @katom: Katom with the external dependency.
+ * @info:  Pointer to struct with current reservation info
+ *
+ * Return: An error code or 0 if succeeds
+ */
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info);
+
+/**
+ * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx
+ * @kctx: Pointer to kbase context
+ *
+ * This function will cancel and clean up all katoms on @kctx that is waiting
+ * on dma-buf fences.
+ *
+ * Locking: jctx.lock needs to be held when calling this function.
+ */
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
+ * @katom: Pointer to katom whose callbacks are to be canceled
+ *
+ * This function cancels all dma-buf fence callbacks on @katom, but does not
+ * cancel the katom itself.
+ *
+ * The caller is responsible for ensuring that jd_done_nolock is called on
+ * @katom.
+ *
+ * Locking: jctx.lock must be held when calling this function.
+ */
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
+ * @katom: Pointer to katom to signal and clean up
+ *
+ * This function will signal the @katom's fence, if it has one, and clean up
+ * the callback data from the katom's wait on earlier fences.
+ *
+ * Locking: jctx.lock must be held while calling this function.
+ */
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_term() - Terminate Mali dma-fence context
+ * @kctx: kbase context to terminate
+ */
+void kbase_dma_fence_term(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_init() - Initialize Mali dma-fence context
+ * @kctx: kbase context to initialize
+ */
+int kbase_dma_fence_init(struct kbase_context *kctx);
+
+
+#else /* CONFIG_MALI_DMA_FENCE */
+/* Dummy functions for when dma-buf fence isn't enabled. */
+
+static inline int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	return 0;
+}
+
+static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
new file mode 100644
index 0000000..e290fce
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -0,0 +1,264 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+#include <mali_kbase_tlstream.h>
+
+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct base_jd_udata data;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+	data = katom->udata;
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
+
+	KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx);
+	KBASE_TLSTREAM_TL_DEL_ATOM(katom);
+
+	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+
+	wake_up(&katom->completed);
+
+	return data;
+}
+
+int kbase_event_pending(struct kbase_context *ctx)
+{
+	KBASE_DEBUG_ASSERT(ctx);
+
+	return (atomic_read(&ctx->event_count) != 0) ||
+			(atomic_read(&ctx->event_closed) != 0);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+	struct kbase_jd_atom *atom;
+
+	KBASE_DEBUG_ASSERT(ctx);
+
+	mutex_lock(&ctx->event_mutex);
+
+	if (list_empty(&ctx->event_list)) {
+		if (!atomic_read(&ctx->event_closed)) {
+			mutex_unlock(&ctx->event_mutex);
+			return -1;
+		}
+
+		/* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+		mutex_unlock(&ctx->event_mutex);
+		uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+		memset(&uevent->udata, 0, sizeof(uevent->udata));
+		dev_dbg(ctx->kbdev->dev,
+				"event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+				BASE_JD_EVENT_DRV_TERMINATED);
+		return 0;
+	}
+
+	/* normal event processing */
+	atomic_dec(&ctx->event_count);
+	atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
+	list_del(ctx->event_list.next);
+
+	mutex_unlock(&ctx->event_mutex);
+
+	dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+	uevent->event_code = atom->event_code;
+	uevent->atom_number = (atom - ctx->jctx.atoms);
+
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(atom);
+
+	mutex_lock(&ctx->jctx.lock);
+	uevent->udata = kbase_event_process(ctx, atom);
+	mutex_unlock(&ctx->jctx.lock);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue);
+
+/**
+ * kbase_event_process_noreport_worker - Worker for processing atoms that do not
+ *                                       return an event but do have external
+ *                                       resources
+ * @data:  Work structure
+ */
+static void kbase_event_process_noreport_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(katom);
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_event_process(kctx, katom);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+/**
+ * kbase_event_process_noreport - Process atoms that do not return an event
+ * @kctx:  Context pointer
+ * @katom: Atom to be processed
+ *
+ * Atoms that do not have external resources will be processed immediately.
+ * Atoms that do have external resources will be processed on a workqueue, in
+ * order to avoid locking issues.
+ */
+static void kbase_event_process_noreport(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		INIT_WORK(&katom->work, kbase_event_process_noreport_worker);
+		queue_work(kctx->event_workq, &katom->work);
+	} else {
+		kbase_event_process(kctx, katom);
+	}
+}
+
+/**
+ * kbase_event_coalesce - Move pending events to the main event list
+ * @kctx:  Context pointer
+ *
+ * kctx->event_list and kctx->event_coalesce_count must be protected
+ * by a lock unless this is the last thread using them
+ * (and we're about to terminate the lock).
+ *
+ * Return: The number of pending events moved to the main event list
+ */
+static int kbase_event_coalesce(struct kbase_context *kctx)
+{
+	const int event_count = kctx->event_coalesce_count;
+
+	/* Join the list of pending events onto the tail of the main list
+	   and reset it */
+	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
+	kctx->event_coalesce_count = 0;
+
+	/* Return the number of events moved */
+	return event_count;
+}
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+	if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
+		if (atom->event_code == BASE_JD_EVENT_DONE) {
+			/* Don't report the event */
+			kbase_event_process_noreport(ctx, atom);
+			return;
+		}
+	}
+
+	if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
+		/* Don't report the event */
+		kbase_event_process_noreport(ctx, atom);
+		return;
+	}
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_POSTED);
+	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
+		/* Don't report the event until other event(s) have completed */
+		mutex_lock(&ctx->event_mutex);
+		list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
+		++ctx->event_coalesce_count;
+		mutex_unlock(&ctx->event_mutex);
+	} else {
+		/* Report the event and any pending events now */
+		int event_count = 1;
+
+		mutex_lock(&ctx->event_mutex);
+		event_count += kbase_event_coalesce(ctx);
+		list_add_tail(&atom->dep_item[0], &ctx->event_list);
+		atomic_add(event_count, &ctx->event_count);
+		mutex_unlock(&ctx->event_mutex);
+
+		kbase_event_wakeup(ctx);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_event_post);
+
+void kbase_event_close(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->event_mutex);
+	atomic_set(&kctx->event_closed, true);
+	mutex_unlock(&kctx->event_mutex);
+	kbase_event_wakeup(kctx);
+}
+
+int kbase_event_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	INIT_LIST_HEAD(&kctx->event_list);
+	INIT_LIST_HEAD(&kctx->event_coalesce_list);
+	mutex_init(&kctx->event_mutex);
+	atomic_set(&kctx->event_count, 0);
+	kctx->event_coalesce_count = 0;
+	atomic_set(&kctx->event_closed, false);
+	kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+	if (NULL == kctx->event_workq)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init);
+
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+	int event_count;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+	flush_workqueue(kctx->event_workq);
+	destroy_workqueue(kctx->event_workq);
+
+	/* We use kbase_event_dequeue to remove the remaining events as that
+	 * deals with all the cleanup needed for the atoms.
+	 *
+	 * Note: use of kctx->event_list without a lock is safe because this must be the last
+	 * thread using it (because we're about to terminate the lock)
+	 */
+	event_count = kbase_event_coalesce(kctx);
+	atomic_add(event_count, &kctx->event_count);
+
+	while (!list_empty(&kctx->event_list)) {
+		struct base_jd_event_v2 event;
+
+		kbase_event_dequeue(kctx, &event);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c
new file mode 100644
index 0000000..ac8272c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c
@@ -0,0 +1,206 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <mali_kbase_fence_defs.h>
+#include <mali_kbase_fence.h>
+#include <mali_kbase.h>
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(kbase_fence_lock);
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_get_driver_name(struct fence *fence)
+#else
+kbase_fence_get_driver_name(struct dma_fence *fence)
+#endif
+{
+	return kbase_drv_name;
+}
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_get_timeline_name(struct fence *fence)
+#else
+kbase_fence_get_timeline_name(struct dma_fence *fence)
+#endif
+{
+	return kbase_timeline_name;
+}
+
+static bool
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_enable_signaling(struct fence *fence)
+#else
+kbase_fence_enable_signaling(struct dma_fence *fence)
+#endif
+{
+	return true;
+}
+
+static void
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_fence_value_str(struct fence *fence, char *str, int size)
+#else
+kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size)
+#endif
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+const struct fence_ops kbase_fence_ops = {
+	.wait = fence_default_wait,
+#else
+const struct dma_fence_ops kbase_fence_ops = {
+	.wait = dma_fence_default_wait,
+#endif
+	.get_driver_name = kbase_fence_get_driver_name,
+	.get_timeline_name = kbase_fence_get_timeline_name,
+	.enable_signaling = kbase_fence_enable_signaling,
+	.fence_value_str = kbase_fence_fence_value_str
+};
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+struct fence *
+kbase_fence_out_new(struct kbase_jd_atom *katom)
+#else
+struct dma_fence *
+kbase_fence_out_new(struct kbase_jd_atom *katom)
+#endif
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	WARN_ON(katom->dma_fence.fence);
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	dma_fence_init(fence,
+		       &kbase_fence_ops,
+		       &kbase_fence_lock,
+		       katom->dma_fence.context,
+		       atomic_inc_return(&katom->dma_fence.seqno));
+
+	katom->dma_fence.fence = fence;
+
+	return fence;
+}
+
+bool
+kbase_fence_free_callbacks(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_cb *cb, *tmp;
+	bool res = false;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Clean up and free callbacks. */
+	list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) {
+		bool ret;
+
+		/* Cancel callbacks that hasn't been called yet. */
+		ret = dma_fence_remove_callback(cb->fence, &cb->fence_cb);
+		if (ret) {
+			int ret;
+
+			/* Fence had not signaled, clean up after
+			 * canceling.
+			 */
+			ret = atomic_dec_return(&katom->dma_fence.dep_count);
+
+			if (unlikely(ret == 0))
+				res = true;
+		}
+
+		/*
+		 * Release the reference taken in
+		 * kbase_fence_add_callback().
+		 */
+		dma_fence_put(cb->fence);
+		list_del(&cb->node);
+		kfree(cb);
+	}
+
+	return res;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+int
+kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			 struct fence *fence,
+			 fence_func_t callback)
+#else
+int
+kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			 struct dma_fence *fence,
+			 dma_fence_func_t callback)
+#endif
+{
+	int err = 0;
+	struct kbase_fence_cb *kbase_fence_cb;
+
+	if (!fence)
+		return -EINVAL;
+
+	kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL);
+	if (!kbase_fence_cb)
+		return -ENOMEM;
+
+	kbase_fence_cb->fence = fence;
+	kbase_fence_cb->katom = katom;
+	INIT_LIST_HEAD(&kbase_fence_cb->node);
+
+	err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb,
+				     callback);
+	if (err == -ENOENT) {
+		/* Fence signaled, get the completion result */
+		err = dma_fence_get_status(fence);
+
+		/* remap success completion to err code */
+		if (err == 1)
+			err = 0;
+
+		kfree(kbase_fence_cb);
+	} else if (err) {
+		kfree(kbase_fence_cb);
+	} else {
+		/*
+		 * Get reference to fence that will be kept until callback gets
+		 * cleaned up in kbase_fence_free_callbacks().
+		 */
+		dma_fence_get(fence);
+		atomic_inc(&katom->dma_fence.dep_count);
+		/* Add callback to katom's list of callbacks */
+		list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks);
+	}
+
+	return err;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h
new file mode 100755
index 0000000..ab0db40
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h
@@ -0,0 +1,275 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_FENCE_H_
+#define _KBASE_FENCE_H_
+
+/*
+ * mali_kbase_fence.[hc] has common fence code used by both
+ * - CONFIG_MALI_DMA_FENCE - implicit DMA fences
+ * - CONFIG_SYNC_FILE      - explicit fences beginning with 4.9 kernel
+ */
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+
+#include <linux/list.h>
+#include "mali_kbase_fence_defs.h"
+#include "mali_kbase.h"
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+extern const struct fence_ops kbase_fence_ops;
+#else
+extern const struct dma_fence_ops kbase_fence_ops;
+#endif
+
+/**
+* struct kbase_fence_cb - Mali dma-fence callback data struct
+* @fence_cb: Callback function
+* @katom:    Pointer to katom that is waiting on this callback
+* @fence:    Pointer to the fence object on which this callback is waiting
+* @node:     List head for linking this callback to the katom
+*/
+struct kbase_fence_cb {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence_cb fence_cb;
+	struct fence *fence;
+#else
+	struct dma_fence_cb fence_cb;
+	struct dma_fence *fence;
+#endif
+	struct kbase_jd_atom *katom;
+	struct list_head node;
+};
+
+/**
+ * kbase_fence_out_new() - Creates a new output fence and puts it on the atom
+ * @katom: Atom to create an output fence for
+ *
+ * return: A new fence object on success, NULL on failure.
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
+#else
+struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
+#endif
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_fence_in_set() - Assign input fence to atom
+ * @katom: Atom to assign input fence to
+ * @fence: Input fence to assign to atom
+ *
+ * This function will take ownership of one fence reference!
+ */
+#define kbase_fence_fence_in_set(katom, fence) \
+	do { \
+		WARN_ON((katom)->dma_fence.fence_in); \
+		(katom)->dma_fence.fence_in = fence; \
+	} while (0)
+#endif
+
+/**
+ * kbase_fence_out_remove() - Removes the output fence from atom
+ * @katom: Atom to remove output fence for
+ *
+ * This will also release the reference to this fence which the atom keeps
+ */
+static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->dma_fence.fence) {
+		dma_fence_put(katom->dma_fence.fence);
+		katom->dma_fence.fence = NULL;
+	}
+}
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_out_remove() - Removes the input fence from atom
+ * @katom: Atom to remove input fence for
+ *
+ * This will also release the reference to this fence which the atom keeps
+ */
+static inline void kbase_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->dma_fence.fence_in) {
+		dma_fence_put(katom->dma_fence.fence_in);
+		katom->dma_fence.fence_in = NULL;
+	}
+}
+#endif
+
+/**
+ * kbase_fence_out_is_ours() - Check if atom has a valid fence created by us
+ * @katom: Atom to check output fence for
+ *
+ * Return: true if fence exists and is valid, otherwise false
+ */
+static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom)
+{
+	return katom->dma_fence.fence &&
+				katom->dma_fence.fence->ops == &kbase_fence_ops;
+}
+
+/**
+ * kbase_fence_out_signal() - Signal output fence of atom
+ * @katom: Atom to signal output fence for
+ * @status: Status to signal with (0 for success, < 0 for error)
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom,
+					 int status)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 68))
+	katom->dma_fence.fence->error = status;
+#else
+	katom->dma_fence.fence->status = status;
+#endif
+	return dma_fence_signal(katom->dma_fence.fence);
+}
+
+/**
+ * kbase_fence_add_callback() - Add callback on @fence to block @katom
+ * @katom: Pointer to katom that will be blocked by @fence
+ * @fence: Pointer to fence on which to set up the callback
+ * @callback: Pointer to function to be called when fence is signaled
+ *
+ * Caller needs to hold a reference to @fence when calling this function, and
+ * the caller is responsible for releasing that reference.  An additional
+ * reference to @fence will be taken when the callback was successfully set up
+ * and @fence needs to be kept valid until the callback has been called and
+ * cleanup have been done.
+ *
+ * Return: 0 on success: fence was either already signaled, or callback was
+ * set up. Negative error code is returned on error.
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+int kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct fence *fence,
+			     fence_func_t callback);
+#else
+int kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct dma_fence *fence,
+			     dma_fence_func_t callback);
+#endif
+
+/**
+ * kbase_fence_dep_count_set() - Set dep_count value on atom to specified value
+ * @katom: Atom to set dep_count for
+ * @val: value to set dep_count to
+ *
+ * The dep_count is available to the users of this module so that they can
+ * synchronize completion of the wait with cancellation and adding of more
+ * callbacks. For instance, a user could do the following:
+ *
+ * dep_count set to 1
+ * callback #1 added, dep_count is increased to 2
+ *                             callback #1 happens, dep_count decremented to 1
+ *                             since dep_count > 0, no completion is done
+ * callback #2 is added, dep_count is increased to 2
+ * dep_count decremented to 1
+ *                             callback #2 happens, dep_count decremented to 0
+ *                             since dep_count now is zero, completion executes
+ *
+ * The dep_count can also be used to make sure that the completion only
+ * executes once. This is typically done by setting dep_count to -1 for the
+ * thread that takes on this responsibility.
+ */
+static inline void
+kbase_fence_dep_count_set(struct kbase_jd_atom *katom, int val)
+{
+	atomic_set(&katom->dma_fence.dep_count, val);
+}
+
+/**
+ * kbase_fence_dep_count_dec_and_test() - Decrements dep_count
+ * @katom: Atom to decrement dep_count for
+ *
+ * See @kbase_fence_dep_count_set for general description about dep_count
+ *
+ * Return: true if value was decremented to zero, otherwise false
+ */
+static inline bool
+kbase_fence_dep_count_dec_and_test(struct kbase_jd_atom *katom)
+{
+	return atomic_dec_and_test(&katom->dma_fence.dep_count);
+}
+
+/**
+ * kbase_fence_dep_count_read() - Returns the current dep_count value
+ * @katom: Pointer to katom
+ *
+ * See @kbase_fence_dep_count_set for general description about dep_count
+ *
+ * Return: The current dep_count value
+ */
+static inline int kbase_fence_dep_count_read(struct kbase_jd_atom *katom)
+{
+	return atomic_read(&katom->dma_fence.dep_count);
+}
+
+/**
+ * kbase_fence_free_callbacks() - Free dma-fence callbacks on a katom
+ * @katom: Pointer to katom
+ *
+ * This function will free all fence callbacks on the katom's list of
+ * callbacks. Callbacks that have not yet been called, because their fence
+ * hasn't yet signaled, will first be removed from the fence.
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ *
+ * Return: true if dep_count reached 0, otherwise false.
+ */
+bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom);
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_in_get() - Retrieve input fence for atom.
+ * @katom: Atom to get input fence from
+ *
+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it
+ *
+ * Return: The fence, or NULL if there is no input fence for atom
+ */
+#define kbase_fence_in_get(katom) dma_fence_get((katom)->dma_fence.fence_in)
+#endif
+
+/**
+ * kbase_fence_out_get() - Retrieve output fence for atom.
+ * @katom: Atom to get output fence from
+ *
+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it
+ *
+ * Return: The fence, or NULL if there is no output fence for atom
+ */
+#define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence)
+
+/**
+ * kbase_fence_put() - Releases a reference to a fence
+ * @fence: Fence to release reference for.
+ */
+#define kbase_fence_put(fence) dma_fence_put(fence)
+
+
+#endif /* CONFIG_MALI_DMA_FENCE || defined(CONFIG_SYNC_FILE */
+
+#endif /* _KBASE_FENCE_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h
new file mode 100755
index 0000000..607a95c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_FENCE_DEFS_H_
+#define _KBASE_FENCE_DEFS_H_
+
+/*
+ * There was a big rename in the 4.10 kernel (fence* -> dma_fence*)
+ * This file hides the compatibility issues with this for the rest the driver
+ */
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+
+#include <linux/fence.h>
+
+#define dma_fence_context_alloc(a) fence_context_alloc(a)
+#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e)
+#define dma_fence_get(a) fence_get(a)
+#define dma_fence_put(a) fence_put(a)
+#define dma_fence_signal(a) fence_signal(a)
+#define dma_fence_is_signaled(a) fence_is_signaled(a)
+#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c)
+#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b)
+
+#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0)
+#else
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0)
+#endif
+
+#else
+
+#include <linux/dma-fence.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
+#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \
+	(a)->status ?: 1 \
+	: 0)
+#endif
+
+#endif /* < 4.10.0 */
+
+#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */
+
+#endif /* _KBASE_FENCE_DEFS_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
new file mode 100644
index 0000000..4f54817
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* NB taken from gator  */
+/*
+ * List of possible actions to be controlled by DS-5 Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping
+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
+ * they are unknown inside gator.
+ */
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP  2
+#define GATOR_JOB_SLOT_SOFT_STOPPED  3
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 event, u64 value);
+void kbase_trace_mali_pm_power_off(u32 event, u64 value);
+void kbase_trace_mali_pm_power_on(u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
+void kbase_trace_mali_mmu_as_in_use(int event);
+void kbase_trace_mali_mmu_as_released(int event);
+void kbase_trace_mali_total_alloc_pages_change(long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif  /* _KBASE_GATOR_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
new file mode 100755
index 0000000..040b209
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -0,0 +1,343 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_gator_api.h"
+#include "mali_kbase_gator_hwcnt_names.h"
+
+#define MALI_MAX_CORES_PER_GROUP		4
+#define MALI_MAX_NUM_BLOCKS_PER_GROUP	8
+#define MALI_COUNTERS_PER_BLOCK			64
+#define MALI_BYTES_PER_COUNTER			4
+
+struct kbase_gator_hwcnt_handles {
+	struct kbase_device *kbdev;
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct work_struct dump_work;
+	int dump_complete;
+	spinlock_t dump_lock;
+};
+
+static void dump_worker(struct work_struct *work);
+
+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
+{
+	const char * const *hardware_counters;
+	struct kbase_device *kbdev;
+	uint32_t product_id;
+	uint32_t count;
+
+	if (!total_counters)
+		return NULL;
+
+	/* Get the first device - it doesn't matter in this case */
+	kbdev = kbase_find_device(-1);
+	if (!kbdev)
+		return NULL;
+
+	product_id = kbdev->gpu_props.props.core_props.product_id;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) {
+		case GPU_ID2_PRODUCT_TMIX:
+			hardware_counters = hardware_counters_mali_tMIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tMIx);
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			hardware_counters = hardware_counters_mali_tHEx;
+			count = ARRAY_SIZE(hardware_counters_mali_tHEx);
+			break;
+		case GPU_ID2_PRODUCT_TSIX:
+			hardware_counters = hardware_counters_mali_tSIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tSIx);
+			break;
+		case GPU_ID2_PRODUCT_TNOX:
+			hardware_counters = hardware_counters_mali_tNOx;
+			count = ARRAY_SIZE(hardware_counters_mali_tNOx);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	} else {
+		switch (product_id) {
+			/* If we are using a Mali-T60x device */
+		case GPU_ID_PI_T60X:
+			hardware_counters = hardware_counters_mali_t60x;
+			count = ARRAY_SIZE(hardware_counters_mali_t60x);
+			break;
+			/* If we are using a Mali-T62x device */
+		case GPU_ID_PI_T62X:
+			hardware_counters = hardware_counters_mali_t62x;
+			count = ARRAY_SIZE(hardware_counters_mali_t62x);
+			break;
+			/* If we are using a Mali-T72x device */
+		case GPU_ID_PI_T72X:
+			hardware_counters = hardware_counters_mali_t72x;
+			count = ARRAY_SIZE(hardware_counters_mali_t72x);
+			break;
+			/* If we are using a Mali-T76x device */
+		case GPU_ID_PI_T76X:
+			hardware_counters = hardware_counters_mali_t76x;
+			count = ARRAY_SIZE(hardware_counters_mali_t76x);
+			break;
+			/* If we are using a Mali-T82x device */
+		case GPU_ID_PI_T82X:
+			hardware_counters = hardware_counters_mali_t82x;
+			count = ARRAY_SIZE(hardware_counters_mali_t82x);
+			break;
+			/* If we are using a Mali-T83x device */
+		case GPU_ID_PI_T83X:
+			hardware_counters = hardware_counters_mali_t83x;
+			count = ARRAY_SIZE(hardware_counters_mali_t83x);
+			break;
+			/* If we are using a Mali-T86x device */
+		case GPU_ID_PI_T86X:
+			hardware_counters = hardware_counters_mali_t86x;
+			count = ARRAY_SIZE(hardware_counters_mali_t86x);
+			break;
+			/* If we are using a Mali-T88x device */
+		case GPU_ID_PI_TFRX:
+			hardware_counters = hardware_counters_mali_t88x;
+			count = ARRAY_SIZE(hardware_counters_mali_t88x);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	}
+
+	/* Release the kbdev reference. */
+	kbase_release_device(kbdev);
+
+	*total_counters = count;
+
+	/* If we return a string array take a reference on the module (or fail). */
+	if (hardware_counters && !try_module_get(THIS_MODULE))
+		return NULL;
+
+	return hardware_counters;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
+
+void kbase_gator_hwcnt_term_names(void)
+{
+	/* Release the module reference. */
+	module_put(THIS_MODULE);
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
+
+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+	struct kbase_ioctl_hwcnt_reader_setup setup;
+	uint32_t dump_size = 0, i = 0;
+
+	if (!in_out_info)
+		return NULL;
+
+	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
+	if (!hand)
+		return NULL;
+
+	INIT_WORK(&hand->dump_work, dump_worker);
+	spin_lock_init(&hand->dump_lock);
+
+	/* Get the first device */
+	hand->kbdev = kbase_find_device(-1);
+	if (!hand->kbdev)
+		goto free_hand;
+
+	dump_size = kbase_vinstr_dump_size(hand->kbdev);
+	hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!hand->vinstr_buffer)
+		goto release_device;
+	in_out_info->kernel_dump_buffer = hand->vinstr_buffer;
+
+	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
+	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
+	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
+
+	/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
+	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
+		uint32_t cg, j;
+		uint64_t core_mask;
+
+		/* There are 8 hardware counters blocks per core group */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			in_out_info->nr_core_groups, GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = in_out_info->nr_core_groups *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			MALI_COUNTERS_PER_BLOCK *
+			MALI_BYTES_PER_COUNTER;
+
+		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
+			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
+
+			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
+				if (core_mask & (1u << j))
+					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+				else
+					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			}
+
+			in_out_info->hwc_layout[i++] = TILER_BLOCK;
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+
+			if (0 == cg)
+				in_out_info->hwc_layout[i++] = JM_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+		}
+	/* If we are using any other device */
+	} else {
+		uint32_t nr_l2, nr_sc_bits, j;
+		uint64_t core_mask;
+
+		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
+
+		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+		nr_sc_bits = fls64(core_mask);
+
+		/* The job manager and tiler sets of counters
+		 * are always present */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+
+		in_out_info->hwc_layout[i++] = JM_BLOCK;
+		in_out_info->hwc_layout[i++] = TILER_BLOCK;
+
+		for (j = 0; j < nr_l2; j++)
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+		while (core_mask != 0ull) {
+			if ((core_mask & 1ull) != 0ull)
+				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			core_mask >>= 1;
+		}
+	}
+
+	in_out_info->nr_hwc_blocks = i;
+	in_out_info->size = dump_size;
+
+	setup.jm_bm = in_out_info->bitmask[0];
+	setup.tiler_bm = in_out_info->bitmask[1];
+	setup.shader_bm = in_out_info->bitmask[2];
+	setup.mmu_l2_bm = in_out_info->bitmask[3];
+	hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx,
+			&setup, hand->vinstr_buffer);
+	if (!hand->vinstr_cli) {
+		dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core");
+		goto free_layout;
+	}
+
+	return hand;
+
+free_layout:
+	kfree(in_out_info->hwc_layout);
+
+free_vinstr_buffer:
+	kfree(hand->vinstr_buffer);
+
+release_device:
+	kbase_release_device(hand->kbdev);
+
+free_hand:
+	kfree(hand);
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
+
+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (in_out_info)
+		kfree(in_out_info->hwc_layout);
+
+	if (opaque_handles) {
+		cancel_work_sync(&opaque_handles->dump_work);
+		kbase_vinstr_detach_client(opaque_handles->vinstr_cli);
+		kfree(opaque_handles->vinstr_buffer);
+		kbase_release_device(opaque_handles->kbdev);
+		kfree(opaque_handles);
+	}
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
+
+static void dump_worker(struct work_struct *work)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+
+	hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work);
+	if (!kbase_vinstr_hwc_dump(hand->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL)) {
+		spin_lock_bh(&hand->dump_lock);
+		hand->dump_complete = 1;
+		spin_unlock_bh(&hand->dump_lock);
+	} else {
+		schedule_work(&hand->dump_work);
+	}
+}
+
+uint32_t kbase_gator_instr_hwcnt_dump_complete(
+		struct kbase_gator_hwcnt_handles *opaque_handles,
+		uint32_t * const success)
+{
+
+	if (opaque_handles && success) {
+		*success = opaque_handles->dump_complete;
+		opaque_handles->dump_complete = 0;
+		return *success;
+	}
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
+
+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (opaque_handles)
+		schedule_work(&opaque_handles->dump_work);
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
new file mode 100644
index 0000000..bd0589e
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
@@ -0,0 +1,224 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to fetch hardware counters.
+ */
+
+/* This define is used by the gator kernel module compile to select which DDK
+ * API calling convention to use. If not defined (legacy DDK) gator assumes
+ * version 1. The version to DDK release mapping is:
+ *     Version 1 API: DDK versions r1px, r2px
+ *     Version 2 API: DDK versions r3px, r4px
+ *     Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a
+ * counter dump. If this returns a non-zero value the request has been queued,
+ * otherwise the driver has been unable to do so (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the  previously
+ * requested dump has been succesful. If this returns non-zero the counter dump
+ * has resolved, but the value of *success must also be tested as the dump
+ * may have not been successful. If it returns zero the counter dump was
+ * abandoned due to the device being busy (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ *        kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ *    In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ *        hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ *        hwcnt_name # pointer to name list
+ *
+ *        u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ *        # Iterate over each 64-counter block in this GPU configuration
+ *        for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ *            hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ *            # Skip reserved type blocks - they contain no counters at all
+ *            if( type == RESERVED_BLOCK ) {
+ *                continue;
+ *            }
+ *
+ *            size_t name_offset = type * 64;
+ *            size_t data_offset = i * 64;
+ *
+ *            # Iterate over the names of the counters in this block type
+ *            for( j = 0; j < 64; j++) {
+ *                const char * name = hwcnt_name[name_offset+j];
+ *
+ *                # Skip empty name strings - there is no counter here
+ *                if( name[0] == '\0' ) {
+ *                    continue;
+ *                }
+ *
+ *                u32 data = hwcnt_data[data_offset+j];
+ *
+ *                printk( "COUNTER: %s DATA: %u\n", name, data );
+ *            }
+ *        }
+ *
+ *
+ *     Note that in most implementations you typically want to either SUM or
+ *     AVERAGE multiple instances of the same counter if, for example, you have
+ *     multiple shader cores or multiple L2 caches. The most sensible view for
+ *     analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ *     counters.
+ *
+ * 7] Goto 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling
+ *    kbase_gator_hwcnt_term_names(). This function must only be called if
+ *    init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+enum hwc_type {
+	JM_BLOCK = 0,
+	TILER_BLOCK,
+	SHADER_BLOCK,
+	MMU_L2_BLOCK,
+	RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+	/* Passed from Gator to kbase */
+
+	/* the bitmask of enabled hardware counters for each counter block */
+	uint16_t bitmask[4];
+
+	/* Passed from kbase to Gator */
+
+	/* ptr to counter dump memory */
+	void *kernel_dump_buffer;
+
+	/* size of counter dump memory */
+	uint32_t size;
+
+	/* the ID of the Mali device */
+	uint32_t gpu_id;
+
+	/* the number of shader cores in the GPU */
+	uint32_t nr_cores;
+
+	/* the number of core groups */
+	uint32_t nr_core_groups;
+
+	/* the memory layout of the performance counters */
+	enum hwc_type *hwc_layout;
+
+	/* the total number of hardware couter blocks */
+	uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info   A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ *                      specific information that will be returned to Gator. On entry Gator must have populated the
+ *                      'bitmask' field with the counters it wishes to enable for each class of counter block.
+ *                      Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ *                      enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ *                      the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ *                      kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU.
+ *
+ * @return              Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info       A pointer to a structure containing the enabled counters passed from Gator and all the
+ *                          Mali specific information that will be returned to Gator.
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ * @param[out] success      Non-zero on success, zero on failure.
+ *
+ * @return                  Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ *                          completed dump may not have dumped succesfully, so the caller must test for both
+ *                          a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ *
+ * @return                  Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_counters The total number of counters short names in the Mali devices' list.
+ *
+ * @return                    Pointer to an array of strings of length *total_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
new file mode 100755
index 0000000..5d38c7b
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -0,0 +1,2178 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_H_
+#define _KBASE_GATOR_HWCNT_NAMES_H_
+
+/*
+ * "Short names" for hardware counters used by Streamline. Counters names are
+ * stored in accordance with their memory layout in the binary counter block
+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
+ * of 64 counters, and each GPU implements the same set of "masters" although
+ * the counters each master exposes within its block of 64 may vary.
+ *
+ * Counters which are an empty string are simply "holes" in the counter memory
+ * where no counter exists.
+ */
+
+static const char * const hardware_counters_mali_t60x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MESSAGES_SENT",
+	"T60x_MESSAGES_RECEIVED",
+	"T60x_GPU_ACTIVE",
+	"T60x_IRQ_ACTIVE",
+	"T60x_JS0_JOBS",
+	"T60x_JS0_TASKS",
+	"T60x_JS0_ACTIVE",
+	"",
+	"T60x_JS0_WAIT_READ",
+	"T60x_JS0_WAIT_ISSUE",
+	"T60x_JS0_WAIT_DEPEND",
+	"T60x_JS0_WAIT_FINISH",
+	"T60x_JS1_JOBS",
+	"T60x_JS1_TASKS",
+	"T60x_JS1_ACTIVE",
+	"",
+	"T60x_JS1_WAIT_READ",
+	"T60x_JS1_WAIT_ISSUE",
+	"T60x_JS1_WAIT_DEPEND",
+	"T60x_JS1_WAIT_FINISH",
+	"T60x_JS2_JOBS",
+	"T60x_JS2_TASKS",
+	"T60x_JS2_ACTIVE",
+	"",
+	"T60x_JS2_WAIT_READ",
+	"T60x_JS2_WAIT_ISSUE",
+	"T60x_JS2_WAIT_DEPEND",
+	"T60x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T60x_TI_JOBS_PROCESSED",
+	"T60x_TI_TRIANGLES",
+	"T60x_TI_QUADS",
+	"T60x_TI_POLYGONS",
+	"T60x_TI_POINTS",
+	"T60x_TI_LINES",
+	"T60x_TI_VCACHE_HIT",
+	"T60x_TI_VCACHE_MISS",
+	"T60x_TI_FRONT_FACING",
+	"T60x_TI_BACK_FACING",
+	"T60x_TI_PRIM_VISIBLE",
+	"T60x_TI_PRIM_CULLED",
+	"T60x_TI_PRIM_CLIPPED",
+	"T60x_TI_LEVEL0",
+	"T60x_TI_LEVEL1",
+	"T60x_TI_LEVEL2",
+	"T60x_TI_LEVEL3",
+	"T60x_TI_LEVEL4",
+	"T60x_TI_LEVEL5",
+	"T60x_TI_LEVEL6",
+	"T60x_TI_LEVEL7",
+	"T60x_TI_COMMAND_1",
+	"T60x_TI_COMMAND_2",
+	"T60x_TI_COMMAND_3",
+	"T60x_TI_COMMAND_4",
+	"T60x_TI_COMMAND_4_7",
+	"T60x_TI_COMMAND_8_15",
+	"T60x_TI_COMMAND_16_63",
+	"T60x_TI_COMMAND_64",
+	"T60x_TI_COMPRESS_IN",
+	"T60x_TI_COMPRESS_OUT",
+	"T60x_TI_COMPRESS_FLUSH",
+	"T60x_TI_TIMESTAMPS",
+	"T60x_TI_PCACHE_HIT",
+	"T60x_TI_PCACHE_MISS",
+	"T60x_TI_PCACHE_LINE",
+	"T60x_TI_PCACHE_STALL",
+	"T60x_TI_WRBUF_HIT",
+	"T60x_TI_WRBUF_MISS",
+	"T60x_TI_WRBUF_LINE",
+	"T60x_TI_WRBUF_PARTIAL",
+	"T60x_TI_WRBUF_STALL",
+	"T60x_TI_ACTIVE",
+	"T60x_TI_LOADING_DESC",
+	"T60x_TI_INDEX_WAIT",
+	"T60x_TI_INDEX_RANGE_WAIT",
+	"T60x_TI_VERTEX_WAIT",
+	"T60x_TI_PCACHE_WAIT",
+	"T60x_TI_WRBUF_WAIT",
+	"T60x_TI_BUS_READ",
+	"T60x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_TI_UTLB_STALL",
+	"T60x_TI_UTLB_REPLAY_MISS",
+	"T60x_TI_UTLB_REPLAY_FULL",
+	"T60x_TI_UTLB_NEW_MISS",
+	"T60x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T60x_FRAG_ACTIVE",
+	"T60x_FRAG_PRIMITIVES",
+	"T60x_FRAG_PRIMITIVES_DROPPED",
+	"T60x_FRAG_CYCLES_DESC",
+	"T60x_FRAG_CYCLES_PLR",
+	"T60x_FRAG_CYCLES_VERT",
+	"T60x_FRAG_CYCLES_TRISETUP",
+	"T60x_FRAG_CYCLES_RAST",
+	"T60x_FRAG_THREADS",
+	"T60x_FRAG_DUMMY_THREADS",
+	"T60x_FRAG_QUADS_RAST",
+	"T60x_FRAG_QUADS_EZS_TEST",
+	"T60x_FRAG_QUADS_EZS_KILLED",
+	"T60x_FRAG_THREADS_LZS_TEST",
+	"T60x_FRAG_THREADS_LZS_KILLED",
+	"T60x_FRAG_CYCLES_NO_TILE",
+	"T60x_FRAG_NUM_TILES",
+	"T60x_FRAG_TRANS_ELIM",
+	"T60x_COMPUTE_ACTIVE",
+	"T60x_COMPUTE_TASKS",
+	"T60x_COMPUTE_THREADS",
+	"T60x_COMPUTE_CYCLES_DESC",
+	"T60x_TRIPIPE_ACTIVE",
+	"T60x_ARITH_WORDS",
+	"T60x_ARITH_CYCLES_REG",
+	"T60x_ARITH_CYCLES_L0",
+	"T60x_ARITH_FRAG_DEPEND",
+	"T60x_LS_WORDS",
+	"T60x_LS_ISSUES",
+	"T60x_LS_RESTARTS",
+	"T60x_LS_REISSUES_MISS",
+	"T60x_LS_REISSUES_VD",
+	"T60x_LS_REISSUE_ATTRIB_MISS",
+	"T60x_LS_NO_WB",
+	"T60x_TEX_WORDS",
+	"T60x_TEX_BUBBLES",
+	"T60x_TEX_WORDS_L0",
+	"T60x_TEX_WORDS_DESC",
+	"T60x_TEX_ISSUES",
+	"T60x_TEX_RECIRC_FMISS",
+	"T60x_TEX_RECIRC_DESC",
+	"T60x_TEX_RECIRC_MULTI",
+	"T60x_TEX_RECIRC_PMISS",
+	"T60x_TEX_RECIRC_CONF",
+	"T60x_LSC_READ_HITS",
+	"T60x_LSC_READ_MISSES",
+	"T60x_LSC_WRITE_HITS",
+	"T60x_LSC_WRITE_MISSES",
+	"T60x_LSC_ATOMIC_HITS",
+	"T60x_LSC_ATOMIC_MISSES",
+	"T60x_LSC_LINE_FETCHES",
+	"T60x_LSC_DIRTY_LINE",
+	"T60x_LSC_SNOOPS",
+	"T60x_AXI_TLB_STALL",
+	"T60x_AXI_TLB_MISS",
+	"T60x_AXI_TLB_TRANSACTION",
+	"T60x_LS_TLB_MISS",
+	"T60x_LS_TLB_HIT",
+	"T60x_AXI_BEATS_READ",
+	"T60x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MMU_HIT",
+	"T60x_MMU_NEW_MISS",
+	"T60x_MMU_REPLAY_FULL",
+	"T60x_MMU_REPLAY_MISS",
+	"T60x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_UTLB_HIT",
+	"T60x_UTLB_NEW_MISS",
+	"T60x_UTLB_REPLAY_FULL",
+	"T60x_UTLB_REPLAY_MISS",
+	"T60x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_L2_EXT_WRITE_BEATS",
+	"T60x_L2_EXT_READ_BEATS",
+	"T60x_L2_ANY_LOOKUP",
+	"T60x_L2_READ_LOOKUP",
+	"T60x_L2_SREAD_LOOKUP",
+	"T60x_L2_READ_REPLAY",
+	"T60x_L2_READ_SNOOP",
+	"T60x_L2_READ_HIT",
+	"T60x_L2_CLEAN_MISS",
+	"T60x_L2_WRITE_LOOKUP",
+	"T60x_L2_SWRITE_LOOKUP",
+	"T60x_L2_WRITE_REPLAY",
+	"T60x_L2_WRITE_SNOOP",
+	"T60x_L2_WRITE_HIT",
+	"T60x_L2_EXT_READ_FULL",
+	"T60x_L2_EXT_READ_HALF",
+	"T60x_L2_EXT_WRITE_FULL",
+	"T60x_L2_EXT_WRITE_HALF",
+	"T60x_L2_EXT_READ",
+	"T60x_L2_EXT_READ_LINE",
+	"T60x_L2_EXT_WRITE",
+	"T60x_L2_EXT_WRITE_LINE",
+	"T60x_L2_EXT_WRITE_SMALL",
+	"T60x_L2_EXT_BARRIER",
+	"T60x_L2_EXT_AR_STALL",
+	"T60x_L2_EXT_R_BUF_FULL",
+	"T60x_L2_EXT_RD_BUF_FULL",
+	"T60x_L2_EXT_R_RAW",
+	"T60x_L2_EXT_W_STALL",
+	"T60x_L2_EXT_W_BUF_FULL",
+	"T60x_L2_EXT_R_W_HAZARD",
+	"T60x_L2_TAG_HAZARD",
+	"T60x_L2_SNOOP_FULL",
+	"T60x_L2_REPLAY_FULL"
+};
+static const char * const hardware_counters_mali_t62x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MESSAGES_SENT",
+	"T62x_MESSAGES_RECEIVED",
+	"T62x_GPU_ACTIVE",
+	"T62x_IRQ_ACTIVE",
+	"T62x_JS0_JOBS",
+	"T62x_JS0_TASKS",
+	"T62x_JS0_ACTIVE",
+	"",
+	"T62x_JS0_WAIT_READ",
+	"T62x_JS0_WAIT_ISSUE",
+	"T62x_JS0_WAIT_DEPEND",
+	"T62x_JS0_WAIT_FINISH",
+	"T62x_JS1_JOBS",
+	"T62x_JS1_TASKS",
+	"T62x_JS1_ACTIVE",
+	"",
+	"T62x_JS1_WAIT_READ",
+	"T62x_JS1_WAIT_ISSUE",
+	"T62x_JS1_WAIT_DEPEND",
+	"T62x_JS1_WAIT_FINISH",
+	"T62x_JS2_JOBS",
+	"T62x_JS2_TASKS",
+	"T62x_JS2_ACTIVE",
+	"",
+	"T62x_JS2_WAIT_READ",
+	"T62x_JS2_WAIT_ISSUE",
+	"T62x_JS2_WAIT_DEPEND",
+	"T62x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T62x_TI_JOBS_PROCESSED",
+	"T62x_TI_TRIANGLES",
+	"T62x_TI_QUADS",
+	"T62x_TI_POLYGONS",
+	"T62x_TI_POINTS",
+	"T62x_TI_LINES",
+	"T62x_TI_VCACHE_HIT",
+	"T62x_TI_VCACHE_MISS",
+	"T62x_TI_FRONT_FACING",
+	"T62x_TI_BACK_FACING",
+	"T62x_TI_PRIM_VISIBLE",
+	"T62x_TI_PRIM_CULLED",
+	"T62x_TI_PRIM_CLIPPED",
+	"T62x_TI_LEVEL0",
+	"T62x_TI_LEVEL1",
+	"T62x_TI_LEVEL2",
+	"T62x_TI_LEVEL3",
+	"T62x_TI_LEVEL4",
+	"T62x_TI_LEVEL5",
+	"T62x_TI_LEVEL6",
+	"T62x_TI_LEVEL7",
+	"T62x_TI_COMMAND_1",
+	"T62x_TI_COMMAND_2",
+	"T62x_TI_COMMAND_3",
+	"T62x_TI_COMMAND_4",
+	"T62x_TI_COMMAND_5_7",
+	"T62x_TI_COMMAND_8_15",
+	"T62x_TI_COMMAND_16_63",
+	"T62x_TI_COMMAND_64",
+	"T62x_TI_COMPRESS_IN",
+	"T62x_TI_COMPRESS_OUT",
+	"T62x_TI_COMPRESS_FLUSH",
+	"T62x_TI_TIMESTAMPS",
+	"T62x_TI_PCACHE_HIT",
+	"T62x_TI_PCACHE_MISS",
+	"T62x_TI_PCACHE_LINE",
+	"T62x_TI_PCACHE_STALL",
+	"T62x_TI_WRBUF_HIT",
+	"T62x_TI_WRBUF_MISS",
+	"T62x_TI_WRBUF_LINE",
+	"T62x_TI_WRBUF_PARTIAL",
+	"T62x_TI_WRBUF_STALL",
+	"T62x_TI_ACTIVE",
+	"T62x_TI_LOADING_DESC",
+	"T62x_TI_INDEX_WAIT",
+	"T62x_TI_INDEX_RANGE_WAIT",
+	"T62x_TI_VERTEX_WAIT",
+	"T62x_TI_PCACHE_WAIT",
+	"T62x_TI_WRBUF_WAIT",
+	"T62x_TI_BUS_READ",
+	"T62x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_TI_UTLB_STALL",
+	"T62x_TI_UTLB_REPLAY_MISS",
+	"T62x_TI_UTLB_REPLAY_FULL",
+	"T62x_TI_UTLB_NEW_MISS",
+	"T62x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"T62x_SHADER_CORE_ACTIVE",
+	"T62x_FRAG_ACTIVE",
+	"T62x_FRAG_PRIMITIVES",
+	"T62x_FRAG_PRIMITIVES_DROPPED",
+	"T62x_FRAG_CYCLES_DESC",
+	"T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T62x_FRAG_CYCLES_VERT",
+	"T62x_FRAG_CYCLES_TRISETUP",
+	"T62x_FRAG_CYCLES_EZS_ACTIVE",
+	"T62x_FRAG_THREADS",
+	"T62x_FRAG_DUMMY_THREADS",
+	"T62x_FRAG_QUADS_RAST",
+	"T62x_FRAG_QUADS_EZS_TEST",
+	"T62x_FRAG_QUADS_EZS_KILLED",
+	"T62x_FRAG_THREADS_LZS_TEST",
+	"T62x_FRAG_THREADS_LZS_KILLED",
+	"T62x_FRAG_CYCLES_NO_TILE",
+	"T62x_FRAG_NUM_TILES",
+	"T62x_FRAG_TRANS_ELIM",
+	"T62x_COMPUTE_ACTIVE",
+	"T62x_COMPUTE_TASKS",
+	"T62x_COMPUTE_THREADS",
+	"T62x_COMPUTE_CYCLES_DESC",
+	"T62x_TRIPIPE_ACTIVE",
+	"T62x_ARITH_WORDS",
+	"T62x_ARITH_CYCLES_REG",
+	"T62x_ARITH_CYCLES_L0",
+	"T62x_ARITH_FRAG_DEPEND",
+	"T62x_LS_WORDS",
+	"T62x_LS_ISSUES",
+	"T62x_LS_RESTARTS",
+	"T62x_LS_REISSUES_MISS",
+	"T62x_LS_REISSUES_VD",
+	"T62x_LS_REISSUE_ATTRIB_MISS",
+	"T62x_LS_NO_WB",
+	"T62x_TEX_WORDS",
+	"T62x_TEX_BUBBLES",
+	"T62x_TEX_WORDS_L0",
+	"T62x_TEX_WORDS_DESC",
+	"T62x_TEX_ISSUES",
+	"T62x_TEX_RECIRC_FMISS",
+	"T62x_TEX_RECIRC_DESC",
+	"T62x_TEX_RECIRC_MULTI",
+	"T62x_TEX_RECIRC_PMISS",
+	"T62x_TEX_RECIRC_CONF",
+	"T62x_LSC_READ_HITS",
+	"T62x_LSC_READ_MISSES",
+	"T62x_LSC_WRITE_HITS",
+	"T62x_LSC_WRITE_MISSES",
+	"T62x_LSC_ATOMIC_HITS",
+	"T62x_LSC_ATOMIC_MISSES",
+	"T62x_LSC_LINE_FETCHES",
+	"T62x_LSC_DIRTY_LINE",
+	"T62x_LSC_SNOOPS",
+	"T62x_AXI_TLB_STALL",
+	"T62x_AXI_TLB_MISS",
+	"T62x_AXI_TLB_TRANSACTION",
+	"T62x_LS_TLB_MISS",
+	"T62x_LS_TLB_HIT",
+	"T62x_AXI_BEATS_READ",
+	"T62x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MMU_HIT",
+	"T62x_MMU_NEW_MISS",
+	"T62x_MMU_REPLAY_FULL",
+	"T62x_MMU_REPLAY_MISS",
+	"T62x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_UTLB_HIT",
+	"T62x_UTLB_NEW_MISS",
+	"T62x_UTLB_REPLAY_FULL",
+	"T62x_UTLB_REPLAY_MISS",
+	"T62x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_L2_EXT_WRITE_BEATS",
+	"T62x_L2_EXT_READ_BEATS",
+	"T62x_L2_ANY_LOOKUP",
+	"T62x_L2_READ_LOOKUP",
+	"T62x_L2_SREAD_LOOKUP",
+	"T62x_L2_READ_REPLAY",
+	"T62x_L2_READ_SNOOP",
+	"T62x_L2_READ_HIT",
+	"T62x_L2_CLEAN_MISS",
+	"T62x_L2_WRITE_LOOKUP",
+	"T62x_L2_SWRITE_LOOKUP",
+	"T62x_L2_WRITE_REPLAY",
+	"T62x_L2_WRITE_SNOOP",
+	"T62x_L2_WRITE_HIT",
+	"T62x_L2_EXT_READ_FULL",
+	"T62x_L2_EXT_READ_HALF",
+	"T62x_L2_EXT_WRITE_FULL",
+	"T62x_L2_EXT_WRITE_HALF",
+	"T62x_L2_EXT_READ",
+	"T62x_L2_EXT_READ_LINE",
+	"T62x_L2_EXT_WRITE",
+	"T62x_L2_EXT_WRITE_LINE",
+	"T62x_L2_EXT_WRITE_SMALL",
+	"T62x_L2_EXT_BARRIER",
+	"T62x_L2_EXT_AR_STALL",
+	"T62x_L2_EXT_R_BUF_FULL",
+	"T62x_L2_EXT_RD_BUF_FULL",
+	"T62x_L2_EXT_R_RAW",
+	"T62x_L2_EXT_W_STALL",
+	"T62x_L2_EXT_W_BUF_FULL",
+	"T62x_L2_EXT_R_W_HAZARD",
+	"T62x_L2_TAG_HAZARD",
+	"T62x_L2_SNOOP_FULL",
+	"T62x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t72x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T72x_GPU_ACTIVE",
+	"T72x_IRQ_ACTIVE",
+	"T72x_JS0_JOBS",
+	"T72x_JS0_TASKS",
+	"T72x_JS0_ACTIVE",
+	"T72x_JS1_JOBS",
+	"T72x_JS1_TASKS",
+	"T72x_JS1_ACTIVE",
+	"T72x_JS2_JOBS",
+	"T72x_JS2_TASKS",
+	"T72x_JS2_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T72x_TI_JOBS_PROCESSED",
+	"T72x_TI_TRIANGLES",
+	"T72x_TI_QUADS",
+	"T72x_TI_POLYGONS",
+	"T72x_TI_POINTS",
+	"T72x_TI_LINES",
+	"T72x_TI_FRONT_FACING",
+	"T72x_TI_BACK_FACING",
+	"T72x_TI_PRIM_VISIBLE",
+	"T72x_TI_PRIM_CULLED",
+	"T72x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T72x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T72x_FRAG_ACTIVE",
+	"T72x_FRAG_PRIMITIVES",
+	"T72x_FRAG_PRIMITIVES_DROPPED",
+	"T72x_FRAG_THREADS",
+	"T72x_FRAG_DUMMY_THREADS",
+	"T72x_FRAG_QUADS_RAST",
+	"T72x_FRAG_QUADS_EZS_TEST",
+	"T72x_FRAG_QUADS_EZS_KILLED",
+	"T72x_FRAG_THREADS_LZS_TEST",
+	"T72x_FRAG_THREADS_LZS_KILLED",
+	"T72x_FRAG_CYCLES_NO_TILE",
+	"T72x_FRAG_NUM_TILES",
+	"T72x_FRAG_TRANS_ELIM",
+	"T72x_COMPUTE_ACTIVE",
+	"T72x_COMPUTE_TASKS",
+	"T72x_COMPUTE_THREADS",
+	"T72x_TRIPIPE_ACTIVE",
+	"T72x_ARITH_WORDS",
+	"T72x_ARITH_CYCLES_REG",
+	"T72x_LS_WORDS",
+	"T72x_LS_ISSUES",
+	"T72x_LS_RESTARTS",
+	"T72x_LS_REISSUES_MISS",
+	"T72x_TEX_WORDS",
+	"T72x_TEX_BUBBLES",
+	"T72x_TEX_ISSUES",
+	"T72x_LSC_READ_HITS",
+	"T72x_LSC_READ_MISSES",
+	"T72x_LSC_WRITE_HITS",
+	"T72x_LSC_WRITE_MISSES",
+	"T72x_LSC_ATOMIC_HITS",
+	"T72x_LSC_ATOMIC_MISSES",
+	"T72x_LSC_LINE_FETCHES",
+	"T72x_LSC_DIRTY_LINE",
+	"T72x_LSC_SNOOPS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T72x_L2_EXT_WRITE_BEAT",
+	"T72x_L2_EXT_READ_BEAT",
+	"T72x_L2_READ_SNOOP",
+	"T72x_L2_READ_HIT",
+	"T72x_L2_WRITE_SNOOP",
+	"T72x_L2_WRITE_HIT",
+	"T72x_L2_EXT_WRITE_SMALL",
+	"T72x_L2_EXT_BARRIER",
+	"T72x_L2_EXT_AR_STALL",
+	"T72x_L2_EXT_W_STALL",
+	"T72x_L2_SNOOP_FULL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	""
+};
+
+static const char * const hardware_counters_mali_t76x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MESSAGES_SENT",
+	"T76x_MESSAGES_RECEIVED",
+	"T76x_GPU_ACTIVE",
+	"T76x_IRQ_ACTIVE",
+	"T76x_JS0_JOBS",
+	"T76x_JS0_TASKS",
+	"T76x_JS0_ACTIVE",
+	"",
+	"T76x_JS0_WAIT_READ",
+	"T76x_JS0_WAIT_ISSUE",
+	"T76x_JS0_WAIT_DEPEND",
+	"T76x_JS0_WAIT_FINISH",
+	"T76x_JS1_JOBS",
+	"T76x_JS1_TASKS",
+	"T76x_JS1_ACTIVE",
+	"",
+	"T76x_JS1_WAIT_READ",
+	"T76x_JS1_WAIT_ISSUE",
+	"T76x_JS1_WAIT_DEPEND",
+	"T76x_JS1_WAIT_FINISH",
+	"T76x_JS2_JOBS",
+	"T76x_JS2_TASKS",
+	"T76x_JS2_ACTIVE",
+	"",
+	"T76x_JS2_WAIT_READ",
+	"T76x_JS2_WAIT_ISSUE",
+	"T76x_JS2_WAIT_DEPEND",
+	"T76x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T76x_TI_JOBS_PROCESSED",
+	"T76x_TI_TRIANGLES",
+	"T76x_TI_QUADS",
+	"T76x_TI_POLYGONS",
+	"T76x_TI_POINTS",
+	"T76x_TI_LINES",
+	"T76x_TI_VCACHE_HIT",
+	"T76x_TI_VCACHE_MISS",
+	"T76x_TI_FRONT_FACING",
+	"T76x_TI_BACK_FACING",
+	"T76x_TI_PRIM_VISIBLE",
+	"T76x_TI_PRIM_CULLED",
+	"T76x_TI_PRIM_CLIPPED",
+	"T76x_TI_LEVEL0",
+	"T76x_TI_LEVEL1",
+	"T76x_TI_LEVEL2",
+	"T76x_TI_LEVEL3",
+	"T76x_TI_LEVEL4",
+	"T76x_TI_LEVEL5",
+	"T76x_TI_LEVEL6",
+	"T76x_TI_LEVEL7",
+	"T76x_TI_COMMAND_1",
+	"T76x_TI_COMMAND_2",
+	"T76x_TI_COMMAND_3",
+	"T76x_TI_COMMAND_4",
+	"T76x_TI_COMMAND_5_7",
+	"T76x_TI_COMMAND_8_15",
+	"T76x_TI_COMMAND_16_63",
+	"T76x_TI_COMMAND_64",
+	"T76x_TI_COMPRESS_IN",
+	"T76x_TI_COMPRESS_OUT",
+	"T76x_TI_COMPRESS_FLUSH",
+	"T76x_TI_TIMESTAMPS",
+	"T76x_TI_PCACHE_HIT",
+	"T76x_TI_PCACHE_MISS",
+	"T76x_TI_PCACHE_LINE",
+	"T76x_TI_PCACHE_STALL",
+	"T76x_TI_WRBUF_HIT",
+	"T76x_TI_WRBUF_MISS",
+	"T76x_TI_WRBUF_LINE",
+	"T76x_TI_WRBUF_PARTIAL",
+	"T76x_TI_WRBUF_STALL",
+	"T76x_TI_ACTIVE",
+	"T76x_TI_LOADING_DESC",
+	"T76x_TI_INDEX_WAIT",
+	"T76x_TI_INDEX_RANGE_WAIT",
+	"T76x_TI_VERTEX_WAIT",
+	"T76x_TI_PCACHE_WAIT",
+	"T76x_TI_WRBUF_WAIT",
+	"T76x_TI_BUS_READ",
+	"T76x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_TI_UTLB_HIT",
+	"T76x_TI_UTLB_NEW_MISS",
+	"T76x_TI_UTLB_REPLAY_FULL",
+	"T76x_TI_UTLB_REPLAY_MISS",
+	"T76x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T76x_FRAG_ACTIVE",
+	"T76x_FRAG_PRIMITIVES",
+	"T76x_FRAG_PRIMITIVES_DROPPED",
+	"T76x_FRAG_CYCLES_DESC",
+	"T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T76x_FRAG_CYCLES_VERT",
+	"T76x_FRAG_CYCLES_TRISETUP",
+	"T76x_FRAG_CYCLES_EZS_ACTIVE",
+	"T76x_FRAG_THREADS",
+	"T76x_FRAG_DUMMY_THREADS",
+	"T76x_FRAG_QUADS_RAST",
+	"T76x_FRAG_QUADS_EZS_TEST",
+	"T76x_FRAG_QUADS_EZS_KILLED",
+	"T76x_FRAG_THREADS_LZS_TEST",
+	"T76x_FRAG_THREADS_LZS_KILLED",
+	"T76x_FRAG_CYCLES_NO_TILE",
+	"T76x_FRAG_NUM_TILES",
+	"T76x_FRAG_TRANS_ELIM",
+	"T76x_COMPUTE_ACTIVE",
+	"T76x_COMPUTE_TASKS",
+	"T76x_COMPUTE_THREADS",
+	"T76x_COMPUTE_CYCLES_DESC",
+	"T76x_TRIPIPE_ACTIVE",
+	"T76x_ARITH_WORDS",
+	"T76x_ARITH_CYCLES_REG",
+	"T76x_ARITH_CYCLES_L0",
+	"T76x_ARITH_FRAG_DEPEND",
+	"T76x_LS_WORDS",
+	"T76x_LS_ISSUES",
+	"T76x_LS_REISSUE_ATTR",
+	"T76x_LS_REISSUES_VARY",
+	"T76x_LS_VARY_RV_MISS",
+	"T76x_LS_VARY_RV_HIT",
+	"T76x_LS_NO_UNPARK",
+	"T76x_TEX_WORDS",
+	"T76x_TEX_BUBBLES",
+	"T76x_TEX_WORDS_L0",
+	"T76x_TEX_WORDS_DESC",
+	"T76x_TEX_ISSUES",
+	"T76x_TEX_RECIRC_FMISS",
+	"T76x_TEX_RECIRC_DESC",
+	"T76x_TEX_RECIRC_MULTI",
+	"T76x_TEX_RECIRC_PMISS",
+	"T76x_TEX_RECIRC_CONF",
+	"T76x_LSC_READ_HITS",
+	"T76x_LSC_READ_OP",
+	"T76x_LSC_WRITE_HITS",
+	"T76x_LSC_WRITE_OP",
+	"T76x_LSC_ATOMIC_HITS",
+	"T76x_LSC_ATOMIC_OP",
+	"T76x_LSC_LINE_FETCHES",
+	"T76x_LSC_DIRTY_LINE",
+	"T76x_LSC_SNOOPS",
+	"T76x_AXI_TLB_STALL",
+	"T76x_AXI_TLB_MISS",
+	"T76x_AXI_TLB_TRANSACTION",
+	"T76x_LS_TLB_MISS",
+	"T76x_LS_TLB_HIT",
+	"T76x_AXI_BEATS_READ",
+	"T76x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MMU_HIT",
+	"T76x_MMU_NEW_MISS",
+	"T76x_MMU_REPLAY_FULL",
+	"T76x_MMU_REPLAY_MISS",
+	"T76x_MMU_TABLE_WALK",
+	"T76x_MMU_REQUESTS",
+	"",
+	"",
+	"T76x_UTLB_HIT",
+	"T76x_UTLB_NEW_MISS",
+	"T76x_UTLB_REPLAY_FULL",
+	"T76x_UTLB_REPLAY_MISS",
+	"T76x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_L2_EXT_WRITE_BEATS",
+	"T76x_L2_EXT_READ_BEATS",
+	"T76x_L2_ANY_LOOKUP",
+	"T76x_L2_READ_LOOKUP",
+	"T76x_L2_SREAD_LOOKUP",
+	"T76x_L2_READ_REPLAY",
+	"T76x_L2_READ_SNOOP",
+	"T76x_L2_READ_HIT",
+	"T76x_L2_CLEAN_MISS",
+	"T76x_L2_WRITE_LOOKUP",
+	"T76x_L2_SWRITE_LOOKUP",
+	"T76x_L2_WRITE_REPLAY",
+	"T76x_L2_WRITE_SNOOP",
+	"T76x_L2_WRITE_HIT",
+	"T76x_L2_EXT_READ_FULL",
+	"",
+	"T76x_L2_EXT_WRITE_FULL",
+	"T76x_L2_EXT_R_W_HAZARD",
+	"T76x_L2_EXT_READ",
+	"T76x_L2_EXT_READ_LINE",
+	"T76x_L2_EXT_WRITE",
+	"T76x_L2_EXT_WRITE_LINE",
+	"T76x_L2_EXT_WRITE_SMALL",
+	"T76x_L2_EXT_BARRIER",
+	"T76x_L2_EXT_AR_STALL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_EXT_RD_BUF_FULL",
+	"T76x_L2_EXT_R_RAW",
+	"T76x_L2_EXT_W_STALL",
+	"T76x_L2_EXT_W_BUF_FULL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_TAG_HAZARD",
+	"T76x_L2_SNOOP_FULL",
+	"T76x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t82x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MESSAGES_SENT",
+	"T82x_MESSAGES_RECEIVED",
+	"T82x_GPU_ACTIVE",
+	"T82x_IRQ_ACTIVE",
+	"T82x_JS0_JOBS",
+	"T82x_JS0_TASKS",
+	"T82x_JS0_ACTIVE",
+	"",
+	"T82x_JS0_WAIT_READ",
+	"T82x_JS0_WAIT_ISSUE",
+	"T82x_JS0_WAIT_DEPEND",
+	"T82x_JS0_WAIT_FINISH",
+	"T82x_JS1_JOBS",
+	"T82x_JS1_TASKS",
+	"T82x_JS1_ACTIVE",
+	"",
+	"T82x_JS1_WAIT_READ",
+	"T82x_JS1_WAIT_ISSUE",
+	"T82x_JS1_WAIT_DEPEND",
+	"T82x_JS1_WAIT_FINISH",
+	"T82x_JS2_JOBS",
+	"T82x_JS2_TASKS",
+	"T82x_JS2_ACTIVE",
+	"",
+	"T82x_JS2_WAIT_READ",
+	"T82x_JS2_WAIT_ISSUE",
+	"T82x_JS2_WAIT_DEPEND",
+	"T82x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T82x_TI_JOBS_PROCESSED",
+	"T82x_TI_TRIANGLES",
+	"T82x_TI_QUADS",
+	"T82x_TI_POLYGONS",
+	"T82x_TI_POINTS",
+	"T82x_TI_LINES",
+	"T82x_TI_FRONT_FACING",
+	"T82x_TI_BACK_FACING",
+	"T82x_TI_PRIM_VISIBLE",
+	"T82x_TI_PRIM_CULLED",
+	"T82x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T82x_FRAG_ACTIVE",
+	"T82x_FRAG_PRIMITIVES",
+	"T82x_FRAG_PRIMITIVES_DROPPED",
+	"T82x_FRAG_CYCLES_DESC",
+	"T82x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T82x_FRAG_CYCLES_VERT",
+	"T82x_FRAG_CYCLES_TRISETUP",
+	"T82x_FRAG_CYCLES_EZS_ACTIVE",
+	"T82x_FRAG_THREADS",
+	"T82x_FRAG_DUMMY_THREADS",
+	"T82x_FRAG_QUADS_RAST",
+	"T82x_FRAG_QUADS_EZS_TEST",
+	"T82x_FRAG_QUADS_EZS_KILLED",
+	"T82x_FRAG_THREADS_LZS_TEST",
+	"T82x_FRAG_THREADS_LZS_KILLED",
+	"T82x_FRAG_CYCLES_NO_TILE",
+	"T82x_FRAG_NUM_TILES",
+	"T82x_FRAG_TRANS_ELIM",
+	"T82x_COMPUTE_ACTIVE",
+	"T82x_COMPUTE_TASKS",
+	"T82x_COMPUTE_THREADS",
+	"T82x_COMPUTE_CYCLES_DESC",
+	"T82x_TRIPIPE_ACTIVE",
+	"T82x_ARITH_WORDS",
+	"T82x_ARITH_CYCLES_REG",
+	"T82x_ARITH_CYCLES_L0",
+	"T82x_ARITH_FRAG_DEPEND",
+	"T82x_LS_WORDS",
+	"T82x_LS_ISSUES",
+	"T82x_LS_REISSUE_ATTR",
+	"T82x_LS_REISSUES_VARY",
+	"T82x_LS_VARY_RV_MISS",
+	"T82x_LS_VARY_RV_HIT",
+	"T82x_LS_NO_UNPARK",
+	"T82x_TEX_WORDS",
+	"T82x_TEX_BUBBLES",
+	"T82x_TEX_WORDS_L0",
+	"T82x_TEX_WORDS_DESC",
+	"T82x_TEX_ISSUES",
+	"T82x_TEX_RECIRC_FMISS",
+	"T82x_TEX_RECIRC_DESC",
+	"T82x_TEX_RECIRC_MULTI",
+	"T82x_TEX_RECIRC_PMISS",
+	"T82x_TEX_RECIRC_CONF",
+	"T82x_LSC_READ_HITS",
+	"T82x_LSC_READ_OP",
+	"T82x_LSC_WRITE_HITS",
+	"T82x_LSC_WRITE_OP",
+	"T82x_LSC_ATOMIC_HITS",
+	"T82x_LSC_ATOMIC_OP",
+	"T82x_LSC_LINE_FETCHES",
+	"T82x_LSC_DIRTY_LINE",
+	"T82x_LSC_SNOOPS",
+	"T82x_AXI_TLB_STALL",
+	"T82x_AXI_TLB_MISS",
+	"T82x_AXI_TLB_TRANSACTION",
+	"T82x_LS_TLB_MISS",
+	"T82x_LS_TLB_HIT",
+	"T82x_AXI_BEATS_READ",
+	"T82x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MMU_HIT",
+	"T82x_MMU_NEW_MISS",
+	"T82x_MMU_REPLAY_FULL",
+	"T82x_MMU_REPLAY_MISS",
+	"T82x_MMU_TABLE_WALK",
+	"T82x_MMU_REQUESTS",
+	"",
+	"",
+	"T82x_UTLB_HIT",
+	"T82x_UTLB_NEW_MISS",
+	"T82x_UTLB_REPLAY_FULL",
+	"T82x_UTLB_REPLAY_MISS",
+	"T82x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_L2_EXT_WRITE_BEATS",
+	"T82x_L2_EXT_READ_BEATS",
+	"T82x_L2_ANY_LOOKUP",
+	"T82x_L2_READ_LOOKUP",
+	"T82x_L2_SREAD_LOOKUP",
+	"T82x_L2_READ_REPLAY",
+	"T82x_L2_READ_SNOOP",
+	"T82x_L2_READ_HIT",
+	"T82x_L2_CLEAN_MISS",
+	"T82x_L2_WRITE_LOOKUP",
+	"T82x_L2_SWRITE_LOOKUP",
+	"T82x_L2_WRITE_REPLAY",
+	"T82x_L2_WRITE_SNOOP",
+	"T82x_L2_WRITE_HIT",
+	"T82x_L2_EXT_READ_FULL",
+	"",
+	"T82x_L2_EXT_WRITE_FULL",
+	"T82x_L2_EXT_R_W_HAZARD",
+	"T82x_L2_EXT_READ",
+	"T82x_L2_EXT_READ_LINE",
+	"T82x_L2_EXT_WRITE",
+	"T82x_L2_EXT_WRITE_LINE",
+	"T82x_L2_EXT_WRITE_SMALL",
+	"T82x_L2_EXT_BARRIER",
+	"T82x_L2_EXT_AR_STALL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_EXT_RD_BUF_FULL",
+	"T82x_L2_EXT_R_RAW",
+	"T82x_L2_EXT_W_STALL",
+	"T82x_L2_EXT_W_BUF_FULL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_TAG_HAZARD",
+	"T82x_L2_SNOOP_FULL",
+	"T82x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t83x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MESSAGES_SENT",
+	"T83x_MESSAGES_RECEIVED",
+	"T83x_GPU_ACTIVE",
+	"T83x_IRQ_ACTIVE",
+	"T83x_JS0_JOBS",
+	"T83x_JS0_TASKS",
+	"T83x_JS0_ACTIVE",
+	"",
+	"T83x_JS0_WAIT_READ",
+	"T83x_JS0_WAIT_ISSUE",
+	"T83x_JS0_WAIT_DEPEND",
+	"T83x_JS0_WAIT_FINISH",
+	"T83x_JS1_JOBS",
+	"T83x_JS1_TASKS",
+	"T83x_JS1_ACTIVE",
+	"",
+	"T83x_JS1_WAIT_READ",
+	"T83x_JS1_WAIT_ISSUE",
+	"T83x_JS1_WAIT_DEPEND",
+	"T83x_JS1_WAIT_FINISH",
+	"T83x_JS2_JOBS",
+	"T83x_JS2_TASKS",
+	"T83x_JS2_ACTIVE",
+	"",
+	"T83x_JS2_WAIT_READ",
+	"T83x_JS2_WAIT_ISSUE",
+	"T83x_JS2_WAIT_DEPEND",
+	"T83x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T83x_TI_JOBS_PROCESSED",
+	"T83x_TI_TRIANGLES",
+	"T83x_TI_QUADS",
+	"T83x_TI_POLYGONS",
+	"T83x_TI_POINTS",
+	"T83x_TI_LINES",
+	"T83x_TI_FRONT_FACING",
+	"T83x_TI_BACK_FACING",
+	"T83x_TI_PRIM_VISIBLE",
+	"T83x_TI_PRIM_CULLED",
+	"T83x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T83x_FRAG_ACTIVE",
+	"T83x_FRAG_PRIMITIVES",
+	"T83x_FRAG_PRIMITIVES_DROPPED",
+	"T83x_FRAG_CYCLES_DESC",
+	"T83x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T83x_FRAG_CYCLES_VERT",
+	"T83x_FRAG_CYCLES_TRISETUP",
+	"T83x_FRAG_CYCLES_EZS_ACTIVE",
+	"T83x_FRAG_THREADS",
+	"T83x_FRAG_DUMMY_THREADS",
+	"T83x_FRAG_QUADS_RAST",
+	"T83x_FRAG_QUADS_EZS_TEST",
+	"T83x_FRAG_QUADS_EZS_KILLED",
+	"T83x_FRAG_THREADS_LZS_TEST",
+	"T83x_FRAG_THREADS_LZS_KILLED",
+	"T83x_FRAG_CYCLES_NO_TILE",
+	"T83x_FRAG_NUM_TILES",
+	"T83x_FRAG_TRANS_ELIM",
+	"T83x_COMPUTE_ACTIVE",
+	"T83x_COMPUTE_TASKS",
+	"T83x_COMPUTE_THREADS",
+	"T83x_COMPUTE_CYCLES_DESC",
+	"T83x_TRIPIPE_ACTIVE",
+	"T83x_ARITH_WORDS",
+	"T83x_ARITH_CYCLES_REG",
+	"T83x_ARITH_CYCLES_L0",
+	"T83x_ARITH_FRAG_DEPEND",
+	"T83x_LS_WORDS",
+	"T83x_LS_ISSUES",
+	"T83x_LS_REISSUE_ATTR",
+	"T83x_LS_REISSUES_VARY",
+	"T83x_LS_VARY_RV_MISS",
+	"T83x_LS_VARY_RV_HIT",
+	"T83x_LS_NO_UNPARK",
+	"T83x_TEX_WORDS",
+	"T83x_TEX_BUBBLES",
+	"T83x_TEX_WORDS_L0",
+	"T83x_TEX_WORDS_DESC",
+	"T83x_TEX_ISSUES",
+	"T83x_TEX_RECIRC_FMISS",
+	"T83x_TEX_RECIRC_DESC",
+	"T83x_TEX_RECIRC_MULTI",
+	"T83x_TEX_RECIRC_PMISS",
+	"T83x_TEX_RECIRC_CONF",
+	"T83x_LSC_READ_HITS",
+	"T83x_LSC_READ_OP",
+	"T83x_LSC_WRITE_HITS",
+	"T83x_LSC_WRITE_OP",
+	"T83x_LSC_ATOMIC_HITS",
+	"T83x_LSC_ATOMIC_OP",
+	"T83x_LSC_LINE_FETCHES",
+	"T83x_LSC_DIRTY_LINE",
+	"T83x_LSC_SNOOPS",
+	"T83x_AXI_TLB_STALL",
+	"T83x_AXI_TLB_MISS",
+	"T83x_AXI_TLB_TRANSACTION",
+	"T83x_LS_TLB_MISS",
+	"T83x_LS_TLB_HIT",
+	"T83x_AXI_BEATS_READ",
+	"T83x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MMU_HIT",
+	"T83x_MMU_NEW_MISS",
+	"T83x_MMU_REPLAY_FULL",
+	"T83x_MMU_REPLAY_MISS",
+	"T83x_MMU_TABLE_WALK",
+	"T83x_MMU_REQUESTS",
+	"",
+	"",
+	"T83x_UTLB_HIT",
+	"T83x_UTLB_NEW_MISS",
+	"T83x_UTLB_REPLAY_FULL",
+	"T83x_UTLB_REPLAY_MISS",
+	"T83x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_L2_EXT_WRITE_BEATS",
+	"T83x_L2_EXT_READ_BEATS",
+	"T83x_L2_ANY_LOOKUP",
+	"T83x_L2_READ_LOOKUP",
+	"T83x_L2_SREAD_LOOKUP",
+	"T83x_L2_READ_REPLAY",
+	"T83x_L2_READ_SNOOP",
+	"T83x_L2_READ_HIT",
+	"T83x_L2_CLEAN_MISS",
+	"T83x_L2_WRITE_LOOKUP",
+	"T83x_L2_SWRITE_LOOKUP",
+	"T83x_L2_WRITE_REPLAY",
+	"T83x_L2_WRITE_SNOOP",
+	"T83x_L2_WRITE_HIT",
+	"T83x_L2_EXT_READ_FULL",
+	"",
+	"T83x_L2_EXT_WRITE_FULL",
+	"T83x_L2_EXT_R_W_HAZARD",
+	"T83x_L2_EXT_READ",
+	"T83x_L2_EXT_READ_LINE",
+	"T83x_L2_EXT_WRITE",
+	"T83x_L2_EXT_WRITE_LINE",
+	"T83x_L2_EXT_WRITE_SMALL",
+	"T83x_L2_EXT_BARRIER",
+	"T83x_L2_EXT_AR_STALL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_EXT_RD_BUF_FULL",
+	"T83x_L2_EXT_R_RAW",
+	"T83x_L2_EXT_W_STALL",
+	"T83x_L2_EXT_W_BUF_FULL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_TAG_HAZARD",
+	"T83x_L2_SNOOP_FULL",
+	"T83x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t86x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MESSAGES_SENT",
+	"T86x_MESSAGES_RECEIVED",
+	"T86x_GPU_ACTIVE",
+	"T86x_IRQ_ACTIVE",
+	"T86x_JS0_JOBS",
+	"T86x_JS0_TASKS",
+	"T86x_JS0_ACTIVE",
+	"",
+	"T86x_JS0_WAIT_READ",
+	"T86x_JS0_WAIT_ISSUE",
+	"T86x_JS0_WAIT_DEPEND",
+	"T86x_JS0_WAIT_FINISH",
+	"T86x_JS1_JOBS",
+	"T86x_JS1_TASKS",
+	"T86x_JS1_ACTIVE",
+	"",
+	"T86x_JS1_WAIT_READ",
+	"T86x_JS1_WAIT_ISSUE",
+	"T86x_JS1_WAIT_DEPEND",
+	"T86x_JS1_WAIT_FINISH",
+	"T86x_JS2_JOBS",
+	"T86x_JS2_TASKS",
+	"T86x_JS2_ACTIVE",
+	"",
+	"T86x_JS2_WAIT_READ",
+	"T86x_JS2_WAIT_ISSUE",
+	"T86x_JS2_WAIT_DEPEND",
+	"T86x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T86x_TI_JOBS_PROCESSED",
+	"T86x_TI_TRIANGLES",
+	"T86x_TI_QUADS",
+	"T86x_TI_POLYGONS",
+	"T86x_TI_POINTS",
+	"T86x_TI_LINES",
+	"T86x_TI_VCACHE_HIT",
+	"T86x_TI_VCACHE_MISS",
+	"T86x_TI_FRONT_FACING",
+	"T86x_TI_BACK_FACING",
+	"T86x_TI_PRIM_VISIBLE",
+	"T86x_TI_PRIM_CULLED",
+	"T86x_TI_PRIM_CLIPPED",
+	"T86x_TI_LEVEL0",
+	"T86x_TI_LEVEL1",
+	"T86x_TI_LEVEL2",
+	"T86x_TI_LEVEL3",
+	"T86x_TI_LEVEL4",
+	"T86x_TI_LEVEL5",
+	"T86x_TI_LEVEL6",
+	"T86x_TI_LEVEL7",
+	"T86x_TI_COMMAND_1",
+	"T86x_TI_COMMAND_2",
+	"T86x_TI_COMMAND_3",
+	"T86x_TI_COMMAND_4",
+	"T86x_TI_COMMAND_5_7",
+	"T86x_TI_COMMAND_8_15",
+	"T86x_TI_COMMAND_16_63",
+	"T86x_TI_COMMAND_64",
+	"T86x_TI_COMPRESS_IN",
+	"T86x_TI_COMPRESS_OUT",
+	"T86x_TI_COMPRESS_FLUSH",
+	"T86x_TI_TIMESTAMPS",
+	"T86x_TI_PCACHE_HIT",
+	"T86x_TI_PCACHE_MISS",
+	"T86x_TI_PCACHE_LINE",
+	"T86x_TI_PCACHE_STALL",
+	"T86x_TI_WRBUF_HIT",
+	"T86x_TI_WRBUF_MISS",
+	"T86x_TI_WRBUF_LINE",
+	"T86x_TI_WRBUF_PARTIAL",
+	"T86x_TI_WRBUF_STALL",
+	"T86x_TI_ACTIVE",
+	"T86x_TI_LOADING_DESC",
+	"T86x_TI_INDEX_WAIT",
+	"T86x_TI_INDEX_RANGE_WAIT",
+	"T86x_TI_VERTEX_WAIT",
+	"T86x_TI_PCACHE_WAIT",
+	"T86x_TI_WRBUF_WAIT",
+	"T86x_TI_BUS_READ",
+	"T86x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_TI_UTLB_HIT",
+	"T86x_TI_UTLB_NEW_MISS",
+	"T86x_TI_UTLB_REPLAY_FULL",
+	"T86x_TI_UTLB_REPLAY_MISS",
+	"T86x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T86x_FRAG_ACTIVE",
+	"T86x_FRAG_PRIMITIVES",
+	"T86x_FRAG_PRIMITIVES_DROPPED",
+	"T86x_FRAG_CYCLES_DESC",
+	"T86x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T86x_FRAG_CYCLES_VERT",
+	"T86x_FRAG_CYCLES_TRISETUP",
+	"T86x_FRAG_CYCLES_EZS_ACTIVE",
+	"T86x_FRAG_THREADS",
+	"T86x_FRAG_DUMMY_THREADS",
+	"T86x_FRAG_QUADS_RAST",
+	"T86x_FRAG_QUADS_EZS_TEST",
+	"T86x_FRAG_QUADS_EZS_KILLED",
+	"T86x_FRAG_THREADS_LZS_TEST",
+	"T86x_FRAG_THREADS_LZS_KILLED",
+	"T86x_FRAG_CYCLES_NO_TILE",
+	"T86x_FRAG_NUM_TILES",
+	"T86x_FRAG_TRANS_ELIM",
+	"T86x_COMPUTE_ACTIVE",
+	"T86x_COMPUTE_TASKS",
+	"T86x_COMPUTE_THREADS",
+	"T86x_COMPUTE_CYCLES_DESC",
+	"T86x_TRIPIPE_ACTIVE",
+	"T86x_ARITH_WORDS",
+	"T86x_ARITH_CYCLES_REG",
+	"T86x_ARITH_CYCLES_L0",
+	"T86x_ARITH_FRAG_DEPEND",
+	"T86x_LS_WORDS",
+	"T86x_LS_ISSUES",
+	"T86x_LS_REISSUE_ATTR",
+	"T86x_LS_REISSUES_VARY",
+	"T86x_LS_VARY_RV_MISS",
+	"T86x_LS_VARY_RV_HIT",
+	"T86x_LS_NO_UNPARK",
+	"T86x_TEX_WORDS",
+	"T86x_TEX_BUBBLES",
+	"T86x_TEX_WORDS_L0",
+	"T86x_TEX_WORDS_DESC",
+	"T86x_TEX_ISSUES",
+	"T86x_TEX_RECIRC_FMISS",
+	"T86x_TEX_RECIRC_DESC",
+	"T86x_TEX_RECIRC_MULTI",
+	"T86x_TEX_RECIRC_PMISS",
+	"T86x_TEX_RECIRC_CONF",
+	"T86x_LSC_READ_HITS",
+	"T86x_LSC_READ_OP",
+	"T86x_LSC_WRITE_HITS",
+	"T86x_LSC_WRITE_OP",
+	"T86x_LSC_ATOMIC_HITS",
+	"T86x_LSC_ATOMIC_OP",
+	"T86x_LSC_LINE_FETCHES",
+	"T86x_LSC_DIRTY_LINE",
+	"T86x_LSC_SNOOPS",
+	"T86x_AXI_TLB_STALL",
+	"T86x_AXI_TLB_MISS",
+	"T86x_AXI_TLB_TRANSACTION",
+	"T86x_LS_TLB_MISS",
+	"T86x_LS_TLB_HIT",
+	"T86x_AXI_BEATS_READ",
+	"T86x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MMU_HIT",
+	"T86x_MMU_NEW_MISS",
+	"T86x_MMU_REPLAY_FULL",
+	"T86x_MMU_REPLAY_MISS",
+	"T86x_MMU_TABLE_WALK",
+	"T86x_MMU_REQUESTS",
+	"",
+	"",
+	"T86x_UTLB_HIT",
+	"T86x_UTLB_NEW_MISS",
+	"T86x_UTLB_REPLAY_FULL",
+	"T86x_UTLB_REPLAY_MISS",
+	"T86x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_L2_EXT_WRITE_BEATS",
+	"T86x_L2_EXT_READ_BEATS",
+	"T86x_L2_ANY_LOOKUP",
+	"T86x_L2_READ_LOOKUP",
+	"T86x_L2_SREAD_LOOKUP",
+	"T86x_L2_READ_REPLAY",
+	"T86x_L2_READ_SNOOP",
+	"T86x_L2_READ_HIT",
+	"T86x_L2_CLEAN_MISS",
+	"T86x_L2_WRITE_LOOKUP",
+	"T86x_L2_SWRITE_LOOKUP",
+	"T86x_L2_WRITE_REPLAY",
+	"T86x_L2_WRITE_SNOOP",
+	"T86x_L2_WRITE_HIT",
+	"T86x_L2_EXT_READ_FULL",
+	"",
+	"T86x_L2_EXT_WRITE_FULL",
+	"T86x_L2_EXT_R_W_HAZARD",
+	"T86x_L2_EXT_READ",
+	"T86x_L2_EXT_READ_LINE",
+	"T86x_L2_EXT_WRITE",
+	"T86x_L2_EXT_WRITE_LINE",
+	"T86x_L2_EXT_WRITE_SMALL",
+	"T86x_L2_EXT_BARRIER",
+	"T86x_L2_EXT_AR_STALL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_EXT_RD_BUF_FULL",
+	"T86x_L2_EXT_R_RAW",
+	"T86x_L2_EXT_W_STALL",
+	"T86x_L2_EXT_W_BUF_FULL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_TAG_HAZARD",
+	"T86x_L2_SNOOP_FULL",
+	"T86x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t88x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MESSAGES_SENT",
+	"T88x_MESSAGES_RECEIVED",
+	"T88x_GPU_ACTIVE",
+	"T88x_IRQ_ACTIVE",
+	"T88x_JS0_JOBS",
+	"T88x_JS0_TASKS",
+	"T88x_JS0_ACTIVE",
+	"",
+	"T88x_JS0_WAIT_READ",
+	"T88x_JS0_WAIT_ISSUE",
+	"T88x_JS0_WAIT_DEPEND",
+	"T88x_JS0_WAIT_FINISH",
+	"T88x_JS1_JOBS",
+	"T88x_JS1_TASKS",
+	"T88x_JS1_ACTIVE",
+	"",
+	"T88x_JS1_WAIT_READ",
+	"T88x_JS1_WAIT_ISSUE",
+	"T88x_JS1_WAIT_DEPEND",
+	"T88x_JS1_WAIT_FINISH",
+	"T88x_JS2_JOBS",
+	"T88x_JS2_TASKS",
+	"T88x_JS2_ACTIVE",
+	"",
+	"T88x_JS2_WAIT_READ",
+	"T88x_JS2_WAIT_ISSUE",
+	"T88x_JS2_WAIT_DEPEND",
+	"T88x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T88x_TI_JOBS_PROCESSED",
+	"T88x_TI_TRIANGLES",
+	"T88x_TI_QUADS",
+	"T88x_TI_POLYGONS",
+	"T88x_TI_POINTS",
+	"T88x_TI_LINES",
+	"T88x_TI_VCACHE_HIT",
+	"T88x_TI_VCACHE_MISS",
+	"T88x_TI_FRONT_FACING",
+	"T88x_TI_BACK_FACING",
+	"T88x_TI_PRIM_VISIBLE",
+	"T88x_TI_PRIM_CULLED",
+	"T88x_TI_PRIM_CLIPPED",
+	"T88x_TI_LEVEL0",
+	"T88x_TI_LEVEL1",
+	"T88x_TI_LEVEL2",
+	"T88x_TI_LEVEL3",
+	"T88x_TI_LEVEL4",
+	"T88x_TI_LEVEL5",
+	"T88x_TI_LEVEL6",
+	"T88x_TI_LEVEL7",
+	"T88x_TI_COMMAND_1",
+	"T88x_TI_COMMAND_2",
+	"T88x_TI_COMMAND_3",
+	"T88x_TI_COMMAND_4",
+	"T88x_TI_COMMAND_5_7",
+	"T88x_TI_COMMAND_8_15",
+	"T88x_TI_COMMAND_16_63",
+	"T88x_TI_COMMAND_64",
+	"T88x_TI_COMPRESS_IN",
+	"T88x_TI_COMPRESS_OUT",
+	"T88x_TI_COMPRESS_FLUSH",
+	"T88x_TI_TIMESTAMPS",
+	"T88x_TI_PCACHE_HIT",
+	"T88x_TI_PCACHE_MISS",
+	"T88x_TI_PCACHE_LINE",
+	"T88x_TI_PCACHE_STALL",
+	"T88x_TI_WRBUF_HIT",
+	"T88x_TI_WRBUF_MISS",
+	"T88x_TI_WRBUF_LINE",
+	"T88x_TI_WRBUF_PARTIAL",
+	"T88x_TI_WRBUF_STALL",
+	"T88x_TI_ACTIVE",
+	"T88x_TI_LOADING_DESC",
+	"T88x_TI_INDEX_WAIT",
+	"T88x_TI_INDEX_RANGE_WAIT",
+	"T88x_TI_VERTEX_WAIT",
+	"T88x_TI_PCACHE_WAIT",
+	"T88x_TI_WRBUF_WAIT",
+	"T88x_TI_BUS_READ",
+	"T88x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_TI_UTLB_HIT",
+	"T88x_TI_UTLB_NEW_MISS",
+	"T88x_TI_UTLB_REPLAY_FULL",
+	"T88x_TI_UTLB_REPLAY_MISS",
+	"T88x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T88x_FRAG_ACTIVE",
+	"T88x_FRAG_PRIMITIVES",
+	"T88x_FRAG_PRIMITIVES_DROPPED",
+	"T88x_FRAG_CYCLES_DESC",
+	"T88x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T88x_FRAG_CYCLES_VERT",
+	"T88x_FRAG_CYCLES_TRISETUP",
+	"T88x_FRAG_CYCLES_EZS_ACTIVE",
+	"T88x_FRAG_THREADS",
+	"T88x_FRAG_DUMMY_THREADS",
+	"T88x_FRAG_QUADS_RAST",
+	"T88x_FRAG_QUADS_EZS_TEST",
+	"T88x_FRAG_QUADS_EZS_KILLED",
+	"T88x_FRAG_THREADS_LZS_TEST",
+	"T88x_FRAG_THREADS_LZS_KILLED",
+	"T88x_FRAG_CYCLES_NO_TILE",
+	"T88x_FRAG_NUM_TILES",
+	"T88x_FRAG_TRANS_ELIM",
+	"T88x_COMPUTE_ACTIVE",
+	"T88x_COMPUTE_TASKS",
+	"T88x_COMPUTE_THREADS",
+	"T88x_COMPUTE_CYCLES_DESC",
+	"T88x_TRIPIPE_ACTIVE",
+	"T88x_ARITH_WORDS",
+	"T88x_ARITH_CYCLES_REG",
+	"T88x_ARITH_CYCLES_L0",
+	"T88x_ARITH_FRAG_DEPEND",
+	"T88x_LS_WORDS",
+	"T88x_LS_ISSUES",
+	"T88x_LS_REISSUE_ATTR",
+	"T88x_LS_REISSUES_VARY",
+	"T88x_LS_VARY_RV_MISS",
+	"T88x_LS_VARY_RV_HIT",
+	"T88x_LS_NO_UNPARK",
+	"T88x_TEX_WORDS",
+	"T88x_TEX_BUBBLES",
+	"T88x_TEX_WORDS_L0",
+	"T88x_TEX_WORDS_DESC",
+	"T88x_TEX_ISSUES",
+	"T88x_TEX_RECIRC_FMISS",
+	"T88x_TEX_RECIRC_DESC",
+	"T88x_TEX_RECIRC_MULTI",
+	"T88x_TEX_RECIRC_PMISS",
+	"T88x_TEX_RECIRC_CONF",
+	"T88x_LSC_READ_HITS",
+	"T88x_LSC_READ_OP",
+	"T88x_LSC_WRITE_HITS",
+	"T88x_LSC_WRITE_OP",
+	"T88x_LSC_ATOMIC_HITS",
+	"T88x_LSC_ATOMIC_OP",
+	"T88x_LSC_LINE_FETCHES",
+	"T88x_LSC_DIRTY_LINE",
+	"T88x_LSC_SNOOPS",
+	"T88x_AXI_TLB_STALL",
+	"T88x_AXI_TLB_MISS",
+	"T88x_AXI_TLB_TRANSACTION",
+	"T88x_LS_TLB_MISS",
+	"T88x_LS_TLB_HIT",
+	"T88x_AXI_BEATS_READ",
+	"T88x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MMU_HIT",
+	"T88x_MMU_NEW_MISS",
+	"T88x_MMU_REPLAY_FULL",
+	"T88x_MMU_REPLAY_MISS",
+	"T88x_MMU_TABLE_WALK",
+	"T88x_MMU_REQUESTS",
+	"",
+	"",
+	"T88x_UTLB_HIT",
+	"T88x_UTLB_NEW_MISS",
+	"T88x_UTLB_REPLAY_FULL",
+	"T88x_UTLB_REPLAY_MISS",
+	"T88x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_L2_EXT_WRITE_BEATS",
+	"T88x_L2_EXT_READ_BEATS",
+	"T88x_L2_ANY_LOOKUP",
+	"T88x_L2_READ_LOOKUP",
+	"T88x_L2_SREAD_LOOKUP",
+	"T88x_L2_READ_REPLAY",
+	"T88x_L2_READ_SNOOP",
+	"T88x_L2_READ_HIT",
+	"T88x_L2_CLEAN_MISS",
+	"T88x_L2_WRITE_LOOKUP",
+	"T88x_L2_SWRITE_LOOKUP",
+	"T88x_L2_WRITE_REPLAY",
+	"T88x_L2_WRITE_SNOOP",
+	"T88x_L2_WRITE_HIT",
+	"T88x_L2_EXT_READ_FULL",
+	"",
+	"T88x_L2_EXT_WRITE_FULL",
+	"T88x_L2_EXT_R_W_HAZARD",
+	"T88x_L2_EXT_READ",
+	"T88x_L2_EXT_READ_LINE",
+	"T88x_L2_EXT_WRITE",
+	"T88x_L2_EXT_WRITE_LINE",
+	"T88x_L2_EXT_WRITE_SMALL",
+	"T88x_L2_EXT_BARRIER",
+	"T88x_L2_EXT_AR_STALL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_EXT_RD_BUF_FULL",
+	"T88x_L2_EXT_R_RAW",
+	"T88x_L2_EXT_W_STALL",
+	"T88x_L2_EXT_W_BUF_FULL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_TAG_HAZARD",
+	"T88x_L2_SNOOP_FULL",
+	"T88x_L2_REPLAY_FULL"
+};
+
+#include "mali_kbase_gator_hwcnt_names_tmix.h"
+
+#include "mali_kbase_gator_hwcnt_names_thex.h"
+
+#include "mali_kbase_gator_hwcnt_names_tsix.h"
+
+#include "mali_kbase_gator_hwcnt_names_tnox.h"
+
+#include "mali_kbase_gator_hwcnt_names_tgox.h"
+
+#include "mali_kbase_gator_hwcnt_names_tkax.h"
+
+#include "mali_kbase_gator_hwcnt_names_ttrx.h"
+
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tgox.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tgox.h
new file mode 100755
index 0000000..72b5266
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tgox.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TGOX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TGOX_H_
+
+static const char * const hardware_counters_mali_tGOx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_MESSAGES_SENT",
+	"TGOx_MESSAGES_RECEIVED",
+	"TGOx_GPU_ACTIVE",
+	"TGOx_IRQ_ACTIVE",
+	"TGOx_JS0_JOBS",
+	"TGOx_JS0_TASKS",
+	"TGOx_JS0_ACTIVE",
+	"",
+	"TGOx_JS0_WAIT_READ",
+	"TGOx_JS0_WAIT_ISSUE",
+	"TGOx_JS0_WAIT_DEPEND",
+	"TGOx_JS0_WAIT_FINISH",
+	"TGOx_JS1_JOBS",
+	"TGOx_JS1_TASKS",
+	"TGOx_JS1_ACTIVE",
+	"",
+	"TGOx_JS1_WAIT_READ",
+	"TGOx_JS1_WAIT_ISSUE",
+	"TGOx_JS1_WAIT_DEPEND",
+	"TGOx_JS1_WAIT_FINISH",
+	"TGOx_JS2_JOBS",
+	"TGOx_JS2_TASKS",
+	"TGOx_JS2_ACTIVE",
+	"",
+	"TGOx_JS2_WAIT_READ",
+	"TGOx_JS2_WAIT_ISSUE",
+	"TGOx_JS2_WAIT_DEPEND",
+	"TGOx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_TILER_ACTIVE",
+	"TGOx_JOBS_PROCESSED",
+	"TGOx_TRIANGLES",
+	"TGOx_LINES",
+	"TGOx_POINTS",
+	"TGOx_FRONT_FACING",
+	"TGOx_BACK_FACING",
+	"TGOx_PRIM_VISIBLE",
+	"TGOx_PRIM_CULLED",
+	"TGOx_PRIM_CLIPPED",
+	"TGOx_PRIM_SAT_CULLED",
+	"TGOx_BIN_ALLOC_INIT",
+	"TGOx_BIN_ALLOC_OVERFLOW",
+	"TGOx_BUS_READ",
+	"",
+	"TGOx_BUS_WRITE",
+	"TGOx_LOADING_DESC",
+	"TGOx_IDVS_POS_SHAD_REQ",
+	"TGOx_IDVS_POS_SHAD_WAIT",
+	"TGOx_IDVS_POS_SHAD_STALL",
+	"TGOx_IDVS_POS_FIFO_FULL",
+	"TGOx_PREFETCH_STALL",
+	"TGOx_VCACHE_HIT",
+	"TGOx_VCACHE_MISS",
+	"TGOx_VCACHE_LINE_WAIT",
+	"TGOx_VFETCH_POS_READ_WAIT",
+	"TGOx_VFETCH_VERTEX_WAIT",
+	"TGOx_VFETCH_STALL",
+	"TGOx_PRIMASSY_STALL",
+	"TGOx_BBOX_GEN_STALL",
+	"TGOx_IDVS_VBU_HIT",
+	"TGOx_IDVS_VBU_MISS",
+	"TGOx_IDVS_VBU_LINE_DEALLOCATE",
+	"TGOx_IDVS_VAR_SHAD_REQ",
+	"TGOx_IDVS_VAR_SHAD_STALL",
+	"TGOx_BINNER_STALL",
+	"TGOx_ITER_STALL",
+	"TGOx_COMPRESS_MISS",
+	"TGOx_COMPRESS_STALL",
+	"TGOx_PCACHE_HIT",
+	"TGOx_PCACHE_MISS",
+	"TGOx_PCACHE_MISS_STALL",
+	"TGOx_PCACHE_EVICT_STALL",
+	"TGOx_PMGR_PTR_WR_STALL",
+	"TGOx_PMGR_PTR_RD_STALL",
+	"TGOx_PMGR_CMD_WR_STALL",
+	"TGOx_WRBUF_ACTIVE",
+	"TGOx_WRBUF_HIT",
+	"TGOx_WRBUF_MISS",
+	"TGOx_WRBUF_NO_FREE_LINE_STALL",
+	"TGOx_WRBUF_NO_AXI_ID_STALL",
+	"TGOx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TGOx_UTLB_TRANS",
+	"TGOx_UTLB_TRANS_HIT",
+	"TGOx_UTLB_TRANS_STALL",
+	"TGOx_UTLB_TRANS_MISS_DELAY",
+	"TGOx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_FRAG_ACTIVE",
+	"TGOx_FRAG_PRIMITIVES",
+	"TGOx_FRAG_PRIM_RAST",
+	"TGOx_FRAG_FPK_ACTIVE",
+	"TGOx_FRAG_STARVING",
+	"TGOx_FRAG_WARPS",
+	"TGOx_FRAG_PARTIAL_WARPS",
+	"TGOx_FRAG_QUADS_RAST",
+	"TGOx_FRAG_QUADS_EZS_TEST",
+	"TGOx_FRAG_QUADS_EZS_UPDATE",
+	"TGOx_FRAG_QUADS_EZS_KILL",
+	"TGOx_FRAG_LZS_TEST",
+	"TGOx_FRAG_LZS_KILL",
+	"TGOx_WARP_REG_SIZE_64",
+	"TGOx_FRAG_PTILES",
+	"TGOx_FRAG_TRANS_ELIM",
+	"TGOx_QUAD_FPK_KILLER",
+	"TGOx_FULL_QUAD_WARPS",
+	"TGOx_COMPUTE_ACTIVE",
+	"TGOx_COMPUTE_TASKS",
+	"TGOx_COMPUTE_WARPS",
+	"TGOx_COMPUTE_STARVING",
+	"TGOx_EXEC_CORE_ACTIVE",
+	"TGOx_EXEC_ACTIVE",
+	"TGOx_EXEC_INSTR_COUNT",
+	"TGOx_EXEC_INSTR_DIVERGED",
+	"TGOx_EXEC_INSTR_STARVING",
+	"TGOx_ARITH_INSTR_SINGLE_FMA",
+	"TGOx_ARITH_INSTR_DOUBLE",
+	"TGOx_ARITH_INSTR_MSG",
+	"TGOx_ARITH_INSTR_MSG_ONLY",
+	"TGOx_TEX_MSGI_NUM_QUADS",
+	"TGOx_TEX_DFCH_NUM_PASSES",
+	"TGOx_TEX_DFCH_NUM_PASSES_MISS",
+	"TGOx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TGOx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TGOx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TGOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TGOx_TEX_TFCH_NUM_OPERATIONS",
+	"TGOx_TEX_FILT_NUM_OPERATIONS",
+	"TGOx_LS_MEM_READ_FULL",
+	"TGOx_LS_MEM_READ_SHORT",
+	"TGOx_LS_MEM_WRITE_FULL",
+	"TGOx_LS_MEM_WRITE_SHORT",
+	"TGOx_LS_MEM_ATOMIC",
+	"TGOx_VARY_INSTR",
+	"TGOx_VARY_SLOT_32",
+	"TGOx_VARY_SLOT_16",
+	"TGOx_ATTR_INSTR",
+	"TGOx_ARITH_INSTR_FP_MUL",
+	"TGOx_BEATS_RD_FTC",
+	"TGOx_BEATS_RD_FTC_EXT",
+	"TGOx_BEATS_RD_LSC",
+	"TGOx_BEATS_RD_LSC_EXT",
+	"TGOx_BEATS_RD_TEX",
+	"TGOx_BEATS_RD_TEX_EXT",
+	"TGOx_BEATS_RD_OTHER",
+	"TGOx_BEATS_WR_LSC_WB",
+	"TGOx_BEATS_WR_TIB",
+	"TGOx_BEATS_WR_LSC_OTHER",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TGOx_L2_RD_MSG_IN",
+	"TGOx_L2_RD_MSG_IN_STALL",
+	"TGOx_L2_WR_MSG_IN",
+	"TGOx_L2_WR_MSG_IN_STALL",
+	"TGOx_L2_SNP_MSG_IN",
+	"TGOx_L2_SNP_MSG_IN_STALL",
+	"TGOx_L2_RD_MSG_OUT",
+	"TGOx_L2_RD_MSG_OUT_STALL",
+	"TGOx_L2_WR_MSG_OUT",
+	"TGOx_L2_ANY_LOOKUP",
+	"TGOx_L2_READ_LOOKUP",
+	"TGOx_L2_WRITE_LOOKUP",
+	"TGOx_L2_EXT_SNOOP_LOOKUP",
+	"TGOx_L2_EXT_READ",
+	"TGOx_L2_EXT_READ_NOSNP",
+	"TGOx_L2_EXT_READ_UNIQUE",
+	"TGOx_L2_EXT_READ_BEATS",
+	"TGOx_L2_EXT_AR_STALL",
+	"TGOx_L2_EXT_AR_CNT_Q1",
+	"TGOx_L2_EXT_AR_CNT_Q2",
+	"TGOx_L2_EXT_AR_CNT_Q3",
+	"TGOx_L2_EXT_RRESP_0_127",
+	"TGOx_L2_EXT_RRESP_128_191",
+	"TGOx_L2_EXT_RRESP_192_255",
+	"TGOx_L2_EXT_RRESP_256_319",
+	"TGOx_L2_EXT_RRESP_320_383",
+	"TGOx_L2_EXT_WRITE",
+	"TGOx_L2_EXT_WRITE_NOSNP_FULL",
+	"TGOx_L2_EXT_WRITE_NOSNP_PTL",
+	"TGOx_L2_EXT_WRITE_SNP_FULL",
+	"TGOx_L2_EXT_WRITE_SNP_PTL",
+	"TGOx_L2_EXT_WRITE_BEATS",
+	"TGOx_L2_EXT_W_STALL",
+	"TGOx_L2_EXT_AW_CNT_Q1",
+	"TGOx_L2_EXT_AW_CNT_Q2",
+	"TGOx_L2_EXT_AW_CNT_Q3",
+	"TGOx_L2_EXT_SNOOP",
+	"TGOx_L2_EXT_SNOOP_STALL",
+	"TGOx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TGOx_L2_EXT_SNOOP_RESP_DATA",
+	"TGOx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TGOX_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
new file mode 100755
index 0000000..e24e91a
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+
+static const char * const hardware_counters_mali_tHEx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MESSAGES_SENT",
+	"THEx_MESSAGES_RECEIVED",
+	"THEx_GPU_ACTIVE",
+	"THEx_IRQ_ACTIVE",
+	"THEx_JS0_JOBS",
+	"THEx_JS0_TASKS",
+	"THEx_JS0_ACTIVE",
+	"",
+	"THEx_JS0_WAIT_READ",
+	"THEx_JS0_WAIT_ISSUE",
+	"THEx_JS0_WAIT_DEPEND",
+	"THEx_JS0_WAIT_FINISH",
+	"THEx_JS1_JOBS",
+	"THEx_JS1_TASKS",
+	"THEx_JS1_ACTIVE",
+	"",
+	"THEx_JS1_WAIT_READ",
+	"THEx_JS1_WAIT_ISSUE",
+	"THEx_JS1_WAIT_DEPEND",
+	"THEx_JS1_WAIT_FINISH",
+	"THEx_JS2_JOBS",
+	"THEx_JS2_TASKS",
+	"THEx_JS2_ACTIVE",
+	"",
+	"THEx_JS2_WAIT_READ",
+	"THEx_JS2_WAIT_ISSUE",
+	"THEx_JS2_WAIT_DEPEND",
+	"THEx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"THEx_TILER_ACTIVE",
+	"THEx_JOBS_PROCESSED",
+	"THEx_TRIANGLES",
+	"THEx_LINES",
+	"THEx_POINTS",
+	"THEx_FRONT_FACING",
+	"THEx_BACK_FACING",
+	"THEx_PRIM_VISIBLE",
+	"THEx_PRIM_CULLED",
+	"THEx_PRIM_CLIPPED",
+	"THEx_PRIM_SAT_CULLED",
+	"THEx_BIN_ALLOC_INIT",
+	"THEx_BIN_ALLOC_OVERFLOW",
+	"THEx_BUS_READ",
+	"",
+	"THEx_BUS_WRITE",
+	"THEx_LOADING_DESC",
+	"THEx_IDVS_POS_SHAD_REQ",
+	"THEx_IDVS_POS_SHAD_WAIT",
+	"THEx_IDVS_POS_SHAD_STALL",
+	"THEx_IDVS_POS_FIFO_FULL",
+	"THEx_PREFETCH_STALL",
+	"THEx_VCACHE_HIT",
+	"THEx_VCACHE_MISS",
+	"THEx_VCACHE_LINE_WAIT",
+	"THEx_VFETCH_POS_READ_WAIT",
+	"THEx_VFETCH_VERTEX_WAIT",
+	"THEx_VFETCH_STALL",
+	"THEx_PRIMASSY_STALL",
+	"THEx_BBOX_GEN_STALL",
+	"THEx_IDVS_VBU_HIT",
+	"THEx_IDVS_VBU_MISS",
+	"THEx_IDVS_VBU_LINE_DEALLOCATE",
+	"THEx_IDVS_VAR_SHAD_REQ",
+	"THEx_IDVS_VAR_SHAD_STALL",
+	"THEx_BINNER_STALL",
+	"THEx_ITER_STALL",
+	"THEx_COMPRESS_MISS",
+	"THEx_COMPRESS_STALL",
+	"THEx_PCACHE_HIT",
+	"THEx_PCACHE_MISS",
+	"THEx_PCACHE_MISS_STALL",
+	"THEx_PCACHE_EVICT_STALL",
+	"THEx_PMGR_PTR_WR_STALL",
+	"THEx_PMGR_PTR_RD_STALL",
+	"THEx_PMGR_CMD_WR_STALL",
+	"THEx_WRBUF_ACTIVE",
+	"THEx_WRBUF_HIT",
+	"THEx_WRBUF_MISS",
+	"THEx_WRBUF_NO_FREE_LINE_STALL",
+	"THEx_WRBUF_NO_AXI_ID_STALL",
+	"THEx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"THEx_UTLB_TRANS",
+	"THEx_UTLB_TRANS_HIT",
+	"THEx_UTLB_TRANS_STALL",
+	"THEx_UTLB_TRANS_MISS_DELAY",
+	"THEx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"THEx_FRAG_ACTIVE",
+	"THEx_FRAG_PRIMITIVES",
+	"THEx_FRAG_PRIM_RAST",
+	"THEx_FRAG_FPK_ACTIVE",
+	"THEx_FRAG_STARVING",
+	"THEx_FRAG_WARPS",
+	"THEx_FRAG_PARTIAL_WARPS",
+	"THEx_FRAG_QUADS_RAST",
+	"THEx_FRAG_QUADS_EZS_TEST",
+	"THEx_FRAG_QUADS_EZS_UPDATE",
+	"THEx_FRAG_QUADS_EZS_KILL",
+	"THEx_FRAG_LZS_TEST",
+	"THEx_FRAG_LZS_KILL",
+	"",
+	"THEx_FRAG_PTILES",
+	"THEx_FRAG_TRANS_ELIM",
+	"THEx_QUAD_FPK_KILLER",
+	"",
+	"THEx_COMPUTE_ACTIVE",
+	"THEx_COMPUTE_TASKS",
+	"THEx_COMPUTE_WARPS",
+	"THEx_COMPUTE_STARVING",
+	"THEx_EXEC_CORE_ACTIVE",
+	"THEx_EXEC_ACTIVE",
+	"THEx_EXEC_INSTR_COUNT",
+	"THEx_EXEC_INSTR_DIVERGED",
+	"THEx_EXEC_INSTR_STARVING",
+	"THEx_ARITH_INSTR_SINGLE_FMA",
+	"THEx_ARITH_INSTR_DOUBLE",
+	"THEx_ARITH_INSTR_MSG",
+	"THEx_ARITH_INSTR_MSG_ONLY",
+	"THEx_TEX_INSTR",
+	"THEx_TEX_INSTR_MIPMAP",
+	"THEx_TEX_INSTR_COMPRESSED",
+	"THEx_TEX_INSTR_3D",
+	"THEx_TEX_INSTR_TRILINEAR",
+	"THEx_TEX_COORD_ISSUE",
+	"THEx_TEX_COORD_STALL",
+	"THEx_TEX_STARVE_CACHE",
+	"THEx_TEX_STARVE_FILTER",
+	"THEx_LS_MEM_READ_FULL",
+	"THEx_LS_MEM_READ_SHORT",
+	"THEx_LS_MEM_WRITE_FULL",
+	"THEx_LS_MEM_WRITE_SHORT",
+	"THEx_LS_MEM_ATOMIC",
+	"THEx_VARY_INSTR",
+	"THEx_VARY_SLOT_32",
+	"THEx_VARY_SLOT_16",
+	"THEx_ATTR_INSTR",
+	"THEx_ARITH_INSTR_FP_MUL",
+	"THEx_BEATS_RD_FTC",
+	"THEx_BEATS_RD_FTC_EXT",
+	"THEx_BEATS_RD_LSC",
+	"THEx_BEATS_RD_LSC_EXT",
+	"THEx_BEATS_RD_TEX",
+	"THEx_BEATS_RD_TEX_EXT",
+	"THEx_BEATS_RD_OTHER",
+	"THEx_BEATS_WR_LSC",
+	"THEx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"THEx_L2_RD_MSG_IN",
+	"THEx_L2_RD_MSG_IN_STALL",
+	"THEx_L2_WR_MSG_IN",
+	"THEx_L2_WR_MSG_IN_STALL",
+	"THEx_L2_SNP_MSG_IN",
+	"THEx_L2_SNP_MSG_IN_STALL",
+	"THEx_L2_RD_MSG_OUT",
+	"THEx_L2_RD_MSG_OUT_STALL",
+	"THEx_L2_WR_MSG_OUT",
+	"THEx_L2_ANY_LOOKUP",
+	"THEx_L2_READ_LOOKUP",
+	"THEx_L2_WRITE_LOOKUP",
+	"THEx_L2_EXT_SNOOP_LOOKUP",
+	"THEx_L2_EXT_READ",
+	"THEx_L2_EXT_READ_NOSNP",
+	"THEx_L2_EXT_READ_UNIQUE",
+	"THEx_L2_EXT_READ_BEATS",
+	"THEx_L2_EXT_AR_STALL",
+	"THEx_L2_EXT_AR_CNT_Q1",
+	"THEx_L2_EXT_AR_CNT_Q2",
+	"THEx_L2_EXT_AR_CNT_Q3",
+	"THEx_L2_EXT_RRESP_0_127",
+	"THEx_L2_EXT_RRESP_128_191",
+	"THEx_L2_EXT_RRESP_192_255",
+	"THEx_L2_EXT_RRESP_256_319",
+	"THEx_L2_EXT_RRESP_320_383",
+	"THEx_L2_EXT_WRITE",
+	"THEx_L2_EXT_WRITE_NOSNP_FULL",
+	"THEx_L2_EXT_WRITE_NOSNP_PTL",
+	"THEx_L2_EXT_WRITE_SNP_FULL",
+	"THEx_L2_EXT_WRITE_SNP_PTL",
+	"THEx_L2_EXT_WRITE_BEATS",
+	"THEx_L2_EXT_W_STALL",
+	"THEx_L2_EXT_AW_CNT_Q1",
+	"THEx_L2_EXT_AW_CNT_Q2",
+	"THEx_L2_EXT_AW_CNT_Q3",
+	"THEx_L2_EXT_SNOOP",
+	"THEx_L2_EXT_SNOOP_STALL",
+	"THEx_L2_EXT_SNOOP_RESP_CLEAN",
+	"THEx_L2_EXT_SNOOP_RESP_DATA",
+	"THEx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h
new file mode 100755
index 0000000..73db45c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TKAX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TKAX_H_
+
+static const char * const hardware_counters_mali_tKAx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_MESSAGES_SENT",
+	"TKAx_MESSAGES_RECEIVED",
+	"TKAx_GPU_ACTIVE",
+	"TKAx_IRQ_ACTIVE",
+	"TKAx_JS0_JOBS",
+	"TKAx_JS0_TASKS",
+	"TKAx_JS0_ACTIVE",
+	"",
+	"TKAx_JS0_WAIT_READ",
+	"TKAx_JS0_WAIT_ISSUE",
+	"TKAx_JS0_WAIT_DEPEND",
+	"TKAx_JS0_WAIT_FINISH",
+	"TKAx_JS1_JOBS",
+	"TKAx_JS1_TASKS",
+	"TKAx_JS1_ACTIVE",
+	"",
+	"TKAx_JS1_WAIT_READ",
+	"TKAx_JS1_WAIT_ISSUE",
+	"TKAx_JS1_WAIT_DEPEND",
+	"TKAx_JS1_WAIT_FINISH",
+	"TKAx_JS2_JOBS",
+	"TKAx_JS2_TASKS",
+	"TKAx_JS2_ACTIVE",
+	"",
+	"TKAx_JS2_WAIT_READ",
+	"TKAx_JS2_WAIT_ISSUE",
+	"TKAx_JS2_WAIT_DEPEND",
+	"TKAx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_TILER_ACTIVE",
+	"TKAx_JOBS_PROCESSED",
+	"TKAx_TRIANGLES",
+	"TKAx_LINES",
+	"TKAx_POINTS",
+	"TKAx_FRONT_FACING",
+	"TKAx_BACK_FACING",
+	"TKAx_PRIM_VISIBLE",
+	"TKAx_PRIM_CULLED",
+	"TKAx_PRIM_CLIPPED",
+	"TKAx_PRIM_SAT_CULLED",
+	"TKAx_BIN_ALLOC_INIT",
+	"TKAx_BIN_ALLOC_OVERFLOW",
+	"TKAx_BUS_READ",
+	"",
+	"TKAx_BUS_WRITE",
+	"TKAx_LOADING_DESC",
+	"TKAx_IDVS_POS_SHAD_REQ",
+	"TKAx_IDVS_POS_SHAD_WAIT",
+	"TKAx_IDVS_POS_SHAD_STALL",
+	"TKAx_IDVS_POS_FIFO_FULL",
+	"TKAx_PREFETCH_STALL",
+	"TKAx_VCACHE_HIT",
+	"TKAx_VCACHE_MISS",
+	"TKAx_VCACHE_LINE_WAIT",
+	"TKAx_VFETCH_POS_READ_WAIT",
+	"TKAx_VFETCH_VERTEX_WAIT",
+	"TKAx_VFETCH_STALL",
+	"TKAx_PRIMASSY_STALL",
+	"TKAx_BBOX_GEN_STALL",
+	"TKAx_IDVS_VBU_HIT",
+	"TKAx_IDVS_VBU_MISS",
+	"TKAx_IDVS_VBU_LINE_DEALLOCATE",
+	"TKAx_IDVS_VAR_SHAD_REQ",
+	"TKAx_IDVS_VAR_SHAD_STALL",
+	"TKAx_BINNER_STALL",
+	"TKAx_ITER_STALL",
+	"TKAx_COMPRESS_MISS",
+	"TKAx_COMPRESS_STALL",
+	"TKAx_PCACHE_HIT",
+	"TKAx_PCACHE_MISS",
+	"TKAx_PCACHE_MISS_STALL",
+	"TKAx_PCACHE_EVICT_STALL",
+	"TKAx_PMGR_PTR_WR_STALL",
+	"TKAx_PMGR_PTR_RD_STALL",
+	"TKAx_PMGR_CMD_WR_STALL",
+	"TKAx_WRBUF_ACTIVE",
+	"TKAx_WRBUF_HIT",
+	"TKAx_WRBUF_MISS",
+	"TKAx_WRBUF_NO_FREE_LINE_STALL",
+	"TKAx_WRBUF_NO_AXI_ID_STALL",
+	"TKAx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TKAx_UTLB_TRANS",
+	"TKAx_UTLB_TRANS_HIT",
+	"TKAx_UTLB_TRANS_STALL",
+	"TKAx_UTLB_TRANS_MISS_DELAY",
+	"TKAx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_FRAG_ACTIVE",
+	"TKAx_FRAG_PRIMITIVES",
+	"TKAx_FRAG_PRIM_RAST",
+	"TKAx_FRAG_FPK_ACTIVE",
+	"TKAx_FRAG_STARVING",
+	"TKAx_FRAG_WARPS",
+	"TKAx_FRAG_PARTIAL_WARPS",
+	"TKAx_FRAG_QUADS_RAST",
+	"TKAx_FRAG_QUADS_EZS_TEST",
+	"TKAx_FRAG_QUADS_EZS_UPDATE",
+	"TKAx_FRAG_QUADS_EZS_KILL",
+	"TKAx_FRAG_LZS_TEST",
+	"TKAx_FRAG_LZS_KILL",
+	"TKAx_WARP_REG_SIZE_64",
+	"TKAx_FRAG_PTILES",
+	"TKAx_FRAG_TRANS_ELIM",
+	"TKAx_QUAD_FPK_KILLER",
+	"TKAx_FULL_QUAD_WARPS",
+	"TKAx_COMPUTE_ACTIVE",
+	"TKAx_COMPUTE_TASKS",
+	"TKAx_COMPUTE_WARPS",
+	"TKAx_COMPUTE_STARVING",
+	"TKAx_EXEC_CORE_ACTIVE",
+	"TKAx_EXEC_ACTIVE",
+	"TKAx_EXEC_INSTR_COUNT",
+	"TKAx_EXEC_INSTR_DIVERGED",
+	"TKAx_EXEC_INSTR_STARVING",
+	"TKAx_ARITH_INSTR_SINGLE_FMA",
+	"TKAx_ARITH_INSTR_DOUBLE",
+	"TKAx_ARITH_INSTR_MSG",
+	"TKAx_ARITH_INSTR_MSG_ONLY",
+	"TKAx_TEX_MSGI_NUM_QUADS",
+	"TKAx_TEX_DFCH_NUM_PASSES",
+	"TKAx_TEX_DFCH_NUM_PASSES_MISS",
+	"TKAx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TKAx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TKAx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TKAx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TKAx_TEX_TFCH_NUM_OPERATIONS",
+	"TKAx_TEX_FILT_NUM_OPERATIONS",
+	"TKAx_LS_MEM_READ_FULL",
+	"TKAx_LS_MEM_READ_SHORT",
+	"TKAx_LS_MEM_WRITE_FULL",
+	"TKAx_LS_MEM_WRITE_SHORT",
+	"TKAx_LS_MEM_ATOMIC",
+	"TKAx_VARY_INSTR",
+	"TKAx_VARY_SLOT_32",
+	"TKAx_VARY_SLOT_16",
+	"TKAx_ATTR_INSTR",
+	"TKAx_ARITH_INSTR_FP_MUL",
+	"TKAx_BEATS_RD_FTC",
+	"TKAx_BEATS_RD_FTC_EXT",
+	"TKAx_BEATS_RD_LSC",
+	"TKAx_BEATS_RD_LSC_EXT",
+	"TKAx_BEATS_RD_TEX",
+	"TKAx_BEATS_RD_TEX_EXT",
+	"TKAx_BEATS_RD_OTHER",
+	"TKAx_BEATS_WR_LSC_OTHER",
+	"TKAx_BEATS_WR_TIB",
+	"TKAx_BEATS_WR_LSC_WB",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TKAx_L2_RD_MSG_IN",
+	"TKAx_L2_RD_MSG_IN_STALL",
+	"TKAx_L2_WR_MSG_IN",
+	"TKAx_L2_WR_MSG_IN_STALL",
+	"TKAx_L2_SNP_MSG_IN",
+	"TKAx_L2_SNP_MSG_IN_STALL",
+	"TKAx_L2_RD_MSG_OUT",
+	"TKAx_L2_RD_MSG_OUT_STALL",
+	"TKAx_L2_WR_MSG_OUT",
+	"TKAx_L2_ANY_LOOKUP",
+	"TKAx_L2_READ_LOOKUP",
+	"TKAx_L2_WRITE_LOOKUP",
+	"TKAx_L2_EXT_SNOOP_LOOKUP",
+	"TKAx_L2_EXT_READ",
+	"TKAx_L2_EXT_READ_NOSNP",
+	"TKAx_L2_EXT_READ_UNIQUE",
+	"TKAx_L2_EXT_READ_BEATS",
+	"TKAx_L2_EXT_AR_STALL",
+	"TKAx_L2_EXT_AR_CNT_Q1",
+	"TKAx_L2_EXT_AR_CNT_Q2",
+	"TKAx_L2_EXT_AR_CNT_Q3",
+	"TKAx_L2_EXT_RRESP_0_127",
+	"TKAx_L2_EXT_RRESP_128_191",
+	"TKAx_L2_EXT_RRESP_192_255",
+	"TKAx_L2_EXT_RRESP_256_319",
+	"TKAx_L2_EXT_RRESP_320_383",
+	"TKAx_L2_EXT_WRITE",
+	"TKAx_L2_EXT_WRITE_NOSNP_FULL",
+	"TKAx_L2_EXT_WRITE_NOSNP_PTL",
+	"TKAx_L2_EXT_WRITE_SNP_FULL",
+	"TKAx_L2_EXT_WRITE_SNP_PTL",
+	"TKAx_L2_EXT_WRITE_BEATS",
+	"TKAx_L2_EXT_W_STALL",
+	"TKAx_L2_EXT_AW_CNT_Q1",
+	"TKAx_L2_EXT_AW_CNT_Q2",
+	"TKAx_L2_EXT_AW_CNT_Q3",
+	"TKAx_L2_EXT_SNOOP",
+	"TKAx_L2_EXT_SNOOP_STALL",
+	"TKAx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TKAx_L2_EXT_SNOOP_RESP_DATA",
+	"TKAx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TKAX_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
new file mode 100755
index 0000000..63eac50
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+
+static const char * const hardware_counters_mali_tMIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MESSAGES_SENT",
+	"TMIx_MESSAGES_RECEIVED",
+	"TMIx_GPU_ACTIVE",
+	"TMIx_IRQ_ACTIVE",
+	"TMIx_JS0_JOBS",
+	"TMIx_JS0_TASKS",
+	"TMIx_JS0_ACTIVE",
+	"",
+	"TMIx_JS0_WAIT_READ",
+	"TMIx_JS0_WAIT_ISSUE",
+	"TMIx_JS0_WAIT_DEPEND",
+	"TMIx_JS0_WAIT_FINISH",
+	"TMIx_JS1_JOBS",
+	"TMIx_JS1_TASKS",
+	"TMIx_JS1_ACTIVE",
+	"",
+	"TMIx_JS1_WAIT_READ",
+	"TMIx_JS1_WAIT_ISSUE",
+	"TMIx_JS1_WAIT_DEPEND",
+	"TMIx_JS1_WAIT_FINISH",
+	"TMIx_JS2_JOBS",
+	"TMIx_JS2_TASKS",
+	"TMIx_JS2_ACTIVE",
+	"",
+	"TMIx_JS2_WAIT_READ",
+	"TMIx_JS2_WAIT_ISSUE",
+	"TMIx_JS2_WAIT_DEPEND",
+	"TMIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_TILER_ACTIVE",
+	"TMIx_JOBS_PROCESSED",
+	"TMIx_TRIANGLES",
+	"TMIx_LINES",
+	"TMIx_POINTS",
+	"TMIx_FRONT_FACING",
+	"TMIx_BACK_FACING",
+	"TMIx_PRIM_VISIBLE",
+	"TMIx_PRIM_CULLED",
+	"TMIx_PRIM_CLIPPED",
+	"TMIx_PRIM_SAT_CULLED",
+	"TMIx_BIN_ALLOC_INIT",
+	"TMIx_BIN_ALLOC_OVERFLOW",
+	"TMIx_BUS_READ",
+	"",
+	"TMIx_BUS_WRITE",
+	"TMIx_LOADING_DESC",
+	"TMIx_IDVS_POS_SHAD_REQ",
+	"TMIx_IDVS_POS_SHAD_WAIT",
+	"TMIx_IDVS_POS_SHAD_STALL",
+	"TMIx_IDVS_POS_FIFO_FULL",
+	"TMIx_PREFETCH_STALL",
+	"TMIx_VCACHE_HIT",
+	"TMIx_VCACHE_MISS",
+	"TMIx_VCACHE_LINE_WAIT",
+	"TMIx_VFETCH_POS_READ_WAIT",
+	"TMIx_VFETCH_VERTEX_WAIT",
+	"TMIx_VFETCH_STALL",
+	"TMIx_PRIMASSY_STALL",
+	"TMIx_BBOX_GEN_STALL",
+	"TMIx_IDVS_VBU_HIT",
+	"TMIx_IDVS_VBU_MISS",
+	"TMIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TMIx_IDVS_VAR_SHAD_REQ",
+	"TMIx_IDVS_VAR_SHAD_STALL",
+	"TMIx_BINNER_STALL",
+	"TMIx_ITER_STALL",
+	"TMIx_COMPRESS_MISS",
+	"TMIx_COMPRESS_STALL",
+	"TMIx_PCACHE_HIT",
+	"TMIx_PCACHE_MISS",
+	"TMIx_PCACHE_MISS_STALL",
+	"TMIx_PCACHE_EVICT_STALL",
+	"TMIx_PMGR_PTR_WR_STALL",
+	"TMIx_PMGR_PTR_RD_STALL",
+	"TMIx_PMGR_CMD_WR_STALL",
+	"TMIx_WRBUF_ACTIVE",
+	"TMIx_WRBUF_HIT",
+	"TMIx_WRBUF_MISS",
+	"TMIx_WRBUF_NO_FREE_LINE_STALL",
+	"TMIx_WRBUF_NO_AXI_ID_STALL",
+	"TMIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TMIx_UTLB_TRANS",
+	"TMIx_UTLB_TRANS_HIT",
+	"TMIx_UTLB_TRANS_STALL",
+	"TMIx_UTLB_TRANS_MISS_DELAY",
+	"TMIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_FRAG_ACTIVE",
+	"TMIx_FRAG_PRIMITIVES",
+	"TMIx_FRAG_PRIM_RAST",
+	"TMIx_FRAG_FPK_ACTIVE",
+	"TMIx_FRAG_STARVING",
+	"TMIx_FRAG_WARPS",
+	"TMIx_FRAG_PARTIAL_WARPS",
+	"TMIx_FRAG_QUADS_RAST",
+	"TMIx_FRAG_QUADS_EZS_TEST",
+	"TMIx_FRAG_QUADS_EZS_UPDATE",
+	"TMIx_FRAG_QUADS_EZS_KILL",
+	"TMIx_FRAG_LZS_TEST",
+	"TMIx_FRAG_LZS_KILL",
+	"",
+	"TMIx_FRAG_PTILES",
+	"TMIx_FRAG_TRANS_ELIM",
+	"TMIx_QUAD_FPK_KILLER",
+	"",
+	"TMIx_COMPUTE_ACTIVE",
+	"TMIx_COMPUTE_TASKS",
+	"TMIx_COMPUTE_WARPS",
+	"TMIx_COMPUTE_STARVING",
+	"TMIx_EXEC_CORE_ACTIVE",
+	"TMIx_EXEC_ACTIVE",
+	"TMIx_EXEC_INSTR_COUNT",
+	"TMIx_EXEC_INSTR_DIVERGED",
+	"TMIx_EXEC_INSTR_STARVING",
+	"TMIx_ARITH_INSTR_SINGLE_FMA",
+	"TMIx_ARITH_INSTR_DOUBLE",
+	"TMIx_ARITH_INSTR_MSG",
+	"TMIx_ARITH_INSTR_MSG_ONLY",
+	"TMIx_TEX_INSTR",
+	"TMIx_TEX_INSTR_MIPMAP",
+	"TMIx_TEX_INSTR_COMPRESSED",
+	"TMIx_TEX_INSTR_3D",
+	"TMIx_TEX_INSTR_TRILINEAR",
+	"TMIx_TEX_COORD_ISSUE",
+	"TMIx_TEX_COORD_STALL",
+	"TMIx_TEX_STARVE_CACHE",
+	"TMIx_TEX_STARVE_FILTER",
+	"TMIx_LS_MEM_READ_FULL",
+	"TMIx_LS_MEM_READ_SHORT",
+	"TMIx_LS_MEM_WRITE_FULL",
+	"TMIx_LS_MEM_WRITE_SHORT",
+	"TMIx_LS_MEM_ATOMIC",
+	"TMIx_VARY_INSTR",
+	"TMIx_VARY_SLOT_32",
+	"TMIx_VARY_SLOT_16",
+	"TMIx_ATTR_INSTR",
+	"TMIx_ARITH_INSTR_FP_MUL",
+	"TMIx_BEATS_RD_FTC",
+	"TMIx_BEATS_RD_FTC_EXT",
+	"TMIx_BEATS_RD_LSC",
+	"TMIx_BEATS_RD_LSC_EXT",
+	"TMIx_BEATS_RD_TEX",
+	"TMIx_BEATS_RD_TEX_EXT",
+	"TMIx_BEATS_RD_OTHER",
+	"TMIx_BEATS_WR_LSC",
+	"TMIx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TMIx_L2_RD_MSG_IN",
+	"TMIx_L2_RD_MSG_IN_STALL",
+	"TMIx_L2_WR_MSG_IN",
+	"TMIx_L2_WR_MSG_IN_STALL",
+	"TMIx_L2_SNP_MSG_IN",
+	"TMIx_L2_SNP_MSG_IN_STALL",
+	"TMIx_L2_RD_MSG_OUT",
+	"TMIx_L2_RD_MSG_OUT_STALL",
+	"TMIx_L2_WR_MSG_OUT",
+	"TMIx_L2_ANY_LOOKUP",
+	"TMIx_L2_READ_LOOKUP",
+	"TMIx_L2_WRITE_LOOKUP",
+	"TMIx_L2_EXT_SNOOP_LOOKUP",
+	"TMIx_L2_EXT_READ",
+	"TMIx_L2_EXT_READ_NOSNP",
+	"TMIx_L2_EXT_READ_UNIQUE",
+	"TMIx_L2_EXT_READ_BEATS",
+	"TMIx_L2_EXT_AR_STALL",
+	"TMIx_L2_EXT_AR_CNT_Q1",
+	"TMIx_L2_EXT_AR_CNT_Q2",
+	"TMIx_L2_EXT_AR_CNT_Q3",
+	"TMIx_L2_EXT_RRESP_0_127",
+	"TMIx_L2_EXT_RRESP_128_191",
+	"TMIx_L2_EXT_RRESP_192_255",
+	"TMIx_L2_EXT_RRESP_256_319",
+	"TMIx_L2_EXT_RRESP_320_383",
+	"TMIx_L2_EXT_WRITE",
+	"TMIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TMIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TMIx_L2_EXT_WRITE_SNP_FULL",
+	"TMIx_L2_EXT_WRITE_SNP_PTL",
+	"TMIx_L2_EXT_WRITE_BEATS",
+	"TMIx_L2_EXT_W_STALL",
+	"TMIx_L2_EXT_AW_CNT_Q1",
+	"TMIx_L2_EXT_AW_CNT_Q2",
+	"TMIx_L2_EXT_AW_CNT_Q3",
+	"TMIx_L2_EXT_SNOOP",
+	"TMIx_L2_EXT_SNOOP_STALL",
+	"TMIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TMIx_L2_EXT_SNOOP_RESP_DATA",
+	"TMIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h
new file mode 100755
index 0000000..932663c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TNOX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TNOX_H_
+
+static const char * const hardware_counters_mali_tNOx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_MESSAGES_SENT",
+	"TNOx_MESSAGES_RECEIVED",
+	"TNOx_GPU_ACTIVE",
+	"TNOx_IRQ_ACTIVE",
+	"TNOx_JS0_JOBS",
+	"TNOx_JS0_TASKS",
+	"TNOx_JS0_ACTIVE",
+	"",
+	"TNOx_JS0_WAIT_READ",
+	"TNOx_JS0_WAIT_ISSUE",
+	"TNOx_JS0_WAIT_DEPEND",
+	"TNOx_JS0_WAIT_FINISH",
+	"TNOx_JS1_JOBS",
+	"TNOx_JS1_TASKS",
+	"TNOx_JS1_ACTIVE",
+	"",
+	"TNOx_JS1_WAIT_READ",
+	"TNOx_JS1_WAIT_ISSUE",
+	"TNOx_JS1_WAIT_DEPEND",
+	"TNOx_JS1_WAIT_FINISH",
+	"TNOx_JS2_JOBS",
+	"TNOx_JS2_TASKS",
+	"TNOx_JS2_ACTIVE",
+	"",
+	"TNOx_JS2_WAIT_READ",
+	"TNOx_JS2_WAIT_ISSUE",
+	"TNOx_JS2_WAIT_DEPEND",
+	"TNOx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_TILER_ACTIVE",
+	"TNOx_JOBS_PROCESSED",
+	"TNOx_TRIANGLES",
+	"TNOx_LINES",
+	"TNOx_POINTS",
+	"TNOx_FRONT_FACING",
+	"TNOx_BACK_FACING",
+	"TNOx_PRIM_VISIBLE",
+	"TNOx_PRIM_CULLED",
+	"TNOx_PRIM_CLIPPED",
+	"TNOx_PRIM_SAT_CULLED",
+	"TNOx_BIN_ALLOC_INIT",
+	"TNOx_BIN_ALLOC_OVERFLOW",
+	"TNOx_BUS_READ",
+	"",
+	"TNOx_BUS_WRITE",
+	"TNOx_LOADING_DESC",
+	"TNOx_IDVS_POS_SHAD_REQ",
+	"TNOx_IDVS_POS_SHAD_WAIT",
+	"TNOx_IDVS_POS_SHAD_STALL",
+	"TNOx_IDVS_POS_FIFO_FULL",
+	"TNOx_PREFETCH_STALL",
+	"TNOx_VCACHE_HIT",
+	"TNOx_VCACHE_MISS",
+	"TNOx_VCACHE_LINE_WAIT",
+	"TNOx_VFETCH_POS_READ_WAIT",
+	"TNOx_VFETCH_VERTEX_WAIT",
+	"TNOx_VFETCH_STALL",
+	"TNOx_PRIMASSY_STALL",
+	"TNOx_BBOX_GEN_STALL",
+	"TNOx_IDVS_VBU_HIT",
+	"TNOx_IDVS_VBU_MISS",
+	"TNOx_IDVS_VBU_LINE_DEALLOCATE",
+	"TNOx_IDVS_VAR_SHAD_REQ",
+	"TNOx_IDVS_VAR_SHAD_STALL",
+	"TNOx_BINNER_STALL",
+	"TNOx_ITER_STALL",
+	"TNOx_COMPRESS_MISS",
+	"TNOx_COMPRESS_STALL",
+	"TNOx_PCACHE_HIT",
+	"TNOx_PCACHE_MISS",
+	"TNOx_PCACHE_MISS_STALL",
+	"TNOx_PCACHE_EVICT_STALL",
+	"TNOx_PMGR_PTR_WR_STALL",
+	"TNOx_PMGR_PTR_RD_STALL",
+	"TNOx_PMGR_CMD_WR_STALL",
+	"TNOx_WRBUF_ACTIVE",
+	"TNOx_WRBUF_HIT",
+	"TNOx_WRBUF_MISS",
+	"TNOx_WRBUF_NO_FREE_LINE_STALL",
+	"TNOx_WRBUF_NO_AXI_ID_STALL",
+	"TNOx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TNOx_UTLB_TRANS",
+	"TNOx_UTLB_TRANS_HIT",
+	"TNOx_UTLB_TRANS_STALL",
+	"TNOx_UTLB_TRANS_MISS_DELAY",
+	"TNOx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_FRAG_ACTIVE",
+	"TNOx_FRAG_PRIMITIVES",
+	"TNOx_FRAG_PRIM_RAST",
+	"TNOx_FRAG_FPK_ACTIVE",
+	"TNOx_FRAG_STARVING",
+	"TNOx_FRAG_WARPS",
+	"TNOx_FRAG_PARTIAL_WARPS",
+	"TNOx_FRAG_QUADS_RAST",
+	"TNOx_FRAG_QUADS_EZS_TEST",
+	"TNOx_FRAG_QUADS_EZS_UPDATE",
+	"TNOx_FRAG_QUADS_EZS_KILL",
+	"TNOx_FRAG_LZS_TEST",
+	"TNOx_FRAG_LZS_KILL",
+	"TNOx_WARP_REG_SIZE_64",
+	"TNOx_FRAG_PTILES",
+	"TNOx_FRAG_TRANS_ELIM",
+	"TNOx_QUAD_FPK_KILLER",
+	"TNOx_FULL_QUAD_WARPS",
+	"TNOx_COMPUTE_ACTIVE",
+	"TNOx_COMPUTE_TASKS",
+	"TNOx_COMPUTE_WARPS",
+	"TNOx_COMPUTE_STARVING",
+	"TNOx_EXEC_CORE_ACTIVE",
+	"TNOx_EXEC_ACTIVE",
+	"TNOx_EXEC_INSTR_COUNT",
+	"TNOx_EXEC_INSTR_DIVERGED",
+	"TNOx_EXEC_INSTR_STARVING",
+	"TNOx_ARITH_INSTR_SINGLE_FMA",
+	"TNOx_ARITH_INSTR_DOUBLE",
+	"TNOx_ARITH_INSTR_MSG",
+	"TNOx_ARITH_INSTR_MSG_ONLY",
+	"TNOx_TEX_MSGI_NUM_QUADS",
+	"TNOx_TEX_DFCH_NUM_PASSES",
+	"TNOx_TEX_DFCH_NUM_PASSES_MISS",
+	"TNOx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TNOx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TNOx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TNOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TNOx_TEX_TFCH_NUM_OPERATIONS",
+	"TNOx_TEX_FILT_NUM_OPERATIONS",
+	"TNOx_LS_MEM_READ_FULL",
+	"TNOx_LS_MEM_READ_SHORT",
+	"TNOx_LS_MEM_WRITE_FULL",
+	"TNOx_LS_MEM_WRITE_SHORT",
+	"TNOx_LS_MEM_ATOMIC",
+	"TNOx_VARY_INSTR",
+	"TNOx_VARY_SLOT_32",
+	"TNOx_VARY_SLOT_16",
+	"TNOx_ATTR_INSTR",
+	"TNOx_ARITH_INSTR_FP_MUL",
+	"TNOx_BEATS_RD_FTC",
+	"TNOx_BEATS_RD_FTC_EXT",
+	"TNOx_BEATS_RD_LSC",
+	"TNOx_BEATS_RD_LSC_EXT",
+	"TNOx_BEATS_RD_TEX",
+	"TNOx_BEATS_RD_TEX_EXT",
+	"TNOx_BEATS_RD_OTHER",
+	"TNOx_BEATS_WR_LSC_OTHER",
+	"TNOx_BEATS_WR_TIB",
+	"TNOx_BEATS_WR_LSC_WB",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TNOx_L2_RD_MSG_IN",
+	"TNOx_L2_RD_MSG_IN_STALL",
+	"TNOx_L2_WR_MSG_IN",
+	"TNOx_L2_WR_MSG_IN_STALL",
+	"TNOx_L2_SNP_MSG_IN",
+	"TNOx_L2_SNP_MSG_IN_STALL",
+	"TNOx_L2_RD_MSG_OUT",
+	"TNOx_L2_RD_MSG_OUT_STALL",
+	"TNOx_L2_WR_MSG_OUT",
+	"TNOx_L2_ANY_LOOKUP",
+	"TNOx_L2_READ_LOOKUP",
+	"TNOx_L2_WRITE_LOOKUP",
+	"TNOx_L2_EXT_SNOOP_LOOKUP",
+	"TNOx_L2_EXT_READ",
+	"TNOx_L2_EXT_READ_NOSNP",
+	"TNOx_L2_EXT_READ_UNIQUE",
+	"TNOx_L2_EXT_READ_BEATS",
+	"TNOx_L2_EXT_AR_STALL",
+	"TNOx_L2_EXT_AR_CNT_Q1",
+	"TNOx_L2_EXT_AR_CNT_Q2",
+	"TNOx_L2_EXT_AR_CNT_Q3",
+	"TNOx_L2_EXT_RRESP_0_127",
+	"TNOx_L2_EXT_RRESP_128_191",
+	"TNOx_L2_EXT_RRESP_192_255",
+	"TNOx_L2_EXT_RRESP_256_319",
+	"TNOx_L2_EXT_RRESP_320_383",
+	"TNOx_L2_EXT_WRITE",
+	"TNOx_L2_EXT_WRITE_NOSNP_FULL",
+	"TNOx_L2_EXT_WRITE_NOSNP_PTL",
+	"TNOx_L2_EXT_WRITE_SNP_FULL",
+	"TNOx_L2_EXT_WRITE_SNP_PTL",
+	"TNOx_L2_EXT_WRITE_BEATS",
+	"TNOx_L2_EXT_W_STALL",
+	"TNOx_L2_EXT_AW_CNT_Q1",
+	"TNOx_L2_EXT_AW_CNT_Q2",
+	"TNOx_L2_EXT_AW_CNT_Q3",
+	"TNOx_L2_EXT_SNOOP",
+	"TNOx_L2_EXT_SNOOP_STALL",
+	"TNOx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TNOx_L2_EXT_SNOOP_RESP_DATA",
+	"TNOx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TNOX_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
new file mode 100755
index 0000000..b8dde32
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TSIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TSIX_H_
+
+static const char * const hardware_counters_mali_tSIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_MESSAGES_SENT",
+	"TSIx_MESSAGES_RECEIVED",
+	"TSIx_GPU_ACTIVE",
+	"TSIx_IRQ_ACTIVE",
+	"TSIx_JS0_JOBS",
+	"TSIx_JS0_TASKS",
+	"TSIx_JS0_ACTIVE",
+	"",
+	"TSIx_JS0_WAIT_READ",
+	"TSIx_JS0_WAIT_ISSUE",
+	"TSIx_JS0_WAIT_DEPEND",
+	"TSIx_JS0_WAIT_FINISH",
+	"TSIx_JS1_JOBS",
+	"TSIx_JS1_TASKS",
+	"TSIx_JS1_ACTIVE",
+	"",
+	"TSIx_JS1_WAIT_READ",
+	"TSIx_JS1_WAIT_ISSUE",
+	"TSIx_JS1_WAIT_DEPEND",
+	"TSIx_JS1_WAIT_FINISH",
+	"TSIx_JS2_JOBS",
+	"TSIx_JS2_TASKS",
+	"TSIx_JS2_ACTIVE",
+	"",
+	"TSIx_JS2_WAIT_READ",
+	"TSIx_JS2_WAIT_ISSUE",
+	"TSIx_JS2_WAIT_DEPEND",
+	"TSIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_TILER_ACTIVE",
+	"TSIx_JOBS_PROCESSED",
+	"TSIx_TRIANGLES",
+	"TSIx_LINES",
+	"TSIx_POINTS",
+	"TSIx_FRONT_FACING",
+	"TSIx_BACK_FACING",
+	"TSIx_PRIM_VISIBLE",
+	"TSIx_PRIM_CULLED",
+	"TSIx_PRIM_CLIPPED",
+	"TSIx_PRIM_SAT_CULLED",
+	"TSIx_BIN_ALLOC_INIT",
+	"TSIx_BIN_ALLOC_OVERFLOW",
+	"TSIx_BUS_READ",
+	"",
+	"TSIx_BUS_WRITE",
+	"TSIx_LOADING_DESC",
+	"TSIx_IDVS_POS_SHAD_REQ",
+	"TSIx_IDVS_POS_SHAD_WAIT",
+	"TSIx_IDVS_POS_SHAD_STALL",
+	"TSIx_IDVS_POS_FIFO_FULL",
+	"TSIx_PREFETCH_STALL",
+	"TSIx_VCACHE_HIT",
+	"TSIx_VCACHE_MISS",
+	"TSIx_VCACHE_LINE_WAIT",
+	"TSIx_VFETCH_POS_READ_WAIT",
+	"TSIx_VFETCH_VERTEX_WAIT",
+	"TSIx_VFETCH_STALL",
+	"TSIx_PRIMASSY_STALL",
+	"TSIx_BBOX_GEN_STALL",
+	"TSIx_IDVS_VBU_HIT",
+	"TSIx_IDVS_VBU_MISS",
+	"TSIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TSIx_IDVS_VAR_SHAD_REQ",
+	"TSIx_IDVS_VAR_SHAD_STALL",
+	"TSIx_BINNER_STALL",
+	"TSIx_ITER_STALL",
+	"TSIx_COMPRESS_MISS",
+	"TSIx_COMPRESS_STALL",
+	"TSIx_PCACHE_HIT",
+	"TSIx_PCACHE_MISS",
+	"TSIx_PCACHE_MISS_STALL",
+	"TSIx_PCACHE_EVICT_STALL",
+	"TSIx_PMGR_PTR_WR_STALL",
+	"TSIx_PMGR_PTR_RD_STALL",
+	"TSIx_PMGR_CMD_WR_STALL",
+	"TSIx_WRBUF_ACTIVE",
+	"TSIx_WRBUF_HIT",
+	"TSIx_WRBUF_MISS",
+	"TSIx_WRBUF_NO_FREE_LINE_STALL",
+	"TSIx_WRBUF_NO_AXI_ID_STALL",
+	"TSIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TSIx_UTLB_TRANS",
+	"TSIx_UTLB_TRANS_HIT",
+	"TSIx_UTLB_TRANS_STALL",
+	"TSIx_UTLB_TRANS_MISS_DELAY",
+	"TSIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_FRAG_ACTIVE",
+	"TSIx_FRAG_PRIMITIVES",
+	"TSIx_FRAG_PRIM_RAST",
+	"TSIx_FRAG_FPK_ACTIVE",
+	"TSIx_FRAG_STARVING",
+	"TSIx_FRAG_WARPS",
+	"TSIx_FRAG_PARTIAL_WARPS",
+	"TSIx_FRAG_QUADS_RAST",
+	"TSIx_FRAG_QUADS_EZS_TEST",
+	"TSIx_FRAG_QUADS_EZS_UPDATE",
+	"TSIx_FRAG_QUADS_EZS_KILL",
+	"TSIx_FRAG_LZS_TEST",
+	"TSIx_FRAG_LZS_KILL",
+	"",
+	"TSIx_FRAG_PTILES",
+	"TSIx_FRAG_TRANS_ELIM",
+	"TSIx_QUAD_FPK_KILLER",
+	"",
+	"TSIx_COMPUTE_ACTIVE",
+	"TSIx_COMPUTE_TASKS",
+	"TSIx_COMPUTE_WARPS",
+	"TSIx_COMPUTE_STARVING",
+	"TSIx_EXEC_CORE_ACTIVE",
+	"TSIx_EXEC_ACTIVE",
+	"TSIx_EXEC_INSTR_COUNT",
+	"TSIx_EXEC_INSTR_DIVERGED",
+	"TSIx_EXEC_INSTR_STARVING",
+	"TSIx_ARITH_INSTR_SINGLE_FMA",
+	"TSIx_ARITH_INSTR_DOUBLE",
+	"TSIx_ARITH_INSTR_MSG",
+	"TSIx_ARITH_INSTR_MSG_ONLY",
+	"TSIx_TEX_MSGI_NUM_QUADS",
+	"TSIx_TEX_DFCH_NUM_PASSES",
+	"TSIx_TEX_DFCH_NUM_PASSES_MISS",
+	"TSIx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TSIx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TSIx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TSIx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TSIx_TEX_TFCH_NUM_OPERATIONS",
+	"TSIx_TEX_FILT_NUM_OPERATIONS",
+	"TSIx_LS_MEM_READ_FULL",
+	"TSIx_LS_MEM_READ_SHORT",
+	"TSIx_LS_MEM_WRITE_FULL",
+	"TSIx_LS_MEM_WRITE_SHORT",
+	"TSIx_LS_MEM_ATOMIC",
+	"TSIx_VARY_INSTR",
+	"TSIx_VARY_SLOT_32",
+	"TSIx_VARY_SLOT_16",
+	"TSIx_ATTR_INSTR",
+	"TSIx_ARITH_INSTR_FP_MUL",
+	"TSIx_BEATS_RD_FTC",
+	"TSIx_BEATS_RD_FTC_EXT",
+	"TSIx_BEATS_RD_LSC",
+	"TSIx_BEATS_RD_LSC_EXT",
+	"TSIx_BEATS_RD_TEX",
+	"TSIx_BEATS_RD_TEX_EXT",
+	"TSIx_BEATS_RD_OTHER",
+	"TSIx_BEATS_WR_LSC_OTHER",
+	"TSIx_BEATS_WR_TIB",
+	"TSIx_BEATS_WR_LSC_WB",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TSIx_L2_RD_MSG_IN",
+	"TSIx_L2_RD_MSG_IN_STALL",
+	"TSIx_L2_WR_MSG_IN",
+	"TSIx_L2_WR_MSG_IN_STALL",
+	"TSIx_L2_SNP_MSG_IN",
+	"TSIx_L2_SNP_MSG_IN_STALL",
+	"TSIx_L2_RD_MSG_OUT",
+	"TSIx_L2_RD_MSG_OUT_STALL",
+	"TSIx_L2_WR_MSG_OUT",
+	"TSIx_L2_ANY_LOOKUP",
+	"TSIx_L2_READ_LOOKUP",
+	"TSIx_L2_WRITE_LOOKUP",
+	"TSIx_L2_EXT_SNOOP_LOOKUP",
+	"TSIx_L2_EXT_READ",
+	"TSIx_L2_EXT_READ_NOSNP",
+	"TSIx_L2_EXT_READ_UNIQUE",
+	"TSIx_L2_EXT_READ_BEATS",
+	"TSIx_L2_EXT_AR_STALL",
+	"TSIx_L2_EXT_AR_CNT_Q1",
+	"TSIx_L2_EXT_AR_CNT_Q2",
+	"TSIx_L2_EXT_AR_CNT_Q3",
+	"TSIx_L2_EXT_RRESP_0_127",
+	"TSIx_L2_EXT_RRESP_128_191",
+	"TSIx_L2_EXT_RRESP_192_255",
+	"TSIx_L2_EXT_RRESP_256_319",
+	"TSIx_L2_EXT_RRESP_320_383",
+	"TSIx_L2_EXT_WRITE",
+	"TSIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TSIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TSIx_L2_EXT_WRITE_SNP_FULL",
+	"TSIx_L2_EXT_WRITE_SNP_PTL",
+	"TSIx_L2_EXT_WRITE_BEATS",
+	"TSIx_L2_EXT_W_STALL",
+	"TSIx_L2_EXT_AW_CNT_Q1",
+	"TSIx_L2_EXT_AW_CNT_Q2",
+	"TSIx_L2_EXT_AW_CNT_Q3",
+	"TSIx_L2_EXT_SNOOP",
+	"TSIx_L2_EXT_SNOOP_STALL",
+	"TSIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TSIx_L2_EXT_SNOOP_RESP_DATA",
+	"TSIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h
new file mode 100755
index 0000000..a17870d
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TTRX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TTRX_H_
+
+static const char * const hardware_counters_mali_tTRx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TTRx_MESSAGES_SENT",
+	"TTRx_MESSAGES_RECEIVED",
+	"TTRx_GPU_ACTIVE",
+	"TTRx_IRQ_ACTIVE",
+	"TTRx_JS0_JOBS",
+	"TTRx_JS0_TASKS",
+	"TTRx_JS0_ACTIVE",
+	"",
+	"TTRx_JS0_WAIT_READ",
+	"TTRx_JS0_WAIT_ISSUE",
+	"TTRx_JS0_WAIT_DEPEND",
+	"TTRx_JS0_WAIT_FINISH",
+	"TTRx_JS1_JOBS",
+	"TTRx_JS1_TASKS",
+	"TTRx_JS1_ACTIVE",
+	"",
+	"TTRx_JS1_WAIT_READ",
+	"TTRx_JS1_WAIT_ISSUE",
+	"TTRx_JS1_WAIT_DEPEND",
+	"TTRx_JS1_WAIT_FINISH",
+	"TTRx_JS2_JOBS",
+	"TTRx_JS2_TASKS",
+	"TTRx_JS2_ACTIVE",
+	"",
+	"TTRx_JS2_WAIT_READ",
+	"TTRx_JS2_WAIT_ISSUE",
+	"TTRx_JS2_WAIT_DEPEND",
+	"TTRx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TTRx_TILER_ACTIVE",
+	"TTRx_JOBS_PROCESSED",
+	"TTRx_TRIANGLES",
+	"TTRx_LINES",
+	"TTRx_POINTS",
+	"TTRx_FRONT_FACING",
+	"TTRx_BACK_FACING",
+	"TTRx_PRIM_VISIBLE",
+	"TTRx_PRIM_CULLED",
+	"TTRx_PRIM_CLIPPED",
+	"TTRx_PRIM_SAT_CULLED",
+	"TTRx_BIN_ALLOC_INIT",
+	"TTRx_BIN_ALLOC_OVERFLOW",
+	"TTRx_BUS_READ",
+	"",
+	"TTRx_BUS_WRITE",
+	"TTRx_LOADING_DESC",
+	"",
+	"",
+	"",
+	"",
+	"TTRx_PREFETCH_STALL",
+	"TTRx_VCACHE_HIT",
+	"TTRx_VCACHE_MISS",
+	"TTRx_VCACHE_LINE_WAIT",
+	"TTRx_VFETCH_POS_READ_WAIT",
+	"TTRx_VFETCH_VERTEX_WAIT",
+	"TTRx_VFETCH_STALL",
+	"TTRx_PRIMASSY_STALL",
+	"TTRx_BBOX_GEN_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TTRx_BINNER_STALL",
+	"TTRx_ITER_STALL",
+	"TTRx_COMPRESS_MISS",
+	"TTRx_COMPRESS_STALL",
+	"TTRx_PCACHE_HIT",
+	"TTRx_PCACHE_MISS",
+	"TTRx_PCACHE_MISS_STALL",
+	"TTRx_PCACHE_EVICT_STALL",
+	"TTRx_PMGR_PTR_WR_STALL",
+	"TTRx_PMGR_PTR_RD_STALL",
+	"TTRx_PMGR_CMD_WR_STALL",
+	"TTRx_WRBUF_ACTIVE",
+	"TTRx_WRBUF_HIT",
+	"TTRx_WRBUF_MISS",
+	"TTRx_WRBUF_NO_FREE_LINE_STALL",
+	"TTRx_WRBUF_NO_AXI_ID_STALL",
+	"TTRx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TTRx_UTLB_TRANS",
+	"TTRx_UTLB_TRANS_HIT",
+	"TTRx_UTLB_TRANS_STALL",
+	"TTRx_UTLB_TRANS_MISS_DELAY",
+	"TTRx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TTRx_FRAG_ACTIVE",
+	"TTRx_FRAG_PRIMITIVES",
+	"TTRx_FRAG_PRIM_RAST",
+	"TTRx_FRAG_FPK_ACTIVE",
+	"TTRx_FRAG_STARVING",
+	"TTRx_FRAG_WARPS",
+	"TTRx_FRAG_PARTIAL_WARPS",
+	"TTRx_FRAG_QUADS_RAST",
+	"TTRx_FRAG_QUADS_EZS_TEST",
+	"TTRx_FRAG_QUADS_EZS_UPDATE",
+	"TTRx_FRAG_QUADS_EZS_KILL",
+	"TTRx_FRAG_LZS_TEST",
+	"TTRx_FRAG_LZS_KILL",
+	"TTRx_WARP_REG_SIZE_64",
+	"TTRx_FRAG_PTILES",
+	"TTRx_FRAG_TRANS_ELIM",
+	"TTRx_QUAD_FPK_KILLER",
+	"TTRx_FULL_QUAD_WARPS",
+	"TTRx_COMPUTE_ACTIVE",
+	"TTRx_COMPUTE_TASKS",
+	"TTRx_COMPUTE_WARPS",
+	"TTRx_COMPUTE_STARVING",
+	"TTRx_EXEC_CORE_ACTIVE",
+	"TTRx_EXEC_INSTR_FMA",
+	"TTRx_EXEC_INSTR_CVT",
+	"TTRx_EXEC_INSTR_SFU",
+	"TTRx_EXEC_INSTR_MSG",
+	"TTRx_EXEC_INSTR_DIVERGED",
+	"TTRx_EXEC_ICACHE_MISS",
+	"TTRx_EXEC_STARVE_ARITH",
+	"TTRx_CALL_BLEND_SHADER",
+	"TTRx_TEX_INSTR",
+	"TTRx_TEX_INSTR_MIPMAP",
+	"TTRx_TEX_INSTR_COMPRESSED",
+	"TTRx_TEX_INSTR_3D",
+	"TTRx_TEX_INSTR_TRILINEAR",
+	"TTRx_TEX_COORD_ISSUE",
+	"TTRx_TEX_COORD_STALL",
+	"TTRx_TEX_STARVE_CACHE",
+	"TTRx_TEX_STARVE_FILTER",
+	"TTRx_LS_MEM_READ_FULL",
+	"TTRx_LS_MEM_READ_SHORT",
+	"TTRx_LS_MEM_WRITE_FULL",
+	"TTRx_LS_MEM_WRITE_SHORT",
+	"TTRx_LS_MEM_ATOMIC",
+	"TTRx_VARY_INSTR",
+	"TTRx_VARY_SLOT_32",
+	"TTRx_VARY_SLOT_16",
+	"TTRx_ATTR_INSTR",
+	"TTRx_ARITH_INSTR_FP_MUL",
+	"TTRx_BEATS_RD_FTC",
+	"TTRx_BEATS_RD_FTC_EXT",
+	"TTRx_BEATS_RD_LSC",
+	"TTRx_BEATS_RD_LSC_EXT",
+	"TTRx_BEATS_RD_TEX",
+	"TTRx_BEATS_RD_TEX_EXT",
+	"TTRx_BEATS_RD_OTHER",
+	"",
+	"TTRx_BEATS_WR_TIB",
+	"TTRx_BEATS_WR_LSC",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TTRx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TTRx_L2_RD_MSG_IN",
+	"TTRx_L2_RD_MSG_IN_STALL",
+	"TTRx_L2_WR_MSG_IN",
+	"TTRx_L2_WR_MSG_IN_STALL",
+	"TTRx_L2_SNP_MSG_IN",
+	"TTRx_L2_SNP_MSG_IN_STALL",
+	"TTRx_L2_RD_MSG_OUT",
+	"TTRx_L2_RD_MSG_OUT_STALL",
+	"TTRx_L2_WR_MSG_OUT",
+	"TTRx_L2_ANY_LOOKUP",
+	"TTRx_L2_READ_LOOKUP",
+	"TTRx_L2_WRITE_LOOKUP",
+	"TTRx_L2_EXT_SNOOP_LOOKUP",
+	"TTRx_L2_EXT_READ",
+	"TTRx_L2_EXT_READ_NOSNP",
+	"TTRx_L2_EXT_READ_UNIQUE",
+	"TTRx_L2_EXT_READ_BEATS",
+	"TTRx_L2_EXT_AR_STALL",
+	"TTRx_L2_EXT_AR_CNT_Q1",
+	"TTRx_L2_EXT_AR_CNT_Q2",
+	"TTRx_L2_EXT_AR_CNT_Q3",
+	"TTRx_L2_EXT_RRESP_0_127",
+	"TTRx_L2_EXT_RRESP_128_191",
+	"TTRx_L2_EXT_RRESP_192_255",
+	"TTRx_L2_EXT_RRESP_256_319",
+	"TTRx_L2_EXT_RRESP_320_383",
+	"TTRx_L2_EXT_WRITE",
+	"TTRx_L2_EXT_WRITE_NOSNP_FULL",
+	"TTRx_L2_EXT_WRITE_NOSNP_PTL",
+	"TTRx_L2_EXT_WRITE_SNP_FULL",
+	"TTRx_L2_EXT_WRITE_SNP_PTL",
+	"TTRx_L2_EXT_WRITE_BEATS",
+	"TTRx_L2_EXT_W_STALL",
+	"TTRx_L2_EXT_AW_CNT_Q1",
+	"TTRx_L2_EXT_AW_CNT_Q2",
+	"TTRx_L2_EXT_AW_CNT_Q3",
+	"TTRx_L2_EXT_SNOOP",
+	"TTRx_L2_EXT_SNOOP_STALL",
+	"TTRx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TTRx_L2_EXT_SNOOP_RESP_DATA",
+	"TTRx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TTRX_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
new file mode 100755
index 0000000..218e63a
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
@@ -0,0 +1,128 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#ifndef _KBASE_GPU_ID_H_
+#define _KBASE_GPU_ID_H_
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT       0
+#define GPU_ID_VERSION_MINOR_SHIFT        4
+#define GPU_ID_VERSION_MAJOR_SHIFT        12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
+#define GPU_ID_VERSION_STATUS             (0xFu  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFFu << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xFu  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X                    0x6956u
+#define GPU_ID_PI_T62X                    0x0620u
+#define GPU_ID_PI_T76X                    0x0750u
+#define GPU_ID_PI_T72X                    0x0720u
+#define GPU_ID_PI_TFRX                    0x0880u
+#define GPU_ID_PI_T86X                    0x0860u
+#define GPU_ID_PI_T82X                    0x0820u
+#define GPU_ID_PI_T83X                    0x0830u
+
+/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
+#define GPU_ID_PI_NEW_FORMAT_START        0x1000
+#define GPU_ID_IS_NEW_FORMAT(product_id)  ((product_id) != GPU_ID_PI_T60X && \
+						(product_id) >= \
+						GPU_ID_PI_NEW_FORMAT_START)
+
+#define GPU_ID2_VERSION_STATUS_SHIFT      0
+#define GPU_ID2_VERSION_MINOR_SHIFT       4
+#define GPU_ID2_VERSION_MAJOR_SHIFT       12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT       16
+#define GPU_ID2_ARCH_REV_SHIFT            20
+#define GPU_ID2_ARCH_MINOR_SHIFT          24
+#define GPU_ID2_ARCH_MAJOR_SHIFT          28
+#define GPU_ID2_VERSION_STATUS            (0xFu << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xFu << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xFu << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+#define GPU_ID2_VERSION        (GPU_ID2_VERSION_MAJOR | \
+								GPU_ID2_VERSION_MINOR | \
+								GPU_ID2_VERSION_STATUS)
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+   a product ignoring its version. */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+		((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 (((u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 (((u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+   revision (major, minor, status) of a product */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+		((((u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 (((u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 (((u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
+	version_major, version_minor, version_status) \
+		(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+			product_major) | \
+		 GPU_ID2_VERSION_MAKE(version_major, version_minor,     \
+			version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+   a particular GPU model by its arch_major and product_major. */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
+		((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		(((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+   for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+   model. */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+		((((u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		    GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
+#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6, 1)
+#define GPU_ID2_PRODUCT_TSIX              GPU_ID2_MODEL_MAKE(7, 0)
+#define GPU_ID2_PRODUCT_TDVX              GPU_ID2_MODEL_MAKE(7, 3)
+#define GPU_ID2_PRODUCT_TNOX              GPU_ID2_MODEL_MAKE(7, 1)
+#define GPU_ID2_PRODUCT_TGOX              GPU_ID2_MODEL_MAKE(7, 2)
+#define GPU_ID2_PRODUCT_TKAX              GPU_ID2_MODEL_MAKE(8, 0)
+#define GPU_ID2_PRODUCT_TTRX              GPU_ID2_MODEL_MAKE(9, 0)
+#define GPU_ID2_PRODUCT_TBOX              GPU_ID2_MODEL_MAKE(8, 2)
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0                   0x1
+#define GPU_ID_S_EAC                      0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major,
+   minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+		((((u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		(((u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		(((u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		(((u32)status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+#endif /* _KBASE_GPU_ID_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
new file mode 100755
index 0000000..514b065
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+/** Show callback for the @c gpu_memory debugfs file.
+ *
+ * This function is called to get the contents of the @c gpu_memory debugfs
+ * file. This is a report of current gpu memory usage.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if successfully prints data in debugfs entry file
+ *         -1 if it encountered an error
+ */
+
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+
+	kbdev_list = kbase_dev_list_get();
+	list_for_each(entry, kbdev_list) {
+		struct kbase_device *kbdev = NULL;
+		struct kbasep_kctx_list_element *element;
+
+		kbdev = list_entry(entry, struct kbase_device, entry);
+		/* output the total memory usage and cap for this device */
+		seq_printf(sfile, "%-16s  %10u\n",
+				kbdev->devname,
+				atomic_read(&(kbdev->memdev.used_pages)));
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_for_each_entry(element, &kbdev->kctx_list, link) {
+			/* output the memory usage and cap for each kctx
+			* opened on this device */
+			seq_printf(sfile, "  %s-0x%p %10u\n",
+				"kctx",
+				element->kctx,
+				atomic_read(&(element->kctx->used_pages)));
+		}
+		mutex_unlock(&kbdev->kctx_list_lock);
+	}
+	kbase_dev_list_put(kbdev_list);
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for gpu_memory
+ */
+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_gpu_memory_seq_show, NULL);
+}
+
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+	.open = kbasep_gpu_memory_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ *  Initialize debugfs entry for gpu_memory
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("gpu_memory", S_IRUGO,
+			kbdev->mali_debugfs_directory, NULL,
+			&kbasep_gpu_memory_debugfs_fops);
+	return;
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	return;
+}
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
new file mode 100644
index 0000000..28a871a
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H
+#define _KBASE_GPU_MEMORY_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+#endif  /*_KBASE_GPU_MEMORY_DEBUGFS_H*/
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
new file mode 100755
index 0000000..62ba105
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -0,0 +1,482 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include "mali_kbase_ioctl.h"
+#include <linux/clk.h>
+
+/**
+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
+ * @value:  The value from which to extract bits.
+ * @offset: The first bit to extract (0 being the LSB).
+ * @size:   The number of bits to extract.
+ *
+ * Context: @offset + @size <= 32.
+ *
+ * Return: Bits [@offset, @offset + @size) from @value.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+	(((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
+
+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+	struct mali_base_gpu_coherent_group *current_group;
+	u64 group_present;
+	u64 group_mask;
+	u64 first_set, first_set_prev;
+	u32 num_groups = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != props);
+
+	props->coherency_info.coherency = props->raw_props.mem_features;
+	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+	if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+		/* Group is l2 coherent */
+		group_present = props->raw_props.l2_present;
+	} else {
+		/* Group is l1 coherent */
+		group_present = props->raw_props.shader_present;
+	}
+
+	/*
+	 * The coherent group mask can be computed from the l2 present
+	 * register.
+	 *
+	 * For the coherent group n:
+	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
+	 * where first_set is group_present with only its nth set-bit kept
+	 * (i.e. the position from where a new group starts).
+	 *
+	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
+	 * The first mask is:
+	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
+	 *               = (0x0..010     - 1) & ~(0x0..01      - 1)
+	 *               =  0x0..00f
+	 * The second mask is:
+	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
+	 *               = (0x0..100     - 1) & ~(0x0..010     - 1)
+	 *               =  0x0..0f0
+	 * And so on until all the bits from group_present have been cleared
+	 * (i.e. there is no group left).
+	 */
+
+	current_group = props->coherency_info.group;
+	first_set = group_present & ~(group_present - 1);
+
+	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
+		group_present -= first_set;	/* Clear the current group bit */
+		first_set_prev = first_set;
+
+		first_set = group_present & ~(group_present - 1);
+		group_mask = (first_set - 1) & ~(first_set_prev - 1);
+
+		/* Populate the coherent_group structure for each group */
+		current_group->core_mask = group_mask & props->raw_props.shader_present;
+		current_group->num_cores = hweight64(current_group->core_mask);
+
+		num_groups++;
+		current_group++;
+	}
+
+	if (group_present != 0)
+		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+	props->coherency_info.num_groups = num_groups;
+}
+
+/**
+ * kbase_gpuprops_get_props - Get the GPU configuration
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values from the GPU configuration
+ * registers. Only the raw properties are filled in this function
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	struct kbase_gpuprops_regdump regdump;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get(kbdev, &regdump);
+
+	gpu_props->raw_props.gpu_id = regdump.gpu_id;
+	gpu_props->raw_props.tiler_features = regdump.tiler_features;
+	gpu_props->raw_props.mem_features = regdump.mem_features;
+	gpu_props->raw_props.mmu_features = regdump.mmu_features;
+	gpu_props->raw_props.l2_features = regdump.l2_features;
+	gpu_props->raw_props.core_features = regdump.core_features;
+
+	gpu_props->raw_props.as_present = regdump.as_present;
+	gpu_props->raw_props.js_present = regdump.js_present;
+	gpu_props->raw_props.shader_present =
+		((u64) regdump.shader_present_hi << 32) +
+		regdump.shader_present_lo;
+	gpu_props->raw_props.tiler_present =
+		((u64) regdump.tiler_present_hi << 32) +
+		regdump.tiler_present_lo;
+	gpu_props->raw_props.l2_present =
+		((u64) regdump.l2_present_hi << 32) +
+		regdump.l2_present_lo;
+	gpu_props->raw_props.stack_present =
+		((u64) regdump.stack_present_hi << 32) +
+		regdump.stack_present_lo;
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		gpu_props->raw_props.js_features[i] = regdump.js_features[i];
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
+
+	gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
+	gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
+	gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
+	gpu_props->raw_props.thread_features = regdump.thread_features;
+	gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc;
+}
+
+void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props)
+{
+	gpu_props->core_props.version_status =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+	gpu_props->core_props.minor_revision =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+	gpu_props->core_props.major_revision =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+	gpu_props->core_props.product_id =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+}
+
+/**
+ * kbase_gpuprops_calculate_props - Calculate the derived properties
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev:     The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values derived from the GPU
+ * configuration registers
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	int i;
+
+	/* Populate the base_gpu_props structure */
+	kbase_gpuprops_update_core_props_gpu_id(gpu_props);
+	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+	gpu_props->core_props.num_exec_engines =
+		KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4);
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+
+	/* Field with number of l2 slices is added to MEM_FEATURES register
+	 * since t76x. Below code assumes that for older GPU reserved bits will
+	 * be read as zero. */
+	gpu_props->l2_props.num_l2_slices =
+		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+
+	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+	if (gpu_props->raw_props.thread_max_threads == 0)
+		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+	else
+		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+	else
+		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+	if (gpu_props->raw_props.thread_max_barrier_size == 0)
+		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+	else
+		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+	if (gpu_props->raw_props.thread_tls_alloc == 0)
+		gpu_props->thread_props.tls_alloc =
+				gpu_props->thread_props.max_threads;
+	else
+		gpu_props->thread_props.tls_alloc =
+				gpu_props->raw_props.thread_tls_alloc;
+
+	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+	gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+
+	/* If values are not specified, then use defaults */
+	if (gpu_props->thread_props.max_registers == 0) {
+		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+	}
+	/* Initialize the coherent_group structure for each group */
+	kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *gpu_props;
+	struct gpu_raw_gpu_props *raw;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	gpu_props = &kbdev->gpu_props;
+	raw = &gpu_props->props.raw_props;
+
+	/* Initialize the base_gpu_props structure from the hardware */
+	kbase_gpuprops_get_props(&gpu_props->props, kbdev);
+
+	/* Populate the derived properties */
+	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
+
+	/* Populate kbase-only fields */
+	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
+	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
+
+	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
+
+	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
+	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
+
+	gpu_props->num_cores = hweight64(raw->shader_present);
+	gpu_props->num_core_groups = hweight64(raw->l2_present);
+	gpu_props->num_address_spaces = hweight32(raw->as_present);
+	gpu_props->num_job_slots = hweight32(raw->js_present);
+}
+
+void kbase_gpuprops_set_features(struct kbase_device *kbdev)
+{
+	base_gpu_props *gpu_props;
+	struct kbase_gpuprops_regdump regdump;
+
+	gpu_props = &kbdev->gpu_props.props;
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get_features(kbdev, &regdump);
+
+	/*
+	 * Copy the raw value from the register, later this will get turned
+	 * into the selected coherency mode.
+	 * Additionally, add non-coherent mode, as this is always supported.
+	 */
+	gpu_props->raw_props.coherency_mode = regdump.coherency_features |
+		COHERENCY_FEATURE_BIT(COHERENCY_NONE);
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT))
+		gpu_props->thread_props.max_thread_group_split = 0;
+}
+
+static struct {
+	u32 type;
+	size_t offset;
+	int size;
+} gpu_property_mapping[] = {
+#define PROP(name, member) \
+	{KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \
+		sizeof(((struct base_gpu_props *)0)->member)}
+	PROP(PRODUCT_ID,                  core_props.product_id),
+	PROP(VERSION_STATUS,              core_props.version_status),
+	PROP(MINOR_REVISION,              core_props.minor_revision),
+	PROP(MAJOR_REVISION,              core_props.major_revision),
+	PROP(GPU_FREQ_KHZ_MAX,            core_props.gpu_freq_khz_max),
+	PROP(LOG2_PROGRAM_COUNTER_SIZE,   core_props.log2_program_counter_size),
+	PROP(TEXTURE_FEATURES_0,          core_props.texture_features[0]),
+	PROP(TEXTURE_FEATURES_1,          core_props.texture_features[1]),
+	PROP(TEXTURE_FEATURES_2,          core_props.texture_features[2]),
+	PROP(TEXTURE_FEATURES_3,          core_props.texture_features[3]),
+	PROP(GPU_AVAILABLE_MEMORY_SIZE,   core_props.gpu_available_memory_size),
+	PROP(NUM_EXEC_ENGINES,            core_props.num_exec_engines),
+
+	PROP(L2_LOG2_LINE_SIZE,           l2_props.log2_line_size),
+	PROP(L2_LOG2_CACHE_SIZE,          l2_props.log2_cache_size),
+	PROP(L2_NUM_L2_SLICES,            l2_props.num_l2_slices),
+
+	PROP(TILER_BIN_SIZE_BYTES,        tiler_props.bin_size_bytes),
+	PROP(TILER_MAX_ACTIVE_LEVELS,     tiler_props.max_active_levels),
+
+	PROP(MAX_THREADS,                 thread_props.max_threads),
+	PROP(MAX_WORKGROUP_SIZE,          thread_props.max_workgroup_size),
+	PROP(MAX_BARRIER_SIZE,            thread_props.max_barrier_size),
+	PROP(MAX_REGISTERS,               thread_props.max_registers),
+	PROP(MAX_TASK_QUEUE,              thread_props.max_task_queue),
+	PROP(MAX_THREAD_GROUP_SPLIT,      thread_props.max_thread_group_split),
+	PROP(IMPL_TECH,                   thread_props.impl_tech),
+	PROP(TLS_ALLOC,                   thread_props.tls_alloc),
+
+	PROP(RAW_SHADER_PRESENT,          raw_props.shader_present),
+	PROP(RAW_TILER_PRESENT,           raw_props.tiler_present),
+	PROP(RAW_L2_PRESENT,              raw_props.l2_present),
+	PROP(RAW_STACK_PRESENT,           raw_props.stack_present),
+	PROP(RAW_L2_FEATURES,             raw_props.l2_features),
+	PROP(RAW_CORE_FEATURES,           raw_props.core_features),
+	PROP(RAW_MEM_FEATURES,            raw_props.mem_features),
+	PROP(RAW_MMU_FEATURES,            raw_props.mmu_features),
+	PROP(RAW_AS_PRESENT,              raw_props.as_present),
+	PROP(RAW_JS_PRESENT,              raw_props.js_present),
+	PROP(RAW_JS_FEATURES_0,           raw_props.js_features[0]),
+	PROP(RAW_JS_FEATURES_1,           raw_props.js_features[1]),
+	PROP(RAW_JS_FEATURES_2,           raw_props.js_features[2]),
+	PROP(RAW_JS_FEATURES_3,           raw_props.js_features[3]),
+	PROP(RAW_JS_FEATURES_4,           raw_props.js_features[4]),
+	PROP(RAW_JS_FEATURES_5,           raw_props.js_features[5]),
+	PROP(RAW_JS_FEATURES_6,           raw_props.js_features[6]),
+	PROP(RAW_JS_FEATURES_7,           raw_props.js_features[7]),
+	PROP(RAW_JS_FEATURES_8,           raw_props.js_features[8]),
+	PROP(RAW_JS_FEATURES_9,           raw_props.js_features[9]),
+	PROP(RAW_JS_FEATURES_10,          raw_props.js_features[10]),
+	PROP(RAW_JS_FEATURES_11,          raw_props.js_features[11]),
+	PROP(RAW_JS_FEATURES_12,          raw_props.js_features[12]),
+	PROP(RAW_JS_FEATURES_13,          raw_props.js_features[13]),
+	PROP(RAW_JS_FEATURES_14,          raw_props.js_features[14]),
+	PROP(RAW_JS_FEATURES_15,          raw_props.js_features[15]),
+	PROP(RAW_TILER_FEATURES,          raw_props.tiler_features),
+	PROP(RAW_TEXTURE_FEATURES_0,      raw_props.texture_features[0]),
+	PROP(RAW_TEXTURE_FEATURES_1,      raw_props.texture_features[1]),
+	PROP(RAW_TEXTURE_FEATURES_2,      raw_props.texture_features[2]),
+	PROP(RAW_TEXTURE_FEATURES_3,      raw_props.texture_features[3]),
+	PROP(RAW_GPU_ID,                  raw_props.gpu_id),
+	PROP(RAW_THREAD_MAX_THREADS,      raw_props.thread_max_threads),
+	PROP(RAW_THREAD_MAX_WORKGROUP_SIZE,
+			raw_props.thread_max_workgroup_size),
+	PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size),
+	PROP(RAW_THREAD_FEATURES,         raw_props.thread_features),
+	PROP(RAW_THREAD_TLS_ALLOC,        raw_props.thread_tls_alloc),
+	PROP(RAW_COHERENCY_MODE,          raw_props.coherency_mode),
+
+	PROP(COHERENCY_NUM_GROUPS,        coherency_info.num_groups),
+	PROP(COHERENCY_NUM_CORE_GROUPS,   coherency_info.num_core_groups),
+	PROP(COHERENCY_COHERENCY,         coherency_info.coherency),
+	PROP(COHERENCY_GROUP_0,           coherency_info.group[0].core_mask),
+	PROP(COHERENCY_GROUP_1,           coherency_info.group[1].core_mask),
+	PROP(COHERENCY_GROUP_2,           coherency_info.group[2].core_mask),
+	PROP(COHERENCY_GROUP_3,           coherency_info.group[3].core_mask),
+	PROP(COHERENCY_GROUP_4,           coherency_info.group[4].core_mask),
+	PROP(COHERENCY_GROUP_5,           coherency_info.group[5].core_mask),
+	PROP(COHERENCY_GROUP_6,           coherency_info.group[6].core_mask),
+	PROP(COHERENCY_GROUP_7,           coherency_info.group[7].core_mask),
+	PROP(COHERENCY_GROUP_8,           coherency_info.group[8].core_mask),
+	PROP(COHERENCY_GROUP_9,           coherency_info.group[9].core_mask),
+	PROP(COHERENCY_GROUP_10,          coherency_info.group[10].core_mask),
+	PROP(COHERENCY_GROUP_11,          coherency_info.group[11].core_mask),
+	PROP(COHERENCY_GROUP_12,          coherency_info.group[12].core_mask),
+	PROP(COHERENCY_GROUP_13,          coherency_info.group[13].core_mask),
+	PROP(COHERENCY_GROUP_14,          coherency_info.group[14].core_mask),
+	PROP(COHERENCY_GROUP_15,          coherency_info.group[15].core_mask),
+
+#undef PROP
+};
+
+int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *kprops = &kbdev->gpu_props;
+	struct base_gpu_props *props = &kprops->props;
+	u32 count = ARRAY_SIZE(gpu_property_mapping);
+	u32 i;
+	u32 size = 0;
+	u8 *p;
+
+	for (i = 0; i < count; i++) {
+		/* 4 bytes for the ID, and the size of the property */
+		size += 4 + gpu_property_mapping[i].size;
+	}
+
+	kprops->prop_buffer_size = size;
+	kprops->prop_buffer = kmalloc(size, GFP_KERNEL);
+
+	if (!kprops->prop_buffer) {
+		kprops->prop_buffer_size = 0;
+		return -ENOMEM;
+	}
+
+	p = kprops->prop_buffer;
+
+#define WRITE_U8(v) (*p++ = (v) & 0xFF)
+#define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0)
+#define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0)
+#define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0)
+
+	for (i = 0; i < count; i++) {
+		u32 type = gpu_property_mapping[i].type;
+		u8 type_size;
+		void *field = ((u8 *)props) + gpu_property_mapping[i].offset;
+
+		switch (gpu_property_mapping[i].size) {
+		case 1:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U8;
+			break;
+		case 2:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U16;
+			break;
+		case 4:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U32;
+			break;
+		case 8:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U64;
+			break;
+		default:
+			dev_err(kbdev->dev,
+				"Invalid gpu_property_mapping type=%d size=%d",
+				type, gpu_property_mapping[i].size);
+			return -EINVAL;
+		}
+
+		WRITE_U32((type<<2) | type_size);
+
+		switch (type_size) {
+		case KBASE_GPUPROP_VALUE_SIZE_U8:
+			WRITE_U8(*((u8 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U16:
+			WRITE_U16(*((u16 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U32:
+			WRITE_U32(*((u32 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U64:
+			WRITE_U64(*((u64 *)field));
+			break;
+		default: /* Cannot be reached */
+			WARN_ON(1);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
new file mode 100644
index 0000000..37d9c08
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -0,0 +1,77 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev		The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_set_features - Set up Kbase GPU properties
+ * @kbdev:   Device pointer
+ *
+ * This function sets up GPU properties that are dependent on the hardware
+ * features bitmask. This function must be preceeded by a call to
+ * kbase_hw_set_features_mask().
+ */
+void kbase_gpuprops_set_features(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_populate_user_buffer - Populate the GPU properties buffer
+ * @kbdev: The kbase device
+ *
+ * Fills kbdev->gpu_props->prop_buffer with the GPU properties for user
+ * space to read.
+ */
+int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_update_core_props_gpu_id - break down gpu id value
+ * @gpu_props: the &base_gpu_props structure
+ *
+ * Break down gpu_id value stored in base_gpu_props::raw_props.gpu_id into
+ * separate fields (version_status, minor_revision, major_revision, product_id)
+ * stored in base_gpu_props::core_props.
+ */
+void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props);
+
+
+#endif				/* _KBASE_GPUPROPS_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
new file mode 100755
index 0000000..d7877d1
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -0,0 +1,98 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_gpuprops_types.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_TYPES_H_
+#define _KBASE_GPUPROPS_TYPES_H_
+
+#include "mali_base_kernel.h"
+
+#define KBASE_GPU_SPEED_MHZ    123
+#define KBASE_GPU_PC_SIZE_LOG2 24U
+
+struct kbase_gpuprops_regdump {
+	u32 gpu_id;
+	u32 l2_features;
+	u32 core_features;
+	u32 tiler_features;
+	u32 mem_features;
+	u32 mmu_features;
+	u32 as_present;
+	u32 js_present;
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+	u32 thread_tls_alloc;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 shader_present_lo;
+	u32 shader_present_hi;
+	u32 tiler_present_lo;
+	u32 tiler_present_hi;
+	u32 l2_present_lo;
+	u32 l2_present_hi;
+	u32 stack_present_lo;
+	u32 stack_present_hi;
+	u32 coherency_features;
+};
+
+struct kbase_gpu_cache_props {
+	u8 associativity;
+	u8 external_bus_width;
+};
+
+struct kbase_gpu_mem_props {
+	u8 core_group;
+};
+
+struct kbase_gpu_mmu_props {
+	u8 va_bits;
+	u8 pa_bits;
+};
+
+struct kbase_gpu_props {
+	/* kernel-only properties */
+	u8 num_cores;
+	u8 num_core_groups;
+	u8 num_address_spaces;
+	u8 num_job_slots;
+
+	struct kbase_gpu_cache_props l2_props;
+
+	struct kbase_gpu_mem_props mem;
+	struct kbase_gpu_mmu_props mmu;
+
+	/* Properties shared with userspace */
+	base_gpu_props props;
+
+	u32 prop_buffer_size;
+	void *prop_buffer;
+};
+
+#endif				/* _KBASE_GPUPROPS_TYPES_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c
new file mode 100755
index 0000000..b362546
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c
@@ -0,0 +1,270 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_gwt.h"
+#include <linux/list_sort.h>
+
+static inline void kbase_gpu_gwt_setup_page_permission(
+				struct kbase_context *kctx,
+				unsigned long flag,
+				struct rb_node *node)
+{
+	struct rb_node *rbnode = node;
+
+	while (rbnode) {
+		struct kbase_va_region *reg;
+		int err = 0;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->nr_pages && !(reg->flags & KBASE_REG_FREE) &&
+					(reg->flags & KBASE_REG_GPU_WR)) {
+			err = kbase_mmu_update_pages(kctx, reg->start_pfn,
+					kbase_get_gpu_phy_pages(reg),
+					reg->gpu_alloc->nents,
+					reg->flags & flag);
+			if (err)
+				dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages failure\n");
+		}
+
+		rbnode = rb_next(rbnode);
+	}
+}
+
+static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx,
+					unsigned long flag)
+{
+	kbase_gpu_gwt_setup_page_permission(kctx, flag,
+				rb_first(&(kctx->reg_rbtree_same)));
+	kbase_gpu_gwt_setup_page_permission(kctx, flag,
+				rb_first(&(kctx->reg_rbtree_exec)));
+	kbase_gpu_gwt_setup_page_permission(kctx, flag,
+				rb_first(&(kctx->reg_rbtree_custom)));
+}
+
+
+int kbase_gpu_gwt_start(struct kbase_context *kctx)
+{
+	kbase_gpu_vm_lock(kctx);
+	if (kctx->gwt_enabled) {
+		kbase_gpu_vm_unlock(kctx);
+		return -EBUSY;
+	}
+
+	INIT_LIST_HEAD(&kctx->gwt_current_list);
+	INIT_LIST_HEAD(&kctx->gwt_snapshot_list);
+
+	/* If GWT is enabled using new vector dumping format
+	 * from user space, back up status of the job serialization flag and
+	 * use full serialisation of jobs for dumping.
+	 * Status will be restored on end of dumping in gwt_stop.
+	 */
+	kctx->kbdev->backup_serialize_jobs = kctx->kbdev->serialize_jobs;
+	kctx->kbdev->serialize_jobs = KBASE_SERIALIZE_INTRA_SLOT |
+						KBASE_SERIALIZE_INTER_SLOT;
+
+	/* Mark gwt enabled before making pages read only in case a
+	   write page fault is triggered while we're still in this loop.
+	   (kbase_gpu_vm_lock() doesn't prevent this!)
+	*/
+	kctx->gwt_enabled = true;
+	kctx->gwt_was_enabled = true;
+
+	kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR);
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+}
+
+int kbase_gpu_gwt_stop(struct kbase_context *kctx)
+{
+	struct kbasep_gwt_list_element *pos, *n;
+
+	kbase_gpu_vm_lock(kctx);
+	if (!kctx->gwt_enabled) {
+		kbase_gpu_vm_unlock(kctx);
+		return -EINVAL;
+	}
+
+	list_for_each_entry_safe(pos, n, &kctx->gwt_current_list, link) {
+		list_del(&pos->link);
+		kfree(pos);
+	}
+
+	list_for_each_entry_safe(pos, n, &kctx->gwt_snapshot_list, link) {
+		list_del(&pos->link);
+		kfree(pos);
+	}
+
+	kctx->kbdev->serialize_jobs = kctx->kbdev->backup_serialize_jobs;
+
+	kbase_gpu_gwt_setup_pages(kctx, ~0UL);
+
+	kctx->gwt_enabled = false;
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+}
+
+
+static int list_cmp_function(void *priv, struct list_head *a,
+				struct list_head *b)
+{
+	struct kbasep_gwt_list_element *elementA = container_of(a,
+				struct kbasep_gwt_list_element, link);
+	struct kbasep_gwt_list_element *elementB = container_of(b,
+				struct kbasep_gwt_list_element, link);
+
+	CSTD_UNUSED(priv);
+
+	if (elementA->page_addr > elementB->page_addr)
+		return 1;
+	return -1;
+}
+
+static void kbase_gpu_gwt_collate(struct kbase_context *kctx,
+		struct list_head *snapshot_list)
+{
+	struct kbasep_gwt_list_element *pos, *n;
+	struct kbasep_gwt_list_element *collated = NULL;
+
+	/* Sort the list */
+	list_sort(NULL, snapshot_list, list_cmp_function);
+
+	/* Combine contiguous areas. */
+	list_for_each_entry_safe(pos, n, snapshot_list, link) {
+		if (collated == NULL ||	collated->region !=
+					pos->region ||
+					(collated->page_addr +
+					(collated->num_pages * PAGE_SIZE)) !=
+					pos->page_addr) {
+			/* This is the first time through, a new region or
+			 * is not contiguous - start collating to this element
+			 */
+			collated = pos;
+		} else {
+			/* contiguous so merge */
+			collated->num_pages += pos->num_pages;
+			/* remove element from list */
+			list_del(&pos->link);
+			kfree(pos);
+		}
+	}
+}
+
+int kbase_gpu_gwt_dump(struct kbase_context *kctx,
+			union kbase_ioctl_cinstr_gwt_dump *gwt_dump)
+{
+	const u32 ubuf_size = gwt_dump->in.len;
+	u32 ubuf_count = 0;
+	__user void *user_addr = (__user void *)
+			(uintptr_t)gwt_dump->in.addr_buffer;
+	__user void *user_sizes = (__user void *)
+			(uintptr_t)gwt_dump->in.size_buffer;
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (!kctx->gwt_enabled) {
+		kbase_gpu_vm_unlock(kctx);
+		/* gwt_dump shouldn't be called when gwt is disabled */
+		return -EPERM;
+	}
+
+	if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer
+			|| !gwt_dump->in.size_buffer) {
+		kbase_gpu_vm_unlock(kctx);
+		/* We don't have any valid user space buffer to copy the
+		 * write modified addresses.
+		 */
+		return -EINVAL;
+	}
+
+	if (list_empty(&kctx->gwt_snapshot_list) &&
+			!list_empty(&kctx->gwt_current_list)) {
+
+		list_replace_init(&kctx->gwt_current_list,
+					&kctx->gwt_snapshot_list);
+
+		/* We have collected all write faults so far
+		 * and they will be passed on to user space.
+		 * Reset the page flags state to allow collection of
+		 * further write faults.
+		 */
+		kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR);
+
+		/* Sort and combine consecutive pages in the dump list*/
+		kbase_gpu_gwt_collate(kctx, &kctx->gwt_snapshot_list);
+	}
+
+	while ((!list_empty(&kctx->gwt_snapshot_list))) {
+		u64 addr_buffer[32];
+		u64 num_page_buffer[32];
+		u32 count = 0;
+		int err;
+		struct kbasep_gwt_list_element *dump_info, *n;
+
+		list_for_each_entry_safe(dump_info, n,
+				&kctx->gwt_snapshot_list, link) {
+			addr_buffer[count] = dump_info->page_addr;
+			num_page_buffer[count] = dump_info->num_pages;
+			count++;
+			list_del(&dump_info->link);
+			kfree(dump_info);
+			if (ARRAY_SIZE(addr_buffer) == count ||
+					ubuf_size == (ubuf_count + count))
+				break;
+		}
+
+		if (count) {
+			err = copy_to_user((user_addr +
+					(ubuf_count * sizeof(u64))),
+					(void *)addr_buffer,
+					count * sizeof(u64));
+			if (err) {
+				dev_err(kctx->kbdev->dev, "Copy to user failure\n");
+				kbase_gpu_vm_unlock(kctx);
+				return err;
+			}
+			err = copy_to_user((user_sizes +
+					(ubuf_count * sizeof(u64))),
+					(void *)num_page_buffer,
+					count * sizeof(u64));
+			if (err) {
+				dev_err(kctx->kbdev->dev, "Copy to user failure\n");
+				kbase_gpu_vm_unlock(kctx);
+				return err;
+			}
+
+			ubuf_count += count;
+		}
+
+		if (ubuf_count == ubuf_size)
+			break;
+	}
+
+	if (!list_empty(&kctx->gwt_snapshot_list))
+		gwt_dump->out.more_data_available = 1;
+	else
+		gwt_dump->out.more_data_available = 0;
+
+	gwt_dump->out.no_of_addr_collected = ubuf_count;
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h
new file mode 100644
index 0000000..7e7746e
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h
@@ -0,0 +1,55 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_KBASE_GWT_H)
+#define _KBASE_GWT_H
+
+#include <mali_kbase.h>
+#include <mali_kbase_ioctl.h>
+
+/**
+ * kbase_gpu_gwt_start - Start the GPU write tracking
+ * @kctx: Pointer to kernel context
+ *
+ * @return 0 on success, error on failure.
+ */
+int kbase_gpu_gwt_start(struct kbase_context *kctx);
+
+/**
+ * kbase_gpu_gwt_stop - Stop the GPU write tracking
+ * @kctx: Pointer to kernel context
+ *
+ * @return 0 on success, error on failure.
+ */
+int kbase_gpu_gwt_stop(struct kbase_context *kctx);
+
+/**
+ * kbase_gpu_gwt_dump - Pass page address of faulting addresses to user space.
+ * @kctx:	Pointer to kernel context
+ * @gwt_dump:	User space data to be passed.
+ *
+ * @return 0 on success, error on failure.
+ */
+int kbase_gpu_gwt_dump(struct kbase_context *kctx,
+			union kbase_ioctl_cinstr_gwt_dump *gwt_dump);
+
+#endif /* _KBASE_GWT_H */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
new file mode 100755
index 0000000..f34f53a
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -0,0 +1,498 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Run-time work-arounds helpers
+ */
+
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_midg_regmap.h>
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+
+void kbase_hw_set_features_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_feature *features;
+	u32 gpu_id;
+	u32 product_id;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			features = base_hw_features_tMIx;
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			features = base_hw_features_tHEx;
+			break;
+		case GPU_ID2_PRODUCT_TSIX:
+			features = base_hw_features_tSIx;
+			break;
+		case GPU_ID2_PRODUCT_TDVX:
+			features = base_hw_features_tDVx;
+			break;
+		case GPU_ID2_PRODUCT_TNOX:
+			features = base_hw_features_tNOx;
+			break;
+		case GPU_ID2_PRODUCT_TGOX:
+			features = base_hw_features_tGOx;
+			break;
+		case GPU_ID2_PRODUCT_TKAX:
+			features = base_hw_features_tKAx;
+			break;
+		case GPU_ID2_PRODUCT_TTRX:
+			features = base_hw_features_tTRx;
+			break;
+		case GPU_ID2_PRODUCT_TBOX:
+			features = base_hw_features_tBOx;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	} else {
+		switch (product_id) {
+		case GPU_ID_PI_TFRX:
+			/* FALLTHROUGH */
+		case GPU_ID_PI_T86X:
+			features = base_hw_features_tFxx;
+			break;
+		case GPU_ID_PI_T83X:
+			features = base_hw_features_t83x;
+			break;
+		case GPU_ID_PI_T82X:
+			features = base_hw_features_t82x;
+			break;
+		case GPU_ID_PI_T76X:
+			features = base_hw_features_t76x;
+			break;
+		case GPU_ID_PI_T72X:
+			features = base_hw_features_t72x;
+			break;
+		case GPU_ID_PI_T62X:
+			features = base_hw_features_t62x;
+			break;
+		case GPU_ID_PI_T60X:
+			features = base_hw_features_t60x;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	}
+
+	for (; *features != BASE_HW_FEATURE_END; features++)
+		set_bit(*features, &kbdev->hw_features_mask[0]);
+}
+
+/**
+ * kbase_hw_get_issues_for_new_id - Get the hardware issues for a new GPU ID
+ * @kbdev: Device pointer
+ *
+ * Return: pointer to an array of hardware issues, terminated by
+ * BASE_HW_ISSUE_END.
+ *
+ * This function can only be used on new-format GPU IDs, i.e. those for which
+ * GPU_ID_IS_NEW_FORMAT evaluates as true. The GPU ID is read from the @kbdev.
+ *
+ * In debugging versions of the driver, unknown versions of a known GPU will
+ * be treated as the most recent known version not later than the actual
+ * version. In such circumstances, the GPU ID in @kbdev will also be replaced
+ * with the most recent known version.
+ *
+ * Note: The GPU configuration must have been read by kbase_gpuprops_get_props()
+ * before calling this function.
+ */
+static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
+					struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues = NULL;
+
+	struct base_hw_product {
+		u32 product_model;
+		struct {
+			u32 version;
+			const enum base_hw_issue *issues;
+		} map[7];
+	};
+
+	static const struct base_hw_product base_hw_products[] = {
+		{GPU_ID2_PRODUCT_TMIX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 1),
+		   base_hw_issues_tMIx_r0p0_05dev0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1},
+		  {U32_MAX /* sentinel value */, NULL} } },
+
+		{GPU_ID2_PRODUCT_THEX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2},
+		  {GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TSIX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0},
+		  {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TDVX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TNOX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TGOX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TKAX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tKAx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TTRX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TBOX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBOx_r0p0},
+		  {U32_MAX, NULL} } },
+	};
+
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 product_model = gpu_id & GPU_ID2_PRODUCT_MODEL;
+	const struct base_hw_product *product = NULL;
+	size_t p;
+
+	/* Stop when we reach the end of the products array. */
+	for (p = 0; p < ARRAY_SIZE(base_hw_products); ++p) {
+		if (product_model == base_hw_products[p].product_model) {
+			product = &base_hw_products[p];
+			break;
+		}
+	}
+
+	if (product != NULL) {
+		/* Found a matching product. */
+		const u32 version = gpu_id & GPU_ID2_VERSION;
+#if !MALI_CUSTOMER_RELEASE
+		u32 fallback_version = 0;
+		const enum base_hw_issue *fallback_issues = NULL;
+#endif
+		size_t v;
+
+		/* Stop when we reach the end of the map. */
+		for (v = 0; product->map[v].version != U32_MAX; ++v) {
+
+			if (version == product->map[v].version) {
+				/* Exact match so stop. */
+				issues = product->map[v].issues;
+				break;
+			}
+
+#if !MALI_CUSTOMER_RELEASE
+			/* Check whether this is a candidate for most recent
+				known version not later than the actual
+				version. */
+			if ((version > product->map[v].version) &&
+				(product->map[v].version >= fallback_version)) {
+				fallback_version = product->map[v].version;
+				fallback_issues = product->map[v].issues;
+			}
+#endif
+		}
+
+#if !MALI_CUSTOMER_RELEASE
+		if ((issues == NULL) && (fallback_issues != NULL)) {
+			/* Fall back to the issue set of the most recent known
+				version not later than the actual version. */
+			issues = fallback_issues;
+
+			dev_info(kbdev->dev,
+				"r%dp%d status %d is unknown; treating as r%dp%d status %d",
+				(gpu_id & GPU_ID2_VERSION_MAJOR) >>
+					GPU_ID2_VERSION_MAJOR_SHIFT,
+				(gpu_id & GPU_ID2_VERSION_MINOR) >>
+					GPU_ID2_VERSION_MINOR_SHIFT,
+				(gpu_id & GPU_ID2_VERSION_STATUS) >>
+					GPU_ID2_VERSION_STATUS_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_MAJOR) >>
+					GPU_ID2_VERSION_MAJOR_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_MINOR) >>
+					GPU_ID2_VERSION_MINOR_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_STATUS) >>
+					GPU_ID2_VERSION_STATUS_SHIFT);
+
+			gpu_id &= ~GPU_ID2_VERSION;
+			gpu_id |= fallback_version;
+			kbdev->gpu_props.props.raw_props.gpu_id = gpu_id;
+
+			kbase_gpuprops_update_core_props_gpu_id(
+				&kbdev->gpu_props.props);
+		}
+#endif
+	}
+	return issues;
+}
+
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues;
+	u32 gpu_id;
+	u32 product_id;
+	u32 impl_tech;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
+
+	if (impl_tech != IMPLEMENTATION_MODEL) {
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			issues = kbase_hw_get_issues_for_new_id(kbdev);
+			if (issues == NULL) {
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+
+#if !MALI_CUSTOMER_RELEASE
+			/* The GPU ID might have been replaced with the last
+			   known version of the same GPU. */
+			gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+#endif
+
+		} else {
+			switch (gpu_id) {
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+				issues = base_hw_issues_t60x_r0p0_15dev0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+				issues = base_hw_issues_t60x_r0p0_eac;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+				issues = base_hw_issues_t60x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+				issues = base_hw_issues_t62x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+				issues = base_hw_issues_t62x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+				issues = base_hw_issues_t62x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+				issues = base_hw_issues_t76x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+				issues = base_hw_issues_t76x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+				issues = base_hw_issues_t76x_r0p1_50rel0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+				issues = base_hw_issues_t76x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+				issues = base_hw_issues_t76x_r0p3;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+				issues = base_hw_issues_t76x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+				issues = base_hw_issues_t72x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+				issues = base_hw_issues_t72x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+				issues = base_hw_issues_t72x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+				issues = base_hw_issues_tFRx_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+				issues = base_hw_issues_tFRx_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8):
+				issues = base_hw_issues_tFRx_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
+				issues = base_hw_issues_tFRx_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+				issues = base_hw_issues_t86x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8):
+				issues = base_hw_issues_t86x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
+				issues = base_hw_issues_t86x_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
+				issues = base_hw_issues_t83x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8):
+				issues = base_hw_issues_t83x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
+				issues = base_hw_issues_t82x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
+				issues = base_hw_issues_t82x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8):
+				issues = base_hw_issues_t82x_r1p0;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		}
+	} else {
+		/* Software model */
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+			case GPU_ID2_PRODUCT_TMIX:
+				issues = base_hw_issues_model_tMIx;
+				break;
+			case GPU_ID2_PRODUCT_THEX:
+				issues = base_hw_issues_model_tHEx;
+				break;
+			case GPU_ID2_PRODUCT_TSIX:
+				issues = base_hw_issues_model_tSIx;
+				break;
+			case GPU_ID2_PRODUCT_TDVX:
+				issues = base_hw_issues_model_tDVx;
+				break;
+			case GPU_ID2_PRODUCT_TNOX:
+				issues = base_hw_issues_model_tNOx;
+				break;
+			case GPU_ID2_PRODUCT_TGOX:
+				issues = base_hw_issues_model_tGOx;
+				break;
+			case GPU_ID2_PRODUCT_TKAX:
+				issues = base_hw_issues_model_tKAx;
+				break;
+			case GPU_ID2_PRODUCT_TTRX:
+				issues = base_hw_issues_model_tTRx;
+				break;
+			case GPU_ID2_PRODUCT_TBOX:
+				issues = base_hw_issues_model_tBOx;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		} else {
+			switch (product_id) {
+			case GPU_ID_PI_T60X:
+				issues = base_hw_issues_model_t60x;
+				break;
+			case GPU_ID_PI_T62X:
+				issues = base_hw_issues_model_t62x;
+				break;
+			case GPU_ID_PI_T72X:
+				issues = base_hw_issues_model_t72x;
+				break;
+			case GPU_ID_PI_T76X:
+				issues = base_hw_issues_model_t76x;
+				break;
+			case GPU_ID_PI_TFRX:
+				issues = base_hw_issues_model_tFRx;
+				break;
+			case GPU_ID_PI_T86X:
+				issues = base_hw_issues_model_t86x;
+				break;
+			case GPU_ID_PI_T83X:
+				issues = base_hw_issues_model_t83x;
+				break;
+			case GPU_ID_PI_T82X:
+				issues = base_hw_issues_model_t82x;
+				break;
+			default:
+				dev_err(kbdev->dev, "Unknown GPU ID %x",
+					gpu_id);
+				return -EINVAL;
+			}
+		}
+	}
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		dev_info(kbdev->dev,
+			"GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d",
+			(gpu_id & GPU_ID2_PRODUCT_MAJOR) >>
+				GPU_ID2_PRODUCT_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_MAJOR) >>
+				GPU_ID2_ARCH_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_MINOR) >>
+				GPU_ID2_ARCH_MINOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_REV) >>
+				GPU_ID2_ARCH_REV_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_MAJOR) >>
+				GPU_ID2_VERSION_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_MINOR) >>
+				GPU_ID2_VERSION_MINOR_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_STATUS) >>
+				GPU_ID2_VERSION_STATUS_SHIFT);
+	} else {
+		dev_info(kbdev->dev,
+			"GPU identified as 0x%04x r%dp%d status %d",
+			(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+				GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+			(gpu_id & GPU_ID_VERSION_MAJOR) >>
+				GPU_ID_VERSION_MAJOR_SHIFT,
+			(gpu_id & GPU_ID_VERSION_MINOR) >>
+				GPU_ID_VERSION_MINOR_SHIFT,
+			(gpu_id & GPU_ID_VERSION_STATUS) >>
+				GPU_ID_VERSION_STATUS_SHIFT);
+	}
+
+	for (; *issues != BASE_HW_ISSUE_END; issues++)
+		set_bit(*issues, &kbdev->hw_issues_mask[0]);
+
+	return 0;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
new file mode 100644
index 0000000..f386b16
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file
+ * Run-time work-arounds helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+	test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+	test_bit(feature, &(kbdev)->hw_features_mask[0])
+
+/**
+ * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID
+ * @kbdev: Device pointer
+ *
+ * Return: 0 if the GPU ID was recognized, otherwise -EINVAL.
+ *
+ * The GPU ID is read from the @kbdev.
+ *
+ * In debugging versions of the driver, unknown versions of a known GPU with a
+ * new-format ID will be treated as the most recent known version not later
+ * than the actual version. In such circumstances, the GPU ID in @kbdev will
+ * also be replaced with the most recent known version.
+ *
+ * Note: The GPU configuration must have been read by
+ * kbase_gpuprops_get_props() before calling this function.
+ */
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif				/* _KBASE_HW_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
new file mode 100644
index 0000000..dde4965
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -0,0 +1,59 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * HW access backend common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_BACKEND_H_
+#define _KBASE_HWACCESS_BACKEND_H_
+
+/**
+ * kbase_backend_early_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_early_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
new file mode 100755
index 0000000..124a2d9
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -0,0 +1,51 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/**
+ * @file mali_kbase_hwaccess_gpu_defs.h
+ * HW access common definitions
+ */
+
+#ifndef _KBASE_HWACCESS_DEFS_H_
+#define _KBASE_HWACCESS_DEFS_H_
+
+#include <mali_kbase_jm_defs.h>
+
+/**
+ * struct kbase_hwaccess_data - object encapsulating the GPU backend specific
+ *                              data for the HW access layer.
+ *                              hwaccess_lock (a spinlock) must be held when
+ *                              accessing this structure.
+ * @active_kctx:     pointer to active kbase context which last submitted an
+ *                   atom to GPU and while the context is active it can
+ *                   submit new atoms to GPU from the irq context also, without
+ *                   going through the bottom half of job completion path.
+ * @backend:         GPU backend specific data for HW access layer
+ */
+struct kbase_hwaccess_data {
+	struct kbase_context *active_kctx[BASE_JM_MAX_NR_SLOTS];
+
+	struct kbase_backend_data backend;
+};
+
+#endif /* _KBASE_HWACCESS_DEFS_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
new file mode 100755
index 0000000..63844d9
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ *				  GPU
+ * @kbdev:	Device pointer
+ * @regdump:	Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * The caller should ensure that GPU remains powered-on during this function.
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+/**
+ * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU
+ * @kbdev:   Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads GPU properties that are dependent on the hardware
+ * features bitmask. It will power-on the GPU if required.
+ */
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
new file mode 100755
index 0000000..0c5ceff
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,121 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include <mali_kbase_instr_defs.h>
+
+/**
+ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * @kbdev:	Kbase device
+ * @kctx:	Kbase context
+ * @enable:	HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				struct kbase_ioctl_hwcnt_enable *enable);
+
+/**
+ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx:	Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true,
+ * of call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ *				       completed.
+ * @kctx:	Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ *				     completed
+ * @kctx:	Kbase context
+ * @success:	Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx:	Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_init() - Terminate the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
new file mode 100755
index 0000000..ea87913
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -0,0 +1,390 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_JM_H_
+#define _KBASE_HWACCESS_JM_H_
+
+/**
+ * kbase_backend_run_atom() - Run an atom on the GPU
+ * @kbdev:	Device pointer
+ * @atom:	Atom to run
+ *
+ * Caller must hold the HW access lock
+ */
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_slot_update - Update state based on slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ *
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ */
+void kbase_backend_slot_update(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_find_and_release_free_address_space() - Release a free AS
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * This function can evict an idle context from the runpool, freeing up the
+ * address space it was using.
+ *
+ * The address space is marked as in use. The caller must either assign a
+ * context using kbase_gpu_use_ctx(), or release it using
+ * kbase_ctx_sched_release()
+ *
+ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none
+ *	   available
+ */
+int kbase_backend_find_and_release_free_address_space(
+		struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the
+ *			     provided address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @as_nr:	Free address space to use
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * Return: true if successful, false if ASID not assigned.
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr);
+
+/**
+ * kbase_backend_use_ctx_sched() - Activate a context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ * @js:         Job slot to activate context on
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * The context must already be scheduled and assigned to an address space. If
+ * the context is not scheduled, then kbase_gpu_use_ctx() should be used
+ * instead.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if context is now active, false otherwise (ie if context does
+ *	   not have an address space assigned)
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+					struct kbase_context *kctx, int js);
+
+/**
+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
+ *                                 de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+				struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will
+ *                                   de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex
+ *
+ * This function must perform any operations that could not be performed in IRQ
+ * context by kbase_backend_release_ctx_irq().
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_cacheclean - Perform a cache clean if the given atom requires
+ *                            one
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the failed atom
+ *
+ * On some GPUs, the GPU cache must be cleaned following a failed atom. This
+ * function performs a clean if it is required by @katom.
+ */
+void kbase_backend_cacheclean(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom);
+
+
+/**
+ * kbase_backend_complete_wq() - Perform backend-specific actions required on
+ *				 completing an atom.
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the atom to complete
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ *
+ * Return: true if atom has completed, false if atom should be re-submitted
+ */
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions
+ *                                        required on completing an atom, after
+ *                                        any scheduling has taken place.
+ * @kbdev:         Device pointer
+ * @core_req:      Core requirements of atom
+ * @affinity:      Affinity of atom
+ * @coreref_state: Coreref state of atom
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ */
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state);
+
+/**
+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
+ *			   and remove any others from the ringbuffers.
+ * @kbdev:		Device pointer
+ * @end_timestamp:	Timestamp of reset
+ */
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
+
+/**
+ * kbase_backend_inspect_head() - Return the atom currently at the head of slot
+ *				  @js
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
+ *                              @js
+ * @kbdev: Device pointer
+ * @js:    Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
+ *				      slot.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot
+ */
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
+ *					that are currently on the GPU.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot @js that are currently on the GPU.
+ */
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
+ *				       has changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg starting/stopping
+ * scheduling timers).
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg updating timeouts of
+ * currently running atoms).
+ */
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_slot_free() - Return the number of jobs that can be currently
+ *			       submitted to slot @js.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of jobs that can be submitted.
+ */
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
+ *                                        running from a context
+ * @kctx: Context pointer
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting
+ * context from the hardware.
+ */
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+
+/**
+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and
+ *                               to be descheduled.
+ * @kctx: Context pointer
+ *
+ * This should be called following kbase_js_zap_context(), to ensure the context
+ * can be safely destroyed.
+ */
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
+
+/**
+ * kbase_backend_get_current_flush_id - Return the current flush ID
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: the current flush ID to be recorded for each job chain
+ */
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it returns
+ * true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_locked - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_silent - Reset the GPU silently
+ * @kbdev: Device pointer
+ *
+ * Reset the GPU without trying to cancel jobs and don't emit messages into
+ * the kernel log while doing the reset.
+ *
+ * This function should be used in cases where we are doing a controlled reset
+ * of the GPU as part of normal processing (e.g. exiting protected mode) where
+ * the driver will have ensured the scheduler has been idled and all other
+ * users of the GPU (e.g. instrumentation) have been suspended.
+ */
+void kbase_reset_gpu_silent(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_active - Reports if the GPU is being reset
+ * @kbdev: Device pointer
+ *
+ * Return: True if the GPU is in the process of being reset.
+ */
+bool kbase_reset_gpu_active(struct kbase_device *kbdev);
+#endif
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom);
+
+/* Object containing callbacks for enabling/disabling protected mode, used
+ * on GPU which supports protected mode switching natively.
+ */
+extern struct protected_mode_ops kbase_native_protected_ops;
+
+#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
new file mode 100644
index 0000000..4598d80
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -0,0 +1,214 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/**
+ * @file mali_kbase_hwaccess_pm.h
+ * HW access power manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_PM_H_
+#define _KBASE_HWACCESS_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+#include <mali_kbase_pm_defs.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/* Functions common to all HW access backends */
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return 0 if the power management framework was successfully
+ *         initialized.
+ */
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_hwaccess_pm_powerup - Power up the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
+ *
+ * Power up GPU after all modules have been initialized and interrupt handlers
+ * installed.
+ *
+ * Return: 0 if powerup was successful.
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ *
+ * Should ensure that no new interrupts are generated, but allow any currently
+ * running interrupt handlers to complete successfully. The GPU is forced off by
+ * the time this function returns, regardless of whether or not the active power
+ * policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to suspend the GPU
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to resume the GPU from a suspend
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for activating the GPU. Called when the first
+ * context goes active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for idling the GPU. Called when the last
+ * context goes idle.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev         The kbase device structure for the device (must be a
+ *                      valid pointer)
+ * @param new_core_mask_js0 The core mask to use for job slot 0
+ * @param new_core_mask_js0 The core mask to use for job slot 1
+ * @param new_core_mask_js0 The core mask to use for job slot 2
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+					const struct kbase_pm_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
+
+#endif /* _KBASE_HWACCESS_PM_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
new file mode 100644
index 0000000..7f64936
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/**
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
new file mode 100644
index 0000000..10706b8
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_HWCNT_READER_H_
+#define _KBASE_HWCNT_READER_H_
+
+/* The ids of ioctl commands. */
+#define KBASE_HWCNT_READER 0xBE
+#define KBASE_HWCNT_READER_GET_HWVER       _IOR(KBASE_HWCNT_READER, 0x00, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32)
+#define KBASE_HWCNT_READER_DUMP            _IOW(KBASE_HWCNT_READER, 0x10, u32)
+#define KBASE_HWCNT_READER_CLEAR           _IOW(KBASE_HWCNT_READER, 0x11, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER      _IOR(KBASE_HWCNT_READER, 0x20,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_PUT_BUFFER      _IOW(KBASE_HWCNT_READER, 0x21,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_SET_INTERVAL    _IOW(KBASE_HWCNT_READER, 0x30, u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT    _IOW(KBASE_HWCNT_READER, 0x40, u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT   _IOW(KBASE_HWCNT_READER, 0x41, u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32)
+
+/**
+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
+ * @timestamp:  time when sample was collected
+ * @event_id:   id of an event that triggered sample collection
+ * @buffer_idx: position in sampling area where sample buffer was stored
+ */
+struct kbase_hwcnt_reader_metadata {
+	u64 timestamp;
+	u32 event_id;
+	u32 buffer_idx;
+};
+
+/**
+ * enum base_hwcnt_reader_event - hwcnt dumping events
+ * @BASE_HWCNT_READER_EVENT_MANUAL:   manual request for dump
+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
+ * @BASE_HWCNT_READER_EVENT_PREJOB:   prejob dump request
+ * @BASE_HWCNT_READER_EVENT_POSTJOB:  postjob dump request
+ * @BASE_HWCNT_READER_EVENT_COUNT:    number of supported events
+ */
+enum base_hwcnt_reader_event {
+	BASE_HWCNT_READER_EVENT_MANUAL,
+	BASE_HWCNT_READER_EVENT_PERIODIC,
+	BASE_HWCNT_READER_EVENT_PREJOB,
+	BASE_HWCNT_READER_EVENT_POSTJOB,
+
+	BASE_HWCNT_READER_EVENT_COUNT
+};
+
+#endif /* _KBASE_HWCNT_READER_H_ */
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
new file mode 100755
index 0000000..fcb9ad3
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
@@ -0,0 +1,828 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IOCTL_H_
+#define _KBASE_IOCTL_H_
+
+#ifdef __cpluscplus
+extern "C" {
+#endif
+
+#include <asm-generic/ioctl.h>
+#include <linux/types.h>
+
+#define KBASE_IOCTL_TYPE 0x80
+
+/*
+ * 11.1:
+ * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags
+ * 11.2:
+ * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_SECURE,
+ *   which some user-side clients prior to 11.2 might fault if they received
+ *   them
+ * 11.3:
+ * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and
+ *   KBASE_IOCTL_STICKY_RESOURCE_UNMAP
+ * 11.4:
+ * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET
+ * 11.5:
+ * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD)
+ * 11.6:
+ * - Added flags field to base_jit_alloc_info structure, which can be used to
+ *   specify pseudo chunked tiler alignment for JIT allocations.
+ * 11.7:
+ * - Removed UMP support
+ */
+#define BASE_UK_VERSION_MAJOR 11
+#define BASE_UK_VERSION_MINOR 7
+
+/**
+ * struct kbase_ioctl_version_check - Check version compatibility with kernel
+ *
+ * @major: Major version number
+ * @minor: Minor version number
+ */
+struct kbase_ioctl_version_check {
+	__u16 major;
+	__u16 minor;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK \
+	_IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
+
+/**
+ * struct kbase_ioctl_set_flags - Set kernel context creation flags
+ *
+ * @create_flags: Flags - see base_context_create_flags
+ */
+struct kbase_ioctl_set_flags {
+	__u32 create_flags;
+};
+
+#define KBASE_IOCTL_SET_FLAGS \
+	_IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags)
+
+/**
+ * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
+ *
+ * @addr: Memory address of an array of struct base_jd_atom_v2
+ * @nr_atoms: Number of entries in the array
+ * @stride: sizeof(struct base_jd_atom_v2)
+ */
+struct kbase_ioctl_job_submit {
+	__u64 addr;
+	__u32 nr_atoms;
+	__u32 stride;
+};
+
+#define KBASE_IOCTL_JOB_SUBMIT \
+	_IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit)
+
+/**
+ * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel
+ *
+ * @buffer: Pointer to the buffer to store properties into
+ * @size: Size of the buffer
+ * @flags: Flags - must be zero for now
+ *
+ * The ioctl will return the number of bytes stored into @buffer or an error
+ * on failure (e.g. @size is too small). If @size is specified as 0 then no
+ * data will be written but the return value will be the number of bytes needed
+ * for all the properties.
+ *
+ * @flags may be used in the future to request a different format for the
+ * buffer. With @flags == 0 the following format is used.
+ *
+ * The buffer will be filled with pairs of values, a u32 key identifying the
+ * property followed by the value. The size of the value is identified using
+ * the bottom bits of the key. The value then immediately followed the key and
+ * is tightly packed (there is no padding). All keys and values are
+ * little-endian.
+ *
+ * 00 = u8
+ * 01 = u16
+ * 10 = u32
+ * 11 = u64
+ */
+struct kbase_ioctl_get_gpuprops {
+	__u64 buffer;
+	__u32 size;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_GET_GPUPROPS \
+	_IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops)
+
+#define KBASE_IOCTL_POST_TERM \
+	_IO(KBASE_IOCTL_TYPE, 4)
+
+/**
+ * union kbase_ioctl_mem_alloc - Allocate memory on the GPU
+ *
+ * @va_pages: The number of pages of virtual address space to reserve
+ * @commit_pages: The number of physical pages to allocate
+ * @extent: The number of extra pages to allocate on each GPU fault which grows
+ *          the region
+ * @flags: Flags
+ * @gpu_va: The GPU virtual address which is allocated
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_alloc {
+	struct {
+		__u64 va_pages;
+		__u64 commit_pages;
+		__u64 extent;
+		__u64 flags;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALLOC \
+	_IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc)
+
+/**
+ * struct kbase_ioctl_mem_query - Query properties of a GPU memory region
+ * @gpu_addr: A GPU address contained within the region
+ * @query: The type of query
+ * @value: The result of the query
+ *
+ * Use a %KBASE_MEM_QUERY_xxx flag as input for @query.
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_query {
+	struct {
+		__u64 gpu_addr;
+		__u64 query;
+	} in;
+	struct {
+		__u64 value;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_QUERY \
+	_IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query)
+
+#define KBASE_MEM_QUERY_COMMIT_SIZE	((u64)1)
+#define KBASE_MEM_QUERY_VA_SIZE		((u64)2)
+#define KBASE_MEM_QUERY_FLAGS		((u64)3)
+
+/**
+ * struct kbase_ioctl_mem_free - Free a memory region
+ * @gpu_addr: Handle to the region to free
+ */
+struct kbase_ioctl_mem_free {
+	__u64 gpu_addr;
+};
+
+#define KBASE_IOCTL_MEM_FREE \
+	_IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free)
+
+/**
+ * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader
+ * @buffer_count: requested number of dumping buffers
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ *
+ * A fd is returned from the ioctl if successful, or a negative value on error
+ */
+struct kbase_ioctl_hwcnt_reader_setup {
+	__u32 buffer_count;
+	__u32 jm_bm;
+	__u32 shader_bm;
+	__u32 tiler_bm;
+	__u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_READER_SETUP \
+	_IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup)
+
+/**
+ * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection
+ * @dump_buffer:  GPU address to write counters to
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ */
+struct kbase_ioctl_hwcnt_enable {
+	__u64 dump_buffer;
+	__u32 jm_bm;
+	__u32 shader_bm;
+	__u32 tiler_bm;
+	__u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_ENABLE \
+	_IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable)
+
+#define KBASE_IOCTL_HWCNT_DUMP \
+	_IO(KBASE_IOCTL_TYPE, 10)
+
+#define KBASE_IOCTL_HWCNT_CLEAR \
+	_IO(KBASE_IOCTL_TYPE, 11)
+
+/**
+ * struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to.
+ * @data:    Counter samples for the dummy model.
+ * @size:    Size of the counter sample data.
+ * @padding: Padding.
+ */
+struct kbase_ioctl_hwcnt_values {
+	__u64 data;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_HWCNT_SET \
+	_IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values)
+
+/**
+ * struct kbase_ioctl_disjoint_query - Query the disjoint counter
+ * @counter:   A counter of disjoint events in the kernel
+ */
+struct kbase_ioctl_disjoint_query {
+	__u32 counter;
+};
+
+#define KBASE_IOCTL_DISJOINT_QUERY \
+	_IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query)
+
+/**
+ * struct kbase_ioctl_get_ddk_version - Query the kernel version
+ * @version_buffer: Buffer to receive the kernel version string
+ * @size: Size of the buffer
+ * @padding: Padding
+ *
+ * The ioctl will return the number of bytes written into version_buffer
+ * (which includes a NULL byte) or a negative error code
+ *
+ * The ioctl request code has to be _IOW because the data in ioctl struct is
+ * being copied to the kernel, even though the kernel then writes out the
+ * version info to the buffer specified in the ioctl.
+ */
+struct kbase_ioctl_get_ddk_version {
+	__u64 version_buffer;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_GET_DDK_VERSION \
+	_IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
+
+/**
+ * struct kbase_ioctl_mem_jit_init_old - Initialise the JIT memory allocator
+ *
+ * @va_pages: Number of VA pages to reserve for JIT
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ *
+ * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
+ * backwards compatibility.
+ */
+struct kbase_ioctl_mem_jit_init_old {
+	__u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT_OLD \
+	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_old)
+
+/**
+ * struct kbase_ioctl_mem_jit_init - Initialise the JIT memory allocator
+ *
+ * @va_pages: Number of VA pages to reserve for JIT
+ * @max_allocations: Maximum number of concurrent allocations
+ * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
+ * @padding: Currently unused, must be zero
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ */
+struct kbase_ioctl_mem_jit_init {
+	__u64 va_pages;
+	__u8 max_allocations;
+	__u8 trim_level;
+	__u8 padding[6];
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT \
+	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init)
+
+/**
+ * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory
+ *
+ * @handle: GPU memory handle (GPU VA)
+ * @user_addr: The address where it is mapped in user space
+ * @size: The number of bytes to synchronise
+ * @type: The direction to synchronise: 0 is sync to memory (clean),
+ * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants.
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_mem_sync {
+	__u64 handle;
+	__u64 user_addr;
+	__u64 size;
+	__u8 type;
+	__u8 padding[7];
+};
+
+#define KBASE_IOCTL_MEM_SYNC \
+	_IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync)
+
+/**
+ * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer
+ *
+ * @gpu_addr: The GPU address of the memory region
+ * @cpu_addr: The CPU address to locate
+ * @size: A size in bytes to validate is contained within the region
+ * @offset: The offset from the start of the memory region to @cpu_addr
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_find_cpu_offset {
+	struct {
+		__u64 gpu_addr;
+		__u64 cpu_addr;
+		__u64 size;
+	} in;
+	struct {
+		__u64 offset;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \
+	_IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset)
+
+/**
+ * struct kbase_ioctl_get_context_id - Get the kernel context ID
+ *
+ * @id: The kernel context ID
+ */
+struct kbase_ioctl_get_context_id {
+	__u32 id;
+};
+
+#define KBASE_IOCTL_GET_CONTEXT_ID \
+	_IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id)
+
+/**
+ * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd
+ *
+ * @flags: Flags
+ *
+ * The ioctl returns a file descriptor when successful
+ */
+struct kbase_ioctl_tlstream_acquire {
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_TLSTREAM_ACQUIRE \
+	_IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire)
+
+#define KBASE_IOCTL_TLSTREAM_FLUSH \
+	_IO(KBASE_IOCTL_TYPE, 19)
+
+/**
+ * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region
+ *
+ * @gpu_addr: The memory region to modify
+ * @pages:    The number of physical pages that should be present
+ *
+ * The ioctl may return on the following error codes or 0 for success:
+ *   -ENOMEM: Out of memory
+ *   -EINVAL: Invalid arguments
+ */
+struct kbase_ioctl_mem_commit {
+	__u64 gpu_addr;
+	__u64 pages;
+};
+
+#define KBASE_IOCTL_MEM_COMMIT \
+	_IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit)
+
+/**
+ * union kbase_ioctl_mem_alias - Create an alias of memory regions
+ * @flags: Flags, see BASE_MEM_xxx
+ * @stride: Bytes between start of each memory region
+ * @nents: The number of regions to pack together into the alias
+ * @aliasing_info: Pointer to an array of struct base_mem_aliasing_info
+ * @gpu_va: Address of the new alias
+ * @va_pages: Size of the new alias
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_alias {
+	struct {
+		__u64 flags;
+		__u64 stride;
+		__u64 nents;
+		__u64 aliasing_info;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALIAS \
+	_IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias)
+
+/**
+ * union kbase_ioctl_mem_import - Import memory for use by the GPU
+ * @flags: Flags, see BASE_MEM_xxx
+ * @phandle: Handle to the external memory
+ * @type: Type of external memory, see base_mem_import_type
+ * @padding: Amount of extra VA pages to append to the imported buffer
+ * @gpu_va: Address of the new alias
+ * @va_pages: Size of the new alias
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_import {
+	struct {
+		__u64 flags;
+		__u64 phandle;
+		__u32 type;
+		__u32 padding;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_IMPORT \
+	_IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import)
+
+/**
+ * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region
+ * @gpu_va: The GPU region to modify
+ * @flags: The new flags to set
+ * @mask: Mask of the flags to modify
+ */
+struct kbase_ioctl_mem_flags_change {
+	__u64 gpu_va;
+	__u64 flags;
+	__u64 mask;
+};
+
+#define KBASE_IOCTL_MEM_FLAGS_CHANGE \
+	_IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change)
+
+/**
+ * struct kbase_ioctl_stream_create - Create a synchronisation stream
+ * @name: A name to identify this stream. Must be NULL-terminated.
+ *
+ * Note that this is also called a "timeline", but is named stream to avoid
+ * confusion with other uses of the word.
+ *
+ * Unused bytes in @name (after the first NULL byte) must be also be NULL bytes.
+ *
+ * The ioctl returns a file descriptor.
+ */
+struct kbase_ioctl_stream_create {
+	char name[32];
+};
+
+#define KBASE_IOCTL_STREAM_CREATE \
+	_IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create)
+
+/**
+ * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence
+ * @fd: The file descriptor to validate
+ */
+struct kbase_ioctl_fence_validate {
+	int fd;
+};
+
+#define KBASE_IOCTL_FENCE_VALIDATE \
+	_IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
+
+/**
+ * struct kbase_ioctl_get_profiling_controls - Get the profiling controls
+ * @count: The size of @buffer in u32 words
+ * @buffer: The buffer to receive the profiling controls
+ * @padding: Padding
+ */
+struct kbase_ioctl_get_profiling_controls {
+	__u64 buffer;
+	__u32 count;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_GET_PROFILING_CONTROLS \
+	_IOW(KBASE_IOCTL_TYPE, 26, struct kbase_ioctl_get_profiling_controls)
+
+/**
+ * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel
+ * @buffer: Pointer to the information
+ * @len: Length
+ * @padding: Padding
+ *
+ * The data provided is accessible through a debugfs file
+ */
+struct kbase_ioctl_mem_profile_add {
+	__u64 buffer;
+	__u32 len;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_MEM_PROFILE_ADD \
+	_IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
+
+/**
+ * struct kbase_ioctl_soft_event_update - Update the status of a soft-event
+ * @event: GPU address of the event which has been updated
+ * @new_status: The new status to set
+ * @flags: Flags for future expansion
+ */
+struct kbase_ioctl_soft_event_update {
+	__u64 event;
+	__u32 new_status;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_SOFT_EVENT_UPDATE \
+	_IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
+
+/**
+ * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource
+ * @count: Number of resources
+ * @address: Array of u64 GPU addresses of the external resources to map
+ */
+struct kbase_ioctl_sticky_resource_map {
+	__u64 count;
+	__u64 address;
+};
+
+#define KBASE_IOCTL_STICKY_RESOURCE_MAP \
+	_IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map)
+
+/**
+ * struct kbase_ioctl_sticky_resource_map - Unmap a resource mapped which was
+ *                                          previously permanently mapped
+ * @count: Number of resources
+ * @address: Array of u64 GPU addresses of the external resources to unmap
+ */
+struct kbase_ioctl_sticky_resource_unmap {
+	__u64 count;
+	__u64 address;
+};
+
+#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \
+	_IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap)
+
+/**
+ * union kbase_ioctl_mem_find_gpu_start_and_offset - Find the start address of
+ *                                                   the GPU memory region for
+ *                                                   the given gpu address and
+ *                                                   the offset of that address
+ *                                                   into the region
+ *
+ * @gpu_addr: GPU virtual address
+ * @size: Size in bytes within the region
+ * @start: Address of the beginning of the memory region enclosing @gpu_addr
+ *         for the length of @offset bytes
+ * @offset: The offset from the start of the memory region to @gpu_addr
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_find_gpu_start_and_offset {
+	struct {
+		__u64 gpu_addr;
+		__u64 size;
+	} in;
+	struct {
+		__u64 start;
+		__u64 offset;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
+	_IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset)
+
+
+#define KBASE_IOCTL_CINSTR_GWT_START \
+	_IO(KBASE_IOCTL_TYPE, 33)
+
+#define KBASE_IOCTL_CINSTR_GWT_STOP \
+	_IO(KBASE_IOCTL_TYPE, 34)
+
+/**
+ * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses.
+ * @addr_buffer: Address of buffer to hold addresses of gpu modified areas.
+ * @size_buffer: Address of buffer to hold size of modified areas (in pages)
+ * @len: Number of addresses the buffers can hold.
+ * @more_data_available: Status indicating if more addresses are available.
+ * @no_of_addr_collected: Number of addresses collected into addr_buffer.
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ * This structure is used when performing a call to dump GPU write fault
+ * addresses.
+ */
+union kbase_ioctl_cinstr_gwt_dump {
+	struct {
+		__u64 addr_buffer;
+		__u64 size_buffer;
+		__u32 len;
+		__u32 padding;
+
+	} in;
+	struct {
+		__u32 no_of_addr_collected;
+		__u8 more_data_available;
+		__u8 padding[27];
+	} out;
+};
+
+#define KBASE_IOCTL_CINSTR_GWT_DUMP \
+	_IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
+
+/* IOCTLs 36-41 are reserved */
+
+/* IOCTL 42 is free for use */
+
+/***************
+ * test ioctls *
+ ***************/
+#if MALI_UNIT_TEST
+/* These ioctls are purely for test purposes and are not used in the production
+ * driver, they therefore may change without notice
+ */
+
+#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1)
+
+/**
+ * struct kbase_ioctl_tlstream_test - Start a timeline stream test
+ *
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay between tracepoints from one writer in milliseconds
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ */
+struct kbase_ioctl_tlstream_test {
+	__u32 tpw_count;
+	__u32 msg_delay;
+	__u32 msg_count;
+	__u32 aux_msg;
+};
+
+#define KBASE_IOCTL_TLSTREAM_TEST \
+	_IOW(KBASE_IOCTL_TEST_TYPE, 1, struct kbase_ioctl_tlstream_test)
+
+/**
+ * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ */
+struct kbase_ioctl_tlstream_stats {
+	__u32 bytes_collected;
+	__u32 bytes_generated;
+};
+
+#define KBASE_IOCTL_TLSTREAM_STATS \
+	_IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
+
+#endif
+
+/**********************************
+ * Definitions for GPU properties *
+ **********************************/
+#define KBASE_GPUPROP_VALUE_SIZE_U8	(0x0)
+#define KBASE_GPUPROP_VALUE_SIZE_U16	(0x1)
+#define KBASE_GPUPROP_VALUE_SIZE_U32	(0x2)
+#define KBASE_GPUPROP_VALUE_SIZE_U64	(0x3)
+
+#define KBASE_GPUPROP_PRODUCT_ID			1
+#define KBASE_GPUPROP_VERSION_STATUS			2
+#define KBASE_GPUPROP_MINOR_REVISION			3
+#define KBASE_GPUPROP_MAJOR_REVISION			4
+/* 5 previously used for GPU speed */
+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX			6
+/* 7 previously used for minimum GPU speed */
+#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE		8
+#define KBASE_GPUPROP_TEXTURE_FEATURES_0		9
+#define KBASE_GPUPROP_TEXTURE_FEATURES_1		10
+#define KBASE_GPUPROP_TEXTURE_FEATURES_2		11
+#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE		12
+
+#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE			13
+#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE		14
+#define KBASE_GPUPROP_L2_NUM_L2_SLICES			15
+
+#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES		16
+#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS		17
+
+#define KBASE_GPUPROP_MAX_THREADS			18
+#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE		19
+#define KBASE_GPUPROP_MAX_BARRIER_SIZE			20
+#define KBASE_GPUPROP_MAX_REGISTERS			21
+#define KBASE_GPUPROP_MAX_TASK_QUEUE			22
+#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT		23
+#define KBASE_GPUPROP_IMPL_TECH				24
+
+#define KBASE_GPUPROP_RAW_SHADER_PRESENT		25
+#define KBASE_GPUPROP_RAW_TILER_PRESENT			26
+#define KBASE_GPUPROP_RAW_L2_PRESENT			27
+#define KBASE_GPUPROP_RAW_STACK_PRESENT			28
+#define KBASE_GPUPROP_RAW_L2_FEATURES			29
+#define KBASE_GPUPROP_RAW_CORE_FEATURES			30
+#define KBASE_GPUPROP_RAW_MEM_FEATURES			31
+#define KBASE_GPUPROP_RAW_MMU_FEATURES			32
+#define KBASE_GPUPROP_RAW_AS_PRESENT			33
+#define KBASE_GPUPROP_RAW_JS_PRESENT			34
+#define KBASE_GPUPROP_RAW_JS_FEATURES_0			35
+#define KBASE_GPUPROP_RAW_JS_FEATURES_1			36
+#define KBASE_GPUPROP_RAW_JS_FEATURES_2			37
+#define KBASE_GPUPROP_RAW_JS_FEATURES_3			38
+#define KBASE_GPUPROP_RAW_JS_FEATURES_4			39
+#define KBASE_GPUPROP_RAW_JS_FEATURES_5			40
+#define KBASE_GPUPROP_RAW_JS_FEATURES_6			41
+#define KBASE_GPUPROP_RAW_JS_FEATURES_7			42
+#define KBASE_GPUPROP_RAW_JS_FEATURES_8			43
+#define KBASE_GPUPROP_RAW_JS_FEATURES_9			44
+#define KBASE_GPUPROP_RAW_JS_FEATURES_10		45
+#define KBASE_GPUPROP_RAW_JS_FEATURES_11		46
+#define KBASE_GPUPROP_RAW_JS_FEATURES_12		47
+#define KBASE_GPUPROP_RAW_JS_FEATURES_13		48
+#define KBASE_GPUPROP_RAW_JS_FEATURES_14		49
+#define KBASE_GPUPROP_RAW_JS_FEATURES_15		50
+#define KBASE_GPUPROP_RAW_TILER_FEATURES		51
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0		52
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1		53
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2		54
+#define KBASE_GPUPROP_RAW_GPU_ID			55
+#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS		56
+#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE	57
+#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE	58
+#define KBASE_GPUPROP_RAW_THREAD_FEATURES		59
+#define KBASE_GPUPROP_RAW_COHERENCY_MODE		60
+
+#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS		61
+#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS		62
+#define KBASE_GPUPROP_COHERENCY_COHERENCY		63
+#define KBASE_GPUPROP_COHERENCY_GROUP_0			64
+#define KBASE_GPUPROP_COHERENCY_GROUP_1			65
+#define KBASE_GPUPROP_COHERENCY_GROUP_2			66
+#define KBASE_GPUPROP_COHERENCY_GROUP_3			67
+#define KBASE_GPUPROP_COHERENCY_GROUP_4			68
+#define KBASE_GPUPROP_COHERENCY_GROUP_5			69
+#define KBASE_GPUPROP_COHERENCY_GROUP_6			70
+#define KBASE_GPUPROP_COHERENCY_GROUP_7			71
+#define KBASE_GPUPROP_COHERENCY_GROUP_8			72
+#define KBASE_GPUPROP_COHERENCY_GROUP_9			73
+#define KBASE_GPUPROP_COHERENCY_GROUP_10		74
+#define KBASE_GPUPROP_COHERENCY_GROUP_11		75
+#define KBASE_GPUPROP_COHERENCY_GROUP_12		76
+#define KBASE_GPUPROP_COHERENCY_GROUP_13		77
+#define KBASE_GPUPROP_COHERENCY_GROUP_14		78
+#define KBASE_GPUPROP_COHERENCY_GROUP_15		79
+
+#define KBASE_GPUPROP_TEXTURE_FEATURES_3		80
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3		81
+
+#define KBASE_GPUPROP_NUM_EXEC_ENGINES                  82
+
+#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC		83
+#define KBASE_GPUPROP_TLS_ALLOC				84
+
+#ifdef __cpluscplus
+}
+#endif
+
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100755
index 0000000..71450d5
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1625 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <mali_kbase.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/ratelimit.h>
+
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_tlstream.h>
+
+#include "mali_kbase_dma_fence.h"
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/* Return whether katom will run on the GPU or not. Currently only soft jobs and
+ * dependency-only atoms do not run on the GPU */
+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) ||  \
+			((katom->core_req & BASE_JD_REQ_ATOM_TYPE) ==    \
+							BASE_JD_REQ_DEP)))
+/*
+ * This is the kernel side of the API. Only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ *   completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const u64 p)
+{
+#ifdef CONFIG_COMPAT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return compat_ptr(p);
+#endif
+	return u64_to_user_ptr(p);
+}
+
+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
+ */
+static int jd_run_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+	if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+		/* Dependency only atom */
+		katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		return 0;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		/* Soft-job */
+		if (katom->will_fail_event_code) {
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			return 0;
+		}
+		if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+			if (!kbase_replay_process(katom))
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		} else if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		}
+		return 0;
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+	/* Queue an action about whether we should try scheduling a context */
+	return kbasep_js_add_job(kctx, katom);
+}
+
+#if defined(CONFIG_MALI_DMA_FENCE)
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kbdev = katom->kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Check whether the atom's other dependencies were already met. If
+	 * katom is a GPU atom then the job scheduler may be able to represent
+	 * the dependencies, hence we may attempt to submit it before they are
+	 * met. Other atoms must have had both dependencies resolved.
+	 */
+	if (IS_GPU_ATOM(katom) ||
+			(!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+			!kbase_jd_katom_dep_atom(&katom->dep[1]))) {
+		/* katom dep complete, attempt to run it */
+		bool resched = false;
+
+		resched = jd_run_atom(katom);
+
+		if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+			/* The atom has already finished */
+			resched |= jd_done_nolock(katom, NULL);
+		}
+
+		if (resched)
+			kbase_js_sched_all(kbdev);
+	}
+}
+#endif
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 * Any successfully completed atom would have had all it's callbacks
+	 * completed before the atom was run, so only flush for failed atoms.
+	 */
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		flush_workqueue(katom->kctx->dma_fence.wq);
+#endif /* CONFIG_MALI_DMA_FENCE */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_signal(katom);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	kbase_gpu_vm_lock(katom->kctx);
+	/* only roll back if extres is non-NULL */
+	if (katom->extres) {
+		u32 res_no;
+
+		res_no = katom->nr_extres;
+		while (res_no-- > 0) {
+			struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+			struct kbase_va_region *reg;
+
+			reg = kbase_region_tracker_find_region_base_address(
+					katom->kctx,
+					katom->extres[res_no].gpu_address);
+			kbase_unmap_external_resource(katom->kctx, reg, alloc);
+		}
+		kfree(katom->extres);
+		katom->extres = NULL;
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+}
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ */
+
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+	int err_ret_val = -EINVAL;
+	u32 res_no;
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct kbase_dma_fence_resv_info info = {
+		.dma_fence_resv_count = 0,
+	};
+#ifdef CONFIG_SYNC
+	/*
+	 * When both dma-buf fence and Android native sync is enabled, we
+	 * disable dma-buf fence for contexts that are using Android native
+	 * fences.
+	 */
+	const bool implicit_sync = !kbase_ctx_flag(katom->kctx,
+						   KCTX_NO_IMPLICIT_SYNC);
+#else /* CONFIG_SYNC */
+	const bool implicit_sync = true;
+#endif /* CONFIG_SYNC */
+#endif /* CONFIG_MALI_DMA_FENCE */
+	struct base_external_resource *input_extres;
+
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+	/* no resources encoded, early out */
+	if (!katom->nr_extres)
+		return -EINVAL;
+
+	katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+	if (NULL == katom->extres) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	/* copy user buffer to the end of our real buffer.
+	 * Make sure the struct sizes haven't changed in a way
+	 * we don't support */
+	BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+	input_extres = (struct base_external_resource *)
+			(((unsigned char *)katom->extres) +
+			(sizeof(*katom->extres) - sizeof(*input_extres)) *
+			katom->nr_extres);
+
+	if (copy_from_user(input_extres,
+			get_compat_pointer(katom->kctx, user_atom->extres_list),
+			sizeof(*input_extres) * katom->nr_extres) != 0) {
+		err_ret_val = -EINVAL;
+		goto early_err_out;
+	}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		info.resv_objs = kmalloc_array(katom->nr_extres,
+					sizeof(struct reservation_object *),
+					GFP_KERNEL);
+		if (!info.resv_objs) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+
+		info.dma_fence_excl_bitmap =
+				kcalloc(BITS_TO_LONGS(katom->nr_extres),
+					sizeof(unsigned long), GFP_KERNEL);
+		if (!info.dma_fence_excl_bitmap) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Take the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* need to keep the GPU VM locked while we set up UMM buffers */
+	kbase_gpu_vm_lock(katom->kctx);
+	for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+		struct base_external_resource *res;
+		struct kbase_va_region *reg;
+		struct kbase_mem_phy_alloc *alloc;
+		bool exclusive;
+
+		res = &input_extres[res_no];
+		exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
+				? true : false;
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx,
+				res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+		/* did we find a matching region object? */
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE)) {
+			/* roll back */
+			goto failed_loop;
+		}
+
+		if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
+				(reg->flags & KBASE_REG_SECURE)) {
+			katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED;
+		}
+
+		alloc = kbase_map_external_resource(katom->kctx, reg,
+				current->mm);
+		if (!alloc) {
+			err_ret_val = -EINVAL;
+			goto failed_loop;
+		}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		if (implicit_sync &&
+		    reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+			struct reservation_object *resv;
+
+			resv = reg->gpu_alloc->imported.umm.dma_buf->resv;
+			if (resv)
+				kbase_dma_fence_add_reservation(resv, &info,
+								exclusive);
+		}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+		/* finish with updating out array with the data we found */
+		/* NOTE: It is important that this is the last thing we do (or
+		 * at least not before the first write) as we overwrite elements
+		 * as we loop and could be overwriting ourself, so no writes
+		 * until the last read for an element.
+		 * */
+		katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+		katom->extres[res_no].alloc = alloc;
+	}
+	/* successfully parsed the extres array */
+	/* drop the vm lock now */
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		if (info.dma_fence_resv_count) {
+			int ret;
+
+			ret = kbase_dma_fence_wait(katom, &info);
+			if (ret < 0)
+				goto failed_dma_fence_setup;
+		}
+
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* all done OK */
+	return 0;
+
+/* error handling section */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+failed_dma_fence_setup:
+	/* Lock the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* lock before we unmap */
+	kbase_gpu_vm_lock(katom->kctx);
+#endif
+
+ failed_loop:
+	/* undo the loop work */
+	while (res_no-- > 0) {
+		struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+
+		kbase_unmap_external_resource(katom->kctx, NULL, alloc);
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+ early_err_out:
+	kfree(katom->extres);
+	katom->extres = NULL;
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif
+	return err_ret_val;
+}
+
+static inline void jd_resolve_dep(struct list_head *out_list,
+					struct kbase_jd_atom *katom,
+					u8 d, bool ctx_is_dying)
+{
+	u8 other_d = !d;
+
+	while (!list_empty(&katom->dep_head[d])) {
+		struct kbase_jd_atom *dep_atom;
+		struct kbase_jd_atom *other_dep_atom;
+		u8 dep_type;
+
+		dep_atom = list_entry(katom->dep_head[d].next,
+				struct kbase_jd_atom, dep_item[d]);
+		list_del(katom->dep_head[d].next);
+
+		dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+		kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+		if (katom->event_code != BASE_JD_EVENT_DONE &&
+			(dep_type != BASE_JD_DEP_TYPE_ORDER)) {
+#ifdef CONFIG_MALI_DMA_FENCE
+			kbase_dma_fence_cancel_callbacks(dep_atom);
+#endif
+
+			dep_atom->event_code = katom->event_code;
+			KBASE_DEBUG_ASSERT(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED);
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					!= BASE_JD_REQ_SOFT_REPLAY) {
+				dep_atom->will_fail_event_code =
+					dep_atom->event_code;
+			} else {
+				dep_atom->status =
+					KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+		}
+		other_dep_atom = (struct kbase_jd_atom *)
+			kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]);
+
+		if (!dep_atom->in_jd_list && (!other_dep_atom ||
+				(IS_GPU_ATOM(dep_atom) && !ctx_is_dying &&
+				!dep_atom->will_fail_event_code &&
+				!other_dep_atom->will_fail_event_code))) {
+			bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+			int dep_count;
+
+			dep_count = kbase_fence_dep_count_read(dep_atom);
+			if (likely(dep_count == -1)) {
+				dep_satisfied = true;
+			} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+				dep_satisfied = false;
+			}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+			if (dep_satisfied) {
+				dep_atom->in_jd_list = true;
+				list_add_tail(&dep_atom->jd_item, out_list);
+			}
+		}
+	}
+}
+
+KBASE_EXPORT_TEST_API(jd_resolve_dep);
+
+#if MALI_CUSTOMER_RELEASE == 0
+static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	kbdev->force_replay_count++;
+
+	if (kbdev->force_replay_count >= kbdev->force_replay_limit) {
+		kbdev->force_replay_count = 0;
+		katom->event_code = BASE_JD_EVENT_FORCE_REPLAY;
+
+		if (kbdev->force_replay_random)
+			kbdev->force_replay_limit =
+			   (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1;
+
+		dev_info(kbdev->dev, "force_replay : promoting to error\n");
+	}
+}
+
+/** Test to see if atom should be forced to fail.
+ *
+ * This function will check if an atom has a replay job as a dependent. If so
+ * then it will be considered for forced failure. */
+static void jd_check_force_failure(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int i;
+
+	if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) ||
+	    (katom->core_req & BASEP_JD_REQ_EVENT_NEVER))
+		return;
+
+	for (i = 1; i < BASE_JD_ATOM_COUNT; i++) {
+		if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom ||
+		    kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
+			struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
+						     BASE_JD_REQ_SOFT_REPLAY &&
+			    (dep_atom->core_req & kbdev->force_replay_core_req)
+					     == kbdev->force_replay_core_req) {
+				jd_force_failure(kbdev, katom);
+				return;
+			}
+		}
+	}
+}
+#endif
+
+/**
+ * is_dep_valid - Validate that a dependency is valid for early dependency
+ *                submission
+ * @katom: Dependency atom to validate
+ *
+ * A dependency is valid if any of the following are true :
+ * - It does not exist (a non-existent dependency does not block submission)
+ * - It is in the job scheduler
+ * - It has completed, does not have a failure event code, and has not been
+ *   marked to fail in the future
+ *
+ * Return: true if valid, false otherwise
+ */
+static bool is_dep_valid(struct kbase_jd_atom *katom)
+{
+	/* If there's no dependency then this is 'valid' from the perspective of
+	 * early dependency submission */
+	if (!katom)
+		return true;
+
+	/* Dependency must have reached the job scheduler */
+	if (katom->status < KBASE_JD_ATOM_STATE_IN_JS)
+		return false;
+
+	/* If dependency has completed and has failed or will fail then it is
+	 * not valid */
+	if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED &&
+			(katom->event_code != BASE_JD_EVENT_DONE ||
+			katom->will_fail_event_code))
+		return false;
+
+	return true;
+}
+
+static void jd_try_submitting_deps(struct list_head *out_list,
+		struct kbase_jd_atom *node)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct list_head *pos;
+
+		list_for_each(pos, &node->dep_head[i]) {
+			struct kbase_jd_atom *dep_atom = list_entry(pos,
+					struct kbase_jd_atom, dep_item[i]);
+
+			if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) {
+				/*Check if atom deps look sane*/
+				bool dep0_valid = is_dep_valid(
+						dep_atom->dep[0].atom);
+				bool dep1_valid = is_dep_valid(
+						dep_atom->dep[1].atom);
+				bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+				int dep_count;
+
+				dep_count = kbase_fence_dep_count_read(
+								dep_atom);
+				if (likely(dep_count == -1)) {
+					dep_satisfied = true;
+				} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+					dep_satisfied = false;
+				}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+				if (dep0_valid && dep1_valid && dep_satisfied) {
+					dep_atom->in_jd_list = true;
+					list_add(&dep_atom->jd_item, out_list);
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head completed_jobs;
+	struct list_head runnable_jobs;
+	bool need_to_try_schedule_context = false;
+	int i;
+
+	INIT_LIST_HEAD(&completed_jobs);
+	INIT_LIST_HEAD(&runnable_jobs);
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+#if MALI_CUSTOMER_RELEASE == 0
+	jd_check_force_failure(katom);
+#endif
+
+	/* This is needed in case an atom is failed due to being invalid, this
+	 * can happen *before* the jobs that the atom depends on have completed */
+	for (i = 0; i < 2; i++) {
+		if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
+			list_del(&katom->dep_item[i]);
+			kbase_jd_katom_dep_clear(&katom->dep[i]);
+		}
+	}
+
+	/* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with
+	 * BASE_JD_EVENT_TILE_RANGE_FAULT.
+	 *
+	 * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the
+	 * error code to BASE_JD_EVENT_DONE
+	 */
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) &&
+		  katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) {
+		if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) {
+			/* Promote the failure to job done */
+			katom->event_code = BASE_JD_EVENT_DONE;
+			katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED);
+		}
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+	list_add_tail(&katom->jd_item, &completed_jobs);
+
+	while (!list_empty(&completed_jobs)) {
+		katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item);
+		list_del(completed_jobs.prev);
+		KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+		for (i = 0; i < 2; i++)
+			jd_resolve_dep(&runnable_jobs, katom, i,
+					kbase_ctx_flag(kctx, KCTX_DYING));
+
+		if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+			kbase_jd_post_external_resources(katom);
+
+		while (!list_empty(&runnable_jobs)) {
+			struct kbase_jd_atom *node;
+
+			node = list_entry(runnable_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(runnable_jobs.next);
+			node->in_jd_list = false;
+
+			KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+			if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+				need_to_try_schedule_context |= jd_run_atom(node);
+			} else {
+				node->event_code = katom->event_code;
+
+				if ((node->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE) ==
+					BASE_JD_REQ_SOFT_REPLAY) {
+					if (kbase_replay_process(node))
+						/* Don't complete this atom */
+						continue;
+				} else if (node->core_req &
+							BASE_JD_REQ_SOFT_JOB) {
+					/* If this is a fence wait soft job
+					 * then remove it from the list of sync
+					 * waiters.
+					 */
+					if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
+						kbasep_remove_waiting_soft_job(node);
+
+					kbase_finish_soft_job(node);
+				}
+				node->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+
+			if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+				list_add_tail(&node->jd_item, &completed_jobs);
+			} else if (node->status == KBASE_JD_ATOM_STATE_IN_JS &&
+					!node->will_fail_event_code) {
+				/* Node successfully submitted, try submitting
+				 * dependencies as they may now be representable
+				 * in JS */
+				jd_try_submitting_deps(&runnable_jobs, node);
+			}
+		}
+
+		/* Register a completed job as a disjoint event when the GPU
+		 * is in a disjoint state (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kctx->kbdev);
+		if (completed_jobs_ctx)
+			list_add_tail(&katom->jd_item, completed_jobs_ctx);
+		else
+			kbase_event_post(kctx, katom);
+
+		/* Decrement and check the TOTAL number of jobs. This includes
+		 * those not tracked by the scheduler: 'not ready to run' and
+		 * 'dependency-only' jobs. */
+		if (--kctx->jctx.job_nr == 0)
+			wake_up(&kctx->jctx.zero_jobs_wait);	/* All events are safely queued now, and we can signal any waiter
+								 * that we've got no more jobs (so we can be safely terminated) */
+	}
+
+	return need_to_try_schedule_context;
+}
+
+KBASE_EXPORT_TEST_API(jd_done_nolock);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+enum {
+	CORE_REQ_DEP_ONLY,
+	CORE_REQ_SOFT,
+	CORE_REQ_COMPUTE,
+	CORE_REQ_FRAGMENT,
+	CORE_REQ_VERTEX,
+	CORE_REQ_TILER,
+	CORE_REQ_FRAGMENT_VERTEX,
+	CORE_REQ_FRAGMENT_VERTEX_TILER,
+	CORE_REQ_FRAGMENT_TILER,
+	CORE_REQ_VERTEX_TILER,
+	CORE_REQ_UNKNOWN
+};
+static const char * const core_req_strings[] = {
+	"Dependency Only Job",
+	"Soft Job",
+	"Compute Shader Job",
+	"Fragment Shader Job",
+	"Vertex/Geometry Shader Job",
+	"Tiler Job",
+	"Fragment Shader + Vertex/Geometry Shader Job",
+	"Fragment Shader + Vertex/Geometry Shader Job + Tiler Job",
+	"Fragment Shader + Tiler Job",
+	"Vertex/Geometry Shader Job + Tiler Job",
+	"Unknown Job"
+};
+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
+{
+	if (core_req & BASE_JD_REQ_SOFT_JOB)
+		return core_req_strings[CORE_REQ_SOFT];
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		return core_req_strings[CORE_REQ_COMPUTE];
+	switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) {
+	case BASE_JD_REQ_DEP:
+		return core_req_strings[CORE_REQ_DEP_ONLY];
+	case BASE_JD_REQ_FS:
+		return core_req_strings[CORE_REQ_FRAGMENT];
+	case BASE_JD_REQ_CS:
+		return core_req_strings[CORE_REQ_VERTEX];
+	case BASE_JD_REQ_T:
+		return core_req_strings[CORE_REQ_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_TILER];
+	case (BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_VERTEX_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER];
+	}
+	return core_req_strings[CORE_REQ_UNKNOWN];
+}
+#endif
+
+bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int queued = 0;
+	int i;
+	int sched_prio;
+	bool ret;
+	bool will_fail = false;
+
+	/* Update the TOTAL number of jobs. This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	jctx->job_nr++;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	katom->start_timestamp.tv64 = 0;
+#else
+	katom->start_timestamp = 0;
+#endif
+	katom->udata = user_atom->udata;
+	katom->kctx = kctx;
+	katom->nr_extres = user_atom->nr_extres;
+	katom->extres = NULL;
+	katom->device_nr = user_atom->device_nr;
+	katom->affinity = 0;
+	katom->jc = user_atom->jc;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->core_req = user_atom->core_req;
+	katom->atom_flags = 0;
+	katom->retry_count = 0;
+	katom->need_cache_flush_cores_retained = 0;
+	katom->pre_dep = NULL;
+	katom->post_dep = NULL;
+	katom->x_pre_dep = NULL;
+	katom->x_post_dep = NULL;
+	katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED;
+	katom->softjob_data = NULL;
+
+	/* Implicitly sets katom->protected_state.enter as well. */
+	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+	katom->age = kctx->age_count++;
+
+	INIT_LIST_HEAD(&katom->jd_item);
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_fence_dep_count_set(katom, -1);
+#endif
+
+	/* Don't do anything if there is a mess up with dependencies.
+	   This is done in a separate cycle to check both the dependencies at ones, otherwise
+	   it will be extra complexity to deal with 1st dependency ( just added to the list )
+	   if only the 2nd one has invalid config.
+	 */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+		if (dep_atom_number) {
+			if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+				katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+
+				/* Wrong dependency setup. Atom will be sent
+				 * back to user space. Do not record any
+				 * dependencies. */
+				KBASE_TLSTREAM_TL_NEW_ATOM(
+						katom,
+						kbase_jd_atom_id(kctx, katom));
+				KBASE_TLSTREAM_TL_RET_ATOM_CTX(
+						katom, kctx);
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom,
+						TL_ATOM_STATE_IDLE);
+
+				ret = jd_done_nolock(katom, NULL);
+				goto out;
+			}
+		}
+	}
+
+	/* Add dependencies */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type;
+		struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number];
+
+		dep_atom_type = user_atom->pre_dep[i].dependency_type;
+		kbase_jd_katom_dep_clear(&katom->dep[i]);
+
+		if (!dep_atom_number)
+			continue;
+
+		if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED ||
+				dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+
+			if (dep_atom->event_code == BASE_JD_EVENT_DONE)
+				continue;
+			/* don't stop this atom if it has an order dependency
+			 * only to the failed one, try to submit it through
+			 * the normal path
+			 */
+			if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom->event_code > BASE_JD_EVENT_ACTIVE) {
+				continue;
+			}
+
+			/* Atom has completed, propagate the error code if any */
+			katom->event_code = dep_atom->event_code;
+			katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+			/* This atom is going through soft replay or
+			 * will be sent back to user space. Do not record any
+			 * dependencies. */
+			KBASE_TLSTREAM_TL_NEW_ATOM(
+					katom,
+					kbase_jd_atom_id(kctx, katom));
+			KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx);
+			KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom,
+					TL_ATOM_STATE_IDLE);
+
+			if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					 == BASE_JD_REQ_SOFT_REPLAY) {
+				if (kbase_replay_process(katom)) {
+					ret = false;
+					goto out;
+				}
+			}
+			will_fail = true;
+
+		} else {
+			/* Atom is in progress, add this atom to the list */
+			list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
+			kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type);
+			queued = 1;
+		}
+	}
+
+	if (will_fail) {
+		if (!queued) {
+			ret = jd_done_nolock(katom, NULL);
+
+			goto out;
+		} else {
+			katom->will_fail_event_code = katom->event_code;
+			ret = false;
+
+			goto out;
+		}
+	} else {
+		/* These must occur after the above loop to ensure that an atom
+		 * that depends on a previous atom with the same number behaves
+		 * as expected */
+		katom->event_code = BASE_JD_EVENT_DONE;
+		katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	}
+
+	/* For invalid priority, be most lenient and choose the default */
+	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+	katom->sched_priority = sched_prio;
+
+	/* Create a new atom recording all dependencies it was set up with. */
+	KBASE_TLSTREAM_TL_NEW_ATOM(
+			katom,
+			kbase_jd_atom_id(kctx, katom));
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_IDLE);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(katom, katom->sched_priority);
+	KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx);
+	for (i = 0; i < 2; i++)
+		if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
+					&katom->dep[i])) {
+			KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(
+					(void *)kbase_jd_katom_dep_atom(
+						&katom->dep[i]),
+					(void *)katom);
+		} else if (BASE_JD_DEP_TYPE_INVALID !=
+				user_atom->pre_dep[i].dependency_type) {
+			/* Resolved dependency. */
+			int dep_atom_number =
+				user_atom->pre_dep[i].atom_id;
+			struct kbase_jd_atom *dep_atom =
+				&jctx->atoms[dep_atom_number];
+
+			KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(
+					(void *)dep_atom,
+					(void *)katom);
+		}
+
+	/* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
+	if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with an invalid device_nr */
+	if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
+	    (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid device_nr %d",
+				katom->device_nr);
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with invalid core requirements */
+	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
+			(katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid core requirements");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject fence wait soft-job atoms accessing external resources */
+	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
+			 ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting fence wait soft-job atom accessing external resources");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		/* handle what we need to do to access the external resources */
+		if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
+			/* setup failed (no access, bad resource, unknown resource types, etc.) */
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+	/* Validate the atom. Function will return error if the atom is
+	 * malformed.
+	 *
+	 * Soft-jobs never enter the job scheduler but have their own initialize method.
+	 *
+	 * If either fail then we immediately complete the atom with an error.
+	 */
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
+		if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	} else {
+		/* Soft-job */
+		if (kbase_prepare_soft_job(katom) != 0) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	katom->work_id = atomic_inc_return(&jctx->work_id);
+	trace_gpu_job_enqueue(kctx->id, katom->work_id,
+			kbasep_map_core_reqs_to_string(katom->core_req));
+#endif
+
+	if (queued && !IS_GPU_ATOM(katom)) {
+		ret = false;
+		goto out;
+	}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (kbase_fence_dep_count_read(katom) != -1) {
+		ret = false;
+		goto out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+		if (kbase_replay_process(katom))
+			ret = false;
+		else
+			ret = jd_done_nolock(katom, NULL);
+
+		goto out;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+
+		ret = false;
+	} else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		ret = kbasep_js_add_job(kctx, katom);
+		/* If job was cancelled then resolve immediately */
+		if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED)
+			ret = jd_done_nolock(katom, NULL);
+	} else {
+		/* This is a pure dependency. Resolve it immediately */
+		ret = jd_done_nolock(katom, NULL);
+	}
+
+ out:
+	return ret;
+}
+
+int kbase_jd_submit(struct kbase_context *kctx,
+		void __user *user_addr, u32 nr_atoms, u32 stride,
+		bool uk6_atom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int err = 0;
+	int i;
+	bool need_to_try_schedule_context = false;
+	struct kbase_device *kbdev;
+	u32 latest_flush;
+
+	/*
+	 * kbase_jd_submit isn't expected to fail and so all errors with the
+	 * jobs are reported by immediately failing them (through event system)
+	 */
+	kbdev = kctx->kbdev;
+
+	beenthere(kctx, "%s", "Enter");
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+		return -EINVAL;
+	}
+
+	if (stride != sizeof(base_jd_atom_v2)) {
+		dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+		return -EINVAL;
+	}
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(nr_atoms,
+				&kctx->timeline.jd_atoms_in_flight));
+
+	/* All atoms submitted in this call have the same flush ID */
+	latest_flush = kbase_backend_get_current_flush_id(kbdev);
+
+	for (i = 0; i < nr_atoms; i++) {
+		struct base_jd_atom_v2 user_atom;
+		struct kbase_jd_atom *katom;
+
+		if (copy_from_user(&user_atom, user_addr,
+					sizeof(user_atom)) != 0) {
+			err = -EINVAL;
+			KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx,
+				atomic_sub_return(nr_atoms - i,
+				&kctx->timeline.jd_atoms_in_flight));
+			break;
+		}
+
+		user_addr = (void __user *)((uintptr_t) user_addr + stride);
+
+		mutex_lock(&jctx->lock);
+#ifndef compiletime_assert
+#define compiletime_assert_defined
+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \
+while (false)
+#endif
+		compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) ==
+					BASE_JD_ATOM_COUNT,
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+		compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) ==
+					sizeof(user_atom.atom_number),
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+#ifdef compiletime_assert_defined
+#undef compiletime_assert
+#undef compiletime_assert_defined
+#endif
+		katom = &jctx->atoms[user_atom.atom_number];
+
+		/* Record the flush ID for the cache flush optimisation */
+		katom->flush_id = latest_flush;
+
+		while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
+			/* Atom number is already in use, wait for the atom to
+			 * complete
+			 */
+			mutex_unlock(&jctx->lock);
+
+			/* This thread will wait for the atom to complete. Due
+			 * to thread scheduling we are not sure that the other
+			 * thread that owns the atom will also schedule the
+			 * context, so we force the scheduler to be active and
+			 * hence eventually schedule this context at some point
+			 * later.
+			 */
+			kbase_js_sched_all(kbdev);
+
+			if (wait_event_killable(katom->completed,
+					katom->status ==
+					KBASE_JD_ATOM_STATE_UNUSED) != 0) {
+				/* We're being killed so the result code
+				 * doesn't really matter
+				 */
+				return 0;
+			}
+			mutex_lock(&jctx->lock);
+		}
+
+		need_to_try_schedule_context |=
+				       jd_submit_atom(kctx, &user_atom, katom);
+
+		/* Register a completed job as a disjoint event when the GPU is in a disjoint state
+		 * (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kbdev);
+
+		mutex_unlock(&jctx->lock);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kbdev);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_submit);
+
+void kbase_jd_done_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	u64 cache_jc = katom->jc;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool context_idle;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+	kbdev = kctx->kbdev;
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	/*
+	 * Begin transaction on JD context and JS context
+	 */
+	mutex_lock(&jctx->lock);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_DONE);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* This worker only gets called on contexts that are scheduled *in*. This is
+	 * because it only happens in response to an IRQ from a job that was
+	 * running.
+	 */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (katom->event_code == BASE_JD_EVENT_STOPPED) {
+		/* Atom has been promoted to stopped */
+		unsigned long flags;
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		kbase_js_unpull(kctx, katom);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&jctx->lock);
+
+		return;
+	}
+
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		meson_gpu_fault ++;
+		dev_err(kbdev->dev,
+			"t6xx: GPU fault 0x%02lx from job slot %d\n",
+                            (unsigned long)katom->event_code,
+                            katom->slot_nr);
+	}
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	/* Retain state before the katom disappears */
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+
+	context_idle = kbase_js_complete_atom_wq(kctx, katom);
+
+	KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
+
+	kbasep_js_remove_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+	jd_done_nolock(katom, &kctx->completed_jobs);
+
+	/* katom may have been freed now, do not use! */
+
+	if (context_idle) {
+		unsigned long flags;
+
+		context_idle = false;
+		mutex_lock(&js_devdata->queue_mutex);
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* If kbase_sched() has scheduled this context back in then
+		 * KCTX_ACTIVE will have been set after we marked it as
+		 * inactive, and another pm reference will have been taken, so
+		 * drop our reference. But do not call kbase_jm_idle_ctx(), as
+		 * the context is active and fast-starting is allowed.
+		 *
+		 * If an atom has been fast-started then kctx->atoms_pulled will
+		 * be non-zero but KCTX_ACTIVE will still be false (as the
+		 * previous pm reference has been inherited). Do NOT drop our
+		 * reference, as it has been re-used, and leave the context as
+		 * active.
+		 *
+		 * If no new atoms have been started then KCTX_ACTIVE will still
+		 * be false and atoms_pulled will be zero, so drop the reference
+		 * and call kbase_jm_idle_ctx().
+		 *
+		 * As the checks are done under both the queue_mutex and
+		 * hwaccess_lock is should be impossible for this to race
+		 * with the scheduler code.
+		 */
+		if (kbase_ctx_flag(kctx, KCTX_ACTIVE) ||
+		    !atomic_read(&kctx->atoms_pulled)) {
+			/* Calling kbase_jm_idle_ctx() here will ensure that
+			 * atoms are not fast-started when we drop the
+			 * hwaccess_lock. This is not performed if
+			 * KCTX_ACTIVE is set as in that case another pm
+			 * reference has been taken and a fast-start would be
+			 * valid.
+			 */
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE))
+				kbase_jm_idle_ctx(kbdev, kctx);
+			context_idle = true;
+		} else {
+			kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+		}
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+
+	/*
+	 * Transaction complete
+	 */
+	mutex_unlock(&jctx->lock);
+
+	/* Job is now no longer running, so can now safely release the context
+	 * reference, and handle any actions that were logged against the atom's retained state */
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	if (!atomic_dec_return(&kctx->work_count)) {
+		/* If worker now idle then post all events that jd_done_nolock()
+		 * has queued */
+		mutex_lock(&jctx->lock);
+		while (!list_empty(&kctx->completed_jobs)) {
+			struct kbase_jd_atom *atom = list_entry(
+					kctx->completed_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(kctx->completed_jobs.next);
+
+			kbase_event_post(kctx, atom);
+		}
+		mutex_unlock(&jctx->lock);
+	}
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+
+	if (context_idle)
+		kbase_pm_context_idle(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+}
+
+/**
+ * jd_cancel_worker - Work queue job cancel function.
+ * @data: a &struct work_struct
+ *
+ * Only called as part of 'Zapping' a context (which occurs on termination).
+ * Operates serially with the kbase_jd_done_worker() on the work queue.
+ *
+ * This can only be called on contexts that aren't scheduled.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be
+ * running (by virtue of only being called on contexts that aren't
+ * scheduled).
+ */
+static void jd_cancel_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool need_to_try_schedule_context;
+	bool attr_state_changed;
+	struct kbase_device *kbdev;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	kbdev = kctx->kbdev;
+	jctx = &kctx->jctx;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+	/* This only gets called on contexts that are scheduled out. Hence, we must
+	 * make sure we don't de-ref the number of running jobs (there aren't
+	 * any), nor must we try to schedule out the context (it's already
+	 * scheduled out).
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	/* Scheduler: Remove the job from the system */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&jctx->lock);
+
+	need_to_try_schedule_context = jd_done_nolock(katom, NULL);
+	/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
+	 * schedule the context. There's also no need for the jsctx_mutex to have been taken
+	 * around this too. */
+	KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
+
+	/* katom may have been freed now, do not use! */
+	mutex_unlock(&jctx->lock);
+
+	if (attr_state_changed)
+		kbase_js_sched_all(kbdev);
+}
+
+/**
+ * kbase_jd_done - Complete a job that has been removed from the Hardware
+ * @katom: atom which has been completed
+ * @slot_nr: slot the atom was on
+ * @end_timestamp: completion time
+ * @done_code: completion code
+ *
+ * This must be used whenever a job has been removed from the Hardware, e.g.:
+ * An IRQ indicates that the job finished (for both error and 'done' codes), or
+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop.
+ *
+ * Some work is carried out immediately, and the rest is deferred onto a
+ * workqueue
+ *
+ * Context:
+ *   This can be called safely from atomic context.
+ *   The caller must hold kbdev->hwaccess_lock
+ */
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
+		ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
+{
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(kctx);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
+		katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0);
+
+	kbase_job_check_leave_disjoint(kbdev, katom);
+
+	katom->slot_nr = slot_nr;
+
+	atomic_inc(&kctx->work_count);
+
+#ifdef CONFIG_DEBUG_FS
+	/* a failed job happened and is waiting for dumping*/
+	if (!katom->will_fail_event_code &&
+			kbase_debug_job_fault_process(katom, katom->event_code))
+		return;
+#endif
+
+	WARN_ON(work_pending(&katom->work));
+	INIT_WORK(&katom->work, kbase_jd_done_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_done);
+
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+	/* This should only be done from a context that is not scheduled */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	WARN_ON(work_pending(&katom->work));
+
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	INIT_WORK(&katom->work, jd_cancel_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+
+void kbase_jd_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_jd_atom *katom;
+	struct list_head *entry, *tmp;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u);
+
+	kbase_js_zap_context(kctx);
+
+	mutex_lock(&kctx->jctx.lock);
+
+	/*
+	 * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are
+	 * queued outside the job scheduler.
+	 */
+
+	del_timer_sync(&kctx->soft_job_timeout);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		katom = list_entry(entry, struct kbase_jd_atom, queue);
+		kbase_cancel_soft_job(katom);
+	}
+
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_cancel_all_atoms(kctx);
+#endif
+
+	mutex_unlock(&kctx->jctx.lock);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 */
+	flush_workqueue(kctx->dma_fence.wq);
+#endif
+
+	kbase_jm_wait_for_zero_jobs(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_zap_context);
+
+int kbase_jd_init(struct kbase_context *kctx)
+{
+	int i;
+	int mali_err = 0;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kctx->jctx.job_done_wq = alloc_workqueue("mali_jd",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (NULL == kctx->jctx.job_done_wq) {
+		mali_err = -ENOMEM;
+		goto out1;
+	}
+
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		init_waitqueue_head(&kctx->jctx.atoms[i].completed);
+
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]);
+
+		/* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
+		kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
+		kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+		kctx->jctx.atoms[i].dma_fence.context =
+						dma_fence_context_alloc(1);
+		atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks);
+#endif
+	}
+
+	mutex_init(&kctx->jctx.lock);
+
+	init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
+
+	spin_lock_init(&kctx->jctx.tb_lock);
+
+	kctx->jctx.job_nr = 0;
+	INIT_LIST_HEAD(&kctx->completed_jobs);
+	atomic_set(&kctx->work_count, 0);
+
+	return 0;
+
+ out1:
+	return mali_err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_init);
+
+void kbase_jd_exit(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	/* Work queue is emptied by this */
+	destroy_workqueue(kctx->jctx.job_done_wq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_exit);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
new file mode 100644
index 0000000..271daef
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -0,0 +1,241 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/seq_file.h>
+#include <mali_kbase.h>
+#include <mali_kbase_jd_debugfs.h>
+#include <mali_kbase_dma_fence.h>
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif
+#include <mali_kbase_ioctl.h>
+
+struct kbase_jd_debugfs_depinfo {
+	u8 id;
+	char type;
+};
+
+static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
+					struct seq_file *sfile)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	struct kbase_sync_fence_info info;
+	int res;
+
+	switch (atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		res = kbase_sync_fence_out_info_get(atom, &info);
+		if (0 == res) {
+			seq_printf(sfile, "Sa([%p]%d) ",
+				   info.fence, info.status);
+			break;
+		}
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		res = kbase_sync_fence_in_info_get(atom, &info);
+		if (0 == res) {
+			seq_printf(sfile, "Wa([%p]%d) ",
+				   info.fence, info.status);
+			break;
+		}
+	default:
+		break;
+	}
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		struct kbase_fence_cb *cb;
+
+		if (atom->dma_fence.fence) {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+			struct fence *fence = atom->dma_fence.fence;
+#else
+			struct dma_fence *fence = atom->dma_fence.fence;
+#endif
+
+			seq_printf(sfile,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+					"Sd(%u#%u: %s) ",
+#else
+					"Sd(%llu#%u: %s) ",
+#endif
+					fence->context,
+					fence->seqno,
+					dma_fence_is_signaled(fence) ?
+						"signaled" : "active");
+		}
+
+		list_for_each_entry(cb, &atom->dma_fence.callbacks,
+				    node) {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+			struct fence *fence = cb->fence;
+#else
+			struct dma_fence *fence = cb->fence;
+#endif
+
+			seq_printf(sfile,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+					"Wd(%u#%u: %s) ",
+#else
+					"Wd(%llu#%u: %s) ",
+#endif
+					fence->context,
+					fence->seqno,
+					dma_fence_is_signaled(fence) ?
+						"signaled" : "active");
+		}
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+}
+
+static void kbasep_jd_debugfs_atom_deps(
+		struct kbase_jd_debugfs_depinfo *deps,
+		struct kbase_jd_atom *atom)
+{
+	struct kbase_context *kctx = atom->kctx;
+	int i;
+
+	for (i = 0; i < 2; i++)	{
+		deps[i].id = (unsigned)(atom->dep[i].atom ?
+				kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0);
+
+		switch (atom->dep[i].dep_type) {
+		case BASE_JD_DEP_TYPE_INVALID:
+			deps[i].type = ' ';
+			break;
+		case BASE_JD_DEP_TYPE_DATA:
+			deps[i].type = 'D';
+			break;
+		case BASE_JD_DEP_TYPE_ORDER:
+			deps[i].type = '>';
+			break;
+		default:
+			deps[i].type = '?';
+			break;
+		}
+	}
+}
+/**
+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file.
+ * @sfile: The debugfs entry
+ * @data:  Data associated with the entry
+ *
+ * This function is called to get the contents of the JD atoms debugfs file.
+ * This is a report of all atoms managed by kbase_jd_context.atoms
+ *
+ * Return: 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+	struct kbase_jd_atom *atoms;
+	unsigned long irq_flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Print version */
+	seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION);
+
+	/* Print U/K API version */
+	seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR,
+			BASE_UK_VERSION_MINOR);
+
+	/* Print table heading */
+	seq_puts(sfile, " ID, Core req, St, CR,   Predeps,           Start time, Additional info...\n");
+
+	atoms = kctx->jctx.atoms;
+	/* General atom states */
+	mutex_lock(&kctx->jctx.lock);
+	/* JS-related states */
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+	for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
+		struct kbase_jd_atom *atom = &atoms[i];
+		s64 start_timestamp = 0;
+		struct kbase_jd_debugfs_depinfo deps[2];
+
+		if (atom->status == KBASE_JD_ATOM_STATE_UNUSED)
+			continue;
+
+		/* start_timestamp is cleared as soon as the atom leaves UNUSED state
+		 * and set before a job is submitted to the h/w, a non-zero value means
+		 * it is valid */
+		if (ktime_to_ns(atom->start_timestamp))
+			start_timestamp = ktime_to_ns(
+					ktime_sub(ktime_get(), atom->start_timestamp));
+
+		kbasep_jd_debugfs_atom_deps(deps, atom);
+
+		seq_printf(sfile,
+				"%3u, %8x, %2u, %2u, %c%3u %c%3u, %20lld, ",
+				i, atom->core_req, atom->status,
+				atom->coreref_state,
+				deps[0].type, deps[0].id,
+				deps[1].type, deps[1].id,
+				start_timestamp);
+
+
+		kbase_jd_debugfs_fence_info(atom, sfile);
+
+		seq_puts(sfile, "\n");
+	}
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&kctx->jctx.lock);
+
+	return 0;
+}
+
+
+/**
+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * Return: file descriptor
+ */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private);
+}
+
+static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+	.open = kbasep_jd_debugfs_atoms_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Expose all atoms */
+	debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx,
+			&kbasep_jd_debugfs_atoms_fops);
+
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
new file mode 100644
index 0000000..ce0cb61
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_jd_debugfs.h
+ * Header file for job dispatcher-related entries in debugfs
+ */
+
+#ifndef _KBASE_JD_DEBUGFS_H
+#define _KBASE_JD_DEBUGFS_H
+
+#include <linux/debugfs.h>
+
+#define MALI_JD_DEBUGFS_VERSION 2
+
+/* Forward declarations */
+struct kbase_context;
+
+/**
+ * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system
+ *
+ * @kctx Pointer to kbase_context
+ */
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx);
+
+#endif  /*_KBASE_JD_DEBUGFS_H*/
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
new file mode 100755
index 0000000..da78a16
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -0,0 +1,140 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_hwaccess_jm.h"
+#include "mali_kbase_jm.h"
+
+/**
+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot
+ *			 @js on the active context.
+ * @kbdev:		Device pointer
+ * @js:			Job slot to run on
+ * @nr_jobs_to_submit:	Number of jobs to attempt to submit
+ *
+ * Return: true if slot can still be submitted on, false if slot is now full.
+ */
+static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
+				int nr_jobs_to_submit)
+{
+	struct kbase_context *kctx;
+	int i;
+
+	kctx = kbdev->hwaccess.active_kctx[js];
+
+	if (!kctx)
+		return true;
+
+	for (i = 0; i < nr_jobs_to_submit; i++) {
+		struct kbase_jd_atom *katom = kbase_js_pull(kctx, js);
+
+		if (!katom)
+			return true; /* Context has no jobs on this slot */
+
+		kbase_backend_run_atom(kbdev, katom);
+	}
+
+	return false; /* Slot ringbuffer should now be full */
+}
+
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	u32 ret_mask = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (js_mask) {
+		int js = ffs(js_mask) - 1;
+		int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
+
+		if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
+			ret_mask |= (1 << js);
+
+		js_mask &= ~(1 << js);
+	}
+
+	return ret_mask;
+}
+
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick(kbdev, js_mask);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_try_kick_all(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick_all(kbdev);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		if (kbdev->hwaccess.active_kctx[js] == kctx)
+			kbdev->hwaccess.active_kctx[js] = NULL;
+	}
+}
+
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->event_code != BASE_JD_EVENT_STOPPED &&
+			katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
+		return kbase_js_complete_atom(katom, NULL);
+	} else {
+		kbase_js_unpull(katom->kctx, katom);
+		return NULL;
+	}
+}
+
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return kbase_js_complete_atom(katom, end_timestamp);
+}
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
new file mode 100644
index 0000000..c468ea4
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -0,0 +1,115 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Job manager common APIs
+ */
+
+#ifndef _KBASE_JM_H_
+#define _KBASE_JM_H_
+
+/**
+ * kbase_jm_kick() - Indicate that there are jobs ready to run.
+ * @kbdev:	Device pointer
+ * @js_mask:	Mask of the job slots that can be pulled from.
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job
+ *			 slots.
+ * @kbdev:	Device pointer
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
+{
+	return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+/**
+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick
+ * @kbdev:   Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
+ * @kbdev:  Device pointer
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick_all() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick_all(struct kbase_device *kbdev);
+
+/**
+ * kbase_jm_idle_ctx() - Mark a context as idle.
+ * @kbdev:	Device pointer
+ * @kctx:	Context to mark as idle
+ *
+ * No more atoms will be pulled from this context until it is marked as active
+ * by kbase_js_use_ctx().
+ *
+ * The context should have no atoms currently pulled from it
+ * (kctx->atoms_pulled == 0).
+ *
+ * Caller must hold the hwaccess_lock
+ */
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
+ *				  been soft-stopped or will fail due to a
+ *				  dependency
+ * @kbdev:	Device pointer
+ * @katom:	Atom that has been stopped or will be failed
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+			struct kbase_jd_atom *katom);
+
+/**
+ * kbase_jm_complete() - Complete an atom
+ * @kbdev:		Device pointer
+ * @katom:		Atom that has completed
+ * @end_timestamp:	Timestamp of atom completion
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp);
+
+#endif /* _KBASE_JM_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
new file mode 100755
index 0000000..def56d2
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -0,0 +1,2880 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_ctx_sched.h>
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_jm.h"
+#include "mali_kbase_hwaccess_jm.h"
+
+/*
+ * Private types
+ */
+
+/* Bitpattern indicating the result of releasing a context */
+enum {
+	/* The context was descheduled - caller should try scheduling in a new
+	 * one to keep the runpool full */
+	KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+	/* Ctx attributes were changed - caller should try scheduling all
+	 * contexts */
+	KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
+};
+
+typedef u32 kbasep_js_release_result;
+
+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
+	KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
+	KBASE_JS_ATOM_SCHED_PRIO_LOW  /* BASE_JD_PRIO_LOW */
+};
+
+const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
+	BASE_JD_PRIO_HIGH,   /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+	BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+	BASE_JD_PRIO_LOW     /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+};
+
+
+/*
+ * Private function prototypes
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state);
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_ctx_job_cb callback);
+
+/* Helper for trace subcodes */
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	return atomic_read(&kctx->refcount);
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+/*
+ * Private functions
+ */
+
+/**
+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
+ * @features: JSn_FEATURE register value
+ *
+ * Given a JSn_FEATURE register value returns the core requirements that match
+ *
+ * Return: Core requirement bit mask
+ */
+static base_jd_core_req core_reqs_from_jsn_features(u16 features)
+{
+	base_jd_core_req core_req = 0u;
+
+	if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+		core_req |= BASE_JD_REQ_V;
+
+	if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+		core_req |= BASE_JD_REQ_CF;
+
+	if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+		core_req |= BASE_JD_REQ_CS;
+
+	if ((features & JS_FEATURE_TILER_JOB) != 0)
+		core_req |= BASE_JD_REQ_T;
+
+	if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+		core_req |= BASE_JD_REQ_FS;
+
+	return core_req;
+}
+
+static void kbase_js_sync_timers(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+	kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
+
+/* Hold the mmu_hw_mutex and hwaccess_lock for this */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	bool result = false;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	as_nr = kctx->as_nr;
+	if (atomic_read(&kctx->refcount) > 0) {
+		KBASE_DEBUG_ASSERT(as_nr >= 0);
+
+		kbase_ctx_sched_retain_ctx_refcount(kctx);
+		KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
+				NULL, 0u, atomic_read(&kctx->refcount));
+		result = true;
+	}
+
+	return result;
+}
+
+/**
+ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority to check.
+ *
+ * Return true if there are no atoms to pull. There may be running atoms in the
+ * ring buffer even if there are no atoms to pull. It is also possible for the
+ * ring buffer to be full (with running atoms) when this functions returns
+ * true.
+ *
+ * Return: true if there are no atoms to pull, false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	return RB_EMPTY_ROOT(&rb->runnable_tree);
+}
+
+/**
+ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no
+ * pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if the ring buffers for all priorities have no pullable atoms,
+ *	   false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue.
+ * @kctx:     Pointer to kbase context with the queue.
+ * @js:       Job slot id to iterate.
+ * @prio:     Priority id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over a queue and invoke @callback for each entry in the queue, and
+ * remove the entry from the queue.
+ *
+ * If entries are added to the queue while this is running those entries may, or
+ * may not be covered. To ensure that all entries in the buffer have been
+ * enumerated when this function returns jsctx->lock must be held when calling
+ * this function.
+ *
+ * The HW access lock must always be held when calling this function.
+ */
+static void
+jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
+		kbasep_js_ctx_job_cb callback)
+{
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (!RB_EMPTY_ROOT(&queue->runnable_tree)) {
+		struct rb_node *node = rb_first(&queue->runnable_tree);
+		struct kbase_jd_atom *entry = rb_entry(node,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		rb_erase(node, &queue->runnable_tree);
+		callback(kctx->kbdev, entry);
+	}
+
+	while (!list_empty(&queue->x_dep_head)) {
+		struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next,
+				struct kbase_jd_atom, queue);
+
+		list_del(queue->x_dep_head.next);
+
+		callback(kctx->kbdev, entry);
+	}
+}
+
+/**
+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue
+ * @kctx:     Pointer to kbase context with queue.
+ * @js:       Job slot id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over all the different priorities, and for each call
+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
+ * for each entry, and remove the entry from the queue.
+ */
+static inline void
+jsctx_queue_foreach(struct kbase_context *kctx, int js,
+		kbasep_js_ctx_job_cb callback)
+{
+	int prio;
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+		jsctx_queue_foreach_prio(kctx, js, prio, callback);
+}
+
+/**
+ * jsctx_rb_peek_prio(): - Check buffer and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a pointer to
+ * the next atom, unless the ring buffer is empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	struct rb_node *node;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	node = rb_first(&rb->runnable_tree);
+	if (!node)
+		return NULL;
+
+	return rb_entry(node, struct kbase_jd_atom, runnable_tree_node);
+}
+
+/**
+ * jsctx_rb_peek(): - Check all priority buffers and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Check the ring buffers for all priorities, starting from
+ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
+ * pointer to the next atom, unless all the priority's ring buffers are empty.
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		struct kbase_jd_atom *katom;
+
+		katom = jsctx_rb_peek_prio(kctx, js, prio);
+		if (katom)
+			return katom;
+	}
+
+	return NULL;
+}
+
+/**
+ * jsctx_rb_pull(): - Mark atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to pull.
+ *
+ * Mark an atom previously obtained from jsctx_rb_peek() as running.
+ *
+ * @katom must currently be at the head of the ring buffer.
+ */
+static inline void
+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/* Atoms must be pulled in the correct order. */
+	WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
+
+	rb_erase(&katom->runnable_tree_node, &rb->runnable_tree);
+}
+
+#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0)
+
+static void
+jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+	struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (*new) {
+		struct kbase_jd_atom *entry = container_of(*new,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		parent = *new;
+		if (LESS_THAN_WRAP(katom->age, entry->age))
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&katom->runnable_tree_node, parent, new);
+	rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree);
+}
+
+/**
+ * jsctx_rb_unpull(): - Undo marking of atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to unpull.
+ *
+ * Undo jsctx_rb_pull() and put @katom back in the queue.
+ *
+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
+ * pulled.
+ */
+static inline void
+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_tree_add(kctx, katom);
+}
+
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
+					int js,
+					bool is_scheduled);
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+
+/*
+ * Functions private to KBase ('Protected' functions)
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev)
+{
+	struct kbasep_js_device_data *jsdd;
+	int i, j;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	jsdd = &kbdev->js_data;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Soft-stop will be disabled on a single context by default unless
+	 * softstop_always is set */
+	jsdd->softstop_always = false;
+#endif				/* CONFIG_MALI_DEBUG */
+	jsdd->nr_all_contexts_running = 0;
+	jsdd->nr_user_contexts_running = 0;
+	jsdd->nr_contexts_pullable = 0;
+	atomic_set(&jsdd->nr_contexts_runnable, 0);
+	/* No ctx allowed to submit */
+	jsdd->runpool_irq.submit_allowed = 0u;
+	memset(jsdd->runpool_irq.ctx_attr_ref_count, 0,
+			sizeof(jsdd->runpool_irq.ctx_attr_ref_count));
+	memset(jsdd->runpool_irq.slot_affinities, 0,
+			sizeof(jsdd->runpool_irq.slot_affinities));
+	memset(jsdd->runpool_irq.slot_affinity_refcount, 0,
+			sizeof(jsdd->runpool_irq.slot_affinity_refcount));
+	INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list);
+
+	/* Config attributes */
+	jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+	jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+	jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+	else
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS;
+	jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+	jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408;
+	else
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+	jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+	jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
+	jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
+	atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
+
+	dev_dbg(kbdev->dev, "JS Config Attribs: ");
+	dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
+			jsdd->scheduling_period_ns);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u",
+			jsdd->soft_stop_ticks);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u",
+			jsdd->soft_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u",
+			jsdd->hard_stop_ticks_ss);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u",
+			jsdd->hard_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u",
+			jsdd->hard_stop_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u",
+			jsdd->gpu_reset_ticks_ss);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u",
+			jsdd->gpu_reset_ticks_cl);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u",
+			jsdd->gpu_reset_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u",
+			jsdd->ctx_timeslice_ns);
+	dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i",
+		atomic_read(&jsdd->soft_job_timeout_ms));
+
+	if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
+			jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
+			jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping &&
+			jsdd->hard_stop_ticks_dumping <
+			jsdd->gpu_reset_ticks_dumping)) {
+		dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n");
+		return -EINVAL;
+	}
+
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.",
+			jsdd->soft_stop_ticks,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.",
+			jsdd->hard_stop_ticks_ss,
+			jsdd->hard_stop_ticks_dumping,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing.");
+#endif
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i)
+		jsdd->js_reqs[i] = core_reqs_from_jsn_features(
+			kbdev->gpu_props.props.raw_props.js_features[i]);
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+
+	mutex_init(&jsdd->runpool_mutex);
+	mutex_init(&jsdd->queue_mutex);
+	spin_lock_init(&kbdev->hwaccess_lock);
+	sema_init(&jsdd->schedule_sem, 1);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
+		for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) {
+			INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]);
+			INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]);
+		}
+	}
+
+	return 0;
+}
+
+void kbasep_js_devdata_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbasep_js_devdata_term(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	/* The caller must de-register all contexts before calling this
+	 */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
+	KBASE_DEBUG_ASSERT(memcmp(
+				  js_devdata->runpool_irq.ctx_attr_ref_count,
+				  zero_ctx_attr_ref_count,
+				  sizeof(zero_ctx_attr_ref_count)) == 0);
+	CSTD_UNUSED(zero_ctx_attr_ref_count);
+}
+
+int kbasep_js_kctx_init(struct kbase_context *const kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int i, j;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	js_kctx_info->ctx.nr_jobs = 0;
+	kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+	kbase_ctx_flag_clear(kctx, KCTX_DYING);
+	memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
+			sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
+
+	/* Initially, the context is disabled from submission until the create
+	 * flags are set */
+	kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+	mutex_init(&js_kctx_info->ctx.jsctx_mutex);
+
+	init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
+
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
+			INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head);
+			kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT;
+		}
+	}
+
+	return 0;
+}
+
+void kbasep_js_kctx_term(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int js;
+	bool update_ctx_count = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* The caller must de-register all jobs before calling this */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
+
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) {
+		WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
+		atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		update_ctx_count = true;
+		kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+	}
+
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if (update_ctx_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		kbase_backend_ctx_count_changed(kbdev);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_nolock - Variant of
+ *                                         kbase_jd_ctx_list_add_pullable()
+ *                                         where the caller must hold
+ *                                         hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+			&kbdev->js_data.ctx_list_pullable[js][kctx->priority]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head_nolock - Variant of
+ *                                              kbase_js_ctx_list_add_pullable_head()
+ *                                              where the caller must hold
+ *                                              hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head_nolock(
+		struct kbase_device *kbdev, struct kbase_context *kctx, int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+			&kbdev->js_data.ctx_list_pullable[js][kctx->priority]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
+ *                                       per-slot pullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the head.
+ *
+ * This function should be used when a context has been scheduled, but no jobs
+ * can currently be pulled from it.
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the
+ *                                           per-slot unpullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on the per-slot pullable queue. It will be
+ * removed from the pullable queue before being added to the unpullable queue.
+ *
+ * This function should be used when a context has been pulled from, and there
+ * are no jobs remaining on the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+		&kbdev->js_data.ctx_list_unpullable[js][kctx->priority]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable
+ *                                   or unpullable context queues
+ * @kbdev:  Device pointer
+ * @kctx:   Context to remove from queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on one of the queues.
+ *
+ * This function should be used when a context has no jobs on the GPU, and no
+ * jobs remaining for the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head()
+ *                                     where the caller must hold
+ *                                     hwaccess_lock
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
+						struct kbase_device *kbdev,
+						int js)
+{
+	struct kbase_context *kctx;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i]))
+			continue;
+
+		kctx = list_entry(kbdev->js_data.ctx_list_pullable[js][i].next,
+				struct kbase_context,
+				jctx.sched_info.ctx.ctx_list_entry[js]);
+
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+		return kctx;
+	}
+	return NULL;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
+ *                              queue.
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head(
+		struct kbase_device *kbdev, int js)
+{
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the
+ *                         specified slot
+ * @kctx:          Context pointer
+ * @js:            Job slot to use
+ * @is_scheduled:  true if the context is currently scheduled
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:         true if context can be pulled from on specified slot
+ *                 false otherwise
+ */
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
+					bool is_scheduled)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_jd_atom *katom;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	js_devdata = &kctx->kbdev->js_data;
+
+	if (is_scheduled) {
+		if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+			return false;
+	}
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return false; /* No pullable atoms */
+	if (kctx->blocked_js[js][katom->sched_priority])
+		return false;
+	if (atomic_read(&katom->blocked))
+		return false; /* next atom blocked */
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return false;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return false;
+	}
+
+	return true;
+}
+
+static bool kbase_js_dep_validate(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool ret = true;
+	bool has_dep = false, has_x_dep = false;
+	int js = kbase_js_get_slot(kbdev, katom);
+	int prio = katom->sched_priority;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+		if (dep_atom) {
+			int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+			int dep_prio = dep_atom->sched_priority;
+
+			/* Dependent atom must already have been submitted */
+			if (!(dep_atom->atom_flags &
+					KBASE_KATOM_FLAG_JSCTX_IN_TREE)) {
+				ret = false;
+				break;
+			}
+
+			/* Dependencies with different priorities can't
+			  be represented in the ringbuffer */
+			if (prio != dep_prio) {
+				ret = false;
+				break;
+			}
+
+			if (js == dep_js) {
+				/* Only one same-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * same-slot dependency */
+				if (dep_atom->post_dep) {
+					ret = false;
+					break;
+				}
+				has_dep = true;
+			} else {
+				/* Only one cross-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_x_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * cross-slot dependency */
+				if (dep_atom->x_post_dep) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already be in the
+				 * HW access ringbuffer */
+				if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already have
+				 * completed */
+				if (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_IN_JS) {
+					ret = false;
+					break;
+				}
+				/* Cross-slot dependencies must not violate
+				 * PRLAM-8987 affinity restrictions */
+				if (kbase_hw_has_issue(kbdev,
+							BASE_HW_ISSUE_8987) &&
+						(js == 2 || dep_js == 2)) {
+					ret = false;
+					break;
+				}
+				has_x_dep = true;
+			}
+
+			/* Dependency can be represented in ringbuffers */
+		}
+	}
+
+	/* If dependencies can be represented by ringbuffer then clear them from
+	 * atom structure */
+	if (ret) {
+		for (i = 0; i < 2; i++) {
+			struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+			if (dep_atom) {
+				int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+
+				if ((js != dep_js) &&
+					(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_COMPLETED)
+					&& (dep_atom->status !=
+					KBASE_JD_ATOM_STATE_HW_COMPLETED)
+					&& (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED)) {
+
+					katom->atom_flags |=
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+					katom->x_pre_dep = dep_atom;
+					dep_atom->x_post_dep = katom;
+					if (kbase_jd_katom_dep_type(
+							&katom->dep[i]) ==
+							BASE_JD_DEP_TYPE_DATA)
+						katom->atom_flags |=
+						KBASE_KATOM_FLAG_FAIL_BLOCKER;
+				}
+				if ((kbase_jd_katom_dep_type(&katom->dep[i])
+						== BASE_JD_DEP_TYPE_DATA) &&
+						(js == dep_js)) {
+					katom->pre_dep = dep_atom;
+					dep_atom->post_dep = katom;
+				}
+
+				list_del(&katom->dep_item[i]);
+				kbase_jd_katom_dep_clear(&katom->dep[i]);
+			}
+		}
+	}
+
+	return ret;
+}
+
+void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Move kctx to the pullable/upullable list as per the new priority */
+	if (new_priority != kctx->priority) {
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kctx->slots_pullable & (1 << js))
+				list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js][new_priority]);
+			else
+				list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_unpullable[js][new_priority]);
+		}
+
+		kctx->priority = new_priority;
+	}
+}
+
+void kbase_js_update_ctx_priority(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int new_priority = KBASE_JS_ATOM_SCHED_PRIO_LOW;
+	int prio;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->js_ctx_scheduling_mode == KBASE_JS_SYSTEM_PRIORITY_MODE) {
+		/* Determine the new priority for context, as per the priority
+		 * of currently in-use atoms.
+		 */
+		for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+			if (kctx->atoms_count[prio]) {
+				new_priority = prio;
+				break;
+			}
+		}
+	}
+
+	kbase_js_set_ctx_priority(kctx, new_priority);
+}
+
+bool kbasep_js_add_job(struct kbase_context *kctx,
+		struct kbase_jd_atom *atom)
+{
+	unsigned long flags;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+	bool enqueue_required = false;
+	bool timer_sync = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/*
+	 * Begin Runpool transaction
+	 */
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
+	++(js_kctx_info->ctx.nr_jobs);
+
+	/* Lock for state available during IRQ */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (++kctx->atoms_count[atom->sched_priority] == 1)
+		kbase_js_update_ctx_priority(kctx);
+
+	if (!kbase_js_dep_validate(kctx, atom)) {
+		/* Dependencies could not be represented */
+		--(js_kctx_info->ctx.nr_jobs);
+
+		/* Setting atom status back to queued as it still has unresolved
+		 * dependencies */
+		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+		/* Undo the count, as the atom will get added again later but
+		 * leave the context priority adjusted or boosted, in case if
+		 * this was the first higher priority atom received for this
+		 * context.
+		 * This will prevent the scenario of priority inversion, where
+		 * another context having medium priority atoms keeps getting
+		 * scheduled over this context, which is having both lower and
+		 * higher priority atoms, but higher priority atoms are blocked
+		 * due to dependency on lower priority atoms. With priority
+		 * boost the high priority atom will get to run at earliest.
+		 */
+		kctx->atoms_count[atom->sched_priority]--;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		goto out_unlock;
+	}
+
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_READY);
+	KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
+
+	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* Context Attribute Refcounting */
+	kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom);
+
+	if (enqueue_required) {
+		if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false))
+			timer_sync = kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+		else
+			timer_sync = kbase_js_ctx_list_add_unpullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+	}
+	/* If this context is active and the atom is the first on its slot,
+	 * kick the job manager to attempt to fast-start the atom */
+	if (enqueue_required && kctx ==
+			kbdev->hwaccess.active_kctx[atom->slot_nr])
+		kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+	/* End runpool transaction */
+
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+			/* A job got added while/after kbase_job_zap_context()
+			 * was called on a non-scheduled context. Kill that job
+			 * by killing the context. */
+			kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx,
+					false);
+		} else if (js_kctx_info->ctx.nr_jobs == 1) {
+			/* Handle Refcount going from 0 to 1: schedule the
+			 * context on the Queue */
+			KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+			dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+
+			/* Queue was updated - caller must try to
+			 * schedule the head context */
+			WARN_ON(!enqueue_required);
+		}
+	}
+out_unlock:
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return enqueue_required;
+}
+
+void kbasep_js_remove_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc,
+			kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* De-refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
+	--(js_kctx_info->ctx.nr_jobs);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (--kctx->atoms_count[atom->sched_priority] == 0)
+		kbase_js_update_ctx_priority(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	unsigned long flags;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool attr_state_changed;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+	kbasep_js_remove_job(kbdev, kctx, katom);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* The atom has 'finished' (will not be re-run), so no need to call
+	 * kbasep_js_has_atom_finished().
+	 *
+	 * This is because it returns false for soft-stopped atoms, but we
+	 * want to override that, because we're cancelling an atom regardless of
+	 * whether it was soft-stopped or not */
+	attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
+			&katom_retained_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return attr_state_changed;
+}
+
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	bool result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	return result;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+		int as_nr)
+{
+	unsigned long flags;
+	struct kbase_context *found_kctx = NULL;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	found_kctx = kbdev->as_to_kctx[as_nr];
+
+	if (found_kctx != NULL)
+		kbase_ctx_sched_retain_ctx_refcount(found_kctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return found_kctx;
+}
+
+/**
+ * kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs after
+ *                           releasing a context and/or atom
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed
+ *
+ * This collates a set of actions that must happen whilst hwaccess_lock is held.
+ *
+ * This includes running more jobs when:
+ * - The previously released kctx caused a ctx attribute change,
+ * - The released atom caused a ctx attribute change,
+ * - Slots were previously blocked due to affinity restrictions,
+ * - Submission during IRQ handling failed.
+ *
+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were
+ *         changed. The caller should try scheduling all contexts
+ */
+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state,
+		bool runpool_ctx_attr_change)
+{
+	struct kbasep_js_device_data *js_devdata;
+	kbasep_js_release_result result = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom_retained_state != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (js_devdata->nr_user_contexts_running != 0 && runpool_ctx_attr_change) {
+		/* A change in runpool ctx attributes might mean we can
+		 * run more jobs than before  */
+		result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+
+		KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB,
+					kctx, NULL, 0u, 0);
+	}
+	return result;
+}
+
+/**
+ * kbasep_js_runpool_release_ctx_internal - Internal function to release the reference
+ *                                          on a ctx and an atom's "retained state", only
+ *                                          taking the runpool and as transaction mutexes
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
+ *
+ * This also starts more jobs running in the case of an ctx-attribute state change
+ *
+ * This does none of the followup actions for scheduling:
+ * - It does not schedule in a new context
+ * - It does not requeue or handle dying contexts
+ *
+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead
+ *
+ * Has following requirements
+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
+ * - Context has a non-zero refcount
+ * - Caller holds js_kctx_info->ctx.jsctx_mutex
+ * - Caller holds js_devdata->runpool_mutex
+ *
+ * Return: A bitpattern, containing KBASEP_JS_RELEASE_RESULT_* flags, indicating
+ *         the result of releasing a context that whether the caller should try
+ *         scheduling a new context or should try scheduling all contexts.
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	kbasep_js_release_result release_result = 0u;
+	bool runpool_ctx_attr_change = false;
+	int kctx_as_nr;
+	int new_ref_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	kctx_as_nr = kctx->as_nr;
+	KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/*
+	 * Transaction begins on AS and runpool_irq
+	 *
+	 * Assert about out calling contract
+	 */
+	mutex_lock(&kbdev->pm.lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/* Update refcount */
+	kbase_ctx_sched_release_ctx(kctx);
+	new_ref_count = atomic_read(&kctx->refcount);
+
+	/* Release the atom if it finished (i.e. wasn't soft-stopped) */
+	if (kbasep_js_has_atom_finished(katom_retained_state))
+		runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(
+				kbdev, kctx, katom_retained_state);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u,
+			new_ref_count);
+
+	if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) &&
+			!kbase_pm_is_suspending(kbdev)) {
+		/* Context is kept scheduled into an address space even when
+		 * there are no jobs, in this case we have to handle the
+		 * situation where all jobs have been evicted from the GPU and
+		 * submission is disabled.
+		 *
+		 * At this point we re-enable submission to allow further jobs
+		 * to be executed
+		 */
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+	}
+
+	/* Make a set of checks to see if the context should be scheduled out.
+	 * Note that there'll always be at least 1 reference to the context
+	 * which was previously acquired by kbasep_js_schedule_ctx(). */
+	if (new_ref_count == 1 &&
+		(!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
+							kbdev->pm.suspending)) {
+		int num_slots = kbdev->gpu_props.num_job_slots;
+		int slot;
+
+		/* Last reference, and we've been told to remove this context
+		 * from the Run Pool */
+		dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d",
+				kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
+				kbasep_js_is_submit_allowed(js_devdata, kctx));
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_mmu_as_released(kctx->as_nr);
+#endif
+		KBASE_TLSTREAM_TL_NRET_AS_CTX(&kbdev->as[kctx->as_nr], kctx);
+
+		kbase_backend_release_ctx_irq(kbdev, kctx);
+
+		for (slot = 0; slot < num_slots; slot++) {
+			if (kbdev->hwaccess.active_kctx[slot] == kctx)
+				kbdev->hwaccess.active_kctx[slot] = NULL;
+		}
+
+		/* Ctx Attribute handling
+		 *
+		 * Releasing atoms attributes must either happen before this, or
+		 * after the KCTX_SHEDULED flag is changed, otherwise we
+		 * double-decount the attributes
+		 */
+		runpool_ctx_attr_change |=
+			kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
+
+		/* Releasing the context and katom retained state can allow
+		 * more jobs to run */
+		release_result |=
+			kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev,
+						kctx, katom_retained_state,
+						runpool_ctx_attr_change);
+
+		/*
+		 * Transaction ends on AS and runpool_irq:
+		 *
+		 * By this point, the AS-related data is now clear and ready
+		 * for re-use.
+		 *
+		 * Since releases only occur once for each previous successful
+		 * retain, and no more retains are allowed on this context, no
+		 * other thread will be operating in this
+		 * code whilst we are
+		 */
+
+		/* Recalculate pullable status for all slots */
+		for (slot = 0; slot < num_slots; slot++) {
+			if (kbase_js_ctx_pullable(kctx, slot, false))
+				kbase_js_ctx_list_add_pullable_nolock(kbdev,
+						kctx, slot);
+		}
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		kbase_backend_release_ctx_noirq(kbdev, kctx);
+
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* Note: Don't reuse kctx_as_nr now */
+
+		/* Synchronize with any timers */
+		kbase_backend_ctx_count_changed(kbdev);
+
+		/* update book-keeping info */
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+		/* Signal any waiter that the context is not scheduled, so is
+		 * safe for termination - once the jsctx_mutex is also dropped,
+		 * and jobs have finished. */
+		wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+		/* Queue an action to occur after we've dropped the lock */
+		release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED |
+			KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+	} else {
+		kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
+				katom_retained_state, runpool_ctx_attr_change);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return release_result;
+}
+
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	/* Setup a dummy katom_retained_state */
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+							&katom_retained_state);
+}
+
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx, bool has_pm_ref)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* This is called if and only if you've you've detached the context from
+	 * the Runpool Queue, and not added it back to the Runpool
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Dying: don't requeue, but kill all jobs on the context. This
+		 * happens asynchronously */
+		dev_dbg(kbdev->dev,
+			"JS: ** Killing Context %p on RunPool Remove **", kctx);
+		kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
+	}
+}
+
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL)
+		kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+			&katom_retained_state);
+}
+
+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into
+ * kbase_js_sched_all() */
+static void kbasep_js_runpool_release_ctx_no_schedule(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+	struct kbasep_js_atom_retained_state katom_retained_state_struct;
+	struct kbasep_js_atom_retained_state *katom_retained_state =
+		&katom_retained_state_struct;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	kbasep_js_atom_retained_state_init_invalid(katom_retained_state);
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* NOTE: could return release_result if the caller would like to know
+	 * whether it should schedule a new context, but currently no callers do
+	 */
+}
+
+void kbase_js_set_timeouts(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_backend_timeouts_changed(kbdev);
+}
+
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	bool kctx_suspended = false;
+	int as_nr;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Pick available address space for this context */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	as_nr = kbase_ctx_sched_retain_ctx(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		as_nr = kbase_backend_find_and_release_free_address_space(
+				kbdev, kctx);
+		if (as_nr != KBASEP_AS_NR_INVALID) {
+			/* Attempt to retain the context again, this should
+			 * succeed */
+			mutex_lock(&kbdev->mmu_hw_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			as_nr = kbase_ctx_sched_retain_ctx(kctx);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&kbdev->mmu_hw_mutex);
+
+			WARN_ON(as_nr == KBASEP_AS_NR_INVALID);
+		}
+	}
+	if (as_nr == KBASEP_AS_NR_INVALID)
+		return false; /* No address spaces currently available */
+
+	/*
+	 * Atomic transaction on the Context and Run Pool begins
+	 */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Check to see if context is dying due to kbase_job_zap_context() */
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Roll back the transaction so far and return */
+		kbase_ctx_sched_release_ctx(kctx);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL,
+				0u,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	kbase_ctx_flag_set(kctx, KCTX_SCHEDULED);
+
+	/* Assign context to previously chosen address space */
+	if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
+		/* Roll back the transaction so far and return */
+		kbase_ctx_sched_release_ctx(kctx);
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	kbdev->hwaccess.active_kctx[js] = kctx;
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
+#endif
+	KBASE_TLSTREAM_TL_RET_AS_CTX(&kbdev->as[kctx->as_nr], kctx);
+
+	/* Cause any future waiter-on-termination to wait until the context is
+	 * descheduled */
+	wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+	/* Re-check for suspending: a suspend could've occurred, and all the
+	 * contexts could've been removed from the runpool before we took this
+	 * lock. In this case, we don't want to allow this context to run jobs,
+	 * we just want it out immediately.
+	 *
+	 * The DMB required to read the suspend flag was issued recently as part
+	 * of the hwaccess_lock locking. If a suspend occurs *after* that lock
+	 * was taken (i.e. this condition doesn't execute), then the
+	 * kbasep_js_suspend() code will cleanup this context instead (by virtue
+	 * of it being called strictly after the suspend flag is set, and will
+	 * wait for this lock to drop) */
+	if (kbase_pm_is_suspending(kbdev)) {
+		/* Cause it to leave at some later point */
+		bool retained;
+
+		retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+		KBASE_DEBUG_ASSERT(retained);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+		kctx_suspended = true;
+	}
+
+	kbase_ctx_flag_clear(kctx, KCTX_PULLED_SINCE_ACTIVE_JS0 << js);
+
+	/* Transaction complete */
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Synchronize with any timers */
+	kbase_backend_ctx_count_changed(kbdev);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	/* Note: after this point, the context could potentially get scheduled
+	 * out immediately */
+
+	if (kctx_suspended) {
+		/* Finishing forcing out the context due to a suspend. Use a
+		 * variant of kbasep_js_runpool_release_ctx() that doesn't
+		 * schedule a new context, to prevent a risk of recursion back
+		 * into this function */
+		kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx);
+		return false;
+	}
+	return true;
+}
+
+static bool kbase_js_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int js)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+			kbase_backend_use_ctx_sched(kbdev, kctx, js)) {
+		/* Context already has ASID - mark as active */
+		if (kbdev->hwaccess.active_kctx[js] != kctx) {
+			kbdev->hwaccess.active_kctx[js] = kctx;
+			kbase_ctx_flag_clear(kctx,
+					KCTX_PULLED_SINCE_ACTIVE_JS0 << js);
+		}
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return true; /* Context already scheduled */
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	return kbasep_js_schedule_ctx(kbdev, kctx, js);
+}
+
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	bool is_scheduled;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* This must never be attempted whilst suspending - i.e. it should only
+	 * happen in response to a syscall from a user-space thread */
+	BUG_ON(kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Mark the context as privileged */
+	kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED);
+
+	is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED);
+	if (!is_scheduled) {
+		/* Add the context to the pullable list */
+		if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0))
+			kbase_js_sync_timers(kbdev);
+
+		/* Fast-starting requires the jsctx_mutex to be dropped,
+		 * because it works on multiple ctxs */
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		/* Try to schedule the context in */
+		kbase_js_sched_all(kbdev);
+
+		/* Wait for the context to be scheduled in */
+		wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			   kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	} else {
+		/* Already scheduled in - We need to retain it to keep the
+		 * corresponding address space */
+		kbasep_js_runpool_retain_ctx(kbdev, kctx);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+}
+KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx);
+
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* We don't need to use the address space anymore */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Release the context - it will be scheduled out */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	kbase_js_sched_all(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx);
+
+void kbasep_js_suspend(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+	u16 retained = 0u;
+	int nr_privileged_ctx = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Prevent all contexts from submitting */
+	js_devdata->runpool_irq.submit_allowed = 0;
+
+	/* Retain each of the contexts, so we can cause it to leave even if it
+	 * had no refcount to begin with */
+	for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
+		struct kbase_context *kctx = kbdev->as_to_kctx[i];
+
+		retained = retained << 1;
+
+		if (kctx) {
+			kbase_ctx_sched_retain_ctx_refcount(kctx);
+			retained |= 1u;
+			/* We can only cope with up to 1 privileged context -
+			 * the instrumented context. It'll be suspended by
+			 * disabling instrumentation */
+			if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+				++nr_privileged_ctx;
+				WARN_ON(nr_privileged_ctx != 1);
+			}
+		}
+	}
+	CSTD_UNUSED(nr_privileged_ctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* De-ref the previous retain to ensure each context gets pulled out
+	 * sometime later. */
+	for (i = 0;
+		 i < BASE_MAX_NR_AS;
+		 ++i, retained = retained >> 1) {
+		struct kbase_context *kctx = kbdev->as_to_kctx[i];
+
+		if (retained & 1u)
+			kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	/* Caller must wait for all Power Manager active references to be
+	 * dropped */
+}
+
+void kbasep_js_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int js, prio;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+			struct kbase_context *kctx, *n;
+
+			list_for_each_entry_safe(kctx, n,
+				 &kbdev->js_data.ctx_list_unpullable[js][prio],
+				 jctx.sched_info.ctx.ctx_list_entry[js]) {
+				struct kbasep_js_kctx_info *js_kctx_info;
+				unsigned long flags;
+				bool timer_sync = false;
+
+				js_kctx_info = &kctx->jctx.sched_info;
+
+				mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+				mutex_lock(&js_devdata->runpool_mutex);
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+				if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+					kbase_js_ctx_pullable(kctx, js, false))
+					timer_sync =
+						kbase_js_ctx_list_add_pullable_nolock(
+								kbdev, kctx, js);
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+				if (timer_sync)
+					kbase_backend_ctx_count_changed(kbdev);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+			}
+		}
+	}
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	/* Restart atom processing */
+	kbase_js_sched_all(kbdev);
+
+	/* JS Resume complete */
+}
+
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if ((katom->core_req & BASE_JD_REQ_FS) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE |
+								BASE_JD_REQ_T)))
+		return false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) &&
+	    (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T)))
+		return false;
+
+	return true;
+}
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_FS)
+		return 0;
+
+	if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+		if (katom->device_nr == 1 &&
+				kbdev->gpu_props.num_core_groups == 2)
+			return 2;
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+			return 2;
+	}
+
+	return 1;
+}
+
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom)
+{
+	bool enqueue_required;
+
+	katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	/* If slot will transition from unpullable to pullable then add to
+	 * pullable list */
+	if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
+		enqueue_required = true;
+	} else {
+		enqueue_required = false;
+	}
+	if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
+			(katom->pre_dep && (katom->pre_dep->atom_flags &
+			KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		int prio = katom->sched_priority;
+		int js = katom->slot_nr;
+		struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+		list_add_tail(&katom->queue, &queue->x_dep_head);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+		enqueue_required = false;
+	} else {
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		/* Add atom to ring buffer. */
+		jsctx_tree_add(kctx, katom);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+	}
+
+	return enqueue_required;
+}
+
+/**
+ * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the
+ *                         runnable_tree, ready for execution
+ * @katom: Atom to submit
+ *
+ * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set,
+ * but is still present in the x_dep list. If @katom has a same-slot dependent
+ * atom then that atom (and any dependents) will also be moved.
+ */
+static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock);
+
+	while (katom) {
+		WARN_ON(!(katom->atom_flags &
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
+
+		if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+			list_del(&katom->queue);
+			katom->atom_flags &=
+					~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+			jsctx_tree_add(katom->kctx, katom);
+			katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+		} else {
+			break;
+		}
+
+		katom = katom->post_dep;
+	}
+}
+
+
+/**
+ * kbase_js_evict_deps - Evict dependencies of a failed atom.
+ * @kctx:       Context pointer
+ * @katom:      Pointer to the atom that has failed.
+ * @js:         The job slot the katom was run on.
+ * @prio:       Priority of the katom.
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propogated to all dependent atoms.
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom, int js, int prio)
+{
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+	struct kbase_jd_atom *next_katom = katom->post_dep;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (next_katom) {
+		KBASE_DEBUG_ASSERT(next_katom->status !=
+				KBASE_JD_ATOM_STATE_HW_COMPLETED);
+		next_katom->will_fail_event_code = katom->event_code;
+
+	}
+
+	/* Has cross slot depenency. */
+	if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE |
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		/* Remove dependency.*/
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+		/* Fail if it had a data dependency. */
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
+			x_dep->will_fail_event_code = katom->event_code;
+		}
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)
+			kbase_js_move_to_tree(x_dep);
+	}
+}
+
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	int pulled;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+		return NULL;
+	if (kbase_pm_is_suspending(kbdev))
+		return NULL;
+
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return NULL;
+	if (kctx->blocked_js[js][katom->sched_priority])
+		return NULL;
+	if (atomic_read(&katom->blocked))
+		return NULL;
+
+	/* Due to ordering restrictions when unpulling atoms on failure, we do
+	 * not allow multiple runs of fail-dep atoms from the same context to be
+	 * present on the same slot */
+	if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) {
+		struct kbase_jd_atom *prev_atom =
+				kbase_backend_inspect_tail(kbdev, js);
+
+		if (prev_atom && prev_atom->kctx != kctx)
+			return NULL;
+	}
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return NULL;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kbdev, js))
+			return NULL;
+	}
+
+	kbase_ctx_flag_set(kctx, KCTX_PULLED);
+	kbase_ctx_flag_set(kctx, (KCTX_PULLED_SINCE_ACTIVE_JS0 << js));
+
+	pulled = atomic_inc_return(&kctx->atoms_pulled);
+	if (pulled == 1 && !kctx->slots_pullable) {
+		WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+		kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+		atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+	}
+	atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
+	kctx->atoms_pulled_slot_pri[katom->slot_nr][katom->sched_priority]++;
+	jsctx_rb_pull(kctx, katom);
+
+	kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+	katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+
+	katom->ticks = 0;
+
+	return katom;
+}
+
+
+static void js_return_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+									work);
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	struct kbasep_js_atom_retained_state retained_state;
+	int js = katom->slot_nr;
+	int prio = katom->sched_priority;
+	bool timer_sync = false;
+	bool context_idle = false;
+	unsigned long flags;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	kbasep_js_atom_retained_state_copy(&retained_state, katom);
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	atomic_dec(&kctx->atoms_pulled);
+	atomic_dec(&kctx->atoms_pulled_slot[js]);
+
+	atomic_dec(&katom->blocked);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kctx->atoms_pulled_slot_pri[js][katom->sched_priority]--;
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
+			jsctx_rb_none_to_pull(kctx, js))
+		timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js);
+
+	/* If this slot has been blocked due to soft-stopped atoms, and all
+	 * atoms have now been processed, then unblock the slot */
+	if (!kctx->atoms_pulled_slot_pri[js][prio] &&
+			kctx->blocked_js[js][prio]) {
+		kctx->blocked_js[js][prio] = false;
+
+		/* Only mark the slot as pullable if the context is not idle -
+		 * that case is handled below */
+		if (atomic_read(&kctx->atoms_pulled) &&
+				kbase_js_ctx_pullable(kctx, js, true))
+			timer_sync |= kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, js);
+	}
+
+	if (!atomic_read(&kctx->atoms_pulled)) {
+		if (!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
+				!kbase_ctx_flag(kctx, KCTX_DYING)) {
+			int num_slots = kbdev->gpu_props.num_job_slots;
+			int slot;
+
+			if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+				kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+			for (slot = 0; slot < num_slots; slot++) {
+				if (kbase_js_ctx_pullable(kctx, slot, true))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, slot);
+			}
+		}
+
+		kbase_jm_idle_ctx(kbdev, kctx);
+
+		context_idle = true;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (context_idle) {
+		WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+		kbase_pm_context_idle(kbdev);
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+							&retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+}
+
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_rb_unpull(kctx, katom);
+
+	WARN_ON(work_pending(&katom->work));
+
+	/* Block re-submission until workqueue has run */
+	atomic_inc(&katom->blocked);
+
+	kbase_job_check_leave_disjoint(kctx->kbdev, katom);
+
+	INIT_WORK(&katom->work, js_return_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+						struct kbase_jd_atom *katom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	bool timer_sync = false;
+	int atom_slot;
+	bool context_idle = false;
+	int prio = katom->sched_priority;
+
+	kbdev = kctx->kbdev;
+	atom_slot = katom->slot_nr;
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
+		context_idle = !atomic_dec_return(&kctx->atoms_pulled);
+		atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
+		kctx->atoms_pulled_slot_pri[atom_slot][prio]--;
+
+		if (!atomic_read(&kctx->atoms_pulled) &&
+				!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		/* If this slot has been blocked due to soft-stopped atoms, and
+		 * all atoms have now been processed, then unblock the slot */
+		if (!kctx->atoms_pulled_slot_pri[atom_slot][prio]
+				&& kctx->blocked_js[atom_slot][prio]) {
+			kctx->blocked_js[atom_slot][prio] = false;
+			if (kbase_js_ctx_pullable(kctx, atom_slot, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+						kbdev, kctx, atom_slot);
+		}
+	}
+	WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE));
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
+			jsctx_rb_none_to_pull(kctx, atom_slot)) {
+		if (!list_empty(
+			&kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot]))
+			timer_sync |= kbase_js_ctx_list_remove_nolock(
+					kctx->kbdev, kctx, atom_slot);
+	}
+
+	/*
+	 * If submission is disabled on this context (most likely due to an
+	 * atom failure) and there are now no atoms left in the system then
+	 * re-enable submission so that context can be scheduled again.
+	 */
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
+					!atomic_read(&kctx->atoms_pulled) &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+		int js;
+
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	} else if (katom->x_post_dep &&
+			kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+		int js;
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	}
+
+	/* Mark context as inactive. The pm reference will be dropped later in
+	 * jd_done_worker().
+	 */
+	if (context_idle)
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return context_idle;
+}
+
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp)
+{
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+
+	kbdev = kctx->kbdev;
+
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (katom->will_fail_event_code)
+		katom->event_code = katom->will_fail_event_code;
+
+	katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
+
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_js_evict_deps(kctx, katom, katom->slot_nr,
+				katom->sched_priority);
+	}
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
+				katom->slot_nr), NULL, 0);
+#endif
+
+	kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
+
+	/* Unblock cross dependency if present */
+	if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
+			!(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) &&
+			(x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+		bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false);
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+		kbase_js_move_to_tree(x_dep);
+		if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false))
+			kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx,
+					x_dep->slot_nr);
+
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)
+			return x_dep;
+	}
+
+	return NULL;
+}
+
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS];
+	bool timer_sync = false;
+	bool ctx_waiting[BASE_JM_MAX_NR_SLOTS];
+	int js;
+
+	js_devdata = &kbdev->js_data;
+
+	down(&js_devdata->schedule_sem);
+	mutex_lock(&js_devdata->queue_mutex);
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		last_active[js] = kbdev->hwaccess.active_kctx[js];
+		ctx_waiting[js] = false;
+	}
+
+	while (js_mask) {
+		js = ffs(js_mask) - 1;
+
+		while (1) {
+			struct kbase_context *kctx;
+			unsigned long flags;
+			bool context_idle = false;
+
+			kctx = kbase_js_ctx_list_pop_head(kbdev, js);
+
+			if (!kctx) {
+				js_mask &= ~(1 << js);
+				break; /* No contexts on pullable list */
+			}
+
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) {
+				context_idle = true;
+
+				if (kbase_pm_context_active_handle_suspend(
+									kbdev,
+				      KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+					/* Suspend pending - return context to
+					 * queue and stop scheduling */
+					mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					if (kbase_js_ctx_list_add_pullable_head(
+						kctx->kbdev, kctx, js))
+						kbase_js_sync_timers(kbdev);
+					mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					mutex_unlock(&js_devdata->queue_mutex);
+					up(&js_devdata->schedule_sem);
+					return;
+				}
+				kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+			}
+
+			if (!kbase_js_use_ctx(kbdev, kctx, js)) {
+				mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				/* Context can not be used at this time */
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+				if (kbase_js_ctx_pullable(kctx, js, false)
+				    || kbase_ctx_flag(kctx, KCTX_PRIVILEGED))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev, kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				if (context_idle) {
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				}
+
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+				break;
+			}
+			mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_ctx_flag_clear(kctx, KCTX_PULLED);
+
+			if (!kbase_jm_kick(kbdev, 1 << js))
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+
+			if (!kbase_ctx_flag(kctx, KCTX_PULLED)) {
+				bool pullable = kbase_js_ctx_pullable(kctx, js,
+						true);
+
+				/* Failed to pull jobs - push to head of list.
+				 * Unless this context is already 'active', in
+				 * which case it's effectively already scheduled
+				 * so push it to the back of the list. */
+				if (pullable && kctx == last_active[js] &&
+						kbase_ctx_flag(kctx,
+						(KCTX_PULLED_SINCE_ACTIVE_JS0 <<
+						js)))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev,
+							kctx, js);
+				else if (pullable)
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev,
+							kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+								kctx->kbdev,
+								kctx, js);
+
+				/* If this context is not the active context,
+				 * but the active context is pullable on this
+				 * slot, then we need to remove the active
+				 * marker to prevent it from submitting atoms in
+				 * the IRQ handler, which would prevent this
+				 * context from making progress. */
+				if (last_active[js] && kctx != last_active[js]
+						&& kbase_js_ctx_pullable(
+						last_active[js], js, true))
+					ctx_waiting[js] = true;
+
+				if (context_idle) {
+					kbase_jm_idle_ctx(kbdev, kctx);
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				} else {
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+				}
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+				js_mask &= ~(1 << js);
+				break; /* Could not run atoms on this slot */
+			}
+
+			/* Push to back of list */
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev, kctx, js);
+			else
+				timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		}
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		if (kbdev->hwaccess.active_kctx[js] == last_active[js] &&
+				ctx_waiting[js])
+			kbdev->hwaccess.active_kctx[js] = NULL;
+	}
+
+	mutex_unlock(&js_devdata->queue_mutex);
+	up(&js_devdata->schedule_sem);
+}
+
+void kbase_js_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	int js;
+
+	/*
+	 * Critical assumption: No more submission is possible outside of the
+	 * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs)
+	 * whilst the struct kbase_context is terminating.
+	 */
+
+	/* First, atomically do the following:
+	 * - mark the context as dying
+	 * - try to evict it from the queue */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_set(kctx, KCTX_DYING);
+
+	dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+
+	/*
+	 * At this point we know:
+	 * - If eviction succeeded, it was in the queue, but now no
+	 *   longer is
+	 *  - We must cancel the jobs here. No Power Manager active reference to
+	 *    release.
+	 *  - This happens asynchronously - kbase_jd_zap_context() will wait for
+	 *    those jobs to be killed.
+	 * - If eviction failed, then it wasn't in the queue. It is one
+	 *   of the following:
+	 *  - a. it didn't have any jobs, and so is not in the Queue or
+	 *       the Run Pool (not scheduled)
+	 *   - Hence, no more work required to cancel jobs. No Power Manager
+	 *     active reference to release.
+	 *  - b. it was in the middle of a scheduling transaction (and thus must
+	 *       have at least 1 job). This can happen from a syscall or a
+	 *       kernel thread. We still hold the jsctx_mutex, and so the thread
+	 *       must be waiting inside kbasep_js_try_schedule_head_ctx(),
+	 *       before checking whether the runpool is full. That thread will
+	 *       continue after we drop the mutex, and will notice the context
+	 *       is dying. It will rollback the transaction, killing all jobs at
+	 *       the same time. kbase_jd_zap_context() will wait for those jobs
+	 *       to be killed.
+	 *   - Hence, no more work required to cancel jobs, or to release the
+	 *     Power Manager active reference.
+	 *  - c. it is scheduled, and may or may not be running jobs
+	 * - We must cause it to leave the runpool by stopping it from
+	 * submitting any more jobs. When it finally does leave,
+	 * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs
+	 * (because it is dying), release the Power Manager active reference,
+	 * and will not requeue the context in the queue.
+	 * kbase_jd_zap_context() will wait for those jobs to be killed.
+	 *  - Hence, work required just to make it leave the runpool. Cancelling
+	 *    jobs and releasing the Power manager active reference will be
+	 *    handled when it leaves the runpool.
+	 */
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (!list_empty(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+				list_del_init(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+		}
+
+		/* The following events require us to kill off remaining jobs
+		 * and update PM book-keeping:
+		 * - we evicted it correctly (it must have jobs to be in the
+		 *   Queue)
+		 *
+		 * These events need no action, but take this path anyway:
+		 * - Case a: it didn't have any jobs, and was never in the Queue
+		 * - Case b: scheduling transaction will be partially rolled-
+		 *           back (this already cancels the jobs)
+		 */
+
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+
+		/* Only cancel jobs when we evicted from the
+		 * queue. No Power Manager active reference was held.
+		 *
+		 * Having is_dying set ensures that this kills, and
+		 * doesn't requeue */
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+	} else {
+		unsigned long flags;
+		bool was_retained;
+
+		/* Case c: didn't evict, but it is scheduled - it's in the Run
+		 * Pool */
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+		dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+
+		/* Disable the ctx from submitting any more jobs */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		/* Retain and (later) release the context whilst it is is now
+		 * disallowed from submitting jobs - ensures that someone
+		 * somewhere will be removing the context later on */
+		was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+		/* Since it's scheduled and we have the jsctx_mutex, it must be
+		 * retained successfully */
+		KBASE_DEBUG_ASSERT(was_retained);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+
+		/* Cancel any remaining running jobs for this kctx - if any.
+		 * Submit is disallowed which takes effect immediately, so no
+		 * more new jobs will appear after we do this. */
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+			kbase_job_slot_hardstop(kctx, js, NULL);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+									kctx);
+
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u);
+
+	/* After this, you must wait on both the
+	 * kbase_jd_context::zero_jobs_wait and the
+	 * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs
+	 * to be destroyed, and the context to be de-scheduled (if it was on the
+	 * runpool).
+	 *
+	 * kbase_jd_zap_context() will do this. */
+}
+
+static inline int trace_get_refcnt(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	return atomic_read(&kctx->refcount);
+}
+
+/**
+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context
+ * @kctx:     Pointer to context.
+ * @callback: Pointer to function to call for each job.
+ *
+ * Call a function on all jobs belonging to a non-queued, non-running
+ * context, and detach the jobs from the context as it goes.
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * Atoms will be removed from the queue, so this must only be called when
+ * cancelling jobs (which occurs as part of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_ctx_job_cb callback)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	u32 js;
+
+	kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL,
+					0u, trace_get_refcnt(kbdev, kctx));
+
+	/* Invoke callback on jobs on each slot in turn */
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		jsctx_queue_foreach(kctx, js, callback);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
new file mode 100755
index 0000000..963cef9
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -0,0 +1,910 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler APIs.
+ */
+
+#ifndef _KBASE_JS_H_
+#define _KBASE_JS_H_
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_context.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_debug.h"
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js Job Scheduler Internal APIs
+ * @{
+ *
+ * These APIs are Internal to KBase.
+ */
+
+/**
+ * @brief Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * @brief Halt the Job Scheduler.
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero intitialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the struct kbase_context must be zero intitialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the context is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guarenteed not to update the Policy Queue. And so, the caller is
+ * guarenteed to not need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is false.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold hwaccess_lock (as this will be obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return true indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return false indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
+ *   the atom
+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ *   remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions.
+ *
+ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Completely remove a job chain from the Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it is not being killed with kbasep_jd_cancel()
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold the hwaccess_lock, (as this will be obtained
+ *   internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * @return true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs
+ * @return false otherwise
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_jd_atom *katom);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold mmu_hw_mutex and hwaccess_lock, because they will be
+ *   used internally.
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locks must be held by the caller:
+ * - mmu_hw_mutex, hwaccess_lock
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that is stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *   If the hwaccess_lock is already held, then the caller should use
+ *   kbasep_js_runpool_lookup_ctx_nolock() instead.
+ *
+ * @return a valid struct kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
+
+/**
+ * @brief Handling the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.  \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero and the context \em might be scheduled out
+ * (depending on whether the Scheudling Policy has deemed it so, or if it has run
+ * out of jobs).
+ *
+ * If the context does get scheduled out, then The following actions will be
+ * taken as part of deschduling a context:
+ * - For the context being descheduled:
+ *  - If the context is in the processing of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ *  - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ *  - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ *  - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * - Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * - Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * - Attempt scheduling in a new context (if one is available), and if necessary,
+ * running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function \b cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a \a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * - Releasing the atom's context attributes
+ * - Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * @brief Variant of kbase_js_runpool_release_ctx() that assumes that
+ * kbasep_js_device_data::runpool_mutex and
+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
+ * attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * @brief Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the function will wait
+ * for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space reserved) until
+ * kbasep_js_release_privileged_ctx is called).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Release a privileged context, allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - hwaccess_lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Mangement active refcount to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduler's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time.
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * @brief Resume the Job Scheduler after a Power Management Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * - Schedules contexts back into the runpool
+ * - Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * @brief Submit an atom to the job scheduler.
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to submit
+ *
+ * @return Whether the context requires to be enqueued. */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom);
+
+/**
+  * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
+  * @kctx:  Context Pointer
+  * @prio:  Priority (specifies the queue together with js).
+  * @js:    Job slot (specifies the queue together with prio).
+  *
+  * Pushes all possible atoms from the linked list to the ringbuffer.
+  * Number of atoms are limited to free space in the ringbuffer and
+  * number of available atoms in the linked list.
+  *
+  */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
+/**
+ * @brief Pull an atom from a context in the job scheduler for execution.
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context to pull from
+ * @param[in] js    Job slot to pull from
+ * @return          Pointer to an atom, or NULL if there are no atoms for this
+ *                  slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * @brief Return an atom to the job scheduler ringbuffer.
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to unpull
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom from jd_done_worker(), removing it from the job
+ * scheduler ringbuffer.
+ *
+ * If the atom failed then all dependee atoms marked for failure propagation
+ * will also fail.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] katom Pointer to the atom to complete
+ * @return true if the context is now idle (no jobs pulled)
+ *         false otherwise
+ */
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom.
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] katom         Pointer to the atom to complete
+ * @param[in] end_timestamp The time that the atom completed (may be NULL)
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp);
+
+/**
+ * @brief Submit atoms from all available contexts.
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ *
+ * @param[in] kbdev    Device pointer
+ * @param[in] js_mask  Mask of job slots to submit to
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_jd_zap_context - Attempt to deschedule a context that is being
+ *                        destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove a context from any internal job scheduler queues
+ * and perform any other actions to ensure a context will not be submitted
+ * from.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * @brief Validate an atom
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * @param[in] kbdev  Device pointer
+ * @param[in] katom  Atom to validate
+ * @return           true if atom is valid
+ *                   false otherwise
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user specified data
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used, if a timeout
+ * is negative then the default is set.
+ */
+void kbase_js_set_timeouts(struct kbase_device *kbdev);
+
+/**
+ * kbase_js_set_ctx_priority - set the context priority
+ * @kctx: Context pointer
+ * @new_priority: New priority value for the Context
+ *
+ * The context priority is set to a new value and it is moved to the
+ * pullable/unpullable list as per the new priority.
+ */
+void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority);
+
+
+/**
+ * kbase_js_update_ctx_priority - update the context priority
+ * @kctx: Context pointer
+ *
+ * The context priority gets updated as per the priority of atoms currently in
+ * use for that context, but only if system priority mode for context scheduling
+ * is being used.
+ */
+void kbase_js_update_ctx_priority(struct kbase_context *kctx);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * @brief Check that a context is allowed to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * As with any bool, never test the return value with true.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 test_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	test_bit = (u16) (1u << kctx->as_nr);
+
+	return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+}
+
+/**
+ * @brief Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 set_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	set_bit = (u16) (1u << kctx->as_nr);
+
+	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * @brief Prevent a context from submitting more jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 clear_bit;
+	u16 clear_mask;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	clear_bit = (u16) (1u << kctx->as_nr);
+	clear_mask = ~clear_bit;
+
+	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
+
+/**
+ * Create an initial 'invalid' atom retained state, that requires no
+ * atom-related work to be done on releasing with
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
+{
+	retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+	retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+}
+
+/**
+ * Copy atom state that can be made available after jd_done_nolock() is called
+ * on that atom.
+ */
+static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
+{
+	retained_state->event_code = katom->event_code;
+	retained_state->core_req = katom->core_req;
+	retained_state->sched_priority = katom->sched_priority;
+	retained_state->device_nr = katom->device_nr;
+}
+
+/**
+ * @brief Determine whether an atom has finished (given its retained state),
+ * and so should be given back to userspace/removed from the system.
+ *
+ * Reasons for an atom not finishing include:
+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ *
+ * @param[in] katom_retained_state the retained state of the atom to check
+ * @return    false if the atom has not finished
+ * @return    !=false if the atom has finished
+ */
+static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
+}
+
+/**
+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
+ * code should just ignore it.
+ *
+ * @param[in] katom_retained_state the atom's retained state to check
+ * @return    false if the retained state is invalid, and can be ignored
+ * @return    !=false if the retained state is valid
+ */
+static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guaranteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guaranteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_context *found_kctx;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	found_kctx = kbdev->as_to_kctx[as_nr];
+	KBASE_DEBUG_ASSERT(found_kctx == NULL ||
+			atomic_read(&found_kctx->refcount) > 0);
+
+	return found_kctx;
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+	++(js_devdata->nr_all_contexts_running);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+									S8_MAX);
+		++(js_devdata->nr_user_contexts_running);
+	}
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	--(js_devdata->nr_all_contexts_running);
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		--(js_devdata->nr_user_contexts_running);
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+	}
+}
+
+
+/**
+ * @brief Submit atoms from all available contexts to all job slots.
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ *
+ * @param[in] kbdev    Device pointer
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+	kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
+ *                                        to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treates priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ *         0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ *         KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+	if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+		return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+	return kbasep_js_atom_priority_to_relative[atom_prio];
+}
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+	unsigned int prio_idx;
+
+	KBASE_DEBUG_ASSERT(0 <= sched_prio
+			&& sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+	prio_idx = (unsigned int)sched_prio;
+
+	return kbasep_js_relative_priority_to_atom[prio_idx];
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100644
index 0000000..6fd908a
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,306 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+
+/*
+ * Private functions follow
+ */
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
+		++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) {
+			/* First refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+		--(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+			/* Last de-refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+	++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		/* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+	}
+
+	return runpool_state_changed;
+}
+
+/*
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		lockdep_assert_held(&kbdev->hwaccess_lock);
+		/* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+	}
+
+	/* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */
+	--(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* This context never submits, so don't track any scheduling attributes */
+		return;
+	}
+
+	/* Transfer attributes held in the context flags for contexts that have submit enabled */
+
+	/* ... More attributes can be added here ... */
+
+	/* The context should not have been scheduled yet, so ASSERT if this caused
+	 * runpool state changes (note that other threads *can't* affect the value
+	 * of runpool_state_changed, due to how it's calculated) */
+	KBASE_DEBUG_ASSERT(runpool_state_changed == false);
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed;
+	int i;
+
+	/* Retain any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled in, so update the runpool with the new attributes */
+			runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+
+			/* We don't need to know about state changed, because retaining a
+			 * context occurs on scheduling it, and that itself will also try
+			 * to run new atoms */
+			CSTD_UNUSED(runpool_state_changed);
+		}
+	}
+}
+
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+	int i;
+
+	/* Release any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled out, so update the runpool on the removed attributes */
+			runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom);
+	core_req = katom->core_req;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	/* We don't need to know about state changed, because retaining an
+	 * atom occurs on adding it, and that itself will also try to run
+	 * new atoms */
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom_retained_state);
+	core_req = katom_retained_state->core_req;
+
+	/* No-op for invalid atoms */
+	if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false)
+		return false;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	return runpool_state_changed;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
new file mode 100644
index 0000000..be781e6
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -0,0 +1,163 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_js_ctx_attr.h
+ * Job Scheduler Context Attribute APIs
+ */
+
+#ifndef _KBASE_JS_CTX_ATTR_H_
+#define _KBASE_JS_CTX_ATTR_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+
+/**
+ * Set the initial attributes of a context (when context create flags are set)
+ *
+ * Requires:
+ * - Hold the jsctx_mutex
+ */
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of a context
+ *
+ * This occurs on scheduling in the context on the runpool (but after
+ * is_scheduled is set)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ */
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Release all attributes of a context
+ *
+ * This occurs on scheduling out the context from the runpool (but before
+ * is_scheduled is cleared)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of an atom
+ *
+ * This occurs on adding an atom to a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ */
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * Release all attributes of an atom, given its retained state.
+ *
+ * This occurs after (permanently) removing an atom from a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * This is a no-op when \a katom_retained_state is invalid.
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+
+	return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	/* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+	return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+	return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100755
index 0000000..7385daa
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,415 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+	base_jd_core_req core_req;
+	kbase_context_flags ctx_req;
+	u32 device_nr;
+};
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affect scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accomodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ */
+enum kbasep_js_ctx_attr {
+	/** Attribute indicating a context that contains Compute jobs. That is,
+	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE,
+
+	/** Attribute indicating a context that contains Non-Compute jobs. That is,
+	 * the context has some jobs that are \b not of type @ref
+	 * BASE_JD_REQ_ONLY_COMPUTE.
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+	/** Attribute indicating that a context contains compute-job atoms that
+	 * aren't restricted to a coherent group, and can run on all cores.
+	 *
+	 * Specifically, this is when the atom's \a core_req satisfy:
+	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
+	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+	 *
+	 * Such atoms could be blocked from running if one of the coherent groups
+	 * is being used by another job slot, so tracking this context attribute
+	 * allows us to prevent such situations.
+	 *
+	 * @note This doesn't take into account the 1-coregroup case, where all
+	 * compute atoms would effectively be able to run on 'all cores', but
+	 * contexts will still not always get marked with this attribute. Instead,
+	 * it is the caller's responsibility to take into account the number of
+	 * coregroups when interpreting this attribute.
+	 *
+	 * @note Whilst Tiler atoms are normally combined with
+	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+	 * enough to handle anyway.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+	/** Must be the last in the enum */
+	KBASEP_JS_CTX_ATTR_COUNT
+};
+
+enum {
+	/** Bit indicating that new atom should be started because this atom completed */
+	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
+	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
+
+/*
+ * Context scheduling mode defines for kbase_device::js_ctx_scheduling_mode
+ */
+enum {
+	/*
+	 * In this mode, the context containing higher priority atoms will be
+	 * scheduled first and also the new runnable higher priority atoms can
+	 * preempt lower priority atoms currently running on the GPU, even if
+	 * they belong to a different context.
+	 */
+	KBASE_JS_SYSTEM_PRIORITY_MODE = 0,
+
+	/*
+	 * In this mode, the contexts are scheduled in round-robin fashion and
+	 * the new runnable higher priority atoms can preempt the lower priority
+	 * atoms currently running on the GPU, only if they belong to the same
+	 * context.
+	 */
+	KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE,
+
+	/* Must be the last in the enum */
+	KBASE_JS_PRIORITY_MODE_COUNT,
+};
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_MED,
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,
+	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace.
+ */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not rollover (which would be undefined behavior), and so under
+ * the Total License model, it is free to make optimizations based on that (i.e.
+ * to remove masking).
+ */
+struct kbasep_js_device_data {
+	/* Sub-structure to collect together Job Scheduling data used in IRQ
+	 * context. The hwaccess_lock must be held when accessing. */
+	struct runpool_irq {
+		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
+		 * When bit 'N' is set in this, it indicates whether the context bound to address space
+		 * 'N' is allowed to submit jobs.
+		 */
+		u16 submit_allowed;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of contexts
+		 * that can fit into the runpool. This is currently BASE_MAX_NR_AS
+		 *
+		 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+		 * the refcount. Hence, it's not worthwhile reducing this to
+		 * bit-manipulation on u32s to save space (where in contrast, 4 bit
+		 * sub-fields would be easy to do and would save space).
+		 *
+		 * Whilst this must not become negative, the sign bit is used for:
+		 * - error detection in debug builds
+		 * - Optimization: it is undefined for a signed int to overflow, and so
+		 * the compiler can optimize for that never happening (thus, no masking
+		 * is required on updating the variable) */
+		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/*
+		 * Affinity management and tracking
+		 */
+		/** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
+		 * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
+		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
+		/** Refcount for each core owned by each slot. Used to generate the
+		 * slot_affinities array of bitvectors
+		 *
+		 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+		 * because it is refcounted only when a job is definitely about to be
+		 * submitted to a slot, and is de-refcounted immediately after a job
+		 * finishes */
+		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
+	} runpool_irq;
+
+	/**
+	 * Run Pool mutex, for managing contexts within the runpool.
+	 * Unless otherwise specified, you must hold this lock whilst accessing any
+	 * members that follow
+	 *
+	 * In addition, this is used to access:
+	 * - the kbasep_js_kctx_info::runpool substructure
+	 */
+	struct mutex runpool_mutex;
+
+	/**
+	 * Queue Lock, used to access the Policy's queue of contexts independently
+	 * of the Run Pool.
+	 *
+	 * Of course, you don't need the Run Pool lock to access this.
+	 */
+	struct mutex queue_mutex;
+
+	/**
+	 * Scheduling semaphore. This must be held when calling
+	 * kbase_jm_kick()
+	 */
+	struct semaphore schedule_sem;
+
+	/**
+	 * List of contexts that can currently be pulled from
+	 */
+	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+	/**
+	 * List of contexts that can not currently be pulled from, but have
+	 * jobs currently running.
+	 */
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+	s8 nr_user_contexts_running;
+	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+	s8 nr_all_contexts_running;
+
+	/** Core Requirements to match up with base_js_atom's core_req memeber
+	 * @note This is a write-once member, and so no locking is required to read */
+	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+	u32 scheduling_period_ns;    /*< Value for JS_SCHEDULING_PERIOD_NS */
+	u32 soft_stop_ticks;	     /*< Value for JS_SOFT_STOP_TICKS */
+	u32 soft_stop_ticks_cl;	     /*< Value for JS_SOFT_STOP_TICKS_CL */
+	u32 hard_stop_ticks_ss;	     /*< Value for JS_HARD_STOP_TICKS_SS */
+	u32 hard_stop_ticks_cl;	     /*< Value for JS_HARD_STOP_TICKS_CL */
+	u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */
+	u32 gpu_reset_ticks_ss;	     /*< Value for JS_RESET_TICKS_SS */
+	u32 gpu_reset_ticks_cl;	     /*< Value for JS_RESET_TICKS_CL */
+	u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
+	u32 ctx_timeslice_ns;		 /**< Value for JS_CTX_TIMESLICE_NS */
+
+	/**< Value for JS_SOFT_JOB_TIMEOUT */
+	atomic_t soft_job_timeout_ms;
+
+	/** List of suspended soft jobs */
+	struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Support soft-stop on a single context */
+	bool softstop_always;
+#endif				/* CONFIG_MALI_DEBUG */
+
+	/** The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths).
+	 * @note This is a write-once member, and so no locking is required to read */
+	int init_status;
+
+	/* Number of contexts that can currently be pulled from */
+	u32 nr_contexts_pullable;
+
+	/* Number of contexts that can either be pulled from or are currently
+	 * running */
+	atomic_t nr_contexts_runnable;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+
+	/**
+	 * Job Scheduler Context information sub-structure. These members are
+	 * accessed regardless of whether the context is:
+	 * - In the Policy's Run Pool
+	 * - In the Policy's Queue
+	 * - Not queued nor in the Run Pool.
+	 *
+	 * You must obtain the jsctx_mutex before accessing any other members of
+	 * this substructure.
+	 *
+	 * You may not access any of these members from IRQ context.
+	 */
+	struct kbase_jsctx {
+		struct mutex jsctx_mutex;		    /**< Job Scheduler Context lock */
+
+		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+		 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
+		 * for such jobs*/
+		u32 nr_jobs;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of atoms on
+		 * the context. **/
+		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/**
+		 * Wait queue to wait for KCTX_SHEDULED flag state changes.
+		 * */
+		wait_queue_head_t is_scheduled_wait;
+
+		/** Link implementing JS queues. Context can be present on one
+		 * list per job slot
+		 */
+		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
+	} ctx;
+
+	/* The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths) */
+	int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+	/** Event code - to determine whether the atom has finished */
+	enum base_jd_event_code event_code;
+	/** core requirements */
+	base_jd_core_req core_req;
+	/* priority */
+	int sched_priority;
+	/* Core group atom was executed on */
+	u32 device_nr;
+
+};
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbase_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
+ *
+ * @see kbase_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100644
index 0000000..003ac9e
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,48 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port. */
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+
+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+	#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+#else
+	#define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100755
index 0000000..1dd161b
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,3534 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/compat.h>
+#include <linux/version.h>
+#include <linux/log2.h>
+
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_tlstream.h>
+
+/* This function finds out which RB tree the given GPU VA region belongs to
+ * based on the region zone */
+static struct rb_root *kbase_reg_flags_to_rbtree(struct kbase_context *kctx,
+						    struct kbase_va_region *reg)
+{
+	struct rb_root *rbtree = NULL;
+
+	switch (reg->flags & KBASE_REG_ZONE_MASK) {
+	case KBASE_REG_ZONE_CUSTOM_VA:
+		rbtree = &kctx->reg_rbtree_custom;
+		break;
+	case KBASE_REG_ZONE_EXEC:
+		rbtree = &kctx->reg_rbtree_exec;
+		break;
+	case KBASE_REG_ZONE_SAME_VA:
+		rbtree = &kctx->reg_rbtree_same;
+		/* fall through */
+	default:
+		rbtree = &kctx->reg_rbtree_same;
+		break;
+	}
+
+	return rbtree;
+}
+
+/* This function finds out which RB tree the given pfn from the GPU VA belongs
+ * to based on the memory zone the pfn refers to */
+static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
+								    u64 gpu_pfn)
+{
+	struct rb_root *rbtree = NULL;
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif /* CONFIG_64BIT */
+		if (gpu_pfn >= KBASE_REG_ZONE_CUSTOM_VA_BASE)
+			rbtree = &kctx->reg_rbtree_custom;
+		else if (gpu_pfn >= KBASE_REG_ZONE_EXEC_BASE)
+			rbtree = &kctx->reg_rbtree_exec;
+		else
+			rbtree = &kctx->reg_rbtree_same;
+#ifdef CONFIG_64BIT
+	} else {
+		if (gpu_pfn >= kctx->same_va_end)
+			rbtree = &kctx->reg_rbtree_custom;
+		else
+			rbtree = &kctx->reg_rbtree_same;
+	}
+#endif /* CONFIG_64BIT */
+
+	return rbtree;
+}
+
+/* This function inserts a region into the tree. */
+static void kbase_region_tracker_insert(struct kbase_context *kctx,
+						struct kbase_va_region *new_reg)
+{
+	u64 start_pfn = new_reg->start_pfn;
+	struct rb_node **link = NULL;
+	struct rb_node *parent = NULL;
+	struct rb_root *rbtree = NULL;
+
+	rbtree = kbase_reg_flags_to_rbtree(kctx, new_reg);
+
+	link = &(rbtree->rb_node);
+	/* Find the right place in the tree using tree search */
+	while (*link) {
+		struct kbase_va_region *old_reg;
+
+		parent = *link;
+		old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+		/* RBTree requires no duplicate entries. */
+		KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+		if (old_reg->start_pfn > start_pfn)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Put the new node there, and rebalance tree */
+	rb_link_node(&(new_reg->rblink), parent, link);
+
+	rb_insert_color(&(new_reg->rblink), rbtree);
+}
+
+/* Find allocated region enclosing free range. */
+static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
+		struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	u64 end_pfn = start_pfn + nr_pages;
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, start_pfn);
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (start_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (end_pfn > tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_root *rbtree = NULL;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (gpu_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (gpu_pfn >= tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->start_pfn > gpu_pfn)
+			rbnode = rbnode->rb_left;
+		else if (reg->start_pfn < gpu_pfn)
+			rbnode = rbnode->rb_right;
+		else
+			return reg;
+
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
+		struct kbase_context *kctx, struct kbase_va_region *reg_reqs,
+		size_t nr_pages, size_t align_offset, size_t align_mask,
+		u64 *out_start_pfn)
+{
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	/* Note that this search is a linear search, as we do not have a target
+	   address in mind, so does not benefit from the rbtree search */
+	rbtree = kbase_reg_flags_to_rbtree(kctx, reg_reqs);
+
+	rbnode = rb_first(rbtree);
+
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if ((reg->nr_pages >= nr_pages) &&
+				(reg->flags & KBASE_REG_FREE)) {
+			/* Check alignment */
+			u64 start_pfn = reg->start_pfn;
+
+			/* When align_offset == align, this sequence is
+			 * equivalent to:
+			 *   (start_pfn + align_mask) & ~(align_mask)
+			 *
+			 * Otherwise, it aligns to n*align + offset, for the
+			 * lowest value n that makes this still >start_pfn */
+			start_pfn += align_mask;
+			start_pfn -= (start_pfn - align_offset) & (align_mask);
+
+			if ((start_pfn >= reg->start_pfn) &&
+					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) {
+				*out_start_pfn = start_pfn;
+				return reg;
+			}
+		}
+		rbnode = rb_next(rbnode);
+	}
+
+	return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions.  It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ */
+static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	struct rb_node *rbprev;
+	struct kbase_va_region *prev = NULL;
+	struct rb_node *rbnext;
+	struct kbase_va_region *next = NULL;
+	struct rb_root *reg_rbtree = NULL;
+
+	int merged_front = 0;
+	int merged_back = 0;
+	int err = 0;
+
+	reg_rbtree = kbase_reg_flags_to_rbtree(kctx, reg);
+
+	/* Try to merge with the previous block first */
+	rbprev = rb_prev(&(reg->rblink));
+	if (rbprev) {
+		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+		if (prev->flags & KBASE_REG_FREE) {
+			/* We're compatible with the previous VMA,
+			 * merge with it */
+			WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) !=
+					    (reg->flags & KBASE_REG_ZONE_MASK));
+			prev->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), reg_rbtree);
+			reg = prev;
+			merged_front = 1;
+		}
+	}
+
+	/* Try to merge with the next block second */
+	/* Note we do the lookup here as the tree may have been rebalanced. */
+	rbnext = rb_next(&(reg->rblink));
+	if (rbnext) {
+		/* We're compatible with the next VMA, merge with it */
+		next = rb_entry(rbnext, struct kbase_va_region, rblink);
+		if (next->flags & KBASE_REG_FREE) {
+			WARN_ON((next->flags & KBASE_REG_ZONE_MASK) !=
+					    (reg->flags & KBASE_REG_ZONE_MASK));
+			next->start_pfn = reg->start_pfn;
+			next->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), reg_rbtree);
+			merged_back = 1;
+			if (merged_front) {
+				/* We already merged with prev, free it */
+				kbase_free_alloced_region(reg);
+			}
+		}
+	}
+
+	/* If we failed to merge then we need to add a new block */
+	if (!(merged_front || merged_back)) {
+		/*
+		 * We didn't merge anything. Add a new free
+		 * placeholder and remove the original one.
+		 */
+		struct kbase_va_region *free_reg;
+
+		free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+		if (!free_reg) {
+			err = -ENOMEM;
+			goto out;
+		}
+		rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
+	}
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * @brief Insert a VA region to the list, replacing the current at_reg.
+ */
+static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_root *reg_rbtree = NULL;
+	int err = 0;
+
+	reg_rbtree = kbase_reg_flags_to_rbtree(kctx, at_reg);
+
+	/* Must be a free region */
+	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+	/* start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+	/* at least nr_pages from start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	/* Regions are a whole use, so swap and delete old one. */
+	if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+		rb_replace_node(&(at_reg->rblink), &(new_reg->rblink),
+								reg_rbtree);
+		kbase_free_alloced_region(at_reg);
+	}
+	/* New region replaces the start of the old one, so insert before. */
+	else if (at_reg->start_pfn == start_pfn) {
+		at_reg->start_pfn += nr_pages;
+		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region replaces the end of the old one, so insert after. */
+	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region splits the old one, so insert and create new */
+	else {
+		struct kbase_va_region *new_front_reg;
+
+		new_front_reg = kbase_alloc_free_region(kctx,
+				at_reg->start_pfn,
+				start_pfn - at_reg->start_pfn,
+				at_reg->flags & KBASE_REG_ZONE_MASK);
+
+		if (new_front_reg) {
+			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+			at_reg->start_pfn = start_pfn + nr_pages;
+
+			kbase_region_tracker_insert(kctx, new_front_reg);
+			kbase_region_tracker_insert(kctx, new_reg);
+		} else {
+			err = -ENOMEM;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * @brief Add a VA region to the list.
+ */
+int kbase_add_va_region(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 addr,
+		size_t nr_pages, size_t align)
+{
+	struct kbase_va_region *tmp;
+	u64 gpu_pfn = addr >> PAGE_SHIFT;
+	int err = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!align)
+		align = 1;
+
+	/* must be a power of 2 */
+	KBASE_DEBUG_ASSERT(is_power_of_2(align));
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+	/* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
+	if (gpu_pfn) {
+		struct device *dev = kctx->kbdev->dev;
+
+		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+		tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
+		if (!tmp) {
+			dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+		if (!(tmp->flags & KBASE_REG_FREE)) {
+			dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
+			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
+			dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
+		if (err) {
+			dev_warn(dev, "Failed to insert va region");
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		goto exit;
+	}
+
+	/* Path 2: Map any free address which meets the requirements.
+	 *
+	 * Depending on the zone the allocation request is for
+	 * we might need to retry it. */
+	do {
+		u64 start_pfn;
+		size_t align_offset = align;
+		size_t align_mask = align - 1;
+
+		if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) {
+			WARN(align > 1,
+					"kbase_add_va_region with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
+					(unsigned long)align);
+			align_mask  = reg->extent - 1;
+			align_offset = reg->extent - reg->initial_commit;
+		}
+
+		tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg,
+				nr_pages, align_offset, align_mask,
+				&start_pfn);
+		if (tmp) {
+			err = kbase_insert_va_region_nolock(kctx, reg, tmp,
+					start_pfn, nr_pages);
+			break;
+		}
+
+		/*
+		 * If the allocation is not from the same zone as JIT
+		 * then don't retry, we're out of VA and there is
+		 * nothing which can be done about it.
+		 */
+		if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+				KBASE_REG_ZONE_CUSTOM_VA)
+			break;
+	} while (kbase_jit_evict(kctx));
+
+	if (!tmp)
+		err = -ENOMEM;
+
+ exit:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+		struct kbase_va_region *same_va_reg,
+		struct kbase_va_region *exec_reg,
+		struct kbase_va_region *custom_va_reg)
+{
+	kctx->reg_rbtree_same = RB_ROOT;
+	kbase_region_tracker_insert(kctx, same_va_reg);
+
+	/* Although exec and custom_va_reg don't always exist,
+	 * initialize unconditionally because of the mem_view debugfs
+	 * implementation which relies on these being empty
+	 */
+	kctx->reg_rbtree_exec = RB_ROOT;
+	kctx->reg_rbtree_custom = RB_ROOT;
+
+	if (exec_reg)
+		kbase_region_tracker_insert(kctx, exec_reg);
+	if (custom_va_reg)
+		kbase_region_tracker_insert(kctx, custom_va_reg);
+}
+
+static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	do {
+		rbnode = rb_first(rbtree);
+		if (rbnode) {
+			rb_erase(rbnode, rbtree);
+			reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+			kbase_free_alloced_region(reg);
+		}
+	} while (rbnode);
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
+}
+
+static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
+{
+#if defined(CONFIG_ARM64)
+	/* VA_BITS can be as high as 48 bits, but all bits are available for
+	 * both user and kernel.
+	 */
+	size_t cpu_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	/* x86_64 can access 48 bits of VA, but the 48th is used to denote
+	 * kernel (1) vs userspace (0), so the max here is 47.
+	 */
+	size_t cpu_va_bits = 47;
+#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32)
+	size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE;
+#else
+#error "Unknown CPU VA width for this architecture"
+#endif
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		cpu_va_bits = 32;
+#endif
+
+	return min(cpu_va_bits, (size_t) kctx->kbdev->gpu_props.mmu.va_bits);
+}
+
+/**
+ * Initialize the region tracker data structure.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+	struct kbase_va_region *same_va_reg;
+	struct kbase_va_region *exec_reg = NULL;
+	struct kbase_va_region *custom_va_reg = NULL;
+	size_t same_va_bits = kbase_get_same_va_bits(kctx);
+	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+	u64 same_va_pages;
+	int err;
+
+	/* Take the lock as kbase_free_alloced_region requires it */
+	kbase_gpu_vm_lock(kctx);
+
+	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+	/* all have SAME_VA */
+	same_va_reg = kbase_alloc_free_region(kctx, 1,
+			same_va_pages,
+			KBASE_REG_ZONE_SAME_VA);
+
+	if (!same_va_reg) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+#ifdef CONFIG_64BIT
+	/* 32-bit clients have exec and custom VA zones */
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif
+		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+			err = -EINVAL;
+			goto fail_free_same_va;
+		}
+		/* If the current size of TMEM is out of range of the
+		 * virtual address space addressable by the MMU then
+		 * we should shrink it to fit
+		 */
+		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+		exec_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_EXEC_BASE,
+				KBASE_REG_ZONE_EXEC_SIZE,
+				KBASE_REG_ZONE_EXEC);
+
+		if (!exec_reg) {
+			err = -ENOMEM;
+			goto fail_free_same_va;
+		}
+
+		custom_va_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_CUSTOM_VA_BASE,
+				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+		if (!custom_va_reg) {
+			err = -ENOMEM;
+			goto fail_free_exec;
+		}
+#ifdef CONFIG_64BIT
+	}
+#endif
+
+	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg,
+					custom_va_reg);
+
+	kctx->same_va_end = same_va_pages + 1;
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_free_exec:
+	kbase_free_alloced_region(exec_reg);
+fail_free_same_va:
+	kbase_free_alloced_region(same_va_reg);
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+#ifdef CONFIG_64BIT
+static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
+		u64 jit_va_pages)
+{
+	struct kbase_va_region *same_va;
+	struct kbase_va_region *custom_va_reg;
+	u64 same_va_bits = kbase_get_same_va_bits(kctx);
+	u64 total_va_size;
+	int err;
+
+	total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/*
+	 * Modify the same VA free region after creation. Be careful to ensure
+	 * that allocations haven't been made as they could cause an overlap
+	 * to happen with existing same VA allocations and the custom VA zone.
+	 */
+	same_va = kbase_region_tracker_find_region_base_address(kctx,
+			PAGE_SIZE);
+	if (!same_va) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* The region flag or region size has changed since creation so bail. */
+	if ((!(same_va->flags & KBASE_REG_FREE)) ||
+			(same_va->nr_pages != total_va_size)) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	if (same_va->nr_pages < jit_va_pages ||
+			kctx->same_va_end < jit_va_pages) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* It's safe to adjust the same VA zone now */
+	same_va->nr_pages -= jit_va_pages;
+	kctx->same_va_end -= jit_va_pages;
+
+	/*
+	 * Create a custom VA zone at the end of the VA for allocations which
+	 * JIT can use so it doesn't have to allocate VA from the kernel.
+	 */
+	custom_va_reg = kbase_alloc_free_region(kctx,
+				kctx->same_va_end,
+				jit_va_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+
+	if (!custom_va_reg) {
+		/*
+		 * The context will be destroyed if we fail here so no point
+		 * reverting the change we made to same_va.
+		 */
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	kbase_region_tracker_insert(kctx, custom_va_reg);
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+#endif
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
+		u8 max_allocations, u8 trim_level)
+{
+	if (trim_level > 100)
+		return -EINVAL;
+
+	kctx->jit_max_allocations = max_allocations;
+	kctx->trim_level = trim_level;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
+#endif
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+	return 0;
+}
+
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+	kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX;
+
+	/* Initialize memory usage */
+	atomic_set(&memdev->used_pages, 0);
+
+	ret = kbase_mem_pool_init(&kbdev->mem_pool,
+			KBASE_MEM_POOL_MAX_SIZE_KBDEV,
+			KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
+			kbdev,
+			NULL);
+	if (ret)
+		return ret;
+
+	ret = kbase_mem_pool_init(&kbdev->lp_mem_pool,
+			(KBASE_MEM_POOL_MAX_SIZE_KBDEV >> 9),
+			KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
+			kbdev,
+			NULL);
+	if (ret)
+		kbase_mem_pool_term(&kbdev->mem_pool);
+
+	return ret;
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int pages;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	pages = atomic_read(&memdev->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_pool_term(&kbdev->mem_pool);
+	kbase_mem_pool_term(&kbdev->lp_mem_pool);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_term);
+
+
+
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA,
+ * or KBASE_REG_ZONE_EXEC
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+{
+	struct kbase_va_region *new_reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* zone argument should only contain zone related region flags */
+	KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
+
+	new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+	if (!new_reg)
+		return NULL;
+
+	new_reg->cpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->gpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->kctx = kctx;
+	new_reg->flags = zone | KBASE_REG_FREE;
+
+	new_reg->flags |= KBASE_REG_GROWABLE;
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	INIT_LIST_HEAD(&new_reg->jit_node);
+
+	return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions.
+ *
+ */
+void kbase_free_alloced_region(struct kbase_va_region *reg)
+{
+	if (!(reg->flags & KBASE_REG_FREE)) {
+		mutex_lock(&reg->kctx->jit_evict_lock);
+
+		/*
+		 * The physical allocation should have been removed from the
+		 * eviction list before this function is called. However, in the
+		 * case of abnormal process termination or the app leaking the
+		 * memory kbase_mem_free_region is not called so it can still be
+		 * on the list at termination time of the region tracker.
+		 */
+		if (!list_empty(&reg->gpu_alloc->evict_node)) {
+			mutex_unlock(&reg->kctx->jit_evict_lock);
+
+			/*
+			 * Unlink the physical allocation before unmaking it
+			 * evictable so that the allocation isn't grown back to
+			 * its last backed size as we're going to unmap it
+			 * anyway.
+			 */
+			reg->cpu_alloc->reg = NULL;
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				reg->gpu_alloc->reg = NULL;
+
+			/*
+			 * If a region has been made evictable then we must
+			 * unmake it before trying to free it.
+			 * If the memory hasn't been reclaimed it will be
+			 * unmapped and freed below, if it has been reclaimed
+			 * then the operations below are no-ops.
+			 */
+			if (reg->flags & KBASE_REG_DONT_NEED) {
+				KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+						   KBASE_MEM_TYPE_NATIVE);
+				kbase_mem_evictable_unmake(reg->gpu_alloc);
+			}
+		} else {
+			mutex_unlock(&reg->kctx->jit_evict_lock);
+		}
+
+		/*
+		 * Remove the region from the sticky resource metadata
+		 * list should it be there.
+		 */
+		kbase_sticky_resource_release(reg->kctx, NULL,
+				reg->start_pfn << PAGE_SHIFT);
+
+		kbase_mem_phy_alloc_put(reg->cpu_alloc);
+		kbase_mem_phy_alloc_put(reg->gpu_alloc);
+		/* To detect use-after-free in debug builds */
+		KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
+	}
+	kfree(reg);
+}
+
+KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+{
+	int err;
+	size_t i = 0;
+	unsigned long attr;
+	unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+	unsigned long gwt_mask = ~0;
+
+#ifdef CONFIG_MALI_JOB_DUMP
+	if (kctx->gwt_enabled)
+		gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
+
+	if ((kctx->kbdev->system_coherency == COHERENCY_ACE) &&
+		(reg->flags & KBASE_REG_SHARE_BOTH))
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA);
+	else
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
+	if (err)
+		return err;
+
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+		struct kbase_mem_phy_alloc *alloc;
+
+		alloc = reg->gpu_alloc;
+		stride = alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
+		for (i = 0; i < alloc->imported.alias.nents; i++) {
+			if (alloc->imported.alias.aliased[i].alloc) {
+				err = kbase_mmu_insert_pages(kctx,
+						reg->start_pfn + (i * stride),
+						alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
+						alloc->imported.alias.aliased[i].length,
+						reg->flags & gwt_mask);
+				if (err)
+					goto bad_insert;
+
+				kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc);
+			} else {
+				err = kbase_mmu_insert_single_page(kctx,
+					reg->start_pfn + i * stride,
+					kctx->aliasing_sink_page,
+					alloc->imported.alias.aliased[i].length,
+					(reg->flags & mask & gwt_mask) | attr);
+
+				if (err)
+					goto bad_insert;
+			}
+		}
+	} else {
+		err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+				kbase_get_gpu_phy_pages(reg),
+				kbase_reg_current_backed_size(reg),
+				reg->flags & gwt_mask);
+		if (err)
+			goto bad_insert;
+		kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
+	}
+
+	return err;
+
+bad_insert:
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+
+		stride = reg->gpu_alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		while (i--)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
+				kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length);
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+			}
+	}
+
+	kbase_remove_va_region(kctx, reg);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable);
+
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	if (reg->start_pfn == 0)
+		return 0;
+
+	if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		size_t i;
+
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+	} else {
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
+	}
+
+	if (reg->gpu_alloc && reg->gpu_alloc->type ==
+			KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		struct kbase_alloc_import_user_buf *user_buf =
+			&reg->gpu_alloc->imported.user_buf;
+
+		if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
+			user_buf->current_mapping_usage_count &=
+				~PINNED_ON_IMPORT;
+
+			kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc,
+					(reg->flags & KBASE_REG_GPU_WR));
+		}
+	}
+
+	if (err)
+		return err;
+
+	err = kbase_remove_va_region(kctx, reg);
+	return err;
+}
+
+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset)
+{
+	struct vm_area_struct *vma;
+	struct kbase_cpu_mapping *map;
+	unsigned long vm_pgoff_in_region;
+	unsigned long vm_off_in_region;
+	unsigned long map_start;
+	size_t map_size;
+
+	lockdep_assert_held(&current->mm->mmap_sem);
+
+	if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
+		return NULL;
+
+	vma = find_vma_intersection(current->mm, uaddr, uaddr+size);
+
+	if (!vma || vma->vm_start > uaddr)
+		return NULL;
+	if (vma->vm_ops != &kbase_vm_ops)
+		/* Not ours! */
+		return NULL;
+
+	map = vma->vm_private_data;
+
+	if (map->kctx != kctx)
+		/* Not from this context! */
+		return NULL;
+
+	vm_pgoff_in_region = vma->vm_pgoff - map->region->start_pfn;
+	vm_off_in_region = vm_pgoff_in_region << PAGE_SHIFT;
+	map_start = vma->vm_start - vm_off_in_region;
+	map_size = map->region->nr_pages << PAGE_SHIFT;
+
+	if ((uaddr + size) > (map_start + map_size))
+		/* Not within the CPU mapping */
+		return NULL;
+
+	*offset = (uaddr - vma->vm_start) + vm_off_in_region;
+
+	return map;
+}
+
+int kbasep_find_enclosing_cpu_mapping_offset(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset)
+{
+	struct kbase_cpu_mapping *map;
+
+	kbase_os_mem_map_lock(kctx);
+
+	map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset);
+
+	kbase_os_mem_map_unlock(kctx);
+
+	if (!map)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
+
+int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kctx,
+		u64 gpu_addr, size_t size, u64 *start, u64 *offset)
+{
+	struct kbase_va_region *region;
+
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+
+	if (!region) {
+		kbase_gpu_vm_unlock(kctx);
+		return -EINVAL;
+	}
+
+	*start = region->start_pfn << PAGE_SHIFT;
+
+	*offset = gpu_addr - *start;
+
+	if (((region->start_pfn + region->nr_pages) << PAGE_SHIFT) < (gpu_addr + size)) {
+		kbase_gpu_vm_unlock(kctx);
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_gpu_mapping_start_and_offset);
+
+void kbase_sync_single(struct kbase_context *kctx,
+		struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa,
+		off_t offset, size_t size, enum kbase_sync_type sync_fn)
+{
+	struct page *cpu_page;
+	phys_addr_t cpu_pa = as_phys_addr_t(t_cpu_pa);
+	phys_addr_t gpu_pa = as_phys_addr_t(t_gpu_pa);
+
+	cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
+
+	if (likely(cpu_pa == gpu_pa)) {
+		dma_addr_t dma_addr;
+
+		BUG_ON(!cpu_page);
+		BUG_ON(offset + size > PAGE_SIZE);
+
+		dma_addr = kbase_dma_addr(cpu_page) + offset;
+		if (sync_fn == KBASE_SYNC_TO_CPU)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+		else if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+	} else {
+		void *src = NULL;
+		void *dst = NULL;
+		struct page *gpu_page;
+
+		if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
+			return;
+
+		gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+
+		if (sync_fn == KBASE_SYNC_TO_DEVICE) {
+			src = ((unsigned char *)kmap(cpu_page)) + offset;
+			dst = ((unsigned char *)kmap(gpu_page)) + offset;
+		} else if (sync_fn == KBASE_SYNC_TO_CPU) {
+			dma_sync_single_for_cpu(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+			src = ((unsigned char *)kmap(gpu_page)) + offset;
+			dst = ((unsigned char *)kmap(cpu_page)) + offset;
+		}
+		memcpy(dst, src, size);
+		kunmap(gpu_page);
+		kunmap(cpu_page);
+		if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+	}
+}
+
+static int kbase_do_syncset(struct kbase_context *kctx,
+		struct basep_syncset *sset, enum kbase_sync_type sync_fn)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+	struct kbase_cpu_mapping *map;
+	unsigned long start;
+	size_t size;
+	struct tagged_addr *cpu_pa;
+	struct tagged_addr *gpu_pa;
+	u64 page_off, page_count;
+	u64 i;
+	u64 offset;
+
+	kbase_os_mem_map_lock(kctx);
+	kbase_gpu_vm_lock(kctx);
+
+	/* find the region where the virtual address is contained */
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			sset->mem_handle.basep.handle);
+	if (!reg) {
+		dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX",
+				sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) ||
+			kbase_mem_is_imported(reg->gpu_alloc->type))
+		goto out_unlock;
+
+	start = (uintptr_t)sset->user_addr;
+	size = (size_t)sset->size;
+
+	map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset);
+	if (!map) {
+		dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
+				start, sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	page_off = offset >> PAGE_SHIFT;
+	offset &= ~PAGE_MASK;
+	page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	cpu_pa = kbase_get_cpu_phy_pages(reg);
+	gpu_pa = kbase_get_gpu_phy_pages(reg);
+
+	if (page_off > reg->nr_pages ||
+			page_off + page_count > reg->nr_pages) {
+		/* Sync overflows the region */
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* Sync first page */
+	if (as_phys_addr_t(cpu_pa[page_off])) {
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+
+		kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
+				offset, sz, sync_fn);
+	}
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		/* we grow upwards, so bail on first non-present page */
+		if (!as_phys_addr_t(cpu_pa[page_off + i]))
+			break;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + i],
+				gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1 &&
+	    as_phys_addr_t(cpu_pa[page_off + page_count - 1])) {
+		size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
+				gpu_pa[page_off + page_count - 1], 0, sz,
+				sync_fn);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_os_mem_map_unlock(kctx);
+	return err;
+}
+
+int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset)
+{
+	int err = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(sset != NULL);
+
+	if (sset->mem_handle.basep.handle & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev,
+				"mem_handle: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	switch (sset->type) {
+	case BASE_SYNCSET_OP_MSYNC:
+		err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_DEVICE);
+		break;
+
+	case BASE_SYNCSET_OP_CSYNC:
+		err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_CPU);
+		break;
+
+	default:
+		dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
+		break;
+	}
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_sync_now);
+
+/* vm lock must be held */
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (reg->flags & KBASE_REG_JIT) {
+		dev_warn(reg->kctx->kbdev->dev, "Attempt to free JIT memory!\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Unlink the physical allocation before unmaking it evictable so
+	 * that the allocation isn't grown back to its last backed size
+	 * as we're going to unmap it anyway.
+	 */
+	reg->cpu_alloc->reg = NULL;
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		reg->gpu_alloc->reg = NULL;
+
+	/*
+	 * If a region has been made evictable then we must unmake it
+	 * before trying to free it.
+	 * If the memory hasn't been reclaimed it will be unmapped and freed
+	 * below, if it has been reclaimed then the operations below are no-ops.
+	 */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+				   KBASE_MEM_TYPE_NATIVE);
+		kbase_mem_evictable_unmake(reg->gpu_alloc);
+	}
+
+	err = kbase_gpu_munmap(kctx, reg);
+	if (err) {
+		dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+		goto out;
+	}
+
+	/* This will also free the physical pages */
+	kbase_free_alloced_region(reg);
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free_region);
+
+/**
+ * @brief Free the region from the GPU and unregister it.
+ *
+ * This function implements the free operation on a memory segment.
+ * It will loudly fail if called with outstanding mappings.
+ */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) {
+		dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid");
+		return -EINVAL;
+	}
+
+	if (0 == gpu_addr) {
+		dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+		return -EINVAL;
+	}
+	kbase_gpu_vm_lock(kctx);
+
+	if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
+	    gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
+		int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
+
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* ask to unlink the cookie as we'll free it */
+
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		kbase_free_alloced_region(reg);
+	} else {
+		/* A real GPU va */
+		/* Validate the region */
+		reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+		if (!reg || (reg->flags & KBASE_REG_FREE)) {
+			dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+			/* SAME_VA must be freed through munmap */
+			dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		err = kbase_mem_free_region(kctx, reg);
+	}
+
+ out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free);
+
+int kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
+
+	reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
+	/* all memory is now growable */
+	reg->flags |= KBASE_REG_GROWABLE;
+
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		reg->flags |= KBASE_REG_PF_GROW;
+
+	if (flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (0 == (flags & BASE_MEM_PROT_GPU_EX))
+		reg->flags |= KBASE_REG_GPU_NX;
+
+	if (!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED)
+			return -EINVAL;
+	} else if (flags & (BASE_MEM_COHERENT_SYSTEM |
+			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
+		reg->flags |= KBASE_REG_SHARE_BOTH;
+	}
+
+	if (!(reg->flags & KBASE_REG_SHARE_BOTH) &&
+			flags & BASE_MEM_COHERENT_LOCAL) {
+		reg->flags |= KBASE_REG_SHARE_IN;
+	}
+
+	if (flags & BASE_MEM_TILER_ALIGN_TOP)
+		reg->flags |= KBASE_REG_TILER_ALIGN_TOP;
+
+	/* Set up default MEMATTR usage */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
+		(reg->flags & KBASE_REG_SHARE_BOTH)) {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
+	} else {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
+	}
+
+	return 0;
+}
+
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
+		size_t nr_pages_requested)
+{
+	int new_page_count __maybe_unused;
+	size_t nr_left = nr_pages_requested;
+	int res;
+	struct kbase_context *kctx;
+	struct tagged_addr *tp;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+	if (alloc->reg) {
+		if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
+			goto invalid_request;
+	}
+
+	kctx = alloc->imported.kctx;
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = kbase_atomic_add_pages(
+			nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer */
+	kbase_process_page_usage_inc(kctx, nr_pages_requested);
+
+	tp = alloc->pages + alloc->nents;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	/* Check if we have enough pages requested so we can allocate a large
+	 * page (512 * 4KB = 2MB )
+	 */
+	if (nr_left >= (SZ_2M / SZ_4K)) {
+		int nr_lp = nr_left / (SZ_2M / SZ_4K);
+
+		res = kbase_mem_pool_alloc_pages(&kctx->lp_mem_pool,
+						 nr_lp * (SZ_2M / SZ_4K),
+						 tp,
+						 true);
+
+		if (res > 0) {
+			nr_left -= res;
+			tp += res;
+		}
+
+		if (nr_left) {
+			struct kbase_sub_alloc *sa, *temp_sa;
+
+			mutex_lock(&kctx->mem_partials_lock);
+
+			list_for_each_entry_safe(sa, temp_sa,
+						 &kctx->mem_partials, link) {
+				int pidx = 0;
+
+				while (nr_left) {
+					pidx = find_next_zero_bit(sa->sub_pages,
+								  SZ_2M / SZ_4K,
+								  pidx);
+					bitmap_set(sa->sub_pages, pidx, 1);
+					*tp++ = as_tagged_tag(page_to_phys(sa->page +
+									   pidx),
+							      FROM_PARTIAL);
+					nr_left--;
+
+					if (bitmap_full(sa->sub_pages, SZ_2M / SZ_4K)) {
+						/* unlink from partial list when full */
+						list_del_init(&sa->link);
+						break;
+					}
+				}
+			}
+			mutex_unlock(&kctx->mem_partials_lock);
+		}
+
+		/* only if we actually have a chunk left <512. If more it indicates
+		 * that we couldn't allocate a 2MB above, so no point to retry here.
+		 */
+		if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
+			/* create a new partial and suballocate the rest from it */
+			struct page *np = NULL;
+
+			do {
+				int err;
+
+				np = kbase_mem_pool_alloc(&kctx->lp_mem_pool);
+				if (np)
+					break;
+				err = kbase_mem_pool_grow(&kctx->lp_mem_pool, 1);
+				if (err)
+					break;
+			} while (1);
+
+			if (np) {
+				int i;
+				struct kbase_sub_alloc *sa;
+				struct page *p;
+
+				sa = kmalloc(sizeof(*sa), GFP_KERNEL);
+				if (!sa) {
+					kbase_mem_pool_free(&kctx->lp_mem_pool, np, false);
+					goto no_new_partial;
+				}
+
+				/* store pointers back to the control struct */
+				np->lru.next = (void *)sa;
+				for (p = np; p < np + SZ_2M / SZ_4K; p++)
+					p->lru.prev = (void *)np;
+				INIT_LIST_HEAD(&sa->link);
+				bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
+				sa->page = np;
+
+				for (i = 0; i < nr_left; i++)
+					*tp++ = as_tagged_tag(page_to_phys(np + i), FROM_PARTIAL);
+
+				bitmap_set(sa->sub_pages, 0, nr_left);
+				nr_left = 0;
+
+				/* expose for later use */
+				mutex_lock(&kctx->mem_partials_lock);
+				list_add(&sa->link, &kctx->mem_partials);
+				mutex_unlock(&kctx->mem_partials_lock);
+			}
+		}
+	}
+no_new_partial:
+#endif
+
+	if (nr_left) {
+		res = kbase_mem_pool_alloc_pages(&kctx->mem_pool,
+						 nr_left,
+						 tp,
+						 false);
+		if (res <= 0)
+			goto alloc_failed;
+	}
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return 0;
+
+alloc_failed:
+	/* rollback needed if got one or more 2MB but failed later */
+	if (nr_left != nr_pages_requested) {
+		size_t nr_pages_to_free = nr_pages_requested - nr_left;
+
+		alloc->nents += nr_pages_to_free;
+
+		kbase_process_page_usage_inc(kctx, nr_pages_to_free);
+		kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages);
+		kbase_atomic_add_pages(nr_pages_to_free,
+			       &kctx->kbdev->memdev.used_pages);
+
+		kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
+	}
+
+	kbase_process_page_usage_dec(kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+invalid_request:
+	return -ENOMEM;
+}
+
+struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
+		struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool,
+		size_t nr_pages_requested,
+		struct kbase_sub_alloc **prealloc_sa)
+{
+	int new_page_count __maybe_unused;
+	size_t nr_left = nr_pages_requested;
+	int res;
+	struct kbase_context *kctx;
+	struct tagged_addr *tp;
+	struct tagged_addr *new_pages = NULL;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+	lockdep_assert_held(&pool->pool_lock);
+
+#if !defined(CONFIG_MALI_2MB_ALLOC)
+	WARN_ON(pool->order);
+#endif
+
+	if (alloc->reg) {
+		if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
+			goto invalid_request;
+	}
+
+	kctx = alloc->imported.kctx;
+
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = kbase_atomic_add_pages(
+			nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer
+	 */
+	kbase_process_page_usage_inc(kctx, nr_pages_requested);
+
+	tp = alloc->pages + alloc->nents;
+	new_pages = tp;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	if (pool->order) {
+		int nr_lp = nr_left / (SZ_2M / SZ_4K);
+
+		res = kbase_mem_pool_alloc_pages_locked(pool,
+						 nr_lp * (SZ_2M / SZ_4K),
+						 tp);
+
+		if (res > 0) {
+			nr_left -= res;
+			tp += res;
+		}
+
+		if (nr_left) {
+			struct kbase_sub_alloc *sa, *temp_sa;
+
+			list_for_each_entry_safe(sa, temp_sa,
+						 &kctx->mem_partials, link) {
+				int pidx = 0;
+
+				while (nr_left) {
+					pidx = find_next_zero_bit(sa->sub_pages,
+								  SZ_2M / SZ_4K,
+								  pidx);
+					bitmap_set(sa->sub_pages, pidx, 1);
+					*tp++ = as_tagged_tag(page_to_phys(
+							sa->page + pidx),
+							FROM_PARTIAL);
+					nr_left--;
+
+					if (bitmap_full(sa->sub_pages,
+							SZ_2M / SZ_4K)) {
+						/* unlink from partial list when
+						 * full
+						 */
+						list_del_init(&sa->link);
+						break;
+					}
+				}
+			}
+		}
+
+		/* only if we actually have a chunk left <512. If more it
+		 * indicates that we couldn't allocate a 2MB above, so no point
+		 * to retry here.
+		 */
+		if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
+			/* create a new partial and suballocate the rest from it
+			 */
+			struct page *np = NULL;
+
+			np = kbase_mem_pool_alloc_locked(pool);
+
+			if (np) {
+				int i;
+				struct kbase_sub_alloc *const sa = *prealloc_sa;
+				struct page *p;
+
+				/* store pointers back to the control struct */
+				np->lru.next = (void *)sa;
+				for (p = np; p < np + SZ_2M / SZ_4K; p++)
+					p->lru.prev = (void *)np;
+				INIT_LIST_HEAD(&sa->link);
+				bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
+				sa->page = np;
+
+				for (i = 0; i < nr_left; i++)
+					*tp++ = as_tagged_tag(
+							page_to_phys(np + i),
+							FROM_PARTIAL);
+
+				bitmap_set(sa->sub_pages, 0, nr_left);
+				nr_left = 0;
+				/* Indicate to user that we'll free this memory
+				 * later.
+				 */
+				*prealloc_sa = NULL;
+
+				/* expose for later use */
+				list_add(&sa->link, &kctx->mem_partials);
+			}
+		}
+		if (nr_left)
+			goto alloc_failed;
+	} else {
+#endif
+		res = kbase_mem_pool_alloc_pages_locked(pool,
+						 nr_left,
+						 tp);
+		if (res <= 0)
+			goto alloc_failed;
+#ifdef CONFIG_MALI_2MB_ALLOC
+	}
+#endif
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return new_pages;
+
+alloc_failed:
+	/* rollback needed if got one or more 2MB but failed later */
+	if (nr_left != nr_pages_requested) {
+		size_t nr_pages_to_free = nr_pages_requested - nr_left;
+
+		alloc->nents += nr_pages_to_free;
+
+		kbase_process_page_usage_inc(kctx, nr_pages_to_free);
+		kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages);
+		kbase_atomic_add_pages(nr_pages_to_free,
+			       &kctx->kbdev->memdev.used_pages);
+
+		kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
+	}
+
+	kbase_process_page_usage_dec(kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+invalid_request:
+	return NULL;
+}
+
+static void free_partial(struct kbase_context *kctx, struct tagged_addr tp)
+{
+	struct page *p, *head_page;
+	struct kbase_sub_alloc *sa;
+
+	p = phys_to_page(as_phys_addr_t(tp));
+	head_page = (struct page *)p->lru.prev;
+	sa = (struct kbase_sub_alloc *)head_page->lru.next;
+	mutex_lock(&kctx->mem_partials_lock);
+	clear_bit(p - head_page, sa->sub_pages);
+	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
+		list_del(&sa->link);
+		kbase_mem_pool_free(&kctx->lp_mem_pool, head_page, true);
+		kfree(sa);
+	} else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
+		   SZ_2M / SZ_4K - 1) {
+		/* expose the partial again */
+		list_add(&sa->link, &kctx->mem_partials);
+	}
+	mutex_unlock(&kctx->mem_partials_lock);
+}
+
+int kbase_free_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	struct tagged_addr *start_free;
+	int new_page_count __maybe_unused;
+	size_t freed = 0;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	/* early out if nothing to do */
+	if (0 == nr_pages_to_free)
+		return 0;
+
+	start_free = alloc->pages + alloc->nents - nr_pages_to_free;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/* pad start_free to a valid start location */
+	while (nr_pages_to_free && is_huge(*start_free) &&
+	       !is_huge_head(*start_free)) {
+		nr_pages_to_free--;
+		start_free++;
+	}
+
+	while (nr_pages_to_free) {
+		if (is_huge_head(*start_free)) {
+			/* This is a 2MB entry, so free all the 512 pages that
+			 * it points to
+			 */
+			kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
+					512,
+					start_free,
+					syncback,
+					reclaimed);
+			nr_pages_to_free -= 512;
+			start_free += 512;
+			freed += 512;
+		} else if (is_partial(*start_free)) {
+			free_partial(kctx, *start_free);
+			nr_pages_to_free--;
+			start_free++;
+			freed++;
+		} else {
+			struct tagged_addr *local_end_free;
+
+			local_end_free = start_free;
+			while (nr_pages_to_free &&
+			       !is_huge(*local_end_free) &&
+			       !is_partial(*local_end_free)) {
+				local_end_free++;
+				nr_pages_to_free--;
+			}
+			kbase_mem_pool_free_pages(&kctx->mem_pool,
+					local_end_free - start_free,
+					start_free,
+					syncback,
+					reclaimed);
+			freed += local_end_free - start_free;
+			start_free += local_end_free - start_free;
+		}
+	}
+
+	alloc->nents -= freed;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		kbase_process_page_usage_dec(kctx, freed);
+		new_page_count = kbase_atomic_sub_pages(freed,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(freed,
+				       &kctx->kbdev->memdev.used_pages);
+
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+				kctx->id,
+				(u64)new_page_count);
+	}
+
+	return 0;
+}
+
+static void free_partial_locked(struct kbase_context *kctx,
+		struct kbase_mem_pool *pool, struct tagged_addr tp)
+{
+	struct page *p, *head_page;
+	struct kbase_sub_alloc *sa;
+
+	lockdep_assert_held(&pool->pool_lock);
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	p = phys_to_page(as_phys_addr_t(tp));
+	head_page = (struct page *)p->lru.prev;
+	sa = (struct kbase_sub_alloc *)head_page->lru.next;
+	clear_bit(p - head_page, sa->sub_pages);
+	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
+		list_del(&sa->link);
+		kbase_mem_pool_free(pool, head_page, true);
+		kfree(sa);
+	} else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
+		   SZ_2M / SZ_4K - 1) {
+		/* expose the partial again */
+		list_add(&sa->link, &kctx->mem_partials);
+	}
+}
+
+void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
+		struct kbase_mem_pool *pool, struct tagged_addr *pages,
+		size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	struct tagged_addr *start_free;
+	size_t freed = 0;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	lockdep_assert_held(&pool->pool_lock);
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	/* early out if nothing to do */
+	if (!nr_pages_to_free)
+		return;
+
+	start_free = pages;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/* pad start_free to a valid start location */
+	while (nr_pages_to_free && is_huge(*start_free) &&
+	       !is_huge_head(*start_free)) {
+		nr_pages_to_free--;
+		start_free++;
+	}
+
+	while (nr_pages_to_free) {
+		if (is_huge_head(*start_free)) {
+			/* This is a 2MB entry, so free all the 512 pages that
+			 * it points to
+			 */
+			WARN_ON(!pool->order);
+			kbase_mem_pool_free_pages_locked(pool,
+					512,
+					start_free,
+					syncback,
+					reclaimed);
+			nr_pages_to_free -= 512;
+			start_free += 512;
+			freed += 512;
+		} else if (is_partial(*start_free)) {
+			WARN_ON(!pool->order);
+			free_partial_locked(kctx, pool, *start_free);
+			nr_pages_to_free--;
+			start_free++;
+			freed++;
+		} else {
+			struct tagged_addr *local_end_free;
+
+			WARN_ON(pool->order);
+			local_end_free = start_free;
+			while (nr_pages_to_free &&
+			       !is_huge(*local_end_free) &&
+			       !is_partial(*local_end_free)) {
+				local_end_free++;
+				nr_pages_to_free--;
+			}
+			kbase_mem_pool_free_pages_locked(pool,
+					local_end_free - start_free,
+					start_free,
+					syncback,
+					reclaimed);
+			freed += local_end_free - start_free;
+			start_free += local_end_free - start_free;
+		}
+	}
+
+	alloc->nents -= freed;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		int new_page_count;
+
+		kbase_process_page_usage_dec(kctx, freed);
+		new_page_count = kbase_atomic_sub_pages(freed,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(freed,
+				       &kctx->kbdev->memdev.used_pages);
+
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+				kctx->id,
+				(u64)new_page_count);
+	}
+}
+
+void kbase_mem_kref_free(struct kref *kref)
+{
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
+
+	switch (alloc->type) {
+	case KBASE_MEM_TYPE_NATIVE: {
+		if (!WARN_ON(!alloc->imported.kctx)) {
+			/*
+			 * The physical allocation must have been removed from
+			 * the eviction list before trying to free it.
+			 */
+			mutex_lock(&alloc->imported.kctx->jit_evict_lock);
+			WARN_ON(!list_empty(&alloc->evict_node));
+			mutex_unlock(&alloc->imported.kctx->jit_evict_lock);
+		}
+		kbase_free_phy_pages_helper(alloc, alloc->nents);
+		break;
+	}
+	case KBASE_MEM_TYPE_ALIAS: {
+		/* just call put on the underlying phy allocs */
+		size_t i;
+		struct kbase_aliased *aliased;
+
+		aliased = alloc->imported.alias.aliased;
+		if (aliased) {
+			for (i = 0; i < alloc->imported.alias.nents; i++)
+				if (aliased[i].alloc)
+					kbase_mem_phy_alloc_put(aliased[i].alloc);
+			vfree(aliased);
+		}
+		break;
+	}
+	case KBASE_MEM_TYPE_RAW:
+		/* raw pages, external cleanup */
+		break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		dma_buf_detach(alloc->imported.umm.dma_buf,
+			       alloc->imported.umm.dma_attachment);
+		dma_buf_put(alloc->imported.umm.dma_buf);
+		break;
+#endif
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		if (alloc->imported.user_buf.mm)
+			mmdrop(alloc->imported.user_buf.mm);
+		kfree(alloc->imported.user_buf.pages);
+		break;
+	case KBASE_MEM_TYPE_TB:{
+		void *tb;
+
+		tb = alloc->imported.kctx->jctx.tb;
+		kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
+		vfree(tb);
+		break;
+	}
+	default:
+		WARN(1, "Unexecpted free of type %d\n", alloc->type);
+		break;
+	}
+
+	/* Free based on allocation type */
+	if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+		vfree(alloc);
+	else
+		kfree(alloc);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(vsize > 0);
+
+	/* validate user provided arguments */
+	if (size > vsize || vsize > reg->nr_pages)
+		goto out_term;
+
+	/* Prevent vsize*sizeof from wrapping around.
+	 * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail.
+	 */
+	if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
+		goto out_term;
+
+	KBASE_DEBUG_ASSERT(0 != vsize);
+
+	if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
+		goto out_term;
+
+	reg->cpu_alloc->reg = reg;
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
+			goto out_rollback;
+		reg->gpu_alloc->reg = reg;
+	}
+
+	return 0;
+
+out_rollback:
+	kbase_free_phy_pages_helper(reg->cpu_alloc, size);
+out_term:
+	return -1;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+
+bool kbase_check_alloc_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Either the GPU or CPU must be reading from the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
+		return false;
+
+	/* Either the GPU or CPU must be writing to the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* GPU executable memory cannot:
+	 * - Be written by the GPU
+	 * - Be grown on GPU page fault
+	 * - Have the top of its initial commit aligned to 'extent' */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (flags &
+			(BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+			BASE_MEM_TILER_ALIGN_TOP)))
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for allocating. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* BASE_MEM_IMPORT_SHARED is only valid for imported memory */
+	if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
+		return false;
+
+	/* Should not combine BASE_MEM_COHERENT_LOCAL with
+	 * BASE_MEM_COHERENT_SYSTEM */
+	if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) ==
+			(BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM))
+		return false;
+
+	return true;
+}
+
+bool kbase_check_import_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Imported memory cannot be GPU executable */
+	if (flags & BASE_MEM_PROT_GPU_EX)
+		return false;
+
+	/* Imported memory cannot grow on page fault */
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		return false;
+
+	/* Imported memory cannot be aligned to the end of its initial commit */
+	if (flags & BASE_MEM_TILER_ALIGN_TOP)
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for importing. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* Secure memory cannot be read by the CPU */
+	if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD))
+		return false;
+
+	return true;
+}
+
+int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
+		u64 va_pages, u64 commit_pages, u64 large_extent)
+{
+	struct device *dev = kctx->kbdev->dev;
+	int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT;
+	struct kbase_va_region test_reg;
+
+	/* kbase_va_region's extent member can be of variable size, so check against that type */
+	test_reg.extent = large_extent;
+
+#define KBASE_MSG_PRE "GPU allocation attempted with "
+
+	if (0 == va_pages) {
+		dev_warn(dev, KBASE_MSG_PRE "0 va_pages!");
+		return -EINVAL;
+	}
+
+	if (va_pages > (U64_MAX / PAGE_SIZE)) {
+		/* 64-bit address range is the max */
+		dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than 64-bit address range!",
+				(unsigned long long)va_pages);
+		return -ENOMEM;
+	}
+
+	/* Note: commit_pages is checked against va_pages during
+	 * kbase_alloc_phy_pages() */
+
+	/* Limit GPU executable allocs to GPU PC size */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) {
+		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_PROT_GPU_EX and va_pages==%lld larger than GPU PC range %lld",
+				(unsigned long long)va_pages,
+				(unsigned long long)gpu_pc_pages_max);
+
+		return -EINVAL;
+	}
+
+	if ((flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) &&
+			test_reg.extent == 0) {
+		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF or BASE_MEM_TILER_ALIGN_TOP but extent == 0\n");
+		return -EINVAL;
+	}
+
+	if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) &&
+			test_reg.extent != 0) {
+		dev_warn(dev, KBASE_MSG_PRE "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extent != 0\n");
+		return -EINVAL;
+	}
+
+	/* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */
+	if (flags & BASE_MEM_TILER_ALIGN_TOP) {
+#define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and "
+		unsigned long small_extent;
+
+		if (large_extent > BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES) {
+			dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%lld pages exceeds limit %lld",
+					(unsigned long long)large_extent,
+					BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES);
+			return -EINVAL;
+		}
+		/* For use with is_power_of_2, which takes unsigned long, so
+		 * must ensure e.g. on 32-bit kernel it'll fit in that type */
+		small_extent = (unsigned long)large_extent;
+
+		if (!is_power_of_2(small_extent)) {
+			dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%ld not a non-zero power of 2",
+					small_extent);
+			return -EINVAL;
+		}
+
+		if (commit_pages > large_extent) {
+			dev_warn(dev, KBASE_MSG_PRE_FLAG "commit_pages==%ld exceeds extent==%ld",
+					(unsigned long)commit_pages,
+					(unsigned long)large_extent);
+			return -EINVAL;
+		}
+#undef KBASE_MSG_PRE_FLAG
+	}
+
+	return 0;
+#undef KBASE_MSG_PRE
+}
+
+/**
+ * @brief Acquire the per-context region list lock
+ */
+void kbase_gpu_vm_lock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_lock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
+
+/**
+ * @brief Release the per-context region list lock
+ */
+void kbase_gpu_vm_unlock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_unlock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
+
+#ifdef CONFIG_DEBUG_FS
+struct kbase_jit_debugfs_data {
+	int (*func)(struct kbase_jit_debugfs_data *);
+	struct mutex lock;
+	struct kbase_context *kctx;
+	u64 active_value;
+	u64 pool_value;
+	u64 destroy_value;
+	char buffer[50];
+};
+
+static int kbase_jit_debugfs_common_open(struct inode *inode,
+		struct file *file, int (*func)(struct kbase_jit_debugfs_data *))
+{
+	struct kbase_jit_debugfs_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->func = func;
+	mutex_init(&data->lock);
+	data->kctx = (struct kbase_context *) inode->i_private;
+
+	file->private_data = data;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t kbase_jit_debugfs_common_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_jit_debugfs_data *data;
+	size_t size;
+	int ret;
+
+	data = (struct kbase_jit_debugfs_data *) file->private_data;
+	mutex_lock(&data->lock);
+
+	if (*ppos) {
+		size = strnlen(data->buffer, sizeof(data->buffer));
+	} else {
+		if (!data->func) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		if (data->func(data)) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		size = scnprintf(data->buffer, sizeof(data->buffer),
+				"%llu,%llu,%llu", data->active_value,
+				data->pool_value, data->destroy_value);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size);
+
+out_unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+static int kbase_jit_debugfs_common_release(struct inode *inode,
+		struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+	return kbase_jit_debugfs_common_open(inode, file, __func); \
+} \
+static const struct file_operations __fops = { \
+	.owner = THIS_MODULE, \
+	.open = __fops ## _open, \
+	.release = kbase_jit_debugfs_common_release, \
+	.read = kbase_jit_debugfs_common_read, \
+	.write = NULL, \
+	.llseek = generic_file_llseek, \
+}
+
+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct list_head *tmp;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each(tmp, &kctx->jit_active_head) {
+		data->active_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_pool_head) {
+		data->pool_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_destroy_head) {
+		data->destroy_value++;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
+		kbase_jit_debugfs_count_get);
+
+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->nr_pages;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops,
+		kbase_jit_debugfs_vm_get);
+
+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->gpu_alloc->nents;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
+		kbase_jit_debugfs_phys_get);
+
+void kbase_jit_debugfs_init(struct kbase_context *kctx)
+{
+	/* Debugfs entry for getting the number of JIT allocations. */
+	debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_count_fops);
+
+	/*
+	 * Debugfs entry for getting the total number of virtual pages
+	 * used by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_vm_fops);
+
+	/*
+	 * Debugfs entry for getting the number of physical pages used
+	 * by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_phys_fops);
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations
+ * @work: Work item
+ *
+ * This function does the work of freeing JIT allocations whose physical
+ * backing has been released.
+ */
+static void kbase_jit_destroy_worker(struct work_struct *work)
+{
+	struct kbase_context *kctx;
+	struct kbase_va_region *reg;
+
+	kctx = container_of(work, struct kbase_context, jit_work);
+	do {
+		mutex_lock(&kctx->jit_evict_lock);
+		if (list_empty(&kctx->jit_destroy_head)) {
+			mutex_unlock(&kctx->jit_evict_lock);
+			break;
+		}
+
+		reg = list_first_entry(&kctx->jit_destroy_head,
+				struct kbase_va_region, jit_node);
+
+		list_del(&reg->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		kbase_gpu_vm_lock(kctx);
+		reg->flags &= ~KBASE_REG_JIT;
+		kbase_mem_free_region(kctx, reg);
+		kbase_gpu_vm_unlock(kctx);
+	} while (1);
+}
+
+int kbase_jit_init(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->jit_evict_lock);
+	INIT_LIST_HEAD(&kctx->jit_active_head);
+	INIT_LIST_HEAD(&kctx->jit_pool_head);
+	INIT_LIST_HEAD(&kctx->jit_destroy_head);
+	INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
+
+	INIT_LIST_HEAD(&kctx->jit_pending_alloc);
+	INIT_LIST_HEAD(&kctx->jit_atoms_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	kctx->jit_max_allocations = 0;
+	kctx->jit_current_allocations = 0;
+	kctx->trim_level = 0;
+
+	return 0;
+}
+
+/* Check if the allocation from JIT pool is of the same size as the new JIT
+ * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets
+ * the alignment requirements.
+ */
+static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx,
+	struct kbase_va_region *walker, struct base_jit_alloc_info *info)
+{
+	bool meet_reqs = true;
+
+	if (walker->nr_pages != info->va_pages)
+		meet_reqs = false;
+	else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) {
+		size_t align = info->extent;
+		size_t align_mask = align - 1;
+
+		if ((walker->start_pfn + info->commit_pages) & align_mask)
+			meet_reqs = false;
+	}
+
+	return meet_reqs;
+}
+
+static int kbase_jit_grow(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info, struct kbase_va_region *reg)
+{
+	size_t delta;
+	size_t pages_required;
+	size_t old_size;
+	struct kbase_mem_pool *pool;
+	int ret = -ENOMEM;
+	struct tagged_addr *gpu_pages;
+	struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
+	int i;
+
+	if (info->commit_pages > reg->nr_pages) {
+		/* Attempted to grow larger than maximum size */
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Make the physical backing no longer reclaimable */
+	if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+		goto update_failed;
+
+	if (reg->gpu_alloc->nents >= info->commit_pages)
+		goto done;
+
+	/* Grow the backing */
+	old_size = reg->gpu_alloc->nents;
+
+	/* Allocate some more pages */
+	delta = info->commit_pages - reg->gpu_alloc->nents;
+	pages_required = delta;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	/* Preallocate memory for the sub-allocation structs */
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+		prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]),
+				GFP_KERNEL);
+		if (!prealloc_sas[i])
+			goto update_failed;
+	}
+
+	if (pages_required >= (SZ_2M / SZ_4K)) {
+		pool = &kctx->lp_mem_pool;
+		/* Round up to number of 2 MB pages required */
+		pages_required += ((SZ_2M / SZ_4K) - 1);
+		pages_required /= (SZ_2M / SZ_4K);
+	} else {
+#endif
+		pool = &kctx->mem_pool;
+#ifdef CONFIG_MALI_2MB_ALLOC
+	}
+#endif
+
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		pages_required *= 2;
+
+	mutex_lock(&kctx->mem_partials_lock);
+	kbase_mem_pool_lock(pool);
+
+	/* As we can not allocate memory from the kernel with the vm_lock held,
+	 * grow the pool to the required size with the lock dropped. We hold the
+	 * pool lock to prevent another thread from allocating from the pool
+	 * between the grow and allocation.
+	 */
+	while (kbase_mem_pool_size(pool) < pages_required) {
+		int pool_delta = pages_required - kbase_mem_pool_size(pool);
+
+		kbase_mem_pool_unlock(pool);
+		mutex_unlock(&kctx->mem_partials_lock);
+		kbase_gpu_vm_unlock(kctx);
+
+		if (kbase_mem_pool_grow(pool, pool_delta))
+			goto update_failed_unlocked;
+
+		kbase_gpu_vm_lock(kctx);
+		mutex_lock(&kctx->mem_partials_lock);
+		kbase_mem_pool_lock(pool);
+	}
+
+	gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool,
+			delta, &prealloc_sas[0]);
+	if (!gpu_pages) {
+		kbase_mem_pool_unlock(pool);
+		mutex_unlock(&kctx->mem_partials_lock);
+		goto update_failed;
+	}
+
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		struct tagged_addr *cpu_pages;
+
+		cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc,
+				pool, delta, &prealloc_sas[1]);
+		if (!cpu_pages) {
+			kbase_free_phy_pages_helper_locked(reg->gpu_alloc,
+					pool, gpu_pages, delta);
+			kbase_mem_pool_unlock(pool);
+			mutex_unlock(&kctx->mem_partials_lock);
+			goto update_failed;
+		}
+	}
+	kbase_mem_pool_unlock(pool);
+	mutex_unlock(&kctx->mem_partials_lock);
+
+	ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages,
+			old_size);
+	/*
+	 * The grow failed so put the allocation back in the
+	 * pool and return failure.
+	 */
+	if (ret)
+		goto update_failed;
+
+done:
+	ret = 0;
+
+	/* Update attributes of JIT allocation taken from the pool */
+	reg->initial_commit = info->commit_pages;
+	reg->extent = info->extent;
+
+update_failed:
+	kbase_gpu_vm_unlock(kctx);
+update_failed_unlocked:
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
+		kfree(prealloc_sas[i]);
+
+	return ret;
+}
+
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info)
+{
+	struct kbase_va_region *reg = NULL;
+
+	if (kctx->jit_current_allocations >= kctx->jit_max_allocations) {
+		/* Too many current allocations */
+		return NULL;
+	}
+	if (info->max_allocations > 0 &&
+			kctx->jit_current_allocations_per_bin[info->bin_id] >=
+			info->max_allocations) {
+		/* Too many current allocations in this bin */
+		return NULL;
+	}
+
+	mutex_lock(&kctx->jit_evict_lock);
+
+	/*
+	 * Scan the pool for an existing allocation which meets our
+	 * requirements and remove it.
+	 */
+	if (info->usage_id != 0) {
+		/* First scan for an allocation with the same usage ID */
+		struct kbase_va_region *walker;
+		struct kbase_va_region *temp;
+		size_t current_diff = SIZE_MAX;
+
+		list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head,
+				jit_node) {
+
+			if (walker->jit_usage_id == info->usage_id &&
+					walker->jit_bin_id == info->bin_id &&
+					meet_size_and_tiler_align_top_requirements(
+							kctx, walker, info)) {
+				size_t min_size, max_size, diff;
+
+				/*
+				 * The JIT allocations VA requirements have been
+				 * met, it's suitable but other allocations
+				 * might be a better fit.
+				 */
+				min_size = min_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				max_size = max_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				diff = max_size - min_size;
+
+				if (current_diff > diff) {
+					current_diff = diff;
+					reg = walker;
+				}
+
+				/* The allocation is an exact match */
+				if (current_diff == 0)
+					break;
+			}
+		}
+	}
+
+	if (!reg) {
+		/* No allocation with the same usage ID, or usage IDs not in
+		 * use. Search for an allocation we can reuse.
+		 */
+		struct kbase_va_region *walker;
+		struct kbase_va_region *temp;
+		size_t current_diff = SIZE_MAX;
+
+		list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head,
+				jit_node) {
+
+			if (walker->jit_bin_id == info->bin_id &&
+					meet_size_and_tiler_align_top_requirements(
+							kctx, walker, info)) {
+				size_t min_size, max_size, diff;
+
+				/*
+				 * The JIT allocations VA requirements have been
+				 * met, it's suitable but other allocations
+				 * might be a better fit.
+				 */
+				min_size = min_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				max_size = max_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				diff = max_size - min_size;
+
+				if (current_diff > diff) {
+					current_diff = diff;
+					reg = walker;
+				}
+
+				/* The allocation is an exact match, so stop
+				 * looking.
+				 */
+				if (current_diff == 0)
+					break;
+			}
+		}
+	}
+
+	if (reg) {
+		/*
+		 * Remove the found region from the pool and add it to the
+		 * active list.
+		 */
+		list_move(&reg->jit_node, &kctx->jit_active_head);
+
+		/*
+		 * Remove the allocation from the eviction list as it's no
+		 * longer eligible for eviction. This must be done before
+		 * dropping the jit_evict_lock
+		 */
+		list_del_init(&reg->gpu_alloc->evict_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		if (kbase_jit_grow(kctx, info, reg) < 0) {
+			/*
+			 * An update to an allocation from the pool failed,
+			 * chances are slim a new allocation would fair any
+			 * better so return the allocation to the pool and
+			 * return the function with failure.
+			 */
+			goto update_failed_unlocked;
+		}
+	} else {
+		/* No suitable JIT allocation was found so create a new one */
+		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+				BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+				BASE_MEM_COHERENT_LOCAL;
+		u64 gpu_addr;
+
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)
+			flags |= BASE_MEM_TILER_ALIGN_TOP;
+
+		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
+				info->extent, &flags, &gpu_addr);
+		if (!reg)
+			goto out_unlocked;
+
+		reg->flags |= KBASE_REG_JIT;
+
+		mutex_lock(&kctx->jit_evict_lock);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+		mutex_unlock(&kctx->jit_evict_lock);
+	}
+
+	kctx->jit_current_allocations++;
+	kctx->jit_current_allocations_per_bin[info->bin_id]++;
+
+	reg->jit_usage_id = info->usage_id;
+	reg->jit_bin_id = info->bin_id;
+
+	return reg;
+
+update_failed_unlocked:
+	mutex_lock(&kctx->jit_evict_lock);
+	list_move(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+out_unlocked:
+	return NULL;
+}
+
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	u64 old_pages;
+
+	/* Get current size of JIT region */
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (reg->initial_commit < old_pages) {
+		/* Free trim_level % of region, but don't go below initial
+		 * commit size
+		 */
+		u64 new_size = MAX(reg->initial_commit,
+			div_u64(old_pages * (100 - kctx->trim_level), 100));
+		u64 delta = old_pages - new_size;
+
+		if (delta) {
+			kbase_mem_shrink_cpu_mapping(kctx, reg, old_pages-delta,
+					old_pages);
+			kbase_mem_shrink_gpu_mapping(kctx, reg, old_pages-delta,
+					old_pages);
+
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+		}
+	}
+
+	kctx->jit_current_allocations--;
+	kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--;
+
+	kbase_mem_evictable_mark_reclaim(reg->gpu_alloc);
+
+	kbase_gpu_vm_lock(kctx);
+	reg->flags |= KBASE_REG_DONT_NEED;
+	kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents);
+	kbase_gpu_vm_unlock(kctx);
+
+	/*
+	 * Add the allocation to the eviction list and the jit pool, after this
+	 * point the shrink can reclaim it, or it may be reused.
+	 */
+	mutex_lock(&kctx->jit_evict_lock);
+
+	/* This allocation can't already be on a list. */
+	WARN_ON(!list_empty(&reg->gpu_alloc->evict_node));
+	list_add(&reg->gpu_alloc->evict_node, &kctx->evict_list);
+
+	list_move(&reg->jit_node, &kctx->jit_pool_head);
+
+	mutex_unlock(&kctx->jit_evict_lock);
+}
+
+void kbase_jit_backing_lost(struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = reg->kctx;
+
+	lockdep_assert_held(&kctx->jit_evict_lock);
+
+	/*
+	 * JIT allocations will always be on a list, if the region
+	 * is not on a list then it's not a JIT allocation.
+	 */
+	if (list_empty(&reg->jit_node))
+		return;
+
+	/*
+	 * Freeing the allocation requires locks we might not be able
+	 * to take now, so move the allocation to the free list and kick
+	 * the worker which will do the freeing.
+	 */
+	list_move(&reg->jit_node, &kctx->jit_destroy_head);
+
+	schedule_work(&kctx->jit_work);
+}
+
+bool kbase_jit_evict(struct kbase_context *kctx)
+{
+	struct kbase_va_region *reg = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Free the oldest allocation from the pool */
+	mutex_lock(&kctx->jit_evict_lock);
+	if (!list_empty(&kctx->jit_pool_head)) {
+		reg = list_entry(kctx->jit_pool_head.prev,
+				struct kbase_va_region, jit_node);
+		list_del(&reg->jit_node);
+		list_del_init(&reg->gpu_alloc->evict_node);
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	if (reg) {
+		reg->flags &= ~KBASE_REG_JIT;
+		kbase_mem_free_region(kctx, reg);
+	}
+
+	return (reg != NULL);
+}
+
+void kbase_jit_term(struct kbase_context *kctx)
+{
+	struct kbase_va_region *walker;
+
+	/* Free all allocations for this context */
+
+	kbase_gpu_vm_lock(kctx);
+	mutex_lock(&kctx->jit_evict_lock);
+	/* Free all allocations from the pool */
+	while (!list_empty(&kctx->jit_pool_head)) {
+		walker = list_first_entry(&kctx->jit_pool_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		list_del_init(&walker->gpu_alloc->evict_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		walker->flags &= ~KBASE_REG_JIT;
+		kbase_mem_free_region(kctx, walker);
+		mutex_lock(&kctx->jit_evict_lock);
+	}
+
+	/* Free all allocations from active list */
+	while (!list_empty(&kctx->jit_active_head)) {
+		walker = list_first_entry(&kctx->jit_active_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		list_del_init(&walker->gpu_alloc->evict_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		walker->flags &= ~KBASE_REG_JIT;
+		kbase_mem_free_region(kctx, walker);
+		mutex_lock(&kctx->jit_evict_lock);
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+	kbase_gpu_vm_unlock(kctx);
+
+	/*
+	 * Flush the freeing of allocations whose backing has been freed
+	 * (i.e. everything in jit_destroy_head).
+	 */
+	cancel_work_sync(&kctx->jit_work);
+}
+
+static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	long pinned_pages;
+	struct kbase_mem_phy_alloc *alloc;
+	struct page **pages;
+	struct tagged_addr *pa;
+	long i;
+	int err = -ENOMEM;
+	unsigned long address;
+	struct mm_struct *mm;
+	struct device *dev;
+	unsigned long offset;
+	unsigned long local_size;
+	unsigned long gwt_mask = ~0;
+
+	alloc = reg->gpu_alloc;
+	pa = kbase_get_gpu_phy_pages(reg);
+	address = alloc->imported.user_buf.address;
+	mm = alloc->imported.user_buf.mm;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+
+	pages = alloc->imported.user_buf.pages;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	pinned_pages = get_user_pages(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#else
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL, NULL);
+#endif
+
+	if (pinned_pages <= 0)
+		return pinned_pages;
+
+	if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+		for (i = 0; i < pinned_pages; i++)
+			put_page(pages[i]);
+		return -ENOMEM;
+	}
+
+	dev = kctx->kbdev->dev;
+	offset = address & ~PAGE_MASK;
+	local_size = alloc->imported.user_buf.size;
+
+	for (i = 0; i < pinned_pages; i++) {
+		dma_addr_t dma_addr;
+		unsigned long min;
+
+		min = MIN(PAGE_SIZE - offset, local_size);
+		dma_addr = dma_map_page(dev, pages[i],
+				offset, min,
+				DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, dma_addr))
+			goto unwind;
+
+		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
+		pa[i] = as_tagged(page_to_phys(pages[i]));
+
+		local_size -= min;
+		offset = 0;
+	}
+
+	alloc->nents = pinned_pages;
+#ifdef CONFIG_MALI_JOB_DUMP
+	if (kctx->gwt_enabled)
+		gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
+			kbase_reg_current_backed_size(reg),
+			reg->flags & gwt_mask);
+	if (err == 0)
+		return 0;
+
+	alloc->nents = 0;
+	/* fall down */
+unwind:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				alloc->imported.user_buf.dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+
+	while (++i < pinned_pages) {
+		put_page(pages[i]);
+		pages[i] = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable)
+{
+	long i;
+	struct page **pages;
+	unsigned long size = alloc->imported.user_buf.size;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	pages = alloc->imported.user_buf.pages;
+	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
+		unsigned long local_size;
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
+		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+				DMA_BIDIRECTIONAL);
+		if (writeable)
+			set_page_dirty_lock(pages[i]);
+		put_page(pages[i]);
+		pages[i] = NULL;
+
+		size -= local_size;
+	}
+	alloc->nents = 0;
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kbase_jd_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	struct sg_table *sgt;
+	struct scatterlist *s;
+	int i;
+	struct tagged_addr *pa;
+	int err;
+	size_t count = 0;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long gwt_mask = ~0;
+
+	alloc = reg->gpu_alloc;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+	KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
+	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
+			DMA_BIDIRECTIONAL);
+
+	if (IS_ERR_OR_NULL(sgt))
+		return -EINVAL;
+
+	/* save for later */
+	alloc->imported.umm.sgt = sgt;
+
+	pa = kbase_get_gpu_phy_pages(reg);
+	KBASE_DEBUG_ASSERT(pa);
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		size_t j, pages = PFN_UP(sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+		"sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+		sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+		(unsigned long long) sg_dma_address(s));
+
+		for (j = 0; (j < pages) && (count < reg->nr_pages); j++,
+				count++)
+			*pa++ = as_tagged(sg_dma_address(s) +
+				(j << PAGE_SHIFT));
+		WARN_ONCE(j < pages,
+			  "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+		alloc->imported.umm.dma_buf->size);
+	}
+
+	if (!(reg->flags & KBASE_REG_IMPORT_PAD) &&
+			WARN_ONCE(count < reg->nr_pages,
+			"sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+			alloc->imported.umm.dma_buf->size)) {
+		err = -EINVAL;
+		goto err_unmap_attachment;
+	}
+
+	/* Update nents as we now have pages to map */
+	alloc->nents = reg->nr_pages;
+
+#ifdef CONFIG_MALI_JOB_DUMP
+	if (kctx->gwt_enabled)
+		gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+			kbase_get_gpu_phy_pages(reg),
+			count,
+			(reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD) &
+			 gwt_mask);
+	if (err)
+		goto err_unmap_attachment;
+
+	if (reg->flags & KBASE_REG_IMPORT_PAD) {
+		err = kbase_mmu_insert_single_page(kctx,
+				reg->start_pfn + count,
+				kctx->aliasing_sink_page,
+				reg->nr_pages - count,
+				(reg->flags | KBASE_REG_GPU_RD) &
+				~KBASE_REG_GPU_WR);
+		if (err)
+			goto err_teardown_orig_pages;
+	}
+
+	return 0;
+
+err_teardown_orig_pages:
+	kbase_mmu_teardown_pages(kctx, reg->start_pfn, count);
+err_unmap_attachment:
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+			alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+
+	return err;
+}
+
+static void kbase_jd_umm_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(alloc);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+	    alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+	alloc->nents = 0;
+}
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm)
+{
+	int err;
+
+	/* decide what needs to happen for this resource */
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		if (reg->gpu_alloc->imported.user_buf.mm != locked_mm)
+			goto exit;
+
+		reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
+			err = kbase_jd_user_buf_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+	}
+	break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+			err = kbase_jd_umm_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+		break;
+	}
+#endif
+	default:
+		goto exit;
+	}
+
+	return kbase_mem_phy_alloc_get(reg->gpu_alloc);
+exit:
+	return NULL;
+}
+
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+	switch (alloc->type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		alloc->imported.umm.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.umm.current_mapping_usage_count) {
+			if (reg && reg->gpu_alloc == alloc) {
+				int err;
+
+				err = kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						alloc->nents);
+				WARN_ON(err);
+			}
+
+			kbase_jd_umm_unmap(kctx, alloc);
+		}
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		alloc->imported.user_buf.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
+			bool writeable = true;
+
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+				writeable = false;
+
+			kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+		}
+	}
+	break;
+	default:
+	break;
+	}
+	kbase_mem_phy_alloc_put(alloc);
+}
+
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *meta = NULL;
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Walk the per context external resource metadata list for the
+	 * metadata which matches the region which is being acquired.
+	 */
+	list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
+		if (walker->gpu_addr == gpu_addr) {
+			meta = walker;
+			break;
+		}
+	}
+
+	/* No metadata exists so create one. */
+	if (!meta) {
+		struct kbase_va_region *reg;
+
+		/* Find the region */
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				kctx, gpu_addr);
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+			goto failed;
+
+		/* Allocate the metadata object */
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			goto failed;
+
+		/*
+		 * Fill in the metadata object and acquire a reference
+		 * for the physical resource.
+		 */
+		meta->alloc = kbase_map_external_resource(kctx, reg, NULL);
+
+		if (!meta->alloc)
+			goto fail_map;
+
+		meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+
+		list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
+	}
+
+	return meta;
+
+fail_map:
+	kfree(meta);
+failed:
+	return NULL;
+}
+
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+	struct kbase_va_region *reg;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Search of the metadata if one isn't provided. */
+	if (!meta) {
+		/*
+		 * Walk the per context external resource metadata list for the
+		 * metadata which matches the region which is being released.
+		 */
+		list_for_each_entry(walker, &kctx->ext_res_meta_head,
+				ext_res_node) {
+			if (walker->gpu_addr == gpu_addr) {
+				meta = walker;
+				break;
+			}
+		}
+	}
+
+	/* No metadata so just return. */
+	if (!meta)
+		return false;
+
+	/* Drop the physical memory reference and free the metadata. */
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx,
+			meta->gpu_addr);
+
+	kbase_unmap_external_resource(kctx, reg, meta->alloc);
+	list_del(&meta->ext_res_node);
+	kfree(meta);
+
+	return true;
+}
+
+int kbase_sticky_resource_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->ext_res_meta_head);
+
+	return 0;
+}
+
+void kbase_sticky_resource_term(struct kbase_context *kctx)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Free any sticky resources which haven't been unmapped.
+	 *
+	 * Note:
+	 * We don't care about refcounts at this point as no future
+	 * references to the meta data will be made.
+	 * Region termination would find these if we didn't free them
+	 * here, but it's more efficient if we do the clean up here.
+	 */
+	while (!list_empty(&kctx->ext_res_meta_head)) {
+		walker = list_first_entry(&kctx->ext_res_meta_head,
+				struct kbase_ctx_ext_res_meta, ext_res_node);
+
+		kbase_sticky_resource_release(kctx, walker, 0);
+	}
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
new file mode 100755
index 0000000..e55a8fb
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -0,0 +1,1361 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/kref.h>
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include "mali_kbase_gator.h"
+#endif
+/* Required for kbase_mem_evictable_unmake */
+#include "mali_kbase_mem_linux.h"
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)	/* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
+The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and
+page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table
+updates and generates duplicate page faults as the page table information used by the MMU is not valid.   */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3)	/* round to 8 pages */
+
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0)	/* round to 1 page */
+
+/* This must always be a power of 2 */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/**
+ * A CPU mapping
+ */
+struct kbase_cpu_mapping {
+	struct   list_head mappings_list;
+	struct   kbase_mem_phy_alloc *alloc;
+	struct   kbase_context *kctx;
+	struct   kbase_va_region *region;
+	int      count;
+	int      free_on_close;
+};
+
+enum kbase_memory_type {
+	KBASE_MEM_TYPE_NATIVE,
+	KBASE_MEM_TYPE_IMPORTED_UMM,
+	KBASE_MEM_TYPE_IMPORTED_USER_BUF,
+	KBASE_MEM_TYPE_ALIAS,
+	KBASE_MEM_TYPE_TB,
+	KBASE_MEM_TYPE_RAW
+};
+
+/* internal structure, mirroring base_mem_aliasing_info,
+ * but with alloc instead of a gpu va (handle) */
+struct kbase_aliased {
+	struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
+	u64 offset; /* in pages */
+	u64 length; /* in pages */
+};
+
+/**
+ * @brief Physical pages tracking object properties
+  */
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED  (1ul << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE            (1ul << 1)
+
+/* physical pages tracking object.
+ * Set up to track N pages.
+ * N not stored here, the creator holds that info.
+ * This object only tracks how many elements are actually valid (present).
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not
+ * shared with another region or client. CPU mappings are OK to exist when changing, as
+ * long as the tracked mappings objects are updated as part of the change.
+ */
+struct kbase_mem_phy_alloc {
+	struct kref           kref; /* number of users of this alloc */
+	atomic_t              gpu_mappings;
+	size_t                nents; /* 0..N */
+	struct tagged_addr    *pages; /* N elements, only 0..nents are valid */
+
+	/* kbase_cpu_mappings */
+	struct list_head      mappings;
+
+	/* Node used to store this allocation on the eviction list */
+	struct list_head      evict_node;
+	/* Physical backing size when the pages where evicted */
+	size_t                evicted;
+	/*
+	 * Back reference to the region structure which created this
+	 * allocation, or NULL if it has been freed.
+	 */
+	struct kbase_va_region *reg;
+
+	/* type of buffer */
+	enum kbase_memory_type type;
+
+	unsigned long properties;
+
+	/* member in union valid based on @a type */
+	union {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		struct {
+			struct dma_buf *dma_buf;
+			struct dma_buf_attachment *dma_attachment;
+			unsigned int current_mapping_usage_count;
+			struct sg_table *sgt;
+		} umm;
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+		struct {
+			u64 stride;
+			size_t nents;
+			struct kbase_aliased *aliased;
+		} alias;
+		/* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
+		struct kbase_context *kctx;
+		struct kbase_alloc_import_user_buf {
+			unsigned long address;
+			unsigned long size;
+			unsigned long nr_pages;
+			struct page **pages;
+			/* top bit (1<<31) of current_mapping_usage_count
+			 * specifies that this import was pinned on import
+			 * See PINNED_ON_IMPORT
+			 */
+			u32 current_mapping_usage_count;
+			struct mm_struct *mm;
+			dma_addr_t *dma_addrs;
+		} user_buf;
+	} imported;
+};
+
+/* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is
+ * used to signify that a buffer was pinned when it was imported. Since the
+ * reference count is limited by the number of atoms that can be submitted at
+ * once there should be no danger of overflowing into this bit.
+ * Stealing the top bit also has the benefit that
+ * current_mapping_usage_count != 0 if and only if the buffer is mapped.
+ */
+#define PINNED_ON_IMPORT	(1<<31)
+
+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		atomic_inc(&alloc->gpu_mappings);
+}
+
+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+			pr_err("Mismatched %s:\n", __func__);
+			dump_stack();
+		}
+}
+
+/**
+ * kbase_mem_is_imported - Indicate whether a memory type is imported
+ *
+ * @type: the memory type
+ *
+ * Return: true if the memory type is imported, false otherwise
+ */
+static inline bool kbase_mem_is_imported(enum kbase_memory_type type)
+{
+	return (type == KBASE_MEM_TYPE_IMPORTED_UMM) ||
+		(type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+}
+
+void kbase_mem_kref_free(struct kref *kref);
+
+int kbase_mem_init(struct kbase_device *kbdev);
+void kbase_mem_halt(struct kbase_device *kbdev);
+void kbase_mem_term(struct kbase_device *kbdev);
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_get(&alloc->kref);
+	return alloc;
+}
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_put(&alloc->kref, kbase_mem_kref_free);
+	return NULL;
+}
+
+/**
+ * A GPU memory region, and attributes for CPU mappings.
+ */
+struct kbase_va_region {
+	struct rb_node rblink;
+	struct list_head link;
+
+	struct kbase_context *kctx;	/* Backlink to base context */
+
+	u64 start_pfn;		/* The PFN in GPU space */
+	size_t nr_pages;
+	/* Initial commit, for aligning the start address and correctly growing
+	 * KBASE_REG_TILER_ALIGN_TOP regions */
+	size_t initial_commit;
+
+/* Free region */
+#define KBASE_REG_FREE              (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR            (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR            (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX            (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED        (1ul << 4)
+/* Is GPU cached? */
+#define KBASE_REG_GPU_CACHED        (1ul << 5)
+
+#define KBASE_REG_GROWABLE          (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW           (1ul << 7)
+
+/* Bit 8 is unused */
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN          (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH        (1ul << 10)
+
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_MASK         (3ul << 11)
+#define KBASE_REG_ZONE(x)           (((x) & 3) << 11)
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD            (1ul<<13)
+/* CPU read access */
+#define KBASE_REG_CPU_RD            (1ul<<14)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK      (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x)  (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x)  (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_SECURE            (1ul << 19)
+
+#define KBASE_REG_DONT_NEED         (1ul << 20)
+
+/* Imported buffer is padded? */
+#define KBASE_REG_IMPORT_PAD        (1ul << 21)
+
+/* Bit 22 is reserved.
+ *
+ * Do not remove, use the next unreserved bit for new flags */
+#define KBASE_REG_RESERVED_BIT_22   (1ul << 22)
+
+/* The top of the initial commit is aligned to extent pages.
+ * Extent must be a power of 2 */
+#define KBASE_REG_TILER_ALIGN_TOP   (1ul << 23)
+
+/* Memory is handled by JIT - user space should not be able to free it */
+#define KBASE_REG_JIT               (1ul << 24)
+
+#define KBASE_REG_ZONE_SAME_VA      KBASE_REG_ZONE(0)
+
+/* only used with 32-bit clients */
+/*
+ * On a 32bit platform, custom VA should be wired from (4GB + shader region)
+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink
+ * it down, if required by the GPU, during initialization.
+ */
+
+/*
+ * Dedicated 16MB region for shader code:
+ * VA range 0x101000000-0x102000000
+ */
+#define KBASE_REG_ZONE_EXEC         KBASE_REG_ZONE(1)
+#define KBASE_REG_ZONE_EXEC_BASE    (0x101000000ULL >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_SIZE    ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
+
+#define KBASE_REG_ZONE_CUSTOM_VA         KBASE_REG_ZONE(2)
+/* Starting after KBASE_REG_ZONE_EXEC */
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE    \
+	(KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE)
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE    (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
+
+	unsigned long flags;
+
+	size_t extent; /* nr of pages alloc'd on PF */
+
+	struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */
+	struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */
+
+	/* List head used to store the region in the JIT allocation pool */
+	struct list_head jit_node;
+	/* The last JIT usage ID for this region */
+	u16 jit_usage_id;
+	/* The JIT bin this allocation came from */
+	u8 jit_bin_id;
+};
+
+/* Common functions */
+static inline struct tagged_addr *kbase_get_cpu_phy_pages(
+		struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->pages;
+}
+
+static inline struct tagged_addr *kbase_get_gpu_phy_pages(
+		struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->gpu_alloc->pages;
+}
+
+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	/* if no alloc object the backed size naturally is 0 */
+	if (!reg->cpu_alloc)
+		return 0;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->nents;
+}
+
+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
+
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
+{
+	struct kbase_mem_phy_alloc *alloc;
+	size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+	size_t per_page_size = sizeof(*alloc->pages);
+
+	/* Imported pages may have page private data already in use */
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		alloc_size += nr_pages *
+				sizeof(*alloc->imported.user_buf.dma_addrs);
+		per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
+	}
+
+	/*
+	 * Prevent nr_pages*per_page_size + sizeof(*alloc) from
+	 * wrapping around.
+	 */
+	if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+			/ per_page_size))
+		return ERR_PTR(-ENOMEM);
+
+	/* Allocate based on the size to reduce internal fragmentation of vmem */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc = vzalloc(alloc_size);
+	else
+		alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+	if (!alloc)
+		return ERR_PTR(-ENOMEM);
+
+	/* Store allocation method */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+	kref_init(&alloc->kref);
+	atomic_set(&alloc->gpu_mappings, 0);
+	alloc->nents = 0;
+	alloc->pages = (void *)(alloc + 1);
+	INIT_LIST_HEAD(&alloc->mappings);
+	alloc->type = type;
+
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
+		alloc->imported.user_buf.dma_addrs =
+				(void *) (alloc->pages + nr_pages);
+
+	return alloc;
+}
+
+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
+		struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+	reg->cpu_alloc = kbase_alloc_create(reg->nr_pages,
+			KBASE_MEM_TYPE_NATIVE);
+	if (IS_ERR(reg->cpu_alloc))
+		return PTR_ERR(reg->cpu_alloc);
+	else if (!reg->cpu_alloc)
+		return -ENOMEM;
+
+	reg->cpu_alloc->imported.kctx = kctx;
+	if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE)
+	    && (reg->flags & KBASE_REG_CPU_CACHED)) {
+		reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
+				KBASE_MEM_TYPE_NATIVE);
+		if (IS_ERR_OR_NULL(reg->gpu_alloc)) {
+			kbase_mem_phy_alloc_put(reg->cpu_alloc);
+			return -ENOMEM;
+		}
+		reg->gpu_alloc->imported.kctx = kctx;
+	} else {
+		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	}
+
+	mutex_lock(&kctx->jit_evict_lock);
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+	INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	reg->flags &= ~KBASE_REG_FREE;
+
+	return 0;
+}
+
+static inline u32 kbase_atomic_add_pages(u32 num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_add_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+static inline u32 kbase_atomic_sub_pages(u32 num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_sub_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+/*
+ * Max size for kbdev memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * Max size for kctx memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KCTX  (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * The order required for a 2MB page allocation (2^order * 4KB = 2MB)
+ */
+#define KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER	9
+
+/*
+ * The order required for a 4KB page allocation
+ */
+#define KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER	0
+
+/**
+ * kbase_mem_pool_init - Create a memory pool for a kbase device
+ * @pool:      Memory pool to initialize
+ * @max_size:  Maximum number of free pages the pool can hold
+ * @order:     Page order for physical page size (order=0=>4kB, order=9=>2MB)
+ * @kbdev:     Kbase device where memory is used
+ * @next_pool: Pointer to the next pool or NULL.
+ *
+ * Allocations from @pool are in whole pages. Each @pool has a free list where
+ * pages can be quickly allocated from. The free list is initially empty and
+ * filled whenever pages are freed back to the pool. The number of free pages
+ * in the pool will in general not exceed @max_size, but the pool may in
+ * certain corner cases grow above @max_size.
+ *
+ * If @next_pool is not NULL, we will allocate from @next_pool before going to
+ * the kernel allocator. Similarily pages can spill over to @next_pool when
+ * @pool is full. Pages are zeroed before they spill over to another pool, to
+ * prevent leaking information between applications.
+ *
+ * A shrinker is registered so that Linux mm can reclaim pages from the pool as
+ * needed.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		size_t order,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool);
+
+/**
+ * kbase_mem_pool_term - Destroy a memory pool
+ * @pool:  Memory pool to destroy
+ *
+ * Pages in the pool will spill over to @next_pool (if available) or freed to
+ * the kernel.
+ */
+void kbase_mem_pool_term(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_alloc - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * Allocations from the pool are made as follows:
+ * 1. If there are free pages in the pool, allocate a page from @pool.
+ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page
+ *    from @next_pool.
+ * 3. Return NULL if no memory in the pool
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ *
+ * Note : This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_alloc_locked() instead.
+ */
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_alloc_locked - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * If there are free pages in the pool, this function allocates a page from
+ * @pool. This function does not use @next_pool.
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ *
+ * Note : Caller must hold the pool lock.
+ */
+struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_free - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @page:  Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * Pages are freed to the pool as follows:
+ * 1. If @pool is not full, add @page to @pool.
+ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to
+ *    @next_pool.
+ * 3. Finally, free @page to the kernel.
+ *
+ * Note : This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_free_locked() instead.
+ */
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page,
+		bool dirty);
+
+/**
+ * kbase_mem_pool_free_locked - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @p:     Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * If @pool is not full, this function adds @page to @pool. Otherwise, @page is
+ * freed to the kernel. This function does not use @next_pool.
+ *
+ * Note : Caller must hold the pool lock.
+ */
+void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty);
+
+/**
+ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool
+ * @pool:     Memory pool to allocate from
+ * @nr_pages: Number of pages to allocate
+ * @pages:    Pointer to array where the physical address of the allocated
+ *            pages will be stored.
+ * @partial_allowed: If fewer pages allocated is allowed
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages.
+ *
+ * Return:
+ * On success number of pages allocated (could be less than nr_pages if
+ * partial_allowed).
+ * On error an error code.
+ *
+ * Note : This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_alloc_pages_locked() instead.
+ *
+ * The caller must not hold vm_lock, as this could cause a deadlock if
+ * the kernel OoM killer runs. If the caller must allocate pages while holding
+ * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
+ */
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		struct tagged_addr *pages, bool partial_allowed);
+
+/**
+ * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool
+ * @pool:        Memory pool to allocate from
+ * @nr_4k_pages: Number of pages to allocate
+ * @pages:       Pointer to array where the physical address of the allocated
+ *               pages will be stored.
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages. This
+ * version does not allocate new pages from the kernel, and therefore will never
+ * trigger the OoM killer. Therefore, it can be run while the vm_lock is held.
+ *
+ * As new pages can not be allocated, the caller must ensure there are
+ * sufficient pages in the pool. Usage of this function should look like :
+ *
+ *   kbase_gpu_vm_lock(kctx);
+ *   kbase_mem_pool_lock(pool)
+ *   while (kbase_mem_pool_size(pool) < pages_required) {
+ *     kbase_mem_pool_unlock(pool)
+ *     kbase_gpu_vm_unlock(kctx);
+ *     kbase_mem_pool_grow(pool)
+ *     kbase_gpu_vm_lock(kctx);
+ *     kbase_mem_pool_lock(pool)
+ *   }
+ *   kbase_mem_pool_alloc_pages_locked(pool)
+ *   kbase_mem_pool_unlock(pool)
+ *   Perform other processing that requires vm_lock...
+ *   kbase_gpu_vm_unlock(kctx);
+ *
+ * This ensures that the pool can be grown to the required size and that the
+ * allocation can complete without another thread using the newly grown pages.
+ *
+ * Return:
+ * On success number of pages allocated.
+ * On error an error code.
+ *
+ * Note : Caller must hold the pool lock.
+ */
+int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_4k_pages, struct tagged_addr *pages);
+
+/**
+ * kbase_mem_pool_free_pages - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		struct tagged_addr *pages, bool dirty, bool reclaimed);
+
+/**
+ * kbase_mem_pool_free_pages_locked - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages, bool dirty,
+		bool reclaimed);
+
+/**
+ * kbase_mem_pool_size - Get number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Note: the size of the pool may in certain corner cases exceed @max_size!
+ *
+ * Return: Number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool)
+{
+	return READ_ONCE(pool->cur_size);
+}
+
+/**
+ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Return: Maximum number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool)
+{
+	return pool->max_size;
+}
+
+
+/**
+ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool
+ * @pool:     Memory pool to inspect
+ * @max_size: Maximum number of free pages the pool can hold
+ *
+ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit.
+ * For details see kbase_mem_pool_shrink().
+ */
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
+
+/**
+ * kbase_mem_pool_grow - Grow the pool
+ * @pool:       Memory pool to grow
+ * @nr_to_grow: Number of pages to add to the pool
+ *
+ * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
+ * become larger than the maximum size specified.
+ *
+ * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages
+ */
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+
+/**
+ * kbase_mem_pool_trim - Grow or shrink the pool to a new size
+ * @pool:     Memory pool to trim
+ * @new_size: New number of pages in the pool
+ *
+ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but
+ * not above the max_size for the pool.
+ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel.
+ */
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
+
+/**
+ * kbase_mem_pool_mark_dying - Mark that this pool is dying
+ * @pool:     Memory pool
+ *
+ * This will cause any ongoing allocation operations (eg growing on page fault)
+ * to be terminated.
+ */
+void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_alloc_page - Allocate a new page for a device
+ * @pool:  Memory pool to allocate a page from
+ *
+ * Most uses should use kbase_mem_pool_alloc to allocate a page. However that
+ * function can fail in the event the pool is empty.
+ *
+ * Return: A new page or NULL if no memory
+ */
+struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool);
+
+int kbase_region_tracker_init(struct kbase_context *kctx);
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
+		u8 max_allocations, u8 trim_level);
+void kbase_region_tracker_term(struct kbase_context *kctx);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * @brief Check that a pointer is actually a valid region.
+ *
+ * Must be called with context lock held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr);
+
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone);
+void kbase_free_alloced_region(struct kbase_va_region *reg);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+bool kbase_check_alloc_flags(unsigned long flags);
+bool kbase_check_import_flags(unsigned long flags);
+
+/**
+ * kbase_check_alloc_sizes - check user space sizes parameters for an
+ *                           allocation
+ *
+ * @kctx:         kbase context
+ * @flags:        The flags passed from user space
+ * @va_pages:     The size of the requested region, in pages.
+ * @commit_pages: Number of pages to commit initially.
+ * @extent:       Number of pages to grow by on GPU page fault and/or alignment
+ *                (depending on flags)
+ *
+ * Makes checks on the size parameters passed in from user space for a memory
+ * allocation call, with respect to the flags requested.
+ *
+ * Return: 0 if sizes are valid for these flags, negative error code otherwise
+ */
+int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
+		u64 va_pages, u64 commit_pages, u64 extent);
+
+/**
+ * kbase_update_region_flags - Convert user space flags to kernel region flags
+ *
+ * @kctx:  kbase context
+ * @reg:   The region to update the flags on
+ * @flags: The flags passed from user space
+ *
+ * The user space flag BASE_MEM_COHERENT_SYSTEM_REQUIRED will be rejected and
+ * this function will fail if the system does not support system coherency.
+ *
+ * Return: 0 if successful, -EINVAL if the flags are not supported
+ */
+int kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags);
+
+void kbase_gpu_vm_lock(struct kbase_context *kctx);
+void kbase_gpu_vm_unlock(struct kbase_context *kctx);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
+
+int kbase_mmu_init(struct kbase_context *kctx);
+void kbase_mmu_term(struct kbase_context *kctx);
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
+void kbase_mmu_free_pgd(struct kbase_context *kctx);
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+				  struct tagged_addr *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  struct tagged_addr *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr phys, size_t nr,
+					unsigned long flags);
+
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr);
+int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr *phys, size_t nr,
+					unsigned long flags);
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
+			   struct tagged_addr *phys, size_t nr,
+			   unsigned long flags);
+
+/**
+ * @brief Register region and map it on the GPU.
+ *
+ * Call kbase_add_va_region() and map the region on the GPU.
+ */
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+/**
+ * @brief Remove the region from the GPU and unregister it.
+ *
+ * Must be called with context lock held.
+ */
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ */
+void kbase_mmu_update(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable() - Disable the MMU for a previously active kbase context.
+ * @kctx:	Kbase context
+ *
+ * Disable and perform the required cache maintenance to remove the all
+ * data from provided kbase context from the GPU caches.
+ *
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ */
+void kbase_mmu_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified
+ * address space.
+ * @kbdev:	Kbase device
+ * @as_nr:	The address space number to set to unmapped.
+ *
+ * This function must only be called during reset/power-up and it used to
+ * ensure the registers are in a known state.
+ *
+ * The caller must hold kbdev->mmu_hw_mutex.
+ */
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr);
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** Dump the MMU tables to a buffer
+ *
+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the
+ * buffer is too small then the return value will be NULL.
+ *
+ * The GPU vm lock must be held when calling this function.
+ *
+ * The buffer returned should be freed with @ref vfree when it is no longer required.
+ *
+ * @param[in]   kctx        The kbase context to dump
+ * @param[in]   nr_pages    The number of pages to allocate for the buffer.
+ *
+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too
+ * small)
+ */
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages);
+
+/**
+ * kbase_sync_now - Perform cache maintenance on a memory region
+ *
+ * @kctx: The kbase context of the region
+ * @sset: A syncset structure describing the region and direction of the
+ *        synchronisation required
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset);
+void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr cpu_pa,
+		struct tagged_addr gpu_pa, off_t offset, size_t size,
+		enum kbase_sync_type sync_fn);
+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+
+/* OS specific functions */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr);
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg);
+void kbase_os_mem_map_lock(struct kbase_context *kctx);
+void kbase_os_mem_map_unlock(struct kbase_context *kctx);
+
+/**
+ * @brief Update the memory allocation counters for the current process
+ *
+ * OS specific call to updates the current memory allocation counters for the current process with
+ * the supplied delta.
+ *
+ * @param[in] kctx  The kbase context
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
+
+/**
+ * @brief Add to the memory allocation counters for the current process
+ *
+ * OS specific call to add to the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, pages);
+}
+
+/**
+ * @brief Subtract from the memory allocation counters for the current process
+ *
+ * OS specific call to subtract from the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, 0 - pages);
+}
+
+/**
+ * kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU
+ * mapping of a memory allocation containing a given address range
+ *
+ * Searches for a CPU mapping of any part of any region that fully encloses the
+ * CPU virtual address range specified by @uaddr and @size. Returns a failure
+ * indication if only part of the address range lies within a CPU mapping.
+ *
+ * @kctx:      The kernel base context used for the allocation.
+ * @uaddr:     Start of the CPU virtual address range.
+ * @size:      Size of the CPU virtual address range (in bytes).
+ * @offset:    The offset from the start of the allocation to the specified CPU
+ *             virtual address.
+ *
+ * Return: 0 if offset was obtained successfully. Error code otherwise.
+ */
+int kbasep_find_enclosing_cpu_mapping_offset(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset);
+
+/**
+ * kbasep_find_enclosing_gpu_mapping_start_and_offset() - Find the address of
+ * the start of GPU virtual memory region which encloses @gpu_addr for the
+ * @size length in bytes
+ *
+ * Searches for the memory region in GPU virtual memory space which contains
+ * the region defined by the @gpu_addr and @size, where @gpu_addr is the
+ * beginning and @size the length in bytes of the provided region. If found,
+ * the location of the start address of the GPU virtual memory region is
+ * passed in @start pointer and the location of the offset of the region into
+ * the GPU virtual memory region is passed in @offset pointer.
+ *
+ * @kctx:	The kernel base context within which the memory is searched.
+ * @gpu_addr:	GPU virtual address for which the region is sought; defines
+ *              the beginning of the provided region.
+ * @size:       The length (in bytes) of the provided region for which the
+ *              GPU virtual memory region is sought.
+ * @start:      Pointer to the location where the address of the start of
+ *              the found GPU virtual memory region is.
+ * @offset:     Pointer to the location where the offset of @gpu_addr into
+ *              the found GPU virtual memory region is.
+ */
+int kbasep_find_enclosing_gpu_mapping_start_and_offset(
+		struct kbase_context *kctx,
+		u64 gpu_addr, size_t size, u64 *start, u64 *offset);
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer);
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * kbase_alloc_phy_pages_helper - Allocates physical pages.
+ * @alloc:              allocation object to add pages to
+ * @nr_pages_requested: number of physical pages to allocate
+ *
+ * Allocates \a nr_pages_requested and updates the alloc object.
+ *
+ * Return: 0 if all pages have been successfully allocated. Error code otherwise
+ *
+ * Note : The caller must not hold vm_lock, as this could cause a deadlock if
+ * the kernel OoM killer runs. If the caller must allocate pages while holding
+ * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
+ */
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
+		size_t nr_pages_requested);
+
+/**
+ * kbase_alloc_phy_pages_helper_locked - Allocates physical pages.
+ * @alloc:              allocation object to add pages to
+ * @pool:               Memory pool to allocate from
+ * @nr_pages_requested: number of physical pages to allocate
+ * @prealloc_sa:        Information about the partial allocation if the amount
+ *                      of memory requested is not a multiple of 2MB.
+ *
+ * Allocates \a nr_pages_requested and updates the alloc object. This function
+ * does not allocate new pages from the kernel, and therefore will never trigger
+ * the OoM killer. Therefore, it can be run while the vm_lock is held.
+ *
+ * As new pages can not be allocated, the caller must ensure there are
+ * sufficient pages in the pool. Usage of this function should look like :
+ *
+ *   kbase_gpu_vm_lock(kctx);
+ *   kbase_mem_pool_lock(pool)
+ *   while (kbase_mem_pool_size(pool) < pages_required) {
+ *     kbase_mem_pool_unlock(pool)
+ *     kbase_gpu_vm_unlock(kctx);
+ *     kbase_mem_pool_grow(pool)
+ *     kbase_gpu_vm_lock(kctx);
+ *     kbase_mem_pool_lock(pool)
+ *   }
+ *   kbase_alloc_phy_pages_helper_locked(pool)
+ *   kbase_mem_pool_unlock(pool)
+ *   Perform other processing that requires vm_lock...
+ *   kbase_gpu_vm_unlock(kctx);
+ *
+ * This ensures that the pool can be grown to the required size and that the
+ * allocation can complete without another thread using the newly grown pages.
+ *
+ * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then
+ * @pool must be alloc->imported.kctx->lp_mem_pool. Otherwise it must be
+ * alloc->imported.kctx->mem_pool.
+ *
+ * @prealloc_sa shall be set to NULL if it has been consumed by this function.
+ *
+ * Return: Pointer to array of allocated pages. NULL on failure.
+ *
+ * Note : Caller must hold pool->pool_lock
+ */
+struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
+		struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool,
+		size_t nr_pages_requested,
+		struct kbase_sub_alloc **prealloc_sa);
+
+/**
+* @brief Free physical pages.
+*
+* Frees \a nr_pages and updates the alloc object.
+*
+* @param[in] alloc allocation object to free pages from
+* @param[in] nr_pages_to_free number of physical pages to free
+*/
+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
+
+/**
+ * kbase_free_phy_pages_helper_locked - Free pages allocated with
+ *                                      kbase_alloc_phy_pages_helper_locked()
+ * @alloc:            Allocation object to free pages from
+ * @pool:             Memory pool to return freed pages to
+ * @pages:            Pages allocated by kbase_alloc_phy_pages_helper_locked()
+ * @nr_pages_to_free: Number of physical pages to free
+ *
+ * This function atomically frees pages allocated with
+ * kbase_alloc_phy_pages_helper_locked(). @pages is the pointer to the page
+ * array that is returned by that function. @pool must be the pool that the
+ * pages were originally allocated from.
+ *
+ * If the mem_pool has been unlocked since the allocation then
+ * kbase_free_phy_pages_helper() should be used instead.
+ */
+void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
+		struct kbase_mem_pool *pool, struct tagged_addr *pages,
+		size_t nr_pages_to_free);
+
+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+{
+	SetPagePrivate(p);
+	if (sizeof(dma_addr_t) > sizeof(p->private)) {
+		/* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
+		 * private field stays the same. So we have to be clever and
+		 * use the fact that we only store DMA addresses of whole pages,
+		 * so the low bits should be zero */
+		KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
+		set_page_private(p, dma_addr >> PAGE_SHIFT);
+	} else {
+		set_page_private(p, dma_addr);
+	}
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+	if (sizeof(dma_addr_t) > sizeof(p->private))
+		return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
+
+	return (dma_addr_t)page_private(p);
+}
+
+static inline void kbase_clear_dma_addr(struct page *p)
+{
+	ClearPagePrivate(p);
+}
+
+/**
+* @brief Process a bus or page fault.
+*
+* This function will process a fault on a specific address space
+*
+* @param[in] kbdev   The @ref kbase_device the fault happened on
+* @param[in] kctx    The @ref kbase_context for the faulting address space if
+*                    one was found.
+* @param[in] as      The address space that has the fault
+*/
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_as *as);
+
+/**
+ * @brief Process a page fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void page_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Process a bus fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void bus_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Flush MMU workqueues.
+ *
+ * This function will cause any outstanding page or bus faults to be processed.
+ * It should be called prior to powering off the GPU.
+ *
+ * @param[in] kbdev   Device pointer
+ */
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev);
+
+/**
+ * kbase_sync_single_for_device - update physical memory and give GPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * kbase_jit_debugfs_init - Add per context debugfs entry for JIT.
+ * @kctx: kbase context
+ */
+void kbase_jit_debugfs_init(struct kbase_context *kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_init - Initialize the JIT memory pool management
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_jit_init(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_allocate - Allocate JIT memory
+ * @kctx: kbase context
+ * @info: JIT allocation information
+ *
+ * Return: JIT allocation on success or NULL on failure.
+ */
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info);
+
+/**
+ * kbase_jit_free - Free a JIT allocation
+ * @kctx: kbase context
+ * @reg: JIT allocation
+ *
+ * Frees a JIT allocation and places it into the free pool for later reuse.
+ */
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing
+ * @reg: JIT allocation
+ */
+void kbase_jit_backing_lost(struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_evict - Evict a JIT allocation from the pool
+ * @kctx: kbase context
+ *
+ * Evict the least recently used JIT allocation from the pool. This can be
+ * required if normal VA allocations are failing due to VA exhaustion.
+ *
+ * Return: True if a JIT allocation was freed, false otherwise.
+ */
+bool kbase_jit_evict(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_term - Terminate the JIT memory pool management
+ * @kctx: kbase context
+ */
+void kbase_jit_term(struct kbase_context *kctx);
+
+/**
+ * kbase_map_external_resource - Map an external resource to the GPU.
+ * @kctx:              kbase context.
+ * @reg:               The region to map.
+ * @locked_mm:         The mm_struct which has been locked for this operation.
+ *
+ * Return: The physical allocation which backs the region on success or NULL
+ * on failure.
+ */
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm);
+
+/**
+ * kbase_unmap_external_resource - Unmap an external resource from the GPU.
+ * @kctx:  kbase context.
+ * @reg:   The region to unmap or NULL if it has already been released.
+ * @alloc: The physical allocation being unmapped.
+ */
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_sticky_resource_init - Initialize sticky resource management.
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_sticky_resource_init(struct kbase_context *kctx);
+
+/**
+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @gpu_addr: The GPU address of the external resource.
+ *
+ * Return: The metadata object which represents the binding between the
+ * external resource and the kbase context on success or NULL on failure.
+ */
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_release - Release a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @meta:     Binding metadata.
+ * @gpu_addr: GPU address of the external resource.
+ *
+ * If meta is NULL then gpu_addr will be used to scan the metadata list and
+ * find the matching metadata (if any), otherwise the provided meta will be
+ * used and gpu_addr will be ignored.
+ *
+ * Return: True if the release found the metadata and the reference was dropped.
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_term - Terminate sticky resource management.
+ * @kctx: kbase context
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_pool_lock - Lock a memory pool
+ * @pool: Memory pool to lock
+ */
+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
+{
+	spin_lock(&pool->pool_lock);
+}
+
+/**
+ * kbase_mem_pool_lock - Release a memory pool
+ * @pool: Memory pool to lock
+ */
+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
+{
+	spin_unlock(&pool->pool_lock);
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc);
+
+#endif				/* _KBASE_MEM_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
new file mode 100755
index 0000000..59cc035
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -0,0 +1,2382 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_linux.c
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/shrinker.h>
+#include <linux/cache.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_ioctl.h>
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
+		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
+		u64 *gpu_va)
+{
+	int zone;
+	struct kbase_va_region *reg;
+	struct device *dev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(gpu_va);
+
+	dev = kctx->kbdev->dev;
+	*gpu_va = 0; /* return 0 on failure */
+
+	if (!kbase_check_alloc_flags(*flags)) {
+		dev_warn(dev,
+				"kbase_mem_alloc called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, extent))
+		goto bad_sizes;
+
+	/* find out which VA zone to use */
+	if (*flags & BASE_MEM_SAME_VA)
+		zone = KBASE_REG_ZONE_SAME_VA;
+	else if (*flags & BASE_MEM_PROT_GPU_EX)
+		zone = KBASE_REG_ZONE_EXEC;
+	else
+		zone = KBASE_REG_ZONE_CUSTOM_VA;
+
+	reg = kbase_alloc_free_region(kctx, 0, va_pages, zone);
+	if (!reg) {
+		dev_err(dev, "Failed to allocate free region");
+		goto no_region;
+	}
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	if (kbase_reg_prepare_native(reg, kctx) != 0) {
+		dev_err(dev, "Failed to prepare region");
+		goto prepare_failed;
+	}
+
+	if (*flags & (BASE_MEM_GROW_ON_GPF|BASE_MEM_TILER_ALIGN_TOP)) {
+		/* kbase_check_alloc_sizes() already checks extent is valid for
+		 * assigning to reg->extent */
+		reg->extent = extent;
+	} else {
+		reg->extent = 0;
+	}
+
+	if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) {
+		dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)",
+				(unsigned long long)commit_pages,
+				(unsigned long long)va_pages);
+		goto no_mem;
+	}
+	reg->initial_commit = commit_pages;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & BASE_MEM_SAME_VA) {
+		unsigned long prot = PROT_NONE;
+		unsigned long va_size = va_pages << PAGE_SHIFT;
+		unsigned long va_map = va_size;
+		unsigned long cookie, cookie_nr;
+		unsigned long cpu_addr;
+
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(dev, "No cookies available for allocation!");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_cookie;
+		}
+		/* return a cookie */
+		cookie_nr = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << cookie_nr);
+		BUG_ON(kctx->pending_regions[cookie_nr]);
+		kctx->pending_regions[cookie_nr] = reg;
+
+		kbase_gpu_vm_unlock(kctx);
+
+		/* relocate to correct base */
+		cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		cookie <<= PAGE_SHIFT;
+
+		/*
+		 * 10.1-10.4 UKU userland relies on the kernel to call mmap.
+		 * For all other versions we can just return the cookie
+		 */
+		if (kctx->api_version < KBASE_API_VERSION(10, 1) ||
+		    kctx->api_version > KBASE_API_VERSION(10, 4)) {
+			*gpu_va = (u64) cookie;
+			return reg;
+		}
+		if (*flags & BASE_MEM_PROT_CPU_RD)
+			prot |= PROT_READ;
+		if (*flags & BASE_MEM_PROT_CPU_WR)
+			prot |= PROT_WRITE;
+
+		cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot,
+				MAP_SHARED, cookie);
+
+		if (IS_ERR_VALUE(cpu_addr)) {
+			kbase_gpu_vm_lock(kctx);
+			kctx->pending_regions[cookie_nr] = NULL;
+			kctx->cookies |= (1UL << cookie_nr);
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+
+		*gpu_va = (u64) cpu_addr;
+	} else /* we control the VA */ {
+		if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
+			dev_warn(dev, "Failed to map memory on GPU");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+
+		kbase_gpu_vm_unlock(kctx);
+	}
+
+	return reg;
+
+no_mmap:
+no_cookie:
+no_mem:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+invalid_flags:
+prepare_failed:
+	kfree(reg);
+no_region:
+bad_sizes:
+bad_flags:
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_alloc);
+
+int kbase_mem_query(struct kbase_context *kctx,
+		u64 gpu_addr, u64 query, u64 * const out)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(out);
+
+	if (gpu_addr & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev, "mem_query: gpu_addr: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	switch (query) {
+	case KBASE_MEM_QUERY_COMMIT_SIZE:
+		if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) {
+			*out = kbase_reg_current_backed_size(reg);
+		} else {
+			size_t i;
+			struct kbase_aliased *aliased;
+			*out = 0;
+			aliased = reg->cpu_alloc->imported.alias.aliased;
+			for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++)
+				*out += aliased[i].length;
+		}
+		break;
+	case KBASE_MEM_QUERY_VA_SIZE:
+		*out = reg->nr_pages;
+		break;
+	case KBASE_MEM_QUERY_FLAGS:
+	{
+		*out = 0;
+		if (KBASE_REG_CPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_WR;
+		if (KBASE_REG_CPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_RD;
+		if (KBASE_REG_CPU_CACHED & reg->flags)
+			*out |= BASE_MEM_CACHED_CPU;
+		if (KBASE_REG_GPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_WR;
+		if (KBASE_REG_GPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_RD;
+		if (!(KBASE_REG_GPU_NX & reg->flags))
+			*out |= BASE_MEM_PROT_GPU_EX;
+		if (KBASE_REG_SHARE_BOTH & reg->flags)
+			*out |= BASE_MEM_COHERENT_SYSTEM;
+		if (KBASE_REG_SHARE_IN & reg->flags)
+			*out |= BASE_MEM_COHERENT_LOCAL;
+		if (kctx->api_version >= KBASE_API_VERSION(11, 2)) {
+			/* Prior to 11.2, these were known about by user-side
+			 * but we did not return them. Returning some of these
+			 * caused certain clients that were not expecting them
+			 * to fail, so we omit all of them as a special-case
+			 * for compatibility reasons */
+			if (KBASE_REG_PF_GROW & reg->flags)
+				*out |= BASE_MEM_GROW_ON_GPF;
+			if (KBASE_REG_SECURE & reg->flags)
+				*out |= BASE_MEM_SECURE;
+		}
+		if (KBASE_REG_TILER_ALIGN_TOP & reg->flags)
+			*out |= BASE_MEM_TILER_ALIGN_TOP;
+
+		WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE,
+				"BASE_MEM_FLAGS_QUERYABLE needs updating\n");
+		*out &= BASE_MEM_FLAGS_QUERYABLE;
+		break;
+	}
+	default:
+		*out = 0;
+		goto out_unlock;
+	}
+
+	ret = 0;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the
+ * Ephemeral memory eviction list.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages which can be freed.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long pages = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+
+	mutex_lock(&kctx->jit_evict_lock);
+
+	list_for_each_entry(alloc, &kctx->evict_list, evict_node)
+		pages += alloc->nents;
+
+	mutex_unlock(&kctx->jit_evict_lock);
+	return pages;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction
+ * list for pages and try to reclaim them.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages freed (can be less then requested) or -1 if the
+ * shrinker failed to free pages in its pool.
+ *
+ * Note:
+ * This function accesses region structures without taking the region lock,
+ * this is required as the OOM killer can call the shrinker after the region
+ * lock has already been held.
+ * This is safe as we can guarantee that a region on the eviction list will
+ * not be freed (kbase_mem_free_region removes the allocation from the list
+ * before destroying it), or modified by other parts of the driver.
+ * The eviction list itself is guarded by the eviction lock and the MMU updates
+ * are protected by their own lock.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	struct kbase_mem_phy_alloc *tmp;
+	unsigned long freed = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+	mutex_lock(&kctx->jit_evict_lock);
+
+	list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
+		int err;
+
+		err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
+				0, alloc->nents);
+		if (err != 0) {
+			/*
+			 * Failed to remove GPU mapping, tell the shrinker
+			 * to stop trying to shrink our slab even though we
+			 * have pages in it.
+			 */
+			freed = -1;
+			goto out_unlock;
+		}
+
+		/*
+		 * Update alloc->evicted before freeing the backing so the
+		 * helper can determine that it needs to bypass the accounting
+		 * and memory pool.
+		 */
+		alloc->evicted = alloc->nents;
+
+		kbase_free_phy_pages_helper(alloc, alloc->evicted);
+		freed += alloc->evicted;
+		list_del_init(&alloc->evict_node);
+
+		/*
+		 * Inform the JIT allocator this region has lost backing
+		 * as it might need to free the allocation.
+		 */
+		kbase_jit_backing_lost(alloc->reg);
+
+		/* Enough pages have been freed so stop now */
+		if (freed > sc->nr_to_scan)
+			break;
+	}
+out_unlock:
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_evictable_reclaim_count_objects(s, sc);
+
+	return kbase_mem_evictable_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_evictable_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->evict_list);
+	mutex_init(&kctx->jit_evict_lock);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink;
+#else
+	kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
+	kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+#endif
+	kctx->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	kctx->reclaim.batch = 0;
+#endif
+	register_shrinker(&kctx->reclaim);
+	return 0;
+}
+
+void kbase_mem_evictable_deinit(struct kbase_context *kctx)
+{
+	unregister_shrinker(&kctx->reclaim);
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	int __maybe_unused new_page_count;
+
+	kbase_process_page_usage_dec(kctx, alloc->nents);
+	new_page_count = kbase_atomic_sub_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+}
+
+/**
+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable.
+ * @alloc: The physical allocation
+ */
+static
+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	int __maybe_unused new_page_count;
+
+	new_page_count = kbase_atomic_add_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters so that the allocation is accounted for
+	 * against the process and thus is visible to the OOM killer,
+	 */
+	kbase_process_page_usage_inc(kctx, alloc->nents);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+}
+
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
+			0, gpu_alloc->nents);
+
+	mutex_lock(&kctx->jit_evict_lock);
+	/* This allocation can't already be on a list. */
+	WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
+	/*
+	 * Add the allocation to the eviction list, after this point the shrink
+	 * can reclaim it.
+	 */
+	list_add(&gpu_alloc->evict_node, &kctx->evict_list);
+	mutex_unlock(&kctx->jit_evict_lock);
+	kbase_mem_evictable_mark_reclaim(gpu_alloc);
+
+	gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED;
+	return 0;
+}
+
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	mutex_lock(&kctx->jit_evict_lock);
+	/*
+	 * First remove the allocation from the eviction list as it's no
+	 * longer eligible for eviction.
+	 */
+	list_del_init(&gpu_alloc->evict_node);
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	if (gpu_alloc->evicted == 0) {
+		/*
+		 * The backing is still present, update the VM stats as it's
+		 * in use again.
+		 */
+		kbase_mem_evictable_unmark_reclaim(gpu_alloc);
+	} else {
+		/* If the region is still alive ... */
+		if (gpu_alloc->reg) {
+			/* ... allocate replacement backing ... */
+			err = kbase_alloc_phy_pages_helper(gpu_alloc,
+					gpu_alloc->evicted);
+
+			/*
+			 * ... and grow the mapping back to its
+			 * pre-eviction size.
+			 */
+			if (!err)
+				err = kbase_mem_grow_gpu_mapping(kctx,
+						gpu_alloc->reg,
+						gpu_alloc->evicted, 0);
+
+			gpu_alloc->evicted = 0;
+		}
+	}
+
+	/* If the region is still alive remove the DONT_NEED attribute. */
+	if (gpu_alloc->reg)
+		gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED;
+
+	return (err == 0);
+}
+
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+	unsigned int real_flags = 0;
+	unsigned int prev_flags = 0;
+	bool prev_needed, new_needed;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (!gpu_addr)
+		return -EINVAL;
+
+	if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE))
+		return -EINVAL;
+
+	/* nuke other bits */
+	flags &= mask;
+
+	/* check for only supported flags */
+	if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* mask covers bits we don't support? */
+	if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* convert flags */
+	if (BASE_MEM_COHERENT_SYSTEM & flags)
+		real_flags |= KBASE_REG_SHARE_BOTH;
+	else if (BASE_MEM_COHERENT_LOCAL & flags)
+		real_flags |= KBASE_REG_SHARE_IN;
+
+	/* now we can lock down the context, and find the region */
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	/* Is the region being transitioning between not needed and needed? */
+	prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
+	new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
+	if (prev_needed != new_needed) {
+		/* Aliased allocations can't be made ephemeral */
+		if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1)
+			goto out_unlock;
+
+		if (new_needed) {
+			/* Only native allocations can be marked not needed */
+			if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			ret = kbase_mem_evictable_make(reg->gpu_alloc);
+			if (ret)
+				goto out_unlock;
+		} else {
+			kbase_mem_evictable_unmake(reg->gpu_alloc);
+		}
+	}
+
+	/* limit to imported memory */
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)
+		goto out_unlock;
+
+	/* no change? */
+	if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/* save for roll back */
+	prev_flags = reg->flags;
+	reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+	reg->flags |= real_flags;
+
+	/* Currently supporting only imported memory */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+		/* Future use will use the new flags, existing mapping will NOT be updated
+		 * as memory should not be in use by the GPU when updating the flags.
+		 */
+		ret = 0;
+		WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+	}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+	/* roll back on error */
+	if (ret)
+		reg->flags = prev_flags;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	up_write(&current->mm->mmap_sem);
+out:
+	return ret;
+}
+
+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
+		int fd, u64 *va_pages, u64 *flags, u32 padding)
+{
+	struct kbase_va_region *reg;
+	struct dma_buf *dma_buf;
+	struct dma_buf_attachment *dma_attachment;
+	bool shared_zone = false;
+
+	dma_buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(dma_buf))
+		goto no_buf;
+
+	dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+	if (!dma_attachment)
+		goto no_attachment;
+
+	*va_pages = (PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT) + padding;
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* ignore SAME_VA */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	} else {
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	/* No pages to map yet */
+	reg->gpu_alloc->nents = 0;
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMM is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMM cannot be grown */
+	reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (*flags & BASE_MEM_SECURE)
+		reg->flags |= KBASE_REG_SECURE;
+
+	if (padding)
+		reg->flags |= KBASE_REG_IMPORT_PAD;
+
+	reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM;
+	reg->gpu_alloc->imported.umm.sgt = NULL;
+	reg->gpu_alloc->imported.umm.dma_buf = dma_buf;
+	reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
+	reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
+	reg->extent = 0;
+
+	return reg;
+
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	dma_buf_detach(dma_buf, dma_attachment);
+no_attachment:
+	dma_buf_put(dma_buf);
+no_buf:
+	return NULL;
+}
+#endif  /* CONFIG_DMA_SHARED_BUFFER */
+
+static u32 kbase_get_cache_line_alignment(struct kbase_context *kctx)
+{
+	u32 cpu_cache_line_size = cache_line_size();
+	u32 gpu_cache_line_size =
+		(1UL << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
+
+	return ((cpu_cache_line_size > gpu_cache_line_size) ?
+				cpu_cache_line_size :
+				gpu_cache_line_size);
+}
+
+static struct kbase_va_region *kbase_mem_from_user_buffer(
+		struct kbase_context *kctx, unsigned long address,
+		unsigned long size, u64 *va_pages, u64 *flags)
+{
+	long i;
+	struct kbase_va_region *reg;
+	long faulted_pages;
+	int zone = KBASE_REG_ZONE_CUSTOM_VA;
+	bool shared_zone = false;
+	u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx);
+	struct kbase_alloc_import_user_buf *user_buf;
+	struct page **pages = NULL;
+
+	if ((address & (cache_line_alignment - 1)) != 0 ||
+			(size & (cache_line_alignment - 1)) != 0) {
+		/* Coherency must be enabled to handle partial cache lines */
+		if (*flags & (BASE_MEM_COHERENT_SYSTEM |
+			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
+			/* Force coherent system required flag, import will
+			 * then fail if coherency isn't available
+			 */
+			*flags |= BASE_MEM_COHERENT_SYSTEM_REQUIRED;
+		} else {
+			dev_warn(kctx->kbdev->dev,
+					"User buffer is not cache line aligned and no coherency enabled\n");
+			goto bad_size;
+		}
+	}
+
+	*va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) -
+		PFN_DOWN(address);
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (UINT64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* SAME_VA generally not supported with imported memory (no known use cases) */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		zone = KBASE_REG_ZONE_SAME_VA;
+	}
+
+	reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone);
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages,
+			KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */
+
+	user_buf = &reg->gpu_alloc->imported.user_buf;
+
+	user_buf->size = size;
+	user_buf->address = address;
+	user_buf->nr_pages = *va_pages;
+	user_buf->mm = current->mm;
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+	atomic_inc(&current->mm->mm_count);
+#else
+	mmgrab(current->mm);
+#endif
+	user_buf->pages = kmalloc_array(*va_pages, sizeof(struct page *),
+			GFP_KERNEL);
+
+	if (!user_buf->pages)
+		goto no_page_array;
+
+	/* If the region is coherent with the CPU then the memory is imported
+	 * and mapped onto the GPU immediately.
+	 * Otherwise get_user_pages is called as a sanity check, but with
+	 * NULL as the pages argument which will fault the pages, but not
+	 * pin them. The memory will then be pinned only around the jobs that
+	 * specify the region as an external resource.
+	 */
+	if (reg->flags & KBASE_REG_SHARE_BOTH) {
+		pages = user_buf->pages;
+		*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+	}
+
+	down_read(&current->mm->mmap_sem);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+#else
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#endif
+
+	up_read(&current->mm->mmap_sem);
+
+	if (faulted_pages != *va_pages)
+		goto fault_mismatch;
+
+	reg->gpu_alloc->nents = 0;
+	reg->extent = 0;
+
+	if (pages) {
+		struct device *dev = kctx->kbdev->dev;
+		unsigned long local_size = user_buf->size;
+		unsigned long offset = user_buf->address & ~PAGE_MASK;
+		struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
+
+		/* Top bit signifies that this was pinned on import */
+		user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
+
+		for (i = 0; i < faulted_pages; i++) {
+			dma_addr_t dma_addr;
+			unsigned long min;
+
+			min = MIN(PAGE_SIZE - offset, local_size);
+			dma_addr = dma_map_page(dev, pages[i],
+					offset, min,
+					DMA_BIDIRECTIONAL);
+			if (dma_mapping_error(dev, dma_addr))
+				goto unwind_dma_map;
+
+			user_buf->dma_addrs[i] = dma_addr;
+			pa[i] = as_tagged(page_to_phys(pages[i]));
+
+			local_size -= min;
+			offset = 0;
+		}
+
+		reg->gpu_alloc->nents = faulted_pages;
+	}
+
+	return reg;
+
+unwind_dma_map:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				user_buf->dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+fault_mismatch:
+	if (pages) {
+		for (i = 0; i < faulted_pages; i++)
+			put_page(pages[i]);
+	}
+no_page_array:
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	return NULL;
+
+}
+
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+		    u64 nents, struct base_mem_aliasing_info *ai,
+		    u64 *num_pages)
+{
+	struct kbase_va_region *reg;
+	u64 gpu_va;
+	size_t i;
+	bool coherent;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(ai);
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	/* mask to only allowed flags */
+	*flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+		   BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
+		   BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+
+	if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_alias called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+	coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 ||
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0;
+
+	if (!stride)
+		goto bad_stride;
+
+	if (!nents)
+		goto bad_nents;
+
+	if ((nents * stride) > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* calculate the number of pages this alias will cover */
+	*num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* 64-bit tasks must MMAP anyway, but not expose this address to
+		 * clients */
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_SAME_VA);
+	} else {
+#else
+	if (1) {
+#endif
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	/* zero-sized page array, as we don't need one/can support one */
+	reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->gpu_alloc->imported.alias.nents = nents;
+	reg->gpu_alloc->imported.alias.stride = stride;
+	reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents);
+	if (!reg->gpu_alloc->imported.alias.aliased)
+		goto no_aliased_array;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* validate and add src handles */
+	for (i = 0; i < nents; i++) {
+		if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+			if (ai[i].handle.basep.handle !=
+			    BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE)
+				goto bad_handle; /* unsupported magic handle */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+		} else {
+			struct kbase_va_region *aliasing_reg;
+			struct kbase_mem_phy_alloc *alloc;
+
+			aliasing_reg = kbase_region_tracker_find_region_base_address(
+				kctx,
+				(ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT);
+
+			/* validate found region */
+			if (!aliasing_reg)
+				goto bad_handle; /* Not found */
+			if (aliasing_reg->flags & KBASE_REG_FREE)
+				goto bad_handle; /* Free region */
+			if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+				goto bad_handle; /* Ephemeral region */
+			if (!aliasing_reg->gpu_alloc)
+				goto bad_handle; /* No alloc */
+			if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+				goto bad_handle; /* Not a native alloc */
+			if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0))
+				goto bad_handle;
+				/* Non-coherent memory cannot alias
+				   coherent memory, and vice versa.*/
+
+			/* check size against stride */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+
+			alloc = aliasing_reg->gpu_alloc;
+
+			/* check against the alloc's size */
+			if (ai[i].offset > alloc->nents)
+				goto bad_handle; /* beyond end */
+			if (ai[i].offset + ai[i].length > alloc->nents)
+				goto bad_handle; /* beyond end */
+
+			reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+			reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset;
+		}
+	}
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
+			goto no_cookie;
+		}
+		/* return a cookie */
+		gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << gpu_va);
+		BUG_ON(kctx->pending_regions[gpu_va]);
+		kctx->pending_regions[gpu_va] = reg;
+
+		/* relocate to correct base */
+		gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		gpu_va <<= PAGE_SHIFT;
+	} else /* we control the VA */ {
+#else
+	if (1) {
+#endif
+		if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+			dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags &= ~KBASE_REG_GROWABLE;
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return gpu_va;
+
+#ifdef CONFIG_64BIT
+no_cookie:
+#endif
+no_mmap:
+bad_handle:
+	kbase_gpu_vm_unlock(kctx);
+no_aliased_array:
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_reg:
+bad_size:
+bad_nents:
+bad_stride:
+bad_flags:
+	return 0;
+}
+
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages,
+		u64 *flags)
+{
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_pages);
+	KBASE_DEBUG_ASSERT(flags);
+
+	if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) &&
+			kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA))
+		*flags |= BASE_MEM_SAME_VA;
+
+	if (!kbase_check_import_flags(*flags)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
+		dev_warn(kctx->kbdev->dev,
+				"padding is only supported for UMM");
+		goto bad_flags;
+	}
+
+	switch (type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case BASE_MEM_IMPORT_TYPE_UMM: {
+		int fd;
+
+		if (get_user(fd, (int __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_umm(kctx, fd, va_pages, flags,
+					padding);
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
+		struct base_mem_import_user_buffer user_buffer;
+		void __user *uptr;
+
+		if (copy_from_user(&user_buffer, phandle,
+				sizeof(user_buffer))) {
+			reg = NULL;
+		} else {
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				uptr = compat_ptr(user_buffer.ptr);
+			else
+#endif
+				uptr = u64_to_user_ptr(user_buffer.ptr);
+
+			reg = kbase_mem_from_user_buffer(kctx,
+					(unsigned long)uptr, user_buffer.length,
+					va_pages, flags);
+		}
+		break;
+	}
+	default: {
+		reg = NULL;
+		break;
+	}
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies)
+			goto no_cookie;
+		/* return a cookie */
+		*gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << *gpu_va);
+		BUG_ON(kctx->pending_regions[*gpu_va]);
+		kctx->pending_regions[*gpu_va] = reg;
+
+		/* relocate to correct base */
+		*gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		*gpu_va <<= PAGE_SHIFT;
+
+	} else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES)  {
+		/* we control the VA, mmap now to the GPU */
+		if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	} else {
+		/* we control the VA, but nothing to mmap yet */
+		if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	/* clear out private flags */
+	*flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+
+no_gpu_va:
+no_cookie:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+no_reg:
+bad_flags:
+	*gpu_va = 0;
+	*va_pages = 0;
+	*flags = 0;
+	return -ENOMEM;
+}
+
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	struct tagged_addr *phy_pages;
+	u64 delta = new_pages - old_pages;
+	int ret = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Map the new pages into the GPU */
+	phy_pages = kbase_get_gpu_phy_pages(reg);
+	ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
+			phy_pages + old_pages, delta, reg->flags);
+
+	return ret;
+}
+
+void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 gpu_va_start = reg->start_pfn;
+
+	if (new_pages == old_pages)
+		/* Nothing to do */
+		return;
+
+	unmap_mapping_range(kctx->filp->f_inode->i_mapping,
+			(gpu_va_start + new_pages)<<PAGE_SHIFT,
+			(old_pages - new_pages)<<PAGE_SHIFT, 1);
+}
+
+int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 delta = old_pages - new_pages;
+	int ret = 0;
+
+	ret = kbase_mmu_teardown_pages(kctx,
+			reg->start_pfn + new_pages, delta);
+
+	return ret;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
+{
+	u64 old_pages;
+	u64 delta;
+	int res = -EINVAL;
+	struct kbase_va_region *reg;
+	bool read_locked = false;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+	if (gpu_addr & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev, "kbase:mem_commit: gpu_addr: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+		goto out_unlock;
+
+	if (0 == (reg->flags & KBASE_REG_GROWABLE))
+		goto out_unlock;
+
+	/* Would overflow the VA region */
+	if (new_pages > reg->nr_pages)
+		goto out_unlock;
+
+	/* can't be mapped more than once on the GPU */
+	if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1)
+		goto out_unlock;
+	/* can't grow regions which are ephemeral */
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+	if (new_pages == reg->gpu_alloc->nents) {
+		/* no change */
+		res = 0;
+		goto out_unlock;
+	}
+
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (new_pages > old_pages) {
+		delta = new_pages - old_pages;
+
+		/*
+		 * No update to the mm so downgrade the writer lock to a read
+		 * lock so other readers aren't blocked after this point.
+		 */
+		downgrade_write(&current->mm->mmap_sem);
+		read_locked = true;
+
+		/* Allocate some more pages */
+		if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+		if (reg->cpu_alloc != reg->gpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					reg->gpu_alloc, delta) != 0) {
+				res = -ENOMEM;
+				kbase_free_phy_pages_helper(reg->cpu_alloc,
+						delta);
+				goto out_unlock;
+			}
+		}
+
+		/* No update required for CPU mappings, that's done on fault. */
+
+		/* Update GPU mapping. */
+		res = kbase_mem_grow_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* On error free the new pages */
+		if (res) {
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+	} else {
+		delta = old_pages - new_pages;
+
+		/* Update all CPU mapping(s) */
+		kbase_mem_shrink_cpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* Update the GPU mapping */
+		res = kbase_mem_shrink_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+
+		kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+		if (reg->cpu_alloc != reg->gpu_alloc)
+			kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	if (read_locked)
+		up_read(&current->mm->mmap_sem);
+	else
+		up_write(&current->mm->mmap_sem);
+
+	return res;
+}
+
+static void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* non-atomic as we're under Linux' mm lock */
+	map->count++;
+}
+
+static void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* non-atomic as we're under Linux' mm lock */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	kbase_gpu_vm_lock(map->kctx);
+
+	if (map->free_on_close) {
+		KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
+				KBASE_REG_ZONE_SAME_VA);
+		/* Avoid freeing memory on the process death which results in
+		 * GPU Page Fault. Memory will be freed in kbase_destroy_context
+		 */
+		if (!(current->flags & PF_EXITING))
+			kbase_mem_free_region(map->kctx, map->region);
+	}
+
+	list_del(&map->mappings_list);
+
+	kbase_gpu_vm_unlock(map->kctx);
+
+	kbase_mem_phy_alloc_put(map->alloc);
+	kfree(map);
+}
+
+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
+
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
+static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+#else
+static int kbase_cpu_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+#endif
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+	pgoff_t rel_pgoff;
+	size_t i;
+	pgoff_t addr;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	rel_pgoff = vmf->pgoff - map->region->start_pfn;
+
+	kbase_gpu_vm_lock(map->kctx);
+	if (rel_pgoff >= map->alloc->nents)
+		goto locked_bad_fault;
+
+	/* Fault on access to DONT_NEED regions */
+	if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED))
+		goto locked_bad_fault;
+
+	/* insert all valid pages from the fault location */
+	i = rel_pgoff;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	addr = (pgoff_t)((uintptr_t)vmf->virtual_address >> PAGE_SHIFT);
+#else
+	addr = (pgoff_t)(vmf->address >> PAGE_SHIFT);
+#endif
+	while (i < map->alloc->nents && (addr < vma->vm_end >> PAGE_SHIFT)) {
+		int ret = vm_insert_pfn(vma, addr << PAGE_SHIFT,
+		    PFN_DOWN(as_phys_addr_t(map->alloc->pages[i])));
+		if (ret < 0 && ret != -EBUSY)
+			goto locked_bad_fault;
+
+		i++; addr++;
+	}
+
+	kbase_gpu_vm_unlock(map->kctx);
+	/* we resolved it, nothing for VM to do */
+	return VM_FAULT_NOPAGE;
+
+locked_bad_fault:
+	kbase_gpu_vm_unlock(map->kctx);
+	return VM_FAULT_SIGBUS;
+}
+
+const struct vm_operations_struct kbase_vm_ops = {
+	.open  = kbase_cpu_vm_open,
+	.close = kbase_cpu_vm_close,
+	.fault = kbase_cpu_vm_fault
+};
+
+static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close)
+{
+	struct kbase_cpu_mapping *map;
+	struct tagged_addr *page_array;
+	int err = 0;
+	int i;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+	if (!map) {
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+	 * VM_DONTEXPAND - disable mremap on this region
+	 * VM_IO - disables paging
+	 * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+	 * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+	 *               This is needed to support using the dedicated and
+	 *               the OS based memory backends together.
+	 */
+	/*
+	 * This will need updating to propagate coherency flags
+	 * See MIDBASE-1057
+	 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_ops;
+	vma->vm_private_data = map;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+	    (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+		/* We can't map vmalloc'd memory uncached.
+		 * Other memory will have been returned from
+		 * kbase_mem_pool which would be
+		 * suitable for mapping uncached.
+		 */
+		BUG_ON(kaddr);
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	if (!kaddr) {
+		unsigned long addr = vma->vm_start + aligned_offset;
+		u64 start_off = vma->vm_pgoff - reg->start_pfn +
+			(aligned_offset>>PAGE_SHIFT);
+
+		vma->vm_flags |= VM_PFNMAP;
+		for (i = 0; i < nr_pages; i++) {
+			phys_addr_t phys;
+
+			phys = as_phys_addr_t(page_array[i + start_off]);
+			err = vm_insert_pfn(vma, addr, PFN_DOWN(phys));
+			if (WARN_ON(err))
+				break;
+
+			addr += PAGE_SIZE;
+		}
+	} else {
+		WARN_ON(aligned_offset);
+		/* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+		vma->vm_flags |= VM_MIXEDMAP;
+		/* vmalloc remaping is easy... */
+		err = remap_vmalloc_range(vma, kaddr, 0);
+		WARN_ON(err);
+	}
+
+	if (err) {
+		kfree(map);
+		goto out;
+	}
+
+	map->region = reg;
+	map->free_on_close = free_on_close;
+	map->kctx = reg->kctx;
+	map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->count = 1; /* start with one ref */
+
+	if (reg->flags & KBASE_REG_CPU_CACHED)
+		map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+	return err;
+}
+
+static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
+{
+	struct kbase_va_region *new_reg;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+	u32 *tb;
+	int owns_tb = 1;
+
+	dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	if (!kctx->jctx.tb) {
+		KBASE_DEBUG_ASSERT(0 != size);
+		tb = vmalloc_user(size);
+
+		if (NULL == tb) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = kbase_device_trace_buffer_install(kctx, tb, size);
+		if (err) {
+			vfree(tb);
+			goto out;
+		}
+	} else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	*kaddr = kctx->jctx.tb;
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_region;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->cpu_alloc->imported.kctx = kctx;
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+
+	/* alloc now owns the tb */
+	owns_tb = 0;
+
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_va_region;
+	}
+
+	*reg = new_reg;
+
+	/* map read only, noexec */
+	vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+	/* the rest of the flags is added by the cpu_mmap handler */
+
+	dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
+	return 0;
+
+out_no_va_region:
+out_no_alloc:
+	kbase_free_alloced_region(new_reg);
+out_no_region:
+	if (owns_tb) {
+		kbase_device_trace_buffer_uninstall(kctx);
+		vfree(tb);
+	}
+out:
+	return err;
+}
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
+{
+	struct kbase_va_region *new_reg;
+	void *kaddr;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	kaddr = kbase_mmu_dump(kctx, nr_pages);
+
+	if (!kaddr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_va_region;
+	}
+
+	*kmap_addr = kaddr;
+	*reg = new_reg;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+	return 0;
+
+out_no_alloc:
+out_va_region:
+	kbase_free_alloced_region(new_reg);
+out:
+	return err;
+}
+
+
+void kbase_os_mem_map_lock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	down_read(&mm->mmap_sem);
+}
+
+void kbase_os_mem_map_unlock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	up_read(&mm->mmap_sem);
+}
+
+static int kbasep_reg_mmap(struct kbase_context *kctx,
+			   struct vm_area_struct *vma,
+			   struct kbase_va_region **regm,
+			   size_t *nr_pages, size_t *aligned_offset)
+
+{
+	int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+	struct kbase_va_region *reg;
+	int err = 0;
+
+	*aligned_offset = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n");
+
+	/* SAME_VA stuff, fetch the right region */
+	reg = kctx->pending_regions[cookie];
+	if (!reg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if ((reg->flags & KBASE_REG_GPU_NX) && (reg->nr_pages != *nr_pages)) {
+		/* incorrect mmap size */
+		/* leave the cookie for a potential later
+		 * mapping, or to be reclaimed later when the
+		 * context is freed */
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) ||
+	    (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) {
+		/* VM flags inconsistent with region flags */
+		err = -EPERM;
+		dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n",
+							__FILE__, __LINE__);
+		goto out;
+	}
+
+	/* adjust down nr_pages to what we have physically */
+	*nr_pages = kbase_reg_current_backed_size(reg);
+
+	if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset,
+						reg->nr_pages, 1) != 0) {
+		dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__);
+		/* Unable to map in GPU space. */
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+	/* no need for the cookie anymore */
+	kctx->pending_regions[cookie] = NULL;
+	kctx->cookies |= (1UL << cookie);
+
+	/*
+	 * Overwrite the offset with the region start_pfn, so we effectively
+	 * map from offset 0 in the region. However subtract the aligned
+	 * offset so that when user space trims the mapping the beginning of
+	 * the trimmed VMA has the correct vm_pgoff;
+	 */
+	vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT);
+out:
+	*regm = reg;
+	dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n");
+
+	return err;
+}
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx = file->private_data;
+	struct kbase_va_region *reg = NULL;
+	void *kaddr = NULL;
+	size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int err = 0;
+	int free_on_close = 0;
+	struct device *dev = kctx->kbdev->dev;
+	size_t aligned_offset = 0;
+
+	dev_dbg(dev, "kbase_mmap\n");
+
+	if (!(vma->vm_flags & VM_READ))
+		vma->vm_flags &= ~VM_MAYREAD;
+	if (!(vma->vm_flags & VM_WRITE))
+		vma->vm_flags &= ~VM_MAYWRITE;
+
+	if (0 == nr_pages) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
+		/* The non-mapped tracking helper page */
+		err = kbase_tracking_page_setup(kctx, vma);
+		goto out_unlock;
+	}
+
+	/* if not the MTP, verify that the MTP has been mapped */
+	rcu_read_lock();
+	/* catches both when the special page isn't present or
+	 * when we've forked */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		err = -EINVAL;
+		rcu_read_unlock();
+		goto out_unlock;
+	}
+	rcu_read_unlock();
+
+	switch (vma->vm_pgoff) {
+	case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
+	case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE):
+		/* Illegal handle for direct map */
+		err = -EINVAL;
+		goto out_unlock;
+	case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE):
+		err = kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+		/* MMU dump */
+		err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+	     PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+		err = kbasep_reg_mmap(kctx, vma, &reg, &nr_pages,
+							&aligned_offset);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	}
+	default: {
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+					(u64)vma->vm_pgoff << PAGE_SHIFT);
+
+		if (reg && !(reg->flags & KBASE_REG_FREE)) {
+			/* will this mapping overflow the size of the region? */
+			if (nr_pages > (reg->nr_pages -
+					(vma->vm_pgoff - reg->start_pfn))) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
+			if ((vma->vm_flags & VM_READ &&
+					!(reg->flags & KBASE_REG_CPU_RD)) ||
+					(vma->vm_flags & VM_WRITE &&
+					!(reg->flags & KBASE_REG_CPU_WR))) {
+				/* VM flags inconsistent with region flags */
+				err = -EPERM;
+				dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+				goto out_unlock;
+			}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+			if (KBASE_MEM_TYPE_IMPORTED_UMM ==
+							reg->cpu_alloc->type) {
+				if (0 != (vma->vm_pgoff - reg->start_pfn)) {
+					err = -EINVAL;
+					dev_warn(dev, "%s:%d attempt to do a partial map in a dma_buf: non-zero offset to dma_buf mapping!\n",
+						__FILE__, __LINE__);
+					goto out_unlock;
+				}
+				err = dma_buf_mmap(
+					reg->cpu_alloc->imported.umm.dma_buf,
+					vma, vma->vm_pgoff - reg->start_pfn);
+				goto out_unlock;
+			}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+			/* limit what we map to the amount currently backed */
+			if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+				if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
+					nr_pages = 0;
+				else
+					nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn);
+			}
+		} else {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+	} /* default */
+	} /* switch */
+
+	err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
+		/* MMU dump - userspace should now have a reference on
+		 * the pages, so we can now free the kernel mapping */
+		vfree(kaddr);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+out:
+	if (err)
+		dev_err(dev, "mmap failed %d\n", err);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmap);
+
+static void kbasep_sync_mem_regions(struct kbase_context *kctx,
+		struct kbase_vmap_struct *map, enum kbase_sync_type dest)
+{
+	size_t i;
+	off_t const offset = (uintptr_t)map->gpu_addr & ~PAGE_MASK;
+	size_t const page_count = PFN_UP(offset + map->size);
+
+	/* Sync first page */
+	size_t sz = MIN(((size_t) PAGE_SIZE - offset), map->size);
+	struct tagged_addr cpu_pa = map->cpu_pages[0];
+	struct tagged_addr gpu_pa = map->gpu_pages[0];
+
+	kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, dest);
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		cpu_pa = map->cpu_pages[i];
+		gpu_pa = map->gpu_pages[i];
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, dest);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1) {
+		cpu_pa = map->cpu_pages[page_count - 1];
+		gpu_pa = map->gpu_pages[page_count - 1];
+		sz = ((offset + map->size - 1) & ~PAGE_MASK) + 1;
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, dest);
+	}
+}
+
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map)
+{
+	struct kbase_va_region *reg;
+	unsigned long page_index;
+	unsigned int offset = gpu_addr & ~PAGE_MASK;
+	size_t page_count = PFN_UP(offset + size);
+	struct tagged_addr *page_array;
+	struct page **pages;
+	void *cpu_addr = NULL;
+	pgprot_t prot;
+	size_t i;
+
+	if (!size || !map)
+		return NULL;
+
+	/* check if page_count calculation will wrap */
+	if (size > ((size_t)-1 / PAGE_SIZE))
+		return NULL;
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn;
+
+	/* check if page_index + page_count will wrap */
+	if (-1UL - page_count < page_index)
+		goto out_unlock;
+
+	if (page_index + page_count > kbase_reg_current_backed_size(reg))
+		goto out_unlock;
+
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+	/* check access permissions can be satisfied
+	 * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */
+	if ((reg->flags & prot_request) != prot_request)
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+	if (!page_array)
+		goto out_unlock;
+
+	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out_unlock;
+
+	for (i = 0; i < page_count; i++)
+		pages[i] = phys_to_page(as_phys_addr_t(page_array[page_index +
+								  i]));
+
+	prot = PAGE_KERNEL;
+	if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
+		/* Map uncached */
+		prot = pgprot_writecombine(prot);
+	}
+	/* Note: enforcing a RO prot_request onto prot is not done, since:
+	 * - CPU-arch-specific integration required
+	 * - kbase_vmap() requires no access checks to be made/enforced */
+
+	cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+
+	kfree(pages);
+
+	if (!cpu_addr)
+		goto out_unlock;
+
+	map->gpu_addr = gpu_addr;
+	map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
+	map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+	map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
+	map->addr = (void *)((uintptr_t)cpu_addr + offset);
+	map->size = size;
+	map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) &&
+		!kbase_mem_is_imported(map->gpu_alloc->type);
+
+	if (map->sync_needed)
+		kbasep_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU);
+	kbase_gpu_vm_unlock(kctx);
+
+	return map->addr;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return NULL;
+}
+
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map)
+{
+	/* 0 is specified for prot_request to indicate no access checks should
+	 * be made.
+	 *
+	 * As mentioned in kbase_vmap_prot() this means that a kernel-side
+	 * CPU-RO mapping is not enforced to allow this to work */
+	return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map);
+}
+KBASE_EXPORT_TEST_API(kbase_vmap);
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+	void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+	vunmap(addr);
+
+	if (map->sync_needed)
+		kbasep_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE);
+	map->gpu_addr = 0;
+	map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+	map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+	map->cpu_pages = NULL;
+	map->gpu_pages = NULL;
+	map->addr = NULL;
+	map->size = 0;
+	map->sync_needed = false;
+}
+KBASE_EXPORT_TEST_API(kbase_vunmap);
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
+{
+	struct mm_struct *mm;
+
+	rcu_read_lock();
+	mm = rcu_dereference(kctx->process_mm);
+	if (mm) {
+		atomic_add(pages, &kctx->nonmapped_pages);
+#ifdef SPLIT_RSS_COUNTING
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+#else
+		spin_lock(&mm->page_table_lock);
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+		spin_unlock(&mm->page_table_lock);
+#endif
+	}
+	rcu_read_unlock();
+}
+
+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
+{
+	int pages;
+	struct mm_struct *mm;
+
+	spin_lock(&kctx->mm_update_lock);
+	mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
+	if (!mm) {
+		spin_unlock(&kctx->mm_update_lock);
+		return;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, NULL);
+	spin_unlock(&kctx->mm_update_lock);
+	synchronize_rcu();
+
+	pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+#ifdef SPLIT_RSS_COUNTING
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+#else
+	spin_lock(&mm->page_table_lock);
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+	spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx;
+
+	kctx = vma->vm_private_data;
+	kbasep_os_process_page_usage_drain(kctx);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+	.close = kbase_special_vm_close,
+};
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
+{
+	/* check that this is the only tracking page */
+	spin_lock(&kctx->mm_update_lock);
+	if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
+		spin_unlock(&kctx->mm_update_lock);
+		return -EFAULT;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, current->mm);
+
+	spin_unlock(&kctx->mm_update_lock);
+
+	/* no real access */
+	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_special_ops;
+	vma->vm_private_data = kctx;
+
+	return 0;
+}
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
+{
+	int res;
+	void *va;
+	dma_addr_t  dma_pa;
+	struct kbase_va_region *reg;
+	struct tagged_addr *page_array;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	unsigned long attrs = DMA_ATTR_WRITE_COMBINE;
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
+	u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+		    BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+	u32 i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(0 != size);
+	KBASE_DEBUG_ASSERT(0 != pages);
+
+	if (size == 0)
+		goto err;
+
+	/* All the alloc calls return zeroed memory */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+			     attrs);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+			     &attrs);
+#else
+	va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
+#endif
+	if (!va)
+		goto err;
+
+	/* Store the state so we can free it later. */
+	handle->cpu_va = va;
+	handle->dma_pa = dma_pa;
+	handle->size   = size;
+
+
+	reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA);
+	if (!reg)
+		goto no_reg;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	if (kbase_update_region_flags(kctx, reg, flags) != 0)
+		goto invalid_flags;
+
+	reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(reg->cpu_alloc))
+		goto no_alloc;
+
+	reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	for (i = 0; i < pages; i++)
+		page_array[i] = as_tagged(dma_pa + ((dma_addr_t)i << PAGE_SHIFT));
+
+	reg->cpu_alloc->nents = pages;
+
+	kbase_gpu_vm_lock(kctx);
+	res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
+	kbase_gpu_vm_unlock(kctx);
+	if (res)
+		goto no_mmap;
+
+	return va;
+
+no_mmap:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc:
+invalid_flags:
+	kfree(reg);
+no_reg:
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
+#endif
+err:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_va_alloc);
+
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
+{
+	struct kbase_va_region *reg;
+	int err;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
+	KBASE_DEBUG_ASSERT(reg);
+	err = kbase_gpu_munmap(kctx, reg);
+	kbase_gpu_vm_unlock(kctx);
+	KBASE_DEBUG_ASSERT(!err);
+
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+		       handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+			handle->cpu_va, handle->dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, handle->size,
+				handle->cpu_va, handle->dma_pa);
+#endif
+}
+KBASE_EXPORT_SYMBOL(kbase_va_free);
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
new file mode 100755
index 0000000..a14826e
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -0,0 +1,355 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_linux.h
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_MEM_LINUX_H_
+#define _KBASE_MEM_LINUX_H_
+
+/** A HWC dump mapping */
+struct kbase_hwc_dma_mapping {
+	void       *cpu_va;
+	dma_addr_t  dma_pa;
+	size_t      size;
+};
+
+/**
+ * kbase_mem_alloc - Create a new allocation for GPU
+ *
+ * @kctx:         The kernel context
+ * @va_pages:     The number of pages of virtual address space to reserve
+ * @commit_pages: The number of physical pages to allocate upfront
+ * @extent:       The number of extra pages to allocate on each GPU fault which
+ *                grows the region.
+ * @flags:        bitmask of BASE_MEM_* flags to convey special requirements &
+ *                properties for the new allocation.
+ * @gpu_va:       Start address of the memory region which was allocated from GPU
+ *                virtual address space.
+ *
+ * Return: 0 on success or error code
+ */
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
+		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
+		u64 *gpu_va);
+
+/**
+ * kbase_mem_query - Query properties of a GPU memory region
+ *
+ * @kctx:     The kernel context
+ * @gpu_addr: A GPU address contained within the memory region
+ * @query:    The type of query, from KBASE_MEM_QUERY_* flags, which could be
+ *            regarding the amount of backing physical memory allocated so far
+ *            for the region or the size of the region or the flags associated
+ *            with the region.
+ * @out:      Pointer to the location to store the result of query.
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query,
+		u64 *const out);
+
+/**
+ * kbase_mem_import - Import the external memory for use by the GPU
+ *
+ * @kctx:     The kernel context
+ * @type:     Type of external memory
+ * @phandle:  Handle to the external memory interpreted as per the type.
+ * @padding:  Amount of extra VA pages to append to the imported buffer
+ * @gpu_va:   GPU address assigned to the imported external memory
+ * @va_pages: Size of the memory region reserved from the GPU address space
+ * @flags:    bitmask of BASE_MEM_* flags to convey special requirements &
+ *            properties for the new allocation representing the external
+ *            memory.
+ * Return: 0 on success or error code
+ */
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages,
+		u64 *flags);
+
+/**
+ * kbase_mem_alias - Create a new allocation for GPU, aliasing one or more
+ *                   memory regions
+ *
+ * @kctx:      The kernel context
+ * @flags:     bitmask of BASE_MEM_* flags.
+ * @stride:    Bytes between start of each memory region
+ * @nents:     The number of regions to pack together into the alias
+ * @ai:        Pointer to the struct containing the memory aliasing info
+ * @num_pages: Number of pages the alias will cover
+ *
+ * Return: 0 on failure or otherwise the GPU VA for the alias
+ */
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+
+/**
+ * kbase_mem_flags_change - Change the flags for a memory region
+ *
+ * @kctx:     The kernel context
+ * @gpu_addr: A GPU address contained within the memory region to modify.
+ * @flags:    The new flags to set
+ * @mask:     Mask of the flags, from BASE_MEM_*, to modify.
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
+
+/**
+ * kbase_mem_commit - Change the physical backing size of a region
+ *
+ * @kctx: The kernel context
+ * @gpu_addr: Handle to the memory region
+ * @new_pages: Number of physical pages to back the region with
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages);
+
+/**
+ * kbase_mmap - Mmap method, gets invoked when mmap system call is issued on
+ *              device file /dev/malixx.
+ * @file: Pointer to the device file /dev/malixx instance.
+ * @vma:  Pointer to the struct containing the info where the GPU allocation
+ *        will be mapped in virtual address space of CPU.
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mmap(struct file *file, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to initialize.
+ *
+ * Return: Zero on success or -errno on failure.
+ */
+int kbase_mem_evictable_init(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to de-initialize.
+ */
+void kbase_mem_evictable_deinit(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the grow
+ * @old_pages: The number of pages before the grow
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Expand the GPU mapping to encompass the new psychical pages which have
+ * been added to the allocation.
+ *
+ * Note: Caller must be holding the region lock.
+ */
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction
+ * @gpu_alloc: The physical allocation to make evictable
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Take the provided region and make all the physical pages within it
+ * reclaimable by the kernel, updating the per-process VM stats as well.
+ * Remove any CPU mappings (as these can't be removed in the shrinker callback
+ * as mmap_sem might already be taken) but leave the GPU mapping intact as
+ * and until the shrinker reclaims the allocation.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc);
+
+/**
+ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for
+ * eviction.
+ * @alloc: The physical allocation to remove eviction eligibility from.
+ *
+ * Return: True if the allocation had its backing restored and false if
+ * it hasn't.
+ *
+ * Make the physical pages in the region no longer reclaimable and update the
+ * per-process stats, if the shrinker has already evicted the memory then
+ * re-allocate it if the region is still alive.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
+
+struct kbase_vmap_struct {
+	u64 gpu_addr;
+	struct kbase_mem_phy_alloc *cpu_alloc;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+	struct tagged_addr *cpu_pages;
+	struct tagged_addr *gpu_pages;
+	void *addr;
+	size_t size;
+	bool sync_needed;
+};
+
+
+/**
+ * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the
+ * requested access permissions are supported
+ * @kctx:         Context the VA range belongs to
+ * @gpu_addr:     Start address of VA range
+ * @size:         Size of VA range
+ * @prot_request: Flags indicating how the caller will then access the memory
+ * @map:          Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check
+ * whether the region should allow the intended access, and return an error if
+ * disallowed. This is essential for security of imported memory, particularly
+ * a user buf from SHM mapped into the process as RO. In that case, write
+ * access must be checked if the intention is for kernel to write to the
+ * memory.
+ *
+ * The checks are also there to help catch access errors on memory where
+ * security is not a concern: imported memory that is always RW, and memory
+ * that was allocated and owned by the process attached to @kctx. In this case,
+ * it helps to identify memory that was was mapped with the wrong access type.
+ *
+ * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases
+ * where either the security of memory is solely dependent on those flags, or
+ * when userspace code was expecting only the GPU to access the memory (e.g. HW
+ * workarounds).
+ *
+ * All cache maintenance operations shall be ignored if the
+ * memory region has been imported.
+ *
+ */
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vmap - Map a GPU VA range into the kernel safely
+ * @kctx:     Context the VA range belongs to
+ * @gpu_addr: Start address of VA range
+ * @size:     Size of VA range
+ * @map:      Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no
+ * checks to ensure the security of e.g. imported user bufs from RO SHM.
+ *
+ * Note: All cache maintenance operations shall be ignored if the memory region
+ * has been imported.
+ */
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vunmap - Unmap a GPU VA range from the kernel
+ * @kctx: Context the VA range belongs to
+ * @map:  Structure describing the mapping from the corresponding kbase_vmap()
+ *        call
+ *
+ * Unmaps a GPU VA range from the kernel, given its @map structure obtained
+ * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * The reference taken on pages during kbase_vmap() is released.
+ *
+ * Note: All cache maintenance operations shall be ignored if the memory region
+ * has been imported.
+ */
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
+
+/** @brief Allocate memory from kernel space and map it onto the GPU
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param size   The size of the allocation in bytes
+ * @param handle An opaque structure used to contain the state needed to free the memory
+ * @return the VA for kernel space and GPU MMU
+ */
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
+
+/** @brief Free/unmap memory allocated by kbase_va_alloc
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param handle An opaque structure returned by the kbase_va_alloc function.
+ */
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
+
+extern const struct vm_operations_struct kbase_vm_ops;
+
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ */
+void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+#endif				/* _KBASE_MEM_LINUX_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
new file mode 100755
index 0000000..6581ecf
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -0,0 +1,155 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_MEM_LOWLEVEL_H
+#define _KBASE_MEM_LOWLEVEL_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/dma-mapping.h>
+
+/**
+ * @brief Flags for kbase_phy_allocator_pages_alloc
+ */
+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0)	/** Default allocation flag */
+#define KBASE_PHY_PAGES_FLAG_CLEAR   (1 << 0)	/** Clear the pages after allocation */
+#define KBASE_PHY_PAGES_FLAG_POISON  (1 << 1)	/** Fill the memory with a poison value */
+
+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON)
+
+#define KBASE_PHY_PAGES_POISON_VALUE  0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */
+
+enum kbase_sync_type {
+	KBASE_SYNC_TO_CPU,
+	KBASE_SYNC_TO_DEVICE
+};
+
+struct tagged_addr { phys_addr_t tagged_addr; };
+
+#define HUGE_PAGE    (1u << 0)
+#define HUGE_HEAD    (1u << 1)
+#define FROM_PARTIAL (1u << 2)
+
+/*
+ * Note: if macro for converting physical address to page is not defined
+ * in the kernel itself, it is defined hereby. This is to avoid build errors
+ * which are reported during builds for some architectures.
+ */
+#ifndef phys_to_page
+#define phys_to_page(phys)	(pfn_to_page((phys) >> PAGE_SHIFT))
+#endif
+
+/**
+ * as_phys_addr_t - Retrieve the physical address from tagged address by
+ *                  masking the lower order 12 bits.
+ * @t: tagged address to be translated.
+ *
+ * Return: physical address corresponding to tagged address.
+ */
+static inline phys_addr_t as_phys_addr_t(struct tagged_addr t)
+{
+	return t.tagged_addr & PAGE_MASK;
+}
+
+/**
+ * as_tagged - Convert the physical address to tagged address type though
+ *             there is no tag info present, the lower order 12 bits will be 0
+ * @phys: physical address to be converted to tagged type
+ *
+ * This is used for 4KB physical pages allocated by the Driver or imported pages
+ * and is needed as physical pages tracking object stores the reference for
+ * physical pages using tagged address type in lieu of the type generally used
+ * for physical addresses.
+ *
+ * Return: address of tagged address type.
+ */
+static inline struct tagged_addr as_tagged(phys_addr_t phys)
+{
+	struct tagged_addr t;
+
+	t.tagged_addr = phys & PAGE_MASK;
+	return t;
+}
+
+/**
+ * as_tagged_tag - Form the tagged address by storing the tag or metadata in the
+ *                 lower order 12 bits of physial address
+ * @phys: physical address to be converted to tagged address
+ * @tag:  tag to be stored along with the physical address.
+ *
+ * The tag info is used while freeing up the pages
+ *
+ * Return: tagged address storing physical address & tag.
+ */
+static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag)
+{
+	struct tagged_addr t;
+
+	t.tagged_addr = (phys & PAGE_MASK) | (tag & ~PAGE_MASK);
+	return t;
+}
+
+/**
+ * is_huge - Check if the physical page is one of the 512 4KB pages of the
+ *           large page which was not split to be used partially
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page belongs to large page, or false
+ */
+static inline bool is_huge(struct tagged_addr t)
+{
+	return t.tagged_addr & HUGE_PAGE;
+}
+
+/**
+ * is_huge_head - Check if the physical page is the first 4KB page of the
+ *                512 4KB pages within a large page which was not split
+ *                to be used partially
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page is the first page of a large page, or false
+ */
+static inline bool is_huge_head(struct tagged_addr t)
+{
+	int mask = HUGE_HEAD | HUGE_PAGE;
+
+	return mask == (t.tagged_addr & mask);
+}
+
+/**
+ * is_partial - Check if the physical page is one of the 512 pages of the
+ *              large page which was split in 4KB pages to be used
+ *              partially for allocations >= 2 MB in size.
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page was taken from large page used partially, or false
+ */
+static inline bool is_partial(struct tagged_addr t)
+{
+	return t.tagged_addr & FROM_PARTIAL;
+}
+
+#endif /* _KBASE_LOWLEVEL_H */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
new file mode 100755
index 0000000..1255df0
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -0,0 +1,834 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/shrinker.h>
+#include <linux/atomic.h>
+#include <linux/version.h>
+
+#define pool_dbg(pool, format, ...) \
+	dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format,	\
+		(pool->next_pool) ? "kctx" : "kbdev",	\
+		kbase_mem_pool_size(pool),	\
+		kbase_mem_pool_max_size(pool),	\
+		##__VA_ARGS__)
+
+#define NOT_DIRTY false
+#define NOT_RECLAIMED false
+
+static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
+{
+	ssize_t max_size = kbase_mem_pool_max_size(pool);
+	ssize_t cur_size = kbase_mem_pool_size(pool);
+
+	return max(max_size - cur_size, (ssize_t)0);
+}
+
+static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool);
+}
+
+static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) == 0;
+}
+
+static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_add(&p->lru, &pool->page_list);
+	pool->cur_size++;
+
+	pool_dbg(pool, "added page\n");
+}
+
+static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_locked(pool, p);
+	kbase_mem_pool_unlock(pool);
+}
+
+static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_splice(page_list, &pool->page_list);
+	pool->cur_size += nr_pages;
+
+	pool_dbg(pool, "added %zu pages\n", nr_pages);
+}
+
+static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_list_locked(pool, page_list, nr_pages);
+	kbase_mem_pool_unlock(pool);
+}
+
+static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (kbase_mem_pool_is_empty(pool))
+		return NULL;
+
+	p = list_first_entry(&pool->page_list, struct page, lru);
+	list_del_init(&p->lru);
+	pool->cur_size--;
+
+	pool_dbg(pool, "removed page\n");
+
+	return p;
+}
+
+static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	kbase_mem_pool_lock(pool);
+	p = kbase_mem_pool_remove_locked(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return p;
+}
+
+static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_sync_single_for_device(dev, kbase_dma_addr(p),
+			(PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL);
+}
+
+static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	int i;
+
+	for (i = 0; i < (1U << pool->order); i++)
+		clear_highpage(p+i);
+
+	kbase_mem_pool_sync_page(pool, p);
+}
+
+static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool,
+		struct page *p)
+{
+	/* Zero page before spilling */
+	kbase_mem_pool_zero_page(next_pool, p);
+
+	kbase_mem_pool_add(next_pool, p);
+}
+
+struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+	gfp_t gfp;
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr;
+	int i;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \
+	LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+	/* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+	gfp = GFP_USER | __GFP_ZERO;
+#else
+	gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+	/* don't warn on higer order failures */
+	if (pool->order)
+		gfp |= __GFP_NOWARN;
+
+	p = alloc_pages(gfp, pool->order);
+	if (!p)
+		return NULL;
+
+	dma_addr = dma_map_page(dev, p, 0, (PAGE_SIZE << pool->order),
+				DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, dma_addr)) {
+		__free_pages(p, pool->order);
+		return NULL;
+	}
+
+	WARN_ON(dma_addr != page_to_phys(p));
+	for (i = 0; i < (1u << pool->order); i++)
+		kbase_set_dma_addr(p+i, dma_addr + PAGE_SIZE * i);
+
+	return p;
+}
+
+static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr = kbase_dma_addr(p);
+	int i;
+
+	dma_unmap_page(dev, dma_addr, (PAGE_SIZE << pool->order),
+		       DMA_BIDIRECTIONAL);
+	for (i = 0; i < (1u << pool->order); i++)
+		kbase_clear_dma_addr(p+i);
+	__free_pages(p, pool->order);
+
+	pool_dbg(pool, "freed page to kernel\n");
+}
+
+static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	struct page *p;
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	return i;
+}
+
+static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	size_t nr_freed;
+
+	kbase_mem_pool_lock(pool);
+	nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	kbase_mem_pool_unlock(pool);
+
+	return nr_freed;
+}
+
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
+		size_t nr_to_grow)
+{
+	struct page *p;
+	size_t i;
+
+	kbase_mem_pool_lock(pool);
+
+	pool->dont_reclaim = true;
+	for (i = 0; i < nr_to_grow; i++) {
+		if (pool->dying) {
+			pool->dont_reclaim = false;
+			kbase_mem_pool_shrink_locked(pool, nr_to_grow);
+			kbase_mem_pool_unlock(pool);
+
+			return -ENOMEM;
+		}
+		kbase_mem_pool_unlock(pool);
+
+		p = kbase_mem_alloc_page(pool);
+		if (!p) {
+			kbase_mem_pool_lock(pool);
+			pool->dont_reclaim = false;
+			kbase_mem_pool_unlock(pool);
+
+			return -ENOMEM;
+		}
+
+		kbase_mem_pool_lock(pool);
+		kbase_mem_pool_add_locked(pool, p);
+	}
+	pool->dont_reclaim = false;
+	kbase_mem_pool_unlock(pool);
+
+	return 0;
+}
+
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
+{
+	size_t cur_size;
+	int err = 0;
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	if (new_size > pool->max_size)
+		new_size = pool->max_size;
+
+	if (new_size < cur_size)
+		kbase_mem_pool_shrink(pool, cur_size - new_size);
+	else if (new_size > cur_size)
+		err = kbase_mem_pool_grow(pool, new_size - cur_size);
+
+	if (err) {
+		size_t grown_size = kbase_mem_pool_size(pool);
+
+		dev_warn(pool->kbdev->dev,
+			 "Mem pool not grown to the required size of %zu bytes, grown for additional %zu bytes instead!\n",
+			 (new_size - cur_size), (grown_size - cur_size));
+	}
+}
+
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size)
+{
+	size_t cur_size;
+	size_t nr_to_shrink;
+
+	kbase_mem_pool_lock(pool);
+
+	pool->max_size = max_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+	if (max_size < cur_size) {
+		nr_to_shrink = cur_size - max_size;
+		kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	}
+
+	kbase_mem_pool_unlock(pool);
+}
+
+
+static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+	size_t pool_size;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+
+	kbase_mem_pool_lock(pool);
+	if (pool->dont_reclaim && !pool->dying) {
+		kbase_mem_pool_unlock(pool);
+		return 0;
+	}
+	pool_size = kbase_mem_pool_size(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return pool_size;
+}
+
+static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+	unsigned long freed;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+
+	kbase_mem_pool_lock(pool);
+	if (pool->dont_reclaim && !pool->dying) {
+		kbase_mem_pool_unlock(pool);
+		return 0;
+	}
+
+	pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan);
+
+	freed = kbase_mem_pool_shrink_locked(pool, sc->nr_to_scan);
+
+	kbase_mem_pool_unlock(pool);
+
+	pool_dbg(pool, "reclaim freed %ld pages\n", freed);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_pool_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_pool_reclaim_count_objects(s, sc);
+
+	return kbase_mem_pool_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		size_t order,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool)
+{
+	pool->cur_size = 0;
+	pool->max_size = max_size;
+	pool->order = order;
+	pool->kbdev = kbdev;
+	pool->next_pool = next_pool;
+	pool->dying = false;
+
+	spin_lock_init(&pool->pool_lock);
+	INIT_LIST_HEAD(&pool->page_list);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink;
+#else
+	pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects;
+	pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects;
+#endif
+	pool->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	pool->reclaim.batch = 0;
+#endif
+	register_shrinker(&pool->reclaim);
+
+	pool_dbg(pool, "initialized\n");
+
+	return 0;
+}
+
+void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool)
+{
+	kbase_mem_pool_lock(pool);
+	pool->dying = true;
+	kbase_mem_pool_unlock(pool);
+}
+
+void kbase_mem_pool_term(struct kbase_mem_pool *pool)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_spill = 0;
+	LIST_HEAD(spill_list);
+	int i;
+
+	pool_dbg(pool, "terminate()\n");
+
+	unregister_shrinker(&pool->reclaim);
+
+	kbase_mem_pool_lock(pool);
+	pool->max_size = 0;
+
+	if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool (may overspill) */
+		nr_to_spill = kbase_mem_pool_capacity(next_pool);
+		nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill);
+
+		/* Zero pages first without holding the next_pool lock */
+		for (i = 0; i < nr_to_spill; i++) {
+			p = kbase_mem_pool_remove_locked(pool);
+			kbase_mem_pool_zero_page(pool, p);
+			list_add(&p->lru, &spill_list);
+		}
+	}
+
+	while (!kbase_mem_pool_is_empty(pool)) {
+		/* Free remaining pages to kernel */
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	kbase_mem_pool_unlock(pool);
+
+	if (next_pool && nr_to_spill) {
+		/* Add new page list to next_pool */
+		kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill);
+
+		pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill);
+	}
+
+	pool_dbg(pool, "terminated\n");
+}
+
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	do {
+		pool_dbg(pool, "alloc()\n");
+		p = kbase_mem_pool_remove(pool);
+
+		if (p)
+			return p;
+
+		pool = pool->next_pool;
+	} while (pool);
+
+	return NULL;
+}
+
+struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	pool_dbg(pool, "alloc_locked()\n");
+	p = kbase_mem_pool_remove_locked(pool);
+
+	if (p)
+		return p;
+
+	return NULL;
+}
+
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+
+	pool_dbg(pool, "free()\n");
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add(pool, p);
+	} else if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool */
+		kbase_mem_pool_spill(next_pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
+void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	pool_dbg(pool, "free_locked()\n");
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add_locked(pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
+		struct tagged_addr *pages, bool partial_allowed)
+{
+	struct page *p;
+	size_t nr_from_pool;
+	size_t i = 0;
+	int err = -ENOMEM;
+	size_t nr_pages_internal;
+
+	nr_pages_internal = nr_4k_pages / (1u << (pool->order));
+
+	if (nr_pages_internal * (1u << pool->order) != nr_4k_pages)
+		return -EINVAL;
+
+	pool_dbg(pool, "alloc_pages(4k=%zu):\n", nr_4k_pages);
+	pool_dbg(pool, "alloc_pages(internal=%zu):\n", nr_pages_internal);
+
+	/* Get pages from this pool */
+	kbase_mem_pool_lock(pool);
+	nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool));
+	while (nr_from_pool--) {
+		int j;
+		p = kbase_mem_pool_remove_locked(pool);
+		if (pool->order) {
+			pages[i++] = as_tagged_tag(page_to_phys(p),
+						   HUGE_HEAD | HUGE_PAGE);
+			for (j = 1; j < (1u << pool->order); j++)
+				pages[i++] = as_tagged_tag(page_to_phys(p) +
+							   PAGE_SIZE * j,
+							   HUGE_PAGE);
+		} else {
+			pages[i++] = as_tagged(page_to_phys(p));
+		}
+	}
+	kbase_mem_pool_unlock(pool);
+
+	if (i != nr_4k_pages && pool->next_pool) {
+		/* Allocate via next pool */
+		err = kbase_mem_pool_alloc_pages(pool->next_pool,
+				nr_4k_pages - i, pages + i, partial_allowed);
+
+		if (err < 0)
+			goto err_rollback;
+
+		i += err;
+	} else {
+		/* Get any remaining pages from kernel */
+		while (i != nr_4k_pages) {
+			p = kbase_mem_alloc_page(pool);
+			if (!p) {
+				if (partial_allowed)
+					goto done;
+				else
+					goto err_rollback;
+			}
+
+			if (pool->order) {
+				int j;
+
+				pages[i++] = as_tagged_tag(page_to_phys(p),
+							   HUGE_PAGE |
+							   HUGE_HEAD);
+				for (j = 1; j < (1u << pool->order); j++) {
+					phys_addr_t phys;
+
+					phys = page_to_phys(p) + PAGE_SIZE * j;
+					pages[i++] = as_tagged_tag(phys,
+								   HUGE_PAGE);
+				}
+			} else {
+				pages[i++] = as_tagged(page_to_phys(p));
+			}
+		}
+	}
+
+done:
+	pool_dbg(pool, "alloc_pages(%zu) done\n", i);
+	return i;
+
+err_rollback:
+	kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED);
+	return err;
+}
+
+int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_4k_pages, struct tagged_addr *pages)
+{
+	struct page *p;
+	size_t i;
+	size_t nr_pages_internal;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	nr_pages_internal = nr_4k_pages / (1u << (pool->order));
+
+	if (nr_pages_internal * (1u << pool->order) != nr_4k_pages)
+		return -EINVAL;
+
+	pool_dbg(pool, "alloc_pages_locked(4k=%zu):\n", nr_4k_pages);
+	pool_dbg(pool, "alloc_pages_locked(internal=%zu):\n",
+			nr_pages_internal);
+
+	if (kbase_mem_pool_size(pool) < nr_pages_internal) {
+		pool_dbg(pool, "Failed alloc\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_pages_internal; i++) {
+		int j;
+
+		p = kbase_mem_pool_remove_locked(pool);
+		if (pool->order) {
+			*pages++ = as_tagged_tag(page_to_phys(p),
+						   HUGE_HEAD | HUGE_PAGE);
+			for (j = 1; j < (1u << pool->order); j++) {
+				*pages++ = as_tagged_tag(page_to_phys(p) +
+							   PAGE_SIZE * j,
+							   HUGE_PAGE);
+			}
+		} else {
+			*pages++ = as_tagged(page_to_phys(p));
+		}
+	}
+
+	return nr_4k_pages;
+}
+
+static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
+				     size_t nr_pages, struct tagged_addr *pages,
+				     bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first without holding the pool lock */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge_head(pages[i]) || !is_huge(pages[i])) {
+			p = phys_to_page(as_phys_addr_t(pages[i]));
+			if (zero)
+				kbase_mem_pool_zero_page(pool, p);
+			else if (sync)
+				kbase_mem_pool_sync_page(pool, p);
+
+			list_add(&p->lru, &new_page_list);
+			nr_to_pool++;
+		}
+		pages[i] = as_tagged(0);
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
+static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages,
+		bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array_locked(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge_head(pages[i]) || !is_huge(pages[i])) {
+			p = phys_to_page(as_phys_addr_t(pages[i]));
+			if (zero)
+				kbase_mem_pool_zero_page(pool, p);
+			else if (sync)
+				kbase_mem_pool_sync_page(pool, p);
+
+			list_add(&p->lru, &new_page_list);
+			nr_to_pool++;
+		}
+		pages[i] = as_tagged(0);
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list_locked(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array_locked(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		struct tagged_addr *pages, bool dirty, bool reclaimed)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	pool_dbg(pool, "free_pages(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
+
+		i += nr_to_pool;
+
+		if (i != nr_pages && next_pool) {
+			/* Spill to next pool (may overspill) */
+			nr_to_pool = kbase_mem_pool_capacity(next_pool);
+			nr_to_pool = min(nr_pages - i, nr_to_pool);
+
+			kbase_mem_pool_add_array(next_pool, nr_to_pool,
+					pages + i, true, dirty);
+			i += nr_to_pool;
+		}
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge(pages[i]) && !is_huge_head(pages[i])) {
+			pages[i] = as_tagged(0);
+			continue;
+		}
+
+		p = phys_to_page(as_phys_addr_t(pages[i]));
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = as_tagged(0);
+	}
+
+	pool_dbg(pool, "free_pages(%zu) done\n", nr_pages);
+}
+
+
+void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages, bool dirty,
+		bool reclaimed)
+{
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	pool_dbg(pool, "free_pages_locked(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false,
+				dirty);
+
+		i += nr_to_pool;
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge(pages[i]) && !is_huge_head(pages[i])) {
+			pages[i] = as_tagged(0);
+			continue;
+		}
+
+		p = phys_to_page(as_phys_addr_t(pages[i]));
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = as_tagged(0);
+	}
+
+	pool_dbg(pool, "free_pages_locked(%zu) done\n", nr_pages);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
new file mode 100644
index 0000000..4b4eeb3
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
@@ -0,0 +1,93 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <mali_kbase_mem_pool_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_trim(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops,
+		kbase_mem_pool_debugfs_size_get,
+		kbase_mem_pool_debugfs_size_set,
+		"%llu\n");
+
+static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_max_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_set_max_size(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops,
+		kbase_mem_pool_debugfs_max_size_get,
+		kbase_mem_pool_debugfs_max_size_set,
+		"%llu\n");
+
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_mem_pool *pool,
+		struct kbase_mem_pool *lp_pool)
+{
+	debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_size_fops);
+
+	debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_max_size_fops);
+
+	debugfs_create_file("lp_mem_pool_size", S_IRUGO | S_IWUSR, parent,
+			lp_pool, &kbase_mem_pool_debugfs_size_fops);
+
+	debugfs_create_file("lp_mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+			lp_pool, &kbase_mem_pool_debugfs_max_size_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
new file mode 100644
index 0000000..990d91c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_MEM_POOL_DEBUGFS_H
+#define _KBASE_MEM_POOL_DEBUGFS_H
+
+#include <mali_kbase.h>
+
+/**
+ * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool
+ * @parent:  Parent debugfs dentry
+ * @pool:    Memory pool of small pages to control
+ * @lp_pool: Memory pool of large pages to control
+ *
+ * Adds four debugfs files under @parent:
+ * - mem_pool_size: get/set the current size of @pool
+ * - mem_pool_max_size: get/set the max size of @pool
+ * - lp_mem_pool_size: get/set the current size of @lp_pool
+ * - lp_mem_pool_max_size: get/set the max size of @lp_pool
+ */
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_mem_pool *pool,
+		struct kbase_mem_pool *lp_pool);
+
+#endif  /*_KBASE_MEM_POOL_DEBUGFS_H*/
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
new file mode 100644
index 0000000..d4f8433
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,126 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Show callback for the @c mem_profile debugfs file.
+ *
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise
+ */
+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+
+	seq_putc(sfile, '\n');
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for mem_profile
+ */
+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_mem_profile_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+	.open = kbasep_mem_profile_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+		kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		if (!debugfs_create_file("mem_profile", S_IRUGO,
+					kctx->kctx_dentry, kctx,
+					&kbasep_mem_profile_debugfs_fops)) {
+			err = -EAGAIN;
+		} else {
+			kbase_ctx_flag_set(kctx,
+					   KCTX_MEM_PROFILE_INITIALIZED);
+		}
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		kfree(kctx->mem_profile_data);
+		kctx->mem_profile_data = data;
+		kctx->mem_profile_size = size;
+	} else {
+		kfree(data);
+	}
+
+	dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d",
+		err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return err;
+}
+
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+				kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	kfree(kctx->mem_profile_data);
+	kctx->mem_profile_data = NULL;
+	kctx->mem_profile_size = 0;
+
+	mutex_unlock(&kctx->mem_profile_lock);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	kfree(data);
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
new file mode 100644
index 0000000..1462247
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs.h
+ * Header file for mem profiles entries in debugfs
+ *
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
+#define _KBASE_MEM_PROFILE_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Remove entry from Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
+
+/**
+ * @brief Insert @p data to the debugfs file so it can be read by userspace
+ *
+ * The function takes ownership of @p data and frees it later when new data
+ * is inserted.
+ *
+ * If the debugfs entry corresponding to the @p kctx doesn't exist,
+ * an attempt will be made to create it.
+ *
+ * @param kctx The context whose debugfs file @p data should be inserted to
+ * @param data A NULL-terminated string to be inserted to the debugfs file,
+ *             without the trailing new line character
+ * @param size The length of the @p data string
+ * @return 0 if @p data inserted correctly
+ *         -EAGAIN in case of error
+ * @post @ref mem_profile_initialized will be set to @c true
+ *       the first time this function succeeds.
+ */
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size);
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
new file mode 100755
index 0000000..7f44d81
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_mem_profile_debugfs_buf_size.h
+ * Header file for the size of the buffer to accumulate the histogram report text in
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * The size of the buffer to accumulate the histogram report text in
+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE \
+	((size_t) (64 + ((80 + (56 * 64)) * 31) + 56))
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
new file mode 100755
index 0000000..a998930
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -0,0 +1,2437 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mmu.c
+ * Base kernel MMU management.
+ */
+
+/* #define DEBUG    1 */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_debug.h>
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_time.h>
+#include <mali_kbase_mem.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+/**
+ * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs.
+ *
+ * If sync is not set then transactions still in flight when the flush is issued
+ * may use the old page tables and the data they write will not be written out
+ * to memory, this function returns after the flush has been issued but
+ * before all accesses which might effect the flushed region have completed.
+ *
+ * If sync is set then accesses in the flushed region will be drained
+ * before data is flush and invalidated through L1, L2 and into memory,
+ * after which point this function will return.
+ */
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync);
+
+/**
+ * kbase_mmu_sync_pgd - sync page directory to memory
+ * @kbdev:	Device pointer.
+ * @handle:	Address of DMA region.
+ * @size:       Size of the region to sync.
+ *
+ * This should be called after each page directory update.
+ */
+
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
+		dma_addr_t handle, size_t size)
+{
+	/* If page table is not coherent then ensure the gpu can read
+	 * the pages from memory
+	 */
+	if (kbdev->system_coherency != COHERENCY_ACE)
+		dma_sync_single_for_device(kbdev->dev, handle, size,
+				DMA_TO_DEVICE);
+}
+
+/*
+ * Definitions:
+ * - PGD: Page Directory.
+ * - PTE: Page Table Entry. A 64bit value pointing to the next
+ *        level of translation
+ * - ATE: Address Transation Entry. A 64bit value pointing to
+ *        a 4kB physical page.
+ */
+
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str);
+
+
+/**
+ * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to
+ *                               a region on a GPU page fault
+ *
+ * @reg:           The region that will be backed with more pages
+ * @fault_rel_pfn: PFN of the fault relative to the start of the region
+ *
+ * This calculates how much to increase the backing of a region by, based on
+ * where a GPU page fault occurred and the flags in the region.
+ *
+ * This can be more than the minimum number of pages that would reach
+ * @fault_rel_pfn, for example to reduce the overall rate of page fault
+ * interrupts on a region, or to ensure that the end address is aligned.
+ *
+ * Return: the number of backed pages to increase by
+ */
+static size_t reg_grow_calc_extra_pages(struct kbase_va_region *reg, size_t fault_rel_pfn)
+{
+	size_t multiple = reg->extent;
+	size_t reg_current_size = kbase_reg_current_backed_size(reg);
+	size_t minimum_extra = fault_rel_pfn - reg_current_size + 1;
+	size_t remainder;
+
+	if (!multiple) {
+		dev_warn(reg->kctx->kbdev->dev,
+				"VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n",
+				((unsigned long long)reg->start_pfn) << PAGE_SHIFT);
+		return minimum_extra;
+	}
+
+	/* Calculate the remainder to subtract from minimum_extra to make it
+	 * the desired (rounded down) multiple of the extent.
+	 * Depending on reg's flags, the base used for calculating multiples is
+	 * different */
+	if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
+		/* multiple is based from the top of the initial commit, which
+		 * has been allocated in such a way that (start_pfn +
+		 * initial_commit) is already aligned to multiple. Hence the
+		 * pfn for the end of committed memory will also be aligned to
+		 * multiple */
+		size_t initial_commit = reg->initial_commit;
+
+		if (fault_rel_pfn < initial_commit) {
+			/* this case is just to catch in case it's been
+			 * recommitted by userspace to be smaller than the
+			 * initial commit */
+			minimum_extra = initial_commit - reg_current_size;
+			remainder = 0;
+		} else {
+			/* same as calculating (fault_rel_pfn - initial_commit + 1) */
+			size_t pages_after_initial = minimum_extra + reg_current_size - initial_commit;
+
+			remainder = pages_after_initial % multiple;
+		}
+	} else {
+		/* multiple is based from the current backed size, even if the
+		 * current backed size/pfn for end of committed memory are not
+		 * themselves aligned to multiple */
+		remainder = minimum_extra % multiple;
+	}
+
+	if (remainder == 0)
+		return minimum_extra;
+
+	return minimum_extra + multiple - remainder;
+}
+
+#ifdef CONFIG_MALI_JOB_DUMP
+static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_context *kctx,
+				struct kbase_device *kbdev,
+				struct kbase_as *faulting_as,
+				u64 start_pfn, size_t nr, u32 op)
+{
+	mutex_lock(&kbdev->mmu_hw_mutex);
+
+	kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE);
+	kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx, start_pfn,
+			nr, op, 1);
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE);
+}
+
+static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
+			struct kbase_as *faulting_as)
+{
+	struct kbasep_gwt_list_element *pos;
+	struct kbase_va_region *region;
+	struct kbase_device *kbdev;
+	u64 fault_pfn, pfn_offset;
+	u32 op;
+	int ret;
+	int as_no;
+
+	as_no = faulting_as->number;
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Find region and check if it should be writable. */
+	region = kbase_region_tracker_find_region_enclosing_address(kctx,
+			faulting_as->fault_addr);
+	if (!region || region->flags & KBASE_REG_FREE) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not mapped on the GPU");
+		return;
+	}
+
+	if (!(region->flags & KBASE_REG_GPU_WR)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Region does not have write permissions");
+		return;
+	}
+
+	/* Capture addresses of faulting write location
+	 * for job dumping if write tracking is enabled.
+	 */
+	if (kctx->gwt_enabled) {
+		u64 page_addr = faulting_as->fault_addr & PAGE_MASK;
+		bool found = false;
+		/* Check if this write was already handled. */
+		list_for_each_entry(pos, &kctx->gwt_current_list, link) {
+			if (page_addr == pos->page_addr) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found) {
+			pos = kmalloc(sizeof(*pos), GFP_KERNEL);
+			if (pos) {
+				pos->region = region;
+				pos->page_addr = page_addr;
+				pos->num_pages = 1;
+				list_add(&pos->link, &kctx->gwt_current_list);
+			} else {
+				dev_warn(kbdev->dev, "kmalloc failure");
+			}
+		}
+	}
+
+	pfn_offset = fault_pfn - region->start_pfn;
+	/* Now make this faulting page writable to GPU. */
+	ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn,
+				&kbase_get_gpu_phy_pages(region)[pfn_offset],
+				1, region->flags);
+
+	/* flush L2 and unlock the VA (resumes the MMU) */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+		op = AS_COMMAND_FLUSH;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	kbase_gpu_mmu_handle_write_faulting_as(kctx, kbdev, faulting_as,
+			fault_pfn, 1, op);
+
+	kbase_gpu_vm_unlock(kctx);
+}
+
+static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx,
+			struct kbase_as	*faulting_as)
+{
+	u32 fault_status;
+
+	fault_status = faulting_as->fault_status;
+
+	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		kbase_gpu_mmu_handle_write_fault(kctx, faulting_as);
+		break;
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Execute Permission fault");
+		break;
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Read Permission fault");
+		break;
+	default:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Unknown Permission fault");
+		break;
+	}
+}
+#endif
+
+void page_fault_worker(struct work_struct *data)
+{
+	u64 fault_pfn;
+	u32 fault_status;
+	size_t new_pages;
+	size_t fault_rel_pfn;
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_va_region *region;
+	int err;
+	bool grown = false;
+	size_t min_pool_size;
+	struct kbase_mem_pool *pool;
+	int pages_to_grow;
+	struct tagged_addr *gpu_pages, *cpu_pages;
+	struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
+	int i;
+
+	faulting_as = container_of(data, struct kbase_as, work_pagefault);
+	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+
+	if (unlikely(faulting_as->protected_mode)) {
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Protected mode fault");
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+
+		goto fault_done;
+	}
+
+	fault_status = faulting_as->fault_status;
+	switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
+		/* need to check against the region to handle this one */
+		break;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
+#ifdef CONFIG_MALI_JOB_DUMP
+		/* If GWT was ever enabled then we need to handle
+		 * write fault pages even if the feature was disabled later.
+		 */
+		if (kctx->gwt_was_enabled) {
+			kbase_gpu_mmu_handle_permission_fault(kctx,
+							faulting_as);
+			goto fault_done;
+		}
+#endif
+
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Translation table bus fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
+		/* nothing to do, but we don't expect this fault currently */
+		dev_warn(kbdev->dev, "Access flag unexpectedly set");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Address size fault");
+		else
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Unknown fault code");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Memory attributes fault");
+		else
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Unknown fault code");
+		goto fault_done;
+
+	default:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Unknown fault code");
+		goto fault_done;
+	}
+
+page_fault_retry:
+#ifdef CONFIG_MALI_2MB_ALLOC
+	/* Preallocate memory for the sub-allocation structs if necessary */
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+		if (!prealloc_sas[i]) {
+			prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]),
+					GFP_KERNEL);
+			if (!prealloc_sas[i]) {
+				kbase_mmu_report_fault_and_kill(
+						kctx, faulting_as,
+						"Failed pre-allocating memory for sub-allocations' metadata");
+				goto fault_done;
+			}
+		}
+	}
+#endif /* CONFIG_MALI_2MB_ALLOC */
+
+	/* so we have a translation fault, let's see if it is for growable
+	 * memory */
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx,
+			faulting_as->fault_addr);
+	if (!region || region->flags & KBASE_REG_FREE) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"DMA-BUF is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if ((region->flags & GROWABLE_FLAGS_REQUIRED)
+			!= GROWABLE_FLAGS_REQUIRED) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not growable");
+		goto fault_done;
+	}
+
+	if ((region->flags & KBASE_REG_DONT_NEED)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Don't need memory can't be grown");
+		goto fault_done;
+	}
+
+	/* find the size we need to grow it by */
+	/* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address
+	 * validating the fault_adress to be within a size_t from the start_pfn */
+	fault_rel_pfn = fault_pfn - region->start_pfn;
+
+	if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
+		dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+				faulting_as->fault_addr, region->start_pfn,
+				region->start_pfn +
+				kbase_reg_current_backed_size(region));
+
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* [1] in case another page fault occurred while we were
+		 * handling the (duplicate) page fault we need to ensure we
+		 * don't loose the other page fault as result of us clearing
+		 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
+		 * an UNLOCK command that will retry any stalled memory
+		 * transaction (which should cause the other page fault to be
+		 * raised again).
+		 */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+
+		goto fault_done;
+	}
+
+	new_pages = reg_grow_calc_extra_pages(region, fault_rel_pfn);
+
+	/* cap to max vsize */
+	new_pages = min(new_pages, region->nr_pages - kbase_reg_current_backed_size(region));
+
+	if (0 == new_pages) {
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Duplicate of a fault we've already handled, nothing to do */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* See comment [1] about UNLOCK usage */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+		goto fault_done;
+	}
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	if (new_pages >= (SZ_2M / SZ_4K)) {
+		pool = &kctx->lp_mem_pool;
+		/* Round up to number of 2 MB pages required */
+		min_pool_size = new_pages + ((SZ_2M / SZ_4K) - 1);
+		min_pool_size /= (SZ_2M / SZ_4K);
+	} else {
+#endif
+		pool = &kctx->mem_pool;
+		min_pool_size = new_pages;
+#ifdef CONFIG_MALI_2MB_ALLOC
+	}
+#endif
+
+	if (region->gpu_alloc != region->cpu_alloc)
+		min_pool_size *= 2;
+
+	pages_to_grow = 0;
+
+	mutex_lock(&kctx->mem_partials_lock);
+	kbase_mem_pool_lock(pool);
+	/* We can not allocate memory from the kernel with the vm_lock held, so
+	 * check that there is enough memory in the pool. If not then calculate
+	 * how much it has to grow by, grow the pool when the vm_lock is
+	 * dropped, and retry the allocation.
+	 */
+	if (kbase_mem_pool_size(pool) >= min_pool_size) {
+		gpu_pages = kbase_alloc_phy_pages_helper_locked(
+				region->gpu_alloc, pool, new_pages,
+				&prealloc_sas[0]);
+
+		if (gpu_pages) {
+			if (region->gpu_alloc != region->cpu_alloc) {
+				cpu_pages = kbase_alloc_phy_pages_helper_locked(
+						region->cpu_alloc, pool,
+						new_pages, &prealloc_sas[1]);
+
+				if (cpu_pages) {
+					grown = true;
+				} else {
+					kbase_free_phy_pages_helper_locked(
+							region->gpu_alloc,
+							pool, gpu_pages,
+							new_pages);
+				}
+			} else {
+				grown = true;
+			}
+		}
+	} else {
+		pages_to_grow = min_pool_size - kbase_mem_pool_size(pool);
+	}
+	kbase_mem_pool_unlock(pool);
+	mutex_unlock(&kctx->mem_partials_lock);
+
+	if (grown) {
+		u64 pfn_offset;
+		u32 op;
+
+		/* alloc success */
+		KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
+
+		/* set up the new pages */
+		pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
+		/*
+		 * Note:
+		 * Issuing an MMU operation will unlock the MMU and cause the
+		 * translation to be replayed. If the page insertion fails then
+		 * rather then trying to continue the context should be killed
+		 * so the no_flush version of insert_pages is used which allows
+		 * us to unlock the MMU as we see fit.
+		 */
+		err = kbase_mmu_insert_pages_no_flush(kctx,
+				region->start_pfn + pfn_offset,
+				&kbase_get_gpu_phy_pages(region)[pfn_offset],
+				new_pages, region->flags);
+		if (err) {
+			kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
+			if (region->gpu_alloc != region->cpu_alloc)
+				kbase_free_phy_pages_helper(region->cpu_alloc,
+						new_pages);
+			kbase_gpu_vm_unlock(kctx);
+			/* The locked VA region will be unlocked and the cache invalidated in here */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page table update failure");
+			goto fault_done;
+		}
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
+#endif
+		KBASE_TLSTREAM_AUX_PAGEFAULT(kctx->id, (u64)new_pages);
+
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* flush L2 and unlock the VA (resumes the MMU) */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+			op = AS_COMMAND_FLUSH;
+		else
+			op = AS_COMMAND_FLUSH_PT;
+
+		/* clear MMU interrupt - this needs to be done after updating
+		 * the page tables but before issuing a FLUSH command. The
+		 * FLUSH cmd has a side effect that it restarts stalled memory
+		 * transactions in other address spaces which may cause
+		 * another fault to occur. If we didn't clear the interrupt at
+		 * this stage a new IRQ might not be raised when the GPU finds
+		 * a MMU IRQ is already pending.
+		 */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
+					  faulting_as->fault_addr >> PAGE_SHIFT,
+					  new_pages,
+					  op, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		/* reenable this in the mask */
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+
+#ifdef CONFIG_MALI_JOB_DUMP
+		if (kctx->gwt_enabled) {
+			/* GWT also tracks growable regions. */
+			struct kbasep_gwt_list_element *pos;
+
+			pos = kmalloc(sizeof(*pos), GFP_KERNEL);
+			if (pos) {
+				pos->region = region;
+				pos->page_addr = (region->start_pfn +
+							pfn_offset) <<
+							 PAGE_SHIFT;
+				pos->num_pages = new_pages;
+				list_add(&pos->link,
+					&kctx->gwt_current_list);
+			} else {
+				dev_warn(kbdev->dev, "kmalloc failure");
+			}
+		}
+#endif
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		int ret = -ENOMEM;
+
+		kbase_gpu_vm_unlock(kctx);
+
+		/* If the memory pool was insufficient then grow it and retry.
+		 * Otherwise fail the allocation.
+		 */
+		if (pages_to_grow > 0)
+			ret = kbase_mem_pool_grow(pool, pages_to_grow);
+
+		if (ret < 0) {
+			/* failed to extend, handle as a normal PF */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page allocation failure");
+		} else {
+			goto page_fault_retry;
+		}
+	}
+
+fault_done:
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
+		kfree(prealloc_sas[i]);
+
+	/*
+	 * By this point, the fault was handled in some way,
+	 * so release the ctx refcount
+	 */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
+{
+	u64 *page;
+	int i;
+	struct page *p;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
+	kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	p = kbase_mem_pool_alloc(&kctx->mem_pool);
+	if (!p)
+		goto sub_pages;
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+
+	page = kmap(p);
+	if (NULL == page)
+		goto alloc_free;
+
+	kbase_process_page_usage_inc(kctx, 1);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
+		kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
+
+	kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+	kunmap(p);
+	return page_to_phys(p);
+
+alloc_free:
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+sub_pages:
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
+
+/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
+ * new table from the pool if needed and possible
+ */
+static int mmu_get_next_pgd(struct kbase_context *kctx,
+		phys_addr_t *pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(*pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	p = pfn_to_page(PFN_DOWN(*pgd));
+	page = kmap(p);
+	if (NULL == page) {
+		dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
+		return -EINVAL;
+	}
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+
+	if (!target_pgd) {
+		target_pgd = kbase_mmu_alloc_pgd(kctx);
+		if (!target_pgd) {
+			dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+			kunmap(p);
+			return -ENOMEM;
+		}
+
+		kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+		/* Rely on the caller to update the address space flags. */
+	}
+
+	kunmap(p);
+	*pgd = target_pgd;
+
+	return 0;
+}
+
+/*
+ * Returns the PGD for the specified level of translation
+ */
+static int mmu_get_pgd_at_level(struct kbase_context *kctx,
+					u64 vpfn,
+					unsigned int level,
+					phys_addr_t *out_pgd)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+	pgd = kctx->pgd;
+
+	for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) {
+		int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l);
+		/* Handle failure condition */
+		if (err) {
+			dev_dbg(kctx->kbdev->dev,
+				 "%s: mmu_get_next_pgd failure at level %d\n",
+				 __func__, l);
+			return err;
+		}
+	}
+
+	*out_pgd = pgd;
+
+	return 0;
+}
+
+#define mmu_get_bottom_pgd(kctx, vpfn, out_pgd) \
+	mmu_get_pgd_at_level((kctx), (vpfn), MIDGARD_MMU_BOTTOMLEVEL, (out_pgd))
+
+
+static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx,
+					      u64 from_vpfn, u64 to_vpfn)
+{
+	phys_addr_t pgd;
+	u64 vpfn = from_vpfn;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+	KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	while (vpfn < to_vpfn) {
+		unsigned int i;
+		unsigned int idx = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx;
+		unsigned int pcount = 0;
+		unsigned int left = to_vpfn - vpfn;
+		unsigned int level;
+		u64 *page;
+
+		if (count > left)
+			count = left;
+
+		/* need to check if this is a 2MB page or a 4kB */
+		pgd = kctx->pgd;
+
+		for (level = MIDGARD_MMU_TOPLEVEL;
+				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
+			idx = (vpfn >> ((3 - level) * 9)) & 0x1FF;
+			page = kmap(phys_to_page(pgd));
+			if (mmu_mode->ate_is_valid(page[idx], level))
+				break; /* keep the mapping */
+			kunmap(phys_to_page(pgd));
+			pgd = mmu_mode->pte_to_phy_addr(page[idx]);
+		}
+
+		switch (level) {
+		case MIDGARD_MMU_LEVEL(2):
+			/* remap to single entry to update */
+			pcount = 1;
+			break;
+		case MIDGARD_MMU_BOTTOMLEVEL:
+			/* page count is the same as the logical count */
+			pcount = count;
+			break;
+		default:
+			dev_warn(kctx->kbdev->dev, "%sNo support for ATEs at level %d\n",
+			       __func__, level);
+			goto next;
+		}
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < pcount; i++)
+			mmu_mode->entry_invalidate(&page[idx + i]);
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				   kbase_dma_addr(phys_to_page(pgd)) + 8 * idx,
+				   8 * pcount);
+		kunmap(phys_to_page(pgd));
+
+next:
+		vpfn += count;
+	}
+}
+
+/*
+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr phys, size_t nr,
+					unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_single_page only partially completes we need to be
+	 * able to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > remain)
+			count = remain;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_vpfn +
+								  recover_count
+								  );
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_vpfn +
+								  recover_count
+								  );
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			/* Fail if the current page is a valid ATE entry */
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+
+			mmu_mode->entry_set_ate(&pgd_page[ofs],
+						phys, flags,
+						MIDGARD_MMU_BOTTOMLEVEL);
+		}
+
+		vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table.
+		 * If further pages need inserting and fail we need to undo what
+		 * has already taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+static inline void cleanup_empty_pte(struct kbase_context *kctx, u64 *pte)
+{
+	phys_addr_t tmp_pgd;
+	struct page *tmp_p;
+
+	tmp_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(*pte);
+	tmp_p = phys_to_page(tmp_pgd);
+	kbase_mem_pool_free(&kctx->mem_pool, tmp_p, false);
+	kbase_process_page_usage_dec(kctx, 1);
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+}
+
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx,
+				    const u64 start_vpfn,
+				    struct tagged_addr *phys, size_t nr,
+				    unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	u64 insert_vpfn = start_vpfn;
+	size_t remain = nr;
+	int err;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(start_vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int vindex = insert_vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
+		struct page *p;
+		unsigned int cur_level;
+
+		if (count > remain)
+			count = remain;
+
+		if (!vindex && is_huge_head(*phys))
+			cur_level = MIDGARD_MMU_LEVEL(2);
+		else
+			cur_level = MIDGARD_MMU_BOTTOMLEVEL;
+
+		/*
+		 * Repeatedly calling mmu_get_pgd_at_level() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_pgd_at_level(kctx, insert_vpfn, cur_level,
+						   &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					cur_level);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+
+		if (err) {
+			dev_warn(kctx->kbdev->dev,
+				 "%s: mmu_get_bottom_pgd failure\n", __func__);
+			if (insert_vpfn != start_vpfn) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  start_vpfn,
+								  insert_vpfn);
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "%s: kmap failure\n",
+				 __func__);
+			if (insert_vpfn != start_vpfn) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  start_vpfn,
+								  insert_vpfn);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		if (cur_level == MIDGARD_MMU_LEVEL(2)) {
+			unsigned int level_index = (insert_vpfn >> 9) & 0x1FF;
+			u64 *target = &pgd_page[level_index];
+
+			if (mmu_mode->pte_is_valid(*target, cur_level))
+				cleanup_empty_pte(kctx, target);
+			mmu_mode->entry_set_ate(target, *phys, flags,
+						cur_level);
+		} else {
+			for (i = 0; i < count; i++) {
+				unsigned int ofs = vindex + i;
+				u64 *target = &pgd_page[ofs];
+
+				/* Fail if the current page is a valid ATE entry
+				 * unless gwt_was_enabled as in that case all
+				 * pages will be valid from when
+				 * kbase_gpu_gwt_start() cleared the gpu
+				 * write flag.
+				 */
+#ifdef CONFIG_MALI_JOB_DUMP
+				if (!kctx->gwt_was_enabled)
+#endif
+					KBASE_DEBUG_ASSERT
+						(0 == (*target & 1UL));
+				kctx->kbdev->mmu_mode->entry_set_ate(target,
+						phys[i], flags, cur_level);
+			}
+		}
+
+		phys += count;
+		insert_vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (vindex * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  struct tagged_addr *phys, size_t nr,
+				  unsigned long flags)
+{
+	int err;
+
+	err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+
+/**
+ * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
+ * without retaining the kbase context.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
+ * other locking.
+ */
+static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+	u32 op;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	if (sync)
+		op = AS_COMMAND_FLUSH_MEM;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	err = kbase_mmu_hw_do_operation(kbdev,
+				&kbdev->as[kctx->as_nr],
+				kctx, vpfn, nr, op, 0);
+#if KBASE_GPU_RESET_EN
+	if (err) {
+		/* Flush failed to complete, assume the
+		 * GPU has hung and perform a reset to
+		 * recover */
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_NO_MALI
+	/*
+	 * As this function could be called in interrupt context the sync
+	 * request can't block. Instead log the request and the next flush
+	 * request will pick it up.
+	 */
+	if ((!err) && sync &&
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367))
+		atomic_set(&kctx->drain_pending, 1);
+#endif /* !CONFIG_MALI_NO_MALI */
+}
+
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev;
+	bool ctx_is_in_runpool;
+#ifndef CONFIG_MALI_NO_MALI
+	bool drain_pending = false;
+
+	if (atomic_xchg(&kctx->drain_pending, 0))
+		drain_pending = true;
+#endif /* !CONFIG_MALI_NO_MALI */
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	kbdev = kctx->kbdev;
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if (ctx_is_in_runpool) {
+		KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+		if (!kbase_pm_context_active_handle_suspend(kbdev,
+			KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+			int err;
+			u32 op;
+
+			/* AS transaction begin */
+			mutex_lock(&kbdev->mmu_hw_mutex);
+
+			if (sync)
+				op = AS_COMMAND_FLUSH_MEM;
+			else
+				op = AS_COMMAND_FLUSH_PT;
+
+			err = kbase_mmu_hw_do_operation(kbdev,
+						&kbdev->as[kctx->as_nr],
+						kctx, vpfn, nr, op, 0);
+
+#if KBASE_GPU_RESET_EN
+			if (err) {
+				/* Flush failed to complete, assume the
+				 * GPU has hung and perform a reset to
+				 * recover */
+				dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
+
+				if (kbase_prepare_to_reset_gpu(kbdev))
+					kbase_reset_gpu(kbdev);
+			}
+#endif /* KBASE_GPU_RESET_EN */
+
+			mutex_unlock(&kbdev->mmu_hw_mutex);
+			/* AS transaction end */
+
+#ifndef CONFIG_MALI_NO_MALI
+			/*
+			 * The transaction lock must be dropped before here
+			 * as kbase_wait_write_flush could take it if
+			 * the GPU was powered down (static analysis doesn't
+			 * know this can't happen).
+			 */
+			drain_pending |= (!err) && sync &&
+					kbase_hw_has_issue(kctx->kbdev,
+							BASE_HW_ISSUE_6367);
+			if (drain_pending) {
+				/* Wait for GPU to flush write buffer */
+				kbase_wait_write_flush(kctx);
+			}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+			kbase_pm_context_idle(kbdev);
+		}
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+}
+
+void kbase_mmu_update(struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the hwaccess_lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	kctx->kbdev->mmu_mode->update(kctx);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_update);
+
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	kbdev->mmu_mode->disable_as(kbdev, as_nr);
+}
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the hwaccess_lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/*
+	 * The address space is being disabled, drain all knowledge of it out
+	 * from the caches as pages and page tables might be freed after this.
+	 *
+	 * The job scheduler code will already be holding the locks and context
+	 * so just do the flush.
+	 */
+	kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
+
+	kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+
+/*
+ * We actually only discard the ATE, and not the page table
+ * pages. There is a potential DoS here, as we'll leak memory by
+ * having PTEs that are potentially unused.  Will require physical
+ * page accounting, so MMU pages are part of the process allocation.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+	phys_addr_t pgd;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err = -EFAULT;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
+
+	if (0 == nr) {
+		/* early out if nothing to do */
+		return 0;
+	}
+
+	mutex_lock(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		unsigned int pcount;
+		unsigned int level;
+		u64 *page;
+
+		if (count > nr)
+			count = nr;
+
+		/* need to check if this is a 2MB or a 4kB page */
+		pgd = kctx->pgd;
+
+		for (level = MIDGARD_MMU_TOPLEVEL;
+				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
+			phys_addr_t next_pgd;
+
+			index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
+			page = kmap(phys_to_page(pgd));
+			if (mmu_mode->ate_is_valid(page[index], level))
+				break; /* keep the mapping */
+			else if (!mmu_mode->pte_is_valid(page[index], level)) {
+				/* nothing here, advance */
+				switch (level) {
+				case MIDGARD_MMU_LEVEL(0):
+					count = 134217728;
+					break;
+				case MIDGARD_MMU_LEVEL(1):
+					count = 262144;
+					break;
+				case MIDGARD_MMU_LEVEL(2):
+					count = 512;
+					break;
+				case MIDGARD_MMU_LEVEL(3):
+					count = 1;
+					break;
+				}
+				if (count > nr)
+					count = nr;
+				goto next;
+			}
+			next_pgd = mmu_mode->pte_to_phy_addr(page[index]);
+			kunmap(phys_to_page(pgd));
+			pgd = next_pgd;
+		}
+
+		switch (level) {
+		case MIDGARD_MMU_LEVEL(0):
+		case MIDGARD_MMU_LEVEL(1):
+			dev_warn(kctx->kbdev->dev,
+				 "%s: No support for ATEs at level %d\n",
+				 __func__, level);
+			kunmap(phys_to_page(pgd));
+			goto out;
+		case MIDGARD_MMU_LEVEL(2):
+			/* can only teardown if count >= 512 */
+			if (count >= 512) {
+				pcount = 1;
+			} else {
+				dev_warn(kctx->kbdev->dev,
+					 "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n",
+					 __func__, count);
+				pcount = 0;
+			}
+			break;
+		case MIDGARD_MMU_BOTTOMLEVEL:
+			/* page count is the same as the logical count */
+			pcount = count;
+			break;
+		default:
+			dev_err(kctx->kbdev->dev,
+				"%s: found non-mapped memory, early out\n",
+				__func__);
+			vpfn += count;
+			nr -= count;
+			continue;
+		}
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < pcount; i++)
+			mmu_mode->entry_invalidate(&page[index + i]);
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				   kbase_dma_addr(phys_to_page(pgd)) +
+				   8 * index, 8*pcount);
+
+next:
+		kunmap(phys_to_page(pgd));
+		vpfn += count;
+		nr -= count;
+	}
+	err = 0;
+out:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+/**
+ * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
+ * This call is being triggered as a response to the changes of the mem attributes
+ *
+ * @pre : The caller is responsible for validating the memory attributes
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr *phys, size_t nr,
+					unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		size_t count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev,
+				 "mmu_get_bottom_pgd failure\n");
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i],
+						flags, MIDGARD_MMU_BOTTOMLEVEL);
+
+		phys += count;
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(pfn_to_page(PFN_DOWN(pgd)));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
+			   struct tagged_addr *phys, size_t nr,
+			   unsigned long flags)
+{
+	int err;
+
+	err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, true);
+	return err;
+}
+
+static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd,
+			       int level, u64 *pgd_page_buffer)
+{
+	phys_addr_t target_pgd;
+	struct page *p;
+	u64 *pgd_page;
+	int i;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != pgd_page);
+	/* Copy the page to our preallocated buffer so that we can minimize
+	 * kmap_atomic usage */
+	memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+	kunmap_atomic(pgd_page);
+	pgd_page = pgd_page_buffer;
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+		if (target_pgd) {
+			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
+				mmu_teardown_level(kctx,
+						   target_pgd,
+						   level + 1,
+						   pgd_page_buffer +
+						   (PAGE_SIZE / sizeof(u64)));
+			}
+		}
+	}
+
+	p = pfn_to_page(PFN_DOWN(pgd));
+	kbase_mem_pool_free(&kctx->mem_pool, p, true);
+	kbase_process_page_usage_dec(kctx, 1);
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+}
+
+int kbase_mmu_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
+
+	mutex_init(&kctx->mmu_lock);
+
+	/* Preallocate MMU depth of four pages for mmu_teardown_level to use */
+	kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+
+	if (NULL == kctx->mmu_teardown_pages)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void kbase_mmu_term(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	kfree(kctx->mmu_teardown_pages);
+	kctx->mmu_teardown_pages = NULL;
+}
+
+void kbase_mmu_free_pgd(struct kbase_context *kctx)
+{
+	int new_page_count = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	mutex_lock(&kctx->mmu_lock);
+	mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL,
+			   kctx->mmu_teardown_pages);
+	mutex_unlock(&kctx->mmu_lock);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
+
+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
+	size_t dump_size;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+	if (!pgd_page) {
+		dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
+		return 0;
+	}
+
+	if (*size_left >= size) {
+		/* A modified physical address that contains the page table level */
+		u64 m_pgd = pgd | level;
+
+		/* Put the modified physical address in the output buffer */
+		memcpy(*buffer, &m_pgd, sizeof(m_pgd));
+		*buffer += sizeof(m_pgd);
+
+		/* Followed by the page table itself */
+		memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
+		*buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
+
+		*size_left -= size;
+	}
+
+	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
+		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
+				target_pgd = mmu_mode->pte_to_phy_addr(
+						pgd_page[i]);
+
+				dump_size = kbasep_mmu_dump_level(kctx,
+						target_pgd, level + 1,
+						buffer, size_left);
+				if (!dump_size) {
+					kunmap(pfn_to_page(PFN_DOWN(pgd)));
+					return 0;
+				}
+				size += dump_size;
+			}
+		}
+	}
+
+	kunmap(pfn_to_page(PFN_DOWN(pgd)));
+
+	return size;
+}
+
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
+{
+	void *kaddr;
+	size_t size_left;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (0 == nr_pages) {
+		/* can't dump in a 0 sized buffer, early out */
+		return NULL;
+	}
+
+	size_left = nr_pages * PAGE_SIZE;
+
+	KBASE_DEBUG_ASSERT(0 != size_left);
+	kaddr = vmalloc_user(size_left);
+
+	mutex_lock(&kctx->mmu_lock);
+
+	if (kaddr) {
+		u64 end_marker = 0xFFULL;
+		char *buffer;
+		char *mmu_dump_buffer;
+		u64 config[3];
+		size_t dump_size, size = 0;
+
+		buffer = (char *)kaddr;
+		mmu_dump_buffer = buffer;
+
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
+			struct kbase_mmu_setup as_setup;
+
+			kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup);
+			config[0] = as_setup.transtab;
+			config[1] = as_setup.memattr;
+			config[2] = as_setup.transcfg;
+			memcpy(buffer, &config, sizeof(config));
+			mmu_dump_buffer += sizeof(config);
+			size_left -= sizeof(config);
+			size += sizeof(config);
+		}
+
+		dump_size = kbasep_mmu_dump_level(kctx,
+				kctx->pgd,
+				MIDGARD_MMU_TOPLEVEL,
+				&mmu_dump_buffer,
+				&size_left);
+
+		if (!dump_size)
+			goto fail_free;
+
+		size += dump_size;
+
+		/* Add on the size for the end marker */
+		size += sizeof(u64);
+
+		if (size > (nr_pages * PAGE_SIZE)) {
+			/* The buffer isn't big enough - free the memory and return failure */
+			goto fail_free;
+		}
+
+		/* Add the end marker */
+		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return kaddr;
+
+fail_free:
+	vfree(kaddr);
+	mutex_unlock(&kctx->mmu_lock);
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_dump);
+
+void bus_fault_worker(struct work_struct *data)
+{
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif /* KBASE_GPU_RESET_EN */
+
+	faulting_as = container_of(data, struct kbase_as, work_busfault);
+
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	if (unlikely(faulting_as->protected_mode)) {
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+		atomic_dec(&kbdev->faults_pending);
+		return;
+
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
+	if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		unsigned long flags;
+
+		/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Set the MMU into unmapped mode */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_disable(kctx);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+
+		kbase_pm_context_idle(kbdev);
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
+{
+	const char *e;
+
+	switch (exception_code) {
+		/* Non-Fault Status code */
+	case 0x00:
+		e = "NOT_STARTED/IDLE/OK";
+		break;
+	case 0x01:
+		e = "DONE";
+		break;
+	case 0x02:
+		e = "INTERRUPTED";
+		break;
+	case 0x03:
+		e = "STOPPED";
+		break;
+	case 0x04:
+		e = "TERMINATED";
+		break;
+	case 0x08:
+		e = "ACTIVE";
+		break;
+		/* Job exceptions */
+	case 0x40:
+		e = "JOB_CONFIG_FAULT";
+		break;
+	case 0x41:
+		e = "JOB_POWER_FAULT";
+		break;
+	case 0x42:
+		e = "JOB_READ_FAULT";
+		break;
+	case 0x43:
+		e = "JOB_WRITE_FAULT";
+		break;
+	case 0x44:
+		e = "JOB_AFFINITY_FAULT";
+		break;
+	case 0x48:
+		e = "JOB_BUS_FAULT";
+		break;
+	case 0x50:
+		e = "INSTR_INVALID_PC";
+		break;
+	case 0x51:
+		e = "INSTR_INVALID_ENC";
+		break;
+	case 0x52:
+		e = "INSTR_TYPE_MISMATCH";
+		break;
+	case 0x53:
+		e = "INSTR_OPERAND_FAULT";
+		break;
+	case 0x54:
+		e = "INSTR_TLS_FAULT";
+		break;
+	case 0x55:
+		e = "INSTR_BARRIER_FAULT";
+		break;
+	case 0x56:
+		e = "INSTR_ALIGN_FAULT";
+		break;
+	case 0x58:
+		e = "DATA_INVALID_FAULT";
+		break;
+	case 0x59:
+		e = "TILE_RANGE_FAULT";
+		break;
+	case 0x5A:
+		e = "ADDR_RANGE_FAULT";
+		break;
+	case 0x60:
+		e = "OUT_OF_MEMORY";
+		break;
+		/* GPU exceptions */
+	case 0x80:
+		e = "DELAYED_BUS_FAULT";
+		break;
+	case 0x88:
+		e = "SHAREABILITY_FAULT";
+		break;
+		/* MMU exceptions */
+	case 0xC0:
+	case 0xC1:
+	case 0xC2:
+	case 0xC3:
+	case 0xC4:
+	case 0xC5:
+	case 0xC6:
+	case 0xC7:
+		e = "TRANSLATION_FAULT";
+		break;
+	case 0xC8:
+		e = "PERMISSION_FAULT";
+		break;
+	case 0xC9:
+	case 0xCA:
+	case 0xCB:
+	case 0xCC:
+	case 0xCD:
+	case 0xCE:
+	case 0xCF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "PERMISSION_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xD0:
+	case 0xD1:
+	case 0xD2:
+	case 0xD3:
+	case 0xD4:
+	case 0xD5:
+	case 0xD6:
+	case 0xD7:
+		e = "TRANSTAB_BUS_FAULT";
+		break;
+	case 0xD8:
+		e = "ACCESS_FLAG";
+		break;
+	case 0xD9:
+	case 0xDA:
+	case 0xDB:
+	case 0xDC:
+	case 0xDD:
+	case 0xDE:
+	case 0xDF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "ACCESS_FLAG";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xE0:
+	case 0xE1:
+	case 0xE2:
+	case 0xE3:
+	case 0xE4:
+	case 0xE5:
+	case 0xE6:
+	case 0xE7:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "ADDRESS_SIZE_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xE8:
+	case 0xE9:
+	case 0xEA:
+	case 0xEB:
+	case 0xEC:
+	case 0xED:
+	case 0xEE:
+	case 0xEF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "MEMORY_ATTRIBUTES_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	default:
+		e = "UNKNOWN";
+		break;
+	};
+
+	return e;
+}
+
+static const char *access_type_name(struct kbase_device *kbdev,
+		u32 fault_status)
+{
+	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			return "ATOMIC";
+		else
+			return "UNKNOWN";
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		return "READ";
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		return "WRITE";
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		return "EXECUTE";
+	default:
+		WARN_ON(1);
+		return NULL;
+	}
+}
+
+/**
+ * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on.
+ */
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str)
+{
+	unsigned long flags;
+	int exception_type;
+	int access_type;
+	int source_id;
+	int as_no;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif
+
+	as_no = as->number;
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	/* ASSERT that the context won't leave the runpool */
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/* decode the fault status */
+	exception_type = as->fault_status & 0xFF;
+	access_type = (as->fault_status >> 8) & 0x3;
+	source_id = (as->fault_status >> 16);
+
+	/* terminal fault, print info about the fault */
+	dev_err(kbdev->dev,
+		"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+		"Reason: %s\n"
+		"raw fault status: 0x%X\n"
+		"decoded fault status: %s\n"
+		"exception type 0x%X: %s\n"
+		"access type 0x%X: %s\n"
+		"source id 0x%X\n"
+		"pid: %d\n",
+		as_no, as->fault_addr,
+		reason_str,
+		as->fault_status,
+		(as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+		exception_type, kbase_exception_name(kbdev, exception_type),
+		access_type, access_type_name(kbdev, as->fault_status),
+		source_id,
+		kctx->pid);
+
+	/* hardware counters dump fault handling */
+	if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
+			(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_DUMPING)) {
+		unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+
+		if ((as->fault_addr >= kbdev->hwcnt.addr) &&
+				(as->fault_addr < (kbdev->hwcnt.addr +
+						(num_core_groups * 2048))))
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+	}
+
+	/* Stop the kctx from submitting more jobs and cause it to be scheduled
+	 * out/rescheduled - this will occur on releasing the context's refcount */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_clear_submit_allowed(js_devdata, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
+	 * context can appear in the job slots from this point on */
+	kbase_backend_jm_kill_jobs_from_kctx(kctx);
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_mmu_disable(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+	/* Clear down the fault */
+	kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+	kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+}
+
+void kbasep_as_do_poke(struct work_struct *work)
+{
+	struct kbase_as *as;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(work);
+	as = container_of(work, struct kbase_as, poke_work);
+	kbdev = container_of(as, struct kbase_device, as[as->number]);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	/* GPU power will already be active by virtue of the caller holding a JS
+	 * reference on the address space, and will not release it until this worker
+	 * has finished */
+
+	/* Further to the comment above, we know that while this function is running
+	 * the AS will not be released as before the atom is released this workqueue
+	 * is flushed (in kbase_as_poking_timer_release_atom)
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
+
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	/* Force a uTLB invalidate */
+	kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
+				  AS_COMMAND_UNLOCK, 0);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (as->poke_refcount &&
+		!(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
+		/* Only queue up the timer if we need it, and we're not trying to kill it */
+		hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_as *as;
+	int queue_work_ret;
+
+	KBASE_DEBUG_ASSERT(NULL != timer);
+	as = container_of(timer, struct kbase_as, poke_timer);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+	KBASE_DEBUG_ASSERT(queue_work_ret);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * Retain the poking timer on an atom's context (if the atom hasn't already
+ * done so), and start the timer (if it's not already started).
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that's running on the GPU.
+ *
+ * The caller must hold hwaccess_lock
+ *
+ * This can be called safely from atomic context
+ */
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->poking)
+		return;
+
+	katom->poking = 1;
+
+	/* It's safe to work on the as/as_nr without an explicit reference,
+	 * because the caller holds the hwaccess_lock, and the atom itself
+	 * was also running and had already taken a reference  */
+	as = &kbdev->as[kctx->as_nr];
+
+	if (++(as->poke_refcount) == 1) {
+		/* First refcount for poke needed: check if not already in flight */
+		if (!as->poke_state) {
+			/* need to start poking */
+			as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
+			queue_work(as->poke_wq, &as->poke_work);
+		}
+	}
+}
+
+/**
+ * If an atom holds a poking timer, release it and wait for it to finish
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that still has a JS reference on the context
+ *
+ * This must \b not be called from atomic context, since it can sleep.
+ */
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	if (!katom->poking)
+		return;
+
+	as = &kbdev->as[kctx->as_nr];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	if (--(as->poke_refcount) == 0) {
+		as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_cancel(&as->poke_timer);
+		flush_workqueue(as->poke_wq);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* Re-check whether it's still needed */
+		if (as->poke_refcount) {
+			int queue_work_ret;
+			/* Poking still needed:
+			 * - Another retain will not be starting the timer or queueing work,
+			 * because it's still marked as in-flight
+			 * - The hrtimer has finished, and has not started a new timer or
+			 * queued work because it's been marked as killing
+			 *
+			 * So whatever happens now, just queue the work again */
+			as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
+			queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+			KBASE_DEBUG_ASSERT(queue_work_ret);
+		} else {
+			/* It isn't - so mark it as not in flight, and not killing */
+			as->poke_state = 0u;
+
+			/* The poke associated with the atom has now finished. If this is
+			 * also the last atom on the context, then we can guarentee no more
+			 * pokes (and thus no more poking register accesses) will occur on
+			 * the context until new atoms are run */
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	katom->poking = 0;
+}
+
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kctx) {
+		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n",
+				 kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
+				 as->number, as->fault_addr);
+
+		/* Since no ctx was found, the MMU must be disabled. */
+		WARN_ON(as->current_setup.transtab);
+
+		if (kbase_as_has_bus_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		} else if (kbase_as_has_page_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+		}
+
+#if KBASE_GPU_RESET_EN
+		if (kbase_as_has_bus_fault(as) &&
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+			bool reset_status;
+			/*
+			 * Reset the GPU, like in bus_fault_worker, in case an
+			 * earlier error hasn't been properly cleared by this
+			 * point.
+			 */
+			dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+			reset_status = kbase_prepare_to_reset_gpu_locked(kbdev);
+			if (reset_status)
+				kbase_reset_gpu_locked(kbdev);
+		}
+#endif /* KBASE_GPU_RESET_EN */
+
+		return;
+	}
+
+	if (kbase_as_has_bus_fault(as)) {
+		/*
+		 * hw counters dumping in progress, signal the
+		 * other thread that it failed
+		 */
+		if ((kbdev->hwcnt.kctx == kctx) &&
+		    (kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_DUMPING))
+			kbdev->hwcnt.backend.state =
+						KBASE_INSTR_STATE_FAULT;
+
+		/*
+		 * Stop the kctx from submitting more jobs and cause it
+		 * to be scheduled out/rescheduled when all references
+		 * to it are released
+		 */
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			dev_warn(kbdev->dev,
+					"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+					as->number, as->fault_addr,
+					as->fault_extra_addr);
+		else
+			dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+					as->number, as->fault_addr);
+
+		/*
+		 * We need to switch to UNMAPPED mode - but we do this in a
+		 * worker so that we can sleep
+		 */
+		WARN_ON(!queue_work(as->pf_wq, &as->work_busfault));
+		atomic_inc(&kbdev->faults_pending);
+	} else {
+		WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault));
+		atomic_inc(&kbdev->faults_pending);
+	}
+}
+
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		flush_workqueue(as->pf_wq);
+	}
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
new file mode 100644
index 0000000..92aa55d
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -0,0 +1,128 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file
+ * Interface file for accessing MMU hardware functionality
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_page MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_intro_sec Introduction
+ * This module provides an abstraction for accessing the functionality provided
+ * by the midgard MMU and thus allows all MMU HW access to be contained within
+ * one common place and allows for different backends (implementations) to
+ * be provided.
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_H_
+#define _MALI_KBASE_MMU_HW_H_
+
+/* Forward declarations */
+struct kbase_device;
+struct kbase_as;
+struct kbase_context;
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw  MMU access APIs
+ * @{
+ */
+
+/** @brief MMU fault type descriptor.
+ */
+enum kbase_mmu_fault_type {
+	KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
+	KBASE_MMU_FAULT_TYPE_PAGE,
+	KBASE_MMU_FAULT_TYPE_BUS,
+	KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED,
+	KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED
+};
+
+/** @brief Configure an address space for use.
+ *
+ * Configure the MMU using the address space details setup in the
+ * @ref kbase_context structure.
+ *
+ * @param[in]  kbdev          kbase device to configure.
+ * @param[in]  as             address space to configure.
+ * @param[in]  kctx           kbase context to configure.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev,
+		struct kbase_as *as, struct kbase_context *kctx);
+
+/** @brief Issue an operation to the MMU.
+ *
+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
+ * is associated with the provided @ref kbase_context over the specified range
+ *
+ * @param[in]  kbdev         kbase device to issue the MMU operation on.
+ * @param[in]  as            address space to issue the MMU operation on.
+ * @param[in]  kctx          kbase context to issue the MMU operation on.
+ * @param[in]  vpfn          MMU Virtual Page Frame Number to start the
+ *                           operation on.
+ * @param[in]  nr            Number of pages to work on.
+ * @param[in]  type          Operation type (written to ASn_COMMAND).
+ * @param[in]  handling_irq  Is this operation being called during the handling
+ *                           of an interrupt?
+ *
+ * @return Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type,
+		unsigned int handling_irq);
+
+/** @brief Clear a fault that has been previously reported by the MMU.
+ *
+ * Clear a bus error or page fault that has been reported by the MMU.
+ *
+ * @param[in]  kbdev         kbase device to  clear the fault from.
+ * @param[in]  as            address space to  clear the fault from.
+ * @param[in]  kctx          kbase context to clear the fault from or NULL.
+ * @param[in]  type          The type of fault that needs to be cleared.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @brief Enable fault that has been previously reported by the MMU.
+ *
+ * After a page fault or bus error has been reported by the MMU these
+ * will be disabled. After these are handled this function needs to be
+ * called to enable the page fault or bus error fault again.
+ *
+ * @param[in]  kbdev         kbase device to again enable the fault from.
+ * @param[in]  as            address space to again enable the fault from.
+ * @param[in]  kctx          kbase context to again enable the fault from.
+ * @param[in]  type          The type of fault that needs to be enabled again.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @} *//* end group mali_kbase_mmu_hw */
+/** @} *//* end group base_kbase_api */
+
+#endif	/* _MALI_KBASE_MMU_HW_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
new file mode 100755
index 0000000..aa0c403
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
@@ -0,0 +1,214 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014, 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+#include "mali_kbase_defs.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+/* For valid ATEs bit 1 = ((level == 3) ? 1 : 0).
+ * Valid ATE entries at level 3 are flagged with the value 3.
+ * Valid ATE entries at level 0-2 are flagged with the value 1.
+ */
+#define ENTRY_IS_ATE_L3		3ULL
+#define ENTRY_IS_ATE_L02	1ULL
+#define ENTRY_IS_INVAL		2ULL
+#define ENTRY_IS_PTE		3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_ACCESS_RW (1ULL << 6)     /* bits 6:7 */
+#define ENTRY_ACCESS_RO (3ULL << 6)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE
+	WRITE_ONCE(*pte, phy);
+#else
+#ifdef CONFIG_64BIT
+	barrier();
+	*pte = phy;
+	barrier();
+#elif defined(CONFIG_ARM)
+	barrier();
+	asm volatile("ldrd r0, [%1]\n\t"
+		     "strd r0, %0\n\t"
+		     : "=m" (*pte)
+		     : "r" (&phy)
+		     : "r0", "r1");
+	barrier();
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register.
+	 */
+	setup->memattr =
+		(AS_MEMATTR_IMPL_DEF_CACHE_POLICY <<
+			(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_FORCE_TO_CACHE_ALL    <<
+			(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+		(AS_MEMATTR_WRITE_ALLOC           <<
+			(AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_IMPL_DEF   <<
+			(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_WA         <<
+			(AS_MEMATTR_INDEX_OUTER_WA * 8));
+
+	setup->transtab = (u64)kctx->pgd & AS_TRANSTAB_BASE_MASK;
+	setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K;
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = 0ULL;
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED;
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate, unsigned int level)
+{
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L3);
+	else
+		return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L02);
+}
+
+static int pte_is_valid(u64 pte, unsigned int level)
+{
+	/* PTEs cannot exist at the bottom level */
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		return false;
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* Set access flags - note that AArch64 stage 1 does not support
+	 * write-only access, so we use read/write instead
+	 */
+	if (flags & KBASE_REG_GPU_WR)
+		mmu_flags |= ENTRY_ACCESS_RW;
+	else if (flags & KBASE_REG_GPU_RD)
+		mmu_flags |= ENTRY_ACCESS_RO;
+
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry,
+		struct tagged_addr phy,
+		unsigned long flags,
+		unsigned int level)
+{
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		page_table_entry_set(entry, as_phys_addr_t(phy) |
+				get_mmu_flags(flags) |
+				ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3);
+	else
+		page_table_entry_set(entry, as_phys_addr_t(phy) |
+				get_mmu_flags(flags) |
+				ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & PAGE_MASK) |
+			ENTRY_ACCESS_BIT | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const aarch64_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void)
+{
+	return &aarch64_mode;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
new file mode 100755
index 0000000..7dc38fc
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -0,0 +1,198 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+#include "mali_kbase_defs.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+#define ENTRY_IS_ATE        1ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_RD_BIT (1ULL << 6)
+#define ENTRY_WR_BIT (1ULL << 7)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
+		ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE
+	WRITE_ONCE(*pte, phy);
+#else
+#ifdef CONFIG_64BIT
+	barrier();
+	*pte = phy;
+	barrier();
+#elif defined(CONFIG_ARM)
+	barrier();
+	asm volatile("ldrd r0, [%1]\n\t"
+		     "strd r0, %0\n\t"
+		     : "=m" (*pte)
+		     : "r" (&phy)
+		     : "r0", "r1");
+	barrier();
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register. */
+	setup->memattr =
+		(AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
+		(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    <<
+		(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8))    |
+		(AS_MEMATTR_LPAE_WRITE_ALLOC           <<
+		(AS_MEMATTR_INDEX_WRITE_ALLOC * 8))           |
+		(AS_MEMATTR_LPAE_OUTER_IMPL_DEF        <<
+		(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8))        |
+		(AS_MEMATTR_LPAE_OUTER_WA              <<
+		(AS_MEMATTR_INDEX_OUTER_WA * 8))              |
+		0; /* The other indices are unused for now */
+
+	setup->transtab = ((u64)kctx->pgd &
+		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
+		AS_TRANSTAB_LPAE_ADRMODE_TABLE |
+		AS_TRANSTAB_LPAE_READ_INNER;
+
+	setup->transcfg = 0;
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate, unsigned int level)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte, unsigned int level)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* write perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
+	/* read perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry,
+		struct tagged_addr phy,
+		unsigned long flags,
+		unsigned int level)
+{
+	page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) |
+			     ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const lpae_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
+{
+	return &lpae_mode;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
new file mode 100644
index 0000000..fbb090e
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+
+/*
+ * This file is included only for type definitions and functions belonging to
+ * specific platform folders. Do not add dependencies with symbols that are
+ * defined somewhere else.
+ */
+#include <mali_kbase_config.h>
+
+#define PLATFORM_CONFIG_RESOURCE_COUNT 4
+#define PLATFORM_CONFIG_IRQ_RES_COUNT  3
+
+static struct platform_device *mali_device;
+
+#ifndef CONFIG_OF
+/**
+ * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources
+ *
+ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function
+ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT.
+ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
+ *
+ * @param[in]  io_resource      Input IO resource data
+ * @param[out] linux_resources  Pointer to output array of Linux resource structures
+ */
+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
+{
+	if (!io_resources || !linux_resources) {
+		pr_err("%s: couldn't find proper resources\n", __func__);
+		return;
+	}
+
+	memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource));
+
+	linux_resources[0].start = io_resources->io_memory_region.start;
+	linux_resources[0].end   = io_resources->io_memory_region.end;
+	linux_resources[0].flags = IORESOURCE_MEM;
+
+	linux_resources[1].start = io_resources->job_irq_number;
+	linux_resources[1].end   = io_resources->job_irq_number;
+	linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[2].start = io_resources->mmu_irq_number;
+	linux_resources[2].end   = io_resources->mmu_irq_number;
+	linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[3].start = io_resources->gpu_irq_number;
+	linux_resources[3].end   = io_resources->gpu_irq_number;
+	linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+}
+#endif /* CONFIG_OF */
+
+int kbase_platform_register(void)
+{
+	struct kbase_platform_config *config;
+#ifndef CONFIG_OF
+	struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
+#endif
+	int err;
+
+	config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
+	if (config == NULL) {
+		pr_err("%s: couldn't get platform config\n", __func__);
+		return -ENODEV;
+	}
+
+	mali_device = platform_device_alloc("mali", 0);
+	if (mali_device == NULL)
+		return -ENOMEM;
+
+#ifndef CONFIG_OF
+	kbasep_config_parse_io_resources(config->io_resources, resources);
+	err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
+	if (err) {
+		platform_device_put(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+#endif /* CONFIG_OF */
+
+	err = platform_device_add(mali_device);
+	if (err) {
+		platform_device_unregister(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kbase_platform_register);
+
+void kbase_platform_unregister(void)
+{
+	if (mali_device)
+		platform_device_unregister(mali_device);
+}
+EXPORT_SYMBOL(kbase_platform_unregister);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
new file mode 100755
index 0000000..e3cb0b1
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -0,0 +1,230 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm.c
+ * Base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_vinstr.h>
+
+#include <mali_kbase_pm.h>
+
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
+{
+	return kbase_hwaccess_pm_powerup(kbdev, flags);
+}
+
+void kbase_pm_halt(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
+void kbase_pm_context_active(struct kbase_device *kbdev)
+{
+	(void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+}
+
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerup. Sometimes the event might be missed due to reading the count
+	 * outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+	if (kbase_pm_is_suspending(kbdev)) {
+		switch (suspend_handler) {
+		case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+			if (kbdev->pm.active_count != 0)
+				break;
+			/* FALLTHROUGH */
+		case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			if (old_count == 0)
+				kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+			return 1;
+
+		case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
+			/* FALLTHROUGH */
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false, "unreachable");
+			break;
+		}
+	}
+	c = ++kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	if (c == 1) {
+		/* First context active: Power on the GPU and any cores requested by
+		 * the policy */
+		kbase_hwaccess_pm_gpu_active(kbdev);
+	}
+#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ)
+	if (kbdev->ipa.gpu_active_callback)
+		kbdev->ipa.gpu_active_callback(kbdev->ipa.model_data);
+#endif
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_active);
+
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerdown. Sometimes the event might be missed due to reading the
+	 * count outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	c = --kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
+
+	KBASE_DEBUG_ASSERT(c >= 0);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	if (c == 0) {
+		/* Last context has gone idle */
+		kbase_hwaccess_pm_gpu_idle(kbdev);
+
+		/* Wake up anyone waiting for this to become 0 (e.g. suspend). The
+		 * waiters must synchronize with us by locking the pm.lock after
+		 * waiting */
+		wake_up(&kbdev->pm.zero_active_count_wait);
+	}
+
+#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ)
+	/* IPA may be using vinstr, in which case there may be one PM reference
+	 * still held when all other contexts have left the GPU. Inform IPA that
+	 * the GPU is now idle so that vinstr can drop it's reference.
+	 *
+	 * If the GPU was only briefly active then it might have gone idle
+	 * before vinstr has taken a PM reference, meaning that active_count is
+	 * zero. We still need to inform IPA in this case, so that vinstr can
+	 * drop the PM reference and avoid keeping the GPU powered
+	 * unnecessarily.
+	 */
+	if (c <= 1 && kbdev->ipa.gpu_idle_callback)
+		kbdev->ipa.gpu_idle_callback(kbdev->ipa.model_data);
+#endif
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
+
+void kbase_pm_suspend(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	mutex_lock(&kbdev->pm.lock);
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+	kbdev->pm.suspending = true;
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* From now on, the active count will drop towards zero. Sometimes, it'll
+	 * go up briefly before going down again. However, once it reaches zero it
+	 * will stay there - guaranteeing that we've idled all pm references */
+
+	/* Suspend job scheduler and associated components, so that it releases all
+	 * the PM active count references */
+	kbasep_js_suspend(kbdev);
+
+	/* Wait for the active count to reach zero. This is not the same as
+	 * waiting for a power down, since not all policies power down when this
+	 * reaches zero. */
+	wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0);
+
+	/* NOTE: We synchronize with anything that was just finishing a
+	 * kbase_pm_context_idle() call by locking the pm.lock below */
+
+	kbase_hwaccess_pm_suspend(kbdev);
+}
+
+void kbase_pm_resume(struct kbase_device *kbdev)
+{
+	/* MUST happen before any pm_context_active calls occur */
+	kbase_hwaccess_pm_resume(kbdev);
+
+	/* Initial active call, to power on the GPU/cores if needed */
+	kbase_pm_context_active(kbdev);
+
+	/* Resume any blocked atoms (which may cause contexts to be scheduled in
+	 * and dependent atoms to run) */
+	kbase_resume_suspended_soft_jobs(kbdev);
+
+	/* Resume the Job Scheduler and associated components, and start running
+	 * atoms */
+	kbasep_js_resume(kbdev);
+
+	/* Matching idle call, to power off the GPU/cores if we didn't actually
+	 * need it and the policy doesn't want it on */
+	kbase_pm_context_idle(kbdev);
+
+	/* Resume vinstr operation */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+}
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
new file mode 100644
index 0000000..8de17e1
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -0,0 +1,176 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include "mali_kbase_hwaccess_pm.h"
+
+#define PM_ENABLE_IRQS       0x01
+#define PM_HW_ISSUES_DETECT  0x02
+
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return 0 if the power management framework was successfully initialized.
+ */
+int kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @param flags     Flags to pass on to kbase_pm_init_hw
+ *
+ * @return 0 if powerup was successful.
+ */
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ * Should ensure that no new interrupts are generated,
+ * but allow any currently running interrupt handlers to complete successfully.
+ * The GPU is forced off by the time this function returns, regardless of
+ * whether or not the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init)
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It informs the active power policy that the
+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occuring/has occurred whilst this is
+ * in use. Use kbase_pm_contect_active_unless_suspending() instead.
+ *
+ * @note a Suspend is only visible to Kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+	/** A suspend is not expected/not possible - this is the same as
+	 * kbase_pm_context_active() */
+	KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+	/** If we're suspending, fail and don't increase the active count */
+	KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+	/** If we're suspending, succeed and allow the active count to increase iff
+	 * it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+	 *
+	 * This should only be used when there is a bounded time on the activation
+	 * (e.g. guarantee it's going to be idled very soon after) */
+	KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * We returns a status code indicating whether we're allowed to keep the GPU
+ * active during the suspend, depending on the handler code. If the status code
+ * indicates a failure, the caller must abort whatever operation it was
+ * attempting, and potentially queue it up for after the OS has resumed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero     Indicates success
+ * @return non-zero Indicates failure due to the system being suspending/suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power
+ * policy so the calling code should ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from Kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note the mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU e.g. via atom submission.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_vsync_callback - vsync callback
+ *
+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise
+ * @data: Pointer to the kbase device as returned by kbase_find_device()
+ *
+ * Callback function used to notify the power management code that a vsync has
+ * occurred on the display.
+ */
+void kbase_pm_vsync_callback(int buffer_updated, void *data);
+
+#endif				/* _KBASE_PM_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
new file mode 100644
index 0000000..15bca79
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_profiling_gator_api.h
+ * Model interface
+ */
+
+#ifndef _KBASE_PROFILING_GATOR_API_H_
+#define _KBASE_PROFILING_GATOR_API_H_
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control
+ * the frame buffer dumping and s/w counter reporting.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define FBDUMP_CONTROL_MAX (5)
+#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE
+
+void _mali_profiling_control(u32 action, u32 value);
+
+#endif				/* _KBASE_PROFILING_GATOR_API */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
new file mode 100644
index 0000000..763740e
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
@@ -0,0 +1,135 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase.h"
+
+#include "mali_kbase_regs_history_debugfs.h"
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+#include <linux/debugfs.h>
+
+
+static int regs_history_size_get(void *data, u64 *val)
+{
+	struct kbase_io_history *const h = data;
+
+	*val = h->size;
+
+	return 0;
+}
+
+static int regs_history_size_set(void *data, u64 val)
+{
+	struct kbase_io_history *const h = data;
+
+	return kbase_io_history_resize(h, (u16)val);
+}
+
+
+DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
+		regs_history_size_get,
+		regs_history_size_set,
+		"%llu\n");
+
+
+/**
+ * regs_history_show - show callback for the register access history file.
+ *
+ * @sfile: The debugfs entry
+ * @data: Data associated with the entry
+ *
+ * This function is called to dump all recent accesses to the GPU registers.
+ *
+ * @return 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int regs_history_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_io_history *const h = sfile->private;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!h->enabled) {
+		seq_puts(sfile, "The register access history is disabled\n");
+		goto out;
+	}
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	iters = (h->size > h->count) ? h->count : h->size;
+	seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+out:
+	return 0;
+}
+
+
+/**
+ * regs_history_open - open operation for regs_history debugfs file
+ *
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * @return file descriptor
+ */
+static int regs_history_open(struct inode *in, struct file *file)
+{
+	return single_open(file, &regs_history_show, in->i_private);
+}
+
+
+static const struct file_operations regs_history_fops = {
+	.open = &regs_history_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history.enabled);
+	debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history, &regs_history_size_fops);
+	debugfs_create_file("regs_history", S_IRUGO,
+			kbdev->mali_debugfs_directory, &kbdev->io_history,
+			&regs_history_fops);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
new file mode 100644
index 0000000..a0078cb
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
@@ -0,0 +1,55 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Header file for register access history support via debugfs
+ *
+ * This interface is made available via /sys/kernel/debug/mali#/regs_history*.
+ *
+ * Usage:
+ * - regs_history_enabled: whether recording of register accesses is enabled.
+ *   Write 'y' to enable, 'n' to disable.
+ * - regs_history_size: size of the register history buffer, must be > 0
+ * - regs_history: return the information about last accesses to the registers.
+ */
+
+#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H
+#define _KBASE_REGS_HISTORY_DEBUGFS_H
+
+struct kbase_device;
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/**
+ * kbasep_regs_history_debugfs_init - add debugfs entries for register history
+ *
+ * @kbdev: Pointer to kbase_device containing the register history
+ */
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_regs_history_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+#endif  /*_KBASE_REGS_HISTORY_DEBUGFS_H*/
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
new file mode 100755
index 0000000..92101fe
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -0,0 +1,1156 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_replay.c
+ * Replay soft job handlers
+ */
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mem_linux.h>
+
+#define JOB_NOT_STARTED 0
+#define JOB_TYPE_NULL      (1)
+#define JOB_TYPE_VERTEX    (5)
+#define JOB_TYPE_TILER     (7)
+#define JOB_TYPE_FUSED     (8)
+#define JOB_TYPE_FRAGMENT  (9)
+
+#define JOB_HEADER_32_FBD_OFFSET (31*4)
+#define JOB_HEADER_64_FBD_OFFSET (44*4)
+
+#define FBD_POINTER_MASK (~0x3f)
+
+#define SFBD_TILER_OFFSET (48*4)
+
+#define MFBD_TILER_OFFSET       (14*4)
+
+#define FBD_HIERARCHY_WEIGHTS 8
+#define FBD_HIERARCHY_MASK_MASK 0x1fff
+
+#define FBD_TYPE 1
+
+#define HIERARCHY_WEIGHTS 13
+
+#define JOB_HEADER_ID_MAX                 0xffff
+
+#define JOB_SOURCE_ID(status)		(((status) >> 16) & 0xFFFF)
+#define JOB_POLYGON_LIST		(0x03)
+
+struct fragment_job {
+	struct job_descriptor_header header;
+
+	u32 x[2];
+	union {
+		u64 _64;
+		u32 _32;
+	} fragment_fbd;
+};
+
+static void dump_job_head(struct kbase_context *kctx, char *head_str,
+		struct job_descriptor_header *job)
+{
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
+	dev_dbg(kctx->kbdev->dev,
+			"addr                  = %p\n"
+			"exception_status      = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n"
+			"first_incomplete_task = %x\n"
+			"fault_pointer         = %llx\n"
+			"job_descriptor_size   = %x\n"
+			"job_type              = %x\n"
+			"job_barrier           = %x\n"
+			"_reserved_01          = %x\n"
+			"_reserved_02          = %x\n"
+			"_reserved_03          = %x\n"
+			"_reserved_04/05       = %x,%x\n"
+			"job_index             = %x\n"
+			"dependencies          = %x,%x\n",
+			job, job->exception_status,
+			JOB_SOURCE_ID(job->exception_status),
+			(job->exception_status >> 8) & 0x3,
+			job->exception_status  & 0xFF,
+			job->first_incomplete_task,
+			job->fault_pointer, job->job_descriptor_size,
+			job->job_type, job->job_barrier, job->_reserved_01,
+			job->_reserved_02, job->_reserved_03,
+			job->_reserved_04, job->_reserved_05,
+			job->job_index,
+			job->job_dependency_index_1,
+			job->job_dependency_index_2);
+
+	if (job->job_descriptor_size)
+		dev_dbg(kctx->kbdev->dev, "next               = %llx\n",
+				job->next_job._64);
+	else
+		dev_dbg(kctx->kbdev->dev, "next               = %x\n",
+				job->next_job._32);
+#endif
+}
+
+static int kbasep_replay_reset_sfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct {
+		u32 padding_1[1];
+		u32 flags;
+		u64 padding_2[2];
+		u64 heap_free_address;
+		u32 padding[8];
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev,
+		"FBD tiler:\n"
+		"flags = %x\n"
+		"heap_free_address = %llx\n",
+		fbd_tiler->flags, fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = fbd_tiler->flags &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n",
+						i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n",
+			fbd_tiler->heap_free_address, fbd_tiler->flags);
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_replay_reset_mfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct kbase_vmap_struct map;
+	struct {
+		u32 padding_0;
+		u32 flags;
+		u64 padding_1[2];
+		u64 heap_free_address;
+		u64 padding_2;
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev,
+			       "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "FBD tiler:\n"
+			"flags = %x\n"
+			"heap_free_address = %llx\n",
+			fbd_tiler->flags,
+			fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = (fbd_tiler->flags) &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev,
+				" Writing hierarchy level %02d (%08x) to %d\n",
+							     i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of an FBD pointed to by a tiler job
+ *
+ * This performs two functions :
+ * - Set the hierarchy mask
+ * - Reset the tiler free heap address
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] job_header        Address of job header to reset.
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] job_64            true if this job is using 64-bit
+ *                              descriptors
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx,
+		u64 job_header,	u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight,	bool job_64)
+{
+	struct kbase_vmap_struct map;
+	u64 fbd_address;
+
+	if (job_64) {
+		u64 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_64_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	} else {
+		u32 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_32_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	if (fbd_address & FBD_TYPE) {
+		return kbasep_replay_reset_mfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	} else {
+		return kbasep_replay_reset_sfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	}
+}
+
+/**
+ * @brief Reset the status of a job
+ *
+ * This performs the following functions :
+ *
+ * - Reset the Job Status field of each job to NOT_STARTED.
+ * - Set the Job Type field of any Vertex Jobs to Null Job.
+ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of
+ *   the tiler_heap_free parameter, and set the hierarchy level mask to the
+ *   hier_mask parameter.
+ * - Offset HW dependencies by the hw_job_id_offset parameter
+ * - Set the Perform Job Barrier flag if this job is the first in the chain
+ * - Read the address of the next job header
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in,out] job_header    Address of job header to reset. Set to address
+ *                              of next job header on exit.
+ * @param[in] prev_jc           Previous job chain to link to, if this job is
+ *                              the last in the chain.
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] first_in_chain    true if this job is the first in the chain
+ * @param[in] fragment_chain    true if this job is in the fragment chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_job(struct kbase_context *kctx,
+		u64 *job_header, u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool first_in_chain, bool fragment_chain)
+{
+	struct fragment_job *frag_job;
+	struct job_descriptor_header *job;
+	u64 new_job_header;
+	struct kbase_vmap_struct map;
+
+	frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map);
+	if (!frag_job) {
+		dev_err(kctx->kbdev->dev,
+				 "kbasep_replay_parse_jc: failed to map jc\n");
+		return -EINVAL;
+	}
+	job = &frag_job->header;
+
+	dump_job_head(kctx, "Job header:", job);
+
+	if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) {
+		dev_err(kctx->kbdev->dev, "Job already not started\n");
+		goto out_unmap;
+	}
+	job->exception_status = JOB_NOT_STARTED;
+
+	if (job->job_type == JOB_TYPE_VERTEX)
+		job->job_type = JOB_TYPE_NULL;
+
+	if (job->job_type == JOB_TYPE_FUSED) {
+		dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
+		goto out_unmap;
+	}
+
+	if (first_in_chain)
+		job->job_barrier = 1;
+
+	if ((job->job_dependency_index_1 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_dependency_index_2 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+		dev_err(kctx->kbdev->dev,
+			     "Job indicies/dependencies out of valid range\n");
+		goto out_unmap;
+	}
+
+	if (job->job_dependency_index_1)
+		job->job_dependency_index_1 += hw_job_id_offset;
+	if (job->job_dependency_index_2)
+		job->job_dependency_index_2 += hw_job_id_offset;
+
+	job->job_index += hw_job_id_offset;
+
+	if (job->job_descriptor_size) {
+		new_job_header = job->next_job._64;
+		if (!job->next_job._64)
+			job->next_job._64 = prev_jc;
+	} else {
+		new_job_header = job->next_job._32;
+		if (!job->next_job._32)
+			job->next_job._32 = prev_jc;
+	}
+	dump_job_head(kctx, "Updated to:", job);
+
+	if (job->job_type == JOB_TYPE_TILER) {
+		bool job_64 = job->job_descriptor_size != 0;
+
+		if (kbasep_replay_reset_tiler_job(kctx, *job_header,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, job_64) != 0)
+			goto out_unmap;
+
+	} else if (job->job_type == JOB_TYPE_FRAGMENT) {
+		u64 fbd_address;
+
+		if (job->job_descriptor_size)
+			fbd_address = frag_job->fragment_fbd._64;
+		else
+			fbd_address = (u64)frag_job->fragment_fbd._32;
+
+		if (fbd_address & FBD_TYPE) {
+			if (kbasep_replay_reset_mfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		} else {
+			if (kbasep_replay_reset_sfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		}
+	}
+
+	kbase_vunmap(kctx, &map);
+
+	*job_header = new_job_header;
+
+	return 0;
+
+out_unmap:
+	kbase_vunmap(kctx, &map);
+	return -EINVAL;
+}
+
+/**
+ * @brief Find the highest job ID in a job chain
+ *
+ * @param[in] kctx        Context pointer
+ * @param[in] jc          Job chain start address
+ * @param[out] hw_job_id  Highest job ID in chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
+		u64 jc,	u16 *hw_job_id)
+{
+	while (jc) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		dev_dbg(kctx->kbdev->dev,
+			"kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc);
+
+		job = kbase_vmap(kctx, jc, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev, "failed to map jc\n");
+
+			return -EINVAL;
+		}
+
+		if (job->job_index > *hw_job_id)
+			*hw_job_id = job->job_index;
+
+		if (job->job_descriptor_size)
+			jc = job->next_job._64;
+		else
+			jc = job->next_job._32;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a number of jobs
+ *
+ * This function walks the provided job chain, and calls
+ * kbasep_replay_reset_job for each job. It also links the job chain to the
+ * provided previous job chain.
+ *
+ * The function will fail if any of the jobs passed already have status of
+ * NOT_STARTED.
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] jc                Job chain to be processed
+ * @param[in] prev_jc           Job chain to be added to. May be NULL
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] fragment_chain    true if this chain is the fragment chain
+ *
+ * @return 0 on success, error code otherwise
+ */
+static int kbasep_replay_parse_jc(struct kbase_context *kctx,
+		u64 jc,	u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool fragment_chain)
+{
+	bool first_in_chain = true;
+	int nr_jobs = 0;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n",
+			jc, hw_job_id_offset);
+
+	while (jc) {
+		dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc);
+
+		if (kbasep_replay_reset_job(kctx, &jc, prev_jc,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, hw_job_id_offset,
+				first_in_chain, fragment_chain) != 0)
+			return -EINVAL;
+
+		first_in_chain = false;
+
+		nr_jobs++;
+		if (fragment_chain &&
+		    nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) {
+			dev_err(kctx->kbdev->dev,
+				"Exceeded maximum number of jobs in fragment chain\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a replay job, and set up dependencies
+ *
+ * This performs the actions to allow the replay job to be re-run following
+ * completion of the passed dependency.
+ *
+ * @param[in] katom     The atom to be reset
+ * @param[in] dep_atom  The dependency to be attached to the atom
+ */
+static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom,
+		struct kbase_jd_atom *dep_atom)
+{
+	katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+	list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]);
+}
+
+/**
+ * @brief Allocate an unused katom
+ *
+ * This will search the provided context for an unused katom, and will mark it
+ * as KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * If no atoms are available then the function will fail.
+ *
+ * @param[in] kctx      Context pointer
+ * @return An atom ID, or -1 on failure
+ */
+static int kbasep_allocate_katom(struct kbase_context *kctx)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int i;
+
+	for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) {
+		if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) {
+			jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED;
+			dev_dbg(kctx->kbdev->dev,
+				  "kbasep_allocate_katom: Allocated atom %d\n",
+									    i);
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * @brief Release a katom
+ *
+ * This will mark the provided atom as available, and remove any dependencies.
+ *
+ * For use on error path.
+ *
+ * @param[in] kctx      Context pointer
+ * @param[in] atom_id   ID of atom to release
+ */
+static void kbasep_release_katom(struct kbase_context *kctx, int atom_id)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n",
+			atom_id);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[0]))
+		list_del(jctx->atoms[atom_id].dep_head[0].next);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[1]))
+		list_del(jctx->atoms[atom_id].dep_head[1].next);
+
+	jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED;
+}
+
+static void kbasep_replay_create_atom(struct kbase_context *kctx,
+				      struct base_jd_atom_v2 *atom,
+				      int atom_nr,
+				      base_jd_prio prio)
+{
+	atom->nr_extres = 0;
+	atom->extres_list = 0;
+	atom->device_nr = 0;
+	atom->prio = prio;
+	atom->atom_number = atom_nr;
+
+	base_jd_atom_dep_set(&atom->pre_dep[0], 0, BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[1], 0, BASE_JD_DEP_TYPE_INVALID);
+
+	atom->udata.blob[0] = 0;
+	atom->udata.blob[1] = 0;
+}
+
+/**
+ * @brief Create two atoms for the purpose of replaying jobs
+ *
+ * Two atoms are allocated and created. The jc pointer is not set at this
+ * stage. The second atom has a dependency on the first. The remaining fields
+ * are set up as follows :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ *
+ * @param[out] t_atom      Atom to use for tiler jobs
+ * @param[out] f_atom      Atom to use for fragment jobs
+ * @param[in]  prio        Priority of new atom (inherited from replay soft
+ *                         job)
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_create_atoms(struct kbase_context *kctx,
+		struct base_jd_atom_v2 *t_atom,
+		struct base_jd_atom_v2 *f_atom,
+		base_jd_prio prio)
+{
+	int t_atom_nr, f_atom_nr;
+
+	t_atom_nr = kbasep_allocate_katom(kctx);
+	if (t_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		return -EINVAL;
+	}
+
+	f_atom_nr = kbasep_allocate_katom(kctx);
+	if (f_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		kbasep_release_katom(kctx, t_atom_nr);
+		return -EINVAL;
+	}
+
+	kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
+	kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
+
+	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr,
+			     BASE_JD_DEP_TYPE_DATA);
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_DEBUG
+static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload)
+{
+	u64 next;
+
+	dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n");
+	next = payload->tiler_jc_list;
+
+	while (next) {
+		struct kbase_vmap_struct map;
+		base_jd_replay_jc *jc_struct;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map);
+
+		if (!jc_struct)
+			return;
+
+		dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n",
+				jc_struct, jc_struct->jc, jc_struct->next);
+
+		next = jc_struct->next;
+
+		kbase_vunmap(kctx, &map);
+	}
+}
+#endif
+
+/**
+ * @brief Parse a base_jd_replay_payload provided by userspace
+ *
+ * This will read the payload from userspace, and parse the job chains.
+ *
+ * @param[in] kctx         Context pointer
+ * @param[in] replay_atom  Replay soft job atom
+ * @param[in] t_atom       Atom to use for tiler jobs
+ * @param[in] f_atom       Atom to use for fragment jobs
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_parse_payload(struct kbase_context *kctx,
+					      struct kbase_jd_atom *replay_atom,
+					      struct base_jd_atom_v2 *t_atom,
+					      struct base_jd_atom_v2 *f_atom)
+{
+	base_jd_replay_payload *payload = NULL;
+	u64 next;
+	u64 prev_jc = 0;
+	u16 hw_job_id_offset = 0;
+	int ret = -EINVAL;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n",
+			replay_atom->jc, sizeof(payload));
+
+	payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload);
+	dev_dbg(kctx->kbdev->dev, "Payload structure:\n"
+				  "tiler_jc_list            = %llx\n"
+				  "fragment_jc              = %llx\n"
+				  "tiler_heap_free          = %llx\n"
+				  "fragment_hierarchy_mask  = %x\n"
+				  "tiler_hierarchy_mask     = %x\n"
+				  "hierarchy_default_weight = %x\n"
+				  "tiler_core_req           = %x\n"
+				  "fragment_core_req        = %x\n",
+							payload->tiler_jc_list,
+							  payload->fragment_jc,
+						      payload->tiler_heap_free,
+					      payload->fragment_hierarchy_mask,
+						 payload->tiler_hierarchy_mask,
+					     payload->hierarchy_default_weight,
+						       payload->tiler_core_req,
+						   payload->fragment_core_req);
+	payload_dump(kctx, payload);
+#endif
+	t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER;
+	f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER;
+
+	/* Sanity check core requirements*/
+	if ((t_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T ||
+	    (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS ||
+	     t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES ||
+	     f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+
+		int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP;
+		int f_atom_type = f_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC;
+		int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+		int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+
+		if (t_atom_type != BASE_JD_REQ_T) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. Was: 0x%x\n Expected: 0x%x",
+			    t_atom_type, BASE_JD_REQ_T);
+		}
+		if (f_atom_type != BASE_JD_REQ_FS) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. Was 0x%x Expected: 0x%x\n",
+			    f_atom_type, BASE_JD_REQ_FS);
+		}
+		if (t_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n");
+		}
+		if (f_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n");
+		}
+
+		goto out;
+	}
+
+	/* Process tiler job chains */
+	next = payload->tiler_jc_list;
+	if (!next) {
+		dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n");
+		goto out;
+	}
+
+	while (next) {
+		base_jd_replay_jc *jc_struct;
+		struct kbase_vmap_struct jc_map;
+		u64 jc;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map);
+
+		if (!jc_struct) {
+			dev_err(kctx->kbdev->dev, "Failed to map jc struct\n");
+			goto out;
+		}
+
+		jc = jc_struct->jc;
+		next = jc_struct->next;
+		if (next)
+			jc_struct->jc = 0;
+
+		kbase_vunmap(kctx, &jc_map);
+
+		if (jc) {
+			u16 max_hw_job_id = 0;
+
+			if (kbasep_replay_find_hw_job_id(kctx, jc,
+					&max_hw_job_id) != 0)
+				goto out;
+
+			if (kbasep_replay_parse_jc(kctx, jc, prev_jc,
+					payload->tiler_heap_free,
+					payload->tiler_hierarchy_mask,
+					payload->hierarchy_default_weight,
+					hw_job_id_offset, false) != 0) {
+				goto out;
+			}
+
+			hw_job_id_offset += max_hw_job_id;
+
+			prev_jc = jc;
+		}
+	}
+	t_atom->jc = prev_jc;
+
+	/* Process fragment job chain */
+	f_atom->jc = payload->fragment_jc;
+	if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0,
+			payload->tiler_heap_free,
+			payload->fragment_hierarchy_mask,
+			payload->hierarchy_default_weight, 0,
+			true) != 0) {
+		goto out;
+	}
+
+	if (!t_atom->jc || !f_atom->jc) {
+		dev_err(kctx->kbdev->dev, "Invalid payload\n");
+		goto out;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n",
+			t_atom->jc, f_atom->jc);
+	ret = 0;
+
+out:
+	kbase_vunmap(kctx, &map);
+
+	return ret;
+}
+
+static void kbase_replay_process_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+	struct kbase_jd_context *jctx;
+	bool need_to_try_schedule_context = false;
+
+	struct base_jd_atom_v2 t_atom, f_atom;
+	struct kbase_jd_atom *t_katom, *f_katom;
+	base_jd_prio atom_prio;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+
+	mutex_lock(&jctx->lock);
+
+	atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority);
+
+	if (kbasep_replay_create_atoms(
+			kctx, &t_atom, &f_atom, atom_prio) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	t_katom = &jctx->atoms[t_atom.atom_number];
+	f_katom = &jctx->atoms[f_atom.atom_number];
+
+	if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) {
+		kbasep_release_katom(kctx, t_atom.atom_number);
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	kbasep_replay_reset_softjob(katom, f_katom);
+
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom);
+	if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom);
+	if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+out:
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_disjoint_state_down(kctx->kbdev);
+
+		need_to_try_schedule_context |= jd_done_nolock(katom, NULL);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kctx->kbdev);
+
+	mutex_unlock(&jctx->lock);
+}
+
+/**
+ * @brief Check job replay fault
+ *
+ * This will read the job payload, checks fault type and source, then decides
+ * whether replay is required.
+ *
+ * @param[in] katom       The atom to be processed
+ * @return  true (success) if replay required or false on failure.
+ */
+static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	base_jd_replay_payload *payload;
+	u64 job_header;
+	u64 job_loop_detect;
+	struct job_descriptor_header *job;
+	struct kbase_vmap_struct job_map;
+	struct kbase_vmap_struct map;
+	bool err = false;
+
+	/* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or
+	 * if force_replay is enabled.
+	 */
+	if (BASE_JD_EVENT_TERMINATED == katom->event_code) {
+		return false;
+	} else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) {
+		return true;
+	} else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) {
+		katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT;
+		return true;
+	} else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) {
+		/* No replay for faults of type other than
+		 * BASE_JD_EVENT_DATA_INVALID_FAULT.
+		 */
+		return false;
+	}
+
+	/* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc
+	 * to find out whether the source of exception is POLYGON_LIST. Replay
+	 * is required if the source of fault is POLYGON_LIST.
+	 */
+	payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n");
+		return false;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload);
+	dev_dbg(dev, "\nPayload structure:\n"
+		     "fragment_jc              = 0x%llx\n"
+		     "fragment_hierarchy_mask  = 0x%x\n"
+		     "fragment_core_req        = 0x%x\n",
+		     payload->fragment_jc,
+		     payload->fragment_hierarchy_mask,
+		     payload->fragment_core_req);
+#endif
+	/* Process fragment job chain */
+	job_header      = (u64) payload->fragment_jc;
+	job_loop_detect = job_header;
+	while (job_header) {
+		job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map);
+		if (!job) {
+			dev_err(dev, "failed to map jc\n");
+			/* unmap payload*/
+			kbase_vunmap(kctx, &map);
+			return false;
+		}
+
+
+		dump_job_head(kctx, "\njob_head structure:\n", job);
+
+		/* Replay only when the polygon list reader caused the
+		 * DATA_INVALID_FAULT */
+		if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
+		   (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) {
+			err = true;
+			kbase_vunmap(kctx, &job_map);
+			break;
+		}
+
+		/* Move on to next fragment job in the list */
+		if (job->job_descriptor_size)
+			job_header = job->next_job._64;
+		else
+			job_header = job->next_job._32;
+
+		kbase_vunmap(kctx, &job_map);
+
+		/* Job chain loop detected */
+		if (job_header == job_loop_detect)
+			break;
+	}
+
+	/* unmap payload*/
+	kbase_vunmap(kctx, &map);
+
+	return err;
+}
+
+
+/**
+ * @brief Process a replay job
+ *
+ * Called from kbase_process_soft_job.
+ *
+ * On exit, if the job has completed, katom->event_code will have been updated.
+ * If the job has not completed, and is replaying jobs, then the atom status
+ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * @param[in] katom  The atom to be processed
+ * @return           false if the atom has completed
+ *                   true if the atom is replaying jobs
+ */
+bool kbase_replay_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	/* Don't replay this atom if these issues are not present in the
+	 * hardware */
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11020) &&
+			!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11024)) {
+		dev_dbg(kbdev->dev, "Hardware does not need replay workaround");
+
+		/* Signal failure to userspace */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+
+		return false;
+	}
+
+	if (katom->event_code == BASE_JD_EVENT_DONE) {
+		dev_dbg(kbdev->dev, "Previous job succeeded - not replaying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		dev_dbg(kbdev->dev, "Not replaying; context is dying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	/* Check job exception type and source before replaying. */
+	if (!kbase_replay_fault_check(katom)) {
+		dev_dbg(kbdev->dev,
+			"Replay cancelled on event %x\n", katom->event_code);
+		/* katom->event_code is already set to the failure code of the
+		 * previous job.
+		 */
+		return false;
+	}
+
+	dev_warn(kbdev->dev, "Replaying jobs retry=%d\n",
+			katom->retry_count);
+
+	katom->retry_count++;
+
+	if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) {
+		dev_err(kbdev->dev, "Replay exceeded limit - failing jobs\n");
+
+		kbase_disjoint_state_down(kbdev);
+
+		/* katom->event_code is already set to the failure code of the
+		   previous job */
+		return false;
+	}
+
+	/* only enter the disjoint state once for the whole time while the replay is ongoing */
+	if (katom->retry_count == 1)
+		kbase_disjoint_state_up(kbdev);
+
+	INIT_WORK(&katom->work, kbase_replay_process_worker);
+	queue_work(kctx->event_workq, &katom->work);
+
+	return true;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
new file mode 100644
index 0000000..2176479
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
@@ -0,0 +1,79 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+#include <mali_kbase_smc.h>
+
+#include <linux/compiler.h>
+
+static noinline u64 invoke_smc_fid(u64 function_id,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	register u64 x0 asm("x0") = function_id;
+	register u64 x1 asm("x1") = arg0;
+	register u64 x2 asm("x2") = arg1;
+	register u64 x3 asm("x3") = arg2;
+
+	asm volatile(
+			__asmeq("%0", "x0")
+			__asmeq("%1", "x1")
+			__asmeq("%2", "x2")
+			__asmeq("%3", "x3")
+			"smc    #0\n"
+			: "+r" (x0)
+			: "r" (x1), "r" (x2), "r" (x3));
+
+	return x0;
+}
+
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2)
+{
+	/* Is fast call (bit 31 set) */
+	KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL);
+	/* bits 16-23 must be zero for fast calls */
+	KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0);
+
+	return invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	/* Only the six bits allowed should be used. */
+	KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0);
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return kbase_invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+#endif /* CONFIG_ARM64 */
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
new file mode 100644
index 0000000..221eb21
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_SMC_H_
+#define _KBASE_SMC_H_
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+
+#define SMC_FAST_CALL (1 << 31)
+#define SMC_64 (1 << 30)
+
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */
+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET)
+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET)
+
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @fid: The SMC function to call, see SMC Calling convention.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC.
+  */
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2);
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @oen: Owning Entity number (SIP, STD etc).
+  * @function_number: The function number within the OEN.
+  * @smc64: use SMC64 calling convention instead of SMC32.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC call.
+  */
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2);
+
+#endif /* CONFIG_ARM64 */
+
+#endif /* _KBASE_SMC_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
new file mode 100755
index 0000000..01b3087
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -0,0 +1,1544 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#include <asm/cacheflush.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif
+#include <linux/dma-mapping.h>
+#include <mali_base_kernel.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+#include <linux/version.h>
+#include <linux/ktime.h>
+#include <linux/pfn.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/cache.h>
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+static void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_add_tail(&katom->queue, &kctx->waiting_soft_jobs);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_del(&katom->queue);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Record the start time of this atom so we could cancel it at
+	 * the right time.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* Add the atom to the waiting list before the timer is
+	 * (re)started to make sure that it gets processed.
+	 */
+	kbasep_add_waiting_soft_job(katom);
+
+	/* Schedule timeout of this atom after a period if it is not active */
+	if (!timer_pending(&kctx->soft_job_timeout)) {
+		int timeout_ms = atomic_read(
+				&kctx->kbdev->js_data.soft_job_timeout_ms);
+		mod_timer(&kctx->soft_job_timeout,
+			  jiffies + msecs_to_jiffies(timeout_ms));
+	}
+}
+
+static int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*status = *mapped_evt;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	if ((new_status != BASE_JD_SOFT_EVENT_SET) &&
+	    (new_status != BASE_JD_SOFT_EVENT_RESET))
+		return -EINVAL;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*mapped_evt = new_status;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+	struct kbase_vmap_struct map;
+	void *user_result;
+	struct timespec ts;
+	struct base_dump_cpu_gpu_counters data;
+	u64 system_time;
+	u64 cycle_counter;
+	u64 jc = katom->jc;
+	struct kbase_context *kctx = katom->kctx;
+	int pm_active_err;
+
+	memset(&data, 0, sizeof(data));
+
+	/* Take the PM active reference as late as possible - otherwise, it could
+	 * delay suspend until we process the atom (which may be at the end of a
+	 * long chain of dependencies */
+	pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+	if (pm_active_err) {
+		struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+		/* We're suspended - queue this on the list of suspended jobs
+		 * Use dep_item[1], because dep_item[0] was previously in use
+		 * for 'waiting_soft_jobs'.
+		 */
+		mutex_lock(&js_devdata->runpool_mutex);
+		list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		/* Also adding this to the list of waiting soft job */
+		kbasep_add_waiting_soft_job(katom);
+
+		return pm_active_err;
+	}
+
+	kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
+									&ts);
+
+	kbase_pm_context_idle(kctx->kbdev);
+
+	data.sec = ts.tv_sec;
+	data.usec = ts.tv_nsec / 1000;
+	data.system_time = system_time;
+	data.cycle_counter = cycle_counter;
+
+	/* Assume this atom will be cancelled until we know otherwise */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* GPU_WR access is checked on the range for returning the result to
+	 * userspace for the following reasons:
+	 * - security, this is currently how imported user bufs are checked.
+	 * - userspace ddk guaranteed to assume region was mapped as GPU_WR */
+	user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map);
+	if (!user_result)
+		return 0;
+
+	memcpy(user_result, &data, sizeof(data));
+
+	kbase_vunmap(kctx, &map);
+
+	/* Atom was fine - mark it as done */
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	return 0;
+}
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+/* Called by the explicit fence mechanism when a fence wait has completed */
+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(kctx->kbdev);
+	mutex_unlock(&kctx->jctx.lock);
+}
+#endif
+
+static void kbasep_soft_event_complete_job(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt)
+{
+	int cancel_timer = 1;
+	struct list_head *entry, *tmp;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, queue);
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			if (katom->jc == evt) {
+				list_del(&katom->queue);
+
+				katom->event_code = BASE_JD_EVENT_DONE;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				/* There are still other waiting jobs, we cannot
+				 * cancel the timer yet.
+				 */
+				cancel_timer = 0;
+			}
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			/* Keep the timer running if fence debug is enabled and
+			 * there are waiting fence jobs.
+			 */
+			cancel_timer = 0;
+			break;
+#endif
+		}
+	}
+
+	if (cancel_timer)
+		del_timer(&kctx->soft_job_timeout);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep;
+
+		list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) {
+			if (dep->status == KBASE_JD_ATOM_STATE_UNUSED ||
+			    dep->status == KBASE_JD_ATOM_STATE_COMPLETED)
+				continue;
+
+			if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					== BASE_JD_REQ_SOFT_FENCE_TRIGGER) {
+				/* Found blocked trigger fence. */
+				struct kbase_sync_fence_info info;
+
+				if (!kbase_sync_fence_in_info_get(dep, &info)) {
+					dev_warn(dev,
+						 "\tVictim trigger atom %d fence [%p] %s: %s\n",
+						 kbase_jd_atom_id(kctx, dep),
+						 info.fence,
+						 info.name,
+						 kbase_sync_status_string(info.status));
+				 }
+			}
+
+			kbase_fence_debug_check_atom(dep);
+		}
+	}
+}
+
+static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = katom->kctx->kbdev->dev;
+	int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms);
+	unsigned long lflags;
+	struct kbase_sync_fence_info info;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+
+	if (kbase_sync_fence_in_info_get(katom, &info)) {
+		/* Fence must have signaled just after timeout. */
+		spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+		return;
+	}
+
+	dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n",
+		 kctx->tgid, kctx->id,
+		 kbase_jd_atom_id(kctx, katom),
+		 info.fence, timeout_ms);
+	dev_warn(dev, "\tGuilty fence [%p] %s: %s\n",
+		 info.fence, info.name,
+		 kbase_sync_status_string(info.status));
+
+	/* Search for blocked trigger atoms */
+	kbase_fence_debug_check_atom(katom);
+
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+
+	kbase_sync_fence_in_dump(katom);
+}
+
+struct kbase_fence_debug_work {
+	struct kbase_jd_atom *katom;
+	struct work_struct work;
+};
+
+static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work)
+{
+	struct kbase_fence_debug_work *w = container_of(work,
+			struct kbase_fence_debug_work, work);
+	struct kbase_jd_atom *katom = w->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_fence_debug_wait_timeout(katom);
+	mutex_unlock(&kctx->jctx.lock);
+
+	kfree(w);
+}
+
+static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_debug_work *work;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Enqueue fence debug worker. Use job_done_wq to get
+	 * debug print ordered with job completion.
+	 */
+	work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC);
+	/* Ignore allocation failure. */
+	if (work) {
+		work->katom = katom;
+		INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker);
+		queue_work(kctx->jctx.job_done_wq, &work->work);
+	}
+}
+#endif /* CONFIG_MALI_FENCE_DEBUG */
+
+void kbasep_soft_job_timeout_worker(struct timer_list *timer)
+{
+	struct kbase_context *kctx = container_of(timer, struct kbase_context,
+			soft_job_timeout);
+	u32 timeout_ms = (u32)atomic_read(
+			&kctx->kbdev->js_data.soft_job_timeout_ms);
+	ktime_t cur_time = ktime_get();
+	bool restarting = false;
+	unsigned long lflags;
+	struct list_head *entry, *tmp;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(entry,
+				struct kbase_jd_atom, queue);
+		s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time,
+					katom->start_timestamp));
+
+		if (elapsed_time < (s64)timeout_ms) {
+			restarting = true;
+			continue;
+		}
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			/* Take it out of the list to ensure that it
+			 * will be cancelled in all cases
+			 */
+			list_del(&katom->queue);
+
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			INIT_WORK(&katom->work, kbasep_soft_event_complete_job);
+			queue_work(kctx->jctx.job_done_wq, &katom->work);
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			kbase_fence_debug_timeout(katom);
+			break;
+#endif
+		}
+	}
+
+	if (restarting)
+		mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms));
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned char status;
+
+	/* The status of this soft-job is stored in jc */
+	if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return 0;
+	}
+
+	if (status == BASE_JD_SOFT_EVENT_SET)
+		return 0; /* Event already set, nothing to do */
+
+	kbasep_add_waiting_with_timeout(katom);
+
+	return 1;
+}
+
+static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom,
+				     unsigned char new_status)
+{
+	/* Complete jobs waiting on the same event */
+	struct kbase_context *kctx = katom->kctx;
+
+	if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, katom->jc);
+}
+
+/**
+ * kbase_soft_event_update() - Update soft event state
+ * @kctx: Pointer to context
+ * @event: Event to update
+ * @new_status: New status value of event
+ *
+ * Update the event, and wake up any atoms waiting for the event.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+int kbase_soft_event_update(struct kbase_context *kctx,
+			     u64 event,
+			     unsigned char new_status)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->jctx.lock);
+
+	if (kbasep_write_soft_event_status(kctx, event, new_status)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, event);
+
+out:
+	mutex_unlock(&kctx->jctx.lock);
+
+	return err;
+}
+
+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
+{
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+struct kbase_debug_copy_buffer {
+	size_t size;
+	struct page **pages;
+	int nr_pages;
+	size_t offset;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+
+	struct page **extres_pages;
+	int nr_extres_pages;
+};
+
+static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer)
+{
+	struct page **pages = buffer->extres_pages;
+	int nr_pages = buffer->nr_extres_pages;
+
+	if (pages) {
+		int i;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *pg = pages[i];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(pages);
+	}
+}
+
+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers = katom->softjob_data;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+
+	if (!buffers)
+		return;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	for (i = 0; i < nr; i++) {
+		int p;
+		struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc;
+
+		if (!buffers[i].pages)
+			break;
+		for (p = 0; p < buffers[i].nr_pages; p++) {
+			struct page *pg = buffers[i].pages[p];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(buffers[i].pages);
+		if (gpu_alloc) {
+			switch (gpu_alloc->type) {
+			case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+			{
+				free_user_buffer(&buffers[i]);
+				break;
+			}
+			default:
+				/* Nothing to be done. */
+				break;
+			}
+			kbase_mem_phy_alloc_put(gpu_alloc);
+		}
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+	kfree(buffers);
+
+	katom->softjob_data = NULL;
+}
+
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers;
+	struct base_jd_debug_copy_buffer *user_buffers = NULL;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+	int ret = 0;
+	void __user *user_structs = (void __user *)(uintptr_t)katom->jc;
+
+	if (!user_structs)
+		return -EINVAL;
+
+	buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+	katom->softjob_data = buffers;
+
+	user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL);
+
+	if (!user_buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+
+	ret = copy_from_user(user_buffers, user_structs,
+			sizeof(*user_buffers)*nr);
+	if (ret) {
+		ret = -EFAULT;
+		goto out_cleanup;
+	}
+
+	for (i = 0; i < nr; i++) {
+		u64 addr = user_buffers[i].address;
+		u64 page_addr = addr & PAGE_MASK;
+		u64 end_page_addr = addr + user_buffers[i].size - 1;
+		u64 last_page_addr = end_page_addr & PAGE_MASK;
+		int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1;
+		int pinned_pages;
+		struct kbase_va_region *reg;
+		struct base_external_resource user_extres;
+
+		if (!addr)
+			continue;
+
+		buffers[i].nr_pages = nr_pages;
+		buffers[i].offset = addr & ~PAGE_MASK;
+		if (buffers[i].offset >= PAGE_SIZE) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+		buffers[i].size = user_buffers[i].size;
+
+		buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *),
+				GFP_KERNEL);
+		if (!buffers[i].pages) {
+			ret = -ENOMEM;
+			goto out_cleanup;
+		}
+
+		pinned_pages = get_user_pages_fast(page_addr,
+					nr_pages,
+					1, /* Write */
+					buffers[i].pages);
+		if (pinned_pages < 0) {
+			ret = pinned_pages;
+			goto out_cleanup;
+		}
+		if (pinned_pages != nr_pages) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		user_extres = user_buffers[i].extres;
+		if (user_extres.ext_resource == 0ULL) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		kbase_gpu_vm_lock(katom->kctx);
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, user_extres.ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+		if (NULL == reg || NULL == reg->gpu_alloc ||
+				(reg->flags & KBASE_REG_FREE)) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		buffers[i].nr_extres_pages = reg->nr_pages;
+
+		if (reg->nr_pages*PAGE_SIZE != buffers[i].size)
+			dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n");
+
+		switch (reg->gpu_alloc->type) {
+		case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		{
+			struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+			unsigned long nr_pages =
+				alloc->imported.user_buf.nr_pages;
+
+			if (alloc->imported.user_buf.mm != current->mm) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			buffers[i].extres_pages = kcalloc(nr_pages,
+					sizeof(struct page *), GFP_KERNEL);
+			if (!buffers[i].extres_pages) {
+				ret = -ENOMEM;
+				goto out_unlock;
+			}
+
+			ret = get_user_pages_fast(
+					alloc->imported.user_buf.address,
+					nr_pages, 0,
+					buffers[i].extres_pages);
+			if (ret != nr_pages)
+				goto out_unlock;
+			ret = 0;
+			break;
+		}
+		default:
+			/* Nothing to be done. */
+			break;
+		}
+		kbase_gpu_vm_unlock(katom->kctx);
+	}
+	kfree(user_buffers);
+
+	return ret;
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+
+out_cleanup:
+	/* Frees allocated memory for kbase_debug_copy_job struct, including
+	 * members, and sets jc to 0 */
+	kbase_debug_copy_finish(katom);
+	kfree(user_buffers);
+
+	return ret;
+}
+
+static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+		void *extres_page, struct page **pages, unsigned int nr_pages,
+		unsigned int *target_page_nr, size_t offset, size_t *to_copy)
+{
+	void *target_page = kmap(pages[*target_page_nr]);
+	size_t chunk = PAGE_SIZE-offset;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	chunk = min(chunk, *to_copy);
+
+	memcpy(target_page + offset, extres_page, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+
+	*target_page_nr += 1;
+	if (*target_page_nr >= nr_pages)
+		return;
+
+	target_page = kmap(pages[*target_page_nr]);
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(target_page);
+
+	chunk = min(offset, *to_copy);
+	memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+}
+
+static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data)
+{
+	unsigned int i;
+	unsigned int target_page_nr = 0;
+	struct page **pages = buf_data->pages;
+	u64 offset = buf_data->offset;
+	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
+	size_t to_copy = min(extres_size, buf_data->size);
+	struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc;
+	int ret = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	size_t dma_to_copy;
+#endif
+
+	KBASE_DEBUG_ASSERT(pages != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	if (!gpu_alloc) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	switch (gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+	{
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+			struct page *pg = buf_data->extres_pages[i];
+			void *extres_page = kmap(pg);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			kunmap(pg);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		break;
+	}
+	break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf;
+
+		KBASE_DEBUG_ASSERT(dma_buf != NULL);
+		if (dma_buf->size > buf_data->nr_extres_pages * PAGE_SIZE)
+			dev_warn(kctx->kbdev->dev, "External resources buffer size mismatch");
+
+		dma_to_copy = min(dma_buf->size,
+			(size_t)(buf_data->nr_extres_pages * PAGE_SIZE));
+		ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, dma_to_copy,
+#endif
+				DMA_FROM_DEVICE);
+		if (ret)
+			goto out_unlock;
+
+		for (i = 0; i < dma_to_copy/PAGE_SIZE; i++) {
+
+			void *extres_page = dma_buf_kmap(dma_buf, i);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			dma_buf_kunmap(dma_buf, i, extres_page);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, dma_to_copy,
+#endif
+				DMA_FROM_DEVICE);
+		break;
+	}
+#endif
+	default:
+		ret = -EINVAL;
+	}
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+
+}
+
+static int kbase_debug_copy(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers = katom->softjob_data;
+	unsigned int i;
+
+	if (WARN_ON(!buffers))
+		return -EINVAL;
+
+	for (i = 0; i < katom->nr_extres; i++) {
+		int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]);
+
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	struct base_jit_alloc_info *info;
+	struct kbase_context *kctx = katom->kctx;
+	int ret;
+
+	/* Fail the job if there is no info structure */
+	if (!data) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(info, data, sizeof(*info)) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* If the ID is zero then fail the job */
+	if (info->id == 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Sanity check that the PA fits within the VA */
+	if (info->va_pages < info->commit_pages) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Ensure the GPU address is correctly aligned */
+	if ((info->gpu_alloc_addr & 0x7) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	if (kctx->jit_version == 1) {
+		/* Old JIT didn't have usage_id, max_allocations, bin_id
+		 * or padding, so force them to zero
+		 */
+		info->usage_id = 0;
+		info->max_allocations = 0;
+		info->bin_id = 0;
+		info->flags = 0;
+		memset(info->padding, 0, sizeof(info->padding));
+	} else {
+		int i;
+
+		/* Check padding is all zeroed */
+		for (i = 0; i < sizeof(info->padding); i++) {
+			if (info->padding[i] != 0) {
+				ret = -EINVAL;
+				goto free_info;
+			}
+		}
+
+		/* No bit other than TILER_ALIGN_TOP shall be set */
+		if (info->flags & ~BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) {
+			ret = -EINVAL;
+			goto free_info;
+		}
+	}
+
+	katom->softjob_data = info;
+	katom->jit_blocked = false;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
+
+	/*
+	 * Note:
+	 * The provided info->gpu_alloc_addr isn't validated here as
+	 * userland can cache allocations which means that even
+	 * though the region is valid it doesn't represent the
+	 * same thing it used to.
+	 *
+	 * Complete validation of va_pages, commit_pages and extent
+	 * isn't done here as it will be done during the call to
+	 * kbase_mem_alloc.
+	 */
+	return 0;
+
+free_info:
+	kfree(info);
+fail:
+	return ret;
+}
+
+static u8 kbase_jit_free_get_id(struct kbase_jd_atom *katom)
+{
+	if (WARN_ON((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) !=
+				BASE_JD_REQ_SOFT_JIT_FREE))
+		return 0;
+
+	return (u8) katom->jc;
+}
+
+static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct base_jit_alloc_info *info;
+	struct kbase_va_region *reg;
+	struct kbase_vmap_struct mapping;
+	u64 *ptr, new_addr;
+
+	if (katom->jit_blocked) {
+		list_del(&katom->queue);
+		katom->jit_blocked = false;
+	}
+
+	info = katom->softjob_data;
+
+	if (WARN_ON(!info)) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return 0;
+	}
+
+	/* The JIT ID is still in use so fail the allocation */
+	if (kctx->jit_alloc[info->id]) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return 0;
+	}
+
+	/* Create a JIT allocation */
+	reg = kbase_jit_allocate(kctx, info);
+	if (!reg) {
+		struct kbase_jd_atom *jit_atom;
+		bool can_block = false;
+
+		lockdep_assert_held(&kctx->jctx.lock);
+
+		jit_atom = list_first_entry(&kctx->jit_atoms_head,
+				struct kbase_jd_atom, jit_node);
+
+		list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) {
+			if (jit_atom == katom)
+				break;
+			if ((jit_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
+					BASE_JD_REQ_SOFT_JIT_FREE) {
+				u8 free_id = kbase_jit_free_get_id(jit_atom);
+
+				if (free_id && kctx->jit_alloc[free_id]) {
+					/* A JIT free which is active and
+					 * submitted before this atom
+					 */
+					can_block = true;
+					break;
+				}
+			}
+		}
+
+		if (!can_block) {
+			/* Mark the allocation so we know it's in use even if
+			 * the allocation itself fails.
+			 */
+			kctx->jit_alloc[info->id] =
+				(struct kbase_va_region *) -1;
+
+			katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+			return 0;
+		}
+
+		/* There are pending frees for an active allocation
+		 * so we should wait to see whether they free the memory.
+		 * Add to the beginning of the list to ensure that the atom is
+		 * processed only once in kbase_jit_free_finish
+		 */
+		list_add(&katom->queue, &kctx->jit_pending_alloc);
+		katom->jit_blocked = true;
+
+		return 1;
+	}
+
+	/*
+	 * Write the address of the JIT allocation to the user provided
+	 * GPU allocation.
+	 */
+	ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+			&mapping);
+	if (!ptr) {
+		/*
+		 * Leave the allocation "live" as the JIT free jit will be
+		 * submitted anyway.
+		 */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return 0;
+	}
+
+	new_addr = reg->start_pfn << PAGE_SHIFT;
+	*ptr = new_addr;
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(
+			katom, info->gpu_alloc_addr, new_addr);
+	kbase_vunmap(kctx, &mapping);
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	/*
+	 * Bind it to the user provided ID. Do this last so we can check for
+	 * the JIT free racing this JIT alloc job.
+	 */
+	kctx->jit_alloc[info->id] = reg;
+
+	return 0;
+}
+
+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom)
+{
+	struct base_jit_alloc_info *info;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Remove atom from jit_atoms_head list */
+	list_del(&katom->jit_node);
+
+	if (katom->jit_blocked) {
+		list_del(&katom->queue);
+		katom->jit_blocked = false;
+	}
+
+	info = katom->softjob_data;
+	/* Free the info structure */
+	kfree(info);
+}
+
+static int kbase_jit_free_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
+
+	return 0;
+}
+
+static void kbase_jit_free_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u8 id = kbase_jit_free_get_id(katom);
+
+	/*
+	 * If the ID is zero or it is not in use yet then fail the job.
+	 */
+	if ((id == 0) || (kctx->jit_alloc[id] == NULL)) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	/*
+	 * If the ID is valid but the allocation request failed still succeed
+	 * this soft job but don't try and free the allocation.
+	 */
+	if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1)
+		kbase_jit_free(kctx, kctx->jit_alloc[id]);
+
+	kctx->jit_alloc[id] = NULL;
+}
+
+static void kbasep_jit_free_finish_worker(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_finish_soft_job(katom);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+static void kbase_jit_free_finish(struct kbase_jd_atom *katom)
+{
+	struct list_head *i, *tmp;
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	/* Remove this atom from the kctx->jit_atoms_head list */
+	list_del(&katom->jit_node);
+
+	list_for_each_safe(i, tmp, &kctx->jit_pending_alloc) {
+		struct kbase_jd_atom *pending_atom = list_entry(i,
+				struct kbase_jd_atom, queue);
+		if (kbase_jit_allocate_process(pending_atom) == 0) {
+			/* Atom has completed */
+			INIT_WORK(&pending_atom->work,
+					kbasep_jit_free_finish_worker);
+			queue_work(kctx->jctx.job_done_wq, &pending_atom->work);
+		}
+	}
+}
+
+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
+{
+	__user struct base_external_resource_list *user_ext_res;
+	struct base_external_resource_list *ext_res;
+	u64 count = 0;
+	size_t copy_size;
+	int ret;
+
+	user_ext_res = (__user struct base_external_resource_list *)
+			(uintptr_t) katom->jc;
+
+	/* Fail the job if there is no info structure */
+	if (!user_ext_res) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Is the number of external resources in range? */
+	if (!count || count > BASE_EXT_RES_COUNT_MAX) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	copy_size = sizeof(*ext_res);
+	copy_size += sizeof(struct base_external_resource) * (count - 1);
+	ext_res = kzalloc(copy_size, GFP_KERNEL);
+	if (!ext_res) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/*
+	 * Overwrite the count with the first value incase it was changed
+	 * after the fact.
+	 */
+	ext_res->count = count;
+
+	katom->softjob_data = ext_res;
+
+	return 0;
+
+free_info:
+	kfree(ext_res);
+fail:
+	return ret;
+}
+
+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
+{
+	struct base_external_resource_list *ext_res;
+	int i;
+	bool failed = false;
+
+	ext_res = katom->softjob_data;
+	if (!ext_res)
+		goto failed_jc;
+
+	kbase_gpu_vm_lock(katom->kctx);
+
+	for (i = 0; i < ext_res->count; i++) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		if (map) {
+			if (!kbase_sticky_resource_acquire(katom->kctx,
+					gpu_addr))
+				goto failed_loop;
+		} else
+			if (!kbase_sticky_resource_release(katom->kctx, NULL,
+					gpu_addr))
+				failed = true;
+	}
+
+	/*
+	 * In the case of unmap we continue unmapping other resources in the
+	 * case of failure but will always report failure if _any_ unmap
+	 * request fails.
+	 */
+	if (failed)
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	else
+		katom->event_code = BASE_JD_EVENT_DONE;
+
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	return;
+
+failed_loop:
+	while (i > 0) {
+		u64 const gpu_addr = ext_res->ext_res[i - 1].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+
+		kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr);
+
+		--i;
+	}
+
+	katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	kbase_gpu_vm_unlock(katom->kctx);
+
+failed_jc:
+	return;
+}
+
+static void kbase_ext_res_finish(struct kbase_jd_atom *katom)
+{
+	struct base_external_resource_list *ext_res;
+
+	ext_res = katom->softjob_data;
+	/* Free the info structure */
+	kfree(ext_res);
+}
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom)
+{
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(katom);
+
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		return kbase_dump_cpu_gpu_time(katom);
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		katom->event_code = kbase_sync_fence_out_trigger(katom,
+				katom->event_code == BASE_JD_EVENT_DONE ?
+								0 : -EFAULT);
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+	{
+		int ret = kbase_sync_fence_in_wait(katom);
+
+		if (ret == 1) {
+#ifdef CONFIG_MALI_FENCE_DEBUG
+			kbasep_add_waiting_with_timeout(katom);
+#else
+			kbasep_add_waiting_soft_job(katom);
+#endif
+		}
+		return ret;
+	}
+#endif
+
+	case BASE_JD_REQ_SOFT_REPLAY:
+		return kbase_replay_process(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		return kbasep_soft_event_wait(katom);
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET);
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+	{
+		int res = kbase_debug_copy(katom);
+
+		if (res)
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		break;
+	}
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_process(katom);
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_process(katom, true);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_process(katom, false);
+		break;
+	}
+
+	/* Atom is complete */
+	return 0;
+}
+
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		kbase_sync_fence_in_cancel_wait(katom);
+		break;
+#endif
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		kbasep_soft_event_cancel_job(katom);
+		break;
+	default:
+		/* This soft-job doesn't support cancellation! */
+		KBASE_DEBUG_ASSERT(0);
+	}
+}
+
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		{
+			if (!IS_ALIGNED(katom->jc, cache_line_size()))
+				return -EINVAL;
+		}
+		break;
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		{
+			struct base_fence fence;
+			int fd;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			fd = kbase_sync_fence_out_create(katom,
+							 fence.basep.stream_fd);
+			if (fd < 0)
+				return -EINVAL;
+
+			fence.basep.fd = fd;
+			if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+				kbase_sync_fence_out_remove(katom);
+				kbase_sync_fence_close_fd(fd);
+				fence.basep.fd = -EINVAL;
+				return -EINVAL;
+			}
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		{
+			struct base_fence fence;
+			int ret;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			/* Get a reference to the fence object */
+			ret = kbase_sync_fence_in_from_fd(katom,
+							  fence.basep.fd);
+			if (ret < 0)
+				return ret;
+
+#ifdef CONFIG_MALI_DMA_FENCE
+			/*
+			 * Set KCTX_NO_IMPLICIT_FENCE in the context the first
+			 * time a soft fence wait job is observed. This will
+			 * prevent the implicit dma-buf fence to conflict with
+			 * the Android native sync fences.
+			 */
+			if (!kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC))
+				kbase_ctx_flag_set(katom->kctx, KCTX_NO_IMPLICIT_SYNC);
+#endif /* CONFIG_MALI_DMA_FENCE */
+		}
+		break;
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_prepare(katom);
+	case BASE_JD_REQ_SOFT_REPLAY:
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		return kbase_jit_free_prepare(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		if (katom->jc == 0)
+			return -EINVAL;
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		return kbase_debug_copy_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		return kbase_ext_res_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		return kbase_ext_res_prepare(katom);
+	default:
+		/* Unsupported soft-job */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void kbase_finish_soft_job(struct kbase_jd_atom *katom)
+{
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(katom);
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		/* Nothing to do */
+		break;
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		/* If fence has not yet been signaled, do it now */
+		kbase_sync_fence_out_trigger(katom, katom->event_code ==
+				BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		/* Release katom's reference to fence object */
+		kbase_sync_fence_in_remove(katom);
+		break;
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		kbase_debug_copy_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_finish(katom);
+		break;
+	}
+}
+
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
+{
+	LIST_HEAD(local_suspended_soft_jobs);
+	struct kbase_jd_atom *tmp_iter;
+	struct kbase_jd_atom *katom_iter;
+	struct kbasep_js_device_data *js_devdata;
+	bool resched = false;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	js_devdata = &kbdev->js_data;
+
+	/* Move out the entire list */
+	mutex_lock(&js_devdata->runpool_mutex);
+	list_splice_init(&js_devdata->suspended_soft_jobs_list,
+			&local_suspended_soft_jobs);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/*
+	 * Each atom must be detached from the list and ran separately -
+	 * it could be re-added to the old list, but this is unlikely
+	 */
+	list_for_each_entry_safe(katom_iter, tmp_iter,
+			&local_suspended_soft_jobs, dep_item[1]) {
+		struct kbase_context *kctx = katom_iter->kctx;
+
+		mutex_lock(&kctx->jctx.lock);
+
+		/* Remove from the global list */
+		list_del(&katom_iter->dep_item[1]);
+		/* Remove from the context's list of waiting soft jobs */
+		kbasep_remove_waiting_soft_job(katom_iter);
+
+		if (kbase_process_soft_job(katom_iter) == 0) {
+			kbase_finish_soft_job(katom_iter);
+			resched |= jd_done_nolock(katom_iter, NULL);
+		} else {
+			KBASE_DEBUG_ASSERT((katom_iter->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE)
+					!= BASE_JD_REQ_SOFT_REPLAY);
+		}
+
+		mutex_unlock(&kctx->jctx.lock);
+	}
+
+	if (resched)
+		kbase_js_sched_all(kbdev);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
new file mode 100644
index 0000000..22caa4a
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
@@ -0,0 +1,28 @@
+ /*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include "mali_kbase_strings.h"
+
+#define KBASE_DRV_NAME "mali"
+#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline"
+
+const char kbase_drv_name[] = KBASE_DRV_NAME;
+const char kbase_timeline_name[] = KBASE_TIMELINE_NAME;
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
new file mode 100644
index 0000000..d2f1825
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+extern const char kbase_drv_name[];
+extern const char kbase_timeline_name[];
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
new file mode 100644
index 0000000..a7690b2
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -0,0 +1,208 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_sync.h
+ *
+ * This file contains our internal "API" for explicit fences.
+ * It hides the implementation details of the actual explicit fence mechanism
+ * used (Android fences or sync file with DMA fences).
+ */
+
+#ifndef MALI_KBASE_SYNC_H
+#define MALI_KBASE_SYNC_H
+
+#include <linux/syscalls.h>
+#ifdef CONFIG_SYNC
+#include <sync.h>
+#endif
+#ifdef CONFIG_SYNC_FILE
+#include "mali_kbase_fence_defs.h"
+#include <linux/sync_file.h>
+#endif
+
+#include "mali_kbase.h"
+
+/**
+ * struct kbase_sync_fence_info - Information about a fence
+ * @fence: Pointer to fence (type is void*, as underlaying struct can differ)
+ * @name: The name given to this fence when it was created
+ * @status: < 0 means error, 0 means active, 1 means signaled
+ *
+ * Use kbase_sync_fence_in_info_get() or kbase_sync_fence_out_info_get()
+ * to get the information.
+ */
+struct kbase_sync_fence_info {
+	void *fence;
+	char name[32];
+	int status;
+};
+
+/**
+ * kbase_sync_fence_stream_create() - Create a stream object
+ * @name: Name of stream (only used to ease debugging/visualization)
+ * @out_fd: A file descriptor representing the created stream object
+ *
+ * Can map down to a timeline implementation in some implementations.
+ * Exposed as a file descriptor.
+ * Life-time controlled via the file descriptor:
+ * - dup to add a ref
+ * - close to remove a ref
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd);
+
+/**
+ * kbase_sync_fence_out_create Create an explicit output fence to specified atom
+ * @katom: Atom to assign the new explicit fence to
+ * @stream_fd: File descriptor for stream object to create fence on
+ *
+ * return: Valid file descriptor to fence or < 0 on error
+ */
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd);
+
+/**
+ * kbase_sync_fence_in_from_fd() Assigns an existing fence to specified atom
+ * @katom: Atom to assign the existing explicit fence to
+ * @fd: File descriptor to an existing fence
+ *
+ * Assigns an explicit input fence to atom.
+ * This can later be waited for by calling @kbase_sync_fence_in_wait
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd);
+
+/**
+ * kbase_sync_fence_validate() - Validate a fd to be a valid fence
+ * @fd: File descriptor to check
+ *
+ * This function is only usable to catch unintentional user errors early,
+ * it does not stop malicious code changing the fd after this function returns.
+ *
+ * return 0: if fd is for a valid fence, < 0 if invalid
+ */
+int kbase_sync_fence_validate(int fd);
+
+/**
+ * kbase_sync_fence_out_trigger - Signal explicit output fence attached on katom
+ * @katom: Atom with an explicit fence to signal
+ * @result: < 0 means signal with error, 0 >= indicates success
+ *
+ * Signal output fence attached on katom and remove the fence from the atom.
+ *
+ * return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE
+ */
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result);
+
+/**
+ * kbase_sync_fence_in_wait() - Wait for explicit input fence to be signaled
+ * @katom: Atom with explicit fence to wait for
+ *
+ * If the fence is already signaled, then 0 is returned, and the caller must
+ * continue processing of the katom.
+ *
+ * If the fence isn't already signaled, then this kbase_sync framework will
+ * take responsibility to continue the processing once the fence is signaled.
+ *
+ * return: 0 if already signaled, otherwise 1
+ */
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_in_cancel_wait() - Cancel explicit input fence waits
+ * @katom: Atom to cancel wait for
+ *
+ * This function is fully responsible for continuing processing of this atom
+ * (remove_waiting_soft_job + finish_soft_job + jd_done + js_sched_all)
+ */
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_in_remove() - Remove the input fence from the katom
+ * @katom: Atom to remove explicit input fence for
+ *
+ * This will also release the corresponding reference.
+ */
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_out_remove() - Remove the output fence from the katom
+ * @katom: Atom to remove explicit output fence for
+ *
+ * This will also release the corresponding reference.
+ */
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_close_fd() - Close a file descriptor representing a fence
+ * @fd: File descriptor to close
+ */
+static inline void kbase_sync_fence_close_fd(int fd)
+{
+	sys_close(fd);
+}
+
+/**
+ * kbase_sync_fence_in_info_get() - Retrieves information about input fence
+ * @katom: Atom to get fence information from
+ * @info: Struct to be filled with fence information
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info);
+
+/**
+ * kbase_sync_fence_out_info_get() - Retrieves information about output fence
+ * @katom: Atom to get fence information from
+ * @info: Struct to be filled with fence information
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				  struct kbase_sync_fence_info *info);
+
+/**
+ * kbase_sync_status_string() - Get string matching @status
+ * @status: Value of fence status.
+ *
+ * return: Pointer to string describing @status.
+ */
+const char *kbase_sync_status_string(int status);
+
+/*
+ * Internal worker used to continue processing of atom.
+ */
+void kbase_sync_fence_wait_worker(struct work_struct *data);
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+/**
+ * kbase_sync_fence_in_dump() Trigger a debug dump of atoms input fence state
+ * @katom: Atom to trigger fence debug dump for
+ */
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom);
+#endif
+
+#endif /* MALI_KBASE_SYNC_H */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c
new file mode 100644
index 0000000..75940fb
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c
@@ -0,0 +1,542 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Code for supporting explicit Android fences (CONFIG_SYNC)
+ * Known to be good for kernels 4.5 and earlier.
+ * Replaced with CONFIG_SYNC_FILE for 4.9 and later kernels
+ * (see mali_kbase_sync_file.c)
+ */
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include "sync.h"
+#include <mali_kbase.h>
+#include <mali_kbase_sync.h>
+
+struct mali_sync_timeline {
+	struct sync_timeline timeline;
+	atomic_t counter;
+	atomic_t signaled;
+};
+
+struct mali_sync_pt {
+	struct sync_pt pt;
+	int order;
+	int result;
+};
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+/* For backwards compatibility with kernels before 3.17. After 3.17
+ * sync_pt_parent is included in the kernel. */
+static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
+{
+	return pt->parent;
+}
+#endif
+
+static struct mali_sync_timeline *to_mali_sync_timeline(
+						struct sync_timeline *timeline)
+{
+	return container_of(timeline, struct mali_sync_timeline, timeline);
+}
+
+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, pt);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_pt *new_mpt;
+	struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt),
+						sizeof(struct mali_sync_pt));
+
+	if (!new_pt)
+		return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+	new_mpt->order = mpt->order;
+	new_mpt->result = mpt->result;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(
+							sync_pt_parent(pt));
+	int result = mpt->result;
+
+	int diff = atomic_read(&mtl->signaled) - mpt->order;
+
+	if (diff >= 0)
+		return (result < 0) ? result : 1;
+
+	return 0;
+}
+
+static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
+	struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
+
+	int diff = ma->order - mb->order;
+
+	if (diff == 0)
+		return 0;
+
+	return (diff < 0) ? -1 : 1;
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str,
+			       int size)
+{
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
+
+	snprintf(str, size, "%d", atomic_read(&mtl->signaled));
+}
+
+static void pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+
+	snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
+}
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name = "Mali",
+	.dup = timeline_dup,
+	.has_signaled = timeline_has_signaled,
+	.compare = timeline_compare,
+	.timeline_value_str = timeline_value_str,
+	.pt_value_str       = pt_value_str,
+};
+
+/* Allocates a timeline for Mali
+ *
+ * One timeline should be allocated per API context.
+ */
+static struct sync_timeline *mali_sync_timeline_alloc(const char *name)
+{
+	struct sync_timeline *tl;
+	struct mali_sync_timeline *mtl;
+
+	tl = sync_timeline_create(&mali_timeline_ops,
+				  sizeof(struct mali_sync_timeline), name);
+	if (!tl)
+		return NULL;
+
+	/* Set the counter in our private struct */
+	mtl = to_mali_sync_timeline(tl);
+	atomic_set(&mtl->counter, 0);
+	atomic_set(&mtl->signaled, 0);
+
+	return tl;
+}
+
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+	struct sync_timeline *tl;
+
+	tl = (struct sync_timeline *)file->private_data;
+	sync_timeline_destroy(tl);
+	return 0;
+}
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE,
+	.release = kbase_stream_close,
+};
+
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
+{
+	struct sync_timeline *tl;
+
+	if (!out_fd)
+		return -EINVAL;
+
+	tl = mali_sync_timeline_alloc(name);
+	if (!tl)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY|O_CLOEXEC);
+
+	if (*out_fd < 0) {
+		sync_timeline_destroy(tl);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by kbase_sync_timeline_alloc
+ *
+ * Sync points must be triggered in *exactly* the same order as they are
+ * allocated.
+ */
+static struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+	struct sync_pt *pt = sync_pt_create(parent,
+					    sizeof(struct mali_sync_pt));
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+	struct mali_sync_pt *mpt;
+
+	if (!pt)
+		return NULL;
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->order = atomic_inc_return(&mtl->counter);
+	mpt->result = 0;
+
+	return pt;
+}
+
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd)
+{
+	struct sync_timeline *tl;
+	struct sync_pt *pt;
+	struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+	struct files_struct *files;
+	struct fdtable *fdt;
+#endif
+	int fd;
+	struct file *tl_file;
+
+	tl_file = fget(tl_fd);
+	if (tl_file == NULL)
+		return -EBADF;
+
+	if (tl_file->f_op != &stream_fops) {
+		fd = -EBADF;
+		goto out;
+	}
+
+	tl = tl_file->private_data;
+
+	pt = kbase_sync_pt_alloc(tl);
+	if (!pt) {
+		fd = -EFAULT;
+		goto out;
+	}
+
+	fence = sync_fence_create("mali_fence", pt);
+	if (!fence) {
+		sync_pt_free(pt);
+		fd = -EFAULT;
+		goto out;
+	}
+
+	/* from here the fence owns the sync_pt */
+
+	/* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+#else
+	fd = get_unused_fd();
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+
+	files = current->files;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	__set_close_on_exec(fd, fdt);
+#else
+	FD_SET(fd, fdt->close_on_exec);
+#endif
+	spin_unlock(&files->file_lock);
+#endif  /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+	/* bind fence to the new fd */
+	sync_fence_install(fence, fd);
+
+	katom->fence = sync_fence_fdget(fd);
+	if (katom->fence == NULL) {
+		/* The only way the fence can be NULL is if userspace closed it
+		 * for us, so we don't need to clear it up */
+		fd = -EINVAL;
+		goto out;
+	}
+
+out:
+	fput(tl_file);
+
+	return fd;
+}
+
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
+{
+	katom->fence = sync_fence_fdget(fd);
+	return katom->fence ? 0 : -ENOENT;
+}
+
+int kbase_sync_fence_validate(int fd)
+{
+	struct sync_fence *fence;
+
+	fence = sync_fence_fdget(fd);
+	if (!fence)
+		return -EINVAL;
+
+	sync_fence_put(fence);
+	return 0;
+}
+
+/* Returns true if the specified timeline is allocated by Mali */
+static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
+{
+	return timeline->ops == &mali_timeline_ops;
+}
+
+/* Signals a particular sync point
+ *
+ * Sync points must be triggered in *exactly* the same order as they are
+ * allocated.
+ *
+ * If they are signaled in the wrong order then a message will be printed in
+ * debug builds and otherwise attempts to signal order sync_pts will be ignored.
+ *
+ * result can be negative to indicate error, any other value is interpreted as
+ * success.
+ */
+static void kbase_sync_signal_pt(struct sync_pt *pt, int result)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(
+							sync_pt_parent(pt));
+	int signaled;
+	int diff;
+
+	mpt->result = result;
+
+	do {
+		signaled = atomic_read(&mtl->signaled);
+
+		diff = signaled - mpt->order;
+
+		if (diff > 0) {
+			/* The timeline is already at or ahead of this point.
+			 * This should not happen unless userspace has been
+			 * signaling fences out of order, so warn but don't
+			 * violate the sync_pt API.
+			 * The warning is only in debug builds to prevent
+			 * a malicious user being able to spam dmesg.
+			 */
+#ifdef CONFIG_MALI_DEBUG
+			pr_err("Fences were triggered in a different order to allocation!");
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+	} while (atomic_cmpxchg(&mtl->signaled,
+				signaled, mpt->order) != signaled);
+}
+
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result)
+{
+	struct sync_pt *pt;
+	struct sync_timeline *timeline;
+
+	if (!katom->fence)
+		return BASE_JD_EVENT_JOB_CANCELLED;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	if (!list_is_singular(&katom->fence->pt_list_head)) {
+#else
+	if (katom->fence->num_fences != 1) {
+#endif
+		/* Not exactly one item in the list - so it didn't (directly)
+		 * come from us */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	pt = list_first_entry(&katom->fence->pt_list_head,
+			      struct sync_pt, pt_list);
+#else
+	pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
+#endif
+	timeline = sync_pt_parent(pt);
+
+	if (!kbase_sync_timeline_is_ours(timeline)) {
+		/* Fence has a sync_pt which isn't ours! */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	kbase_sync_signal_pt(pt, result);
+
+	sync_timeline_signal(timeline);
+
+	kbase_sync_fence_out_remove(katom);
+
+	return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+static inline int kbase_fence_get_status(struct sync_fence *fence)
+{
+	if (!fence)
+		return -ENOENT;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	return fence->status;
+#else
+	return atomic_read(&fence->status);
+#endif
+}
+
+static void kbase_fence_wait_callback(struct sync_fence *fence,
+				      struct sync_fence_waiter *waiter)
+{
+	struct kbase_jd_atom *katom = container_of(waiter,
+					struct kbase_jd_atom, sync_waiter);
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Propagate the fence status to the atom.
+	 * If negative then cancel this atom and its dependencies.
+	 */
+	if (kbase_fence_get_status(fence) < 0)
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* To prevent a potential deadlock we schedule the work onto the
+	 * job_done_wq workqueue
+	 *
+	 * The issue is that we may signal the timeline while holding
+	 * kctx->jctx.lock and the callbacks are run synchronously from
+	 * sync_timeline_signal. So we simply defer the work.
+	 */
+
+	INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
+{
+	int ret;
+
+	sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+	ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+	if (ret == 1) {
+		/* Already signaled */
+		return 0;
+	}
+
+	if (ret < 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+	return 1;
+}
+
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+		/* The wait wasn't cancelled - leave the cleanup for
+		 * kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->fence) {
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+	}
+}
+
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->fence) {
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+	}
+}
+
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+	if (!katom->fence)
+		return -ENOENT;
+
+	info->fence = katom->fence;
+	info->status = kbase_fence_get_status(katom->fence);
+	strlcpy(info->name, katom->fence->name, sizeof(info->name));
+
+	return 0;
+}
+
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+	if (!katom->fence)
+		return -ENOENT;
+
+	info->fence = katom->fence;
+	info->status = kbase_fence_get_status(katom->fence);
+	strlcpy(info->name, katom->fence->name, sizeof(info->name));
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom)
+{
+	/* Dump out the full state of all the Android sync fences.
+	 * The function sync_dump() isn't exported to modules, so force
+	 * sync_fence_wait() to time out to trigger sync_dump().
+	 */
+	if (katom->fence)
+		sync_fence_wait(katom->fence, 1);
+}
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
new file mode 100644
index 0000000..9520f5a
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
@@ -0,0 +1,48 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * @file mali_kbase_sync_common.c
+ *
+ * Common code for our explicit fence functionality
+ */
+
+#include <linux/workqueue.h>
+#include "mali_kbase.h"
+
+void kbase_sync_fence_wait_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kbase_soft_event_wait_callback(katom);
+}
+
+const char *kbase_sync_status_string(int status)
+{
+	if (status == 0)
+		return "signaled";
+	else if (status > 0)
+		return "active";
+	else
+		return "error";
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
new file mode 100755
index 0000000..2312399
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
@@ -0,0 +1,357 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Code for supporting explicit Linux fences (CONFIG_SYNC_FILE)
+ * Introduced in kernel 4.9.
+ * Android explicit fences (CONFIG_SYNC) can be used for older kernels
+ * (see mali_kbase_sync_android.c)
+ */
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/sync_file.h>
+#include <linux/slab.h>
+#include "mali_kbase_fence_defs.h"
+#include "mali_kbase_sync.h"
+#include "mali_kbase_fence.h"
+#include "mali_kbase.h"
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE
+};
+
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
+{
+	if (!out_fd)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, NULL,
+				   O_RDONLY | O_CLOEXEC);
+	if (*out_fd < 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+	struct sync_file *sync_file;
+	int fd;
+
+	fence = kbase_fence_out_new(katom);
+	if (!fence)
+		return -ENOMEM;
+
+#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE)
+	/* Take an extra reference to the fence on behalf of the sync_file.
+	 * This is only needed on older kernels where sync_file_create()
+	 * does not take its own reference. This was changed in v4.9.68,
+	 * where sync_file_create() now takes its own reference.
+	 */
+	dma_fence_get(fence);
+#endif
+
+	/* create a sync_file fd representing the fence */
+	sync_file = sync_file_create(fence);
+	if (!sync_file) {
+		dma_fence_put(fence);
+		kbase_fence_out_remove(katom);
+		return -ENOMEM;
+	}
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		fput(sync_file->file);
+		kbase_fence_out_remove(katom);
+		return fd;
+	}
+
+	fd_install(fd, sync_file->file);
+
+	return fd;
+}
+
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence = sync_file_get_fence(fd);
+#else
+	struct dma_fence *fence = sync_file_get_fence(fd);
+#endif
+
+	if (!fence)
+		return -ENOENT;
+
+	kbase_fence_fence_in_set(katom, fence);
+
+	return 0;
+}
+
+int kbase_sync_fence_validate(int fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence = sync_file_get_fence(fd);
+#else
+	struct dma_fence *fence = sync_file_get_fence(fd);
+#endif
+
+	if (!fence)
+		return -EINVAL;
+
+	dma_fence_put(fence);
+
+	return 0; /* valid */
+}
+
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result)
+{
+	int res;
+
+	if (!kbase_fence_out_is_ours(katom)) {
+		/* Not our fence */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	res = kbase_fence_out_signal(katom, result);
+	if (unlikely(res < 0)) {
+		dev_warn(katom->kctx->kbdev->dev,
+				"fence_signal() failed with %d\n", res);
+	}
+
+	kbase_sync_fence_out_remove(katom);
+
+	return (result != 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+static void kbase_fence_wait_callback(struct fence *fence,
+				      struct fence_cb *cb)
+#else
+static void kbase_fence_wait_callback(struct dma_fence *fence,
+				      struct dma_fence_cb *cb)
+#endif
+{
+	struct kbase_fence_cb *kcb = container_of(cb,
+				struct kbase_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Cancel atom if fence is erroneous */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 68))
+	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error)
+#else
+	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0)
+#endif
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	if (kbase_fence_dep_count_dec_and_test(katom)) {
+		/* We take responsibility of handling this */
+		kbase_fence_dep_count_set(katom, -1);
+
+		/* To prevent a potential deadlock we schedule the work onto the
+		 * job_done_wq workqueue
+		 *
+		 * The issue is that we may signal the timeline while holding
+		 * kctx->jctx.lock and the callbacks are run synchronously from
+		 * sync_timeline_signal. So we simply defer the work.
+		 */
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(kctx->jctx.job_done_wq, &katom->work);
+	}
+}
+
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
+{
+	int err;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_in_get(katom);
+	if (!fence)
+		return 0; /* no input fence to wait for, good to go! */
+
+	kbase_fence_dep_count_set(katom, 1);
+
+	err = kbase_fence_add_callback(katom, fence, kbase_fence_wait_callback);
+
+	kbase_fence_put(fence);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (kbase_fence_dep_count_dec_and_test(katom)) {
+			kbase_fence_free_callbacks(katom);
+			kbase_fence_dep_count_set(katom, -1);
+			return 0; /* Already signaled, good to go right now */
+		}
+
+		/* Callback installed, so we just need to wait for it... */
+	} else {
+		/* Failure */
+		kbase_fence_free_callbacks(katom);
+		kbase_fence_dep_count_set(katom, -1);
+
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+	return 1; /* completion to be done later by callback/worker */
+}
+
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (!kbase_fence_free_callbacks(katom)) {
+		/* The wait wasn't cancelled -
+		 * leave the cleanup for kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Take responsibility of completion */
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	kbase_fence_out_remove(katom);
+}
+
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	kbase_fence_free_callbacks(katom);
+	kbase_fence_in_remove(katom);
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+static void kbase_sync_fence_info_get(struct fence *fence,
+				      struct kbase_sync_fence_info *info)
+#else
+static void kbase_sync_fence_info_get(struct dma_fence *fence,
+				      struct kbase_sync_fence_info *info)
+#endif
+{
+	info->fence = fence;
+
+	/* translate into CONFIG_SYNC status:
+	 * < 0 : error
+	 * 0 : active
+	 * 1 : signaled
+	 */
+	if (dma_fence_is_signaled(fence)) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 68))
+		int status = fence->error;
+#else
+		int status = fence->status;
+#endif
+		if (status < 0)
+			info->status = status; /* signaled with error */
+		else
+			info->status = 1; /* signaled with success */
+	} else  {
+		info->status = 0; /* still active (unsignaled) */
+	}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+	scnprintf(info->name, sizeof(info->name), "%u#%u",
+		  fence->context, fence->seqno);
+#else
+	scnprintf(info->name, sizeof(info->name), "%llu#%u",
+		  fence->context, fence->seqno);
+#endif
+}
+
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_in_get(katom);
+	if (!fence)
+		return -ENOENT;
+
+	kbase_sync_fence_info_get(fence, info);
+
+	kbase_fence_put(fence);
+
+	return 0;
+}
+
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				  struct kbase_sync_fence_info *info)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_out_get(katom);
+	if (!fence)
+		return -ENOENT;
+
+	kbase_sync_fence_info_get(fence, info);
+
+	kbase_fence_put(fence);
+
+	return 0;
+}
+
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom)
+{
+	/* Not implemented */
+}
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
new file mode 100755
index 0000000..2ff45f5
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -0,0 +1,2638 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* The version of swtrace protocol used in timeline stream. */
+#define SWTRACE_VERSION    3
+
+/* The maximum expected length of string in tracepoint descriptor. */
+#define STRLEN_MAX         64 /* bytes */
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC       1000000000ull /* ns */
+
+/* The period of autoflush checker execution in milliseconds. */
+#define AUTOFLUSH_INTERVAL 1000 /* ms */
+
+/* The maximum size of a single packet used by timeline. */
+#define PACKET_SIZE        4096 /* bytes */
+
+/* The number of packets used by one timeline stream. */
+#define PACKET_COUNT       16
+
+/* The number of bytes reserved for packet header.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_HEADER_SIZE 8 /* bytes */
+
+/* The number of bytes reserved for packet sequence number.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_NUMBER_SIZE 4 /* bytes */
+
+/* Packet header - first word.
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_STREAMID_POS  0
+#define PACKET_STREAMID_LEN  8
+#define PACKET_RSVD1_POS     (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
+#define PACKET_RSVD1_LEN     8
+#define PACKET_TYPE_POS      (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
+#define PACKET_TYPE_LEN      3
+#define PACKET_CLASS_POS     (PACKET_TYPE_POS + PACKET_TYPE_LEN)
+#define PACKET_CLASS_LEN     7
+#define PACKET_FAMILY_POS    (PACKET_CLASS_POS + PACKET_CLASS_LEN)
+#define PACKET_FAMILY_LEN    6
+
+/* Packet header - second word
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_LENGTH_POS    0
+#define PACKET_LENGTH_LEN    24
+#define PACKET_SEQBIT_POS    (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
+#define PACKET_SEQBIT_LEN    1
+#define PACKET_RSVD2_POS     (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
+#define PACKET_RSVD2_LEN     7
+
+/* Types of streams generated by timeline.
+ * Order is significant! Header streams must precede respective body streams. */
+enum tl_stream_type {
+	TL_STREAM_TYPE_OBJ_HEADER,
+	TL_STREAM_TYPE_OBJ_SUMMARY,
+	TL_STREAM_TYPE_OBJ,
+	TL_STREAM_TYPE_AUX_HEADER,
+	TL_STREAM_TYPE_AUX,
+
+	TL_STREAM_TYPE_COUNT
+};
+
+/* Timeline packet family ids.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_family {
+	TL_PACKET_FAMILY_CTRL = 0, /* control packets */
+	TL_PACKET_FAMILY_TL   = 1, /* timeline packets */
+
+	TL_PACKET_FAMILY_COUNT
+};
+
+/* Packet classes used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_class {
+	TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
+	TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
+};
+
+/* Packet types used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_type {
+	TL_PACKET_TYPE_HEADER  = 0, /* stream's header/directory */
+	TL_PACKET_TYPE_BODY    = 1, /* stream's body */
+	TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
+};
+
+/* Message ids of trace events that are recorded in the timeline stream. */
+enum tl_msg_id_obj {
+	/* Timeline object events. */
+	KBASE_TL_NEW_CTX,
+	KBASE_TL_NEW_GPU,
+	KBASE_TL_NEW_LPU,
+	KBASE_TL_NEW_ATOM,
+	KBASE_TL_NEW_AS,
+	KBASE_TL_DEL_CTX,
+	KBASE_TL_DEL_ATOM,
+	KBASE_TL_LIFELINK_LPU_GPU,
+	KBASE_TL_LIFELINK_AS_GPU,
+	KBASE_TL_RET_CTX_LPU,
+	KBASE_TL_RET_ATOM_CTX,
+	KBASE_TL_RET_ATOM_LPU,
+	KBASE_TL_NRET_CTX_LPU,
+	KBASE_TL_NRET_ATOM_CTX,
+	KBASE_TL_NRET_ATOM_LPU,
+	KBASE_TL_RET_AS_CTX,
+	KBASE_TL_NRET_AS_CTX,
+	KBASE_TL_RET_ATOM_AS,
+	KBASE_TL_NRET_ATOM_AS,
+	KBASE_TL_DEP_ATOM_ATOM,
+	KBASE_TL_NDEP_ATOM_ATOM,
+	KBASE_TL_RDEP_ATOM_ATOM,
+	KBASE_TL_ATTRIB_ATOM_CONFIG,
+	KBASE_TL_ATTRIB_ATOM_PRIORITY,
+	KBASE_TL_ATTRIB_ATOM_STATE,
+	KBASE_TL_ATTRIB_ATOM_PRIORITIZED,
+	KBASE_TL_ATTRIB_ATOM_JIT,
+	KBASE_TL_ATTRIB_AS_CONFIG,
+	KBASE_TL_EVENT_LPU_SOFTSTOP,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+	KBASE_TL_EVENT_ATOM_SOFTJOB_START,
+	KBASE_TL_EVENT_ATOM_SOFTJOB_END,
+
+	/* Job dump specific events. */
+	KBASE_JD_GPU_SOFT_RESET
+};
+
+/* Message ids of trace events that are recorded in the auxiliary stream. */
+enum tl_msg_id_aux {
+	KBASE_AUX_PM_STATE,
+	KBASE_AUX_PAGEFAULT,
+	KBASE_AUX_PAGESALLOC,
+	KBASE_AUX_DEVFREQ_TARGET,
+	KBASE_AUX_PROTECTED_ENTER_START,
+	KBASE_AUX_PROTECTED_ENTER_END,
+	KBASE_AUX_PROTECTED_LEAVE_START,
+	KBASE_AUX_PROTECTED_LEAVE_END
+};
+
+/*****************************************************************************/
+
+/**
+ * struct tl_stream - timeline stream structure
+ * @lock: message order lock
+ * @buffer: array of buffers
+ * @wbi: write buffer index
+ * @rbi: read buffer index
+ * @numbered: if non-zero stream's packets are sequentially numbered
+ * @autoflush_counter: counter tracking stream's autoflush state
+ *
+ * This structure holds information needed to construct proper packets in the
+ * timeline stream. Each message in sequence must bear timestamp that is greater
+ * to one in previous message in the same stream. For this reason lock is held
+ * throughout the process of message creation. Each stream contains set of
+ * buffers. Each buffer will hold one MIPE packet. In case there is no free
+ * space required to store incoming message the oldest buffer is discarded.
+ * Each packet in timeline body stream has sequence number embedded (this value
+ * must increment monotonically and is used by packets receiver to discover
+ * buffer overflows.
+ * Autoflush counter is set to negative number when there is no data pending
+ * for flush and it is set to zero on every update of the buffer. Autoflush
+ * timer will increment the counter by one on every expiry. In case there will
+ * be no activity on the buffer during two consecutive timer expiries, stream
+ * buffer will be flushed.
+ */
+struct tl_stream {
+	spinlock_t lock;
+
+	struct {
+		atomic_t size;              /* number of bytes in buffer */
+		char     data[PACKET_SIZE]; /* buffer's data */
+	} buffer[PACKET_COUNT];
+
+	atomic_t wbi;
+	atomic_t rbi;
+
+	int      numbered;
+	atomic_t autoflush_counter;
+};
+
+/**
+ * struct tp_desc - tracepoint message descriptor structure
+ * @id:        tracepoint ID identifying message in stream
+ * @id_str:    human readable version of tracepoint ID
+ * @name:      tracepoint description
+ * @arg_types: tracepoint's arguments types declaration
+ * @arg_names: comma separated list of tracepoint's arguments names
+ */
+struct tp_desc {
+	u32        id;
+	const char *id_str;
+	const char *name;
+	const char *arg_types;
+	const char *arg_names;
+};
+
+/*****************************************************************************/
+
+/* Configuration of timeline streams generated by kernel.
+ * Kernel emit only streams containing either timeline object events or
+ * auxiliary events. All streams have stream id value of 1 (as opposed to user
+ * space streams that have value of 0). */
+static const struct {
+	enum tl_packet_family pkt_family;
+	enum tl_packet_class  pkt_class;
+	enum tl_packet_type   pkt_type;
+	unsigned int          stream_id;
+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY,    1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY,    1}
+};
+
+/* The timeline streams generated by kernel. */
+static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT];
+
+/* Autoflush timer. */
+static struct timer_list autoflush_timer;
+
+/* If non-zero autoflush timer is active. */
+static atomic_t autoflush_timer_active;
+
+/* Reader lock. Only one reader is allowed to have access to the timeline
+ * streams at any given time. */
+static DEFINE_MUTEX(tl_reader_lock);
+
+/* Timeline stream event queue. */
+static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
+
+/* The timeline stream file operations functions. */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos);
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait);
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations kbasep_tlstream_fops = {
+	.release = kbasep_tlstream_release,
+	.read    = kbasep_tlstream_read,
+	.poll    = kbasep_tlstream_poll,
+};
+
+/* Descriptors of timeline messages transmitted in object events stream. */
+static const struct tp_desc tp_desc_obj[] = {
+	{
+		KBASE_TL_NEW_CTX,
+		__stringify(KBASE_TL_NEW_CTX),
+		"object ctx is created",
+		"@pII",
+		"ctx,ctx_nr,tgid"
+	},
+	{
+		KBASE_TL_NEW_GPU,
+		__stringify(KBASE_TL_NEW_GPU),
+		"object gpu is created",
+		"@pII",
+		"gpu,gpu_id,core_count"
+	},
+	{
+		KBASE_TL_NEW_LPU,
+		__stringify(KBASE_TL_NEW_LPU),
+		"object lpu is created",
+		"@pII",
+		"lpu,lpu_nr,lpu_fn"
+	},
+	{
+		KBASE_TL_NEW_ATOM,
+		__stringify(KBASE_TL_NEW_ATOM),
+		"object atom is created",
+		"@pI",
+		"atom,atom_nr"
+	},
+	{
+		KBASE_TL_NEW_AS,
+		__stringify(KBASE_TL_NEW_AS),
+		"address space object is created",
+		"@pI",
+		"address_space,as_nr"
+	},
+	{
+		KBASE_TL_DEL_CTX,
+		__stringify(KBASE_TL_DEL_CTX),
+		"context is destroyed",
+		"@p",
+		"ctx"
+	},
+	{
+		KBASE_TL_DEL_ATOM,
+		__stringify(KBASE_TL_DEL_ATOM),
+		"atom is destroyed",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_LIFELINK_LPU_GPU,
+		__stringify(KBASE_TL_LIFELINK_LPU_GPU),
+		"lpu is deleted with gpu",
+		"@pp",
+		"lpu,gpu"
+	},
+	{
+		KBASE_TL_LIFELINK_AS_GPU,
+		__stringify(KBASE_TL_LIFELINK_AS_GPU),
+		"address space is deleted with gpu",
+		"@pp",
+		"address_space,gpu"
+	},
+	{
+		KBASE_TL_RET_CTX_LPU,
+		__stringify(KBASE_TL_RET_CTX_LPU),
+		"context is retained by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_RET_ATOM_CTX,
+		__stringify(KBASE_TL_RET_ATOM_CTX),
+		"atom is retained by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_LPU,
+		__stringify(KBASE_TL_RET_ATOM_LPU),
+		"atom is retained by lpu",
+		"@pps",
+		"atom,lpu,attrib_match_list"
+	},
+	{
+		KBASE_TL_NRET_CTX_LPU,
+		__stringify(KBASE_TL_NRET_CTX_LPU),
+		"context is released by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_NRET_ATOM_CTX,
+		__stringify(KBASE_TL_NRET_ATOM_CTX),
+		"atom is released by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_NRET_ATOM_LPU,
+		__stringify(KBASE_TL_NRET_ATOM_LPU),
+		"atom is released by lpu",
+		"@pp",
+		"atom,lpu"
+	},
+	{
+		KBASE_TL_RET_AS_CTX,
+		__stringify(KBASE_TL_RET_AS_CTX),
+		"address space is retained by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_NRET_AS_CTX,
+		__stringify(KBASE_TL_NRET_AS_CTX),
+		"address space is released by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_AS,
+		__stringify(KBASE_TL_RET_ATOM_AS),
+		"atom is retained by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_NRET_ATOM_AS,
+		__stringify(KBASE_TL_NRET_ATOM_AS),
+		"atom is released by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_DEP_ATOM_ATOM,
+		__stringify(KBASE_TL_DEP_ATOM_ATOM),
+		"atom2 depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_NDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_NDEP_ATOM_ATOM),
+		"atom2 no longer depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_RDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_RDEP_ATOM_ATOM),
+		"resolved dependecy of atom2 depending on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_ATOM_CONFIG),
+		"atom job slot attributes",
+		"@pLLI",
+		"atom,descriptor,affinity,config"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_PRIORITY,
+		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY),
+		"atom priority",
+		"@pI",
+		"atom,prio"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_STATE,
+		__stringify(KBASE_TL_ATTRIB_ATOM_STATE),
+		"atom state",
+		"@pI",
+		"atom,state"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_PRIORITIZED,
+		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITIZED),
+		"atom caused priority change",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_JIT,
+		__stringify(KBASE_TL_ATTRIB_ATOM_JIT),
+		"jit done for atom",
+		"@pLL",
+		"atom,edit_addr,new_addr"
+	},
+	{
+		KBASE_TL_ATTRIB_AS_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_AS_CONFIG),
+		"address space attributes",
+		"@pLLL",
+		"address_space,transtab,memattr,transcfg"
+	},
+	{
+		KBASE_TL_EVENT_LPU_SOFTSTOP,
+		__stringify(KBASE_TL_EVENT_LPU_SOFTSTOP),
+		"softstop event on given lpu",
+		"@p",
+		"lpu"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX),
+		"atom softstopped",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+		__stringify(KBASE_TL_EVENT_SOFTSTOP_ISSUE),
+		"atom softstop issued",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTJOB_START,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTJOB_START),
+		"atom soft job has started",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTJOB_END,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTJOB_END),
+		"atom soft job has completed",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_JD_GPU_SOFT_RESET,
+		__stringify(KBASE_JD_GPU_SOFT_RESET),
+		"gpu soft reset",
+		"@p",
+		"gpu"
+	},
+};
+
+/* Descriptors of timeline messages transmitted in auxiliary events stream. */
+static const struct tp_desc tp_desc_aux[] = {
+	{
+		KBASE_AUX_PM_STATE,
+		__stringify(KBASE_AUX_PM_STATE),
+		"PM state",
+		"@IL",
+		"core_type,core_state_bitset"
+	},
+	{
+		KBASE_AUX_PAGEFAULT,
+		__stringify(KBASE_AUX_PAGEFAULT),
+		"Page fault",
+		"@IL",
+		"ctx_nr,page_cnt_change"
+	},
+	{
+		KBASE_AUX_PAGESALLOC,
+		__stringify(KBASE_AUX_PAGESALLOC),
+		"Total alloc pages change",
+		"@IL",
+		"ctx_nr,page_cnt"
+	},
+	{
+		KBASE_AUX_DEVFREQ_TARGET,
+		__stringify(KBASE_AUX_DEVFREQ_TARGET),
+		"New device frequency target",
+		"@L",
+		"target_freq"
+	},
+	{
+		KBASE_AUX_PROTECTED_ENTER_START,
+		__stringify(KBASE_AUX_PROTECTED_ENTER_START),
+		"enter protected mode start",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_ENTER_END,
+		__stringify(KBASE_AUX_PROTECTED_ENTER_END),
+		"enter protected mode end",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_LEAVE_START,
+		__stringify(KBASE_AUX_PROTECTED_LEAVE_START),
+		"leave protected mode start",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_LEAVE_END,
+		__stringify(KBASE_AUX_PROTECTED_LEAVE_END),
+		"leave protected mode end",
+		"@p",
+		"gpu"
+	}
+};
+
+#if MALI_UNIT_TEST
+/* Number of bytes read by user. */
+static atomic_t tlstream_bytes_collected = {0};
+
+/* Number of bytes generated by tracepoint messages. */
+static atomic_t tlstream_bytes_generated = {0};
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+/* Indicator of whether the timeline stream file descriptor is used. */
+atomic_t kbase_tlstream_enabled = {0};
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ * Return: timestamp value
+ */
+static u64 kbasep_tlstream_get_timestamp(void)
+{
+	struct timespec ts;
+	u64             timestamp;
+
+	getrawmonotonic(&ts);
+	timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+	return timestamp;
+}
+
+/**
+ * kbasep_tlstream_write_bytes - write data to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ * @bytes:  pointer to buffer holding data
+ * @len:    length of data to be written
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_bytes(
+		char       *buffer,
+		size_t     pos,
+		const void *bytes,
+		size_t     len)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(bytes);
+
+	memcpy(&buffer[pos], bytes, len);
+
+	return pos + len;
+}
+
+/**
+ * kbasep_tlstream_write_string - write string to message buffer
+ * @buffer:         buffer where data will be written
+ * @pos:            position in the buffer where to place data
+ * @string:         pointer to buffer holding the source string
+ * @max_write_size: number of bytes that can be stored in buffer
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_string(
+		char       *buffer,
+		size_t     pos,
+		const char *string,
+		size_t     max_write_size)
+{
+	u32 string_len;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(string);
+	/* Timeline string consists of at least string length and nul
+	 * terminator. */
+	KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
+	max_write_size -= sizeof(string_len);
+
+	string_len = strlcpy(
+			&buffer[pos + sizeof(string_len)],
+			string,
+			max_write_size);
+	string_len += sizeof(char);
+
+	/* Make sure that the source string fit into the buffer. */
+	KBASE_DEBUG_ASSERT(string_len <= max_write_size);
+
+	/* Update string length. */
+	memcpy(&buffer[pos], &string_len, sizeof(string_len));
+
+	return pos + sizeof(string_len) + string_len;
+}
+
+/**
+ * kbasep_tlstream_write_timestamp - write timestamp to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos)
+{
+	u64 timestamp = kbasep_tlstream_get_timestamp();
+
+	return kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&timestamp, sizeof(timestamp));
+}
+
+/**
+ * kbasep_tlstream_put_bits - put bits in a word
+ * @word:   pointer to the words being modified
+ * @value:  value that shall be written to given position
+ * @bitpos: position where value shall be written (in bits)
+ * @bitlen: length of value (in bits)
+ */
+static void kbasep_tlstream_put_bits(
+		u32          *word,
+		u32          value,
+		unsigned int bitpos,
+		unsigned int bitlen)
+{
+	const u32 mask = ((1 << bitlen) - 1) << bitpos;
+
+	KBASE_DEBUG_ASSERT(word);
+	KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen));
+	KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32);
+
+	*word &= ~mask;
+	*word |= ((value << bitpos) & mask);
+}
+
+/**
+ * kbasep_tlstream_packet_header_setup - setup the packet header
+ * @buffer:     pointer to the buffer
+ * @pkt_family: packet's family
+ * @pkt_type:   packet's type
+ * @pkt_class:  packet's class
+ * @stream_id:  stream id
+ * @numbered:   non-zero if this stream is numbered
+ *
+ * Function sets up immutable part of packet header in the given buffer.
+ */
+static void kbasep_tlstream_packet_header_setup(
+		char                  *buffer,
+		enum tl_packet_family pkt_family,
+		enum tl_packet_class  pkt_class,
+		enum tl_packet_type   pkt_type,
+		unsigned int          stream_id,
+		int                   numbered)
+{
+	u32 word0 = 0;
+	u32 word1 = 0;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL);
+	KBASE_DEBUG_ASSERT(
+			(pkt_type == TL_PACKET_TYPE_HEADER)  ||
+			(pkt_type == TL_PACKET_TYPE_SUMMARY) ||
+			(pkt_type == TL_PACKET_TYPE_BODY));
+	KBASE_DEBUG_ASSERT(
+			(pkt_class == TL_PACKET_CLASS_OBJ) ||
+			(pkt_class == TL_PACKET_CLASS_AUX));
+
+	kbasep_tlstream_put_bits(
+			&word0, pkt_family,
+			PACKET_FAMILY_POS, PACKET_FAMILY_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_class,
+			PACKET_CLASS_POS, PACKET_CLASS_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_type,
+			PACKET_TYPE_POS, PACKET_TYPE_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, stream_id,
+			PACKET_STREAMID_POS, PACKET_STREAMID_LEN);
+
+	if (numbered)
+		kbasep_tlstream_put_bits(
+				&word1, 1,
+				PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN);
+
+	memcpy(&buffer[0],             &word0, sizeof(word0));
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_header_update - update the packet header
+ * @buffer:    pointer to the buffer
+ * @data_size: amount of data carried in this packet
+ *
+ * Function updates mutable part of packet header in the given buffer.
+ * Note that value of data_size must not including size of the header.
+ */
+static void kbasep_tlstream_packet_header_update(
+		char   *buffer,
+		size_t data_size)
+{
+	u32 word0;
+	u32 word1;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	CSTD_UNUSED(word0);
+
+	memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1));
+
+	kbasep_tlstream_put_bits(
+			&word1, data_size,
+			PACKET_LENGTH_POS, PACKET_LENGTH_LEN);
+
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_number_update - update the packet number
+ * @buffer:  pointer to the buffer
+ * @counter: value of packet counter for this packet's stream
+ *
+ * Function updates packet number embedded within the packet placed in the
+ * given buffer.
+ */
+static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+
+	memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter));
+}
+
+/**
+ * kbasep_timeline_stream_reset - reset stream
+ * @stream:  pointer to the stream structure
+ *
+ * Function discards all pending messages and resets packet counters.
+ */
+static void kbasep_timeline_stream_reset(struct tl_stream *stream)
+{
+	unsigned int i;
+
+	for (i = 0; i < PACKET_COUNT; i++) {
+		if (stream->numbered)
+			atomic_set(
+					&stream->buffer[i].size,
+					PACKET_HEADER_SIZE +
+					PACKET_NUMBER_SIZE);
+		else
+			atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE);
+	}
+
+	atomic_set(&stream->wbi, 0);
+	atomic_set(&stream->rbi, 0);
+}
+
+/**
+ * kbasep_timeline_stream_init - initialize timeline stream
+ * @stream:      pointer to the stream structure
+ * @stream_type: stream type
+ */
+static void kbasep_timeline_stream_init(
+		struct tl_stream    *stream,
+		enum tl_stream_type stream_type)
+{
+	unsigned int i;
+
+	KBASE_DEBUG_ASSERT(stream);
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	spin_lock_init(&stream->lock);
+
+	/* All packets carrying tracepoints shall be numbered. */
+	if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type)
+		stream->numbered = 1;
+	else
+		stream->numbered = 0;
+
+	for (i = 0; i < PACKET_COUNT; i++)
+		kbasep_tlstream_packet_header_setup(
+				stream->buffer[i].data,
+				tl_stream_cfg[stream_type].pkt_family,
+				tl_stream_cfg[stream_type].pkt_class,
+				tl_stream_cfg[stream_type].pkt_type,
+				tl_stream_cfg[stream_type].stream_id,
+				stream->numbered);
+
+	kbasep_timeline_stream_reset(tl_stream[stream_type]);
+}
+
+/**
+ * kbasep_timeline_stream_term - terminate timeline stream
+ * @stream: pointer to the stream structure
+ */
+static void kbasep_timeline_stream_term(struct tl_stream *stream)
+{
+	KBASE_DEBUG_ASSERT(stream);
+}
+
+/**
+ * kbasep_tlstream_msgbuf_submit - submit packet to the user space
+ * @stream:     pointer to the stream structure
+ * @wb_idx_raw: write buffer index
+ * @wb_size:    length of data stored in current buffer
+ *
+ * Function updates currently written buffer with packet header. Then write
+ * index is incremented and buffer is handled to user space. Parameters
+ * of new buffer are returned using provided arguments.
+ *
+ * Return: length of data in new buffer
+ *
+ * Warning:  User must update the stream structure with returned value.
+ */
+static size_t kbasep_tlstream_msgbuf_submit(
+		struct tl_stream *stream,
+		unsigned int      wb_idx_raw,
+		unsigned int      wb_size)
+{
+	unsigned int rb_idx_raw = atomic_read(&stream->rbi);
+	unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
+
+	/* Set stream as flushed. */
+	atomic_set(&stream->autoflush_counter, -1);
+
+	kbasep_tlstream_packet_header_update(
+			stream->buffer[wb_idx].data,
+			wb_size - PACKET_HEADER_SIZE);
+
+	if (stream->numbered)
+		kbasep_tlstream_packet_number_update(
+				stream->buffer[wb_idx].data,
+				wb_idx_raw);
+
+	/* Increasing write buffer index will expose this packet to the reader.
+	 * As stream->lock is not taken on reader side we must make sure memory
+	 * is updated correctly before this will happen. */
+	smp_wmb();
+	wb_idx_raw++;
+	atomic_set(&stream->wbi, wb_idx_raw);
+
+	/* Inform user that packets are ready for reading. */
+	wake_up_interruptible(&tl_event_queue);
+
+	/* Detect and mark overflow in this stream. */
+	if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) {
+		/* Reader side depends on this increment to correctly handle
+		 * overflows. The value shall be updated only if it was not
+		 * modified by the reader. The data holding buffer will not be
+		 * updated before stream->lock is released, however size of the
+		 * buffer will. Make sure this increment is globally visible
+		 * before information about selected write buffer size. */
+		atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1);
+	}
+
+	wb_size = PACKET_HEADER_SIZE;
+	if (stream->numbered)
+		wb_size += PACKET_NUMBER_SIZE;
+
+	return wb_size;
+}
+
+/**
+ * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer
+ * @stream_type: type of the stream that shall be locked
+ * @msg_size:    message size
+ * @flags:       pointer to store flags passed back on stream release
+ *
+ * Function will lock the stream and reserve the number of bytes requested
+ * in msg_size for the user.
+ *
+ * Return: pointer to the buffer where message can be stored
+ *
+ * Warning: Stream must be released with kbasep_tlstream_msgbuf_release().
+ *          Only atomic operations are allowed while stream is locked
+ *          (i.e. do not use any operation that may sleep).
+ */
+static char *kbasep_tlstream_msgbuf_acquire(
+		enum tl_stream_type stream_type,
+		size_t              msg_size,
+		unsigned long       *flags) __acquires(&stream->lock)
+{
+	struct tl_stream *stream;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(
+			PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
+			msg_size);
+
+	stream = tl_stream[stream_type];
+
+	spin_lock_irqsave(&stream->lock, *flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	/* Select next buffer if data will not fit into current one. */
+	if (PACKET_SIZE < wb_size + msg_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx  = (wb_idx_raw + 1) % PACKET_COUNT;
+	}
+
+	/* Reserve space in selected buffer. */
+	atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size);
+
+#if MALI_UNIT_TEST
+	atomic_add(msg_size, &tlstream_bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+	return &stream->buffer[wb_idx].data[wb_size];
+}
+
+/**
+ * kbasep_tlstream_msgbuf_release - unlock selected stream
+ * @stream_type:  type of the stream that shall be locked
+ * @flags:        value obtained during stream acquire
+ *
+ * Function releases stream that has been previously locked with a call to
+ * kbasep_tlstream_msgbuf_acquire().
+ */
+static void kbasep_tlstream_msgbuf_release(
+		enum tl_stream_type stream_type,
+		unsigned long       flags) __releases(&stream->lock)
+{
+	struct tl_stream *stream;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	stream = tl_stream[stream_type];
+
+	/* Mark stream as containing unflushed data. */
+	atomic_set(&stream->autoflush_counter, 0);
+
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_flush_stream - flush stream
+ * @stype:  type of stream to be flushed
+ *
+ * Flush pending data in timeline stream.
+ */
+static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
+{
+	struct tl_stream *stream = tl_stream[stype];
+	unsigned long    flags;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+	size_t           min_size = PACKET_HEADER_SIZE;
+
+	if (stream->numbered)
+		min_size += PACKET_NUMBER_SIZE;
+
+	spin_lock_irqsave(&stream->lock, flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	if (wb_size > min_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+		atomic_set(&stream->buffer[wb_idx].size, wb_size);
+	}
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/**
+ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
+ * @timer: unused
+ *
+ * Timer is executed periodically to check if any of the stream contains
+ * buffer ready to be submitted to user space.
+ */
+static void kbasep_tlstream_autoflush_timer_callback(struct timer_list *timer)
+{
+	enum tl_stream_type stype;
+	int                 rcode;
+
+	CSTD_UNUSED(timer);
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
+		struct tl_stream *stream = tl_stream[stype];
+		unsigned long    flags;
+		unsigned int     wb_idx_raw;
+		unsigned int     wb_idx;
+		size_t           wb_size;
+		size_t           min_size = PACKET_HEADER_SIZE;
+
+		int af_cnt = atomic_read(&stream->autoflush_counter);
+
+		/* Check if stream contain unflushed data. */
+		if (0 > af_cnt)
+			continue;
+
+		/* Check if stream should be flushed now. */
+		if (af_cnt != atomic_cmpxchg(
+					&stream->autoflush_counter,
+					af_cnt,
+					af_cnt + 1))
+			continue;
+		if (!af_cnt)
+			continue;
+
+		/* Autoflush this stream. */
+		if (stream->numbered)
+			min_size += PACKET_NUMBER_SIZE;
+
+		spin_lock_irqsave(&stream->lock, flags);
+
+		wb_idx_raw = atomic_read(&stream->wbi);
+		wb_idx     = wb_idx_raw % PACKET_COUNT;
+		wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+		if (wb_size > min_size) {
+			wb_size = kbasep_tlstream_msgbuf_submit(
+					stream, wb_idx_raw, wb_size);
+			wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+			atomic_set(&stream->buffer[wb_idx].size,
+					wb_size);
+		}
+		spin_unlock_irqrestore(&stream->lock, flags);
+	}
+
+	if (atomic_read(&autoflush_timer_active))
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+	CSTD_UNUSED(rcode);
+}
+
+/**
+ * kbasep_tlstream_packet_pending - check timeline streams for pending packets
+ * @stype:      pointer to variable where stream type will be placed
+ * @rb_idx_raw: pointer to variable where read buffer index will be placed
+ *
+ * Function checks all streams for pending packets. It will stop as soon as
+ * packet ready to be submitted to user space is detected. Variables under
+ * pointers, passed as the parameters to this function will be updated with
+ * values pointing to right stream and buffer.
+ *
+ * Return: non-zero if any of timeline streams has at last one packet ready
+ */
+static int kbasep_tlstream_packet_pending(
+		enum tl_stream_type *stype,
+		unsigned int        *rb_idx_raw)
+{
+	int pending = 0;
+
+	KBASE_DEBUG_ASSERT(stype);
+	KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+	for (
+			*stype = 0;
+			(*stype < TL_STREAM_TYPE_COUNT) && !pending;
+			(*stype)++) {
+		if (NULL != tl_stream[*stype]) {
+			*rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi);
+			/* Read buffer index may be updated by writer in case of
+			 * overflow. Read and write buffer indexes must be
+			 * loaded in correct order. */
+			smp_rmb();
+			if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw)
+				pending = 1;
+		}
+	}
+	(*stype)--;
+
+	return pending;
+}
+
+/**
+ * kbasep_tlstream_read - copy data from streams to buffer provided by user
+ * @filp:   pointer to file structure (unused)
+ * @buffer: pointer to the buffer provided by user
+ * @size:   maximum amount of data that can be stored in the buffer
+ * @f_pos:  pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer
+ */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos)
+{
+	ssize_t copy_len = 0;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(f_pos);
+
+	if (!buffer)
+		return -EINVAL;
+
+	if ((0 > *f_pos) || (PACKET_SIZE > size))
+		return -EINVAL;
+
+	mutex_lock(&tl_reader_lock);
+
+	while (copy_len < size) {
+		enum tl_stream_type stype;
+		unsigned int        rb_idx_raw = 0;
+		unsigned int        rb_idx;
+		size_t              rb_size;
+
+		/* If we don't have any data yet, wait for packet to be
+		 * submitted. If we already read some packets and there is no
+		 * packet pending return back to user. */
+		if (0 < copy_len) {
+			if (!kbasep_tlstream_packet_pending(
+						&stype,
+						&rb_idx_raw))
+				break;
+		} else {
+			if (wait_event_interruptible(
+						tl_event_queue,
+						kbasep_tlstream_packet_pending(
+							&stype,
+							&rb_idx_raw))) {
+				copy_len = -ERESTARTSYS;
+				break;
+			}
+		}
+
+		/* Check if this packet fits into the user buffer.
+		 * If so copy its content. */
+		rb_idx = rb_idx_raw % PACKET_COUNT;
+		rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size);
+		if (rb_size > size - copy_len)
+			break;
+		if (copy_to_user(
+					&buffer[copy_len],
+					tl_stream[stype]->buffer[rb_idx].data,
+					rb_size)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* If the rbi still points to the packet we just processed
+		 * then there was no overflow so we add the copied size to
+		 * copy_len and move rbi on to the next packet
+		 */
+		smp_rmb();
+		if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
+			copy_len += rb_size;
+			atomic_inc(&tl_stream[stype]->rbi);
+
+#if MALI_UNIT_TEST
+			atomic_add(rb_size, &tlstream_bytes_collected);
+#endif /* MALI_UNIT_TEST */
+		}
+	}
+
+	mutex_unlock(&tl_reader_lock);
+
+	return copy_len;
+}
+
+/**
+ * kbasep_tlstream_poll - poll timeline stream for packets
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait)
+{
+	enum tl_stream_type stream_type;
+	unsigned int        rb_idx;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	poll_wait(filp, &tl_event_queue, wait);
+	if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_release - release timeline stream descriptor
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ *
+ * Return always return zero
+ */
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
+{
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+	CSTD_UNUSED(inode);
+	CSTD_UNUSED(filp);
+
+	/* Stop autoflush timer before releasing access to streams. */
+	atomic_set(&autoflush_timer_active, 0);
+	del_timer_sync(&autoflush_timer);
+
+	atomic_set(&kbase_tlstream_enabled, 0);
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_timeline_header - prepare timeline header stream packet
+ * @stream_type: type of the stream that will carry header data
+ * @tp_desc:     pointer to array with tracepoint descriptors
+ * @tp_count:    number of descriptors in the given array
+ *
+ * Functions fills in information about tracepoints stored in body stream
+ * associated with this header stream.
+ */
+static void kbasep_tlstream_timeline_header(
+		enum tl_stream_type  stream_type,
+		const struct tp_desc *tp_desc,
+		u32                  tp_count)
+{
+	const u8      tv = SWTRACE_VERSION; /* protocol version */
+	const u8      ps = sizeof(void *); /* pointer size */
+	size_t        msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
+	char          *buffer;
+	size_t        pos = 0;
+	unsigned long flags;
+	unsigned int  i;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(tp_desc);
+
+	/* Calculate the size of the timeline message. */
+	for (i = 0; i < tp_count; i++) {
+		msg_size += sizeof(tp_desc[i].id);
+		msg_size +=
+			strnlen(tp_desc[i].id_str,    STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].name,      STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_types, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_names, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+	}
+
+	KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size);
+
+	buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tp_count, sizeof(tp_count));
+
+	for (i = 0; i < tp_count; i++) {
+		pos = kbasep_tlstream_write_bytes(
+				buffer, pos,
+				&tp_desc[i].id, sizeof(tp_desc[i].id));
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].id_str, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].name, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_types, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_names, msg_size - pos);
+	}
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(stream_type, flags);
+
+	/* We don't expect any more data to be read in this stream.
+	 * As header stream must be read before its associated body stream,
+	 * make this packet visible to the user straightaway. */
+	kbasep_tlstream_flush_stream(stream_type);
+}
+
+/*****************************************************************************/
+
+int kbase_tlstream_init(void)
+{
+	enum tl_stream_type i;
+
+	/* Prepare stream structures. */
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL);
+		if (!tl_stream[i])
+			break;
+		kbasep_timeline_stream_init(tl_stream[i], i);
+	}
+	if (TL_STREAM_TYPE_COUNT > i) {
+		for (; i > 0; i--) {
+			kbasep_timeline_stream_term(tl_stream[i - 1]);
+			kfree(tl_stream[i - 1]);
+		}
+		return -ENOMEM;
+	}
+
+	/* Initialize autoflush timer. */
+	atomic_set(&autoflush_timer_active, 0);
+	kbase_timer_setup(&autoflush_timer,
+			  kbasep_tlstream_autoflush_timer_callback);
+
+	return 0;
+}
+
+void kbase_tlstream_term(void)
+{
+	enum tl_stream_type i;
+
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		kbasep_timeline_stream_term(tl_stream[i]);
+		kfree(tl_stream[i]);
+	}
+}
+
+static void kbase_create_timeline_objects(struct kbase_context *kctx)
+{
+	struct kbase_device             *kbdev = kctx->kbdev;
+	unsigned int                    lpu_id;
+	unsigned int                    as_nr;
+	struct kbasep_kctx_list_element *element;
+
+	/* Create LPU objects. */
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		u32 *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, lpu_id, *lpu);
+	}
+
+	/* Create Address Space objects. */
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(&kbdev->as[as_nr], as_nr);
+
+	/* Create GPU object and make it retain all LPUs and address spaces. */
+	KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(
+			kbdev,
+			kbdev->gpu_props.props.raw_props.gpu_id,
+			kbdev->gpu_props.num_cores);
+
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		void *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, kbdev);
+	}
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(
+				&kbdev->as[as_nr],
+				kbdev);
+
+	/* Create object for each known context. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry(element, &kbdev->kctx_list, link) {
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(
+				element->kctx,
+				element->kctx->id,
+				(u32)(element->kctx->tgid));
+	}
+	/* Before releasing the lock, reset body stream buffers.
+	 * This will prevent context creation message to be directed to both
+	 * summary and body stream.
+	 */
+	kbase_tlstream_reset_body_streams();
+	mutex_unlock(&kbdev->kctx_list_lock);
+	/* Static object are placed into summary packet that needs to be
+	 * transmitted first. Flush all streams to make it available to
+	 * user space.
+	 */
+	kbase_tlstream_flush_streams();
+}
+
+int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags)
+{
+	int ret;
+	u32 tlstream_enabled = TLSTREAM_ENABLED | flags;
+
+	if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) {
+		int rcode;
+
+		ret = anon_inode_getfd(
+				"[mali_tlstream]",
+				&kbasep_tlstream_fops,
+				kctx,
+				O_RDONLY | O_CLOEXEC);
+		if (ret < 0) {
+			atomic_set(&kbase_tlstream_enabled, 0);
+			return ret;
+		}
+
+		/* Reset and initialize header streams. */
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_HEADER]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_AUX_HEADER]);
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_OBJ_HEADER,
+				tp_desc_obj,
+				ARRAY_SIZE(tp_desc_obj));
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_AUX_HEADER,
+				tp_desc_aux,
+				ARRAY_SIZE(tp_desc_aux));
+
+		/* Start autoflush timer. */
+		atomic_set(&autoflush_timer_active, 1);
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+		CSTD_UNUSED(rcode);
+
+		/* If job dumping is enabled, readjust the software event's
+		 * timeout as the default value of 3 seconds is often
+		 * insufficient. */
+		if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) {
+			dev_info(kctx->kbdev->dev,
+					"Job dumping is enabled, readjusting the software event's timeout\n");
+			atomic_set(&kctx->kbdev->js_data.soft_job_timeout_ms,
+					1800000);
+		}
+
+		/* Summary stream was cleared during acquire.
+		 * Create static timeline objects that will be
+		 * read by client.
+		 */
+		kbase_create_timeline_objects(kctx);
+
+	} else {
+		ret = -EBUSY;
+	}
+
+	return ret;
+}
+
+void kbase_tlstream_flush_streams(void)
+{
+	enum tl_stream_type stype;
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
+		kbasep_tlstream_flush_stream(stype);
+}
+
+void kbase_tlstream_reset_body_streams(void)
+{
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_OBJ]);
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_AUX]);
+}
+
+#if MALI_UNIT_TEST
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
+{
+	KBASE_DEBUG_ASSERT(bytes_collected);
+	KBASE_DEBUG_ASSERT(bytes_generated);
+	*bytes_collected = atomic_read(&tlstream_bytes_collected);
+	*bytes_generated = atomic_read(&tlstream_bytes_generated);
+}
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
+{
+	const u32     msg_id = KBASE_TL_NEW_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) +
+		sizeof(core_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &id, sizeof(id));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_count, sizeof(core_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
+{
+	const u32     msg_id = KBASE_TL_NEW_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) +
+		sizeof(fn);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &fn, sizeof(fn));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_LPU_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_AS_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_ATOM;
+	const size_t  msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+			sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_ctx(void *context)
+{
+	const u32     msg_id = KBASE_TL_DEL_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_atom(void *atom)
+{
+	const u32     msg_id = KBASE_TL_DEL_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_RET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_LPU;
+	const size_t  msg_s0 = sizeof(u32) + sizeof(char) +
+			strnlen(attrib_match_list, STRLEN_MAX);
+	const size_t  msg_size =
+			sizeof(msg_id) + sizeof(u64) +
+			sizeof(atom) + sizeof(lpu) + msg_s0;
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_string(
+			buffer, pos, attrib_match_list, msg_s0);
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_DEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_NDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_RDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_RET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_NRET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+		sizeof(jd) + sizeof(affinity) + sizeof(config);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &jd, sizeof(jd));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &affinity, sizeof(affinity));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &config, sizeof(config));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(prio);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &prio, sizeof(prio));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITIZED;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_jit(
+		void *atom, u64 edit_addr, u64 new_addr)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_JIT;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom)
+		+ sizeof(edit_addr) + sizeof(new_addr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &edit_addr, sizeof(edit_addr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &new_addr, sizeof(new_addr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) +
+		sizeof(transtab) + sizeof(memattr) + sizeof(transcfg);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transtab, sizeof(transtab));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &memattr, sizeof(memattr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transcfg, sizeof(transcfg));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu)
+{
+	const u32     msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softjob_start(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softjob_end(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
+{
+	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+{
+	const u32     msg_id = KBASE_AUX_PM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
+		sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_type, sizeof(core_type));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
+{
+	const u32     msg_id = KBASE_AUX_PAGEFAULT;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count_change);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&page_count_change, sizeof(page_count_change));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
+{
+	const u32     msg_id = KBASE_AUX_PAGESALLOC;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &page_count, sizeof(page_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_devfreq_target(u64 target_freq)
+{
+	const u32       msg_id = KBASE_AUX_DEVFREQ_TARGET;
+	const size_t    msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(target_freq);
+	unsigned long   flags;
+	char            *buffer;
+	size_t          pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &target_freq, sizeof(target_freq));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_protected_enter_start(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_ENTER_START;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+void __kbase_tlstream_aux_protected_enter_end(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_ENTER_END;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_protected_leave_start(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_LEAVE_START;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+void __kbase_tlstream_aux_protected_leave_end(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_LEAVE_END;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
new file mode 100755
index 0000000..bfa25d9
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -0,0 +1,644 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_KBASE_TLSTREAM_H)
+#define _KBASE_TLSTREAM_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_init - initialize timeline infrastructure in kernel
+ * Return: zero on success, negative number on error
+ */
+int kbase_tlstream_init(void);
+
+/**
+ * kbase_tlstream_term - terminate timeline infrastructure in kernel
+ *
+ * Timeline need have to been previously enabled with kbase_tlstream_init().
+ */
+void kbase_tlstream_term(void);
+
+/**
+ * kbase_tlstream_acquire - acquire timeline stream file descriptor
+ * @kctx:  kernel common context
+ * @flags: timeline stream flags
+ *
+ * This descriptor is meant to be used by userspace timeline to gain access to
+ * kernel timeline stream. This stream is later broadcasted by user space to the
+ * timeline client.
+ * Only one entity can own the descriptor at any given time. Descriptor shall be
+ * closed if unused. If descriptor cannot be obtained (i.e. when it is already
+ * being used) return will be a negative value.
+ *
+ * Return: file descriptor on success, negative number on error
+ */
+int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags);
+
+/**
+ * kbase_tlstream_flush_streams - flush timeline streams.
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_tlstream_flush_streams(void);
+
+/**
+ * kbase_tlstream_reset_body_streams - reset timeline body streams.
+ *
+ * Function will discard pending data in all timeline body streams.
+ */
+void kbase_tlstream_reset_body_streams(void);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_tlstream_test - start timeline stream data generator
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay in milliseconds between trace points written by one
+ *             writer
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This test starts a requested number of asynchronous writers in both IRQ and
+ * thread context. Each writer will generate required number of test
+ * tracepoints (tracepoints with embedded information about writer that
+ * should be verified by user space reader). Tracepoints will be emitted in
+ * all timeline body streams. If aux_msg is non-zero writer will also
+ * generate not testable tracepoints (tracepoints without information about
+ * writer). These tracepoints are used to check correctness of remaining
+ * timeline message generating functions. Writer will wait requested time
+ * between generating another set of messages. This call blocks until all
+ * writers finish.
+ */
+void kbase_tlstream_test(
+		unsigned int tpw_count,
+		unsigned int msg_delay,
+		unsigned int msg_count,
+		int          aux_msg);
+
+/**
+ * kbase_tlstream_stats - read timeline stream statistics
+ * @bytes_collected: will hold number of bytes read by the user
+ * @bytes_generated: will hold number of bytes generated by trace points
+ */
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+#define TL_ATOM_STATE_IDLE 0
+#define TL_ATOM_STATE_READY 1
+#define TL_ATOM_STATE_DONE 2
+#define TL_ATOM_STATE_POSTED 3
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+void __kbase_tlstream_tl_del_ctx(void *context);
+void __kbase_tlstream_tl_del_atom(void *atom);
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list);
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config);
+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio);
+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state);
+void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom);
+void __kbase_tlstream_tl_attrib_atom_jit(
+		void *atom, u64 edit_addr, u64 new_addr);
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg);
+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom);
+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu);
+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom);
+void __kbase_tlstream_tl_event_atom_softjob_start(void *atom);
+void __kbase_tlstream_tl_event_atom_softjob_end(void *atom);
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
+void __kbase_tlstream_aux_devfreq_target(u64 target_freq);
+void __kbase_tlstream_aux_protected_enter_start(void *gpu);
+void __kbase_tlstream_aux_protected_enter_end(void *gpu);
+void __kbase_tlstream_aux_protected_leave_start(void *gpu);
+void __kbase_tlstream_aux_protected_leave_end(void *gpu);
+
+#define TLSTREAM_ENABLED (1 << 31)
+
+extern atomic_t kbase_tlstream_enabled;
+
+#define __TRACE_IF_ENABLED(trace_name, ...)                         \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled & TLSTREAM_ENABLED)                     \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...)                     \
+	do {                                                            \
+		int enabled = atomic_read(&kbase_tlstream_enabled);     \
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
+			__kbase_tlstream_##trace_name(__VA_ARGS__);     \
+	} while (0)
+
+#define __TRACE_IF_ENABLED_JD(trace_name, ...)                      \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED)    \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+/*****************************************************************************/
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX - create context object in timeline
+ *                                     summary
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU - create GPU object in timeline summary
+ * @gpu:        name of the GPU object
+ * @id:         id value of this GPU
+ * @core_count: number of cores this GPU hosts
+ *
+ * Function emits a timeline message informing about GPU creation. GPU is
+ * created with two attributes: id and core count.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(gpu, id, core_count) \
+	__TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU - create LPU object in timeline summary
+ * @lpu: name of the Logical Processing Unit object
+ * @nr:  sequential number assigned to this LPU
+ * @fn:  property describing this LPU's functional abilities
+ *
+ * Function emits a timeline message informing about LPU creation. LPU is
+ * created with two attributes: number linking this LPU with GPU's job slot
+ * and function bearing information about this LPU abilities.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, nr, fn) \
+	__TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU - lifelink LPU object to GPU
+ * @lpu: name of the Logical Processing Unit object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that LPU object shall be deleted
+ * along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_AS - create address space object in timeline summary
+ * @as: name of the address space object
+ * @nr: sequential number assigned to this address space
+ *
+ * Function emits a timeline message informing about address space creation.
+ * Address space is created with one attribute: number identifying this
+ * address space.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(as, nr) \
+	__TRACE_IF_ENABLED(tl_summary_new_as, as, nr)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU - lifelink address space object to GPU
+ * @as:  name of the address space object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that address space object
+ * shall be deleted along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(as, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_CTX - create context object in timeline
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ */
+#define KBASE_TLSTREAM_TL_NEW_CTX(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_ATOM - create atom object in timeline
+ * @atom: name of the atom object
+ * @nr:   sequential number assigned to this atom
+ *
+ * Function emits a timeline message informing about atom creation. Atom is
+ * created with atom number (its attribute) that links it with actual work
+ * bucket id understood by hardware.
+ */
+#define KBASE_TLSTREAM_TL_NEW_ATOM(atom, nr) \
+	__TRACE_IF_ENABLED(tl_new_atom, atom, nr)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_CTX - destroy context object in timeline
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that context object ceased to
+ * exist.
+ */
+#define KBASE_TLSTREAM_TL_DEL_CTX(context) \
+	__TRACE_IF_ENABLED(tl_del_ctx, context)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_ATOM - destroy atom object in timeline
+ * @atom: name of the atom object
+ *
+ * Function emits a timeline message informing that atom object ceased to
+ * exist.
+ */
+#define KBASE_TLSTREAM_TL_DEL_ATOM(atom) \
+	__TRACE_IF_ENABLED(tl_del_atom, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_CTX_LPU - retain context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_CTX_LPU(context, lpu) \
+	__TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_CTX - retain atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by context and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_CTX(atom, context) \
+	__TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_LPU - retain atom by LPU
+ * @atom:              name of the atom object
+ * @lpu:               name of the Logical Processing Unit object
+ * @attrib_match_list: list containing match operator attributes
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_LPU(atom, lpu, attrib_match_list) \
+	__TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_CTX_LPU - release context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being released
+ * by LPU object.
+ */
+#define KBASE_TLSTREAM_TL_NRET_CTX_LPU(context, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - release atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by context.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX(atom, context) \
+	__TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - release atom by LPU
+ * @atom: name of the atom object
+ * @lpu:  name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by LPU.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU(atom, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_AS_CTX - lifelink address space object to context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being held by the context object.
+ */
+#define KBASE_TLSTREAM_TL_RET_AS_CTX(as, ctx) \
+	__TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_AS_CTX - release address space by context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being released by atom.
+ */
+#define KBASE_TLSTREAM_TL_NRET_AS_CTX(as, ctx) \
+	__TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_AS - retain atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by address space and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_AS(atom, as) \
+	__TRACE_IF_ENABLED(tl_ret_atom_as, atom, as)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_AS - release atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by address space.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_AS(atom, as) \
+	__TRACE_IF_ENABLED(tl_nret_atom_as, atom, as)
+
+/**
+ * KBASE_TLSTREAM_TL_DEP_ATOM_ATOM - parent atom depends on child atom
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depends on child atom
+ *
+ * Function emits a timeline message informing that parent atom waits for
+ * child atom object to be completed before start its execution.
+ */
+#define KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM - dependency between atoms resolved
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM - information about already resolved dependency between atoms
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes
+ * @atom:     name of the atom object
+ * @jd:       job descriptor address
+ * @affinity: job affinity
+ * @config:   job config
+ *
+ * Function emits a timeline message containing atom attributes.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(atom, jd, affinity, config) \
+	__TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - atom priority
+ * @atom: name of the atom object
+ * @prio: atom priority
+ *
+ * Function emits a timeline message containing atom priority.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(atom, prio) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - atom state
+ * @atom:  name of the atom object
+ * @state: atom state
+ *
+ * Function emits a timeline message containing atom state.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, state) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED - atom was prioritized
+ * @atom:  name of the atom object
+ *
+ * Function emits a timeline message signalling priority change
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(atom) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_prioritized, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - jit happened on atom
+ * @atom:       atom identifier
+ * @edit_addr:  address edited by jit
+ * @new_addr:   address placed into the edited location
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr) \
+	__TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, new_addr)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes
+ * @as:       assigned address space
+ * @transtab: configuration of the TRANSTAB register
+ * @memattr:  configuration of the MEMATTR register
+ * @transcfg: configuration of the TRANSCFG register (or zero if not present)
+ *
+ * Function emits a timeline message containing address space attributes.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, transtab, memattr, transcfg) \
+	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP
+ * @lpu:        name of the LPU object
+ */
+#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(lpu) \
+	__TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softjob_start, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softjob_end, atom)
+
+/**
+ * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - The GPU is being soft reset
+ * @gpu:        name of the GPU object
+ *
+ * This imperative tracepoint is specific to job dumping.
+ * Function emits a timeline message indicating GPU soft reset.
+ */
+#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET(gpu) \
+	__TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu)
+
+
+/**
+ * KBASE_TLSTREAM_AUX_PM_STATE - timeline message: power management state
+ * @core_type: core type (shader, tiler, l2 cache, l3 cache)
+ * @state:     64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
+ */
+#define KBASE_TLSTREAM_AUX_PM_STATE(core_type, state) \
+	__TRACE_IF_ENABLED(aux_pm_state, core_type, state)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGEFAULT - timeline message: MMU page fault event
+ *                                resulting in new pages being mapped
+ * @ctx_nr:            kernel context number
+ * @page_count_change: number of pages to be added
+ */
+#define KBASE_TLSTREAM_AUX_PAGEFAULT(ctx_nr, page_count_change) \
+	__TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGESALLOC - timeline message: total number of allocated
+ *                                 pages is changed
+ * @ctx_nr:     kernel context number
+ * @page_count: number of pages used by the context
+ */
+#define KBASE_TLSTREAM_AUX_PAGESALLOC(ctx_nr, page_count) \
+	__TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count)
+
+/**
+ * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - timeline message: new target DVFS
+ *                                     frequency
+ * @target_freq: new target frequency
+ */
+#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(target_freq) \
+	__TRACE_IF_ENABLED(aux_devfreq_target, target_freq)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - The GPU has started transitioning
+ *                                            to protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU is starting to
+ * transition to protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_enter_start, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - The GPU has finished transitioning
+ *                                          to protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU has finished
+ * transitioning to protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_enter_end, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - The GPU has started transitioning
+ *                                            to non-protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU is starting to
+ * transition to non-protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_leave_start, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - The GPU has finished transitioning
+ *                                          to non-protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU has finished
+ * transitioning to non-protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_leave_end, gpu)
+
+#endif /* _KBASE_TLSTREAM_H */
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100644
index 0000000..32fffe0
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,269 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code idenitifers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE.
+ *
+ * e.g.:
+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X
+ * typedef enum
+ * {
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X )
+ * #include "mali_kbase_trace_defs.h"
+ * #undef  KBASE_TRACE_CODE_MAKE_CODE
+ * } kbase_trace_code;
+ *
+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE
+ *
+ *
+ * The use of the macro here is:
+ * - KBASE_TRACE_CODE_MAKE_CODE( X )
+ *
+ * Which produces:
+ * - For an enum, KBASE_TRACE_CODE_X
+ * - For a string, "X"
+ *
+ *
+ * For example:
+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to:
+ *  - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum
+ *  - "JM_JOB_COMPLETE" for the string
+ * - To use it to trace an event, do:
+ *  - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val );
+ */
+
+#if 0 /* Dummy section to avoid breaking formatting */
+int dummy_array[] = {
+#endif
+
+/*
+ * Core events
+ */
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ),
+	/* info_val == bits cleared */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR),
+	/* GPU addr==dump address */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES),
+/*
+ * Job Slot management events
+ */
+	/* info_val==irq rawstat at start */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ),
+	/* info_val==jobs processed */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END),
+/* In the following:
+ *
+ * - ctx is set if a corresponding job found (NULL otherwise, e.g. some soft-stop cases)
+ * - uatom==kernel-side mapped uatom address (for correlation with user-side)
+ */
+	/* info_val==exit code; gpu_addr==chain gpuaddr */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+	/* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT),
+	/* gpu_addr is as follows:
+	 * - If JS_STATUS active after soft-stop, val==gpu addr written to
+	 *   JS_HEAD on submit
+	 * - otherwise gpu_addr==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1),
+	/* gpu_addr==JS_TAIL read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD),
+/* gpu_addr is as follows:
+ * - If JS_STATUS active before soft-stop, val==JS_HEAD
+ * - otherwise gpu_addr==0
+ */
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE),
+	/* info_val == nr jobs submitted */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP),
+	/* gpu_addr==JS_HEAD_NEXT last written */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER),
+/*
+ * Job dispatch events
+ */
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==0, info_val==0, uatom==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER),
+/*
+ * Scheduler Core events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB),
+	/* gpu_addr==last value written/would be written to JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
+	/* kctx is the one being evicted, info_val == kctx to put in  */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE),
+	/* info_val == the ctx attribute now on ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX),
+	/* info_val == the ctx attribute now on runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL),
+	/* info_val == the ctx attribute now off ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
+	/* info_val == the ctx attribute now off runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+/*
+ * Scheduler Policy events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX),
+	/* info_val == whether it was evicted */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ),
+	/* gpu_addr==JS_HEAD to write if the job were run */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END),
+/*
+ * Power Management Events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
+	/* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF),
+	/* info_val == policy number, or -1 for "Already changing" */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+/* Unused code just to make it easier to not have a comma at the end.
+ * All other codes MUST come before this */
+	KBASE_TRACE_CODE_MAKE_CODE(DUMMY)
+
+#if 0 /* Dummy section to avoid breaking formatting */
+};
+#endif
+
+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
new file mode 100644
index 0000000..ee6bdf8
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
@@ -0,0 +1,241 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define CREATE_TRACE_POINTS
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+#include "mali_timeline.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active);
+
+struct kbase_trace_timeline_desc {
+	char *enum_str;
+	char *desc;
+	char *format;
+	char *format_desc;
+};
+
+static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc }
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table)
+
+static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos)
+{
+	if (*pos >= KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	(*pos)++;
+
+	if (*pos == KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace_timeline_desc *trace_desc = data;
+
+	seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc);
+	return 0;
+}
+
+
+static const struct seq_operations kbasep_trace_timeline_seq_ops = {
+	.start = kbasep_trace_timeline_seq_start,
+	.next = kbasep_trace_timeline_seq_next,
+	.stop = kbasep_trace_timeline_seq_stop,
+	.show = kbasep_trace_timeline_seq_show,
+};
+
+static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &kbasep_trace_timeline_seq_ops);
+}
+
+static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
+	.open = kbasep_trace_timeline_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_timeline_defs",
+			S_IRUGO, kbdev->mali_debugfs_directory, NULL,
+			&kbasep_trace_timeline_debugfs_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
+	} else {
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1);
+		KBASE_TIMELINE_JOB_START(kctx, js, atom_number);
+	}
+	++kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
+	} else {
+		/* Job finished in JS_HEAD */
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0);
+		KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number);
+
+		/* see if we need to trace the job in JS_NEXT moving to JS_HEAD */
+		if (kbase_backend_nr_atoms_submitted(kbdev, js)) {
+			struct kbase_jd_atom *next_katom;
+			struct kbase_context *next_kctx;
+
+			/* Peek the next atom - note that the atom in JS_HEAD will already
+			 * have been dequeued */
+			next_katom = kbase_backend_inspect_head(kbdev, js);
+			WARN_ON(!next_katom);
+			next_kctx = next_katom->kctx;
+			KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0);
+			KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1);
+			KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom));
+		}
+	}
+
+	--kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+	int uid = 0;
+	int old_uid;
+
+	/* If a producer already exists for the event, try to use their UID (multiple-producers) */
+	uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]);
+	old_uid = uid;
+
+	/* Get a new non-zero UID if we don't have one yet */
+	while (!uid)
+		uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter);
+
+	/* Try to use this UID */
+	if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid))
+		/* If it changed, raced with another producer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid);
+}
+
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != 0) {
+		if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+			/* If it changed, raced with another consumer: we've lost this UID */
+			uid = 0;
+
+		KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+	}
+}
+
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+		/* If it changed, raced with another consumer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+}
+
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	/* Simply log the start of the transition */
+	kbdev->timeline.l2_transitioning = true;
+	KBASE_TIMELINE_POWERING_L2(kbdev);
+}
+
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	/* Simply log the end of the transition */
+	if (kbdev->timeline.l2_transitioning) {
+		kbdev->timeline.l2_transitioning = false;
+		KBASE_TIMELINE_POWERED_L2(kbdev);
+	}
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
new file mode 100644
index 0000000..c1a3dfc
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
@@ -0,0 +1,368 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#if !defined(_KBASE_TRACE_TIMELINE_H)
+#define _KBASE_TRACE_TIMELINE_H
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+
+enum kbase_trace_timeline_code {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Initialize Timeline DebugFS entries */
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_trace_timeline_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
+ * functions.
+ * Output is timestamped by either sched_clock() (default), local_clock(), or
+ * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */
+#include "mali_timeline.h"
+
+/* Trace number of atoms in flight for kctx (atoms either not completed, or in
+   process of being returned to user */
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec,   \
+				(int)kctx->timeline.owner_tgid,              \
+				count);                                      \
+	} while (0)
+
+/* Trace atom_id being Ready to Run */
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id)                             \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec,              \
+				CTX_FLOW_ATOM_READY,                         \
+				(int)kctx->timeline.owner_tgid,              \
+				atom_id);                                    \
+	} while (0)
+
+/* Trace number of atoms submitted to job slot js
+ *
+ * NOTE: This uses a different tracepoint to the head/next/soft-stop actions,
+ * so that those actions can be filtered out separately from this
+ *
+ * This is because this is more useful, as we can use it to calculate general
+ * utilization easily and accurately */
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_ACTIVE,                      \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+
+/* Trace atoms present in JS_NEXT */
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_NEXT,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace atoms present in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_HEAD,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace that a soft stop/evict from next is being attempted on a slot */
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_STOPPING,                    \
+				(kctx) ? (int)kctx->timeline.owner_tgid : 0, \
+				js, count);                                  \
+	} while (0)
+
+
+
+/* Trace state of overall GPU power */
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active)                              \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_ACTIVE, active);            \
+	} while (0)
+
+/* Trace state of tiler power */
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap)                            \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_TILER_ACTIVE,               \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace number of shaders currently powered */
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap)                           \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_SHADER_ACTIVE,              \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 power */
+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap)                               \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_L2_ACTIVE,                  \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 cache*/
+#define KBASE_TIMELINE_POWERING_L2(kbdev)                                    \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_POWERING,               \
+				1);                                          \
+	} while (0)
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)                                     \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_ACTIVE,                 \
+				1);                                          \
+	} while (0)
+
+/* Trace kbase_pm_send_event message send */
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id)         \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_SEND_EVENT,                       \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+/* Trace kbase_pm_worker message receive */
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id)       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_HANDLE_EVENT,                     \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+
+/* Trace atom_id starting in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number)          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_START_GPU_JOB_CHAIN_SW_APPROX,            \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _consumerof_atom_number);                \
+	} while (0)
+
+/* Trace atom_id stopping on JS_HEAD */
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_STOP_GPU_JOB_CHAIN_SW_APPROX,             \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _producerof_atom_number_completed);      \
+	} while (0)
+
+/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
+ * certin caller */
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec,     \
+				trace_code, 1);                              \
+	} while (0)
+
+/* Trace number of contexts active */
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec,    \
+				count);                                      \
+	} while (0)
+
+/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
+
+/**
+ * Trace that an atom is starting on a job slot
+ *
+ * The caller must be holding hwaccess_lock
+ */
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js);
+
+/**
+ * Trace that an atom has done on a job slot
+ *
+ * 'Done' in this sense can occur either because:
+ * - the atom in JS_HEAD finished
+ * - the atom in JS_NEXT was evicted
+ *
+ * Whether the atom finished or was evicted is passed in @a done_code
+ *
+ * It is assumed that the atom has already been removed from the submit slot,
+ * with either:
+ * - kbasep_jm_dequeue_submit_slot()
+ * - kbasep_jm_dequeue_tail_submit_slot()
+ *
+ * The caller must be holding hwaccess_lock
+ */
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code);
+
+
+/** Trace a pm event starting */
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
+		enum kbase_timeline_pm_event event_sent);
+
+/** Trace a pm event finishing */
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Check whether a pm event was present, and if so trace finishing it */
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Trace L2 power-up start */
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
+
+/** Trace L2 power-up done */
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
+
+#else
+
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)  CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
+
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
+
+static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+}
+
+static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+}
+
+static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+}
+
+static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_MALI_TRACE_TIMELINE */
+
+#endif				/* _KBASE_TRACE_TIMELINE_H */
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
new file mode 100644
index 0000000..114bcac
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
@@ -0,0 +1,145 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * Conventions on Event Names:
+ *
+ * - The prefix determines something about how the timeline should be
+ *   displayed, and is split up into various parts, separated by underscores:
+ *  - 'SW' and 'HW' as the first part will be used to determine whether a
+ *     timeline is to do with Software or Hardware - effectively, separate
+ *     'channels' for Software and Hardware
+ *  - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
+ *    signify related pairs of events - these are optional.
+ *  - 'FLOW' indicates a generic event, which can use dependencies
+ * - This gives events such as:
+ *  - 'SW_ENTER_FOO'
+ *  - 'SW_LEAVE_FOO'
+ *  - 'SW_FLOW_BAR_1'
+ *  - 'SW_FLOW_BAR_2'
+ *  - 'HW_START_BAZ'
+ *  - 'HW_STOP_BAZ'
+ * - And an unadorned HW event:
+ *  - 'HW_BAZ_FROZBOZ'
+ */
+
+/*
+ * Conventions on parameter names:
+ * - anything with 'instance' in the name will have a separate timeline based
+ *   on that instances.
+ * - underscored-prefixed parameters will by hidden by default on timelines
+ *
+ * Hence:
+ * - Different job slots have their own 'instance', based on the instance value
+ * - Per-context info (e.g. atoms on a context) have their own 'instance'
+ *   (i.e. each context should be on a different timeline)
+ *
+ * Note that globally-shared resources can be tagged with a tgid, but we don't
+ * want an instance per context:
+ * - There's no point having separate Job Slot timelines for each context, that
+ *   would be confusing - there's only really 3 job slots!
+ * - There's no point having separate Shader-powered timelines for each
+ *   context, that would be confusing - all shader cores (whether it be 4, 8,
+ *   etc) are shared in the system.
+ */
+
+	/*
+	 * CTX events
+	 */
+	/* Separate timelines for each context 'instance'*/
+	KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT,     "CTX: Atoms in flight",            "%d,%d",    "_instance_tgid,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY,            "CTX: Atoms Ready to Run",         "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
+
+	/*
+	 * SW Events
+	 */
+	/* Separate timelines for each slot 'instance' */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE,         "SW: GPU slot active",             "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT,           "SW: GPU atom in NEXT",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD,           "SW: GPU atom in HEAD",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING,       "SW: Try Soft-Stop on GPU slot",   "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
+	/* Shader and overall power is shared - can't have separate instances of
+	 * it, just tagging with the context */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE,        "SW: GPU power active",            "%d,%d",    "_tgid,_value_is_power_active"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE,  "SW: GPU tiler powered",           "%d,%d",    "_tgid,_value_number_of_tilers"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered",         "%d,%d",    "_tgid,_value_number_of_shaders"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE,     "SW: GPU L2 powered",              "%d,%d",    "_tgid,_value_number_of_l2"),
+
+	/* SW Power event messaging. _event_type is one from the kbase_pm_event enum  */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT,          "SW: PM Send Event",               "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT,        "SW: PM Handle Event",             "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
+	/* SW L2 power events */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING,  "SW: GPU L2 powering",             "%d,%d", "_tgid,_writerof_l2_transitioning"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE,	  "SW: GPU L2 powering done",        "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
+
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE,          "SW: Context Active",              "%d,%d",    "_tgid,_value_active"),
+
+	/*
+	 * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END,   "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END,   "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END,   "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
+
+	/*
+	 * Significant Indirect callers of kbase_pm_check_transitions_nolock()
+	 */
+	/* kbase_pm_request_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	/* kbase_pm_release_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END,   "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	/*
+	 * END: SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+
+	/*
+	 * HW Events
+	 */
+	KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT,
+"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"),
+	KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain start (SW approximated)", "%d,%d,%d",
+"_tgid,job_slot,_consumerof_atom_number_ready"),
+	KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain stop (SW approximated)",  "%d,%d,%d",
+"_tgid,job_slot,_producerof_atom_number_completed")
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
new file mode 100644
index 0000000..3ea234a
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
+{
+	struct list_head *pos = base->next;
+
+	while (pos != base) {
+		if (pos == entry)
+			return true;
+
+		pos = pos->next;
+	}
+	return false;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100755
index 0000000..f2e5a33
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,66 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+/** Test whether the given list entry is a member of the given list.
+ *
+ * @param base      The head of the list to be tested
+ * @param entry     The list entry to be tested
+ *
+ * @return          true if entry is a member of base
+ *                  false otherwise
+ */
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
+
+
+static inline void kbase_timer_setup(struct timer_list *timer,
+				     void (*callback)(struct timer_list *timer))
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
+	setup_timer(timer, (void (*)(unsigned long)) callback,
+			(unsigned long) timer);
+#else
+	timer_setup(timer, callback, 0);
+#endif
+}
+
+#ifndef WRITE_ONCE
+	#ifdef ASSIGN_ONCE
+		#define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x)
+	#else
+		#define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val))
+	#endif
+#endif
+
+#ifndef READ_ONCE
+	#define READ_ONCE(x) ACCESS_ONCE(x)
+#endif
+
+#endif				/* _KBASE_UTILITY_H */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
new file mode 100755
index 0000000..800351b
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -0,0 +1,2342 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/hrtimer.h>
+#include <linux/jiffies.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwcnt_reader.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+#ifdef CONFIG_MALI_NO_MALI
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif
+
+/*****************************************************************************/
+
+/* Hwcnt reader API version */
+#define HWCNT_READER_API        1
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC            1000000000ull /* ns */
+
+/* The time resolution of dumping service. */
+#define DUMPING_RESOLUTION      500000ull /* ns */
+
+/* The maximal supported number of dumping buffers. */
+#define MAX_BUFFER_COUNT        32
+
+/* Size and number of hw counters blocks. */
+#define NR_CNT_BLOCKS_PER_GROUP 8
+#define NR_CNT_PER_BLOCK        64
+#define NR_BYTES_PER_CNT        4
+#define NR_BYTES_PER_HDR        16
+#define PRFCNT_EN_MASK_OFFSET   0x8
+
+/*****************************************************************************/
+
+enum {
+	SHADER_HWCNT_BM,
+	TILER_HWCNT_BM,
+	MMU_L2_HWCNT_BM,
+	JM_HWCNT_BM
+};
+
+enum vinstr_state {
+	VINSTR_IDLE,
+	VINSTR_DUMPING,
+	VINSTR_SUSPENDING,
+	VINSTR_SUSPENDED,
+	VINSTR_RESUMING
+};
+
+/**
+ * struct kbase_vinstr_context - vinstr context per device
+ * @lock:              protects the entire vinstr context, but the list of
+ *                     vinstr clients can be updated outside the lock using
+ *                     @state_lock.
+ * @kbdev:             pointer to kbase device
+ * @kctx:              pointer to kbase context
+ * @vmap:              vinstr vmap for mapping hwcnt dump buffer
+ * @gpu_va:            GPU hwcnt dump buffer address
+ * @cpu_va:            the CPU side mapping of the hwcnt dump buffer
+ * @dump_size:         size of the dump buffer in bytes
+ * @bitmap:            current set of counters monitored, not always in sync
+ *                     with hardware
+ * @reprogram:         when true, reprogram hwcnt block with the new set of
+ *                     counters
+ * @state:             vinstr state
+ * @state_lock:        protects information about vinstr state and list of
+ *                     clients.
+ * @suspend_waitq:     notification queue to trigger state re-validation
+ * @suspend_cnt:       reference counter of vinstr's suspend state
+ * @suspend_work:      worker to execute on entering suspended state
+ * @resume_work:       worker to execute on leaving suspended state
+ * @nclients:          number of attached clients, pending or idle
+ * @nclients_suspended: number of attached but suspended clients
+ * @waiting_clients:   head of list of clients being periodically sampled
+ * @idle_clients:      head of list of clients being idle
+ * @suspended_clients: head of list of clients being suspended
+ * @thread:            periodic sampling thread
+ * @waitq:             notification queue of sampling thread
+ * @request_pending:   request for action for sampling thread
+ * @clients_present:   when true, we have at least one client
+ *                     Note: this variable is in sync. with nclients and is
+ *                     present to preserve simplicity. Protected by state_lock.
+ * @need_suspend:      when true, a suspend has been requested while a resume is
+ *                     in progress. Resume worker should queue a suspend.
+ * @need_resume:       when true, a resume has been requested while a suspend is
+ *                     in progress. Suspend worker should queue a resume.
+ */
+struct kbase_vinstr_context {
+	struct mutex             lock;
+	struct kbase_device      *kbdev;
+	struct kbase_context     *kctx;
+
+	struct kbase_vmap_struct vmap;
+	u64                      gpu_va;
+	void                     *cpu_va;
+	size_t                   dump_size;
+	u32                      bitmap[4];
+	bool                     reprogram;
+
+	enum vinstr_state        state;
+	struct spinlock          state_lock;
+	wait_queue_head_t        suspend_waitq;
+	unsigned int             suspend_cnt;
+	struct work_struct       suspend_work;
+	struct work_struct       resume_work;
+
+	u32                      nclients;
+	u32                      nclients_suspended;
+	struct list_head         waiting_clients;
+	struct list_head         idle_clients;
+	struct list_head         suspended_clients;
+
+	struct task_struct       *thread;
+	wait_queue_head_t        waitq;
+	atomic_t                 request_pending;
+
+	bool                     clients_present;
+
+	bool                     need_suspend;
+	bool                     need_resume;
+};
+
+/**
+ * struct kbase_vinstr_client - a vinstr client attached to a vinstr context
+ * @vinstr_ctx:    vinstr context client is attached to
+ * @list:          node used to attach this client to list in vinstr context
+ * @buffer_count:  number of buffers this client is using
+ * @event_mask:    events this client reacts to
+ * @dump_size:     size of one dump buffer in bytes
+ * @bitmap:        bitmap request for JM, TILER, SHADER and MMU counters
+ * @legacy_buffer: userspace hwcnt dump buffer (legacy interface)
+ * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface)
+ * @accum_buffer:  temporary accumulation buffer for preserving counters
+ * @dump_time:     next time this clients shall request hwcnt dump
+ * @dump_interval: interval between periodic hwcnt dumps
+ * @dump_buffers:  kernel hwcnt dump buffers allocated by this client
+ * @dump_buffers_meta: metadata of dump buffers
+ * @meta_idx:      index of metadata being accessed by userspace
+ * @read_idx:      index of buffer read by userspace
+ * @write_idx:     index of buffer being written by dumping service
+ * @waitq:         client's notification queue
+ * @pending:       when true, client has attached but hwcnt not yet updated
+ * @suspended:     when true, client is suspended
+ */
+struct kbase_vinstr_client {
+	struct kbase_vinstr_context        *vinstr_ctx;
+	struct list_head                   list;
+	unsigned int                       buffer_count;
+	u32                                event_mask;
+	size_t                             dump_size;
+	u32                                bitmap[4];
+	void __user                        *legacy_buffer;
+	void                               *kernel_buffer;
+	void                               *accum_buffer;
+	u64                                dump_time;
+	u32                                dump_interval;
+	char                               *dump_buffers;
+	struct kbase_hwcnt_reader_metadata *dump_buffers_meta;
+	atomic_t                           meta_idx;
+	atomic_t                           read_idx;
+	atomic_t                           write_idx;
+	wait_queue_head_t                  waitq;
+	bool                               pending;
+	bool                               suspended;
+};
+
+/**
+ * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer
+ * @hrtimer:    high resolution timer
+ * @vinstr_ctx: vinstr context
+ */
+struct kbasep_vinstr_wake_up_timer {
+	struct hrtimer              hrtimer;
+	struct kbase_vinstr_context *vinstr_ctx;
+};
+
+/*****************************************************************************/
+
+static void kbase_vinstr_update_suspend(
+		struct kbase_vinstr_context *vinstr_ctx);
+
+static int kbasep_vinstr_service_task(void *data);
+
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+		struct file *filp,
+		poll_table  *wait);
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+		struct file   *filp,
+		unsigned int  cmd,
+		unsigned long arg);
+static int kbasep_vinstr_hwcnt_reader_mmap(
+		struct file           *filp,
+		struct vm_area_struct *vma);
+static int kbasep_vinstr_hwcnt_reader_release(
+		struct inode *inode,
+		struct file  *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations vinstr_client_fops = {
+	.poll           = kbasep_vinstr_hwcnt_reader_poll,
+	.unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,
+	.compat_ioctl   = kbasep_vinstr_hwcnt_reader_ioctl,
+	.mmap           = kbasep_vinstr_hwcnt_reader_mmap,
+	.release        = kbasep_vinstr_hwcnt_reader_release,
+};
+
+/*****************************************************************************/
+
+static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_ioctl_hwcnt_enable enable;
+	int err;
+
+	enable.dump_buffer = vinstr_ctx->gpu_va;
+	enable.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
+	enable.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
+	enable.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
+	enable.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
+
+	/* Mark the context as active so the GPU is kept turned on */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread. */
+	kbase_pm_context_active(kbdev);
+
+	/* Schedule the context in */
+	kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
+	if (err) {
+		/* Release the context. This had its own Power Manager Active
+		 * reference */
+		kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+		/* Also release our Power Manager Active reference */
+		kbase_pm_context_idle(kbdev);
+	}
+
+	return err;
+}
+
+static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+
+	err = kbase_instr_hwcnt_disable_internal(kctx);
+	if (err) {
+		dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)",
+				kctx);
+		return;
+	}
+
+	/* Release the context. This had its own Power Manager Active reference. */
+	kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+	/* Also release our Power Manager Active reference. */
+	kbase_pm_context_idle(kbdev);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx);
+}
+
+static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	disable_hwcnt(vinstr_ctx);
+	return enable_hwcnt(vinstr_ctx);
+}
+
+static void hwcnt_bitmap_set(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     = src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  = src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM];
+}
+
+static void hwcnt_bitmap_union(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     |= src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  |= src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM];
+}
+
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev)
+{
+	size_t dump_size;
+
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+		u32 nr_cg;
+
+		nr_cg = kbdev->gpu_props.num_core_groups;
+		dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	} else
+#endif /* CONFIG_MALI_NO_MALI */
+	{
+		/* assume v5 for now */
+		base_gpu_props *props = &kbdev->gpu_props.props;
+		u32 nr_l2 = props->l2_props.num_l2_slices;
+		u64 core_mask = props->coherency_info.group[0].core_mask;
+		u32 nr_blocks = fls64(core_mask);
+
+		/* JM and tiler counter blocks are always present */
+		dump_size = (2 + nr_l2 + nr_blocks) *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	}
+	return dump_size;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size);
+
+static size_t kbasep_vinstr_dump_size_ctx(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev);
+}
+
+static int kbasep_vinstr_map_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_va_region *reg;
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	u64 flags, nr_pages;
+
+	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+	vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	nr_pages = PFN_UP(vinstr_ctx->dump_size);
+
+	reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+			&vinstr_ctx->gpu_va);
+	if (!reg)
+		return -ENOMEM;
+
+	vinstr_ctx->cpu_va = kbase_vmap(
+			kctx,
+			vinstr_ctx->gpu_va,
+			vinstr_ctx->dump_size,
+			&vinstr_ctx->vmap);
+	if (!vinstr_ctx->cpu_va) {
+		kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void kbasep_vinstr_unmap_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+
+	kbase_vunmap(kctx, &vinstr_ctx->vmap);
+	kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+}
+
+/**
+ * kbasep_vinstr_create_kctx - create kernel context for vinstr
+ * @vinstr_ctx: vinstr context
+ * Return: zero on success
+ */
+static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element = NULL;
+	unsigned long flags;
+	bool enable_backend = false;
+	int err;
+
+	vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true);
+	if (!vinstr_ctx->kctx)
+		return -ENOMEM;
+
+	/* Map the master kernel dump buffer.  The HW dumps the counters
+	 * into this memory region. */
+	err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx);
+	if (err)
+		goto failed_map;
+
+	/* Add kernel context to list of contexts associated with device. */
+	element = kzalloc(sizeof(*element), GFP_KERNEL);
+	if (element) {
+		element->kctx = vinstr_ctx->kctx;
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_add(&element->link, &kbdev->kctx_list);
+
+		/* Inform timeline client about new context.
+		 * Do this while holding the lock to avoid tracepoint
+		 * being created in both body and summary stream. */
+		KBASE_TLSTREAM_TL_NEW_CTX(
+				vinstr_ctx->kctx,
+				vinstr_ctx->kctx->id,
+				(u32)(vinstr_ctx->kctx->tgid));
+
+		mutex_unlock(&kbdev->kctx_list_lock);
+	} else {
+		/* Don't treat this as a fail - just warn about it. */
+		dev_warn(kbdev->dev,
+				"couldn't add kctx to kctx_list\n");
+	}
+
+	/* Don't enable hardware counters if vinstr is suspended.
+	 * Note that vinstr resume code is run under vinstr context lock,
+	 * lower layer will be enabled as needed on resume. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE == vinstr_ctx->state)
+		enable_backend = true;
+	vinstr_ctx->clients_present = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (enable_backend)
+		err = enable_hwcnt(vinstr_ctx);
+	if (err)
+		goto failed_enable;
+
+	vinstr_ctx->thread = kthread_run(
+			kbasep_vinstr_service_task,
+			vinstr_ctx,
+			"mali_vinstr_service");
+	if (IS_ERR(vinstr_ctx->thread)) {
+		err = PTR_ERR(vinstr_ctx->thread);
+		goto failed_kthread;
+	}
+
+	return 0;
+
+failed_kthread:
+	disable_hwcnt(vinstr_ctx);
+failed_enable:
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->clients_present = false;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+	if (element) {
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_del(&element->link);
+		kfree(element);
+		mutex_unlock(&kbdev->kctx_list_lock);
+		KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+	}
+failed_map:
+	kbase_destroy_context(vinstr_ctx->kctx);
+	vinstr_ctx->kctx = NULL;
+	return err;
+}
+
+/**
+ * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device             *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	struct kbasep_kctx_list_element *tmp;
+	bool                            found = false;
+	bool                            hwcnt_disabled = false;
+	unsigned long                   flags;
+
+	/* Release hw counters dumping resources. */
+	vinstr_ctx->thread = NULL;
+
+	/* Simplify state transitions by specifying that we have no clients. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->clients_present = false;
+	if ((VINSTR_SUSPENDED == vinstr_ctx->state) || (VINSTR_RESUMING == vinstr_ctx->state))
+		hwcnt_disabled = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	if (!hwcnt_disabled)
+		disable_hwcnt(vinstr_ctx);
+
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+
+	/* Remove kernel context from the device's contexts list. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == vinstr_ctx->kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	if (!found)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	/* Destroy context. */
+	kbase_destroy_context(vinstr_ctx->kctx);
+
+	/* Inform timeline client about context destruction. */
+	KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+
+	vinstr_ctx->kctx = NULL;
+}
+
+/**
+ * kbasep_vinstr_attach_client - Attach a client to the vinstr core
+ * @vinstr_ctx:    vinstr context
+ * @buffer_count:  requested number of dump buffers
+ * @bitmap:        bitmaps describing which counters should be enabled
+ * @argp:          pointer where notification descriptor shall be stored
+ * @kernel_buffer: pointer to kernel side buffer
+ *
+ * Return: vinstr opaque client handle or NULL on failure
+ */
+static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
+		struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count,
+		u32 bitmap[4], void *argp, void *kernel_buffer)
+{
+	struct task_struct         *thread = NULL;
+	struct kbase_vinstr_client *cli;
+	unsigned long flags;
+	bool clients_present = false;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	if (buffer_count > MAX_BUFFER_COUNT
+	    || (buffer_count & (buffer_count - 1)))
+		return NULL;
+
+	cli = kzalloc(sizeof(*cli), GFP_KERNEL);
+	if (!cli)
+		return NULL;
+
+	cli->vinstr_ctx   = vinstr_ctx;
+	cli->buffer_count = buffer_count;
+	cli->event_mask   =
+		(1 << BASE_HWCNT_READER_EVENT_MANUAL) |
+		(1 << BASE_HWCNT_READER_EVENT_PERIODIC);
+	cli->pending      = true;
+
+	hwcnt_bitmap_set(cli->bitmap, bitmap);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap);
+	vinstr_ctx->reprogram = true;
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* If this is the first client, create the vinstr kbase
+	 * context. This context is permanently resident until the
+	 * last client exits. */
+	if (!clients_present) {
+		hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap);
+		if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0)
+			goto error;
+
+		vinstr_ctx->reprogram = false;
+		cli->pending = false;
+	}
+
+	/* The GPU resets the counter block every time there is a request
+	 * to dump it. We need a per client kernel buffer for accumulating
+	 * the counters. */
+	cli->dump_size    = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
+	if (!cli->accum_buffer)
+		goto error;
+
+	/* Prepare buffers. */
+	if (cli->buffer_count) {
+
+		int *fd;
+		size_t tmp;
+
+		if(! access_ok(VERIFY_WRITE, (void *)argp, sizeof(int)))
+		{
+			return NULL;
+		}
+		fd = (int *)argp;
+
+		/* Allocate area for buffers metadata storage. */
+		tmp = sizeof(struct kbase_hwcnt_reader_metadata) *
+			cli->buffer_count;
+		cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL);
+		if (!cli->dump_buffers_meta)
+			goto error;
+
+		/* Allocate required number of dumping buffers. */
+		cli->dump_buffers = (char *)__get_free_pages(
+				GFP_KERNEL | __GFP_ZERO,
+				get_order(cli->dump_size * cli->buffer_count));
+		if (!cli->dump_buffers)
+			goto error;
+
+		/* Create descriptor for user-kernel data exchange. */
+		*fd = anon_inode_getfd(
+				"[mali_vinstr_desc]",
+				&vinstr_client_fops,
+				cli,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd)
+			goto error;
+	} else if (kernel_buffer) {
+		cli->kernel_buffer = kernel_buffer;
+	} else {
+		cli->legacy_buffer = (void __user *)argp;
+	}
+
+	atomic_set(&cli->read_idx, 0);
+	atomic_set(&cli->meta_idx, 0);
+	atomic_set(&cli->write_idx, 0);
+	init_waitqueue_head(&cli->waitq);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->nclients++;
+	list_add(&cli->list, &vinstr_ctx->idle_clients);
+	kbase_vinstr_update_suspend(vinstr_ctx);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return cli;
+
+error:
+	kfree(cli->dump_buffers_meta);
+	if (cli->dump_buffers)
+		free_pages(
+				(unsigned long)cli->dump_buffers,
+				get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	if (!clients_present && vinstr_ctx->kctx) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+	kfree(cli);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+
+	return NULL;
+}
+
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	struct kbase_vinstr_client  *iter, *tmp;
+	struct task_struct          *thread = NULL;
+	u32 zerobitmap[4] = { 0 };
+	int cli_found = 0;
+	unsigned long flags;
+	bool clients_present;
+
+	KBASE_DEBUG_ASSERT(cli);
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+
+	list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) {
+		if (iter == cli) {
+			cli_found = 1;
+			break;
+		}
+	}
+	if (!cli_found) {
+		list_for_each_entry_safe(
+				iter, tmp, &vinstr_ctx->waiting_clients, list) {
+			if (iter == cli) {
+				cli_found = 1;
+				break;
+			}
+		}
+	}
+	if (!cli_found) {
+		list_for_each_entry_safe(
+				iter, tmp, &vinstr_ctx->suspended_clients, list) {
+			if (iter == cli) {
+				cli_found = 1;
+				break;
+			}
+		}
+	}
+	KBASE_DEBUG_ASSERT(cli_found);
+
+	if (cli_found) {
+		vinstr_ctx->reprogram = true;
+		list_del(&iter->list);
+	}
+
+	if (!cli->suspended)
+		vinstr_ctx->nclients--;
+	else
+		vinstr_ctx->nclients_suspended--;
+
+	kbase_vinstr_update_suspend(vinstr_ctx);
+
+	clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended);
+
+	/* Rebuild context bitmap now that the client has detached */
+	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->suspended_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	kfree(cli->dump_buffers_meta);
+	free_pages(
+			(unsigned long)cli->dump_buffers,
+			get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	kfree(cli);
+
+	if (!clients_present) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client);
+
+/* Accumulate counters in the dump buffer */
+static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
+{
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+	u32 *d = dst;
+	u32 *s = src;
+	size_t i, j;
+
+	for (i = 0; i < dump_size; i += block_size) {
+		/* skip over the header block */
+		d += NR_BYTES_PER_HDR / sizeof(u32);
+		s += NR_BYTES_PER_HDR / sizeof(u32);
+		for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
+			/* saturate result if addition would result in wraparound */
+			if (U32_MAX - *d < *s)
+				*d = U32_MAX;
+			else
+				*d += *s;
+			d++;
+			s++;
+		}
+	}
+}
+
+/* This is the Midgard v4 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v4(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups;
+	size_t i, group_size, group;
+	enum {
+		SC0_BASE    = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC1_BASE    = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC2_BASE    = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC3_BASE    = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		TILER_BASE  = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		JM_BASE     = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
+	};
+
+	group_size = NR_CNT_BLOCKS_PER_GROUP *
+			NR_CNT_PER_BLOCK *
+			NR_BYTES_PER_CNT;
+	for (i = 0; i < nr_cg; i++) {
+		group = i * group_size;
+		/* copy shader core headers */
+		memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE],
+		      NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy tiler header */
+		memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy mmu header */
+		memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy job manager header */
+		memcpy(&dst[group + JM_BASE], &src[group + JM_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* patch the shader core enable mask */
+		mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+
+		/* patch the tiler core enable mask */
+		mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[TILER_HWCNT_BM];
+
+		/* patch the mmu core enable mask */
+		mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+
+		/* patch the job manager enable mask */
+		mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[JM_HWCNT_BM];
+	}
+}
+
+/* This is the Midgard v5 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v5(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev;
+	u32 i, nr_l2;
+	u64 core_mask;
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+
+	/* copy and patch job manager header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[JM_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch tiler header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[TILER_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch MMU/L2C headers */
+	nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+	for (i = 0; i < nr_l2; i++) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+		dst += block_size;
+		src += block_size;
+	}
+
+	/* copy and patch shader core headers */
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (0ull != core_mask) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		if (0ull != (core_mask & 1ull)) {
+			/* if block is not reserved update header */
+			mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+			*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		}
+		dst += block_size;
+		src += block_size;
+
+		core_mask >>= 1;
+	}
+}
+
+/**
+ * accum_clients - accumulate dumped hw counters for all known clients
+ * @vinstr_ctx: vinstr context
+ */
+static void accum_clients(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *iter;
+	int v4 = 0;
+
+#ifndef CONFIG_MALI_NO_MALI
+	v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4);
+#endif
+
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ *
+ * Return: timestamp value
+ */
+static u64 kbasep_vinstr_get_timestamp(void)
+{
+	struct timespec ts;
+
+	getrawmonotonic(&ts);
+	return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+}
+
+/**
+ * kbasep_vinstr_add_dump_request - register client's dumping request
+ * @cli:             requesting client
+ * @waiting_clients: list of pending dumping requests
+ */
+static void kbasep_vinstr_add_dump_request(
+		struct kbase_vinstr_client *cli,
+		struct list_head *waiting_clients)
+{
+	struct kbase_vinstr_client *tmp;
+
+	if (list_empty(waiting_clients)) {
+		list_add(&cli->list, waiting_clients);
+		return;
+	}
+	list_for_each_entry(tmp, waiting_clients, list) {
+		if (tmp->dump_time > cli->dump_time) {
+			list_add_tail(&cli->list, &tmp->list);
+			return;
+		}
+	}
+	list_add_tail(&cli->list, waiting_clients);
+}
+
+/**
+ * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level
+ *                                        dump and accumulate them for known
+ *                                        clients
+ * @vinstr_ctx: vinstr context
+ * @timestamp: pointer where collection timestamp will be recorded
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_collect_and_accumulate(
+		struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp)
+{
+	unsigned long flags;
+	int rcode;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* The dummy model needs the CPU mapping. */
+	gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va);
+#endif
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state) {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		return -EAGAIN;
+	} else {
+		vinstr_ctx->state = VINSTR_DUMPING;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Request HW counters dump.
+	 * Disable preemption to make dump timestamp more accurate. */
+	preempt_disable();
+	*timestamp = kbasep_vinstr_get_timestamp();
+	rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx);
+	preempt_enable();
+
+	if (!rcode)
+		rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
+	WARN_ON(rcode);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDING:
+		schedule_work(&vinstr_ctx->suspend_work);
+		break;
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_IDLE;
+		wake_up_all(&vinstr_ctx->suspend_waitq);
+		break;
+	default:
+		break;
+	}
+
+	/* Accumulate values of collected counters. */
+	if (!rcode)
+		accum_clients(vinstr_ctx);
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel
+ *                                  buffer
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_fill_dump_buffer(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	unsigned int write_idx = atomic_read(&cli->write_idx);
+	unsigned int read_idx  = atomic_read(&cli->read_idx);
+
+	struct kbase_hwcnt_reader_metadata *meta;
+	void                               *buffer;
+
+	/* Check if there is a place to copy HWC block into. */
+	if (write_idx - read_idx == cli->buffer_count)
+		return -1;
+	write_idx %= cli->buffer_count;
+
+	/* Fill in dump buffer and its metadata. */
+	buffer = &cli->dump_buffers[write_idx * cli->dump_size];
+	meta   = &cli->dump_buffers_meta[write_idx];
+	meta->timestamp  = timestamp;
+	meta->event_id   = event_id;
+	meta->buffer_idx = write_idx;
+	memcpy(buffer, cli->accum_buffer, cli->dump_size);
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer
+ *                                         allocated in userspace
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of legacy ioctl interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_legacy(
+		struct kbase_vinstr_client *cli)
+{
+	void __user  *buffer = cli->legacy_buffer;
+	int          rcode;
+
+	/* Copy data to user buffer. */
+	rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size);
+	if (rcode) {
+		pr_warn("error while copying buffer to user\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer
+ *                                         allocated in kernel space
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of the kernel client interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_kernel(
+		struct kbase_vinstr_client *cli)
+{
+	memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_reprogram(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	bool suspended = false;
+
+	/* Don't enable hardware counters if vinstr is suspended. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state)
+		suspended = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (suspended)
+		return;
+
+	/* Change to suspended state is done while holding vinstr context
+	 * lock. Below code will then no re-enable the instrumentation. */
+
+	if (vinstr_ctx->reprogram) {
+		struct kbase_vinstr_client *iter;
+
+		if (!reprogram_hwcnt(vinstr_ctx)) {
+			vinstr_ctx->reprogram = false;
+			spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->idle_clients,
+					list)
+				iter->pending = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->waiting_clients,
+					list)
+				iter->pending = false;
+			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		}
+	}
+}
+
+/**
+ * kbasep_vinstr_update_client - copy accumulated counters to user readable
+ *                               buffer and notify the user
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_update_client(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	int rcode = 0;
+	unsigned long flags;
+
+	/* Copy collected counters to user readable buffer. */
+	if (cli->buffer_count)
+		rcode = kbasep_vinstr_fill_dump_buffer(
+				cli, timestamp, event_id);
+	else if (cli->kernel_buffer)
+		rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli);
+	else
+		rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
+
+	/* Prepare for next request. */
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+	spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags);
+	/* Check if client was put to suspend state while it was being updated */
+	if (cli->suspended)
+		rcode = -EINVAL;
+	spin_unlock_irqrestore(&cli->vinstr_ctx->state_lock, flags);
+
+	if (rcode)
+		goto exit;
+
+	/* Notify client. Make sure all changes to memory are visible. */
+	wmb();
+	atomic_inc(&cli->write_idx);
+	wake_up_interruptible(&cli->waitq);
+
+exit:
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function
+ *
+ * @hrtimer: high resolution timer
+ *
+ * Return: High resolution timer restart enum.
+ */
+static enum hrtimer_restart kbasep_vinstr_wake_up_callback(
+		struct hrtimer *hrtimer)
+{
+	struct kbasep_vinstr_wake_up_timer *timer =
+		container_of(
+			hrtimer,
+			struct kbasep_vinstr_wake_up_timer,
+			hrtimer);
+
+	KBASE_DEBUG_ASSERT(timer);
+
+	atomic_set(&timer->vinstr_ctx->request_pending, 1);
+	wake_up_all(&timer->vinstr_ctx->waitq);
+
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * kbasep_vinstr_service_task - HWC dumping service thread
+ *
+ * @data: Pointer to vinstr context structure.
+ *
+ * Return: 0 on success; -ENOMEM if timer allocation fails
+ */
+static int kbasep_vinstr_service_task(void *data)
+{
+	struct kbase_vinstr_context        *vinstr_ctx = data;
+	struct kbasep_vinstr_wake_up_timer *timer;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	timer = kmalloc(sizeof(*timer), GFP_KERNEL);
+
+	if (!timer) {
+		dev_warn(vinstr_ctx->kbdev->dev, "Timer allocation failed!\n");
+		return -ENOMEM;
+	}
+
+	hrtimer_init(&timer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+	timer->hrtimer.function = kbasep_vinstr_wake_up_callback;
+	timer->vinstr_ctx       = vinstr_ctx;
+
+	while (!kthread_should_stop()) {
+		struct kbase_vinstr_client *cli = NULL;
+		struct kbase_vinstr_client *tmp;
+		int                        rcode;
+		unsigned long              flags;
+
+		u64              timestamp = kbasep_vinstr_get_timestamp();
+		u64              dump_time = 0;
+		struct list_head expired_requests;
+
+		/* Hold lock while performing operations on lists of clients. */
+		mutex_lock(&vinstr_ctx->lock);
+
+		/* Closing thread must not interact with client requests. */
+		if (current == vinstr_ctx->thread) {
+			atomic_set(&vinstr_ctx->request_pending, 0);
+
+			spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+			if (!list_empty(&vinstr_ctx->waiting_clients)) {
+				cli = list_first_entry(
+						&vinstr_ctx->waiting_clients,
+						struct kbase_vinstr_client,
+						list);
+				dump_time = cli->dump_time;
+			}
+			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		}
+
+		if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) {
+			mutex_unlock(&vinstr_ctx->lock);
+
+			/* Sleep until next dumping event or service request. */
+			if (cli) {
+				u64 diff = dump_time - timestamp;
+
+				hrtimer_start(
+						&timer->hrtimer,
+						ns_to_ktime(diff),
+						HRTIMER_MODE_REL);
+			}
+			wait_event(
+					vinstr_ctx->waitq,
+					atomic_read(
+						&vinstr_ctx->request_pending) ||
+					kthread_should_stop());
+			hrtimer_cancel(&timer->hrtimer);
+			continue;
+		}
+
+		rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx,
+				&timestamp);
+
+		INIT_LIST_HEAD(&expired_requests);
+
+		spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+		/* Find all expired requests. */
+		list_for_each_entry_safe(
+				cli,
+				tmp,
+				&vinstr_ctx->waiting_clients,
+				list) {
+			s64 tdiff =
+				(s64)(timestamp + DUMPING_RESOLUTION) -
+				(s64)cli->dump_time;
+			if (tdiff >= 0ll) {
+				list_del(&cli->list);
+				list_add(&cli->list, &expired_requests);
+			} else {
+				break;
+			}
+		}
+
+		/* Fill data for each request found. */
+		while (!list_empty(&expired_requests)) {
+			cli = list_first_entry(&expired_requests,
+					struct kbase_vinstr_client, list);
+
+			/* Ensure that legacy buffer will not be used from
+			 * this kthread context. */
+			BUG_ON(0 == cli->buffer_count);
+			/* Expect only periodically sampled clients. */
+			BUG_ON(0 == cli->dump_interval);
+
+			/* Release the spinlock, as filling the data in client's
+			 * userspace buffer could result in page faults. */
+			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+			if (!rcode)
+				kbasep_vinstr_update_client(
+						cli,
+						timestamp,
+						BASE_HWCNT_READER_EVENT_PERIODIC);
+			spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags);
+
+			/* This client got suspended, move to the next one. */
+			if (cli->suspended)
+				continue;
+
+			/* Set new dumping time. Drop missed probing times. */
+			do {
+				cli->dump_time += cli->dump_interval;
+			} while (cli->dump_time < timestamp);
+
+			list_del(&cli->list);
+			kbasep_vinstr_add_dump_request(
+					cli,
+					&vinstr_ctx->waiting_clients);
+		}
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		/* Reprogram counters set if required. */
+		kbasep_vinstr_reprogram(vinstr_ctx);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	}
+
+	kfree(timer);
+
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers
+ * @cli: pointer to vinstr client structure
+ *
+ * Return: non-zero if client has at least one dumping buffer filled that was
+ *         not notified to user yet
+ */
+static int kbasep_vinstr_hwcnt_reader_buffer_ready(
+		struct kbase_vinstr_client *cli)
+{
+	KBASE_DEBUG_ASSERT(cli);
+	return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int meta_idx = atomic_read(&cli->meta_idx);
+	unsigned int idx = meta_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx];
+
+	/* Metadata sanity check. */
+	KBASE_DEBUG_ASSERT(idx == meta->buffer_idx);
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if there is any buffer available. */
+	if (atomic_read(&cli->write_idx) == meta_idx)
+		return -EAGAIN;
+
+	/* Check if previously taken buffer was put back. */
+	if (atomic_read(&cli->read_idx) != meta_idx)
+		return -EBUSY;
+
+	/* Copy next available buffer's metadata to user. */
+	if (copy_to_user(buffer, meta, size))
+		return -EFAULT;
+
+	atomic_inc(&cli->meta_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int read_idx = atomic_read(&cli->read_idx);
+	unsigned int idx = read_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata meta;
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if any buffer was taken. */
+	if (atomic_read(&cli->meta_idx) == read_idx)
+		return -EPERM;
+
+	/* Check if correct buffer is put back. */
+	if (copy_from_user(&meta, buffer, size))
+		return -EFAULT;
+	if (idx != meta.buffer_idx)
+		return -EINVAL;
+
+	atomic_inc(&cli->read_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @interval: periodic dumping interval (disable periodic dumping if zero)
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+		struct kbase_vinstr_client *cli, u32 interval)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+
+	if (cli->suspended) {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		mutex_unlock(&vinstr_ctx->lock);
+		return -ENOMEM;
+	}
+
+	list_del(&cli->list);
+
+	cli->dump_interval = interval;
+
+	/* If interval is non-zero, enable periodic dumping for this client. */
+	if (cli->dump_interval) {
+		if (DUMPING_RESOLUTION > cli->dump_interval)
+			cli->dump_interval = DUMPING_RESOLUTION;
+		cli->dump_time =
+			kbasep_vinstr_get_timestamp() + cli->dump_interval;
+
+		kbasep_vinstr_add_dump_request(
+				cli, &vinstr_ctx->waiting_clients);
+
+		atomic_set(&vinstr_ctx->request_pending, 1);
+		wake_up_all(&vinstr_ctx->waitq);
+	} else {
+		list_add(&cli->list, &vinstr_ctx->idle_clients);
+	}
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id
+ * @event_id: id of event
+ * Return: event_mask or zero if event is not supported or maskable
+ */
+static u32 kbasep_vinstr_hwcnt_reader_event_mask(
+		enum base_hwcnt_reader_event event_id)
+{
+	u32 event_mask = 0;
+
+	switch (event_id) {
+	case BASE_HWCNT_READER_EVENT_PREJOB:
+	case BASE_HWCNT_READER_EVENT_POSTJOB:
+		/* These event are maskable. */
+		event_mask = (1 << event_id);
+		break;
+
+	case BASE_HWCNT_READER_EVENT_MANUAL:
+	case BASE_HWCNT_READER_EVENT_PERIODIC:
+		/* These event are non-maskable. */
+	default:
+		/* These event are not supported. */
+		break;
+	}
+
+	return event_mask;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to enable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask |= event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to disable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask &= ~event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command
+ * @cli:   pointer to vinstr client structure
+ * @hwver: pointer to user buffer where hw version will be stored
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+		struct kbase_vinstr_client *cli, u32 __user *hwver)
+{
+#ifndef CONFIG_MALI_NO_MALI
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+#endif
+
+	u32                         ver = 5;
+
+#ifndef CONFIG_MALI_NO_MALI
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4))
+		ver = 4;
+#endif
+
+	return put_user(ver, hwver);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl
+ * @filp:   pointer to file structure
+ * @cmd:    user command
+ * @arg:    command's argument
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	long                       rcode = 0;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd)))
+		return -EINVAL;
+
+	switch (cmd) {
+	case KBASE_HWCNT_READER_GET_API_VERSION:
+		rcode = put_user(HWCNT_READER_API, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_HWVER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+				cli, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER_SIZE:
+		KBASE_DEBUG_ASSERT(cli->vinstr_ctx);
+		rcode = put_user(
+				(u32)cli->vinstr_ctx->dump_size,
+				(u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_DUMP:
+		rcode = kbase_vinstr_hwc_dump(
+				cli, BASE_HWCNT_READER_EVENT_MANUAL);
+		break;
+	case KBASE_HWCNT_READER_CLEAR:
+		rcode = kbase_vinstr_hwc_clear(cli);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_PUT_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_SET_INTERVAL:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+				cli, (u32)arg);
+		break;
+	case KBASE_HWCNT_READER_ENABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	case KBASE_HWCNT_READER_DISABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	default:
+		rcode = -EINVAL;
+		break;
+	}
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp,
+		poll_table *wait)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	poll_wait(filp, &cli->waitq, wait);
+	if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap
+ * @filp: pointer to file structure
+ * @vma:  pointer to vma structure
+ * Return: zero on success
+ */
+static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
+		struct vm_area_struct *vma)
+{
+	struct kbase_vinstr_client *cli;
+	unsigned long size, addr, pfn, offset;
+	unsigned long vm_size = vma->vm_end - vma->vm_start;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(vma);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	size = cli->buffer_count * cli->dump_size;
+
+	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
+		return -EINVAL;
+
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	if (vm_size > size - offset)
+		return -EINVAL;
+
+	addr = __pa((unsigned long)cli->dump_buffers + offset);
+	pfn = addr >> PAGE_SHIFT;
+
+	return remap_pfn_range(
+			vma,
+			vma->vm_start,
+			pfn,
+			vm_size,
+			vma->vm_page_prot);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ * Return always return zero
+ */
+static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
+		struct file *filp)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	kbase_vinstr_detach_client(cli);
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_kick_scheduler - trigger scheduler cycle
+ * @kbdev: pointer to kbase device structure
+ */
+static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	down(&js_devdata->schedule_sem);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	up(&js_devdata->schedule_sem);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker suspending vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_suspend_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			suspend_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		disable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_SUSPENDED;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+
+	if (vinstr_ctx->need_resume) {
+		vinstr_ctx->need_resume = false;
+		vinstr_ctx->state = VINSTR_RESUMING;
+		schedule_work(&vinstr_ctx->resume_work);
+
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	} else {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+
+		/* Kick GPU scheduler to allow entering protected mode.
+		 * This must happen after vinstr was suspended.
+		 */
+		kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+	}
+}
+
+/**
+ * kbasep_vinstr_resume_worker - worker resuming vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_resume_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			resume_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		enable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_IDLE;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+
+	if (vinstr_ctx->need_suspend) {
+		vinstr_ctx->need_suspend = false;
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		schedule_work(&vinstr_ctx->suspend_work);
+
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	} else {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+
+		/* Kick GPU scheduler to allow entering protected mode.
+		 * Note that scheduler state machine might requested re-entry to
+		 * protected mode before vinstr was resumed.
+		 * This must happen after vinstr was release.
+		 */
+		kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+	}
+}
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL);
+	if (!vinstr_ctx)
+		return NULL;
+
+	INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
+	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
+	INIT_LIST_HEAD(&vinstr_ctx->suspended_clients);
+	mutex_init(&vinstr_ctx->lock);
+	spin_lock_init(&vinstr_ctx->state_lock);
+	vinstr_ctx->kbdev = kbdev;
+	vinstr_ctx->thread = NULL;
+	vinstr_ctx->state = VINSTR_IDLE;
+	vinstr_ctx->suspend_cnt = 0;
+	INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker);
+	INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker);
+	init_waitqueue_head(&vinstr_ctx->suspend_waitq);
+
+	atomic_set(&vinstr_ctx->request_pending, 0);
+	init_waitqueue_head(&vinstr_ctx->waitq);
+
+	return vinstr_ctx;
+}
+
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *cli;
+
+	/* Stop service thread first. */
+	if (vinstr_ctx->thread)
+		kthread_stop(vinstr_ctx->thread);
+
+	/* Wait for workers. */
+	flush_work(&vinstr_ctx->suspend_work);
+	flush_work(&vinstr_ctx->resume_work);
+
+	while (1) {
+		struct list_head *list = &vinstr_ctx->idle_clients;
+
+		if (list_empty(list)) {
+			list = &vinstr_ctx->waiting_clients;
+			if (list_empty(list)) {
+				list = &vinstr_ctx->suspended_clients;
+				if (list_empty(list))
+					break;
+			}
+		}
+
+		cli = list_first_entry(list, struct kbase_vinstr_client, list);
+		list_del(&cli->list);
+		if (!cli->suspended)
+			vinstr_ctx->nclients--;
+		else
+			vinstr_ctx->nclients_suspended--;
+		kfree(cli->accum_buffer);
+		kfree(cli);
+	}
+	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients);
+	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients_suspended);
+	if (vinstr_ctx->kctx)
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	kfree(vinstr_ctx);
+}
+
+int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_ioctl_hwcnt_reader_setup *setup)
+{
+	struct kbase_vinstr_client  *cli;
+	u32                         bitmap[4];
+	int                         fd;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(setup->buffer_count);
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	cli = kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			setup->buffer_count,
+			bitmap,
+			&fd,
+			NULL);
+
+	if (!cli)
+		return -ENOMEM;
+
+	kbase_vinstr_wait_for_ready(vinstr_ctx);
+	return fd;
+}
+
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_ioctl_hwcnt_enable *enable)
+{
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(enable);
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (enable->dump_buffer) {
+		u32 bitmap[4];
+
+		bitmap[SHADER_HWCNT_BM] = enable->shader_bm;
+		bitmap[TILER_HWCNT_BM]  = enable->tiler_bm;
+		bitmap[MMU_L2_HWCNT_BM] = enable->mmu_l2_bm;
+		bitmap[JM_HWCNT_BM]     = enable->jm_bm;
+
+		if (*cli)
+			return -EBUSY;
+
+		*cli = kbasep_vinstr_attach_client(
+				vinstr_ctx,
+				0,
+				bitmap,
+				(void *)(uintptr_t)enable->dump_buffer,
+				NULL);
+
+		if (!(*cli))
+			return -ENOMEM;
+
+		kbase_vinstr_wait_for_ready(vinstr_ctx);
+	} else {
+		if (!*cli)
+			return -EINVAL;
+
+		kbase_vinstr_detach_client(*cli);
+		*cli = NULL;
+	}
+
+	return 0;
+}
+
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_ioctl_hwcnt_reader_setup *setup,
+		void *kernel_buffer)
+{
+	struct kbase_vinstr_client *kernel_client;
+	u32 bitmap[4];
+
+	if (!vinstr_ctx || !setup || !kernel_buffer)
+		return NULL;
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	kernel_client = kbasep_vinstr_attach_client(
+				vinstr_ctx,
+				0,
+				bitmap,
+				NULL,
+				kernel_buffer);
+
+	if (kernel_client)
+		kbase_vinstr_wait_for_ready(vinstr_ctx);
+
+	return kernel_client;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
+
+int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	int                         rcode = 0;
+	struct kbase_vinstr_context *vinstr_ctx;
+	u64                         timestamp;
+	u32                         event_mask;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT);
+	event_mask = 1 << event_id;
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (event_mask & cli->event_mask) {
+		rcode = kbasep_vinstr_collect_and_accumulate(
+				vinstr_ctx,
+				&timestamp);
+		if (rcode)
+			goto exit;
+
+		rcode = kbasep_vinstr_update_client(cli, timestamp, event_id);
+		if (rcode)
+			goto exit;
+
+		kbasep_vinstr_reprogram(vinstr_ctx);
+	}
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump);
+
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	int                         rcode;
+	u64                         unused;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
+	if (rcode)
+		goto exit;
+	rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx);
+	if (rcode)
+		goto exit;
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+	kbasep_vinstr_reprogram(vinstr_ctx);
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	int ret = -EAGAIN;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+		vinstr_ctx->suspend_cnt++;
+		/* overflow shall not happen */
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		ret = 0;
+		break;
+
+	case VINSTR_IDLE:
+		if (vinstr_ctx->clients_present) {
+			vinstr_ctx->state = VINSTR_SUSPENDING;
+			schedule_work(&vinstr_ctx->suspend_work);
+		} else {
+			vinstr_ctx->state = VINSTR_SUSPENDED;
+
+			vinstr_ctx->suspend_cnt++;
+			/* overflow shall not happen */
+			WARN_ON(0 == vinstr_ctx->suspend_cnt);
+			ret = 0;
+		}
+		break;
+
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		break;
+
+	case VINSTR_RESUMING:
+		vinstr_ctx->need_suspend = true;
+		break;
+
+	case VINSTR_SUSPENDING:
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT(0);
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	return ret;
+}
+
+static int kbase_vinstr_is_ready(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	int ret = -EAGAIN;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+	case VINSTR_RESUMING:
+	case VINSTR_SUSPENDING:
+		break;
+
+	case VINSTR_IDLE:
+	case VINSTR_DUMPING:
+		ret = 0;
+		break;
+	default:
+		KBASE_DEBUG_ASSERT(0);
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	return ret;
+}
+
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	wait_event(vinstr_ctx->suspend_waitq,
+			(0 == kbase_vinstr_try_suspend(vinstr_ctx)));
+}
+
+void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx)
+{
+	wait_event(vinstr_ctx->suspend_waitq,
+			(0 == kbase_vinstr_is_ready(vinstr_ctx)));
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_wait_for_ready);
+
+/**
+ * kbase_vinstr_update_suspend - Update vinstr suspend/resume status depending
+ *                               on nclients
+ * @vinstr_ctx: vinstr context pointer
+ *
+ * This function should be called whenever vinstr_ctx->nclients changes. This
+ * may cause vinstr to be suspended or resumed, depending on the number of
+ * clients and whether IPA is suspended or not.
+ */
+static void kbase_vinstr_update_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	lockdep_assert_held(&vinstr_ctx->state_lock);
+
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+		if ((vinstr_ctx->nclients) && (0 == vinstr_ctx->suspend_cnt)) {
+			vinstr_ctx->state = VINSTR_RESUMING;
+			schedule_work(&vinstr_ctx->resume_work);
+		}
+		break;
+
+	case VINSTR_SUSPENDING:
+		if (vinstr_ctx->nclients)
+			vinstr_ctx->need_resume = true;
+		break;
+
+	case VINSTR_IDLE:
+		if (!vinstr_ctx->nclients) {
+			vinstr_ctx->state = VINSTR_SUSPENDING;
+			schedule_work(&vinstr_ctx->suspend_work);
+		}
+		break;
+
+	case VINSTR_DUMPING:
+		if (!vinstr_ctx->nclients)
+			vinstr_ctx->state = VINSTR_SUSPENDING;
+		break;
+
+	case VINSTR_RESUMING:
+		if (!vinstr_ctx->nclients)
+			vinstr_ctx->need_suspend = true;
+		break;
+	}
+}
+
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state);
+	if (VINSTR_SUSPENDED == vinstr_ctx->state) {
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		vinstr_ctx->suspend_cnt--;
+		if (0 == vinstr_ctx->suspend_cnt) {
+			if (vinstr_ctx->clients_present) {
+				vinstr_ctx->state = VINSTR_RESUMING;
+				schedule_work(&vinstr_ctx->resume_work);
+			} else {
+				vinstr_ctx->state = VINSTR_IDLE;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
+
+void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client)
+{
+	struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+
+	if (!client->suspended) {
+		list_del(&client->list);
+		list_add(&client->list, &vinstr_ctx->suspended_clients);
+
+		vinstr_ctx->nclients--;
+		vinstr_ctx->nclients_suspended++;
+		kbase_vinstr_update_suspend(vinstr_ctx);
+
+		client->suspended = true;
+	}
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
+
+void kbase_vinstr_resume_client(struct kbase_vinstr_client *client)
+{
+	struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+
+	if (client->suspended) {
+		list_del(&client->list);
+		list_add(&client->list, &vinstr_ctx->idle_clients);
+
+		vinstr_ctx->nclients++;
+		vinstr_ctx->nclients_suspended--;
+		kbase_vinstr_update_suspend(vinstr_ctx);
+
+		client->suspended = false;
+	}
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
new file mode 100755
index 0000000..d32799f
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -0,0 +1,182 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+#include <mali_kbase_hwcnt_reader.h>
+#include <mali_kbase_ioctl.h>
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context;
+struct kbase_vinstr_client;
+
+/*****************************************************************************/
+
+/**
+ * kbase_vinstr_init() - initialize the vinstr core
+ * @kbdev: kbase device
+ *
+ * Return: pointer to the vinstr context on success or NULL on failure
+ */
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_term() - terminate the vinstr core
+ * @vinstr_ctx: vinstr context
+ */
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader
+ * @vinstr_ctx: vinstr context
+ * @setup:      reader's configuration
+ *
+ * Return: file descriptor on success and a (negative) error code otherwise
+ */
+int kbase_vinstr_hwcnt_reader_setup(
+		struct kbase_vinstr_context        *vinstr_ctx,
+		struct kbase_ioctl_hwcnt_reader_setup *setup);
+
+/**
+ * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping
+ * @vinstr_ctx: vinstr context
+ * @cli:        pointer where to store pointer to new vinstr client structure
+ * @enable:      hwc configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_ioctl_hwcnt_enable *enable);
+
+/**
+ * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
+ *                                   client
+ * @vinstr_ctx:    vinstr context
+ * @setup:         reader's configuration
+ * @kernel_buffer: pointer to dump buffer
+ *
+ * setup->buffer_count is not used for kernel side clients.
+ *
+ * Return: pointer to client structure, or NULL on failure
+ */
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_ioctl_hwcnt_reader_setup *setup,
+		void *kernel_buffer);
+
+/**
+ * kbase_vinstr_hwc_dump - issue counter dump for vinstr client
+ * @cli:      pointer to vinstr client
+ * @event_id: id of event that triggered hwcnt dump
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_dump(
+		struct kbase_vinstr_client   *cli,
+		enum base_hwcnt_reader_event event_id);
+
+/**
+ * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for
+ *                          a given kbase context
+ * @cli: pointer to vinstr client
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Return: 0 on success, or negative if state change is in progress
+ *
+ * Warning: This API call is non-generic. It is meant to be used only by
+ *          job scheduler state machine.
+ *
+ * Function initiates vinstr switch to suspended state. Once it was called
+ * vinstr enters suspending state. If function return non-zero value, it
+ * indicates that state switch is not complete and function must be called
+ * again. On state switch vinstr will trigger job scheduler state machine
+ * cycle.
+ */
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_suspend - suspends operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function initiates vinstr switch to suspended state. Then it blocks until
+ * operation is completed.
+ */
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_wait_for_ready - waits for the vinstr context to get ready
+ * @vinstr_ctx: vinstr context
+ *
+ * Function waits for the vinstr to become ready for dumping. It can be in the
+ * resuming state after the client was attached but the client currently expects
+ * that vinstr is ready for dumping immediately post attach.
+ */
+void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_resume - resumes operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function can be called only if it was preceded by a successful call
+ * to kbase_vinstr_suspend.
+ */
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_dump_size - Return required size of dump buffer
+ * @kbdev: device pointer
+ *
+ * Return : buffer size in bytes
+ */
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_detach_client - Detach a client from the vinstr core
+ * @cli: pointer to vinstr client
+ */
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_suspend_client - Suspend vinstr client
+ * @client: pointer to vinstr client
+ */
+void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client);
+
+/**
+ * kbase_vinstr_resume_client - Resume vinstr client
+ * @client: pointer to vinstr client
+ */
+void kbase_vinstr_resume_client(struct kbase_vinstr_client *client);
+
+#endif /* _KBASE_VINSTR_H_ */
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100644
index 0000000..da2ffaf
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,206 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_KBASE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mali_slot_template,
+	TP_PROTO(int jobslot, unsigned int info_val),
+	TP_ARGS(jobslot, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, jobslot)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->jobslot = jobslot;
+		__entry->info_val = info_val;
+	),
+	TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val)
+);
+
+#define DEFINE_MALI_SLOT_EVENT(name) \
+DEFINE_EVENT(mali_slot_template, mali_##name, \
+	TP_PROTO(int jobslot, unsigned int info_val), \
+	TP_ARGS(jobslot, info_val))
+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT);
+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE);
+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT);
+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT);
+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+#undef DEFINE_MALI_SLOT_EVENT
+
+DECLARE_EVENT_CLASS(mali_refcount_template,
+	TP_PROTO(int refcount, unsigned int info_val),
+	TP_ARGS(refcount, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, refcount)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->refcount = refcount;
+		__entry->info_val = info_val;
+	),
+	TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val)
+);
+
+#define DEFINE_MALI_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(mali_refcount_template, mali_##name, \
+	TP_PROTO(int refcount, unsigned int info_val), \
+	TP_ARGS(refcount, info_val))
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK);
+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE);
+#undef DEFINE_MALI_REFCOUNT_EVENT
+
+DECLARE_EVENT_CLASS(mali_add_template,
+	TP_PROTO(int gpu_addr, unsigned int info_val),
+	TP_ARGS(gpu_addr, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, gpu_addr)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->gpu_addr = gpu_addr;
+		__entry->info_val = info_val;
+	),
+	TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val)
+);
+
+#define DEFINE_MALI_ADD_EVENT(name) \
+DEFINE_EVENT(mali_add_template, mali_##name, \
+	TP_PROTO(int gpu_addr, unsigned int info_val), \
+	TP_ARGS(gpu_addr, info_val))
+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY);
+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL);
+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT);
+DEFINE_MALI_ADD_EVENT(JM_IRQ);
+DEFINE_MALI_ADD_EVENT(JM_IRQ_END);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP);
+DEFINE_MALI_ADD_EVENT(PM_PWRON);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM);
+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+#undef DEFINE_MALI_ADD_EVENT
+
+#endif /* _TRACE_MALI_KBASE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100644
index 0000000..0741dfc
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,194 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#include <linux/tracepoint.h>
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - called from mali_kbase_core_linux.c
+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event,
+	TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid,
+			unsigned char job_id),
+	TP_ARGS(event_id, tgid, pid, job_id),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned int, tgid)
+		__field(unsigned int, pid)
+		__field(unsigned char, job_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->tgid = tgid;
+		__entry->pid = pid;
+		__entry->job_id = job_id;
+	),
+	TP_printk("event=%u tgid=%u pid=%u job_id=%u",
+		__entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+);
+
+/**
+ * mali_pm_status - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF)
+ */
+TRACE_EVENT(mali_pm_status,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power up
+ */
+TRACE_EVENT(mali_pm_power_on,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power down
+ */
+TRACE_EVENT(mali_pm_power_off,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_page_fault_insert_pages - Called by page_fault_worker()
+ * it reports an MMU page fault resulting in new pages being mapped.
+ * @event_id: MMU address space number.
+ * @value: number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages,
+	TP_PROTO(int event_id, unsigned long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+		__field(unsigned long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space()
+ * it reports that a certain MMU address space is in use now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_in_use,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal()
+ * it reports that a certain MMU address space has been released now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_released,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
+ *                                 and by kbase_atomic_sub_pages()
+ * it reports that the total number of allocated pages is changed.
+ * @event_id: number of pages to be added or subtracted (according to the sign).
+ */
+TRACE_EVENT(mali_total_alloc_pages_change,
+	TP_PROTO(long long int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(long long int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%lld", __entry->event_id)
+);
+
+#endif				/*  _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
new file mode 100644
index 0000000..f17bd5e
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
@@ -0,0 +1,136 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Kernel-wide include for common macros and types.
+ */
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define U8_MAX          ((u8)~0U)
+#define S8_MAX          ((s8)(U8_MAX>>1))
+#define S8_MIN          ((s8)(-S8_MAX - 1))
+#define U16_MAX         ((u16)~0U)
+#define S16_MAX         ((s16)(U16_MAX>>1))
+#define S16_MIN         ((s16)(-S16_MAX - 1))
+#define U32_MAX         ((u32)~0U)
+#define S32_MAX         ((s32)(U32_MAX>>1))
+#define S32_MIN         ((s32)(-S32_MAX - 1))
+#define U64_MAX         ((u64)~0ULL)
+#define S64_MAX         ((s64)(U64_MAX>>1))
+#define S64_MIN         ((s64)(-S64_MAX - 1))
+#endif /* LINUX_VERSION_CODE */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#define SIZE_MAX        (~(size_t)0)
+#endif /* LINUX_VERSION_CODE */
+
+/**
+ * MIN - Return the lesser of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * Refer to MAX macro for more details
+ */
+#define MIN(x, y)	((x) < (y) ? (x) : (y))
+
+/**
+ * MAX -  Return the greater of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as MIN it is guaranteed to return
+ * the one that MIN didn't return. This is significant for types where not
+ * all values are comparable e.g. NaNs in floating-point types. But if you want
+ * to retrieve the min and max of two values, consider using a conditional swap
+ * instead.
+ */
+#define MAX(x, y)	((x) < (y) ? (y) : (x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * much support API backwards compatibility.
+ */
+#define CSTD_UNUSED(x)	((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile time macros turn a function-like macro in to a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP(...)	((void)#__VA_ARGS__)
+
+/**
+ * Function-like macro for converting a pointer in to a u64 for storing into
+ * an external data structure. This is commonly used when pairing a 32-bit
+ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
+ * is complex and a straight cast does not work reliably as pointers are
+ * often considered as signed.
+ */
+#define PTR_TO_U64(x)	((uint64_t)((uintptr_t)(x)))
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1(x)	#x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be used
+ * if the macro is defined in a way which may have no value; use the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2(x)	CSTD_STR1(x)
+
+/**
+ * Specify an assertion value which is evaluated at compile time. Recommended
+ * usage is specification of a @c static @c INLINE function containing all of
+ * the assertions thus:
+ *
+ * @code
+ * static INLINE [module]_compile_time_assertions( void )
+ * {
+ *     COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
+ * }
+ * @endcode
+ *
+ * @note Use @c static not @c STATIC. We never want to turn off this @c static
+ * specification for testing purposes.
+ */
+#define CSTD_COMPILE_TIME_ASSERT(expr) \
+	do { switch (0) { case 0: case (expr):; } } while (false)
+
+#endif /* _MALISW_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
new file mode 100644
index 0000000..29d5df3
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
@@ -0,0 +1,31 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _MIDG_COHERENCY_H_
+#define _MIDG_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE      1
+#define COHERENCY_NONE     31
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+
+#endif /* _MIDG_COHERENCY_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100755
index 0000000..1808500
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,626 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Begin Register Offsets
+ */
+
+#define GPU_CONTROL_BASE        0x0000
+#define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
+#define GPU_ID                  0x000	/* (RO) GPU and revision identifier */
+#define L2_FEATURES             0x004	/* (RO) Level 2 cache features */
+#define CORE_FEATURES           0x008	/* (RO) Shader Core Features */
+#define TILER_FEATURES          0x00C	/* (RO) Tiler Features */
+#define MEM_FEATURES            0x010	/* (RO) Memory system features */
+#define MMU_FEATURES            0x014	/* (RO) MMU features */
+#define AS_PRESENT              0x018	/* (RO) Address space slots present */
+#define JS_PRESENT              0x01C	/* (RO) Job slots present */
+#define GPU_IRQ_RAWSTAT         0x020	/* (RW) */
+#define GPU_IRQ_CLEAR           0x024	/* (WO) */
+#define GPU_IRQ_MASK            0x028	/* (RW) */
+#define GPU_IRQ_STATUS          0x02C	/* (RO) */
+
+/* IRQ flags */
+#define GPU_FAULT               (1 << 0)	/* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS     (1 << 7)	/* More than one GPU Fault occurred. */
+#define RESET_COMPLETED         (1 << 8)	/* Set when a reset has completed. Intended to use with SOFT_RESET
+						   commands which may take time. */
+#define POWER_CHANGED_SINGLE    (1 << 9)	/* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL       (1 << 10)	/* Set when all cores have finished powering up or down
+						   and the power manager is idle. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16)	/* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED  (1 << 17)	/* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+			| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#define GPU_COMMAND             0x030	/* (WO) */
+#define GPU_STATUS              0x034	/* (RO) */
+#define LATEST_FLUSH            0x038	/* (RO) */
+
+#define GROUPS_L2_COHERENT      (1 << 0)	/* Cores groups are l2 coherent */
+#define GPU_DBGEN               (1 << 8)	/* DBGEN wire status */
+
+#define GPU_FAULTSTATUS         0x03C	/* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO     0x040	/* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI     0x044	/* (RO) GPU exception fault address, high word */
+
+#define PWR_KEY                 0x050	/* (WO) Power manager key register */
+#define PWR_OVERRIDE0           0x054	/* (RW) Power manager override settings */
+#define PWR_OVERRIDE1           0x058	/* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO          0x060	/* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI          0x064	/* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG           0x068	/* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN            0x06C	/* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN        0x070	/* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN         0x074	/* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN        0x07C	/* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO          0x090	/* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI          0x094	/* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO            0x098	/* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI            0x09C	/* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS		0x0A0	/* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4	/* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8	/* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES         0x0AC	/* (RO) Thread features */
+#define THREAD_TLS_ALLOC        0x310   /* (RO) Number of threads per core that
+					 * TLS must be allocated for
+					 */
+
+#define TEXTURE_FEATURES_0      0x0B0	/* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1      0x0B4	/* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2      0x0B8	/* (RO) Support flags for indexed texture formats 64..95 */
+#define TEXTURE_FEATURES_3      0x0BC	/* (RO) Support flags for texture order */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define JS0_FEATURES            0x0C0	/* (RO) Features of job slot 0 */
+#define JS1_FEATURES            0x0C4	/* (RO) Features of job slot 1 */
+#define JS2_FEATURES            0x0C8	/* (RO) Features of job slot 2 */
+#define JS3_FEATURES            0x0CC	/* (RO) Features of job slot 3 */
+#define JS4_FEATURES            0x0D0	/* (RO) Features of job slot 4 */
+#define JS5_FEATURES            0x0D4	/* (RO) Features of job slot 5 */
+#define JS6_FEATURES            0x0D8	/* (RO) Features of job slot 6 */
+#define JS7_FEATURES            0x0DC	/* (RO) Features of job slot 7 */
+#define JS8_FEATURES            0x0E0	/* (RO) Features of job slot 8 */
+#define JS9_FEATURES            0x0E4	/* (RO) Features of job slot 9 */
+#define JS10_FEATURES           0x0E8	/* (RO) Features of job slot 10 */
+#define JS11_FEATURES           0x0EC	/* (RO) Features of job slot 11 */
+#define JS12_FEATURES           0x0F0	/* (RO) Features of job slot 12 */
+#define JS13_FEATURES           0x0F4	/* (RO) Features of job slot 13 */
+#define JS14_FEATURES           0x0F8	/* (RO) Features of job slot 14 */
+#define JS15_FEATURES           0x0FC	/* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n)      GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define SHADER_PRESENT_LO       0x100	/* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI       0x104	/* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO        0x110	/* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI        0x114	/* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO           0x120	/* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI           0x124	/* (RO) Level 2 cache present bitmap, high word */
+
+#define STACK_PRESENT_LO        0xE00   /* (RO) Core stack present bitmap, low word */
+#define STACK_PRESENT_HI        0xE04   /* (RO) Core stack present bitmap, high word */
+
+
+#define SHADER_READY_LO         0x140	/* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI         0x144	/* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO          0x150	/* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI          0x154	/* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO             0x160	/* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI             0x164	/* (RO) Level 2 cache ready bitmap, high word */
+
+#define STACK_READY_LO          0xE10   /* (RO) Core stack ready bitmap, low word */
+#define STACK_READY_HI          0xE14   /* (RO) Core stack ready bitmap, high word */
+
+
+#define SHADER_PWRON_LO         0x180	/* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI         0x184	/* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO          0x190	/* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI          0x194	/* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO             0x1A0	/* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI             0x1A4	/* (WO) Level 2 cache power on bitmap, high word */
+
+#define STACK_PWRON_LO          0xE20   /* (RO) Core stack power on bitmap, low word */
+#define STACK_PWRON_HI          0xE24   /* (RO) Core stack power on bitmap, high word */
+
+
+#define SHADER_PWROFF_LO        0x1C0	/* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI        0x1C4	/* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO         0x1D0	/* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI         0x1D4	/* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO            0x1E0	/* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI            0x1E4	/* (WO) Level 2 cache power off bitmap, high word */
+
+#define STACK_PWROFF_LO         0xE30   /* (RO) Core stack power off bitmap, low word */
+#define STACK_PWROFF_HI         0xE34   /* (RO) Core stack power off bitmap, high word */
+
+
+#define SHADER_PWRTRANS_LO      0x200	/* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI      0x204	/* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO       0x210	/* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI       0x214	/* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO          0x220	/* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI          0x224	/* (RO) Level 2 cache power transition bitmap, high word */
+
+#define STACK_PWRTRANS_LO       0xE40   /* (RO) Core stack power transition bitmap, low word */
+#define STACK_PWRTRANS_HI       0xE44   /* (RO) Core stack power transition bitmap, high word */
+
+
+#define SHADER_PWRACTIVE_LO     0x240	/* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI     0x244	/* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO      0x250	/* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI      0x254	/* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO         0x260	/* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI         0x264	/* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES      0x300	/* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304	/* (RW) Coherency enable */
+
+#define JM_CONFIG               0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
+#define SHADER_CONFIG           0xF04	/* (RW) Shader core configuration settings (Implementation specific register) */
+#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define L2_MMU_CONFIG           0xF0C	/* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+#define JOB_CONTROL_BASE        0x1000
+
+#define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT         0x000	/* Raw interrupt status register */
+#define JOB_IRQ_CLEAR           0x004	/* Interrupt clear register */
+#define JOB_IRQ_MASK            0x008	/* Interrupt mask register */
+#define JOB_IRQ_STATUS          0x00C	/* Interrupt status register */
+#define JOB_IRQ_JS_STATE        0x010	/* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE        0x014	/* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
+
+#define JOB_SLOT0               0x800	/* Configuration registers for job slot 0 */
+#define JOB_SLOT1               0x880	/* Configuration registers for job slot 1 */
+#define JOB_SLOT2               0x900	/* Configuration registers for job slot 2 */
+#define JOB_SLOT3               0x980	/* Configuration registers for job slot 3 */
+#define JOB_SLOT4               0xA00	/* Configuration registers for job slot 4 */
+#define JOB_SLOT5               0xA80	/* Configuration registers for job slot 5 */
+#define JOB_SLOT6               0xB00	/* Configuration registers for job slot 6 */
+#define JOB_SLOT7               0xB80	/* Configuration registers for job slot 7 */
+#define JOB_SLOT8               0xC00	/* Configuration registers for job slot 8 */
+#define JOB_SLOT9               0xC80	/* Configuration registers for job slot 9 */
+#define JOB_SLOT10              0xD00	/* Configuration registers for job slot 10 */
+#define JOB_SLOT11              0xD80	/* Configuration registers for job slot 11 */
+#define JOB_SLOT12              0xE00	/* Configuration registers for job slot 12 */
+#define JOB_SLOT13              0xE80	/* Configuration registers for job slot 13 */
+#define JOB_SLOT14              0xF00	/* Configuration registers for job slot 14 */
+#define JOB_SLOT15              0xF80	/* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r)      (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO             0x00	/* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI             0x04	/* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO             0x08	/* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI             0x0C	/* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
+#define JS_XAFFINITY           0x1C	/* (RO) Extended affinity mask for job
+					   slot n */
+
+#define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
+#define JS_STATUS              0x24	/* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO        0x40	/* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI        0x44	/* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
+#define JS_XAFFINITY_NEXT      0x5C	/* (RW) Next extended affinity mask for
+					   job slot n */
+
+#define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
+
+#define MEMORY_MANAGEMENT_BASE  0x2000
+#define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT         0x000	/* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR           0x004	/* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK            0x008	/* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS          0x00C	/* (RO) Interrupt status register */
+
+#define MMU_AS0                 0x400	/* Configuration registers for address space 0 */
+#define MMU_AS1                 0x440	/* Configuration registers for address space 1 */
+#define MMU_AS2                 0x480	/* Configuration registers for address space 2 */
+#define MMU_AS3                 0x4C0	/* Configuration registers for address space 3 */
+#define MMU_AS4                 0x500	/* Configuration registers for address space 4 */
+#define MMU_AS5                 0x540	/* Configuration registers for address space 5 */
+#define MMU_AS6                 0x580	/* Configuration registers for address space 6 */
+#define MMU_AS7                 0x5C0	/* Configuration registers for address space 7 */
+#define MMU_AS8                 0x600	/* Configuration registers for address space 8 */
+#define MMU_AS9                 0x640	/* Configuration registers for address space 9 */
+#define MMU_AS10                0x680	/* Configuration registers for address space 10 */
+#define MMU_AS11                0x6C0	/* Configuration registers for address space 11 */
+#define MMU_AS12                0x700	/* Configuration registers for address space 12 */
+#define MMU_AS13                0x740	/* Configuration registers for address space 13 */
+#define MMU_AS14                0x780	/* Configuration registers for address space 14 */
+#define MMU_AS15                0x7C0	/* Configuration registers for address space 15 */
+
+#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO         0x00	/* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI         0x04	/* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO          0x08	/* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI          0x0C	/* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO         0x10	/* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI         0x14	/* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND             0x18	/* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS         0x1C	/* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO     0x20	/* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
+#define AS_STATUS              0x28	/* (RO) Status flags for address space n */
+
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
+
+/* End Register Offsets */
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+   MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS   16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n)      (1UL << (n))
+#define MMU_BUS_ERROR(n)       (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK   0xfffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED  (0u << 0)
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY  (1u << 1)
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE     (3u << 0)
+#define AS_TRANSTAB_LPAE_READ_INNER        (1u << 2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER       (1u << 4)
+
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK      0x00000003
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                    (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT       (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT        (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT      (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG             (0x3<<3)
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT      (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK                  (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC                (0x0<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX                    (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ                  (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE                 (0x3<<8)
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
+
+/*
+ * Begin Command Values
+ */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP         0x00	/* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START       0x01	/* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP   0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK        0x07    /* Mask of bits currently in use by the HW */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP         0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK        0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK      0x03	/* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH       0x04	/* Flush all L2 caches then issue a flush region command to all MMUs
+					   (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT    0x04	/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM   0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
+					   flush all L2 caches then issue a flush region command to all MMUs */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION        (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN            (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU                    (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG               (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
+
+/* JS_XAFFINITY register values */
+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
+#define JS_XAFFINITY_TILER_ENABLE     (1u << 8)
+#define JS_XAFFINITY_CACHE_ENABLE     (1u << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
+ */
+
+/* Group of values representing the job status insead a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE   0x00
+#define JS_STATUS_INTERRUPTED         (JS_STATUS_NO_EXCEPTION_BASE + 0x02)	/* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED             (JS_STATUS_NO_EXCEPTION_BASE + 0x03)	/* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED          (JS_STATUS_NO_EXCEPTION_BASE + 0x04)	/* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE          0x40
+#define JS_STATUS_CONFIG_FAULT        (JS_STATUS_FAULT_BASE)	/* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT         (JS_STATUS_FAULT_BASE + 0x01)	/* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT          (JS_STATUS_FAULT_BASE + 0x02)	/* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT         (JS_STATUS_FAULT_BASE + 0x03)	/* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT      (JS_STATUS_FAULT_BASE + 0x04)	/* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT           (JS_STATUS_FAULT_BASE + 0x08)	/* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE  0x50
+#define JS_STATUS_INSTR_INVALID_PC        (JS_STATUS_INSTRUCTION_FAULT_BASE)	/* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01)	/* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02)	/* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03)	/* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT         (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04)	/* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05)	/* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06)	/* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT      (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08)	/* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT        (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09)	/* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A)	/* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE   0x60
+#define JS_STATUS_OUT_OF_MEMORY       (JS_STATUS_MEMORY_FAULT_BASE)	/* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN             0x7F	/* 0x7F means UNKNOWN */
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP                0x00	/* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET         0x01	/* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET         0x02	/* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR       0x03	/* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE      0x04	/* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START  0x05	/* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP   0x06	/* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES       0x07	/* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES   0x08	/* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09	/* Places the GPU in protected mode */
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE           (1 << 2)	/* Set if the performance counters are active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE   (1 << 7)	/* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT      0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT        4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+
+#define PRFCNT_CONFIG_MODE_OFF    0	/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_MANUAL 1	/* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
+#define PRFCNT_CONFIG_MODE_TILE   2	/* The performance counters are enabled, and are written out each time a tile finishes rendering. */
+
+/* AS<n>_MEMATTR values: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
+
+/* Use GPU implementation-defined  caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC           0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
+
+/* Symbols for default MEMATTR to use
+ * Default is - HW implementation defined caching */
+#define AS_MEMATTR_INDEX_DEFAULT               0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE           3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL    1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC           2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF        3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA              4
+
+/* JS<n>_FEATURES register */
+
+#define JS_FEATURE_NULL_JOB              (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB         (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB       (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB           (1u << 4)
+#define JS_FEATURE_VERTEX_JOB            (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB          (1u << 6)
+#define JS_FEATURE_TILER_JOB             (1u << 7)
+#define JS_FEATURE_FUSED_JOB             (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB          (1u << 9)
+
+/* End JS<n>_FEATURES register */
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT        (24)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS              (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT       (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER      (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF         (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT       (26)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES             (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT      (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER     (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF        (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT      (12)
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS            (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT     (15)
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES           (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED  0
+#define IMPLEMENTATION_SILICON      1
+#define IMPLEMENTATION_FPGA         2
+#define IMPLEMENTATION_MODEL        3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT     256
+#define THREAD_MWS_DEFAULT    256
+#define THREAD_MBS_DEFAULT    256
+#define THREAD_MR_DEFAULT     1024
+#define THREAD_MTQ_DEFAULT    4
+#define THREAD_MTGS_DEFAULT   10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+
+#define SC_ALT_COUNTERS             (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL  (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD   (1ul << 6)
+#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
+#define SC_LS_PAUSEBUFFER_DISABLE   (1ul << 16)
+#define SC_TLS_HASH_ENABLE          (1ul << 17)
+#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
+#define SC_ENABLE_TEXGRD_FLAGS      (1ul << 25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+
+#define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
+
+/* End TILER_CONFIG register */
+
+/* JM_CONFIG register */
+
+#define JM_TIMESTAMP_OVERRIDE  (1ul << 0)
+#define JM_CLOCK_GATE_OVERRIDE (1ul << 1)
+#define JM_JOB_THROTTLE_ENABLE (1ul << 2)
+#define JM_JOB_THROTTLE_LIMIT_SHIFT (3)
+#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F)
+#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2)
+#define JM_IDVS_GROUP_SIZE_SHIFT (16)
+#define JM_MAX_IDVS_GROUP_SIZE (0x3F)
+/* End JM_CONFIG register */
+
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
new file mode 100644
index 0000000..d0deead
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
@@ -0,0 +1,401 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali_timeline
+
+#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _MALI_TIMELINE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mali_timeline_atoms_in_flight,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int tgid,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		tgid,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, tgid)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->tgid = tgid;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT,
+				(int)__entry->ts_sec,
+				(int)__entry->ts_nsec,
+				__entry->tgid,
+				__entry->count)
+);
+
+
+TRACE_EVENT(mali_timeline_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->atom_id,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_action,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int active),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		active),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, active)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->active = active;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->active)
+
+);
+
+TRACE_EVENT(mali_timeline_l2_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int state),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		state),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, state)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->state = state;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->state)
+
+);
+TRACE_EVENT(mali_timeline_pm_event,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int pm_event_type,
+		unsigned int pm_event_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		pm_event_type,
+		pm_event_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, pm_event_type)
+			__field(unsigned int, pm_event_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->pm_event_type = pm_event_type;
+		__entry->pm_event_id = pm_event_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->pm_event_type, __entry->pm_event_id)
+
+);
+
+TRACE_EVENT(mali_timeline_slot_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_pm_checktrans,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int trans_code,
+		int trans_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		trans_code,
+		trans_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, trans_code)
+			__field(int, trans_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->trans_code = trans_code;
+		__entry->trans_id = trans_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->trans_id)
+
+);
+
+TRACE_EVENT(mali_timeline_context_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->count)
+);
+
+#endif /* _MALI_TIMELINE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_uk.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
new file mode 100644
index 0000000..961a4a5
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,146 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif				/* __cplusplus */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
+ * identifier to select a UKK client to the uku_open() function.
+ *
+ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
+ * enumeration and the uku_open() implemenation for the various OS ports need to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+	/**
+	 * Value used to identify the Base driver UK client.
+	 */
+	UK_CLIENT_MALI_T600_BASE,
+
+	/** The number of uk clients supported. This must be the last member of the enum */
+	UK_CLIENT_COUNT
+};
+
+/**
+ * Each function callable through the UK interface has a unique number.
+ * Functions provided by UK clients start from number UK_FUNC_ID.
+ * Numbers below UK_FUNC_ID are used for internal UK functions.
+ */
+enum uk_func {
+	UKP_FUNC_ID_CHECK_VERSION,   /**< UKK Core internal function */
+	/**
+	 * Each UK client numbers the functions they provide starting from
+	 * number UK_FUNC_ID. This number is then eventually assigned to the
+	 * id field of the union uk_header structure when preparing to make a
+	 * UK call. See your UK client for a list of their function numbers.
+	 */
+	UK_FUNC_ID = 512
+};
+
+/**
+ * Arguments for a UK call are stored in a structure. This structure consists
+ * of a fixed size header and a payload. The header carries a 32-bit number
+ * identifying the UK function to be called (see uk_func). When the UKK client
+ * receives this header and executed the requested UK function, it will use
+ * the same header to store the result of the function in the form of a
+ * int return code. The size of this structure is such that the
+ * first member of the payload following the header can be accessed efficiently
+ * on a 32 and 64-bit kernel and the structure has the same size regardless
+ * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
+ * accordingly in the OS specific mali_uk_os.h header file.
+ */
+union uk_header {
+	/**
+	 * 32-bit number identifying the UK function to be called.
+	 * Also see uk_func.
+	 */
+	u32 id;
+	/**
+	 * The int return code returned by the called UK function.
+	 * See the specification of the particular UK function you are
+	 * calling for the meaning of the error codes returned. All
+	 * UK functions return 0 on success.
+	 */
+	u32 ret;
+	/*
+	 * Used to ensure 64-bit alignment of this union. Do not remove.
+	 * This field is used for padding and does not need to be initialized.
+	 */
+	u64 sizer;
+};
+
+/**
+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
+ */
+struct uku_version_check_args {
+	union uk_header header;
+		  /**< UK call header */
+	u16 major;
+	   /**< This field carries the user-side major version on input and the kernel-side major version on output */
+	u16 minor;
+	   /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
+	u8 padding[4];
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif				/* __cplusplus */
+#endif				/* _UK_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100644
index 0000000..ef9fb96
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name is the name set in MALI_PLATFORM_NAME
+#
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
new file mode 100755
index 0000000..0a82eaf
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
@@ -0,0 +1,39 @@
+#
+# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+USE_GPPLL?=0
+ifdef CONFIG_AM_VIDEO
+    USE_GPPLL:=1
+endif
+
+ccflags-y += -DAMLOGIC_GPU_USE_GPPLL=$(USE_GPPLL)
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \
+	$(MALI_PLATFORM_DIR)/mali_clock.o \
+	$(MALI_PLATFORM_DIR)/mpgpu.o \
+	$(MALI_PLATFORM_DIR)/meson_main2.o \
+	$(MALI_PLATFORM_DIR)/platform_gx.o \
+	$(MALI_PLATFORM_DIR)/scaling.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
new file mode 100755
index 0000000..d131a4b
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
@@ -0,0 +1,658 @@
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+#ifndef AML_CLK_LOCK_ERROR
+#define AML_CLK_LOCK_ERROR 1
+#endif
+
+static unsigned gpu_dbg_level = 0;
+module_param(gpu_dbg_level, uint, 0644);
+MODULE_PARM_DESC(gpu_dbg_level, "gpu debug level");
+
+#define gpu_dbg(level, fmt, arg...)			\
+	do {			\
+		if (gpu_dbg_level >= (level))		\
+		printk("gpu_debug"fmt , ## arg); 	\
+	} while (0)
+
+#define GPU_CLK_DBG(fmt, arg...)
+
+//disable print
+//#define _dev_info(...)
+
+//static DEFINE_SPINLOCK(lock);
+static mali_plat_info_t* pmali_plat = NULL;
+//static u32 mali_extr_backup = 0;
+//static u32 mali_extr_sample_backup = 0;
+struct timeval start;
+struct timeval end;
+int mali_pm_statue = 0;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali_0_parent = dvfs_tbl->clkp_handle;
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+#ifdef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+#endif
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_set_parent(clk_mali_0, clk_mali_0_parent);
+
+	clk_prepare_enable(clk_mali_0);
+
+	clk_set_parent(clk_mali, clk_mali_0);
+
+#ifdef AML_CLK_LOCK_ERROR
+	clk_set_parent(clk_mali_1, clk_mali_0_parent);
+	clk_prepare_enable(clk_mali_1);
+#endif
+
+	GPU_CLK_DBG("%s:enable(%d), %s:enable(%d)\n",
+	  clk_mali_0->name,  clk_mali_0->enable_count,
+	  clk_mali_0_parent->name, clk_mali_0_parent->enable_count);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+	struct clk *clk_mali_x   = NULL;
+	struct clk *clk_mali_x_parent = NULL;
+	struct clk *clk_mali_x_old = NULL;
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+	clk_mali_x_old  = clk_get_parent(clk_mali);
+
+	if (!clk_mali_x_old) {
+		printk("gpu: could not get clk_mali_x_old or clk_mali_x_old\n");
+		return 0;
+	}
+	if (clk_mali_x_old == clk_mali_0) {
+		clk_mali_x = clk_mali_1;
+	} else if (clk_mali_x_old == clk_mali_1) {
+		clk_mali_x = clk_mali_0;
+	} else {
+		printk("gpu: unmatched clk_mali_x_old\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG("idx=%d, clk_freq=%d\n", idx, dvfs_tbl->clk_freq);
+	clk_mali_x_parent = dvfs_tbl->clkp_handle;
+	if (!clk_mali_x_parent) {
+		printk("gpu: could not get clk_mali_x_parent\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x_parent, dvfs_tbl->clkp_freq);
+	GPU_CLK_DBG();
+	ret = clk_set_parent(clk_mali_x, clk_mali_x_parent);
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x, dvfs_tbl->clk_freq);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	ret = clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG("new %s:enable(%d)\n", clk_mali_x->name,  clk_mali_x->enable_count);
+	do_gettimeofday(&start);
+	udelay(1);// delay 10ns
+	do_gettimeofday(&end);
+	ret = clk_set_parent(clk_mali, clk_mali_x);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	GPU_CLK_DBG("old %s:enable(%d)\n", clk_mali_x_old->name,  clk_mali_x_old->enable_count);
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "ao io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	mpdata->dvfs_table_size = 0;
+
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+	dev_notice(&pdev->dev, "dvfs table is %d\n", mpdata->dvfs_table_size);
+	dev_notice(&pdev->dev, "dvfs table addr %p, ele size=%zd\n",
+			mpdata->dvfs_table,
+			sizeof(mpdata->dvfs_table[0]));
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		mpdata->def_clock = mpdata->scale_info.maxclk;
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->def_clock);
+	}
+	if (mpdata->def_clock > mpdata->scale_info.maxclk)
+		mpdata->def_clock = mpdata->scale_info.maxclk;
+
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "clk_gpu");
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_gpu_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_gpu_1");
+	if (IS_ERR(mpdata->clk_mali) || IS_ERR(mpdata->clk_mali_0) || IS_ERR(mpdata->clk_mali_1)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+#else
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	if ((0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) &&
+			!IS_ERR(dvfs_tbl->clkp_handle) &&
+			(0 != dvfs_tbl->clkp_freq)) {
+		clk_prepare_enable(dvfs_tbl->clkp_handle);
+		clk_set_rate(dvfs_tbl->clkp_handle, dvfs_tbl->clkp_freq);
+	}
+	clk_prepare_enable(clk_mali);
+	clk_set_rate(clk_mali, dvfs_tbl->clk_freq);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+
+	GPU_CLK_DBG();
+	do_gettimeofday(&start);
+	ret = clk_set_rate(clk_mali, dvfs_tbl->clk_freq);
+	do_gettimeofday(&end);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	GPU_CLK_DBG();
+	clk_disable_unprepare(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+				&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		} else if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+			dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+			if (IS_ERR(dvfs_tbl->clkp_handle)) {
+				dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+			}
+			ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+			if (ret) {
+				dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+			}
+		}
+
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		mpdata->def_clock = mpdata->scale_info.maxclk;
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->def_clock);
+	}
+	if (mpdata->def_clock > mpdata->scale_info.maxclk)
+		mpdata->def_clock = mpdata->scale_info.maxclk;
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+	_dev_info(&pdev->dev, "clock dvfs table size is %d\n", mpdata->dvfs_table_size);
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "gpu_mux");
+#if 0
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_mali_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_mali_1");
+#endif
+	if (IS_ERR(mpdata->clk_mali)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
new file mode 100644
index 0000000..9b8b392
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
@@ -0,0 +1,37 @@
+#ifndef __MALI_CLOCK_H__
+#define __MALI_CLOCK_H__
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <linux/clk.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 29))
+#include <linux/amlogic/iomap.h>
+#endif
+
+#ifndef HHI_MALI_CLK_CNTL
+#define HHI_MALI_CLK_CNTL   0x6C
+#define mplt_read(r)        readl((pmali_plat->reg_base_hiubus) + ((r)<<2))
+#define mplt_write(r, v)    writel((v), ((pmali_plat->reg_base_hiubus) + ((r)<<2)))
+#define mplt_setbits(r, m)  mplt_write((r), (mplt_read(r) | (m)));
+#define mplt_clrbits(r, m)  mplt_write((r), (mplt_read(r) & (~(m))));
+#endif
+
+//extern int mali_clock_init(struct platform_device *dev);
+int mali_clock_init_clk_tree(struct platform_device *pdev);
+
+typedef int (*critical_t)(size_t param);
+int mali_clock_critical(critical_t critical, size_t param);
+
+int mali_clock_init(mali_plat_info_t*);
+int mali_clock_set(unsigned int index);
+void disable_clock(void);
+void enable_clock(void);
+u32 get_mali_freq(u32 idx);
+void set_str_src(u32 data);
+int mali_dt_info(struct platform_device *pdev,
+        struct mali_plat_info_t *mpdata);
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
new file mode 100755
index 0000000..d7dd246
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
@@ -0,0 +1,126 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/version.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static unsigned long t83x_static_power(unsigned long voltage)
+{
+#if 0
+	struct thermal_zone_device *tz;
+	unsigned long temperature, temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10);
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	tz = thermal_zone_get_zone_by_name("gpu");
+	if (IS_ERR(tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(tz));
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	} else {
+		int ret;
+
+		ret = tz->ops->get_temp(tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(2 * temp_cubed)
+			- (80 * temp_squared)
+			+ (4700 * temp)
+			+ 32000;
+
+	return (((coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+#else
+	return 0;
+#endif
+}
+
+static unsigned long t83x_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+	const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */
+
+	return (coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+struct devfreq_cooling_ops t83x_model_ops = {
+#else
+struct devfreq_cooling_power t83x_model_ops = {
+#endif
+	.get_static_power = t83x_static_power,
+	.get_dynamic_power = t83x_dynamic_power,
+};
+
+#endif
+
+#include <mali_kbase_config.h>
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &dummy_platform_config;
+}
+
+#ifndef CONFIG_OF
+int kbase_platform_register(void)
+{
+	return 0;
+}
+
+void kbase_platform_unregister(void)
+{
+}
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
new file mode 100755
index 0000000..1c56015
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (750000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (100000)
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (NULL)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+extern struct kbase_platform_funcs_conf dt_funcs_conf;
+#define PLATFORM_FUNCS (&dt_funcs_conf)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#ifdef CONFIG_DEVFREQ_THERMAL
+#define POWER_MODEL_CALLBACKS (&t83x_model_ops)
+extern struct devfreq_cooling_ops t83x_model_ops;
+#else
+#define POWER_MODEL_CALLBACKS (NULL)
+#endif
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+/**
+ * Autosuspend delay
+ *
+ * The delay time (in milliseconds) to be used for autosuspend
+ */
+#define AUTO_SUSPEND_DELAY (100)
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.c
new file mode 100755
index 0000000..dc1654f
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.c
@@ -0,0 +1,336 @@
+/**
+ ** Meson hardware specific initialization
+ **/
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include <mali_kbase.h>
+#include "mali_kbase_meson.h"
+
+int meson_gpu_reset(struct kbase_device *kbdev)
+{
+	struct meson_context *platform = kbdev->platform_context;
+    void __iomem *reg_base_reset = platform->reg_base_reset;
+    u32 value;
+
+    //JOHNT
+    // Level reset mail
+ 
+    // Level reset mail
+    //writel(~(0x1<<14), reg_base_reset + P_RESET2_MASK * 4);
+    //writel(~(0x1<<14), reg_base_reset + P_RESET2_LEVEL * 4);
+
+    //writel(0xffffffff, reg_base_reset + P_RESET2_LEVEL * 4);
+    //writel(0xffffffff, reg_base_reset + P_RESET0_LEVEL * 4);
+
+    MESON_PRINT("%s, %d\n", __func__, __LINE__);
+    MESON_PRINT("reg_base=%p, reset0=%p\n", reg_base_reset, reg_base_reset + RESET0_MASK * 4);
+    udelay(100);
+#if 0
+    value = readl(reg_base_reset + RESET0_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET0_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(100);
+    MESON_PRINT("%s, %d\n", __func__, __LINE__);
+    value = readl(reg_base_reset + RESET0_MASK * 4);
+    value = value & (~(0x1<<20));
+    writel(value, reg_base_reset + RESET0_MASK * 4);
+
+    udelay(100);
+#if 0
+    value = readl(reg_base_reset + RESET0_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET0_REGISTER=%x\n", __LINE__, value);
+    udelay(100);
+#endif
+
+    value = readl(reg_base_reset + RESET0_LEVEL * 4);
+    value = value & (~(0x1<<20));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET0_LEVEL * 4);
+    udelay(100);
+
+#if 0
+    value = readl(reg_base_reset + RESET0_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET0_REGISTER=%x\n", __LINE__, value);
+    udelay(100);
+#endif
+
+///////////////
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(100);
+    value = readl(reg_base_reset + RESET2_MASK * 4);
+    value = value & (~(0x1<<14));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET2_MASK * 4);
+
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    value = readl(reg_base_reset + RESET2_LEVEL * 4);
+    value = value & (~(0x1<<14));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET2_LEVEL * 4);
+    udelay(100);
+
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(100);
+    value = readl(reg_base_reset + RESET0_LEVEL * 4);
+    value = value | ((0x1<<20));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET0_LEVEL * 4);
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(100);
+    value = readl(reg_base_reset + RESET2_LEVEL * 4);
+    value = value | ((0x1<<14));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET2_LEVEL * 4);
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(10); // OR POLL for reset done
+
+    return 0;
+}
+
+void  meson_gpu_pwr_on(struct kbase_device *kbdev, u32  mask)
+{
+    u32    part1_done = 0;
+    u32     value = 0;
+    u32     count = 0;
+
+    kbdev->pm.backend.gpu_powered = true;
+    MESON_PRINT("%s, %d begin\n", __func__,__LINE__);
+
+#if 0
+    value = 0x10 | (0x1<<16);
+#else
+    value = 0xfff | (0x20<<16);
+#endif
+    while (part1_done != value) {
+        Mali_WrReg(GPU_CONTROL_REG(PWR_KEY), 0x2968A819);
+        Mali_WrReg(GPU_CONTROL_REG(PWR_OVERRIDE1), value);
+        part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+        MESON_PRINT("write again, count=%d, overrider1=%x\n", count, part1_done);
+        udelay(20);
+        count ++;
+        if (0 == (count %100)) MESON_PRINT("write again, count%d\n", count);
+    }
+    part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+    MESON_PRINT("write again, count=%d, overrider1=%x\n", count, part1_done);
+
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+
+    MESON_PRINT("%s, %d\n", __func__,__LINE__);
+    if ((mask & 0x1) != 0 ) {
+        MESON_PRINT("%s, %d\n", __func__,__LINE__);
+        Mali_WrReg(0x00000190, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x00000194, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x000001a0, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x000001a4, 0xffffffff);    // Power on all cores
+    }
+    MESON_PRINT("power on %d\n", __LINE__);
+
+    if ( (mask >> 1) != 0 ) {
+        Mali_WrReg(0x00000180, mask >> 1);    // Power on all cores
+        Mali_WrReg(0x00000184, 0x0);    // Power on all cores
+        MESON_PRINT("%s, %d\n", __func__,__LINE__);
+    }
+
+    MESON_PRINT("%s, %d\n", __func__,__LINE__);
+    if ( mask != 0 ) {
+        MESON_PRINT("%s, %d\n", __func__,__LINE__);
+        udelay(10);
+        part1_done = Mali_RdReg(0x0000020);
+        while(part1_done ==0) {
+            part1_done = Mali_RdReg(0x00000020);
+            udelay(10);
+        }
+
+        MESON_PRINT("%s, %d\n", __func__,__LINE__);
+        Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+    }
+    MESON_PRINT("%s, %d end\n", __func__,__LINE__);
+}
+
+void  meson_gpu_pwr_off(struct kbase_device *kbdev, u32  mask)
+{
+#if 1
+    u32 part1_done;
+    part1_done = 0;
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+
+    if ( (mask >> 1) != 0 ) {
+        Mali_WrReg(0x000001C0, mask >> 1);    // Power off all cores
+        Mali_WrReg(0x000001C4, 0x0);          // Power off all cores
+    }
+
+    if (  (mask & 0x1) != 0 ) {
+        Mali_WrReg(0x000001D0, 0xffffffff);    // Power off all cores
+        Mali_WrReg(0x000001D4, 0xffffffff);    // Power off all cores
+        Mali_WrReg(0x000001E0, 0xffffffff);    // Power off all cores
+        Mali_WrReg(0x000001E4, 0xffffffff);    // Power off all cores
+    }
+
+    if ( mask != 0 ) {
+        part1_done = Mali_RdReg(0x0000020);
+        while((part1_done ==0)) { part1_done = Mali_RdReg(0x00000020); }
+        MESON_PRINT("Mali_pwr_off:gpu_irq : %x\n", part1_done);
+        Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+    }
+#endif
+}
+
+
+
+
+static int kbase_platform_meson_init(struct kbase_device *kbdev)
+{
+#if 0
+	int err;
+#endif
+	struct device_node *gpu_dn = kbdev->dev->of_node;
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	unsigned long flags;
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+	struct meson_context *platform;
+    u32 part1_done = 0;
+
+	platform = kmalloc(sizeof(struct meson_context), GFP_KERNEL);
+
+	if (!platform)
+		return -ENOMEM;
+
+	memset(platform, 0, sizeof(struct meson_context));
+
+	kbdev->platform_context = (void *) platform;
+
+    platform->reg_base_reset = of_iomap(gpu_dn, 1);
+	_dev_info(kbdev->dev, "reset io source  0x%p\n",platform->reg_base_reset);
+
+	platform->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(kbdev->dev, "ao io source  0x%p\n", platform->reg_base_aobus);
+
+	platform->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(kbdev->dev, "hiu io source  0x%p\n", platform->reg_base_hiubus);
+
+    platform->clk_mali = devm_clk_get(kbdev->dev, "gpu_mux");
+	if (IS_ERR(platform->clk_mali)) {
+		dev_err(kbdev->dev, "failed to get clock pointer\n");
+	} else {
+        clk_prepare_enable(platform->clk_mali);
+        clk_set_rate(platform->clk_mali, 285000000);
+    }
+    MESON_PRINT("%s, %d begin\n", __func__, __LINE__);
+    meson_gpu_reset(kbdev);
+    meson_gpu_pwr_on(kbdev, 0xe);
+
+    part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+    MESON_PRINT("line%d, overrider1=%x\n", __LINE__, part1_done);
+    meson_gpu_pwr_off(kbdev, 0xe);
+#if 1
+    part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+    MESON_PRINT("line%d, overrider1=%x\n", __LINE__, part1_done);
+
+    meson_gpu_reset(kbdev);
+    meson_gpu_pwr_on(kbdev, 0xe);
+    part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+    MESON_PRINT("line%d, overrider1=%x\n", __LINE__, part1_done);
+#endif
+#if 0
+	platform->cmu_pmu_status = 0;
+	platform->dvfs_wq = NULL;
+	platform->polling_speed = 100;
+	gpu_debug_level = DVFS_WARNING;
+#endif
+
+	mutex_init(&platform->gpu_clock_lock);
+	mutex_init(&platform->gpu_dvfs_handler_lock);
+	spin_lock_init(&platform->gpu_dvfs_spinlock);
+#if 0
+	err = gpu_control_module_init(kbdev);
+	if (err)
+		goto clock_init_fail;
+
+	/* dvfs gobernor init*/
+	gpu_dvfs_governor_init(kbdev, G3D_DVFS_GOVERNOR_DEFAULT);
+#endif
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags);
+	platform->wakeup_lock = 0;
+	spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+#if 0
+	/* dvfs handler init*/
+	gpu_dvfs_handler_init(kbdev);
+
+	err = gpu_notifier_init(kbdev);
+	if (err)
+		goto notifier_init_fail;
+
+	err = gpu_create_sysfs_file(kbdev->dev);
+	if (err)
+		goto sysfs_init_fail;
+#endif
+
+    MESON_PRINT("%s, %d end\n", __func__, __LINE__);
+	return 0;
+#if 0
+clock_init_fail:
+notifier_init_fail:
+sysfs_init_fail:
+	kfree(platform);
+
+	return err;
+#endif
+}
+
+/**
+ ** Meson hardware specific termination
+ **/
+static void kbase_platform_meson_term(struct kbase_device *kbdev)
+{
+	struct meson_context *platform;
+	platform = (struct meson_context *) kbdev->platform_context;
+#if 0
+	gpu_notifier_term();
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	gpu_dvfs_handler_deinit(kbdev);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	gpu_control_module_term(kbdev);
+#endif
+
+	kfree(kbdev->platform_context);
+	kbdev->platform_context = 0;
+
+#if 0
+	gpu_remove_sysfs_file(kbdev->dev);
+#endif
+}
+
+struct kbase_platform_funcs_conf platform_funcs = {
+	.platform_init_func = &kbase_platform_meson_init,
+	.platform_term_func = &kbase_platform_meson_term,
+};
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.h
new file mode 100644
index 0000000..89ff21b
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.h
@@ -0,0 +1,65 @@
+
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+
+#define RESET0_MASK     0x00
+#define RESET1_MASK     0x01
+#define RESET2_MASK     0x02
+
+#define RESET0_LEVEL    0x10
+#define RESET1_LEVEL    0x11
+#define RESET2_LEVEL    0x12
+
+#define Mali_WrReg(regnum, value)   writel((value),(kbdev->reg + (regnum)))
+#define Mali_RdReg(regnum)          readl(kbdev->reg + (regnum))
+#define MESON_PRINT(...)            
+
+struct meson_context {
+	struct mutex gpu_clock_lock;
+	struct mutex gpu_dvfs_handler_lock;
+	spinlock_t gpu_dvfs_spinlock;
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	int utilization;
+	int util_gl_share;
+	int util_cl_share[2];
+#ifdef CONFIG_CPU_THERMAL_IPA
+	int norm_utilisation;
+	int freq_for_normalisation;
+	unsigned long long power;
+#endif /* CONFIG_CPU_THERMAL_IPA */
+	int max_lock;
+	int min_lock;
+#if 0
+	int user_max_lock[NUMBER_LOCK];
+	int user_min_lock[NUMBER_LOCK];
+#endif
+	int target_lock_type;
+	int down_requirement;
+	bool wakeup_lock;
+	int governor_num;
+	int governor_type;
+	char governor_list[100];
+	bool dvfs_status;
+#ifdef CONFIG_CPU_THERMAL_IPA
+	int time_tick;
+	u32 time_busy;
+	u32 time_idle;
+#endif /* CONFIG_CPU_THERMAL_IPA */
+#endif
+	int cur_clock;
+	int cur_voltage;
+	int voltage_margin;
+	bool tmu_status;
+	int debug_level;
+	int polling_speed;
+	struct workqueue_struct *dvfs_wq;
+    void __iomem *reg_base_reset;
+    void __iomem *reg_base_aobus;
+    void __iomem *reg_base_hiubus;
+    struct clk  *clk_mali;
+    struct clk  *clk_gp;
+};
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
new file mode 100755
index 0000000..a6ef90f
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
@@ -0,0 +1,308 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <linux/pm_runtime.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_config_platform.h"
+
+void *reg_base_hiubus = NULL;
+u32  override_value_aml = 0;
+static int first = 1;
+
+#define RESET0_MASK    0x10
+#define RESET1_MASK    0x11
+#define RESET2_MASK    0x12
+
+#define RESET0_LEVEL    0x20
+#define RESET1_LEVEL    0x21
+#define RESET2_LEVEL    0x22
+#define Rd(r)                           readl((reg_base_hiubus) + ((r)<<2))
+#define Wr(r, v)                        writel((v), ((reg_base_hiubus) + ((r)<<2)))
+#define Mali_WrReg(regnum, value)   kbase_reg_write(kbdev, (regnum), (value), NULL)
+#define Mali_RdReg(regnum)          kbase_reg_read(kbdev, (regnum), NULL)
+#define stimulus_print   printk
+#define stimulus_display printk
+#define Mali_pwr_off(x)  Mali_pwr_off_with_kdev(kbdev, (x))
+
+extern u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type);
+
+//[0]:CG [1]:SC0 [2]:SC2
+static void  Mali_pwr_on_with_kdev ( struct kbase_device *kbdev, uint32_t  mask)
+{
+    uint32_t    part1_done;
+    uint32_t    shader_present;
+    uint32_t    tiler_present;
+    uint32_t    l2_present;
+
+    part1_done = 0;
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+
+    shader_present = Mali_RdReg(0x100);
+    tiler_present  = Mali_RdReg(0x110);
+    l2_present     = Mali_RdReg(0x120);
+    printk("shader_present=%d, tiler_present=%d, l2_present=%d\n",
+            shader_present, tiler_present, l2_present);
+
+    if (  mask == 0 ) {
+        Mali_WrReg(0x00000180, 0xffffffff);   // Power on all cores (shader low)
+        Mali_WrReg(0x00000184, 0xffffffff);   // Power on all cores (shader high)
+        Mali_WrReg(0x00000190, 0xffffffff);   // Power on all cores (tiler low)
+        Mali_WrReg(0x00000194, 0xffffffff);   // Power on all cores (tiler high)
+        Mali_WrReg(0x000001a0, 0xffffffff);   // Power on all cores (l2 low)
+        Mali_WrReg(0x000001a4, 0xffffffff);   // Power on all cores (l2 high)
+    } else {
+        Mali_WrReg(0x00000180, mask);    // Power on all cores (shader low)
+        Mali_WrReg(0x00000184, 0);       // Power on all cores (shader high)
+        Mali_WrReg(0x00000190, mask);    // Power on all cores (tiler low)
+        Mali_WrReg(0x00000194, 0);       // Power on all cores (tiler high)
+        Mali_WrReg(0x000001a0, mask);    // Power on all cores (l2 low)
+        Mali_WrReg(0x000001a4, 0);       // Power on all cores (l2 high)
+    }
+
+    part1_done = Mali_RdReg(0x0000020);
+    while((part1_done ==0)) { part1_done = Mali_RdReg(0x00000020); }
+    stimulus_display("Mali_pwr_on:gpu_irq : %x\n", part1_done);
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+}
+
+//[0]:CG [1]:SC0 [2]:SC2
+#if 0
+static void  Mali_pwr_off_with_kdev( struct kbase_device *kbdev, uint32_t  mask)
+{
+    uint32_t    part1_done;
+    part1_done = 0;
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+
+    if ( mask == 0 ) {
+        Mali_WrReg(0x000001C0, 0xffffffff);    // Power off all cores (tiler low)
+        Mali_WrReg(0x000001C4, 0xffffffff);    // Power off all cores (tiler high)
+        Mali_WrReg(0x000001D0, 0xffffffff);    // Power off all cores (l2 low)
+        Mali_WrReg(0x000001D4, 0xffffffff);    // Power off all cores (l2 high)
+        Mali_WrReg(0x000001E0, 0xffffffff);    // Power off all cores (shader low)
+        Mali_WrReg(0x000001E4, 0xffffffff);    // Power off all cores (shader high)
+    } else {
+        Mali_WrReg(0x000001C0, mask);    // Power off all cores  (tiler low)
+        Mali_WrReg(0x000001C4, 0x0);     // Power off all cores  (tiler high)
+        Mali_WrReg(0x000001D0, mask);    // Power off all cores  (l2 low)
+        Mali_WrReg(0x000001D4, 0x0);     // Power off all cores  (l2 high)
+        Mali_WrReg(0x000001E0, mask);    // Power off all cores  (shader low)
+        Mali_WrReg(0x000001E4, 0x0);     // Power off all cores  (shader high)
+    }
+
+    part1_done = Mali_RdReg(0x0000020);
+    while((part1_done ==0)) { part1_done = Mali_RdReg(0x00000020); }
+    stimulus_display("Mali_pwr_off:gpu_irq : %x\n", part1_done);
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+}
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	int ret = 1; /* Assume GPU has been powered off */
+	int error;
+	struct platform_device *pdev = to_platform_device(kbdev->dev);
+	struct resource *reg_res;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+    //printk("20151013, %s, %d\n", __FILE__, __LINE__);
+    if (first == 0) goto ret;
+
+    reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+    if (!reg_res) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) 
+        reg_base_hiubus = ioremap(reg_res->start, resource_size(reg_res));
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+
+//JOHNT
+    // Level reset mail
+
+    // Level reset mail
+    //Wr(P_RESET2_MASK, ~(0x1<<14));
+    //Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    //Wr(P_RESET2_LEVEL, 0xffffffff);
+    //Wr(P_RESET0_LEVEL, 0xffffffff);
+
+    value = Rd(RESET0_MASK);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_MASK, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+///////////////
+    value = Rd(RESET2_MASK);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_MASK, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value | ((0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value | ((0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    udelay(10); // OR POLL for reset done
+
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+
+    Mali_pwr_on_with_kdev(kbdev, 0x1);
+    //printk("set PWR_ORRIDE, reg=%p, reg_start=%llx, reg_size=%zx, reg_mapped=%p\n",
+    //        kbdev->reg, kbdev->reg_start, kbdev->reg_size, reg_base_hiubus);
+	dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+			(void *)kbdev->dev->pm_domain);
+
+    first = 0;
+    //printk("%s, %d\n", __FILE__, __LINE__);
+ret:
+	error = pm_runtime_get_sync(kbdev->dev);
+	if (error == 1) {
+		/*
+		 * Let core know that the chip has not been
+		 * powered off, so we can save on re-initialization.
+		 */
+		ret = 0;
+	}
+	udelay(100);
+#if 1
+
+    core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+    l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+    tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+    //printk("core_ready=%llx, l2_ready=%llx, tiler_ready=%llx\n", core_ready, l2_ready, tiler_ready);
+#endif
+	dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error);
+
+	return ret;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+    //printk("%s, %d\n", __FILE__, __LINE__);
+#if 0
+    iounmap(reg_base_hiubus);
+    reg_base_hiubus = NULL;
+#endif
+	pm_runtime_mark_last_busy(kbdev->dev);
+	pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+	
+	pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY);
+	pm_runtime_use_autosuspend(kbdev->dev);
+	pm_runtime_set_active(kbdev->dev);
+	
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+	pm_runtime_enable(kbdev->dev);
+
+	if (!pm_runtime_enabled(kbdev->dev)) {
+		dev_warn(kbdev->dev, "pm_runtime not enabled");
+		ret = -ENOSYS;
+	}
+
+	return ret;
+}
+
+static void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+	pm_runtime_disable(kbdev->dev);
+}
+#endif
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+	return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+	int ret = pm_callback_runtime_on(kbdev);
+
+	WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+	pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback = pm_callback_suspend,
+	.power_resume_callback = pm_callback_resume,
+#ifdef KBASE_PM_RUNTIME
+	.power_runtime_init_callback = kbase_device_runtime_init,
+	.power_runtime_term_callback = kbase_device_runtime_disable,
+	.power_runtime_on_callback = pm_callback_runtime_on,
+	.power_runtime_off_callback = pm_callback_runtime_off,
+#else				/* KBASE_PM_RUNTIME */
+	.power_runtime_init_callback = NULL,
+	.power_runtime_term_callback = NULL,
+	.power_runtime_on_callback = NULL,
+	.power_runtime_off_callback = NULL,
+#endif				/* KBASE_PM_RUNTIME */
+};
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
new file mode 100644
index 0000000..41185d0
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
@@ -0,0 +1,15 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/kernel.h>
+#ifndef MALI_PLATFORM_H_
+#define MALI_PLATFORM_H_
+
+extern u32 mali_gp_reset_fail;
+extern u32 mali_core_timeout;
+
+#endif /* MALI_PLATFORM_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
new file mode 100644
index 0000000..b541090
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.h
+ * Example core scaling policy.
+ */
+
+#ifndef __ARM_CORE_SCALING_H__
+#define __ARM_CORE_SCALING_H__
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+
+enum mali_scale_mode_t {
+	MALI_PP_SCALING = 0,
+	MALI_PP_FS_SCALING,
+	MALI_SCALING_DISABLE,
+	MALI_TURBO_MODE,
+	MALI_SCALING_MODE_MAX
+};
+
+typedef struct mali_dvfs_threshold_table {
+	uint32_t    freq_index;
+	uint32_t    voltage;
+	uint32_t    keep_count;
+	uint32_t    downthreshold;
+	uint32_t    upthreshold;
+    uint32_t    clk_freq;
+    const char  *clk_parent;
+    struct clk  *clkp_handle;
+    uint32_t    clkp_freq;
+} mali_dvfs_threshold_table;
+
+/**
+ *  restrictions on frequency and number of pp.
+ */
+typedef struct mali_scale_info_t {
+	u32 minpp;
+	u32 maxpp;
+	u32 minclk;
+	u32 maxclk;
+} mali_scale_info_t;
+
+/**
+ * Platform spesific data for meson chips.
+ */
+typedef struct mali_plat_info_t {
+	u32 cfg_pp; /* number of pp. */
+	u32 cfg_min_pp;
+	u32 turbo_clock; /* reserved clock src. */
+	u32 def_clock; /* gpu clock used most of time.*/
+	u32 cfg_clock; /* max clock could be used.*/
+	u32 cfg_clock_bkup; /* same as cfg_clock, for backup. */
+	u32 cfg_min_clock;
+
+	u32  sc_mpp; /* number of pp used most of time.*/
+	u32  bst_gpu; /* threshold for boosting gpu. */
+	u32  bst_pp; /* threshold for boosting PP. */
+
+	u32 *clk;
+	u32 *clk_sample;
+	u32 clk_len;
+	u32 have_switch; /* have clock gate switch or not. */
+
+	mali_dvfs_threshold_table *dvfs_table;
+	u32 dvfs_table_size;
+
+	mali_scale_info_t scale_info;
+	u32 maxclk_sysfs;
+	u32 maxpp_sysfs;
+
+	/* set upper limit of pp or frequency, for THERMAL thermal or band width saving.*/
+	u32 limit_on;
+
+	/* for boost up gpu by user. */
+	void (*plat_preheat)(void);
+
+    struct platform_device *pdev;
+    void __iomem *reg_base_hiubus;
+    void __iomem *reg_base_aobus;
+	struct work_struct wq_work;
+    struct clk *clk_mali;
+    struct clk *clk_mali_0;
+    struct clk *clk_mali_1;
+} mali_plat_info_t;
+mali_plat_info_t* get_mali_plat_data(void);
+
+/**
+ * Initialize core scaling policy.
+ *
+ * @note The core scaling policy will assume that all PP cores are on initially.
+ *
+ * @param num_pp_cores Total number of PP cores.
+ */
+int mali_core_scaling_init(mali_plat_info_t*);
+
+/**
+ * Terminate core scaling policy.
+ */
+void mali_core_scaling_term(void);
+
+/**
+ * cancel and flush scaling job queue.
+ */
+void flush_scaling_job(void);
+
+/* get current state(pp, clk). */
+void get_mali_rt_clkpp(u32* clk, u32* pp);
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush);
+void revise_mali_rt(void);
+/* get max gpu clk level of this chip*/
+int get_gpu_max_clk_level(void);
+
+/* get or set the scale mode. */
+u32 get_mali_schel_mode(void);
+void set_mali_schel_mode(u32 mode);
+
+/* for frequency reporter in DS-5 streamline. */
+u32 get_current_frequency(void);
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+#endif /* __ARM_CORE_SCALING_H__ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
new file mode 100644
index 0000000..64de2d5
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2010, 2012-2014 Amlogic Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ */
+
+/**
+ * @file mali_platform.c
+ * Platform specific Mali driver functions for:
+ * meson8m2 and the newer chip
+ */
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#include <asm/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/moduleparam.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_defs.h>
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+extern void mali_post_init(void);
+struct kbase_device;
+//static int gpu_dvfs_probe(struct platform_device *pdev)
+int platform_dt_init_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	int err = -1;
+
+	err = mali_meson_init_start(pdev);
+    mali_meson_init_finish(pdev);
+    mpgpu_class_init();
+    mali_post_init();
+    return err;
+}
+
+//static int gpu_dvfs_remove(struct platform_device *pdev)
+void platform_dt_term_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	printk("%s, %d\n", __FILE__, __LINE__);
+
+    mpgpu_class_exit();
+	mali_meson_uninit(pdev);
+
+}
+
+static u32 last_utilisation, last_util_gl_share, last_util_cl_share[2];
+inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2])
+{
+    last_utilisation = utilisation;
+    last_util_gl_share = util_gl_share;
+    last_util_cl_share[0] = util_cl_share[0];
+    last_util_cl_share[1] = util_cl_share[1];
+    mali_gpu_utilization_callback(utilisation*255/100);
+    return 1;
+}
+
+u32 mpgpu_get_utilization(void)
+{
+    return last_utilisation;
+}
+u32 mpgpu_get_util_gl_share(void)
+{
+    return last_util_gl_share;
+}
+u32 mpgpu_get_util_cl_share(u32 *util)
+{
+    util[0] = last_util_cl_share[0];
+    util[1] = last_util_cl_share[1];
+    return 0;
+}
+
+struct kbase_platform_funcs_conf dt_funcs_conf = {
+    .platform_init_func = platform_dt_init_func,
+    .platform_term_func = platform_dt_term_func,
+};
+#if 0
+static const struct of_device_id gpu_dvfs_ids[] = {
+	{ .compatible = "meson, gpu-dvfs-1.00.a" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, gpu_dvfs_ids);
+
+static struct platform_driver gpu_dvfs_driver = {
+	.driver = {
+		.name = "meson-gpu-dvfs",
+		.owner = THIS_MODULE,
+		.of_match_table = gpu_dvfs_ids,
+	},
+	.probe = gpu_dvfs_probe,
+	.remove = gpu_dvfs_remove,
+};
+module_platform_driver(gpu_dvfs_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Amlogic SH, MM");
+MODULE_DESCRIPTION("Driver for the Meson GPU dvfs");
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
new file mode 100755
index 0000000..65f8e14
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
@@ -0,0 +1,38 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#ifndef MESON_MAIN_H_
+#define MESON_MAIN_H_
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+u32 set_max_mali_freq(u32 idx);
+u32 get_max_mali_freq(void);
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev);
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev);
+int mali_meson_uninit(struct platform_device* ptr_plt_dev);
+int mpgpu_class_init(void);
+void mpgpu_class_exit(void);
+void mali_gpu_utilization_callback(int utilization_pp);
+
+u32 mpgpu_get_utilization(void);
+u32 mpgpu_get_util_gl_share(void);
+u32 mpgpu_get_util_cl_share(u32 *util);
+u32 mpgpu_get_gpu_err_count(void);
+
+#endif /* MESON_MAIN_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
new file mode 100755
index 0000000..831ae72
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
@@ -0,0 +1,313 @@
+/*******************************************************************
+ *
+ *  Copyright C 2013 by Amlogic, Inc. All Rights Reserved.
+ *
+ *  Description:
+ *
+ *  Author: Amlogic Software
+ *  Created: 2010/4/1   19:46
+ *
+ *******************************************************************/
+/* Standard Linux headers */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/io.h>
+#include <plat/io.h>
+#include <asm/io.h>
+#endif
+
+//#include <mali_kbase.h>
+#include "meson_main2.h"
+
+int meson_gpu_data_invalid_count = 0;
+int meson_gpu_fault = 0;
+
+static ssize_t domain_stat_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	unsigned int val;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+	return sprintf(buf, "%x\n", val>>4);
+	return 0;
+}
+
+#define PREHEAT_CMD "preheat"
+#define PLL2_CMD "mpl2"  /* mpl2 [11] or [0xxxxxxx] */
+#define SCMPP_CMD "scmpp"  /* scmpp [number of pp your want in most of time]. */
+#define BSTGPU_CMD "bstgpu"  /* bstgpu [0-256] */
+#define BSTPP_CMD "bstpp"  /* bstpp [0-256] */
+#define LIMIT_CMD "lmt"  /* lmt [0 or 1] */
+#define MAX_TOKEN 20
+#define FULL_UTILIZATION 256
+
+static ssize_t mpgpu_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	char *pstart, *cprt = NULL;
+	u32 val = 0;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	cprt = skip_spaces(buf);
+	pstart = strsep(&cprt," ");
+	if (strlen(pstart) < 1)
+		goto quit;
+
+	if (!strncmp(pstart, PREHEAT_CMD, MAX_TOKEN)) {
+		if (pmali_plat->plat_preheat) {
+			pmali_plat->plat_preheat();
+		}
+	} else if (!strncmp(pstart, PLL2_CMD, MAX_TOKEN)) {
+		int base = 10;
+		if ((strlen(cprt) > 2) && (cprt[0] == '0') &&
+				(cprt[1] == 'x' || cprt[1] == 'X'))
+			base = 16;
+		if (kstrtouint(cprt, base, &val) <0)
+			goto quit;
+		if (val < 11)
+			pmali_plat->cfg_clock = pmali_plat->cfg_clock_bkup;
+		else
+			pmali_plat->cfg_clock = pmali_plat->turbo_clock;
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		set_str_src(val);
+	} else if (!strncmp(pstart, SCMPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < pmali_plat->cfg_pp)) {
+			pmali_plat->sc_mpp = val;
+		}
+	} else if (!strncmp(pstart, BSTGPU_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_gpu = val;
+		}
+	} else if (!strncmp(pstart, BSTPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_pp = val;
+		}
+	} else if (!strncmp(pstart, LIMIT_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+
+		if (val < 2) {
+			pmali_plat->limit_on = val;
+			if (val == 0) {
+				pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+				pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+				revise_mali_rt();
+			}
+		}
+	}
+quit:
+	return count;
+}
+
+static ssize_t scale_mode_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_mali_schel_mode());
+}
+
+static ssize_t scale_mode_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	set_mali_schel_mode(val);
+
+	return count;
+}
+
+static ssize_t max_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxclk:%d, maxclk_sys:%d, max gpu level=%d\n",
+			pmali_plat->scale_info.maxclk, pmali_plat->maxclk_sysfs, get_gpu_max_clk_level());
+	return sprintf(buf, "%d\n", get_gpu_max_clk_level());
+}
+
+static ssize_t max_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_clock) || (val < pinfo->minclk))
+		return -EINVAL;
+
+	pmali_plat->maxclk_sysfs = val;
+	pinfo->maxclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minclk);
+}
+
+static ssize_t min_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxclk))
+		return -EINVAL;
+
+	pinfo->minclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_current_frequency());
+}
+
+static ssize_t freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(val, pp, 1);
+
+	return count;
+}
+
+static ssize_t utilization_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", mpgpu_get_utilization());
+}
+
+static ssize_t util_gl_share_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", mpgpu_get_util_gl_share());
+}
+
+static ssize_t util_cl_share_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+    u32 val[2];
+
+    mpgpu_get_util_cl_share(val);
+
+	return sprintf(buf, "%d  %d\n", val[0], val[1]);
+}
+
+u32 mpgpu_get_gpu_err_count(void)
+{
+    return (meson_gpu_fault + meson_gpu_data_invalid_count);
+}
+
+static ssize_t meson_gpu_get_err_count(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", mpgpu_get_gpu_err_count());
+}
+
+static ssize_t mpgpu_set_err_count(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	meson_gpu_fault = val;
+
+	return count;
+}
+
+static struct class_attribute mali_class_attrs[] = {
+	__ATTR(domain_stat,	0644, domain_stat_read, NULL),
+	__ATTR(mpgpucmd,	0644, NULL,		mpgpu_write),
+	__ATTR(scale_mode,	0644, scale_mode_read,  scale_mode_write),
+	__ATTR(min_freq,	0644, min_freq_read,  	min_freq_write),
+	__ATTR(max_freq,	0644, max_freq_read,	max_freq_write),
+	__ATTR(cur_freq,	0644, freq_read,	freq_write),
+	__ATTR(utilization,	0644, utilization_read, NULL),
+	__ATTR(util_gl,	    0644, util_gl_share_read, NULL),
+	__ATTR(util_cl,	    0644, util_cl_share_read, NULL),
+	__ATTR(gpu_err,	    0644, meson_gpu_get_err_count, mpgpu_set_err_count),
+};
+
+static struct class mpgpu_class = {
+	.name = "mpgpu",
+};
+
+int mpgpu_class_init(void)
+{
+	int ret = 0;
+	int i;
+	int attr_num =  ARRAY_SIZE(mali_class_attrs);
+
+	ret = class_register(&mpgpu_class);
+	if (ret) {
+		printk(KERN_ERR "%s: class_register failed\n", __func__);
+		return ret;
+	}
+	for (i = 0; i< attr_num; i++) {
+		ret = class_create_file(&mpgpu_class, &mali_class_attrs[i]);
+		if (ret) {
+			printk(KERN_ERR "%d ST: class item failed to register\n", i);
+		}
+	}
+	return ret;
+}
+
+void  mpgpu_class_exit(void)
+{
+	class_unregister(&mpgpu_class);
+}
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
new file mode 100755
index 0000000..3997300
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
@@ -0,0 +1,246 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include <asm/io.h>
+#ifdef CONFIG_AMLOGIC_GPU_THERMAL
+#include <linux/amlogic/gpu_cooling.h>
+#include <linux/amlogic/gpucore_cooling.h>
+//#include <linux/amlogic/aml_thermal_hw.h>
+#include <linux/amlogic/meson_cooldev.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+/*
+ *    For Meson 8 M2.
+ *
+ */
+static void mali_plat_preheat(void);
+static mali_plat_info_t mali_plat_data = {
+    .bst_gpu = 210, /* threshold for boosting gpu. */
+    .bst_pp = 160, /* threshold for boosting PP. */
+    .have_switch = 1,
+    .limit_on = 1,
+    .plat_preheat = mali_plat_preheat,
+};
+
+static void mali_plat_preheat(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    u32 pre_fs;
+    u32 clk, pp;
+
+    if (get_mali_schel_mode() != MALI_PP_FS_SCALING)
+        return;
+
+    get_mali_rt_clkpp(&clk, &pp);
+    pre_fs = mali_plat_data.def_clock + 1;
+    if (clk < pre_fs)
+        clk = pre_fs;
+    if (pp < mali_plat_data.sc_mpp)
+        pp = mali_plat_data.sc_mpp;
+    set_mali_rt_clkpp(clk, pp, 1);
+#endif
+}
+
+mali_plat_info_t* get_mali_plat_data(void) {
+    return &mali_plat_data;
+}
+
+int get_mali_freq_level(int freq)
+{
+    int i = 0, level = -1;
+    int mali_freq_num;
+
+    if (freq < 0)
+        return level;
+
+    mali_freq_num = mali_plat_data.dvfs_table_size - 1;
+    if (freq <= mali_plat_data.clk_sample[0])
+        level = mali_freq_num-1;
+    else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1])
+        level = 0;
+    else {
+        for (i=0; i<mali_freq_num - 1 ;i++) {
+            if (freq >= mali_plat_data.clk_sample[i] && freq <= mali_plat_data.clk_sample[i + 1]) {
+                level = i;
+                level = mali_freq_num-level - 1;
+            }
+        }
+    }
+    return level;
+}
+
+unsigned int get_mali_max_level(void)
+{
+    return mali_plat_data.dvfs_table_size - 1;
+}
+
+int get_gpu_max_clk_level(void)
+{
+    return mali_plat_data.cfg_clock;
+}
+
+#ifdef CONFIG_AMLOGIC_GPU_THERMAL
+static void set_limit_mali_freq(u32 idx)
+{
+    if (mali_plat_data.limit_on == 0)
+        return;
+    if (idx > mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk)
+        return;
+    if (idx > mali_plat_data.maxclk_sysfs) {
+        printk("idx > max freq\n");
+        return;
+    }
+    mali_plat_data.scale_info.maxclk= idx;
+    revise_mali_rt();
+}
+
+static u32 get_limit_mali_freq(void)
+{
+    return mali_plat_data.scale_info.maxclk;
+}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 get_mali_utilization(void)
+{
+#ifndef MESON_DRV_BRING
+    return 55;
+#else
+    return (_mali_ukk_utilization_pp() * 100) / 256;
+#endif
+}
+#endif
+#endif
+
+#ifdef CONFIG_AMLOGIC_GPU_THERMAL
+static u32 set_limit_pp_num(u32 num)
+{
+    u32 ret = -1;
+    if (mali_plat_data.limit_on == 0)
+        goto quit;
+    if (num > mali_plat_data.cfg_pp ||
+            num < mali_plat_data.scale_info.minpp)
+        goto quit;
+
+    if (num > mali_plat_data.maxpp_sysfs) {
+        printk("pp > sysfs set pp\n");
+        goto quit;
+    }
+
+    mali_plat_data.scale_info.maxpp = num;
+    revise_mali_rt();
+    ret = 0;
+quit:
+    return ret;
+}
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 mali_get_online_pp(void)
+{
+    unsigned int val;
+    mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+    val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+    if (val == 0x07)    /* No pp is working */
+        return 0;
+
+#ifndef MESON_DRV_BRING
+    return 2;
+#else
+    return mali_executor_get_num_cores_enabled();
+#endif
+}
+#endif
+#endif
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+    //dev_set_drvdata(&ptr_plt_dev->dev, &mali_plat_data);
+    mali_dt_info(ptr_plt_dev, &mali_plat_data);
+    mali_clock_init_clk_tree(ptr_plt_dev);
+    return 0;
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+    if (mali_core_scaling_init(&mali_plat_data) < 0)
+        return -1;
+    return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+    mali_core_scaling_term();
+    return 0;
+}
+
+void mali_post_init(void)
+{
+#ifdef CONFIG_AMLOGIC_GPU_THERMAL
+    int err;
+    struct gpufreq_cooling_device *gcdev = NULL;
+    struct gpucore_cooling_device *gccdev = NULL;
+
+    gcdev = gpufreq_cooling_alloc();
+    register_gpu_freq_info(get_current_frequency);
+    if (IS_ERR(gcdev))
+        printk("malloc gpu cooling buffer error!!\n");
+    else if (!gcdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gcdev->get_gpu_freq_level = get_mali_freq_level;
+        gcdev->get_gpu_max_level = get_mali_max_level;
+        gcdev->set_gpu_freq_idx = set_limit_mali_freq;
+        gcdev->get_gpu_current_max_level = get_limit_mali_freq;
+#ifdef CONFIG_DEVFREQ_THERMAL
+        gcdev->get_gpu_freq = get_mali_freq;
+        gcdev->get_gpu_loading = get_mali_utilization;
+        gcdev->get_online_pp = mali_get_online_pp;
+#endif
+        err = gpufreq_cooling_register(gcdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        meson_gcooldev_min_update(gcdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu cooling register okay with err=%d\n",err);
+    }
+
+    gccdev = gpucore_cooling_alloc();
+    if (IS_ERR(gccdev))
+        printk("malloc gpu core cooling buffer error!!\n");
+    else if (!gccdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gccdev->max_gpu_core_num=mali_plat_data.cfg_pp;
+        gccdev->set_max_pp_num=set_limit_pp_num;
+        err = (int)gpucore_cooling_register(gccdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        meson_gcooldev_min_update(gccdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu core cooling register okay with err=%d\n",err);
+    }
+#endif
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
new file mode 100644
index 0000000..4fa0773
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
@@ -0,0 +1,584 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.c
+ * Example core scaling policy.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+
+#if AMLOGIC_GPU_USE_GPPLL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16)
+#include <linux/amlogic/amports/gp_pll.h>
+#else
+#include <linux/amlogic/media/clk/gp_pll.h>
+#endif
+#endif
+
+#define LOG_MALI_SCALING 1
+#include "meson_main2.h"
+#include "mali_clock.h"
+
+static int currentStep;
+#ifndef CONFIG_MALI_DVFS
+static int num_cores_enabled;
+static int lastStep;
+static struct work_struct wq_work;
+static mali_plat_info_t* pmali_plat = NULL;
+#endif
+static int  scaling_mode = MALI_PP_FS_SCALING;
+extern int  mali_pm_statue;
+//static int  scaling_mode = MALI_SCALING_DISABLE;
+//static int  scaling_mode = MALI_PP_SCALING;
+
+#if AMLOGIC_GPU_USE_GPPLL
+static struct gp_pll_user_handle_s *gp_pll_user_gpu;
+static int is_gp_pll_get;
+static int is_gp_pll_put;
+#endif
+static unsigned scaling_dbg_level = 0;
+module_param(scaling_dbg_level, uint, 0644);
+MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level");
+
+#define scalingdbg(level, fmt, arg...)				   \
+	do {											   \
+		if (scaling_dbg_level >= (level))			   \
+		printk(fmt , ## arg);						   \
+	} while (0)
+
+#ifndef CONFIG_MALI_DVFS
+static inline void mali_clk_exected(void)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	uint32_t execStep = currentStep;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[currentStep];
+
+	//if (pdvfs[currentStep].freq_index == pdvfs[lastStep].freq_index) return;
+	if ((pdvfs[execStep].freq_index == pdvfs[lastStep].freq_index) ||
+		(pdvfs[execStep].clk_freq == pdvfs[lastStep].clk_freq)){
+		return;
+	}
+
+#if AMLOGIC_GPU_USE_GPPLL
+	if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+		gp_pll_request(gp_pll_user_gpu);
+		if (!is_gp_pll_get) {
+			//printk("not get pll\n");
+			execStep = currentStep - 1;
+		}
+	} else {
+		//not get the gp pll, do need put
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+		gp_pll_release(gp_pll_user_gpu);
+	}
+#else
+	if ((0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) &&
+			!IS_ERR(dvfs_tbl->clkp_handle) &&
+			(0 != dvfs_tbl->clkp_freq)) {
+		clk_prepare_enable(dvfs_tbl->clkp_handle);
+		clk_set_rate(dvfs_tbl->clkp_handle, dvfs_tbl->clkp_freq);
+	}
+
+#endif
+	//mali_dev_pause();
+	mali_clock_set(pdvfs[execStep].freq_index);
+	//mali_dev_resume();
+#if AMLOGIC_GPU_USE_GPPLL==0
+	if ((0 == strcmp(pdvfs[lastStep].clk_parent,"gp0_pll")) &&
+		(0 != strcmp(pdvfs[execStep].clk_parent, "gp0_pll"))) {
+			clk_disable_unprepare(pdvfs[lastStep].clkp_handle);
+	}
+#endif
+
+	lastStep = execStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	if (is_gp_pll_put) {
+		//printk("release gp0 pll\n");
+		gp_pll_release(gp_pll_user_gpu);
+		gp_pll_request(gp_pll_user_gpu);
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+	}
+#endif
+
+}
+#if AMLOGIC_GPU_USE_GPPLL
+static int gp_pll_user_cb_gpu(struct gp_pll_user_handle_s *user,
+		int event)
+{
+	if (event == GP_PLL_USER_EVENT_GRANT) {
+		//printk("granted\n");
+		is_gp_pll_get = 1;
+		is_gp_pll_put = 0;
+		schedule_work(&wq_work);
+	} else if (event == GP_PLL_USER_EVENT_YIELD) {
+		//printk("ask for yield\n");
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 1;
+		schedule_work(&wq_work);
+	}
+
+	return 0;
+}
+#endif
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+    cores = cores;
+    return 0;
+}
+
+static void do_scaling(struct work_struct *work)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	int err = mali_perf_set_num_pp_cores(num_cores_enabled);
+    if (err < 0) scalingdbg(1, "set pp failed");
+
+	scalingdbg(1, "set pp cores to %d\n", num_cores_enabled);
+	scalingdbg(1, "pdvfs[%d].freq_index=%d, pdvfs[%d].freq_index=%d\n",
+			currentStep, pdvfs[currentStep].freq_index,
+			lastStep, pdvfs[lastStep].freq_index);
+	mali_clk_exected();
+#ifdef CONFIG_MALI400_PROFILING
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+			MALI_PROFILING_EVENT_CHANNEL_GPU |
+			MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+			get_current_frequency(),
+			0,	0,	0,	0);
+#endif
+}
+#endif
+
+u32 revise_set_clk(u32 val, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+
+	pinfo = &pmali_plat->scale_info;
+
+	if (val < pinfo->minclk)
+		val = pinfo->minclk;
+	else if (val >  pinfo->maxclk)
+		val =  pinfo->maxclk;
+
+	if (val != currentStep) {
+		currentStep = val;
+		if (flush)
+			schedule_work(&wq_work);
+		else
+			ret = 1;
+	}
+#endif
+	return ret;
+}
+
+void get_mali_rt_clkpp(u32* clk, u32* pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	*clk = currentStep;
+	*pp = num_cores_enabled;
+#endif
+}
+
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+	u32 flush_work = 0;
+
+	pinfo = &pmali_plat->scale_info;
+	if (clk < pinfo->minclk)
+		clk = pinfo->minclk;
+	else if (clk >  pinfo->maxclk)
+		clk =  pinfo->maxclk;
+
+	if (clk != currentStep) {
+		currentStep = clk;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+	if (pp < pinfo->minpp)
+		pp = pinfo->minpp;
+	else if (pp > pinfo->maxpp)
+		pp = pinfo->maxpp;
+
+	if (pp != num_cores_enabled) {
+		num_cores_enabled = pp;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+
+	if (flush_work)
+		schedule_work(&wq_work);
+#endif
+	return ret;
+}
+
+void revise_mali_rt(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	set_mali_rt_clkpp(currentStep, num_cores_enabled, 1);
+#endif
+}
+
+void flush_scaling_job(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	cancel_work_sync(&wq_work);
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 enable_one_core(void)
+{
+	scalingdbg(2, "meson:	 one more pp, curent has %d pp cores\n",  num_cores_enabled + 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled + 1, 0);
+}
+
+static u32 disable_one_core(void)
+{
+	scalingdbg(2, "meson: disable one pp, current has %d pp cores\n",  num_cores_enabled - 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled - 1, 0);
+}
+
+static u32 enable_max_num_cores(void)
+{
+	return set_mali_rt_clkpp(currentStep, pmali_plat->scale_info.maxpp, 0);
+}
+
+static u32 enable_pp_cores(u32 val)
+{
+	scalingdbg(2, "meson: enable %d pp cores\n", val);
+	return set_mali_rt_clkpp(currentStep, val, 0);
+}
+#endif
+
+int mali_core_scaling_init(mali_plat_info_t *mali_plat)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_plat == NULL) {
+		scalingdbg(2, " Mali platform data is NULL!!!\n");
+		return -1;
+	}
+
+	pmali_plat = mali_plat;
+    printk("mali_plat=%p\n", mali_plat);
+	num_cores_enabled = pmali_plat->sc_mpp;
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_gpu = gp_pll_user_register("gpu", 1,
+		gp_pll_user_cb_gpu);
+	//not get the gp pll, do need put
+	is_gp_pll_get = 0;
+	is_gp_pll_put = 0;
+	if (gp_pll_user_gpu == NULL) printk("register gp pll user for gpu failed\n");
+#endif
+
+	currentStep = pmali_plat->def_clock;
+	lastStep = currentStep;
+	INIT_WORK(&wq_work, do_scaling);
+#endif
+	return 0;
+	/* NOTE: Mali is not fully initialized at this point. */
+}
+
+void mali_core_scaling_term(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	flush_scheduled_work();
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_unregister(gp_pll_user_gpu);
+#endif
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 mali_threshold [] = {
+	40, /* 40% */
+	50, /* 50% */
+	90, /* 90% */
+};
+#endif
+
+void mali_pp_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+
+	if (mali_threshold[2] < utilization_pp)
+		ret = enable_max_num_cores();
+	else if (mali_threshold[1]< utilization_pp)
+		ret = enable_one_core();
+	else if (0 < utilization_pp)
+		ret = disable_one_core();
+	if (ret == 1)
+		schedule_work(&wq_work);
+#endif
+}
+
+#if LOG_MALI_SCALING
+void trace_utilization(int utilization_gpu, u32 current_idx, u32 next,
+		u32 current_pp, u32 next_pp)
+{
+	char direction;
+	if (next > current_idx)
+		direction = '>';
+	else if ((current_idx > pmali_plat->scale_info.minpp) && (next < current_idx))
+		direction = '<';
+	else
+		direction = '~';
+
+	scalingdbg(2, "[SCALING]%c (%3d-->%3d)@%3d{%3d - %3d}. pp:(%d-->%d)\n",
+			direction,
+			get_mali_freq(current_idx),
+			get_mali_freq(next),
+			utilization_gpu,
+			pmali_plat->dvfs_table[current_idx].downthreshold,
+			pmali_plat->dvfs_table[current_idx].upthreshold,
+			current_pp, next_pp);
+}
+#endif
+
+#ifndef CONFIG_MALI_DVFS
+static int mali_stay_count = 0;
+static void mali_decide_next_status(int utilization_pp, int* next_fs_idx,
+		int* pp_change_flag)
+{
+	u32 mali_up_limit, decided_fs_idx;
+	u32 ld_left, ld_right;
+	u32 ld_up, ld_down;
+	u32 change_mode;
+
+	*pp_change_flag = 0;
+	change_mode = 0;
+
+	scalingdbg(5, "line(%d), scaling_mode=%d, MALI_TURBO_MODE=%d, turbo=%d, maxclk=%d\n",
+			__LINE__,  scaling_mode, MALI_TURBO_MODE,
+		    pmali_plat->turbo_clock, pmali_plat->scale_info.maxclk);
+
+	mali_up_limit = (scaling_mode ==  MALI_TURBO_MODE) ?
+		pmali_plat->turbo_clock : pmali_plat->scale_info.maxclk;
+	decided_fs_idx = currentStep;
+
+	ld_up = pmali_plat->dvfs_table[currentStep].upthreshold;
+	ld_down = pmali_plat->dvfs_table[currentStep].downthreshold;
+
+	scalingdbg(2, "utilization=%d,  ld_up=%d\n ", utilization_pp,  ld_up);
+	if (utilization_pp >= ld_up) { /* go up */
+
+		scalingdbg(2, "currentStep=%d,  mali_up_limit=%d\n ", currentStep, mali_up_limit);
+		if (currentStep < mali_up_limit) {
+			change_mode = 1;
+			if ((currentStep < pmali_plat->def_clock) && (utilization_pp > pmali_plat->bst_gpu))
+				decided_fs_idx = pmali_plat->def_clock;
+			else
+				decided_fs_idx++;
+		}
+		if ((utilization_pp >= ld_up) &&
+				(num_cores_enabled < pmali_plat->scale_info.maxpp)) {
+			if ((num_cores_enabled < pmali_plat->sc_mpp) && (utilization_pp >= pmali_plat->bst_pp)) {
+				*pp_change_flag = 1;
+				change_mode = 1;
+			} else if (change_mode == 0) {
+				*pp_change_flag = 2;
+				change_mode = 1;
+			}
+		}
+#if LOG_MALI_SCALING
+		scalingdbg(2, "[nexting..] [LD:%d]-> FS[CRNT:%d LMT:%d NEXT:%d] PP[NUM:%d LMT:%d MD:%d][F:%d]\n",
+				utilization_pp, currentStep, mali_up_limit, decided_fs_idx,
+				num_cores_enabled, pmali_plat->scale_info.maxpp, *pp_change_flag, change_mode);
+#endif
+	} else if (utilization_pp <= ld_down) { /* go down */
+		if (mali_stay_count > 0) {
+			*next_fs_idx = decided_fs_idx;
+			mali_stay_count--;
+			return;
+		}
+
+		if (num_cores_enabled > pmali_plat->sc_mpp) {
+			change_mode = 1;
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		} else if (currentStep > pmali_plat->scale_info.minclk) {
+			change_mode = 1;
+		} else if (num_cores_enabled > 1) { /* decrease PPS */
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				scalingdbg(2, "ld_left=%d, ld_right=%d\n", ld_left, ld_right);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		}
+
+		if (change_mode == 1) {
+			decided_fs_idx--;
+		} else if (change_mode == 2) { /* decrease PPS */
+			*pp_change_flag = -1;
+		}
+	}
+
+	if (decided_fs_idx < 0 ) {
+		printk("gpu debug, next index below 0\n");
+		decided_fs_idx = 0;
+	}
+	if (decided_fs_idx > pmali_plat->scale_info.maxclk) {
+		decided_fs_idx = pmali_plat->scale_info.maxclk;
+		printk("gpu debug, next index above max-1, set to %d\n", decided_fs_idx);
+	}
+
+	if (change_mode)
+		mali_stay_count = pmali_plat->dvfs_table[decided_fs_idx].keep_count;
+
+	*next_fs_idx = decided_fs_idx;
+}
+#endif
+
+void mali_pp_fs_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+	int pp_change_flag = 0;
+	u32 next_idx = 0;
+
+#if LOG_MALI_SCALING
+	u32 last_pp = num_cores_enabled;
+#endif
+	mali_decide_next_status(utilization_pp, &next_idx, &pp_change_flag);
+
+	if (pp_change_flag == 1)
+		ret = enable_pp_cores(pmali_plat->sc_mpp);
+	else if (pp_change_flag == 2)
+		ret = enable_one_core();
+	else if (pp_change_flag == -1) {
+		ret = disable_one_core();
+	}
+
+#if LOG_MALI_SCALING
+	if (pp_change_flag || (next_idx != currentStep))
+		trace_utilization(utilization_pp, currentStep, next_idx, last_pp, num_cores_enabled);
+#endif
+
+	if (next_idx != currentStep) {
+		ret = 1;
+		currentStep = next_idx;
+	}
+
+	if (ret == 1)
+		schedule_work(&wq_work);
+#ifdef CONFIG_MALI400_PROFILING
+	else
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				MALI_PROFILING_EVENT_CHANNEL_GPU |
+				MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				get_current_frequency(),
+				0,	0,	0,	0);
+#endif
+#endif
+}
+
+u32 get_mali_schel_mode(void)
+{
+	return scaling_mode;
+}
+
+void set_mali_schel_mode(u32 mode)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mode >= MALI_SCALING_MODE_MAX)
+		return;
+	scaling_mode = mode;
+
+	//disable thermal in turbo mode
+	if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->limit_on = 0;
+	} else {
+		pmali_plat->limit_on = 1;
+	}
+	/* set default performance range. */
+	pmali_plat->scale_info.minclk = pmali_plat->cfg_min_clock;
+	pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+	pmali_plat->scale_info.minpp = pmali_plat->cfg_min_pp;
+	pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+
+	/* set current status and tune max freq */
+	if (scaling_mode == MALI_PP_FS_SCALING) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_pp_cores(pmali_plat->sc_mpp);
+	} else if (scaling_mode == MALI_SCALING_DISABLE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_max_num_cores();
+	} else if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->turbo_clock;
+		enable_max_num_cores();
+	}
+	currentStep = pmali_plat->scale_info.maxclk;
+	schedule_work(&wq_work);
+#endif
+}
+
+u32 get_current_frequency(void)
+{
+	return get_mali_freq(currentStep);
+}
+
+void mali_gpu_utilization_callback(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_pm_statue)
+		return;
+
+	switch (scaling_mode) {
+		case MALI_PP_FS_SCALING:
+			mali_pp_fs_scaling_update(utilization_pp);
+			break;
+		case MALI_PP_SCALING:
+			mali_pp_scaling_update(utilization_pp);
+			break;
+		default:
+			break;
+	}
+#endif
+}
+static u32 clk_cntl_save = 0;
+void mali_dev_freeze(void)
+{
+	clk_cntl_save = mplt_read(HHI_MALI_CLK_CNTL);
+}
+
+void mali_dev_restore(void)
+{
+
+	mplt_write(HHI_MALI_CLK_CNTL, clk_cntl_save);
+	if (pmali_plat && pmali_plat->pdev) {
+		mali_clock_init_clk_tree(pmali_plat->pdev);
+	} else {
+		printk("error: init clock failed, pmali_plat=%p, pmali_plat->pdev=%p\n",
+				pmali_plat, pmali_plat == NULL ? NULL: pmali_plat->pdev);
+	}
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/meson/Kbuild b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/meson/Kbuild
new file mode 100644
index 0000000..d40d798
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/meson/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_config_devicetree.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_config_devicetree.c
new file mode 100644
index 0000000..299d0e7
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_config_devicetree.c
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase_config.h>
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &dummy_platform_config;
+}
+
+#ifndef CONFIG_OF
+int kbase_platform_register(void)
+{
+	return 0;
+}
+
+void kbase_platform_unregister(void)
+{
+}
+#endif
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_config_platform.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_config_platform.h
new file mode 100644
index 0000000..2ceca34
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_config_platform.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (5000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (5000)
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (NULL)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+/**
+ * Autosuspend delay
+ *
+ * The delay time (in milliseconds) to be used for autosuspend
+ */
+#define AUTO_SUSPEND_DELAY (100)
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_runtime_pm.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_runtime_pm.c
new file mode 100644
index 0000000..8e2fd74
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_runtime_pm.c
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <linux/pm_runtime.h>
+#include "mali_kbase_config_platform.h"
+
+inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2])
+{
+    trace_printk("utilisation=%d, util_gl_share=%d\n", utilisation, util_gl_share);
+    return 1;
+}
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	int ret = 1; /* Assume GPU has been powered off */
+	int error;
+
+	dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+			(void *)kbdev->dev->pm_domain);
+
+	error = pm_runtime_get_sync(kbdev->dev);
+	if (error == 1) {
+		/*
+		 * Let core know that the chip has not been
+		 * powered off, so we can save on re-initialization.
+		 */
+		ret = 0;
+	}
+
+	dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error);
+
+	return ret;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+
+	pm_runtime_mark_last_busy(kbdev->dev);
+	pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+
+	pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY);
+	pm_runtime_use_autosuspend(kbdev->dev);
+
+	pm_runtime_set_active(kbdev->dev);
+	pm_runtime_enable(kbdev->dev);
+
+	if (!pm_runtime_enabled(kbdev->dev)) {
+		dev_warn(kbdev->dev, "pm_runtime not enabled");
+		ret = -ENOSYS;
+	}
+
+	return ret;
+}
+
+static void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+	pm_runtime_disable(kbdev->dev);
+}
+#endif
+
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+	return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+	int ret = pm_callback_runtime_on(kbdev);
+
+	WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+	pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback = pm_callback_suspend,
+	.power_resume_callback = pm_callback_resume,
+#ifdef KBASE_PM_RUNTIME
+	.power_runtime_init_callback = kbase_device_runtime_init,
+	.power_runtime_term_callback = kbase_device_runtime_disable,
+	.power_runtime_on_callback = pm_callback_runtime_on,
+	.power_runtime_off_callback = pm_callback_runtime_off,
+#else				/* KBASE_PM_RUNTIME */
+	.power_runtime_init_callback = NULL,
+	.power_runtime_term_callback = NULL,
+	.power_runtime_on_callback = NULL,
+	.power_runtime_off_callback = NULL,
+#endif				/* KBASE_PM_RUNTIME */
+};
+
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
new file mode 100644
index 0000000..6780e4c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
new file mode 100644
index 0000000..fac3cd5
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000..d165ce2
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_config_platform.h"
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+	.start = 0xFC010000,
+	.end = 0xFC010000 + (4096 * 4) - 1
+	}
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
new file mode 100644
index 0000000..51b408e
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2013-2014, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
new file mode 100644
index 0000000..fac3cd5
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000..efca0a5
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -0,0 +1,65 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+			     .start = 0x2f010000,
+			     .end = 0x2f010000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
new file mode 100644
index 0000000..e07709c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
new file mode 100644
index 0000000..fac3cd5
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000..b6714b9
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 75,
+	.mmu_irq_number = 76,
+	.gpu_irq_number = 77,
+	.io_memory_region = {
+			     .start = 0x2F000000,
+			     .end = 0x2F000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
new file mode 100644
index 0000000..ef1ec70
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_ukk_os.h
+ * Types and definitions that are common for Linux OSs for the kernel side of the
+ * User-Kernel interface.
+ */
+
+#ifndef _UKK_OS_H_ /* Linux version */
+#define _UKK_OS_H_
+
+#include <linux/fs.h>
+
+/**
+ * @addtogroup uk_api User-Kernel Interface API
+ * @{
+ */
+
+/**
+ * @addtogroup uk_api_kernel UKK (Kernel side)
+ * @{
+ */
+
+/**
+ * Internal OS specific data structure associated with each UKK session. Part
+ * of a ukk_session object.
+ */
+typedef struct ukkp_session {
+	int dummy;     /**< No internal OS specific data at this time */
+} ukkp_session;
+
+/** @} end group uk_api_kernel */
+
+/** @} end group uk_api */
+
+#endif /* _UKK_OS_H__ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h
new file mode 100644
index 0000000..8778d81
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _PROTECTED_MODE_SWITCH_H_
+#define _PROTECTED_MODE_SWITCH_H_
+
+struct protected_mode_device;
+
+/**
+ * struct protected_mode_ops - Callbacks for protected mode switch operations
+ *
+ * @protected_mode_enable:  Callback to enable protected mode for device
+ * @protected_mode_disable: Callback to disable protected mode for device
+ */
+struct protected_mode_ops {
+	/**
+	 * protected_mode_enable() - Enable protected mode on device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enable)(
+			struct protected_mode_device *protected_dev);
+
+	/**
+	 * protected_mode_disable() - Disable protected mode on device, and
+	 *                            reset device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_disable)(
+			struct protected_mode_device *protected_dev);
+};
+
+/**
+ * struct protected_mode_device - Device structure for protected mode devices
+ *
+ * @ops  - Callbacks associated with this device
+ * @data - Pointer to device private data
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct protected_mode_device {
+	struct protected_mode_ops ops;
+	void *data;
+};
+
+#endif /* _PROTECTED_MODE_SWITCH_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/sconscript b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/sconscript
new file mode 100755
index 0000000..4c38f2a
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/sconscript
@@ -0,0 +1,66 @@
+#
+# (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+import sys
+Import('env')
+
+SConscript( 'tests/sconscript' )
+
+mock_test = 0
+
+# Source files required for kbase.
+kbase_src = [
+	Glob('*.c'),
+	Glob('backend/*/*.c'),
+	Glob('internal/*/*.c'),
+	Glob('ipa/*.c'),
+	Glob('platform/%s/*.c' % env['platform_config']),
+	Glob('thirdparty/*.c'),
+]
+
+if env['platform_config']=='juno_soc':
+	kbase_src += [Glob('platform/devicetree/*.c')]
+else:
+	kbase_src += [Glob('platform/%s/*.c' % env['platform_config'])]
+
+if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1':
+	kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')]
+	mock_test = 1
+
+make_args = env.kernel_get_config_defines(ret_list = True) + [
+	'PLATFORM=%s' % env['platform'],
+	'MALI_KERNEL_TEST_API=%s' % env['debug'],
+	'MALI_UNIT_TEST=%s' % env['unit'],
+	'MALI_RELEASE_NAME=%s' % env['mali_release_name'],
+	'MALI_MOCK_TEST=%s' % mock_test,
+	'MALI_CUSTOMER_RELEASE=%s' % env['release'],
+	'MALI_COVERAGE=%s' % env['coverage'],
+]
+
+kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src,
+                              make_args = make_args)
+
+if 'smc_protected_mode_switcher' in env:
+	env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/smc_protected_mode_switcher.ko')
+
+env.KernelObjTarget('kbase', kbase)
+
+env.AppendUnique(BASE=['cutils_linked_list'])
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/Kbuild b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/Kbuild
new file mode 100644
index 0000000..df16a77
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/Kbuild
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+obj-$(CONFIG_MALI_KUTF) += kutf/
+obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/Kconfig b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/Kconfig
new file mode 100644
index 0000000..fa91aea
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/Kconfig
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+source "drivers/gpu/arm/midgard/tests/kutf/Kconfig"
+source "drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig"
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/Mconfig b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/Mconfig
new file mode 100755
index 0000000..f692e34
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/Mconfig
@@ -0,0 +1,22 @@
+#
+# (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+
+config UNIT_TEST_KERNEL_MODULES
+	bool
+	default y if UNIT_TEST_CODE && BUILD_KERNEL_MODULES
+	default n
+
+config BUILD_IPA_TESTS
+	bool
+	default y if UNIT_TEST_KERNEL_MODULES && MALI_DEVFREQ
+	default n
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/build.bp b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/build.bp
new file mode 100755
index 0000000..28a756b
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/build.bp
@@ -0,0 +1,36 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_defaults {
+    name: "kernel_test_module_defaults",
+    defaults: ["mali_kbase_shared_config_defaults"],
+    include_dirs: [
+        "kernel/drivers/gpu/arm",
+        "kernel/drivers/gpu/arm/midgard",
+        "kernel/drivers/gpu/arm/midgard/backend/gpu",
+        "kernel/drivers/gpu/arm/midgard/tests/include",
+    ],
+    extra_symbols: ["kutf"],
+}
+
+subdirs = [
+    "kutf",
+    "mali_kutf_irq_test",
+]
+
+optional_subdirs = [
+    "kutf_test",
+    "kutf_test_runner",
+    "mali_kutf_ipa_test",
+    "mali_kutf_ipa_unit_test",
+    "mali_kutf_vinstr_test",
+]
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h
new file mode 100644
index 0000000..15e168c
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h
@@ -0,0 +1,77 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_HELPERS_H_
+#define _KERNEL_UTF_HELPERS_H_
+
+/* kutf_helpers.h
+ * Test helper functions for the kernel UTF test infrastructure.
+ *
+ * These functions provide methods for enqueuing/dequeuing lines of text sent
+ * by user space. They are used to implement the transfer of "userdata" from
+ * user space to kernel.
+ */
+
+#include <kutf/kutf_suite.h>
+
+/**
+ * kutf_helper_input_dequeue() - Dequeue a line sent by user space
+ * @context:    KUTF context
+ * @str_size:   Pointer to an integer to receive the size of the string
+ *
+ * If no line is available then this function will wait (interruptibly) until
+ * a line is available.
+ *
+ * Return: The line dequeued, ERR_PTR(-EINTR) if interrupted or NULL on end
+ * of data.
+ */
+char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size);
+
+/**
+ * kutf_helper_input_enqueue() - Enqueue a line sent by user space
+ * @context:   KUTF context
+ * @str:       The user space address of the line
+ * @size:      The length in bytes of the string
+ *
+ * This function will use copy_from_user to copy the string out of user space.
+ * The string need not be NULL-terminated (@size should not include the NULL
+ * termination).
+ *
+ * As a special case @str==NULL and @size==0 is valid to mark the end of input,
+ * but callers should use kutf_helper_input_enqueue_end_of_data() instead.
+ *
+ * Return: 0 on success, -EFAULT if the line cannot be copied from user space,
+ * -ENOMEM if out of memory.
+ */
+int kutf_helper_input_enqueue(struct kutf_context *context,
+		const char __user *str, size_t size);
+
+/**
+ * kutf_helper_input_enqueue_end_of_data() - Signal no more data is to be sent
+ * @context:    KUTF context
+ *
+ * After this function has been called, kutf_helper_input_dequeue() will always
+ * return NULL.
+ */
+void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context);
+
+#endif	/* _KERNEL_UTF_HELPERS_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h
new file mode 100644
index 0000000..3b1300e
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h
@@ -0,0 +1,179 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_HELPERS_USER_H_
+#define _KERNEL_UTF_HELPERS_USER_H_
+
+/* kutf_helpers.h
+ * Test helper functions for the kernel UTF test infrastructure, whose
+ * implementation mirrors that of similar functions for kutf-userside
+ */
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_helpers.h>
+
+
+#define KUTF_HELPER_MAX_VAL_NAME_LEN 255
+
+enum kutf_helper_valtype {
+	KUTF_HELPER_VALTYPE_INVALID,
+	KUTF_HELPER_VALTYPE_U64,
+	KUTF_HELPER_VALTYPE_STR,
+
+	KUTF_HELPER_VALTYPE_COUNT /* Must be last */
+};
+
+struct kutf_helper_named_val {
+	enum kutf_helper_valtype type;
+	char *val_name;
+	union {
+		u64 val_u64;
+		char *val_str;
+	} u;
+};
+
+/* Extra error values for certain helpers when we want to distinguish between
+ * Linux's own error values too.
+ *
+ * These can only be used on certain functions returning an int type that are
+ * documented as returning one of these potential values, they cannot be used
+ * from functions return a ptr type, since we can't decode it with PTR_ERR
+ *
+ * No negative values are used - Linux error codes should be used instead, and
+ * indicate a problem in accessing the data file itself (are generally
+ * unrecoverable)
+ *
+ * Positive values indicate correct access but invalid parsing (can be
+ * recovered from assuming data in the future is correct) */
+enum kutf_helper_err {
+	/* No error - must be zero */
+	KUTF_HELPER_ERR_NONE = 0,
+	/* Named value parsing encountered an invalid name */
+	KUTF_HELPER_ERR_INVALID_NAME,
+	/* Named value parsing of string or u64 type encountered extra
+	 * characters after the value (after the last digit for a u64 type or
+	 * after the string end delimiter for string type) */
+	KUTF_HELPER_ERR_CHARS_AFTER_VAL,
+	/* Named value parsing of string type couldn't find the string end
+	 * delimiter.
+	 *
+	 * This cannot be encountered when the NAME="value" message exceeds the
+	 * textbuf's maximum line length, because such messages are not checked
+	 * for an end string delimiter */
+	KUTF_HELPER_ERR_NO_END_DELIMITER,
+	/* Named value didn't parse as any of the known types */
+	KUTF_HELPER_ERR_INVALID_VALUE,
+};
+
+
+/* Send named NAME=value pair, u64 value
+ *
+ * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long
+ *
+ * Any failure will be logged on the suite's current test fixture
+ *
+ * Returns 0 on success, non-zero on failure
+ */
+int kutf_helper_send_named_u64(struct kutf_context *context,
+		const char *val_name, u64 val);
+
+/* Get the maximum length of a string that can be represented as a particular
+ * NAME="value" pair without string-value truncation in the kernel's buffer
+ *
+ * Given val_name and the kernel buffer's size, this can be used to determine
+ * the maximum length of a string that can be sent as val_name="value" pair
+ * without having the string value truncated. Any string longer than this will
+ * be truncated at some point during communication to this size.
+ *
+ * It is assumed that val_name is a valid name for
+ * kutf_helper_send_named_str(), and no checking will be made to
+ * ensure this.
+ *
+ * Returns the maximum string length that can be represented, or a negative
+ * value if the NAME="value" encoding itself wouldn't fit in kern_buf_sz
+ */
+int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz);
+
+/* Send named NAME="str" pair
+ *
+ * no escaping allowed in str. Any of the following characters will terminate
+ * the string: '"' '\\' '\n'
+ *
+ * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long
+ *
+ * Any failure will be logged on the suite's current test fixture
+ *
+ * Returns 0 on success, non-zero on failure */
+int kutf_helper_send_named_str(struct kutf_context *context,
+		const char *val_name, const char *val_str);
+
+/* Receive named NAME=value pair
+ *
+ * This can receive u64 and string values - check named_val->type
+ *
+ * If you are not planning on dynamic handling of the named value's name and
+ * type, then kutf_helper_receive_check_val() is more useful as a
+ * convenience function.
+ *
+ * String members of named_val will come from memory allocated on the fixture's mempool
+ *
+ * Returns 0 on success. Negative value on failure to receive from the 'run'
+ * file, positive value indicates an enum kutf_helper_err value for correct
+ * reception of data but invalid parsing */
+int kutf_helper_receive_named_val(
+		struct kutf_context *context,
+		struct kutf_helper_named_val *named_val);
+
+/* Receive and validate NAME=value pair
+ *
+ * As with kutf_helper_receive_named_val, but validate that the
+ * name and type are as expected, as a convenience for a common pattern found
+ * in tests.
+ *
+ * NOTE: this only returns an error value if there was actually a problem
+ * receiving data.
+ *
+ * NOTE: If the underlying data was received correctly, but:
+ * - isn't of the expected name
+ * - isn't the expected type
+ * - isn't correctly parsed for the type
+ * then the following happens:
+ * - failure result is recorded
+ * - named_val->type will be KUTF_HELPER_VALTYPE_INVALID
+ * - named_val->u will contain some default value that should be relatively
+ *   harmless for the test, including being writable in the case of string
+ *   values
+ * - return value will be 0 to indicate success
+ *
+ * The rationale behind this is that we'd prefer to continue the rest of the
+ * test with failures propagated, rather than hitting a timeout */
+int kutf_helper_receive_check_val(
+		struct kutf_helper_named_val *named_val,
+		struct kutf_context *context,
+		const char *expect_val_name,
+		enum kutf_helper_valtype expect_val_type);
+
+/* Output a named value to kmsg */
+void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val);
+
+
+#endif	/* _KERNEL_UTF_HELPERS_USER_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h
new file mode 100644
index 0000000..988559d
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_MEM_H_
+#define _KERNEL_UTF_MEM_H_
+
+/* kutf_mem.h
+ * Functions for management of memory pools in the kernel.
+ *
+ * This module implements a memory pool allocator, allowing a test
+ * implementation to allocate linked allocations which can then be freed by a
+ * single free which releases all of the resources held by the entire pool.
+ *
+ * Note that it is not possible to free single resources within the pool once
+ * allocated.
+ */
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+/**
+ * struct kutf_mempool - the memory pool context management structure
+ * @head:	list head on which the allocations in this context are added to
+ * @lock:	mutex for concurrent allocation from multiple threads
+ *
+ */
+struct kutf_mempool {
+	struct list_head head;
+	struct mutex lock;
+};
+
+/**
+ * kutf_mempool_init() - Initialize a memory pool.
+ * @pool:	Memory pool structure to initialize, provided by the user
+ *
+ * Return:	zero on success
+ */
+int kutf_mempool_init(struct kutf_mempool *pool);
+
+/**
+ * kutf_mempool_alloc() - Allocate memory from a pool
+ * @pool:	Memory pool to allocate from
+ * @size:	Size of memory wanted in number of bytes
+ *
+ * Return:	Pointer to memory on success, NULL on failure.
+ */
+void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size);
+
+/**
+ * kutf_mempool_destroy() - Destroy a memory pool, freeing all memory within it.
+ * @pool:	The memory pool to free
+ */
+void kutf_mempool_destroy(struct kutf_mempool *pool);
+#endif	/* _KERNEL_UTF_MEM_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h
new file mode 100644
index 0000000..49ebeb4
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h
@@ -0,0 +1,181 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_RESULTSET_H_
+#define _KERNEL_UTF_RESULTSET_H_
+
+/* kutf_resultset.h
+ * Functions and structures for handling test results and result sets.
+ *
+ * This section of the kernel UTF contains structures and functions used for the
+ * management of Results and Result Sets.
+ */
+
+/**
+ * enum kutf_result_status - Status values for a single Test error.
+ * @KUTF_RESULT_BENCHMARK:	Result is a meta-result containing benchmark
+ *                              results.
+ * @KUTF_RESULT_SKIP:		The test was skipped.
+ * @KUTF_RESULT_UNKNOWN:	The test has an unknown result.
+ * @KUTF_RESULT_PASS:		The test result passed.
+ * @KUTF_RESULT_DEBUG:		The test result passed, but raised a debug
+ *                              message.
+ * @KUTF_RESULT_INFO:		The test result passed, but raised
+ *                              an informative message.
+ * @KUTF_RESULT_WARN:		The test result passed, but raised a warning
+ *                              message.
+ * @KUTF_RESULT_FAIL:		The test result failed with a non-fatal error.
+ * @KUTF_RESULT_FATAL:		The test result failed with a fatal error.
+ * @KUTF_RESULT_ABORT:		The test result failed due to a non-UTF
+ *                              assertion failure.
+ * @KUTF_RESULT_USERDATA:	User data is ready to be read,
+ *                              this is not seen outside the kernel
+ * @KUTF_RESULT_USERDATA_WAIT:	Waiting for user data to be sent,
+ *                              this is not seen outside the kernel
+ * @KUTF_RESULT_TEST_FINISHED:	The test has finished, no more results will
+ *                              be produced. This is not seen outside kutf
+ */
+enum kutf_result_status {
+	KUTF_RESULT_BENCHMARK = -3,
+	KUTF_RESULT_SKIP    = -2,
+	KUTF_RESULT_UNKNOWN = -1,
+
+	KUTF_RESULT_PASS    = 0,
+	KUTF_RESULT_DEBUG   = 1,
+	KUTF_RESULT_INFO    = 2,
+	KUTF_RESULT_WARN    = 3,
+	KUTF_RESULT_FAIL    = 4,
+	KUTF_RESULT_FATAL   = 5,
+	KUTF_RESULT_ABORT   = 6,
+
+	KUTF_RESULT_USERDATA      = 7,
+	KUTF_RESULT_USERDATA_WAIT = 8,
+	KUTF_RESULT_TEST_FINISHED = 9
+};
+
+/* The maximum size of a kutf_result_status result when
+ * converted to a string
+ */
+#define KUTF_ERROR_MAX_NAME_SIZE 21
+
+#ifdef __KERNEL__
+
+#include <kutf/kutf_mem.h>
+#include <linux/wait.h>
+
+struct kutf_context;
+
+/**
+ * struct kutf_result - Represents a single test result.
+ * @node:	Next result in the list of results.
+ * @status:	The status summary (pass / warn / fail / etc).
+ * @message:	A more verbose status message.
+ */
+struct kutf_result {
+	struct list_head            node;
+	enum kutf_result_status     status;
+	const char                  *message;
+};
+
+/**
+ * KUTF_RESULT_SET_WAITING_FOR_INPUT - Test is waiting for user data
+ *
+ * This flag is set within a struct kutf_result_set whenever the test is blocked
+ * waiting for user data. Attempts to dequeue results when this flag is set
+ * will cause a dummy %KUTF_RESULT_USERDATA_WAIT result to be produced. This
+ * is used to output a warning message and end of file.
+ */
+#define KUTF_RESULT_SET_WAITING_FOR_INPUT 1
+
+/**
+ * struct kutf_result_set - Represents a set of results.
+ * @results:	List head of a struct kutf_result list for storing the results
+ * @waitq:	Wait queue signalled whenever new results are added.
+ * @flags:	Flags see %KUTF_RESULT_SET_WAITING_FOR_INPUT
+ */
+struct kutf_result_set {
+	struct list_head          results;
+	wait_queue_head_t         waitq;
+	int                       flags;
+};
+
+/**
+ * kutf_create_result_set() - Create a new result set
+ *                            to which results can be added.
+ *
+ * Return: The created result set.
+ */
+struct kutf_result_set *kutf_create_result_set(void);
+
+/**
+ * kutf_add_result() - Add a result to the end of an existing result set.
+ *
+ * @context:	The kutf context
+ * @status:	The result status to add.
+ * @message:	The result message to add.
+ *
+ * Return: 0 if the result is successfully added. -ENOMEM if allocation fails.
+ */
+int kutf_add_result(struct kutf_context *context,
+		enum kutf_result_status status, const char *message);
+
+/**
+ * kutf_remove_result() - Remove a result from the head of a result set.
+ * @set:	The result set.
+ *
+ * This function will block until there is a result to read. The wait is
+ * interruptible, so this function will return with an ERR_PTR if interrupted.
+ *
+ * Return: result or ERR_PTR if interrupted
+ */
+struct kutf_result *kutf_remove_result(
+		struct kutf_result_set *set);
+
+/**
+ * kutf_destroy_result_set() - Free a previously created result set.
+ *
+ * @results:	The result set whose resources to free.
+ */
+void kutf_destroy_result_set(struct kutf_result_set *results);
+
+/**
+ * kutf_set_waiting_for_input() - The test is waiting for userdata
+ *
+ * @set: The result set to update
+ *
+ * Causes the result set to always have results and return a fake
+ * %KUTF_RESULT_USERDATA_WAIT result.
+ */
+void kutf_set_waiting_for_input(struct kutf_result_set *set);
+
+/**
+ * kutf_clear_waiting_for_input() - The test is no longer waiting for userdata
+ *
+ * @set: The result set to update
+ *
+ * Cancels the effect of kutf_set_waiting_for_input()
+ */
+void kutf_clear_waiting_for_input(struct kutf_result_set *set);
+
+#endif	/* __KERNEL__ */
+
+#endif	/* _KERNEL_UTF_RESULTSET_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h
new file mode 100644
index 0000000..8d75f50
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h
@@ -0,0 +1,569 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_SUITE_H_
+#define _KERNEL_UTF_SUITE_H_
+
+/* kutf_suite.h
+ * Functions for management of test suites.
+ *
+ * This collection of data structures, macros, and functions are used to
+ * create Test Suites, Tests within those Test Suites, and Fixture variants
+ * of each test.
+ */
+
+#include <linux/kref.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+
+#include <kutf/kutf_mem.h>
+#include <kutf/kutf_resultset.h>
+
+/* Arbitrary maximum size to prevent user space allocating too much kernel
+ * memory
+ */
+#define KUTF_MAX_LINE_LENGTH (1024u)
+
+/**
+ * Pseudo-flag indicating an absence of any specified test class. Note that
+ * tests should not be annotated with this constant as it is simply a zero
+ * value; tests without a more specific class must be marked with the flag
+ * KUTF_F_TEST_GENERIC.
+ */
+#define KUTF_F_TEST_NONE                ((unsigned int)(0))
+
+/**
+ * Class indicating this test is a smoke test.
+ * A given set of smoke tests should be quick to run, enabling rapid turn-around
+ * of "regress-on-commit" test runs.
+ */
+#define KUTF_F_TEST_SMOKETEST           ((unsigned int)(1 << 1))
+
+/**
+ * Class indicating this test is a performance test.
+ * These tests typically produce a performance metric, such as "time to run" or
+ * "frames per second",
+ */
+#define KUTF_F_TEST_PERFORMANCE         ((unsigned int)(1 << 2))
+
+/**
+ * Class indicating that this test is a deprecated test.
+ * These tests have typically been replaced by an alternative test which is
+ * more efficient, or has better coverage.
+ */
+#define KUTF_F_TEST_DEPRECATED          ((unsigned int)(1 << 3))
+
+/**
+ * Class indicating that this test is a known failure.
+ * These tests have typically been run and failed, but marking them as a known
+ * failure means it is easier to triage results.
+ *
+ * It is typically more convenient to triage known failures using the
+ * results database and web UI, as this means there is no need to modify the
+ * test code.
+ */
+#define KUTF_F_TEST_EXPECTED_FAILURE    ((unsigned int)(1 << 4))
+
+/**
+ * Class indicating that this test is a generic test, which is not a member of
+ * a more specific test class. Tests which are not created with a specific set
+ * of filter flags by the user are assigned this test class by default.
+ */
+#define KUTF_F_TEST_GENERIC             ((unsigned int)(1 << 5))
+
+/**
+ * Class indicating this test is a resource allocation failure test.
+ * A resource allocation failure test will test that an error code is
+ * correctly propagated when an allocation fails.
+ */
+#define KUTF_F_TEST_RESFAIL             ((unsigned int)(1 << 6))
+
+/**
+ * Additional flag indicating that this test is an expected failure when
+ * run in resource failure mode. These tests are never run when running
+ * the low resource mode.
+ */
+#define KUTF_F_TEST_EXPECTED_FAILURE_RF ((unsigned int)(1 << 7))
+
+/**
+ * Flag reserved for user-defined filter zero.
+ */
+#define KUTF_F_TEST_USER_0 ((unsigned int)(1 << 24))
+
+/**
+ * Flag reserved for user-defined filter one.
+ */
+#define KUTF_F_TEST_USER_1 ((unsigned int)(1 << 25))
+
+/**
+ * Flag reserved for user-defined filter two.
+ */
+#define KUTF_F_TEST_USER_2 ((unsigned int)(1 << 26))
+
+/**
+ * Flag reserved for user-defined filter three.
+ */
+#define KUTF_F_TEST_USER_3 ((unsigned int)(1 << 27))
+
+/**
+ * Flag reserved for user-defined filter four.
+ */
+#define KUTF_F_TEST_USER_4 ((unsigned int)(1 << 28))
+
+/**
+ * Flag reserved for user-defined filter five.
+ */
+#define KUTF_F_TEST_USER_5 ((unsigned int)(1 << 29))
+
+/**
+ * Flag reserved for user-defined filter six.
+ */
+#define KUTF_F_TEST_USER_6 ((unsigned int)(1 << 30))
+
+/**
+ * Flag reserved for user-defined filter seven.
+ */
+#define KUTF_F_TEST_USER_7 ((unsigned int)(1 << 31))
+
+/**
+ * Pseudo-flag indicating that all test classes should be executed.
+ */
+#define KUTF_F_TEST_ALL                 ((unsigned int)(0xFFFFFFFFU))
+
+/**
+ * union kutf_callback_data - Union used to store test callback data
+ * @ptr_value:		pointer to the location where test callback data
+ *                      are stored
+ * @u32_value:		a number which represents test callback data
+ */
+union kutf_callback_data {
+	void *ptr_value;
+	u32  u32_value;
+};
+
+/**
+ * struct kutf_userdata_line - A line of user data to be returned to the user
+ * @node:   struct list_head to link this into a list
+ * @str:    The line of user data to return to user space
+ * @size:   The number of bytes within @str
+ */
+struct kutf_userdata_line {
+	struct list_head node;
+	char *str;
+	size_t size;
+};
+
+/**
+ * KUTF_USERDATA_WARNING_OUTPUT - Flag specifying that a warning has been output
+ *
+ * If user space reads the "run" file while the test is waiting for user data,
+ * then the framework will output a warning message and set this flag within
+ * struct kutf_userdata. A subsequent read will then simply return an end of
+ * file condition rather than outputting the warning again. The upshot of this
+ * is that simply running 'cat' on a test which requires user data will produce
+ * the warning followed by 'cat' exiting due to EOF - which is much more user
+ * friendly than blocking indefinitely waiting for user data.
+ */
+#define KUTF_USERDATA_WARNING_OUTPUT  1
+
+/**
+ * struct kutf_userdata - Structure holding user data
+ * @flags:       See %KUTF_USERDATA_WARNING_OUTPUT
+ * @input_head:  List of struct kutf_userdata_line containing user data
+ *               to be read by the kernel space test.
+ * @input_waitq: Wait queue signalled when there is new user data to be
+ *               read by the kernel space test.
+ */
+struct kutf_userdata {
+	unsigned long flags;
+	struct list_head input_head;
+	wait_queue_head_t input_waitq;
+};
+
+/**
+ * struct kutf_context - Structure representing a kernel test context
+ * @kref:		Refcount for number of users of this context
+ * @suite:		Convenience pointer to the suite this context
+ *                      is running
+ * @test_fix:		The fixture that is being run in this context
+ * @fixture_pool:	The memory pool used for the duration of
+ *                      the fixture/text context.
+ * @fixture:		The user provided fixture structure.
+ * @fixture_index:	The index (id) of the current fixture.
+ * @fixture_name:	The name of the current fixture (or NULL if unnamed).
+ * @test_data:		Any user private data associated with this test
+ * @result_set:		All the results logged by this test context
+ * @status:		The status of the currently running fixture.
+ * @expected_status:	The expected status on exist of the currently
+ *                      running fixture.
+ * @work:		Work item to enqueue onto the work queue to run the test
+ * @userdata:		Structure containing the user data for the test to read
+ */
+struct kutf_context {
+	struct kref                     kref;
+	struct kutf_suite               *suite;
+	struct kutf_test_fixture        *test_fix;
+	struct kutf_mempool             fixture_pool;
+	void                            *fixture;
+	unsigned int                    fixture_index;
+	const char                      *fixture_name;
+	union kutf_callback_data        test_data;
+	struct kutf_result_set          *result_set;
+	enum kutf_result_status         status;
+	enum kutf_result_status         expected_status;
+
+	struct work_struct              work;
+	struct kutf_userdata            userdata;
+};
+
+/**
+ * struct kutf_suite - Structure representing a kernel test suite
+ * @app:			The application this suite belongs to.
+ * @name:			The name of this suite.
+ * @suite_data:			Any user private data associated with this
+ *                              suite.
+ * @create_fixture:		Function used to create a new fixture instance
+ * @remove_fixture:		Function used to destroy a new fixture instance
+ * @fixture_variants:		The number of variants (must be at least 1).
+ * @suite_default_flags:	Suite global filter flags which are set on
+ *                              all tests.
+ * @node:			List node for suite_list
+ * @dir:			The debugfs directory for this suite
+ * @test_list:			List head to store all the tests which are
+ *                              part of this suite
+ */
+struct kutf_suite {
+	struct kutf_application        *app;
+	const char                     *name;
+	union kutf_callback_data       suite_data;
+	void *(*create_fixture)(struct kutf_context *context);
+	void  (*remove_fixture)(struct kutf_context *context);
+	unsigned int                   fixture_variants;
+	unsigned int                   suite_default_flags;
+	struct list_head               node;
+	struct dentry                  *dir;
+	struct list_head               test_list;
+};
+
+/* ============================================================================
+	Application functions
+============================================================================ */
+
+/**
+ * kutf_create_application() - Create an in kernel test application.
+ * @name:	The name of the test application.
+ *
+ * Return: pointer to the kutf_application  on success or NULL
+ * on failure
+ */
+struct kutf_application *kutf_create_application(const char *name);
+
+/**
+ * kutf_destroy_application() - Destroy an in kernel test application.
+ *
+ * @app:	The test application to destroy.
+ */
+void kutf_destroy_application(struct kutf_application *app);
+
+/* ============================================================================
+	Suite functions
+============================================================================ */
+
+/**
+ * kutf_create_suite() - Create a kernel test suite.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *                      functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *                      is stored in the fixture pointer in the context for
+ *                      use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ *
+ * Suite names must be unique. Should two suites with the same name be
+ * registered with the same application then this function will fail, if they
+ * are registered with different applications then the function will not detect
+ * this and the call will succeed.
+ *
+ * Return: pointer to the created kutf_suite on success or NULL
+ * on failure
+ */
+struct kutf_suite *kutf_create_suite(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context));
+
+/**
+ * kutf_create_suite_with_filters() - Create a kernel test suite with user
+ *                                    defined default filters.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *                      functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *			is stored in the fixture pointer in the context for
+ *			use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ * @filters:		Filters to apply to a test if it doesn't provide its own
+ *
+ * Suite names must be unique. Should two suites with the same name be
+ * registered with the same application then this function will fail, if they
+ * are registered with different applications then the function will not detect
+ * this and the call will succeed.
+ *
+ * Return: pointer to the created kutf_suite on success or NULL on failure
+ */
+struct kutf_suite *kutf_create_suite_with_filters(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters);
+
+/**
+ * kutf_create_suite_with_filters_and_data() - Create a kernel test suite with
+ *                                             user defined default filters.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *			functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *			is stored in the fixture pointer in the context for
+ *			use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ * @filters:		Filters to apply to a test if it doesn't provide its own
+ * @suite_data:		Suite specific callback data, provided during the
+ *			running of the test in the kutf_context
+ *
+ * Return: pointer to the created kutf_suite on success or NULL
+ * on failure
+ */
+struct kutf_suite *kutf_create_suite_with_filters_and_data(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data suite_data);
+
+/**
+ * kutf_add_test() - Add a test to a kernel test suite.
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ *
+ * Note: As no filters are provided the test will use the suite filters instead
+ */
+void kutf_add_test(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context));
+
+/**
+ * kutf_add_test_with_filters() - Add a test to a kernel test suite with filters
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ * @filters:	A set of filtering flags, assigning test categories.
+ */
+void kutf_add_test_with_filters(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters);
+
+/**
+ * kutf_add_test_with_filters_and_data() - Add a test to a kernel test suite
+ *					   with filters.
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ * @filters:	A set of filtering flags, assigning test categories.
+ * @test_data:	Test specific callback data, provided during the
+ *		running of the test in the kutf_context
+ */
+void kutf_add_test_with_filters_and_data(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data test_data);
+
+
+/* ============================================================================
+	Test functions
+============================================================================ */
+/**
+ * kutf_test_log_result_external() - Log a result which has been created
+ *                                   externally into a in a standard form
+ *                                   recognized by the log parser.
+ * @context:	The test context the test is running in
+ * @message:	The message for this result
+ * @new_status:	The result status of this log message
+ */
+void kutf_test_log_result_external(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status);
+
+/**
+ * kutf_test_expect_abort() - Tell the kernel that you expect the current
+ *                            fixture to produce an abort.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_abort(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_fatal() - Tell the kernel that you expect the current
+ *                            fixture to produce a fatal error.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_fatal(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_fail() - Tell the kernel that you expect the current
+ *                           fixture to fail.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_fail(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_warn() - Tell the kernel that you expect the current
+ *                           fixture to produce a warning.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_warn(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_pass() - Tell the kernel that you expect the current
+ *                           fixture to pass.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_pass(struct kutf_context *context);
+
+/**
+ * kutf_test_skip() - Tell the kernel that the test should be skipped.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_skip(struct kutf_context *context);
+
+/**
+ * kutf_test_skip_msg() - Tell the kernel that this test has been skipped,
+ *                        supplying a reason string.
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the reason for the skip.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a prebaked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_skip_msg(struct kutf_context *context, const char *message);
+
+/**
+ * kutf_test_pass() - Tell the kernel that this test has passed.
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the reason for the pass.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_pass(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_debug() - Send a debug message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the debug information.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_debug(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_info() - Send an information message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the information message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_info(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_warn() - Send a warning message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the warning message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_warn(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_fail() - Tell the kernel that a test has failed
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the failure message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_fail(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_fatal() - Tell the kernel that a test has triggered a fatal error
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the fatal error message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_fatal(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_abort() - Tell the kernel that a test triggered an abort in the test
+ *
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_abort(struct kutf_context *context);
+
+#endif	/* _KERNEL_UTF_SUITE_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h
new file mode 100644
index 0000000..25b8285
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h
@@ -0,0 +1,60 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_UTILS_H_
+#define _KERNEL_UTF_UTILS_H_
+
+/* kutf_utils.h
+ * Utilities for the kernel UTF test infrastructure.
+ *
+ * This collection of library functions are provided for use by kernel UTF
+ * and users of kernel UTF which don't directly fit within the other
+ * code modules.
+ */
+
+#include <kutf/kutf_mem.h>
+
+/**
+ * Maximum size of the message strings within kernel UTF, messages longer then
+ * this will be truncated.
+ */
+#define KUTF_MAX_DSPRINTF_LEN	1024
+
+/**
+ * kutf_dsprintf() - dynamic sprintf
+ * @pool:	memory pool to allocate from
+ * @fmt:	The format string describing the string to document.
+ * @...		The parameters to feed in to the format string.
+ *
+ * This function implements sprintf which dynamically allocates memory to store
+ * the string. The library will free the memory containing the string when the
+ * result set is cleared or destroyed.
+ *
+ * Note The returned string may be truncated to fit an internal temporary
+ * buffer, which is KUTF_MAX_DSPRINTF_LEN bytes in length.
+ *
+ * Return: Returns pointer to allocated string, or NULL on error.
+ */
+const char *kutf_dsprintf(struct kutf_mempool *pool,
+		const char *fmt, ...);
+
+#endif	/* _KERNEL_UTF_UTILS_H_ */
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild
new file mode 100644
index 0000000..2531d41
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+ccflags-y += -I$(src)/../include
+
+obj-$(CONFIG_MALI_KUTF) += kutf.o
+
+kutf-y := kutf_mem.o kutf_resultset.o kutf_suite.o kutf_utils.o kutf_helpers.o kutf_helpers_user.o
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig
new file mode 100644
index 0000000..0cdb474
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+config MALI_KUTF
+ tristate "Mali Kernel Unit Test Framework"
+ default m
+ help
+   Enables MALI testing framework. To compile it as a module,
+   choose M here - this will generate a single module called kutf.
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile
new file mode 100644
index 0000000..d848e87
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile
@@ -0,0 +1,35 @@
+#
+# (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS=-I$(CURDIR)/../include modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp
new file mode 100755
index 0000000..f6d4c3f
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp
@@ -0,0 +1,31 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_kernel_module {
+    name: "kutf",
+    defaults: ["kernel_defaults"],
+    srcs: [
+        "Kbuild",
+        "kutf_helpers.c",
+        "kutf_helpers_user.c",
+        "kutf_mem.c",
+        "kutf_resultset.c",
+        "kutf_suite.c",
+        "kutf_utils.c",
+    ],
+    kbuild_options: ["CONFIG_MALI_KUTF=m"],
+    include_dirs: ["kernel/drivers/gpu/arm/midgard/tests/include"],
+    enabled: false,
+    unit_test_kernel_modules: {
+        enabled: true,
+    },
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c
new file mode 100644
index 0000000..cab5add
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF test helpers */
+#include <kutf/kutf_helpers.h>
+
+#include <linux/err.h>
+#include <linux/jiffies.h>
+#include <linux/sched.h>
+#include <linux/preempt.h>
+#include <linux/wait.h>
+#include <linux/uaccess.h>
+
+static DEFINE_SPINLOCK(kutf_input_lock);
+
+static bool pending_input(struct kutf_context *context)
+{
+	bool input_pending;
+
+	spin_lock(&kutf_input_lock);
+
+	input_pending = !list_empty(&context->userdata.input_head);
+
+	spin_unlock(&kutf_input_lock);
+
+	return input_pending;
+}
+
+char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size)
+{
+	struct kutf_userdata_line *line;
+
+	spin_lock(&kutf_input_lock);
+
+	while (list_empty(&context->userdata.input_head)) {
+		int err;
+
+		kutf_set_waiting_for_input(context->result_set);
+
+		spin_unlock(&kutf_input_lock);
+
+		err = wait_event_interruptible(context->userdata.input_waitq,
+				pending_input(context));
+
+		if (err)
+			return ERR_PTR(-EINTR);
+
+		spin_lock(&kutf_input_lock);
+	}
+
+	line = list_first_entry(&context->userdata.input_head,
+			struct kutf_userdata_line, node);
+	if (line->str) {
+		/*
+		 * Unless it is the end-of-input marker,
+		 * remove it from the list
+		 */
+		list_del(&line->node);
+	}
+
+	spin_unlock(&kutf_input_lock);
+
+	if (str_size)
+		*str_size = line->size;
+	return line->str;
+}
+
+int kutf_helper_input_enqueue(struct kutf_context *context,
+		const char __user *str, size_t size)
+{
+	struct kutf_userdata_line *line;
+
+	line = kutf_mempool_alloc(&context->fixture_pool,
+			sizeof(*line) + size + 1);
+	if (!line)
+		return -ENOMEM;
+	if (str) {
+		unsigned long bytes_not_copied;
+
+		line->size = size;
+		line->str = (void *)(line + 1);
+		bytes_not_copied = copy_from_user(line->str, str, size);
+		if (bytes_not_copied != 0)
+			return -EFAULT;
+		/* Zero terminate the string */
+		line->str[size] = '\0';
+	} else {
+		/* This is used to mark the end of input */
+		WARN_ON(size);
+		line->size = 0;
+		line->str = NULL;
+	}
+
+	spin_lock(&kutf_input_lock);
+
+	list_add_tail(&line->node, &context->userdata.input_head);
+
+	kutf_clear_waiting_for_input(context->result_set);
+
+	spin_unlock(&kutf_input_lock);
+
+	wake_up(&context->userdata.input_waitq);
+
+	return 0;
+}
+
+void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context)
+{
+	kutf_helper_input_enqueue(context, NULL, 0);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c
new file mode 100644
index 0000000..108fa82
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c
@@ -0,0 +1,468 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF test helpers that mirror those for kutf-userside */
+#include <kutf/kutf_helpers_user.h>
+#include <kutf/kutf_helpers.h>
+#include <kutf/kutf_utils.h>
+
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+const char *valtype_names[] = {
+	"INVALID",
+	"U64",
+	"STR",
+};
+
+static const char *get_val_type_name(enum kutf_helper_valtype valtype)
+{
+	/* enums can be signed or unsigned (implementation dependant), so
+	 * enforce it to prevent:
+	 * a) "<0 comparison on unsigned type" warning - if we did both upper
+	 *    and lower bound check
+	 * b) incorrect range checking if it was a signed type - if we did
+	 *    upper bound check only */
+	unsigned int type_idx = (unsigned int)valtype;
+
+	if (type_idx >= (unsigned int)KUTF_HELPER_VALTYPE_COUNT)
+		type_idx = (unsigned int)KUTF_HELPER_VALTYPE_INVALID;
+
+	return valtype_names[type_idx];
+}
+
+/* Check up to str_len chars of val_str to see if it's a valid value name:
+ *
+ * - Has between 1 and KUTF_HELPER_MAX_VAL_NAME_LEN characters before the \0 terminator
+ * - And, each char is in the character set [A-Z0-9_] */
+static int validate_val_name(const char *val_str, int str_len)
+{
+	int i = 0;
+
+	for (i = 0; str_len && i <= KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'; ++i, --str_len) {
+		char val_chr = val_str[i];
+
+		if (val_chr >= 'A' && val_chr <= 'Z')
+			continue;
+		if (val_chr >= '0' && val_chr <= '9')
+			continue;
+		if (val_chr == '_')
+			continue;
+
+		/* Character not in the set [A-Z0-9_] - report error */
+		return 1;
+	}
+
+	/* Names of 0 length are not valid */
+	if (i == 0)
+		return 1;
+	/* Length greater than KUTF_HELPER_MAX_VAL_NAME_LEN not allowed */
+	if (i > KUTF_HELPER_MAX_VAL_NAME_LEN || (i == KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'))
+		return 1;
+
+	return 0;
+}
+
+/* Find the length of the valid part of the string when it will be in quotes
+ * e.g. "str"
+ *
+ * That is, before any '\\', '\n' or '"' characters. This is so we don't have
+ * to escape the string */
+static int find_quoted_string_valid_len(const char *str)
+{
+	char *ptr;
+	const char *check_chars = "\\\n\"";
+
+	ptr = strpbrk(str, check_chars);
+	if (ptr)
+		return (int)(ptr-str);
+
+	return (int)strlen(str);
+}
+
+static int kutf_helper_userdata_enqueue(struct kutf_context *context,
+		const char *str)
+{
+	char *str_copy;
+	size_t len;
+	int err;
+
+	len = strlen(str)+1;
+
+	str_copy = kutf_mempool_alloc(&context->fixture_pool, len);
+	if (!str_copy)
+		return -ENOMEM;
+
+	strcpy(str_copy, str);
+
+	err = kutf_add_result(context, KUTF_RESULT_USERDATA, str_copy);
+
+	return err;
+}
+
+#define MAX_U64_HEX_LEN 16
+/* (Name size) + ("=0x" size) + (64-bit hex value size) + (terminator) */
+#define NAMED_U64_VAL_BUF_SZ (KUTF_HELPER_MAX_VAL_NAME_LEN + 3 + MAX_U64_HEX_LEN + 1)
+
+int kutf_helper_send_named_u64(struct kutf_context *context,
+		const char *val_name, u64 val)
+{
+	int ret = 1;
+	char msgbuf[NAMED_U64_VAL_BUF_SZ];
+	const char *errmsg = NULL;
+
+	if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': Invalid value name", val_name);
+		goto out_err;
+	}
+
+	ret = snprintf(msgbuf, NAMED_U64_VAL_BUF_SZ, "%s=0x%llx", val_name, val);
+	if (ret >= NAMED_U64_VAL_BUF_SZ || ret < 0) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': snprintf() problem buffer size==%d ret=%d",
+				val_name, NAMED_U64_VAL_BUF_SZ, ret);
+		goto out_err;
+	}
+
+	ret = kutf_helper_userdata_enqueue(context, msgbuf);
+	if (ret) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': send returned %d",
+				val_name, ret);
+		goto out_err;
+	}
+
+	return ret;
+out_err:
+	kutf_test_fail(context, errmsg);
+	return ret;
+}
+EXPORT_SYMBOL(kutf_helper_send_named_u64);
+
+#define NAMED_VALUE_SEP "="
+#define NAMED_STR_START_DELIM NAMED_VALUE_SEP "\""
+#define NAMED_STR_END_DELIM "\""
+
+int kutf_helper_max_str_len_for_kern(const char *val_name,
+		int kern_buf_sz)
+{
+	const int val_name_len = strlen(val_name);
+	const int start_delim_len = strlen(NAMED_STR_START_DELIM);
+	const int end_delim_len = strlen(NAMED_STR_END_DELIM);
+	int max_msg_len = kern_buf_sz;
+	int max_str_len;
+
+	max_str_len = max_msg_len - val_name_len - start_delim_len -
+		end_delim_len;
+
+	return max_str_len;
+}
+EXPORT_SYMBOL(kutf_helper_max_str_len_for_kern);
+
+int kutf_helper_send_named_str(struct kutf_context *context,
+		const char *val_name,
+		const char *val_str)
+{
+	int val_str_len;
+	int str_buf_sz;
+	char *str_buf = NULL;
+	int ret = 1;
+	char *copy_ptr;
+	int val_name_len;
+	int start_delim_len = strlen(NAMED_STR_START_DELIM);
+	int end_delim_len = strlen(NAMED_STR_END_DELIM);
+	const char *errmsg = NULL;
+
+	if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': Invalid value name", val_name);
+		goto out_err;
+	}
+	val_name_len = strlen(val_name);
+
+	val_str_len = find_quoted_string_valid_len(val_str);
+
+	/* (name length) + ("=\"" length) + (val_str len) + ("\"" length) + terminator */
+	str_buf_sz = val_name_len + start_delim_len + val_str_len + end_delim_len + 1;
+
+	/* Using kmalloc() here instead of mempool since we know we need to free
+	 * before we return */
+	str_buf = kmalloc(str_buf_sz, GFP_KERNEL);
+	if (!str_buf) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send str value named '%s': kmalloc failed, str_buf_sz=%d",
+				val_name, str_buf_sz);
+		goto out_err;
+	}
+	copy_ptr = str_buf;
+
+	/* Manually copy each string component instead of snprintf because
+	 * val_str may need to end early, and less error path handling */
+
+	/* name */
+	memcpy(copy_ptr, val_name, val_name_len);
+	copy_ptr += val_name_len;
+
+	/* str start delimiter */
+	memcpy(copy_ptr, NAMED_STR_START_DELIM, start_delim_len);
+	copy_ptr += start_delim_len;
+
+	/* str value */
+	memcpy(copy_ptr, val_str, val_str_len);
+	copy_ptr += val_str_len;
+
+	/* str end delimiter */
+	memcpy(copy_ptr, NAMED_STR_END_DELIM, end_delim_len);
+	copy_ptr += end_delim_len;
+
+	/* Terminator */
+	*copy_ptr = '\0';
+
+	ret = kutf_helper_userdata_enqueue(context, str_buf);
+
+	if (ret) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send str value named '%s': send returned %d",
+				val_name, ret);
+		goto out_err;
+	}
+
+	kfree(str_buf);
+	return ret;
+
+out_err:
+	kutf_test_fail(context, errmsg);
+	kfree(str_buf);
+	return ret;
+}
+EXPORT_SYMBOL(kutf_helper_send_named_str);
+
+int kutf_helper_receive_named_val(
+		struct kutf_context *context,
+		struct kutf_helper_named_val *named_val)
+{
+	size_t recv_sz;
+	char *recv_str;
+	char *search_ptr;
+	char *name_str = NULL;
+	int name_len;
+	int strval_len;
+	enum kutf_helper_valtype type = KUTF_HELPER_VALTYPE_INVALID;
+	char *strval = NULL;
+	u64 u64val = 0;
+	int err = KUTF_HELPER_ERR_INVALID_VALUE;
+
+	recv_str = kutf_helper_input_dequeue(context, &recv_sz);
+	if (!recv_str)
+		return -EBUSY;
+	else if (IS_ERR(recv_str))
+		return PTR_ERR(recv_str);
+
+	/* Find the '=', grab the name and validate it */
+	search_ptr = strnchr(recv_str, recv_sz, NAMED_VALUE_SEP[0]);
+	if (search_ptr) {
+		name_len = search_ptr - recv_str;
+		if (!validate_val_name(recv_str, name_len)) {
+			/* no need to reallocate - just modify string in place */
+			name_str = recv_str;
+			name_str[name_len] = '\0';
+
+			/* Move until after the '=' */
+			recv_str += (name_len + 1);
+			recv_sz -= (name_len + 1);
+		}
+	}
+	if (!name_str) {
+		pr_err("Invalid name part for received string '%s'\n",
+				recv_str);
+		return KUTF_HELPER_ERR_INVALID_NAME;
+	}
+
+	/* detect value type */
+	if (*recv_str == NAMED_STR_START_DELIM[1]) {
+		/* string delimiter start*/
+		++recv_str;
+		--recv_sz;
+
+		/* Find end of string */
+		search_ptr = strnchr(recv_str, recv_sz, NAMED_STR_END_DELIM[0]);
+		if (search_ptr) {
+			strval_len = search_ptr - recv_str;
+			/* Validate the string to ensure it contains no quotes */
+			if (strval_len == find_quoted_string_valid_len(recv_str)) {
+				/* no need to reallocate - just modify string in place */
+				strval = recv_str;
+				strval[strval_len] = '\0';
+
+				/* Move until after the end delimiter */
+				recv_str += (strval_len + 1);
+				recv_sz -= (strval_len + 1);
+				type = KUTF_HELPER_VALTYPE_STR;
+			} else {
+				pr_err("String value contains invalid characters in rest of received string '%s'\n", recv_str);
+				err = KUTF_HELPER_ERR_CHARS_AFTER_VAL;
+			}
+		} else {
+			pr_err("End of string delimiter not found in rest of received string '%s'\n", recv_str);
+			err = KUTF_HELPER_ERR_NO_END_DELIMITER;
+		}
+	} else {
+		/* possibly a number value - strtoull will parse it */
+		err = kstrtoull(recv_str, 0, &u64val);
+		/* unlike userspace can't get an end ptr, but if kstrtoull()
+		 * reads characters after the number it'll report -EINVAL */
+		if (!err) {
+			int len_remain = strnlen(recv_str, recv_sz);
+
+			type = KUTF_HELPER_VALTYPE_U64;
+			recv_str += len_remain;
+			recv_sz -= len_remain;
+		} else {
+			/* special case: not a number, report as such */
+			pr_err("Rest of received string was not a numeric value or quoted string value: '%s'\n", recv_str);
+		}
+	}
+
+	if (type == KUTF_HELPER_VALTYPE_INVALID)
+		return err;
+
+	/* Any remaining characters - error */
+	if (strnlen(recv_str, recv_sz) != 0) {
+		pr_err("Characters remain after value of type %s: '%s'\n",
+				get_val_type_name(type), recv_str);
+		return KUTF_HELPER_ERR_CHARS_AFTER_VAL;
+	}
+
+	/* Success - write into the output structure */
+	switch (type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		named_val->u.val_u64 = u64val;
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		named_val->u.val_str = strval;
+		break;
+	default:
+		pr_err("Unreachable, fix kutf_helper_receive_named_val\n");
+		/* Coding error, report as though 'run' file failed */
+		return -EINVAL;
+	}
+
+	named_val->val_name = name_str;
+	named_val->type = type;
+
+	return KUTF_HELPER_ERR_NONE;
+}
+EXPORT_SYMBOL(kutf_helper_receive_named_val);
+
+#define DUMMY_MSG "<placeholder due to test fail>"
+int kutf_helper_receive_check_val(
+		struct kutf_helper_named_val *named_val,
+		struct kutf_context *context,
+		const char *expect_val_name,
+		enum kutf_helper_valtype expect_val_type)
+{
+	int err;
+
+	err = kutf_helper_receive_named_val(context, named_val);
+	if (err < 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to receive value named '%s'",
+				expect_val_name);
+		kutf_test_fail(context, msg);
+		return err;
+	} else if (err > 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Named-value parse error when expecting value named '%s'",
+				expect_val_name);
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+	if (strcmp(named_val->val_name, expect_val_name) != 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Expecting to receive value named '%s' but got '%s'",
+				expect_val_name, named_val->val_name);
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+
+	if (named_val->type != expect_val_type) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Expecting value named '%s' to be of type %s but got %s",
+				expect_val_name, get_val_type_name(expect_val_type),
+				get_val_type_name(named_val->type));
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+	return err;
+
+out_fail_and_fixup:
+	/* Produce a valid but incorrect value */
+	switch (expect_val_type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		named_val->u.val_u64 = 0ull;
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		{
+			char *str = kutf_mempool_alloc(&context->fixture_pool, sizeof(DUMMY_MSG));
+
+			if (!str)
+				return -1;
+
+			strcpy(str, DUMMY_MSG);
+			named_val->u.val_str = str;
+			break;
+		}
+	default:
+		break;
+	}
+
+	/* Indicate that this is invalid */
+	named_val->type = KUTF_HELPER_VALTYPE_INVALID;
+
+	/* But at least allow the caller to continue in the test with failures */
+	return 0;
+}
+EXPORT_SYMBOL(kutf_helper_receive_check_val);
+
+void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val)
+{
+	switch (named_val->type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		pr_warn("%s=0x%llx\n", named_val->val_name, named_val->u.val_u64);
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		pr_warn("%s=\"%s\"\n", named_val->val_name, named_val->u.val_str);
+		break;
+	case KUTF_HELPER_VALTYPE_INVALID:
+		pr_warn("%s is invalid\n", named_val->val_name);
+		break;
+	default:
+		pr_warn("%s has unknown type %d\n", named_val->val_name, named_val->type);
+		break;
+	}
+}
+EXPORT_SYMBOL(kutf_helper_output_named_val);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c
new file mode 100644
index 0000000..fd98bea
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c
@@ -0,0 +1,108 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF memory management functions */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <kutf/kutf_mem.h>
+
+
+/**
+ * struct kutf_alloc_entry - Structure representing an allocation.
+ * @node:	List node for use with kutf_mempool.
+ * @data:	Data area of the allocation
+ */
+struct kutf_alloc_entry {
+	struct list_head node;
+	u8 data[0];
+};
+
+int kutf_mempool_init(struct kutf_mempool *pool)
+{
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		return -1;
+	}
+
+	INIT_LIST_HEAD(&pool->head);
+	mutex_init(&pool->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(kutf_mempool_init);
+
+void kutf_mempool_destroy(struct kutf_mempool *pool)
+{
+	struct list_head *remove;
+	struct list_head *tmp;
+
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		return;
+	}
+
+	mutex_lock(&pool->lock);
+	list_for_each_safe(remove, tmp, &pool->head) {
+		struct kutf_alloc_entry *remove_alloc;
+
+		remove_alloc = list_entry(remove, struct kutf_alloc_entry, node);
+		list_del(&remove_alloc->node);
+		kfree(remove_alloc);
+	}
+	mutex_unlock(&pool->lock);
+
+}
+EXPORT_SYMBOL(kutf_mempool_destroy);
+
+void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size)
+{
+	struct kutf_alloc_entry *ret;
+
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		goto fail_pool;
+	}
+
+	mutex_lock(&pool->lock);
+
+	ret = kmalloc(sizeof(*ret) + size, GFP_KERNEL);
+	if (!ret) {
+		pr_err("Failed to allocate memory\n");
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&ret->node);
+	list_add(&ret->node, &pool->head);
+
+	mutex_unlock(&pool->lock);
+
+	return &ret->data[0];
+
+fail_alloc:
+	mutex_unlock(&pool->lock);
+fail_pool:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_mempool_alloc);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c
new file mode 100644
index 0000000..94ecfa4
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF result management functions */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/err.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_resultset.h>
+
+/* Lock to protect all result structures */
+static DEFINE_SPINLOCK(kutf_result_lock);
+
+struct kutf_result_set *kutf_create_result_set(void)
+{
+	struct kutf_result_set *set;
+
+	set = kmalloc(sizeof(*set), GFP_KERNEL);
+	if (!set) {
+		pr_err("Failed to allocate resultset");
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&set->results);
+	init_waitqueue_head(&set->waitq);
+	set->flags = 0;
+
+	return set;
+
+fail_alloc:
+	return NULL;
+}
+
+int kutf_add_result(struct kutf_context *context,
+		enum kutf_result_status status,
+		const char *message)
+{
+	struct kutf_mempool *mempool = &context->fixture_pool;
+	struct kutf_result_set *set = context->result_set;
+	/* Create the new result */
+	struct kutf_result *new_result;
+
+	BUG_ON(set == NULL);
+
+	new_result = kutf_mempool_alloc(mempool, sizeof(*new_result));
+	if (!new_result) {
+		pr_err("Result allocation failed\n");
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&new_result->node);
+	new_result->status = status;
+	new_result->message = message;
+
+	spin_lock(&kutf_result_lock);
+
+	list_add_tail(&new_result->node, &set->results);
+
+	spin_unlock(&kutf_result_lock);
+
+	wake_up(&set->waitq);
+
+	return 0;
+}
+
+void kutf_destroy_result_set(struct kutf_result_set *set)
+{
+	if (!list_empty(&set->results))
+		pr_err("kutf_destroy_result_set: Unread results from test\n");
+
+	kfree(set);
+}
+
+static bool kutf_has_result(struct kutf_result_set *set)
+{
+	bool has_result;
+
+	spin_lock(&kutf_result_lock);
+	if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT)
+		/* Pretend there are results if waiting for input */
+		has_result = true;
+	else
+		has_result = !list_empty(&set->results);
+	spin_unlock(&kutf_result_lock);
+
+	return has_result;
+}
+
+struct kutf_result *kutf_remove_result(struct kutf_result_set *set)
+{
+	struct kutf_result *result = NULL;
+	int ret;
+
+	do {
+		ret = wait_event_interruptible(set->waitq,
+				kutf_has_result(set));
+
+		if (ret)
+			return ERR_PTR(ret);
+
+		spin_lock(&kutf_result_lock);
+
+		if (!list_empty(&set->results)) {
+			result = list_first_entry(&set->results,
+					struct kutf_result,
+					node);
+			list_del(&result->node);
+		} else if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) {
+			/* Return a fake result */
+			static struct kutf_result waiting = {
+				.status = KUTF_RESULT_USERDATA_WAIT
+			};
+			result = &waiting;
+		}
+		/* If result == NULL then there was a race with the event
+		 * being removed between the check in kutf_has_result and
+		 * the lock being obtained. In this case we retry
+		 */
+
+		spin_unlock(&kutf_result_lock);
+	} while (result == NULL);
+
+	return result;
+}
+
+void kutf_set_waiting_for_input(struct kutf_result_set *set)
+{
+	spin_lock(&kutf_result_lock);
+	set->flags |= KUTF_RESULT_SET_WAITING_FOR_INPUT;
+	spin_unlock(&kutf_result_lock);
+
+	wake_up(&set->waitq);
+}
+
+void kutf_clear_waiting_for_input(struct kutf_result_set *set)
+{
+	spin_lock(&kutf_result_lock);
+	set->flags &= ~KUTF_RESULT_SET_WAITING_FOR_INPUT;
+	spin_unlock(&kutf_result_lock);
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c
new file mode 100644
index 0000000..1c350bb
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c
@@ -0,0 +1,1205 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF suite, test and fixture management including user to kernel
+ * interaction */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/atomic.h>
+#include <linux/sched.h>
+
+#include <generated/autoconf.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_resultset.h>
+#include <kutf/kutf_utils.h>
+#include <kutf/kutf_helpers.h>
+
+#if defined(CONFIG_DEBUG_FS)
+
+/**
+ * struct kutf_application - Structure which represents kutf application
+ * @name:	The name of this test application.
+ * @dir:	The debugfs directory for this test
+ * @suite_list:	List head to store all the suites which are part of this
+ *              application
+ */
+struct kutf_application {
+	const char         *name;
+	struct dentry      *dir;
+	struct list_head   suite_list;
+};
+
+/**
+ * struct kutf_test_function - Structure which represents kutf test function
+ * @suite:		Back reference to the suite this test function
+ *                      belongs to
+ * @filters:		Filters that apply to this test function
+ * @test_id:		Test ID
+ * @execute:		Function to run for this test
+ * @test_data:		Static data for this test
+ * @node:		List node for test_list
+ * @variant_list:	List head to store all the variants which can run on
+ *                      this function
+ * @dir:		debugfs directory for this test function
+ */
+struct kutf_test_function {
+	struct kutf_suite  *suite;
+	unsigned int       filters;
+	unsigned int       test_id;
+	void (*execute)(struct kutf_context *context);
+	union kutf_callback_data test_data;
+	struct list_head   node;
+	struct list_head   variant_list;
+	struct dentry      *dir;
+};
+
+/**
+ * struct kutf_test_fixture - Structure which holds information on the kutf
+ *                            test fixture
+ * @test_func:		Test function this fixture belongs to
+ * @fixture_index:	Index of this fixture
+ * @node:		List node for variant_list
+ * @dir:		debugfs directory for this test fixture
+ */
+struct kutf_test_fixture {
+	struct kutf_test_function *test_func;
+	unsigned int              fixture_index;
+	struct list_head          node;
+	struct dentry             *dir;
+};
+
+static struct dentry *base_dir;
+static struct workqueue_struct *kutf_workq;
+
+/**
+ * struct kutf_convert_table - Structure which keeps test results
+ * @result_name:	Status of the test result
+ * @result:		Status value for a single test
+ */
+struct kutf_convert_table {
+	char                    result_name[50];
+	enum kutf_result_status result;
+};
+
+struct kutf_convert_table kutf_convert[] = {
+#define ADD_UTF_RESULT(_name) \
+{ \
+	#_name, \
+	_name, \
+},
+ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK)
+ADD_UTF_RESULT(KUTF_RESULT_SKIP)
+ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN)
+ADD_UTF_RESULT(KUTF_RESULT_PASS)
+ADD_UTF_RESULT(KUTF_RESULT_DEBUG)
+ADD_UTF_RESULT(KUTF_RESULT_INFO)
+ADD_UTF_RESULT(KUTF_RESULT_WARN)
+ADD_UTF_RESULT(KUTF_RESULT_FAIL)
+ADD_UTF_RESULT(KUTF_RESULT_FATAL)
+ADD_UTF_RESULT(KUTF_RESULT_ABORT)
+};
+
+#define UTF_CONVERT_SIZE (ARRAY_SIZE(kutf_convert))
+
+/**
+ * kutf_create_context() - Create a test context in which a specific fixture
+ *                         of an application will be run and its results
+ *                         reported back to the user
+ * @test_fix:	Test fixture to be run.
+ *
+ * The context's refcount will be initialized to 1.
+ *
+ * Return: Returns the created test context on success or NULL on failure
+ */
+static struct kutf_context *kutf_create_context(
+		struct kutf_test_fixture *test_fix);
+
+/**
+ * kutf_destroy_context() - Destroy a previously created test context, only
+ *                          once its refcount has become zero
+ * @kref:	pointer to kref member within the context
+ *
+ * This should only be used via a kref_put() call on the context's kref member
+ */
+static void kutf_destroy_context(struct kref *kref);
+
+/**
+ * kutf_context_get() - increment refcount on a context
+ * @context:	the kutf context
+ *
+ * This must be used when the lifetime of the context might exceed that of the
+ * thread creating @context
+ */
+static void kutf_context_get(struct kutf_context *context);
+
+/**
+ * kutf_context_put() - decrement refcount on a context, destroying it when it
+ *                      reached zero
+ * @context:	the kutf context
+ *
+ * This must be used only after a corresponding kutf_context_get() call on
+ * @context, and the caller no longer needs access to @context.
+ */
+static void kutf_context_put(struct kutf_context *context);
+
+/**
+ * kutf_set_result() - Set the test result against the specified test context
+ * @context:	Test context
+ * @status:	Result status
+ */
+static void kutf_set_result(struct kutf_context *context,
+		enum kutf_result_status status);
+
+/**
+ * kutf_set_expected_result() - Set the expected test result for the specified
+ *                              test context
+ * @context:		Test context
+ * @expected_status:	Expected result status
+ */
+static void kutf_set_expected_result(struct kutf_context *context,
+		enum kutf_result_status expected_status);
+
+/**
+ * kutf_result_to_string() - Converts a KUTF result into a string
+ * @result_str:      Output result string
+ * @result:          Result status to convert
+ *
+ * Return: 1 if test result was successfully converted to string, 0 otherwise
+ */
+static int kutf_result_to_string(char **result_str,
+		enum kutf_result_status result)
+{
+	int i;
+	int ret = 0;
+
+	for (i = 0; i < UTF_CONVERT_SIZE; i++) {
+		if (result == kutf_convert[i].result) {
+			*result_str = kutf_convert[i].result_name;
+			ret = 1;
+		}
+	}
+	return ret;
+}
+
+/**
+ * kutf_debugfs_const_string_read() - Simple debugfs read callback which
+ *                                    returns a constant string
+ * @file:	Opened file to read from
+ * @buf:	User buffer to write the data into
+ * @len:	Amount of data to read
+ * @ppos:	Offset into file to read from
+ *
+ * Return: On success, the number of bytes read and offset @ppos advanced by
+ *         this number; on error, negative value
+ */
+static ssize_t kutf_debugfs_const_string_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	char *str = file->private_data;
+
+	return simple_read_from_buffer(buf, len, ppos, str, strlen(str));
+}
+
+static const struct file_operations kutf_debugfs_const_string_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = kutf_debugfs_const_string_read,
+	.llseek  = default_llseek,
+};
+
+/**
+ * kutf_add_explicit_result() - Check if an explicit result needs to be added
+ * @context:	KUTF test context
+ */
+static void kutf_add_explicit_result(struct kutf_context *context)
+{
+	switch (context->expected_status) {
+	case KUTF_RESULT_UNKNOWN:
+		if (context->status == KUTF_RESULT_UNKNOWN)
+			kutf_test_pass(context, "(implicit pass)");
+		break;
+
+	case KUTF_RESULT_WARN:
+		if (context->status == KUTF_RESULT_WARN)
+			kutf_test_pass(context,
+					"Pass (expected warn occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected warn missing)");
+		break;
+
+	case KUTF_RESULT_FAIL:
+		if (context->status == KUTF_RESULT_FAIL)
+			kutf_test_pass(context,
+					"Pass (expected fail occurred)");
+		else if (context->status != KUTF_RESULT_SKIP) {
+			/* Force the expected status so the fail gets logged */
+			context->expected_status = KUTF_RESULT_PASS;
+			kutf_test_fail(context,
+					"Fail (expected fail missing)");
+		}
+		break;
+
+	case KUTF_RESULT_FATAL:
+		if (context->status == KUTF_RESULT_FATAL)
+			kutf_test_pass(context,
+					"Pass (expected fatal occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected fatal missing)");
+		break;
+
+	case KUTF_RESULT_ABORT:
+		if (context->status == KUTF_RESULT_ABORT)
+			kutf_test_pass(context,
+					"Pass (expected abort occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected abort missing)");
+		break;
+	default:
+		break;
+	}
+}
+
+static void kutf_run_test(struct work_struct *data)
+{
+	struct kutf_context *test_context = container_of(data,
+			struct kutf_context, work);
+	struct kutf_suite *suite = test_context->suite;
+	struct kutf_test_function *test_func;
+
+	test_func = test_context->test_fix->test_func;
+
+	/*
+	 * Call the create fixture function if required before the
+	 * fixture is run
+	 */
+	if (suite->create_fixture)
+		test_context->fixture = suite->create_fixture(test_context);
+
+	/* Only run the test if the fixture was created (if required) */
+	if ((suite->create_fixture && test_context->fixture) ||
+			(!suite->create_fixture)) {
+		/* Run this fixture */
+		test_func->execute(test_context);
+
+		if (suite->remove_fixture)
+			suite->remove_fixture(test_context);
+
+		kutf_add_explicit_result(test_context);
+	}
+
+	kutf_add_result(test_context, KUTF_RESULT_TEST_FINISHED, NULL);
+
+	kutf_context_put(test_context);
+}
+
+/**
+ * kutf_debugfs_run_open() Debugfs open callback for the "run" entry.
+ * @inode:	inode of the opened file
+ * @file:	Opened file to read from
+ *
+ * This function creates a KUTF context and queues it onto a workqueue to be
+ * run asynchronously. The resulting file descriptor can be used to communicate
+ * userdata to the test and to read back the results of the test execution.
+ *
+ * Return: 0 on success
+ */
+static int kutf_debugfs_run_open(struct inode *inode, struct file *file)
+{
+	struct kutf_test_fixture *test_fix = inode->i_private;
+	struct kutf_context *test_context;
+	int err = 0;
+
+	test_context = kutf_create_context(test_fix);
+	if (!test_context) {
+		err = -ENOMEM;
+		goto finish;
+	}
+
+	file->private_data = test_context;
+
+	/* This reference is release by the kutf_run_test */
+	kutf_context_get(test_context);
+
+	queue_work(kutf_workq, &test_context->work);
+
+finish:
+	return err;
+}
+
+#define USERDATA_WARNING_MESSAGE "WARNING: This test requires userdata\n"
+
+/**
+ * kutf_debugfs_run_read() - Debugfs read callback for the "run" entry.
+ * @file:	Opened file to read from
+ * @buf:	User buffer to write the data into
+ * @len:	Amount of data to read
+ * @ppos:	Offset into file to read from
+ *
+ * This function emits the results of the test, blocking until they are
+ * available.
+ *
+ * If the test involves user data then this will also return user data records
+ * to user space. If the test is waiting for user data then this function will
+ * output a message (to make the likes of 'cat' display it), followed by
+ * returning 0 to mark the end of file.
+ *
+ * Results will be emitted one at a time, once all the results have been read
+ * 0 will be returned to indicate there is no more data.
+ *
+ * Return: Number of bytes read.
+ */
+static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct kutf_context *test_context = file->private_data;
+	struct kutf_result *res;
+	unsigned long bytes_not_copied;
+	ssize_t bytes_copied = 0;
+	char *kutf_str_ptr = NULL;
+	size_t kutf_str_len = 0;
+	size_t message_len = 0;
+	char separator = ':';
+	char terminator = '\n';
+
+	res = kutf_remove_result(test_context->result_set);
+
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	/*
+	 * Handle 'fake' results - these results are converted to another
+	 * form before being returned from the kernel
+	 */
+	switch (res->status) {
+	case KUTF_RESULT_TEST_FINISHED:
+		return 0;
+	case KUTF_RESULT_USERDATA_WAIT:
+		if (test_context->userdata.flags &
+				KUTF_USERDATA_WARNING_OUTPUT) {
+			/*
+			 * Warning message already output,
+			 * signal end-of-file
+			 */
+			return 0;
+		}
+
+		message_len = sizeof(USERDATA_WARNING_MESSAGE)-1;
+		if (message_len > len)
+			message_len = len;
+
+		bytes_not_copied = copy_to_user(buf,
+				USERDATA_WARNING_MESSAGE,
+				message_len);
+		if (bytes_not_copied != 0)
+			return -EFAULT;
+		test_context->userdata.flags |= KUTF_USERDATA_WARNING_OUTPUT;
+		return message_len;
+	case KUTF_RESULT_USERDATA:
+		message_len = strlen(res->message);
+		if (message_len > len-1) {
+			message_len = len-1;
+			pr_warn("User data truncated, read not long enough\n");
+		}
+		bytes_not_copied = copy_to_user(buf, res->message,
+				message_len);
+		if (bytes_not_copied != 0) {
+			pr_warn("Failed to copy data to user space buffer\n");
+			return -EFAULT;
+		}
+		/* Finally the terminator */
+		bytes_not_copied = copy_to_user(&buf[message_len],
+				&terminator, 1);
+		if (bytes_not_copied != 0) {
+			pr_warn("Failed to copy data to user space buffer\n");
+			return -EFAULT;
+		}
+		return message_len+1;
+	default:
+		/* Fall through - this is a test result */
+		break;
+	}
+
+	/* Note: This code assumes a result is read completely */
+	kutf_result_to_string(&kutf_str_ptr, res->status);
+	if (kutf_str_ptr)
+		kutf_str_len = strlen(kutf_str_ptr);
+
+	if (res->message)
+		message_len = strlen(res->message);
+
+	if ((kutf_str_len + 1 + message_len + 1) > len) {
+		pr_err("Not enough space in user buffer for a single result");
+		return 0;
+	}
+
+	/* First copy the result string */
+	if (kutf_str_ptr) {
+		bytes_not_copied = copy_to_user(&buf[0], kutf_str_ptr,
+						kutf_str_len);
+		bytes_copied += kutf_str_len - bytes_not_copied;
+		if (bytes_not_copied)
+			goto exit;
+	}
+
+	/* Then the separator */
+	bytes_not_copied = copy_to_user(&buf[bytes_copied],
+					&separator, 1);
+	bytes_copied += 1 - bytes_not_copied;
+	if (bytes_not_copied)
+		goto exit;
+
+	/* Finally Next copy the result string */
+	if (res->message) {
+		bytes_not_copied = copy_to_user(&buf[bytes_copied],
+						res->message, message_len);
+		bytes_copied += message_len - bytes_not_copied;
+		if (bytes_not_copied)
+			goto exit;
+	}
+
+	/* Finally the terminator */
+	bytes_not_copied = copy_to_user(&buf[bytes_copied],
+					&terminator, 1);
+	bytes_copied += 1 - bytes_not_copied;
+
+exit:
+	return bytes_copied;
+}
+
+/**
+ * kutf_debugfs_run_write() Debugfs write callback for the "run" entry.
+ * @file:	Opened file to write to
+ * @buf:	User buffer to read the data from
+ * @len:	Amount of data to write
+ * @ppos:	Offset into file to write to
+ *
+ * This function allows user and kernel to exchange extra data necessary for
+ * the test fixture.
+ *
+ * The data is added to the first struct kutf_context running the fixture
+ *
+ * Return: Number of bytes written
+ */
+static ssize_t kutf_debugfs_run_write(struct file *file,
+		const char __user *buf, size_t len, loff_t *ppos)
+{
+	int ret = 0;
+	struct kutf_context *test_context = file->private_data;
+
+	if (len > KUTF_MAX_LINE_LENGTH)
+		return -EINVAL;
+
+	ret = kutf_helper_input_enqueue(test_context, buf, len);
+	if (ret < 0)
+		return ret;
+
+	return len;
+}
+
+/**
+ * kutf_debugfs_run_release() - Debugfs release callback for the "run" entry.
+ * @inode:	File entry representation
+ * @file:	A specific opening of the file
+ *
+ * Release any resources that were created during the opening of the file
+ *
+ * Note that resources may not be released immediately, that might only happen
+ * later when other users of the kutf_context release their refcount.
+ *
+ * Return: 0 on success
+ */
+static int kutf_debugfs_run_release(struct inode *inode, struct file *file)
+{
+	struct kutf_context *test_context = file->private_data;
+
+	kutf_helper_input_enqueue_end_of_data(test_context);
+
+	kutf_context_put(test_context);
+	return 0;
+}
+
+static const struct file_operations kutf_debugfs_run_ops = {
+	.owner = THIS_MODULE,
+	.open = kutf_debugfs_run_open,
+	.read = kutf_debugfs_run_read,
+	.write = kutf_debugfs_run_write,
+	.release = kutf_debugfs_run_release,
+	.llseek  = default_llseek,
+};
+
+/**
+ * create_fixture_variant() - Creates a fixture variant for the specified
+ *                            test function and index and the debugfs entries
+ *                            that represent it.
+ * @test_func:		Test function
+ * @fixture_index:	Fixture index
+ *
+ * Return: 0 on success, negative value corresponding to error code in failure
+ */
+static int create_fixture_variant(struct kutf_test_function *test_func,
+		unsigned int fixture_index)
+{
+	struct kutf_test_fixture *test_fix;
+	char name[11];	/* Enough to print the MAX_UINT32 + the null terminator */
+	struct dentry *tmp;
+	int err;
+
+	test_fix = kmalloc(sizeof(*test_fix), GFP_KERNEL);
+	if (!test_fix) {
+		pr_err("Failed to create debugfs directory when adding fixture\n");
+		err = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	test_fix->test_func = test_func;
+	test_fix->fixture_index = fixture_index;
+
+	snprintf(name, sizeof(name), "%d", fixture_index);
+	test_fix->dir = debugfs_create_dir(name, test_func->dir);
+	if (!test_func->dir) {
+		pr_err("Failed to create debugfs directory when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_dir;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, test_fix->dir, "fixture\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_file;
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	tmp = debugfs_create_file_unsafe(
+#else
+	tmp = debugfs_create_file(
+#endif
+			"run", 0600, test_fix->dir,
+			test_fix,
+			&kutf_debugfs_run_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"run\" when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_file;
+	}
+
+	list_add(&test_fix->node, &test_func->variant_list);
+	return 0;
+
+fail_file:
+	debugfs_remove_recursive(test_fix->dir);
+fail_dir:
+	kfree(test_fix);
+fail_alloc:
+	return err;
+}
+
+/**
+ * kutf_remove_test_variant() - Destroy a previously created fixture variant.
+ * @test_fix:	Test fixture
+ */
+static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix)
+{
+	debugfs_remove_recursive(test_fix->dir);
+	kfree(test_fix);
+}
+
+void kutf_add_test_with_filters_and_data(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data test_data)
+{
+	struct kutf_test_function *test_func;
+	struct dentry *tmp;
+	unsigned int i;
+
+	test_func = kmalloc(sizeof(*test_func), GFP_KERNEL);
+	if (!test_func) {
+		pr_err("Failed to allocate memory when adding test %s\n", name);
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&test_func->variant_list);
+
+	test_func->dir = debugfs_create_dir(name, suite->dir);
+	if (!test_func->dir) {
+		pr_err("Failed to create debugfs directory when adding test %s\n", name);
+		goto fail_dir;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, test_func->dir, "test\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	test_func->filters = filters;
+	tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir,
+				 &test_func->filters);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	test_func->test_id = id;
+	tmp = debugfs_create_u32("test_id", S_IROTH, test_func->dir,
+				 &test_func->test_id);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	for (i = 0; i < suite->fixture_variants; i++) {
+		if (create_fixture_variant(test_func, i)) {
+			pr_err("Failed to create fixture %d when adding test %s\n", i, name);
+			goto fail_file;
+		}
+	}
+
+	test_func->suite = suite;
+	test_func->execute = execute;
+	test_func->test_data = test_data;
+
+	list_add(&test_func->node, &suite->test_list);
+	return;
+
+fail_file:
+	debugfs_remove_recursive(test_func->dir);
+fail_dir:
+	kfree(test_func);
+fail_alloc:
+	return;
+}
+EXPORT_SYMBOL(kutf_add_test_with_filters_and_data);
+
+void kutf_add_test_with_filters(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters)
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+
+	kutf_add_test_with_filters_and_data(suite,
+					    id,
+					    name,
+					    execute,
+					    suite->suite_default_flags,
+					    data);
+}
+EXPORT_SYMBOL(kutf_add_test_with_filters);
+
+void kutf_add_test(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context))
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+
+	kutf_add_test_with_filters_and_data(suite,
+					    id,
+					    name,
+					    execute,
+					    suite->suite_default_flags,
+					    data);
+}
+EXPORT_SYMBOL(kutf_add_test);
+
+/**
+ * kutf_remove_test(): Remove a previously added test function.
+ * @test_func: Test function
+ */
+static void kutf_remove_test(struct kutf_test_function *test_func)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &test_func->variant_list) {
+		struct kutf_test_fixture *test_fix;
+
+		test_fix = list_entry(pos, struct kutf_test_fixture, node);
+		kutf_remove_test_variant(test_fix);
+	}
+
+	list_del(&test_func->node);
+	debugfs_remove_recursive(test_func->dir);
+	kfree(test_func);
+}
+
+struct kutf_suite *kutf_create_suite_with_filters_and_data(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data suite_data)
+{
+	struct kutf_suite *suite;
+	struct dentry *tmp;
+
+	suite = kmalloc(sizeof(*suite), GFP_KERNEL);
+	if (!suite) {
+		pr_err("Failed to allocate memory when creating suite %s\n", name);
+		goto fail_kmalloc;
+	}
+
+	suite->dir = debugfs_create_dir(name, app->dir);
+	if (!suite->dir) {
+		pr_err("Failed to create debugfs directory when adding test %s\n", name);
+		goto fail_debugfs;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, suite->dir, "suite\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	INIT_LIST_HEAD(&suite->test_list);
+	suite->app = app;
+	suite->name = name;
+	suite->fixture_variants = fixture_count;
+	suite->create_fixture = create_fixture;
+	suite->remove_fixture = remove_fixture;
+	suite->suite_default_flags = filters;
+	suite->suite_data = suite_data;
+
+	list_add(&suite->node, &app->suite_list);
+
+	return suite;
+
+fail_file:
+	debugfs_remove_recursive(suite->dir);
+fail_debugfs:
+	kfree(suite);
+fail_kmalloc:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_create_suite_with_filters_and_data);
+
+struct kutf_suite *kutf_create_suite_with_filters(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters)
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+	return kutf_create_suite_with_filters_and_data(app,
+						       name,
+						       fixture_count,
+						       create_fixture,
+						       remove_fixture,
+						       filters,
+						       data);
+}
+EXPORT_SYMBOL(kutf_create_suite_with_filters);
+
+struct kutf_suite *kutf_create_suite(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context))
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+	return kutf_create_suite_with_filters_and_data(app,
+						       name,
+						       fixture_count,
+						       create_fixture,
+						       remove_fixture,
+						       KUTF_F_TEST_GENERIC,
+						       data);
+}
+EXPORT_SYMBOL(kutf_create_suite);
+
+/**
+ * kutf_destroy_suite() - Destroy a previously added test suite.
+ * @suite:	Test suite
+ */
+static void kutf_destroy_suite(struct kutf_suite *suite)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &suite->test_list) {
+		struct kutf_test_function *test_func;
+
+		test_func = list_entry(pos, struct kutf_test_function, node);
+		kutf_remove_test(test_func);
+	}
+
+	list_del(&suite->node);
+	debugfs_remove_recursive(suite->dir);
+	kfree(suite);
+}
+
+struct kutf_application *kutf_create_application(const char *name)
+{
+	struct kutf_application *app;
+	struct dentry *tmp;
+
+	app = kmalloc(sizeof(*app), GFP_KERNEL);
+	if (!app) {
+		pr_err("Failed to create allocate memory when creating application %s\n", name);
+		goto fail_kmalloc;
+	}
+
+	app->dir = debugfs_create_dir(name, base_dir);
+	if (!app->dir) {
+		pr_err("Failed to create debugfs direcotry when creating application %s\n", name);
+		goto fail_debugfs;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, app->dir, "application\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name);
+		goto fail_file;
+	}
+
+	INIT_LIST_HEAD(&app->suite_list);
+	app->name = name;
+
+	return app;
+
+fail_file:
+	debugfs_remove_recursive(app->dir);
+fail_debugfs:
+	kfree(app);
+fail_kmalloc:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_create_application);
+
+void kutf_destroy_application(struct kutf_application *app)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &app->suite_list) {
+		struct kutf_suite *suite;
+
+		suite = list_entry(pos, struct kutf_suite, node);
+		kutf_destroy_suite(suite);
+	}
+
+	debugfs_remove_recursive(app->dir);
+	kfree(app);
+}
+EXPORT_SYMBOL(kutf_destroy_application);
+
+static struct kutf_context *kutf_create_context(
+		struct kutf_test_fixture *test_fix)
+{
+	struct kutf_context *new_context;
+
+	new_context = kmalloc(sizeof(*new_context), GFP_KERNEL);
+	if (!new_context) {
+		pr_err("Failed to allocate test context");
+		goto fail_alloc;
+	}
+
+	new_context->result_set = kutf_create_result_set();
+	if (!new_context->result_set) {
+		pr_err("Failed to create result set");
+		goto fail_result_set;
+	}
+
+	new_context->test_fix = test_fix;
+	/* Save the pointer to the suite as the callbacks will require it */
+	new_context->suite = test_fix->test_func->suite;
+	new_context->status = KUTF_RESULT_UNKNOWN;
+	new_context->expected_status = KUTF_RESULT_UNKNOWN;
+
+	kutf_mempool_init(&new_context->fixture_pool);
+	new_context->fixture = NULL;
+	new_context->fixture_index = test_fix->fixture_index;
+	new_context->fixture_name = NULL;
+	new_context->test_data = test_fix->test_func->test_data;
+
+	new_context->userdata.flags = 0;
+	INIT_LIST_HEAD(&new_context->userdata.input_head);
+	init_waitqueue_head(&new_context->userdata.input_waitq);
+
+	INIT_WORK(&new_context->work, kutf_run_test);
+
+	kref_init(&new_context->kref);
+
+	return new_context;
+
+fail_result_set:
+	kfree(new_context);
+fail_alloc:
+	return NULL;
+}
+
+static void kutf_destroy_context(struct kref *kref)
+{
+	struct kutf_context *context;
+
+	context = container_of(kref, struct kutf_context, kref);
+	kutf_destroy_result_set(context->result_set);
+	kutf_mempool_destroy(&context->fixture_pool);
+	kfree(context);
+}
+
+static void kutf_context_get(struct kutf_context *context)
+{
+	kref_get(&context->kref);
+}
+
+static void kutf_context_put(struct kutf_context *context)
+{
+	kref_put(&context->kref, kutf_destroy_context);
+}
+
+
+static void kutf_set_result(struct kutf_context *context,
+		enum kutf_result_status status)
+{
+	context->status = status;
+}
+
+static void kutf_set_expected_result(struct kutf_context *context,
+		enum kutf_result_status expected_status)
+{
+	context->expected_status = expected_status;
+}
+
+/**
+ * kutf_test_log_result() - Log a result for the specified test context
+ * @context:	Test context
+ * @message:	Result string
+ * @new_status:	Result status
+ */
+static void kutf_test_log_result(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status)
+{
+	if (context->status < new_status)
+		context->status = new_status;
+
+	if (context->expected_status != new_status)
+		kutf_add_result(context, new_status, message);
+}
+
+void kutf_test_log_result_external(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status)
+{
+	kutf_test_log_result(context, message, new_status);
+}
+EXPORT_SYMBOL(kutf_test_log_result_external);
+
+void kutf_test_expect_abort(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_ABORT);
+}
+EXPORT_SYMBOL(kutf_test_expect_abort);
+
+void kutf_test_expect_fatal(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_FATAL);
+}
+EXPORT_SYMBOL(kutf_test_expect_fatal);
+
+void kutf_test_expect_fail(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_FAIL);
+}
+EXPORT_SYMBOL(kutf_test_expect_fail);
+
+void kutf_test_expect_warn(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_WARN);
+}
+EXPORT_SYMBOL(kutf_test_expect_warn);
+
+void kutf_test_expect_pass(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_PASS);
+}
+EXPORT_SYMBOL(kutf_test_expect_pass);
+
+void kutf_test_skip(struct kutf_context *context)
+{
+	kutf_set_result(context, KUTF_RESULT_SKIP);
+	kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN);
+
+	kutf_test_log_result(context, "Test skipped", KUTF_RESULT_SKIP);
+}
+EXPORT_SYMBOL(kutf_test_skip);
+
+void kutf_test_skip_msg(struct kutf_context *context, const char *message)
+{
+	kutf_set_result(context, KUTF_RESULT_SKIP);
+	kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN);
+
+	kutf_test_log_result(context, kutf_dsprintf(&context->fixture_pool,
+			     "Test skipped: %s", message), KUTF_RESULT_SKIP);
+	kutf_test_log_result(context, "!!!Test skipped!!!", KUTF_RESULT_SKIP);
+}
+EXPORT_SYMBOL(kutf_test_skip_msg);
+
+void kutf_test_debug(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_DEBUG);
+}
+EXPORT_SYMBOL(kutf_test_debug);
+
+void kutf_test_pass(struct kutf_context *context, char const *message)
+{
+	static const char explicit_message[] = "(explicit pass)";
+
+	if (!message)
+		message = explicit_message;
+
+	kutf_test_log_result(context, message, KUTF_RESULT_PASS);
+}
+EXPORT_SYMBOL(kutf_test_pass);
+
+void kutf_test_info(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_INFO);
+}
+EXPORT_SYMBOL(kutf_test_info);
+
+void kutf_test_warn(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_WARN);
+}
+EXPORT_SYMBOL(kutf_test_warn);
+
+void kutf_test_fail(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_FAIL);
+}
+EXPORT_SYMBOL(kutf_test_fail);
+
+void kutf_test_fatal(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_FATAL);
+}
+EXPORT_SYMBOL(kutf_test_fatal);
+
+void kutf_test_abort(struct kutf_context *context)
+{
+	kutf_test_log_result(context, "", KUTF_RESULT_ABORT);
+}
+EXPORT_SYMBOL(kutf_test_abort);
+
+/**
+ * init_kutf_core() - Module entry point.
+ *
+ * Create the base entry point in debugfs.
+ */
+static int __init init_kutf_core(void)
+{
+	kutf_workq = alloc_workqueue("kutf workq", WQ_UNBOUND, 1);
+	if (!kutf_workq)
+		return -ENOMEM;
+
+	base_dir = debugfs_create_dir("kutf_tests", NULL);
+	if (!base_dir) {
+		destroy_workqueue(kutf_workq);
+		kutf_workq = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * exit_kutf_core() - Module exit point.
+ *
+ * Remove the base entry point in debugfs.
+ */
+static void __exit exit_kutf_core(void)
+{
+	debugfs_remove_recursive(base_dir);
+
+	if (kutf_workq)
+		destroy_workqueue(kutf_workq);
+}
+
+#else	/* defined(CONFIG_DEBUG_FS) */
+
+/**
+ * init_kutf_core() - Module entry point.
+ *
+ * Stub for when build against a kernel without debugfs support
+ */
+static int __init init_kutf_core(void)
+{
+	pr_debug("KUTF requires a kernel with debug fs support");
+
+	return -ENODEV;
+}
+
+/**
+ * exit_kutf_core() - Module exit point.
+ *
+ * Stub for when build against a kernel without debugfs support
+ */
+static void __exit exit_kutf_core(void)
+{
+}
+#endif	/* defined(CONFIG_DEBUG_FS) */
+
+MODULE_LICENSE("GPL");
+
+module_init(init_kutf_core);
+module_exit(exit_kutf_core);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c
new file mode 100644
index 0000000..7f5ac51
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF utility functions */
+
+#include <linux/mutex.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+#include <kutf/kutf_utils.h>
+#include <kutf/kutf_mem.h>
+
+static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN];
+
+DEFINE_MUTEX(buffer_lock);
+
+const char *kutf_dsprintf(struct kutf_mempool *pool,
+		const char *fmt, ...)
+{
+	va_list args;
+	int len;
+	int size;
+	void *buffer;
+
+	mutex_lock(&buffer_lock);
+	va_start(args, fmt);
+	len = vsnprintf(tmp_buffer, sizeof(tmp_buffer), fmt, args);
+	va_end(args);
+
+	if (len < 0) {
+		pr_err("kutf_dsprintf: Bad format dsprintf format %s\n", fmt);
+		goto fail_format;
+	}
+
+	if (len >= sizeof(tmp_buffer)) {
+		pr_warn("kutf_dsprintf: Truncated dsprintf message %s\n", fmt);
+		size = sizeof(tmp_buffer);
+	} else {
+		size = len + 1;
+	}
+
+	buffer = kutf_mempool_alloc(pool, size);
+	if (!buffer)
+		goto fail_alloc;
+
+	memcpy(buffer, tmp_buffer, size);
+	mutex_unlock(&buffer_lock);
+
+	return buffer;
+
+fail_alloc:
+fail_format:
+	mutex_unlock(&buffer_lock);
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_dsprintf);
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/sconscript b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/sconscript
new file mode 100644
index 0000000..98f6446
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/kutf/sconscript
@@ -0,0 +1,27 @@
+#
+# (C) COPYRIGHT 2014-2016, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+Import('kutf_env')
+
+make_args = kutf_env.kernel_get_config_defines(ret_list = True)
+
+mod = kutf_env.BuildKernelModule('$STATIC_LIB_PATH/kutf.ko', Glob('*.c'), make_args = make_args)
+kutf_env.KernelObjTarget('kutf', mod)
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild
new file mode 100644
index 0000000..ca8c512
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android
+
+obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test.o
+
+mali_kutf_irq_test-y := mali_kutf_irq_test_main.o
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig
new file mode 100644
index 0000000..4a3863a
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig
@@ -0,0 +1,29 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+config MALI_IRQ_LATENCY
+ tristate "Mali GPU IRQ latency measurement"
+ depends on MALI_MIDGARD && MALI_DEBUG && MALI_KUTF
+ default m
+ help
+   This option will build a test module mali_kutf_irq_test that
+   can determine the latency of the Mali GPU IRQ on your system.
+   Choosing M here will generate a single module called mali_kutf_irq_test.
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
new file mode 100755
index 0000000..40df117
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
@@ -0,0 +1,48 @@
+#
+# (C) COPYRIGHT 2015, 2017-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+TEST_CCFLAGS := \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	$(SCONS_CFLAGS) \
+	-I$(CURDIR)/../include \
+	-I$(CURDIR)/../../../../../../include \
+	-I$(CURDIR)/../../../ \
+	-I$(CURDIR)/../../ \
+	-I$(CURDIR)/../../backend/gpu \
+	-I$(CURDIR)/ \
+	-I$(srctree)/drivers/staging/android \
+	-I$(srctree)/include/linux
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS="$(TEST_CCFLAGS)" KBUILD_EXTRA_SYMBOLS="$(CURDIR)/../kutf/Module.symvers $(CURDIR)/../../Module.symvers" modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp
new file mode 100755
index 0000000..e1f77b0
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp
@@ -0,0 +1,27 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_kernel_module {
+    name: "mali_kutf_irq_test",
+    defaults: ["kernel_test_module_defaults"],
+    srcs: [
+        "Kbuild",
+        "mali_kutf_irq_test_main.c",
+    ],
+    extra_symbols: ["mali_kbase"],
+    install_group: "IG_tests",
+    enabled: false,
+    unit_test_kernel_modules: {
+        enabled: true,
+        kbuild_options: ["CONFIG_MALI_IRQ_LATENCY=m"],
+    },
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
new file mode 100644
index 0000000..5013a9d
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
@@ -0,0 +1,274 @@
+/*
+ *
+ * (C) COPYRIGHT 2016, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+
+#include "mali_kbase.h"
+#include <midgard/backend/gpu/mali_kbase_device_internal.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_utils.h>
+
+/*
+ * This file contains the code which is used for measuring interrupt latency
+ * of the Mali GPU IRQ. In particular, function mali_kutf_irq_latency() is
+ * used with this purpose and it is called within KUTF framework - a kernel
+ * unit test framework. The measured latency provided by this test should
+ * be representative for the latency of the Mali JOB/MMU IRQs as well.
+ */
+
+/* KUTF test application pointer for this test */
+struct kutf_application *irq_app;
+
+/**
+ * struct kutf_irq_fixture data - test fixture used by the test functions.
+ * @kbdev:	kbase device for the GPU.
+ *
+ */
+struct kutf_irq_fixture_data {
+	struct kbase_device *kbdev;
+};
+
+#define SEC_TO_NANO(s)	      ((s)*1000000000LL)
+
+/* ID for the GPU IRQ */
+#define GPU_IRQ_HANDLER 2
+
+#define NR_TEST_IRQS 1000000
+
+/* IRQ for the test to trigger. Currently MULTIPLE_GPU_FAULTS as we would not
+ * expect to see this in normal use (e.g., when Android is running). */
+#define TEST_IRQ MULTIPLE_GPU_FAULTS
+
+#define IRQ_TIMEOUT HZ
+
+/* Kernel API for setting irq throttle hook callback and irq time in us*/
+extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+		irq_handler_t custom_handler,
+		int irq_type);
+extern irqreturn_t kbase_gpu_irq_handler(int irq, void *data);
+
+static DECLARE_WAIT_QUEUE_HEAD(wait);
+static bool triggered;
+static u64 irq_time;
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+/**
+ * kbase_gpu_irq_custom_handler - Custom IRQ throttle handler
+ * @irq:  IRQ number
+ * @data: Data associated with this IRQ
+ *
+ * Return: state of the IRQ
+ */
+static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data)
+{
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+	if (val & TEST_IRQ) {
+		struct timespec tval;
+
+		getnstimeofday(&tval);
+		irq_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec);
+
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val,
+				NULL);
+
+		triggered = true;
+		wake_up(&wait);
+
+		return IRQ_HANDLED;
+	}
+
+	/* Trigger main irq handler */
+	return kbase_gpu_irq_handler(irq, data);
+}
+
+/**
+ * mali_kutf_irq_default_create_fixture() - Creates the fixture data required
+ *                                          for all the tests in the irq suite.
+ * @context:             KUTF context.
+ *
+ * Return: Fixture data created on success or NULL on failure
+ */
+static void *mali_kutf_irq_default_create_fixture(
+		struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data;
+
+	data = kutf_mempool_alloc(&context->fixture_pool,
+			sizeof(struct kutf_irq_fixture_data));
+
+	if (!data)
+		goto fail;
+
+	/* Acquire the kbase device */
+	data->kbdev = kbase_find_device(-1);
+	if (data->kbdev == NULL) {
+		kutf_test_fail(context, "Failed to find kbase device");
+		goto fail;
+	}
+
+	return data;
+
+fail:
+	return NULL;
+}
+
+/**
+ * mali_kutf_irq_default_remove_fixture() - Destroy fixture data previously
+ *                          created by mali_kutf_irq_default_create_fixture.
+ *
+ * @context:             KUTF context.
+ */
+static void mali_kutf_irq_default_remove_fixture(
+		struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data = context->fixture;
+	struct kbase_device *kbdev = data->kbdev;
+
+	kbase_release_device(kbdev);
+}
+
+/**
+ * mali_kutf_irq_latency() - measure GPU IRQ latency
+ * @context:		kutf context within which to perform the test
+ *
+ * The test triggers IRQs manually, and measures the
+ * time between triggering the IRQ and the IRQ handler being executed.
+ *
+ * This is not a traditional test, in that the pass/fail status has little
+ * meaning (other than indicating that the IRQ handler executed at all). Instead
+ * the results are in the latencies provided with the test result. There is no
+ * meaningful pass/fail result that can be obtained here, instead the latencies
+ * are provided for manual analysis only.
+ */
+static void mali_kutf_irq_latency(struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data = context->fixture;
+	struct kbase_device *kbdev = data->kbdev;
+	u64 min_time = U64_MAX, max_time = 0, average_time = 0;
+	int i;
+	bool test_failed = false;
+
+	/* Force GPU to be powered */
+	kbase_pm_context_active(kbdev);
+
+	kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler,
+			GPU_IRQ_HANDLER);
+
+	for (i = 0; i < NR_TEST_IRQS; i++) {
+		struct timespec tval;
+		u64 start_time;
+		int ret;
+
+		triggered = false;
+		getnstimeofday(&tval);
+		start_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec);
+
+		/* Trigger fake IRQ */
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+				TEST_IRQ, NULL);
+
+		ret = wait_event_timeout(wait, triggered != false, IRQ_TIMEOUT);
+
+		if (ret == 0) {
+			kutf_test_fail(context, "Timed out waiting for IRQ\n");
+			test_failed = true;
+			break;
+		}
+
+		if ((irq_time - start_time) < min_time)
+			min_time = irq_time - start_time;
+		if ((irq_time - start_time) > max_time)
+			max_time = irq_time - start_time;
+		average_time += irq_time - start_time;
+
+		udelay(10);
+	}
+
+	/* Go back to default handler */
+	kbase_set_custom_irq_handler(kbdev, NULL, GPU_IRQ_HANDLER);
+
+	kbase_pm_context_idle(kbdev);
+
+	if (!test_failed) {
+		const char *results;
+
+		do_div(average_time, NR_TEST_IRQS);
+		results = kutf_dsprintf(&context->fixture_pool,
+				"Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n",
+				min_time, max_time, average_time);
+		kutf_test_pass(context, results);
+	}
+}
+
+/**
+ * Module entry point for this test.
+ */
+int mali_kutf_irq_test_main_init(void)
+{
+	struct kutf_suite *suite;
+
+	irq_app = kutf_create_application("irq");
+
+	if (NULL == irq_app) {
+		pr_warn("Creation of test application failed!\n");
+		return -ENOMEM;
+	}
+
+	suite = kutf_create_suite(irq_app, "irq_default",
+			1, mali_kutf_irq_default_create_fixture,
+			mali_kutf_irq_default_remove_fixture);
+
+	if (NULL == suite) {
+		pr_warn("Creation of test suite failed!\n");
+		kutf_destroy_application(irq_app);
+		return -ENOMEM;
+	}
+
+	kutf_add_test(suite, 0x0, "irq_latency",
+			mali_kutf_irq_latency);
+	return 0;
+}
+
+/**
+ * Module exit point for this test.
+ */
+void mali_kutf_irq_test_main_exit(void)
+{
+	kutf_destroy_application(irq_app);
+}
+
+module_init(mali_kutf_irq_test_main_init);
+module_exit(mali_kutf_irq_test_main_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript
new file mode 100755
index 0000000..0ec5ce7
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript
@@ -0,0 +1,36 @@
+#
+# (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+import os
+Import('env')
+
+src = [Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/*.c'), Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile')]
+
+if env.GetOption('clean') :
+	env.Execute(Action("make clean", '[CLEAN] mali_kutf_irq_test'))
+	cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [])
+	env.KernelObjTarget('mali_kutf_irq_test', cmd)
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make MALI_UNIT_TEST=${unit} MALI_CUSTOMER_RELEASE=${release} %s && ( ( [ -f mali_kutf_irq_test.ko ] && cp mali_kutf_irq_test.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/mali_kutf_irq_test.ko)" % env.kernel_get_config_defines(), '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [makeAction])
+	env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/kutf.ko')
+	env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/mali_kbase.ko')
+	env.KernelObjTarget('mali_kutf_irq_test', cmd)
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/sconscript b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/sconscript
new file mode 100644
index 0000000..0bd24a5
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/tests/sconscript
@@ -0,0 +1,44 @@
+#
+# (C) COPYRIGHT 2010-2011, 2013, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+Import ('env')
+
+kutf_env = env.Clone()
+kutf_env.Append(CPPPATH = '#kernel/drivers/gpu/arm/midgard/tests/include')
+Export('kutf_env')
+
+if Glob('internal/sconscript'):
+	SConscript('internal/sconscript')
+
+if kutf_env['debug'] == '1':
+	SConscript('kutf/sconscript')
+	SConscript('mali_kutf_irq_test/sconscript')
+
+	if Glob('kutf_test/sconscript'):
+		SConscript('kutf_test/sconscript')
+
+	if Glob('kutf_test_runner/sconscript'):
+		SConscript('kutf_test_runner/sconscript')
+
+if env['unit'] == '1':
+	SConscript('mali_kutf_ipa_test/sconscript')
+	SConscript('mali_kutf_ipa_unit_test/sconscript')
+	SConscript('mali_kutf_vinstr_test/sconscript')
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
new file mode 100755
index 0000000..6857eb7
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
@@ -0,0 +1,322 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ *//*
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include "linux/mman.h"
+#include "../mali_kbase.h"
+
+/* mali_kbase_mmap.c
+ *
+ * This file contains Linux specific implementation of
+ * kbase_get_unmapped_area() interface.
+ */
+
+
+/**
+ * align_and_check() - Align the specified pointer to the provided alignment and
+ *                     check that it is still in range.
+ * @gap_end:        Highest possible start address for allocation (end of gap in
+ *                  address space)
+ * @gap_start:      Start address of current memory area / gap in address space
+ * @info:           vm_unmapped_area_info structure passed to caller, containing
+ *                  alignment, length and limits for the allocation
+ * @is_shader_code: True if the allocation is for shader code (which has
+ *                  additional alignment requirements)
+ *
+ * Return: true if gap_end is now aligned correctly and is still in range,
+ *         false otherwise
+ */
+static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
+		struct vm_unmapped_area_info *info, bool is_shader_code)
+{
+	/* Compute highest gap address at the desired alignment */
+	(*gap_end) -= info->length;
+	(*gap_end) -= (*gap_end - info->align_offset) & info->align_mask;
+
+	if (is_shader_code) {
+		/* Check for 4GB boundary */
+		if (0 == (*gap_end & BASE_MEM_MASK_4GB))
+			(*gap_end) -= (info->align_offset ? info->align_offset :
+					info->length);
+		if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB))
+			(*gap_end) -= (info->align_offset ? info->align_offset :
+					info->length);
+
+		if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end +
+				info->length) & BASE_MEM_MASK_4GB))
+			return false;
+	}
+
+
+	if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
+		return false;
+
+
+	return true;
+}
+
+/**
+ * kbase_unmapped_area_topdown() - allocates new areas top-down from
+ *                                 below the stack limit.
+ * @info:              Information about the memory area to allocate.
+ * @is_shader_code:    Boolean which denotes whether the allocated area is
+ *                      intended for the use by shader core in which case a
+ *                      special alignment requirements apply.
+ *
+ * The unmapped_area_topdown() function in the Linux kernel is not exported
+ * using EXPORT_SYMBOL_GPL macro. To allow us to call this function from a
+ * module and also make use of the fact that some of the requirements for
+ * the unmapped area are known in advance, we implemented an extended version
+ * of this function and prefixed it with 'kbase_'.
+ *
+ * The difference in the call parameter list comes from the fact that
+ * kbase_unmapped_area_topdown() is called with additional parameter which
+ * is provided to denote whether the allocation is for a shader core memory
+ * or not. This is significant since the executable shader core memory has
+ * additional alignment requirements.
+ *
+ * The modification of the original Linux function lies in how the computation
+ * of the highest gap address at the desired alignment is performed once the
+ * gap with desirable properties is found. For this purpose a special function
+ * is introduced (@ref align_and_check()) which beside computing the gap end
+ * at the desired alignment also performs additional alignment check for the
+ * case when the memory is executable shader core memory. For such case, it is
+ * ensured that the gap does not end on a 4GB boundary.
+ *
+ * Return: address of the found gap end (high limit) if area is found;
+ *         -ENOMEM if search is unsuccessful
+*/
+
+static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
+		*info, bool is_shader_code)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+
+	/* Adjust search length to account for worst case alignment overhead */
+	length = info->length + info->align_mask;
+	if (length < info->length)
+		return -ENOMEM;
+
+	/*
+	 * Adjust search limits by the desired length.
+	 * See implementation comment at top of unmapped_area().
+	 */
+	gap_end = info->high_limit;
+	if (gap_end < length)
+		return -ENOMEM;
+	high_limit = gap_end - length;
+
+	if (info->low_limit > high_limit)
+		return -ENOMEM;
+	low_limit = info->low_limit + length;
+
+	/* Check highest gap, which does not precede any rbtree node */
+	gap_start = mm->highest_vm_end;
+	if (gap_start <= high_limit) {
+		if (align_and_check(&gap_end, gap_start, info, is_shader_code))
+			return gap_end;
+	}
+
+	/* Check if rbtree root looks promising */
+	if (RB_EMPTY_ROOT(&mm->mm_rb))
+		return -ENOMEM;
+	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+	if (vma->rb_subtree_gap < length)
+		return -ENOMEM;
+
+	while (true) {
+		/* Visit right subtree if it looks promising */
+		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+			struct vm_area_struct *right =
+				rb_entry(vma->vm_rb.rb_right,
+					 struct vm_area_struct, vm_rb);
+			if (right->rb_subtree_gap >= length) {
+				vma = right;
+				continue;
+			}
+		}
+
+check_current:
+		/* Check if current node has a suitable gap */
+		gap_end = vma->vm_start;
+		if (gap_end < low_limit)
+			return -ENOMEM;
+		if (gap_start <= high_limit && gap_end - gap_start >= length) {
+			/* We found a suitable gap. Clip it with the original
+			 * high_limit. */
+			if (gap_end > info->high_limit)
+				gap_end = info->high_limit;
+
+			if (align_and_check(&gap_end, gap_start, info,
+					is_shader_code))
+				return gap_end;
+		}
+
+		/* Visit left subtree if it looks promising */
+		if (vma->vm_rb.rb_left) {
+			struct vm_area_struct *left =
+				rb_entry(vma->vm_rb.rb_left,
+					 struct vm_area_struct, vm_rb);
+			if (left->rb_subtree_gap >= length) {
+				vma = left;
+				continue;
+			}
+		}
+
+		/* Go back up the rbtree to find next candidate node */
+		while (true) {
+			struct rb_node *prev = &vma->vm_rb;
+
+			if (!rb_parent(prev))
+				return -ENOMEM;
+			vma = rb_entry(rb_parent(prev),
+				       struct vm_area_struct, vm_rb);
+			if (prev == vma->vm_rb.rb_right) {
+				gap_start = vma->vm_prev ?
+					vma->vm_prev->vm_end : 0;
+				goto check_current;
+			}
+		}
+	}
+
+	return -ENOMEM;
+}
+
+
+/* This function is based on Linux kernel's arch_get_unmapped_area, but
+ * simplified slightly. Modifications come from the fact that some values
+ * about the memory area are known in advance.
+ */
+unsigned long kbase_get_unmapped_area(struct file *filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+	unsigned long align_offset = 0;
+	unsigned long align_mask = 0;
+	unsigned long high_limit = mm->mmap_base;
+	unsigned long low_limit = PAGE_SIZE;
+	int cpu_va_bits = BITS_PER_LONG;
+	int gpu_pc_bits =
+	      kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	bool is_shader_code = false;
+	unsigned long ret;
+
+	/* err on fixed address */
+	if ((flags & MAP_FIXED) || addr)
+		return -EINVAL;
+
+#ifdef CONFIG_64BIT
+	/* too big? */
+	if (len > TASK_SIZE - SZ_2M)
+		return -ENOMEM;
+
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+
+		high_limit = min_t(unsigned long, mm->mmap_base,
+				(kctx->same_va_end << PAGE_SHIFT));
+
+		/* If there's enough (> 33 bits) of GPU VA space, align
+		 * to 2MB boundaries.
+		 */
+		if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
+			if (len >= SZ_2M) {
+				align_offset = SZ_2M;
+				align_mask = SZ_2M - 1;
+			}
+		}
+
+		low_limit = SZ_2M;
+	} else {
+		cpu_va_bits = 32;
+	}
+#endif /* CONFIG_64BIT */
+	if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) &&
+		(PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) {
+			int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+			struct kbase_va_region *reg =
+					kctx->pending_regions[cookie];
+
+			if (!reg)
+				return -EINVAL;
+
+			if (!(reg->flags & KBASE_REG_GPU_NX)) {
+				if (cpu_va_bits > gpu_pc_bits) {
+					align_offset = 1ULL << gpu_pc_bits;
+					align_mask = align_offset - 1;
+					is_shader_code = true;
+				}
+			} else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
+				unsigned long extent_bytes =
+				     (unsigned long)(reg->extent << PAGE_SHIFT);
+				/* kbase_check_alloc_sizes() already satisfies
+				 * these checks, but they're here to avoid
+				 * maintenance hazards due to the assumptions
+				 * involved */
+				WARN_ON(reg->extent > (ULONG_MAX >> PAGE_SHIFT));
+				WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT));
+				WARN_ON(!is_power_of_2(extent_bytes));
+				align_mask = extent_bytes - 1;
+				align_offset =
+				      extent_bytes - (reg->initial_commit << PAGE_SHIFT);
+			}
+#ifndef CONFIG_64BIT
+	} else {
+		return current->mm->get_unmapped_area(filp, addr, len, pgoff,
+						      flags);
+#endif
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = low_limit;
+	info.high_limit = high_limit;
+	info.align_offset = align_offset;
+	info.align_mask = align_mask;
+
+	ret = kbase_unmapped_area_topdown(&info, is_shader_code);
+
+	if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
+			high_limit < (kctx->same_va_end << PAGE_SHIFT)) {
+		/* Retry above mmap_base */
+		info.low_limit = mm->mmap_base;
+		info.high_limit = min_t(u64, TASK_SIZE,
+					(kctx->same_va_end << PAGE_SHIFT));
+
+		ret = kbase_unmapped_area_topdown(&info, is_shader_code);
+	}
+
+	return ret;
+}
diff --git a/bifrost/r12p0/kernel/drivers/gpu/arm/sconscript b/bifrost/r12p0/kernel/drivers/gpu/arm/sconscript
new file mode 100644
index 0000000..dd02acd
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/arm/sconscript
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+import glob
+
+
+SConscript('midgard/sconscript')
+
diff --git a/bifrost/r12p0/kernel/drivers/gpu/sconscript b/bifrost/r12p0/kernel/drivers/gpu/sconscript
new file mode 100644
index 0000000..1167edd
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/gpu/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2013,2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+SConscript('arm/sconscript')
+
diff --git a/bifrost/r12p0/kernel/drivers/sconscript b/bifrost/r12p0/kernel/drivers/sconscript
new file mode 100644
index 0000000..3c9b7c1
--- /dev/null
+++ b/bifrost/r12p0/kernel/drivers/sconscript
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+if Glob('base/sconscript'):
+	SConscript('base/sconscript')
+
+if Glob('video/sconscript'):
+	SConscript('video/sconscript')
+
+SConscript('gpu/sconscript')
diff --git a/bifrost/r12p0/kernel/include/linux/dma-buf-test-exporter.h b/bifrost/r12p0/kernel/include/linux/dma-buf-test-exporter.h
new file mode 100644
index 0000000..95bc6f8
--- /dev/null
+++ b/bifrost/r12p0/kernel/include/linux/dma-buf-test-exporter.h
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_
+#define _LINUX_DMA_BUF_TEST_EXPORTER_H_
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define DMA_BUF_TE_VER_MAJOR 1
+#define DMA_BUF_TE_VER_MINOR 0
+#define DMA_BUF_TE_ENQ 0x642d7465
+#define DMA_BUF_TE_ACK 0x68692100
+
+struct dma_buf_te_ioctl_version
+{
+	int op;    /**< Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */
+	int major; /**< Major version */
+	int minor; /**< Minor version */
+};
+
+struct dma_buf_te_ioctl_alloc
+{
+	__u64 size; /* size of buffer to allocate, in pages */
+};
+
+struct dma_buf_te_ioctl_status
+{
+	/* in */
+	int fd; /* the dma_buf to query, only dma_buf objects exported by this driver is supported */
+	/* out */
+	int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */
+	int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */
+	int cpu_mappings;    /* number of cpu mappings (active 'mmap's) */
+};
+
+struct dma_buf_te_ioctl_set_failing
+{
+	/* in */
+	int fd; /* the dma_buf to set failure mode for, only dma_buf objects exported by this driver is supported */
+
+	/* zero = no fail injection, non-zero = inject failure */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+};
+
+struct dma_buf_te_ioctl_fill
+{
+	int fd;
+	unsigned int value;
+};
+
+#define DMA_BUF_TE_IOCTL_BASE 'E'
+/* Below all returning 0 if successful or -errcode except DMA_BUF_TE_ALLOC which will return fd or -errcode */
+#define DMA_BUF_TE_VERSION         _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version)
+#define DMA_BUF_TE_ALLOC           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_QUERY           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status)
+#define DMA_BUF_TE_SET_FAILING     _IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing)
+#define DMA_BUF_TE_ALLOC_CONT      _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_FILL            _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill)
+
+#endif /* _LINUX_DMA_BUF_TEST_EXPORTER_H_ */
diff --git a/bifrost/r12p0/kernel/include/linux/protected_mode_switcher.h b/bifrost/r12p0/kernel/include/linux/protected_mode_switcher.h
new file mode 100644
index 0000000..8778d81
--- /dev/null
+++ b/bifrost/r12p0/kernel/include/linux/protected_mode_switcher.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _PROTECTED_MODE_SWITCH_H_
+#define _PROTECTED_MODE_SWITCH_H_
+
+struct protected_mode_device;
+
+/**
+ * struct protected_mode_ops - Callbacks for protected mode switch operations
+ *
+ * @protected_mode_enable:  Callback to enable protected mode for device
+ * @protected_mode_disable: Callback to disable protected mode for device
+ */
+struct protected_mode_ops {
+	/**
+	 * protected_mode_enable() - Enable protected mode on device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enable)(
+			struct protected_mode_device *protected_dev);
+
+	/**
+	 * protected_mode_disable() - Disable protected mode on device, and
+	 *                            reset device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_disable)(
+			struct protected_mode_device *protected_dev);
+};
+
+/**
+ * struct protected_mode_device - Device structure for protected mode devices
+ *
+ * @ops  - Callbacks associated with this device
+ * @data - Pointer to device private data
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct protected_mode_device {
+	struct protected_mode_ops ops;
+	void *data;
+};
+
+#endif /* _PROTECTED_MODE_SWITCH_H_ */
diff --git a/bifrost/r12p0/kernel/include/linux/ump-common.h b/bifrost/r12p0/kernel/include/linux/ump-common.h
new file mode 100644
index 0000000..e9f2cbb
--- /dev/null
+++ b/bifrost/r12p0/kernel/include/linux/ump-common.h
@@ -0,0 +1,257 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file ump-common.h
+ *
+ * This file contains some common enum values used both in both the user and kernel space side of UMP.
+ */
+
+#ifndef _UMP_COMMON_H_
+#define _UMP_COMMON_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#define UMP_UINT32_MAX (4294967295U)
+#define UMP_UINT64_MAX (18446744073709551615ULL)
+
+#ifdef __GNUC__
+	#define CHECK_RESULT        __attribute__((__warn_unused_result__))
+	#define INLINE	__inline__
+#else
+	#define CHECK_RESULT
+	#define INLINE __inline
+#endif
+
+#ifndef STATIC
+	#define STATIC static
+#endif
+
+/**
+ * Values to identify major and minor version of UMP
+ */
+#define UMP_VERSION_MAJOR 2
+#define UMP_VERSION_MINOR 0
+
+/**
+ * Typedef for a secure ID, a system wide identifier for UMP memory buffers.
+ */
+typedef int32_t ump_secure_id;
+
+/**
+ * Value to indicate an invalid secure Id.
+ */
+#define UMP_INVALID_SECURE_ID  ((ump_secure_id)0)
+
+/**
+ * UMP error codes.
+ */
+typedef enum
+{
+	UMP_OK    = 0, /**< indicates success */
+	UMP_ERROR = 1  /**< indicates failure */
+} ump_result;
+
+/**
+ * Allocation flag bits.
+ *
+ * ump_allocate accepts zero or more flags to specify the type of memory to allocate and how to expose it to devices.
+ *
+ * For each supported device there are 4 flags to control access permissions and give usage characteristic hints to optimize the allocation/mapping.
+ * They are;
+ * @li @a UMP_PROT_<device>_RD   read permission
+ * @li @a UMP_PROT_<device>_WR   write permission
+ * @li @a UMP_HINT_<device>_RD   read often
+ * @li @a UMP_HINT_<device>_WR   written often
+ *
+ * 5 devices are currently supported, with a device being the CPU itself.
+ * The other 4 devices will be mapped to real devices per SoC design.
+ * They are just named W,X,Y,Z by UMP as it has no knowledge of their real names/identifiers.
+ * As an example device W could be a camera device while device Z could be an ARM GPU device, leaving X and Y unused.
+ *
+ * 2 additional flags control the allocation;
+ * @li @a UMP_CONSTRAINT_PHYSICALLY_LINEAR   the allocation must be physical linear. Typical for devices without an MMU and no IOMMU to help it.
+ * @li @a UMP_PROT_SHAREABLE                 the allocation can be shared with other processes on the system. Without this flag the returned allocation won't be resolvable in other processes.
+ *
+ * All UMP allocation are growable unless they're @a UMP_PROT_SHAREABLE.
+ * The hint bits should be used to indicate the access pattern so the driver can select the most optimal memory type and cache settings based on the what the system supports.
+ */
+typedef enum
+{
+	/* Generic helpers */
+	UMP_PROT_DEVICE_RD = (1u << 0),
+	UMP_PROT_DEVICE_WR = (1u << 1),
+	UMP_HINT_DEVICE_RD = (1u << 2),
+	UMP_HINT_DEVICE_WR = (1u << 3),
+	UMP_DEVICE_MASK = 0xF,
+	UMP_DEVICE_CPU_SHIFT = 0,
+	UMP_DEVICE_W_SHIFT = 4,
+	UMP_DEVICE_X_SHIFT = 8,
+	UMP_DEVICE_Y_SHIFT = 12,
+	UMP_DEVICE_Z_SHIFT = 16,
+
+	/* CPU protection and hints. */
+	UMP_PROT_CPU_RD = (1u <<  0),
+	UMP_PROT_CPU_WR = (1u <<  1),
+	UMP_HINT_CPU_RD = (1u <<  2),
+	UMP_HINT_CPU_WR = (1u <<  3),
+
+	/* device W */
+	UMP_PROT_W_RD = (1u <<  4),
+	UMP_PROT_W_WR = (1u <<  5),
+	UMP_HINT_W_RD = (1u <<  6),
+	UMP_HINT_W_WR = (1u <<  7),
+
+	/* device X */
+	UMP_PROT_X_RD = (1u <<  8),
+	UMP_PROT_X_WR = (1u <<  9),
+	UMP_HINT_X_RD = (1u << 10),
+	UMP_HINT_X_WR = (1u << 11),
+	
+	/* device Y */
+	UMP_PROT_Y_RD = (1u << 12),
+	UMP_PROT_Y_WR = (1u << 13),
+	UMP_HINT_Y_RD = (1u << 14),
+	UMP_HINT_Y_WR = (1u << 15),
+
+	/* device Z */
+	UMP_PROT_Z_RD = (1u << 16),
+	UMP_PROT_Z_WR = (1u << 17),
+	UMP_HINT_Z_RD = (1u << 18),
+	UMP_HINT_Z_WR = (1u << 19),
+
+	/* 20-26 reserved for future use */
+	UMPP_ALLOCBITS_UNUSED = (0x7Fu << 20),
+	/** Allocations marked as @ UMP_CONSTRAINT_UNCACHED won't be mapped as cached by the cpu  */
+	UMP_CONSTRAINT_UNCACHED = (1u << 27),
+	/** Require 32-bit physically addressable memory */
+	UMP_CONSTRAINT_32BIT_ADDRESSABLE = (1u << 28),
+	/** For devices without an MMU and with no IOMMU assistance. */
+	UMP_CONSTRAINT_PHYSICALLY_LINEAR = (1u << 29),
+	/** Shareable must be set to allow the allocation to be used by other processes, the default is non-shared */
+	UMP_PROT_SHAREABLE = (1u << 30)
+	/* (1u << 31) should not be used to ensure compiler portability */
+} ump_allocation_bits;
+
+/**
+ * ump_allocation_bits combination argument type.
+ *
+ * Type used to pass zero or more bits from the @ref ump_allocation_bits enum
+ */
+typedef uint32_t ump_alloc_flags;
+
+
+/**
+ *  Default allocation flags for UMP v1 compatible allocations.
+ */
+#define UMP_V1_API_DEFAULT_ALLOCATION_FLAGS		UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | \
+												UMP_PROT_W_RD | UMP_PROT_W_WR |	\
+												UMP_PROT_X_RD | UMP_PROT_X_WR |	\
+												UMP_PROT_Y_RD | UMP_PROT_Y_WR |	\
+												UMP_PROT_Z_RD | UMP_PROT_Z_WR |	\
+												UMP_PROT_SHAREABLE |	\
+												UMP_CONSTRAINT_32BIT_ADDRESSABLE
+
+/**
+ * CPU cache sync operations.
+ *
+ * Cache synchronization operations to pass to @ref ump_cpu_msync_now
+ */
+enum
+{
+	/**
+	 * Cleans any dirty cache lines to main memory, but the data will still be available in the cache.
+	 * After a clean the contents of memory is considered to be "owned" by the device.
+	 * */
+	UMP_MSYNC_CLEAN = 1,
+
+	/** Cleans any dirty cache lines to main memory and Ejects all lines from the cache.
+	 * After an clean&invalidate the contents of memory is considered to be "owned" by the CPU.
+	 * Any subsequent access will fetch data from main memory.
+	 *
+	 * @note Due to CPUs doing speculative prefetching a UMP_MSYNC_CLEAN_AND_INVALIDATE must be done before and after interacting with hardware.
+	 * */
+	UMP_MSYNC_CLEAN_AND_INVALIDATE
+
+};
+
+typedef uint32_t ump_cpu_msync_op;
+
+/**
+ * Memory import types supported.
+ * If new import types are added they will appear here.
+ * They must be added before UMPP_EXTERNAL_MEM_COUNT and
+ * must be assigned an explicit sequantial number.
+ *
+ * @li UMP_EXTERNAL_MEM_TYPE_ION - Import an ION allocation
+ *                                 Takes a int* (pointer to a file descriptor)
+ *                                 Another ION reference is taken which is released on the final ump_release
+ */
+enum ump_external_memory_type
+{
+	UMPP_EXTERNAL_MEM_TYPE_UNUSED = 0, /* reserve type 0 */
+	UMP_EXTERNAL_MEM_TYPE_ION = 1,
+	UMPP_EXTERNAL_MEM_COUNT
+};
+
+/** @name UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Allocation constraints.
+ *
+ * Allocation flags to pass @ref ump_ref_drv_allocate
+ *
+ * UMP v1 API only.
+ */
+typedef enum
+{
+	/** the allocation is mapped as noncached. */
+	UMP_REF_DRV_CONSTRAINT_NONE = 0,
+	/** not supported. */
+	UMP_REF_DRV_CONSTRAINT_PHYSICALLY_LINEAR = 1,
+	/** the allocation is mapped as cached by the cpu. */
+	UMP_REF_DRV_CONSTRAINT_USE_CACHE = 4
+} ump_alloc_constraints;
+
+/* @} */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _UMP_COMMON_H_ */
diff --git a/bifrost/r12p0/kernel/include/linux/ump-import.h b/bifrost/r12p0/kernel/include/linux/ump-import.h
new file mode 100644
index 0000000..3a0b501
--- /dev/null
+++ b/bifrost/r12p0/kernel/include/linux/ump-import.h
@@ -0,0 +1,104 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _UMP_IMPORT_H_
+#define _UMP_IMPORT_H_
+
+#include <linux/ump.h>
+#include <linux/module.h>
+
+/**
+ * UMP import module info.
+ * Contains information about the Linux module providing the import module,
+ * used to block unloading of the Linux module while imported memory exists.
+ * Lists the functions implementing the UMP import functions.
+ */
+struct ump_import_handler
+{
+	/**
+	 * Linux module of the import handler
+	 */
+	struct module * linux_module;
+
+	/**
+	 * UMP session usage begin.
+	 *
+	 * Called when a UMP session first is bound to the handler.
+	 * Typically used to set up any import module specific per-session data.
+	 * The function returns a pointer to this data in the output pointer custom_session_data
+	 * which will be passed to \a session_end and \a import.
+	 *
+	 * Note: custom_session_data must be set to non-NULL if successful.
+	 * If no pointer is needed set it a magic value to validate instead.
+	 *
+	 * @param[out] custom_session_data Pointer to a generic pointer where any data can be stored
+	 * @return 0 on success, error code if the session could not be initiated.
+	 */
+	int (*session_begin)(void ** custom_session_data);
+
+	/**
+	 * UMP session usage end.
+	 *
+	 * Called when a UMP session is no longer using the handler.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 */
+	void (*session_end)(void * custom_session_data);
+
+	/**
+	 * Import request.
+	 *
+	 * Called when a client has asked to import a resource of the type the import module was installed for.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * The requested flags must be verified to be valid to apply to the imported memory.
+	 * If not valid return UMP_DD_INVALID_MEMORY_HANDLE.
+	 * If the flags are found to be valid call \a ump_dd_create_from_phys_blocks_64 to create a handle.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 * @param[in] phandle Pointer to the handle to import
+	 * @param     flags The requested UMPv2 flags to assign to the imported handle
+	 * @return UMP_DD_INVALID_MEMORY_HANDLE if the import failed, a valid ump handle on success
+	 */
+	ump_dd_handle (*import)(void * custom_session_data, void * phandle, ump_alloc_flags flags);
+};
+
+/**
+ * Import module registration.
+ * Registers a ump_import_handler structure for a memory type.
+ * @param     type    Type of the memory to register a handler for
+ * @param[in] handler Handler strcture to install
+ * @return 0 on success, a Linux error code on failure
+ */
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler);
+
+/**
+ * Import module deregistration.
+ * Uninstalls the handler for the given memory type.
+ * @param type Type of the memory to unregister the handler for
+ */
+void ump_import_module_unregister(enum ump_external_memory_type type);
+
+#endif /* _UMP_IMPORT_H_ */
diff --git a/bifrost/r12p0/kernel/include/linux/ump-ioctl.h b/bifrost/r12p0/kernel/include/linux/ump-ioctl.h
new file mode 100644
index 0000000..0428bb0
--- /dev/null
+++ b/bifrost/r12p0/kernel/include/linux/ump-ioctl.h
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _UMP_IOCTL_H_
+#define _UMP_IOCTL_H_
+
+#include <linux/ump-common.h>
+
+/*
+ * The order and size of the members of these have been chosen so the structures look the same in 32-bit and 64-bit builds.
+ * If any changes are done build the ump_struct_size_checker test for 32-bit and 64-bit targets. Both must compile successfully to commit.
+ */
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union ump_pointer
+{
+	void * value; /**< client should store their pointers here */
+	uint32_t compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	uint64_t sizer; /**< Force 64-bit storage for all clients regardless */
+} ump_pointer;
+
+/**
+ * UMP allocation request.
+ * Used when performing ump_allocate
+ */
+typedef struct ump_k_allocate
+{
+	uint64_t size; /**< [in] Size in bytes to allocate */
+	ump_secure_id secure_id; /**< [out] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [in] Flags to use when allocating */
+} ump_k_allocate;
+
+/**
+ * UMP size query request.
+ * Used when performing ump_size_get
+ */
+typedef struct ump_k_sizequery
+{
+	uint64_t size; /**< [out] Size of allocation */
+	ump_secure_id secure_id; /**< [in] ID of allocation to query the size of */
+	uint32_t padding; /* don't remove */
+} ump_k_sizequery;
+
+/**
+ * UMP cache synchronization request.
+ * Used when performing ump_cpu_msync_now
+ */
+typedef struct ump_k_msync
+{
+	ump_pointer mapped_ptr; /**< [in] CPU VA to perform cache operation on */
+	ump_secure_id secure_id; /**< [in] ID of allocation to perform cache operation on */
+	ump_cpu_msync_op cache_operation; /**< [in] Cache operation to perform */
+	uint64_t size; /**< [in] Size in bytes of the range to synchronize */
+} ump_k_msync;
+
+/**
+ * UMP memory retain request.
+ * Used when performing ump_retain
+ */
+typedef struct ump_k_retain
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to retain a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_retain;
+
+/**
+ * UMP memory release request.
+ * Used when performing ump_release
+ */
+typedef struct ump_k_release
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to release a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_release;
+
+typedef struct ump_k_import
+{
+	ump_pointer phandle;                /**< [in]  Pointer to handle to import */
+	uint32_t type;                           /**< [in]  Type of handle to import */
+	ump_alloc_flags alloc_flags;        /**< [in]  Flags to assign to the imported memory */
+	ump_secure_id secure_id;            /**< [out] UMP ID representing the imported memory */
+	uint32_t padding;                        /* don't remove */
+} ump_k_import;
+
+/**
+ * UMP allocation flags request.
+ * Used when performing umpp_get_allocation_flags
+ *
+ * used only by v1 API
+ */
+typedef struct ump_k_allocation_flags
+{
+	ump_secure_id secure_id; /**< [in] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [out] Flags to use when allocating */
+} ump_k_allocation_flags;
+
+#define UMP_CALL_MAX_SIZE 512
+/*
+ * Ioctl definitions
+ */
+
+/* Use '~' as magic number */
+
+#define UMP_IOC_MAGIC  '~'
+
+#define UMP_FUNC_ALLOCATE _IOWR(UMP_IOC_MAGIC,  1, ump_k_allocate)
+#define UMP_FUNC_SIZEQUERY _IOWR(UMP_IOC_MAGIC,  2, ump_k_sizequery)
+#define UMP_FUNC_MSYNC _IOWR(UMP_IOC_MAGIC,  3, ump_k_msync)
+#define UMP_FUNC_RETAIN _IOW(UMP_IOC_MAGIC,  4, ump_k_retain)
+#define UMP_FUNC_RELEASE _IOW(UMP_IOC_MAGIC,  5, ump_k_release)
+#define UMP_FUNC_ALLOCATION_FLAGS_GET _IOWR(UMP_IOC_MAGIC,  6, ump_k_allocation_flags)
+#define UMP_FUNC_IMPORT _IOWR(UMP_IOC_MAGIC, 7, ump_k_import)
+
+/*max ioctl sequential number*/
+#define UMP_IOC_MAXNR 7
+
+/* 15 bits for the UMP ID (allowing 32768 IDs) */
+#define UMP_LINUX_ID_BITS 15
+#define UMP_LINUX_ID_MASK ((1ULL << UMP_LINUX_ID_BITS) - 1ULL)
+
+/* 64-bit (really 52 bits) encoding: 15 bits for the ID, 37 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_64 37
+#define UMP_LINUX_OFFSET_MASK_64 ((1ULL << UMP_LINUX_OFFSET_BITS_64)-1)
+/* 32-bit encoding: 15 bits for the ID, 17 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_32 17
+#define UMP_LINUX_OFFSET_MASK_32 ((1ULL << UMP_LINUX_OFFSET_BITS_32)-1)
+
+#if __SIZEOF_LONG__ == 8
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_64
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_64
+#else
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_32
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_32
+#endif
+
+#endif /* _UMP_IOCTL_H_ */
diff --git a/bifrost/r12p0/kernel/include/linux/ump.h b/bifrost/r12p0/kernel/include/linux/ump.h
new file mode 100644
index 0000000..00e9957
--- /dev/null
+++ b/bifrost/r12p0/kernel/include/linux/ump.h
@@ -0,0 +1,469 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file
+ *
+ * This file contains the kernel space part of the UMP API.
+ *
+ */
+
+#ifndef _UMP_KERNEL_INTERFACE_H_
+#define _UMP_KERNEL_INTERFACE_H_
+
+/**
+ * @addtogroup ump_api
+ * @{
+ */
+
+/** @defgroup ump_kernel_space_api UMP Kernel Space API
+ * @{ */
+
+/**
+ * External representation of a UMP handle in kernel space.
+ */
+typedef void * ump_dd_handle;
+
+#include <linux/ump-common.h>
+
+#define UMP_KERNEL_API_EXPORT
+
+#if defined(__KERNEL__)
+#include <linux/ump-import.h>
+#else
+#include <ump/src/library/common/ump_user.h>
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_INVALID_MEMORY_HANDLE ((ump_dd_handle)0)
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block_64
+{
+	uint64_t addr; /**< The physical address of the block */
+	uint64_t size; /**< The length of the block, in bytes, typically page aligned */
+} ump_dd_physical_block_64;
+
+/**
+ * Security filter hook.
+ *
+ * Each allocation can have a security filter attached to it.@n
+ * The hook receives
+ * @li the secure ID
+ * @li a handle to the allocation
+ * @li  the callback_data argument provided to @ref ump_dd_allocate_64 or @ref ump_dd_create_from_phys_blocks_64
+ *
+ * The hook must return @a true to indicate that access to the handle is allowed or @n
+ * @a false to state that no access is permitted.@n
+ * This hook is guaranteed to be called in the context of the requesting process/address space.
+ *
+ * The arguments provided to the hook are;
+ * @li the secure ID
+ * @li handle to the allocation
+ * @li the callback_data set when registering the hook
+ *
+ * Return value;
+ * @li @a true to permit access
+ * @li @a false to deny access
+ */
+typedef bool (*ump_dd_security_filter)(ump_secure_id, ump_dd_handle, void *);
+
+/**
+ * Final release notify hook.
+ *
+ * Allocations can have a hook attached to them which is called when the last reference to the allocation is released.
+ * No reference count manipulation is allowed on the provided handle, just property querying (ID get, size get, phys block get).
+ * This is similar to finalizers in OO languages.
+ *
+ * The arguments provided to the hook are;
+ * * handle to the allocation
+ * * the callback_data set when registering the hook
+ */
+typedef void (*ump_dd_final_release_callback)(const ump_dd_handle, void *);
+
+/**
+ * Allocate a buffer.
+ * The lifetime of the allocation is controlled by a reference count.
+ * The reference count of the returned buffer is set to 1.
+ * The memory will be freed once the reference count reaches 0.
+ * Use @ref ump_dd_retain and @ref ump_dd_release to control the reference count.
+ * @param size Number of bytes to allocate. Will be padded up to a multiple of the page size.
+ * @param flags Bit-wise OR of zero or more of the allocation flag bits.
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a
+ * secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed,
+ * just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the new allocation, or @a UMP_DD_INVALID_MEMORY_HANDLE on allocation failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/**
+ * Allocation bits getter.
+ * Retrieves the allocation flags used when instantiating the given handle.
+ * Just a copy of the flag given to @ref ump_dd_allocate_64 and @ref ump_dd_create_from_phys_blocks_64
+ * @param mem The handle retrieve the bits for
+ * @return The allocation bits used to instantiate the allocation
+ */
+UMP_KERNEL_API_EXPORT ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem);
+
+
+/**
+ * Retrieves the secure ID for the specified UMP memory.
+ *
+ * This identifier is unique across the entire system, and uniquely identifies
+ * the specified UMP memory allocation. This identifier can later be used through the
+ * @ref ump_dd_from_secure_id or
+ * @ref ump_from_secure_id
+ * functions in order to access this UMP memory, for instance from another process (if shared of course).
+ * Unless the allocation was marked as shared the returned ID will only be resolvable in the same process as did the allocation.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE will result in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_secure_id_get
+ *
+ * @see ump_dd_from_secure_id
+ * @see ump_from_secure_id
+ * @see ump_secure_id_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the secure ID for the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem);
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_release when there is no longer any
+ * use for the retrieved handle.
+ *
+ * If called on an non-shared allocation and this is a different process @a UMP_DD_INVALID_MEMORY_HANDLE will be returned.
+ *
+ * Calling on an @a UMP_INVALID_SECURE_ID will return @a UMP_DD_INVALID_MEMORY_HANDLE
+ *
+ * @note There is a user space equivalent function called @ref ump_from_secure_id
+ *
+ * @see ump_dd_release
+ * @see ump_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get function.
+ *
+ * @return @a UMP_DD_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ * This function will return a pointer to an array of @ref ump_dd_physical_block_64 in @a pArray and the number of array elements in @a pCount
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @param mem Handle to UMP memory.
+ * @param[out] pCount Pointer to where to store the number of items in the returned array
+ * @param[out] pArray Pointer to where to store a pointer to the physical blocks array
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray);
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT uint64_t ump_dd_size_get_64(const ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory allocation.
+ *
+ * The function @ref ump_dd_release must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note You are not required to call @ref ump_dd_retain
+ * for UMP handles returned from
+ * @ref ump_dd_from_secure_id,
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_retain
+ *
+ * @see ump_retain
+ *
+ * @param mem Handle to UMP memory.
+ * @return 0 indicates success, any other value indicates failure.
+ */
+UMP_KERNEL_API_EXPORT int ump_dd_retain(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function must be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * One can only call ump_release when matched with a successful ump_dd_retain, ump_dd_allocate_64 or ump_dd_from_secure_id
+ * If called on an @a UMP_DD_INVALID_MEMORY_HANDLE the function will early out.
+ *
+ * @note There is a user space equivalent function called @ref ump_release
+ *
+ * @see ump_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_release(ump_dd_handle mem);
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ * Once wrapped the memory acts just like a normal allocation coming from @ref ump_dd_allocate_64.
+ * The only exception is that the freed physical memory is not put into the pool of free memory, but instead considered returned to the caller once @a final_release_func returns.
+ * The blocks array will be copied, so no need to hold on to it after this function returns.
+ * @param[in] blocks Array of @ref ump_dd_physical_block_64
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ * @param flags Allocation flags to mark the handle with
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed, just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/** @name UMP v1 API
+ * Functions provided to support compatibility with UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_HANDLE_INVALID UMP_DD_INVALID_MEMORY_HANDLE
+
+/**
+ * UMP error codes for kernel space.
+ */
+typedef enum
+{
+	UMP_DD_SUCCESS, /**< indicates success */
+	UMP_DD_INVALID /**< indicates failure */
+} ump_dd_status_code;
+
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block
+{
+	unsigned long addr; /**< The physical address of the block */
+	unsigned long size; /**< The length of the block, typically page aligned */
+} ump_dd_physical_block;
+
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_reference_release "ump_dd_reference_release" when there is no longer any
+ * use for the retrieved handle.
+ *
+ * @note There is a user space equivalent function called @ref ump_handle_create_from_secure_id "ump_handle_create_from_secure_id"
+ *
+ * @see ump_dd_reference_release
+ * @see ump_handle_create_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get "ump_secure_id_get " function.
+ *
+ * @return UMP_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id);
+
+
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ *
+ * @param[in] blocks Array of @ref ump_dd_physical_block
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ *
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the number of physical blocks used by the specified UMP memory.
+ *
+ * This function retrieves the number of @ref ump_dd_physical_block "ump_dd_physical_block" structs needed
+ * to describe the physical memory layout of the given UMP memory. This can later be used when calling
+ * the functions @ref ump_dd_phys_blocks_get "ump_dd_phys_blocks_get" and
+ * @ref ump_dd_phys_block_get "ump_dd_phys_block_get".
+ *
+ * @see ump_dd_phys_blocks_get
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return The number of ump_dd_physical_block structs required to describe the physical memory layout of the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will fail if the num_blocks parameter is either to large or to small.
+ *
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param blocks An array of @ref ump_dd_physical_block "ump_dd_physical_block" structs that will receive the physical description.
+ * @param num_blocks The number of blocks to return in the blocks array. Use the function
+ *                   @ref ump_dd_phys_block_count_get "ump_dd_phys_block_count_get" first to determine the number of blocks required.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the physical memory block information for specified block for the specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will return UMP_DD_INVALID if the specified index is out of range.
+ *
+ * @see ump_dd_phys_blocks_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param index Which physical info block to retrieve.
+ * @param block Pointer to a @ref ump_dd_physical_block "ump_dd_physical_block" struct which will receive the requested information.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block);
+
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get "ump_size_get"
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory.
+ *
+ * This function adds an extra reference to the specified UMP memory. This function should
+ * be used every time a UMP memory handle is duplicated, that is, assigned to another ump_dd_handle
+ * variable. The function @ref ump_dd_reference_release "ump_dd_reference_release" must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * @note You are not required to call @ref ump_dd_reference_add "ump_dd_reference_add"
+ * for UMP handles returned from
+ * @ref ump_dd_handle_create_from_secure_id "ump_dd_handle_create_from_secure_id",
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_add "ump_reference_add"
+ *
+ * @see ump_reference_add
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function should be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_release "ump_reference_release"
+ *
+ * @see ump_reference_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle mem);
+
+/* @} */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/** @} */ /* end group ump_kernel_space_api */
+
+/** @} */ /* end group ump_api */
+
+#endif /* _UMP_KERNEL_INTERFACE_H_ */
diff --git a/bifrost/r12p0/kernel/license.txt b/bifrost/r12p0/kernel/license.txt
new file mode 100644
index 0000000..77c14bd
--- /dev/null
+++ b/bifrost/r12p0/kernel/license.txt
@@ -0,0 +1,198 @@
+GPLV2 LICENCE AGREEMENT FOR MALI GPUS LINUX KERNEL DEVICE DRIVERS SOURCE CODE
+
+THE USE OF THE SOFTWARE ACCOMPANYING THIS DOCUMENT IS EXPRESSLY SUBJECT TO THE TERMS OF THE GNU GENERAL PUBLIC LICENSE VERSION 2 AS PUBLISHED BY THE FREE SOFTWARE FOUNDATION AND SET OUT BELOW FOR REFERENCE (?GPL LICENCE?). ARM IS ONLY WILLING TO DISTRIBUTE THE SOFTWARE TO YOU ON CONDITION THAT YOU ACCEPT ALL OF THE TERMS IN THE GPL LICENCE PRIOR TO MODIFYING OR DISTRIBUTING THE SOFTWARE.
+
+
+
+Further for the period of three (3) years, ARM hereby offers to make available the source code of any part of the software program that is supplied as object code or in executable form.
+
+
+
+GPL Licence
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+Version 2, June 1991
+
+
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+
+
+
+Preamble
+
+
+
+The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too.
+
+
+
+When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things.
+
+
+
+To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it.
+
+
+
+For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
+
+
+
+We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
+
+
+
+Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations.
+
+
+
+Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all.
+
+
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+
+
+0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you".
+
+
+
+Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program).
+
+Whether that is true depends on what the Program does.
+
+
+
+1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty;
+
+and give any other recipients of the Program a copy of this License along with the Program.
+
+
+
+You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee.
+
+
+
+2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
+
+
+
+a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change.
+
+
+
+b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License.
+
+
+
+c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.)
+
+
+
+These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.
+
+
+
+Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program.
+
+
+
+In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.
+
+
+
+3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following:
+
+
+
+a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.)
+
+
+
+The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.
+
+
+
+If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code.
+
+
+
+4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.
+
+
+
+5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the
+
+Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it.
+
+
+
+6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein.
+
+You are not responsible for enforcing compliance by third parties to this License.
+
+
+
+7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program.
+
+
+
+If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances.
+
+
+
+It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.
+
+
+
+This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
+
+
+
+8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.
+
+
+
+9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+
+
+
+Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation.
+
+
+
+10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.
+
+
+
+NO WARRANTY
+
+
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+/end
+
diff --git a/bifrost/r12p0/kernel/sconscript b/bifrost/r12p0/kernel/sconscript
new file mode 100644
index 0000000..79a730a
--- /dev/null
+++ b/bifrost/r12p0/kernel/sconscript
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2010-2014, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+Import('env')
+
+if env['backend'] == 'kernel' and int(env['kernel_modules']) == 1:
+	SConscript('drivers/sconscript')
diff --git a/bifrost/r19p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt b/bifrost/r19p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
new file mode 100755
index 0000000..2ce2755
--- /dev/null
+++ b/bifrost/r19p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
@@ -0,0 +1,207 @@
+#
+# (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+* ARM Mali Midgard devices
+
+
+Required properties:
+
+- compatible : Should be mali<chip>, replacing digits with x from the back,
+until malit<Major>xx, ending with arm,mali-midgard, the latter not optional.
+- reg : Physical base address of the device and length of the register area.
+- interrupts : Contains the three IRQ lines required by T-6xx devices
+- interrupt-names : Contains the names of IRQ resources in the order they were
+provided in the interrupts property. Must contain: "JOB, "MMU", "GPU".
+
+Optional:
+
+- clocks : One or more pairs of phandle to clock and clock specifier
+           for the Mali device. The order is important: the first clock
+           shall correspond to the "clk_mali" source, while the second clock
+           (that is optional) shall correspond to the "shadercores" source.
+- clock-names : Shall be set to: "clk_mali", "shadercores".
+- mali-supply : Phandle to the top level regulator for the Mali device.
+                Refer to
+Documentation/devicetree/bindings/regulator/regulator.txt for details.
+- shadercores-supply : Phandle to shader cores regulator for the Mali device.
+                       This is optional.
+- operating-points-v2 : Refer to Documentation/devicetree/bindings/power/mali-opp.txt
+for details.
+- jm_config : For T860/T880. Sets job manager configuration. An array containing:
+	- 1 to override the TIMESTAMP value, 0 otherwise.
+	- 1 to override clock gate, forcing them to be always on, 0 otherwise.
+	- 1 to enable job throttle, limiting the number of cores that can be started
+	  simultaneously, 0 otherwise.
+	- Value between 0 and 63 (including). If job throttle is enabled, this is one
+	  less than the number of cores that can be started simultaneously.
+- power_model : Sets the power model parameters. Defined power models include:
+	  "mali-simple-power-model", "mali-g51-power-model", "mali-g52-power-model",
+	  "mali-g52_r1-power-model", "mali-g71-power-model", "mali-g72-power-model",
+	  "mali-g76-power-model" and "mali-g77-power-model".
+	- mali-simple-power-model: this model derives the GPU power usage based
+	  on the GPU voltage scaled by the system temperature. Note: it was
+	  designed for the Juno platform, and may not be suitable for others.
+		- compatible: Should be "arm,mali-simple-power-model"
+		- dynamic-coefficient: Coefficient, in pW/(Hz V^2), which is
+		  multiplied by v^2*f to calculate the dynamic power consumption.
+		- static-coefficient: Coefficient, in uW/V^3, which is
+		  multiplied by v^3 to calculate the static power consumption.
+		- ts: An array containing coefficients for the temperature
+		  scaling factor. This is used to scale the static power by a
+		  factor of tsf/1000000,
+		  where tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0],
+		  and T = temperature in degrees.
+		- thermal-zone: A string identifying the thermal zone used for
+		  the GPU
+		- temp-poll-interval-ms: the interval at which the system
+		  temperature is polled
+	- mali-g*-power-model(s): unless being stated otherwise, these models derive
+	  the GPU power usage based on performance counters, so they are more
+	  accurate.
+		- compatible: Should be, as examples, "arm,mali-g51-power-model" /
+		  "arm,mali-g72-power-model".
+		- scale: the dynamic power calculated by the power model is
+		  multiplied by a factor of 'scale'. This value should be
+		  chosen to match a particular implementation.
+		- min_sample_cycles: Fall back to the simple power model if the
+		  number of GPU cycles for a given counter dump is less than
+		  'min_sample_cycles'. The default value of this should suffice.
+	* Note: when IPA is used, two separate power models (simple and counter-based)
+	  are used at different points so care should be taken to configure
+	  both power models in the device tree (specifically dynamic-coefficient,
+	  static-coefficient and scale) to best match the platform.
+- system-coherency : Sets the coherency protocol to be used for coherent
+		     accesses made from the GPU.
+		     If not set then no coherency is used.
+	- 0  : ACE-Lite
+	- 1  : ACE
+	- 31 : No coherency
+- ipa-model : Sets the IPA model to be used for power management. GPU probe will fail if the
+	      model is not found in the registered models list. If no model is specified here,
+	      a gpu-id based model is picked if available, otherwise the default model is used.
+	- mali-simple-power-model: Default model used on mali
+- protected-mode-switcher : Phandle to device implemented protected mode switching functionality.
+Refer to Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt for one implementation.
+-  idvs-group-size : Override the IDVS group size value. Tasks are sent to
+		     cores in groups of N + 1, so i.e. 0xF means 16 tasks.
+		     Valid values are between 0 to 0x3F (including).
+-  l2-size : Override L2 cache size on GPU that supports it
+-  l2-hash : Override L2 hash function on GPU that supports it
+
+Example for a Mali GPU with 1 clock and no regulators:
+
+gpu@0xfc010000 {
+	compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
+	reg = <0xfc010000 0x4000>;
+	interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+	interrupt-names = "JOB", "MMU", "GPU";
+
+	clocks = <&pclk_mali>;
+	clock-names = "clk_mali";
+	mali-supply = <&vdd_mali>;
+	operating-points-v2 = <&gpu_opp_table>;
+	power_model@0 {
+		compatible = "arm,mali-simple-power-model";
+		static-coefficient = <2427750>;
+		dynamic-coefficient = <4687>;
+		ts = <20000 2000 (-20) 2>;
+		thermal-zone = "gpu";
+	};
+	power_model@1 {
+		compatible = "arm,mali-g71-power-model";
+		scale = <5>;
+	};
+
+	idvs-group-size = <0x7>;
+	l2-size = /bits/ 8 <0x10>;
+	l2-hash = /bits/ 8 <0x04>;
+};
+
+gpu_opp_table: opp_table0 {
+	compatible = "operating-points-v2";
+
+	opp@533000000 {
+		opp-hz = /bits/ 64 <533000000>;
+		opp-microvolt = <1250000>;
+	};
+	opp@450000000 {
+		opp-hz = /bits/ 64 <450000000>;
+		opp-microvolt = <1150000>;
+	};
+	opp@400000000 {
+		opp-hz = /bits/ 64 <400000000>;
+		opp-microvolt = <1125000>;
+	};
+	opp@350000000 {
+		opp-hz = /bits/ 64 <350000000>;
+		opp-microvolt = <1075000>;
+	};
+	opp@266000000 {
+		opp-hz = /bits/ 64 <266000000>;
+		opp-microvolt = <1025000>;
+	};
+	opp@160000000 {
+		opp-hz = /bits/ 64 <160000000>;
+		opp-microvolt = <925000>;
+	};
+	opp@100000000 {
+		opp-hz = /bits/ 64 <100000000>;
+		opp-microvolt = <912500>;
+	};
+};
+
+Example for a Mali GPU with 2 clocks and 2 regulators:
+
+gpu: gpu@6e000000 {
+	compatible = "arm,mali-midgard";
+	reg = <0x0 0x6e000000 0x0 0x200000>;
+	interrupts = <0 168 4>, <0 168 4>, <0 168 4>;
+	interrupt-names = "JOB", "MMU", "GPU";
+	clocks = <&clk_mali 0>, <&clk_mali 1>;
+	clock-names = "clk_mali", "shadercores";
+	mali-supply = <&supply0_3v3>;
+	shadercores-supply = <&supply1_3v3>;
+	system-coherency = <31>;
+	operating-points-v2 = <&gpu_opp_table>;
+};
+
+gpu_opp_table: opp_table0 {
+	compatible = "operating-points-v2", "operating-points-v2-mali";
+
+	opp@0 {
+		opp-hz = /bits/ 64 <50000000>;
+		opp-hz-real = /bits/ 64 <50000000>, /bits/ 64 <45000000>;
+		opp-microvolt = <820000>, <800000>;
+		opp-core-mask = /bits/ 64 <0xf>;
+	};
+	opp@1 {
+		opp-hz = /bits/ 64 <40000000>;
+		opp-hz-real = /bits/ 64 <40000000>, /bits/ 64 <35000000>;
+		opp-microvolt = <720000>, <700000>;
+		opp-core-mask = /bits/ 64 <0x7>;
+	};
+	opp@2 {
+		opp-hz = /bits/ 64 <30000000>;
+		opp-hz-real = /bits/ 64 <30000000>, /bits/ 64 <25000000>;
+		opp-microvolt = <620000>, <700000>;
+		opp-core-mask = /bits/ 64 <0x3>;
+	};
+};
diff --git a/bifrost/r19p0/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt b/bifrost/r19p0/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt
new file mode 100644
index 0000000..fda8f00
--- /dev/null
+++ b/bifrost/r19p0/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt
@@ -0,0 +1,49 @@
+#
+# (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+* Arm memory group manager for Mali GPU device drivers
+
+Required properties:
+
+- compatible: Must be "arm,physical-memory-group-manager"
+
+An example node:
+
+        gpu_physical_memory_group_manager: physical-memory-group-manager {
+                compatible = "arm,physical-memory-group-manager";
+        };
+
+It must be referenced by the GPU as well, see physical-memory-group-manager:
+
+	gpu: gpu@0x6e000000 {
+		compatible = "arm,mali-midgard";
+		reg = <0x0 0x6e000000 0x0 0x200000>;
+		interrupts = <0 168 4>, <0 168 4>, <0 168 4>;
+		interrupt-names = "JOB", "MMU", "GPU";
+		clocks = <&scpi_dvfs 2>;
+		clock-names = "clk_mali";
+		system-coherency = <31>;
+		physical-memory-group-manager = <&gpu_physical_memory_group_manager>;
+		operating-points = <
+			/* KHz uV */
+			50000 820000
+		>;
+	};
diff --git a/bifrost/r19p0/kernel/Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt b/bifrost/r19p0/kernel/Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt
new file mode 100755
index 0000000..280358e
--- /dev/null
+++ b/bifrost/r19p0/kernel/Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+* ARM SMC protected mode switcher devices
+
+Required properties :
+
+- compatible : Must be "arm,smc-protected-mode-switcher"
+- arm,smc,protected_enable : SMC call ID to enable protected mode
+- arm,smc,protected_disable : SMC call ID to disable protected mode and reset
+			      device
+
+An example node :
+
+	gpu_switcher {
+		compatible = "arm,smc-protected-mode-switcher";
+		arm,smc,protected_enable = <0xff06>;
+		arm,smc,protected_disable = <0xff07>;
+	};
diff --git a/bifrost/r19p0/kernel/Documentation/devicetree/bindings/power/mali-opp.txt b/bifrost/r19p0/kernel/Documentation/devicetree/bindings/power/mali-opp.txt
new file mode 100755
index 0000000..7fa78dd
--- /dev/null
+++ b/bifrost/r19p0/kernel/Documentation/devicetree/bindings/power/mali-opp.txt
@@ -0,0 +1,196 @@
+#
+# (C) COPYRIGHT 2017, 2019 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+* ARM Mali Midgard OPP
+
+* OPP Table Node
+
+This describes the OPPs belonging to a device. This node can have following
+properties:
+
+Required properties:
+- compatible: Allow OPPs to express their compatibility. It should be:
+  "operating-points-v2", "operating-points-v2-mali".
+
+- OPP nodes: One or more OPP nodes describing voltage-current-frequency
+  combinations. Their name isn't significant but their phandle can be used to
+  reference an OPP.
+
+* OPP Node
+
+This defines voltage-current-frequency combinations along with other related
+properties.
+
+Required properties:
+- opp-hz: Nominal frequency in Hz, expressed as a 64-bit big-endian integer.
+  This should be treated as a relative performance measurement, taking both GPU
+  frequency and core mask into account.
+
+Optional properties:
+- opp-hz-real: List of one or two real frequencies in Hz, expressed as 64-bit
+  big-endian integers. They shall correspond to the clocks declared under
+  the Mali device node, and follow the same order.
+
+- opp-core-mask: Shader core mask. If neither this or opp-core-count are present
+  then all shader cores will be used for this OPP.
+
+- opp-core-count: Number of cores to use for this OPP. If this is present then
+  the driver will build a core mask using the available core mask provided by
+  the GPU hardware.
+
+  If neither this nor opp-core-mask are present then all shader cores will be
+  used for this OPP.
+
+  If both this and opp-core-mask are present then opp-core-mask is ignored.
+
+- opp-microvolt: List of one or two voltages in micro Volts. They shall correspond
+  to the regulators declared under the Mali device node, and follow the order:
+  "toplevel", "shadercores".
+
+  A single regulator's voltage is specified with an array of size one or three.
+  Single entry is for target voltage and three entries are for <target min max>
+  voltages.
+
+  Entries for multiple regulators must be present in the same order as
+  regulators are specified in device's DT node.
+
+- opp-microvolt-<name>: Named opp-microvolt property. This is exactly similar to
+  the above opp-microvolt property, but allows multiple voltage ranges to be
+  provided for the same OPP. At runtime, the platform can pick a <name> and
+  matching opp-microvolt-<name> property will be enabled for all OPPs. If the
+  platform doesn't pick a specific <name> or the <name> doesn't match with any
+  opp-microvolt-<name> properties, then opp-microvolt property shall be used, if
+  present.
+
+- opp-microamp: The maximum current drawn by the device in microamperes
+  considering system specific parameters (such as transients, process, aging,
+  maximum operating temperature range etc.) as necessary. This may be used to
+  set the most efficient regulator operating mode.
+
+  Should only be set if opp-microvolt is set for the OPP.
+
+  Entries for multiple regulators must be present in the same order as
+  regulators are specified in device's DT node. If this property isn't required
+  for few regulators, then this should be marked as zero for them. If it isn't
+  required for any regulator, then this property need not be present.
+
+- opp-microamp-<name>: Named opp-microamp property. Similar to
+  opp-microvolt-<name> property, but for microamp instead.
+
+- clock-latency-ns: Specifies the maximum possible transition latency (in
+  nanoseconds) for switching to this OPP from any other OPP.
+
+- turbo-mode: Marks the OPP to be used only for turbo modes. Turbo mode is
+  available on some platforms, where the device can run over its operating
+  frequency for a short duration of time limited by the device's power, current
+  and thermal limits.
+
+- opp-suspend: Marks the OPP to be used during device suspend. Only one OPP in
+  the table should have this.
+
+- opp-supported-hw: This enables us to select only a subset of OPPs from the
+  larger OPP table, based on what version of the hardware we are running on. We
+  still can't have multiple nodes with the same opp-hz value in OPP table.
+
+  It's an user defined array containing a hierarchy of hardware version numbers,
+  supported by the OPP. For example: a platform with hierarchy of three levels
+  of versions (A, B and C), this field should be like <X Y Z>, where X
+  corresponds to Version hierarchy A, Y corresponds to version hierarchy B and Z
+  corresponds to version hierarchy C.
+
+  Each level of hierarchy is represented by a 32 bit value, and so there can be
+  only 32 different supported version per hierarchy. i.e. 1 bit per version. A
+  value of 0xFFFFFFFF will enable the OPP for all versions for that hierarchy
+  level. And a value of 0x00000000 will disable the OPP completely, and so we
+  never want that to happen.
+
+  If 32 values aren't sufficient for a version hierarchy, than that version
+  hierarchy can be contained in multiple 32 bit values. i.e. <X Y Z1 Z2> in the
+  above example, Z1 & Z2 refer to the version hierarchy Z.
+
+- status: Marks the node enabled/disabled.
+
+Example for a Juno with 1 clock and 1 regulator:
+
+gpu_opp_table: opp_table0 {
+	compatible = "operating-points-v2", "operating-points-v2-mali";
+
+	opp@112500000 {
+		opp-hz = /bits/ 64 <112500000>;
+		opp-hz-real = /bits/ 64 <450000000>;
+		opp-microvolt = <820000>;
+		opp-core-mask = /bits/ 64 <0x1>;
+		opp-suspend;
+	};
+	opp@225000000 {
+		opp-hz = /bits/ 64 <225000000>;
+		opp-hz-real = /bits/ 64 <450000000>;
+		opp-microvolt = <820000>;
+		opp-core-count = <2>;
+	};
+	opp@450000000 {
+		opp-hz = /bits/ 64 <450000000>;
+		opp-hz-real = /bits/ 64 <450000000>;
+		opp-microvolt = <820000>;
+		opp-core-mask = /bits/ 64 <0xf>;
+	};
+	opp@487500000 {
+		opp-hz = /bits/ 64 <487500000>;
+		opp-microvolt = <825000>;
+	};
+	opp@525000000 {
+		opp-hz = /bits/ 64 <525000000>;
+		opp-microvolt = <850000>;
+	};
+	opp@562500000 {
+		opp-hz = /bits/ 64 <562500000>;
+		opp-microvolt = <875000>;
+	};
+	opp@600000000 {
+		opp-hz = /bits/ 64 <600000000>;
+		opp-microvolt = <900000>;
+	};
+};
+
+Example for a Juno with 2 clocks and 2 regulators:
+
+gpu_opp_table: opp_table0 {
+	compatible = "operating-points-v2", "operating-points-v2-mali";
+
+	opp@0 {
+		opp-hz = /bits/ 64 <50000000>;
+		opp-hz-real = /bits/ 64 <50000000>, /bits/ 64 <45000000>;
+		opp-microvolt = <820000>, <800000>;
+		opp-core-mask = /bits/ 64 <0xf>;
+	};
+	opp@1 {
+		opp-hz = /bits/ 64 <40000000>;
+		opp-hz-real = /bits/ 64 <40000000>, /bits/ 64 <35000000>;
+		opp-microvolt = <720000>, <700000>;
+		opp-core-mask = /bits/ 64 <0x7>;
+	};
+	opp@2 {
+		opp-hz = /bits/ 64 <30000000>;
+		opp-hz-real = /bits/ 64 <30000000>, /bits/ 64 <25000000>;
+		opp-microvolt = <620000>, <700000>;
+		opp-core-mask = /bits/ 64 <0x3>;
+	};
+};
\ No newline at end of file
diff --git a/bifrost/r19p0/kernel/Documentation/dma-buf-test-exporter.txt b/bifrost/r19p0/kernel/Documentation/dma-buf-test-exporter.txt
new file mode 100755
index 0000000..8d8cbc9
--- /dev/null
+++ b/bifrost/r19p0/kernel/Documentation/dma-buf-test-exporter.txt
@@ -0,0 +1,46 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+=====================
+dma-buf-test-exporter
+=====================
+
+Overview
+--------
+
+The dma-buf-test-exporter is a simple exporter of dma_buf objects.
+It has a private API to allocate and manipulate the buffers which are represented as dma_buf fds.
+The private API allows:
+* simple allocation of physically non-contiguous buffers
+* simple allocation of physically contiguous buffers
+* query kernel side API usage stats (number of attachments, number of mappings, mmaps)
+* failure mode configuration (fail attach, mapping, mmap)
+* kernel side memset of buffers
+
+The buffers support all of the dma_buf API, including mmap.
+
+It supports being compiled as a module both in-tree and out-of-tree.
+
+See include/linux/dma-buf-test-exporter.h for the ioctl interface.
+See Documentation/dma-buf-sharing.txt for details on dma_buf.
+
+
diff --git a/bifrost/r19p0/kernel/Mconfig b/bifrost/r19p0/kernel/Mconfig
new file mode 100755
index 0000000..f6978c4
--- /dev/null
+++ b/bifrost/r19p0/kernel/Mconfig
@@ -0,0 +1,20 @@
+# copyright:
+# ----------------------------------------------------------------------------
+# This confidential and proprietary software may be used only as authorized
+# by a licensing agreement from ARM Limited.
+#      (C) COPYRIGHT 2017, 2019 ARM Limited, ALL RIGHTS RESERVED
+# The entire notice above must be reproduced on all authorized copies and
+# copies may only be made to the extent permitted by a licensing agreement
+# from ARM Limited.
+# ----------------------------------------------------------------------------
+
+source "kernel/drivers/gpu/arm/midgard/Mconfig"
+
+config SMC_PROTECTED_MODE_SWITCHER
+	bool
+	default n
+
+config DMA_BUF_SYNC_IOCTL_SUPPORTED
+	bool "Kernel DMA buffers support DMA_BUF_IOCTL_SYNC"
+	depends on BACKEND_KERNEL
+	default y
diff --git a/bifrost/r19p0/kernel/build.bp b/bifrost/r19p0/kernel/build.bp
new file mode 100755
index 0000000..b878ab5
--- /dev/null
+++ b/bifrost/r19p0/kernel/build.bp
@@ -0,0 +1,80 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2016-2019 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_install_group {
+    name: "IG_kernel_modules",
+    android: {
+        install_path: "{{.module_path}}",
+    },
+    linux: {
+        install_path: "{{.install_dir}}/{{.module_path}}",
+    },
+}
+
+bob_defaults {
+    name: "kernel_defaults",
+    enabled: false,
+    exclude_srcs: [
+        "**/*.mod.c",
+    ],
+    local_include_dirs: [
+        "include",
+    ],
+    build_kernel_modules: {
+        enabled: true,
+        kernel_dir: "{{.kernel_dir}}",
+        kernel_cross_compile: "{{.kernel_compiler}}",
+        kernel_cc: "{{.kernel_cc}}",
+        kernel_hostcc: "{{.kernel_hostcc}}",
+        kernel_clang_triple: "{{.kernel_clang_triple}}",
+    },
+    install_group: "IG_kernel_modules",
+    cflags: [
+        "-Wall",
+    ],
+    werror: {
+        cflags: ["-Werror"],
+    },
+    allow_android_tags: {
+        tags: [
+            "eng",
+            "optional",
+        ],
+    },
+}
+
+bob_defaults {
+    name: "kutf_includes",
+    local_include_dirs: [
+        "drivers/gpu/arm/midgard/tests/include",
+    ],
+}
+
+bob_defaults {
+    name: "kernel_test_includes",
+    defaults: ["kutf_includes"],
+    local_include_dirs: [
+        "drivers/gpu/arm",
+        "drivers/gpu/arm/midgard",
+        "drivers/gpu/arm/midgard/backend/gpu",
+    ],
+}
+
+bob_alias {
+    name: "kernel",
+    srcs: [
+        "dma-buf-test-exporter",
+        "smc_protected_mode_switcher",
+        "memory_group_manager",
+        "mali_kbase",
+    ],
+}
diff --git a/bifrost/r19p0/kernel/drivers/base/dma_buf_lock/src/Kbuild b/bifrost/r19p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
new file mode 100755
index 0000000..ddf1bb5
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-m := dma_buf_lock.o
+endif
diff --git a/bifrost/r19p0/kernel/drivers/base/dma_buf_lock/src/Makefile b/bifrost/r19p0/kernel/drivers/base/dma_buf_lock/src/Makefile
new file mode 100755
index 0000000..3b10406
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/dma_buf_lock/src/Makefile
@@ -0,0 +1,38 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: dma_buf_lock
+
+dma_buf_lock:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include"
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/bifrost/r19p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c b/bifrost/r19p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
new file mode 100755
index 0000000..529ce71
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
@@ -0,0 +1,898 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/reservation.h>
+#include <linux/dma-buf.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+
+#include <linux/fence.h>
+
+#define dma_fence_context_alloc(a) fence_context_alloc(a)
+#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e)
+#define dma_fence_get(a) fence_get(a)
+#define dma_fence_put(a) fence_put(a)
+#define dma_fence_signal(a) fence_signal(a)
+#define dma_fence_is_signaled(a) fence_is_signaled(a)
+#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c)
+#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b)
+
+#if (KERNEL_VERSION(4, 9, 68) > LINUX_VERSION_CODE)
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0)
+#else
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0)
+#endif
+
+#else
+
+#include <linux/dma-fence.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
+#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \
+	(a)->status ?: 1 \
+	: 0)
+#endif
+
+#endif /* < 4.10.0 */
+
+#include "dma_buf_lock.h"
+
+/* Maximum number of buffers that a single handle can address */
+#define DMA_BUF_LOCK_BUF_MAX 32
+
+#define DMA_BUF_LOCK_DEBUG 1
+
+#define DMA_BUF_LOCK_INIT_BIAS  0xFF
+
+static dev_t dma_buf_lock_dev;
+static struct cdev dma_buf_lock_cdev;
+static struct class *dma_buf_lock_class;
+static char dma_buf_lock_dev_name[] = "dma_buf_lock";
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static struct file_operations dma_buf_lock_fops =
+{
+	.owner   = THIS_MODULE,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = dma_buf_lock_ioctl,
+#else
+	.ioctl   = dma_buf_lock_ioctl,
+#endif
+	.compat_ioctl   = dma_buf_lock_ioctl,
+};
+
+typedef struct dma_buf_lock_resource
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence fence;
+#else
+	struct dma_fence fence;
+#endif
+	int *list_of_dma_buf_fds;               /* List of buffers copied from userspace */
+	atomic_t locked;                        /* Status of lock */
+	struct dma_buf **dma_bufs;
+	unsigned long exclusive;                /* Exclusive access bitmap */
+	atomic_t fence_dep_count;		/* Number of dma-fence dependencies */
+	struct list_head dma_fence_callbacks;	/* list of all callbacks set up to wait on other fences */
+	wait_queue_head_t wait;
+	struct kref refcount;
+	struct list_head link;
+	struct work_struct work;
+	int count;
+} dma_buf_lock_resource;
+
+/**
+ * struct dma_buf_lock_fence_cb - Callback data struct for dma-fence
+ * @fence_cb: Callback function
+ * @fence:    Pointer to the fence object on which this callback is waiting
+ * @res:      Pointer to dma_buf_lock_resource that is waiting on this callback
+ * @node:     List head for linking this callback to the lock resource
+ */
+struct dma_buf_lock_fence_cb {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence_cb fence_cb;
+	struct fence *fence;
+#else
+	struct dma_fence_cb fence_cb;
+	struct dma_fence *fence;
+#endif
+	struct dma_buf_lock_resource *res;
+	struct list_head node;
+};
+
+static LIST_HEAD(dma_buf_lock_resource_list);
+static DEFINE_MUTEX(dma_buf_lock_mutex);
+
+static inline int is_dma_buf_lock_file(struct file *);
+static void dma_buf_lock_dounlock(struct kref *ref);
+
+
+/*** dma_buf_lock fence part ***/
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(dma_buf_lock_fence_lock);
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_get_driver_name(struct fence *fence)
+#else
+dma_buf_lock_fence_get_driver_name(struct dma_fence *fence)
+#endif
+{
+	return "dma_buf_lock";
+}
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_get_timeline_name(struct fence *fence)
+#else
+dma_buf_lock_fence_get_timeline_name(struct dma_fence *fence)
+#endif
+{
+	return "dma_buf_lock.timeline";
+}
+
+static bool
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_enable_signaling(struct fence *fence)
+#else
+dma_buf_lock_fence_enable_signaling(struct dma_fence *fence)
+#endif
+{
+	return true;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+const struct fence_ops dma_buf_lock_fence_ops = {
+	.wait = fence_default_wait,
+#else
+const struct dma_fence_ops dma_buf_lock_fence_ops = {
+	.wait = dma_fence_default_wait,
+#endif
+	.get_driver_name = dma_buf_lock_fence_get_driver_name,
+	.get_timeline_name = dma_buf_lock_fence_get_timeline_name,
+	.enable_signaling = dma_buf_lock_fence_enable_signaling,
+};
+
+static void
+dma_buf_lock_fence_init(dma_buf_lock_resource *resource)
+{
+	dma_fence_init(&resource->fence,
+		       &dma_buf_lock_fence_ops,
+		       &dma_buf_lock_fence_lock,
+		       0,
+		       0);
+}
+
+static void
+dma_buf_lock_fence_free_callbacks(dma_buf_lock_resource *resource)
+{
+	struct dma_buf_lock_fence_cb *cb, *tmp;
+
+	/* Clean up and free callbacks. */
+	list_for_each_entry_safe(cb, tmp, &resource->dma_fence_callbacks, node) {
+		/* Cancel callbacks that hasn't been called yet and release the
+		 * reference taken in dma_buf_lock_fence_add_callback().
+		 */
+		dma_fence_remove_callback(cb->fence, &cb->fence_cb);
+		dma_fence_put(cb->fence);
+		list_del(&cb->node);
+		kfree(cb);
+	}
+}
+
+static void
+dma_buf_lock_fence_work(struct work_struct *pwork)
+{
+	dma_buf_lock_resource *resource =
+		container_of(pwork, dma_buf_lock_resource, work);
+
+	WARN_ON(atomic_read(&resource->fence_dep_count));
+	WARN_ON(!atomic_read(&resource->locked));
+	WARN_ON(!resource->exclusive);
+
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+}
+
+static void
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_callback(struct fence *fence, struct fence_cb *cb)
+#else
+dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
+#endif
+{
+	struct dma_buf_lock_fence_cb *dma_buf_lock_cb = container_of(cb,
+				struct dma_buf_lock_fence_cb,
+				fence_cb);
+	dma_buf_lock_resource *resource = dma_buf_lock_cb->res;
+
+#if DMA_BUF_LOCK_DEBUG
+	printk(KERN_DEBUG "dma_buf_lock_fence_callback\n");
+#endif
+
+	/* Callback function will be invoked in atomic context. */
+
+	if (atomic_dec_and_test(&resource->fence_dep_count)) {
+		atomic_set(&resource->locked, 1);
+		wake_up(&resource->wait);
+
+		if (resource->exclusive) {
+			/* Warn if the work was already queued */
+			WARN_ON(!schedule_work(&resource->work));
+		}
+	}
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+static int
+dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
+				struct fence *fence,
+				fence_func_t callback)
+#else
+static int
+dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
+				struct dma_fence *fence,
+				dma_fence_func_t callback)
+#endif
+{
+	int err = 0;
+	struct dma_buf_lock_fence_cb *fence_cb;
+
+	if (!fence)
+		return -EINVAL;
+
+	fence_cb = kmalloc(sizeof(*fence_cb), GFP_KERNEL);
+	if (!fence_cb)
+		return -ENOMEM;
+
+	fence_cb->fence = fence;
+	fence_cb->res   = resource;
+	INIT_LIST_HEAD(&fence_cb->node);
+
+	err = dma_fence_add_callback(fence, &fence_cb->fence_cb,
+				     callback);
+
+	if (err == -ENOENT) {
+		/* Fence signaled, get the completion result */
+		err = dma_fence_get_status(fence);
+
+		/* remap success completion to err code */
+		if (err == 1)
+			err = 0;
+
+		kfree(fence_cb);
+	} else if (err) {
+		kfree(fence_cb);
+	} else {
+		/*
+		 * Get reference to fence that will be kept until callback gets
+		 * cleaned up in dma_buf_lock_fence_free_callbacks().
+		 */
+		dma_fence_get(fence);
+		atomic_inc(&resource->fence_dep_count);
+		/* Add callback to resource's list of callbacks */
+		list_add(&fence_cb->node, &resource->dma_fence_callbacks);
+	}
+
+	return err;
+}
+
+static int
+dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource,
+					    struct reservation_object *resv,
+					    bool exclusive)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+#else
+	struct dma_fence *excl_fence = NULL;
+	struct dma_fence **shared_fences = NULL;
+#endif
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = reservation_object_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = dma_buf_lock_fence_add_callback(resource,
+						      excl_fence,
+						      dma_buf_lock_fence_callback);
+
+		/* Release our reference, taken by reservation_object_get_fences_rcu(),
+		 * to the fence. We have set up our callback (if that was possible),
+		 * and it's the fence's owner is responsible for singling the fence
+		 * before allowing it to disappear.
+		 */
+		dma_fence_put(excl_fence);
+
+		if (err)
+			goto out;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = dma_buf_lock_fence_add_callback(resource,
+							      shared_fences[i],
+							      dma_buf_lock_fence_callback);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Release all our references to the shared fences, taken by
+	 * reservation_object_get_fences_rcu(). We have set up our callback (if
+	 * that was possible), and it's the fence's owner is responsible for
+	 * signaling the fence before allowing it to disappear.
+	 */
+out:
+	for (i = 0; i < shared_count; i++)
+		dma_fence_put(shared_fences[i]);
+	kfree(shared_fences);
+
+	return err;
+}
+
+static void
+dma_buf_lock_release_fence_reservation(dma_buf_lock_resource *resource,
+				       struct ww_acquire_ctx *ctx)
+{
+	unsigned int r;
+
+	for (r = 0; r < resource->count; r++)
+		ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock);
+	ww_acquire_fini(ctx);
+}
+
+static int
+dma_buf_lock_acquire_fence_reservation(dma_buf_lock_resource *resource,
+				       struct ww_acquire_ctx *ctx)
+{
+	struct reservation_object *content_resv = NULL;
+	unsigned int content_resv_idx = 0;
+	unsigned int r;
+	int err = 0;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < resource->count; r++) {
+		if (resource->dma_bufs[r]->resv == content_resv) {
+			content_resv = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&resource->dma_bufs[r]->resv->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	content_resv_idx = r;
+
+	/* Unlock the locked one ones */
+	while (r--)
+		ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock);
+
+	if (content_resv)
+		ww_mutex_unlock(&content_resv->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+#if DMA_BUF_LOCK_DEBUG
+		printk(KERN_DEBUG "deadlock at dma_buf fd %i\n",
+		       resource->list_of_dma_buf_fds[content_resv_idx]);
+#endif
+		content_resv = resource->dma_bufs[content_resv_idx]->resv;
+		ww_mutex_lock_slow(&content_resv->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
+{
+	dma_buf_lock_resource *resource;
+
+	if (!is_dma_buf_lock_file(file))
+		return -EINVAL;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_release\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return 0;
+}
+
+static unsigned int dma_buf_lock_handle_poll(struct file *file,
+                                             struct poll_table_struct *wait)
+{
+	dma_buf_lock_resource *resource;
+	unsigned int ret = 0;
+
+	if (!is_dma_buf_lock_file(file))
+		return POLLERR;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll\n");
+#endif
+	if (1 == atomic_read(&resource->locked))
+	{
+		/* Resources have been locked */
+		ret = POLLIN | POLLRDNORM;
+		if (resource->exclusive)
+		{
+			ret |=  POLLOUT | POLLWRNORM;
+		}
+	}
+	else
+	{
+		if (!poll_does_not_wait(wait)) 
+		{
+			poll_wait(file, &resource->wait, wait);
+		}
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll : return %i\n", ret);
+#endif
+	return ret;
+}
+
+static const struct file_operations dma_buf_lock_handle_fops = {
+	.owner		= THIS_MODULE,
+	.release	= dma_buf_lock_handle_release,
+	.poll		= dma_buf_lock_handle_poll,
+};
+
+/*
+ * is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock
+ */
+static inline int is_dma_buf_lock_file(struct file *file)
+{
+	return file->f_op == &dma_buf_lock_handle_fops;
+}
+
+
+
+/*
+ * Start requested lock.
+ *
+ * Allocates required memory, copies dma_buf_fd list from userspace,
+ * acquires related reservation objects, and starts the lock.
+ */
+static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
+{
+	dma_buf_lock_resource *resource;
+	struct ww_acquire_ctx ww_ctx;
+	int size;
+	int fd;
+	int i;
+	int ret;
+
+	if (NULL == request->list_of_dma_buf_fds)
+	{
+		return -EINVAL;
+	}
+	if (request->count <= 0)
+	{
+		return -EINVAL;
+	}
+	if (request->count > DMA_BUF_LOCK_BUF_MAX)
+	{
+		return -EINVAL;
+	}
+	if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE &&
+	    request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
+	{
+		return -EINVAL;
+	}
+
+	resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
+	if (NULL == resource)
+	{
+		return -ENOMEM;
+	}
+
+	atomic_set(&resource->locked, 0);
+	kref_init(&resource->refcount);
+	INIT_LIST_HEAD(&resource->link);
+	INIT_WORK(&resource->work, dma_buf_lock_fence_work);
+	resource->count = request->count;
+
+	/* Allocate space to store dma_buf_fds received from user space */
+	size = request->count * sizeof(int);
+	resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->list_of_dma_buf_fds)
+	{
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Allocate space to store dma_buf pointers associated with dma_buf_fds */
+	size = sizeof(struct dma_buf *) * request->count;
+	resource->dma_bufs = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->dma_bufs)
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Copy requested list of dma_buf_fds from user space */
+	size = request->count * sizeof(int);
+	if (0 != copy_from_user(resource->list_of_dma_buf_fds, (void __user *)request->list_of_dma_buf_fds, size))
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource->dma_bufs);
+		kfree(resource);
+		return -ENOMEM;
+	}
+#if DMA_BUF_LOCK_DEBUG
+	for (i = 0; i < request->count; i++)
+	{
+		printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
+	}
+#endif
+
+	/* Initialize the fence associated with dma_buf_lock resource */
+	dma_buf_lock_fence_init(resource);
+
+	INIT_LIST_HEAD(&resource->dma_fence_callbacks);
+
+	atomic_set(&resource->fence_dep_count, DMA_BUF_LOCK_INIT_BIAS);
+
+	/* Add resource to global list */
+	mutex_lock(&dma_buf_lock_mutex);
+
+	list_add(&resource->link, &dma_buf_lock_resource_list);
+
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	for (i = 0; i < request->count; i++)
+	{
+		/* Convert fd into dma_buf structure */
+		resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
+
+		if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i])))
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+
+		/*Check the reservation object associated with dma_buf */
+		if (NULL == resource->dma_bufs[i]->resv)
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+#if DMA_BUF_LOCK_DEBUG
+		printk(KERN_DEBUG "dma_buf_lock_dolock : dma_buf_fd %i dma_buf %p dma_fence reservation %p\n",
+		       resource->list_of_dma_buf_fds[i], resource->dma_bufs[i], resource->dma_bufs[i]->resv);
+#endif
+	}
+
+	init_waitqueue_head(&resource->wait);
+
+	kref_get(&resource->refcount);
+
+	/* Create file descriptor associated with lock request */
+	fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops,
+	                      (void *)resource, 0);
+	if (fd < 0)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+		return fd;
+	}
+
+	resource->exclusive = request->exclusive;
+
+	/* Start locking process */
+	ret = dma_buf_lock_acquire_fence_reservation(resource, &ww_ctx);
+	if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+		printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d locking reservations.\n", ret);
+#endif
+		put_unused_fd(fd);
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+		return ret;
+	}
+
+	/* Take an extra reference for exclusive access, which will be dropped
+	 * once the pre-existing fences attached to dma-buf resources, for which
+	 * we have commited for exclusive access, are signaled.
+	 * At a given time there can be only one exclusive fence attached to a
+	 * reservation object, so the new exclusive fence replaces the original
+	 * fence and the future sync is done against the new fence which is
+	 * supposed to be signaled only after the original fence was signaled.
+	 * If the new exclusive fence is signaled prematurely then the resources
+	 * would become available for new access while they are already being
+	 * written to by the original owner.
+	 */
+	if (resource->exclusive)
+		kref_get(&resource->refcount);
+
+	for (i = 0; i < request->count; i++) {
+		struct reservation_object *resv = resource->dma_bufs[i]->resv;
+
+		if (!test_bit(i, &resource->exclusive)) {
+			ret = reservation_object_reserve_shared(resv);
+			if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d reserving space for shared fence.\n", ret);
+#endif
+				break;
+			}
+
+			ret = dma_buf_lock_add_fence_reservation_callback(resource,
+									  resv,
+									  false);
+			if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret);
+#endif
+				break;
+			}
+
+			reservation_object_add_shared_fence(resv, &resource->fence);
+		} else {
+			ret = dma_buf_lock_add_fence_reservation_callback(resource,
+									  resv,
+									  true);
+			if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret);
+#endif
+				break;
+			}
+
+			reservation_object_add_excl_fence(resv, &resource->fence);
+		}
+	}
+
+	dma_buf_lock_release_fence_reservation(resource, &ww_ctx);
+
+	/* Test if the callbacks were already triggered */
+	if (!atomic_sub_return(DMA_BUF_LOCK_INIT_BIAS, &resource->fence_dep_count)) {
+		atomic_set(&resource->locked, 1);
+
+		/* Drop the extra reference taken for exclusive access */
+		if (resource->exclusive)
+			dma_buf_lock_fence_work(&resource->work);
+	}
+
+	if (IS_ERR_VALUE((unsigned long)ret))
+	{
+		put_unused_fd(fd);
+
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+
+		return ret;
+	}
+
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_dolock : complete\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return fd;
+}
+
+static void dma_buf_lock_dounlock(struct kref *ref)
+{
+	int i;
+	dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
+
+	atomic_set(&resource->locked, 0);
+
+	/* Signal the resource's fence. */
+	dma_fence_signal(&resource->fence);
+
+	dma_buf_lock_fence_free_callbacks(resource);
+
+	list_del(&resource->link);
+
+	for (i = 0; i < resource->count; i++)
+	{
+		if (resource->dma_bufs[i])
+			dma_buf_put(resource->dma_bufs[i]);
+	}
+
+	kfree(resource->dma_bufs);
+	kfree(resource->list_of_dma_buf_fds);
+	dma_fence_put(&resource->fence);
+}
+
+static int __init dma_buf_lock_init(void)
+{
+	int err;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init\n");
+#endif
+	err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
+
+	if (0 == err)
+	{
+		cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops);
+
+		err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1);
+
+		if (0 == err)
+		{
+			dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
+			if (IS_ERR(dma_buf_lock_class))
+			{
+				err = PTR_ERR(dma_buf_lock_class);
+			}
+			else
+			{
+				struct device *mdev;
+				mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev, NULL, dma_buf_lock_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return 0;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(dma_buf_lock_class);
+			}
+			cdev_del(&dma_buf_lock_cdev);
+		}
+
+		unregister_chrdev_region(dma_buf_lock_dev, 1);
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init failed\n");
+#endif
+	return err;
+}
+
+static void __exit dma_buf_lock_exit(void)
+{
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_exit\n");
+#endif
+
+	/* Unlock all outstanding references */
+	while (1)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		if (list_empty(&dma_buf_lock_resource_list))
+		{
+			mutex_unlock(&dma_buf_lock_mutex);
+			break;
+		}
+		else
+		{
+			dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next, 
+			                                             dma_buf_lock_resource, link);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+		}
+	}
+
+	device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
+
+	class_destroy(dma_buf_lock_class);
+
+	cdev_del(&dma_buf_lock_cdev);
+
+	unregister_chrdev_region(dma_buf_lock_dev, 1);
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	dma_buf_lock_k_request request;
+	int size = _IOC_SIZE(cmd);
+
+	if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
+	{
+		return -ENOTTY;
+	}
+
+	switch (cmd)
+	{
+		case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
+			if (size != sizeof(dma_buf_lock_k_request))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&request, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+#if DMA_BUF_LOCK_DEBUG
+			printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
+#endif
+			return dma_buf_lock_dolock(&request);
+	}
+
+	return -ENOTTY;
+}
+
+module_init(dma_buf_lock_init);
+module_exit(dma_buf_lock_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/bifrost/r19p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h b/bifrost/r19p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
new file mode 100755
index 0000000..f2ae575
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _DMA_BUF_LOCK_H
+#define _DMA_BUF_LOCK_H
+
+typedef enum dma_buf_lock_exclusive
+{
+	DMA_BUF_LOCK_NONEXCLUSIVE = 0,
+	DMA_BUF_LOCK_EXCLUSIVE = -1
+} dma_buf_lock_exclusive;
+
+typedef struct dma_buf_lock_k_request
+{
+	int count;
+	int *list_of_dma_buf_fds;
+	int timeout;
+	dma_buf_lock_exclusive exclusive;
+} dma_buf_lock_k_request;
+
+#define DMA_BUF_LOCK_IOC_MAGIC '~'
+
+#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC       _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
+
+#define DMA_BUF_LOCK_IOC_MINNR 11
+#define DMA_BUF_LOCK_IOC_MAXNR 11
+
+#endif /* _DMA_BUF_LOCK_H */
diff --git a/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild b/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
new file mode 100755
index 0000000..c382b79
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma-buf-test-exporter.o
+endif
diff --git a/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig b/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
new file mode 100755
index 0000000..66ca1bc
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+config DMA_SHARED_BUFFER_TEST_EXPORTER
+	tristate "Test exporter for the dma-buf framework"
+	depends on DMA_SHARED_BUFFER
+	help
+	  This option enables the test exporter usable to help test importerts.
diff --git a/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/Makefile b/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
new file mode 100755
index 0000000..528582c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
@@ -0,0 +1,36 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/build.bp b/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/build.bp
new file mode 100755
index 0000000..45508d3
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/build.bp
@@ -0,0 +1,23 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2017 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_kernel_module {
+    name: "dma-buf-test-exporter",
+    srcs: [
+        "Kbuild",
+        "dma-buf-test-exporter.c",
+    ],
+    kbuild_options: [
+        "CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m",
+    ],
+    defaults: ["kernel_defaults"],
+}
diff --git a/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
new file mode 100755
index 0000000..1a176b4
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
@@ -0,0 +1,828 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/dma-buf-test-exporter.h>
+#include <linux/dma-buf.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/atomic.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif
+#include <linux/dma-mapping.h>
+#endif
+
+/* Maximum size allowed in a single DMA_BUF_TE_ALLOC call */
+#define DMA_BUF_TE_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */
+
+struct dma_buf_te_alloc {
+	/* the real alloc */
+	size_t nr_pages;
+	struct page **pages;
+
+	/* the debug usage tracking */
+	int nr_attached_devices;
+	int nr_device_mappings;
+	int nr_cpu_mappings;
+
+	/* failure simulation */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+
+	bool contiguous;
+	dma_addr_t contig_dma_addr;
+	void *contig_cpu_addr;
+};
+
+struct dma_buf_te_attachment {
+	struct sg_table *sg;
+	bool attachment_mapped;
+};
+
+static struct miscdevice te_device;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0))
+static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment)
+#else
+static int dma_buf_te_attach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
+#endif
+{
+	struct dma_buf_te_alloc	*alloc;
+	alloc = buf->priv;
+
+	if (alloc->fail_attach)
+		return -EFAULT;
+
+	attachment->priv = kzalloc(sizeof(struct dma_buf_te_attachment), GFP_KERNEL);
+	if (!attachment->priv)
+		return -ENOMEM;
+
+	/* dma_buf is externally locked during call */
+	alloc->nr_attached_devices++;
+	return 0;
+}
+
+static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc *alloc = buf->priv;
+	struct dma_buf_te_attachment *pa = attachment->priv;
+
+	/* dma_buf is externally locked during call */
+
+	WARN(pa->attachment_mapped, "WARNING: dma-buf-test-exporter detected detach with open device mappings");
+
+	alloc->nr_attached_devices--;
+
+	kfree(pa);
+}
+
+static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction)
+{
+	struct sg_table *sg;
+	struct scatterlist *iter;
+	struct dma_buf_te_alloc	*alloc;
+	struct dma_buf_te_attachment *pa = attachment->priv;
+	size_t i;
+	int ret;
+
+	alloc = attachment->dmabuf->priv;
+
+	if (alloc->fail_map)
+		return ERR_PTR(-ENOMEM);
+
+	if (WARN(pa->attachment_mapped,
+	    "WARNING: Attempted to map already mapped attachment."))
+		return ERR_PTR(-EBUSY);
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* if the ARCH can't chain we can't have allocs larger than a single sg can hold */
+	if (alloc->nr_pages > SG_MAX_SINGLE_ALLOC)
+		return ERR_PTR(-EINVAL);
+#endif
+
+	sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (!sg)
+		return ERR_PTR(-ENOMEM);
+
+	/* from here we access the allocation object, so lock the dmabuf pointing to it */
+	mutex_lock(&attachment->dmabuf->lock);
+
+	if (alloc->contiguous)
+		ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+	else
+		ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL);
+	if (ret) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		kfree(sg);
+		return ERR_PTR(ret);
+	}
+
+	if (alloc->contiguous) {
+		sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE;
+		sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0);
+		sg_dma_address(sg->sgl) = alloc->contig_dma_addr;
+	} else {
+		for_each_sg(sg->sgl, iter, alloc->nr_pages, i)
+			sg_set_page(iter, alloc->pages[i], PAGE_SIZE, 0);
+	}
+
+	if (!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		sg_free_table(sg);
+		kfree(sg);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	alloc->nr_device_mappings++;
+	pa->attachment_mapped = true;
+	pa->sg = sg;
+	mutex_unlock(&attachment->dmabuf->lock);
+	return sg;
+}
+
+static void dma_buf_te_unmap(struct dma_buf_attachment *attachment,
+							 struct sg_table *sg, enum dma_data_direction direction)
+{
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf_te_attachment *pa = attachment->priv;
+
+	alloc = attachment->dmabuf->priv;
+
+	mutex_lock(&attachment->dmabuf->lock);
+
+	WARN(!pa->attachment_mapped, "WARNING: Unmatched unmap of attachment.");
+
+	alloc->nr_device_mappings--;
+	pa->attachment_mapped = false;
+	pa->sg = NULL;
+	mutex_unlock(&attachment->dmabuf->lock);
+
+	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction);
+	sg_free_table(sg);
+	kfree(sg);
+}
+
+static void dma_buf_te_release(struct dma_buf *buf)
+{
+	size_t i;
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* no need for locking */
+
+	if (alloc->contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr,
+						alloc->contig_dma_addr,
+						DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++)
+			__free_page(alloc->pages[i]);
+	}
+	kfree(alloc->pages);
+	kfree(alloc);
+}
+
+static int dma_buf_te_sync(struct dma_buf *dmabuf,
+			enum dma_data_direction direction,
+			bool start_cpu_access)
+{
+	struct dma_buf_attachment *attachment;
+
+	mutex_lock(&dmabuf->lock);
+
+	list_for_each_entry(attachment, &dmabuf->attachments, node) {
+		struct dma_buf_te_attachment *pa = attachment->priv;
+		struct sg_table *sg = pa->sg;
+		if (!sg) {
+			dev_dbg(te_device.this_device, "no mapping for device %s\n", dev_name(attachment->dev));
+			continue;
+		}
+
+		if (start_cpu_access) {
+			dev_dbg(te_device.this_device, "sync cpu with device %s\n", dev_name(attachment->dev));
+
+			dma_sync_sg_for_cpu(attachment->dev, sg->sgl, sg->nents, direction);
+		} else {
+			dev_dbg(te_device.this_device, "sync device %s with cpu\n", dev_name(attachment->dev));
+
+			dma_sync_sg_for_device(attachment->dev, sg->sgl, sg->nents, direction);
+		}
+	}
+
+	mutex_unlock(&dmabuf->lock);
+	return 0;
+}
+
+#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE)
+static int dma_buf_te_begin_cpu_access(struct dma_buf *dmabuf,
+					enum dma_data_direction direction)
+#else
+static int dma_buf_te_begin_cpu_access(struct dma_buf *dmabuf, size_t start,
+					size_t len,
+					enum dma_data_direction direction)
+#endif
+{
+	return dma_buf_te_sync(dmabuf, direction, true);
+}
+
+#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE)
+static int dma_buf_te_end_cpu_access(struct dma_buf *dmabuf,
+				enum dma_data_direction direction)
+{
+	return dma_buf_te_sync(dmabuf, direction, false);
+}
+#else
+static void dma_buf_te_end_cpu_access(struct dma_buf *dmabuf, size_t start,
+				size_t len,
+				enum dma_data_direction direction)
+{
+	dma_buf_te_sync(dmabuf, direction, false);
+}
+#endif
+
+static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings++;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	BUG_ON(alloc->nr_cpu_mappings <= 0);
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings--;
+	mutex_unlock(&dma_buf->lock);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+static int dma_buf_te_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+#elif KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE
+static int dma_buf_te_mmap_fault(struct vm_fault *vmf)
+#else
+static vm_fault_t dma_buf_te_mmap_fault(struct vm_fault *vmf)
+#endif
+{
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dmabuf;
+	struct page *pageptr;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	dmabuf = vma->vm_private_data;
+#else
+	dmabuf = vmf->vma->vm_private_data;
+#endif
+	alloc = dmabuf->priv;
+
+	if (vmf->pgoff > alloc->nr_pages)
+		return VM_FAULT_SIGBUS;
+
+	pageptr = alloc->pages[vmf->pgoff];
+
+	BUG_ON(!pageptr);
+
+	get_page(pageptr);
+	vmf->page = pageptr;
+
+	return 0;
+}
+
+struct vm_operations_struct dma_buf_te_vm_ops = {
+	.open = dma_buf_te_mmap_open,
+	.close = dma_buf_te_mmap_close,
+	.fault = dma_buf_te_mmap_fault
+};
+
+static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = dmabuf->priv;
+
+	if (alloc->fail_mmap)
+		return -ENOMEM;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTEXPAND;
+#endif
+	vma->vm_ops = &dma_buf_te_vm_ops;
+	vma->vm_private_data = dmabuf;
+
+	/*  we fault in the pages on access */
+
+	/* call open to do the ref-counting */
+	dma_buf_te_vm_ops.open(vma);
+
+	return 0;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0)
+static void *dma_buf_te_kmap_atomic(struct dma_buf *buf, unsigned long page_num)
+{
+	/* IGNORE */
+	return NULL;
+}
+#endif
+
+static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long page_num)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return NULL;
+
+	return kmap(alloc->pages[page_num]);
+}
+static void dma_buf_te_kunmap(struct dma_buf *buf,
+		unsigned long page_num, void *addr)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return;
+
+	kunmap(alloc->pages[page_num]);
+	return;
+}
+
+static struct dma_buf_ops dma_buf_te_ops = {
+	/* real handlers */
+	.attach = dma_buf_te_attach,
+	.detach = dma_buf_te_detach,
+	.map_dma_buf = dma_buf_te_map,
+	.unmap_dma_buf = dma_buf_te_unmap,
+	.release = dma_buf_te_release,
+	.mmap = dma_buf_te_mmap,
+	.begin_cpu_access = dma_buf_te_begin_cpu_access,
+	.end_cpu_access = dma_buf_te_end_cpu_access,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
+	.kmap = dma_buf_te_kmap,
+	.kunmap = dma_buf_te_kunmap,
+
+	/* nop handlers for mandatory functions we ignore */
+	.kmap_atomic = dma_buf_te_kmap_atomic
+#else
+	.map = dma_buf_te_kmap,
+	.unmap = dma_buf_te_kunmap,
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0)
+	/* nop handlers for mandatory functions we ignore */
+	.map_atomic = dma_buf_te_kmap_atomic
+#endif
+#endif
+};
+
+static int do_dma_buf_te_ioctl_version(struct dma_buf_te_ioctl_version __user *buf)
+{
+	struct dma_buf_te_ioctl_version v;
+
+	if (copy_from_user(&v, buf, sizeof(v)))
+		return -EFAULT;
+
+	if (v.op != DMA_BUF_TE_ENQ)
+		return -EFAULT;
+
+	v.op = DMA_BUF_TE_ACK;
+	v.major = DMA_BUF_TE_VER_MAJOR;
+	v.minor = DMA_BUF_TE_VER_MINOR;
+
+	if (copy_to_user(buf, &v, sizeof(v)))
+		return -EFAULT;
+	else
+		return 0;
+}
+
+static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, bool contiguous)
+{
+	struct dma_buf_te_ioctl_alloc alloc_req;
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dma_buf;
+	size_t i = 0;
+	size_t max_nr_pages = DMA_BUF_TE_ALLOC_MAX_SIZE;
+	int fd;
+
+	if (copy_from_user(&alloc_req, buf, sizeof(alloc_req))) {
+		dev_err(te_device.this_device, "%s: couldn't get user data", __func__);
+		goto no_input;
+	}
+
+	if (!alloc_req.size) {
+		dev_err(te_device.this_device, "%s: no size specified", __func__);
+		goto invalid_size;
+	}
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* Whilst it is possible to allocate larger buffer, we won't be able to
+	 * map it during actual usage (mmap() still succeeds). We fail here so
+	 * userspace code can deal with it early than having driver failure
+	 * later on. */
+	if (max_nr_pages > SG_MAX_SINGLE_ALLOC)
+		max_nr_pages = SG_MAX_SINGLE_ALLOC;
+#endif
+
+	if (alloc_req.size > max_nr_pages) {
+		dev_err(te_device.this_device, "%s: buffer size of %llu pages exceeded the mapping limit of %zu pages",
+				__func__, alloc_req.size, max_nr_pages);
+		goto invalid_size;
+	}
+
+	alloc = kzalloc(sizeof(struct dma_buf_te_alloc), GFP_KERNEL);
+	if (NULL == alloc) {
+		dev_err(te_device.this_device, "%s: couldn't alloc object", __func__);
+		goto no_alloc_object;
+	}
+
+	alloc->nr_pages = alloc_req.size;
+	alloc->contiguous = contiguous;
+
+	alloc->pages = kzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL);
+	if (!alloc->pages) {
+		dev_err(te_device.this_device,
+				"%s: couldn't alloc %zu page structures",
+				__func__, alloc->nr_pages);
+		goto free_alloc_object;
+	}
+
+	if (contiguous) {
+		dma_addr_t dma_aux;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO,
+				DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO, &attrs);
+#else
+		alloc->contig_cpu_addr = dma_alloc_writecombine(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO);
+#endif
+		if (!alloc->contig_cpu_addr) {
+			dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %zu pages",
+				__func__, alloc->nr_pages);
+			goto free_page_struct;
+		}
+		dma_aux = alloc->contig_dma_addr;
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = pfn_to_page(PFN_DOWN(dma_aux));
+			dma_aux += PAGE_SIZE;
+		}
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+			if (NULL == alloc->pages[i]) {
+				dev_err(te_device.this_device, "%s: couldn't alloc page", __func__);
+				goto no_page;
+			}
+		}
+	}
+
+	/* alloc ready, let's export it */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0))
+	{
+		struct dma_buf_export_info export_info = {
+			.exp_name = "dma_buf_te",
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0))
+			.owner = THIS_MODULE,
+#endif
+			.ops = &dma_buf_te_ops,
+			.size = alloc->nr_pages << PAGE_SHIFT,
+			.flags = O_CLOEXEC | O_RDWR,
+			.priv = alloc,
+		};
+
+		dma_buf = dma_buf_export(&export_info);
+	}
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0))
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR, NULL);
+#else
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR);
+#endif
+
+	if (IS_ERR_OR_NULL(dma_buf)) {
+		dev_err(te_device.this_device, "%s: couldn't export dma_buf", __func__);
+		goto no_export;
+	}
+
+	/* get fd for buf */
+	fd = dma_buf_fd(dma_buf, O_CLOEXEC);
+
+	if (fd < 0) {
+		dev_err(te_device.this_device, "%s: couldn't get fd from dma_buf", __func__);
+		goto no_fd;
+	}
+
+	return fd;
+
+no_fd:
+	dma_buf_put(dma_buf);
+no_export:
+	/* i still valid */
+no_page:
+	if (contiguous) {
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr,
+						alloc->contig_dma_addr,
+						DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		while (i-- > 0)
+			__free_page(alloc->pages[i]);
+	}
+free_page_struct:
+	kfree(alloc->pages);
+free_alloc_object:
+	kfree(alloc);
+no_alloc_object:
+invalid_size:
+no_input:
+	return -EFAULT;
+}
+
+static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg)
+{
+	struct dma_buf_te_ioctl_status status;
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&status, arg, sizeof(status)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(status.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, get the current status */
+	alloc = dmabuf->priv;
+
+	/* lock while reading status to take a snapshot */
+	mutex_lock(&dmabuf->lock);
+	status.attached_devices = alloc->nr_attached_devices;
+	status.device_mappings = alloc->nr_device_mappings;
+	status.cpu_mappings = alloc->nr_cpu_mappings;
+	mutex_unlock(&dmabuf->lock);
+
+	if (copy_to_user(arg, &status, sizeof(status)))
+		goto err_have_dmabuf;
+
+	/* All OK */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing __user *arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_set_failing f;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, set the fail modes */
+	alloc = dmabuf->priv;
+	/* lock to set the fail modes atomically */
+	mutex_lock(&dmabuf->lock);
+	alloc->fail_attach = f.fail_attach;
+	alloc->fail_map    = f.fail_map;
+	alloc->fail_mmap   = f.fail_mmap;
+	mutex_unlock(&dmabuf->lock);
+
+	/* success */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
+{
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	unsigned int count;
+	unsigned int offset = 0;
+	int ret = 0;
+	size_t i;
+
+	attachment = dma_buf_attach(dma_buf, te_device.this_device);
+	if (IS_ERR_OR_NULL(attachment))
+		return -EBUSY;
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto no_import;
+	}
+
+	ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+	if (ret)
+		goto no_cpu_access;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		for (i = 0; i < sg_dma_len(sg); i = i + PAGE_SIZE) {
+			void *addr;
+
+			addr = dma_buf_kmap(dma_buf, i >> PAGE_SHIFT);
+			if (!addr) {
+				/* dma_buf_kmap is unimplemented in exynos and returns NULL */
+				ret = -EPERM;
+				goto no_kmap;
+			}
+			memset(addr, value, PAGE_SIZE);
+			dma_buf_kunmap(dma_buf, i >> PAGE_SHIFT, addr);
+		}
+		offset += sg_dma_len(sg);
+	}
+
+no_kmap:
+	dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+no_cpu_access:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+no_import:
+	dma_buf_detach(dma_buf, attachment);
+	return ret;
+}
+
+static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
+{
+
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_fill f;
+	int ret;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	ret = dma_te_buf_fill(dmabuf, f.value);
+	dma_buf_put(dmabuf);
+
+	return ret;
+}
+
+static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case DMA_BUF_TE_VERSION:
+		return do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg);
+	case DMA_BUF_TE_ALLOC:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false);
+	case DMA_BUF_TE_ALLOC_CONT:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true);
+	case DMA_BUF_TE_QUERY:
+		return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg);
+	case DMA_BUF_TE_SET_FAILING:
+		return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg);
+	case DMA_BUF_TE_FILL:
+		return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations dma_buf_te_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = dma_buf_te_ioctl,
+	.compat_ioctl = dma_buf_te_ioctl,
+};
+
+static int __init dma_buf_te_init(void)
+{
+	int res;
+	te_device.minor = MISC_DYNAMIC_MINOR;
+	te_device.name = "dma_buf_te";
+	te_device.fops = &dma_buf_te_fops;
+
+	res = misc_register(&te_device);
+	if (res) {
+		printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n");
+		return res;
+	}
+	te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	dev_info(te_device.this_device, "dma_buf_te ready\n");
+	return 0;
+
+}
+
+static void __exit dma_buf_te_exit(void)
+{
+	misc_deregister(&te_device);
+}
+
+module_init(dma_buf_te_init);
+module_exit(dma_buf_te_exit);
+MODULE_LICENSE("GPL");
+
diff --git a/bifrost/r19p0/kernel/drivers/base/memory_group_manager/Kbuild b/bifrost/r19p0/kernel/drivers/base/memory_group_manager/Kbuild
new file mode 100644
index 0000000..a049bed
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/memory_group_manager/Kbuild
@@ -0,0 +1,22 @@
+#
+# (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+obj-$(CONFIG_MALI_MEMORY_GROUP_MANAGER) := memory_group_manager.o
\ No newline at end of file
diff --git a/bifrost/r19p0/kernel/drivers/base/memory_group_manager/Kconfig b/bifrost/r19p0/kernel/drivers/base/memory_group_manager/Kconfig
new file mode 100644
index 0000000..da464ec
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/memory_group_manager/Kconfig
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+config MALI_MEMORY_GROUP_MANAGER
+	tristate "MALI_MEMORY_GROUP_MANAGER"
+	help
+	  This option enables an example implementation of a memory group manager
+	  for allocation and release of pages for memory pools managed by Mali GPU
+	  device drivers.
diff --git a/bifrost/r19p0/kernel/drivers/base/memory_group_manager/Makefile b/bifrost/r19p0/kernel/drivers/base/memory_group_manager/Makefile
new file mode 100644
index 0000000..a5bceae
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/memory_group_manager/Makefile
@@ -0,0 +1,35 @@
+#
+# (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" modules CONFIG_MALI_MEMORY_GROUP_MANAGER=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/bifrost/r19p0/kernel/drivers/base/memory_group_manager/build.bp b/bifrost/r19p0/kernel/drivers/base/memory_group_manager/build.bp
new file mode 100644
index 0000000..04dbfd3
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/memory_group_manager/build.bp
@@ -0,0 +1,22 @@
+/*
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+bob_kernel_module {
+    name: "memory_group_manager",
+    srcs: [
+        "Kbuild",
+        "memory_group_manager.c",
+    ],
+    kbuild_options: ["CONFIG_MALI_MEMORY_GROUP_MANAGER=m"],
+    defaults: ["kernel_defaults"],
+}
diff --git a/bifrost/r19p0/kernel/drivers/base/memory_group_manager/memory_group_manager.c b/bifrost/r19p0/kernel/drivers/base/memory_group_manager/memory_group_manager.c
new file mode 100644
index 0000000..5e0ff3a
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/memory_group_manager/memory_group_manager.c
@@ -0,0 +1,493 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif
+#include <linux/mm.h>
+#include <linux/memory_group_manager.h>
+
+#if (KERNEL_VERSION(4, 17, 0) > LINUX_VERSION_CODE)
+static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long pfn)
+{
+	int err = vm_insert_pfn(vma, addr, pfn);
+
+	if (unlikely(err == -ENOMEM))
+		return VM_FAULT_OOM;
+	if (unlikely(err < 0 && err != -EBUSY))
+		return VM_FAULT_SIGBUS;
+
+	return VM_FAULT_NOPAGE;
+}
+#endif
+
+#if (KERNEL_VERSION(4, 20, 0) > LINUX_VERSION_CODE)
+static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long pfn, pgprot_t pgprot)
+{
+	if (pgprot_val(pgprot) != pgprot_val(vma->vm_page_prot))
+		return VM_FAULT_SIGBUS;
+
+	return vmf_insert_pfn(vma, addr, pfn);
+}
+#endif
+
+#define IMPORTED_MEMORY_ID (MEMORY_GROUP_MANAGER_NR_GROUPS - 1)
+
+/**
+ * struct mgm_group - Structure to keep track of the number of allocated
+ *                    pages per group
+ *
+ * @size:  The number of allocated small(4KB) pages
+ * @lp_size:  The number of allocated large(2MB) pages
+ * @insert_pfn: The number of calls to map pages for CPU access.
+ * @update_gpu_pte: The number of calls to update GPU page table entries.
+ *
+ * This structure allows page allocation information to be displayed via
+ * debugfs. Display is organized per group with small and large sized pages.
+ */
+struct mgm_group {
+	size_t size;
+	size_t lp_size;
+	size_t insert_pfn;
+	size_t update_gpu_pte;
+};
+
+/**
+ * struct mgm_groups - Structure for groups of memory group manager
+ *
+ * @groups: To keep track of the number of allocated pages of all groups
+ * @dev: device attached
+ * @mgm_debugfs_root: debugfs root directory of memory group manager
+ *
+ * This structure allows page allocation information to be displayed via
+ * debugfs. Display is organized per group with small and large sized pages.
+ */
+struct mgm_groups {
+	struct mgm_group groups[MEMORY_GROUP_MANAGER_NR_GROUPS];
+	struct device *dev;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *mgm_debugfs_root;
+#endif
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+static int mgm_size_get(void *data, u64 *val)
+{
+	struct mgm_group *group = data;
+
+	*val = group->size;
+
+	return 0;
+}
+
+static int mgm_lp_size_get(void *data, u64 *val)
+{
+	struct mgm_group *group = data;
+
+	*val = group->lp_size;
+
+	return 0;
+}
+
+static int mgm_insert_pfn_get(void *data, u64 *val)
+{
+	struct mgm_group *group = data;
+
+	*val = group->insert_pfn;
+
+	return 0;
+}
+
+static int mgm_update_gpu_pte_get(void *data, u64 *val)
+{
+	struct mgm_group *group = data;
+
+	*val = group->update_gpu_pte;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_size, mgm_size_get, NULL, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_lp_size, mgm_lp_size_get, NULL, "%llu\n");
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_insert_pfn, mgm_insert_pfn_get, NULL,
+	"%llu\n");
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_update_gpu_pte, mgm_update_gpu_pte_get, NULL,
+	"%llu\n");
+
+static void mgm_term_debugfs(struct mgm_groups *data)
+{
+	debugfs_remove_recursive(data->mgm_debugfs_root);
+}
+
+#define MGM_DEBUGFS_GROUP_NAME_MAX 10
+static int mgm_initialize_debugfs(struct mgm_groups *mgm_data)
+{
+	int i;
+	struct dentry *e, *g;
+	char debugfs_group_name[MGM_DEBUGFS_GROUP_NAME_MAX];
+
+	/*
+	 * Create root directory of memory-group-manager
+	 */
+	mgm_data->mgm_debugfs_root =
+		debugfs_create_dir("physical-memory-group-manager", NULL);
+	if (IS_ERR(mgm_data->mgm_debugfs_root)) {
+		dev_err(mgm_data->dev, "fail to create debugfs root directory\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * Create debugfs files per group
+	 */
+	for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) {
+		scnprintf(debugfs_group_name, MGM_DEBUGFS_GROUP_NAME_MAX,
+				"group_%d", i);
+		g = debugfs_create_dir(debugfs_group_name,
+				mgm_data->mgm_debugfs_root);
+		if (IS_ERR(g)) {
+			dev_err(mgm_data->dev, "fail to create group[%d]\n", i);
+			goto remove_debugfs;
+		}
+
+		e = debugfs_create_file("size", 0444, g, &mgm_data->groups[i],
+				&fops_mgm_size);
+		if (IS_ERR(e)) {
+			dev_err(mgm_data->dev, "fail to create size[%d]\n", i);
+			goto remove_debugfs;
+		}
+
+		e = debugfs_create_file("lp_size", 0444, g,
+				&mgm_data->groups[i], &fops_mgm_lp_size);
+		if (IS_ERR(e)) {
+			dev_err(mgm_data->dev,
+				"fail to create lp_size[%d]\n", i);
+			goto remove_debugfs;
+		}
+
+		e = debugfs_create_file("insert_pfn", 0444, g,
+				&mgm_data->groups[i], &fops_mgm_insert_pfn);
+		if (IS_ERR(e)) {
+			dev_err(mgm_data->dev,
+				"fail to create insert_pfn[%d]\n", i);
+			goto remove_debugfs;
+		}
+
+		e = debugfs_create_file("update_gpu_pte", 0444, g,
+				&mgm_data->groups[i], &fops_mgm_update_gpu_pte);
+		if (IS_ERR(e)) {
+			dev_err(mgm_data->dev,
+				"fail to create update_gpu_pte[%d]\n", i);
+			goto remove_debugfs;
+		}
+	}
+
+	return 0;
+
+remove_debugfs:
+	mgm_term_debugfs(mgm_data);
+	return -ENODEV;
+}
+
+#else
+
+static void mgm_term_debugfs(struct mgm_groups *data)
+{
+}
+
+static int mgm_initialize_debugfs(struct mgm_groups *mgm_data)
+{
+	return 0;
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
+#define ORDER_SMALL_PAGE 0
+#define ORDER_LARGE_PAGE 9
+static void update_size(struct memory_group_manager_device *mgm_dev, int
+		group_id, int order, bool alloc)
+{
+	struct mgm_groups *data = mgm_dev->data;
+
+	switch (order) {
+	case ORDER_SMALL_PAGE:
+		if (alloc)
+			data->groups[group_id].size++;
+		else {
+			WARN_ON(data->groups[group_id].size == 0);
+			data->groups[group_id].size--;
+		}
+	break;
+
+	case ORDER_LARGE_PAGE:
+		if (alloc)
+			data->groups[group_id].lp_size++;
+		else {
+			WARN_ON(data->groups[group_id].lp_size == 0);
+			data->groups[group_id].lp_size--;
+		}
+	break;
+
+	default:
+		dev_err(data->dev, "Unknown order(%d)\n", order);
+	break;
+	}
+}
+
+static struct page *example_mgm_alloc_page(
+	struct memory_group_manager_device *mgm_dev, int group_id,
+	gfp_t gfp_mask, unsigned int order)
+{
+	struct mgm_groups *const data = mgm_dev->data;
+	struct page *p;
+
+	dev_dbg(data->dev, "%s(mgm_dev=%p, group_id=%d gfp_mask=0x%x order=%u\n",
+		__func__, (void *)mgm_dev, group_id, gfp_mask, order);
+
+	if (WARN_ON(group_id < 0) ||
+		WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return NULL;
+
+	p = alloc_pages(gfp_mask, order);
+
+	if (p) {
+		update_size(mgm_dev, group_id, order, true);
+	} else {
+		struct mgm_groups *data = mgm_dev->data;
+
+		dev_err(data->dev, "alloc_pages failed\n");
+	}
+
+	return p;
+}
+
+static void example_mgm_free_page(
+	struct memory_group_manager_device *mgm_dev, int group_id,
+	struct page *page, unsigned int order)
+{
+	struct mgm_groups *const data = mgm_dev->data;
+
+	dev_dbg(data->dev, "%s(mgm_dev=%p, group_id=%d page=%p order=%u\n",
+		__func__, (void *)mgm_dev, group_id, (void *)page, order);
+
+	if (WARN_ON(group_id < 0) ||
+		WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return;
+
+	__free_pages(page, order);
+
+	update_size(mgm_dev, group_id, order, false);
+}
+
+static int example_mgm_get_import_memory_id(
+	struct memory_group_manager_device *mgm_dev,
+	struct memory_group_manager_import_data *import_data)
+{
+	struct mgm_groups *const data = mgm_dev->data;
+
+	dev_dbg(data->dev, "%s(mgm_dev=%p, import_data=%p (type=%d)\n",
+		__func__, (void *)mgm_dev, (void *)import_data,
+		(int)import_data->type);
+
+	if (!WARN_ON(!import_data)) {
+		WARN_ON(!import_data->u.dma_buf);
+
+		WARN_ON(import_data->type !=
+				MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF);
+	}
+
+	return IMPORTED_MEMORY_ID;
+}
+
+static u64 example_mgm_update_gpu_pte(
+	struct memory_group_manager_device *const mgm_dev, int const group_id,
+	int const mmu_level, u64 pte)
+{
+	struct mgm_groups *const data = mgm_dev->data;
+	const u32 pbha_bit_pos = 59; /* bits 62:59 */
+	const u32 pbha_bit_mask = 0xf; /* 4-bit */
+
+	dev_dbg(data->dev,
+		"%s(mgm_dev=%p, group_id=%d, mmu_level=%d, pte=0x%llx)\n",
+		__func__, (void *)mgm_dev, group_id, mmu_level, pte);
+
+	if (WARN_ON(group_id < 0) ||
+		WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return pte;
+
+	pte |= ((u64)group_id & pbha_bit_mask) << pbha_bit_pos;
+
+	data->groups[group_id].update_gpu_pte++;
+
+	return pte;
+}
+
+static vm_fault_t example_mgm_vmf_insert_pfn_prot(
+	struct memory_group_manager_device *const mgm_dev, int const group_id,
+	struct vm_area_struct *const vma, unsigned long const addr,
+	unsigned long const pfn, pgprot_t const prot)
+{
+	struct mgm_groups *const data = mgm_dev->data;
+	vm_fault_t fault;
+
+	dev_dbg(data->dev,
+		"%s(mgm_dev=%p, group_id=%d, vma=%p, addr=0x%lx, pfn=0x%lx, prot=0x%llx)\n",
+		__func__, (void *)mgm_dev, group_id, (void *)vma, addr, pfn,
+		pgprot_val(prot));
+
+	if (WARN_ON(group_id < 0) ||
+		WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return VM_FAULT_SIGBUS;
+
+	fault = vmf_insert_pfn_prot(vma, addr, pfn, prot);
+
+	if (fault == VM_FAULT_NOPAGE)
+		data->groups[group_id].insert_pfn++;
+	else
+		dev_err(data->dev, "vmf_insert_pfn_prot failed\n");
+
+	return fault;
+}
+
+static int mgm_initialize_data(struct mgm_groups *mgm_data)
+{
+	int i;
+
+	for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) {
+		mgm_data->groups[i].size = 0;
+		mgm_data->groups[i].lp_size = 0;
+		mgm_data->groups[i].insert_pfn = 0;
+		mgm_data->groups[i].update_gpu_pte = 0;
+	}
+
+	return mgm_initialize_debugfs(mgm_data);
+}
+
+static void mgm_term_data(struct mgm_groups *data)
+{
+	int i;
+
+	for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) {
+		if (data->groups[i].size != 0)
+			dev_warn(data->dev,
+				"%zu 0-order pages in group(%d) leaked\n",
+				data->groups[i].size, i);
+		if (data->groups[i].lp_size != 0)
+			dev_warn(data->dev,
+				"%zu 9 order pages in group(%d) leaked\n",
+				data->groups[i].lp_size, i);
+	}
+
+	mgm_term_debugfs(data);
+}
+
+static int memory_group_manager_probe(struct platform_device *pdev)
+{
+	struct memory_group_manager_device *mgm_dev;
+	struct mgm_groups *mgm_data;
+
+	mgm_dev = kzalloc(sizeof(*mgm_dev), GFP_KERNEL);
+	if (!mgm_dev)
+		return -ENOMEM;
+
+	mgm_dev->owner = THIS_MODULE;
+	mgm_dev->ops.mgm_alloc_page = example_mgm_alloc_page;
+	mgm_dev->ops.mgm_free_page = example_mgm_free_page;
+	mgm_dev->ops.mgm_get_import_memory_id =
+			example_mgm_get_import_memory_id;
+	mgm_dev->ops.mgm_vmf_insert_pfn_prot = example_mgm_vmf_insert_pfn_prot;
+	mgm_dev->ops.mgm_update_gpu_pte = example_mgm_update_gpu_pte;
+
+	mgm_data = kzalloc(sizeof(*mgm_data), GFP_KERNEL);
+	if (!mgm_data) {
+		kfree(mgm_dev);
+		return -ENOMEM;
+	}
+
+	mgm_dev->data = mgm_data;
+	mgm_data->dev = &pdev->dev;
+
+	if (mgm_initialize_data(mgm_data)) {
+		kfree(mgm_data);
+		kfree(mgm_dev);
+		return -ENOENT;
+	}
+
+	platform_set_drvdata(pdev, mgm_dev);
+	dev_info(&pdev->dev, "Memory group manager probed successfully\n");
+
+	return 0;
+}
+
+static int memory_group_manager_remove(struct platform_device *pdev)
+{
+	struct memory_group_manager_device *mgm_dev =
+		platform_get_drvdata(pdev);
+	struct mgm_groups *mgm_data = mgm_dev->data;
+
+	mgm_term_data(mgm_data);
+	kfree(mgm_data);
+
+	kfree(mgm_dev);
+
+	dev_info(&pdev->dev, "Memory group manager removed successfully\n");
+
+	return 0;
+}
+
+static const struct of_device_id memory_group_manager_dt_ids[] = {
+	{ .compatible = "arm,physical-memory-group-manager" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, memory_group_manager_dt_ids);
+
+static struct platform_driver memory_group_manager_driver = {
+	.probe = memory_group_manager_probe,
+	.remove = memory_group_manager_remove,
+	.driver = {
+		.name = "physical-memory-group-manager",
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(memory_group_manager_dt_ids),
+		/*
+		 * Prevent the mgm_dev from being unbound and freed, as other's
+		 * may have pointers to it and would get confused, or crash, if
+		 * it suddenly disappear.
+		 */
+		.suppress_bind_attrs = true,
+	}
+};
+
+module_platform_driver(memory_group_manager_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/Kbuild b/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/Kbuild
new file mode 100755
index 0000000..02f6cf6
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/Kbuild
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+obj-$(CONFIG_SMC_PROTECTED_MODE_SWITCHER) := smc_protected_mode_switcher.o
+smc_protected_mode_switcher-y := protected_mode_switcher_device.o \
+				 protected_mode_switcher_smc.o
diff --git a/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/Kconfig b/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/Kconfig
new file mode 100755
index 0000000..02f90f0
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/Kconfig
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+config SMC_PROTECTED_MODE_SWITCHER
+	tristate "SMC protected mode switcher"
+	help
+	  This option enables the SMC protected mode switcher
diff --git a/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/Makefile b/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/Makefile
new file mode 100755
index 0000000..fc05ee0
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/Makefile
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include -I$(CURDIR)/../../gpu/arm/midgard -DCONFIG_SMC_PROTECTED_MODE_SWITCHER" CONFIG_SMC_PROTECTED_MODE_SWITCHER=m modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/build.bp b/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/build.bp
new file mode 100755
index 0000000..feae0f8
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/build.bp
@@ -0,0 +1,28 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_kernel_module {
+    name: "smc_protected_mode_switcher",
+    enabled: false,
+    smc_protected_mode_switcher: {
+        enabled: true,
+    },
+    srcs: [
+        "Kbuild",
+        "protected_mode_switcher_device.c",
+        "protected_mode_switcher_smc.S",
+    ],
+    kbuild_options: [
+        "CONFIG_SMC_PROTECTED_MODE_SWITCHER=m",
+    ],
+    defaults: ["kernel_defaults"],
+}
diff --git a/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_device.c b/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_device.c
new file mode 100755
index 0000000..2051fd6
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_device.c
@@ -0,0 +1,169 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/compiler.h>
+
+#include <linux/protected_mode_switcher.h>
+
+/*
+ * Protected Mode Switch
+ */
+
+#define SMC_FAST_CALL  (1 << 31)
+#define SMC_64         (1 << 30)
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_SIP    (2 << SMC_OEN_OFFSET)
+
+struct smc_protected_mode_device {
+	u16 smc_fid_enable;
+	u16 smc_fid_disable;
+	struct device *dev;
+};
+
+asmlinkage u64 __invoke_protected_mode_switch_smc(u64, u64, u64, u64);
+
+static u64 invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return __invoke_protected_mode_switch_smc(fid, arg0, arg1, arg2);
+}
+
+static int protected_mode_enable(struct protected_mode_device *protected_dev)
+{
+	struct smc_protected_mode_device *sdev = protected_dev->data;
+
+	if (!sdev)
+		/* Not supported */
+		return -EINVAL;
+
+	return invoke_smc(SMC_OEN_SIP,
+			sdev->smc_fid_enable, false,
+			0, 0, 0);
+
+}
+
+static int protected_mode_disable(struct protected_mode_device *protected_dev)
+{
+	struct smc_protected_mode_device *sdev = protected_dev->data;
+
+	if (!sdev)
+		/* Not supported */
+		return -EINVAL;
+
+	return invoke_smc(SMC_OEN_SIP,
+			sdev->smc_fid_disable, false,
+			0, 0, 0);
+}
+
+
+static int protected_mode_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct protected_mode_device *protected_dev;
+	struct smc_protected_mode_device *sdev;
+	u32 tmp = 0;
+
+	protected_dev = devm_kzalloc(&pdev->dev, sizeof(*protected_dev),
+			GFP_KERNEL);
+	if (!protected_dev)
+		return -ENOMEM;
+
+	sdev = devm_kzalloc(&pdev->dev, sizeof(*sdev), GFP_KERNEL);
+	if (!sdev) {
+		devm_kfree(&pdev->dev, protected_dev);
+		return -ENOMEM;
+	}
+
+	protected_dev->data = sdev;
+	protected_dev->ops.protected_mode_enable = protected_mode_enable;
+	protected_dev->ops.protected_mode_disable = protected_mode_disable;
+	sdev->dev = dev;
+
+	if (!of_property_read_u32(dev->of_node, "arm,smc,protected_enable",
+			&tmp))
+		sdev->smc_fid_enable = tmp;
+
+	if (!of_property_read_u32(dev->of_node, "arm,smc,protected_disable",
+			&tmp))
+		sdev->smc_fid_disable = tmp;
+
+	/* Check older property names, for compatibility with outdated DTBs */
+	if (!of_property_read_u32(dev->of_node, "arm,smc,secure_enable", &tmp))
+		sdev->smc_fid_enable = tmp;
+
+	if (!of_property_read_u32(dev->of_node, "arm,smc,secure_disable", &tmp))
+		sdev->smc_fid_disable = tmp;
+
+	platform_set_drvdata(pdev, protected_dev);
+
+	dev_info(&pdev->dev, "Protected mode switcher %s loaded\n", pdev->name);
+	dev_info(&pdev->dev, "SMC enable: 0x%x\n", sdev->smc_fid_enable);
+	dev_info(&pdev->dev, "SMC disable: 0x%x\n", sdev->smc_fid_disable);
+
+	return 0;
+}
+
+static int protected_mode_remove(struct platform_device *pdev)
+{
+	dev_info(&pdev->dev, "Protected mode switcher %s removed\n",
+			pdev->name);
+
+	return 0;
+}
+
+static const struct of_device_id protected_mode_dt_ids[] = {
+	{ .compatible = "arm,smc-protected-mode-switcher" },
+	{ .compatible = "arm,secure-mode-switcher" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, protected_mode_dt_ids);
+
+static struct platform_driver protected_mode_driver = {
+	.probe = protected_mode_probe,
+	.remove = protected_mode_remove,
+	.driver = {
+		.name = "smc-protected-mode-switcher",
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(protected_mode_dt_ids),
+	}
+};
+
+module_platform_driver(protected_mode_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_smc.S b/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_smc.S
new file mode 100755
index 0000000..5eae328
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_smc.S
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+#include <linux/linkage.h>
+
+/* u64 invoke_protected_mode_switch_smc(u64 function_id, u64 arg0, u64 arg1,
+		u64 arg2) */
+ENTRY(__invoke_protected_mode_switch_smc)
+	smc	#0
+	ret
+ENDPROC(__invoke_protected_mode_switch_smc)
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/Kbuild b/bifrost/r19p0/kernel/drivers/gpu/arm/Kbuild
new file mode 100755
index 0000000..1a6fa3c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/Kbuild
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/Kconfig b/bifrost/r19p0/kernel/drivers/gpu/arm/Kconfig
new file mode 100755
index 0000000..693b86f
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/Kconfig
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+menu "ARM GPU Configuration"
+source "drivers/gpu/arm/midgard/Kconfig"
+endmenu
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Kbuild b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Kbuild
new file mode 100755
index 0000000..75fcaba
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Kbuild
@@ -0,0 +1,172 @@
+#
+# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r19p0-01rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_USE_CSF ?= 0
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_COVERAGE ?= 0
+CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_USE_CSF=$(MALI_USE_CSF) \
+	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_COVERAGE=$(MALI_COVERAGE) \
+	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\"
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+
+DEFINES += -DMALI_KBASE_BUILD
+
+# Use our defines when compiling
+ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+	mali_kbase_device.c \
+	mali_kbase_cache_policy.c \
+	mali_kbase_mem.c \
+	mali_kbase_mem_pool_group.c \
+	mali_kbase_mmu.c \
+	mali_kbase_native_mgm.c \
+	mali_kbase_ctx_sched.c \
+	mali_kbase_jd.c \
+	mali_kbase_jd_debugfs.c \
+	mali_kbase_jm.c \
+	mali_kbase_gpuprops.c \
+	mali_kbase_js.c \
+	mali_kbase_js_ctx_attr.c \
+	mali_kbase_event.c \
+	mali_kbase_context.c \
+	mali_kbase_pm.c \
+	mali_kbase_config.c \
+	mali_kbase_vinstr.c \
+	mali_kbase_hwcnt.c \
+	mali_kbase_hwcnt_backend_gpu.c \
+	mali_kbase_hwcnt_gpu.c \
+	mali_kbase_hwcnt_legacy.c \
+	mali_kbase_hwcnt_types.c \
+	mali_kbase_hwcnt_virtualizer.c \
+	mali_kbase_softjobs.c \
+	mali_kbase_10969_workaround.c \
+	mali_kbase_hw.c \
+	mali_kbase_debug.c \
+	mali_kbase_gpu_memory_debugfs.c \
+	mali_kbase_mem_linux.c \
+	mali_kbase_core_linux.c \
+	mali_kbase_mem_profile_debugfs.c \
+	mali_kbase_mmu_mode_lpae.c \
+	mali_kbase_mmu_mode_aarch64.c \
+	mali_kbase_disjoint_events.c \
+	mali_kbase_debug_mem_view.c \
+	mali_kbase_debug_job_fault.c \
+	mali_kbase_smc.c \
+	mali_kbase_mem_pool.c \
+	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_debugfs_helper.c \
+	mali_kbase_timeline.c \
+	mali_kbase_timeline_io.c \
+	mali_kbase_tlstream.c \
+	mali_kbase_tracepoints.c \
+	mali_kbase_strings.c \
+	mali_kbase_as_fault_debugfs.c \
+	mali_kbase_regs_history_debugfs.c \
+	thirdparty/mali_kbase_mmap.c
+
+
+ifeq ($(CONFIG_MALI_CINSTR_GWT),y)
+	SRC += mali_kbase_gwt.c
+endif
+
+ifeq ($(MALI_UNIT_TEST),1)
+	SRC += mali_kbase_timeline_test.c
+endif
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+	SRC += mali_kbase_regs_dump_debugfs.c
+endif
+
+
+ccflags-y += -I$(KBASE_PATH)
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+mali_kbase-y := $(SRC:.c=.o)
+
+# Kconfig passes in the name with quotes for in-tree builds - remove them.
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_NAME))
+MALI_PLATFORM_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_DIR)
+include $(src)/$(MALI_PLATFORM_DIR)/Kbuild
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+  ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
+    include $(src)/ipa/Kbuild
+  endif
+endif
+
+ifeq ($(MALI_USE_CSF),1)
+	include $(src)/csf/Kbuild
+endif
+
+mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \
+	mali_kbase_dma_fence.o \
+	mali_kbase_fence.o
+mali_kbase-$(CONFIG_SYNC) += \
+	mali_kbase_sync_android.o \
+	mali_kbase_sync_common.o
+mali_kbase-$(CONFIG_SYNC_FILE) += \
+	mali_kbase_sync_file.o \
+	mali_kbase_sync_common.o \
+	mali_kbase_fence.o
+
+include  $(src)/backend/gpu/Kbuild
+mali_kbase-y += $(BACKEND:.c=.o)
+
+
+ccflags-y += -I$(src)/backend/gpu
+subdir-ccflags-y += -I$(src)/backend/gpu
+
+# For kutf and mali_kutf_irq_latency_test
+obj-$(CONFIG_MALI_KUTF) += tests/
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Kconfig b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Kconfig
new file mode 100755
index 0000000..3d93aaa
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Kconfig
@@ -0,0 +1,259 @@
+#
+# (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+menuconfig MALI_MIDGARD
+	tristate "Mali Midgard series support"
+	select GPU_TRACEPOINTS if ANDROID
+	default n
+	help
+	  Enable this option to build support for a ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Enable Streamline tracing support"
+	depends on MALI_MIDGARD
+	default y
+	help
+	  Enables kbase tracing used by the Arm Streamline Performance Analyzer.
+	  The tracepoints are used to derive GPU activity charts in Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD && PM_DEVFREQ
+	default y
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if the Linux Kernel has built in
+	  support for DMA_BUF fences.
+
+config MALI_PLATFORM_NAME
+	depends on MALI_MIDGARD
+	string "Platform name"
+	default "devicetree"
+	help
+	  Enter the name of the desired platform configuration directory to
+	  include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
+	  exist.
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default n
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_CORESTACK
+	bool "Support controlling power to the GPU core stack"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enabling this feature on supported GPUs will let the driver powering
+	  on/off the GPU core stack independently without involving the Power
+	  Domain Controller. This should only be enabled on platforms which
+	  integration of the PDC to the Mali GPU is known to be problematic.
+	  This feature is currently only supported on t-Six and t-HEx GPUs.
+
+	  If unsure, say N.
+
+config MALI_PLATFORM_POWER_DOWN_ONLY
+	bool "Support disabling the power down of individual cores"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enabling this feature will let the driver avoid power down of the
+	  shader cores, the tiler, and the L2 cache.
+	  The entire GPU would be powered down at once through the platform
+	  specific code.
+	  This may be required for certain platform configurations only.
+	  This also limits the available power policies.
+
+	  If unsure, say N.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_FENCE_DEBUG
+	bool "Debug sync fence usage"
+	depends on MALI_MIDGARD && MALI_EXPERT && (SYNC || SYNC_FILE)
+	default y if MALI_DEBUG
+	help
+	  Select this option to enable additional checking and reporting on the
+	  use of sync fences in the Mali driver.
+
+	  This will add a 3s timeout to all sync fence waits in the Mali
+	  driver, so that when work for Mali has been waiting on a sync fence
+	  for a long time a debug message will be printed, detailing what fence
+	  is causing the block, and which dependent Mali atoms are blocked as a
+	  result of this.
+
+	  The timeout can be changed at runtime through the js_soft_timeout
+	  device attribute, where the timeout is specified in milliseconds.
+
+config MALI_NO_MALI
+	bool "No Mali"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This can be used to test the driver in a simulated environment
+	  whereby the hardware is not physically present. If the hardware is physically
+	  present it will not be used. This can be used to test the majority of the
+	  driver without needing actual hardware or for software benchmarking.
+	  All calls to the simulated hardware will complete immediately as if the hardware
+	  completed the task.
+
+config MALI_REAL_HW
+	def_bool !MALI_NO_MALI
+
+config MALI_ERROR_INJECT
+	bool "Error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+	default n
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event. This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_2MB_ALLOC
+	bool "Attempt to allocate 2MB pages"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Rather than allocating all GPU memory page-by-page, attempt to
+	  allocate 2MB pages from the kernel. This reduces TLB pressure and
+	  helps to prevent memory fragmentation.
+
+	  If in doubt, say N
+
+config MALI_PWRSOFT_765
+	bool "PWRSOFT-765 ticket"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged
+	  in kernel v4.10, however if backported into the kernel then this
+	  option must be manually selected.
+
+	  If using kernel >= v4.10 then say N, otherwise if devfreq cooling
+	  changes have been backported say Y to avoid compilation errors.
+
+config MALI_MEMORY_FULLY_BACKED
+	bool "Memory fully physically-backed"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This option enables full physical backing of all virtual
+	  memory allocations in the kernel. Notice that this build
+	  option only affects allocations of grow-on-GPU-page-fault
+	  memory.
+
+config MALI_DMA_BUF_MAP_ON_DEMAND
+	bool "Map imported dma-bufs on demand"
+	depends on DMA_SHARED_BUFFER && MALI_MIDGARD
+	default n
+	help
+	  This option caused kbase to set up the GPU mapping of imported
+	  dma-buf when needed to run atoms.  This is the legacy behaviour.
+
+	  This is intended for testing and the option will get removed in the
+	  future.
+
+config MALI_DMA_BUF_LEGACY_COMPAT
+	bool "Enable legacy compatibility cache flush on dma-buf map"
+	depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND
+	default y
+	help
+	  This option enables compatibility with legacy dma-buf mapping
+	  behavior, then the dma-buf is mapped on import, by adding cache
+	  maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping,
+	  including a cache flush.
+
+# Instrumentation options.
+
+config MALI_JOB_DUMP
+	bool "Enable system level support needed for job dumping"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system level support needed for
+	  job dumping. This is typically used for instrumentation but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
+source "drivers/gpu/arm/midgard/tests/Kconfig"
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Makefile b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Makefile
new file mode 100755
index 0000000..53a1209
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Makefile
@@ -0,0 +1,38 @@
+#
+# (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
+KBASE_PATH_RELATIVE = $(CURDIR)
+
+ifeq ($(CONFIG_MALI_BUSLOG),y)
+#Add bus logger symbols
+EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
+endif
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
new file mode 100755
index 0000000..6b0f81e
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010, 2013, 2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(KBASE_PATH)/platform_$(PLATFORM)
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Mconfig b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Mconfig
new file mode 100755
index 0000000..60cb2a3
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/Mconfig
@@ -0,0 +1,247 @@
+#
+# (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+menuconfig MALI_MIDGARD
+	bool "Mali Midgard series support"
+	default y
+	help
+	  Enable this option to build support for a ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Enable Streamline tracing support"
+	depends on MALI_MIDGARD && !BACKEND_USER
+	default y
+	help
+	  Enables kbase tracing used by the Arm Streamline Performance Analyzer.
+	  The tracepoints are used to derive GPU activity charts in Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD
+	default y if PLATFORM_JUNO
+	default y if PLATFORM_CUSTOM
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if the Linux Kernel has built in
+	  support for DMA_BUF fences.
+
+config MALI_PLATFORM_NAME
+	depends on MALI_MIDGARD
+	string "Platform name"
+	default "hisilicon" if PLATFORM_HIKEY960
+	default "hisilicon" if PLATFORM_HIKEY970
+	default "devicetree"
+	help
+	  Enter the name of the desired platform configuration directory to
+	  include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
+	  exist.
+
+	  When PLATFORM_CUSTOM is set, this needs to be set manually to
+	  pick up the desired platform files.
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default y
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_CORESTACK
+	bool "Support controlling power to the GPU core stack"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enabling this feature on supported GPUs will let the driver powering
+	  on/off the GPU core stack independently without involving the Power
+	  Domain Controller. This should only be enabled on platforms which
+	  integration of the PDC to the Mali GPU is known to be problematic.
+	  This feature is currently only supported on t-Six and t-HEx GPUs.
+
+	  If unsure, say N.
+
+config MALI_PLATFORM_POWER_DOWN_ONLY
+	bool "Support disabling the power down of individual cores"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enabling this feature will let the driver avoid power down of the
+	  shader cores, the tiler, and the L2 cache.
+	  The entire GPU would be powered down at once through the platform
+	  specific code.
+	  This may be required for certain platform configurations only.
+	  This also limits the available power policies.
+
+	  If unsure, say N.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default y if DEBUG
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_FENCE_DEBUG
+	bool "Debug sync fence usage"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default y if MALI_DEBUG
+	help
+	  Select this option to enable additional checking and reporting on the
+	  use of sync fences in the Mali driver.
+
+	  This will add a 3s timeout to all sync fence waits in the Mali
+	  driver, so that when work for Mali has been waiting on a sync fence
+	  for a long time a debug message will be printed, detailing what fence
+	  is causing the block, and which dependent Mali atoms are blocked as a
+	  result of this.
+
+	  The timeout can be changed at runtime through the js_soft_timeout
+	  device attribute, where the timeout is specified in milliseconds.
+
+choice
+	prompt "Error injection level"
+	default MALI_ERROR_INJECT_NONE
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_ERROR_INJECT_NONE
+	bool "disabled"
+	help
+	  Error injection is disabled.
+
+config MALI_ERROR_INJECT_TRACK_LIST
+	bool "error track list"
+	depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI
+	help
+	  Errors to inject are pre-configured by the user.
+
+config MALI_ERROR_INJECT_RANDOM
+	bool "random error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI
+	help
+	  Injected errors are random, rather than user-driven.
+
+endchoice
+
+config MALI_ERROR_INJECT_ON
+	string
+	default "0" if MALI_ERROR_INJECT_NONE
+	default "1" if MALI_ERROR_INJECT_TRACK_LIST
+	default "2" if MALI_ERROR_INJECT_RANDOM
+
+config MALI_ERROR_INJECT
+	bool
+	default y if !MALI_ERROR_INJECT_NONE
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event.	This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_2MB_ALLOC
+	bool "Attempt to allocate 2MB pages"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Rather than allocating all GPU memory page-by-page, attempt to
+	  allocate 2MB pages from the kernel. This reduces TLB pressure and
+	  helps to prevent memory fragmentation.
+
+	  If in doubt, say N
+
+config MALI_PWRSOFT_765
+	bool "PWRSOFT-765 ticket"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  PWRSOFT-765 fixes devfreq cooling devices issues. However, they are
+	  not merged in mainline kernel yet. So this define helps to guard those
+	  parts of the code.
+
+config MALI_MEMORY_FULLY_BACKED
+	bool "Memory fully physically-backed"
+	default n
+	help
+	  This option enables full backing of all virtual memory allocations
+	  for the kernel. This only affects grow-on-GPU-page-fault memory.
+
+config MALI_DMA_BUF_MAP_ON_DEMAND
+	bool "Map imported dma-bufs on demand"
+	depends on MALI_MIDGARD
+	default n
+	default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED
+	help
+	  This option caused kbase to set up the GPU mapping of imported
+	  dma-buf when needed to run atoms.  This is the legacy behaviour.
+
+config MALI_DMA_BUF_LEGACY_COMPAT
+	bool "Enable legacy compatibility cache flush on dma-buf map"
+	depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND
+	default y
+	help
+	  This option enables compatibility with legacy dma-buf mapping
+	  behavior, then the dma-buf is mapped on import, by adding cache
+	  maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping,
+	  including a cache flush.
+
+config MALI_REAL_HW
+	bool
+	default y
+	default n if NO_MALI
+
+# Instrumentation options.
+
+# config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig.
+# config MALI_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig.
+
+source "kernel/drivers/gpu/arm/midgard/tests/Mconfig"
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
new file mode 100755
index 0000000..2414d51
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -0,0 +1,61 @@
+#
+# (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+BACKEND += \
+	backend/gpu/mali_kbase_cache_policy_backend.c \
+	backend/gpu/mali_kbase_device_hw.c \
+	backend/gpu/mali_kbase_gpu.c \
+	backend/gpu/mali_kbase_gpuprops_backend.c \
+	backend/gpu/mali_kbase_debug_job_fault_backend.c \
+	backend/gpu/mali_kbase_irq_linux.c \
+	backend/gpu/mali_kbase_instr_backend.c \
+	backend/gpu/mali_kbase_jm_as.c \
+	backend/gpu/mali_kbase_jm_hw.c \
+	backend/gpu/mali_kbase_jm_rb.c \
+	backend/gpu/mali_kbase_js_backend.c \
+	backend/gpu/mali_kbase_mmu_hw_direct.c \
+	backend/gpu/mali_kbase_pm_backend.c \
+	backend/gpu/mali_kbase_pm_driver.c \
+	backend/gpu/mali_kbase_pm_metrics.c \
+	backend/gpu/mali_kbase_pm_ca.c \
+	backend/gpu/mali_kbase_pm_always_on.c \
+	backend/gpu/mali_kbase_pm_coarse_demand.c \
+	backend/gpu/mali_kbase_pm_policy.c \
+	backend/gpu/mali_kbase_time.c \
+	backend/gpu/mali_kbase_l2_mmu_config.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+BACKEND += \
+	backend/gpu/mali_kbase_pm_always_on_demand.c
+endif
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+BACKEND += \
+	backend/gpu/mali_kbase_devfreq.c
+endif
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+	# Dummy model
+	BACKEND += backend/gpu/mali_kbase_model_dummy.c
+	BACKEND += backend/gpu/mali_kbase_model_linux.c
+	# HW error simulation
+	BACKEND += backend/gpu/mali_kbase_model_error_generator.c
+endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
new file mode 100755
index 0000000..4a61f96
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
@@ -0,0 +1,31 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100755
index 0000000..7378bfd
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode)
+{
+	kbdev->current_gpu_coherency_mode = mode;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode);
+}
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100755
index 0000000..f78ada7
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+  * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+  *			in the GPU.
+  * @kbdev:	Device pointer
+  * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+  */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
new file mode 100755
index 0000000..450f6e7
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -0,0 +1,162 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_debug_job_fault.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+/*GPU_CONTROL_REG(r)*/
+static int gpu_control_reg_snapshot[] = {
+	GPU_ID,
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI
+};
+
+/* JOB_CONTROL_REG(r) */
+static int job_control_reg_snapshot[] = {
+	JOB_IRQ_MASK,
+	JOB_IRQ_STATUS
+};
+
+/* JOB_SLOT_REG(n,r) */
+static int job_slot_reg_snapshot[] = {
+	JS_HEAD_LO,
+	JS_HEAD_HI,
+	JS_TAIL_LO,
+	JS_TAIL_HI,
+	JS_AFFINITY_LO,
+	JS_AFFINITY_HI,
+	JS_CONFIG,
+	JS_STATUS,
+	JS_HEAD_NEXT_LO,
+	JS_HEAD_NEXT_HI,
+	JS_AFFINITY_NEXT_LO,
+	JS_AFFINITY_NEXT_HI,
+	JS_CONFIG_NEXT
+};
+
+/*MMU_REG(r)*/
+static int mmu_reg_snapshot[] = {
+	MMU_IRQ_MASK,
+	MMU_IRQ_STATUS
+};
+
+/* MMU_AS_REG(n,r) */
+static int as_reg_snapshot[] = {
+	AS_TRANSTAB_LO,
+	AS_TRANSTAB_HI,
+	AS_MEMATTR_LO,
+	AS_MEMATTR_HI,
+	AS_FAULTSTATUS,
+	AS_FAULTADDRESS_LO,
+	AS_FAULTADDRESS_HI,
+	AS_STATUS
+};
+
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range)
+{
+	int i, j;
+	int offset = 0;
+	int slot_number;
+	int as_number;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	slot_number = kctx->kbdev->gpu_props.num_job_slots;
+	as_number = kctx->kbdev->gpu_props.num_address_spaces;
+
+	/* get the GPU control registers*/
+	for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job control registers*/
+	for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				JOB_CONTROL_REG(job_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job Slot registers*/
+	for (j = 0; j < slot_number; j++)	{
+		for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+			JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	/* get the MMU registers*/
+	for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Address space registers*/
+	for (j = 0; j < as_number; j++) {
+		for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+					MMU_AS_REG(j, as_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	WARN_ON(offset >= (reg_range*2/4));
+
+	/* set the termination flag*/
+	kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
+	kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
+			offset);
+
+	return true;
+}
+
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
+{
+	int offset = 0;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
+		kctx->reg_dump[offset+1] =
+				kbase_reg_read(kctx->kbdev,
+						kctx->reg_dump[offset]);
+		offset += 2;
+	}
+	return true;
+}
+
+
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
new file mode 100755
index 0000000..4e87c6a
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -0,0 +1,675 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_tracepoints.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#define dev_pm_opp_find_freq_floor opp_find_freq_floor
+#endif /* Linux >= 3.13 */
+
+/**
+ * opp_translate - Translate nominal OPP frequency from devicetree into real
+ *                 frequency and core mask
+ * @kbdev:     Device pointer
+ * @freq:      Nominal frequency
+ * @core_mask: Pointer to u64 to store core mask to
+ * @freqs:     Pointer to array of frequencies
+ * @volts:     Pointer to array of voltages
+ *
+ * This function will only perform translation if an operating-points-v2-mali
+ * table is present in devicetree. If one is not present then it will return an
+ * untranslated frequency and all cores enabled.
+ */
+static void opp_translate(struct kbase_device *kbdev, unsigned long freq,
+	u64 *core_mask, unsigned long *freqs, unsigned long *volts)
+{
+	unsigned int i;
+
+	for (i = 0; i < kbdev->num_opps; i++) {
+		if (kbdev->devfreq_table[i].opp_freq == freq) {
+			unsigned int j;
+
+			*core_mask = kbdev->devfreq_table[i].core_mask;
+			for (j = 0; j < kbdev->nr_clocks; j++) {
+				freqs[j] =
+					kbdev->devfreq_table[i].real_freqs[j];
+				volts[j] =
+					kbdev->devfreq_table[i].opp_volts[j];
+			}
+
+			break;
+		}
+	}
+
+	/* If failed to find OPP, return all cores enabled
+	 * and nominal frequency
+	 */
+	if (i == kbdev->num_opps) {
+		*core_mask = kbdev->gpu_props.props.raw_props.shader_present;
+		for (i = 0; i < kbdev->nr_clocks; i++)
+			freqs[i] = freq;
+	}
+}
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long nominal_freq;
+	unsigned long freqs[BASE_MAX_NR_CLOCKS_REGULATORS] = {0};
+	unsigned long volts[BASE_MAX_NR_CLOCKS_REGULATORS] = {0};
+	unsigned int i;
+	u64 core_mask;
+
+	nominal_freq = *target_freq;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_lock();
+#endif
+	opp = devfreq_recommended_opp(dev, &nominal_freq, flags);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_unlock();
+#endif
+	if (IS_ERR_OR_NULL(opp)) {
+		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+	dev_pm_opp_put(opp);
+#endif
+
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (kbdev->current_nominal_freq == nominal_freq) {
+		*target_freq = nominal_freq;
+		return 0;
+	}
+
+	opp_translate(kbdev, nominal_freq, &core_mask, freqs, volts);
+
+#ifdef CONFIG_REGULATOR
+	/* Regulators and clocks work in pairs: every clock has a regulator,
+	 * and we never expect to have more regulators than clocks.
+	 *
+	 * We always need to increase the voltage before increasing
+	 * the frequency of a regulator/clock pair, otherwise the clock
+	 * wouldn't have enough power to perform the transition.
+	 *
+	 * It's always safer to decrease the frequency before decreasing
+	 * voltage of a regulator/clock pair, otherwise the clock could have
+	 * problems operating if it is deprived of the necessary power
+	 * to sustain its current frequency (even if that happens for a short
+	 * transition interval).
+	 */
+	for (i = 0; i < kbdev->nr_clocks; i++) {
+		if (kbdev->regulators[i] &&
+				kbdev->current_voltages[i] != volts[i] &&
+				kbdev->current_freqs[i] < freqs[i]) {
+			int err;
+
+			err = regulator_set_voltage(kbdev->regulators[i],
+				volts[i], volts[i]);
+			if (!err) {
+				kbdev->current_voltages[i] = volts[i];
+			} else {
+				dev_err(dev, "Failed to increase voltage (%d) (target %lu)\n",
+					err, volts[i]);
+				return err;
+			}
+		}
+	}
+#endif
+
+	for (i = 0; i < kbdev->nr_clocks; i++) {
+		if (kbdev->clocks[i]) {
+			int err;
+
+			err = clk_set_rate(kbdev->clocks[i], freqs[i]);
+			if (!err) {
+				kbdev->current_freqs[i] = freqs[i];
+			} else {
+				dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+					freqs[i], *target_freq);
+				return err;
+			}
+		}
+	}
+
+#ifdef CONFIG_REGULATOR
+	for (i = 0; i < kbdev->nr_clocks; i++) {
+		if (kbdev->regulators[i] &&
+				kbdev->current_voltages[i] != volts[i] &&
+				kbdev->current_freqs[i] > freqs[i]) {
+			int err;
+
+			err = regulator_set_voltage(kbdev->regulators[i],
+				volts[i], volts[i]);
+			if (!err) {
+				kbdev->current_voltages[i] = volts[i];
+			} else {
+				dev_err(dev, "Failed to decrease voltage (%d) (target %lu)\n",
+					err, volts[i]);
+				return err;
+			}
+		}
+	}
+#endif
+
+	kbase_devfreq_set_core_mask(kbdev, core_mask);
+
+	*target_freq = nominal_freq;
+	kbdev->current_nominal_freq = nominal_freq;
+	kbdev->current_core_mask = core_mask;
+
+	KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)nominal_freq);
+
+	return 0;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	*freq = kbdev->current_nominal_freq;
+
+	return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbasep_pm_metrics diff;
+
+	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->last_devfreq_metrics, &diff);
+
+	stat->busy_time = diff.time_busy;
+	stat->total_time = diff.time_busy + diff.time_idle;
+	stat->current_frequency = kbdev->current_nominal_freq;
+	stat->private_data = NULL;
+
+	return 0;
+}
+
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+		struct devfreq_dev_profile *dp)
+{
+	int count;
+	int i = 0;
+	unsigned long freq;
+	struct dev_pm_opp *opp;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_lock();
+#endif
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_unlock();
+#endif
+	if (count < 0)
+		return count;
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_lock();
+#endif
+	for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) {
+		opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+		dev_pm_opp_put(opp);
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) */
+
+		dp->freq_table[i] = freq;
+	}
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_unlock();
+#endif
+
+	if (count != i)
+		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
+				count, i);
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp = &kbdev->devfreq_profile;
+
+	kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_term_core_mask_table(struct kbase_device *kbdev)
+{
+	kfree(kbdev->devfreq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	kbase_devfreq_term_freq_table(kbdev);
+}
+
+static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
+{
+#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF)
+	/* OPP table initialization requires at least the capability to get
+	 * regulators and clocks from the device tree, as well as parsing
+	 * arrays of unsigned integer values.
+	 *
+	 * The whole initialization process shall simply be skipped if the
+	 * minimum capability is not available.
+	 */
+	return 0;
+#else
+	struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node,
+			"operating-points-v2", 0);
+	struct device_node *node;
+	int i = 0;
+	int count;
+	u64 shader_present = kbdev->gpu_props.props.raw_props.shader_present;
+
+	if (!opp_node)
+		return 0;
+	if (!of_device_is_compatible(opp_node, "operating-points-v2-mali"))
+		return 0;
+
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	kbdev->devfreq_table = kmalloc_array(count,
+			sizeof(struct kbase_devfreq_opp), GFP_KERNEL);
+	if (!kbdev->devfreq_table)
+		return -ENOMEM;
+
+	for_each_available_child_of_node(opp_node, node) {
+		const void *core_count_p;
+		u64 core_mask, opp_freq,
+			real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS];
+		int err;
+#ifdef CONFIG_REGULATOR
+		u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS];
+#endif
+
+		err = of_property_read_u64(node, "opp-hz", &opp_freq);
+		if (err) {
+			dev_warn(kbdev->dev, "Failed to read opp-hz property with error %d\n",
+					err);
+			continue;
+		}
+
+
+#if BASE_MAX_NR_CLOCKS_REGULATORS > 1
+		err = of_property_read_u64_array(node, "opp-hz-real",
+				real_freqs, kbdev->nr_clocks);
+#else
+		WARN_ON(kbdev->nr_clocks != 1);
+		err = of_property_read_u64(node, "opp-hz-real", real_freqs);
+#endif
+		if (err < 0) {
+			dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n",
+					err);
+			continue;
+		}
+#ifdef CONFIG_REGULATOR
+		err = of_property_read_u32_array(node,
+			"opp-microvolt", opp_volts, kbdev->nr_regulators);
+		if (err < 0) {
+			dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d\n",
+					err);
+			continue;
+		}
+#endif
+
+		if (of_property_read_u64(node, "opp-core-mask", &core_mask))
+			core_mask = shader_present;
+		if (core_mask != shader_present &&
+				(kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11056) ||
+				 corestack_driver_control ||
+				 platform_power_down_only)) {
+
+			dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n",
+					opp_freq);
+			continue;
+		}
+
+		core_count_p = of_get_property(node, "opp-core-count", NULL);
+		if (core_count_p) {
+			u64 remaining_core_mask =
+				kbdev->gpu_props.props.raw_props.shader_present;
+			int core_count = be32_to_cpup(core_count_p);
+
+			core_mask = 0;
+
+			for (; core_count > 0; core_count--) {
+				int core = ffs(remaining_core_mask);
+
+				if (!core) {
+					dev_err(kbdev->dev, "OPP has more cores than GPU\n");
+					return -ENODEV;
+				}
+
+				core_mask |= (1ull << (core-1));
+				remaining_core_mask &= ~(1ull << (core-1));
+			}
+		}
+
+		if (!core_mask) {
+			dev_err(kbdev->dev, "OPP has invalid core mask of 0\n");
+			return -ENODEV;
+		}
+
+		kbdev->devfreq_table[i].opp_freq = opp_freq;
+		kbdev->devfreq_table[i].core_mask = core_mask;
+		if (kbdev->nr_clocks > 0) {
+			int j;
+
+			for (j = 0; j < kbdev->nr_clocks; j++)
+				kbdev->devfreq_table[i].real_freqs[j] =
+					real_freqs[j];
+		}
+#ifdef CONFIG_REGULATOR
+		if (kbdev->nr_regulators > 0) {
+			int j;
+
+			for (j = 0; j < kbdev->nr_regulators; j++)
+				kbdev->devfreq_table[i].opp_volts[j] =
+						opp_volts[j];
+		}
+#endif
+
+		dev_info(kbdev->dev, "OPP %d : opp_freq=%llu core_mask=%llx\n",
+				i, opp_freq, core_mask);
+
+		i++;
+	}
+
+	kbdev->num_opps = i;
+
+	return 0;
+#endif /* KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE */
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+
+static const char *kbase_devfreq_req_type_name(enum kbase_devfreq_work_type type)
+{
+	const char *p;
+
+	switch (type) {
+	case DEVFREQ_WORK_NONE:
+		p = "devfreq_none";
+		break;
+	case DEVFREQ_WORK_SUSPEND:
+		p = "devfreq_suspend";
+		break;
+	case DEVFREQ_WORK_RESUME:
+		p = "devfreq_resume";
+		break;
+	default:
+		p = "Unknown devfreq_type";
+	}
+	return p;
+}
+
+static void kbase_devfreq_suspend_resume_worker(struct work_struct *work)
+{
+	struct kbase_devfreq_queue_info *info = container_of(work,
+			struct kbase_devfreq_queue_info, work);
+	struct kbase_device *kbdev = container_of(info, struct kbase_device,
+			devfreq_queue);
+	unsigned long flags;
+	enum kbase_devfreq_work_type type, acted_type;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	type = kbdev->devfreq_queue.req_type;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	acted_type = kbdev->devfreq_queue.acted_type;
+	dev_dbg(kbdev->dev, "Worker handles queued req: %s (acted: %s)\n",
+		kbase_devfreq_req_type_name(type),
+		kbase_devfreq_req_type_name(acted_type));
+	switch (type) {
+	case DEVFREQ_WORK_SUSPEND:
+	case DEVFREQ_WORK_RESUME:
+		if (type != acted_type) {
+			if (type == DEVFREQ_WORK_RESUME)
+				devfreq_resume_device(kbdev->devfreq);
+			else
+				devfreq_suspend_device(kbdev->devfreq);
+			dev_dbg(kbdev->dev, "Devfreq transition occured: %s => %s\n",
+				kbase_devfreq_req_type_name(acted_type),
+				kbase_devfreq_req_type_name(type));
+			kbdev->devfreq_queue.acted_type = type;
+		}
+		break;
+	default:
+		WARN_ON(1);
+	}
+}
+
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
+
+void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
+				       enum kbase_devfreq_work_type work_type)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+	unsigned long flags;
+
+	WARN_ON(work_type == DEVFREQ_WORK_NONE);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->devfreq_queue.req_type = work_type;
+	queue_work(kbdev->devfreq_queue.workq, &kbdev->devfreq_queue.work);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n",
+		kbase_devfreq_req_type_name(work_type));
+#endif
+}
+
+static int kbase_devfreq_work_init(struct kbase_device *kbdev)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+	kbdev->devfreq_queue.req_type = DEVFREQ_WORK_NONE;
+	kbdev->devfreq_queue.acted_type = DEVFREQ_WORK_RESUME;
+
+	kbdev->devfreq_queue.workq = alloc_ordered_workqueue("devfreq_workq", 0);
+	if (!kbdev->devfreq_queue.workq)
+		return -ENOMEM;
+
+	INIT_WORK(&kbdev->devfreq_queue.work,
+			kbase_devfreq_suspend_resume_worker);
+#endif
+	return 0;
+}
+
+static void kbase_devfreq_work_term(struct kbase_device *kbdev)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+	destroy_workqueue(kbdev->devfreq_queue.workq);
+#endif
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	int err;
+	unsigned int i;
+
+	if (kbdev->nr_clocks == 0) {
+		dev_err(kbdev->dev, "Clock not available for devfreq\n");
+		return -ENODEV;
+	}
+
+	for (i = 0; i < kbdev->nr_clocks; i++) {
+		if (kbdev->clocks[i])
+			kbdev->current_freqs[i] =
+				clk_get_rate(kbdev->clocks[i]);
+		else
+			kbdev->current_freqs[i] = 0;
+	}
+	kbdev->current_nominal_freq = kbdev->current_freqs[0];
+
+	dp = &kbdev->devfreq_profile;
+
+	dp->initial_freq = kbdev->current_freqs[0];
+	dp->polling_ms = 100;
+	dp->target = kbase_devfreq_target;
+	dp->get_dev_status = kbase_devfreq_status;
+	dp->get_cur_freq = kbase_devfreq_cur_freq;
+	dp->exit = kbase_devfreq_exit;
+
+	if (kbase_devfreq_init_freq_table(kbdev, dp))
+		return -EFAULT;
+
+	if (dp->max_state > 0) {
+		/* Record the maximum frequency possible */
+		kbdev->gpu_props.props.core_props.gpu_freq_khz_max =
+			dp->freq_table[0] / 1000;
+	};
+
+	err = kbase_devfreq_init_core_mask_table(kbdev);
+	if (err) {
+		kbase_devfreq_term_freq_table(kbdev);
+		return err;
+	}
+
+	/* Initialise devfreq suspend/resume workqueue */
+	err = kbase_devfreq_work_init(kbdev);
+	if (err) {
+		kbase_devfreq_term_freq_table(kbdev);
+		dev_err(kbdev->dev, "Devfreq initialization failed");
+		return err;
+	}
+
+	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+				"simple_ondemand", NULL);
+	if (IS_ERR(kbdev->devfreq)) {
+		err = PTR_ERR(kbdev->devfreq);
+		goto add_device_failed;
+	}
+
+	/* devfreq_add_device only copies a few of kbdev->dev's fields, so
+	 * set drvdata explicitly so IPA models can access kbdev. */
+	dev_set_drvdata(&kbdev->devfreq->dev, kbdev);
+
+	err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to register OPP notifier (%d)\n", err);
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	err = kbase_ipa_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "IPA initialization failed\n");
+		goto cooling_failed;
+	}
+
+	kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+			kbdev->dev->of_node,
+			kbdev->devfreq,
+			&kbase_ipa_power_model_ops);
+	if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+		err = PTR_ERR(kbdev->devfreq_cooling);
+		dev_err(kbdev->dev,
+			"Failed to register cooling device (%d)\n",
+			err);
+		goto cooling_failed;
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	if (devfreq_remove_device(kbdev->devfreq))
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+add_device_failed:
+	kbase_devfreq_work_term(kbdev);
+
+	kbase_devfreq_term_freq_table(kbdev);
+
+	return err;
+}
+
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+	int err;
+
+	dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->devfreq_cooling)
+		devfreq_cooling_unregister(kbdev->devfreq_cooling);
+
+	kbase_ipa_term(kbdev);
+#endif
+
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+	err = devfreq_remove_device(kbdev->devfreq);
+	if (err)
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	kbase_devfreq_term_core_mask_table(kbdev);
+
+	kbase_devfreq_work_term(kbdev);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
new file mode 100755
index 0000000..6ffdcd8
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+int kbase_devfreq_init(struct kbase_device *kbdev);
+
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_devfreq_enqueue_work - Enqueue a work item for suspend/resume devfreq.
+ * @kbdev:      Device pointer
+ * @work_type:  The type of the devfreq work item, i.e. suspend or resume
+ */
+void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
+				enum kbase_devfreq_work_type work_type);
+
+#endif /* _BASE_DEVFREQ_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
new file mode 100755
index 0000000..567ebf1
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -0,0 +1,349 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016, 2018-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ *
+ */
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+
+#ifdef CONFIG_DEBUG_FS
+
+
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
+{
+	struct kbase_io_access *old_buf;
+	struct kbase_io_access *new_buf;
+	unsigned long flags;
+
+	if (!new_size)
+		goto out_err; /* The new size must not be 0 */
+
+	new_buf = vmalloc(new_size * sizeof(*h->buf));
+	if (!new_buf)
+		goto out_err;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	old_buf = h->buf;
+
+	/* Note: we won't bother with copying the old data over. The dumping
+	 * logic wouldn't work properly as it relies on 'count' both as a
+	 * counter and as an index to the buffer which would have changed with
+	 * the new array. This is a corner case that we don't need to support.
+	 */
+	h->count = 0;
+	h->size = new_size;
+	h->buf = new_buf;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+	vfree(old_buf);
+
+	return 0;
+
+out_err:
+	return -1;
+}
+
+
+int kbase_io_history_init(struct kbase_io_history *h, u16 n)
+{
+	h->enabled = false;
+	spin_lock_init(&h->lock);
+	h->count = 0;
+	h->size = 0;
+	h->buf = NULL;
+	if (kbase_io_history_resize(h, n))
+		return -1;
+
+	return 0;
+}
+
+
+void kbase_io_history_term(struct kbase_io_history *h)
+{
+	vfree(h->buf);
+	h->buf = NULL;
+}
+
+
+/* kbase_io_history_add - add new entry to the register access history
+ *
+ * @h: Pointer to the history data structure
+ * @addr: Register address
+ * @value: The value that is either read from or written to the register
+ * @write: 1 if it's a register write, 0 if it's a read
+ */
+static void kbase_io_history_add(struct kbase_io_history *h,
+		void __iomem const *addr, u32 value, u8 write)
+{
+	struct kbase_io_access *io;
+	unsigned long flags;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	io = &h->buf[h->count % h->size];
+	io->addr = (uintptr_t)addr | write;
+	io->value = value;
+	++h->count;
+	/* If count overflows, move the index by the buffer size so the entire
+	 * buffer will still be dumped later */
+	if (unlikely(!h->count))
+		h->count = h->size;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+void kbase_io_history_dump(struct kbase_device *kbdev)
+{
+	struct kbase_io_history *const h = &kbdev->io_history;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!unlikely(h->enabled))
+		return;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	dev_err(kbdev->dev, "Register IO History:");
+	iters = (h->size > h->count) ? h->count : h->size;
+	dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
+{
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	writel(value, kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				value, 1);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
+{
+	u32 val;
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	val = readl(kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				val, 0);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val);
+
+	return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
+
+/**
+ * kbase_report_gpu_fault - Report a GPU fault.
+ * @kbdev:    Kbase device pointer
+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
+ *            was also set
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using dev_warn().
+ */
+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+	u32 status;
+	u64 address;
+
+	status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS));
+	address = (u64) kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32;
+	address |= kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO));
+
+	dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+			status & 0xFF,
+			kbase_exception_name(kbdev, status),
+			address);
+	if (multiple)
+		dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->cache_clean_in_progress) {
+		/* If this is called while another clean is in progress, we
+		 * can't rely on the current one to flush any new changes in
+		 * the cache. Instead, trigger another cache clean immediately
+		 * after this one finishes.
+		 */
+		kbdev->cache_clean_queued = true;
+		return;
+	}
+
+	/* Enable interrupt */
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask | CLEAN_CACHES_COMPLETED);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES);
+
+	kbdev->cache_clean_in_progress = true;
+}
+
+void kbase_gpu_start_cache_clean(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_gpu_start_cache_clean_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+static void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (kbdev->cache_clean_queued) {
+		kbdev->cache_clean_queued = false;
+
+		KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+				GPU_COMMAND_CLEAN_INV_CACHES);
+	} else {
+		/* Disable interrupt */
+		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~CLEAN_CACHES_COMPLETED);
+
+		kbdev->cache_clean_in_progress = false;
+
+		wake_up(&kbdev->cache_clean_wait);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	while (kbdev->cache_clean_in_progress) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		wait_event_interruptible(kbdev->cache_clean_wait,
+				!kbdev->cache_clean_in_progress);
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+	if (val & GPU_FAULT)
+		kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+	if (val & RESET_COMPLETED)
+		kbase_pm_reset_done(kbdev);
+
+	if (val & PRFCNT_SAMPLE_COMPLETED)
+		kbase_instr_hwcnt_sample_done(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
+
+	/* kbase_pm_check_transitions (called by kbase_pm_power_changed) must
+	 * be called after the IRQ has been cleared. This is because it might
+	 * trigger further power transitions and we don't want to miss the
+	 * interrupt raised to notify us that these further transitions have
+	 * finished. The same applies to kbase_clean_caches_done() - if another
+	 * clean was queued, it might trigger another clean, which might
+	 * generate another interrupt which shouldn't be missed.
+	 */
+
+	if (val & CLEAN_CACHES_COMPLETED)
+		kbase_clean_caches_done(kbdev);
+
+	if (val & POWER_CHANGED_ALL) {
+		kbase_pm_power_changed(kbdev);
+	} else if (val & CLEAN_CACHES_COMPLETED) {
+		/* When 'platform_power_down_only' is enabled, the L2 cache is
+		 * not powered down, but flushed before the GPU power down
+		 * (which is done by the platform code). So the L2 state machine
+		 * requests a cache flush. And when that flush completes, the L2
+		 * state machine needs to be re-invoked to proceed with the GPU
+		 * power down.
+		 * If cache line evict messages can be lost when shader cores
+		 * power down then we need to flush the L2 cache before powering
+		 * down cores. When the flush completes, the shaders' state
+		 * machine needs to be re-invoked to proceed with powering down
+		 * cores.
+		 */
+		if (platform_power_down_only ||
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921))
+			kbase_pm_power_changed(kbdev);
+	}
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
new file mode 100755
index 0000000..7886e96
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -0,0 +1,89 @@
+/*
+ *
+ * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Backend-specific HW access device APIs
+ */
+
+#ifndef _KBASE_DEVICE_INTERNAL_H_
+#define _KBASE_DEVICE_INTERNAL_H_
+
+/**
+ * kbase_reg_write - write to GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @value:  Value to write
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false).
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value);
+
+/**
+ * kbase_reg_read - read from GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false).
+ *
+ * Return: Value in desired register
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset);
+
+/**
+ * kbase_gpu_start_cache_clean - Start a cache clean
+ * @kbdev: Kbase device
+ *
+ * Issue a cache clean and invalidate command to hardware. This function will
+ * take hwaccess_lock.
+ */
+void kbase_gpu_start_cache_clean(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_start_cache_clean_nolock - Start a cache clean
+ * @kbdev: Kbase device
+ *
+ * Issue a cache clean and invalidate command to hardware. hwaccess_lock
+ * must be held by the caller.
+ */
+void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish
+ * @kbdev: Kbase device
+ *
+ * This function will take hwaccess_lock, and may sleep.
+ */
+void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_interrupt - GPU interrupt handler
+ * @kbdev: Kbase device pointer
+ * @val:   The value of the GPU IRQ status register which triggered the call
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be
+ * handled.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+#endif /* _KBASE_DEVICE_INTERNAL_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
new file mode 100755
index 0000000..97dd09a
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
@@ -0,0 +1,169 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_reset_gpu.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbasep_platform_device_init(kbdev);
+	if (err)
+		return err;
+
+	err = kbase_pm_runtime_init(kbdev);
+	if (err)
+		goto fail_runtime_pm;
+
+	/* Ensure we can access the GPU registers */
+	kbase_pm_register_access_enable(kbdev);
+
+	/* Find out GPU properties based on the GPU feature registers */
+	kbase_gpuprops_set(kbdev);
+
+	/* We're done accessing the GPU registers for now. */
+	kbase_pm_register_access_disable(kbdev);
+
+	err = kbase_install_interrupts(kbdev);
+	if (err)
+		goto fail_interrupts;
+
+	err = kbase_hwaccess_pm_early_init(kbdev);
+	if (err)
+		goto fail_pm;
+
+	return 0;
+
+fail_pm:
+	kbase_release_interrupts(kbdev);
+fail_interrupts:
+	kbase_pm_runtime_term(kbdev);
+fail_runtime_pm:
+	kbasep_platform_device_term(kbdev);
+
+	return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_early_term(kbdev);
+	kbase_release_interrupts(kbdev);
+	kbase_pm_runtime_term(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbase_hwaccess_pm_late_init(kbdev);
+	if (err)
+		return err;
+
+	err = kbase_reset_gpu_init(kbdev);
+	if (err)
+		goto fail_reset_gpu_init;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		goto fail_pm_powerup;
+
+	err = kbase_backend_timer_init(kbdev);
+	if (err)
+		goto fail_timer;
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+		dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+		err = -EINVAL;
+		goto fail_interrupt_test;
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = kbase_job_slot_init(kbdev);
+	if (err)
+		goto fail_job_slot;
+
+	/* Do the initialisation of devfreq.
+	 * Devfreq needs backend_timer_init() for completion of its
+	 * initialisation and it also needs to catch the first callback
+	 * occurence of the runtime_suspend event for maintaining state
+	 * coherence with the backend power management, hence needs to be
+	 * placed before the kbase_pm_context_idle().
+	 */
+	err = kbase_backend_devfreq_init(kbdev);
+	if (err)
+		goto fail_devfreq_init;
+
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	kbase_pm_context_idle(kbdev);
+
+	/* Update gpuprops with L2_FEATURES if applicable */
+	kbase_gpuprops_update_l2_features(kbdev);
+
+	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+	return 0;
+
+fail_devfreq_init:
+	kbase_job_slot_term(kbdev);
+fail_job_slot:
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	kbase_backend_timer_term(kbdev);
+fail_timer:
+	kbase_hwaccess_pm_halt(kbdev);
+fail_pm_powerup:
+	kbase_reset_gpu_term(kbdev);
+fail_reset_gpu_init:
+	kbase_hwaccess_pm_late_term(kbdev);
+
+	return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+	kbase_backend_devfreq_term(kbdev);
+	kbase_job_slot_halt(kbdev);
+	kbase_job_slot_term(kbdev);
+	kbase_backend_timer_term(kbdev);
+	kbase_hwaccess_pm_halt(kbdev);
+	kbase_reset_gpu_term(kbdev);
+	kbase_hwaccess_pm_late_term(kbdev);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100755
index 0000000..29018b2
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,125 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel property query backend APIs
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	int i;
+
+	/* Fill regdump with the content of the relevant registers */
+	regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
+
+	regdump->l2_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_FEATURES));
+	regdump->core_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(CORE_FEATURES));
+	regdump->tiler_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_FEATURES));
+	regdump->mem_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MEM_FEATURES));
+	regdump->mmu_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MMU_FEATURES));
+	regdump->as_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(AS_PRESENT));
+	regdump->js_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_PRESENT));
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		regdump->js_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_FEATURES_REG(i)));
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		regdump->texture_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)));
+
+	regdump->thread_max_threads = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_THREADS));
+	regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE));
+	regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE));
+	regdump->thread_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_FEATURES));
+	regdump->thread_tls_alloc = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_TLS_ALLOC));
+
+	regdump->shader_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_LO));
+	regdump->shader_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_HI));
+
+	regdump->tiler_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_LO));
+	regdump->tiler_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_HI));
+
+	regdump->l2_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_LO));
+	regdump->l2_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_HI));
+
+	regdump->stack_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(STACK_PRESENT_LO));
+	regdump->stack_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(STACK_PRESENT_HI));
+}
+
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
+		/* Ensure we can access the GPU registers */
+		kbase_pm_register_access_enable(kbdev);
+
+		regdump->coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES));
+
+		/* We're done accessing the GPU registers for now. */
+		kbase_pm_register_access_disable(kbdev);
+	} else {
+		/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
+		regdump->coherency_features =
+				COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+	}
+}
+
+void kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) {
+		regdump->l2_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_FEATURES));
+	}
+}
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
new file mode 100755
index 0000000..5494c49
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -0,0 +1,402 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * GPU backend instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_instr_hwcnt_enable *enable)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	u32 prfcnt_config;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* alignment failure */
+	if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1)))
+		goto out_err;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		/* Instrumentation is already enabled */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		goto out_err;
+	}
+
+	/* Enable interrupt */
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
+						PRFCNT_SAMPLE_COMPLETED);
+
+	/* In use, this context is the owner */
+	kbdev->hwcnt.kctx = kctx;
+	/* Remember the dump address so we can reprogram it later */
+	kbdev->hwcnt.addr = enable->dump_buffer;
+	kbdev->hwcnt.addr_bytes = enable->dump_buffer_bytes;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Configure */
+	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+	if (enable->use_secondary)
+	{
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+	}
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_OFF);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					enable->dump_buffer & 0xFFFFFFFF);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					enable->dump_buffer >> 32);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+					enable->jm_bm);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+					enable->shader_bm);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+					enable->mmu_l2_bm);
+	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
+	 * HW counter dump. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0);
+	else
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							enable->tiler_bm);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL);
+
+	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							enable->tiler_bm);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	err = 0;
+
+	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+	return err;
+ out_err:
+	return err;
+}
+
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	while (1) {
+		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
+			/* Instrumentation is not enabled */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.kctx != kctx) {
+			/* Instrumentation has been setup for another context */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
+			break;
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+		/* Ongoing dump/setup - wait for its completion */
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Disable interrupt */
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
+
+	/* Disable the counters */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
+
+	kbdev->hwcnt.kctx = NULL;
+	kbdev->hwcnt.addr = 0ULL;
+	kbdev->hwcnt.addr_bytes = 0ULL;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+									kctx);
+
+	err = 0;
+
+ out:
+	return err;
+}
+
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.kctx != kctx) {
+		/* The instrumentation has been setup for another context */
+		goto unlock;
+	}
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
+		/* HW counters are disabled or another dump is ongoing, or we're
+		 * resetting */
+		goto unlock;
+	}
+
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Mark that we're dumping - the PF handler can signal that we faulted
+	 */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+	/* Reconfigure the dump address */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					kbdev->hwcnt.addr & 0xFFFFFFFF);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					kbdev->hwcnt.addr >> 32);
+
+	/* Start dumping */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
+					kbdev->hwcnt.addr, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_PRFCNT_SAMPLE);
+
+	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+
+	err = 0;
+
+ unlock:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
+
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success)
+{
+	unsigned long flags;
+	bool complete = false;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+		*success = true;
+		complete = true;
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		*success = false;
+		complete = true;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
+
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags, pm_flags;
+
+	kbdev = container_of(data, struct kbase_device,
+						hwcnt.backend.cache_clean_work);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* Clean and invalidate the caches so we're sure the mmu tables for the
+	 * dump buffer is valid.
+	 */
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
+	kbase_gpu_start_cache_clean_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	kbase_gpu_wait_cache_clean(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
+	/* All finished and idle */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
+		if (kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) {
+			/* All finished and idle */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+			kbdev->hwcnt.backend.triggered = 1;
+			wake_up(&kbdev->hwcnt.backend.wait);
+		} else {
+			int ret;
+			/* Always clean and invalidate the cache after a successful dump
+			 */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+			ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+						&kbdev->hwcnt.backend.cache_clean_work);
+			KBASE_DEBUG_ASSERT(ret);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long flags;
+	int err;
+
+	/* Wait for dump & cache clean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		err = -EINVAL;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	} else {
+		/* Dump done */
+		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+		err = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return err;
+}
+
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* Check it's the context previously set up and we're not already
+	 * dumping */
+	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
+							KBASE_INSTR_STATE_IDLE)
+		goto out;
+
+	/* Clear the counters */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_PRFCNT_CLEAR);
+
+	err = 0;
+
+out:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+int kbase_instr_backend_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
+	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
+						kbasep_cache_clean_worker);
+	kbdev->hwcnt.backend.triggered = 0;
+
+	kbdev->hwcnt.backend.cache_clean_wq =
+			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
+	if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+void kbase_instr_backend_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
new file mode 100755
index 0000000..c9fb759
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
@@ -0,0 +1,57 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+	/* State where instrumentation is not active */
+	KBASE_INSTR_STATE_DISABLED = 0,
+	/* State machine is active and ready for a command. */
+	KBASE_INSTR_STATE_IDLE,
+	/* Hardware is currently dumping a frame. */
+	KBASE_INSTR_STATE_DUMPING,
+	/* We've requested a clean to occur on a workqueue */
+	KBASE_INSTR_STATE_REQUEST_CLEAN,
+	/* An error has occured during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+	wait_queue_head_t wait;
+	int triggered;
+
+	enum kbase_instr_state state;
+	struct workqueue_struct *cache_clean_wq;
+	struct work_struct  cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100755
index 0000000..2254b9f
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100755
index 0000000..ca3c048
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend specific IRQ APIs
+ */
+
+#ifndef _KBASE_IRQ_INTERNAL_H_
+#define _KBASE_IRQ_INTERNAL_H_
+
+int kbase_install_interrupts(struct kbase_device *kbdev);
+
+void kbase_release_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
+ *                          execution
+ * @kbdev: The kbase device
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev);
+
+#endif /* _KBASE_IRQ_INTERNAL_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
new file mode 100755
index 0000000..643f450
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -0,0 +1,472 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016,2018-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+
+#include <linux/interrupt.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static void *kbase_tag(void *ptr, u32 tag)
+{
+	return (void *)(((uintptr_t) ptr) | tag);
+}
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+
+	if (!val) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_job_done(kbdev, val);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return IRQ_NONE;
+	}
+
+	atomic_inc(&kbdev->faults_pending);
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!val) {
+		atomic_dec(&kbdev->faults_pending);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_mmu_interrupt(kbdev, val);
+
+	atomic_dec(&kbdev->faults_pending);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS));
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_gpu_interrupt(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler);
+
+static irq_handler_t kbase_handler_table[] = {
+	[JOB_IRQ_TAG] = kbase_job_irq_handler,
+	[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+	[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
+
+#ifdef CONFIG_MALI_DEBUG
+#define  JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define  MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define  GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * kbase_set_custom_irq_handler - Set a custom IRQ handler
+ * @kbdev: Device for which the handler is to be registered
+ * @custom_handler: Handler to be registered
+ * @irq_type: Interrupt type
+ *
+ * Registers given interrupt handler for requested interrupt type
+ * In the case where irq handler is not specified, the default handler shall be
+ * registered
+ *
+ * Return: 0 case success, error code otherwise
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+					irq_handler_t custom_handler,
+					int irq_type)
+{
+	int result = 0;
+	irq_handler_t requested_irq_handler = NULL;
+
+	KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
+						(GPU_IRQ_HANDLER >= irq_type));
+
+	/* Release previous handler */
+	if (kbdev->irqs[irq_type].irq)
+		free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
+
+	requested_irq_handler = (NULL != custom_handler) ? custom_handler :
+						kbase_handler_table[irq_type];
+
+	if (0 != request_irq(kbdev->irqs[irq_type].irq,
+			requested_irq_handler,
+			kbdev->irqs[irq_type].flags | IRQF_SHARED,
+			dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+		result = -EINVAL;
+		dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+					kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+		dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/* test correct interrupt assigment and reception by cpu */
+struct kbasep_irq_test {
+	struct hrtimer timer;
+	wait_queue_head_t wait;
+	int triggered;
+	u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT    500
+
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val);
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+	struct kbasep_irq_test *test_data = container_of(timer,
+						struct kbasep_irq_test, timer);
+
+	test_data->timeout = 1;
+	test_data->triggered = 1;
+	wake_up(&test_data->wait);
+	return HRTIMER_NORESTART;
+}
+
+static int kbasep_common_test_interrupt(
+				struct kbase_device * const kbdev, u32 tag)
+{
+	int err = 0;
+	irq_handler_t test_handler;
+
+	u32 old_mask_val;
+	u16 mask_offset;
+	u16 rawstat_offset;
+
+	switch (tag) {
+	case JOB_IRQ_TAG:
+		test_handler = kbase_job_irq_test_handler;
+		rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+		mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+		break;
+	case MMU_IRQ_TAG:
+		test_handler = kbase_mmu_irq_test_handler;
+		rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+		mask_offset = MMU_REG(MMU_IRQ_MASK);
+		break;
+	case GPU_IRQ_TAG:
+		/* already tested by pm_driver - bail out */
+	default:
+		return 0;
+	}
+
+	/* store old mask */
+	old_mask_val = kbase_reg_read(kbdev, mask_offset);
+	/* mask interrupts */
+	kbase_reg_write(kbdev, mask_offset, 0x0);
+
+	if (kbdev->irqs[tag].irq) {
+		/* release original handler and install test handler */
+		if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
+			err = -EINVAL;
+		} else {
+			kbasep_irq_test_data.timeout = 0;
+			hrtimer_init(&kbasep_irq_test_data.timer,
+					CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+			kbasep_irq_test_data.timer.function =
+						kbasep_test_interrupt_timeout;
+
+			/* trigger interrupt */
+			kbase_reg_write(kbdev, mask_offset, 0x1);
+			kbase_reg_write(kbdev, rawstat_offset, 0x1);
+
+			hrtimer_start(&kbasep_irq_test_data.timer,
+					HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
+					HRTIMER_MODE_REL);
+
+			wait_event(kbasep_irq_test_data.wait,
+					kbasep_irq_test_data.triggered != 0);
+
+			if (kbasep_irq_test_data.timeout != 0) {
+				dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+				err = -EINVAL;
+			} else {
+				dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+			}
+
+			hrtimer_cancel(&kbasep_irq_test_data.timer);
+			kbasep_irq_test_data.triggered = 0;
+
+			/* mask interrupts */
+			kbase_reg_write(kbdev, mask_offset, 0x0);
+
+			/* release test handler */
+			free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+		}
+
+		/* restore original interrupt */
+		if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
+				kbdev->irqs[tag].flags | IRQF_SHARED,
+				dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+			dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
+						kbdev->irqs[tag].irq, tag);
+			err = -EINVAL;
+		}
+	}
+	/* restore old mask */
+	kbase_reg_write(kbdev, mask_offset, old_mask_val);
+
+	return err;
+}
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev)
+{
+	int err;
+
+	init_waitqueue_head(&kbasep_irq_test_data.wait);
+	kbasep_irq_test_data.triggered = 0;
+
+	/* A suspend won't happen during startup/insmod */
+	kbase_pm_context_active(kbdev);
+
+	err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+
+ out:
+	kbase_pm_context_idle(kbdev);
+
+	return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	int err;
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
+				kbdev->irqs[i].flags | IRQF_SHARED,
+				dev_name(kbdev->dev),
+				kbase_tag(kbdev, i));
+		if (err) {
+			dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+							kbdev->irqs[i].irq, i);
+#ifdef CONFIG_SPARSE_IRQ
+			dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+			goto release;
+		}
+	}
+
+	return 0;
+
+ release:
+	while (i-- > 0)
+		free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+
+	return err;
+}
+
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+	}
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			synchronize_irq(kbdev->irqs[i].irq);
+	}
+}
+
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
new file mode 100755
index 0000000..c8153ba
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -0,0 +1,244 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register backend context / address space management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+
+/**
+ * assign_and_activate_kctx_addr_space - Assign an AS to a context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @current_as: Address Space to assign
+ *
+ * Assign an Address Space (AS) to a context, and add the context to the Policy.
+ *
+ * This includes
+ *   setting up the global runpool_irq structure and the context on the AS,
+ *   Activating the MMU on the AS,
+ *   Allowing jobs to be submitted on the AS.
+ *
+ * Context:
+ *   kbasep_js_kctx_info.jsctx_mutex held,
+ *   kbasep_js_device_data.runpool_mutex held,
+ *   AS transaction mutex held,
+ *   Runpool IRQ lock held
+ */
+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_as *current_as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Attribute handling */
+	kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+	/* Allow it to run jobs */
+	kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+	kbase_js_runpool_inc_context_count(kbdev, kctx);
+}
+
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	int i;
+
+	if (kbdev->hwaccess.active_kctx[js] == kctx) {
+		/* Context is already active */
+		return true;
+	}
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		if (kbdev->as_to_kctx[i] == kctx) {
+			/* Context already has ASID - mark as active */
+			return true;
+		}
+	}
+
+	/* Context does not have address space assigned */
+	return false;
+}
+
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	int as_nr = kctx->as_nr;
+
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Attempting to release context without ASID\n");
+		return;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (atomic_read(&kctx->refcount) != 1) {
+		WARN(1, "Attempting to release active ASID\n");
+		return;
+	}
+
+	kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
+
+	kbase_ctx_sched_release_ctx(kctx);
+	kbase_js_runpool_dec_context_count(kbdev, kctx);
+}
+
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+}
+
+int kbase_backend_find_and_release_free_address_space(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	int i;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_kctx_info *as_js_kctx_info;
+		struct kbase_context *as_kctx;
+
+		as_kctx = kbdev->as_to_kctx[i];
+		as_js_kctx_info = &as_kctx->jctx.sched_info;
+
+		/* Don't release privileged or active contexts, or contexts with
+		 * jobs running.
+		 * Note that a context will have at least 1 reference (which
+		 * was previously taken by kbasep_js_schedule_ctx()) until
+		 * descheduled.
+		 */
+		if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) &&
+			atomic_read(&as_kctx->refcount) == 1) {
+			if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
+								as_kctx)) {
+				WARN(1, "Failed to retain active context\n");
+
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				return KBASEP_AS_NR_INVALID;
+			}
+
+			kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
+
+			/* Drop and retake locks to take the jsctx_mutex on the
+			 * context we're about to release without violating lock
+			 * ordering
+			 */
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+
+			/* Release context from address space */
+			mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+
+			kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
+
+			if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) {
+				kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
+								as_kctx,
+								true);
+
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+				return i;
+			}
+
+			/* Context was retained while locks were dropped,
+			 * continue looking for free AS */
+
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_as *new_address_space = NULL;
+	int js;
+
+	js_devdata = &kbdev->js_data;
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		if (kbdev->hwaccess.active_kctx[js] == kctx) {
+			WARN(1, "Context is already scheduled in\n");
+			return false;
+		}
+	}
+
+	new_address_space = &kbdev->as[as_nr];
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
+
+	if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+		/* We need to retain it to keep the corresponding address space
+		 */
+		kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	}
+
+	return true;
+}
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
new file mode 100755
index 0000000..b4d2ae1
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific definitions
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
+#define _KBASE_HWACCESS_GPU_DEFS_H_
+
+/* SLOT_RB_SIZE must be < 256 */
+#define SLOT_RB_SIZE 2
+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
+
+/**
+ * struct rb_entry - Ringbuffer entry
+ * @katom:	Atom associated with this entry
+ */
+struct rb_entry {
+	struct kbase_jd_atom *katom;
+};
+
+/**
+ * struct slot_rb - Slot ringbuffer
+ * @entries:		Ringbuffer entries
+ * @last_context:	The last context to submit a job on this slot
+ * @read_idx:		Current read index of buffer
+ * @write_idx:		Current write index of buffer
+ * @job_chain_flag:	Flag used to implement jobchain disambiguation
+ */
+struct slot_rb {
+	struct rb_entry entries[SLOT_RB_SIZE];
+
+	struct kbase_context *last_context;
+
+	u8 read_idx;
+	u8 write_idx;
+
+	u8 job_chain_flag;
+};
+
+/**
+ * struct kbase_backend_data - GPU backend specific data for HW access layer
+ * @slot_rb:			Slot ringbuffers
+ * @rmu_workaround_flag:	When PRLAM-8987 is present, this flag determines
+ *				whether slots 0/1 or slot 2 are currently being
+ *				pulled from
+ * @scheduling_timer:		The timer tick used for rescheduling jobs
+ * @timer_running:		Is the timer running? The runpool_mutex must be
+ *				held whilst modifying this.
+ * @suspend_timer:              Is the timer suspended? Set when a suspend
+ *                              occurs and cleared on resume. The runpool_mutex
+ *                              must be held whilst modifying this.
+ * @reset_gpu:			Set to a KBASE_RESET_xxx value (see comments)
+ * @reset_workq:		Work queue for performing the reset
+ * @reset_work:			Work item for performing the reset
+ * @reset_wait:			Wait event signalled when the reset is complete
+ * @reset_timer:		Timeout for soft-stops before the reset
+ * @timeouts_updated:           Have timeout values just been updated?
+ *
+ * The hwaccess_lock (a spinlock) must be held when accessing this structure
+ */
+struct kbase_backend_data {
+	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
+
+	bool rmu_workaround_flag;
+
+	struct hrtimer scheduling_timer;
+
+	bool timer_running;
+	bool suspend_timer;
+
+	atomic_t reset_gpu;
+
+/* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_NOT_PENDING     0
+/* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_PREPARED        1
+/* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_COMMITTED       2
+/* The GPU reset process is currently occuring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+#define KBASE_RESET_GPU_HAPPENING       3
+/* Reset the GPU silently, used when resetting the GPU as part of normal
+ * behavior (e.g. when exiting protected mode). */
+#define KBASE_RESET_GPU_SILENT          4
+	struct workqueue_struct *reset_workq;
+	struct work_struct reset_work;
+	wait_queue_head_t reset_wait;
+	struct hrtimer reset_timer;
+
+	bool timeouts_updated;
+};
+
+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
new file mode 100755
index 0000000..794abbf
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -0,0 +1,1490 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Base kernel job manager APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_tracepoints.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_reset_gpu.h>
+#include <mali_kbase_ctx_sched.h>
+#include <mali_kbase_hwcnt_context.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+#define beenthere(kctx, f, a...) \
+			dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev);
+
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
+						struct kbase_context *kctx)
+{
+	return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT));
+}
+
+static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
+				base_jd_core_req core_req,
+				int js)
+{
+	u64 affinity;
+
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+			BASE_JD_REQ_T) {
+		/* Tiler-only atom */
+		/* If the hardware supports XAFFINITY then we'll only enable
+		 * the tiler (which is the default so this is a no-op),
+		 * otherwise enable shader core 0.
+		 */
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			affinity = 1;
+		else
+			affinity = 0;
+	} else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+			BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+		unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+		struct mali_base_gpu_coherent_group_info *coherency_info =
+			&kbdev->gpu_props.props.coherency_info;
+
+		affinity = kbdev->pm.backend.shaders_avail &
+				kbdev->pm.debug_core_mask[js];
+
+		/* JS2 on a dual core group system targets core group 1. All
+		 * other cases target core group 0.
+		 */
+		if (js == 2 && num_core_groups > 1)
+			affinity &= coherency_info->group[1].core_mask;
+		else
+			affinity &= coherency_info->group[0].core_mask;
+	} else {
+		/* Use all cores */
+		affinity = kbdev->pm.backend.shaders_avail &
+				kbdev->pm.debug_core_mask[js];
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+					affinity & 0xFFFFFFFF);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+					affinity >> 32);
+
+	return affinity;
+}
+
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js)
+{
+	struct kbase_context *kctx;
+	u32 cfg;
+	u64 jc_head = katom->jc;
+	u64 affinity;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	/* Command register must be available */
+	KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
+						jc_head & 0xFFFFFFFF);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
+						jc_head >> 32);
+
+	affinity = kbase_job_write_affinity(kbdev, katom->core_req, js);
+
+	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
+	 * start */
+	cfg = kctx->as_nr;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) &&
+			!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
+		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START))
+		cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) &&
+			!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
+		cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
+	else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE))
+		cfg |= JS_CONFIG_END_FLUSH_CLEAN;
+	else
+		cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649))
+		cfg |= JS_CONFIG_START_MMU;
+
+	cfg |= JS_CONFIG_THREAD_PRI(8);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) &&
+		(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED))
+		cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+		if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+			cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								true;
+		} else {
+			katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								false;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
+				katom->flush_id);
+
+	/* Write an approximate start timestamp.
+	 * It's approximate because there might be a job in the HEAD register.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* GO ! */
+	dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx",
+				katom, kctx, js, jc_head);
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
+							(u32)affinity);
+
+	KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx,
+		js, kbase_jd_atom_id(kctx, katom), TL_JS_EVENT_START);
+
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, katom, jc_head,
+			affinity, cfg);
+	KBASE_TLSTREAM_TL_RET_CTX_LPU(
+		kbdev,
+		kctx,
+		&kbdev->gpu_props.props.raw_props.js_features[
+			katom->slot_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_LPU(
+			kbdev,
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[js],
+			"ctx_nr,atom_nr");
+#ifdef CONFIG_GPU_TRACEPOINTS
+	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
+		/* If this is the only job on the slot, trace it as starting */
+		char js_string[16];
+
+		trace_gpu_sched_switch(
+				kbasep_make_job_slot_string(js, js_string,
+						sizeof(js_string)),
+				ktime_to_ns(katom->start_timestamp),
+				(u32)katom->kctx->id, 0, katom->work_id);
+		kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
+	}
+#endif
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+						JS_COMMAND_START);
+}
+
+/**
+ * kbasep_job_slot_update_head_start_timestamp - Update timestamp
+ * @kbdev: kbase device
+ * @js: job slot
+ * @end_timestamp: timestamp
+ *
+ * Update the start_timestamp of the job currently in the HEAD, based on the
+ * fact that we got an IRQ for the previous set of completed jobs.
+ *
+ * The estimate also takes into account the time the job was submitted, to
+ * work out the best estimate (which might still result in an over-estimate to
+ * the calculated time spent)
+ */
+static void kbasep_job_slot_update_head_start_timestamp(
+						struct kbase_device *kbdev,
+						int js,
+						ktime_t end_timestamp)
+{
+	ktime_t timestamp_diff;
+	struct kbase_jd_atom *katom;
+
+	/* Checking the HEAD position for the job slot */
+	katom = kbase_gpu_inspect(kbdev, js, 0);
+	if (katom != NULL) {
+		timestamp_diff = ktime_sub(end_timestamp,
+				katom->start_timestamp);
+		if (ktime_to_ns(timestamp_diff) >= 0) {
+			/* Only update the timestamp if it's a better estimate
+			 * than what's currently stored. This is because our
+			 * estimate that accounts for the throttle time may be
+			 * too much of an overestimate */
+			katom->start_timestamp = end_timestamp;
+		}
+	}
+}
+
+/**
+ * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline
+ * tracepoint
+ * @kbdev: kbase device
+ * @js: job slot
+ *
+ * Make a tracepoint call to the instrumentation module informing that
+ * softstop happened on given lpu (job slot).
+ */
+static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
+					int js)
+{
+	KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(
+		kbdev,
+		&kbdev->gpu_props.props.raw_props.js_features[js]);
+}
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done)
+{
+	int i;
+	u32 count = 0;
+	ktime_t end_timestamp;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
+
+	end_timestamp = ktime_get();
+
+	while (done) {
+		u32 failed = done >> 16;
+
+		/* treat failed slots as finished slots */
+		u32 finished = (done & 0xFFFF) | failed;
+
+		/* Note: This is inherently unfair, as we always check
+		 * for lower numbered interrupts before the higher
+		 * numbered ones.*/
+		i = ffs(finished) - 1;
+		KBASE_DEBUG_ASSERT(i >= 0);
+
+		do {
+			int nr_done;
+			u32 active;
+			u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */
+			u64 job_tail = 0;
+
+			if (failed & (1u << i)) {
+				/* read out the job slot status code if the job
+				 * slot reported failure */
+				completion_code = kbase_reg_read(kbdev,
+					JOB_SLOT_REG(i, JS_STATUS));
+
+				if (completion_code == BASE_JD_EVENT_STOPPED) {
+					KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(
+						kbdev, NULL,
+						i, 0, TL_JS_EVENT_SOFT_STOP);
+
+					kbasep_trace_tl_event_lpu_softstop(
+						kbdev, i);
+
+					/* Soft-stopped job - read the value of
+					 * JS<n>_TAIL so that the job chain can
+					 * be resumed */
+					job_tail = (u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_LO)) |
+						((u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_HI))
+						 << 32);
+				} else if (completion_code ==
+						BASE_JD_EVENT_NOT_STARTED) {
+					/* PRLAM-10673 can cause a TERMINATED
+					 * job to come back as NOT_STARTED, but
+					 * the error interrupt helps us detect
+					 * it */
+					completion_code =
+						BASE_JD_EVENT_TERMINATED;
+				}
+
+				kbase_gpu_irq_evict(kbdev, i, completion_code);
+
+				/* Some jobs that encounter a BUS FAULT may result in corrupted
+				 * state causing future jobs to hang. Reset GPU before
+				 * allowing any other jobs on the slot to continue. */
+				if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) {
+					if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) {
+						if (kbase_prepare_to_reset_gpu_locked(kbdev))
+							kbase_reset_gpu_locked(kbdev);
+					}
+				}
+			}
+
+			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+					done & ((1 << i) | (1 << (i + 16))));
+			active = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_JS_STATE));
+
+			if (((active >> i) & 1) == 0 &&
+					(((done >> (i + 16)) & 1) == 0)) {
+				/* There is a potential race we must work
+				 * around:
+				 *
+				 *  1. A job slot has a job in both current and
+				 *     next registers
+				 *  2. The job in current completes
+				 *     successfully, the IRQ handler reads
+				 *     RAWSTAT and calls this function with the
+				 *     relevant bit set in "done"
+				 *  3. The job in the next registers becomes the
+				 *     current job on the GPU
+				 *  4. Sometime before the JOB_IRQ_CLEAR line
+				 *     above the job on the GPU _fails_
+				 *  5. The IRQ_CLEAR clears the done bit but not
+				 *     the failed bit. This atomically sets
+				 *     JOB_IRQ_JS_STATE. However since both jobs
+				 *     have now completed the relevant bits for
+				 *     the slot are set to 0.
+				 *
+				 * If we now did nothing then we'd incorrectly
+				 * assume that _both_ jobs had completed
+				 * successfully (since we haven't yet observed
+				 * the fail bit being set in RAWSTAT).
+				 *
+				 * So at this point if there are no active jobs
+				 * left we check to see if RAWSTAT has a failure
+				 * bit set for the job slot. If it does we know
+				 * that there has been a new failure that we
+				 * didn't previously know about, so we make sure
+				 * that we record this in active (but we wait
+				 * for the next loop to deal with it).
+				 *
+				 * If we were handling a job failure (i.e. done
+				 * has the relevant high bit set) then we know
+				 * that the value read back from
+				 * JOB_IRQ_JS_STATE is the correct number of
+				 * remaining jobs because the failed job will
+				 * have prevented any futher jobs from starting
+				 * execution.
+				 */
+				u32 rawstat = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT));
+
+				if ((rawstat >> (i + 16)) & 1) {
+					/* There is a failed job that we've
+					 * missed - add it back to active */
+					active |= (1u << i);
+				}
+			}
+
+			dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+							completion_code);
+
+			nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+			nr_done -= (active >> i) & 1;
+			nr_done -= (active >> (i + 16)) & 1;
+
+			if (nr_done <= 0) {
+				dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+									i);
+
+				goto spurious;
+			}
+
+			count += nr_done;
+
+			while (nr_done) {
+				if (nr_done == 1) {
+					kbase_gpu_complete_hw(kbdev, i,
+								completion_code,
+								job_tail,
+								&end_timestamp);
+					kbase_jm_try_kick_all(kbdev);
+				} else {
+					/* More than one job has completed.
+					 * Since this is not the last job being
+					 * reported this time it must have
+					 * passed. This is because the hardware
+					 * will not allow further jobs in a job
+					 * slot to complete until the failed job
+					 * is cleared from the IRQ status.
+					 */
+					kbase_gpu_complete_hw(kbdev, i,
+							BASE_JD_EVENT_DONE,
+							0,
+							&end_timestamp);
+				}
+				nr_done--;
+			}
+ spurious:
+			done = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT));
+
+			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
+				/* Workaround for missing interrupt caused by
+				 * PRLAM-10883 */
+				if (((active >> i) & 1) && (0 ==
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(i,
+							JS_STATUS)))) {
+					/* Force job slot to be processed again
+					 */
+					done |= (1u << i);
+				}
+			}
+
+			failed = done >> 16;
+			finished = (done & 0xFFFF) | failed;
+			if (done)
+				end_timestamp = ktime_get();
+		} while (finished & (1 << i));
+
+		kbasep_job_slot_update_head_start_timestamp(kbdev, i,
+								end_timestamp);
+	}
+
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_COMMITTED) {
+		/* If we're trying to reset the GPU then we might be able to do
+		 * it early (without waiting for a timeout) because some jobs
+		 * have completed
+		 */
+		kbasep_try_reset_gpu_early_locked(kbdev);
+	}
+	KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
+}
+
+static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	bool soft_stops_allowed = true;
+
+	if (kbase_jd_katom_is_protected(katom)) {
+		soft_stops_allowed = false;
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+		if ((katom->core_req & BASE_JD_REQ_T) != 0)
+			soft_stops_allowed = false;
+	}
+	return soft_stops_allowed;
+}
+
+static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
+						base_jd_core_req core_reqs)
+{
+	bool hard_stops_allowed = true;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
+		if ((core_reqs & BASE_JD_REQ_T) != 0)
+			hard_stops_allowed = false;
+	}
+	return hard_stops_allowed;
+}
+
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom)
+{
+#if KBASE_TRACE_ENABLE
+	u32 status_reg_before;
+	u64 job_in_head_before;
+	u32 status_reg_after;
+
+	KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+
+	/* Check the head pointer */
+	job_in_head_before = ((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_LO)))
+			| (((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_HI)))
+									<< 32);
+	status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS));
+#endif
+
+	if (action == JS_COMMAND_SOFT_STOP) {
+		bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
+								target_katom);
+
+		if (!soft_stop_allowed) {
+#ifdef CONFIG_MALI_DEBUG
+			dev_dbg(kbdev->dev,
+					"Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+
+		/* We are about to issue a soft stop, so mark the atom as having
+		 * been soft stopped */
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+
+		/* Mark the point where we issue the soft-stop command */
+		KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+			int i;
+
+			for (i = 0;
+			     i < kbase_backend_nr_atoms_submitted(kbdev, js);
+			     i++) {
+				struct kbase_jd_atom *katom;
+
+				katom = kbase_gpu_inspect(kbdev, js, i);
+
+				KBASE_DEBUG_ASSERT(katom);
+
+				/* For HW_ISSUE_8316, only 'bad' jobs attacking
+				 * the system can cause this issue: normally,
+				 * all memory should be allocated in multiples
+				 * of 4 pages, and growable memory should be
+				 * changed size in multiples of 4 pages.
+				 *
+				 * Whilst such 'bad' jobs can be cleared by a
+				 * GPU reset, the locking up of a uTLB entry
+				 * caused by the bad job could also stall other
+				 * ASs, meaning that other ASs' jobs don't
+				 * complete in the 'grace' period before the
+				 * reset. We don't want to lose other ASs' jobs
+				 * when they would normally complete fine, so we
+				 * must 'poke' the MMU regularly to help other
+				 * ASs complete */
+				kbase_as_poking_timer_retain_atom(
+						kbdev, katom->kctx, katom);
+			}
+		}
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_SOFT_STOP_1 :
+				JS_COMMAND_SOFT_STOP_0;
+		}
+	} else if (action == JS_COMMAND_HARD_STOP) {
+		bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
+								core_reqs);
+
+		if (!hard_stop_allowed) {
+			/* Jobs can be hard-stopped for the following reasons:
+			 *  * CFS decides the job has been running too long (and
+			 *    soft-stop has not occurred). In this case the GPU
+			 *    will be reset by CFS if the job remains on the
+			 *    GPU.
+			 *
+			 *  * The context is destroyed, kbase_jd_zap_context
+			 *    will attempt to hard-stop the job. However it also
+			 *    has a watchdog which will cause the GPU to be
+			 *    reset if the job remains on the GPU.
+			 *
+			 *  * An (unhandled) MMU fault occurred. As long as
+			 *    BASE_HW_ISSUE_8245 is defined then the GPU will be
+			 *    reset.
+			 *
+			 * All three cases result in the GPU being reset if the
+			 * hard-stop fails, so it is safe to just return and
+			 * ignore the hard-stop request.
+			 */
+			dev_warn(kbdev->dev,
+					"Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+			return;
+		}
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_HARD_STOP_1 :
+				JS_COMMAND_HARD_STOP_0;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action);
+
+#if KBASE_TRACE_ENABLE
+	status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS));
+	if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
+		struct kbase_jd_atom *head;
+		struct kbase_context *head_kctx;
+
+		head = kbase_gpu_inspect(kbdev, js, 0);
+		head_kctx = head->kctx;
+
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx,
+						head, job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+						0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	} else {
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+#endif
+}
+
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Cancel any remaining running jobs for this kctx  */
+	mutex_lock(&kctx->jctx.lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Invalidate all incomplete jobs in context to prevent resubmitting */
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		struct kbase_jd_atom *katom = &kctx->jctx.atoms[i];
+
+		if ((katom->status != KBASE_JD_ATOM_STATE_COMPLETED) &&
+				(katom->status !=
+				KBASE_JD_ATOM_STATE_HW_COMPLETED))
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_hardstop(kctx, i, NULL);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev;
+	int js = target_katom->slot_nr;
+	int priority = target_katom->sched_priority;
+	int i;
+	bool stop_sent = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		if ((kbdev->js_ctx_scheduling_mode ==
+			KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) &&
+				(katom->kctx != kctx))
+			continue;
+
+		if (katom->sched_priority > priority) {
+			if (!stop_sent)
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(
+						kbdev,
+						target_katom);
+
+			kbase_job_slot_softstop(kbdev, js, katom);
+			stop_sent = true;
+		}
+	}
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long timeout = msecs_to_jiffies(ZAP_TIMEOUT);
+
+	timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait,
+			kctx->jctx.job_nr == 0, timeout);
+
+	if (timeout != 0)
+		timeout = wait_event_timeout(
+			kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			!kbase_ctx_flag(kctx, KCTX_SCHEDULED),
+			timeout);
+
+	/* Neither wait timed out; all done! */
+	if (timeout != 0)
+		goto exit;
+
+	if (kbase_prepare_to_reset_gpu(kbdev)) {
+		dev_err(kbdev->dev,
+			"Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+			ZAP_TIMEOUT);
+		kbase_reset_gpu(kbdev);
+	}
+
+	/* Wait for the reset to complete */
+	kbase_reset_gpu_wait(kbdev);
+exit:
+	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+	/* Ensure that the signallers of the waitqs have finished */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
+{
+	u32 flush_id = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.backend.gpu_powered)
+			flush_id = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(LATEST_FLUSH));
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return flush_id;
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+/**
+ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
+ * @kbdev: kbase device pointer
+ * @kctx:  context to check against
+ * @js:	   slot to check
+ * @target_katom: An atom to check, or NULL if all atoms from @kctx on
+ *                slot @js should be checked
+ *
+ * This checks are based upon parameters that would normally be passed to
+ * kbase_job_slot_hardstop().
+ *
+ * In the event of @target_katom being NULL, this will check the last jobs that
+ * are likely to be running on the slot to see if a) they belong to kctx, and
+ * so would be stopped, and b) whether they have AFBC
+ *
+ * In that case, It's guaranteed that a job currently executing on the HW with
+ * AFBC will be detected. However, this is a conservative check because it also
+ * detects jobs that have just completed too.
+ *
+ * Return: true when hard-stop _might_ stop an afbc atom, else false.
+ */
+static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
+		struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom)
+{
+	bool ret = false;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* When we have an atom the decision can be made straight away. */
+	if (target_katom)
+		return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC);
+
+	/* Otherwise, we must chweck the hardware to see if it has atoms from
+	 * this context with AFBC. */
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		/* Ignore atoms from other contexts, they won't be stopped when
+		 * we use this for checking if we should hard-stop them */
+		if (katom->kctx != kctx)
+			continue;
+
+		/* An atom on this slot and this context: check for AFBC */
+		if (katom->core_req & BASE_JD_REQ_FS_AFBC) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * @sw_flags:      Flags to pass in about the soft-stop
+ *
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+			struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+	KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+	kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
+			JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool stopped;
+	/* We make the check for AFBC before evicting/stopping atoms.  Note
+	 * that no other thread can modify the slots whilst we have the
+	 * hwaccess_lock. */
+	int needs_workaround_for_afbc =
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
+			&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
+					 target_katom);
+
+	stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
+							target_katom,
+							JS_COMMAND_HARD_STOP);
+	if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+			needs_workaround_for_afbc)) {
+		/* MIDBASE-2916 if a fragment job with AFBC encoding is
+		 * hardstopped, ensure to do a soft reset also in order to
+		 * clear the GPU status.
+		 * Workaround for HW issue 8401 has an issue,so after
+		 * hard-stopping just reset the GPU. This will ensure that the
+		 * jobs leave the GPU.*/
+		if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+			dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue");
+			kbase_reset_gpu_locked(kbdev);
+		}
+	}
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	/* For hard-stop, don't enter if hard-stop not allowed */
+	if (hw_action == JS_COMMAND_HARD_STOP &&
+			!kbasep_hard_stop_allowed(kbdev, core_reqs))
+		return;
+
+	/* For soft-stop, don't enter if soft-stop not allowed, or isn't
+	 * causing disjoint */
+	if (hw_action == JS_COMMAND_SOFT_STOP &&
+			!(kbasep_soft_stop_allowed(kbdev, target_katom) &&
+			  (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+		return;
+
+	/* Nothing to do if already logged disjoint state on this atom */
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+		return;
+
+	target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+	kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom)
+{
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+		target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+		kbase_disjoint_state_down(kbdev);
+	}
+}
+
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+	int i;
+
+	kbase_io_history_dump(kbdev);
+
+	dev_err(kbdev->dev, "Register state:");
+	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)));
+	dev_err(kbdev->dev, "  JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x",
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE)));
+	for (i = 0; i < 3; i++) {
+		dev_err(kbdev->dev, "  JS%d_STATUS=0x%08x      JS%d_HEAD_LO=0x%08x",
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)),
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO)));
+	}
+	dev_err(kbdev->dev, "  MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)));
+	dev_err(kbdev->dev, "  GPU_IRQ_MASK=0x%08x    JOB_IRQ_MASK=0x%08x     MMU_IRQ_MASK=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)),
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)));
+	dev_err(kbdev->dev, "  PWR_OVERRIDE0=0x%08x   PWR_OVERRIDE1=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1)));
+	dev_err(kbdev->dev, "  SHADER_CONFIG=0x%08x   L2_MMU_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)));
+	dev_err(kbdev->dev, "  TILER_CONFIG=0x%08x    JM_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)));
+}
+
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+	bool silent = false;
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	KBASE_DEBUG_ASSERT(data);
+
+	kbdev = container_of(data, struct kbase_device,
+						hwaccess.backend.reset_work);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_SILENT)
+		silent = true;
+
+	KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+	/* Disable GPU hardware counters.
+	 * This call will block until counters are disabled.
+	 */
+	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
+	/* Make sure the timer has completed - this cannot be done from
+	 * interrupt context, so this cannot be done within
+	 * kbasep_try_reset_gpu_early. */
+	hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* This would re-activate the GPU. Since it's already idle,
+		 * there's no need to reset it */
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+		kbase_disjoint_state_down(kbdev);
+		wake_up(&kbdev->hwaccess.backend.reset_wait);
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock(&kbdev->mmu_mask_change);
+	kbase_pm_reset_start_locked(kbdev);
+
+	/* We're about to flush out the IRQs and their bottom half's */
+	kbdev->irq_reset_flush = true;
+
+	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
+	 * spinlock; this also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts_nolock(kbdev);
+
+	spin_unlock(&kbdev->mmu_mask_change);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Ensure that any IRQ handlers have finished
+	 * Must be done without any locks IRQ handlers will take */
+	kbase_synchronize_irqs(kbdev);
+
+	/* Flush out any in-flight work items */
+	kbase_flush_mmu_wqs(kbdev);
+
+	/* The flush has completed so reset the active indicator */
+	kbdev->irq_reset_flush = false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
+		/* Ensure that L2 is not transitioning when we send the reset
+		 * command */
+		while (--max_loops && kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_L2))
+			;
+
+		WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
+	}
+
+	mutex_lock(&kbdev->pm.lock);
+	/* We hold the pm lock, so there ought to be a current policy */
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+	/* All slot have been soft-stopped and we've waited
+	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
+	 * assume that anything that is still left on the GPU is stuck there and
+	 * we'll kill it when we reset the GPU */
+
+	if (!silent)
+		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+								RESET_TIMEOUT);
+
+	/* Output the state of some interesting registers to help in the
+	 * debugging of GPU resets */
+	if (!silent)
+		kbase_debug_dump_registers(kbdev);
+
+	/* Complete any jobs that were still on the GPU */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->protected_mode = false;
+	kbase_backend_reset(kbdev, &end_timestamp);
+	kbase_pm_metrics_update(kbdev, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Reset the GPU */
+	kbase_pm_init_hw(kbdev, 0);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_restore_all_as(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_pm_enable_interrupts(kbdev);
+
+	kbase_disjoint_state_down(kbdev);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	kbase_pm_reset_complete(kbdev);
+
+	/* Find out what cores are required now */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* Synchronously request and wait for those cores, because if
+	 * instrumentation is enabled it would need them immediately. */
+	kbase_pm_wait_for_desired_state(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	if (!silent)
+		dev_err(kbdev->dev, "Reset complete");
+
+	/* Try submitting some jobs to restart processing */
+	KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0);
+	kbase_js_sched_all(kbdev);
+
+	/* Process any pending slot updates */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_context_idle(kbdev);
+
+	/* Re-enable GPU hardware counters */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+						hwaccess.backend.reset_timer);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Reset still pending? */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+						KBASE_RESET_GPU_COMMITTED)
+		queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+	int i;
+	int pending_jobs = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Count the number of jobs */
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+	if (pending_jobs > 0) {
+		/* There are still jobs on the GPU - wait */
+		return;
+	}
+
+	/* To prevent getting incorrect registers when dumping failed job,
+	 * skip early reset.
+	 */
+	if (kbdev->job_fault_debug != false)
+		return;
+
+	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+	 * been called), and that no other thread beat this thread to starting
+	 * the reset */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+						KBASE_RESET_GPU_COMMITTED) {
+		/* Reset has already occurred */
+		return;
+	}
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_try_reset_gpu_early_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ *   The function returns a boolean which should be interpreted as follows:
+ *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
+ *   false - Another thread is performing a reset, kbase_reset_gpu should
+ *   not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_PREPARED) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return false;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_softstop(kbdev, i, NULL);
+
+	return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	bool ret;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early_locked(kbdev);
+}
+
+int kbase_reset_gpu_silent(struct kbase_device *kbdev)
+{
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_SILENT) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return -EAGAIN;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+			&kbdev->hwaccess.backend.reset_work);
+
+	return 0;
+}
+
+bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_NOT_PENDING)
+		return false;
+
+	return true;
+}
+
+int kbase_reset_gpu_wait(struct kbase_device *kbdev)
+{
+	wait_event(kbdev->hwaccess.backend.reset_wait,
+			atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+			== KBASE_RESET_GPU_NOT_PENDING);
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait);
+
+int kbase_reset_gpu_init(struct kbase_device *kbdev)
+{
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (kbdev->hwaccess.backend.reset_workq == NULL)
+		return -ENOMEM;
+
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+
+	return 0;
+}
+
+void kbase_reset_gpu_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+}
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100755
index 0000000..452ddee
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,169 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev:		Device pointer
+ * @s:			Job slot
+ * @completion_code:	Completion code of job reported by GPU
+ * @job_tail:		Job tail address reported by GPU
+ * @end_timestamp:	Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+					u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static inline char *kbasep_make_job_slot_string(int js, char *js_string,
+						size_t js_size)
+{
+	snprintf(js_string, js_size, "job_slot_%i", js);
+	return js_string;
+}
+#endif
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ *						   on the specified atom
+ * @kbdev:		Device pointer
+ * @js:			Job slot to stop on
+ * @action:		The action to perform, either JSn_COMMAND_HARD_STOP or
+ *			JSn_COMMAND_SOFT_STOP
+ * @core_reqs:		Core requirements of atom to stop
+ * @target_katom:	Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ *					 slot belonging to a given context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @katom:	Specific atom to stop. May be NULL
+ * @js:		Job slot to hard stop
+ * @action:	The action to perform, either JSn_COMMAND_HARD_STOP or
+ *		JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_cache_clean - Cause a GPU cache clean & flush
+ * @kbdev: Device pointer
+ *
+ * Caller must not be in IRQ context
+ */
+void kbase_gpu_cache_clean(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
new file mode 100755
index 0000000..7cdaf98
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -0,0 +1,1727 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_tracepoints.h>
+#include <mali_kbase_hwcnt_context.h>
+#include <mali_kbase_10969_workaround.h>
+#include <mali_kbase_reset_gpu.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Return whether the specified ringbuffer is empty. HW access lock must be
+ * held */
+#define SLOT_RB_EMPTY(rb)   (rb->write_idx == rb->read_idx)
+/* Return number of atoms currently in the specified ringbuffer. HW access lock
+ * must be held */
+#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer
+ * @kbdev: Device pointer
+ * @katom: Atom to enqueue
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr];
+
+	WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
+	rb->write_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+/**
+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once
+ * it has been completed
+ * @kbdev:         Device pointer
+ * @js:            Job slot to remove atom from
+ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in
+ *                 which case current time will be used.
+ *
+ * Context: Caller must hold the HW access lock
+ *
+ * Return: Atom removed from ringbuffer
+ */
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
+						int js,
+						ktime_t *end_timestamp)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+	struct kbase_jd_atom *katom;
+
+	if (SLOT_RB_EMPTY(rb)) {
+		WARN(1, "GPU ringbuffer unexpectedly empty\n");
+		return NULL;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
+
+	kbase_gpu_release_atom(kbdev, katom, end_timestamp);
+
+	rb->read_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
+
+	return katom;
+}
+
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
+		return NULL; /* idx out of range */
+
+	return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	if (SLOT_RB_EMPTY(rb))
+		return NULL;
+
+	return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom;
+}
+
+/**
+ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently
+ * on the GPU
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ *
+ * Return: true if there are atoms on the GPU for slot js,
+ *         false otherwise
+ */
+static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (!katom)
+			return false;
+		if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED ||
+				katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
+ * currently on the GPU
+ * @kbdev:  Device pointer
+ *
+ * Return: true if there are any atoms on the GPU, false otherwise
+ */
+static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
+{
+	int js;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+			if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+				return true;
+		}
+	}
+	return false;
+}
+
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED))
+			nr++;
+	}
+
+	return nr;
+}
+
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		if (kbase_gpu_inspect(kbdev, js, i))
+			nr++;
+	}
+
+	return nr;
+}
+
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
+				enum kbase_atom_gpu_rb_state min_rb_state)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state >= min_rb_state))
+			nr++;
+	}
+
+	return nr;
+}
+
+/**
+ * check_secure_atom - Check if the given atom is in the given secure state and
+ *                     has a ringbuffer state of at least
+ *                     KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @katom:  Atom pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if atom is in the given state, false otherwise
+ */
+static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure)
+{
+	if (katom->gpu_rb_state >=
+			KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION &&
+			((kbase_jd_katom_is_protected(katom) && secure) ||
+			(!kbase_jd_katom_is_protected(katom) && !secure)))
+		return true;
+
+	return false;
+}
+
+/**
+ * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given
+ *                                secure state in the ringbuffers of at least
+ *                                state
+ *                                KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE
+ * @kbdev:  Device pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if any atoms are in the given state, false otherwise
+ */
+static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
+		bool secure)
+{
+	int js, i;
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, i);
+
+			if (katom) {
+				if (check_secure_atom(katom, secure))
+					return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* The GPU is being reset - so prevent submission */
+		return 0;
+	}
+
+	return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
+}
+
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	switch (katom->gpu_rb_state) {
+	case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+		/* Should be impossible */
+		WARN(1, "Attempting to release atom not in ringbuffer\n");
+		break;
+
+	case KBASE_ATOM_GPU_RB_SUBMITTED:
+		/* Inform power management at start/finish of atom so it can
+		 * update its GPU utilisation metrics. Mark atom as not
+		 * submitted beforehand. */
+		katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		kbase_pm_metrics_update(kbdev, end_timestamp);
+
+		if (katom->core_req & BASE_JD_REQ_PERMON)
+			kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+		KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, katom,
+			&kbdev->gpu_props.props.raw_props.js_features
+				[katom->slot_nr]);
+		KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]);
+		KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, kctx,
+			&kbdev->gpu_props.props.raw_props.js_features
+				[katom->slot_nr]);
+
+	case KBASE_ATOM_GPU_RB_READY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+		break;
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+		if (kbase_jd_katom_is_protected(katom) &&
+				(katom->protected_state.enter !=
+				KBASE_ATOM_ENTER_PROTECTED_CHECK) &&
+				(katom->protected_state.enter !=
+				KBASE_ATOM_ENTER_PROTECTED_HWCNT)) {
+			kbase_pm_protected_override_disable(kbdev);
+			kbase_pm_update_cores_state_nolock(kbdev);
+		}
+		if (!kbase_jd_katom_is_protected(katom) &&
+				(katom->protected_state.exit !=
+				KBASE_ATOM_EXIT_PROTECTED_CHECK) &&
+				(katom->protected_state.exit !=
+				KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)) {
+			kbase_pm_protected_override_disable(kbdev);
+			kbase_pm_update_cores_state_nolock(kbdev);
+		}
+
+		if (katom->protected_state.enter !=
+				KBASE_ATOM_ENTER_PROTECTED_CHECK ||
+				katom->protected_state.exit !=
+				KBASE_ATOM_EXIT_PROTECTED_CHECK)
+			kbdev->protected_mode_transition = false;
+		/* If the atom has suspended hwcnt but has not yet entered
+		 * protected mode, then resume hwcnt now. If the GPU is now in
+		 * protected mode then hwcnt will be resumed by GPU reset so
+		 * don't resume it here.
+		 */
+		if (kbase_jd_katom_is_protected(katom) &&
+				((katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) ||
+				 (katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) {
+			WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
+			kbdev->protected_mode_hwcnt_desired = true;
+			if (kbdev->protected_mode_hwcnt_disabled) {
+				kbase_hwcnt_context_enable(
+					kbdev->hwcnt_gpu_ctx);
+				kbdev->protected_mode_hwcnt_disabled = false;
+			}
+		}
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
+			if (katom->atom_flags &
+					KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) {
+				kbase_pm_protected_l2_override(kbdev, false);
+				katom->atom_flags &=
+					~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT;
+			}
+		}
+
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+		break;
+	}
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+}
+
+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_gpu_release_atom(kbdev, katom, NULL);
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+}
+
+static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	bool slot_busy[3];
+
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+	slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+
+	if ((js == 2 && !(slot_busy[0] || slot_busy[1])) ||
+		(js != 2 && !slot_busy[2]))
+		return true;
+
+	/* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */
+	if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1) ||
+			backend->rmu_workaround_flag))
+		return false;
+
+	/* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */
+	if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) ||
+			!backend->rmu_workaround_flag))
+		return false;
+
+	backend->rmu_workaround_flag = !backend->rmu_workaround_flag;
+
+	return true;
+}
+
+/**
+ * other_slots_busy - Determine if any job slots other than @js are currently
+ *                    running atoms
+ * @kbdev: Device pointer
+ * @js:    Job slot
+ *
+ * Return: true if any slots other than @js are busy, false otherwise
+ */
+static inline bool other_slots_busy(struct kbase_device *kbdev, int js)
+{
+	int slot;
+
+	for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) {
+		if (slot == js)
+			continue;
+
+		if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot,
+				KBASE_ATOM_GPU_RB_SUBMITTED))
+			return true;
+	}
+
+	return false;
+}
+
+static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev)
+{
+	return kbdev->protected_mode;
+}
+
+static void kbase_gpu_disable_coherent(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/*
+	 * When entering into protected mode, we must ensure that the
+	 * GPU is not operating in coherent mode as well. This is to
+	 * ensure that no protected memory can be leaked.
+	 */
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE);
+}
+
+static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot enter protected mode: protected callbacks not specified.\n");
+
+	if (kbdev->protected_ops) {
+		/* Switch GPU to protected mode */
+		err = kbdev->protected_ops->protected_mode_enable(
+				kbdev->protected_dev);
+
+		if (err) {
+			dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n",
+					err);
+		} else {
+			kbdev->protected_mode = true;
+			kbase_ipa_protection_mode_switch_event(kbdev);
+		}
+	}
+
+	return err;
+}
+
+static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot exit protected mode: protected callbacks not specified.\n");
+
+	if (!kbdev->protected_ops)
+		return -EINVAL;
+
+	/* The protected mode disable callback will be called as part of reset
+	 */
+	return kbase_reset_gpu_silent(kbdev);
+}
+
+static int kbase_jm_protected_entry(struct kbase_device *kbdev,
+				struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	err = kbase_gpu_protected_mode_enter(kbdev);
+
+	/*
+	 * Regardless of result before this call, we are no longer
+	 * transitioning the GPU.
+	 */
+
+	kbdev->protected_mode_transition = false;
+	kbase_pm_protected_override_disable(kbdev);
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev);
+	if (err) {
+		/*
+		 * Failed to switch into protected mode, resume
+		 * GPU hwcnt and fail atom.
+		 */
+		WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
+		kbdev->protected_mode_hwcnt_desired = true;
+		if (kbdev->protected_mode_hwcnt_disabled) {
+			kbase_hwcnt_context_enable(
+				kbdev->hwcnt_gpu_ctx);
+			kbdev->protected_mode_hwcnt_disabled = false;
+		}
+
+		katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+		kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+		/*
+		 * Only return if head atom or previous atom
+		 * already removed - as atoms must be returned
+		 * in order.
+		 */
+		if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+		}
+
+		return -EINVAL;
+	}
+
+	/*
+	 * Protected mode sanity checks.
+	 */
+	KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) ==
+			kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]),
+			kbase_gpu_in_protected_mode(kbdev));
+	katom[idx]->gpu_rb_state =
+			KBASE_ATOM_GPU_RB_READY;
+
+	return err;
+}
+
+static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	switch (katom[idx]->protected_state.enter) {
+	case KBASE_ATOM_ENTER_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+		/* If hwcnt is disabled, it means we didn't clean up correctly
+		 * during last exit from protected mode.
+		 */
+		WARN_ON(kbdev->protected_mode_hwcnt_disabled);
+
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_HWCNT;
+
+		kbdev->protected_mode_transition = true;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_HWCNT:
+		/* See if we can get away with disabling hwcnt atomically */
+		kbdev->protected_mode_hwcnt_desired = false;
+		if (!kbdev->protected_mode_hwcnt_disabled) {
+			if (kbase_hwcnt_context_disable_atomic(
+				kbdev->hwcnt_gpu_ctx))
+				kbdev->protected_mode_hwcnt_disabled = true;
+		}
+
+		/* We couldn't disable atomically, so kick off a worker */
+		if (!kbdev->protected_mode_hwcnt_disabled) {
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+			queue_work(system_wq,
+				&kbdev->protected_mode_hwcnt_disable_work);
+#else
+			queue_work(system_highpri_wq,
+				&kbdev->protected_mode_hwcnt_disable_work);
+#endif
+			return -EAGAIN;
+		}
+
+		/* Once reaching this point GPU must be
+		 * switched to protected mode or hwcnt
+		 * re-enabled. */
+
+		/*
+		 * Not in correct mode, begin protected mode switch.
+		 * Entering protected mode requires us to power down the L2,
+		 * and drop out of fully coherent mode.
+		 */
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
+
+		kbase_pm_protected_override_enable(kbdev);
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
+		/* Avoid unnecessary waiting on non-ACE platforms. */
+		if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) {
+			if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+				/*
+				 * The L2 is still powered, wait for all the users to
+				 * finish with it before doing the actual reset.
+				 */
+				return -EAGAIN;
+			}
+		}
+
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY:
+		/*
+		 * When entering into protected mode, we must ensure that the
+		 * GPU is not operating in coherent mode as well. This is to
+		 * ensure that no protected memory can be leaked.
+		 */
+		kbase_gpu_disable_coherent(kbdev);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
+			/*
+			 * Power on L2 caches; this will also result in the
+			 * correct value written to coherency enable register.
+			 */
+			kbase_pm_protected_l2_override(kbdev, true);
+
+			/*
+			 * Set the flag on the atom that additional
+			 * L2 references are taken.
+			 */
+			katom[idx]->atom_flags |=
+					KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT;
+		}
+
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_FINISHED;
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234))
+			return -EAGAIN;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
+			/*
+			 * Check that L2 caches are powered and, if so,
+			 * enter protected mode.
+			 */
+			if (kbdev->pm.backend.l2_state == KBASE_L2_ON) {
+				/*
+				 * Remove additional L2 reference and reset
+				 * the atom flag which denotes it.
+				 */
+				if (katom[idx]->atom_flags &
+					KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) {
+					kbase_pm_protected_l2_override(kbdev,
+							false);
+					katom[idx]->atom_flags &=
+						~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT;
+				}
+
+				err = kbase_jm_protected_entry(kbdev, katom, idx, js);
+
+				if (err)
+					return err;
+			} else {
+				/*
+				 * still waiting for L2 caches to power up
+				 */
+				return -EAGAIN;
+			}
+		} else {
+			err = kbase_jm_protected_entry(kbdev, katom, idx, js);
+
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	switch (katom[idx]->protected_state.exit) {
+	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev, kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		/*
+		 * Exiting protected mode requires a reset, but first the L2
+		 * needs to be powered down to ensure it's not active when the
+		 * reset is issued.
+		 */
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
+
+		kbdev->protected_mode_transition = true;
+		kbase_pm_protected_override_enable(kbdev);
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+	case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/*
+			 * The L2 is still powered, wait for all the users to
+			 * finish with it before doing the actual reset.
+			 */
+			return -EAGAIN;
+		}
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET:
+		/* Issue the reset to the GPU */
+		err = kbase_gpu_protected_mode_reset(kbdev);
+
+		if (err == -EAGAIN)
+			return -EAGAIN;
+
+		if (err) {
+			kbdev->protected_mode_transition = false;
+			kbase_pm_protected_override_disable(kbdev);
+
+			/* Failed to exit protected mode, fail atom */
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			/* If we're exiting from protected mode, hwcnt must have
+			 * been disabled during entry.
+			 */
+			WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
+			kbdev->protected_mode_hwcnt_desired = true;
+			if (kbdev->protected_mode_hwcnt_disabled) {
+				kbase_hwcnt_context_enable(
+					kbdev->hwcnt_gpu_ctx);
+				kbdev->protected_mode_hwcnt_disabled = false;
+			}
+
+			return -EINVAL;
+		}
+
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT:
+		/* A GPU reset is issued when exiting protected mode. Once the
+		 * reset is done all atoms' state will also be reset. For this
+		 * reason, if the atom is still in this state we can safely
+		 * say that the reset has not completed i.e., we have not
+		 * finished exiting protected mode yet.
+		 */
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+void kbase_backend_slot_update(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbase_reset_gpu_is_active(kbdev))
+		return;
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_jd_atom *katom[2];
+		int idx;
+
+		katom[0] = kbase_gpu_inspect(kbdev, js, 0);
+		katom[1] = kbase_gpu_inspect(kbdev, js, 1);
+		WARN_ON(katom[1] && !katom[0]);
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			bool cores_ready;
+			int ret;
+
+			if (!katom[idx])
+				continue;
+
+			switch (katom[idx]->gpu_rb_state) {
+			case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+				/* Should be impossible */
+				WARN(1, "Attempting to update atom not in ringbuffer\n");
+				break;
+
+			case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+				if (katom[idx]->atom_flags &
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED)
+					break;
+
+				katom[idx]->gpu_rb_state =
+				KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+				if (kbase_gpu_check_secure_atoms(kbdev,
+						!kbase_jd_katom_is_protected(
+						katom[idx])))
+					break;
+
+				if ((idx == 1) && (kbase_jd_katom_is_protected(
+								katom[0]) !=
+						kbase_jd_katom_is_protected(
+								katom[1])))
+					break;
+
+				if (kbdev->protected_mode_transition)
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+
+				/*
+				 * Exiting protected mode must be done before
+				 * the references on the cores are taken as
+				 * a power down the L2 is required which
+				 * can't happen after the references for this
+				 * atom are taken.
+				 */
+
+				if (!kbase_gpu_in_protected_mode(kbdev) &&
+					kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition into protected mode. */
+					ret = kbase_jm_enter_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				} else if (kbase_gpu_in_protected_mode(kbdev) &&
+					!kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition out of protected mode. */
+					ret = kbase_jm_exit_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				}
+				katom[idx]->protected_state.exit =
+						KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+				/* Atom needs no protected mode transition. */
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+				if (katom[idx]->will_fail_event_code) {
+					kbase_gpu_mark_atom_for_return(kbdev,
+							katom[idx]);
+					/* Set EVENT_DONE so this atom will be
+					   completed, not unpulled. */
+					katom[idx]->event_code =
+						BASE_JD_EVENT_DONE;
+					/* Only return if head atom or previous
+					 * atom already removed - as atoms must
+					 * be returned in order. */
+					if (idx == 0 ||	katom[0]->gpu_rb_state ==
+							KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+						kbase_gpu_dequeue_atom(kbdev, js, NULL);
+						kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+					}
+					break;
+				}
+
+				cores_ready = kbase_pm_cores_requested(kbdev,
+						true);
+
+				if (katom[idx]->event_code ==
+						BASE_JD_EVENT_PM_EVENT) {
+					katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+					break;
+				}
+
+				if (!cores_ready)
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+				if (!kbase_gpu_rmu_workaround(kbdev, js))
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_READY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_READY:
+
+				if (idx == 1) {
+					/* Only submit if head atom or previous
+					 * atom already submitted */
+					if ((katom[0]->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED &&
+						katom[0]->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+						break;
+
+					/* If intra-slot serialization in use
+					 * then don't submit atom to NEXT slot
+					 */
+					if (kbdev->serialize_jobs &
+						KBASE_SERIALIZE_INTRA_SLOT)
+						break;
+				}
+
+				/* If inter-slot serialization in use then don't
+				 * submit atom if any other slots are in use */
+				if ((kbdev->serialize_jobs &
+						KBASE_SERIALIZE_INTER_SLOT) &&
+						other_slots_busy(kbdev, js))
+					break;
+
+				if ((kbdev->serialize_jobs &
+						KBASE_SERIALIZE_RESET) &&
+						kbase_reset_gpu_is_active(kbdev))
+					break;
+
+				/* Check if this job needs the cycle counter
+				 * enabled before submission */
+				if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+					kbase_pm_request_gpu_cycle_counter_l2_is_on(
+									kbdev);
+
+				kbase_job_hw_submit(kbdev, katom[idx], js);
+				katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_SUBMITTED;
+
+				/* Inform power management at start/finish of
+				 * atom so it can update its GPU utilisation
+				 * metrics. */
+				kbase_pm_metrics_update(kbdev,
+						&katom[idx]->start_timestamp);
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_SUBMITTED:
+				/* Atom submitted to HW, nothing else to do */
+				break;
+
+			case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+				/* Only return if head atom or previous atom
+				 * already removed - as atoms must be returned
+				 * in order */
+				if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					kbase_gpu_dequeue_atom(kbdev, js, NULL);
+					kbase_jm_return_atom_to_js(kbdev,
+								katom[idx]);
+				}
+				break;
+			}
+		}
+	}
+
+	/* Warn if PRLAM-8987 affinity restrictions are violated */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1)) &&
+			kbase_gpu_atoms_submitted(kbdev, 2));
+}
+
+
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbase_gpu_enqueue_atom(kbdev, katom);
+	kbase_backend_slot_update(kbdev);
+}
+
+#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \
+	(KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER))
+
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
+				u32 completion_code)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_atom *next_katom;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = kbase_gpu_inspect(kbdev, js, 0);
+	next_katom = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (next_katom && katom->kctx == next_katom->kctx &&
+		next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+		(HAS_DEP(next_katom) || next_katom->sched_priority ==
+				katom->sched_priority) &&
+		(kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO))
+									!= 0 ||
+		kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI))
+									!= 0)) {
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+				JS_COMMAND_NOP);
+		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+
+		if (completion_code == BASE_JD_EVENT_STOPPED) {
+			KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, next_katom,
+				&kbdev->gpu_props.props.raw_props.js_features
+					[next_katom->slot_nr]);
+			KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, next_katom, &kbdev->as
+					[next_katom->kctx->as_nr]);
+			KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, next_katom->kctx,
+				&kbdev->gpu_props.props.raw_props.js_features
+					[next_katom->slot_nr]);
+		}
+
+		if (next_katom->core_req & BASE_JD_REQ_PERMON)
+			kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
+		return true;
+	}
+
+	return false;
+}
+
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/*
+	 * When a hard-stop is followed close after a soft-stop, the completion
+	 * code may be set to STOPPED, even though the job is terminated
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) {
+		if (completion_code == BASE_JD_EVENT_STOPPED &&
+				(katom->atom_flags &
+				KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) {
+			completion_code = BASE_JD_EVENT_TERMINATED;
+		}
+	}
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) || (katom->core_req &
+					BASE_JD_REQ_SKIP_CACHE_END)) &&
+			completion_code != BASE_JD_EVENT_DONE &&
+			!(completion_code & BASE_JD_SW_EVENT)) {
+		/* When a job chain fails, on a T60x or when
+		 * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not
+		 * flushed. To prevent future evictions causing possible memory
+		 * corruption we need to flush the cache manually before any
+		 * affected memory gets reused. */
+		katom->need_cache_flush_cores_retained = true;
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
+		if (kbdev->gpu_props.num_core_groups > 1 &&
+				katom->device_nr >= 1) {
+			dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
+			katom->need_cache_flush_cores_retained = true;
+		}
+	}
+
+	katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED) {
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		/*
+		 * Dequeue next atom from ringbuffers on same slot if required.
+		 * This atom will already have been removed from the NEXT
+		 * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
+		 * the atoms on this slot are returned in the correct order.
+		 */
+		if (next_katom && katom->kctx == next_katom->kctx &&
+				next_katom->sched_priority ==
+				katom->sched_priority) {
+			WARN_ON(next_katom->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_SUBMITTED);
+			kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+			kbase_jm_return_atom_to_js(kbdev, next_katom);
+		}
+	} else if (completion_code != BASE_JD_EVENT_DONE) {
+		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+		int i;
+
+		if (!kbase_ctx_flag(katom->kctx, KCTX_DYING))
+			dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+					js, completion_code,
+					kbase_exception_name
+					(kbdev,
+					completion_code));
+
+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
+		KBASE_TRACE_DUMP(kbdev);
+#endif
+		kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+		/*
+		 * Remove all atoms on the same context from ringbuffers. This
+		 * will not remove atoms that are already on the GPU, as these
+		 * are guaranteed not to have fail dependencies on the failed
+		 * atom.
+		 */
+		for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+			struct kbase_jd_atom *katom_idx0 =
+						kbase_gpu_inspect(kbdev, i, 0);
+			struct kbase_jd_atom *katom_idx1 =
+						kbase_gpu_inspect(kbdev, i, 1);
+
+			if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
+					HAS_DEP(katom_idx0) &&
+					katom_idx0->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Dequeue katom_idx0 from ringbuffer */
+				kbase_gpu_dequeue_atom(kbdev, i, end_timestamp);
+
+				if (katom_idx1 &&
+						katom_idx1->kctx == katom->kctx
+						&& HAS_DEP(katom_idx1) &&
+						katom_idx0->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+					/* Dequeue katom_idx1 from ringbuffer */
+					kbase_gpu_dequeue_atom(kbdev, i,
+							end_timestamp);
+
+					katom_idx1->event_code =
+							BASE_JD_EVENT_STOPPED;
+					kbase_jm_return_atom_to_js(kbdev,
+								katom_idx1);
+				}
+				katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+
+			} else if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					HAS_DEP(katom_idx1) &&
+					katom_idx1->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Can not dequeue this atom yet - will be
+				 * dequeued when atom at idx0 completes */
+				katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_gpu_mark_atom_for_return(kbdev,
+								katom_idx1);
+			}
+		}
+	}
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
+					js, completion_code);
+
+	if (job_tail != 0 && job_tail != katom->jc) {
+		bool was_updated = (job_tail != katom->jc);
+
+		/* Some of the job has been executed, so we update the job chain
+		 * address to where we should resume from */
+		katom->jc = job_tail;
+		if (was_updated)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+						katom, job_tail, js);
+	}
+
+	/* Only update the event code for jobs that weren't cancelled */
+	if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+		katom->event_code = (base_jd_event_code)completion_code;
+
+	/* Complete the job, and start new ones
+	 *
+	 * Also defer remaining work onto the workqueue:
+	 * - Re-queue Soft-stopped jobs
+	 * - For any other jobs, queue the job back into the dependency system
+	 * - Schedule out the parent context if necessary, and schedule a new
+	 *   one in.
+	 */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	{
+		/* The atom in the HEAD */
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		if (next_katom && next_katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+							js_string,
+							sizeof(js_string)),
+						ktime_to_ns(*end_timestamp),
+						(u32)next_katom->kctx->id, 0,
+						next_katom->work_id);
+			kbdev->hwaccess.backend.slot_rb[js].last_context =
+							next_katom->kctx;
+		} else {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+							js_string,
+							sizeof(js_string)),
+						ktime_to_ns(ktime_get()), 0, 0,
+						0);
+			kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
+		}
+	}
+#endif
+
+	if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)
+		kbase_reset_gpu_silent(kbdev);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED)
+		katom = kbase_jm_return_atom_to_js(kbdev, katom);
+	else
+		katom = kbase_jm_complete(kbdev, katom, end_timestamp);
+
+	if (katom) {
+		/* Cross-slot dependency has now become runnable. Try to submit
+		 * it. */
+
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		kbase_jm_try_kick(kbdev, 1 << katom->slot_nr);
+	}
+
+	/* Job completion may have unblocked other atoms. Try to update all job
+	 * slots */
+	kbase_backend_slot_update(kbdev);
+}
+
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Reset should always take the GPU out of protected mode */
+	WARN_ON(kbase_gpu_in_protected_mode(kbdev));
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int atom_idx = 0;
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, atom_idx);
+			bool keep_in_jm_rb = false;
+
+			if (!katom)
+				break;
+			if (katom->protected_state.exit ==
+			    KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) {
+				/* protected mode sanity checks */
+				KBASE_DEBUG_ASSERT_MSG(
+					kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev),
+					"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+					kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev));
+				KBASE_DEBUG_ASSERT_MSG(
+					(kbase_jd_katom_is_protected(katom) && js == 0) ||
+					!kbase_jd_katom_is_protected(katom),
+					"Protected atom on JS%d not supported", js);
+			}
+			if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) &&
+			    !kbase_ctx_flag(katom->kctx, KCTX_DYING))
+				keep_in_jm_rb = true;
+
+			kbase_gpu_release_atom(kbdev, katom, NULL);
+
+			/*
+			 * If the atom wasn't on HW when the reset was issued
+			 * then leave it in the RB and next time we're kicked
+			 * it will be processed again from the starting state.
+			 */
+			if (keep_in_jm_rb) {
+				katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+				/* As the atom was not removed, increment the
+				 * index so that we read the correct atom in the
+				 * next iteration. */
+				atom_idx++;
+				continue;
+			}
+
+			/*
+			 * The atom was on the HW when the reset was issued
+			 * all we can do is fail the atom.
+			 */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			kbase_jm_complete(kbdev, katom, end_timestamp);
+		}
+	}
+
+	/* Re-enable GPU hardware counters if we're resetting from protected
+	 * mode.
+	 */
+	kbdev->protected_mode_hwcnt_desired = true;
+	if (kbdev->protected_mode_hwcnt_disabled) {
+		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+		kbdev->protected_mode_hwcnt_disabled = false;
+
+		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev, kbdev);
+	}
+
+	kbdev->protected_mode_transition = false;
+	kbase_pm_protected_override_disable(kbdev);
+}
+
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
+	kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
+							katom->core_req, katom);
+	katom->kctx->blocked_js[js][katom->sched_priority] = true;
+}
+
+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom,
+						u32 action,
+						bool disjoint)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+	kbase_gpu_mark_atom_for_return(kbdev, katom);
+	katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true;
+
+	if (disjoint)
+		kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
+									katom);
+}
+
+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
+{
+	if (katom->x_post_dep) {
+		struct kbase_jd_atom *dep_atom = katom->x_post_dep;
+
+		if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB &&
+			dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_RETURN_TO_JS)
+			return dep_atom->slot_nr;
+	}
+	return -1;
+}
+
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbase_jd_atom *katom_idx0;
+	struct kbase_jd_atom *katom_idx1;
+
+	bool katom_idx0_valid, katom_idx1_valid;
+
+	bool ret = false;
+
+	int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
+	int prio_idx0 = 0, prio_idx1 = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
+	katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (katom_idx0)
+		prio_idx0 = katom_idx0->sched_priority;
+	if (katom_idx1)
+		prio_idx1 = katom_idx1->sched_priority;
+
+	if (katom) {
+		katom_idx0_valid = (katom_idx0 == katom);
+		/* If idx0 is to be removed and idx1 is on the same context,
+		 * then idx1 must also be removed otherwise the atoms might be
+		 * returned out of order */
+		if (katom_idx1)
+			katom_idx1_valid = (katom_idx1 == katom) ||
+						(katom_idx0_valid &&
+							(katom_idx0->kctx ==
+							katom_idx1->kctx));
+		else
+			katom_idx1_valid = false;
+	} else {
+		katom_idx0_valid = (katom_idx0 &&
+				(!kctx || katom_idx0->kctx == kctx));
+		katom_idx1_valid = (katom_idx1 &&
+				(!kctx || katom_idx1->kctx == kctx) &&
+				prio_idx0 == prio_idx1);
+	}
+
+	if (katom_idx0_valid)
+		stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
+	if (katom_idx1_valid)
+		stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1);
+
+	if (katom_idx0_valid) {
+		if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Simple case - just dequeue and return */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			if (katom_idx1_valid) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				katom_idx1->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx1);
+				katom_idx1->kctx->blocked_js[js][prio_idx1] =
+						true;
+			}
+
+			katom_idx0->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+			kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+			katom_idx0->kctx->blocked_js[js][prio_idx0] = true;
+		} else {
+			/* katom_idx0 is on GPU */
+			if (katom_idx1_valid && katom_idx1->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* katom_idx0 and katom_idx1 are on GPU */
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT)) == 0) {
+					/* idx0 has already completed - stop
+					 * idx1 if needed*/
+					if (katom_idx1_valid) {
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				} else {
+					/* idx1 is in NEXT registers - attempt
+					 * to remove */
+					kbase_reg_write(kbdev,
+							JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP);
+
+					if (kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_LO))
+									!= 0 ||
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_HI))
+									!= 0) {
+						/* idx1 removed successfully,
+						 * will be handled in IRQ */
+						kbase_gpu_remove_atom(kbdev,
+								katom_idx1,
+								action, true);
+						stop_x_dep_idx1 =
+					should_stop_x_dep_slot(katom_idx1);
+
+						/* stop idx0 if still on GPU */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx0,
+								action);
+						ret = true;
+					} else if (katom_idx1_valid) {
+						/* idx0 has already completed,
+						 * stop idx1 if needed */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				}
+			} else if (katom_idx1_valid) {
+				/* idx1 not on GPU but must be dequeued*/
+
+				/* idx1 will be handled in IRQ */
+				kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+				/* stop idx0 */
+				/* This will be repeated for anything removed
+				 * from the next registers, since their normal
+				 * flow was also interrupted, and this function
+				 * might not enter disjoint state e.g. if we
+				 * don't actually do a hard stop on the head
+				 * atom */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			} else {
+				/* no atom in idx1 */
+				/* just stop idx0 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			}
+		}
+	} else if (katom_idx1_valid) {
+		if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Mark for return */
+			/* idx1 will be returned once idx0 completes */
+			kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+		} else {
+			/* idx1 is on GPU */
+			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT)) == 0) {
+				/* idx0 has already completed - stop idx1 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx1,
+									action);
+				ret = true;
+			} else {
+				/* idx1 is in NEXT registers - attempt to
+				 * remove */
+				kbase_reg_write(kbdev, JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP);
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_LO)) != 0 ||
+				    kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_HI)) != 0) {
+					/* idx1 removed successfully, will be
+					 * handled in IRQ once idx0 completes */
+					kbase_gpu_remove_atom(kbdev, katom_idx1,
+									action,
+									false);
+				} else {
+					/* idx0 has already completed - stop
+					 * idx1 */
+					kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+					ret = true;
+				}
+			}
+		}
+	}
+
+
+	if (stop_x_dep_idx0 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0,
+								NULL, action);
+
+	if (stop_x_dep_idx1 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1,
+								NULL, action);
+
+	return ret;
+}
+
+void kbase_backend_cache_clean(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->need_cache_flush_cores_retained) {
+		kbase_gpu_start_cache_clean(kbdev);
+		kbase_gpu_wait_cache_clean(kbdev);
+
+		katom->need_cache_flush_cores_retained = false;
+	}
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	/*
+	 * If cache flush required due to HW workaround then perform the flush
+	 * now
+	 */
+	kbase_backend_cache_clean(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969)            &&
+	    (katom->core_req & BASE_JD_REQ_FS)                        &&
+	    katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT       &&
+	    (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+	    !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+		dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+		if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after was soft-stopped
+			 * Due to an HW issue we try to execute the job again.
+			 */
+			dev_dbg(kbdev->dev,
+				"Clamping has been executed, try to rerun the job\n"
+			);
+			katom->event_code = BASE_JD_EVENT_STOPPED;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+		}
+	}
+}
+
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req)
+{
+	if (!kbdev->pm.active_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		mutex_lock(&kbdev->pm.lock);
+		kbase_pm_update_active(kbdev);
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+void kbase_gpu_dump_slots(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	int js;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+									js,
+									idx);
+
+			if (katom)
+				dev_info(kbdev->dev,
+				"  js%d idx%d : katom=%p gpu_rb_state=%d\n",
+				js, idx, katom, katom->gpu_rb_state);
+			else
+				dev_info(kbdev->dev, "  js%d idx%d : empty\n",
+								js, idx);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
new file mode 100755
index 0000000..c3b9f2d
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_H_
+#define _KBASE_HWACCESS_GPU_H_
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/**
+ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot to evict from
+ * @completion_code: Event code from job that was run.
+ *
+ * Evict the atom in the NEXT slot for the specified job slot. This function is
+ * called from the job complete IRQ handler when the previous job has failed.
+ *
+ * Return: true if job evicted from NEXT registers, false otherwise
+ */
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
+				u32 completion_code);
+
+/**
+ * kbase_gpu_complete_hw - Complete an atom on job slot js
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot that has completed
+ * @completion_code: Event code from job that has completed
+ * @job_tail:        The tail address from the hardware if the job has partially
+ *                   completed
+ * @end_timestamp:   Time of completion
+ */
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
+ *
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ * @idx:    Index into ringbuffer. 0 is the job currently running on
+ *          the slot, 1 is the job waiting, all other values are invalid.
+ * Return:  The atom at that position in the ringbuffer
+ *          or NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx);
+
+/**
+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100755
index 0000000..1ffaa23
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,353 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_reset_gpu.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	s8 nr_running_ctxs;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* Timer must stop if we are suspending */
+	if (backend->suspend_timer)
+		return false;
+
+	/* nr_contexts_pullable is updated with the runpool_mutex. However, the
+	 * locking in the caller gives us a barrier that ensures
+	 * nr_contexts_pullable is up-to-date for reading */
+	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (kbdev->js_data.softstop_always) {
+		/* Debug support for allowing soft-stop on a single context */
+		return true;
+	}
+#endif				/* CONFIG_MALI_DEBUG */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+		/* Timeouts would have to be 4x longer (due to micro-
+		 * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there's:
+		 * - 2 or more CL contexts
+		 * - 1 or more GLES contexts
+		 *
+		 * NOTE: We will treat a context that has both Compute and Non-
+		 * Compute jobs will be treated as an OpenCL context (hence, we
+		 * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+		 */
+		{
+			s8 nr_compute_ctxs =
+				kbasep_js_ctx_attr_count_on_runpool(kbdev,
+						KBASEP_JS_CTX_ATTR_COMPUTE);
+			s8 nr_noncompute_ctxs = nr_running_ctxs -
+							nr_compute_ctxs;
+
+			return (bool) (nr_compute_ctxs >= 2 ||
+							nr_noncompute_ctxs > 0);
+		}
+	} else {
+		/* Run the timer callback whenever you have at least 1 context
+		 */
+		return (bool) (nr_running_ctxs > 0);
+	}
+}
+
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_backend_data *backend;
+	int s;
+	bool reset_needed = false;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	backend = container_of(timer, struct kbase_backend_data,
+							scheduling_timer);
+	kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
+	js_devdata = &kbdev->js_data;
+
+	/* Loop through the slots */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+		struct kbase_jd_atom *atom = NULL;
+
+		if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
+			atom = kbase_gpu_inspect(kbdev, s, 0);
+			KBASE_DEBUG_ASSERT(atom != NULL);
+		}
+
+		if (atom != NULL) {
+			/* The current version of the model doesn't support
+			 * Soft-Stop */
+			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+				u32 ticks = atom->ticks++;
+
+#if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP)
+				u32 soft_stop_ticks, hard_stop_ticks,
+								gpu_reset_ticks;
+				if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks_cl;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_cl;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_cl;
+				} else {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_ss;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_ss;
+				}
+
+				/* If timeouts have been changed then ensure
+				 * that atom tick count is not greater than the
+				 * new soft_stop timeout. This ensures that
+				 * atoms do not miss any of the timeouts due to
+				 * races between this worker and the thread
+				 * changing the timeouts. */
+				if (backend->timeouts_updated &&
+						ticks > soft_stop_ticks)
+					ticks = atom->ticks = soft_stop_ticks;
+
+				/* Job is Soft-Stoppable */
+				if (ticks == soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks ticks.
+					 * Soft stop the slot so we can run
+					 * other jobs.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+					int disjoint_threshold =
+		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+					u32 softstop_flags = 0u;
+
+					dev_dbg(kbdev->dev, "Soft-stop");
+					/* nr_user_contexts_running is updated
+					 * with the runpool_mutex, but we can't
+					 * take that here.
+					 *
+					 * However, if it's about to be
+					 * increased then the new context can't
+					 * run any jobs until they take the
+					 * hwaccess_lock, so it's OK to observe
+					 * the older value.
+					 *
+					 * Similarly, if it's about to be
+					 * decreased, the last job from another
+					 * context has already finished, so it's
+					 * not too bad that we observe the older
+					 * value and register a disjoint event
+					 * when we try soft-stopping */
+					if (js_devdata->nr_user_contexts_running
+							>= disjoint_threshold)
+						softstop_flags |=
+						JS_COMMAND_SW_CAUSES_DISJOINT;
+
+					kbase_job_slot_softstop_swflags(kbdev,
+						s, atom, softstop_flags);
+#endif
+				} else if (ticks == hard_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_ss ticks.
+					 * It should have been soft-stopped by
+					 * now. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks == gpu_reset_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_ss ticks.
+					 * It should have left the GPU by now.
+					 * Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#else				/* !CONFIG_MALI_JOB_DUMP */
+				/* NOTE: During CONFIG_MALI_JOB_DUMP, we use
+				 * the alternate timeouts, which makes the hard-
+				 * stop and GPU reset timeout much longer. We
+				 * also ensure that we don't soft-stop at all.
+				 */
+				if (ticks == js_devdata->soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks. We do
+					 * not soft-stop during
+					 * CONFIG_MALI_JOB_DUMP, however.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+				} else if (ticks ==
+					js_devdata->hard_stop_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_dumping
+					 * ticks. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks ==
+					js_devdata->gpu_reset_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_dumping
+					 * ticks. It should have left the GPU by
+					 * now. Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#endif				/* !CONFIG_MALI_JOB_DUMP */
+			}
+		}
+	}
+	if (reset_needed) {
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+	/* the timer is re-issued if there is contexts in the run-pool */
+
+	if (backend->timer_running)
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+			HRTIMER_MODE_REL);
+
+	backend->timeouts_updated = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	unsigned long flags;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	if (!timer_callback_should_run(kbdev)) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = false;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/* From now on, return value of timer_callback_should_run() will
+		 * also cause the timer to not requeue itself. Its return value
+		 * cannot change, because it depends on variables updated with
+		 * the runpool_mutex held, which the caller of this must also
+		 * hold */
+		hrtimer_cancel(&backend->scheduling_timer);
+	}
+
+	if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = true;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+							HRTIMER_MODE_REL);
+
+		KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
+									0u);
+	}
+}
+
+int kbase_backend_timer_init(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	backend->scheduling_timer.function = timer_callback;
+
+	backend->timer_running = false;
+
+	return 0;
+}
+
+void kbase_backend_timer_term(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_cancel(&backend->scheduling_timer);
+}
+
+void kbase_backend_timer_suspend(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = true;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timer_resume(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = false;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->timeouts_updated = true;
+}
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
new file mode 100755
index 0000000..6576e55
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
@@ -0,0 +1,74 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#ifndef _KBASE_JS_BACKEND_H_
+#define _KBASE_JS_BACKEND_H_
+
+/**
+ * kbase_backend_timer_init() - Initialise the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_term() - Terminate the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver termination
+ */
+void kbase_backend_timer_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
+ *                               timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on suspend, after the active count has reached
+ * zero. This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ *                              scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that is is not guaranteed to
+ * re-start the timer, only evalute whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c
new file mode 100644
index 0000000..7bf9e4d
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_bits.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_l2_mmu_config.h"
+
+/**
+ * struct l2_mmu_config_limit_region
+ *
+ * @value:    The default value to load into the L2_MMU_CONFIG register
+ * @mask:     The shifted mask of the field in the L2_MMU_CONFIG register
+ * @shift:    The shift of where the field starts in the L2_MMU_CONFIG register
+ *            This should be the same value as the smaller of the two mask
+ *            values
+ */
+struct l2_mmu_config_limit_region {
+	u32 value, mask, shift;
+};
+
+/**
+ * struct l2_mmu_config_limit
+ *
+ * @product_model:    The GPU for which this entry applies
+ * @read:             Values for the read limit field
+ * @write:            Values for the write limit field
+ */
+struct l2_mmu_config_limit {
+	u32 product_model;
+	struct l2_mmu_config_limit_region read;
+	struct l2_mmu_config_limit_region write;
+};
+
+/*
+ * Zero represents no limit
+ *
+ * For TBEX TTRX and TNAX:
+ *   The value represents the number of outstanding reads (6 bits) or writes (5 bits)
+ *
+ * For all other GPUS it is a fraction see: mali_kbase_config_defaults.h
+ */
+static const struct l2_mmu_config_limit limits[] = {
+	 /* GPU                       read                  write            */
+	 {GPU_ID2_PRODUCT_TBEX, {0, GENMASK(10, 5), 5}, {0, GENMASK(16, 12), 12} },
+	 {GPU_ID2_PRODUCT_TTRX, {0, GENMASK(12, 7), 7}, {0, GENMASK(17, 13), 13} },
+	 {GPU_ID2_PRODUCT_TNAX, {0, GENMASK(12, 7), 7}, {0, GENMASK(17, 13), 13} },
+	 {GPU_ID2_PRODUCT_TGOX,
+	   {KBASE_3BIT_AID_32, GENMASK(14, 12), 12},
+	   {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} },
+	 {GPU_ID2_PRODUCT_TNOX,
+	   {KBASE_3BIT_AID_32, GENMASK(14, 12), 12},
+	   {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} },
+};
+
+void kbase_set_mmu_quirks(struct kbase_device *kbdev)
+{
+	/* All older GPUs had 2 bits for both fields, this is a default */
+	struct l2_mmu_config_limit limit = {
+		  0, /* Any GPU not in the limits array defined above */
+		 {KBASE_AID_32, GENMASK(25, 24), 24},
+		 {KBASE_AID_32, GENMASK(27, 26), 26}
+		};
+	u32 product_model, gpu_id;
+	u32 mmu_config;
+	int i;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_model = gpu_id & GPU_ID2_PRODUCT_MODEL;
+
+	for (i = 0; i < ARRAY_SIZE(limits); i++) {
+		if (product_model == limits[i].product_model) {
+			limit = limits[i];
+			break;
+		}
+	}
+
+	mmu_config = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG));
+
+	mmu_config &= ~(limit.read.mask | limit.write.mask);
+	/* Can't use FIELD_PREP() macro here as the mask isn't constant */
+	mmu_config |= (limit.read.value << limit.read.shift) |
+		      (limit.write.value << limit.write.shift);
+
+	kbdev->hw_quirks_mmu = mmu_config;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h
new file mode 100644
index 0000000..25636ee
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ *//* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ */
+
+#ifndef _KBASE_L2_MMU_CONFIG_H_
+#define _KBASE_L2_MMU_CONFIG_H_
+/**
+ * kbase_set_mmu_quirks - Set the hw_quirks_mmu field of kbdev
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Use this function to initialise the hw_quirks_mmu field, for instance to set
+ * the MAX_READS and MAX_WRITES to sane defaults for each GPU.
+ */
+void kbase_set_mmu_quirks(struct kbase_device *kbdev);
+
+#endif /* _KBASE_L2_MMU_CONFIG_H */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100755
index 0000000..a70439d
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,400 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_tracepoints.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+		u32 num_pages)
+{
+	u64 region;
+
+	/* can't lock a zero sized range */
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	region = pfn << PAGE_SHIFT;
+	/*
+	 * fls returns (given the ASSERT above):
+	 * 1 .. 32
+	 *
+	 * 10 + fls(num_pages)
+	 * results in the range (11 .. 42)
+	 */
+
+	/* gracefully handle num_pages being zero */
+	if (0 == num_pages) {
+		region |= 11;
+	} else {
+		u8 region_width;
+
+		region_width = 10 + fls(num_pages);
+		if (num_pages != (1ul << (region_width - 11))) {
+			/* not pow2, so must go up to the next pow2 */
+			region_width += 1;
+		}
+		KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+		KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+		region |= region_width;
+	}
+
+	return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+		unsigned int as_nr)
+{
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending. Do not log remaining register accesses. */
+	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
+
+	if (max_loops == 0) {
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		return -1;
+	}
+
+	/* If waiting in loop was performed, log last read value. */
+	if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
+		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
+
+	return 0;
+}
+
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
+{
+	int status;
+
+	/* write AS_COMMAND when MMU is ready to accept another command */
+	status = wait_ready(kbdev, as_nr);
+	if (status == 0)
+		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd);
+
+	return status;
+}
+
+static void validate_protected_page_fault(struct kbase_device *kbdev)
+{
+	/* GPUs which support (native) protected mode shall not report page
+	 * fault addresses unless it has protected debug mode and protected
+	 * debug mode is turned on */
+	u32 protected_debug_mode = 0;
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+		return;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		protected_debug_mode = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS)) & GPU_DBGEN;
+	}
+
+	if (!protected_debug_mode) {
+		/* fault_addr should never be reported in protected mode.
+		 * However, we just continue by printing an error message */
+		dev_err(kbdev->dev, "Fault address reported in protected mode\n");
+	}
+}
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+	const int num_as = 16;
+	const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+	const int pf_shift = 0;
+	const unsigned long as_bit_mask = (1UL << num_as) - 1;
+	unsigned long flags;
+	u32 new_mask;
+	u32 tmp;
+
+	/* bus faults */
+	u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+	/* page faults (note: Ignore ASes with both pf and bf) */
+	u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	/* remember current mask */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
+	/* mask interrupts for now */
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+	while (bf_bits | pf_bits) {
+		struct kbase_as *as;
+		int as_no;
+		struct kbase_context *kctx;
+		struct kbase_fault *fault;
+
+		/*
+		 * the while logic ensures we have a bit set, no need to check
+		 * for not-found here
+		 */
+		as_no = ffs(bf_bits | pf_bits) - 1;
+		as = &kbdev->as[as_no];
+
+		/* find the fault type */
+		as->fault_type = (bf_bits & (1 << as_no)) ?
+				KBASE_MMU_FAULT_TYPE_BUS :
+				KBASE_MMU_FAULT_TYPE_PAGE;
+
+		if (kbase_as_has_bus_fault(as))
+			fault = &as->bf_data;
+		else
+			fault = &as->pf_data;
+
+		/*
+		 * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+		 * Bus/Page faults _should_ only occur whilst jobs are running,
+		 * and a job causing the Bus/Page fault shouldn't complete until
+		 * the MMU is updated
+		 */
+		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+
+		/* find faulting address */
+		fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no,
+				AS_FAULTADDRESS_HI));
+		fault->addr <<= 32;
+		fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no,
+				AS_FAULTADDRESS_LO));
+
+		/* Mark the fault protected or not */
+		fault->protected_mode = kbdev->protected_mode;
+
+		if (kbdev->protected_mode && fault->addr) {
+			/* check if address reporting is allowed */
+			validate_protected_page_fault(kbdev);
+		}
+
+		/* report the fault to debugfs */
+		kbase_as_fault_debugfs_new(kbdev, as_no);
+
+		/* record the fault status */
+		fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no,
+				AS_FAULTSTATUS));
+
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
+			fault->extra_addr = kbase_reg_read(kbdev,
+					MMU_AS_REG(as_no, AS_FAULTEXTRA_HI));
+			fault->extra_addr <<= 32;
+			fault->extra_addr |= kbase_reg_read(kbdev,
+					MMU_AS_REG(as_no, AS_FAULTEXTRA_LO));
+		}
+
+		if (kbase_as_has_bus_fault(as)) {
+			/* Mark bus fault as handled.
+			 * Note that a bus fault is processed first in case
+			 * where both a bus fault and page fault occur.
+			 */
+			bf_bits &= ~(1UL << as_no);
+
+			/* remove the queued BF (and PF) from the mask */
+			new_mask &= ~(MMU_BUS_ERROR(as_no) |
+					MMU_PAGE_FAULT(as_no));
+		} else {
+			/* Mark page fault as handled */
+			pf_bits &= ~(1UL << as_no);
+
+			/* remove the queued PF from the mask */
+			new_mask &= ~MMU_PAGE_FAULT(as_no);
+		}
+
+		/* Process the interrupt for this address space */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_interrupt_process(kbdev, kctx, as, fault);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+
+	/* reenable interrupts */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
+	new_mask |= tmp;
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
+{
+	struct kbase_mmu_setup *current_setup = &as->current_setup;
+	u64 transcfg = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
+		transcfg = current_setup->transcfg;
+
+		/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
+		/* Clear PTW_MEMATTR bits */
+		transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+		/* Enable correct PTW_MEMATTR bits */
+		transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+		/* Ensure page-tables reads use read-allocate cache-policy in
+		 * the L2
+		 */
+		transcfg |= AS_TRANSCFG_R_ALLOCATE;
+
+		if (kbdev->system_coherency == COHERENCY_ACE) {
+			/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
+			/* Clear PTW_SH bits */
+			transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+			/* Enable correct PTW_SH bits */
+			transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
+		}
+
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+				transcfg);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+				(transcfg >> 32) & 0xFFFFFFFFUL);
+	} else {
+		if (kbdev->system_coherency == COHERENCY_ACE)
+			current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+	}
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+			current_setup->transtab & 0xFFFFFFFFUL);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+			(current_setup->transtab >> 32) & 0xFFFFFFFFUL);
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+			current_setup->memattr & 0xFFFFFFFFUL);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+			(current_setup->memattr >> 32) & 0xFFFFFFFFUL);
+
+	KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(kbdev, as,
+			current_setup->transtab,
+			current_setup->memattr,
+			transcfg);
+
+	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE);
+}
+
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		u64 vpfn, u32 nr, u32 op,
+		unsigned int handling_irq)
+{
+	int ret;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	if (op == AS_COMMAND_UNLOCK) {
+		/* Unlock doesn't require a lock first */
+		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
+	} else {
+		u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+		/* Lock the region that needs to be updated */
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+				lock_addr & 0xFFFFFFFFUL);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+				(lock_addr >> 32) & 0xFFFFFFFFUL);
+		write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
+
+		/* Run the MMU operation */
+		write_cmd(kbdev, as->number, op);
+
+		/* Wait for the flush to complete */
+		ret = wait_ready(kbdev, as->number);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+			/* Issue an UNLOCK command to ensure that valid page
+			   tables are re-read by the GPU after an update.
+			   Note that, the FLUSH command should perform all the
+			   actions necessary, however the bus logs show that if
+			   multiple page faults occur within an 8 page region
+			   the MMU does not always re-read the updated page
+			   table entries for later faults or is only partially
+			   read, it subsequently raises the page fault IRQ for
+			   the same addresses, the unlock ensures that the MMU
+			   cache is flushed, so updates can be re-read.  As the
+			   region is now unlocked we need to issue 2 UNLOCK
+			   commands in order to flush the MMU/uTLB,
+			   see PRLAM-8812.
+			 */
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
+		}
+	}
+
+	return ret;
+}
+
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 pf_bf_mask;
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	/* Clear the page (and bus fault IRQ as well in case one occurred) */
+	pf_bf_mask = MMU_PAGE_FAULT(as->number);
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		pf_bf_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 irq_mask;
+
+	/* Enable the page fault IRQ (and bus fault IRQ as well in case one
+	 * occurred) */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) |
+			MMU_PAGE_FAULT(as->number);
+
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		irq_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
new file mode 100755
index 0000000..a5bbdf5
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Interface file for the direct implementation for MMU hardware access
+ *
+ * Direct MMU hardware interface
+ *
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ */
+
+#ifndef _KBASE_MMU_HW_DIRECT_H_
+#define _KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * kbase_mmu_interrupt - Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the &kbase_device.
+ *
+ * @kbdev:          kbase context to clear the fault from.
+ * @irq_stat:       Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+#endif	/* _KBASE_MMU_HW_DIRECT_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
new file mode 100755
index 0000000..51a10a2
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static bool always_on_shaders_needed(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static void always_on_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void always_on_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+	"always_on",			/* name */
+	always_on_init,			/* init */
+	always_on_term,			/* term */
+	always_on_shaders_needed,	/* shaders_needed */
+	always_on_get_core_active,	/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_ALWAYS_ON,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
new file mode 100755
index 0000000..e7927cf
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * DOC:
+ * The "Always on" power management policy has the following
+ * characteristics:
+ *
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *    Shader Cores are powered up, regardless of whether or not they will be
+ *    needed later.
+ *
+ * - When KBase indicates that Shader Cores are needed to submit the currently
+ *   queued Job Chains:
+ *    Shader Cores are kept powered, regardless of whether or not they will be
+ *    needed
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *    The Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed. The GPU itself is also kept powered, even though it is not
+ *    needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * Note:
+ *
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data
+ * @dummy: unused dummy variable
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_always_on {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
new file mode 100755
index 0000000..d9fc761
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -0,0 +1,547 @@
+ /*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * GPU backend implementation of base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_pm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwcnt_context.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
+static void kbase_pm_hwcnt_disable_worker(struct work_struct *data);
+
+int kbase_pm_runtime_init(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+	if (callbacks) {
+		kbdev->pm.backend.callback_power_on =
+					callbacks->power_on_callback;
+		kbdev->pm.backend.callback_power_off =
+					callbacks->power_off_callback;
+		kbdev->pm.backend.callback_power_suspend =
+					callbacks->power_suspend_callback;
+		kbdev->pm.backend.callback_power_resume =
+					callbacks->power_resume_callback;
+		kbdev->pm.callback_power_runtime_init =
+					callbacks->power_runtime_init_callback;
+		kbdev->pm.callback_power_runtime_term =
+					callbacks->power_runtime_term_callback;
+		kbdev->pm.backend.callback_power_runtime_on =
+					callbacks->power_runtime_on_callback;
+		kbdev->pm.backend.callback_power_runtime_off =
+					callbacks->power_runtime_off_callback;
+		kbdev->pm.backend.callback_power_runtime_idle =
+					callbacks->power_runtime_idle_callback;
+
+		if (callbacks->power_runtime_init_callback)
+			return callbacks->power_runtime_init_callback(kbdev);
+		else
+			return 0;
+	}
+
+	kbdev->pm.backend.callback_power_on = NULL;
+	kbdev->pm.backend.callback_power_off = NULL;
+	kbdev->pm.backend.callback_power_suspend = NULL;
+	kbdev->pm.backend.callback_power_resume = NULL;
+	kbdev->pm.callback_power_runtime_init = NULL;
+	kbdev->pm.callback_power_runtime_term = NULL;
+	kbdev->pm.backend.callback_power_runtime_on = NULL;
+	kbdev->pm.backend.callback_power_runtime_off = NULL;
+	kbdev->pm.backend.callback_power_runtime_idle = NULL;
+
+	return 0;
+}
+
+void kbase_pm_runtime_term(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.callback_power_runtime_term) {
+		kbdev->pm.callback_power_runtime_term(kbdev);
+	}
+}
+
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_on_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = true;
+}
+
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_off_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = false;
+}
+
+int kbase_hwaccess_pm_early_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_init(&kbdev->pm.lock);
+
+	kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!kbdev->pm.backend.gpu_poweroff_wait_wq)
+		return -ENOMEM;
+
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
+			kbase_pm_gpu_poweroff_wait_wq);
+
+	kbdev->pm.backend.ca_cores_enabled = ~0ull;
+	kbdev->pm.backend.gpu_powered = false;
+	kbdev->pm.suspending = false;
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->pm.backend.driver_ready_for_irqs = false;
+#endif /* CONFIG_MALI_DEBUG */
+	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+
+	/* Initialise the metrics subsystem */
+	ret = kbasep_pm_metrics_init(kbdev);
+	if (ret)
+		return ret;
+
+	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
+	kbdev->pm.backend.reset_done = false;
+
+	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+	kbdev->pm.active_count = 0;
+
+	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+
+	init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
+
+	if (kbase_pm_ca_init(kbdev) != 0)
+		goto workq_fail;
+
+	if (kbase_pm_policy_init(kbdev) != 0)
+		goto pm_policy_fail;
+
+	if (kbase_pm_state_machine_init(kbdev) != 0)
+		goto pm_state_machine_fail;
+
+	return 0;
+
+pm_state_machine_fail:
+	kbase_pm_policy_term(kbdev);
+pm_policy_fail:
+	kbase_pm_ca_term(kbdev);
+workq_fail:
+	kbasep_pm_metrics_term(kbdev);
+	return -EINVAL;
+}
+
+int kbase_hwaccess_pm_late_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.hwcnt_desired = false;
+	kbdev->pm.backend.hwcnt_disabled = true;
+	INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work,
+		kbase_pm_hwcnt_disable_worker);
+	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
+	return 0;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Turn clocks and interrupts on - no-op if we haven't done a previous
+	 * kbase_pm_clock_off() */
+	kbase_pm_clock_on(kbdev, is_resume);
+
+	if (!is_resume) {
+		unsigned long flags;
+
+		/* Force update of L2 state - if we have abandoned a power off
+		 * then this may be required to power the L2 back on.
+		 */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_pm_update_state(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+
+	/* Update core status as required by the policy */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* NOTE: We don't wait to reach the desired state, since running atoms
+	 * will wait for that state to be reached anyway */
+}
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+			pm.backend.gpu_poweroff_wait_work);
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	if (!platform_power_down_only)
+		/* Wait for power transitions to complete. We do this with no locks held
+		 * so that we don't deadlock with any pending workqueues.
+		 */
+		kbase_pm_wait_for_desired_state(kbdev);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	if (!backend->poweron_required) {
+		if (!platform_power_down_only) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			WARN_ON(backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF ||
+					backend->l2_state != KBASE_L2_OFF);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+
+		/* Disable interrupts and turn the clock off */
+		if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
+			/*
+			 * Page/bus faults are pending, must drop locks to
+			 * process.  Interrupts are disabled so no more faults
+			 * should be generated at this point.
+			 */
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			kbase_flush_mmu_wqs(kbdev);
+			mutex_lock(&js_devdata->runpool_mutex);
+			mutex_lock(&kbdev->pm.lock);
+
+			/* Turn off clock now that fault have been handled. We
+			 * dropped locks so poweron_required may have changed -
+			 * power back on if this is the case (effectively only
+			 * re-enabling of the interrupts would be done in this
+			 * case, as the clocks to GPU were not withdrawn yet).
+			 */
+			if (backend->poweron_required)
+				kbase_pm_clock_on(kbdev, false);
+			else
+				WARN_ON(!kbase_pm_clock_off(kbdev,
+						backend->poweroff_is_suspend));
+		}
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	backend->poweroff_wait_in_progress = false;
+	if (backend->poweron_required) {
+		backend->poweron_required = false;
+		kbdev->pm.backend.l2_desired = true;
+		kbase_pm_update_state(kbdev);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_backend_slot_update(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	wake_up(&kbdev->pm.backend.poweroff_wait);
+}
+
+static void kbase_pm_hwcnt_disable_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+			pm.backend.hwcnt_disable_work);
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+
+	bool do_disable;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!do_disable)
+		return;
+
+	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled;
+
+	if (do_disable) {
+		/* PM state did not change while we were doing the disable,
+		 * so commit the work we just performed and continue the state
+		 * machine.
+		 */
+		backend->hwcnt_disabled = true;
+		kbase_pm_update_state(kbdev);
+		kbase_backend_slot_update(kbdev);
+	} else {
+		/* PM state was updated while we were doing the disable,
+		 * so we need to undo the disable we just performed.
+		 */
+		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered)
+		goto unlock_hwaccess;
+
+	if (kbdev->pm.backend.poweroff_wait_in_progress)
+		goto unlock_hwaccess;
+
+	/* Force all cores off */
+	kbdev->pm.backend.shaders_desired = false;
+	kbdev->pm.backend.l2_desired = false;
+
+	kbdev->pm.backend.poweroff_wait_in_progress = true;
+	kbdev->pm.backend.poweroff_is_suspend = is_suspend;
+	kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = true;
+
+	/* l2_desired being false should cause the state machine to
+	 * start powering off the L2. When it actually is powered off,
+	 * the interrupt handler will call kbase_pm_l2_update_state()
+	 * again, which will trigger the kbase_pm_gpu_poweroff_wait_wq.
+	 * Callers of this function will need to wait on poweroff_wait.
+	 */
+	kbase_pm_update_state(kbdev);
+
+unlock_hwaccess:
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+static bool is_poweroff_in_progress(struct kbase_device *kbdev)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
+{
+	wait_event_killable(kbdev->pm.backend.poweroff_wait,
+			is_poweroff_in_progress(kbdev));
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, don't enable IRQs as we are not ready to receive
+	 * them. */
+	ret = kbase_pm_init_hw(kbdev, flags);
+	if (ret) {
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		return ret;
+	}
+
+	kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
+			kbdev->pm.debug_core_mask[1] =
+			kbdev->pm.debug_core_mask[2] =
+			kbdev->gpu_props.props.raw_props.shader_present;
+
+	/* Pretend the GPU is active to prevent a power policy turning the GPU
+	 * cores off */
+	kbdev->pm.active_count = 1;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+	/* Ensure cycle counter is off */
+	kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+	/* We are ready to receive IRQ's now as power policy is set up, so
+	 * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->pm.backend.driver_ready_for_irqs = true;
+#endif
+	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the GPU and any cores needed by the policy */
+	kbase_pm_do_poweron(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return 0;
+}
+
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_do_poweroff(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
+
+void kbase_hwaccess_pm_early_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
+
+	/* Free any resources the policy allocated */
+	kbase_pm_state_machine_term(kbdev);
+	kbase_pm_policy_term(kbdev);
+	kbase_pm_ca_term(kbdev);
+
+	/* Shut down the metrics subsystem */
+	kbasep_pm_metrics_term(kbdev);
+
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
+}
+
+void kbase_hwaccess_pm_late_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	cancel_work_sync(&kbdev->pm.backend.hwcnt_disable_work);
+
+	if (kbdev->pm.backend.hwcnt_disabled) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+}
+
+void kbase_pm_power_changed(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_update_state(kbdev);
+
+	kbase_backend_slot_update(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2)
+{
+	kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
+	kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
+	kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
+	kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
+			new_core_mask_js2;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	/* Force power off the GPU and all cores (regardless of policy), only
+	 * after the PM active count reaches zero (otherwise, we risk turning it
+	 * off prematurely) */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbase_pm_do_poweroff(kbdev, true);
+
+	kbase_backend_timer_suspend(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbdev->pm.suspending = false;
+	kbase_pm_do_poweron(kbdev, true);
+
+	kbase_backend_timer_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100755
index 0000000..41f6429
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,106 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#ifdef CONFIG_MALI_NO_MALI
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif
+
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+
+	if (kbdev->current_core_mask)
+		pm_backend->ca_cores_enabled = kbdev->current_core_mask;
+	else
+		pm_backend->ca_cores_enabled =
+				kbdev->gpu_props.props.raw_props.shader_present;
+#endif
+
+	return 0;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+}
+
+#ifdef CONFIG_MALI_DEVFREQ
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
+{
+	struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (!(core_mask & kbdev->pm.debug_core_mask_all)) {
+		dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
+				core_mask, kbdev->pm.debug_core_mask_all);
+		goto unlock;
+	}
+
+	pm_backend->ca_cores_enabled = core_mask;
+
+	kbase_pm_update_state(kbdev);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n",
+			pm_backend->ca_cores_enabled);
+}
+#endif
+
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+#endif
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+#ifdef CONFIG_MALI_DEVFREQ
+	return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all;
+#else
+	return kbdev->gpu_props.props.raw_props.shader_present &
+			kbdev->pm.debug_core_mask_all;
+#endif
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
+
+u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+#ifdef CONFIG_MALI_NO_MALI
+	return (((1ull) << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1);
+#else
+	return kbdev->pm.backend.pm_shaders_core_mask;
+#endif
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
new file mode 100755
index 0000000..5423e96
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
@@ -0,0 +1,89 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+/**
+ * kbase_pm_ca_init - Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the core availability framework was successfully initialized,
+ *         -errno otherwise
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_term - Terminate core availability framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the currently available shader cores.
+ * Calls into the core availability policy
+ *
+ * Return: The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_update_core_status - Update core status
+ *
+ * @kbdev:               The kbase device structure for the device (must be
+ *                       a valid pointer)
+ * @cores_ready:         The bit mask of cores ready for job submission
+ * @cores_transitioning: The bit mask of cores that are transitioning power
+ *                       state
+ *
+ * Update core availability policy with current core power status
+ *
+ * Calls into the core availability policy
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+/**
+ * kbase_pm_ca_get_instr_core_mask - Get the PM state sync-ed shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the PM state synchronised shader cores for arranging
+ * HW performance counter dumps
+ *
+ * Return: The bit mask of PM state synchronised cores
+ */
+u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_CA_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h
new file mode 100755
index 0000000..f67ec65
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h
@@ -0,0 +1,60 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * A core availability policy for use with devfreq, where core masks are
+ * associated with OPPs.
+ */
+
+#ifndef MALI_KBASE_PM_CA_DEVFREQ_H
+#define MALI_KBASE_PM_CA_DEVFREQ_H
+
+/**
+ * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy
+ *
+ * This contains data that is private to the devfreq core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ */
+struct kbasep_pm_ca_policy_devfreq {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops;
+
+/**
+ * kbase_devfreq_set_core_mask - Set core mask for policy to use
+ * @kbdev: Device pointer
+ * @core_mask: New core mask
+ *
+ * The new core mask will have immediate effect if the GPU is powered, or will
+ * take effect when it is next powered on.
+ */
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask);
+
+#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100755
index 0000000..e90c44d
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static bool coarse_demand_shaders_needed(struct kbase_device *kbdev)
+{
+	return kbase_pm_is_active(kbdev);
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	return kbase_pm_is_active(kbdev);
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	"coarse_demand",			/* name */
+	coarse_demand_init,			/* init */
+	coarse_demand_term,			/* term */
+	coarse_demand_shaders_needed,		/* shaders_needed */
+	coarse_demand_get_core_active,		/* get_core_active */
+	0u,					/* flags */
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100755
index 0000000..304e5d7
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * DOC:
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - Shader Cores are powered up, regardless of whether or not they will be
+ *    needed later.
+ * - When KBase indicates that Shader Cores are needed to submit the currently
+ *   queued Job Chains:
+ *  - Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
+ *                                         policy
+ *
+ * This contains data that is private to the coarse demand power policy.
+ *
+ * @dummy: Dummy member - no state needed
+ */
+struct kbasep_pm_policy_coarse_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100755
index 0000000..42d3cb0
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,476 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_always_on_demand.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/**
+ * Maximum number of PM policies that may be active on a device.
+ */
+#define KBASE_PM_MAX_NUM_POLICIES (10)
+
+/**
+ * enum kbase_pm_core_type - The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to
+ * - kbase_pm_get_present_cores()
+ * - kbase_pm_get_active_cores()
+ * - kbase_pm_get_trans_cores()
+ * - kbase_pm_get_ready_cores().
+ *
+ * They specify which type of core should be acted on.  These values are set in
+ * a manner that allows core_type_to_reg() function to be simpler and more
+ * efficient.
+ *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ * @KBASE_PM_CORE_STACK: Core stacks
+ */
+enum kbase_pm_core_type {
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO,
+	KBASE_PM_CORE_STACK = STACK_PRESENT_LO
+};
+
+/**
+ * enum kbase_l2_core_state - The states used for the L2 cache & tiler power
+ *                            state machine.
+ *
+ * @KBASE_L2_OFF: The L2 cache and tiler are off
+ * @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on
+ * @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being
+ *                            enabled
+ * @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled
+ * @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being
+ *                             disabled
+ * @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off
+ * @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off
+ * @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state
+ *                       are unknown
+ */
+enum kbase_l2_core_state {
+#define KBASEP_L2_STATE(n) KBASE_L2_ ## n,
+#include "mali_kbase_pm_l2_states.h"
+#undef KBASEP_L2_STATE
+};
+
+/**
+ * enum kbase_shader_core_state - The states used for the shaders' state machine.
+ *
+ * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off
+ * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have
+ *                                       been requested to power on and hwcnt
+ *                                       is being disabled
+ * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been
+ *                                      requested to power on.
+ * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on, and hwcnt
+ *					already enabled.
+ * @KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: The shaders and core stacks
+ *                                      are on, hwcnt disabled, and checks
+ *                                      to powering down or re-enabling
+ *                                      hwcnt.
+ * @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to
+ *                                       power off, but they remain on for the
+ *                                       duration of the hysteresis timer
+ * @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired
+ * @KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: The core stacks are on and the
+ *                                          level 2 cache is being flushed.
+ * @KBASE_SHADERS_READY_OFF_CORESTACK_ON: The core stacks are on and the shaders
+ *                                        are ready to be powered off.
+ * @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders
+ *                                       have been requested to power off
+ * @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks
+ *                                        have been requested to power off
+ * @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are
+ *                                                  off, but the tick timer
+ *                                                  cancellation is still
+ *                                                  pending.
+ * @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power
+ *                            states are unknown
+ */
+enum kbase_shader_core_state {
+#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n,
+#include "mali_kbase_pm_shader_states.h"
+#undef KBASEP_SHADER_STATE
+};
+
+/**
+ * struct kbasep_pm_metrics - Metrics data collected for use by the power
+ *                            management framework.
+ *
+ *  @time_busy: number of ns the GPU was busy executing jobs since the
+ *          @time_period_start timestamp.
+ *  @time_idle: number of ns since time_period_start the GPU was not executing
+ *          jobs since the @time_period_start timestamp.
+ *  @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ */
+struct kbasep_pm_metrics {
+	u32 time_busy;
+	u32 time_idle;
+	u32 busy_cl[2];
+	u32 busy_gl;
+};
+
+/**
+ * struct kbasep_pm_metrics_state - State required to collect the metrics in
+ *                                  struct kbasep_pm_metrics
+ *  @time_period_start: time at which busy/idle measurements started
+ *  @gpu_active: true when the GPU is executing jobs. false when
+ *           not. Updated when the job scheduler informs us a job in submitted
+ *           or removed from a GPU slot.
+ *  @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
+ *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
+ *           GL jobs never run on slot 2 this slot is not recorded.
+ *  @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ *  @platform_data: pointer to data controlled by platform specific code
+ *  @kbdev: pointer to kbase device for which metrics are collected
+ *  @values: The current values of the power management metrics. The
+ *           kbase_pm_get_dvfs_metrics() function is used to compare these
+ *           current values with the saved values from a previous invocation.
+ *  @timer: timer to regularly make DVFS decisions based on the power
+ *           management metrics.
+ *  @timer_active: boolean indicating @timer is running
+ *  @dvfs_last: values of the PM metrics from the last DVFS tick
+ *  @dvfs_diff: different between the current and previous PM metrics.
+ */
+struct kbasep_pm_metrics_state {
+	ktime_t time_period_start;
+	bool gpu_active;
+	u32 active_cl_ctx[2];
+	u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
+	spinlock_t lock;
+
+	void *platform_data;
+	struct kbase_device *kbdev;
+
+	struct kbasep_pm_metrics values;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	struct hrtimer timer;
+	bool timer_active;
+	struct kbasep_pm_metrics dvfs_last;
+	struct kbasep_pm_metrics dvfs_diff;
+#endif
+};
+
+/**
+ * struct kbasep_pm_tick_timer_state - State for the shader hysteresis timer
+ * @wq: Work queue to wait for the timer to stopped
+ * @work: Work item which cancels the timer
+ * @timer: Timer for powering off the shader cores
+ * @configured_interval: Period of GPU poweroff timer
+ * @configured_ticks: User-configured number of ticks to wait after the shader
+ *                    power down request is received before turning off the cores
+ * @remaining_ticks: Number of remaining timer ticks until shaders are powered off
+ * @cancel_queued: True if the cancellation work item has been queued. This is
+ *                 required to ensure that it is not queued twice, e.g. after
+ *                 a reset, which could cause the timer to be incorrectly
+ *                 cancelled later by a delayed workitem.
+ * @needed: Whether the timer should restart itself
+ */
+struct kbasep_pm_tick_timer_state {
+	struct workqueue_struct *wq;
+	struct work_struct work;
+	struct hrtimer timer;
+
+	ktime_t configured_interval;
+	unsigned int configured_ticks;
+	unsigned int remaining_ticks;
+
+	bool cancel_queued;
+	bool needed;
+};
+
+union kbase_pm_policy_data {
+	struct kbasep_pm_policy_always_on always_on;
+	struct kbasep_pm_policy_coarse_demand coarse_demand;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_policy_always_on_demand always_on_demand;
+#endif
+};
+
+/**
+ * struct kbase_pm_backend_data - Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ *
+ * @pm_current_policy: The policy that is currently actively controlling the
+ *                     power state.
+ * @pm_policy_data:    Private data for current PM policy
+ * @reset_done:        Flag when a reset is complete
+ * @reset_done_wait:   Wait queue to wait for changes to @reset_done
+ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
+ *                              users
+ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
+ * @gpu_in_desired_state_wait: Wait queue set when the GPU is in the desired
+ *                             state according to the L2 and shader power state
+ *                             machines
+ * @gpu_powered:       Set to true when the GPU is powered and register
+ *                     accesses are possible, false otherwise. Access to this
+ *                     variable should be protected by: both the hwaccess_lock
+ *                     spinlock and the pm.lock mutex for writes; or at least
+ *                     one of either lock for reads.
+ * @pm_shaders_core_mask: Shader PM state synchronised shaders core mask. It
+ *                     holds the cores enabled in a hardware counters dump,
+ *                     and may differ from @shaders_avail when under different
+ *                     states and transitions.
+ * @cg1_disabled:      Set if the policy wants to keep the second core group
+ *                     powered off
+ * @driver_ready_for_irqs: Debug state indicating whether sufficient
+ *                         initialization of the driver has occurred to handle
+ *                         IRQs
+ * @metrics:           Structure to hold metrics for the GPU
+ * @shader_tick_timer: Structure to hold the shader poweroff tick timer state
+ * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
+ *                             hwaccess_lock must be held when accessing
+ * @invoke_poweroff_wait_wq_when_l2_off: flag indicating that the L2 power state
+ *                                       machine should invoke the poweroff
+ *                                       worker after the L2 has turned off.
+ * @poweron_required: true if a GPU power on is required. Should only be set
+ *                    when poweroff_wait_in_progress is true, and therefore the
+ *                    GPU can not immediately be powered on. pm.lock must be
+ *                    held when accessing
+ * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend
+ *                       request. pm.lock must be held when accessing
+ * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off
+ * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq
+ * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete
+ * @callback_power_on: Callback when the GPU needs to be turned on. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_off: Callback when the GPU may be turned off. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to
+ *                          be turned off. See &struct kbase_pm_callback_conf
+ * @callback_power_resume: Callback when a resume occurs and the GPU needs to
+ *                          be turned on. See &struct kbase_pm_callback_conf
+ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See
+ *                             &struct kbase_pm_callback_conf
+ * @callback_power_runtime_off: Callback when the GPU may be turned off. See
+ *                              &struct kbase_pm_callback_conf
+ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
+ *                              &struct kbase_pm_callback_conf
+ * @ca_cores_enabled: Cores that are currently available
+ * @l2_state: The current state of the L2 cache state machine. See
+ *            &enum kbase_l2_core_state
+ * @l2_desired: True if the L2 cache should be powered on by the L2 cache state
+ *              machine
+ * @shaders_state: The current state of the shader state machine.
+ * @shaders_avail: This is updated by the state machine when it is in a state
+ *                 where it can handle changes to the core availability. This
+ *                 is internal to the shader state machine and should *not* be
+ *                 modified elsewhere.
+ * @shaders_desired: True if the PM active count or power policy requires the
+ *                   shader cores to be on. This is used as an input to the
+ *                   shader power state machine.  The current state of the
+ *                   cores may be different, but there should be transitions in
+ *                   progress that will eventually achieve this state (assuming
+ *                   that the policy doesn't change its mind in the mean time).
+ * @in_reset: True if a GPU is resetting and normal power manager operation is
+ *            suspended
+ * @protected_transition_override : True if a protected mode transition is in
+ *                                  progress and is overriding power manager
+ *                                  behaviour.
+ * @protected_l2_override : Non-zero if the L2 cache is required during a
+ *                          protected mode transition. Has no effect if not
+ *                          transitioning.
+ * @hwcnt_desired: True if we want GPU hardware counters to be enabled.
+ * @hwcnt_disabled: True if GPU hardware counters are not enabled.
+ * @hwcnt_disable_work: Work item to disable GPU hardware counters, used if
+ *                      atomic disable is not possible.
+ *
+ * Note:
+ * During an IRQ, @pm_current_policy can be NULL when the policy is being
+ * changed with kbase_pm_set_policy(). The change is protected under
+ * kbase_device.pm.pcower_change_lock. Direct access to this from IRQ context
+ * must therefore check for NULL. If NULL, then kbase_pm_set_policy() will
+ * re-issue the policy functions that would have been done under IRQ.
+ */
+struct kbase_pm_backend_data {
+	const struct kbase_pm_policy *pm_current_policy;
+	union kbase_pm_policy_data pm_policy_data;
+	bool reset_done;
+	wait_queue_head_t reset_done_wait;
+	int gpu_cycle_counter_requests;
+	spinlock_t gpu_cycle_counter_requests_lock;
+
+	wait_queue_head_t gpu_in_desired_state_wait;
+
+	bool gpu_powered;
+
+	u64 pm_shaders_core_mask;
+
+	bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+	bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+	struct kbasep_pm_metrics_state metrics;
+
+	struct kbasep_pm_tick_timer_state shader_tick_timer;
+
+	bool poweroff_wait_in_progress;
+	bool invoke_poweroff_wait_wq_when_l2_off;
+	bool poweron_required;
+	bool poweroff_is_suspend;
+
+	struct workqueue_struct *gpu_poweroff_wait_wq;
+	struct work_struct gpu_poweroff_wait_work;
+
+	wait_queue_head_t poweroff_wait;
+
+	int (*callback_power_on)(struct kbase_device *kbdev);
+	void (*callback_power_off)(struct kbase_device *kbdev);
+	void (*callback_power_suspend)(struct kbase_device *kbdev);
+	void (*callback_power_resume)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
+
+	u64 ca_cores_enabled;
+
+	enum kbase_l2_core_state l2_state;
+	enum kbase_shader_core_state shaders_state;
+	u64 shaders_avail;
+	bool l2_desired;
+	bool shaders_desired;
+
+	bool in_reset;
+
+	bool protected_transition_override;
+	int protected_l2_override;
+
+	bool hwcnt_desired;
+	bool hwcnt_disabled;
+	struct work_struct hwcnt_disable_work;
+};
+
+
+/* List of policy IDs */
+enum kbase_pm_policy_id {
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_POLICY_ID_ALWAYS_ON_DEMAND,
+#endif
+	KBASE_PM_POLICY_ID_ALWAYS_ON
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+#define KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY (1u)
+
+/**
+ * struct kbase_pm_policy - Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @shaders_needed:     Function called to find out if shader cores are needed
+ * @get_core_active:    Function called to get the current overall GPU power
+ *                      state
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.pm_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to find out if shader cores are needed
+	 *
+	 * This needs to at least satisfy kbdev->pm.backend.shaders_desired,
+	 * and so must never return false when shaders_desired is true.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: true if shader cores are needed, false otherwise
+	 */
+	bool (*shaders_needed)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current overall GPU power state
+	 *
+	 * This function must meet or exceed the requirements for power
+	 * indicated by kbase_pm_is_active().
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: true if the GPU should be powered, false otherwise
+	 */
+	bool (*get_core_active)(struct kbase_device *kbdev);
+
+	kbase_pm_policy_flags flags;
+	enum kbase_pm_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
new file mode 100755
index 0000000..d97ec23
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -0,0 +1,2036 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel Power Management hardware control
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_tracepoints.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_smc.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_reset_gpu.h>
+#include <mali_kbase_ctx_sched.h>
+#include <mali_kbase_hwcnt_context.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_l2_mmu_config.h>
+
+#include <linux/of.h>
+
+#ifdef CONFIG_MALI_CORESTACK
+bool corestack_driver_control = true;
+#else
+bool corestack_driver_control; /* Default value of 0/false */
+#endif
+module_param(corestack_driver_control, bool, 0444);
+MODULE_PARM_DESC(corestack_driver_control,
+		"Let the driver power on/off the GPU core stack independently "
+		"without involving the Power Domain Controller. This should "
+		"only be enabled on platforms for which integration of the PDC "
+		"to the Mali GPU is known to be problematic.");
+KBASE_EXPORT_TEST_API(corestack_driver_control);
+
+#ifdef CONFIG_MALI_PLATFORM_POWER_DOWN_ONLY
+bool platform_power_down_only = true;
+#else
+bool platform_power_down_only; /* Default value of 0/false */
+#endif
+module_param(platform_power_down_only, bool, 0444);
+MODULE_PARM_DESC(platform_power_down_only,
+		"Disable power down of individual cores.");
+
+/**
+ * enum kbasep_pm_action - Actions that can be performed on a core.
+ *
+ * This enumeration is private to the file. Its values are set to allow
+ * core_type_to_reg() function, which decodes this enumeration, to be simpler
+ * and more efficient.
+ *
+ * @ACTION_PRESENT: The cores that are present
+ * @ACTION_READY: The cores that are ready
+ * @ACTION_PWRON: Power on the cores specified
+ * @ACTION_PWROFF: Power off the cores specified
+ * @ACTION_PWRTRANS: The cores that are transitioning
+ * @ACTION_PWRACTIVE: The cores that are active
+ */
+enum kbasep_pm_action {
+	ACTION_PRESENT = 0,
+	ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO),
+	ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO),
+	ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO),
+	ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO),
+	ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO)
+};
+
+static u64 kbase_pm_get_state(
+		struct kbase_device *kbdev,
+		enum kbase_pm_core_type core_type,
+		enum kbasep_pm_action action);
+
+static bool kbase_pm_is_l2_desired(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.backend.protected_transition_override &&
+			kbdev->pm.backend.protected_l2_override)
+		return true;
+
+	if (kbdev->pm.backend.protected_transition_override &&
+			!kbdev->pm.backend.shaders_desired)
+		return false;
+
+	return kbdev->pm.backend.l2_desired;
+}
+
+void kbase_pm_protected_override_enable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->pm.backend.protected_transition_override = true;
+}
+void kbase_pm_protected_override_disable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->pm.backend.protected_transition_override = false;
+}
+
+void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (override) {
+		kbdev->pm.backend.protected_l2_override++;
+		WARN_ON(kbdev->pm.backend.protected_l2_override <= 0);
+	} else {
+		kbdev->pm.backend.protected_l2_override--;
+		WARN_ON(kbdev->pm.backend.protected_l2_override < 0);
+	}
+
+	kbase_pm_update_state(kbdev);
+}
+
+/**
+ * core_type_to_reg - Decode a core type and action to a register.
+ *
+ * Given a core type (defined by kbase_pm_core_type) and an action (defined
+ * by kbasep_pm_action) this function will return the register offset that
+ * will perform the action on the core type. The register returned is the _LO
+ * register and an offset must be applied to use the _HI register.
+ *
+ * @core_type: The type of core
+ * @action:    The type of action
+ *
+ * Return: The register offset of the _LO register that performs an action of
+ * type @action on a core of type @core_type.
+ */
+static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
+						enum kbasep_pm_action action)
+{
+	if (corestack_driver_control) {
+		if (core_type == KBASE_PM_CORE_STACK) {
+			switch (action) {
+			case ACTION_PRESENT:
+				return STACK_PRESENT_LO;
+			case ACTION_READY:
+				return STACK_READY_LO;
+			case ACTION_PWRON:
+				return STACK_PWRON_LO;
+			case ACTION_PWROFF:
+				return STACK_PWROFF_LO;
+			case ACTION_PWRTRANS:
+				return STACK_PWRTRANS_LO;
+			default:
+				WARN(1, "Invalid action for core type\n");
+			}
+		}
+	}
+
+	return (u32)core_type + (u32)action;
+}
+
+#ifdef CONFIG_ARM64
+static void mali_cci_flush_l2(struct kbase_device *kbdev)
+{
+	const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED;
+	u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+	u32 raw;
+
+	/*
+	 * Note that we don't take the cache flush mutex here since
+	 * we expect to be the last user of the L2, all other L2 users
+	 * would have dropped their references, to initiate L2 power
+	 * down, L2 power down being the only valid place for this
+	 * to be called from.
+	 */
+
+	kbase_reg_write(kbdev,
+			GPU_CONTROL_REG(GPU_COMMAND),
+			GPU_COMMAND_CLEAN_INV_CACHES);
+
+	raw = kbase_reg_read(kbdev,
+		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT));
+
+	/* Wait for cache flush to complete before continuing, exit on
+	 * gpu resets or loop expiry. */
+	while (((raw & mask) == 0) && --loops) {
+		raw = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT));
+	}
+}
+#endif
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+static void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo = cores & 0xFFFFFFFF;
+	u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+	/* When 'platform_power_down_only' is enabled, no core type should be
+	 * turned off individually.
+	 */
+	KBASE_DEBUG_ASSERT(!(action == ACTION_PWROFF &&
+			platform_power_down_only));
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+
+	if (cores) {
+		u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
+
+		if (action == ACTION_PWRON)
+			state |= cores;
+		else if (action == ACTION_PWROFF)
+			state &= ~cores;
+		KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state);
+	}
+
+	/* Tracing */
+	if (cores) {
+		if (action == ACTION_PWRON)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u,
+									lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL,
+									0u, lo);
+				break;
+			default:
+				break;
+			}
+		else if (action == ACTION_PWROFF)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL,
+									0u, lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
+									0u, lo);
+				/* disable snoops before L2 is turned off */
+				kbase_pm_cache_snoop_disable(kbdev);
+				break;
+			default:
+				break;
+			}
+	}
+
+	if (lo != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo);
+
+	if (hi != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi);
+}
+
+/**
+ * kbase_pm_get_state - Get information about a core set
+ *
+ * This function gets information (chosen by @action) about a set of cores of
+ * a type given by @core_type. It is a static function used by
+ * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and
+ * kbase_pm_get_ready_cores().
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the should be queried
+ * @action:    The property of the cores to query
+ *
+ * Return: A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo, hi;
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+
+	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg));
+	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4));
+
+	return (((u64) hi) << 32) | ((u64) lo);
+}
+
+/**
+ * kbase_pm_get_present_cores - Get the cores that are present
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (type) {
+	case KBASE_PM_CORE_L2:
+		return kbdev->gpu_props.props.raw_props.l2_present;
+	case KBASE_PM_CORE_SHADER:
+		return kbdev->gpu_props.props.raw_props.shader_present;
+	case KBASE_PM_CORE_TILER:
+		return kbdev->gpu_props.props.raw_props.tiler_present;
+	case KBASE_PM_CORE_STACK:
+		return kbdev->gpu_props.props.raw_props.stack_present;
+	default:
+		break;
+	}
+	KBASE_DEBUG_ASSERT(0);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * kbase_pm_get_active_cores - Get the cores that are "active"
+ *                             (busy processing work)
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are active
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between
+ *                            power states
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are transitioning
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * kbase_pm_get_ready_cores - Get the cores that are powered on
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are ready (powered on)
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	u64 result;
+
+	result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+	switch (type) {
+	case KBASE_PM_CORE_SHADER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_TILER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_L2:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+static void kbase_pm_trigger_hwcnt_disable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* See if we can get away with disabling hwcnt
+	 * atomically, otherwise kick off a worker.
+	 */
+	if (kbase_hwcnt_context_disable_atomic(kbdev->hwcnt_gpu_ctx)) {
+		backend->hwcnt_disabled = true;
+	} else {
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+		queue_work(system_wq,
+			&backend->hwcnt_disable_work);
+#else
+		queue_work(system_highpri_wq,
+			&backend->hwcnt_disable_work);
+#endif
+	}
+}
+
+static void kbase_pm_l2_config_override(struct kbase_device *kbdev)
+{
+	u32 val;
+
+	/*
+	 * Skip if it is not supported
+	 */
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG))
+		return;
+
+	/*
+	 * Skip if size and hash are not given explicitly,
+	 * which means default values are used.
+	 */
+	if ((kbdev->l2_size_override == 0) && (kbdev->l2_hash_override == 0))
+		return;
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG));
+
+	if (kbdev->l2_size_override) {
+		val &= ~L2_CONFIG_SIZE_MASK;
+		val |= (kbdev->l2_size_override << L2_CONFIG_SIZE_SHIFT);
+	}
+
+	if (kbdev->l2_hash_override) {
+		val &= ~L2_CONFIG_HASH_MASK;
+		val |= (kbdev->l2_hash_override << L2_CONFIG_HASH_SHIFT);
+	}
+
+	dev_dbg(kbdev->dev, "Program 0x%x to L2_CONFIG\n", val);
+
+	/* Write L2_CONFIG to override */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), val);
+}
+
+static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state)
+{
+	const char *const strings[] = {
+#define KBASEP_L2_STATE(n) #n,
+#include "mali_kbase_pm_l2_states.h"
+#undef KBASEP_L2_STATE
+	};
+	if (WARN_ON((size_t)state >= ARRAY_SIZE(strings)))
+		return "Bad level 2 cache state";
+	else
+		return strings[state];
+}
+
+static u64 kbase_pm_l2_update_state(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+	u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present;
+	u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present;
+	enum kbase_l2_core_state prev_state;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	do {
+		/* Get current state */
+		u64 l2_trans = kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_L2);
+		u64 l2_ready = kbase_pm_get_ready_cores(kbdev,
+				KBASE_PM_CORE_L2);
+		u64 tiler_trans = kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_TILER);
+		u64 tiler_ready = kbase_pm_get_ready_cores(kbdev,
+				KBASE_PM_CORE_TILER);
+
+		/* mask off ready from trans in case transitions finished
+		 * between the register reads
+		 */
+		l2_trans &= ~l2_ready;
+		tiler_trans &= ~tiler_ready;
+
+		prev_state = backend->l2_state;
+
+		switch (backend->l2_state) {
+		case KBASE_L2_OFF:
+			if (kbase_pm_is_l2_desired(kbdev)) {
+				/*
+				 * Set the desired config for L2 before powering
+				 * it on
+				 */
+				kbase_pm_l2_config_override(kbdev);
+
+				/* L2 is required, power on.  Powering on the
+				 * tiler will also power the first L2 cache.
+				 */
+				kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER,
+						tiler_present, ACTION_PWRON);
+
+				/* If we have more than one L2 cache then we
+				 * must power them on explicitly.
+				 */
+				if (l2_present != 1)
+					kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
+							l2_present & ~1,
+							ACTION_PWRON);
+				backend->l2_state = KBASE_L2_PEND_ON;
+			}
+			break;
+
+		case KBASE_L2_PEND_ON:
+			if (!l2_trans && l2_ready == l2_present && !tiler_trans
+					&& tiler_ready == tiler_present) {
+				KBASE_TRACE_ADD(kbdev,
+						PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u,
+						(u32)tiler_ready);
+				/*
+				 * Ensure snoops are enabled after L2 is powered
+				 * up. Note that kbase keeps track of the snoop
+				 * state, so safe to repeatedly call.
+				 */
+				kbase_pm_cache_snoop_enable(kbdev);
+
+				/* With the L2 enabled, we can now enable
+				 * hardware counters.
+				 */
+				backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE;
+
+				/* Now that the L2 is on, the shaders can start
+				 * powering on if they're required. The obvious
+				 * way to do this would be to call
+				 * kbase_pm_shaders_update_state() here.
+				 * However, that would make the two state
+				 * machines mutually recursive, as the opposite
+				 * would be needed for powering down. Instead,
+				 * callers of this function should use the
+				 * kbase_pm_update_state() wrapper, which will
+				 * call the shader state machine immediately
+				 * after the L2 (for power up), or
+				 * automatically re-invoke the L2 state machine
+				 * when the shaders power down.
+				 */
+			}
+			break;
+
+		case KBASE_L2_ON_HWCNT_ENABLE:
+			backend->hwcnt_desired = true;
+			if (backend->hwcnt_disabled) {
+				kbase_hwcnt_context_enable(
+					kbdev->hwcnt_gpu_ctx);
+				backend->hwcnt_disabled = false;
+			}
+			backend->l2_state = KBASE_L2_ON;
+			break;
+
+		case KBASE_L2_ON:
+			if (!kbase_pm_is_l2_desired(kbdev)) {
+				/* Do not power off L2 until the shaders and
+				 * core stacks are off.
+				 */
+				if (backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
+					break;
+
+				/* We need to make sure hardware counters are
+				 * disabled before powering down the L2, to
+				 * prevent loss of data.
+				 *
+				 * We waited until after the cores were powered
+				 * down to prevent ping-ponging between hwcnt
+				 * enabled and disabled, which would have
+				 * happened if userspace submitted more work
+				 * while we were trying to power down.
+				 */
+				backend->l2_state = KBASE_L2_ON_HWCNT_DISABLE;
+			}
+			break;
+
+		case KBASE_L2_ON_HWCNT_DISABLE:
+			/* If the L2 became desired while we were waiting on the
+			 * worker to do the actual hwcnt disable (which might
+			 * happen if some work was submitted immediately after
+			 * the shaders powered off), then we need to early-out
+			 * of this state and re-enable hwcnt.
+			 *
+			 * If we get lucky, the hwcnt disable might not have
+			 * actually started yet, and the logic in the hwcnt
+			 * enable state will prevent the worker from
+			 * performing the disable entirely, preventing loss of
+			 * any hardware counter data.
+			 *
+			 * If the hwcnt disable has started, then we'll lose
+			 * a tiny amount of hardware counter data between the
+			 * disable and the re-enable occurring.
+			 *
+			 * This loss of data is preferable to the alternative,
+			 * which is to block the shader cores from doing any
+			 * work until we're sure hwcnt has been re-enabled.
+			 */
+			if (kbase_pm_is_l2_desired(kbdev)) {
+				backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE;
+				break;
+			}
+
+			backend->hwcnt_desired = false;
+			if (!backend->hwcnt_disabled) {
+				kbase_pm_trigger_hwcnt_disable(kbdev);
+			}
+
+			if (backend->hwcnt_disabled)
+				backend->l2_state = KBASE_L2_POWER_DOWN;
+			break;
+
+		case KBASE_L2_POWER_DOWN:
+			if (!platform_power_down_only)
+				/* Powering off the L2 will also power off the
+				 * tiler.
+				 */
+				kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
+						l2_present,
+						ACTION_PWROFF);
+			else
+				/* If L2 cache is powered then we must flush it
+				 * before we power off the GPU. Normally this
+				 * would have been handled when the L2 was
+				 * powered off.
+				 */
+				kbase_gpu_start_cache_clean_nolock(
+						kbdev);
+
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+					NULL, NULL, 0u, 0u);
+
+			backend->l2_state = KBASE_L2_PEND_OFF;
+			break;
+
+		case KBASE_L2_PEND_OFF:
+			if (!platform_power_down_only) {
+				/* We only need to check the L2 here - if the L2
+				 * is off then the tiler is definitely also off.
+				 */
+				if (!l2_trans && !l2_ready)
+					/* L2 is now powered off */
+					backend->l2_state = KBASE_L2_OFF;
+			} else {
+				if (!kbdev->cache_clean_in_progress)
+					backend->l2_state = KBASE_L2_OFF;
+			}
+			break;
+
+		case KBASE_L2_RESET_WAIT:
+			/* Reset complete  */
+			if (!backend->in_reset)
+				backend->l2_state = KBASE_L2_OFF;
+			break;
+
+		default:
+			WARN(1, "Invalid state in l2_state: %d",
+					backend->l2_state);
+		}
+
+		if (backend->l2_state != prev_state)
+			dev_dbg(kbdev->dev, "L2 state transition: %s to %s\n",
+				kbase_l2_core_state_to_string(prev_state),
+				kbase_l2_core_state_to_string(
+					backend->l2_state));
+
+	} while (backend->l2_state != prev_state);
+
+	if (kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off &&
+			backend->l2_state == KBASE_L2_OFF) {
+		kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false;
+		queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
+				&kbdev->pm.backend.gpu_poweroff_wait_work);
+	}
+
+	if (backend->l2_state == KBASE_L2_ON)
+		return l2_present;
+	return 0;
+}
+
+static void shader_poweroff_timer_stop_callback(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbasep_pm_tick_timer_state *stt = container_of(data,
+			struct kbasep_pm_tick_timer_state, work);
+	struct kbase_device *kbdev = container_of(stt, struct kbase_device,
+			pm.backend.shader_tick_timer);
+
+	hrtimer_cancel(&stt->timer);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	stt->cancel_queued = false;
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_pm_update_state(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/**
+ * shader_poweroff_timer_queue_cancel - cancel the shader poweroff tick timer
+ * @kbdev:      pointer to kbase device
+ *
+ * Synchronization between the shader state machine and the timer thread is
+ * difficult. This is because situations may arise where the state machine
+ * wants to start the timer, but the callback is already running, and has
+ * already passed the point at which it checks whether it is required, and so
+ * cancels itself, even though the state machine may have just tried to call
+ * hrtimer_start.
+ *
+ * This cannot be stopped by holding hwaccess_lock in the timer thread,
+ * because there are still infinitesimally small sections at the start and end
+ * of the callback where the lock is not held.
+ *
+ * Instead, a new state is added to the shader state machine,
+ * KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF. This is used to guarantee
+ * that when the shaders are switched off, the timer has definitely been
+ * cancelled. As a result, when KBASE_SHADERS_ON_CORESTACK_ON is left and the
+ * timer is started, it is guaranteed that either the timer is already running
+ * (from an availability change or cancelled timer), or hrtimer_start will
+ * succeed. It is critical to avoid ending up in
+ * KBASE_SHADERS_WAIT_OFF_CORESTACK_ON without the timer running, or it could
+ * hang there forever.
+ */
+static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_tick_timer_state *stt =
+			&kbdev->pm.backend.shader_tick_timer;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	stt->needed = false;
+
+	if (hrtimer_active(&stt->timer) && !stt->cancel_queued) {
+		stt->cancel_queued = true;
+		queue_work(stt->wq, &stt->work);
+	}
+}
+
+static const char *kbase_shader_core_state_to_string(
+	enum kbase_shader_core_state state)
+{
+	const char *const strings[] = {
+#define KBASEP_SHADER_STATE(n) #n,
+#include "mali_kbase_pm_shader_states.h"
+#undef KBASEP_SHADER_STATE
+	};
+	if (WARN_ON((size_t)state >= ARRAY_SIZE(strings)))
+		return "Bad shader core state";
+	else
+		return strings[state];
+}
+
+static void kbase_pm_shaders_update_state(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+	struct kbasep_pm_tick_timer_state *stt =
+			&kbdev->pm.backend.shader_tick_timer;
+	enum kbase_shader_core_state prev_state;
+	u64 stacks_avail = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (corestack_driver_control)
+		/* Always power on all the corestacks. Disabling certain
+		 * corestacks when their respective shaders are not in the
+		 * available bitmap is not currently supported.
+		 */
+		stacks_avail = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_STACK);
+
+	do {
+		u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER);
+		u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+		u64 stacks_trans = 0;
+		u64 stacks_ready = 0;
+
+		if (corestack_driver_control) {
+			stacks_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_STACK);
+			stacks_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK);
+		}
+
+		/* mask off ready from trans in case transitions finished
+		 * between the register reads
+		 */
+		shaders_trans &= ~shaders_ready;
+		stacks_trans &= ~stacks_ready;
+
+		prev_state = backend->shaders_state;
+
+		switch (backend->shaders_state) {
+		case KBASE_SHADERS_OFF_CORESTACK_OFF:
+			/* Ignore changes to the shader core availability
+			 * except at certain points where we can handle it,
+			 * i.e. off and SHADERS_ON_CORESTACK_ON.
+			 */
+			backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev);
+			backend->pm_shaders_core_mask = 0;
+
+			if (backend->shaders_desired &&
+				backend->l2_state == KBASE_L2_ON) {
+				if (backend->hwcnt_desired &&
+					!backend->hwcnt_disabled) {
+					/* Trigger a hwcounter dump */
+					backend->hwcnt_desired = false;
+					kbase_pm_trigger_hwcnt_disable(kbdev);
+				}
+
+				if (backend->hwcnt_disabled) {
+					if (corestack_driver_control) {
+						kbase_pm_invoke(kbdev,
+							KBASE_PM_CORE_STACK,
+							stacks_avail,
+							ACTION_PWRON);
+					}
+					backend->shaders_state =
+						KBASE_SHADERS_OFF_CORESTACK_PEND_ON;
+				}
+			}
+			break;
+
+		case KBASE_SHADERS_OFF_CORESTACK_PEND_ON:
+			if (!stacks_trans && stacks_ready == stacks_avail) {
+				kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
+						backend->shaders_avail, ACTION_PWRON);
+
+				backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON;
+			}
+			break;
+
+		case KBASE_SHADERS_PEND_ON_CORESTACK_ON:
+			if (!shaders_trans && shaders_ready == backend->shaders_avail) {
+				KBASE_TRACE_ADD(kbdev,
+						PM_CORES_CHANGE_AVAILABLE,
+						NULL, NULL, 0u, (u32)shaders_ready);
+				backend->pm_shaders_core_mask = shaders_ready;
+				backend->hwcnt_desired = true;
+				if (backend->hwcnt_disabled) {
+					kbase_hwcnt_context_enable(
+						kbdev->hwcnt_gpu_ctx);
+					backend->hwcnt_disabled = false;
+				}
+				backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON;
+			}
+			break;
+
+		case KBASE_SHADERS_ON_CORESTACK_ON:
+			backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev);
+
+			/* If shaders to change state, trigger a counter dump */
+			if (!backend->shaders_desired ||
+				(backend->shaders_avail & ~shaders_ready)) {
+				backend->hwcnt_desired = false;
+				if (!backend->hwcnt_disabled)
+					kbase_pm_trigger_hwcnt_disable(kbdev);
+				backend->shaders_state =
+					KBASE_SHADERS_ON_CORESTACK_ON_RECHECK;
+			}
+			break;
+
+		case KBASE_SHADERS_ON_CORESTACK_ON_RECHECK:
+			backend->shaders_avail =
+				kbase_pm_ca_get_core_mask(kbdev);
+
+			if (!backend->hwcnt_disabled) {
+				/* Wait for being disabled */
+				;
+			} else if (!backend->shaders_desired) {
+				if (kbdev->pm.backend.protected_transition_override ||
+						!stt->configured_ticks ||
+						WARN_ON(stt->cancel_queued)) {
+					backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON;
+				} else {
+					stt->remaining_ticks = stt->configured_ticks;
+					stt->needed = true;
+
+					/* The shader hysteresis timer is not
+					 * done the obvious way, which would be
+					 * to start an hrtimer when the shader
+					 * power off is requested. Instead,
+					 * use a 'tick' timer, and set the
+					 * remaining number of ticks on a power
+					 * off request.  This avoids the
+					 * latency of starting, then
+					 * immediately cancelling an hrtimer
+					 * when the shaders are re-requested
+					 * before the timeout expires.
+					 */
+					if (!hrtimer_active(&stt->timer))
+						hrtimer_start(&stt->timer,
+								stt->configured_interval,
+								HRTIMER_MODE_REL);
+
+					backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON;
+				}
+			} else {
+				if (backend->shaders_avail & ~shaders_ready) {
+					backend->shaders_avail |= shaders_ready;
+
+					kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
+							backend->shaders_avail & ~shaders_ready,
+							ACTION_PWRON);
+				}
+				backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON;
+			}
+			break;
+
+		case KBASE_SHADERS_WAIT_OFF_CORESTACK_ON:
+			if (WARN_ON(!hrtimer_active(&stt->timer))) {
+				stt->remaining_ticks = 0;
+				backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON;
+			}
+
+			if (backend->shaders_desired) {
+				stt->remaining_ticks = 0;
+				backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON_RECHECK;
+			} else if (stt->remaining_ticks == 0) {
+				backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON;
+			}
+			break;
+
+		case KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON:
+			shader_poweroff_timer_queue_cancel(kbdev);
+			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) {
+				kbase_gpu_start_cache_clean_nolock(kbdev);
+				backend->shaders_state =
+					KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON;
+			} else {
+				backend->shaders_state =
+					KBASE_SHADERS_READY_OFF_CORESTACK_ON;
+			}
+			break;
+
+		case KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON:
+			if (!kbdev->cache_clean_in_progress)
+				backend->shaders_state =
+					KBASE_SHADERS_READY_OFF_CORESTACK_ON;
+
+			break;
+
+		case KBASE_SHADERS_READY_OFF_CORESTACK_ON:
+			if (!platform_power_down_only)
+				kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
+						shaders_ready, ACTION_PWROFF);
+
+			KBASE_TRACE_ADD(kbdev,
+					PM_CORES_CHANGE_AVAILABLE,
+					NULL, NULL, 0u, 0u);
+
+			backend->shaders_state = KBASE_SHADERS_PEND_OFF_CORESTACK_ON;
+			break;
+
+		case KBASE_SHADERS_PEND_OFF_CORESTACK_ON:
+			if ((!shaders_trans && !shaders_ready) || platform_power_down_only) {
+				if (corestack_driver_control && !platform_power_down_only)
+					kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK,
+							stacks_avail, ACTION_PWROFF);
+
+				backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_OFF;
+			}
+			break;
+
+		case KBASE_SHADERS_OFF_CORESTACK_PEND_OFF:
+			if ((!stacks_trans && !stacks_ready) ||
+				platform_power_down_only) {
+				/* On powered off, re-enable the hwcnt */
+				backend->pm_shaders_core_mask = 0;
+				backend->hwcnt_desired = true;
+				if (backend->hwcnt_disabled) {
+					kbase_hwcnt_context_enable(
+						kbdev->hwcnt_gpu_ctx);
+					backend->hwcnt_disabled = false;
+				}
+				backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF;
+			}
+			break;
+
+		case KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF:
+			if (!hrtimer_active(&stt->timer) && !stt->cancel_queued)
+				backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF;
+			break;
+
+		case KBASE_SHADERS_RESET_WAIT:
+			/* Reset complete */
+			if (!backend->in_reset)
+				backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF;
+			break;
+		}
+
+		if (backend->shaders_state != prev_state)
+			dev_dbg(kbdev->dev, "Shader state transition: %s to %s\n",
+				kbase_shader_core_state_to_string(prev_state),
+				kbase_shader_core_state_to_string(
+					backend->shaders_state));
+
+	} while (backend->shaders_state != prev_state);
+}
+
+static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)
+{
+	bool in_desired_state = true;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbase_pm_is_l2_desired(kbdev) &&
+			kbdev->pm.backend.l2_state != KBASE_L2_ON)
+		in_desired_state = false;
+	else if (!kbase_pm_is_l2_desired(kbdev) &&
+			kbdev->pm.backend.l2_state != KBASE_L2_OFF)
+		in_desired_state = false;
+
+	if (kbdev->pm.backend.shaders_desired &&
+			kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON)
+		in_desired_state = false;
+	else if (!kbdev->pm.backend.shaders_desired &&
+			kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
+		in_desired_state = false;
+
+	return in_desired_state;
+}
+
+static bool kbase_pm_is_in_desired_state(struct kbase_device *kbdev)
+{
+	bool in_desired_state;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	in_desired_state = kbase_pm_is_in_desired_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return in_desired_state;
+}
+
+static bool kbase_pm_is_in_desired_state_with_l2_powered(
+		struct kbase_device *kbdev)
+{
+	bool in_desired_state = false;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (kbase_pm_is_in_desired_state_nolock(kbdev) &&
+			(kbdev->pm.backend.l2_state == KBASE_L2_ON))
+		in_desired_state = true;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return in_desired_state;
+}
+
+static void kbase_pm_trace_power_state(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_TLSTREAM_AUX_PM_STATE(
+			kbdev,
+			KBASE_PM_CORE_L2,
+			kbase_pm_get_ready_cores(
+				kbdev, KBASE_PM_CORE_L2));
+	KBASE_TLSTREAM_AUX_PM_STATE(
+			kbdev,
+			KBASE_PM_CORE_SHADER,
+			kbase_pm_get_ready_cores(
+				kbdev, KBASE_PM_CORE_SHADER));
+	KBASE_TLSTREAM_AUX_PM_STATE(
+			kbdev,
+			KBASE_PM_CORE_TILER,
+			kbase_pm_get_ready_cores(
+				kbdev,
+				KBASE_PM_CORE_TILER));
+
+	if (corestack_driver_control)
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				kbdev,
+				KBASE_PM_CORE_STACK,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_STACK));
+}
+
+void kbase_pm_update_state(struct kbase_device *kbdev)
+{
+	enum kbase_shader_core_state prev_shaders_state =
+			kbdev->pm.backend.shaders_state;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kbdev->pm.backend.gpu_powered)
+		return; /* Do nothing if the GPU is off */
+
+	kbase_pm_l2_update_state(kbdev);
+	kbase_pm_shaders_update_state(kbdev);
+
+	/* If the shaders just turned off, re-invoke the L2 state machine, in
+	 * case it was waiting for the shaders to turn off before powering down
+	 * the L2.
+	 */
+	if (prev_shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF &&
+			kbdev->pm.backend.shaders_state == KBASE_SHADERS_OFF_CORESTACK_OFF)
+		kbase_pm_l2_update_state(kbdev);
+
+	if (kbase_pm_is_in_desired_state_nolock(kbdev)) {
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+				true, kbdev->pm.backend.shaders_avail);
+
+		kbase_pm_trace_power_state(kbdev);
+
+		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
+	}
+}
+
+static enum hrtimer_restart
+shader_tick_timer_callback(struct hrtimer *timer)
+{
+	struct kbasep_pm_tick_timer_state *stt = container_of(timer,
+			struct kbasep_pm_tick_timer_state, timer);
+	struct kbase_device *kbdev = container_of(stt, struct kbase_device,
+			pm.backend.shader_tick_timer);
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+	unsigned long flags;
+	enum hrtimer_restart restart = HRTIMER_NORESTART;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (stt->remaining_ticks &&
+			backend->shaders_state == KBASE_SHADERS_WAIT_OFF_CORESTACK_ON) {
+		stt->remaining_ticks--;
+
+		/* If the remaining ticks just changed from 1 to 0, invoke the
+		 * PM state machine to power off the shader cores.
+		 */
+		if (!stt->remaining_ticks && !backend->shaders_desired)
+			kbase_pm_update_state(kbdev);
+	}
+
+	if (stt->needed) {
+		hrtimer_forward_now(timer, stt->configured_interval);
+		restart = HRTIMER_RESTART;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return restart;
+}
+
+int kbase_pm_state_machine_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer;
+
+	stt->wq = alloc_workqueue("kbase_pm_shader_poweroff", WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!stt->wq)
+		return -ENOMEM;
+
+	INIT_WORK(&stt->work, shader_poweroff_timer_stop_callback);
+
+	stt->needed = false;
+	hrtimer_init(&stt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	stt->timer.function = shader_tick_timer_callback;
+	stt->configured_interval = HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+	stt->configured_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+
+	return 0;
+}
+
+void kbase_pm_state_machine_term(struct kbase_device *kbdev)
+{
+	hrtimer_cancel(&kbdev->pm.backend.shader_tick_timer.timer);
+	destroy_workqueue(kbdev->pm.backend.shader_tick_timer.wq);
+}
+
+void kbase_pm_reset_start_locked(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	backend->in_reset = true;
+	backend->l2_state = KBASE_L2_RESET_WAIT;
+	backend->shaders_state = KBASE_SHADERS_RESET_WAIT;
+
+	/* We're in a reset, so hwcnt will have been synchronously disabled by
+	 * this function's caller as part of the reset process. We therefore
+	 * know that any call to kbase_hwcnt_context_disable_atomic, if
+	 * required to sync the hwcnt refcount with our internal state, is
+	 * guaranteed to succeed.
+	 */
+	backend->hwcnt_desired = false;
+	if (!backend->hwcnt_disabled) {
+		WARN_ON(!kbase_hwcnt_context_disable_atomic(
+			kbdev->hwcnt_gpu_ctx));
+		backend->hwcnt_disabled = true;
+	}
+
+	shader_poweroff_timer_queue_cancel(kbdev);
+}
+
+void kbase_pm_reset_complete(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	backend->in_reset = false;
+	kbase_pm_update_state(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has
+ * aborted due to a fatal signal. If the time spent waiting has exceeded this
+ * threshold then there is most likely a hardware issue. */
+#define PM_TIMEOUT (5*HZ) /* 5s */
+
+static void kbase_pm_timed_out(struct kbase_device *kbdev)
+{
+	dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+	dev_err(kbdev->dev, "Desired state :\n");
+	dev_err(kbdev->dev, "\tShader=%016llx\n",
+			kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0);
+	dev_err(kbdev->dev, "Current state :\n");
+	dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+			kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_READY_HI)),
+			kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_READY_LO)));
+	dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+			kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_READY_HI)),
+			kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_READY_LO)));
+	dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+			kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_READY_HI)),
+			kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_READY_LO)));
+	dev_err(kbdev->dev, "Cores transitioning :\n");
+	dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+			kbase_reg_read(kbdev, GPU_CONTROL_REG(
+					SHADER_PWRTRANS_HI)),
+			kbase_reg_read(kbdev, GPU_CONTROL_REG(
+					SHADER_PWRTRANS_LO)));
+	dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+			kbase_reg_read(kbdev, GPU_CONTROL_REG(
+					TILER_PWRTRANS_HI)),
+			kbase_reg_read(kbdev, GPU_CONTROL_REG(
+					TILER_PWRTRANS_LO)));
+	dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+			kbase_reg_read(kbdev, GPU_CONTROL_REG(
+					L2_PWRTRANS_HI)),
+			kbase_reg_read(kbdev, GPU_CONTROL_REG(
+					L2_PWRTRANS_LO)));
+
+	dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+}
+
+void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long timeout;
+	int err;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_update_state(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	timeout = jiffies + PM_TIMEOUT;
+
+	/* Wait for cores */
+	err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+			kbase_pm_is_in_desired_state_with_l2_powered(kbdev));
+
+	if (err < 0 && time_after(jiffies, timeout))
+		kbase_pm_timed_out(kbdev);
+}
+
+void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long timeout;
+	int err;
+
+	/* Let the state machine latch the most recent desired state. */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_update_state(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	timeout = jiffies + PM_TIMEOUT;
+
+	/* Wait for cores */
+	err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+			kbase_pm_is_in_desired_state(kbdev));
+
+	if (err < 0 && time_after(jiffies, timeout))
+		kbase_pm_timed_out(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state);
+
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Clear all interrupts,
+	 * and unmask them all.
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
+
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Mask all interrupts,
+	 * and clear them all.
+	 */
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF);
+}
+
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_disable_interrupts_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
+
+/*
+ * pmu layout:
+ * 0x0000: PMU TAG (RO) (0xCAFECAFE)
+ * 0x0004: PMU VERSION ID (RO) (0x00000000)
+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE)
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
+{
+	bool reset_required = is_resume;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_powered) {
+		/* Already turned on */
+		if (kbdev->poweroff_pending)
+			kbase_pm_enable_interrupts(kbdev);
+		kbdev->poweroff_pending = false;
+		KBASE_DEBUG_ASSERT(!is_resume);
+		return;
+	}
+
+	kbdev->poweroff_pending = false;
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u);
+
+	if (is_resume && kbdev->pm.backend.callback_power_resume) {
+		kbdev->pm.backend.callback_power_resume(kbdev);
+		return;
+	} else if (kbdev->pm.backend.callback_power_on) {
+		reset_required = kbdev->pm.backend.callback_power_on(kbdev);
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.gpu_powered = true;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (reset_required) {
+		/* GPU state was lost, reset GPU to ensure it is in a
+		 * consistent state */
+		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
+	}
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_restore_all_as(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Enable the interrupts */
+	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the L2 caches */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.l2_desired = true;
+	kbase_pm_update_state(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
+
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* ASSERT that the cores should now be unavailable. No lock needed. */
+	WARN_ON(kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF);
+
+	kbdev->poweroff_pending = true;
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* Already turned off */
+		if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+			kbdev->pm.backend.callback_power_suspend(kbdev);
+		return true;
+	}
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u);
+
+	/* Disable interrupts. This also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure that any IRQ handlers have finished */
+	kbase_synchronize_irqs(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (atomic_read(&kbdev->faults_pending)) {
+		/* Page/bus faults are still being processed. The GPU can not
+		 * be powered off until they have completed */
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return false;
+	}
+
+	kbase_pm_cache_snoop_disable(kbdev);
+
+	/* The GPU power may be turned off from this point */
+	kbdev->pm.backend.gpu_powered = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+		kbdev->pm.backend.callback_power_suspend(kbdev);
+	else if (kbdev->pm.backend.callback_power_off)
+		kbdev->pm.backend.callback_power_off(kbdev);
+	return true;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_off);
+
+struct kbasep_reset_timeout_data {
+	struct hrtimer timer;
+	bool timed_out;
+	struct kbase_device *kbdev;
+};
+
+void kbase_pm_reset_done(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	kbdev->pm.backend.reset_done = true;
+	wake_up(&kbdev->pm.backend.reset_done_wait);
+}
+
+/**
+ * kbase_pm_wait_for_reset - Wait for a reset to happen
+ *
+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
+ *
+ * @kbdev: Kbase device
+ */
+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	wait_event(kbdev->pm.backend.reset_done_wait,
+						(kbdev->pm.backend.reset_done));
+	kbdev->pm.backend.reset_done = false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done);
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+	struct kbasep_reset_timeout_data *rtdata =
+		container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+	rtdata->timed_out = 1;
+
+	/* Set the wait queue to wake up kbase_pm_init_hw even though the reset
+	 * hasn't completed */
+	kbase_pm_reset_done(rtdata->kbdev);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >>
+		GPU_ID_VERSION_MAJOR_SHIFT;
+
+	kbdev->hw_quirks_sc = 0;
+
+	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443.
+	 * and needed due to MIDGLES-3539. See PRLAM-11035 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+		kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
+
+	/* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+		kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	/* Enable alternative hardware counter selection if configured. */
+	if (!GPU_ID_IS_NEW_FORMAT(prod_id))
+		kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+#endif
+
+	/* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+		kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
+
+	if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
+		if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */
+			kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
+		else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */
+			kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
+	}
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_2968_TTRX_3162))
+		kbdev->hw_quirks_sc |= SC_VAR_ALGORITHM;
+
+	if (!kbdev->hw_quirks_sc)
+		kbdev->hw_quirks_sc = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_CONFIG));
+
+	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(TILER_CONFIG));
+
+	/* Set tiler clock gate override if required */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+		kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+
+	/* Limit the GPU bus bandwidth if the platform needs this. */
+	kbase_set_mmu_quirks(kbdev);
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Allow memory configuration disparity to be ignored, we
+		 * optimize the use of shared memory and thus we expect
+		 * some disparity in the memory configuration */
+		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
+	}
+
+	kbdev->hw_quirks_jm = 0;
+	/* Only for T86x/T88x-based products after r2p0 */
+	if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) {
+		u32 jm_values[4] = {0u, 0u, 0u, JM_MAX_JOB_THROTTLE_LIMIT};
+
+		/* If entry not in device tree (return value of this func != 0),
+		 * use defaults from jm_values[]'s initializer
+		 */
+		(void)of_property_read_u32_array(np,
+					"jm_config",
+					&jm_values[0],
+					ARRAY_SIZE(jm_values));
+
+		/* Limit throttle limit to 6 bits*/
+		if (jm_values[3] > JM_MAX_JOB_THROTTLE_LIMIT) {
+			dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63).");
+			jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT;
+		}
+
+		/* Aggregate to one integer. */
+		kbdev->hw_quirks_jm |= (jm_values[0] ?
+				JM_TIMESTAMP_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[1] ?
+				JM_CLOCK_GATE_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[2] ?
+				JM_JOB_THROTTLE_ENABLE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[3] <<
+				JM_JOB_THROTTLE_LIMIT_SHIFT);
+
+	} else if (GPU_ID_IS_NEW_FORMAT(prod_id) &&
+			   (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
+					   GPU_ID2_PRODUCT_TMIX)) {
+		/* Only for tMIx */
+		u32 coherency_features;
+
+		coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES));
+
+		/* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
+		 * documented for tMIx so force correct value here.
+		 */
+		if (coherency_features ==
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE)) {
+			kbdev->hw_quirks_jm |=
+				(COHERENCY_ACE_LITE | COHERENCY_ACE) <<
+				JM_FORCE_COHERENCY_FEATURES_SHIFT;
+		}
+	}
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING))
+		kbdev->hw_quirks_sc |= SC_TLS_HASH_ENABLE;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) {
+		int default_idvs_group_size = 0xF;
+		u32 tmp;
+
+		if (of_property_read_u32(kbdev->dev->of_node,
+					  "idvs-group-size", &tmp))
+			tmp = default_idvs_group_size;
+
+		if (tmp > JM_MAX_IDVS_GROUP_SIZE) {
+			dev_err(kbdev->dev,
+				"idvs-group-size of %d is too large. Maximum value is %d",
+				tmp, JM_MAX_IDVS_GROUP_SIZE);
+			tmp = default_idvs_group_size;
+		}
+
+		kbdev->hw_quirks_jm |= tmp << JM_IDVS_GROUP_SIZE_SHIFT;
+	}
+
+	if (!kbdev->hw_quirks_jm)
+		kbdev->hw_quirks_jm = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JM_CONFIG));
+
+#define MANUAL_POWER_CONTROL ((u32)(1 << 8))
+	if (corestack_driver_control)
+		kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL;
+}
+
+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
+{
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
+			kbdev->hw_quirks_sc);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
+			kbdev->hw_quirks_tiler);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
+			kbdev->hw_quirks_mmu);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
+			kbdev->hw_quirks_jm);
+
+}
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
+{
+	if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) &&
+		!kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_enable_smc != 0)
+			kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0);
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n");
+		kbdev->cci_snoop_enabled = true;
+	}
+}
+
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
+{
+	if (kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_disable_smc != 0) {
+			mali_cci_flush_l2(kbdev);
+			kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0);
+		}
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n");
+		kbdev->cci_snoop_enabled = false;
+	}
+}
+
+static int kbase_pm_do_reset(struct kbase_device *kbdev)
+{
+	struct kbasep_reset_timeout_data rtdata;
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+
+	KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_SOFT_RESET);
+
+	/* Unmask the reset complete interrupt only */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED);
+
+	/* Initialize a structure for tracking the status of the reset */
+	rtdata.kbdev = kbdev;
+	rtdata.timed_out = 0;
+
+	/* Create a timer to use as a timeout on the reset */
+	hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rtdata.timer.function = kbasep_reset_timeout;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	/* No interrupt has been received - check if the RAWSTAT register says
+	 * the reset has completed */
+	if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
+							RESET_COMPLETED) {
+		/* The interrupt is set in the RAWSTAT; this suggests that the
+		 * interrupts are not getting to the CPU */
+		dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+		/* If interrupts aren't working we can't continue. */
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return -EINVAL;
+	}
+
+	/* The GPU doesn't seem to be responding to the reset so try a hard
+	 * reset */
+	dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+								RESET_TIMEOUT);
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET);
+
+	/* Restart the timer to wait for the hard reset to complete */
+	rtdata.timed_out = 0;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	destroy_hrtimer_on_stack(&rtdata.timer);
+
+	dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+								RESET_TIMEOUT);
+
+	return -EINVAL;
+}
+
+static int kbasep_protected_mode_enable(struct protected_mode_device *pdev)
+{
+	struct kbase_device *kbdev = pdev->data;
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+		GPU_COMMAND_SET_PROTECTED_MODE);
+	return 0;
+}
+
+static int kbasep_protected_mode_disable(struct protected_mode_device *pdev)
+{
+	struct kbase_device *kbdev = pdev->data;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	return kbase_pm_do_reset(kbdev);
+}
+
+struct protected_mode_ops kbase_native_protected_ops = {
+	.protected_mode_enable = kbasep_protected_mode_enable,
+	.protected_mode_disable = kbasep_protected_mode_disable
+};
+
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+{
+	unsigned long irq_flags;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Ensure the clock is on before attempting to access the hardware */
+	if (!kbdev->pm.backend.gpu_powered) {
+		if (kbdev->pm.backend.callback_power_on)
+			kbdev->pm.backend.callback_power_on(kbdev);
+
+		kbdev->pm.backend.gpu_powered = true;
+	}
+
+	/* Ensure interrupts are off to begin with, this also clears any
+	 * outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure cache snoops are disabled before reset. */
+	kbase_pm_cache_snoop_disable(kbdev);
+	/* Prepare for the soft-reset */
+	kbdev->pm.backend.reset_done = false;
+
+	/* The cores should be made unavailable due to the reset */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
+		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+				NULL, 0u, (u32)0u);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	/* Soft reset the GPU */
+	if (kbdev->protected_mode_support)
+		err = kbdev->protected_ops->protected_mode_disable(
+				kbdev->protected_dev);
+	else
+		err = kbase_pm_do_reset(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	kbdev->protected_mode = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	if (err)
+		goto exit;
+
+	if (flags & PM_HW_ISSUES_DETECT)
+		kbase_pm_hw_issues_detect(kbdev);
+
+	kbase_pm_hw_issues_apply(kbdev);
+	kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
+
+	/* Sanity check protected mode was left after reset */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		u32 gpu_status = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS));
+
+		WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE);
+	}
+
+	/* If cycle counter was in use re-enable it, enable_irqs will only be
+	 * false when called from kbase_pm_powerup */
+	if (kbdev->pm.backend.gpu_cycle_counter_requests &&
+						(flags & PM_ENABLE_IRQS)) {
+		kbase_pm_enable_interrupts(kbdev);
+
+		/* Re-enable the counters if we need to */
+		spin_lock_irqsave(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+		if (kbdev->pm.backend.gpu_cycle_counter_requests)
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START);
+		spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+		kbase_pm_disable_interrupts(kbdev);
+	}
+
+	if (flags & PM_ENABLE_IRQS)
+		kbase_pm_enable_interrupts(kbdev);
+
+exit:
+	/* Re-enable GPU hardware counters if we're resetting from protected
+	 * mode.
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	kbdev->protected_mode_hwcnt_desired = true;
+	if (kbdev->protected_mode_hwcnt_disabled) {
+		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+		kbdev->protected_mode_hwcnt_disabled = false;
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+	return err;
+}
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ *
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * kbase_pm_request_gpu_cycle_counter() or
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only
+ *
+ * When this function is called the l2 cache must be on - i.e., the GPU must be
+ * on.
+ *
+ * @kbdev:     The kbase device structure of the device
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+	--kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_STOP);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+}
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100755
index 0000000..e88b3a8
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,624 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * kbase_pm_dev_idle - The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_dev_activate - The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_get_present_cores - Get details of the cores that are present in
+ *                              the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid
+ *         pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_active_cores - Get details of the cores that are currently
+ *                             active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_trans_cores - Get details of the cores that are currently
+ *                            transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_ready_cores - Get details of the cores that are currently
+ *                            powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are powered and ready for jobs (they may
+ * or may not be currently executing jobs).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_clock_on - Turn the clock for the device on, and enable device
+ *                     interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if clock on due to resume after suspend, false otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
+ *                      device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU
+ * off. It should be modified during integration to perform the necessary
+ * actions to turn the clock off (if this is possible in the integration).
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if clock off due to suspend, false otherwise
+ *
+ * Return: true  if clock was turned off, or
+ *         false if clock can not be turned off due to pending page/bus fault
+ *               workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
+
+/**
+ * kbase_pm_enable_interrupts - Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts - Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_update_state() will not be called from the IRQ handler
+ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
+ *                                      that does not take the hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * kbase_pm_reset_done - The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_desired_state - Wait for the desired power state to be
+ *                                   reached
+ *
+ * Wait for the L2 and shader power state machines to reach the states
+ * corresponding to the values of 'l2_desired' and 'shaders_desired'.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock,
+ * because this function will take that lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on
+ *
+ * Wait for the L2 to be powered on, and for the L2 and shader state machines to
+ * stabilise by reaching the states corresponding to the values of 'l2_desired'
+ * and 'shaders_desired'.
+ *
+ * kbdev->pm.active_count must be non-zero when calling this function.
+ *
+ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock,
+ * because this function will take that lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ *                                      where the caller must hold
+ *                                      kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_state - Update the L2 and shader power state machines
+ * @kbdev: Device pointer
+ */
+void kbase_pm_update_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_state_machine_init - Initialize the state machines, primarily the
+ *                               shader poweroff timer
+ * @kbdev: Device pointer
+ */
+int kbase_pm_state_machine_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_state_machine_term - Clean up the PM state machines' data
+ * @kbdev: Device pointer
+ */
+void kbase_pm_state_machine_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ *                               the Power Policy, and begin any power
+ *                               transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware acheive the desired shader
+ * core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init()) after calling this function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to
+ *                         update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of @buffer_updated differs from a previous call.
+ *
+ * @kbdev:          The kbase device structure for the device (must be a
+ *                  valid pointer)
+ * @buffer_updated: True if the buffer has been updated on this VSync,
+ *                  false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change
+ *                            the clock speed of the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating.
+ */
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
+ *                                      needed
+ *
+ * If the caller is the first caller then the GPU cycle counters will be enabled
+ * along with the l2 cache
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
+ *                                               needed (l2 cache already on)
+ *
+ * This is a version of the above function
+ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
+ * l2 cache is known to be on and assured to be on until the subsequent call of
+ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does
+ * not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
+ *                                      longer in use
+ *
+ * If the caller is the last caller then the GPU cycle counters will be
+ * disabled. A request must have been made before a call to this.
+ *
+ * Caller must not hold the hwaccess_lock, as it will be taken in this function.
+ * If the caller is already holding this lock then
+ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
+ *                                             that does not take hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
+ *                                       complete
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device
+ *
+ * Setup the power management callbacks and initialize/enable the runtime-pm
+ * for the Mali GPU platform device, using the callback function. This must be
+ * called before the kbase_pm_register_access_enable() function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+int kbase_pm_runtime_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_runtime_term - Disable runtime-pm for Mali GPU platform device
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_runtime_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ *                              collection is active.
+ *
+ * Note that this returns if the power management metrics collection was
+ * active at the time of calling, it is possible that after the call the metrics
+ * collection enable may have changed state.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if power on due to resume after suspend,
+ *             false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been
+ *                        requested.
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if power off due to suspend,
+ *              false otherwise
+ */
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+
+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
+			       struct kbasep_pm_metrics *last,
+			       struct kbasep_pm_metrics *diff);
+#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @kbdev:         The kbase device structure for the device (must be a
+ *                 valid pointer)
+ * @utilisation:   The current calculated utilisation by the metrics system.
+ * @util_gl_share: The current calculated gl share of utilisation.
+ * @util_cl_share: The current calculated cl share of utilisation per core
+ *                 group.
+ * Return:         Returns 0 on failure and non zero on success.
+ */
+
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2]);
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_metrics_update - Inform the metrics system that an atom is either
+ *                           about to be run or has just completed.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @now:   Pointer to the timestamp of the change, or NULL to use current time
+ *
+ * Caller must hold hwaccess_lock
+ */
+void kbase_pm_metrics_update(struct kbase_device *kbdev,
+				ktime_t *now);
+
+/**
+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called after L2 power up.
+ */
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called before L2 power off.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_DEVFREQ
+/**
+ * kbase_devfreq_set_core_mask - Set devfreq core mask
+ * @kbdev:     Device pointer
+ * @core_mask: New core mask
+ *
+ * This function is used by devfreq to change the available core mask as
+ * required by Dynamic Core Scaling.
+ */
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask);
+#endif
+
+/**
+ * kbase_pm_reset_start_locked - Signal that GPU reset has started
+ * @kbdev: Device pointer
+ *
+ * Normal power management operation will be suspended until the reset has
+ * completed.
+ *
+ * Caller must hold hwaccess_lock.
+ */
+void kbase_pm_reset_start_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_reset_complete - Signal that GPU reset has completed
+ * @kbdev: Device pointer
+ *
+ * Normal power management operation will be resumed. The power manager will
+ * re-evaluate what cores are needed and power on or off as required.
+ */
+void kbase_pm_reset_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_protected_override_enable - Enable the protected mode override
+ * @kbdev: Device pointer
+ *
+ * When the protected mode override is enabled, all shader cores are requested
+ * to power down, and the L2 power state can be controlled by
+ * kbase_pm_protected_l2_override().
+ *
+ * Caller must hold hwaccess_lock.
+ */
+void kbase_pm_protected_override_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_protected_override_disable - Disable the protected mode override
+ * @kbdev: Device pointer
+ *
+ * Caller must hold hwaccess_lock.
+ */
+void kbase_pm_protected_override_disable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_protected_l2_override - Control the protected mode L2 override
+ * @kbdev: Device pointer
+ * @override: true to enable the override, false to disable
+ *
+ * When the driver is transitioning in or out of protected mode, the L2 cache is
+ * forced to power off. This can be overridden to force the L2 cache to power
+ * on. This is required to change coherency settings on some GPUs.
+ */
+void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override);
+
+/* If true, the driver should explicitly control corestack power management,
+ * instead of relying on the Power Domain Controller.
+ */
+extern bool corestack_driver_control;
+
+/* If true, disable powering-down of individual cores, and just power-down at
+ * the top-level using platform-specific code.
+ * If false, use the expected behaviour of controlling the individual cores
+ * from within the driver.
+ */
+extern bool platform_power_down_only;
+
+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h
new file mode 100644
index 0000000..94bad77
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend-specific Power Manager level 2 cache state definitions.
+ * The function-like macro KBASEP_L2_STATE() must be defined before including
+ * this header file. This header file can be included multiple times in the
+ * same compilation unit with different definitions of KBASEP_L2_STATE().
+ */
+KBASEP_L2_STATE(OFF)
+KBASEP_L2_STATE(PEND_ON)
+KBASEP_L2_STATE(ON_HWCNT_ENABLE)
+KBASEP_L2_STATE(ON)
+KBASEP_L2_STATE(ON_HWCNT_DISABLE)
+KBASEP_L2_STATE(POWER_DOWN)
+KBASEP_L2_STATE(PEND_OFF)
+KBASEP_L2_STATE(RESET_WAIT)
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
new file mode 100755
index 0000000..6b9b686
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -0,0 +1,295 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_rb.h>
+#include <backend/gpu/mali_kbase_pm_defs.h>
+
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION          70
+#define KBASE_PM_VSYNC_MAX_UTILISATION          90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION       10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION       40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
+ * under 11s. Exceeding this will cause overflow */
+#define KBASE_PM_TIME_SHIFT			8
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbasep_pm_metrics_state *metrics;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	metrics = container_of(timer, struct kbasep_pm_metrics_state, timer);
+	kbase_pm_get_dvfs_action(metrics->kbdev);
+
+	spin_lock_irqsave(&metrics->lock, flags);
+
+	if (metrics->timer_active)
+		hrtimer_start(timer,
+			HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&metrics->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+int kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.metrics.kbdev = kbdev;
+
+	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+	kbdev->pm.backend.metrics.gpu_active = false;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+
+	kbdev->pm.backend.metrics.values.time_busy = 0;
+	kbdev->pm.backend.metrics.values.time_idle = 0;
+	kbdev->pm.backend.metrics.values.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.values.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.values.busy_gl = 0;
+
+	spin_lock_init(&kbdev->pm.backend.metrics.lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbdev->pm.backend.metrics.timer_active = true;
+	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
+
+	hrtimer_start(&kbdev->pm.backend.metrics.timer,
+			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
+
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbdev->pm.backend.metrics.timer_active = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	ktime_t diff;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+	if (ktime_to_ns(diff) < 0)
+		return;
+
+	if (kbdev->pm.backend.metrics.gpu_active) {
+		u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+
+		kbdev->pm.backend.metrics.values.time_busy += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[0])
+			kbdev->pm.backend.metrics.values.busy_cl[0] += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[1])
+			kbdev->pm.backend.metrics.values.busy_cl[1] += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[0])
+			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
+			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
+	} else {
+		kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff)
+							>> KBASE_PM_TIME_SHIFT);
+	}
+
+	kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
+			       struct kbasep_pm_metrics *last,
+			       struct kbasep_pm_metrics *diff)
+{
+	struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get());
+
+	memset(diff, 0, sizeof(*diff));
+	diff->time_busy = cur->time_busy - last->time_busy;
+	diff->time_idle = cur->time_idle - last->time_idle;
+	diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0];
+	diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1];
+	diff->busy_gl = cur->busy_gl - last->busy_gl;
+
+	*last = *cur;
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics);
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+	int utilisation, util_gl_share;
+	int util_cl_share[2];
+	int busy;
+	struct kbasep_pm_metrics *diff;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	diff = &kbdev->pm.backend.metrics.dvfs_diff;
+
+	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff);
+
+	utilisation = (100 * diff->time_busy) /
+			max(diff->time_busy + diff->time_idle, 1u);
+
+	busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u);
+	util_gl_share = (100 * diff->busy_gl) / busy;
+	util_cl_share[0] = (100 * diff->busy_cl[0]) / busy;
+	util_cl_share[1] = (100 * diff->busy_cl[1]) / busy;
+
+	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share);
+}
+
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+	bool isactive;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	isactive = kbdev->pm.backend.metrics.timer_active;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	return isactive;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/**
+ * kbase_pm_metrics_active_calc - Update PM active counts based on currently
+ *                                running atoms
+ * @kbdev: Device pointer
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ */
+static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		/* Head atom may have just completed, so if it isn't running
+		 * then try the next atom */
+		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
+			katom = kbase_gpu_inspect(kbdev, js, 1);
+
+		if (katom && katom->gpu_rb_state ==
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+			if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+				int device_nr = (katom->core_req &
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+						? katom->device_nr : 0;
+				if (!WARN_ON(device_nr >= 2))
+					kbdev->pm.backend.metrics.
+						active_cl_ctx[device_nr] = 1;
+			} else {
+				/* Slot 2 should not be running non-compute
+				 * atoms */
+				if (!WARN_ON(js >= 2))
+					kbdev->pm.backend.metrics.
+						active_gl_ctx[js] = 1;
+			}
+			kbdev->pm.backend.metrics.gpu_active = true;
+		}
+	}
+}
+
+/* called when job is submitted to or removed from a GPU slot */
+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
+{
+	unsigned long flags;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	if (!timestamp) {
+		now = ktime_get();
+		timestamp = &now;
+	}
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
+
+	kbase_pm_metrics_active_calc(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100755
index 0000000..3152424
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,253 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const all_policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_always_on_demand_policy_ops,
+#endif
+#else				/* CONFIG_MALI_NO_MALI */
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_always_on_demand_policy_ops,
+#endif
+	&kbase_pm_always_on_policy_ops
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+static void generate_filtered_policy_list(struct kbase_device *kbdev)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(all_policy_list); ++i) {
+		const struct kbase_pm_policy *pol = all_policy_list[i];
+
+		BUILD_BUG_ON(ARRAY_SIZE(all_policy_list) >
+			KBASE_PM_MAX_NUM_POLICIES);
+		if (platform_power_down_only &&
+				(pol->flags & KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY))
+			continue;
+
+		kbdev->policy_list[kbdev->policy_count++] = pol;
+	}
+}
+
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+	generate_filtered_policy_list(kbdev);
+	if (kbdev->policy_count == 0)
+		return -EINVAL;
+
+	kbdev->pm.backend.pm_current_policy = kbdev->policy_list[0];
+	kbdev->pm.backend.pm_current_policy->init(kbdev);
+
+	return 0;
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.pm_current_policy->term(kbdev);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+	bool active;
+
+	lockdep_assert_held(&pm->lock);
+
+	/* pm_current_policy will never be NULL while pm.lock is held */
+	KBASE_DEBUG_ASSERT(backend->pm_current_policy);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	active = backend->pm_current_policy->get_core_active(kbdev);
+	WARN((kbase_pm_is_active(kbdev) && !active),
+		"GPU is active but policy '%s' is indicating that it can be powered off",
+		kbdev->pm.backend.pm_current_policy->name);
+
+	if (active) {
+		/* Power on the GPU and any cores requested by the policy */
+		if (!pm->backend.invoke_poweroff_wait_wq_when_l2_off &&
+				pm->backend.poweroff_wait_in_progress) {
+			KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+			pm->backend.poweron_required = true;
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		} else {
+			/* Cancel the the invocation of
+			 * kbase_pm_gpu_poweroff_wait_wq() from the L2 state
+			 * machine. This is safe - it
+			 * invoke_poweroff_wait_wq_when_l2_off is true, then
+			 * the poweroff work hasn't even been queued yet,
+			 * meaning we can go straight to powering on.
+			 */
+			pm->backend.invoke_poweroff_wait_wq_when_l2_off = false;
+			pm->backend.poweroff_wait_in_progress = false;
+			pm->backend.l2_desired = true;
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			kbase_pm_do_poweron(kbdev, false);
+		}
+	} else {
+		/* It is an error for the power policy to power off the GPU
+		 * when there are contexts active */
+		KBASE_DEBUG_ASSERT(pm->active_count == 0);
+
+		/* Request power off */
+		if (pm->backend.gpu_powered) {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+			/* Power off the GPU immediately */
+			kbase_pm_do_poweroff(kbdev, false);
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+	}
+}
+
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+	bool shaders_desired;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.pm_current_policy == NULL)
+		return;
+	if (kbdev->pm.backend.poweroff_wait_in_progress)
+		return;
+
+	if (kbdev->pm.backend.protected_transition_override)
+		/* We are trying to change in/out of protected mode - force all
+		 * cores off so that the L2 powers down */
+		shaders_desired = false;
+	else
+		shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev);
+
+	if (kbdev->pm.backend.shaders_desired != shaders_desired) {
+		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
+				(u32)kbdev->pm.backend.shaders_desired);
+
+		kbdev->pm.backend.shaders_desired = shaders_desired;
+		kbase_pm_update_state(kbdev);
+	}
+}
+
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+int kbase_pm_list_policies(struct kbase_device *kbdev,
+	const struct kbase_pm_policy * const **list)
+{
+	WARN_ON(kbdev->policy_count == 0);
+	if (list)
+		*list = kbdev->policy_list;
+
+	return kbdev->policy_count;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
+
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
+
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_policy *new_policy)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	const struct kbase_pm_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.pm_current_policy;
+	kbdev->pm.backend.pm_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
+								old_policy->id);
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
+								new_policy->id);
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.pm_current_policy = new_policy;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_active(kbdev);
+	kbase_pm_update_cores_state(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
new file mode 100755
index 0000000..966fce7
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -0,0 +1,109 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ *         initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cores_requested - Check that a power request has been locked into
+ *                            the HW.
+ * @kbdev:           Kbase device
+ * @shader_required: true if shaders are required
+ *
+ * Called by the scheduler to check if a power on request has been locked into
+ * the HW.
+ *
+ * Note that there is no guarantee that the cores are actually ready, however
+ * when the request has been locked into the HW, then it is safe to submit work
+ * since the HW will wait for the transition to ready.
+ *
+ * A reference must first be taken prior to making this call.
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * Return: true if the request to the HW was successfully made else false if the
+ *         request is still pending.
+ */
+static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev,
+		bool shader_required)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* If the L2 & tiler are not on or pending, then the tiler is not yet
+	 * available, and shaders are definitely not powered.
+	 */
+	if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON &&
+			kbdev->pm.backend.l2_state != KBASE_L2_ON &&
+			kbdev->pm.backend.l2_state != KBASE_L2_ON_HWCNT_ENABLE)
+		return false;
+
+	if (shader_required &&
+			kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON &&
+			kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON &&
+			kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON_RECHECK)
+		return false;
+
+	return true;
+}
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h
new file mode 100644
index 0000000..3f89eb5
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend-specific Power Manager shader core state definitions.
+ * The function-like macro KBASEP_SHADER_STATE() must be defined before
+ * including this header file. This header file can be included multiple
+ * times in the same compilation unit with different definitions of
+ * KBASEP_SHADER_STATE().
+ */
+KBASEP_SHADER_STATE(OFF_CORESTACK_OFF)
+KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_ON)
+KBASEP_SHADER_STATE(PEND_ON_CORESTACK_ON)
+KBASEP_SHADER_STATE(ON_CORESTACK_ON)
+KBASEP_SHADER_STATE(ON_CORESTACK_ON_RECHECK)
+KBASEP_SHADER_STATE(WAIT_OFF_CORESTACK_ON)
+KBASEP_SHADER_STATE(WAIT_FINISHED_CORESTACK_ON)
+KBASEP_SHADER_STATE(L2_FLUSHING_CORESTACK_ON)
+KBASEP_SHADER_STATE(READY_OFF_CORESTACK_ON)
+KBASEP_SHADER_STATE(PEND_OFF_CORESTACK_ON)
+KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_OFF)
+KBASEP_SHADER_STATE(OFF_CORESTACK_OFF_TIMER_PEND_OFF)
+KBASEP_SHADER_STATE(RESET_WAIT)
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100755
index 0000000..5e1b761
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,104 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI));
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO));
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI));
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI));
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO));
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI));
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush -  Wait for GPU write flush
+ * @kbdev: Kbase device
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367
+ *
+ * Note : If GPU resets occur then the counters are reset to zero, the delay may
+ * not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_device *kbdev)
+{
+	u32 base_count = 0;
+
+	/*
+	 * The caller must be holding onto the kctx or the call is from
+	 * userspace.
+	 */
+	kbase_pm_context_active(kbdev);
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	while (true) {
+		u32 new_count;
+
+		new_count = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO));
+		/* First time around, just store the count. */
+		if (base_count == 0) {
+			base_count = new_count;
+			continue;
+		}
+
+		/* No need to handle wrapping, unsigned maths works for this. */
+		if ((new_count - base_count) > 1000)
+			break;
+	}
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+	kbase_pm_context_idle(kbdev);
+}
+#endif				/* CONFIG_MALI_NO_MALI */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/build.bp b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/build.bp
new file mode 100755
index 0000000..666f500
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/build.bp
@@ -0,0 +1,128 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2017-2019 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+/* Kernel-side tests may include mali_kbase's headers. Therefore any config
+ * options which affect the sizes of any structs (e.g. adding extra members)
+ * must be included in these defaults, so that the structs are consistent in
+ * both mali_kbase and the test modules. */
+bob_defaults {
+    name: "mali_kbase_shared_config_defaults",
+    no_mali: {
+        kbuild_options: ["CONFIG_MALI_NO_MALI=y"],
+    },
+    mali_real_hw: {
+        kbuild_options: ["CONFIG_MALI_REAL_HW=y"],
+    },
+    mali_devfreq: {
+        kbuild_options: ["CONFIG_MALI_DEVFREQ=y"],
+    },
+    mali_midgard_dvfs: {
+        kbuild_options: ["CONFIG_MALI_MIDGARD_DVFS=y"],
+    },
+    mali_debug: {
+        kbuild_options: ["CONFIG_MALI_DEBUG=y"],
+    },
+    buslog: {
+        kbuild_options: ["CONFIG_MALI_BUSLOG=y"],
+    },
+    cinstr_job_dump: {
+        kbuild_options: ["CONFIG_MALI_JOB_DUMP=y"],
+    },
+    cinstr_vector_dump: {
+        kbuild_options: ["CONFIG_MALI_VECTOR_DUMP=y"],
+    },
+    cinstr_gwt: {
+        kbuild_options: ["CONFIG_MALI_CINSTR_GWT=y"],
+    },
+    mali_gator_support: {
+        kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"],
+    },
+    mali_system_trace: {
+        kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"],
+    },
+    mali_pwrsoft_765: {
+        kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"],
+    },
+    mali_memory_fully_backed: {
+        kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"],
+    },
+    mali_dma_buf_map_on_demand: {
+        kbuild_options: ["CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y"],
+    },
+    mali_dma_buf_legacy_compat: {
+        kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"],
+    },
+    kbuild_options: [
+        "MALI_UNIT_TEST={{.unit_test_code}}",
+        "MALI_CUSTOMER_RELEASE={{.release}}",
+        "MALI_USE_CSF={{.gpu_has_csf}}",
+        "MALI_KERNEL_TEST_API={{.debug}}",
+    ],
+    defaults: ["kernel_defaults"],
+}
+
+bob_kernel_module {
+    name: "mali_kbase",
+    srcs: [
+        "*.c",
+        "*.h",
+        "Kbuild",
+        "backend/gpu/*.c",
+        "backend/gpu/*.h",
+        "backend/gpu/Kbuild",
+        "ipa/*.c",
+        "ipa/*.h",
+        "ipa/Kbuild",
+        "platform/*.h",
+        "platform/*/*.c",
+        "platform/*/*.h",
+        "platform/*/Kbuild",
+        "thirdparty/*.c",
+    ],
+    kbuild_options: [
+        "CONFIG_MALI_KUTF=n",
+        "CONFIG_MALI_MIDGARD=m",
+        "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
+        "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
+    ],
+    buslog: {
+        extra_symbols: [
+            "bus_logger",
+        ],
+    },
+    mali_corestack: {
+        kbuild_options: ["CONFIG_MALI_CORESTACK=y"],
+    },
+    mali_platform_power_down_only: {
+        kbuild_options: ["CONFIG_MALI_PLATFORM_POWER_DOWN_ONLY=y"],
+    },
+    mali_error_inject: {
+        kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"],
+    },
+    mali_error_inject_random: {
+        kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"],
+    },
+    cinstr_secondary_hwc: {
+        kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"],
+    },
+    mali_2mb_alloc: {
+        kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
+    },
+    gpu_has_csf: {
+        srcs: [
+            "csf/*.c",
+            "csf/*.h",
+            "csf/Kbuild",
+        ],
+    },
+    defaults: ["mali_kbase_shared_config_defaults"],
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
new file mode 100755
index 0000000..6498dcb
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -0,0 +1,132 @@
+#
+# (C) COPYRIGHT 2011-2013, 2015, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/drivers/gpu/arm/midgard/
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE                += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           += ../../kernel/drivers/gpu/arm/midgard/docs
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
new file mode 100755
index 0000000..a15b558
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
@@ -0,0 +1,117 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR;
+	size="12,8";
+	compound=true;
+
+	node [ shape = box ];
+
+	subgraph cluster_policy_queues {
+		low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+		queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+		rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+		label = "Policy's Queue(s)";
+	}
+
+	call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+	{
+		rank=same;
+		ordering=out;
+		call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+		call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+		call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+	}
+
+	subgraph cluster_runpool {
+
+		as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+		as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+		as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+		as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+		label = "Policy's Run Pool";
+	}
+
+	{
+		rank=same;
+		call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+		sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+	}
+
+	{
+		rank=same;
+		ordering=out;
+		sstop [ shape=ellipse label="SS-Timer expires" ]
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		irq [ label="IRQ" shape=ellipse ];
+
+		job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+	}
+
+	hstop [ shape=ellipse label="HS-Timer expires" ]
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+	low_queue:qr -> call_dequeue:w;
+	rt_queue:qr -> call_dequeue:w;
+
+	call_dequeue -> as1 [lhead=cluster_runpool];
+
+	as1->call_jdequeue         [ltail=cluster_runpool];
+	call_jdequeue->jobslots:0;
+	call_jdequeue->sstop_dotfixup [ arrowhead=none];
+	sstop_dotfixup->sstop      [label="Spawn SS-Timer"];
+	sstop->jobslots            [label="SoftStop"];
+	sstop->hstop               [label="Spawn HS-Timer"];
+	hstop->jobslots:ne            [label="HardStop"];
+
+
+	as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+	call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+	call_ctxfinish->call_ctxdone [constraint=false];
+
+	call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+	{
+	jobslots->irq   [constraint=false];
+
+	irq->job_finish [constraint=false];
+	}
+
+	irq->as2  [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
new file mode 100755
index 0000000..6b87335
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR
+	size="6,6"
+	compound=true;
+
+	node [ shape = box ];
+
+	call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+	policy_queue [ label="Policy's Queue" ];
+
+	{
+		rank=same;
+		runpool [ label="Policy's Run Pool" ];
+
+		ctx_finish [ label="ctx finished" ];
+	}
+
+	{
+		rank=same;
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		job_finish [ label="Job finished" ];
+	}
+
+
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> policy_queue;
+
+	policy_queue->runpool [label="dequeue ctx" weight=0.1];
+	runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+	runpool->ctx_finish [ style=dotted ];
+
+	runpool->jobslots  [label="dequeue job" weight=0.1];
+	jobslots->runpool  [label="requeue job" weight=0.1];
+
+	jobslots->job_finish [ style=dotted ];
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild
new file mode 100755
index 0000000..3d9cf80
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+	ipa/mali_kbase_ipa_simple.o \
+	ipa/mali_kbase_ipa.o \
+	ipa/mali_kbase_ipa_vinstr_g7x.o \
+	ipa/mali_kbase_ipa_vinstr_common.o
+
+mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
\ No newline at end of file
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
new file mode 100755
index 0000000..65b4edf
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
@@ -0,0 +1,667 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include <linux/thermal.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/of.h>
+#include "mali_kbase.h"
+#include "mali_kbase_ipa.h"
+#include "mali_kbase_ipa_debugfs.h"
+#include "mali_kbase_ipa_simple.h"
+#include "backend/gpu/mali_kbase_pm_internal.h"
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#define dev_pm_opp_find_freq_exact opp_find_freq_exact
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp opp
+#endif
+
+#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model"
+
+static const struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
+	&kbase_simple_ipa_model_ops,
+	&kbase_g71_ipa_model_ops,
+	&kbase_g72_ipa_model_ops,
+	&kbase_g76_ipa_model_ops,
+	&kbase_g52_ipa_model_ops,
+	&kbase_g52_r1_ipa_model_ops,
+	&kbase_g51_ipa_model_ops,
+	&kbase_g77_ipa_model_ops
+};
+
+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
+{
+	int err = 0;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (model->ops->recalculate) {
+		err = model->ops->recalculate(model);
+		if (err) {
+			dev_err(model->kbdev->dev,
+				"recalculation of power model %s returned error %d\n",
+				model->ops->name, err);
+		}
+	}
+
+	return err;
+}
+
+const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
+							    const char *name)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) {
+		const struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i];
+
+		if (!strcmp(ops->name, name))
+			return ops;
+	}
+
+	dev_err(kbdev->dev, "power model \'%s\' not found\n", name);
+
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find);
+
+const char *kbase_ipa_model_name_from_id(u32 gpu_id)
+{
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+			GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(prod_id)) {
+		switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
+		case GPU_ID2_PRODUCT_TMIX:
+			return "mali-g71-power-model";
+		case GPU_ID2_PRODUCT_THEX:
+			return "mali-g72-power-model";
+		case GPU_ID2_PRODUCT_TNOX:
+			return "mali-g76-power-model";
+		case GPU_ID2_PRODUCT_TSIX:
+			return "mali-g51-power-model";
+		case GPU_ID2_PRODUCT_TGOX:
+			if ((gpu_id & GPU_ID2_VERSION_MAJOR) ==
+					(0 << GPU_ID2_VERSION_MAJOR_SHIFT))
+				/* g52 aliased to g76 power-model's ops */
+				return "mali-g52-power-model";
+			else
+				return "mali-g52_r1-power-model";
+		case GPU_ID2_PRODUCT_TTRX:
+			return "mali-g77-power-model";
+		default:
+			return KBASE_IPA_FALLBACK_MODEL_NAME;
+		}
+	}
+
+	return KBASE_IPA_FALLBACK_MODEL_NAME;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id);
+
+static struct device_node *get_model_dt_node(struct kbase_ipa_model *model)
+{
+	struct device_node *model_dt_node;
+	char compat_string[64];
+
+	snprintf(compat_string, sizeof(compat_string), "arm,%s",
+		 model->ops->name);
+
+	/* of_find_compatible_node() will call of_node_put() on the root node,
+	 * so take a reference on it first.
+	 */
+	of_node_get(model->kbdev->dev->of_node);
+	model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node,
+						NULL, compat_string);
+	if (!model_dt_node && !model->missing_dt_node_warning) {
+		dev_warn(model->kbdev->dev,
+			 "Couldn't find power_model DT node matching \'%s\'\n",
+			 compat_string);
+		model->missing_dt_node_warning = true;
+	}
+
+	return model_dt_node;
+}
+
+int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
+				  const char *name, s32 *addr,
+				  size_t num_elems, bool dt_required)
+{
+	int err, i;
+	struct device_node *model_dt_node = get_model_dt_node(model);
+	char *origin;
+
+	err = of_property_read_u32_array(model_dt_node, name, addr, num_elems);
+	/* We're done with model_dt_node now, so drop the reference taken in
+	 * get_model_dt_node()/of_find_compatible_node().
+	 */
+	of_node_put(model_dt_node);
+
+	if (err && dt_required) {
+		memset(addr, 0, sizeof(s32) * num_elems);
+		dev_warn(model->kbdev->dev,
+			 "Error %d, no DT entry: %s.%s = %zu*[0]\n",
+			 err, model->ops->name, name, num_elems);
+		origin = "zero";
+	} else if (err && !dt_required) {
+		origin = "default";
+	} else /* !err */ {
+		origin = "DT";
+	}
+
+	/* Create a unique debugfs entry for each element */
+	for (i = 0; i < num_elems; ++i) {
+		char elem_name[32];
+
+		if (num_elems == 1)
+			snprintf(elem_name, sizeof(elem_name), "%s", name);
+		else
+			snprintf(elem_name, sizeof(elem_name), "%s.%d",
+				name, i);
+
+		dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n",
+			model->ops->name, elem_name, addr[i], origin);
+
+		err = kbase_ipa_model_param_add(model, elem_name,
+						&addr[i], sizeof(s32),
+						PARAM_TYPE_S32);
+		if (err)
+			goto exit;
+	}
+exit:
+	return err;
+}
+
+int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
+				     const char *name, char *addr,
+				     size_t size, bool dt_required)
+{
+	int err;
+	struct device_node *model_dt_node = get_model_dt_node(model);
+	const char *string_prop_value;
+	char *origin;
+
+	err = of_property_read_string(model_dt_node, name,
+				      &string_prop_value);
+
+	/* We're done with model_dt_node now, so drop the reference taken in
+	 * get_model_dt_node()/of_find_compatible_node().
+	 */
+	of_node_put(model_dt_node);
+
+	if (err && dt_required) {
+		strncpy(addr, "", size - 1);
+		dev_warn(model->kbdev->dev,
+			 "Error %d, no DT entry: %s.%s = \'%s\'\n",
+			 err, model->ops->name, name, addr);
+		err = 0;
+		origin = "zero";
+	} else if (err && !dt_required) {
+		origin = "default";
+	} else /* !err */ {
+		strncpy(addr, string_prop_value, size - 1);
+		origin = "DT";
+	}
+
+	addr[size - 1] = '\0';
+
+	dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n",
+		model->ops->name, name, string_prop_value, origin);
+
+	err = kbase_ipa_model_param_add(model, name, addr, size,
+					PARAM_TYPE_STRING);
+	return err;
+}
+
+void kbase_ipa_term_model(struct kbase_ipa_model *model)
+{
+	if (!model)
+		return;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (model->ops->term)
+		model->ops->term(model);
+
+	kbase_ipa_model_param_free_all(model);
+
+	kfree(model);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_term_model);
+
+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
+					     const struct kbase_ipa_model_ops *ops)
+{
+	struct kbase_ipa_model *model;
+	int err;
+
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	if (!ops || !ops->name)
+		return NULL;
+
+	model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL);
+	if (!model)
+		return NULL;
+
+	model->kbdev = kbdev;
+	model->ops = ops;
+	INIT_LIST_HEAD(&model->params);
+
+	err = model->ops->init(model);
+	if (err) {
+		dev_err(kbdev->dev,
+			"init of power model \'%s\' returned error %d\n",
+			ops->name, err);
+		kfree(model);
+		return NULL;
+	}
+
+	err = kbase_ipa_model_recalculate(model);
+	if (err) {
+		kbase_ipa_term_model(model);
+		return NULL;
+	}
+
+	return model;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_init_model);
+
+static void kbase_ipa_term_locked(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	/* Clean up the models */
+	if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model)
+		kbase_ipa_term_model(kbdev->ipa.configured_model);
+	kbase_ipa_term_model(kbdev->ipa.fallback_model);
+
+	kbdev->ipa.configured_model = NULL;
+	kbdev->ipa.fallback_model = NULL;
+}
+
+int kbase_ipa_init(struct kbase_device *kbdev)
+{
+
+	const char *model_name;
+	const struct kbase_ipa_model_ops *ops;
+	struct kbase_ipa_model *default_model = NULL;
+	int err;
+
+	mutex_init(&kbdev->ipa.lock);
+	/*
+	 * Lock during init to avoid warnings from lockdep_assert_held (there
+	 * shouldn't be any concurrent access yet).
+	 */
+	mutex_lock(&kbdev->ipa.lock);
+
+	/* The simple IPA model must *always* be present.*/
+	ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME);
+
+	default_model = kbase_ipa_init_model(kbdev, ops);
+	if (!default_model) {
+		err = -EINVAL;
+		goto end;
+	}
+
+	kbdev->ipa.fallback_model = default_model;
+	err = of_property_read_string(kbdev->dev->of_node,
+				      "ipa-model",
+				      &model_name);
+	if (err) {
+		/* Attempt to load a match from GPU-ID */
+		u32 gpu_id;
+
+		gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		model_name = kbase_ipa_model_name_from_id(gpu_id);
+		dev_dbg(kbdev->dev,
+			"Inferring model from GPU ID 0x%x: \'%s\'\n",
+			gpu_id, model_name);
+		err = 0;
+	} else {
+		dev_dbg(kbdev->dev,
+			"Using ipa-model parameter from DT: \'%s\'\n",
+			model_name);
+	}
+
+	if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) {
+		ops = kbase_ipa_model_ops_find(kbdev, model_name);
+		kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops);
+		if (!kbdev->ipa.configured_model) {
+			dev_warn(kbdev->dev,
+				"Failed to initialize ipa-model: \'%s\'\n"
+				"Falling back on default model\n",
+				model_name);
+			kbdev->ipa.configured_model = default_model;
+		}
+	} else {
+		kbdev->ipa.configured_model = default_model;
+	}
+
+end:
+	if (err)
+		kbase_ipa_term_locked(kbdev);
+	else
+		dev_info(kbdev->dev,
+			 "Using configured power model %s, and fallback %s\n",
+			 kbdev->ipa.configured_model->ops->name,
+			 kbdev->ipa.fallback_model->ops->name);
+
+	mutex_unlock(&kbdev->ipa.lock);
+	return err;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_init);
+
+void kbase_ipa_term(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->ipa.lock);
+	kbase_ipa_term_locked(kbdev);
+	mutex_unlock(&kbdev->ipa.lock);
+
+	mutex_destroy(&kbdev->ipa.lock);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_term);
+
+/**
+ * kbase_scale_dynamic_power() - Scale a dynamic power coefficient to an OPP
+ * @c:		Dynamic model coefficient, in pW/(Hz V^2). Should be in range
+ *		0 < c < 2^26 to prevent overflow.
+ * @freq:	Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz)
+ * @voltage:	Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
+ *
+ * Keep a record of the approximate range of each value at every stage of the
+ * calculation, to ensure we don't overflow. This makes heavy use of the
+ * approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual
+ * calculations in decimal for increased accuracy.
+ *
+ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
+ */
+static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq,
+				     const u32 voltage)
+{
+	/* Range: 2^8 < v2 < 2^16 m(V^2) */
+	const u32 v2 = (voltage * voltage) / 1000;
+
+	/* Range: 2^3 < f_MHz < 2^10 MHz */
+	const u32 f_MHz = freq / 1000000;
+
+	/* Range: 2^11 < v2f_big < 2^26 kHz V^2 */
+	const u32 v2f_big = v2 * f_MHz;
+
+	/* Range: 2^1 < v2f < 2^16 MHz V^2 */
+	const u32 v2f = v2f_big / 1000;
+
+	/* Range (working backwards from next line): 0 < v2fc < 2^23 uW.
+	 * Must be < 2^42 to avoid overflowing the return value. */
+	const u64 v2fc = (u64) c * (u64) v2f;
+
+	/* Range: 0 < v2fc / 1000 < 2^13 mW */
+	return div_u64(v2fc, 1000);
+}
+
+/**
+ * kbase_scale_static_power() - Scale a static power coefficient to an OPP
+ * @c:		Static model coefficient, in uW/V^3. Should be in range
+ *		0 < c < 2^32 to prevent overflow.
+ * @voltage:	Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
+ *
+ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
+ */
+u32 kbase_scale_static_power(const u32 c, const u32 voltage)
+{
+	/* Range: 2^8 < v2 < 2^16 m(V^2) */
+	const u32 v2 = (voltage * voltage) / 1000;
+
+	/* Range: 2^17 < v3_big < 2^29 m(V^2) mV */
+	const u32 v3_big = v2 * voltage;
+
+	/* Range: 2^7 < v3 < 2^19 m(V^3) */
+	const u32 v3 = v3_big / 1000;
+
+	/*
+	 * Range (working backwards from next line): 0 < v3c_big < 2^33 nW.
+	 * The result should be < 2^52 to avoid overflowing the return value.
+	 */
+	const u64 v3c_big = (u64) c * (u64) v3;
+
+	/* Range: 0 < v3c_big / 1000000 < 2^13 mW */
+	return div_u64(v3c_big, 1000000);
+}
+
+void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Record the event of GPU entering protected mode. */
+	kbdev->ipa_protection_mode_switched = true;
+}
+
+static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_model *model;
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (kbdev->ipa_protection_mode_switched ||
+			kbdev->ipa.force_fallback_model)
+		model = kbdev->ipa.fallback_model;
+	else
+		model = kbdev->ipa.configured_model;
+
+	/*
+	 * Having taken cognizance of the fact that whether GPU earlier
+	 * protected mode or not, the event can be now reset (if GPU is not
+	 * currently in protected mode) so that configured model is used
+	 * for the next sample.
+	 */
+	if (!kbdev->protected_mode)
+		kbdev->ipa_protection_mode_switched = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return model;
+}
+
+static u32 get_static_power_locked(struct kbase_device *kbdev,
+				   struct kbase_ipa_model *model,
+				   unsigned long voltage)
+{
+	u32 power = 0;
+	int err;
+	u32 power_coeff;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (!model->ops->get_static_coeff)
+		model = kbdev->ipa.fallback_model;
+
+	if (model->ops->get_static_coeff) {
+		err = model->ops->get_static_coeff(model, &power_coeff);
+		if (!err)
+			power = kbase_scale_static_power(power_coeff,
+							 (u32) voltage);
+	}
+
+	return power;
+}
+
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static unsigned long kbase_get_static_power(struct devfreq *df,
+					    unsigned long voltage)
+#else
+static unsigned long kbase_get_static_power(unsigned long voltage)
+#endif
+{
+	struct kbase_ipa_model *model;
+	u32 power = 0;
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+#else
+	struct kbase_device *kbdev = kbase_find_device(-1);
+#endif
+
+	if (!kbdev)
+		return 0ul;
+
+	mutex_lock(&kbdev->ipa.lock);
+
+	model = get_current_model(kbdev);
+	power = get_static_power_locked(kbdev, model, voltage);
+
+	mutex_unlock(&kbdev->ipa.lock);
+
+#if !(defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
+	kbase_release_device(kbdev);
+#endif
+
+	return power;
+}
+
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static unsigned long kbase_get_dynamic_power(struct devfreq *df,
+					     unsigned long freq,
+					     unsigned long voltage)
+#else
+static unsigned long kbase_get_dynamic_power(unsigned long freq,
+					     unsigned long voltage)
+#endif
+{
+	struct kbase_ipa_model *model;
+	u32 power_coeff = 0, power = 0;
+	int err = 0;
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+#else
+	struct kbase_device *kbdev = kbase_find_device(-1);
+#endif
+
+	if (!kbdev)
+		return 0ul;
+
+	mutex_lock(&kbdev->ipa.lock);
+
+	model = kbdev->ipa.fallback_model;
+
+	err = model->ops->get_dynamic_coeff(model, &power_coeff);
+
+	if (!err)
+		power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
+	else
+		dev_err_ratelimited(kbdev->dev,
+				    "Model %s returned error code %d\n",
+				    model->ops->name, err);
+
+	mutex_unlock(&kbdev->ipa.lock);
+
+#if !(defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
+	kbase_release_device(kbdev);
+#endif
+
+	return power;
+}
+
+int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
+				unsigned long freq,
+				unsigned long voltage)
+{
+	struct kbase_ipa_model *model;
+	u32 power_coeff = 0;
+	int err = 0;
+	struct kbasep_pm_metrics diff;
+	u64 total_time;
+
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->ipa.last_metrics, &diff);
+
+	model = get_current_model(kbdev);
+
+	err = model->ops->get_dynamic_coeff(model, &power_coeff);
+
+	/* If the counter model returns an error (e.g. switching back to
+	 * protected mode and failing to read counters, or a counter sample
+	 * with too few cycles), revert to the fallback model.
+	 */
+	if (err && model != kbdev->ipa.fallback_model) {
+		model = kbdev->ipa.fallback_model;
+		err = model->ops->get_dynamic_coeff(model, &power_coeff);
+	}
+
+	if (err)
+		return err;
+
+	*power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
+
+	/* time_busy / total_time cannot be >1, so assigning the 64-bit
+	 * result of div_u64 to *power cannot overflow.
+	 */
+	total_time = diff.time_busy + (u64) diff.time_idle;
+	*power = div_u64(*power * (u64) diff.time_busy,
+			 max(total_time, 1ull));
+
+	*power += get_static_power_locked(kbdev, model, voltage);
+
+	return err;
+}
+KBASE_EXPORT_TEST_API(kbase_get_real_power_locked);
+
+int kbase_get_real_power(struct devfreq *df, u32 *power,
+				unsigned long freq,
+				unsigned long voltage)
+{
+	int ret;
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	mutex_lock(&kbdev->ipa.lock);
+	ret = kbase_get_real_power_locked(kbdev, power, freq, voltage);
+	mutex_unlock(&kbdev->ipa.lock);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_get_real_power);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+struct devfreq_cooling_ops kbase_ipa_power_model_ops = {
+#else
+struct devfreq_cooling_power kbase_ipa_power_model_ops = {
+#endif
+	.get_static_power = &kbase_get_static_power,
+	.get_dynamic_power = &kbase_get_dynamic_power,
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	.get_real_power = &kbase_get_real_power,
+#endif
+};
+KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
new file mode 100755
index 0000000..81cafc8
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
@@ -0,0 +1,251 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IPA_H_
+#define _KBASE_IPA_H_
+
+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
+
+struct devfreq;
+
+/**
+ * struct kbase_ipa_model - Object describing a particular IPA model.
+ * @kbdev:                    pointer to kbase device
+ * @model_data:               opaque pointer to model specific data, accessed
+ *                            only by model specific methods.
+ * @ops:                      pointer to object containing model specific methods.
+ * @params:                   head of the list of debugfs params added for model
+ * @missing_dt_node_warning:  flag to limit the matching power model DT not found
+ *                            warning to once.
+ */
+struct kbase_ipa_model {
+	struct kbase_device *kbdev;
+	void *model_data;
+	const struct kbase_ipa_model_ops *ops;
+	struct list_head params;
+	bool missing_dt_node_warning;
+};
+
+/**
+ * kbase_ipa_model_add_param_s32 - Add an integer model parameter
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @addr:	address where the value is stored
+ * @num_elems:	number of elements (1 if not an array)
+ * @dt_required: if false, a corresponding devicetree entry is not required,
+ *		 and the current value will be used. If true, a warning is
+ *		 output and the data is zeroed
+ *
+ * Return: 0 on success, or an error code
+ */
+int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
+				  const char *name, s32 *addr,
+				  size_t num_elems, bool dt_required);
+
+/**
+ * kbase_ipa_model_add_param_string - Add a string model parameter
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @addr:	address where the value is stored
+ * @size:	size, in bytes, of the value storage (so the maximum string
+ *		length is size - 1)
+ * @dt_required: if false, a corresponding devicetree entry is not required,
+ *		 and the current value will be used. If true, a warning is
+ *		 output and the data is zeroed
+ *
+ * Return: 0 on success, or an error code
+ */
+int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
+				     const char *name, char *addr,
+				     size_t size, bool dt_required);
+
+struct kbase_ipa_model_ops {
+	char *name;
+	/* The init, recalculate and term ops on the default model are always
+	 * called.  However, all the other models are only invoked if the model
+	 * is selected in the device tree. Otherwise they are never
+	 * initialized. Additional resources can be acquired by models in
+	 * init(), however they must be terminated in the term().
+	 */
+	int (*init)(struct kbase_ipa_model *model);
+	/* Called immediately after init(), or when a parameter is changed, so
+	 * that any coefficients derived from model parameters can be
+	 * recalculated. */
+	int (*recalculate)(struct kbase_ipa_model *model);
+	void (*term)(struct kbase_ipa_model *model);
+	/*
+	 * get_dynamic_coeff() - calculate dynamic power coefficient
+	 * @model:		pointer to model
+	 * @coeffp:		pointer to return value location
+	 *
+	 * Calculate a dynamic power coefficient, with units pW/(Hz V^2), which
+	 * is then scaled by the IPA framework according to the current OPP's
+	 * frequency and voltage.
+	 *
+	 * Return: 0 on success, or an error code.
+	 */
+	int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
+	/*
+	 * get_static_coeff() - calculate static power coefficient
+	 * @model:		pointer to model
+	 * @coeffp:		pointer to return value location
+	 *
+	 * Calculate a static power coefficient, with units uW/(V^3), which is
+	 * scaled by the IPA framework according to the current OPP's voltage.
+	 *
+	 * Return: 0 on success, or an error code.
+	 */
+	int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
+};
+
+/**
+ * kbase_ipa_init - Initialize the IPA feature
+ * @kbdev:      pointer to kbase device
+ *
+ * simple IPA power model is initialized as a fallback model and if that
+ * initialization fails then IPA is not used.
+ * The device tree is read for the name of ipa model to be used, by using the
+ * property string "ipa-model". If that ipa model is supported then it is
+ * initialized but if the initialization fails then simple power model is used.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_ipa_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_term - Terminate the IPA feature
+ * @kbdev:      pointer to kbase device
+ *
+ * Both simple IPA power model and model retrieved from device tree are
+ * terminated.
+ */
+void kbase_ipa_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_model_recalculate - Recalculate the model coefficients
+ * @model:      pointer to the IPA model object, already initialized
+ *
+ * It shall be called immediately after the model has been initialized
+ * or when the model parameter has changed, so that any coefficients
+ * derived from parameters can be recalculated.
+ * Its a wrapper for the module specific recalculate() method.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_model_ops_find - Lookup an IPA model using its name
+ * @kbdev:      pointer to kbase device
+ * @name:       name of model to lookup
+ *
+ * Return: Pointer to model's 'ops' structure, or NULL if the lookup failed.
+ */
+const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
+							   const char *name);
+
+/**
+ * kbase_ipa_model_name_from_id - Find the best model for a given GPU ID
+ * @gpu_id:     GPU ID of GPU the model will be used for
+ *
+ * Return: The name of the appropriate counter-based model, or the name of the
+ *         fallback model if no counter model exists.
+ */
+const char *kbase_ipa_model_name_from_id(u32 gpu_id);
+
+/**
+ * kbase_ipa_init_model - Initilaize the particular IPA model
+ * @kbdev:      pointer to kbase device
+ * @ops:        pointer to object containing model specific methods.
+ *
+ * Initialize the model corresponding to the @ops pointer passed.
+ * The init() method specified in @ops would be called.
+ *
+ * Return: pointer to kbase_ipa_model on success, NULL on error
+ */
+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
+					     const struct kbase_ipa_model_ops *ops);
+/**
+ * kbase_ipa_term_model - Terminate the particular IPA model
+ * @model:      pointer to the IPA model object, already initialized
+ *
+ * Terminate the model, using the term() method.
+ * Module specific parameters would be freed.
+ */
+void kbase_ipa_term_model(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_protection_mode_switch_event - Inform IPA of the GPU's entry into
+ *                                          protected mode
+ * @kbdev:      pointer to kbase device
+ *
+ * Makes IPA aware of the GPU switching to protected mode.
+ */
+void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev);
+
+extern const struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g76_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g52_r1_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g51_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g77_ipa_model_ops;
+
+/**
+ * kbase_get_real_power() - get the real power consumption of the GPU
+ * @df: dynamic voltage and frequency scaling information for the GPU.
+ * @power: where to store the power consumption, in mW.
+ * @freq: a frequency, in HZ.
+ * @voltage: a voltage, in mV.
+ *
+ * The returned value incorporates both static and dynamic power consumption.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_get_real_power(struct devfreq *df, u32 *power,
+				unsigned long freq,
+				unsigned long voltage);
+
+#if MALI_UNIT_TEST
+/* Called by kbase_get_real_power() to invoke the power models.
+ * Must be called with kbdev->ipa.lock held.
+ * This function is only exposed for use by unit tests.
+ */
+int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
+				unsigned long freq,
+				unsigned long voltage);
+#endif /* MALI_UNIT_TEST */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+extern struct devfreq_cooling_ops kbase_ipa_power_model_ops;
+#else
+extern struct devfreq_cooling_power kbase_ipa_power_model_ops;
+#endif
+
+#else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+static inline void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev)
+{ }
+
+#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
new file mode 100755
index 0000000..30a3b7d
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
@@ -0,0 +1,322 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+#include "mali_kbase.h"
+#include "mali_kbase_ipa.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0))
+#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
+#endif
+
+struct kbase_ipa_model_param {
+	char *name;
+	union {
+		void *voidp;
+		s32 *s32p;
+		char *str;
+	} addr;
+	size_t size;
+	enum kbase_ipa_model_param_type type;
+	struct kbase_ipa_model *model;
+	struct list_head link;
+};
+
+static int param_int_get(void *data, u64 *val)
+{
+	struct kbase_ipa_model_param *param = data;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	*(s64 *) val = *param->addr.s32p;
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return 0;
+}
+
+static int param_int_set(void *data, u64 val)
+{
+	struct kbase_ipa_model_param *param = data;
+	struct kbase_ipa_model *model = param->model;
+	s64 sval = (s64) val;
+	s32 old_val;
+	int err = 0;
+
+	if (sval < S32_MIN || sval > S32_MAX)
+		return -ERANGE;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	old_val = *param->addr.s32p;
+	*param->addr.s32p = val;
+	err = kbase_ipa_model_recalculate(model);
+	if (err < 0)
+		*param->addr.s32p = old_val;
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return err;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_s32, param_int_get, param_int_set, "%lld\n");
+
+static ssize_t param_string_get(struct file *file, char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct kbase_ipa_model_param *param = file->private_data;
+	ssize_t ret;
+	size_t len;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	len = strnlen(param->addr.str, param->size - 1) + 1;
+	ret = simple_read_from_buffer(user_buf, count, ppos,
+				      param->addr.str, len);
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return ret;
+}
+
+static ssize_t param_string_set(struct file *file, const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct kbase_ipa_model_param *param = file->private_data;
+	struct kbase_ipa_model *model = param->model;
+	char *old_str = NULL;
+	ssize_t ret = count;
+	size_t buf_size;
+	int err;
+
+	mutex_lock(&model->kbdev->ipa.lock);
+
+	if (count > param->size) {
+		ret = -EINVAL;
+		goto end;
+	}
+
+	old_str = kstrndup(param->addr.str, param->size, GFP_KERNEL);
+	if (!old_str) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	buf_size = min(param->size - 1, count);
+	if (copy_from_user(param->addr.str, user_buf, buf_size)) {
+		ret = -EFAULT;
+		goto end;
+	}
+
+	param->addr.str[buf_size] = '\0';
+
+	err = kbase_ipa_model_recalculate(model);
+	if (err < 0) {
+		ret = err;
+		strlcpy(param->addr.str, old_str, param->size);
+	}
+
+end:
+	kfree(old_str);
+	mutex_unlock(&model->kbdev->ipa.lock);
+
+	return ret;
+}
+
+static const struct file_operations fops_string = {
+	.owner = THIS_MODULE,
+	.read = param_string_get,
+	.write = param_string_set,
+	.open = simple_open,
+	.llseek = default_llseek,
+};
+
+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name,
+			      void *addr, size_t size,
+			      enum kbase_ipa_model_param_type type)
+{
+	struct kbase_ipa_model_param *param;
+
+	param = kzalloc(sizeof(*param), GFP_KERNEL);
+
+	if (!param)
+		return -ENOMEM;
+
+	/* 'name' is stack-allocated for array elements, so copy it into
+	 * heap-allocated storage */
+	param->name = kstrdup(name, GFP_KERNEL);
+
+	if (!param->name) {
+		kfree(param);
+		return -ENOMEM;
+	}
+
+	param->addr.voidp = addr;
+	param->size = size;
+	param->type = type;
+	param->model = model;
+
+	list_add(&param->link, &model->params);
+
+	return 0;
+}
+
+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_param *param_p, *param_n;
+
+	list_for_each_entry_safe(param_p, param_n, &model->params, link) {
+		list_del(&param_p->link);
+		kfree(param_p->name);
+		kfree(param_p);
+	}
+}
+
+static int force_fallback_model_get(void *data, u64 *val)
+{
+	struct kbase_device *kbdev = data;
+
+	mutex_lock(&kbdev->ipa.lock);
+	*val = kbdev->ipa.force_fallback_model;
+	mutex_unlock(&kbdev->ipa.lock);
+
+	return 0;
+}
+
+static int force_fallback_model_set(void *data, u64 val)
+{
+	struct kbase_device *kbdev = data;
+
+	mutex_lock(&kbdev->ipa.lock);
+	kbdev->ipa.force_fallback_model = (val ? true : false);
+	mutex_unlock(&kbdev->ipa.lock);
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(force_fallback_model,
+		force_fallback_model_get,
+		force_fallback_model_set,
+		"%llu\n");
+
+static int current_power_get(void *data, u64 *val)
+{
+	struct kbase_device *kbdev = data;
+	struct devfreq *df = kbdev->devfreq;
+	u32 power;
+
+	kbase_pm_context_active(kbdev);
+	/* The current model assumes that there's no more than one voltage
+	 * regulator currently available in the system.
+	 */
+	kbase_get_real_power(df, &power,
+		kbdev->current_nominal_freq,
+		(kbdev->current_voltages[0] / 1000));
+	kbase_pm_context_idle(kbdev);
+
+	*val = power;
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(current_power, current_power_get, NULL, "%llu\n");
+
+static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
+{
+	struct list_head *it;
+	struct dentry *dir;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	dir = debugfs_create_dir(model->ops->name,
+				 model->kbdev->mali_debugfs_directory);
+
+	if (!dir) {
+		dev_err(model->kbdev->dev,
+			"Couldn't create mali debugfs %s directory",
+			model->ops->name);
+		return;
+	}
+
+	list_for_each(it, &model->params) {
+		struct kbase_ipa_model_param *param =
+				list_entry(it,
+					   struct kbase_ipa_model_param,
+					   link);
+		const struct file_operations *fops = NULL;
+
+		switch (param->type) {
+		case PARAM_TYPE_S32:
+			fops = &fops_s32;
+			break;
+		case PARAM_TYPE_STRING:
+			fops = &fops_string;
+			break;
+		}
+
+		if (unlikely(!fops)) {
+			dev_err(model->kbdev->dev,
+				"Type not set for %s parameter %s\n",
+				model->ops->name, param->name);
+		} else {
+			debugfs_create_file(param->name, S_IRUGO | S_IWUSR,
+					    dir, param, fops);
+		}
+	}
+}
+
+void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model,
+	const char *name, s32 val)
+{
+	struct kbase_ipa_model_param *param;
+
+	mutex_lock(&model->kbdev->ipa.lock);
+
+	list_for_each_entry(param, &model->params, link) {
+		if (!strcmp(param->name, name)) {
+			if (param->type == PARAM_TYPE_S32) {
+				*param->addr.s32p = val;
+			} else {
+				dev_err(model->kbdev->dev,
+					"Wrong type for %s parameter %s\n",
+					model->ops->name, param->name);
+			}
+			break;
+		}
+	}
+
+	mutex_unlock(&model->kbdev->ipa.lock);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_model_param_set_s32);
+
+void kbase_ipa_debugfs_init(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->ipa.lock);
+
+	if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model)
+		kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model);
+	kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model);
+
+	debugfs_create_file("ipa_current_power", 0444,
+		kbdev->mali_debugfs_directory, kbdev, &current_power);
+	debugfs_create_file("ipa_force_fallback_model", 0644,
+		kbdev->mali_debugfs_directory, kbdev, &force_fallback_model);
+
+	mutex_unlock(&kbdev->ipa.lock);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h
new file mode 100755
index 0000000..a983d9c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IPA_DEBUGFS_H_
+#define _KBASE_IPA_DEBUGFS_H_
+
+enum kbase_ipa_model_param_type {
+	PARAM_TYPE_S32 = 1,
+	PARAM_TYPE_STRING,
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+void kbase_ipa_debugfs_init(struct kbase_device *kbdev);
+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name,
+			      void *addr, size_t size,
+			      enum kbase_ipa_model_param_type type);
+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_model_param_set_s32 - Set an integer model parameter
+ *
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @val:	new value of the parameter
+ *
+ * This function is only exposed for use by unit tests running in
+ * kernel space. Normally it is expected that parameter values will
+ * instead be set via debugfs.
+ */
+void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model,
+	const char *name, s32 val);
+
+#else /* CONFIG_DEBUG_FS */
+
+static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model,
+					    const char *name, void *addr,
+					    size_t size,
+					    enum kbase_ipa_model_param_type type)
+{
+	return 0;
+}
+
+static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
+{ }
+
+#endif /* CONFIG_DEBUG_FS */
+
+#endif /* _KBASE_IPA_DEBUGFS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
new file mode 100755
index 0000000..852559e
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
@@ -0,0 +1,351 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <uapi/linux/thermal.h>
+#include <linux/thermal.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+#include <linux/of.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+
+#include "mali_kbase.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_ipa_simple.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+#if MALI_UNIT_TEST
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+static unsigned long dummy_temp;
+
+static int kbase_simple_power_model_get_dummy_temp(
+	struct thermal_zone_device *tz,
+	unsigned long *temp)
+{
+	*temp = READ_ONCE(dummy_temp);
+	return 0;
+}
+
+#else
+static int dummy_temp;
+
+static int kbase_simple_power_model_get_dummy_temp(
+	struct thermal_zone_device *tz,
+	int *temp)
+{
+	*temp = READ_ONCE(dummy_temp);
+	return 0;
+}
+#endif
+
+/* Intercept calls to the kernel function using a macro */
+#ifdef thermal_zone_get_temp
+#undef thermal_zone_get_temp
+#endif
+#define thermal_zone_get_temp(tz, temp) \
+	kbase_simple_power_model_get_dummy_temp(tz, temp)
+
+void kbase_simple_power_model_set_dummy_temp(int temp)
+{
+	WRITE_ONCE(dummy_temp, temp);
+}
+KBASE_EXPORT_TEST_API(kbase_simple_power_model_set_dummy_temp);
+
+#endif /* MALI_UNIT_TEST */
+
+/*
+ * This model is primarily designed for the Juno platform. It may not be
+ * suitable for other platforms. The additional resources in this model
+ * should preferably be minimal, as this model is rarely used when a dynamic
+ * model is available.
+ */
+
+/**
+ * struct kbase_ipa_model_simple_data - IPA context per device
+ * @dynamic_coefficient: dynamic coefficient of the model
+ * @static_coefficient:  static coefficient of the model
+ * @ts:                  Thermal scaling coefficients of the model
+ * @tz_name:             Thermal zone name
+ * @gpu_tz:              thermal zone device
+ * @poll_temperature_thread: Handle for temperature polling thread
+ * @current_temperature: Most recent value of polled temperature
+ * @temperature_poll_interval_ms: How often temperature should be checked, in ms
+ */
+
+struct kbase_ipa_model_simple_data {
+	u32 dynamic_coefficient;
+	u32 static_coefficient;
+	s32 ts[4];
+	char tz_name[THERMAL_NAME_LENGTH];
+	struct thermal_zone_device *gpu_tz;
+	struct task_struct *poll_temperature_thread;
+	int current_temperature;
+	int temperature_poll_interval_ms;
+};
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+/**
+ * calculate_temp_scaling_factor() - Calculate temperature scaling coefficient
+ * @ts:		Signed coefficients, in order t^0 to t^3, with units Deg^-N
+ * @t:		Temperature, in mDeg C. Range: -2^17 < t < 2^17
+ *
+ * Scale the temperature according to a cubic polynomial whose coefficients are
+ * provided in the device tree. The result is used to scale the static power
+ * coefficient, where 1000000 means no change.
+ *
+ * Return: Temperature scaling factor. Range 0 <= ret <= 10,000,000.
+ */
+static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t)
+{
+	/* Range: -2^24 < t2 < 2^24 m(Deg^2) */
+	const s64 t2 = div_s64((t * t), 1000);
+
+	/* Range: -2^31 < t3 < 2^31 m(Deg^3) */
+	const s64 t3 = div_s64((t * t2), 1000);
+
+	/*
+	 * Sum the parts. t^[1-3] are in m(Deg^N), but the coefficients are in
+	 * Deg^-N, so we need to multiply the last coefficient by 1000.
+	 * Range: -2^63 < res_big < 2^63
+	 */
+	const s64 res_big = ts[3] * t3    /* +/- 2^62 */
+			  + ts[2] * t2    /* +/- 2^55 */
+			  + ts[1] * t     /* +/- 2^48 */
+			  + ts[0] * (s64)1000; /* +/- 2^41 */
+
+	/* Range: -2^60 < res_unclamped < 2^60 */
+	s64 res_unclamped = div_s64(res_big, 1000);
+
+	/* Clamp to range of 0x to 10x the static power */
+	return clamp(res_unclamped, (s64) 0, (s64) 10000000);
+}
+
+/* We can't call thermal_zone_get_temp() directly in model_static_coeff(),
+ * because we don't know if tz->lock is held in the same thread. So poll it in
+ * a separate thread to get around this. */
+static int poll_temperature(void *data)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *) data;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	unsigned long temp;
+#else
+	int temp;
+#endif
+
+	while (!kthread_should_stop()) {
+		struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz);
+
+		if (tz) {
+			int ret;
+
+			ret = thermal_zone_get_temp(tz, &temp);
+			if (ret) {
+				pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+						    ret);
+				temp = FALLBACK_STATIC_TEMPERATURE;
+			}
+		} else {
+			temp = FALLBACK_STATIC_TEMPERATURE;
+		}
+
+		WRITE_ONCE(model_data->current_temperature, temp);
+
+		msleep_interruptible(READ_ONCE(model_data->temperature_poll_interval_ms));
+	}
+
+	return 0;
+}
+
+static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp)
+{
+	u32 temp_scaling_factor;
+	struct kbase_ipa_model_simple_data *model_data =
+		(struct kbase_ipa_model_simple_data *) model->model_data;
+	u64 coeff_big;
+	int temp;
+
+	temp = READ_ONCE(model_data->current_temperature);
+
+	/* Range: 0 <= temp_scaling_factor < 2^24 */
+	temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts,
+							    temp);
+
+	/*
+	 * Range: 0 <= coeff_big < 2^52 to avoid overflowing *coeffp. This
+	 * means static_coefficient must be in range
+	 * 0 <= static_coefficient < 2^28.
+	 */
+	coeff_big = (u64) model_data->static_coefficient * (u64) temp_scaling_factor;
+	*coeffp = div_u64(coeff_big, 1000000);
+
+	return 0;
+}
+
+static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+		(struct kbase_ipa_model_simple_data *) model->model_data;
+
+	*coeffp = model_data->dynamic_coefficient;
+
+	return 0;
+}
+
+static int add_params(struct kbase_ipa_model *model)
+{
+	int err = 0;
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+
+	err = kbase_ipa_model_add_param_s32(model, "static-coefficient",
+					    &model_data->static_coefficient,
+					    1, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient",
+					    &model_data->dynamic_coefficient,
+					    1, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_s32(model, "ts",
+					    model_data->ts, 4, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_string(model, "thermal-zone",
+					       model_data->tz_name,
+					       sizeof(model_data->tz_name), true);
+	if (err)
+		goto end;
+
+	model_data->temperature_poll_interval_ms = 200;
+	err = kbase_ipa_model_add_param_s32(model, "temp-poll-interval-ms",
+					    &model_data->temperature_poll_interval_ms,
+					    1, false);
+
+end:
+	return err;
+}
+
+static int kbase_simple_power_model_init(struct kbase_ipa_model *model)
+{
+	int err;
+	struct kbase_ipa_model_simple_data *model_data;
+
+	model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data),
+			     GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model->model_data = (void *) model_data;
+
+	model_data->current_temperature = FALLBACK_STATIC_TEMPERATURE;
+	model_data->poll_temperature_thread = kthread_run(poll_temperature,
+							  (void *) model_data,
+							  "mali-simple-power-model-temp-poll");
+	if (IS_ERR(model_data->poll_temperature_thread)) {
+		err = PTR_ERR(model_data->poll_temperature_thread);
+		kfree(model_data);
+		return err;
+	}
+
+	err = add_params(model);
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kthread_stop(model_data->poll_temperature_thread);
+		kfree(model_data);
+	}
+
+	return err;
+}
+
+static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+	struct thermal_zone_device *tz;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (!strnlen(model_data->tz_name, sizeof(model_data->tz_name))) {
+		model_data->gpu_tz = NULL;
+	} else {
+		char tz_name[THERMAL_NAME_LENGTH];
+
+		strlcpy(tz_name, model_data->tz_name, sizeof(tz_name));
+
+		/* Release ipa.lock so that thermal_list_lock is not acquired
+		 * with ipa.lock held, thereby avoid lock ordering violation
+		 * lockdep warning. The warning comes as a chain of locks
+		 * ipa.lock --> thermal_list_lock --> tz->lock gets formed
+		 * on registering devfreq cooling device when probe method
+		 * of mali platform driver is invoked.
+		 */
+		mutex_unlock(&model->kbdev->ipa.lock);
+		tz = thermal_zone_get_zone_by_name(tz_name);
+		mutex_lock(&model->kbdev->ipa.lock);
+
+		if (IS_ERR_OR_NULL(tz)) {
+			pr_warn_ratelimited("Error %ld getting thermal zone \'%s\', not yet ready?\n",
+					    PTR_ERR(tz), tz_name);
+			return -EPROBE_DEFER;
+		}
+
+		/* Check if another thread raced against us & updated the
+		 * thermal zone name string. Update the gpu_tz pointer only if
+		 * the name string did not change whilst we retrieved the new
+		 * thermal_zone_device pointer, otherwise model_data->tz_name &
+		 * model_data->gpu_tz would become inconsistent with each other.
+		 * The below check will succeed only for the thread which last
+		 * updated the name string.
+		 */
+		if (strncmp(tz_name, model_data->tz_name, sizeof(tz_name)) == 0)
+			model_data->gpu_tz = tz;
+	}
+
+	return 0;
+}
+
+static void kbase_simple_power_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+
+	kthread_stop(model_data->poll_temperature_thread);
+
+	kfree(model_data);
+}
+
+struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = {
+		.name = "mali-simple-power-model",
+		.init = &kbase_simple_power_model_init,
+		.recalculate = &kbase_simple_power_model_recalculate,
+		.term = &kbase_simple_power_model_term,
+		.get_dynamic_coeff = &model_dynamic_coeff,
+		.get_static_coeff = &model_static_coeff,
+};
+KBASE_EXPORT_TEST_API(kbase_simple_ipa_model_ops);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h
new file mode 100755
index 0000000..fed67d5
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IPA_SIMPLE_H_
+#define _KBASE_IPA_SIMPLE_H_
+
+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
+
+extern struct kbase_ipa_model_ops kbase_simple_ipa_model_ops;
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_simple_power_model_set_dummy_temp() - set a dummy temperature value
+ * @temp: Temperature of the thermal zone, in millidegrees celsius.
+ *
+ * This is only intended for use in unit tests, to ensure that the temperature
+ * values used by the simple power model are predictable. Deterministic
+ * behavior is necessary to allow validation of the static power values
+ * computed by this model.
+ */
+void kbase_simple_power_model_set_dummy_temp(int temp);
+#endif /* MALI_UNIT_TEST */
+
+#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+#endif /* _KBASE_IPA_SIMPLE_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
new file mode 100755
index 0000000..9fae8f1
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
@@ -0,0 +1,346 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+#define DEFAULT_SCALING_FACTOR 5
+
+/* If the value of GPU_ACTIVE is below this, use the simple model
+ * instead, to avoid extrapolating small amounts of counter data across
+ * large sample periods.
+ */
+#define DEFAULT_MIN_SAMPLE_CYCLES 10000
+
+/**
+ * read_hwcnt() - read a counter value
+ * @model_data:		pointer to model data
+ * @offset:		offset, in bytes, into vinstr buffer
+ *
+ * Return: A 32-bit counter value. Range: 0 < value < 2^27 (worst case would be
+ * incrementing every cycle over a ~100ms sample period at a high frequency,
+ * e.g. 1 GHz: 2^30 * 0.1seconds ~= 2^27.
+ */
+static inline u32 kbase_ipa_read_hwcnt(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	u32 offset)
+{
+	u8 *p = (u8 *)model_data->dump_buf.dump_buf;
+
+	return *(u32 *)&p[offset];
+}
+
+static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
+{
+	s64 rtn;
+
+	if (a > 0 && (S64_MAX - a) < b)
+		rtn = S64_MAX;
+	else if (a < 0 && (S64_MIN - a) > b)
+		rtn = S64_MIN;
+	else
+		rtn = a + b;
+
+	return rtn;
+}
+
+s64 kbase_ipa_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+	u64 core_mask;
+	u32 base = 0;
+	s64 ret = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			/* 0 < counter_value < 2^27 */
+			u32 counter_value = kbase_ipa_read_hwcnt(model_data,
+						       base + counter);
+
+			/* 0 < ret < 2^27 * max_num_cores = 2^32 */
+			ret = kbase_ipa_add_saturate(ret, counter_value);
+		}
+		base += KBASE_IPA_NR_BYTES_PER_BLOCK;
+		core_mask >>= 1;
+	}
+
+	/* Range: -2^54 < ret * coeff < 2^54 */
+	return ret * coeff;
+}
+
+s64 kbase_ipa_sum_all_memsys_blocks(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+	const u32 num_blocks = kbdev->gpu_props.props.l2_props.num_l2_slices;
+	u32 base = 0;
+	s64 ret = 0;
+	u32 i;
+
+	for (i = 0; i < num_blocks; i++) {
+		/* 0 < counter_value < 2^27 */
+		u32 counter_value = kbase_ipa_read_hwcnt(model_data,
+					       base + counter);
+
+		/* 0 < ret < 2^27 * max_num_memsys_blocks = 2^29 */
+		ret = kbase_ipa_add_saturate(ret, counter_value);
+		base += KBASE_IPA_NR_BYTES_PER_BLOCK;
+	}
+
+	/* Range: -2^51 < ret * coeff < 2^51 */
+	return ret * coeff;
+}
+
+s64 kbase_ipa_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter)
+{
+	/* Range: 0 < counter_value < 2^27 */
+	const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter);
+
+	/* Range: -2^49 < ret < 2^49 */
+	return counter_value * (s64) coeff;
+}
+
+int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	int errcode;
+	struct kbase_device *kbdev = model_data->kbdev;
+	struct kbase_hwcnt_virtualizer *hvirt = kbdev->hwcnt_gpu_virt;
+	struct kbase_hwcnt_enable_map enable_map;
+	const struct kbase_hwcnt_metadata *metadata =
+		kbase_hwcnt_virtualizer_metadata(hvirt);
+
+	if (!metadata)
+		return -1;
+
+	errcode = kbase_hwcnt_enable_map_alloc(metadata, &enable_map);
+	if (errcode) {
+		dev_err(kbdev->dev, "Failed to allocate IPA enable map");
+		return errcode;
+	}
+
+	kbase_hwcnt_enable_map_enable_all(&enable_map);
+
+	errcode = kbase_hwcnt_virtualizer_client_create(
+		hvirt, &enable_map, &model_data->hvirt_cli);
+	kbase_hwcnt_enable_map_free(&enable_map);
+	if (errcode) {
+		dev_err(kbdev->dev, "Failed to register IPA with virtualizer");
+		model_data->hvirt_cli = NULL;
+		return errcode;
+	}
+
+	errcode = kbase_hwcnt_dump_buffer_alloc(
+		metadata, &model_data->dump_buf);
+	if (errcode) {
+		dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
+		kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli);
+		model_data->hvirt_cli = NULL;
+		return errcode;
+	}
+
+	return 0;
+}
+
+void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	if (model_data->hvirt_cli) {
+		kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli);
+		kbase_hwcnt_dump_buffer_free(&model_data->dump_buf);
+		model_data->hvirt_cli = NULL;
+	}
+}
+
+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
+{
+	struct kbase_ipa_model_vinstr_data *model_data =
+			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+	s64 energy = 0;
+	size_t i;
+	u64 coeff = 0, coeff_mul = 0;
+	u64 start_ts_ns, end_ts_ns;
+	u32 active_cycles;
+	int err = 0;
+
+	err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli,
+		&start_ts_ns, &end_ts_ns, &model_data->dump_buf);
+	if (err)
+		goto err0;
+
+	/* Range: 0 (GPU not used at all), to the max sampling interval, say
+	 * 1s, * max GPU frequency (GPU 100% utilized).
+	 * 0 <= active_cycles <= 1 * ~2GHz
+	 * 0 <= active_cycles < 2^31
+	 */
+	active_cycles = model_data->get_active_cycles(model_data);
+
+	if (active_cycles < (u32) max(model_data->min_sample_cycles, 0)) {
+		err = -ENODATA;
+		goto err0;
+	}
+
+	/* Range: 1 <= active_cycles < 2^31 */
+	active_cycles = max(1u, active_cycles);
+
+	/* Range of 'energy' is +/- 2^54 * number of IPA groups (~8), so around
+	 * -2^57 < energy < 2^57
+	 */
+	for (i = 0; i < model_data->groups_def_num; i++) {
+		const struct kbase_ipa_group *group = &model_data->groups_def[i];
+		s32 coeff = model_data->group_values[i];
+		s64 group_energy = group->op(model_data, coeff,
+					     group->counter_block_offset);
+
+		energy = kbase_ipa_add_saturate(energy, group_energy);
+	}
+
+	/* Range: 0 <= coeff < 2^57 */
+	if (energy > 0)
+		coeff = energy;
+
+	/* Range: 0 <= coeff < 2^57 (because active_cycles >= 1). However, this
+	 * can be constrained further: Counter values can only be increased by
+	 * a theoretical maximum of about 64k per clock cycle. Beyond this,
+	 * we'd have to sample every 1ms to avoid them overflowing at the
+	 * lowest clock frequency (say 100MHz). Therefore, we can write the
+	 * range of 'coeff' in terms of active_cycles:
+	 *
+	 * coeff = SUM(coeffN * counterN * num_cores_for_counterN)
+	 * coeff <= SUM(coeffN * counterN) * max_num_cores
+	 * coeff <= num_IPA_groups * max_coeff * max_counter * max_num_cores
+	 *       (substitute max_counter = 2^16 * active_cycles)
+	 * coeff <= num_IPA_groups * max_coeff * 2^16 * active_cycles * max_num_cores
+	 * coeff <=    2^3         *    2^22   * 2^16 * active_cycles * 2^5
+	 * coeff <= 2^46 * active_cycles
+	 *
+	 * So after the division: 0 <= coeff <= 2^46
+	 */
+	coeff = div_u64(coeff, active_cycles);
+
+	/* Not all models were derived at the same reference voltage. Voltage
+	 * scaling is done by multiplying by V^2, so we need to *divide* by
+	 * Vref^2 here.
+	 * Range: 0 <= coeff <= 2^49
+	 */
+	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
+	/* Range: 0 <= coeff <= 2^52 */
+	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
+
+	/* Scale by user-specified integer factor.
+	 * Range: 0 <= coeff_mul < 2^57
+	 */
+	coeff_mul = coeff * model_data->scaling_factor;
+
+	/* The power models have results with units
+	 * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this
+	 * becomes fW/(Hz V^2), which are the units of coeff_mul. However,
+	 * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide
+	 * by 1000.
+	 * Range: 0 <= coeff_mul < 2^47
+	 */
+	coeff_mul = div_u64(coeff_mul, 1000u);
+
+err0:
+	/* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
+	*coeffp = clamp(coeff_mul, (u64) 0, (u64) 1 << 16);
+	return err;
+}
+
+int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
+				       const struct kbase_ipa_group *ipa_groups_def,
+				       size_t ipa_group_size,
+				       kbase_ipa_get_active_cycles_callback get_active_cycles,
+				       s32 reference_voltage)
+{
+	int err = 0;
+	size_t i;
+	struct kbase_ipa_model_vinstr_data *model_data;
+
+	if (!model || !ipa_groups_def || !ipa_group_size || !get_active_cycles)
+		return -EINVAL;
+
+	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model_data->kbdev = model->kbdev;
+	model_data->groups_def = ipa_groups_def;
+	model_data->groups_def_num = ipa_group_size;
+	model_data->get_active_cycles = get_active_cycles;
+
+	model->model_data = (void *) model_data;
+
+	for (i = 0; i < model_data->groups_def_num; ++i) {
+		const struct kbase_ipa_group *group = &model_data->groups_def[i];
+
+		model_data->group_values[i] = group->default_value;
+		err = kbase_ipa_model_add_param_s32(model, group->name,
+					&model_data->group_values[i],
+					1, false);
+		if (err)
+			goto exit;
+	}
+
+	model_data->scaling_factor = DEFAULT_SCALING_FACTOR;
+	err = kbase_ipa_model_add_param_s32(model, "scale",
+					    &model_data->scaling_factor,
+					    1, false);
+	if (err)
+		goto exit;
+
+	model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES;
+	err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles",
+					    &model_data->min_sample_cycles,
+					    1, false);
+	if (err)
+		goto exit;
+
+	model_data->reference_voltage = reference_voltage;
+	err = kbase_ipa_model_add_param_s32(model, "reference_voltage",
+					    &model_data->reference_voltage,
+					    1, false);
+	if (err)
+		goto exit;
+
+	err = kbase_ipa_attach_vinstr(model_data);
+
+exit:
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kfree(model_data);
+	}
+	return err;
+}
+
+void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_vinstr_data *model_data =
+			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+
+	kbase_ipa_detach_vinstr(model_data);
+	kfree(model_data);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
new file mode 100755
index 0000000..46e3cd4
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
@@ -0,0 +1,217 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IPA_VINSTR_COMMON_H_
+#define _KBASE_IPA_VINSTR_COMMON_H_
+
+#include "mali_kbase.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_types.h"
+
+/* Maximum number of IPA groups for an IPA model. */
+#define KBASE_IPA_MAX_GROUP_DEF_NUM  16
+
+/* Number of bytes per hardware counter in a vinstr_buffer. */
+#define KBASE_IPA_NR_BYTES_PER_CNT    4
+
+/* Number of hardware counters per block in a vinstr_buffer. */
+#define KBASE_IPA_NR_CNT_PER_BLOCK   64
+
+/* Number of bytes per block in a vinstr_buffer. */
+#define KBASE_IPA_NR_BYTES_PER_BLOCK \
+	(KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT)
+
+struct kbase_ipa_model_vinstr_data;
+
+typedef u32 (*kbase_ipa_get_active_cycles_callback)(struct kbase_ipa_model_vinstr_data *);
+
+/**
+ * struct kbase_ipa_model_vinstr_data - IPA context per device
+ * @kbdev:               pointer to kbase device
+ * @groups_def:          Array of IPA groups.
+ * @groups_def_num:      Number of elements in the array of IPA groups.
+ * @get_active_cycles:   Callback to return number of active cycles during
+ *                       counter sample period
+ * @hvirt_cli:           hardware counter virtualizer client handle
+ * @dump_buf:            buffer to dump hardware counters onto
+ * @reference_voltage:   voltage, in mV, of the operating point used when
+ *                       deriving the power model coefficients. Range approx
+ *                       0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13
+ * @scaling_factor:      User-specified power scaling factor. This is an
+ *                       integer, which is multiplied by the power coefficient
+ *                       just before OPP scaling.
+ *                       Range approx 0-32: 0 < scaling_factor < 2^5
+ * @min_sample_cycles:   If the value of the GPU_ACTIVE counter (the number of
+ *                       cycles the GPU was working) is less than
+ *                       min_sample_cycles, the counter model will return an
+ *                       error, causing the IPA framework to approximate using
+ *                       the cached simple model results instead. This may be
+ *                       more accurate than extrapolating  using a very small
+ *                       counter dump.
+ */
+struct kbase_ipa_model_vinstr_data {
+	struct kbase_device *kbdev;
+	s32 group_values[KBASE_IPA_MAX_GROUP_DEF_NUM];
+	const struct kbase_ipa_group *groups_def;
+	size_t groups_def_num;
+	kbase_ipa_get_active_cycles_callback get_active_cycles;
+	struct kbase_hwcnt_virtualizer_client *hvirt_cli;
+	struct kbase_hwcnt_dump_buffer dump_buf;
+	s32 reference_voltage;
+	s32 scaling_factor;
+	s32 min_sample_cycles;
+};
+
+/**
+ * struct ipa_group - represents a single IPA group
+ * @name:               name of the IPA group
+ * @default_value:      default value of coefficient for IPA group.
+ *                      Coefficients are interpreted as fractions where the
+ *                      denominator is 1000000.
+ * @op:                 which operation to be performed on the counter values
+ * @counter_block_offset:  block offset in bytes of the counter used to calculate energy for IPA group
+ */
+struct kbase_ipa_group {
+	const char *name;
+	s32 default_value;
+	s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32);
+	u32 counter_block_offset;
+};
+
+/**
+ * kbase_ipa_sum_all_shader_cores() - sum a counter over all cores
+ * @model_data:		pointer to model data
+ * @coeff:		model coefficient. Unity is ~2^20, so range approx
+ *			+/- 4.0: -2^22 < coeff < 2^22
+ * @counter		offset in bytes of the counter used to calculate energy
+ *			for IPA group
+ *
+ * Calculate energy estimation based on hardware counter `counter'
+ * across all shader cores.
+ *
+ * Return: Sum of counter values. Range: -2^54 < ret < 2^54
+ */
+s64 kbase_ipa_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter);
+
+/**
+ * kbase_ipa_sum_all_memsys_blocks() - sum a counter over all mem system blocks
+ * @model_data:		pointer to model data
+ * @coeff:		model coefficient. Unity is ~2^20, so range approx
+ *			+/- 4.0: -2^22 < coeff < 2^22
+ * @counter:		offset in bytes of the counter used to calculate energy
+ *			for IPA group
+ *
+ * Calculate energy estimation based on hardware counter `counter' across all
+ * memory system blocks.
+ *
+ * Return: Sum of counter values. Range: -2^51 < ret < 2^51
+ */
+s64 kbase_ipa_sum_all_memsys_blocks(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter);
+
+/**
+ * kbase_ipa_single_counter() - sum a single counter
+ * @model_data:		pointer to model data
+ * @coeff:		model coefficient. Unity is ~2^20, so range approx
+ *			+/- 4.0: -2^22 < coeff < 2^22
+ * @counter:		offset in bytes of the counter used to calculate energy
+ *			for IPA group
+ *
+ * Calculate energy estimation based on hardware counter `counter'.
+ *
+ * Return: Counter value. Range: -2^49 < ret < 2^49
+ */
+s64 kbase_ipa_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter);
+
+/**
+ * attach_vinstr() - attach a vinstr_buffer to an IPA model.
+ * @model_data		pointer to model data
+ *
+ * Attach a vinstr_buffer to an IPA model. The vinstr_buffer
+ * allows access to the hardware counters used to calculate
+ * energy consumption.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
+
+/**
+ * detach_vinstr() - detach a vinstr_buffer from an IPA model.
+ * @model_data		pointer to model data
+ *
+ * Detach a vinstr_buffer from an IPA model.
+ */
+void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
+
+/**
+ * kbase_ipa_vinstr_dynamic_coeff() - calculate dynamic power based on HW counters
+ * @model:		pointer to instantiated model
+ * @coeffp:		pointer to location where calculated power, in
+ *			pW/(Hz V^2), is stored.
+ *
+ * This is a GPU-agnostic implementation of the get_dynamic_coeff()
+ * function of an IPA model. It relies on the model being populated
+ * with GPU-specific attributes at initialization time.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp);
+
+/**
+ * kbase_ipa_vinstr_common_model_init() - initialize ipa power model
+ * @model:		ipa power model to initialize
+ * @ipa_groups_def:	array of ipa groups which sets coefficients for
+ *			the corresponding counters used in the ipa model
+ * @ipa_group_size:     number of elements in the array @ipa_groups_def
+ * @get_active_cycles:  callback to return the number of cycles the GPU was
+ *			active during the counter sample period.
+ * @reference_voltage:  voltage, in mV, of the operating point used when
+ *                      deriving the power model coefficients.
+ *
+ * This initialization function performs initialization steps common
+ * for ipa models based on counter values. In each call, the model
+ * passes its specific coefficient values per ipa counter group via
+ * @ipa_groups_def array.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
+				       const struct kbase_ipa_group *ipa_groups_def,
+				       size_t ipa_group_size,
+				       kbase_ipa_get_active_cycles_callback get_active_cycles,
+				       s32 reference_voltage);
+
+/**
+ * kbase_ipa_vinstr_common_model_term() - terminate ipa power model
+ * @model: ipa power model to terminate
+ *
+ * This function performs all necessary steps to terminate ipa power model
+ * including clean up of resources allocated to hold model data.
+ */
+void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model);
+
+#endif /* _KBASE_IPA_VINSTR_COMMON_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c
new file mode 100755
index 0000000..a3d1fae
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c
@@ -0,0 +1,420 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include <linux/thermal.h>
+
+#include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase.h"
+
+
+/* Performance counter blocks base offsets */
+#define JM_BASE             (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define TILER_BASE          (1 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define MEMSYS_BASE         (2 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+
+/* JM counter block offsets */
+#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT *  6)
+
+/* Tiler counter block offsets */
+#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45)
+
+/* MEMSYS counter block offsets */
+#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25)
+
+/* SC counter block offsets */
+#define SC_FRAG_ACTIVE             (KBASE_IPA_NR_BYTES_PER_CNT *  4)
+#define SC_EXEC_CORE_ACTIVE        (KBASE_IPA_NR_BYTES_PER_CNT * 26)
+#define SC_EXEC_INSTR_FMA          (KBASE_IPA_NR_BYTES_PER_CNT * 27)
+#define SC_EXEC_INSTR_COUNT        (KBASE_IPA_NR_BYTES_PER_CNT * 28)
+#define SC_EXEC_INSTR_MSG          (KBASE_IPA_NR_BYTES_PER_CNT * 30)
+#define SC_TEX_FILT_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 39)
+#define SC_TEX_COORD_ISSUE         (KBASE_IPA_NR_BYTES_PER_CNT * 40)
+#define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42)
+#define SC_VARY_INSTR              (KBASE_IPA_NR_BYTES_PER_CNT * 49)
+#define SC_VARY_SLOT_32            (KBASE_IPA_NR_BYTES_PER_CNT * 50)
+#define SC_VARY_SLOT_16            (KBASE_IPA_NR_BYTES_PER_CNT * 51)
+#define SC_BEATS_RD_LSC            (KBASE_IPA_NR_BYTES_PER_CNT * 56)
+#define SC_BEATS_WR_LSC            (KBASE_IPA_NR_BYTES_PER_CNT * 61)
+#define SC_BEATS_WR_TIB            (KBASE_IPA_NR_BYTES_PER_CNT * 62)
+
+/**
+ * get_jm_counter() - get performance counter offset inside the Job Manager block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the Job Manager block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						u32 counter_block_offset)
+{
+	return JM_BASE + counter_block_offset;
+}
+
+/**
+ * get_memsys_counter() - get performance counter offset inside the Memory System block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Memory System block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						    u32 counter_block_offset)
+{
+	/* The base address of Memory System performance counters is always the same, although their number
+	 * may vary based on the number of cores. For the moment it's ok to return a constant.
+	 */
+	return MEMSYS_BASE + counter_block_offset;
+}
+
+/**
+ * get_sc_counter() - get performance counter offset inside the Shader Cores block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Shader Cores block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						u32 counter_block_offset)
+{
+	const u32 sc_base = MEMSYS_BASE +
+		(model_data->kbdev->gpu_props.props.l2_props.num_l2_slices *
+		 KBASE_IPA_NR_BYTES_PER_BLOCK);
+
+	return sc_base + counter_block_offset;
+}
+
+/**
+ * memsys_single_counter() - calculate energy for a single Memory System performance counter.
+ * @model_data:   pointer to GPU model data.
+ * @coeff:        default value of coefficient for IPA group.
+ * @offset:       offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Memory System performance counter.
+ */
+static s64 kbase_g7x_sum_all_memsys_blocks(
+		struct kbase_ipa_model_vinstr_data *model_data,
+		s32 coeff,
+		u32 offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset);
+	return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter);
+}
+
+/**
+ * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a Shader Cores performance counter for all cores.
+ */
+static s64 kbase_g7x_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff,
+	u32 counter_block_offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_sc_counter(model_data,
+						       counter_block_offset);
+	return kbase_ipa_sum_all_shader_cores(model_data, coeff, counter);
+}
+
+/**
+ * jm_single_counter() - calculate energy for a single Job Manager performance counter.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Job Manager performance counter.
+ */
+static s64 kbase_g7x_jm_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff,
+	u32 counter_block_offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_jm_counter(model_data,
+						     counter_block_offset);
+	return kbase_ipa_single_counter(model_data, coeff, counter);
+}
+
+/**
+ * get_active_cycles() - return the GPU_ACTIVE counter
+ * @model_data:            pointer to GPU model data.
+ *
+ * Return: the number of cycles the GPU was active during the counter sampling
+ * period.
+ */
+static u32 kbase_g7x_get_active_cycles(
+	struct kbase_ipa_model_vinstr_data *model_data)
+{
+	u32 counter = kbase_g7x_power_model_get_jm_counter(model_data, JM_GPU_ACTIVE);
+
+	/* Counters are only 32-bit, so we can safely multiply by 1 then cast
+	 * the 64-bit result back to a u32.
+	 */
+	return kbase_ipa_single_counter(model_data, 1, counter);
+}
+
+/** Table of IPA group definitions.
+ *
+ * For each IPA group, this table defines a function to access the given performance block counter (or counters,
+ * if the operation needs to be iterated on multiple blocks) and calculate energy estimation.
+ */
+
+static const struct kbase_ipa_group ipa_groups_def_g71[] = {
+	{
+		.name = "l2_access",
+		.default_value = 526300,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 301100,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "tex_issue",
+		.default_value = 197400,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_COORD_ISSUE,
+	},
+	{
+		.name = "tile_wb",
+		.default_value = -156400,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_BEATS_WR_TIB,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 115800,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_g72[] = {
+	{
+		.name = "l2_access",
+		.default_value = 393000,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 227000,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "tex_issue",
+		.default_value = 181900,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_COORD_ISSUE,
+	},
+	{
+		.name = "tile_wb",
+		.default_value = -120200,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_BEATS_WR_TIB,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 133100,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_g76[] = {
+	{
+		.name = "gpu_active",
+		.default_value = 122000,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 488900,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "vary_instr",
+		.default_value = 212100,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_VARY_INSTR,
+	},
+	{
+		.name = "tex_tfch_num_operations",
+		.default_value = 288000,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS,
+	},
+	{
+		.name = "l2_access",
+		.default_value = 378100,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_g52_r1[] = {
+	{
+		.name = "gpu_active",
+		.default_value = 224200,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 384700,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "vary_instr",
+		.default_value = 271900,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_VARY_INSTR,
+	},
+	{
+		.name = "tex_tfch_num_operations",
+		.default_value = 477700,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS,
+	},
+	{
+		.name = "l2_access",
+		.default_value = 551400,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_g51[] = {
+	{
+		.name = "gpu_active",
+		.default_value = 201400,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 392700,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "vary_instr",
+		.default_value = 274000,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_VARY_INSTR,
+	},
+	{
+		.name = "tex_tfch_num_operations",
+		.default_value = 528000,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS,
+	},
+	{
+		.name = "l2_access",
+		.default_value = 506400,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_g77[] = {
+	{
+		.name = "l2_access",
+		.default_value = 710800,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_msg",
+		.default_value = 2375300,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_MSG,
+	},
+	{
+		.name = "exec_instr_fma",
+		.default_value = 656100,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_FMA,
+	},
+	{
+		.name = "tex_filt_num_operations",
+		.default_value = 318800,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 172800,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+
+#define IPA_POWER_MODEL_OPS(gpu, init_token) \
+	const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
+		.name = "mali-" #gpu "-power-model", \
+		.init = kbase_ ## init_token ## _power_model_init, \
+		.term = kbase_ipa_vinstr_common_model_term, \
+		.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
+	}; \
+	KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
+
+#define STANDARD_POWER_MODEL(gpu, reference_voltage) \
+	static int kbase_ ## gpu ## _power_model_init(\
+			struct kbase_ipa_model *model) \
+	{ \
+		BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_ ## gpu) > \
+				KBASE_IPA_MAX_GROUP_DEF_NUM); \
+		return kbase_ipa_vinstr_common_model_init(model, \
+				ipa_groups_def_ ## gpu, \
+				ARRAY_SIZE(ipa_groups_def_ ## gpu), \
+				kbase_g7x_get_active_cycles, \
+				(reference_voltage)); \
+	} \
+	IPA_POWER_MODEL_OPS(gpu, gpu)
+
+#define ALIAS_POWER_MODEL(gpu, as_gpu) \
+	IPA_POWER_MODEL_OPS(gpu, as_gpu)
+
+STANDARD_POWER_MODEL(g71, 800);
+STANDARD_POWER_MODEL(g72, 800);
+STANDARD_POWER_MODEL(g76, 800);
+STANDARD_POWER_MODEL(g52_r1, 1000);
+STANDARD_POWER_MODEL(g51, 1000);
+STANDARD_POWER_MODEL(g77, 1000);
+
+/* g52 is an alias of g76 (TNOX) for IPA */
+ALIAS_POWER_MODEL(g52, g76);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
new file mode 100755
index 0000000..bf27180
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -0,0 +1,614 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_L2_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t83x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t82x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tMIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tHEx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tSIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tDVx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tNOx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tGOx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tTRx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tNAx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tBEx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_L2_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tULx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_L2_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tDUx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_L2_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tODx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_L2_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tIDx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tVAx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_L2_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tEGx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_END
+};
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
new file mode 100755
index 0000000..7964c22
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -0,0 +1,1409 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_ISSUES_H_
+#define _BASE_HWCONFIG_ISSUES_H_
+
+enum base_hw_issue {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8879,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TNOX_1194,
+	BASE_HW_ISSUE_TGOX_R1_1234,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TSIX_1792,
+	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	BASE_HW_ISSUE_TTRX_3076,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_generic[] = {
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t72x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t76x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t60x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t62x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t86x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t83x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t82x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TSIX_1792,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TSIX_1792,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tDVx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TNOX_1194,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tNOx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TNOX_1194,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TGOX_R1_1234,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	BASE_HW_ISSUE_TTRX_3076,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	BASE_HW_ISSUE_TTRX_3076,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	BASE_HW_ISSUE_TTRX_3076,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	BASE_HW_ISSUE_TTRX_3076,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tNAx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tBEx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tULx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tULx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tDUx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tODx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tIDx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tIDx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tEGx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tEGx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_END
+};
+
+#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
new file mode 100755
index 0000000..6caf36c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,1767 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _BASE_KERNEL_H_
+#define _BASE_KERNEL_H_
+
+typedef struct base_mem_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_mem_handle;
+
+#include "mali_base_mem_priv.h"
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT              256
+
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET             ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET           ((unsigned char)0)
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined CDBG_ASSERT
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined KBASE_DEBUG_ASSERT
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#error assert macro not defined!
+#endif
+
+#if defined(PAGE_MASK) && defined(PAGE_SHIFT)
+#define LOCAL_PAGE_SHIFT PAGE_SHIFT
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/* Physical memory group ID for normal usage.
+ */
+#define BASE_MEM_GROUP_DEFAULT (0)
+
+/* Number of physical memory groups.
+ */
+#define BASE_MEM_GROUP_COUNT (16)
+
+/**
+ * typedef base_mem_alloc_flags - Memory allocation, access/hint flags.
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD),
+ * which defines a write-only region on the CPU side, which is
+ * heavily read by the CPU...
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/* Memory allocation, access/hint flags.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* IN */
+/* Read access CPU side
+ */
+#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
+
+/* Write access CPU side
+ */
+#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
+
+/* Read access GPU side
+ */
+#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
+
+/* Write access GPU side
+ */
+#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
+
+/* Execute allowed on the GPU side
+ */
+#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
+
+/* Will be permanently mapped in kernel space.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
+
+/* The allocation will completely reside within the same 4GB chunk in the GPU
+ * virtual space.
+ * Since this flag is primarily required only for the TLS memory which will
+ * not be used to contain executable code and also not used for Tiler heap,
+ * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
+ */
+#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
+
+/* Userspace is not allowed to free this memory.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
+
+#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
+
+/* Grow backing store on GPU Page Fault
+ */
+#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
+
+/* Page coherence Outer shareable, if available
+ */
+#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
+
+/* Page coherence Inner shareable
+ */
+#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
+
+/* Should be cached on the CPU
+ */
+#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
+
+/* IN/OUT */
+/* Must have same VA on both the GPU and the CPU
+ */
+#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
+
+/* OUT */
+/* Must call mmap to acquire a GPU address for the alloc
+ */
+#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
+
+/* IN */
+/* Page coherence Outer shareable, required.
+ */
+#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
+
+/* Secure memory
+ */
+#define BASE_MEM_SECURE ((base_mem_alloc_flags)1 << 16)
+
+/* Not needed physical memory
+ */
+#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
+
+/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
+ * addresses to be the same
+ */
+#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
+
+/**
+ * Bit 19 is reserved.
+ *
+ * Do not remove, use the next unreserved bit for new flags
+ */
+#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
+#define BASE_MEM_MAYBE_RESERVED_BIT_19 BASE_MEM_RESERVED_BIT_19
+
+/**
+ * Memory starting from the end of the initial commit is aligned to 'extent'
+ * pages, where 'extent' must be a power of 2 and no more than
+ * BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ */
+#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
+
+/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu mode.
+ * Some components within the GPU might only be able to access memory that is
+ * GPU cacheable. Refer to the specific GPU implementation for more details.
+ * The 3 shareability flags will be ignored for GPU uncached memory.
+ * If used while importing USER_BUFFER type memory, then the import will fail
+ * if the memory is not aligned to GPU and CPU cache line width.
+ */
+#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
+
+/*
+ * Bits [22:25] for group_id (0~15).
+ *
+ * In user space, inline function base_mem_group_id_set() can be used with
+ * numeric value (0~15) to generate a specific memory group ID.
+ *
+ * group_id is packed into in.flags of kbase_ioctl_mem_alloc to be delivered to
+ * kernel space via ioctl and then kernel driver can use inline function
+ * base_mem_group_id_get() to extract group_id from flags.
+ */
+#define BASEP_MEM_GROUP_ID_SHIFT 22
+#define BASE_MEM_GROUP_ID_MASK \
+	((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
+
+/**
+ * Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 26
+
+/* A mask for all output bits, excluding IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/* A mask for all input bits, including IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/**
+ * base_mem_group_id_get() - Get group ID from flags
+ * @flags: Flags to pass to base_mem_alloc
+ *
+ * This inline function extracts the encoded group ID from flags
+ * and converts it into numeric value (0~15).
+ *
+ * Return: group ID(0~15) extracted from the parameter
+ */
+static inline int base_mem_group_id_get(base_mem_alloc_flags flags)
+{
+	LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0);
+	return (int)((flags & BASE_MEM_GROUP_ID_MASK) >>
+			BASEP_MEM_GROUP_ID_SHIFT);
+}
+
+/**
+ * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags
+ * @id: group ID(0~15) you want to encode
+ *
+ * This inline function encodes specific group ID into base_mem_alloc_flags.
+ * Parameter 'id' should lie in-between 0 to 15.
+ *
+ * Return: base_mem_alloc_flags with the group ID (id) encoded
+ *
+ * The return value can be combined with other flags against base_mem_alloc
+ * to identify a specific memory group.
+ */
+static inline base_mem_alloc_flags base_mem_group_id_set(int id)
+{
+	LOCAL_ASSERT(id >= 0);
+	LOCAL_ASSERT(id < BASE_MEM_GROUP_COUNT);
+
+	return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) &
+		BASE_MEM_GROUP_ID_MASK;
+}
+
+/* A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+	 BASE_MEM_COHERENT_LOCAL)
+
+
+/* A mask of all currently reserved flags
+ */
+#define BASE_MEM_FLAGS_RESERVED \
+	(BASE_MEM_RESERVED_BIT_8 | BASE_MEM_MAYBE_RESERVED_BIT_19)
+
+/* A mask of all the flags which are only valid for allocations within kbase,
+ * and may not be passed from user space.
+ */
+#define BASEP_MEM_FLAGS_KERNEL_ONLY \
+	(BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE)
+
+/* A mask of all the flags that can be returned via the base_mem_get_flags()
+ * interface.
+ */
+#define BASE_MEM_FLAGS_QUERYABLE \
+	(BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \
+		BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \
+		BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \
+		BASEP_MEM_FLAGS_KERNEL_ONLY))
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	/**
+	 * Import type with value 1 is deprecated.
+	 */
+	BASE_MEM_IMPORT_TYPE_UMM = 2,
+	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+} base_mem_import_type;
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:	address of imported user buffer
+ * @length:	length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+	u64 ptr;
+	u64 length;
+};
+
+/**
+ * @brief Invalid memory handle.
+ *
+ * Return value from functions returning @ref base_mem_handle on error.
+ *
+ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} })
+
+/**
+ * @brief Special write-alloc memory handle.
+ *
+ * A special handle is used to represent a region where a special page is mapped
+ * with a write-alloc cache setup, typically used when the write result of the
+ * GPU isn't needed, but the GPU must write anyway.
+ *
+ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} })
+
+#define BASEP_MEM_INVALID_HANDLE               (0ull  << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE               (1ull  << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
+/* reserved handles ..-48<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
+						BASE_MEM_COOKIE_BASE)
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB  0xfffff000UL
+/* Mask to detect 4GB boundary (in page units) alignment */
+#define BASE_MEM_PFN_MASK_4GB  (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT)
+
+/**
+ * Limit on the 'extent' parameter for an allocation with the
+ * BASE_MEM_TILER_ALIGN_TOP flag set
+ *
+ * This is the same as the maximum limit for a Buffer Descriptor's chunk size
+ */
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2 \
+		(21u - (LOCAL_PAGE_SHIFT))
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES \
+		(1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2))
+
+/* Bit mask of cookies used for for memory allocation setup */
+#define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
+
+/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */
+#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */
+
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief a basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+	struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent imported memory object.
+ * Simple opague handle to imported memory, can't be used
+ * with anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+	struct {
+		int fd;
+	} basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
+ */
+typedef struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completely unused
+ * by the Base driver. It can be used to store things such as callback
+ * function pointer, data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+	u64 blob[2];	 /**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ *         Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *         specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+	base_mem_handle handle;
+	u64 offset;
+	u64 length;
+};
+
+/**
+ * Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
+ * initial commit is aligned to 'extent' pages, where 'extent' must be a power
+ * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ */
+#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP  (1 << 0)
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extent:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ * @bin_id:                     The JIT allocation bin, used in conjunction with
+ *                              @max_allocations to limit the number of each
+ *                              type of JIT allocation.
+ * @max_allocations:            The maximum number of allocations allowed within
+ *                              the bin specified by @bin_id. Should be the same
+ *                              for all JIT allocations within the same bin.
+ * @flags:                      flags specifying the special requirements for
+ *                              the JIT allocation.
+ * @padding:                    Expansion space - should be initialised to zero
+ * @usage_id:                   A hint about which allocation should be reused.
+ *                              The kernel should attempt to use a previous
+ *                              allocation with the same usage_id
+ */
+struct base_jit_alloc_info {
+	u64 gpu_alloc_addr;
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	u8 id;
+	u8 bin_id;
+	u8 max_allocations;
+	u8 flags;
+	u8 padding[2];
+	u16 usage_id;
+};
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
+ * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without
+ * changing the structure size).
+ * When the flag is set for a particular dependency to signal that it is an ordering only dependency then
+ * errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID  (0)       /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA     (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER    (1U << 1) /**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly.  By not specifying the
+ * correct settings can/will cause an early job termination.  Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u32 base_jd_core_req;
+
+/* Requirements that come from the HW */
+
+/**
+ * No requirement, dependency only
+ */
+#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
+
+/**
+ * Requires fragment shaders
+ */
+#define BASE_JD_REQ_FS  ((base_jd_core_req)1 << 0)
+
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS  ((base_jd_core_req)1 << 1)
+#define BASE_JD_REQ_T   ((base_jd_core_req)1 << 2)   /**< Requires tiling */
+#define BASE_JD_REQ_CF  ((base_jd_core_req)1 << 3)   /**< Requires cache flushes */
+#define BASE_JD_REQ_V   ((base_jd_core_req)1 << 4)   /**< Requires value writeback */
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC  ((base_jd_core_req)1 << 13)
+
+/**
+ * SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP  ((base_jd_core_req)1 << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON               ((base_jd_core_req)1 << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced no syncsets can be bundled with the atom
+ * but should instead be part of a NULL jobs inserted into the dependency tree.
+ * The first pre_dep object must be configured for the external resouces to use,
+ * the second pre_dep object can be used to create other dependencies.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and
+ * BASE_JD_REQ_SOFT_EVENT_WAIT.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES   ((base_jd_core_req)1 << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB        ((base_jd_core_req)1 << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME      (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER          (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/* 0x4 RESERVED for now */
+
+/**
+ * SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ *   other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ *   possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET              (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET            (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY             (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/**
+ * SW only requirement: Just In Time allocation
+ *
+ * This job requests a single or multiple JIT allocations through a list
+ * of @base_jit_alloc_info structure which is passed via the jc element of
+ * the atom. The number of @base_jit_alloc_info structures present in the
+ * list is passed via the nr_extres element of the atom
+ *
+ * It should be noted that the id entry in @base_jit_alloc_info must not
+ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job to free the JIT allocation is still made.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC              (BASE_JD_REQ_SOFT_JOB | 0x9)
+/**
+ * SW only requirement: Just In Time free
+ *
+ * This job requests a single or multiple JIT allocations created by
+ * @BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the JIT
+ * allocations is passed via the jc element of the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE               (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/**
+ * SW only requirement: Map external resource
+ *
+ * This job requests external resource(s) are mapped once the dependencies
+ * of the job have been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP            (BASE_JD_REQ_SOFT_JOB | 0xb)
+/**
+ * SW only requirement: Unmap external resource
+ *
+ * This job requests external resource(s) are unmapped once the dependencies
+ * of the job has been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP          (BASE_JD_REQ_SOFT_JOB | 0xc)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE    ((base_jd_core_req)1 << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE   ((base_jd_core_req)1 << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job starts which does not have this bit set or a job completes
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if
+ * the CPU may have written to memory addressed by the job since the last job
+ * without this bit set was submitted.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job completes which does not have this bit set or a job starts
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if
+ * the CPU may read from or partially overwrite memory addressed by the job
+ * before the next job without this bit set completes.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
+
+/**
+ * These requirement bits are currently unused in base_jd_core_req
+ */
+#define BASEP_JD_REQ_RESERVED \
+	(~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
+	BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
+	BASE_JD_REQ_EVENT_COALESCE | \
+	BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
+	BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
+	BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END))
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASE_JD_REQ_ATOM_TYPE \
+	(BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
+	BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of a soft job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
+
+/*
+ * Returns non-zero value if core requirements passed define a soft job or
+ * a dependency only job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \
+	((core_req & BASE_JD_REQ_SOFT_JOB) || \
+	(core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
+
+/*
+ * Base Atom priority
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling after the atoms have had dependencies
+ * resolved. For example, a low priority atom that has had its dependencies
+ * resolved might run before a higher priority atom that has not had its
+ * dependencies resolved.
+ *
+ * In general, fragment atoms do not affect non-fragment atoms with
+ * lower priorities, and vice versa. One exception is that there is only one
+ * priority value for each context. So a high-priority (e.g.) fragment atom
+ * could increase its context priority, causing its non-fragment atoms to also
+ * be scheduled sooner.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
+ *   resolved, and atom 'X' has a higher priority than atom 'Y'
+ * - If atom 'Y' is currently running on the HW, then it is interrupted to
+ *   allow atom 'X' to run soon after
+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ *   the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * - Any two atoms that have the same priority could run in any order with
+ *   respect to each other. That is, there is no ordering constraint between
+ *   atoms of the same priority.
+ *
+ * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are
+ * scheduled between contexts. The default value, 0, will cause higher-priority
+ * atoms to be scheduled first, regardless of their context. The value 1 will
+ * use a round-robin algorithm when deciding which context's atoms to schedule
+ * next, so higher-priority atoms can only preempt lower priority atoms within
+ * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and
+ * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM  ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_HIGH    ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW     ((base_jd_prio)2)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+enum kbase_jd_atom_state {
+	/** Atom is not used */
+	KBASE_JD_ATOM_STATE_UNUSED,
+	/** Atom is queued in JD */
+	KBASE_JD_ATOM_STATE_QUEUED,
+	/** Atom has been given to JS (is runnable/running) */
+	KBASE_JD_ATOM_STATE_IN_JS,
+	/** Atom has been completed, but not yet handed back to job dispatcher
+	 *  for dependency resolution */
+	KBASE_JD_ATOM_STATE_HW_COMPLETED,
+	/** Atom has been completed, but not yet handed back to userspace */
+	KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+	base_atom_id  atom_id;               /**< An atom number */
+	base_jd_dep_type dependency_type;    /**< Dependency type */
+};
+
+/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value.
+ * In order to keep the size of the structure same, padding field has been adjusted
+ * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines)
+ * is added at the end of the structure. Place in the structure previously occupied by u16 core_req
+ * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission
+ * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left
+ * for possible future use. */
+typedef struct base_jd_atom_v2 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	u64 extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources or JIT allocations */
+	u16 compat_core_req;	            /**< core requirements which correspond to the legacy support for UK 10.2 */
+	struct base_dependency pre_dep[2];  /**< pre-dependencies, one need to use SETTER function to assign this field,
+	this is done in order to reduce possibility of improper assigment of a dependency field */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;                  /**< Atom priority. Refer to @ref base_jd_prio for more details */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[1];
+	base_jd_core_req core_req;          /**< core requirements */
+} base_jd_atom_v2;
+
+typedef enum base_external_resource_access {
+	BASE_EXT_RES_ACCESS_SHARED,
+	BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+	u64 ext_resource;
+} base_external_resource;
+
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+	u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	u64 address;
+	u64 size;
+	struct base_external_resource extres;
+};
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] dep          The kbase jd atom dependency to be initialized.
+ * @param     id           The atom_id to be assigned.
+ * @param     dep_type     The dep_type to be assigned.
+ *
+ */
+static inline void base_jd_atom_dep_set(struct base_dependency *dep,
+		base_atom_id id, base_jd_dep_type dep_type)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	/*
+	 * make sure we don't set not allowed combinations
+	 * of atom_id/dependency_type.
+	 */
+	LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+			(id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
+
+	dep->atom_id = id;
+	dep->dependency_type = dep_type;
+}
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] dep          The kbase jd atom dependency to be written.
+ * @param[in]     from         The dependency to make a copy from.
+ *
+ */
+static inline void base_jd_atom_dep_copy(struct base_dependency *dep,
+		const struct base_dependency *from)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type);
+}
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @ref base_jd_submit and @ref base_jd_submit
+ * has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ *
+ * @pre @p fence must reference a @ref base_fence successfully initialized by
+ *      calling @ref base_fence_init.
+ * @pre @p fence was @e not initialized by calling @ref base_fence_import, nor
+ *      is it associated with a fence-trigger job that was already submitted
+ *      by calling @ref base_jd_submit.
+ * @post @p atom can be submitted by calling @ref base_jd_submit.
+ */
+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+	LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @ref base_jd_submit and
+ * @ref base_jd_submit has returned.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ *
+ * @pre @p fence must reference a @ref base_fence successfully initialized by
+ *      calling @ref base_fence_import, or it must be associated with a
+ *      fence-trigger job that was already submitted by calling
+ *      @ref base_jd_submit.
+ * @post @p atom can be submitted by calling @ref base_jd_submit.
+ */
+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up an external resource object to reference
+ * a memory allocation and the type of access requested.
+ *
+ * @param[in] res     The resource object to initialize
+ * @param     handle  The handle to the imported memory object, must be
+ *                    obtained by calling @ref base_mem_as_import_handle().
+ * @param     access  The type of access requested
+ */
+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
+{
+	u64 address;
+
+	address = handle.basep.handle;
+
+	LOCAL_ASSERT(res != NULL);
+	LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+	LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+	res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+	BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
+	BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
+	BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */
+	BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
+	BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
+	BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
+	BASE_JD_SW_EVENT_RESERVED = (3u << 11),	/**< Reserved event type */
+	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)	    /**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0  - subtype
+ * @li 12:11 - type
+ * @li 13    - SW success (only valid if the SW bit is set)
+ * @li 14    - SW event (HW event if not set)
+ * @li 15    - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+typedef enum base_jd_event_code {
+	/* HW defined exceptions */
+
+	/** Start of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+	/* non-fatal exceptions */
+	BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
+	BASE_JD_EVENT_DONE = 0x01,
+	BASE_JD_EVENT_STOPPED = 0x03,	  /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
+	BASE_JD_EVENT_TERMINATED = 0x04,  /**< This is actually a fault status code - the job was hard stopped */
+	BASE_JD_EVENT_ACTIVE = 0x08,	  /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
+
+	/** End of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+
+	/** Start of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+	/* job exceptions */
+	BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+	BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+	BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+	BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+	BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+	BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+	BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+	BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+	BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+	BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+	BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+	BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+	BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+	BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+	BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+	BASE_JD_EVENT_STATE_FAULT = 0x5A,
+	BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+	BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+	/* GPU exceptions */
+	BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+	BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+	/* MMU exceptions */
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+	BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+	BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+	/* SW defined exceptions */
+	BASE_JD_EVENT_MEM_GROWTH_FAILED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_TIMED_OUT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+	BASE_JD_EVENT_JOB_CANCELLED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+	BASE_JD_EVENT_JOB_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+	BASE_JD_EVENT_PM_EVENT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+
+	BASE_JD_EVENT_BAG_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+	/** End of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+	BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
+	BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+	/** End of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of Kernel-only status codes. Such codes are never returned to user-space */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+	BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+	/** End of Kernel-only status codes. */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. The can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (ie all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+	base_jd_event_code event_code;  /**< event code */
+	base_atom_id atom_number;       /**< the atom number that has completed */
+	struct base_jd_udata udata;     /**< user data */
+} base_jd_event_v2;
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field
+ * of @ref base_jd_atom.
+ *
+ * It must not occupy the same CPU cache line(s) as any neighboring data.
+ * This is to avoid cases where access to pages containing the structure
+ * is shared between cached and un-cached memory regions, which would
+ * cause memory corruption.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+	u64 system_time;
+	u64 cycle_counter;
+	u64 sec;
+	u32 usec;
+	u8 padding[36];
+} base_dump_cpu_gpu_counters;
+
+/** @} end group base_user_api_job_dispatch */
+
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistent guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ *  - Part of a critical inner-loop
+ *  - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU  registers themselves.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided the entire processed @ref base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algorithm can be determined either by a u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * required for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+struct mali_base_gpu_core_props {
+	/**
+	 * Product specific value.
+	 */
+	u32 product_id;
+
+	/**
+	 * Status of the GPU release.
+	 * No defined values, but starts at 0 and increases by one for each
+	 * release status (alpha, beta, EAC, etc.).
+	 * 4 bit values (0-15).
+	 */
+	u16 version_status;
+
+	/**
+	 * Minor release number of the GPU. "P" part of an "RnPn" release number.
+     * 8 bit values (0-255).
+	 */
+	u16 minor_revision;
+
+	/**
+	 * Major release number of the GPU. "R" part of an "RnPn" release number.
+     * 4 bit values (0-15).
+	 */
+	u16 major_revision;
+
+	u16 padding;
+
+	/* The maximum GPU frequency. Reported to applications by
+	 * clGetDeviceInfo()
+	 */
+	u32 gpu_freq_khz_max;
+
+	/**
+	 * Size of the shader program counter, in bits.
+	 */
+	u32 log2_program_counter_size;
+
+	/**
+	 * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+	 * bitpattern where a set bit indicates that the format is supported.
+	 *
+	 * Before using a texture format, it is recommended that the corresponding
+	 * bit be checked.
+	 */
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	/**
+	 * Theoretical maximum memory available to the GPU. It is unlikely that a
+	 * client will be able to allocate all of this memory for their own
+	 * purposes, but this at least provides an upper bound on the memory
+	 * available to the GPU.
+	 *
+	 * This is required for OpenCL's clGetDeviceInfo() call when
+	 * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+	 * client will not be expecting to allocate anywhere near this value.
+	 */
+	u64 gpu_available_memory_size;
+
+	/**
+	 * The number of execution engines.
+	 */
+	u8 num_exec_engines;
+};
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+	u8 log2_line_size;
+	u8 log2_cache_size;
+	u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+	u32 bin_size_bytes;	/* Max is 4*2^15 */
+	u32 max_active_levels;	/* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+	u32 max_threads;            /* Max. number of threads per core */
+	u32 max_workgroup_size;     /* Max. number of threads per workgroup */
+	u32 max_barrier_size;       /* Max. number of threads that can synchronize on a simple barrier */
+	u16 max_registers;          /* Total size [1..65535] of the register file available per core. */
+	u8  max_task_queue;         /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+	u8  max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+	u8  impl_tech;              /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+	u8  padding[3];
+	u32 tls_alloc;              /* Number of threads per core that TLS must
+				     * be allocated for
+				     */
+};
+
+/**
+ * @brief descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+	u64 core_mask;	       /**< Core restriction mask required for the group */
+	u16 num_cores;	       /**< Number of cores in the group */
+	u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+	u32 num_groups;
+
+	/**
+	 * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+	 *
+	 * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+	 * whether the core groups are coherent or not. Hence this member is needed
+	 * to calculate how much memory is required for dumping.
+	 *
+	 * @note Do not use it to work out how many valid elements are in the
+	 * group[] member. Use num_groups instead.
+	 */
+	u32 num_core_groups;
+
+	/**
+	 * Coherency features of the memory, accessed by @ref gpu_mem_features
+	 * methods
+	 */
+	u32 coherency;
+
+	u32 padding;
+
+	/**
+	 * Descriptors of coherent groups
+	 */
+	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+	u64 shader_present;
+	u64 tiler_present;
+	u64 l2_present;
+	u64 stack_present;
+
+	u32 l2_features;
+	u32 core_features;
+	u32 mem_features;
+	u32 mmu_features;
+
+	u32 as_present;
+
+	u32 js_present;
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 tiler_features;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	u32 gpu_id;
+
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+
+	/*
+	 * Note: This is the _selected_ coherency mode rather than the
+	 * available modes as exposed in the coherency_features register.
+	 */
+	u32 coherency_mode;
+
+	u32 thread_tls_alloc;
+};
+
+/**
+ * Return structure for base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this data structure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct base_gpu_props {
+	struct mali_base_gpu_core_props core_props;
+	struct mali_base_gpu_l2_cache_props l2_props;
+	u64 unused_1; /* keep for backwards compatibility */
+	struct mali_base_gpu_tiler_props tiler_props;
+	struct mali_base_gpu_thread_props thread_props;
+
+	/** This member is large, likely to be 128 bytes */
+	struct gpu_raw_gpu_props raw_props;
+
+	/** This must be last member of the structure */
+	struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+typedef u32 base_context_create_flags;
+
+/** No flags set */
+#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
+
+/** Base context is embedded in a cctx object (flag used for CINSTR
+ * software counter macros)
+ */
+#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
+
+/** Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled.
+ */
+#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
+	((base_context_create_flags)1 << 1)
+
+
+/* Bit-shift used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
+
+/* Bitmask used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
+	((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
+
+/* Bitpattern describing the base_context_create_flags that can be
+ * passed to the kernel
+ */
+#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
+	(BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \
+	 BASEP_CONTEXT_MMU_GROUP_ID_MASK)
+
+/* Bitpattern describing the ::base_context_create_flags that can be
+ * passed to base_context_init()
+ */
+#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \
+	(BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
+
+/*
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+/** Private flag tracking whether job descriptor dumping is disabled */
+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \
+	((base_context_create_flags)(1 << 31))
+
+/**
+ * base_context_mmu_group_id_set - Encode a memory group ID in
+ *                                 base_context_create_flags
+ *
+ * Memory allocated for GPU page tables will come from the specified group.
+ *
+ * @group_id: Physical memory group ID. Range is 0..(BASE_MEM_GROUP_COUNT-1).
+ *
+ * Return: Bitmask of flags to pass to base_context_init.
+ */
+static inline base_context_create_flags base_context_mmu_group_id_set(
+	int const group_id)
+{
+	LOCAL_ASSERT(group_id >= 0);
+	LOCAL_ASSERT(group_id < BASE_MEM_GROUP_COUNT);
+	return BASEP_CONTEXT_MMU_GROUP_ID_MASK &
+		((base_context_create_flags)group_id <<
+		BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
+}
+
+/**
+ * base_context_mmu_group_id_get - Decode a memory group ID from
+ *                                 base_context_create_flags
+ *
+ * Memory allocated for GPU page tables will come from the returned group.
+ *
+ * @flags: Bitmask of flags to pass to base_context_init.
+ *
+ * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1).
+ */
+static inline int base_context_mmu_group_id_get(
+	base_context_create_flags const flags)
+{
+	LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS));
+	return (int)((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >>
+			BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
+}
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C Pre-processor macros are exposed here to do with Platform
+ * Config.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revison.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/** @} end group base_api */
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
+
+/* Indicate that job dumping is enabled. This could affect certain timers
+ * to account for the performance impact. */
+#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
+
+#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
+		BASE_TLSTREAM_JOB_DUMPING_ENABLED)
+
+
+#endif				/* _BASE_KERNEL_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100755
index 0000000..52c8a4f
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,57 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC	(1U << 0)
+#define BASE_SYNCSET_OP_CSYNC	(1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	base_mem_handle mem_handle;
+	u64 user_addr;
+	u64 size;
+	u8 type;
+	u8 padding[7];
+};
+
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100755
index 0000000..e3f209c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,717 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
+#include <linux/sched/mm.h>
+#endif
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_linux.h>
+
+/*
+ * Include mali_kbase_defs.h first as this provides types needed by other local
+ * header files.
+ */
+#include "mali_kbase_defs.h"
+
+#include "mali_kbase_context.h"
+#include "mali_kbase_strings.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_debug_job_fault.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_ioctl.h"
+
+#include "ipa/mali_kbase_ipa.h"
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+
+
+#ifndef u64_to_user_ptr
+/* Introduced in Linux v4.6 */
+#define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x)
+#endif
+
+
+/* Physical memory group ID for a special page which can alias several regions.
+ */
+#define KBASE_MEM_GROUP_SINK BASE_MEM_GROUP_DEFAULT
+
+/*
+ * Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: configuration attributes member of kbdev needs to have
+* been setup before calling kbase_device_init
+*/
+
+/*
+* API to acquire device list semaphore and return pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+/**
+ * kbase_context_get_unmapped_area() - get an address range which is currently
+ *                                     unmapped.
+ * @kctx: A kernel base context (which has its own GPU address space).
+ * @addr: CPU mapped address (set to 0 since MAP_FIXED mapping is not allowed
+ *        as Mali GPU driver decides about the mapping).
+ * @len: Length of the address range.
+ * @pgoff: Page offset within the GPU address space of the kbase context.
+ * @flags: Flags for the allocation.
+ *
+ * Finds the unmapped address range which satisfies requirements specific to
+ * GPU and those provided by the call parameters.
+ *
+ * 1) Requirement for allocations greater than 2MB:
+ * - alignment offset is set to 2MB and the alignment mask to 2MB decremented
+ * by 1.
+ *
+ * 2) Requirements imposed for the shader memory alignment:
+ * - alignment is decided by the number of GPU pc bits which can be read from
+ * GPU properties of the device associated with this kbase context; alignment
+ * offset is set to this value in bytes and the alignment mask to the offset
+ * decremented by 1.
+ * - allocations must not to be at 4GB boundaries. Such cases are indicated
+ * by the flag KBASE_REG_GPU_NX not being set (check the flags of the kbase
+ * region). 4GB boundaries can be checked against @ref BASE_MEM_MASK_4GB.
+ *
+ * 3) Requirements imposed for tiler memory alignment, cases indicated by
+ * the flag @ref KBASE_REG_TILER_ALIGN_TOP (check the flags of the kbase
+ * region):
+ * - alignment offset is set to the difference between the kbase region
+ * extent (converted from the original value in pages to bytes) and the kbase
+ * region initial_commit (also converted from the original value in pages to
+ * bytes); alignment mask is set to the kbase region extent in bytes and
+ * decremented by 1.
+ *
+ * Return: if successful, address of the unmapped area aligned as required;
+ *         error code (negative) in case of failure;
+ */
+unsigned long kbase_context_get_unmapped_area(struct kbase_context *kctx,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+
+/**
+ * kbase_jd_submit - Submit atoms to the job dispatcher
+ *
+ * @kctx: The kbase context to submit to
+ * @user_addr: The address in user space of the struct base_jd_atom_v2 array
+ * @nr_atoms: The number of atoms in the array
+ * @stride: sizeof(struct base_jd_atom_v2)
+ * @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6)
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_jd_submit(struct kbase_context *kctx,
+		void __user *user_addr, u32 nr_atoms, u32 stride,
+		bool uk6_atom);
+
+/**
+ * kbase_jd_done_worker - Handle a job completion
+ * @data: a &struct work_struct
+ *
+ * This function requeues the job from the runpool (if it was soft-stopped or
+ * removed from NEXT registers).
+ *
+ * Removes it from the system if it finished/failed/was cancelled.
+ *
+ * Resolves dependencies to add dependent jobs to the context, potentially
+ * starting them if necessary (which may add more references to the context)
+ *
+ * Releases the reference to the context from the no-longer-running job.
+ *
+ * Handles retrying submission outside of IRQ context if it failed from within
+ * IRQ context.
+ */
+void kbase_jd_done_worker(struct work_struct *data);
+
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+		kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+bool jd_submit_atom(struct kbase_context *kctx,
+			 const struct base_jd_atom_v2 *user_atom,
+			 struct kbase_jd_atom *katom);
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_job_done - Process completed jobs from job interrupt
+ * @kbdev: Pointer to the kbase device.
+ * @done: Bitmask of done or failed jobs, from JOB_IRQ_STAT register
+ *
+ * This function processes the completed, or failed, jobs from the GPU job
+ * slots, for the bits set in the @done bitmask.
+ *
+ * The hwaccess_lock must be held when calling this function.
+ */
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ *                                               and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The hwaccess_lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+/**
+ * kbasep_jit_alloc_validate() - Validate the JIT allocation info.
+ *
+ * @kctx:	Pointer to the kbase context within which the JIT
+ *		allocation is to be validated.
+ * @info:	Pointer to struct @base_jit_alloc_info
+ *			which is to be validated.
+ * @return: 0 if jit allocation is valid; negative error code otherwise
+ */
+int kbasep_jit_alloc_validate(struct kbase_context *kctx,
+					struct base_jit_alloc_info *info);
+/**
+ * kbase_free_user_buffer() - Free memory allocated for struct
+ *		@kbase_debug_copy_buffer.
+ *
+ * @buffer:	Pointer to the memory location allocated for the object
+ *		of the type struct @kbase_debug_copy_buffer.
+ */
+static inline void kbase_free_user_buffer(
+		struct kbase_debug_copy_buffer *buffer)
+{
+	struct page **pages = buffer->extres_pages;
+	int nr_pages = buffer->nr_extres_pages;
+
+	if (pages) {
+		int i;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *pg = pages[i];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(pages);
+	}
+}
+
+/**
+ * kbase_mem_copy_from_extres_page() - Copy pages from external resources.
+ *
+ * @kctx:		kbase context within which the copying is to take place.
+ * @extres_pages:	Pointer to the pages which correspond to the external
+ *			resources from which the copying will take place.
+ * @pages:		Pointer to the pages to which the content is to be
+ *			copied from the provided external resources.
+ * @nr_pages:		Number of pages to copy.
+ * @target_page_nr:	Number of target pages which will be used for copying.
+ * @offset:		Offset into the target pages from which the copying
+ *			is to be performed.
+ * @to_copy:		Size of the chunk to be copied, in bytes.
+ */
+void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+		void *extres_page, struct page **pages, unsigned int nr_pages,
+		unsigned int *target_page_nr, size_t offset, size_t *to_copy);
+/**
+ * kbase_mem_copy_from_extres() - Copy from external resources.
+ *
+ * @kctx:	kbase context within which the copying is to take place.
+ * @buf_data:	Pointer to the information about external resources:
+ *		pages pertaining to the external resource, number of
+ *		pages to copy.
+ */
+int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data);
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom);
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom);
+#endif
+int kbase_soft_event_update(struct kbase_context *kctx,
+			    u64 event,
+			    unsigned char new_status);
+
+void kbasep_soft_job_timeout_worker(struct timer_list *timer);
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev     The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code  exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+		u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or has already been suspended
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+	return kbdev->pm.suspending;
+}
+
+/**
+ * kbase_pm_is_active - Determine whether the GPU is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This takes into account whether there is an active context reference.
+ *
+ * Return: true if the GPU is active, false otherwise
+ */
+static inline bool kbase_pm_is_active(struct kbase_device *kbdev)
+{
+	return kbdev->pm.active_count > 0;
+}
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int result;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+	result = katom - &kctx->jctx.atoms[0];
+	KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
+	return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id:   ID of atom to retrieve
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+		struct kbase_context *kctx, int id)
+{
+	return &kctx->jctx.atoms[id];
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just increment
+ * the counter during disjoint events we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset. Increasing the disjoint state also increases
+ * the count of disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down
+ * should be called
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if !defined(UINT64_MAX)
+	#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#endif
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)     \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+	kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+	kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+	trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(subcode);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/* kbase_io_history_init - initialize data struct for register access history
+ *
+ * @kbdev The register history to initialize
+ * @n The number of register accesses that the buffer could hold
+ *
+ * @return 0 if successfully initialized, failure otherwise
+ */
+int kbase_io_history_init(struct kbase_io_history *h, u16 n);
+
+/* kbase_io_history_term - uninit all resources for the register access history
+ *
+ * @h The register history to terminate
+ */
+void kbase_io_history_term(struct kbase_io_history *h);
+
+/* kbase_io_history_dump - print the register history to the kernel ring buffer
+ *
+ * @kbdev Pointer to kbase_device containing the register history to dump
+ */
+void kbase_io_history_dump(struct kbase_device *kbdev);
+
+/**
+ * kbase_io_history_resize - resize the register access history buffer.
+ *
+ * @h: Pointer to a valid register history to resize
+ * @new_size: Number of accesses the buffer could hold
+ *
+ * A successful resize will clear all recent register accesses.
+ * If resizing fails for any reason (e.g., could not allocate memory, invalid
+ * buffer size) then the original buffer will be kept intact.
+ *
+ * @return 0 if the buffer was resized, failure otherwise
+ */
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbase_io_history_init(...) ((int)0)
+
+#define kbase_io_history_term CSTD_NOP
+
+#define kbase_io_history_dump CSTD_NOP
+
+#define kbase_io_history_resize CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100755
index 0000000..118511a
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,209 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015,2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+	struct device *dev = katom->kctx->kbdev->dev;
+	u32   clamped = 0;
+	struct kbase_va_region *region;
+	struct tagged_addr *page_array;
+	u64 page_index;
+	u32 offset = katom->jc & (~PAGE_MASK);
+	u32 *page_1 = NULL;
+	u32 *page_2 = NULL;
+	u32   job_header[JOB_HEADER_SIZE_IN_WORDS];
+	void *dst = job_header;
+	u32 minX, minY, maxX, maxY;
+	u32 restartX, restartY;
+	struct page *p;
+	u32 copy_size;
+
+	dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+	if (!(katom->core_req & BASE_JD_REQ_FS))
+		return 0;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+			katom->jc);
+	if (kbase_is_region_invalid_or_free(region))
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(region);
+	if (!page_array)
+		goto out_unlock;
+
+	page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+	p = as_page(page_array[page_index]);
+
+	/* we need the first 10 words of the fragment shader job descriptor.
+	 * We need to check that the offset + 10 words is less that the page
+	 * size otherwise we need to load the next page.
+	 * page_size_overflow will be equal to 0 in case the whole descriptor
+	 * is within the page > 0 otherwise.
+	 */
+	copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+	page_1 = kmap_atomic(p);
+
+	/* page_1 is a u32 pointer, offset is expressed in bytes */
+	page_1 += offset>>2;
+
+	kbase_sync_single_for_cpu(katom->kctx->kbdev,
+			kbase_dma_addr(p) + offset,
+			copy_size, DMA_BIDIRECTIONAL);
+
+	memcpy(dst, page_1, copy_size);
+
+	/* The data needed overflows page the dimension,
+	 * need to map the subsequent page */
+	if (copy_size < JOB_HEADER_SIZE) {
+		p = as_page(page_array[page_index + 1]);
+		page_2 = kmap_atomic(p);
+
+		kbase_sync_single_for_cpu(katom->kctx->kbdev,
+				kbase_dma_addr(p),
+				JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+		memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+	}
+
+	/* We managed to correctly map one or two pages (in case of overflow) */
+	/* Get Bounding Box data and restart index from fault address low word */
+	minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+	restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+	dev_warn(dev, "Before Clamping:\n"
+			"Jobstatus: %08x\n"
+			"restartIdx: %08x\n"
+			"Fault_addr_low: %08x\n"
+			"minCoordsX: %08x minCoordsY: %08x\n"
+			"maxCoordsX: %08x maxCoordsY: %08x\n",
+			job_header[JOB_DESC_STATUS_WORD],
+			job_header[JOB_DESC_RESTART_INDEX_WORD],
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+			minX, minY,
+			maxX, maxY);
+
+	/* Set the restart index to the one which generated the fault*/
+	job_header[JOB_DESC_RESTART_INDEX_WORD] =
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+	if (restartX < minX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to minimum. %08x clamped to %08x\n",
+			restartX, minX);
+		clamped =  1;
+	}
+	if (restartY < minY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to minimum. %08x clamped to %08x\n",
+			restartY, minY);
+		clamped =  1;
+	}
+	if (restartX > maxX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to maximum. %08x clamped to %08x\n",
+			restartX, maxX);
+		clamped =  1;
+	}
+	if (restartY > maxY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to maximum. %08x clamped to %08x\n",
+			restartY, maxY);
+		clamped =  1;
+	}
+
+	if (clamped) {
+		/* Reset the fault address low word
+		 * and set the job status to STOPPED */
+		job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+		job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+		dev_warn(dev, "After Clamping:\n"
+				"Jobstatus: %08x\n"
+				"restartIdx: %08x\n"
+				"Fault_addr_low: %08x\n"
+				"minCoordsX: %08x minCoordsY: %08x\n"
+				"maxCoordsX: %08x maxCoordsY: %08x\n",
+				job_header[JOB_DESC_STATUS_WORD],
+				job_header[JOB_DESC_RESTART_INDEX_WORD],
+				job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+				minX, minY,
+				maxX, maxY);
+
+		/* Flush CPU cache to update memory for future GPU reads*/
+		memcpy(page_1, dst, copy_size);
+		p = as_page(page_array[page_index]);
+
+		kbase_sync_single_for_device(katom->kctx->kbdev,
+				kbase_dma_addr(p) + offset,
+				copy_size, DMA_TO_DEVICE);
+
+		if (copy_size < JOB_HEADER_SIZE) {
+			memcpy(page_2, dst + copy_size,
+					JOB_HEADER_SIZE - copy_size);
+			p = as_page(page_array[page_index + 1]);
+
+			kbase_sync_single_for_device(katom->kctx->kbdev,
+					kbase_dma_addr(p),
+					JOB_HEADER_SIZE - copy_size,
+					DMA_TO_DEVICE);
+		}
+	}
+	if (copy_size < JOB_HEADER_SIZE)
+		kunmap_atomic(page_2);
+
+	kunmap_atomic(page_1);
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+	return clamped;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100755
index 0000000..379a05a
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+/**
+ * kbasep_10969_workaround_clamp_coordinates - Apply the WA to clamp the restart indices
+ * @katom: atom representing the fragment job for which the WA has to be applied
+ *
+ * This workaround is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, it can happen
+ * that the restart index is out of bounds and the rerun causes a tile range
+ * fault. If this happens we try to clamp the restart index to a correct value.
+ */
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
new file mode 100755
index 0000000..6f638cc
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+
+static int kbase_as_fault_read(struct seq_file *sfile, void *data)
+{
+	uintptr_t as_no = (uintptr_t) sfile->private;
+
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+	struct kbase_device *kbdev = NULL;
+
+	kbdev_list = kbase_dev_list_get();
+
+	list_for_each(entry, kbdev_list) {
+		kbdev = list_entry(entry, struct kbase_device, entry);
+
+		if (kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) {
+
+			/* don't show this one again until another fault occors */
+			kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no);
+
+			/* output the last page fault addr */
+			seq_printf(sfile, "%llu\n",
+				   (u64) kbdev->as[as_no].pf_data.addr);
+		}
+
+	}
+
+	kbase_dev_list_put(kbdev_list);
+
+	return 0;
+}
+
+static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbase_as_fault_read, in->i_private);
+}
+
+static const struct file_operations as_fault_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_as_fault_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ *  Initialize debugfs entry for each address space
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	uint i;
+	char as_name[64];
+	struct dentry *debugfs_directory;
+
+	kbdev->debugfs_as_read_bitmap = 0ULL;
+
+	KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces);
+	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].pf_data.addr) == sizeof(u64));
+
+	debugfs_directory = debugfs_create_dir("address_spaces",
+					       kbdev->mali_debugfs_directory);
+
+	if (debugfs_directory) {
+		for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+			snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
+			debugfs_create_file(as_name, S_IRUGO,
+					    debugfs_directory,
+					    (void *)(uintptr_t)i,
+					    &as_fault_fops);
+		}
+	} else {
+		dev_warn(kbdev->dev,
+			 "unable to create address_spaces debugfs directory");
+	}
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+	return;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
new file mode 100755
index 0000000..496d8b1
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_AS_FAULT_DEBUG_FS_H
+#define _KBASE_AS_FAULT_DEBUG_FS_H
+
+/**
+ * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults
+ *
+ * @kbdev: Pointer to kbase_device
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_as_fault_debugfs_new() - make the last fault available on debugfs
+ *
+ * @kbdev: Pointer to kbase_device
+ * @as_no: The address space the fault occurred on
+ */
+static inline void
+kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
+#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_MALI_DEBUG */
+	return;
+}
+
+#endif  /*_KBASE_AS_FAULT_DEBUG_FS_H*/
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h
new file mode 100644
index 0000000..2c11093
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h
@@ -0,0 +1,41 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ *//* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ */
+
+#ifndef _KBASE_BITS_H_
+#define _KBASE_BITS_H_
+
+#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE)
+#include <linux/bits.h>
+#else
+#include <linux/bitops.h>
+#endif
+
+#endif /* _KBASE_BITS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100755
index 0000000..27a03cf
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled
+ * KBASE_REG_GPU_CACHED: GPU cache should be enabled
+ *
+ * NOTE: Some components within the GPU might only be able to access memory
+ * that is KBASE_REG_GPU_CACHED. Refer to the specific GPU implementation for
+ * more details.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+	u32 cache_flags = 0;
+
+	CSTD_UNUSED(nr_pages);
+
+	if (!(flags & BASE_MEM_UNCACHED_GPU))
+		cache_flags |= KBASE_REG_GPU_CACHED;
+
+	if (flags & BASE_MEM_CACHED_CPU)
+		cache_flags |= KBASE_REG_CPU_CACHED;
+
+	return cache_flags;
+}
+
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	dma_sync_single_for_device(kbdev->dev, handle, size, dir);
+}
+
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
new file mode 100755
index 0000000..8a1e529
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * kbase_cache_enabled - Choose the cache policy for a specific region
+ * @flags:    flags describing attributes of the region
+ * @nr_pages: total number of pages (backed or not) for the region
+ *
+ * Tells whether the CPU and GPU caches should be enabled or not for a specific
+ * region.
+ * This function can be modified to customize the cache policy depending on the
+ * flags and size of the region.
+ *
+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED
+ *         depending on the cache policy
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
new file mode 100755
index 0000000..ce7070d
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
@@ -0,0 +1,48 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+int kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_init_func)
+		return platform_funcs_p->platform_init_func(kbdev);
+
+	return 0;
+}
+
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_term_func)
+		platform_funcs_p->platform_term_func(kbdev);
+}
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
new file mode 100755
index 0000000..1637fcb
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
@@ -0,0 +1,299 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <linux/mm.h>
+#include <mali_malisw.h>
+#include <mali_kbase_backend_config.h>
+#include <linux/rbtree.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/**
+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ *
+ * Specifies the functions pointers for platform specific initialization and
+ * termination. By default no functions are required. No additional platform
+ * specific control is necessary.
+ */
+struct kbase_platform_funcs_conf {
+	/**
+	 * platform_init_func - platform specific init function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Returns 0 on success, negative error code otherwise.
+	 *
+	 * Function pointer for platform specific initialization or NULL if no
+	 * initialization function is required. At the point this the GPU is
+	 * not active and its power and clocks are in unknown (platform specific
+	 * state) as kbase doesn't yet have control of power and clocks.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly initialized) in here.
+	 */
+	int (*platform_init_func)(struct kbase_device *kbdev);
+	/**
+	 * platform_term_func - platform specific termination function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Function pointer for platform specific termination or NULL if no
+	 * termination function is required. At the point this the GPU will be
+	 * idle but still powered and clocked.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly terminated) in here.
+	 */
+	void (*platform_term_func)(struct kbase_device *kbdev);
+};
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+	/** Callback for when the GPU is idle and the power to it can be switched off.
+	 *
+	 * The system integrator can decide whether to either do nothing, just switch off
+	 * the clocks to the GPU, or to completely power down the GPU.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the GPU is about to become active and power must be supplied.
+	 *
+	 * This function must not return until the GPU is powered and clocked sufficiently for register access to
+	 * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
+	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 *
+	 * The return value of the first call to this function is ignored.
+	 *
+	 * @return 1 if the GPU state may have been lost, 0 otherwise.
+	 */
+	int (*power_on_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is requesting a suspend and GPU power
+	 * must be switched off.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a preceding call to power_off_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_off_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is resuming from a suspend and GPU
+	 * power must be switched on.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a following call to power_on_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_on_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_resume_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management initialization.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * will become active from calls made to the OS from within this function.
+	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else int error code.
+	 */
+	 int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management termination.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * should no longer be called by the OS on completion of this function.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-off power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_suspend callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
+	 */
+	void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-on power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_resume callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+
+	/*
+	 * Optional callback for checking if GPU can be suspended when idle
+	 *
+	 * This callback will be called by the runtime power management core
+	 * when the reference count goes to 0 to provide notification that the
+	 * GPU now seems idle.
+	 *
+	 * If this callback finds that the GPU can't be powered off, or handles
+	 * suspend by powering off directly or queueing up a power off, a
+	 * non-zero value must be returned to prevent the runtime PM core from
+	 * also triggering a suspend.
+	 *
+	 * Returning 0 will cause the runtime PM core to conduct a regular
+	 * autosuspend.
+	 *
+	 * This callback is optional and if not provided regular autosuspend
+	 * will be triggered.
+	 *
+	 * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+	 * this feature.
+	 *
+	 * Return 0 if GPU can be suspended, positive value if it can not be
+	 * suspeneded by runtime PM, else OS error code
+	 */
+	int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
+};
+
+#ifdef CONFIG_OF
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+	u64 start;
+	u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ */
+struct kbase_io_resources {
+	u32                      job_irq_number;
+	u32                      mmu_irq_number;
+	u32                      gpu_irq_number;
+	struct kbase_io_memory_region io_memory_region;
+};
+
+struct kbase_platform_config {
+	const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init: - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes.  The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ *         Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that are outside of the Power Management
+ * callbacks.
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+#ifndef CONFIG_OF
+/**
+ * kbase_platform_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_register(void);
+
+/**
+ * kbase_platform_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_register()
+ */
+void kbase_platform_unregister(void);
+#endif
+
+	  /** @} *//* end group kbase_config */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_CONFIG_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100755
index 0000000..447e059
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,217 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+* Boolean indicating whether the driver is configured to be secure at
+* a potential loss of performance.
+*
+* This currently affects only r0p0-15dev0 HW and earlier.
+*
+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+* performance:
+*
+* - When this is set to true, the driver remains fully secure,
+* but potentially loses performance compared with setting this to
+* false.
+* - When set to false, the driver is open to certain security
+* attacks.
+*
+* From r0p0-00rel0 and onwards, there is no security loss by setting
+* this to false, and no performance loss by setting it to
+* true.
+*/
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 */
+	KBASE_AID_32 = 0x0,
+
+	/**
+	 * Restrict GPU to a half of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_16 = 0x3,
+
+	/**
+	 * Restrict GPU to a quarter of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_8  = 0x2,
+
+	/**
+	 * Restrict GPU to an eighth of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_4  = 0x1
+};
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 * Restricting ID width will reduce performance & bus load due to GPU.
+	 */
+	KBASE_3BIT_AID_32 = 0x0,
+
+	/* Restrict GPU to 7/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_28 = 0x1,
+
+	/* Restrict GPU to 3/4 of maximum Address ID count. */
+	KBASE_3BIT_AID_24 = 0x2,
+
+	/* Restrict GPU to 5/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_20 = 0x3,
+
+	/* Restrict GPU to 1/2 of maximum Address ID count.  */
+	KBASE_3BIT_AID_16 = 0x4,
+
+	/* Restrict GPU to 3/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_12 = 0x5,
+
+	/* Restrict GPU to 1/4 of maximum Address ID count. */
+	KBASE_3BIT_AID_8  = 0x6,
+
+	/* Restrict GPU to 1/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_4  = 0x7
+};
+
+/**
+ * Default period for DVFS sampling
+ */
+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+
+/**
+ * Power Management poweroff tick granuality. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/**
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/**
+ * Default scheduling tick granuality
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
+
+/**
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */
+
+/**
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */
+
+/**
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
+
+/**
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */
+
+/**
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
+
+/**
+ * Default timeout for some software jobs, after which the software event wait
+ * jobs will be cancelled.
+ */
+#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */
+
+/**
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
+
+/**
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */
+
+/**
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
+ */
+#define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */
+
+/**
+ * Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
+/**
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
+
+/**
+ * Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
+ * this isn't available, so we simply define a dummy value here. If devfreq
+ * is enabled the value will be read from there, otherwise this should be
+ * overridden by defining GPU_FREQ_KHZ_MAX in the platform file.
+ */
+#define DEFAULT_GPU_FREQ_KHZ_MAX (5000)
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
new file mode 100755
index 0000000..ed482e1
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -0,0 +1,331 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_dma_fence.h>
+#include <mali_kbase_ctx_sched.h>
+#include <mali_kbase_mem_pool_group.h>
+#include <mali_kbase_tracepoints.h>
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat,
+	base_context_create_flags const flags,
+	unsigned long const api_version,
+	struct file *const filp)
+{
+	struct kbase_context *kctx;
+	int err;
+	struct page *p;
+	struct kbasep_js_kctx_info *js_kctx_info = NULL;
+	unsigned long irq_flags = 0;
+
+	if (WARN_ON(!kbdev))
+		goto out;
+
+	/* Validate flags */
+	if (WARN_ON(flags != (flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS)))
+		goto out;
+
+	/* zero-inited as lot of code assume it's zero'ed out on create */
+	kctx = vzalloc(sizeof(*kctx));
+
+	if (!kctx)
+		goto out;
+
+	/* creating a context is considered a disjoint event */
+	kbase_disjoint_event(kbdev);
+
+	kctx->kbdev = kbdev;
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+	atomic_set(&kctx->refcount, 0);
+	if (is_compat)
+		kbase_ctx_flag_set(kctx, KCTX_COMPAT);
+#if defined(CONFIG_64BIT)
+	else
+		kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA);
+#endif /* !defined(CONFIG_64BIT) */
+
+	spin_lock_init(&kctx->mm_update_lock);
+	kctx->process_mm = NULL;
+	atomic_set(&kctx->nonmapped_pages, 0);
+	kctx->slots_pullable = 0;
+	kctx->tgid = current->tgid;
+	kctx->pid = current->pid;
+
+	err = kbase_mem_pool_group_init(&kctx->mem_pools, kbdev,
+		&kbdev->mem_pool_defaults, &kbdev->mem_pools);
+	if (err)
+		goto free_kctx;
+
+	err = kbase_mem_evictable_init(kctx);
+	if (err)
+		goto free_both_pools;
+
+	atomic_set(&kctx->used_pages, 0);
+
+	err = kbase_jd_init(kctx);
+	if (err)
+		goto deinit_evictable;
+
+	err = kbasep_js_kctx_init(kctx);
+	if (err)
+		goto free_jd;	/* safe to call kbasep_js_kctx_term  in this case */
+
+	err = kbase_event_init(kctx);
+	if (err)
+		goto free_jd;
+
+	atomic_set(&kctx->drain_pending, 0);
+
+	mutex_init(&kctx->reg_lock);
+
+	spin_lock_init(&kctx->mem_partials_lock);
+	INIT_LIST_HEAD(&kctx->mem_partials);
+
+	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+	spin_lock_init(&kctx->waiting_soft_jobs_lock);
+	err = kbase_dma_fence_init(kctx);
+	if (err)
+		goto free_event;
+
+	err = kbase_mmu_init(kbdev, &kctx->mmu, kctx,
+		base_context_mmu_group_id_get(flags));
+	if (err)
+		goto term_dma_fence;
+
+	p = kbase_mem_alloc_page(
+		&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK]);
+	if (!p)
+		goto no_sink_page;
+	kctx->aliasing_sink_page = as_tagged(page_to_phys(p));
+
+	init_waitqueue_head(&kctx->event_queue);
+
+	kctx->cookies = KBASE_COOKIE_MASK;
+
+	/* Make sure page 0 is not used... */
+	err = kbase_region_tracker_init(kctx);
+	if (err)
+		goto no_region_tracker;
+
+	err = kbase_sticky_resource_init(kctx);
+	if (err)
+		goto no_sticky;
+
+	err = kbase_jit_init(kctx);
+	if (err)
+		goto no_jit;
+
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_set(&kctx->jctx.work_id, 0);
+#endif
+
+	kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+	mutex_init(&kctx->legacy_hwcnt_lock);
+
+	kbase_timer_setup(&kctx->soft_job_timeout,
+			  kbasep_soft_job_timeout_worker);
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_add(&kctx->kctx_list_link, &kbdev->kctx_list);
+	KBASE_TLSTREAM_TL_NEW_CTX(kbdev, kctx, kctx->id, (u32)(kctx->tgid));
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	kctx->api_version = api_version;
+	kctx->filp = filp;
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+
+	/* Translate the flags */
+	if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+		kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
+
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	return kctx;
+
+no_jit:
+	kbase_gpu_vm_lock(kctx);
+	kbase_sticky_resource_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+no_sticky:
+	kbase_region_tracker_term(kctx);
+no_region_tracker:
+	kbase_mem_pool_free(
+		&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK], p, false);
+no_sink_page:
+	kbase_mmu_term(kbdev, &kctx->mmu);
+term_dma_fence:
+	kbase_dma_fence_term(kctx);
+free_event:
+	kbase_event_cleanup(kctx);
+free_jd:
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+	kbase_jd_exit(kctx);
+deinit_evictable:
+	kbase_mem_evictable_deinit(kctx);
+free_both_pools:
+	kbase_mem_pool_group_term(&kctx->mem_pools);
+free_kctx:
+	vfree(kctx);
+out:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context);
+
+static void kbase_reg_pending_dtor(struct kbase_device *kbdev,
+		struct kbase_va_region *reg)
+{
+	dev_dbg(kbdev->dev, "Freeing pending unmapped region\n");
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+}
+
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	int pages;
+	unsigned long pending_regions_to_clean;
+	unsigned long flags;
+	struct page *p;
+
+	if (WARN_ON(!kctx))
+		return;
+
+	kbdev = kctx->kbdev;
+	if (WARN_ON(!kbdev))
+		return;
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	KBASE_TLSTREAM_TL_DEL_CTX(kbdev, kctx);
+	list_del(&kctx->kctx_list_link);
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+	/* Ensure the core is powered up for the destroy process */
+	/* A suspend won't happen here, because we're in a syscall from a userspace
+	 * thread. */
+	kbase_pm_context_active(kbdev);
+
+	kbase_mem_pool_group_mark_dying(&kctx->mem_pools);
+
+	kbase_jd_zap_context(kctx);
+
+	/* We have already waited for the jobs to complete (and hereafter there
+	 * can be no more submissions for the context). However the wait could
+	 * have timedout and there could still be work items in flight that
+	 * would do the completion processing of jobs.
+	 * kbase_jd_exit() will destroy the 'job_done_wq'. And destroying the wq
+	 * will cause it do drain and implicitly wait for those work items to
+	 * complete.
+	 */
+	kbase_jd_exit(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	/* Removing the rest of the debugfs entries here as we want to keep the
+	 * atom debugfs interface alive until all atoms have completed. This
+	 * is useful for debugging hung contexts. */
+	debugfs_remove_recursive(kctx->kctx_dentry);
+	kbase_debug_job_fault_context_term(kctx);
+#endif
+
+	kbase_event_cleanup(kctx);
+
+
+	/*
+	 * JIT must be terminated before the code below as it must be called
+	 * without the region lock being held.
+	 * The code above ensures no new JIT allocations can be made by
+	 * by the time we get to this point of context tear down.
+	 */
+	kbase_jit_term(kctx);
+
+	kbase_gpu_vm_lock(kctx);
+
+	kbase_sticky_resource_term(kctx);
+
+	/* drop the aliasing sink page now that it can't be mapped anymore */
+	p = as_page(kctx->aliasing_sink_page);
+	kbase_mem_pool_free(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK],
+		p, false);
+
+	/* free pending region setups */
+	pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+	while (pending_regions_to_clean) {
+		unsigned int cookie = __ffs(pending_regions_to_clean);
+
+		BUG_ON(!kctx->pending_regions[cookie]);
+
+		kbase_reg_pending_dtor(kbdev, kctx->pending_regions[cookie]);
+
+		kctx->pending_regions[cookie] = NULL;
+		pending_regions_to_clean &= ~(1UL << cookie);
+	}
+
+	kbase_region_tracker_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+
+	kbase_dma_fence_term(kctx);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_remove_ctx(kctx);
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_mmu_term(kbdev, &kctx->mmu);
+
+	pages = atomic_read(&kctx->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_evictable_deinit(kctx);
+
+	kbase_mem_pool_group_term(&kctx->mem_pools);
+
+	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+	vfree(kctx);
+
+	kbase_pm_context_idle(kbdev);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h
new file mode 100755
index 0000000..5037b4e
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h
@@ -0,0 +1,125 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017, 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_CONTEXT_H_
+#define _KBASE_CONTEXT_H_
+
+#include <linux/atomic.h>
+
+/**
+ * kbase_create_context() - Create a kernel base context.
+ *
+ * @kbdev:       Object representing an instance of GPU platform device,
+ *               allocated from the probe method of the Mali driver.
+ * @is_compat:   Force creation of a 32-bit context
+ * @flags:       Flags to set, which shall be any combination of
+ *               BASEP_CONTEXT_CREATE_KERNEL_FLAGS.
+ * @api_version: Application program interface version, as encoded in
+ *               a single integer by the KBASE_API_VERSION macro.
+ * @filp:        Pointer to the struct file corresponding to device file
+ *               /dev/malixx instance, passed to the file's open method.
+ *
+ * Up to one context can be created for each client that opens the device file
+ * /dev/malixx. Context creation is deferred until a special ioctl() system call
+ * is made on the device file. Each context has its own GPU address space.
+ *
+ * Return: new kbase context or NULL on failure
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat,
+	base_context_create_flags const flags,
+	unsigned long api_version,
+	struct file *filp);
+
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx);
+
+/**
+ * kbase_ctx_flag - Check if @flag is set on @kctx
+ * @kctx: Pointer to kbase context to check
+ * @flag: Flag to check
+ *
+ * Return: true if @flag is set on @kctx, false if not.
+ */
+static inline bool kbase_ctx_flag(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	return atomic_read(&kctx->flags) & flag;
+}
+
+/**
+ * kbase_ctx_flag_clear - Clear @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Clear the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
+					enum kbase_context_flags flag)
+{
+#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
+	/*
+	 * Earlier kernel versions doesn't have atomic_andnot() or
+	 * atomic_and(). atomic_clear_mask() was only available on some
+	 * architectures and removed on arm in v3.13 on arm and arm64.
+	 *
+	 * Use a compare-exchange loop to clear the flag on pre 4.3 kernels,
+	 * when atomic_andnot() becomes available.
+	 */
+	int old, new;
+
+	do {
+		old = atomic_read(&kctx->flags);
+		new = old & ~flag;
+
+	} while (atomic_cmpxchg(&kctx->flags, old, new) != old);
+#else
+	atomic_andnot(flag, &kctx->flags);
+#endif
+}
+
+/**
+ * kbase_ctx_flag_set - Set @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to set
+ *
+ * Set the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	atomic_or(flag, &kctx->flags);
+}
+#endif /* _KBASE_CONTEXT_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
new file mode 100755
index 0000000..b2b5a08
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -0,0 +1,4599 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_mem_linux.h>
+#ifdef CONFIG_MALI_DEVFREQ
+#include <linux/devfreq.h>
+#include <backend/gpu/mali_kbase_devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <ipa/mali_kbase_ipa_debugfs.h>
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+#ifdef CONFIG_MALI_NO_MALI
+#include "mali_kbase_model_linux.h"
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* CONFIG_MALI_NO_MALI */
+#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_debugfs.h"
+#include "mali_kbase_debugfs_helper.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_regs_dump_debugfs.h"
+#endif /* !MALI_CUSTOMER_RELEASE */
+#include "mali_kbase_regs_history_debugfs.h"
+#include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+#include <mali_kbase_reset_gpu.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_ioctl.h"
+#include "mali_kbase_hwcnt_context.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_legacy.h"
+#include "mali_kbase_vinstr.h"
+
+#ifdef CONFIG_MALI_CINSTR_GWT
+#include "mali_kbase_gwt.h"
+#endif
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/compat.h>	/* is_compat_task/in_compat_syscall */
+#include <linux/mman.h>
+#include <linux/version.h>
+#include <mali_kbase_hw.h>
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/log2.h>
+
+#include <mali_kbase_config.h>
+
+
+#if (KERNEL_VERSION(3, 13, 0) <= LINUX_VERSION_CODE)
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif
+
+#include <linux/pm_runtime.h>
+
+#include <mali_kbase_timeline.h>
+
+#include <mali_kbase_as_fault_debugfs.h>
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static int kbase_dev_nr;
+
+static DEFINE_MUTEX(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+
+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
+
+/**
+ * kbase_file_new - Create an object representing a device file
+ *
+ * @kbdev:  An instance of the GPU platform device, allocated from the probe
+ *          method of the driver.
+ * @filp:   Pointer to the struct file corresponding to device file
+ *          /dev/malixx instance, passed to the file's open method.
+ *
+ * In its initial state, the device file has no context (i.e. no GPU
+ * address space) and no API version number. Both must be assigned before
+ * kbase_file_get_kctx_if_setup_complete() can be used successfully.
+ *
+ * @return Address of an object representing a simulated device file, or NULL
+ *         on failure.
+ */
+static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev,
+	struct file *const filp)
+{
+	struct kbase_file *const kfile = kmalloc(sizeof(*kfile), GFP_KERNEL);
+
+	if (kfile) {
+		kfile->kbdev = kbdev;
+		kfile->filp = filp;
+		kfile->kctx = NULL;
+		kfile->api_version = 0;
+		atomic_set(&kfile->setup_state, KBASE_FILE_NEED_VSN);
+	}
+	return kfile;
+}
+
+/**
+ * kbase_file_get_api_version - Set the application programmer interface version
+ *
+ * @kfile:  A device file created by kbase_file_new()
+ * @major:  Major version number (must not exceed 12 bits)
+ * @minor:  Major version number (must not exceed 12 bits)
+ *
+ * An application programmer interface (API) version must be specified
+ * before calling kbase_file_create_kctx(), otherwise an error is returned.
+ *
+ * If a version number was already set for the given @kfile (or is in the
+ * process of being set by another thread) then an error is returned.
+ *
+ * Return: 0 if successful, otherwise a negative error code.
+ */
+static int kbase_file_set_api_version(struct kbase_file *const kfile,
+	u16 const major, u16 const minor)
+{
+	if (WARN_ON(!kfile))
+		return -EINVAL;
+
+	/* setup pending, try to signal that we'll do the setup,
+	 * if setup was already in progress, err this call
+	 */
+	if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_VSN,
+		KBASE_FILE_VSN_IN_PROGRESS) != KBASE_FILE_NEED_VSN)
+		return -EPERM;
+
+	/* save the proposed version number for later use */
+	kfile->api_version = KBASE_API_VERSION(major, minor);
+
+	atomic_set(&kfile->setup_state, KBASE_FILE_NEED_CTX);
+	return 0;
+}
+
+/**
+ * kbase_file_get_api_version - Get the application programmer interface version
+ *
+ * @kfile:  A device file created by kbase_file_new()
+ *
+ * Return: The version number (encoded with KBASE_API_VERSION) or 0 if none has
+ *         been set.
+ */
+static unsigned long kbase_file_get_api_version(struct kbase_file *const kfile)
+{
+	if (WARN_ON(!kfile))
+		return 0;
+
+	if (atomic_read(&kfile->setup_state) < KBASE_FILE_NEED_CTX)
+		return 0;
+
+	return kfile->api_version;
+}
+
+/**
+ * kbase_file_create_kctx - Create a kernel base context
+ *
+ * @kfile:  A device file created by kbase_file_new()
+ * @flags:  Flags to set, which can be any combination of
+ *          BASEP_CONTEXT_CREATE_KERNEL_FLAGS.
+ *
+ * This creates a new context for the GPU platform device instance that was
+ * specified when kbase_file_new() was called. Each context has its own GPU
+ * address space. If a context was already created for the given @kfile (or is
+ * in the process of being created for it by another thread) then an error is
+ * returned.
+ *
+ * An API version number must have been set by kbase_file_set_api_version()
+ * before calling this function, otherwise an error is returned.
+ *
+ * Return: 0 if a new context was created, otherwise a negative error code.
+ */
+static int kbase_file_create_kctx(struct kbase_file *kfile,
+	base_context_create_flags flags);
+
+/**
+ * kbase_file_get_kctx_if_setup_complete - Get a kernel base context
+ *                                         pointer from a device file
+ *
+ * @kfile: A device file created by kbase_file_new()
+ *
+ * This function returns an error code (encoded with ERR_PTR) if no context
+ * has been created for the given @kfile. This makes it safe to use in
+ * circumstances where the order of initialization cannot be enforced, but
+ * only if the caller checks the return value.
+ *
+ * Return: Address of the kernel base context associated with the @kfile, or
+ *         NULL if no context exists.
+ */
+static struct kbase_context *kbase_file_get_kctx_if_setup_complete(
+	struct kbase_file *const kfile)
+{
+	if (WARN_ON(!kfile) ||
+		atomic_read(&kfile->setup_state) != KBASE_FILE_COMPLETE ||
+		WARN_ON(!kfile->kctx))
+		return NULL;
+
+	return kfile->kctx;
+}
+
+/**
+ * kbase_file_delete - Destroy an object representing a device file
+ *
+ * @kfile: A device file created by kbase_file_new()
+ *
+ * If any context was created for the @kfile then it is destroyed.
+ */
+static void kbase_file_delete(struct kbase_file *const kfile)
+{
+	struct kbase_device *kbdev = NULL;
+
+	if (WARN_ON(!kfile))
+		return;
+
+	kfile->filp->private_data = NULL;
+	kbdev = kfile->kbdev;
+
+	if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) {
+		struct kbase_context *kctx = kfile->kctx;
+
+#ifdef CONFIG_DEBUG_FS
+		kbasep_mem_profile_debugfs_remove(kctx);
+#endif
+
+		mutex_lock(&kctx->legacy_hwcnt_lock);
+		/* If this client was performing hardware counter dumping and
+		 * did not explicitly detach itself, destroy it now
+		 */
+		kbase_hwcnt_legacy_client_destroy(kctx->legacy_hwcnt_cli);
+		kctx->legacy_hwcnt_cli = NULL;
+		mutex_unlock(&kctx->legacy_hwcnt_lock);
+
+		kbase_destroy_context(kctx);
+
+		dev_dbg(kbdev->dev, "deleted base context\n");
+	}
+
+	kbase_release_device(kbdev);
+
+	kfree(kfile);
+}
+
+static int kbase_api_handshake(struct kbase_file *kfile,
+			       struct kbase_ioctl_version_check *version)
+{
+	int err = 0;
+
+	switch (version->major) {
+	case BASE_UK_VERSION_MAJOR:
+		/* set minor to be the lowest common */
+		version->minor = min_t(int, BASE_UK_VERSION_MINOR,
+				       (int)version->minor);
+		break;
+	default:
+		/* We return our actual version regardless if it
+		 * matches the version returned by userspace -
+		 * userspace can bail if it can't handle this
+		 * version
+		 */
+		version->major = BASE_UK_VERSION_MAJOR;
+		version->minor = BASE_UK_VERSION_MINOR;
+		break;
+	}
+
+	/* save the proposed version number for later use */
+	err = kbase_file_set_api_version(kfile, version->major, version->minor);
+	if (unlikely(err))
+		return err;
+
+	/* For backward compatibility, we may need to create the context before
+	 * the flags have been set. Originally it was created on file open
+	 * (with job submission disabled) but we don't support that usage.
+	 */
+	if (kbase_file_get_api_version(kfile) < KBASE_API_VERSION(11, 15))
+		err = kbase_file_create_kctx(kfile,
+			BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED);
+
+	return err;
+}
+
+/**
+ * enum mali_error - Mali error codes shared with userspace
+ *
+ * This is subset of those common Mali errors that can be returned to userspace.
+ * Values of matching user and kernel space enumerators MUST be the same.
+ * MALI_ERROR_NONE is guaranteed to be 0.
+ *
+ * @MALI_ERROR_NONE: Success
+ * @MALI_ERROR_OUT_OF_GPU_MEMORY: Not used in the kernel driver
+ * @MALI_ERROR_OUT_OF_MEMORY: Memory allocation failure
+ * @MALI_ERROR_FUNCTION_FAILED: Generic error code
+ */
+enum mali_error {
+	MALI_ERROR_NONE = 0,
+	MALI_ERROR_OUT_OF_GPU_MEMORY,
+	MALI_ERROR_OUT_OF_MEMORY,
+	MALI_ERROR_FUNCTION_FAILED,
+};
+
+enum {
+	inited_mem = (1u << 0),
+	inited_js = (1u << 1),
+	/* Bit number 2 was earlier assigned to the runtime-pm initialization
+	 * stage (which has been merged with the backend_early stage).
+	 */
+#ifdef CONFIG_MALI_DEVFREQ
+	inited_devfreq = (1u << 3),
+#endif /* CONFIG_MALI_DEVFREQ */
+	inited_tlstream = (1u << 4),
+	inited_backend_early = (1u << 5),
+	inited_hwcnt_gpu_iface = (1u << 6),
+	inited_hwcnt_gpu_ctx = (1u << 7),
+	inited_hwcnt_gpu_virt = (1u << 8),
+	inited_vinstr = (1u << 9),
+	inited_backend_late = (1u << 10),
+	inited_device = (1u << 11),
+	inited_job_fault = (1u << 13),
+	inited_sysfs_group = (1u << 14),
+	inited_misc_register = (1u << 15),
+	inited_get_device = (1u << 16),
+	inited_dev_list = (1u << 17),
+	inited_debugfs = (1u << 18),
+	inited_gpu_device = (1u << 19),
+	inited_registers_map = (1u << 20),
+	inited_io_history = (1u << 21),
+	inited_power_control = (1u << 22),
+	inited_buslogger = (1u << 23),
+	inited_protected = (1u << 24),
+	inited_ctx_sched = (1u << 25)
+};
+
+static struct kbase_device *to_kbase_device(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
+
+static int assign_irqs(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int i;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* 3 IRQ resources */
+	for (i = 0; i < 3; i++) {
+		struct resource *irq_res;
+		int irqtag;
+
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res) {
+			dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+			return -ENOENT;
+		}
+
+#ifdef CONFIG_OF
+		if (!strncmp(irq_res->name, "JOB", 4)) {
+			irqtag = JOB_IRQ_TAG;
+		} else if (!strncmp(irq_res->name, "MMU", 4)) {
+			irqtag = MMU_IRQ_TAG;
+		} else if (!strncmp(irq_res->name, "GPU", 4)) {
+			irqtag = GPU_IRQ_TAG;
+		} else {
+			dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+				irq_res->name);
+			return -EINVAL;
+		}
+#else
+		irqtag = i;
+#endif /* CONFIG_OF */
+		kbdev->irqs[irqtag].irq = irq_res->start;
+		kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK;
+	}
+
+	return 0;
+}
+
+/*
+ * API to acquire device list mutex and
+ * return pointer to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void)
+{
+	mutex_lock(&kbase_dev_list_lock);
+	return &kbase_dev_list;
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_get);
+
+/* API to release the device list mutex */
+void kbase_dev_list_put(const struct list_head *dev_list)
+{
+	mutex_unlock(&kbase_dev_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_put);
+
+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */
+struct kbase_device *kbase_find_device(int minor)
+{
+	struct kbase_device *kbdev = NULL;
+	struct list_head *entry;
+	const struct list_head *dev_list = kbase_dev_list_get();
+
+	list_for_each(entry, dev_list) {
+		struct kbase_device *tmp;
+
+		tmp = list_entry(entry, struct kbase_device, entry);
+		if (tmp->mdev.minor == minor || minor == -1) {
+			kbdev = tmp;
+			get_device(kbdev->dev);
+			break;
+		}
+	}
+	kbase_dev_list_put(dev_list);
+
+	return kbdev;
+}
+EXPORT_SYMBOL(kbase_find_device);
+
+void kbase_release_device(struct kbase_device *kbdev)
+{
+	put_device(kbdev->dev);
+}
+EXPORT_SYMBOL(kbase_release_device);
+
+#ifdef CONFIG_DEBUG_FS
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && \
+		!(LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 28) && \
+		LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0))
+/*
+ * Older versions, before v4.6, of the kernel doesn't have
+ * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28
+ */
+static int kstrtobool_from_user(const char __user *s, size_t count, bool *res)
+{
+	char buf[4];
+
+	count = min(count, sizeof(buf) - 1);
+
+	if (copy_from_user(buf, s, count))
+		return -EFAULT;
+	buf[count] = '\0';
+
+	return strtobool(buf, res);
+}
+#endif
+
+static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	int err;
+	bool value;
+
+	err = kstrtobool_from_user(ubuf, size, &value);
+	if (err)
+		return err;
+
+	if (value)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+	else
+		kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE);
+
+	return size;
+}
+
+static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	char buf[32];
+	int count;
+	bool value;
+
+	value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE);
+
+	count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N");
+
+	return simple_read_from_buffer(ubuf, size, off, buf, count);
+}
+
+static const struct file_operations kbase_infinite_cache_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.write = write_ctx_infinite_cache,
+	.read = read_ctx_infinite_cache,
+};
+
+static ssize_t write_ctx_force_same_va(struct file *f, const char __user *ubuf,
+		size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	int err;
+	bool value;
+
+	err = kstrtobool_from_user(ubuf, size, &value);
+	if (err)
+		return err;
+
+	if (value) {
+#if defined(CONFIG_64BIT)
+		/* 32-bit clients cannot force SAME_VA */
+		if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+			return -EINVAL;
+		kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA);
+#else /* defined(CONFIG_64BIT) */
+		/* 32-bit clients cannot force SAME_VA */
+		return -EINVAL;
+#endif /* defined(CONFIG_64BIT) */
+	} else {
+		kbase_ctx_flag_clear(kctx, KCTX_FORCE_SAME_VA);
+	}
+
+	return size;
+}
+
+static ssize_t read_ctx_force_same_va(struct file *f, char __user *ubuf,
+		size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	char buf[32];
+	int count;
+	bool value;
+
+	value = kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA);
+
+	count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N");
+
+	return simple_read_from_buffer(ubuf, size, off, buf, count);
+}
+
+static const struct file_operations kbase_force_same_va_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.write = write_ctx_force_same_va,
+	.read = read_ctx_force_same_va,
+};
+#endif /* CONFIG_DEBUG_FS */
+
+static int kbase_file_create_kctx(struct kbase_file *const kfile,
+	base_context_create_flags const flags)
+{
+	struct kbase_device *kbdev = NULL;
+	struct kbase_context *kctx = NULL;
+#ifdef CONFIG_DEBUG_FS
+	char kctx_name[64];
+#endif
+
+	if (WARN_ON(!kfile))
+		return -EINVAL;
+
+	/* setup pending, try to signal that we'll do the setup,
+	 * if setup was already in progress, err this call
+	 */
+	if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_CTX,
+		KBASE_FILE_CTX_IN_PROGRESS) != KBASE_FILE_NEED_CTX)
+		return -EPERM;
+
+	kbdev = kfile->kbdev;
+
+#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE)
+	kctx = kbase_create_context(kbdev, in_compat_syscall(),
+		flags, kfile->api_version, kfile->filp);
+#else
+	kctx = kbase_create_context(kbdev, is_compat_task(),
+		flags, kfile->api_version, kfile->filp);
+#endif /* (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) */
+
+	/* if bad flags, will stay stuck in setup mode */
+	if (!kctx)
+		return -ENOMEM;
+
+	if (kbdev->infinite_cache_active_default)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+
+#ifdef CONFIG_DEBUG_FS
+	snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+
+	kctx->kctx_dentry = debugfs_create_dir(kctx_name,
+			kbdev->debugfs_ctx_directory);
+
+	if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+		/* we don't treat this as a fail - just warn about it */
+		dev_warn(kbdev->dev, "couldn't create debugfs dir for kctx\n");
+	} else {
+		debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry,
+				kctx, &kbase_infinite_cache_fops);
+		debugfs_create_file("force_same_va", 0600,
+				kctx->kctx_dentry, kctx,
+				&kbase_force_same_va_fops);
+
+		mutex_init(&kctx->mem_profile_lock);
+
+		kbasep_jd_debugfs_ctx_init(kctx);
+		kbase_debug_mem_view_init(kctx);
+
+		kbase_debug_job_fault_context_init(kctx);
+
+		kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx);
+
+		kbase_jit_debugfs_init(kctx);
+	}
+#endif /* CONFIG_DEBUG_FS */
+
+	dev_dbg(kbdev->dev, "created base context\n");
+
+	kfile->kctx = kctx;
+	atomic_set(&kfile->setup_state, KBASE_FILE_COMPLETE);
+
+	return 0;
+}
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+	struct kbase_device *kbdev = NULL;
+	struct kbase_file *kfile;
+	int ret = 0;
+
+	kbdev = kbase_find_device(iminor(inode));
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kfile = kbase_file_new(kbdev, filp);
+	if (!kfile) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	filp->private_data = kfile;
+	filp->f_mode |= FMODE_UNSIGNED_OFFSET;
+
+	return 0;
+
+ out:
+	kbase_release_device(kbdev);
+	return ret;
+}
+
+static int kbase_release(struct inode *inode, struct file *filp)
+{
+	struct kbase_file *const kfile = filp->private_data;
+
+	kbase_file_delete(kfile);
+	return 0;
+}
+
+static int kbase_api_set_flags(struct kbase_file *kfile,
+		struct kbase_ioctl_set_flags *flags)
+{
+	int err = 0;
+	unsigned long const api_version = kbase_file_get_api_version(kfile);
+	struct kbase_context *kctx = NULL;
+
+	/* Validate flags */
+	if (flags->create_flags !=
+		(flags->create_flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS))
+		return -EINVAL;
+
+	/* For backward compatibility, the context may have been created before
+	 * the flags were set.
+	 */
+	if (api_version >= KBASE_API_VERSION(11, 15)) {
+		err = kbase_file_create_kctx(kfile, flags->create_flags);
+	} else {
+		struct kbasep_js_kctx_info *js_kctx_info = NULL;
+		unsigned long irq_flags = 0;
+
+		/* If setup is incomplete (e.g. because the API version
+		 * wasn't set) then we have to give up.
+		 */
+		kctx = kbase_file_get_kctx_if_setup_complete(kfile);
+		if (unlikely(!kctx))
+			return -EPERM;
+
+		js_kctx_info = &kctx->jctx.sched_info;
+		mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+		spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+
+		/* Translate the flags */
+		if ((flags->create_flags &
+			BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+			kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
+
+		spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	}
+
+	return err;
+}
+
+static int kbase_api_job_submit(struct kbase_context *kctx,
+		struct kbase_ioctl_job_submit *submit)
+{
+	return kbase_jd_submit(kctx, u64_to_user_ptr(submit->addr),
+			submit->nr_atoms,
+			submit->stride, false);
+}
+
+static int kbase_api_get_gpuprops(struct kbase_context *kctx,
+		struct kbase_ioctl_get_gpuprops *get_props)
+{
+	struct kbase_gpu_props *kprops = &kctx->kbdev->gpu_props;
+	int err;
+
+	if (get_props->flags != 0) {
+		dev_err(kctx->kbdev->dev, "Unsupported flags to get_gpuprops");
+		return -EINVAL;
+	}
+
+	if (get_props->size == 0)
+		return kprops->prop_buffer_size;
+	if (get_props->size < kprops->prop_buffer_size)
+		return -EINVAL;
+
+	err = copy_to_user(u64_to_user_ptr(get_props->buffer),
+			kprops->prop_buffer,
+			kprops->prop_buffer_size);
+	if (err)
+		return -EFAULT;
+	return kprops->prop_buffer_size;
+}
+
+static int kbase_api_post_term(struct kbase_context *kctx)
+{
+	kbase_event_close(kctx);
+	return 0;
+}
+
+static int kbase_api_mem_alloc(struct kbase_context *kctx,
+		union kbase_ioctl_mem_alloc *alloc)
+{
+	struct kbase_va_region *reg;
+	u64 flags = alloc->in.flags;
+	u64 gpu_va;
+
+	rcu_read_lock();
+	/* Don't allow memory allocation until user space has set up the
+	 * tracking page (which sets kctx->process_mm). Also catches when we've
+	 * forked.
+	 */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+	rcu_read_unlock();
+
+	if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY)
+		return -ENOMEM;
+
+	/* Force SAME_VA if a 64-bit client.
+	 * The only exception is GPU-executable memory if an EXEC_VA zone
+	 * has been initialized. In that case, GPU-executable memory may
+	 * or may not be SAME_VA.
+	 */
+	if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) &&
+			kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) {
+		if (!(flags & BASE_MEM_PROT_GPU_EX) || !kbase_has_exec_va_zone(kctx))
+			flags |= BASE_MEM_SAME_VA;
+	}
+
+
+	reg = kbase_mem_alloc(kctx, alloc->in.va_pages,
+			alloc->in.commit_pages,
+			alloc->in.extent,
+			&flags, &gpu_va);
+
+	if (!reg)
+		return -ENOMEM;
+
+	alloc->out.flags = flags;
+	alloc->out.gpu_va = gpu_va;
+
+	return 0;
+}
+
+static int kbase_api_mem_query(struct kbase_context *kctx,
+		union kbase_ioctl_mem_query *query)
+{
+	return kbase_mem_query(kctx, query->in.gpu_addr,
+			query->in.query, &query->out.value);
+}
+
+static int kbase_api_mem_free(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_free *free)
+{
+	return kbase_mem_free(kctx, free->gpu_addr);
+}
+
+static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_reader_setup *setup)
+{
+	return kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup);
+}
+
+static int kbase_api_hwcnt_enable(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_enable *enable)
+{
+	int ret;
+
+	mutex_lock(&kctx->legacy_hwcnt_lock);
+	if (enable->dump_buffer != 0) {
+		/* Non-zero dump buffer, so user wants to create the client */
+		if (kctx->legacy_hwcnt_cli == NULL) {
+			ret = kbase_hwcnt_legacy_client_create(
+				kctx->kbdev->hwcnt_gpu_virt,
+				enable,
+				&kctx->legacy_hwcnt_cli);
+		} else {
+			/* This context already has a client */
+			ret = -EBUSY;
+		}
+	} else {
+		/* Zero dump buffer, so user wants to destroy the client */
+		if (kctx->legacy_hwcnt_cli != NULL) {
+			kbase_hwcnt_legacy_client_destroy(
+				kctx->legacy_hwcnt_cli);
+			kctx->legacy_hwcnt_cli = NULL;
+			ret = 0;
+		} else {
+			/* This context has no client to destroy */
+			ret = -EINVAL;
+		}
+	}
+	mutex_unlock(&kctx->legacy_hwcnt_lock);
+
+	return ret;
+}
+
+static int kbase_api_hwcnt_dump(struct kbase_context *kctx)
+{
+	int ret;
+
+	mutex_lock(&kctx->legacy_hwcnt_lock);
+	ret = kbase_hwcnt_legacy_client_dump(kctx->legacy_hwcnt_cli);
+	mutex_unlock(&kctx->legacy_hwcnt_lock);
+
+	return ret;
+}
+
+static int kbase_api_hwcnt_clear(struct kbase_context *kctx)
+{
+	int ret;
+
+	mutex_lock(&kctx->legacy_hwcnt_lock);
+	ret = kbase_hwcnt_legacy_client_clear(kctx->legacy_hwcnt_cli);
+	mutex_unlock(&kctx->legacy_hwcnt_lock);
+
+	return ret;
+}
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_api_hwcnt_set(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_values *values)
+{
+	gpu_model_set_dummy_prfcnt_sample(
+			(u32 __user *)(uintptr_t)values->data,
+			values->size);
+
+	return 0;
+}
+#endif
+
+static int kbase_api_disjoint_query(struct kbase_context *kctx,
+		struct kbase_ioctl_disjoint_query *query)
+{
+	query->counter = kbase_disjoint_event_get(kctx->kbdev);
+
+	return 0;
+}
+
+static int kbase_api_get_ddk_version(struct kbase_context *kctx,
+		struct kbase_ioctl_get_ddk_version *version)
+{
+	int ret;
+	int len = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+
+	if (version->version_buffer == 0)
+		return len;
+
+	if (version->size < len)
+		return -EOVERFLOW;
+
+	ret = copy_to_user(u64_to_user_ptr(version->version_buffer),
+			KERNEL_SIDE_DDK_VERSION_STRING,
+			sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+
+	if (ret)
+		return -EFAULT;
+
+	return len;
+}
+
+/* Defaults for legacy JIT init ioctl */
+#define DEFAULT_MAX_JIT_ALLOCATIONS 255
+#define JIT_LEGACY_TRIM_LEVEL (0) /* No trimming */
+
+static int kbase_api_mem_jit_init_old(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_jit_init_old *jit_init)
+{
+	kctx->jit_version = 1;
+
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
+			DEFAULT_MAX_JIT_ALLOCATIONS,
+			JIT_LEGACY_TRIM_LEVEL, BASE_MEM_GROUP_DEFAULT);
+}
+
+static int kbase_api_mem_jit_init(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_jit_init *jit_init)
+{
+	int i;
+
+	kctx->jit_version = 2;
+
+	for (i = 0; i < sizeof(jit_init->padding); i++) {
+		/* Ensure all padding bytes are 0 for potential future
+		 * extension
+		 */
+		if (jit_init->padding[i])
+			return -EINVAL;
+	}
+
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
+			jit_init->max_allocations, jit_init->trim_level,
+			jit_init->group_id);
+}
+
+static int kbase_api_mem_exec_init(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_exec_init *exec_init)
+{
+	return kbase_region_tracker_init_exec(kctx, exec_init->va_pages);
+}
+
+static int kbase_api_mem_sync(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_sync *sync)
+{
+	struct basep_syncset sset = {
+		.mem_handle.basep.handle = sync->handle,
+		.user_addr = sync->user_addr,
+		.size = sync->size,
+		.type = sync->type
+	};
+
+	return kbase_sync_now(kctx, &sset);
+}
+
+static int kbase_api_mem_find_cpu_offset(struct kbase_context *kctx,
+		union kbase_ioctl_mem_find_cpu_offset *find)
+{
+	return kbasep_find_enclosing_cpu_mapping_offset(
+			kctx,
+			find->in.cpu_addr,
+			find->in.size,
+			&find->out.offset);
+}
+
+static int kbase_api_mem_find_gpu_start_and_offset(struct kbase_context *kctx,
+		union kbase_ioctl_mem_find_gpu_start_and_offset *find)
+{
+	return kbasep_find_enclosing_gpu_mapping_start_and_offset(
+			kctx,
+			find->in.gpu_addr,
+			find->in.size,
+			&find->out.start,
+			&find->out.offset);
+}
+
+static int kbase_api_get_context_id(struct kbase_context *kctx,
+		struct kbase_ioctl_get_context_id *info)
+{
+	info->id = kctx->id;
+
+	return 0;
+}
+
+static int kbase_api_tlstream_acquire(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_acquire *acquire)
+{
+	return kbase_timeline_io_acquire(kctx->kbdev, acquire->flags);
+}
+
+static int kbase_api_tlstream_flush(struct kbase_context *kctx)
+{
+	kbase_timeline_streams_flush(kctx->kbdev->timeline);
+
+	return 0;
+}
+
+static int kbase_api_mem_commit(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_commit *commit)
+{
+	return kbase_mem_commit(kctx, commit->gpu_addr, commit->pages);
+}
+
+static int kbase_api_mem_alias(struct kbase_context *kctx,
+		union kbase_ioctl_mem_alias *alias)
+{
+	struct base_mem_aliasing_info *ai;
+	u64 flags;
+	int err;
+
+	if (alias->in.nents == 0 || alias->in.nents > 2048)
+		return -EINVAL;
+
+	if (alias->in.stride > (U64_MAX / 2048))
+		return -EINVAL;
+
+	ai = vmalloc(sizeof(*ai) * alias->in.nents);
+	if (!ai)
+		return -ENOMEM;
+
+	err = copy_from_user(ai,
+			u64_to_user_ptr(alias->in.aliasing_info),
+			sizeof(*ai) * alias->in.nents);
+	if (err) {
+		vfree(ai);
+		return -EFAULT;
+	}
+
+	flags = alias->in.flags;
+	if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) {
+		vfree(ai);
+		return -EINVAL;
+	}
+
+	alias->out.gpu_va = kbase_mem_alias(kctx, &flags,
+			alias->in.stride, alias->in.nents,
+			ai, &alias->out.va_pages);
+
+	alias->out.flags = flags;
+
+	vfree(ai);
+
+	if (alias->out.gpu_va == 0)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int kbase_api_mem_import(struct kbase_context *kctx,
+		union kbase_ioctl_mem_import *import)
+{
+	int ret;
+	u64 flags = import->in.flags;
+
+	if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY)
+		return -ENOMEM;
+
+	ret = kbase_mem_import(kctx,
+			import->in.type,
+			u64_to_user_ptr(import->in.phandle),
+			import->in.padding,
+			&import->out.gpu_va,
+			&import->out.va_pages,
+			&flags);
+
+	import->out.flags = flags;
+
+	return ret;
+}
+
+static int kbase_api_mem_flags_change(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_flags_change *change)
+{
+	if (change->flags & BASEP_MEM_FLAGS_KERNEL_ONLY)
+		return -ENOMEM;
+
+	return kbase_mem_flags_change(kctx, change->gpu_va,
+			change->flags, change->mask);
+}
+
+static int kbase_api_stream_create(struct kbase_context *kctx,
+		struct kbase_ioctl_stream_create *stream)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	int fd, ret;
+
+	/* Name must be NULL-terminated and padded with NULLs, so check last
+	 * character is NULL
+	 */
+	if (stream->name[sizeof(stream->name)-1] != 0)
+		return -EINVAL;
+
+	ret = kbase_sync_fence_stream_create(stream->name, &fd);
+
+	if (ret)
+		return ret;
+	return fd;
+#else
+	return -ENOENT;
+#endif
+}
+
+static int kbase_api_fence_validate(struct kbase_context *kctx,
+		struct kbase_ioctl_fence_validate *validate)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	return kbase_sync_fence_validate(validate->fd);
+#else
+	return -ENOENT;
+#endif
+}
+
+static int kbase_api_mem_profile_add(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_profile_add *data)
+{
+	char *buf;
+	int err;
+
+	if (data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+		dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big\n");
+		return -EINVAL;
+	}
+
+	buf = kmalloc(data->len, GFP_KERNEL);
+	if (ZERO_OR_NULL_PTR(buf))
+		return -ENOMEM;
+
+	err = copy_from_user(buf, u64_to_user_ptr(data->buffer),
+			data->len);
+	if (err) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	return kbasep_mem_profile_debugfs_insert(kctx, buf, data->len);
+}
+
+static int kbase_api_soft_event_update(struct kbase_context *kctx,
+		struct kbase_ioctl_soft_event_update *update)
+{
+	if (update->flags != 0)
+		return -EINVAL;
+
+	return kbase_soft_event_update(kctx, update->event, update->new_status);
+}
+
+static int kbase_api_sticky_resource_map(struct kbase_context *kctx,
+		struct kbase_ioctl_sticky_resource_map *map)
+{
+	int ret;
+	u64 i;
+	u64 gpu_addr[BASE_EXT_RES_COUNT_MAX];
+
+	if (!map->count || map->count > BASE_EXT_RES_COUNT_MAX)
+		return -EOVERFLOW;
+
+	ret = copy_from_user(gpu_addr, u64_to_user_ptr(map->address),
+			sizeof(u64) * map->count);
+
+	if (ret != 0)
+		return -EFAULT;
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (i = 0; i < map->count; i++) {
+		if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i])) {
+			/* Invalid resource */
+			ret = -EINVAL;
+			break;
+		}
+	}
+
+	if (ret != 0) {
+		while (i > 0) {
+			i--;
+			kbase_sticky_resource_release(kctx, NULL, gpu_addr[i]);
+		}
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return ret;
+}
+
+static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx,
+		struct kbase_ioctl_sticky_resource_unmap *unmap)
+{
+	int ret;
+	u64 i;
+	u64 gpu_addr[BASE_EXT_RES_COUNT_MAX];
+
+	if (!unmap->count || unmap->count > BASE_EXT_RES_COUNT_MAX)
+		return -EOVERFLOW;
+
+	ret = copy_from_user(gpu_addr, u64_to_user_ptr(unmap->address),
+			sizeof(u64) * unmap->count);
+
+	if (ret != 0)
+		return -EFAULT;
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (i = 0; i < unmap->count; i++) {
+		if (!kbase_sticky_resource_release(kctx, NULL, gpu_addr[i])) {
+			/* Invalid resource, but we keep going anyway */
+			ret = -EINVAL;
+		}
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return ret;
+}
+
+#if MALI_UNIT_TEST
+static int kbase_api_tlstream_test(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_test *test)
+{
+	kbase_timeline_test(
+			kctx->kbdev,
+			test->tpw_count,
+			test->msg_delay,
+			test->msg_count,
+			test->aux_msg);
+
+	return 0;
+}
+
+static int kbase_api_tlstream_stats(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_stats *stats)
+{
+	kbase_timeline_stats(kctx->kbdev->timeline,
+			&stats->bytes_collected,
+			&stats->bytes_generated);
+
+	return 0;
+}
+#endif /* MALI_UNIT_TEST */
+
+
+#define KBASE_HANDLE_IOCTL(cmd, function, arg)    \
+	do {                                          \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \
+		return function(arg);                     \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg)    \
+	do {                                                   \
+		type param;                                        \
+		int err;                                           \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE);         \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));     \
+		err = copy_from_user(&param, uarg, sizeof(param)); \
+		if (err)                                           \
+			return -EFAULT;                                \
+		return function(arg, &param);                      \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg)   \
+	do {                                                   \
+		type param;                                        \
+		int ret, err;                                      \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ);          \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));     \
+		memset(&param, 0, sizeof(param));                  \
+		ret = function(arg, &param);                       \
+		err = copy_to_user(uarg, &param, sizeof(param));   \
+		if (err)                                           \
+			return -EFAULT;                                \
+		return ret;                                        \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg)     \
+	do {                                                       \
+		type param;                                            \
+		int ret, err;                                          \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE|_IOC_READ)); \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));         \
+		err = copy_from_user(&param, uarg, sizeof(param));     \
+		if (err)                                               \
+			return -EFAULT;                                    \
+		ret = function(arg, &param);                           \
+		err = copy_to_user(uarg, &param, sizeof(param));       \
+		if (err)                                               \
+			return -EFAULT;                                    \
+		return ret;                                            \
+	} while (0)
+
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *kctx = NULL;
+	struct kbase_device *kbdev = kfile->kbdev;
+	void __user *uarg = (void __user *)arg;
+
+	/* Only these ioctls are available until setup is complete */
+	switch (cmd) {
+	case KBASE_IOCTL_VERSION_CHECK:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK,
+				kbase_api_handshake,
+				struct kbase_ioctl_version_check,
+				kfile);
+		break;
+
+	case KBASE_IOCTL_SET_FLAGS:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS,
+				kbase_api_set_flags,
+				struct kbase_ioctl_set_flags,
+				kfile);
+		break;
+	}
+
+	kctx = kbase_file_get_kctx_if_setup_complete(kfile);
+	if (unlikely(!kctx))
+		return -EPERM;
+
+	/* Normal ioctls */
+	switch (cmd) {
+	case KBASE_IOCTL_JOB_SUBMIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT,
+				kbase_api_job_submit,
+				struct kbase_ioctl_job_submit,
+				kctx);
+		break;
+	case KBASE_IOCTL_GET_GPUPROPS:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS,
+				kbase_api_get_gpuprops,
+				struct kbase_ioctl_get_gpuprops,
+				kctx);
+		break;
+	case KBASE_IOCTL_POST_TERM:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM,
+				kbase_api_post_term,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_ALLOC:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC,
+				kbase_api_mem_alloc,
+				union kbase_ioctl_mem_alloc,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_QUERY:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY,
+				kbase_api_mem_query,
+				union kbase_ioctl_mem_query,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_FREE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE,
+				kbase_api_mem_free,
+				struct kbase_ioctl_mem_free,
+				kctx);
+		break;
+	case KBASE_IOCTL_DISJOINT_QUERY:
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY,
+				kbase_api_disjoint_query,
+				struct kbase_ioctl_disjoint_query,
+				kctx);
+		break;
+	case KBASE_IOCTL_GET_DDK_VERSION:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION,
+				kbase_api_get_ddk_version,
+				struct kbase_ioctl_get_ddk_version,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_JIT_INIT_OLD:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_OLD,
+				kbase_api_mem_jit_init_old,
+				struct kbase_ioctl_mem_jit_init_old,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_JIT_INIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT,
+				kbase_api_mem_jit_init,
+				struct kbase_ioctl_mem_jit_init,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_EXEC_INIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_EXEC_INIT,
+				kbase_api_mem_exec_init,
+				struct kbase_ioctl_mem_exec_init,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_SYNC:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC,
+				kbase_api_mem_sync,
+				struct kbase_ioctl_mem_sync,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_FIND_CPU_OFFSET:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET,
+				kbase_api_mem_find_cpu_offset,
+				union kbase_ioctl_mem_find_cpu_offset,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET,
+				kbase_api_mem_find_gpu_start_and_offset,
+				union kbase_ioctl_mem_find_gpu_start_and_offset,
+				kctx);
+		break;
+	case KBASE_IOCTL_GET_CONTEXT_ID:
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID,
+				kbase_api_get_context_id,
+				struct kbase_ioctl_get_context_id,
+				kctx);
+		break;
+	case KBASE_IOCTL_TLSTREAM_ACQUIRE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE,
+				kbase_api_tlstream_acquire,
+				struct kbase_ioctl_tlstream_acquire,
+				kctx);
+		break;
+	case KBASE_IOCTL_TLSTREAM_FLUSH:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH,
+				kbase_api_tlstream_flush,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_COMMIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT,
+				kbase_api_mem_commit,
+				struct kbase_ioctl_mem_commit,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_ALIAS:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS,
+				kbase_api_mem_alias,
+				union kbase_ioctl_mem_alias,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_IMPORT:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT,
+				kbase_api_mem_import,
+				union kbase_ioctl_mem_import,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_FLAGS_CHANGE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE,
+				kbase_api_mem_flags_change,
+				struct kbase_ioctl_mem_flags_change,
+				kctx);
+		break;
+	case KBASE_IOCTL_STREAM_CREATE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE,
+				kbase_api_stream_create,
+				struct kbase_ioctl_stream_create,
+				kctx);
+		break;
+	case KBASE_IOCTL_FENCE_VALIDATE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE,
+				kbase_api_fence_validate,
+				struct kbase_ioctl_fence_validate,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_PROFILE_ADD:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD,
+				kbase_api_mem_profile_add,
+				struct kbase_ioctl_mem_profile_add,
+				kctx);
+		break;
+	case KBASE_IOCTL_SOFT_EVENT_UPDATE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE,
+				kbase_api_soft_event_update,
+				struct kbase_ioctl_soft_event_update,
+				kctx);
+		break;
+	case KBASE_IOCTL_STICKY_RESOURCE_MAP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP,
+				kbase_api_sticky_resource_map,
+				struct kbase_ioctl_sticky_resource_map,
+				kctx);
+		break;
+	case KBASE_IOCTL_STICKY_RESOURCE_UNMAP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_UNMAP,
+				kbase_api_sticky_resource_unmap,
+				struct kbase_ioctl_sticky_resource_unmap,
+				kctx);
+		break;
+
+	/* Instrumentation. */
+	case KBASE_IOCTL_HWCNT_READER_SETUP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP,
+				kbase_api_hwcnt_reader_setup,
+				struct kbase_ioctl_hwcnt_reader_setup,
+				kctx);
+		break;
+	case KBASE_IOCTL_HWCNT_ENABLE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE,
+				kbase_api_hwcnt_enable,
+				struct kbase_ioctl_hwcnt_enable,
+				kctx);
+		break;
+	case KBASE_IOCTL_HWCNT_DUMP:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP,
+				kbase_api_hwcnt_dump,
+				kctx);
+		break;
+	case KBASE_IOCTL_HWCNT_CLEAR:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR,
+				kbase_api_hwcnt_clear,
+				kctx);
+		break;
+#ifdef CONFIG_MALI_NO_MALI
+	case KBASE_IOCTL_HWCNT_SET:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET,
+				kbase_api_hwcnt_set,
+				struct kbase_ioctl_hwcnt_values,
+				kctx);
+		break;
+#endif
+#ifdef CONFIG_MALI_CINSTR_GWT
+	case KBASE_IOCTL_CINSTR_GWT_START:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START,
+				kbase_gpu_gwt_start,
+				kctx);
+		break;
+	case KBASE_IOCTL_CINSTR_GWT_STOP:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP,
+				kbase_gpu_gwt_stop,
+				kctx);
+		break;
+	case KBASE_IOCTL_CINSTR_GWT_DUMP:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP,
+				kbase_gpu_gwt_dump,
+				union kbase_ioctl_cinstr_gwt_dump,
+				kctx);
+		break;
+#endif
+#if MALI_UNIT_TEST
+	case KBASE_IOCTL_TLSTREAM_TEST:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST,
+				kbase_api_tlstream_test,
+				struct kbase_ioctl_tlstream_test,
+				kctx);
+		break;
+	case KBASE_IOCTL_TLSTREAM_STATS:
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS,
+				kbase_api_tlstream_stats,
+				struct kbase_ioctl_tlstream_stats,
+				kctx);
+		break;
+#endif
+	}
+
+	dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd));
+
+	return -ENOIOCTLCMD;
+}
+
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *const kctx =
+		kbase_file_get_kctx_if_setup_complete(kfile);
+	struct base_jd_event_v2 uevent;
+	int out_count = 0;
+
+	if (unlikely(!kctx))
+		return -EPERM;
+
+	if (count < sizeof(uevent))
+		return -ENOBUFS;
+
+	do {
+		while (kbase_event_dequeue(kctx, &uevent)) {
+			if (out_count > 0)
+				goto out;
+
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			if (wait_event_interruptible(kctx->event_queue,
+					kbase_event_pending(kctx)) != 0)
+				return -ERESTARTSYS;
+		}
+		if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+			if (out_count == 0)
+				return -EPIPE;
+			goto out;
+		}
+
+		if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0)
+			return -EFAULT;
+
+		buf += sizeof(uevent);
+		out_count++;
+		count -= sizeof(uevent);
+	} while (count >= sizeof(uevent));
+
+ out:
+	return out_count * sizeof(uevent);
+}
+
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *const kctx =
+		kbase_file_get_kctx_if_setup_complete(kfile);
+
+	if (unlikely(!kctx))
+		return POLLERR;
+
+	poll_wait(filp, &kctx->event_queue, wait);
+	if (kbase_event_pending(kctx))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup);
+
+static int kbase_mmap(struct file *const filp, struct vm_area_struct *const vma)
+{
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *const kctx =
+		kbase_file_get_kctx_if_setup_complete(kfile);
+
+	if (unlikely(!kctx))
+		return -EPERM;
+
+	return kbase_context_mmap(kctx, vma);
+}
+
+static int kbase_check_flags(int flags)
+{
+	/* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
+	 * closes the file descriptor in a child process.
+	 */
+	if (0 == (flags & O_CLOEXEC))
+		return -EINVAL;
+
+	return 0;
+}
+
+static unsigned long kbase_get_unmapped_area(struct file *const filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *const kctx =
+		kbase_file_get_kctx_if_setup_complete(kfile);
+
+	if (unlikely(!kctx))
+		return -EPERM;
+
+	return kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags);
+}
+
+static const struct file_operations kbase_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_open,
+	.release = kbase_release,
+	.read = kbase_read,
+	.poll = kbase_poll,
+	.unlocked_ioctl = kbase_ioctl,
+	.compat_ioctl = kbase_ioctl,
+	.mmap = kbase_mmap,
+	.check_flags = kbase_check_flags,
+	.get_unmapped_area = kbase_get_unmapped_area,
+};
+
+/**
+ * show_policy - Show callback for the power_policy sysfs file.
+ *
+ * This function is called to get the contents of the power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *current_policy;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_get_policy(kbdev);
+
+	policy_count = kbase_pm_list_policies(kbdev, &policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * set_policy - Store callback for the power_policy sysfs file.
+ *
+ * This function is called when the power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls kbase_pm_set_policy() to change the
+ * policy.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_list_policies(kbdev, &policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "power_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/*
+ * The sysfs file power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+
+/*
+ * show_core_mask - Show callback for the core_mask sysfs file.
+ *
+ * This function is called to get the contents of the core_mask sysfs file.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS0) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[0]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS1) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[1]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS2) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[2]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Available core mask : 0x%llX\n",
+			kbdev->gpu_props.props.raw_props.shader_present);
+
+	return ret;
+}
+
+/**
+ * set_core_mask - Store callback for the core_mask sysfs file.
+ *
+ * This function is called when the core_mask sysfs file is written to.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	u64 new_core_mask[3];
+	int items, i;
+	ssize_t err = count;
+	unsigned long flags;
+	u64 shader_present, group0_core_mask;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llx %llx %llx",
+			&new_core_mask[0], &new_core_mask[1],
+			&new_core_mask[2]);
+
+	if (items != 1 && items != 3) {
+		dev_err(kbdev->dev, "Couldn't process core mask write operation.\n"
+			"Use format <core_mask>\n"
+			"or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+		err = -EINVAL;
+		goto end;
+	}
+
+	if (items == 1)
+		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	shader_present = kbdev->gpu_props.props.raw_props.shader_present;
+	group0_core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+	for (i = 0; i < 3; ++i) {
+		if ((new_core_mask[i] & shader_present) != new_core_mask[i]) {
+			dev_err(dev, "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)",
+					new_core_mask[i], i, shader_present);
+			err = -EINVAL;
+			goto unlock;
+
+		} else if (!(new_core_mask[i] & shader_present & kbdev->pm.backend.ca_cores_enabled)) {
+			dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n",
+					new_core_mask[i], i,
+					kbdev->gpu_props.props.raw_props.shader_present,
+					kbdev->pm.backend.ca_cores_enabled);
+			err = -EINVAL;
+			goto unlock;
+
+		} else if (!(new_core_mask[i] & group0_core_mask)) {
+			dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n",
+					new_core_mask[i], i, group0_core_mask);
+			err = -EINVAL;
+			goto unlock;
+		}
+	}
+
+	if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
+			kbdev->pm.debug_core_mask[1] !=
+					new_core_mask[1] ||
+			kbdev->pm.debug_core_mask[2] !=
+					new_core_mask[2]) {
+
+		kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+				new_core_mask[1], new_core_mask[2]);
+	}
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+end:
+	return err;
+}
+
+/*
+ * The sysfs file core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+
+/**
+ * set_soft_job_timeout - Store callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This allows setting the timeout for software jobs. Waiting soft event wait
+ * jobs will be cancelled after this period expires, while soft fence wait jobs
+ * will print debug information if the fence debug feature is enabled.
+ *
+ * This is expressed in milliseconds.
+ *
+ * Return: count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_soft_job_timeout(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int soft_job_timeout_ms;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) ||
+	    (soft_job_timeout_ms <= 0))
+		return -EINVAL;
+
+	atomic_set(&kbdev->js_data.soft_job_timeout_ms,
+		   soft_job_timeout_ms);
+
+	return count;
+}
+
+/**
+ * show_soft_job_timeout - Show callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * This will return the timeout for the software jobs.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_soft_job_timeout(struct device *dev,
+				       struct device_attribute *attr,
+				       char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n",
+			 atomic_read(&kbdev->js_data.soft_job_timeout_ms));
+}
+
+static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR,
+		   show_soft_job_timeout, set_soft_job_timeout);
+
+static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms,
+				int default_ticks, u32 old_ticks)
+{
+	if (timeout_ms > 0) {
+		u64 ticks = timeout_ms * 1000000ULL;
+		do_div(ticks, kbdev->js_data.scheduling_period_ns);
+		if (!ticks)
+			return 1;
+		return ticks;
+	} else if (timeout_ms < 0) {
+		return default_ticks;
+	} else {
+		return old_ticks;
+	}
+}
+
+/**
+ * set_js_timeouts - Store callback for the js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the js_timeouts sysfs
+ * file. This file contains five values separated by whitespace. The values
+ * are basically the same as %JS_SOFT_STOP_TICKS, %JS_HARD_STOP_TICKS_SS,
+ * %JS_HARD_STOP_TICKS_DUMPING, %JS_RESET_TICKS_SS, %JS_RESET_TICKS_DUMPING
+ * configuration values (in that order), with the difference that the js_timeout
+ * values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfile file allows the current values in
+ * use by the job scheduler to get override. Note that a value needs to
+ * be other than 0 for it to override the current job scheduler value.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	long js_soft_stop_ms;
+	long js_soft_stop_ms_cl;
+	long js_hard_stop_ms_ss;
+	long js_hard_stop_ms_cl;
+	long js_hard_stop_ms_dumping;
+	long js_reset_ms_ss;
+	long js_reset_ms_cl;
+	long js_reset_ms_dumping;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld",
+			&js_soft_stop_ms, &js_soft_stop_ms_cl,
+			&js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+			&js_hard_stop_ms_dumping, &js_reset_ms_ss,
+			&js_reset_ms_cl, &js_reset_ms_dumping);
+
+	if (items == 8) {
+		struct kbasep_js_device_data *js_data = &kbdev->js_data;
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\
+	js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \
+			default, js_data->ticks_name); \
+	dev_dbg(kbdev->dev, "Overriding " #ticks_name \
+			" with %lu ticks (%lu ms)\n", \
+			(unsigned long)js_data->ticks_name, \
+			ms_name); \
+	} while (0)
+
+		UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms,
+				DEFAULT_JS_SOFT_STOP_TICKS);
+		UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl,
+				DEFAULT_JS_SOFT_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_HARD_STOP_TICKS_SS_8408 :
+				DEFAULT_JS_HARD_STOP_TICKS_SS);
+		UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl,
+				DEFAULT_JS_HARD_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_dumping,
+				js_hard_stop_ms_dumping,
+				DEFAULT_JS_HARD_STOP_TICKS_DUMPING);
+		UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_RESET_TICKS_SS_8408 :
+				DEFAULT_JS_RESET_TICKS_SS);
+		UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl,
+				DEFAULT_JS_RESET_TICKS_CL);
+		UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping,
+				DEFAULT_JS_RESET_TICKS_DUMPING);
+
+		kbase_js_set_timeouts(kbdev);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n"
+			"Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n"
+			"Write 0 for no change, -1 to restore default timeout\n");
+	return -EINVAL;
+}
+
+static unsigned long get_js_timeout_in_ms(
+		u32 scheduling_period_ns,
+		u32 ticks)
+{
+	u64 ms = (u64)ticks * scheduling_period_ns;
+
+	do_div(ms, 1000000UL);
+	return ms;
+}
+
+/**
+ * show_js_timeouts - Show callback for the js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the js_timeouts sysfs
+ * file. It returns the last set values written to the js_timeouts sysfs file.
+ * If the file didn't get written yet, the values will be current setting in
+ * use.
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+	unsigned long js_soft_stop_ms;
+	unsigned long js_soft_stop_ms_cl;
+	unsigned long js_hard_stop_ms_ss;
+	unsigned long js_hard_stop_ms_cl;
+	unsigned long js_hard_stop_ms_dumping;
+	unsigned long js_reset_ms_ss;
+	unsigned long js_reset_ms_cl;
+	unsigned long js_reset_ms_dumping;
+	u32 scheduling_period_ns;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+
+#define GET_TIMEOUT(name) get_js_timeout_in_ms(\
+		scheduling_period_ns, \
+		kbdev->js_data.name)
+
+	js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks);
+	js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl);
+	js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss);
+	js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl);
+	js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping);
+	js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss);
+	js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl);
+	js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef GET_TIMEOUT
+
+	ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+			js_soft_stop_ms, js_soft_stop_ms_cl,
+			js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+			js_hard_stop_ms_dumping, js_reset_ms_ss,
+			js_reset_ms_cl, js_reset_ms_dumping);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * The sysfs file js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * JS_STOP_STOP_TICKS_SS
+ * JS_STOP_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
+
+static u32 get_new_js_timeout(
+		u32 old_period,
+		u32 old_ticks,
+		u32 new_scheduling_period_ns)
+{
+	u64 ticks = (u64)old_period * (u64)old_ticks;
+	do_div(ticks, new_scheduling_period_ns);
+	return ticks?ticks:1;
+}
+
+/**
+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ *                            file
+ * @dev:   The device the sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the js_scheduling_period sysfs file is written
+ * to. It checks the data written, and if valid updates the js_scheduling_period
+ * value
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	unsigned int js_scheduling_period;
+	u32 new_scheduling_period_ns;
+	u32 old_period;
+	struct kbasep_js_device_data *js_data;
+	unsigned long flags;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	js_data = &kbdev->js_data;
+
+	ret = kstrtouint(buf, 0, &js_scheduling_period);
+	if (ret || !js_scheduling_period) {
+		dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
+				"Use format <js_scheduling_period_ms>\n");
+		return -EINVAL;
+	}
+
+	new_scheduling_period_ns = js_scheduling_period * 1000000;
+
+	/* Update scheduling timeouts */
+	mutex_lock(&js_data->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* If no contexts have been scheduled since js_timeouts was last written
+	 * to, the new timeouts might not have been latched yet. So check if an
+	 * update is pending and use the new values if necessary. */
+
+	/* Use previous 'new' scheduling period as a base if present. */
+	old_period = js_data->scheduling_period_ns;
+
+#define SET_TIMEOUT(name) \
+		(js_data->name = get_new_js_timeout(\
+				old_period, \
+				kbdev->js_data.name, \
+				new_scheduling_period_ns))
+
+	SET_TIMEOUT(soft_stop_ticks);
+	SET_TIMEOUT(soft_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_ss);
+	SET_TIMEOUT(hard_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_dumping);
+	SET_TIMEOUT(gpu_reset_ticks_ss);
+	SET_TIMEOUT(gpu_reset_ticks_cl);
+	SET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef SET_TIMEOUT
+
+	js_data->scheduling_period_ns = new_scheduling_period_ns;
+
+	kbase_js_set_timeouts(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&js_data->runpool_mutex);
+
+	dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
+			js_scheduling_period);
+
+	return count;
+}
+
+/**
+ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs
+ *                             entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the JS scheduling
+ * period.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	u32 period;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	period = kbdev->js_data.scheduling_period_ns;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+			period / 1000000);
+
+	return ret;
+}
+
+static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
+		show_js_scheduling_period, set_js_scheduling_period);
+
+
+#ifdef CONFIG_MALI_DEBUG
+static ssize_t set_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int softstop_always;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &softstop_always);
+	if (ret || ((softstop_always != 0) && (softstop_always != 1))) {
+		dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n"
+				"Use format <soft_stop_always>\n");
+		return -EINVAL;
+	}
+
+	kbdev->js_data.softstop_always = (bool) softstop_always;
+	dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n",
+			(kbdev->js_data.softstop_always) ?
+			"Enabled" : "Disabled");
+	return count;
+}
+
+static ssize_t show_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * By default, soft-stops are disabled when only a single context is present.
+ * The ability to enable soft-stop when only a single context is present can be
+ * used for debug and unit-testing purposes.
+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.)
+ */
+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG
+typedef void (kbasep_debug_command_func) (struct kbase_device *);
+
+enum kbasep_debug_command_code {
+	KBASEP_DEBUG_COMMAND_DUMPTRACE,
+
+	/* This must be the last enum */
+	KBASEP_DEBUG_COMMAND_COUNT
+};
+
+struct kbasep_debug_command {
+	char *str;
+	kbasep_debug_command_func *func;
+};
+
+/* Debug commands supported by the driver */
+static const struct kbasep_debug_command debug_commands[] = {
+	{
+	 .str = "dumptrace",
+	 .func = &kbasep_trace_dump,
+	 }
+};
+
+/**
+ * show_debug - Show callback for the debug_command sysfs file.
+ *
+ * This function is called to get the contents of the debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * issue_debug - Store callback for the debug_command sysfs file.
+ *
+ * This function is called when the debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @debug_commands to issue the command.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) {
+		if (sysfs_streq(debug_commands[i].str, buf)) {
+			debug_commands[i].func(kbdev);
+			return count;
+		}
+	}
+
+	/* Debug Command not found */
+	dev_err(dev, "debug_command: command not known\n");
+	return -EINVAL;
+}
+
+/* The sysfs file debug_command.
+ *
+ * This is used to issue general debug commands to the device driver.
+ * Reading it will produce a list of debug commands, separated by newlines.
+ * Writing to it with one of those commands will issue said command.
+ */
+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
+#endif /* CONFIG_MALI_DEBUG */
+
+/**
+ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get a description of the present Mali
+ * GPU via the gpuinfo sysfs entry.  This includes the GPU family, the
+ * number of cores, the hardware version and the raw product id.  For
+ * example
+ *
+ *    Mali-T60x MP4 r0p0 0x6956
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t kbase_show_gpuinfo(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	static const struct gpu_product_id_name {
+		unsigned id;
+		char *name;
+	} gpu_product_id_names[] = {
+		{ .id = GPU_ID_PI_T60X, .name = "Mali-T60x" },
+		{ .id = GPU_ID_PI_T62X, .name = "Mali-T62x" },
+		{ .id = GPU_ID_PI_T72X, .name = "Mali-T72x" },
+		{ .id = GPU_ID_PI_T76X, .name = "Mali-T76x" },
+		{ .id = GPU_ID_PI_T82X, .name = "Mali-T82x" },
+		{ .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
+		{ .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
+		{ .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+		{ .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G71" },
+		{ .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G72" },
+		{ .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G51" },
+		{ .id = GPU_ID2_PRODUCT_TNOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G76" },
+		{ .id = GPU_ID2_PRODUCT_TDVX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G31" },
+		{ .id = GPU_ID2_PRODUCT_TGOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G52" },
+		{ .id = GPU_ID2_PRODUCT_TTRX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G77" },
+	};
+	const char *product_name = "(Unknown Mali GPU)";
+	struct kbase_device *kbdev;
+	u32 gpu_id;
+	unsigned product_id, product_id_mask;
+	unsigned i;
+	bool is_new_format;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	is_new_format = GPU_ID_IS_NEW_FORMAT(product_id);
+	product_id_mask =
+		(is_new_format ?
+			GPU_ID2_PRODUCT_MODEL :
+			GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
+		const struct gpu_product_id_name *p = &gpu_product_id_names[i];
+
+		if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) &&
+		    (p->id & product_id_mask) ==
+		    (product_id & product_id_mask)) {
+			product_name = p->name;
+			break;
+		}
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n",
+		product_name, kbdev->gpu_props.num_cores,
+		(gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT,
+		(gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT,
+		product_id);
+}
+static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
+
+/**
+ * set_dvfs_period - Store callback for the dvfs_period sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the dvfs_period sysfs file is written to. It
+ * checks the data written, and if valid updates the DVFS period variable,
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_dvfs_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int dvfs_period;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &dvfs_period);
+	if (ret || dvfs_period <= 0) {
+		dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n"
+				"Use format <dvfs_period_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.dvfs_period = dvfs_period;
+	dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period);
+
+	return count;
+}
+
+/**
+ * show_dvfs_period - Show callback for the dvfs_period sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_dvfs_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period);
+
+	return ret;
+}
+
+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
+		set_dvfs_period);
+
+/**
+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the pm_poweroff sysfs file is written to.
+ *
+ * This file contains three values separated by whitespace. The values
+ * are gpu_poweroff_time (the period of the poweroff timer, in ns),
+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle
+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer
+ * ticks before the GPU is powered off), in that order.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_pm_tick_timer_state *stt;
+	int items;
+	u64 gpu_poweroff_time;
+	unsigned int poweroff_shader_ticks, poweroff_gpu_ticks;
+	unsigned long flags;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time,
+			&poweroff_shader_ticks,
+			&poweroff_gpu_ticks);
+	if (items != 3) {
+		dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n"
+				"Use format <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>\n");
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	stt = &kbdev->pm.backend.shader_tick_timer;
+	stt->configured_interval = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+	stt->configured_ticks = poweroff_shader_ticks;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (poweroff_gpu_ticks != 0)
+		dev_warn(kbdev->dev, "Separate GPU poweroff delay no longer supported.\n");
+
+	return count;
+}
+
+/**
+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_pm_tick_timer_state *stt;
+	ssize_t ret;
+	unsigned long flags;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	stt = &kbdev->pm.backend.shader_tick_timer;
+	ret = scnprintf(buf, PAGE_SIZE, "%llu %u 0\n",
+			ktime_to_ns(stt->configured_interval),
+			stt->configured_ticks);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
+		set_pm_poweroff);
+
+/**
+ * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the reset_timeout sysfs file is written to. It
+ * checks the data written, and if valid updates the reset timeout.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_reset_timeout(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int reset_timeout;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &reset_timeout);
+	if (ret || reset_timeout <= 0) {
+		dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n"
+				"Use format <reset_timeout_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->reset_timeout_ms = reset_timeout;
+	dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout);
+
+	return count;
+}
+
+/**
+ * show_reset_timeout - Show callback for the reset_timeout sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current reset timeout.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_reset_timeout(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms);
+
+	return ret;
+}
+
+static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout,
+		set_reset_timeout);
+
+
+static ssize_t show_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *const kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE,
+		kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_size);
+}
+
+static ssize_t set_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *const kbdev = to_kbase_device(dev);
+	int err;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kbase_debugfs_helper_set_attr_from_string(buf,
+		kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_trim);
+
+	return err ? err : count;
+}
+
+static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size,
+		set_mem_pool_size);
+
+static ssize_t show_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *const kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE,
+		kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_max_size);
+}
+
+static ssize_t set_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *const kbdev = to_kbase_device(dev);
+	int err;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kbase_debugfs_helper_set_attr_from_string(buf,
+		kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_set_max_size);
+
+	return err ? err : count;
+}
+
+static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
+		set_mem_pool_max_size);
+
+/**
+ * show_lp_mem_pool_size - Show size of the large memory pages pool.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the pool size.
+ *
+ * This function is called to get the number of large memory pages which currently populate the kbdev pool.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_lp_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *const kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE,
+		kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_size);
+}
+
+/**
+ * set_lp_mem_pool_size - Set size of the large memory pages pool.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called to set the number of large memory pages which should populate the kbdev pool.
+ * This may cause existing pages to be removed from the pool, or new pages to be created and then added to the pool.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_lp_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *const kbdev = to_kbase_device(dev);
+	int err;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kbase_debugfs_helper_set_attr_from_string(buf,
+		kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_trim);
+
+	return err ? err : count;
+}
+
+static DEVICE_ATTR(lp_mem_pool_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_size,
+		set_lp_mem_pool_size);
+
+/**
+ * show_lp_mem_pool_max_size - Show maximum size of the large memory pages pool.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the pool size.
+ *
+ * This function is called to get the maximum number of large memory pages that the kbdev pool can possibly contain.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_lp_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *const kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE,
+		kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_max_size);
+}
+
+/**
+ * set_lp_mem_pool_max_size - Set maximum size of the large memory pages pool.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called to set the maximum number of large memory pages that the kbdev pool can possibly contain.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_lp_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *const kbdev = to_kbase_device(dev);
+	int err;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kbase_debugfs_helper_set_attr_from_string(buf,
+		kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_set_max_size);
+
+	return err ? err : count;
+}
+
+static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size,
+		set_lp_mem_pool_max_size);
+
+/**
+ * show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs
+ *                               entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the context scheduling mode information.
+ *
+ * This function is called to get the context scheduling mode being used by JS.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_ctx_scheduling_mode(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->js_ctx_scheduling_mode);
+}
+
+/**
+ * set_js_ctx_scheduling_mode - Set callback for js_ctx_scheduling_mode sysfs
+ *                              entry.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called when the js_ctx_scheduling_mode sysfs file is written
+ * to. It checks the data written, and if valid updates the ctx scheduling mode
+ * being by JS.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_ctx_scheduling_mode(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_context *kctx;
+	u32 new_js_ctx_scheduling_mode;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	int ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtouint(buf, 0, &new_js_ctx_scheduling_mode);
+	if (ret || new_js_ctx_scheduling_mode >= KBASE_JS_PRIORITY_MODE_COUNT) {
+		dev_err(kbdev->dev, "Couldn't process js_ctx_scheduling_mode"
+				" write operation.\n"
+				"Use format <js_ctx_scheduling_mode>\n");
+		return -EINVAL;
+	}
+
+	if (new_js_ctx_scheduling_mode == kbdev->js_ctx_scheduling_mode)
+		return count;
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Update the context priority mode */
+	kbdev->js_ctx_scheduling_mode = new_js_ctx_scheduling_mode;
+
+	/* Adjust priority of all the contexts as per the new mode */
+	list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link)
+		kbase_js_update_ctx_priority(kctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	dev_dbg(kbdev->dev, "JS ctx scheduling mode: %u\n", new_js_ctx_scheduling_mode);
+
+	return count;
+}
+
+static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR,
+		show_js_ctx_scheduling_mode,
+		set_js_ctx_scheduling_mode);
+
+#ifdef MALI_KBASE_BUILD
+#ifdef CONFIG_DEBUG_FS
+
+/* Number of entries in serialize_jobs_settings[] */
+#define NR_SERIALIZE_JOBS_SETTINGS 5
+/* Maximum string length in serialize_jobs_settings[].name */
+#define MAX_SERIALIZE_JOBS_NAME_LEN 16
+
+static struct
+{
+	char *name;
+	u8 setting;
+} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = {
+	{"none", 0},
+	{"intra-slot", KBASE_SERIALIZE_INTRA_SLOT},
+	{"inter-slot", KBASE_SERIALIZE_INTER_SLOT},
+	{"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT},
+	{"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT |
+			KBASE_SERIALIZE_RESET}
+};
+
+/**
+ * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs
+ *                                  file
+ * @sfile: seq_file pointer
+ * @data:  Private callback data
+ *
+ * This function is called to get the contents of the serialize_jobs debugfs
+ * file. This is a list of the available settings with the currently active one
+ * surrounded by square brackets.
+ *
+ * Return: 0 on success, or an error code on error
+ */
+static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_device *kbdev = sfile->private;
+	int i;
+
+	CSTD_UNUSED(data);
+
+	for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+		if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting)
+			seq_printf(sfile, "[%s] ",
+					serialize_jobs_settings[i].name);
+		else
+			seq_printf(sfile, "%s ",
+					serialize_jobs_settings[i].name);
+	}
+
+	seq_puts(sfile, "\n");
+
+	return 0;
+}
+
+/**
+ * kbasep_serialize_jobs_debugfs_write - Store callback for the serialize_jobs
+ *                                       debugfs file.
+ * @file:  File pointer
+ * @ubuf:  User buffer containing data to store
+ * @count: Number of bytes in user buffer
+ * @ppos:  File position
+ *
+ * This function is called when the serialize_jobs debugfs file is written to.
+ * It matches the requested setting against the available settings and if a
+ * matching setting is found updates kbdev->serialize_jobs.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	struct seq_file *s = file->private_data;
+	struct kbase_device *kbdev = s->private;
+	char buf[MAX_SERIALIZE_JOBS_NAME_LEN];
+	int i;
+	bool valid = false;
+
+	CSTD_UNUSED(ppos);
+
+	count = min_t(size_t, sizeof(buf) - 1, count);
+	if (copy_from_user(buf, ubuf, count))
+		return -EFAULT;
+
+	buf[count] = 0;
+
+	for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+		if (sysfs_streq(serialize_jobs_settings[i].name, buf)) {
+			kbdev->serialize_jobs =
+					serialize_jobs_settings[i].setting;
+			valid = true;
+			break;
+		}
+	}
+
+	if (!valid) {
+		dev_err(kbdev->dev, "serialize_jobs: invalid setting\n");
+		return -EINVAL;
+	}
+
+	return count;
+}
+
+/**
+ * kbasep_serialize_jobs_debugfs_open - Open callback for the serialize_jobs
+ *                                     debugfs file
+ * @in:   inode pointer
+ * @file: file pointer
+ *
+ * Return: Zero on success, error code on failure
+ */
+static int kbasep_serialize_jobs_debugfs_open(struct inode *in,
+		struct file *file)
+{
+	return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_serialize_jobs_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = kbasep_serialize_jobs_debugfs_open,
+	.read = seq_read,
+	.write = kbasep_serialize_jobs_debugfs_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_DEBUG_FS */
+#endif /* MALI_KBASE_BUILD */
+
+static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+		protected_mode_hwcnt_disable_work);
+	unsigned long flags;
+
+	bool do_disable;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	do_disable = !kbdev->protected_mode_hwcnt_desired &&
+		!kbdev->protected_mode_hwcnt_disabled;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!do_disable)
+		return;
+
+	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	do_disable = !kbdev->protected_mode_hwcnt_desired &&
+		!kbdev->protected_mode_hwcnt_disabled;
+
+	if (do_disable) {
+		/* Protected mode state did not change while we were doing the
+		 * disable, so commit the work we just performed and continue
+		 * the state machine.
+		 */
+		kbdev->protected_mode_hwcnt_disabled = true;
+		kbase_backend_slot_update(kbdev);
+	} else {
+		/* Protected mode state was updated while we were doing the
+		 * disable, so we need to undo the disable we just performed.
+		 */
+		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+static int kbasep_protected_mode_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_OF
+	struct device_node *protected_node;
+	struct platform_device *pdev;
+	struct protected_mode_device *protected_dev;
+#endif
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		/* Use native protected ops */
+		kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev),
+				GFP_KERNEL);
+		if (!kbdev->protected_dev)
+			return -ENOMEM;
+		kbdev->protected_dev->data = kbdev;
+		kbdev->protected_ops = &kbase_native_protected_ops;
+		kbdev->protected_mode_support = true;
+		INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work,
+			kbasep_protected_mode_hwcnt_disable_worker);
+		kbdev->protected_mode_hwcnt_desired = true;
+		kbdev->protected_mode_hwcnt_disabled = false;
+		return 0;
+	}
+
+	kbdev->protected_mode_support = false;
+
+#ifdef CONFIG_OF
+	protected_node = of_parse_phandle(kbdev->dev->of_node,
+			"protected-mode-switcher", 0);
+
+	if (!protected_node)
+		protected_node = of_parse_phandle(kbdev->dev->of_node,
+				"secure-mode-switcher", 0);
+
+	if (!protected_node) {
+		/* If protected_node cannot be looked up then we assume
+		 * protected mode is not supported on this platform. */
+		dev_info(kbdev->dev, "Protected mode not available\n");
+		return 0;
+	}
+
+	pdev = of_find_device_by_node(protected_node);
+	if (!pdev)
+		return -EINVAL;
+
+	protected_dev = platform_get_drvdata(pdev);
+	if (!protected_dev)
+		return -EPROBE_DEFER;
+
+	kbdev->protected_ops = &protected_dev->ops;
+	kbdev->protected_dev = protected_dev;
+
+	if (kbdev->protected_ops) {
+		int err;
+
+		/* Make sure protected mode is disabled on startup */
+		mutex_lock(&kbdev->pm.lock);
+		err = kbdev->protected_ops->protected_mode_disable(
+				kbdev->protected_dev);
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* protected_mode_disable() returns -EINVAL if not supported */
+		kbdev->protected_mode_support = (err != -EINVAL);
+	}
+#endif
+	return 0;
+}
+
+static void kbasep_protected_mode_term(struct kbase_device *kbdev)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		cancel_work_sync(&kbdev->protected_mode_hwcnt_disable_work);
+		kfree(kbdev->protected_dev);
+	}
+}
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	int err = 0;
+
+	if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) {
+		dev_err(kbdev->dev, "Register window unavailable\n");
+		err = -EIO;
+		goto out_region;
+	}
+
+	kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+	if (!kbdev->reg) {
+		dev_err(kbdev->dev, "Can't remap register window\n");
+		err = -EINVAL;
+		goto out_ioremap;
+	}
+
+	return err;
+
+ out_ioremap:
+	release_mem_region(kbdev->reg_start, kbdev->reg_size);
+ out_region:
+	return err;
+}
+
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+	if (kbdev->reg) {
+		iounmap(kbdev->reg);
+		release_mem_region(kbdev->reg_start, kbdev->reg_size);
+		kbdev->reg = NULL;
+		kbdev->reg_start = 0;
+		kbdev->reg_size = 0;
+	}
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+static int registers_map(struct kbase_device * const kbdev)
+{
+
+		/* the first memory resource is the physical address of the GPU
+		 * registers */
+		struct platform_device *pdev = to_platform_device(kbdev->dev);
+		struct resource *reg_res;
+		int err;
+
+		reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!reg_res) {
+			dev_err(kbdev->dev, "Invalid register resource\n");
+			return -ENOENT;
+		}
+
+		kbdev->reg_start = reg_res->start;
+		kbdev->reg_size = resource_size(reg_res);
+
+
+		err = kbase_common_reg_map(kbdev);
+		if (err) {
+			dev_err(kbdev->dev, "Failed to map registers\n");
+			return err;
+		}
+
+	return 0;
+}
+
+static void registers_unmap(struct kbase_device *kbdev)
+{
+	kbase_common_reg_unmap(kbdev);
+}
+
+static int power_control_init(struct platform_device *pdev)
+{
+#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF)
+	/* Power control initialization requires at least the capability to get
+	 * regulators and clocks from the device tree, as well as parsing
+	 * arrays of unsigned integer values.
+	 *
+	 * The whole initialization process shall simply be skipped if the
+	 * minimum capability is not available.
+	 */
+	return 0;
+#else
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int err = 0;
+	unsigned int i;
+#if defined(CONFIG_REGULATOR)
+	static const char *regulator_names[] = {
+		"mali", "shadercores"
+	};
+	BUILD_BUG_ON(ARRAY_SIZE(regulator_names) < BASE_MAX_NR_CLOCKS_REGULATORS);
+#endif /* CONFIG_REGULATOR */
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_REGULATOR)
+	/* Since the error code EPROBE_DEFER causes the entire probing
+	 * procedure to be restarted from scratch at a later time,
+	 * all regulators will be released before returning.
+	 *
+	 * Any other error is ignored and the driver will continue
+	 * operating with a partial initialization of regulators.
+	 */
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		kbdev->regulators[i] = regulator_get_optional(kbdev->dev,
+			regulator_names[i]);
+		if (IS_ERR_OR_NULL(kbdev->regulators[i])) {
+			err = PTR_ERR(kbdev->regulators[i]);
+			kbdev->regulators[i] = NULL;
+			break;
+		}
+	}
+	if (err == -EPROBE_DEFER) {
+		while ((i > 0) && (i < BASE_MAX_NR_CLOCKS_REGULATORS))
+			regulator_put(kbdev->regulators[--i]);
+		return err;
+	}
+
+	kbdev->nr_regulators = i;
+	dev_dbg(&pdev->dev, "Regulators probed: %u\n", kbdev->nr_regulators);
+#endif
+
+	/* Having more clocks than regulators is acceptable, while the
+	 * opposite shall not happen.
+	 *
+	 * Since the error code EPROBE_DEFER causes the entire probing
+	 * procedure to be restarted from scratch at a later time,
+	 * all clocks and regulators will be released before returning.
+	 *
+	 * Any other error is ignored and the driver will continue
+	 * operating with a partial initialization of clocks.
+	 */
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		kbdev->clocks[i] = of_clk_get(kbdev->dev->of_node, i);
+		if (IS_ERR_OR_NULL(kbdev->clocks[i])) {
+			err = PTR_ERR(kbdev->clocks[i]);
+			kbdev->clocks[i] = NULL;
+			break;
+		}
+
+		err = clk_prepare_enable(kbdev->clocks[i]);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Failed to prepare and enable clock (%d)\n",
+				err);
+			clk_put(kbdev->clocks[i]);
+			break;
+		}
+	}
+	if (err == -EPROBE_DEFER) {
+		while ((i > 0) && (i < BASE_MAX_NR_CLOCKS_REGULATORS)) {
+			clk_disable_unprepare(kbdev->clocks[--i]);
+			clk_put(kbdev->clocks[i]);
+		}
+		goto clocks_probe_defer;
+	}
+
+	kbdev->nr_clocks = i;
+	dev_dbg(&pdev->dev, "Clocks probed: %u\n", kbdev->nr_clocks);
+
+	/* Any error in parsing the OPP table from the device file
+	 * shall be ignored. The fact that the table may be absent or wrong
+	 * on the device tree of the platform shouldn't prevent the driver
+	 * from completing its initialization.
+	 */
+#if (KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE && \
+	!defined(LSK_OPPV2_BACKPORT))
+	err = of_init_opp_table(kbdev->dev);
+	CSTD_UNUSED(err);
+#else
+
+#if defined(CONFIG_PM_OPP)
+#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \
+	defined(CONFIG_REGULATOR))
+	if (kbdev->nr_regulators > 0) {
+		kbdev->opp_table = dev_pm_opp_set_regulators(kbdev->dev,
+			regulator_names, BASE_MAX_NR_CLOCKS_REGULATORS);
+	}
+#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */
+	err = dev_pm_opp_of_add_table(kbdev->dev);
+	CSTD_UNUSED(err);
+#endif /* CONFIG_PM_OPP */
+
+#endif /* KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE */
+	return 0;
+
+clocks_probe_defer:
+#if defined(CONFIG_REGULATOR)
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++)
+		regulator_put(kbdev->regulators[i]);
+#endif
+	return err;
+#endif /* KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE */
+}
+
+static void power_control_term(struct kbase_device *kbdev)
+{
+	unsigned int i;
+
+#if (KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE && \
+	!defined(LSK_OPPV2_BACKPORT))
+#if KERNEL_VERSION(3, 19, 0) <= LINUX_VERSION_CODE
+	of_free_opp_table(kbdev->dev);
+#endif
+#else
+
+#if defined(CONFIG_PM_OPP)
+	dev_pm_opp_of_remove_table(kbdev->dev);
+#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \
+	defined(CONFIG_REGULATOR))
+	if (!IS_ERR_OR_NULL(kbdev->opp_table))
+		dev_pm_opp_put_regulators(kbdev->opp_table);
+#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */
+#endif /* CONFIG_PM_OPP */
+
+#endif /* KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE */
+
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		if (kbdev->clocks[i]) {
+			if (__clk_is_enabled(kbdev->clocks[i]))
+				clk_disable_unprepare(kbdev->clocks[i]);
+			clk_put(kbdev->clocks[i]);
+			kbdev->clocks[i] = NULL;
+		} else
+			break;
+	}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		if (kbdev->regulators[i]) {
+			regulator_put(kbdev->regulators[i]);
+			kbdev->regulators[i] = NULL;
+		}
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+}
+
+#ifdef MALI_KBASE_BUILD
+#ifdef CONFIG_DEBUG_FS
+
+static void trigger_quirks_reload(struct kbase_device *kbdev)
+{
+	kbase_pm_context_active(kbdev);
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	kbase_pm_context_idle(kbdev);
+}
+
+#define MAKE_QUIRK_ACCESSORS(type) \
+static int type##_quirks_set(void *data, u64 val) \
+{ \
+	struct kbase_device *kbdev; \
+	kbdev = (struct kbase_device *)data; \
+	kbdev->hw_quirks_##type = (u32)val; \
+	trigger_quirks_reload(kbdev); \
+	return 0;\
+} \
+\
+static int type##_quirks_get(void *data, u64 *val) \
+{ \
+	struct kbase_device *kbdev;\
+	kbdev = (struct kbase_device *)data;\
+	*val = kbdev->hw_quirks_##type;\
+	return 0;\
+} \
+DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
+		type##_quirks_set, "%llu\n")
+
+MAKE_QUIRK_ACCESSORS(sc);
+MAKE_QUIRK_ACCESSORS(tiler);
+MAKE_QUIRK_ACCESSORS(mmu);
+MAKE_QUIRK_ACCESSORS(jm);
+
+
+/**
+ * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read
+ * @file: File object to read is for
+ * @buf:  User buffer to populate with data
+ * @len:  Length of user buffer
+ * @ppos: Offset within file object
+ *
+ * Retrieves the current status of protected debug mode
+ * (0 = disabled, 1 = enabled)
+ *
+ * Return: Number of bytes added to user buffer
+ */
+static ssize_t debugfs_protected_debug_mode_read(struct file *file,
+				char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)file->private_data;
+	u32 gpu_status;
+	ssize_t ret_val;
+
+	kbase_pm_context_active(kbdev);
+	gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS));
+	kbase_pm_context_idle(kbdev);
+
+	if (gpu_status & GPU_DBGEN)
+		ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2);
+	else
+		ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2);
+
+	return ret_val;
+}
+
+/*
+ * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops
+ *
+ * Contains the file operations for the "protected_debug_mode" debugfs file
+ */
+static const struct file_operations fops_protected_debug_mode = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = debugfs_protected_debug_mode_read,
+	.llseek = default_llseek,
+};
+
+static int kbase_device_debugfs_mem_pool_max_size_show(struct seq_file *sfile,
+	void *data)
+{
+	CSTD_UNUSED(data);
+	return kbase_debugfs_helper_seq_read(sfile,
+		MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_config_debugfs_max_size);
+}
+
+static ssize_t kbase_device_debugfs_mem_pool_max_size_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	int err = 0;
+
+	CSTD_UNUSED(ppos);
+	err = kbase_debugfs_helper_seq_write(file, ubuf, count,
+		MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_config_debugfs_set_max_size);
+
+	return err ? err : count;
+}
+
+static int kbase_device_debugfs_mem_pool_max_size_open(struct inode *in,
+	struct file *file)
+{
+	return single_open(file, kbase_device_debugfs_mem_pool_max_size_show,
+		in->i_private);
+}
+
+static const struct file_operations
+	kbase_device_debugfs_mem_pool_max_size_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_device_debugfs_mem_pool_max_size_open,
+	.read = seq_read,
+	.write = kbase_device_debugfs_mem_pool_max_size_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	struct dentry *debugfs_ctx_defaults_directory;
+	int err;
+
+	kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
+			NULL);
+	if (!kbdev->mali_debugfs_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
+			kbdev->mali_debugfs_directory);
+	if (!kbdev->debugfs_ctx_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
+			kbdev->debugfs_ctx_directory);
+	if (!debugfs_ctx_defaults_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+#if !MALI_CUSTOMER_RELEASE
+	kbasep_regs_dump_debugfs_init(kbdev);
+#endif /* !MALI_CUSTOMER_RELEASE */
+	kbasep_regs_history_debugfs_init(kbdev);
+
+	kbase_debug_job_fault_debugfs_init(kbdev);
+	kbasep_gpu_memory_debugfs_init(kbdev);
+	kbase_as_fault_debugfs_init(kbdev);
+	/* fops_* variables created by invocations of macro
+	 * MAKE_QUIRK_ACCESSORS() above. */
+	debugfs_create_file("quirks_sc", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_sc_quirks);
+	debugfs_create_file("quirks_tiler", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_tiler_quirks);
+	debugfs_create_file("quirks_mmu", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_mmu_quirks);
+	debugfs_create_file("quirks_jm", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_jm_quirks);
+
+	debugfs_create_bool("infinite_cache", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->infinite_cache_active_default);
+
+	debugfs_create_file("mem_pool_max_size", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->mem_pool_defaults.small,
+			&kbase_device_debugfs_mem_pool_max_size_fops);
+
+	debugfs_create_file("lp_mem_pool_max_size", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->mem_pool_defaults.large,
+			&kbase_device_debugfs_mem_pool_max_size_fops);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		debugfs_create_file("protected_debug_mode", S_IRUGO,
+				kbdev->mali_debugfs_directory, kbdev,
+				&fops_protected_debug_mode);
+	}
+
+#if KBASE_TRACE_ENABLE
+	kbasep_trace_debugfs_init(kbdev);
+#endif /* KBASE_TRACE_ENABLE */
+
+#ifdef CONFIG_MALI_DEVFREQ
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->inited_subsys & inited_devfreq)
+		kbase_ipa_debugfs_init(kbdev);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+
+	debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_serialize_jobs_debugfs_fops);
+
+
+	return 0;
+
+out:
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+	return err;
+}
+
+static void kbase_device_debugfs_term(struct kbase_device *kbdev)
+{
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+}
+
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	return 0;
+}
+
+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
+#endif /* CONFIG_DEBUG_FS */
+#endif /* MALI_KBASE_BUILD */
+
+static void kbase_device_coherency_init(struct kbase_device *kbdev,
+		unsigned prod_id)
+{
+#ifdef CONFIG_OF
+	u32 supported_coherency_bitmap =
+		kbdev->gpu_props.props.raw_props.coherency_mode;
+	const void *coherency_override_dts;
+	u32 override_coherency;
+
+	/* Only for tMIx :
+	 * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
+	 * documented for tMIx so force correct value here.
+	 */
+	if (GPU_ID_IS_NEW_FORMAT(prod_id) &&
+		   (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
+				   GPU_ID2_PRODUCT_TMIX))
+		if (supported_coherency_bitmap ==
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE))
+			supported_coherency_bitmap |=
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+
+#endif /* CONFIG_OF */
+
+	kbdev->system_coherency = COHERENCY_NONE;
+
+	/* device tree may override the coherency */
+#ifdef CONFIG_OF
+	coherency_override_dts = of_get_property(kbdev->dev->of_node,
+						"system-coherency",
+						NULL);
+	if (coherency_override_dts) {
+
+		override_coherency = be32_to_cpup(coherency_override_dts);
+
+		if ((override_coherency <= COHERENCY_NONE) &&
+			(supported_coherency_bitmap &
+			 COHERENCY_FEATURE_BIT(override_coherency))) {
+
+			kbdev->system_coherency = override_coherency;
+
+			dev_info(kbdev->dev,
+				"Using coherency mode %u set from dtb",
+				override_coherency);
+		} else
+			dev_warn(kbdev->dev,
+				"Ignoring unsupported coherency mode %u set from dtb",
+				override_coherency);
+	}
+
+#endif /* CONFIG_OF */
+
+	kbdev->gpu_props.props.raw_props.coherency_mode =
+		kbdev->system_coherency;
+}
+
+#ifdef CONFIG_MALI_BUSLOG
+
+/* Callback used by the kbase bus logger client, to initiate a GPU reset
+ * when the bus log is restarted.  GPU reset is used as reference point
+ * in HW bus log analyses.
+ */
+static void kbase_logging_started_cb(void *data)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)data;
+
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	dev_info(kbdev->dev, "KBASE - Bus logger restarted\n");
+}
+#endif
+
+static struct attribute *kbase_attrs[] = {
+#ifdef CONFIG_MALI_DEBUG
+	&dev_attr_debug_command.attr,
+	&dev_attr_js_softstop_always.attr,
+#endif
+	&dev_attr_js_timeouts.attr,
+	&dev_attr_soft_job_timeout.attr,
+	&dev_attr_gpuinfo.attr,
+	&dev_attr_dvfs_period.attr,
+	&dev_attr_pm_poweroff.attr,
+	&dev_attr_reset_timeout.attr,
+	&dev_attr_js_scheduling_period.attr,
+	&dev_attr_power_policy.attr,
+	&dev_attr_core_mask.attr,
+	&dev_attr_mem_pool_size.attr,
+	&dev_attr_mem_pool_max_size.attr,
+	&dev_attr_lp_mem_pool_size.attr,
+	&dev_attr_lp_mem_pool_max_size.attr,
+	&dev_attr_js_ctx_scheduling_mode.attr,
+	NULL
+};
+
+static const struct attribute_group kbase_attr_group = {
+	.attrs = kbase_attrs,
+};
+
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	const struct list_head *dev_list;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kfree(kbdev->gpu_props.prop_buffer);
+
+#ifdef CONFIG_MALI_BUSLOG
+	if (kbdev->inited_subsys & inited_buslogger) {
+		bl_core_client_unregister(kbdev->buslogger);
+		kbdev->inited_subsys &= ~inited_buslogger;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_dev_list) {
+		dev_list = kbase_dev_list_get();
+		list_del(&kbdev->entry);
+		kbase_dev_list_put(dev_list);
+		kbdev->inited_subsys &= ~inited_dev_list;
+	}
+
+	if (kbdev->inited_subsys & inited_misc_register) {
+		misc_deregister(&kbdev->mdev);
+		kbdev->inited_subsys &= ~inited_misc_register;
+	}
+
+	if (kbdev->inited_subsys & inited_sysfs_group) {
+		sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+		kbdev->inited_subsys &= ~inited_sysfs_group;
+	}
+
+	if (kbdev->inited_subsys & inited_get_device) {
+		put_device(kbdev->dev);
+		kbdev->inited_subsys &= ~inited_get_device;
+	}
+
+#ifdef MALI_KBASE_BUILD
+	if (kbdev->inited_subsys & inited_debugfs) {
+		kbase_device_debugfs_term(kbdev);
+		kbdev->inited_subsys &= ~inited_debugfs;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_job_fault) {
+		kbase_debug_job_fault_dev_term(kbdev);
+		kbdev->inited_subsys &= ~inited_job_fault;
+	}
+
+
+	if (kbdev->inited_subsys & inited_backend_late) {
+		kbase_backend_late_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_late;
+	}
+
+	if (kbdev->inited_subsys & inited_vinstr) {
+		kbase_vinstr_term(kbdev->vinstr_ctx);
+		kbdev->inited_subsys &= ~inited_vinstr;
+	}
+
+	if (kbdev->inited_subsys & inited_hwcnt_gpu_virt) {
+		kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt);
+		kbdev->inited_subsys &= ~inited_hwcnt_gpu_virt;
+	}
+
+	if (kbdev->inited_subsys & inited_hwcnt_gpu_ctx) {
+		kbase_hwcnt_context_term(kbdev->hwcnt_gpu_ctx);
+		kbdev->inited_subsys &= ~inited_hwcnt_gpu_ctx;
+	}
+
+	if (kbdev->inited_subsys & inited_hwcnt_gpu_iface) {
+		kbase_hwcnt_backend_gpu_destroy(&kbdev->hwcnt_gpu_iface);
+		kbdev->inited_subsys &= ~inited_hwcnt_gpu_iface;
+	}
+
+	if (kbdev->inited_subsys & inited_tlstream) {
+		kbase_timeline_term(kbdev->timeline);
+		kbdev->inited_subsys &= ~inited_tlstream;
+	}
+
+	/* Bring job and mem sys to a halt before we continue termination */
+
+	if (kbdev->inited_subsys & inited_js)
+		kbasep_js_devdata_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_mem)
+		kbase_mem_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_protected) {
+		kbasep_protected_mode_term(kbdev);
+		kbdev->inited_subsys &= ~inited_protected;
+	}
+
+	if (kbdev->inited_subsys & inited_js) {
+		kbasep_js_devdata_term(kbdev);
+		kbdev->inited_subsys &= ~inited_js;
+	}
+
+	if (kbdev->inited_subsys & inited_mem) {
+		kbase_mem_term(kbdev);
+		kbdev->inited_subsys &= ~inited_mem;
+	}
+
+	if (kbdev->inited_subsys & inited_ctx_sched) {
+		kbase_ctx_sched_term(kbdev);
+		kbdev->inited_subsys &= ~inited_ctx_sched;
+	}
+
+	if (kbdev->inited_subsys & inited_device) {
+		kbase_device_term(kbdev);
+		kbdev->inited_subsys &= ~inited_device;
+	}
+
+	if (kbdev->inited_subsys & inited_backend_early) {
+		kbase_backend_early_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_early;
+	}
+
+	if (kbdev->inited_subsys & inited_io_history) {
+		kbase_io_history_term(&kbdev->io_history);
+		kbdev->inited_subsys &= ~inited_io_history;
+	}
+
+	if (kbdev->inited_subsys & inited_power_control) {
+		power_control_term(kbdev);
+		kbdev->inited_subsys &= ~inited_power_control;
+	}
+
+	if (kbdev->inited_subsys & inited_registers_map) {
+		registers_unmap(kbdev);
+		kbdev->inited_subsys &= ~inited_registers_map;
+	}
+
+#ifdef CONFIG_MALI_NO_MALI
+	if (kbdev->inited_subsys & inited_gpu_device) {
+		gpu_device_destroy(kbdev);
+		kbdev->inited_subsys &= ~inited_gpu_device;
+	}
+#endif /* CONFIG_MALI_NO_MALI */
+
+	if (kbdev->inited_subsys != 0)
+		dev_err(kbdev->dev, "Missing sub system termination\n");
+
+	kbase_device_free(kbdev);
+
+	return 0;
+}
+
+void kbase_backend_devfreq_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_term(kbdev);
+		kbdev->inited_subsys &= ~inited_devfreq;
+	}
+#endif
+}
+
+int kbase_backend_devfreq_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	/* Devfreq uses hardware counters, so must be initialized after it. */
+	int err = kbase_devfreq_init(kbdev);
+
+	if (!err)
+		kbdev->inited_subsys |= inited_devfreq;
+	else
+		dev_err(kbdev->dev, "Continuing without devfreq\n");
+#endif /* CONFIG_MALI_DEVFREQ */
+	return 0;
+}
+
+/* Number of register accesses for the buffer that we allocate during
+ * initialization time. The buffer size can be changed later via debugfs. */
+#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512)
+
+static int kbase_platform_device_probe(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev;
+	struct mali_base_gpu_core_props *core_props;
+	u32 gpu_id;
+	unsigned prod_id;
+	const struct list_head *dev_list;
+	int err = 0;
+
+	kbdev = kbase_device_alloc();
+	if (!kbdev) {
+		dev_err(&pdev->dev, "Allocate device failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+
+	kbdev->dev = &pdev->dev;
+	dev_set_drvdata(kbdev->dev, kbdev);
+
+#ifdef CONFIG_MALI_NO_MALI
+	err = gpu_device_create(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Dummy model initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_gpu_device;
+#endif /* CONFIG_MALI_NO_MALI */
+
+	err = assign_irqs(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "IRQ search failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	err = registers_map(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Register map failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_registers_map;
+
+	err = power_control_init(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Power control initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_power_control;
+
+	err = kbase_io_history_init(&kbdev->io_history,
+			KBASEP_DEFAULT_REGISTER_HISTORY_SIZE);
+	if (err) {
+		dev_err(&pdev->dev, "Register access history initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+	kbdev->inited_subsys |= inited_io_history;
+
+	err = kbase_backend_early_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Early backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_early;
+
+	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+			kbase_dev_nr);
+	kbdev->id = kbase_dev_nr;
+
+	kbase_disjoint_init(kbdev);
+
+	/* obtain max configured gpu frequency, if devfreq is enabled then
+	 * this will be overridden by the highest operating point found
+	 */
+	core_props = &(kbdev->gpu_props.props.core_props);
+#ifdef GPU_FREQ_KHZ_MAX
+	core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+#else
+	core_props->gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX;
+#endif
+
+	err = kbase_device_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Device initialization failed (%d)\n", err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_device;
+
+	err = kbase_ctx_sched_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Context scheduler initialization failed (%d)\n",
+				err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_ctx_sched;
+
+	err = kbase_mem_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Memory subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_mem;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+	prod_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	kbase_device_coherency_init(kbdev, prod_id);
+
+	err = kbasep_protected_mode_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Protected mode subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_protected;
+
+	dev_list = kbase_dev_list_get();
+	list_add(&kbdev->entry, &kbase_dev_list);
+	kbase_dev_list_put(dev_list);
+	kbdev->inited_subsys |= inited_dev_list;
+
+	err = kbasep_js_devdata_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job JS devdata initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_js;
+
+	atomic_set(&kbdev->timeline_is_enabled, 0);
+	err = kbase_timeline_init(&kbdev->timeline, &kbdev->timeline_is_enabled);
+	if (err) {
+		dev_err(kbdev->dev, "Timeline stream initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_tlstream;
+
+	err = kbase_hwcnt_backend_gpu_create(kbdev, &kbdev->hwcnt_gpu_iface);
+	if (err) {
+		dev_err(kbdev->dev, "GPU hwcnt backend creation failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_hwcnt_gpu_iface;
+
+	err = kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface,
+		&kbdev->hwcnt_gpu_ctx);
+	if (err) {
+		dev_err(kbdev->dev,
+			"GPU hwcnt context initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_hwcnt_gpu_ctx;
+
+	err = kbase_hwcnt_virtualizer_init(
+		kbdev->hwcnt_gpu_ctx,
+		KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS,
+		&kbdev->hwcnt_gpu_virt);
+	if (err) {
+		dev_err(kbdev->dev,
+			"GPU hwcnt virtualizer initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_hwcnt_gpu_virt;
+
+	err = kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Virtual instrumentation initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+	kbdev->inited_subsys |= inited_vinstr;
+
+	/* The initialization of the devfreq is now embedded inside the
+	 * kbase_backend_late_init(), calling the kbase_backend_devfreq_init()
+	 * before the first trigger of pm_context_idle(). */
+	err = kbase_backend_late_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Late backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_late;
+
+
+#ifdef MALI_KBASE_BUILD
+	err = kbase_debug_job_fault_dev_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job fault debug initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_job_fault;
+
+	err = kbase_device_debugfs_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "DebugFS initialization failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_debugfs;
+
+	kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+	kbdev->mdev.name = kbdev->devname;
+	kbdev->mdev.fops = &kbase_fops;
+	kbdev->mdev.parent = get_device(kbdev->dev);
+	kbdev->mdev.mode = 0666;
+	kbdev->inited_subsys |= inited_get_device;
+
+	/* This needs to happen before registering the device with misc_register(),
+	 * otherwise it causes a race condition between registering the device and a
+	 * uevent event being generated for userspace, causing udev rules to run
+	 * which might expect certain sysfs attributes present. As a result of the
+	 * race condition we avoid, some Mali sysfs entries may have appeared to
+	 * udev to not exist.
+
+	 * For more information, see
+	 * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the
+	 * paragraph that starts with "Word of warning", currently the second-last
+	 * paragraph.
+	 */
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+	if (err) {
+		dev_err(&pdev->dev, "SysFS group creation failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_sysfs_group;
+
+	err = misc_register(&kbdev->mdev);
+	if (err) {
+		dev_err(kbdev->dev, "Misc device registration failed for %s\n",
+			kbdev->devname);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_misc_register;
+
+
+#ifdef CONFIG_MALI_BUSLOG
+	err = bl_core_client_register(kbdev->devname,
+						kbase_logging_started_cb,
+						kbdev, &kbdev->buslogger,
+						THIS_MODULE, NULL);
+	if (err == 0) {
+		kbdev->inited_subsys |= inited_buslogger;
+		bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
+	} else {
+		dev_warn(kbdev->dev, "Bus log client registration failed\n");
+		err = 0;
+	}
+#endif
+
+	err = kbase_gpuprops_populate_user_buffer(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "GPU property population failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	dev_info(kbdev->dev,
+			"Probed as %s\n", dev_name(kbdev->mdev.this_device));
+
+	kbase_dev_nr++;
+#endif /* MALI_KBASE_BUILD */
+
+	return err;
+}
+
+#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE
+
+/**
+ * kbase_device_suspend - Suspend callback from the OS.
+ *
+ * This is called by Linux when the device should suspend.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+static int kbase_device_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kbase_pm_suspend(kbdev);
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	dev_dbg(dev, "Callback %s\n", __func__);
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND);
+		flush_workqueue(kbdev->devfreq_queue.workq);
+	}
+#endif
+	return 0;
+}
+
+/**
+ * kbase_device_resume - Resume callback from the OS.
+ *
+ * This is called by Linux when the device should resume from suspension.
+ *
+ * @dev:  The device to resume
+ *
+ * Return: A standard Linux error code
+ */
+static int kbase_device_resume(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kbase_pm_resume(kbdev);
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	dev_dbg(dev, "Callback %s\n", __func__);
+	if (kbdev->inited_subsys & inited_devfreq) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.active_count > 0)
+			kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME);
+		mutex_unlock(&kbdev->pm.lock);
+		flush_workqueue(kbdev->devfreq_queue.workq);
+	}
+#endif
+	return 0;
+}
+
+/**
+ * kbase_device_runtime_suspend - Runtime suspend callback from the OS.
+ *
+ * This is called by Linux when the device should prepare for a condition in
+ * which it will not be able to communicate with the CPU(s) and RAM due to
+ * power management.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	dev_dbg(dev, "Callback %s\n", __func__);
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND);
+#endif
+
+	if (kbdev->pm.backend.callback_power_runtime_off) {
+		kbdev->pm.backend.callback_power_runtime_off(kbdev);
+		dev_dbg(dev, "runtime suspend\n");
+	}
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/**
+ * kbase_device_runtime_resume - Runtime resume callback from the OS.
+ *
+ * This is called by Linux when the device should go into a fully active state.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_resume(struct device *dev)
+{
+	int ret = 0;
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	dev_dbg(dev, "Callback %s\n", __func__);
+	if (kbdev->pm.backend.callback_power_runtime_on) {
+		ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
+		dev_dbg(dev, "runtime resume\n");
+	}
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME);
+#endif
+
+	return ret;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_device_runtime_idle - Runtime idle callback from the OS.
+ * @dev: The device to suspend
+ *
+ * This is called by Linux when the device appears to be inactive and it might
+ * be placed into a low power state.
+ *
+ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend,
+ * otherwise a standard Linux error code
+ */
+static int kbase_device_runtime_idle(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	dev_dbg(dev, "Callback %s\n", __func__);
+	/* Use platform specific implementation if it exists. */
+	if (kbdev->pm.backend.callback_power_runtime_idle)
+		return kbdev->pm.backend.callback_power_runtime_idle(kbdev);
+
+	/* Just need to update the device's last busy mark. Kernel will respect
+	 * the autosuspend delay and so won't suspend the device immediately.
+	 */
+	pm_runtime_mark_last_busy(kbdev->dev);
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/* The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+	.suspend = kbase_device_suspend,
+	.resume = kbase_device_resume,
+#ifdef KBASE_PM_RUNTIME
+	.runtime_suspend = kbase_device_runtime_suspend,
+	.runtime_resume = kbase_device_runtime_resume,
+	.runtime_idle = kbase_device_runtime_idle,
+#endif /* KBASE_PM_RUNTIME */
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+	{ .compatible = "arm,malit6xx" },
+	{ .compatible = "arm,mali-midgard" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
+
+static struct platform_driver kbase_platform_driver = {
+	.probe = kbase_platform_device_probe,
+	.remove = kbase_platform_device_remove,
+	.driver = {
+		   .name = kbase_drv_name,
+		   .owner = THIS_MODULE,
+		   .pm = &kbase_pm_ops,
+		   .of_match_table = of_match_ptr(kbase_dt_ids),
+	},
+};
+
+/*
+ * The driver will not provide a shortcut to create the Mali platform device
+ * anymore when using Device Tree.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else
+
+static int __init kbase_driver_init(void)
+{
+	int ret;
+
+	ret = kbase_platform_register();
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&kbase_platform_driver);
+
+	if (ret)
+		kbase_platform_unregister();
+
+	return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+	platform_driver_unregister(&kbase_platform_driver);
+	kbase_platform_unregister();
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
+		__stringify(BASE_UK_VERSION_MAJOR) "." \
+		__stringify(BASE_UK_VERSION_MINOR) ")");
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
+#define CREATE_TRACE_POINTS
+#endif
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Create the trace points (otherwise we just get code to call a tracepoint) */
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
+
+void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value)
+{
+	trace_mali_pm_status(dev_id, event, value);
+}
+
+void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id)
+{
+	trace_mali_job_slots_event(dev_id, event,
+		(kctx != NULL ? kctx->tgid : 0),
+		(kctx != NULL ? kctx->pid : 0),
+		atom_id);
+}
+
+void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value)
+{
+	trace_mali_page_fault_insert_pages(dev_id, event, value);
+}
+
+void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long int event)
+{
+	trace_mali_total_alloc_pages_change(dev_id, event);
+}
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_MALI_SYSTEM_TRACE
+#include "mali_linux_kbase_trace.h"
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
new file mode 100755
index 0000000..bda0560
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_ctx_sched.h"
+
+int kbase_ctx_sched_init(struct kbase_device *kbdev)
+{
+	int as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
+
+	/* These two must be recalculated if nr_hw_address_spaces changes
+	 * (e.g. for HW workarounds) */
+	kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+		bool use_workaround;
+
+		use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
+		if (use_workaround) {
+			dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
+			kbdev->nr_user_address_spaces = 1;
+		}
+	}
+
+	kbdev->as_free = as_present; /* All ASs initially free */
+
+	memset(kbdev->as_to_kctx, 0, sizeof(kbdev->as_to_kctx));
+
+	return 0;
+}
+
+void kbase_ctx_sched_term(struct kbase_device *kbdev)
+{
+	s8 i;
+
+	/* Sanity checks */
+	for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
+		WARN_ON(kbdev->as_to_kctx[i] != NULL);
+		WARN_ON(!(kbdev->as_free & (1u << i)));
+	}
+}
+
+/* kbasep_ctx_sched_find_as_for_ctx - Find a free address space
+ *
+ * @kbdev: The context for which to find a free address space
+ *
+ * Return: A valid AS if successful, otherwise KBASEP_AS_NR_INVALID
+ *
+ * This function returns an address space available for use. It would prefer
+ * returning an AS that has been previously assigned to the context to
+ * avoid having to reprogram the MMU.
+ */
+static int kbasep_ctx_sched_find_as_for_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+	int free_as;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* First check if the previously assigned AS is available */
+	if ((kctx->as_nr != KBASEP_AS_NR_INVALID) &&
+			(kbdev->as_free & (1u << kctx->as_nr)))
+		return kctx->as_nr;
+
+	/* The previously assigned AS was taken, we'll be returning any free
+	 * AS at this point.
+	 */
+	free_as = ffs(kbdev->as_free) - 1;
+	if (free_as >= 0 && free_as < kbdev->nr_hw_address_spaces)
+		return free_as;
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(!kbdev->pm.backend.gpu_powered);
+
+	if (atomic_inc_return(&kctx->refcount) == 1) {
+		int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx);
+
+		if (free_as != KBASEP_AS_NR_INVALID) {
+			kbdev->as_free &= ~(1u << free_as);
+			/* Only program the MMU if the context has not been
+			 * assigned the same address space before.
+			 */
+			if (free_as != kctx->as_nr) {
+				struct kbase_context *const prev_kctx =
+					kbdev->as_to_kctx[free_as];
+
+				if (prev_kctx) {
+					WARN_ON(atomic_read(&prev_kctx->refcount) != 0);
+					kbase_mmu_disable(prev_kctx);
+					prev_kctx->as_nr = KBASEP_AS_NR_INVALID;
+				}
+
+				kctx->as_nr = free_as;
+				kbdev->as_to_kctx[free_as] = kctx;
+				kbase_mmu_update(kbdev, &kctx->mmu,
+					kctx->as_nr);
+			}
+		} else {
+			atomic_dec(&kctx->refcount);
+
+			/* Failed to find an available address space, we must
+			 * be returning an error at this point.
+			 */
+			WARN_ON(kctx->as_nr != KBASEP_AS_NR_INVALID);
+		}
+	}
+
+	return kctx->as_nr;
+}
+
+void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	WARN_ON(atomic_read(&kctx->refcount) == 0);
+	WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID);
+	WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx);
+
+	atomic_inc(&kctx->refcount);
+}
+
+void kbase_ctx_sched_release_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (atomic_dec_return(&kctx->refcount) == 0)
+		kbdev->as_free |= (1u << kctx->as_nr);
+}
+
+void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(atomic_read(&kctx->refcount) != 0);
+
+	if (kctx->as_nr != KBASEP_AS_NR_INVALID) {
+		if (kbdev->pm.backend.gpu_powered)
+			kbase_mmu_disable(kctx);
+
+		kbdev->as_to_kctx[kctx->as_nr] = NULL;
+		kctx->as_nr = KBASEP_AS_NR_INVALID;
+	}
+}
+
+void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
+{
+	s8 i;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(!kbdev->pm.backend.gpu_powered);
+
+	for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
+		struct kbase_context *kctx;
+
+		kctx = kbdev->as_to_kctx[i];
+		if (kctx) {
+			if (atomic_read(&kctx->refcount)) {
+				WARN_ON(kctx->as_nr != i);
+
+				kbase_mmu_update(kbdev, &kctx->mmu,
+					kctx->as_nr);
+			} else {
+				/* This context might have been assigned an
+				 * AS before, clear it.
+				 */
+				kbdev->as_to_kctx[kctx->as_nr] = NULL;
+				kctx->as_nr = KBASEP_AS_NR_INVALID;
+			}
+		} else {
+			kbase_mmu_disable_as(kbdev, i);
+		}
+	}
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h
new file mode 100755
index 0000000..ab57a0d
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h
@@ -0,0 +1,135 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_CTX_SCHED_H_
+#define _KBASE_CTX_SCHED_H_
+
+#include <mali_kbase.h>
+
+/**
+ * The Context Scheduler manages address space assignment and reference
+ * counting to kbase_context. The interface has been designed to minimise
+ * interactions between the Job Scheduler and Power Management/MMU to support
+ * the existing Job Scheduler interface.
+ *
+ * The initial implementation of the Context Scheduler does not schedule
+ * contexts. Instead it relies on the Job Scheduler to make decisions of
+ * when to schedule/evict contexts if address spaces are starved. In the
+ * future, once an interface between the CS and JS has been devised to
+ * provide enough information about how each context is consuming GPU resources,
+ * those decisions can be made in the CS itself, thereby reducing duplicated
+ * code.
+ */
+
+/**
+ * kbase_ctx_sched_init - Initialise the context scheduler
+ * @kbdev: The device for which the context scheduler needs to be initialised
+ *
+ * This must be called during device initialisation. The number of hardware
+ * address spaces must already be established before calling this function.
+ *
+ * Return: 0 for success, otherwise failure
+ */
+int kbase_ctx_sched_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ctx_sched_term - Terminate the context scheduler
+ * @kbdev: The device for which the context scheduler needs to be terminated
+ *
+ * This must be called during device termination after all contexts have been
+ * destroyed.
+ */
+void kbase_ctx_sched_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context
+ * @kctx: The context to which to retain a reference
+ *
+ * This function should be called whenever an address space should be assigned
+ * to a context and programmed onto the MMU. It should typically be called
+ * when jobs are ready to be submitted to the GPU.
+ *
+ * It can be called as many times as necessary. The address space will be
+ * assigned to the context for as long as there is a reference to said context.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ *
+ * Return: The address space that the context has been assigned to or
+ *         KBASEP_AS_NR_INVALID if no address space was available.
+ */
+int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx);
+
+/**
+ * kbase_ctx_sched_retain_ctx_refcount
+ * @kctx: The context to which to retain a reference
+ *
+ * This function only retains a reference to the context. It must be called
+ * only when the context already has a reference.
+ *
+ * This is typically called inside an atomic session where we know the context
+ * is already scheduled in but want to take an extra reference to ensure that
+ * it doesn't get descheduled.
+ *
+ * The kbase_device::hwaccess_lock must be held whilst calling this function
+ */
+void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx);
+
+/**
+ * kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context
+ * @kctx: The context from which to release a reference
+ *
+ * This function should be called whenever an address space could be unassigned
+ * from a context. When there are no more references to said context, the
+ * address space previously assigned to this context shall be reassigned to
+ * other contexts as needed.
+ *
+ * The kbase_device::hwaccess_lock must be held whilst calling this function
+ */
+void kbase_ctx_sched_release_ctx(struct kbase_context *kctx);
+
+/**
+ * kbase_ctx_sched_remove_ctx - Unassign previously assigned address space
+ * @kctx: The context to be removed
+ *
+ * This function should be called when a context is being destroyed. The
+ * context must no longer have any reference. If it has been assigned an
+ * address space before then the AS will be unprogrammed.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ */
+void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx);
+
+/**
+ * kbase_ctx_sched_restore_all_as - Reprogram all address spaces
+ * @kbdev: The device for which address spaces to be reprogrammed
+ *
+ * This function shall reprogram all address spaces previously assigned to
+ * contexts. It can be used after the GPU is reset.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ */
+void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev);
+
+#endif /* _KBASE_CTX_SCHED_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100755
index 0000000..118f787
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+	NULL,
+	NULL
+};
+
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+	kbasep_debug_assert_registered_cb.func = func;
+	kbasep_debug_assert_registered_cb.param = param;
+}
+
+void kbasep_debug_assert_call_hook(void)
+{
+	if (kbasep_debug_assert_registered_cb.func != NULL)
+		kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
new file mode 100755
index 0000000..2fdb72d
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
@@ -0,0 +1,169 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If different from 0, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+struct kbasep_debug_assert_cb {
+	kbase_debug_assert_hook *func;
+	void *param;
+};
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+		"In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION __func__
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated to the @ref KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+		do { \
+			pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+			pr_err(__VA_ARGS__);\
+			pr_err("\n");\
+		} while (false)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @ref KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+	KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+	/**
+	 * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+	 * @brief Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+	 *
+	 * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+	 *
+	 * @param expr Boolean expression
+	 * @param ...  Message to display when @a expr is false, as a format string followed by format arguments.
+	 */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+		do { \
+			if (!(expr)) { \
+				KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+				KBASE_CALL_ASSERT_HOOK();\
+				BUG();\
+			} \
+		} while (false)
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif				/* CONFIG_MALI_DEBUG */
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif				/* _KBASE_DEBUG_H */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
new file mode 100755
index 0000000..4873474
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
@@ -0,0 +1,557 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <linux/spinlock.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+	bool             ret;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	ret = !list_empty(event_list);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return ret;
+}
+
+static void kbase_ctx_remove_pending_event(struct kbase_context *kctx)
+{
+	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
+	struct base_job_fault_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->kbdev->job_fault_event_lock, flags);
+	list_for_each_entry(event, event_list, head) {
+		if (event->katom->kctx == kctx) {
+			list_del(&event->head);
+			spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags);
+
+			wake_up(&kctx->kbdev->job_fault_resume_wq);
+			flush_work(&event->job_fault_work);
+
+			/* job_fault_event_list can only have a single atom for
+			 * each context.
+			 */
+			return;
+		}
+	}
+	spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags);
+}
+
+static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
+	struct base_job_fault_event *event;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		return true;
+	}
+	list_for_each_entry(event, event_list, head) {
+		if (event->katom->kctx == kctx) {
+			spin_unlock_irqrestore(&kbdev->job_fault_event_lock,
+					flags);
+			return false;
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+	return true;
+}
+
+static int wait_for_job_fault(struct kbase_device *kbdev)
+{
+#if KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE && \
+		KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE
+	if (!kbase_is_job_fault_event_pending(kbdev))
+		return -EAGAIN;
+	else
+		return 0;
+#else
+	return wait_event_interruptible(kbdev->job_fault_wq,
+			kbase_is_job_fault_event_pending(kbdev));
+#endif
+}
+
+/* wait until the fault happen and copy the event */
+static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
+		struct base_job_fault_event *event)
+{
+	struct list_head            *event_list = &kbdev->job_fault_event_list;
+	struct base_job_fault_event *event_in;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	while (list_empty(event_list)) {
+		int err;
+
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+		err = wait_for_job_fault(kbdev);
+		if (err)
+			return err;
+
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+
+	event_in = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	event->event_code = event_in->event_code;
+	event->katom = event_in->katom;
+
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return 0;
+
+}
+
+/* remove the event from the queue */
+static struct base_job_fault_event *kbase_job_fault_event_dequeue(
+		struct kbase_device *kbdev, struct list_head *event_list)
+{
+	struct base_job_fault_event *event;
+
+	event = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	list_del(event_list->next);
+
+	return event;
+
+}
+
+/* Remove all the following atoms after the failed atom in the same context
+ * Call the postponed bottom half of job done.
+ * Then, this context could be rescheduled.
+ */
+static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
+{
+	struct list_head *event_list = &kctx->job_fault_resume_event_list;
+
+	while (!list_empty(event_list)) {
+		struct base_job_fault_event *event;
+
+		event = kbase_job_fault_event_dequeue(kctx->kbdev,
+				&kctx->job_fault_resume_event_list);
+		kbase_jd_done_worker(&event->katom->work);
+	}
+
+}
+
+/* Remove all the failed atoms that belong to different contexts
+ * Resume all the contexts that were suspend due to failed job
+ */
+static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	while (!list_empty(event_list)) {
+		kbase_job_fault_event_dequeue(kbdev, event_list);
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		wake_up(&kbdev->job_fault_resume_wq);
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+}
+
+static void kbase_job_fault_resume_worker(struct work_struct *data)
+{
+	struct base_job_fault_event *event = container_of(data,
+			struct base_job_fault_event, job_fault_work);
+	struct kbase_context *kctx;
+	struct kbase_jd_atom *katom;
+
+	katom = event->katom;
+	kctx = katom->kctx;
+
+	dev_info(kctx->kbdev->dev, "Job dumping wait\n");
+
+	/* When it was waked up, it need to check if queue is empty or the
+	 * failed atom belongs to different context. If yes, wake up. Both
+	 * of them mean the failed job has been dumped. Please note, it
+	 * should never happen that the job_fault_event_list has the two
+	 * atoms belong to the same context.
+	 */
+	wait_event(kctx->kbdev->job_fault_resume_wq,
+			 kbase_ctx_has_no_event_pending(kctx));
+
+	atomic_set(&kctx->job_fault_count, 0);
+	kbase_jd_done_worker(&katom->work);
+
+	/* In case the following atoms were scheduled during failed job dump
+	 * the job_done_worker was held. We need to rerun it after the dump
+	 * was finished
+	 */
+	kbase_job_fault_resume_event_cleanup(kctx);
+
+	dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
+}
+
+static struct base_job_fault_event *kbase_job_fault_event_queue(
+		struct list_head *event_list,
+		struct kbase_jd_atom *atom,
+		u32 completion_code)
+{
+	struct base_job_fault_event *event;
+
+	event = &atom->fault_event;
+
+	event->katom = atom;
+	event->event_code = completion_code;
+
+	list_add_tail(&event->head, event_list);
+
+	return event;
+
+}
+
+static void kbase_job_fault_event_post(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, u32 completion_code)
+{
+	struct base_job_fault_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
+				katom, completion_code);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	wake_up_interruptible(&kbdev->job_fault_wq);
+
+	INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
+	queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
+
+	dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
+			katom->kctx->tgid, katom->kctx->id);
+
+}
+
+/*
+ * This function will process the job fault
+ * Get the register copy
+ * Send the failed job dump event
+ * Create a Wait queue to wait until the job dump finish
+ */
+
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Check if dumping is in the process
+	 * only one atom of each context can be dumped at the same time
+	 * If the atom belongs to different context, it can be dumped
+	 */
+	if (atomic_read(&kctx->job_fault_count) > 0) {
+		kbase_job_fault_event_queue(
+				&kctx->job_fault_resume_event_list,
+				katom, completion_code);
+		dev_info(kctx->kbdev->dev, "queue:%d\n",
+				kbase_jd_atom_id(kctx, katom));
+		return true;
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING))
+		return false;
+
+	if (kctx->kbdev->job_fault_debug == true) {
+
+		if (completion_code != BASE_JD_EVENT_DONE) {
+
+			if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
+				dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
+				return false;
+			}
+
+			kbase_job_fault_event_post(kctx->kbdev, katom,
+					completion_code);
+			atomic_inc(&kctx->job_fault_count);
+			dev_info(kctx->kbdev->dev, "post:%d\n",
+					kbase_jd_atom_id(kctx, katom));
+			return true;
+
+		}
+	}
+	return false;
+
+}
+
+static int debug_job_fault_show(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+	struct kbase_context *kctx = event->katom->kctx;
+	int i;
+
+	dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
+			kctx->tgid, kctx->id, event->reg_offset);
+
+	if (kctx->reg_dump == NULL) {
+		dev_warn(kbdev->dev, "reg dump is NULL");
+		return -1;
+	}
+
+	if (kctx->reg_dump[event->reg_offset] ==
+			REGISTER_DUMP_TERMINATION_FLAG) {
+		/* Return the error here to stop the read. And the
+		 * following next() will not be called. The stop can
+		 * get the real event resource and release it
+		 */
+		return -1;
+	}
+
+	if (event->reg_offset == 0)
+		seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
+
+	for (i = 0; i < 50; i++) {
+		if (kctx->reg_dump[event->reg_offset] ==
+				REGISTER_DUMP_TERMINATION_FLAG) {
+			break;
+		}
+		seq_printf(m, "%08x: %08x\n",
+				kctx->reg_dump[event->reg_offset],
+				kctx->reg_dump[1+event->reg_offset]);
+		event->reg_offset += 2;
+
+	}
+
+
+	return 0;
+}
+static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+
+	dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
+			event->reg_offset, (int)*pos);
+
+	return event;
+}
+
+static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event;
+
+	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);
+
+	/* The condition is trick here. It needs make sure the
+	 * fault hasn't happened and the dumping hasn't been started,
+	 * or the dumping has finished
+	 */
+	if (*pos == 0) {
+		event = kmalloc(sizeof(*event), GFP_KERNEL);
+		if (!event)
+			return NULL;
+		event->reg_offset = 0;
+		if (kbase_job_fault_event_wait(kbdev, event)) {
+			kfree(event);
+			return NULL;
+		}
+
+		/* The cache flush workaround is called in bottom half of
+		 * job done but we delayed it. Now we should clean cache
+		 * earlier. Then the GPU memory dump should be correct.
+		 */
+		kbase_backend_cache_clean(kbdev, event->katom);
+	} else
+		return NULL;
+
+	return event;
+}
+
+static void debug_job_fault_stop(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+
+	/* here we wake up the kbase_jd_done_worker after stop, it needs
+	 * get the memory dump before the register dump in debug daemon,
+	 * otherwise, the memory dump may be incorrect.
+	 */
+
+	if (v != NULL) {
+		kfree(v);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 1");
+
+	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+		if (!list_empty(&kbdev->job_fault_event_list)) {
+			kbase_job_fault_event_dequeue(kbdev,
+				&kbdev->job_fault_event_list);
+			wake_up(&kbdev->job_fault_resume_wq);
+		}
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
+	}
+
+}
+
+static const struct seq_operations ops = {
+	.start = debug_job_fault_start,
+	.next = debug_job_fault_next,
+	.stop = debug_job_fault_stop,
+	.show = debug_job_fault_show,
+};
+
+static int debug_job_fault_open(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_open(file, &ops);
+
+	((struct seq_file *)file->private_data)->private = kbdev;
+	dev_info(kbdev->dev, "debug job fault seq open");
+
+	kbdev->job_fault_debug = true;
+
+	return 0;
+
+}
+
+static int debug_job_fault_release(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_release(in, file);
+
+	kbdev->job_fault_debug = false;
+
+	/* Clean the unprocessed job fault. After that, all the suspended
+	 * contexts could be rescheduled.
+	 */
+	kbase_job_fault_event_cleanup(kbdev);
+
+	dev_info(kbdev->dev, "debug job fault seq close");
+
+	return 0;
+}
+
+static const struct file_operations kbasep_debug_job_fault_fops = {
+	.owner = THIS_MODULE,
+	.open = debug_job_fault_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = debug_job_fault_release,
+};
+
+/*
+ *  Initialize debugfs entry for job fault dump
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("job_fault", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_debug_job_fault_fops);
+}
+
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+
+	INIT_LIST_HEAD(&kbdev->job_fault_event_list);
+
+	init_waitqueue_head(&(kbdev->job_fault_wq));
+	init_waitqueue_head(&(kbdev->job_fault_resume_wq));
+	spin_lock_init(&kbdev->job_fault_event_lock);
+
+	kbdev->job_fault_resume_workq = alloc_workqueue(
+			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
+	if (!kbdev->job_fault_resume_workq)
+		return -ENOMEM;
+
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+/*
+ * Release the relevant resource per device
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->job_fault_resume_workq);
+}
+
+
+/*
+ *  Initialize the relevant data structure per context
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
+{
+
+	/* We need allocate double size register range
+	 * Because this memory will keep the register address and value
+	 */
+	kctx->reg_dump = vmalloc(0x4000 * 2);
+	if (kctx->reg_dump == NULL)
+		return;
+
+	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
+		vfree(kctx->reg_dump);
+		kctx->reg_dump = NULL;
+	}
+	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
+	atomic_set(&kctx->job_fault_count, 0);
+
+}
+
+/*
+ *  release the relevant resource per context
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
+{
+	vfree(kctx->reg_dump);
+}
+
+void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx)
+{
+	WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING));
+
+	kbase_ctx_remove_pending_event(kctx);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
new file mode 100755
index 0000000..ef69627
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_DEBUG_JOB_FAULT_H
+#define _KBASE_DEBUG_JOB_FAULT_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
+
+/**
+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue
+ *		per device and initialize the required lists.
+ * @kbdev:	Device pointer
+ *
+ * Return: Zero on success or a negative error code.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_dev_term - Clean up resources created in
+ *		kbase_debug_job_fault_dev_init.
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_context_init - Initialize the relevant
+ *		data structure per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_context_term - Release the relevant
+ *		resource per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_kctx_unblock - Unblock the atoms blocked on job fault
+ *					dumping on context termination.
+ *
+ * This function is called during context termination to unblock the atom for
+ * which the job fault occurred and also the atoms following it. This is needed
+ * otherwise the wait for zero jobs could timeout (leading to an assertion
+ * failure, kernel panic in debug builds) in the pathological case where
+ * although the thread/daemon capturing the job fault events is running,
+ * but for some reasons has stopped consuming the events.
+ *
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_process - Process the failed job.
+ *      It will send a event and wake up the job fault waiting queue
+ *      Then create a work queue to wait for job dump finish
+ *      This function should be called in the interrupt handler and before
+ *      jd_done that make sure the jd_done_worker will be delayed until the
+ *      job dump finish
+ * @katom: The failed atom pointer
+ * @completion_code: the job status
+ * @return true if dump is going on
+ */
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code);
+
+
+/**
+ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
+ *      address during the job fault process, the relevant registers will
+ *      be saved when a job fault happen
+ * @kctx: KBase context pointer
+ * @reg_range: Maximum register address space
+ * @return true if initializing successfully
+ */
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range);
+
+/**
+ * kbase_job_fault_get_reg_snapshot - Read the interested registers for
+ *      failed job dump
+ * @kctx: KBase context pointer
+ * @return true if getting registers successfully
+ */
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
+
+#endif  /*_KBASE_DEBUG_JOB_FAULT_H*/
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
new file mode 100755
index 0000000..c091f16
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -0,0 +1,307 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+#if (KERNEL_VERSION(4, 1, 0) > LINUX_VERSION_CODE)
+#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count)
+#endif
+
+struct debug_mem_mapping {
+	struct list_head node;
+
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long flags;
+
+	u64 start_pfn;
+	size_t nr_pages;
+};
+
+struct debug_mem_data {
+	struct list_head mapping_list;
+	struct kbase_context *kctx;
+};
+
+struct debug_mem_seq_off {
+	struct list_head *lh;
+	size_t offset;
+};
+
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data;
+	struct debug_mem_mapping *map;
+	loff_t pos = *_pos;
+
+	list_for_each_entry(map, &mem_data->mapping_list, node) {
+		if (pos >= map->nr_pages) {
+			pos -= map->nr_pages;
+		} else {
+			data = kmalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				return NULL;
+			data->lh = &map->node;
+			data->offset = pos;
+			return data;
+		}
+	}
+
+	/* Beyond the end */
+	return NULL;
+}
+
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+	kfree(v);
+}
+
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	if (data->offset < map->nr_pages - 1) {
+		data->offset++;
+		++*pos;
+		return data;
+	}
+
+	if (list_is_last(data->lh, &mem_data->mapping_list)) {
+		kfree(data);
+		return NULL;
+	}
+
+	data->lh = data->lh->next;
+	data->offset = 0;
+	++*pos;
+
+	return data;
+}
+
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+	int i, j;
+	struct page *page;
+	uint32_t *mapping;
+	pgprot_t prot = PAGE_KERNEL;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	kbase_gpu_vm_lock(mem_data->kctx);
+
+	if (data->offset >= map->alloc->nents) {
+		seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+				data->offset) << PAGE_SHIFT);
+		goto out;
+	}
+
+	if (!(map->flags & KBASE_REG_CPU_CACHED))
+		prot = pgprot_writecombine(prot);
+
+	page = as_page(map->alloc->pages[data->offset]);
+	mapping = vmap(&page, 1, VM_MAP, prot);
+	if (!mapping)
+		goto out;
+
+	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+		seq_printf(m, "%016llx:", i + ((map->start_pfn +
+				data->offset) << PAGE_SHIFT));
+
+		for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+			seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+		seq_putc(m, '\n');
+	}
+
+	vunmap(mapping);
+
+	seq_putc(m, '\n');
+
+out:
+	kbase_gpu_vm_unlock(mem_data->kctx);
+	return 0;
+}
+
+static const struct seq_operations ops = {
+	.start = debug_mem_start,
+	.next = debug_mem_next,
+	.stop = debug_mem_stop,
+	.show = debug_mem_show,
+};
+
+static int debug_mem_zone_open(struct rb_root *rbtree,
+						struct debug_mem_data *mem_data)
+{
+	int ret = 0;
+	struct rb_node *p;
+	struct kbase_va_region *reg;
+	struct debug_mem_mapping *mapping;
+
+	for (p = rb_first(rbtree); p; p = rb_next(p)) {
+		reg = rb_entry(p, struct kbase_va_region, rblink);
+
+		if (reg->gpu_alloc == NULL)
+			/* Empty region - ignore */
+			continue;
+
+		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		mapping->start_pfn = reg->start_pfn;
+		mapping->nr_pages = reg->nr_pages;
+		mapping->flags = reg->flags;
+		list_add_tail(&mapping->node, &mem_data->mapping_list);
+	}
+
+out:
+	return ret;
+}
+
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+	struct kbase_context *const kctx = i->i_private;
+	struct debug_mem_data *mem_data;
+	int ret;
+
+	if (get_file_rcu(kctx->filp) == 0)
+		return -ENOENT;
+
+	ret = seq_open(file, &ops);
+	if (ret)
+		goto open_fail;
+
+	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	if (!mem_data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mem_data->kctx = kctx;
+
+	INIT_LIST_HEAD(&mem_data->mapping_list);
+
+	kbase_gpu_vm_lock(kctx);
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	((struct seq_file *)file->private_data)->private = mem_data;
+
+	return 0;
+
+out:
+	if (mem_data) {
+		while (!list_empty(&mem_data->mapping_list)) {
+			struct debug_mem_mapping *mapping;
+
+			mapping = list_first_entry(&mem_data->mapping_list,
+					struct debug_mem_mapping, node);
+			kbase_mem_phy_alloc_put(mapping->alloc);
+			list_del(&mapping->node);
+			kfree(mapping);
+		}
+		kfree(mem_data);
+	}
+	seq_release(i, file);
+open_fail:
+	fput(kctx->filp);
+
+	return ret;
+}
+
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+	struct kbase_context *const kctx = inode->i_private;
+	struct seq_file *sfile = file->private_data;
+	struct debug_mem_data *mem_data = sfile->private;
+	struct debug_mem_mapping *mapping;
+
+	seq_release(inode, file);
+
+	while (!list_empty(&mem_data->mapping_list)) {
+		mapping = list_first_entry(&mem_data->mapping_list,
+				struct debug_mem_mapping, node);
+		kbase_mem_phy_alloc_put(mapping->alloc);
+		list_del(&mapping->node);
+		kfree(mapping);
+	}
+
+	kfree(mem_data);
+
+	fput(kctx->filp);
+
+	return 0;
+}
+
+static const struct file_operations kbase_debug_mem_view_fops = {
+	.owner = THIS_MODULE,
+	.open = debug_mem_open,
+	.release = debug_mem_release,
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+void kbase_debug_mem_view_init(struct kbase_context *const kctx)
+{
+	/* Caller already ensures this, but we keep the pattern for
+	 * maintenance safety.
+	 */
+	if (WARN_ON(!kctx) ||
+		WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry)))
+		return;
+
+	debugfs_create_file("mem_view", 0400, kctx->kctx_dentry, kctx,
+			&kbase_debug_mem_view_fops);
+}
+
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
new file mode 100755
index 0000000..b948b7c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015, 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+/**
+ * kbase_debug_mem_view_init - Initialize the mem_view sysfs file
+ * @kctx: Pointer to kernel base context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct kbase_context *kctx);
+
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c
new file mode 100644
index 0000000..0df75dd
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c
@@ -0,0 +1,187 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include "mali_kbase_debugfs_helper.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+/* Arbitrary maximum size to prevent user space allocating too much kernel
+ * memory
+ */
+#define DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE (256u)
+
+/**
+ * set_attr_from_string - Parse a string to set elements of an array
+ *
+ * This is the core of the implementation of
+ * kbase_debugfs_helper_set_attr_from_string. The only difference between the
+ * two functions is that this one requires the input string to be writable.
+ *
+ * @buf:         Input string to parse. Must be nul-terminated!
+ * @array:       Address of an object that can be accessed like an array.
+ * @nelems:      Number of elements in the array.
+ * @set_attr_fn: Function to be called back for each array element.
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
+static int set_attr_from_string(
+	char *const buf,
+	void *const array, size_t const nelems,
+	kbase_debugfs_helper_set_attr_fn const set_attr_fn)
+{
+	size_t index, err = 0;
+	char *ptr = buf;
+
+	for (index = 0; index < nelems && *ptr; ++index) {
+		unsigned long new_size;
+		size_t len;
+		char sep;
+
+		/* Drop leading spaces */
+		while (*ptr == ' ')
+			ptr++;
+
+		len = strcspn(ptr, "\n ");
+		if (len == 0) {
+			/* No more values (allow this) */
+			break;
+		}
+
+		/* Substitute a nul terminator for a space character
+		 * to make the substring valid for kstrtoul.
+		 */
+		sep = ptr[len];
+		if (sep == ' ')
+			ptr[len++] = '\0';
+
+		err = kstrtoul(ptr, 0, &new_size);
+		if (err)
+			break;
+
+		/* Skip the substring (including any premature nul terminator)
+		 */
+		ptr += len;
+
+		set_attr_fn(array, index, new_size);
+	}
+
+	return err;
+}
+
+int kbase_debugfs_helper_set_attr_from_string(
+	const char *const buf, void *const array, size_t const nelems,
+	kbase_debugfs_helper_set_attr_fn const set_attr_fn)
+{
+	char *const wbuf = kstrdup(buf, GFP_KERNEL);
+	int err = 0;
+
+	if (!wbuf)
+		return -ENOMEM;
+
+	err = set_attr_from_string(wbuf, array, nelems,
+		set_attr_fn);
+
+	kfree(wbuf);
+	return err;
+}
+
+ssize_t kbase_debugfs_helper_get_attr_to_string(
+	char *const buf, size_t const size,
+	void *const array, size_t const nelems,
+	kbase_debugfs_helper_get_attr_fn const get_attr_fn)
+{
+	ssize_t total = 0;
+	size_t index;
+
+	for (index = 0; index < nelems; ++index) {
+		const char *postfix = " ";
+
+		if (index == (nelems-1))
+			postfix = "\n";
+
+		total += scnprintf(buf + total, size - total, "%zu%s",
+				get_attr_fn(array, index), postfix);
+	}
+
+	return total;
+}
+
+int kbase_debugfs_helper_seq_write(struct file *const file,
+	const char __user *const ubuf, size_t const count,
+	size_t const nelems,
+	kbase_debugfs_helper_set_attr_fn const set_attr_fn)
+{
+	const struct seq_file *const sfile = file->private_data;
+	void *const array = sfile->private;
+	int err = 0;
+	char *buf;
+
+	if (WARN_ON(!array))
+		return -EINVAL;
+
+	if (WARN_ON(count > DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE))
+		return -EINVAL;
+
+	buf = kmalloc(count + 1, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, ubuf, count)) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	buf[count] = '\0';
+	err = set_attr_from_string(buf,
+		array, nelems, set_attr_fn);
+	kfree(buf);
+
+	return err;
+}
+
+int kbase_debugfs_helper_seq_read(struct seq_file *const sfile,
+	size_t const nelems,
+	kbase_debugfs_helper_get_attr_fn const get_attr_fn)
+{
+	void *const array = sfile->private;
+	size_t index;
+
+	if (WARN_ON(!array))
+		return -EINVAL;
+
+	for (index = 0; index < nelems; ++index) {
+		const char *postfix = " ";
+
+		if (index == (nelems-1))
+			postfix = "\n";
+
+		seq_printf(sfile, "%zu%s", get_attr_fn(array, index), postfix);
+	}
+	return 0;
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h
new file mode 100644
index 0000000..c3c9efa
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h
@@ -0,0 +1,141 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_DEBUGFS_HELPER_H_
+#define _KBASE_DEBUGFS_HELPER_H_
+
+/**
+ * typedef kbase_debugfs_helper_set_attr_fn - Type of function to set an
+ *                                            attribute value from an array
+ *
+ * @array: Address of an object that can be accessed like an array.
+ * @index: An element index. The valid range depends on the use-case.
+ * @value: Attribute value to be set.
+ */
+typedef void (*kbase_debugfs_helper_set_attr_fn)(
+	void *array, size_t index, size_t value);
+
+/**
+ * kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an
+ *                                             array
+ *
+ * The given function is called once for each attribute value found in the
+ * input string. It is not an error if the string specifies fewer attribute
+ * values than the specified number of array elements.
+ *
+ * The number base of each attribute value is detected automatically
+ * according to the standard rules (e.g. prefix "0x" for hexadecimal).
+ * Attribute values are separated by one or more space characters.
+ * Additional leading and trailing spaces are ignored.
+ *
+ * @buf:         Input string to parse. Must be nul-terminated!
+ * @array:       Address of an object that can be accessed like an array.
+ * @nelems:      Number of elements in the array.
+ * @set_attr_fn: Function to be called back for each array element.
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
+int kbase_debugfs_helper_set_attr_from_string(
+	const char *buf, void *array, size_t nelems,
+	kbase_debugfs_helper_set_attr_fn set_attr_fn);
+
+/**
+ * typedef kbase_debugfs_helper_get_attr_fn - Type of function to get an
+ *                                            attribute value from an array
+ *
+ * @array: Address of an object that can be accessed like an array.
+ * @index: An element index. The valid range depends on the use-case.
+ *
+ * Return: Value of attribute.
+ */
+typedef size_t (*kbase_debugfs_helper_get_attr_fn)(
+	void *array, size_t index);
+
+/**
+ * kbase_debugfs_helper_get_attr_to_string - Construct a formatted string
+ *                                           from elements in an array
+ *
+ * The given function is called once for each array element to get the
+ * value of the attribute to be inspected. The attribute values are
+ * written to the buffer as a formatted string of decimal numbers
+ * separated by spaces and terminated by a linefeed.
+ *
+ * @buf:         Buffer in which to store the formatted output string.
+ * @size:        The size of the buffer, in bytes.
+ * @array:       Address of an object that can be accessed like an array.
+ * @nelems:      Number of elements in the array.
+ * @get_attr_fn: Function to be called back for each array element.
+ *
+ * Return: Number of characters written excluding the nul terminator.
+ */
+ssize_t kbase_debugfs_helper_get_attr_to_string(
+	char *buf, size_t size, void *array, size_t nelems,
+	kbase_debugfs_helper_get_attr_fn get_attr_fn);
+
+/**
+ * kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an
+ *                                 array
+ *
+ * The virtual file must have been opened by calling single_open and passing
+ * the address of an object that can be accessed like an array.
+ *
+ * The given function is called once for each array element to get the
+ * value of the attribute to be inspected. The attribute values are
+ * written to the buffer as a formatted string of decimal numbers
+ * separated by spaces and terminated by a linefeed.
+ *
+ * @sfile:       A virtual file previously opened by calling single_open.
+ * @nelems:      Number of elements in the array.
+ * @get_attr_fn: Function to be called back for each array element.
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
+int kbase_debugfs_helper_seq_read(
+	struct seq_file *const sfile, size_t const nelems,
+	kbase_debugfs_helper_get_attr_fn const get_attr_fn);
+
+/**
+ * kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an
+ *                                  array
+ *
+ * The virtual file must have been opened by calling single_open and passing
+ * the address of an object that can be accessed like an array.
+ *
+ * The given function is called once for each attribute value found in the
+ * data written to the virtual file. For further details, refer to the
+ * description of set_attr_from_string.
+ *
+ * @file:        A virtual file previously opened by calling single_open.
+ * @ubuf:        Source address in user space.
+ * @count:       Number of bytes written to the virtual file.
+ * @nelems:      Number of elements in the array.
+ * @set_attr_fn: Function to be called back for each array element.
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
+int kbase_debugfs_helper_seq_write(struct file *const file,
+	const char __user *const ubuf, size_t const count,
+	size_t const nelems,
+	kbase_debugfs_helper_set_attr_fn const set_attr_fn);
+
+#endif  /*_KBASE_DEBUGFS_HELPER_H_ */
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100755
index 0000000..afd7545
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,2377 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Defintions (types, defines, etcs) common to Kbase. They are placed here to
+ * allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_gpuprops_types.h>
+#include <mali_kbase_hwcnt_backend_gpu.h>
+#include <protected_mode_switcher.h>
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/sizes.h>
+
+#ifdef CONFIG_MALI_BUSLOG
+#include <linux/bus_logger.h>
+#endif
+
+#if defined(CONFIG_SYNC)
+#include <sync.h>
+#else
+#include "mali_kbase_fence_defs.h"
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif				/* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_MALI_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_MALI_DEVFREQ */
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+#include <linux/memory_group_manager.h>
+
+#if defined(CONFIG_PM_RUNTIME) || \
+	(defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+#define KBASE_PM_RUNTIME 1
+#endif
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif				/* CONFIG_MALI_DEBUG */
+#endif				/* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT             1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT           500
+
+/**
+ * Prevent soft-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * Therefore, soft stop may still be disabled due to HW issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+
+/**
+ * Prevent hard-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS        3
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS              16
+
+/* mmu */
+#define MIDGARD_MMU_LEVEL(x) (x)
+
+#define MIDGARD_MMU_TOPLEVEL    MIDGARD_MMU_LEVEL(0)
+
+#define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3)
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID     (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8	/* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_hwaccess_defs.h"
+
+/* Maximum number of pages of memory that require a permanent mapping, per
+ * kbase_context
+ */
+#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((1024ul * 1024ul) >> \
+								PAGE_SHIFT)
+
+/** Atom has been previously soft-stoppped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has been previously retried to execute */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps to
+ * disambiguate short-running job chains during soft/hard stopping of jobs
+ */
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has fail dependency on cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom is currently in the list of atoms blocked on cross-slot dependencies */
+#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in protected mode */
+#define KBASE_KATOM_FLAG_PROTECTED (1<<11)
+/* Atom has been stored in runnable_tree */
+#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
+/* Atom is waiting for L2 caches to power up in order to enter protected mode */
+#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS  (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ *  entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+		(JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+/* Serialize atoms within a slot (ie only one atom per job slot) */
+#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0)
+/* Serialize atoms between slots (ie only one job slot running at any time) */
+#define KBASE_SERIALIZE_INTER_SLOT (1 << 1)
+/* Reset the GPU after each atom completion */
+#define KBASE_SERIALIZE_RESET (1 << 2)
+
+/* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer
+ * clients, to reduce undesired system load.
+ * If a virtualizer client requests a dump within this threshold period after
+ * some other client has performed a dump, a new dump won't be performed and
+ * the accumulated counter values for that client will be returned instead.
+ */
+#define KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS (200 * NSEC_PER_USEC)
+
+/* Maximum number of clock/regulator pairs that may be referenced by
+ * the device node.
+ * This is dependent on support for of_property_read_u64_array() in the
+ * kernel.
+ */
+#if (KERNEL_VERSION(4, 0, 0) <= LINUX_VERSION_CODE) || \
+			defined(LSK_OPPV2_BACKPORT)
+#define BASE_MAX_NR_CLOCKS_REGULATORS (2)
+#else
+#define BASE_MAX_NR_CLOCKS_REGULATORS (1)
+#endif
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_device;
+struct kbase_as;
+struct kbase_mmu_setup;
+struct kbase_ipa_model_vinstr_data;
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * struct base_job_fault_event - keeps track of the atom which faulted or which
+ *                               completed after the faulty atom but before the
+ *                               debug data for faulty atom was dumped.
+ *
+ * @event_code:     event code for the atom, should != BASE_JD_EVENT_DONE for the
+ *                  atom which faulted.
+ * @katom:          pointer to the atom for which job fault occurred or which completed
+ *                  after the faulty atom.
+ * @job_fault_work: work item, queued only for the faulty atom, which waits for
+ *                  the dumping to get completed and then does the bottom half
+ *                  of job done for the atoms which followed the faulty atom.
+ * @head:           List head used to store the atom in the global list of faulty
+ *                  atoms or context specific list of atoms which got completed
+ *                  during the dump.
+ * @reg_offset:     offset of the register to be dumped next, only applicable for
+ *                  the faulty atom.
+ */
+struct base_job_fault_event {
+
+	u32 event_code;
+	struct kbase_jd_atom *katom;
+	struct work_struct job_fault_work;
+	struct list_head head;
+	int reg_offset;
+};
+
+#endif
+
+/**
+ * struct kbase_jd_atom_dependency - Contains the dependency info for an atom.
+ * @atom:          pointer to the dependee atom.
+ * @dep_type:      type of dependency on the dependee @atom, i.e. order or data
+ *                 dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency.
+ */
+struct kbase_jd_atom_dependency {
+	struct kbase_jd_atom *atom;
+	u8 dep_type;
+};
+
+/**
+ * struct kbase_io_access - holds information about 1 register access
+ *
+ * @addr: first bit indicates r/w (r=0, w=1)
+ * @value: value written or read
+ */
+struct kbase_io_access {
+	uintptr_t addr;
+	u32 value;
+};
+
+/**
+ * struct kbase_io_history - keeps track of all recent register accesses
+ *
+ * @enabled: true if register accesses are recorded, false otherwise
+ * @lock: spinlock protecting kbase_io_access array
+ * @count: number of registers read/written
+ * @size: number of elements in kbase_io_access array
+ * @buf: array of kbase_io_access
+ */
+struct kbase_io_history {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool enabled;
+#else
+	u32 enabled;
+#endif
+
+	spinlock_t lock;
+	size_t count;
+	u16 size;
+	struct kbase_io_access *buf;
+};
+
+/**
+ * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the
+ *                           dependee atom.
+ * @dep:   pointer to the dependency info structure.
+ *
+ * Return: readonly reference to dependee atom.
+ */
+static inline const struct kbase_jd_atom *
+kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return (const struct kbase_jd_atom *)(dep->atom);
+}
+
+/**
+ * kbase_jd_katom_dep_type -  Retrieves the dependency type info
+ *
+ * @dep:   pointer to the dependency info structure.
+ *
+ * Return: the type of dependency there is on the dependee atom.
+ */
+static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return dep->dep_type;
+}
+
+/**
+ * kbase_jd_katom_dep_set - sets up the dependency info structure
+ *                          as per the values passed.
+ * @const_dep:    pointer to the dependency info structure to be setup.
+ * @a:            pointer to the dependee atom.
+ * @type:         type of dependency there is on the dependee atom.
+ */
+static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
+		struct kbase_jd_atom *a, u8 type)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = a;
+	dep->dep_type = type;
+}
+
+/**
+ * kbase_jd_katom_dep_clear - resets the dependency info structure
+ *
+ * @const_dep:    pointer to the dependency info structure to be setup.
+ */
+static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = NULL;
+	dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
+
+/**
+ * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it becomes
+ *                                runnable, with respect to job slot ringbuffer/fifo.
+ * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, which
+ *                                implies that either atom has not become runnable
+ *                                due to dependency or has completed the execution
+ *                                on GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is blocked
+ *                                due to cross slot dependency, can't be submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot fifo but
+ *                                is waiting for the completion of previously added atoms
+ *                                in current & other slots, as their protected mode
+ *                                requirements do not match with the current atom.
+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo and is
+ *                                waiting for completion of protected mode transition,
+ *                                needed before the atom is submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is waiting
+ *                                for the cores, which are needed to execute the job
+ *                                chain represented by the atom, to become available
+ * @KBASE_ATOM_GPU_RB_WAITING_AFFINITY: Atom is in slot fifo but is blocked on
+ *                                affinity due to rmu workaround for Hw issue 8987.
+ * @KBASE_ATOM_GPU_RB_READY:      Atom is in slot fifo and can be submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_SUBMITTED:  Atom is in slot fifo and has been submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some failure,
+ *                                but only after the previously added atoms in fifo
+ *                                have completed or have also been returned to JS.
+ */
+enum kbase_atom_gpu_rb_state {
+	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
+	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
+	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
+	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
+	KBASE_ATOM_GPU_RB_READY,
+	KBASE_ATOM_GPU_RB_SUBMITTED,
+	KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1
+};
+
+/**
+ * enum kbase_atom_enter_protected_state - The state of an atom with respect to the
+ *                      preparation for GPU's entry into protected mode, becomes
+ *                      pertinent only after atom's state with respect to slot
+ *                      ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @KBASE_ATOM_ENTER_PROTECTED_CHECK:  Starting state. Check if there are any atoms
+ *                      currently submitted to GPU and protected mode transition is
+ *                      not already in progress.
+ * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to
+ *                      become disabled before entry into protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation
+ *                      for the coherency change. L2 shall be powered down and GPU shall
+ *                      come out of fully coherent mode before entering protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change;
+ *                      for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on so that
+ *                      coherency register contains correct value when GPU enters
+ *                      protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for BASE_HW_ISSUE_TGOX_R1_1234 check
+ *                      that L2 is powered up and switch GPU to protected mode.
+ */
+enum kbase_atom_enter_protected_state {
+	/**
+	 * NOTE: The integer value of this must match KBASE_ATOM_EXIT_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
+	KBASE_ATOM_ENTER_PROTECTED_HWCNT,
+	KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
+	KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY,
+	KBASE_ATOM_ENTER_PROTECTED_FINISHED,
+};
+
+/**
+ * enum kbase_atom_exit_protected_state - The state of an atom with respect to the
+ *                      preparation for GPU's exit from protected mode, becomes
+ *                      pertinent only after atom's state with respect to slot
+ *                      ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any atoms
+ *                      currently submitted to GPU and protected mode transition is
+ *                      not already in progress.
+ * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation
+ *                      for the reset, as exiting protected mode requires a reset.
+ * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from protected mode
+ * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state, Wait for the reset to complete
+ */
+enum kbase_atom_exit_protected_state {
+	/**
+	 * NOTE: The integer value of this must match KBASE_ATOM_ENTER_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
+	KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
+	KBASE_ATOM_EXIT_PROTECTED_RESET,
+	KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
+};
+
+/**
+ * struct kbase_ext_res - Contains the info for external resources referred
+ *                        by an atom, which have been mapped on GPU side.
+ * @gpu_address:          Start address of the memory region allocated for
+ *                        the resource from GPU virtual address space.
+ * @alloc:                pointer to physical pages tracking object, set on
+ *                        mapping the external resource on GPU side.
+ */
+struct kbase_ext_res {
+	u64 gpu_address;
+	struct kbase_mem_phy_alloc *alloc;
+};
+
+/**
+ * struct kbase_jd_atom  - object representing the atom, containing the complete
+ *                         state and attributes of an atom.
+ * @work:                  work item for the bottom half processing of the atom,
+ *                         by JD or JS, after it got executed on GPU or the input
+ *                         fence got signaled
+ * @start_timestamp:       time at which the atom was submitted to the GPU, by
+ *                         updating the JS_HEAD_NEXTn register.
+ * @udata:                 copy of the user data sent for the atom in base_jd_submit.
+ * @kctx:                  Pointer to the base context with which the atom is associated.
+ * @dep_head:              Array of 2 list heads, pointing to the two list of atoms
+ *                         which are blocked due to dependency on this atom.
+ * @dep_item:              Array of 2 list heads, used to store the atom in the list of
+ *                         other atoms depending on the same dependee atom.
+ * @dep:                   Array containing the dependency info for the 2 atoms on which
+ *                         the atom depends upon.
+ * @jd_item:               List head used during job dispatch job_done processing - as
+ *                         dependencies may not be entirely resolved at this point,
+ *                         we need to use a separate list head.
+ * @in_jd_list:            flag set to true if atom's @jd_item is currently on a list,
+ *                         prevents atom being processed twice.
+ * @nr_extres:             number of external resources referenced by the atom.
+ * @extres:                pointer to the location containing info about @nr_extres
+ *                         external resources referenced by the atom.
+ * @device_nr:             indicates the coregroup with which the atom is associated,
+ *                         when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified.
+ * @jc:                    GPU address of the job-chain.
+ * @softjob_data:          Copy of data read from the user space buffer that @jc
+ *                         points to.
+ * @fence:                 Stores either an input or output sync fence, depending
+ *                         on soft-job type
+ * @sync_waiter:           Pointer to the sync fence waiter structure passed to the
+ *                         callback function on signaling of the input fence.
+ * @dma_fence:             object containing pointers to both input & output fences
+ *                         and other related members used for explicit sync through
+ *                         soft jobs and for the implicit synchronization required
+ *                         on access to external resources.
+ * @event_code:            Event code for the job chain represented by the atom, both
+ *                         HW and low-level SW events are represented by event codes.
+ * @core_req:              bitmask of BASE_JD_REQ_* flags specifying either Hw or Sw
+ *                         requirements for the job chain represented by the atom.
+ * @ticks:                 Number of scheduling ticks for which atom has been running
+ *                         on the GPU.
+ * @sched_priority:        Priority of the atom for Job scheduling, as per the
+ *                         KBASE_JS_ATOM_SCHED_PRIO_*.
+ * @poking:                Indicates whether poking of MMU is ongoing for the atom,
+ *                         as a WA for the issue HW_ISSUE_8316.
+ * @completed:             Wait queue to wait upon for the completion of atom.
+ * @status:                Indicates at high level at what stage the atom is in,
+ *                         as per KBASE_JD_ATOM_STATE_*, that whether it is not in
+ *                         use or its queued in JD or given to JS or submitted to Hw
+ *                         or it completed the execution on Hw.
+ * @work_id:               used for GPU tracepoints, its a snapshot of the 'work_id'
+ *                         counter in kbase_jd_context which is incremented on
+ *                         every call to base_jd_submit.
+ * @slot_nr:               Job slot chosen for the atom.
+ * @atom_flags:            bitmask of KBASE_KATOM_FLAG* flags capturing the exact
+ *                         low level state of the atom.
+ * @gpu_rb_state:          bitmnask of KBASE_ATOM_GPU_RB_* flags, precisely tracking
+ *                         atom's state after it has entered Job scheduler on becoming
+ *                         runnable. Atom could be blocked due to cross slot dependency
+ *                         or waiting for the shader cores to become available or
+ *                         waiting for protected mode transitions to complete.
+ * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU
+ *                         cache is needed for the atom and the shader cores used
+ *                         for atom have been kept on.
+ * @blocked:               flag indicating that atom's resubmission to GPU is
+ *                         blocked till the work item is scheduled to return the
+ *                         atom to JS.
+ * @pre_dep:               Pointer to atom that this atom has same-slot dependency on
+ * @post_dep:              Pointer to atom that has same-slot dependency on this atom
+ * @x_pre_dep:             Pointer to atom that this atom has cross-slot dependency on
+ * @x_post_dep:            Pointer to atom that has cross-slot dependency on this atom
+ * @flush_id:              The GPU's flush count recorded at the time of submission,
+ *                         used for the cache flush optimisation
+ * @fault_event:           Info for dumping the debug data on Job fault.
+ * @queue:                 List head used for 4 different purposes :
+ *                         Adds atom to the list of dma-buf fence waiting atoms.
+ *                         Adds atom to the list of atoms blocked due to cross
+ *                         slot dependency.
+ *                         Adds atom to the list of softjob atoms for which JIT
+ *                         allocation has been deferred
+ *                         Adds atom to the list of softjob atoms waiting for the
+ *                         signaling of fence.
+ * @jit_node:              Used to keep track of all JIT free/alloc jobs in submission order
+ * @jit_blocked:           Flag indicating that JIT allocation requested through
+ *                         softjob atom will be reattempted after the impending
+ *                         free of other active JIT allocations.
+ * @will_fail_event_code:  If non-zero, this indicates that the atom will fail
+ *                         with the set event_code when the atom is processed.
+ *                         Used for special handling of atoms, which have a data
+ *                         dependency on the failed atoms.
+ * @protected_state:       State of the atom, as per KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*,
+ *                         when transitioning into or out of protected mode. Atom will
+ *                         be either entering or exiting the protected mode.
+ * @runnable_tree_node:    The node added to context's job slot specific rb tree
+ *                         when the atom becomes runnable.
+ * @age:                   Age of atom relative to other atoms in the context, is
+ *                         snapshot of the age_count counter in kbase context.
+ */
+struct kbase_jd_atom {
+	struct work_struct work;
+	ktime_t start_timestamp;
+
+	struct base_jd_udata udata;
+	struct kbase_context *kctx;
+
+	struct list_head dep_head[2];
+	struct list_head dep_item[2];
+	const struct kbase_jd_atom_dependency dep[2];
+	struct list_head jd_item;
+	bool in_jd_list;
+
+	u16 nr_extres;
+	struct kbase_ext_res *extres;
+
+	u32 device_nr;
+	u64 jc;
+	void *softjob_data;
+#if defined(CONFIG_SYNC)
+	struct sync_fence *fence;
+	struct sync_fence_waiter sync_waiter;
+#endif				/* CONFIG_SYNC */
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+	struct {
+		/* Use the functions/API defined in mali_kbase_fence.h to
+		 * when working with this sub struct */
+#if defined(CONFIG_SYNC_FILE)
+		/* Input fence */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+		struct fence *fence_in;
+#else
+		struct dma_fence *fence_in;
+#endif
+#endif
+		/* This points to the dma-buf output fence for this atom. If
+		 * this is NULL then there is no fence for this atom and the
+		 * following fields related to dma_fence may have invalid data.
+		 *
+		 * The context and seqno fields contain the details for this
+		 * fence.
+		 *
+		 * This fence is signaled when the katom is completed,
+		 * regardless of the event_code of the katom (signal also on
+		 * failure).
+		 */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+		struct fence *fence;
+#else
+		struct dma_fence *fence;
+#endif
+		/* The dma-buf fence context number for this atom. A unique
+		 * context number is allocated to each katom in the context on
+		 * context creation.
+		 */
+		unsigned int context;
+		/* The dma-buf fence sequence number for this atom. This is
+		 * increased every time this katom uses dma-buf fence.
+		 */
+		atomic_t seqno;
+		/* This contains a list of all callbacks set up to wait on
+		 * other fences.  This atom must be held back from JS until all
+		 * these callbacks have been called and dep_count have reached
+		 * 0. The initial value of dep_count must be equal to the
+		 * number of callbacks on this list.
+		 *
+		 * This list is protected by jctx.lock. Callbacks are added to
+		 * this list when the atom is built and the wait are set up.
+		 * All the callbacks then stay on the list until all callbacks
+		 * have been called and the atom is queued, or cancelled, and
+		 * then all callbacks are taken off the list and freed.
+		 */
+		struct list_head callbacks;
+		/* Atomic counter of number of outstandind dma-buf fence
+		 * dependencies for this atom. When dep_count reaches 0 the
+		 * atom may be queued.
+		 *
+		 * The special value "-1" may only be set after the count
+		 * reaches 0, while holding jctx.lock. This indicates that the
+		 * atom has been handled, either queued in JS or cancelled.
+		 *
+		 * If anyone but the dma-fence worker sets this to -1 they must
+		 * ensure that any potentially queued worker must have
+		 * completed before allowing the atom to be marked as unused.
+		 * This can be done by flushing the fence work queue:
+		 * kctx->dma_fence.wq.
+		 */
+		atomic_t dep_count;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE*/
+
+	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
+	enum base_jd_event_code event_code;
+	base_jd_core_req core_req;
+
+	u32 ticks;
+	int sched_priority;
+
+	int poking;
+
+	wait_queue_head_t completed;
+	enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	int work_id;
+#endif
+	int slot_nr;
+
+	u32 atom_flags;
+
+	int retry_count;
+
+	enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+	bool need_cache_flush_cores_retained;
+
+	atomic_t blocked;
+
+	struct kbase_jd_atom *pre_dep;
+	struct kbase_jd_atom *post_dep;
+
+	struct kbase_jd_atom *x_pre_dep;
+	struct kbase_jd_atom *x_post_dep;
+
+	u32 flush_id;
+
+#ifdef CONFIG_DEBUG_FS
+	struct base_job_fault_event fault_event;
+#endif
+
+	struct list_head queue;
+
+	struct list_head jit_node;
+	bool jit_blocked;
+
+	enum base_jd_event_code will_fail_event_code;
+
+	union {
+		enum kbase_atom_enter_protected_state enter;
+		enum kbase_atom_exit_protected_state exit;
+	} protected_state;
+
+	struct rb_node runnable_tree_node;
+
+	u32 age;
+};
+
+/**
+ * struct kbase_debug_copy_buffer - information about the buffer to be copied.
+ *
+ * @size:	size of the buffer in bytes
+ * @pages:	pointer to an array of pointers to the pages which contain
+ *		the buffer
+ * @is_vmalloc: true if @pages was allocated with vzalloc. false if @pages was
+ *              allocated with kcalloc
+ * @nr_pages:	number of pages
+ * @offset:	offset into the pages
+ * @gpu_alloc:	pointer to physical memory allocated by the GPU
+ * @extres_pages: array of pointers to the pages containing external resources
+ *		for this buffer
+ * @nr_extres_pages: number of pages in @extres_pages
+ */
+struct kbase_debug_copy_buffer {
+	size_t size;
+	struct page **pages;
+	bool is_vmalloc;
+	int nr_pages;
+	size_t offset;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+
+	struct page **extres_pages;
+	int nr_extres_pages;
+};
+
+static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
+{
+	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+/**
+ * struct kbase_jd_context  - per context object encapsulating all the Job dispatcher
+ *                            related state.
+ * @lock:                     lock to serialize the updates made to the Job dispatcher
+ *                            state and kbase_jd_atom objects.
+ * @sched_info:               Structure encapsulating all the Job scheduling info.
+ * @atoms:                    Array of the objects representing atoms, containing
+ *                            the complete state and attributes of an atom.
+ * @job_nr:                   Tracks the number of atoms being processed by the
+ *                            kbase. This includes atoms that are not tracked by
+ *                            scheduler: 'not ready to run' & 'dependency-only' jobs.
+ * @zero_jobs_wait:           Waitq that reflects whether there are no jobs
+ *                            (including SW-only dependency jobs). This is set
+ *                            when no jobs are present on the ctx, and clear when
+ *                            there are jobs.
+ *                            This must be updated atomically with @job_nr.
+ *                            note: Job Dispatcher knows about more jobs than the
+ *                            Job Scheduler as it is unaware of jobs that are
+ *                            blocked on dependencies and SW-only dependency jobs.
+ *                            This waitq can be waited upon to find out when the
+ *                            context jobs are all done/cancelled (including those
+ *                            that might've been blocked on dependencies) - and so,
+ *                            whether it can be terminated. However, it should only
+ *                            be terminated once it is not present in the run-pool.
+ *                            Since the waitq is only set under @lock, the waiter
+ *                            should also briefly obtain and drop @lock to guarantee
+ *                            that the setter has completed its work on the kbase_context
+ * @job_done_wq:              Workqueue to which the per atom work item is queued
+ *                            for bottom half processing when the atom completes
+ *                            execution on GPU or the input fence get signaled.
+ * @tb_lock:                  Lock to serialize the write access made to @tb to
+ *                            to store the register access trace messages.
+ * @tb:                       Pointer to the Userspace accessible buffer storing
+ *                            the trace messages for register read/write accesses
+ *                            made by the Kbase. The buffer is filled in circular
+ *                            fashion.
+ * @tb_wrap_offset:           Offset to the end location in the trace buffer, the
+ *                            write pointer is moved to the beginning on reaching
+ *                            this offset.
+ * @work_id:                  atomic variable used for GPU tracepoints, incremented
+ *                            on every call to base_jd_submit.
+ */
+struct kbase_jd_context {
+	struct mutex lock;
+	struct kbasep_js_kctx_info sched_info;
+	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+	u32 job_nr;
+
+	wait_queue_head_t zero_jobs_wait;
+
+	struct workqueue_struct *job_done_wq;
+
+	spinlock_t tb_lock;
+	u32 *tb;
+	size_t tb_wrap_offset;
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_t work_id;
+#endif
+};
+
+struct kbase_device_info {
+	u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+enum {
+	KBASE_AS_POKE_STATE_IN_FLIGHT     = 1<<0,
+	KBASE_AS_POKE_STATE_KILLING_POKE  = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+	u64	transtab;
+	u64	memattr;
+	u64	transcfg;
+};
+
+/**
+ * struct kbase_fault - object containing data relating to a page or bus fault.
+ * @addr:           Records the faulting address.
+ * @extra_addr:     Records the secondary fault address.
+ * @status:         Records the fault status as reported by Hw.
+ * @protected_mode: Flag indicating whether the fault occurred in protected mode
+ *                  or not.
+ */
+struct kbase_fault {
+	u64 addr;
+	u64 extra_addr;
+	u32 status;
+	bool protected_mode;
+};
+
+/**
+ * struct kbase_as   - object representing an address space of GPU.
+ * @number:            Index at which this address space structure is present
+ *                     in an array of address space structures embedded inside the
+ *                     struct kbase_device.
+ * @pf_wq:             Workqueue for processing work items related to Bus fault
+ *                     and Page fault handling.
+ * @work_pagefault:    Work item for the Page fault handling.
+ * @work_busfault:     Work item for the Bus fault handling.
+ * @fault_type:        Type of fault which occured for this address space,
+ *                     regular/unexpected Bus or Page fault.
+ * @pf_data:           Data relating to page fault.
+ * @bf_data:           Data relating to bus fault.
+ * @current_setup:     Stores the MMU configuration for this address space.
+ * @poke_wq:           Workqueue to process the work items queue for poking the
+ *                     MMU as a WA for BASE_HW_ISSUE_8316.
+ * @poke_work:         Work item to do the poking of MMU for this address space.
+ * @poke_refcount:     Refcount for the need of poking MMU. While the refcount is
+ *                     non zero the poking of MMU will continue.
+ *                     Protected by hwaccess_lock.
+ * @poke_state:        State indicating whether poking is in progress or it has
+ *                     been stopped. Protected by hwaccess_lock.
+ * @poke_timer:        Timer used to schedule the poking at regular intervals.
+ */
+struct kbase_as {
+	int number;
+	struct workqueue_struct *pf_wq;
+	struct work_struct work_pagefault;
+	struct work_struct work_busfault;
+	enum kbase_mmu_fault_type fault_type;
+	struct kbase_fault pf_data;
+	struct kbase_fault bf_data;
+	struct kbase_mmu_setup current_setup;
+	struct workqueue_struct *poke_wq;
+	struct work_struct poke_work;
+	int poke_refcount;
+	kbase_as_poke_state poke_state;
+	struct hrtimer poke_timer;
+};
+
+/**
+ * struct kbase_mmu_table  - object representing a set of GPU page tables
+ * @mmu_teardown_pages:   Buffer of 4 Pages in size, used to cache the entries
+ *                        of top & intermediate level page tables to avoid
+ *                        repeated calls to kmap_atomic during the MMU teardown.
+ * @mmu_lock:             Lock to serialize the accesses made to multi level GPU
+ *                        page tables
+ * @pgd:                  Physical address of the page allocated for the top
+ *                        level page table of the context, this is used for
+ *                        MMU HW programming as the address translation will
+ *                        start from the top level page table.
+ * @group_id:             A memory group ID to be passed to a platform-specific
+ *                        memory group manager.
+ *                        Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @kctx:                 If this set of MMU tables belongs to a context then
+ *                        this is a back-reference to the context, otherwise
+ *                        it is NULL
+ */
+struct kbase_mmu_table {
+	u64 *mmu_teardown_pages;
+	struct mutex mmu_lock;
+	phys_addr_t pgd;
+	u8 group_id;
+	struct kbase_context *kctx;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+struct kbasep_mem_device {
+	atomic_t used_pages;   /* Tracks usage of OS shared memory. Updated
+				   when OS memory is allocated/freed. */
+
+};
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+	/* Comma on its own, to extend the list */
+	,
+	/* Must be the last in the enum */
+	KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT  (((u8)1) << 1)
+
+/**
+ * struct kbase_trace - object representing a trace message added to trace buffer
+ *                      kbase_device::trace_rbuf
+ * @timestamp:          CPU timestamp at which the trace message was added.
+ * @thread_id:          id of the thread in the context of which trace message
+ *                      was added.
+ * @cpu:                indicates which CPU the @thread_id was scheduled on when
+ *                      the trace message was added.
+ * @ctx:                Pointer to the kbase context for which the trace message
+ *                      was added. Will be NULL for certain trace messages like
+ *                      for traces added corresponding to power management events.
+ *                      Will point to the appropriate context corresponding to
+ *                      job-slot & context's reference count related events.
+ * @katom:              indicates if the trace message has atom related info.
+ * @atom_number:        id of the atom for which trace message was added.
+ *                      Only valid if @katom is true.
+ * @atom_udata:         Copy of the user data sent for the atom in base_jd_submit.
+ *                      Only valid if @katom is true.
+ * @gpu_addr:           GPU address of the job-chain represented by atom. Could
+ *                      be valid even if @katom is false.
+ * @info_val:           value specific to the type of event being traced. For the
+ *                      case where @katom is true, will be set to atom's affinity,
+ *                      i.e. bitmask of shader cores chosen for atom's execution.
+ * @code:               Identifies the event, refer enum kbase_trace_code.
+ * @jobslot:            job-slot for which trace message was added, valid only for
+ *                      job-slot management events.
+ * @refcount:           reference count for the context, valid for certain events
+ *                      related to scheduler core and policy.
+ * @flags:              indicates if info related to @jobslot & @refcount is present
+ *                      in the trace message, used during dumping of the message.
+ */
+struct kbase_trace {
+	struct timespec timestamp;
+	u32 thread_id;
+	u32 cpu;
+	void *ctx;
+	bool katom;
+	int atom_number;
+	u64 atom_udata[2];
+	u64 gpu_addr;
+	unsigned long info_val;
+	u8 code;
+	u8 jobslot;
+	u8 refcount;
+	u8 flags;
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_device_data {
+	/**
+	 * The lock protecting Power Management structures accessed outside of
+	 * IRQ.
+	 *
+	 * This lock must also be held whenever the GPU is being powered on or
+	 * off.
+	 */
+	struct mutex lock;
+
+	/**
+	 * The reference count of active contexts on this device. Note that
+	 * some code paths keep shaders/the tiler powered whilst this is 0. Use
+	 * kbase_pm_is_active() instead to check for such cases.
+	 */
+	int active_count;
+	/** Flag indicating suspending/suspended */
+	bool suspending;
+	/* Wait queue set when active_count == 0 */
+	wait_queue_head_t zero_active_count_wait;
+
+	/**
+	 * Bit masks identifying the available shader cores that are specified
+	 * via sysfs. One mask per job slot.
+	 */
+	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
+	u64 debug_core_mask_all;
+
+	/**
+	 * Callback for initializing the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 *
+	 * @return 0 on success, else error code
+	 */
+	 int (*callback_power_runtime_init)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback for terminating the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 */
+	void (*callback_power_runtime_term)(struct kbase_device *kbdev);
+
+	/* Time in milliseconds between each dvfs sample */
+	u32 dvfs_period;
+
+	struct kbase_pm_backend_data backend;
+};
+
+/**
+ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
+ * @kbdev:        Kbase device where memory is used
+ * @cur_size:     Number of free pages currently in the pool (may exceed
+ *                @max_size in some corner cases)
+ * @max_size:     Maximum number of free pages in the pool
+ * @order:        order = 0 refers to a pool of 4 KB pages
+ *                order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
+ * @group_id:     A memory group ID to be passed to a platform-specific
+ *                memory group manager, if present. Immutable.
+ *                Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @pool_lock:    Lock protecting the pool - must be held when modifying
+ *                @cur_size and @page_list
+ * @page_list:    List of free pages in the pool
+ * @reclaim:      Shrinker for kernel reclaim of free pages
+ * @next_pool:    Pointer to next pool where pages can be allocated when this
+ *                pool is empty. Pages will spill over to the next pool when
+ *                this pool is full. Can be NULL if there is no next pool.
+ * @dying:        true if the pool is being terminated, and any ongoing
+ *                operations should be abandoned
+ * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from
+ *                this pool, eg during a grow operation
+ */
+struct kbase_mem_pool {
+	struct kbase_device *kbdev;
+	size_t              cur_size;
+	size_t              max_size;
+	u8                  order;
+	u8                  group_id;
+	spinlock_t          pool_lock;
+	struct list_head    page_list;
+	struct shrinker     reclaim;
+
+	struct kbase_mem_pool *next_pool;
+
+	bool dying;
+	bool dont_reclaim;
+};
+
+/**
+ * struct kbase_mem_pool_group - a complete set of physical memory pools.
+ *
+ * Memory pools are used to allow efficient reallocation of previously-freed
+ * physical pages. A pair of memory pools is initialized for each physical
+ * memory group: one for 4 KiB pages and one for 2 MiB pages. These arrays
+ * should be indexed by physical memory group ID, the meaning of which is
+ * defined by the systems integrator.
+ *
+ * @small: Array of objects containing the state for pools of 4 KiB size
+ *         physical pages.
+ * @large: Array of objects containing the state for pools of 2 MiB size
+ *         physical pages.
+ */
+struct kbase_mem_pool_group {
+	struct kbase_mem_pool small[MEMORY_GROUP_MANAGER_NR_GROUPS];
+	struct kbase_mem_pool large[MEMORY_GROUP_MANAGER_NR_GROUPS];
+};
+
+/**
+ * struct kbase_mem_pool_config - Initial configuration for a physical memory
+ *                                pool
+ *
+ * @max_size: Maximum number of free pages that the pool can hold.
+ */
+struct kbase_mem_pool_config {
+	size_t max_size;
+};
+
+/**
+ * struct kbase_mem_pool_group_config - Initial configuration for a complete
+ *                                      set of physical memory pools
+ *
+ * This array should be indexed by physical memory group ID, the meaning
+ * of which is defined by the systems integrator.
+ *
+ * @small: Array of initial configuration for pools of 4 KiB pages.
+ * @large: Array of initial configuration for pools of 2 MiB pages.
+ */
+struct kbase_mem_pool_group_config {
+	struct kbase_mem_pool_config small[MEMORY_GROUP_MANAGER_NR_GROUPS];
+	struct kbase_mem_pool_config large[MEMORY_GROUP_MANAGER_NR_GROUPS];
+};
+
+/**
+ * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP
+ *                            frequency, real frequencies and core mask
+ * @real_freqs: Real GPU frequencies.
+ * @opp_volts: OPP voltages.
+ * @opp_freq:  Nominal OPP frequency
+ * @core_mask: Shader core mask
+ */
+struct kbase_devfreq_opp {
+	u64 opp_freq;
+	u64 core_mask;
+	u64 real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS];
+	u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS];
+};
+
+/* MMU mode flags */
+#define KBASE_MMU_MODE_HAS_NON_CACHEABLE (1ul << 0) /* Has NON_CACHEABLE MEMATTR */
+
+/**
+ * struct kbase_mmu_mode - object containing pointer to methods invoked for
+ *                         programming the MMU, as per the MMU mode supported
+ *                         by Hw.
+ * @update:           enable & setup/configure one of the GPU address space.
+ * @get_as_setup:     retrieve the configuration of one of the GPU address space.
+ * @disable_as:       disable one of the GPU address space.
+ * @pte_to_phy_addr:  retrieve the physical address encoded in the page table entry.
+ * @ate_is_valid:     check if the pte is a valid address translation entry
+ *                    encoding the physical address of the actual mapped page.
+ * @pte_is_valid:     check if the pte is a valid entry encoding the physical
+ *                    address of the next lower level page table.
+ * @entry_set_ate:    program the pte to be a valid address translation entry to
+ *                    encode the physical address of the actual page being mapped.
+ * @entry_set_pte:    program the pte to be a valid entry to encode the physical
+ *                    address of the next lower level page table.
+ * @entry_invalidate: clear out or invalidate the pte.
+ * @flags:            bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants.
+ */
+struct kbase_mmu_mode {
+	void (*update)(struct kbase_device *kbdev,
+			struct kbase_mmu_table *mmut,
+			int as_nr);
+	void (*get_as_setup)(struct kbase_mmu_table *mmut,
+			struct kbase_mmu_setup * const setup);
+	void (*disable_as)(struct kbase_device *kbdev, int as_nr);
+	phys_addr_t (*pte_to_phy_addr)(u64 entry);
+	int (*ate_is_valid)(u64 ate, int level);
+	int (*pte_is_valid)(u64 pte, int level);
+	void (*entry_set_ate)(u64 *entry, struct tagged_addr phy,
+			unsigned long flags, int level);
+	void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+	void (*entry_invalidate)(u64 *entry);
+	unsigned long flags;
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
+
+
+
+#define DEVNAME_SIZE	16
+
+/**
+ * enum kbase_devfreq_work_type - The type of work to perform in the devfreq
+ *                                suspend/resume worker.
+ * @DEVFREQ_WORK_NONE:    Initilisation state.
+ * @DEVFREQ_WORK_SUSPEND: Call devfreq_suspend_device().
+ * @DEVFREQ_WORK_RESUME:  Call devfreq_resume_device().
+ */
+enum kbase_devfreq_work_type {
+	DEVFREQ_WORK_NONE,
+	DEVFREQ_WORK_SUSPEND,
+	DEVFREQ_WORK_RESUME
+};
+
+/**
+ * struct kbase_devfreq_queue_info - Object representing an instance for managing
+ *                                   the queued devfreq suspend/resume works.
+ * @workq:                 Workqueue for devfreq suspend/resume requests
+ * @work:                  Work item for devfreq suspend & resume
+ * @req_type:              Requested work type to be performed by the devfreq
+ *                         suspend/resume worker
+ * @acted_type:            Work type has been acted on by the worker, i.e. the
+ *                         internal recorded state of the suspend/resume
+ */
+struct kbase_devfreq_queue_info {
+	struct workqueue_struct *workq;
+	struct work_struct work;
+	enum kbase_devfreq_work_type req_type;
+	enum kbase_devfreq_work_type acted_type;
+};
+
+/**
+ * struct kbase_device   - Object representing an instance of GPU platform device,
+ *                         allocated from the probe method of mali driver.
+ * @hw_quirks_sc:          Configuration to be used for the shader cores as per
+ *                         the HW issues present in the GPU.
+ * @hw_quirks_tiler:       Configuration to be used for the Tiler as per the HW
+ *                         issues present in the GPU.
+ * @hw_quirks_mmu:         Configuration to be used for the MMU as per the HW
+ *                         issues present in the GPU.
+ * @hw_quirks_jm:          Configuration to be used for the Job Manager as per
+ *                         the HW issues present in the GPU.
+ * @entry:                 Links the device instance to the global list of GPU
+ *                         devices. The list would have as many entries as there
+ *                         are GPU device instances.
+ * @dev:                   Pointer to the kernel's generic/base representation
+ *                         of the GPU platform device.
+ * @mdev:                  Pointer to the miscellaneous device registered to
+ *                         provide Userspace access to kernel driver through the
+ *                         device file /dev/malixx.
+ * @reg_start:             Base address of the region in physical address space
+ *                         where GPU registers have been mapped.
+ * @reg_size:              Size of the region containing GPU registers
+ * @reg:                   Kernel virtual address of the region containing GPU
+ *                         registers, using which Driver will access the registers.
+ * @irqs:                  Array containing IRQ resource info for 3 types of
+ *                         interrupts : Job scheduling, MMU & GPU events (like
+ *                         power management, cache etc.)
+ * @clocks:                Pointer to the input clock resources referenced by
+ *                         the GPU device node.
+ * @nr_clocks:             Number of clocks set in the clocks array.
+ * @regulators:            Pointer to the structs corresponding to the
+ *                         regulators referenced by the GPU device node.
+ * @nr_regulators:         Number of regulators set in the regulators array.
+ * @opp_table:             Pointer to the device OPP structure maintaining the
+ *                         link to OPPs attached to a device. This is obtained
+ *                         after setting regulator names for the device.
+ * @devname:               string containing the name used for GPU device instance,
+ *                         miscellaneous device is registered using the same name.
+ * @id:                    Unique identifier for the device, indicates the number of
+ *                         devices which have been created so far.
+ * @model:                 Pointer, valid only when Driver is compiled to not access
+ *                         the real GPU Hw, to the dummy model which tries to mimic
+ *                         to some extent the state & behavior of GPU Hw in response
+ *                         to the register accesses made by the Driver.
+ * @irq_slab:              slab cache for allocating the work items queued when
+ *                         model mimics raising of IRQ to cause an interrupt on CPU.
+ * @irq_workq:             workqueue for processing the irq work items.
+ * @serving_job_irq:       function to execute work items queued when model mimics
+ *                         the raising of JS irq, mimics the interrupt handler
+ *                         processing JS interrupts.
+ * @serving_gpu_irq:       function to execute work items queued when model mimics
+ *                         the raising of GPU irq, mimics the interrupt handler
+ *                         processing GPU interrupts.
+ * @serving_mmu_irq:       function to execute work items queued when model mimics
+ *                         the raising of MMU irq, mimics the interrupt handler
+ *                         processing MMU interrupts.
+ * @reg_op_lock:           lock used by model to serialize the handling of register
+ *                         accesses made by the driver.
+ * @pm:                    Per device object for storing data for power management
+ *                         framework.
+ * @js_data:               Per device object encapsulating the current context of
+ *                         Job Scheduler, which is global to the device and is not
+ *                         tied to any particular struct kbase_context running on
+ *                         the device
+ * @mem_pools:             Global pools of free physical memory pages which can
+ *                         be used by all the contexts.
+ * @memdev:                keeps track of the in use physical pages allocated by
+ *                         the Driver.
+ * @mmu_mode:              Pointer to the object containing methods for programming
+ *                         the MMU, depending on the type of MMU supported by Hw.
+ * @mgm_dev:               Pointer to the memory group manager device attached
+ *                         to the GPU device. This points to an internal memory
+ *                         group manager if no platform-specific memory group
+ *                         manager was retrieved through device tree.
+ * @as:                    Array of objects representing address spaces of GPU.
+ * @as_free:               Bitpattern of free/available GPU address spaces.
+ * @as_to_kctx:            Array of pointers to struct kbase_context, having
+ *                         GPU adrress spaces assigned to them.
+ * @mmu_mask_change:       Lock to serialize the access to MMU interrupt mask
+ *                         register used in the handling of Bus & Page faults.
+ * @gpu_props:             Object containing complete information about the
+ *                         configuration/properties of GPU HW device in use.
+ * @hw_issues_mask:        List of SW workarounds for HW issues
+ * @hw_features_mask:      List of available HW features.
+ * @disjoint_event:        struct for keeping track of the disjoint information,
+ *                         that whether the GPU is in a disjoint state and the
+ *                         number of disjoint events that have occurred on GPU.
+ * @nr_hw_address_spaces:  Number of address spaces actually available in the
+ *                         GPU, remains constant after driver initialisation.
+ * @nr_user_address_spaces: Number of address spaces available to user contexts
+ * @hwcnt:                  Structure used for instrumentation and HW counters
+ *                         dumping
+ * @hwcnt_gpu_iface:       Backend interface for GPU hardware counter access.
+ * @hwcnt_gpu_ctx:         Context for GPU hardware counter access.
+ *                         @hwaccess_lock must be held when calling
+ *                         kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx.
+ * @hwcnt_gpu_virt:        Virtualizer for GPU hardware counters.
+ * @vinstr_ctx:            vinstr context created per device.
+ * @timeline_is_enabled:   Non zero, if there is at least one timeline client,
+ *                         zero otherwise.
+ * @timeline:              Timeline context created per device.
+ * @trace_lock:            Lock to serialize the access to trace buffer.
+ * @trace_first_out:       Index/offset in the trace buffer at which the first
+ *                         unread message is present.
+ * @trace_next_in:         Index/offset in the trace buffer at which the new
+ *                         message will be written.
+ * @trace_rbuf:            Pointer to the buffer storing debug messages/prints
+ *                         tracing the various events in Driver.
+ *                         The buffer is filled in circular fashion.
+ * @reset_timeout_ms:      Number of milliseconds to wait for the soft stop to
+ *                         complete for the GPU jobs before proceeding with the
+ *                         GPU reset.
+ * @cache_clean_in_progress: Set when a cache clean has been started, and
+ *                         cleared when it has finished. This prevents multiple
+ *                         cache cleans being done simultaneously.
+ * @cache_clean_queued:    Set if a cache clean is invoked while another is in
+ *                         progress. If this happens, another cache clean needs
+ *                         to be triggered immediately after completion of the
+ *                         current one.
+ * @cache_clean_wait:      Signalled when a cache clean has finished.
+ * @platform_context:      Platform specific private data to be accessed by
+ *                         platform specific config files only.
+ * @kctx_list:             List of kbase_contexts created for the device,
+ *                         including any contexts that might be created for
+ *                         hardware counters.
+ * @kctx_list_lock:        Lock protecting concurrent accesses to @kctx_list.
+ * @devfreq_profile:       Describes devfreq profile for the Mali GPU device, passed
+ *                         to devfreq_add_device() to add devfreq feature to Mali
+ *                         GPU device.
+ * @devfreq:               Pointer to devfreq structure for Mali GPU device,
+ *                         returned on the call to devfreq_add_device().
+ * @current_freqs:         The real frequencies, corresponding to
+ *                         @current_nominal_freq, at which the Mali GPU device
+ *                         is currently operating, as retrieved from
+ *                         @devfreq_table in the target callback of
+ *                         @devfreq_profile.
+ * @current_nominal_freq:  The nominal frequency currently used for the Mali GPU
+ *                         device as retrieved through devfreq_recommended_opp()
+ *                         using the freq value passed as an argument to target
+ *                         callback of @devfreq_profile
+ * @current_voltages:      The voltages corresponding to @current_nominal_freq,
+ *                         as retrieved from @devfreq_table in the target
+ *                         callback of @devfreq_profile.
+ * @current_core_mask:     bitmask of shader cores that are currently desired &
+ *                         enabled, corresponding to @current_nominal_freq as
+ *                         retrieved from @devfreq_table in the target callback
+ *                         of @devfreq_profile.
+ * @devfreq_table:         Pointer to the lookup table for converting between
+ *                         nominal OPP (operating performance point) frequency,
+ *                         and real frequency and core mask. This table is
+ *                         constructed according to operating-points-v2-mali
+ *                         table in devicetree.
+ * @num_opps:              Number of operating performance points available for the Mali
+ *                         GPU device.
+ * @devfreq_queue:         Per device object for storing data that manages devfreq
+ *                         suspend & resume request queue and the related items.
+ * @devfreq_cooling:       Pointer returned on registering devfreq cooling device
+ *                         corresponding to @devfreq.
+ * @ipa_protection_mode_switched: is set to TRUE when GPU is put into protected
+ *                         mode. It is a sticky flag which is cleared by IPA
+ *                         once it has made use of information that GPU had
+ *                         previously entered protected mode.
+ * @ipa:                   Top level structure for IPA, containing pointers to both
+ *                         configured & fallback models.
+ * @job_fault_debug:       Flag to control the dumping of debug data for job faults,
+ *                         set when the 'job_fault' debugfs file is opened.
+ * @mali_debugfs_directory: Root directory for the debugfs files created by the driver
+ * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing
+ *                         a sub-directory for every context.
+ * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault
+ *                         has occurred.
+ * @job_fault_wq:          Waitqueue to block the job fault dumping daemon till the
+ *                         occurrence of a job fault.
+ * @job_fault_resume_wq:   Waitqueue on which every context with a faulty job wait
+ *                         for the job fault dumping to complete before they can
+ *                         do bottom half of job done for the atoms which followed
+ *                         the faulty atom.
+ * @job_fault_resume_workq: workqueue to process the work items queued for the faulty
+ *                         atoms, whereby the work item function waits for the dumping
+ *                         to get completed.
+ * @job_fault_event_list:  List of atoms, each belonging to a different context, which
+ *                         generated a job fault.
+ * @job_fault_event_lock:  Lock to protect concurrent accesses to @job_fault_event_list
+ * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs
+ *                         file "read_register".
+ * @ctx_num:               Total number of contexts created for the device.
+ * @io_history:            Pointer to an object keeping a track of all recent
+ *                         register accesses. The history of register accesses
+ *                         can be read through "regs_history" debugfs file.
+ * @hwaccess:              Contains a pointer to active kbase context and GPU
+ *                         backend specific data for HW access layer.
+ * @faults_pending:        Count of page/bus faults waiting for bottom half processing
+ *                         via workqueues.
+ * @poweroff_pending:      Set when power off operation for GPU is started, reset when
+ *                         power on for GPU is started.
+ * @infinite_cache_active_default: Set to enable using infinite cache for all the
+ *                         allocations of a new context.
+ * @mem_pool_defaults:     Default configuration for the group of memory pools
+ *                         created for a new context.
+ * @current_gpu_coherency_mode: coherency mode in use, which can be different
+ *                         from @system_coherency, when using protected mode.
+ * @system_coherency:      coherency mode as retrieved from the device tree.
+ * @cci_snoop_enabled:     Flag to track when CCI snoops have been enabled.
+ * @snoop_enable_smc:      SMC function ID to call into Trusted firmware to
+ *                         enable cache snooping. Value of 0 indicates that it
+ *                         is not used.
+ * @snoop_disable_smc:     SMC function ID to call disable cache snooping.
+ * @protected_ops:         Pointer to the methods for switching in or out of the
+ *                         protected mode, as per the @protected_dev being used.
+ * @protected_dev:         Pointer to the protected mode switcher device attached
+ *                         to the GPU device retrieved through device tree if
+ *                         GPU do not support protected mode switching natively.
+ * @protected_mode:        set to TRUE when GPU is put into protected mode
+ * @protected_mode_transition: set to TRUE when GPU is transitioning into or
+ *                         out of protected mode.
+ * @protected_mode_hwcnt_desired: True if we want GPU hardware counters to be
+ *                         enabled. Counters must be disabled before transition
+ *                         into protected mode.
+ * @protected_mode_hwcnt_disabled: True if GPU hardware counters are not
+ *                         enabled.
+ * @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware
+ *                         counters, used if atomic disable is not possible.
+ * @protected_mode_support: set to true if protected mode is supported.
+ * @buslogger:              Pointer to the structure required for interfacing
+ *                          with the bus logger module to set the size of buffer
+ *                          used by the module for capturing bus logs.
+ * @irq_reset_flush:        Flag to indicate that GPU reset is in-flight and flush of
+ *                          IRQ + bottom half is being done, to prevent the writes
+ *                          to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers.
+ * @inited_subsys:          Bitmap of inited sub systems at the time of device probe.
+ *                          Used during device remove or for handling error in probe.
+ * @hwaccess_lock:          Lock, which can be taken from IRQ context, to serialize
+ *                          the updates made to Job dispatcher + scheduler states.
+ * @mmu_hw_mutex:           Protects access to MMU operations and address space
+ *                          related state.
+ * @serialize_jobs:         Currently used mode for serialization of jobs, both
+ *                          intra & inter slots serialization is supported.
+ * @backup_serialize_jobs:  Copy of the original value of @serialize_jobs taken
+ *                          when GWT is enabled. Used to restore the original value
+ *                          on disabling of GWT.
+ * @js_ctx_scheduling_mode: Context scheduling mode currently being used by
+ *                          Job Scheduler
+ * @l2_size_override:       Used to set L2 cache size via device tree blob
+ * @l2_hash_override:       Used to set L2 cache hash via device tree blob
+ * @policy_list:            A filtered list of policies available in the system.
+ * @policy_count:           Number of policies in the @policy_list.
+ */
+struct kbase_device {
+	u32 hw_quirks_sc;
+	u32 hw_quirks_tiler;
+	u32 hw_quirks_mmu;
+	u32 hw_quirks_jm;
+
+	struct list_head entry;
+	struct device *dev;
+	struct miscdevice mdev;
+	u64 reg_start;
+	size_t reg_size;
+	void __iomem *reg;
+
+	struct {
+		int irq;
+		int flags;
+	} irqs[3];
+
+	struct clk *clocks[BASE_MAX_NR_CLOCKS_REGULATORS];
+	unsigned int nr_clocks;
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS];
+	unsigned int nr_regulators;
+#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
+	struct opp_table *opp_table;
+#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */
+#endif /* CONFIG_REGULATOR */
+	char devname[DEVNAME_SIZE];
+	u32  id;
+
+#ifdef CONFIG_MALI_NO_MALI
+	void *model;
+	struct kmem_cache *irq_slab;
+	struct workqueue_struct *irq_workq;
+	atomic_t serving_job_irq;
+	atomic_t serving_gpu_irq;
+	atomic_t serving_mmu_irq;
+	spinlock_t reg_op_lock;
+#endif	/* CONFIG_MALI_NO_MALI */
+
+	struct kbase_pm_device_data pm;
+	struct kbasep_js_device_data js_data;
+	struct kbase_mem_pool_group mem_pools;
+	struct kbasep_mem_device memdev;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	struct memory_group_manager_device *mgm_dev;
+
+	struct kbase_as as[BASE_MAX_NR_AS];
+	u16 as_free; /* Bitpattern of free Address Spaces */
+	struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
+
+	spinlock_t mmu_mask_change;
+
+	struct kbase_gpu_props gpu_props;
+
+	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+
+	struct {
+		atomic_t count;
+		atomic_t state;
+	} disjoint_event;
+
+	s8 nr_hw_address_spaces;
+	s8 nr_user_address_spaces;
+
+	struct kbase_hwcnt {
+		/* The lock should be used when accessing any of the following members */
+		spinlock_t lock;
+
+		struct kbase_context *kctx;
+		u64 addr;
+		u64 addr_bytes;
+
+		struct kbase_instr_backend backend;
+	} hwcnt;
+
+	struct kbase_hwcnt_backend_interface hwcnt_gpu_iface;
+	struct kbase_hwcnt_context *hwcnt_gpu_ctx;
+	struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt;
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	atomic_t               timeline_is_enabled;
+	struct kbase_timeline *timeline;
+
+#if KBASE_TRACE_ENABLE
+	spinlock_t              trace_lock;
+	u16                     trace_first_out;
+	u16                     trace_next_in;
+	struct kbase_trace            *trace_rbuf;
+#endif
+
+	u32 reset_timeout_ms;
+
+	bool cache_clean_in_progress;
+	bool cache_clean_queued;
+	wait_queue_head_t cache_clean_wait;
+
+	void *platform_context;
+
+	struct list_head        kctx_list;
+	struct mutex            kctx_list_lock;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freqs[BASE_MAX_NR_CLOCKS_REGULATORS];
+	unsigned long current_nominal_freq;
+	unsigned long current_voltages[BASE_MAX_NR_CLOCKS_REGULATORS];
+	u64 current_core_mask;
+	struct kbase_devfreq_opp *devfreq_table;
+	int num_opps;
+	struct kbasep_pm_metrics last_devfreq_metrics;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+	struct kbase_devfreq_queue_info devfreq_queue;
+#endif
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+	struct devfreq_cooling_device *devfreq_cooling;
+#else
+	struct thermal_cooling_device *devfreq_cooling;
+#endif
+	bool ipa_protection_mode_switched;
+	struct {
+		/* Access to this struct must be with ipa.lock held */
+		struct mutex lock;
+		struct kbase_ipa_model *configured_model;
+		struct kbase_ipa_model *fallback_model;
+
+		/* Values of the PM utilization metrics from last time the
+		 * power model was invoked. The utilization is calculated as
+		 * the difference between last_metrics and the current values.
+		 */
+		struct kbasep_pm_metrics last_metrics;
+		/* Model data to pass to ipa_gpu_active/idle() */
+		struct kbase_ipa_model_vinstr_data *model_data;
+
+		/* true if use of fallback model has been forced by the User */
+		bool force_fallback_model;
+	} ipa;
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+
+	bool job_fault_debug;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *mali_debugfs_directory;
+	struct dentry *debugfs_ctx_directory;
+
+#ifdef CONFIG_MALI_DEBUG
+	u64 debugfs_as_read_bitmap;
+#endif /* CONFIG_MALI_DEBUG */
+
+	wait_queue_head_t job_fault_wq;
+	wait_queue_head_t job_fault_resume_wq;
+	struct workqueue_struct *job_fault_resume_workq;
+	struct list_head job_fault_event_list;
+	spinlock_t job_fault_event_lock;
+
+#if !MALI_CUSTOMER_RELEASE
+	struct {
+		u16 reg_offset;
+	} regs_dump_debugfs_data;
+#endif /* !MALI_CUSTOMER_RELEASE */
+#endif /* CONFIG_DEBUG_FS */
+
+	atomic_t ctx_num;
+
+#ifdef CONFIG_DEBUG_FS
+	struct kbase_io_history io_history;
+#endif /* CONFIG_DEBUG_FS */
+
+	struct kbase_hwaccess_data hwaccess;
+
+	atomic_t faults_pending;
+
+	bool poweroff_pending;
+
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active_default;
+#else
+	u32 infinite_cache_active_default;
+#endif
+	struct kbase_mem_pool_group_config mem_pool_defaults;
+
+	u32 current_gpu_coherency_mode;
+	u32 system_coherency;
+
+	bool cci_snoop_enabled;
+
+	u32 snoop_enable_smc;
+	u32 snoop_disable_smc;
+
+	struct protected_mode_ops *protected_ops;
+
+	struct protected_mode_device *protected_dev;
+
+	bool protected_mode;
+
+	bool protected_mode_transition;
+
+	bool protected_mode_hwcnt_desired;
+
+	bool protected_mode_hwcnt_disabled;
+
+	struct work_struct protected_mode_hwcnt_disable_work;
+
+	bool protected_mode_support;
+
+#ifdef CONFIG_MALI_BUSLOG
+	struct bus_logger_client *buslogger;
+#endif
+
+	bool irq_reset_flush;
+
+	u32 inited_subsys;
+
+	spinlock_t hwaccess_lock;
+
+	struct mutex mmu_hw_mutex;
+
+	/* See KBASE_SERIALIZE_* for details */
+	u8 serialize_jobs;
+
+#ifdef CONFIG_MALI_CINSTR_GWT
+	u8 backup_serialize_jobs;
+#endif
+
+	u8 l2_size_override;
+	u8 l2_hash_override;
+
+	/* See KBASE_JS_*_PRIORITY_MODE for details. */
+	u32 js_ctx_scheduling_mode;
+
+
+	const struct kbase_pm_policy *policy_list[KBASE_PM_MAX_NUM_POLICIES];
+	int policy_count;
+};
+
+/**
+ * struct jsctx_queue - JS context atom queue
+ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this
+ *                 job slot.
+ * @x_dep_head:    Head item of the linked list of atoms blocked on cross-slot
+ *                 dependencies. Atoms on this list will be moved to the
+ *                 runnable_tree when the blocking atom completes.
+ *
+ * hwaccess_lock must be held when accessing this structure.
+ */
+struct jsctx_queue {
+	struct rb_root runnable_tree;
+	struct list_head x_dep_head;
+};
+
+
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
+					 (((minor) & 0xFFF) << 8) | \
+					 ((0 & 0xFF) << 0))
+
+/**
+ * enum kbase_file_state - Initialization state of a file opened by @kbase_open
+ *
+ * @KBASE_FILE_NEED_VSN:        Initial state, awaiting API version.
+ * @KBASE_FILE_VSN_IN_PROGRESS: Indicates if setting an API version is in
+ *                              progress and other setup calls shall be
+ *                              rejected.
+ * @KBASE_FILE_NEED_CTX:        Indicates if the API version handshake has
+ *                              completed, awaiting context creation flags.
+ * @KBASE_FILE_CTX_IN_PROGRESS: Indicates if the context's setup is in progress
+ *                              and other setup calls shall be rejected.
+ * @KBASE_FILE_COMPLETE:        Indicates if the setup for context has
+ *                              completed, i.e. flags have been set for the
+ *                              context.
+ *
+ * The driver allows only limited interaction with user-space until setup
+ * is complete.
+ */
+enum kbase_file_state {
+	KBASE_FILE_NEED_VSN,
+	KBASE_FILE_VSN_IN_PROGRESS,
+	KBASE_FILE_NEED_CTX,
+	KBASE_FILE_CTX_IN_PROGRESS,
+	KBASE_FILE_COMPLETE
+};
+
+/**
+ * struct kbase_file - Object representing a file opened by @kbase_open
+ *
+ * @kbdev:               Object representing an instance of GPU platform device,
+ *                       allocated from the probe method of the Mali driver.
+ * @filp:                Pointer to the struct file corresponding to device file
+ *                       /dev/malixx instance, passed to the file's open method.
+ * @kctx:                Object representing an entity, among which GPU is
+ *                       scheduled and which gets its own GPU address space.
+ *                       Invalid until @setup_state is KBASE_FILE_COMPLETE.
+ * @api_version:         Contains the version number for User/kernel interface,
+ *                       used for compatibility check. Invalid until
+ *                       @setup_state is KBASE_FILE_NEED_CTX.
+ * @setup_state:         Initialization state of the file. Values come from
+ *                       the kbase_file_state enumeration.
+ */
+struct kbase_file {
+	struct kbase_device  *kbdev;
+	struct file          *filp;
+	struct kbase_context *kctx;
+	unsigned long         api_version;
+	atomic_t              setup_state;
+};
+
+/**
+ * enum kbase_context_flags - Flags for kbase contexts
+ *
+ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
+ * process on a 64-bit kernel.
+ *
+ * @KCTX_RUNNABLE_REF: Set when context is counted in
+ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
+ *
+ * @KCTX_ACTIVE: Set when the context is active.
+ *
+ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
+ * context.
+ *
+ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
+ * initialized.
+ *
+ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
+ * allocations. Existing allocations will not change.
+ *
+ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
+ *
+ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept
+ * scheduled in.
+ *
+ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
+ * This is only ever updated whilst the jsctx_mutex is held.
+ *
+ * @KCTX_DYING: Set when the context process is in the process of being evicted.
+ *
+ * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this
+ * context, to disable use of implicit dma-buf fences. This is used to avoid
+ * potential synchronization deadlocks.
+ *
+ * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory
+ * allocations. For 64-bit clients it is enabled by default, and disabled by
+ * default on 32-bit clients. Being able to clear this flag is only used for
+ * testing purposes of the custom zone allocation on 64-bit user-space builds,
+ * where we also require more control than is available through e.g. the JIT
+ * allocation mechanism. However, the 64-bit user-space client must still
+ * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled
+ * from it for job slot 0. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled
+ * from it for job slot 1. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled
+ * from it for job slot 2. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * All members need to be separate bits. This enum is intended for use in a
+ * bitmask where multiple values get OR-ed together.
+ */
+enum kbase_context_flags {
+	KCTX_COMPAT = 1U << 0,
+	KCTX_RUNNABLE_REF = 1U << 1,
+	KCTX_ACTIVE = 1U << 2,
+	KCTX_PULLED = 1U << 3,
+	KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
+	KCTX_INFINITE_CACHE = 1U << 5,
+	KCTX_SUBMIT_DISABLED = 1U << 6,
+	KCTX_PRIVILEGED = 1U << 7,
+	KCTX_SCHEDULED = 1U << 8,
+	KCTX_DYING = 1U << 9,
+	KCTX_NO_IMPLICIT_SYNC = 1U << 10,
+	KCTX_FORCE_SAME_VA = 1U << 11,
+	KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
+	KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
+	KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
+};
+
+struct kbase_sub_alloc {
+	struct list_head link;
+	struct page *page;
+	DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K);
+};
+
+/**
+ * struct kbase_context - Kernel base context
+ *
+ * @filp:                 Pointer to the struct file corresponding to device file
+ *                        /dev/malixx instance, passed to the file's open method.
+ * @kbdev:                Pointer to the Kbase device for which the context is created.
+ * @kctx_list_link:       Node into Kbase device list of contexts.
+ * @mmu:                  Structure holding details of the MMU tables for this
+ *                        context
+ * @id:                   Unique identifier for the context, indicates the number of
+ *                        contexts which have been created for the device so far.
+ * @api_version:          contains the version number for User/kernel interface,
+ *                        used for compatibility check.
+ * @event_list:           list of posted events about completed atoms, to be sent to
+ *                        event handling thread of Userpsace.
+ * @event_coalesce_list:  list containing events corresponding to successive atoms
+ *                        which have requested deferred delivery of the completion
+ *                        events to Userspace.
+ * @event_mutex:          Lock to protect the concurrent access to @event_list &
+ *                        @event_mutex.
+ * @event_closed:         Flag set through POST_TERM ioctl, indicates that Driver
+ *                        should stop posting events and also inform event handling
+ *                        thread that context termination is in progress.
+ * @event_workq:          Workqueue for processing work items corresponding to atoms
+ *                        that do not return an event to userspace.
+ * @event_count:          Count of the posted events to be consumed by Userspace.
+ * @event_coalesce_count: Count of the events present in @event_coalesce_list.
+ * @flags:                bitmap of enums from kbase_context_flags, indicating the
+ *                        state & attributes for the context.
+ * @aliasing_sink_page:   Special page used for KBASE_MEM_TYPE_ALIAS allocations,
+ *                        which can alias number of memory regions. The page is
+ *                        represent a region where it is mapped with a write-alloc
+ *                        cache setup, typically used when the write result of the
+ *                        GPU isn't needed, but the GPU must write anyway.
+ * @mem_partials_lock:    Lock for protecting the operations done on the elements
+ *                        added to @mem_partials list.
+ * @mem_partials:         List head for the list of large pages, 2MB in size, which
+ *                        which have been split into 4 KB pages and are used
+ *                        partially for the allocations >= 2 MB in size.
+ * @reg_lock:             Lock used for GPU virtual address space management operations,
+ *                        like adding/freeing a memory region in the address space.
+ *                        Can be converted to a rwlock ?.
+ * @reg_rbtree_same:      RB tree of the memory regions allocated from the SAME_VA
+ *                        zone of the GPU virtual address space. Used for allocations
+ *                        having the same value for GPU & CPU virtual address.
+ * @reg_rbtree_custom:    RB tree of the memory regions allocated from the CUSTOM_VA
+ *                        zone of the GPU virtual address space.
+ * @reg_rbtree_exec:      RB tree of the memory regions allocated from the EXEC_VA
+ *                        zone of the GPU virtual address space. Used for GPU-executable
+ *                        allocations which don't need the SAME_VA property.
+ * @cookies:              Bitmask containing of BITS_PER_LONG bits, used mainly for
+ *                        SAME_VA allocations to defer the reservation of memory region
+ *                        (from the GPU virtual address space) from base_mem_alloc
+ *                        ioctl to mmap system call. This helps returning unique
+ *                        handles, disguised as GPU VA, to Userspace from base_mem_alloc
+ *                        and later retrieving the pointer to memory region structure
+ *                        in the mmap handler.
+ * @pending_regions:      Array containing pointers to memory region structures,
+ *                        used in conjunction with @cookies bitmask mainly for
+ *                        providing a mechansim to have the same value for CPU &
+ *                        GPU virtual address.
+ * @event_queue:          Wait queue used for blocking the thread, which consumes
+ *                        the base_jd_event corresponding to an atom, when there
+ *                        are no more posted events.
+ * @tgid:                 Thread group ID of the process whose thread created
+ *                        the context (by calling KBASE_IOCTL_VERSION_CHECK or
+ *                        KBASE_IOCTL_SET_FLAGS, depending on the @api_version).
+ *                        This is usually, but not necessarily, the same as the
+ *                        process whose thread opened the device file
+ *                        /dev/malixx instance.
+ * @pid:                  ID of the thread, corresponding to process @tgid,
+ *                        which actually created the context. This is usually,
+ *                        but not necessarily, the same as the thread which
+ *                        opened the device file /dev/malixx instance.
+ * @jctx:                 object encapsulating all the Job dispatcher related state,
+ *                        including the array of atoms.
+ * @used_pages:           Keeps a track of the number of 4KB physical pages in use
+ *                        for the context.
+ * @nonmapped_pages:      Updated in the same way as @used_pages, except for the case
+ *                        when special tracking page is freed by userspace where it
+ *                        is reset to 0.
+ * @permanent_mapped_pages: Usage count of permanently mapped memory
+ * @mem_pools:            Context-specific pools of free physical memory pages.
+ * @reclaim:              Shrinker object registered with the kernel containing
+ *                        the pointer to callback function which is invoked under
+ *                        low memory conditions. In the callback function Driver
+ *                        frees up the memory for allocations marked as
+ *                        evictable/reclaimable.
+ * @evict_list:           List head for the list containing the allocations which
+ *                        can be evicted or freed up in the shrinker callback.
+ * @waiting_soft_jobs:    List head for the list containing softjob atoms, which
+ *                        are either waiting for the event set operation, or waiting
+ *                        for the signaling of input fence or waiting for the GPU
+ *                        device to powered on so as to dump the CPU/GPU timestamps.
+ * @waiting_soft_jobs_lock: Lock to protect @waiting_soft_jobs list from concurrent
+ *                        accesses.
+ * @dma_fence:            Object containing list head for the list of dma-buf fence
+ *                        waiting atoms and the waitqueue to process the work item
+ *                        queued for the atoms blocked on the signaling of dma-buf
+ *                        fences.
+ * @as_nr:                id of the address space being used for the scheduled in
+ *                        context. This is effectively part of the Run Pool, because
+ *                        it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst
+ *                        the context is scheduled in. The hwaccess_lock must be held
+ *                        whilst accessing this.
+ *                        If the context relating to this value of as_nr is required,
+ *                        then the context must be retained to ensure that it doesn't
+ *                        disappear whilst it is being used. Alternatively, hwaccess_lock
+ *                        can be held to ensure the context doesn't disappear (but this
+ *                        has restrictions on what other locks can be taken simutaneously).
+ * @refcount:             Keeps track of the number of users of this context. A user
+ *                        can be a job that is available for execution, instrumentation
+ *                        needing to 'pin' a context for counter collection, etc.
+ *                        If the refcount reaches 0 then this context is considered
+ *                        inactive and the previously programmed AS might be cleared
+ *                        at any point.
+ *                        Generally the reference count is incremented when the context
+ *                        is scheduled in and an atom is pulled from the context's per
+ *                        slot runnable tree.
+ * @mm_update_lock:       lock used for handling of special tracking page.
+ * @process_mm:           Pointer to the memory descriptor of the process which
+ *                        created the context. Used for accounting the physical
+ *                        pages used for GPU allocations, done for the context,
+ *                        to the memory consumed by the process.
+ * @same_va_end:          End address of the SAME_VA zone (in 4KB page units)
+ * @exec_va_start:        Start address of the EXEC_VA zone (in 4KB page units)
+ *                        or U64_MAX if the EXEC_VA zone is uninitialized.
+ * @gpu_va_end:           End address of the GPU va space (in 4KB page units)
+ * @jit_va:               Indicates if a JIT_VA zone has been created.
+ * @mem_profile_data:     Buffer containing the profiling information provided by
+ *                        Userspace, can be read through the mem_profile debugfs file.
+ * @mem_profile_size:     Size of the @mem_profile_data.
+ * @mem_profile_lock:     Lock to serialize the operations related to mem_profile
+ *                        debugfs file.
+ * @kctx_dentry:          Pointer to the debugfs directory created for every context,
+ *                        inside kbase_device::debugfs_ctx_directory, containing
+ *                        context specific files.
+ * @reg_dump:             Buffer containing a register offset & value pair, used
+ *                        for dumping job fault debug info.
+ * @job_fault_count:      Indicates that a job fault occurred for the context and
+ *                        dumping of its debug info is in progress.
+ * @job_fault_resume_event_list: List containing atoms completed after the faulty
+ *                        atom but before the debug data for faulty atom was dumped.
+ * @jsctx_queue:          Per slot & priority arrays of object containing the root
+ *                        of RB-tree holding currently runnable atoms on the job slot
+ *                        and the head item of the linked list of atoms blocked on
+ *                        cross-slot dependencies.
+ * @atoms_pulled:         Total number of atoms currently pulled from the context.
+ * @atoms_pulled_slot:    Per slot count of the number of atoms currently pulled
+ *                        from the context.
+ * @atoms_pulled_slot_pri: Per slot & priority count of the number of atoms currently
+ *                        pulled from the context. hwaccess_lock shall be held when
+ *                        accessing it.
+ * @blocked_js:           Indicates if the context is blocked from submitting atoms
+ *                        on a slot at a given priority. This is set to true, when
+ *                        the atom corresponding to context is soft/hard stopped or
+ *                        removed from the HEAD_NEXT register in response to
+ *                        soft/hard stop.
+ * @slots_pullable:       Bitmask of slots, indicating the slots for which the
+ *                        context has pullable atoms in the runnable tree.
+ * @work:                 Work structure used for deferred ASID assignment.
+ * @legacy_hwcnt_cli:     Pointer to the legacy userspace hardware counters
+ *                        client, there can be only such client per kbase
+ *                        context.
+ * @legacy_hwcnt_lock:    Lock used to prevent concurrent access to
+ *                        @legacy_hwcnt_cli.
+ * @completed_jobs:       List containing completed atoms for which base_jd_event is
+ *                        to be posted.
+ * @work_count:           Number of work items, corresponding to atoms, currently
+ *                        pending on job_done workqueue of @jctx.
+ * @soft_job_timeout:     Timer object used for failing/cancelling the waiting
+ *                        soft-jobs which have been blocked for more than the
+ *                        timeout value used for the soft-jobs
+ * @jit_alloc:            Array of 256 pointers to GPU memory regions, used for
+ *                        for JIT allocations.
+ * @jit_max_allocations:  Maximum number of JIT allocations allowed at once.
+ * @jit_current_allocations: Current number of in-flight JIT allocations.
+ * @jit_current_allocations_per_bin: Current number of in-flight JIT allocations per bin
+ * @jit_version:          version number indicating whether userspace is using
+ *                        old or new version of interface for JIT allocations
+ *	                  1 -> client used KBASE_IOCTL_MEM_JIT_INIT_OLD
+ *	                  2 -> client used KBASE_IOCTL_MEM_JIT_INIT
+ * @jit_group_id:         A memory group ID to be passed to a platform-specific
+ *                        memory group manager.
+ *                        Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @jit_active_head:      List containing the JIT allocations which are in use.
+ * @jit_pool_head:        List containing the JIT allocations which have been
+ *                        freed up by userpsace and so not being used by them.
+ *                        Driver caches them to quickly fulfill requests for new
+ *                        JIT allocations. They are released in case of memory
+ *                        pressure as they are put on the @evict_list when they
+ *                        are freed up by userspace.
+ * @jit_destroy_head:     List containing the JIT allocations which were moved to it
+ *                        from @jit_pool_head, in the shrinker callback, after freeing
+ *                        their backing physical pages.
+ * @jit_evict_lock:       Lock used for operations done on JIT allocations and also
+ *                        for accessing @evict_list.
+ * @jit_work:             Work item queued to defer the freeing of memory region when
+ *                        JIT allocation is moved to @jit_destroy_head.
+ * @jit_atoms_head:       A list of the JIT soft-jobs, both alloc & free, in submission
+ *                        order, protected by kbase_jd_context.lock.
+ * @jit_pending_alloc:    A list of JIT alloc soft-jobs for which allocation will be
+ *                        reattempted after the impending free of other active JIT
+ *                        allocations.
+ * @ext_res_meta_head:    A list of sticky external resources which were requested to
+ *                        be mapped on GPU side, through a softjob atom of type
+ *                        EXT_RES_MAP or STICKY_RESOURCE_MAP ioctl.
+ * @drain_pending:        Used to record that a flush/invalidate of the GPU caches was
+ *                        requested from atomic context, so that the next flush request
+ *                        can wait for the flush of GPU writes.
+ * @age_count:            Counter incremented on every call to jd_submit_atom,
+ *                        atom is assigned the snapshot of this counter, which
+ *                        is used to determine the atom's age when it is added to
+ *                        the runnable RB-tree.
+ * @trim_level:           Level of JIT allocation trimming to perform on free (0-100%)
+ * @gwt_enabled:          Indicates if tracking of GPU writes is enabled, protected by
+ *                        kbase_context.reg_lock.
+ * @gwt_was_enabled:      Simple sticky bit flag to know if GWT was ever enabled.
+ * @gwt_current_list:     A list of addresses for which GPU has generated write faults,
+ *                        after the last snapshot of it was sent to userspace.
+ * @gwt_snapshot_list:    Snapshot of the @gwt_current_list for sending to user space.
+ * @priority:             Indicates the context priority. Used along with @atoms_count
+ *                        for context scheduling, protected by hwaccess_lock.
+ * @atoms_count:          Number of gpu atoms currently in use, per priority
+ *
+ * A kernel base context is an entity among which the GPU is scheduled.
+ * Each context has its own GPU address space.
+ * Up to one context can be created for each client that opens the device file
+ * /dev/malixx. Context creation is deferred until a special ioctl() system call
+ * is made on the device file.
+ */
+struct kbase_context {
+	struct file *filp;
+	struct kbase_device *kbdev;
+	struct list_head kctx_list_link;
+	struct kbase_mmu_table mmu;
+
+	u32 id;
+	unsigned long api_version;
+	struct list_head event_list;
+	struct list_head event_coalesce_list;
+	struct mutex event_mutex;
+	atomic_t event_closed;
+	struct workqueue_struct *event_workq;
+	atomic_t event_count;
+	int event_coalesce_count;
+
+	atomic_t flags;
+
+	struct tagged_addr aliasing_sink_page;
+
+	spinlock_t              mem_partials_lock;
+	struct list_head        mem_partials;
+
+	struct mutex            reg_lock;
+	struct rb_root reg_rbtree_same;
+	struct rb_root reg_rbtree_custom;
+	struct rb_root reg_rbtree_exec;
+
+
+	unsigned long    cookies;
+	struct kbase_va_region *pending_regions[BITS_PER_LONG];
+
+	wait_queue_head_t event_queue;
+	pid_t tgid;
+	pid_t pid;
+
+	struct kbase_jd_context jctx;
+	atomic_t used_pages;
+	atomic_t         nonmapped_pages;
+	unsigned long permanent_mapped_pages;
+
+	struct kbase_mem_pool_group mem_pools;
+
+	struct shrinker         reclaim;
+	struct list_head        evict_list;
+
+	struct list_head waiting_soft_jobs;
+	spinlock_t waiting_soft_jobs_lock;
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		struct list_head waiting_resource;
+		struct workqueue_struct *wq;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	int as_nr;
+
+	atomic_t refcount;
+
+	spinlock_t         mm_update_lock;
+	struct mm_struct __rcu *process_mm;
+	u64 same_va_end;
+	u64 exec_va_start;
+	u64 gpu_va_end;
+	bool jit_va;
+
+#ifdef CONFIG_DEBUG_FS
+	char *mem_profile_data;
+	size_t mem_profile_size;
+	struct mutex mem_profile_lock;
+	struct dentry *kctx_dentry;
+
+	unsigned int *reg_dump;
+	atomic_t job_fault_count;
+	struct list_head job_fault_resume_event_list;
+
+#endif /* CONFIG_DEBUG_FS */
+
+	struct jsctx_queue jsctx_queue
+		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+
+	atomic_t atoms_pulled;
+	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+	int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][
+			KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	u32 slots_pullable;
+
+	struct work_struct work;
+
+	struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli;
+	struct mutex legacy_hwcnt_lock;
+
+	struct list_head completed_jobs;
+	atomic_t work_count;
+
+	struct timer_list soft_job_timeout;
+
+	struct kbase_va_region *jit_alloc[256];
+	u8 jit_max_allocations;
+	u8 jit_current_allocations;
+	u8 jit_current_allocations_per_bin[256];
+	u8 jit_version;
+	u8 jit_group_id;
+	struct list_head jit_active_head;
+	struct list_head jit_pool_head;
+	struct list_head jit_destroy_head;
+	struct mutex jit_evict_lock;
+	struct work_struct jit_work;
+
+	struct list_head jit_atoms_head;
+	struct list_head jit_pending_alloc;
+
+	struct list_head ext_res_meta_head;
+
+	atomic_t drain_pending;
+
+	u32 age_count;
+
+	u8 trim_level;
+
+#ifdef CONFIG_MALI_CINSTR_GWT
+	bool gwt_enabled;
+
+	bool gwt_was_enabled;
+
+	struct list_head gwt_current_list;
+
+	struct list_head gwt_snapshot_list;
+#endif
+
+	int priority;
+	s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+};
+
+#ifdef CONFIG_MALI_CINSTR_GWT
+/**
+ * struct kbasep_gwt_list_element - Structure used to collect GPU
+ *                                  write faults.
+ * @link:                           List head for adding write faults.
+ * @region:                         Details of the region where we have the
+ *                                  faulting page address.
+ * @page_addr:                      Page address where GPU write fault occurred.
+ * @num_pages:                      The number of pages modified.
+ *
+ * Using this structure all GPU write faults are stored in a list.
+ */
+struct kbasep_gwt_list_element {
+	struct list_head link;
+	struct kbase_va_region *region;
+	u64 page_addr;
+	u64 num_pages;
+};
+
+#endif
+
+/**
+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
+ *                                 to a @kbase_context.
+ * @ext_res_node:                  List head for adding the metadata to a
+ *                                 @kbase_context.
+ * @alloc:                         The physical memory allocation structure
+ *                                 which is mapped.
+ * @gpu_addr:                      The GPU virtual address the resource is
+ *                                 mapped to.
+ *
+ * External resources can be mapped into multiple contexts as well as the same
+ * context multiple times.
+ * As kbase_va_region itself isn't refcounted we can't attach our extra
+ * information to it as it could be removed under our feet leaving external
+ * resources pinned.
+ * This metadata structure binds a single external resource to a single
+ * context, ensuring that per context mapping is tracked separately so it can
+ * be overridden when needed and abuses by the application (freeing the resource
+ * multiple times) don't effect the refcount of the physical allocation.
+ */
+struct kbase_ctx_ext_res_meta {
+	struct list_head ext_res_node;
+	struct kbase_mem_phy_alloc *alloc;
+	u64 gpu_addr;
+};
+
+enum kbase_reg_access_type {
+	REG_READ,
+	REG_WRITE
+};
+
+enum kbase_share_attr_bits {
+	/* (1ULL << 8) bit is reserved */
+	SHARE_BOTH_BITS = (2ULL << 8),	/* inner and outer shareable coherency */
+	SHARE_INNER_BITS = (3ULL << 8)	/* inner shareable coherency */
+};
+
+/**
+ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
+ * @kbdev: kbase device
+ *
+ * Return: true if the device access are coherent, false if not.
+ */
+static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
+			(kbdev->system_coherency == COHERENCY_ACE))
+		return true;
+
+	return false;
+}
+
+/* Conversion helpers for setting up high resolution timers */
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
+
+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
+#define KBASE_CLEAN_CACHE_MAX_LOOPS     100000
+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
+#define KBASE_AS_INACTIVE_MAX_LOOPS     100000
+
+/* JobDescriptorHeader - taken from the architecture specifications, the layout
+ * is currently identical for all GPU archs. */
+struct job_descriptor_header {
+	u32 exception_status;
+	u32 first_incomplete_task;
+	u64 fault_pointer;
+	u8 job_descriptor_size : 1;
+	u8 job_type : 7;
+	u8 job_barrier : 1;
+	u8 _reserved_01 : 1;
+	u8 _reserved_1 : 1;
+	u8 _reserved_02 : 1;
+	u8 _reserved_03 : 1;
+	u8 _reserved_2 : 1;
+	u8 _reserved_04 : 1;
+	u8 _reserved_05 : 1;
+	u16 job_index;
+	u16 job_dependency_index_1;
+	u16 job_dependency_index_2;
+	union {
+		u64 _64;
+		u32 _32;
+	} next_job;
+};
+
+#endif				/* _KBASE_DEFS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
new file mode 100755
index 0000000..a265082
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -0,0 +1,537 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports tracing feature provided in the base module.
+ * Please keep it in sync with the value of base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if KBASE_TRACE_ENABLE
+static const char *kbasep_trace_code_string[] = {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+static int kbasep_trace_init(struct kbase_device *kbdev);
+static void kbasep_trace_term(struct kbase_device *kbdev);
+static void kbasep_trace_hook_wrapper(void *param);
+
+struct kbase_device *kbase_device_alloc(void)
+{
+	return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+	kbdev->as[i].number = i;
+	kbdev->as[i].bf_data.addr = 0ULL;
+	kbdev->as[i].pf_data.addr = 0ULL;
+
+	kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i);
+	if (!kbdev->as[i].pf_wq)
+		return -EINVAL;
+
+	INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+	INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+		struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
+		struct work_struct *poke_work = &kbdev->as[i].poke_work;
+
+		kbdev->as[i].poke_wq =
+			alloc_workqueue("mali_mmu%d_poker", 0, 1, i);
+		if (!kbdev->as[i].poke_wq) {
+			destroy_workqueue(kbdev->as[i].pf_wq);
+			return -EINVAL;
+		}
+		INIT_WORK(poke_work, kbasep_as_do_poke);
+
+		hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+		poke_timer->function = kbasep_as_poke_timer_callback;
+
+		kbdev->as[i].poke_refcount = 0;
+		kbdev->as[i].poke_state = 0u;
+	}
+
+	return 0;
+}
+
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+	destroy_workqueue(kbdev->as[i].pf_wq);
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		destroy_workqueue(kbdev->as[i].poke_wq);
+}
+
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+	int i, err;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		err = kbase_device_as_init(kbdev, i);
+		if (err)
+			goto free_workqs;
+	}
+
+	return 0;
+
+free_workqs:
+	for (; i > 0; i--)
+		kbase_device_as_term(kbdev, i);
+
+	return err;
+}
+
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+		kbase_device_as_term(kbdev, i);
+}
+
+int kbase_device_init(struct kbase_device * const kbdev)
+{
+	int err;
+#ifdef CONFIG_ARM64
+	struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
+
+	spin_lock_init(&kbdev->mmu_mask_change);
+	mutex_init(&kbdev->mmu_hw_mutex);
+#ifdef CONFIG_ARM64
+	kbdev->cci_snoop_enabled = false;
+	np = kbdev->dev->of_node;
+	if (np != NULL) {
+		if (of_property_read_u32(np, "snoop_enable_smc",
+					&kbdev->snoop_enable_smc))
+			kbdev->snoop_enable_smc = 0;
+		if (of_property_read_u32(np, "snoop_disable_smc",
+					&kbdev->snoop_disable_smc))
+			kbdev->snoop_disable_smc = 0;
+		/* Either both or none of the calls should be provided. */
+		if (!((kbdev->snoop_disable_smc == 0
+			&& kbdev->snoop_enable_smc == 0)
+			|| (kbdev->snoop_disable_smc != 0
+			&& kbdev->snoop_enable_smc != 0))) {
+			WARN_ON(1);
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+#endif /* CONFIG_ARM64 */
+	/* Get the list of workarounds for issues on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	err = kbase_hw_set_issues_mask(kbdev);
+	if (err)
+		goto fail;
+
+	/* Set the list of features available on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	kbase_hw_set_features_mask(kbdev);
+
+	kbase_gpuprops_set_features(kbdev);
+
+	/* On Linux 4.0+, dma coherency is determined from device tree */
+#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+	set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif
+
+	/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+	 * device structure was created by device-tree
+	 */
+	if (!kbdev->dev->dma_mask)
+		kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+	err = dma_set_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	err = dma_set_coherent_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+	err = kbase_device_all_as_init(kbdev);
+	if (err)
+		goto as_init_failed;
+
+	spin_lock_init(&kbdev->hwcnt.lock);
+
+	err = kbasep_trace_init(kbdev);
+	if (err)
+		goto term_as;
+
+	init_waitqueue_head(&kbdev->cache_clean_wait);
+
+	kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+	atomic_set(&kbdev->ctx_num, 0);
+
+	err = kbase_instr_backend_init(kbdev);
+	if (err)
+		goto term_trace;
+
+	kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+		kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+	else
+		kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
+
+	return 0;
+term_trace:
+	kbasep_trace_term(kbdev);
+term_as:
+	kbase_device_all_as_term(kbdev);
+as_init_failed:
+dma_set_mask_failed:
+fail:
+	return err;
+}
+
+void kbase_device_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	WARN_ON(!list_empty(&kbdev->kctx_list));
+
+#if KBASE_TRACE_ENABLE
+	kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+	kbase_instr_backend_term(kbdev);
+
+	kbasep_trace_term(kbdev);
+
+	kbase_device_all_as_term(kbdev);
+}
+
+void kbase_device_free(struct kbase_device *kbdev)
+{
+	kfree(kbdev);
+}
+
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	struct kbase_trace *rbuf;
+
+	rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+	if (!rbuf)
+		return -EINVAL;
+
+	kbdev->trace_rbuf = rbuf;
+	spin_lock_init(&kbdev->trace_lock);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	kfree(kbdev->trace_rbuf);
+}
+
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+	s32 written = 0;
+
+	/* Initial part of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
+
+	if (trace_msg->katom)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
+
+	/* NOTE: Could add function callbacks to handle different message types */
+	/* Jobslot present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Refcount present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Rest of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
+}
+
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	dev_dbg(kbdev->dev, "%s", buffer);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+	unsigned long irqflags;
+	struct kbase_trace *trace_msg;
+
+	spin_lock_irqsave(&kbdev->trace_lock, irqflags);
+
+	trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+	/* Fill the message */
+	trace_msg->thread_id = task_pid_nr(current);
+	trace_msg->cpu = task_cpu(current);
+
+	getnstimeofday(&trace_msg->timestamp);
+
+	trace_msg->code = code;
+	trace_msg->ctx = ctx;
+
+	if (NULL == katom) {
+		trace_msg->katom = false;
+	} else {
+		trace_msg->katom = true;
+		trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+		trace_msg->atom_udata[0] = katom->udata.blob[0];
+		trace_msg->atom_udata[1] = katom->udata.blob[1];
+	}
+
+	trace_msg->gpu_addr = gpu_addr;
+	trace_msg->jobslot = jobslot;
+	trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
+	trace_msg->info_val = info_val;
+	trace_msg->flags = flags;
+
+	/* Update the ringbuffer indices */
+	kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+	if (kbdev->trace_next_in == kbdev->trace_first_out)
+		kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+	/* Done */
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	kbdev->trace_first_out = kbdev->trace_next_in;
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 start;
+	u32 end;
+
+	dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	start = kbdev->trace_first_out;
+	end = kbdev->trace_next_in;
+
+	while (start != end) {
+		struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
+
+		kbasep_trace_dump_msg(kbdev, trace_msg);
+
+		start = (start + 1) & KBASE_TRACE_MASK;
+	}
+	dev_dbg(kbdev->dev, "TRACE_END");
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	KBASE_TRACE_CLEAR(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)param;
+
+	kbasep_trace_dump(kbdev);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct trace_seq_state {
+	struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+	u32 start;
+	u32 end;
+};
+
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	if (*pos > KBASE_TRACE_SIZE)
+		return NULL;
+	i = state->start + *pos;
+	if ((state->end >= state->start && i >= state->end) ||
+			i >= state->end + KBASE_TRACE_SIZE)
+		return NULL;
+
+	i &= KBASE_TRACE_MASK;
+
+	return &state->trace_buf[i];
+}
+
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	(*pos)++;
+
+	i = (state->start + *pos) & KBASE_TRACE_MASK;
+	if (i == state->end)
+		return NULL;
+
+	return &state->trace_buf[i];
+}
+
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace *trace_msg = data;
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	seq_printf(s, "%s\n", buffer);
+	return 0;
+}
+
+static const struct seq_operations kbasep_trace_seq_ops = {
+	.start = kbasep_trace_seq_start,
+	.next = kbasep_trace_seq_next,
+	.stop = kbasep_trace_seq_stop,
+	.show = kbasep_trace_seq_show,
+};
+
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct kbase_device *kbdev = inode->i_private;
+	unsigned long flags;
+
+	struct trace_seq_state *state;
+
+	state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
+	if (!state)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	state->start = kbdev->trace_first_out;
+	state->end = kbdev->trace_next_in;
+	memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	return 0;
+}
+
+static const struct file_operations kbasep_trace_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = kbasep_trace_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_trace", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_trace_debugfs_fops);
+}
+
+#else
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_DEBUG_FS */
+
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	CSTD_UNUSED(param);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
new file mode 100755
index 0000000..68eb4ed
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_set(&kbdev->disjoint_event.count, 0);
+	atomic_set(&kbdev->disjoint_event.state, 0);
+}
+
+/* increment the disjoint event count */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* increment the state and the event counter */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.state);
+
+	kbase_disjoint_event(kbdev);
+}
+
+/* decrement the state */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+	kbase_disjoint_event(kbdev);
+
+	atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* increments the count only if the state is > 0 */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	if (atomic_read(&kbdev->disjoint_event.state))
+		kbase_disjoint_event(kbdev);
+}
+
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
new file mode 100755
index 0000000..6a95900
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
@@ -0,0 +1,454 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as
+ * it will be set there.
+ */
+#include "mali_kbase_dma_fence.h"
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/reservation.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/ww_mutex.h>
+
+#include <mali_kbase.h>
+
+static void
+kbase_dma_fence_work(struct work_struct *pwork);
+
+static void
+kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource);
+}
+
+static void
+kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom)
+{
+	list_del(&katom->queue);
+}
+
+static int
+kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
+				  struct ww_acquire_ctx *ctx)
+{
+	struct reservation_object *content_res = NULL;
+	unsigned int content_res_idx = 0;
+	unsigned int r;
+	int err = 0;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < info->dma_fence_resv_count; r++) {
+		if (info->resv_objs[r] == content_res) {
+			content_res = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	content_res_idx = r;
+
+	/* Unlock the locked one ones */
+	while (r--)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+
+	if (content_res)
+		ww_mutex_unlock(&content_res->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+		content_res = info->resv_objs[content_res_idx];
+		ww_mutex_lock_slow(&content_res->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+static void
+kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
+				    struct ww_acquire_ctx *ctx)
+{
+	unsigned int r;
+
+	for (r = 0; r < info->dma_fence_resv_count; r++)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+	ww_acquire_fini(ctx);
+}
+
+/**
+ * kbase_dma_fence_queue_work() - Queue work to handle @katom
+ * @katom: Pointer to atom for which to queue work
+ *
+ * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and
+ * submit the atom.
+ */
+static void
+kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	bool ret;
+
+	INIT_WORK(&katom->work, kbase_dma_fence_work);
+	ret = queue_work(kctx->dma_fence.wq, &katom->work);
+	/* Warn if work was already queued, that should not happen. */
+	WARN_ON(!ret);
+}
+
+/**
+ * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom
+ * @katom:	Katom to cancel
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Cancel callbacks and clean up. */
+	kbase_fence_free_callbacks(katom);
+
+	/* Mark the atom as handled in case all fences signaled just before
+	 * canceling the callbacks and the worker was queued.
+	 */
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 * there is a race between job completion and cancellation.
+	 */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+
+/**
+ * kbase_dma_fence_work() - Worker thread called when a fence is signaled
+ * @pwork:	work_struct containing a pointer to a katom
+ *
+ * This function will clean and mark all dependencies as satisfied
+ */
+static void
+kbase_dma_fence_work(struct work_struct *pwork)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = container_of(pwork, struct kbase_jd_atom, work);
+	ctx = &katom->kctx->jctx;
+
+	mutex_lock(&ctx->lock);
+	if (kbase_fence_dep_count_read(katom) != 0)
+		goto out;
+
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Remove atom from list of dma-fence waiting atoms. */
+	kbase_dma_fence_waiters_remove(katom);
+	/* Cleanup callbacks. */
+	kbase_fence_free_callbacks(katom);
+	/*
+	 * Queue atom on GPU, unless it has already completed due to a failing
+	 * dependency. Run jd_done_nolock() on the katom if it is completed.
+	 */
+	if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
+		jd_done_nolock(katom, NULL);
+	else
+		kbase_jd_dep_clear_locked(katom);
+
+out:
+	mutex_unlock(&ctx->lock);
+}
+
+static void
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
+#else
+kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+#endif
+{
+	struct kbase_fence_cb *kcb = container_of(cb,
+				struct kbase_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+
+	/* If the atom is zapped dep_count will be forced to a negative number
+	 * preventing this callback from ever scheduling work. Which in turn
+	 * would reschedule the atom.
+	 */
+
+	if (kbase_fence_dep_count_dec_and_test(katom))
+		kbase_dma_fence_queue_work(katom);
+}
+
+static int
+kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
+					 struct reservation_object *resv,
+					 bool exclusive)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+#else
+	struct dma_fence *excl_fence = NULL;
+	struct dma_fence **shared_fences = NULL;
+#endif
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = reservation_object_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = kbase_fence_add_callback(katom,
+						excl_fence,
+						kbase_dma_fence_cb);
+
+		/* Release our reference, taken by reservation_object_get_fences_rcu(),
+		 * to the fence. We have set up our callback (if that was possible),
+		 * and it's the fence's owner is responsible for singling the fence
+		 * before allowing it to disappear.
+		 */
+		dma_fence_put(excl_fence);
+
+		if (err)
+			goto out;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = kbase_fence_add_callback(katom,
+							shared_fences[i],
+							kbase_dma_fence_cb);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Release all our references to the shared fences, taken by
+	 * reservation_object_get_fences_rcu(). We have set up our callback (if
+	 * that was possible), and it's the fence's owner is responsible for
+	 * signaling the fence before allowing it to disappear.
+	 */
+out:
+	for (i = 0; i < shared_count; i++)
+		dma_fence_put(shared_fences[i]);
+	kfree(shared_fences);
+
+	if (err) {
+		/*
+		 * On error, cancel and clean up all callbacks that was set up
+		 * before the error.
+		 */
+		kbase_fence_free_callbacks(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive)
+{
+	unsigned int i;
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		/* Duplicate resource, ignore */
+		if (info->resv_objs[i] == resv)
+			return;
+	}
+
+	info->resv_objs[info->dma_fence_resv_count] = resv;
+	if (exclusive)
+		set_bit(info->dma_fence_resv_count,
+			info->dma_fence_excl_bitmap);
+	(info->dma_fence_resv_count)++;
+}
+
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info)
+{
+	int err, i;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+	struct ww_acquire_ctx ww_ctx;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	fence = kbase_fence_out_new(katom);
+	if (!fence) {
+		err = -ENOMEM;
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d creating fence.\n", err);
+		return err;
+	}
+
+	kbase_fence_dep_count_set(katom, 1);
+
+	err = kbase_dma_fence_lock_reservations(info, &ww_ctx);
+	if (err) {
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d locking reservations.\n", err);
+		kbase_fence_dep_count_set(katom, -1);
+		kbase_fence_out_remove(katom);
+		return err;
+	}
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		struct reservation_object *obj = info->resv_objs[i];
+
+		if (!test_bit(i, info->dma_fence_excl_bitmap)) {
+			err = reservation_object_reserve_shared(obj);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d reserving space for shared fence.\n", err);
+				goto end;
+			}
+
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, false);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_shared_fence(obj, fence);
+		} else {
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, true);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_excl_fence(obj, fence);
+		}
+	}
+
+end:
+	kbase_dma_fence_unlock_reservations(info, &ww_ctx);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (kbase_fence_dep_count_dec_and_test(katom)) {
+			kbase_fence_dep_count_set(katom, -1);
+			kbase_fence_free_callbacks(katom);
+		} else {
+			/* Add katom to the list of dma-buf fence waiting atoms
+			 * only if it is still waiting.
+			 */
+			kbase_dma_fence_waiters_add(katom);
+		}
+	} else {
+		/* There was an error, cancel callbacks, set dep_count to -1 to
+		 * indicate that the atom has been handled (the caller will
+		 * kill it for us), signal the fence, free callbacks and the
+		 * fence.
+		 */
+		kbase_fence_free_callbacks(katom);
+		kbase_fence_dep_count_set(katom, -1);
+		kbase_dma_fence_signal(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
+{
+	struct list_head *list = &kctx->dma_fence.waiting_resource;
+
+	while (!list_empty(list)) {
+		struct kbase_jd_atom *katom;
+
+		katom = list_first_entry(list, struct kbase_jd_atom, queue);
+		kbase_dma_fence_waiters_remove(katom);
+		kbase_dma_fence_cancel_atom(katom);
+	}
+}
+
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
+{
+	/* Cancel callbacks and clean up. */
+	if (kbase_fence_free_callbacks(katom))
+		kbase_dma_fence_queue_work(katom);
+}
+
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
+{
+	if (!katom->dma_fence.fence)
+		return;
+
+	/* Signal the atom's fence. */
+	dma_fence_signal(katom->dma_fence.fence);
+
+	kbase_fence_out_remove(katom);
+
+	kbase_fence_free_callbacks(katom);
+}
+
+void kbase_dma_fence_term(struct kbase_context *kctx)
+{
+	destroy_workqueue(kctx->dma_fence.wq);
+	kctx->dma_fence.wq = NULL;
+}
+
+int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);
+
+	kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d",
+					     WQ_UNBOUND, 1, kctx->pid);
+	if (!kctx->dma_fence.wq)
+		return -ENOMEM;
+
+	return 0;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
new file mode 100755
index 0000000..2a4d6fc
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
@@ -0,0 +1,136 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_DMA_FENCE_H_
+#define _KBASE_DMA_FENCE_H_
+
+#ifdef CONFIG_MALI_DMA_FENCE
+
+#include <linux/list.h>
+#include <linux/reservation.h>
+#include <mali_kbase_fence.h>
+
+
+/* Forward declaration from mali_kbase_defs.h */
+struct kbase_jd_atom;
+struct kbase_context;
+
+/**
+ * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
+ * @resv_objs:             Array of reservation objects to attach the
+ *                         new fence to.
+ * @dma_fence_resv_count:  Number of reservation objects in the array.
+ * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive.
+ *
+ * This is used by some functions to pass around a collection of data about
+ * reservation objects.
+ */
+struct kbase_dma_fence_resv_info {
+	struct reservation_object **resv_objs;
+	unsigned int dma_fence_resv_count;
+	unsigned long *dma_fence_excl_bitmap;
+};
+
+/**
+ * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
+ * @resv:      Reservation object to add to the array.
+ * @info:      Pointer to struct with current reservation info
+ * @exclusive: Boolean indicating if exclusive access is needed
+ *
+ * The function adds a new reservation_object to an existing array of
+ * reservation_objects. At the same time keeps track of which objects require
+ * exclusive access in dma_fence_excl_bitmap.
+ */
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive);
+
+/**
+ * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
+ * @katom: Katom with the external dependency.
+ * @info:  Pointer to struct with current reservation info
+ *
+ * Return: An error code or 0 if succeeds
+ */
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info);
+
+/**
+ * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx
+ * @kctx: Pointer to kbase context
+ *
+ * This function will cancel and clean up all katoms on @kctx that is waiting
+ * on dma-buf fences.
+ *
+ * Locking: jctx.lock needs to be held when calling this function.
+ */
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
+ * @katom: Pointer to katom whose callbacks are to be canceled
+ *
+ * This function cancels all dma-buf fence callbacks on @katom, but does not
+ * cancel the katom itself.
+ *
+ * The caller is responsible for ensuring that jd_done_nolock is called on
+ * @katom.
+ *
+ * Locking: jctx.lock must be held when calling this function.
+ */
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
+ * @katom: Pointer to katom to signal and clean up
+ *
+ * This function will signal the @katom's fence, if it has one, and clean up
+ * the callback data from the katom's wait on earlier fences.
+ *
+ * Locking: jctx.lock must be held while calling this function.
+ */
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_term() - Terminate Mali dma-fence context
+ * @kctx: kbase context to terminate
+ */
+void kbase_dma_fence_term(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_init() - Initialize Mali dma-fence context
+ * @kctx: kbase context to initialize
+ */
+int kbase_dma_fence_init(struct kbase_context *kctx);
+
+
+#else /* CONFIG_MALI_DMA_FENCE */
+/* Dummy functions for when dma-buf fence isn't enabled. */
+
+static inline int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	return 0;
+}
+
+static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
new file mode 100755
index 0000000..721af69
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -0,0 +1,266 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016,2018-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+#include <mali_kbase_tracepoints.h>
+
+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct base_jd_udata data;
+	struct kbase_device *kbdev;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+	kbdev = kctx->kbdev;
+	data = katom->udata;
+
+	KBASE_TLSTREAM_TL_NRET_ATOM_CTX(kbdev, katom, kctx);
+	KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom);
+
+	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+
+	wake_up(&katom->completed);
+
+	return data;
+}
+
+int kbase_event_pending(struct kbase_context *ctx)
+{
+	KBASE_DEBUG_ASSERT(ctx);
+
+	return (atomic_read(&ctx->event_count) != 0) ||
+			(atomic_read(&ctx->event_closed) != 0);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+	struct kbase_jd_atom *atom;
+
+	KBASE_DEBUG_ASSERT(ctx);
+
+	mutex_lock(&ctx->event_mutex);
+
+	if (list_empty(&ctx->event_list)) {
+		if (!atomic_read(&ctx->event_closed)) {
+			mutex_unlock(&ctx->event_mutex);
+			return -1;
+		}
+
+		/* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+		mutex_unlock(&ctx->event_mutex);
+		uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+		memset(&uevent->udata, 0, sizeof(uevent->udata));
+		dev_dbg(ctx->kbdev->dev,
+				"event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+				BASE_JD_EVENT_DRV_TERMINATED);
+		return 0;
+	}
+
+	/* normal event processing */
+	atomic_dec(&ctx->event_count);
+	atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
+	list_del(ctx->event_list.next);
+
+	mutex_unlock(&ctx->event_mutex);
+
+	dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+	uevent->event_code = atom->event_code;
+	uevent->atom_number = (atom - ctx->jctx.atoms);
+
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(atom);
+
+	mutex_lock(&ctx->jctx.lock);
+	uevent->udata = kbase_event_process(ctx, atom);
+	mutex_unlock(&ctx->jctx.lock);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue);
+
+/**
+ * kbase_event_process_noreport_worker - Worker for processing atoms that do not
+ *                                       return an event but do have external
+ *                                       resources
+ * @data:  Work structure
+ */
+static void kbase_event_process_noreport_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(katom);
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_event_process(kctx, katom);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+/**
+ * kbase_event_process_noreport - Process atoms that do not return an event
+ * @kctx:  Context pointer
+ * @katom: Atom to be processed
+ *
+ * Atoms that do not have external resources will be processed immediately.
+ * Atoms that do have external resources will be processed on a workqueue, in
+ * order to avoid locking issues.
+ */
+static void kbase_event_process_noreport(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		INIT_WORK(&katom->work, kbase_event_process_noreport_worker);
+		queue_work(kctx->event_workq, &katom->work);
+	} else {
+		kbase_event_process(kctx, katom);
+	}
+}
+
+/**
+ * kbase_event_coalesce - Move pending events to the main event list
+ * @kctx:  Context pointer
+ *
+ * kctx->event_list and kctx->event_coalesce_count must be protected
+ * by a lock unless this is the last thread using them
+ * (and we're about to terminate the lock).
+ *
+ * Return: The number of pending events moved to the main event list
+ */
+static int kbase_event_coalesce(struct kbase_context *kctx)
+{
+	const int event_count = kctx->event_coalesce_count;
+
+	/* Join the list of pending events onto the tail of the main list
+	   and reset it */
+	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
+	kctx->event_coalesce_count = 0;
+
+	/* Return the number of events moved */
+	return event_count;
+}
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+
+	if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
+		if (atom->event_code == BASE_JD_EVENT_DONE) {
+			/* Don't report the event */
+			kbase_event_process_noreport(ctx, atom);
+			return;
+		}
+	}
+
+	if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
+		/* Don't report the event */
+		kbase_event_process_noreport(ctx, atom);
+		return;
+	}
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, atom, TL_ATOM_STATE_POSTED);
+	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
+		/* Don't report the event until other event(s) have completed */
+		mutex_lock(&ctx->event_mutex);
+		list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
+		++ctx->event_coalesce_count;
+		mutex_unlock(&ctx->event_mutex);
+	} else {
+		/* Report the event and any pending events now */
+		int event_count = 1;
+
+		mutex_lock(&ctx->event_mutex);
+		event_count += kbase_event_coalesce(ctx);
+		list_add_tail(&atom->dep_item[0], &ctx->event_list);
+		atomic_add(event_count, &ctx->event_count);
+		mutex_unlock(&ctx->event_mutex);
+
+		kbase_event_wakeup(ctx);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_event_post);
+
+void kbase_event_close(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->event_mutex);
+	atomic_set(&kctx->event_closed, true);
+	mutex_unlock(&kctx->event_mutex);
+	kbase_event_wakeup(kctx);
+}
+
+int kbase_event_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	INIT_LIST_HEAD(&kctx->event_list);
+	INIT_LIST_HEAD(&kctx->event_coalesce_list);
+	mutex_init(&kctx->event_mutex);
+	atomic_set(&kctx->event_count, 0);
+	kctx->event_coalesce_count = 0;
+	atomic_set(&kctx->event_closed, false);
+	kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+	if (NULL == kctx->event_workq)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init);
+
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+	int event_count;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+	flush_workqueue(kctx->event_workq);
+	destroy_workqueue(kctx->event_workq);
+
+	/* We use kbase_event_dequeue to remove the remaining events as that
+	 * deals with all the cleanup needed for the atoms.
+	 *
+	 * Note: use of kctx->event_list without a lock is safe because this must be the last
+	 * thread using it (because we're about to terminate the lock)
+	 */
+	event_count = kbase_event_coalesce(kctx);
+	atomic_add(event_count, &kctx->event_count);
+
+	while (!list_empty(&kctx->event_list)) {
+		struct base_jd_event_v2 event;
+
+		kbase_event_dequeue(kctx, &event);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c
new file mode 100755
index 0000000..96a6ab9
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c
@@ -0,0 +1,212 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <mali_kbase_fence_defs.h>
+#include <mali_kbase_fence.h>
+#include <mali_kbase.h>
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(kbase_fence_lock);
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_get_driver_name(struct fence *fence)
+#else
+kbase_fence_get_driver_name(struct dma_fence *fence)
+#endif
+{
+	return kbase_drv_name;
+}
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_get_timeline_name(struct fence *fence)
+#else
+kbase_fence_get_timeline_name(struct dma_fence *fence)
+#endif
+{
+	return kbase_timeline_name;
+}
+
+static bool
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_enable_signaling(struct fence *fence)
+#else
+kbase_fence_enable_signaling(struct dma_fence *fence)
+#endif
+{
+	return true;
+}
+
+static void
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_fence_value_str(struct fence *fence, char *str, int size)
+#else
+kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size)
+#endif
+{
+#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
+	snprintf(str, size, "%u", fence->seqno);
+#else
+	snprintf(str, size, "%llu", fence->seqno);
+#endif
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+const struct fence_ops kbase_fence_ops = {
+	.wait = fence_default_wait,
+#else
+const struct dma_fence_ops kbase_fence_ops = {
+	.wait = dma_fence_default_wait,
+#endif
+	.get_driver_name = kbase_fence_get_driver_name,
+	.get_timeline_name = kbase_fence_get_timeline_name,
+	.enable_signaling = kbase_fence_enable_signaling,
+	.fence_value_str = kbase_fence_fence_value_str
+};
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+struct fence *
+kbase_fence_out_new(struct kbase_jd_atom *katom)
+#else
+struct dma_fence *
+kbase_fence_out_new(struct kbase_jd_atom *katom)
+#endif
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	WARN_ON(katom->dma_fence.fence);
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	dma_fence_init(fence,
+		       &kbase_fence_ops,
+		       &kbase_fence_lock,
+		       katom->dma_fence.context,
+		       atomic_inc_return(&katom->dma_fence.seqno));
+
+	katom->dma_fence.fence = fence;
+
+	return fence;
+}
+
+bool
+kbase_fence_free_callbacks(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_cb *cb, *tmp;
+	bool res = false;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Clean up and free callbacks. */
+	list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) {
+		bool ret;
+
+		/* Cancel callbacks that hasn't been called yet. */
+		ret = dma_fence_remove_callback(cb->fence, &cb->fence_cb);
+		if (ret) {
+			int ret;
+
+			/* Fence had not signaled, clean up after
+			 * canceling.
+			 */
+			ret = atomic_dec_return(&katom->dma_fence.dep_count);
+
+			if (unlikely(ret == 0))
+				res = true;
+		}
+
+		/*
+		 * Release the reference taken in
+		 * kbase_fence_add_callback().
+		 */
+		dma_fence_put(cb->fence);
+		list_del(&cb->node);
+		kfree(cb);
+	}
+
+	return res;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+int
+kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			 struct fence *fence,
+			 fence_func_t callback)
+#else
+int
+kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			 struct dma_fence *fence,
+			 dma_fence_func_t callback)
+#endif
+{
+	int err = 0;
+	struct kbase_fence_cb *kbase_fence_cb;
+
+	if (!fence)
+		return -EINVAL;
+
+	kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL);
+	if (!kbase_fence_cb)
+		return -ENOMEM;
+
+	kbase_fence_cb->fence = fence;
+	kbase_fence_cb->katom = katom;
+	INIT_LIST_HEAD(&kbase_fence_cb->node);
+	atomic_inc(&katom->dma_fence.dep_count);
+
+	err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb,
+				     callback);
+	if (err == -ENOENT) {
+		/* Fence signaled, get the completion result */
+		err = dma_fence_get_status(fence);
+
+		/* remap success completion to err code */
+		if (err == 1)
+			err = 0;
+
+		kfree(kbase_fence_cb);
+		atomic_dec(&katom->dma_fence.dep_count);
+	} else if (err) {
+		kfree(kbase_fence_cb);
+		atomic_dec(&katom->dma_fence.dep_count);
+	} else {
+		/*
+		 * Get reference to fence that will be kept until callback gets
+		 * cleaned up in kbase_fence_free_callbacks().
+		 */
+		dma_fence_get(fence);
+		/* Add callback to katom's list of callbacks */
+		list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks);
+	}
+
+	return err;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h
new file mode 100755
index 0000000..d7a65e0
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h
@@ -0,0 +1,280 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_FENCE_H_
+#define _KBASE_FENCE_H_
+
+/*
+ * mali_kbase_fence.[hc] has common fence code used by both
+ * - CONFIG_MALI_DMA_FENCE - implicit DMA fences
+ * - CONFIG_SYNC_FILE      - explicit fences beginning with 4.9 kernel
+ */
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+
+#include <linux/list.h>
+#include "mali_kbase_fence_defs.h"
+#include "mali_kbase.h"
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+extern const struct fence_ops kbase_fence_ops;
+#else
+extern const struct dma_fence_ops kbase_fence_ops;
+#endif
+
+/**
+* struct kbase_fence_cb - Mali dma-fence callback data struct
+* @fence_cb: Callback function
+* @katom:    Pointer to katom that is waiting on this callback
+* @fence:    Pointer to the fence object on which this callback is waiting
+* @node:     List head for linking this callback to the katom
+*/
+struct kbase_fence_cb {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence_cb fence_cb;
+	struct fence *fence;
+#else
+	struct dma_fence_cb fence_cb;
+	struct dma_fence *fence;
+#endif
+	struct kbase_jd_atom *katom;
+	struct list_head node;
+};
+
+/**
+ * kbase_fence_out_new() - Creates a new output fence and puts it on the atom
+ * @katom: Atom to create an output fence for
+ *
+ * return: A new fence object on success, NULL on failure.
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
+#else
+struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
+#endif
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_fence_in_set() - Assign input fence to atom
+ * @katom: Atom to assign input fence to
+ * @fence: Input fence to assign to atom
+ *
+ * This function will take ownership of one fence reference!
+ */
+#define kbase_fence_fence_in_set(katom, fence) \
+	do { \
+		WARN_ON((katom)->dma_fence.fence_in); \
+		(katom)->dma_fence.fence_in = fence; \
+	} while (0)
+#endif
+
+/**
+ * kbase_fence_out_remove() - Removes the output fence from atom
+ * @katom: Atom to remove output fence for
+ *
+ * This will also release the reference to this fence which the atom keeps
+ */
+static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->dma_fence.fence) {
+		dma_fence_put(katom->dma_fence.fence);
+		katom->dma_fence.fence = NULL;
+	}
+}
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_out_remove() - Removes the input fence from atom
+ * @katom: Atom to remove input fence for
+ *
+ * This will also release the reference to this fence which the atom keeps
+ */
+static inline void kbase_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->dma_fence.fence_in) {
+		dma_fence_put(katom->dma_fence.fence_in);
+		katom->dma_fence.fence_in = NULL;
+	}
+}
+#endif
+
+/**
+ * kbase_fence_out_is_ours() - Check if atom has a valid fence created by us
+ * @katom: Atom to check output fence for
+ *
+ * Return: true if fence exists and is valid, otherwise false
+ */
+static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom)
+{
+	return katom->dma_fence.fence &&
+				katom->dma_fence.fence->ops == &kbase_fence_ops;
+}
+
+/**
+ * kbase_fence_out_signal() - Signal output fence of atom
+ * @katom: Atom to signal output fence for
+ * @status: Status to signal with (0 for success, < 0 for error)
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom,
+					 int status)
+{
+	if (status) {
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+	  KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)
+		fence_set_error(katom->dma_fence.fence, status);
+#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE)
+		dma_fence_set_error(katom->dma_fence.fence, status);
+#else
+		katom->dma_fence.fence->status = status;
+#endif
+	}
+	return dma_fence_signal(katom->dma_fence.fence);
+}
+
+/**
+ * kbase_fence_add_callback() - Add callback on @fence to block @katom
+ * @katom: Pointer to katom that will be blocked by @fence
+ * @fence: Pointer to fence on which to set up the callback
+ * @callback: Pointer to function to be called when fence is signaled
+ *
+ * Caller needs to hold a reference to @fence when calling this function, and
+ * the caller is responsible for releasing that reference.  An additional
+ * reference to @fence will be taken when the callback was successfully set up
+ * and @fence needs to be kept valid until the callback has been called and
+ * cleanup have been done.
+ *
+ * Return: 0 on success: fence was either already signaled, or callback was
+ * set up. Negative error code is returned on error.
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+int kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct fence *fence,
+			     fence_func_t callback);
+#else
+int kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct dma_fence *fence,
+			     dma_fence_func_t callback);
+#endif
+
+/**
+ * kbase_fence_dep_count_set() - Set dep_count value on atom to specified value
+ * @katom: Atom to set dep_count for
+ * @val: value to set dep_count to
+ *
+ * The dep_count is available to the users of this module so that they can
+ * synchronize completion of the wait with cancellation and adding of more
+ * callbacks. For instance, a user could do the following:
+ *
+ * dep_count set to 1
+ * callback #1 added, dep_count is increased to 2
+ *                             callback #1 happens, dep_count decremented to 1
+ *                             since dep_count > 0, no completion is done
+ * callback #2 is added, dep_count is increased to 2
+ * dep_count decremented to 1
+ *                             callback #2 happens, dep_count decremented to 0
+ *                             since dep_count now is zero, completion executes
+ *
+ * The dep_count can also be used to make sure that the completion only
+ * executes once. This is typically done by setting dep_count to -1 for the
+ * thread that takes on this responsibility.
+ */
+static inline void
+kbase_fence_dep_count_set(struct kbase_jd_atom *katom, int val)
+{
+	atomic_set(&katom->dma_fence.dep_count, val);
+}
+
+/**
+ * kbase_fence_dep_count_dec_and_test() - Decrements dep_count
+ * @katom: Atom to decrement dep_count for
+ *
+ * See @kbase_fence_dep_count_set for general description about dep_count
+ *
+ * Return: true if value was decremented to zero, otherwise false
+ */
+static inline bool
+kbase_fence_dep_count_dec_and_test(struct kbase_jd_atom *katom)
+{
+	return atomic_dec_and_test(&katom->dma_fence.dep_count);
+}
+
+/**
+ * kbase_fence_dep_count_read() - Returns the current dep_count value
+ * @katom: Pointer to katom
+ *
+ * See @kbase_fence_dep_count_set for general description about dep_count
+ *
+ * Return: The current dep_count value
+ */
+static inline int kbase_fence_dep_count_read(struct kbase_jd_atom *katom)
+{
+	return atomic_read(&katom->dma_fence.dep_count);
+}
+
+/**
+ * kbase_fence_free_callbacks() - Free dma-fence callbacks on a katom
+ * @katom: Pointer to katom
+ *
+ * This function will free all fence callbacks on the katom's list of
+ * callbacks. Callbacks that have not yet been called, because their fence
+ * hasn't yet signaled, will first be removed from the fence.
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ *
+ * Return: true if dep_count reached 0, otherwise false.
+ */
+bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom);
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_in_get() - Retrieve input fence for atom.
+ * @katom: Atom to get input fence from
+ *
+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it
+ *
+ * Return: The fence, or NULL if there is no input fence for atom
+ */
+#define kbase_fence_in_get(katom) dma_fence_get((katom)->dma_fence.fence_in)
+#endif
+
+/**
+ * kbase_fence_out_get() - Retrieve output fence for atom.
+ * @katom: Atom to get output fence from
+ *
+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it
+ *
+ * Return: The fence, or NULL if there is no output fence for atom
+ */
+#define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence)
+
+/**
+ * kbase_fence_put() - Releases a reference to a fence
+ * @fence: Fence to release reference for.
+ */
+#define kbase_fence_put(fence) dma_fence_put(fence)
+
+
+#endif /* CONFIG_MALI_DMA_FENCE || defined(CONFIG_SYNC_FILE */
+
+#endif /* _KBASE_FENCE_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h
new file mode 100755
index 0000000..607a95c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_FENCE_DEFS_H_
+#define _KBASE_FENCE_DEFS_H_
+
+/*
+ * There was a big rename in the 4.10 kernel (fence* -> dma_fence*)
+ * This file hides the compatibility issues with this for the rest the driver
+ */
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+
+#include <linux/fence.h>
+
+#define dma_fence_context_alloc(a) fence_context_alloc(a)
+#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e)
+#define dma_fence_get(a) fence_get(a)
+#define dma_fence_put(a) fence_put(a)
+#define dma_fence_signal(a) fence_signal(a)
+#define dma_fence_is_signaled(a) fence_is_signaled(a)
+#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c)
+#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b)
+
+#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0)
+#else
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0)
+#endif
+
+#else
+
+#include <linux/dma-fence.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
+#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \
+	(a)->status ?: 1 \
+	: 0)
+#endif
+
+#endif /* < 4.10.0 */
+
+#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */
+
+#endif /* _KBASE_FENCE_DEFS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
new file mode 100755
index 0000000..6428f08
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* NB taken from gator  */
+/*
+ * List of possible actions to be controlled by DS-5 Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping
+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
+ * they are unknown inside gator.
+ */
+
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#include <linux/types.h>
+
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP  2
+#define GATOR_JOB_SLOT_SOFT_STOPPED  3
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+
+struct kbase_context;
+
+void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value);
+void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif  /* _KBASE_GATOR_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
new file mode 100755
index 0000000..24b99f2
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
@@ -0,0 +1,138 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#ifndef _KBASE_GPU_ID_H_
+#define _KBASE_GPU_ID_H_
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT       0
+#define GPU_ID_VERSION_MINOR_SHIFT        4
+#define GPU_ID_VERSION_MAJOR_SHIFT        12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
+#define GPU_ID_VERSION_STATUS             (0xFu  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFFu << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xFu  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X                    0x6956u
+#define GPU_ID_PI_T62X                    0x0620u
+#define GPU_ID_PI_T76X                    0x0750u
+#define GPU_ID_PI_T72X                    0x0720u
+#define GPU_ID_PI_TFRX                    0x0880u
+#define GPU_ID_PI_T86X                    0x0860u
+#define GPU_ID_PI_T82X                    0x0820u
+#define GPU_ID_PI_T83X                    0x0830u
+
+/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
+#define GPU_ID_PI_NEW_FORMAT_START        0x1000
+#define GPU_ID_IS_NEW_FORMAT(product_id)  ((product_id) != GPU_ID_PI_T60X && \
+						(product_id) >= \
+						GPU_ID_PI_NEW_FORMAT_START)
+
+#define GPU_ID2_VERSION_STATUS_SHIFT      0
+#define GPU_ID2_VERSION_MINOR_SHIFT       4
+#define GPU_ID2_VERSION_MAJOR_SHIFT       12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT       16
+#define GPU_ID2_ARCH_REV_SHIFT            20
+#define GPU_ID2_ARCH_MINOR_SHIFT          24
+#define GPU_ID2_ARCH_MAJOR_SHIFT          28
+#define GPU_ID2_VERSION_STATUS            (0xFu << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xFu << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xFu << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+#define GPU_ID2_VERSION        (GPU_ID2_VERSION_MAJOR | \
+								GPU_ID2_VERSION_MINOR | \
+								GPU_ID2_VERSION_STATUS)
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+   a product ignoring its version. */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+		((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 (((u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 (((u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+   revision (major, minor, status) of a product */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+		((((u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 (((u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 (((u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
+	version_major, version_minor, version_status) \
+		(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+			product_major) | \
+		 GPU_ID2_VERSION_MAKE(version_major, version_minor,     \
+			version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+   a particular GPU model by its arch_major and product_major. */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
+		((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		(((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+   for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+   model. */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+		((((u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		    GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
+#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6, 1)
+#define GPU_ID2_PRODUCT_TSIX              GPU_ID2_MODEL_MAKE(7, 0)
+#define GPU_ID2_PRODUCT_TDVX              GPU_ID2_MODEL_MAKE(7, 3)
+#define GPU_ID2_PRODUCT_TNOX              GPU_ID2_MODEL_MAKE(7, 1)
+#define GPU_ID2_PRODUCT_TGOX              GPU_ID2_MODEL_MAKE(7, 2)
+#define GPU_ID2_PRODUCT_TEGX              GPU_ID2_MODEL_MAKE(8, 3)
+#define GPU_ID2_PRODUCT_TTRX              GPU_ID2_MODEL_MAKE(9, 0)
+#define GPU_ID2_PRODUCT_TNAX              GPU_ID2_MODEL_MAKE(9, 1)
+#define GPU_ID2_PRODUCT_TBEX              GPU_ID2_MODEL_MAKE(9, 2)
+#define GPU_ID2_PRODUCT_TULX              GPU_ID2_MODEL_MAKE(10, 0)
+#define GPU_ID2_PRODUCT_TDUX              GPU_ID2_MODEL_MAKE(10, 1)
+#define GPU_ID2_PRODUCT_TIDX              GPU_ID2_MODEL_MAKE(10, 3)
+#define GPU_ID2_PRODUCT_TVAX              GPU_ID2_MODEL_MAKE(10, 4)
+#define GPU_ID2_PRODUCT_TODX              GPU_ID2_MODEL_MAKE(10, 8)
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0                   0x1
+#define GPU_ID_S_EAC                      0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major,
+   minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+		((((u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		(((u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		(((u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		(((u32)status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+/* Statically set to 0 because the HW revision cannot be seen at compile time
+ * by the build system */
+#define GPU_HAS_CSF_VERSION_10_REVISION_2 (0)
+
+#endif /* _KBASE_GPU_ID_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
new file mode 100755
index 0000000..2c42f5c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -0,0 +1,103 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017, 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+/** Show callback for the @c gpu_memory debugfs file.
+ *
+ * This function is called to get the contents of the @c gpu_memory debugfs
+ * file. This is a report of current gpu memory usage.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if successfully prints data in debugfs entry file
+ *         -1 if it encountered an error
+ */
+
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+
+	kbdev_list = kbase_dev_list_get();
+	list_for_each(entry, kbdev_list) {
+		struct kbase_device *kbdev = NULL;
+		struct kbase_context *kctx;
+
+		kbdev = list_entry(entry, struct kbase_device, entry);
+		/* output the total memory usage and cap for this device */
+		seq_printf(sfile, "%-16s  %10u\n",
+				kbdev->devname,
+				atomic_read(&(kbdev->memdev.used_pages)));
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
+			/* output the memory usage and cap for each kctx
+			* opened on this device */
+			seq_printf(sfile, "  %s-0x%p %10u\n",
+				"kctx",
+				kctx,
+				atomic_read(&(kctx->used_pages)));
+		}
+		mutex_unlock(&kbdev->kctx_list_lock);
+	}
+	kbase_dev_list_put(kbdev_list);
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for gpu_memory
+ */
+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_gpu_memory_seq_show, NULL);
+}
+
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = kbasep_gpu_memory_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ *  Initialize debugfs entry for gpu_memory
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("gpu_memory", S_IRUGO,
+			kbdev->mali_debugfs_directory, NULL,
+			&kbasep_gpu_memory_debugfs_fops);
+	return;
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	return;
+}
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
new file mode 100755
index 0000000..28a871a
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H
+#define _KBASE_GPU_MEMORY_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+#endif  /*_KBASE_GPU_MEMORY_DEBUGFS_H*/
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
new file mode 100755
index 0000000..302d383
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -0,0 +1,579 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include "mali_kbase_ioctl.h"
+#include <linux/clk.h>
+#include <mali_kbase_pm_internal.h>
+#include <linux/of_platform.h>
+#include <linux/moduleparam.h>
+
+/**
+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
+ * @value:  The value from which to extract bits.
+ * @offset: The first bit to extract (0 being the LSB).
+ * @size:   The number of bits to extract.
+ *
+ * Context: @offset + @size <= 32.
+ *
+ * Return: Bits [@offset, @offset + @size) from @value.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+	(((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
+
+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+	struct mali_base_gpu_coherent_group *current_group;
+	u64 group_present;
+	u64 group_mask;
+	u64 first_set, first_set_prev;
+	u32 num_groups = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != props);
+
+	props->coherency_info.coherency = props->raw_props.mem_features;
+	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+	if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+		/* Group is l2 coherent */
+		group_present = props->raw_props.l2_present;
+	} else {
+		/* Group is l1 coherent */
+		group_present = props->raw_props.shader_present;
+	}
+
+	/*
+	 * The coherent group mask can be computed from the l2 present
+	 * register.
+	 *
+	 * For the coherent group n:
+	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
+	 * where first_set is group_present with only its nth set-bit kept
+	 * (i.e. the position from where a new group starts).
+	 *
+	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
+	 * The first mask is:
+	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
+	 *               = (0x0..010     - 1) & ~(0x0..01      - 1)
+	 *               =  0x0..00f
+	 * The second mask is:
+	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
+	 *               = (0x0..100     - 1) & ~(0x0..010     - 1)
+	 *               =  0x0..0f0
+	 * And so on until all the bits from group_present have been cleared
+	 * (i.e. there is no group left).
+	 */
+
+	current_group = props->coherency_info.group;
+	first_set = group_present & ~(group_present - 1);
+
+	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
+		group_present -= first_set;	/* Clear the current group bit */
+		first_set_prev = first_set;
+
+		first_set = group_present & ~(group_present - 1);
+		group_mask = (first_set - 1) & ~(first_set_prev - 1);
+
+		/* Populate the coherent_group structure for each group */
+		current_group->core_mask = group_mask & props->raw_props.shader_present;
+		current_group->num_cores = hweight64(current_group->core_mask);
+
+		num_groups++;
+		current_group++;
+	}
+
+	if (group_present != 0)
+		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+	props->coherency_info.num_groups = num_groups;
+}
+
+/**
+ * kbase_gpuprops_get_props - Get the GPU configuration
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values from the GPU configuration
+ * registers. Only the raw properties are filled in this function
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	struct kbase_gpuprops_regdump regdump;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get(kbdev, &regdump);
+
+	gpu_props->raw_props.gpu_id = regdump.gpu_id;
+	gpu_props->raw_props.tiler_features = regdump.tiler_features;
+	gpu_props->raw_props.mem_features = regdump.mem_features;
+	gpu_props->raw_props.mmu_features = regdump.mmu_features;
+	gpu_props->raw_props.l2_features = regdump.l2_features;
+	gpu_props->raw_props.core_features = regdump.core_features;
+
+	gpu_props->raw_props.as_present = regdump.as_present;
+	gpu_props->raw_props.js_present = regdump.js_present;
+	gpu_props->raw_props.shader_present =
+		((u64) regdump.shader_present_hi << 32) +
+		regdump.shader_present_lo;
+	gpu_props->raw_props.tiler_present =
+		((u64) regdump.tiler_present_hi << 32) +
+		regdump.tiler_present_lo;
+	gpu_props->raw_props.l2_present =
+		((u64) regdump.l2_present_hi << 32) +
+		regdump.l2_present_lo;
+	gpu_props->raw_props.stack_present =
+		((u64) regdump.stack_present_hi << 32) +
+		regdump.stack_present_lo;
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		gpu_props->raw_props.js_features[i] = regdump.js_features[i];
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
+
+	gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
+	gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
+	gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
+	gpu_props->raw_props.thread_features = regdump.thread_features;
+	gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc;
+}
+
+void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props)
+{
+	gpu_props->core_props.version_status =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+	gpu_props->core_props.minor_revision =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+	gpu_props->core_props.major_revision =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+	gpu_props->core_props.product_id =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+}
+
+/**
+ * kbase_gpuprops_calculate_props - Calculate the derived properties
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev:     The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values derived from the GPU
+ * configuration registers
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	int i;
+
+	/* Populate the base_gpu_props structure */
+	kbase_gpuprops_update_core_props_gpu_id(gpu_props);
+	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+#if KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE
+	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+#else
+	gpu_props->core_props.gpu_available_memory_size =
+		totalram_pages() << PAGE_SHIFT;
+#endif
+
+	gpu_props->core_props.num_exec_engines =
+		KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4);
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+
+	/* Field with number of l2 slices is added to MEM_FEATURES register
+	 * since t76x. Below code assumes that for older GPU reserved bits will
+	 * be read as zero. */
+	gpu_props->l2_props.num_l2_slices =
+		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+
+	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+	if (gpu_props->raw_props.thread_max_threads == 0)
+		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+	else
+		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+	else
+		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+	if (gpu_props->raw_props.thread_max_barrier_size == 0)
+		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+	else
+		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+	if (gpu_props->raw_props.thread_tls_alloc == 0)
+		gpu_props->thread_props.tls_alloc =
+				gpu_props->thread_props.max_threads;
+	else
+		gpu_props->thread_props.tls_alloc =
+				gpu_props->raw_props.thread_tls_alloc;
+
+#if GPU_HAS_CSF_VERSION_10_REVISION_2
+	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 22);
+	gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 22U, 2);
+	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 8);
+	gpu_props->thread_props.max_thread_group_split = 0;
+#else
+	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+	gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+#endif
+
+	/* If values are not specified, then use defaults */
+	if (gpu_props->thread_props.max_registers == 0) {
+		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+	}
+	/* Initialize the coherent_group structure for each group */
+	kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *gpu_props;
+	struct gpu_raw_gpu_props *raw;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	gpu_props = &kbdev->gpu_props;
+	raw = &gpu_props->props.raw_props;
+
+	/* Initialize the base_gpu_props structure from the hardware */
+	kbase_gpuprops_get_props(&gpu_props->props, kbdev);
+
+	/* Populate the derived properties */
+	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
+
+	/* Populate kbase-only fields */
+	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
+	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
+
+	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
+
+	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
+	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
+
+	gpu_props->num_cores = hweight64(raw->shader_present);
+	gpu_props->num_core_groups = hweight64(raw->l2_present);
+	gpu_props->num_address_spaces = hweight32(raw->as_present);
+	gpu_props->num_job_slots = hweight32(raw->js_present);
+}
+
+void kbase_gpuprops_set_features(struct kbase_device *kbdev)
+{
+	base_gpu_props *gpu_props;
+	struct kbase_gpuprops_regdump regdump;
+
+	gpu_props = &kbdev->gpu_props.props;
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get_features(kbdev, &regdump);
+
+	/*
+	 * Copy the raw value from the register, later this will get turned
+	 * into the selected coherency mode.
+	 * Additionally, add non-coherent mode, as this is always supported.
+	 */
+	gpu_props->raw_props.coherency_mode = regdump.coherency_features |
+		COHERENCY_FEATURE_BIT(COHERENCY_NONE);
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT))
+		gpu_props->thread_props.max_thread_group_split = 0;
+}
+
+/*
+ * Module parameters to allow the L2 size and hash configuration to be
+ * overridden.
+ *
+ * These parameters must be set on insmod to take effect, and are not visible
+ * in sysfs.
+ */
+static u8 override_l2_size;
+module_param(override_l2_size, byte, 0);
+MODULE_PARM_DESC(override_l2_size, "Override L2 size config for testing");
+
+static u8 override_l2_hash;
+module_param(override_l2_hash, byte, 0);
+MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing");
+
+/**
+ * kbase_read_l2_config_from_dt - Read L2 configuration
+ * @kbdev: The kbase device for which to get the L2 configuration.
+ *
+ * Check for L2 configuration overrides in module parameters and device tree.
+ * Override values in module parameters take priority over override values in
+ * device tree.
+ *
+ * Return: true if either size or hash was overridden, false if no overrides
+ * were found.
+ */
+static bool kbase_read_l2_config_from_dt(struct kbase_device * const kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+
+	if (!np)
+		return false;
+
+	if (override_l2_size)
+		kbdev->l2_size_override = override_l2_size;
+	else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override))
+		kbdev->l2_size_override = 0;
+
+	if (override_l2_hash)
+		kbdev->l2_hash_override = override_l2_hash;
+	else if (of_property_read_u8(np, "l2-hash", &kbdev->l2_hash_override))
+		kbdev->l2_hash_override = 0;
+
+	if (kbdev->l2_size_override || kbdev->l2_hash_override)
+		return true;
+
+	return false;
+}
+
+void kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) {
+		struct kbase_gpuprops_regdump regdump;
+		base_gpu_props *gpu_props = &kbdev->gpu_props.props;
+
+		/* Check for L2 cache size & hash overrides */
+		if (!kbase_read_l2_config_from_dt(kbdev))
+			return;
+
+		/* Need L2 to get powered to reflect to L2_FEATURES */
+		kbase_pm_context_active(kbdev);
+
+		/* Wait for the completion of L2 power transition */
+		kbase_pm_wait_for_l2_powered(kbdev);
+
+		/* Dump L2_FEATURES register */
+		kbase_backend_gpuprops_get_l2_features(kbdev, &regdump);
+
+		dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n",
+				regdump.l2_features);
+
+		/* Update gpuprops with reflected L2_FEATURES */
+		gpu_props->raw_props.l2_features = regdump.l2_features;
+		gpu_props->l2_props.log2_cache_size =
+			KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+
+		/* Let GPU idle */
+		kbase_pm_context_idle(kbdev);
+	}
+}
+
+static struct {
+	u32 type;
+	size_t offset;
+	int size;
+} gpu_property_mapping[] = {
+#define PROP(name, member) \
+	{KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \
+		sizeof(((struct base_gpu_props *)0)->member)}
+	PROP(PRODUCT_ID,                  core_props.product_id),
+	PROP(VERSION_STATUS,              core_props.version_status),
+	PROP(MINOR_REVISION,              core_props.minor_revision),
+	PROP(MAJOR_REVISION,              core_props.major_revision),
+	PROP(GPU_FREQ_KHZ_MAX,            core_props.gpu_freq_khz_max),
+	PROP(LOG2_PROGRAM_COUNTER_SIZE,   core_props.log2_program_counter_size),
+	PROP(TEXTURE_FEATURES_0,          core_props.texture_features[0]),
+	PROP(TEXTURE_FEATURES_1,          core_props.texture_features[1]),
+	PROP(TEXTURE_FEATURES_2,          core_props.texture_features[2]),
+	PROP(TEXTURE_FEATURES_3,          core_props.texture_features[3]),
+	PROP(GPU_AVAILABLE_MEMORY_SIZE,   core_props.gpu_available_memory_size),
+	PROP(NUM_EXEC_ENGINES,            core_props.num_exec_engines),
+
+	PROP(L2_LOG2_LINE_SIZE,           l2_props.log2_line_size),
+	PROP(L2_LOG2_CACHE_SIZE,          l2_props.log2_cache_size),
+	PROP(L2_NUM_L2_SLICES,            l2_props.num_l2_slices),
+
+	PROP(TILER_BIN_SIZE_BYTES,        tiler_props.bin_size_bytes),
+	PROP(TILER_MAX_ACTIVE_LEVELS,     tiler_props.max_active_levels),
+
+	PROP(MAX_THREADS,                 thread_props.max_threads),
+	PROP(MAX_WORKGROUP_SIZE,          thread_props.max_workgroup_size),
+	PROP(MAX_BARRIER_SIZE,            thread_props.max_barrier_size),
+	PROP(MAX_REGISTERS,               thread_props.max_registers),
+	PROP(MAX_TASK_QUEUE,              thread_props.max_task_queue),
+	PROP(MAX_THREAD_GROUP_SPLIT,      thread_props.max_thread_group_split),
+	PROP(IMPL_TECH,                   thread_props.impl_tech),
+	PROP(TLS_ALLOC,                   thread_props.tls_alloc),
+
+	PROP(RAW_SHADER_PRESENT,          raw_props.shader_present),
+	PROP(RAW_TILER_PRESENT,           raw_props.tiler_present),
+	PROP(RAW_L2_PRESENT,              raw_props.l2_present),
+	PROP(RAW_STACK_PRESENT,           raw_props.stack_present),
+	PROP(RAW_L2_FEATURES,             raw_props.l2_features),
+	PROP(RAW_CORE_FEATURES,           raw_props.core_features),
+	PROP(RAW_MEM_FEATURES,            raw_props.mem_features),
+	PROP(RAW_MMU_FEATURES,            raw_props.mmu_features),
+	PROP(RAW_AS_PRESENT,              raw_props.as_present),
+	PROP(RAW_JS_PRESENT,              raw_props.js_present),
+	PROP(RAW_JS_FEATURES_0,           raw_props.js_features[0]),
+	PROP(RAW_JS_FEATURES_1,           raw_props.js_features[1]),
+	PROP(RAW_JS_FEATURES_2,           raw_props.js_features[2]),
+	PROP(RAW_JS_FEATURES_3,           raw_props.js_features[3]),
+	PROP(RAW_JS_FEATURES_4,           raw_props.js_features[4]),
+	PROP(RAW_JS_FEATURES_5,           raw_props.js_features[5]),
+	PROP(RAW_JS_FEATURES_6,           raw_props.js_features[6]),
+	PROP(RAW_JS_FEATURES_7,           raw_props.js_features[7]),
+	PROP(RAW_JS_FEATURES_8,           raw_props.js_features[8]),
+	PROP(RAW_JS_FEATURES_9,           raw_props.js_features[9]),
+	PROP(RAW_JS_FEATURES_10,          raw_props.js_features[10]),
+	PROP(RAW_JS_FEATURES_11,          raw_props.js_features[11]),
+	PROP(RAW_JS_FEATURES_12,          raw_props.js_features[12]),
+	PROP(RAW_JS_FEATURES_13,          raw_props.js_features[13]),
+	PROP(RAW_JS_FEATURES_14,          raw_props.js_features[14]),
+	PROP(RAW_JS_FEATURES_15,          raw_props.js_features[15]),
+	PROP(RAW_TILER_FEATURES,          raw_props.tiler_features),
+	PROP(RAW_TEXTURE_FEATURES_0,      raw_props.texture_features[0]),
+	PROP(RAW_TEXTURE_FEATURES_1,      raw_props.texture_features[1]),
+	PROP(RAW_TEXTURE_FEATURES_2,      raw_props.texture_features[2]),
+	PROP(RAW_TEXTURE_FEATURES_3,      raw_props.texture_features[3]),
+	PROP(RAW_GPU_ID,                  raw_props.gpu_id),
+	PROP(RAW_THREAD_MAX_THREADS,      raw_props.thread_max_threads),
+	PROP(RAW_THREAD_MAX_WORKGROUP_SIZE,
+			raw_props.thread_max_workgroup_size),
+	PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size),
+	PROP(RAW_THREAD_FEATURES,         raw_props.thread_features),
+	PROP(RAW_THREAD_TLS_ALLOC,        raw_props.thread_tls_alloc),
+	PROP(RAW_COHERENCY_MODE,          raw_props.coherency_mode),
+
+	PROP(COHERENCY_NUM_GROUPS,        coherency_info.num_groups),
+	PROP(COHERENCY_NUM_CORE_GROUPS,   coherency_info.num_core_groups),
+	PROP(COHERENCY_COHERENCY,         coherency_info.coherency),
+	PROP(COHERENCY_GROUP_0,           coherency_info.group[0].core_mask),
+	PROP(COHERENCY_GROUP_1,           coherency_info.group[1].core_mask),
+	PROP(COHERENCY_GROUP_2,           coherency_info.group[2].core_mask),
+	PROP(COHERENCY_GROUP_3,           coherency_info.group[3].core_mask),
+	PROP(COHERENCY_GROUP_4,           coherency_info.group[4].core_mask),
+	PROP(COHERENCY_GROUP_5,           coherency_info.group[5].core_mask),
+	PROP(COHERENCY_GROUP_6,           coherency_info.group[6].core_mask),
+	PROP(COHERENCY_GROUP_7,           coherency_info.group[7].core_mask),
+	PROP(COHERENCY_GROUP_8,           coherency_info.group[8].core_mask),
+	PROP(COHERENCY_GROUP_9,           coherency_info.group[9].core_mask),
+	PROP(COHERENCY_GROUP_10,          coherency_info.group[10].core_mask),
+	PROP(COHERENCY_GROUP_11,          coherency_info.group[11].core_mask),
+	PROP(COHERENCY_GROUP_12,          coherency_info.group[12].core_mask),
+	PROP(COHERENCY_GROUP_13,          coherency_info.group[13].core_mask),
+	PROP(COHERENCY_GROUP_14,          coherency_info.group[14].core_mask),
+	PROP(COHERENCY_GROUP_15,          coherency_info.group[15].core_mask),
+
+#undef PROP
+};
+
+int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *kprops = &kbdev->gpu_props;
+	struct base_gpu_props *props = &kprops->props;
+	u32 count = ARRAY_SIZE(gpu_property_mapping);
+	u32 i;
+	u32 size = 0;
+	u8 *p;
+
+	for (i = 0; i < count; i++) {
+		/* 4 bytes for the ID, and the size of the property */
+		size += 4 + gpu_property_mapping[i].size;
+	}
+
+	kprops->prop_buffer_size = size;
+	kprops->prop_buffer = kmalloc(size, GFP_KERNEL);
+
+	if (!kprops->prop_buffer) {
+		kprops->prop_buffer_size = 0;
+		return -ENOMEM;
+	}
+
+	p = kprops->prop_buffer;
+
+#define WRITE_U8(v) (*p++ = (v) & 0xFF)
+#define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0)
+#define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0)
+#define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0)
+
+	for (i = 0; i < count; i++) {
+		u32 type = gpu_property_mapping[i].type;
+		u8 type_size;
+		void *field = ((u8 *)props) + gpu_property_mapping[i].offset;
+
+		switch (gpu_property_mapping[i].size) {
+		case 1:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U8;
+			break;
+		case 2:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U16;
+			break;
+		case 4:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U32;
+			break;
+		case 8:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U64;
+			break;
+		default:
+			dev_err(kbdev->dev,
+				"Invalid gpu_property_mapping type=%d size=%d",
+				type, gpu_property_mapping[i].size);
+			return -EINVAL;
+		}
+
+		WRITE_U32((type<<2) | type_size);
+
+		switch (type_size) {
+		case KBASE_GPUPROP_VALUE_SIZE_U8:
+			WRITE_U8(*((u8 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U16:
+			WRITE_U16(*((u16 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U32:
+			WRITE_U32(*((u32 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U64:
+			WRITE_U64(*((u64 *)field));
+			break;
+		default: /* Cannot be reached */
+			WARN_ON(1);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
new file mode 100755
index 0000000..8edba48
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -0,0 +1,85 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015,2017,2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev		The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_set_features - Set up Kbase GPU properties
+ * @kbdev:   Device pointer
+ *
+ * This function sets up GPU properties that are dependent on the hardware
+ * features bitmask. This function must be preceeded by a call to
+ * kbase_hw_set_features_mask().
+ */
+void kbase_gpuprops_set_features(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_update_l2_features - Update GPU property of L2_FEATURES
+ * @kbdev:   Device pointer
+ *
+ * This function updates l2_features and the log2 cache size.
+ */
+void kbase_gpuprops_update_l2_features(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_populate_user_buffer - Populate the GPU properties buffer
+ * @kbdev: The kbase device
+ *
+ * Fills kbdev->gpu_props->prop_buffer with the GPU properties for user
+ * space to read.
+ */
+int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_update_core_props_gpu_id - break down gpu id value
+ * @gpu_props: the &base_gpu_props structure
+ *
+ * Break down gpu_id value stored in base_gpu_props::raw_props.gpu_id into
+ * separate fields (version_status, minor_revision, major_revision, product_id)
+ * stored in base_gpu_props::core_props.
+ */
+void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props);
+
+
+#endif				/* _KBASE_GPUPROPS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
new file mode 100755
index 0000000..d7877d1
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -0,0 +1,98 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_gpuprops_types.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_TYPES_H_
+#define _KBASE_GPUPROPS_TYPES_H_
+
+#include "mali_base_kernel.h"
+
+#define KBASE_GPU_SPEED_MHZ    123
+#define KBASE_GPU_PC_SIZE_LOG2 24U
+
+struct kbase_gpuprops_regdump {
+	u32 gpu_id;
+	u32 l2_features;
+	u32 core_features;
+	u32 tiler_features;
+	u32 mem_features;
+	u32 mmu_features;
+	u32 as_present;
+	u32 js_present;
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+	u32 thread_tls_alloc;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 shader_present_lo;
+	u32 shader_present_hi;
+	u32 tiler_present_lo;
+	u32 tiler_present_hi;
+	u32 l2_present_lo;
+	u32 l2_present_hi;
+	u32 stack_present_lo;
+	u32 stack_present_hi;
+	u32 coherency_features;
+};
+
+struct kbase_gpu_cache_props {
+	u8 associativity;
+	u8 external_bus_width;
+};
+
+struct kbase_gpu_mem_props {
+	u8 core_group;
+};
+
+struct kbase_gpu_mmu_props {
+	u8 va_bits;
+	u8 pa_bits;
+};
+
+struct kbase_gpu_props {
+	/* kernel-only properties */
+	u8 num_cores;
+	u8 num_core_groups;
+	u8 num_address_spaces;
+	u8 num_job_slots;
+
+	struct kbase_gpu_cache_props l2_props;
+
+	struct kbase_gpu_mem_props mem;
+	struct kbase_gpu_mmu_props mmu;
+
+	/* Properties shared with userspace */
+	base_gpu_props props;
+
+	u32 prop_buffer_size;
+	void *prop_buffer;
+};
+
+#endif				/* _KBASE_GPUPROPS_TYPES_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c
new file mode 100755
index 0000000..75a0820
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c
@@ -0,0 +1,269 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_gwt.h"
+#include <linux/list_sort.h>
+
+static inline void kbase_gpu_gwt_setup_page_permission(
+				struct kbase_context *kctx,
+				unsigned long flag,
+				struct rb_node *node)
+{
+	struct rb_node *rbnode = node;
+
+	while (rbnode) {
+		struct kbase_va_region *reg;
+		int err = 0;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->nr_pages && !kbase_is_region_invalid_or_free(reg) &&
+					(reg->flags & KBASE_REG_GPU_WR)) {
+			err = kbase_mmu_update_pages(kctx, reg->start_pfn,
+					kbase_get_gpu_phy_pages(reg),
+					reg->gpu_alloc->nents,
+					reg->flags & flag,
+					reg->gpu_alloc->group_id);
+			if (err)
+				dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages failure\n");
+		}
+
+		rbnode = rb_next(rbnode);
+	}
+}
+
+static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx,
+					unsigned long flag)
+{
+	kbase_gpu_gwt_setup_page_permission(kctx, flag,
+				rb_first(&(kctx->reg_rbtree_same)));
+	kbase_gpu_gwt_setup_page_permission(kctx, flag,
+				rb_first(&(kctx->reg_rbtree_custom)));
+}
+
+
+int kbase_gpu_gwt_start(struct kbase_context *kctx)
+{
+	kbase_gpu_vm_lock(kctx);
+	if (kctx->gwt_enabled) {
+		kbase_gpu_vm_unlock(kctx);
+		return -EBUSY;
+	}
+
+	INIT_LIST_HEAD(&kctx->gwt_current_list);
+	INIT_LIST_HEAD(&kctx->gwt_snapshot_list);
+
+	/* If GWT is enabled using new vector dumping format
+	 * from user space, back up status of the job serialization flag and
+	 * use full serialisation of jobs for dumping.
+	 * Status will be restored on end of dumping in gwt_stop.
+	 */
+	kctx->kbdev->backup_serialize_jobs = kctx->kbdev->serialize_jobs;
+	kctx->kbdev->serialize_jobs = KBASE_SERIALIZE_INTRA_SLOT |
+						KBASE_SERIALIZE_INTER_SLOT;
+
+	/* Mark gwt enabled before making pages read only in case a
+	   write page fault is triggered while we're still in this loop.
+	   (kbase_gpu_vm_lock() doesn't prevent this!)
+	*/
+	kctx->gwt_enabled = true;
+	kctx->gwt_was_enabled = true;
+
+	kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR);
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+}
+
+int kbase_gpu_gwt_stop(struct kbase_context *kctx)
+{
+	struct kbasep_gwt_list_element *pos, *n;
+
+	kbase_gpu_vm_lock(kctx);
+	if (!kctx->gwt_enabled) {
+		kbase_gpu_vm_unlock(kctx);
+		return -EINVAL;
+	}
+
+	list_for_each_entry_safe(pos, n, &kctx->gwt_current_list, link) {
+		list_del(&pos->link);
+		kfree(pos);
+	}
+
+	list_for_each_entry_safe(pos, n, &kctx->gwt_snapshot_list, link) {
+		list_del(&pos->link);
+		kfree(pos);
+	}
+
+	kctx->kbdev->serialize_jobs = kctx->kbdev->backup_serialize_jobs;
+
+	kbase_gpu_gwt_setup_pages(kctx, ~0UL);
+
+	kctx->gwt_enabled = false;
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+}
+
+
+static int list_cmp_function(void *priv, struct list_head *a,
+				struct list_head *b)
+{
+	struct kbasep_gwt_list_element *elementA = container_of(a,
+				struct kbasep_gwt_list_element, link);
+	struct kbasep_gwt_list_element *elementB = container_of(b,
+				struct kbasep_gwt_list_element, link);
+
+	CSTD_UNUSED(priv);
+
+	if (elementA->page_addr > elementB->page_addr)
+		return 1;
+	return -1;
+}
+
+static void kbase_gpu_gwt_collate(struct kbase_context *kctx,
+		struct list_head *snapshot_list)
+{
+	struct kbasep_gwt_list_element *pos, *n;
+	struct kbasep_gwt_list_element *collated = NULL;
+
+	/* Sort the list */
+	list_sort(NULL, snapshot_list, list_cmp_function);
+
+	/* Combine contiguous areas. */
+	list_for_each_entry_safe(pos, n, snapshot_list, link) {
+		if (collated == NULL ||	collated->region !=
+					pos->region ||
+					(collated->page_addr +
+					(collated->num_pages * PAGE_SIZE)) !=
+					pos->page_addr) {
+			/* This is the first time through, a new region or
+			 * is not contiguous - start collating to this element
+			 */
+			collated = pos;
+		} else {
+			/* contiguous so merge */
+			collated->num_pages += pos->num_pages;
+			/* remove element from list */
+			list_del(&pos->link);
+			kfree(pos);
+		}
+	}
+}
+
+int kbase_gpu_gwt_dump(struct kbase_context *kctx,
+			union kbase_ioctl_cinstr_gwt_dump *gwt_dump)
+{
+	const u32 ubuf_size = gwt_dump->in.len;
+	u32 ubuf_count = 0;
+	__user void *user_addr = (__user void *)
+			(uintptr_t)gwt_dump->in.addr_buffer;
+	__user void *user_sizes = (__user void *)
+			(uintptr_t)gwt_dump->in.size_buffer;
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (!kctx->gwt_enabled) {
+		kbase_gpu_vm_unlock(kctx);
+		/* gwt_dump shouldn't be called when gwt is disabled */
+		return -EPERM;
+	}
+
+	if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer
+			|| !gwt_dump->in.size_buffer) {
+		kbase_gpu_vm_unlock(kctx);
+		/* We don't have any valid user space buffer to copy the
+		 * write modified addresses.
+		 */
+		return -EINVAL;
+	}
+
+	if (list_empty(&kctx->gwt_snapshot_list) &&
+			!list_empty(&kctx->gwt_current_list)) {
+
+		list_replace_init(&kctx->gwt_current_list,
+					&kctx->gwt_snapshot_list);
+
+		/* We have collected all write faults so far
+		 * and they will be passed on to user space.
+		 * Reset the page flags state to allow collection of
+		 * further write faults.
+		 */
+		kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR);
+
+		/* Sort and combine consecutive pages in the dump list*/
+		kbase_gpu_gwt_collate(kctx, &kctx->gwt_snapshot_list);
+	}
+
+	while ((!list_empty(&kctx->gwt_snapshot_list))) {
+		u64 addr_buffer[32];
+		u64 num_page_buffer[32];
+		u32 count = 0;
+		int err;
+		struct kbasep_gwt_list_element *dump_info, *n;
+
+		list_for_each_entry_safe(dump_info, n,
+				&kctx->gwt_snapshot_list, link) {
+			addr_buffer[count] = dump_info->page_addr;
+			num_page_buffer[count] = dump_info->num_pages;
+			count++;
+			list_del(&dump_info->link);
+			kfree(dump_info);
+			if (ARRAY_SIZE(addr_buffer) == count ||
+					ubuf_size == (ubuf_count + count))
+				break;
+		}
+
+		if (count) {
+			err = copy_to_user((user_addr +
+					(ubuf_count * sizeof(u64))),
+					(void *)addr_buffer,
+					count * sizeof(u64));
+			if (err) {
+				dev_err(kctx->kbdev->dev, "Copy to user failure\n");
+				kbase_gpu_vm_unlock(kctx);
+				return err;
+			}
+			err = copy_to_user((user_sizes +
+					(ubuf_count * sizeof(u64))),
+					(void *)num_page_buffer,
+					count * sizeof(u64));
+			if (err) {
+				dev_err(kctx->kbdev->dev, "Copy to user failure\n");
+				kbase_gpu_vm_unlock(kctx);
+				return err;
+			}
+
+			ubuf_count += count;
+		}
+
+		if (ubuf_count == ubuf_size)
+			break;
+	}
+
+	if (!list_empty(&kctx->gwt_snapshot_list))
+		gwt_dump->out.more_data_available = 1;
+	else
+		gwt_dump->out.more_data_available = 0;
+
+	gwt_dump->out.no_of_addr_collected = ubuf_count;
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h
new file mode 100755
index 0000000..7e7746e
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h
@@ -0,0 +1,55 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_KBASE_GWT_H)
+#define _KBASE_GWT_H
+
+#include <mali_kbase.h>
+#include <mali_kbase_ioctl.h>
+
+/**
+ * kbase_gpu_gwt_start - Start the GPU write tracking
+ * @kctx: Pointer to kernel context
+ *
+ * @return 0 on success, error on failure.
+ */
+int kbase_gpu_gwt_start(struct kbase_context *kctx);
+
+/**
+ * kbase_gpu_gwt_stop - Stop the GPU write tracking
+ * @kctx: Pointer to kernel context
+ *
+ * @return 0 on success, error on failure.
+ */
+int kbase_gpu_gwt_stop(struct kbase_context *kctx);
+
+/**
+ * kbase_gpu_gwt_dump - Pass page address of faulting addresses to user space.
+ * @kctx:	Pointer to kernel context
+ * @gwt_dump:	User space data to be passed.
+ *
+ * @return 0 on success, error on failure.
+ */
+int kbase_gpu_gwt_dump(struct kbase_context *kctx,
+			union kbase_ioctl_cinstr_gwt_dump *gwt_dump);
+
+#endif /* _KBASE_GWT_H */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
new file mode 100755
index 0000000..8330698
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -0,0 +1,585 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Run-time work-arounds helpers
+ */
+
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_midg_regmap.h>
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+
+void kbase_hw_set_features_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_feature *features;
+	u32 gpu_id;
+	u32 product_id;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			features = base_hw_features_tMIx;
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			features = base_hw_features_tHEx;
+			break;
+		case GPU_ID2_PRODUCT_TSIX:
+			features = base_hw_features_tSIx;
+			break;
+		case GPU_ID2_PRODUCT_TDVX:
+			features = base_hw_features_tDVx;
+			break;
+		case GPU_ID2_PRODUCT_TNOX:
+			features = base_hw_features_tNOx;
+			break;
+		case GPU_ID2_PRODUCT_TGOX:
+			features = base_hw_features_tGOx;
+			break;
+		case GPU_ID2_PRODUCT_TEGX:
+			features = base_hw_features_tEGx;
+			break;
+		case GPU_ID2_PRODUCT_TTRX:
+			features = base_hw_features_tTRx;
+			break;
+		case GPU_ID2_PRODUCT_TNAX:
+			features = base_hw_features_tNAx;
+			break;
+		case GPU_ID2_PRODUCT_TBEX:
+			features = base_hw_features_tBEx;
+			break;
+		case GPU_ID2_PRODUCT_TULX:
+			features = base_hw_features_tULx;
+			break;
+		case GPU_ID2_PRODUCT_TDUX:
+			features = base_hw_features_tDUx;
+			break;
+		case GPU_ID2_PRODUCT_TODX:
+			features = base_hw_features_tODx;
+			break;
+		case GPU_ID2_PRODUCT_TIDX:
+			features = base_hw_features_tIDx;
+			break;
+		case GPU_ID2_PRODUCT_TVAX:
+			features = base_hw_features_tVAx;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	} else {
+		switch (product_id) {
+		case GPU_ID_PI_TFRX:
+			/* FALLTHROUGH */
+		case GPU_ID_PI_T86X:
+			features = base_hw_features_tFxx;
+			break;
+		case GPU_ID_PI_T83X:
+			features = base_hw_features_t83x;
+			break;
+		case GPU_ID_PI_T82X:
+			features = base_hw_features_t82x;
+			break;
+		case GPU_ID_PI_T76X:
+			features = base_hw_features_t76x;
+			break;
+		case GPU_ID_PI_T72X:
+			features = base_hw_features_t72x;
+			break;
+		case GPU_ID_PI_T62X:
+			features = base_hw_features_t62x;
+			break;
+		case GPU_ID_PI_T60X:
+			features = base_hw_features_t60x;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	}
+
+	for (; *features != BASE_HW_FEATURE_END; features++)
+		set_bit(*features, &kbdev->hw_features_mask[0]);
+
+#if defined(CONFIG_MALI_JOB_DUMP) || defined(CONFIG_MALI_VECTOR_DUMP)
+	/* When dumping is enabled, need to disable flush reduction optimization
+	 * for GPUs on which it is safe to have only cache clean operation at
+	 * the end of job chain.
+	 * This is required to make job dumping work. There is some discrepancy
+	 * in the implementation of flush reduction optimization due to
+	 * unclear or ambiguous ARCH spec.
+	 */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE))
+		clear_bit(BASE_HW_FEATURE_FLUSH_REDUCTION,
+			&kbdev->hw_features_mask[0]);
+#endif
+}
+
+/**
+ * kbase_hw_get_issues_for_new_id - Get the hardware issues for a new GPU ID
+ * @kbdev: Device pointer
+ *
+ * Return: pointer to an array of hardware issues, terminated by
+ * BASE_HW_ISSUE_END.
+ *
+ * This function can only be used on new-format GPU IDs, i.e. those for which
+ * GPU_ID_IS_NEW_FORMAT evaluates as true. The GPU ID is read from the @kbdev.
+ *
+ * In debugging versions of the driver, unknown versions of a known GPU will
+ * be treated as the most recent known version not later than the actual
+ * version. In such circumstances, the GPU ID in @kbdev will also be replaced
+ * with the most recent known version.
+ *
+ * Note: The GPU configuration must have been read by kbase_gpuprops_get_props()
+ * before calling this function.
+ */
+static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
+					struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues = NULL;
+
+	struct base_hw_product {
+		u32 product_model;
+		struct {
+			u32 version;
+			const enum base_hw_issue *issues;
+		} map[7];
+	};
+
+	static const struct base_hw_product base_hw_products[] = {
+		{GPU_ID2_PRODUCT_TMIX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 1),
+		   base_hw_issues_tMIx_r0p0_05dev0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1},
+		  {U32_MAX /* sentinel value */, NULL} } },
+
+		{GPU_ID2_PRODUCT_THEX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2},
+		  {GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TSIX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0},
+		  {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TDVX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TNOX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TGOX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TEGX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tEGx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TTRX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TNAX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TBEX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TULX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tULx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TDUX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TODX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TIDX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tIDx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TVAX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0},
+		  {U32_MAX, NULL} } },
+	};
+
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 product_model = gpu_id & GPU_ID2_PRODUCT_MODEL;
+	const struct base_hw_product *product = NULL;
+	size_t p;
+
+	/* Stop when we reach the end of the products array. */
+	for (p = 0; p < ARRAY_SIZE(base_hw_products); ++p) {
+		if (product_model == base_hw_products[p].product_model) {
+			product = &base_hw_products[p];
+			break;
+		}
+	}
+
+	if (product != NULL) {
+		/* Found a matching product. */
+		const u32 version = gpu_id & GPU_ID2_VERSION;
+		u32 fallback_version = 0;
+		const enum base_hw_issue *fallback_issues = NULL;
+		size_t v;
+
+		/* Stop when we reach the end of the map. */
+		for (v = 0; product->map[v].version != U32_MAX; ++v) {
+
+			if (version == product->map[v].version) {
+				/* Exact match so stop. */
+				issues = product->map[v].issues;
+				break;
+			}
+
+			/* Check whether this is a candidate for most recent
+				known version not later than the actual
+				version. */
+			if ((version > product->map[v].version) &&
+				(product->map[v].version >= fallback_version)) {
+#if MALI_CUSTOMER_RELEASE
+				/* Match on version's major and minor fields */
+				if (((version ^ product->map[v].version) >>
+					GPU_ID2_VERSION_MINOR_SHIFT) == 0)
+#endif
+				{
+					fallback_version = product->map[v].version;
+					fallback_issues = product->map[v].issues;
+				}
+			}
+		}
+
+		if ((issues == NULL) && (fallback_issues != NULL)) {
+			/* Fall back to the issue set of the most recent known
+				version not later than the actual version. */
+			issues = fallback_issues;
+
+#if MALI_CUSTOMER_RELEASE
+			dev_warn(kbdev->dev,
+				"GPU hardware issue table may need updating:\n"
+#else
+			dev_info(kbdev->dev,
+#endif
+				"r%dp%d status %d is unknown; treating as r%dp%d status %d",
+				(gpu_id & GPU_ID2_VERSION_MAJOR) >>
+					GPU_ID2_VERSION_MAJOR_SHIFT,
+				(gpu_id & GPU_ID2_VERSION_MINOR) >>
+					GPU_ID2_VERSION_MINOR_SHIFT,
+				(gpu_id & GPU_ID2_VERSION_STATUS) >>
+					GPU_ID2_VERSION_STATUS_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_MAJOR) >>
+					GPU_ID2_VERSION_MAJOR_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_MINOR) >>
+					GPU_ID2_VERSION_MINOR_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_STATUS) >>
+					GPU_ID2_VERSION_STATUS_SHIFT);
+
+			gpu_id &= ~GPU_ID2_VERSION;
+			gpu_id |= fallback_version;
+			kbdev->gpu_props.props.raw_props.gpu_id = gpu_id;
+
+			kbase_gpuprops_update_core_props_gpu_id(
+				&kbdev->gpu_props.props);
+		}
+	}
+	return issues;
+}
+
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues;
+	u32 gpu_id;
+	u32 product_id;
+	u32 impl_tech;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
+
+	if (impl_tech != IMPLEMENTATION_MODEL) {
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			issues = kbase_hw_get_issues_for_new_id(kbdev);
+			if (issues == NULL) {
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+
+#if !MALI_CUSTOMER_RELEASE
+			/* The GPU ID might have been replaced with the last
+			   known version of the same GPU. */
+			gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+#endif
+
+		} else {
+			switch (gpu_id) {
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+				issues = base_hw_issues_t60x_r0p0_15dev0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+				issues = base_hw_issues_t60x_r0p0_eac;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+				issues = base_hw_issues_t60x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+				issues = base_hw_issues_t62x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+				issues = base_hw_issues_t62x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+				issues = base_hw_issues_t62x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+				issues = base_hw_issues_t76x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+				issues = base_hw_issues_t76x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+				issues = base_hw_issues_t76x_r0p1_50rel0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+				issues = base_hw_issues_t76x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+				issues = base_hw_issues_t76x_r0p3;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+				issues = base_hw_issues_t76x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+				issues = base_hw_issues_t72x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+				issues = base_hw_issues_t72x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+				issues = base_hw_issues_t72x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+				issues = base_hw_issues_tFRx_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+				issues = base_hw_issues_tFRx_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8):
+				issues = base_hw_issues_tFRx_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
+				issues = base_hw_issues_tFRx_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+				issues = base_hw_issues_t86x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8):
+				issues = base_hw_issues_t86x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
+				issues = base_hw_issues_t86x_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
+				issues = base_hw_issues_t83x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8):
+				issues = base_hw_issues_t83x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
+				issues = base_hw_issues_t82x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
+				issues = base_hw_issues_t82x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8):
+				issues = base_hw_issues_t82x_r1p0;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		}
+	} else {
+		/* Software model */
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+			case GPU_ID2_PRODUCT_TMIX:
+				issues = base_hw_issues_model_tMIx;
+				break;
+			case GPU_ID2_PRODUCT_THEX:
+				issues = base_hw_issues_model_tHEx;
+				break;
+			case GPU_ID2_PRODUCT_TSIX:
+				issues = base_hw_issues_model_tSIx;
+				break;
+			case GPU_ID2_PRODUCT_TDVX:
+				issues = base_hw_issues_model_tDVx;
+				break;
+			case GPU_ID2_PRODUCT_TNOX:
+				issues = base_hw_issues_model_tNOx;
+				break;
+			case GPU_ID2_PRODUCT_TGOX:
+				issues = base_hw_issues_model_tGOx;
+				break;
+			case GPU_ID2_PRODUCT_TEGX:
+				issues = base_hw_issues_model_tEGx;
+				break;
+			case GPU_ID2_PRODUCT_TTRX:
+				issues = base_hw_issues_model_tTRx;
+				break;
+			case GPU_ID2_PRODUCT_TNAX:
+				issues = base_hw_issues_model_tNAx;
+				break;
+			case GPU_ID2_PRODUCT_TBEX:
+				issues = base_hw_issues_model_tBEx;
+				break;
+			case GPU_ID2_PRODUCT_TULX:
+				issues = base_hw_issues_model_tULx;
+				break;
+			case GPU_ID2_PRODUCT_TDUX:
+				issues = base_hw_issues_model_tDUx;
+				break;
+			case GPU_ID2_PRODUCT_TODX:
+				issues = base_hw_issues_model_tODx;
+				break;
+			case GPU_ID2_PRODUCT_TIDX:
+				issues = base_hw_issues_model_tIDx;
+				break;
+			case GPU_ID2_PRODUCT_TVAX:
+				issues = base_hw_issues_model_tVAx;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		} else {
+			switch (product_id) {
+			case GPU_ID_PI_T60X:
+				issues = base_hw_issues_model_t60x;
+				break;
+			case GPU_ID_PI_T62X:
+				issues = base_hw_issues_model_t62x;
+				break;
+			case GPU_ID_PI_T72X:
+				issues = base_hw_issues_model_t72x;
+				break;
+			case GPU_ID_PI_T76X:
+				issues = base_hw_issues_model_t76x;
+				break;
+			case GPU_ID_PI_TFRX:
+				issues = base_hw_issues_model_tFRx;
+				break;
+			case GPU_ID_PI_T86X:
+				issues = base_hw_issues_model_t86x;
+				break;
+			case GPU_ID_PI_T83X:
+				issues = base_hw_issues_model_t83x;
+				break;
+			case GPU_ID_PI_T82X:
+				issues = base_hw_issues_model_t82x;
+				break;
+			default:
+				dev_err(kbdev->dev, "Unknown GPU ID %x",
+					gpu_id);
+				return -EINVAL;
+			}
+		}
+	}
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		dev_info(kbdev->dev,
+			"GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d",
+			(gpu_id & GPU_ID2_PRODUCT_MAJOR) >>
+				GPU_ID2_PRODUCT_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_MAJOR) >>
+				GPU_ID2_ARCH_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_MINOR) >>
+				GPU_ID2_ARCH_MINOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_REV) >>
+				GPU_ID2_ARCH_REV_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_MAJOR) >>
+				GPU_ID2_VERSION_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_MINOR) >>
+				GPU_ID2_VERSION_MINOR_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_STATUS) >>
+				GPU_ID2_VERSION_STATUS_SHIFT);
+	} else {
+		dev_info(kbdev->dev,
+			"GPU identified as 0x%04x r%dp%d status %d",
+			(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+				GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+			(gpu_id & GPU_ID_VERSION_MAJOR) >>
+				GPU_ID_VERSION_MAJOR_SHIFT,
+			(gpu_id & GPU_ID_VERSION_MINOR) >>
+				GPU_ID_VERSION_MINOR_SHIFT,
+			(gpu_id & GPU_ID_VERSION_STATUS) >>
+				GPU_ID_VERSION_STATUS_SHIFT);
+	}
+
+	for (; *issues != BASE_HW_ISSUE_END; issues++)
+		set_bit(*issues, &kbdev->hw_issues_mask[0]);
+
+	return 0;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
new file mode 100755
index 0000000..f386b16
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file
+ * Run-time work-arounds helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+	test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+	test_bit(feature, &(kbdev)->hw_features_mask[0])
+
+/**
+ * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID
+ * @kbdev: Device pointer
+ *
+ * Return: 0 if the GPU ID was recognized, otherwise -EINVAL.
+ *
+ * The GPU ID is read from the @kbdev.
+ *
+ * In debugging versions of the driver, unknown versions of a known GPU with a
+ * new-format ID will be treated as the most recent known version not later
+ * than the actual version. In such circumstances, the GPU ID in @kbdev will
+ * also be replaced with the most recent known version.
+ *
+ * Note: The GPU configuration must have been read by
+ * kbase_gpuprops_get_props() before calling this function.
+ */
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif				/* _KBASE_HW_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
new file mode 100755
index 0000000..d5e3d3a
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * HW access backend common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_BACKEND_H_
+#define _KBASE_HWACCESS_BACKEND_H_
+
+/**
+ * kbase_backend_early_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_early_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_devfreq_init - Perform backend devfreq related initialization.
+ * @kbdev:      Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_devfreq_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_devfreq_term - Perform backend-devfreq termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
new file mode 100755
index 0000000..124a2d9
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -0,0 +1,51 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/**
+ * @file mali_kbase_hwaccess_gpu_defs.h
+ * HW access common definitions
+ */
+
+#ifndef _KBASE_HWACCESS_DEFS_H_
+#define _KBASE_HWACCESS_DEFS_H_
+
+#include <mali_kbase_jm_defs.h>
+
+/**
+ * struct kbase_hwaccess_data - object encapsulating the GPU backend specific
+ *                              data for the HW access layer.
+ *                              hwaccess_lock (a spinlock) must be held when
+ *                              accessing this structure.
+ * @active_kctx:     pointer to active kbase context which last submitted an
+ *                   atom to GPU and while the context is active it can
+ *                   submit new atoms to GPU from the irq context also, without
+ *                   going through the bottom half of job completion path.
+ * @backend:         GPU backend specific data for HW access layer
+ */
+struct kbase_hwaccess_data {
+	struct kbase_context *active_kctx[BASE_JM_MAX_NR_SLOTS];
+
+	struct kbase_backend_data backend;
+};
+
+#endif /* _KBASE_HWACCESS_DEFS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
new file mode 100755
index 0000000..62628b6
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2018, 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ *				  GPU
+ * @kbdev:	Device pointer
+ * @regdump:	Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * The caller should ensure that GPU remains powered-on during this function.
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+/**
+ * kbase_backend_gpuprops_get_features - Fill @regdump with GPU properties read
+ *                                       from GPU
+ * @kbdev:   Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads GPU properties that are dependent on the hardware
+ * features bitmask. It will power-on the GPU if required.
+ */
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+/**
+ * kbase_backend_gpuprops_get_l2_features - Fill @regdump with L2_FEATURES read
+ *                                          from GPU
+ * @kbdev:   Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads L2_FEATURES register that is dependent on the hardware
+ * features bitmask. It will power-on the GPU if required.
+ */
+void kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
new file mode 100755
index 0000000..d5b9099
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,142 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include <mali_kbase_instr_defs.h>
+
+/**
+ * struct kbase_instr_hwcnt_enable - Enable hardware counter collection.
+ * @dump_buffer:       GPU address to write counters to.
+ * @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer.
+ * @jm_bm:             counters selection bitmask (JM).
+ * @shader_bm:         counters selection bitmask (Shader).
+ * @tiler_bm:          counters selection bitmask (Tiler).
+ * @mmu_l2_bm:         counters selection bitmask (MMU_L2).
+ * @use_secondary:     use secondary performance counters set for applicable
+ *                     counter blocks.
+ */
+struct kbase_instr_hwcnt_enable {
+	u64 dump_buffer;
+	u64 dump_buffer_bytes;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 mmu_l2_bm;
+	bool use_secondary;
+};
+
+/**
+ * kbase_instr_hwcnt_enable_internal() - Enable HW counters collection
+ * @kbdev:	Kbase device
+ * @kctx:	Kbase context
+ * @enable:	HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				struct kbase_instr_hwcnt_enable *enable);
+
+/**
+ * kbase_instr_hwcnt_disable_internal() - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx:	Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true,
+ * of call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ *				       completed.
+ * @kctx:	Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ *				     completed
+ * @kctx:	Kbase context
+ * @success:	Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx:	Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_init() - Terminate the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
new file mode 100755
index 0000000..c040753
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_JM_H_
+#define _KBASE_HWACCESS_JM_H_
+
+/**
+ * kbase_backend_run_atom() - Run an atom on the GPU
+ * @kbdev:	Device pointer
+ * @atom:	Atom to run
+ *
+ * Caller must hold the HW access lock
+ */
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_slot_update - Update state based on slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ *
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ */
+void kbase_backend_slot_update(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_find_and_release_free_address_space() - Release a free AS
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * This function can evict an idle context from the runpool, freeing up the
+ * address space it was using.
+ *
+ * The address space is marked as in use. The caller must either assign a
+ * context using kbase_gpu_use_ctx(), or release it using
+ * kbase_ctx_sched_release()
+ *
+ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none
+ *	   available
+ */
+int kbase_backend_find_and_release_free_address_space(
+		struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the
+ *			     provided address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @as_nr:	Free address space to use
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * Return: true if successful, false if ASID not assigned.
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr);
+
+/**
+ * kbase_backend_use_ctx_sched() - Activate a context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ * @js:         Job slot to activate context on
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * The context must already be scheduled and assigned to an address space. If
+ * the context is not scheduled, then kbase_gpu_use_ctx() should be used
+ * instead.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if context is now active, false otherwise (ie if context does
+ *	   not have an address space assigned)
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+					struct kbase_context *kctx, int js);
+
+/**
+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
+ *                                 de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+				struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will
+ *                                   de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex
+ *
+ * This function must perform any operations that could not be performed in IRQ
+ * context by kbase_backend_release_ctx_irq().
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_cache_clean - Perform a cache clean if the given atom requires
+ *                            one
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the failed atom
+ *
+ * On some GPUs, the GPU cache must be cleaned following a failed atom. This
+ * function performs a clean if it is required by @katom.
+ */
+void kbase_backend_cache_clean(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom);
+
+
+/**
+ * kbase_backend_complete_wq() - Perform backend-specific actions required on
+ *				 completing an atom.
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the atom to complete
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ *
+ * Return: true if atom has completed, false if atom should be re-submitted
+ */
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions
+ *                                        required on completing an atom, after
+ *                                        any scheduling has taken place.
+ * @kbdev:         Device pointer
+ * @core_req:      Core requirements of atom
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ */
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req);
+
+/**
+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
+ *			   and remove any others from the ringbuffers.
+ * @kbdev:		Device pointer
+ * @end_timestamp:	Timestamp of reset
+ */
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
+
+/**
+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
+ *                              @js
+ * @kbdev: Device pointer
+ * @js:    Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
+ *				      slot.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot
+ */
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
+ *					that are currently on the GPU.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot @js that are currently on the GPU.
+ */
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
+ *				       has changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg starting/stopping
+ * scheduling timers).
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg updating timeouts of
+ * currently running atoms).
+ */
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_slot_free() - Return the number of jobs that can be currently
+ *			       submitted to slot @js.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of jobs that can be submitted.
+ */
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
+ *                                        running from a context
+ * @kctx: Context pointer
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting
+ * context from the hardware.
+ */
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+
+/**
+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and
+ *                               to be descheduled.
+ * @kctx: Context pointer
+ *
+ * This should be called following kbase_js_zap_context(), to ensure the context
+ * can be safely destroyed.
+ */
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
+
+/**
+ * kbase_backend_get_current_flush_id - Return the current flush ID
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: the current flush ID to be recorded for each job chain
+ */
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom);
+
+/* Object containing callbacks for enabling/disabling protected mode, used
+ * on GPU which supports protected mode switching natively.
+ */
+extern struct protected_mode_ops kbase_native_protected_ops;
+
+#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
new file mode 100755
index 0000000..44c16f4
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -0,0 +1,234 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/**
+ * @file mali_kbase_hwaccess_pm.h
+ * HW access power manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_PM_H_
+#define _KBASE_HWACCESS_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+#include <mali_kbase_pm_defs.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/* Functions common to all HW access backends */
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the power management framework was successfully initialized.
+ */
+int kbase_hwaccess_pm_early_init(struct kbase_device *kbdev);
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function (except
+ * @ref kbase_hwaccess_pm_early_init)
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the power management framework was successfully initialized.
+ */
+int kbase_hwaccess_pm_late_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_pm_init)
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_hwaccess_pm_early_term(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_hwaccess_pm_early_term or @ref kbase_hwaccess_pm_late_init)
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_hwaccess_pm_late_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_hwaccess_pm_powerup - Power up the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
+ *
+ * Power up GPU after all modules have been initialized and interrupt handlers
+ * installed.
+ *
+ * Return: 0 if powerup was successful.
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ *
+ * Should ensure that no new interrupts are generated, but allow any currently
+ * running interrupt handlers to complete successfully. The GPU is forced off by
+ * the time this function returns, regardless of whether or not the active power
+ * policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to suspend the GPU
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to resume the GPU from a suspend
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for activating the GPU. Called when the first
+ * context goes active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for idling the GPU. Called when the last
+ * context goes idle.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev         The kbase device structure for the device (must be a
+ *                      valid pointer)
+ * @param new_core_mask_js0 The core mask to use for job slot 0
+ * @param new_core_mask_js0 The core mask to use for job slot 1
+ * @param new_core_mask_js0 The core mask to use for job slot 2
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+					const struct kbase_pm_policy *policy);
+
+/**
+ * kbase_pm_list_policies - Retrieve a static list of the available policies.
+ *
+ * @kbdev:   The kbase device structure for the device.
+ * @list:    An array pointer to take the list of policies. This may be NULL.
+ *           The contents of this array must not be modified.
+ *
+ * Return: The number of policies
+ */
+int kbase_pm_list_policies(struct kbase_device *kbdev,
+	const struct kbase_pm_policy * const **list);
+
+#endif /* _KBASE_HWACCESS_PM_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
new file mode 100755
index 0000000..f7539f5
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -0,0 +1,62 @@
+/*
+ *
+ * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/**
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kbdev:	Kbase device
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_device *kbdev)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_device *kbdev);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c
new file mode 100755
index 0000000..265fc21
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c
@@ -0,0 +1,807 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Implementation of hardware counter context and accumulator APIs.
+ */
+
+#include "mali_kbase_hwcnt_context.h"
+#include "mali_kbase_hwcnt_accumulator.h"
+#include "mali_kbase_hwcnt_backend.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_malisw.h"
+#include "mali_kbase_debug.h"
+#include "mali_kbase_linux.h"
+
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+
+/**
+ * enum kbase_hwcnt_accum_state - Hardware counter accumulator states.
+ * @ACCUM_STATE_ERROR:    Error state, where all accumulator operations fail.
+ * @ACCUM_STATE_DISABLED: Disabled state, where dumping is always disabled.
+ * @ACCUM_STATE_ENABLED:  Enabled state, where dumping is enabled if there are
+ *                        any enabled counters.
+ */
+enum kbase_hwcnt_accum_state {
+	ACCUM_STATE_ERROR,
+	ACCUM_STATE_DISABLED,
+	ACCUM_STATE_ENABLED
+};
+
+/**
+ * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure.
+ * @backend:                Pointer to created counter backend.
+ * @state:                  The current state of the accumulator.
+ *                           - State transition from disabled->enabled or
+ *                             disabled->error requires state_lock.
+ *                           - State transition from enabled->disabled or
+ *                             enabled->error requires both accum_lock and
+ *                             state_lock.
+ *                           - Error state persists until next disable.
+ * @enable_map:             The current set of enabled counters.
+ *                           - Must only be modified while holding both
+ *                             accum_lock and state_lock.
+ *                           - Can be read while holding either lock.
+ *                           - Must stay in sync with enable_map_any_enabled.
+ * @enable_map_any_enabled: True if any counters in the map are enabled, else
+ *                          false. If true, and state is ACCUM_STATE_ENABLED,
+ *                          then the counter backend will be enabled.
+ *                           - Must only be modified while holding both
+ *                             accum_lock and state_lock.
+ *                           - Can be read while holding either lock.
+ *                           - Must stay in sync with enable_map.
+ * @scratch_map:            Scratch enable map, used as temporary enable map
+ *                          storage during dumps.
+ *                           - Must only be read or modified while holding
+ *                             accum_lock.
+ * @accum_buf:              Accumulation buffer, where dumps will be accumulated
+ *                          into on transition to a disable state.
+ *                           - Must only be read or modified while holding
+ *                             accum_lock.
+ * @accumulated:            True if the accumulation buffer has been accumulated
+ *                          into and not subsequently read from yet, else false.
+ *                           - Must only be read or modified while holding
+ *                             accum_lock.
+ * @ts_last_dump_ns:        Timestamp (ns) of the end time of the most recent
+ *                          dump that was requested by the user.
+ *                           - Must only be read or modified while holding
+ *                             accum_lock.
+ */
+struct kbase_hwcnt_accumulator {
+	struct kbase_hwcnt_backend *backend;
+	enum kbase_hwcnt_accum_state state;
+	struct kbase_hwcnt_enable_map enable_map;
+	bool enable_map_any_enabled;
+	struct kbase_hwcnt_enable_map scratch_map;
+	struct kbase_hwcnt_dump_buffer accum_buf;
+	bool accumulated;
+	u64 ts_last_dump_ns;
+};
+
+/**
+ * struct kbase_hwcnt_context - Hardware counter context structure.
+ * @iface:         Pointer to hardware counter backend interface.
+ * @state_lock:    Spinlock protecting state.
+ * @disable_count: Disable count of the context. Initialised to 1.
+ *                 Decremented when the accumulator is acquired, and incremented
+ *                 on release. Incremented on calls to
+ *                 kbase_hwcnt_context_disable[_atomic], and decremented on
+ *                 calls to kbase_hwcnt_context_enable.
+ *                  - Must only be read or modified while holding state_lock.
+ * @accum_lock:    Mutex protecting accumulator.
+ * @accum_inited:  Flag to prevent concurrent accumulator initialisation and/or
+ *                 termination. Set to true before accumulator initialisation,
+ *                 and false after accumulator termination.
+ *                  - Must only be modified while holding both accum_lock and
+ *                    state_lock.
+ *                  - Can be read while holding either lock.
+ * @accum:         Hardware counter accumulator structure.
+ */
+struct kbase_hwcnt_context {
+	const struct kbase_hwcnt_backend_interface *iface;
+	spinlock_t state_lock;
+	size_t disable_count;
+	struct mutex accum_lock;
+	bool accum_inited;
+	struct kbase_hwcnt_accumulator accum;
+};
+
+int kbase_hwcnt_context_init(
+	const struct kbase_hwcnt_backend_interface *iface,
+	struct kbase_hwcnt_context **out_hctx)
+{
+	struct kbase_hwcnt_context *hctx = NULL;
+
+	if (!iface || !out_hctx)
+		return -EINVAL;
+
+	hctx = kzalloc(sizeof(*hctx), GFP_KERNEL);
+	if (!hctx)
+		return -ENOMEM;
+
+	hctx->iface = iface;
+	spin_lock_init(&hctx->state_lock);
+	hctx->disable_count = 1;
+	mutex_init(&hctx->accum_lock);
+	hctx->accum_inited = false;
+
+	*out_hctx = hctx;
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_init);
+
+void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx)
+{
+	if (!hctx)
+		return;
+
+	/* Make sure we didn't leak the accumulator */
+	WARN_ON(hctx->accum_inited);
+	kfree(hctx);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_term);
+
+/**
+ * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ */
+static void kbasep_hwcnt_accumulator_term(struct kbase_hwcnt_context *hctx)
+{
+	WARN_ON(!hctx);
+	WARN_ON(!hctx->accum_inited);
+
+	kbase_hwcnt_enable_map_free(&hctx->accum.scratch_map);
+	kbase_hwcnt_dump_buffer_free(&hctx->accum.accum_buf);
+	kbase_hwcnt_enable_map_free(&hctx->accum.enable_map);
+	hctx->iface->term(hctx->accum.backend);
+	memset(&hctx->accum, 0, sizeof(hctx->accum));
+}
+
+/**
+ * kbasep_hwcnt_accumulator_init() - Initialise the accumulator for the context.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx)
+{
+	int errcode;
+
+	WARN_ON(!hctx);
+	WARN_ON(!hctx->accum_inited);
+
+	errcode = hctx->iface->init(
+		hctx->iface->info, &hctx->accum.backend);
+	if (errcode)
+		goto error;
+
+	hctx->accum.state = ACCUM_STATE_ERROR;
+
+	errcode = kbase_hwcnt_enable_map_alloc(
+		hctx->iface->metadata, &hctx->accum.enable_map);
+	if (errcode)
+		goto error;
+
+	hctx->accum.enable_map_any_enabled = false;
+
+	errcode = kbase_hwcnt_dump_buffer_alloc(
+		hctx->iface->metadata, &hctx->accum.accum_buf);
+	if (errcode)
+		goto error;
+
+	errcode = kbase_hwcnt_enable_map_alloc(
+		hctx->iface->metadata, &hctx->accum.scratch_map);
+	if (errcode)
+		goto error;
+
+	hctx->accum.accumulated = false;
+
+	hctx->accum.ts_last_dump_ns =
+		hctx->iface->timestamp_ns(hctx->accum.backend);
+
+	return 0;
+
+error:
+	kbasep_hwcnt_accumulator_term(hctx);
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_accumulator_disable() - Transition the accumulator into the
+ *                                      disabled state, from the enabled or
+ *                                      error states.
+ * @hctx:       Non-NULL pointer to hardware counter context.
+ * @accumulate: True if we should accumulate before disabling, else false.
+ */
+static void kbasep_hwcnt_accumulator_disable(
+	struct kbase_hwcnt_context *hctx, bool accumulate)
+{
+	int errcode = 0;
+	bool backend_enabled = false;
+	struct kbase_hwcnt_accumulator *accum;
+	unsigned long flags;
+
+	WARN_ON(!hctx);
+	lockdep_assert_held(&hctx->accum_lock);
+	WARN_ON(!hctx->accum_inited);
+
+	accum = &hctx->accum;
+
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	WARN_ON(hctx->disable_count != 0);
+	WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED);
+
+	if ((hctx->accum.state == ACCUM_STATE_ENABLED) &&
+	    (accum->enable_map_any_enabled))
+		backend_enabled = true;
+
+	if (!backend_enabled)
+		hctx->accum.state = ACCUM_STATE_DISABLED;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+	/* Early out if the backend is not already enabled */
+	if (!backend_enabled)
+		return;
+
+	if (!accumulate)
+		goto disable;
+
+	/* Try and accumulate before disabling */
+	errcode = hctx->iface->dump_request(accum->backend);
+	if (errcode)
+		goto disable;
+
+	errcode = hctx->iface->dump_wait(accum->backend);
+	if (errcode)
+		goto disable;
+
+	errcode = hctx->iface->dump_get(accum->backend,
+		&accum->accum_buf, &accum->enable_map, accum->accumulated);
+	if (errcode)
+		goto disable;
+
+	accum->accumulated = true;
+
+disable:
+	hctx->iface->dump_disable(accum->backend);
+
+	/* Regardless of any errors during the accumulate, put the accumulator
+	 * in the disabled state.
+	 */
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	hctx->accum.state = ACCUM_STATE_DISABLED;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+}
+
+/**
+ * kbasep_hwcnt_accumulator_enable() - Transition the accumulator into the
+ *                                     enabled state, from the disabled state.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ */
+static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx)
+{
+	int errcode = 0;
+	struct kbase_hwcnt_accumulator *accum;
+
+	WARN_ON(!hctx);
+	lockdep_assert_held(&hctx->state_lock);
+	WARN_ON(!hctx->accum_inited);
+	WARN_ON(hctx->accum.state != ACCUM_STATE_DISABLED);
+
+	accum = &hctx->accum;
+
+	/* The backend only needs enabling if any counters are enabled */
+	if (accum->enable_map_any_enabled)
+		errcode = hctx->iface->dump_enable_nolock(
+			accum->backend, &accum->enable_map);
+
+	if (!errcode)
+		accum->state = ACCUM_STATE_ENABLED;
+	else
+		accum->state = ACCUM_STATE_ERROR;
+}
+
+/**
+ * kbasep_hwcnt_accumulator_dump() - Perform a dump with the most up-to-date
+ *                                   values of enabled counters possible, and
+ *                                   optionally update the set of enabled
+ *                                   counters.
+ * @hctx :       Non-NULL pointer to the hardware counter context
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ * @new_map:     Pointer to the new counter enable map. If non-NULL, must have
+ *               the same metadata as the accumulator. If NULL, the set of
+ *               enabled counters will be unchanged.
+ */
+static int kbasep_hwcnt_accumulator_dump(
+	struct kbase_hwcnt_context *hctx,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf,
+	const struct kbase_hwcnt_enable_map *new_map)
+{
+	int errcode = 0;
+	unsigned long flags;
+	enum kbase_hwcnt_accum_state state;
+	bool dump_requested = false;
+	bool dump_written = false;
+	bool cur_map_any_enabled;
+	struct kbase_hwcnt_enable_map *cur_map;
+	bool new_map_any_enabled = false;
+	u64 dump_time_ns;
+	struct kbase_hwcnt_accumulator *accum;
+
+	WARN_ON(!hctx);
+	WARN_ON(!ts_start_ns);
+	WARN_ON(!ts_end_ns);
+	WARN_ON(dump_buf && (dump_buf->metadata != hctx->iface->metadata));
+	WARN_ON(new_map && (new_map->metadata != hctx->iface->metadata));
+	WARN_ON(!hctx->accum_inited);
+	lockdep_assert_held(&hctx->accum_lock);
+
+	accum = &hctx->accum;
+	cur_map = &accum->scratch_map;
+
+	/* Save out info about the current enable map */
+	cur_map_any_enabled = accum->enable_map_any_enabled;
+	kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map);
+
+	if (new_map)
+		new_map_any_enabled =
+			kbase_hwcnt_enable_map_any_enabled(new_map);
+
+	/*
+	 * We're holding accum_lock, so the accumulator state might transition
+	 * from disabled to enabled during this function (as enabling is lock
+	 * free), but it will never disable (as disabling needs to hold the
+	 * accum_lock), nor will it ever transition from enabled to error (as
+	 * an enable while we're already enabled is impossible).
+	 *
+	 * If we're already disabled, we'll only look at the accumulation buffer
+	 * rather than do a real dump, so a concurrent enable does not affect
+	 * us.
+	 *
+	 * If a concurrent enable fails, we might transition to the error
+	 * state, but again, as we're only looking at the accumulation buffer,
+	 * it's not an issue.
+	 */
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	state = accum->state;
+
+	/*
+	 * Update the new map now, such that if an enable occurs during this
+	 * dump then that enable will set the new map. If we're already enabled,
+	 * then we'll do it ourselves after the dump.
+	 */
+	if (new_map) {
+		kbase_hwcnt_enable_map_copy(
+			&accum->enable_map, new_map);
+		accum->enable_map_any_enabled = new_map_any_enabled;
+	}
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+	/* Error state, so early out. No need to roll back any map updates */
+	if (state == ACCUM_STATE_ERROR)
+		return -EIO;
+
+	/* Initiate the dump if the backend is enabled. */
+	if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) {
+		/* Disable pre-emption, to make the timestamp as accurate as
+		 * possible.
+		 */
+		preempt_disable();
+		{
+			dump_time_ns = hctx->iface->timestamp_ns(
+				accum->backend);
+			if (dump_buf) {
+				errcode = hctx->iface->dump_request(
+					accum->backend);
+				dump_requested = true;
+			} else {
+				errcode = hctx->iface->dump_clear(
+					accum->backend);
+			}
+		}
+		preempt_enable();
+		if (errcode)
+			goto error;
+	} else {
+		dump_time_ns = hctx->iface->timestamp_ns(accum->backend);
+	}
+
+	/* Copy any accumulation into the dest buffer */
+	if (accum->accumulated && dump_buf) {
+		kbase_hwcnt_dump_buffer_copy(
+			dump_buf, &accum->accum_buf, cur_map);
+		dump_written = true;
+	}
+
+	/* Wait for any requested dumps to complete */
+	if (dump_requested) {
+		WARN_ON(state != ACCUM_STATE_ENABLED);
+		errcode = hctx->iface->dump_wait(accum->backend);
+		if (errcode)
+			goto error;
+	}
+
+	/* If we're enabled and there's a new enable map, change the enabled set
+	 * as soon after the dump has completed as possible.
+	 */
+	if ((state == ACCUM_STATE_ENABLED) && new_map) {
+		/* Backend is only enabled if there were any enabled counters */
+		if (cur_map_any_enabled)
+			hctx->iface->dump_disable(accum->backend);
+
+		/* (Re-)enable the backend if the new map has enabled counters.
+		 * No need to acquire the spinlock, as concurrent enable while
+		 * we're already enabled and holding accum_lock is impossible.
+		 */
+		if (new_map_any_enabled) {
+			errcode = hctx->iface->dump_enable(
+				accum->backend, new_map);
+			if (errcode)
+				goto error;
+		}
+	}
+
+	/* Copy, accumulate, or zero into the dest buffer to finish */
+	if (dump_buf) {
+		/* If we dumped, copy or accumulate it into the destination */
+		if (dump_requested) {
+			WARN_ON(state != ACCUM_STATE_ENABLED);
+			errcode = hctx->iface->dump_get(
+				accum->backend,
+				dump_buf,
+				cur_map,
+				dump_written);
+			if (errcode)
+				goto error;
+			dump_written = true;
+		}
+
+		/* If we've not written anything into the dump buffer so far, it
+		 * means there was nothing to write. Zero any enabled counters.
+		 */
+		if (!dump_written)
+			kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map);
+	}
+
+	/* Write out timestamps */
+	*ts_start_ns = accum->ts_last_dump_ns;
+	*ts_end_ns = dump_time_ns;
+
+	accum->accumulated = false;
+	accum->ts_last_dump_ns = dump_time_ns;
+
+	return 0;
+error:
+	/* An error was only physically possible if the backend was enabled */
+	WARN_ON(state != ACCUM_STATE_ENABLED);
+
+	/* Disable the backend, and transition to the error state */
+	hctx->iface->dump_disable(accum->backend);
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	accum->state = ACCUM_STATE_ERROR;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_context_disable() - Increment the disable count of the context.
+ * @hctx:       Non-NULL pointer to hardware counter context.
+ * @accumulate: True if we should accumulate before disabling, else false.
+ */
+static void kbasep_hwcnt_context_disable(
+	struct kbase_hwcnt_context *hctx, bool accumulate)
+{
+	unsigned long flags;
+
+	WARN_ON(!hctx);
+	lockdep_assert_held(&hctx->accum_lock);
+
+	if (!kbase_hwcnt_context_disable_atomic(hctx)) {
+		kbasep_hwcnt_accumulator_disable(hctx, accumulate);
+
+		spin_lock_irqsave(&hctx->state_lock, flags);
+
+		/* Atomic disable failed and we're holding the mutex, so current
+		 * disable count must be 0.
+		 */
+		WARN_ON(hctx->disable_count != 0);
+		hctx->disable_count++;
+
+		spin_unlock_irqrestore(&hctx->state_lock, flags);
+	}
+}
+
+int kbase_hwcnt_accumulator_acquire(
+	struct kbase_hwcnt_context *hctx,
+	struct kbase_hwcnt_accumulator **accum)
+{
+	int errcode = 0;
+	unsigned long flags;
+
+	if (!hctx || !accum)
+		return -EINVAL;
+
+	mutex_lock(&hctx->accum_lock);
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	if (!hctx->accum_inited)
+		/* Set accum initing now to prevent concurrent init */
+		hctx->accum_inited = true;
+	else
+		/* Already have an accum, or already being inited */
+		errcode = -EBUSY;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+	mutex_unlock(&hctx->accum_lock);
+
+	if (errcode)
+		return errcode;
+
+	errcode = kbasep_hwcnt_accumulator_init(hctx);
+
+	if (errcode) {
+		mutex_lock(&hctx->accum_lock);
+		spin_lock_irqsave(&hctx->state_lock, flags);
+
+		hctx->accum_inited = false;
+
+		spin_unlock_irqrestore(&hctx->state_lock, flags);
+		mutex_unlock(&hctx->accum_lock);
+
+		return errcode;
+	}
+
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	WARN_ON(hctx->disable_count == 0);
+	WARN_ON(hctx->accum.enable_map_any_enabled);
+
+	/* Decrement the disable count to allow the accumulator to be accessible
+	 * now that it's fully constructed.
+	 */
+	hctx->disable_count--;
+
+	/*
+	 * Make sure the accumulator is initialised to the correct state.
+	 * Regardless of initial state, counters don't need to be enabled via
+	 * the backend, as the initial enable map has no enabled counters.
+	 */
+	hctx->accum.state = (hctx->disable_count == 0) ?
+		ACCUM_STATE_ENABLED :
+		ACCUM_STATE_DISABLED;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+	*accum = &hctx->accum;
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_acquire);
+
+void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum)
+{
+	unsigned long flags;
+	struct kbase_hwcnt_context *hctx;
+
+	if (!accum)
+		return;
+
+	hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+	mutex_lock(&hctx->accum_lock);
+
+	/* Double release is a programming error */
+	WARN_ON(!hctx->accum_inited);
+
+	/* Disable the context to ensure the accumulator is inaccesible while
+	 * we're destroying it. This performs the corresponding disable count
+	 * increment to the decrement done during acquisition.
+	 */
+	kbasep_hwcnt_context_disable(hctx, false);
+
+	mutex_unlock(&hctx->accum_lock);
+
+	kbasep_hwcnt_accumulator_term(hctx);
+
+	mutex_lock(&hctx->accum_lock);
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	hctx->accum_inited = false;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+	mutex_unlock(&hctx->accum_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_release);
+
+void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx)
+{
+	if (WARN_ON(!hctx))
+		return;
+
+	/* Try and atomically disable first, so we can avoid locking the mutex
+	 * if we don't need to.
+	 */
+	if (kbase_hwcnt_context_disable_atomic(hctx))
+		return;
+
+	mutex_lock(&hctx->accum_lock);
+
+	kbasep_hwcnt_context_disable(hctx, true);
+
+	mutex_unlock(&hctx->accum_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable);
+
+bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx)
+{
+	unsigned long flags;
+	bool atomic_disabled = false;
+
+	if (WARN_ON(!hctx))
+		return false;
+
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	if (!WARN_ON(hctx->disable_count == SIZE_MAX)) {
+		/*
+		 * If disable count is non-zero or no counters are enabled, we
+		 * can just bump the disable count.
+		 *
+		 * Otherwise, we can't disable in an atomic context.
+		 */
+		if (hctx->disable_count != 0) {
+			hctx->disable_count++;
+			atomic_disabled = true;
+		} else {
+			WARN_ON(!hctx->accum_inited);
+			if (!hctx->accum.enable_map_any_enabled) {
+				hctx->disable_count++;
+				hctx->accum.state = ACCUM_STATE_DISABLED;
+				atomic_disabled = true;
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+	return atomic_disabled;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable_atomic);
+
+void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx)
+{
+	unsigned long flags;
+
+	if (WARN_ON(!hctx))
+		return;
+
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	if (!WARN_ON(hctx->disable_count == 0)) {
+		if (hctx->disable_count == 1)
+			kbasep_hwcnt_accumulator_enable(hctx);
+
+		hctx->disable_count--;
+	}
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_enable);
+
+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
+	struct kbase_hwcnt_context *hctx)
+{
+	if (!hctx)
+		return NULL;
+
+	return hctx->iface->metadata;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_metadata);
+
+int kbase_hwcnt_accumulator_set_counters(
+	struct kbase_hwcnt_accumulator *accum,
+	const struct kbase_hwcnt_enable_map *new_map,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_context *hctx;
+
+	if (!accum || !new_map || !ts_start_ns || !ts_end_ns)
+		return -EINVAL;
+
+	hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+	if ((new_map->metadata != hctx->iface->metadata) ||
+	    (dump_buf && (dump_buf->metadata != hctx->iface->metadata)))
+		return -EINVAL;
+
+	mutex_lock(&hctx->accum_lock);
+
+	errcode = kbasep_hwcnt_accumulator_dump(
+		hctx, ts_start_ns, ts_end_ns, dump_buf, new_map);
+
+	mutex_unlock(&hctx->accum_lock);
+
+	return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_set_counters);
+
+int kbase_hwcnt_accumulator_dump(
+	struct kbase_hwcnt_accumulator *accum,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_context *hctx;
+
+	if (!accum || !ts_start_ns || !ts_end_ns)
+		return -EINVAL;
+
+	hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+	if (dump_buf && (dump_buf->metadata != hctx->iface->metadata))
+		return -EINVAL;
+
+	mutex_lock(&hctx->accum_lock);
+
+	errcode = kbasep_hwcnt_accumulator_dump(
+		hctx, ts_start_ns, ts_end_ns, dump_buf, NULL);
+
+	mutex_unlock(&hctx->accum_lock);
+
+	return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_dump);
+
+u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum)
+{
+	struct kbase_hwcnt_context *hctx;
+
+	if (WARN_ON(!accum))
+		return 0;
+
+	hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+	return hctx->iface->timestamp_ns(accum->backend);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h
new file mode 100755
index 0000000..eb82ea4
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h
@@ -0,0 +1,146 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Hardware counter accumulator API.
+ */
+
+#ifndef _KBASE_HWCNT_ACCUMULATOR_H_
+#define _KBASE_HWCNT_ACCUMULATOR_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_context;
+struct kbase_hwcnt_accumulator;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * kbase_hwcnt_accumulator_acquire() - Acquire the hardware counter accumulator
+ *                                     for a hardware counter context.
+ * @hctx:  Non-NULL pointer to a hardware counter context.
+ * @accum: Non-NULL pointer to where the pointer to the created accumulator
+ *         will be stored on success.
+ *
+ * There can exist at most one instance of the hardware counter accumulator per
+ * context at a time.
+ *
+ * If multiple clients need access to the hardware counters at the same time,
+ * then an abstraction built on top of the single instance to the hardware
+ * counter accumulator is required.
+ *
+ * No counters will be enabled with the returned accumulator. A subsequent call
+ * to kbase_hwcnt_accumulator_set_counters must be used to turn them on.
+ *
+ * There are four components to a hardware counter dump:
+ *  - A set of enabled counters
+ *  - A start time
+ *  - An end time
+ *  - A dump buffer containing the accumulated counter values for all enabled
+ *    counters between the start and end times.
+ *
+ * For each dump, it is guaranteed that all enabled counters were active for the
+ * entirety of the period between the start and end times.
+ *
+ * It is also guaranteed that the start time of dump "n" is always equal to the
+ * end time of dump "n - 1".
+ *
+ * For all dumps, the values of any counters that were not enabled is undefined.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_acquire(
+	struct kbase_hwcnt_context *hctx,
+	struct kbase_hwcnt_accumulator **accum);
+
+/**
+ * kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ *
+ * The accumulator must be released before the context the accumulator was
+ * created from is terminated.
+ */
+void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum);
+
+/**
+ * kbase_hwcnt_accumulator_set_counters() - Perform a dump of the currently
+ *                                          enabled counters, and enable a new
+ *                                          set of counters that will be used
+ *                                          for subsequent dumps.
+ * @accum:       Non-NULL pointer to the hardware counter accumulator.
+ * @new_map:     Non-NULL pointer to the new counter enable map. Must have the
+ *               same metadata as the accumulator.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * If this function fails for some unexpected reason (i.e. anything other than
+ * invalid args), then the accumulator will be put into the error state until
+ * the parent context is next disabled.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_set_counters(
+	struct kbase_hwcnt_accumulator *accum,
+	const struct kbase_hwcnt_enable_map *new_map,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled
+ *                                  counters.
+ * @accum:       Non-NULL pointer to the hardware counter accumulator.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * If this function fails for some unexpected reason (i.e. anything other than
+ * invalid args), then the accumulator will be put into the error state until
+ * the parent context is next disabled.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_dump(
+	struct kbase_hwcnt_accumulator *accum,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_accumulator_timestamp_ns() - Get the current accumulator backend
+ *                                          timestamp.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ *
+ * Return: Accumulator backend timestamp in nanoseconds.
+ */
+u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum);
+
+#endif /* _KBASE_HWCNT_ACCUMULATOR_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h
new file mode 100755
index 0000000..b7aa0e1
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h
@@ -0,0 +1,217 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Virtual interface for hardware counter backends.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_H_
+#define _KBASE_HWCNT_BACKEND_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/*
+ * struct kbase_hwcnt_backend_info - Opaque pointer to information used to
+ *                                   create an instance of a hardware counter
+ *                                   backend.
+ */
+struct kbase_hwcnt_backend_info;
+
+/*
+ * struct kbase_hwcnt_backend_info - Opaque pointer to a hardware counter
+ *                                   backend, used to perform dumps.
+ */
+struct kbase_hwcnt_backend;
+
+/**
+ * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend.
+ * @info:        Non-NULL pointer to backend info.
+ * @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
+ * All uses of the created hardware counter backend must be externally
+ * synchronised.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_init_fn)(
+	const struct kbase_hwcnt_backend_info *info,
+	struct kbase_hwcnt_backend **out_backend);
+
+/**
+ * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend.
+ * @backend: Pointer to backend to be terminated.
+ */
+typedef void (*kbase_hwcnt_backend_term_fn)(
+	struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend
+ *                                               timestamp.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * Return: Backend timestamp in nanoseconds.
+ */
+typedef u64 (*kbase_hwcnt_backend_timestamp_ns_fn)(
+	struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the
+ *                                              backend.
+ * @backend:    Non-NULL pointer to backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ *
+ * The enable_map must have been created using the interface's metadata.
+ * If the backend has already been enabled, an error is returned.
+ *
+ * May be called in an atomic context.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_enable_fn)(
+	struct kbase_hwcnt_backend *backend,
+	const struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping
+ *                                                     with the backend.
+ * @backend:    Non-NULL pointer to backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ *
+ * Exactly the same as kbase_hwcnt_backend_dump_enable_fn(), except must be
+ * called in an atomic context with the spinlock documented by the specific
+ * backend interface held.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_enable_nolock_fn)(
+	struct kbase_hwcnt_backend *backend,
+	const struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with
+ *                                               the backend.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is already disabled, does nothing.
+ * Any undumped counter values since the last dump get will be lost.
+ */
+typedef void (*kbase_hwcnt_backend_dump_disable_fn)(
+	struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped
+ *                                             counters.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is not enabled, returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_clear_fn)(
+	struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter
+ *                                               dump.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is not enabled or another dump is already in progress,
+ * returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_request_fn)(
+	struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested
+ *                                            counter dump has completed.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is not enabled, returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_wait_fn)(
+	struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate enable the
+ *                                           counters dumped after the last dump
+ *                                           request into the dump buffer.
+ * @backend:     Non-NULL pointer to backend.
+ * @dump_buffer: Non-NULL pointer to destination dump buffer.
+ * @enable_map:  Non-NULL pointer to enable map specifying enabled values.
+ * @accumulate:  True if counters should be accumulated into dump_buffer, rather
+ *               than copied.
+ *
+ * If the backend is not enabled, returns an error.
+ * If a dump is in progress (i.e. dump_wait has not yet returned successfully)
+ * then the resultant contents of the dump buffer will be undefined.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_get_fn)(
+	struct kbase_hwcnt_backend *backend,
+	struct kbase_hwcnt_dump_buffer *dump_buffer,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	bool accumulate);
+
+/**
+ * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual
+ *                                        interface.
+ * @metadata:           Immutable hardware counter metadata.
+ * @info:               Immutable info used to initialise an instance of the
+ *                      backend.
+ * @init:               Function ptr to initialise an instance of the backend.
+ * @term:               Function ptr to terminate an instance of the backend.
+ * @timestamp_ns:       Function ptr to get the current backend timestamp.
+ * @dump_enable:        Function ptr to enable dumping.
+ * @dump_enable_nolock: Function ptr to enable dumping while the
+ *                      backend-specific spinlock is already held.
+ * @dump_disable:       Function ptr to disable dumping.
+ * @dump_clear:         Function ptr to clear counters.
+ * @dump_request:       Function ptr to request a dump.
+ * @dump_wait:          Function ptr to wait until dump to complete.
+ * @dump_get:           Function ptr to copy or accumulate dump into a dump
+ *                      buffer.
+ */
+struct kbase_hwcnt_backend_interface {
+	const struct kbase_hwcnt_metadata *metadata;
+	const struct kbase_hwcnt_backend_info *info;
+	kbase_hwcnt_backend_init_fn init;
+	kbase_hwcnt_backend_term_fn term;
+	kbase_hwcnt_backend_timestamp_ns_fn timestamp_ns;
+	kbase_hwcnt_backend_dump_enable_fn dump_enable;
+	kbase_hwcnt_backend_dump_enable_nolock_fn dump_enable_nolock;
+	kbase_hwcnt_backend_dump_disable_fn dump_disable;
+	kbase_hwcnt_backend_dump_clear_fn dump_clear;
+	kbase_hwcnt_backend_dump_request_fn dump_request;
+	kbase_hwcnt_backend_dump_wait_fn dump_wait;
+	kbase_hwcnt_backend_dump_get_fn dump_get;
+};
+
+#endif /* _KBASE_HWCNT_BACKEND_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c
new file mode 100755
index 0000000..1e9c25a
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c
@@ -0,0 +1,511 @@
+/*
+ *
+ * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_backend_gpu.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase.h"
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_hwaccess_instr.h"
+#ifdef CONFIG_MALI_NO_MALI
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+
+/**
+ * struct kbase_hwcnt_backend_gpu_info - Information used to create an instance
+ *                                       of a GPU hardware counter backend.
+ * @kbdev:         KBase device.
+ * @use_secondary: True if secondary performance counters should be used,
+ *                 else false. Ignored if secondary counters are not supported.
+ * @metadata:      Hardware counter metadata.
+ * @dump_bytes:    Bytes of GPU memory required to perform a
+ *                 hardware counter dump.
+ */
+struct kbase_hwcnt_backend_gpu_info {
+	struct kbase_device *kbdev;
+	bool use_secondary;
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t dump_bytes;
+};
+
+/**
+ * struct kbase_hwcnt_backend_gpu - Instance of a GPU hardware counter backend.
+ * @info:         Info used to create the backend.
+ * @kctx:         KBase context used for GPU memory allocation and
+ *                counter dumping.
+ * @gpu_dump_va:  GPU hardware counter dump buffer virtual address.
+ * @cpu_dump_va:  CPU mapping of gpu_dump_va.
+ * @vmap:         Dump buffer vmap.
+ * @enabled:      True if dumping has been enabled, else false.
+ * @pm_core_mask:  PM state sync-ed shaders core mask for the enabled dumping.
+ */
+struct kbase_hwcnt_backend_gpu {
+	const struct kbase_hwcnt_backend_gpu_info *info;
+	struct kbase_context *kctx;
+	u64 gpu_dump_va;
+	void *cpu_dump_va;
+	struct kbase_vmap_struct *vmap;
+	bool enabled;
+	u64 pm_core_mask;
+};
+
+/* GPU backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
+static u64 kbasep_hwcnt_backend_gpu_timestamp_ns(
+	struct kbase_hwcnt_backend *backend)
+{
+	struct timespec ts;
+
+	(void)backend;
+	getrawmonotonic(&ts);
+	return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
+static int kbasep_hwcnt_backend_gpu_dump_enable_nolock(
+	struct kbase_hwcnt_backend *backend,
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	int errcode;
+	struct kbase_hwcnt_backend_gpu *backend_gpu =
+		(struct kbase_hwcnt_backend_gpu *)backend;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_hwcnt_physical_enable_map phys;
+	struct kbase_instr_hwcnt_enable enable;
+
+	if (!backend_gpu || !enable_map || backend_gpu->enabled ||
+	    (enable_map->metadata != backend_gpu->info->metadata))
+		return -EINVAL;
+
+	kctx = backend_gpu->kctx;
+	kbdev = backend_gpu->kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map);
+
+	enable.jm_bm = phys.jm_bm;
+	enable.shader_bm = phys.shader_bm;
+	enable.tiler_bm = phys.tiler_bm;
+	enable.mmu_l2_bm = phys.mmu_l2_bm;
+	enable.use_secondary = backend_gpu->info->use_secondary;
+	enable.dump_buffer = backend_gpu->gpu_dump_va;
+	enable.dump_buffer_bytes = backend_gpu->info->dump_bytes;
+
+	errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
+	if (errcode)
+		goto error;
+
+	backend_gpu->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev);
+	backend_gpu->enabled = true;
+
+	return 0;
+error:
+	return errcode;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_fn */
+static int kbasep_hwcnt_backend_gpu_dump_enable(
+	struct kbase_hwcnt_backend *backend,
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	unsigned long flags;
+	int errcode;
+	struct kbase_hwcnt_backend_gpu *backend_gpu =
+		(struct kbase_hwcnt_backend_gpu *)backend;
+	struct kbase_device *kbdev;
+
+	if (!backend_gpu)
+		return -EINVAL;
+
+	kbdev = backend_gpu->kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	errcode = kbasep_hwcnt_backend_gpu_dump_enable_nolock(
+		backend, enable_map);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return errcode;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_disable_fn */
+static void kbasep_hwcnt_backend_gpu_dump_disable(
+	struct kbase_hwcnt_backend *backend)
+{
+	int errcode;
+	struct kbase_hwcnt_backend_gpu *backend_gpu =
+		(struct kbase_hwcnt_backend_gpu *)backend;
+
+	if (WARN_ON(!backend_gpu) || !backend_gpu->enabled)
+		return;
+
+	errcode = kbase_instr_hwcnt_disable_internal(backend_gpu->kctx);
+	WARN_ON(errcode);
+
+	backend_gpu->enabled = false;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_clear_fn */
+static int kbasep_hwcnt_backend_gpu_dump_clear(
+	struct kbase_hwcnt_backend *backend)
+{
+	struct kbase_hwcnt_backend_gpu *backend_gpu =
+		(struct kbase_hwcnt_backend_gpu *)backend;
+
+	if (!backend_gpu || !backend_gpu->enabled)
+		return -EINVAL;
+
+	return kbase_instr_hwcnt_clear(backend_gpu->kctx);
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_request_fn */
+static int kbasep_hwcnt_backend_gpu_dump_request(
+	struct kbase_hwcnt_backend *backend)
+{
+	struct kbase_hwcnt_backend_gpu *backend_gpu =
+		(struct kbase_hwcnt_backend_gpu *)backend;
+
+	if (!backend_gpu || !backend_gpu->enabled)
+		return -EINVAL;
+
+	return kbase_instr_hwcnt_request_dump(backend_gpu->kctx);
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_wait_fn */
+static int kbasep_hwcnt_backend_gpu_dump_wait(
+	struct kbase_hwcnt_backend *backend)
+{
+	struct kbase_hwcnt_backend_gpu *backend_gpu =
+		(struct kbase_hwcnt_backend_gpu *)backend;
+
+	if (!backend_gpu || !backend_gpu->enabled)
+		return -EINVAL;
+
+	return kbase_instr_hwcnt_wait_for_dump(backend_gpu->kctx);
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_get_fn */
+static int kbasep_hwcnt_backend_gpu_dump_get(
+	struct kbase_hwcnt_backend *backend,
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_enable_map *dst_enable_map,
+	bool accumulate)
+{
+	struct kbase_hwcnt_backend_gpu *backend_gpu =
+		(struct kbase_hwcnt_backend_gpu *)backend;
+
+	if (!backend_gpu || !dst || !dst_enable_map ||
+	    (backend_gpu->info->metadata != dst->metadata) ||
+	    (dst_enable_map->metadata != dst->metadata))
+		return -EINVAL;
+
+	/* Invalidate the kernel buffer before reading from it. */
+	kbase_sync_mem_regions(
+		backend_gpu->kctx, backend_gpu->vmap, KBASE_SYNC_TO_CPU);
+
+	return kbase_hwcnt_gpu_dump_get(
+		dst, backend_gpu->cpu_dump_va, dst_enable_map,
+		backend_gpu->pm_core_mask, accumulate);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_dump_alloc() - Allocate a GPU dump buffer.
+ * @info:        Non-NULL pointer to GPU backend info.
+ * @kctx:        Non-NULL pointer to kbase context.
+ * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address
+ *               is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_dump_alloc(
+	const struct kbase_hwcnt_backend_gpu_info *info,
+	struct kbase_context *kctx,
+	u64 *gpu_dump_va)
+{
+	struct kbase_va_region *reg;
+	u64 flags;
+	u64 nr_pages;
+
+	WARN_ON(!info);
+	WARN_ON(!kctx);
+	WARN_ON(!gpu_dump_va);
+
+	flags = BASE_MEM_PROT_CPU_RD |
+		BASE_MEM_PROT_GPU_WR |
+		BASEP_MEM_PERMANENT_KERNEL_MAPPING |
+		BASE_MEM_CACHED_CPU;
+
+	if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE)
+		flags |= BASE_MEM_UNCACHED_GPU;
+
+	nr_pages = PFN_UP(info->dump_bytes);
+
+	reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va);
+
+	if (!reg)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_dump_free() - Free an allocated GPU dump buffer.
+ * @kctx:        Non-NULL pointer to kbase context.
+ * @gpu_dump_va: GPU dump buffer virtual address.
+ */
+static void kbasep_hwcnt_backend_gpu_dump_free(
+	struct kbase_context *kctx,
+	u64 gpu_dump_va)
+{
+	WARN_ON(!kctx);
+	if (gpu_dump_va)
+		kbase_mem_free(kctx, gpu_dump_va);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_destroy() - Destroy a GPU backend.
+ * @backend: Pointer to GPU backend to destroy.
+ *
+ * Can be safely called on a backend in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_gpu_destroy(
+	struct kbase_hwcnt_backend_gpu *backend)
+{
+	if (!backend)
+		return;
+
+	if (backend->kctx) {
+		struct kbase_context *kctx = backend->kctx;
+		struct kbase_device *kbdev = kctx->kbdev;
+
+		if (backend->cpu_dump_va)
+			kbase_phy_alloc_mapping_put(kctx, backend->vmap);
+
+		if (backend->gpu_dump_va)
+			kbasep_hwcnt_backend_gpu_dump_free(
+				kctx, backend->gpu_dump_va);
+
+		kbasep_js_release_privileged_ctx(kbdev, kctx);
+		kbase_destroy_context(kctx);
+	}
+
+	kfree(backend);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_create() - Create a GPU backend.
+ * @info:        Non-NULL pointer to backend info.
+ * @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_create(
+	const struct kbase_hwcnt_backend_gpu_info *info,
+	struct kbase_hwcnt_backend_gpu **out_backend)
+{
+	int errcode;
+	struct kbase_device *kbdev;
+	struct kbase_hwcnt_backend_gpu *backend = NULL;
+
+	WARN_ON(!info);
+	WARN_ON(!out_backend);
+
+	kbdev = info->kbdev;
+
+	backend = kzalloc(sizeof(*backend), GFP_KERNEL);
+	if (!backend)
+		goto alloc_error;
+
+	backend->info = info;
+
+	backend->kctx = kbase_create_context(kbdev, true,
+		BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL);
+	if (!backend->kctx)
+		goto alloc_error;
+
+	kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx);
+
+	errcode = kbasep_hwcnt_backend_gpu_dump_alloc(
+		info, backend->kctx, &backend->gpu_dump_va);
+	if (errcode)
+		goto error;
+
+	backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx,
+		backend->gpu_dump_va, &backend->vmap);
+	if (!backend->cpu_dump_va)
+		goto alloc_error;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* The dummy model needs the CPU mapping. */
+	gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va);
+#endif
+
+	*out_backend = backend;
+	return 0;
+
+alloc_error:
+	errcode = -ENOMEM;
+error:
+	kbasep_hwcnt_backend_gpu_destroy(backend);
+	return errcode;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_init_fn */
+static int kbasep_hwcnt_backend_gpu_init(
+	const struct kbase_hwcnt_backend_info *info,
+	struct kbase_hwcnt_backend **out_backend)
+{
+	int errcode;
+	struct kbase_hwcnt_backend_gpu *backend = NULL;
+
+	if (!info || !out_backend)
+		return -EINVAL;
+
+	errcode = kbasep_hwcnt_backend_gpu_create(
+		(const struct kbase_hwcnt_backend_gpu_info *) info, &backend);
+	if (errcode)
+		return errcode;
+
+	*out_backend = (struct kbase_hwcnt_backend *)backend;
+
+	return 0;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_term_fn */
+static void kbasep_hwcnt_backend_gpu_term(struct kbase_hwcnt_backend *backend)
+{
+	if (!backend)
+		return;
+
+	kbasep_hwcnt_backend_gpu_dump_disable(backend);
+	kbasep_hwcnt_backend_gpu_destroy(
+		(struct kbase_hwcnt_backend_gpu *)backend);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_info_destroy() - Destroy a GPU backend info.
+ * @info: Pointer to info to destroy.
+ *
+ * Can be safely called on a backend info in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_gpu_info_destroy(
+	const struct kbase_hwcnt_backend_gpu_info *info)
+{
+	if (!info)
+		return;
+
+	kbase_hwcnt_gpu_metadata_destroy(info->metadata);
+	kfree(info);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_info_create() - Create a GPU backend info.
+ * @kbdev: Non_NULL pointer to kbase device.
+ * @out_info: Non-NULL pointer to where info is stored on success.
+ *
+ * Return 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_info_create(
+	struct kbase_device *kbdev,
+	const struct kbase_hwcnt_backend_gpu_info **out_info)
+{
+	int errcode = -ENOMEM;
+	struct kbase_hwcnt_gpu_info hwcnt_gpu_info;
+	struct kbase_hwcnt_backend_gpu_info *info = NULL;
+
+	WARN_ON(!kbdev);
+	WARN_ON(!out_info);
+
+	errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info);
+	if (errcode)
+		return errcode;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		goto error;
+
+	info->kbdev = kbdev;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	info->use_secondary = true;
+#else
+	info->use_secondary = false;
+#endif
+
+	errcode = kbase_hwcnt_gpu_metadata_create(
+		&hwcnt_gpu_info, info->use_secondary,
+		&info->metadata,
+		&info->dump_bytes);
+	if (errcode)
+		goto error;
+
+	*out_info = info;
+
+	return 0;
+error:
+	kbasep_hwcnt_backend_gpu_info_destroy(info);
+	return errcode;
+}
+
+int kbase_hwcnt_backend_gpu_create(
+	struct kbase_device *kbdev,
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	int errcode;
+	const struct kbase_hwcnt_backend_gpu_info *info = NULL;
+
+	if (!kbdev || !iface)
+		return -EINVAL;
+
+	errcode = kbasep_hwcnt_backend_gpu_info_create(kbdev, &info);
+
+	if (errcode)
+		return errcode;
+
+	iface->metadata = info->metadata;
+	iface->info = (struct kbase_hwcnt_backend_info *)info;
+	iface->init = kbasep_hwcnt_backend_gpu_init;
+	iface->term = kbasep_hwcnt_backend_gpu_term;
+	iface->timestamp_ns = kbasep_hwcnt_backend_gpu_timestamp_ns;
+	iface->dump_enable = kbasep_hwcnt_backend_gpu_dump_enable;
+	iface->dump_enable_nolock = kbasep_hwcnt_backend_gpu_dump_enable_nolock;
+	iface->dump_disable = kbasep_hwcnt_backend_gpu_dump_disable;
+	iface->dump_clear = kbasep_hwcnt_backend_gpu_dump_clear;
+	iface->dump_request = kbasep_hwcnt_backend_gpu_dump_request;
+	iface->dump_wait = kbasep_hwcnt_backend_gpu_dump_wait;
+	iface->dump_get = kbasep_hwcnt_backend_gpu_dump_get;
+
+	return 0;
+}
+
+void kbase_hwcnt_backend_gpu_destroy(
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	if (!iface)
+		return;
+
+	kbasep_hwcnt_backend_gpu_info_destroy(
+		(const struct kbase_hwcnt_backend_gpu_info *)iface->info);
+	memset(iface, 0, sizeof(*iface));
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h
new file mode 100755
index 0000000..7712f14
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h
@@ -0,0 +1,61 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Concrete implementation of mali_kbase_hwcnt_backend interface for GPU
+ * backend.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_GPU_H_
+#define _KBASE_HWCNT_BACKEND_GPU_H_
+
+#include "mali_kbase_hwcnt_backend.h"
+
+struct kbase_device;
+
+/**
+ * kbase_hwcnt_backend_gpu_create() - Create a GPU hardware counter backend
+ *                                    interface.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @iface: Non-NULL pointer to backend interface structure that is filled in
+ *             on creation success.
+ *
+ * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock held.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_backend_gpu_create(
+	struct kbase_device *kbdev,
+	struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_gpu_destroy() - Destroy a GPU hardware counter backend
+ *                                     interface.
+ * @iface: Pointer to interface to destroy.
+ *
+ * Can be safely called on an all-zeroed interface, or on an already destroyed
+ * interface.
+ */
+void kbase_hwcnt_backend_gpu_destroy(
+	struct kbase_hwcnt_backend_interface *iface);
+
+#endif /* _KBASE_HWCNT_BACKEND_GPU_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h
new file mode 100755
index 0000000..bc50ad1
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h
@@ -0,0 +1,119 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Hardware counter context API.
+ */
+
+#ifndef _KBASE_HWCNT_CONTEXT_H_
+#define _KBASE_HWCNT_CONTEXT_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_backend_interface;
+struct kbase_hwcnt_context;
+
+/**
+ * kbase_hwcnt_context_init() - Initialise a hardware counter context.
+ * @iface:    Non-NULL pointer to a hardware counter backend interface.
+ * @out_hctx: Non-NULL pointer to where the pointer to the created context will
+ *            be stored on success.
+ *
+ * On creation, the disable count of the context will be 0.
+ * A hardware counter accumulator can be acquired using a created context.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_context_init(
+	const struct kbase_hwcnt_backend_interface *iface,
+	struct kbase_hwcnt_context **out_hctx);
+
+/**
+ * kbase_hwcnt_context_term() - Terminate a hardware counter context.
+ * @hctx: Pointer to context to be terminated.
+ */
+void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_metadata() - Get the hardware counter metadata used by
+ *                                  the context, so related counter data
+ *                                  structures can be created.
+ * @hctx: Non-NULL pointer to the hardware counter context.
+ *
+ * Return: Non-NULL pointer to metadata, or NULL on error.
+ */
+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
+	struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_disable() - Increment the disable count of the context.
+ * @hctx: Pointer to the hardware counter context.
+ *
+ * If a call to this function increments the disable count from 0 to 1, and
+ * an accumulator has been acquired, then a counter dump will be performed
+ * before counters are disabled via the backend interface.
+ *
+ * Subsequent dumps via the accumulator while counters are disabled will first
+ * return the accumulated dump, then will return dumps with zeroed counters.
+ *
+ * After this function call returns, it is guaranteed that counters will not be
+ * enabled via the backend interface.
+ */
+void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the
+ *                                        context if possible in an atomic
+ *                                        context.
+ * @hctx: Pointer to the hardware counter context.
+ *
+ * This function will only succeed if hardware counters are effectively already
+ * disabled, i.e. there is no accumulator, the disable count is already
+ * non-zero, or the accumulator has no counters set.
+ *
+ * After this function call returns true, it is guaranteed that counters will
+ * not be enabled via the backend interface.
+ *
+ * Return: True if the disable count was incremented, else False.
+ */
+bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_enable() - Decrement the disable count of the context.
+ * @hctx: Pointer to the hardware counter context.
+ *
+ * If a call to this function decrements the disable count from 1 to 0, and
+ * an accumulator has been acquired, then counters will be re-enabled via the
+ * backend interface.
+ *
+ * If an accumulator has been acquired and enabling counters fails for some
+ * reason, the accumulator will be placed into an error state.
+ *
+ * It is only valid to call this function one time for each prior returned call
+ * to kbase_hwcnt_context_disable.
+ *
+ * The spinlock documented in the backend interface that was passed in to
+ * kbase_hwcnt_context_init() must be held before calling this function.
+ */
+void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx);
+
+#endif /* _KBASE_HWCNT_CONTEXT_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c
new file mode 100755
index 0000000..8581fe9
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c
@@ -0,0 +1,777 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase.h"
+#ifdef CONFIG_MALI_NO_MALI
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+
+#define KBASE_HWCNT_V4_BLOCKS_PER_GROUP 8
+#define KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP 4
+#define KBASE_HWCNT_V4_MAX_GROUPS \
+	(KBASE_HWCNT_AVAIL_MASK_BITS / KBASE_HWCNT_V4_BLOCKS_PER_GROUP)
+#define KBASE_HWCNT_V4_HEADERS_PER_BLOCK 4
+#define KBASE_HWCNT_V4_COUNTERS_PER_BLOCK 60
+#define KBASE_HWCNT_V4_VALUES_PER_BLOCK \
+	(KBASE_HWCNT_V4_HEADERS_PER_BLOCK + KBASE_HWCNT_V4_COUNTERS_PER_BLOCK)
+/* Index of the PRFCNT_EN header into a V4 counter block */
+#define KBASE_HWCNT_V4_PRFCNT_EN_HEADER 2
+
+#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4
+#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4
+#define KBASE_HWCNT_V5_COUNTERS_PER_BLOCK 60
+#define KBASE_HWCNT_V5_VALUES_PER_BLOCK \
+	(KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_COUNTERS_PER_BLOCK)
+/* Index of the PRFCNT_EN header into a V5 counter block */
+#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2
+
+/**
+ * kbasep_hwcnt_backend_gpu_metadata_v4_create() - Create hardware counter
+ *                                                 metadata for a v4 GPU.
+ * @v4_info:  Non-NULL pointer to hwcnt info for a v4 GPU.
+ * @metadata: Non-NULL pointer to where created metadata is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_metadata_v4_create(
+	const struct kbase_hwcnt_gpu_v4_info *v4_info,
+	const struct kbase_hwcnt_metadata **metadata)
+{
+	size_t grp;
+	int errcode = -ENOMEM;
+	struct kbase_hwcnt_description desc;
+	struct kbase_hwcnt_group_description *grps;
+	size_t avail_mask_bit;
+
+	WARN_ON(!v4_info);
+	WARN_ON(!metadata);
+
+	/* Check if there are enough bits in the availability mask to represent
+	 * all the hardware counter blocks in the system.
+	 */
+	if (v4_info->cg_count > KBASE_HWCNT_V4_MAX_GROUPS)
+		return -EINVAL;
+
+	grps = kcalloc(v4_info->cg_count, sizeof(*grps), GFP_KERNEL);
+	if (!grps)
+		goto clean_up;
+
+	desc.grp_cnt = v4_info->cg_count;
+	desc.grps = grps;
+
+	for (grp = 0; grp < v4_info->cg_count; grp++) {
+		size_t blk;
+		size_t sc;
+		const u64 core_mask = v4_info->cgs[grp].core_mask;
+		struct kbase_hwcnt_block_description *blks = kcalloc(
+			KBASE_HWCNT_V4_BLOCKS_PER_GROUP,
+			sizeof(*blks),
+			GFP_KERNEL);
+
+		if (!blks)
+			goto clean_up;
+
+		grps[grp].type = KBASE_HWCNT_GPU_GROUP_TYPE_V4;
+		grps[grp].blk_cnt = KBASE_HWCNT_V4_BLOCKS_PER_GROUP;
+		grps[grp].blks = blks;
+
+		for (blk = 0; blk < KBASE_HWCNT_V4_BLOCKS_PER_GROUP; blk++) {
+			blks[blk].inst_cnt = 1;
+			blks[blk].hdr_cnt =
+				KBASE_HWCNT_V4_HEADERS_PER_BLOCK;
+			blks[blk].ctr_cnt =
+				KBASE_HWCNT_V4_COUNTERS_PER_BLOCK;
+		}
+
+		for (sc = 0; sc < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; sc++) {
+			blks[sc].type = core_mask & (1ull << sc) ?
+				KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER :
+				KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED;
+		}
+
+		blks[4].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER;
+		blks[5].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2;
+		blks[6].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED;
+		blks[7].type = (grp == 0) ?
+			KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM :
+			KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED;
+
+		WARN_ON(KBASE_HWCNT_V4_BLOCKS_PER_GROUP != 8);
+	}
+
+	/* Initialise the availability mask */
+	desc.avail_mask = 0;
+	avail_mask_bit = 0;
+
+	for (grp = 0; grp < desc.grp_cnt; grp++) {
+		size_t blk;
+		const struct kbase_hwcnt_block_description *blks =
+			desc.grps[grp].blks;
+		for (blk = 0; blk < desc.grps[grp].blk_cnt; blk++) {
+			WARN_ON(blks[blk].inst_cnt != 1);
+			if (blks[blk].type !=
+			    KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED)
+				desc.avail_mask |= (1ull << avail_mask_bit);
+
+			avail_mask_bit++;
+		}
+	}
+
+	errcode = kbase_hwcnt_metadata_create(&desc, metadata);
+
+	/* Always clean up, as metadata will make a copy of the input args */
+clean_up:
+	if (grps) {
+		for (grp = 0; grp < v4_info->cg_count; grp++)
+			kfree(grps[grp].blks);
+		kfree(grps);
+	}
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_v4_dump_bytes() - Get the raw dump buffer size for a
+ *                                            V4 GPU.
+ * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU.
+ *
+ * Return: Size of buffer the V4 GPU needs to perform a counter dump.
+ */
+static size_t kbasep_hwcnt_backend_gpu_v4_dump_bytes(
+	const struct kbase_hwcnt_gpu_v4_info *v4_info)
+{
+	return v4_info->cg_count *
+		KBASE_HWCNT_V4_BLOCKS_PER_GROUP *
+		KBASE_HWCNT_V4_VALUES_PER_BLOCK *
+		KBASE_HWCNT_VALUE_BYTES;
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_metadata_v5_create() - Create hardware counter
+ *                                                 metadata for a v5 GPU.
+ * @v5_info:       Non-NULL pointer to hwcnt info for a v5 GPU.
+ * @use_secondary: True if secondary performance counters should be used, else
+ *                 false. Ignored if secondary counters are not supported.
+ * @metadata:      Non-NULL pointer to where created metadata is stored
+ *                 on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_metadata_v5_create(
+	const struct kbase_hwcnt_gpu_v5_info *v5_info,
+	bool use_secondary,
+	const struct kbase_hwcnt_metadata **metadata)
+{
+	struct kbase_hwcnt_description desc;
+	struct kbase_hwcnt_group_description group;
+	struct kbase_hwcnt_block_description
+		blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
+	size_t non_sc_block_count;
+	size_t sc_block_count;
+
+	WARN_ON(!v5_info);
+	WARN_ON(!metadata);
+
+	/* Calculate number of block instances that aren't shader cores */
+	non_sc_block_count = 2 + v5_info->l2_count;
+	/* Calculate number of block instances that are shader cores */
+	sc_block_count = fls64(v5_info->core_mask);
+
+	/*
+	 * A system can have up to 64 shader cores, but the 64-bit
+	 * availability mask can't physically represent that many cores as well
+	 * as the other hardware blocks.
+	 * Error out if there are more blocks than our implementation can
+	 * support.
+	 */
+	if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS)
+		return -EINVAL;
+
+	/* One Job Manager block */
+	blks[0].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM;
+	blks[0].inst_cnt = 1;
+	blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+	blks[0].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
+
+	/* One Tiler block */
+	blks[1].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER;
+	blks[1].inst_cnt = 1;
+	blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+	blks[1].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
+
+	/* l2_count memsys blks */
+	blks[2].type = use_secondary ?
+		KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 :
+		KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS;
+	blks[2].inst_cnt = v5_info->l2_count;
+	blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+	blks[2].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
+
+	/*
+	 * There are as many shader cores in the system as there are bits set in
+	 * the core mask. However, the dump buffer memory requirements need to
+	 * take into account the fact that the core mask may be non-contiguous.
+	 *
+	 * For example, a system with a core mask of 0b1011 has the same dump
+	 * buffer memory requirements as a system with 0b1111, but requires more
+	 * memory than a system with 0b0111. However, core 2 of the system with
+	 * 0b1011 doesn't physically exist, and the dump buffer memory that
+	 * accounts for that core will never be written to when we do a counter
+	 * dump.
+	 *
+	 * We find the core mask's last set bit to determine the memory
+	 * requirements, and embed the core mask into the availability mask so
+	 * we can determine later which shader cores physically exist.
+	 */
+	blks[3].type = use_secondary ?
+		KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 :
+		KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC;
+	blks[3].inst_cnt = sc_block_count;
+	blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+	blks[3].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
+
+	WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4);
+
+	group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
+	group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT;
+	group.blks = blks;
+
+	desc.grp_cnt = 1;
+	desc.grps = &group;
+
+	/* The JM, Tiler, and L2s are always available, and are before cores */
+	desc.avail_mask = (1ull << non_sc_block_count) - 1;
+	/* Embed the core mask directly in the availability mask */
+	desc.avail_mask |= (v5_info->core_mask << non_sc_block_count);
+
+	return kbase_hwcnt_metadata_create(&desc, metadata);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_v5_dump_bytes() - Get the raw dump buffer size for a
+ *                                            V5 GPU.
+ * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU.
+ *
+ * Return: Size of buffer the V5 GPU needs to perform a counter dump.
+ */
+static size_t kbasep_hwcnt_backend_gpu_v5_dump_bytes(
+	const struct kbase_hwcnt_gpu_v5_info *v5_info)
+{
+	WARN_ON(!v5_info);
+	return (2 + v5_info->l2_count + fls64(v5_info->core_mask)) *
+		KBASE_HWCNT_V5_VALUES_PER_BLOCK *
+		KBASE_HWCNT_VALUE_BYTES;
+}
+
+int kbase_hwcnt_gpu_info_init(
+	struct kbase_device *kbdev,
+	struct kbase_hwcnt_gpu_info *info)
+{
+	if (!kbdev || !info)
+		return -EINVAL;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* NO_MALI uses V5 layout, regardless of the underlying platform. */
+	info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
+	info->v5.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+	info->v5.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+#else
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+		info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V4;
+		info->v4.cg_count = kbdev->gpu_props.num_core_groups;
+		info->v4.cgs = kbdev->gpu_props.props.coherency_info.group;
+	} else {
+		const struct base_gpu_props *props = &kbdev->gpu_props.props;
+		const size_t l2_count = props->l2_props.num_l2_slices;
+		const size_t core_mask =
+			props->coherency_info.group[0].core_mask;
+
+		info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
+		info->v5.l2_count = l2_count;
+		info->v5.core_mask = core_mask;
+	}
+#endif
+	return 0;
+}
+
+int kbase_hwcnt_gpu_metadata_create(
+	const struct kbase_hwcnt_gpu_info *info,
+	bool use_secondary,
+	const struct kbase_hwcnt_metadata **out_metadata,
+	size_t *out_dump_bytes)
+{
+	int errcode;
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t dump_bytes;
+
+	if (!info || !out_metadata || !out_dump_bytes)
+		return -EINVAL;
+
+	switch (info->type) {
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+		dump_bytes = kbasep_hwcnt_backend_gpu_v4_dump_bytes(&info->v4);
+		errcode = kbasep_hwcnt_backend_gpu_metadata_v4_create(
+			&info->v4, &metadata);
+		break;
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+		dump_bytes = kbasep_hwcnt_backend_gpu_v5_dump_bytes(&info->v5);
+		errcode = kbasep_hwcnt_backend_gpu_metadata_v5_create(
+			&info->v5, use_secondary, &metadata);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (errcode)
+		return errcode;
+
+	/*
+	 * Dump abstraction size should be exactly the same size and layout as
+	 * the physical dump size, for backwards compatibility.
+	 */
+	WARN_ON(dump_bytes != metadata->dump_buf_bytes);
+
+	*out_metadata = metadata;
+	*out_dump_bytes = dump_bytes;
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_create);
+
+void kbase_hwcnt_gpu_metadata_destroy(
+	const struct kbase_hwcnt_metadata *metadata)
+{
+	if (!metadata)
+		return;
+
+	kbase_hwcnt_metadata_destroy(metadata);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_destroy);
+
+static bool is_block_type_shader(
+	const u64 grp_type,
+	const u64 blk_type,
+	const size_t blk)
+{
+	bool is_shader = false;
+
+	switch (grp_type) {
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+		/* blk-value in [0, KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP-1]
+		 * corresponds to a shader, or its implementation
+		 * reserved. As such, here we use the blk index value to
+		 * tell the reserved case.
+		 */
+		if (blk_type == KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER ||
+		    (blk < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP &&
+		     blk_type == KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED))
+			is_shader = true;
+		break;
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+		if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC ||
+		    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2)
+			is_shader = true;
+		break;
+	default:
+		/* Warn on unknown group type */
+		WARN_ON(true);
+	}
+
+	return is_shader;
+}
+
+int kbase_hwcnt_gpu_dump_get(
+	struct kbase_hwcnt_dump_buffer *dst,
+	void *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map,
+	u64 pm_core_mask,
+	bool accumulate)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	const u32 *dump_src;
+	size_t src_offset, grp, blk, blk_inst;
+	size_t grp_prev = 0;
+	u64 core_mask = pm_core_mask;
+
+	if (!dst || !src || !dst_enable_map ||
+	    (dst_enable_map->metadata != dst->metadata))
+		return -EINVAL;
+
+	metadata = dst->metadata;
+	dump_src = (const u32 *)src;
+	src_offset = 0;
+
+	kbase_hwcnt_metadata_for_each_block(
+		metadata, grp, blk, blk_inst) {
+		const size_t hdr_cnt =
+			kbase_hwcnt_metadata_block_headers_count(
+				metadata, grp, blk);
+		const size_t ctr_cnt =
+			kbase_hwcnt_metadata_block_counters_count(
+				metadata, grp, blk);
+		const u64 blk_type = kbase_hwcnt_metadata_block_type(
+			metadata, grp, blk);
+		const bool is_shader_core = is_block_type_shader(
+			kbase_hwcnt_metadata_group_type(metadata, grp),
+			blk_type, blk);
+
+		if (grp != grp_prev) {
+			/* grp change would only happen with V4. V5 and
+			 * further are envisaged to be single group
+			 * scenario only. Here needs to drop the lower
+			 * group core-mask by shifting right with
+			 * KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP.
+			 */
+			core_mask = pm_core_mask >>
+				KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP;
+			grp_prev = grp;
+		}
+
+		/* Early out if no values in the dest block are enabled */
+		if (kbase_hwcnt_enable_map_block_enabled(
+			dst_enable_map, grp, blk, blk_inst)) {
+			u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+				dst, grp, blk, blk_inst);
+			const u32 *src_blk = dump_src + src_offset;
+
+			if (!is_shader_core || (core_mask & 1)) {
+				if (accumulate) {
+					kbase_hwcnt_dump_buffer_block_accumulate(
+						dst_blk, src_blk, hdr_cnt,
+						ctr_cnt);
+				} else {
+					kbase_hwcnt_dump_buffer_block_copy(
+						dst_blk, src_blk,
+						(hdr_cnt + ctr_cnt));
+				}
+			} else if (!accumulate) {
+				kbase_hwcnt_dump_buffer_block_zero(
+					dst_blk, (hdr_cnt + ctr_cnt));
+			}
+		}
+
+		src_offset += (hdr_cnt + ctr_cnt);
+		if (is_shader_core)
+			core_mask = core_mask >> 1;
+	}
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_dump_get);
+
+/**
+ * kbasep_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
+ *                                                    enable map abstraction to
+ *                                                    a physical block enable
+ *                                                    map.
+ * @lo: Low 64 bits of block enable map abstraction.
+ * @hi: High 64 bits of block enable map abstraction.
+ *
+ * The abstraction uses 128 bits to enable 128 block values, whereas the
+ * physical uses just 32 bits, as bit n enables values [n*4, n*4+3].
+ * Therefore, this conversion is lossy.
+ *
+ * Return: 32-bit physical block enable map.
+ */
+static inline u32 kbasep_hwcnt_backend_gpu_block_map_to_physical(
+	u64 lo,
+	u64 hi)
+{
+	u32 phys = 0;
+	u64 dwords[2] = {lo, hi};
+	size_t dword_idx;
+
+	for (dword_idx = 0; dword_idx < 2; dword_idx++) {
+		const u64 dword = dwords[dword_idx];
+		u16 packed = 0;
+
+		size_t hword_bit;
+
+		for (hword_bit = 0; hword_bit < 16; hword_bit++) {
+			const size_t dword_bit = hword_bit * 4;
+			const u16 mask =
+				((dword >> (dword_bit + 0)) & 0x1) |
+				((dword >> (dword_bit + 1)) & 0x1) |
+				((dword >> (dword_bit + 2)) & 0x1) |
+				((dword >> (dword_bit + 3)) & 0x1);
+			packed |= (mask << hword_bit);
+		}
+		phys |= ((u32)packed) << (16 * dword_idx);
+	}
+	return phys;
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical
+ *                                                      block enable map to a
+ *                                                      block enable map
+ *                                                      abstraction.
+ * @phys: Physical 32-bit block enable map
+ * @lo:   Non-NULL pointer to where low 64 bits of block enable map abstraction
+ *        will be stored.
+ * @hi:   Non-NULL pointer to where high 64 bits of block enable map abstraction
+ *        will be stored.
+ */
+static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(
+	u32 phys,
+	u64 *lo,
+	u64 *hi)
+{
+	u64 dwords[2] = {0, 0};
+
+	size_t dword_idx;
+
+	for (dword_idx = 0; dword_idx < 2; dword_idx++) {
+		const u16 packed = phys >> (16 * dword_idx);
+		u64 dword = 0;
+
+		size_t hword_bit;
+
+		for (hword_bit = 0; hword_bit < 16; hword_bit++) {
+			const size_t dword_bit = hword_bit * 4;
+			const u64 mask = (packed >> (hword_bit)) & 0x1;
+
+			dword |= mask << (dword_bit + 0);
+			dword |= mask << (dword_bit + 1);
+			dword |= mask << (dword_bit + 2);
+			dword |= mask << (dword_bit + 3);
+		}
+		dwords[dword_idx] = dword;
+	}
+	*lo = dwords[0];
+	*hi = dwords[1];
+}
+
+void kbase_hwcnt_gpu_enable_map_to_physical(
+	struct kbase_hwcnt_physical_enable_map *dst,
+	const struct kbase_hwcnt_enable_map *src)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+
+	u64 jm_bm = 0;
+	u64 shader_bm = 0;
+	u64 tiler_bm = 0;
+	u64 mmu_l2_bm = 0;
+
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!src) || WARN_ON(!dst))
+		return;
+
+	metadata = src->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(
+		metadata, grp, blk, blk_inst) {
+		const u64 grp_type = kbase_hwcnt_metadata_group_type(
+			metadata, grp);
+		const u64 blk_type = kbase_hwcnt_metadata_block_type(
+			metadata, grp, blk);
+		const size_t blk_val_cnt =
+			kbase_hwcnt_metadata_block_values_count(
+				metadata, grp, blk);
+		const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
+			src, grp, blk, blk_inst);
+
+		switch ((enum kbase_hwcnt_gpu_group_type)grp_type) {
+		case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+			WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK);
+			switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) {
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER:
+				shader_bm |= *blk_map;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER:
+				tiler_bm |= *blk_map;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2:
+				mmu_l2_bm |= *blk_map;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM:
+				jm_bm |= *blk_map;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED:
+				break;
+			default:
+				WARN_ON(true);
+			}
+			break;
+		case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+			WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK);
+			switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:
+				jm_bm |= *blk_map;
+				break;
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+				tiler_bm |= *blk_map;
+				break;
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+				shader_bm |= *blk_map;
+				break;
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+				mmu_l2_bm |= *blk_map;
+				break;
+			default:
+				WARN_ON(true);
+			}
+			break;
+		default:
+			WARN_ON(true);
+		}
+	}
+
+	dst->jm_bm =
+		kbasep_hwcnt_backend_gpu_block_map_to_physical(jm_bm, 0);
+	dst->shader_bm =
+		kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0);
+	dst->tiler_bm =
+		kbasep_hwcnt_backend_gpu_block_map_to_physical(tiler_bm, 0);
+	dst->mmu_l2_bm =
+		kbasep_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_to_physical);
+
+void kbase_hwcnt_gpu_enable_map_from_physical(
+	struct kbase_hwcnt_enable_map *dst,
+	const struct kbase_hwcnt_physical_enable_map *src)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+
+	u64 ignored_hi;
+	u64 jm_bm;
+	u64 shader_bm;
+	u64 tiler_bm;
+	u64 mmu_l2_bm;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!src) || WARN_ON(!dst))
+		return;
+
+	metadata = dst->metadata;
+
+	kbasep_hwcnt_backend_gpu_block_map_from_physical(
+		src->jm_bm, &jm_bm, &ignored_hi);
+	kbasep_hwcnt_backend_gpu_block_map_from_physical(
+		src->shader_bm, &shader_bm, &ignored_hi);
+	kbasep_hwcnt_backend_gpu_block_map_from_physical(
+		src->tiler_bm, &tiler_bm, &ignored_hi);
+	kbasep_hwcnt_backend_gpu_block_map_from_physical(
+		src->mmu_l2_bm, &mmu_l2_bm, &ignored_hi);
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		const u64 grp_type = kbase_hwcnt_metadata_group_type(
+			metadata, grp);
+		const u64 blk_type = kbase_hwcnt_metadata_block_type(
+			metadata, grp, blk);
+		const size_t blk_val_cnt =
+			kbase_hwcnt_metadata_block_values_count(
+				metadata, grp, blk);
+		u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
+			dst, grp, blk, blk_inst);
+
+		switch ((enum kbase_hwcnt_gpu_group_type)grp_type) {
+		case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+			WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK);
+			switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) {
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER:
+				*blk_map = shader_bm;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER:
+				*blk_map = tiler_bm;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2:
+				*blk_map = mmu_l2_bm;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM:
+				*blk_map = jm_bm;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED:
+				break;
+			default:
+				WARN_ON(true);
+			}
+			break;
+		case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+			WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK);
+			switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:
+				*blk_map = jm_bm;
+				break;
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+				*blk_map = tiler_bm;
+				break;
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+				*blk_map = shader_bm;
+				break;
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+				*blk_map = mmu_l2_bm;
+				break;
+			default:
+				WARN_ON(true);
+			}
+			break;
+		default:
+			WARN_ON(true);
+		}
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_from_physical);
+
+void kbase_hwcnt_gpu_patch_dump_headers(
+	struct kbase_hwcnt_dump_buffer *buf,
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!buf) || WARN_ON(!enable_map) ||
+	    WARN_ON(buf->metadata != enable_map->metadata))
+		return;
+
+	metadata = buf->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		const u64 grp_type =
+			kbase_hwcnt_metadata_group_type(metadata, grp);
+		u32 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(
+			buf, grp, blk, blk_inst);
+		const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
+			enable_map, grp, blk, blk_inst);
+		const u32 prfcnt_en =
+			kbasep_hwcnt_backend_gpu_block_map_to_physical(
+				blk_map[0], 0);
+
+		switch ((enum kbase_hwcnt_gpu_group_type)grp_type) {
+		case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+			buf_blk[KBASE_HWCNT_V4_PRFCNT_EN_HEADER] = prfcnt_en;
+			break;
+		case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+			buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en;
+			break;
+		default:
+			WARN_ON(true);
+		}
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_patch_dump_headers);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h
new file mode 100755
index 0000000..12891e0
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h
@@ -0,0 +1,251 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_HWCNT_GPU_H_
+#define _KBASE_HWCNT_GPU_H_
+
+#include <linux/types.h>
+
+struct kbase_device;
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to
+ *                                   identify metadata groups.
+ * @KBASE_HWCNT_GPU_GROUP_TYPE_V4: GPU V4 group type.
+ * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type.
+ */
+enum kbase_hwcnt_gpu_group_type {
+	KBASE_HWCNT_GPU_GROUP_TYPE_V4 = 0x10,
+	KBASE_HWCNT_GPU_GROUP_TYPE_V5,
+};
+
+/**
+ * enum kbase_hwcnt_gpu_v4_block_type - GPU V4 hardware counter block types,
+ *                                      used to identify metadata blocks.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER:   Shader block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER:    Tiler block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2:   MMU/L2 block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM:       Job Manager block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: Reserved block.
+ */
+enum kbase_hwcnt_gpu_v4_block_type {
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER = 0x20,
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER,
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2,
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM,
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED,
+};
+
+/**
+ * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types,
+ *                                      used to identify metadata blocks.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:      Job Manager block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:   Tiler block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:      Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:     Secondary Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:  Memsys block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block.
+ */
+enum kbase_hwcnt_gpu_v5_block_type {
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM = 0x40,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2,
+};
+
+/**
+ * struct kbase_hwcnt_physical_enable_map - Representation of enable map
+ *                                          directly used by GPU.
+ * @jm_bm:     Job Manager counters selection bitmask.
+ * @shader_bm: Shader counters selection bitmask.
+ * @tiler_bm:  Tiler counters selection bitmask.
+ * @mmu_l2_bm: MMU_L2 counters selection bitmask.
+ */
+struct kbase_hwcnt_physical_enable_map {
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 mmu_l2_bm;
+};
+
+/**
+ * struct kbase_hwcnt_gpu_v4_info - Information about hwcnt blocks on v4 GPUs.
+ * @cg_count: Core group count.
+ * @cgs:      Non-NULL pointer to array of cg_count coherent group structures.
+ *
+ * V4 devices are Mali-T6xx or Mali-T72x, and have one or more core groups,
+ * where each core group may have a physically different layout.
+ */
+struct kbase_hwcnt_gpu_v4_info {
+	size_t cg_count;
+	const struct mali_base_gpu_coherent_group *cgs;
+};
+
+/**
+ * struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs.
+ * @l2_count:   L2 cache count.
+ * @core_mask:  Shader core mask. May be sparse.
+ */
+struct kbase_hwcnt_gpu_v5_info {
+	size_t l2_count;
+	u64 core_mask;
+};
+
+/**
+ * struct kbase_hwcnt_gpu_info - Tagged union with information about the current
+ *                               GPU's hwcnt blocks.
+ * @type: GPU type.
+ * @v4:   Info filled in if a v4 GPU.
+ * @v5:   Info filled in if a v5 GPU.
+ */
+struct kbase_hwcnt_gpu_info {
+	enum kbase_hwcnt_gpu_group_type type;
+	union {
+		struct kbase_hwcnt_gpu_v4_info v4;
+		struct kbase_hwcnt_gpu_v5_info v5;
+	};
+};
+
+/**
+ * kbase_hwcnt_gpu_info_init() - Initialise an info structure used to create the
+ *                               hwcnt metadata.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @info:  Non-NULL pointer to data structure to be filled in.
+ *
+ * The initialised info struct will only be valid for use while kbdev is valid.
+ */
+int kbase_hwcnt_gpu_info_init(
+	struct kbase_device *kbdev,
+	struct kbase_hwcnt_gpu_info *info);
+
+/**
+ * kbase_hwcnt_gpu_metadata_create() - Create hardware counter metadata for the
+ *                                     current GPU.
+ * @info:           Non-NULL pointer to info struct initialised by
+ *                  kbase_hwcnt_gpu_info_init.
+ * @use_secondary:  True if secondary performance counters should be used, else
+ *                  false. Ignored if secondary counters are not supported.
+ * @out_metadata:   Non-NULL pointer to where created metadata is stored on
+ *                  success.
+ * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump
+ *                  buffer is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_gpu_metadata_create(
+	const struct kbase_hwcnt_gpu_info *info,
+	bool use_secondary,
+	const struct kbase_hwcnt_metadata **out_metadata,
+	size_t *out_dump_bytes);
+
+/**
+ * kbase_hwcnt_gpu_metadata_destroy() - Destroy GPU hardware counter metadata.
+ * @metadata: Pointer to metadata to destroy.
+ */
+void kbase_hwcnt_gpu_metadata_destroy(
+	const struct kbase_hwcnt_metadata *metadata);
+
+/**
+ * kbase_hwcnt_gpu_dump_get() - Copy or accumulate enabled counters from the raw
+ *                              dump buffer in src into the dump buffer
+ *                              abstraction in dst.
+ * @dst:            Non-NULL pointer to dst dump buffer.
+ * @src:            Non-NULL pointer to src raw dump buffer, of same length
+ *                  as returned in out_dump_bytes parameter of
+ *                  kbase_hwcnt_gpu_metadata_create.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @pm_core_mask:   PM state synchronized shaders core mask with the dump.
+ * @accumulate:     True if counters in src should be accumulated into dst,
+ *                  rather than copied.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata as
+ * returned from the call to kbase_hwcnt_gpu_metadata_create as was used to get
+ * the length of src.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_gpu_dump_get(
+	struct kbase_hwcnt_dump_buffer *dst,
+	void *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map,
+	const u64 pm_core_mask,
+	bool accumulate);
+
+/**
+ * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction
+ *                                            into a physical enable map.
+ * @dst: Non-NULL pointer to dst physical enable map.
+ * @src: Non-NULL pointer to src enable map abstraction.
+ *
+ * The src must have been created from a metadata returned from a call to
+ * kbase_hwcnt_gpu_metadata_create.
+ *
+ * This is a lossy conversion, as the enable map abstraction has one bit per
+ * individual counter block value, but the physical enable map uses 1 bit for
+ * every 4 counters, shared over all instances of a block.
+ */
+void kbase_hwcnt_gpu_enable_map_to_physical(
+	struct kbase_hwcnt_physical_enable_map *dst,
+	const struct kbase_hwcnt_enable_map *src);
+
+/**
+ * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
+ *                                              an enable map abstraction.
+ * @dst: Non-NULL pointer to dst enable map abstraction.
+ * @src: Non-NULL pointer to src physical enable map.
+ *
+ * The dst must have been created from a metadata returned from a call to
+ * kbase_hwcnt_gpu_metadata_create.
+ *
+ * This is a lossy conversion, as the physical enable map can technically
+ * support counter blocks with 128 counters each, but no hardware actually uses
+ * more than 64, so the enable map abstraction has nowhere to store the enable
+ * information for the 64 non-existent counters.
+ */
+void kbase_hwcnt_gpu_enable_map_from_physical(
+	struct kbase_hwcnt_enable_map *dst,
+	const struct kbase_hwcnt_physical_enable_map *src);
+
+/**
+ * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter
+ *                                        enable headers in a dump buffer to
+ *                                        reflect the specified enable map.
+ * @buf:        Non-NULL pointer to dump buffer to patch.
+ * @enable_map: Non-NULL pointer to enable map.
+ *
+ * The buf and enable_map must have been created from a metadata returned from
+ * a call to kbase_hwcnt_gpu_metadata_create.
+ *
+ * This function should be used before handing off a dump buffer over the
+ * kernel-user boundary, to ensure the header is accurate for the enable map
+ * used by the user.
+ */
+void kbase_hwcnt_gpu_patch_dump_headers(
+	struct kbase_hwcnt_dump_buffer *buf,
+	const struct kbase_hwcnt_enable_map *enable_map);
+
+#endif /* _KBASE_HWCNT_GPU_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c
new file mode 100755
index 0000000..b0e6aee
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c
@@ -0,0 +1,152 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_legacy.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_ioctl.h"
+
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+/**
+ * struct kbase_hwcnt_legacy_client - Legacy hardware counter client.
+ * @user_dump_buf: Pointer to a non-NULL user buffer, where dumps are returned.
+ * @enable_map:    Counter enable map.
+ * @dump_buf:      Dump buffer used to manipulate dumps before copied to user.
+ * @hvcli:         Hardware counter virtualizer client.
+ */
+struct kbase_hwcnt_legacy_client {
+	void __user *user_dump_buf;
+	struct kbase_hwcnt_enable_map enable_map;
+	struct kbase_hwcnt_dump_buffer dump_buf;
+	struct kbase_hwcnt_virtualizer_client *hvcli;
+};
+
+int kbase_hwcnt_legacy_client_create(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_ioctl_hwcnt_enable *enable,
+	struct kbase_hwcnt_legacy_client **out_hlcli)
+{
+	int errcode;
+	struct kbase_hwcnt_legacy_client *hlcli;
+	const struct kbase_hwcnt_metadata *metadata;
+	struct kbase_hwcnt_physical_enable_map phys_em;
+
+	if (!hvirt || !enable || !enable->dump_buffer || !out_hlcli)
+		return -EINVAL;
+
+	metadata = kbase_hwcnt_virtualizer_metadata(hvirt);
+
+	hlcli = kzalloc(sizeof(*hlcli), GFP_KERNEL);
+	if (!hlcli)
+		return -ENOMEM;
+
+	hlcli->user_dump_buf = (void __user *)(uintptr_t)enable->dump_buffer;
+
+	errcode = kbase_hwcnt_enable_map_alloc(metadata, &hlcli->enable_map);
+	if (errcode)
+		goto error;
+
+	/* Translate from the ioctl enable map to the internal one */
+	phys_em.jm_bm = enable->jm_bm;
+	phys_em.shader_bm = enable->shader_bm;
+	phys_em.tiler_bm = enable->tiler_bm;
+	phys_em.mmu_l2_bm = enable->mmu_l2_bm;
+	kbase_hwcnt_gpu_enable_map_from_physical(&hlcli->enable_map, &phys_em);
+
+	errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hlcli->dump_buf);
+	if (errcode)
+		goto error;
+
+	errcode = kbase_hwcnt_virtualizer_client_create(
+		hvirt, &hlcli->enable_map, &hlcli->hvcli);
+	if (errcode)
+		goto error;
+
+	*out_hlcli = hlcli;
+	return 0;
+
+error:
+	kbase_hwcnt_legacy_client_destroy(hlcli);
+	return errcode;
+}
+
+void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli)
+{
+	if (!hlcli)
+		return;
+
+	kbase_hwcnt_virtualizer_client_destroy(hlcli->hvcli);
+	kbase_hwcnt_dump_buffer_free(&hlcli->dump_buf);
+	kbase_hwcnt_enable_map_free(&hlcli->enable_map);
+	kfree(hlcli);
+}
+
+int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli)
+{
+	int errcode;
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+
+	if (!hlcli)
+		return -EINVAL;
+
+	/* Dump into the kernel buffer */
+	errcode = kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli,
+		&ts_start_ns, &ts_end_ns, &hlcli->dump_buf);
+	if (errcode)
+		return errcode;
+
+	/* Patch the dump buf headers, to hide the counters that other hwcnt
+	 * clients are using.
+	 */
+	kbase_hwcnt_gpu_patch_dump_headers(
+		&hlcli->dump_buf, &hlcli->enable_map);
+
+	/* Zero all non-enabled counters (current values are undefined) */
+	kbase_hwcnt_dump_buffer_zero_non_enabled(
+		&hlcli->dump_buf, &hlcli->enable_map);
+
+	/* Copy into the user's buffer */
+	errcode = copy_to_user(hlcli->user_dump_buf, hlcli->dump_buf.dump_buf,
+		hlcli->dump_buf.metadata->dump_buf_bytes);
+	/* Non-zero errcode implies user buf was invalid or too small */
+	if (errcode)
+		return -EFAULT;
+
+	return 0;
+}
+
+int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli)
+{
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+
+	if (!hlcli)
+		return -EINVAL;
+
+	/* Dump with a NULL buffer to clear this client's counters */
+	return kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli,
+		&ts_start_ns, &ts_end_ns, NULL);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h
new file mode 100755
index 0000000..7a610ae
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Legacy hardware counter interface, giving userspace clients simple,
+ * synchronous access to hardware counters.
+ *
+ * Any functions operating on an single legacy hardware counter client instance
+ * must be externally synchronised.
+ * Different clients may safely be used concurrently.
+ */
+
+#ifndef _KBASE_HWCNT_LEGACY_H_
+#define _KBASE_HWCNT_LEGACY_H_
+
+struct kbase_hwcnt_legacy_client;
+struct kbase_ioctl_hwcnt_enable;
+struct kbase_hwcnt_virtualizer;
+
+/**
+ * kbase_hwcnt_legacy_client_create() - Create a legacy hardware counter client.
+ * @hvirt:     Non-NULL pointer to hardware counter virtualizer the client
+ *             should be attached to.
+ * @enable:    Non-NULL pointer to hwcnt_enable structure, containing a valid
+ *             pointer to a user dump buffer large enough to hold a dump, and
+ *             the counters that should be enabled.
+ * @out_hlcli: Non-NULL pointer to where the pointer to the created client will
+ *             be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_legacy_client_create(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_ioctl_hwcnt_enable *enable,
+	struct kbase_hwcnt_legacy_client **out_hlcli);
+
+/**
+ * kbase_hwcnt_legacy_client_destroy() - Destroy a legacy hardware counter
+ *                                       client.
+ * @hlcli: Pointer to the legacy hardware counter client.
+ *
+ * Will safely destroy a client in any partial state of construction.
+ */
+void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli);
+
+/**
+ * kbase_hwcnt_legacy_client_dump() - Perform a hardware counter dump into the
+ *                                    client's user buffer.
+ * @hlcli: Non-NULL pointer to the legacy hardware counter client.
+ *
+ * This function will synchronously dump hardware counters into the user buffer
+ * specified on client creation, with the counters specified on client creation.
+ *
+ * The counters are automatically cleared after each dump, such that the next
+ * dump performed will return the counter values accumulated between the time of
+ * this function call and the next dump.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli);
+
+/**
+ * kbase_hwcnt_legacy_client_clear() - Perform and discard a hardware counter
+ *                                     dump.
+ * @hlcli: Non-NULL pointer to the legacy hardware counter client.
+ *
+ * This function will synchronously clear the hardware counters, such that the
+ * next dump performed will return the counter values accumulated between the
+ * time of this function call and the next dump.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli);
+
+#endif /* _KBASE_HWCNT_LEGACY_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
new file mode 100755
index 0000000..10706b8
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_HWCNT_READER_H_
+#define _KBASE_HWCNT_READER_H_
+
+/* The ids of ioctl commands. */
+#define KBASE_HWCNT_READER 0xBE
+#define KBASE_HWCNT_READER_GET_HWVER       _IOR(KBASE_HWCNT_READER, 0x00, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32)
+#define KBASE_HWCNT_READER_DUMP            _IOW(KBASE_HWCNT_READER, 0x10, u32)
+#define KBASE_HWCNT_READER_CLEAR           _IOW(KBASE_HWCNT_READER, 0x11, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER      _IOR(KBASE_HWCNT_READER, 0x20,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_PUT_BUFFER      _IOW(KBASE_HWCNT_READER, 0x21,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_SET_INTERVAL    _IOW(KBASE_HWCNT_READER, 0x30, u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT    _IOW(KBASE_HWCNT_READER, 0x40, u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT   _IOW(KBASE_HWCNT_READER, 0x41, u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32)
+
+/**
+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
+ * @timestamp:  time when sample was collected
+ * @event_id:   id of an event that triggered sample collection
+ * @buffer_idx: position in sampling area where sample buffer was stored
+ */
+struct kbase_hwcnt_reader_metadata {
+	u64 timestamp;
+	u32 event_id;
+	u32 buffer_idx;
+};
+
+/**
+ * enum base_hwcnt_reader_event - hwcnt dumping events
+ * @BASE_HWCNT_READER_EVENT_MANUAL:   manual request for dump
+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
+ * @BASE_HWCNT_READER_EVENT_PREJOB:   prejob dump request
+ * @BASE_HWCNT_READER_EVENT_POSTJOB:  postjob dump request
+ * @BASE_HWCNT_READER_EVENT_COUNT:    number of supported events
+ */
+enum base_hwcnt_reader_event {
+	BASE_HWCNT_READER_EVENT_MANUAL,
+	BASE_HWCNT_READER_EVENT_PERIODIC,
+	BASE_HWCNT_READER_EVENT_PREJOB,
+	BASE_HWCNT_READER_EVENT_POSTJOB,
+
+	BASE_HWCNT_READER_EVENT_COUNT
+};
+
+#endif /* _KBASE_HWCNT_READER_H_ */
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c
new file mode 100755
index 0000000..1e9efde
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c
@@ -0,0 +1,538 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase.h"
+
+/* Minimum alignment of each block of hardware counters */
+#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \
+	(KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES)
+
+/**
+ * KBASE_HWCNT_ALIGN_UPWARDS() - Align a value to an alignment.
+ * @value:     The value to align upwards.
+ * @alignment: The alignment.
+ *
+ * Return: A number greater than or equal to value that is aligned to alignment.
+ */
+#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \
+	(value + ((alignment - (value % alignment)) % alignment))
+
+int kbase_hwcnt_metadata_create(
+	const struct kbase_hwcnt_description *desc,
+	const struct kbase_hwcnt_metadata **out_metadata)
+{
+	char *buf;
+	struct kbase_hwcnt_metadata *metadata;
+	struct kbase_hwcnt_group_metadata *grp_mds;
+	size_t grp;
+	size_t enable_map_count; /* Number of u64 bitfields (inc padding) */
+	size_t dump_buf_count; /* Number of u32 values (inc padding) */
+	size_t avail_mask_bits; /* Number of availability mask bits */
+
+	size_t size;
+	size_t offset;
+
+	if (!desc || !out_metadata)
+		return -EINVAL;
+
+	/* Calculate the bytes needed to tightly pack the metadata */
+
+	/* Top level metadata */
+	size = 0;
+	size += sizeof(struct kbase_hwcnt_metadata);
+
+	/* Group metadata */
+	size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt;
+
+	/* Block metadata */
+	for (grp = 0; grp < desc->grp_cnt; grp++) {
+		size += sizeof(struct kbase_hwcnt_block_metadata) *
+			desc->grps[grp].blk_cnt;
+	}
+
+	/* Single allocation for the entire metadata */
+	buf = kmalloc(size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* Use the allocated memory for the metadata and its members */
+
+	/* Bump allocate the top level metadata */
+	offset = 0;
+	metadata = (struct kbase_hwcnt_metadata *)(buf + offset);
+	offset += sizeof(struct kbase_hwcnt_metadata);
+
+	/* Bump allocate the group metadata */
+	grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset);
+	offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt;
+
+	enable_map_count = 0;
+	dump_buf_count = 0;
+	avail_mask_bits = 0;
+
+	for (grp = 0; grp < desc->grp_cnt; grp++) {
+		size_t blk;
+
+		const struct kbase_hwcnt_group_description *grp_desc =
+			desc->grps + grp;
+		struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp;
+
+		size_t group_enable_map_count = 0;
+		size_t group_dump_buffer_count = 0;
+		size_t group_avail_mask_bits = 0;
+
+		/* Bump allocate this group's block metadata */
+		struct kbase_hwcnt_block_metadata *blk_mds =
+			(struct kbase_hwcnt_block_metadata *)(buf + offset);
+		offset += sizeof(struct kbase_hwcnt_block_metadata) *
+			grp_desc->blk_cnt;
+
+		/* Fill in each block in the group's information */
+		for (blk = 0; blk < grp_desc->blk_cnt; blk++) {
+			const struct kbase_hwcnt_block_description *blk_desc =
+				grp_desc->blks + blk;
+			struct kbase_hwcnt_block_metadata *blk_md =
+				blk_mds + blk;
+			const size_t n_values =
+				blk_desc->hdr_cnt + blk_desc->ctr_cnt;
+
+			blk_md->type = blk_desc->type;
+			blk_md->inst_cnt = blk_desc->inst_cnt;
+			blk_md->hdr_cnt = blk_desc->hdr_cnt;
+			blk_md->ctr_cnt = blk_desc->ctr_cnt;
+			blk_md->enable_map_index = group_enable_map_count;
+			blk_md->enable_map_stride =
+				kbase_hwcnt_bitfield_count(n_values);
+			blk_md->dump_buf_index = group_dump_buffer_count;
+			blk_md->dump_buf_stride =
+				KBASE_HWCNT_ALIGN_UPWARDS(
+					n_values,
+					(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+					 KBASE_HWCNT_VALUE_BYTES));
+			blk_md->avail_mask_index = group_avail_mask_bits;
+
+			group_enable_map_count +=
+				blk_md->enable_map_stride * blk_md->inst_cnt;
+			group_dump_buffer_count +=
+				blk_md->dump_buf_stride * blk_md->inst_cnt;
+			group_avail_mask_bits += blk_md->inst_cnt;
+		}
+
+		/* Fill in the group's information */
+		grp_md->type = grp_desc->type;
+		grp_md->blk_cnt = grp_desc->blk_cnt;
+		grp_md->blk_metadata = blk_mds;
+		grp_md->enable_map_index = enable_map_count;
+		grp_md->dump_buf_index = dump_buf_count;
+		grp_md->avail_mask_index = avail_mask_bits;
+
+		enable_map_count += group_enable_map_count;
+		dump_buf_count += group_dump_buffer_count;
+		avail_mask_bits += group_avail_mask_bits;
+	}
+
+	/* Fill in the top level metadata's information */
+	metadata->grp_cnt = desc->grp_cnt;
+	metadata->grp_metadata = grp_mds;
+	metadata->enable_map_bytes =
+		enable_map_count * KBASE_HWCNT_BITFIELD_BYTES;
+	metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES;
+	metadata->avail_mask = desc->avail_mask;
+
+	WARN_ON(size != offset);
+	/* Due to the block alignment, there should be exactly one enable map
+	 * bit per 4 bytes in the dump buffer.
+	 */
+	WARN_ON(metadata->dump_buf_bytes !=
+		(metadata->enable_map_bytes *
+		 BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES));
+
+	*out_metadata = metadata;
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_create);
+
+void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
+{
+	kfree(metadata);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_destroy);
+
+int kbase_hwcnt_enable_map_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	struct kbase_hwcnt_enable_map *enable_map)
+{
+	u64 *enable_map_buf;
+
+	if (!metadata || !enable_map)
+		return -EINVAL;
+
+	enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL);
+	if (!enable_map_buf)
+		return -ENOMEM;
+
+	enable_map->metadata = metadata;
+	enable_map->enable_map = enable_map_buf;
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_alloc);
+
+void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map)
+{
+	if (!enable_map)
+		return;
+
+	kfree(enable_map->enable_map);
+	enable_map->enable_map = NULL;
+	enable_map->metadata = NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_free);
+
+int kbase_hwcnt_dump_buffer_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	u32 *buf;
+
+	if (!metadata || !dump_buf)
+		return -EINVAL;
+
+	buf = kmalloc(metadata->dump_buf_bytes, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	dump_buf->metadata = metadata;
+	dump_buf->dump_buf = buf;
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_alloc);
+
+void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	if (!dump_buf)
+		return;
+
+	kfree(dump_buf->dump_buf);
+	memset(dump_buf, 0, sizeof(*dump_buf));
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_free);
+
+int kbase_hwcnt_dump_buffer_array_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	size_t n,
+	struct kbase_hwcnt_dump_buffer_array *dump_bufs)
+{
+	struct kbase_hwcnt_dump_buffer *buffers;
+	size_t buf_idx;
+	unsigned int order;
+	unsigned long addr;
+
+	if (!metadata || !dump_bufs)
+		return -EINVAL;
+
+	/* Allocate memory for the dump buffer struct array */
+	buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers)
+		return -ENOMEM;
+
+	/* Allocate pages for the actual dump buffers, as they tend to be fairly
+	 * large.
+	 */
+	order = get_order(metadata->dump_buf_bytes * n);
+	addr = __get_free_pages(GFP_KERNEL, order);
+
+	if (!addr) {
+		kfree(buffers);
+		return -ENOMEM;
+	}
+
+	dump_bufs->page_addr = addr;
+	dump_bufs->page_order = order;
+	dump_bufs->buf_cnt = n;
+	dump_bufs->bufs = buffers;
+
+	/* Set the buffer of each dump buf */
+	for (buf_idx = 0; buf_idx < n; buf_idx++) {
+		const size_t offset = metadata->dump_buf_bytes * buf_idx;
+
+		buffers[buf_idx].metadata = metadata;
+		buffers[buf_idx].dump_buf = (u32 *)(addr + offset);
+	}
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_alloc);
+
+void kbase_hwcnt_dump_buffer_array_free(
+	struct kbase_hwcnt_dump_buffer_array *dump_bufs)
+{
+	if (!dump_bufs)
+		return;
+
+	kfree(dump_bufs->bufs);
+	free_pages(dump_bufs->page_addr, dump_bufs->page_order);
+	memset(dump_bufs, 0, sizeof(*dump_bufs));
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_free);
+
+void kbase_hwcnt_dump_buffer_zero(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!dst) ||
+	    WARN_ON(!dst_enable_map) ||
+	    WARN_ON(dst->metadata != dst_enable_map->metadata))
+		return;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		u32 *dst_blk;
+		size_t val_cnt;
+
+		if (!kbase_hwcnt_enable_map_block_enabled(
+			dst_enable_map, grp, blk, blk_inst))
+			continue;
+
+		dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+			dst, grp, blk, blk_inst);
+		val_cnt = kbase_hwcnt_metadata_block_values_count(
+			metadata, grp, blk);
+
+		kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero);
+
+void kbase_hwcnt_dump_buffer_zero_strict(
+	struct kbase_hwcnt_dump_buffer *dst)
+{
+	if (WARN_ON(!dst))
+		return;
+
+	memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_strict);
+
+void kbase_hwcnt_dump_buffer_zero_non_enabled(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!dst) ||
+	    WARN_ON(!dst_enable_map) ||
+	    WARN_ON(dst->metadata != dst_enable_map->metadata))
+		return;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+			dst, grp, blk, blk_inst);
+		const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
+			dst_enable_map, grp, blk, blk_inst);
+		size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+			metadata, grp, blk);
+
+		/* Align upwards to include padding bytes */
+		val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt,
+			(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+			 KBASE_HWCNT_VALUE_BYTES));
+
+		if (kbase_hwcnt_metadata_block_instance_avail(
+			metadata, grp, blk, blk_inst)) {
+			/* Block available, so only zero non-enabled values */
+			kbase_hwcnt_dump_buffer_block_zero_non_enabled(
+				dst_blk, blk_em, val_cnt);
+		} else {
+			/* Block not available, so zero the entire thing */
+			kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
+		}
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_non_enabled);
+
+void kbase_hwcnt_dump_buffer_copy(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!dst) ||
+	    WARN_ON(!src) ||
+	    WARN_ON(!dst_enable_map) ||
+	    WARN_ON(dst == src) ||
+	    WARN_ON(dst->metadata != src->metadata) ||
+	    WARN_ON(dst->metadata != dst_enable_map->metadata))
+		return;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		u32 *dst_blk;
+		const u32 *src_blk;
+		size_t val_cnt;
+
+		if (!kbase_hwcnt_enable_map_block_enabled(
+			dst_enable_map, grp, blk, blk_inst))
+			continue;
+
+		dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+			dst, grp, blk, blk_inst);
+		src_blk = kbase_hwcnt_dump_buffer_block_instance(
+			src, grp, blk, blk_inst);
+		val_cnt = kbase_hwcnt_metadata_block_values_count(
+			metadata, grp, blk);
+
+		kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy);
+
+void kbase_hwcnt_dump_buffer_copy_strict(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!dst) ||
+	    WARN_ON(!src) ||
+	    WARN_ON(!dst_enable_map) ||
+	    WARN_ON(dst == src) ||
+	    WARN_ON(dst->metadata != src->metadata) ||
+	    WARN_ON(dst->metadata != dst_enable_map->metadata))
+		return;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+			dst, grp, blk, blk_inst);
+		const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
+			src, grp, blk, blk_inst);
+		const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
+			dst_enable_map, grp, blk, blk_inst);
+		size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+			metadata, grp, blk);
+		/* Align upwards to include padding bytes */
+		val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt,
+			(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+			 KBASE_HWCNT_VALUE_BYTES));
+
+		kbase_hwcnt_dump_buffer_block_copy_strict(
+			dst_blk, src_blk, blk_em, val_cnt);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy_strict);
+
+void kbase_hwcnt_dump_buffer_accumulate(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!dst) ||
+	    WARN_ON(!src) ||
+	    WARN_ON(!dst_enable_map) ||
+	    WARN_ON(dst == src) ||
+	    WARN_ON(dst->metadata != src->metadata) ||
+	    WARN_ON(dst->metadata != dst_enable_map->metadata))
+		return;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		u32 *dst_blk;
+		const u32 *src_blk;
+		size_t hdr_cnt;
+		size_t ctr_cnt;
+
+		if (!kbase_hwcnt_enable_map_block_enabled(
+			dst_enable_map, grp, blk, blk_inst))
+			continue;
+
+		dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+			dst, grp, blk, blk_inst);
+		src_blk = kbase_hwcnt_dump_buffer_block_instance(
+			src, grp, blk, blk_inst);
+		hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
+			metadata, grp, blk);
+		ctr_cnt = kbase_hwcnt_metadata_block_counters_count(
+			metadata, grp, blk);
+
+		kbase_hwcnt_dump_buffer_block_accumulate(
+			dst_blk, src_blk, hdr_cnt, ctr_cnt);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate);
+
+void kbase_hwcnt_dump_buffer_accumulate_strict(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!dst) ||
+	    WARN_ON(!src) ||
+	    WARN_ON(!dst_enable_map) ||
+	    WARN_ON(dst == src) ||
+	    WARN_ON(dst->metadata != src->metadata) ||
+	    WARN_ON(dst->metadata != dst_enable_map->metadata))
+		return;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+			dst, grp, blk, blk_inst);
+		const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
+			src, grp, blk, blk_inst);
+		const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
+			dst_enable_map, grp, blk, blk_inst);
+		size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
+			metadata, grp, blk);
+		size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(
+			metadata, grp, blk);
+		/* Align upwards to include padding bytes */
+		ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(hdr_cnt + ctr_cnt,
+			(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+			 KBASE_HWCNT_VALUE_BYTES) - hdr_cnt);
+
+		kbase_hwcnt_dump_buffer_block_accumulate_strict(
+			dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate_strict);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h
new file mode 100755
index 0000000..4d78c84
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h
@@ -0,0 +1,1087 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Hardware counter types.
+ * Contains structures for describing the physical layout of hardware counter
+ * dump buffers and enable maps within a system.
+ *
+ * Also contains helper functions for manipulation of these dump buffers and
+ * enable maps.
+ *
+ * Through use of these structures and functions, hardware counters can be
+ * enabled, copied, accumulated, and generally manipulated in a generic way,
+ * regardless of the physical counter dump layout.
+ *
+ * Terminology:
+ *
+ * Hardware Counter System:
+ *   A collection of hardware counter groups, making a full hardware counter
+ *   system.
+ * Hardware Counter Group:
+ *   A group of Hardware Counter Blocks (e.g. a t62x might have more than one
+ *   core group, so has one counter group per core group, where each group
+ *   may have a different number and layout of counter blocks).
+ * Hardware Counter Block:
+ *   A block of hardware counters (e.g. shader block, tiler block).
+ * Hardware Counter Block Instance:
+ *   An instance of a Hardware Counter Block (e.g. an MP4 GPU might have
+ *   4 shader block instances).
+ *
+ * Block Header:
+ *   A header value inside a counter block. Headers don't count anything,
+ *   so it is only valid to copy or zero them. Headers are always the first
+ *   values in the block.
+ * Block Counter:
+ *   A counter value inside a counter block. Counters can be zeroed, copied,
+ *   or accumulated. Counters are always immediately after the headers in the
+ *   block.
+ * Block Value:
+ *   A catch-all term for block headers and block counters.
+ *
+ * Enable Map:
+ *   An array of u64 bitfields, where each bit either enables exactly one
+ *   block value, or is unused (padding).
+ * Dump Buffer:
+ *   An array of u32 values, where each u32 corresponds either to one block
+ *   value, or is unused (padding).
+ * Availability Mask:
+ *   A bitfield, where each bit corresponds to whether a block instance is
+ *   physically available (e.g. an MP3 GPU may have a sparse core mask of
+ *   0b1011, meaning it only has 3 cores but for hardware counter dumps has the
+ *   same dump buffer layout as an MP4 GPU with a core mask of 0b1111. In this
+ *   case, the availability mask might be 0b1011111 (the exact layout will
+ *   depend on the specific hardware architecture), with the 3 extra early bits
+ *   corresponding to other block instances in the hardware counter system).
+ * Metadata:
+ *   Structure describing the physical layout of the enable map and dump buffers
+ *   for a specific hardware counter system.
+ *
+ */
+
+#ifndef _KBASE_HWCNT_TYPES_H_
+#define _KBASE_HWCNT_TYPES_H_
+
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include "mali_malisw.h"
+
+/* Number of bytes in each bitfield */
+#define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64))
+
+/* Number of bits in each bitfield */
+#define KBASE_HWCNT_BITFIELD_BITS (KBASE_HWCNT_BITFIELD_BYTES * BITS_PER_BYTE)
+
+/* Number of bytes for each counter value */
+#define KBASE_HWCNT_VALUE_BYTES (sizeof(u32))
+
+/* Number of bits in an availability mask (i.e. max total number of block
+ * instances supported in a Hardware Counter System)
+ */
+#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE)
+
+/**
+ * struct kbase_hwcnt_block_description - Description of one or more identical,
+ *                                        contiguous, Hardware Counter Blocks.
+ * @type:     The arbitrary identifier used to identify the type of the block.
+ * @inst_cnt: The number of Instances of the block.
+ * @hdr_cnt:  The number of 32-bit Block Headers in the block.
+ * @ctr_cnt:  The number of 32-bit Block Counters in the block.
+ */
+struct kbase_hwcnt_block_description {
+	u64 type;
+	size_t inst_cnt;
+	size_t hdr_cnt;
+	size_t ctr_cnt;
+};
+
+/**
+ * struct kbase_hwcnt_group_description - Description of one or more identical,
+ *                                        contiguous Hardware Counter Groups.
+ * @type:    The arbitrary identifier used to identify the type of the group.
+ * @blk_cnt: The number of types of Hardware Counter Block in the group.
+ * @blks:    Non-NULL pointer to an array of blk_cnt block descriptions,
+ *           describing each type of Hardware Counter Block in the group.
+ */
+struct kbase_hwcnt_group_description {
+	u64 type;
+	size_t blk_cnt;
+	const struct kbase_hwcnt_block_description *blks;
+};
+
+/**
+ * struct kbase_hwcnt_description - Description of a Hardware Counter System.
+ * @grp_cnt:    The number of Hardware Counter Groups.
+ * @grps:       Non-NULL pointer to an array of grp_cnt group descriptions,
+ *              describing each Hardware Counter Group in the system.
+ * @avail_mask: Flat Availability Mask for all block instances in the system.
+ */
+struct kbase_hwcnt_description {
+	size_t grp_cnt;
+	const struct kbase_hwcnt_group_description *grps;
+	u64 avail_mask;
+};
+
+/**
+ * struct kbase_hwcnt_block_metadata - Metadata describing the physical layout
+ *                                     of a block in a Hardware Counter System's
+ *                                     Dump Buffers and Enable Maps.
+ * @type:              The arbitrary identifier used to identify the type of the
+ *                     block.
+ * @inst_cnt:          The number of Instances of the block.
+ * @hdr_cnt:           The number of 32-bit Block Headers in the block.
+ * @ctr_cnt:           The number of 32-bit Block Counters in the block.
+ * @enable_map_index:  Index in u64s into the parent's Enable Map where the
+ *                     Enable Map bitfields of the Block Instances described by
+ *                     this metadata start.
+ * @enable_map_stride: Stride in u64s between the Enable Maps of each of the
+ *                     Block Instances described by this metadata.
+ * @dump_buf_index:    Index in u32s into the parent's Dump Buffer where the
+ *                     Dump Buffers of the Block Instances described by this
+ *                     metadata start.
+ * @dump_buf_stride:   Stride in u32s between the Dump Buffers of each of the
+ *                     Block Instances described by this metadata.
+ * @avail_mask_index:  Index in bits into the parent's Availability Mask where
+ *                     the Availability Masks of the Block Instances described
+ *                     by this metadata start.
+ */
+struct kbase_hwcnt_block_metadata {
+	u64 type;
+	size_t inst_cnt;
+	size_t hdr_cnt;
+	size_t ctr_cnt;
+	size_t enable_map_index;
+	size_t enable_map_stride;
+	size_t dump_buf_index;
+	size_t dump_buf_stride;
+	size_t avail_mask_index;
+};
+
+/**
+ * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout
+ *                                     of a group of blocks in a Hardware
+ *                                     Counter System's Dump Buffers and Enable
+ *                                     Maps.
+ * @type:             The arbitrary identifier used to identify the type of the
+ *                    group.
+ * @blk_cnt:          The number of types of Hardware Counter Block in the
+ *                    group.
+ * @blk_metadata:     Non-NULL pointer to an array of blk_cnt block metadata,
+ *                    describing the physical layout of each type of Hardware
+ *                    Counter Block in the group.
+ * @enable_map_index: Index in u64s into the parent's Enable Map where the
+ *                    Enable Maps of the blocks within the group described by
+ *                    this metadata start.
+ * @dump_buf_index:   Index in u32s into the parent's Dump Buffer where the
+ *                    Dump Buffers of the blocks within the group described by
+ *                    metadata start.
+ * @avail_mask_index: Index in bits into the parent's Availability Mask where
+ *                    the Availability Masks of the blocks within the group
+ *                    described by this metadata start.
+ */
+struct kbase_hwcnt_group_metadata {
+	u64 type;
+	size_t blk_cnt;
+	const struct kbase_hwcnt_block_metadata *blk_metadata;
+	size_t enable_map_index;
+	size_t dump_buf_index;
+	size_t avail_mask_index;
+};
+
+/**
+ * struct kbase_hwcnt_metadata - Metadata describing the physical layout
+ *                               of Dump Buffers and Enable Maps within a
+ *                               Hardware Counter System.
+ * @grp_cnt:          The number of Hardware Counter Groups.
+ * @grp_metadata:     Non-NULL pointer to an array of grp_cnt group metadata,
+ *                    describing the physical layout of each Hardware Counter
+ *                    Group in the system.
+ * @enable_map_bytes: The size in bytes of an Enable Map needed for the system.
+ * @dump_buf_bytes:   The size in bytes of a Dump Buffer needed for the system.
+ * @avail_mask:       The Availability Mask for the system.
+ */
+struct kbase_hwcnt_metadata {
+	size_t grp_cnt;
+	const struct kbase_hwcnt_group_metadata *grp_metadata;
+	size_t enable_map_bytes;
+	size_t dump_buf_bytes;
+	u64 avail_mask;
+};
+
+/**
+ * struct kbase_hwcnt_enable_map - Hardware Counter Enable Map. Array of u64
+ *                                 bitfields.
+ * @metadata:   Non-NULL pointer to metadata used to identify, and to describe
+ *              the layout of the enable map.
+ * @enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an array
+ *              of u64 bitfields, each bit of which enables one hardware
+ *              counter.
+ */
+struct kbase_hwcnt_enable_map {
+	const struct kbase_hwcnt_metadata *metadata;
+	u64 *enable_map;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer. Array of u32
+ *                                  values.
+ * @metadata: Non-NULL pointer to metadata used to identify, and to describe
+ *            the layout of the Dump Buffer.
+ * @dump_buf: Non-NULL pointer of size metadata->dump_buf_bytes to an array
+ *            of u32 values.
+ */
+struct kbase_hwcnt_dump_buffer {
+	const struct kbase_hwcnt_metadata *metadata;
+	u32 *dump_buf;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array.
+ * @page_addr:  Address of allocated pages. A single allocation is used for all
+ *              Dump Buffers in the array.
+ * @page_order: The allocation order of the pages.
+ * @buf_cnt:    The number of allocated Dump Buffers.
+ * @bufs:       Non-NULL pointer to the array of Dump Buffers.
+ */
+struct kbase_hwcnt_dump_buffer_array {
+	unsigned long page_addr;
+	unsigned int page_order;
+	size_t buf_cnt;
+	struct kbase_hwcnt_dump_buffer *bufs;
+};
+
+/**
+ * kbase_hwcnt_metadata_create() - Create a hardware counter metadata object
+ *                                 from a description.
+ * @desc:     Non-NULL pointer to a hardware counter description.
+ * @metadata: Non-NULL pointer to where created metadata will be stored on
+ *            success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_metadata_create(
+	const struct kbase_hwcnt_description *desc,
+	const struct kbase_hwcnt_metadata **metadata);
+
+/**
+ * kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object.
+ * @metadata: Pointer to hardware counter metadata
+ */
+void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
+
+/**
+ * kbase_hwcnt_metadata_group_count() - Get the number of groups.
+ * @metadata: Non-NULL pointer to metadata.
+ *
+ * Return: Number of hardware counter groups described by metadata.
+ */
+#define kbase_hwcnt_metadata_group_count(metadata) \
+	((metadata)->grp_cnt)
+
+/**
+ * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ *
+ * Return: Type of the group grp.
+ */
+#define kbase_hwcnt_metadata_group_type(metadata, grp) \
+	((metadata)->grp_metadata[(grp)].type)
+
+/**
+ * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ *
+ * Return: Number of blocks in group grp.
+ */
+#define kbase_hwcnt_metadata_block_count(metadata, grp) \
+	((metadata)->grp_metadata[(grp)].blk_cnt)
+
+/**
+ * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ *
+ * Return: Type of the block blk in group grp.
+ */
+#define kbase_hwcnt_metadata_block_type(metadata, grp, blk) \
+	((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].type)
+
+/**
+ * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of
+ *                                               a block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ *
+ * Return: Number of instances of block blk in group grp.
+ */
+#define kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk) \
+	((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].inst_cnt)
+
+/**
+ * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter
+ *                                              headers.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ *
+ * Return: Number of u32 counter headers in each instance of block blk in
+ *         group grp.
+ */
+#define kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk) \
+	((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].hdr_cnt)
+
+/**
+ * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ *
+ * Return: Number of u32 counters in each instance of block blk in group
+ *         grp.
+ */
+#define kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) \
+	((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].ctr_cnt)
+
+/**
+ * kbase_hwcnt_metadata_block_values_count() - Get the number of values.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ *
+ * Return: Number of u32 headers plus counters in each instance of block blk
+ *         in group grp.
+ */
+#define kbase_hwcnt_metadata_block_values_count(metadata, grp, blk) \
+	(kbase_hwcnt_metadata_block_counters_count((metadata), (grp), (blk)) \
+	+ kbase_hwcnt_metadata_block_headers_count((metadata), (grp), (blk)))
+
+/**
+ * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in
+ *                                         the metadata.
+ * @md:       Non-NULL pointer to metadata.
+ * @grp:      size_t variable used as group iterator.
+ * @blk:      size_t variable used as block iterator.
+ * @blk_inst: size_t variable used as block instance iterator.
+ *
+ * Iteration order is group, then block, then block instance (i.e. linearly
+ * through memory).
+ */
+#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \
+	for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \
+		for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \
+			for ((blk_inst) = 0; (blk_inst) < kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); (blk_inst)++)
+
+/**
+ * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail
+ *                                          mask corresponding to the block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ *
+ * Return: The bit index into the avail mask for the block.
+ */
+static inline size_t kbase_hwcnt_metadata_block_avail_bit(
+	const struct kbase_hwcnt_metadata *metadata,
+	size_t grp,
+	size_t blk)
+{
+	const size_t bit =
+		metadata->grp_metadata[grp].avail_mask_index +
+		metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index;
+
+	return bit;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is
+ *                                               available.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: true if the block instance is available, else false.
+ */
+static inline bool kbase_hwcnt_metadata_block_instance_avail(
+	const struct kbase_hwcnt_metadata *metadata,
+	size_t grp,
+	size_t blk,
+	size_t blk_inst)
+{
+	const size_t bit = kbase_hwcnt_metadata_block_avail_bit(
+		metadata, grp, blk) + blk_inst;
+	const u64 mask = 1ull << bit;
+
+	return (metadata->avail_mask & mask) != 0;
+}
+
+/**
+ * kbase_hwcnt_enable_map_alloc() - Allocate an enable map.
+ * @metadata:   Non-NULL pointer to metadata describing the system.
+ * @enable_map: Non-NULL pointer to enable map to be initialised. Will be
+ *              initialised to all zeroes (i.e. all counters disabled).
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_enable_map_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * kbase_hwcnt_enable_map_free() - Free an enable map.
+ * @enable_map: Enable map to be freed.
+ *
+ * Can be safely called on an all-zeroed enable map structure, or on an already
+ * freed enable map.
+ */
+void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block
+ *                                           instance's enable map.
+ * @map:      Non-NULL pointer to (const) enable map.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: (const) u64* to the bitfield(s) used as the enable map for the
+ *         block instance.
+ */
+#define kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst) \
+	((map)->enable_map + \
+	 (map)->metadata->grp_metadata[(grp)].enable_map_index + \
+	 (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_index + \
+	 (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride * (blk_inst))
+
+/**
+ * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required
+ *                                to have at minimum one bit per value.
+ * @val_cnt: Number of values.
+ *
+ * Return: Number of required bitfields.
+ */
+static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt)
+{
+	return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) /
+		KBASE_HWCNT_BITFIELD_BITS;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block.
+ * @dst:      Non-NULL pointer to enable map.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ */
+static inline void kbase_hwcnt_enable_map_block_disable_all(
+	struct kbase_hwcnt_enable_map *dst,
+	size_t grp,
+	size_t blk,
+	size_t blk_inst)
+{
+	const size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+		dst->metadata, grp, blk);
+	const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+	u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance(
+		dst, grp, blk, blk_inst);
+
+	memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES);
+}
+
+/**
+ * kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map.
+ * @dst: Non-NULL pointer to enable map to zero.
+ */
+static inline void kbase_hwcnt_enable_map_disable_all(
+	struct kbase_hwcnt_enable_map *dst)
+{
+	memset(dst->enable_map, 0, dst->metadata->enable_map_bytes);
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block.
+ * @dst:      Non-NULL pointer to enable map.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ */
+static inline void kbase_hwcnt_enable_map_block_enable_all(
+	struct kbase_hwcnt_enable_map *dst,
+	size_t grp,
+	size_t blk,
+	size_t blk_inst)
+{
+	const size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+		dst->metadata, grp, blk);
+	const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+	u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance(
+		dst, grp, blk, blk_inst);
+
+	size_t bitfld_idx;
+
+	for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
+		const u64 remaining_values = val_cnt -
+			(bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
+		u64 block_enable_map_mask = U64_MAX;
+
+		if (remaining_values < KBASE_HWCNT_BITFIELD_BITS)
+			block_enable_map_mask = (1ull << remaining_values) - 1;
+
+		block_enable_map[bitfld_idx] = block_enable_map_mask;
+	}
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in an enable
+ *                                             map.
+ * @dst: Non-NULL pointer to enable map.
+ */
+static inline void kbase_hwcnt_enable_map_enable_all(
+	struct kbase_hwcnt_enable_map *dst)
+{
+	size_t grp, blk, blk_inst;
+
+	kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst)
+		kbase_hwcnt_enable_map_block_enable_all(
+			dst, grp, blk, blk_inst);
+}
+
+/**
+ * kbase_hwcnt_enable_map_copy() - Copy an enable map to another.
+ * @dst: Non-NULL pointer to destination enable map.
+ * @src: Non-NULL pointer to source enable map.
+ *
+ * The dst and src MUST have been created from the same metadata.
+ */
+static inline void kbase_hwcnt_enable_map_copy(
+	struct kbase_hwcnt_enable_map *dst,
+	const struct kbase_hwcnt_enable_map *src)
+{
+	memcpy(dst->enable_map,
+	       src->enable_map,
+	       dst->metadata->enable_map_bytes);
+}
+
+/**
+ * kbase_hwcnt_enable_map_union() - Union dst and src enable maps into dst.
+ * @dst: Non-NULL pointer to destination enable map.
+ * @src: Non-NULL pointer to source enable map.
+ *
+ * The dst and src MUST have been created from the same metadata.
+ */
+static inline void kbase_hwcnt_enable_map_union(
+	struct kbase_hwcnt_enable_map *dst,
+	const struct kbase_hwcnt_enable_map *src)
+{
+	const size_t bitfld_count =
+		dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES;
+	size_t i;
+
+	for (i = 0; i < bitfld_count; i++)
+		dst->enable_map[i] |= src->enable_map[i];
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block
+ *                                          instance are enabled.
+ * @enable_map: Non-NULL pointer to enable map.
+ * @grp:        Index of the group in the metadata.
+ * @blk:        Index of the block in the group.
+ * @blk_inst:   Index of the block instance in the block.
+ *
+ * Return: true if any values in the block are enabled, else false.
+ */
+static inline bool kbase_hwcnt_enable_map_block_enabled(
+	const struct kbase_hwcnt_enable_map *enable_map,
+	size_t grp,
+	size_t blk,
+	size_t blk_inst)
+{
+	bool any_enabled = false;
+	const size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+		enable_map->metadata, grp, blk);
+	const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+	const u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance(
+		enable_map, grp, blk, blk_inst);
+
+	size_t bitfld_idx;
+
+	for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
+		const u64 remaining_values = val_cnt -
+			(bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
+		u64 block_enable_map_mask = U64_MAX;
+
+		if (remaining_values < KBASE_HWCNT_BITFIELD_BITS)
+			block_enable_map_mask = (1ull << remaining_values) - 1;
+
+		any_enabled = any_enabled ||
+			(block_enable_map[bitfld_idx] & block_enable_map_mask);
+	}
+
+	return any_enabled;
+}
+
+/**
+ * kbase_hwcnt_enable_map_any_enabled() - Check if any values are enabled.
+ * @enable_map: Non-NULL pointer to enable map.
+ *
+ * Return: true if any values are enabled, else false.
+ */
+static inline bool kbase_hwcnt_enable_map_any_enabled(
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	size_t grp, blk, blk_inst;
+
+	kbase_hwcnt_metadata_for_each_block(
+		enable_map->metadata, grp, blk, blk_inst) {
+		if (kbase_hwcnt_enable_map_block_enabled(
+			enable_map, grp, blk, blk_inst))
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_value_enabled() - Check if a value in a block
+ *                                                instance is enabled.
+ * @bitfld:  Non-NULL pointer to the block bitfield(s) obtained from a call to
+ *           kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to check in the block instance.
+ *
+ * Return: true if the value was enabled, else false.
+ */
+static inline bool kbase_hwcnt_enable_map_block_value_enabled(
+	const u64 *bitfld,
+	size_t val_idx)
+{
+	const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+	const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+	const u64 mask = 1ull << bit;
+
+	return (bitfld[idx] & mask) != 0;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enable_value() - Enable a value in a block
+ *                                               instance.
+ * @bitfld:  Non-NULL pointer to the block bitfield(s) obtained from a call to
+ *           kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to enable in the block instance.
+ */
+static inline void kbase_hwcnt_enable_map_block_enable_value(
+	u64 *bitfld,
+	size_t val_idx)
+{
+	const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+	const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+	const u64 mask = 1ull << bit;
+
+	bitfld[idx] |= mask;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_disable_value() - Disable a value in a block
+ *                                                instance.
+ * @bitfld:  Non-NULL pointer to the block bitfield(s) obtained from a call to
+ *           kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to disable in the block instance.
+ */
+static inline void kbase_hwcnt_enable_map_block_disable_value(
+	u64 *bitfld,
+	size_t val_idx)
+{
+	const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+	const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+	const u64 mask = 1ull << bit;
+
+	bitfld[idx] &= ~mask;
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer.
+ * @metadata: Non-NULL pointer to metadata describing the system.
+ * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be
+ *            initialised to undefined values, so must be used as a copy dest,
+ *            or cleared before use.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_free() - Free a dump buffer.
+ * @dump_buf: Dump buffer to be freed.
+ *
+ * Can be safely called on an all-zeroed dump buffer structure, or on an already
+ * freed dump buffer.
+ */
+void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers.
+ * @metadata:  Non-NULL pointer to metadata describing the system.
+ * @n:         Number of dump buffers to allocate
+ * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. Each
+ *             dump buffer in the array will be initialised to undefined values,
+ *             so must be used as a copy dest, or cleared before use.
+ *
+ * A single contiguous page allocation will be used for all of the buffers
+ * inside the array, where:
+ * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_array_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	size_t n,
+	struct kbase_hwcnt_dump_buffer_array *dump_bufs);
+
+/**
+ * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array.
+ * @dump_bufs: Dump buffer array to be freed.
+ *
+ * Can be safely called on an all-zeroed dump buffer array structure, or on an
+ * already freed dump buffer array.
+ */
+void kbase_hwcnt_dump_buffer_array_free(
+	struct kbase_hwcnt_dump_buffer_array *dump_bufs);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block
+ *                                            instance's dump buffer.
+ * @buf:      Non-NULL pointer to (const) dump buffer.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: (const) u32* to the dump buffer for the block instance.
+ */
+#define kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst) \
+	((buf)->dump_buf + \
+	 (buf)->metadata->grp_metadata[(grp)].dump_buf_index + \
+	 (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_index + \
+	 (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_stride * (blk_inst))
+
+/**
+ * kbase_hwcnt_dump_buffer_zero() - Zero all enabled values in dst.
+ *                                  After the operation, all non-enabled values
+ *                                  will be undefined.
+ * @dst:            Non-NULL pointer to dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata.
+ */
+void kbase_hwcnt_dump_buffer_zero(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_zero(
+	u32 *dst_blk,
+	size_t val_cnt)
+{
+	memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES));
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_zero_strict() - Zero all values in dst.
+ *                                         After the operation, all values
+ *                                         (including padding bytes) will be
+ *                                         zero.
+ *                                         Slower than the non-strict variant.
+ * @dst: Non-NULL pointer to dump buffer.
+ */
+void kbase_hwcnt_dump_buffer_zero_strict(
+	struct kbase_hwcnt_dump_buffer *dst);
+
+/**
+ * kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in
+ *                                              dst (including padding bytes and
+ *                                              unavailable blocks).
+ *                                              After the operation, all enabled
+ *                                              values will be unchanged.
+ * @dst:            Non-NULL pointer to dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata.
+ */
+void kbase_hwcnt_dump_buffer_zero_non_enabled(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled
+ *                                                    values in a block.
+ *                                                    After the operation, all
+ *                                                    enabled values will be
+ *                                                    unchanged.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em:  Non-NULL pointer to the block bitfield(s) obtained from a call to
+ *           kbase_hwcnt_enable_map_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled(
+	u32 *dst_blk,
+	const u64 *blk_em,
+	size_t val_cnt)
+{
+	size_t val;
+
+	for (val = 0; val < val_cnt; val++) {
+		if (!kbase_hwcnt_enable_map_block_value_enabled(blk_em, val))
+			dst_blk[val] = 0;
+	}
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_copy() - Copy all enabled values from src to dst.
+ *                                  After the operation, all non-enabled values
+ *                                  will be undefined.
+ * @dst:            Non-NULL pointer to dst dump buffer.
+ * @src:            Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_copy(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_copy(
+	u32 *dst_blk,
+	const u32 *src_blk,
+	size_t val_cnt)
+{
+	/* Copy all the counters in the block instance.
+	 * Values of non-enabled counters are undefined.
+	 */
+	memcpy(dst_blk, src_blk, (val_cnt * KBASE_HWCNT_VALUE_BYTES));
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_copy_strict() - Copy all enabled values from src to
+ *                                         dst.
+ *                                         After the operation, all non-enabled
+ *                                         values (including padding bytes) will
+ *                                         be zero.
+ *                                         Slower than the non-strict variant.
+ * @dst:            Non-NULL pointer to dst dump buffer.
+ * @src:            Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_copy_strict(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values
+ *                                               from src to dst.
+ *                                               After the operation, all
+ *                                               non-enabled values will be
+ *                                               zero.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em:  Non-NULL pointer to the block bitfield(s) obtained from a call to
+ *           kbase_hwcnt_enable_map_block_instance.
+ * @val_cnt: Number of values in the block.
+ *
+ * After the copy, any disabled values in dst will be zero.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_copy_strict(
+	u32 *dst_blk,
+	const u32 *src_blk,
+	const u64 *blk_em,
+	size_t val_cnt)
+{
+	size_t val;
+
+	for (val = 0; val < val_cnt; val++) {
+		bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(
+			blk_em, val);
+
+		dst_blk[val] = val_enabled ? src_blk[val] : 0;
+	}
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_accumulate() - Copy all enabled headers and
+ *                                        accumulate all enabled counters from
+ *                                        src to dst.
+ *                                        After the operation, all non-enabled
+ *                                        values will be undefined.
+ * @dst:            Non-NULL pointer to dst dump buffer.
+ * @src:            Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_accumulate(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and
+ *                                              accumulate all block counters
+ *                                              from src to dst.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @hdr_cnt: Number of headers in the block.
+ * @ctr_cnt: Number of counters in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_accumulate(
+	u32 *dst_blk,
+	const u32 *src_blk,
+	size_t hdr_cnt,
+	size_t ctr_cnt)
+{
+	size_t ctr;
+	/* Copy all the headers in the block instance.
+	 * Values of non-enabled headers are undefined.
+	 */
+	memcpy(dst_blk, src_blk, hdr_cnt * KBASE_HWCNT_VALUE_BYTES);
+
+	/* Accumulate all the counters in the block instance.
+	 * Values of non-enabled counters are undefined.
+	 */
+	for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) {
+		u32 *dst_ctr = dst_blk + ctr;
+		const u32 *src_ctr = src_blk + ctr;
+
+		const u32 src_counter = *src_ctr;
+		const u32 dst_counter = *dst_ctr;
+
+		/* Saturating add */
+		u32 accumulated = src_counter + dst_counter;
+
+		if (accumulated < src_counter)
+			accumulated = U32_MAX;
+
+		*dst_ctr = accumulated;
+	}
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_accumulate_strict() - Copy all enabled headers and
+ *                                               accumulate all enabled counters
+ *                                               from src to dst.
+ *                                               After the operation, all
+ *                                               non-enabled values (including
+ *                                               padding bytes) will be zero.
+ *                                               Slower than the non-strict
+ *                                               variant.
+ * @dst:            Non-NULL pointer to dst dump buffer.
+ * @src:            Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_accumulate_strict(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block
+ *                                                     headers and accumulate
+ *                                                     all block counters from
+ *                                                     src to dst.
+ *                                                     After the operation, all
+ *                                                     non-enabled values will
+ *                                                     be zero.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em:  Non-NULL pointer to the block bitfield(s) obtained from a call to
+ *           kbase_hwcnt_enable_map_block_instance.
+ * @hdr_cnt: Number of headers in the block.
+ * @ctr_cnt: Number of counters in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(
+	u32 *dst_blk,
+	const u32 *src_blk,
+	const u64 *blk_em,
+	size_t hdr_cnt,
+	size_t ctr_cnt)
+{
+	size_t ctr;
+
+	kbase_hwcnt_dump_buffer_block_copy_strict(
+		dst_blk, src_blk, blk_em, hdr_cnt);
+
+	for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) {
+		bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled(
+			blk_em, ctr);
+
+		u32 *dst_ctr = dst_blk + ctr;
+		const u32 *src_ctr = src_blk + ctr;
+
+		const u32 src_counter = *src_ctr;
+		const u32 dst_counter = *dst_ctr;
+
+		/* Saturating add */
+		u32 accumulated = src_counter + dst_counter;
+
+		if (accumulated < src_counter)
+			accumulated = U32_MAX;
+
+		*dst_ctr = ctr_enabled ? accumulated : 0;
+	}
+}
+
+#endif /* _KBASE_HWCNT_TYPES_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c
new file mode 100755
index 0000000..917e47c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c
@@ -0,0 +1,790 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_accumulator.h"
+#include "mali_kbase_hwcnt_context.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_malisw.h"
+#include "mali_kbase_debug.h"
+#include "mali_kbase_linux.h"
+
+#include <linux/mutex.h>
+#include <linux/slab.h>
+
+/**
+ * struct kbase_hwcnt_virtualizer - Hardware counter virtualizer structure.
+ * @hctx:              Hardware counter context being virtualized.
+ * @dump_threshold_ns: Minimum threshold period for dumps between different
+ *                     clients where a new accumulator dump will not be
+ *                     performed, and instead accumulated values will be used.
+ *                     If 0, rate limiting is disabled.
+ * @metadata:          Hardware counter metadata.
+ * @lock:              Lock acquired at all entrypoints, to protect mutable
+ *                     state.
+ * @client_count:      Current number of virtualizer clients.
+ * @clients:           List of virtualizer clients.
+ * @accum:             Hardware counter accumulator. NULL if no clients.
+ * @scratch_map:       Enable map used as scratch space during counter changes.
+ * @scratch_buf:       Dump buffer used as scratch space during dumps.
+ * @ts_last_dump_ns:   End time of most recent dump across all clients.
+ */
+struct kbase_hwcnt_virtualizer {
+	struct kbase_hwcnt_context *hctx;
+	u64 dump_threshold_ns;
+	const struct kbase_hwcnt_metadata *metadata;
+	struct mutex lock;
+	size_t client_count;
+	struct list_head clients;
+	struct kbase_hwcnt_accumulator *accum;
+	struct kbase_hwcnt_enable_map scratch_map;
+	struct kbase_hwcnt_dump_buffer scratch_buf;
+	u64 ts_last_dump_ns;
+};
+
+/**
+ * struct kbase_hwcnt_virtualizer_client - Virtualizer client structure.
+ * @node:        List node used for virtualizer client list.
+ * @hvirt:       Hardware counter virtualizer.
+ * @enable_map:  Enable map with client's current enabled counters.
+ * @accum_buf:   Dump buffer with client's current accumulated counters.
+ * @has_accum:   True if accum_buf contains any accumulated counters.
+ * @ts_start_ns: Counter collection start time of current dump.
+ */
+struct kbase_hwcnt_virtualizer_client {
+	struct list_head node;
+	struct kbase_hwcnt_virtualizer *hvirt;
+	struct kbase_hwcnt_enable_map enable_map;
+	struct kbase_hwcnt_dump_buffer accum_buf;
+	bool has_accum;
+	u64 ts_start_ns;
+};
+
+const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata(
+	struct kbase_hwcnt_virtualizer *hvirt)
+{
+	if (!hvirt)
+		return NULL;
+
+	return hvirt->metadata;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_metadata);
+
+/**
+ * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory.
+ * @hvcli: Pointer to virtualizer client.
+ *
+ * Will safely free a client in any partial state of construction.
+ */
+static void kbasep_hwcnt_virtualizer_client_free(
+	struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+	if (!hvcli)
+		return;
+
+	kbase_hwcnt_dump_buffer_free(&hvcli->accum_buf);
+	kbase_hwcnt_enable_map_free(&hvcli->enable_map);
+	kfree(hvcli);
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_alloc - Allocate memory for a virtualizer
+ *                                         client.
+ * @metadata:  Non-NULL pointer to counter metadata.
+ * @out_hvcli: Non-NULL pointer to where created client will be stored on
+ *             success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	struct kbase_hwcnt_virtualizer_client **out_hvcli)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer_client *hvcli = NULL;
+
+	WARN_ON(!metadata);
+	WARN_ON(!out_hvcli);
+
+	hvcli = kzalloc(sizeof(*hvcli), GFP_KERNEL);
+	if (!hvcli)
+		return -ENOMEM;
+
+	errcode = kbase_hwcnt_enable_map_alloc(metadata, &hvcli->enable_map);
+	if (errcode)
+		goto error;
+
+	errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hvcli->accum_buf);
+	if (errcode)
+		goto error;
+
+	*out_hvcli = hvcli;
+	return 0;
+error:
+	kbasep_hwcnt_virtualizer_client_free(hvcli);
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_accumulate - Accumulate a dump buffer into a
+ *                                              client's accumulation buffer.
+ * @hvcli:    Non-NULL pointer to virtualizer client.
+ * @dump_buf: Non-NULL pointer to dump buffer to accumulate from.
+ */
+static void kbasep_hwcnt_virtualizer_client_accumulate(
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	const struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	WARN_ON(!hvcli);
+	WARN_ON(!dump_buf);
+	lockdep_assert_held(&hvcli->hvirt->lock);
+
+	if (hvcli->has_accum) {
+		/* If already some accumulation, accumulate */
+		kbase_hwcnt_dump_buffer_accumulate(
+			&hvcli->accum_buf, dump_buf, &hvcli->enable_map);
+	} else {
+		/* If no accumulation, copy */
+		kbase_hwcnt_dump_buffer_copy(
+			&hvcli->accum_buf, dump_buf, &hvcli->enable_map);
+	}
+	hvcli->has_accum = true;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_accumulator_term - Terminate the hardware counter
+ *                                             accumulator after final client
+ *                                             removal.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Will safely terminate the accumulator in any partial state of initialisation.
+ */
+static void kbasep_hwcnt_virtualizer_accumulator_term(
+	struct kbase_hwcnt_virtualizer *hvirt)
+{
+	WARN_ON(!hvirt);
+	lockdep_assert_held(&hvirt->lock);
+	WARN_ON(hvirt->client_count);
+
+	kbase_hwcnt_dump_buffer_free(&hvirt->scratch_buf);
+	kbase_hwcnt_enable_map_free(&hvirt->scratch_map);
+	kbase_hwcnt_accumulator_release(hvirt->accum);
+	hvirt->accum = NULL;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_accumulator_init - Initialise the hardware counter
+ *                                             accumulator before first client
+ *                                             addition.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_accumulator_init(
+	struct kbase_hwcnt_virtualizer *hvirt)
+{
+	int errcode;
+
+	WARN_ON(!hvirt);
+	lockdep_assert_held(&hvirt->lock);
+	WARN_ON(hvirt->client_count);
+	WARN_ON(hvirt->accum);
+
+	errcode = kbase_hwcnt_accumulator_acquire(
+		hvirt->hctx, &hvirt->accum);
+	if (errcode)
+		goto error;
+
+	errcode = kbase_hwcnt_enable_map_alloc(
+		hvirt->metadata, &hvirt->scratch_map);
+	if (errcode)
+		goto error;
+
+	errcode = kbase_hwcnt_dump_buffer_alloc(
+		hvirt->metadata, &hvirt->scratch_buf);
+	if (errcode)
+		goto error;
+
+	return 0;
+error:
+	kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_add - Add a newly allocated client to the
+ *                                       virtualizer.
+ * @hvirt:      Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli:      Non-NULL pointer to the virtualizer client to add.
+ * @enable_map: Non-NULL pointer to client's initial enable map.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_add(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	int errcode = 0;
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	WARN_ON(!enable_map);
+	lockdep_assert_held(&hvirt->lock);
+
+	if (hvirt->client_count == 0)
+		/* First client added, so initialise the accumulator */
+		errcode = kbasep_hwcnt_virtualizer_accumulator_init(hvirt);
+	if (errcode)
+		return errcode;
+
+	hvirt->client_count += 1;
+
+	if (hvirt->client_count == 1) {
+		/* First client, so just pass the enable map onwards as is */
+		errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+			enable_map, &ts_start_ns, &ts_end_ns, NULL);
+	} else {
+		struct kbase_hwcnt_virtualizer_client *pos;
+
+		/* Make the scratch enable map the union of all enable maps */
+		kbase_hwcnt_enable_map_copy(
+			&hvirt->scratch_map, enable_map);
+		list_for_each_entry(pos, &hvirt->clients, node)
+			kbase_hwcnt_enable_map_union(
+				&hvirt->scratch_map, &pos->enable_map);
+
+		/* Set the counters with the new union enable map */
+		errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+			&hvirt->scratch_map,
+			&ts_start_ns, &ts_end_ns,
+			&hvirt->scratch_buf);
+		/* Accumulate into only existing clients' accumulation bufs */
+		if (!errcode)
+			list_for_each_entry(pos, &hvirt->clients, node)
+				kbasep_hwcnt_virtualizer_client_accumulate(
+					pos, &hvirt->scratch_buf);
+	}
+	if (errcode)
+		goto error;
+
+	list_add(&hvcli->node, &hvirt->clients);
+	hvcli->hvirt = hvirt;
+	kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
+	hvcli->has_accum = false;
+	hvcli->ts_start_ns = ts_end_ns;
+
+	/* Store the most recent dump time for rate limiting */
+	hvirt->ts_last_dump_ns = ts_end_ns;
+
+	return 0;
+error:
+	hvirt->client_count -= 1;
+	if (hvirt->client_count == 0)
+		kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_remove - Remove a client from the
+ *                                          virtualizer.
+ * @hvirt:      Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli:      Non-NULL pointer to the virtualizer client to remove.
+ */
+static void kbasep_hwcnt_virtualizer_client_remove(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+	int errcode = 0;
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	lockdep_assert_held(&hvirt->lock);
+
+	list_del(&hvcli->node);
+	hvirt->client_count -= 1;
+
+	if (hvirt->client_count == 0) {
+		/* Last client removed, so terminate the accumulator */
+		kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+	} else {
+		struct kbase_hwcnt_virtualizer_client *pos;
+		/* Make the scratch enable map the union of all enable maps */
+		kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map);
+		list_for_each_entry(pos, &hvirt->clients, node)
+			kbase_hwcnt_enable_map_union(
+				&hvirt->scratch_map, &pos->enable_map);
+		/* Set the counters with the new union enable map */
+		errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+			&hvirt->scratch_map,
+			&ts_start_ns, &ts_end_ns,
+			&hvirt->scratch_buf);
+		/* Accumulate into remaining clients' accumulation bufs */
+		if (!errcode)
+			list_for_each_entry(pos, &hvirt->clients, node)
+				kbasep_hwcnt_virtualizer_client_accumulate(
+					pos, &hvirt->scratch_buf);
+
+		/* Store the most recent dump time for rate limiting */
+		hvirt->ts_last_dump_ns = ts_end_ns;
+	}
+	WARN_ON(errcode);
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's
+ *                                                currently enabled counters,
+ *                                                and enable a new set of
+ *                                                counters that will be used for
+ *                                                subsequent dumps.
+ * @hvirt:       Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli:       Non-NULL pointer to the virtualizer client.
+ * @enable_map:  Non-NULL pointer to the new counter enable map for the client.
+ *               Must have the same metadata as the virtualizer.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_set_counters(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer_client *pos;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	WARN_ON(!enable_map);
+	WARN_ON(!ts_start_ns);
+	WARN_ON(!ts_end_ns);
+	WARN_ON(enable_map->metadata != hvirt->metadata);
+	WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+	lockdep_assert_held(&hvirt->lock);
+
+	/* Make the scratch enable map the union of all enable maps */
+	kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map);
+	list_for_each_entry(pos, &hvirt->clients, node)
+		/* Ignore the enable map of the selected client */
+		if (pos != hvcli)
+			kbase_hwcnt_enable_map_union(
+				&hvirt->scratch_map, &pos->enable_map);
+
+	/* Set the counters with the new union enable map */
+	errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+		&hvirt->scratch_map, ts_start_ns, ts_end_ns,
+		&hvirt->scratch_buf);
+	if (errcode)
+		return errcode;
+
+	/* Accumulate into all accumulation bufs except the selected client's */
+	list_for_each_entry(pos, &hvirt->clients, node)
+		if (pos != hvcli)
+			kbasep_hwcnt_virtualizer_client_accumulate(
+				pos, &hvirt->scratch_buf);
+
+	/* Finally, write into the dump buf */
+	if (dump_buf) {
+		const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf;
+
+		if (hvcli->has_accum) {
+			kbase_hwcnt_dump_buffer_accumulate(
+				&hvcli->accum_buf, src, &hvcli->enable_map);
+			src = &hvcli->accum_buf;
+		}
+		kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map);
+	}
+	hvcli->has_accum = false;
+
+	/* Update the selected client's enable map */
+	kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
+
+	/* Fix up the timestamps */
+	*ts_start_ns = hvcli->ts_start_ns;
+	hvcli->ts_start_ns = *ts_end_ns;
+
+	/* Store the most recent dump time for rate limiting */
+	hvirt->ts_last_dump_ns = *ts_end_ns;
+
+	return errcode;
+}
+
+int kbase_hwcnt_virtualizer_client_set_counters(
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer *hvirt;
+
+	if (!hvcli || !enable_map || !ts_start_ns || !ts_end_ns)
+		return -EINVAL;
+
+	hvirt = hvcli->hvirt;
+
+	if ((enable_map->metadata != hvirt->metadata) ||
+	    (dump_buf && (dump_buf->metadata != hvirt->metadata)))
+		return -EINVAL;
+
+	mutex_lock(&hvirt->lock);
+
+	if ((hvirt->client_count == 1) && (!hvcli->has_accum)) {
+		/*
+		 * If there's only one client with no prior accumulation, we can
+		 * completely skip the virtualize and just pass through the call
+		 * to the accumulator, saving a fair few copies and
+		 * accumulations.
+		 */
+		errcode = kbase_hwcnt_accumulator_set_counters(
+			hvirt->accum, enable_map,
+			ts_start_ns, ts_end_ns, dump_buf);
+
+		if (!errcode) {
+			/* Update the selected client's enable map */
+			kbase_hwcnt_enable_map_copy(
+				&hvcli->enable_map, enable_map);
+
+			/* Fix up the timestamps */
+			*ts_start_ns = hvcli->ts_start_ns;
+			hvcli->ts_start_ns = *ts_end_ns;
+
+			/* Store the most recent dump time for rate limiting */
+			hvirt->ts_last_dump_ns = *ts_end_ns;
+		}
+	} else {
+		/* Otherwise, do the full virtualize */
+		errcode = kbasep_hwcnt_virtualizer_client_set_counters(
+			hvirt, hvcli, enable_map,
+			ts_start_ns, ts_end_ns, dump_buf);
+	}
+
+	mutex_unlock(&hvirt->lock);
+
+	return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_set_counters);
+
+/**
+ * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's
+ *                                        currently enabled counters.
+ * @hvirt:       Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli:       Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_dump(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer_client *pos;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	WARN_ON(!ts_start_ns);
+	WARN_ON(!ts_end_ns);
+	WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+	lockdep_assert_held(&hvirt->lock);
+
+	/* Perform the dump */
+	errcode = kbase_hwcnt_accumulator_dump(hvirt->accum,
+		ts_start_ns, ts_end_ns, &hvirt->scratch_buf);
+	if (errcode)
+		return errcode;
+
+	/* Accumulate into all accumulation bufs except the selected client's */
+	list_for_each_entry(pos, &hvirt->clients, node)
+		if (pos != hvcli)
+			kbasep_hwcnt_virtualizer_client_accumulate(
+				pos, &hvirt->scratch_buf);
+
+	/* Finally, write into the dump buf */
+	if (dump_buf) {
+		const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf;
+
+		if (hvcli->has_accum) {
+			kbase_hwcnt_dump_buffer_accumulate(
+				&hvcli->accum_buf, src, &hvcli->enable_map);
+			src = &hvcli->accum_buf;
+		}
+		kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map);
+	}
+	hvcli->has_accum = false;
+
+	/* Fix up the timestamps */
+	*ts_start_ns = hvcli->ts_start_ns;
+	hvcli->ts_start_ns = *ts_end_ns;
+
+	/* Store the most recent dump time for rate limiting */
+	hvirt->ts_last_dump_ns = *ts_end_ns;
+
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_dump_rate_limited - Perform a dump of the
+ *                                           client's currently enabled counters
+ *                                           if it hasn't been rate limited,
+ *                                           otherwise return the client's most
+ *                                           recent accumulation.
+ * @hvirt:       Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli:       Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_dump_rate_limited(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	bool rate_limited = true;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	WARN_ON(!ts_start_ns);
+	WARN_ON(!ts_end_ns);
+	WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+	lockdep_assert_held(&hvirt->lock);
+
+	if (hvirt->dump_threshold_ns == 0) {
+		/* Threshold == 0, so rate limiting disabled */
+		rate_limited = false;
+	} else if (hvirt->ts_last_dump_ns == hvcli->ts_start_ns) {
+		/* Last dump was performed by this client, and dumps from an
+		 * individual client are never rate limited
+		 */
+		rate_limited = false;
+	} else {
+		const u64 ts_ns =
+			kbase_hwcnt_accumulator_timestamp_ns(hvirt->accum);
+		const u64 time_since_last_dump_ns =
+			ts_ns - hvirt->ts_last_dump_ns;
+
+		/* Dump period equals or exceeds the threshold */
+		if (time_since_last_dump_ns >= hvirt->dump_threshold_ns)
+			rate_limited = false;
+	}
+
+	if (!rate_limited)
+		return kbasep_hwcnt_virtualizer_client_dump(
+			hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf);
+
+	/* If we've gotten this far, the client must have something accumulated
+	 * otherwise it is a logic error
+	 */
+	WARN_ON(!hvcli->has_accum);
+
+	if (dump_buf)
+		kbase_hwcnt_dump_buffer_copy(
+			dump_buf, &hvcli->accum_buf, &hvcli->enable_map);
+	hvcli->has_accum = false;
+
+	*ts_start_ns = hvcli->ts_start_ns;
+	*ts_end_ns = hvirt->ts_last_dump_ns;
+	hvcli->ts_start_ns = hvirt->ts_last_dump_ns;
+
+	return 0;
+}
+
+int kbase_hwcnt_virtualizer_client_dump(
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer *hvirt;
+
+	if (!hvcli || !ts_start_ns || !ts_end_ns)
+		return -EINVAL;
+
+	hvirt = hvcli->hvirt;
+
+	if (dump_buf && (dump_buf->metadata != hvirt->metadata))
+		return -EINVAL;
+
+	mutex_lock(&hvirt->lock);
+
+	if ((hvirt->client_count == 1) && (!hvcli->has_accum)) {
+		/*
+		 * If there's only one client with no prior accumulation, we can
+		 * completely skip the virtualize and just pass through the call
+		 * to the accumulator, saving a fair few copies and
+		 * accumulations.
+		 */
+		errcode = kbase_hwcnt_accumulator_dump(
+			hvirt->accum, ts_start_ns, ts_end_ns, dump_buf);
+
+		if (!errcode) {
+			/* Fix up the timestamps */
+			*ts_start_ns = hvcli->ts_start_ns;
+			hvcli->ts_start_ns = *ts_end_ns;
+
+			/* Store the most recent dump time for rate limiting */
+			hvirt->ts_last_dump_ns = *ts_end_ns;
+		}
+	} else {
+		/* Otherwise, do the full virtualize */
+		errcode = kbasep_hwcnt_virtualizer_client_dump_rate_limited(
+			hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf);
+	}
+
+	mutex_unlock(&hvirt->lock);
+
+	return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_dump);
+
+int kbase_hwcnt_virtualizer_client_create(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	struct kbase_hwcnt_virtualizer_client **out_hvcli)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer_client *hvcli;
+
+	if (!hvirt || !enable_map || !out_hvcli ||
+	    (enable_map->metadata != hvirt->metadata))
+		return -EINVAL;
+
+	errcode = kbasep_hwcnt_virtualizer_client_alloc(
+		hvirt->metadata, &hvcli);
+	if (errcode)
+		return errcode;
+
+	mutex_lock(&hvirt->lock);
+
+	errcode = kbasep_hwcnt_virtualizer_client_add(hvirt, hvcli, enable_map);
+
+	mutex_unlock(&hvirt->lock);
+
+	if (errcode) {
+		kbasep_hwcnt_virtualizer_client_free(hvcli);
+		return errcode;
+	}
+
+	*out_hvcli = hvcli;
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_create);
+
+void kbase_hwcnt_virtualizer_client_destroy(
+	struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+	if (!hvcli)
+		return;
+
+	mutex_lock(&hvcli->hvirt->lock);
+
+	kbasep_hwcnt_virtualizer_client_remove(hvcli->hvirt, hvcli);
+
+	mutex_unlock(&hvcli->hvirt->lock);
+
+	kbasep_hwcnt_virtualizer_client_free(hvcli);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_destroy);
+
+int kbase_hwcnt_virtualizer_init(
+	struct kbase_hwcnt_context *hctx,
+	u64 dump_threshold_ns,
+	struct kbase_hwcnt_virtualizer **out_hvirt)
+{
+	struct kbase_hwcnt_virtualizer *virt;
+	const struct kbase_hwcnt_metadata *metadata;
+
+	if (!hctx || !out_hvirt)
+		return -EINVAL;
+
+	metadata = kbase_hwcnt_context_metadata(hctx);
+	if (!metadata)
+		return -EINVAL;
+
+	virt = kzalloc(sizeof(*virt), GFP_KERNEL);
+	if (!virt)
+		return -ENOMEM;
+
+	virt->hctx = hctx;
+	virt->dump_threshold_ns = dump_threshold_ns;
+	virt->metadata = metadata;
+
+	mutex_init(&virt->lock);
+	INIT_LIST_HEAD(&virt->clients);
+
+	*out_hvirt = virt;
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_init);
+
+void kbase_hwcnt_virtualizer_term(
+	struct kbase_hwcnt_virtualizer *hvirt)
+{
+	if (!hvirt)
+		return;
+
+	/* Non-zero client count implies client leak */
+	if (WARN_ON(hvirt->client_count != 0)) {
+		struct kbase_hwcnt_virtualizer_client *pos, *n;
+
+		list_for_each_entry_safe(pos, n, &hvirt->clients, node)
+			kbase_hwcnt_virtualizer_client_destroy(pos);
+	}
+
+	WARN_ON(hvirt->client_count != 0);
+	WARN_ON(hvirt->accum);
+
+	kfree(hvirt);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_term);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h
new file mode 100755
index 0000000..8f628c3
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h
@@ -0,0 +1,145 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Hardware counter virtualizer API.
+ *
+ * Virtualizes a hardware counter context, so multiple clients can access
+ * a single hardware counter resource as though each was the exclusive user.
+ */
+
+#ifndef _KBASE_HWCNT_VIRTUALIZER_H_
+#define _KBASE_HWCNT_VIRTUALIZER_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_context;
+struct kbase_hwcnt_virtualizer;
+struct kbase_hwcnt_virtualizer_client;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * kbase_hwcnt_virtualizer_init - Initialise a hardware counter virtualizer.
+ * @hctx:              Non-NULL pointer to the hardware counter context to
+ *                     virtualize.
+ * @dump_threshold_ns: Minimum threshold period for dumps between different
+ *                     clients where a new accumulator dump will not be
+ *                     performed, and instead accumulated values will be used.
+ *                     If 0, rate limiting will be disabled.
+ * @out_hvirt:         Non-NULL pointer to where the pointer to the created
+ *                     virtualizer will be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_virtualizer_init(
+	struct kbase_hwcnt_context *hctx,
+	u64 dump_threshold_ns,
+	struct kbase_hwcnt_virtualizer **out_hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer.
+ * @hvirt: Pointer to virtualizer to be terminated.
+ */
+void kbase_hwcnt_virtualizer_term(
+	struct kbase_hwcnt_virtualizer *hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by
+ *                                    the virtualizer, so related counter data
+ *                                    structures can be created.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Return: Non-NULL pointer to metadata, or NULL on error.
+ */
+const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata(
+	struct kbase_hwcnt_virtualizer *hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client.
+ * @hvirt:      Non-NULL pointer to the hardware counter virtualizer.
+ * @enable_map: Non-NULL pointer to the enable map for the client. Must have the
+ *              same metadata as the virtualizer.
+ * @out_hvcli:  Non-NULL pointer to where the pointer to the created client will
+ *              be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_virtualizer_client_create(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	struct kbase_hwcnt_virtualizer_client **out_hvcli);
+
+/**
+ * kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client.
+ * @hvcli: Pointer to the hardware counter client.
+ */
+void kbase_hwcnt_virtualizer_client_destroy(
+	struct kbase_hwcnt_virtualizer_client *hvcli);
+
+/**
+ * kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's
+ *                                               currently enabled counters, and
+ *                                               enable a new set of counters
+ *                                               that will be used for
+ *                                               subsequent dumps.
+ * @hvcli:       Non-NULL pointer to the virtualizer client.
+ * @enable_map:  Non-NULL pointer to the new counter enable map for the client.
+ *               Must have the same metadata as the virtualizer.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_virtualizer_client_set_counters(
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's
+ *                                       currently enabled counters.
+ * @hvcli:       Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_virtualizer_client_dump(
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf);
+
+#endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
new file mode 100755
index 0000000..525bfb6
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
@@ -0,0 +1,900 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IOCTL_H_
+#define _KBASE_IOCTL_H_
+
+#ifdef __cpluscplus
+extern "C" {
+#endif
+
+#include <asm-generic/ioctl.h>
+#include <linux/types.h>
+
+#define KBASE_IOCTL_TYPE 0x80
+
+/*
+ * 11.1:
+ * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags
+ * 11.2:
+ * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_SECURE,
+ *   which some user-side clients prior to 11.2 might fault if they received
+ *   them
+ * 11.3:
+ * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and
+ *   KBASE_IOCTL_STICKY_RESOURCE_UNMAP
+ * 11.4:
+ * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET
+ * 11.5:
+ * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD)
+ * 11.6:
+ * - Added flags field to base_jit_alloc_info structure, which can be used to
+ *   specify pseudo chunked tiler alignment for JIT allocations.
+ * 11.7:
+ * - Removed UMP support
+ * 11.8:
+ * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags
+ * 11.9:
+ * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY
+ *   under base_mem_alloc_flags
+ * 11.10:
+ * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for
+ *   JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations
+ *   with one softjob.
+ * 11.11:
+ * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags
+ * 11.12:
+ * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS
+ * 11.13:
+ * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT
+ * 11.14:
+ * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set
+ *   under base_mem_alloc_flags
+ * 11.15:
+ * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags.
+ * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be
+ *   passed to mmap().
+ * 11.16:
+ * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf.
+ * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for
+ *   dma-buf. Now, buffers are mapped on GPU when first imported, no longer
+ *   requiring external resource or sticky resource tracking. UNLESS,
+ *   CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled.
+ */
+#define BASE_UK_VERSION_MAJOR 11
+#define BASE_UK_VERSION_MINOR 16
+
+/**
+ * struct kbase_ioctl_version_check - Check version compatibility with kernel
+ *
+ * @major: Major version number
+ * @minor: Minor version number
+ */
+struct kbase_ioctl_version_check {
+	__u16 major;
+	__u16 minor;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK \
+	_IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
+
+/**
+ * struct kbase_ioctl_set_flags - Set kernel context creation flags
+ *
+ * @create_flags: Flags - see base_context_create_flags
+ */
+struct kbase_ioctl_set_flags {
+	__u32 create_flags;
+};
+
+#define KBASE_IOCTL_SET_FLAGS \
+	_IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags)
+
+/**
+ * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
+ *
+ * @addr: Memory address of an array of struct base_jd_atom_v2
+ * @nr_atoms: Number of entries in the array
+ * @stride: sizeof(struct base_jd_atom_v2)
+ */
+struct kbase_ioctl_job_submit {
+	__u64 addr;
+	__u32 nr_atoms;
+	__u32 stride;
+};
+
+#define KBASE_IOCTL_JOB_SUBMIT \
+	_IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit)
+
+/**
+ * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel
+ *
+ * @buffer: Pointer to the buffer to store properties into
+ * @size: Size of the buffer
+ * @flags: Flags - must be zero for now
+ *
+ * The ioctl will return the number of bytes stored into @buffer or an error
+ * on failure (e.g. @size is too small). If @size is specified as 0 then no
+ * data will be written but the return value will be the number of bytes needed
+ * for all the properties.
+ *
+ * @flags may be used in the future to request a different format for the
+ * buffer. With @flags == 0 the following format is used.
+ *
+ * The buffer will be filled with pairs of values, a u32 key identifying the
+ * property followed by the value. The size of the value is identified using
+ * the bottom bits of the key. The value then immediately followed the key and
+ * is tightly packed (there is no padding). All keys and values are
+ * little-endian.
+ *
+ * 00 = u8
+ * 01 = u16
+ * 10 = u32
+ * 11 = u64
+ */
+struct kbase_ioctl_get_gpuprops {
+	__u64 buffer;
+	__u32 size;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_GET_GPUPROPS \
+	_IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops)
+
+#define KBASE_IOCTL_POST_TERM \
+	_IO(KBASE_IOCTL_TYPE, 4)
+
+/**
+ * union kbase_ioctl_mem_alloc - Allocate memory on the GPU
+ *
+ * @va_pages: The number of pages of virtual address space to reserve
+ * @commit_pages: The number of physical pages to allocate
+ * @extent: The number of extra pages to allocate on each GPU fault which grows
+ *          the region
+ * @flags: Flags
+ * @gpu_va: The GPU virtual address which is allocated
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_alloc {
+	struct {
+		__u64 va_pages;
+		__u64 commit_pages;
+		__u64 extent;
+		__u64 flags;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALLOC \
+	_IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc)
+
+/**
+ * struct kbase_ioctl_mem_query - Query properties of a GPU memory region
+ * @gpu_addr: A GPU address contained within the region
+ * @query: The type of query
+ * @value: The result of the query
+ *
+ * Use a %KBASE_MEM_QUERY_xxx flag as input for @query.
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_query {
+	struct {
+		__u64 gpu_addr;
+		__u64 query;
+	} in;
+	struct {
+		__u64 value;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_QUERY \
+	_IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query)
+
+#define KBASE_MEM_QUERY_COMMIT_SIZE	((u64)1)
+#define KBASE_MEM_QUERY_VA_SIZE		((u64)2)
+#define KBASE_MEM_QUERY_FLAGS		((u64)3)
+
+/**
+ * struct kbase_ioctl_mem_free - Free a memory region
+ * @gpu_addr: Handle to the region to free
+ */
+struct kbase_ioctl_mem_free {
+	__u64 gpu_addr;
+};
+
+#define KBASE_IOCTL_MEM_FREE \
+	_IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free)
+
+/**
+ * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader
+ * @buffer_count: requested number of dumping buffers
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ *
+ * A fd is returned from the ioctl if successful, or a negative value on error
+ */
+struct kbase_ioctl_hwcnt_reader_setup {
+	__u32 buffer_count;
+	__u32 jm_bm;
+	__u32 shader_bm;
+	__u32 tiler_bm;
+	__u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_READER_SETUP \
+	_IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup)
+
+/**
+ * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection
+ * @dump_buffer:  GPU address to write counters to
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ */
+struct kbase_ioctl_hwcnt_enable {
+	__u64 dump_buffer;
+	__u32 jm_bm;
+	__u32 shader_bm;
+	__u32 tiler_bm;
+	__u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_ENABLE \
+	_IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable)
+
+#define KBASE_IOCTL_HWCNT_DUMP \
+	_IO(KBASE_IOCTL_TYPE, 10)
+
+#define KBASE_IOCTL_HWCNT_CLEAR \
+	_IO(KBASE_IOCTL_TYPE, 11)
+
+/**
+ * struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to.
+ * @data:    Counter samples for the dummy model.
+ * @size:    Size of the counter sample data.
+ * @padding: Padding.
+ */
+struct kbase_ioctl_hwcnt_values {
+	__u64 data;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_HWCNT_SET \
+	_IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values)
+
+/**
+ * struct kbase_ioctl_disjoint_query - Query the disjoint counter
+ * @counter:   A counter of disjoint events in the kernel
+ */
+struct kbase_ioctl_disjoint_query {
+	__u32 counter;
+};
+
+#define KBASE_IOCTL_DISJOINT_QUERY \
+	_IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query)
+
+/**
+ * struct kbase_ioctl_get_ddk_version - Query the kernel version
+ * @version_buffer: Buffer to receive the kernel version string
+ * @size: Size of the buffer
+ * @padding: Padding
+ *
+ * The ioctl will return the number of bytes written into version_buffer
+ * (which includes a NULL byte) or a negative error code
+ *
+ * The ioctl request code has to be _IOW because the data in ioctl struct is
+ * being copied to the kernel, even though the kernel then writes out the
+ * version info to the buffer specified in the ioctl.
+ */
+struct kbase_ioctl_get_ddk_version {
+	__u64 version_buffer;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_GET_DDK_VERSION \
+	_IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
+
+/**
+ * struct kbase_ioctl_mem_jit_init_old - Initialise the JIT memory allocator
+ *
+ * @va_pages: Number of VA pages to reserve for JIT
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ *
+ * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
+ * backwards compatibility.
+ */
+struct kbase_ioctl_mem_jit_init_old {
+	__u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT_OLD \
+	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_old)
+
+/**
+ * struct kbase_ioctl_mem_jit_init - Initialise the JIT memory allocator
+ *
+ * @va_pages: Number of VA pages to reserve for JIT
+ * @max_allocations: Maximum number of concurrent allocations
+ * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
+ * @group_id: Group ID to be used for physical allocations
+ * @padding: Currently unused, must be zero
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ */
+struct kbase_ioctl_mem_jit_init {
+	__u64 va_pages;
+	__u8 max_allocations;
+	__u8 trim_level;
+	__u8 group_id;
+	__u8 padding[5];
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT \
+	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init)
+
+/**
+ * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory
+ *
+ * @handle: GPU memory handle (GPU VA)
+ * @user_addr: The address where it is mapped in user space
+ * @size: The number of bytes to synchronise
+ * @type: The direction to synchronise: 0 is sync to memory (clean),
+ * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants.
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_mem_sync {
+	__u64 handle;
+	__u64 user_addr;
+	__u64 size;
+	__u8 type;
+	__u8 padding[7];
+};
+
+#define KBASE_IOCTL_MEM_SYNC \
+	_IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync)
+
+/**
+ * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer
+ *
+ * @gpu_addr: The GPU address of the memory region
+ * @cpu_addr: The CPU address to locate
+ * @size: A size in bytes to validate is contained within the region
+ * @offset: The offset from the start of the memory region to @cpu_addr
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_find_cpu_offset {
+	struct {
+		__u64 gpu_addr;
+		__u64 cpu_addr;
+		__u64 size;
+	} in;
+	struct {
+		__u64 offset;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \
+	_IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset)
+
+/**
+ * struct kbase_ioctl_get_context_id - Get the kernel context ID
+ *
+ * @id: The kernel context ID
+ */
+struct kbase_ioctl_get_context_id {
+	__u32 id;
+};
+
+#define KBASE_IOCTL_GET_CONTEXT_ID \
+	_IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id)
+
+/**
+ * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd
+ *
+ * @flags: Flags
+ *
+ * The ioctl returns a file descriptor when successful
+ */
+struct kbase_ioctl_tlstream_acquire {
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_TLSTREAM_ACQUIRE \
+	_IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire)
+
+#define KBASE_IOCTL_TLSTREAM_FLUSH \
+	_IO(KBASE_IOCTL_TYPE, 19)
+
+/**
+ * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region
+ *
+ * @gpu_addr: The memory region to modify
+ * @pages:    The number of physical pages that should be present
+ *
+ * The ioctl may return on the following error codes or 0 for success:
+ *   -ENOMEM: Out of memory
+ *   -EINVAL: Invalid arguments
+ */
+struct kbase_ioctl_mem_commit {
+	__u64 gpu_addr;
+	__u64 pages;
+};
+
+#define KBASE_IOCTL_MEM_COMMIT \
+	_IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit)
+
+/**
+ * union kbase_ioctl_mem_alias - Create an alias of memory regions
+ * @flags: Flags, see BASE_MEM_xxx
+ * @stride: Bytes between start of each memory region
+ * @nents: The number of regions to pack together into the alias
+ * @aliasing_info: Pointer to an array of struct base_mem_aliasing_info
+ * @gpu_va: Address of the new alias
+ * @va_pages: Size of the new alias
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_alias {
+	struct {
+		__u64 flags;
+		__u64 stride;
+		__u64 nents;
+		__u64 aliasing_info;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALIAS \
+	_IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias)
+
+/**
+ * union kbase_ioctl_mem_import - Import memory for use by the GPU
+ * @flags: Flags, see BASE_MEM_xxx
+ * @phandle: Handle to the external memory
+ * @type: Type of external memory, see base_mem_import_type
+ * @padding: Amount of extra VA pages to append to the imported buffer
+ * @gpu_va: Address of the new alias
+ * @va_pages: Size of the new alias
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_import {
+	struct {
+		__u64 flags;
+		__u64 phandle;
+		__u32 type;
+		__u32 padding;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_IMPORT \
+	_IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import)
+
+/**
+ * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region
+ * @gpu_va: The GPU region to modify
+ * @flags: The new flags to set
+ * @mask: Mask of the flags to modify
+ */
+struct kbase_ioctl_mem_flags_change {
+	__u64 gpu_va;
+	__u64 flags;
+	__u64 mask;
+};
+
+#define KBASE_IOCTL_MEM_FLAGS_CHANGE \
+	_IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change)
+
+/**
+ * struct kbase_ioctl_stream_create - Create a synchronisation stream
+ * @name: A name to identify this stream. Must be NULL-terminated.
+ *
+ * Note that this is also called a "timeline", but is named stream to avoid
+ * confusion with other uses of the word.
+ *
+ * Unused bytes in @name (after the first NULL byte) must be also be NULL bytes.
+ *
+ * The ioctl returns a file descriptor.
+ */
+struct kbase_ioctl_stream_create {
+	char name[32];
+};
+
+#define KBASE_IOCTL_STREAM_CREATE \
+	_IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create)
+
+/**
+ * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence
+ * @fd: The file descriptor to validate
+ */
+struct kbase_ioctl_fence_validate {
+	int fd;
+};
+
+#define KBASE_IOCTL_FENCE_VALIDATE \
+	_IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
+
+/**
+ * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel
+ * @buffer: Pointer to the information
+ * @len: Length
+ * @padding: Padding
+ *
+ * The data provided is accessible through a debugfs file
+ */
+struct kbase_ioctl_mem_profile_add {
+	__u64 buffer;
+	__u32 len;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_MEM_PROFILE_ADD \
+	_IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
+
+/**
+ * struct kbase_ioctl_soft_event_update - Update the status of a soft-event
+ * @event: GPU address of the event which has been updated
+ * @new_status: The new status to set
+ * @flags: Flags for future expansion
+ */
+struct kbase_ioctl_soft_event_update {
+	__u64 event;
+	__u32 new_status;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_SOFT_EVENT_UPDATE \
+	_IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
+
+/**
+ * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource
+ * @count: Number of resources
+ * @address: Array of u64 GPU addresses of the external resources to map
+ */
+struct kbase_ioctl_sticky_resource_map {
+	__u64 count;
+	__u64 address;
+};
+
+#define KBASE_IOCTL_STICKY_RESOURCE_MAP \
+	_IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map)
+
+/**
+ * struct kbase_ioctl_sticky_resource_map - Unmap a resource mapped which was
+ *                                          previously permanently mapped
+ * @count: Number of resources
+ * @address: Array of u64 GPU addresses of the external resources to unmap
+ */
+struct kbase_ioctl_sticky_resource_unmap {
+	__u64 count;
+	__u64 address;
+};
+
+#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \
+	_IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap)
+
+/**
+ * union kbase_ioctl_mem_find_gpu_start_and_offset - Find the start address of
+ *                                                   the GPU memory region for
+ *                                                   the given gpu address and
+ *                                                   the offset of that address
+ *                                                   into the region
+ *
+ * @gpu_addr: GPU virtual address
+ * @size: Size in bytes within the region
+ * @start: Address of the beginning of the memory region enclosing @gpu_addr
+ *         for the length of @offset bytes
+ * @offset: The offset from the start of the memory region to @gpu_addr
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_find_gpu_start_and_offset {
+	struct {
+		__u64 gpu_addr;
+		__u64 size;
+	} in;
+	struct {
+		__u64 start;
+		__u64 offset;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
+	_IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset)
+
+
+#define KBASE_IOCTL_CINSTR_GWT_START \
+	_IO(KBASE_IOCTL_TYPE, 33)
+
+#define KBASE_IOCTL_CINSTR_GWT_STOP \
+	_IO(KBASE_IOCTL_TYPE, 34)
+
+/**
+ * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses.
+ * @addr_buffer: Address of buffer to hold addresses of gpu modified areas.
+ * @size_buffer: Address of buffer to hold size of modified areas (in pages)
+ * @len: Number of addresses the buffers can hold.
+ * @more_data_available: Status indicating if more addresses are available.
+ * @no_of_addr_collected: Number of addresses collected into addr_buffer.
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ *
+ * This structure is used when performing a call to dump GPU write fault
+ * addresses.
+ */
+union kbase_ioctl_cinstr_gwt_dump {
+	struct {
+		__u64 addr_buffer;
+		__u64 size_buffer;
+		__u32 len;
+		__u32 padding;
+
+	} in;
+	struct {
+		__u32 no_of_addr_collected;
+		__u8 more_data_available;
+		__u8 padding[27];
+	} out;
+};
+
+#define KBASE_IOCTL_CINSTR_GWT_DUMP \
+	_IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
+
+
+/**
+ * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone
+ *
+ * @va_pages: Number of VA pages to reserve for EXEC_VA
+ */
+struct kbase_ioctl_mem_exec_init {
+	__u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_EXEC_INIT \
+	_IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init)
+
+
+/***************
+ * test ioctls *
+ ***************/
+#if MALI_UNIT_TEST
+/* These ioctls are purely for test purposes and are not used in the production
+ * driver, they therefore may change without notice
+ */
+
+#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1)
+
+/**
+ * struct kbase_ioctl_tlstream_test - Start a timeline stream test
+ *
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay between tracepoints from one writer in milliseconds
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ */
+struct kbase_ioctl_tlstream_test {
+	__u32 tpw_count;
+	__u32 msg_delay;
+	__u32 msg_count;
+	__u32 aux_msg;
+};
+
+#define KBASE_IOCTL_TLSTREAM_TEST \
+	_IOW(KBASE_IOCTL_TEST_TYPE, 1, struct kbase_ioctl_tlstream_test)
+
+/**
+ * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ */
+struct kbase_ioctl_tlstream_stats {
+	__u32 bytes_collected;
+	__u32 bytes_generated;
+};
+
+#define KBASE_IOCTL_TLSTREAM_STATS \
+	_IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
+
+/**
+ * struct kbase_ioctl_cs_event_memory_write - Write an event memory address
+ * @cpu_addr: Memory address to write
+ * @value: Value to write
+ * @padding: Currently unused, must be zero
+ */
+struct kbase_ioctl_cs_event_memory_write {
+	__u64 cpu_addr;
+	__u8 value;
+	__u8 padding[7];
+};
+
+/**
+ * union kbase_ioctl_cs_event_memory_read - Read an event memory address
+ * @cpu_addr: Memory address to read
+ * @value: Value read
+ * @padding: Currently unused, must be zero
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_cs_event_memory_read {
+	struct {
+		__u64 cpu_addr;
+	} in;
+	struct {
+		__u8 value;
+		__u8 padding[7];
+	} out;
+};
+
+#endif
+
+/* Customer extension range */
+#define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2)
+
+/* If the integration needs extra ioctl add them there
+ * like this:
+ *
+ * struct my_ioctl_args {
+ *  ....
+ * }
+ *
+ * #define KBASE_IOCTL_MY_IOCTL \
+ *         _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args)
+ */
+
+
+/**********************************
+ * Definitions for GPU properties *
+ **********************************/
+#define KBASE_GPUPROP_VALUE_SIZE_U8	(0x0)
+#define KBASE_GPUPROP_VALUE_SIZE_U16	(0x1)
+#define KBASE_GPUPROP_VALUE_SIZE_U32	(0x2)
+#define KBASE_GPUPROP_VALUE_SIZE_U64	(0x3)
+
+#define KBASE_GPUPROP_PRODUCT_ID			1
+#define KBASE_GPUPROP_VERSION_STATUS			2
+#define KBASE_GPUPROP_MINOR_REVISION			3
+#define KBASE_GPUPROP_MAJOR_REVISION			4
+/* 5 previously used for GPU speed */
+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX			6
+/* 7 previously used for minimum GPU speed */
+#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE		8
+#define KBASE_GPUPROP_TEXTURE_FEATURES_0		9
+#define KBASE_GPUPROP_TEXTURE_FEATURES_1		10
+#define KBASE_GPUPROP_TEXTURE_FEATURES_2		11
+#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE		12
+
+#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE			13
+#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE		14
+#define KBASE_GPUPROP_L2_NUM_L2_SLICES			15
+
+#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES		16
+#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS		17
+
+#define KBASE_GPUPROP_MAX_THREADS			18
+#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE		19
+#define KBASE_GPUPROP_MAX_BARRIER_SIZE			20
+#define KBASE_GPUPROP_MAX_REGISTERS			21
+#define KBASE_GPUPROP_MAX_TASK_QUEUE			22
+#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT		23
+#define KBASE_GPUPROP_IMPL_TECH				24
+
+#define KBASE_GPUPROP_RAW_SHADER_PRESENT		25
+#define KBASE_GPUPROP_RAW_TILER_PRESENT			26
+#define KBASE_GPUPROP_RAW_L2_PRESENT			27
+#define KBASE_GPUPROP_RAW_STACK_PRESENT			28
+#define KBASE_GPUPROP_RAW_L2_FEATURES			29
+#define KBASE_GPUPROP_RAW_CORE_FEATURES			30
+#define KBASE_GPUPROP_RAW_MEM_FEATURES			31
+#define KBASE_GPUPROP_RAW_MMU_FEATURES			32
+#define KBASE_GPUPROP_RAW_AS_PRESENT			33
+#define KBASE_GPUPROP_RAW_JS_PRESENT			34
+#define KBASE_GPUPROP_RAW_JS_FEATURES_0			35
+#define KBASE_GPUPROP_RAW_JS_FEATURES_1			36
+#define KBASE_GPUPROP_RAW_JS_FEATURES_2			37
+#define KBASE_GPUPROP_RAW_JS_FEATURES_3			38
+#define KBASE_GPUPROP_RAW_JS_FEATURES_4			39
+#define KBASE_GPUPROP_RAW_JS_FEATURES_5			40
+#define KBASE_GPUPROP_RAW_JS_FEATURES_6			41
+#define KBASE_GPUPROP_RAW_JS_FEATURES_7			42
+#define KBASE_GPUPROP_RAW_JS_FEATURES_8			43
+#define KBASE_GPUPROP_RAW_JS_FEATURES_9			44
+#define KBASE_GPUPROP_RAW_JS_FEATURES_10		45
+#define KBASE_GPUPROP_RAW_JS_FEATURES_11		46
+#define KBASE_GPUPROP_RAW_JS_FEATURES_12		47
+#define KBASE_GPUPROP_RAW_JS_FEATURES_13		48
+#define KBASE_GPUPROP_RAW_JS_FEATURES_14		49
+#define KBASE_GPUPROP_RAW_JS_FEATURES_15		50
+#define KBASE_GPUPROP_RAW_TILER_FEATURES		51
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0		52
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1		53
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2		54
+#define KBASE_GPUPROP_RAW_GPU_ID			55
+#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS		56
+#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE	57
+#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE	58
+#define KBASE_GPUPROP_RAW_THREAD_FEATURES		59
+#define KBASE_GPUPROP_RAW_COHERENCY_MODE		60
+
+#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS		61
+#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS		62
+#define KBASE_GPUPROP_COHERENCY_COHERENCY		63
+#define KBASE_GPUPROP_COHERENCY_GROUP_0			64
+#define KBASE_GPUPROP_COHERENCY_GROUP_1			65
+#define KBASE_GPUPROP_COHERENCY_GROUP_2			66
+#define KBASE_GPUPROP_COHERENCY_GROUP_3			67
+#define KBASE_GPUPROP_COHERENCY_GROUP_4			68
+#define KBASE_GPUPROP_COHERENCY_GROUP_5			69
+#define KBASE_GPUPROP_COHERENCY_GROUP_6			70
+#define KBASE_GPUPROP_COHERENCY_GROUP_7			71
+#define KBASE_GPUPROP_COHERENCY_GROUP_8			72
+#define KBASE_GPUPROP_COHERENCY_GROUP_9			73
+#define KBASE_GPUPROP_COHERENCY_GROUP_10		74
+#define KBASE_GPUPROP_COHERENCY_GROUP_11		75
+#define KBASE_GPUPROP_COHERENCY_GROUP_12		76
+#define KBASE_GPUPROP_COHERENCY_GROUP_13		77
+#define KBASE_GPUPROP_COHERENCY_GROUP_14		78
+#define KBASE_GPUPROP_COHERENCY_GROUP_15		79
+
+#define KBASE_GPUPROP_TEXTURE_FEATURES_3		80
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3		81
+
+#define KBASE_GPUPROP_NUM_EXEC_ENGINES                  82
+
+#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC		83
+#define KBASE_GPUPROP_TLS_ALLOC				84
+
+#ifdef __cpluscplus
+}
+#endif
+
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100755
index 0000000..c984a82
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1544 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <mali_kbase.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/ratelimit.h>
+
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_tracepoints.h>
+
+#include "mali_kbase_dma_fence.h"
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/* Return whether katom will run on the GPU or not. Currently only soft jobs and
+ * dependency-only atoms do not run on the GPU */
+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) ||  \
+			((katom->core_req & BASE_JD_REQ_ATOM_TYPE) ==    \
+							BASE_JD_REQ_DEP)))
+/*
+ * This is the kernel side of the API. Only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ *   completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const u64 p)
+{
+#ifdef CONFIG_COMPAT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return compat_ptr(p);
+#endif
+	return u64_to_user_ptr(p);
+}
+
+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
+ */
+static int jd_run_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+	if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+		/* Dependency only atom */
+		katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		return 0;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		/* Soft-job */
+		if (katom->will_fail_event_code) {
+			kbase_finish_soft_job(katom);
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			return 0;
+		}
+		if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		}
+		return 0;
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+	/* Queue an action about whether we should try scheduling a context */
+	return kbasep_js_add_job(kctx, katom);
+}
+
+#if defined(CONFIG_MALI_DMA_FENCE)
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kbdev = katom->kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Check whether the atom's other dependencies were already met. If
+	 * katom is a GPU atom then the job scheduler may be able to represent
+	 * the dependencies, hence we may attempt to submit it before they are
+	 * met. Other atoms must have had both dependencies resolved.
+	 */
+	if (IS_GPU_ATOM(katom) ||
+			(!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+			!kbase_jd_katom_dep_atom(&katom->dep[1]))) {
+		/* katom dep complete, attempt to run it */
+		bool resched = false;
+
+		resched = jd_run_atom(katom);
+
+		if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+			/* The atom has already finished */
+			resched |= jd_done_nolock(katom, NULL);
+		}
+
+		if (resched)
+			kbase_js_sched_all(kbdev);
+	}
+}
+#endif
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 * Any successfully completed atom would have had all it's callbacks
+	 * completed before the atom was run, so only flush for failed atoms.
+	 */
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		flush_workqueue(katom->kctx->dma_fence.wq);
+#endif /* CONFIG_MALI_DMA_FENCE */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_signal(katom);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	kbase_gpu_vm_lock(katom->kctx);
+	/* only roll back if extres is non-NULL */
+	if (katom->extres) {
+		u32 res_no;
+
+		res_no = katom->nr_extres;
+		while (res_no-- > 0) {
+			struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+			struct kbase_va_region *reg;
+
+			reg = kbase_region_tracker_find_region_base_address(
+					katom->kctx,
+					katom->extres[res_no].gpu_address);
+			kbase_unmap_external_resource(katom->kctx, reg, alloc);
+		}
+		kfree(katom->extres);
+		katom->extres = NULL;
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+}
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ */
+
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+	int err_ret_val = -EINVAL;
+	u32 res_no;
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct kbase_dma_fence_resv_info info = {
+		.resv_objs = NULL,
+		.dma_fence_resv_count = 0,
+		.dma_fence_excl_bitmap = NULL
+	};
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	/*
+	 * When both dma-buf fence and Android native sync is enabled, we
+	 * disable dma-buf fence for contexts that are using Android native
+	 * fences.
+	 */
+	const bool implicit_sync = !kbase_ctx_flag(katom->kctx,
+						   KCTX_NO_IMPLICIT_SYNC);
+#else /* CONFIG_SYNC || CONFIG_SYNC_FILE*/
+	const bool implicit_sync = true;
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+#endif /* CONFIG_MALI_DMA_FENCE */
+	struct base_external_resource *input_extres;
+
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+	/* no resources encoded, early out */
+	if (!katom->nr_extres)
+		return -EINVAL;
+
+	katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+	if (!katom->extres)
+		return -ENOMEM;
+
+	/* copy user buffer to the end of our real buffer.
+	 * Make sure the struct sizes haven't changed in a way
+	 * we don't support */
+	BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+	input_extres = (struct base_external_resource *)
+			(((unsigned char *)katom->extres) +
+			(sizeof(*katom->extres) - sizeof(*input_extres)) *
+			katom->nr_extres);
+
+	if (copy_from_user(input_extres,
+			get_compat_pointer(katom->kctx, user_atom->extres_list),
+			sizeof(*input_extres) * katom->nr_extres) != 0) {
+		err_ret_val = -EINVAL;
+		goto early_err_out;
+	}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		info.resv_objs = kmalloc_array(katom->nr_extres,
+					sizeof(struct reservation_object *),
+					GFP_KERNEL);
+		if (!info.resv_objs) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+
+		info.dma_fence_excl_bitmap =
+				kcalloc(BITS_TO_LONGS(katom->nr_extres),
+					sizeof(unsigned long), GFP_KERNEL);
+		if (!info.dma_fence_excl_bitmap) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Take the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* need to keep the GPU VM locked while we set up UMM buffers */
+	kbase_gpu_vm_lock(katom->kctx);
+	for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+		struct base_external_resource *res = &input_extres[res_no];
+		struct kbase_va_region *reg;
+		struct kbase_mem_phy_alloc *alloc;
+#ifdef CONFIG_MALI_DMA_FENCE
+		bool exclusive;
+		exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
+				? true : false;
+#endif
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx,
+				res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+		/* did we find a matching region object? */
+		if (kbase_is_region_invalid_or_free(reg)) {
+			/* roll back */
+			goto failed_loop;
+		}
+
+		if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
+				(reg->flags & KBASE_REG_SECURE)) {
+			katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED;
+		}
+
+		alloc = kbase_map_external_resource(katom->kctx, reg,
+				current->mm);
+		if (!alloc) {
+			err_ret_val = -EINVAL;
+			goto failed_loop;
+		}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		if (implicit_sync &&
+		    reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+			struct reservation_object *resv;
+
+			resv = reg->gpu_alloc->imported.umm.dma_buf->resv;
+			if (resv)
+				kbase_dma_fence_add_reservation(resv, &info,
+								exclusive);
+		}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+		/* finish with updating out array with the data we found */
+		/* NOTE: It is important that this is the last thing we do (or
+		 * at least not before the first write) as we overwrite elements
+		 * as we loop and could be overwriting ourself, so no writes
+		 * until the last read for an element.
+		 * */
+		katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+		katom->extres[res_no].alloc = alloc;
+	}
+	/* successfully parsed the extres array */
+	/* drop the vm lock now */
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		if (info.dma_fence_resv_count) {
+			int ret;
+
+			ret = kbase_dma_fence_wait(katom, &info);
+			if (ret < 0)
+				goto failed_dma_fence_setup;
+		}
+
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* all done OK */
+	return 0;
+
+/* error handling section */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+failed_dma_fence_setup:
+	/* Lock the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* lock before we unmap */
+	kbase_gpu_vm_lock(katom->kctx);
+#endif
+
+ failed_loop:
+	/* undo the loop work */
+	while (res_no-- > 0) {
+		struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+
+		kbase_unmap_external_resource(katom->kctx, NULL, alloc);
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+ early_err_out:
+	kfree(katom->extres);
+	katom->extres = NULL;
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif
+	return err_ret_val;
+}
+
+static inline void jd_resolve_dep(struct list_head *out_list,
+					struct kbase_jd_atom *katom,
+					u8 d, bool ctx_is_dying)
+{
+	u8 other_d = !d;
+
+	while (!list_empty(&katom->dep_head[d])) {
+		struct kbase_jd_atom *dep_atom;
+		struct kbase_jd_atom *other_dep_atom;
+		u8 dep_type;
+
+		dep_atom = list_entry(katom->dep_head[d].next,
+				struct kbase_jd_atom, dep_item[d]);
+		list_del(katom->dep_head[d].next);
+
+		dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+		kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+		if (katom->event_code != BASE_JD_EVENT_DONE &&
+			(dep_type != BASE_JD_DEP_TYPE_ORDER)) {
+#ifdef CONFIG_MALI_DMA_FENCE
+			kbase_dma_fence_cancel_callbacks(dep_atom);
+#endif
+
+			dep_atom->event_code = katom->event_code;
+			KBASE_DEBUG_ASSERT(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED);
+
+			dep_atom->will_fail_event_code = dep_atom->event_code;
+		}
+		other_dep_atom = (struct kbase_jd_atom *)
+			kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]);
+
+		if (!dep_atom->in_jd_list && (!other_dep_atom ||
+				(IS_GPU_ATOM(dep_atom) && !ctx_is_dying &&
+				!dep_atom->will_fail_event_code &&
+				!other_dep_atom->will_fail_event_code))) {
+			bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+			int dep_count;
+
+			dep_count = kbase_fence_dep_count_read(dep_atom);
+			if (likely(dep_count == -1)) {
+				dep_satisfied = true;
+			} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+				dep_satisfied = false;
+			}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+			if (dep_satisfied) {
+				dep_atom->in_jd_list = true;
+				list_add_tail(&dep_atom->jd_item, out_list);
+			}
+		}
+	}
+}
+
+KBASE_EXPORT_TEST_API(jd_resolve_dep);
+
+/**
+ * is_dep_valid - Validate that a dependency is valid for early dependency
+ *                submission
+ * @katom: Dependency atom to validate
+ *
+ * A dependency is valid if any of the following are true :
+ * - It does not exist (a non-existent dependency does not block submission)
+ * - It is in the job scheduler
+ * - It has completed, does not have a failure event code, and has not been
+ *   marked to fail in the future
+ *
+ * Return: true if valid, false otherwise
+ */
+static bool is_dep_valid(struct kbase_jd_atom *katom)
+{
+	/* If there's no dependency then this is 'valid' from the perspective of
+	 * early dependency submission */
+	if (!katom)
+		return true;
+
+	/* Dependency must have reached the job scheduler */
+	if (katom->status < KBASE_JD_ATOM_STATE_IN_JS)
+		return false;
+
+	/* If dependency has completed and has failed or will fail then it is
+	 * not valid */
+	if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED &&
+			(katom->event_code != BASE_JD_EVENT_DONE ||
+			katom->will_fail_event_code))
+		return false;
+
+	return true;
+}
+
+static void jd_try_submitting_deps(struct list_head *out_list,
+		struct kbase_jd_atom *node)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct list_head *pos;
+
+		list_for_each(pos, &node->dep_head[i]) {
+			struct kbase_jd_atom *dep_atom = list_entry(pos,
+					struct kbase_jd_atom, dep_item[i]);
+
+			if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) {
+				/*Check if atom deps look sane*/
+				bool dep0_valid = is_dep_valid(
+						dep_atom->dep[0].atom);
+				bool dep1_valid = is_dep_valid(
+						dep_atom->dep[1].atom);
+				bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+				int dep_count;
+
+				dep_count = kbase_fence_dep_count_read(
+								dep_atom);
+				if (likely(dep_count == -1)) {
+					dep_satisfied = true;
+				} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+					dep_satisfied = false;
+				}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+				if (dep0_valid && dep1_valid && dep_satisfied) {
+					dep_atom->in_jd_list = true;
+					list_add(&dep_atom->jd_item, out_list);
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head completed_jobs;
+	struct list_head runnable_jobs;
+	bool need_to_try_schedule_context = false;
+	int i;
+
+	INIT_LIST_HEAD(&completed_jobs);
+	INIT_LIST_HEAD(&runnable_jobs);
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+	/* This is needed in case an atom is failed due to being invalid, this
+	 * can happen *before* the jobs that the atom depends on have completed */
+	for (i = 0; i < 2; i++) {
+		if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
+			list_del(&katom->dep_item[i]);
+			kbase_jd_katom_dep_clear(&katom->dep[i]);
+		}
+	}
+
+	/* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with
+	 * BASE_JD_EVENT_TILE_RANGE_FAULT.
+	 *
+	 * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the
+	 * error code to BASE_JD_EVENT_DONE
+	 */
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) &&
+		  katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) {
+		if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) {
+			/* Promote the failure to job done */
+			katom->event_code = BASE_JD_EVENT_DONE;
+			katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED);
+		}
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+	list_add_tail(&katom->jd_item, &completed_jobs);
+
+	while (!list_empty(&completed_jobs)) {
+		katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item);
+		list_del(completed_jobs.prev);
+		KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+		for (i = 0; i < 2; i++)
+			jd_resolve_dep(&runnable_jobs, katom, i,
+					kbase_ctx_flag(kctx, KCTX_DYING));
+
+		if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+			kbase_jd_post_external_resources(katom);
+
+		while (!list_empty(&runnable_jobs)) {
+			struct kbase_jd_atom *node;
+
+			node = list_entry(runnable_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(runnable_jobs.next);
+			node->in_jd_list = false;
+
+			KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+			if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+				need_to_try_schedule_context |= jd_run_atom(node);
+			} else {
+				node->event_code = katom->event_code;
+
+				if (node->core_req &
+							BASE_JD_REQ_SOFT_JOB) {
+					WARN_ON(!list_empty(&node->queue));
+					kbase_finish_soft_job(node);
+				}
+				node->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+
+			if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+				list_add_tail(&node->jd_item, &completed_jobs);
+			} else if (node->status == KBASE_JD_ATOM_STATE_IN_JS &&
+					!node->will_fail_event_code) {
+				/* Node successfully submitted, try submitting
+				 * dependencies as they may now be representable
+				 * in JS */
+				jd_try_submitting_deps(&runnable_jobs, node);
+			}
+		}
+
+		/* Register a completed job as a disjoint event when the GPU
+		 * is in a disjoint state (ie. being reset).
+		 */
+		kbase_disjoint_event_potential(kctx->kbdev);
+		if (completed_jobs_ctx)
+			list_add_tail(&katom->jd_item, completed_jobs_ctx);
+		else
+			kbase_event_post(kctx, katom);
+
+		/* Decrement and check the TOTAL number of jobs. This includes
+		 * those not tracked by the scheduler: 'not ready to run' and
+		 * 'dependency-only' jobs. */
+		if (--kctx->jctx.job_nr == 0)
+			wake_up(&kctx->jctx.zero_jobs_wait);	/* All events are safely queued now, and we can signal any waiter
+								 * that we've got no more jobs (so we can be safely terminated) */
+	}
+
+	return need_to_try_schedule_context;
+}
+
+KBASE_EXPORT_TEST_API(jd_done_nolock);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+enum {
+	CORE_REQ_DEP_ONLY,
+	CORE_REQ_SOFT,
+	CORE_REQ_COMPUTE,
+	CORE_REQ_FRAGMENT,
+	CORE_REQ_VERTEX,
+	CORE_REQ_TILER,
+	CORE_REQ_FRAGMENT_VERTEX,
+	CORE_REQ_FRAGMENT_VERTEX_TILER,
+	CORE_REQ_FRAGMENT_TILER,
+	CORE_REQ_VERTEX_TILER,
+	CORE_REQ_UNKNOWN
+};
+static const char * const core_req_strings[] = {
+	"Dependency Only Job",
+	"Soft Job",
+	"Compute Shader Job",
+	"Fragment Shader Job",
+	"Vertex/Geometry Shader Job",
+	"Tiler Job",
+	"Fragment Shader + Vertex/Geometry Shader Job",
+	"Fragment Shader + Vertex/Geometry Shader Job + Tiler Job",
+	"Fragment Shader + Tiler Job",
+	"Vertex/Geometry Shader Job + Tiler Job",
+	"Unknown Job"
+};
+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
+{
+	if (core_req & BASE_JD_REQ_SOFT_JOB)
+		return core_req_strings[CORE_REQ_SOFT];
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		return core_req_strings[CORE_REQ_COMPUTE];
+	switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) {
+	case BASE_JD_REQ_DEP:
+		return core_req_strings[CORE_REQ_DEP_ONLY];
+	case BASE_JD_REQ_FS:
+		return core_req_strings[CORE_REQ_FRAGMENT];
+	case BASE_JD_REQ_CS:
+		return core_req_strings[CORE_REQ_VERTEX];
+	case BASE_JD_REQ_T:
+		return core_req_strings[CORE_REQ_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_TILER];
+	case (BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_VERTEX_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER];
+	}
+	return core_req_strings[CORE_REQ_UNKNOWN];
+}
+#endif
+
+bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int queued = 0;
+	int i;
+	int sched_prio;
+	bool ret;
+	bool will_fail = false;
+
+	/* Update the TOTAL number of jobs. This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	jctx->job_nr++;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	katom->start_timestamp.tv64 = 0;
+#else
+	katom->start_timestamp = 0;
+#endif
+	katom->udata = user_atom->udata;
+	katom->kctx = kctx;
+	katom->nr_extres = user_atom->nr_extres;
+	katom->extres = NULL;
+	katom->device_nr = user_atom->device_nr;
+	katom->jc = user_atom->jc;
+	katom->core_req = user_atom->core_req;
+	katom->atom_flags = 0;
+	katom->retry_count = 0;
+	katom->need_cache_flush_cores_retained = 0;
+	katom->pre_dep = NULL;
+	katom->post_dep = NULL;
+	katom->x_pre_dep = NULL;
+	katom->x_post_dep = NULL;
+	katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED;
+	katom->softjob_data = NULL;
+
+	/* Implicitly sets katom->protected_state.enter as well. */
+	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+	katom->age = kctx->age_count++;
+
+	INIT_LIST_HEAD(&katom->queue);
+	INIT_LIST_HEAD(&katom->jd_item);
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_fence_dep_count_set(katom, -1);
+#endif
+
+	/* Don't do anything if there is a mess up with dependencies.
+	   This is done in a separate cycle to check both the dependencies at ones, otherwise
+	   it will be extra complexity to deal with 1st dependency ( just added to the list )
+	   if only the 2nd one has invalid config.
+	 */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+		if (dep_atom_number) {
+			if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+				katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+
+				/* Wrong dependency setup. Atom will be sent
+				 * back to user space. Do not record any
+				 * dependencies. */
+				KBASE_TLSTREAM_TL_NEW_ATOM(
+						kbdev,
+						katom,
+						kbase_jd_atom_id(kctx, katom));
+				KBASE_TLSTREAM_TL_RET_ATOM_CTX(
+						kbdev,
+						katom, kctx);
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(
+						kbdev,
+						katom,
+						TL_ATOM_STATE_IDLE);
+
+				ret = jd_done_nolock(katom, NULL);
+				goto out;
+			}
+		}
+	}
+
+	/* Add dependencies */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type;
+		struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number];
+
+		dep_atom_type = user_atom->pre_dep[i].dependency_type;
+		kbase_jd_katom_dep_clear(&katom->dep[i]);
+
+		if (!dep_atom_number)
+			continue;
+
+		if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED ||
+				dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+
+			if (dep_atom->event_code == BASE_JD_EVENT_DONE)
+				continue;
+			/* don't stop this atom if it has an order dependency
+			 * only to the failed one, try to submit it through
+			 * the normal path
+			 */
+			if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom->event_code > BASE_JD_EVENT_ACTIVE) {
+				continue;
+			}
+
+			/* Atom has completed, propagate the error code if any */
+			katom->event_code = dep_atom->event_code;
+			katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+			/* This atom will be sent back to user space.
+			 * Do not record any dependencies.
+			 */
+			KBASE_TLSTREAM_TL_NEW_ATOM(
+					kbdev,
+					katom,
+					kbase_jd_atom_id(kctx, katom));
+			KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx);
+			KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom,
+					TL_ATOM_STATE_IDLE);
+
+			will_fail = true;
+
+		} else {
+			/* Atom is in progress, add this atom to the list */
+			list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
+			kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type);
+			queued = 1;
+		}
+	}
+
+	if (will_fail) {
+		if (!queued) {
+			if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+				/* This softjob has failed due to a previous
+				 * dependency, however we should still run the
+				 * prepare & finish functions
+				 */
+				int err = kbase_prepare_soft_job(katom);
+
+				if (err >= 0)
+					kbase_finish_soft_job(katom);
+			}
+
+			ret = jd_done_nolock(katom, NULL);
+
+			goto out;
+		} else {
+
+			if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+				/* This softjob has failed due to a previous
+				 * dependency, however we should still run the
+				 * prepare & finish functions
+				 */
+				if (kbase_prepare_soft_job(katom) != 0) {
+					katom->event_code =
+						BASE_JD_EVENT_JOB_INVALID;
+					ret = jd_done_nolock(katom, NULL);
+					goto out;
+				}
+			}
+
+			katom->will_fail_event_code = katom->event_code;
+			ret = false;
+
+			goto out;
+		}
+	} else {
+		/* These must occur after the above loop to ensure that an atom
+		 * that depends on a previous atom with the same number behaves
+		 * as expected */
+		katom->event_code = BASE_JD_EVENT_DONE;
+		katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	}
+
+	/* For invalid priority, be most lenient and choose the default */
+	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+	katom->sched_priority = sched_prio;
+
+	/* Create a new atom. */
+	KBASE_TLSTREAM_TL_NEW_ATOM(
+			kbdev,
+			katom,
+			kbase_jd_atom_id(kctx, katom));
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, katom->sched_priority);
+	KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx);
+
+	/* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
+	if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with an invalid device_nr */
+	if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
+	    (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid device_nr %d",
+				katom->device_nr);
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with invalid core requirements */
+	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
+			(katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid core requirements");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject soft-job atom of certain types from accessing external resources */
+	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
+			(((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) ||
+			 ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) ||
+			 ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting soft-job atom accessing external resources");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		/* handle what we need to do to access the external resources */
+		if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
+			/* setup failed (no access, bad resource, unknown resource types, etc.) */
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+	/* Validate the atom. Function will return error if the atom is
+	 * malformed.
+	 *
+	 * Soft-jobs never enter the job scheduler but have their own initialize method.
+	 *
+	 * If either fail then we immediately complete the atom with an error.
+	 */
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
+		if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	} else {
+		/* Soft-job */
+		if (kbase_prepare_soft_job(katom) != 0) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	katom->work_id = atomic_inc_return(&jctx->work_id);
+	trace_gpu_job_enqueue(kctx->id, katom->work_id,
+			kbasep_map_core_reqs_to_string(katom->core_req));
+#endif
+
+	if (queued && !IS_GPU_ATOM(katom)) {
+		ret = false;
+		goto out;
+	}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (kbase_fence_dep_count_read(katom) != -1) {
+		ret = false;
+		goto out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+
+		ret = false;
+	} else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		ret = kbasep_js_add_job(kctx, katom);
+		/* If job was cancelled then resolve immediately */
+		if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED)
+			ret = jd_done_nolock(katom, NULL);
+	} else {
+		/* This is a pure dependency. Resolve it immediately */
+		ret = jd_done_nolock(katom, NULL);
+	}
+
+ out:
+	return ret;
+}
+
+int kbase_jd_submit(struct kbase_context *kctx,
+		void __user *user_addr, u32 nr_atoms, u32 stride,
+		bool uk6_atom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int err = 0;
+	int i;
+	bool need_to_try_schedule_context = false;
+	struct kbase_device *kbdev;
+	u32 latest_flush;
+
+	/*
+	 * kbase_jd_submit isn't expected to fail and so all errors with the
+	 * jobs are reported by immediately failing them (through event system)
+	 */
+	kbdev = kctx->kbdev;
+
+	beenthere(kctx, "%s", "Enter");
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+		return -EINVAL;
+	}
+
+	if (stride != sizeof(base_jd_atom_v2)) {
+		dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+		return -EINVAL;
+	}
+
+	/* All atoms submitted in this call have the same flush ID */
+	latest_flush = kbase_backend_get_current_flush_id(kbdev);
+
+	for (i = 0; i < nr_atoms; i++) {
+		struct base_jd_atom_v2 user_atom;
+		struct kbase_jd_atom *katom;
+
+		if (copy_from_user(&user_atom, user_addr,
+					sizeof(user_atom)) != 0) {
+			err = -EINVAL;
+			break;
+		}
+
+		user_addr = (void __user *)((uintptr_t) user_addr + stride);
+
+		mutex_lock(&jctx->lock);
+#ifndef compiletime_assert
+#define compiletime_assert_defined
+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \
+while (false)
+#endif
+		compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) ==
+					BASE_JD_ATOM_COUNT,
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+		compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) ==
+					sizeof(user_atom.atom_number),
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+#ifdef compiletime_assert_defined
+#undef compiletime_assert
+#undef compiletime_assert_defined
+#endif
+		katom = &jctx->atoms[user_atom.atom_number];
+
+		/* Record the flush ID for the cache flush optimisation */
+		katom->flush_id = latest_flush;
+
+		while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
+			/* Atom number is already in use, wait for the atom to
+			 * complete
+			 */
+			mutex_unlock(&jctx->lock);
+
+			/* This thread will wait for the atom to complete. Due
+			 * to thread scheduling we are not sure that the other
+			 * thread that owns the atom will also schedule the
+			 * context, so we force the scheduler to be active and
+			 * hence eventually schedule this context at some point
+			 * later.
+			 */
+			kbase_js_sched_all(kbdev);
+
+			if (wait_event_killable(katom->completed,
+					katom->status ==
+					KBASE_JD_ATOM_STATE_UNUSED) != 0) {
+				/* We're being killed so the result code
+				 * doesn't really matter
+				 */
+				return 0;
+			}
+			mutex_lock(&jctx->lock);
+		}
+
+		need_to_try_schedule_context |=
+				       jd_submit_atom(kctx, &user_atom, katom);
+
+		/* Register a completed job as a disjoint event when the GPU is in a disjoint state
+		 * (ie. being reset).
+		 */
+		kbase_disjoint_event_potential(kbdev);
+
+		mutex_unlock(&jctx->lock);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kbdev);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_submit);
+
+void kbase_jd_done_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	u64 cache_jc = katom->jc;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool context_idle;
+	base_jd_core_req core_req = katom->core_req;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+	kbdev = kctx->kbdev;
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	/*
+	 * Begin transaction on JD context and JS context
+	 */
+	mutex_lock(&jctx->lock);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_DONE);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* This worker only gets called on contexts that are scheduled *in*. This is
+	 * because it only happens in response to an IRQ from a job that was
+	 * running.
+	 */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (katom->event_code == BASE_JD_EVENT_STOPPED) {
+		/* Atom has been promoted to stopped */
+		unsigned long flags;
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		kbase_js_unpull(kctx, katom);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&jctx->lock);
+
+		return;
+	}
+
+	if ((katom->event_code != BASE_JD_EVENT_DONE) &&
+			(!kbase_ctx_flag(katom->kctx, KCTX_DYING)))
+		dev_err(kbdev->dev,
+			"t6xx: GPU fault 0x%02lx from job slot %d\n",
+					(unsigned long)katom->event_code,
+								katom->slot_nr);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	/* Retain state before the katom disappears */
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+
+	context_idle = kbase_js_complete_atom_wq(kctx, katom);
+
+	KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
+
+	kbasep_js_remove_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+	jd_done_nolock(katom, &kctx->completed_jobs);
+
+	/* katom may have been freed now, do not use! */
+
+	if (context_idle) {
+		unsigned long flags;
+
+		context_idle = false;
+		mutex_lock(&js_devdata->queue_mutex);
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* If kbase_sched() has scheduled this context back in then
+		 * KCTX_ACTIVE will have been set after we marked it as
+		 * inactive, and another pm reference will have been taken, so
+		 * drop our reference. But do not call kbase_jm_idle_ctx(), as
+		 * the context is active and fast-starting is allowed.
+		 *
+		 * If an atom has been fast-started then kctx->atoms_pulled will
+		 * be non-zero but KCTX_ACTIVE will still be false (as the
+		 * previous pm reference has been inherited). Do NOT drop our
+		 * reference, as it has been re-used, and leave the context as
+		 * active.
+		 *
+		 * If no new atoms have been started then KCTX_ACTIVE will still
+		 * be false and atoms_pulled will be zero, so drop the reference
+		 * and call kbase_jm_idle_ctx().
+		 *
+		 * As the checks are done under both the queue_mutex and
+		 * hwaccess_lock is should be impossible for this to race
+		 * with the scheduler code.
+		 */
+		if (kbase_ctx_flag(kctx, KCTX_ACTIVE) ||
+		    !atomic_read(&kctx->atoms_pulled)) {
+			/* Calling kbase_jm_idle_ctx() here will ensure that
+			 * atoms are not fast-started when we drop the
+			 * hwaccess_lock. This is not performed if
+			 * KCTX_ACTIVE is set as in that case another pm
+			 * reference has been taken and a fast-start would be
+			 * valid.
+			 */
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE))
+				kbase_jm_idle_ctx(kbdev, kctx);
+			context_idle = true;
+		} else {
+			kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+		}
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+
+	/*
+	 * Transaction complete
+	 */
+	mutex_unlock(&jctx->lock);
+
+	/* Job is now no longer running, so can now safely release the context
+	 * reference, and handle any actions that were logged against the atom's retained state */
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	if (!atomic_dec_return(&kctx->work_count)) {
+		/* If worker now idle then post all events that jd_done_nolock()
+		 * has queued */
+		mutex_lock(&jctx->lock);
+		while (!list_empty(&kctx->completed_jobs)) {
+			struct kbase_jd_atom *atom = list_entry(
+					kctx->completed_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(kctx->completed_jobs.next);
+
+			kbase_event_post(kctx, atom);
+		}
+		mutex_unlock(&jctx->lock);
+	}
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req);
+
+	if (context_idle)
+		kbase_pm_context_idle(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+}
+
+/**
+ * jd_cancel_worker - Work queue job cancel function.
+ * @data: a &struct work_struct
+ *
+ * Only called as part of 'Zapping' a context (which occurs on termination).
+ * Operates serially with the kbase_jd_done_worker() on the work queue.
+ *
+ * This can only be called on contexts that aren't scheduled.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be
+ * running (by virtue of only being called on contexts that aren't
+ * scheduled).
+ */
+static void jd_cancel_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool need_to_try_schedule_context;
+	bool attr_state_changed;
+	struct kbase_device *kbdev;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	kbdev = kctx->kbdev;
+	jctx = &kctx->jctx;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+	/* This only gets called on contexts that are scheduled out. Hence, we must
+	 * make sure we don't de-ref the number of running jobs (there aren't
+	 * any), nor must we try to schedule out the context (it's already
+	 * scheduled out).
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	/* Scheduler: Remove the job from the system */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&jctx->lock);
+
+	need_to_try_schedule_context = jd_done_nolock(katom, NULL);
+	/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
+	 * schedule the context. There's also no need for the jsctx_mutex to have been taken
+	 * around this too. */
+	KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
+
+	/* katom may have been freed now, do not use! */
+	mutex_unlock(&jctx->lock);
+
+	if (attr_state_changed)
+		kbase_js_sched_all(kbdev);
+}
+
+/**
+ * kbase_jd_done - Complete a job that has been removed from the Hardware
+ * @katom: atom which has been completed
+ * @slot_nr: slot the atom was on
+ * @end_timestamp: completion time
+ * @done_code: completion code
+ *
+ * This must be used whenever a job has been removed from the Hardware, e.g.:
+ * An IRQ indicates that the job finished (for both error and 'done' codes), or
+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop.
+ *
+ * Some work is carried out immediately, and the rest is deferred onto a
+ * workqueue
+ *
+ * Context:
+ *   This can be called safely from atomic context.
+ *   The caller must hold kbdev->hwaccess_lock
+ */
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
+		ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
+{
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(kctx);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
+		katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0);
+
+	kbase_job_check_leave_disjoint(kbdev, katom);
+
+	katom->slot_nr = slot_nr;
+
+	atomic_inc(&kctx->work_count);
+
+#ifdef CONFIG_DEBUG_FS
+	/* a failed job happened and is waiting for dumping*/
+	if (!katom->will_fail_event_code &&
+			kbase_debug_job_fault_process(katom, katom->event_code))
+		return;
+#endif
+
+	WARN_ON(work_pending(&katom->work));
+	INIT_WORK(&katom->work, kbase_jd_done_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_done);
+
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+	/* This should only be done from a context that is not scheduled */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	WARN_ON(work_pending(&katom->work));
+
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	INIT_WORK(&katom->work, jd_cancel_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+
+void kbase_jd_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_jd_atom *katom;
+	struct list_head *entry, *tmp;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u);
+
+	kbase_js_zap_context(kctx);
+
+	mutex_lock(&kctx->jctx.lock);
+
+	/*
+	 * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are
+	 * queued outside the job scheduler.
+	 */
+
+	del_timer_sync(&kctx->soft_job_timeout);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		katom = list_entry(entry, struct kbase_jd_atom, queue);
+		kbase_cancel_soft_job(katom);
+	}
+
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_cancel_all_atoms(kctx);
+#endif
+
+	mutex_unlock(&kctx->jctx.lock);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 */
+	flush_workqueue(kctx->dma_fence.wq);
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+	kbase_debug_job_fault_kctx_unblock(kctx);
+#endif
+
+	kbase_jm_wait_for_zero_jobs(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_zap_context);
+
+int kbase_jd_init(struct kbase_context *kctx)
+{
+	int i;
+	int mali_err = 0;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kctx->jctx.job_done_wq = alloc_workqueue("mali_jd",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (NULL == kctx->jctx.job_done_wq) {
+		mali_err = -ENOMEM;
+		goto out1;
+	}
+
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		init_waitqueue_head(&kctx->jctx.atoms[i].completed);
+
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]);
+
+		/* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
+		kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
+		kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+		kctx->jctx.atoms[i].dma_fence.context =
+						dma_fence_context_alloc(1);
+		atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks);
+#endif
+	}
+
+	mutex_init(&kctx->jctx.lock);
+
+	init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
+
+	spin_lock_init(&kctx->jctx.tb_lock);
+
+	kctx->jctx.job_nr = 0;
+	INIT_LIST_HEAD(&kctx->completed_jobs);
+	atomic_set(&kctx->work_count, 0);
+
+	return 0;
+
+ out1:
+	return mali_err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_init);
+
+void kbase_jd_exit(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	/* Work queue is emptied by this */
+	destroy_workqueue(kctx->jctx.job_done_wq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_exit);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
new file mode 100755
index 0000000..2fc21fd
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -0,0 +1,246 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/seq_file.h>
+#include <mali_kbase.h>
+#include <mali_kbase_jd_debugfs.h>
+#include <mali_kbase_dma_fence.h>
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif
+#include <mali_kbase_ioctl.h>
+
+struct kbase_jd_debugfs_depinfo {
+	u8 id;
+	char type;
+};
+
+static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
+					struct seq_file *sfile)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	struct kbase_sync_fence_info info;
+	int res;
+
+	switch (atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		res = kbase_sync_fence_out_info_get(atom, &info);
+		if (0 == res) {
+			seq_printf(sfile, "Sa([%p]%d) ",
+				   info.fence, info.status);
+			break;
+		}
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		res = kbase_sync_fence_in_info_get(atom, &info);
+		if (0 == res) {
+			seq_printf(sfile, "Wa([%p]%d) ",
+				   info.fence, info.status);
+			break;
+		}
+	default:
+		break;
+	}
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		struct kbase_fence_cb *cb;
+
+		if (atom->dma_fence.fence) {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+			struct fence *fence = atom->dma_fence.fence;
+#else
+			struct dma_fence *fence = atom->dma_fence.fence;
+#endif
+
+			seq_printf(sfile,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+					"Sd(%u#%u: %s) ",
+#else
+					"Sd(%llu#%u: %s) ",
+#endif
+					fence->context,
+					fence->seqno,
+					dma_fence_is_signaled(fence) ?
+						"signaled" : "active");
+		}
+
+		list_for_each_entry(cb, &atom->dma_fence.callbacks,
+				    node) {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+			struct fence *fence = cb->fence;
+#else
+			struct dma_fence *fence = cb->fence;
+#endif
+
+			seq_printf(sfile,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+					"Wd(%u#%u: %s) ",
+#else
+					"Wd(%llu#%u: %s) ",
+#endif
+					fence->context,
+					fence->seqno,
+					dma_fence_is_signaled(fence) ?
+						"signaled" : "active");
+		}
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+}
+
+static void kbasep_jd_debugfs_atom_deps(
+		struct kbase_jd_debugfs_depinfo *deps,
+		struct kbase_jd_atom *atom)
+{
+	struct kbase_context *kctx = atom->kctx;
+	int i;
+
+	for (i = 0; i < 2; i++)	{
+		deps[i].id = (unsigned)(atom->dep[i].atom ?
+				kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0);
+
+		switch (atom->dep[i].dep_type) {
+		case BASE_JD_DEP_TYPE_INVALID:
+			deps[i].type = ' ';
+			break;
+		case BASE_JD_DEP_TYPE_DATA:
+			deps[i].type = 'D';
+			break;
+		case BASE_JD_DEP_TYPE_ORDER:
+			deps[i].type = '>';
+			break;
+		default:
+			deps[i].type = '?';
+			break;
+		}
+	}
+}
+/**
+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file.
+ * @sfile: The debugfs entry
+ * @data:  Data associated with the entry
+ *
+ * This function is called to get the contents of the JD atoms debugfs file.
+ * This is a report of all atoms managed by kbase_jd_context.atoms
+ *
+ * Return: 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+	struct kbase_jd_atom *atoms;
+	unsigned long irq_flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Print version */
+	seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION);
+
+	/* Print U/K API version */
+	seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR,
+			BASE_UK_VERSION_MINOR);
+
+	/* Print table heading */
+	seq_puts(sfile, " ID, Core req, St, CR,   Predeps,           Start time, Additional info...\n");
+
+	atoms = kctx->jctx.atoms;
+	/* General atom states */
+	mutex_lock(&kctx->jctx.lock);
+	/* JS-related states */
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+	for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
+		struct kbase_jd_atom *atom = &atoms[i];
+		s64 start_timestamp = 0;
+		struct kbase_jd_debugfs_depinfo deps[2];
+
+		if (atom->status == KBASE_JD_ATOM_STATE_UNUSED)
+			continue;
+
+		/* start_timestamp is cleared as soon as the atom leaves UNUSED state
+		 * and set before a job is submitted to the h/w, a non-zero value means
+		 * it is valid */
+		if (ktime_to_ns(atom->start_timestamp))
+			start_timestamp = ktime_to_ns(
+					ktime_sub(ktime_get(), atom->start_timestamp));
+
+		kbasep_jd_debugfs_atom_deps(deps, atom);
+
+		seq_printf(sfile,
+				"%3u, %8x, %2u, %c%3u %c%3u, %20lld, ",
+				i, atom->core_req, atom->status,
+				deps[0].type, deps[0].id,
+				deps[1].type, deps[1].id,
+				start_timestamp);
+
+
+		kbase_jd_debugfs_fence_info(atom, sfile);
+
+		seq_puts(sfile, "\n");
+	}
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&kctx->jctx.lock);
+
+	return 0;
+}
+
+
+/**
+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * Return: file descriptor
+ */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private);
+}
+
+static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+	.owner = THIS_MODULE,
+	.open = kbasep_jd_debugfs_atoms_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx)
+{
+	/* Caller already ensures this, but we keep the pattern for
+	 * maintenance safety.
+	 */
+	if (WARN_ON(!kctx) ||
+		WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry)))
+		return;
+
+	/* Expose all atoms */
+	debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx,
+			&kbasep_jd_debugfs_atoms_fops);
+
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
new file mode 100755
index 0000000..697bdef
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_jd_debugfs.h
+ * Header file for job dispatcher-related entries in debugfs
+ */
+
+#ifndef _KBASE_JD_DEBUGFS_H
+#define _KBASE_JD_DEBUGFS_H
+
+#include <linux/debugfs.h>
+
+#define MALI_JD_DEBUGFS_VERSION 3
+
+/* Forward declarations */
+struct kbase_context;
+
+/**
+ * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system
+ *
+ * @kctx Pointer to kbase_context
+ */
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx);
+
+#endif  /*_KBASE_JD_DEBUGFS_H*/
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
new file mode 100755
index 0000000..da78a16
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -0,0 +1,140 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_hwaccess_jm.h"
+#include "mali_kbase_jm.h"
+
+/**
+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot
+ *			 @js on the active context.
+ * @kbdev:		Device pointer
+ * @js:			Job slot to run on
+ * @nr_jobs_to_submit:	Number of jobs to attempt to submit
+ *
+ * Return: true if slot can still be submitted on, false if slot is now full.
+ */
+static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
+				int nr_jobs_to_submit)
+{
+	struct kbase_context *kctx;
+	int i;
+
+	kctx = kbdev->hwaccess.active_kctx[js];
+
+	if (!kctx)
+		return true;
+
+	for (i = 0; i < nr_jobs_to_submit; i++) {
+		struct kbase_jd_atom *katom = kbase_js_pull(kctx, js);
+
+		if (!katom)
+			return true; /* Context has no jobs on this slot */
+
+		kbase_backend_run_atom(kbdev, katom);
+	}
+
+	return false; /* Slot ringbuffer should now be full */
+}
+
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	u32 ret_mask = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (js_mask) {
+		int js = ffs(js_mask) - 1;
+		int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
+
+		if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
+			ret_mask |= (1 << js);
+
+		js_mask &= ~(1 << js);
+	}
+
+	return ret_mask;
+}
+
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick(kbdev, js_mask);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_try_kick_all(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick_all(kbdev);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		if (kbdev->hwaccess.active_kctx[js] == kctx)
+			kbdev->hwaccess.active_kctx[js] = NULL;
+	}
+}
+
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->event_code != BASE_JD_EVENT_STOPPED &&
+			katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
+		return kbase_js_complete_atom(katom, NULL);
+	} else {
+		kbase_js_unpull(katom->kctx, katom);
+		return NULL;
+	}
+}
+
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return kbase_js_complete_atom(katom, end_timestamp);
+}
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
new file mode 100755
index 0000000..c468ea4
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -0,0 +1,115 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Job manager common APIs
+ */
+
+#ifndef _KBASE_JM_H_
+#define _KBASE_JM_H_
+
+/**
+ * kbase_jm_kick() - Indicate that there are jobs ready to run.
+ * @kbdev:	Device pointer
+ * @js_mask:	Mask of the job slots that can be pulled from.
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job
+ *			 slots.
+ * @kbdev:	Device pointer
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
+{
+	return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+/**
+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick
+ * @kbdev:   Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
+ * @kbdev:  Device pointer
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick_all() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick_all(struct kbase_device *kbdev);
+
+/**
+ * kbase_jm_idle_ctx() - Mark a context as idle.
+ * @kbdev:	Device pointer
+ * @kctx:	Context to mark as idle
+ *
+ * No more atoms will be pulled from this context until it is marked as active
+ * by kbase_js_use_ctx().
+ *
+ * The context should have no atoms currently pulled from it
+ * (kctx->atoms_pulled == 0).
+ *
+ * Caller must hold the hwaccess_lock
+ */
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
+ *				  been soft-stopped or will fail due to a
+ *				  dependency
+ * @kbdev:	Device pointer
+ * @katom:	Atom that has been stopped or will be failed
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+			struct kbase_jd_atom *katom);
+
+/**
+ * kbase_jm_complete() - Complete an atom
+ * @kbdev:		Device pointer
+ * @katom:		Atom that has completed
+ * @end_timestamp:	Timestamp of atom completion
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp);
+
+#endif /* _KBASE_JM_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
new file mode 100755
index 0000000..e2e1d17
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -0,0 +1,2883 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_tracepoints.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_ctx_sched.h>
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_jm.h"
+#include "mali_kbase_hwaccess_jm.h"
+
+/*
+ * Private types
+ */
+
+/* Bitpattern indicating the result of releasing a context */
+enum {
+	/* The context was descheduled - caller should try scheduling in a new
+	 * one to keep the runpool full */
+	KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+	/* Ctx attributes were changed - caller should try scheduling all
+	 * contexts */
+	KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
+};
+
+typedef u32 kbasep_js_release_result;
+
+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
+	KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
+	KBASE_JS_ATOM_SCHED_PRIO_LOW  /* BASE_JD_PRIO_LOW */
+};
+
+const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
+	BASE_JD_PRIO_HIGH,   /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+	BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+	BASE_JD_PRIO_LOW     /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+};
+
+
+/*
+ * Private function prototypes
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state);
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_ctx_job_cb callback);
+
+/* Helper for trace subcodes */
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	return atomic_read(&kctx->refcount);
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+/*
+ * Private functions
+ */
+
+/**
+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
+ * @features: JSn_FEATURE register value
+ *
+ * Given a JSn_FEATURE register value returns the core requirements that match
+ *
+ * Return: Core requirement bit mask
+ */
+static base_jd_core_req core_reqs_from_jsn_features(u16 features)
+{
+	base_jd_core_req core_req = 0u;
+
+	if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+		core_req |= BASE_JD_REQ_V;
+
+	if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+		core_req |= BASE_JD_REQ_CF;
+
+	if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+		core_req |= BASE_JD_REQ_CS;
+
+	if ((features & JS_FEATURE_TILER_JOB) != 0)
+		core_req |= BASE_JD_REQ_T;
+
+	if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+		core_req |= BASE_JD_REQ_FS;
+
+	return core_req;
+}
+
+static void kbase_js_sync_timers(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+	kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
+
+/* Hold the mmu_hw_mutex and hwaccess_lock for this */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	bool result = false;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	as_nr = kctx->as_nr;
+	if (atomic_read(&kctx->refcount) > 0) {
+		KBASE_DEBUG_ASSERT(as_nr >= 0);
+
+		kbase_ctx_sched_retain_ctx_refcount(kctx);
+		KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
+				NULL, 0u, atomic_read(&kctx->refcount));
+		result = true;
+	}
+
+	return result;
+}
+
+/**
+ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority to check.
+ *
+ * Return true if there are no atoms to pull. There may be running atoms in the
+ * ring buffer even if there are no atoms to pull. It is also possible for the
+ * ring buffer to be full (with running atoms) when this functions returns
+ * true.
+ *
+ * Return: true if there are no atoms to pull, false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	return RB_EMPTY_ROOT(&rb->runnable_tree);
+}
+
+/**
+ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no
+ * pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if the ring buffers for all priorities have no pullable atoms,
+ *	   false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue.
+ * @kctx:     Pointer to kbase context with the queue.
+ * @js:       Job slot id to iterate.
+ * @prio:     Priority id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over a queue and invoke @callback for each entry in the queue, and
+ * remove the entry from the queue.
+ *
+ * If entries are added to the queue while this is running those entries may, or
+ * may not be covered. To ensure that all entries in the buffer have been
+ * enumerated when this function returns jsctx->lock must be held when calling
+ * this function.
+ *
+ * The HW access lock must always be held when calling this function.
+ */
+static void
+jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
+		kbasep_js_ctx_job_cb callback)
+{
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (!RB_EMPTY_ROOT(&queue->runnable_tree)) {
+		struct rb_node *node = rb_first(&queue->runnable_tree);
+		struct kbase_jd_atom *entry = rb_entry(node,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		rb_erase(node, &queue->runnable_tree);
+		callback(kctx->kbdev, entry);
+	}
+
+	while (!list_empty(&queue->x_dep_head)) {
+		struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next,
+				struct kbase_jd_atom, queue);
+
+		list_del(queue->x_dep_head.next);
+
+		callback(kctx->kbdev, entry);
+	}
+}
+
+/**
+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue
+ * @kctx:     Pointer to kbase context with queue.
+ * @js:       Job slot id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over all the different priorities, and for each call
+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
+ * for each entry, and remove the entry from the queue.
+ */
+static inline void
+jsctx_queue_foreach(struct kbase_context *kctx, int js,
+		kbasep_js_ctx_job_cb callback)
+{
+	int prio;
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+		jsctx_queue_foreach_prio(kctx, js, prio, callback);
+}
+
+/**
+ * jsctx_rb_peek_prio(): - Check buffer and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a pointer to
+ * the next atom, unless the ring buffer is empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	struct rb_node *node;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	node = rb_first(&rb->runnable_tree);
+	if (!node)
+		return NULL;
+
+	return rb_entry(node, struct kbase_jd_atom, runnable_tree_node);
+}
+
+/**
+ * jsctx_rb_peek(): - Check all priority buffers and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Check the ring buffers for all priorities, starting from
+ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
+ * pointer to the next atom, unless all the priority's ring buffers are empty.
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		struct kbase_jd_atom *katom;
+
+		katom = jsctx_rb_peek_prio(kctx, js, prio);
+		if (katom)
+			return katom;
+	}
+
+	return NULL;
+}
+
+/**
+ * jsctx_rb_pull(): - Mark atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to pull.
+ *
+ * Mark an atom previously obtained from jsctx_rb_peek() as running.
+ *
+ * @katom must currently be at the head of the ring buffer.
+ */
+static inline void
+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/* Atoms must be pulled in the correct order. */
+	WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
+
+	rb_erase(&katom->runnable_tree_node, &rb->runnable_tree);
+}
+
+#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0)
+
+static void
+jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+	struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (*new) {
+		struct kbase_jd_atom *entry = container_of(*new,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		parent = *new;
+		if (LESS_THAN_WRAP(katom->age, entry->age))
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&katom->runnable_tree_node, parent, new);
+	rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree);
+
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_READY);
+}
+
+/**
+ * jsctx_rb_unpull(): - Undo marking of atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to unpull.
+ *
+ * Undo jsctx_rb_pull() and put @katom back in the queue.
+ *
+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
+ * pulled.
+ */
+static inline void
+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_tree_add(kctx, katom);
+}
+
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
+					int js,
+					bool is_scheduled);
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+
+/*
+ * Functions private to KBase ('Protected' functions)
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev)
+{
+	struct kbasep_js_device_data *jsdd;
+	int i, j;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	jsdd = &kbdev->js_data;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Soft-stop will be disabled on a single context by default unless
+	 * softstop_always is set */
+	jsdd->softstop_always = false;
+#endif				/* CONFIG_MALI_DEBUG */
+	jsdd->nr_all_contexts_running = 0;
+	jsdd->nr_user_contexts_running = 0;
+	jsdd->nr_contexts_pullable = 0;
+	atomic_set(&jsdd->nr_contexts_runnable, 0);
+	/* No ctx allowed to submit */
+	jsdd->runpool_irq.submit_allowed = 0u;
+	memset(jsdd->runpool_irq.ctx_attr_ref_count, 0,
+			sizeof(jsdd->runpool_irq.ctx_attr_ref_count));
+	memset(jsdd->runpool_irq.slot_affinities, 0,
+			sizeof(jsdd->runpool_irq.slot_affinities));
+	memset(jsdd->runpool_irq.slot_affinity_refcount, 0,
+			sizeof(jsdd->runpool_irq.slot_affinity_refcount));
+	INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list);
+
+	/* Config attributes */
+	jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+	jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+	jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+	else
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS;
+	jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+	jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408;
+	else
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+	jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+	jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
+	jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
+	atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
+
+	dev_dbg(kbdev->dev, "JS Config Attribs: ");
+	dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
+			jsdd->scheduling_period_ns);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u",
+			jsdd->soft_stop_ticks);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u",
+			jsdd->soft_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u",
+			jsdd->hard_stop_ticks_ss);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u",
+			jsdd->hard_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u",
+			jsdd->hard_stop_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u",
+			jsdd->gpu_reset_ticks_ss);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u",
+			jsdd->gpu_reset_ticks_cl);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u",
+			jsdd->gpu_reset_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u",
+			jsdd->ctx_timeslice_ns);
+	dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i",
+		atomic_read(&jsdd->soft_job_timeout_ms));
+
+	if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
+			jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
+			jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping &&
+			jsdd->hard_stop_ticks_dumping <
+			jsdd->gpu_reset_ticks_dumping)) {
+		dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n");
+		return -EINVAL;
+	}
+
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.",
+			jsdd->soft_stop_ticks,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.",
+			jsdd->hard_stop_ticks_ss,
+			jsdd->hard_stop_ticks_dumping,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing.");
+#endif
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i)
+		jsdd->js_reqs[i] = core_reqs_from_jsn_features(
+			kbdev->gpu_props.props.raw_props.js_features[i]);
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+
+	mutex_init(&jsdd->runpool_mutex);
+	mutex_init(&jsdd->queue_mutex);
+	spin_lock_init(&kbdev->hwaccess_lock);
+	sema_init(&jsdd->schedule_sem, 1);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
+		for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) {
+			INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]);
+			INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]);
+		}
+	}
+
+	return 0;
+}
+
+void kbasep_js_devdata_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbasep_js_devdata_term(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	/* The caller must de-register all contexts before calling this
+	 */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
+	KBASE_DEBUG_ASSERT(memcmp(
+				  js_devdata->runpool_irq.ctx_attr_ref_count,
+				  zero_ctx_attr_ref_count,
+				  sizeof(zero_ctx_attr_ref_count)) == 0);
+	CSTD_UNUSED(zero_ctx_attr_ref_count);
+}
+
+int kbasep_js_kctx_init(struct kbase_context *const kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int i, j;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	js_kctx_info->ctx.nr_jobs = 0;
+	kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+	kbase_ctx_flag_clear(kctx, KCTX_DYING);
+	memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
+			sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
+
+	/* Initially, the context is disabled from submission until the create
+	 * flags are set */
+	kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+	mutex_init(&js_kctx_info->ctx.jsctx_mutex);
+
+	init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
+
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
+			INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head);
+			kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT;
+		}
+	}
+
+	return 0;
+}
+
+void kbasep_js_kctx_term(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int js;
+	bool update_ctx_count = false;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* The caller must de-register all jobs before calling this */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
+
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) {
+		WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
+		atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		update_ctx_count = true;
+		kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+	}
+
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if (update_ctx_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		kbase_backend_ctx_count_changed(kbdev);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_nolock - Variant of
+ *                                         kbase_jd_ctx_list_add_pullable()
+ *                                         where the caller must hold
+ *                                         hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+			&kbdev->js_data.ctx_list_pullable[js][kctx->priority]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head_nolock - Variant of
+ *                                              kbase_js_ctx_list_add_pullable_head()
+ *                                              where the caller must hold
+ *                                              hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head_nolock(
+		struct kbase_device *kbdev, struct kbase_context *kctx, int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+			&kbdev->js_data.ctx_list_pullable[js][kctx->priority]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
+ *                                       per-slot pullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the head.
+ *
+ * This function should be used when a context has been scheduled, but no jobs
+ * can currently be pulled from it.
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the
+ *                                           per-slot unpullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on the per-slot pullable queue. It will be
+ * removed from the pullable queue before being added to the unpullable queue.
+ *
+ * This function should be used when a context has been pulled from, and there
+ * are no jobs remaining on the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+		&kbdev->js_data.ctx_list_unpullable[js][kctx->priority]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable
+ *                                   or unpullable context queues
+ * @kbdev:  Device pointer
+ * @kctx:   Context to remove from queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on one of the queues.
+ *
+ * This function should be used when a context has no jobs on the GPU, and no
+ * jobs remaining for the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head()
+ *                                     where the caller must hold
+ *                                     hwaccess_lock
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
+						struct kbase_device *kbdev,
+						int js)
+{
+	struct kbase_context *kctx;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i]))
+			continue;
+
+		kctx = list_entry(kbdev->js_data.ctx_list_pullable[js][i].next,
+				struct kbase_context,
+				jctx.sched_info.ctx.ctx_list_entry[js]);
+
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+		return kctx;
+	}
+	return NULL;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
+ *                              queue.
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head(
+		struct kbase_device *kbdev, int js)
+{
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the
+ *                         specified slot
+ * @kctx:          Context pointer
+ * @js:            Job slot to use
+ * @is_scheduled:  true if the context is currently scheduled
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:         true if context can be pulled from on specified slot
+ *                 false otherwise
+ */
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
+					bool is_scheduled)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_jd_atom *katom;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	js_devdata = &kctx->kbdev->js_data;
+
+	if (is_scheduled) {
+		if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+			return false;
+	}
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return false; /* No pullable atoms */
+	if (kctx->blocked_js[js][katom->sched_priority])
+		return false;
+	if (atomic_read(&katom->blocked))
+		return false; /* next atom blocked */
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return false;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return false;
+	}
+
+	return true;
+}
+
+static bool kbase_js_dep_validate(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool ret = true;
+	bool has_dep = false, has_x_dep = false;
+	int js = kbase_js_get_slot(kbdev, katom);
+	int prio = katom->sched_priority;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+		if (dep_atom) {
+			int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+			int dep_prio = dep_atom->sched_priority;
+
+			/* Dependent atom must already have been submitted */
+			if (!(dep_atom->atom_flags &
+					KBASE_KATOM_FLAG_JSCTX_IN_TREE)) {
+				ret = false;
+				break;
+			}
+
+			/* Dependencies with different priorities can't
+			  be represented in the ringbuffer */
+			if (prio != dep_prio) {
+				ret = false;
+				break;
+			}
+
+			if (js == dep_js) {
+				/* Only one same-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * same-slot dependency */
+				if (dep_atom->post_dep) {
+					ret = false;
+					break;
+				}
+				has_dep = true;
+			} else {
+				/* Only one cross-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_x_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * cross-slot dependency */
+				if (dep_atom->x_post_dep) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already be in the
+				 * HW access ringbuffer */
+				if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already have
+				 * completed */
+				if (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_IN_JS) {
+					ret = false;
+					break;
+				}
+				/* Cross-slot dependencies must not violate
+				 * PRLAM-8987 affinity restrictions */
+				if (kbase_hw_has_issue(kbdev,
+							BASE_HW_ISSUE_8987) &&
+						(js == 2 || dep_js == 2)) {
+					ret = false;
+					break;
+				}
+				has_x_dep = true;
+			}
+
+			/* Dependency can be represented in ringbuffers */
+		}
+	}
+
+	/* If dependencies can be represented by ringbuffer then clear them from
+	 * atom structure */
+	if (ret) {
+		for (i = 0; i < 2; i++) {
+			struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+			if (dep_atom) {
+				int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+
+				if ((js != dep_js) &&
+					(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_COMPLETED)
+					&& (dep_atom->status !=
+					KBASE_JD_ATOM_STATE_HW_COMPLETED)
+					&& (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED)) {
+
+					katom->atom_flags |=
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+					katom->x_pre_dep = dep_atom;
+					dep_atom->x_post_dep = katom;
+					if (kbase_jd_katom_dep_type(
+							&katom->dep[i]) ==
+							BASE_JD_DEP_TYPE_DATA)
+						katom->atom_flags |=
+						KBASE_KATOM_FLAG_FAIL_BLOCKER;
+				}
+				if ((kbase_jd_katom_dep_type(&katom->dep[i])
+						== BASE_JD_DEP_TYPE_DATA) &&
+						(js == dep_js)) {
+					katom->pre_dep = dep_atom;
+					dep_atom->post_dep = katom;
+				}
+
+				list_del(&katom->dep_item[i]);
+				kbase_jd_katom_dep_clear(&katom->dep[i]);
+			}
+		}
+	}
+
+	return ret;
+}
+
+void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Move kctx to the pullable/upullable list as per the new priority */
+	if (new_priority != kctx->priority) {
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kctx->slots_pullable & (1 << js))
+				list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js][new_priority]);
+			else
+				list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_unpullable[js][new_priority]);
+		}
+
+		kctx->priority = new_priority;
+	}
+}
+
+void kbase_js_update_ctx_priority(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int new_priority = KBASE_JS_ATOM_SCHED_PRIO_LOW;
+	int prio;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->js_ctx_scheduling_mode == KBASE_JS_SYSTEM_PRIORITY_MODE) {
+		/* Determine the new priority for context, as per the priority
+		 * of currently in-use atoms.
+		 */
+		for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+			if (kctx->atoms_count[prio]) {
+				new_priority = prio;
+				break;
+			}
+		}
+	}
+
+	kbase_js_set_ctx_priority(kctx, new_priority);
+}
+
+bool kbasep_js_add_job(struct kbase_context *kctx,
+		struct kbase_jd_atom *atom)
+{
+	unsigned long flags;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+	bool enqueue_required = false;
+	bool timer_sync = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/*
+	 * Begin Runpool transaction
+	 */
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
+	++(js_kctx_info->ctx.nr_jobs);
+
+	/* Lock for state available during IRQ */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (++kctx->atoms_count[atom->sched_priority] == 1)
+		kbase_js_update_ctx_priority(kctx);
+
+	if (!kbase_js_dep_validate(kctx, atom)) {
+		/* Dependencies could not be represented */
+		--(js_kctx_info->ctx.nr_jobs);
+
+		/* Setting atom status back to queued as it still has unresolved
+		 * dependencies */
+		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+		/* Undo the count, as the atom will get added again later but
+		 * leave the context priority adjusted or boosted, in case if
+		 * this was the first higher priority atom received for this
+		 * context.
+		 * This will prevent the scenario of priority inversion, where
+		 * another context having medium priority atoms keeps getting
+		 * scheduled over this context, which is having both lower and
+		 * higher priority atoms, but higher priority atoms are blocked
+		 * due to dependency on lower priority atoms. With priority
+		 * boost the high priority atom will get to run at earliest.
+		 */
+		kctx->atoms_count[atom->sched_priority]--;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		goto out_unlock;
+	}
+
+	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* Context Attribute Refcounting */
+	kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom);
+
+	if (enqueue_required) {
+		if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false))
+			timer_sync = kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+		else
+			timer_sync = kbase_js_ctx_list_add_unpullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+	}
+	/* If this context is active and the atom is the first on its slot,
+	 * kick the job manager to attempt to fast-start the atom */
+	if (enqueue_required && kctx ==
+			kbdev->hwaccess.active_kctx[atom->slot_nr])
+		kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+	/* End runpool transaction */
+
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+			/* A job got added while/after kbase_job_zap_context()
+			 * was called on a non-scheduled context. Kill that job
+			 * by killing the context. */
+			kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx,
+					false);
+		} else if (js_kctx_info->ctx.nr_jobs == 1) {
+			/* Handle Refcount going from 0 to 1: schedule the
+			 * context on the Queue */
+			KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+			dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+
+			/* Queue was updated - caller must try to
+			 * schedule the head context */
+			WARN_ON(!enqueue_required);
+		}
+	}
+out_unlock:
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return enqueue_required;
+}
+
+void kbasep_js_remove_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc,
+			kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* De-refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
+	--(js_kctx_info->ctx.nr_jobs);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (--kctx->atoms_count[atom->sched_priority] == 0)
+		kbase_js_update_ctx_priority(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	unsigned long flags;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool attr_state_changed;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+	kbasep_js_remove_job(kbdev, kctx, katom);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* The atom has 'finished' (will not be re-run), so no need to call
+	 * kbasep_js_has_atom_finished().
+	 *
+	 * This is because it returns false for soft-stopped atoms, but we
+	 * want to override that, because we're cancelling an atom regardless of
+	 * whether it was soft-stopped or not */
+	attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
+			&katom_retained_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return attr_state_changed;
+}
+
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	bool result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	return result;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+		int as_nr)
+{
+	unsigned long flags;
+	struct kbase_context *found_kctx = NULL;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	found_kctx = kbdev->as_to_kctx[as_nr];
+
+	if (found_kctx != NULL)
+		kbase_ctx_sched_retain_ctx_refcount(found_kctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return found_kctx;
+}
+
+/**
+ * kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs after
+ *                           releasing a context and/or atom
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed
+ *
+ * This collates a set of actions that must happen whilst hwaccess_lock is held.
+ *
+ * This includes running more jobs when:
+ * - The previously released kctx caused a ctx attribute change,
+ * - The released atom caused a ctx attribute change,
+ * - Slots were previously blocked due to affinity restrictions,
+ * - Submission during IRQ handling failed.
+ *
+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were
+ *         changed. The caller should try scheduling all contexts
+ */
+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state,
+		bool runpool_ctx_attr_change)
+{
+	struct kbasep_js_device_data *js_devdata;
+	kbasep_js_release_result result = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom_retained_state != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (js_devdata->nr_user_contexts_running != 0 && runpool_ctx_attr_change) {
+		/* A change in runpool ctx attributes might mean we can
+		 * run more jobs than before  */
+		result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+
+		KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB,
+					kctx, NULL, 0u, 0);
+	}
+	return result;
+}
+
+/**
+ * kbasep_js_runpool_release_ctx_internal - Internal function to release the reference
+ *                                          on a ctx and an atom's "retained state", only
+ *                                          taking the runpool and as transaction mutexes
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
+ *
+ * This also starts more jobs running in the case of an ctx-attribute state change
+ *
+ * This does none of the followup actions for scheduling:
+ * - It does not schedule in a new context
+ * - It does not requeue or handle dying contexts
+ *
+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead
+ *
+ * Has following requirements
+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
+ * - Context has a non-zero refcount
+ * - Caller holds js_kctx_info->ctx.jsctx_mutex
+ * - Caller holds js_devdata->runpool_mutex
+ *
+ * Return: A bitpattern, containing KBASEP_JS_RELEASE_RESULT_* flags, indicating
+ *         the result of releasing a context that whether the caller should try
+ *         scheduling a new context or should try scheduling all contexts.
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	kbasep_js_release_result release_result = 0u;
+	bool runpool_ctx_attr_change = false;
+	int kctx_as_nr;
+	int new_ref_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	kctx_as_nr = kctx->as_nr;
+	KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/*
+	 * Transaction begins on AS and runpool_irq
+	 *
+	 * Assert about out calling contract
+	 */
+	mutex_lock(&kbdev->pm.lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/* Update refcount */
+	kbase_ctx_sched_release_ctx(kctx);
+	new_ref_count = atomic_read(&kctx->refcount);
+
+	/* Release the atom if it finished (i.e. wasn't soft-stopped) */
+	if (kbasep_js_has_atom_finished(katom_retained_state))
+		runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(
+				kbdev, kctx, katom_retained_state);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u,
+			new_ref_count);
+
+	if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) &&
+			!kbase_pm_is_suspending(kbdev)) {
+		/* Context is kept scheduled into an address space even when
+		 * there are no jobs, in this case we have to handle the
+		 * situation where all jobs have been evicted from the GPU and
+		 * submission is disabled.
+		 *
+		 * At this point we re-enable submission to allow further jobs
+		 * to be executed
+		 */
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+	}
+
+	/* Make a set of checks to see if the context should be scheduled out.
+	 * Note that there'll always be at least 1 reference to the context
+	 * which was previously acquired by kbasep_js_schedule_ctx(). */
+	if (new_ref_count == 1 &&
+		(!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
+							kbdev->pm.suspending)) {
+		int num_slots = kbdev->gpu_props.num_job_slots;
+		int slot;
+
+		/* Last reference, and we've been told to remove this context
+		 * from the Run Pool */
+		dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d",
+				kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
+				kbasep_js_is_submit_allowed(js_devdata, kctx));
+
+		KBASE_TLSTREAM_TL_NRET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx);
+
+		kbase_backend_release_ctx_irq(kbdev, kctx);
+
+		for (slot = 0; slot < num_slots; slot++) {
+			if (kbdev->hwaccess.active_kctx[slot] == kctx)
+				kbdev->hwaccess.active_kctx[slot] = NULL;
+		}
+
+		/* Ctx Attribute handling
+		 *
+		 * Releasing atoms attributes must either happen before this, or
+		 * after the KCTX_SHEDULED flag is changed, otherwise we
+		 * double-decount the attributes
+		 */
+		runpool_ctx_attr_change |=
+			kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
+
+		/* Releasing the context and katom retained state can allow
+		 * more jobs to run */
+		release_result |=
+			kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev,
+						kctx, katom_retained_state,
+						runpool_ctx_attr_change);
+
+		/*
+		 * Transaction ends on AS and runpool_irq:
+		 *
+		 * By this point, the AS-related data is now clear and ready
+		 * for re-use.
+		 *
+		 * Since releases only occur once for each previous successful
+		 * retain, and no more retains are allowed on this context, no
+		 * other thread will be operating in this
+		 * code whilst we are
+		 */
+
+		/* Recalculate pullable status for all slots */
+		for (slot = 0; slot < num_slots; slot++) {
+			if (kbase_js_ctx_pullable(kctx, slot, false))
+				kbase_js_ctx_list_add_pullable_nolock(kbdev,
+						kctx, slot);
+		}
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		kbase_backend_release_ctx_noirq(kbdev, kctx);
+
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* Note: Don't reuse kctx_as_nr now */
+
+		/* Synchronize with any timers */
+		kbase_backend_ctx_count_changed(kbdev);
+
+		/* update book-keeping info */
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+		/* Signal any waiter that the context is not scheduled, so is
+		 * safe for termination - once the jsctx_mutex is also dropped,
+		 * and jobs have finished. */
+		wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+		/* Queue an action to occur after we've dropped the lock */
+		release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED |
+			KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+	} else {
+		kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
+				katom_retained_state, runpool_ctx_attr_change);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return release_result;
+}
+
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	/* Setup a dummy katom_retained_state */
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+							&katom_retained_state);
+}
+
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx, bool has_pm_ref)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* This is called if and only if you've you've detached the context from
+	 * the Runpool Queue, and not added it back to the Runpool
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Dying: don't requeue, but kill all jobs on the context. This
+		 * happens asynchronously */
+		dev_dbg(kbdev->dev,
+			"JS: ** Killing Context %p on RunPool Remove **", kctx);
+		kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
+	}
+}
+
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL)
+		kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+			&katom_retained_state);
+}
+
+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into
+ * kbase_js_sched_all() */
+static void kbasep_js_runpool_release_ctx_no_schedule(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+	struct kbasep_js_atom_retained_state katom_retained_state_struct;
+	struct kbasep_js_atom_retained_state *katom_retained_state =
+		&katom_retained_state_struct;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	kbasep_js_atom_retained_state_init_invalid(katom_retained_state);
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* NOTE: could return release_result if the caller would like to know
+	 * whether it should schedule a new context, but currently no callers do
+	 */
+}
+
+void kbase_js_set_timeouts(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_backend_timeouts_changed(kbdev);
+}
+
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	bool kctx_suspended = false;
+	int as_nr;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Pick available address space for this context */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	as_nr = kbase_ctx_sched_retain_ctx(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		as_nr = kbase_backend_find_and_release_free_address_space(
+				kbdev, kctx);
+		if (as_nr != KBASEP_AS_NR_INVALID) {
+			/* Attempt to retain the context again, this should
+			 * succeed */
+			mutex_lock(&kbdev->mmu_hw_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			as_nr = kbase_ctx_sched_retain_ctx(kctx);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&kbdev->mmu_hw_mutex);
+
+			WARN_ON(as_nr == KBASEP_AS_NR_INVALID);
+		}
+	}
+	if (as_nr == KBASEP_AS_NR_INVALID)
+		return false; /* No address spaces currently available */
+
+	/*
+	 * Atomic transaction on the Context and Run Pool begins
+	 */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Check to see if context is dying due to kbase_job_zap_context() */
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Roll back the transaction so far and return */
+		kbase_ctx_sched_release_ctx(kctx);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL,
+				0u,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	kbase_ctx_flag_set(kctx, KCTX_SCHEDULED);
+
+	/* Assign context to previously chosen address space */
+	if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
+		/* Roll back the transaction so far and return */
+		kbase_ctx_sched_release_ctx(kctx);
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	kbdev->hwaccess.active_kctx[js] = kctx;
+
+	KBASE_TLSTREAM_TL_RET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx);
+
+	/* Cause any future waiter-on-termination to wait until the context is
+	 * descheduled */
+	wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+	/* Re-check for suspending: a suspend could've occurred, and all the
+	 * contexts could've been removed from the runpool before we took this
+	 * lock. In this case, we don't want to allow this context to run jobs,
+	 * we just want it out immediately.
+	 *
+	 * The DMB required to read the suspend flag was issued recently as part
+	 * of the hwaccess_lock locking. If a suspend occurs *after* that lock
+	 * was taken (i.e. this condition doesn't execute), then the
+	 * kbasep_js_suspend() code will cleanup this context instead (by virtue
+	 * of it being called strictly after the suspend flag is set, and will
+	 * wait for this lock to drop) */
+	if (kbase_pm_is_suspending(kbdev)) {
+		/* Cause it to leave at some later point */
+		bool retained;
+
+		retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+		KBASE_DEBUG_ASSERT(retained);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+		kctx_suspended = true;
+	}
+
+	kbase_ctx_flag_clear(kctx, KCTX_PULLED_SINCE_ACTIVE_JS0 << js);
+
+	/* Transaction complete */
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Synchronize with any timers */
+	kbase_backend_ctx_count_changed(kbdev);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	/* Note: after this point, the context could potentially get scheduled
+	 * out immediately */
+
+	if (kctx_suspended) {
+		/* Finishing forcing out the context due to a suspend. Use a
+		 * variant of kbasep_js_runpool_release_ctx() that doesn't
+		 * schedule a new context, to prevent a risk of recursion back
+		 * into this function */
+		kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx);
+		return false;
+	}
+	return true;
+}
+
+static bool kbase_js_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int js)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+			kbase_backend_use_ctx_sched(kbdev, kctx, js)) {
+		/* Context already has ASID - mark as active */
+		if (kbdev->hwaccess.active_kctx[js] != kctx) {
+			kbdev->hwaccess.active_kctx[js] = kctx;
+			kbase_ctx_flag_clear(kctx,
+					KCTX_PULLED_SINCE_ACTIVE_JS0 << js);
+		}
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return true; /* Context already scheduled */
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	return kbasep_js_schedule_ctx(kbdev, kctx, js);
+}
+
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	bool is_scheduled;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* This must never be attempted whilst suspending - i.e. it should only
+	 * happen in response to a syscall from a user-space thread */
+	BUG_ON(kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Mark the context as privileged */
+	kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED);
+
+	is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED);
+	if (!is_scheduled) {
+		/* Add the context to the pullable list */
+		if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0))
+			kbase_js_sync_timers(kbdev);
+
+		/* Fast-starting requires the jsctx_mutex to be dropped,
+		 * because it works on multiple ctxs */
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		/* Try to schedule the context in */
+		kbase_js_sched_all(kbdev);
+
+		/* Wait for the context to be scheduled in */
+		wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			   kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	} else {
+		/* Already scheduled in - We need to retain it to keep the
+		 * corresponding address space */
+		WARN_ON(!kbasep_js_runpool_retain_ctx(kbdev, kctx));
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+}
+KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx);
+
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* We don't need to use the address space anymore */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Release the context - it will be scheduled out */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	kbase_js_sched_all(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx);
+
+void kbasep_js_suspend(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+	u16 retained = 0u;
+	int nr_privileged_ctx = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Prevent all contexts from submitting */
+	js_devdata->runpool_irq.submit_allowed = 0;
+
+	/* Retain each of the contexts, so we can cause it to leave even if it
+	 * had no refcount to begin with */
+	for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
+		struct kbase_context *kctx = kbdev->as_to_kctx[i];
+
+		retained = retained << 1;
+
+		if (kctx && !(kbdev->as_free & (1u << i))) {
+			kbase_ctx_sched_retain_ctx_refcount(kctx);
+			retained |= 1u;
+			/* We can only cope with up to 1 privileged context -
+			 * the instrumented context. It'll be suspended by
+			 * disabling instrumentation */
+			if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+				++nr_privileged_ctx;
+				WARN_ON(nr_privileged_ctx != 1);
+			}
+		}
+	}
+	CSTD_UNUSED(nr_privileged_ctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* De-ref the previous retain to ensure each context gets pulled out
+	 * sometime later. */
+	for (i = 0;
+		 i < BASE_MAX_NR_AS;
+		 ++i, retained = retained >> 1) {
+		struct kbase_context *kctx = kbdev->as_to_kctx[i];
+
+		if (retained & 1u)
+			kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	/* Caller must wait for all Power Manager active references to be
+	 * dropped */
+}
+
+void kbasep_js_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int js, prio;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+			struct kbase_context *kctx, *n;
+			unsigned long flags;
+
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			list_for_each_entry_safe(kctx, n,
+				 &kbdev->js_data.ctx_list_unpullable[js][prio],
+				 jctx.sched_info.ctx.ctx_list_entry[js]) {
+				struct kbasep_js_kctx_info *js_kctx_info;
+				bool timer_sync = false;
+
+				/* Drop lock so we can take kctx mutexes */
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+
+				js_kctx_info = &kctx->jctx.sched_info;
+
+				mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+				mutex_lock(&js_devdata->runpool_mutex);
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+				if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+					kbase_js_ctx_pullable(kctx, js, false))
+					timer_sync =
+						kbase_js_ctx_list_add_pullable_nolock(
+								kbdev, kctx, js);
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+				if (timer_sync)
+					kbase_backend_ctx_count_changed(kbdev);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				/* Take lock before accessing list again */
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			}
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+	}
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	/* Restart atom processing */
+	kbase_js_sched_all(kbdev);
+
+	/* JS Resume complete */
+}
+
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if ((katom->core_req & BASE_JD_REQ_FS) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE |
+								BASE_JD_REQ_T)))
+		return false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) &&
+	    (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T)))
+		return false;
+
+	return true;
+}
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_FS)
+		return 0;
+
+	if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+		if (katom->device_nr == 1 &&
+				kbdev->gpu_props.num_core_groups == 2)
+			return 2;
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+			return 2;
+	}
+
+	return 1;
+}
+
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom)
+{
+	bool enqueue_required;
+
+	katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	/* If slot will transition from unpullable to pullable then add to
+	 * pullable list */
+	if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
+		enqueue_required = true;
+	} else {
+		enqueue_required = false;
+	}
+	if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
+			(katom->pre_dep && (katom->pre_dep->atom_flags &
+			KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		int prio = katom->sched_priority;
+		int js = katom->slot_nr;
+		struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+		list_add_tail(&katom->queue, &queue->x_dep_head);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+		enqueue_required = false;
+	} else {
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		/* Add atom to ring buffer. */
+		jsctx_tree_add(kctx, katom);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+	}
+
+	return enqueue_required;
+}
+
+/**
+ * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the
+ *                         runnable_tree, ready for execution
+ * @katom: Atom to submit
+ *
+ * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set,
+ * but is still present in the x_dep list. If @katom has a same-slot dependent
+ * atom then that atom (and any dependents) will also be moved.
+ */
+static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock);
+
+	while (katom) {
+		WARN_ON(!(katom->atom_flags &
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
+
+		if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+			list_del(&katom->queue);
+			katom->atom_flags &=
+					~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+			jsctx_tree_add(katom->kctx, katom);
+			katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+		} else {
+			break;
+		}
+
+		katom = katom->post_dep;
+	}
+}
+
+
+/**
+ * kbase_js_evict_deps - Evict dependencies of a failed atom.
+ * @kctx:       Context pointer
+ * @katom:      Pointer to the atom that has failed.
+ * @js:         The job slot the katom was run on.
+ * @prio:       Priority of the katom.
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propogated to all dependent atoms.
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom, int js, int prio)
+{
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+	struct kbase_jd_atom *next_katom = katom->post_dep;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (next_katom) {
+		KBASE_DEBUG_ASSERT(next_katom->status !=
+				KBASE_JD_ATOM_STATE_HW_COMPLETED);
+		next_katom->will_fail_event_code = katom->event_code;
+
+	}
+
+	/* Has cross slot depenency. */
+	if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE |
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		/* Remove dependency.*/
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+		/* Fail if it had a data dependency. */
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
+			x_dep->will_fail_event_code = katom->event_code;
+		}
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)
+			kbase_js_move_to_tree(x_dep);
+	}
+}
+
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	int pulled;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+		return NULL;
+	if (kbase_pm_is_suspending(kbdev))
+		return NULL;
+
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return NULL;
+	if (kctx->blocked_js[js][katom->sched_priority])
+		return NULL;
+	if (atomic_read(&katom->blocked))
+		return NULL;
+
+	/* Due to ordering restrictions when unpulling atoms on failure, we do
+	 * not allow multiple runs of fail-dep atoms from the same context to be
+	 * present on the same slot */
+	if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) {
+		struct kbase_jd_atom *prev_atom =
+				kbase_backend_inspect_tail(kbdev, js);
+
+		if (prev_atom && prev_atom->kctx != kctx)
+			return NULL;
+	}
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return NULL;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kbdev, js))
+			return NULL;
+	}
+
+	kbase_ctx_flag_set(kctx, KCTX_PULLED);
+	kbase_ctx_flag_set(kctx, (KCTX_PULLED_SINCE_ACTIVE_JS0 << js));
+
+	pulled = atomic_inc_return(&kctx->atoms_pulled);
+	if (pulled == 1 && !kctx->slots_pullable) {
+		WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+		kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+		atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+	}
+	atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
+	kctx->atoms_pulled_slot_pri[katom->slot_nr][katom->sched_priority]++;
+	jsctx_rb_pull(kctx, katom);
+
+	kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+	katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+
+	katom->ticks = 0;
+
+	return katom;
+}
+
+
+static void js_return_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+									work);
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	struct kbasep_js_atom_retained_state retained_state;
+	int js = katom->slot_nr;
+	int prio = katom->sched_priority;
+	bool timer_sync = false;
+	bool context_idle = false;
+	unsigned long flags;
+	base_jd_core_req core_req = katom->core_req;
+
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	kbasep_js_atom_retained_state_copy(&retained_state, katom);
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	atomic_dec(&kctx->atoms_pulled);
+	atomic_dec(&kctx->atoms_pulled_slot[js]);
+
+	atomic_dec(&katom->blocked);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kctx->atoms_pulled_slot_pri[js][katom->sched_priority]--;
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
+			jsctx_rb_none_to_pull(kctx, js))
+		timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js);
+
+	/* If this slot has been blocked due to soft-stopped atoms, and all
+	 * atoms have now been processed, then unblock the slot */
+	if (!kctx->atoms_pulled_slot_pri[js][prio] &&
+			kctx->blocked_js[js][prio]) {
+		kctx->blocked_js[js][prio] = false;
+
+		/* Only mark the slot as pullable if the context is not idle -
+		 * that case is handled below */
+		if (atomic_read(&kctx->atoms_pulled) &&
+				kbase_js_ctx_pullable(kctx, js, true))
+			timer_sync |= kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, js);
+	}
+
+	if (!atomic_read(&kctx->atoms_pulled)) {
+		if (!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
+				!kbase_ctx_flag(kctx, KCTX_DYING)) {
+			int num_slots = kbdev->gpu_props.num_job_slots;
+			int slot;
+
+			if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+				kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+			for (slot = 0; slot < num_slots; slot++) {
+				if (kbase_js_ctx_pullable(kctx, slot, true))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, slot);
+			}
+		}
+
+		kbase_jm_idle_ctx(kbdev, kctx);
+
+		context_idle = true;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (context_idle) {
+		WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+		kbase_pm_context_idle(kbdev);
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+							&retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req);
+}
+
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_rb_unpull(kctx, katom);
+
+	WARN_ON(work_pending(&katom->work));
+
+	/* Block re-submission until workqueue has run */
+	atomic_inc(&katom->blocked);
+
+	kbase_job_check_leave_disjoint(kctx->kbdev, katom);
+
+	INIT_WORK(&katom->work, js_return_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+						struct kbase_jd_atom *katom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	bool timer_sync = false;
+	int atom_slot;
+	bool context_idle = false;
+	int prio = katom->sched_priority;
+
+	kbdev = kctx->kbdev;
+	atom_slot = katom->slot_nr;
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
+		context_idle = !atomic_dec_return(&kctx->atoms_pulled);
+		atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
+		kctx->atoms_pulled_slot_pri[atom_slot][prio]--;
+
+		if (!atomic_read(&kctx->atoms_pulled) &&
+				!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		/* If this slot has been blocked due to soft-stopped atoms, and
+		 * all atoms have now been processed, then unblock the slot */
+		if (!kctx->atoms_pulled_slot_pri[atom_slot][prio]
+				&& kctx->blocked_js[atom_slot][prio]) {
+			kctx->blocked_js[atom_slot][prio] = false;
+			if (kbase_js_ctx_pullable(kctx, atom_slot, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+						kbdev, kctx, atom_slot);
+		}
+	}
+	WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE));
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
+			jsctx_rb_none_to_pull(kctx, atom_slot)) {
+		if (!list_empty(
+			&kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot]))
+			timer_sync |= kbase_js_ctx_list_remove_nolock(
+					kctx->kbdev, kctx, atom_slot);
+	}
+
+	/*
+	 * If submission is disabled on this context (most likely due to an
+	 * atom failure) and there are now no atoms left in the system then
+	 * re-enable submission so that context can be scheduled again.
+	 */
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
+					!atomic_read(&kctx->atoms_pulled) &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+		int js;
+
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	} else if (katom->x_post_dep &&
+			kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+		int js;
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	}
+
+	/* Mark context as inactive. The pm reference will be dropped later in
+	 * jd_done_worker().
+	 */
+	if (context_idle)
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return context_idle;
+}
+
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp)
+{
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+
+	kbdev = kctx->kbdev;
+
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (katom->will_fail_event_code)
+		katom->event_code = katom->will_fail_event_code;
+
+	katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
+
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_js_evict_deps(kctx, katom, katom->slot_nr,
+				katom->sched_priority);
+	}
+
+	KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, NULL,
+		katom->slot_nr, 0, TL_JS_EVENT_STOP);
+
+	kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
+
+	/* Unblock cross dependency if present */
+	if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
+			!(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) &&
+			(x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+		bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false);
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+		kbase_js_move_to_tree(x_dep);
+		if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false))
+			kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx,
+					x_dep->slot_nr);
+
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)
+			return x_dep;
+	}
+
+	return NULL;
+}
+
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS];
+	bool timer_sync = false;
+	bool ctx_waiting[BASE_JM_MAX_NR_SLOTS];
+	int js;
+
+	js_devdata = &kbdev->js_data;
+
+	down(&js_devdata->schedule_sem);
+	mutex_lock(&js_devdata->queue_mutex);
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		last_active[js] = kbdev->hwaccess.active_kctx[js];
+		ctx_waiting[js] = false;
+	}
+
+	while (js_mask) {
+		js = ffs(js_mask) - 1;
+
+		while (1) {
+			struct kbase_context *kctx;
+			unsigned long flags;
+			bool context_idle = false;
+
+			kctx = kbase_js_ctx_list_pop_head(kbdev, js);
+
+			if (!kctx) {
+				js_mask &= ~(1 << js);
+				break; /* No contexts on pullable list */
+			}
+
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) {
+				context_idle = true;
+
+				if (kbase_pm_context_active_handle_suspend(
+									kbdev,
+				      KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+					/* Suspend pending - return context to
+					 * queue and stop scheduling */
+					mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					if (kbase_js_ctx_list_add_pullable_head(
+						kctx->kbdev, kctx, js))
+						kbase_js_sync_timers(kbdev);
+					mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					mutex_unlock(&js_devdata->queue_mutex);
+					up(&js_devdata->schedule_sem);
+					return;
+				}
+				kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+			}
+
+			if (!kbase_js_use_ctx(kbdev, kctx, js)) {
+				mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				/* Context can not be used at this time */
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+				if (kbase_js_ctx_pullable(kctx, js, false)
+				    || kbase_ctx_flag(kctx, KCTX_PRIVILEGED))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev, kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				if (context_idle) {
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				}
+
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+				break;
+			}
+			mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_ctx_flag_clear(kctx, KCTX_PULLED);
+
+			if (!kbase_jm_kick(kbdev, 1 << js))
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+
+			if (!kbase_ctx_flag(kctx, KCTX_PULLED)) {
+				bool pullable = kbase_js_ctx_pullable(kctx, js,
+						true);
+
+				/* Failed to pull jobs - push to head of list.
+				 * Unless this context is already 'active', in
+				 * which case it's effectively already scheduled
+				 * so push it to the back of the list. */
+				if (pullable && kctx == last_active[js] &&
+						kbase_ctx_flag(kctx,
+						(KCTX_PULLED_SINCE_ACTIVE_JS0 <<
+						js)))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev,
+							kctx, js);
+				else if (pullable)
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev,
+							kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+								kctx->kbdev,
+								kctx, js);
+
+				/* If this context is not the active context,
+				 * but the active context is pullable on this
+				 * slot, then we need to remove the active
+				 * marker to prevent it from submitting atoms in
+				 * the IRQ handler, which would prevent this
+				 * context from making progress. */
+				if (last_active[js] && kctx != last_active[js]
+						&& kbase_js_ctx_pullable(
+						last_active[js], js, true))
+					ctx_waiting[js] = true;
+
+				if (context_idle) {
+					kbase_jm_idle_ctx(kbdev, kctx);
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				} else {
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+				}
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+				js_mask &= ~(1 << js);
+				break; /* Could not run atoms on this slot */
+			}
+
+			/* Push to back of list */
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev, kctx, js);
+			else
+				timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		}
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		if (kbdev->hwaccess.active_kctx[js] == last_active[js] &&
+				ctx_waiting[js])
+			kbdev->hwaccess.active_kctx[js] = NULL;
+	}
+
+	mutex_unlock(&js_devdata->queue_mutex);
+	up(&js_devdata->schedule_sem);
+}
+
+void kbase_js_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	int js;
+
+	/*
+	 * Critical assumption: No more submission is possible outside of the
+	 * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs)
+	 * whilst the struct kbase_context is terminating.
+	 */
+
+	/* First, atomically do the following:
+	 * - mark the context as dying
+	 * - try to evict it from the queue */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_set(kctx, KCTX_DYING);
+
+	dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+
+	/*
+	 * At this point we know:
+	 * - If eviction succeeded, it was in the queue, but now no
+	 *   longer is
+	 *  - We must cancel the jobs here. No Power Manager active reference to
+	 *    release.
+	 *  - This happens asynchronously - kbase_jd_zap_context() will wait for
+	 *    those jobs to be killed.
+	 * - If eviction failed, then it wasn't in the queue. It is one
+	 *   of the following:
+	 *  - a. it didn't have any jobs, and so is not in the Queue or
+	 *       the Run Pool (not scheduled)
+	 *   - Hence, no more work required to cancel jobs. No Power Manager
+	 *     active reference to release.
+	 *  - b. it was in the middle of a scheduling transaction (and thus must
+	 *       have at least 1 job). This can happen from a syscall or a
+	 *       kernel thread. We still hold the jsctx_mutex, and so the thread
+	 *       must be waiting inside kbasep_js_try_schedule_head_ctx(),
+	 *       before checking whether the runpool is full. That thread will
+	 *       continue after we drop the mutex, and will notice the context
+	 *       is dying. It will rollback the transaction, killing all jobs at
+	 *       the same time. kbase_jd_zap_context() will wait for those jobs
+	 *       to be killed.
+	 *   - Hence, no more work required to cancel jobs, or to release the
+	 *     Power Manager active reference.
+	 *  - c. it is scheduled, and may or may not be running jobs
+	 * - We must cause it to leave the runpool by stopping it from
+	 * submitting any more jobs. When it finally does leave,
+	 * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs
+	 * (because it is dying), release the Power Manager active reference,
+	 * and will not requeue the context in the queue.
+	 * kbase_jd_zap_context() will wait for those jobs to be killed.
+	 *  - Hence, work required just to make it leave the runpool. Cancelling
+	 *    jobs and releasing the Power manager active reference will be
+	 *    handled when it leaves the runpool.
+	 */
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (!list_empty(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+				list_del_init(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+		}
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		/* The following events require us to kill off remaining jobs
+		 * and update PM book-keeping:
+		 * - we evicted it correctly (it must have jobs to be in the
+		 *   Queue)
+		 *
+		 * These events need no action, but take this path anyway:
+		 * - Case a: it didn't have any jobs, and was never in the Queue
+		 * - Case b: scheduling transaction will be partially rolled-
+		 *           back (this already cancels the jobs)
+		 */
+
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+
+		/* Only cancel jobs when we evicted from the
+		 * queue. No Power Manager active reference was held.
+		 *
+		 * Having is_dying set ensures that this kills, and
+		 * doesn't requeue */
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+	} else {
+		unsigned long flags;
+		bool was_retained;
+
+		/* Case c: didn't evict, but it is scheduled - it's in the Run
+		 * Pool */
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+		dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+
+		/* Disable the ctx from submitting any more jobs */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		/* Retain and (later) release the context whilst it is is now
+		 * disallowed from submitting jobs - ensures that someone
+		 * somewhere will be removing the context later on */
+		was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+		/* Since it's scheduled and we have the jsctx_mutex, it must be
+		 * retained successfully */
+		KBASE_DEBUG_ASSERT(was_retained);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+
+		/* Cancel any remaining running jobs for this kctx - if any.
+		 * Submit is disallowed which takes effect immediately, so no
+		 * more new jobs will appear after we do this. */
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+			kbase_job_slot_hardstop(kctx, js, NULL);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+									kctx);
+
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u);
+
+	/* After this, you must wait on both the
+	 * kbase_jd_context::zero_jobs_wait and the
+	 * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs
+	 * to be destroyed, and the context to be de-scheduled (if it was on the
+	 * runpool).
+	 *
+	 * kbase_jd_zap_context() will do this. */
+}
+
+static inline int trace_get_refcnt(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	return atomic_read(&kctx->refcount);
+}
+
+/**
+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context
+ * @kctx:     Pointer to context.
+ * @callback: Pointer to function to call for each job.
+ *
+ * Call a function on all jobs belonging to a non-queued, non-running
+ * context, and detach the jobs from the context as it goes.
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * Atoms will be removed from the queue, so this must only be called when
+ * cancelling jobs (which occurs as part of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_ctx_job_cb callback)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	u32 js;
+
+	kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL,
+					0u, trace_get_refcnt(kbdev, kctx));
+
+	/* Invoke callback on jobs on each slot in turn */
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		jsctx_queue_foreach(kctx, js, callback);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
new file mode 100755
index 0000000..355da27
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -0,0 +1,912 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler APIs.
+ */
+
+#ifndef _KBASE_JS_H_
+#define _KBASE_JS_H_
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_context.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_debug.h"
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js Job Scheduler Internal APIs
+ * @{
+ *
+ * These APIs are Internal to KBase.
+ */
+
+/**
+ * @brief Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * @brief Halt the Job Scheduler.
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero intitialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the struct kbase_context must be zero intitialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the context is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guarenteed not to update the Policy Queue. And so, the caller is
+ * guarenteed to not need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is false.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold hwaccess_lock (as this will be obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return true indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return false indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
+ *   the atom
+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ *   remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions.
+ *
+ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Completely remove a job chain from the Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it is not being killed with kbasep_jd_cancel()
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold the hwaccess_lock, (as this will be obtained
+ *   internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * @return true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs
+ * @return false otherwise
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_jd_atom *katom);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold mmu_hw_mutex and hwaccess_lock, because they will be
+ *   used internally.
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locks must be held by the caller:
+ * - mmu_hw_mutex, hwaccess_lock
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that is stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *   If the hwaccess_lock is already held, then the caller should use
+ *   kbasep_js_runpool_lookup_ctx_nolock() instead.
+ *
+ * @return a valid struct kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
+
+/**
+ * @brief Handling the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.  \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero and the context \em might be scheduled out
+ * (depending on whether the Scheudling Policy has deemed it so, or if it has run
+ * out of jobs).
+ *
+ * If the context does get scheduled out, then The following actions will be
+ * taken as part of deschduling a context:
+ * - For the context being descheduled:
+ *  - If the context is in the processing of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ *  - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ *  - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ *  - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * - Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * - Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * - Attempt scheduling in a new context (if one is available), and if necessary,
+ * running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function \b cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a \a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * - Releasing the atom's context attributes
+ * - Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * @brief Variant of kbase_js_runpool_release_ctx() that assumes that
+ * kbasep_js_device_data::runpool_mutex and
+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
+ * attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * @brief Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the function will wait
+ * for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space reserved) until
+ * kbasep_js_release_privileged_ctx is called).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Release a privileged context, allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - hwaccess_lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Mangement active refcount to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduler's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time.
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * @brief Resume the Job Scheduler after a Power Management Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * - Schedules contexts back into the runpool
+ * - Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * @brief Submit an atom to the job scheduler.
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to submit
+ *
+ * @return Whether the context requires to be enqueued. */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom);
+
+/**
+  * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
+  * @kctx:  Context Pointer
+  * @prio:  Priority (specifies the queue together with js).
+  * @js:    Job slot (specifies the queue together with prio).
+  *
+  * Pushes all possible atoms from the linked list to the ringbuffer.
+  * Number of atoms are limited to free space in the ringbuffer and
+  * number of available atoms in the linked list.
+  *
+  */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
+/**
+ * @brief Pull an atom from a context in the job scheduler for execution.
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context to pull from
+ * @param[in] js    Job slot to pull from
+ * @return          Pointer to an atom, or NULL if there are no atoms for this
+ *                  slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * @brief Return an atom to the job scheduler ringbuffer.
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to unpull
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom from jd_done_worker(), removing it from the job
+ * scheduler ringbuffer.
+ *
+ * If the atom failed then all dependee atoms marked for failure propagation
+ * will also fail.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] katom Pointer to the atom to complete
+ * @return true if the context is now idle (no jobs pulled)
+ *         false otherwise
+ */
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom.
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] katom         Pointer to the atom to complete
+ * @param[in] end_timestamp The time that the atom completed (may be NULL)
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp);
+
+/**
+ * @brief Submit atoms from all available contexts.
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ *
+ * @param[in] kbdev    Device pointer
+ * @param[in] js_mask  Mask of job slots to submit to
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_jd_zap_context - Attempt to deschedule a context that is being
+ *                        destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove a context from any internal job scheduler queues
+ * and perform any other actions to ensure a context will not be submitted
+ * from.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * @brief Validate an atom
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * @param[in] kbdev  Device pointer
+ * @param[in] katom  Atom to validate
+ * @return           true if atom is valid
+ *                   false otherwise
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user specified data
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used, if a timeout
+ * is negative then the default is set.
+ */
+void kbase_js_set_timeouts(struct kbase_device *kbdev);
+
+/**
+ * kbase_js_set_ctx_priority - set the context priority
+ * @kctx: Context pointer
+ * @new_priority: New priority value for the Context
+ *
+ * The context priority is set to a new value and it is moved to the
+ * pullable/unpullable list as per the new priority.
+ */
+void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority);
+
+
+/**
+ * kbase_js_update_ctx_priority - update the context priority
+ * @kctx: Context pointer
+ *
+ * The context priority gets updated as per the priority of atoms currently in
+ * use for that context, but only if system priority mode for context scheduling
+ * is being used.
+ */
+void kbase_js_update_ctx_priority(struct kbase_context *kctx);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * @brief Check that a context is allowed to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * As with any bool, never test the return value with true.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 test_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	test_bit = (u16) (1u << kctx->as_nr);
+
+	return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+}
+
+/**
+ * @brief Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 set_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	set_bit = (u16) (1u << kctx->as_nr);
+
+	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)",
+			kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * @brief Prevent a context from submitting more jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 clear_bit;
+	u16 clear_mask;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	clear_bit = (u16) (1u << kctx->as_nr);
+	clear_mask = ~clear_bit;
+
+	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)",
+			kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
+
+/**
+ * Create an initial 'invalid' atom retained state, that requires no
+ * atom-related work to be done on releasing with
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
+{
+	retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+	retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+}
+
+/**
+ * Copy atom state that can be made available after jd_done_nolock() is called
+ * on that atom.
+ */
+static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
+{
+	retained_state->event_code = katom->event_code;
+	retained_state->core_req = katom->core_req;
+	retained_state->sched_priority = katom->sched_priority;
+	retained_state->device_nr = katom->device_nr;
+}
+
+/**
+ * @brief Determine whether an atom has finished (given its retained state),
+ * and so should be given back to userspace/removed from the system.
+ *
+ * Reasons for an atom not finishing include:
+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ *
+ * @param[in] katom_retained_state the retained state of the atom to check
+ * @return    false if the atom has not finished
+ * @return    !=false if the atom has finished
+ */
+static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
+}
+
+/**
+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
+ * code should just ignore it.
+ *
+ * @param[in] katom_retained_state the atom's retained state to check
+ * @return    false if the retained state is invalid, and can be ignored
+ * @return    !=false if the retained state is valid
+ */
+static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guaranteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guaranteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_context *found_kctx;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	found_kctx = kbdev->as_to_kctx[as_nr];
+	KBASE_DEBUG_ASSERT(found_kctx == NULL ||
+			atomic_read(&found_kctx->refcount) > 0);
+
+	return found_kctx;
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+	++(js_devdata->nr_all_contexts_running);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+									S8_MAX);
+		++(js_devdata->nr_user_contexts_running);
+	}
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	--(js_devdata->nr_all_contexts_running);
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		--(js_devdata->nr_user_contexts_running);
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+	}
+}
+
+
+/**
+ * @brief Submit atoms from all available contexts to all job slots.
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ *
+ * @param[in] kbdev    Device pointer
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+	kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
+ *                                        to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treates priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ *         0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ *         KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+	if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+		return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+	return kbasep_js_atom_priority_to_relative[atom_prio];
+}
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+	unsigned int prio_idx;
+
+	KBASE_DEBUG_ASSERT(0 <= sched_prio
+			&& sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+	prio_idx = (unsigned int)sched_prio;
+
+	return kbasep_js_relative_priority_to_atom[prio_idx];
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100755
index 0000000..1ff230c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,283 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+
+/*
+ * Private functions follow
+ */
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
+		++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) {
+			/* First refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+		--(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+			/* Last de-refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+	++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		/* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+	}
+
+	return runpool_state_changed;
+}
+
+/*
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		lockdep_assert_held(&kbdev->hwaccess_lock);
+		/* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+	}
+
+	/* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */
+	--(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed;
+	int i;
+
+	/* Retain any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled in, so update the runpool with the new attributes */
+			runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+
+			/* We don't need to know about state changed, because retaining a
+			 * context occurs on scheduling it, and that itself will also try
+			 * to run new atoms */
+			CSTD_UNUSED(runpool_state_changed);
+		}
+	}
+}
+
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+	int i;
+
+	/* Release any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled out, so update the runpool on the removed attributes */
+			runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom);
+	core_req = katom->core_req;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	/* We don't need to know about state changed, because retaining an
+	 * atom occurs on adding it, and that itself will also try to run
+	 * new atoms */
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom_retained_state);
+	core_req = katom_retained_state->core_req;
+
+	/* No-op for invalid atoms */
+	if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false)
+		return false;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	return runpool_state_changed;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
new file mode 100755
index 0000000..25fd397
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -0,0 +1,155 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_js_ctx_attr.h
+ * Job Scheduler Context Attribute APIs
+ */
+
+#ifndef _KBASE_JS_CTX_ATTR_H_
+#define _KBASE_JS_CTX_ATTR_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+
+/**
+ * Retain all attributes of a context
+ *
+ * This occurs on scheduling in the context on the runpool (but after
+ * is_scheduled is set)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ */
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Release all attributes of a context
+ *
+ * This occurs on scheduling out the context from the runpool (but before
+ * is_scheduled is cleared)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of an atom
+ *
+ * This occurs on adding an atom to a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ */
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * Release all attributes of an atom, given its retained state.
+ *
+ * This occurs after (permanently) removing an atom from a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * This is a no-op when \a katom_retained_state is invalid.
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+
+	return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	/* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+	return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+	return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100755
index 0000000..052a0b3
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,416 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+	base_jd_core_req core_req;
+	kbase_context_flags ctx_req;
+	u32 device_nr;
+};
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affect scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accomodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ */
+enum kbasep_js_ctx_attr {
+	/** Attribute indicating a context that contains Compute jobs. That is,
+	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE,
+
+	/** Attribute indicating a context that contains Non-Compute jobs. That is,
+	 * the context has some jobs that are \b not of type @ref
+	 * BASE_JD_REQ_ONLY_COMPUTE.
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+	/** Attribute indicating that a context contains compute-job atoms that
+	 * aren't restricted to a coherent group, and can run on all cores.
+	 *
+	 * Specifically, this is when the atom's \a core_req satisfy:
+	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
+	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+	 *
+	 * Such atoms could be blocked from running if one of the coherent groups
+	 * is being used by another job slot, so tracking this context attribute
+	 * allows us to prevent such situations.
+	 *
+	 * @note This doesn't take into account the 1-coregroup case, where all
+	 * compute atoms would effectively be able to run on 'all cores', but
+	 * contexts will still not always get marked with this attribute. Instead,
+	 * it is the caller's responsibility to take into account the number of
+	 * coregroups when interpreting this attribute.
+	 *
+	 * @note Whilst Tiler atoms are normally combined with
+	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+	 * enough to handle anyway.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+	/** Must be the last in the enum */
+	KBASEP_JS_CTX_ATTR_COUNT
+};
+
+enum {
+	/** Bit indicating that new atom should be started because this atom completed */
+	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
+	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
+
+/*
+ * Context scheduling mode defines for kbase_device::js_ctx_scheduling_mode
+ */
+enum {
+	/*
+	 * In this mode, higher priority atoms will be scheduled first,
+	 * regardless of the context they belong to. Newly-runnable higher
+	 * priority atoms can preempt lower priority atoms currently running on
+	 * the GPU, even if they belong to a different context.
+	 */
+	KBASE_JS_SYSTEM_PRIORITY_MODE = 0,
+
+	/*
+	 * In this mode, the highest-priority atom will be chosen from each
+	 * context in turn using a round-robin algorithm, so priority only has
+	 * an effect within the context an atom belongs to. Newly-runnable
+	 * higher priority atoms can preempt the lower priority atoms currently
+	 * running on the GPU, but only if they belong to the same context.
+	 */
+	KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE,
+
+	/* Must be the last in the enum */
+	KBASE_JS_PRIORITY_MODE_COUNT,
+};
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_MED,
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,
+	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace.
+ */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not rollover (which would be undefined behavior), and so under
+ * the Total License model, it is free to make optimizations based on that (i.e.
+ * to remove masking).
+ */
+struct kbasep_js_device_data {
+	/* Sub-structure to collect together Job Scheduling data used in IRQ
+	 * context. The hwaccess_lock must be held when accessing. */
+	struct runpool_irq {
+		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
+		 * When bit 'N' is set in this, it indicates whether the context bound to address space
+		 * 'N' is allowed to submit jobs.
+		 */
+		u16 submit_allowed;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of contexts
+		 * that can fit into the runpool. This is currently BASE_MAX_NR_AS
+		 *
+		 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+		 * the refcount. Hence, it's not worthwhile reducing this to
+		 * bit-manipulation on u32s to save space (where in contrast, 4 bit
+		 * sub-fields would be easy to do and would save space).
+		 *
+		 * Whilst this must not become negative, the sign bit is used for:
+		 * - error detection in debug builds
+		 * - Optimization: it is undefined for a signed int to overflow, and so
+		 * the compiler can optimize for that never happening (thus, no masking
+		 * is required on updating the variable) */
+		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/*
+		 * Affinity management and tracking
+		 */
+		/** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
+		 * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
+		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
+		/** Refcount for each core owned by each slot. Used to generate the
+		 * slot_affinities array of bitvectors
+		 *
+		 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+		 * because it is refcounted only when a job is definitely about to be
+		 * submitted to a slot, and is de-refcounted immediately after a job
+		 * finishes */
+		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
+	} runpool_irq;
+
+	/**
+	 * Run Pool mutex, for managing contexts within the runpool.
+	 * Unless otherwise specified, you must hold this lock whilst accessing any
+	 * members that follow
+	 *
+	 * In addition, this is used to access:
+	 * - the kbasep_js_kctx_info::runpool substructure
+	 */
+	struct mutex runpool_mutex;
+
+	/**
+	 * Queue Lock, used to access the Policy's queue of contexts independently
+	 * of the Run Pool.
+	 *
+	 * Of course, you don't need the Run Pool lock to access this.
+	 */
+	struct mutex queue_mutex;
+
+	/**
+	 * Scheduling semaphore. This must be held when calling
+	 * kbase_jm_kick()
+	 */
+	struct semaphore schedule_sem;
+
+	/**
+	 * List of contexts that can currently be pulled from
+	 */
+	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+	/**
+	 * List of contexts that can not currently be pulled from, but have
+	 * jobs currently running.
+	 */
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+	s8 nr_user_contexts_running;
+	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+	s8 nr_all_contexts_running;
+
+	/** Core Requirements to match up with base_js_atom's core_req memeber
+	 * @note This is a write-once member, and so no locking is required to read */
+	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+	u32 scheduling_period_ns;    /*< Value for JS_SCHEDULING_PERIOD_NS */
+	u32 soft_stop_ticks;	     /*< Value for JS_SOFT_STOP_TICKS */
+	u32 soft_stop_ticks_cl;	     /*< Value for JS_SOFT_STOP_TICKS_CL */
+	u32 hard_stop_ticks_ss;	     /*< Value for JS_HARD_STOP_TICKS_SS */
+	u32 hard_stop_ticks_cl;	     /*< Value for JS_HARD_STOP_TICKS_CL */
+	u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */
+	u32 gpu_reset_ticks_ss;	     /*< Value for JS_RESET_TICKS_SS */
+	u32 gpu_reset_ticks_cl;	     /*< Value for JS_RESET_TICKS_CL */
+	u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
+	u32 ctx_timeslice_ns;		 /**< Value for JS_CTX_TIMESLICE_NS */
+
+	/**< Value for JS_SOFT_JOB_TIMEOUT */
+	atomic_t soft_job_timeout_ms;
+
+	/** List of suspended soft jobs */
+	struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Support soft-stop on a single context */
+	bool softstop_always;
+#endif				/* CONFIG_MALI_DEBUG */
+
+	/** The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths).
+	 * @note This is a write-once member, and so no locking is required to read */
+	int init_status;
+
+	/* Number of contexts that can currently be pulled from */
+	u32 nr_contexts_pullable;
+
+	/* Number of contexts that can either be pulled from or are currently
+	 * running */
+	atomic_t nr_contexts_runnable;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+
+	/**
+	 * Job Scheduler Context information sub-structure. These members are
+	 * accessed regardless of whether the context is:
+	 * - In the Policy's Run Pool
+	 * - In the Policy's Queue
+	 * - Not queued nor in the Run Pool.
+	 *
+	 * You must obtain the jsctx_mutex before accessing any other members of
+	 * this substructure.
+	 *
+	 * You may not access any of these members from IRQ context.
+	 */
+	struct kbase_jsctx {
+		struct mutex jsctx_mutex;		    /**< Job Scheduler Context lock */
+
+		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+		 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
+		 * for such jobs*/
+		u32 nr_jobs;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of atoms on
+		 * the context. **/
+		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/**
+		 * Wait queue to wait for KCTX_SHEDULED flag state changes.
+		 * */
+		wait_queue_head_t is_scheduled_wait;
+
+		/** Link implementing JS queues. Context can be present on one
+		 * list per job slot
+		 */
+		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
+	} ctx;
+
+	/* The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths) */
+	int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+	/** Event code - to determine whether the atom has finished */
+	enum base_jd_event_code event_code;
+	/** core requirements */
+	base_jd_core_req core_req;
+	/* priority */
+	int sched_priority;
+	/* Core group atom was executed on */
+	u32 device_nr;
+
+};
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbase_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
+ *
+ * @see kbase_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100755
index 0000000..003ac9e
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,48 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port. */
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+
+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+	#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+#else
+	#define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100755
index 0000000..f55d8ea
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,3914 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/compat.h>
+#include <linux/version.h>
+#include <linux/log2.h>
+#ifdef CONFIG_OF
+#include <linux/of_platform.h>
+#endif
+
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_tracepoints.h>
+#include <mali_kbase_native_mgm.h>
+#include <mali_kbase_mem_pool_group.h>
+
+
+/* Forward declarations */
+static void free_partial_locked(struct kbase_context *kctx,
+		struct kbase_mem_pool *pool, struct tagged_addr tp);
+
+static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx)
+{
+#if defined(CONFIG_ARM64)
+	/* VA_BITS can be as high as 48 bits, but all bits are available for
+	 * both user and kernel.
+	 */
+	size_t cpu_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	/* x86_64 can access 48 bits of VA, but the 48th is used to denote
+	 * kernel (1) vs userspace (0), so the max here is 47.
+	 */
+	size_t cpu_va_bits = 47;
+#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32)
+	size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE;
+#else
+#error "Unknown CPU VA width for this architecture"
+#endif
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		cpu_va_bits = 32;
+#endif
+
+	return cpu_va_bits;
+}
+
+/* This function finds out which RB tree the given pfn from the GPU VA belongs
+ * to based on the memory zone the pfn refers to */
+static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
+								    u64 gpu_pfn)
+{
+	struct rb_root *rbtree = NULL;
+
+	/* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA
+	 * zone if this has been initialized.
+	 */
+	if (gpu_pfn >= kctx->exec_va_start)
+		rbtree = &kctx->reg_rbtree_exec;
+	else {
+		u64 same_va_end;
+
+#ifdef CONFIG_64BIT
+		if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+#endif /* CONFIG_64BIT */
+			same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE;
+#ifdef CONFIG_64BIT
+		else
+			same_va_end = kctx->same_va_end;
+#endif /* CONFIG_64BIT */
+
+		if (gpu_pfn >= same_va_end)
+			rbtree = &kctx->reg_rbtree_custom;
+		else
+			rbtree = &kctx->reg_rbtree_same;
+	}
+
+	return rbtree;
+}
+
+/* This function inserts a region into the tree. */
+static void kbase_region_tracker_insert(struct kbase_va_region *new_reg)
+{
+	u64 start_pfn = new_reg->start_pfn;
+	struct rb_node **link = NULL;
+	struct rb_node *parent = NULL;
+	struct rb_root *rbtree = NULL;
+
+	rbtree = new_reg->rbtree;
+
+	link = &(rbtree->rb_node);
+	/* Find the right place in the tree using tree search */
+	while (*link) {
+		struct kbase_va_region *old_reg;
+
+		parent = *link;
+		old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+		/* RBTree requires no duplicate entries. */
+		KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+		if (old_reg->start_pfn > start_pfn)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Put the new node there, and rebalance tree */
+	rb_link_node(&(new_reg->rblink), parent, link);
+
+	rb_insert_color(&(new_reg->rblink), rbtree);
+}
+
+static struct kbase_va_region *find_region_enclosing_range_rbtree(
+		struct rb_root *rbtree, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+	u64 end_pfn = start_pfn + nr_pages;
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (start_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (end_pfn > tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+struct kbase_va_region *kbase_find_region_enclosing_address(
+		struct rb_root *rbtree, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (gpu_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (gpu_pfn >= tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_root *rbtree = NULL;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	return kbase_find_region_enclosing_address(rbtree, gpu_addr);
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+struct kbase_va_region *kbase_find_region_base_address(
+		struct rb_root *rbtree, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->start_pfn > gpu_pfn)
+			rbnode = rbnode->rb_left;
+		else if (reg->start_pfn < gpu_pfn)
+			rbnode = rbnode->rb_right;
+		else
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_root *rbtree = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	return kbase_find_region_base_address(rbtree, gpu_addr);
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
+		struct kbase_va_region *reg_reqs,
+		size_t nr_pages, size_t align_offset, size_t align_mask,
+		u64 *out_start_pfn)
+{
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	/* Note that this search is a linear search, as we do not have a target
+	   address in mind, so does not benefit from the rbtree search */
+	rbtree = reg_reqs->rbtree;
+
+	for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if ((reg->nr_pages >= nr_pages) &&
+				(reg->flags & KBASE_REG_FREE)) {
+			/* Check alignment */
+			u64 start_pfn = reg->start_pfn;
+
+			/* When align_offset == align, this sequence is
+			 * equivalent to:
+			 *   (start_pfn + align_mask) & ~(align_mask)
+			 *
+			 * Otherwise, it aligns to n*align + offset, for the
+			 * lowest value n that makes this still >start_pfn */
+			start_pfn += align_mask;
+			start_pfn -= (start_pfn - align_offset) & (align_mask);
+
+			if (!(reg_reqs->flags & KBASE_REG_GPU_NX)) {
+				/* Can't end at 4GB boundary */
+				if (0 == ((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB))
+					start_pfn += align_offset;
+
+				/* Can't start at 4GB boundary */
+				if (0 == (start_pfn & BASE_MEM_PFN_MASK_4GB))
+					start_pfn += align_offset;
+
+				if (!((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB) ||
+				    !(start_pfn & BASE_MEM_PFN_MASK_4GB))
+					continue;
+			} else if (reg_reqs->flags &
+					KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
+				u64 end_pfn = start_pfn + nr_pages - 1;
+
+				if ((start_pfn & ~BASE_MEM_PFN_MASK_4GB) !=
+				    (end_pfn & ~BASE_MEM_PFN_MASK_4GB))
+					start_pfn = end_pfn & ~BASE_MEM_PFN_MASK_4GB;
+			}
+
+			if ((start_pfn >= reg->start_pfn) &&
+					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) {
+				*out_start_pfn = start_pfn;
+				return reg;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions.  It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ */
+int kbase_remove_va_region(struct kbase_va_region *reg)
+{
+	struct rb_node *rbprev;
+	struct kbase_va_region *prev = NULL;
+	struct rb_node *rbnext;
+	struct kbase_va_region *next = NULL;
+	struct rb_root *reg_rbtree = NULL;
+
+	int merged_front = 0;
+	int merged_back = 0;
+	int err = 0;
+
+	reg_rbtree = reg->rbtree;
+
+	/* Try to merge with the previous block first */
+	rbprev = rb_prev(&(reg->rblink));
+	if (rbprev) {
+		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+		if (prev->flags & KBASE_REG_FREE) {
+			/* We're compatible with the previous VMA,
+			 * merge with it */
+			WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) !=
+					    (reg->flags & KBASE_REG_ZONE_MASK));
+			prev->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), reg_rbtree);
+			reg = prev;
+			merged_front = 1;
+		}
+	}
+
+	/* Try to merge with the next block second */
+	/* Note we do the lookup here as the tree may have been rebalanced. */
+	rbnext = rb_next(&(reg->rblink));
+	if (rbnext) {
+		/* We're compatible with the next VMA, merge with it */
+		next = rb_entry(rbnext, struct kbase_va_region, rblink);
+		if (next->flags & KBASE_REG_FREE) {
+			WARN_ON((next->flags & KBASE_REG_ZONE_MASK) !=
+					    (reg->flags & KBASE_REG_ZONE_MASK));
+			next->start_pfn = reg->start_pfn;
+			next->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), reg_rbtree);
+			merged_back = 1;
+			if (merged_front) {
+				/* We already merged with prev, free it */
+				kfree(reg);
+			}
+		}
+	}
+
+	/* If we failed to merge then we need to add a new block */
+	if (!(merged_front || merged_back)) {
+		/*
+		 * We didn't merge anything. Add a new free
+		 * placeholder and remove the original one.
+		 */
+		struct kbase_va_region *free_reg;
+
+		free_reg = kbase_alloc_free_region(reg_rbtree,
+				reg->start_pfn, reg->nr_pages,
+				reg->flags & KBASE_REG_ZONE_MASK);
+		if (!free_reg) {
+			err = -ENOMEM;
+			goto out;
+		}
+		rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
+	}
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * kbase_insert_va_region_nolock - Insert a VA region to the list,
+ * replacing the existing one.
+ *
+ * @new_reg: The new region to insert
+ * @at_reg: The region to replace
+ * @start_pfn: The Page Frame Number to insert at
+ * @nr_pages: The number of pages of the region
+ */
+static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
+		struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_root *reg_rbtree = NULL;
+	int err = 0;
+
+	reg_rbtree = at_reg->rbtree;
+
+	/* Must be a free region */
+	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+	/* start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+	/* at least nr_pages from start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	/* Regions are a whole use, so swap and delete old one. */
+	if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+		rb_replace_node(&(at_reg->rblink), &(new_reg->rblink),
+								reg_rbtree);
+		kfree(at_reg);
+	}
+	/* New region replaces the start of the old one, so insert before. */
+	else if (at_reg->start_pfn == start_pfn) {
+		at_reg->start_pfn += nr_pages;
+		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(new_reg);
+	}
+	/* New region replaces the end of the old one, so insert after. */
+	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(new_reg);
+	}
+	/* New region splits the old one, so insert and create new */
+	else {
+		struct kbase_va_region *new_front_reg;
+
+		new_front_reg = kbase_alloc_free_region(reg_rbtree,
+				at_reg->start_pfn,
+				start_pfn - at_reg->start_pfn,
+				at_reg->flags & KBASE_REG_ZONE_MASK);
+
+		if (new_front_reg) {
+			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+			at_reg->start_pfn = start_pfn + nr_pages;
+
+			kbase_region_tracker_insert(new_front_reg);
+			kbase_region_tracker_insert(new_reg);
+		} else {
+			err = -ENOMEM;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * kbase_add_va_region - Add a VA region to the region list for a context.
+ *
+ * @kctx: kbase context containing the region
+ * @reg: the region to add
+ * @addr: the address to insert the region at
+ * @nr_pages: the number of pages in the region
+ * @align: the minimum alignment in pages
+ */
+int kbase_add_va_region(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 addr,
+		size_t nr_pages, size_t align)
+{
+	int err = 0;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int cpu_va_bits = kbase_get_num_cpu_va_bits(kctx);
+	int gpu_pc_bits =
+		kbdev->gpu_props.props.core_props.log2_program_counter_size;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* The executable allocation from the SAME_VA zone would already have an
+	 * appropriately aligned GPU VA chosen for it.
+	 * Also the executable allocation from EXEC_VA zone doesn't need the
+	 * special alignment.
+	 */
+	if (!(reg->flags & KBASE_REG_GPU_NX) && !addr &&
+	    ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) {
+		if (cpu_va_bits > gpu_pc_bits) {
+			align = max(align, (size_t)((1ULL << gpu_pc_bits)
+						>> PAGE_SHIFT));
+		}
+	}
+
+	do {
+		err = kbase_add_va_region_rbtree(kbdev, reg, addr, nr_pages,
+				align);
+		if (err != -ENOMEM)
+			break;
+
+		/*
+		 * If the allocation is not from the same zone as JIT
+		 * then don't retry, we're out of VA and there is
+		 * nothing which can be done about it.
+		 */
+		if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+				KBASE_REG_ZONE_CUSTOM_VA)
+			break;
+	} while (kbase_jit_evict(kctx));
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree
+ *
+ * Insert a region into the rbtree that was specified when the region was
+ * created. If addr is 0 a free area in the rbtree is used, otherwise the
+ * specified address is used.
+ *
+ * @kbdev: The kbase device
+ * @reg: The region to add
+ * @addr: The address to add the region at, or 0 to map at any available address
+ * @nr_pages: The size of the region in pages
+ * @align: The minimum alignment in pages
+ */
+int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
+		struct kbase_va_region *reg,
+		u64 addr, size_t nr_pages, size_t align)
+{
+	struct device *const dev = kbdev->dev;
+	struct rb_root *rbtree = NULL;
+	struct kbase_va_region *tmp;
+	u64 gpu_pfn = addr >> PAGE_SHIFT;
+	int err = 0;
+
+	rbtree = reg->rbtree;
+
+	if (!align)
+		align = 1;
+
+	/* must be a power of 2 */
+	KBASE_DEBUG_ASSERT(is_power_of_2(align));
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+	/* Path 1: Map a specific address. Find the enclosing region,
+	 * which *must* be free.
+	 */
+	if (gpu_pfn) {
+		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+		tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn,
+				nr_pages);
+		if (kbase_is_region_invalid(tmp)) {
+			dev_warn(dev, "Enclosing region not found or invalid: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		} else if (!kbase_is_region_free(tmp)) {
+			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n",
+					tmp->start_pfn, tmp->flags,
+					tmp->nr_pages, gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn,
+				nr_pages);
+		if (err) {
+			dev_warn(dev, "Failed to insert va region");
+			err = -ENOMEM;
+		}
+	} else {
+		/* Path 2: Map any free address which meets the requirements. */
+		u64 start_pfn;
+		size_t align_offset = align;
+		size_t align_mask = align - 1;
+
+		if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) {
+			WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
+					__func__,
+					(unsigned long)align);
+			align_mask  = reg->extent - 1;
+			align_offset = reg->extent - reg->initial_commit;
+		}
+
+		tmp = kbase_region_tracker_find_region_meeting_reqs(reg,
+				nr_pages, align_offset, align_mask,
+				&start_pfn);
+		if (tmp) {
+			err = kbase_insert_va_region_nolock(reg, tmp,
+							start_pfn, nr_pages);
+			if (unlikely(err)) {
+				dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages",
+					start_pfn, nr_pages);
+			}
+		} else {
+			dev_dbg(dev, "Failed to find a suitable region: %zu nr_pages, %zu align_offset, %zu align_mask\n",
+				nr_pages, align_offset, align_mask);
+			err = -ENOMEM;
+		}
+	}
+
+exit:
+	return err;
+}
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+		struct kbase_va_region *same_va_reg,
+		struct kbase_va_region *custom_va_reg)
+{
+	kctx->reg_rbtree_same = RB_ROOT;
+	kbase_region_tracker_insert(same_va_reg);
+
+	/* Although custom_va_reg and exec_va_reg don't always exist,
+	 * initialize unconditionally because of the mem_view debugfs
+	 * implementation which relies on them being empty.
+	 *
+	 * The difference between the two is that the EXEC_VA region
+	 * is never initialized at this stage.
+	 */
+	kctx->reg_rbtree_custom = RB_ROOT;
+	kctx->reg_rbtree_exec = RB_ROOT;
+
+	if (custom_va_reg)
+		kbase_region_tracker_insert(custom_va_reg);
+}
+
+static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	do {
+		rbnode = rb_first(rbtree);
+		if (rbnode) {
+			rb_erase(rbnode, rbtree);
+			reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+			WARN_ON(reg->va_refcnt != 1);
+			/* Reset the start_pfn - as the rbtree is being
+			 * destroyed and we've already erased this region, there
+			 * is no further need to attempt to remove it.
+			 * This won't affect the cleanup if the region was
+			 * being used as a sticky resource as the cleanup
+			 * related to sticky resources anyways need to be
+			 * performed before the term of region tracker.
+			 */
+			reg->start_pfn = 0;
+			kbase_free_alloced_region(reg);
+		}
+	} while (rbnode);
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
+}
+
+void kbase_region_tracker_term_rbtree(struct rb_root *rbtree)
+{
+	kbase_region_tracker_erase_rbtree(rbtree);
+}
+
+static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
+{
+	return min(kbase_get_num_cpu_va_bits(kctx),
+			(size_t) kctx->kbdev->gpu_props.mmu.va_bits);
+}
+
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+	struct kbase_va_region *same_va_reg;
+	struct kbase_va_region *custom_va_reg = NULL;
+	size_t same_va_bits = kbase_get_same_va_bits(kctx);
+	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+	u64 same_va_pages;
+	int err;
+
+	/* Take the lock as kbase_free_alloced_region requires it */
+	kbase_gpu_vm_lock(kctx);
+
+	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+	/* all have SAME_VA */
+	same_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 1,
+			same_va_pages,
+			KBASE_REG_ZONE_SAME_VA);
+
+	if (!same_va_reg) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+#ifdef CONFIG_64BIT
+	/* 32-bit clients have custom VA zones */
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif
+		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+			err = -EINVAL;
+			goto fail_free_same_va;
+		}
+		/* If the current size of TMEM is out of range of the
+		 * virtual address space addressable by the MMU then
+		 * we should shrink it to fit
+		 */
+		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+		custom_va_reg = kbase_alloc_free_region(
+				&kctx->reg_rbtree_custom,
+				KBASE_REG_ZONE_CUSTOM_VA_BASE,
+				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+		if (!custom_va_reg) {
+			err = -ENOMEM;
+			goto fail_free_same_va;
+		}
+#ifdef CONFIG_64BIT
+	} else {
+		custom_va_size = 0;
+	}
+#endif
+
+	kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
+
+	kctx->same_va_end = same_va_pages + 1;
+	kctx->gpu_va_end = kctx->same_va_end + custom_va_size;
+	kctx->exec_va_start = U64_MAX;
+	kctx->jit_va = false;
+
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_free_same_va:
+	kbase_free_alloced_region(same_va_reg);
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+#ifdef CONFIG_64BIT
+static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
+		u64 jit_va_pages)
+{
+	struct kbase_va_region *same_va;
+	struct kbase_va_region *custom_va_reg;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* First verify that a JIT_VA zone has not been created already. */
+	if (kctx->jit_va)
+		return -EINVAL;
+
+	/*
+	 * Modify the same VA free region after creation. Be careful to ensure
+	 * that allocations haven't been made as they could cause an overlap
+	 * to happen with existing same VA allocations and the custom VA zone.
+	 */
+	same_va = kbase_region_tracker_find_region_base_address(kctx,
+			PAGE_SIZE);
+	if (!same_va)
+		return -ENOMEM;
+
+	if (same_va->nr_pages < jit_va_pages || kctx->same_va_end < jit_va_pages)
+		return -ENOMEM;
+
+	/* It's safe to adjust the same VA zone now */
+	same_va->nr_pages -= jit_va_pages;
+	kctx->same_va_end -= jit_va_pages;
+
+	/*
+	 * Create a custom VA zone at the end of the VA for allocations which
+	 * JIT can use so it doesn't have to allocate VA from the kernel.
+	 */
+	custom_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
+				kctx->same_va_end,
+				jit_va_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+
+	/*
+	 * The context will be destroyed if we fail here so no point
+	 * reverting the change we made to same_va.
+	 */
+	if (!custom_va_reg)
+		return -ENOMEM;
+
+	kbase_region_tracker_insert(custom_va_reg);
+	return 0;
+}
+#endif
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
+		u8 max_allocations, u8 trim_level, int group_id)
+{
+	int err = 0;
+
+	if (trim_level > 100)
+		return -EINVAL;
+
+	if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ||
+		WARN_ON(group_id < 0))
+		return -EINVAL;
+
+	kbase_gpu_vm_lock(kctx);
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+		err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
+#endif
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+
+	if (!err) {
+		kctx->jit_max_allocations = max_allocations;
+		kctx->trim_level = trim_level;
+		kctx->jit_va = true;
+		kctx->jit_group_id = group_id;
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return err;
+}
+
+int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages)
+{
+	struct kbase_va_region *shrinking_va_reg;
+	struct kbase_va_region *exec_va_reg;
+	u64 exec_va_start, exec_va_base_addr;
+	int err;
+
+	/* The EXEC_VA zone shall be created by making space at the end of the
+	 * address space. Firstly, verify that the number of EXEC_VA pages
+	 * requested by the client is reasonable and then make sure that it is
+	 * not greater than the address space itself before calculating the base
+	 * address of the new zone.
+	 */
+	if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES)
+		return -EINVAL;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* First verify that a JIT_VA zone has not been created already. */
+	if (kctx->jit_va) {
+		err = -EPERM;
+		goto exit_unlock;
+	}
+
+	if (exec_va_pages > kctx->gpu_va_end) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
+	exec_va_start = kctx->gpu_va_end - exec_va_pages;
+	exec_va_base_addr = exec_va_start << PAGE_SHIFT;
+
+	shrinking_va_reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			exec_va_base_addr);
+	if (!shrinking_va_reg) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
+	/* Make sure that the EXEC_VA region is still uninitialized */
+	if ((shrinking_va_reg->flags & KBASE_REG_ZONE_MASK) ==
+			KBASE_REG_ZONE_EXEC_VA) {
+		err = -EPERM;
+		goto exit_unlock;
+	}
+
+	if (shrinking_va_reg->nr_pages <= exec_va_pages) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
+	exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec,
+			exec_va_start,
+			exec_va_pages,
+			KBASE_REG_ZONE_EXEC_VA);
+	if (!exec_va_reg) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
+	shrinking_va_reg->nr_pages -= exec_va_pages;
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+		kctx->same_va_end -= exec_va_pages;
+#endif
+	kctx->exec_va_start = exec_va_start;
+
+	kbase_region_tracker_insert(exec_va_reg);
+	err = 0;
+
+exit_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+	int err = 0;
+	struct kbasep_mem_device *memdev;
+#ifdef CONFIG_OF
+	struct device_node *mgm_node = NULL;
+#endif
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults,
+		KBASE_MEM_POOL_MAX_SIZE_KCTX);
+
+	/* Initialize memory usage */
+	atomic_set(&memdev->used_pages, 0);
+
+	kbdev->mgm_dev = &kbase_native_mgm_dev;
+
+#ifdef CONFIG_OF
+	/* Check to see whether or not a platform-specific memory group manager
+	 * is configured and available.
+	 */
+	mgm_node = of_parse_phandle(kbdev->dev->of_node,
+		"physical-memory-group-manager", 0);
+	if (!mgm_node) {
+		dev_info(kbdev->dev,
+			"No memory group manager is configured\n");
+	} else {
+		struct platform_device *const pdev =
+			of_find_device_by_node(mgm_node);
+
+		if (!pdev) {
+			dev_err(kbdev->dev,
+				"The configured memory group manager was not found\n");
+		} else {
+			kbdev->mgm_dev = platform_get_drvdata(pdev);
+			if (!kbdev->mgm_dev) {
+				dev_info(kbdev->dev,
+					"Memory group manager is not ready\n");
+				err = -EPROBE_DEFER;
+			} else if (!try_module_get(kbdev->mgm_dev->owner)) {
+				dev_err(kbdev->dev,
+					"Failed to get memory group manger module\n");
+				err = -ENODEV;
+				kbdev->mgm_dev = NULL;
+			}
+		}
+		of_node_put(mgm_node);
+	}
+#endif
+
+	if (likely(!err)) {
+		struct kbase_mem_pool_group_config mem_pool_defaults;
+
+		kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults,
+			KBASE_MEM_POOL_MAX_SIZE_KBDEV);
+
+		err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev,
+			&mem_pool_defaults, NULL);
+	}
+
+	return err;
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int pages;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	pages = atomic_read(&memdev->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_pool_group_term(&kbdev->mem_pools);
+
+	if (kbdev->mgm_dev)
+		module_put(kbdev->mgm_dev->owner);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_term);
+
+
+
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA.
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
+		u64 start_pfn, size_t nr_pages, int zone)
+{
+	struct kbase_va_region *new_reg;
+
+	KBASE_DEBUG_ASSERT(rbtree != NULL);
+
+	/* zone argument should only contain zone related region flags */
+	KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
+
+	new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+	if (!new_reg)
+		return NULL;
+
+	new_reg->va_refcnt = 1;
+	new_reg->cpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->gpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->rbtree = rbtree;
+	new_reg->flags = zone | KBASE_REG_FREE;
+
+	new_reg->flags |= KBASE_REG_GROWABLE;
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	INIT_LIST_HEAD(&new_reg->jit_node);
+
+	return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+
+static struct kbase_context *kbase_reg_flags_to_kctx(
+		struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = NULL;
+	struct rb_root *rbtree = reg->rbtree;
+
+	switch (reg->flags & KBASE_REG_ZONE_MASK) {
+	case KBASE_REG_ZONE_CUSTOM_VA:
+		kctx = container_of(rbtree, struct kbase_context,
+				reg_rbtree_custom);
+		break;
+	case KBASE_REG_ZONE_SAME_VA:
+		kctx = container_of(rbtree, struct kbase_context,
+				reg_rbtree_same);
+		break;
+	case KBASE_REG_ZONE_EXEC_VA:
+		kctx = container_of(rbtree, struct kbase_context,
+				reg_rbtree_exec);
+		break;
+	default:
+		WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
+		break;
+	}
+
+	return kctx;
+}
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions.
+ *
+ */
+void kbase_free_alloced_region(struct kbase_va_region *reg)
+{
+	if (!(reg->flags & KBASE_REG_FREE)) {
+		struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
+
+		if (WARN_ON(!kctx))
+			return;
+
+		if (WARN_ON(kbase_is_region_invalid(reg)))
+			return;
+
+
+		mutex_lock(&kctx->jit_evict_lock);
+
+		/*
+		 * The physical allocation should have been removed from the
+		 * eviction list before this function is called. However, in the
+		 * case of abnormal process termination or the app leaking the
+		 * memory kbase_mem_free_region is not called so it can still be
+		 * on the list at termination time of the region tracker.
+		 */
+		if (!list_empty(&reg->gpu_alloc->evict_node)) {
+			mutex_unlock(&kctx->jit_evict_lock);
+
+			/*
+			 * Unlink the physical allocation before unmaking it
+			 * evictable so that the allocation isn't grown back to
+			 * its last backed size as we're going to unmap it
+			 * anyway.
+			 */
+			reg->cpu_alloc->reg = NULL;
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				reg->gpu_alloc->reg = NULL;
+
+			/*
+			 * If a region has been made evictable then we must
+			 * unmake it before trying to free it.
+			 * If the memory hasn't been reclaimed it will be
+			 * unmapped and freed below, if it has been reclaimed
+			 * then the operations below are no-ops.
+			 */
+			if (reg->flags & KBASE_REG_DONT_NEED) {
+				KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+						   KBASE_MEM_TYPE_NATIVE);
+				kbase_mem_evictable_unmake(reg->gpu_alloc);
+			}
+		} else {
+			mutex_unlock(&kctx->jit_evict_lock);
+		}
+
+		/*
+		 * Remove the region from the sticky resource metadata
+		 * list should it be there.
+		 */
+		kbase_sticky_resource_release(kctx, NULL,
+				reg->start_pfn << PAGE_SHIFT);
+
+		kbase_mem_phy_alloc_put(reg->cpu_alloc);
+		kbase_mem_phy_alloc_put(reg->gpu_alloc);
+
+		reg->flags |= KBASE_REG_VA_FREED;
+		kbase_va_region_alloc_put(kctx, reg);
+	} else {
+		kfree(reg);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+{
+	int err;
+	size_t i = 0;
+	unsigned long attr;
+	unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+	unsigned long gwt_mask = ~0;
+	int group_id;
+	struct kbase_mem_phy_alloc *alloc;
+
+#ifdef CONFIG_MALI_CINSTR_GWT
+	if (kctx->gwt_enabled)
+		gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
+
+	if ((kctx->kbdev->system_coherency == COHERENCY_ACE) &&
+		(reg->flags & KBASE_REG_SHARE_BOTH))
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA);
+	else
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
+	if (err)
+		return err;
+
+	alloc = reg->gpu_alloc;
+	group_id = alloc->group_id;
+
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 const stride = alloc->imported.alias.stride;
+
+		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
+		for (i = 0; i < alloc->imported.alias.nents; i++) {
+			if (alloc->imported.alias.aliased[i].alloc) {
+				err = kbase_mmu_insert_pages(kctx->kbdev,
+						&kctx->mmu,
+						reg->start_pfn + (i * stride),
+						alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
+						alloc->imported.alias.aliased[i].length,
+						reg->flags & gwt_mask,
+						kctx->as_nr,
+						group_id);
+				if (err)
+					goto bad_insert;
+
+				kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc);
+			} else {
+				err = kbase_mmu_insert_single_page(kctx,
+					reg->start_pfn + i * stride,
+					kctx->aliasing_sink_page,
+					alloc->imported.alias.aliased[i].length,
+					(reg->flags & mask & gwt_mask) | attr,
+					group_id);
+
+				if (err)
+					goto bad_insert;
+			}
+		}
+	} else {
+		err = kbase_mmu_insert_pages(kctx->kbdev,
+				&kctx->mmu,
+				reg->start_pfn,
+				kbase_get_gpu_phy_pages(reg),
+				kbase_reg_current_backed_size(reg),
+				reg->flags & gwt_mask,
+				kctx->as_nr,
+				group_id);
+		if (err)
+			goto bad_insert;
+		kbase_mem_phy_alloc_gpu_mapped(alloc);
+	}
+
+	if (reg->flags & KBASE_REG_IMPORT_PAD &&
+	    !WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) &&
+	    reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM &&
+	    reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+		/* For padded imported dma-buf memory, map the dummy aliasing
+		 * page from the end of the dma-buf pages, to the end of the
+		 * region using a read only mapping.
+		 *
+		 * Only map when it's imported dma-buf memory that is currently
+		 * mapped.
+		 *
+		 * Assume reg->gpu_alloc->nents is the number of actual pages
+		 * in the dma-buf memory.
+		 */
+		err = kbase_mmu_insert_single_page(kctx,
+				reg->start_pfn + reg->gpu_alloc->nents,
+				kctx->aliasing_sink_page,
+				reg->nr_pages - reg->gpu_alloc->nents,
+				(reg->flags | KBASE_REG_GPU_RD) &
+				~KBASE_REG_GPU_WR,
+				KBASE_MEM_GROUP_SINK);
+		if (err)
+			goto bad_insert;
+	}
+
+	return err;
+
+bad_insert:
+	kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+				 reg->start_pfn, reg->nr_pages,
+				 kctx->as_nr);
+
+	if (alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
+		while (i--)
+			if (alloc->imported.alias.aliased[i].alloc)
+				kbase_mem_phy_alloc_gpu_unmapped(alloc->imported.alias.aliased[i].alloc);
+	}
+
+	kbase_remove_va_region(reg);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable);
+
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err = 0;
+	size_t i;
+
+	if (reg->start_pfn == 0)
+		return 0;
+
+	if (!reg->gpu_alloc)
+		return -EINVAL;
+
+	/* Tear down down GPU page tables, depending on memory type. */
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_ALIAS: /* Fall-through */
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+				reg->start_pfn, reg->nr_pages, kctx->as_nr);
+		break;
+	default:
+		err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+			reg->start_pfn, kbase_reg_current_backed_size(reg),
+			kctx->as_nr);
+		break;
+	}
+
+	/* Update tracking, and other cleanup, depending on memory type. */
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_ALIAS:
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+		break;
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+			struct kbase_alloc_import_user_buf *user_buf =
+				&reg->gpu_alloc->imported.user_buf;
+
+			if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
+				user_buf->current_mapping_usage_count &=
+					~PINNED_ON_IMPORT;
+
+				/* The allocation could still have active mappings. */
+				if (user_buf->current_mapping_usage_count == 0) {
+					kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc,
+						(reg->flags & KBASE_REG_GPU_WR));
+				}
+			}
+		}
+		/* Fall-through */
+	default:
+		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
+		break;
+	}
+
+	return err;
+}
+
+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset)
+{
+	struct vm_area_struct *vma;
+	struct kbase_cpu_mapping *map;
+	unsigned long vm_pgoff_in_region;
+	unsigned long vm_off_in_region;
+	unsigned long map_start;
+	size_t map_size;
+
+	lockdep_assert_held(&current->mm->mmap_sem);
+
+	if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
+		return NULL;
+
+	vma = find_vma_intersection(current->mm, uaddr, uaddr+size);
+
+	if (!vma || vma->vm_start > uaddr)
+		return NULL;
+	if (vma->vm_ops != &kbase_vm_ops)
+		/* Not ours! */
+		return NULL;
+
+	map = vma->vm_private_data;
+
+	if (map->kctx != kctx)
+		/* Not from this context! */
+		return NULL;
+
+	vm_pgoff_in_region = vma->vm_pgoff - map->region->start_pfn;
+	vm_off_in_region = vm_pgoff_in_region << PAGE_SHIFT;
+	map_start = vma->vm_start - vm_off_in_region;
+	map_size = map->region->nr_pages << PAGE_SHIFT;
+
+	if ((uaddr + size) > (map_start + map_size))
+		/* Not within the CPU mapping */
+		return NULL;
+
+	*offset = (uaddr - vma->vm_start) + vm_off_in_region;
+
+	return map;
+}
+
+int kbasep_find_enclosing_cpu_mapping_offset(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset)
+{
+	struct kbase_cpu_mapping *map;
+
+	kbase_os_mem_map_lock(kctx);
+
+	map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset);
+
+	kbase_os_mem_map_unlock(kctx);
+
+	if (!map)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
+
+int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kctx,
+		u64 gpu_addr, size_t size, u64 *start, u64 *offset)
+{
+	struct kbase_va_region *region;
+
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+
+	if (!region) {
+		kbase_gpu_vm_unlock(kctx);
+		return -EINVAL;
+	}
+
+	*start = region->start_pfn << PAGE_SHIFT;
+
+	*offset = gpu_addr - *start;
+
+	if (((region->start_pfn + region->nr_pages) << PAGE_SHIFT) < (gpu_addr + size)) {
+		kbase_gpu_vm_unlock(kctx);
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_gpu_mapping_start_and_offset);
+
+void kbase_sync_single(struct kbase_context *kctx,
+		struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa,
+		off_t offset, size_t size, enum kbase_sync_type sync_fn)
+{
+	struct page *cpu_page;
+	phys_addr_t cpu_pa = as_phys_addr_t(t_cpu_pa);
+	phys_addr_t gpu_pa = as_phys_addr_t(t_gpu_pa);
+
+	cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
+
+	if (likely(cpu_pa == gpu_pa)) {
+		dma_addr_t dma_addr;
+
+		BUG_ON(!cpu_page);
+		BUG_ON(offset + size > PAGE_SIZE);
+
+		dma_addr = kbase_dma_addr(cpu_page) + offset;
+		if (sync_fn == KBASE_SYNC_TO_CPU)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+		else if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+	} else {
+		void *src = NULL;
+		void *dst = NULL;
+		struct page *gpu_page;
+
+		if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
+			return;
+
+		gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+
+		if (sync_fn == KBASE_SYNC_TO_DEVICE) {
+			src = ((unsigned char *)kmap(cpu_page)) + offset;
+			dst = ((unsigned char *)kmap(gpu_page)) + offset;
+		} else if (sync_fn == KBASE_SYNC_TO_CPU) {
+			dma_sync_single_for_cpu(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+			src = ((unsigned char *)kmap(gpu_page)) + offset;
+			dst = ((unsigned char *)kmap(cpu_page)) + offset;
+		}
+		memcpy(dst, src, size);
+		kunmap(gpu_page);
+		kunmap(cpu_page);
+		if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+	}
+}
+
+static int kbase_do_syncset(struct kbase_context *kctx,
+		struct basep_syncset *sset, enum kbase_sync_type sync_fn)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+	struct kbase_cpu_mapping *map;
+	unsigned long start;
+	size_t size;
+	struct tagged_addr *cpu_pa;
+	struct tagged_addr *gpu_pa;
+	u64 page_off, page_count;
+	u64 i;
+	u64 offset;
+
+	kbase_os_mem_map_lock(kctx);
+	kbase_gpu_vm_lock(kctx);
+
+	/* find the region where the virtual address is contained */
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			sset->mem_handle.basep.handle);
+	if (kbase_is_region_invalid_or_free(reg)) {
+		dev_warn(kctx->kbdev->dev, "Can't find a valid region at VA 0x%016llX",
+				sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	/*
+	 * Handle imported memory before checking for KBASE_REG_CPU_CACHED. The
+	 * CPU mapping cacheability is defined by the owner of the imported
+	 * memory, and not by kbase, therefore we must assume that any imported
+	 * memory may be cached.
+	 */
+	if (kbase_mem_is_imported(reg->gpu_alloc->type)) {
+		err = kbase_mem_do_sync_imported(kctx, reg, sync_fn);
+		goto out_unlock;
+	}
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED))
+		goto out_unlock;
+
+	start = (uintptr_t)sset->user_addr;
+	size = (size_t)sset->size;
+
+	map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset);
+	if (!map) {
+		dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
+				start, sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	page_off = offset >> PAGE_SHIFT;
+	offset &= ~PAGE_MASK;
+	page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	cpu_pa = kbase_get_cpu_phy_pages(reg);
+	gpu_pa = kbase_get_gpu_phy_pages(reg);
+
+	if (page_off > reg->nr_pages ||
+			page_off + page_count > reg->nr_pages) {
+		/* Sync overflows the region */
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* Sync first page */
+	if (as_phys_addr_t(cpu_pa[page_off])) {
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+
+		kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
+				offset, sz, sync_fn);
+	}
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		/* we grow upwards, so bail on first non-present page */
+		if (!as_phys_addr_t(cpu_pa[page_off + i]))
+			break;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + i],
+				gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1 &&
+	    as_phys_addr_t(cpu_pa[page_off + page_count - 1])) {
+		size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
+				gpu_pa[page_off + page_count - 1], 0, sz,
+				sync_fn);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_os_mem_map_unlock(kctx);
+	return err;
+}
+
+int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset)
+{
+	int err = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(sset != NULL);
+
+	if (sset->mem_handle.basep.handle & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev,
+				"mem_handle: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	switch (sset->type) {
+	case BASE_SYNCSET_OP_MSYNC:
+		err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_DEVICE);
+		break;
+
+	case BASE_SYNCSET_OP_CSYNC:
+		err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_CPU);
+		break;
+
+	default:
+		dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
+		break;
+	}
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_sync_now);
+
+/* vm lock must be held */
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (reg->flags & KBASE_REG_NO_USER_FREE) {
+		dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Unlink the physical allocation before unmaking it evictable so
+	 * that the allocation isn't grown back to its last backed size
+	 * as we're going to unmap it anyway.
+	 */
+	reg->cpu_alloc->reg = NULL;
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		reg->gpu_alloc->reg = NULL;
+
+	/*
+	 * If a region has been made evictable then we must unmake it
+	 * before trying to free it.
+	 * If the memory hasn't been reclaimed it will be unmapped and freed
+	 * below, if it has been reclaimed then the operations below are no-ops.
+	 */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+				   KBASE_MEM_TYPE_NATIVE);
+		kbase_mem_evictable_unmake(reg->gpu_alloc);
+	}
+
+	err = kbase_gpu_munmap(kctx, reg);
+	if (err) {
+		dev_warn(kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+		goto out;
+	}
+
+	/* This will also free the physical pages */
+	kbase_free_alloced_region(reg);
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free_region);
+
+/**
+ * @brief Free the region from the GPU and unregister it.
+ *
+ * This function implements the free operation on a memory segment.
+ * It will loudly fail if called with outstanding mappings.
+ */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) {
+		dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid");
+		return -EINVAL;
+	}
+
+	if (0 == gpu_addr) {
+		dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+		return -EINVAL;
+	}
+	kbase_gpu_vm_lock(kctx);
+
+	if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
+	    gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
+		int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
+
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* ask to unlink the cookie as we'll free it */
+
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		kbase_free_alloced_region(reg);
+	} else {
+		/* A real GPU va */
+		/* Validate the region */
+		reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+		if (kbase_is_region_invalid_or_free(reg)) {
+			dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+			/* SAME_VA must be freed through munmap */
+			dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		err = kbase_mem_free_region(kctx, reg);
+	}
+
+ out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free);
+
+int kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
+
+	reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
+	/* all memory is now growable */
+	reg->flags |= KBASE_REG_GROWABLE;
+
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		reg->flags |= KBASE_REG_PF_GROW;
+
+	if (flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (0 == (flags & BASE_MEM_PROT_GPU_EX))
+		reg->flags |= KBASE_REG_GPU_NX;
+
+	if (!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED &&
+				!(flags & BASE_MEM_UNCACHED_GPU))
+			return -EINVAL;
+	} else if (flags & (BASE_MEM_COHERENT_SYSTEM |
+			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
+		reg->flags |= KBASE_REG_SHARE_BOTH;
+	}
+
+	if (!(reg->flags & KBASE_REG_SHARE_BOTH) &&
+			flags & BASE_MEM_COHERENT_LOCAL) {
+		reg->flags |= KBASE_REG_SHARE_IN;
+	}
+
+	if (flags & BASE_MEM_TILER_ALIGN_TOP)
+		reg->flags |= KBASE_REG_TILER_ALIGN_TOP;
+
+
+	/* Set up default MEMATTR usage */
+	if (!(reg->flags & KBASE_REG_GPU_CACHED)) {
+		if (kctx->kbdev->mmu_mode->flags &
+				KBASE_MMU_MODE_HAS_NON_CACHEABLE) {
+			/* Override shareability, and MEMATTR for uncached */
+			reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+			reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
+		} else {
+			dev_warn(kctx->kbdev->dev,
+				"Can't allocate GPU uncached memory due to MMU in Legacy Mode\n");
+			return -EINVAL;
+		}
+	} else if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
+		(reg->flags & KBASE_REG_SHARE_BOTH)) {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
+	} else {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
+	}
+
+	if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING)
+		reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING;
+
+	if (flags & BASEP_MEM_NO_USER_FREE)
+		reg->flags |= KBASE_REG_NO_USER_FREE;
+
+	if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE)
+		reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE;
+
+	return 0;
+}
+
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
+		size_t nr_pages_requested)
+{
+	int new_page_count __maybe_unused;
+	size_t nr_left = nr_pages_requested;
+	int res;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct tagged_addr *tp;
+
+	if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) ||
+	    WARN_ON(alloc->imported.native.kctx == NULL) ||
+	    WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) {
+		return -EINVAL;
+	}
+
+	if (alloc->reg) {
+		if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
+			goto invalid_request;
+	}
+
+	kctx = alloc->imported.native.kctx;
+	kbdev = kctx->kbdev;
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = atomic_add_return(
+		nr_pages_requested, &kctx->used_pages);
+	atomic_add(nr_pages_requested,
+		&kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer */
+	kbase_process_page_usage_inc(kctx, nr_pages_requested);
+
+	tp = alloc->pages + alloc->nents;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	/* Check if we have enough pages requested so we can allocate a large
+	 * page (512 * 4KB = 2MB )
+	 */
+	if (nr_left >= (SZ_2M / SZ_4K)) {
+		int nr_lp = nr_left / (SZ_2M / SZ_4K);
+
+		res = kbase_mem_pool_alloc_pages(
+			&kctx->mem_pools.large[alloc->group_id],
+			 nr_lp * (SZ_2M / SZ_4K),
+			 tp,
+			 true);
+
+		if (res > 0) {
+			nr_left -= res;
+			tp += res;
+		}
+
+		if (nr_left) {
+			struct kbase_sub_alloc *sa, *temp_sa;
+
+			spin_lock(&kctx->mem_partials_lock);
+
+			list_for_each_entry_safe(sa, temp_sa,
+						 &kctx->mem_partials, link) {
+				int pidx = 0;
+
+				while (nr_left) {
+					pidx = find_next_zero_bit(sa->sub_pages,
+								  SZ_2M / SZ_4K,
+								  pidx);
+					bitmap_set(sa->sub_pages, pidx, 1);
+					*tp++ = as_tagged_tag(page_to_phys(sa->page +
+									   pidx),
+							      FROM_PARTIAL);
+					nr_left--;
+
+					if (bitmap_full(sa->sub_pages, SZ_2M / SZ_4K)) {
+						/* unlink from partial list when full */
+						list_del_init(&sa->link);
+						break;
+					}
+				}
+			}
+			spin_unlock(&kctx->mem_partials_lock);
+		}
+
+		/* only if we actually have a chunk left <512. If more it indicates
+		 * that we couldn't allocate a 2MB above, so no point to retry here.
+		 */
+		if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
+			/* create a new partial and suballocate the rest from it */
+			struct page *np = NULL;
+
+			do {
+				int err;
+
+				np = kbase_mem_pool_alloc(
+					&kctx->mem_pools.large[
+						alloc->group_id]);
+				if (np)
+					break;
+
+				err = kbase_mem_pool_grow(
+					&kctx->mem_pools.large[alloc->group_id],
+					1);
+				if (err)
+					break;
+			} while (1);
+
+			if (np) {
+				int i;
+				struct kbase_sub_alloc *sa;
+				struct page *p;
+
+				sa = kmalloc(sizeof(*sa), GFP_KERNEL);
+				if (!sa) {
+					kbase_mem_pool_free(
+						&kctx->mem_pools.large[
+							alloc->group_id],
+						np,
+						false);
+					goto no_new_partial;
+				}
+
+				/* store pointers back to the control struct */
+				np->lru.next = (void *)sa;
+				for (p = np; p < np + SZ_2M / SZ_4K; p++)
+					p->lru.prev = (void *)np;
+				INIT_LIST_HEAD(&sa->link);
+				bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
+				sa->page = np;
+
+				for (i = 0; i < nr_left; i++)
+					*tp++ = as_tagged_tag(page_to_phys(np + i), FROM_PARTIAL);
+
+				bitmap_set(sa->sub_pages, 0, nr_left);
+				nr_left = 0;
+
+				/* expose for later use */
+				spin_lock(&kctx->mem_partials_lock);
+				list_add(&sa->link, &kctx->mem_partials);
+				spin_unlock(&kctx->mem_partials_lock);
+			}
+		}
+	}
+no_new_partial:
+#endif
+
+	if (nr_left) {
+		res = kbase_mem_pool_alloc_pages(
+			&kctx->mem_pools.small[alloc->group_id],
+			nr_left, tp, false);
+		if (res <= 0)
+			goto alloc_failed;
+	}
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kbdev,
+			kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return 0;
+
+alloc_failed:
+	/* rollback needed if got one or more 2MB but failed later */
+	if (nr_left != nr_pages_requested) {
+		size_t nr_pages_to_free = nr_pages_requested - nr_left;
+
+		alloc->nents += nr_pages_to_free;
+
+		kbase_process_page_usage_inc(kctx, nr_pages_to_free);
+		atomic_add(nr_pages_to_free, &kctx->used_pages);
+		atomic_add(nr_pages_to_free,
+			&kctx->kbdev->memdev.used_pages);
+
+		kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
+	}
+
+	kbase_process_page_usage_dec(kctx, nr_pages_requested);
+	atomic_sub(nr_pages_requested, &kctx->used_pages);
+	atomic_sub(nr_pages_requested,
+		&kctx->kbdev->memdev.used_pages);
+
+invalid_request:
+	return -ENOMEM;
+}
+
+struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
+		struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool,
+		size_t nr_pages_requested,
+		struct kbase_sub_alloc **prealloc_sa)
+{
+	int new_page_count __maybe_unused;
+	size_t nr_left = nr_pages_requested;
+	int res;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct tagged_addr *tp;
+	struct tagged_addr *new_pages = NULL;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
+
+	lockdep_assert_held(&pool->pool_lock);
+
+#if !defined(CONFIG_MALI_2MB_ALLOC)
+	WARN_ON(pool->order);
+#endif
+
+	if (alloc->reg) {
+		if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
+			goto invalid_request;
+	}
+
+	kctx = alloc->imported.native.kctx;
+	kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = atomic_add_return(
+		nr_pages_requested, &kctx->used_pages);
+	atomic_add(nr_pages_requested,
+		&kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer
+	 */
+	kbase_process_page_usage_inc(kctx, nr_pages_requested);
+
+	tp = alloc->pages + alloc->nents;
+	new_pages = tp;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	if (pool->order) {
+		int nr_lp = nr_left / (SZ_2M / SZ_4K);
+
+		res = kbase_mem_pool_alloc_pages_locked(pool,
+						 nr_lp * (SZ_2M / SZ_4K),
+						 tp);
+
+		if (res > 0) {
+			nr_left -= res;
+			tp += res;
+		}
+
+		if (nr_left) {
+			struct kbase_sub_alloc *sa, *temp_sa;
+
+			list_for_each_entry_safe(sa, temp_sa,
+						 &kctx->mem_partials, link) {
+				int pidx = 0;
+
+				while (nr_left) {
+					pidx = find_next_zero_bit(sa->sub_pages,
+								  SZ_2M / SZ_4K,
+								  pidx);
+					bitmap_set(sa->sub_pages, pidx, 1);
+					*tp++ = as_tagged_tag(page_to_phys(
+							sa->page + pidx),
+							FROM_PARTIAL);
+					nr_left--;
+
+					if (bitmap_full(sa->sub_pages,
+							SZ_2M / SZ_4K)) {
+						/* unlink from partial list when
+						 * full
+						 */
+						list_del_init(&sa->link);
+						break;
+					}
+				}
+			}
+		}
+
+		/* only if we actually have a chunk left <512. If more it
+		 * indicates that we couldn't allocate a 2MB above, so no point
+		 * to retry here.
+		 */
+		if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
+			/* create a new partial and suballocate the rest from it
+			 */
+			struct page *np = NULL;
+
+			np = kbase_mem_pool_alloc_locked(pool);
+
+			if (np) {
+				int i;
+				struct kbase_sub_alloc *const sa = *prealloc_sa;
+				struct page *p;
+
+				/* store pointers back to the control struct */
+				np->lru.next = (void *)sa;
+				for (p = np; p < np + SZ_2M / SZ_4K; p++)
+					p->lru.prev = (void *)np;
+				INIT_LIST_HEAD(&sa->link);
+				bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
+				sa->page = np;
+
+				for (i = 0; i < nr_left; i++)
+					*tp++ = as_tagged_tag(
+							page_to_phys(np + i),
+							FROM_PARTIAL);
+
+				bitmap_set(sa->sub_pages, 0, nr_left);
+				nr_left = 0;
+				/* Indicate to user that we'll free this memory
+				 * later.
+				 */
+				*prealloc_sa = NULL;
+
+				/* expose for later use */
+				list_add(&sa->link, &kctx->mem_partials);
+			}
+		}
+		if (nr_left)
+			goto alloc_failed;
+	} else {
+#endif
+		res = kbase_mem_pool_alloc_pages_locked(pool,
+						 nr_left,
+						 tp);
+		if (res <= 0)
+			goto alloc_failed;
+#ifdef CONFIG_MALI_2MB_ALLOC
+	}
+#endif
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kbdev,
+			kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return new_pages;
+
+alloc_failed:
+	/* rollback needed if got one or more 2MB but failed later */
+	if (nr_left != nr_pages_requested) {
+		size_t nr_pages_to_free = nr_pages_requested - nr_left;
+
+		struct tagged_addr *start_free = alloc->pages + alloc->nents;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+		if (pool->order) {
+			while (nr_pages_to_free) {
+				if (is_huge_head(*start_free)) {
+					kbase_mem_pool_free_pages_locked(
+						pool, 512,
+						start_free,
+						false, /* not dirty */
+						true); /* return to pool */
+					nr_pages_to_free -= 512;
+					start_free += 512;
+				} else if (is_partial(*start_free)) {
+					free_partial_locked(kctx, pool,
+							*start_free);
+					nr_pages_to_free--;
+					start_free++;
+				}
+			}
+		} else {
+#endif
+			kbase_mem_pool_free_pages_locked(pool,
+					nr_pages_to_free,
+					start_free,
+					false, /* not dirty */
+					true); /* return to pool */
+#ifdef CONFIG_MALI_2MB_ALLOC
+		}
+#endif
+	}
+
+	kbase_process_page_usage_dec(kctx, nr_pages_requested);
+	atomic_sub(nr_pages_requested, &kctx->used_pages);
+	atomic_sub(nr_pages_requested, &kctx->kbdev->memdev.used_pages);
+
+invalid_request:
+	return NULL;
+}
+
+static void free_partial(struct kbase_context *kctx, int group_id, struct
+		tagged_addr tp)
+{
+	struct page *p, *head_page;
+	struct kbase_sub_alloc *sa;
+
+	p = as_page(tp);
+	head_page = (struct page *)p->lru.prev;
+	sa = (struct kbase_sub_alloc *)head_page->lru.next;
+	spin_lock(&kctx->mem_partials_lock);
+	clear_bit(p - head_page, sa->sub_pages);
+	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
+		list_del(&sa->link);
+		kbase_mem_pool_free(
+			&kctx->mem_pools.large[group_id],
+			head_page,
+			true);
+		kfree(sa);
+	} else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
+		   SZ_2M / SZ_4K - 1) {
+		/* expose the partial again */
+		list_add(&sa->link, &kctx->mem_partials);
+	}
+	spin_unlock(&kctx->mem_partials_lock);
+}
+
+int kbase_free_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.native.kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	struct tagged_addr *start_free;
+	int new_page_count __maybe_unused;
+	size_t freed = 0;
+
+	if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) ||
+	    WARN_ON(alloc->imported.native.kctx == NULL) ||
+	    WARN_ON(alloc->nents < nr_pages_to_free) ||
+	    WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) {
+		return -EINVAL;
+	}
+
+	/* early out if nothing to do */
+	if (0 == nr_pages_to_free)
+		return 0;
+
+	start_free = alloc->pages + alloc->nents - nr_pages_to_free;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/* pad start_free to a valid start location */
+	while (nr_pages_to_free && is_huge(*start_free) &&
+	       !is_huge_head(*start_free)) {
+		nr_pages_to_free--;
+		start_free++;
+	}
+
+	while (nr_pages_to_free) {
+		if (is_huge_head(*start_free)) {
+			/* This is a 2MB entry, so free all the 512 pages that
+			 * it points to
+			 */
+			kbase_mem_pool_free_pages(
+				&kctx->mem_pools.large[alloc->group_id],
+				512,
+				start_free,
+				syncback,
+				reclaimed);
+			nr_pages_to_free -= 512;
+			start_free += 512;
+			freed += 512;
+		} else if (is_partial(*start_free)) {
+			free_partial(kctx, alloc->group_id, *start_free);
+			nr_pages_to_free--;
+			start_free++;
+			freed++;
+		} else {
+			struct tagged_addr *local_end_free;
+
+			local_end_free = start_free;
+			while (nr_pages_to_free &&
+				!is_huge(*local_end_free) &&
+				!is_partial(*local_end_free)) {
+				local_end_free++;
+				nr_pages_to_free--;
+			}
+			kbase_mem_pool_free_pages(
+				&kctx->mem_pools.small[alloc->group_id],
+				local_end_free - start_free,
+				start_free,
+				syncback,
+				reclaimed);
+			freed += local_end_free - start_free;
+			start_free += local_end_free - start_free;
+		}
+	}
+
+	alloc->nents -= freed;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		kbase_process_page_usage_dec(kctx, freed);
+		new_page_count = atomic_sub_return(freed,
+			&kctx->used_pages);
+		atomic_sub(freed,
+			&kctx->kbdev->memdev.used_pages);
+
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kbdev,
+			kctx->id,
+			(u64)new_page_count);
+	}
+
+	return 0;
+}
+
+static void free_partial_locked(struct kbase_context *kctx,
+		struct kbase_mem_pool *pool, struct tagged_addr tp)
+{
+	struct page *p, *head_page;
+	struct kbase_sub_alloc *sa;
+
+	lockdep_assert_held(&pool->pool_lock);
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	p = as_page(tp);
+	head_page = (struct page *)p->lru.prev;
+	sa = (struct kbase_sub_alloc *)head_page->lru.next;
+	clear_bit(p - head_page, sa->sub_pages);
+	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
+		list_del(&sa->link);
+		kbase_mem_pool_free_locked(pool, head_page, true);
+		kfree(sa);
+	} else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
+		   SZ_2M / SZ_4K - 1) {
+		/* expose the partial again */
+		list_add(&sa->link, &kctx->mem_partials);
+	}
+}
+
+void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
+		struct kbase_mem_pool *pool, struct tagged_addr *pages,
+		size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.native.kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	struct tagged_addr *start_free;
+	size_t freed = 0;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	lockdep_assert_held(&pool->pool_lock);
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	/* early out if nothing to do */
+	if (!nr_pages_to_free)
+		return;
+
+	start_free = pages;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/* pad start_free to a valid start location */
+	while (nr_pages_to_free && is_huge(*start_free) &&
+	       !is_huge_head(*start_free)) {
+		nr_pages_to_free--;
+		start_free++;
+	}
+
+	while (nr_pages_to_free) {
+		if (is_huge_head(*start_free)) {
+			/* This is a 2MB entry, so free all the 512 pages that
+			 * it points to
+			 */
+			WARN_ON(!pool->order);
+			kbase_mem_pool_free_pages_locked(pool,
+					512,
+					start_free,
+					syncback,
+					reclaimed);
+			nr_pages_to_free -= 512;
+			start_free += 512;
+			freed += 512;
+		} else if (is_partial(*start_free)) {
+			WARN_ON(!pool->order);
+			free_partial_locked(kctx, pool, *start_free);
+			nr_pages_to_free--;
+			start_free++;
+			freed++;
+		} else {
+			struct tagged_addr *local_end_free;
+
+			WARN_ON(pool->order);
+			local_end_free = start_free;
+			while (nr_pages_to_free &&
+			       !is_huge(*local_end_free) &&
+			       !is_partial(*local_end_free)) {
+				local_end_free++;
+				nr_pages_to_free--;
+			}
+			kbase_mem_pool_free_pages_locked(pool,
+					local_end_free - start_free,
+					start_free,
+					syncback,
+					reclaimed);
+			freed += local_end_free - start_free;
+			start_free += local_end_free - start_free;
+		}
+	}
+
+	alloc->nents -= freed;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		int new_page_count;
+
+		kbase_process_page_usage_dec(kctx, freed);
+		new_page_count = atomic_sub_return(freed,
+			&kctx->used_pages);
+		atomic_sub(freed,
+			&kctx->kbdev->memdev.used_pages);
+
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+				kbdev,
+				kctx->id,
+				(u64)new_page_count);
+	}
+}
+
+
+void kbase_mem_kref_free(struct kref *kref)
+{
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
+
+	switch (alloc->type) {
+	case KBASE_MEM_TYPE_NATIVE: {
+
+		if (!WARN_ON(!alloc->imported.native.kctx)) {
+			if (alloc->permanent_map)
+				kbase_phy_alloc_mapping_term(
+						alloc->imported.native.kctx,
+						alloc);
+
+			/*
+			 * The physical allocation must have been removed from
+			 * the eviction list before trying to free it.
+			 */
+			mutex_lock(
+				&alloc->imported.native.kctx->jit_evict_lock);
+			WARN_ON(!list_empty(&alloc->evict_node));
+			mutex_unlock(
+				&alloc->imported.native.kctx->jit_evict_lock);
+
+			kbase_process_page_usage_dec(
+					alloc->imported.native.kctx,
+					alloc->imported.native.nr_struct_pages);
+		}
+		kbase_free_phy_pages_helper(alloc, alloc->nents);
+		break;
+	}
+	case KBASE_MEM_TYPE_ALIAS: {
+		/* just call put on the underlying phy allocs */
+		size_t i;
+		struct kbase_aliased *aliased;
+
+		aliased = alloc->imported.alias.aliased;
+		if (aliased) {
+			for (i = 0; i < alloc->imported.alias.nents; i++)
+				if (aliased[i].alloc)
+					kbase_mem_phy_alloc_put(aliased[i].alloc);
+			vfree(aliased);
+		}
+		break;
+	}
+	case KBASE_MEM_TYPE_RAW:
+		/* raw pages, external cleanup */
+		break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) {
+			WARN_ONCE(alloc->imported.umm.current_mapping_usage_count != 1,
+					"WARNING: expected excatly 1 mapping, got %d",
+					alloc->imported.umm.current_mapping_usage_count);
+			dma_buf_unmap_attachment(
+					alloc->imported.umm.dma_attachment,
+					alloc->imported.umm.sgt,
+					DMA_BIDIRECTIONAL);
+		}
+		dma_buf_detach(alloc->imported.umm.dma_buf,
+			       alloc->imported.umm.dma_attachment);
+		dma_buf_put(alloc->imported.umm.dma_buf);
+		break;
+#endif
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		if (alloc->imported.user_buf.mm)
+			mmdrop(alloc->imported.user_buf.mm);
+		if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+			vfree(alloc->imported.user_buf.pages);
+		else
+			kfree(alloc->imported.user_buf.pages);
+		break;
+	default:
+		WARN(1, "Unexecpted free of type %d\n", alloc->type);
+		break;
+	}
+
+	/* Free based on allocation type */
+	if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+		vfree(alloc);
+	else
+		kfree(alloc);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(vsize > 0);
+
+	/* validate user provided arguments */
+	if (size > vsize || vsize > reg->nr_pages)
+		goto out_term;
+
+	/* Prevent vsize*sizeof from wrapping around.
+	 * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail.
+	 */
+	if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
+		goto out_term;
+
+	KBASE_DEBUG_ASSERT(0 != vsize);
+
+	if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
+		goto out_term;
+
+	reg->cpu_alloc->reg = reg;
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
+			goto out_rollback;
+		reg->gpu_alloc->reg = reg;
+	}
+
+	return 0;
+
+out_rollback:
+	kbase_free_phy_pages_helper(reg->cpu_alloc, size);
+out_term:
+	return -1;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+
+bool kbase_check_alloc_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Either the GPU or CPU must be reading from the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
+		return false;
+
+	/* Either the GPU or CPU must be writing to the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* GPU executable memory cannot:
+	 * - Be written by the GPU
+	 * - Be grown on GPU page fault
+	 * - Have the top of its initial commit aligned to 'extent' */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (flags &
+			(BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+			BASE_MEM_TILER_ALIGN_TOP)))
+		return false;
+
+	/* To have an allocation lie within a 4GB chunk is required only for
+	 * TLS memory, which will never be used to contain executable code
+	 * and also used for Tiler heap.
+	 */
+	if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags &
+			(BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP)))
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for allocating. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* BASE_MEM_IMPORT_SHARED is only valid for imported memory */
+	if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
+		return false;
+
+	/* Should not combine BASE_MEM_COHERENT_LOCAL with
+	 * BASE_MEM_COHERENT_SYSTEM */
+	if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) ==
+			(BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM))
+		return false;
+
+	return true;
+}
+
+bool kbase_check_import_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Imported memory cannot be GPU executable */
+	if (flags & BASE_MEM_PROT_GPU_EX)
+		return false;
+
+	/* Imported memory cannot grow on page fault */
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		return false;
+
+	/* Imported memory cannot be aligned to the end of its initial commit */
+	if (flags & BASE_MEM_TILER_ALIGN_TOP)
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for importing. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* Secure memory cannot be read by the CPU */
+	if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD))
+		return false;
+
+	return true;
+}
+
+int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
+		u64 va_pages, u64 commit_pages, u64 large_extent)
+{
+	struct device *dev = kctx->kbdev->dev;
+	int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT;
+	struct kbase_va_region test_reg;
+
+	/* kbase_va_region's extent member can be of variable size, so check against that type */
+	test_reg.extent = large_extent;
+
+#define KBASE_MSG_PRE "GPU allocation attempted with "
+
+	if (0 == va_pages) {
+		dev_warn(dev, KBASE_MSG_PRE "0 va_pages!");
+		return -EINVAL;
+	}
+
+	if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) {
+		dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!",
+				(unsigned long long)va_pages);
+		return -ENOMEM;
+	}
+
+	/* Note: commit_pages is checked against va_pages during
+	 * kbase_alloc_phy_pages() */
+
+	/* Limit GPU executable allocs to GPU PC size */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) {
+		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_PROT_GPU_EX and va_pages==%lld larger than GPU PC range %lld",
+				(unsigned long long)va_pages,
+				(unsigned long long)gpu_pc_pages_max);
+
+		return -EINVAL;
+	}
+
+	if ((flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) &&
+			test_reg.extent == 0) {
+		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF or BASE_MEM_TILER_ALIGN_TOP but extent == 0\n");
+		return -EINVAL;
+	}
+
+	if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) &&
+			test_reg.extent != 0) {
+		dev_warn(dev, KBASE_MSG_PRE "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extent != 0\n");
+		return -EINVAL;
+	}
+
+	/* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */
+	if (flags & BASE_MEM_TILER_ALIGN_TOP) {
+#define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and "
+		unsigned long small_extent;
+
+		if (large_extent > BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES) {
+			dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%lld pages exceeds limit %lld",
+					(unsigned long long)large_extent,
+					BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES);
+			return -EINVAL;
+		}
+		/* For use with is_power_of_2, which takes unsigned long, so
+		 * must ensure e.g. on 32-bit kernel it'll fit in that type */
+		small_extent = (unsigned long)large_extent;
+
+		if (!is_power_of_2(small_extent)) {
+			dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%ld not a non-zero power of 2",
+					small_extent);
+			return -EINVAL;
+		}
+
+		if (commit_pages > large_extent) {
+			dev_warn(dev, KBASE_MSG_PRE_FLAG "commit_pages==%ld exceeds extent==%ld",
+					(unsigned long)commit_pages,
+					(unsigned long)large_extent);
+			return -EINVAL;
+		}
+#undef KBASE_MSG_PRE_FLAG
+	}
+
+	if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) &&
+	    (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) {
+		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GPU_VA_SAME_4GB_PAGE and va_pages==%lld greater than that needed for 4GB space",
+				(unsigned long long)va_pages);
+		return -EINVAL;
+	}
+
+	return 0;
+#undef KBASE_MSG_PRE
+}
+
+/**
+ * @brief Acquire the per-context region list lock
+ */
+void kbase_gpu_vm_lock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_lock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
+
+/**
+ * @brief Release the per-context region list lock
+ */
+void kbase_gpu_vm_unlock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_unlock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
+
+#ifdef CONFIG_DEBUG_FS
+struct kbase_jit_debugfs_data {
+	int (*func)(struct kbase_jit_debugfs_data *);
+	struct mutex lock;
+	struct kbase_context *kctx;
+	u64 active_value;
+	u64 pool_value;
+	u64 destroy_value;
+	char buffer[50];
+};
+
+static int kbase_jit_debugfs_common_open(struct inode *inode,
+		struct file *file, int (*func)(struct kbase_jit_debugfs_data *))
+{
+	struct kbase_jit_debugfs_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->func = func;
+	mutex_init(&data->lock);
+	data->kctx = (struct kbase_context *) inode->i_private;
+
+	file->private_data = data;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t kbase_jit_debugfs_common_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_jit_debugfs_data *data;
+	size_t size;
+	int ret;
+
+	data = (struct kbase_jit_debugfs_data *) file->private_data;
+	mutex_lock(&data->lock);
+
+	if (*ppos) {
+		size = strnlen(data->buffer, sizeof(data->buffer));
+	} else {
+		if (!data->func) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		if (data->func(data)) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		size = scnprintf(data->buffer, sizeof(data->buffer),
+				"%llu,%llu,%llu", data->active_value,
+				data->pool_value, data->destroy_value);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size);
+
+out_unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+static int kbase_jit_debugfs_common_release(struct inode *inode,
+		struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+	return kbase_jit_debugfs_common_open(inode, file, __func); \
+} \
+static const struct file_operations __fops = { \
+	.owner = THIS_MODULE, \
+	.open = __fops ## _open, \
+	.release = kbase_jit_debugfs_common_release, \
+	.read = kbase_jit_debugfs_common_read, \
+	.write = NULL, \
+	.llseek = generic_file_llseek, \
+}
+
+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct list_head *tmp;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each(tmp, &kctx->jit_active_head) {
+		data->active_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_pool_head) {
+		data->pool_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_destroy_head) {
+		data->destroy_value++;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
+		kbase_jit_debugfs_count_get);
+
+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->nr_pages;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops,
+		kbase_jit_debugfs_vm_get);
+
+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->gpu_alloc->nents;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
+		kbase_jit_debugfs_phys_get);
+
+void kbase_jit_debugfs_init(struct kbase_context *kctx)
+{
+	/* Caller already ensures this, but we keep the pattern for
+	 * maintenance safety.
+	 */
+	if (WARN_ON(!kctx) ||
+		WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry)))
+		return;
+
+	/* Debugfs entry for getting the number of JIT allocations. */
+	debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_count_fops);
+
+	/*
+	 * Debugfs entry for getting the total number of virtual pages
+	 * used by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_vm_fops);
+
+	/*
+	 * Debugfs entry for getting the number of physical pages used
+	 * by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_phys_fops);
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations
+ * @work: Work item
+ *
+ * This function does the work of freeing JIT allocations whose physical
+ * backing has been released.
+ */
+static void kbase_jit_destroy_worker(struct work_struct *work)
+{
+	struct kbase_context *kctx;
+	struct kbase_va_region *reg;
+
+	kctx = container_of(work, struct kbase_context, jit_work);
+	do {
+		mutex_lock(&kctx->jit_evict_lock);
+		if (list_empty(&kctx->jit_destroy_head)) {
+			mutex_unlock(&kctx->jit_evict_lock);
+			break;
+		}
+
+		reg = list_first_entry(&kctx->jit_destroy_head,
+				struct kbase_va_region, jit_node);
+
+		list_del(&reg->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		kbase_gpu_vm_lock(kctx);
+		reg->flags &= ~KBASE_REG_NO_USER_FREE;
+		kbase_mem_free_region(kctx, reg);
+		kbase_gpu_vm_unlock(kctx);
+	} while (1);
+}
+
+int kbase_jit_init(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->jit_evict_lock);
+	INIT_LIST_HEAD(&kctx->jit_active_head);
+	INIT_LIST_HEAD(&kctx->jit_pool_head);
+	INIT_LIST_HEAD(&kctx->jit_destroy_head);
+	INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
+
+	INIT_LIST_HEAD(&kctx->jit_pending_alloc);
+	INIT_LIST_HEAD(&kctx->jit_atoms_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	kctx->jit_max_allocations = 0;
+	kctx->jit_current_allocations = 0;
+	kctx->trim_level = 0;
+
+	return 0;
+}
+
+/* Check if the allocation from JIT pool is of the same size as the new JIT
+ * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets
+ * the alignment requirements.
+ */
+static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx,
+	struct kbase_va_region *walker, struct base_jit_alloc_info *info)
+{
+	bool meet_reqs = true;
+
+	if (walker->nr_pages != info->va_pages)
+		meet_reqs = false;
+	else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) {
+		size_t align = info->extent;
+		size_t align_mask = align - 1;
+
+		if ((walker->start_pfn + info->commit_pages) & align_mask)
+			meet_reqs = false;
+	}
+
+	return meet_reqs;
+}
+
+static int kbase_jit_grow(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info, struct kbase_va_region *reg)
+{
+	size_t delta;
+	size_t pages_required;
+	size_t old_size;
+	struct kbase_mem_pool *pool;
+	int ret = -ENOMEM;
+	struct tagged_addr *gpu_pages;
+	struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
+	int i;
+
+	if (info->commit_pages > reg->nr_pages) {
+		/* Attempted to grow larger than maximum size */
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Make the physical backing no longer reclaimable */
+	if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+		goto update_failed;
+
+	if (reg->gpu_alloc->nents >= info->commit_pages)
+		goto done;
+
+	/* Grow the backing */
+	old_size = reg->gpu_alloc->nents;
+
+	/* Allocate some more pages */
+	delta = info->commit_pages - reg->gpu_alloc->nents;
+	pages_required = delta;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	/* Preallocate memory for the sub-allocation structs */
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+		prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]),
+				GFP_KERNEL);
+		if (!prealloc_sas[i])
+			goto update_failed;
+	}
+
+	if (pages_required >= (SZ_2M / SZ_4K)) {
+		pool = &kctx->mem_pools.large[kctx->jit_group_id];
+		/* Round up to number of 2 MB pages required */
+		pages_required += ((SZ_2M / SZ_4K) - 1);
+		pages_required /= (SZ_2M / SZ_4K);
+	} else {
+#endif
+		pool = &kctx->mem_pools.small[kctx->jit_group_id];
+#ifdef CONFIG_MALI_2MB_ALLOC
+	}
+#endif
+
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		pages_required *= 2;
+
+	spin_lock(&kctx->mem_partials_lock);
+	kbase_mem_pool_lock(pool);
+
+	/* As we can not allocate memory from the kernel with the vm_lock held,
+	 * grow the pool to the required size with the lock dropped. We hold the
+	 * pool lock to prevent another thread from allocating from the pool
+	 * between the grow and allocation.
+	 */
+	while (kbase_mem_pool_size(pool) < pages_required) {
+		int pool_delta = pages_required - kbase_mem_pool_size(pool);
+
+		kbase_mem_pool_unlock(pool);
+		spin_unlock(&kctx->mem_partials_lock);
+		kbase_gpu_vm_unlock(kctx);
+
+		if (kbase_mem_pool_grow(pool, pool_delta))
+			goto update_failed_unlocked;
+
+		kbase_gpu_vm_lock(kctx);
+		spin_lock(&kctx->mem_partials_lock);
+		kbase_mem_pool_lock(pool);
+	}
+
+	gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool,
+			delta, &prealloc_sas[0]);
+	if (!gpu_pages) {
+		kbase_mem_pool_unlock(pool);
+		spin_unlock(&kctx->mem_partials_lock);
+		goto update_failed;
+	}
+
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		struct tagged_addr *cpu_pages;
+
+		cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc,
+				pool, delta, &prealloc_sas[1]);
+		if (!cpu_pages) {
+			kbase_free_phy_pages_helper_locked(reg->gpu_alloc,
+					pool, gpu_pages, delta);
+			kbase_mem_pool_unlock(pool);
+			spin_unlock(&kctx->mem_partials_lock);
+			goto update_failed;
+		}
+	}
+	kbase_mem_pool_unlock(pool);
+	spin_unlock(&kctx->mem_partials_lock);
+
+	ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages,
+			old_size);
+	/*
+	 * The grow failed so put the allocation back in the
+	 * pool and return failure.
+	 */
+	if (ret)
+		goto update_failed;
+
+done:
+	ret = 0;
+
+	/* Update attributes of JIT allocation taken from the pool */
+	reg->initial_commit = info->commit_pages;
+	reg->extent = info->extent;
+
+update_failed:
+	kbase_gpu_vm_unlock(kctx);
+update_failed_unlocked:
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
+		kfree(prealloc_sas[i]);
+
+	return ret;
+}
+
+static void trace_jit_stats(struct kbase_context *kctx,
+		u32 bin_id, u32 max_allocations)
+{
+	const u32 alloc_count =
+		kctx->jit_current_allocations_per_bin[bin_id];
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	struct kbase_va_region *walker;
+	u32 va_pages = 0;
+	u32 ph_pages = 0;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(walker, &kctx->jit_active_head, jit_node) {
+		if (walker->jit_bin_id != bin_id)
+			continue;
+
+		va_pages += walker->nr_pages;
+		ph_pages += walker->gpu_alloc->nents;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	KBASE_TLSTREAM_AUX_JIT_STATS(kbdev, kctx->id, bin_id,
+		max_allocations, alloc_count, va_pages, ph_pages);
+}
+
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info)
+{
+	struct kbase_va_region *reg = NULL;
+
+	if (kctx->jit_current_allocations >= kctx->jit_max_allocations) {
+		/* Too many current allocations */
+		dev_dbg(kctx->kbdev->dev,
+			"Max JIT allocations limit reached: active allocations %d, max allocations %d\n",
+			kctx->jit_current_allocations,
+			kctx->jit_max_allocations);
+		return NULL;
+	}
+	if (info->max_allocations > 0 &&
+			kctx->jit_current_allocations_per_bin[info->bin_id] >=
+			info->max_allocations) {
+		/* Too many current allocations in this bin */
+		dev_dbg(kctx->kbdev->dev,
+			"Per bin limit of max JIT allocations reached: bin_id %d, active allocations %d, max allocations %d\n",
+			info->bin_id,
+			kctx->jit_current_allocations_per_bin[info->bin_id],
+			info->max_allocations);
+		return NULL;
+	}
+
+	mutex_lock(&kctx->jit_evict_lock);
+
+	/*
+	 * Scan the pool for an existing allocation which meets our
+	 * requirements and remove it.
+	 */
+	if (info->usage_id != 0) {
+		/* First scan for an allocation with the same usage ID */
+		struct kbase_va_region *walker;
+		size_t current_diff = SIZE_MAX;
+
+		list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) {
+
+			if (walker->jit_usage_id == info->usage_id &&
+					walker->jit_bin_id == info->bin_id &&
+					meet_size_and_tiler_align_top_requirements(
+							kctx, walker, info)) {
+				size_t min_size, max_size, diff;
+
+				/*
+				 * The JIT allocations VA requirements have been
+				 * met, it's suitable but other allocations
+				 * might be a better fit.
+				 */
+				min_size = min_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				max_size = max_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				diff = max_size - min_size;
+
+				if (current_diff > diff) {
+					current_diff = diff;
+					reg = walker;
+				}
+
+				/* The allocation is an exact match */
+				if (current_diff == 0)
+					break;
+			}
+		}
+	}
+
+	if (!reg) {
+		/* No allocation with the same usage ID, or usage IDs not in
+		 * use. Search for an allocation we can reuse.
+		 */
+		struct kbase_va_region *walker;
+		size_t current_diff = SIZE_MAX;
+
+		list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) {
+
+			if (walker->jit_bin_id == info->bin_id &&
+					meet_size_and_tiler_align_top_requirements(
+							kctx, walker, info)) {
+				size_t min_size, max_size, diff;
+
+				/*
+				 * The JIT allocations VA requirements have been
+				 * met, it's suitable but other allocations
+				 * might be a better fit.
+				 */
+				min_size = min_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				max_size = max_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				diff = max_size - min_size;
+
+				if (current_diff > diff) {
+					current_diff = diff;
+					reg = walker;
+				}
+
+				/* The allocation is an exact match, so stop
+				 * looking.
+				 */
+				if (current_diff == 0)
+					break;
+			}
+		}
+	}
+
+	if (reg) {
+		/*
+		 * Remove the found region from the pool and add it to the
+		 * active list.
+		 */
+		list_move(&reg->jit_node, &kctx->jit_active_head);
+
+		/*
+		 * Remove the allocation from the eviction list as it's no
+		 * longer eligible for eviction. This must be done before
+		 * dropping the jit_evict_lock
+		 */
+		list_del_init(&reg->gpu_alloc->evict_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		if (kbase_jit_grow(kctx, info, reg) < 0) {
+			/*
+			 * An update to an allocation from the pool failed,
+			 * chances are slim a new allocation would fair any
+			 * better so return the allocation to the pool and
+			 * return the function with failure.
+			 */
+			dev_dbg(kctx->kbdev->dev,
+				"JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n",
+				info->va_pages, info->commit_pages);
+			goto update_failed_unlocked;
+		}
+	} else {
+		/* No suitable JIT allocation was found so create a new one */
+		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+				BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+				BASE_MEM_COHERENT_LOCAL |
+				BASEP_MEM_NO_USER_FREE;
+		u64 gpu_addr;
+
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)
+			flags |= BASE_MEM_TILER_ALIGN_TOP;
+
+		flags |= base_mem_group_id_set(kctx->jit_group_id);
+
+		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
+				info->extent, &flags, &gpu_addr);
+		if (!reg) {
+			/* Most likely not enough GPU virtual space left for
+			 * the new JIT allocation.
+			 */
+			dev_dbg(kctx->kbdev->dev,
+				"Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n",
+				info->va_pages, info->commit_pages);
+			goto out_unlocked;
+		}
+
+		mutex_lock(&kctx->jit_evict_lock);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+		mutex_unlock(&kctx->jit_evict_lock);
+	}
+
+	kctx->jit_current_allocations++;
+	kctx->jit_current_allocations_per_bin[info->bin_id]++;
+
+	trace_jit_stats(kctx, info->bin_id, info->max_allocations);
+
+	reg->jit_usage_id = info->usage_id;
+	reg->jit_bin_id = info->bin_id;
+
+	return reg;
+
+update_failed_unlocked:
+	mutex_lock(&kctx->jit_evict_lock);
+	list_move(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+out_unlocked:
+	return NULL;
+}
+
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	u64 old_pages;
+
+	/* Get current size of JIT region */
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (reg->initial_commit < old_pages) {
+		/* Free trim_level % of region, but don't go below initial
+		 * commit size
+		 */
+		u64 new_size = MAX(reg->initial_commit,
+			div_u64(old_pages * (100 - kctx->trim_level), 100));
+		u64 delta = old_pages - new_size;
+
+		if (delta) {
+			kbase_mem_shrink_cpu_mapping(kctx, reg, old_pages-delta,
+					old_pages);
+			kbase_mem_shrink_gpu_mapping(kctx, reg, old_pages-delta,
+					old_pages);
+
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+		}
+	}
+
+	kctx->jit_current_allocations--;
+	kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--;
+
+	trace_jit_stats(kctx, reg->jit_bin_id, UINT_MAX);
+
+	kbase_mem_evictable_mark_reclaim(reg->gpu_alloc);
+
+	kbase_gpu_vm_lock(kctx);
+	reg->flags |= KBASE_REG_DONT_NEED;
+	kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents);
+	kbase_gpu_vm_unlock(kctx);
+
+	/*
+	 * Add the allocation to the eviction list and the jit pool, after this
+	 * point the shrink can reclaim it, or it may be reused.
+	 */
+	mutex_lock(&kctx->jit_evict_lock);
+
+	/* This allocation can't already be on a list. */
+	WARN_ON(!list_empty(&reg->gpu_alloc->evict_node));
+	list_add(&reg->gpu_alloc->evict_node, &kctx->evict_list);
+
+	list_move(&reg->jit_node, &kctx->jit_pool_head);
+
+	mutex_unlock(&kctx->jit_evict_lock);
+}
+
+void kbase_jit_backing_lost(struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
+
+	if (WARN_ON(!kctx))
+		return;
+
+	lockdep_assert_held(&kctx->jit_evict_lock);
+
+	/*
+	 * JIT allocations will always be on a list, if the region
+	 * is not on a list then it's not a JIT allocation.
+	 */
+	if (list_empty(&reg->jit_node))
+		return;
+
+	/*
+	 * Freeing the allocation requires locks we might not be able
+	 * to take now, so move the allocation to the free list and kick
+	 * the worker which will do the freeing.
+	 */
+	list_move(&reg->jit_node, &kctx->jit_destroy_head);
+
+	schedule_work(&kctx->jit_work);
+}
+
+bool kbase_jit_evict(struct kbase_context *kctx)
+{
+	struct kbase_va_region *reg = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Free the oldest allocation from the pool */
+	mutex_lock(&kctx->jit_evict_lock);
+	if (!list_empty(&kctx->jit_pool_head)) {
+		reg = list_entry(kctx->jit_pool_head.prev,
+				struct kbase_va_region, jit_node);
+		list_del(&reg->jit_node);
+		list_del_init(&reg->gpu_alloc->evict_node);
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	if (reg) {
+		reg->flags &= ~KBASE_REG_NO_USER_FREE;
+		kbase_mem_free_region(kctx, reg);
+	}
+
+	return (reg != NULL);
+}
+
+void kbase_jit_term(struct kbase_context *kctx)
+{
+	struct kbase_va_region *walker;
+
+	/* Free all allocations for this context */
+
+	kbase_gpu_vm_lock(kctx);
+	mutex_lock(&kctx->jit_evict_lock);
+	/* Free all allocations from the pool */
+	while (!list_empty(&kctx->jit_pool_head)) {
+		walker = list_first_entry(&kctx->jit_pool_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		list_del_init(&walker->gpu_alloc->evict_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		walker->flags &= ~KBASE_REG_NO_USER_FREE;
+		kbase_mem_free_region(kctx, walker);
+		mutex_lock(&kctx->jit_evict_lock);
+	}
+
+	/* Free all allocations from active list */
+	while (!list_empty(&kctx->jit_active_head)) {
+		walker = list_first_entry(&kctx->jit_active_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		list_del_init(&walker->gpu_alloc->evict_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		walker->flags &= ~KBASE_REG_NO_USER_FREE;
+		kbase_mem_free_region(kctx, walker);
+		mutex_lock(&kctx->jit_evict_lock);
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+	kbase_gpu_vm_unlock(kctx);
+
+	/*
+	 * Flush the freeing of allocations whose backing has been freed
+	 * (i.e. everything in jit_destroy_head).
+	 */
+	cancel_work_sync(&kctx->jit_work);
+}
+
+bool kbase_has_exec_va_zone(struct kbase_context *kctx)
+{
+	bool has_exec_va_zone;
+
+	kbase_gpu_vm_lock(kctx);
+	has_exec_va_zone = (kctx->exec_va_start != U64_MAX);
+	kbase_gpu_vm_unlock(kctx);
+
+	return has_exec_va_zone;
+}
+
+
+int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+	struct page **pages = alloc->imported.user_buf.pages;
+	unsigned long address = alloc->imported.user_buf.address;
+	struct mm_struct *mm = alloc->imported.user_buf.mm;
+	long pinned_pages;
+	long i;
+
+	if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF))
+		return -EINVAL;
+
+	if (alloc->nents) {
+		if (WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages))
+			return -EINVAL;
+		else
+			return 0;
+	}
+
+	if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm))
+		return -EINVAL;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	pinned_pages = get_user_pages(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
+KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#else
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#endif
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#else
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL, NULL);
+#endif
+
+	if (pinned_pages <= 0)
+		return pinned_pages;
+
+	if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+		for (i = 0; i < pinned_pages; i++)
+			put_page(pages[i]);
+		return -ENOMEM;
+	}
+
+	alloc->nents = pinned_pages;
+
+	return 0;
+}
+
+static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	long pinned_pages;
+	struct kbase_mem_phy_alloc *alloc;
+	struct page **pages;
+	struct tagged_addr *pa;
+	long i;
+	unsigned long address;
+	struct device *dev;
+	unsigned long offset;
+	unsigned long local_size;
+	unsigned long gwt_mask = ~0;
+	int err = kbase_jd_user_buf_pin_pages(kctx, reg);
+
+	if (err)
+		return err;
+
+	alloc = reg->gpu_alloc;
+	pa = kbase_get_gpu_phy_pages(reg);
+	address = alloc->imported.user_buf.address;
+	pinned_pages = alloc->nents;
+	pages = alloc->imported.user_buf.pages;
+	dev = kctx->kbdev->dev;
+	offset = address & ~PAGE_MASK;
+	local_size = alloc->imported.user_buf.size;
+
+	for (i = 0; i < pinned_pages; i++) {
+		dma_addr_t dma_addr;
+		unsigned long min;
+
+		min = MIN(PAGE_SIZE - offset, local_size);
+		dma_addr = dma_map_page(dev, pages[i],
+				offset, min,
+				DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, dma_addr))
+			goto unwind;
+
+		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
+		pa[i] = as_tagged(page_to_phys(pages[i]));
+
+		local_size -= min;
+		offset = 0;
+	}
+
+#ifdef CONFIG_MALI_CINSTR_GWT
+	if (kctx->gwt_enabled)
+		gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
+
+	err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+			pa, kbase_reg_current_backed_size(reg),
+			reg->flags & gwt_mask, kctx->as_nr,
+			alloc->group_id);
+	if (err == 0)
+		return 0;
+
+	/* fall down */
+unwind:
+	alloc->nents = 0;
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				alloc->imported.user_buf.dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+
+	while (++i < pinned_pages) {
+		put_page(pages[i]);
+		pages[i] = NULL;
+	}
+
+	return err;
+}
+
+/* This function would also perform the work of unpinning pages on Job Manager
+ * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT
+ * have a corresponding call to kbase_jd_user_buf_unpin_pages().
+ */
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable)
+{
+	long i;
+	struct page **pages;
+	unsigned long size = alloc->imported.user_buf.size;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	pages = alloc->imported.user_buf.pages;
+	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
+		unsigned long local_size;
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
+		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+				DMA_BIDIRECTIONAL);
+		if (writeable)
+			set_page_dirty_lock(pages[i]);
+		put_page(pages[i]);
+		pages[i] = NULL;
+
+		size -= local_size;
+	}
+	alloc->nents = 0;
+}
+
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm)
+{
+	int err;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* decide what needs to happen for this resource */
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) &&
+		    (!reg->gpu_alloc->nents))
+			goto exit;
+
+		reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
+			err = kbase_jd_user_buf_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+	}
+	break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		err = kbase_mem_umm_map(kctx, reg);
+		if (err)
+			goto exit;
+		break;
+	}
+#endif
+	default:
+		goto exit;
+	}
+
+	return kbase_mem_phy_alloc_get(reg->gpu_alloc);
+exit:
+	return NULL;
+}
+
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+	switch (alloc->type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		kbase_mem_umm_unmap(kctx, reg, alloc);
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		alloc->imported.user_buf.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
+			bool writeable = true;
+
+			if (!kbase_is_region_invalid_or_free(reg) &&
+					reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx->kbdev,
+						&kctx->mmu,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg),
+						kctx->as_nr);
+
+			if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+				writeable = false;
+
+			kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+		}
+	}
+	break;
+	default:
+	break;
+	}
+	kbase_mem_phy_alloc_put(alloc);
+}
+
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *meta = NULL;
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Walk the per context external resource metadata list for the
+	 * metadata which matches the region which is being acquired.
+	 */
+	list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
+		if (walker->gpu_addr == gpu_addr) {
+			meta = walker;
+			break;
+		}
+	}
+
+	/* No metadata exists so create one. */
+	if (!meta) {
+		struct kbase_va_region *reg;
+
+		/* Find the region */
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				kctx, gpu_addr);
+		if (kbase_is_region_invalid_or_free(reg))
+			goto failed;
+
+		/* Allocate the metadata object */
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			goto failed;
+
+		/*
+		 * Fill in the metadata object and acquire a reference
+		 * for the physical resource.
+		 */
+		meta->alloc = kbase_map_external_resource(kctx, reg, NULL);
+
+		if (!meta->alloc)
+			goto fail_map;
+
+		meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+
+		list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
+	}
+
+	return meta;
+
+fail_map:
+	kfree(meta);
+failed:
+	return NULL;
+}
+
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+	struct kbase_va_region *reg;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Search of the metadata if one isn't provided. */
+	if (!meta) {
+		/*
+		 * Walk the per context external resource metadata list for the
+		 * metadata which matches the region which is being released.
+		 */
+		list_for_each_entry(walker, &kctx->ext_res_meta_head,
+				ext_res_node) {
+			if (walker->gpu_addr == gpu_addr) {
+				meta = walker;
+				break;
+			}
+		}
+	}
+
+	/* No metadata so just return. */
+	if (!meta)
+		return false;
+
+	/* Drop the physical memory reference and free the metadata. */
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx,
+			meta->gpu_addr);
+
+	kbase_unmap_external_resource(kctx, reg, meta->alloc);
+	list_del(&meta->ext_res_node);
+	kfree(meta);
+
+	return true;
+}
+
+int kbase_sticky_resource_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->ext_res_meta_head);
+
+	return 0;
+}
+
+void kbase_sticky_resource_term(struct kbase_context *kctx)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Free any sticky resources which haven't been unmapped.
+	 *
+	 * Note:
+	 * We don't care about refcounts at this point as no future
+	 * references to the meta data will be made.
+	 * Region termination would find these if we didn't free them
+	 * here, but it's more efficient if we do the clean up here.
+	 */
+	while (!list_empty(&kctx->ext_res_meta_head)) {
+		walker = list_first_entry(&kctx->ext_res_meta_head,
+				struct kbase_ctx_ext_res_meta, ext_res_node);
+
+		kbase_sticky_resource_release(kctx, walker, 0);
+	}
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
new file mode 100755
index 0000000..ca5bbee
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -0,0 +1,1679 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/kref.h>
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+/* Required for kbase_mem_evictable_unmake */
+#include "mali_kbase_mem_linux.h"
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx,
+		int pages);
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)	/* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
+The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and
+page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table
+updates and generates duplicate page faults as the page table information used by the MMU is not valid.   */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3)	/* round to 8 pages */
+
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0)	/* round to 1 page */
+
+/* This must always be a power of 2 */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/**
+ * A CPU mapping
+ */
+struct kbase_cpu_mapping {
+	struct   list_head mappings_list;
+	struct   kbase_mem_phy_alloc *alloc;
+	struct   kbase_context *kctx;
+	struct   kbase_va_region *region;
+	int      count;
+	int      free_on_close;
+};
+
+enum kbase_memory_type {
+	KBASE_MEM_TYPE_NATIVE,
+	KBASE_MEM_TYPE_IMPORTED_UMM,
+	KBASE_MEM_TYPE_IMPORTED_USER_BUF,
+	KBASE_MEM_TYPE_ALIAS,
+	KBASE_MEM_TYPE_RAW
+};
+
+/* internal structure, mirroring base_mem_aliasing_info,
+ * but with alloc instead of a gpu va (handle) */
+struct kbase_aliased {
+	struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
+	u64 offset; /* in pages */
+	u64 length; /* in pages */
+};
+
+/**
+ * @brief Physical pages tracking object properties
+  */
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED  (1u << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE            (1u << 1)
+
+/* struct kbase_mem_phy_alloc - Physical pages tracking object.
+ *
+ * Set up to track N pages.
+ * N not stored here, the creator holds that info.
+ * This object only tracks how many elements are actually valid (present).
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc
+ * is not shared with another region or client. CPU mappings are OK to
+ * exist when changing, as long as the tracked mappings objects are
+ * updated as part of the change.
+ *
+ * @kref: number of users of this alloc
+ * @gpu_mappings: count number of times mapped on the GPU
+ * @nents: 0..N
+ * @pages: N elements, only 0..nents are valid
+ * @mappings: List of CPU mappings of this physical memory allocation.
+ * @evict_node: Node used to store this allocation on the eviction list
+ * @evicted: Physical backing size when the pages where evicted
+ * @reg: Back reference to the region structure which created this
+ *       allocation, or NULL if it has been freed.
+ * @type: type of buffer
+ * @permanent_map: Kernel side mapping of the alloc, shall never be
+ *                 referred directly. kbase_phy_alloc_mapping_get() &
+ *                 kbase_phy_alloc_mapping_put() pair should be used
+ *                 around access to the kernel-side CPU mapping so that
+ *                 mapping doesn't disappear whilst it is being accessed.
+ * @properties: Bitmask of properties, e.g. KBASE_MEM_PHY_ALLOC_LARGE.
+ * @group_id: A memory group ID to be passed to a platform-specific
+ *            memory group manager, if present.
+ *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @imported: member in union valid based on @a type
+ */
+struct kbase_mem_phy_alloc {
+	struct kref           kref;
+	atomic_t              gpu_mappings;
+	size_t                nents;
+	struct tagged_addr    *pages;
+	struct list_head      mappings;
+	struct list_head      evict_node;
+	size_t                evicted;
+	struct kbase_va_region *reg;
+	enum kbase_memory_type type;
+	struct kbase_vmap_struct *permanent_map;
+	u8 properties;
+	u8 group_id;
+
+	union {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		struct {
+			struct dma_buf *dma_buf;
+			struct dma_buf_attachment *dma_attachment;
+			unsigned int current_mapping_usage_count;
+			struct sg_table *sgt;
+		} umm;
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+		struct {
+			u64 stride;
+			size_t nents;
+			struct kbase_aliased *aliased;
+		} alias;
+		struct {
+			struct kbase_context *kctx;
+			/* Number of pages in this structure, including *pages.
+			 * Used for kernel memory tracking.
+			 */
+			size_t nr_struct_pages;
+		} native;
+		struct kbase_alloc_import_user_buf {
+			unsigned long address;
+			unsigned long size;
+			unsigned long nr_pages;
+			struct page **pages;
+			/* top bit (1<<31) of current_mapping_usage_count
+			 * specifies that this import was pinned on import
+			 * See PINNED_ON_IMPORT
+			 */
+			u32 current_mapping_usage_count;
+			struct mm_struct *mm;
+			dma_addr_t *dma_addrs;
+		} user_buf;
+	} imported;
+};
+
+/* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is
+ * used to signify that a buffer was pinned when it was imported. Since the
+ * reference count is limited by the number of atoms that can be submitted at
+ * once there should be no danger of overflowing into this bit.
+ * Stealing the top bit also has the benefit that
+ * current_mapping_usage_count != 0 if and only if the buffer is mapped.
+ */
+#define PINNED_ON_IMPORT	(1<<31)
+
+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		atomic_inc(&alloc->gpu_mappings);
+}
+
+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+			pr_err("Mismatched %s:\n", __func__);
+			dump_stack();
+		}
+}
+
+/**
+ * kbase_mem_is_imported - Indicate whether a memory type is imported
+ *
+ * @type: the memory type
+ *
+ * Return: true if the memory type is imported, false otherwise
+ */
+static inline bool kbase_mem_is_imported(enum kbase_memory_type type)
+{
+	return (type == KBASE_MEM_TYPE_IMPORTED_UMM) ||
+		(type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+}
+
+void kbase_mem_kref_free(struct kref *kref);
+
+int kbase_mem_init(struct kbase_device *kbdev);
+void kbase_mem_halt(struct kbase_device *kbdev);
+void kbase_mem_term(struct kbase_device *kbdev);
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_get(&alloc->kref);
+	return alloc;
+}
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_put(&alloc->kref, kbase_mem_kref_free);
+	return NULL;
+}
+
+/**
+ * A GPU memory region, and attributes for CPU mappings.
+ */
+struct kbase_va_region {
+	struct rb_node rblink;
+	struct list_head link;
+
+	struct rb_root *rbtree;	/* Backlink to rb tree */
+
+	u64 start_pfn;		/* The PFN in GPU space */
+	size_t nr_pages;
+	/* Initial commit, for aligning the start address and correctly growing
+	 * KBASE_REG_TILER_ALIGN_TOP regions */
+	size_t initial_commit;
+
+/* Free region */
+#define KBASE_REG_FREE              (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR            (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR            (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX            (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED        (1ul << 4)
+/* Is GPU cached?
+ * Some components within the GPU might only be able to access memory that is
+ * GPU cacheable. Refer to the specific GPU implementation for more details.
+ */
+#define KBASE_REG_GPU_CACHED        (1ul << 5)
+
+#define KBASE_REG_GROWABLE          (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW           (1ul << 7)
+
+/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */
+#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8)
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN          (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH        (1ul << 10)
+
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_MASK         (3ul << 11)
+#define KBASE_REG_ZONE(x)           (((x) & 3) << 11)
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD            (1ul<<13)
+/* CPU read access */
+#define KBASE_REG_CPU_RD            (1ul<<14)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK      (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x)  (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x)  (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_SECURE            (1ul << 19)
+
+#define KBASE_REG_DONT_NEED         (1ul << 20)
+
+/* Imported buffer is padded? */
+#define KBASE_REG_IMPORT_PAD        (1ul << 21)
+
+/* Bit 22 is reserved.
+ *
+ * Do not remove, use the next unreserved bit for new flags */
+#define KBASE_REG_RESERVED_BIT_22   (1ul << 22)
+
+/* The top of the initial commit is aligned to extent pages.
+ * Extent must be a power of 2 */
+#define KBASE_REG_TILER_ALIGN_TOP   (1ul << 23)
+
+/* Whilst this flag is set the GPU allocation is not supposed to be freed by
+ * user space. The flag will remain set for the lifetime of JIT allocations.
+ */
+#define KBASE_REG_NO_USER_FREE      (1ul << 24)
+
+/* Memory has permanent kernel side mapping */
+#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25)
+
+/* GPU VA region has been freed by the userspace, but still remains allocated
+ * due to the reference held by CPU mappings created on the GPU VA region.
+ *
+ * A region with this flag set has had kbase_gpu_munmap() called on it, but can
+ * still be looked-up in the region tracker as a non-free region. Hence must
+ * not create or update any more GPU mappings on such regions because they will
+ * not be unmapped when the region is finally destroyed.
+ *
+ * Since such regions are still present in the region tracker, new allocations
+ * attempted with BASE_MEM_SAME_VA might fail if their address intersects with
+ * a region with this flag set.
+ *
+ * In addition, this flag indicates the gpu_alloc member might no longer valid
+ * e.g. in infinite cache simulation.
+ */
+#define KBASE_REG_VA_FREED (1ul << 26)
+
+#define KBASE_REG_ZONE_SAME_VA      KBASE_REG_ZONE(0)
+
+/* only used with 32-bit clients */
+/*
+ * On a 32bit platform, custom VA should be wired from 4GB
+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink
+ * it down, if required by the GPU, during initialization.
+ */
+
+#define KBASE_REG_ZONE_CUSTOM_VA         KBASE_REG_ZONE(1)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE    (0x100000000ULL >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE    (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
+
+/* The starting address and size of the GPU-executable zone are dynamic
+ * and depend on the platform and the number of pages requested by the
+ * user process, with an upper limit of 4 GB.
+ */
+#define KBASE_REG_ZONE_EXEC_VA           KBASE_REG_ZONE(2)
+#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */
+
+
+	unsigned long flags;
+
+	size_t extent; /* nr of pages alloc'd on PF */
+
+	struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */
+	struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */
+
+	/* List head used to store the region in the JIT allocation pool */
+	struct list_head jit_node;
+	/* The last JIT usage ID for this region */
+	u16 jit_usage_id;
+	/* The JIT bin this allocation came from */
+	u8 jit_bin_id;
+
+	int    va_refcnt; /* number of users of this va */
+};
+
+static inline bool kbase_is_region_free(struct kbase_va_region *reg)
+{
+	return (!reg || reg->flags & KBASE_REG_FREE);
+}
+
+static inline bool kbase_is_region_invalid(struct kbase_va_region *reg)
+{
+	return (!reg || reg->flags & KBASE_REG_VA_FREED);
+}
+
+static inline bool kbase_is_region_invalid_or_free(struct kbase_va_region *reg)
+{
+	/* Possibly not all functions that find regions would be using this
+	 * helper, so they need to be checked when maintaining this function.
+	 */
+	return (kbase_is_region_invalid(reg) ||	kbase_is_region_free(reg));
+}
+
+int kbase_remove_va_region(struct kbase_va_region *reg);
+static inline void kbase_region_refcnt_free(struct kbase_va_region *reg)
+{
+	/* If region was mapped then remove va region*/
+	if (reg->start_pfn)
+		kbase_remove_va_region(reg);
+
+	/* To detect use-after-free in debug builds */
+	KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
+	kfree(reg);
+}
+
+static inline struct kbase_va_region *kbase_va_region_alloc_get(
+		struct kbase_context *kctx, struct kbase_va_region *region)
+{
+	lockdep_assert_held(&kctx->reg_lock);
+
+	WARN_ON(!region->va_refcnt);
+
+	/* non-atomic as kctx->reg_lock is held */
+	region->va_refcnt++;
+
+	return region;
+}
+
+static inline struct kbase_va_region *kbase_va_region_alloc_put(
+		struct kbase_context *kctx, struct kbase_va_region *region)
+{
+	lockdep_assert_held(&kctx->reg_lock);
+
+	WARN_ON(region->va_refcnt <= 0);
+	WARN_ON(region->flags & KBASE_REG_FREE);
+
+	/* non-atomic as kctx->reg_lock is held */
+	region->va_refcnt--;
+	if (!region->va_refcnt)
+		kbase_region_refcnt_free(region);
+
+	return NULL;
+}
+
+/* Common functions */
+static inline struct tagged_addr *kbase_get_cpu_phy_pages(
+		struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->pages;
+}
+
+static inline struct tagged_addr *kbase_get_gpu_phy_pages(
+		struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->gpu_alloc->pages;
+}
+
+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	/* if no alloc object the backed size naturally is 0 */
+	if (!reg->cpu_alloc)
+		return 0;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->nents;
+}
+
+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
+
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(
+		struct kbase_context *kctx, size_t nr_pages,
+		enum kbase_memory_type type, int group_id)
+{
+	struct kbase_mem_phy_alloc *alloc;
+	size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+	size_t per_page_size = sizeof(*alloc->pages);
+
+	/* Imported pages may have page private data already in use */
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		alloc_size += nr_pages *
+				sizeof(*alloc->imported.user_buf.dma_addrs);
+		per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
+	}
+
+	/*
+	 * Prevent nr_pages*per_page_size + sizeof(*alloc) from
+	 * wrapping around.
+	 */
+	if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+			/ per_page_size))
+		return ERR_PTR(-ENOMEM);
+
+	/* Allocate based on the size to reduce internal fragmentation of vmem */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc = vzalloc(alloc_size);
+	else
+		alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+	if (!alloc)
+		return ERR_PTR(-ENOMEM);
+
+	if (type == KBASE_MEM_TYPE_NATIVE) {
+		alloc->imported.native.nr_struct_pages =
+				(alloc_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+		kbase_process_page_usage_inc(kctx,
+				alloc->imported.native.nr_struct_pages);
+	}
+
+	/* Store allocation method */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+	kref_init(&alloc->kref);
+	atomic_set(&alloc->gpu_mappings, 0);
+	alloc->nents = 0;
+	alloc->pages = (void *)(alloc + 1);
+	INIT_LIST_HEAD(&alloc->mappings);
+	alloc->type = type;
+	alloc->group_id = group_id;
+
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
+		alloc->imported.user_buf.dma_addrs =
+				(void *) (alloc->pages + nr_pages);
+
+	return alloc;
+}
+
+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
+		struct kbase_context *kctx, int group_id)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+	reg->cpu_alloc = kbase_alloc_create(kctx, reg->nr_pages,
+			KBASE_MEM_TYPE_NATIVE, group_id);
+	if (IS_ERR(reg->cpu_alloc))
+		return PTR_ERR(reg->cpu_alloc);
+	else if (!reg->cpu_alloc)
+		return -ENOMEM;
+
+	reg->cpu_alloc->imported.native.kctx = kctx;
+	if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE)
+	    && (reg->flags & KBASE_REG_CPU_CACHED)) {
+		reg->gpu_alloc = kbase_alloc_create(kctx, reg->nr_pages,
+				KBASE_MEM_TYPE_NATIVE, group_id);
+		if (IS_ERR_OR_NULL(reg->gpu_alloc)) {
+			kbase_mem_phy_alloc_put(reg->cpu_alloc);
+			return -ENOMEM;
+		}
+		reg->gpu_alloc->imported.native.kctx = kctx;
+	} else {
+		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	}
+
+	mutex_lock(&kctx->jit_evict_lock);
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+	INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	reg->flags &= ~KBASE_REG_FREE;
+
+	return 0;
+}
+
+/*
+ * Max size for kbdev memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * Max size for kctx memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KCTX  (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * The order required for a 2MB page allocation (2^order * 4KB = 2MB)
+ */
+#define KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER	9
+
+/*
+ * The order required for a 4KB page allocation
+ */
+#define KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER	0
+
+/**
+ * kbase_mem_pool_config_set_max_size - Set maximum number of free pages in
+ *                                      initial configuration of a memory pool
+ *
+ * @config:   Initial configuration for a physical memory pool
+ * @max_size: Maximum number of free pages that a pool created from
+ *            @config can hold
+ */
+static inline void kbase_mem_pool_config_set_max_size(
+	struct kbase_mem_pool_config *const config, size_t const max_size)
+{
+	WRITE_ONCE(config->max_size, max_size);
+}
+
+/**
+ * kbase_mem_pool_config_get_max_size - Get maximum number of free pages from
+ *                                      initial configuration of a memory pool
+ *
+ * @config: Initial configuration for a physical memory pool
+ *
+ * Return: Maximum number of free pages that a pool created from @config
+ *         can hold
+ */
+static inline size_t kbase_mem_pool_config_get_max_size(
+	const struct kbase_mem_pool_config *const config)
+{
+	return READ_ONCE(config->max_size);
+}
+
+/**
+ * kbase_mem_pool_init - Create a memory pool for a kbase device
+ * @pool:      Memory pool to initialize
+ * @config:    Initial configuration for the memory pool
+ * @order:     Page order for physical page size (order=0=>4kB, order=9=>2MB)
+ * @group_id:  A memory group ID to be passed to a platform-specific
+ *             memory group manager, if present.
+ *             Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @kbdev:     Kbase device where memory is used
+ * @next_pool: Pointer to the next pool or NULL.
+ *
+ * Allocations from @pool are in whole pages. Each @pool has a free list where
+ * pages can be quickly allocated from. The free list is initially empty and
+ * filled whenever pages are freed back to the pool. The number of free pages
+ * in the pool will in general not exceed @max_size, but the pool may in
+ * certain corner cases grow above @max_size.
+ *
+ * If @next_pool is not NULL, we will allocate from @next_pool before going to
+ * the memory group manager. Similarly pages can spill over to @next_pool when
+ * @pool is full. Pages are zeroed before they spill over to another pool, to
+ * prevent leaking information between applications.
+ *
+ * A shrinker is registered so that Linux mm can reclaim pages from the pool as
+ * needed.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		const struct kbase_mem_pool_config *config,
+		unsigned int order,
+		int group_id,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool);
+
+/**
+ * kbase_mem_pool_term - Destroy a memory pool
+ * @pool:  Memory pool to destroy
+ *
+ * Pages in the pool will spill over to @next_pool (if available) or freed to
+ * the kernel.
+ */
+void kbase_mem_pool_term(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_alloc - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * Allocations from the pool are made as follows:
+ * 1. If there are free pages in the pool, allocate a page from @pool.
+ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page
+ *    from @next_pool.
+ * 3. Return NULL if no memory in the pool
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ *
+ * Note : This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_alloc_locked() instead.
+ */
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_alloc_locked - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * If there are free pages in the pool, this function allocates a page from
+ * @pool. This function does not use @next_pool.
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ *
+ * Note : Caller must hold the pool lock.
+ */
+struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_free - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @page:  Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * Pages are freed to the pool as follows:
+ * 1. If @pool is not full, add @page to @pool.
+ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to
+ *    @next_pool.
+ * 3. Finally, free @page to the kernel.
+ *
+ * Note : This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_free_locked() instead.
+ */
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page,
+		bool dirty);
+
+/**
+ * kbase_mem_pool_free_locked - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @p:     Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * If @pool is not full, this function adds @page to @pool. Otherwise, @page is
+ * freed to the kernel. This function does not use @next_pool.
+ *
+ * Note : Caller must hold the pool lock.
+ */
+void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty);
+
+/**
+ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool
+ * @pool:     Memory pool to allocate from
+ * @nr_4k_pages: Number of pages to allocate
+ * @pages:    Pointer to array where the physical address of the allocated
+ *            pages will be stored.
+ * @partial_allowed: If fewer pages allocated is allowed
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages.
+ *
+ * Return:
+ * On success number of pages allocated (could be less than nr_pages if
+ * partial_allowed).
+ * On error an error code.
+ *
+ * Note : This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_alloc_pages_locked() instead.
+ *
+ * The caller must not hold vm_lock, as this could cause a deadlock if
+ * the kernel OoM killer runs. If the caller must allocate pages while holding
+ * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
+ */
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
+		struct tagged_addr *pages, bool partial_allowed);
+
+/**
+ * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool
+ * @pool:        Memory pool to allocate from
+ * @nr_4k_pages: Number of pages to allocate
+ * @pages:       Pointer to array where the physical address of the allocated
+ *               pages will be stored.
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages. This
+ * version does not allocate new pages from the kernel, and therefore will never
+ * trigger the OoM killer. Therefore, it can be run while the vm_lock is held.
+ *
+ * As new pages can not be allocated, the caller must ensure there are
+ * sufficient pages in the pool. Usage of this function should look like :
+ *
+ *   kbase_gpu_vm_lock(kctx);
+ *   kbase_mem_pool_lock(pool)
+ *   while (kbase_mem_pool_size(pool) < pages_required) {
+ *     kbase_mem_pool_unlock(pool)
+ *     kbase_gpu_vm_unlock(kctx);
+ *     kbase_mem_pool_grow(pool)
+ *     kbase_gpu_vm_lock(kctx);
+ *     kbase_mem_pool_lock(pool)
+ *   }
+ *   kbase_mem_pool_alloc_pages_locked(pool)
+ *   kbase_mem_pool_unlock(pool)
+ *   Perform other processing that requires vm_lock...
+ *   kbase_gpu_vm_unlock(kctx);
+ *
+ * This ensures that the pool can be grown to the required size and that the
+ * allocation can complete without another thread using the newly grown pages.
+ *
+ * Return:
+ * On success number of pages allocated.
+ * On error an error code.
+ *
+ * Note : Caller must hold the pool lock.
+ */
+int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_4k_pages, struct tagged_addr *pages);
+
+/**
+ * kbase_mem_pool_free_pages - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		struct tagged_addr *pages, bool dirty, bool reclaimed);
+
+/**
+ * kbase_mem_pool_free_pages_locked - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages, bool dirty,
+		bool reclaimed);
+
+/**
+ * kbase_mem_pool_size - Get number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Note: the size of the pool may in certain corner cases exceed @max_size!
+ *
+ * Return: Number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool)
+{
+	return READ_ONCE(pool->cur_size);
+}
+
+/**
+ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Return: Maximum number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool)
+{
+	return pool->max_size;
+}
+
+
+/**
+ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool
+ * @pool:     Memory pool to inspect
+ * @max_size: Maximum number of free pages the pool can hold
+ *
+ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit.
+ * For details see kbase_mem_pool_shrink().
+ */
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
+
+/**
+ * kbase_mem_pool_grow - Grow the pool
+ * @pool:       Memory pool to grow
+ * @nr_to_grow: Number of pages to add to the pool
+ *
+ * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
+ * become larger than the maximum size specified.
+ *
+ * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages
+ */
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+
+/**
+ * kbase_mem_pool_trim - Grow or shrink the pool to a new size
+ * @pool:     Memory pool to trim
+ * @new_size: New number of pages in the pool
+ *
+ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but
+ * not above the max_size for the pool.
+ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel.
+ */
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
+
+/**
+ * kbase_mem_pool_mark_dying - Mark that this pool is dying
+ * @pool:     Memory pool
+ *
+ * This will cause any ongoing allocation operations (eg growing on page fault)
+ * to be terminated.
+ */
+void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_alloc_page - Allocate a new page for a device
+ * @pool:  Memory pool to allocate a page from
+ *
+ * Most uses should use kbase_mem_pool_alloc to allocate a page. However that
+ * function can fail in the event the pool is empty.
+ *
+ * Return: A new page or NULL if no memory
+ */
+struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_region_tracker_init - Initialize the region tracker data structure
+ * @kctx: kbase context
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx);
+
+/**
+ * kbase_region_tracker_init_jit - Initialize the JIT region
+ * @kctx: kbase context
+ * @jit_va_pages: Size of the JIT region in pages
+ * @max_allocations: Maximum number of allocations allowed for the JIT region
+ * @trim_level: Trim level for the JIT region
+ * @group_id: The physical group ID from which to allocate JIT memory.
+ *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
+		u8 max_allocations, u8 trim_level, int group_id);
+
+/**
+ * kbase_region_tracker_init_exec - Initialize the EXEC_VA region
+ * @kctx: kbase context
+ * @exec_va_pages: Size of the JIT region in pages.
+ *                 It must not be greater than 4 GB.
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
+int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages);
+
+/**
+ * kbase_region_tracker_term - Terminate the JIT region
+ * @kctx: kbase context
+ */
+void kbase_region_tracker_term(struct kbase_context *kctx);
+
+/**
+ * kbase_region_tracker_term_rbtree - Free memory for a region tracker
+ *
+ * This will free all the regions within the region tracker
+ *
+ * @rbtree: Region tracker tree root
+ */
+void kbase_region_tracker_term_rbtree(struct rb_root *rbtree);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
+		struct kbase_context *kctx, u64 gpu_addr);
+struct kbase_va_region *kbase_find_region_enclosing_address(
+		struct rb_root *rbtree, u64 gpu_addr);
+
+/**
+ * @brief Check that a pointer is actually a valid region.
+ *
+ * Must be called with context lock held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(
+		struct kbase_context *kctx, u64 gpu_addr);
+struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree,
+		u64 gpu_addr);
+
+struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
+		u64 start_pfn, size_t nr_pages, int zone);
+void kbase_free_alloced_region(struct kbase_va_region *reg);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg,
+		u64 addr, size_t nr_pages, size_t align);
+int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
+		struct kbase_va_region *reg, u64 addr, size_t nr_pages,
+		size_t align);
+
+bool kbase_check_alloc_flags(unsigned long flags);
+bool kbase_check_import_flags(unsigned long flags);
+
+/**
+ * kbase_check_alloc_sizes - check user space sizes parameters for an
+ *                           allocation
+ *
+ * @kctx:         kbase context
+ * @flags:        The flags passed from user space
+ * @va_pages:     The size of the requested region, in pages.
+ * @commit_pages: Number of pages to commit initially.
+ * @extent:       Number of pages to grow by on GPU page fault and/or alignment
+ *                (depending on flags)
+ *
+ * Makes checks on the size parameters passed in from user space for a memory
+ * allocation call, with respect to the flags requested.
+ *
+ * Return: 0 if sizes are valid for these flags, negative error code otherwise
+ */
+int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
+		u64 va_pages, u64 commit_pages, u64 extent);
+
+/**
+ * kbase_update_region_flags - Convert user space flags to kernel region flags
+ *
+ * @kctx:  kbase context
+ * @reg:   The region to update the flags on
+ * @flags: The flags passed from user space
+ *
+ * The user space flag BASE_MEM_COHERENT_SYSTEM_REQUIRED will be rejected and
+ * this function will fail if the system does not support system coherency.
+ *
+ * Return: 0 if successful, -EINVAL if the flags are not supported
+ */
+int kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags);
+
+void kbase_gpu_vm_lock(struct kbase_context *kctx);
+void kbase_gpu_vm_unlock(struct kbase_context *kctx);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
+
+/**
+ * kbase_mmu_init - Initialise an object representing GPU page tables
+ *
+ * The structure should be terminated using kbase_mmu_term()
+ *
+ * @kbdev:    Instance of GPU platform device, allocated from the probe method.
+ * @mmut:     GPU page tables to be initialized.
+ * @kctx:     Optional kbase context, may be NULL if this set of MMU tables
+ *            is not associated with a context.
+ * @group_id: The physical group ID from which to allocate GPU page tables.
+ *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ *
+ * Return:    0 if successful, otherwise a negative error code.
+ */
+int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+		struct kbase_context *kctx, int group_id);
+/**
+ * kbase_mmu_term - Terminate an object representing GPU page tables
+ *
+ * This will free any page tables that have been allocated
+ *
+ * @kbdev: Instance of GPU platform device, allocated from the probe method.
+ * @mmut:  GPU page tables to be destroyed.
+ */
+void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut);
+
+/**
+ * kbase_mmu_create_ate - Create an address translation entry
+ *
+ * @kbdev:    Instance of GPU platform device, allocated from the probe method.
+ * @phy:      Physical address of the page to be mapped for GPU access.
+ * @flags:    Bitmask of attributes of the GPU memory region being mapped.
+ * @level:    Page table level for which to build an address translation entry.
+ * @group_id: The physical memory group in which the page was allocated.
+ *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ *
+ * This function creates an address translation entry to encode the physical
+ * address of a page to be mapped for access by the GPU, along with any extra
+ * attributes required for the GPU memory region.
+ *
+ * Return: An address translation entry, either in LPAE or AArch64 format
+ *         (depending on the driver's configuration).
+ */
+u64 kbase_mmu_create_ate(struct kbase_device *kbdev,
+	struct tagged_addr phy, unsigned long flags, int level, int group_id);
+
+int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
+				    struct kbase_mmu_table *mmut,
+				    const u64 start_vpfn,
+				    struct tagged_addr *phys, size_t nr,
+				    unsigned long flags, int group_id);
+int kbase_mmu_insert_pages(struct kbase_device *kbdev,
+			   struct kbase_mmu_table *mmut, u64 vpfn,
+			   struct tagged_addr *phys, size_t nr,
+			   unsigned long flags, int as_nr, int group_id);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr phys, size_t nr,
+					unsigned long flags, int group_id);
+
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
+			     struct kbase_mmu_table *mmut, u64 vpfn,
+			     size_t nr, int as_nr);
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
+			   struct tagged_addr *phys, size_t nr,
+			   unsigned long flags, int const group_id);
+
+/**
+ * @brief Register region and map it on the GPU.
+ *
+ * Call kbase_add_va_region() and map the region on the GPU.
+ */
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+/**
+ * @brief Remove the region from the GPU and unregister it.
+ *
+ * Must be called with context lock held.
+ */
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_mmu_update - Configure an address space on the GPU to the specified
+ *                    MMU tables
+ *
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ *
+ * @kbdev: Kbase device structure
+ * @mmut:  The set of MMU tables to be configured on the address space
+ * @as_nr: The address space to be configured
+ */
+void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+		int as_nr);
+
+/**
+ * kbase_mmu_disable() - Disable the MMU for a previously active kbase context.
+ * @kctx:	Kbase context
+ *
+ * Disable and perform the required cache maintenance to remove the all
+ * data from provided kbase context from the GPU caches.
+ *
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ */
+void kbase_mmu_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified
+ * address space.
+ * @kbdev:	Kbase device
+ * @as_nr:	The address space number to set to unmapped.
+ *
+ * This function must only be called during reset/power-up and it used to
+ * ensure the registers are in a known state.
+ *
+ * The caller must hold kbdev->mmu_hw_mutex.
+ */
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr);
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** Dump the MMU tables to a buffer
+ *
+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the
+ * buffer is too small then the return value will be NULL.
+ *
+ * The GPU vm lock must be held when calling this function.
+ *
+ * The buffer returned should be freed with @ref vfree when it is no longer required.
+ *
+ * @param[in]   kctx        The kbase context to dump
+ * @param[in]   nr_pages    The number of pages to allocate for the buffer.
+ *
+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too
+ * small)
+ */
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages);
+
+/**
+ * kbase_sync_now - Perform cache maintenance on a memory region
+ *
+ * @kctx: The kbase context of the region
+ * @sset: A syncset structure describing the region and direction of the
+ *        synchronisation required
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset);
+void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr cpu_pa,
+		struct tagged_addr gpu_pa, off_t offset, size_t size,
+		enum kbase_sync_type sync_fn);
+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+
+/* OS specific functions */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr);
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg);
+void kbase_os_mem_map_lock(struct kbase_context *kctx);
+void kbase_os_mem_map_unlock(struct kbase_context *kctx);
+
+/**
+ * @brief Update the memory allocation counters for the current process
+ *
+ * OS specific call to updates the current memory allocation counters for the current process with
+ * the supplied delta.
+ *
+ * @param[in] kctx  The kbase context
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
+
+/**
+ * @brief Add to the memory allocation counters for the current process
+ *
+ * OS specific call to add to the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, pages);
+}
+
+/**
+ * @brief Subtract from the memory allocation counters for the current process
+ *
+ * OS specific call to subtract from the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, 0 - pages);
+}
+
+/**
+ * kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU
+ * mapping of a memory allocation containing a given address range
+ *
+ * Searches for a CPU mapping of any part of any region that fully encloses the
+ * CPU virtual address range specified by @uaddr and @size. Returns a failure
+ * indication if only part of the address range lies within a CPU mapping.
+ *
+ * @kctx:      The kernel base context used for the allocation.
+ * @uaddr:     Start of the CPU virtual address range.
+ * @size:      Size of the CPU virtual address range (in bytes).
+ * @offset:    The offset from the start of the allocation to the specified CPU
+ *             virtual address.
+ *
+ * Return: 0 if offset was obtained successfully. Error code otherwise.
+ */
+int kbasep_find_enclosing_cpu_mapping_offset(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset);
+
+/**
+ * kbasep_find_enclosing_gpu_mapping_start_and_offset() - Find the address of
+ * the start of GPU virtual memory region which encloses @gpu_addr for the
+ * @size length in bytes
+ *
+ * Searches for the memory region in GPU virtual memory space which contains
+ * the region defined by the @gpu_addr and @size, where @gpu_addr is the
+ * beginning and @size the length in bytes of the provided region. If found,
+ * the location of the start address of the GPU virtual memory region is
+ * passed in @start pointer and the location of the offset of the region into
+ * the GPU virtual memory region is passed in @offset pointer.
+ *
+ * @kctx:	The kernel base context within which the memory is searched.
+ * @gpu_addr:	GPU virtual address for which the region is sought; defines
+ *              the beginning of the provided region.
+ * @size:       The length (in bytes) of the provided region for which the
+ *              GPU virtual memory region is sought.
+ * @start:      Pointer to the location where the address of the start of
+ *              the found GPU virtual memory region is.
+ * @offset:     Pointer to the location where the offset of @gpu_addr into
+ *              the found GPU virtual memory region is.
+ */
+int kbasep_find_enclosing_gpu_mapping_start_and_offset(
+		struct kbase_context *kctx,
+		u64 gpu_addr, size_t size, u64 *start, u64 *offset);
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer);
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * kbase_alloc_phy_pages_helper - Allocates physical pages.
+ * @alloc:              allocation object to add pages to
+ * @nr_pages_requested: number of physical pages to allocate
+ *
+ * Allocates \a nr_pages_requested and updates the alloc object.
+ *
+ * Return: 0 if all pages have been successfully allocated. Error code otherwise
+ *
+ * Note : The caller must not hold vm_lock, as this could cause a deadlock if
+ * the kernel OoM killer runs. If the caller must allocate pages while holding
+ * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
+ *
+ * This function cannot be used from interrupt context
+ */
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
+		size_t nr_pages_requested);
+
+/**
+ * kbase_alloc_phy_pages_helper_locked - Allocates physical pages.
+ * @alloc:              allocation object to add pages to
+ * @pool:               Memory pool to allocate from
+ * @nr_pages_requested: number of physical pages to allocate
+ * @prealloc_sa:        Information about the partial allocation if the amount
+ *                      of memory requested is not a multiple of 2MB. One
+ *                      instance of struct kbase_sub_alloc must be allocated by
+ *                      the caller iff CONFIG_MALI_2MB_ALLOC is enabled.
+ *
+ * Allocates \a nr_pages_requested and updates the alloc object. This function
+ * does not allocate new pages from the kernel, and therefore will never trigger
+ * the OoM killer. Therefore, it can be run while the vm_lock is held.
+ *
+ * As new pages can not be allocated, the caller must ensure there are
+ * sufficient pages in the pool. Usage of this function should look like :
+ *
+ *   kbase_gpu_vm_lock(kctx);
+ *   kbase_mem_pool_lock(pool)
+ *   while (kbase_mem_pool_size(pool) < pages_required) {
+ *     kbase_mem_pool_unlock(pool)
+ *     kbase_gpu_vm_unlock(kctx);
+ *     kbase_mem_pool_grow(pool)
+ *     kbase_gpu_vm_lock(kctx);
+ *     kbase_mem_pool_lock(pool)
+ *   }
+ *   kbase_alloc_phy_pages_helper_locked(pool)
+ *   kbase_mem_pool_unlock(pool)
+ *   Perform other processing that requires vm_lock...
+ *   kbase_gpu_vm_unlock(kctx);
+ *
+ * This ensures that the pool can be grown to the required size and that the
+ * allocation can complete without another thread using the newly grown pages.
+ *
+ * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then
+ * @pool must be alloc->imported.native.kctx->lp_mem_pool. Otherwise it must be
+ * alloc->imported.native.kctx->mem_pool.
+ * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be
+ * pre-allocated because we must not sleep (due to the usage of kmalloc())
+ * whilst holding pool->pool_lock.
+ * @prealloc_sa shall be set to NULL if it has been consumed by this function
+ * to indicate that the caller must not free it.
+ *
+ * Return: Pointer to array of allocated pages. NULL on failure.
+ *
+ * Note : Caller must hold pool->pool_lock
+ */
+struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
+		struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool,
+		size_t nr_pages_requested,
+		struct kbase_sub_alloc **prealloc_sa);
+
+/**
+* @brief Free physical pages.
+*
+* Frees \a nr_pages and updates the alloc object.
+*
+* @param[in] alloc allocation object to free pages from
+* @param[in] nr_pages_to_free number of physical pages to free
+*
+* Return: 0 on success, otherwise a negative error code
+*/
+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
+
+/**
+ * kbase_free_phy_pages_helper_locked - Free pages allocated with
+ *                                      kbase_alloc_phy_pages_helper_locked()
+ * @alloc:            Allocation object to free pages from
+ * @pool:             Memory pool to return freed pages to
+ * @pages:            Pages allocated by kbase_alloc_phy_pages_helper_locked()
+ * @nr_pages_to_free: Number of physical pages to free
+ *
+ * This function atomically frees pages allocated with
+ * kbase_alloc_phy_pages_helper_locked(). @pages is the pointer to the page
+ * array that is returned by that function. @pool must be the pool that the
+ * pages were originally allocated from.
+ *
+ * If the mem_pool has been unlocked since the allocation then
+ * kbase_free_phy_pages_helper() should be used instead.
+ */
+void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
+		struct kbase_mem_pool *pool, struct tagged_addr *pages,
+		size_t nr_pages_to_free);
+
+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+{
+	SetPagePrivate(p);
+	if (sizeof(dma_addr_t) > sizeof(p->private)) {
+		/* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
+		 * private field stays the same. So we have to be clever and
+		 * use the fact that we only store DMA addresses of whole pages,
+		 * so the low bits should be zero */
+		KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
+		set_page_private(p, dma_addr >> PAGE_SHIFT);
+	} else {
+		set_page_private(p, dma_addr);
+	}
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+	if (sizeof(dma_addr_t) > sizeof(p->private))
+		return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
+
+	return (dma_addr_t)page_private(p);
+}
+
+static inline void kbase_clear_dma_addr(struct page *p)
+{
+	ClearPagePrivate(p);
+}
+
+/**
+ * kbase_mmu_interrupt_process - Process a bus or page fault.
+ * @kbdev   The kbase_device the fault happened on
+ * @kctx    The kbase_context for the faulting address space if one was found.
+ * @as      The address space that has the fault
+ * @fault   Data relating to the fault
+ *
+ * This function will process a fault on a specific address space
+ */
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_as *as,
+		struct kbase_fault *fault);
+
+
+/**
+ * @brief Process a page fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void page_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Process a bus fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void bus_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Flush MMU workqueues.
+ *
+ * This function will cause any outstanding page or bus faults to be processed.
+ * It should be called prior to powering off the GPU.
+ *
+ * @param[in] kbdev   Device pointer
+ */
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev);
+
+/**
+ * kbase_sync_single_for_device - update physical memory and give GPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * kbase_jit_debugfs_init - Add per context debugfs entry for JIT.
+ * @kctx: kbase context
+ */
+void kbase_jit_debugfs_init(struct kbase_context *kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_init - Initialize the JIT memory pool management
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_jit_init(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_allocate - Allocate JIT memory
+ * @kctx: kbase context
+ * @info: JIT allocation information
+ *
+ * Return: JIT allocation on success or NULL on failure.
+ */
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info);
+
+/**
+ * kbase_jit_free - Free a JIT allocation
+ * @kctx: kbase context
+ * @reg: JIT allocation
+ *
+ * Frees a JIT allocation and places it into the free pool for later reuse.
+ */
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing
+ * @reg: JIT allocation
+ */
+void kbase_jit_backing_lost(struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_evict - Evict a JIT allocation from the pool
+ * @kctx: kbase context
+ *
+ * Evict the least recently used JIT allocation from the pool. This can be
+ * required if normal VA allocations are failing due to VA exhaustion.
+ *
+ * Return: True if a JIT allocation was freed, false otherwise.
+ */
+bool kbase_jit_evict(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_term - Terminate the JIT memory pool management
+ * @kctx: kbase context
+ */
+void kbase_jit_term(struct kbase_context *kctx);
+
+/**
+ * kbase_has_exec_va_zone - EXEC_VA zone predicate
+ *
+ * Determine whether an EXEC_VA zone has been created for the GPU address space
+ * of the given kbase context.
+ *
+ * @kctx: kbase context
+ *
+ * Return: True if the kbase context has an EXEC_VA zone.
+ */
+bool kbase_has_exec_va_zone(struct kbase_context *kctx);
+
+/**
+ * kbase_map_external_resource - Map an external resource to the GPU.
+ * @kctx:              kbase context.
+ * @reg:               The region to map.
+ * @locked_mm:         The mm_struct which has been locked for this operation.
+ *
+ * Return: The physical allocation which backs the region on success or NULL
+ * on failure.
+ */
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm);
+
+/**
+ * kbase_unmap_external_resource - Unmap an external resource from the GPU.
+ * @kctx:  kbase context.
+ * @reg:   The region to unmap or NULL if it has already been released.
+ * @alloc: The physical allocation being unmapped.
+ */
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
+
+/**
+ * kbase_jd_user_buf_pin_pages - Pin the pages of a user buffer.
+ * @kctx: kbase context.
+ * @reg:  The region associated with the imported user buffer.
+ *
+ * To successfully pin the pages for a user buffer the current mm_struct must
+ * be the same as the mm_struct of the user buffer. After successfully pinning
+ * the pages further calls to this function succeed without doing work.
+ *
+ * Return: zero on success or negative number on failure.
+ */
+int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx,
+		struct kbase_va_region *reg);
+
+/**
+ * kbase_sticky_resource_init - Initialize sticky resource management.
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_sticky_resource_init(struct kbase_context *kctx);
+
+/**
+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @gpu_addr: The GPU address of the external resource.
+ *
+ * Return: The metadata object which represents the binding between the
+ * external resource and the kbase context on success or NULL on failure.
+ */
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_release - Release a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @meta:     Binding metadata.
+ * @gpu_addr: GPU address of the external resource.
+ *
+ * If meta is NULL then gpu_addr will be used to scan the metadata list and
+ * find the matching metadata (if any), otherwise the provided meta will be
+ * used and gpu_addr will be ignored.
+ *
+ * Return: True if the release found the metadata and the reference was dropped.
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_term - Terminate sticky resource management.
+ * @kctx: kbase context
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_pool_lock - Lock a memory pool
+ * @pool: Memory pool to lock
+ */
+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
+{
+	spin_lock(&pool->pool_lock);
+}
+
+/**
+ * kbase_mem_pool_lock - Release a memory pool
+ * @pool: Memory pool to lock
+ */
+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
+{
+	spin_unlock(&pool->pool_lock);
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc);
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+/**
+ * kbase_mem_umm_map - Map dma-buf
+ * @kctx: Pointer to the kbase context
+ * @reg: Pointer to the region of the imported dma-buf to map
+ *
+ * Map a dma-buf on the GPU. The mappings are reference counted.
+ *
+ * Returns 0 on success, or a negative error code.
+ */
+int kbase_mem_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg);
+
+/**
+ * kbase_mem_umm_unmap - Unmap dma-buf
+ * @kctx: Pointer to the kbase context
+ * @reg: Pointer to the region of the imported dma-buf to unmap
+ * @alloc: Pointer to the alloc to release
+ *
+ * Unmap a dma-buf from the GPU. The mappings are reference counted.
+ *
+ * @reg must be the original region with GPU mapping of @alloc; or NULL. If
+ * @reg is NULL, or doesn't match @alloc, the GPU page table entries matching
+ * @reg will not be updated.
+ *
+ * @alloc must be a valid physical allocation of type
+ * KBASE_MEM_TYPE_IMPORTED_UMM that was previously mapped by
+ * kbase_mem_umm_map(). The dma-buf attachment referenced by @alloc will
+ * release it's mapping reference, and if the refcount reaches 0, also be be
+ * unmapped, regardless of the value of @reg.
+ */
+void kbase_mem_umm_unmap(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_mem_do_sync_imported - Sync caches for imported memory
+ * @kctx: Pointer to the kbase context
+ * @reg: Pointer to the region with imported memory to sync
+ * @sync_fn: The type of sync operation to perform
+ *
+ * Sync CPU caches for supported (currently only dma-buf (UMM)) memory.
+ * Attempting to sync unsupported imported memory types will result in an error
+ * code, -EINVAL.
+ *
+ * Return: 0 on success, or a negative error code.
+ */
+int kbase_mem_do_sync_imported(struct kbase_context *kctx,
+		struct kbase_va_region *reg, enum kbase_sync_type sync_fn);
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+#endif				/* _KBASE_MEM_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
new file mode 100755
index 0000000..9e121f0
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -0,0 +1,3012 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_linux.c
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/shrinker.h>
+#include <linux/cache.h>
+#include <linux/memory_group_manager.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tracepoints.h>
+#include <mali_kbase_ioctl.h>
+
+#if KERNEL_VERSION(4, 17, 2) > LINUX_VERSION_CODE
+/* Enable workaround for ion for versions prior to v4.17.2 to avoid the potentially
+ * disruptive warnings which can come if begin_cpu_access and end_cpu_access
+ * methods are not called in pairs.
+ *
+ * dma_sync_sg_for_* calls will be made directly as a workaround.
+ *
+ * Note that some long term maintenance kernel versions (e.g. 4.9.x, 4.14.x) only require this
+ * workaround on their earlier releases. However it is still safe to use it on such releases, and
+ * it simplifies the version check.
+ *
+ * This will also address the case on kernels prior to 4.12, where ion lacks
+ * the cache maintenance in begin_cpu_access and end_cpu_access methods.
+ */
+#define KBASE_MEM_ION_SYNC_WORKAROUND
+#endif
+
+
+static int kbase_vmap_phy_pages(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 offset_bytes, size_t size,
+		struct kbase_vmap_struct *map);
+static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
+		struct kbase_vmap_struct *map);
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+
+/* Retrieve the associated region pointer if the GPU address corresponds to
+ * one of the event memory pages. The enclosing region, if found, shouldn't
+ * have been marked as free.
+ */
+static struct kbase_va_region *kbase_find_event_mem_region(
+			struct kbase_context *kctx, u64 gpu_addr)
+{
+
+	return NULL;
+}
+
+/**
+ * kbase_phy_alloc_mapping_init - Initialize the kernel side permanent mapping
+ *                                of the physical allocation belonging to a
+ *                                region
+ * @kctx:  The kernel base context @reg belongs to.
+ * @reg:   The region whose physical allocation is to be mapped
+ * @vsize: The size of the requested region, in pages
+ * @size:  The size in pages initially committed to the region
+ *
+ * Return: 0 on success, otherwise an error code indicating failure
+ *
+ * Maps the physical allocation backing a non-free @reg, so it may be
+ * accessed directly from the kernel. This is only supported for physical
+ * allocations of type KBASE_MEM_TYPE_NATIVE, and will fail for other types of
+ * physical allocation.
+ *
+ * The mapping is stored directly in the allocation that backs @reg. The
+ * refcount is not incremented at this point. Instead, use of the mapping should
+ * be surrounded by kbase_phy_alloc_mapping_get() and
+ * kbase_phy_alloc_mapping_put() to ensure it does not disappear whilst the
+ * client is accessing it.
+ *
+ * Both cached and uncached regions are allowed, but any sync operations are the
+ * responsibility of the client using the permanent mapping.
+ *
+ * A number of checks are made to ensure that a region that needs a permanent
+ * mapping can actually be supported:
+ * - The region must be created as fully backed
+ * - The region must not be growable
+ *
+ * This function will fail if those checks are not satisfied.
+ *
+ * On success, the region will also be forced into a certain kind:
+ * - It will no longer be growable
+ */
+static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx,
+		struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+	size_t size_bytes = (size << PAGE_SHIFT);
+	struct kbase_vmap_struct *kern_mapping;
+	int err = 0;
+
+	/* Can only map in regions that are always fully committed
+	 * Don't setup the mapping twice
+	 * Only support KBASE_MEM_TYPE_NATIVE allocations
+	 */
+	if (vsize != size || reg->cpu_alloc->permanent_map != NULL ||
+			reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+		return -EINVAL;
+
+	if (size > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES -
+			kctx->permanent_mapped_pages)) {
+		dev_warn(kctx->kbdev->dev, "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %lu pages",
+				(u64)size,
+				KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES,
+				kctx->permanent_mapped_pages);
+		return -ENOMEM;
+	}
+
+	kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL);
+	if (!kern_mapping)
+		return -ENOMEM;
+
+	err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping);
+	if (err < 0)
+		goto vmap_fail;
+
+	/* No support for growing or shrinking mapped regions */
+	reg->flags &= ~KBASE_REG_GROWABLE;
+
+	reg->cpu_alloc->permanent_map = kern_mapping;
+	kctx->permanent_mapped_pages += size;
+
+	return 0;
+vmap_fail:
+	kfree(kern_mapping);
+	return err;
+}
+
+void kbase_phy_alloc_mapping_term(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc)
+{
+	WARN_ON(!alloc->permanent_map);
+	kbase_vunmap_phy_pages(kctx, alloc->permanent_map);
+	kfree(alloc->permanent_map);
+
+	alloc->permanent_map = NULL;
+
+	/* Mappings are only done on cpu_alloc, so don't need to worry about
+	 * this being reduced a second time if a separate gpu_alloc is
+	 * freed
+	 */
+	WARN_ON(alloc->nents > kctx->permanent_mapped_pages);
+	kctx->permanent_mapped_pages -= alloc->nents;
+}
+
+void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx,
+		u64 gpu_addr,
+		struct kbase_vmap_struct **out_kern_mapping)
+{
+	struct kbase_va_region *reg;
+	void *kern_mem_ptr = NULL;
+	struct kbase_vmap_struct *kern_mapping;
+	u64 mapping_offset;
+
+	WARN_ON(!kctx);
+	WARN_ON(!out_kern_mapping);
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* First do a quick lookup in the list of event memory regions */
+	reg = kbase_find_event_mem_region(kctx, gpu_addr);
+
+	if (!reg) {
+		reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx, gpu_addr);
+	}
+
+	if (kbase_is_region_invalid_or_free(reg))
+		goto out_unlock;
+
+	kern_mapping = reg->cpu_alloc->permanent_map;
+	if (kern_mapping == NULL)
+		goto out_unlock;
+
+	mapping_offset = gpu_addr - (reg->start_pfn << PAGE_SHIFT);
+
+	/* Refcount the allocations to prevent them disappearing */
+	WARN_ON(reg->cpu_alloc != kern_mapping->cpu_alloc);
+	WARN_ON(reg->gpu_alloc != kern_mapping->gpu_alloc);
+	(void)kbase_mem_phy_alloc_get(kern_mapping->cpu_alloc);
+	(void)kbase_mem_phy_alloc_get(kern_mapping->gpu_alloc);
+
+	kern_mem_ptr = (void *)(uintptr_t)((uintptr_t)kern_mapping->addr + mapping_offset);
+	*out_kern_mapping = kern_mapping;
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return kern_mem_ptr;
+}
+
+void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
+		struct kbase_vmap_struct *kern_mapping)
+{
+	WARN_ON(!kctx);
+	WARN_ON(!kern_mapping);
+
+	WARN_ON(kctx != kern_mapping->cpu_alloc->imported.native.kctx);
+	WARN_ON(kern_mapping != kern_mapping->cpu_alloc->permanent_map);
+
+	kbase_mem_phy_alloc_put(kern_mapping->cpu_alloc);
+	kbase_mem_phy_alloc_put(kern_mapping->gpu_alloc);
+
+	/* kern_mapping and the gpu/cpu phy allocs backing it must not be used
+	 * from now on
+	 */
+}
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
+		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
+		u64 *gpu_va)
+{
+	int zone;
+	struct kbase_va_region *reg;
+	struct rb_root *rbtree;
+	struct device *dev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(gpu_va);
+
+	dev = kctx->kbdev->dev;
+	dev_dbg(dev, "Allocating %lld va_pages, %lld commit_pages, %lld extent, 0x%llX flags\n",
+		va_pages, commit_pages, extent, *flags);
+
+	*gpu_va = 0; /* return 0 on failure */
+
+	if (!kbase_check_alloc_flags(*flags)) {
+		dev_warn(dev,
+				"kbase_mem_alloc called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE))) {
+		/* Mask coherency flags if infinite cache is enabled to prevent
+		 * the skipping of syncs from BASE side.
+		 */
+		*flags &= ~(BASE_MEM_COHERENT_SYSTEM_REQUIRED |
+			    BASE_MEM_COHERENT_SYSTEM);
+	}
+#endif
+
+	if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 &&
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) {
+		/* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, extent))
+		goto bad_sizes;
+
+#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED
+	/* Ensure that memory is fully physically-backed. */
+	if (*flags & BASE_MEM_GROW_ON_GPF)
+		commit_pages = va_pages;
+#endif
+
+	/* find out which VA zone to use */
+	if (*flags & BASE_MEM_SAME_VA) {
+		rbtree = &kctx->reg_rbtree_same;
+		zone = KBASE_REG_ZONE_SAME_VA;
+	} else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) {
+		rbtree = &kctx->reg_rbtree_exec;
+		zone = KBASE_REG_ZONE_EXEC_VA;
+	} else {
+		rbtree = &kctx->reg_rbtree_custom;
+		zone = KBASE_REG_ZONE_CUSTOM_VA;
+	}
+
+	reg = kbase_alloc_free_region(rbtree, 0, va_pages, zone);
+	if (!reg) {
+		dev_err(dev, "Failed to allocate free region");
+		goto no_region;
+	}
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	if (kbase_reg_prepare_native(reg, kctx,
+				base_mem_group_id_get(*flags)) != 0) {
+		dev_err(dev, "Failed to prepare region");
+		goto prepare_failed;
+	}
+
+	if (*flags & (BASE_MEM_GROW_ON_GPF|BASE_MEM_TILER_ALIGN_TOP)) {
+		/* kbase_check_alloc_sizes() already checks extent is valid for
+		 * assigning to reg->extent */
+		reg->extent = extent;
+	} else {
+		reg->extent = 0;
+	}
+
+	if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) {
+		dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)",
+				(unsigned long long)commit_pages,
+				(unsigned long long)va_pages);
+		goto no_mem;
+	}
+	reg->initial_commit = commit_pages;
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (reg->flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) {
+		/* Permanent kernel mappings must happen as soon as
+		 * reg->cpu_alloc->pages is ready. Currently this happens after
+		 * kbase_alloc_phy_pages(). If we move that to setup pages
+		 * earlier, also move this call too
+		 */
+		int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages,
+				commit_pages);
+		if (err < 0) {
+			kbase_gpu_vm_unlock(kctx);
+			goto no_kern_mapping;
+		}
+	}
+
+
+	/* mmap needed to setup VA? */
+	if (*flags & BASE_MEM_SAME_VA) {
+		unsigned long prot = PROT_NONE;
+		unsigned long va_size = va_pages << PAGE_SHIFT;
+		unsigned long va_map = va_size;
+		unsigned long cookie, cookie_nr;
+		unsigned long cpu_addr;
+
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(dev, "No cookies available for allocation!");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_cookie;
+		}
+		/* return a cookie */
+		cookie_nr = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << cookie_nr);
+		BUG_ON(kctx->pending_regions[cookie_nr]);
+		kctx->pending_regions[cookie_nr] = reg;
+
+		/* relocate to correct base */
+		cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		cookie <<= PAGE_SHIFT;
+
+		/*
+		 * 10.1-10.4 UKU userland relies on the kernel to call mmap.
+		 * For all other versions we can just return the cookie
+		 */
+		if (kctx->api_version < KBASE_API_VERSION(10, 1) ||
+		    kctx->api_version > KBASE_API_VERSION(10, 4)) {
+			*gpu_va = (u64) cookie;
+			kbase_gpu_vm_unlock(kctx);
+			return reg;
+		}
+
+		kbase_va_region_alloc_get(kctx, reg);
+		kbase_gpu_vm_unlock(kctx);
+
+		if (*flags & BASE_MEM_PROT_CPU_RD)
+			prot |= PROT_READ;
+		if (*flags & BASE_MEM_PROT_CPU_WR)
+			prot |= PROT_WRITE;
+
+		cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot,
+				MAP_SHARED, cookie);
+
+		kbase_gpu_vm_lock(kctx);
+
+		/* Since vm lock was released, check if the region has already
+		 * been freed meanwhile. This could happen if User was able to
+		 * second guess the cookie or the CPU VA and free the region
+		 * through the guessed value.
+		 */
+		if (reg->flags & KBASE_REG_VA_FREED) {
+			kbase_va_region_alloc_put(kctx, reg);
+			reg = NULL;
+		} else if (IS_ERR_VALUE(cpu_addr)) {
+			/* Once the vm lock is released, multiple scenarios can
+			 * arise under which the cookie could get re-assigned
+			 * to some other region.
+			 */
+			if (!WARN_ON(kctx->pending_regions[cookie_nr] &&
+				     (kctx->pending_regions[cookie_nr] != reg))) {
+				kctx->pending_regions[cookie_nr] = NULL;
+				kctx->cookies |= (1UL << cookie_nr);
+			}
+
+			/* Region has not been freed and we can be sure that
+			 * User won't be able to free the region now. So we
+			 * can free it ourselves.
+			 * If the region->start_pfn isn't zero then the
+			 * allocation will also be unmapped from GPU side.
+			 */
+			kbase_mem_free_region(kctx, reg);
+			kbase_va_region_alloc_put(kctx, reg);
+			reg = NULL;
+		} else {
+			kbase_va_region_alloc_put(kctx, reg);
+			*gpu_va = (u64) cpu_addr;
+		}
+
+		kbase_gpu_vm_unlock(kctx);
+	} else /* we control the VA */ {
+		if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
+			dev_warn(dev, "Failed to map memory on GPU");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+
+		kbase_gpu_vm_unlock(kctx);
+	}
+
+	return reg;
+
+no_mmap:
+no_cookie:
+no_kern_mapping:
+no_mem:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+invalid_flags:
+prepare_failed:
+	kfree(reg);
+no_region:
+bad_sizes:
+bad_flags:
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_alloc);
+
+int kbase_mem_query(struct kbase_context *kctx,
+		u64 gpu_addr, u64 query, u64 * const out)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(out);
+
+	if (gpu_addr & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev, "mem_query: gpu_addr: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (kbase_is_region_invalid_or_free(reg))
+		goto out_unlock;
+
+	switch (query) {
+	case KBASE_MEM_QUERY_COMMIT_SIZE:
+		if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) {
+			*out = kbase_reg_current_backed_size(reg);
+		} else {
+			size_t i;
+			struct kbase_aliased *aliased;
+			*out = 0;
+			aliased = reg->cpu_alloc->imported.alias.aliased;
+			for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++)
+				*out += aliased[i].length;
+		}
+		break;
+	case KBASE_MEM_QUERY_VA_SIZE:
+		*out = reg->nr_pages;
+		break;
+	case KBASE_MEM_QUERY_FLAGS:
+	{
+		*out = 0;
+		if (KBASE_REG_CPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_WR;
+		if (KBASE_REG_CPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_RD;
+		if (KBASE_REG_CPU_CACHED & reg->flags)
+			*out |= BASE_MEM_CACHED_CPU;
+		if (KBASE_REG_GPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_WR;
+		if (KBASE_REG_GPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_RD;
+		if (!(KBASE_REG_GPU_NX & reg->flags))
+			*out |= BASE_MEM_PROT_GPU_EX;
+		if (KBASE_REG_SHARE_BOTH & reg->flags)
+			*out |= BASE_MEM_COHERENT_SYSTEM;
+		if (KBASE_REG_SHARE_IN & reg->flags)
+			*out |= BASE_MEM_COHERENT_LOCAL;
+		if (kctx->api_version >= KBASE_API_VERSION(11, 2)) {
+			/* Prior to 11.2, these were known about by user-side
+			 * but we did not return them. Returning some of these
+			 * caused certain clients that were not expecting them
+			 * to fail, so we omit all of them as a special-case
+			 * for compatibility reasons */
+			if (KBASE_REG_PF_GROW & reg->flags)
+				*out |= BASE_MEM_GROW_ON_GPF;
+			if (KBASE_REG_SECURE & reg->flags)
+				*out |= BASE_MEM_SECURE;
+		}
+		if (KBASE_REG_TILER_ALIGN_TOP & reg->flags)
+			*out |= BASE_MEM_TILER_ALIGN_TOP;
+		if (!(KBASE_REG_GPU_CACHED & reg->flags))
+			*out |= BASE_MEM_UNCACHED_GPU;
+		if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags)
+			*out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE;
+
+		*out |= base_mem_group_id_set(reg->cpu_alloc->group_id);
+
+		WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE,
+				"BASE_MEM_FLAGS_QUERYABLE needs updating\n");
+		*out &= BASE_MEM_FLAGS_QUERYABLE;
+		break;
+	}
+	default:
+		*out = 0;
+		goto out_unlock;
+	}
+
+	ret = 0;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the
+ * Ephemeral memory eviction list.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages which can be freed.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long pages = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+
+	mutex_lock(&kctx->jit_evict_lock);
+
+	list_for_each_entry(alloc, &kctx->evict_list, evict_node)
+		pages += alloc->nents;
+
+	mutex_unlock(&kctx->jit_evict_lock);
+	return pages;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction
+ * list for pages and try to reclaim them.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages freed (can be less then requested) or -1 if the
+ * shrinker failed to free pages in its pool.
+ *
+ * Note:
+ * This function accesses region structures without taking the region lock,
+ * this is required as the OOM killer can call the shrinker after the region
+ * lock has already been held.
+ * This is safe as we can guarantee that a region on the eviction list will
+ * not be freed (kbase_mem_free_region removes the allocation from the list
+ * before destroying it), or modified by other parts of the driver.
+ * The eviction list itself is guarded by the eviction lock and the MMU updates
+ * are protected by their own lock.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	struct kbase_mem_phy_alloc *tmp;
+	unsigned long freed = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+	mutex_lock(&kctx->jit_evict_lock);
+
+	list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
+		int err;
+
+		err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
+				0, alloc->nents);
+		if (err != 0) {
+			/*
+			 * Failed to remove GPU mapping, tell the shrinker
+			 * to stop trying to shrink our slab even though we
+			 * have pages in it.
+			 */
+			freed = -1;
+			goto out_unlock;
+		}
+
+		/*
+		 * Update alloc->evicted before freeing the backing so the
+		 * helper can determine that it needs to bypass the accounting
+		 * and memory pool.
+		 */
+		alloc->evicted = alloc->nents;
+
+		kbase_free_phy_pages_helper(alloc, alloc->evicted);
+		freed += alloc->evicted;
+		list_del_init(&alloc->evict_node);
+
+		/*
+		 * Inform the JIT allocator this region has lost backing
+		 * as it might need to free the allocation.
+		 */
+		kbase_jit_backing_lost(alloc->reg);
+
+		/* Enough pages have been freed so stop now */
+		if (freed > sc->nr_to_scan)
+			break;
+	}
+out_unlock:
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_evictable_reclaim_count_objects(s, sc);
+
+	return kbase_mem_evictable_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_evictable_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->evict_list);
+	mutex_init(&kctx->jit_evict_lock);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink;
+#else
+	kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
+	kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+#endif
+	kctx->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	kctx->reclaim.batch = 0;
+#endif
+	register_shrinker(&kctx->reclaim);
+	return 0;
+}
+
+void kbase_mem_evictable_deinit(struct kbase_context *kctx)
+{
+	unregister_shrinker(&kctx->reclaim);
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.native.kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int __maybe_unused new_page_count;
+
+	kbase_process_page_usage_dec(kctx, alloc->nents);
+	new_page_count = atomic_sub_return(alloc->nents,
+		&kctx->used_pages);
+	atomic_sub(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kbdev,
+			kctx->id,
+			(u64)new_page_count);
+}
+
+/**
+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable.
+ * @alloc: The physical allocation
+ */
+static
+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.native.kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int __maybe_unused new_page_count;
+
+	new_page_count = atomic_add_return(alloc->nents,
+		&kctx->used_pages);
+	atomic_add(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters so that the allocation is accounted for
+	 * against the process and thus is visible to the OOM killer,
+	 */
+	kbase_process_page_usage_inc(kctx, alloc->nents);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kbdev,
+			kctx->id,
+			(u64)new_page_count);
+}
+
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.native.kctx;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
+			0, gpu_alloc->nents);
+
+	mutex_lock(&kctx->jit_evict_lock);
+	/* This allocation can't already be on a list. */
+	WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
+	/*
+	 * Add the allocation to the eviction list, after this point the shrink
+	 * can reclaim it.
+	 */
+	list_add(&gpu_alloc->evict_node, &kctx->evict_list);
+	mutex_unlock(&kctx->jit_evict_lock);
+	kbase_mem_evictable_mark_reclaim(gpu_alloc);
+
+	gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED;
+	return 0;
+}
+
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.native.kctx;
+	int err = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	mutex_lock(&kctx->jit_evict_lock);
+	/*
+	 * First remove the allocation from the eviction list as it's no
+	 * longer eligible for eviction.
+	 */
+	list_del_init(&gpu_alloc->evict_node);
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	if (gpu_alloc->evicted == 0) {
+		/*
+		 * The backing is still present, update the VM stats as it's
+		 * in use again.
+		 */
+		kbase_mem_evictable_unmark_reclaim(gpu_alloc);
+	} else {
+		/* If the region is still alive ... */
+		if (gpu_alloc->reg) {
+			/* ... allocate replacement backing ... */
+			err = kbase_alloc_phy_pages_helper(gpu_alloc,
+					gpu_alloc->evicted);
+
+			/*
+			 * ... and grow the mapping back to its
+			 * pre-eviction size.
+			 */
+			if (!err)
+				err = kbase_mem_grow_gpu_mapping(kctx,
+						gpu_alloc->reg,
+						gpu_alloc->evicted, 0);
+
+			gpu_alloc->evicted = 0;
+		}
+	}
+
+	/* If the region is still alive remove the DONT_NEED attribute. */
+	if (gpu_alloc->reg)
+		gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED;
+
+	return (err == 0);
+}
+
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+	unsigned int real_flags = 0;
+	unsigned int new_flags = 0;
+	bool prev_needed, new_needed;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (!gpu_addr)
+		return -EINVAL;
+
+	if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE))
+		return -EINVAL;
+
+	/* nuke other bits */
+	flags &= mask;
+
+	/* check for only supported flags */
+	if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* mask covers bits we don't support? */
+	if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* convert flags */
+	if (BASE_MEM_COHERENT_SYSTEM & flags)
+		real_flags |= KBASE_REG_SHARE_BOTH;
+	else if (BASE_MEM_COHERENT_LOCAL & flags)
+		real_flags |= KBASE_REG_SHARE_IN;
+
+	/* now we can lock down the context, and find the region */
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (kbase_is_region_invalid_or_free(reg))
+		goto out_unlock;
+
+	/* Is the region being transitioning between not needed and needed? */
+	prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
+	new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
+	if (prev_needed != new_needed) {
+		/* Aliased allocations can't be made ephemeral */
+		if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1)
+			goto out_unlock;
+
+		if (new_needed) {
+			/* Only native allocations can be marked not needed */
+			if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			ret = kbase_mem_evictable_make(reg->gpu_alloc);
+			if (ret)
+				goto out_unlock;
+		} else {
+			kbase_mem_evictable_unmake(reg->gpu_alloc);
+		}
+	}
+
+	/* limit to imported memory */
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)
+		goto out_unlock;
+
+	/* shareability flags are ignored for GPU uncached memory */
+	if (!(reg->flags & KBASE_REG_GPU_CACHED)) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/* no change? */
+	if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	new_flags = reg->flags & ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+	new_flags |= real_flags;
+
+	/* Currently supporting only imported memory */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) {
+		/* Future use will use the new flags, existing mapping
+		 * will NOT be updated as memory should not be in use
+		 * by the GPU when updating the flags.
+		 */
+		WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+		ret = 0;
+	} else if (reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+		/*
+		 * When CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is not enabled the
+		 * dma-buf GPU mapping should always be present, check that
+		 * this is the case and warn and skip the page table update if
+		 * not.
+		 *
+		 * Then update dma-buf GPU mapping with the new flags.
+		 *
+		 * Note: The buffer must not be in use on the GPU when
+		 * changing flags. If the buffer is in active use on
+		 * the GPU, there is a risk that the GPU may trigger a
+		 * shareability fault, as it will see the same
+		 * addresses from buffer with different shareability
+		 * properties.
+		 */
+		dev_dbg(kctx->kbdev->dev,
+			"Updating page tables on mem flag change\n");
+		ret = kbase_mmu_update_pages(kctx, reg->start_pfn,
+				kbase_get_gpu_phy_pages(reg),
+				kbase_reg_current_backed_size(reg),
+				new_flags,
+				reg->gpu_alloc->group_id);
+		if (ret)
+			dev_warn(kctx->kbdev->dev,
+				 "Failed to update GPU page tables on flag change: %d\n",
+				 ret);
+	} else
+		WARN_ON(!reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+#else
+	/* Reject when dma-buf support is not enabled. */
+	ret = -EINVAL;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+	/* If everything is good, then set the new flags on the region. */
+	if (!ret)
+		reg->flags = new_flags;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	up_write(&current->mm->mmap_sem);
+out:
+	return ret;
+}
+
+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
+
+int kbase_mem_do_sync_imported(struct kbase_context *kctx,
+		struct kbase_va_region *reg, enum kbase_sync_type sync_fn)
+{
+	int ret = -EINVAL;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	struct dma_buf *dma_buf;
+	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* We assume that the same physical allocation object is used for both
+	 * GPU and CPU for imported buffers.
+	 */
+	WARN_ON(reg->cpu_alloc != reg->gpu_alloc);
+
+	/* Currently only handle dma-bufs */
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)
+		return ret;
+	/*
+	 * Attempting to sync with CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND
+	 * enabled can expose us to a Linux Kernel issue between v4.6 and
+	 * v4.19. We will not attempt to support cache syncs on dma-bufs that
+	 * are mapped on demand (i.e. not on import), even on pre-4.6, neither
+	 * on 4.20 or newer kernels, because this makes it difficult for
+	 * userspace to know when they can rely on the cache sync.
+	 * Instead, only support syncing when we always map dma-bufs on import,
+	 * or if the particular buffer is mapped right now.
+	 */
+	if (IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND) &&
+	    !reg->gpu_alloc->imported.umm.current_mapping_usage_count)
+		return ret;
+
+	dma_buf = reg->gpu_alloc->imported.umm.dma_buf;
+
+	switch (sync_fn) {
+	case KBASE_SYNC_TO_DEVICE:
+		dev_dbg(kctx->kbdev->dev,
+			"Syncing imported buffer at GPU VA %llx to GPU\n",
+			reg->start_pfn);
+#ifdef KBASE_MEM_ION_SYNC_WORKAROUND
+		if (!WARN_ON(!reg->gpu_alloc->imported.umm.dma_attachment)) {
+			struct dma_buf_attachment *attachment = reg->gpu_alloc->imported.umm.dma_attachment;
+			struct sg_table *sgt = reg->gpu_alloc->imported.umm.sgt;
+
+			dma_sync_sg_for_device(attachment->dev, sgt->sgl,
+					sgt->nents, dir);
+			ret = 0;
+		}
+#else
+	/* Though the below version check could be superfluous depending upon the version condition
+	 * used for enabling KBASE_MEM_ION_SYNC_WORKAROUND, we still keep this check here to allow
+	 * ease of modification for non-ION systems or systems where ION has been patched.
+	 */
+#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
+		dma_buf_end_cpu_access(dma_buf,
+				0, dma_buf->size,
+				dir);
+		ret = 0;
+#else
+		ret = dma_buf_end_cpu_access(dma_buf,
+				dir);
+#endif
+#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */
+		break;
+	case KBASE_SYNC_TO_CPU:
+		dev_dbg(kctx->kbdev->dev,
+			"Syncing imported buffer at GPU VA %llx to CPU\n",
+			reg->start_pfn);
+#ifdef KBASE_MEM_ION_SYNC_WORKAROUND
+		if (!WARN_ON(!reg->gpu_alloc->imported.umm.dma_attachment)) {
+			struct dma_buf_attachment *attachment = reg->gpu_alloc->imported.umm.dma_attachment;
+			struct sg_table *sgt = reg->gpu_alloc->imported.umm.sgt;
+
+			dma_sync_sg_for_cpu(attachment->dev, sgt->sgl,
+					sgt->nents, dir);
+			ret = 0;
+		}
+#else
+		ret = dma_buf_begin_cpu_access(dma_buf,
+#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
+				0, dma_buf->size,
+#endif
+				dir);
+#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */
+		break;
+	};
+
+	if (unlikely(ret))
+		dev_warn(kctx->kbdev->dev,
+			 "Failed to sync mem region %pK at GPU VA %llx: %d\n",
+			 reg, reg->start_pfn, ret);
+
+#else /* CONFIG_DMA_SHARED_BUFFER */
+	CSTD_UNUSED(kctx);
+	CSTD_UNUSED(reg);
+	CSTD_UNUSED(sync_fn);
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+	return ret;
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+/**
+ * kbase_mem_umm_unmap_attachment - Unmap dma-buf attachment
+ * @kctx: Pointer to kbase context
+ * @alloc: Pointer to allocation with imported dma-buf memory to unmap
+ *
+ * This will unmap a dma-buf. Must be called after the GPU page tables for the
+ * region have been torn down.
+ */
+static void kbase_mem_umm_unmap_attachment(struct kbase_context *kctx,
+					   struct kbase_mem_phy_alloc *alloc)
+{
+	struct tagged_addr *pa = alloc->pages;
+
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+				 alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+
+	memset(pa, 0xff, sizeof(*pa) * alloc->nents);
+	alloc->nents = 0;
+}
+
+/**
+ * kbase_mem_umm_map_attachment - Prepare attached dma-buf for GPU mapping
+ * @kctx: Pointer to kbase context
+ * @reg: Pointer to region with imported dma-buf memory to map
+ *
+ * Map the dma-buf and prepare the page array with the tagged Mali physical
+ * addresses for GPU mapping.
+ *
+ * Return: 0 on success, or negative error code
+ */
+static int kbase_mem_umm_map_attachment(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	struct sg_table *sgt;
+	struct scatterlist *s;
+	int i;
+	struct tagged_addr *pa;
+	int err;
+	size_t count = 0;
+	struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+
+	WARN_ON_ONCE(alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM);
+	WARN_ON_ONCE(alloc->imported.umm.sgt);
+
+	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
+			DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt))
+		return -EINVAL;
+
+	/* save for later */
+	alloc->imported.umm.sgt = sgt;
+
+	pa = kbase_get_gpu_phy_pages(reg);
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		size_t j, pages = PFN_UP(sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+		"sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+		sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+		(unsigned long long) sg_dma_address(s));
+
+		for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++)
+			*pa++ = as_tagged(sg_dma_address(s) +
+				(j << PAGE_SHIFT));
+		WARN_ONCE(j < pages,
+		"sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+		alloc->imported.umm.dma_buf->size);
+	}
+
+	if (!(reg->flags & KBASE_REG_IMPORT_PAD) &&
+			WARN_ONCE(count < reg->nr_pages,
+			"sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+			alloc->imported.umm.dma_buf->size)) {
+		err = -EINVAL;
+		goto err_unmap_attachment;
+	}
+
+	/* Update nents as we now have pages to map */
+	alloc->nents = count;
+
+	return 0;
+
+err_unmap_attachment:
+	kbase_mem_umm_unmap_attachment(kctx, alloc);
+
+	return err;
+}
+
+int kbase_mem_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	int err;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long gwt_mask = ~0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	alloc = reg->gpu_alloc;
+
+	alloc->imported.umm.current_mapping_usage_count++;
+	if (alloc->imported.umm.current_mapping_usage_count != 1) {
+		if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT)) {
+			if (!kbase_is_region_invalid_or_free(reg)) {
+				err = kbase_mem_do_sync_imported(kctx, reg,
+						KBASE_SYNC_TO_DEVICE);
+				WARN_ON_ONCE(err);
+			}
+		}
+		return 0;
+	}
+
+	err = kbase_mem_umm_map_attachment(kctx, reg);
+	if (err)
+		goto bad_map_attachment;
+
+#ifdef CONFIG_MALI_CINSTR_GWT
+	if (kctx->gwt_enabled)
+		gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
+
+	err = kbase_mmu_insert_pages(kctx->kbdev,
+				     &kctx->mmu,
+				     reg->start_pfn,
+				     kbase_get_gpu_phy_pages(reg),
+				     kbase_reg_current_backed_size(reg),
+				     reg->flags & gwt_mask,
+				     kctx->as_nr,
+				     alloc->group_id);
+	if (err)
+		goto bad_insert;
+
+	if (reg->flags & KBASE_REG_IMPORT_PAD &&
+			!WARN_ON(reg->nr_pages < alloc->nents)) {
+		/* For padded imported dma-buf memory, map the dummy aliasing
+		 * page from the end of the dma-buf pages, to the end of the
+		 * region using a read only mapping.
+		 *
+		 * Assume alloc->nents is the number of actual pages in the
+		 * dma-buf memory.
+		 */
+		err = kbase_mmu_insert_single_page(kctx,
+				reg->start_pfn + alloc->nents,
+				kctx->aliasing_sink_page,
+				reg->nr_pages - alloc->nents,
+				(reg->flags | KBASE_REG_GPU_RD) &
+				~KBASE_REG_GPU_WR,
+				KBASE_MEM_GROUP_SINK);
+		if (err)
+			goto bad_pad_insert;
+	}
+
+	return 0;
+
+bad_pad_insert:
+	kbase_mmu_teardown_pages(kctx->kbdev,
+				 &kctx->mmu,
+				 reg->start_pfn,
+				 alloc->nents,
+				 kctx->as_nr);
+bad_insert:
+	kbase_mem_umm_unmap_attachment(kctx, alloc);
+bad_map_attachment:
+	alloc->imported.umm.current_mapping_usage_count--;
+
+	return err;
+}
+
+void kbase_mem_umm_unmap(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+	alloc->imported.umm.current_mapping_usage_count--;
+	if (alloc->imported.umm.current_mapping_usage_count) {
+		if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT)) {
+			if (!kbase_is_region_invalid_or_free(reg)) {
+				int err = kbase_mem_do_sync_imported(kctx, reg,
+						KBASE_SYNC_TO_CPU);
+				WARN_ON_ONCE(err);
+			}
+		}
+		return;
+	}
+
+	if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) {
+		int err;
+
+		err = kbase_mmu_teardown_pages(kctx->kbdev,
+					       &kctx->mmu,
+					       reg->start_pfn,
+					       reg->nr_pages,
+					       kctx->as_nr);
+		WARN_ON(err);
+	}
+
+	kbase_mem_umm_unmap_attachment(kctx, alloc);
+}
+
+static int get_umm_memory_group_id(struct kbase_context *kctx,
+		struct dma_buf *dma_buf)
+{
+	int group_id = BASE_MEM_GROUP_DEFAULT;
+
+	if (kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id) {
+		struct memory_group_manager_import_data mgm_import_data;
+
+		mgm_import_data.type =
+			MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF;
+		mgm_import_data.u.dma_buf = dma_buf;
+
+		group_id = kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id(
+			kctx->kbdev->mgm_dev, &mgm_import_data);
+	}
+
+	return group_id;
+}
+
+/**
+ * kbase_mem_from_umm - Import dma-buf memory into kctx
+ * @kctx: Pointer to kbase context to import memory into
+ * @fd: File descriptor of dma-buf to import
+ * @va_pages: Pointer where virtual size of the region will be output
+ * @flags: Pointer to memory flags
+ * @padding: Number of read only padding pages to be inserted at the end of the
+ * GPU mapping of the dma-buf
+ *
+ * Return: Pointer to new kbase_va_region object of the imported dma-buf, or
+ * NULL on error.
+ *
+ * This function imports a dma-buf into kctx, and created a kbase_va_region
+ * object that wraps the dma-buf.
+ */
+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
+		int fd, u64 *va_pages, u64 *flags, u32 padding)
+{
+	struct kbase_va_region *reg;
+	struct dma_buf *dma_buf;
+	struct dma_buf_attachment *dma_attachment;
+	bool shared_zone = false;
+	int group_id;
+
+	/* 64-bit address range is the max */
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		return NULL;
+
+	dma_buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(dma_buf))
+		return NULL;
+
+	dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+	if (IS_ERR_OR_NULL(dma_attachment)) {
+		dma_buf_put(dma_buf);
+		return NULL;
+	}
+
+	*va_pages = (PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT) + padding;
+	if (!*va_pages) {
+		dma_buf_detach(dma_buf, dma_attachment);
+		dma_buf_put(dma_buf);
+		return NULL;
+	}
+
+	/* ignore SAME_VA */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	/*
+	 * Force CPU cached flag.
+	 *
+	 * We can't query the dma-buf exporter to get details about the CPU
+	 * cache attributes of CPU mappings, so we have to assume that the
+	 * buffer may be cached, and call into the exporter for cache
+	 * maintenance, and rely on the exporter to do the right thing when
+	 * handling our calls.
+	 */
+	*flags |= BASE_MEM_CACHED_CPU;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(&kctx->reg_rbtree_same,
+				0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	} else {
+		reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
+				0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg) {
+		dma_buf_detach(dma_buf, dma_attachment);
+		dma_buf_put(dma_buf);
+		return NULL;
+	}
+
+	group_id = get_umm_memory_group_id(kctx, dma_buf);
+
+	reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages,
+			KBASE_MEM_TYPE_IMPORTED_UMM, group_id);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto error_out;
+
+	/* No pages to map yet */
+	reg->gpu_alloc->nents = 0;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMM is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMM cannot be grown */
+
+	if (*flags & BASE_MEM_SECURE)
+		reg->flags |= KBASE_REG_SECURE;
+
+	if (padding)
+		reg->flags |= KBASE_REG_IMPORT_PAD;
+
+	reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM;
+	reg->gpu_alloc->imported.umm.sgt = NULL;
+	reg->gpu_alloc->imported.umm.dma_buf = dma_buf;
+	reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
+	reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
+	reg->extent = 0;
+
+	if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) {
+		int err;
+
+		reg->gpu_alloc->imported.umm.current_mapping_usage_count = 1;
+
+		err = kbase_mem_umm_map_attachment(kctx, reg);
+		if (err) {
+			dev_warn(kctx->kbdev->dev,
+				 "Failed to map dma-buf %pK on GPU: %d\n",
+				 dma_buf, err);
+			goto error_out;
+		}
+
+		*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+	}
+
+	return reg;
+
+error_out:
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+no_alloc:
+	kfree(reg);
+
+	return NULL;
+}
+#endif  /* CONFIG_DMA_SHARED_BUFFER */
+
+u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev)
+{
+	u32 cpu_cache_line_size = cache_line_size();
+	u32 gpu_cache_line_size =
+		(1UL << kbdev->gpu_props.props.l2_props.log2_line_size);
+
+	return ((cpu_cache_line_size > gpu_cache_line_size) ?
+				cpu_cache_line_size :
+				gpu_cache_line_size);
+}
+
+static struct kbase_va_region *kbase_mem_from_user_buffer(
+		struct kbase_context *kctx, unsigned long address,
+		unsigned long size, u64 *va_pages, u64 *flags)
+{
+	long i;
+	struct kbase_va_region *reg;
+	struct rb_root *rbtree;
+	long faulted_pages;
+	int zone = KBASE_REG_ZONE_CUSTOM_VA;
+	bool shared_zone = false;
+	u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
+	struct kbase_alloc_import_user_buf *user_buf;
+	struct page **pages = NULL;
+
+	if ((address & (cache_line_alignment - 1)) != 0 ||
+			(size & (cache_line_alignment - 1)) != 0) {
+		if (*flags & BASE_MEM_UNCACHED_GPU) {
+			dev_warn(kctx->kbdev->dev,
+					"User buffer is not cache line aligned and marked as GPU uncached\n");
+			goto bad_size;
+		}
+
+		/* Coherency must be enabled to handle partial cache lines */
+		if (*flags & (BASE_MEM_COHERENT_SYSTEM |
+			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
+			/* Force coherent system required flag, import will
+			 * then fail if coherency isn't available
+			 */
+			*flags |= BASE_MEM_COHERENT_SYSTEM_REQUIRED;
+		} else {
+			dev_warn(kctx->kbdev->dev,
+					"User buffer is not cache line aligned and no coherency enabled\n");
+			goto bad_size;
+		}
+	}
+
+	*va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) -
+		PFN_DOWN(address);
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (UINT64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* SAME_VA generally not supported with imported memory (no known use cases) */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		zone = KBASE_REG_ZONE_SAME_VA;
+		rbtree = &kctx->reg_rbtree_same;
+	} else
+		rbtree = &kctx->reg_rbtree_custom;
+
+	reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone);
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(
+		kctx, *va_pages, KBASE_MEM_TYPE_IMPORTED_USER_BUF,
+		BASE_MEM_GROUP_DEFAULT);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */
+
+	user_buf = &reg->gpu_alloc->imported.user_buf;
+
+	user_buf->size = size;
+	user_buf->address = address;
+	user_buf->nr_pages = *va_pages;
+	user_buf->mm = current->mm;
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+	atomic_inc(&current->mm->mm_count);
+#else
+	mmgrab(current->mm);
+#endif
+	if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+		user_buf->pages = vmalloc(*va_pages * sizeof(struct page *));
+	else
+		user_buf->pages = kmalloc_array(*va_pages,
+				sizeof(struct page *), GFP_KERNEL);
+
+	if (!user_buf->pages)
+		goto no_page_array;
+
+	/* If the region is coherent with the CPU then the memory is imported
+	 * and mapped onto the GPU immediately.
+	 * Otherwise get_user_pages is called as a sanity check, but with
+	 * NULL as the pages argument which will fault the pages, but not
+	 * pin them. The memory will then be pinned only around the jobs that
+	 * specify the region as an external resource.
+	 */
+	if (reg->flags & KBASE_REG_SHARE_BOTH) {
+		pages = user_buf->pages;
+		*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+	}
+
+	down_read(&current->mm->mmap_sem);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
+#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
+KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#else
+			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+#endif
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+#else
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#endif
+
+	up_read(&current->mm->mmap_sem);
+
+	if (faulted_pages != *va_pages)
+		goto fault_mismatch;
+
+	reg->gpu_alloc->nents = 0;
+	reg->extent = 0;
+
+	if (pages) {
+		struct device *dev = kctx->kbdev->dev;
+		unsigned long local_size = user_buf->size;
+		unsigned long offset = user_buf->address & ~PAGE_MASK;
+		struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
+
+		/* Top bit signifies that this was pinned on import */
+		user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
+
+		for (i = 0; i < faulted_pages; i++) {
+			dma_addr_t dma_addr;
+			unsigned long min;
+
+			min = MIN(PAGE_SIZE - offset, local_size);
+			dma_addr = dma_map_page(dev, pages[i],
+					offset, min,
+					DMA_BIDIRECTIONAL);
+			if (dma_mapping_error(dev, dma_addr))
+				goto unwind_dma_map;
+
+			user_buf->dma_addrs[i] = dma_addr;
+			pa[i] = as_tagged(page_to_phys(pages[i]));
+
+			local_size -= min;
+			offset = 0;
+		}
+
+		reg->gpu_alloc->nents = faulted_pages;
+	}
+
+	return reg;
+
+unwind_dma_map:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				user_buf->dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+fault_mismatch:
+	if (pages) {
+		for (i = 0; i < faulted_pages; i++)
+			put_page(pages[i]);
+	}
+no_page_array:
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	return NULL;
+
+}
+
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+		    u64 nents, struct base_mem_aliasing_info *ai,
+		    u64 *num_pages)
+{
+	struct kbase_va_region *reg;
+	u64 gpu_va;
+	size_t i;
+	bool coherent;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(ai);
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	/* mask to only allowed flags */
+	*flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+		   BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
+		   BASE_MEM_PROT_CPU_RD | BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+
+	if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_alias called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+	coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 ||
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0;
+
+	if (!stride)
+		goto bad_stride;
+
+	if (!nents)
+		goto bad_nents;
+
+	if ((nents * stride) > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* calculate the number of pages this alias will cover */
+	*num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* 64-bit tasks must MMAP anyway, but not expose this address to
+		 * clients */
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0,
+				*num_pages,
+				KBASE_REG_ZONE_SAME_VA);
+	} else {
+#else
+	if (1) {
+#endif
+		reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
+				0, *num_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	/* zero-sized page array, as we don't need one/can support one */
+	reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS,
+		BASE_MEM_GROUP_DEFAULT);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->gpu_alloc->imported.alias.nents = nents;
+	reg->gpu_alloc->imported.alias.stride = stride;
+	reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents);
+	if (!reg->gpu_alloc->imported.alias.aliased)
+		goto no_aliased_array;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* validate and add src handles */
+	for (i = 0; i < nents; i++) {
+		if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+			if (ai[i].handle.basep.handle !=
+			    BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE)
+				goto bad_handle; /* unsupported magic handle */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+		} else {
+			struct kbase_va_region *aliasing_reg;
+			struct kbase_mem_phy_alloc *alloc;
+
+			aliasing_reg = kbase_region_tracker_find_region_base_address(
+				kctx,
+				(ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT);
+
+			/* validate found region */
+			if (kbase_is_region_invalid_or_free(aliasing_reg))
+				goto bad_handle; /* Not found/already free */
+			if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+				goto bad_handle; /* Ephemeral region */
+			if (!(aliasing_reg->flags & KBASE_REG_GPU_CACHED))
+				goto bad_handle; /* GPU uncached memory */
+			if (!aliasing_reg->gpu_alloc)
+				goto bad_handle; /* No alloc */
+			if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+				goto bad_handle; /* Not a native alloc */
+			if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0))
+				goto bad_handle;
+				/* Non-coherent memory cannot alias
+				   coherent memory, and vice versa.*/
+
+			/* check size against stride */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+
+			alloc = aliasing_reg->gpu_alloc;
+
+			/* check against the alloc's size */
+			if (ai[i].offset > alloc->nents)
+				goto bad_handle; /* beyond end */
+			if (ai[i].offset + ai[i].length > alloc->nents)
+				goto bad_handle; /* beyond end */
+
+			reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+			reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset;
+		}
+	}
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
+			goto no_cookie;
+		}
+		/* return a cookie */
+		gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << gpu_va);
+		BUG_ON(kctx->pending_regions[gpu_va]);
+		kctx->pending_regions[gpu_va] = reg;
+
+		/* relocate to correct base */
+		gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		gpu_va <<= PAGE_SHIFT;
+	} else /* we control the VA */ {
+#else
+	if (1) {
+#endif
+		if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+			dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags &= ~KBASE_REG_GROWABLE;
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return gpu_va;
+
+#ifdef CONFIG_64BIT
+no_cookie:
+#endif
+no_mmap:
+bad_handle:
+	kbase_gpu_vm_unlock(kctx);
+no_aliased_array:
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_reg:
+bad_size:
+bad_nents:
+bad_stride:
+bad_flags:
+	return 0;
+}
+
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages,
+		u64 *flags)
+{
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_pages);
+	KBASE_DEBUG_ASSERT(flags);
+
+	if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) &&
+			kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA))
+		*flags |= BASE_MEM_SAME_VA;
+
+	if (!kbase_check_import_flags(*flags)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 &&
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) {
+		/* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
+		dev_warn(kctx->kbdev->dev,
+				"padding is only supported for UMM");
+		goto bad_flags;
+	}
+
+	switch (type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case BASE_MEM_IMPORT_TYPE_UMM: {
+		int fd;
+
+		if (get_user(fd, (int __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_umm(kctx, fd, va_pages, flags,
+					padding);
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
+		struct base_mem_import_user_buffer user_buffer;
+		void __user *uptr;
+
+		if (copy_from_user(&user_buffer, phandle,
+				sizeof(user_buffer))) {
+			reg = NULL;
+		} else {
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				uptr = compat_ptr(user_buffer.ptr);
+			else
+#endif
+				uptr = u64_to_user_ptr(user_buffer.ptr);
+
+			reg = kbase_mem_from_user_buffer(kctx,
+					(unsigned long)uptr, user_buffer.length,
+					va_pages, flags);
+		}
+		break;
+	}
+	default: {
+		reg = NULL;
+		break;
+	}
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies)
+			goto no_cookie;
+		/* return a cookie */
+		*gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << *gpu_va);
+		BUG_ON(kctx->pending_regions[*gpu_va]);
+		kctx->pending_regions[*gpu_va] = reg;
+
+		/* relocate to correct base */
+		*gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		*gpu_va <<= PAGE_SHIFT;
+
+	} else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES)  {
+		/* we control the VA, mmap now to the GPU */
+		if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	} else {
+		/* we control the VA, but nothing to mmap yet */
+		if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	/* clear out private flags */
+	*flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+
+no_gpu_va:
+no_cookie:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+no_reg:
+bad_flags:
+	*gpu_va = 0;
+	*va_pages = 0;
+	*flags = 0;
+	return -ENOMEM;
+}
+
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	struct tagged_addr *phy_pages;
+	u64 delta = new_pages - old_pages;
+	int ret = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Map the new pages into the GPU */
+	phy_pages = kbase_get_gpu_phy_pages(reg);
+	ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu,
+		reg->start_pfn + old_pages, phy_pages + old_pages, delta,
+		reg->flags, kctx->as_nr, reg->gpu_alloc->group_id);
+
+	return ret;
+}
+
+void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 gpu_va_start = reg->start_pfn;
+
+	if (new_pages == old_pages)
+		/* Nothing to do */
+		return;
+
+	unmap_mapping_range(kctx->filp->f_inode->i_mapping,
+			(gpu_va_start + new_pages)<<PAGE_SHIFT,
+			(old_pages - new_pages)<<PAGE_SHIFT, 1);
+}
+
+int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 delta = old_pages - new_pages;
+	int ret = 0;
+
+	ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+			reg->start_pfn + new_pages, delta, kctx->as_nr);
+
+	return ret;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
+{
+	u64 old_pages;
+	u64 delta;
+	int res = -EINVAL;
+	struct kbase_va_region *reg;
+	bool read_locked = false;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+	if (gpu_addr & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev, "kbase:mem_commit: gpu_addr: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (kbase_is_region_invalid_or_free(reg))
+		goto out_unlock;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+		goto out_unlock;
+
+	if (0 == (reg->flags & KBASE_REG_GROWABLE))
+		goto out_unlock;
+
+	/* Would overflow the VA region */
+	if (new_pages > reg->nr_pages)
+		goto out_unlock;
+
+	/* can't be mapped more than once on the GPU */
+	if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1)
+		goto out_unlock;
+	/* can't grow regions which are ephemeral */
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED
+	/* Reject resizing commit size */
+	if (reg->flags & KBASE_REG_PF_GROW)
+		new_pages = reg->nr_pages;
+#endif
+
+	if (new_pages == reg->gpu_alloc->nents) {
+		/* no change */
+		res = 0;
+		goto out_unlock;
+	}
+
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (new_pages > old_pages) {
+		delta = new_pages - old_pages;
+
+		/*
+		 * No update to the mm so downgrade the writer lock to a read
+		 * lock so other readers aren't blocked after this point.
+		 */
+		downgrade_write(&current->mm->mmap_sem);
+		read_locked = true;
+
+		/* Allocate some more pages */
+		if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+		if (reg->cpu_alloc != reg->gpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					reg->gpu_alloc, delta) != 0) {
+				res = -ENOMEM;
+				kbase_free_phy_pages_helper(reg->cpu_alloc,
+						delta);
+				goto out_unlock;
+			}
+		}
+
+		/* No update required for CPU mappings, that's done on fault. */
+
+		/* Update GPU mapping. */
+		res = kbase_mem_grow_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* On error free the new pages */
+		if (res) {
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+	} else {
+		delta = old_pages - new_pages;
+
+		/* Update all CPU mapping(s) */
+		kbase_mem_shrink_cpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* Update the GPU mapping */
+		res = kbase_mem_shrink_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+
+		kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+		if (reg->cpu_alloc != reg->gpu_alloc)
+			kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	if (read_locked)
+		up_read(&current->mm->mmap_sem);
+	else
+		up_write(&current->mm->mmap_sem);
+
+	return res;
+}
+
+static void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* non-atomic as we're under Linux' mm lock */
+	map->count++;
+}
+
+static void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* non-atomic as we're under Linux' mm lock */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	kbase_gpu_vm_lock(map->kctx);
+
+	if (map->free_on_close) {
+		KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
+				KBASE_REG_ZONE_SAME_VA);
+		/* Avoid freeing memory on the process death which results in
+		 * GPU Page Fault. Memory will be freed in kbase_destroy_context
+		 */
+		if (!(current->flags & PF_EXITING))
+			kbase_mem_free_region(map->kctx, map->region);
+	}
+
+	list_del(&map->mappings_list);
+
+	kbase_va_region_alloc_put(map->kctx, map->region);
+	kbase_gpu_vm_unlock(map->kctx);
+
+	kbase_mem_phy_alloc_put(map->alloc);
+	kfree(map);
+}
+
+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
+
+static struct kbase_aliased *get_aliased_alloc(struct vm_area_struct *vma,
+					struct kbase_va_region *reg,
+					pgoff_t *start_off,
+					size_t nr_pages)
+{
+	struct kbase_aliased *aliased =
+		reg->cpu_alloc->imported.alias.aliased;
+
+	if (!reg->cpu_alloc->imported.alias.stride ||
+			reg->nr_pages < (*start_off + nr_pages)) {
+		return NULL;
+	}
+
+	while (*start_off >= reg->cpu_alloc->imported.alias.stride) {
+		aliased++;
+		*start_off -= reg->cpu_alloc->imported.alias.stride;
+	}
+
+	if (!aliased->alloc) {
+		/* sink page not available for dumping map */
+		return NULL;
+	}
+
+	if ((*start_off + nr_pages) > aliased->length) {
+		/* not fully backed by physical pages */
+		return NULL;
+	}
+
+	return aliased;
+}
+
+#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
+static vm_fault_t kbase_cpu_vm_fault(struct vm_area_struct *vma,
+			struct vm_fault *vmf)
+{
+#else
+static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+#endif
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+	pgoff_t map_start_pgoff;
+	pgoff_t fault_pgoff;
+	size_t i;
+	pgoff_t addr;
+	size_t nents;
+	struct tagged_addr *pages;
+	vm_fault_t ret = VM_FAULT_SIGBUS;
+	struct memory_group_manager_device *mgm_dev;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	map_start_pgoff = vma->vm_pgoff - map->region->start_pfn;
+
+	kbase_gpu_vm_lock(map->kctx);
+	if (unlikely(map->region->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) {
+		struct kbase_aliased *aliased =
+		      get_aliased_alloc(vma, map->region, &map_start_pgoff, 1);
+
+		if (!aliased)
+			goto exit;
+
+		nents = aliased->length;
+		pages = aliased->alloc->pages + aliased->offset;
+	} else  {
+		nents = map->alloc->nents;
+		pages = map->alloc->pages;
+	}
+
+	fault_pgoff = map_start_pgoff + (vmf->pgoff - vma->vm_pgoff);
+
+	if (fault_pgoff >= nents)
+		goto exit;
+
+	/* Fault on access to DONT_NEED regions */
+	if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED))
+		goto exit;
+
+	/* We are inserting all valid pages from the start of CPU mapping and
+	 * not from the fault location (the mmap handler was previously doing
+	 * the same).
+	 */
+	i = map_start_pgoff;
+	addr = (pgoff_t)(vma->vm_start >> PAGE_SHIFT);
+	mgm_dev = map->kctx->kbdev->mgm_dev;
+	while (i < nents && (addr < vma->vm_end >> PAGE_SHIFT)) {
+
+		ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev,
+			map->alloc->group_id, vma, addr << PAGE_SHIFT,
+			PFN_DOWN(as_phys_addr_t(pages[i])), vma->vm_page_prot);
+
+		if (ret != VM_FAULT_NOPAGE)
+			goto exit;
+
+		i++; addr++;
+	}
+
+exit:
+	kbase_gpu_vm_unlock(map->kctx);
+	return ret;
+}
+
+const struct vm_operations_struct kbase_vm_ops = {
+	.open  = kbase_cpu_vm_open,
+	.close = kbase_cpu_vm_close,
+	.fault = kbase_cpu_vm_fault
+};
+
+static int kbase_cpu_mmap(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		struct vm_area_struct *vma,
+		void *kaddr,
+		size_t nr_pages,
+		unsigned long aligned_offset,
+		int free_on_close)
+{
+	struct kbase_cpu_mapping *map;
+	int err = 0;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+	if (!map) {
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+	 * VM_DONTEXPAND - disable mremap on this region
+	 * VM_IO - disables paging
+	 * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+	 * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+	 *               This is needed to support using the dedicated and
+	 *               the OS based memory backends together.
+	 */
+	/*
+	 * This will need updating to propagate coherency flags
+	 * See MIDBASE-1057
+	 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_ops;
+	vma->vm_private_data = map;
+
+	if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS && nr_pages) {
+		pgoff_t rel_pgoff = vma->vm_pgoff - reg->start_pfn +
+					(aligned_offset >> PAGE_SHIFT);
+		struct kbase_aliased *aliased =
+			get_aliased_alloc(vma, reg, &rel_pgoff, nr_pages);
+
+		if (!aliased) {
+			err = -EINVAL;
+			kfree(map);
+			goto out;
+		}
+	}
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+	    (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+		/* We can't map vmalloc'd memory uncached.
+		 * Other memory will have been returned from
+		 * kbase_mem_pool which would be
+		 * suitable for mapping uncached.
+		 */
+		BUG_ON(kaddr);
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	if (!kaddr) {
+		vma->vm_flags |= VM_PFNMAP;
+	} else {
+		WARN_ON(aligned_offset);
+		/* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+		vma->vm_flags |= VM_MIXEDMAP;
+		/* vmalloc remaping is easy... */
+		err = remap_vmalloc_range(vma, kaddr, 0);
+		WARN_ON(err);
+	}
+
+	if (err) {
+		kfree(map);
+		goto out;
+	}
+
+	map->region = kbase_va_region_alloc_get(kctx, reg);
+	map->free_on_close = free_on_close;
+	map->kctx = kctx;
+	map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->count = 1; /* start with one ref */
+
+	if (reg->flags & KBASE_REG_CPU_CACHED)
+		map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+	return err;
+}
+
+#ifdef CONFIG_MALI_VECTOR_DUMP
+static void kbase_free_unused_jit_allocations(struct kbase_context *kctx)
+{
+	/* Free all cached/unused JIT allocations as their contents are not
+	 * really needed for the replay. The GPU writes to them would already
+	 * have been captured through the GWT mechanism.
+	 * This considerably reduces the size of mmu-snapshot-file and it also
+	 * helps avoid segmentation fault issue during vector dumping of
+	 * complex contents when the unused JIT allocations are accessed to
+	 * dump their contents (as they appear in the page tables snapshot)
+	 * but they got freed by the shrinker under low memory scenarios
+	 * (which do occur with complex contents).
+	 */
+	while (kbase_jit_evict(kctx))
+		;
+}
+#endif
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx,
+			struct vm_area_struct *vma,
+			struct kbase_va_region **const reg,
+			void **const kmap_addr)
+{
+	struct kbase_va_region *new_reg;
+	void *kaddr;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+#ifdef CONFIG_MALI_VECTOR_DUMP
+	kbase_free_unused_jit_allocations(kctx);
+#endif
+
+	kaddr = kbase_mmu_dump(kctx, nr_pages);
+
+	if (!kaddr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages,
+			KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW,
+		BASE_MEM_GROUP_DEFAULT);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_va_region;
+	}
+
+	*kmap_addr = kaddr;
+	*reg = new_reg;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+	return 0;
+
+out_no_alloc:
+out_va_region:
+	kbase_free_alloced_region(new_reg);
+out:
+	return err;
+}
+
+
+void kbase_os_mem_map_lock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	down_read(&mm->mmap_sem);
+}
+
+void kbase_os_mem_map_unlock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	up_read(&mm->mmap_sem);
+}
+
+static int kbasep_reg_mmap(struct kbase_context *kctx,
+			   struct vm_area_struct *vma,
+			   struct kbase_va_region **regm,
+			   size_t *nr_pages, size_t *aligned_offset)
+
+{
+	int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+	struct kbase_va_region *reg;
+	int err = 0;
+
+	*aligned_offset = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n");
+
+	/* SAME_VA stuff, fetch the right region */
+	reg = kctx->pending_regions[cookie];
+	if (!reg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if ((reg->flags & KBASE_REG_GPU_NX) && (reg->nr_pages != *nr_pages)) {
+		/* incorrect mmap size */
+		/* leave the cookie for a potential later
+		 * mapping, or to be reclaimed later when the
+		 * context is freed */
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) ||
+	    (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) {
+		/* VM flags inconsistent with region flags */
+		err = -EPERM;
+		dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n",
+							__FILE__, __LINE__);
+		goto out;
+	}
+
+	/* adjust down nr_pages to what we have physically */
+	*nr_pages = kbase_reg_current_backed_size(reg);
+
+	if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset,
+						reg->nr_pages, 1) != 0) {
+		dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__);
+		/* Unable to map in GPU space. */
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+	/* no need for the cookie anymore */
+	kctx->pending_regions[cookie] = NULL;
+	kctx->cookies |= (1UL << cookie);
+
+	/*
+	 * Overwrite the offset with the region start_pfn, so we effectively
+	 * map from offset 0 in the region. However subtract the aligned
+	 * offset so that when user space trims the mapping the beginning of
+	 * the trimmed VMA has the correct vm_pgoff;
+	 */
+	vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT);
+out:
+	*regm = reg;
+	dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n");
+
+	return err;
+}
+
+int kbase_context_mmap(struct kbase_context *const kctx,
+	struct vm_area_struct *const vma)
+{
+	struct kbase_va_region *reg = NULL;
+	void *kaddr = NULL;
+	size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int err = 0;
+	int free_on_close = 0;
+	struct device *dev = kctx->kbdev->dev;
+	size_t aligned_offset = 0;
+
+	dev_dbg(dev, "kbase_mmap\n");
+
+	if (!(vma->vm_flags & VM_READ))
+		vma->vm_flags &= ~VM_MAYREAD;
+	if (!(vma->vm_flags & VM_WRITE))
+		vma->vm_flags &= ~VM_MAYWRITE;
+
+	if (0 == nr_pages) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
+		/* The non-mapped tracking helper page */
+		err = kbase_tracking_page_setup(kctx, vma);
+		goto out_unlock;
+	}
+
+	/* if not the MTP, verify that the MTP has been mapped */
+	rcu_read_lock();
+	/* catches both when the special page isn't present or
+	 * when we've forked */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		err = -EINVAL;
+		rcu_read_unlock();
+		goto out_unlock;
+	}
+	rcu_read_unlock();
+
+	switch (vma->vm_pgoff) {
+	case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
+	case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE):
+		/* Illegal handle for direct map */
+		err = -EINVAL;
+		goto out_unlock;
+	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+		/* MMU dump */
+		err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+	     PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+		err = kbasep_reg_mmap(kctx, vma, &reg, &nr_pages,
+							&aligned_offset);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	}
+	default: {
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+					(u64)vma->vm_pgoff << PAGE_SHIFT);
+
+		if (!kbase_is_region_invalid_or_free(reg)) {
+			/* will this mapping overflow the size of the region? */
+			if (nr_pages > (reg->nr_pages -
+					(vma->vm_pgoff - reg->start_pfn))) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
+			if ((vma->vm_flags & VM_READ &&
+					!(reg->flags & KBASE_REG_CPU_RD)) ||
+					(vma->vm_flags & VM_WRITE &&
+					!(reg->flags & KBASE_REG_CPU_WR))) {
+				/* VM flags inconsistent with region flags */
+				err = -EPERM;
+				dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+				goto out_unlock;
+			}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+			if (KBASE_MEM_TYPE_IMPORTED_UMM ==
+							reg->cpu_alloc->type) {
+				if (0 != (vma->vm_pgoff - reg->start_pfn)) {
+					err = -EINVAL;
+					dev_warn(dev, "%s:%d attempt to do a partial map in a dma_buf: non-zero offset to dma_buf mapping!\n",
+						__FILE__, __LINE__);
+					goto out_unlock;
+				}
+				err = dma_buf_mmap(
+					reg->cpu_alloc->imported.umm.dma_buf,
+					vma, vma->vm_pgoff - reg->start_pfn);
+				goto out_unlock;
+			}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+			if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+				/* initial params check for aliased dumping map */
+				if (nr_pages > reg->gpu_alloc->imported.alias.stride ||
+					!reg->gpu_alloc->imported.alias.stride ||
+					!nr_pages) {
+					err = -EINVAL;
+					dev_warn(dev, "mmap aliased: invalid params!\n");
+					goto out_unlock;
+				}
+			}
+			else if (reg->cpu_alloc->nents <
+					(vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+				/* limit what we map to the amount currently backed */
+				if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
+					nr_pages = 0;
+				else
+					nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn);
+			}
+		} else {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+	} /* default */
+	} /* switch */
+
+	err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset,
+			free_on_close);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
+		/* MMU dump - userspace should now have a reference on
+		 * the pages, so we can now free the kernel mapping */
+		vfree(kaddr);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+out:
+	if (err)
+		dev_err(dev, "mmap failed %d\n", err);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_context_mmap);
+
+void kbase_sync_mem_regions(struct kbase_context *kctx,
+		struct kbase_vmap_struct *map, enum kbase_sync_type dest)
+{
+	size_t i;
+	off_t const offset = map->offset_in_page;
+	size_t const page_count = PFN_UP(offset + map->size);
+
+	/* Sync first page */
+	size_t sz = MIN(((size_t) PAGE_SIZE - offset), map->size);
+	struct tagged_addr cpu_pa = map->cpu_pages[0];
+	struct tagged_addr gpu_pa = map->gpu_pages[0];
+
+	kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, dest);
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		cpu_pa = map->cpu_pages[i];
+		gpu_pa = map->gpu_pages[i];
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, dest);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1) {
+		cpu_pa = map->cpu_pages[page_count - 1];
+		gpu_pa = map->gpu_pages[page_count - 1];
+		sz = ((offset + map->size - 1) & ~PAGE_MASK) + 1;
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, dest);
+	}
+}
+
+static int kbase_vmap_phy_pages(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 offset_bytes, size_t size,
+		struct kbase_vmap_struct *map)
+{
+	unsigned long page_index;
+	unsigned int offset_in_page = offset_bytes & ~PAGE_MASK;
+	size_t page_count = PFN_UP(offset_in_page + size);
+	struct tagged_addr *page_array;
+	struct page **pages;
+	void *cpu_addr = NULL;
+	pgprot_t prot;
+	size_t i;
+
+	if (!size || !map || !reg->cpu_alloc || !reg->gpu_alloc)
+		return -EINVAL;
+
+	/* check if page_count calculation will wrap */
+	if (size > ((size_t)-1 / PAGE_SIZE))
+		return -EINVAL;
+
+	page_index = offset_bytes >> PAGE_SHIFT;
+
+	/* check if page_index + page_count will wrap */
+	if (-1UL - page_count < page_index)
+		return -EINVAL;
+
+	if (page_index + page_count > kbase_reg_current_backed_size(reg))
+		return -ENOMEM;
+
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		return -EINVAL;
+
+	prot = PAGE_KERNEL;
+	if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
+		/* Map uncached */
+		prot = pgprot_writecombine(prot);
+	}
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+	if (!page_array)
+		return -ENOMEM;
+
+	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	for (i = 0; i < page_count; i++)
+		pages[i] = as_page(page_array[page_index + i]);
+
+	/* Note: enforcing a RO prot_request onto prot is not done, since:
+	 * - CPU-arch-specific integration required
+	 * - kbase_vmap() requires no access checks to be made/enforced */
+
+	cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+
+	kfree(pages);
+
+	if (!cpu_addr)
+		return -ENOMEM;
+
+	map->offset_in_page = offset_in_page;
+	map->cpu_alloc = reg->cpu_alloc;
+	map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
+	map->gpu_alloc = reg->gpu_alloc;
+	map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
+	map->addr = (void *)((uintptr_t)cpu_addr + offset_in_page);
+	map->size = size;
+	map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) &&
+		!kbase_mem_is_imported(map->gpu_alloc->type);
+
+	if (map->sync_needed)
+		kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU);
+
+	return 0;
+}
+
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map)
+{
+	struct kbase_va_region *reg;
+	void *addr = NULL;
+	u64 offset_bytes;
+	struct kbase_mem_phy_alloc *cpu_alloc;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+	int err;
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			gpu_addr);
+	if (kbase_is_region_invalid_or_free(reg))
+		goto out_unlock;
+
+	/* check access permissions can be satisfied
+	 * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR}
+	 */
+	if ((reg->flags & prot_request) != prot_request)
+		goto out_unlock;
+
+	offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT);
+	cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map);
+	if (err < 0)
+		goto fail_vmap_phy_pages;
+
+	addr = map->addr;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return addr;
+
+fail_vmap_phy_pages:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(cpu_alloc);
+	kbase_mem_phy_alloc_put(gpu_alloc);
+
+	return NULL;
+}
+
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map)
+{
+	/* 0 is specified for prot_request to indicate no access checks should
+	 * be made.
+	 *
+	 * As mentioned in kbase_vmap_prot() this means that a kernel-side
+	 * CPU-RO mapping is not enforced to allow this to work */
+	return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map);
+}
+KBASE_EXPORT_TEST_API(kbase_vmap);
+
+static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
+		struct kbase_vmap_struct *map)
+{
+	void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+	vunmap(addr);
+
+	if (map->sync_needed)
+		kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE);
+
+	map->offset_in_page = 0;
+	map->cpu_pages = NULL;
+	map->gpu_pages = NULL;
+	map->addr = NULL;
+	map->size = 0;
+	map->sync_needed = false;
+}
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+	kbase_vunmap_phy_pages(kctx, map);
+	map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+	map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+}
+KBASE_EXPORT_TEST_API(kbase_vunmap);
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
+{
+	struct mm_struct *mm;
+
+	rcu_read_lock();
+	mm = rcu_dereference(kctx->process_mm);
+	if (mm) {
+		atomic_add(pages, &kctx->nonmapped_pages);
+#ifdef SPLIT_RSS_COUNTING
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+#else
+		spin_lock(&mm->page_table_lock);
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+		spin_unlock(&mm->page_table_lock);
+#endif
+	}
+	rcu_read_unlock();
+}
+
+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
+{
+	int pages;
+	struct mm_struct *mm;
+
+	spin_lock(&kctx->mm_update_lock);
+	mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
+	if (!mm) {
+		spin_unlock(&kctx->mm_update_lock);
+		return;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, NULL);
+	spin_unlock(&kctx->mm_update_lock);
+	synchronize_rcu();
+
+	pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+#ifdef SPLIT_RSS_COUNTING
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+#else
+	spin_lock(&mm->page_table_lock);
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+	spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx;
+
+	kctx = vma->vm_private_data;
+	kbasep_os_process_page_usage_drain(kctx);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+	.close = kbase_special_vm_close,
+};
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
+{
+	/* check that this is the only tracking page */
+	spin_lock(&kctx->mm_update_lock);
+	if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
+		spin_unlock(&kctx->mm_update_lock);
+		return -EFAULT;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, current->mm);
+
+	spin_unlock(&kctx->mm_update_lock);
+
+	/* no real access */
+	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_special_ops;
+	vma->vm_private_data = kctx;
+
+	return 0;
+}
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
new file mode 100755
index 0000000..e34972f
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -0,0 +1,443 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_linux.h
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_MEM_LINUX_H_
+#define _KBASE_MEM_LINUX_H_
+
+/** A HWC dump mapping */
+struct kbase_hwc_dma_mapping {
+	void       *cpu_va;
+	dma_addr_t  dma_pa;
+	size_t      size;
+};
+
+/**
+ * kbase_mem_alloc - Create a new allocation for GPU
+ *
+ * @kctx:         The kernel context
+ * @va_pages:     The number of pages of virtual address space to reserve
+ * @commit_pages: The number of physical pages to allocate upfront
+ * @extent:       The number of extra pages to allocate on each GPU fault which
+ *                grows the region.
+ * @flags:        bitmask of BASE_MEM_* flags to convey special requirements &
+ *                properties for the new allocation.
+ * @gpu_va:       Start address of the memory region which was allocated from GPU
+ *                virtual address space.
+ *
+ * Return: 0 on success or error code
+ */
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
+		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
+		u64 *gpu_va);
+
+/**
+ * kbase_mem_query - Query properties of a GPU memory region
+ *
+ * @kctx:     The kernel context
+ * @gpu_addr: A GPU address contained within the memory region
+ * @query:    The type of query, from KBASE_MEM_QUERY_* flags, which could be
+ *            regarding the amount of backing physical memory allocated so far
+ *            for the region or the size of the region or the flags associated
+ *            with the region.
+ * @out:      Pointer to the location to store the result of query.
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query,
+		u64 *const out);
+
+/**
+ * kbase_mem_import - Import the external memory for use by the GPU
+ *
+ * @kctx:     The kernel context
+ * @type:     Type of external memory
+ * @phandle:  Handle to the external memory interpreted as per the type.
+ * @padding:  Amount of extra VA pages to append to the imported buffer
+ * @gpu_va:   GPU address assigned to the imported external memory
+ * @va_pages: Size of the memory region reserved from the GPU address space
+ * @flags:    bitmask of BASE_MEM_* flags to convey special requirements &
+ *            properties for the new allocation representing the external
+ *            memory.
+ * Return: 0 on success or error code
+ */
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages,
+		u64 *flags);
+
+/**
+ * kbase_mem_alias - Create a new allocation for GPU, aliasing one or more
+ *                   memory regions
+ *
+ * @kctx:      The kernel context
+ * @flags:     bitmask of BASE_MEM_* flags.
+ * @stride:    Bytes between start of each memory region
+ * @nents:     The number of regions to pack together into the alias
+ * @ai:        Pointer to the struct containing the memory aliasing info
+ * @num_pages: Number of pages the alias will cover
+ *
+ * Return: 0 on failure or otherwise the GPU VA for the alias
+ */
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+
+/**
+ * kbase_mem_flags_change - Change the flags for a memory region
+ *
+ * @kctx:     The kernel context
+ * @gpu_addr: A GPU address contained within the memory region to modify.
+ * @flags:    The new flags to set
+ * @mask:     Mask of the flags, from BASE_MEM_*, to modify.
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
+
+/**
+ * kbase_mem_commit - Change the physical backing size of a region
+ *
+ * @kctx: The kernel context
+ * @gpu_addr: Handle to the memory region
+ * @new_pages: Number of physical pages to back the region with
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages);
+
+/**
+ * kbase_context_mmap - Memory map method, gets invoked when mmap system call is
+ *                      issued on device file /dev/malixx.
+ * @kctx: The kernel context
+ * @vma:  Pointer to the struct containing the info where the GPU allocation
+ *        will be mapped in virtual address space of CPU.
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_context_mmap(struct kbase_context *kctx, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to initialize.
+ *
+ * Return: Zero on success or -errno on failure.
+ */
+int kbase_mem_evictable_init(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to de-initialize.
+ */
+void kbase_mem_evictable_deinit(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the grow
+ * @old_pages: The number of pages before the grow
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Expand the GPU mapping to encompass the new psychical pages which have
+ * been added to the allocation.
+ *
+ * Note: Caller must be holding the region lock.
+ */
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction
+ * @gpu_alloc: The physical allocation to make evictable
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Take the provided region and make all the physical pages within it
+ * reclaimable by the kernel, updating the per-process VM stats as well.
+ * Remove any CPU mappings (as these can't be removed in the shrinker callback
+ * as mmap_sem might already be taken) but leave the GPU mapping intact as
+ * and until the shrinker reclaims the allocation.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc);
+
+/**
+ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for
+ * eviction.
+ * @alloc: The physical allocation to remove eviction eligibility from.
+ *
+ * Return: True if the allocation had its backing restored and false if
+ * it hasn't.
+ *
+ * Make the physical pages in the region no longer reclaimable and update the
+ * per-process stats, if the shrinker has already evicted the memory then
+ * re-allocate it if the region is still alive.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
+
+struct kbase_vmap_struct {
+	off_t offset_in_page;
+	struct kbase_mem_phy_alloc *cpu_alloc;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+	struct tagged_addr *cpu_pages;
+	struct tagged_addr *gpu_pages;
+	void *addr;
+	size_t size;
+	bool sync_needed;
+};
+
+
+/**
+ * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the
+ * requested access permissions are supported
+ * @kctx:         Context the VA range belongs to
+ * @gpu_addr:     Start address of VA range
+ * @size:         Size of VA range
+ * @prot_request: Flags indicating how the caller will then access the memory
+ * @map:          Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check
+ * whether the region should allow the intended access, and return an error if
+ * disallowed. This is essential for security of imported memory, particularly
+ * a user buf from SHM mapped into the process as RO. In that case, write
+ * access must be checked if the intention is for kernel to write to the
+ * memory.
+ *
+ * The checks are also there to help catch access errors on memory where
+ * security is not a concern: imported memory that is always RW, and memory
+ * that was allocated and owned by the process attached to @kctx. In this case,
+ * it helps to identify memory that was was mapped with the wrong access type.
+ *
+ * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases
+ * where either the security of memory is solely dependent on those flags, or
+ * when userspace code was expecting only the GPU to access the memory (e.g. HW
+ * workarounds).
+ *
+ * All cache maintenance operations shall be ignored if the
+ * memory region has been imported.
+ *
+ */
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vmap - Map a GPU VA range into the kernel safely
+ * @kctx:     Context the VA range belongs to
+ * @gpu_addr: Start address of VA range
+ * @size:     Size of VA range
+ * @map:      Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no
+ * checks to ensure the security of e.g. imported user bufs from RO SHM.
+ *
+ * Note: All cache maintenance operations shall be ignored if the memory region
+ * has been imported.
+ */
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vunmap - Unmap a GPU VA range from the kernel
+ * @kctx: Context the VA range belongs to
+ * @map:  Structure describing the mapping from the corresponding kbase_vmap()
+ *        call
+ *
+ * Unmaps a GPU VA range from the kernel, given its @map structure obtained
+ * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * The reference taken on pages during kbase_vmap() is released.
+ *
+ * Note: All cache maintenance operations shall be ignored if the memory region
+ * has been imported.
+ */
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
+
+extern const struct vm_operations_struct kbase_vm_ops;
+
+/**
+ * kbase_sync_mem_regions - Perform the cache maintenance for the kernel mode
+ *                          CPU mapping.
+ * @kctx: Context the CPU mapping belongs to.
+ * @map:  Structure describing the CPU mapping, setup previously by the
+ *        kbase_vmap() call.
+ * @dest: Indicates the type of maintenance required (i.e. flush or invalidate)
+ *
+ * Note: The caller shall ensure that CPU mapping is not revoked & remains
+ * active whilst the maintenance is in progress.
+ */
+void kbase_sync_mem_regions(struct kbase_context *kctx,
+		struct kbase_vmap_struct *map, enum kbase_sync_type dest);
+
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ */
+void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_phy_alloc_mapping_term - Terminate the kernel side mapping of a
+ *                                physical allocation
+ * @kctx:  The kernel base context associated with the mapping
+ * @alloc: Pointer to the allocation to terminate
+ *
+ * This function will unmap the kernel mapping, and free any structures used to
+ * track it.
+ */
+void kbase_phy_alloc_mapping_term(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_phy_alloc_mapping_get - Get a kernel-side CPU pointer to the permanent
+ *                               mapping of a physical allocation
+ * @kctx:             The kernel base context @gpu_addr will be looked up in
+ * @gpu_addr:         The gpu address to lookup for the kernel-side CPU mapping
+ * @out_kern_mapping: Pointer to storage for a struct kbase_vmap_struct pointer
+ *                    which will be used for a call to
+ *                    kbase_phy_alloc_mapping_put()
+ *
+ * Return: Pointer to a kernel-side accessible location that directly
+ *         corresponds to @gpu_addr, or NULL on failure
+ *
+ * Looks up @gpu_addr to retrieve the CPU pointer that can be used to access
+ * that location kernel-side. Only certain kinds of memory have a permanent
+ * kernel mapping, refer to the internal functions
+ * kbase_reg_needs_kernel_mapping() and kbase_phy_alloc_mapping_init() for more
+ * information.
+ *
+ * If this function succeeds, a CPU access to the returned pointer will access
+ * the actual location represented by @gpu_addr. That is, the return value does
+ * not require any offset added to it to access the location specified in
+ * @gpu_addr
+ *
+ * The client must take care to either apply any necessary sync operations when
+ * accessing the data, or ensure that the enclosing region was coherent with
+ * the GPU, or uncached in the CPU.
+ *
+ * The refcount on the physical allocations backing the region are taken, so
+ * that they do not disappear whilst the client is accessing it. Once the
+ * client has finished accessing the memory, it must be released with a call to
+ * kbase_phy_alloc_mapping_put()
+ *
+ * Whilst this is expected to execute quickly (the mapping was already setup
+ * when the physical allocation was created), the call is not IRQ-safe due to
+ * the region lookup involved.
+ *
+ * An error code may indicate that:
+ * - a userside process has freed the allocation, and so @gpu_addr is no longer
+ *   valid
+ * - the region containing @gpu_addr does not support a permanent kernel mapping
+ */
+void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, u64 gpu_addr,
+		struct kbase_vmap_struct **out_kern_mapping);
+
+/**
+ * kbase_phy_alloc_mapping_put - Put a reference to the kernel-side mapping of a
+ *                               physical allocation
+ * @kctx:         The kernel base context associated with the mapping
+ * @kern_mapping: Pointer to a struct kbase_phy_alloc_mapping pointer obtained
+ *                from a call to kbase_phy_alloc_mapping_get()
+ *
+ * Releases the reference to the allocations backing @kern_mapping that was
+ * obtained through a call to kbase_phy_alloc_mapping_get(). This must be used
+ * when the client no longer needs to access the kernel-side CPU pointer.
+ *
+ * If this was the last reference on the underlying physical allocations, they
+ * will go through the normal allocation free steps, which also includes an
+ * unmap of the permanent kernel mapping for those allocations.
+ *
+ * Due to these operations, the function is not IRQ-safe. However it is
+ * expected to execute quickly in the normal case, i.e. when the region holding
+ * the physical allocation is still present.
+ */
+void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
+		struct kbase_vmap_struct *kern_mapping);
+
+/**
+ * kbase_get_cache_line_alignment - Return cache line alignment
+ *
+ * Helper function to return the maximum cache line alignment considering
+ * both CPU and GPU cache sizes.
+ *
+ * Return: CPU and GPU cache line alignment, in bytes.
+ *
+ * @kbdev: Device pointer.
+ */
+u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev);
+
+#endif				/* _KBASE_MEM_LINUX_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
new file mode 100755
index 0000000..7011603
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -0,0 +1,166 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_MEM_LOWLEVEL_H
+#define _KBASE_MEM_LOWLEVEL_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/dma-mapping.h>
+
+/**
+ * @brief Flags for kbase_phy_allocator_pages_alloc
+ */
+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0)	/** Default allocation flag */
+#define KBASE_PHY_PAGES_FLAG_CLEAR   (1 << 0)	/** Clear the pages after allocation */
+#define KBASE_PHY_PAGES_FLAG_POISON  (1 << 1)	/** Fill the memory with a poison value */
+
+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON)
+
+#define KBASE_PHY_PAGES_POISON_VALUE  0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */
+
+enum kbase_sync_type {
+	KBASE_SYNC_TO_CPU,
+	KBASE_SYNC_TO_DEVICE
+};
+
+struct tagged_addr { phys_addr_t tagged_addr; };
+
+#define HUGE_PAGE    (1u << 0)
+#define HUGE_HEAD    (1u << 1)
+#define FROM_PARTIAL (1u << 2)
+
+/*
+ * Note: if macro for converting physical address to page is not defined
+ * in the kernel itself, it is defined hereby. This is to avoid build errors
+ * which are reported during builds for some architectures.
+ */
+#ifndef phys_to_page
+#define phys_to_page(phys)	(pfn_to_page((phys) >> PAGE_SHIFT))
+#endif
+
+/**
+ * as_phys_addr_t - Retrieve the physical address from tagged address by
+ *                  masking the lower order 12 bits.
+ * @t: tagged address to be translated.
+ *
+ * Return: physical address corresponding to tagged address.
+ */
+static inline phys_addr_t as_phys_addr_t(struct tagged_addr t)
+{
+	return t.tagged_addr & PAGE_MASK;
+}
+
+/**
+ * as_page - Retrieve the struct page from a tagged address
+ * @t: tagged address to be translated.
+ *
+ * Return: pointer to struct page corresponding to tagged address.
+ */
+static inline struct page *as_page(struct tagged_addr t)
+{
+	return phys_to_page(as_phys_addr_t(t));
+}
+
+/**
+ * as_tagged - Convert the physical address to tagged address type though
+ *             there is no tag info present, the lower order 12 bits will be 0
+ * @phys: physical address to be converted to tagged type
+ *
+ * This is used for 4KB physical pages allocated by the Driver or imported pages
+ * and is needed as physical pages tracking object stores the reference for
+ * physical pages using tagged address type in lieu of the type generally used
+ * for physical addresses.
+ *
+ * Return: address of tagged address type.
+ */
+static inline struct tagged_addr as_tagged(phys_addr_t phys)
+{
+	struct tagged_addr t;
+
+	t.tagged_addr = phys & PAGE_MASK;
+	return t;
+}
+
+/**
+ * as_tagged_tag - Form the tagged address by storing the tag or metadata in the
+ *                 lower order 12 bits of physial address
+ * @phys: physical address to be converted to tagged address
+ * @tag:  tag to be stored along with the physical address.
+ *
+ * The tag info is used while freeing up the pages
+ *
+ * Return: tagged address storing physical address & tag.
+ */
+static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag)
+{
+	struct tagged_addr t;
+
+	t.tagged_addr = (phys & PAGE_MASK) | (tag & ~PAGE_MASK);
+	return t;
+}
+
+/**
+ * is_huge - Check if the physical page is one of the 512 4KB pages of the
+ *           large page which was not split to be used partially
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page belongs to large page, or false
+ */
+static inline bool is_huge(struct tagged_addr t)
+{
+	return t.tagged_addr & HUGE_PAGE;
+}
+
+/**
+ * is_huge_head - Check if the physical page is the first 4KB page of the
+ *                512 4KB pages within a large page which was not split
+ *                to be used partially
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page is the first page of a large page, or false
+ */
+static inline bool is_huge_head(struct tagged_addr t)
+{
+	int mask = HUGE_HEAD | HUGE_PAGE;
+
+	return mask == (t.tagged_addr & mask);
+}
+
+/**
+ * is_partial - Check if the physical page is one of the 512 pages of the
+ *              large page which was split in 4KB pages to be used
+ *              partially for allocations >= 2 MB in size.
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page was taken from large page used partially, or false
+ */
+static inline bool is_partial(struct tagged_addr t)
+{
+	return t.tagged_addr & FROM_PARTIAL;
+}
+
+#endif /* _KBASE_LOWLEVEL_H */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
new file mode 100755
index 0000000..0723e32
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -0,0 +1,856 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/shrinker.h>
+#include <linux/atomic.h>
+#include <linux/version.h>
+
+#define pool_dbg(pool, format, ...) \
+	dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format,	\
+		(pool->next_pool) ? "kctx" : "kbdev",	\
+		kbase_mem_pool_size(pool),	\
+		kbase_mem_pool_max_size(pool),	\
+		##__VA_ARGS__)
+
+#define NOT_DIRTY false
+#define NOT_RECLAIMED false
+
+static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
+{
+	ssize_t max_size = kbase_mem_pool_max_size(pool);
+	ssize_t cur_size = kbase_mem_pool_size(pool);
+
+	return max(max_size - cur_size, (ssize_t)0);
+}
+
+static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool);
+}
+
+static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) == 0;
+}
+
+static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_add(&p->lru, &pool->page_list);
+	pool->cur_size++;
+
+	pool_dbg(pool, "added page\n");
+}
+
+static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_locked(pool, p);
+	kbase_mem_pool_unlock(pool);
+}
+
+static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_splice(page_list, &pool->page_list);
+	pool->cur_size += nr_pages;
+
+	pool_dbg(pool, "added %zu pages\n", nr_pages);
+}
+
+static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_list_locked(pool, page_list, nr_pages);
+	kbase_mem_pool_unlock(pool);
+}
+
+static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (kbase_mem_pool_is_empty(pool))
+		return NULL;
+
+	p = list_first_entry(&pool->page_list, struct page, lru);
+	list_del_init(&p->lru);
+	pool->cur_size--;
+
+	pool_dbg(pool, "removed page\n");
+
+	return p;
+}
+
+static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	kbase_mem_pool_lock(pool);
+	p = kbase_mem_pool_remove_locked(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return p;
+}
+
+static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_sync_single_for_device(dev, kbase_dma_addr(p),
+			(PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL);
+}
+
+static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	int i;
+
+	for (i = 0; i < (1U << pool->order); i++)
+		clear_highpage(p+i);
+
+	kbase_mem_pool_sync_page(pool, p);
+}
+
+static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool,
+		struct page *p)
+{
+	/* Zero page before spilling */
+	kbase_mem_pool_zero_page(next_pool, p);
+
+	kbase_mem_pool_add(next_pool, p);
+}
+
+struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+	gfp_t gfp;
+	struct kbase_device *const kbdev = pool->kbdev;
+	struct device *const dev = kbdev->dev;
+	dma_addr_t dma_addr;
+	int i;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \
+	LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+	/* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+	gfp = GFP_USER | __GFP_ZERO;
+#else
+	gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+	/* don't warn on higher order failures */
+	if (pool->order)
+		gfp |= __GFP_NOWARN;
+
+	p = kbdev->mgm_dev->ops.mgm_alloc_page(kbdev->mgm_dev,
+		pool->group_id, gfp, pool->order);
+	if (!p)
+		return NULL;
+
+	dma_addr = dma_map_page(dev, p, 0, (PAGE_SIZE << pool->order),
+				DMA_BIDIRECTIONAL);
+
+	if (dma_mapping_error(dev, dma_addr)) {
+		kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev,
+			pool->group_id, p, pool->order);
+		return NULL;
+	}
+
+	WARN_ON(dma_addr != page_to_phys(p));
+	for (i = 0; i < (1u << pool->order); i++)
+		kbase_set_dma_addr(p+i, dma_addr + PAGE_SIZE * i);
+
+	return p;
+}
+
+static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct kbase_device *const kbdev = pool->kbdev;
+	struct device *const dev = kbdev->dev;
+	dma_addr_t dma_addr = kbase_dma_addr(p);
+	int i;
+
+	dma_unmap_page(dev, dma_addr, (PAGE_SIZE << pool->order),
+		       DMA_BIDIRECTIONAL);
+	for (i = 0; i < (1u << pool->order); i++)
+		kbase_clear_dma_addr(p+i);
+
+	kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev,
+		pool->group_id, p, pool->order);
+
+	pool_dbg(pool, "freed page to kernel\n");
+}
+
+static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	struct page *p;
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	return i;
+}
+
+static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	size_t nr_freed;
+
+	kbase_mem_pool_lock(pool);
+	nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	kbase_mem_pool_unlock(pool);
+
+	return nr_freed;
+}
+
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
+		size_t nr_to_grow)
+{
+	struct page *p;
+	size_t i;
+
+	kbase_mem_pool_lock(pool);
+
+	pool->dont_reclaim = true;
+	for (i = 0; i < nr_to_grow; i++) {
+		if (pool->dying) {
+			pool->dont_reclaim = false;
+			kbase_mem_pool_shrink_locked(pool, nr_to_grow);
+			kbase_mem_pool_unlock(pool);
+
+			return -ENOMEM;
+		}
+		kbase_mem_pool_unlock(pool);
+
+		p = kbase_mem_alloc_page(pool);
+		if (!p) {
+			kbase_mem_pool_lock(pool);
+			pool->dont_reclaim = false;
+			kbase_mem_pool_unlock(pool);
+
+			return -ENOMEM;
+		}
+
+		kbase_mem_pool_lock(pool);
+		kbase_mem_pool_add_locked(pool, p);
+	}
+	pool->dont_reclaim = false;
+	kbase_mem_pool_unlock(pool);
+
+	return 0;
+}
+
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
+{
+	size_t cur_size;
+	int err = 0;
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	if (new_size > pool->max_size)
+		new_size = pool->max_size;
+
+	if (new_size < cur_size)
+		kbase_mem_pool_shrink(pool, cur_size - new_size);
+	else if (new_size > cur_size)
+		err = kbase_mem_pool_grow(pool, new_size - cur_size);
+
+	if (err) {
+		size_t grown_size = kbase_mem_pool_size(pool);
+
+		dev_warn(pool->kbdev->dev,
+			 "Mem pool not grown to the required size of %zu bytes, grown for additional %zu bytes instead!\n",
+			 (new_size - cur_size), (grown_size - cur_size));
+	}
+}
+
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size)
+{
+	size_t cur_size;
+	size_t nr_to_shrink;
+
+	kbase_mem_pool_lock(pool);
+
+	pool->max_size = max_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+	if (max_size < cur_size) {
+		nr_to_shrink = cur_size - max_size;
+		kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	}
+
+	kbase_mem_pool_unlock(pool);
+}
+
+
+static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+	size_t pool_size;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+
+	kbase_mem_pool_lock(pool);
+	if (pool->dont_reclaim && !pool->dying) {
+		kbase_mem_pool_unlock(pool);
+		return 0;
+	}
+	pool_size = kbase_mem_pool_size(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return pool_size;
+}
+
+static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+	unsigned long freed;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+
+	kbase_mem_pool_lock(pool);
+	if (pool->dont_reclaim && !pool->dying) {
+		kbase_mem_pool_unlock(pool);
+		return 0;
+	}
+
+	pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan);
+
+	freed = kbase_mem_pool_shrink_locked(pool, sc->nr_to_scan);
+
+	kbase_mem_pool_unlock(pool);
+
+	pool_dbg(pool, "reclaim freed %ld pages\n", freed);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_pool_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_pool_reclaim_count_objects(s, sc);
+
+	return kbase_mem_pool_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		const struct kbase_mem_pool_config *config,
+		unsigned int order,
+		int group_id,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool)
+{
+	if (WARN_ON(group_id < 0) ||
+		WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) {
+		return -EINVAL;
+	}
+
+	pool->cur_size = 0;
+	pool->max_size = kbase_mem_pool_config_get_max_size(config);
+	pool->order = order;
+	pool->group_id = group_id;
+	pool->kbdev = kbdev;
+	pool->next_pool = next_pool;
+	pool->dying = false;
+
+	spin_lock_init(&pool->pool_lock);
+	INIT_LIST_HEAD(&pool->page_list);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink;
+#else
+	pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects;
+	pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects;
+#endif
+	pool->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	pool->reclaim.batch = 0;
+#endif
+	register_shrinker(&pool->reclaim);
+
+	pool_dbg(pool, "initialized\n");
+
+	return 0;
+}
+
+void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool)
+{
+	kbase_mem_pool_lock(pool);
+	pool->dying = true;
+	kbase_mem_pool_unlock(pool);
+}
+
+void kbase_mem_pool_term(struct kbase_mem_pool *pool)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p, *tmp;
+	size_t nr_to_spill = 0;
+	LIST_HEAD(spill_list);
+	LIST_HEAD(free_list);
+	int i;
+
+	pool_dbg(pool, "terminate()\n");
+
+	unregister_shrinker(&pool->reclaim);
+
+	kbase_mem_pool_lock(pool);
+	pool->max_size = 0;
+
+	if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool (may overspill) */
+		nr_to_spill = kbase_mem_pool_capacity(next_pool);
+		nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill);
+
+		/* Zero pages first without holding the next_pool lock */
+		for (i = 0; i < nr_to_spill; i++) {
+			p = kbase_mem_pool_remove_locked(pool);
+			list_add(&p->lru, &spill_list);
+		}
+	}
+
+	while (!kbase_mem_pool_is_empty(pool)) {
+		/* Free remaining pages to kernel */
+		p = kbase_mem_pool_remove_locked(pool);
+		list_add(&p->lru, &free_list);
+	}
+
+	kbase_mem_pool_unlock(pool);
+
+	if (next_pool && nr_to_spill) {
+		list_for_each_entry(p, &spill_list, lru)
+			kbase_mem_pool_zero_page(pool, p);
+
+		/* Add new page list to next_pool */
+		kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill);
+
+		pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill);
+	}
+
+	list_for_each_entry_safe(p, tmp, &free_list, lru) {
+		list_del_init(&p->lru);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	pool_dbg(pool, "terminated\n");
+}
+
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	do {
+		pool_dbg(pool, "alloc()\n");
+		p = kbase_mem_pool_remove(pool);
+
+		if (p)
+			return p;
+
+		pool = pool->next_pool;
+	} while (pool);
+
+	return NULL;
+}
+
+struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	pool_dbg(pool, "alloc_locked()\n");
+	p = kbase_mem_pool_remove_locked(pool);
+
+	if (p)
+		return p;
+
+	return NULL;
+}
+
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+
+	pool_dbg(pool, "free()\n");
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add(pool, p);
+	} else if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool */
+		kbase_mem_pool_spill(next_pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
+void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	pool_dbg(pool, "free_locked()\n");
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add_locked(pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
+		struct tagged_addr *pages, bool partial_allowed)
+{
+	struct page *p;
+	size_t nr_from_pool;
+	size_t i = 0;
+	int err = -ENOMEM;
+	size_t nr_pages_internal;
+
+	nr_pages_internal = nr_4k_pages / (1u << (pool->order));
+
+	if (nr_pages_internal * (1u << pool->order) != nr_4k_pages)
+		return -EINVAL;
+
+	pool_dbg(pool, "alloc_pages(4k=%zu):\n", nr_4k_pages);
+	pool_dbg(pool, "alloc_pages(internal=%zu):\n", nr_pages_internal);
+
+	/* Get pages from this pool */
+	kbase_mem_pool_lock(pool);
+	nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool));
+	while (nr_from_pool--) {
+		int j;
+		p = kbase_mem_pool_remove_locked(pool);
+		if (pool->order) {
+			pages[i++] = as_tagged_tag(page_to_phys(p),
+						   HUGE_HEAD | HUGE_PAGE);
+			for (j = 1; j < (1u << pool->order); j++)
+				pages[i++] = as_tagged_tag(page_to_phys(p) +
+							   PAGE_SIZE * j,
+							   HUGE_PAGE);
+		} else {
+			pages[i++] = as_tagged(page_to_phys(p));
+		}
+	}
+	kbase_mem_pool_unlock(pool);
+
+	if (i != nr_4k_pages && pool->next_pool) {
+		/* Allocate via next pool */
+		err = kbase_mem_pool_alloc_pages(pool->next_pool,
+				nr_4k_pages - i, pages + i, partial_allowed);
+
+		if (err < 0)
+			goto err_rollback;
+
+		i += err;
+	} else {
+		/* Get any remaining pages from kernel */
+		while (i != nr_4k_pages) {
+			p = kbase_mem_alloc_page(pool);
+			if (!p) {
+				if (partial_allowed)
+					goto done;
+				else
+					goto err_rollback;
+			}
+
+			if (pool->order) {
+				int j;
+
+				pages[i++] = as_tagged_tag(page_to_phys(p),
+							   HUGE_PAGE |
+							   HUGE_HEAD);
+				for (j = 1; j < (1u << pool->order); j++) {
+					phys_addr_t phys;
+
+					phys = page_to_phys(p) + PAGE_SIZE * j;
+					pages[i++] = as_tagged_tag(phys,
+								   HUGE_PAGE);
+				}
+			} else {
+				pages[i++] = as_tagged(page_to_phys(p));
+			}
+		}
+	}
+
+done:
+	pool_dbg(pool, "alloc_pages(%zu) done\n", i);
+	return i;
+
+err_rollback:
+	kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED);
+	return err;
+}
+
+int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_4k_pages, struct tagged_addr *pages)
+{
+	struct page *p;
+	size_t i;
+	size_t nr_pages_internal;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	nr_pages_internal = nr_4k_pages / (1u << (pool->order));
+
+	if (nr_pages_internal * (1u << pool->order) != nr_4k_pages)
+		return -EINVAL;
+
+	pool_dbg(pool, "alloc_pages_locked(4k=%zu):\n", nr_4k_pages);
+	pool_dbg(pool, "alloc_pages_locked(internal=%zu):\n",
+			nr_pages_internal);
+
+	if (kbase_mem_pool_size(pool) < nr_pages_internal) {
+		pool_dbg(pool, "Failed alloc\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_pages_internal; i++) {
+		int j;
+
+		p = kbase_mem_pool_remove_locked(pool);
+		if (pool->order) {
+			*pages++ = as_tagged_tag(page_to_phys(p),
+						   HUGE_HEAD | HUGE_PAGE);
+			for (j = 1; j < (1u << pool->order); j++) {
+				*pages++ = as_tagged_tag(page_to_phys(p) +
+							   PAGE_SIZE * j,
+							   HUGE_PAGE);
+			}
+		} else {
+			*pages++ = as_tagged(page_to_phys(p));
+		}
+	}
+
+	return nr_4k_pages;
+}
+
+static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
+				     size_t nr_pages, struct tagged_addr *pages,
+				     bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first without holding the pool lock */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge_head(pages[i]) || !is_huge(pages[i])) {
+			p = as_page(pages[i]);
+			if (zero)
+				kbase_mem_pool_zero_page(pool, p);
+			else if (sync)
+				kbase_mem_pool_sync_page(pool, p);
+
+			list_add(&p->lru, &new_page_list);
+			nr_to_pool++;
+		}
+		pages[i] = as_tagged(0);
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
+static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages,
+		bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array_locked(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge_head(pages[i]) || !is_huge(pages[i])) {
+			p = as_page(pages[i]);
+			if (zero)
+				kbase_mem_pool_zero_page(pool, p);
+			else if (sync)
+				kbase_mem_pool_sync_page(pool, p);
+
+			list_add(&p->lru, &new_page_list);
+			nr_to_pool++;
+		}
+		pages[i] = as_tagged(0);
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list_locked(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array_locked(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		struct tagged_addr *pages, bool dirty, bool reclaimed)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	pool_dbg(pool, "free_pages(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
+
+		i += nr_to_pool;
+
+		if (i != nr_pages && next_pool) {
+			/* Spill to next pool (may overspill) */
+			nr_to_pool = kbase_mem_pool_capacity(next_pool);
+			nr_to_pool = min(nr_pages - i, nr_to_pool);
+
+			kbase_mem_pool_add_array(next_pool, nr_to_pool,
+					pages + i, true, dirty);
+			i += nr_to_pool;
+		}
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge(pages[i]) && !is_huge_head(pages[i])) {
+			pages[i] = as_tagged(0);
+			continue;
+		}
+
+		p = as_page(pages[i]);
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = as_tagged(0);
+	}
+
+	pool_dbg(pool, "free_pages(%zu) done\n", nr_pages);
+}
+
+
+void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages, bool dirty,
+		bool reclaimed)
+{
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	pool_dbg(pool, "free_pages_locked(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false,
+				dirty);
+
+		i += nr_to_pool;
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge(pages[i]) && !is_huge_head(pages[i])) {
+			pages[i] = as_tagged(0);
+			continue;
+		}
+
+		p = as_page(pages[i]);
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = as_tagged(0);
+	}
+
+	pool_dbg(pool, "free_pages_locked(%zu) done\n", nr_pages);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
new file mode 100755
index 0000000..9896202
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
@@ -0,0 +1,187 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "mali_kbase_mem_pool_debugfs.h"
+#include "mali_kbase_debugfs_helper.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+void kbase_mem_pool_debugfs_trim(void *const array, size_t const index,
+	size_t const value)
+{
+	struct kbase_mem_pool *const mem_pools = array;
+
+	if (WARN_ON(!mem_pools) ||
+		WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return;
+
+	kbase_mem_pool_trim(&mem_pools[index], value);
+}
+
+void kbase_mem_pool_debugfs_set_max_size(void *const array,
+	size_t const index, size_t const value)
+{
+	struct kbase_mem_pool *const mem_pools = array;
+
+	if (WARN_ON(!mem_pools) ||
+		WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return;
+
+	kbase_mem_pool_set_max_size(&mem_pools[index], value);
+}
+
+size_t kbase_mem_pool_debugfs_size(void *const array, size_t const index)
+{
+	struct kbase_mem_pool *const mem_pools = array;
+
+	if (WARN_ON(!mem_pools) ||
+		WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return 0;
+
+	return kbase_mem_pool_size(&mem_pools[index]);
+}
+
+size_t kbase_mem_pool_debugfs_max_size(void *const array, size_t const index)
+{
+	struct kbase_mem_pool *const mem_pools = array;
+
+	if (WARN_ON(!mem_pools) ||
+		WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return 0;
+
+	return kbase_mem_pool_max_size(&mem_pools[index]);
+}
+
+void kbase_mem_pool_config_debugfs_set_max_size(void *const array,
+	size_t const index, size_t const value)
+{
+	struct kbase_mem_pool_config *const configs = array;
+
+	if (WARN_ON(!configs) ||
+		WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return;
+
+	kbase_mem_pool_config_set_max_size(&configs[index], value);
+}
+
+size_t kbase_mem_pool_config_debugfs_max_size(void *const array,
+	size_t const index)
+{
+	struct kbase_mem_pool_config *const configs = array;
+
+	if (WARN_ON(!configs) ||
+		WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return 0;
+
+	return kbase_mem_pool_config_get_max_size(&configs[index]);
+}
+
+static int kbase_mem_pool_debugfs_size_show(struct seq_file *sfile, void *data)
+{
+	CSTD_UNUSED(data);
+	return kbase_debugfs_helper_seq_read(sfile,
+		MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_size);
+}
+
+static ssize_t kbase_mem_pool_debugfs_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	int err;
+
+	CSTD_UNUSED(ppos);
+	err = kbase_debugfs_helper_seq_write(file, ubuf, count,
+		MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_trim);
+	return err ? err : count;
+}
+
+static int kbase_mem_pool_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbase_mem_pool_debugfs_size_show,
+		in->i_private);
+}
+
+static const struct file_operations kbase_mem_pool_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_mem_pool_debugfs_open,
+	.read = seq_read,
+	.write = kbase_mem_pool_debugfs_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int kbase_mem_pool_debugfs_max_size_show(struct seq_file *sfile,
+	void *data)
+{
+	CSTD_UNUSED(data);
+	return kbase_debugfs_helper_seq_read(sfile,
+		MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_max_size);
+}
+
+static ssize_t kbase_mem_pool_debugfs_max_size_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	int err;
+
+	CSTD_UNUSED(ppos);
+	err = kbase_debugfs_helper_seq_write(file, ubuf, count,
+		MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_set_max_size);
+	return err ? err : count;
+}
+
+static int kbase_mem_pool_debugfs_max_size_open(struct inode *in,
+	struct file *file)
+{
+	return single_open(file, kbase_mem_pool_debugfs_max_size_show,
+		in->i_private);
+}
+
+static const struct file_operations kbase_mem_pool_debugfs_max_size_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_mem_pool_debugfs_max_size_open,
+	.read = seq_read,
+	.write = kbase_mem_pool_debugfs_max_size_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_context *kctx)
+{
+	debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent,
+		&kctx->mem_pools.small, &kbase_mem_pool_debugfs_fops);
+
+	debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+		&kctx->mem_pools.small, &kbase_mem_pool_debugfs_max_size_fops);
+
+	debugfs_create_file("lp_mem_pool_size", S_IRUGO | S_IWUSR, parent,
+		&kctx->mem_pools.large, &kbase_mem_pool_debugfs_fops);
+
+	debugfs_create_file("lp_mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+		&kctx->mem_pools.large, &kbase_mem_pool_debugfs_max_size_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
new file mode 100755
index 0000000..2932945
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_MEM_POOL_DEBUGFS_H_
+#define _KBASE_MEM_POOL_DEBUGFS_H_
+
+#include <mali_kbase.h>
+
+/**
+ * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool
+ * @parent:  Parent debugfs dentry
+ * @kctx:    The kbase context
+ *
+ * Adds four debugfs files under @parent:
+ * - mem_pool_size: get/set the current sizes of @kctx: mem_pools
+ * - mem_pool_max_size: get/set the max sizes of @kctx: mem_pools
+ * - lp_mem_pool_size: get/set the current sizes of @kctx: lp_mem_pool
+ * - lp_mem_pool_max_size: get/set the max sizes of @kctx:lp_mem_pool
+ */
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_context *kctx);
+
+/**
+ * kbase_mem_pool_debugfs_trim - Grow or shrink a memory pool to a new size
+ *
+ * @array: Address of the first in an array of physical memory pools.
+ * @index: A memory group ID to be used as an index into the array of memory
+ *         pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @value: New number of pages in the pool.
+ *
+ * If @value > current size, fill the pool with new pages from the kernel, but
+ * not above the max_size for the pool.
+ * If @value < current size, shrink the pool by freeing pages to the kernel.
+ */
+void kbase_mem_pool_debugfs_trim(void *array, size_t index, size_t value);
+
+/**
+ * kbase_mem_pool_debugfs_set_max_size - Set maximum number of free pages in
+ *                                       memory pool
+ *
+ * @array: Address of the first in an array of physical memory pools.
+ * @index: A memory group ID to be used as an index into the array of memory
+ *         pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @value: Maximum number of free pages the pool can hold.
+ *
+ * If the maximum size is reduced, the pool will be shrunk to adhere to the
+ * new limit. For details see kbase_mem_pool_shrink().
+ */
+void kbase_mem_pool_debugfs_set_max_size(void *array, size_t index,
+	size_t value);
+
+/**
+ * kbase_mem_pool_debugfs_size - Get number of free pages in a memory pool
+ *
+ * @array: Address of the first in an array of physical memory pools.
+ * @index: A memory group ID to be used as an index into the array of memory
+ *         pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ *
+ * Note: the size of the pool may in certain corner cases exceed @max_size!
+ *
+ * Return: Number of free pages in the pool
+ */
+size_t kbase_mem_pool_debugfs_size(void *array, size_t index);
+
+/**
+ * kbase_mem_pool_debugfs_max_size - Get maximum number of free pages in a
+ *                                   memory pool
+ *
+ * @array: Address of the first in an array of physical memory pools.
+ * @index: A memory group ID to be used as an index into the array of memory
+ *         pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ *
+ * Return: Maximum number of free pages in the pool
+ */
+size_t kbase_mem_pool_debugfs_max_size(void *array, size_t index);
+
+/**
+ * kbase_mem_pool_config_debugfs_set_max_size - Set maximum number of free pages
+ *                                              in initial configuration of pool
+ *
+ * @array:  Array of initial configurations for a set of physical memory pools.
+ * @index:  A memory group ID to be used as an index into the array.
+ *          Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @value : Maximum number of free pages that a memory pool created from the
+ *          selected configuration can hold.
+ */
+void kbase_mem_pool_config_debugfs_set_max_size(void *array, size_t index,
+	size_t value);
+
+/**
+ * kbase_mem_pool_config_debugfs_max_size - Get maximum number of free pages
+ *                                          from initial configuration of pool
+ *
+ * @array:  Array of initial configurations for a set of physical memory pools.
+ * @index:  A memory group ID to be used as an index into the array.
+ *          Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ *
+ * Return: Maximum number of free pages that a memory pool created from the
+ *         selected configuration can hold.
+ */
+size_t kbase_mem_pool_config_debugfs_max_size(void *array, size_t index);
+
+#endif  /*_KBASE_MEM_POOL_DEBUGFS_H_ */
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c
new file mode 100644
index 0000000..aa25548
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c
@@ -0,0 +1,115 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mem_pool_group.h>
+
+#include <linux/memory_group_manager.h>
+
+void kbase_mem_pool_group_config_set_max_size(
+	struct kbase_mem_pool_group_config *const configs,
+	size_t const max_size)
+{
+	size_t const large_max_size = max_size >>
+		(KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER -
+		KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER);
+	int gid;
+
+	for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) {
+		kbase_mem_pool_config_set_max_size(&configs->small[gid],
+			max_size);
+
+		kbase_mem_pool_config_set_max_size(&configs->large[gid],
+			large_max_size);
+	}
+}
+
+int kbase_mem_pool_group_init(
+	struct kbase_mem_pool_group *const mem_pools,
+	struct kbase_device *const kbdev,
+	const struct kbase_mem_pool_group_config *const configs,
+	struct kbase_mem_pool_group *next_pools)
+{
+	int gid, err = 0;
+
+	for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) {
+		err = kbase_mem_pool_init(&mem_pools->small[gid],
+			&configs->small[gid],
+			KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
+			gid,
+			kbdev,
+			next_pools ? &next_pools->small[gid] : NULL);
+
+		if (!err) {
+			err = kbase_mem_pool_init(&mem_pools->large[gid],
+				&configs->large[gid],
+				KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
+				gid,
+				kbdev,
+				next_pools ? &next_pools->large[gid] : NULL);
+			if (err)
+				kbase_mem_pool_term(&mem_pools->small[gid]);
+		}
+
+		/* Break out of the loop early to avoid incrementing the count
+		 * of memory pool pairs successfully initialized.
+		 */
+		if (err)
+			break;
+	}
+
+	if (err) {
+		/* gid gives the number of memory pool pairs successfully
+		 * initialized, which is one greater than the array index of the
+		 * last group.
+		 */
+		while (gid-- > 0) {
+			kbase_mem_pool_term(&mem_pools->small[gid]);
+			kbase_mem_pool_term(&mem_pools->large[gid]);
+		}
+	}
+
+	return err;
+}
+
+void kbase_mem_pool_group_mark_dying(
+	struct kbase_mem_pool_group *const mem_pools)
+{
+	int gid;
+
+	for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) {
+		kbase_mem_pool_mark_dying(&mem_pools->small[gid]);
+		kbase_mem_pool_mark_dying(&mem_pools->large[gid]);
+	}
+}
+
+void kbase_mem_pool_group_term(
+	struct kbase_mem_pool_group *const mem_pools)
+{
+	int gid;
+
+	for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) {
+		kbase_mem_pool_term(&mem_pools->small[gid]);
+		kbase_mem_pool_term(&mem_pools->large[gid]);
+	}
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h
new file mode 100644
index 0000000..0484f59
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_MEM_POOL_GROUP_H_
+#define _KBASE_MEM_POOL_GROUP_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * kbase_mem_pool_group_config_init - Set the initial configuration for a
+ *                                    set of memory pools
+ *
+ * This function sets the initial configuration for every memory pool so that
+ * the maximum amount of free memory that each pool can hold is identical.
+ * The equivalent number of 2 MiB pages is calculated automatically for the
+ * purpose of configuring the large page pools.
+ *
+ * @configs:  Initial configuration for the set of memory pools
+ * @max_size: Maximum number of free 4 KiB pages each pool can hold
+ */
+void kbase_mem_pool_group_config_set_max_size(
+	struct kbase_mem_pool_group_config *configs, size_t max_size);
+
+/**
+ * kbase_mem_pool_group_init - Initialize a set of memory pools
+ *
+ * Initializes a complete set of physical memory pools. Memory pools are used to
+ * allow efficient reallocation of previously-freed physical pages. A pair of
+ * memory pools is initialized for each physical memory group: one for 4 KiB
+ * pages and one for 2 MiB pages.
+ *
+ * If @next_pools is not NULL then a request to allocate memory from an
+ * empty pool in @mem_pools will attempt to allocate from the equivalent pool
+ * in @next_pools before going to the memory group manager. Similarly
+ * pages can spill over to the equivalent pool in @next_pools when a pool
+ * is full in @mem_pools. Pages are zeroed before they spill over to another
+ * pool, to prevent leaking information between applications.
+ *
+ * @mem_pools:  Set of memory pools to initialize
+ * @kbdev:      Kbase device where memory is used
+ * @configs:    Initial configuration for the set of memory pools
+ * @next_pools: Set of memory pools from which to allocate memory if there
+ *              is no free memory in one of the @mem_pools
+ *
+ * Return: 0 on success, otherwise a negative error code
+ */
+int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools,
+	struct kbase_device *kbdev,
+	const struct kbase_mem_pool_group_config *configs,
+	struct kbase_mem_pool_group *next_pools);
+
+/**
+ * kbase_mem_pool_group_term - Mark a set of memory pools as dying
+ *
+ * Marks a complete set of physical memory pools previously initialized by
+ * @kbase_mem_pool_group_init as dying. This will cause any ongoing allocation
+ * operations (eg growing on page fault) to be terminated.
+ *
+ * @mem_pools: Set of memory pools to mark
+ */
+void kbase_mem_pool_group_mark_dying(struct kbase_mem_pool_group *mem_pools);
+
+/**
+ * kbase_mem_pool_group_term - Terminate a set of memory pools
+ *
+ * Terminates a complete set of physical memory pools previously initialized by
+ * @kbase_mem_pool_group_init.
+ *
+ * @mem_pools: Set of memory pools to terminate
+ */
+void kbase_mem_pool_group_term(struct kbase_mem_pool_group *mem_pools);
+
+#endif /* _KBASE_MEM_POOL_GROUP_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
new file mode 100755
index 0000000..5d38ed2
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017, 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Show callback for the @c mem_profile debugfs file.
+ *
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise
+ */
+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+
+	seq_putc(sfile, '\n');
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for mem_profile
+ */
+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_mem_profile_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = kbasep_mem_profile_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+		kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+			err  = -ENOMEM;
+		} else if (!debugfs_create_file("mem_profile", 0444,
+					kctx->kctx_dentry, kctx,
+					&kbasep_mem_profile_debugfs_fops)) {
+			err = -EAGAIN;
+		} else {
+			kbase_ctx_flag_set(kctx,
+					   KCTX_MEM_PROFILE_INITIALIZED);
+		}
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		kfree(kctx->mem_profile_data);
+		kctx->mem_profile_data = data;
+		kctx->mem_profile_size = size;
+	} else {
+		kfree(data);
+	}
+
+	dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d",
+		err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return err;
+}
+
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+				kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	kfree(kctx->mem_profile_data);
+	kctx->mem_profile_data = NULL;
+	kctx->mem_profile_size = 0;
+
+	mutex_unlock(&kctx->mem_profile_lock);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	kfree(data);
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
new file mode 100755
index 0000000..1462247
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs.h
+ * Header file for mem profiles entries in debugfs
+ *
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
+#define _KBASE_MEM_PROFILE_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Remove entry from Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
+
+/**
+ * @brief Insert @p data to the debugfs file so it can be read by userspace
+ *
+ * The function takes ownership of @p data and frees it later when new data
+ * is inserted.
+ *
+ * If the debugfs entry corresponding to the @p kctx doesn't exist,
+ * an attempt will be made to create it.
+ *
+ * @param kctx The context whose debugfs file @p data should be inserted to
+ * @param data A NULL-terminated string to be inserted to the debugfs file,
+ *             without the trailing new line character
+ * @param size The length of the @p data string
+ * @return 0 if @p data inserted correctly
+ *         -EAGAIN in case of error
+ * @post @ref mem_profile_initialized will be set to @c true
+ *       the first time this function succeeds.
+ */
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size);
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
new file mode 100755
index 0000000..81e2886
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2018-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_mem_profile_debugfs_buf_size.h
+ * Header file for the size of the buffer to accumulate the histogram report text in
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * The size of the buffer to accumulate the histogram report text in
+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE \
+	((size_t) (64 + ((80 + (56 * 64)) * 50) + 56))
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h
new file mode 100644
index 0000000..99475b6
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h
@@ -0,0 +1,120 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_mipe_proto.h"
+
+/**
+ * This header generates MIPE tracepoint declaration BLOB at
+ * compile time.
+ *
+ * Before including this header, the following parameters
+ * must be defined:
+ *
+ * MIPE_HEADER_BLOB_VAR_NAME: the name of the variable
+ * where the result BLOB will be stored.
+ *
+ * MIPE_HEADER_TP_LIST: the list of tracepoints to process.
+ * It should be defined as follows:
+ * #define MIPE_HEADER_TP_LIST \
+ *     TP_DESC(FIRST_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \
+ *     TP_DESC(SECOND_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \
+ *     etc.
+ * Where the first argument is tracepoints name, the second
+ * argument is a short tracepoint description, the third argument
+ * argument types (see MIPE documentation), and the fourth argument
+ * is comma separated argument names.
+ *
+ * MIPE_HEADER_TP_LIST_COUNT: number of entries in MIPE_HEADER_TP_LIST.
+ *
+ * MIPE_HEADER_PKT_CLASS: MIPE packet class.
+ */
+
+#if !defined(MIPE_HEADER_BLOB_VAR_NAME)
+#error "MIPE_HEADER_BLOB_VAR_NAME must be defined!"
+#endif
+
+#if !defined(MIPE_HEADER_TP_LIST)
+#error "MIPE_HEADER_TP_LIST must be defined!"
+#endif
+
+#if !defined(MIPE_HEADER_TP_LIST_COUNT)
+#error "MIPE_HEADER_TP_LIST_COUNT must be defined!"
+#endif
+
+#if !defined(MIPE_HEADER_PKT_CLASS)
+#error "MIPE_HEADER_PKT_CLASS must be defined!"
+#endif
+
+static const struct {
+	u32 _mipe_w0;
+	u32 _mipe_w1;
+	u8  _protocol_version;
+	u8  _pointer_size;
+	u32 _tp_count;
+#define TP_DESC(name, desc, arg_types, arg_names)       \
+	struct {                                        \
+		u32  _name;                             \
+		u32  _size_string_name;                 \
+		char _string_name[sizeof(#name)];       \
+		u32  _size_desc;                        \
+		char _desc[sizeof(desc)];               \
+		u32  _size_arg_types;                   \
+		char _arg_types[sizeof(arg_types)];     \
+		u32  _size_arg_names;                   \
+		char _arg_names[sizeof(arg_names)];     \
+	} __attribute__ ((__packed__)) __ ## name;
+
+	MIPE_HEADER_TP_LIST
+#undef TP_DESC
+
+} __attribute__ ((__packed__)) MIPE_HEADER_BLOB_VAR_NAME = {
+	._mipe_w0 = MIPE_PACKET_HEADER_W0(
+		TL_PACKET_FAMILY_TL,
+		MIPE_HEADER_PKT_CLASS,
+		TL_PACKET_TYPE_HEADER,
+		1),
+	._mipe_w1 = MIPE_PACKET_HEADER_W1(
+		sizeof(MIPE_HEADER_BLOB_VAR_NAME) - PACKET_HEADER_SIZE,
+		0),
+	._protocol_version = SWTRACE_VERSION,
+	._pointer_size = sizeof(void *),
+	._tp_count = MIPE_HEADER_TP_LIST_COUNT,
+#define TP_DESC(name, desc, arg_types, arg_names)       \
+	.__ ## name = {                                 \
+		._name = name,                          \
+		._size_string_name = sizeof(#name),     \
+		._string_name = #name,                  \
+		._size_desc = sizeof(desc),             \
+		._desc = desc,                          \
+		._size_arg_types = sizeof(arg_types),   \
+		._arg_types = arg_types,                \
+		._size_arg_names = sizeof(arg_names),   \
+		._arg_names = arg_names                 \
+	},
+	MIPE_HEADER_TP_LIST
+#undef TP_DESC
+};
+
+#undef MIPE_HEADER_BLOB_VAR_NAME
+#undef MIPE_HEADER_TP_LIST
+#undef MIPE_HEADER_TP_LIST_COUNT
+#undef MIPE_HEADER_PKT_CLASS
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h
new file mode 100644
index 0000000..fb61faa
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h
@@ -0,0 +1,113 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_KBASE_MIPE_PROTO_H)
+#define _KBASE_MIPE_PROTO_H
+
+#define _BITFIELD_MASK_FIELD(pos, len) \
+	(((1 << len) - 1) << pos)
+
+#define _BITFIELD_SET_FIELD(pos, len, value) \
+	(_BITFIELD_MASK_FIELD(pos, len) & ((value) << pos))
+
+#define BITFIELD_SET(field_name, value) \
+	_BITFIELD_SET_FIELD(field_name ## _POS, field_name ## _LEN, value)
+
+/* The version of swtrace protocol used in timeline stream. */
+#define SWTRACE_VERSION    3
+
+/* Packet header - first word.
+ * These values must be defined according to MIPE documentation.
+ */
+#define PACKET_STREAMID_POS  0
+#define PACKET_STREAMID_LEN  8
+#define PACKET_RSVD1_POS     (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
+#define PACKET_RSVD1_LEN     8
+#define PACKET_TYPE_POS      (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
+#define PACKET_TYPE_LEN      3
+#define PACKET_CLASS_POS     (PACKET_TYPE_POS + PACKET_TYPE_LEN)
+#define PACKET_CLASS_LEN     7
+#define PACKET_FAMILY_POS    (PACKET_CLASS_POS + PACKET_CLASS_LEN)
+#define PACKET_FAMILY_LEN    6
+
+/* Packet header - second word
+ * These values must be defined according to MIPE documentation.
+ */
+#define PACKET_LENGTH_POS    0
+#define PACKET_LENGTH_LEN    24
+#define PACKET_SEQBIT_POS    (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
+#define PACKET_SEQBIT_LEN    1
+#define PACKET_RSVD2_POS     (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
+#define PACKET_RSVD2_LEN     7
+
+/* First word of a MIPE packet */
+#define MIPE_PACKET_HEADER_W0(pkt_family, pkt_class, pkt_type, stream_id) \
+	(0                                          \
+	| BITFIELD_SET(PACKET_FAMILY,   pkt_family) \
+	| BITFIELD_SET(PACKET_CLASS,    pkt_class)  \
+	| BITFIELD_SET(PACKET_TYPE,     pkt_type)   \
+	| BITFIELD_SET(PACKET_STREAMID, stream_id))
+
+/* Second word of a MIPE packet */
+#define MIPE_PACKET_HEADER_W1(packet_length, seqbit) \
+	(0                                           \
+	| BITFIELD_SET(PACKET_LENGTH, packet_length) \
+	| BITFIELD_SET(PACKET_SEQBIT, seqbit))
+
+/* The number of bytes reserved for packet header.
+ * These value must be defined according to MIPE documentation.
+ */
+#define PACKET_HEADER_SIZE 8 /* bytes */
+
+/* The number of bytes reserved for packet sequence number.
+ * These value must be defined according to MIPE documentation.
+ */
+#define PACKET_NUMBER_SIZE 4 /* bytes */
+
+/* Timeline packet family ids.
+ * Values are significant! Check MIPE documentation.
+ */
+enum tl_packet_family {
+	TL_PACKET_FAMILY_CTRL = 0, /* control packets */
+	TL_PACKET_FAMILY_TL = 1,   /* timeline packets */
+	TL_PACKET_FAMILY_COUNT
+};
+
+/* Packet classes used in timeline streams.
+ * Values are significant! Check MIPE documentation.
+ */
+enum tl_packet_class {
+	TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
+	TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
+};
+
+/* Packet types used in timeline streams.
+ * Values are significant! Check MIPE documentation.
+ */
+enum tl_packet_type {
+	TL_PACKET_TYPE_HEADER = 0,  /* stream's header/directory */
+	TL_PACKET_TYPE_BODY = 1,    /* stream's body */
+	TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
+};
+
+#endif /* _KBASE_MIPE_PROTO_H */
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
new file mode 100755
index 0000000..a8cd5ac
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -0,0 +1,2715 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mmu.c
+ * Base kernel MMU management.
+ */
+
+/* #define DEBUG    1 */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_tracepoints.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_debug.h>
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_reset_gpu.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+/**
+ * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs.
+ *
+ * If sync is not set then transactions still in flight when the flush is issued
+ * may use the old page tables and the data they write will not be written out
+ * to memory, this function returns after the flush has been issued but
+ * before all accesses which might effect the flushed region have completed.
+ *
+ * If sync is set then accesses in the flushed region will be drained
+ * before data is flush and invalidated through L1, L2 and into memory,
+ * after which point this function will return.
+ */
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync);
+
+/**
+ * kbase_mmu_flush_invalidate_no_ctx() - Flush and invalidate the GPU caches.
+ * @kbdev: Device pointer.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ * @as_nr: GPU address space number for which flush + invalidate is required.
+ *
+ * This is used for MMU tables which do not belong to a user space context.
+ */
+static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
+		u64 vpfn, size_t nr, bool sync, int as_nr);
+
+/**
+ * kbase_mmu_sync_pgd - sync page directory to memory
+ * @kbdev:	Device pointer.
+ * @handle:	Address of DMA region.
+ * @size:       Size of the region to sync.
+ *
+ * This should be called after each page directory update.
+ */
+
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
+		dma_addr_t handle, size_t size)
+{
+	/* If page table is not coherent then ensure the gpu can read
+	 * the pages from memory
+	 */
+	if (kbdev->system_coherency != COHERENCY_ACE)
+		dma_sync_single_for_device(kbdev->dev, handle, size,
+				DMA_TO_DEVICE);
+}
+
+/*
+ * Definitions:
+ * - PGD: Page Directory.
+ * - PTE: Page Table Entry. A 64bit value pointing to the next
+ *        level of translation
+ * - ATE: Address Transation Entry. A 64bit value pointing to
+ *        a 4kB physical page.
+ */
+
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str,
+		struct kbase_fault *fault);
+
+static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr *phys, size_t nr,
+					unsigned long flags, int group_id);
+
+/**
+ * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to
+ *                               a region on a GPU page fault
+ *
+ * @reg:           The region that will be backed with more pages
+ * @fault_rel_pfn: PFN of the fault relative to the start of the region
+ *
+ * This calculates how much to increase the backing of a region by, based on
+ * where a GPU page fault occurred and the flags in the region.
+ *
+ * This can be more than the minimum number of pages that would reach
+ * @fault_rel_pfn, for example to reduce the overall rate of page fault
+ * interrupts on a region, or to ensure that the end address is aligned.
+ *
+ * Return: the number of backed pages to increase by
+ */
+static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
+		struct kbase_va_region *reg, size_t fault_rel_pfn)
+{
+	size_t multiple = reg->extent;
+	size_t reg_current_size = kbase_reg_current_backed_size(reg);
+	size_t minimum_extra = fault_rel_pfn - reg_current_size + 1;
+	size_t remainder;
+
+	if (!multiple) {
+		dev_warn(kbdev->dev,
+				"VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n",
+				((unsigned long long)reg->start_pfn) << PAGE_SHIFT);
+		return minimum_extra;
+	}
+
+	/* Calculate the remainder to subtract from minimum_extra to make it
+	 * the desired (rounded down) multiple of the extent.
+	 * Depending on reg's flags, the base used for calculating multiples is
+	 * different */
+	if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
+		/* multiple is based from the top of the initial commit, which
+		 * has been allocated in such a way that (start_pfn +
+		 * initial_commit) is already aligned to multiple. Hence the
+		 * pfn for the end of committed memory will also be aligned to
+		 * multiple */
+		size_t initial_commit = reg->initial_commit;
+
+		if (fault_rel_pfn < initial_commit) {
+			/* this case is just to catch in case it's been
+			 * recommitted by userspace to be smaller than the
+			 * initial commit */
+			minimum_extra = initial_commit - reg_current_size;
+			remainder = 0;
+		} else {
+			/* same as calculating (fault_rel_pfn - initial_commit + 1) */
+			size_t pages_after_initial = minimum_extra + reg_current_size - initial_commit;
+
+			remainder = pages_after_initial % multiple;
+		}
+	} else {
+		/* multiple is based from the current backed size, even if the
+		 * current backed size/pfn for end of committed memory are not
+		 * themselves aligned to multiple */
+		remainder = minimum_extra % multiple;
+	}
+
+	if (remainder == 0)
+		return minimum_extra;
+
+	return minimum_extra + multiple - remainder;
+}
+
+#ifdef CONFIG_MALI_CINSTR_GWT
+static void kbase_gpu_mmu_handle_write_faulting_as(
+				struct kbase_device *kbdev,
+				struct kbase_as *faulting_as,
+				u64 start_pfn, size_t nr, u32 op)
+{
+	mutex_lock(&kbdev->mmu_hw_mutex);
+
+	kbase_mmu_hw_clear_fault(kbdev, faulting_as,
+			KBASE_MMU_FAULT_TYPE_PAGE);
+	kbase_mmu_hw_do_operation(kbdev, faulting_as, start_pfn,
+			nr, op, 1);
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_mmu_hw_enable_fault(kbdev, faulting_as,
+			KBASE_MMU_FAULT_TYPE_PAGE);
+}
+
+static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
+			struct kbase_as *faulting_as)
+{
+	struct kbasep_gwt_list_element *pos;
+	struct kbase_va_region *region;
+	struct kbase_device *kbdev;
+	struct kbase_fault *fault;
+	u64 fault_pfn, pfn_offset;
+	u32 op;
+	int ret;
+	int as_no;
+
+	as_no = faulting_as->number;
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+	fault = &faulting_as->pf_data;
+	fault_pfn = fault->addr >> PAGE_SHIFT;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Find region and check if it should be writable. */
+	region = kbase_region_tracker_find_region_enclosing_address(kctx,
+			fault->addr);
+	if (kbase_is_region_invalid_or_free(region)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not mapped on the GPU",
+				&faulting_as->pf_data);
+		return;
+	}
+
+	if (!(region->flags & KBASE_REG_GPU_WR)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Region does not have write permissions",
+				&faulting_as->pf_data);
+		return;
+	}
+
+	/* Capture addresses of faulting write location
+	 * for job dumping if write tracking is enabled.
+	 */
+	if (kctx->gwt_enabled) {
+		u64 page_addr = fault->addr & PAGE_MASK;
+		bool found = false;
+		/* Check if this write was already handled. */
+		list_for_each_entry(pos, &kctx->gwt_current_list, link) {
+			if (page_addr == pos->page_addr) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found) {
+			pos = kmalloc(sizeof(*pos), GFP_KERNEL);
+			if (pos) {
+				pos->region = region;
+				pos->page_addr = page_addr;
+				pos->num_pages = 1;
+				list_add(&pos->link, &kctx->gwt_current_list);
+			} else {
+				dev_warn(kbdev->dev, "kmalloc failure");
+			}
+		}
+	}
+
+	pfn_offset = fault_pfn - region->start_pfn;
+	/* Now make this faulting page writable to GPU. */
+	ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn,
+				&kbase_get_gpu_phy_pages(region)[pfn_offset],
+				1, region->flags, region->gpu_alloc->group_id);
+
+	/* flush L2 and unlock the VA (resumes the MMU) */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+		op = AS_COMMAND_FLUSH;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as,
+			fault_pfn, 1, op);
+
+	kbase_gpu_vm_unlock(kctx);
+}
+
+static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx,
+			struct kbase_as	*faulting_as)
+{
+	struct kbase_fault *fault = &faulting_as->pf_data;
+
+	switch (fault->status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		kbase_gpu_mmu_handle_write_fault(kctx, faulting_as);
+		break;
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Execute Permission fault", fault);
+		break;
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Read Permission fault", fault);
+		break;
+	default:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Unknown Permission fault", fault);
+		break;
+	}
+}
+#endif
+
+#define MAX_POOL_LEVEL 2
+
+/**
+ * page_fault_try_alloc - Try to allocate memory from a context pool
+ * @kctx:          Context pointer
+ * @region:        Region to grow
+ * @new_pages:     Number of 4 kB pages to allocate
+ * @pages_to_grow: Pointer to variable to store number of outstanding pages on
+ *                 failure. This can be either 4 kB or 2 MB pages, depending on
+ *                 the number of pages requested.
+ * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true
+ *                 for 2 MB, false for 4 kB.
+ * @prealloc_sas:  Pointer to kbase_sub_alloc structures
+ *
+ * This function will try to allocate as many pages as possible from the context
+ * pool, then if required will try to allocate the remaining pages from the
+ * device pool.
+ *
+ * This function will not allocate any new memory beyond that that is already
+ * present in the context or device pools. This is because it is intended to be
+ * called with the vm_lock held, which could cause recursive locking if the
+ * allocation caused the out-of-memory killer to run.
+ *
+ * If 2 MB pages are enabled and new_pages is >= 2 MB then pages_to_grow will be
+ * a count of 2 MB pages, otherwise it will be a count of 4 kB pages.
+ *
+ * Return: true if successful, false on failure
+ */
+static bool page_fault_try_alloc(struct kbase_context *kctx,
+		struct kbase_va_region *region, size_t new_pages,
+		int *pages_to_grow, bool *grow_2mb_pool,
+		struct kbase_sub_alloc **prealloc_sas)
+{
+	struct tagged_addr *gpu_pages[MAX_POOL_LEVEL] = {NULL};
+	struct tagged_addr *cpu_pages[MAX_POOL_LEVEL] = {NULL};
+	size_t pages_alloced[MAX_POOL_LEVEL] = {0};
+	struct kbase_mem_pool *pool, *root_pool;
+	int pool_level = 0;
+	bool alloc_failed = false;
+	size_t pages_still_required;
+
+	if (WARN_ON(region->gpu_alloc->group_id >=
+		MEMORY_GROUP_MANAGER_NR_GROUPS)) {
+		/* Do not try to grow the memory pool */
+		*pages_to_grow = 0;
+		return false;
+	}
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	if (new_pages >= (SZ_2M / SZ_4K)) {
+		root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
+		*grow_2mb_pool = true;
+	} else {
+#endif
+		root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id];
+		*grow_2mb_pool = false;
+#ifdef CONFIG_MALI_2MB_ALLOC
+	}
+#endif
+
+	if (region->gpu_alloc != region->cpu_alloc)
+		new_pages *= 2;
+
+	pages_still_required = new_pages;
+
+	/* Determine how many pages are in the pools before trying to allocate.
+	 * Don't attempt to allocate & free if the allocation can't succeed.
+	 */
+	for (pool = root_pool; pool != NULL; pool = pool->next_pool) {
+		size_t pool_size_4k;
+
+		kbase_mem_pool_lock(pool);
+
+		pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
+		if (pool_size_4k >= pages_still_required)
+			pages_still_required = 0;
+		else
+			pages_still_required -= pool_size_4k;
+
+		kbase_mem_pool_unlock(pool);
+
+		if (!pages_still_required)
+			break;
+	}
+
+	if (pages_still_required) {
+		/* Insufficient pages in pools. Don't try to allocate - just
+		 * request a grow.
+		 */
+		*pages_to_grow = pages_still_required;
+
+		return false;
+	}
+
+	/* Since we've dropped the pool locks, the amount of memory in the pools
+	 * may change between the above check and the actual allocation.
+	 */
+	pool = root_pool;
+	for (pool_level = 0; pool_level < MAX_POOL_LEVEL; pool_level++) {
+		size_t pool_size_4k;
+		size_t pages_to_alloc_4k;
+		size_t pages_to_alloc_4k_per_alloc;
+
+		kbase_mem_pool_lock(pool);
+
+		/* Allocate as much as possible from this pool*/
+		pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
+		pages_to_alloc_4k = MIN(new_pages, pool_size_4k);
+		if (region->gpu_alloc == region->cpu_alloc)
+			pages_to_alloc_4k_per_alloc = pages_to_alloc_4k;
+		else
+			pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1;
+
+		pages_alloced[pool_level] = pages_to_alloc_4k;
+		if (pages_to_alloc_4k) {
+			gpu_pages[pool_level] =
+					kbase_alloc_phy_pages_helper_locked(
+						region->gpu_alloc, pool,
+						pages_to_alloc_4k_per_alloc,
+						&prealloc_sas[0]);
+
+			if (!gpu_pages[pool_level]) {
+				alloc_failed = true;
+			} else if (region->gpu_alloc != region->cpu_alloc) {
+				cpu_pages[pool_level] =
+					kbase_alloc_phy_pages_helper_locked(
+						region->cpu_alloc, pool,
+						pages_to_alloc_4k_per_alloc,
+						&prealloc_sas[1]);
+
+				if (!cpu_pages[pool_level])
+					alloc_failed = true;
+			}
+		}
+
+		kbase_mem_pool_unlock(pool);
+
+		if (alloc_failed) {
+			WARN_ON(!new_pages);
+			WARN_ON(pages_to_alloc_4k >= new_pages);
+			WARN_ON(pages_to_alloc_4k_per_alloc >= new_pages);
+			break;
+		}
+
+		new_pages -= pages_to_alloc_4k;
+
+		if (!new_pages)
+			break;
+
+		pool = pool->next_pool;
+		if (!pool)
+			break;
+	}
+
+	if (new_pages) {
+		/* Allocation was unsuccessful */
+		int max_pool_level = pool_level;
+
+		pool = root_pool;
+
+		/* Free memory allocated so far */
+		for (pool_level = 0; pool_level <= max_pool_level;
+				pool_level++) {
+			kbase_mem_pool_lock(pool);
+
+			if (region->gpu_alloc != region->cpu_alloc) {
+				if (pages_alloced[pool_level] &&
+						cpu_pages[pool_level])
+					kbase_free_phy_pages_helper_locked(
+						region->cpu_alloc,
+						pool, cpu_pages[pool_level],
+						pages_alloced[pool_level]);
+			}
+
+			if (pages_alloced[pool_level] && gpu_pages[pool_level])
+				kbase_free_phy_pages_helper_locked(
+						region->gpu_alloc,
+						pool, gpu_pages[pool_level],
+						pages_alloced[pool_level]);
+
+			kbase_mem_pool_unlock(pool);
+
+			pool = pool->next_pool;
+		}
+
+		/*
+		 * If the allocation failed despite there being enough memory in
+		 * the pool, then just fail. Otherwise, try to grow the memory
+		 * pool.
+		 */
+		if (alloc_failed)
+			*pages_to_grow = 0;
+		else
+			*pages_to_grow = new_pages;
+
+		return false;
+	}
+
+	/* Allocation was successful. No pages to grow, return success. */
+	*pages_to_grow = 0;
+
+	return true;
+}
+
+void page_fault_worker(struct work_struct *data)
+{
+	u64 fault_pfn;
+	u32 fault_status;
+	size_t new_pages;
+	size_t fault_rel_pfn;
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_va_region *region;
+	struct kbase_fault *fault;
+	int err;
+	bool grown = false;
+	int pages_to_grow;
+	bool grow_2mb_pool;
+	struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
+	int i;
+
+	faulting_as = container_of(data, struct kbase_as, work_pagefault);
+	fault = &faulting_as->pf_data;
+	fault_pfn = fault->addr >> PAGE_SHIFT;
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+
+	if (unlikely(fault->protected_mode)) {
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Protected mode fault", fault);
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+
+		goto fault_done;
+	}
+
+	fault_status = fault->status;
+	switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
+		/* need to check against the region to handle this one */
+		break;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
+#ifdef CONFIG_MALI_CINSTR_GWT
+		/* If GWT was ever enabled then we need to handle
+		 * write fault pages even if the feature was disabled later.
+		 */
+		if (kctx->gwt_was_enabled) {
+			kbase_gpu_mmu_handle_permission_fault(kctx,
+							faulting_as);
+			goto fault_done;
+		}
+#endif
+
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure", fault);
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Translation table bus fault", fault);
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
+		/* nothing to do, but we don't expect this fault currently */
+		dev_warn(kbdev->dev, "Access flag unexpectedly set");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Address size fault", fault);
+		else
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Unknown fault code", fault);
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Memory attributes fault", fault);
+		else
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Unknown fault code", fault);
+		goto fault_done;
+
+	default:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Unknown fault code", fault);
+		goto fault_done;
+	}
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	/* Preallocate memory for the sub-allocation structs if necessary */
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+		prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
+		if (!prealloc_sas[i]) {
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Failed pre-allocating memory for sub-allocations' metadata",
+					fault);
+			goto fault_done;
+		}
+	}
+#endif /* CONFIG_MALI_2MB_ALLOC */
+
+page_fault_retry:
+	/* so we have a translation fault, let's see if it is for growable
+	 * memory */
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx,
+			fault->addr);
+	if (kbase_is_region_invalid_or_free(region)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not mapped on the GPU", fault);
+		goto fault_done;
+	}
+
+	if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"DMA-BUF is not mapped on the GPU", fault);
+		goto fault_done;
+	}
+
+	if (region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Bad physical memory group ID", fault);
+		goto fault_done;
+	}
+
+	if ((region->flags & GROWABLE_FLAGS_REQUIRED)
+			!= GROWABLE_FLAGS_REQUIRED) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not growable", fault);
+		goto fault_done;
+	}
+
+	if ((region->flags & KBASE_REG_DONT_NEED)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Don't need memory can't be grown", fault);
+		goto fault_done;
+	}
+
+	/* find the size we need to grow it by */
+	/* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address
+	 * validating the fault_adress to be within a size_t from the start_pfn */
+	fault_rel_pfn = fault_pfn - region->start_pfn;
+
+	if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
+		dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+				fault->addr, region->start_pfn,
+				region->start_pfn +
+				kbase_reg_current_backed_size(region));
+
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* [1] in case another page fault occurred while we were
+		 * handling the (duplicate) page fault we need to ensure we
+		 * don't loose the other page fault as result of us clearing
+		 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
+		 * an UNLOCK command that will retry any stalled memory
+		 * transaction (which should cause the other page fault to be
+		 * raised again).
+		 */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+
+		goto fault_done;
+	}
+
+	new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn);
+
+	/* cap to max vsize */
+	new_pages = min(new_pages, region->nr_pages - kbase_reg_current_backed_size(region));
+
+	if (0 == new_pages) {
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Duplicate of a fault we've already handled, nothing to do */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* See comment [1] about UNLOCK usage */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+		goto fault_done;
+	}
+
+	pages_to_grow = 0;
+
+	spin_lock(&kctx->mem_partials_lock);
+	grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow,
+			&grow_2mb_pool, prealloc_sas);
+	spin_unlock(&kctx->mem_partials_lock);
+
+	if (grown) {
+		u64 pfn_offset;
+		u32 op;
+
+		/* alloc success */
+		KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
+
+		/* set up the new pages */
+		pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
+		/*
+		 * Note:
+		 * Issuing an MMU operation will unlock the MMU and cause the
+		 * translation to be replayed. If the page insertion fails then
+		 * rather then trying to continue the context should be killed
+		 * so the no_flush version of insert_pages is used which allows
+		 * us to unlock the MMU as we see fit.
+		 */
+		err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu,
+			region->start_pfn + pfn_offset,
+			&kbase_get_gpu_phy_pages(region)[pfn_offset],
+			new_pages, region->flags, region->gpu_alloc->group_id);
+		if (err) {
+			kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
+			if (region->gpu_alloc != region->cpu_alloc)
+				kbase_free_phy_pages_helper(region->cpu_alloc,
+						new_pages);
+			kbase_gpu_vm_unlock(kctx);
+			/* The locked VA region will be unlocked and the cache invalidated in here */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page table update failure", fault);
+			goto fault_done;
+		}
+		KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no, (u64)new_pages);
+
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* flush L2 and unlock the VA (resumes the MMU) */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+			op = AS_COMMAND_FLUSH;
+		else
+			op = AS_COMMAND_FLUSH_PT;
+
+		/* clear MMU interrupt - this needs to be done after updating
+		 * the page tables but before issuing a FLUSH command. The
+		 * FLUSH cmd has a side effect that it restarts stalled memory
+		 * transactions in other address spaces which may cause
+		 * another fault to occur. If we didn't clear the interrupt at
+		 * this stage a new IRQ might not be raised when the GPU finds
+		 * a MMU IRQ is already pending.
+		 */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+
+		kbase_mmu_hw_do_operation(kbdev, faulting_as,
+				fault->addr >> PAGE_SHIFT,
+				new_pages, op, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		/* reenable this in the mask */
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+
+#ifdef CONFIG_MALI_CINSTR_GWT
+		if (kctx->gwt_enabled) {
+			/* GWT also tracks growable regions. */
+			struct kbasep_gwt_list_element *pos;
+
+			pos = kmalloc(sizeof(*pos), GFP_KERNEL);
+			if (pos) {
+				pos->region = region;
+				pos->page_addr = (region->start_pfn +
+							pfn_offset) <<
+							 PAGE_SHIFT;
+				pos->num_pages = new_pages;
+				list_add(&pos->link,
+					&kctx->gwt_current_list);
+			} else {
+				dev_warn(kbdev->dev, "kmalloc failure");
+			}
+		}
+#endif
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		int ret = -ENOMEM;
+
+		kbase_gpu_vm_unlock(kctx);
+
+		/* If the memory pool was insufficient then grow it and retry.
+		 * Otherwise fail the allocation.
+		 */
+		if (pages_to_grow > 0) {
+#ifdef CONFIG_MALI_2MB_ALLOC
+			if (grow_2mb_pool) {
+				/* Round page requirement up to nearest 2 MB */
+				struct kbase_mem_pool *const lp_mem_pool =
+					&kctx->mem_pools.large[
+					region->gpu_alloc->group_id];
+
+				pages_to_grow = (pages_to_grow +
+					((1 << lp_mem_pool->order) - 1))
+						>> lp_mem_pool->order;
+
+				ret = kbase_mem_pool_grow(lp_mem_pool,
+					pages_to_grow);
+			} else {
+#endif
+				struct kbase_mem_pool *const mem_pool =
+					&kctx->mem_pools.small[
+					region->gpu_alloc->group_id];
+
+				ret = kbase_mem_pool_grow(mem_pool,
+					pages_to_grow);
+#ifdef CONFIG_MALI_2MB_ALLOC
+			}
+#endif
+		}
+		if (ret < 0) {
+			/* failed to extend, handle as a normal PF */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page allocation failure", fault);
+		} else {
+			goto page_fault_retry;
+		}
+	}
+
+fault_done:
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
+		kfree(prealloc_sas[i]);
+
+	/*
+	 * By this point, the fault was handled in some way,
+	 * so release the ctx refcount
+	 */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut)
+{
+	u64 *page;
+	int i;
+	struct page *p;
+
+	p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]);
+	if (!p)
+		return 0;
+
+	page = kmap(p);
+	if (NULL == page)
+		goto alloc_free;
+
+	/* If the MMU tables belong to a context then account the memory usage
+	 * to that context, otherwise the MMU tables are device wide and are
+	 * only accounted to the device.
+	 */
+	if (mmut->kctx) {
+		int new_page_count;
+
+		new_page_count = atomic_add_return(1,
+			&mmut->kctx->used_pages);
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kbdev,
+			mmut->kctx->id,
+			(u64)new_page_count);
+		kbase_process_page_usage_inc(mmut->kctx, 1);
+	}
+
+	atomic_add(1, &kbdev->memdev.used_pages);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
+		kbdev->mmu_mode->entry_invalidate(&page[i]);
+
+	kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+	kunmap(p);
+	return page_to_phys(p);
+
+alloc_free:
+	kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p,
+		false);
+
+	return 0;
+}
+
+/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
+ * new table from the pool if needed and possible
+ */
+static int mmu_get_next_pgd(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
+		phys_addr_t *pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(*pgd);
+
+	lockdep_assert_held(&mmut->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	p = pfn_to_page(PFN_DOWN(*pgd));
+	page = kmap(p);
+	if (NULL == page) {
+		dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
+		return -EINVAL;
+	}
+
+	target_pgd = kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+
+	if (!target_pgd) {
+		target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
+		if (!target_pgd) {
+			dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n",
+					__func__);
+			kunmap(p);
+			return -ENOMEM;
+		}
+
+		kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+
+		kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
+		/* Rely on the caller to update the address space flags. */
+	}
+
+	kunmap(p);
+	*pgd = target_pgd;
+
+	return 0;
+}
+
+/*
+ * Returns the PGD for the specified level of translation
+ */
+static int mmu_get_pgd_at_level(struct kbase_device *kbdev,
+					struct kbase_mmu_table *mmut,
+					u64 vpfn,
+					int level,
+					phys_addr_t *out_pgd)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&mmut->mmu_lock);
+	pgd = mmut->pgd;
+
+	for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) {
+		int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
+		/* Handle failure condition */
+		if (err) {
+			dev_dbg(kbdev->dev,
+				 "%s: mmu_get_next_pgd failure at level %d\n",
+				 __func__, l);
+			return err;
+		}
+	}
+
+	*out_pgd = pgd;
+
+	return 0;
+}
+
+static int mmu_get_bottom_pgd(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
+		u64 vpfn,
+		phys_addr_t *out_pgd)
+{
+	return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL,
+			out_pgd);
+}
+
+static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
+		u64 from_vpfn, u64 to_vpfn)
+{
+	phys_addr_t pgd;
+	u64 vpfn = from_vpfn;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+	KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn);
+
+	lockdep_assert_held(&mmut->mmu_lock);
+
+	mmu_mode = kbdev->mmu_mode;
+
+	while (vpfn < to_vpfn) {
+		unsigned int i;
+		unsigned int idx = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx;
+		unsigned int pcount = 0;
+		unsigned int left = to_vpfn - vpfn;
+		int level;
+		u64 *page;
+
+		if (count > left)
+			count = left;
+
+		/* need to check if this is a 2MB page or a 4kB */
+		pgd = mmut->pgd;
+
+		for (level = MIDGARD_MMU_TOPLEVEL;
+				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
+			idx = (vpfn >> ((3 - level) * 9)) & 0x1FF;
+			page = kmap(phys_to_page(pgd));
+			if (mmu_mode->ate_is_valid(page[idx], level))
+				break; /* keep the mapping */
+			kunmap(phys_to_page(pgd));
+			pgd = mmu_mode->pte_to_phy_addr(page[idx]);
+		}
+
+		switch (level) {
+		case MIDGARD_MMU_LEVEL(2):
+			/* remap to single entry to update */
+			pcount = 1;
+			break;
+		case MIDGARD_MMU_BOTTOMLEVEL:
+			/* page count is the same as the logical count */
+			pcount = count;
+			break;
+		default:
+			dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n",
+			       __func__, level);
+			goto next;
+		}
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < pcount; i++)
+			mmu_mode->entry_invalidate(&page[idx + i]);
+
+		kbase_mmu_sync_pgd(kbdev,
+				   kbase_dma_addr(phys_to_page(pgd)) + 8 * idx,
+				   8 * pcount);
+		kunmap(phys_to_page(pgd));
+
+next:
+		vpfn += count;
+	}
+}
+
+/*
+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr phys, size_t nr,
+					unsigned long flags, int const group_id)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_single_page only partially completes we need to be
+	 * able to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	kbdev = kctx->kbdev;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu.mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > remain)
+			count = remain;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_bottom_pgd(kbdev, &kctx->mmu,
+					vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu.mmu_lock);
+			err = kbase_mem_pool_grow(
+				&kbdev->mem_pools.small[
+					kctx->mmu.group_id],
+				MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu.mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kbdev,
+						&kctx->mmu,
+						recover_vpfn,
+						recover_vpfn + recover_count);
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kbdev,
+						&kctx->mmu,
+						recover_vpfn,
+						recover_vpfn + recover_count);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			/* Fail if the current page is a valid ATE entry */
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+
+			pgd_page[ofs] = kbase_mmu_create_ate(kbdev,
+				phys, flags, MIDGARD_MMU_BOTTOMLEVEL, group_id);
+		}
+
+		vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table.
+		 * If further pages need inserting and fail we need to undo what
+		 * has already taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+	mutex_unlock(&kctx->mmu.mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu.mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+static inline void cleanup_empty_pte(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut, u64 *pte)
+{
+	phys_addr_t tmp_pgd;
+	struct page *tmp_p;
+
+	tmp_pgd = kbdev->mmu_mode->pte_to_phy_addr(*pte);
+	tmp_p = phys_to_page(tmp_pgd);
+	kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id],
+		tmp_p, false);
+
+	/* If the MMU tables belong to a context then we accounted the memory
+	 * usage to that context, so decrement here.
+	 */
+	if (mmut->kctx) {
+		kbase_process_page_usage_dec(mmut->kctx, 1);
+		atomic_sub(1, &mmut->kctx->used_pages);
+	}
+	atomic_sub(1, &kbdev->memdev.used_pages);
+}
+
+u64 kbase_mmu_create_ate(struct kbase_device *const kbdev,
+	struct tagged_addr const phy, unsigned long const flags,
+	int const level, int const group_id)
+{
+	u64 entry;
+
+	kbdev->mmu_mode->entry_set_ate(&entry, phy, flags, level);
+	return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev,
+		group_id, level, entry);
+}
+
+int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
+				    struct kbase_mmu_table *mmut,
+				    const u64 start_vpfn,
+				    struct tagged_addr *phys, size_t nr,
+				    unsigned long flags,
+				    int const group_id)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	u64 insert_vpfn = start_vpfn;
+	size_t remain = nr;
+	int err;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	/* Note that 0 is a valid start_vpfn */
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mmu_mode = kbdev->mmu_mode;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&mmut->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int vindex = insert_vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
+		struct page *p;
+		int cur_level;
+
+		if (count > remain)
+			count = remain;
+
+		if (!vindex && is_huge_head(*phys))
+			cur_level = MIDGARD_MMU_LEVEL(2);
+		else
+			cur_level = MIDGARD_MMU_BOTTOMLEVEL;
+
+		/*
+		 * Repeatedly calling mmu_get_pgd_at_level() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn,
+						   cur_level, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&mmut->mmu_lock);
+			err = kbase_mem_pool_grow(
+				&kbdev->mem_pools.small[mmut->group_id],
+				cur_level);
+			mutex_lock(&mmut->mmu_lock);
+		} while (!err);
+
+		if (err) {
+			dev_warn(kbdev->dev,
+				 "%s: mmu_get_bottom_pgd failure\n", __func__);
+			if (insert_vpfn != start_vpfn) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kbdev,
+						mmut, start_vpfn, insert_vpfn);
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kbdev->dev, "%s: kmap failure\n",
+				 __func__);
+			if (insert_vpfn != start_vpfn) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kbdev,
+						mmut, start_vpfn, insert_vpfn);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		if (cur_level == MIDGARD_MMU_LEVEL(2)) {
+			int level_index = (insert_vpfn >> 9) & 0x1FF;
+			u64 *target = &pgd_page[level_index];
+
+			if (mmu_mode->pte_is_valid(*target, cur_level))
+				cleanup_empty_pte(kbdev, mmut, target);
+			*target = kbase_mmu_create_ate(kbdev, *phys, flags,
+				cur_level, group_id);
+		} else {
+			for (i = 0; i < count; i++) {
+				unsigned int ofs = vindex + i;
+				u64 *target = &pgd_page[ofs];
+
+				/* Warn if the current page is a valid ATE
+				 * entry. The page table shouldn't have anything
+				 * in the place where we are trying to put a
+				 * new entry. Modification to page table entries
+				 * should be performed with
+				 * kbase_mmu_update_pages()
+				 */
+				WARN_ON((*target & 1UL) != 0);
+
+				*target = kbase_mmu_create_ate(kbdev,
+					phys[i], flags, cur_level, group_id);
+			}
+		}
+
+		phys += count;
+		insert_vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kbdev,
+				kbase_dma_addr(p) + (vindex * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+	}
+
+	err = 0;
+
+fail_unlock:
+	mutex_unlock(&mmut->mmu_lock);
+	return err;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space
+ * number 'as_nr'.
+ */
+int kbase_mmu_insert_pages(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut, u64 vpfn,
+		struct tagged_addr *phys, size_t nr,
+		unsigned long flags, int as_nr, int const group_id)
+{
+	int err;
+
+	err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn,
+			phys, nr, flags, group_id);
+
+	if (mmut->kctx)
+		kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false);
+	else
+		kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false, as_nr);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+
+/**
+ * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
+ * without retaining the kbase context.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
+ * other locking.
+ */
+static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+	u32 op;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	if (sync)
+		op = AS_COMMAND_FLUSH_MEM;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	err = kbase_mmu_hw_do_operation(kbdev,
+				&kbdev->as[kctx->as_nr],
+				vpfn, nr, op, 0);
+	if (err) {
+		/* Flush failed to complete, assume the
+		 * GPU has hung and perform a reset to
+		 * recover */
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+
+#ifndef CONFIG_MALI_NO_MALI
+	/*
+	 * As this function could be called in interrupt context the sync
+	 * request can't block. Instead log the request and the next flush
+	 * request will pick it up.
+	 */
+	if ((!err) && sync &&
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+		atomic_set(&kctx->drain_pending, 1);
+#endif /* !CONFIG_MALI_NO_MALI */
+}
+
+/* Perform a flush/invalidate on a particular address space
+ */
+static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
+		struct kbase_as *as,
+		u64 vpfn, size_t nr, bool sync, bool drain_pending)
+{
+	int err;
+	u32 op;
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* GPU is off so there's no need to perform flush/invalidate */
+		return;
+	}
+
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+
+	if (sync)
+		op = AS_COMMAND_FLUSH_MEM;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	err = kbase_mmu_hw_do_operation(kbdev,
+			as, vpfn, nr, op, 0);
+
+	if (err) {
+		/* Flush failed to complete, assume the GPU has hung and
+		 * perform a reset to recover
+		 */
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
+
+		if (kbase_prepare_to_reset_gpu(kbdev))
+			kbase_reset_gpu(kbdev);
+	}
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+
+#ifndef CONFIG_MALI_NO_MALI
+	/*
+	 * The transaction lock must be dropped before here
+	 * as kbase_wait_write_flush could take it if
+	 * the GPU was powered down (static analysis doesn't
+	 * know this can't happen).
+	 */
+	drain_pending |= (!err) && sync &&
+		kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367);
+	if (drain_pending) {
+		/* Wait for GPU to flush write buffer */
+		kbase_wait_write_flush(kbdev);
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+	kbase_pm_context_idle(kbdev);
+}
+
+static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
+		u64 vpfn, size_t nr, bool sync, int as_nr)
+{
+	/* Skip if there is nothing to do */
+	if (nr) {
+		kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], vpfn,
+					nr, sync, false);
+	}
+}
+
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev;
+	bool ctx_is_in_runpool;
+	bool drain_pending = false;
+
+#ifndef CONFIG_MALI_NO_MALI
+	if (atomic_xchg(&kctx->drain_pending, 0))
+		drain_pending = true;
+#endif /* !CONFIG_MALI_NO_MALI */
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	kbdev = kctx->kbdev;
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if (ctx_is_in_runpool) {
+		KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+		kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr],
+				vpfn, nr, sync, drain_pending);
+
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+}
+
+void kbase_mmu_update(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
+		int as_nr)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID);
+
+	kbdev->mmu_mode->update(kbdev, mmut, as_nr);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_update);
+
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	kbdev->mmu_mode->disable_as(kbdev, as_nr);
+}
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the hwaccess_lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/*
+	 * The address space is being disabled, drain all knowledge of it out
+	 * from the caches as pages and page tables might be freed after this.
+	 *
+	 * The job scheduler code will already be holding the locks and context
+	 * so just do the flush.
+	 */
+	kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
+
+	kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+
+/*
+ * We actually only discard the ATE, and not the page table
+ * pages. There is a potential DoS here, as we'll leak memory by
+ * having PTEs that are potentially unused.  Will require physical
+ * page accounting, so MMU pages are part of the process allocation.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
+	struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr)
+{
+	phys_addr_t pgd;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err = -EFAULT;
+
+	if (0 == nr) {
+		/* early out if nothing to do */
+		return 0;
+	}
+
+	mutex_lock(&mmut->mmu_lock);
+
+	mmu_mode = kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		unsigned int pcount;
+		int level;
+		u64 *page;
+
+		if (count > nr)
+			count = nr;
+
+		/* need to check if this is a 2MB or a 4kB page */
+		pgd = mmut->pgd;
+
+		for (level = MIDGARD_MMU_TOPLEVEL;
+				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
+			phys_addr_t next_pgd;
+
+			index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
+			page = kmap(phys_to_page(pgd));
+			if (mmu_mode->ate_is_valid(page[index], level))
+				break; /* keep the mapping */
+			else if (!mmu_mode->pte_is_valid(page[index], level)) {
+				/* nothing here, advance */
+				switch (level) {
+				case MIDGARD_MMU_LEVEL(0):
+					count = 134217728;
+					break;
+				case MIDGARD_MMU_LEVEL(1):
+					count = 262144;
+					break;
+				case MIDGARD_MMU_LEVEL(2):
+					count = 512;
+					break;
+				case MIDGARD_MMU_LEVEL(3):
+					count = 1;
+					break;
+				}
+				if (count > nr)
+					count = nr;
+				goto next;
+			}
+			next_pgd = mmu_mode->pte_to_phy_addr(page[index]);
+			kunmap(phys_to_page(pgd));
+			pgd = next_pgd;
+		}
+
+		switch (level) {
+		case MIDGARD_MMU_LEVEL(0):
+		case MIDGARD_MMU_LEVEL(1):
+			dev_warn(kbdev->dev,
+				 "%s: No support for ATEs at level %d\n",
+				 __func__, level);
+			kunmap(phys_to_page(pgd));
+			goto out;
+		case MIDGARD_MMU_LEVEL(2):
+			/* can only teardown if count >= 512 */
+			if (count >= 512) {
+				pcount = 1;
+			} else {
+				dev_warn(kbdev->dev,
+					 "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n",
+					 __func__, count);
+				pcount = 0;
+			}
+			break;
+		case MIDGARD_MMU_BOTTOMLEVEL:
+			/* page count is the same as the logical count */
+			pcount = count;
+			break;
+		default:
+			dev_err(kbdev->dev,
+				"%s: found non-mapped memory, early out\n",
+				__func__);
+			vpfn += count;
+			nr -= count;
+			continue;
+		}
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < pcount; i++)
+			mmu_mode->entry_invalidate(&page[index + i]);
+
+		kbase_mmu_sync_pgd(kbdev,
+				   kbase_dma_addr(phys_to_page(pgd)) +
+				   8 * index, 8*pcount);
+
+next:
+		kunmap(phys_to_page(pgd));
+		vpfn += count;
+		nr -= count;
+	}
+	err = 0;
+out:
+	mutex_unlock(&mmut->mmu_lock);
+
+	if (mmut->kctx)
+		kbase_mmu_flush_invalidate(mmut->kctx, vpfn, requested_nr, true);
+	else
+		kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, requested_nr, true, as_nr);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+/**
+ * kbase_mmu_update_pages_no_flush() - Update page table entries on the GPU
+ *
+ * This will update page table entries that already exist on the GPU based on
+ * the new flags that are passed. It is used as a response to the changes of
+ * the memory attributes
+ *
+ * The caller is responsible for validating the memory attributes
+ *
+ * @kctx:  Kbase context
+ * @vpfn:  Virtual PFN (Page Frame Number) of the first page to update
+ * @phys:  Tagged physical addresses of the physical pages to replace the
+ *         current mappings
+ * @nr:    Number of pages to update
+ * @flags: Flags
+ * @group_id: The physical memory group in which the page was allocated.
+ *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ */
+static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr *phys, size_t nr,
+					unsigned long flags, int const group_id)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	int err;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu.mmu_lock);
+
+	kbdev = kctx->kbdev;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		size_t count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		do {
+			err = mmu_get_bottom_pgd(kbdev, &kctx->mmu,
+					vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu.mmu_lock);
+			err = kbase_mem_pool_grow(
+				&kbdev->mem_pools.small[
+					kctx->mmu.group_id],
+				MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu.mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kbdev->dev,
+				 "mmu_get_bottom_pgd failure\n");
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kbdev->dev, "kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			pgd_page[index + i] = kbase_mmu_create_ate(kbdev,
+				phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL,
+				group_id);
+
+		phys += count;
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(pfn_to_page(PFN_DOWN(pgd)));
+	}
+
+	mutex_unlock(&kctx->mmu.mmu_lock);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu.mmu_lock);
+	return err;
+}
+
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
+			   struct tagged_addr *phys, size_t nr,
+			   unsigned long flags, int const group_id)
+{
+	int err;
+
+	err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags,
+		group_id);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, true);
+	return err;
+}
+
+static void mmu_teardown_level(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut, phys_addr_t pgd,
+		int level, u64 *pgd_page_buffer)
+{
+	phys_addr_t target_pgd;
+	struct page *p;
+	u64 *pgd_page;
+	int i;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	lockdep_assert_held(&mmut->mmu_lock);
+
+	pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != pgd_page);
+	/* Copy the page to our preallocated buffer so that we can minimize
+	 * kmap_atomic usage */
+	memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+	kunmap_atomic(pgd_page);
+	pgd_page = pgd_page_buffer;
+
+	mmu_mode = kbdev->mmu_mode;
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+		if (target_pgd) {
+			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
+				mmu_teardown_level(kbdev, mmut,
+						   target_pgd,
+						   level + 1,
+						   pgd_page_buffer +
+						   (PAGE_SIZE / sizeof(u64)));
+			}
+		}
+	}
+
+	p = pfn_to_page(PFN_DOWN(pgd));
+
+	kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id],
+		p, true);
+
+	atomic_sub(1, &kbdev->memdev.used_pages);
+
+	/* If MMU tables belong to a context then pages will have been accounted
+	 * against it, so we must decrement the usage counts here.
+	 */
+	if (mmut->kctx) {
+		kbase_process_page_usage_dec(mmut->kctx, 1);
+		atomic_sub(1, &mmut->kctx->used_pages);
+	}
+}
+
+int kbase_mmu_init(struct kbase_device *const kbdev,
+	struct kbase_mmu_table *const mmut, struct kbase_context *const kctx,
+	int const group_id)
+{
+	if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ||
+	    WARN_ON(group_id < 0))
+		return -EINVAL;
+
+	mmut->group_id = group_id;
+	mutex_init(&mmut->mmu_lock);
+	mmut->kctx = kctx;
+
+	/* Preallocate MMU depth of four pages for mmu_teardown_level to use */
+	mmut->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+
+	if (mmut->mmu_teardown_pages == NULL)
+		return -ENOMEM;
+
+	mmut->pgd = 0;
+	/* We allocate pages into the kbdev memory pool, then
+	 * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to
+	 * avoid allocations from the kernel happening with the lock held.
+	 */
+	while (!mmut->pgd) {
+		int err;
+
+		err = kbase_mem_pool_grow(
+			&kbdev->mem_pools.small[mmut->group_id],
+			MIDGARD_MMU_BOTTOMLEVEL);
+		if (err) {
+			kbase_mmu_term(kbdev, mmut);
+			return -ENOMEM;
+		}
+
+		mutex_lock(&mmut->mmu_lock);
+		mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
+		mutex_unlock(&mmut->mmu_lock);
+	}
+
+	return 0;
+}
+
+void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
+{
+	if (mmut->pgd) {
+		mutex_lock(&mmut->mmu_lock);
+		mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL,
+				mmut->mmu_teardown_pages);
+		mutex_unlock(&mmut->mmu_lock);
+
+		if (mmut->kctx)
+			KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0);
+	}
+
+	kfree(mmut->mmu_teardown_pages);
+	mutex_destroy(&mmut->mmu_lock);
+}
+
+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
+	size_t dump_size;
+	struct kbase_device *kbdev;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu.mmu_lock);
+
+	kbdev = kctx->kbdev;
+	mmu_mode = kbdev->mmu_mode;
+
+	pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+	if (!pgd_page) {
+		dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
+		return 0;
+	}
+
+	if (*size_left >= size) {
+		/* A modified physical address that contains the page table level */
+		u64 m_pgd = pgd | level;
+
+		/* Put the modified physical address in the output buffer */
+		memcpy(*buffer, &m_pgd, sizeof(m_pgd));
+		*buffer += sizeof(m_pgd);
+
+		/* Followed by the page table itself */
+		memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
+		*buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
+
+		*size_left -= size;
+	}
+
+	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
+		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
+				target_pgd = mmu_mode->pte_to_phy_addr(
+						pgd_page[i]);
+
+				dump_size = kbasep_mmu_dump_level(kctx,
+						target_pgd, level + 1,
+						buffer, size_left);
+				if (!dump_size) {
+					kunmap(pfn_to_page(PFN_DOWN(pgd)));
+					return 0;
+				}
+				size += dump_size;
+			}
+		}
+	}
+
+	kunmap(pfn_to_page(PFN_DOWN(pgd)));
+
+	return size;
+}
+
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
+{
+	void *kaddr;
+	size_t size_left;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (0 == nr_pages) {
+		/* can't dump in a 0 sized buffer, early out */
+		return NULL;
+	}
+
+	size_left = nr_pages * PAGE_SIZE;
+
+	KBASE_DEBUG_ASSERT(0 != size_left);
+	kaddr = vmalloc_user(size_left);
+
+	mutex_lock(&kctx->mmu.mmu_lock);
+
+	if (kaddr) {
+		u64 end_marker = 0xFFULL;
+		char *buffer;
+		char *mmu_dump_buffer;
+		u64 config[3];
+		size_t dump_size, size = 0;
+
+		buffer = (char *)kaddr;
+		mmu_dump_buffer = buffer;
+
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
+			struct kbase_mmu_setup as_setup;
+
+			kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu,
+					&as_setup);
+			config[0] = as_setup.transtab;
+			config[1] = as_setup.memattr;
+			config[2] = as_setup.transcfg;
+			memcpy(buffer, &config, sizeof(config));
+			mmu_dump_buffer += sizeof(config);
+			size_left -= sizeof(config);
+			size += sizeof(config);
+		}
+
+		dump_size = kbasep_mmu_dump_level(kctx,
+				kctx->mmu.pgd,
+				MIDGARD_MMU_TOPLEVEL,
+				&mmu_dump_buffer,
+				&size_left);
+
+		if (!dump_size)
+			goto fail_free;
+
+		size += dump_size;
+
+		/* Add on the size for the end marker */
+		size += sizeof(u64);
+
+		if (size > (nr_pages * PAGE_SIZE)) {
+			/* The buffer isn't big enough - free the memory and return failure */
+			goto fail_free;
+		}
+
+		/* Add the end marker */
+		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
+	}
+
+	mutex_unlock(&kctx->mmu.mmu_lock);
+	return kaddr;
+
+fail_free:
+	vfree(kaddr);
+	mutex_unlock(&kctx->mmu.mmu_lock);
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_dump);
+
+void bus_fault_worker(struct work_struct *data)
+{
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_fault *fault;
+	bool reset_status = false;
+
+	faulting_as = container_of(data, struct kbase_as, work_busfault);
+	fault = &faulting_as->bf_data;
+
+	/* Ensure that any pending page fault worker has completed */
+	flush_work(&faulting_as->work_pagefault);
+
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	if (unlikely(fault->protected_mode)) {
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure", fault);
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
+				KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+		atomic_dec(&kbdev->faults_pending);
+		return;
+
+	}
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+	/* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
+	if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		unsigned long flags;
+
+		/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Set the MMU into unmapped mode */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_disable(kctx);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+
+		kbase_pm_context_idle(kbdev);
+	}
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
+{
+	const char *e;
+
+	switch (exception_code) {
+		/* Non-Fault Status code */
+	case 0x00:
+		e = "NOT_STARTED/IDLE/OK";
+		break;
+	case 0x01:
+		e = "DONE";
+		break;
+	case 0x02:
+		e = "INTERRUPTED";
+		break;
+	case 0x03:
+		e = "STOPPED";
+		break;
+	case 0x04:
+		e = "TERMINATED";
+		break;
+	case 0x08:
+		e = "ACTIVE";
+		break;
+		/* Job exceptions */
+	case 0x40:
+		e = "JOB_CONFIG_FAULT";
+		break;
+	case 0x41:
+		e = "JOB_POWER_FAULT";
+		break;
+	case 0x42:
+		e = "JOB_READ_FAULT";
+		break;
+	case 0x43:
+		e = "JOB_WRITE_FAULT";
+		break;
+	case 0x44:
+		e = "JOB_AFFINITY_FAULT";
+		break;
+	case 0x48:
+		e = "JOB_BUS_FAULT";
+		break;
+	case 0x50:
+		e = "INSTR_INVALID_PC";
+		break;
+	case 0x51:
+		e = "INSTR_INVALID_ENC";
+		break;
+	case 0x52:
+		e = "INSTR_TYPE_MISMATCH";
+		break;
+	case 0x53:
+		e = "INSTR_OPERAND_FAULT";
+		break;
+	case 0x54:
+		e = "INSTR_TLS_FAULT";
+		break;
+	case 0x55:
+		e = "INSTR_BARRIER_FAULT";
+		break;
+	case 0x56:
+		e = "INSTR_ALIGN_FAULT";
+		break;
+	case 0x58:
+		e = "DATA_INVALID_FAULT";
+		break;
+	case 0x59:
+		e = "TILE_RANGE_FAULT";
+		break;
+	case 0x5A:
+		e = "ADDR_RANGE_FAULT";
+		break;
+	case 0x60:
+		e = "OUT_OF_MEMORY";
+		break;
+		/* GPU exceptions */
+	case 0x80:
+		e = "DELAYED_BUS_FAULT";
+		break;
+	case 0x88:
+		e = "SHAREABILITY_FAULT";
+		break;
+		/* MMU exceptions */
+	case 0xC0:
+	case 0xC1:
+	case 0xC2:
+	case 0xC3:
+	case 0xC4:
+	case 0xC5:
+	case 0xC6:
+	case 0xC7:
+		e = "TRANSLATION_FAULT";
+		break;
+	case 0xC8:
+		e = "PERMISSION_FAULT";
+		break;
+	case 0xC9:
+	case 0xCA:
+	case 0xCB:
+	case 0xCC:
+	case 0xCD:
+	case 0xCE:
+	case 0xCF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "PERMISSION_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xD0:
+	case 0xD1:
+	case 0xD2:
+	case 0xD3:
+	case 0xD4:
+	case 0xD5:
+	case 0xD6:
+	case 0xD7:
+		e = "TRANSTAB_BUS_FAULT";
+		break;
+	case 0xD8:
+		e = "ACCESS_FLAG";
+		break;
+	case 0xD9:
+	case 0xDA:
+	case 0xDB:
+	case 0xDC:
+	case 0xDD:
+	case 0xDE:
+	case 0xDF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "ACCESS_FLAG";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xE0:
+	case 0xE1:
+	case 0xE2:
+	case 0xE3:
+	case 0xE4:
+	case 0xE5:
+	case 0xE6:
+	case 0xE7:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "ADDRESS_SIZE_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xE8:
+	case 0xE9:
+	case 0xEA:
+	case 0xEB:
+	case 0xEC:
+	case 0xED:
+	case 0xEE:
+	case 0xEF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "MEMORY_ATTRIBUTES_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	default:
+		e = "UNKNOWN";
+		break;
+	};
+
+	return e;
+}
+
+static const char *access_type_name(struct kbase_device *kbdev,
+		u32 fault_status)
+{
+	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			return "ATOMIC";
+		else
+			return "UNKNOWN";
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		return "READ";
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		return "WRITE";
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		return "EXECUTE";
+	default:
+		WARN_ON(1);
+		return NULL;
+	}
+}
+
+
+/**
+ * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on.
+ */
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str,
+		struct kbase_fault *fault)
+{
+	unsigned long flags;
+	int exception_type;
+	int access_type;
+	int source_id;
+	int as_no;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+	bool reset_status = false;
+
+	as_no = as->number;
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	/* ASSERT that the context won't leave the runpool */
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/* decode the fault status */
+	exception_type = fault->status & 0xFF;
+	access_type = (fault->status >> 8) & 0x3;
+	source_id = (fault->status >> 16);
+
+	/* terminal fault, print info about the fault */
+	dev_err(kbdev->dev,
+		"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+		"Reason: %s\n"
+		"raw fault status: 0x%X\n"
+		"decoded fault status: %s\n"
+		"exception type 0x%X: %s\n"
+		"access type 0x%X: %s\n"
+		"source id 0x%X\n"
+		"pid: %d\n",
+		as_no, fault->addr,
+		reason_str,
+		fault->status,
+		(fault->status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+		exception_type, kbase_exception_name(kbdev, exception_type),
+		access_type, access_type_name(kbdev, fault->status),
+		source_id,
+		kctx->pid);
+
+	/* hardware counters dump fault handling */
+	if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
+			(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_DUMPING)) {
+		if ((fault->addr >= kbdev->hwcnt.addr) &&
+				(fault->addr < (kbdev->hwcnt.addr +
+					kbdev->hwcnt.addr_bytes)))
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+	}
+
+	/* Stop the kctx from submitting more jobs and cause it to be scheduled
+	 * out/rescheduled - this will occur on releasing the context's refcount */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_clear_submit_allowed(js_devdata, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
+	 * context can appear in the job slots from this point on */
+	kbase_backend_jm_kill_jobs_from_kctx(kctx);
+
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_mmu_disable(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+
+	/* AS transaction end */
+	/* Clear down the fault */
+	kbase_mmu_hw_clear_fault(kbdev, as,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+	kbase_mmu_hw_enable_fault(kbdev, as,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+}
+
+void kbasep_as_do_poke(struct work_struct *work)
+{
+	struct kbase_as *as;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(work);
+	as = container_of(work, struct kbase_as, poke_work);
+	kbdev = container_of(as, struct kbase_device, as[as->number]);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	/* GPU power will already be active by virtue of the caller holding a JS
+	 * reference on the address space, and will not release it until this worker
+	 * has finished */
+
+	/* Further to the comment above, we know that while this function is running
+	 * the AS will not be released as before the atom is released this workqueue
+	 * is flushed (in kbase_as_poking_timer_release_atom)
+	 */
+
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	/* Force a uTLB invalidate */
+	kbase_mmu_hw_do_operation(kbdev, as, 0, 0,
+				  AS_COMMAND_UNLOCK, 0);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (as->poke_refcount &&
+		!(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
+		/* Only queue up the timer if we need it, and we're not trying to kill it */
+		hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_as *as;
+	int queue_work_ret;
+
+	KBASE_DEBUG_ASSERT(NULL != timer);
+	as = container_of(timer, struct kbase_as, poke_timer);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+	KBASE_DEBUG_ASSERT(queue_work_ret);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * Retain the poking timer on an atom's context (if the atom hasn't already
+ * done so), and start the timer (if it's not already started).
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that's running on the GPU.
+ *
+ * The caller must hold hwaccess_lock
+ *
+ * This can be called safely from atomic context
+ */
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->poking)
+		return;
+
+	katom->poking = 1;
+
+	/* It's safe to work on the as/as_nr without an explicit reference,
+	 * because the caller holds the hwaccess_lock, and the atom itself
+	 * was also running and had already taken a reference  */
+	as = &kbdev->as[kctx->as_nr];
+
+	if (++(as->poke_refcount) == 1) {
+		/* First refcount for poke needed: check if not already in flight */
+		if (!as->poke_state) {
+			/* need to start poking */
+			as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
+			queue_work(as->poke_wq, &as->poke_work);
+		}
+	}
+}
+
+/**
+ * If an atom holds a poking timer, release it and wait for it to finish
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that still has a JS reference on the context
+ *
+ * This must \b not be called from atomic context, since it can sleep.
+ */
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	if (!katom->poking)
+		return;
+
+	as = &kbdev->as[kctx->as_nr];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	if (--(as->poke_refcount) == 0) {
+		as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_cancel(&as->poke_timer);
+		flush_workqueue(as->poke_wq);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* Re-check whether it's still needed */
+		if (as->poke_refcount) {
+			int queue_work_ret;
+			/* Poking still needed:
+			 * - Another retain will not be starting the timer or queueing work,
+			 * because it's still marked as in-flight
+			 * - The hrtimer has finished, and has not started a new timer or
+			 * queued work because it's been marked as killing
+			 *
+			 * So whatever happens now, just queue the work again */
+			as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
+			queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+			KBASE_DEBUG_ASSERT(queue_work_ret);
+		} else {
+			/* It isn't - so mark it as not in flight, and not killing */
+			as->poke_state = 0u;
+
+			/* The poke associated with the atom has now finished. If this is
+			 * also the last atom on the context, then we can guarentee no more
+			 * pokes (and thus no more poking register accesses) will occur on
+			 * the context until new atoms are run */
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	katom->poking = 0;
+}
+
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_as *as,
+		struct kbase_fault *fault)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kctx) {
+		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n",
+				kbase_as_has_bus_fault(as) ?
+						"Bus error" : "Page fault",
+				as->number, fault->addr);
+
+		/* Since no ctx was found, the MMU must be disabled. */
+		WARN_ON(as->current_setup.transtab);
+
+		if (kbase_as_has_bus_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		} else if (kbase_as_has_page_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+		}
+
+		if (kbase_as_has_bus_fault(as) &&
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+			bool reset_status;
+			/*
+			 * Reset the GPU, like in bus_fault_worker, in case an
+			 * earlier error hasn't been properly cleared by this
+			 * point.
+			 */
+			dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+			reset_status = kbase_prepare_to_reset_gpu_locked(kbdev);
+			if (reset_status)
+				kbase_reset_gpu_locked(kbdev);
+		}
+
+		return;
+	}
+
+	if (kbase_as_has_bus_fault(as)) {
+		/*
+		 * hw counters dumping in progress, signal the
+		 * other thread that it failed
+		 */
+		if ((kbdev->hwcnt.kctx == kctx) &&
+		    (kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_DUMPING))
+			kbdev->hwcnt.backend.state =
+						KBASE_INSTR_STATE_FAULT;
+
+		/*
+		 * Stop the kctx from submitting more jobs and cause it
+		 * to be scheduled out/rescheduled when all references
+		 * to it are released
+		 */
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			dev_warn(kbdev->dev,
+					"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+					as->number, fault->addr,
+					fault->extra_addr);
+		else
+			dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+					as->number, fault->addr);
+
+		/*
+		 * We need to switch to UNMAPPED mode - but we do this in a
+		 * worker so that we can sleep
+		 */
+		WARN_ON(!queue_work(as->pf_wq, &as->work_busfault));
+		atomic_inc(&kbdev->faults_pending);
+	} else {
+		WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault));
+		atomic_inc(&kbdev->faults_pending);
+	}
+}
+
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		flush_workqueue(as->pf_wq);
+	}
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
new file mode 100755
index 0000000..f49a1d4
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file
+ * Interface file for accessing MMU hardware functionality
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_page MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_intro_sec Introduction
+ * This module provides an abstraction for accessing the functionality provided
+ * by the midgard MMU and thus allows all MMU HW access to be contained within
+ * one common place and allows for different backends (implementations) to
+ * be provided.
+ */
+
+#ifndef _KBASE_MMU_HW_H_
+#define _KBASE_MMU_HW_H_
+
+/* Forward declarations */
+struct kbase_device;
+struct kbase_as;
+struct kbase_context;
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw  MMU access APIs
+ * @{
+ */
+
+/** @brief MMU fault type descriptor.
+ */
+enum kbase_mmu_fault_type {
+	KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
+	KBASE_MMU_FAULT_TYPE_PAGE,
+	KBASE_MMU_FAULT_TYPE_BUS,
+	KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED,
+	KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED
+};
+
+/** @brief Configure an address space for use.
+ *
+ * Configure the MMU using the address space details setup in the
+ * @ref kbase_context structure.
+ *
+ * @param[in]  kbdev          kbase device to configure.
+ * @param[in]  as             address space to configure.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev,
+		struct kbase_as *as);
+
+/** @brief Issue an operation to the MMU.
+ *
+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
+ * is associated with the provided @ref kbase_context over the specified range
+ *
+ * @param[in]  kbdev         kbase device to issue the MMU operation on.
+ * @param[in]  as            address space to issue the MMU operation on.
+ * @param[in]  vpfn          MMU Virtual Page Frame Number to start the
+ *                           operation on.
+ * @param[in]  nr            Number of pages to work on.
+ * @param[in]  type          Operation type (written to ASn_COMMAND).
+ * @param[in]  handling_irq  Is this operation being called during the handling
+ *                           of an interrupt?
+ *
+ * @return Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		u64 vpfn, u32 nr, u32 type,
+		unsigned int handling_irq);
+
+/** @brief Clear a fault that has been previously reported by the MMU.
+ *
+ * Clear a bus error or page fault that has been reported by the MMU.
+ *
+ * @param[in]  kbdev         kbase device to  clear the fault from.
+ * @param[in]  as            address space to  clear the fault from.
+ * @param[in]  type          The type of fault that needs to be cleared.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		enum kbase_mmu_fault_type type);
+
+/** @brief Enable fault that has been previously reported by the MMU.
+ *
+ * After a page fault or bus error has been reported by the MMU these
+ * will be disabled. After these are handled this function needs to be
+ * called to enable the page fault or bus error fault again.
+ *
+ * @param[in]  kbdev         kbase device to again enable the fault from.
+ * @param[in]  as            address space to again enable the fault from.
+ * @param[in]  type          The type of fault that needs to be enabled again.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		enum kbase_mmu_fault_type type);
+
+/** @} *//* end group mali_kbase_mmu_hw */
+/** @} *//* end group base_kbase_api */
+
+#endif	/* _KBASE_MMU_HW_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
new file mode 100755
index 0000000..7b9cc0c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
@@ -0,0 +1,223 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014, 2016-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+#include "mali_kbase_defs.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+/* For valid ATEs bit 1 = ((level == 3) ? 1 : 0).
+ * Valid ATE entries at level 3 are flagged with the value 3.
+ * Valid ATE entries at level 0-2 are flagged with the value 1.
+ */
+#define ENTRY_IS_ATE_L3		3ULL
+#define ENTRY_IS_ATE_L02	1ULL
+#define ENTRY_IS_INVAL		2ULL
+#define ENTRY_IS_PTE		3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_ACCESS_RW (1ULL << 6)     /* bits 6:7 */
+#define ENTRY_ACCESS_RO (3ULL << 6)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE
+	WRITE_ONCE(*pte, phy);
+#else
+#ifdef CONFIG_64BIT
+	barrier();
+	*pte = phy;
+	barrier();
+#elif defined(CONFIG_ARM)
+	barrier();
+	asm volatile("ldrd r0, [%1]\n\t"
+		     "strd r0, %0\n\t"
+		     : "=m" (*pte)
+		     : "r" (&phy)
+		     : "r0", "r1");
+	barrier();
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_mmu_table *mmut,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register.
+	 */
+	setup->memattr =
+		(AS_MEMATTR_IMPL_DEF_CACHE_POLICY <<
+			(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_FORCE_TO_CACHE_ALL    <<
+			(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+		(AS_MEMATTR_WRITE_ALLOC           <<
+			(AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_IMPL_DEF   <<
+			(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_WA         <<
+			(AS_MEMATTR_INDEX_OUTER_WA * 8)) |
+		(AS_MEMATTR_AARCH64_NON_CACHEABLE    <<
+			(AS_MEMATTR_INDEX_NON_CACHEABLE * 8));
+
+	setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK;
+	setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K;
+}
+
+static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+		int as_nr)
+{
+	struct kbase_as *as;
+	struct kbase_mmu_setup *current_setup;
+
+	if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID))
+		return;
+
+	as = &kbdev->as[as_nr];
+	current_setup = &as->current_setup;
+
+	mmu_get_as_setup(mmut, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = 0ULL;
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED;
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate, int const level)
+{
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L3);
+	else
+		return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L02);
+}
+
+static int pte_is_valid(u64 pte, int const level)
+{
+	/* PTEs cannot exist at the bottom level */
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		return false;
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* Set access flags - note that AArch64 stage 1 does not support
+	 * write-only access, so we use read/write instead
+	 */
+	if (flags & KBASE_REG_GPU_WR)
+		mmu_flags |= ENTRY_ACCESS_RW;
+	else if (flags & KBASE_REG_GPU_RD)
+		mmu_flags |= ENTRY_ACCESS_RO;
+
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry,
+		struct tagged_addr phy,
+		unsigned long flags,
+		int const level)
+{
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		page_table_entry_set(entry, as_phys_addr_t(phy) |
+				get_mmu_flags(flags) |
+				ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3);
+	else
+		page_table_entry_set(entry, as_phys_addr_t(phy) |
+				get_mmu_flags(flags) |
+				ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & PAGE_MASK) |
+			ENTRY_ACCESS_BIT | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const aarch64_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate,
+	.flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void)
+{
+	return &aarch64_mode;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
new file mode 100755
index 0000000..7ec90cf
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -0,0 +1,214 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+#include "mali_kbase_defs.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+#define ENTRY_IS_ATE        1ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_RD_BIT (1ULL << 6)
+#define ENTRY_WR_BIT (1ULL << 7)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
+		ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE
+	WRITE_ONCE(*pte, phy);
+#else
+#ifdef CONFIG_64BIT
+	barrier();
+	*pte = phy;
+	barrier();
+#elif defined(CONFIG_ARM)
+	barrier();
+	asm volatile("ldrd r0, [%1]\n\t"
+		     "strd r0, %0\n\t"
+		     : "=m" (*pte)
+		     : "r" (&phy)
+		     : "r0", "r1");
+	barrier();
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_mmu_table *mmut,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register. */
+	setup->memattr =
+		(AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
+		(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    <<
+		(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8))    |
+		(AS_MEMATTR_LPAE_WRITE_ALLOC           <<
+		(AS_MEMATTR_INDEX_WRITE_ALLOC * 8))           |
+		(AS_MEMATTR_LPAE_OUTER_IMPL_DEF        <<
+		(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8))        |
+		(AS_MEMATTR_LPAE_OUTER_WA              <<
+		(AS_MEMATTR_INDEX_OUTER_WA * 8))              |
+		0; /* The other indices are unused for now */
+
+	setup->transtab = ((u64)mmut->pgd &
+		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
+		AS_TRANSTAB_LPAE_ADRMODE_TABLE |
+		AS_TRANSTAB_LPAE_READ_INNER;
+
+	setup->transcfg = 0;
+}
+
+static void mmu_update(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
+		int as_nr)
+{
+	struct kbase_as *as;
+	struct kbase_mmu_setup *current_setup;
+
+	if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID))
+		return;
+
+	as = &kbdev->as[as_nr];
+	current_setup = &as->current_setup;
+
+	mmu_get_as_setup(mmut, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate, int const level)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte, int const level)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+	unsigned long memattr_idx;
+
+	memattr_idx = KBASE_REG_MEMATTR_VALUE(flags);
+	if (WARN(memattr_idx == AS_MEMATTR_INDEX_NON_CACHEABLE,
+			"Legacy Mode MMU cannot honor GPU non-cachable memory, will use default instead\n"))
+		memattr_idx = AS_MEMATTR_INDEX_DEFAULT;
+	/* store mem_attr index as 4:2, noting that:
+	 * - macro called above ensures 3 bits already
+	 * - all AS_MEMATTR_INDEX_<...> macros only use 3 bits
+	 */
+	mmu_flags = memattr_idx << 2;
+
+	/* write perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
+	/* read perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry,
+		struct tagged_addr phy,
+		unsigned long flags,
+		int const level)
+{
+	page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) |
+			     ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const lpae_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate,
+	.flags = 0
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
+{
+	return &lpae_mode;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c
new file mode 100644
index 0000000..022c056
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c
@@ -0,0 +1,179 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/memory_group_manager.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_native_mgm.h>
+
+#if (KERNEL_VERSION(4, 17, 0) > LINUX_VERSION_CODE)
+static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long pfn)
+{
+	int err = vm_insert_pfn(vma, addr, pfn);
+
+	if (unlikely(err == -ENOMEM))
+		return VM_FAULT_OOM;
+	if (unlikely(err < 0 && err != -EBUSY))
+		return VM_FAULT_SIGBUS;
+
+	return VM_FAULT_NOPAGE;
+}
+#endif
+
+#if (KERNEL_VERSION(4, 20, 0) > LINUX_VERSION_CODE)
+static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long pfn, pgprot_t pgprot)
+{
+	if (pgprot_val(pgprot) != pgprot_val(vma->vm_page_prot))
+		return VM_FAULT_SIGBUS;
+
+	return vmf_insert_pfn(vma, addr, pfn);
+}
+#endif
+
+/**
+ * kbase_native_mgm_alloc - Native physical memory allocation method
+ *
+ * @mgm_dev:  The memory group manager the request is being made through.
+ * @group_id: A physical memory group ID, which must be valid but is not used.
+ *            Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+ * @gfp_mask: Bitmask of Get Free Page flags affecting allocator behavior.
+ * @order:    Page order for physical page size (order=0 means 4 KiB,
+ *            order=9 means 2 MiB).
+ *
+ * Delegates all memory allocation requests to the kernel's alloc_pages
+ * function.
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ */
+static struct page *kbase_native_mgm_alloc(
+	struct memory_group_manager_device *mgm_dev, int group_id,
+	gfp_t gfp_mask, unsigned int order)
+{
+	/*
+	 * Check that the base and the mgm defines, from separate header files,
+	 * for the max number of memory groups are compatible.
+	 */
+	BUILD_BUG_ON(BASE_MEM_GROUP_COUNT != MEMORY_GROUP_MANAGER_NR_GROUPS);
+	/*
+	 * Check that the mask used for storing the memory group ID is big
+	 * enough for the largest possible memory group ID.
+	 */
+	BUILD_BUG_ON((BASEP_CONTEXT_MMU_GROUP_ID_MASK
+				>> BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
+			< (BASE_MEM_GROUP_COUNT - 1));
+
+	CSTD_UNUSED(mgm_dev);
+	CSTD_UNUSED(group_id);
+
+	return alloc_pages(gfp_mask, order);
+}
+
+/**
+ * kbase_native_mgm_free - Native physical memory freeing method
+ *
+ * @mgm_dev:  The memory group manager the request is being made through.
+ * @group_id: A physical memory group ID, which must be valid but is not used.
+ *            Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+ * @page:     Address of the struct associated with a page of physical
+ *            memory that was allocated by calling kbase_native_mgm_alloc
+ *            with the same argument values.
+ * @order:    Page order for physical page size (order=0 means 4 KiB,
+ *            order=9 means 2 MiB).
+ *
+ * Delegates all memory freeing requests to the kernel's __free_pages function.
+ */
+static void kbase_native_mgm_free(struct memory_group_manager_device *mgm_dev,
+	int group_id, struct page *page, unsigned int order)
+{
+	CSTD_UNUSED(mgm_dev);
+	CSTD_UNUSED(group_id);
+
+	__free_pages(page, order);
+}
+
+/**
+ * kbase_native_mgm_vmf_insert_pfn_prot - Native method to map a page on the CPU
+ *
+ * @mgm_dev:  The memory group manager the request is being made through.
+ * @group_id: A physical memory group ID, which must be valid but is not used.
+ *            Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+ * @vma:      The virtual memory area to insert the page into.
+ * @addr:     An address contained in @vma to assign to the inserted page.
+ * @pfn:      The kernel Page Frame Number to insert at @addr in @vma.
+ * @pgprot:   Protection flags for the inserted page.
+ *
+ * Called from a CPU virtual memory page fault handler. Delegates all memory
+ * mapping requests to the kernel's vmf_insert_pfn_prot function.
+ *
+ * Return: Type of fault that occurred or VM_FAULT_NOPAGE if the page table
+ *         entry was successfully installed.
+ */
+static vm_fault_t kbase_native_mgm_vmf_insert_pfn_prot(
+		struct memory_group_manager_device *mgm_dev, int group_id,
+		struct vm_area_struct *vma, unsigned long addr,
+		unsigned long pfn, pgprot_t pgprot)
+{
+	CSTD_UNUSED(mgm_dev);
+	CSTD_UNUSED(group_id);
+
+	return vmf_insert_pfn_prot(vma, addr, pfn, pgprot);
+}
+
+/**
+ * kbase_native_mgm_update_gpu_pte - Native method to modify a GPU page table
+ *                                   entry
+ *
+ * @mgm_dev:   The memory group manager the request is being made through.
+ * @group_id:  A physical memory group ID, which must be valid but is not used.
+ *             Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+ * @mmu_level: The level of the MMU page table where the page is getting mapped.
+ * @pte:       The prepared page table entry.
+ *
+ * This function simply returns the @pte without modification.
+ *
+ * Return: A GPU page table entry to be stored in a page table.
+ */
+static u64
+kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev,
+			      int group_id, int mmu_level, u64 pte)
+{
+	CSTD_UNUSED(mgm_dev);
+	CSTD_UNUSED(group_id);
+	CSTD_UNUSED(mmu_level);
+
+	return pte;
+}
+
+struct memory_group_manager_device kbase_native_mgm_dev = {
+	.ops = {
+		.mgm_alloc_page = kbase_native_mgm_alloc,
+		.mgm_free_page = kbase_native_mgm_free,
+		.mgm_get_import_memory_id = NULL,
+		.mgm_vmf_insert_pfn_prot = kbase_native_mgm_vmf_insert_pfn_prot,
+		.mgm_update_gpu_pte = kbase_native_mgm_update_gpu_pte,
+	},
+	.data = NULL
+};
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h
new file mode 100644
index 0000000..431b1f4
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_NATIVE_MGM_H_
+#define _KBASE_NATIVE_MGM_H_
+
+#include <linux/memory_group_manager.h>
+
+/**
+ * kbase_native_mgm_dev - Native memory group manager device
+ *
+ * An implementation of the memory group manager interface that is intended for
+ * internal use when no platform-specific memory group manager is available.
+ *
+ * It ignores the specified group ID and delegates to the kernel's physical
+ * memory allocation and freeing functions.
+ */
+extern struct memory_group_manager_device kbase_native_mgm_dev;
+
+#endif /* _KBASE_NATIVE_MGM_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
new file mode 100755
index 0000000..fbb090e
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+
+/*
+ * This file is included only for type definitions and functions belonging to
+ * specific platform folders. Do not add dependencies with symbols that are
+ * defined somewhere else.
+ */
+#include <mali_kbase_config.h>
+
+#define PLATFORM_CONFIG_RESOURCE_COUNT 4
+#define PLATFORM_CONFIG_IRQ_RES_COUNT  3
+
+static struct platform_device *mali_device;
+
+#ifndef CONFIG_OF
+/**
+ * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources
+ *
+ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function
+ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT.
+ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
+ *
+ * @param[in]  io_resource      Input IO resource data
+ * @param[out] linux_resources  Pointer to output array of Linux resource structures
+ */
+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
+{
+	if (!io_resources || !linux_resources) {
+		pr_err("%s: couldn't find proper resources\n", __func__);
+		return;
+	}
+
+	memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource));
+
+	linux_resources[0].start = io_resources->io_memory_region.start;
+	linux_resources[0].end   = io_resources->io_memory_region.end;
+	linux_resources[0].flags = IORESOURCE_MEM;
+
+	linux_resources[1].start = io_resources->job_irq_number;
+	linux_resources[1].end   = io_resources->job_irq_number;
+	linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[2].start = io_resources->mmu_irq_number;
+	linux_resources[2].end   = io_resources->mmu_irq_number;
+	linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[3].start = io_resources->gpu_irq_number;
+	linux_resources[3].end   = io_resources->gpu_irq_number;
+	linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+}
+#endif /* CONFIG_OF */
+
+int kbase_platform_register(void)
+{
+	struct kbase_platform_config *config;
+#ifndef CONFIG_OF
+	struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
+#endif
+	int err;
+
+	config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
+	if (config == NULL) {
+		pr_err("%s: couldn't get platform config\n", __func__);
+		return -ENODEV;
+	}
+
+	mali_device = platform_device_alloc("mali", 0);
+	if (mali_device == NULL)
+		return -ENOMEM;
+
+#ifndef CONFIG_OF
+	kbasep_config_parse_io_resources(config->io_resources, resources);
+	err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
+	if (err) {
+		platform_device_put(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+#endif /* CONFIG_OF */
+
+	err = platform_device_add(mali_device);
+	if (err) {
+		platform_device_unregister(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kbase_platform_register);
+
+void kbase_platform_unregister(void)
+{
+	if (mali_device)
+		platform_device_unregister(mali_device);
+}
+EXPORT_SYMBOL(kbase_platform_unregister);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
new file mode 100755
index 0000000..5699eb8
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -0,0 +1,196 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm.c
+ * Base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_vinstr.h>
+#include <mali_kbase_hwcnt_context.h>
+
+#include <mali_kbase_pm.h>
+
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
+{
+	return kbase_hwaccess_pm_powerup(kbdev, flags);
+}
+
+void kbase_pm_halt(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
+void kbase_pm_context_active(struct kbase_device *kbdev)
+{
+	(void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+}
+
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+	if (kbase_pm_is_suspending(kbdev)) {
+		switch (suspend_handler) {
+		case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+			if (kbdev->pm.active_count != 0)
+				break;
+			/* FALLTHROUGH */
+		case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			return 1;
+
+		case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
+			/* FALLTHROUGH */
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false, "unreachable");
+			break;
+		}
+	}
+	c = ++kbdev->pm.active_count;
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
+
+	if (c == 1) {
+		/* First context active: Power on the GPU and any cores requested by
+		 * the policy */
+		kbase_hwaccess_pm_gpu_active(kbdev);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_active);
+
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	c = --kbdev->pm.active_count;
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
+
+	KBASE_DEBUG_ASSERT(c >= 0);
+
+	if (c == 0) {
+		/* Last context has gone idle */
+		kbase_hwaccess_pm_gpu_idle(kbdev);
+
+		/* Wake up anyone waiting for this to become 0 (e.g. suspend). The
+		 * waiters must synchronize with us by locking the pm.lock after
+		 * waiting.
+		 */
+		wake_up(&kbdev->pm.zero_active_count_wait);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
+
+void kbase_pm_suspend(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Suspend vinstr. This blocks until the vinstr worker and timer are
+	 * no longer running.
+	 */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	/* Disable GPU hardware counters.
+	 * This call will block until counters are disabled.
+	 */
+	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
+	mutex_lock(&kbdev->pm.lock);
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+	kbdev->pm.suspending = true;
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* From now on, the active count will drop towards zero. Sometimes, it'll
+	 * go up briefly before going down again. However, once it reaches zero it
+	 * will stay there - guaranteeing that we've idled all pm references */
+
+	/* Suspend job scheduler and associated components, so that it releases all
+	 * the PM active count references */
+	kbasep_js_suspend(kbdev);
+
+	/* Wait for the active count to reach zero. This is not the same as
+	 * waiting for a power down, since not all policies power down when this
+	 * reaches zero. */
+	wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0);
+
+	/* NOTE: We synchronize with anything that was just finishing a
+	 * kbase_pm_context_idle() call by locking the pm.lock below */
+
+	kbase_hwaccess_pm_suspend(kbdev);
+}
+
+void kbase_pm_resume(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	/* MUST happen before any pm_context_active calls occur */
+	kbase_hwaccess_pm_resume(kbdev);
+
+	/* Initial active call, to power on the GPU/cores if needed */
+	kbase_pm_context_active(kbdev);
+
+	/* Resume any blocked atoms (which may cause contexts to be scheduled in
+	 * and dependent atoms to run) */
+	kbase_resume_suspended_soft_jobs(kbdev);
+
+	/* Resume the Job Scheduler and associated components, and start running
+	 * atoms */
+	kbasep_js_resume(kbdev);
+
+	/* Matching idle call, to power off the GPU/cores if we didn't actually
+	 * need it and the policy doesn't want it on */
+	kbase_pm_context_idle(kbdev);
+
+	/* Re-enable GPU hardware counters */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Resume vinstr */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
new file mode 100755
index 0000000..59a0314
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -0,0 +1,180 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include "mali_kbase_hwaccess_pm.h"
+
+#define PM_ENABLE_IRQS       0x01
+#define PM_HW_ISSUES_DETECT  0x02
+
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return 0 if the power management framework was successfully initialized.
+ */
+int kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @param flags     Flags to pass on to kbase_pm_init_hw
+ *
+ * @return 0 if powerup was successful.
+ */
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ * Should ensure that no new interrupts are generated,
+ * but allow any currently running interrupt handlers to complete successfully.
+ * The GPU is forced off by the time this function returns, regardless of
+ * whether or not the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init)
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It informs the active power policy that the
+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occuring/has occurred whilst this is
+ * in use. Use kbase_pm_contect_active_unless_suspending() instead.
+ *
+ * @note a Suspend is only visible to Kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+	/** A suspend is not expected/not possible - this is the same as
+	 * kbase_pm_context_active() */
+	KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+	/** If we're suspending, fail and don't increase the active count */
+	KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+	/** If we're suspending, succeed and allow the active count to increase iff
+	 * it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+	 *
+	 * This should only be used when there is a bounded time on the activation
+	 * (e.g. guarantee it's going to be idled very soon after) */
+	KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * We returns a status code indicating whether we're allowed to keep the GPU
+ * active during the suspend, depending on the handler code. If the status code
+ * indicates a failure, the caller must abort whatever operation it was
+ * attempting, and potentially queue it up for after the OS has resumed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero     Indicates success
+ * @return non-zero Indicates failure due to the system being suspending/suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power
+ * policy so the calling code should ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_active() is in mali_kbase.h, because it is an inline
+ * function
+ */
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from Kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note the mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU e.g. via atom submission.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_vsync_callback - vsync callback
+ *
+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise
+ * @data: Pointer to the kbase device as returned by kbase_find_device()
+ *
+ * Callback function used to notify the power management code that a vsync has
+ * occurred on the display.
+ */
+void kbase_pm_vsync_callback(int buffer_updated, void *data);
+
+#endif				/* _KBASE_PM_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
new file mode 100755
index 0000000..53d9427
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
@@ -0,0 +1,136 @@
+/*
+ *
+ * (C) COPYRIGHT 2016, 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase.h"
+
+#include "mali_kbase_regs_history_debugfs.h"
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+#include <linux/debugfs.h>
+
+
+static int regs_history_size_get(void *data, u64 *val)
+{
+	struct kbase_io_history *const h = data;
+
+	*val = h->size;
+
+	return 0;
+}
+
+static int regs_history_size_set(void *data, u64 val)
+{
+	struct kbase_io_history *const h = data;
+
+	return kbase_io_history_resize(h, (u16)val);
+}
+
+
+DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
+		regs_history_size_get,
+		regs_history_size_set,
+		"%llu\n");
+
+
+/**
+ * regs_history_show - show callback for the register access history file.
+ *
+ * @sfile: The debugfs entry
+ * @data: Data associated with the entry
+ *
+ * This function is called to dump all recent accesses to the GPU registers.
+ *
+ * @return 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int regs_history_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_io_history *const h = sfile->private;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!h->enabled) {
+		seq_puts(sfile, "The register access history is disabled\n");
+		goto out;
+	}
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	iters = (h->size > h->count) ? h->count : h->size;
+	seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+out:
+	return 0;
+}
+
+
+/**
+ * regs_history_open - open operation for regs_history debugfs file
+ *
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * @return file descriptor
+ */
+static int regs_history_open(struct inode *in, struct file *file)
+{
+	return single_open(file, &regs_history_show, in->i_private);
+}
+
+
+static const struct file_operations regs_history_fops = {
+	.owner = THIS_MODULE,
+	.open = &regs_history_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history.enabled);
+	debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history, &regs_history_size_fops);
+	debugfs_create_file("regs_history", S_IRUGO,
+			kbdev->mali_debugfs_directory, &kbdev->io_history,
+			&regs_history_fops);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
new file mode 100755
index 0000000..a0078cb
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
@@ -0,0 +1,55 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Header file for register access history support via debugfs
+ *
+ * This interface is made available via /sys/kernel/debug/mali#/regs_history*.
+ *
+ * Usage:
+ * - regs_history_enabled: whether recording of register accesses is enabled.
+ *   Write 'y' to enable, 'n' to disable.
+ * - regs_history_size: size of the register history buffer, must be > 0
+ * - regs_history: return the information about last accesses to the registers.
+ */
+
+#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H
+#define _KBASE_REGS_HISTORY_DEBUGFS_H
+
+struct kbase_device;
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/**
+ * kbasep_regs_history_debugfs_init - add debugfs entries for register history
+ *
+ * @kbdev: Pointer to kbase_device containing the register history
+ */
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_regs_history_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+#endif  /*_KBASE_REGS_HISTORY_DEBUGFS_H*/
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h
new file mode 100644
index 0000000..df72eec
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h
@@ -0,0 +1,139 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_RESET_GPU_H_
+#define _KBASE_RESET_GPU_H_
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * Caller is expected to hold the kbdev->hwaccess_lock.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *           not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *           not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it returns
+ * true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu (only on Job Manager GPUs).
+ *
+ * After this function is called the caller should call kbase_reset_gpu_wait()
+ * to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_locked - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu_locked if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu (only on Job Manager GPUs).
+ * Caller is expected to hold the kbdev->hwaccess_lock.
+ *
+ * After this function is called, the caller should call kbase_reset_gpu_wait()
+ * to know when the reset has completed.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_silent - Reset the GPU silently
+ * @kbdev: Device pointer
+ *
+ * Reset the GPU without trying to cancel jobs (applicable to Job Manager GPUs)
+ * and don't emit messages into the kernel log while doing the reset.
+ *
+ * This function should be used in cases where we are doing a controlled reset
+ * of the GPU as part of normal processing (e.g. exiting protected mode) where
+ * the driver will have ensured the scheduler has been idled and all other
+ * users of the GPU (e.g. instrumentation) have been suspended.
+ *
+ * Return: 0 if the reset was started successfully
+ *         -EAGAIN if another reset is currently in progress
+ */
+int kbase_reset_gpu_silent(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_is_active - Reports if the GPU is being reset
+ * @kbdev: Device pointer
+ *
+ * Return: True if the GPU is in the process of being reset (or if the reset of
+ * GPU failed, not applicable to Job Manager GPUs).
+ */
+bool kbase_reset_gpu_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_wait - Wait for a GPU reset to complete
+ * @kbdev: Device pointer
+ *
+ * This function may wait indefinitely.
+ *
+ * Return: 0 if successful or a negative error code on failure.
+ */
+int kbase_reset_gpu_wait(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_init - Initialize the GPU reset handling mechanism.
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: 0 if successful or a negative error code on failure.
+ */
+int kbase_reset_gpu_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_term - Terminate the GPU reset handling mechanism.
+ *
+ * @kbdev: Device pointer
+ */
+void kbase_reset_gpu_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_register_complete_cb - Register the callback function to be
+ *                                        invoked on completion of GPU reset.
+ *
+ * @kbdev: Device pointer
+ * @complete_callback: Pointer to the callback function
+ */
+void kbase_reset_gpu_register_complete_cb(struct kbase_device *kbdev,
+			int (*complete_callback)(struct kbase_device *kbdev));
+
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
new file mode 100755
index 0000000..3470f58
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
@@ -0,0 +1,91 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+#include <mali_kbase_smc.h>
+
+#include <linux/compiler.h>
+
+/* __asmeq is not available on Kernel versions >= 4.20 */
+#ifndef __asmeq
+/*
+ * This is used to ensure the compiler did actually allocate the register we
+ * asked it for some inline assembly sequences.  Apparently we can't trust the
+ * compiler from one version to another so a bit of paranoia won't hurt.  This
+ * string is meant to be concatenated with the inline asm string and will
+ * cause compilation to stop on mismatch.  (for details, see gcc PR 15089)
+ */
+#define __asmeq(x, y)  ".ifnc " x "," y " ; .err ; .endif\n\t"
+#endif
+
+static noinline u64 invoke_smc_fid(u64 function_id,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	register u64 x0 asm("x0") = function_id;
+	register u64 x1 asm("x1") = arg0;
+	register u64 x2 asm("x2") = arg1;
+	register u64 x3 asm("x3") = arg2;
+
+	asm volatile(
+			__asmeq("%0", "x0")
+			__asmeq("%1", "x1")
+			__asmeq("%2", "x2")
+			__asmeq("%3", "x3")
+			"smc    #0\n"
+			: "+r" (x0)
+			: "r" (x1), "r" (x2), "r" (x3));
+
+	return x0;
+}
+
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2)
+{
+	/* Is fast call (bit 31 set) */
+	KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL);
+	/* bits 16-23 must be zero for fast calls */
+	KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0);
+
+	return invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	/* Only the six bits allowed should be used. */
+	KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0);
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return kbase_invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+#endif /* CONFIG_ARM64 */
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
new file mode 100755
index 0000000..221eb21
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_SMC_H_
+#define _KBASE_SMC_H_
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+
+#define SMC_FAST_CALL (1 << 31)
+#define SMC_64 (1 << 30)
+
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */
+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET)
+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET)
+
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @fid: The SMC function to call, see SMC Calling convention.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC.
+  */
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2);
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @oen: Owning Entity number (SIP, STD etc).
+  * @function_number: The function number within the OEN.
+  * @smc64: use SMC64 calling convention instead of SMC32.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC call.
+  */
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2);
+
+#endif /* CONFIG_ARM64 */
+
+#endif /* _KBASE_SMC_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
new file mode 100755
index 0000000..868e1ea
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -0,0 +1,1718 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#include <asm/cacheflush.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif
+#include <linux/dma-mapping.h>
+#include <mali_base_kernel.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tracepoints.h>
+#include <linux/version.h>
+#include <linux/ktime.h>
+#include <linux/pfn.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/cache.h>
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+static void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_add_tail(&katom->queue, &kctx->waiting_soft_jobs);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_del(&katom->queue);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Record the start time of this atom so we could cancel it at
+	 * the right time.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* Add the atom to the waiting list before the timer is
+	 * (re)started to make sure that it gets processed.
+	 */
+	kbasep_add_waiting_soft_job(katom);
+
+	/* Schedule timeout of this atom after a period if it is not active */
+	if (!timer_pending(&kctx->soft_job_timeout)) {
+		int timeout_ms = atomic_read(
+				&kctx->kbdev->js_data.soft_job_timeout_ms);
+		mod_timer(&kctx->soft_job_timeout,
+			  jiffies + msecs_to_jiffies(timeout_ms));
+	}
+}
+
+static int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*status = *mapped_evt;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	if ((new_status != BASE_JD_SOFT_EVENT_SET) &&
+	    (new_status != BASE_JD_SOFT_EVENT_RESET))
+		return -EINVAL;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*mapped_evt = new_status;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+	struct kbase_vmap_struct map;
+	void *user_result;
+	struct timespec ts;
+	struct base_dump_cpu_gpu_counters data;
+	u64 system_time;
+	u64 cycle_counter;
+	u64 jc = katom->jc;
+	struct kbase_context *kctx = katom->kctx;
+	int pm_active_err;
+
+	memset(&data, 0, sizeof(data));
+
+	/* Take the PM active reference as late as possible - otherwise, it could
+	 * delay suspend until we process the atom (which may be at the end of a
+	 * long chain of dependencies */
+	pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+	if (pm_active_err) {
+		struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+		/* We're suspended - queue this on the list of suspended jobs
+		 * Use dep_item[1], because dep_item[0] was previously in use
+		 * for 'waiting_soft_jobs'.
+		 */
+		mutex_lock(&js_devdata->runpool_mutex);
+		list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		/* Also adding this to the list of waiting soft job */
+		kbasep_add_waiting_soft_job(katom);
+
+		return pm_active_err;
+	}
+
+	kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
+									&ts);
+
+	kbase_pm_context_idle(kctx->kbdev);
+
+	data.sec = ts.tv_sec;
+	data.usec = ts.tv_nsec / 1000;
+	data.system_time = system_time;
+	data.cycle_counter = cycle_counter;
+
+	/* Assume this atom will be cancelled until we know otherwise */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* GPU_WR access is checked on the range for returning the result to
+	 * userspace for the following reasons:
+	 * - security, this is currently how imported user bufs are checked.
+	 * - userspace ddk guaranteed to assume region was mapped as GPU_WR */
+	user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map);
+	if (!user_result)
+		return 0;
+
+	memcpy(user_result, &data, sizeof(data));
+
+	kbase_vunmap(kctx, &map);
+
+	/* Atom was fine - mark it as done */
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	return 0;
+}
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+/* Called by the explicit fence mechanism when a fence wait has completed */
+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(kctx->kbdev);
+	mutex_unlock(&kctx->jctx.lock);
+}
+#endif
+
+static void kbasep_soft_event_complete_job(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt)
+{
+	int cancel_timer = 1;
+	struct list_head *entry, *tmp;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, queue);
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			if (katom->jc == evt) {
+				list_del(&katom->queue);
+
+				katom->event_code = BASE_JD_EVENT_DONE;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				/* There are still other waiting jobs, we cannot
+				 * cancel the timer yet.
+				 */
+				cancel_timer = 0;
+			}
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			/* Keep the timer running if fence debug is enabled and
+			 * there are waiting fence jobs.
+			 */
+			cancel_timer = 0;
+			break;
+#endif
+		}
+	}
+
+	if (cancel_timer)
+		del_timer(&kctx->soft_job_timeout);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep;
+
+		list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) {
+			if (dep->status == KBASE_JD_ATOM_STATE_UNUSED ||
+			    dep->status == KBASE_JD_ATOM_STATE_COMPLETED)
+				continue;
+
+			if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					== BASE_JD_REQ_SOFT_FENCE_TRIGGER) {
+				/* Found blocked trigger fence. */
+				struct kbase_sync_fence_info info;
+
+				if (!kbase_sync_fence_in_info_get(dep, &info)) {
+					dev_warn(dev,
+						 "\tVictim trigger atom %d fence [%p] %s: %s\n",
+						 kbase_jd_atom_id(kctx, dep),
+						 info.fence,
+						 info.name,
+						 kbase_sync_status_string(info.status));
+				 }
+			}
+
+			kbase_fence_debug_check_atom(dep);
+		}
+	}
+}
+
+static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = katom->kctx->kbdev->dev;
+	int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms);
+	unsigned long lflags;
+	struct kbase_sync_fence_info info;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+
+	if (kbase_sync_fence_in_info_get(katom, &info)) {
+		/* Fence must have signaled just after timeout. */
+		spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+		return;
+	}
+
+	dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n",
+		 kctx->tgid, kctx->id,
+		 kbase_jd_atom_id(kctx, katom),
+		 info.fence, timeout_ms);
+	dev_warn(dev, "\tGuilty fence [%p] %s: %s\n",
+		 info.fence, info.name,
+		 kbase_sync_status_string(info.status));
+
+	/* Search for blocked trigger atoms */
+	kbase_fence_debug_check_atom(katom);
+
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+
+	kbase_sync_fence_in_dump(katom);
+}
+
+struct kbase_fence_debug_work {
+	struct kbase_jd_atom *katom;
+	struct work_struct work;
+};
+
+static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work)
+{
+	struct kbase_fence_debug_work *w = container_of(work,
+			struct kbase_fence_debug_work, work);
+	struct kbase_jd_atom *katom = w->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_fence_debug_wait_timeout(katom);
+	mutex_unlock(&kctx->jctx.lock);
+
+	kfree(w);
+}
+
+static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_debug_work *work;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Enqueue fence debug worker. Use job_done_wq to get
+	 * debug print ordered with job completion.
+	 */
+	work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC);
+	/* Ignore allocation failure. */
+	if (work) {
+		work->katom = katom;
+		INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker);
+		queue_work(kctx->jctx.job_done_wq, &work->work);
+	}
+}
+#endif /* CONFIG_MALI_FENCE_DEBUG */
+
+void kbasep_soft_job_timeout_worker(struct timer_list *timer)
+{
+	struct kbase_context *kctx = container_of(timer, struct kbase_context,
+			soft_job_timeout);
+	u32 timeout_ms = (u32)atomic_read(
+			&kctx->kbdev->js_data.soft_job_timeout_ms);
+	ktime_t cur_time = ktime_get();
+	bool restarting = false;
+	unsigned long lflags;
+	struct list_head *entry, *tmp;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(entry,
+				struct kbase_jd_atom, queue);
+		s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time,
+					katom->start_timestamp));
+
+		if (elapsed_time < (s64)timeout_ms) {
+			restarting = true;
+			continue;
+		}
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			/* Take it out of the list to ensure that it
+			 * will be cancelled in all cases
+			 */
+			list_del(&katom->queue);
+
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			INIT_WORK(&katom->work, kbasep_soft_event_complete_job);
+			queue_work(kctx->jctx.job_done_wq, &katom->work);
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			kbase_fence_debug_timeout(katom);
+			break;
+#endif
+		}
+	}
+
+	if (restarting)
+		mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms));
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned char status;
+
+	/* The status of this soft-job is stored in jc */
+	if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return 0;
+	}
+
+	if (status == BASE_JD_SOFT_EVENT_SET)
+		return 0; /* Event already set, nothing to do */
+
+	kbasep_add_waiting_with_timeout(katom);
+
+	return 1;
+}
+
+static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom,
+				     unsigned char new_status)
+{
+	/* Complete jobs waiting on the same event */
+	struct kbase_context *kctx = katom->kctx;
+
+	if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, katom->jc);
+}
+
+/**
+ * kbase_soft_event_update() - Update soft event state
+ * @kctx: Pointer to context
+ * @event: Event to update
+ * @new_status: New status value of event
+ *
+ * Update the event, and wake up any atoms waiting for the event.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+int kbase_soft_event_update(struct kbase_context *kctx,
+			     u64 event,
+			     unsigned char new_status)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->jctx.lock);
+
+	if (kbasep_write_soft_event_status(kctx, event, new_status)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, event);
+
+out:
+	mutex_unlock(&kctx->jctx.lock);
+
+	return err;
+}
+
+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
+{
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers = katom->softjob_data;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+
+	if (!buffers)
+		return;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	for (i = 0; i < nr; i++) {
+		int p;
+		struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc;
+
+		if (!buffers[i].pages)
+			break;
+		for (p = 0; p < buffers[i].nr_pages; p++) {
+			struct page *pg = buffers[i].pages[p];
+
+			if (pg)
+				put_page(pg);
+		}
+		if (buffers[i].is_vmalloc)
+			vfree(buffers[i].pages);
+		else
+			kfree(buffers[i].pages);
+		if (gpu_alloc) {
+			switch (gpu_alloc->type) {
+			case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+			{
+				kbase_free_user_buffer(&buffers[i]);
+				break;
+			}
+			default:
+				/* Nothing to be done. */
+				break;
+			}
+			kbase_mem_phy_alloc_put(gpu_alloc);
+		}
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+	kfree(buffers);
+
+	katom->softjob_data = NULL;
+}
+
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers;
+	struct base_jd_debug_copy_buffer *user_buffers = NULL;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+	int ret = 0;
+	void __user *user_structs = (void __user *)(uintptr_t)katom->jc;
+
+	if (!user_structs)
+		return -EINVAL;
+
+	buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+	katom->softjob_data = buffers;
+
+	user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL);
+
+	if (!user_buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+
+	ret = copy_from_user(user_buffers, user_structs,
+			sizeof(*user_buffers)*nr);
+	if (ret) {
+		ret = -EFAULT;
+		goto out_cleanup;
+	}
+
+	for (i = 0; i < nr; i++) {
+		u64 addr = user_buffers[i].address;
+		u64 page_addr = addr & PAGE_MASK;
+		u64 end_page_addr = addr + user_buffers[i].size - 1;
+		u64 last_page_addr = end_page_addr & PAGE_MASK;
+		int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1;
+		int pinned_pages;
+		struct kbase_va_region *reg;
+		struct base_external_resource user_extres;
+
+		if (!addr)
+			continue;
+
+		if (last_page_addr < page_addr) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		buffers[i].nr_pages = nr_pages;
+		buffers[i].offset = addr & ~PAGE_MASK;
+		if (buffers[i].offset >= PAGE_SIZE) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+		buffers[i].size = user_buffers[i].size;
+
+		if (nr_pages > (KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD /
+				sizeof(struct page *))) {
+			buffers[i].is_vmalloc = true;
+			buffers[i].pages = vzalloc(nr_pages *
+					sizeof(struct page *));
+		} else {
+			buffers[i].is_vmalloc = false;
+			buffers[i].pages = kcalloc(nr_pages,
+					sizeof(struct page *), GFP_KERNEL);
+		}
+
+		if (!buffers[i].pages) {
+			ret = -ENOMEM;
+			goto out_cleanup;
+		}
+
+		pinned_pages = get_user_pages_fast(page_addr,
+					nr_pages,
+					1, /* Write */
+					buffers[i].pages);
+		if (pinned_pages < 0) {
+			/* get_user_pages_fast has failed - page array is not
+			 * valid. Don't try to release any pages.
+			 */
+			buffers[i].nr_pages = 0;
+
+			ret = pinned_pages;
+			goto out_cleanup;
+		}
+		if (pinned_pages != nr_pages) {
+			/* Adjust number of pages, so that we only attempt to
+			 * release pages in the array that we know are valid.
+			 */
+			buffers[i].nr_pages = pinned_pages;
+
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		user_extres = user_buffers[i].extres;
+		if (user_extres.ext_resource == 0ULL) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		kbase_gpu_vm_lock(katom->kctx);
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, user_extres.ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+		if (kbase_is_region_invalid_or_free(reg) ||
+		    reg->gpu_alloc == NULL) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		buffers[i].nr_extres_pages = reg->nr_pages;
+
+		if (reg->nr_pages*PAGE_SIZE != buffers[i].size)
+			dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n");
+
+		switch (reg->gpu_alloc->type) {
+		case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		{
+			struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+			unsigned long nr_pages =
+				alloc->imported.user_buf.nr_pages;
+
+			if (alloc->imported.user_buf.mm != current->mm) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			buffers[i].extres_pages = kcalloc(nr_pages,
+					sizeof(struct page *), GFP_KERNEL);
+			if (!buffers[i].extres_pages) {
+				ret = -ENOMEM;
+				goto out_unlock;
+			}
+
+			ret = get_user_pages_fast(
+					alloc->imported.user_buf.address,
+					nr_pages, 0,
+					buffers[i].extres_pages);
+			if (ret != nr_pages) {
+				/* Adjust number of pages, so that we only
+				 * attempt to release pages in the array that we
+				 * know are valid.
+				 */
+				if (ret < 0)
+					buffers[i].nr_extres_pages = 0;
+				else
+					buffers[i].nr_extres_pages = ret;
+
+				goto out_unlock;
+			}
+			ret = 0;
+			break;
+		}
+		default:
+			/* Nothing to be done. */
+			break;
+		}
+		kbase_gpu_vm_unlock(katom->kctx);
+	}
+	kfree(user_buffers);
+
+	return ret;
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+
+out_cleanup:
+	/* Frees allocated memory for kbase_debug_copy_job struct, including
+	 * members, and sets jc to 0 */
+	kbase_debug_copy_finish(katom);
+	kfree(user_buffers);
+
+	return ret;
+}
+
+void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+		void *extres_page, struct page **pages, unsigned int nr_pages,
+		unsigned int *target_page_nr, size_t offset, size_t *to_copy)
+{
+	void *target_page = kmap(pages[*target_page_nr]);
+	size_t chunk = PAGE_SIZE-offset;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	chunk = min(chunk, *to_copy);
+
+	memcpy(target_page + offset, extres_page, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+
+	*target_page_nr += 1;
+	if (*target_page_nr >= nr_pages)
+		return;
+
+	target_page = kmap(pages[*target_page_nr]);
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(target_page);
+
+	chunk = min(offset, *to_copy);
+	memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+}
+
+int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data)
+{
+	unsigned int i;
+	unsigned int target_page_nr = 0;
+	struct page **pages = buf_data->pages;
+	u64 offset = buf_data->offset;
+	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
+	size_t to_copy = min(extres_size, buf_data->size);
+	struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc;
+	int ret = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	size_t dma_to_copy;
+#endif
+
+	KBASE_DEBUG_ASSERT(pages != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	if (!gpu_alloc) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	switch (gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+	{
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+			struct page *pg = buf_data->extres_pages[i];
+			void *extres_page = kmap(pg);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			kunmap(pg);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		break;
+	}
+	break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf;
+
+		KBASE_DEBUG_ASSERT(dma_buf != NULL);
+		if (dma_buf->size > buf_data->nr_extres_pages * PAGE_SIZE)
+			dev_warn(kctx->kbdev->dev, "External resources buffer size mismatch");
+
+		dma_to_copy = min(dma_buf->size,
+			(size_t)(buf_data->nr_extres_pages * PAGE_SIZE));
+		ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, dma_to_copy,
+#endif
+				DMA_FROM_DEVICE);
+		if (ret)
+			goto out_unlock;
+
+		for (i = 0; i < dma_to_copy/PAGE_SIZE; i++) {
+
+			void *extres_page = dma_buf_kmap(dma_buf, i);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			dma_buf_kunmap(dma_buf, i, extres_page);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, dma_to_copy,
+#endif
+				DMA_FROM_DEVICE);
+		break;
+	}
+#endif
+	default:
+		ret = -EINVAL;
+	}
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+
+}
+
+static int kbase_debug_copy(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers = katom->softjob_data;
+	unsigned int i;
+
+	if (WARN_ON(!buffers))
+		return -EINVAL;
+
+	for (i = 0; i < katom->nr_extres; i++) {
+		int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]);
+
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+#define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7)
+
+int kbasep_jit_alloc_validate(struct kbase_context *kctx,
+					struct base_jit_alloc_info *info)
+{
+	/* If the ID is zero, then fail the job */
+	if (info->id == 0)
+		return -EINVAL;
+
+	/* Sanity check that the PA fits within the VA */
+	if (info->va_pages < info->commit_pages)
+		return -EINVAL;
+
+	/* Ensure the GPU address is correctly aligned */
+	if ((info->gpu_alloc_addr & KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT) != 0)
+		return -EINVAL;
+
+	if (kctx->jit_version == 1) {
+		/* Old JIT didn't have usage_id, max_allocations, bin_id
+		 * or padding, so force them to zero
+		 */
+		info->usage_id = 0;
+		info->max_allocations = 0;
+		info->bin_id = 0;
+		info->flags = 0;
+		memset(info->padding, 0, sizeof(info->padding));
+	} else {
+		int j;
+
+		/* Check padding is all zeroed */
+		for (j = 0; j < sizeof(info->padding); j++) {
+			if (info->padding[j] != 0) {
+				return -EINVAL;
+			}
+		}
+
+		/* No bit other than TILER_ALIGN_TOP shall be set */
+		if (info->flags & ~BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) {
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	struct base_jit_alloc_info *info;
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	u32 count;
+	int ret;
+	u32 i;
+
+	/* For backwards compatibility */
+	if (katom->nr_extres == 0)
+		katom->nr_extres = 1;
+	count = katom->nr_extres;
+
+	/* Sanity checks */
+	if (!data || count > kctx->jit_max_allocations ||
+			count > ARRAY_SIZE(kctx->jit_alloc)) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	info = kmalloc_array(count, sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+	if (copy_from_user(info, data, sizeof(*info)*count) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+	katom->softjob_data = info;
+
+	for (i = 0; i < count; i++, info++) {
+		ret = kbasep_jit_alloc_validate(kctx, info);
+		if (ret)
+			goto free_info;
+		KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(kbdev, katom,
+			info->va_pages, info->commit_pages, info->extent,
+			info->id, info->bin_id, info->max_allocations,
+			info->flags, info->usage_id);
+	}
+
+	katom->jit_blocked = false;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
+
+	/*
+	 * Note:
+	 * The provided info->gpu_alloc_addr isn't validated here as
+	 * userland can cache allocations which means that even
+	 * though the region is valid it doesn't represent the
+	 * same thing it used to.
+	 *
+	 * Complete validation of va_pages, commit_pages and extent
+	 * isn't done here as it will be done during the call to
+	 * kbase_mem_alloc.
+	 */
+	return 0;
+
+free_info:
+	kfree(katom->softjob_data);
+	katom->softjob_data = NULL;
+fail:
+	return ret;
+}
+
+static u8 *kbase_jit_free_get_ids(struct kbase_jd_atom *katom)
+{
+	if (WARN_ON((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) !=
+				BASE_JD_REQ_SOFT_JIT_FREE))
+		return NULL;
+
+	return (u8 *) katom->softjob_data;
+}
+
+static void kbase_jit_add_to_pending_alloc_list(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct list_head *target_list_head = NULL;
+	struct kbase_jd_atom *entry;
+
+	list_for_each_entry(entry, &kctx->jit_pending_alloc, queue) {
+		if (katom->age < entry->age) {
+			target_list_head = &entry->queue;
+			break;
+		}
+	}
+
+	if (target_list_head == NULL)
+		target_list_head = &kctx->jit_pending_alloc;
+
+	list_add_tail(&katom->queue, target_list_head);
+}
+
+static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct base_jit_alloc_info *info;
+	struct kbase_va_region *reg;
+	struct kbase_vmap_struct mapping;
+	u64 *ptr, new_addr;
+	u32 count = katom->nr_extres;
+	u32 i;
+
+	if (katom->jit_blocked) {
+		list_del(&katom->queue);
+		katom->jit_blocked = false;
+	}
+
+	info = katom->softjob_data;
+	if (WARN_ON(!info)) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return 0;
+	}
+
+	for (i = 0; i < count; i++, info++) {
+		/* The JIT ID is still in use so fail the allocation */
+		if (kctx->jit_alloc[info->id]) {
+			katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+			return 0;
+		}
+	}
+
+	for (i = 0, info = katom->softjob_data; i < count; i++, info++) {
+		if (kctx->jit_alloc[info->id]) {
+			/* The JIT ID is duplicated in this atom. Roll back
+			 * previous allocations and fail.
+			 */
+			u32 j;
+
+			info = katom->softjob_data;
+			for (j = 0; j < i; j++, info++) {
+				kbase_jit_free(kctx, kctx->jit_alloc[info->id]);
+				kctx->jit_alloc[info->id] =
+						(struct kbase_va_region *) -1;
+			}
+
+			katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+			return 0;
+		}
+
+		/* Create a JIT allocation */
+		reg = kbase_jit_allocate(kctx, info);
+		if (!reg) {
+			struct kbase_jd_atom *jit_atom;
+			bool can_block = false;
+
+			lockdep_assert_held(&kctx->jctx.lock);
+
+			jit_atom = list_first_entry(&kctx->jit_atoms_head,
+					struct kbase_jd_atom, jit_node);
+
+			list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) {
+				if (jit_atom == katom)
+					break;
+
+				if ((jit_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
+						BASE_JD_REQ_SOFT_JIT_FREE) {
+					u8 *free_ids = kbase_jit_free_get_ids(jit_atom);
+
+					if (free_ids && *free_ids &&
+						kctx->jit_alloc[*free_ids]) {
+						/* A JIT free which is active and
+						 * submitted before this atom
+						 */
+						can_block = true;
+						break;
+					}
+				}
+			}
+
+			if (!can_block) {
+				/* Mark the failed allocation as well as the
+				 * other un-attempted allocations in the set,
+				 * so we know they are in use even if the
+				 * allocation itself failed.
+				 */
+				for (; i < count; i++, info++) {
+					kctx->jit_alloc[info->id] =
+						(struct kbase_va_region *) -1;
+				}
+
+				katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+				dev_warn_ratelimited(kbdev->dev, "JIT alloc softjob failed: atom id %d\n",
+						     kbase_jd_atom_id(kctx, katom));
+				return 0;
+			}
+
+			/* There are pending frees for an active allocation
+			 * so we should wait to see whether they free the
+			 * memory. Add to the list of atoms for which JIT
+			 * allocation is pending.
+			 */
+			kbase_jit_add_to_pending_alloc_list(katom);
+			katom->jit_blocked = true;
+
+			/* Rollback, the whole set will be re-attempted */
+			while (i-- > 0) {
+				info--;
+				kbase_jit_free(kctx, kctx->jit_alloc[info->id]);
+				kctx->jit_alloc[info->id] = NULL;
+			}
+
+			return 1;
+		}
+
+		/* Bind it to the user provided ID. */
+		kctx->jit_alloc[info->id] = reg;
+	}
+
+	for (i = 0, info = katom->softjob_data; i < count; i++, info++) {
+		u64 entry_mmu_flags = 0;
+		/*
+		 * Write the address of the JIT allocation to the user provided
+		 * GPU allocation.
+		 */
+		ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+				&mapping);
+		if (!ptr) {
+			/*
+			 * Leave the allocations "live" as the JIT free atom
+			 * will be submitted anyway.
+			 */
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			return 0;
+		}
+
+		reg = kctx->jit_alloc[info->id];
+		new_addr = reg->start_pfn << PAGE_SHIFT;
+		*ptr = new_addr;
+
+#if defined(CONFIG_MALI_VECTOR_DUMP)
+		/*
+		 * Retrieve the mmu flags for JIT allocation
+		 * only if dumping is enabled
+		 */
+		entry_mmu_flags = kbase_mmu_create_ate(kbdev,
+			(struct tagged_addr){ 0 }, reg->flags,
+			 MIDGARD_MMU_BOTTOMLEVEL, kctx->jit_group_id);
+#endif
+
+		KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(kbdev, katom,
+			info->gpu_alloc_addr, new_addr, info->flags,
+			entry_mmu_flags, info->id, info->commit_pages,
+			info->extent, info->va_pages);
+		kbase_vunmap(kctx, &mapping);
+	}
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	return 0;
+}
+
+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom)
+{
+	struct base_jit_alloc_info *info;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	if (WARN_ON(!katom->softjob_data))
+		return;
+
+	/* Remove atom from jit_atoms_head list */
+	list_del(&katom->jit_node);
+
+	if (katom->jit_blocked) {
+		list_del(&katom->queue);
+		katom->jit_blocked = false;
+	}
+
+	info = katom->softjob_data;
+	/* Free the info structure */
+	kfree(info);
+}
+
+static int kbase_jit_free_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	u8 *ids;
+	u32 count = MAX(katom->nr_extres, 1);
+	u32 i;
+	int ret;
+
+	/* Sanity checks */
+	if (count > ARRAY_SIZE(kctx->jit_alloc)) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL);
+	if (!ids) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	katom->softjob_data = ids;
+
+	/* For backwards compatibility */
+	if (katom->nr_extres) {
+		/* Fail the job if there is no list of ids */
+		if (!data) {
+			ret = -EINVAL;
+			goto free_info;
+		}
+
+		if (copy_from_user(ids, data, sizeof(*ids)*count) != 0) {
+			ret = -EINVAL;
+			goto free_info;
+		}
+	} else {
+		katom->nr_extres = 1;
+		*ids = (u8)katom->jc;
+	}
+	for (i = 0; i < count; i++)
+		KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(kbdev, katom, ids[i]);
+
+	list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
+
+	return 0;
+
+free_info:
+	kfree(katom->softjob_data);
+	katom->softjob_data = NULL;
+fail:
+	return ret;
+}
+
+static void kbase_jit_free_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u8 *ids = kbase_jit_free_get_ids(katom);
+	u32 count = katom->nr_extres;
+	u32 i;
+
+	if (ids == NULL) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	for (i = 0; i < count; i++, ids++) {
+		/*
+		 * If the ID is zero or it is not in use yet then fail the job.
+		 */
+		if ((*ids == 0) || (kctx->jit_alloc[*ids] == NULL)) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			return;
+		}
+	}
+}
+
+static void kbasep_jit_free_finish_worker(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_finish_soft_job(katom);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+static void kbase_jit_free_finish(struct kbase_jd_atom *katom)
+{
+	struct list_head *i, *tmp;
+	struct kbase_context *kctx = katom->kctx;
+	LIST_HEAD(jit_pending_alloc_list);
+	u8 *ids;
+	size_t j;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	ids = kbase_jit_free_get_ids(katom);
+	if (WARN_ON(ids == NULL)) {
+		return;
+	}
+
+	/* Remove this atom from the kctx->jit_atoms_head list */
+	list_del(&katom->jit_node);
+
+	for (j = 0; j != katom->nr_extres; ++j) {
+		if ((ids[j] != 0) && (kctx->jit_alloc[ids[j]] != NULL)) {
+			/*
+			 * If the ID is valid but the allocation request failed
+			 * still succeed this soft job but don't try and free
+			 * the allocation.
+			 */
+			if (kctx->jit_alloc[ids[j]] != (struct kbase_va_region *) -1) {
+				KBASE_TLSTREAM_TL_JIT_USEDPAGES(kctx->kbdev,
+					kctx->jit_alloc[ids[j]]->
+					gpu_alloc->nents, ids[j]);
+				kbase_jit_free(kctx, kctx->jit_alloc[ids[j]]);
+			}
+			kctx->jit_alloc[ids[j]] = NULL;
+		}
+	}
+	/* Free the list of ids */
+	kfree(ids);
+
+	list_splice_tail_init(&kctx->jit_pending_alloc, &jit_pending_alloc_list);
+
+	list_for_each_safe(i, tmp, &jit_pending_alloc_list) {
+		struct kbase_jd_atom *pending_atom = list_entry(i,
+				struct kbase_jd_atom, queue);
+		if (kbase_jit_allocate_process(pending_atom) == 0) {
+			/* Atom has completed */
+			INIT_WORK(&pending_atom->work,
+					kbasep_jit_free_finish_worker);
+			queue_work(kctx->jctx.job_done_wq, &pending_atom->work);
+		}
+	}
+}
+
+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
+{
+	__user struct base_external_resource_list *user_ext_res;
+	struct base_external_resource_list *ext_res;
+	u64 count = 0;
+	size_t copy_size;
+	int ret;
+
+	user_ext_res = (__user struct base_external_resource_list *)
+			(uintptr_t) katom->jc;
+
+	/* Fail the job if there is no info structure */
+	if (!user_ext_res) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Is the number of external resources in range? */
+	if (!count || count > BASE_EXT_RES_COUNT_MAX) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	copy_size = sizeof(*ext_res);
+	copy_size += sizeof(struct base_external_resource) * (count - 1);
+	ext_res = kzalloc(copy_size, GFP_KERNEL);
+	if (!ext_res) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/*
+	 * Overwrite the count with the first value incase it was changed
+	 * after the fact.
+	 */
+	ext_res->count = count;
+
+	katom->softjob_data = ext_res;
+
+	return 0;
+
+free_info:
+	kfree(ext_res);
+fail:
+	return ret;
+}
+
+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
+{
+	struct base_external_resource_list *ext_res;
+	int i;
+	bool failed = false;
+
+	ext_res = katom->softjob_data;
+	if (!ext_res)
+		goto failed_jc;
+
+	kbase_gpu_vm_lock(katom->kctx);
+
+	for (i = 0; i < ext_res->count; i++) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		if (map) {
+			if (!kbase_sticky_resource_acquire(katom->kctx,
+					gpu_addr))
+				goto failed_loop;
+		} else
+			if (!kbase_sticky_resource_release(katom->kctx, NULL,
+					gpu_addr))
+				failed = true;
+	}
+
+	/*
+	 * In the case of unmap we continue unmapping other resources in the
+	 * case of failure but will always report failure if _any_ unmap
+	 * request fails.
+	 */
+	if (failed)
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	else
+		katom->event_code = BASE_JD_EVENT_DONE;
+
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	return;
+
+failed_loop:
+	while (i > 0) {
+		u64 const gpu_addr = ext_res->ext_res[i - 1].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+
+		kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr);
+
+		--i;
+	}
+
+	katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	kbase_gpu_vm_unlock(katom->kctx);
+
+failed_jc:
+	return;
+}
+
+static void kbase_ext_res_finish(struct kbase_jd_atom *katom)
+{
+	struct base_external_resource_list *ext_res;
+
+	ext_res = katom->softjob_data;
+	/* Free the info structure */
+	kfree(ext_res);
+}
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom)
+{
+	int ret = 0;
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, katom);
+
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		ret = kbase_dump_cpu_gpu_time(katom);
+		break;
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		katom->event_code = kbase_sync_fence_out_trigger(katom,
+				katom->event_code == BASE_JD_EVENT_DONE ?
+								0 : -EFAULT);
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+	{
+		ret = kbase_sync_fence_in_wait(katom);
+
+		if (ret == 1) {
+#ifdef CONFIG_MALI_FENCE_DEBUG
+			kbasep_add_waiting_with_timeout(katom);
+#else
+			kbasep_add_waiting_soft_job(katom);
+#endif
+		}
+		break;
+	}
+#endif
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		ret = kbasep_soft_event_wait(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET);
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+	{
+		int res = kbase_debug_copy(katom);
+
+		if (res)
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		break;
+	}
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		ret = kbase_jit_allocate_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_process(katom, true);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_process(katom, false);
+		break;
+	}
+
+	/* Atom is complete */
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kbdev, katom);
+	return ret;
+}
+
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		kbase_sync_fence_in_cancel_wait(katom);
+		break;
+#endif
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		kbasep_soft_event_cancel_job(katom);
+		break;
+	default:
+		/* This soft-job doesn't support cancellation! */
+		KBASE_DEBUG_ASSERT(0);
+	}
+}
+
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		{
+			if (!IS_ALIGNED(katom->jc, cache_line_size()))
+				return -EINVAL;
+		}
+		break;
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		{
+			struct base_fence fence;
+			int fd;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			fd = kbase_sync_fence_out_create(katom,
+							 fence.basep.stream_fd);
+			if (fd < 0)
+				return -EINVAL;
+
+			fence.basep.fd = fd;
+			if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+				kbase_sync_fence_out_remove(katom);
+				kbase_sync_fence_close_fd(fd);
+				fence.basep.fd = -EINVAL;
+				return -EINVAL;
+			}
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		{
+			struct base_fence fence;
+			int ret;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			/* Get a reference to the fence object */
+			ret = kbase_sync_fence_in_from_fd(katom,
+							  fence.basep.fd);
+			if (ret < 0)
+				return ret;
+
+#ifdef CONFIG_MALI_DMA_FENCE
+			/*
+			 * Set KCTX_NO_IMPLICIT_FENCE in the context the first
+			 * time a soft fence wait job is observed. This will
+			 * prevent the implicit dma-buf fence to conflict with
+			 * the Android native sync fences.
+			 */
+			if (!kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC))
+				kbase_ctx_flag_set(katom->kctx, KCTX_NO_IMPLICIT_SYNC);
+#endif /* CONFIG_MALI_DMA_FENCE */
+		}
+		break;
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_prepare(katom);
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		return kbase_jit_free_prepare(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		if (katom->jc == 0)
+			return -EINVAL;
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		return kbase_debug_copy_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		return kbase_ext_res_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		return kbase_ext_res_prepare(katom);
+	default:
+		/* Unsupported soft-job */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void kbase_finish_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		/* Nothing to do */
+		break;
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		/* If fence has not yet been signaled, do it now */
+		kbase_sync_fence_out_trigger(katom, katom->event_code ==
+				BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		/* Release katom's reference to fence object */
+		kbase_sync_fence_in_remove(katom);
+		break;
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		kbase_debug_copy_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_finish(katom);
+		break;
+	}
+}
+
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
+{
+	LIST_HEAD(local_suspended_soft_jobs);
+	struct kbase_jd_atom *tmp_iter;
+	struct kbase_jd_atom *katom_iter;
+	struct kbasep_js_device_data *js_devdata;
+	bool resched = false;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	js_devdata = &kbdev->js_data;
+
+	/* Move out the entire list */
+	mutex_lock(&js_devdata->runpool_mutex);
+	list_splice_init(&js_devdata->suspended_soft_jobs_list,
+			&local_suspended_soft_jobs);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/*
+	 * Each atom must be detached from the list and ran separately -
+	 * it could be re-added to the old list, but this is unlikely
+	 */
+	list_for_each_entry_safe(katom_iter, tmp_iter,
+			&local_suspended_soft_jobs, dep_item[1]) {
+		struct kbase_context *kctx = katom_iter->kctx;
+
+		mutex_lock(&kctx->jctx.lock);
+
+		/* Remove from the global list */
+		list_del(&katom_iter->dep_item[1]);
+		/* Remove from the context's list of waiting soft jobs */
+		kbasep_remove_waiting_soft_job(katom_iter);
+
+		if (kbase_process_soft_job(katom_iter) == 0) {
+			kbase_finish_soft_job(katom_iter);
+			resched |= jd_done_nolock(katom_iter, NULL);
+		}
+		mutex_unlock(&kctx->jctx.lock);
+	}
+
+	if (resched)
+		kbase_js_sched_all(kbdev);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
new file mode 100755
index 0000000..22caa4a
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
@@ -0,0 +1,28 @@
+ /*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include "mali_kbase_strings.h"
+
+#define KBASE_DRV_NAME "mali"
+#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline"
+
+const char kbase_drv_name[] = KBASE_DRV_NAME;
+const char kbase_timeline_name[] = KBASE_TIMELINE_NAME;
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
new file mode 100755
index 0000000..d2f1825
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+extern const char kbase_drv_name[];
+extern const char kbase_timeline_name[];
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
new file mode 100755
index 0000000..785b9ff
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -0,0 +1,222 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_sync.h
+ *
+ * This file contains our internal "API" for explicit fences.
+ * It hides the implementation details of the actual explicit fence mechanism
+ * used (Android fences or sync file with DMA fences).
+ */
+
+#ifndef MALI_KBASE_SYNC_H
+#define MALI_KBASE_SYNC_H
+
+#include <linux/syscalls.h>
+#ifdef CONFIG_SYNC
+#include <sync.h>
+#endif
+#ifdef CONFIG_SYNC_FILE
+#include "mali_kbase_fence_defs.h"
+#include <linux/sync_file.h>
+#endif
+
+#include "mali_kbase.h"
+
+/**
+ * struct kbase_sync_fence_info - Information about a fence
+ * @fence: Pointer to fence (type is void*, as underlaying struct can differ)
+ * @name: The name given to this fence when it was created
+ * @status: < 0 means error, 0 means active, 1 means signaled
+ *
+ * Use kbase_sync_fence_in_info_get() or kbase_sync_fence_out_info_get()
+ * to get the information.
+ */
+struct kbase_sync_fence_info {
+	void *fence;
+	char name[32];
+	int status;
+};
+
+/**
+ * kbase_sync_fence_stream_create() - Create a stream object
+ * @name: Name of stream (only used to ease debugging/visualization)
+ * @out_fd: A file descriptor representing the created stream object
+ *
+ * Can map down to a timeline implementation in some implementations.
+ * Exposed as a file descriptor.
+ * Life-time controlled via the file descriptor:
+ * - dup to add a ref
+ * - close to remove a ref
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd);
+
+/**
+ * kbase_sync_fence_out_create Create an explicit output fence to specified atom
+ * @katom: Atom to assign the new explicit fence to
+ * @stream_fd: File descriptor for stream object to create fence on
+ *
+ * return: Valid file descriptor to fence or < 0 on error
+ */
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd);
+
+/**
+ * kbase_sync_fence_in_from_fd() Assigns an existing fence to specified atom
+ * @katom: Atom to assign the existing explicit fence to
+ * @fd: File descriptor to an existing fence
+ *
+ * Assigns an explicit input fence to atom.
+ * This can later be waited for by calling @kbase_sync_fence_in_wait
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd);
+
+/**
+ * kbase_sync_fence_validate() - Validate a fd to be a valid fence
+ * @fd: File descriptor to check
+ *
+ * This function is only usable to catch unintentional user errors early,
+ * it does not stop malicious code changing the fd after this function returns.
+ *
+ * return 0: if fd is for a valid fence, < 0 if invalid
+ */
+int kbase_sync_fence_validate(int fd);
+
+/**
+ * kbase_sync_fence_out_trigger - Signal explicit output fence attached on katom
+ * @katom: Atom with an explicit fence to signal
+ * @result: < 0 means signal with error, 0 >= indicates success
+ *
+ * Signal output fence attached on katom and remove the fence from the atom.
+ *
+ * return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE
+ */
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result);
+
+/**
+ * kbase_sync_fence_in_wait() - Wait for explicit input fence to be signaled
+ * @katom: Atom with explicit fence to wait for
+ *
+ * If the fence is already signaled, then 0 is returned, and the caller must
+ * continue processing of the katom.
+ *
+ * If the fence isn't already signaled, then this kbase_sync framework will
+ * take responsibility to continue the processing once the fence is signaled.
+ *
+ * return: 0 if already signaled, otherwise 1
+ */
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_in_cancel_wait() - Cancel explicit input fence waits
+ * @katom: Atom to cancel wait for
+ *
+ * This function is fully responsible for continuing processing of this atom
+ * (remove_waiting_soft_job + finish_soft_job + jd_done + js_sched_all)
+ */
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_in_remove() - Remove the input fence from the katom
+ * @katom: Atom to remove explicit input fence for
+ *
+ * This will also release the corresponding reference.
+ */
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_out_remove() - Remove the output fence from the katom
+ * @katom: Atom to remove explicit output fence for
+ *
+ * This will also release the corresponding reference.
+ */
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_close_fd() - Close a file descriptor representing a fence
+ * @fd: File descriptor to close
+ */
+static inline void kbase_sync_fence_close_fd(int fd)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
+	ksys_close(fd);
+#else
+	sys_close(fd);
+#endif
+}
+
+/**
+ * kbase_sync_fence_in_info_get() - Retrieves information about input fence
+ * @katom: Atom to get fence information from
+ * @info: Struct to be filled with fence information
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info);
+
+/**
+ * kbase_sync_fence_out_info_get() - Retrieves information about output fence
+ * @katom: Atom to get fence information from
+ * @info: Struct to be filled with fence information
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				  struct kbase_sync_fence_info *info);
+
+#if defined(CONFIG_SYNC_FILE)
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+void kbase_sync_fence_info_get(struct fence *fence,
+			       struct kbase_sync_fence_info *info);
+#else
+void kbase_sync_fence_info_get(struct dma_fence *fence,
+			       struct kbase_sync_fence_info *info);
+#endif
+#endif
+
+/**
+ * kbase_sync_status_string() - Get string matching @status
+ * @status: Value of fence status.
+ *
+ * return: Pointer to string describing @status.
+ */
+const char *kbase_sync_status_string(int status);
+
+/*
+ * Internal worker used to continue processing of atom.
+ */
+void kbase_sync_fence_wait_worker(struct work_struct *data);
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+/**
+ * kbase_sync_fence_in_dump() Trigger a debug dump of atoms input fence state
+ * @katom: Atom to trigger fence debug dump for
+ */
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom);
+#endif
+
+#endif /* MALI_KBASE_SYNC_H */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c
new file mode 100755
index 0000000..75940fb
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c
@@ -0,0 +1,542 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Code for supporting explicit Android fences (CONFIG_SYNC)
+ * Known to be good for kernels 4.5 and earlier.
+ * Replaced with CONFIG_SYNC_FILE for 4.9 and later kernels
+ * (see mali_kbase_sync_file.c)
+ */
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include "sync.h"
+#include <mali_kbase.h>
+#include <mali_kbase_sync.h>
+
+struct mali_sync_timeline {
+	struct sync_timeline timeline;
+	atomic_t counter;
+	atomic_t signaled;
+};
+
+struct mali_sync_pt {
+	struct sync_pt pt;
+	int order;
+	int result;
+};
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+/* For backwards compatibility with kernels before 3.17. After 3.17
+ * sync_pt_parent is included in the kernel. */
+static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
+{
+	return pt->parent;
+}
+#endif
+
+static struct mali_sync_timeline *to_mali_sync_timeline(
+						struct sync_timeline *timeline)
+{
+	return container_of(timeline, struct mali_sync_timeline, timeline);
+}
+
+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, pt);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_pt *new_mpt;
+	struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt),
+						sizeof(struct mali_sync_pt));
+
+	if (!new_pt)
+		return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+	new_mpt->order = mpt->order;
+	new_mpt->result = mpt->result;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(
+							sync_pt_parent(pt));
+	int result = mpt->result;
+
+	int diff = atomic_read(&mtl->signaled) - mpt->order;
+
+	if (diff >= 0)
+		return (result < 0) ? result : 1;
+
+	return 0;
+}
+
+static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
+	struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
+
+	int diff = ma->order - mb->order;
+
+	if (diff == 0)
+		return 0;
+
+	return (diff < 0) ? -1 : 1;
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str,
+			       int size)
+{
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
+
+	snprintf(str, size, "%d", atomic_read(&mtl->signaled));
+}
+
+static void pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+
+	snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
+}
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name = "Mali",
+	.dup = timeline_dup,
+	.has_signaled = timeline_has_signaled,
+	.compare = timeline_compare,
+	.timeline_value_str = timeline_value_str,
+	.pt_value_str       = pt_value_str,
+};
+
+/* Allocates a timeline for Mali
+ *
+ * One timeline should be allocated per API context.
+ */
+static struct sync_timeline *mali_sync_timeline_alloc(const char *name)
+{
+	struct sync_timeline *tl;
+	struct mali_sync_timeline *mtl;
+
+	tl = sync_timeline_create(&mali_timeline_ops,
+				  sizeof(struct mali_sync_timeline), name);
+	if (!tl)
+		return NULL;
+
+	/* Set the counter in our private struct */
+	mtl = to_mali_sync_timeline(tl);
+	atomic_set(&mtl->counter, 0);
+	atomic_set(&mtl->signaled, 0);
+
+	return tl;
+}
+
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+	struct sync_timeline *tl;
+
+	tl = (struct sync_timeline *)file->private_data;
+	sync_timeline_destroy(tl);
+	return 0;
+}
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE,
+	.release = kbase_stream_close,
+};
+
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
+{
+	struct sync_timeline *tl;
+
+	if (!out_fd)
+		return -EINVAL;
+
+	tl = mali_sync_timeline_alloc(name);
+	if (!tl)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY|O_CLOEXEC);
+
+	if (*out_fd < 0) {
+		sync_timeline_destroy(tl);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by kbase_sync_timeline_alloc
+ *
+ * Sync points must be triggered in *exactly* the same order as they are
+ * allocated.
+ */
+static struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+	struct sync_pt *pt = sync_pt_create(parent,
+					    sizeof(struct mali_sync_pt));
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+	struct mali_sync_pt *mpt;
+
+	if (!pt)
+		return NULL;
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->order = atomic_inc_return(&mtl->counter);
+	mpt->result = 0;
+
+	return pt;
+}
+
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd)
+{
+	struct sync_timeline *tl;
+	struct sync_pt *pt;
+	struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+	struct files_struct *files;
+	struct fdtable *fdt;
+#endif
+	int fd;
+	struct file *tl_file;
+
+	tl_file = fget(tl_fd);
+	if (tl_file == NULL)
+		return -EBADF;
+
+	if (tl_file->f_op != &stream_fops) {
+		fd = -EBADF;
+		goto out;
+	}
+
+	tl = tl_file->private_data;
+
+	pt = kbase_sync_pt_alloc(tl);
+	if (!pt) {
+		fd = -EFAULT;
+		goto out;
+	}
+
+	fence = sync_fence_create("mali_fence", pt);
+	if (!fence) {
+		sync_pt_free(pt);
+		fd = -EFAULT;
+		goto out;
+	}
+
+	/* from here the fence owns the sync_pt */
+
+	/* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+#else
+	fd = get_unused_fd();
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+
+	files = current->files;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	__set_close_on_exec(fd, fdt);
+#else
+	FD_SET(fd, fdt->close_on_exec);
+#endif
+	spin_unlock(&files->file_lock);
+#endif  /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+	/* bind fence to the new fd */
+	sync_fence_install(fence, fd);
+
+	katom->fence = sync_fence_fdget(fd);
+	if (katom->fence == NULL) {
+		/* The only way the fence can be NULL is if userspace closed it
+		 * for us, so we don't need to clear it up */
+		fd = -EINVAL;
+		goto out;
+	}
+
+out:
+	fput(tl_file);
+
+	return fd;
+}
+
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
+{
+	katom->fence = sync_fence_fdget(fd);
+	return katom->fence ? 0 : -ENOENT;
+}
+
+int kbase_sync_fence_validate(int fd)
+{
+	struct sync_fence *fence;
+
+	fence = sync_fence_fdget(fd);
+	if (!fence)
+		return -EINVAL;
+
+	sync_fence_put(fence);
+	return 0;
+}
+
+/* Returns true if the specified timeline is allocated by Mali */
+static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
+{
+	return timeline->ops == &mali_timeline_ops;
+}
+
+/* Signals a particular sync point
+ *
+ * Sync points must be triggered in *exactly* the same order as they are
+ * allocated.
+ *
+ * If they are signaled in the wrong order then a message will be printed in
+ * debug builds and otherwise attempts to signal order sync_pts will be ignored.
+ *
+ * result can be negative to indicate error, any other value is interpreted as
+ * success.
+ */
+static void kbase_sync_signal_pt(struct sync_pt *pt, int result)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(
+							sync_pt_parent(pt));
+	int signaled;
+	int diff;
+
+	mpt->result = result;
+
+	do {
+		signaled = atomic_read(&mtl->signaled);
+
+		diff = signaled - mpt->order;
+
+		if (diff > 0) {
+			/* The timeline is already at or ahead of this point.
+			 * This should not happen unless userspace has been
+			 * signaling fences out of order, so warn but don't
+			 * violate the sync_pt API.
+			 * The warning is only in debug builds to prevent
+			 * a malicious user being able to spam dmesg.
+			 */
+#ifdef CONFIG_MALI_DEBUG
+			pr_err("Fences were triggered in a different order to allocation!");
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+	} while (atomic_cmpxchg(&mtl->signaled,
+				signaled, mpt->order) != signaled);
+}
+
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result)
+{
+	struct sync_pt *pt;
+	struct sync_timeline *timeline;
+
+	if (!katom->fence)
+		return BASE_JD_EVENT_JOB_CANCELLED;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	if (!list_is_singular(&katom->fence->pt_list_head)) {
+#else
+	if (katom->fence->num_fences != 1) {
+#endif
+		/* Not exactly one item in the list - so it didn't (directly)
+		 * come from us */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	pt = list_first_entry(&katom->fence->pt_list_head,
+			      struct sync_pt, pt_list);
+#else
+	pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
+#endif
+	timeline = sync_pt_parent(pt);
+
+	if (!kbase_sync_timeline_is_ours(timeline)) {
+		/* Fence has a sync_pt which isn't ours! */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	kbase_sync_signal_pt(pt, result);
+
+	sync_timeline_signal(timeline);
+
+	kbase_sync_fence_out_remove(katom);
+
+	return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+static inline int kbase_fence_get_status(struct sync_fence *fence)
+{
+	if (!fence)
+		return -ENOENT;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	return fence->status;
+#else
+	return atomic_read(&fence->status);
+#endif
+}
+
+static void kbase_fence_wait_callback(struct sync_fence *fence,
+				      struct sync_fence_waiter *waiter)
+{
+	struct kbase_jd_atom *katom = container_of(waiter,
+					struct kbase_jd_atom, sync_waiter);
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Propagate the fence status to the atom.
+	 * If negative then cancel this atom and its dependencies.
+	 */
+	if (kbase_fence_get_status(fence) < 0)
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* To prevent a potential deadlock we schedule the work onto the
+	 * job_done_wq workqueue
+	 *
+	 * The issue is that we may signal the timeline while holding
+	 * kctx->jctx.lock and the callbacks are run synchronously from
+	 * sync_timeline_signal. So we simply defer the work.
+	 */
+
+	INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
+{
+	int ret;
+
+	sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+	ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+	if (ret == 1) {
+		/* Already signaled */
+		return 0;
+	}
+
+	if (ret < 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+	return 1;
+}
+
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+		/* The wait wasn't cancelled - leave the cleanup for
+		 * kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->fence) {
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+	}
+}
+
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->fence) {
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+	}
+}
+
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+	if (!katom->fence)
+		return -ENOENT;
+
+	info->fence = katom->fence;
+	info->status = kbase_fence_get_status(katom->fence);
+	strlcpy(info->name, katom->fence->name, sizeof(info->name));
+
+	return 0;
+}
+
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+	if (!katom->fence)
+		return -ENOENT;
+
+	info->fence = katom->fence;
+	info->status = kbase_fence_get_status(katom->fence);
+	strlcpy(info->name, katom->fence->name, sizeof(info->name));
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom)
+{
+	/* Dump out the full state of all the Android sync fences.
+	 * The function sync_dump() isn't exported to modules, so force
+	 * sync_fence_wait() to time out to trigger sync_dump().
+	 */
+	if (katom->fence)
+		sync_fence_wait(katom->fence, 1);
+}
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
new file mode 100755
index 0000000..03c0df5
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
@@ -0,0 +1,49 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * @file mali_kbase_sync_common.c
+ *
+ * Common code for our explicit fence functionality
+ */
+
+#include <linux/workqueue.h>
+#include "mali_kbase.h"
+#include "mali_kbase_sync.h"
+
+void kbase_sync_fence_wait_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kbase_soft_event_wait_callback(katom);
+}
+
+const char *kbase_sync_status_string(int status)
+{
+	if (status == 0)
+		return "active";
+	else if (status > 0)
+		return "signaled";
+	else
+		return "error";
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
new file mode 100755
index 0000000..0679c48
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
@@ -0,0 +1,366 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Code for supporting explicit Linux fences (CONFIG_SYNC_FILE)
+ * Introduced in kernel 4.9.
+ * Android explicit fences (CONFIG_SYNC) can be used for older kernels
+ * (see mali_kbase_sync_android.c)
+ */
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/sync_file.h>
+#include <linux/slab.h>
+#include "mali_kbase_fence_defs.h"
+#include "mali_kbase_sync.h"
+#include "mali_kbase_fence.h"
+#include "mali_kbase.h"
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE
+};
+
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
+{
+	if (!out_fd)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, NULL,
+				   O_RDONLY | O_CLOEXEC);
+	if (*out_fd < 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+	struct sync_file *sync_file;
+	int fd;
+
+	fence = kbase_fence_out_new(katom);
+	if (!fence)
+		return -ENOMEM;
+
+#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE)
+	/* Take an extra reference to the fence on behalf of the sync_file.
+	 * This is only needed on older kernels where sync_file_create()
+	 * does not take its own reference. This was changed in v4.9.68,
+	 * where sync_file_create() now takes its own reference.
+	 */
+	dma_fence_get(fence);
+#endif
+
+	/* create a sync_file fd representing the fence */
+	sync_file = sync_file_create(fence);
+	if (!sync_file) {
+#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE)
+		dma_fence_put(fence);
+#endif
+		kbase_fence_out_remove(katom);
+		return -ENOMEM;
+	}
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		fput(sync_file->file);
+		kbase_fence_out_remove(katom);
+		return fd;
+	}
+
+	fd_install(fd, sync_file->file);
+
+	return fd;
+}
+
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence = sync_file_get_fence(fd);
+#else
+	struct dma_fence *fence = sync_file_get_fence(fd);
+#endif
+
+	if (!fence)
+		return -ENOENT;
+
+	kbase_fence_fence_in_set(katom, fence);
+
+	return 0;
+}
+
+int kbase_sync_fence_validate(int fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence = sync_file_get_fence(fd);
+#else
+	struct dma_fence *fence = sync_file_get_fence(fd);
+#endif
+
+	if (!fence)
+		return -EINVAL;
+
+	dma_fence_put(fence);
+
+	return 0; /* valid */
+}
+
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result)
+{
+	int res;
+
+	if (!kbase_fence_out_is_ours(katom)) {
+		/* Not our fence */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	res = kbase_fence_out_signal(katom, result);
+	if (unlikely(res < 0)) {
+		dev_warn(katom->kctx->kbdev->dev,
+				"fence_signal() failed with %d\n", res);
+	}
+
+	kbase_sync_fence_out_remove(katom);
+
+	return (result != 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+static void kbase_fence_wait_callback(struct fence *fence,
+				      struct fence_cb *cb)
+#else
+static void kbase_fence_wait_callback(struct dma_fence *fence,
+				      struct dma_fence_cb *cb)
+#endif
+{
+	struct kbase_fence_cb *kcb = container_of(cb,
+				struct kbase_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Cancel atom if fence is erroneous */
+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \
+	 (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+	  KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE))
+	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error)
+#else
+	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0)
+#endif
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	if (kbase_fence_dep_count_dec_and_test(katom)) {
+		/* We take responsibility of handling this */
+		kbase_fence_dep_count_set(katom, -1);
+
+		/* To prevent a potential deadlock we schedule the work onto the
+		 * job_done_wq workqueue
+		 *
+		 * The issue is that we may signal the timeline while holding
+		 * kctx->jctx.lock and the callbacks are run synchronously from
+		 * sync_timeline_signal. So we simply defer the work.
+		 */
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(kctx->jctx.job_done_wq, &katom->work);
+	}
+}
+
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
+{
+	int err;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_in_get(katom);
+	if (!fence)
+		return 0; /* no input fence to wait for, good to go! */
+
+	kbase_fence_dep_count_set(katom, 1);
+
+	err = kbase_fence_add_callback(katom, fence, kbase_fence_wait_callback);
+
+	kbase_fence_put(fence);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (kbase_fence_dep_count_dec_and_test(katom)) {
+			kbase_fence_free_callbacks(katom);
+			kbase_fence_dep_count_set(katom, -1);
+			return 0; /* Already signaled, good to go right now */
+		}
+
+		/* Callback installed, so we just need to wait for it... */
+	} else {
+		/* Failure */
+		kbase_fence_free_callbacks(katom);
+		kbase_fence_dep_count_set(katom, -1);
+
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+	return 1; /* completion to be done later by callback/worker */
+}
+
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (!kbase_fence_free_callbacks(katom)) {
+		/* The wait wasn't cancelled -
+		 * leave the cleanup for kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Take responsibility of completion */
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	kbase_fence_out_remove(katom);
+}
+
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	kbase_fence_free_callbacks(katom);
+	kbase_fence_in_remove(katom);
+}
+
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+void kbase_sync_fence_info_get(struct fence *fence,
+			       struct kbase_sync_fence_info *info)
+#else
+void kbase_sync_fence_info_get(struct dma_fence *fence,
+			       struct kbase_sync_fence_info *info)
+#endif
+{
+	info->fence = fence;
+
+	/* translate into CONFIG_SYNC status:
+	 * < 0 : error
+	 * 0 : active
+	 * 1 : signaled
+	 */
+	if (dma_fence_is_signaled(fence)) {
+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \
+	 (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+	  KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE))
+		int status = fence->error;
+#else
+		int status = fence->status;
+#endif
+		if (status < 0)
+			info->status = status; /* signaled with error */
+		else
+			info->status = 1; /* signaled with success */
+	} else  {
+		info->status = 0; /* still active (unsignaled) */
+	}
+
+#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
+	scnprintf(info->name, sizeof(info->name), "%u#%u",
+		  fence->context, fence->seqno);
+#elif (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
+	scnprintf(info->name, sizeof(info->name), "%llu#%u",
+		  fence->context, fence->seqno);
+#else
+	scnprintf(info->name, sizeof(info->name), "%llu#%llu",
+		  fence->context, fence->seqno);
+#endif
+}
+
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_in_get(katom);
+	if (!fence)
+		return -ENOENT;
+
+	kbase_sync_fence_info_get(fence, info);
+
+	kbase_fence_put(fence);
+
+	return 0;
+}
+
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				  struct kbase_sync_fence_info *info)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_out_get(katom);
+	if (!fence)
+		return -ENOENT;
+
+	kbase_sync_fence_info_get(fence, info);
+
+	kbase_fence_put(fence);
+
+	return 0;
+}
+
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom)
+{
+	/* Not implemented */
+}
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_timeline.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_timeline.c
new file mode 100644
index 0000000..17470fc
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_timeline.c
@@ -0,0 +1,342 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_timeline.h"
+#include "mali_kbase_timeline_priv.h"
+#include "mali_kbase_tracepoints.h"
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+
+/* The period of autoflush checker execution in milliseconds. */
+#define AUTOFLUSH_INTERVAL 1000 /* ms */
+
+/*****************************************************************************/
+
+/* These values are used in mali_kbase_tracepoints.h
+ * to retrieve the streams from a kbase_timeline instance.
+ */
+const size_t __obj_stream_offset =
+	offsetof(struct kbase_timeline, streams)
+	+ sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_OBJ;
+
+const size_t __aux_stream_offset =
+	offsetof(struct kbase_timeline, streams)
+	+ sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_AUX;
+
+/**
+ * kbasep_timeline_autoflush_timer_callback - autoflush timer callback
+ * @timer:  Timer list
+ *
+ * Timer is executed periodically to check if any of the stream contains
+ * buffer ready to be submitted to user space.
+ */
+static void kbasep_timeline_autoflush_timer_callback(struct timer_list *timer)
+{
+	enum tl_stream_type stype;
+	int                 rcode;
+	struct kbase_timeline *timeline =
+		container_of(timer, struct kbase_timeline, autoflush_timer);
+
+	CSTD_UNUSED(timer);
+
+	for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT;
+			stype++) {
+		struct kbase_tlstream *stream = &timeline->streams[stype];
+
+		int af_cnt = atomic_read(&stream->autoflush_counter);
+
+		/* Check if stream contain unflushed data. */
+		if (af_cnt < 0)
+			continue;
+
+		/* Check if stream should be flushed now. */
+		if (af_cnt != atomic_cmpxchg(
+					&stream->autoflush_counter,
+					af_cnt,
+					af_cnt + 1))
+			continue;
+		if (!af_cnt)
+			continue;
+
+		/* Autoflush this stream. */
+		kbase_tlstream_flush_stream(stream);
+	}
+
+	if (atomic_read(&timeline->autoflush_timer_active))
+		rcode = mod_timer(
+				&timeline->autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+	CSTD_UNUSED(rcode);
+}
+
+
+
+/*****************************************************************************/
+
+int kbase_timeline_init(struct kbase_timeline **timeline,
+		atomic_t *timeline_is_enabled)
+{
+	enum tl_stream_type i;
+	struct kbase_timeline *result;
+
+	if (!timeline || !timeline_is_enabled)
+		return -EINVAL;
+
+	result = kzalloc(sizeof(*result), GFP_KERNEL);
+	if (!result)
+		return -ENOMEM;
+
+	mutex_init(&result->reader_lock);
+	init_waitqueue_head(&result->event_queue);
+
+	/* Prepare stream structures. */
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++)
+		kbase_tlstream_init(&result->streams[i], i,
+			&result->event_queue);
+
+	/* Initialize autoflush timer. */
+	atomic_set(&result->autoflush_timer_active, 0);
+	kbase_timer_setup(&result->autoflush_timer,
+			  kbasep_timeline_autoflush_timer_callback);
+	result->is_enabled = timeline_is_enabled;
+
+	*timeline = result;
+	return 0;
+}
+
+void kbase_timeline_term(struct kbase_timeline *timeline)
+{
+	enum tl_stream_type i;
+
+	if (!timeline)
+		return;
+
+	for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; i++)
+		kbase_tlstream_term(&timeline->streams[i]);
+
+	kfree(timeline);
+}
+
+static void kbase_create_timeline_objects(struct kbase_device *kbdev)
+{
+	unsigned int lpu_id;
+	unsigned int as_nr;
+	struct kbase_context *kctx;
+	struct kbase_timeline *timeline = kbdev->timeline;
+	struct kbase_tlstream *summary =
+		&timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY];
+
+	/* Summarize the LPU objects. */
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		u32 *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		__kbase_tlstream_tl_new_lpu(summary, lpu, lpu_id, *lpu);
+	}
+
+	/* Summarize the Address Space objects. */
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		__kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr);
+
+	/* Create GPU object and make it retain all LPUs and address spaces. */
+	__kbase_tlstream_tl_new_gpu(summary,
+			kbdev,
+			kbdev->gpu_props.props.raw_props.gpu_id,
+			kbdev->gpu_props.num_cores);
+
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		void *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		__kbase_tlstream_tl_lifelink_lpu_gpu(summary, lpu, kbdev);
+	}
+
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		__kbase_tlstream_tl_lifelink_as_gpu(summary,
+				&kbdev->as[as_nr],
+				kbdev);
+
+	/* Lock the context list, to ensure no changes to the list are made
+	 * while we're summarizing the contexts and their contents.
+	 */
+	mutex_lock(&kbdev->kctx_list_lock);
+
+	/* For each context in the device... */
+	list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
+		/* Summarize the context itself */
+		__kbase_tlstream_tl_new_ctx(summary,
+				kctx,
+				kctx->id,
+				(u32)(kctx->tgid));
+	};
+
+	/* Reset body stream buffers while holding the kctx lock.
+	 * This ensures we can't fire both summary and normal tracepoints for
+	 * the same objects.
+	 * If we weren't holding the lock, it's possible that the summarized
+	 * objects could have been created, destroyed, or used after we
+	 * constructed the summary stream tracepoints, but before we reset
+	 * the body stream, resulting in losing those object event tracepoints.
+	 */
+	kbase_timeline_streams_body_reset(timeline);
+
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	/* Static object are placed into summary packet that needs to be
+	 * transmitted first. Flush all streams to make it available to
+	 * user space.
+	 */
+	kbase_timeline_streams_flush(timeline);
+}
+
+#ifdef CONFIG_MALI_DEVFREQ
+static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev)
+{
+	struct devfreq *devfreq = kbdev->devfreq;
+
+	/* Devfreq initialization failure isn't a fatal error, so devfreq might
+	 * be null.
+	 */
+	if (devfreq) {
+		unsigned long cur_freq = 0;
+
+		mutex_lock(&devfreq->lock);
+#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
+		cur_freq = kbdev->current_nominal_freq;
+#else
+		cur_freq = devfreq->last_status.current_frequency;
+#endif
+		KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)cur_freq);
+		mutex_unlock(&devfreq->lock);
+	}
+}
+#endif /* CONFIG_MALI_DEVFREQ */
+
+int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
+{
+	int ret;
+	u32 tlstream_enabled = TLSTREAM_ENABLED | flags;
+	struct kbase_timeline *timeline = kbdev->timeline;
+
+	if (!atomic_cmpxchg(timeline->is_enabled, 0, tlstream_enabled)) {
+		int rcode;
+
+		ret = anon_inode_getfd(
+				"[mali_tlstream]",
+				&kbasep_tlstream_fops,
+				timeline,
+				O_RDONLY | O_CLOEXEC);
+		if (ret < 0) {
+			atomic_set(timeline->is_enabled, 0);
+			return ret;
+		}
+
+		/* Reset and initialize header streams. */
+		kbase_tlstream_reset(
+			&timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]);
+
+		timeline->obj_header_btc = obj_desc_header_size;
+		timeline->aux_header_btc = aux_desc_header_size;
+
+		/* Start autoflush timer. */
+		atomic_set(&timeline->autoflush_timer_active, 1);
+		rcode = mod_timer(
+				&timeline->autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+		CSTD_UNUSED(rcode);
+
+		/* If job dumping is enabled, readjust the software event's
+		 * timeout as the default value of 3 seconds is often
+		 * insufficient.
+		 */
+		if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) {
+			dev_info(kbdev->dev,
+					"Job dumping is enabled, readjusting the software event's timeout\n");
+			atomic_set(&kbdev->js_data.soft_job_timeout_ms,
+					1800000);
+		}
+
+		/* Summary stream was cleared during acquire.
+		 * Create static timeline objects that will be
+		 * read by client.
+		 */
+		kbase_create_timeline_objects(kbdev);
+
+#ifdef CONFIG_MALI_DEVFREQ
+		/* Devfreq target tracepoints are only fired when the target
+		 * changes, so we won't know the current target unless we
+		 * send it now.
+		 */
+		kbase_tlstream_current_devfreq_target(kbdev);
+#endif /* CONFIG_MALI_DEVFREQ */
+
+	} else {
+		ret = -EBUSY;
+	}
+
+	return ret;
+}
+
+void kbase_timeline_streams_flush(struct kbase_timeline *timeline)
+{
+	enum tl_stream_type stype;
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
+		kbase_tlstream_flush_stream(&timeline->streams[stype]);
+}
+
+void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline)
+{
+	kbase_tlstream_reset(
+			&timeline->streams[TL_STREAM_TYPE_OBJ]);
+	kbase_tlstream_reset(
+			&timeline->streams[TL_STREAM_TYPE_AUX]);
+}
+
+#if MALI_UNIT_TEST
+void kbase_timeline_stats(struct kbase_timeline *timeline,
+		u32 *bytes_collected, u32 *bytes_generated)
+{
+	enum tl_stream_type stype;
+
+	KBASE_DEBUG_ASSERT(bytes_collected);
+
+	/* Accumulate bytes generated per stream  */
+	*bytes_generated = 0;
+	for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT;
+			stype++)
+		*bytes_generated += atomic_read(
+			&timeline->streams[stype].bytes_generated);
+
+	*bytes_collected = atomic_read(&timeline->bytes_collected);
+}
+#endif /* MALI_UNIT_TEST */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_timeline.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_timeline.h
new file mode 100644
index 0000000..d800288
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_timeline.h
@@ -0,0 +1,121 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_KBASE_TIMELINE_H)
+#define _KBASE_TIMELINE_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+struct kbase_timeline;
+
+/**
+ * kbase_timeline_init - initialize timeline infrastructure in kernel
+ * @timeline:            Newly created instance of kbase_timeline will
+ *                       be stored in this pointer.
+ * @timeline_is_enabled: Timeline status will be written to this variable
+ *                       when a client is attached/detached. The variable
+ *                       must be valid while timeline instance is valid.
+ * Return: zero on success, negative number on error
+ */
+int kbase_timeline_init(struct kbase_timeline **timeline,
+	atomic_t *timeline_is_enabled);
+
+/**
+ * kbase_timeline_term - terminate timeline infrastructure in kernel
+ *
+ * @timeline:     Timeline instance to be terminated. It must be previously created
+ *                with kbase_timeline_init().
+ */
+void kbase_timeline_term(struct kbase_timeline *timeline);
+
+/**
+ * kbase_timeline_io_acquire - acquire timeline stream file descriptor
+ * @kbdev:     Kbase device
+ * @flags:     Timeline stream flags
+ *
+ * This descriptor is meant to be used by userspace timeline to gain access to
+ * kernel timeline stream. This stream is later broadcasted by user space to the
+ * timeline client.
+ * Only one entity can own the descriptor at any given time. Descriptor shall be
+ * closed if unused. If descriptor cannot be obtained (i.e. when it is already
+ * being used) return will be a negative value.
+ *
+ * Return: file descriptor on success, negative number on error
+ */
+int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags);
+
+/**
+ * kbase_timeline_streams_flush - flush timeline streams.
+ * @timeline:     Timeline instance
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_timeline_streams_flush(struct kbase_timeline *timeline);
+
+/**
+ * kbase_timeline_streams_body_reset - reset timeline body streams.
+ *
+ * Function will discard pending data in all timeline body streams.
+ * @timeline:     Timeline instance
+ */
+void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_timeline_test - start timeline stream data generator
+ * @kbdev:     Kernel common context
+ * @tpw_count: Number of trace point writers in each context
+ * @msg_delay: Time delay in milliseconds between trace points written by one
+ *             writer
+ * @msg_count: Number of trace points written by one writer
+ * @aux_msg:   If non-zero aux messages will be included
+ *
+ * This test starts a requested number of asynchronous writers in both IRQ and
+ * thread context. Each writer will generate required number of test
+ * tracepoints (tracepoints with embedded information about writer that
+ * should be verified by user space reader). Tracepoints will be emitted in
+ * all timeline body streams. If aux_msg is non-zero writer will also
+ * generate not testable tracepoints (tracepoints without information about
+ * writer). These tracepoints are used to check correctness of remaining
+ * timeline message generating functions. Writer will wait requested time
+ * between generating another set of messages. This call blocks until all
+ * writers finish.
+ */
+void kbase_timeline_test(
+	struct kbase_device *kbdev,
+	unsigned int tpw_count,
+	unsigned int msg_delay,
+	unsigned int msg_count,
+	int          aux_msg);
+
+/**
+ * kbase_timeline_stats - read timeline stream statistics
+ * @timeline:        Timeline instance
+ * @bytes_collected: Will hold number of bytes read by the user
+ * @bytes_generated: Will hold number of bytes generated by trace points
+ */
+void kbase_timeline_stats(struct kbase_timeline *timeline, u32 *bytes_collected, u32 *bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+#endif /* _KBASE_TIMELINE_H */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_timeline_io.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_timeline_io.c
new file mode 100644
index 0000000..ffcf84a
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_timeline_io.c
@@ -0,0 +1,314 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase_timeline_priv.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_tracepoints.h>
+
+#include <linux/poll.h>
+
+/* The timeline stream file operations functions. */
+static ssize_t kbasep_timeline_io_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos);
+static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait);
+static int kbasep_timeline_io_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+const struct file_operations kbasep_tlstream_fops = {
+	.owner = THIS_MODULE,
+	.release = kbasep_timeline_io_release,
+	.read    = kbasep_timeline_io_read,
+	.poll    = kbasep_timeline_io_poll,
+};
+
+/**
+ * kbasep_timeline_io_packet_pending - check timeline streams for pending packets
+ * @timeline:      Timeline instance
+ * @ready_stream:  Pointer to variable where stream will be placed
+ * @rb_idx_raw:    Pointer to variable where read buffer index will be placed
+ *
+ * Function checks all streams for pending packets. It will stop as soon as
+ * packet ready to be submitted to user space is detected. Variables under
+ * pointers, passed as the parameters to this function will be updated with
+ * values pointing to right stream and buffer.
+ *
+ * Return: non-zero if any of timeline streams has at last one packet ready
+ */
+static int kbasep_timeline_io_packet_pending(
+		struct kbase_timeline  *timeline,
+		struct kbase_tlstream **ready_stream,
+		unsigned int           *rb_idx_raw)
+{
+	enum tl_stream_type i;
+
+	KBASE_DEBUG_ASSERT(ready_stream);
+	KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+	for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; ++i) {
+		struct kbase_tlstream *stream = &timeline->streams[i];
+		*rb_idx_raw = atomic_read(&stream->rbi);
+		/* Read buffer index may be updated by writer in case of
+		 * overflow. Read and write buffer indexes must be
+		 * loaded in correct order.
+		 */
+		smp_rmb();
+		if (atomic_read(&stream->wbi) != *rb_idx_raw) {
+			*ready_stream = stream;
+			return 1;
+		}
+
+	}
+
+	return 0;
+}
+
+/**
+ * kbasep_timeline_copy_header - copy timeline headers to the user
+ * @timeline:    Timeline instance
+ * @buffer:      Pointer to the buffer provided by user
+ * @size:        Maximum amount of data that can be stored in the buffer
+ * @copy_len:    Pointer to amount of bytes that has been copied already
+ *               within the read system call.
+ *
+ * This helper function checks if timeline headers have not been sent
+ * to the user, and if so, sends them. @ref copy_len is respectively
+ * updated.
+ *
+ * Returns: 0 if success, -1 if copy_to_user has failed.
+ */
+static inline int kbasep_timeline_copy_header(
+	struct kbase_timeline *timeline,
+	char __user *buffer,
+	size_t size,
+	ssize_t *copy_len)
+{
+	if (timeline->obj_header_btc) {
+		size_t offset = obj_desc_header_size -
+			timeline->obj_header_btc;
+
+		size_t header_cp_size = MIN(
+			size - *copy_len,
+			timeline->obj_header_btc);
+
+		if (copy_to_user(
+			    &buffer[*copy_len],
+			    &obj_desc_header[offset],
+			    header_cp_size))
+			return -1;
+
+		timeline->obj_header_btc -= header_cp_size;
+		*copy_len += header_cp_size;
+	}
+
+	if (timeline->aux_header_btc) {
+		size_t offset = aux_desc_header_size -
+			timeline->aux_header_btc;
+		size_t header_cp_size = MIN(
+			size - *copy_len,
+			timeline->aux_header_btc);
+
+		if (copy_to_user(
+			    &buffer[*copy_len],
+			    &aux_desc_header[offset],
+			    header_cp_size))
+			return -1;
+
+		timeline->aux_header_btc -= header_cp_size;
+		*copy_len += header_cp_size;
+	}
+	return 0;
+}
+
+
+/**
+ * kbasep_timeline_io_read - copy data from streams to buffer provided by user
+ * @filp:   Pointer to file structure
+ * @buffer: Pointer to the buffer provided by user
+ * @size:   Maximum amount of data that can be stored in the buffer
+ * @f_pos:  Pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer
+ */
+static ssize_t kbasep_timeline_io_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos)
+{
+	ssize_t copy_len = 0;
+	struct kbase_timeline *timeline;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(f_pos);
+
+	if (WARN_ON(!filp->private_data))
+		return -EFAULT;
+
+	timeline = (struct kbase_timeline *) filp->private_data;
+
+	if (!buffer)
+		return -EINVAL;
+
+	if ((*f_pos < 0) || (size < PACKET_SIZE))
+		return -EINVAL;
+
+	mutex_lock(&timeline->reader_lock);
+
+	while (copy_len < size) {
+		struct kbase_tlstream *stream = NULL;
+		unsigned int        rb_idx_raw = 0;
+		unsigned int        wb_idx_raw;
+		unsigned int        rb_idx;
+		size_t              rb_size;
+
+		if (kbasep_timeline_copy_header(
+			    timeline, buffer, size, &copy_len)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* If we already read some packets and there is no
+		 * packet pending then return back to user.
+		 * If we don't have any data yet, wait for packet to be
+		 * submitted.
+		 */
+		if (copy_len > 0) {
+			if (!kbasep_timeline_io_packet_pending(
+						timeline,
+						&stream,
+						&rb_idx_raw))
+				break;
+		} else {
+			if (wait_event_interruptible(
+						timeline->event_queue,
+						kbasep_timeline_io_packet_pending(
+							timeline,
+							&stream,
+							&rb_idx_raw))) {
+				copy_len = -ERESTARTSYS;
+				break;
+			}
+		}
+
+		if (WARN_ON(!stream)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* Check if this packet fits into the user buffer.
+		 * If so copy its content.
+		 */
+		rb_idx = rb_idx_raw % PACKET_COUNT;
+		rb_size = atomic_read(&stream->buffer[rb_idx].size);
+		if (rb_size > size - copy_len)
+			break;
+		if (copy_to_user(
+					&buffer[copy_len],
+					stream->buffer[rb_idx].data,
+					rb_size)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* If the distance between read buffer index and write
+		 * buffer index became more than PACKET_COUNT, then overflow
+		 * happened and we need to ignore the last portion of bytes
+		 * that we have just sent to user.
+		 */
+		smp_rmb();
+		wb_idx_raw = atomic_read(&stream->wbi);
+
+		if (wb_idx_raw - rb_idx_raw < PACKET_COUNT) {
+			copy_len += rb_size;
+			atomic_inc(&stream->rbi);
+#if MALI_UNIT_TEST
+			atomic_add(rb_size, &timeline->bytes_collected);
+#endif /* MALI_UNIT_TEST */
+
+		} else {
+			const unsigned int new_rb_idx_raw =
+				wb_idx_raw - PACKET_COUNT + 1;
+			/* Adjust read buffer index to the next valid buffer */
+			atomic_set(&stream->rbi, new_rb_idx_raw);
+		}
+	}
+
+	mutex_unlock(&timeline->reader_lock);
+
+	return copy_len;
+}
+
+/**
+ * kbasep_timeline_io_poll - poll timeline stream for packets
+ * @filp: Pointer to file structure
+ * @wait: Pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait)
+{
+	struct kbase_tlstream *stream;
+	unsigned int        rb_idx;
+	struct kbase_timeline *timeline;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	if (WARN_ON(!filp->private_data))
+		return -EFAULT;
+
+	timeline = (struct kbase_timeline *) filp->private_data;
+
+	poll_wait(filp, &timeline->event_queue, wait);
+	if (kbasep_timeline_io_packet_pending(timeline, &stream, &rb_idx))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_timeline_io_release - release timeline stream descriptor
+ * @inode: Pointer to inode structure
+ * @filp:  Pointer to file structure
+ *
+ * Return always return zero
+ */
+static int kbasep_timeline_io_release(struct inode *inode, struct file *filp)
+{
+	struct kbase_timeline *timeline;
+
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(filp->private_data);
+
+	CSTD_UNUSED(inode);
+
+	timeline = (struct kbase_timeline *) filp->private_data;
+
+	/* Stop autoflush timer before releasing access to streams. */
+	atomic_set(&timeline->autoflush_timer_active, 0);
+	del_timer_sync(&timeline->autoflush_timer);
+
+	atomic_set(timeline->is_enabled, 0);
+	return 0;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_timeline_priv.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_timeline_priv.h
new file mode 100644
index 0000000..e4a4a20
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_timeline_priv.h
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_KBASE_TIMELINE_PRIV_H)
+#define _KBASE_TIMELINE_PRIV_H
+
+#include <mali_kbase.h>
+#include <mali_kbase_tlstream.h>
+
+#include <linux/timer.h>
+#include <linux/atomic.h>
+#include <linux/mutex.h>
+
+/**
+ * struct kbase_timeline - timeline state structure
+ * @streams:                The timeline streams generated by kernel
+ * @autoflush_timer:        Autoflush timer
+ * @autoflush_timer_active: If non-zero autoflush timer is active
+ * @reader_lock:            Reader lock. Only one reader is allowed to
+ *                          have access to the timeline streams at any given time.
+ * @event_queue:            Timeline stream event queue
+ * @bytes_collected:        Number of bytes read by user
+ * @is_enabled:             Zero, if timeline is disabled. Timeline stream flags
+ *                          otherwise. See kbase_timeline_io_acquire().
+ * @obj_header_btc:         Remaining bytes to copy for the object stream header
+ * @aux_header_btc:         Remaining bytes to copy for the aux stream header
+ */
+struct kbase_timeline {
+	struct kbase_tlstream streams[TL_STREAM_TYPE_COUNT];
+	struct timer_list autoflush_timer;
+	atomic_t          autoflush_timer_active;
+	struct mutex      reader_lock;
+	wait_queue_head_t event_queue;
+#if MALI_UNIT_TEST
+	atomic_t          bytes_collected;
+#endif /* MALI_UNIT_TEST */
+	atomic_t         *is_enabled;
+	size_t            obj_header_btc;
+	size_t            aux_header_btc;
+};
+
+extern const struct file_operations kbasep_tlstream_fops;
+
+#endif /* _KBASE_TIMELINE_PRIV_H */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tl_serialize.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tl_serialize.h
new file mode 100644
index 0000000..90808ce
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tl_serialize.h
@@ -0,0 +1,127 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_KBASE_TL_SERIALIZE_H)
+#define _KBASE_TL_SERIALIZE_H
+
+#include <mali_kbase.h>
+
+#include <linux/timer.h>
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC       1000000000ull /* ns */
+
+/**
+ * kbasep_serialize_bytes - serialize bytes to the message buffer
+ *
+ * Serialize bytes as is using memcpy()
+ *
+ * @buffer:    Message buffer
+ * @pos:       Message buffer offset
+ * @bytes:     Bytes to serialize
+ * @len:       Length of bytes array
+ *
+ * Return: updated position in the buffer
+ */
+static inline size_t kbasep_serialize_bytes(
+		char       *buffer,
+		size_t     pos,
+		const void *bytes,
+		size_t     len)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(bytes);
+
+	memcpy(&buffer[pos], bytes, len);
+
+	return pos + len;
+}
+
+/**
+ * kbasep_serialize_string - serialize string to the message buffer
+ *
+ * String is serialized as 4 bytes for string size,
+ * then string content and then null terminator.
+ *
+ * @buffer:         Message buffer
+ * @pos:            Message buffer offset
+ * @string:         String to serialize
+ * @max_write_size: Number of bytes that can be stored in buffer
+ *
+ * Return: updated position in the buffer
+ */
+static inline size_t kbasep_serialize_string(
+		char       *buffer,
+		size_t     pos,
+		const char *string,
+		size_t     max_write_size)
+{
+	u32 string_len;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(string);
+	/* Timeline string consists of at least string length and nul
+	 * terminator.
+	 */
+	KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
+	max_write_size -= sizeof(string_len);
+
+	string_len = strlcpy(
+			&buffer[pos + sizeof(string_len)],
+			string,
+			max_write_size);
+	string_len += sizeof(char);
+
+	/* Make sure that the source string fit into the buffer. */
+	KBASE_DEBUG_ASSERT(string_len <= max_write_size);
+
+	/* Update string length. */
+	memcpy(&buffer[pos], &string_len, sizeof(string_len));
+
+	return pos + sizeof(string_len) + string_len;
+}
+
+/**
+ * kbasep_serialize_timestamp - serialize timestamp to the message buffer
+ *
+ * Get current timestamp using kbasep_get_timestamp()
+ * and serialize it as 64 bit unsigned integer.
+ *
+ * @buffer: Message buffer
+ * @pos:    Message buffer offset
+ *
+ * Return: updated position in the buffer
+ */
+static inline size_t kbasep_serialize_timestamp(void *buffer, size_t pos)
+{
+	struct timespec ts;
+	u64             timestamp;
+
+	getrawmonotonic(&ts);
+	timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+
+	return kbasep_serialize_bytes(
+			buffer, pos,
+			&timestamp, sizeof(timestamp));
+}
+#endif /* _KBASE_TL_SERIALIZE_H */
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
new file mode 100755
index 0000000..2a76bc0
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -0,0 +1,287 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_tlstream.h"
+#include "mali_kbase_tl_serialize.h"
+#include "mali_kbase_mipe_proto.h"
+
+/**
+ * kbasep_packet_header_setup - setup the packet header
+ * @buffer:     pointer to the buffer
+ * @pkt_family: packet's family
+ * @pkt_type:   packet's type
+ * @pkt_class:  packet's class
+ * @stream_id:  stream id
+ * @numbered:   non-zero if this stream is numbered
+ *
+ * Function sets up immutable part of packet header in the given buffer.
+ */
+static void kbasep_packet_header_setup(
+	char                  *buffer,
+	enum tl_packet_family pkt_family,
+	enum tl_packet_class  pkt_class,
+	enum tl_packet_type   pkt_type,
+	unsigned int          stream_id,
+	int                   numbered)
+{
+	u32 words[2] = {
+		MIPE_PACKET_HEADER_W0(pkt_family, pkt_class, pkt_type, stream_id),
+		MIPE_PACKET_HEADER_W1(0, !!numbered),
+	};
+	memcpy(buffer, words, sizeof(words));
+}
+
+/**
+ * kbasep_packet_header_update - update the packet header
+ * @buffer:    pointer to the buffer
+ * @data_size: amount of data carried in this packet
+ * @numbered:   non-zero if the stream is numbered
+ *
+ * Function updates mutable part of packet header in the given buffer.
+ * Note that value of data_size must not including size of the header.
+ */
+static void kbasep_packet_header_update(
+		char  *buffer,
+		size_t data_size,
+		int    numbered)
+{
+	u32 word0;
+	u32 word1 = MIPE_PACKET_HEADER_W1((u32)data_size, !!numbered);
+
+	KBASE_DEBUG_ASSERT(buffer);
+	CSTD_UNUSED(word0);
+
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_packet_number_update - update the packet number
+ * @buffer:  pointer to the buffer
+ * @counter: value of packet counter for this packet's stream
+ *
+ * Function updates packet number embedded within the packet placed in the
+ * given buffer.
+ */
+static void kbasep_packet_number_update(char *buffer, u32 counter)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+
+	memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter));
+}
+
+void kbase_tlstream_reset(struct kbase_tlstream *stream)
+{
+	unsigned int i;
+
+	for (i = 0; i < PACKET_COUNT; i++) {
+		if (stream->numbered)
+			atomic_set(
+				&stream->buffer[i].size,
+				PACKET_HEADER_SIZE +
+				PACKET_NUMBER_SIZE);
+		else
+			atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE);
+	}
+
+	atomic_set(&stream->wbi, 0);
+	atomic_set(&stream->rbi, 0);
+}
+
+/* Configuration of timeline streams generated by kernel.
+ * Kernel emit only streams containing either timeline object events or
+ * auxiliary events. All streams have stream id value of 1 (as opposed to user
+ * space streams that have value of 0).
+ */
+static const struct {
+	enum tl_packet_family pkt_family;
+	enum tl_packet_class  pkt_class;
+	enum tl_packet_type   pkt_type;
+	unsigned int          stream_id;
+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY,    1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY,    1}
+};
+
+void kbase_tlstream_init(
+	struct kbase_tlstream *stream,
+	enum tl_stream_type    stream_type,
+	wait_queue_head_t     *ready_read)
+{
+	unsigned int i;
+
+	KBASE_DEBUG_ASSERT(stream);
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	spin_lock_init(&stream->lock);
+
+	/* All packets carrying tracepoints shall be numbered. */
+	if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type)
+		stream->numbered = 1;
+	else
+		stream->numbered = 0;
+
+	for (i = 0; i < PACKET_COUNT; i++)
+		kbasep_packet_header_setup(
+			stream->buffer[i].data,
+			tl_stream_cfg[stream_type].pkt_family,
+			tl_stream_cfg[stream_type].pkt_class,
+			tl_stream_cfg[stream_type].pkt_type,
+			tl_stream_cfg[stream_type].stream_id,
+			stream->numbered);
+
+#if MALI_UNIT_TEST
+	atomic_set(&stream->bytes_generated, 0);
+#endif
+	stream->ready_read = ready_read;
+
+	kbase_tlstream_reset(stream);
+}
+
+void kbase_tlstream_term(struct kbase_tlstream *stream)
+{
+	KBASE_DEBUG_ASSERT(stream);
+}
+
+/**
+ * kbase_tlstream_msgbuf_submit - submit packet to user space
+ * @stream:     Pointer to the stream structure
+ * @wb_idx_raw: Write buffer index
+ * @wb_size:    Length of data stored in the current buffer
+ *
+ * Updates currently written buffer with the packet header.
+ * Then write index is incremented and the buffer is handed to user space.
+ * Parameters of the new buffer are returned using provided arguments.
+ *
+ * Return: length of data in the new buffer
+ *
+ * Warning: the user must update the stream structure with returned value.
+ */
+static size_t kbasep_tlstream_msgbuf_submit(
+		struct kbase_tlstream *stream,
+		unsigned int      wb_idx_raw,
+		unsigned int      wb_size)
+{
+	unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
+
+	/* Set stream as flushed. */
+	atomic_set(&stream->autoflush_counter, -1);
+
+	kbasep_packet_header_update(
+		stream->buffer[wb_idx].data,
+		wb_size - PACKET_HEADER_SIZE,
+		stream->numbered);
+
+	if (stream->numbered)
+		kbasep_packet_number_update(
+			stream->buffer[wb_idx].data,
+			wb_idx_raw);
+
+	/* Increasing write buffer index will expose this packet to the reader.
+	 * As stream->lock is not taken on reader side we must make sure memory
+	 * is updated correctly before this will happen. */
+	smp_wmb();
+	atomic_inc(&stream->wbi);
+
+	/* Inform user that packets are ready for reading. */
+	wake_up_interruptible(stream->ready_read);
+
+	wb_size = PACKET_HEADER_SIZE;
+	if (stream->numbered)
+		wb_size += PACKET_NUMBER_SIZE;
+
+	return wb_size;
+}
+
+char *kbase_tlstream_msgbuf_acquire(
+	struct kbase_tlstream *stream,
+	size_t              msg_size,
+	unsigned long       *flags) __acquires(&stream->lock)
+{
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+
+	KBASE_DEBUG_ASSERT(
+		PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
+		msg_size);
+
+	spin_lock_irqsave(&stream->lock, *flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	/* Select next buffer if data will not fit into current one. */
+	if (PACKET_SIZE < wb_size + msg_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx  = (wb_idx_raw + 1) % PACKET_COUNT;
+	}
+
+	/* Reserve space in selected buffer. */
+	atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size);
+
+#if MALI_UNIT_TEST
+	atomic_add(msg_size, &stream->bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+	return &stream->buffer[wb_idx].data[wb_size];
+}
+
+void kbase_tlstream_msgbuf_release(
+	struct kbase_tlstream *stream,
+	unsigned long       flags) __releases(&stream->lock)
+{
+	/* Mark stream as containing unflushed data. */
+	atomic_set(&stream->autoflush_counter, 0);
+
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+void kbase_tlstream_flush_stream(
+	struct kbase_tlstream *stream)
+{
+	unsigned long    flags;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+	size_t           min_size = PACKET_HEADER_SIZE;
+
+	if (stream->numbered)
+		min_size += PACKET_NUMBER_SIZE;
+
+	spin_lock_irqsave(&stream->lock, flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	if (wb_size > min_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+		atomic_set(&stream->buffer[wb_idx].size, wb_size);
+	}
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
new file mode 100755
index 0000000..5797738
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -0,0 +1,167 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_KBASE_TLSTREAM_H)
+#define _KBASE_TLSTREAM_H
+
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/wait.h>
+
+/* The maximum size of a single packet used by timeline. */
+#define PACKET_SIZE        4096 /* bytes */
+
+/* The number of packets used by one timeline stream. */
+#if defined(CONFIG_MALI_JOB_DUMP) || defined(CONFIG_MALI_VECTOR_DUMP)
+	#define PACKET_COUNT       64
+#else
+	#define PACKET_COUNT       32
+#endif
+
+/* The maximum expected length of string in tracepoint descriptor. */
+#define STRLEN_MAX         64 /* bytes */
+
+/**
+ * struct kbase_tlstream - timeline stream structure
+ * @lock:              Message order lock
+ * @buffer:            Array of buffers
+ * @wbi:               Write buffer index
+ * @rbi:               Read buffer index
+ * @numbered:          If non-zero stream's packets are sequentially numbered
+ * @autoflush_counter: Counter tracking stream's autoflush state
+ * @ready_read:        Pointer to a wait queue, which is signaled when
+ *                     timeline messages are ready for collection.
+ * @bytes_generated:   Number of bytes generated by tracepoint messages
+ *
+ * This structure holds information needed to construct proper packets in the
+ * timeline stream.
+ *
+ * Each message in the sequence must bear a timestamp that is
+ * greater than the previous message in the same stream. For this reason
+ * a lock is held throughout the process of message creation.
+ *
+ * Each stream contains a set of buffers. Each buffer will hold one MIPE
+ * packet. In case there is no free space required to store the incoming
+ * message the oldest buffer is discarded. Each packet in timeline body
+ * stream has a sequence number embedded, this value must increment
+ * monotonically and is used by the packets receiver to discover these
+ * buffer overflows.
+ *
+ * The autoflush counter is set to a negative number when there is no data
+ * pending for flush and it is set to zero on every update of the buffer. The
+ * autoflush timer will increment the counter by one on every expiry. If there
+ * is no activity on the buffer for two consecutive timer expiries, the stream
+ * buffer will be flushed.
+ */
+struct kbase_tlstream {
+	spinlock_t lock;
+
+	struct {
+		atomic_t size;              /* number of bytes in buffer */
+		char     data[PACKET_SIZE]; /* buffer's data */
+	} buffer[PACKET_COUNT];
+
+	atomic_t wbi;
+	atomic_t rbi;
+
+	int      numbered;
+	atomic_t autoflush_counter;
+	wait_queue_head_t *ready_read;
+#if MALI_UNIT_TEST
+	atomic_t bytes_generated;
+#endif
+};
+
+/* Types of streams generated by timeline. */
+enum tl_stream_type {
+	TL_STREAM_TYPE_FIRST,
+	TL_STREAM_TYPE_OBJ_SUMMARY = TL_STREAM_TYPE_FIRST,
+	TL_STREAM_TYPE_OBJ,
+	TL_STREAM_TYPE_AUX,
+
+	TL_STREAM_TYPE_COUNT
+};
+
+/**
+ * kbase_tlstream_init - initialize timeline stream
+ * @stream:      Pointer to the stream structure
+ * @stream_type: Stream type
+ * @ready_read:  Pointer to a wait queue to signal when
+ *               timeline messages are ready for collection.
+ */
+void kbase_tlstream_init(struct kbase_tlstream *stream,
+	enum tl_stream_type stream_type,
+	wait_queue_head_t  *ready_read);
+
+/**
+ * kbase_tlstream_term - terminate timeline stream
+ * @stream: Pointer to the stream structure
+ */
+void kbase_tlstream_term(struct kbase_tlstream *stream);
+
+/**
+ * kbase_tlstream_reset - reset stream
+ * @stream:    Pointer to the stream structure
+ *
+ * Function discards all pending messages and resets packet counters.
+ */
+void kbase_tlstream_reset(struct kbase_tlstream *stream);
+
+/**
+ * kbase_tlstream_msgbuf_acquire - lock selected stream and reserve a buffer
+ * @stream:      Pointer to the stream structure
+ * @msg_size:    Message size
+ * @flags:       Pointer to store flags passed back on stream release
+ *
+ * Lock the stream and reserve the number of bytes requested
+ * in msg_size for the user.
+ *
+ * Return: pointer to the buffer where a message can be stored
+ *
+ * Warning: The stream must be released with kbase_tlstream_msgbuf_release().
+ *          Only atomic operations are allowed while the stream is locked
+ *          (i.e. do not use any operation that may sleep).
+ */
+char *kbase_tlstream_msgbuf_acquire(struct kbase_tlstream *stream,
+	size_t msg_size, unsigned long *flags) __acquires(&stream->lock);
+
+/**
+ * kbase_tlstream_msgbuf_release - unlock selected stream
+ * @stream:    Pointer to the stream structure
+ * @flags:     Value obtained during stream acquire
+ *
+ * Release the stream that has been previously
+ * locked with a call to kbase_tlstream_msgbuf_acquire().
+ */
+void kbase_tlstream_msgbuf_release(struct kbase_tlstream *stream,
+	unsigned long flags) __releases(&stream->lock);
+
+/**
+ * kbase_tlstream_flush_stream - flush stream
+ * @stream:     Pointer to the stream structure
+ *
+ * Flush pending data in the timeline stream.
+ */
+void kbase_tlstream_flush_stream(struct kbase_tlstream *stream);
+
+#endif /* _KBASE_TLSTREAM_H */
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100755
index 0000000..77fb818
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,261 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code idenitifers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE.
+ *
+ * e.g.:
+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X
+ * typedef enum
+ * {
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X )
+ * #include "mali_kbase_trace_defs.h"
+ * #undef  KBASE_TRACE_CODE_MAKE_CODE
+ * } kbase_trace_code;
+ *
+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE
+ *
+ *
+ * The use of the macro here is:
+ * - KBASE_TRACE_CODE_MAKE_CODE( X )
+ *
+ * Which produces:
+ * - For an enum, KBASE_TRACE_CODE_X
+ * - For a string, "X"
+ *
+ *
+ * For example:
+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to:
+ *  - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum
+ *  - "JM_JOB_COMPLETE" for the string
+ * - To use it to trace an event, do:
+ *  - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val );
+ */
+
+#if 0 /* Dummy section to avoid breaking formatting */
+int dummy_array[] = {
+#endif
+
+/*
+ * Core events
+ */
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ),
+	/* info_val == bits cleared */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR),
+	/* GPU addr==dump address */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES),
+/*
+ * Job Slot management events
+ */
+	/* info_val==irq rawstat at start */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ),
+	/* info_val==jobs processed */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END),
+/* In the following:
+ *
+ * - ctx is set if a corresponding job found (NULL otherwise, e.g. some soft-stop cases)
+ * - uatom==kernel-side mapped uatom address (for correlation with user-side)
+ */
+	/* info_val==exit code; gpu_addr==chain gpuaddr */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+	/* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT),
+	/* gpu_addr is as follows:
+	 * - If JS_STATUS active after soft-stop, val==gpu addr written to
+	 *   JS_HEAD on submit
+	 * - otherwise gpu_addr==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1),
+	/* gpu_addr==JS_TAIL read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD),
+/* gpu_addr is as follows:
+ * - If JS_STATUS active before soft-stop, val==JS_HEAD
+ * - otherwise gpu_addr==0
+ */
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE),
+	/* info_val == nr jobs submitted */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP),
+	/* gpu_addr==JS_HEAD_NEXT last written */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER),
+/*
+ * Job dispatch events
+ */
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==0, info_val==0, uatom==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER),
+/*
+ * Scheduler Core events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB),
+	/* gpu_addr==last value written/would be written to JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE),
+	/* info_val == the ctx attribute now on ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX),
+	/* info_val == the ctx attribute now on runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL),
+	/* info_val == the ctx attribute now off ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
+	/* info_val == the ctx attribute now off runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+/*
+ * Scheduler Policy events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX),
+	/* info_val == whether it was evicted */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ),
+	/* gpu_addr==JS_HEAD to write if the job were run */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END),
+/*
+ * Power Management Events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
+	/* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF),
+	/* info_val == policy number, or -1 for "Already changing" */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+/* Unused code just to make it easier to not have a comma at the end.
+ * All other codes MUST come before this */
+	KBASE_TRACE_CODE_MAKE_CODE(DUMMY)
+
+#if 0 /* Dummy section to avoid breaking formatting */
+};
+#endif
+
+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tracepoints.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tracepoints.c
new file mode 100644
index 0000000..2c55127
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tracepoints.c
@@ -0,0 +1,2836 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * THIS FILE IS AUTOGENERATED BY mali_trace_generator.py.
+ * DO NOT EDIT.
+ */
+
+#include "mali_kbase_tracepoints.h"
+#include "mali_kbase_tlstream.h"
+#include "mali_kbase_tl_serialize.h"
+
+/* clang-format off */
+
+/* Message ids of trace events that are recorded in the timeline stream. */
+enum tl_msg_id_obj {
+	KBASE_TL_NEW_CTX,
+	KBASE_TL_NEW_GPU,
+	KBASE_TL_NEW_LPU,
+	KBASE_TL_NEW_ATOM,
+	KBASE_TL_NEW_AS,
+	KBASE_TL_DEL_CTX,
+	KBASE_TL_DEL_ATOM,
+	KBASE_TL_LIFELINK_LPU_GPU,
+	KBASE_TL_LIFELINK_AS_GPU,
+	KBASE_TL_RET_CTX_LPU,
+	KBASE_TL_RET_ATOM_CTX,
+	KBASE_TL_RET_ATOM_LPU,
+	KBASE_TL_NRET_CTX_LPU,
+	KBASE_TL_NRET_ATOM_CTX,
+	KBASE_TL_NRET_ATOM_LPU,
+	KBASE_TL_RET_AS_CTX,
+	KBASE_TL_NRET_AS_CTX,
+	KBASE_TL_RET_ATOM_AS,
+	KBASE_TL_NRET_ATOM_AS,
+	KBASE_TL_ATTRIB_ATOM_CONFIG,
+	KBASE_TL_ATTRIB_ATOM_PRIORITY,
+	KBASE_TL_ATTRIB_ATOM_STATE,
+	KBASE_TL_ATTRIB_ATOM_PRIORITIZED,
+	KBASE_TL_ATTRIB_ATOM_JIT,
+	KBASE_TL_JIT_USEDPAGES,
+	KBASE_TL_ATTRIB_ATOM_JITALLOCINFO,
+	KBASE_TL_ATTRIB_ATOM_JITFREEINFO,
+	KBASE_TL_ATTRIB_AS_CONFIG,
+	KBASE_TL_EVENT_LPU_SOFTSTOP,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+	KBASE_TL_EVENT_ATOM_SOFTJOB_START,
+	KBASE_TL_EVENT_ATOM_SOFTJOB_END,
+	KBASE_JD_GPU_SOFT_RESET,
+	KBASE_TL_NEW_KCPUQUEUE,
+	KBASE_TL_RET_KCPUQUEUE_CTX,
+	KBASE_TL_DEL_KCPUQUEUE,
+	KBASE_TL_NRET_KCPUQUEUE_CTX,
+	KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL,
+	KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT,
+	KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT,
+	KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT,
+	KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT,
+	KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET,
+	KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET,
+	KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET,
+	KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY,
+	KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY,
+	KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY,
+	KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT,
+	KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT,
+	KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
+	KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
+	KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
+	KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE,
+	KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE,
+	KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START,
+	KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END,
+	KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END,
+	KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START,
+	KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END,
+	KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END,
+	KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER,
+	KBASE_OBJ_MSG_COUNT,
+};
+
+/* Message ids of trace events that are recorded in the auxiliary stream. */
+enum tl_msg_id_aux {
+	KBASE_AUX_PM_STATE,
+	KBASE_AUX_PAGEFAULT,
+	KBASE_AUX_PAGESALLOC,
+	KBASE_AUX_DEVFREQ_TARGET,
+	KBASE_AUX_PROTECTED_ENTER_START,
+	KBASE_AUX_PROTECTED_ENTER_END,
+	KBASE_AUX_PROTECTED_LEAVE_START,
+	KBASE_AUX_PROTECTED_LEAVE_END,
+	KBASE_AUX_JIT_STATS,
+	KBASE_AUX_EVENT_JOB_SLOT,
+	KBASE_AUX_MSG_COUNT,
+};
+
+#define OBJ_TL_LIST \
+	TP_DESC(KBASE_TL_NEW_CTX, \
+		"object ctx is created", \
+		"@pII", \
+		"ctx,ctx_nr,tgid") \
+	TP_DESC(KBASE_TL_NEW_GPU, \
+		"object gpu is created", \
+		"@pII", \
+		"gpu,gpu_id,core_count") \
+	TP_DESC(KBASE_TL_NEW_LPU, \
+		"object lpu is created", \
+		"@pII", \
+		"lpu,lpu_nr,lpu_fn") \
+	TP_DESC(KBASE_TL_NEW_ATOM, \
+		"object atom is created", \
+		"@pI", \
+		"atom,atom_nr") \
+	TP_DESC(KBASE_TL_NEW_AS, \
+		"address space object is created", \
+		"@pI", \
+		"address_space,as_nr") \
+	TP_DESC(KBASE_TL_DEL_CTX, \
+		"context is destroyed", \
+		"@p", \
+		"ctx") \
+	TP_DESC(KBASE_TL_DEL_ATOM, \
+		"atom is destroyed", \
+		"@p", \
+		"atom") \
+	TP_DESC(KBASE_TL_LIFELINK_LPU_GPU, \
+		"lpu is deleted with gpu", \
+		"@pp", \
+		"lpu,gpu") \
+	TP_DESC(KBASE_TL_LIFELINK_AS_GPU, \
+		"address space is deleted with gpu", \
+		"@pp", \
+		"address_space,gpu") \
+	TP_DESC(KBASE_TL_RET_CTX_LPU, \
+		"context is retained by lpu", \
+		"@pp", \
+		"ctx,lpu") \
+	TP_DESC(KBASE_TL_RET_ATOM_CTX, \
+		"atom is retained by context", \
+		"@pp", \
+		"atom,ctx") \
+	TP_DESC(KBASE_TL_RET_ATOM_LPU, \
+		"atom is retained by lpu", \
+		"@pps", \
+		"atom,lpu,attrib_match_list") \
+	TP_DESC(KBASE_TL_NRET_CTX_LPU, \
+		"context is released by lpu", \
+		"@pp", \
+		"ctx,lpu") \
+	TP_DESC(KBASE_TL_NRET_ATOM_CTX, \
+		"atom is released by context", \
+		"@pp", \
+		"atom,ctx") \
+	TP_DESC(KBASE_TL_NRET_ATOM_LPU, \
+		"atom is released by lpu", \
+		"@pp", \
+		"atom,lpu") \
+	TP_DESC(KBASE_TL_RET_AS_CTX, \
+		"address space is retained by context", \
+		"@pp", \
+		"address_space,ctx") \
+	TP_DESC(KBASE_TL_NRET_AS_CTX, \
+		"address space is released by context", \
+		"@pp", \
+		"address_space,ctx") \
+	TP_DESC(KBASE_TL_RET_ATOM_AS, \
+		"atom is retained by address space", \
+		"@pp", \
+		"atom,address_space") \
+	TP_DESC(KBASE_TL_NRET_ATOM_AS, \
+		"atom is released by address space", \
+		"@pp", \
+		"atom,address_space") \
+	TP_DESC(KBASE_TL_ATTRIB_ATOM_CONFIG, \
+		"atom job slot attributes", \
+		"@pLLI", \
+		"atom,descriptor,affinity,config") \
+	TP_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITY, \
+		"atom priority", \
+		"@pI", \
+		"atom,prio") \
+	TP_DESC(KBASE_TL_ATTRIB_ATOM_STATE, \
+		"atom state", \
+		"@pI", \
+		"atom,state") \
+	TP_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITIZED, \
+		"atom caused priority change", \
+		"@p", \
+		"atom") \
+	TP_DESC(KBASE_TL_ATTRIB_ATOM_JIT, \
+		"jit done for atom", \
+		"@pLLILILLL", \
+		"atom,edit_addr,new_addr,jit_flags,mem_flags,j_id,com_pgs,extent,va_pgs") \
+	TP_DESC(KBASE_TL_JIT_USEDPAGES, \
+		"used pages for jit", \
+		"@LI", \
+		"used_pages,j_id") \
+	TP_DESC(KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, \
+		"Information about JIT allocations", \
+		"@pLLLIIIII", \
+		"atom,va_pgs,com_pgs,extent,j_id,bin_id,max_allocs,jit_flags,usg_id") \
+	TP_DESC(KBASE_TL_ATTRIB_ATOM_JITFREEINFO, \
+		"Information about JIT frees", \
+		"@pI", \
+		"atom,j_id") \
+	TP_DESC(KBASE_TL_ATTRIB_AS_CONFIG, \
+		"address space attributes", \
+		"@pLLL", \
+		"address_space,transtab,memattr,transcfg") \
+	TP_DESC(KBASE_TL_EVENT_LPU_SOFTSTOP, \
+		"softstop event on given lpu", \
+		"@p", \
+		"lpu") \
+	TP_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, \
+		"atom softstopped", \
+		"@p", \
+		"atom") \
+	TP_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, \
+		"atom softstop issued", \
+		"@p", \
+		"atom") \
+	TP_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_START, \
+		"atom soft job has started", \
+		"@p", \
+		"atom") \
+	TP_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_END, \
+		"atom soft job has completed", \
+		"@p", \
+		"atom") \
+	TP_DESC(KBASE_JD_GPU_SOFT_RESET, \
+		"gpu soft reset", \
+		"@p", \
+		"gpu") \
+	TP_DESC(KBASE_TL_NEW_KCPUQUEUE, \
+		"New KCPU Queue", \
+		"@ppI", \
+		"kcpu_queue,ctx,kcpuq_num_pending_cmds") \
+	TP_DESC(KBASE_TL_RET_KCPUQUEUE_CTX, \
+		"Context retains KCPU Queue", \
+		"@pp", \
+		"kcpu_queue,ctx") \
+	TP_DESC(KBASE_TL_DEL_KCPUQUEUE, \
+		"Delete KCPU Queue", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_NRET_KCPUQUEUE_CTX, \
+		"Context releases KCPU Queue", \
+		"@pp", \
+		"kcpu_queue,ctx") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, \
+		"KCPU Queue enqueues Signal on Fence", \
+		"@pL", \
+		"kcpu_queue,fence") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT, \
+		"KCPU Queue enqueues Wait on Fence", \
+		"@pL", \
+		"kcpu_queue,fence") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
+		"Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
+		"Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object", \
+		"@pLI", \
+		"kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
+		"End array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, \
+		"Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, \
+		"Array item of KCPU Queue enqueues Set on Cross Queue Sync Object", \
+		"@pL", \
+		"kcpu_queue,cqs_obj_gpu_addr") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, \
+		"End array of KCPU Queue enqueues Set on Cross Queue Sync Object", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \
+		"Begin array of KCPU Queue enqueues Debug Copy", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \
+		"Array item of KCPU Queue enqueues Debug Copy", \
+		"@pL", \
+		"kcpu_queue,debugcopy_dst_size") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \
+		"End array of KCPU Queue enqueues Debug Copy", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \
+		"KCPU Queue enqueues Map Import", \
+		"@pL", \
+		"kcpu_queue,map_import_buf_gpu_addr") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, \
+		"KCPU Queue enqueues Unmap Import", \
+		"@pL", \
+		"kcpu_queue,map_import_buf_gpu_addr") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
+		"Begin array of KCPU Queue enqueues JIT Alloc", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
+		"Array item of KCPU Queue enqueues JIT Alloc", \
+		"@pLLLLIIIII", \
+		"kcpu_queue,jit_alloc_gpu_alloc_addr_dest,jit_alloc_va_pages,jit_alloc_commit_pages,jit_alloc_extent,jit_alloc_jit_id,jit_alloc_bin_id,jit_alloc_max_allocations,jit_alloc_flags,jit_alloc_usage_id") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
+		"End array of KCPU Queue enqueues JIT Alloc", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, \
+		"Begin array of KCPU Queue enqueues JIT Free", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, \
+		"Array item of KCPU Queue enqueues JIT Free", \
+		"@pI", \
+		"kcpu_queue,jit_alloc_jit_id") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, \
+		"End array of KCPU Queue enqueues JIT Free", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, \
+		"KCPU Queue starts a Signal on Fence", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \
+		"KCPU Queue ends a Signal on Fence", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \
+		"KCPU Queue starts a Wait on Fence", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \
+		"KCPU Queue ends a Wait on Fence", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \
+		"KCPU Queue starts a Wait on an array of Cross Queue Sync Objects", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \
+		"KCPU Queue ends a Wait on an array of Cross Queue Sync Objects", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START, \
+		"KCPU Queue starts a Set on an array of Cross Queue Sync Objects", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END, \
+		"KCPU Queue ends a Set on an array of Cross Queue Sync Objects", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, \
+		"KCPU Queue starts an array of Debug Copys", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, \
+		"KCPU Queue ends an array of Debug Copys", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \
+		"KCPU Queue starts a Map Import", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \
+		"KCPU Queue ends a Map Import", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \
+		"KCPU Queue starts an Unmap Import", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \
+		"KCPU Queue ends an Unmap Import", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \
+		"KCPU Queue starts an array of JIT Allocs", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
+		"Begin array of KCPU Queue ends an array of JIT Allocs", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
+		"Array item of KCPU Queue ends an array of JIT Allocs", \
+		"@pLL", \
+		"kcpu_queue,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
+		"End array of KCPU Queue ends an array of JIT Allocs", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START, \
+		"KCPU Queue starts an array of JIT Frees", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
+		"Begin array of KCPU Queue ends an array of JIT Frees", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
+		"Array item of KCPU Queue ends an array of JIT Frees", \
+		"@pL", \
+		"kcpu_queue,jit_free_pages_used") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
+		"End array of KCPU Queue ends an array of JIT Frees", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER, \
+		"KCPU Queue executes an Error Barrier", \
+		"@p", \
+		"kcpu_queue") \
+
+#define MIPE_HEADER_BLOB_VAR_NAME    __obj_desc_header
+#define MIPE_HEADER_TP_LIST          OBJ_TL_LIST
+#define MIPE_HEADER_TP_LIST_COUNT    KBASE_OBJ_MSG_COUNT
+#define MIPE_HEADER_PKT_CLASS        TL_PACKET_CLASS_OBJ
+
+#include "mali_kbase_mipe_gen_header.h"
+
+const char   *obj_desc_header = (const char *) &__obj_desc_header;
+const size_t  obj_desc_header_size = sizeof(__obj_desc_header);
+
+#define AUX_TL_LIST \
+	TP_DESC(KBASE_AUX_PM_STATE, \
+		"PM state", \
+		"@IL", \
+		"core_type,core_state_bitset") \
+	TP_DESC(KBASE_AUX_PAGEFAULT, \
+		"Page fault", \
+		"@IIL", \
+		"ctx_nr,as_nr,page_cnt_change") \
+	TP_DESC(KBASE_AUX_PAGESALLOC, \
+		"Total alloc pages change", \
+		"@IL", \
+		"ctx_nr,page_cnt") \
+	TP_DESC(KBASE_AUX_DEVFREQ_TARGET, \
+		"New device frequency target", \
+		"@L", \
+		"target_freq") \
+	TP_DESC(KBASE_AUX_PROTECTED_ENTER_START, \
+		"enter protected mode start", \
+		"@p", \
+		"gpu") \
+	TP_DESC(KBASE_AUX_PROTECTED_ENTER_END, \
+		"enter protected mode end", \
+		"@p", \
+		"gpu") \
+	TP_DESC(KBASE_AUX_PROTECTED_LEAVE_START, \
+		"leave protected mode start", \
+		"@p", \
+		"gpu") \
+	TP_DESC(KBASE_AUX_PROTECTED_LEAVE_END, \
+		"leave protected mode end", \
+		"@p", \
+		"gpu") \
+	TP_DESC(KBASE_AUX_JIT_STATS, \
+		"per-bin JIT statistics", \
+		"@IIIIII", \
+		"ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages") \
+	TP_DESC(KBASE_AUX_EVENT_JOB_SLOT, \
+		"event on a given job slot", \
+		"@pIII", \
+		"ctx,slot_nr,atom_nr,event") \
+
+#define MIPE_HEADER_BLOB_VAR_NAME    __aux_desc_header
+#define MIPE_HEADER_TP_LIST          AUX_TL_LIST
+#define MIPE_HEADER_TP_LIST_COUNT    KBASE_AUX_MSG_COUNT
+#define MIPE_HEADER_PKT_CLASS        TL_PACKET_CLASS_AUX
+
+#include "mali_kbase_mipe_gen_header.h"
+
+const char   *aux_desc_header = (const char *) &__aux_desc_header;
+const size_t  aux_desc_header_size = sizeof(__aux_desc_header);
+
+void __kbase_tlstream_tl_new_ctx(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	u32 ctx_nr,
+	u32 tgid)
+{
+	const u32 msg_id = KBASE_TL_NEW_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx)
+		+ sizeof(ctx_nr)
+		+ sizeof(tgid)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &tgid, sizeof(tgid));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_new_gpu(
+	struct kbase_tlstream *stream,
+	const void *gpu,
+	u32 gpu_id,
+	u32 core_count)
+{
+	const u32 msg_id = KBASE_TL_NEW_GPU;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(gpu)
+		+ sizeof(gpu_id)
+		+ sizeof(core_count)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu_id, sizeof(gpu_id));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &core_count, sizeof(core_count));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_new_lpu(
+	struct kbase_tlstream *stream,
+	const void *lpu,
+	u32 lpu_nr,
+	u32 lpu_fn)
+{
+	const u32 msg_id = KBASE_TL_NEW_LPU;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(lpu)
+		+ sizeof(lpu_nr)
+		+ sizeof(lpu_fn)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu, sizeof(lpu));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu_nr, sizeof(lpu_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu_fn, sizeof(lpu_fn));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_new_atom(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 atom_nr)
+{
+	const u32 msg_id = KBASE_TL_NEW_ATOM;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(atom_nr)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom_nr, sizeof(atom_nr));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_new_as(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	u32 as_nr)
+{
+	const u32 msg_id = KBASE_TL_NEW_AS;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(address_space)
+		+ sizeof(as_nr)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &address_space, sizeof(address_space));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &as_nr, sizeof(as_nr));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_del_ctx(
+	struct kbase_tlstream *stream,
+	const void *ctx)
+{
+	const u32 msg_id = KBASE_TL_DEL_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_del_atom(
+	struct kbase_tlstream *stream,
+	const void *atom)
+{
+	const u32 msg_id = KBASE_TL_DEL_ATOM;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_lifelink_lpu_gpu(
+	struct kbase_tlstream *stream,
+	const void *lpu,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(lpu)
+		+ sizeof(gpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu, sizeof(lpu));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_lifelink_as_gpu(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(address_space)
+		+ sizeof(gpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &address_space, sizeof(address_space));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_ret_ctx_lpu(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	const void *lpu)
+{
+	const u32 msg_id = KBASE_TL_RET_CTX_LPU;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx)
+		+ sizeof(lpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu, sizeof(lpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_ctx(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *ctx)
+{
+	const u32 msg_id = KBASE_TL_RET_ATOM_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(ctx)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_lpu(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *lpu,
+	const char *attrib_match_list)
+{
+	const u32 msg_id = KBASE_TL_RET_ATOM_LPU;
+	const size_t s0 = sizeof(u32) + sizeof(char)
+		+ strnlen(attrib_match_list, STRLEN_MAX);
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(lpu)
+		+ s0
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu, sizeof(lpu));
+	pos = kbasep_serialize_string(buffer,
+		pos, attrib_match_list, s0);
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_nret_ctx_lpu(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	const void *lpu)
+{
+	const u32 msg_id = KBASE_TL_NRET_CTX_LPU;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx)
+		+ sizeof(lpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu, sizeof(lpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_ctx(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *ctx)
+{
+	const u32 msg_id = KBASE_TL_NRET_ATOM_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(ctx)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_lpu(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *lpu)
+{
+	const u32 msg_id = KBASE_TL_NRET_ATOM_LPU;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(lpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu, sizeof(lpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_ret_as_ctx(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	const void *ctx)
+{
+	const u32 msg_id = KBASE_TL_RET_AS_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(address_space)
+		+ sizeof(ctx)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &address_space, sizeof(address_space));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_nret_as_ctx(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	const void *ctx)
+{
+	const u32 msg_id = KBASE_TL_NRET_AS_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(address_space)
+		+ sizeof(ctx)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &address_space, sizeof(address_space));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_as(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *address_space)
+{
+	const u32 msg_id = KBASE_TL_RET_ATOM_AS;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(address_space)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &address_space, sizeof(address_space));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_as(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *address_space)
+{
+	const u32 msg_id = KBASE_TL_NRET_ATOM_AS;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(address_space)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &address_space, sizeof(address_space));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_config(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u64 descriptor,
+	u64 affinity,
+	u32 config)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(descriptor)
+		+ sizeof(affinity)
+		+ sizeof(config)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &descriptor, sizeof(descriptor));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &affinity, sizeof(affinity));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &config, sizeof(config));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 prio)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(prio)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &prio, sizeof(prio));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_state(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 state)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_ATOM_STATE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(state)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &state, sizeof(state));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_prioritized(
+	struct kbase_tlstream *stream,
+	const void *atom)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITIZED;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_jit(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u64 edit_addr,
+	u64 new_addr,
+	u32 jit_flags,
+	u64 mem_flags,
+	u32 j_id,
+	u64 com_pgs,
+	u64 extent,
+	u64 va_pgs)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JIT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(edit_addr)
+		+ sizeof(new_addr)
+		+ sizeof(jit_flags)
+		+ sizeof(mem_flags)
+		+ sizeof(j_id)
+		+ sizeof(com_pgs)
+		+ sizeof(extent)
+		+ sizeof(va_pgs)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &edit_addr, sizeof(edit_addr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &new_addr, sizeof(new_addr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_flags, sizeof(jit_flags));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &mem_flags, sizeof(mem_flags));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &j_id, sizeof(j_id));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &com_pgs, sizeof(com_pgs));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &extent, sizeof(extent));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &va_pgs, sizeof(va_pgs));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_jit_usedpages(
+	struct kbase_tlstream *stream,
+	u64 used_pages,
+	u32 j_id)
+{
+	const u32 msg_id = KBASE_TL_JIT_USEDPAGES;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(used_pages)
+		+ sizeof(j_id)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &used_pages, sizeof(used_pages));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &j_id, sizeof(j_id));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_jitallocinfo(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u64 va_pgs,
+	u64 com_pgs,
+	u64 extent,
+	u32 j_id,
+	u32 bin_id,
+	u32 max_allocs,
+	u32 jit_flags,
+	u32 usg_id)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITALLOCINFO;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(va_pgs)
+		+ sizeof(com_pgs)
+		+ sizeof(extent)
+		+ sizeof(j_id)
+		+ sizeof(bin_id)
+		+ sizeof(max_allocs)
+		+ sizeof(jit_flags)
+		+ sizeof(usg_id)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &va_pgs, sizeof(va_pgs));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &com_pgs, sizeof(com_pgs));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &extent, sizeof(extent));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &j_id, sizeof(j_id));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &bin_id, sizeof(bin_id));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &max_allocs, sizeof(max_allocs));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_flags, sizeof(jit_flags));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &usg_id, sizeof(usg_id));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 j_id)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITFREEINFO;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(j_id)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &j_id, sizeof(j_id));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_as_config(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	u64 transtab,
+	u64 memattr,
+	u64 transcfg)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(address_space)
+		+ sizeof(transtab)
+		+ sizeof(memattr)
+		+ sizeof(transcfg)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &address_space, sizeof(address_space));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &transtab, sizeof(transtab));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &memattr, sizeof(memattr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &transcfg, sizeof(transcfg));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_lpu_softstop(
+	struct kbase_tlstream *stream,
+	const void *lpu)
+{
+	const u32 msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(lpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu, sizeof(lpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_ex(
+	struct kbase_tlstream *stream,
+	const void *atom)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_issue(
+	struct kbase_tlstream *stream,
+	const void *atom)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softjob_start(
+	struct kbase_tlstream *stream,
+	const void *atom)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softjob_end(
+	struct kbase_tlstream *stream,
+	const void *atom)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_jd_gpu_soft_reset(
+	struct kbase_tlstream *stream,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_JD_GPU_SOFT_RESET;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(gpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_pm_state(
+	struct kbase_tlstream *stream,
+	u32 core_type,
+	u64 core_state_bitset)
+{
+	const u32 msg_id = KBASE_AUX_PM_STATE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(core_type)
+		+ sizeof(core_state_bitset)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &core_type, sizeof(core_type));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &core_state_bitset, sizeof(core_state_bitset));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_pagefault(
+	struct kbase_tlstream *stream,
+	u32 ctx_nr,
+	u32 as_nr,
+	u64 page_cnt_change)
+{
+	const u32 msg_id = KBASE_AUX_PAGEFAULT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx_nr)
+		+ sizeof(as_nr)
+		+ sizeof(page_cnt_change)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &as_nr, sizeof(as_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &page_cnt_change, sizeof(page_cnt_change));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_pagesalloc(
+	struct kbase_tlstream *stream,
+	u32 ctx_nr,
+	u64 page_cnt)
+{
+	const u32 msg_id = KBASE_AUX_PAGESALLOC;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx_nr)
+		+ sizeof(page_cnt)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &page_cnt, sizeof(page_cnt));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_devfreq_target(
+	struct kbase_tlstream *stream,
+	u64 target_freq)
+{
+	const u32 msg_id = KBASE_AUX_DEVFREQ_TARGET;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(target_freq)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &target_freq, sizeof(target_freq));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_protected_enter_start(
+	struct kbase_tlstream *stream,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(gpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_protected_enter_end(
+	struct kbase_tlstream *stream,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(gpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_protected_leave_start(
+	struct kbase_tlstream *stream,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(gpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_protected_leave_end(
+	struct kbase_tlstream *stream,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(gpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_jit_stats(
+	struct kbase_tlstream *stream,
+	u32 ctx_nr,
+	u32 bid,
+	u32 max_allocs,
+	u32 allocs,
+	u32 va_pages,
+	u32 ph_pages)
+{
+	const u32 msg_id = KBASE_AUX_JIT_STATS;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx_nr)
+		+ sizeof(bid)
+		+ sizeof(max_allocs)
+		+ sizeof(allocs)
+		+ sizeof(va_pages)
+		+ sizeof(ph_pages)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &bid, sizeof(bid));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &max_allocs, sizeof(max_allocs));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &allocs, sizeof(allocs));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &va_pages, sizeof(va_pages));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ph_pages, sizeof(ph_pages));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_event_job_slot(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	u32 slot_nr,
+	u32 atom_nr,
+	u32 event)
+{
+	const u32 msg_id = KBASE_AUX_EVENT_JOB_SLOT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx)
+		+ sizeof(slot_nr)
+		+ sizeof(atom_nr)
+		+ sizeof(event)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &slot_nr, sizeof(slot_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom_nr, sizeof(atom_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &event, sizeof(event));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_new_kcpuqueue(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	const void *ctx,
+	u32 kcpuq_num_pending_cmds)
+{
+	const u32 msg_id = KBASE_TL_NEW_KCPUQUEUE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(ctx)
+		+ sizeof(kcpuq_num_pending_cmds)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpuq_num_pending_cmds, sizeof(kcpuq_num_pending_cmds));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_ret_kcpuqueue_ctx(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	const void *ctx)
+{
+	const u32 msg_id = KBASE_TL_RET_KCPUQUEUE_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(ctx)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_del_kcpuqueue(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_DEL_KCPUQUEUE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_nret_kcpuqueue_ctx(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	const void *ctx)
+{
+	const u32 msg_id = KBASE_TL_NRET_KCPUQUEUE_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(ctx)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_signal(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 fence)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(fence)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &fence, sizeof(fence));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 fence)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(fence)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &fence, sizeof(fence));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 cqs_obj_gpu_addr,
+	u32 cqs_obj_compare_value)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(cqs_obj_gpu_addr)
+		+ sizeof(cqs_obj_compare_value)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &cqs_obj_compare_value, sizeof(cqs_obj_compare_value));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_set(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_set(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 cqs_obj_gpu_addr)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(cqs_obj_gpu_addr)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_set(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_debugcopy(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_debugcopy(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 debugcopy_dst_size)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(debugcopy_dst_size)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &debugcopy_dst_size, sizeof(debugcopy_dst_size));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_debugcopy(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_map_import(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 map_import_buf_gpu_addr)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(map_import_buf_gpu_addr)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_unmap_import(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 map_import_buf_gpu_addr)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(map_import_buf_gpu_addr)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_alloc(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_alloc(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 jit_alloc_gpu_alloc_addr_dest,
+	u64 jit_alloc_va_pages,
+	u64 jit_alloc_commit_pages,
+	u64 jit_alloc_extent,
+	u32 jit_alloc_jit_id,
+	u32 jit_alloc_bin_id,
+	u32 jit_alloc_max_allocations,
+	u32 jit_alloc_flags,
+	u32 jit_alloc_usage_id)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(jit_alloc_gpu_alloc_addr_dest)
+		+ sizeof(jit_alloc_va_pages)
+		+ sizeof(jit_alloc_commit_pages)
+		+ sizeof(jit_alloc_extent)
+		+ sizeof(jit_alloc_jit_id)
+		+ sizeof(jit_alloc_bin_id)
+		+ sizeof(jit_alloc_max_allocations)
+		+ sizeof(jit_alloc_flags)
+		+ sizeof(jit_alloc_usage_id)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_gpu_alloc_addr_dest, sizeof(jit_alloc_gpu_alloc_addr_dest));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_va_pages, sizeof(jit_alloc_va_pages));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_commit_pages, sizeof(jit_alloc_commit_pages));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_extent, sizeof(jit_alloc_extent));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_jit_id, sizeof(jit_alloc_jit_id));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_bin_id, sizeof(jit_alloc_bin_id));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_max_allocations, sizeof(jit_alloc_max_allocations));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_flags, sizeof(jit_alloc_flags));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_usage_id, sizeof(jit_alloc_usage_id));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_alloc(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_free(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_free(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u32 jit_alloc_jit_id)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(jit_alloc_jit_id)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_jit_id, sizeof(jit_alloc_jit_id));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_free(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_jit_alloc_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_alloc_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_alloc_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 jit_alloc_gpu_alloc_addr,
+	u64 jit_alloc_mmu_flags)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(jit_alloc_gpu_alloc_addr)
+		+ sizeof(jit_alloc_mmu_flags)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_gpu_alloc_addr, sizeof(jit_alloc_gpu_alloc_addr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_mmu_flags, sizeof(jit_alloc_mmu_flags));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_alloc_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_jit_free_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_free_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_free_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 jit_free_pages_used)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(jit_free_pages_used)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_free_pages_used, sizeof(jit_free_pages_used));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_free_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_errorbarrier(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+/* clang-format on */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tracepoints.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tracepoints.h
new file mode 100644
index 0000000..7346493
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tracepoints.h
@@ -0,0 +1,2417 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * THIS FILE IS AUTOGENERATED BY mali_trace_generator.py.
+ * DO NOT EDIT.
+ */
+
+#if !defined(_KBASE_TRACEPOINTS_H)
+#define _KBASE_TRACEPOINTS_H
+
+/* Tracepoints are abstract callbacks notifying that some important
+ * software or hardware event has happened.
+ *
+ * In this particular implementation, it results into a MIPE
+ * timeline event and, in some cases, it also fires an ftrace event
+ * (a.k.a. Gator events, see details below).
+ */
+
+#include "mali_kbase.h"
+#include "mali_kbase_gator.h"
+
+#include <linux/types.h>
+#include <linux/atomic.h>
+
+/* clang-format off */
+
+struct kbase_tlstream;
+
+extern const size_t __obj_stream_offset;
+extern const size_t __aux_stream_offset;
+
+/* This macro dispatches a kbase_tlstream from
+ * a kbase_device instance. Only AUX or OBJ
+ * streams can be dispatched. It is aware of
+ * kbase_timeline binary representation and
+ * relies on offset variables:
+ * __obj_stream_offset and __aux_stream_offset.
+ */
+#define __TL_DISPATCH_STREAM(kbdev, stype) \
+	((struct kbase_tlstream *) \
+	 ((u8 *)kbdev->timeline + __ ## stype ## _stream_offset))
+
+struct tp_desc;
+
+/* Descriptors of timeline messages transmitted in object events stream. */
+extern const char   *obj_desc_header;
+extern const size_t  obj_desc_header_size;
+/* Descriptors of timeline messages transmitted in auxiliary events stream. */
+extern const char   *aux_desc_header;
+extern const size_t  aux_desc_header_size;
+
+#define TL_ATOM_STATE_IDLE 0
+#define TL_ATOM_STATE_READY 1
+#define TL_ATOM_STATE_DONE 2
+#define TL_ATOM_STATE_POSTED 3
+
+#define TL_JS_EVENT_START     GATOR_JOB_SLOT_START
+#define TL_JS_EVENT_STOP      GATOR_JOB_SLOT_STOP
+#define TL_JS_EVENT_SOFT_STOP GATOR_JOB_SLOT_SOFT_STOPPED
+
+#define TLSTREAM_ENABLED (1 << 31)
+
+void __kbase_tlstream_tl_new_ctx(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	u32 ctx_nr,
+	u32 tgid);
+void __kbase_tlstream_tl_new_gpu(
+	struct kbase_tlstream *stream,
+	const void *gpu,
+	u32 gpu_id,
+	u32 core_count);
+void __kbase_tlstream_tl_new_lpu(
+	struct kbase_tlstream *stream,
+	const void *lpu,
+	u32 lpu_nr,
+	u32 lpu_fn);
+void __kbase_tlstream_tl_new_atom(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 atom_nr);
+void __kbase_tlstream_tl_new_as(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	u32 as_nr);
+void __kbase_tlstream_tl_del_ctx(
+	struct kbase_tlstream *stream,
+	const void *ctx);
+void __kbase_tlstream_tl_del_atom(
+	struct kbase_tlstream *stream,
+	const void *atom);
+void __kbase_tlstream_tl_lifelink_lpu_gpu(
+	struct kbase_tlstream *stream,
+	const void *lpu,
+	const void *gpu);
+void __kbase_tlstream_tl_lifelink_as_gpu(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	const void *gpu);
+void __kbase_tlstream_tl_ret_ctx_lpu(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	const void *lpu);
+void __kbase_tlstream_tl_ret_atom_ctx(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *ctx);
+void __kbase_tlstream_tl_ret_atom_lpu(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *lpu,
+	const char *attrib_match_list);
+void __kbase_tlstream_tl_nret_ctx_lpu(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	const void *lpu);
+void __kbase_tlstream_tl_nret_atom_ctx(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *ctx);
+void __kbase_tlstream_tl_nret_atom_lpu(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *lpu);
+void __kbase_tlstream_tl_ret_as_ctx(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	const void *ctx);
+void __kbase_tlstream_tl_nret_as_ctx(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	const void *ctx);
+void __kbase_tlstream_tl_ret_atom_as(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *address_space);
+void __kbase_tlstream_tl_nret_atom_as(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *address_space);
+void __kbase_tlstream_tl_attrib_atom_config(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u64 descriptor,
+	u64 affinity,
+	u32 config);
+void __kbase_tlstream_tl_attrib_atom_priority(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 prio);
+void __kbase_tlstream_tl_attrib_atom_state(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 state);
+void __kbase_tlstream_tl_attrib_atom_prioritized(
+	struct kbase_tlstream *stream,
+	const void *atom);
+void __kbase_tlstream_tl_attrib_atom_jit(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u64 edit_addr,
+	u64 new_addr,
+	u32 jit_flags,
+	u64 mem_flags,
+	u32 j_id,
+	u64 com_pgs,
+	u64 extent,
+	u64 va_pgs);
+void __kbase_tlstream_tl_jit_usedpages(
+	struct kbase_tlstream *stream,
+	u64 used_pages,
+	u32 j_id);
+void __kbase_tlstream_tl_attrib_atom_jitallocinfo(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u64 va_pgs,
+	u64 com_pgs,
+	u64 extent,
+	u32 j_id,
+	u32 bin_id,
+	u32 max_allocs,
+	u32 jit_flags,
+	u32 usg_id);
+void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 j_id);
+void __kbase_tlstream_tl_attrib_as_config(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	u64 transtab,
+	u64 memattr,
+	u64 transcfg);
+void __kbase_tlstream_tl_event_lpu_softstop(
+	struct kbase_tlstream *stream,
+	const void *lpu);
+void __kbase_tlstream_tl_event_atom_softstop_ex(
+	struct kbase_tlstream *stream,
+	const void *atom);
+void __kbase_tlstream_tl_event_atom_softstop_issue(
+	struct kbase_tlstream *stream,
+	const void *atom);
+void __kbase_tlstream_tl_event_atom_softjob_start(
+	struct kbase_tlstream *stream,
+	const void *atom);
+void __kbase_tlstream_tl_event_atom_softjob_end(
+	struct kbase_tlstream *stream,
+	const void *atom);
+void __kbase_tlstream_jd_gpu_soft_reset(
+	struct kbase_tlstream *stream,
+	const void *gpu);
+void __kbase_tlstream_aux_pm_state(
+	struct kbase_tlstream *stream,
+	u32 core_type,
+	u64 core_state_bitset);
+void __kbase_tlstream_aux_pagefault(
+	struct kbase_tlstream *stream,
+	u32 ctx_nr,
+	u32 as_nr,
+	u64 page_cnt_change);
+void __kbase_tlstream_aux_pagesalloc(
+	struct kbase_tlstream *stream,
+	u32 ctx_nr,
+	u64 page_cnt);
+void __kbase_tlstream_aux_devfreq_target(
+	struct kbase_tlstream *stream,
+	u64 target_freq);
+void __kbase_tlstream_aux_protected_enter_start(
+	struct kbase_tlstream *stream,
+	const void *gpu);
+void __kbase_tlstream_aux_protected_enter_end(
+	struct kbase_tlstream *stream,
+	const void *gpu);
+void __kbase_tlstream_aux_protected_leave_start(
+	struct kbase_tlstream *stream,
+	const void *gpu);
+void __kbase_tlstream_aux_protected_leave_end(
+	struct kbase_tlstream *stream,
+	const void *gpu);
+void __kbase_tlstream_aux_jit_stats(
+	struct kbase_tlstream *stream,
+	u32 ctx_nr,
+	u32 bid,
+	u32 max_allocs,
+	u32 allocs,
+	u32 va_pages,
+	u32 ph_pages);
+void __kbase_tlstream_aux_event_job_slot(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	u32 slot_nr,
+	u32 atom_nr,
+	u32 event);
+void __kbase_tlstream_tl_new_kcpuqueue(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	const void *ctx,
+	u32 kcpuq_num_pending_cmds);
+void __kbase_tlstream_tl_ret_kcpuqueue_ctx(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	const void *ctx);
+void __kbase_tlstream_tl_del_kcpuqueue(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_nret_kcpuqueue_ctx(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	const void *ctx);
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_signal(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 fence);
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 fence);
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 cqs_obj_gpu_addr,
+	u32 cqs_obj_compare_value);
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_set(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_set(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 cqs_obj_gpu_addr);
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_set(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_debugcopy(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_debugcopy(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 debugcopy_dst_size);
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_debugcopy(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_map_import(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 map_import_buf_gpu_addr);
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_unmap_import(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 map_import_buf_gpu_addr);
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_alloc(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_alloc(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 jit_alloc_gpu_alloc_addr_dest,
+	u64 jit_alloc_va_pages,
+	u64 jit_alloc_commit_pages,
+	u64 jit_alloc_extent,
+	u32 jit_alloc_jit_id,
+	u32 jit_alloc_bin_id,
+	u32 jit_alloc_max_allocations,
+	u32 jit_alloc_flags,
+	u32 jit_alloc_usage_id);
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_alloc(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_free(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_free(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u32 jit_alloc_jit_id);
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_free(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_jit_alloc_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_alloc_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_alloc_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 jit_alloc_gpu_alloc_addr,
+	u64 jit_alloc_mmu_flags);
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_alloc_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_jit_free_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_free_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_free_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 jit_free_pages_used);
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_free_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_errorbarrier(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+
+struct kbase_tlstream;
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_CTX -
+ *   object ctx is created
+ *
+ * @kbdev:	Kbase device
+ * @ctx:	Name of the context object
+ * @ctx_nr:	Kernel context number
+ * @tgid:	Thread Group Id
+ */
+#define KBASE_TLSTREAM_TL_NEW_CTX(	\
+	kbdev,	\
+	ctx,	\
+	ctx_nr,	\
+	tgid	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_new_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				ctx, ctx_nr, tgid);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_GPU -
+ *   object gpu is created
+ *
+ * @kbdev:	Kbase device
+ * @gpu:	Name of the GPU object
+ * @gpu_id:	Name of the GPU object
+ * @core_count:	Number of cores this GPU hosts
+ */
+#define KBASE_TLSTREAM_TL_NEW_GPU(	\
+	kbdev,	\
+	gpu,	\
+	gpu_id,	\
+	core_count	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_new_gpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				gpu, gpu_id, core_count);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_LPU -
+ *   object lpu is created
+ *
+ * @kbdev:	Kbase device
+ * @lpu:	Name of the Logical Processing Unit object
+ * @lpu_nr:	Sequential number assigned to the newly created LPU
+ * @lpu_fn:	Property describing functional abilities of this LPU
+ */
+#define KBASE_TLSTREAM_TL_NEW_LPU(	\
+	kbdev,	\
+	lpu,	\
+	lpu_nr,	\
+	lpu_fn	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_new_lpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				lpu, lpu_nr, lpu_fn);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_ATOM -
+ *   object atom is created
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @atom_nr:	Sequential number of an atom
+ */
+#define KBASE_TLSTREAM_TL_NEW_ATOM(	\
+	kbdev,	\
+	atom,	\
+	atom_nr	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_new_atom(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, atom_nr);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_AS -
+ *   address space object is created
+ *
+ * @kbdev:	Kbase device
+ * @address_space:	Name of the address space object
+ * @as_nr:	Address space number
+ */
+#define KBASE_TLSTREAM_TL_NEW_AS(	\
+	kbdev,	\
+	address_space,	\
+	as_nr	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_new_as(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				address_space, as_nr);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_CTX -
+ *   context is destroyed
+ *
+ * @kbdev:	Kbase device
+ * @ctx:	Name of the context object
+ */
+#define KBASE_TLSTREAM_TL_DEL_CTX(	\
+	kbdev,	\
+	ctx	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_del_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				ctx);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_ATOM -
+ *   atom is destroyed
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_DEL_ATOM(	\
+	kbdev,	\
+	atom	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_del_atom(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU -
+ *   lpu is deleted with gpu
+ *
+ * @kbdev:	Kbase device
+ * @lpu:	Name of the Logical Processing Unit object
+ * @gpu:	Name of the GPU object
+ */
+#define KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU(	\
+	kbdev,	\
+	lpu,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_lifelink_lpu_gpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				lpu, gpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_LIFELINK_AS_GPU -
+ *   address space is deleted with gpu
+ *
+ * @kbdev:	Kbase device
+ * @address_space:	Name of the address space object
+ * @gpu:	Name of the GPU object
+ */
+#define KBASE_TLSTREAM_TL_LIFELINK_AS_GPU(	\
+	kbdev,	\
+	address_space,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_lifelink_as_gpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				address_space, gpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_CTX_LPU -
+ *   context is retained by lpu
+ *
+ * @kbdev:	Kbase device
+ * @ctx:	Name of the context object
+ * @lpu:	Name of the Logical Processing Unit object
+ */
+#define KBASE_TLSTREAM_TL_RET_CTX_LPU(	\
+	kbdev,	\
+	ctx,	\
+	lpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_ret_ctx_lpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				ctx, lpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_CTX -
+ *   atom is retained by context
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @ctx:	Name of the context object
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_CTX(	\
+	kbdev,	\
+	atom,	\
+	ctx	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_ret_atom_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, ctx);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_LPU -
+ *   atom is retained by lpu
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @lpu:	Name of the Logical Processing Unit object
+ * @attrib_match_list:	List containing match operator attributes
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_LPU(	\
+	kbdev,	\
+	atom,	\
+	lpu,	\
+	attrib_match_list	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_ret_atom_lpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, lpu, attrib_match_list);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_CTX_LPU -
+ *   context is released by lpu
+ *
+ * @kbdev:	Kbase device
+ * @ctx:	Name of the context object
+ * @lpu:	Name of the Logical Processing Unit object
+ */
+#define KBASE_TLSTREAM_TL_NRET_CTX_LPU(	\
+	kbdev,	\
+	ctx,	\
+	lpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_nret_ctx_lpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				ctx, lpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_CTX -
+ *   atom is released by context
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @ctx:	Name of the context object
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX(	\
+	kbdev,	\
+	atom,	\
+	ctx	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_nret_atom_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, ctx);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_LPU -
+ *   atom is released by lpu
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @lpu:	Name of the Logical Processing Unit object
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU(	\
+	kbdev,	\
+	atom,	\
+	lpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_nret_atom_lpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, lpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_AS_CTX -
+ *   address space is retained by context
+ *
+ * @kbdev:	Kbase device
+ * @address_space:	Name of the address space object
+ * @ctx:	Name of the context object
+ */
+#define KBASE_TLSTREAM_TL_RET_AS_CTX(	\
+	kbdev,	\
+	address_space,	\
+	ctx	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_ret_as_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				address_space, ctx);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_AS_CTX -
+ *   address space is released by context
+ *
+ * @kbdev:	Kbase device
+ * @address_space:	Name of the address space object
+ * @ctx:	Name of the context object
+ */
+#define KBASE_TLSTREAM_TL_NRET_AS_CTX(	\
+	kbdev,	\
+	address_space,	\
+	ctx	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_nret_as_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				address_space, ctx);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_AS -
+ *   atom is retained by address space
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @address_space:	Name of the address space object
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_AS(	\
+	kbdev,	\
+	atom,	\
+	address_space	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_ret_atom_as(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, address_space);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_AS -
+ *   atom is released by address space
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @address_space:	Name of the address space object
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_AS(	\
+	kbdev,	\
+	atom,	\
+	address_space	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_nret_atom_as(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, address_space);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG -
+ *   atom job slot attributes
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @descriptor:	Job descriptor address
+ * @affinity:	Job affinity
+ * @config:	Job config
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(	\
+	kbdev,	\
+	atom,	\
+	descriptor,	\
+	affinity,	\
+	config	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_attrib_atom_config(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, descriptor, affinity, config);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY -
+ *   atom priority
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @prio:	Atom priority
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(	\
+	kbdev,	\
+	atom,	\
+	prio	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)	\
+			__kbase_tlstream_tl_attrib_atom_priority(	\
+				__TL_DISPATCH_STREAM(kbdev, obj),	\
+				atom, prio);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE -
+ *   atom state
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @state:	Atom state
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(	\
+	kbdev,	\
+	atom,	\
+	state	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)	\
+			__kbase_tlstream_tl_attrib_atom_state(	\
+				__TL_DISPATCH_STREAM(kbdev, obj),	\
+				atom, state);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED -
+ *   atom caused priority change
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(	\
+	kbdev,	\
+	atom	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)	\
+			__kbase_tlstream_tl_attrib_atom_prioritized(	\
+				__TL_DISPATCH_STREAM(kbdev, obj),	\
+				atom);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT -
+ *   jit done for atom
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @edit_addr:	Address edited by jit
+ * @new_addr:	Address placed into the edited location
+ * @jit_flags:	Flags specifying the special requirements for
+ * the JIT allocation.
+ * @mem_flags:	Flags defining the properties of a memory region
+ * @j_id:	Unique ID provided by the caller, this is used
+ * to pair allocation and free requests.
+ * @com_pgs:	The minimum number of physical pages which
+ * should back the allocation.
+ * @extent:	Granularity of physical pages to grow the
+ * allocation by during a fault.
+ * @va_pgs:	The minimum number of virtual pages required
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(	\
+	kbdev,	\
+	atom,	\
+	edit_addr,	\
+	new_addr,	\
+	jit_flags,	\
+	mem_flags,	\
+	j_id,	\
+	com_pgs,	\
+	extent,	\
+	va_pgs	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED)	\
+			__kbase_tlstream_tl_attrib_atom_jit(	\
+				__TL_DISPATCH_STREAM(kbdev, obj),	\
+				atom, edit_addr, new_addr, jit_flags, mem_flags, j_id, com_pgs, extent, va_pgs);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_JIT_USEDPAGES -
+ *   used pages for jit
+ *
+ * @kbdev:	Kbase device
+ * @used_pages:	Number of pages used for jit
+ * @j_id:	Unique ID provided by the caller, this is used
+ * to pair allocation and free requests.
+ */
+#define KBASE_TLSTREAM_TL_JIT_USEDPAGES(	\
+	kbdev,	\
+	used_pages,	\
+	j_id	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_jit_usedpages(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				used_pages, j_id);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO -
+ *   Information about JIT allocations
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @va_pgs:	The minimum number of virtual pages required
+ * @com_pgs:	The minimum number of physical pages which
+ * should back the allocation.
+ * @extent:	Granularity of physical pages to grow the
+ * allocation by during a fault.
+ * @j_id:	Unique ID provided by the caller, this is used
+ * to pair allocation and free requests.
+ * @bin_id:	The JIT allocation bin, used in conjunction with
+ * max_allocations to limit the number of each
+ * type of JIT allocation.
+ * @max_allocs:	Maximum allocations allowed in this bin.
+ * @jit_flags:	Flags specifying the special requirements for
+ * the JIT allocation.
+ * @usg_id:	A hint about which allocation should be reused.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(	\
+	kbdev,	\
+	atom,	\
+	va_pgs,	\
+	com_pgs,	\
+	extent,	\
+	j_id,	\
+	bin_id,	\
+	max_allocs,	\
+	jit_flags,	\
+	usg_id	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_attrib_atom_jitallocinfo(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, va_pgs, com_pgs, extent, j_id, bin_id, max_allocs, jit_flags, usg_id);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO -
+ *   Information about JIT frees
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @j_id:	Unique ID provided by the caller, this is used
+ * to pair allocation and free requests.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(	\
+	kbdev,	\
+	atom,	\
+	j_id	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_attrib_atom_jitfreeinfo(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, j_id);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG -
+ *   address space attributes
+ *
+ * @kbdev:	Kbase device
+ * @address_space:	Name of the address space object
+ * @transtab:	Configuration of the TRANSTAB register
+ * @memattr:	Configuration of the MEMATTR register
+ * @transcfg:	Configuration of the TRANSCFG register (or zero if not present)
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(	\
+	kbdev,	\
+	address_space,	\
+	transtab,	\
+	memattr,	\
+	transcfg	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_attrib_as_config(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				address_space, transtab, memattr, transcfg);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP -
+ *   softstop event on given lpu
+ *
+ * @kbdev:	Kbase device
+ * @lpu:	Name of the Logical Processing Unit object
+ */
+#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(	\
+	kbdev,	\
+	lpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_lpu_softstop(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				lpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX -
+ *   atom softstopped
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(	\
+	kbdev,	\
+	atom	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_atom_softstop_ex(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE -
+ *   atom softstop issued
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(	\
+	kbdev,	\
+	atom	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_atom_softstop_issue(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START -
+ *   atom soft job has started
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(	\
+	kbdev,	\
+	atom	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_atom_softjob_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END -
+ *   atom soft job has completed
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(	\
+	kbdev,	\
+	atom	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_atom_softjob_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_JD_GPU_SOFT_RESET -
+ *   gpu soft reset
+ *
+ * @kbdev:	Kbase device
+ * @gpu:	Name of the GPU object
+ */
+#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET(	\
+	kbdev,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_jd_gpu_soft_reset(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				gpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_PM_STATE -
+ *   PM state
+ *
+ * @kbdev:	Kbase device
+ * @core_type:	Core type (shader, tiler, l2 cache, l3 cache)
+ * @core_state_bitset:	64bits bitmask reporting power state of the cores
+ * (1-ON, 0-OFF)
+ */
+#define KBASE_TLSTREAM_AUX_PM_STATE(	\
+	kbdev,	\
+	core_type,	\
+	core_state_bitset	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_pm_state(	\
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				core_type, core_state_bitset);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGEFAULT -
+ *   Page fault
+ *
+ * @kbdev:	Kbase device
+ * @ctx_nr:	Kernel context number
+ * @as_nr:	Address space number
+ * @page_cnt_change:	Number of pages to be added
+ */
+#define KBASE_TLSTREAM_AUX_PAGEFAULT(	\
+	kbdev,	\
+	ctx_nr,	\
+	as_nr,	\
+	page_cnt_change	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_pagefault(	\
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				ctx_nr, as_nr, page_cnt_change);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGESALLOC -
+ *   Total alloc pages change
+ *
+ * @kbdev:	Kbase device
+ * @ctx_nr:	Kernel context number
+ * @page_cnt:	Number of pages used by the context
+ */
+#define KBASE_TLSTREAM_AUX_PAGESALLOC(	\
+	kbdev,	\
+	ctx_nr,	\
+	page_cnt	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_pagesalloc(	\
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				ctx_nr, page_cnt);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET -
+ *   New device frequency target
+ *
+ * @kbdev:	Kbase device
+ * @target_freq:	New target frequency
+ */
+#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(	\
+	kbdev,	\
+	target_freq	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_devfreq_target(	\
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				target_freq);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START -
+ *   enter protected mode start
+ *
+ * @kbdev:	Kbase device
+ * @gpu:	Name of the GPU object
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(	\
+	kbdev,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)	\
+			__kbase_tlstream_aux_protected_enter_start(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				gpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END -
+ *   enter protected mode end
+ *
+ * @kbdev:	Kbase device
+ * @gpu:	Name of the GPU object
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(	\
+	kbdev,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)	\
+			__kbase_tlstream_aux_protected_enter_end(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				gpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START -
+ *   leave protected mode start
+ *
+ * @kbdev:	Kbase device
+ * @gpu:	Name of the GPU object
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(	\
+	kbdev,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)	\
+			__kbase_tlstream_aux_protected_leave_start(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				gpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END -
+ *   leave protected mode end
+ *
+ * @kbdev:	Kbase device
+ * @gpu:	Name of the GPU object
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(	\
+	kbdev,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)	\
+			__kbase_tlstream_aux_protected_leave_end(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				gpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_JIT_STATS -
+ *   per-bin JIT statistics
+ *
+ * @kbdev:	Kbase device
+ * @ctx_nr:	Kernel context number
+ * @bid:	JIT bin id
+ * @max_allocs:	Maximum allocations allowed in this bin.
+ * @allocs:	Number of active allocations in this bin
+ * @va_pages:	Number of virtual pages allocated in this bin
+ * @ph_pages:	Number of physical pages allocated in this bin
+ */
+#define KBASE_TLSTREAM_AUX_JIT_STATS(	\
+	kbdev,	\
+	ctx_nr,	\
+	bid,	\
+	max_allocs,	\
+	allocs,	\
+	va_pages,	\
+	ph_pages	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_jit_stats(	\
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT -
+ *   event on a given job slot
+ *
+ * @kbdev:	Kbase device
+ * @ctx:	Name of the context object
+ * @slot_nr:	Job slot number
+ * @atom_nr:	Sequential number of an atom
+ * @event:	Event type. One of TL_JS_EVENT values
+ */
+#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(	\
+	kbdev,	\
+	ctx,	\
+	slot_nr,	\
+	atom_nr,	\
+	event	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_event_job_slot(	\
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				ctx, slot_nr, atom_nr, event);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_KCPUQUEUE -
+ *   New KCPU Queue
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @ctx:	Name of the context object
+ * @kcpuq_num_pending_cmds:	Number of commands already enqueued
+ * in the KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_NEW_KCPUQUEUE(	\
+	kbdev,	\
+	kcpu_queue,	\
+	ctx,	\
+	kcpuq_num_pending_cmds	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_new_kcpuqueue(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, ctx, kcpuq_num_pending_cmds);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_KCPUQUEUE_CTX -
+ *   Context retains KCPU Queue
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @ctx:	Name of the context object
+ */
+#define KBASE_TLSTREAM_TL_RET_KCPUQUEUE_CTX(	\
+	kbdev,	\
+	kcpu_queue,	\
+	ctx	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_ret_kcpuqueue_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, ctx);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_KCPUQUEUE -
+ *   Delete KCPU Queue
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_DEL_KCPUQUEUE(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_del_kcpuqueue(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_KCPUQUEUE_CTX -
+ *   Context releases KCPU Queue
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @ctx:	Name of the context object
+ */
+#define KBASE_TLSTREAM_TL_NRET_KCPUQUEUE_CTX(	\
+	kbdev,	\
+	kcpu_queue,	\
+	ctx	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_nret_kcpuqueue_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, ctx);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL -
+ *   KCPU Queue enqueues Signal on Fence
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @fence:	Fence object handle
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(	\
+	kbdev,	\
+	kcpu_queue,	\
+	fence	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_signal(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, fence);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT -
+ *   KCPU Queue enqueues Wait on Fence
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @fence:	Fence object handle
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT(	\
+	kbdev,	\
+	kcpu_queue,	\
+	fence	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_wait(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, fence);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT -
+ *   Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_wait(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT -
+ *   Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @cqs_obj_gpu_addr:	CQS Object GPU ptr
+ * @cqs_obj_compare_value:	Semaphore value that should be exceeded
+ * for the WAIT to pass
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT(	\
+	kbdev,	\
+	kcpu_queue,	\
+	cqs_obj_gpu_addr,	\
+	cqs_obj_compare_value	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_wait(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, cqs_obj_gpu_addr, cqs_obj_compare_value);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT -
+ *   End array of KCPU Queue enqueues Wait on Cross Queue Sync Object
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_wait(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET -
+ *   Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_set(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET -
+ *   Array item of KCPU Queue enqueues Set on Cross Queue Sync Object
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @cqs_obj_gpu_addr:	CQS Object GPU ptr
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET(	\
+	kbdev,	\
+	kcpu_queue,	\
+	cqs_obj_gpu_addr	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_set(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, cqs_obj_gpu_addr);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET -
+ *   End array of KCPU Queue enqueues Set on Cross Queue Sync Object
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_set(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY -
+ *   Begin array of KCPU Queue enqueues Debug Copy
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_debugcopy(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY -
+ *   Array item of KCPU Queue enqueues Debug Copy
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @debugcopy_dst_size:	Debug Copy destination size
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY(	\
+	kbdev,	\
+	kcpu_queue,	\
+	debugcopy_dst_size	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_debugcopy(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, debugcopy_dst_size);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY -
+ *   End array of KCPU Queue enqueues Debug Copy
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_debugcopy(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT -
+ *   KCPU Queue enqueues Map Import
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @map_import_buf_gpu_addr:	Map import buffer GPU ptr
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT(	\
+	kbdev,	\
+	kcpu_queue,	\
+	map_import_buf_gpu_addr	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_enqueue_map_import(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, map_import_buf_gpu_addr);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT -
+ *   KCPU Queue enqueues Unmap Import
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @map_import_buf_gpu_addr:	Map import buffer GPU ptr
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(	\
+	kbdev,	\
+	kcpu_queue,	\
+	map_import_buf_gpu_addr	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_enqueue_unmap_import(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, map_import_buf_gpu_addr);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC -
+ *   Begin array of KCPU Queue enqueues JIT Alloc
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_alloc(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC -
+ *   Array item of KCPU Queue enqueues JIT Alloc
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @jit_alloc_gpu_alloc_addr_dest:	The GPU virtual address to write
+ * the JIT allocated GPU virtual address to
+ * @jit_alloc_va_pages:	The minimum number of virtual pages required
+ * @jit_alloc_commit_pages:	The minimum number of physical pages which
+ * should back the allocation
+ * @jit_alloc_extent:	Granularity of physical pages to grow the allocation
+ * by during a fault
+ * @jit_alloc_jit_id:	Unique ID provided by the caller, this is used
+ * to pair allocation and free requests. Zero is not a valid value
+ * @jit_alloc_bin_id:	The JIT allocation bin, used in conjunction with
+ * max_allocations to limit the number of each type of JIT allocation
+ * @jit_alloc_max_allocations:	The maximum number of allocations
+ * allowed within the bin specified by bin_id. Should be the same for all
+ * JIT allocations within the same bin.
+ * @jit_alloc_flags:	Flags specifying the special requirements for the
+ * JIT allocation
+ * @jit_alloc_usage_id:	A hint about which allocation should be
+ * reused. The kernel should attempt to use a previous allocation with the same
+ * usage_id
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC(	\
+	kbdev,	\
+	kcpu_queue,	\
+	jit_alloc_gpu_alloc_addr_dest,	\
+	jit_alloc_va_pages,	\
+	jit_alloc_commit_pages,	\
+	jit_alloc_extent,	\
+	jit_alloc_jit_id,	\
+	jit_alloc_bin_id,	\
+	jit_alloc_max_allocations,	\
+	jit_alloc_flags,	\
+	jit_alloc_usage_id	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_alloc(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, jit_alloc_gpu_alloc_addr_dest, jit_alloc_va_pages, jit_alloc_commit_pages, jit_alloc_extent, jit_alloc_jit_id, jit_alloc_bin_id, jit_alloc_max_allocations, jit_alloc_flags, jit_alloc_usage_id);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC -
+ *   End array of KCPU Queue enqueues JIT Alloc
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_alloc(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE -
+ *   Begin array of KCPU Queue enqueues JIT Free
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_free(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE -
+ *   Array item of KCPU Queue enqueues JIT Free
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @jit_alloc_jit_id:	Unique ID provided by the caller, this is used
+ * to pair allocation and free requests. Zero is not a valid value
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE(	\
+	kbdev,	\
+	kcpu_queue,	\
+	jit_alloc_jit_id	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_free(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, jit_alloc_jit_id);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE -
+ *   End array of KCPU Queue enqueues JIT Free
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_free(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START -
+ *   KCPU Queue starts a Signal on Fence
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END -
+ *   KCPU Queue ends a Signal on Fence
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START -
+ *   KCPU Queue starts a Wait on Fence
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END -
+ *   KCPU Queue ends a Wait on Fence
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START -
+ *   KCPU Queue starts a Wait on an array of Cross Queue Sync Objects
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END -
+ *   KCPU Queue ends a Wait on an array of Cross Queue Sync Objects
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START -
+ *   KCPU Queue starts a Set on an array of Cross Queue Sync Objects
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END -
+ *   KCPU Queue ends a Set on an array of Cross Queue Sync Objects
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START -
+ *   KCPU Queue starts an array of Debug Copys
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END -
+ *   KCPU Queue ends an array of Debug Copys
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START -
+ *   KCPU Queue starts a Map Import
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_map_import_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END -
+ *   KCPU Queue ends a Map Import
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_map_import_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START -
+ *   KCPU Queue starts an Unmap Import
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END -
+ *   KCPU Queue ends an Unmap Import
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START -
+ *   KCPU Queue starts an array of JIT Allocs
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_jit_alloc_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END -
+ *   Begin array of KCPU Queue ends an array of JIT Allocs
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_alloc_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END -
+ *   Array item of KCPU Queue ends an array of JIT Allocs
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @jit_alloc_gpu_alloc_addr:	The JIT allocated GPU virtual address
+ * @jit_alloc_mmu_flags:	The MMU flags for the JIT allocation
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(	\
+	kbdev,	\
+	kcpu_queue,	\
+	jit_alloc_gpu_alloc_addr,	\
+	jit_alloc_mmu_flags	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_alloc_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, jit_alloc_gpu_alloc_addr, jit_alloc_mmu_flags);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END -
+ *   End array of KCPU Queue ends an array of JIT Allocs
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_alloc_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START -
+ *   KCPU Queue starts an array of JIT Frees
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_jit_free_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END -
+ *   Begin array of KCPU Queue ends an array of JIT Frees
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_free_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END -
+ *   Array item of KCPU Queue ends an array of JIT Frees
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @jit_free_pages_used:	The actual number of pages used by the JIT
+ * allocation
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END(	\
+	kbdev,	\
+	kcpu_queue,	\
+	jit_free_pages_used	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_free_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, jit_free_pages_used);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END -
+ *   End array of KCPU Queue ends an array of JIT Frees
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_free_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER -
+ *   KCPU Queue executes an Error Barrier
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_errorbarrier(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+
+/* Gator tracepoints are hooked into TLSTREAM interface.
+ * When the following tracepoints are called, corresponding
+ * Gator tracepoint will be called as well.
+ */
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+/* `event` is one of TL_JS_EVENT values here.
+ * The values of TL_JS_EVENT are guaranteed to match
+ * with corresponding GATOR_JOB_SLOT values.
+ */
+#undef KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT
+#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, \
+	context, slot_nr, atom_nr, event)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		kbase_trace_mali_job_slots_event(kbdev->id,	\
+			GATOR_MAKE_EVENT(event, slot_nr),	\
+			context, (u8) atom_nr);	\
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_event_job_slot(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				context, slot_nr, atom_nr, event);	\
+	} while (0)
+
+#undef KBASE_TLSTREAM_AUX_PM_STATE
+#define KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		kbase_trace_mali_pm_status(kbdev->id,	\
+			core_type, state);	\
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_pm_state(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				core_type, state);	\
+	} while (0)
+
+#undef KBASE_TLSTREAM_AUX_PAGEFAULT
+#define KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, \
+	ctx_nr, as_nr, page_cnt_change)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		kbase_trace_mali_page_fault_insert_pages(kbdev->id,	\
+			as_nr,	\
+			page_cnt_change);	\
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_pagefault(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				ctx_nr, as_nr, page_cnt_change);	\
+	} while (0)
+
+/* kbase_trace_mali_total_alloc_pages_change is handled differently here.
+ * We stream the total amount of pages allocated for `kbdev` rather
+ * than `page_count`, which is per-context.
+ */
+#undef KBASE_TLSTREAM_AUX_PAGESALLOC
+#define KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, ctx_nr, page_cnt)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		u32 global_pages_count = \
+			atomic_read(&kbdev->memdev.used_pages);	\
+			\
+		kbase_trace_mali_total_alloc_pages_change(kbdev->id,	\
+			global_pages_count);	\
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_pagesalloc(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				ctx_nr, page_cnt);	\
+	} while (0)
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+/* clang-format on */
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100755
index 0000000..8d4f044
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,55 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+static inline void kbase_timer_setup(struct timer_list *timer,
+				     void (*callback)(struct timer_list *timer))
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
+	setup_timer(timer, (void (*)(unsigned long)) callback,
+			(unsigned long) timer);
+#else
+	timer_setup(timer, callback, 0);
+#endif
+}
+
+#ifndef WRITE_ONCE
+	#ifdef ASSIGN_ONCE
+		#define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x)
+	#else
+		#define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val))
+	#endif
+#endif
+
+#ifndef READ_ONCE
+	#define READ_ONCE(x) ACCESS_ONCE(x)
+#endif
+
+#endif				/* _KBASE_UTILITY_H */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
new file mode 100755
index 0000000..377642d
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -0,0 +1,990 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_vinstr.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase_hwcnt_reader.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_ioctl.h"
+#include "mali_malisw.h"
+#include "mali_kbase_debug.h"
+
+#include <linux/anon_inodes.h>
+#include <linux/fcntl.h>
+#include <linux/fs.h>
+#include <linux/hrtimer.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+/* Hwcnt reader API version */
+#define HWCNT_READER_API 1
+
+/* The minimum allowed interval between dumps (equivalent to 10KHz) */
+#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC)
+
+/* The maximum allowed buffers per client */
+#define MAX_BUFFER_COUNT 32
+
+/**
+ * struct kbase_vinstr_context - IOCTL interface for userspace hardware
+ *                               counters.
+ * @hvirt:         Hardware counter virtualizer used by vinstr.
+ * @metadata:      Hardware counter metadata provided by virtualizer.
+ * @lock:          Lock protecting all vinstr state.
+ * @suspend_count: Suspend reference count. If non-zero, timer and worker are
+ *                 prevented from being re-scheduled.
+ * @client_count:  Number of vinstr clients.
+ * @clients:       List of vinstr clients.
+ * @dump_timer:    Timer that enqueues dump_work to a workqueue.
+ * @dump_work:     Worker for performing periodic counter dumps.
+ */
+struct kbase_vinstr_context {
+	struct kbase_hwcnt_virtualizer *hvirt;
+	const struct kbase_hwcnt_metadata *metadata;
+	struct mutex lock;
+	size_t suspend_count;
+	size_t client_count;
+	struct list_head clients;
+	struct hrtimer dump_timer;
+	struct work_struct dump_work;
+};
+
+/**
+ * struct kbase_vinstr_client - A vinstr client attached to a vinstr context.
+ * @vctx:              Vinstr context client is attached to.
+ * @hvcli:             Hardware counter virtualizer client.
+ * @node:              Node used to attach this client to list in vinstr
+ *                     context.
+ * @dump_interval_ns:  Interval between periodic dumps. If 0, not a periodic
+ *                     client.
+ * @next_dump_time_ns: Time in ns when this client's next periodic dump must
+ *                     occur. If 0, not a periodic client.
+ * @enable_map:        Counters enable map.
+ * @dump_bufs:         Array of dump buffers allocated by this client.
+ * @dump_bufs_meta:    Metadata of dump buffers.
+ * @meta_idx:          Index of metadata being accessed by userspace.
+ * @read_idx:          Index of buffer read by userspace.
+ * @write_idx:         Index of buffer being written by dump worker.
+ * @waitq:             Client's notification queue.
+ */
+struct kbase_vinstr_client {
+	struct kbase_vinstr_context *vctx;
+	struct kbase_hwcnt_virtualizer_client *hvcli;
+	struct list_head node;
+	u64 next_dump_time_ns;
+	u32 dump_interval_ns;
+	struct kbase_hwcnt_enable_map enable_map;
+	struct kbase_hwcnt_dump_buffer_array dump_bufs;
+	struct kbase_hwcnt_reader_metadata *dump_bufs_meta;
+	atomic_t meta_idx;
+	atomic_t read_idx;
+	atomic_t write_idx;
+	wait_queue_head_t waitq;
+};
+
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+	struct file *filp,
+	poll_table *wait);
+
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+	struct file *filp,
+	unsigned int cmd,
+	unsigned long arg);
+
+static int kbasep_vinstr_hwcnt_reader_mmap(
+	struct file *filp,
+	struct vm_area_struct *vma);
+
+static int kbasep_vinstr_hwcnt_reader_release(
+	struct inode *inode,
+	struct file *filp);
+
+/* Vinstr client file operations */
+static const struct file_operations vinstr_client_fops = {
+	.owner = THIS_MODULE,
+	.poll           = kbasep_vinstr_hwcnt_reader_poll,
+	.unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,
+	.compat_ioctl   = kbasep_vinstr_hwcnt_reader_ioctl,
+	.mmap           = kbasep_vinstr_hwcnt_reader_mmap,
+	.release        = kbasep_vinstr_hwcnt_reader_release,
+};
+
+/**
+ * kbasep_vinstr_timestamp_ns() - Get the current time in nanoseconds.
+ *
+ * Return: Current time in nanoseconds.
+ */
+static u64 kbasep_vinstr_timestamp_ns(void)
+{
+	struct timespec ts;
+
+	getrawmonotonic(&ts);
+	return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+/**
+ * kbasep_vinstr_next_dump_time_ns() - Calculate the next periodic dump time.
+ * @cur_ts_ns: Current time in nanoseconds.
+ * @interval:  Interval between dumps in nanoseconds.
+ *
+ * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump
+ *         time that occurs after cur_ts_ns.
+ */
+static u64 kbasep_vinstr_next_dump_time_ns(u64 cur_ts_ns, u32 interval)
+{
+	/* Non-periodic client */
+	if (interval == 0)
+		return 0;
+
+	/*
+	 * Return the next interval after the current time relative to t=0.
+	 * This means multiple clients with the same period will synchronise,
+	 * regardless of when they were started, allowing the worker to be
+	 * scheduled less frequently.
+	 */
+	do_div(cur_ts_ns, interval);
+	return (cur_ts_ns + 1) * interval;
+}
+
+/**
+ * kbasep_vinstr_client_dump() - Perform a dump for a client.
+ * @vcli:     Non-NULL pointer to a vinstr client.
+ * @event_id: Event type that triggered the dump.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_vinstr_client_dump(
+	struct kbase_vinstr_client *vcli,
+	enum base_hwcnt_reader_event event_id)
+{
+	int errcode;
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+	unsigned int write_idx;
+	unsigned int read_idx;
+	struct kbase_hwcnt_dump_buffer *dump_buf;
+	struct kbase_hwcnt_reader_metadata *meta;
+
+	WARN_ON(!vcli);
+	lockdep_assert_held(&vcli->vctx->lock);
+
+	write_idx = atomic_read(&vcli->write_idx);
+	read_idx = atomic_read(&vcli->read_idx);
+
+	/* Check if there is a place to copy HWC block into. */
+	if (write_idx - read_idx == vcli->dump_bufs.buf_cnt)
+		return -EBUSY;
+	write_idx %= vcli->dump_bufs.buf_cnt;
+
+	dump_buf = &vcli->dump_bufs.bufs[write_idx];
+	meta = &vcli->dump_bufs_meta[write_idx];
+
+	errcode = kbase_hwcnt_virtualizer_client_dump(
+		vcli->hvcli, &ts_start_ns, &ts_end_ns, dump_buf);
+	if (errcode)
+		return errcode;
+
+	/* Patch the dump buf headers, to hide the counters that other hwcnt
+	 * clients are using.
+	 */
+	kbase_hwcnt_gpu_patch_dump_headers(dump_buf, &vcli->enable_map);
+
+	/* Zero all non-enabled counters (current values are undefined) */
+	kbase_hwcnt_dump_buffer_zero_non_enabled(dump_buf, &vcli->enable_map);
+
+	meta->timestamp = ts_end_ns;
+	meta->event_id = event_id;
+	meta->buffer_idx = write_idx;
+
+	/* Notify client. Make sure all changes to memory are visible. */
+	wmb();
+	atomic_inc(&vcli->write_idx);
+	wake_up_interruptible(&vcli->waitq);
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_client_clear() - Reset all the client's counters to zero.
+ * @vcli: Non-NULL pointer to a vinstr client.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_vinstr_client_clear(struct kbase_vinstr_client *vcli)
+{
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+
+	WARN_ON(!vcli);
+	lockdep_assert_held(&vcli->vctx->lock);
+
+	/* A virtualizer dump with a NULL buffer will just clear the virtualizer
+	 * client's buffer.
+	 */
+	return kbase_hwcnt_virtualizer_client_dump(
+		vcli->hvcli, &ts_start_ns, &ts_end_ns, NULL);
+}
+
+/**
+ * kbasep_vinstr_reschedule_worker() - Update next dump times for all periodic
+ *                                     vinstr clients, then reschedule the dump
+ *                                     worker appropriately.
+ * @vctx: Non-NULL pointer to the vinstr context.
+ *
+ * If there are no periodic clients, then the dump worker will not be
+ * rescheduled. Else, the dump worker will be rescheduled for the next periodic
+ * client dump.
+ */
+static void kbasep_vinstr_reschedule_worker(struct kbase_vinstr_context *vctx)
+{
+	u64 cur_ts_ns;
+	u64 earliest_next_ns = U64_MAX;
+	struct kbase_vinstr_client *pos;
+
+	WARN_ON(!vctx);
+	lockdep_assert_held(&vctx->lock);
+
+	cur_ts_ns = kbasep_vinstr_timestamp_ns();
+
+	/*
+	 * Update each client's next dump time, and find the earliest next
+	 * dump time if any of the clients have a non-zero interval.
+	 */
+	list_for_each_entry(pos, &vctx->clients, node) {
+		const u64 cli_next_ns =
+			kbasep_vinstr_next_dump_time_ns(
+				cur_ts_ns, pos->dump_interval_ns);
+
+		/* Non-zero next dump time implies a periodic client */
+		if ((cli_next_ns != 0) && (cli_next_ns < earliest_next_ns))
+			earliest_next_ns = cli_next_ns;
+
+		pos->next_dump_time_ns = cli_next_ns;
+	}
+
+	/* Cancel the timer if it is already pending */
+	hrtimer_cancel(&vctx->dump_timer);
+
+	/* Start the timer if there are periodic clients and vinstr is not
+	 * suspended.
+	 */
+	if ((earliest_next_ns != U64_MAX) &&
+	    (vctx->suspend_count == 0) &&
+	    !WARN_ON(earliest_next_ns < cur_ts_ns))
+		hrtimer_start(
+			&vctx->dump_timer,
+			ns_to_ktime(earliest_next_ns - cur_ts_ns),
+			HRTIMER_MODE_REL);
+}
+
+/**
+ * kbasep_vinstr_dump_worker()- Dump worker, that dumps all periodic clients
+ *                              that need to be dumped, then reschedules itself.
+ * @work: Work structure.
+ */
+static void kbasep_vinstr_dump_worker(struct work_struct *work)
+{
+	struct kbase_vinstr_context *vctx =
+		container_of(work, struct kbase_vinstr_context, dump_work);
+	struct kbase_vinstr_client *pos;
+	u64 cur_time_ns;
+
+	mutex_lock(&vctx->lock);
+
+	cur_time_ns = kbasep_vinstr_timestamp_ns();
+
+	/* Dump all periodic clients whose next dump time is before the current
+	 * time.
+	 */
+	list_for_each_entry(pos, &vctx->clients, node) {
+		if ((pos->next_dump_time_ns != 0) &&
+			(pos->next_dump_time_ns < cur_time_ns))
+			kbasep_vinstr_client_dump(
+				pos, BASE_HWCNT_READER_EVENT_PERIODIC);
+	}
+
+	/* Update the next dump times of all periodic clients, then reschedule
+	 * this worker at the earliest next dump time.
+	 */
+	kbasep_vinstr_reschedule_worker(vctx);
+
+	mutex_unlock(&vctx->lock);
+}
+
+/**
+ * kbasep_vinstr_dump_timer() - Dump timer that schedules the dump worker for
+ *                              execution as soon as possible.
+ * @timer: Timer structure.
+ */
+static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer)
+{
+	struct kbase_vinstr_context *vctx =
+		container_of(timer, struct kbase_vinstr_context, dump_timer);
+
+	/* We don't need to check vctx->suspend_count here, as the suspend
+	 * function will ensure that any worker enqueued here is immediately
+	 * cancelled, and the worker itself won't reschedule this timer if
+	 * suspend_count != 0.
+	 */
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+	queue_work(system_wq, &vctx->dump_work);
+#else
+	queue_work(system_highpri_wq, &vctx->dump_work);
+#endif
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * kbasep_vinstr_client_destroy() - Destroy a vinstr client.
+ * @vcli: vinstr client. Must not be attached to a vinstr context.
+ */
+static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli)
+{
+	if (!vcli)
+		return;
+
+	kbase_hwcnt_virtualizer_client_destroy(vcli->hvcli);
+	kfree(vcli->dump_bufs_meta);
+	kbase_hwcnt_dump_buffer_array_free(&vcli->dump_bufs);
+	kbase_hwcnt_enable_map_free(&vcli->enable_map);
+	kfree(vcli);
+}
+
+/**
+ * kbasep_vinstr_client_create() - Create a vinstr client. Does not attach to
+ *                                 the vinstr context.
+ * @vctx:     Non-NULL pointer to vinstr context.
+ * @setup:    Non-NULL pointer to hardware counter ioctl setup structure.
+ *            setup->buffer_count must not be 0.
+ * @out_vcli: Non-NULL pointer to where created client will be stored on
+ *            success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_vinstr_client_create(
+	struct kbase_vinstr_context *vctx,
+	struct kbase_ioctl_hwcnt_reader_setup *setup,
+	struct kbase_vinstr_client **out_vcli)
+{
+	int errcode;
+	struct kbase_vinstr_client *vcli;
+	struct kbase_hwcnt_physical_enable_map phys_em;
+
+	WARN_ON(!vctx);
+	WARN_ON(!setup);
+	WARN_ON(setup->buffer_count == 0);
+
+	vcli = kzalloc(sizeof(*vcli), GFP_KERNEL);
+	if (!vcli)
+		return -ENOMEM;
+
+	vcli->vctx = vctx;
+
+	errcode = kbase_hwcnt_enable_map_alloc(
+		vctx->metadata, &vcli->enable_map);
+	if (errcode)
+		goto error;
+
+	phys_em.jm_bm = setup->jm_bm;
+	phys_em.shader_bm = setup->shader_bm;
+	phys_em.tiler_bm = setup->tiler_bm;
+	phys_em.mmu_l2_bm = setup->mmu_l2_bm;
+	kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em);
+
+	errcode = kbase_hwcnt_dump_buffer_array_alloc(
+		vctx->metadata, setup->buffer_count, &vcli->dump_bufs);
+	if (errcode)
+		goto error;
+
+	errcode = -ENOMEM;
+	vcli->dump_bufs_meta = kmalloc_array(
+		setup->buffer_count, sizeof(*vcli->dump_bufs_meta), GFP_KERNEL);
+	if (!vcli->dump_bufs_meta)
+		goto error;
+
+	errcode = kbase_hwcnt_virtualizer_client_create(
+		vctx->hvirt, &vcli->enable_map, &vcli->hvcli);
+	if (errcode)
+		goto error;
+
+	init_waitqueue_head(&vcli->waitq);
+
+	*out_vcli = vcli;
+	return 0;
+error:
+	kbasep_vinstr_client_destroy(vcli);
+	return errcode;
+}
+
+int kbase_vinstr_init(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_vinstr_context **out_vctx)
+{
+	struct kbase_vinstr_context *vctx;
+	const struct kbase_hwcnt_metadata *metadata;
+
+	if (!hvirt || !out_vctx)
+		return -EINVAL;
+
+	metadata = kbase_hwcnt_virtualizer_metadata(hvirt);
+	if (!metadata)
+		return -EINVAL;
+
+	vctx = kzalloc(sizeof(*vctx), GFP_KERNEL);
+	if (!vctx)
+		return -ENOMEM;
+
+	vctx->hvirt = hvirt;
+	vctx->metadata = metadata;
+
+	mutex_init(&vctx->lock);
+	INIT_LIST_HEAD(&vctx->clients);
+	hrtimer_init(&vctx->dump_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	vctx->dump_timer.function = kbasep_vinstr_dump_timer;
+	INIT_WORK(&vctx->dump_work, kbasep_vinstr_dump_worker);
+
+	*out_vctx = vctx;
+	return 0;
+}
+
+void kbase_vinstr_term(struct kbase_vinstr_context *vctx)
+{
+	if (!vctx)
+		return;
+
+	cancel_work_sync(&vctx->dump_work);
+
+	/* Non-zero client count implies client leak */
+	if (WARN_ON(vctx->client_count != 0)) {
+		struct kbase_vinstr_client *pos, *n;
+
+		list_for_each_entry_safe(pos, n, &vctx->clients, node) {
+			list_del(&pos->node);
+			vctx->client_count--;
+			kbasep_vinstr_client_destroy(pos);
+		}
+	}
+
+	WARN_ON(vctx->client_count != 0);
+	kfree(vctx);
+}
+
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx)
+{
+	if (WARN_ON(!vctx))
+		return;
+
+	mutex_lock(&vctx->lock);
+
+	if (!WARN_ON(vctx->suspend_count == SIZE_MAX))
+		vctx->suspend_count++;
+
+	mutex_unlock(&vctx->lock);
+
+	/* Always sync cancel the timer and then the worker, regardless of the
+	 * new suspend count.
+	 *
+	 * This ensures concurrent calls to kbase_vinstr_suspend() always block
+	 * until vinstr is fully suspended.
+	 *
+	 * The timer is cancelled before the worker, as the timer
+	 * unconditionally re-enqueues the worker, but the worker checks the
+	 * suspend_count that we just incremented before rescheduling the timer.
+	 *
+	 * Therefore if we cancel the worker first, the timer might re-enqueue
+	 * the worker before we cancel the timer, but the opposite is not
+	 * possible.
+	 */
+	hrtimer_cancel(&vctx->dump_timer);
+	cancel_work_sync(&vctx->dump_work);
+}
+
+void kbase_vinstr_resume(struct kbase_vinstr_context *vctx)
+{
+	if (WARN_ON(!vctx))
+		return;
+
+	mutex_lock(&vctx->lock);
+
+	if (!WARN_ON(vctx->suspend_count == 0)) {
+		vctx->suspend_count--;
+
+		/* Last resume, so re-enqueue the worker if we have any periodic
+		 * clients.
+		 */
+		if (vctx->suspend_count == 0) {
+			struct kbase_vinstr_client *pos;
+			bool has_periodic_clients = false;
+
+			list_for_each_entry(pos, &vctx->clients, node) {
+				if (pos->dump_interval_ns != 0) {
+					has_periodic_clients = true;
+					break;
+				}
+			}
+
+			if (has_periodic_clients)
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+				queue_work(system_wq, &vctx->dump_work);
+#else
+				queue_work(system_highpri_wq, &vctx->dump_work);
+#endif
+		}
+	}
+
+	mutex_unlock(&vctx->lock);
+}
+
+int kbase_vinstr_hwcnt_reader_setup(
+	struct kbase_vinstr_context *vctx,
+	struct kbase_ioctl_hwcnt_reader_setup *setup)
+{
+	int errcode;
+	int fd;
+	struct kbase_vinstr_client *vcli = NULL;
+
+	if (!vctx || !setup ||
+	    (setup->buffer_count == 0) ||
+	    (setup->buffer_count > MAX_BUFFER_COUNT))
+		return -EINVAL;
+
+	errcode = kbasep_vinstr_client_create(vctx, setup, &vcli);
+	if (errcode)
+		goto error;
+
+	errcode = anon_inode_getfd(
+		"[mali_vinstr_desc]",
+		&vinstr_client_fops,
+		vcli,
+		O_RDONLY | O_CLOEXEC);
+	if (errcode < 0)
+		goto error;
+
+	fd = errcode;
+
+	/* Add the new client. No need to reschedule worker, as not periodic */
+	mutex_lock(&vctx->lock);
+
+	vctx->client_count++;
+	list_add(&vcli->node, &vctx->clients);
+
+	mutex_unlock(&vctx->lock);
+
+	return fd;
+error:
+	kbasep_vinstr_client_destroy(vcli);
+	return errcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_buffer_ready() - Check if client has ready
+ *                                             buffers.
+ * @cli: Non-NULL pointer to vinstr client.
+ *
+ * Return: Non-zero if client has at least one dumping buffer filled that was
+ *         not notified to user yet.
+ */
+static int kbasep_vinstr_hwcnt_reader_buffer_ready(
+	struct kbase_vinstr_client *cli)
+{
+	WARN_ON(!cli);
+	return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_dump() - Dump ioctl command.
+ * @cli: Non-NULL pointer to vinstr client.
+ *
+ * Return: 0 on success, else error code.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_dump(
+	struct kbase_vinstr_client *cli)
+{
+	int errcode;
+
+	mutex_lock(&cli->vctx->lock);
+
+	errcode = kbasep_vinstr_client_dump(
+		cli, BASE_HWCNT_READER_EVENT_MANUAL);
+
+	mutex_unlock(&cli->vctx->lock);
+	return errcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_clear() - Clear ioctl command.
+ * @cli: Non-NULL pointer to vinstr client.
+ *
+ * Return: 0 on success, else error code.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_clear(
+	struct kbase_vinstr_client *cli)
+{
+	int errcode;
+
+	mutex_lock(&cli->vctx->lock);
+
+	errcode = kbasep_vinstr_client_clear(cli);
+
+	mutex_unlock(&cli->vctx->lock);
+	return errcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer() - Get buffer ioctl command.
+ * @cli:    Non-NULL pointer to vinstr client.
+ * @buffer: Non-NULL pointer to userspace buffer.
+ * @size:   Size of buffer.
+ *
+ * Return: 0 on success, else error code.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+	struct kbase_vinstr_client *cli,
+	void __user *buffer,
+	size_t size)
+{
+	unsigned int meta_idx = atomic_read(&cli->meta_idx);
+	unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt;
+
+	struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx];
+
+	/* Metadata sanity check. */
+	WARN_ON(idx != meta->buffer_idx);
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if there is any buffer available. */
+	if (atomic_read(&cli->write_idx) == meta_idx)
+		return -EAGAIN;
+
+	/* Check if previously taken buffer was put back. */
+	if (atomic_read(&cli->read_idx) != meta_idx)
+		return -EBUSY;
+
+	/* Copy next available buffer's metadata to user. */
+	if (copy_to_user(buffer, meta, size))
+		return -EFAULT;
+
+	atomic_inc(&cli->meta_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer() - Put buffer ioctl command.
+ * @cli:    Non-NULL pointer to vinstr client.
+ * @buffer: Non-NULL pointer to userspace buffer.
+ * @size:   Size of buffer.
+ *
+ * Return: 0 on success, else error code.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+	struct kbase_vinstr_client *cli,
+	void __user *buffer,
+	size_t size)
+{
+	unsigned int read_idx = atomic_read(&cli->read_idx);
+	unsigned int idx = read_idx % cli->dump_bufs.buf_cnt;
+
+	struct kbase_hwcnt_reader_metadata meta;
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if any buffer was taken. */
+	if (atomic_read(&cli->meta_idx) == read_idx)
+		return -EPERM;
+
+	/* Check if correct buffer is put back. */
+	if (copy_from_user(&meta, buffer, size))
+		return -EFAULT;
+	if (idx != meta.buffer_idx)
+		return -EINVAL;
+
+	atomic_inc(&cli->read_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval() - Set interval ioctl command.
+ * @cli:      Non-NULL pointer to vinstr client.
+ * @interval: Periodic dumping interval (disable periodic dumping if 0).
+ *
+ * Return: 0 always.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+	struct kbase_vinstr_client *cli,
+	u32 interval)
+{
+	mutex_lock(&cli->vctx->lock);
+
+	if ((interval != 0) && (interval < DUMP_INTERVAL_MIN_NS))
+		interval = DUMP_INTERVAL_MIN_NS;
+	/* Update the interval, and put in a dummy next dump time */
+	cli->dump_interval_ns = interval;
+	cli->next_dump_time_ns = 0;
+
+	/*
+	 * If it's a periodic client, kick off the worker early to do a proper
+	 * timer reschedule. Return value is ignored, as we don't care if the
+	 * worker is already queued.
+	 */
+	if ((interval != 0) && (cli->vctx->suspend_count == 0))
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+		queue_work(system_wq, &cli->vctx->dump_work);
+#else
+		queue_work(system_highpri_wq, &cli->vctx->dump_work);
+#endif
+
+	mutex_unlock(&cli->vctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event() - Enable event ioctl command.
+ * @cli:      Non-NULL pointer to vinstr client.
+ * @event_id: ID of event to enable.
+ *
+ * Return: 0 always.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	/* No-op, as events aren't supported */
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event() - Disable event ioctl
+ *                                                    command.
+ * @cli:      Non-NULL pointer to vinstr client.
+ * @event_id: ID of event to disable.
+ *
+ * Return: 0 always.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+	struct kbase_vinstr_client *cli,
+	enum base_hwcnt_reader_event event_id)
+{
+	/* No-op, as events aren't supported */
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver() - Get HW version ioctl command.
+ * @cli:   Non-NULL pointer to vinstr client.
+ * @hwver: Non-NULL pointer to user buffer where HW version will be stored.
+ *
+ * Return: 0 on success, else error code.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+	struct kbase_vinstr_client *cli,
+	u32 __user *hwver)
+{
+	u32 ver = 0;
+	const enum kbase_hwcnt_gpu_group_type type =
+		kbase_hwcnt_metadata_group_type(cli->vctx->metadata, 0);
+
+	switch (type) {
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+		ver = 4;
+		break;
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+		ver = 5;
+		break;
+	default:
+		WARN_ON(true);
+	}
+
+	if (ver != 0) {
+		return put_user(ver, hwver);
+	} else {
+		return -EINVAL;
+	}
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl() - hwcnt reader's ioctl.
+ * @filp:   Non-NULL pointer to file structure.
+ * @cmd:    User command.
+ * @arg:    Command's argument.
+ *
+ * Return: 0 on success, else error code.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+	struct file *filp,
+	unsigned int cmd,
+	unsigned long arg)
+{
+	long rcode;
+	struct kbase_vinstr_client *cli;
+
+	if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER))
+		return -EINVAL;
+
+	cli = filp->private_data;
+	if (!cli)
+		return -EINVAL;
+
+	switch (cmd) {
+	case KBASE_HWCNT_READER_GET_API_VERSION:
+		rcode = put_user(HWCNT_READER_API, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_HWVER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+			cli, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER_SIZE:
+		rcode = put_user(
+			(u32)cli->vctx->metadata->dump_buf_bytes,
+			(u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_DUMP:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli);
+		break;
+	case KBASE_HWCNT_READER_CLEAR:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+			cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_PUT_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+			cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_SET_INTERVAL:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+			cli, (u32)arg);
+		break;
+	case KBASE_HWCNT_READER_ENABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+			cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	case KBASE_HWCNT_READER_DISABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+			cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	default:
+		WARN_ON(true);
+		rcode = -EINVAL;
+		break;
+	}
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_poll() - hwcnt reader's poll.
+ * @filp: Non-NULL pointer to file structure.
+ * @wait: Non-NULL pointer to poll table.
+ *
+ * Return: POLLIN if data can be read without blocking, 0 if data can not be
+ *         read without blocking, else error code.
+ */
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+	struct file *filp,
+	poll_table *wait)
+{
+	struct kbase_vinstr_client *cli;
+
+	if (!filp || !wait)
+		return -EINVAL;
+
+	cli = filp->private_data;
+	if (!cli)
+		return -EINVAL;
+
+	poll_wait(filp, &cli->waitq, wait);
+	if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_mmap() - hwcnt reader's mmap.
+ * @filp: Non-NULL pointer to file structure.
+ * @vma:  Non-NULL pointer to vma structure.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_vinstr_hwcnt_reader_mmap(
+	struct file *filp,
+	struct vm_area_struct *vma)
+{
+	struct kbase_vinstr_client *cli;
+	unsigned long vm_size, size, addr, pfn, offset;
+
+	if (!filp || !vma)
+		return -EINVAL;
+
+	cli = filp->private_data;
+	if (!cli)
+		return -EINVAL;
+
+	vm_size = vma->vm_end - vma->vm_start;
+	size = cli->dump_bufs.buf_cnt * cli->vctx->metadata->dump_buf_bytes;
+
+	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
+		return -EINVAL;
+
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	if (vm_size > size - offset)
+		return -EINVAL;
+
+	addr = __pa(cli->dump_bufs.page_addr + offset);
+	pfn = addr >> PAGE_SHIFT;
+
+	return remap_pfn_range(
+		vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_release() - hwcnt reader's release.
+ * @inode: Non-NULL pointer to inode structure.
+ * @filp:  Non-NULL pointer to file structure.
+ *
+ * Return: 0 always.
+ */
+static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
+	struct file *filp)
+{
+	struct kbase_vinstr_client *vcli = filp->private_data;
+
+	mutex_lock(&vcli->vctx->lock);
+
+	vcli->vctx->client_count--;
+	list_del(&vcli->node);
+
+	mutex_unlock(&vcli->vctx->lock);
+
+	kbasep_vinstr_client_destroy(vcli);
+
+	return 0;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
new file mode 100755
index 0000000..81d315f
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -0,0 +1,91 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Vinstr, used to provide an ioctl for userspace access to periodic hardware
+ * counters.
+ */
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+struct kbase_vinstr_context;
+struct kbase_hwcnt_virtualizer;
+struct kbase_ioctl_hwcnt_reader_setup;
+
+/**
+ * kbase_vinstr_init() - Initialise a vinstr context.
+ * @hvirt:    Non-NULL pointer to the hardware counter virtualizer.
+ * @out_vctx: Non-NULL pointer to where the pointer to the created vinstr
+ *            context will be stored on success.
+ *
+ * On creation, the suspend count of the context will be 0.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_vinstr_init(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_vinstr_context **out_vctx);
+
+/**
+ * kbase_vinstr_term() - Terminate a vinstr context.
+ * @vctx: Pointer to the vinstr context to be terminated.
+ */
+void kbase_vinstr_term(struct kbase_vinstr_context *vctx);
+
+/**
+ * kbase_vinstr_suspend() - Increment the suspend count of the context.
+ * @vctx: Non-NULL pointer to the vinstr context to be suspended.
+ *
+ * After this function call returns, it is guaranteed that all timers and
+ * workers in vinstr will be cancelled, and will not be re-triggered until
+ * after the context has been resumed. In effect, this means no new counter
+ * dumps will occur for any existing or subsequently added periodic clients.
+ */
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx);
+
+/**
+ * kbase_vinstr_resume() - Decrement the suspend count of the context.
+ * @vctx: Non-NULL pointer to the vinstr context to be resumed.
+ *
+ * If a call to this function decrements the suspend count from 1 to 0, then
+ * normal operation of vinstr will be resumed (i.e. counter dumps will once
+ * again be automatically triggered for all periodic clients).
+ *
+ * It is only valid to call this function one time for each prior returned call
+ * to kbase_vinstr_suspend.
+ */
+void kbase_vinstr_resume(struct kbase_vinstr_context *vctx);
+
+/**
+ * kbase_vinstr_hwcnt_reader_setup() - Set up a new hardware counter reader
+ *                                     client.
+ * @vinstr_ctx: Non-NULL pointer to the vinstr context.
+ * @setup:      Non-NULL pointer to the hwcnt reader configuration.
+ *
+ * Return: file descriptor on success, else a (negative) error code.
+ */
+int kbase_vinstr_hwcnt_reader_setup(
+	struct kbase_vinstr_context *vinstr_ctx,
+	struct kbase_ioctl_hwcnt_reader_setup *setup);
+
+#endif /* _KBASE_VINSTR_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100755
index 0000000..6c6a8c6
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_KBASE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mali_slot_template,
+	TP_PROTO(int jobslot, unsigned int info_val),
+	TP_ARGS(jobslot, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, jobslot)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->jobslot = jobslot;
+		__entry->info_val = info_val;
+	),
+	TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val)
+);
+
+#define DEFINE_MALI_SLOT_EVENT(name) \
+DEFINE_EVENT(mali_slot_template, mali_##name, \
+	TP_PROTO(int jobslot, unsigned int info_val), \
+	TP_ARGS(jobslot, info_val))
+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT);
+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE);
+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT);
+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT);
+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+#undef DEFINE_MALI_SLOT_EVENT
+
+DECLARE_EVENT_CLASS(mali_refcount_template,
+	TP_PROTO(int refcount, unsigned int info_val),
+	TP_ARGS(refcount, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, refcount)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->refcount = refcount;
+		__entry->info_val = info_val;
+	),
+	TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val)
+);
+
+#define DEFINE_MALI_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(mali_refcount_template, mali_##name, \
+	TP_PROTO(int refcount, unsigned int info_val), \
+	TP_ARGS(refcount, info_val))
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK);
+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE);
+#undef DEFINE_MALI_REFCOUNT_EVENT
+
+DECLARE_EVENT_CLASS(mali_add_template,
+	TP_PROTO(int gpu_addr, unsigned int info_val),
+	TP_ARGS(gpu_addr, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, gpu_addr)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->gpu_addr = gpu_addr;
+		__entry->info_val = info_val;
+	),
+	TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val)
+);
+
+#define DEFINE_MALI_ADD_EVENT(name) \
+DEFINE_EVENT(mali_add_template, mali_##name, \
+	TP_PROTO(int gpu_addr, unsigned int info_val), \
+	TP_ARGS(gpu_addr, info_val))
+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY);
+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL);
+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT);
+DEFINE_MALI_ADD_EVENT(JM_IRQ);
+DEFINE_MALI_ADD_EVENT(JM_IRQ_END);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP);
+DEFINE_MALI_ADD_EVENT(PM_PWRON);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_TILER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_TILER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM);
+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+#undef DEFINE_MALI_ADD_EVENT
+
+#endif /* _TRACE_MALI_KBASE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100755
index 0000000..96296ac
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,138 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#include <linux/tracepoint.h>
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - Reports change of job slot status.
+ * @gpu_id:   Kbase device id
+ * @event_id: ORed together bitfields representing a type of event,
+ *            made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event,
+	TP_PROTO(u32 gpu_id, u32 event_id, u32 tgid, u32 pid,
+		u8 job_id),
+	TP_ARGS(gpu_id, event_id, tgid, pid, job_id),
+	TP_STRUCT__entry(
+		__field(u32, gpu_id)
+		__field(u32, event_id)
+		__field(u32, tgid)
+		__field(u32, pid)
+		__field(u8,  job_id)
+	),
+	TP_fast_assign(
+		__entry->gpu_id   = gpu_id;
+		__entry->event_id = event_id;
+		__entry->tgid     = tgid;
+		__entry->pid      = pid;
+		__entry->job_id   = job_id;
+	),
+	TP_printk("gpu=%u event=%u tgid=%u pid=%u job_id=%u",
+		__entry->gpu_id, __entry->event_id,
+		__entry->tgid, __entry->pid, __entry->job_id)
+);
+
+/**
+ * mali_pm_status - Reports change of power management status.
+ * @gpu_id:   Kbase device id
+ * @event_id: Core type (shader, tiler, L2 cache)
+ * @value:    64bits bitmask reporting either power status of
+ *            the cores (1-ON, 0-OFF)
+ */
+TRACE_EVENT(mali_pm_status,
+	TP_PROTO(u32 gpu_id, u32 event_id, u64 value),
+	TP_ARGS(gpu_id, event_id, value),
+	TP_STRUCT__entry(
+		__field(u32, gpu_id)
+		__field(u32, event_id)
+		__field(u64, value)
+	),
+	TP_fast_assign(
+		__entry->gpu_id   = gpu_id;
+		__entry->event_id = event_id;
+		__entry->value    = value;
+	),
+	TP_printk("gpu=%u event %u = %llu",
+		__entry->gpu_id, __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_page_fault_insert_pages - Reports an MMU page fault
+ * resulting in new pages being mapped.
+ * @gpu_id:   Kbase device id
+ * @event_id: MMU address space number
+ * @value:    Number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages,
+	TP_PROTO(u32 gpu_id, s32 event_id, u64 value),
+	TP_ARGS(gpu_id, event_id, value),
+	TP_STRUCT__entry(
+		__field(u32, gpu_id)
+		__field(s32, event_id)
+		__field(u64, value)
+	),
+	TP_fast_assign(
+		__entry->gpu_id   = gpu_id;
+		__entry->event_id = event_id;
+		__entry->value    = value;
+	),
+	TP_printk("gpu=%u event %d = %llu",
+		__entry->gpu_id, __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_total_alloc_pages_change - Reports that the total number of
+ * allocated pages has changed.
+ * @gpu_id:   Kbase device id
+ * @event_id: Total number of pages allocated
+ */
+TRACE_EVENT(mali_total_alloc_pages_change,
+	TP_PROTO(u32 gpu_id, s64 event_id),
+	TP_ARGS(gpu_id, event_id),
+	TP_STRUCT__entry(
+		__field(u32, gpu_id)
+		__field(s64, event_id)
+	),
+	TP_fast_assign(
+		__entry->gpu_id   = gpu_id;
+		__entry->event_id = event_id;
+	),
+	TP_printk("gpu=%u event=%lld", __entry->gpu_id, __entry->event_id)
+);
+
+#endif /* _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
new file mode 100755
index 0000000..3a4db10
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
@@ -0,0 +1,109 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Kernel-wide include for common macros and types.
+ */
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define U8_MAX          ((u8)~0U)
+#define S8_MAX          ((s8)(U8_MAX>>1))
+#define S8_MIN          ((s8)(-S8_MAX - 1))
+#define U16_MAX         ((u16)~0U)
+#define S16_MAX         ((s16)(U16_MAX>>1))
+#define S16_MIN         ((s16)(-S16_MAX - 1))
+#define U32_MAX         ((u32)~0U)
+#define S32_MAX         ((s32)(U32_MAX>>1))
+#define S32_MIN         ((s32)(-S32_MAX - 1))
+#define U64_MAX         ((u64)~0ULL)
+#define S64_MAX         ((s64)(U64_MAX>>1))
+#define S64_MIN         ((s64)(-S64_MAX - 1))
+#endif /* LINUX_VERSION_CODE */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#define SIZE_MAX        (~(size_t)0)
+#endif /* LINUX_VERSION_CODE */
+
+/**
+ * MIN - Return the lesser of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * Refer to MAX macro for more details
+ */
+#define MIN(x, y)	((x) < (y) ? (x) : (y))
+
+/**
+ * MAX -  Return the greater of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as MIN it is guaranteed to return
+ * the one that MIN didn't return. This is significant for types where not
+ * all values are comparable e.g. NaNs in floating-point types. But if you want
+ * to retrieve the min and max of two values, consider using a conditional swap
+ * instead.
+ */
+#define MAX(x, y)	((x) < (y) ? (y) : (x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * much support API backwards compatibility.
+ */
+#define CSTD_UNUSED(x)	((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile time macros turn a function-like macro in to a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP(...)	((void)#__VA_ARGS__)
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1(x)	#x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be used
+ * if the macro is defined in a way which may have no value; use the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2(x)	CSTD_STR1(x)
+
+#endif /* _MALISW_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
new file mode 100755
index 0000000..29d5df3
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
@@ -0,0 +1,31 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _MIDG_COHERENCY_H_
+#define _MIDG_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE      1
+#define COHERENCY_NONE     31
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+
+#endif /* _MIDG_COHERENCY_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100755
index 0000000..a4aba19
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,554 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+#include "mali_midg_regmap_jm.h"
+
+/* Begin Register Offsets */
+/* GPU control registers */
+
+#define GPU_CONTROL_BASE        0x0000
+#define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
+#define GPU_ID                  0x000   /* (RO) GPU and revision identifier */
+#define L2_FEATURES             0x004   /* (RO) Level 2 cache features */
+#define TILER_FEATURES          0x00C   /* (RO) Tiler Features */
+#define MEM_FEATURES            0x010   /* (RO) Memory system features */
+#define MMU_FEATURES            0x014   /* (RO) MMU features */
+#define AS_PRESENT              0x018   /* (RO) Address space slots present */
+#define GPU_IRQ_RAWSTAT         0x020   /* (RW) */
+#define GPU_IRQ_CLEAR           0x024   /* (WO) */
+#define GPU_IRQ_MASK            0x028   /* (RW) */
+#define GPU_IRQ_STATUS          0x02C   /* (RO) */
+
+#define GPU_COMMAND             0x030   /* (WO) */
+#define GPU_STATUS              0x034   /* (RO) */
+
+#define GPU_DBGEN               (1 << 8)    /* DBGEN wire status */
+
+#define GPU_FAULTSTATUS         0x03C   /* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO     0x040   /* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI     0x044   /* (RO) GPU exception fault address, high word */
+
+#define L2_CONFIG               0x048   /* (RW) Level 2 cache configuration */
+
+#define PWR_KEY                 0x050   /* (WO) Power manager key register */
+#define PWR_OVERRIDE0           0x054   /* (RW) Power manager override settings */
+#define PWR_OVERRIDE1           0x058   /* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO          0x060   /* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI          0x064   /* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG           0x068   /* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN            0x06C   /* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN        0x070   /* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN         0x074   /* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN        0x07C   /* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO          0x090   /* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI          0x094   /* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO            0x098   /* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI            0x09C   /* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS      0x0A0   /* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8   /* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES         0x0AC   /* (RO) Thread features */
+#define THREAD_TLS_ALLOC        0x310   /* (RO) Number of threads per core that TLS must be allocated for */
+
+#define TEXTURE_FEATURES_0      0x0B0   /* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1      0x0B4   /* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2      0x0B8   /* (RO) Support flags for indexed texture formats 64..95 */
+#define TEXTURE_FEATURES_3      0x0BC   /* (RO) Support flags for texture order */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define SHADER_PRESENT_LO       0x100   /* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI       0x104   /* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO        0x110   /* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI        0x114   /* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO           0x120   /* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI           0x124   /* (RO) Level 2 cache present bitmap, high word */
+
+#define STACK_PRESENT_LO        0xE00   /* (RO) Core stack present bitmap, low word */
+#define STACK_PRESENT_HI        0xE04   /* (RO) Core stack present bitmap, high word */
+
+#define SHADER_READY_LO         0x140   /* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI         0x144   /* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO          0x150   /* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI          0x154   /* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO             0x160   /* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI             0x164   /* (RO) Level 2 cache ready bitmap, high word */
+
+#define STACK_READY_LO          0xE10   /* (RO) Core stack ready bitmap, low word */
+#define STACK_READY_HI          0xE14   /* (RO) Core stack ready bitmap, high word */
+
+#define SHADER_PWRON_LO         0x180   /* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI         0x184   /* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO          0x190   /* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI          0x194   /* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO             0x1A0   /* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI             0x1A4   /* (WO) Level 2 cache power on bitmap, high word */
+
+#define STACK_PWRON_LO          0xE20   /* (RO) Core stack power on bitmap, low word */
+#define STACK_PWRON_HI          0xE24   /* (RO) Core stack power on bitmap, high word */
+
+#define SHADER_PWROFF_LO        0x1C0   /* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI        0x1C4   /* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO         0x1D0   /* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI         0x1D4   /* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO            0x1E0   /* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI            0x1E4   /* (WO) Level 2 cache power off bitmap, high word */
+
+#define STACK_PWROFF_LO         0xE30   /* (RO) Core stack power off bitmap, low word */
+#define STACK_PWROFF_HI         0xE34   /* (RO) Core stack power off bitmap, high word */
+
+#define SHADER_PWRTRANS_LO      0x200   /* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI      0x204   /* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO       0x210   /* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI       0x214   /* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO          0x220   /* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI          0x224   /* (RO) Level 2 cache power transition bitmap, high word */
+
+#define STACK_PWRTRANS_LO       0xE40   /* (RO) Core stack power transition bitmap, low word */
+#define STACK_PWRTRANS_HI       0xE44   /* (RO) Core stack power transition bitmap, high word */
+
+#define SHADER_PWRACTIVE_LO     0x240   /* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI     0x244   /* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO      0x250   /* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI      0x254   /* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO         0x260   /* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI         0x264   /* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES      0x300   /* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304   /* (RW) Coherency enable */
+
+#define SHADER_CONFIG           0xF04   /* (RW) Shader core configuration (implementation-specific) */
+#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration (implementation-specific) */
+#define L2_MMU_CONFIG           0xF0C   /* (RW) L2 cache and MMU configuration (implementation-specific) */
+
+/* Job control registers */
+
+#define JOB_CONTROL_BASE        0x1000
+
+#define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT         0x000   /* Raw interrupt status register */
+#define JOB_IRQ_CLEAR           0x004   /* Interrupt clear register */
+#define JOB_IRQ_MASK            0x008   /* Interrupt mask register */
+#define JOB_IRQ_STATUS          0x00C   /* Interrupt status register */
+
+/* MMU control registers */
+
+#define MEMORY_MANAGEMENT_BASE  0x2000
+#define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT         0x000   /* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR           0x004   /* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK            0x008   /* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS          0x00C   /* (RO) Interrupt status register */
+
+#define MMU_AS0                 0x400   /* Configuration registers for address space 0 */
+#define MMU_AS1                 0x440   /* Configuration registers for address space 1 */
+#define MMU_AS2                 0x480   /* Configuration registers for address space 2 */
+#define MMU_AS3                 0x4C0   /* Configuration registers for address space 3 */
+#define MMU_AS4                 0x500   /* Configuration registers for address space 4 */
+#define MMU_AS5                 0x540   /* Configuration registers for address space 5 */
+#define MMU_AS6                 0x580   /* Configuration registers for address space 6 */
+#define MMU_AS7                 0x5C0   /* Configuration registers for address space 7 */
+#define MMU_AS8                 0x600   /* Configuration registers for address space 8 */
+#define MMU_AS9                 0x640   /* Configuration registers for address space 9 */
+#define MMU_AS10                0x680   /* Configuration registers for address space 10 */
+#define MMU_AS11                0x6C0   /* Configuration registers for address space 11 */
+#define MMU_AS12                0x700   /* Configuration registers for address space 12 */
+#define MMU_AS13                0x740   /* Configuration registers for address space 13 */
+#define MMU_AS14                0x780   /* Configuration registers for address space 14 */
+#define MMU_AS15                0x7C0   /* Configuration registers for address space 15 */
+
+/* MMU address space control registers */
+
+#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO         0x00	/* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI         0x04	/* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO          0x08	/* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI          0x0C	/* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO         0x10	/* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI         0x14	/* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND             0x18	/* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS         0x1C	/* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO     0x20	/* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
+#define AS_STATUS              0x28	/* (RO) Status flags for address space n */
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
+
+/* End Register Offsets */
+
+/* IRQ flags */
+#define GPU_FAULT               (1 << 0)    /* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS     (1 << 7)    /* More than one GPU Fault occurred. */
+#define RESET_COMPLETED         (1 << 8)    /* Set when a reset has completed. */
+#define POWER_CHANGED_SINGLE    (1 << 9)    /* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL       (1 << 10)   /* Set when all cores have finished powering up or down. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16)   /* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED  (1 << 17)   /* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+		| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+ * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS    16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n)       (1UL << (n))
+#define MMU_BUS_ERROR(n)        (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK   0xfffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED  (0u << 0)
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY  (1u << 1)
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE     (3u << 0)
+#define AS_TRANSTAB_LPAE_READ_INNER        (1u << 2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER       (1u << 4)
+
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK      0x00000003
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                      (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT         (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT          (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT        (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG               (0x3<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT        (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT   (0x5<<3)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK         (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC       (0x0<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX           (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ         (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE        (0x3<<8)
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2ull << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3ull << 28)
+#define AS_TRANSCFG_R_ALLOCATE (1ull << 30)
+
+/*
+ * Begin Command Values
+ */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP         0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK        0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK      0x03	/* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH       0x04	/* Flush all L2 caches then issue a flush region command to all MMUs
+					   (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT    0x04	/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM   0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
+					   flush all L2 caches then issue a flush region command to all MMUs */
+
+#if GPU_HAS_CSF_VERSION_10_REVISION_2
+/* GPU_COMMAND codes */
+#define GPU_COMMAND_CODE_NOP                0x00 /* No operation, nothing happens */
+#define GPU_COMMAND_CODE_RESET              0x01 /* Reset the GPU */
+#define GPU_COMMAND_CODE_PRFCNT             0x02 /* Clear or sample performance counters */
+#define GPU_COMMAND_CODE_TIME               0x03 /* Configure time sources */
+#define GPU_COMMAND_CODE_FLUSH_CACHES       0x04 /* Flush caches */
+#define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */
+#define GPU_COMMAND_CODE_FINISH_HALT        0x06 /* Halt CSF */
+
+/* GPU_COMMAND_RESET payloads */
+
+/* This will leave the state of active jobs UNDEFINED, but will leave the external bus in a defined and idle state.
+ * Power domains will remain powered on.
+ */
+#define GPU_COMMAND_RESET_PAYLOAD_FAST_RESET 0x00
+
+/* This will leave the state of active command streams UNDEFINED, but will leave the external bus in a defined and
+ * idle state.
+ */
+#define GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET 0x01
+
+/* This reset will leave the state of currently active streams UNDEFINED, will likely lose data, and may leave
+ * the system bus in an inconsistent state. Use only as a last resort when nothing else works.
+ */
+#define GPU_COMMAND_RESET_PAYLOAD_HARD_RESET 0x02
+
+/* GPU_COMMAND_PRFCNT payloads */
+#define GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE 0x01 /* Sample performance counters */
+#define GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR  0x02 /* Clear performance counters */
+
+/* GPU_COMMAND_TIME payloads */
+#define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */
+#define GPU_COMMAND_TIME_ENABLE  0x01 /* Enable cycle counter */
+
+/* GPU_COMMAND_FLUSH_CACHES payloads */
+#define GPU_COMMAND_FLUSH_PAYLOAD_NONE             0x00 /* No flush */
+#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN            0x01 /* Clean the caches */
+#define GPU_COMMAND_FLUSH_PAYLOAD_INVALIDATE       0x02 /* Invalidate the caches */
+#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE 0x03 /* Clean and invalidate the caches */
+
+/* GPU_COMMAND command + payload */
+#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \
+	((u32)opcode || ((u32)payload << 8))
+
+/* Final GPU_COMMAND form */
+/* No operation, nothing happens */
+#define GPU_COMMAND_NOP \
+	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_NOP, 0)
+
+/* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_SOFT_RESET \
+	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET)
+
+/* Immediately reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET \
+	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_HARD_RESET)
+
+/* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_CLEAR \
+	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR)
+
+/* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_PRFCNT_SAMPLE \
+	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE)
+
+/* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_START \
+	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_ENABLE)
+
+/* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP \
+	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE)
+
+/* Clean all caches */
+#define GPU_COMMAND_CLEAN_CACHES \
+	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN)
+
+/* Clean and invalidate all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES \
+	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE)
+
+/* Places the GPU in protected mode */
+#define GPU_COMMAND_SET_PROTECTED_MODE \
+	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0)
+
+/* Halt CSF */
+#define GPU_COMMAND_FINISH_HALT \
+	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FINISH_HALT, 0)
+#else
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP                0x00 /* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET         0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET         0x02 /* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR       0x03 /* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE      0x04 /* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START  0x05 /* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP   0x06 /* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES       0x07 /* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES   0x08 /* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */
+#endif
+
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE            (1 << 2)    /* Set if the performance counters are active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE    (1 << 7)    /* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT        0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT          4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT   8 /* Set select position. */
+
+/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_OFF          0
+/* The performance counters are enabled, but are only written out when a
+ * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register.
+ */
+#define PRFCNT_CONFIG_MODE_MANUAL       1
+/* The performance counters are enabled, and are written out each time a tile
+ * finishes rendering.
+ */
+#define PRFCNT_CONFIG_MODE_TILE         2
+
+/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
+/* Set to inner non-cacheable, outer-non-cacheable
+ * Setting defined by the alloc bits is ignored, but set to a valid encoding:
+ * - no-alloc on read
+ * - no alloc on write
+ */
+#define AS_MEMATTR_AARCH64_NON_CACHEABLE  0x4Cull
+
+/* Use GPU implementation-defined  caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC           0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
+/* There is no LPAE support for non-cacheable, since the memory type is always
+ * write-back.
+ * Marking this setting as reserved for LPAE
+ */
+#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED
+
+/* Symbols for default MEMATTR to use
+ * Default is - HW implementation defined caching */
+#define AS_MEMATTR_INDEX_DEFAULT               0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE           3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL    1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC           2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF        3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA              4
+/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */
+#define AS_MEMATTR_INDEX_NON_CACHEABLE         5
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED  0
+#define IMPLEMENTATION_SILICON      1
+#define IMPLEMENTATION_FPGA         2
+#define IMPLEMENTATION_MODEL        3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT     256
+#define THREAD_MWS_DEFAULT    256
+#define THREAD_MBS_DEFAULT    256
+#define THREAD_MR_DEFAULT     1024
+#define THREAD_MTQ_DEFAULT    4
+#define THREAD_MTGS_DEFAULT   10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+#define SC_ALT_COUNTERS             (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL  (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD   (1ul << 6)
+#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
+#define SC_LS_PAUSEBUFFER_DISABLE   (1ul << 16)
+#define SC_TLS_HASH_ENABLE          (1ul << 17)
+#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
+#define SC_ENABLE_TEXGRD_FLAGS      (1ul << 25)
+#define SC_VAR_ALGORITHM            (1ul << 29)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+#define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
+/* End TILER_CONFIG register */
+
+/* L2_CONFIG register */
+#define L2_CONFIG_SIZE_SHIFT        16
+#define L2_CONFIG_SIZE_MASK         (0xFFul << L2_CONFIG_SIZE_SHIFT)
+#define L2_CONFIG_HASH_SHIFT        24
+#define L2_CONFIG_HASH_MASK         (0xFFul << L2_CONFIG_HASH_SHIFT)
+/* End L2_CONFIG register */
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap_jm.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap_jm.h
new file mode 100644
index 0000000..69996e2
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap_jm.h
@@ -0,0 +1,198 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _MIDG_REGMAP_JM_H_
+#define _MIDG_REGMAP_JM_H_
+
+/* GPU control registers */
+
+#define CORE_FEATURES           0x008   /* (RO) Shader Core Features */
+#define JS_PRESENT              0x01C   /* (RO) Job slots present */
+#define LATEST_FLUSH            0x038   /* (RO) Flush ID of latest clean-and-invalidate operation */
+#define GROUPS_L2_COHERENT      (1 << 0)    /* Cores groups are l2 coherent */
+
+#define JS0_FEATURES            0x0C0   /* (RO) Features of job slot 0 */
+#define JS1_FEATURES            0x0C4   /* (RO) Features of job slot 1 */
+#define JS2_FEATURES            0x0C8   /* (RO) Features of job slot 2 */
+#define JS3_FEATURES            0x0CC   /* (RO) Features of job slot 3 */
+#define JS4_FEATURES            0x0D0   /* (RO) Features of job slot 4 */
+#define JS5_FEATURES            0x0D4   /* (RO) Features of job slot 5 */
+#define JS6_FEATURES            0x0D8   /* (RO) Features of job slot 6 */
+#define JS7_FEATURES            0x0DC   /* (RO) Features of job slot 7 */
+#define JS8_FEATURES            0x0E0   /* (RO) Features of job slot 8 */
+#define JS9_FEATURES            0x0E4   /* (RO) Features of job slot 9 */
+#define JS10_FEATURES           0x0E8   /* (RO) Features of job slot 10 */
+#define JS11_FEATURES           0x0EC   /* (RO) Features of job slot 11 */
+#define JS12_FEATURES           0x0F0   /* (RO) Features of job slot 12 */
+#define JS13_FEATURES           0x0F4   /* (RO) Features of job slot 13 */
+#define JS14_FEATURES           0x0F8   /* (RO) Features of job slot 14 */
+#define JS15_FEATURES           0x0FC   /* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n)      GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define JM_CONFIG               0xF00   /* (RW) Job manager configuration (implementation-specific) */
+
+/* Job control registers */
+
+#define JOB_IRQ_JS_STATE        0x010   /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE        0x014   /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
+
+#define JOB_SLOT0               0x800   /* Configuration registers for job slot 0 */
+#define JOB_SLOT1               0x880   /* Configuration registers for job slot 1 */
+#define JOB_SLOT2               0x900   /* Configuration registers for job slot 2 */
+#define JOB_SLOT3               0x980   /* Configuration registers for job slot 3 */
+#define JOB_SLOT4               0xA00   /* Configuration registers for job slot 4 */
+#define JOB_SLOT5               0xA80   /* Configuration registers for job slot 5 */
+#define JOB_SLOT6               0xB00   /* Configuration registers for job slot 6 */
+#define JOB_SLOT7               0xB80   /* Configuration registers for job slot 7 */
+#define JOB_SLOT8               0xC00   /* Configuration registers for job slot 8 */
+#define JOB_SLOT9               0xC80   /* Configuration registers for job slot 9 */
+#define JOB_SLOT10              0xD00   /* Configuration registers for job slot 10 */
+#define JOB_SLOT11              0xD80   /* Configuration registers for job slot 11 */
+#define JOB_SLOT12              0xE00   /* Configuration registers for job slot 12 */
+#define JOB_SLOT13              0xE80   /* Configuration registers for job slot 13 */
+#define JOB_SLOT14              0xF00   /* Configuration registers for job slot 14 */
+#define JOB_SLOT15              0xF80   /* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r)      (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO             0x00	/* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI             0x04	/* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO             0x08	/* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI             0x0C	/* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
+#define JS_XAFFINITY           0x1C	/* (RO) Extended affinity mask for job
+					   slot n */
+
+#define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
+#define JS_STATUS              0x24	/* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO        0x40	/* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI        0x44	/* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
+#define JS_XAFFINITY_NEXT      0x5C	/* (RW) Next extended affinity mask for
+					   job slot n */
+
+#define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
+
+/* No JM-specific MMU control registers */
+/* No JM-specific MMU address space control registers */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP         0x00	/* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START       0x01	/* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP   0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK        0x07    /* Mask of bits currently in use by the HW */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION        (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN            (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU                    (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG               (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
+
+/* JS_XAFFINITY register values */
+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
+#define JS_XAFFINITY_TILER_ENABLE     (1u << 8)
+#define JS_XAFFINITY_CACHE_ENABLE     (1u << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
+ */
+
+/* Group of values representing the job status insead a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE   0x00
+#define JS_STATUS_INTERRUPTED         (JS_STATUS_NO_EXCEPTION_BASE + 0x02)	/* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED             (JS_STATUS_NO_EXCEPTION_BASE + 0x03)	/* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED          (JS_STATUS_NO_EXCEPTION_BASE + 0x04)	/* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE          0x40
+#define JS_STATUS_CONFIG_FAULT        (JS_STATUS_FAULT_BASE)	/* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT         (JS_STATUS_FAULT_BASE + 0x01)	/* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT          (JS_STATUS_FAULT_BASE + 0x02)	/* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT         (JS_STATUS_FAULT_BASE + 0x03)	/* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT      (JS_STATUS_FAULT_BASE + 0x04)	/* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT           (JS_STATUS_FAULT_BASE + 0x08)	/* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE  0x50
+#define JS_STATUS_INSTR_INVALID_PC        (JS_STATUS_INSTRUCTION_FAULT_BASE)	/* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01)	/* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02)	/* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03)	/* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT         (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04)	/* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05)	/* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06)	/* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT      (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08)	/* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT        (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09)	/* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A)	/* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE   0x60
+#define JS_STATUS_OUT_OF_MEMORY       (JS_STATUS_MEMORY_FAULT_BASE)	/* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN             0x7F	/* 0x7F means UNKNOWN */
+
+/* JS<n>_FEATURES register */
+#define JS_FEATURE_NULL_JOB              (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB         (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB       (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB           (1u << 4)
+#define JS_FEATURE_VERTEX_JOB            (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB          (1u << 6)
+#define JS_FEATURE_TILER_JOB             (1u << 7)
+#define JS_FEATURE_FUSED_JOB             (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB          (1u << 9)
+
+/* JM_CONFIG register */
+#define JM_TIMESTAMP_OVERRIDE  (1ul << 0)
+#define JM_CLOCK_GATE_OVERRIDE (1ul << 1)
+#define JM_JOB_THROTTLE_ENABLE (1ul << 2)
+#define JM_JOB_THROTTLE_LIMIT_SHIFT (3)
+#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F)
+#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2)
+#define JM_IDVS_GROUP_SIZE_SHIFT (16)
+#define JM_MAX_IDVS_GROUP_SIZE (0x3F)
+
+#endif /* _MIDG_REGMAP_JM_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_uk.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
new file mode 100755
index 0000000..701f390
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif				/* __cplusplus */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
+ * identifier to select a UKK client to the uku_open() function.
+ *
+ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
+ * enumeration and the uku_open() implemenation for the various OS ports need to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+	/**
+	 * Value used to identify the Base driver UK client.
+	 */
+	UK_CLIENT_MALI_T600_BASE,
+
+	/** The number of uk clients supported. This must be the last member of the enum */
+	UK_CLIENT_COUNT
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif				/* __cplusplus */
+#endif				/* _UK_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100755
index 0000000..ef9fb96
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name is the name set in MALI_PLATFORM_NAME
+#
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
new file mode 100755
index 0000000..35565df
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+USE_GPPLL?=0
+ifdef CONFIG_AM_VIDEO
+    USE_GPPLL:=1
+endif
+
+ccflags-y += -DAMLOGIC_GPU_USE_GPPLL=$(USE_GPPLL)
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_clock.o \
+	$(MALI_PLATFORM_DIR)/mpgpu.o \
+	$(MALI_PLATFORM_DIR)/meson_main2.o \
+	$(MALI_PLATFORM_DIR)/platform_gx.o \
+	$(MALI_PLATFORM_DIR)/scaling.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
new file mode 100755
index 0000000..d131a4b
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
@@ -0,0 +1,658 @@
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+#ifndef AML_CLK_LOCK_ERROR
+#define AML_CLK_LOCK_ERROR 1
+#endif
+
+static unsigned gpu_dbg_level = 0;
+module_param(gpu_dbg_level, uint, 0644);
+MODULE_PARM_DESC(gpu_dbg_level, "gpu debug level");
+
+#define gpu_dbg(level, fmt, arg...)			\
+	do {			\
+		if (gpu_dbg_level >= (level))		\
+		printk("gpu_debug"fmt , ## arg); 	\
+	} while (0)
+
+#define GPU_CLK_DBG(fmt, arg...)
+
+//disable print
+//#define _dev_info(...)
+
+//static DEFINE_SPINLOCK(lock);
+static mali_plat_info_t* pmali_plat = NULL;
+//static u32 mali_extr_backup = 0;
+//static u32 mali_extr_sample_backup = 0;
+struct timeval start;
+struct timeval end;
+int mali_pm_statue = 0;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali_0_parent = dvfs_tbl->clkp_handle;
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+#ifdef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+#endif
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_set_parent(clk_mali_0, clk_mali_0_parent);
+
+	clk_prepare_enable(clk_mali_0);
+
+	clk_set_parent(clk_mali, clk_mali_0);
+
+#ifdef AML_CLK_LOCK_ERROR
+	clk_set_parent(clk_mali_1, clk_mali_0_parent);
+	clk_prepare_enable(clk_mali_1);
+#endif
+
+	GPU_CLK_DBG("%s:enable(%d), %s:enable(%d)\n",
+	  clk_mali_0->name,  clk_mali_0->enable_count,
+	  clk_mali_0_parent->name, clk_mali_0_parent->enable_count);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+	struct clk *clk_mali_x   = NULL;
+	struct clk *clk_mali_x_parent = NULL;
+	struct clk *clk_mali_x_old = NULL;
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+	clk_mali_x_old  = clk_get_parent(clk_mali);
+
+	if (!clk_mali_x_old) {
+		printk("gpu: could not get clk_mali_x_old or clk_mali_x_old\n");
+		return 0;
+	}
+	if (clk_mali_x_old == clk_mali_0) {
+		clk_mali_x = clk_mali_1;
+	} else if (clk_mali_x_old == clk_mali_1) {
+		clk_mali_x = clk_mali_0;
+	} else {
+		printk("gpu: unmatched clk_mali_x_old\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG("idx=%d, clk_freq=%d\n", idx, dvfs_tbl->clk_freq);
+	clk_mali_x_parent = dvfs_tbl->clkp_handle;
+	if (!clk_mali_x_parent) {
+		printk("gpu: could not get clk_mali_x_parent\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x_parent, dvfs_tbl->clkp_freq);
+	GPU_CLK_DBG();
+	ret = clk_set_parent(clk_mali_x, clk_mali_x_parent);
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x, dvfs_tbl->clk_freq);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	ret = clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG("new %s:enable(%d)\n", clk_mali_x->name,  clk_mali_x->enable_count);
+	do_gettimeofday(&start);
+	udelay(1);// delay 10ns
+	do_gettimeofday(&end);
+	ret = clk_set_parent(clk_mali, clk_mali_x);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	GPU_CLK_DBG("old %s:enable(%d)\n", clk_mali_x_old->name,  clk_mali_x_old->enable_count);
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "ao io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	mpdata->dvfs_table_size = 0;
+
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+	dev_notice(&pdev->dev, "dvfs table is %d\n", mpdata->dvfs_table_size);
+	dev_notice(&pdev->dev, "dvfs table addr %p, ele size=%zd\n",
+			mpdata->dvfs_table,
+			sizeof(mpdata->dvfs_table[0]));
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		mpdata->def_clock = mpdata->scale_info.maxclk;
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->def_clock);
+	}
+	if (mpdata->def_clock > mpdata->scale_info.maxclk)
+		mpdata->def_clock = mpdata->scale_info.maxclk;
+
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "clk_gpu");
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_gpu_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_gpu_1");
+	if (IS_ERR(mpdata->clk_mali) || IS_ERR(mpdata->clk_mali_0) || IS_ERR(mpdata->clk_mali_1)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+#else
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	if ((0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) &&
+			!IS_ERR(dvfs_tbl->clkp_handle) &&
+			(0 != dvfs_tbl->clkp_freq)) {
+		clk_prepare_enable(dvfs_tbl->clkp_handle);
+		clk_set_rate(dvfs_tbl->clkp_handle, dvfs_tbl->clkp_freq);
+	}
+	clk_prepare_enable(clk_mali);
+	clk_set_rate(clk_mali, dvfs_tbl->clk_freq);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+
+	GPU_CLK_DBG();
+	do_gettimeofday(&start);
+	ret = clk_set_rate(clk_mali, dvfs_tbl->clk_freq);
+	do_gettimeofday(&end);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	GPU_CLK_DBG();
+	clk_disable_unprepare(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+				&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		} else if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+			dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+			if (IS_ERR(dvfs_tbl->clkp_handle)) {
+				dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+			}
+			ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+			if (ret) {
+				dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+			}
+		}
+
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		mpdata->def_clock = mpdata->scale_info.maxclk;
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->def_clock);
+	}
+	if (mpdata->def_clock > mpdata->scale_info.maxclk)
+		mpdata->def_clock = mpdata->scale_info.maxclk;
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+	_dev_info(&pdev->dev, "clock dvfs table size is %d\n", mpdata->dvfs_table_size);
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "gpu_mux");
+#if 0
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_mali_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_mali_1");
+#endif
+	if (IS_ERR(mpdata->clk_mali)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
new file mode 100644
index 0000000..9b8b392
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
@@ -0,0 +1,37 @@
+#ifndef __MALI_CLOCK_H__
+#define __MALI_CLOCK_H__
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <linux/clk.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 29))
+#include <linux/amlogic/iomap.h>
+#endif
+
+#ifndef HHI_MALI_CLK_CNTL
+#define HHI_MALI_CLK_CNTL   0x6C
+#define mplt_read(r)        readl((pmali_plat->reg_base_hiubus) + ((r)<<2))
+#define mplt_write(r, v)    writel((v), ((pmali_plat->reg_base_hiubus) + ((r)<<2)))
+#define mplt_setbits(r, m)  mplt_write((r), (mplt_read(r) | (m)));
+#define mplt_clrbits(r, m)  mplt_write((r), (mplt_read(r) & (~(m))));
+#endif
+
+//extern int mali_clock_init(struct platform_device *dev);
+int mali_clock_init_clk_tree(struct platform_device *pdev);
+
+typedef int (*critical_t)(size_t param);
+int mali_clock_critical(critical_t critical, size_t param);
+
+int mali_clock_init(mali_plat_info_t*);
+int mali_clock_set(unsigned int index);
+void disable_clock(void);
+void enable_clock(void);
+u32 get_mali_freq(u32 idx);
+void set_str_src(u32 data);
+int mali_dt_info(struct platform_device *pdev,
+        struct mali_plat_info_t *mpdata);
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
new file mode 100755
index 0000000..d7dd246
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
@@ -0,0 +1,126 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/version.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static unsigned long t83x_static_power(unsigned long voltage)
+{
+#if 0
+	struct thermal_zone_device *tz;
+	unsigned long temperature, temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10);
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	tz = thermal_zone_get_zone_by_name("gpu");
+	if (IS_ERR(tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(tz));
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	} else {
+		int ret;
+
+		ret = tz->ops->get_temp(tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(2 * temp_cubed)
+			- (80 * temp_squared)
+			+ (4700 * temp)
+			+ 32000;
+
+	return (((coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+#else
+	return 0;
+#endif
+}
+
+static unsigned long t83x_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+	const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */
+
+	return (coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+struct devfreq_cooling_ops t83x_model_ops = {
+#else
+struct devfreq_cooling_power t83x_model_ops = {
+#endif
+	.get_static_power = t83x_static_power,
+	.get_dynamic_power = t83x_dynamic_power,
+};
+
+#endif
+
+#include <mali_kbase_config.h>
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &dummy_platform_config;
+}
+
+#ifndef CONFIG_OF
+int kbase_platform_register(void)
+{
+	return 0;
+}
+
+void kbase_platform_unregister(void)
+{
+}
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
new file mode 100755
index 0000000..1c56015
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (750000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (100000)
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (NULL)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+extern struct kbase_platform_funcs_conf dt_funcs_conf;
+#define PLATFORM_FUNCS (&dt_funcs_conf)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#ifdef CONFIG_DEVFREQ_THERMAL
+#define POWER_MODEL_CALLBACKS (&t83x_model_ops)
+extern struct devfreq_cooling_ops t83x_model_ops;
+#else
+#define POWER_MODEL_CALLBACKS (NULL)
+#endif
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+/**
+ * Autosuspend delay
+ *
+ * The delay time (in milliseconds) to be used for autosuspend
+ */
+#define AUTO_SUSPEND_DELAY (100)
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.c
new file mode 100755
index 0000000..dc1654f
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.c
@@ -0,0 +1,336 @@
+/**
+ ** Meson hardware specific initialization
+ **/
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include <mali_kbase.h>
+#include "mali_kbase_meson.h"
+
+int meson_gpu_reset(struct kbase_device *kbdev)
+{
+	struct meson_context *platform = kbdev->platform_context;
+    void __iomem *reg_base_reset = platform->reg_base_reset;
+    u32 value;
+
+    //JOHNT
+    // Level reset mail
+ 
+    // Level reset mail
+    //writel(~(0x1<<14), reg_base_reset + P_RESET2_MASK * 4);
+    //writel(~(0x1<<14), reg_base_reset + P_RESET2_LEVEL * 4);
+
+    //writel(0xffffffff, reg_base_reset + P_RESET2_LEVEL * 4);
+    //writel(0xffffffff, reg_base_reset + P_RESET0_LEVEL * 4);
+
+    MESON_PRINT("%s, %d\n", __func__, __LINE__);
+    MESON_PRINT("reg_base=%p, reset0=%p\n", reg_base_reset, reg_base_reset + RESET0_MASK * 4);
+    udelay(100);
+#if 0
+    value = readl(reg_base_reset + RESET0_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET0_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(100);
+    MESON_PRINT("%s, %d\n", __func__, __LINE__);
+    value = readl(reg_base_reset + RESET0_MASK * 4);
+    value = value & (~(0x1<<20));
+    writel(value, reg_base_reset + RESET0_MASK * 4);
+
+    udelay(100);
+#if 0
+    value = readl(reg_base_reset + RESET0_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET0_REGISTER=%x\n", __LINE__, value);
+    udelay(100);
+#endif
+
+    value = readl(reg_base_reset + RESET0_LEVEL * 4);
+    value = value & (~(0x1<<20));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET0_LEVEL * 4);
+    udelay(100);
+
+#if 0
+    value = readl(reg_base_reset + RESET0_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET0_REGISTER=%x\n", __LINE__, value);
+    udelay(100);
+#endif
+
+///////////////
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(100);
+    value = readl(reg_base_reset + RESET2_MASK * 4);
+    value = value & (~(0x1<<14));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET2_MASK * 4);
+
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    value = readl(reg_base_reset + RESET2_LEVEL * 4);
+    value = value & (~(0x1<<14));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET2_LEVEL * 4);
+    udelay(100);
+
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(100);
+    value = readl(reg_base_reset + RESET0_LEVEL * 4);
+    value = value | ((0x1<<20));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET0_LEVEL * 4);
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(100);
+    value = readl(reg_base_reset + RESET2_LEVEL * 4);
+    value = value | ((0x1<<14));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET2_LEVEL * 4);
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(10); // OR POLL for reset done
+
+    return 0;
+}
+
+void  meson_gpu_pwr_on(struct kbase_device *kbdev, u32  mask)
+{
+    u32    part1_done = 0;
+    u32     value = 0;
+    u32     count = 0;
+
+    kbdev->pm.backend.gpu_powered = true;
+    MESON_PRINT("%s, %d begin\n", __func__,__LINE__);
+
+#if 0
+    value = 0x10 | (0x1<<16);
+#else
+    value = 0xfff | (0x20<<16);
+#endif
+    while (part1_done != value) {
+        Mali_WrReg(GPU_CONTROL_REG(PWR_KEY), 0x2968A819);
+        Mali_WrReg(GPU_CONTROL_REG(PWR_OVERRIDE1), value);
+        part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+        MESON_PRINT("write again, count=%d, overrider1=%x\n", count, part1_done);
+        udelay(20);
+        count ++;
+        if (0 == (count %100)) MESON_PRINT("write again, count%d\n", count);
+    }
+    part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+    MESON_PRINT("write again, count=%d, overrider1=%x\n", count, part1_done);
+
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+
+    MESON_PRINT("%s, %d\n", __func__,__LINE__);
+    if ((mask & 0x1) != 0 ) {
+        MESON_PRINT("%s, %d\n", __func__,__LINE__);
+        Mali_WrReg(0x00000190, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x00000194, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x000001a0, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x000001a4, 0xffffffff);    // Power on all cores
+    }
+    MESON_PRINT("power on %d\n", __LINE__);
+
+    if ( (mask >> 1) != 0 ) {
+        Mali_WrReg(0x00000180, mask >> 1);    // Power on all cores
+        Mali_WrReg(0x00000184, 0x0);    // Power on all cores
+        MESON_PRINT("%s, %d\n", __func__,__LINE__);
+    }
+
+    MESON_PRINT("%s, %d\n", __func__,__LINE__);
+    if ( mask != 0 ) {
+        MESON_PRINT("%s, %d\n", __func__,__LINE__);
+        udelay(10);
+        part1_done = Mali_RdReg(0x0000020);
+        while(part1_done ==0) {
+            part1_done = Mali_RdReg(0x00000020);
+            udelay(10);
+        }
+
+        MESON_PRINT("%s, %d\n", __func__,__LINE__);
+        Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+    }
+    MESON_PRINT("%s, %d end\n", __func__,__LINE__);
+}
+
+void  meson_gpu_pwr_off(struct kbase_device *kbdev, u32  mask)
+{
+#if 1
+    u32 part1_done;
+    part1_done = 0;
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+
+    if ( (mask >> 1) != 0 ) {
+        Mali_WrReg(0x000001C0, mask >> 1);    // Power off all cores
+        Mali_WrReg(0x000001C4, 0x0);          // Power off all cores
+    }
+
+    if (  (mask & 0x1) != 0 ) {
+        Mali_WrReg(0x000001D0, 0xffffffff);    // Power off all cores
+        Mali_WrReg(0x000001D4, 0xffffffff);    // Power off all cores
+        Mali_WrReg(0x000001E0, 0xffffffff);    // Power off all cores
+        Mali_WrReg(0x000001E4, 0xffffffff);    // Power off all cores
+    }
+
+    if ( mask != 0 ) {
+        part1_done = Mali_RdReg(0x0000020);
+        while((part1_done ==0)) { part1_done = Mali_RdReg(0x00000020); }
+        MESON_PRINT("Mali_pwr_off:gpu_irq : %x\n", part1_done);
+        Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+    }
+#endif
+}
+
+
+
+
+static int kbase_platform_meson_init(struct kbase_device *kbdev)
+{
+#if 0
+	int err;
+#endif
+	struct device_node *gpu_dn = kbdev->dev->of_node;
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	unsigned long flags;
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+	struct meson_context *platform;
+    u32 part1_done = 0;
+
+	platform = kmalloc(sizeof(struct meson_context), GFP_KERNEL);
+
+	if (!platform)
+		return -ENOMEM;
+
+	memset(platform, 0, sizeof(struct meson_context));
+
+	kbdev->platform_context = (void *) platform;
+
+    platform->reg_base_reset = of_iomap(gpu_dn, 1);
+	_dev_info(kbdev->dev, "reset io source  0x%p\n",platform->reg_base_reset);
+
+	platform->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(kbdev->dev, "ao io source  0x%p\n", platform->reg_base_aobus);
+
+	platform->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(kbdev->dev, "hiu io source  0x%p\n", platform->reg_base_hiubus);
+
+    platform->clk_mali = devm_clk_get(kbdev->dev, "gpu_mux");
+	if (IS_ERR(platform->clk_mali)) {
+		dev_err(kbdev->dev, "failed to get clock pointer\n");
+	} else {
+        clk_prepare_enable(platform->clk_mali);
+        clk_set_rate(platform->clk_mali, 285000000);
+    }
+    MESON_PRINT("%s, %d begin\n", __func__, __LINE__);
+    meson_gpu_reset(kbdev);
+    meson_gpu_pwr_on(kbdev, 0xe);
+
+    part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+    MESON_PRINT("line%d, overrider1=%x\n", __LINE__, part1_done);
+    meson_gpu_pwr_off(kbdev, 0xe);
+#if 1
+    part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+    MESON_PRINT("line%d, overrider1=%x\n", __LINE__, part1_done);
+
+    meson_gpu_reset(kbdev);
+    meson_gpu_pwr_on(kbdev, 0xe);
+    part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+    MESON_PRINT("line%d, overrider1=%x\n", __LINE__, part1_done);
+#endif
+#if 0
+	platform->cmu_pmu_status = 0;
+	platform->dvfs_wq = NULL;
+	platform->polling_speed = 100;
+	gpu_debug_level = DVFS_WARNING;
+#endif
+
+	mutex_init(&platform->gpu_clock_lock);
+	mutex_init(&platform->gpu_dvfs_handler_lock);
+	spin_lock_init(&platform->gpu_dvfs_spinlock);
+#if 0
+	err = gpu_control_module_init(kbdev);
+	if (err)
+		goto clock_init_fail;
+
+	/* dvfs gobernor init*/
+	gpu_dvfs_governor_init(kbdev, G3D_DVFS_GOVERNOR_DEFAULT);
+#endif
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags);
+	platform->wakeup_lock = 0;
+	spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+#if 0
+	/* dvfs handler init*/
+	gpu_dvfs_handler_init(kbdev);
+
+	err = gpu_notifier_init(kbdev);
+	if (err)
+		goto notifier_init_fail;
+
+	err = gpu_create_sysfs_file(kbdev->dev);
+	if (err)
+		goto sysfs_init_fail;
+#endif
+
+    MESON_PRINT("%s, %d end\n", __func__, __LINE__);
+	return 0;
+#if 0
+clock_init_fail:
+notifier_init_fail:
+sysfs_init_fail:
+	kfree(platform);
+
+	return err;
+#endif
+}
+
+/**
+ ** Meson hardware specific termination
+ **/
+static void kbase_platform_meson_term(struct kbase_device *kbdev)
+{
+	struct meson_context *platform;
+	platform = (struct meson_context *) kbdev->platform_context;
+#if 0
+	gpu_notifier_term();
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	gpu_dvfs_handler_deinit(kbdev);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	gpu_control_module_term(kbdev);
+#endif
+
+	kfree(kbdev->platform_context);
+	kbdev->platform_context = 0;
+
+#if 0
+	gpu_remove_sysfs_file(kbdev->dev);
+#endif
+}
+
+struct kbase_platform_funcs_conf platform_funcs = {
+	.platform_init_func = &kbase_platform_meson_init,
+	.platform_term_func = &kbase_platform_meson_term,
+};
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.h
new file mode 100644
index 0000000..89ff21b
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.h
@@ -0,0 +1,65 @@
+
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+
+#define RESET0_MASK     0x00
+#define RESET1_MASK     0x01
+#define RESET2_MASK     0x02
+
+#define RESET0_LEVEL    0x10
+#define RESET1_LEVEL    0x11
+#define RESET2_LEVEL    0x12
+
+#define Mali_WrReg(regnum, value)   writel((value),(kbdev->reg + (regnum)))
+#define Mali_RdReg(regnum)          readl(kbdev->reg + (regnum))
+#define MESON_PRINT(...)            
+
+struct meson_context {
+	struct mutex gpu_clock_lock;
+	struct mutex gpu_dvfs_handler_lock;
+	spinlock_t gpu_dvfs_spinlock;
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	int utilization;
+	int util_gl_share;
+	int util_cl_share[2];
+#ifdef CONFIG_CPU_THERMAL_IPA
+	int norm_utilisation;
+	int freq_for_normalisation;
+	unsigned long long power;
+#endif /* CONFIG_CPU_THERMAL_IPA */
+	int max_lock;
+	int min_lock;
+#if 0
+	int user_max_lock[NUMBER_LOCK];
+	int user_min_lock[NUMBER_LOCK];
+#endif
+	int target_lock_type;
+	int down_requirement;
+	bool wakeup_lock;
+	int governor_num;
+	int governor_type;
+	char governor_list[100];
+	bool dvfs_status;
+#ifdef CONFIG_CPU_THERMAL_IPA
+	int time_tick;
+	u32 time_busy;
+	u32 time_idle;
+#endif /* CONFIG_CPU_THERMAL_IPA */
+#endif
+	int cur_clock;
+	int cur_voltage;
+	int voltage_margin;
+	bool tmu_status;
+	int debug_level;
+	int polling_speed;
+	struct workqueue_struct *dvfs_wq;
+    void __iomem *reg_base_reset;
+    void __iomem *reg_base_aobus;
+    void __iomem *reg_base_hiubus;
+    struct clk  *clk_mali;
+    struct clk  *clk_gp;
+};
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
new file mode 100755
index 0000000..0ae00b4
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
@@ -0,0 +1,308 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <linux/pm_runtime.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_config_platform.h"
+
+void *reg_base_hiubus = NULL;
+u32  override_value_aml = 0;
+static int first = 1;
+
+#define RESET0_MASK    0x10
+#define RESET1_MASK    0x11
+#define RESET2_MASK    0x12
+
+#define RESET0_LEVEL    0x20
+#define RESET1_LEVEL    0x21
+#define RESET2_LEVEL    0x22
+#define Rd(r)                           readl((reg_base_hiubus) + ((r)<<2))
+#define Wr(r, v)                        writel((v), ((reg_base_hiubus) + ((r)<<2)))
+#define Mali_WrReg(regnum, value)   kbase_reg_write(kbdev, (regnum), (value))
+#define Mali_RdReg(regnum)          kbase_reg_read(kbdev, (regnum))
+#define stimulus_print   printk
+#define stimulus_display printk
+#define Mali_pwr_off(x)  Mali_pwr_off_with_kdev(kbdev, (x))
+
+extern u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type);
+
+//[0]:CG [1]:SC0 [2]:SC2
+static void  Mali_pwr_on_with_kdev ( struct kbase_device *kbdev, uint32_t  mask)
+{
+    uint32_t    part1_done;
+    uint32_t    shader_present;
+    uint32_t    tiler_present;
+    uint32_t    l2_present;
+
+    part1_done = 0;
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+
+    shader_present = Mali_RdReg(0x100);
+    tiler_present  = Mali_RdReg(0x110);
+    l2_present     = Mali_RdReg(0x120);
+    printk("shader_present=%d, tiler_present=%d, l2_present=%d\n",
+            shader_present, tiler_present, l2_present);
+
+    if (  mask == 0 ) {
+        Mali_WrReg(0x00000180, 0xffffffff);   // Power on all cores (shader low)
+        Mali_WrReg(0x00000184, 0xffffffff);   // Power on all cores (shader high)
+        Mali_WrReg(0x00000190, 0xffffffff);   // Power on all cores (tiler low)
+        Mali_WrReg(0x00000194, 0xffffffff);   // Power on all cores (tiler high)
+        Mali_WrReg(0x000001a0, 0xffffffff);   // Power on all cores (l2 low)
+        Mali_WrReg(0x000001a4, 0xffffffff);   // Power on all cores (l2 high)
+    } else {
+        Mali_WrReg(0x00000180, mask);    // Power on all cores (shader low)
+        Mali_WrReg(0x00000184, 0);       // Power on all cores (shader high)
+        Mali_WrReg(0x00000190, mask);    // Power on all cores (tiler low)
+        Mali_WrReg(0x00000194, 0);       // Power on all cores (tiler high)
+        Mali_WrReg(0x000001a0, mask);    // Power on all cores (l2 low)
+        Mali_WrReg(0x000001a4, 0);       // Power on all cores (l2 high)
+    }
+
+    part1_done = Mali_RdReg(0x0000020);
+    while(part1_done ==0) { part1_done = Mali_RdReg(0x00000020); }
+    stimulus_display("Mali_pwr_on:gpu_irq : %x\n", part1_done);
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+}
+
+//[0]:CG [1]:SC0 [2]:SC2
+#if 0
+static void  Mali_pwr_off_with_kdev( struct kbase_device *kbdev, uint32_t  mask)
+{
+    uint32_t    part1_done;
+    part1_done = 0;
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+
+    if ( mask == 0 ) {
+        Mali_WrReg(0x000001C0, 0xffffffff);    // Power off all cores (tiler low)
+        Mali_WrReg(0x000001C4, 0xffffffff);    // Power off all cores (tiler high)
+        Mali_WrReg(0x000001D0, 0xffffffff);    // Power off all cores (l2 low)
+        Mali_WrReg(0x000001D4, 0xffffffff);    // Power off all cores (l2 high)
+        Mali_WrReg(0x000001E0, 0xffffffff);    // Power off all cores (shader low)
+        Mali_WrReg(0x000001E4, 0xffffffff);    // Power off all cores (shader high)
+    } else {
+        Mali_WrReg(0x000001C0, mask);    // Power off all cores  (tiler low)
+        Mali_WrReg(0x000001C4, 0x0);     // Power off all cores  (tiler high)
+        Mali_WrReg(0x000001D0, mask);    // Power off all cores  (l2 low)
+        Mali_WrReg(0x000001D4, 0x0);     // Power off all cores  (l2 high)
+        Mali_WrReg(0x000001E0, mask);    // Power off all cores  (shader low)
+        Mali_WrReg(0x000001E4, 0x0);     // Power off all cores  (shader high)
+    }
+
+    part1_done = Mali_RdReg(0x0000020);
+    while((part1_done ==0)) { part1_done = Mali_RdReg(0x00000020); }
+    stimulus_display("Mali_pwr_off:gpu_irq : %x\n", part1_done);
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+}
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	int ret = 1; /* Assume GPU has been powered off */
+	int error;
+	struct platform_device *pdev = to_platform_device(kbdev->dev);
+	struct resource *reg_res;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+    //printk("20151013, %s, %d\n", __FILE__, __LINE__);
+    if (first == 0) goto ret;
+
+    reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+    if (!reg_res) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) 
+        reg_base_hiubus = ioremap(reg_res->start, resource_size(reg_res));
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+
+//JOHNT
+    // Level reset mail
+
+    // Level reset mail
+    //Wr(P_RESET2_MASK, ~(0x1<<14));
+    //Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    //Wr(P_RESET2_LEVEL, 0xffffffff);
+    //Wr(P_RESET0_LEVEL, 0xffffffff);
+
+    value = Rd(RESET0_MASK);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_MASK, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+///////////////
+    value = Rd(RESET2_MASK);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_MASK, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value | ((0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value | ((0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    udelay(10); // OR POLL for reset done
+
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819);
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16));
+
+    Mali_pwr_on_with_kdev(kbdev, 0x1);
+    //printk("set PWR_ORRIDE, reg=%p, reg_start=%llx, reg_size=%zx, reg_mapped=%p\n",
+    //        kbdev->reg, kbdev->reg_start, kbdev->reg_size, reg_base_hiubus);
+	dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+			(void *)kbdev->dev->pm_domain);
+
+    first = 0;
+    //printk("%s, %d\n", __FILE__, __LINE__);
+ret:
+	error = pm_runtime_get_sync(kbdev->dev);
+	if (error == 1) {
+		/*
+		 * Let core know that the chip has not been
+		 * powered off, so we can save on re-initialization.
+		 */
+		ret = 0;
+	}
+	udelay(100);
+#if 1
+
+    core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+    l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+    tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+    //printk("core_ready=%llx, l2_ready=%llx, tiler_ready=%llx\n", core_ready, l2_ready, tiler_ready);
+#endif
+	dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error);
+
+	return ret;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+    //printk("%s, %d\n", __FILE__, __LINE__);
+#if 0
+    iounmap(reg_base_hiubus);
+    reg_base_hiubus = NULL;
+#endif
+	pm_runtime_mark_last_busy(kbdev->dev);
+	pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+	
+	pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY);
+	pm_runtime_use_autosuspend(kbdev->dev);
+	pm_runtime_set_active(kbdev->dev);
+	
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+	pm_runtime_enable(kbdev->dev);
+
+	if (!pm_runtime_enabled(kbdev->dev)) {
+		dev_warn(kbdev->dev, "pm_runtime not enabled");
+		ret = -ENOSYS;
+	}
+
+	return ret;
+}
+
+static void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+	pm_runtime_disable(kbdev->dev);
+}
+#endif
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+	return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+	int ret = pm_callback_runtime_on(kbdev);
+
+	WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+	pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback = pm_callback_suspend,
+	.power_resume_callback = pm_callback_resume,
+#ifdef KBASE_PM_RUNTIME
+	.power_runtime_init_callback = kbase_device_runtime_init,
+	.power_runtime_term_callback = kbase_device_runtime_disable,
+	.power_runtime_on_callback = pm_callback_runtime_on,
+	.power_runtime_off_callback = pm_callback_runtime_off,
+#else				/* KBASE_PM_RUNTIME */
+	.power_runtime_init_callback = NULL,
+	.power_runtime_term_callback = NULL,
+	.power_runtime_on_callback = NULL,
+	.power_runtime_off_callback = NULL,
+#endif				/* KBASE_PM_RUNTIME */
+};
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
new file mode 100644
index 0000000..41185d0
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
@@ -0,0 +1,15 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/kernel.h>
+#ifndef MALI_PLATFORM_H_
+#define MALI_PLATFORM_H_
+
+extern u32 mali_gp_reset_fail;
+extern u32 mali_core_timeout;
+
+#endif /* MALI_PLATFORM_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
new file mode 100644
index 0000000..b541090
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.h
+ * Example core scaling policy.
+ */
+
+#ifndef __ARM_CORE_SCALING_H__
+#define __ARM_CORE_SCALING_H__
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+
+enum mali_scale_mode_t {
+	MALI_PP_SCALING = 0,
+	MALI_PP_FS_SCALING,
+	MALI_SCALING_DISABLE,
+	MALI_TURBO_MODE,
+	MALI_SCALING_MODE_MAX
+};
+
+typedef struct mali_dvfs_threshold_table {
+	uint32_t    freq_index;
+	uint32_t    voltage;
+	uint32_t    keep_count;
+	uint32_t    downthreshold;
+	uint32_t    upthreshold;
+    uint32_t    clk_freq;
+    const char  *clk_parent;
+    struct clk  *clkp_handle;
+    uint32_t    clkp_freq;
+} mali_dvfs_threshold_table;
+
+/**
+ *  restrictions on frequency and number of pp.
+ */
+typedef struct mali_scale_info_t {
+	u32 minpp;
+	u32 maxpp;
+	u32 minclk;
+	u32 maxclk;
+} mali_scale_info_t;
+
+/**
+ * Platform spesific data for meson chips.
+ */
+typedef struct mali_plat_info_t {
+	u32 cfg_pp; /* number of pp. */
+	u32 cfg_min_pp;
+	u32 turbo_clock; /* reserved clock src. */
+	u32 def_clock; /* gpu clock used most of time.*/
+	u32 cfg_clock; /* max clock could be used.*/
+	u32 cfg_clock_bkup; /* same as cfg_clock, for backup. */
+	u32 cfg_min_clock;
+
+	u32  sc_mpp; /* number of pp used most of time.*/
+	u32  bst_gpu; /* threshold for boosting gpu. */
+	u32  bst_pp; /* threshold for boosting PP. */
+
+	u32 *clk;
+	u32 *clk_sample;
+	u32 clk_len;
+	u32 have_switch; /* have clock gate switch or not. */
+
+	mali_dvfs_threshold_table *dvfs_table;
+	u32 dvfs_table_size;
+
+	mali_scale_info_t scale_info;
+	u32 maxclk_sysfs;
+	u32 maxpp_sysfs;
+
+	/* set upper limit of pp or frequency, for THERMAL thermal or band width saving.*/
+	u32 limit_on;
+
+	/* for boost up gpu by user. */
+	void (*plat_preheat)(void);
+
+    struct platform_device *pdev;
+    void __iomem *reg_base_hiubus;
+    void __iomem *reg_base_aobus;
+	struct work_struct wq_work;
+    struct clk *clk_mali;
+    struct clk *clk_mali_0;
+    struct clk *clk_mali_1;
+} mali_plat_info_t;
+mali_plat_info_t* get_mali_plat_data(void);
+
+/**
+ * Initialize core scaling policy.
+ *
+ * @note The core scaling policy will assume that all PP cores are on initially.
+ *
+ * @param num_pp_cores Total number of PP cores.
+ */
+int mali_core_scaling_init(mali_plat_info_t*);
+
+/**
+ * Terminate core scaling policy.
+ */
+void mali_core_scaling_term(void);
+
+/**
+ * cancel and flush scaling job queue.
+ */
+void flush_scaling_job(void);
+
+/* get current state(pp, clk). */
+void get_mali_rt_clkpp(u32* clk, u32* pp);
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush);
+void revise_mali_rt(void);
+/* get max gpu clk level of this chip*/
+int get_gpu_max_clk_level(void);
+
+/* get or set the scale mode. */
+u32 get_mali_schel_mode(void);
+void set_mali_schel_mode(u32 mode);
+
+/* for frequency reporter in DS-5 streamline. */
+u32 get_current_frequency(void);
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+#endif /* __ARM_CORE_SCALING_H__ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
new file mode 100644
index 0000000..64de2d5
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2010, 2012-2014 Amlogic Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ */
+
+/**
+ * @file mali_platform.c
+ * Platform specific Mali driver functions for:
+ * meson8m2 and the newer chip
+ */
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#include <asm/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/moduleparam.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_defs.h>
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+extern void mali_post_init(void);
+struct kbase_device;
+//static int gpu_dvfs_probe(struct platform_device *pdev)
+int platform_dt_init_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	int err = -1;
+
+	err = mali_meson_init_start(pdev);
+    mali_meson_init_finish(pdev);
+    mpgpu_class_init();
+    mali_post_init();
+    return err;
+}
+
+//static int gpu_dvfs_remove(struct platform_device *pdev)
+void platform_dt_term_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	printk("%s, %d\n", __FILE__, __LINE__);
+
+    mpgpu_class_exit();
+	mali_meson_uninit(pdev);
+
+}
+
+static u32 last_utilisation, last_util_gl_share, last_util_cl_share[2];
+inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2])
+{
+    last_utilisation = utilisation;
+    last_util_gl_share = util_gl_share;
+    last_util_cl_share[0] = util_cl_share[0];
+    last_util_cl_share[1] = util_cl_share[1];
+    mali_gpu_utilization_callback(utilisation*255/100);
+    return 1;
+}
+
+u32 mpgpu_get_utilization(void)
+{
+    return last_utilisation;
+}
+u32 mpgpu_get_util_gl_share(void)
+{
+    return last_util_gl_share;
+}
+u32 mpgpu_get_util_cl_share(u32 *util)
+{
+    util[0] = last_util_cl_share[0];
+    util[1] = last_util_cl_share[1];
+    return 0;
+}
+
+struct kbase_platform_funcs_conf dt_funcs_conf = {
+    .platform_init_func = platform_dt_init_func,
+    .platform_term_func = platform_dt_term_func,
+};
+#if 0
+static const struct of_device_id gpu_dvfs_ids[] = {
+	{ .compatible = "meson, gpu-dvfs-1.00.a" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, gpu_dvfs_ids);
+
+static struct platform_driver gpu_dvfs_driver = {
+	.driver = {
+		.name = "meson-gpu-dvfs",
+		.owner = THIS_MODULE,
+		.of_match_table = gpu_dvfs_ids,
+	},
+	.probe = gpu_dvfs_probe,
+	.remove = gpu_dvfs_remove,
+};
+module_platform_driver(gpu_dvfs_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Amlogic SH, MM");
+MODULE_DESCRIPTION("Driver for the Meson GPU dvfs");
+#endif
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
new file mode 100755
index 0000000..65f8e14
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
@@ -0,0 +1,38 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#ifndef MESON_MAIN_H_
+#define MESON_MAIN_H_
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+u32 set_max_mali_freq(u32 idx);
+u32 get_max_mali_freq(void);
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev);
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev);
+int mali_meson_uninit(struct platform_device* ptr_plt_dev);
+int mpgpu_class_init(void);
+void mpgpu_class_exit(void);
+void mali_gpu_utilization_callback(int utilization_pp);
+
+u32 mpgpu_get_utilization(void);
+u32 mpgpu_get_util_gl_share(void);
+u32 mpgpu_get_util_cl_share(u32 *util);
+u32 mpgpu_get_gpu_err_count(void);
+
+#endif /* MESON_MAIN_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
new file mode 100755
index 0000000..831ae72
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
@@ -0,0 +1,313 @@
+/*******************************************************************
+ *
+ *  Copyright C 2013 by Amlogic, Inc. All Rights Reserved.
+ *
+ *  Description:
+ *
+ *  Author: Amlogic Software
+ *  Created: 2010/4/1   19:46
+ *
+ *******************************************************************/
+/* Standard Linux headers */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/io.h>
+#include <plat/io.h>
+#include <asm/io.h>
+#endif
+
+//#include <mali_kbase.h>
+#include "meson_main2.h"
+
+int meson_gpu_data_invalid_count = 0;
+int meson_gpu_fault = 0;
+
+static ssize_t domain_stat_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	unsigned int val;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+	return sprintf(buf, "%x\n", val>>4);
+	return 0;
+}
+
+#define PREHEAT_CMD "preheat"
+#define PLL2_CMD "mpl2"  /* mpl2 [11] or [0xxxxxxx] */
+#define SCMPP_CMD "scmpp"  /* scmpp [number of pp your want in most of time]. */
+#define BSTGPU_CMD "bstgpu"  /* bstgpu [0-256] */
+#define BSTPP_CMD "bstpp"  /* bstpp [0-256] */
+#define LIMIT_CMD "lmt"  /* lmt [0 or 1] */
+#define MAX_TOKEN 20
+#define FULL_UTILIZATION 256
+
+static ssize_t mpgpu_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	char *pstart, *cprt = NULL;
+	u32 val = 0;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	cprt = skip_spaces(buf);
+	pstart = strsep(&cprt," ");
+	if (strlen(pstart) < 1)
+		goto quit;
+
+	if (!strncmp(pstart, PREHEAT_CMD, MAX_TOKEN)) {
+		if (pmali_plat->plat_preheat) {
+			pmali_plat->plat_preheat();
+		}
+	} else if (!strncmp(pstart, PLL2_CMD, MAX_TOKEN)) {
+		int base = 10;
+		if ((strlen(cprt) > 2) && (cprt[0] == '0') &&
+				(cprt[1] == 'x' || cprt[1] == 'X'))
+			base = 16;
+		if (kstrtouint(cprt, base, &val) <0)
+			goto quit;
+		if (val < 11)
+			pmali_plat->cfg_clock = pmali_plat->cfg_clock_bkup;
+		else
+			pmali_plat->cfg_clock = pmali_plat->turbo_clock;
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		set_str_src(val);
+	} else if (!strncmp(pstart, SCMPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < pmali_plat->cfg_pp)) {
+			pmali_plat->sc_mpp = val;
+		}
+	} else if (!strncmp(pstart, BSTGPU_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_gpu = val;
+		}
+	} else if (!strncmp(pstart, BSTPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_pp = val;
+		}
+	} else if (!strncmp(pstart, LIMIT_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+
+		if (val < 2) {
+			pmali_plat->limit_on = val;
+			if (val == 0) {
+				pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+				pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+				revise_mali_rt();
+			}
+		}
+	}
+quit:
+	return count;
+}
+
+static ssize_t scale_mode_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_mali_schel_mode());
+}
+
+static ssize_t scale_mode_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	set_mali_schel_mode(val);
+
+	return count;
+}
+
+static ssize_t max_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxclk:%d, maxclk_sys:%d, max gpu level=%d\n",
+			pmali_plat->scale_info.maxclk, pmali_plat->maxclk_sysfs, get_gpu_max_clk_level());
+	return sprintf(buf, "%d\n", get_gpu_max_clk_level());
+}
+
+static ssize_t max_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_clock) || (val < pinfo->minclk))
+		return -EINVAL;
+
+	pmali_plat->maxclk_sysfs = val;
+	pinfo->maxclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minclk);
+}
+
+static ssize_t min_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxclk))
+		return -EINVAL;
+
+	pinfo->minclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_current_frequency());
+}
+
+static ssize_t freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(val, pp, 1);
+
+	return count;
+}
+
+static ssize_t utilization_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", mpgpu_get_utilization());
+}
+
+static ssize_t util_gl_share_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", mpgpu_get_util_gl_share());
+}
+
+static ssize_t util_cl_share_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+    u32 val[2];
+
+    mpgpu_get_util_cl_share(val);
+
+	return sprintf(buf, "%d  %d\n", val[0], val[1]);
+}
+
+u32 mpgpu_get_gpu_err_count(void)
+{
+    return (meson_gpu_fault + meson_gpu_data_invalid_count);
+}
+
+static ssize_t meson_gpu_get_err_count(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", mpgpu_get_gpu_err_count());
+}
+
+static ssize_t mpgpu_set_err_count(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	meson_gpu_fault = val;
+
+	return count;
+}
+
+static struct class_attribute mali_class_attrs[] = {
+	__ATTR(domain_stat,	0644, domain_stat_read, NULL),
+	__ATTR(mpgpucmd,	0644, NULL,		mpgpu_write),
+	__ATTR(scale_mode,	0644, scale_mode_read,  scale_mode_write),
+	__ATTR(min_freq,	0644, min_freq_read,  	min_freq_write),
+	__ATTR(max_freq,	0644, max_freq_read,	max_freq_write),
+	__ATTR(cur_freq,	0644, freq_read,	freq_write),
+	__ATTR(utilization,	0644, utilization_read, NULL),
+	__ATTR(util_gl,	    0644, util_gl_share_read, NULL),
+	__ATTR(util_cl,	    0644, util_cl_share_read, NULL),
+	__ATTR(gpu_err,	    0644, meson_gpu_get_err_count, mpgpu_set_err_count),
+};
+
+static struct class mpgpu_class = {
+	.name = "mpgpu",
+};
+
+int mpgpu_class_init(void)
+{
+	int ret = 0;
+	int i;
+	int attr_num =  ARRAY_SIZE(mali_class_attrs);
+
+	ret = class_register(&mpgpu_class);
+	if (ret) {
+		printk(KERN_ERR "%s: class_register failed\n", __func__);
+		return ret;
+	}
+	for (i = 0; i< attr_num; i++) {
+		ret = class_create_file(&mpgpu_class, &mali_class_attrs[i]);
+		if (ret) {
+			printk(KERN_ERR "%d ST: class item failed to register\n", i);
+		}
+	}
+	return ret;
+}
+
+void  mpgpu_class_exit(void)
+{
+	class_unregister(&mpgpu_class);
+}
+
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
new file mode 100755
index 0000000..3997300
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
@@ -0,0 +1,246 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include <asm/io.h>
+#ifdef CONFIG_AMLOGIC_GPU_THERMAL
+#include <linux/amlogic/gpu_cooling.h>
+#include <linux/amlogic/gpucore_cooling.h>
+//#include <linux/amlogic/aml_thermal_hw.h>
+#include <linux/amlogic/meson_cooldev.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+/*
+ *    For Meson 8 M2.
+ *
+ */
+static void mali_plat_preheat(void);
+static mali_plat_info_t mali_plat_data = {
+    .bst_gpu = 210, /* threshold for boosting gpu. */
+    .bst_pp = 160, /* threshold for boosting PP. */
+    .have_switch = 1,
+    .limit_on = 1,
+    .plat_preheat = mali_plat_preheat,
+};
+
+static void mali_plat_preheat(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    u32 pre_fs;
+    u32 clk, pp;
+
+    if (get_mali_schel_mode() != MALI_PP_FS_SCALING)
+        return;
+
+    get_mali_rt_clkpp(&clk, &pp);
+    pre_fs = mali_plat_data.def_clock + 1;
+    if (clk < pre_fs)
+        clk = pre_fs;
+    if (pp < mali_plat_data.sc_mpp)
+        pp = mali_plat_data.sc_mpp;
+    set_mali_rt_clkpp(clk, pp, 1);
+#endif
+}
+
+mali_plat_info_t* get_mali_plat_data(void) {
+    return &mali_plat_data;
+}
+
+int get_mali_freq_level(int freq)
+{
+    int i = 0, level = -1;
+    int mali_freq_num;
+
+    if (freq < 0)
+        return level;
+
+    mali_freq_num = mali_plat_data.dvfs_table_size - 1;
+    if (freq <= mali_plat_data.clk_sample[0])
+        level = mali_freq_num-1;
+    else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1])
+        level = 0;
+    else {
+        for (i=0; i<mali_freq_num - 1 ;i++) {
+            if (freq >= mali_plat_data.clk_sample[i] && freq <= mali_plat_data.clk_sample[i + 1]) {
+                level = i;
+                level = mali_freq_num-level - 1;
+            }
+        }
+    }
+    return level;
+}
+
+unsigned int get_mali_max_level(void)
+{
+    return mali_plat_data.dvfs_table_size - 1;
+}
+
+int get_gpu_max_clk_level(void)
+{
+    return mali_plat_data.cfg_clock;
+}
+
+#ifdef CONFIG_AMLOGIC_GPU_THERMAL
+static void set_limit_mali_freq(u32 idx)
+{
+    if (mali_plat_data.limit_on == 0)
+        return;
+    if (idx > mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk)
+        return;
+    if (idx > mali_plat_data.maxclk_sysfs) {
+        printk("idx > max freq\n");
+        return;
+    }
+    mali_plat_data.scale_info.maxclk= idx;
+    revise_mali_rt();
+}
+
+static u32 get_limit_mali_freq(void)
+{
+    return mali_plat_data.scale_info.maxclk;
+}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 get_mali_utilization(void)
+{
+#ifndef MESON_DRV_BRING
+    return 55;
+#else
+    return (_mali_ukk_utilization_pp() * 100) / 256;
+#endif
+}
+#endif
+#endif
+
+#ifdef CONFIG_AMLOGIC_GPU_THERMAL
+static u32 set_limit_pp_num(u32 num)
+{
+    u32 ret = -1;
+    if (mali_plat_data.limit_on == 0)
+        goto quit;
+    if (num > mali_plat_data.cfg_pp ||
+            num < mali_plat_data.scale_info.minpp)
+        goto quit;
+
+    if (num > mali_plat_data.maxpp_sysfs) {
+        printk("pp > sysfs set pp\n");
+        goto quit;
+    }
+
+    mali_plat_data.scale_info.maxpp = num;
+    revise_mali_rt();
+    ret = 0;
+quit:
+    return ret;
+}
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 mali_get_online_pp(void)
+{
+    unsigned int val;
+    mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+    val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+    if (val == 0x07)    /* No pp is working */
+        return 0;
+
+#ifndef MESON_DRV_BRING
+    return 2;
+#else
+    return mali_executor_get_num_cores_enabled();
+#endif
+}
+#endif
+#endif
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+    //dev_set_drvdata(&ptr_plt_dev->dev, &mali_plat_data);
+    mali_dt_info(ptr_plt_dev, &mali_plat_data);
+    mali_clock_init_clk_tree(ptr_plt_dev);
+    return 0;
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+    if (mali_core_scaling_init(&mali_plat_data) < 0)
+        return -1;
+    return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+    mali_core_scaling_term();
+    return 0;
+}
+
+void mali_post_init(void)
+{
+#ifdef CONFIG_AMLOGIC_GPU_THERMAL
+    int err;
+    struct gpufreq_cooling_device *gcdev = NULL;
+    struct gpucore_cooling_device *gccdev = NULL;
+
+    gcdev = gpufreq_cooling_alloc();
+    register_gpu_freq_info(get_current_frequency);
+    if (IS_ERR(gcdev))
+        printk("malloc gpu cooling buffer error!!\n");
+    else if (!gcdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gcdev->get_gpu_freq_level = get_mali_freq_level;
+        gcdev->get_gpu_max_level = get_mali_max_level;
+        gcdev->set_gpu_freq_idx = set_limit_mali_freq;
+        gcdev->get_gpu_current_max_level = get_limit_mali_freq;
+#ifdef CONFIG_DEVFREQ_THERMAL
+        gcdev->get_gpu_freq = get_mali_freq;
+        gcdev->get_gpu_loading = get_mali_utilization;
+        gcdev->get_online_pp = mali_get_online_pp;
+#endif
+        err = gpufreq_cooling_register(gcdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        meson_gcooldev_min_update(gcdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu cooling register okay with err=%d\n",err);
+    }
+
+    gccdev = gpucore_cooling_alloc();
+    if (IS_ERR(gccdev))
+        printk("malloc gpu core cooling buffer error!!\n");
+    else if (!gccdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gccdev->max_gpu_core_num=mali_plat_data.cfg_pp;
+        gccdev->set_max_pp_num=set_limit_pp_num;
+        err = (int)gpucore_cooling_register(gccdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        meson_gcooldev_min_update(gccdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu core cooling register okay with err=%d\n",err);
+    }
+#endif
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
new file mode 100644
index 0000000..4fa0773
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
@@ -0,0 +1,584 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.c
+ * Example core scaling policy.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+
+#if AMLOGIC_GPU_USE_GPPLL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16)
+#include <linux/amlogic/amports/gp_pll.h>
+#else
+#include <linux/amlogic/media/clk/gp_pll.h>
+#endif
+#endif
+
+#define LOG_MALI_SCALING 1
+#include "meson_main2.h"
+#include "mali_clock.h"
+
+static int currentStep;
+#ifndef CONFIG_MALI_DVFS
+static int num_cores_enabled;
+static int lastStep;
+static struct work_struct wq_work;
+static mali_plat_info_t* pmali_plat = NULL;
+#endif
+static int  scaling_mode = MALI_PP_FS_SCALING;
+extern int  mali_pm_statue;
+//static int  scaling_mode = MALI_SCALING_DISABLE;
+//static int  scaling_mode = MALI_PP_SCALING;
+
+#if AMLOGIC_GPU_USE_GPPLL
+static struct gp_pll_user_handle_s *gp_pll_user_gpu;
+static int is_gp_pll_get;
+static int is_gp_pll_put;
+#endif
+static unsigned scaling_dbg_level = 0;
+module_param(scaling_dbg_level, uint, 0644);
+MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level");
+
+#define scalingdbg(level, fmt, arg...)				   \
+	do {											   \
+		if (scaling_dbg_level >= (level))			   \
+		printk(fmt , ## arg);						   \
+	} while (0)
+
+#ifndef CONFIG_MALI_DVFS
+static inline void mali_clk_exected(void)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	uint32_t execStep = currentStep;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[currentStep];
+
+	//if (pdvfs[currentStep].freq_index == pdvfs[lastStep].freq_index) return;
+	if ((pdvfs[execStep].freq_index == pdvfs[lastStep].freq_index) ||
+		(pdvfs[execStep].clk_freq == pdvfs[lastStep].clk_freq)){
+		return;
+	}
+
+#if AMLOGIC_GPU_USE_GPPLL
+	if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+		gp_pll_request(gp_pll_user_gpu);
+		if (!is_gp_pll_get) {
+			//printk("not get pll\n");
+			execStep = currentStep - 1;
+		}
+	} else {
+		//not get the gp pll, do need put
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+		gp_pll_release(gp_pll_user_gpu);
+	}
+#else
+	if ((0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) &&
+			!IS_ERR(dvfs_tbl->clkp_handle) &&
+			(0 != dvfs_tbl->clkp_freq)) {
+		clk_prepare_enable(dvfs_tbl->clkp_handle);
+		clk_set_rate(dvfs_tbl->clkp_handle, dvfs_tbl->clkp_freq);
+	}
+
+#endif
+	//mali_dev_pause();
+	mali_clock_set(pdvfs[execStep].freq_index);
+	//mali_dev_resume();
+#if AMLOGIC_GPU_USE_GPPLL==0
+	if ((0 == strcmp(pdvfs[lastStep].clk_parent,"gp0_pll")) &&
+		(0 != strcmp(pdvfs[execStep].clk_parent, "gp0_pll"))) {
+			clk_disable_unprepare(pdvfs[lastStep].clkp_handle);
+	}
+#endif
+
+	lastStep = execStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	if (is_gp_pll_put) {
+		//printk("release gp0 pll\n");
+		gp_pll_release(gp_pll_user_gpu);
+		gp_pll_request(gp_pll_user_gpu);
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+	}
+#endif
+
+}
+#if AMLOGIC_GPU_USE_GPPLL
+static int gp_pll_user_cb_gpu(struct gp_pll_user_handle_s *user,
+		int event)
+{
+	if (event == GP_PLL_USER_EVENT_GRANT) {
+		//printk("granted\n");
+		is_gp_pll_get = 1;
+		is_gp_pll_put = 0;
+		schedule_work(&wq_work);
+	} else if (event == GP_PLL_USER_EVENT_YIELD) {
+		//printk("ask for yield\n");
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 1;
+		schedule_work(&wq_work);
+	}
+
+	return 0;
+}
+#endif
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+    cores = cores;
+    return 0;
+}
+
+static void do_scaling(struct work_struct *work)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	int err = mali_perf_set_num_pp_cores(num_cores_enabled);
+    if (err < 0) scalingdbg(1, "set pp failed");
+
+	scalingdbg(1, "set pp cores to %d\n", num_cores_enabled);
+	scalingdbg(1, "pdvfs[%d].freq_index=%d, pdvfs[%d].freq_index=%d\n",
+			currentStep, pdvfs[currentStep].freq_index,
+			lastStep, pdvfs[lastStep].freq_index);
+	mali_clk_exected();
+#ifdef CONFIG_MALI400_PROFILING
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+			MALI_PROFILING_EVENT_CHANNEL_GPU |
+			MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+			get_current_frequency(),
+			0,	0,	0,	0);
+#endif
+}
+#endif
+
+u32 revise_set_clk(u32 val, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+
+	pinfo = &pmali_plat->scale_info;
+
+	if (val < pinfo->minclk)
+		val = pinfo->minclk;
+	else if (val >  pinfo->maxclk)
+		val =  pinfo->maxclk;
+
+	if (val != currentStep) {
+		currentStep = val;
+		if (flush)
+			schedule_work(&wq_work);
+		else
+			ret = 1;
+	}
+#endif
+	return ret;
+}
+
+void get_mali_rt_clkpp(u32* clk, u32* pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	*clk = currentStep;
+	*pp = num_cores_enabled;
+#endif
+}
+
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+	u32 flush_work = 0;
+
+	pinfo = &pmali_plat->scale_info;
+	if (clk < pinfo->minclk)
+		clk = pinfo->minclk;
+	else if (clk >  pinfo->maxclk)
+		clk =  pinfo->maxclk;
+
+	if (clk != currentStep) {
+		currentStep = clk;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+	if (pp < pinfo->minpp)
+		pp = pinfo->minpp;
+	else if (pp > pinfo->maxpp)
+		pp = pinfo->maxpp;
+
+	if (pp != num_cores_enabled) {
+		num_cores_enabled = pp;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+
+	if (flush_work)
+		schedule_work(&wq_work);
+#endif
+	return ret;
+}
+
+void revise_mali_rt(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	set_mali_rt_clkpp(currentStep, num_cores_enabled, 1);
+#endif
+}
+
+void flush_scaling_job(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	cancel_work_sync(&wq_work);
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 enable_one_core(void)
+{
+	scalingdbg(2, "meson:	 one more pp, curent has %d pp cores\n",  num_cores_enabled + 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled + 1, 0);
+}
+
+static u32 disable_one_core(void)
+{
+	scalingdbg(2, "meson: disable one pp, current has %d pp cores\n",  num_cores_enabled - 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled - 1, 0);
+}
+
+static u32 enable_max_num_cores(void)
+{
+	return set_mali_rt_clkpp(currentStep, pmali_plat->scale_info.maxpp, 0);
+}
+
+static u32 enable_pp_cores(u32 val)
+{
+	scalingdbg(2, "meson: enable %d pp cores\n", val);
+	return set_mali_rt_clkpp(currentStep, val, 0);
+}
+#endif
+
+int mali_core_scaling_init(mali_plat_info_t *mali_plat)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_plat == NULL) {
+		scalingdbg(2, " Mali platform data is NULL!!!\n");
+		return -1;
+	}
+
+	pmali_plat = mali_plat;
+    printk("mali_plat=%p\n", mali_plat);
+	num_cores_enabled = pmali_plat->sc_mpp;
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_gpu = gp_pll_user_register("gpu", 1,
+		gp_pll_user_cb_gpu);
+	//not get the gp pll, do need put
+	is_gp_pll_get = 0;
+	is_gp_pll_put = 0;
+	if (gp_pll_user_gpu == NULL) printk("register gp pll user for gpu failed\n");
+#endif
+
+	currentStep = pmali_plat->def_clock;
+	lastStep = currentStep;
+	INIT_WORK(&wq_work, do_scaling);
+#endif
+	return 0;
+	/* NOTE: Mali is not fully initialized at this point. */
+}
+
+void mali_core_scaling_term(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	flush_scheduled_work();
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_unregister(gp_pll_user_gpu);
+#endif
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 mali_threshold [] = {
+	40, /* 40% */
+	50, /* 50% */
+	90, /* 90% */
+};
+#endif
+
+void mali_pp_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+
+	if (mali_threshold[2] < utilization_pp)
+		ret = enable_max_num_cores();
+	else if (mali_threshold[1]< utilization_pp)
+		ret = enable_one_core();
+	else if (0 < utilization_pp)
+		ret = disable_one_core();
+	if (ret == 1)
+		schedule_work(&wq_work);
+#endif
+}
+
+#if LOG_MALI_SCALING
+void trace_utilization(int utilization_gpu, u32 current_idx, u32 next,
+		u32 current_pp, u32 next_pp)
+{
+	char direction;
+	if (next > current_idx)
+		direction = '>';
+	else if ((current_idx > pmali_plat->scale_info.minpp) && (next < current_idx))
+		direction = '<';
+	else
+		direction = '~';
+
+	scalingdbg(2, "[SCALING]%c (%3d-->%3d)@%3d{%3d - %3d}. pp:(%d-->%d)\n",
+			direction,
+			get_mali_freq(current_idx),
+			get_mali_freq(next),
+			utilization_gpu,
+			pmali_plat->dvfs_table[current_idx].downthreshold,
+			pmali_plat->dvfs_table[current_idx].upthreshold,
+			current_pp, next_pp);
+}
+#endif
+
+#ifndef CONFIG_MALI_DVFS
+static int mali_stay_count = 0;
+static void mali_decide_next_status(int utilization_pp, int* next_fs_idx,
+		int* pp_change_flag)
+{
+	u32 mali_up_limit, decided_fs_idx;
+	u32 ld_left, ld_right;
+	u32 ld_up, ld_down;
+	u32 change_mode;
+
+	*pp_change_flag = 0;
+	change_mode = 0;
+
+	scalingdbg(5, "line(%d), scaling_mode=%d, MALI_TURBO_MODE=%d, turbo=%d, maxclk=%d\n",
+			__LINE__,  scaling_mode, MALI_TURBO_MODE,
+		    pmali_plat->turbo_clock, pmali_plat->scale_info.maxclk);
+
+	mali_up_limit = (scaling_mode ==  MALI_TURBO_MODE) ?
+		pmali_plat->turbo_clock : pmali_plat->scale_info.maxclk;
+	decided_fs_idx = currentStep;
+
+	ld_up = pmali_plat->dvfs_table[currentStep].upthreshold;
+	ld_down = pmali_plat->dvfs_table[currentStep].downthreshold;
+
+	scalingdbg(2, "utilization=%d,  ld_up=%d\n ", utilization_pp,  ld_up);
+	if (utilization_pp >= ld_up) { /* go up */
+
+		scalingdbg(2, "currentStep=%d,  mali_up_limit=%d\n ", currentStep, mali_up_limit);
+		if (currentStep < mali_up_limit) {
+			change_mode = 1;
+			if ((currentStep < pmali_plat->def_clock) && (utilization_pp > pmali_plat->bst_gpu))
+				decided_fs_idx = pmali_plat->def_clock;
+			else
+				decided_fs_idx++;
+		}
+		if ((utilization_pp >= ld_up) &&
+				(num_cores_enabled < pmali_plat->scale_info.maxpp)) {
+			if ((num_cores_enabled < pmali_plat->sc_mpp) && (utilization_pp >= pmali_plat->bst_pp)) {
+				*pp_change_flag = 1;
+				change_mode = 1;
+			} else if (change_mode == 0) {
+				*pp_change_flag = 2;
+				change_mode = 1;
+			}
+		}
+#if LOG_MALI_SCALING
+		scalingdbg(2, "[nexting..] [LD:%d]-> FS[CRNT:%d LMT:%d NEXT:%d] PP[NUM:%d LMT:%d MD:%d][F:%d]\n",
+				utilization_pp, currentStep, mali_up_limit, decided_fs_idx,
+				num_cores_enabled, pmali_plat->scale_info.maxpp, *pp_change_flag, change_mode);
+#endif
+	} else if (utilization_pp <= ld_down) { /* go down */
+		if (mali_stay_count > 0) {
+			*next_fs_idx = decided_fs_idx;
+			mali_stay_count--;
+			return;
+		}
+
+		if (num_cores_enabled > pmali_plat->sc_mpp) {
+			change_mode = 1;
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		} else if (currentStep > pmali_plat->scale_info.minclk) {
+			change_mode = 1;
+		} else if (num_cores_enabled > 1) { /* decrease PPS */
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				scalingdbg(2, "ld_left=%d, ld_right=%d\n", ld_left, ld_right);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		}
+
+		if (change_mode == 1) {
+			decided_fs_idx--;
+		} else if (change_mode == 2) { /* decrease PPS */
+			*pp_change_flag = -1;
+		}
+	}
+
+	if (decided_fs_idx < 0 ) {
+		printk("gpu debug, next index below 0\n");
+		decided_fs_idx = 0;
+	}
+	if (decided_fs_idx > pmali_plat->scale_info.maxclk) {
+		decided_fs_idx = pmali_plat->scale_info.maxclk;
+		printk("gpu debug, next index above max-1, set to %d\n", decided_fs_idx);
+	}
+
+	if (change_mode)
+		mali_stay_count = pmali_plat->dvfs_table[decided_fs_idx].keep_count;
+
+	*next_fs_idx = decided_fs_idx;
+}
+#endif
+
+void mali_pp_fs_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+	int pp_change_flag = 0;
+	u32 next_idx = 0;
+
+#if LOG_MALI_SCALING
+	u32 last_pp = num_cores_enabled;
+#endif
+	mali_decide_next_status(utilization_pp, &next_idx, &pp_change_flag);
+
+	if (pp_change_flag == 1)
+		ret = enable_pp_cores(pmali_plat->sc_mpp);
+	else if (pp_change_flag == 2)
+		ret = enable_one_core();
+	else if (pp_change_flag == -1) {
+		ret = disable_one_core();
+	}
+
+#if LOG_MALI_SCALING
+	if (pp_change_flag || (next_idx != currentStep))
+		trace_utilization(utilization_pp, currentStep, next_idx, last_pp, num_cores_enabled);
+#endif
+
+	if (next_idx != currentStep) {
+		ret = 1;
+		currentStep = next_idx;
+	}
+
+	if (ret == 1)
+		schedule_work(&wq_work);
+#ifdef CONFIG_MALI400_PROFILING
+	else
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				MALI_PROFILING_EVENT_CHANNEL_GPU |
+				MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				get_current_frequency(),
+				0,	0,	0,	0);
+#endif
+#endif
+}
+
+u32 get_mali_schel_mode(void)
+{
+	return scaling_mode;
+}
+
+void set_mali_schel_mode(u32 mode)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mode >= MALI_SCALING_MODE_MAX)
+		return;
+	scaling_mode = mode;
+
+	//disable thermal in turbo mode
+	if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->limit_on = 0;
+	} else {
+		pmali_plat->limit_on = 1;
+	}
+	/* set default performance range. */
+	pmali_plat->scale_info.minclk = pmali_plat->cfg_min_clock;
+	pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+	pmali_plat->scale_info.minpp = pmali_plat->cfg_min_pp;
+	pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+
+	/* set current status and tune max freq */
+	if (scaling_mode == MALI_PP_FS_SCALING) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_pp_cores(pmali_plat->sc_mpp);
+	} else if (scaling_mode == MALI_SCALING_DISABLE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_max_num_cores();
+	} else if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->turbo_clock;
+		enable_max_num_cores();
+	}
+	currentStep = pmali_plat->scale_info.maxclk;
+	schedule_work(&wq_work);
+#endif
+}
+
+u32 get_current_frequency(void)
+{
+	return get_mali_freq(currentStep);
+}
+
+void mali_gpu_utilization_callback(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_pm_statue)
+		return;
+
+	switch (scaling_mode) {
+		case MALI_PP_FS_SCALING:
+			mali_pp_fs_scaling_update(utilization_pp);
+			break;
+		case MALI_PP_SCALING:
+			mali_pp_scaling_update(utilization_pp);
+			break;
+		default:
+			break;
+	}
+#endif
+}
+static u32 clk_cntl_save = 0;
+void mali_dev_freeze(void)
+{
+	clk_cntl_save = mplt_read(HHI_MALI_CLK_CNTL);
+}
+
+void mali_dev_restore(void)
+{
+
+	mplt_write(HHI_MALI_CLK_CNTL, clk_cntl_save);
+	if (pmali_plat && pmali_plat->pdev) {
+		mali_clock_init_clk_tree(pmali_plat->pdev);
+	} else {
+		printk("error: init clock failed, pmali_plat=%p, pmali_plat->pdev=%p\n",
+				pmali_plat, pmali_plat == NULL ? NULL: pmali_plat->pdev);
+	}
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
new file mode 100755
index 0000000..6780e4c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
new file mode 100755
index 0000000..fac3cd5
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..d165ce2
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_config_platform.h"
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+	.start = 0xFC010000,
+	.end = 0xFC010000 + (4096 * 4) - 1
+	}
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
new file mode 100755
index 0000000..51b408e
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2013-2014, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
new file mode 100755
index 0000000..fac3cd5
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..efca0a5
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -0,0 +1,65 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+			     .start = 0x2f010000,
+			     .end = 0x2f010000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
new file mode 100755
index 0000000..e07709c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
new file mode 100755
index 0000000..fac3cd5
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..b6714b9
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 75,
+	.mmu_irq_number = 76,
+	.gpu_irq_number = 77,
+	.io_memory_region = {
+			     .start = 0x2F000000,
+			     .end = 0x2F000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
new file mode 100755
index 0000000..ef1ec70
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_ukk_os.h
+ * Types and definitions that are common for Linux OSs for the kernel side of the
+ * User-Kernel interface.
+ */
+
+#ifndef _UKK_OS_H_ /* Linux version */
+#define _UKK_OS_H_
+
+#include <linux/fs.h>
+
+/**
+ * @addtogroup uk_api User-Kernel Interface API
+ * @{
+ */
+
+/**
+ * @addtogroup uk_api_kernel UKK (Kernel side)
+ * @{
+ */
+
+/**
+ * Internal OS specific data structure associated with each UKK session. Part
+ * of a ukk_session object.
+ */
+typedef struct ukkp_session {
+	int dummy;     /**< No internal OS specific data at this time */
+} ukkp_session;
+
+/** @} end group uk_api_kernel */
+
+/** @} end group uk_api */
+
+#endif /* _UKK_OS_H__ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h
new file mode 100755
index 0000000..8778d81
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _PROTECTED_MODE_SWITCH_H_
+#define _PROTECTED_MODE_SWITCH_H_
+
+struct protected_mode_device;
+
+/**
+ * struct protected_mode_ops - Callbacks for protected mode switch operations
+ *
+ * @protected_mode_enable:  Callback to enable protected mode for device
+ * @protected_mode_disable: Callback to disable protected mode for device
+ */
+struct protected_mode_ops {
+	/**
+	 * protected_mode_enable() - Enable protected mode on device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enable)(
+			struct protected_mode_device *protected_dev);
+
+	/**
+	 * protected_mode_disable() - Disable protected mode on device, and
+	 *                            reset device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_disable)(
+			struct protected_mode_device *protected_dev);
+};
+
+/**
+ * struct protected_mode_device - Device structure for protected mode devices
+ *
+ * @ops  - Callbacks associated with this device
+ * @data - Pointer to device private data
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct protected_mode_device {
+	struct protected_mode_ops ops;
+	void *data;
+};
+
+#endif /* _PROTECTED_MODE_SWITCH_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/Kbuild b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/Kbuild
new file mode 100755
index 0000000..df16a77
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/Kbuild
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+obj-$(CONFIG_MALI_KUTF) += kutf/
+obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/Kconfig b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/Kconfig
new file mode 100755
index 0000000..fa91aea
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/Kconfig
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+source "drivers/gpu/arm/midgard/tests/kutf/Kconfig"
+source "drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig"
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/Mconfig b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/Mconfig
new file mode 100755
index 0000000..af4e383
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/Mconfig
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+
+config UNIT_TEST_KERNEL_MODULES
+	bool
+	default y if UNIT_TEST_CODE && BUILD_KERNEL_MODULES
+	default n
+
+config BUILD_IPA_TESTS
+	bool
+	default y if UNIT_TEST_KERNEL_MODULES && MALI_DEVFREQ
+	default n
+
+config BUILD_IPA_UNIT_TESTS
+	bool
+	default y if NO_MALI && BUILD_IPA_TESTS
+	default n
+
+config BUILD_CSF_TESTS
+	bool
+	default y if UNIT_TEST_KERNEL_MODULES && GPU_HAS_CSF
+	default n
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h
new file mode 100755
index 0000000..15e168c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h
@@ -0,0 +1,77 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_HELPERS_H_
+#define _KERNEL_UTF_HELPERS_H_
+
+/* kutf_helpers.h
+ * Test helper functions for the kernel UTF test infrastructure.
+ *
+ * These functions provide methods for enqueuing/dequeuing lines of text sent
+ * by user space. They are used to implement the transfer of "userdata" from
+ * user space to kernel.
+ */
+
+#include <kutf/kutf_suite.h>
+
+/**
+ * kutf_helper_input_dequeue() - Dequeue a line sent by user space
+ * @context:    KUTF context
+ * @str_size:   Pointer to an integer to receive the size of the string
+ *
+ * If no line is available then this function will wait (interruptibly) until
+ * a line is available.
+ *
+ * Return: The line dequeued, ERR_PTR(-EINTR) if interrupted or NULL on end
+ * of data.
+ */
+char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size);
+
+/**
+ * kutf_helper_input_enqueue() - Enqueue a line sent by user space
+ * @context:   KUTF context
+ * @str:       The user space address of the line
+ * @size:      The length in bytes of the string
+ *
+ * This function will use copy_from_user to copy the string out of user space.
+ * The string need not be NULL-terminated (@size should not include the NULL
+ * termination).
+ *
+ * As a special case @str==NULL and @size==0 is valid to mark the end of input,
+ * but callers should use kutf_helper_input_enqueue_end_of_data() instead.
+ *
+ * Return: 0 on success, -EFAULT if the line cannot be copied from user space,
+ * -ENOMEM if out of memory.
+ */
+int kutf_helper_input_enqueue(struct kutf_context *context,
+		const char __user *str, size_t size);
+
+/**
+ * kutf_helper_input_enqueue_end_of_data() - Signal no more data is to be sent
+ * @context:    KUTF context
+ *
+ * After this function has been called, kutf_helper_input_dequeue() will always
+ * return NULL.
+ */
+void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context);
+
+#endif	/* _KERNEL_UTF_HELPERS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h
new file mode 100755
index 0000000..3b1300e
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h
@@ -0,0 +1,179 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_HELPERS_USER_H_
+#define _KERNEL_UTF_HELPERS_USER_H_
+
+/* kutf_helpers.h
+ * Test helper functions for the kernel UTF test infrastructure, whose
+ * implementation mirrors that of similar functions for kutf-userside
+ */
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_helpers.h>
+
+
+#define KUTF_HELPER_MAX_VAL_NAME_LEN 255
+
+enum kutf_helper_valtype {
+	KUTF_HELPER_VALTYPE_INVALID,
+	KUTF_HELPER_VALTYPE_U64,
+	KUTF_HELPER_VALTYPE_STR,
+
+	KUTF_HELPER_VALTYPE_COUNT /* Must be last */
+};
+
+struct kutf_helper_named_val {
+	enum kutf_helper_valtype type;
+	char *val_name;
+	union {
+		u64 val_u64;
+		char *val_str;
+	} u;
+};
+
+/* Extra error values for certain helpers when we want to distinguish between
+ * Linux's own error values too.
+ *
+ * These can only be used on certain functions returning an int type that are
+ * documented as returning one of these potential values, they cannot be used
+ * from functions return a ptr type, since we can't decode it with PTR_ERR
+ *
+ * No negative values are used - Linux error codes should be used instead, and
+ * indicate a problem in accessing the data file itself (are generally
+ * unrecoverable)
+ *
+ * Positive values indicate correct access but invalid parsing (can be
+ * recovered from assuming data in the future is correct) */
+enum kutf_helper_err {
+	/* No error - must be zero */
+	KUTF_HELPER_ERR_NONE = 0,
+	/* Named value parsing encountered an invalid name */
+	KUTF_HELPER_ERR_INVALID_NAME,
+	/* Named value parsing of string or u64 type encountered extra
+	 * characters after the value (after the last digit for a u64 type or
+	 * after the string end delimiter for string type) */
+	KUTF_HELPER_ERR_CHARS_AFTER_VAL,
+	/* Named value parsing of string type couldn't find the string end
+	 * delimiter.
+	 *
+	 * This cannot be encountered when the NAME="value" message exceeds the
+	 * textbuf's maximum line length, because such messages are not checked
+	 * for an end string delimiter */
+	KUTF_HELPER_ERR_NO_END_DELIMITER,
+	/* Named value didn't parse as any of the known types */
+	KUTF_HELPER_ERR_INVALID_VALUE,
+};
+
+
+/* Send named NAME=value pair, u64 value
+ *
+ * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long
+ *
+ * Any failure will be logged on the suite's current test fixture
+ *
+ * Returns 0 on success, non-zero on failure
+ */
+int kutf_helper_send_named_u64(struct kutf_context *context,
+		const char *val_name, u64 val);
+
+/* Get the maximum length of a string that can be represented as a particular
+ * NAME="value" pair without string-value truncation in the kernel's buffer
+ *
+ * Given val_name and the kernel buffer's size, this can be used to determine
+ * the maximum length of a string that can be sent as val_name="value" pair
+ * without having the string value truncated. Any string longer than this will
+ * be truncated at some point during communication to this size.
+ *
+ * It is assumed that val_name is a valid name for
+ * kutf_helper_send_named_str(), and no checking will be made to
+ * ensure this.
+ *
+ * Returns the maximum string length that can be represented, or a negative
+ * value if the NAME="value" encoding itself wouldn't fit in kern_buf_sz
+ */
+int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz);
+
+/* Send named NAME="str" pair
+ *
+ * no escaping allowed in str. Any of the following characters will terminate
+ * the string: '"' '\\' '\n'
+ *
+ * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long
+ *
+ * Any failure will be logged on the suite's current test fixture
+ *
+ * Returns 0 on success, non-zero on failure */
+int kutf_helper_send_named_str(struct kutf_context *context,
+		const char *val_name, const char *val_str);
+
+/* Receive named NAME=value pair
+ *
+ * This can receive u64 and string values - check named_val->type
+ *
+ * If you are not planning on dynamic handling of the named value's name and
+ * type, then kutf_helper_receive_check_val() is more useful as a
+ * convenience function.
+ *
+ * String members of named_val will come from memory allocated on the fixture's mempool
+ *
+ * Returns 0 on success. Negative value on failure to receive from the 'run'
+ * file, positive value indicates an enum kutf_helper_err value for correct
+ * reception of data but invalid parsing */
+int kutf_helper_receive_named_val(
+		struct kutf_context *context,
+		struct kutf_helper_named_val *named_val);
+
+/* Receive and validate NAME=value pair
+ *
+ * As with kutf_helper_receive_named_val, but validate that the
+ * name and type are as expected, as a convenience for a common pattern found
+ * in tests.
+ *
+ * NOTE: this only returns an error value if there was actually a problem
+ * receiving data.
+ *
+ * NOTE: If the underlying data was received correctly, but:
+ * - isn't of the expected name
+ * - isn't the expected type
+ * - isn't correctly parsed for the type
+ * then the following happens:
+ * - failure result is recorded
+ * - named_val->type will be KUTF_HELPER_VALTYPE_INVALID
+ * - named_val->u will contain some default value that should be relatively
+ *   harmless for the test, including being writable in the case of string
+ *   values
+ * - return value will be 0 to indicate success
+ *
+ * The rationale behind this is that we'd prefer to continue the rest of the
+ * test with failures propagated, rather than hitting a timeout */
+int kutf_helper_receive_check_val(
+		struct kutf_helper_named_val *named_val,
+		struct kutf_context *context,
+		const char *expect_val_name,
+		enum kutf_helper_valtype expect_val_type);
+
+/* Output a named value to kmsg */
+void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val);
+
+
+#endif	/* _KERNEL_UTF_HELPERS_USER_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h
new file mode 100755
index 0000000..988559d
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_MEM_H_
+#define _KERNEL_UTF_MEM_H_
+
+/* kutf_mem.h
+ * Functions for management of memory pools in the kernel.
+ *
+ * This module implements a memory pool allocator, allowing a test
+ * implementation to allocate linked allocations which can then be freed by a
+ * single free which releases all of the resources held by the entire pool.
+ *
+ * Note that it is not possible to free single resources within the pool once
+ * allocated.
+ */
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+/**
+ * struct kutf_mempool - the memory pool context management structure
+ * @head:	list head on which the allocations in this context are added to
+ * @lock:	mutex for concurrent allocation from multiple threads
+ *
+ */
+struct kutf_mempool {
+	struct list_head head;
+	struct mutex lock;
+};
+
+/**
+ * kutf_mempool_init() - Initialize a memory pool.
+ * @pool:	Memory pool structure to initialize, provided by the user
+ *
+ * Return:	zero on success
+ */
+int kutf_mempool_init(struct kutf_mempool *pool);
+
+/**
+ * kutf_mempool_alloc() - Allocate memory from a pool
+ * @pool:	Memory pool to allocate from
+ * @size:	Size of memory wanted in number of bytes
+ *
+ * Return:	Pointer to memory on success, NULL on failure.
+ */
+void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size);
+
+/**
+ * kutf_mempool_destroy() - Destroy a memory pool, freeing all memory within it.
+ * @pool:	The memory pool to free
+ */
+void kutf_mempool_destroy(struct kutf_mempool *pool);
+#endif	/* _KERNEL_UTF_MEM_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h
new file mode 100755
index 0000000..49ebeb4
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h
@@ -0,0 +1,181 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_RESULTSET_H_
+#define _KERNEL_UTF_RESULTSET_H_
+
+/* kutf_resultset.h
+ * Functions and structures for handling test results and result sets.
+ *
+ * This section of the kernel UTF contains structures and functions used for the
+ * management of Results and Result Sets.
+ */
+
+/**
+ * enum kutf_result_status - Status values for a single Test error.
+ * @KUTF_RESULT_BENCHMARK:	Result is a meta-result containing benchmark
+ *                              results.
+ * @KUTF_RESULT_SKIP:		The test was skipped.
+ * @KUTF_RESULT_UNKNOWN:	The test has an unknown result.
+ * @KUTF_RESULT_PASS:		The test result passed.
+ * @KUTF_RESULT_DEBUG:		The test result passed, but raised a debug
+ *                              message.
+ * @KUTF_RESULT_INFO:		The test result passed, but raised
+ *                              an informative message.
+ * @KUTF_RESULT_WARN:		The test result passed, but raised a warning
+ *                              message.
+ * @KUTF_RESULT_FAIL:		The test result failed with a non-fatal error.
+ * @KUTF_RESULT_FATAL:		The test result failed with a fatal error.
+ * @KUTF_RESULT_ABORT:		The test result failed due to a non-UTF
+ *                              assertion failure.
+ * @KUTF_RESULT_USERDATA:	User data is ready to be read,
+ *                              this is not seen outside the kernel
+ * @KUTF_RESULT_USERDATA_WAIT:	Waiting for user data to be sent,
+ *                              this is not seen outside the kernel
+ * @KUTF_RESULT_TEST_FINISHED:	The test has finished, no more results will
+ *                              be produced. This is not seen outside kutf
+ */
+enum kutf_result_status {
+	KUTF_RESULT_BENCHMARK = -3,
+	KUTF_RESULT_SKIP    = -2,
+	KUTF_RESULT_UNKNOWN = -1,
+
+	KUTF_RESULT_PASS    = 0,
+	KUTF_RESULT_DEBUG   = 1,
+	KUTF_RESULT_INFO    = 2,
+	KUTF_RESULT_WARN    = 3,
+	KUTF_RESULT_FAIL    = 4,
+	KUTF_RESULT_FATAL   = 5,
+	KUTF_RESULT_ABORT   = 6,
+
+	KUTF_RESULT_USERDATA      = 7,
+	KUTF_RESULT_USERDATA_WAIT = 8,
+	KUTF_RESULT_TEST_FINISHED = 9
+};
+
+/* The maximum size of a kutf_result_status result when
+ * converted to a string
+ */
+#define KUTF_ERROR_MAX_NAME_SIZE 21
+
+#ifdef __KERNEL__
+
+#include <kutf/kutf_mem.h>
+#include <linux/wait.h>
+
+struct kutf_context;
+
+/**
+ * struct kutf_result - Represents a single test result.
+ * @node:	Next result in the list of results.
+ * @status:	The status summary (pass / warn / fail / etc).
+ * @message:	A more verbose status message.
+ */
+struct kutf_result {
+	struct list_head            node;
+	enum kutf_result_status     status;
+	const char                  *message;
+};
+
+/**
+ * KUTF_RESULT_SET_WAITING_FOR_INPUT - Test is waiting for user data
+ *
+ * This flag is set within a struct kutf_result_set whenever the test is blocked
+ * waiting for user data. Attempts to dequeue results when this flag is set
+ * will cause a dummy %KUTF_RESULT_USERDATA_WAIT result to be produced. This
+ * is used to output a warning message and end of file.
+ */
+#define KUTF_RESULT_SET_WAITING_FOR_INPUT 1
+
+/**
+ * struct kutf_result_set - Represents a set of results.
+ * @results:	List head of a struct kutf_result list for storing the results
+ * @waitq:	Wait queue signalled whenever new results are added.
+ * @flags:	Flags see %KUTF_RESULT_SET_WAITING_FOR_INPUT
+ */
+struct kutf_result_set {
+	struct list_head          results;
+	wait_queue_head_t         waitq;
+	int                       flags;
+};
+
+/**
+ * kutf_create_result_set() - Create a new result set
+ *                            to which results can be added.
+ *
+ * Return: The created result set.
+ */
+struct kutf_result_set *kutf_create_result_set(void);
+
+/**
+ * kutf_add_result() - Add a result to the end of an existing result set.
+ *
+ * @context:	The kutf context
+ * @status:	The result status to add.
+ * @message:	The result message to add.
+ *
+ * Return: 0 if the result is successfully added. -ENOMEM if allocation fails.
+ */
+int kutf_add_result(struct kutf_context *context,
+		enum kutf_result_status status, const char *message);
+
+/**
+ * kutf_remove_result() - Remove a result from the head of a result set.
+ * @set:	The result set.
+ *
+ * This function will block until there is a result to read. The wait is
+ * interruptible, so this function will return with an ERR_PTR if interrupted.
+ *
+ * Return: result or ERR_PTR if interrupted
+ */
+struct kutf_result *kutf_remove_result(
+		struct kutf_result_set *set);
+
+/**
+ * kutf_destroy_result_set() - Free a previously created result set.
+ *
+ * @results:	The result set whose resources to free.
+ */
+void kutf_destroy_result_set(struct kutf_result_set *results);
+
+/**
+ * kutf_set_waiting_for_input() - The test is waiting for userdata
+ *
+ * @set: The result set to update
+ *
+ * Causes the result set to always have results and return a fake
+ * %KUTF_RESULT_USERDATA_WAIT result.
+ */
+void kutf_set_waiting_for_input(struct kutf_result_set *set);
+
+/**
+ * kutf_clear_waiting_for_input() - The test is no longer waiting for userdata
+ *
+ * @set: The result set to update
+ *
+ * Cancels the effect of kutf_set_waiting_for_input()
+ */
+void kutf_clear_waiting_for_input(struct kutf_result_set *set);
+
+#endif	/* __KERNEL__ */
+
+#endif	/* _KERNEL_UTF_RESULTSET_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h
new file mode 100755
index 0000000..8d75f50
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h
@@ -0,0 +1,569 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_SUITE_H_
+#define _KERNEL_UTF_SUITE_H_
+
+/* kutf_suite.h
+ * Functions for management of test suites.
+ *
+ * This collection of data structures, macros, and functions are used to
+ * create Test Suites, Tests within those Test Suites, and Fixture variants
+ * of each test.
+ */
+
+#include <linux/kref.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+
+#include <kutf/kutf_mem.h>
+#include <kutf/kutf_resultset.h>
+
+/* Arbitrary maximum size to prevent user space allocating too much kernel
+ * memory
+ */
+#define KUTF_MAX_LINE_LENGTH (1024u)
+
+/**
+ * Pseudo-flag indicating an absence of any specified test class. Note that
+ * tests should not be annotated with this constant as it is simply a zero
+ * value; tests without a more specific class must be marked with the flag
+ * KUTF_F_TEST_GENERIC.
+ */
+#define KUTF_F_TEST_NONE                ((unsigned int)(0))
+
+/**
+ * Class indicating this test is a smoke test.
+ * A given set of smoke tests should be quick to run, enabling rapid turn-around
+ * of "regress-on-commit" test runs.
+ */
+#define KUTF_F_TEST_SMOKETEST           ((unsigned int)(1 << 1))
+
+/**
+ * Class indicating this test is a performance test.
+ * These tests typically produce a performance metric, such as "time to run" or
+ * "frames per second",
+ */
+#define KUTF_F_TEST_PERFORMANCE         ((unsigned int)(1 << 2))
+
+/**
+ * Class indicating that this test is a deprecated test.
+ * These tests have typically been replaced by an alternative test which is
+ * more efficient, or has better coverage.
+ */
+#define KUTF_F_TEST_DEPRECATED          ((unsigned int)(1 << 3))
+
+/**
+ * Class indicating that this test is a known failure.
+ * These tests have typically been run and failed, but marking them as a known
+ * failure means it is easier to triage results.
+ *
+ * It is typically more convenient to triage known failures using the
+ * results database and web UI, as this means there is no need to modify the
+ * test code.
+ */
+#define KUTF_F_TEST_EXPECTED_FAILURE    ((unsigned int)(1 << 4))
+
+/**
+ * Class indicating that this test is a generic test, which is not a member of
+ * a more specific test class. Tests which are not created with a specific set
+ * of filter flags by the user are assigned this test class by default.
+ */
+#define KUTF_F_TEST_GENERIC             ((unsigned int)(1 << 5))
+
+/**
+ * Class indicating this test is a resource allocation failure test.
+ * A resource allocation failure test will test that an error code is
+ * correctly propagated when an allocation fails.
+ */
+#define KUTF_F_TEST_RESFAIL             ((unsigned int)(1 << 6))
+
+/**
+ * Additional flag indicating that this test is an expected failure when
+ * run in resource failure mode. These tests are never run when running
+ * the low resource mode.
+ */
+#define KUTF_F_TEST_EXPECTED_FAILURE_RF ((unsigned int)(1 << 7))
+
+/**
+ * Flag reserved for user-defined filter zero.
+ */
+#define KUTF_F_TEST_USER_0 ((unsigned int)(1 << 24))
+
+/**
+ * Flag reserved for user-defined filter one.
+ */
+#define KUTF_F_TEST_USER_1 ((unsigned int)(1 << 25))
+
+/**
+ * Flag reserved for user-defined filter two.
+ */
+#define KUTF_F_TEST_USER_2 ((unsigned int)(1 << 26))
+
+/**
+ * Flag reserved for user-defined filter three.
+ */
+#define KUTF_F_TEST_USER_3 ((unsigned int)(1 << 27))
+
+/**
+ * Flag reserved for user-defined filter four.
+ */
+#define KUTF_F_TEST_USER_4 ((unsigned int)(1 << 28))
+
+/**
+ * Flag reserved for user-defined filter five.
+ */
+#define KUTF_F_TEST_USER_5 ((unsigned int)(1 << 29))
+
+/**
+ * Flag reserved for user-defined filter six.
+ */
+#define KUTF_F_TEST_USER_6 ((unsigned int)(1 << 30))
+
+/**
+ * Flag reserved for user-defined filter seven.
+ */
+#define KUTF_F_TEST_USER_7 ((unsigned int)(1 << 31))
+
+/**
+ * Pseudo-flag indicating that all test classes should be executed.
+ */
+#define KUTF_F_TEST_ALL                 ((unsigned int)(0xFFFFFFFFU))
+
+/**
+ * union kutf_callback_data - Union used to store test callback data
+ * @ptr_value:		pointer to the location where test callback data
+ *                      are stored
+ * @u32_value:		a number which represents test callback data
+ */
+union kutf_callback_data {
+	void *ptr_value;
+	u32  u32_value;
+};
+
+/**
+ * struct kutf_userdata_line - A line of user data to be returned to the user
+ * @node:   struct list_head to link this into a list
+ * @str:    The line of user data to return to user space
+ * @size:   The number of bytes within @str
+ */
+struct kutf_userdata_line {
+	struct list_head node;
+	char *str;
+	size_t size;
+};
+
+/**
+ * KUTF_USERDATA_WARNING_OUTPUT - Flag specifying that a warning has been output
+ *
+ * If user space reads the "run" file while the test is waiting for user data,
+ * then the framework will output a warning message and set this flag within
+ * struct kutf_userdata. A subsequent read will then simply return an end of
+ * file condition rather than outputting the warning again. The upshot of this
+ * is that simply running 'cat' on a test which requires user data will produce
+ * the warning followed by 'cat' exiting due to EOF - which is much more user
+ * friendly than blocking indefinitely waiting for user data.
+ */
+#define KUTF_USERDATA_WARNING_OUTPUT  1
+
+/**
+ * struct kutf_userdata - Structure holding user data
+ * @flags:       See %KUTF_USERDATA_WARNING_OUTPUT
+ * @input_head:  List of struct kutf_userdata_line containing user data
+ *               to be read by the kernel space test.
+ * @input_waitq: Wait queue signalled when there is new user data to be
+ *               read by the kernel space test.
+ */
+struct kutf_userdata {
+	unsigned long flags;
+	struct list_head input_head;
+	wait_queue_head_t input_waitq;
+};
+
+/**
+ * struct kutf_context - Structure representing a kernel test context
+ * @kref:		Refcount for number of users of this context
+ * @suite:		Convenience pointer to the suite this context
+ *                      is running
+ * @test_fix:		The fixture that is being run in this context
+ * @fixture_pool:	The memory pool used for the duration of
+ *                      the fixture/text context.
+ * @fixture:		The user provided fixture structure.
+ * @fixture_index:	The index (id) of the current fixture.
+ * @fixture_name:	The name of the current fixture (or NULL if unnamed).
+ * @test_data:		Any user private data associated with this test
+ * @result_set:		All the results logged by this test context
+ * @status:		The status of the currently running fixture.
+ * @expected_status:	The expected status on exist of the currently
+ *                      running fixture.
+ * @work:		Work item to enqueue onto the work queue to run the test
+ * @userdata:		Structure containing the user data for the test to read
+ */
+struct kutf_context {
+	struct kref                     kref;
+	struct kutf_suite               *suite;
+	struct kutf_test_fixture        *test_fix;
+	struct kutf_mempool             fixture_pool;
+	void                            *fixture;
+	unsigned int                    fixture_index;
+	const char                      *fixture_name;
+	union kutf_callback_data        test_data;
+	struct kutf_result_set          *result_set;
+	enum kutf_result_status         status;
+	enum kutf_result_status         expected_status;
+
+	struct work_struct              work;
+	struct kutf_userdata            userdata;
+};
+
+/**
+ * struct kutf_suite - Structure representing a kernel test suite
+ * @app:			The application this suite belongs to.
+ * @name:			The name of this suite.
+ * @suite_data:			Any user private data associated with this
+ *                              suite.
+ * @create_fixture:		Function used to create a new fixture instance
+ * @remove_fixture:		Function used to destroy a new fixture instance
+ * @fixture_variants:		The number of variants (must be at least 1).
+ * @suite_default_flags:	Suite global filter flags which are set on
+ *                              all tests.
+ * @node:			List node for suite_list
+ * @dir:			The debugfs directory for this suite
+ * @test_list:			List head to store all the tests which are
+ *                              part of this suite
+ */
+struct kutf_suite {
+	struct kutf_application        *app;
+	const char                     *name;
+	union kutf_callback_data       suite_data;
+	void *(*create_fixture)(struct kutf_context *context);
+	void  (*remove_fixture)(struct kutf_context *context);
+	unsigned int                   fixture_variants;
+	unsigned int                   suite_default_flags;
+	struct list_head               node;
+	struct dentry                  *dir;
+	struct list_head               test_list;
+};
+
+/* ============================================================================
+	Application functions
+============================================================================ */
+
+/**
+ * kutf_create_application() - Create an in kernel test application.
+ * @name:	The name of the test application.
+ *
+ * Return: pointer to the kutf_application  on success or NULL
+ * on failure
+ */
+struct kutf_application *kutf_create_application(const char *name);
+
+/**
+ * kutf_destroy_application() - Destroy an in kernel test application.
+ *
+ * @app:	The test application to destroy.
+ */
+void kutf_destroy_application(struct kutf_application *app);
+
+/* ============================================================================
+	Suite functions
+============================================================================ */
+
+/**
+ * kutf_create_suite() - Create a kernel test suite.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *                      functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *                      is stored in the fixture pointer in the context for
+ *                      use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ *
+ * Suite names must be unique. Should two suites with the same name be
+ * registered with the same application then this function will fail, if they
+ * are registered with different applications then the function will not detect
+ * this and the call will succeed.
+ *
+ * Return: pointer to the created kutf_suite on success or NULL
+ * on failure
+ */
+struct kutf_suite *kutf_create_suite(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context));
+
+/**
+ * kutf_create_suite_with_filters() - Create a kernel test suite with user
+ *                                    defined default filters.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *                      functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *			is stored in the fixture pointer in the context for
+ *			use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ * @filters:		Filters to apply to a test if it doesn't provide its own
+ *
+ * Suite names must be unique. Should two suites with the same name be
+ * registered with the same application then this function will fail, if they
+ * are registered with different applications then the function will not detect
+ * this and the call will succeed.
+ *
+ * Return: pointer to the created kutf_suite on success or NULL on failure
+ */
+struct kutf_suite *kutf_create_suite_with_filters(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters);
+
+/**
+ * kutf_create_suite_with_filters_and_data() - Create a kernel test suite with
+ *                                             user defined default filters.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *			functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *			is stored in the fixture pointer in the context for
+ *			use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ * @filters:		Filters to apply to a test if it doesn't provide its own
+ * @suite_data:		Suite specific callback data, provided during the
+ *			running of the test in the kutf_context
+ *
+ * Return: pointer to the created kutf_suite on success or NULL
+ * on failure
+ */
+struct kutf_suite *kutf_create_suite_with_filters_and_data(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data suite_data);
+
+/**
+ * kutf_add_test() - Add a test to a kernel test suite.
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ *
+ * Note: As no filters are provided the test will use the suite filters instead
+ */
+void kutf_add_test(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context));
+
+/**
+ * kutf_add_test_with_filters() - Add a test to a kernel test suite with filters
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ * @filters:	A set of filtering flags, assigning test categories.
+ */
+void kutf_add_test_with_filters(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters);
+
+/**
+ * kutf_add_test_with_filters_and_data() - Add a test to a kernel test suite
+ *					   with filters.
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ * @filters:	A set of filtering flags, assigning test categories.
+ * @test_data:	Test specific callback data, provided during the
+ *		running of the test in the kutf_context
+ */
+void kutf_add_test_with_filters_and_data(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data test_data);
+
+
+/* ============================================================================
+	Test functions
+============================================================================ */
+/**
+ * kutf_test_log_result_external() - Log a result which has been created
+ *                                   externally into a in a standard form
+ *                                   recognized by the log parser.
+ * @context:	The test context the test is running in
+ * @message:	The message for this result
+ * @new_status:	The result status of this log message
+ */
+void kutf_test_log_result_external(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status);
+
+/**
+ * kutf_test_expect_abort() - Tell the kernel that you expect the current
+ *                            fixture to produce an abort.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_abort(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_fatal() - Tell the kernel that you expect the current
+ *                            fixture to produce a fatal error.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_fatal(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_fail() - Tell the kernel that you expect the current
+ *                           fixture to fail.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_fail(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_warn() - Tell the kernel that you expect the current
+ *                           fixture to produce a warning.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_warn(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_pass() - Tell the kernel that you expect the current
+ *                           fixture to pass.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_pass(struct kutf_context *context);
+
+/**
+ * kutf_test_skip() - Tell the kernel that the test should be skipped.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_skip(struct kutf_context *context);
+
+/**
+ * kutf_test_skip_msg() - Tell the kernel that this test has been skipped,
+ *                        supplying a reason string.
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the reason for the skip.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a prebaked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_skip_msg(struct kutf_context *context, const char *message);
+
+/**
+ * kutf_test_pass() - Tell the kernel that this test has passed.
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the reason for the pass.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_pass(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_debug() - Send a debug message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the debug information.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_debug(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_info() - Send an information message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the information message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_info(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_warn() - Send a warning message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the warning message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_warn(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_fail() - Tell the kernel that a test has failed
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the failure message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_fail(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_fatal() - Tell the kernel that a test has triggered a fatal error
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the fatal error message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_fatal(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_abort() - Tell the kernel that a test triggered an abort in the test
+ *
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_abort(struct kutf_context *context);
+
+#endif	/* _KERNEL_UTF_SUITE_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h
new file mode 100755
index 0000000..25b8285
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h
@@ -0,0 +1,60 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_UTILS_H_
+#define _KERNEL_UTF_UTILS_H_
+
+/* kutf_utils.h
+ * Utilities for the kernel UTF test infrastructure.
+ *
+ * This collection of library functions are provided for use by kernel UTF
+ * and users of kernel UTF which don't directly fit within the other
+ * code modules.
+ */
+
+#include <kutf/kutf_mem.h>
+
+/**
+ * Maximum size of the message strings within kernel UTF, messages longer then
+ * this will be truncated.
+ */
+#define KUTF_MAX_DSPRINTF_LEN	1024
+
+/**
+ * kutf_dsprintf() - dynamic sprintf
+ * @pool:	memory pool to allocate from
+ * @fmt:	The format string describing the string to document.
+ * @...		The parameters to feed in to the format string.
+ *
+ * This function implements sprintf which dynamically allocates memory to store
+ * the string. The library will free the memory containing the string when the
+ * result set is cleared or destroyed.
+ *
+ * Note The returned string may be truncated to fit an internal temporary
+ * buffer, which is KUTF_MAX_DSPRINTF_LEN bytes in length.
+ *
+ * Return: Returns pointer to allocated string, or NULL on error.
+ */
+const char *kutf_dsprintf(struct kutf_mempool *pool,
+		const char *fmt, ...);
+
+#endif	/* _KERNEL_UTF_UTILS_H_ */
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild
new file mode 100755
index 0000000..2531d41
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+ccflags-y += -I$(src)/../include
+
+obj-$(CONFIG_MALI_KUTF) += kutf.o
+
+kutf-y := kutf_mem.o kutf_resultset.o kutf_suite.o kutf_utils.o kutf_helpers.o kutf_helpers_user.o
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig
new file mode 100755
index 0000000..0cdb474
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+config MALI_KUTF
+ tristate "Mali Kernel Unit Test Framework"
+ default m
+ help
+   Enables MALI testing framework. To compile it as a module,
+   choose M here - this will generate a single module called kutf.
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile
new file mode 100755
index 0000000..d848e87
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile
@@ -0,0 +1,35 @@
+#
+# (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS=-I$(CURDIR)/../include modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp
new file mode 100755
index 0000000..f0c7a0c
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp
@@ -0,0 +1,33 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2018-2019 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_kernel_module {
+    name: "kutf",
+    defaults: [
+        "kernel_defaults",
+        "kutf_includes",
+    ],
+    srcs: [
+        "Kbuild",
+        "kutf_helpers.c",
+        "kutf_helpers_user.c",
+        "kutf_mem.c",
+        "kutf_resultset.c",
+        "kutf_suite.c",
+        "kutf_utils.c",
+    ],
+    kbuild_options: ["CONFIG_MALI_KUTF=m"],
+    enabled: false,
+    base_build_kutf: {
+        enabled: true,
+    },
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c
new file mode 100755
index 0000000..cab5add
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF test helpers */
+#include <kutf/kutf_helpers.h>
+
+#include <linux/err.h>
+#include <linux/jiffies.h>
+#include <linux/sched.h>
+#include <linux/preempt.h>
+#include <linux/wait.h>
+#include <linux/uaccess.h>
+
+static DEFINE_SPINLOCK(kutf_input_lock);
+
+static bool pending_input(struct kutf_context *context)
+{
+	bool input_pending;
+
+	spin_lock(&kutf_input_lock);
+
+	input_pending = !list_empty(&context->userdata.input_head);
+
+	spin_unlock(&kutf_input_lock);
+
+	return input_pending;
+}
+
+char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size)
+{
+	struct kutf_userdata_line *line;
+
+	spin_lock(&kutf_input_lock);
+
+	while (list_empty(&context->userdata.input_head)) {
+		int err;
+
+		kutf_set_waiting_for_input(context->result_set);
+
+		spin_unlock(&kutf_input_lock);
+
+		err = wait_event_interruptible(context->userdata.input_waitq,
+				pending_input(context));
+
+		if (err)
+			return ERR_PTR(-EINTR);
+
+		spin_lock(&kutf_input_lock);
+	}
+
+	line = list_first_entry(&context->userdata.input_head,
+			struct kutf_userdata_line, node);
+	if (line->str) {
+		/*
+		 * Unless it is the end-of-input marker,
+		 * remove it from the list
+		 */
+		list_del(&line->node);
+	}
+
+	spin_unlock(&kutf_input_lock);
+
+	if (str_size)
+		*str_size = line->size;
+	return line->str;
+}
+
+int kutf_helper_input_enqueue(struct kutf_context *context,
+		const char __user *str, size_t size)
+{
+	struct kutf_userdata_line *line;
+
+	line = kutf_mempool_alloc(&context->fixture_pool,
+			sizeof(*line) + size + 1);
+	if (!line)
+		return -ENOMEM;
+	if (str) {
+		unsigned long bytes_not_copied;
+
+		line->size = size;
+		line->str = (void *)(line + 1);
+		bytes_not_copied = copy_from_user(line->str, str, size);
+		if (bytes_not_copied != 0)
+			return -EFAULT;
+		/* Zero terminate the string */
+		line->str[size] = '\0';
+	} else {
+		/* This is used to mark the end of input */
+		WARN_ON(size);
+		line->size = 0;
+		line->str = NULL;
+	}
+
+	spin_lock(&kutf_input_lock);
+
+	list_add_tail(&line->node, &context->userdata.input_head);
+
+	kutf_clear_waiting_for_input(context->result_set);
+
+	spin_unlock(&kutf_input_lock);
+
+	wake_up(&context->userdata.input_waitq);
+
+	return 0;
+}
+
+void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context)
+{
+	kutf_helper_input_enqueue(context, NULL, 0);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c
new file mode 100755
index 0000000..108fa82
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c
@@ -0,0 +1,468 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF test helpers that mirror those for kutf-userside */
+#include <kutf/kutf_helpers_user.h>
+#include <kutf/kutf_helpers.h>
+#include <kutf/kutf_utils.h>
+
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+const char *valtype_names[] = {
+	"INVALID",
+	"U64",
+	"STR",
+};
+
+static const char *get_val_type_name(enum kutf_helper_valtype valtype)
+{
+	/* enums can be signed or unsigned (implementation dependant), so
+	 * enforce it to prevent:
+	 * a) "<0 comparison on unsigned type" warning - if we did both upper
+	 *    and lower bound check
+	 * b) incorrect range checking if it was a signed type - if we did
+	 *    upper bound check only */
+	unsigned int type_idx = (unsigned int)valtype;
+
+	if (type_idx >= (unsigned int)KUTF_HELPER_VALTYPE_COUNT)
+		type_idx = (unsigned int)KUTF_HELPER_VALTYPE_INVALID;
+
+	return valtype_names[type_idx];
+}
+
+/* Check up to str_len chars of val_str to see if it's a valid value name:
+ *
+ * - Has between 1 and KUTF_HELPER_MAX_VAL_NAME_LEN characters before the \0 terminator
+ * - And, each char is in the character set [A-Z0-9_] */
+static int validate_val_name(const char *val_str, int str_len)
+{
+	int i = 0;
+
+	for (i = 0; str_len && i <= KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'; ++i, --str_len) {
+		char val_chr = val_str[i];
+
+		if (val_chr >= 'A' && val_chr <= 'Z')
+			continue;
+		if (val_chr >= '0' && val_chr <= '9')
+			continue;
+		if (val_chr == '_')
+			continue;
+
+		/* Character not in the set [A-Z0-9_] - report error */
+		return 1;
+	}
+
+	/* Names of 0 length are not valid */
+	if (i == 0)
+		return 1;
+	/* Length greater than KUTF_HELPER_MAX_VAL_NAME_LEN not allowed */
+	if (i > KUTF_HELPER_MAX_VAL_NAME_LEN || (i == KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'))
+		return 1;
+
+	return 0;
+}
+
+/* Find the length of the valid part of the string when it will be in quotes
+ * e.g. "str"
+ *
+ * That is, before any '\\', '\n' or '"' characters. This is so we don't have
+ * to escape the string */
+static int find_quoted_string_valid_len(const char *str)
+{
+	char *ptr;
+	const char *check_chars = "\\\n\"";
+
+	ptr = strpbrk(str, check_chars);
+	if (ptr)
+		return (int)(ptr-str);
+
+	return (int)strlen(str);
+}
+
+static int kutf_helper_userdata_enqueue(struct kutf_context *context,
+		const char *str)
+{
+	char *str_copy;
+	size_t len;
+	int err;
+
+	len = strlen(str)+1;
+
+	str_copy = kutf_mempool_alloc(&context->fixture_pool, len);
+	if (!str_copy)
+		return -ENOMEM;
+
+	strcpy(str_copy, str);
+
+	err = kutf_add_result(context, KUTF_RESULT_USERDATA, str_copy);
+
+	return err;
+}
+
+#define MAX_U64_HEX_LEN 16
+/* (Name size) + ("=0x" size) + (64-bit hex value size) + (terminator) */
+#define NAMED_U64_VAL_BUF_SZ (KUTF_HELPER_MAX_VAL_NAME_LEN + 3 + MAX_U64_HEX_LEN + 1)
+
+int kutf_helper_send_named_u64(struct kutf_context *context,
+		const char *val_name, u64 val)
+{
+	int ret = 1;
+	char msgbuf[NAMED_U64_VAL_BUF_SZ];
+	const char *errmsg = NULL;
+
+	if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': Invalid value name", val_name);
+		goto out_err;
+	}
+
+	ret = snprintf(msgbuf, NAMED_U64_VAL_BUF_SZ, "%s=0x%llx", val_name, val);
+	if (ret >= NAMED_U64_VAL_BUF_SZ || ret < 0) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': snprintf() problem buffer size==%d ret=%d",
+				val_name, NAMED_U64_VAL_BUF_SZ, ret);
+		goto out_err;
+	}
+
+	ret = kutf_helper_userdata_enqueue(context, msgbuf);
+	if (ret) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': send returned %d",
+				val_name, ret);
+		goto out_err;
+	}
+
+	return ret;
+out_err:
+	kutf_test_fail(context, errmsg);
+	return ret;
+}
+EXPORT_SYMBOL(kutf_helper_send_named_u64);
+
+#define NAMED_VALUE_SEP "="
+#define NAMED_STR_START_DELIM NAMED_VALUE_SEP "\""
+#define NAMED_STR_END_DELIM "\""
+
+int kutf_helper_max_str_len_for_kern(const char *val_name,
+		int kern_buf_sz)
+{
+	const int val_name_len = strlen(val_name);
+	const int start_delim_len = strlen(NAMED_STR_START_DELIM);
+	const int end_delim_len = strlen(NAMED_STR_END_DELIM);
+	int max_msg_len = kern_buf_sz;
+	int max_str_len;
+
+	max_str_len = max_msg_len - val_name_len - start_delim_len -
+		end_delim_len;
+
+	return max_str_len;
+}
+EXPORT_SYMBOL(kutf_helper_max_str_len_for_kern);
+
+int kutf_helper_send_named_str(struct kutf_context *context,
+		const char *val_name,
+		const char *val_str)
+{
+	int val_str_len;
+	int str_buf_sz;
+	char *str_buf = NULL;
+	int ret = 1;
+	char *copy_ptr;
+	int val_name_len;
+	int start_delim_len = strlen(NAMED_STR_START_DELIM);
+	int end_delim_len = strlen(NAMED_STR_END_DELIM);
+	const char *errmsg = NULL;
+
+	if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': Invalid value name", val_name);
+		goto out_err;
+	}
+	val_name_len = strlen(val_name);
+
+	val_str_len = find_quoted_string_valid_len(val_str);
+
+	/* (name length) + ("=\"" length) + (val_str len) + ("\"" length) + terminator */
+	str_buf_sz = val_name_len + start_delim_len + val_str_len + end_delim_len + 1;
+
+	/* Using kmalloc() here instead of mempool since we know we need to free
+	 * before we return */
+	str_buf = kmalloc(str_buf_sz, GFP_KERNEL);
+	if (!str_buf) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send str value named '%s': kmalloc failed, str_buf_sz=%d",
+				val_name, str_buf_sz);
+		goto out_err;
+	}
+	copy_ptr = str_buf;
+
+	/* Manually copy each string component instead of snprintf because
+	 * val_str may need to end early, and less error path handling */
+
+	/* name */
+	memcpy(copy_ptr, val_name, val_name_len);
+	copy_ptr += val_name_len;
+
+	/* str start delimiter */
+	memcpy(copy_ptr, NAMED_STR_START_DELIM, start_delim_len);
+	copy_ptr += start_delim_len;
+
+	/* str value */
+	memcpy(copy_ptr, val_str, val_str_len);
+	copy_ptr += val_str_len;
+
+	/* str end delimiter */
+	memcpy(copy_ptr, NAMED_STR_END_DELIM, end_delim_len);
+	copy_ptr += end_delim_len;
+
+	/* Terminator */
+	*copy_ptr = '\0';
+
+	ret = kutf_helper_userdata_enqueue(context, str_buf);
+
+	if (ret) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send str value named '%s': send returned %d",
+				val_name, ret);
+		goto out_err;
+	}
+
+	kfree(str_buf);
+	return ret;
+
+out_err:
+	kutf_test_fail(context, errmsg);
+	kfree(str_buf);
+	return ret;
+}
+EXPORT_SYMBOL(kutf_helper_send_named_str);
+
+int kutf_helper_receive_named_val(
+		struct kutf_context *context,
+		struct kutf_helper_named_val *named_val)
+{
+	size_t recv_sz;
+	char *recv_str;
+	char *search_ptr;
+	char *name_str = NULL;
+	int name_len;
+	int strval_len;
+	enum kutf_helper_valtype type = KUTF_HELPER_VALTYPE_INVALID;
+	char *strval = NULL;
+	u64 u64val = 0;
+	int err = KUTF_HELPER_ERR_INVALID_VALUE;
+
+	recv_str = kutf_helper_input_dequeue(context, &recv_sz);
+	if (!recv_str)
+		return -EBUSY;
+	else if (IS_ERR(recv_str))
+		return PTR_ERR(recv_str);
+
+	/* Find the '=', grab the name and validate it */
+	search_ptr = strnchr(recv_str, recv_sz, NAMED_VALUE_SEP[0]);
+	if (search_ptr) {
+		name_len = search_ptr - recv_str;
+		if (!validate_val_name(recv_str, name_len)) {
+			/* no need to reallocate - just modify string in place */
+			name_str = recv_str;
+			name_str[name_len] = '\0';
+
+			/* Move until after the '=' */
+			recv_str += (name_len + 1);
+			recv_sz -= (name_len + 1);
+		}
+	}
+	if (!name_str) {
+		pr_err("Invalid name part for received string '%s'\n",
+				recv_str);
+		return KUTF_HELPER_ERR_INVALID_NAME;
+	}
+
+	/* detect value type */
+	if (*recv_str == NAMED_STR_START_DELIM[1]) {
+		/* string delimiter start*/
+		++recv_str;
+		--recv_sz;
+
+		/* Find end of string */
+		search_ptr = strnchr(recv_str, recv_sz, NAMED_STR_END_DELIM[0]);
+		if (search_ptr) {
+			strval_len = search_ptr - recv_str;
+			/* Validate the string to ensure it contains no quotes */
+			if (strval_len == find_quoted_string_valid_len(recv_str)) {
+				/* no need to reallocate - just modify string in place */
+				strval = recv_str;
+				strval[strval_len] = '\0';
+
+				/* Move until after the end delimiter */
+				recv_str += (strval_len + 1);
+				recv_sz -= (strval_len + 1);
+				type = KUTF_HELPER_VALTYPE_STR;
+			} else {
+				pr_err("String value contains invalid characters in rest of received string '%s'\n", recv_str);
+				err = KUTF_HELPER_ERR_CHARS_AFTER_VAL;
+			}
+		} else {
+			pr_err("End of string delimiter not found in rest of received string '%s'\n", recv_str);
+			err = KUTF_HELPER_ERR_NO_END_DELIMITER;
+		}
+	} else {
+		/* possibly a number value - strtoull will parse it */
+		err = kstrtoull(recv_str, 0, &u64val);
+		/* unlike userspace can't get an end ptr, but if kstrtoull()
+		 * reads characters after the number it'll report -EINVAL */
+		if (!err) {
+			int len_remain = strnlen(recv_str, recv_sz);
+
+			type = KUTF_HELPER_VALTYPE_U64;
+			recv_str += len_remain;
+			recv_sz -= len_remain;
+		} else {
+			/* special case: not a number, report as such */
+			pr_err("Rest of received string was not a numeric value or quoted string value: '%s'\n", recv_str);
+		}
+	}
+
+	if (type == KUTF_HELPER_VALTYPE_INVALID)
+		return err;
+
+	/* Any remaining characters - error */
+	if (strnlen(recv_str, recv_sz) != 0) {
+		pr_err("Characters remain after value of type %s: '%s'\n",
+				get_val_type_name(type), recv_str);
+		return KUTF_HELPER_ERR_CHARS_AFTER_VAL;
+	}
+
+	/* Success - write into the output structure */
+	switch (type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		named_val->u.val_u64 = u64val;
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		named_val->u.val_str = strval;
+		break;
+	default:
+		pr_err("Unreachable, fix kutf_helper_receive_named_val\n");
+		/* Coding error, report as though 'run' file failed */
+		return -EINVAL;
+	}
+
+	named_val->val_name = name_str;
+	named_val->type = type;
+
+	return KUTF_HELPER_ERR_NONE;
+}
+EXPORT_SYMBOL(kutf_helper_receive_named_val);
+
+#define DUMMY_MSG "<placeholder due to test fail>"
+int kutf_helper_receive_check_val(
+		struct kutf_helper_named_val *named_val,
+		struct kutf_context *context,
+		const char *expect_val_name,
+		enum kutf_helper_valtype expect_val_type)
+{
+	int err;
+
+	err = kutf_helper_receive_named_val(context, named_val);
+	if (err < 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to receive value named '%s'",
+				expect_val_name);
+		kutf_test_fail(context, msg);
+		return err;
+	} else if (err > 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Named-value parse error when expecting value named '%s'",
+				expect_val_name);
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+	if (strcmp(named_val->val_name, expect_val_name) != 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Expecting to receive value named '%s' but got '%s'",
+				expect_val_name, named_val->val_name);
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+
+	if (named_val->type != expect_val_type) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Expecting value named '%s' to be of type %s but got %s",
+				expect_val_name, get_val_type_name(expect_val_type),
+				get_val_type_name(named_val->type));
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+	return err;
+
+out_fail_and_fixup:
+	/* Produce a valid but incorrect value */
+	switch (expect_val_type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		named_val->u.val_u64 = 0ull;
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		{
+			char *str = kutf_mempool_alloc(&context->fixture_pool, sizeof(DUMMY_MSG));
+
+			if (!str)
+				return -1;
+
+			strcpy(str, DUMMY_MSG);
+			named_val->u.val_str = str;
+			break;
+		}
+	default:
+		break;
+	}
+
+	/* Indicate that this is invalid */
+	named_val->type = KUTF_HELPER_VALTYPE_INVALID;
+
+	/* But at least allow the caller to continue in the test with failures */
+	return 0;
+}
+EXPORT_SYMBOL(kutf_helper_receive_check_val);
+
+void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val)
+{
+	switch (named_val->type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		pr_warn("%s=0x%llx\n", named_val->val_name, named_val->u.val_u64);
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		pr_warn("%s=\"%s\"\n", named_val->val_name, named_val->u.val_str);
+		break;
+	case KUTF_HELPER_VALTYPE_INVALID:
+		pr_warn("%s is invalid\n", named_val->val_name);
+		break;
+	default:
+		pr_warn("%s has unknown type %d\n", named_val->val_name, named_val->type);
+		break;
+	}
+}
+EXPORT_SYMBOL(kutf_helper_output_named_val);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c
new file mode 100755
index 0000000..fd98bea
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c
@@ -0,0 +1,108 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF memory management functions */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <kutf/kutf_mem.h>
+
+
+/**
+ * struct kutf_alloc_entry - Structure representing an allocation.
+ * @node:	List node for use with kutf_mempool.
+ * @data:	Data area of the allocation
+ */
+struct kutf_alloc_entry {
+	struct list_head node;
+	u8 data[0];
+};
+
+int kutf_mempool_init(struct kutf_mempool *pool)
+{
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		return -1;
+	}
+
+	INIT_LIST_HEAD(&pool->head);
+	mutex_init(&pool->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(kutf_mempool_init);
+
+void kutf_mempool_destroy(struct kutf_mempool *pool)
+{
+	struct list_head *remove;
+	struct list_head *tmp;
+
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		return;
+	}
+
+	mutex_lock(&pool->lock);
+	list_for_each_safe(remove, tmp, &pool->head) {
+		struct kutf_alloc_entry *remove_alloc;
+
+		remove_alloc = list_entry(remove, struct kutf_alloc_entry, node);
+		list_del(&remove_alloc->node);
+		kfree(remove_alloc);
+	}
+	mutex_unlock(&pool->lock);
+
+}
+EXPORT_SYMBOL(kutf_mempool_destroy);
+
+void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size)
+{
+	struct kutf_alloc_entry *ret;
+
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		goto fail_pool;
+	}
+
+	mutex_lock(&pool->lock);
+
+	ret = kmalloc(sizeof(*ret) + size, GFP_KERNEL);
+	if (!ret) {
+		pr_err("Failed to allocate memory\n");
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&ret->node);
+	list_add(&ret->node, &pool->head);
+
+	mutex_unlock(&pool->lock);
+
+	return &ret->data[0];
+
+fail_alloc:
+	mutex_unlock(&pool->lock);
+fail_pool:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_mempool_alloc);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c
new file mode 100755
index 0000000..94ecfa4
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF result management functions */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/err.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_resultset.h>
+
+/* Lock to protect all result structures */
+static DEFINE_SPINLOCK(kutf_result_lock);
+
+struct kutf_result_set *kutf_create_result_set(void)
+{
+	struct kutf_result_set *set;
+
+	set = kmalloc(sizeof(*set), GFP_KERNEL);
+	if (!set) {
+		pr_err("Failed to allocate resultset");
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&set->results);
+	init_waitqueue_head(&set->waitq);
+	set->flags = 0;
+
+	return set;
+
+fail_alloc:
+	return NULL;
+}
+
+int kutf_add_result(struct kutf_context *context,
+		enum kutf_result_status status,
+		const char *message)
+{
+	struct kutf_mempool *mempool = &context->fixture_pool;
+	struct kutf_result_set *set = context->result_set;
+	/* Create the new result */
+	struct kutf_result *new_result;
+
+	BUG_ON(set == NULL);
+
+	new_result = kutf_mempool_alloc(mempool, sizeof(*new_result));
+	if (!new_result) {
+		pr_err("Result allocation failed\n");
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&new_result->node);
+	new_result->status = status;
+	new_result->message = message;
+
+	spin_lock(&kutf_result_lock);
+
+	list_add_tail(&new_result->node, &set->results);
+
+	spin_unlock(&kutf_result_lock);
+
+	wake_up(&set->waitq);
+
+	return 0;
+}
+
+void kutf_destroy_result_set(struct kutf_result_set *set)
+{
+	if (!list_empty(&set->results))
+		pr_err("kutf_destroy_result_set: Unread results from test\n");
+
+	kfree(set);
+}
+
+static bool kutf_has_result(struct kutf_result_set *set)
+{
+	bool has_result;
+
+	spin_lock(&kutf_result_lock);
+	if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT)
+		/* Pretend there are results if waiting for input */
+		has_result = true;
+	else
+		has_result = !list_empty(&set->results);
+	spin_unlock(&kutf_result_lock);
+
+	return has_result;
+}
+
+struct kutf_result *kutf_remove_result(struct kutf_result_set *set)
+{
+	struct kutf_result *result = NULL;
+	int ret;
+
+	do {
+		ret = wait_event_interruptible(set->waitq,
+				kutf_has_result(set));
+
+		if (ret)
+			return ERR_PTR(ret);
+
+		spin_lock(&kutf_result_lock);
+
+		if (!list_empty(&set->results)) {
+			result = list_first_entry(&set->results,
+					struct kutf_result,
+					node);
+			list_del(&result->node);
+		} else if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) {
+			/* Return a fake result */
+			static struct kutf_result waiting = {
+				.status = KUTF_RESULT_USERDATA_WAIT
+			};
+			result = &waiting;
+		}
+		/* If result == NULL then there was a race with the event
+		 * being removed between the check in kutf_has_result and
+		 * the lock being obtained. In this case we retry
+		 */
+
+		spin_unlock(&kutf_result_lock);
+	} while (result == NULL);
+
+	return result;
+}
+
+void kutf_set_waiting_for_input(struct kutf_result_set *set)
+{
+	spin_lock(&kutf_result_lock);
+	set->flags |= KUTF_RESULT_SET_WAITING_FOR_INPUT;
+	spin_unlock(&kutf_result_lock);
+
+	wake_up(&set->waitq);
+}
+
+void kutf_clear_waiting_for_input(struct kutf_result_set *set)
+{
+	spin_lock(&kutf_result_lock);
+	set->flags &= ~KUTF_RESULT_SET_WAITING_FOR_INPUT;
+	spin_unlock(&kutf_result_lock);
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c
new file mode 100755
index 0000000..f3a8e9b
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c
@@ -0,0 +1,1203 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF suite, test and fixture management including user to kernel
+ * interaction */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/atomic.h>
+#include <linux/sched.h>
+
+#include <generated/autoconf.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_resultset.h>
+#include <kutf/kutf_utils.h>
+#include <kutf/kutf_helpers.h>
+
+#if defined(CONFIG_DEBUG_FS)
+
+/**
+ * struct kutf_application - Structure which represents kutf application
+ * @name:	The name of this test application.
+ * @dir:	The debugfs directory for this test
+ * @suite_list:	List head to store all the suites which are part of this
+ *              application
+ */
+struct kutf_application {
+	const char         *name;
+	struct dentry      *dir;
+	struct list_head   suite_list;
+};
+
+/**
+ * struct kutf_test_function - Structure which represents kutf test function
+ * @suite:		Back reference to the suite this test function
+ *                      belongs to
+ * @filters:		Filters that apply to this test function
+ * @test_id:		Test ID
+ * @execute:		Function to run for this test
+ * @test_data:		Static data for this test
+ * @node:		List node for test_list
+ * @variant_list:	List head to store all the variants which can run on
+ *                      this function
+ * @dir:		debugfs directory for this test function
+ */
+struct kutf_test_function {
+	struct kutf_suite  *suite;
+	unsigned int       filters;
+	unsigned int       test_id;
+	void (*execute)(struct kutf_context *context);
+	union kutf_callback_data test_data;
+	struct list_head   node;
+	struct list_head   variant_list;
+	struct dentry      *dir;
+};
+
+/**
+ * struct kutf_test_fixture - Structure which holds information on the kutf
+ *                            test fixture
+ * @test_func:		Test function this fixture belongs to
+ * @fixture_index:	Index of this fixture
+ * @node:		List node for variant_list
+ * @dir:		debugfs directory for this test fixture
+ */
+struct kutf_test_fixture {
+	struct kutf_test_function *test_func;
+	unsigned int              fixture_index;
+	struct list_head          node;
+	struct dentry             *dir;
+};
+
+static struct dentry *base_dir;
+static struct workqueue_struct *kutf_workq;
+
+/**
+ * struct kutf_convert_table - Structure which keeps test results
+ * @result_name:	Status of the test result
+ * @result:		Status value for a single test
+ */
+struct kutf_convert_table {
+	char                    result_name[50];
+	enum kutf_result_status result;
+};
+
+struct kutf_convert_table kutf_convert[] = {
+#define ADD_UTF_RESULT(_name) \
+{ \
+	#_name, \
+	_name, \
+},
+ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK)
+ADD_UTF_RESULT(KUTF_RESULT_SKIP)
+ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN)
+ADD_UTF_RESULT(KUTF_RESULT_PASS)
+ADD_UTF_RESULT(KUTF_RESULT_DEBUG)
+ADD_UTF_RESULT(KUTF_RESULT_INFO)
+ADD_UTF_RESULT(KUTF_RESULT_WARN)
+ADD_UTF_RESULT(KUTF_RESULT_FAIL)
+ADD_UTF_RESULT(KUTF_RESULT_FATAL)
+ADD_UTF_RESULT(KUTF_RESULT_ABORT)
+};
+
+#define UTF_CONVERT_SIZE (ARRAY_SIZE(kutf_convert))
+
+/**
+ * kutf_create_context() - Create a test context in which a specific fixture
+ *                         of an application will be run and its results
+ *                         reported back to the user
+ * @test_fix:	Test fixture to be run.
+ *
+ * The context's refcount will be initialized to 1.
+ *
+ * Return: Returns the created test context on success or NULL on failure
+ */
+static struct kutf_context *kutf_create_context(
+		struct kutf_test_fixture *test_fix);
+
+/**
+ * kutf_destroy_context() - Destroy a previously created test context, only
+ *                          once its refcount has become zero
+ * @kref:	pointer to kref member within the context
+ *
+ * This should only be used via a kref_put() call on the context's kref member
+ */
+static void kutf_destroy_context(struct kref *kref);
+
+/**
+ * kutf_context_get() - increment refcount on a context
+ * @context:	the kutf context
+ *
+ * This must be used when the lifetime of the context might exceed that of the
+ * thread creating @context
+ */
+static void kutf_context_get(struct kutf_context *context);
+
+/**
+ * kutf_context_put() - decrement refcount on a context, destroying it when it
+ *                      reached zero
+ * @context:	the kutf context
+ *
+ * This must be used only after a corresponding kutf_context_get() call on
+ * @context, and the caller no longer needs access to @context.
+ */
+static void kutf_context_put(struct kutf_context *context);
+
+/**
+ * kutf_set_result() - Set the test result against the specified test context
+ * @context:	Test context
+ * @status:	Result status
+ */
+static void kutf_set_result(struct kutf_context *context,
+		enum kutf_result_status status);
+
+/**
+ * kutf_set_expected_result() - Set the expected test result for the specified
+ *                              test context
+ * @context:		Test context
+ * @expected_status:	Expected result status
+ */
+static void kutf_set_expected_result(struct kutf_context *context,
+		enum kutf_result_status expected_status);
+
+/**
+ * kutf_result_to_string() - Converts a KUTF result into a string
+ * @result_str:      Output result string
+ * @result:          Result status to convert
+ *
+ * Return: 1 if test result was successfully converted to string, 0 otherwise
+ */
+static int kutf_result_to_string(char **result_str,
+		enum kutf_result_status result)
+{
+	int i;
+	int ret = 0;
+
+	for (i = 0; i < UTF_CONVERT_SIZE; i++) {
+		if (result == kutf_convert[i].result) {
+			*result_str = kutf_convert[i].result_name;
+			ret = 1;
+		}
+	}
+	return ret;
+}
+
+/**
+ * kutf_debugfs_const_string_read() - Simple debugfs read callback which
+ *                                    returns a constant string
+ * @file:	Opened file to read from
+ * @buf:	User buffer to write the data into
+ * @len:	Amount of data to read
+ * @ppos:	Offset into file to read from
+ *
+ * Return: On success, the number of bytes read and offset @ppos advanced by
+ *         this number; on error, negative value
+ */
+static ssize_t kutf_debugfs_const_string_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	char *str = file->private_data;
+
+	return simple_read_from_buffer(buf, len, ppos, str, strlen(str));
+}
+
+static const struct file_operations kutf_debugfs_const_string_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = kutf_debugfs_const_string_read,
+	.llseek  = default_llseek,
+};
+
+/**
+ * kutf_add_explicit_result() - Check if an explicit result needs to be added
+ * @context:	KUTF test context
+ */
+static void kutf_add_explicit_result(struct kutf_context *context)
+{
+	switch (context->expected_status) {
+	case KUTF_RESULT_UNKNOWN:
+		break;
+
+	case KUTF_RESULT_WARN:
+		if (context->status == KUTF_RESULT_WARN)
+			kutf_test_pass(context,
+					"Pass (expected warn occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected warn missing)");
+		break;
+
+	case KUTF_RESULT_FAIL:
+		if (context->status == KUTF_RESULT_FAIL)
+			kutf_test_pass(context,
+					"Pass (expected fail occurred)");
+		else if (context->status != KUTF_RESULT_SKIP) {
+			/* Force the expected status so the fail gets logged */
+			context->expected_status = KUTF_RESULT_PASS;
+			kutf_test_fail(context,
+					"Fail (expected fail missing)");
+		}
+		break;
+
+	case KUTF_RESULT_FATAL:
+		if (context->status == KUTF_RESULT_FATAL)
+			kutf_test_pass(context,
+					"Pass (expected fatal occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected fatal missing)");
+		break;
+
+	case KUTF_RESULT_ABORT:
+		if (context->status == KUTF_RESULT_ABORT)
+			kutf_test_pass(context,
+					"Pass (expected abort occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected abort missing)");
+		break;
+	default:
+		break;
+	}
+}
+
+static void kutf_run_test(struct work_struct *data)
+{
+	struct kutf_context *test_context = container_of(data,
+			struct kutf_context, work);
+	struct kutf_suite *suite = test_context->suite;
+	struct kutf_test_function *test_func;
+
+	test_func = test_context->test_fix->test_func;
+
+	/*
+	 * Call the create fixture function if required before the
+	 * fixture is run
+	 */
+	if (suite->create_fixture)
+		test_context->fixture = suite->create_fixture(test_context);
+
+	/* Only run the test if the fixture was created (if required) */
+	if ((suite->create_fixture && test_context->fixture) ||
+			(!suite->create_fixture)) {
+		/* Run this fixture */
+		test_func->execute(test_context);
+
+		if (suite->remove_fixture)
+			suite->remove_fixture(test_context);
+
+		kutf_add_explicit_result(test_context);
+	}
+
+	kutf_add_result(test_context, KUTF_RESULT_TEST_FINISHED, NULL);
+
+	kutf_context_put(test_context);
+}
+
+/**
+ * kutf_debugfs_run_open() Debugfs open callback for the "run" entry.
+ * @inode:	inode of the opened file
+ * @file:	Opened file to read from
+ *
+ * This function creates a KUTF context and queues it onto a workqueue to be
+ * run asynchronously. The resulting file descriptor can be used to communicate
+ * userdata to the test and to read back the results of the test execution.
+ *
+ * Return: 0 on success
+ */
+static int kutf_debugfs_run_open(struct inode *inode, struct file *file)
+{
+	struct kutf_test_fixture *test_fix = inode->i_private;
+	struct kutf_context *test_context;
+	int err = 0;
+
+	test_context = kutf_create_context(test_fix);
+	if (!test_context) {
+		err = -ENOMEM;
+		goto finish;
+	}
+
+	file->private_data = test_context;
+
+	/* This reference is release by the kutf_run_test */
+	kutf_context_get(test_context);
+
+	queue_work(kutf_workq, &test_context->work);
+
+finish:
+	return err;
+}
+
+#define USERDATA_WARNING_MESSAGE "WARNING: This test requires userdata\n"
+
+/**
+ * kutf_debugfs_run_read() - Debugfs read callback for the "run" entry.
+ * @file:	Opened file to read from
+ * @buf:	User buffer to write the data into
+ * @len:	Amount of data to read
+ * @ppos:	Offset into file to read from
+ *
+ * This function emits the results of the test, blocking until they are
+ * available.
+ *
+ * If the test involves user data then this will also return user data records
+ * to user space. If the test is waiting for user data then this function will
+ * output a message (to make the likes of 'cat' display it), followed by
+ * returning 0 to mark the end of file.
+ *
+ * Results will be emitted one at a time, once all the results have been read
+ * 0 will be returned to indicate there is no more data.
+ *
+ * Return: Number of bytes read.
+ */
+static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct kutf_context *test_context = file->private_data;
+	struct kutf_result *res;
+	unsigned long bytes_not_copied;
+	ssize_t bytes_copied = 0;
+	char *kutf_str_ptr = NULL;
+	size_t kutf_str_len = 0;
+	size_t message_len = 0;
+	char separator = ':';
+	char terminator = '\n';
+
+	res = kutf_remove_result(test_context->result_set);
+
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	/*
+	 * Handle 'fake' results - these results are converted to another
+	 * form before being returned from the kernel
+	 */
+	switch (res->status) {
+	case KUTF_RESULT_TEST_FINISHED:
+		return 0;
+	case KUTF_RESULT_USERDATA_WAIT:
+		if (test_context->userdata.flags &
+				KUTF_USERDATA_WARNING_OUTPUT) {
+			/*
+			 * Warning message already output,
+			 * signal end-of-file
+			 */
+			return 0;
+		}
+
+		message_len = sizeof(USERDATA_WARNING_MESSAGE)-1;
+		if (message_len > len)
+			message_len = len;
+
+		bytes_not_copied = copy_to_user(buf,
+				USERDATA_WARNING_MESSAGE,
+				message_len);
+		if (bytes_not_copied != 0)
+			return -EFAULT;
+		test_context->userdata.flags |= KUTF_USERDATA_WARNING_OUTPUT;
+		return message_len;
+	case KUTF_RESULT_USERDATA:
+		message_len = strlen(res->message);
+		if (message_len > len-1) {
+			message_len = len-1;
+			pr_warn("User data truncated, read not long enough\n");
+		}
+		bytes_not_copied = copy_to_user(buf, res->message,
+				message_len);
+		if (bytes_not_copied != 0) {
+			pr_warn("Failed to copy data to user space buffer\n");
+			return -EFAULT;
+		}
+		/* Finally the terminator */
+		bytes_not_copied = copy_to_user(&buf[message_len],
+				&terminator, 1);
+		if (bytes_not_copied != 0) {
+			pr_warn("Failed to copy data to user space buffer\n");
+			return -EFAULT;
+		}
+		return message_len+1;
+	default:
+		/* Fall through - this is a test result */
+		break;
+	}
+
+	/* Note: This code assumes a result is read completely */
+	kutf_result_to_string(&kutf_str_ptr, res->status);
+	if (kutf_str_ptr)
+		kutf_str_len = strlen(kutf_str_ptr);
+
+	if (res->message)
+		message_len = strlen(res->message);
+
+	if ((kutf_str_len + 1 + message_len + 1) > len) {
+		pr_err("Not enough space in user buffer for a single result");
+		return 0;
+	}
+
+	/* First copy the result string */
+	if (kutf_str_ptr) {
+		bytes_not_copied = copy_to_user(&buf[0], kutf_str_ptr,
+						kutf_str_len);
+		bytes_copied += kutf_str_len - bytes_not_copied;
+		if (bytes_not_copied)
+			goto exit;
+	}
+
+	/* Then the separator */
+	bytes_not_copied = copy_to_user(&buf[bytes_copied],
+					&separator, 1);
+	bytes_copied += 1 - bytes_not_copied;
+	if (bytes_not_copied)
+		goto exit;
+
+	/* Finally Next copy the result string */
+	if (res->message) {
+		bytes_not_copied = copy_to_user(&buf[bytes_copied],
+						res->message, message_len);
+		bytes_copied += message_len - bytes_not_copied;
+		if (bytes_not_copied)
+			goto exit;
+	}
+
+	/* Finally the terminator */
+	bytes_not_copied = copy_to_user(&buf[bytes_copied],
+					&terminator, 1);
+	bytes_copied += 1 - bytes_not_copied;
+
+exit:
+	return bytes_copied;
+}
+
+/**
+ * kutf_debugfs_run_write() Debugfs write callback for the "run" entry.
+ * @file:	Opened file to write to
+ * @buf:	User buffer to read the data from
+ * @len:	Amount of data to write
+ * @ppos:	Offset into file to write to
+ *
+ * This function allows user and kernel to exchange extra data necessary for
+ * the test fixture.
+ *
+ * The data is added to the first struct kutf_context running the fixture
+ *
+ * Return: Number of bytes written
+ */
+static ssize_t kutf_debugfs_run_write(struct file *file,
+		const char __user *buf, size_t len, loff_t *ppos)
+{
+	int ret = 0;
+	struct kutf_context *test_context = file->private_data;
+
+	if (len > KUTF_MAX_LINE_LENGTH)
+		return -EINVAL;
+
+	ret = kutf_helper_input_enqueue(test_context, buf, len);
+	if (ret < 0)
+		return ret;
+
+	return len;
+}
+
+/**
+ * kutf_debugfs_run_release() - Debugfs release callback for the "run" entry.
+ * @inode:	File entry representation
+ * @file:	A specific opening of the file
+ *
+ * Release any resources that were created during the opening of the file
+ *
+ * Note that resources may not be released immediately, that might only happen
+ * later when other users of the kutf_context release their refcount.
+ *
+ * Return: 0 on success
+ */
+static int kutf_debugfs_run_release(struct inode *inode, struct file *file)
+{
+	struct kutf_context *test_context = file->private_data;
+
+	kutf_helper_input_enqueue_end_of_data(test_context);
+
+	kutf_context_put(test_context);
+	return 0;
+}
+
+static const struct file_operations kutf_debugfs_run_ops = {
+	.owner = THIS_MODULE,
+	.open = kutf_debugfs_run_open,
+	.read = kutf_debugfs_run_read,
+	.write = kutf_debugfs_run_write,
+	.release = kutf_debugfs_run_release,
+	.llseek  = default_llseek,
+};
+
+/**
+ * create_fixture_variant() - Creates a fixture variant for the specified
+ *                            test function and index and the debugfs entries
+ *                            that represent it.
+ * @test_func:		Test function
+ * @fixture_index:	Fixture index
+ *
+ * Return: 0 on success, negative value corresponding to error code in failure
+ */
+static int create_fixture_variant(struct kutf_test_function *test_func,
+		unsigned int fixture_index)
+{
+	struct kutf_test_fixture *test_fix;
+	char name[11];	/* Enough to print the MAX_UINT32 + the null terminator */
+	struct dentry *tmp;
+	int err;
+
+	test_fix = kmalloc(sizeof(*test_fix), GFP_KERNEL);
+	if (!test_fix) {
+		pr_err("Failed to create debugfs directory when adding fixture\n");
+		err = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	test_fix->test_func = test_func;
+	test_fix->fixture_index = fixture_index;
+
+	snprintf(name, sizeof(name), "%d", fixture_index);
+	test_fix->dir = debugfs_create_dir(name, test_func->dir);
+	if (!test_func->dir) {
+		pr_err("Failed to create debugfs directory when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_dir;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, test_fix->dir, "fixture\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_file;
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	tmp = debugfs_create_file_unsafe(
+#else
+	tmp = debugfs_create_file(
+#endif
+			"run", 0600, test_fix->dir,
+			test_fix,
+			&kutf_debugfs_run_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"run\" when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_file;
+	}
+
+	list_add(&test_fix->node, &test_func->variant_list);
+	return 0;
+
+fail_file:
+	debugfs_remove_recursive(test_fix->dir);
+fail_dir:
+	kfree(test_fix);
+fail_alloc:
+	return err;
+}
+
+/**
+ * kutf_remove_test_variant() - Destroy a previously created fixture variant.
+ * @test_fix:	Test fixture
+ */
+static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix)
+{
+	debugfs_remove_recursive(test_fix->dir);
+	kfree(test_fix);
+}
+
+void kutf_add_test_with_filters_and_data(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data test_data)
+{
+	struct kutf_test_function *test_func;
+	struct dentry *tmp;
+	unsigned int i;
+
+	test_func = kmalloc(sizeof(*test_func), GFP_KERNEL);
+	if (!test_func) {
+		pr_err("Failed to allocate memory when adding test %s\n", name);
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&test_func->variant_list);
+
+	test_func->dir = debugfs_create_dir(name, suite->dir);
+	if (!test_func->dir) {
+		pr_err("Failed to create debugfs directory when adding test %s\n", name);
+		goto fail_dir;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, test_func->dir, "test\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	test_func->filters = filters;
+	tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir,
+				 &test_func->filters);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	test_func->test_id = id;
+	tmp = debugfs_create_u32("test_id", S_IROTH, test_func->dir,
+				 &test_func->test_id);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	for (i = 0; i < suite->fixture_variants; i++) {
+		if (create_fixture_variant(test_func, i)) {
+			pr_err("Failed to create fixture %d when adding test %s\n", i, name);
+			goto fail_file;
+		}
+	}
+
+	test_func->suite = suite;
+	test_func->execute = execute;
+	test_func->test_data = test_data;
+
+	list_add(&test_func->node, &suite->test_list);
+	return;
+
+fail_file:
+	debugfs_remove_recursive(test_func->dir);
+fail_dir:
+	kfree(test_func);
+fail_alloc:
+	return;
+}
+EXPORT_SYMBOL(kutf_add_test_with_filters_and_data);
+
+void kutf_add_test_with_filters(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters)
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+
+	kutf_add_test_with_filters_and_data(suite,
+					    id,
+					    name,
+					    execute,
+					    suite->suite_default_flags,
+					    data);
+}
+EXPORT_SYMBOL(kutf_add_test_with_filters);
+
+void kutf_add_test(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context))
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+
+	kutf_add_test_with_filters_and_data(suite,
+					    id,
+					    name,
+					    execute,
+					    suite->suite_default_flags,
+					    data);
+}
+EXPORT_SYMBOL(kutf_add_test);
+
+/**
+ * kutf_remove_test(): Remove a previously added test function.
+ * @test_func: Test function
+ */
+static void kutf_remove_test(struct kutf_test_function *test_func)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &test_func->variant_list) {
+		struct kutf_test_fixture *test_fix;
+
+		test_fix = list_entry(pos, struct kutf_test_fixture, node);
+		kutf_remove_test_variant(test_fix);
+	}
+
+	list_del(&test_func->node);
+	debugfs_remove_recursive(test_func->dir);
+	kfree(test_func);
+}
+
+struct kutf_suite *kutf_create_suite_with_filters_and_data(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data suite_data)
+{
+	struct kutf_suite *suite;
+	struct dentry *tmp;
+
+	suite = kmalloc(sizeof(*suite), GFP_KERNEL);
+	if (!suite) {
+		pr_err("Failed to allocate memory when creating suite %s\n", name);
+		goto fail_kmalloc;
+	}
+
+	suite->dir = debugfs_create_dir(name, app->dir);
+	if (!suite->dir) {
+		pr_err("Failed to create debugfs directory when adding test %s\n", name);
+		goto fail_debugfs;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, suite->dir, "suite\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	INIT_LIST_HEAD(&suite->test_list);
+	suite->app = app;
+	suite->name = name;
+	suite->fixture_variants = fixture_count;
+	suite->create_fixture = create_fixture;
+	suite->remove_fixture = remove_fixture;
+	suite->suite_default_flags = filters;
+	suite->suite_data = suite_data;
+
+	list_add(&suite->node, &app->suite_list);
+
+	return suite;
+
+fail_file:
+	debugfs_remove_recursive(suite->dir);
+fail_debugfs:
+	kfree(suite);
+fail_kmalloc:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_create_suite_with_filters_and_data);
+
+struct kutf_suite *kutf_create_suite_with_filters(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters)
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+	return kutf_create_suite_with_filters_and_data(app,
+						       name,
+						       fixture_count,
+						       create_fixture,
+						       remove_fixture,
+						       filters,
+						       data);
+}
+EXPORT_SYMBOL(kutf_create_suite_with_filters);
+
+struct kutf_suite *kutf_create_suite(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context))
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+	return kutf_create_suite_with_filters_and_data(app,
+						       name,
+						       fixture_count,
+						       create_fixture,
+						       remove_fixture,
+						       KUTF_F_TEST_GENERIC,
+						       data);
+}
+EXPORT_SYMBOL(kutf_create_suite);
+
+/**
+ * kutf_destroy_suite() - Destroy a previously added test suite.
+ * @suite:	Test suite
+ */
+static void kutf_destroy_suite(struct kutf_suite *suite)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &suite->test_list) {
+		struct kutf_test_function *test_func;
+
+		test_func = list_entry(pos, struct kutf_test_function, node);
+		kutf_remove_test(test_func);
+	}
+
+	list_del(&suite->node);
+	debugfs_remove_recursive(suite->dir);
+	kfree(suite);
+}
+
+struct kutf_application *kutf_create_application(const char *name)
+{
+	struct kutf_application *app;
+	struct dentry *tmp;
+
+	app = kmalloc(sizeof(*app), GFP_KERNEL);
+	if (!app) {
+		pr_err("Failed to create allocate memory when creating application %s\n", name);
+		goto fail_kmalloc;
+	}
+
+	app->dir = debugfs_create_dir(name, base_dir);
+	if (!app->dir) {
+		pr_err("Failed to create debugfs direcotry when creating application %s\n", name);
+		goto fail_debugfs;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, app->dir, "application\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name);
+		goto fail_file;
+	}
+
+	INIT_LIST_HEAD(&app->suite_list);
+	app->name = name;
+
+	return app;
+
+fail_file:
+	debugfs_remove_recursive(app->dir);
+fail_debugfs:
+	kfree(app);
+fail_kmalloc:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_create_application);
+
+void kutf_destroy_application(struct kutf_application *app)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &app->suite_list) {
+		struct kutf_suite *suite;
+
+		suite = list_entry(pos, struct kutf_suite, node);
+		kutf_destroy_suite(suite);
+	}
+
+	debugfs_remove_recursive(app->dir);
+	kfree(app);
+}
+EXPORT_SYMBOL(kutf_destroy_application);
+
+static struct kutf_context *kutf_create_context(
+		struct kutf_test_fixture *test_fix)
+{
+	struct kutf_context *new_context;
+
+	new_context = kmalloc(sizeof(*new_context), GFP_KERNEL);
+	if (!new_context) {
+		pr_err("Failed to allocate test context");
+		goto fail_alloc;
+	}
+
+	new_context->result_set = kutf_create_result_set();
+	if (!new_context->result_set) {
+		pr_err("Failed to create result set");
+		goto fail_result_set;
+	}
+
+	new_context->test_fix = test_fix;
+	/* Save the pointer to the suite as the callbacks will require it */
+	new_context->suite = test_fix->test_func->suite;
+	new_context->status = KUTF_RESULT_UNKNOWN;
+	new_context->expected_status = KUTF_RESULT_UNKNOWN;
+
+	kutf_mempool_init(&new_context->fixture_pool);
+	new_context->fixture = NULL;
+	new_context->fixture_index = test_fix->fixture_index;
+	new_context->fixture_name = NULL;
+	new_context->test_data = test_fix->test_func->test_data;
+
+	new_context->userdata.flags = 0;
+	INIT_LIST_HEAD(&new_context->userdata.input_head);
+	init_waitqueue_head(&new_context->userdata.input_waitq);
+
+	INIT_WORK(&new_context->work, kutf_run_test);
+
+	kref_init(&new_context->kref);
+
+	return new_context;
+
+fail_result_set:
+	kfree(new_context);
+fail_alloc:
+	return NULL;
+}
+
+static void kutf_destroy_context(struct kref *kref)
+{
+	struct kutf_context *context;
+
+	context = container_of(kref, struct kutf_context, kref);
+	kutf_destroy_result_set(context->result_set);
+	kutf_mempool_destroy(&context->fixture_pool);
+	kfree(context);
+}
+
+static void kutf_context_get(struct kutf_context *context)
+{
+	kref_get(&context->kref);
+}
+
+static void kutf_context_put(struct kutf_context *context)
+{
+	kref_put(&context->kref, kutf_destroy_context);
+}
+
+
+static void kutf_set_result(struct kutf_context *context,
+		enum kutf_result_status status)
+{
+	context->status = status;
+}
+
+static void kutf_set_expected_result(struct kutf_context *context,
+		enum kutf_result_status expected_status)
+{
+	context->expected_status = expected_status;
+}
+
+/**
+ * kutf_test_log_result() - Log a result for the specified test context
+ * @context:	Test context
+ * @message:	Result string
+ * @new_status:	Result status
+ */
+static void kutf_test_log_result(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status)
+{
+	if (context->status < new_status)
+		context->status = new_status;
+
+	if (context->expected_status != new_status)
+		kutf_add_result(context, new_status, message);
+}
+
+void kutf_test_log_result_external(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status)
+{
+	kutf_test_log_result(context, message, new_status);
+}
+EXPORT_SYMBOL(kutf_test_log_result_external);
+
+void kutf_test_expect_abort(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_ABORT);
+}
+EXPORT_SYMBOL(kutf_test_expect_abort);
+
+void kutf_test_expect_fatal(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_FATAL);
+}
+EXPORT_SYMBOL(kutf_test_expect_fatal);
+
+void kutf_test_expect_fail(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_FAIL);
+}
+EXPORT_SYMBOL(kutf_test_expect_fail);
+
+void kutf_test_expect_warn(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_WARN);
+}
+EXPORT_SYMBOL(kutf_test_expect_warn);
+
+void kutf_test_expect_pass(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_PASS);
+}
+EXPORT_SYMBOL(kutf_test_expect_pass);
+
+void kutf_test_skip(struct kutf_context *context)
+{
+	kutf_set_result(context, KUTF_RESULT_SKIP);
+	kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN);
+
+	kutf_test_log_result(context, "Test skipped", KUTF_RESULT_SKIP);
+}
+EXPORT_SYMBOL(kutf_test_skip);
+
+void kutf_test_skip_msg(struct kutf_context *context, const char *message)
+{
+	kutf_set_result(context, KUTF_RESULT_SKIP);
+	kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN);
+
+	kutf_test_log_result(context, kutf_dsprintf(&context->fixture_pool,
+			     "Test skipped: %s", message), KUTF_RESULT_SKIP);
+	kutf_test_log_result(context, "!!!Test skipped!!!", KUTF_RESULT_SKIP);
+}
+EXPORT_SYMBOL(kutf_test_skip_msg);
+
+void kutf_test_debug(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_DEBUG);
+}
+EXPORT_SYMBOL(kutf_test_debug);
+
+void kutf_test_pass(struct kutf_context *context, char const *message)
+{
+	static const char explicit_message[] = "(explicit pass)";
+
+	if (!message)
+		message = explicit_message;
+
+	kutf_test_log_result(context, message, KUTF_RESULT_PASS);
+}
+EXPORT_SYMBOL(kutf_test_pass);
+
+void kutf_test_info(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_INFO);
+}
+EXPORT_SYMBOL(kutf_test_info);
+
+void kutf_test_warn(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_WARN);
+}
+EXPORT_SYMBOL(kutf_test_warn);
+
+void kutf_test_fail(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_FAIL);
+}
+EXPORT_SYMBOL(kutf_test_fail);
+
+void kutf_test_fatal(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_FATAL);
+}
+EXPORT_SYMBOL(kutf_test_fatal);
+
+void kutf_test_abort(struct kutf_context *context)
+{
+	kutf_test_log_result(context, "", KUTF_RESULT_ABORT);
+}
+EXPORT_SYMBOL(kutf_test_abort);
+
+/**
+ * init_kutf_core() - Module entry point.
+ *
+ * Create the base entry point in debugfs.
+ */
+static int __init init_kutf_core(void)
+{
+	kutf_workq = alloc_workqueue("kutf workq", WQ_UNBOUND, 1);
+	if (!kutf_workq)
+		return -ENOMEM;
+
+	base_dir = debugfs_create_dir("kutf_tests", NULL);
+	if (!base_dir) {
+		destroy_workqueue(kutf_workq);
+		kutf_workq = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * exit_kutf_core() - Module exit point.
+ *
+ * Remove the base entry point in debugfs.
+ */
+static void __exit exit_kutf_core(void)
+{
+	debugfs_remove_recursive(base_dir);
+
+	if (kutf_workq)
+		destroy_workqueue(kutf_workq);
+}
+
+#else	/* defined(CONFIG_DEBUG_FS) */
+
+/**
+ * init_kutf_core() - Module entry point.
+ *
+ * Stub for when build against a kernel without debugfs support
+ */
+static int __init init_kutf_core(void)
+{
+	pr_debug("KUTF requires a kernel with debug fs support");
+
+	return -ENODEV;
+}
+
+/**
+ * exit_kutf_core() - Module exit point.
+ *
+ * Stub for when build against a kernel without debugfs support
+ */
+static void __exit exit_kutf_core(void)
+{
+}
+#endif	/* defined(CONFIG_DEBUG_FS) */
+
+MODULE_LICENSE("GPL");
+
+module_init(init_kutf_core);
+module_exit(exit_kutf_core);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c
new file mode 100755
index 0000000..7f5ac51
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF utility functions */
+
+#include <linux/mutex.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+#include <kutf/kutf_utils.h>
+#include <kutf/kutf_mem.h>
+
+static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN];
+
+DEFINE_MUTEX(buffer_lock);
+
+const char *kutf_dsprintf(struct kutf_mempool *pool,
+		const char *fmt, ...)
+{
+	va_list args;
+	int len;
+	int size;
+	void *buffer;
+
+	mutex_lock(&buffer_lock);
+	va_start(args, fmt);
+	len = vsnprintf(tmp_buffer, sizeof(tmp_buffer), fmt, args);
+	va_end(args);
+
+	if (len < 0) {
+		pr_err("kutf_dsprintf: Bad format dsprintf format %s\n", fmt);
+		goto fail_format;
+	}
+
+	if (len >= sizeof(tmp_buffer)) {
+		pr_warn("kutf_dsprintf: Truncated dsprintf message %s\n", fmt);
+		size = sizeof(tmp_buffer);
+	} else {
+		size = len + 1;
+	}
+
+	buffer = kutf_mempool_alloc(pool, size);
+	if (!buffer)
+		goto fail_alloc;
+
+	memcpy(buffer, tmp_buffer, size);
+	mutex_unlock(&buffer_lock);
+
+	return buffer;
+
+fail_alloc:
+fail_format:
+	mutex_unlock(&buffer_lock);
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_dsprintf);
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild
new file mode 100755
index 0000000..ca8c512
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android
+
+obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test.o
+
+mali_kutf_irq_test-y := mali_kutf_irq_test_main.o
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig
new file mode 100755
index 0000000..4a3863a
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig
@@ -0,0 +1,29 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+config MALI_IRQ_LATENCY
+ tristate "Mali GPU IRQ latency measurement"
+ depends on MALI_MIDGARD && MALI_DEBUG && MALI_KUTF
+ default m
+ help
+   This option will build a test module mali_kutf_irq_test that
+   can determine the latency of the Mali GPU IRQ on your system.
+   Choosing M here will generate a single module called mali_kutf_irq_test.
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
new file mode 100755
index 0000000..9218a40
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
@@ -0,0 +1,49 @@
+#
+# (C) COPYRIGHT 2015, 2017-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+TEST_CCFLAGS := \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_USE_CSF=$(MALI_USE_CSF) \
+	$(SCONS_CFLAGS) \
+	-I$(CURDIR)/../include \
+	-I$(CURDIR)/../../../../../../include \
+	-I$(CURDIR)/../../../ \
+	-I$(CURDIR)/../../ \
+	-I$(CURDIR)/../../backend/gpu \
+	-I$(CURDIR)/ \
+	-I$(srctree)/drivers/staging/android \
+	-I$(srctree)/include/linux
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS="$(TEST_CCFLAGS)" KBUILD_EXTRA_SYMBOLS="$(CURDIR)/../kutf/Module.symvers $(CURDIR)/../../Module.symvers" modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp
new file mode 100755
index 0000000..971f092
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp
@@ -0,0 +1,32 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2018-2019 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_kernel_module {
+    name: "mali_kutf_irq_test",
+    defaults: [
+        "mali_kbase_shared_config_defaults",
+        "kernel_test_includes",
+    ],
+    srcs: [
+        "Kbuild",
+        "mali_kutf_irq_test_main.c",
+    ],
+    extra_symbols: [
+        "mali_kbase",
+        "kutf",
+    ],
+    enabled: false,
+    base_build_kutf: {
+        enabled: true,
+        kbuild_options: ["CONFIG_MALI_IRQ_LATENCY=m"],
+    },
+}
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
new file mode 100755
index 0000000..4181b7f
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
@@ -0,0 +1,273 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+
+#include "mali_kbase.h"
+#include <midgard/backend/gpu/mali_kbase_device_internal.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_utils.h>
+
+/*
+ * This file contains the code which is used for measuring interrupt latency
+ * of the Mali GPU IRQ. In particular, function mali_kutf_irq_latency() is
+ * used with this purpose and it is called within KUTF framework - a kernel
+ * unit test framework. The measured latency provided by this test should
+ * be representative for the latency of the Mali JOB/MMU IRQs as well.
+ */
+
+/* KUTF test application pointer for this test */
+struct kutf_application *irq_app;
+
+/**
+ * struct kutf_irq_fixture data - test fixture used by the test functions.
+ * @kbdev:	kbase device for the GPU.
+ *
+ */
+struct kutf_irq_fixture_data {
+	struct kbase_device *kbdev;
+};
+
+#define SEC_TO_NANO(s)	      ((s)*1000000000LL)
+
+/* ID for the GPU IRQ */
+#define GPU_IRQ_HANDLER 2
+
+#define NR_TEST_IRQS 1000000
+
+/* IRQ for the test to trigger. Currently MULTIPLE_GPU_FAULTS as we would not
+ * expect to see this in normal use (e.g., when Android is running). */
+#define TEST_IRQ MULTIPLE_GPU_FAULTS
+
+#define IRQ_TIMEOUT HZ
+
+/* Kernel API for setting irq throttle hook callback and irq time in us*/
+extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+		irq_handler_t custom_handler,
+		int irq_type);
+extern irqreturn_t kbase_gpu_irq_handler(int irq, void *data);
+
+static DECLARE_WAIT_QUEUE_HEAD(wait);
+static bool triggered;
+static u64 irq_time;
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+/**
+ * kbase_gpu_irq_custom_handler - Custom IRQ throttle handler
+ * @irq:  IRQ number
+ * @data: Data associated with this IRQ
+ *
+ * Return: state of the IRQ
+ */
+static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data)
+{
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS));
+	if (val & TEST_IRQ) {
+		struct timespec tval;
+
+		getnstimeofday(&tval);
+		irq_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec);
+
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
+
+		triggered = true;
+		wake_up(&wait);
+
+		return IRQ_HANDLED;
+	}
+
+	/* Trigger main irq handler */
+	return kbase_gpu_irq_handler(irq, data);
+}
+
+/**
+ * mali_kutf_irq_default_create_fixture() - Creates the fixture data required
+ *                                          for all the tests in the irq suite.
+ * @context:             KUTF context.
+ *
+ * Return: Fixture data created on success or NULL on failure
+ */
+static void *mali_kutf_irq_default_create_fixture(
+		struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data;
+
+	data = kutf_mempool_alloc(&context->fixture_pool,
+			sizeof(struct kutf_irq_fixture_data));
+
+	if (!data)
+		goto fail;
+
+	/* Acquire the kbase device */
+	data->kbdev = kbase_find_device(-1);
+	if (data->kbdev == NULL) {
+		kutf_test_fail(context, "Failed to find kbase device");
+		goto fail;
+	}
+
+	return data;
+
+fail:
+	return NULL;
+}
+
+/**
+ * mali_kutf_irq_default_remove_fixture() - Destroy fixture data previously
+ *                          created by mali_kutf_irq_default_create_fixture.
+ *
+ * @context:             KUTF context.
+ */
+static void mali_kutf_irq_default_remove_fixture(
+		struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data = context->fixture;
+	struct kbase_device *kbdev = data->kbdev;
+
+	kbase_release_device(kbdev);
+}
+
+/**
+ * mali_kutf_irq_latency() - measure GPU IRQ latency
+ * @context:		kutf context within which to perform the test
+ *
+ * The test triggers IRQs manually, and measures the
+ * time between triggering the IRQ and the IRQ handler being executed.
+ *
+ * This is not a traditional test, in that the pass/fail status has little
+ * meaning (other than indicating that the IRQ handler executed at all). Instead
+ * the results are in the latencies provided with the test result. There is no
+ * meaningful pass/fail result that can be obtained here, instead the latencies
+ * are provided for manual analysis only.
+ */
+static void mali_kutf_irq_latency(struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data = context->fixture;
+	struct kbase_device *kbdev = data->kbdev;
+	u64 min_time = U64_MAX, max_time = 0, average_time = 0;
+	int i;
+	bool test_failed = false;
+
+	/* Force GPU to be powered */
+	kbase_pm_context_active(kbdev);
+
+	kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler,
+			GPU_IRQ_HANDLER);
+
+	for (i = 0; i < NR_TEST_IRQS; i++) {
+		struct timespec tval;
+		u64 start_time;
+		int ret;
+
+		triggered = false;
+		getnstimeofday(&tval);
+		start_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec);
+
+		/* Trigger fake IRQ */
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+				TEST_IRQ);
+
+		ret = wait_event_timeout(wait, triggered != false, IRQ_TIMEOUT);
+
+		if (ret == 0) {
+			kutf_test_fail(context, "Timed out waiting for IRQ\n");
+			test_failed = true;
+			break;
+		}
+
+		if ((irq_time - start_time) < min_time)
+			min_time = irq_time - start_time;
+		if ((irq_time - start_time) > max_time)
+			max_time = irq_time - start_time;
+		average_time += irq_time - start_time;
+
+		udelay(10);
+	}
+
+	/* Go back to default handler */
+	kbase_set_custom_irq_handler(kbdev, NULL, GPU_IRQ_HANDLER);
+
+	kbase_pm_context_idle(kbdev);
+
+	if (!test_failed) {
+		const char *results;
+
+		do_div(average_time, NR_TEST_IRQS);
+		results = kutf_dsprintf(&context->fixture_pool,
+				"Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n",
+				min_time, max_time, average_time);
+		kutf_test_pass(context, results);
+	}
+}
+
+/**
+ * Module entry point for this test.
+ */
+int mali_kutf_irq_test_main_init(void)
+{
+	struct kutf_suite *suite;
+
+	irq_app = kutf_create_application("irq");
+
+	if (NULL == irq_app) {
+		pr_warn("Creation of test application failed!\n");
+		return -ENOMEM;
+	}
+
+	suite = kutf_create_suite(irq_app, "irq_default",
+			1, mali_kutf_irq_default_create_fixture,
+			mali_kutf_irq_default_remove_fixture);
+
+	if (NULL == suite) {
+		pr_warn("Creation of test suite failed!\n");
+		kutf_destroy_application(irq_app);
+		return -ENOMEM;
+	}
+
+	kutf_add_test(suite, 0x0, "irq_latency",
+			mali_kutf_irq_latency);
+	return 0;
+}
+
+/**
+ * Module exit point for this test.
+ */
+void mali_kutf_irq_test_main_exit(void)
+{
+	kutf_destroy_application(irq_app);
+}
+
+module_init(mali_kutf_irq_test_main_init);
+module_exit(mali_kutf_irq_test_main_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
new file mode 100755
index 0000000..f266d8e
--- /dev/null
+++ b/bifrost/r19p0/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
@@ -0,0 +1,366 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ *//*
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include "linux/mman.h"
+#include "../mali_kbase.h"
+
+/* mali_kbase_mmap.c
+ *
+ * This file contains Linux specific implementation of
+ * kbase_context_get_unmapped_area() interface.
+ */
+
+
+/**
+ * align_and_check() - Align the specified pointer to the provided alignment and
+ *                     check that it is still in range.
+ * @gap_end:        Highest possible start address for allocation (end of gap in
+ *                  address space)
+ * @gap_start:      Start address of current memory area / gap in address space
+ * @info:           vm_unmapped_area_info structure passed to caller, containing
+ *                  alignment, length and limits for the allocation
+ * @is_shader_code: True if the allocation is for shader code (which has
+ *                  additional alignment requirements)
+ * @is_same_4gb_page: True if the allocation needs to reside completely within
+ *                    a 4GB chunk
+ *
+ * Return: true if gap_end is now aligned correctly and is still in range,
+ *         false otherwise
+ */
+static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
+		struct vm_unmapped_area_info *info, bool is_shader_code,
+		bool is_same_4gb_page)
+{
+	/* Compute highest gap address at the desired alignment */
+	(*gap_end) -= info->length;
+	(*gap_end) -= (*gap_end - info->align_offset) & info->align_mask;
+
+	if (is_shader_code) {
+		/* Check for 4GB boundary */
+		if (0 == (*gap_end & BASE_MEM_MASK_4GB))
+			(*gap_end) -= (info->align_offset ? info->align_offset :
+					info->length);
+		if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB))
+			(*gap_end) -= (info->align_offset ? info->align_offset :
+					info->length);
+
+		if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end +
+				info->length) & BASE_MEM_MASK_4GB))
+			return false;
+	} else if (is_same_4gb_page) {
+		unsigned long start = *gap_end;
+		unsigned long end = *gap_end + info->length;
+		unsigned long mask = ~((unsigned long)U32_MAX);
+
+		/* Check if 4GB boundary is straddled */
+		if ((start & mask) != ((end - 1) & mask)) {
+			unsigned long offset = end - (end & mask);
+			/* This is to ensure that alignment doesn't get
+			 * disturbed in an attempt to prevent straddling at
+			 * 4GB boundary. The GPU VA is aligned to 2MB when the
+			 * allocation size is > 2MB and there is enough CPU &
+			 * GPU virtual space.
+			 */
+			unsigned long rounded_offset =
+					ALIGN(offset, info->align_mask + 1);
+
+			start -= rounded_offset;
+			end -= rounded_offset;
+
+			*gap_end = start;
+
+			/* The preceding 4GB boundary shall not get straddled,
+			 * even after accounting for the alignment, as the
+			 * size of allocation is limited to 4GB and the initial
+			 * start location was already aligned.
+			 */
+			WARN_ON((start & mask) != ((end - 1) & mask));
+		}
+	}
+
+
+	if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
+		return false;
+
+
+	return true;
+}
+
+/**
+ * kbase_unmapped_area_topdown() - allocates new areas top-down from
+ *                                 below the stack limit.
+ * @info:              Information about the memory area to allocate.
+ * @is_shader_code:    Boolean which denotes whether the allocated area is
+ *                      intended for the use by shader core in which case a
+ *                      special alignment requirements apply.
+ * @is_same_4gb_page: Boolean which indicates whether the allocated area needs
+ *                    to reside completely within a 4GB chunk.
+ *
+ * The unmapped_area_topdown() function in the Linux kernel is not exported
+ * using EXPORT_SYMBOL_GPL macro. To allow us to call this function from a
+ * module and also make use of the fact that some of the requirements for
+ * the unmapped area are known in advance, we implemented an extended version
+ * of this function and prefixed it with 'kbase_'.
+ *
+ * The difference in the call parameter list comes from the fact that
+ * kbase_unmapped_area_topdown() is called with additional parameters which
+ * are provided to indicate whether the allocation is for a shader core memory,
+ * which has additional alignment requirements, and whether the allocation can
+ * straddle a 4GB boundary.
+ *
+ * The modification of the original Linux function lies in how the computation
+ * of the highest gap address at the desired alignment is performed once the
+ * gap with desirable properties is found. For this purpose a special function
+ * is introduced (@ref align_and_check()) which beside computing the gap end
+ * at the desired alignment also performs additional alignment checks for the
+ * case when the memory is executable shader core memory, for which it is
+ * ensured that the gap does not end on a 4GB boundary, and for the case when
+ * memory needs to be confined within a 4GB chunk.
+ *
+ * Return: address of the found gap end (high limit) if area is found;
+ *         -ENOMEM if search is unsuccessful
+*/
+
+static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
+		*info, bool is_shader_code, bool is_same_4gb_page)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+
+	/* Adjust search length to account for worst case alignment overhead */
+	length = info->length + info->align_mask;
+	if (length < info->length)
+		return -ENOMEM;
+
+	/*
+	 * Adjust search limits by the desired length.
+	 * See implementation comment at top of unmapped_area().
+	 */
+	gap_end = info->high_limit;
+	if (gap_end < length)
+		return -ENOMEM;
+	high_limit = gap_end - length;
+
+	if (info->low_limit > high_limit)
+		return -ENOMEM;
+	low_limit = info->low_limit + length;
+
+	/* Check highest gap, which does not precede any rbtree node */
+	gap_start = mm->highest_vm_end;
+	if (gap_start <= high_limit) {
+		if (align_and_check(&gap_end, gap_start, info,
+				is_shader_code, is_same_4gb_page))
+			return gap_end;
+	}
+
+	/* Check if rbtree root looks promising */
+	if (RB_EMPTY_ROOT(&mm->mm_rb))
+		return -ENOMEM;
+	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+	if (vma->rb_subtree_gap < length)
+		return -ENOMEM;
+
+	while (true) {
+		/* Visit right subtree if it looks promising */
+		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+			struct vm_area_struct *right =
+				rb_entry(vma->vm_rb.rb_right,
+					 struct vm_area_struct, vm_rb);
+			if (right->rb_subtree_gap >= length) {
+				vma = right;
+				continue;
+			}
+		}
+
+check_current:
+		/* Check if current node has a suitable gap */
+		gap_end = vma->vm_start;
+		if (gap_end < low_limit)
+			return -ENOMEM;
+		if (gap_start <= high_limit && gap_end - gap_start >= length) {
+			/* We found a suitable gap. Clip it with the original
+			 * high_limit. */
+			if (gap_end > info->high_limit)
+				gap_end = info->high_limit;
+
+			if (align_and_check(&gap_end, gap_start, info,
+					is_shader_code, is_same_4gb_page))
+				return gap_end;
+		}
+
+		/* Visit left subtree if it looks promising */
+		if (vma->vm_rb.rb_left) {
+			struct vm_area_struct *left =
+				rb_entry(vma->vm_rb.rb_left,
+					 struct vm_area_struct, vm_rb);
+			if (left->rb_subtree_gap >= length) {
+				vma = left;
+				continue;
+			}
+		}
+
+		/* Go back up the rbtree to find next candidate node */
+		while (true) {
+			struct rb_node *prev = &vma->vm_rb;
+
+			if (!rb_parent(prev))
+				return -ENOMEM;
+			vma = rb_entry(rb_parent(prev),
+				       struct vm_area_struct, vm_rb);
+			if (prev == vma->vm_rb.rb_right) {
+				gap_start = vma->vm_prev ?
+					vma->vm_prev->vm_end : 0;
+				goto check_current;
+			}
+		}
+	}
+
+	return -ENOMEM;
+}
+
+
+/* This function is based on Linux kernel's arch_get_unmapped_area, but
+ * simplified slightly. Modifications come from the fact that some values
+ * about the memory area are known in advance.
+ */
+unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+	unsigned long align_offset = 0;
+	unsigned long align_mask = 0;
+	unsigned long high_limit = mm->mmap_base;
+	unsigned long low_limit = PAGE_SIZE;
+	int cpu_va_bits = BITS_PER_LONG;
+	int gpu_pc_bits =
+	      kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	bool is_shader_code = false;
+	bool is_same_4gb_page = false;
+	unsigned long ret;
+
+	/* err on fixed address */
+	if ((flags & MAP_FIXED) || addr)
+		return -EINVAL;
+
+#ifdef CONFIG_64BIT
+	/* too big? */
+	if (len > TASK_SIZE - SZ_2M)
+		return -ENOMEM;
+
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+
+		high_limit = min_t(unsigned long, mm->mmap_base,
+				(kctx->same_va_end << PAGE_SHIFT));
+
+		/* If there's enough (> 33 bits) of GPU VA space, align
+		 * to 2MB boundaries.
+		 */
+		if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
+			if (len >= SZ_2M) {
+				align_offset = SZ_2M;
+				align_mask = SZ_2M - 1;
+			}
+		}
+
+		low_limit = SZ_2M;
+	} else {
+		cpu_va_bits = 32;
+	}
+#endif /* CONFIG_64BIT */
+	if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) &&
+		(PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) {
+			int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+			struct kbase_va_region *reg;
+
+			/* Need to hold gpu vm lock when using reg */
+			kbase_gpu_vm_lock(kctx);
+			reg = kctx->pending_regions[cookie];
+			if (!reg) {
+				kbase_gpu_vm_unlock(kctx);
+				return -EINVAL;
+			}
+			if (!(reg->flags & KBASE_REG_GPU_NX)) {
+				if (cpu_va_bits > gpu_pc_bits) {
+					align_offset = 1ULL << gpu_pc_bits;
+					align_mask = align_offset - 1;
+					is_shader_code = true;
+				}
+			} else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
+				unsigned long extent_bytes =
+				     (unsigned long)(reg->extent << PAGE_SHIFT);
+				/* kbase_check_alloc_sizes() already satisfies
+				 * these checks, but they're here to avoid
+				 * maintenance hazards due to the assumptions
+				 * involved */
+				WARN_ON(reg->extent > (ULONG_MAX >> PAGE_SHIFT));
+				WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT));
+				WARN_ON(!is_power_of_2(extent_bytes));
+				align_mask = extent_bytes - 1;
+				align_offset =
+				      extent_bytes - (reg->initial_commit << PAGE_SHIFT);
+			} else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
+				is_same_4gb_page = true;
+			}
+			kbase_gpu_vm_unlock(kctx);
+#ifndef CONFIG_64BIT
+	} else {
+		return current->mm->get_unmapped_area(
+			kctx->filp, addr, len, pgoff, flags);
+#endif
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = low_limit;
+	info.high_limit = high_limit;
+	info.align_offset = align_offset;
+	info.align_mask = align_mask;
+
+	ret = kbase_unmapped_area_topdown(&info, is_shader_code,
+			is_same_4gb_page);
+
+	if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
+			high_limit < (kctx->same_va_end << PAGE_SHIFT)) {
+		/* Retry above mmap_base */
+		info.low_limit = mm->mmap_base;
+		info.high_limit = min_t(u64, TASK_SIZE,
+					(kctx->same_va_end << PAGE_SHIFT));
+
+		ret = kbase_unmapped_area_topdown(&info, is_shader_code,
+				is_same_4gb_page);
+	}
+
+	return ret;
+}
diff --git a/bifrost/r19p0/kernel/include/linux/dma-buf-test-exporter.h b/bifrost/r19p0/kernel/include/linux/dma-buf-test-exporter.h
new file mode 100755
index 0000000..95bc6f8
--- /dev/null
+++ b/bifrost/r19p0/kernel/include/linux/dma-buf-test-exporter.h
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_
+#define _LINUX_DMA_BUF_TEST_EXPORTER_H_
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define DMA_BUF_TE_VER_MAJOR 1
+#define DMA_BUF_TE_VER_MINOR 0
+#define DMA_BUF_TE_ENQ 0x642d7465
+#define DMA_BUF_TE_ACK 0x68692100
+
+struct dma_buf_te_ioctl_version
+{
+	int op;    /**< Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */
+	int major; /**< Major version */
+	int minor; /**< Minor version */
+};
+
+struct dma_buf_te_ioctl_alloc
+{
+	__u64 size; /* size of buffer to allocate, in pages */
+};
+
+struct dma_buf_te_ioctl_status
+{
+	/* in */
+	int fd; /* the dma_buf to query, only dma_buf objects exported by this driver is supported */
+	/* out */
+	int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */
+	int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */
+	int cpu_mappings;    /* number of cpu mappings (active 'mmap's) */
+};
+
+struct dma_buf_te_ioctl_set_failing
+{
+	/* in */
+	int fd; /* the dma_buf to set failure mode for, only dma_buf objects exported by this driver is supported */
+
+	/* zero = no fail injection, non-zero = inject failure */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+};
+
+struct dma_buf_te_ioctl_fill
+{
+	int fd;
+	unsigned int value;
+};
+
+#define DMA_BUF_TE_IOCTL_BASE 'E'
+/* Below all returning 0 if successful or -errcode except DMA_BUF_TE_ALLOC which will return fd or -errcode */
+#define DMA_BUF_TE_VERSION         _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version)
+#define DMA_BUF_TE_ALLOC           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_QUERY           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status)
+#define DMA_BUF_TE_SET_FAILING     _IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing)
+#define DMA_BUF_TE_ALLOC_CONT      _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_FILL            _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill)
+
+#endif /* _LINUX_DMA_BUF_TEST_EXPORTER_H_ */
diff --git a/bifrost/r19p0/kernel/include/linux/memory_group_manager.h b/bifrost/r19p0/kernel/include/linux/memory_group_manager.h
new file mode 100644
index 0000000..b1ac253
--- /dev/null
+++ b/bifrost/r19p0/kernel/include/linux/memory_group_manager.h
@@ -0,0 +1,198 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _MEMORY_GROUP_MANAGER_H_
+#define _MEMORY_GROUP_MANAGER_H_
+
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/version.h>
+
+#if (KERNEL_VERSION(4, 17, 0) > LINUX_VERSION_CODE)
+typedef int vm_fault_t;
+#endif
+
+#define MEMORY_GROUP_MANAGER_NR_GROUPS (16)
+
+struct memory_group_manager_device;
+struct memory_group_manager_import_data;
+
+/**
+ * struct memory_group_manager_ops - Callbacks for memory group manager
+ *                                   operations
+ *
+ * @mgm_alloc_page:           Callback to allocate physical memory in a group
+ * @mgm_free_page:            Callback to free physical memory in a group
+ * @mgm_get_import_memory_id: Callback to get the group ID for imported memory
+ * @mgm_update_gpu_pte:       Callback to modify a GPU page table entry
+ * @mgm_vmf_insert_pfn_prot:  Callback to map a physical memory page for the CPU
+ */
+struct memory_group_manager_ops {
+	/**
+	 * mgm_alloc_page - Allocate a physical memory page in a group
+	 *
+	 * @mgm_dev:  The memory group manager through which the request is
+	 *            being made.
+	 * @group_id: A physical memory group ID. The meaning of this is defined
+	 *            by the systems integrator. Its valid range is
+	 *            0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+	 * @gfp_mask: Bitmask of Get Free Page flags affecting allocator
+	 *            behavior.
+	 * @order:    Page order for physical page size (order=0 means 4 KiB,
+	 *            order=9 means 2 MiB).
+	 *
+	 * Return: Pointer to allocated page, or NULL if allocation failed.
+	 */
+	struct page *(*mgm_alloc_page)(
+		struct memory_group_manager_device *mgm_dev, int group_id,
+		gfp_t gfp_mask, unsigned int order);
+
+	/**
+	 * mgm_free_page - Free a physical memory page in a group
+	 *
+	 * @mgm_dev:  The memory group manager through which the request
+	 *            is being made.
+	 * @group_id: A physical memory group ID. The meaning of this is
+	 *            defined by the systems integrator. Its valid range is
+	 *            0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+	 * @page:     Address of the struct associated with a page of physical
+	 *            memory that was allocated by calling the mgm_alloc_page
+	 *            method of the same memory pool with the same values of
+	 *            @group_id and @order.
+	 * @order:    Page order for physical page size (order=0 means 4 KiB,
+	 *            order=9 means 2 MiB).
+	 */
+	void (*mgm_free_page)(
+		struct memory_group_manager_device *mgm_dev, int group_id,
+		struct page *page, unsigned int order);
+
+	/**
+	 * mgm_get_import_memory_id - Get the physical memory group ID for the
+	 *                            imported memory
+	 *
+	 * @mgm_dev:     The memory group manager through which the request
+	 *               is being made.
+	 * @import_data: Pointer to the data which describes imported memory.
+	 *
+	 * Note that provision of this call back is optional, where it is not
+	 * provided this call back pointer must be set to NULL to indicate it
+	 * is not in use.
+	 *
+	 * Return: The memory group ID to use when mapping pages from this
+	 *         imported memory.
+	 */
+	int (*mgm_get_import_memory_id)(
+		struct memory_group_manager_device *mgm_dev,
+		struct memory_group_manager_import_data *import_data);
+
+	/**
+	 * mgm_update_gpu_pte - Modify a GPU page table entry for a memory group
+	 *
+	 * @mgm_dev:   The memory group manager through which the request
+	 *             is being made.
+	 * @group_id:  A physical memory group ID. The meaning of this is
+	 *             defined by the systems integrator. Its valid range is
+	 *             0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+	 * @mmu_level: The level of the page table entry in @ate.
+	 * @pte:       The page table entry to modify, in LPAE or AArch64 format
+	 *             (depending on the driver's configuration). This should be
+	 *             decoded to determine the physical address and any other
+	 *             properties of the mapping the manager requires.
+	 *
+	 * This function allows the memory group manager to modify a GPU page
+	 * table entry before it is stored by the kbase module (controller
+	 * driver). It may set certain bits in the page table entry attributes
+	 * or in the physical address, based on the physical memory group ID.
+	 *
+	 * Return: A modified GPU page table entry to be stored in a page table.
+	 */
+	u64 (*mgm_update_gpu_pte)(struct memory_group_manager_device *mgm_dev,
+			int group_id, int mmu_level, u64 pte);
+
+	/**
+	 * mgm_vmf_insert_pfn_prot - Map a physical page in a group for the CPU
+	 *
+	 * @mgm_dev:   The memory group manager through which the request
+	 *             is being made.
+	 * @group_id:  A physical memory group ID. The meaning of this is
+	 *             defined by the systems integrator. Its valid range is
+	 *             0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+	 * @vma:       The virtual memory area to insert the page into.
+	 * @addr:      A virtual address (in @vma) to assign to the page.
+	 * @pfn:       The kernel Page Frame Number to insert at @addr in @vma.
+	 * @pgprot:    Protection flags for the inserted page.
+	 *
+	 * Called from a CPU virtual memory page fault handler. This function
+	 * creates a page table entry from the given parameter values and stores
+	 * it at the appropriate location (unlike mgm_update_gpu_pte, which
+	 * returns a modified entry).
+	 *
+	 * Return: Type of fault that occurred or VM_FAULT_NOPAGE if the page
+	 *         table entry was successfully installed.
+	 */
+	vm_fault_t (*mgm_vmf_insert_pfn_prot)(
+		struct memory_group_manager_device *mgm_dev, int group_id,
+		struct vm_area_struct *vma, unsigned long addr,
+		unsigned long pfn, pgprot_t pgprot);
+};
+
+/**
+ * struct memory_group_manager_device - Device structure for a memory group
+ *                                      manager
+ *
+ * @ops  - Callbacks associated with this device
+ * @data - Pointer to device private data
+ *
+ * In order for a systems integrator to provide custom behaviors for memory
+ * operations performed by the kbase module (controller driver), they must
+ * provide a platform-specific driver module which implements this interface.
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct memory_group_manager_device {
+	struct memory_group_manager_ops ops;
+	void *data;
+	struct module *owner;
+};
+
+
+enum memory_group_manager_import_type {
+	MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF
+};
+
+/**
+ * struct memory_group_manager_import_data - Structure describing the imported
+ *                                           memory
+ *
+ * @type  - type of imported memory
+ * @u     - Union describing the imported memory
+ *
+ */
+struct memory_group_manager_import_data {
+	enum memory_group_manager_import_type type;
+	union {
+		struct dma_buf *dma_buf;
+	} u;
+};
+
+#endif /* _MEMORY_GROUP_MANAGER_H_ */
diff --git a/bifrost/r19p0/kernel/include/linux/protected_mode_switcher.h b/bifrost/r19p0/kernel/include/linux/protected_mode_switcher.h
new file mode 100755
index 0000000..8778d81
--- /dev/null
+++ b/bifrost/r19p0/kernel/include/linux/protected_mode_switcher.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _PROTECTED_MODE_SWITCH_H_
+#define _PROTECTED_MODE_SWITCH_H_
+
+struct protected_mode_device;
+
+/**
+ * struct protected_mode_ops - Callbacks for protected mode switch operations
+ *
+ * @protected_mode_enable:  Callback to enable protected mode for device
+ * @protected_mode_disable: Callback to disable protected mode for device
+ */
+struct protected_mode_ops {
+	/**
+	 * protected_mode_enable() - Enable protected mode on device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enable)(
+			struct protected_mode_device *protected_dev);
+
+	/**
+	 * protected_mode_disable() - Disable protected mode on device, and
+	 *                            reset device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_disable)(
+			struct protected_mode_device *protected_dev);
+};
+
+/**
+ * struct protected_mode_device - Device structure for protected mode devices
+ *
+ * @ops  - Callbacks associated with this device
+ * @data - Pointer to device private data
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct protected_mode_device {
+	struct protected_mode_ops ops;
+	void *data;
+};
+
+#endif /* _PROTECTED_MODE_SWITCH_H_ */
diff --git a/bifrost/r19p0/kernel/license.txt b/bifrost/r19p0/kernel/license.txt
new file mode 100755
index 0000000..77c14bd
--- /dev/null
+++ b/bifrost/r19p0/kernel/license.txt
@@ -0,0 +1,198 @@
+GPLV2 LICENCE AGREEMENT FOR MALI GPUS LINUX KERNEL DEVICE DRIVERS SOURCE CODE
+
+THE USE OF THE SOFTWARE ACCOMPANYING THIS DOCUMENT IS EXPRESSLY SUBJECT TO THE TERMS OF THE GNU GENERAL PUBLIC LICENSE VERSION 2 AS PUBLISHED BY THE FREE SOFTWARE FOUNDATION AND SET OUT BELOW FOR REFERENCE (?GPL LICENCE?). ARM IS ONLY WILLING TO DISTRIBUTE THE SOFTWARE TO YOU ON CONDITION THAT YOU ACCEPT ALL OF THE TERMS IN THE GPL LICENCE PRIOR TO MODIFYING OR DISTRIBUTING THE SOFTWARE.
+
+
+
+Further for the period of three (3) years, ARM hereby offers to make available the source code of any part of the software program that is supplied as object code or in executable form.
+
+
+
+GPL Licence
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+Version 2, June 1991
+
+
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+
+
+
+Preamble
+
+
+
+The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too.
+
+
+
+When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things.
+
+
+
+To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it.
+
+
+
+For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
+
+
+
+We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
+
+
+
+Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations.
+
+
+
+Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all.
+
+
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+
+
+0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you".
+
+
+
+Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program).
+
+Whether that is true depends on what the Program does.
+
+
+
+1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty;
+
+and give any other recipients of the Program a copy of this License along with the Program.
+
+
+
+You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee.
+
+
+
+2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
+
+
+
+a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change.
+
+
+
+b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License.
+
+
+
+c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.)
+
+
+
+These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.
+
+
+
+Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program.
+
+
+
+In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.
+
+
+
+3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following:
+
+
+
+a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.)
+
+
+
+The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.
+
+
+
+If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code.
+
+
+
+4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.
+
+
+
+5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the
+
+Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it.
+
+
+
+6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein.
+
+You are not responsible for enforcing compliance by third parties to this License.
+
+
+
+7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program.
+
+
+
+If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances.
+
+
+
+It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.
+
+
+
+This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
+
+
+
+8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.
+
+
+
+9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+
+
+
+Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation.
+
+
+
+10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.
+
+
+
+NO WARRANTY
+
+
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+/end
+
diff --git a/bifrost/r19p0/kernel/patches/trusted_firmware_gpu_coherency_toggle.patch b/bifrost/r19p0/kernel/patches/trusted_firmware_gpu_coherency_toggle.patch
new file mode 100755
index 0000000..985d423
--- /dev/null
+++ b/bifrost/r19p0/kernel/patches/trusted_firmware_gpu_coherency_toggle.patch
@@ -0,0 +1,117 @@
+commit 5f0a5507eb6c9ed3745b89a1246bb7a7b6141857
+Author: Petter Lundanes <petter.lundanes@arm.com>
+Date:   Mon Feb 9 14:37:06 2015 +0100
+
+    Adds a service to enable/disable GPU cache snooping
+    
+    Adds a service to enable/disable cache snooping over
+    CCI for GPU on request from SMC (mali kernel driver).
+
+diff --git a/bl31/bl31.mk b/bl31/bl31.mk
+index 4c25a60..cd0b530 100644
+--- a/bl31/bl31.mk
++++ b/bl31/bl31.mk
+@@ -50,7 +50,8 @@ BL31_SOURCES		+=	bl31/bl31_main.c				\
+ 				services/std_svc/psci/psci_helpers.S		\
+ 				services/std_svc/psci/psci_main.c		\
+ 				services/std_svc/psci/psci_setup.c		\
+-				services/std_svc/psci/psci_system_off.c
++				services/std_svc/psci/psci_system_off.c \
++				services/sips/arm/css/svc.c
+ 
+ ifeq (${USE_COHERENT_MEM}, 1)
+ BL31_SOURCES		+=	lib/locks/bakery/bakery_lock_coherent.c
+diff --git a/services/sips/arm/css/svc.c b/services/sips/arm/css/svc.c
+new file mode 100644
+index 0000000..bbc63a4
+--- /dev/null
++++ b/services/sips/arm/css/svc.c
+@@ -0,0 +1,88 @@
++#include <debug.h>
++#include <runtime_svc.h>
++#include <stdint.h>
++#include <cci400.h>
++#include <uuid.h>
++#include <mmio.h>
++
++
++/* This should be updated to the CCI port the GPU is connect to on your platform.*/
++#define GPU_CCI_PORT 3U
++
++/*
++ * See SMC calling convention for details.
++ * Bit 31 - 1 = Fast call.
++ * Bits 29:24 - 0x2 = SIP service call.
++ * Bits 15:00 - Function number.
++ *
++ * These must correspond to the IDs given in device tree for the device driver.
++ */
++#define SMC_FID_ENABLE_COHERENCY (0x8200ff04)
++#define SMC_FID_DISABLE_COHERENCY (0x8200ff05)
++
++/* Setup CSS Services */
++static int32_t css_svc_setup(void)
++{
++	return 0;
++}
++
++static void cci_enable_gpu_coherency(unsigned int cciport)
++{
++	/* Enable Snoops */
++	mmio_write_32(CCI400_BASE + SLAVE_IFACE_OFFSET(cciport) + SNOOP_CTRL_REG,
++		      (SNOOP_EN_BIT));
++
++	/* Wait for the dust to settle down */
++	while (mmio_read_32(CCI400_BASE + STATUS_REG) & CHANGE_PENDING_BIT)
++		;
++}
++static void cci_disable_gpu_coherency(unsigned int cciport)
++{
++	/* Disable Snoops */
++	mmio_write_32(CCI400_BASE + SLAVE_IFACE_OFFSET(cciport) + SNOOP_CTRL_REG,
++		      0U);
++
++	/* Wait for the dust to settle down */
++	while (mmio_read_32(CCI400_BASE + STATUS_REG) & CHANGE_PENDING_BIT)
++		;
++}
++
++/*
++ * Top-level CSS Service SMC handler.
++ */
++uint64_t css_svc_smc_handler(uint32_t smc_fid,
++			     uint64_t x1,
++			     uint64_t x2,
++			     uint64_t x3,
++			     uint64_t x4,
++			     void *cookie,
++			     void *handle,
++			     uint64_t flags)
++{
++	switch (smc_fid) {
++	case SMC_FID_ENABLE_COHERENCY:
++	{
++		cci_enable_gpu_coherency(GPU_CCI_PORT);
++		SMC_RET1(handle, 0x0);
++	}
++	case SMC_FID_DISABLE_COHERENCY:
++	{
++		cci_disable_gpu_coherency(GPU_CCI_PORT);
++		SMC_RET1(handle, 0x0);
++	}
++	default:
++		WARN("Unimplemented CSS Service Call: 0x%x \n", smc_fid);
++		SMC_RET1(handle, SMC_UNK);
++	}
++}
++
++/* Register as runtime service */
++DECLARE_RT_SVC(
++		css_svc,
++
++		OEN_SIP_START,
++		OEN_SIP_END,
++		SMC_TYPE_FAST,
++		css_svc_setup,
++		css_svc_smc_handler
++);
diff --git a/bifrost/r9p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt b/bifrost/r9p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
new file mode 100644
index 0000000..4698e5c
--- /dev/null
+++ b/bifrost/r9p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
@@ -0,0 +1,141 @@
+#
+# (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+* ARM Mali Midgard devices
+
+
+Required properties:
+
+- compatible : Should be mali<chip>, replacing digits with x from the back,
+until malit<Major>xx, ending with arm,mali-midgard, the latter not optional.
+- reg : Physical base address of the device and length of the register area.
+- interrupts : Contains the three IRQ lines required by T-6xx devices
+- interrupt-names : Contains the names of IRQ resources in the order they were
+provided in the interrupts property. Must contain: "JOB, "MMU", "GPU".
+
+Optional:
+
+- clocks : Phandle to clock for the Mali T-6xx device.
+- clock-names : Shall be "clk_mali".
+- mali-supply : Phandle to regulator for the Mali device. Refer to
+Documentation/devicetree/bindings/regulator/regulator.txt for details.
+- operating-points-v2 : Refer to Documentation/devicetree/bindings/power/opp.txt
+for details.
+- jm_config : For T860/T880. Sets job manager configuration. An array containing:
+	- 1 to override the TIMESTAMP value, 0 otherwise.
+	- 1 to override clock gate, forcing them to be always on, 0 otherwise.
+	- 1 to enable job throttle, limiting the number of cores that can be started
+	  simultaneously, 0 otherwise.
+	- Value between 0 and 63 (including). If job throttle is enabled, this is one
+	  less than the number of cores that can be started simultaneously.
+- power_model : Sets the power model parameters. Two power models are currently
+  defined which include "mali-simple-power-model" and "mali-g71-power-model".
+	- mali-simple-power-model: this model derives the GPU power usage based
+	  on the GPU voltage scaled by the system temperature. Note: it was
+	  designed for the Juno platform, and may not be suitable for others.
+		- compatible: Should be "arm,mali-simple-power-model"
+		- dynamic-coefficient: Coefficient, in pW/(Hz V^2), which is
+		  multiplied by v^2*f to calculate the dynamic power consumption.
+		- static-coefficient: Coefficient, in uW/V^3, which is
+		  multiplied by v^3 to calculate the static power consumption.
+		- ts: An array containing coefficients for the temperature
+		  scaling factor. This is used to scale the static power by a
+		  factor of tsf/1000000,
+		  where tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0],
+		  and T = temperature in degrees.
+		- thermal-zone: A string identifying the thermal zone used for
+		  the GPU
+		- temp-poll-interval-ms: the interval at which the system
+		  temperature is polled
+	- mali-g71-power-model: this model derives the GPU power usage based on
+	  performance counters, so is more accurate.
+		- compatible: Should be "arm,mali-g71-power-model"
+		- scale: the dynamic power calculated by the power model is
+		  scaled by a factor of "scale"/1000. This value should be
+		  chosen to match a particular implementation.
+	* Note: the kernel could use either of the 2 power models (simple and
+	  counter-based) at different points so care should be taken to configure
+	  both power models in the device tree (specifically dynamic-coefficient,
+	  static-coefficient and scale) to best match the platform.
+- system-coherency : Sets the coherency protocol to be used for coherent
+		     accesses made from the GPU.
+		     If not set then no coherency is used.
+	- 0  : ACE-Lite
+	- 1  : ACE
+	- 31 : No coherency
+- ipa-model : Sets the IPA model to be used for power management. GPU probe will fail if the
+	      model is not found in the registered models list. If no model is specified here,
+	      a gpu-id based model is picked if available, otherwise the default model is used.
+	- mali-simple-power-model: Default model used on mali
+- protected-mode-switcher : Phandle to device implemented protected mode switching functionality.
+Refer to Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt for one implementation.
+
+Example for a Mali GPU:
+
+gpu@0xfc010000 {
+	compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
+	reg = <0xfc010000 0x4000>;
+	interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+	interrupt-names = "JOB", "MMU", "GPU";
+
+	clocks = <&pclk_mali>;
+	clock-names = "clk_mali";
+	mali-supply = <&vdd_mali>;
+	operating-points-v2 = <&gpu_opp_table>;
+	power_model@0 {
+		compatible = "arm,mali-simple-power-model";
+		static-coefficient = <2427750>;
+		dynamic-coefficient = <4687>;
+		ts = <20000 2000 (-20) 2>;
+		thermal-zone = "gpu";
+	};
+	power_model@1 {
+		compatible = "arm,mali-g71-power-model";
+		scale = <5>;
+	};
+};
+
+gpu_opp_table: opp_table0 {
+	compatible = "operating-points-v2";
+
+	opp@533000000 {
+		opp-hz = /bits/ 64 <533000000>;
+		opp-microvolt = <1250000>;
+	};
+	opp@450000000 {
+		opp-hz = /bits/ 64 <450000000>;
+		opp-microvolt = <1150000>;
+	};
+	opp@400000000 {
+		opp-hz = /bits/ 64 <400000000>;
+		opp-microvolt = <1125000>;
+	};
+	opp@350000000 {
+		opp-hz = /bits/ 64 <350000000>;
+		opp-microvolt = <1075000>;
+	};
+	opp@266000000 {
+		opp-hz = /bits/ 64 <266000000>;
+		opp-microvolt = <1025000>;
+	};
+	opp@160000000 {
+		opp-hz = /bits/ 64 <160000000>;
+		opp-microvolt = <925000>;
+	};
+	opp@100000000 {
+		opp-hz = /bits/ 64 <100000000>;
+		opp-microvolt = <912500>;
+	};
+};
diff --git a/bifrost/r9p0/kernel/Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt b/bifrost/r9p0/kernel/Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt
new file mode 100644
index 0000000..8a1596a
--- /dev/null
+++ b/bifrost/r9p0/kernel/Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt
@@ -0,0 +1,31 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+* ARM SMC protected mode switcher devices
+
+Required properties :
+
+- compatible : Must be "arm,smc-protected-mode-switcher"
+- arm,smc,protected_enable : SMC call ID to enable protected mode
+- arm,smc,protected_disable : SMC call ID to disable protected mode and reset
+			      device
+
+An example node :
+
+	gpu_switcher {
+		compatible = "arm,smc-protected-mode-switcher";
+		arm,smc,protected_enable = <0xff06>;
+		arm,smc,protected_disable = <0xff07>;
+	};
diff --git a/bifrost/r9p0/kernel/Documentation/devicetree/bindings/power/mali-opp.txt b/bifrost/r9p0/kernel/Documentation/devicetree/bindings/power/mali-opp.txt
new file mode 100644
index 0000000..22be9ba
--- /dev/null
+++ b/bifrost/r9p0/kernel/Documentation/devicetree/bindings/power/mali-opp.txt
@@ -0,0 +1,163 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+* ARM Mali Midgard OPP
+
+* OPP Table Node
+
+This describes the OPPs belonging to a device. This node can have following
+properties:
+
+Required properties:
+- compatible: Allow OPPs to express their compatibility. It should be:
+  "operating-points-v2", "operating-points-v2-mali".
+
+- OPP nodes: One or more OPP nodes describing voltage-current-frequency
+  combinations. Their name isn't significant but their phandle can be used to
+  reference an OPP.
+
+* OPP Node
+
+This defines voltage-current-frequency combinations along with other related
+properties.
+
+Required properties:
+- opp-hz: Nominal frequency in Hz, expressed as a 64-bit big-endian integer.
+  This should be treated as a relative performance measurement, taking both GPU
+  frequency and core mask into account.
+
+Optional properties:
+- opp-hz-real: Real frequency in Hz, expressed as a 64-bit big-endian integer.
+  If this is not present then the nominal frequency will be used instead.
+
+- opp-core-mask: Shader core mask. If neither this or opp-core-count are present
+  then all shader cores will be used for this OPP.
+
+- opp-core-count: Number of cores to use for this OPP. If this is present then
+  the driver will build a core mask using the available core mask provided by
+  the GPU hardware.
+
+  If neither this nor opp-core-mask are present then all shader cores will be
+  used for this OPP.
+
+  If both this and opp-core-mask are present then opp-core-mask is ignored.
+
+- opp-microvolt: voltage in micro Volts.
+
+  A single regulator's voltage is specified with an array of size one or three.
+  Single entry is for target voltage and three entries are for <target min max>
+  voltages.
+
+  Entries for multiple regulators must be present in the same order as
+  regulators are specified in device's DT node.
+
+- opp-microvolt-<name>: Named opp-microvolt property. This is exactly similar to
+  the above opp-microvolt property, but allows multiple voltage ranges to be
+  provided for the same OPP. At runtime, the platform can pick a <name> and
+  matching opp-microvolt-<name> property will be enabled for all OPPs. If the
+  platform doesn't pick a specific <name> or the <name> doesn't match with any
+  opp-microvolt-<name> properties, then opp-microvolt property shall be used, if
+  present.
+
+- opp-microamp: The maximum current drawn by the device in microamperes
+  considering system specific parameters (such as transients, process, aging,
+  maximum operating temperature range etc.) as necessary. This may be used to
+  set the most efficient regulator operating mode.
+
+  Should only be set if opp-microvolt is set for the OPP.
+
+  Entries for multiple regulators must be present in the same order as
+  regulators are specified in device's DT node. If this property isn't required
+  for few regulators, then this should be marked as zero for them. If it isn't
+  required for any regulator, then this property need not be present.
+
+- opp-microamp-<name>: Named opp-microamp property. Similar to
+  opp-microvolt-<name> property, but for microamp instead.
+
+- clock-latency-ns: Specifies the maximum possible transition latency (in
+  nanoseconds) for switching to this OPP from any other OPP.
+
+- turbo-mode: Marks the OPP to be used only for turbo modes. Turbo mode is
+  available on some platforms, where the device can run over its operating
+  frequency for a short duration of time limited by the device's power, current
+  and thermal limits.
+
+- opp-suspend: Marks the OPP to be used during device suspend. Only one OPP in
+  the table should have this.
+
+- opp-supported-hw: This enables us to select only a subset of OPPs from the
+  larger OPP table, based on what version of the hardware we are running on. We
+  still can't have multiple nodes with the same opp-hz value in OPP table.
+
+  It's an user defined array containing a hierarchy of hardware version numbers,
+  supported by the OPP. For example: a platform with hierarchy of three levels
+  of versions (A, B and C), this field should be like <X Y Z>, where X
+  corresponds to Version hierarchy A, Y corresponds to version hierarchy B and Z
+  corresponds to version hierarchy C.
+
+  Each level of hierarchy is represented by a 32 bit value, and so there can be
+  only 32 different supported version per hierarchy. i.e. 1 bit per version. A
+  value of 0xFFFFFFFF will enable the OPP for all versions for that hierarchy
+  level. And a value of 0x00000000 will disable the OPP completely, and so we
+  never want that to happen.
+
+  If 32 values aren't sufficient for a version hierarchy, than that version
+  hierarchy can be contained in multiple 32 bit values. i.e. <X Y Z1 Z2> in the
+  above example, Z1 & Z2 refer to the version hierarchy Z.
+
+- status: Marks the node enabled/disabled.
+
+Example for a Juno with Mali T624:
+
+gpu_opp_table: opp_table0 {
+	compatible = "operating-points-v2", "operating-points-v2-mali";
+
+	opp@112500000 {
+		opp-hz = /bits/ 64 <112500000>;
+		opp-hz-real = /bits/ 64 <450000000>;
+		opp-microvolt = <820000>;
+		opp-core-mask = /bits/ 64 <0x1>;
+		opp-suspend;
+	};
+	opp@225000000 {
+		opp-hz = /bits/ 64 <225000000>;
+		opp-hz-real = /bits/ 64 <450000000>;
+		opp-microvolt = <820000>;
+		opp-core-count = <2>;
+	};
+	opp@450000000 {
+		opp-hz = /bits/ 64 <450000000>;
+		opp-hz-real = /bits/ 64 <450000000>;
+		opp-microvolt = <820000>;
+		opp-core-mask = /bits/ 64 <0xf>;
+	};
+	opp@487500000 {
+		opp-hz = /bits/ 64 <487500000>;
+		opp-microvolt = <825000>;
+	};
+	opp@525000000 {
+		opp-hz = /bits/ 64 <525000000>;
+		opp-microvolt = <850000>;
+	};
+	opp@562500000 {
+		opp-hz = /bits/ 64 <562500000>;
+		opp-microvolt = <875000>;
+	};
+	opp@600000000 {
+		opp-hz = /bits/ 64 <600000000>;
+		opp-microvolt = <900000>;
+	};
+};
+
diff --git a/bifrost/r9p0/kernel/Documentation/dma-buf-test-exporter.txt b/bifrost/r9p0/kernel/Documentation/dma-buf-test-exporter.txt
new file mode 100644
index 0000000..4208010
--- /dev/null
+++ b/bifrost/r9p0/kernel/Documentation/dma-buf-test-exporter.txt
@@ -0,0 +1,40 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+=====================
+dma-buf-test-exporter
+=====================
+
+Overview
+--------
+
+The dma-buf-test-exporter is a simple exporter of dma_buf objects.
+It has a private API to allocate and manipulate the buffers which are represented as dma_buf fds.
+The private API allows:
+* simple allocation of physically non-contiguous buffers
+* simple allocation of physically contiguous buffers
+* query kernel side API usage stats (number of attachments, number of mappings, mmaps)
+* failure mode configuration (fail attach, mapping, mmap)
+* kernel side memset of buffers
+
+The buffers support all of the dma_buf API, including mmap.
+
+It supports being compiled as a module both in-tree and out-of-tree.
+
+See include/linux/dma-buf-test-exporter.h for the ioctl interface.
+See Documentation/dma-buf-sharing.txt for details on dma_buf.
+
+
diff --git a/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/sconscript b/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/sconscript
new file mode 100644
index 0000000..7c7c242
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2012, 2014, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+if env.KernelVersion() >= (3, 18, 34):
+    SConscript("src/sconscript")
+    if env["tests"] and Glob("tests/sconscript"):
+        SConscript("tests/sconscript")
diff --git a/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/Kbuild b/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
new file mode 100644
index 0000000..68dad07
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-m := dma_buf_lock.o
+endif
diff --git a/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/Makefile b/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/Makefile
new file mode 100644
index 0000000..cf76132
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: dma_buf_lock
+
+dma_buf_lock:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include"
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c b/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
new file mode 100644
index 0000000..9d924ff
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
@@ -0,0 +1,850 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/version.h>
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/reservation.h>
+#include <linux/dma-buf.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+
+#include <linux/fence.h>
+
+#define dma_fence_context_alloc(a) fence_context_alloc(a)
+#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e)
+#define dma_fence_get(a) fence_get(a)
+#define dma_fence_put(a) fence_put(a)
+#define dma_fence_signal(a) fence_signal(a)
+#define dma_fence_is_signaled(a) fence_is_signaled(a)
+#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c)
+#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b)
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0)
+
+#else
+
+#include <linux/dma-fence.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
+#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \
+	(a)->status ?: 1 \
+	: 0)
+#endif
+
+#endif /* < 4.10.0 */
+
+#include "dma_buf_lock.h"
+
+/* Maximum number of buffers that a single handle can address */
+#define DMA_BUF_LOCK_BUF_MAX 32
+
+#define DMA_BUF_LOCK_DEBUG 1
+
+#define DMA_BUF_LOCK_INIT_BIAS  0xFF
+
+static dev_t dma_buf_lock_dev;
+static struct cdev dma_buf_lock_cdev;
+static struct class *dma_buf_lock_class;
+static char dma_buf_lock_dev_name[] = "dma_buf_lock";
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static struct file_operations dma_buf_lock_fops =
+{
+	.owner   = THIS_MODULE,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = dma_buf_lock_ioctl,
+#else
+	.ioctl   = dma_buf_lock_ioctl,
+#endif
+	.compat_ioctl   = dma_buf_lock_ioctl,
+};
+
+typedef struct dma_buf_lock_resource
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence fence;
+#else
+	struct dma_fence fence;
+#endif
+	int *list_of_dma_buf_fds;               /* List of buffers copied from userspace */
+	atomic_t locked;                        /* Status of lock */
+	struct dma_buf **dma_bufs;
+	unsigned long exclusive;                /* Exclusive access bitmap */
+	atomic_t fence_dep_count;		/* Number of dma-fence dependencies */
+	struct list_head dma_fence_callbacks;	/* list of all callbacks set up to wait on other fences */
+	wait_queue_head_t wait;
+	struct kref refcount;
+	struct list_head link;
+	int count;
+} dma_buf_lock_resource;
+
+/**
+ * struct dma_buf_lock_fence_cb - Callback data struct for dma-fence
+ * @fence_cb: Callback function
+ * @fence:    Pointer to the fence object on which this callback is waiting
+ * @res:      Pointer to dma_buf_lock_resource that is waiting on this callback
+ * @node:     List head for linking this callback to the lock resource
+ */
+struct dma_buf_lock_fence_cb {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence_cb fence_cb;
+	struct fence *fence;
+#else
+	struct dma_fence_cb fence_cb;
+	struct dma_fence *fence;
+#endif
+	struct dma_buf_lock_resource *res;
+	struct list_head node;
+};
+
+static LIST_HEAD(dma_buf_lock_resource_list);
+static DEFINE_MUTEX(dma_buf_lock_mutex);
+
+static inline int is_dma_buf_lock_file(struct file *);
+static void dma_buf_lock_dounlock(struct kref *ref);
+
+
+/*** dma_buf_lock fence part ***/
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(dma_buf_lock_fence_lock);
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_get_driver_name(struct fence *fence)
+#else
+dma_buf_lock_fence_get_driver_name(struct dma_fence *fence)
+#endif
+{
+	return "dma_buf_lock";
+}
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_get_timeline_name(struct fence *fence)
+#else
+dma_buf_lock_fence_get_timeline_name(struct dma_fence *fence)
+#endif
+{
+	return "dma_buf_lock.timeline";
+}
+
+static bool
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_enable_signaling(struct fence *fence)
+#else
+dma_buf_lock_fence_enable_signaling(struct dma_fence *fence)
+#endif
+{
+	return true;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+const struct fence_ops dma_buf_lock_fence_ops = {
+	.wait = fence_default_wait,
+#else
+const struct dma_fence_ops dma_buf_lock_fence_ops = {
+	.wait = dma_fence_default_wait,
+#endif
+	.get_driver_name = dma_buf_lock_fence_get_driver_name,
+	.get_timeline_name = dma_buf_lock_fence_get_timeline_name,
+	.enable_signaling = dma_buf_lock_fence_enable_signaling,
+};
+
+static void
+dma_buf_lock_fence_init(dma_buf_lock_resource *resource)
+{
+	dma_fence_init(&resource->fence,
+		       &dma_buf_lock_fence_ops,
+		       &dma_buf_lock_fence_lock,
+		       0,
+		       0);
+}
+
+static void
+dma_buf_lock_fence_free_callbacks(dma_buf_lock_resource *resource)
+{
+	struct dma_buf_lock_fence_cb *cb, *tmp;
+
+	/* Clean up and free callbacks. */
+	list_for_each_entry_safe(cb, tmp, &resource->dma_fence_callbacks, node) {
+		/* Cancel callbacks that hasn't been called yet and release the
+		 * reference taken in dma_buf_lock_fence_add_callback().
+		 */
+		dma_fence_remove_callback(cb->fence, &cb->fence_cb);
+		dma_fence_put(cb->fence);
+		list_del(&cb->node);
+		kfree(cb);
+	}
+}
+
+static void
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_callback(struct fence *fence, struct fence_cb *cb)
+#else
+dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
+#endif
+{
+	struct dma_buf_lock_fence_cb *dma_buf_lock_cb = container_of(cb,
+				struct dma_buf_lock_fence_cb,
+				fence_cb);
+	dma_buf_lock_resource *resource = dma_buf_lock_cb->res;
+
+#if DMA_BUF_LOCK_DEBUG
+	printk(KERN_DEBUG "dma_buf_lock_fence_callback\n");
+#endif
+
+	/* Callback function will be invoked in atomic context. */
+
+	if (atomic_dec_and_test(&resource->fence_dep_count)) {
+		atomic_set(&resource->locked, 1);
+		wake_up(&resource->wait);
+		/* A work item can be queued at this point to invoke
+		 * dma_buf_lock_fence_free_callbacks.
+		 */
+	}
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+static int
+dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
+				struct fence *fence,
+				fence_func_t callback)
+#else
+static int
+dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
+				struct dma_fence *fence,
+				dma_fence_func_t callback)
+#endif
+{
+	int err = 0;
+	struct dma_buf_lock_fence_cb *fence_cb;
+
+	if (!fence)
+		return -EINVAL;
+
+	fence_cb = kmalloc(sizeof(*fence_cb), GFP_KERNEL);
+	if (!fence_cb)
+		return -ENOMEM;
+
+	fence_cb->fence = fence;
+	fence_cb->res   = resource;
+	INIT_LIST_HEAD(&fence_cb->node);
+
+	err = dma_fence_add_callback(fence, &fence_cb->fence_cb,
+				     callback);
+
+	if (err == -ENOENT) {
+		/* Fence signaled, get the completion result */
+		err = dma_fence_get_status(fence);
+
+		/* remap success completion to err code */
+		if (err == 1)
+			err = 0;
+
+		kfree(fence_cb);
+	} else if (err) {
+		kfree(fence_cb);
+	} else {
+		/*
+		 * Get reference to fence that will be kept until callback gets
+		 * cleaned up in dma_buf_lock_fence_free_callbacks().
+		 */
+		dma_fence_get(fence);
+		atomic_inc(&resource->fence_dep_count);
+		/* Add callback to resource's list of callbacks */
+		list_add(&fence_cb->node, &resource->dma_fence_callbacks);
+	}
+
+	return err;
+}
+
+static int
+dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource,
+					    struct reservation_object *resv,
+					    bool exclusive)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+#else
+	struct dma_fence *excl_fence = NULL;
+	struct dma_fence **shared_fences = NULL;
+#endif
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = reservation_object_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = dma_buf_lock_fence_add_callback(resource,
+						      excl_fence,
+						      dma_buf_lock_fence_callback);
+
+		/* Release our reference, taken by reservation_object_get_fences_rcu(),
+		 * to the fence. We have set up our callback (if that was possible),
+		 * and it's the fence's owner is responsible for singling the fence
+		 * before allowing it to disappear.
+		 */
+		dma_fence_put(excl_fence);
+
+		if (err)
+			goto out;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = dma_buf_lock_fence_add_callback(resource,
+							      shared_fences[i],
+							      dma_buf_lock_fence_callback);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Release all our references to the shared fences, taken by
+	 * reservation_object_get_fences_rcu(). We have set up our callback (if
+	 * that was possible), and it's the fence's owner is responsible for
+	 * signaling the fence before allowing it to disappear.
+	 */
+out:
+	for (i = 0; i < shared_count; i++)
+		dma_fence_put(shared_fences[i]);
+	kfree(shared_fences);
+
+	return err;
+}
+
+static void
+dma_buf_lock_release_fence_reservation(dma_buf_lock_resource *resource,
+				       struct ww_acquire_ctx *ctx)
+{
+	unsigned int r;
+
+	for (r = 0; r < resource->count; r++)
+		ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock);
+	ww_acquire_fini(ctx);
+}
+
+static int
+dma_buf_lock_acquire_fence_reservation(dma_buf_lock_resource *resource,
+				       struct ww_acquire_ctx *ctx)
+{
+	struct reservation_object *content_resv = NULL;
+	unsigned int content_resv_idx = 0;
+	unsigned int r;
+	int err = 0;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < resource->count; r++) {
+		if (resource->dma_bufs[r]->resv == content_resv) {
+			content_resv = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&resource->dma_bufs[r]->resv->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	content_resv_idx = r;
+
+	/* Unlock the locked one ones */
+	while (r--)
+		ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock);
+
+	if (content_resv)
+		ww_mutex_unlock(&content_resv->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+#if DMA_BUF_LOCK_DEBUG
+		printk(KERN_DEBUG "deadlock at dma_buf fd %i\n",
+		       resource->list_of_dma_buf_fds[content_resv_idx]);
+#endif
+		content_resv = resource->dma_bufs[content_resv_idx]->resv;
+		ww_mutex_lock_slow(&content_resv->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
+{
+	dma_buf_lock_resource *resource;
+
+	if (!is_dma_buf_lock_file(file))
+		return -EINVAL;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_release\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return 0;
+}
+
+static unsigned int dma_buf_lock_handle_poll(struct file *file,
+                                             struct poll_table_struct *wait)
+{
+	dma_buf_lock_resource *resource;
+	unsigned int ret = 0;
+
+	if (!is_dma_buf_lock_file(file))
+		return POLLERR;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll\n");
+#endif
+	if (1 == atomic_read(&resource->locked))
+	{
+		/* Resources have been locked */
+		ret = POLLIN | POLLRDNORM;
+		if (resource->exclusive)
+		{
+			ret |=  POLLOUT | POLLWRNORM;
+		}
+	}
+	else
+	{
+		if (!poll_does_not_wait(wait)) 
+		{
+			poll_wait(file, &resource->wait, wait);
+		}
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll : return %i\n", ret);
+#endif
+	return ret;
+}
+
+static const struct file_operations dma_buf_lock_handle_fops = {
+	.owner		= THIS_MODULE,
+	.release	= dma_buf_lock_handle_release,
+	.poll		= dma_buf_lock_handle_poll,
+};
+
+/*
+ * is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock
+ */
+static inline int is_dma_buf_lock_file(struct file *file)
+{
+	return file->f_op == &dma_buf_lock_handle_fops;
+}
+
+
+
+/*
+ * Start requested lock.
+ *
+ * Allocates required memory, copies dma_buf_fd list from userspace,
+ * acquires related reservation objects, and starts the lock.
+ */
+static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
+{
+	dma_buf_lock_resource *resource;
+	struct ww_acquire_ctx ww_ctx;
+	int size;
+	int fd;
+	int i;
+	int ret;
+
+	if (NULL == request->list_of_dma_buf_fds)
+	{
+		return -EINVAL;
+	}
+	if (request->count <= 0)
+	{
+		return -EINVAL;
+	}
+	if (request->count > DMA_BUF_LOCK_BUF_MAX)
+	{
+		return -EINVAL;
+	}
+	if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE &&
+	    request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
+	{
+		return -EINVAL;
+	}
+
+	resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
+	if (NULL == resource)
+	{
+		return -ENOMEM;
+	}
+
+	atomic_set(&resource->locked, 0);
+	kref_init(&resource->refcount);
+	INIT_LIST_HEAD(&resource->link);
+	resource->count = request->count;
+
+	/* Allocate space to store dma_buf_fds received from user space */
+	size = request->count * sizeof(int);
+	resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->list_of_dma_buf_fds)
+	{
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Allocate space to store dma_buf pointers associated with dma_buf_fds */
+	size = sizeof(struct dma_buf *) * request->count;
+	resource->dma_bufs = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->dma_bufs)
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Copy requested list of dma_buf_fds from user space */
+	size = request->count * sizeof(int);
+	if (0 != copy_from_user(resource->list_of_dma_buf_fds, (void __user *)request->list_of_dma_buf_fds, size))
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource->dma_bufs);
+		kfree(resource);
+		return -ENOMEM;
+	}
+#if DMA_BUF_LOCK_DEBUG
+	for (i = 0; i < request->count; i++)
+	{
+		printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
+	}
+#endif
+
+	/* Initialize the fence associated with dma_buf_lock resource */
+	dma_buf_lock_fence_init(resource);
+
+	INIT_LIST_HEAD(&resource->dma_fence_callbacks);
+
+	atomic_set(&resource->fence_dep_count, DMA_BUF_LOCK_INIT_BIAS);
+
+	/* Add resource to global list */
+	mutex_lock(&dma_buf_lock_mutex);
+
+	list_add(&resource->link, &dma_buf_lock_resource_list);
+
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	for (i = 0; i < request->count; i++)
+	{
+		/* Convert fd into dma_buf structure */
+		resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
+
+		if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i])))
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+
+		/*Check the reservation object associated with dma_buf */
+		if (NULL == resource->dma_bufs[i]->resv)
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+#if DMA_BUF_LOCK_DEBUG
+		printk(KERN_DEBUG "dma_buf_lock_dolock : dma_buf_fd %i dma_buf %p dma_fence reservation %p\n",
+		       resource->list_of_dma_buf_fds[i], resource->dma_bufs[i], resource->dma_bufs[i]->resv);
+#endif
+	}
+
+	init_waitqueue_head(&resource->wait);
+
+	kref_get(&resource->refcount);
+
+	/* Create file descriptor associated with lock request */
+	fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops,
+	                      (void *)resource, 0);
+	if (fd < 0)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+		return fd;
+	}
+
+	resource->exclusive = request->exclusive;
+
+	/* Start locking process */
+	ret = dma_buf_lock_acquire_fence_reservation(resource, &ww_ctx);
+	if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+		printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d locking reservations.\n", ret);
+#endif
+		put_unused_fd(fd);
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+		return ret;
+	}
+
+	for (i = 0; i < request->count; i++) {
+		struct reservation_object *resv = resource->dma_bufs[i]->resv;
+
+		if (!test_bit(i, &resource->exclusive)) {
+			ret = reservation_object_reserve_shared(resv);
+			if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d reserving space for shared fence.\n", ret);
+#endif
+				break;
+			}
+
+			ret = dma_buf_lock_add_fence_reservation_callback(resource,
+									  resv,
+									  false);
+			if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret);
+#endif
+				break;
+			}
+
+			reservation_object_add_shared_fence(resv, &resource->fence);
+		} else {
+			ret = dma_buf_lock_add_fence_reservation_callback(resource,
+									  resv,
+									  true);
+			if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret);
+#endif
+				break;
+			}
+
+			reservation_object_add_excl_fence(resv, &resource->fence);
+		}
+	}
+
+	dma_buf_lock_release_fence_reservation(resource, &ww_ctx);
+
+	if (IS_ERR_VALUE((unsigned long)ret))
+	{
+		put_unused_fd(fd);
+
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+
+		return ret;
+	}
+
+	/* Test if the callbacks were already triggered */
+	if (!atomic_sub_return(DMA_BUF_LOCK_INIT_BIAS, &resource->fence_dep_count))
+		atomic_set(&resource->locked, 1);
+
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_dolock : complete\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return fd;
+}
+
+static void dma_buf_lock_dounlock(struct kref *ref)
+{
+	int i;
+	dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
+
+	atomic_set(&resource->locked, 0);
+
+	/* Signal the resource's fence. */
+	dma_fence_signal(&resource->fence);
+
+	dma_buf_lock_fence_free_callbacks(resource);
+
+	list_del(&resource->link);
+
+	for (i = 0; i < resource->count; i++)
+	{
+		if (resource->dma_bufs[i])
+			dma_buf_put(resource->dma_bufs[i]);
+	}
+
+	kfree(resource->dma_bufs);
+	kfree(resource->list_of_dma_buf_fds);
+	dma_fence_put(&resource->fence);
+}
+
+static int __init dma_buf_lock_init(void)
+{
+	int err;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init\n");
+#endif
+	err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
+
+	if (0 == err)
+	{
+		cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops);
+
+		err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1);
+
+		if (0 == err)
+		{
+			dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
+			if (IS_ERR(dma_buf_lock_class))
+			{
+				err = PTR_ERR(dma_buf_lock_class);
+			}
+			else
+			{
+				struct device *mdev;
+				mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev, NULL, dma_buf_lock_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return 0;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(dma_buf_lock_class);
+			}
+			cdev_del(&dma_buf_lock_cdev);
+		}
+
+		unregister_chrdev_region(dma_buf_lock_dev, 1);
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init failed\n");
+#endif
+	return err;
+}
+
+static void __exit dma_buf_lock_exit(void)
+{
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_exit\n");
+#endif
+
+	/* Unlock all outstanding references */
+	while (1)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		if (list_empty(&dma_buf_lock_resource_list))
+		{
+			mutex_unlock(&dma_buf_lock_mutex);
+			break;
+		}
+		else
+		{
+			dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next, 
+			                                             dma_buf_lock_resource, link);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+		}
+	}
+
+	device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
+
+	class_destroy(dma_buf_lock_class);
+
+	cdev_del(&dma_buf_lock_cdev);
+
+	unregister_chrdev_region(dma_buf_lock_dev, 1);
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	dma_buf_lock_k_request request;
+	int size = _IOC_SIZE(cmd);
+
+	if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
+	{
+		return -ENOTTY;
+	}
+
+	switch (cmd)
+	{
+		case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
+			if (size != sizeof(dma_buf_lock_k_request))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&request, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+#if DMA_BUF_LOCK_DEBUG
+			printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
+#endif
+			return dma_buf_lock_dolock(&request);
+	}
+
+	return -ENOTTY;
+}
+
+module_init(dma_buf_lock_init);
+module_exit(dma_buf_lock_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h b/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
new file mode 100644
index 0000000..e1b9348
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _DMA_BUF_LOCK_H
+#define _DMA_BUF_LOCK_H
+
+typedef enum dma_buf_lock_exclusive
+{
+	DMA_BUF_LOCK_NONEXCLUSIVE = 0,
+	DMA_BUF_LOCK_EXCLUSIVE = -1
+} dma_buf_lock_exclusive;
+
+typedef struct dma_buf_lock_k_request
+{
+	int count;
+	int *list_of_dma_buf_fds;
+	int timeout;
+	dma_buf_lock_exclusive exclusive;
+} dma_buf_lock_k_request;
+
+#define DMA_BUF_LOCK_IOC_MAGIC '~'
+
+#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC       _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
+
+#define DMA_BUF_LOCK_IOC_MINNR 11
+#define DMA_BUF_LOCK_IOC_MAXNR 11
+
+#endif /* _DMA_BUF_LOCK_H */
diff --git a/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/sconscript b/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/sconscript
new file mode 100644
index 0000000..b8724f1
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/dma_buf_lock/src/sconscript
@@ -0,0 +1,33 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+src = [Glob('#kernel/drivers/base/dma_buf_lock/src/*.c'), Glob('#kernel/drivers/base/dma_buf_lock/src/*.h'), Glob('#kernel/drivers/base/dma_buf_lock/src/K*')]
+
+if env.GetOption('clean') :
+	# Clean module
+	env.Execute(Action("make clean", '[CLEAN] dma_buf_lock'))
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
+else:
+	# Build module
+	makeAction=Action("cd ${SOURCE.dir} && make dma_buf_lock && cp dma_buf_lock.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [makeAction])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
diff --git a/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild b/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
new file mode 100644
index 0000000..56b9f86
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma-buf-test-exporter.o
+endif
diff --git a/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig b/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
new file mode 100644
index 0000000..974f0c2
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+config DMA_SHARED_BUFFER_TEST_EXPORTER
+	tristate "Test exporter for the dma-buf framework"
+	depends on DMA_SHARED_BUFFER
+	help
+	  This option enables the test exporter usable to help test importerts.
diff --git a/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/Makefile b/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
new file mode 100644
index 0000000..06e3d5c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
new file mode 100644
index 0000000..1b1c650
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
@@ -0,0 +1,719 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/dma-buf-test-exporter.h>
+#include <linux/dma-buf.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/atomic.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif
+#include <linux/dma-mapping.h>
+#endif
+
+struct dma_buf_te_alloc {
+	/* the real alloc */
+	int nr_pages;
+	struct page **pages;
+
+	/* the debug usage tracking */
+	int nr_attached_devices;
+	int nr_device_mappings;
+	int nr_cpu_mappings;
+
+	/* failure simulation */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+
+	bool contiguous;
+	dma_addr_t contig_dma_addr;
+	void *contig_cpu_addr;
+};
+
+static struct miscdevice te_device;
+
+static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc	*alloc;
+	alloc = buf->priv;
+
+	if (alloc->fail_attach)
+		return -EFAULT;
+
+	/* dma_buf is externally locked during call */
+	alloc->nr_attached_devices++;
+	return 0;
+}
+
+static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* dma_buf is externally locked during call */
+
+	alloc->nr_attached_devices--;
+}
+
+static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction)
+{
+	struct sg_table *sg;
+	struct scatterlist *iter;
+	struct dma_buf_te_alloc	*alloc;
+	int i;
+	int ret;
+
+	alloc = attachment->dmabuf->priv;
+
+	if (alloc->fail_map)
+		return ERR_PTR(-ENOMEM);
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* if the ARCH can't chain we can't have allocs larger than a single sg can hold */
+	if (alloc->nr_pages > SG_MAX_SINGLE_ALLOC)
+		return ERR_PTR(-EINVAL);
+#endif
+
+	sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (!sg)
+		return ERR_PTR(-ENOMEM);
+
+	/* from here we access the allocation object, so lock the dmabuf pointing to it */
+	mutex_lock(&attachment->dmabuf->lock);
+
+	if (alloc->contiguous)
+		ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+	else
+		ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL);
+	if (ret) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		kfree(sg);
+		return ERR_PTR(ret);
+	}
+
+	if (alloc->contiguous) {
+		sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE;
+		sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0);
+		sg_dma_address(sg->sgl) = alloc->contig_dma_addr;
+	} else {
+		for_each_sg(sg->sgl, iter, alloc->nr_pages, i)
+			sg_set_page(iter, alloc->pages[i], PAGE_SIZE, 0);
+	}
+
+	if (!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		sg_free_table(sg);
+		kfree(sg);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	alloc->nr_device_mappings++;
+	mutex_unlock(&attachment->dmabuf->lock);
+	return sg;
+}
+
+static void dma_buf_te_unmap(struct dma_buf_attachment *attachment,
+							 struct sg_table *sg, enum dma_data_direction direction)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = attachment->dmabuf->priv;
+
+	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction);
+	sg_free_table(sg);
+	kfree(sg);
+
+	mutex_lock(&attachment->dmabuf->lock);
+	alloc->nr_device_mappings--;
+	mutex_unlock(&attachment->dmabuf->lock);
+}
+
+static void dma_buf_te_release(struct dma_buf *buf)
+{
+	int i;
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* no need for locking */
+
+	if (alloc->contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr,
+						alloc->contig_dma_addr,
+						DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++)
+			__free_page(alloc->pages[i]);
+	}
+	kfree(alloc->pages);
+	kfree(alloc);
+}
+
+
+static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings++;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	BUG_ON(alloc->nr_cpu_mappings <= 0);
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings--;
+	mutex_unlock(&dma_buf->lock);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+static int dma_buf_te_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+#else
+static int dma_buf_te_mmap_fault(struct vm_fault *vmf)
+#endif
+{
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dmabuf;
+	struct page *pageptr;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	dmabuf = vma->vm_private_data;
+#else
+	dmabuf = vmf->vma->vm_private_data;
+#endif
+	alloc = dmabuf->priv;
+
+	if (vmf->pgoff > alloc->nr_pages)
+		return VM_FAULT_SIGBUS;
+
+	pageptr = alloc->pages[vmf->pgoff];
+
+	BUG_ON(!pageptr);
+
+	get_page(pageptr);
+	vmf->page = pageptr;
+
+	return 0;
+}
+
+struct vm_operations_struct dma_buf_te_vm_ops = {
+	.open = dma_buf_te_mmap_open,
+	.close = dma_buf_te_mmap_close,
+	.fault = dma_buf_te_mmap_fault
+};
+
+static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = dmabuf->priv;
+
+	if (alloc->fail_mmap)
+		return -ENOMEM;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTEXPAND;
+#endif
+	vma->vm_ops = &dma_buf_te_vm_ops;
+	vma->vm_private_data = dmabuf;
+
+	/*  we fault in the pages on access */
+
+	/* call open to do the ref-counting */
+	dma_buf_te_vm_ops.open(vma);
+
+	return 0;
+}
+
+static void *dma_buf_te_kmap_atomic(struct dma_buf *buf, unsigned long page_num)
+{
+	/* IGNORE */
+	return NULL;
+}
+
+static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long page_num)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return NULL;
+
+	return kmap(alloc->pages[page_num]);
+}
+static void dma_buf_te_kunmap(struct dma_buf *buf,
+		unsigned long page_num, void *addr)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return;
+
+	kunmap(alloc->pages[page_num]);
+	return;
+}
+
+static struct dma_buf_ops dma_buf_te_ops = {
+	/* real handlers */
+	.attach = dma_buf_te_attach,
+	.detach = dma_buf_te_detach,
+	.map_dma_buf = dma_buf_te_map,
+	.unmap_dma_buf = dma_buf_te_unmap,
+	.release = dma_buf_te_release,
+	.mmap = dma_buf_te_mmap,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
+	.kmap = dma_buf_te_kmap,
+	.kunmap = dma_buf_te_kunmap,
+
+	/* nop handlers for mandatory functions we ignore */
+	.kmap_atomic = dma_buf_te_kmap_atomic
+#else
+	.map = dma_buf_te_kmap,
+	.unmap = dma_buf_te_kunmap,
+
+	/* nop handlers for mandatory functions we ignore */
+	.map_atomic = dma_buf_te_kmap_atomic
+#endif
+};
+
+static int do_dma_buf_te_ioctl_version(struct dma_buf_te_ioctl_version __user *buf)
+{
+	struct dma_buf_te_ioctl_version v;
+
+	if (copy_from_user(&v, buf, sizeof(v)))
+		return -EFAULT;
+
+	if (v.op != DMA_BUF_TE_ENQ)
+		return -EFAULT;
+
+	v.op = DMA_BUF_TE_ACK;
+	v.major = DMA_BUF_TE_VER_MAJOR;
+	v.minor = DMA_BUF_TE_VER_MINOR;
+
+	if (copy_to_user(buf, &v, sizeof(v)))
+		return -EFAULT;
+	else
+		return 0;
+}
+
+static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, bool contiguous)
+{
+	struct dma_buf_te_ioctl_alloc alloc_req;
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dma_buf;
+	int i = 0;
+	int fd;
+
+	if (copy_from_user(&alloc_req, buf, sizeof(alloc_req))) {
+		dev_err(te_device.this_device, "%s: couldn't get user data", __func__);
+		goto no_input;
+	}
+
+	if (!alloc_req.size) {
+		dev_err(te_device.this_device, "%s: no size specified", __func__);
+		goto invalid_size;
+	}
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* Whilst it is possible to allocate larger buffer, we won't be able to
+	 * map it during actual usage (mmap() still succeeds). We fail here so
+	 * userspace code can deal with it early than having driver failure
+	 * later on. */
+	if (alloc_req.size > SG_MAX_SINGLE_ALLOC) {
+		dev_err(te_device.this_device, "%s: buffer size of %llu pages exceeded the mapping limit of %lu pages",
+				__func__, alloc_req.size, SG_MAX_SINGLE_ALLOC);
+		goto invalid_size;
+	}
+#endif
+
+	alloc = kzalloc(sizeof(struct dma_buf_te_alloc), GFP_KERNEL);
+	if (NULL == alloc) {
+		dev_err(te_device.this_device, "%s: couldn't alloc object", __func__);
+		goto no_alloc_object;
+	}
+
+	alloc->nr_pages = alloc_req.size;
+	alloc->contiguous = contiguous;
+
+	alloc->pages = kzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL);
+	if (!alloc->pages) {
+		dev_err(te_device.this_device,
+				"%s: couldn't alloc %d page structures", __func__,
+				alloc->nr_pages);
+		goto free_alloc_object;
+	}
+
+	if (contiguous) {
+		dma_addr_t dma_aux;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO,
+				DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO, &attrs);
+#else
+		alloc->contig_cpu_addr = dma_alloc_writecombine(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO);
+#endif
+		if (!alloc->contig_cpu_addr) {
+			dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %d pages", __func__, alloc->nr_pages);
+			goto free_page_struct;
+		}
+		dma_aux = alloc->contig_dma_addr;
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = pfn_to_page(PFN_DOWN(dma_aux));
+			dma_aux += PAGE_SIZE;
+		}
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+			if (NULL == alloc->pages[i]) {
+				dev_err(te_device.this_device, "%s: couldn't alloc page", __func__);
+				goto no_page;
+			}
+		}
+	}
+
+	/* alloc ready, let's export it */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0))
+	{
+		struct dma_buf_export_info export_info = {
+			.exp_name = "dma_buf_te",
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0))
+			.owner = THIS_MODULE,
+#endif
+			.ops = &dma_buf_te_ops,
+			.size = alloc->nr_pages << PAGE_SHIFT,
+			.flags = O_CLOEXEC | O_RDWR,
+			.priv = alloc,
+		};
+
+		dma_buf = dma_buf_export(&export_info);
+	}
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0))
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR, NULL);
+#else
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR);
+#endif
+
+	if (IS_ERR_OR_NULL(dma_buf)) {
+		dev_err(te_device.this_device, "%s: couldn't export dma_buf", __func__);
+		goto no_export;
+	}
+
+	/* get fd for buf */
+	fd = dma_buf_fd(dma_buf, O_CLOEXEC);
+
+	if (fd < 0) {
+		dev_err(te_device.this_device, "%s: couldn't get fd from dma_buf", __func__);
+		goto no_fd;
+	}
+
+	return fd;
+
+no_fd:
+	dma_buf_put(dma_buf);
+no_export:
+	/* i still valid */
+no_page:
+	if (contiguous) {
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr,
+						alloc->contig_dma_addr,
+						DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		while (i-- > 0)
+			__free_page(alloc->pages[i]);
+	}
+free_page_struct:
+	kfree(alloc->pages);
+free_alloc_object:
+	kfree(alloc);
+no_alloc_object:
+invalid_size:
+no_input:
+	return -EFAULT;
+}
+
+static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg)
+{
+	struct dma_buf_te_ioctl_status status;
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&status, arg, sizeof(status)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(status.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, get the current status */
+	alloc = dmabuf->priv;
+
+	/* lock while reading status to take a snapshot */
+	mutex_lock(&dmabuf->lock);
+	status.attached_devices = alloc->nr_attached_devices;
+	status.device_mappings = alloc->nr_device_mappings;
+	status.cpu_mappings = alloc->nr_cpu_mappings;
+	mutex_unlock(&dmabuf->lock);
+
+	if (copy_to_user(arg, &status, sizeof(status)))
+		goto err_have_dmabuf;
+
+	/* All OK */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing __user *arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_set_failing f;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, set the fail modes */
+	alloc = dmabuf->priv;
+	/* lock to set the fail modes atomically */
+	mutex_lock(&dmabuf->lock);
+	alloc->fail_attach = f.fail_attach;
+	alloc->fail_map    = f.fail_map;
+	alloc->fail_mmap   = f.fail_mmap;
+	mutex_unlock(&dmabuf->lock);
+
+	/* success */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
+{
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	unsigned int count;
+	unsigned int offset = 0;
+	int ret = 0;
+	int i;
+
+	attachment = dma_buf_attach(dma_buf, te_device.this_device);
+	if (IS_ERR_OR_NULL(attachment))
+		return -EBUSY;
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto no_import;
+	}
+
+	ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+	if (ret)
+		goto no_cpu_access;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		for (i = 0; i < sg_dma_len(sg); i = i + PAGE_SIZE) {
+			void *addr;
+
+			addr = dma_buf_kmap(dma_buf, i >> PAGE_SHIFT);
+			if (!addr) {
+				/* dma_buf_kmap is unimplemented in exynos and returns NULL */
+				ret = -EPERM;
+				goto no_kmap;
+			}
+			memset(addr, value, PAGE_SIZE);
+			dma_buf_kunmap(dma_buf, i >> PAGE_SHIFT, addr);
+		}
+		offset += sg_dma_len(sg);
+	}
+
+no_kmap:
+	dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+no_cpu_access:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+no_import:
+	dma_buf_detach(dma_buf, attachment);
+	return ret;
+}
+
+static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
+{
+
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_fill f;
+	int ret;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	ret = dma_te_buf_fill(dmabuf, f.value);
+	dma_buf_put(dmabuf);
+
+	return ret;
+}
+
+static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case DMA_BUF_TE_VERSION:
+		return do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg);
+	case DMA_BUF_TE_ALLOC:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false);
+	case DMA_BUF_TE_ALLOC_CONT:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true);
+	case DMA_BUF_TE_QUERY:
+		return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg);
+	case DMA_BUF_TE_SET_FAILING:
+		return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg);
+	case DMA_BUF_TE_FILL:
+		return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations dma_buf_te_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = dma_buf_te_ioctl,
+	.compat_ioctl = dma_buf_te_ioctl,
+};
+
+static int __init dma_buf_te_init(void)
+{
+	int res;
+	te_device.minor = MISC_DYNAMIC_MINOR;
+	te_device.name = "dma_buf_te";
+	te_device.fops = &dma_buf_te_fops;
+
+	res = misc_register(&te_device);
+	if (res) {
+		printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n");
+		return res;
+	}
+	te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	dev_info(te_device.this_device, "dma_buf_te ready\n");
+	return 0;
+
+}
+
+static void __exit dma_buf_te_exit(void)
+{
+	misc_deregister(&te_device);
+}
+
+module_init(dma_buf_te_init);
+module_exit(dma_buf_te_exit);
+MODULE_LICENSE("GPL");
+
diff --git a/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/sconscript b/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
new file mode 100644
index 0000000..5f42141
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+mod = env.BuildKernelModule('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', Glob('*.c'))
+env.KernelObjTarget('dma-buf-test-exporter', mod)
diff --git a/bifrost/r9p0/kernel/drivers/base/sconscript b/bifrost/r9p0/kernel/drivers/base/sconscript
new file mode 100644
index 0000000..46f5241
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/sconscript
@@ -0,0 +1,40 @@
+#
+# (C) COPYRIGHT 2010-2014, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import( 'env' )
+
+if Glob('bus_logger/sconscript'):
+	if env['buslog'] == '1':
+		SConscript('bus_logger/sconscript')
+
+if Glob('mali_fpga_sysctl/sconscript'):
+	SConscript('mali_fpga_sysctl/sconscript')
+
+if Glob('dma_buf_lock/sconscript'):
+	SConscript('dma_buf_lock/sconscript')
+
+if Glob('ump/sconscript'):
+	SConscript('ump/sconscript')
+
+if Glob('kds/sconscript'):
+	SConscript('kds/sconscript')
+
+if Glob('dma_buf_test_exporter/sconscript'):
+	SConscript('dma_buf_test_exporter/sconscript')
+
+if Glob('smc_protected_mode_switcher/sconscript'):
+	if env['platform_config'] == 'juno_soc':
+		SConscript('smc_protected_mode_switcher/sconscript')
+
diff --git a/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/Kbuild b/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/Kbuild
new file mode 100644
index 0000000..244c70b
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-$(CONFIG_SMC_PROTECTED_MODE_SWITCHER) := smc_protected_mode_switcher.o
+smc_protected_mode_switcher-y := protected_mode_switcher_device.o \
+				 protected_mode_switcher_smc.o
diff --git a/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/Kconfig b/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/Kconfig
new file mode 100644
index 0000000..27a6bab
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config SMC_PROTECTED_MODE_SWITCHER
+	tristate "SMC protected mode switcher"
+	help
+	  This option enables the SMC protected mode switcher
diff --git a/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/Makefile b/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/Makefile
new file mode 100644
index 0000000..3e1788c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/Makefile
@@ -0,0 +1,31 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include -I$(CURDIR)/../../gpu/arm/midgard -DCONFIG_SMC_PROTECTED_MODE_SWITCHER" CONFIG_SMC_PROTECTED_MODE_SWITCHER=m modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_device.c b/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_device.c
new file mode 100644
index 0000000..31c57cd
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_device.c
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/compiler.h>
+
+#include <linux/protected_mode_switcher.h>
+
+/*
+ * Protected Mode Switch
+ */
+
+#define SMC_FAST_CALL  (1 << 31)
+#define SMC_64         (1 << 30)
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_SIP    (2 << SMC_OEN_OFFSET)
+
+struct smc_protected_mode_device {
+	u16 smc_fid_enable;
+	u16 smc_fid_disable;
+	struct device *dev;
+};
+
+asmlinkage u64 __invoke_protected_mode_switch_smc(u64, u64, u64, u64);
+
+static u64 invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return __invoke_protected_mode_switch_smc(fid, arg0, arg1, arg2);
+}
+
+static int protected_mode_enable(struct protected_mode_device *protected_dev)
+{
+	struct smc_protected_mode_device *sdev = protected_dev->data;
+
+	if (!sdev)
+		/* Not supported */
+		return -EINVAL;
+
+	return invoke_smc(SMC_OEN_SIP,
+			sdev->smc_fid_enable, false,
+			0, 0, 0);
+
+}
+
+static int protected_mode_disable(struct protected_mode_device *protected_dev)
+{
+	struct smc_protected_mode_device *sdev = protected_dev->data;
+
+	if (!sdev)
+		/* Not supported */
+		return -EINVAL;
+
+	return invoke_smc(SMC_OEN_SIP,
+			sdev->smc_fid_disable, false,
+			0, 0, 0);
+}
+
+
+static int protected_mode_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct protected_mode_device *protected_dev;
+	struct smc_protected_mode_device *sdev;
+	u32 tmp = 0;
+
+	protected_dev = devm_kzalloc(&pdev->dev, sizeof(*protected_dev),
+			GFP_KERNEL);
+	if (!protected_dev)
+		return -ENOMEM;
+
+	sdev = devm_kzalloc(&pdev->dev, sizeof(*sdev), GFP_KERNEL);
+	if (!sdev) {
+		devm_kfree(&pdev->dev, protected_dev);
+		return -ENOMEM;
+	}
+
+	protected_dev->data = sdev;
+	protected_dev->ops.protected_mode_enable = protected_mode_enable;
+	protected_dev->ops.protected_mode_disable = protected_mode_disable;
+	sdev->dev = dev;
+
+	if (!of_property_read_u32(dev->of_node, "arm,smc,protected_enable",
+			&tmp))
+		sdev->smc_fid_enable = tmp;
+
+	if (!of_property_read_u32(dev->of_node, "arm,smc,protected_disable",
+			&tmp))
+		sdev->smc_fid_disable = tmp;
+
+	/* Check older property names, for compatibility with outdated DTBs */
+	if (!of_property_read_u32(dev->of_node, "arm,smc,secure_enable", &tmp))
+		sdev->smc_fid_enable = tmp;
+
+	if (!of_property_read_u32(dev->of_node, "arm,smc,secure_disable", &tmp))
+		sdev->smc_fid_disable = tmp;
+
+	platform_set_drvdata(pdev, protected_dev);
+
+	dev_info(&pdev->dev, "Protected mode switcher %s loaded\n", pdev->name);
+	dev_info(&pdev->dev, "SMC enable: 0x%x\n", sdev->smc_fid_enable);
+	dev_info(&pdev->dev, "SMC disable: 0x%x\n", sdev->smc_fid_disable);
+
+	return 0;
+}
+
+static int protected_mode_remove(struct platform_device *pdev)
+{
+	dev_info(&pdev->dev, "Protected mode switcher %s removed\n",
+			pdev->name);
+
+	return 0;
+}
+
+static const struct of_device_id protected_mode_dt_ids[] = {
+	{ .compatible = "arm,smc-protected-mode-switcher" },
+	{ .compatible = "arm,secure-mode-switcher" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, protected_mode_dt_ids);
+
+static struct platform_driver protected_mode_driver = {
+	.probe = protected_mode_probe,
+	.remove = protected_mode_remove,
+	.driver = {
+		.name = "smc-protected-mode-switcher",
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(protected_mode_dt_ids),
+	}
+};
+
+module_platform_driver(protected_mode_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_smc.S b/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_smc.S
new file mode 100644
index 0000000..5eae328
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_smc.S
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+#include <linux/linkage.h>
+
+/* u64 invoke_protected_mode_switch_smc(u64 function_id, u64 arg0, u64 arg1,
+		u64 arg2) */
+ENTRY(__invoke_protected_mode_switch_smc)
+	smc	#0
+	ret
+ENDPROC(__invoke_protected_mode_switch_smc)
diff --git a/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/sconscript b/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/sconscript
new file mode 100644
index 0000000..f740a71
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/smc_protected_mode_switcher/sconscript
@@ -0,0 +1,43 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+import re
+Import('env')
+
+if env['v'] != '1':
+	env['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}'
+
+src = [ Glob('#kernel/drivers/base/smc_protected_mode_switcher/*.c'),
+		Glob('#kernel/drivers/base/smc_protected_mode_switcher/*.S'),
+		Glob('#kernel/drivers/base/smc_protected_mode_switcher/*.h'),
+		Glob('#kernel/drivers/base/smc_protected_mode_switcher/K*'),
+		Glob('#kernel/drivers/base/smc_protected_mode_switcher/Makefile') ]
+
+env.Append( CPPPATH = '#kernel/include' )
+env['smc_protected_mode_switcher'] = 1
+
+if env.GetOption('clean') :
+	# Clean module
+	env.Execute(Action("make clean", '[CLEAN] smc_protected_mode_switcher'))
+	cmd = env.Command('$STATIC_LIB_PATH/smc_protected_mode_switcher.ko', src, [])
+	env.ProgTarget('smc_protected_mode_switcher', cmd)
+else:
+	# Build module
+	makeAction=Action("cd ${SOURCE.dir} && make && cp smc_protected_mode_switcher.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/smc_protected_mode_switcher.ko', src, [makeAction])
+	env.ProgTarget('smc_protected_mode_switcher', cmd)
+
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/Kbuild b/bifrost/r9p0/kernel/drivers/base/ump/Kbuild
new file mode 100644
index 0000000..2bbdba2
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += src/
+
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/Kconfig b/bifrost/r9p0/kernel/drivers/base/ump/Kconfig
new file mode 100644
index 0000000..f7451e6
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/Kconfig
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config UMP
+	tristate "Enable Unified Memory Provider (UMP) support"
+	default n
+    help
+	  Enable this option to build support for the ARM UMP module.
+	  UMP can be used by the Mali T6xx module to improve performance
+	  by reducing the copying of data by sharing memory.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate one module, called ump.
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/docs/Doxyfile b/bifrost/r9p0/kernel/drivers/base/ump/docs/Doxyfile
new file mode 100644
index 0000000..fbec8eb
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/docs/Doxyfile
@@ -0,0 +1,125 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/include/linux/ump-common.h ../../kernel/include/linux/ump.h
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           += ../../kernel/drivers/base/ump
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           +=
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/docs/sconscript b/bifrost/r9p0/kernel/drivers/base/ump/docs/sconscript
new file mode 100644
index 0000000..521278d
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/docs/sconscript
@@ -0,0 +1,31 @@
+#
+# (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+doxygen_sources = [
+	'Doxyfile',
+	Glob('*.png'),
+	Glob('../*.h'),
+	Glob('../src/*.h') ]
+
+if env['doc'] == '1':
+        doxygen_target = env.Command('doxygen/html/index.html', doxygen_sources,
+                                     ['cd ${SOURCE.dir} && doxygen'])
+        env.Clean(doxygen_target, './doxygen')
+
+        Alias('doxygen', doxygen_target)
+
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/example_kernel_api.c b/bifrost/r9p0/kernel/drivers/base/ump/example_kernel_api.c
new file mode 100644
index 0000000..858dd8b
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/example_kernel_api.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Example routine to display information about an UMP allocation
+ * The routine takes an secure_id which can come from a different kernel module
+ * or from a client application (i.e. an ioctl).
+ * It creates a ump handle from the secure id (which validates the secure id)
+ * and if successful dumps the physical memory information.
+ * It follows the API and pins the memory while "using" the physical memory.
+ * Finally it calls the release function to indicate it's finished with the handle.
+ *
+ * If the function can't look up the handle it fails with return value -1.
+ * If the testy succeeds then it return 0.
+ * */
+
+static int display_ump_memory_information(ump_secure_id secure_id)
+{
+	const ump_dd_physical_block_64 * ump_blocks = NULL;
+	ump_dd_handle ump_mem;
+	uint64_t nr_blocks;
+	int i;
+	ump_alloc_flags flags;
+
+	/* get a handle from the secure id */
+	ump_mem = ump_dd_from_secure_id(secure_id);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE == ump_mem)
+	{
+		/* invalid ID received */
+		return -1;
+	}
+
+	/* at this point we know we've added a reference to the ump allocation, so we must release it with ump_dd_release */
+
+	ump_dd_phys_blocks_get_64(ump_mem, &nr_blocks, &ump_blocks);
+	flags = ump_dd_allocation_flags_get(ump_mem);
+
+	printf("UMP allocation with secure ID %u consists of %zd physical block(s):\n", secure_id, nr_blocks);
+
+	for(i=0; i<nr_blocks; ++i)
+	{
+		printf("\tBlock %d: 0x%08zX size 0x%08zX\n", i, ump_blocks[i].addr, ump_blocks[i].size);
+	}
+
+	printf("and was allocated using the following bitflag combo:  0x%lX\n", flags);
+
+	ump_dd_release(ump_mem);
+
+	return 0;
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/example_user_api.c b/bifrost/r9p0/kernel/drivers/base/ump/example_user_api.c
new file mode 100644
index 0000000..d143a64
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/example_user_api.c
@@ -0,0 +1,153 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <ump/ump.h>
+#include <memory.h>
+#include <stdio.h>
+
+/*
+ * Example routine to exercise the user space UMP api.
+ * This routine initializes the UMP api and allocates some CPU+device X memory.
+ * No usage hints are given, so the driver will use the default cacheability policy.
+ * With the allocation it creates a duplicate handle and plays with the reference count.
+ * Then it simulates interacting with a device and contains pseudo code for the device.
+ *
+ * If any error is detected correct cleanup will be performed and -1 will be returned.
+ * If successful then 0 will be returned.
+ */
+
+static int test_ump_user_api(void)
+{
+	/* This is the size we try to allocate*/
+	const size_t alloc_size = 4096;
+
+	ump_handle h = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_copy = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+	void * mapping = NULL;
+
+	ump_result ump_api_res;
+	int result = -1;
+
+	ump_secure_id id;
+
+	size_t size_returned;
+
+	ump_api_res = ump_open();
+	if (UMP_OK != ump_api_res)
+	{
+		/* failed to open an ump session */
+		/* early out */
+		return -1;
+	}
+
+	h = ump_allocate_64(alloc_size, UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | UMP_PROT_X_RD | UMP_PROT_X_WR);
+	/* the refcount is now 1 */
+	if (UMP_INVALID_MEMORY_HANDLE == h)
+	{
+		/* allocation failure */
+		goto cleanup;
+	}
+
+	/* this is how we could share this allocation with another process */
+
+	/* in process A: */
+	id = ump_secure_id_get(h);
+	/* still ref count 1 */
+	/* send the id to process B */
+
+	/* in process B: */
+	/* receive the id from A */
+	h_clone = ump_from_secure_id(id);
+	/* the ref count of the allocation is now 2 (one from each handle to it) */
+	/* do something ... */
+	/* release our clone */
+	ump_release(h_clone); /* safe to call even if ump_from_secure_id failed */
+	h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+
+	/* a simple save-for-future-use logic inside the driver would just copy the handle (but add a ref manually too!) */
+	/*
+	 * void assign_memory_to_job(h)
+	 * {
+	  */
+	h_copy = h;
+	ump_retain(h_copy); /* manual retain needed as we just assigned the handle, now 2 */
+	/*
+	 * }
+	 *
+	 * void job_completed(void)
+	 * {
+	 */
+	 ump_release(h_copy); /* normal handle release as if we got via an ump_allocate */
+	 h_copy = UMP_INVALID_MEMORY_HANDLE;
+	 /*
+	 * }
+	 */
+	
+	/* we're now back at ref count 1, and only h is a valid handle */
+	/* enough handle duplication show-off, let's play with the contents instead */
+
+	mapping = ump_map(h, 0, alloc_size);
+	if (NULL == mapping)
+	{
+		/* mapping failure, either out of address space or some other error */
+		goto cleanup;
+	}
+
+	memset(mapping, 0, alloc_size);
+
+	/* let's pretend we're going to start some hw device on this buffer and read the result afterwards */
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN, mapping, alloc_size);
+ 	/*
+		device cache invalidate
+
+		memory barrier
+
+		start device
+
+		memory barrier
+
+		wait for device
+
+		memory barrier
+
+		device cache clean
+
+		memory barrier
+	*/
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN_AND_INVALIDATE, mapping, alloc_size);
+
+	/* we could now peek at the result produced by the hw device, which is now accessible via our mapping */
+
+	/* unmap the buffer when we're done with it */
+	ump_unmap(h, mapping, alloc_size);
+
+	result = 0;
+
+cleanup:
+	ump_release(h);
+	h = UMP_INVALID_MEMORY_HANDLE;
+
+	ump_close();
+
+	return result;
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/sconscript b/bifrost/r9p0/kernel/drivers/base/ump/sconscript
new file mode 100644
index 0000000..b4aa8b6
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+if Glob('src/sconscript') and int(env['ump']) == 1:
+	SConscript( 'src/sconscript' )
+	SConscript( 'docs/sconscript' )
+
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/Kbuild b/bifrost/r9p0/kernel/drivers/base/ump/src/Kbuild
new file mode 100644
index 0000000..de6d307
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/Kbuild
@@ -0,0 +1,50 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Paths required for build
+UMP_PATH = $(src)/../..
+UMP_DEVICEDRV_PATH = $(src)/.
+
+# Set up defaults if not defined by the user
+MALI_UNIT_TEST ?= 0
+
+SRC :=\
+	common/ump_kernel_core.c \
+	common/ump_kernel_descriptor_mapping.c \
+	linux/ump_kernel_linux.c \
+	linux/ump_kernel_linux_mem.c
+
+UNIT_TEST_DEFINES=
+ifeq ($(MALI_UNIT_TEST), 1)
+	MALI_DEBUG ?= 1
+
+	UNIT_TEST_DEFINES = -DMALI_UNIT_TEST=1 \
+	                    -DMALI_DEBUG=$(MALI_DEBUG)
+endif
+
+# Use our defines when compiling
+ccflags-y += -I$(UMP_PATH) -I$(UMP_DEVICEDRV_PATH) $(UNIT_TEST_DEFINES)
+
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_UMP) += ump.o
+ifeq ($(CONFIG_ION),y)
+ccflags-y += -I$(srctree)/drivers/staging/android/ion -I$(srctree)/include/linux
+obj-$(CONFIG_UMP) += imports/ion/ump_kernel_import_ion.o
+endif
+
+# Tell the Linux build system to enable building of our .c files
+ump-y := $(SRC:.c=.o)
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/Makefile b/bifrost/r9p0/kernel/drivers/base/ump/src/Makefile
new file mode 100644
index 0000000..dfd5945
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/Makefile
@@ -0,0 +1,79 @@
+#
+# (C) COPYRIGHT 2008-2014, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+ifneq ($(KBUILD_EXTMOD),)
+include $(KBUILD_EXTMOD)/Makefile.common
+else
+include ./Makefile.common
+endif
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+RELATIVE_ROOT=../../../../..
+ROOT = $(CURDIR)/$(RELATIVE_ROOT)
+
+EXTRA_CFLAGS=-I$(CURDIR)/../../../../include
+
+ifeq ($(MALI_UNIT_TEST),1)
+	EXTRA_CFLAGS += -DMALI_UNIT_TEST=$(MALI_UNIT_TEST)
+endif
+
+# Get any user defined KDIR-<names> or maybe even a hardcoded KDIR
+-include KDIR_CONFIGURATION
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+CONFIG ?= $(ARCH)
+
+# default cpu to select
+CPU ?= $(shell uname -m)
+
+# look up KDIR based om CPU selection
+KDIR ?= $(KDIR-$(CPU))
+
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(CPU))
+endif
+
+# Validate selected config
+ifneq ($(shell [ -d arch-$(CONFIG) ] && [ -f arch-$(CONFIG)/config.h ] && echo "OK"), OK)
+$(warning Current directory is $(shell pwd))
+$(error No configuration found for config $(CONFIG). Check that arch-$(CONFIG)/config.h exists)
+else
+# Link arch to the selected arch-config directory
+$(shell [ -L arch ] && rm arch)
+$(shell ln -sf arch-$(CONFIG) arch)
+$(shell touch arch/config.h)
+endif
+
+EXTRA_SYMBOLS=
+
+ifeq ($(MALI_UNIT_TEST),1)
+	KBASE_PATH=$(ROOT)/kernel/drivers/gpu/arm/midgard
+	EXTRA_SYMBOLS+=$(KBASE_PATH)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="$(EXTRA_CFLAGS) $(SCONS_CFLAGS)" CONFIG_UMP=m KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+kernelrelease:
+	$(MAKE) -C $(KDIR) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" kernelrelease
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/Makefile.common b/bifrost/r9p0/kernel/drivers/base/ump/src/Makefile.common
new file mode 100644
index 0000000..f29a4c1
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/Makefile.common
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2008-2010, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+SRC = $(UMP_FILE_PREFIX)/common/ump_kernel_core.c \
+      $(UMP_FILE_PREFIX)/common/ump_kernel_descriptor_mapping.c
+
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/arch-arm/config.h b/bifrost/r9p0/kernel/drivers/base/ump/src/arch-arm/config.h
new file mode 100644
index 0000000..152d98f
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/arch-arm/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/arch-arm64/config.h b/bifrost/r9p0/kernel/drivers/base/ump/src/arch-arm64/config.h
new file mode 100644
index 0000000..cfb14ca
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/arch-arm64/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c b/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c
new file mode 100644
index 0000000..5a8ad7c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c
@@ -0,0 +1,728 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* module headers */
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+/* local headers */
+#include <common/ump_kernel_core.h>
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+#define UMP_FLAGS_RANGE ((UMP_PROT_SHAREABLE<<1) - 1u)
+
+static umpp_device device;
+
+ump_result umpp_core_constructor(void)
+{
+	mutex_init(&device.secure_id_map_lock);
+	device.secure_id_map = umpp_descriptor_mapping_create(UMP_EXPECTED_IDS, UMP_MAX_IDS);
+	if (NULL != device.secure_id_map)
+	{
+		if (UMP_OK == umpp_device_initialize())
+		{
+			return UMP_OK;
+		}
+		umpp_descriptor_mapping_destroy(device.secure_id_map);
+	}
+	mutex_destroy(&device.secure_id_map_lock);
+
+	return UMP_ERROR;
+}
+
+void umpp_core_destructor(void)
+{
+	umpp_device_terminate();
+	umpp_descriptor_mapping_destroy(device.secure_id_map);
+	mutex_destroy(&device.secure_id_map_lock);
+}
+
+umpp_session *umpp_core_session_start(void)
+{
+	umpp_session * session;
+
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
+	if (NULL != session)
+	{
+		mutex_init(&session->session_lock);
+
+		INIT_LIST_HEAD(&session->memory_usage);
+
+		/* try to create import client session, not a failure if they fail to initialize */
+		umpp_import_handlers_init(session);
+	}
+
+	return session;
+}
+
+void umpp_core_session_end(umpp_session *session)
+{
+	umpp_session_memory_usage * usage, *_usage;
+	UMP_ASSERT(session);
+
+	list_for_each_entry_safe(usage, _usage, &session->memory_usage, link)
+	{
+		printk(KERN_WARNING "UMP: Memory usage cleanup, releasing secure ID %d\n", ump_dd_secure_id_get(usage->mem));
+		ump_dd_release(usage->mem);
+		kfree(usage);
+
+	}
+
+	/* we should now not hold any imported memory objects,
+	 * detatch all import handlers */
+	umpp_import_handlers_term(session);
+
+	mutex_destroy(&session->session_lock);
+	kfree(session);
+}
+
+ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	umpp_allocation * alloc;
+	int i;
+
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD ) ) == 0 ||
+	    ( flags & (UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read and one write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL | __GFP_HARDWALL);
+
+	if (NULL == alloc)
+		goto out1;
+
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+	alloc->size = size;
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	if (0 != umpp_phys_commit(alloc))
+	{
+		goto out2;
+	}
+
+	/* all set up, allocate an ID for it */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	return alloc;
+
+out3:
+	umpp_phys_free(alloc);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+uint64_t ump_dd_size_get_64(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->size;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_size_get(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->size <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->size;
+}
+
+ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->id;
+}
+
+ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	return alloc->flags;
+}
+
+ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id)
+{
+	umpp_allocation * alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+
+	mutex_lock(&device.secure_id_map_lock);
+
+	if (0 == umpp_descriptor_mapping_lookup(device.secure_id_map, secure_id, (void**)&alloc))
+	{
+		if (NULL != alloc->filter_func)
+		{
+			if (!alloc->filter_func(secure_id, alloc, alloc->callback_data))
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /* the filter denied access */
+			}
+		}
+
+		/* check permission to access it */
+		if ((UMP_DD_INVALID_MEMORY_HANDLE != alloc) && !(alloc->flags & UMP_PROT_SHAREABLE))
+		{
+			if (alloc->owner != get_current()->pid)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /*no rights for the current process*/
+			}
+		}
+
+		if (UMP_DD_INVALID_MEMORY_HANDLE != alloc)
+		{
+			if( ump_dd_retain(alloc) != UMP_DD_SUCCESS)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+			}
+		}
+	}
+	mutex_unlock(&device.secure_id_map_lock);
+
+	return alloc;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id)
+{
+	return ump_dd_from_secure_id(secure_id);
+}
+
+int ump_dd_retain(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* check for overflow */
+	while(1)
+	{
+		int refcnt = atomic_read(&alloc->refcount);
+		if (refcnt + 1 > 0)
+		{
+			if(atomic_cmpxchg(&alloc->refcount, refcnt, refcnt + 1) == refcnt)
+			{
+				return 0;
+			}
+		}
+		else
+		{
+			return -EBUSY;
+		}
+	}
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_add(ump_dd_handle mem)
+{
+	ump_dd_retain(mem);
+}
+
+
+void ump_dd_release(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+	uint32_t new_cnt;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* secure the id for lookup while releasing */
+	mutex_lock(&device.secure_id_map_lock);
+
+	/* do the actual release */
+	new_cnt = atomic_sub_return(1, &alloc->refcount);
+	if (0 == new_cnt)
+	{
+		/* remove from the table as this was the last ref */
+		umpp_descriptor_mapping_remove(device.secure_id_map, alloc->id);
+	}
+
+	/* release the lock as early as possible */
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if (0 != new_cnt)
+	{
+		/* exit if still have refs */
+		return;
+	}
+
+	UMP_ASSERT(list_empty(&alloc->map_list));
+
+	/* cleanup */
+	if (NULL != alloc->final_release_func)
+	{
+		alloc->final_release_func(alloc, alloc->callback_data);
+	}
+
+	if (0 == (alloc->management_flags & UMP_MGMT_EXTERNAL))
+	{
+		umpp_phys_free(alloc);
+	}
+	else
+	{
+		kfree(alloc->block_array);
+	}
+
+	mutex_destroy(&alloc->map_list_lock);
+
+	kfree(alloc);
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_release(ump_dd_handle mem)
+{
+	ump_dd_release(mem);
+}
+
+void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(pCount);
+	UMP_ASSERT(pArray);
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+	*pCount = alloc->blocksCount;
+	*pArray = alloc->block_array;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks)
+{
+	const umpp_allocation * alloc;
+	unsigned long i;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	if((uint64_t)num_blocks != alloc->blocksCount)
+	{
+		return UMP_DD_INVALID;
+	}
+
+	for( i = 0; i < num_blocks; i++)
+	{
+		UMP_ASSERT(alloc->block_array[i].addr <= UMP_UINT32_MAX);
+		UMP_ASSERT(alloc->block_array[i].size <= UMP_UINT32_MAX);
+
+		blocks[i].addr = (unsigned long)alloc->block_array[i].addr;
+		blocks[i].size = (unsigned long)alloc->block_array[i].size;
+	}
+
+	return UMP_DD_SUCCESS;
+}
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(block);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	UMP_ASSERT(alloc->block_array[index].addr <= UMP_UINT32_MAX);
+	UMP_ASSERT(alloc->block_array[index].size <= UMP_UINT32_MAX);
+
+	block->addr = (unsigned long)alloc->block_array[index].addr;
+	block->size = (unsigned long)alloc->block_array[index].size;
+
+	return UMP_DD_SUCCESS;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->blocksCount <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->blocksCount;
+}
+
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void *uaddr, size_t size)
+{
+	umpp_cpu_mapping *map;
+
+	void *target_first = uaddr;
+	void *target_last = (void*)((uintptr_t)uaddr - 1 + size);
+
+	if (target_last < target_first) /* wrapped */
+	{
+		return NULL;
+	}
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if ( map->vaddr_start <= target_first &&
+		   (void*)((uintptr_t)map->vaddr_start + (map->nr_pages << PAGE_SHIFT) - 1) >= target_last)
+		{
+			goto out;
+		}
+	}
+	map = NULL;
+out:
+	mutex_unlock(&alloc->map_list_lock);
+
+	return map;
+}
+
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map)
+{
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(map);
+	mutex_lock(&alloc->map_list_lock);
+	list_add(&map->link, &alloc->map_list);
+	mutex_unlock(&alloc->map_list_lock);
+}
+
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target)
+{
+	umpp_cpu_mapping * map;
+
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(target);
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if (map == target)
+		{
+			list_del(&target->link);
+			kfree(target);
+			mutex_unlock(&alloc->map_list_lock);
+			return;
+		}
+	}
+
+	/* not found, error */
+	UMP_ASSERT(0);
+}
+
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const  block_index, uint64_t * const block_internal_offset)
+{
+	uint64_t i;
+
+	for (i = 0 ; i < alloc->blocksCount; i++)
+	{
+		if (offset < alloc->block_array[i].size)
+		{
+			/* found the block_array element containing this offset */
+			*block_index = i;
+			*block_internal_offset = offset;
+			return 0;
+		}
+		offset -= alloc->block_array[i].size;
+	}
+
+	return -ENXIO;
+}
+
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size)
+{
+	umpp_allocation * alloc;
+	void *vaddr;
+	umpp_cpu_mapping * mapping;
+	uint64_t virt_page_off; /* offset of given address from beginning of the virtual mapping */
+	uint64_t phys_page_off; /* offset of the virtual mapping from the beginning of the physical buffer */
+	uint64_t page_count; /* number of pages to sync */
+	uint64_t i;
+	uint64_t block_idx;
+	uint64_t block_offset;
+	uint64_t paddr;
+
+	UMP_ASSERT((UMP_MSYNC_CLEAN == op) || (UMP_MSYNC_CLEAN_AND_INVALIDATE == op));
+
+	alloc = (umpp_allocation*)mem;
+	vaddr = (void*)(uintptr_t)address;
+
+	if((alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+	{
+		/* mapping is not cached */
+		return;
+	}
+
+	mapping = umpp_dd_find_enclosing_mapping(alloc, vaddr, size);
+	if (NULL == mapping)
+	{
+		printk(KERN_WARNING "UMP: Illegal cache sync address %lx\n", (uintptr_t)vaddr);
+		return; /* invalid pointer or size causes out-of-bounds */
+	}
+
+	/* we already know that address + size don't wrap around as umpp_dd_find_enclosing_mapping didn't fail */
+	page_count = ((((((uintptr_t)address + size - 1) & PAGE_MASK) - ((uintptr_t)address & PAGE_MASK))) >> PAGE_SHIFT) + 1;
+	virt_page_off = (vaddr - mapping->vaddr_start) >> PAGE_SHIFT;
+	phys_page_off = mapping->page_off;
+
+	if (umpp_dd_find_start_block(alloc, (virt_page_off + phys_page_off) << PAGE_SHIFT, &block_idx, &block_offset))
+	{
+		/* should not fail as a valid mapping was found, so the phys mem must exists */
+		printk(KERN_WARNING "UMP: Unable to find physical start block with offset %llx\n", virt_page_off + phys_page_off);
+		UMP_ASSERT(0);
+		return;
+	}
+
+	paddr = alloc->block_array[block_idx].addr + block_offset + (((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1));
+
+	for (i = 0; i < page_count; i++)
+	{
+		size_t offset = ((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1);
+		size_t sz = min((size_t)PAGE_SIZE - offset, size);
+
+		/* check if we've overrrun the current block, if so move to the next block */
+		if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+		{
+			block_idx++;
+			UMP_ASSERT(block_idx < alloc->blocksCount);
+			paddr = alloc->block_array[block_idx].addr;
+		}
+
+		if (UMP_MSYNC_CLEAN == op)
+		{
+			ump_sync_to_memory(paddr, vaddr, sz);
+		}
+		else /* (UMP_MSYNC_CLEAN_AND_INVALIDATE == op) already validated on entry */
+		{
+			ump_sync_to_cpu(paddr, vaddr, sz);
+		}
+
+		/* advance to next page  */
+		vaddr = (void*)((uintptr_t)vaddr + sz);
+		size -= sz;
+		paddr += sz;
+	}
+}
+
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	uint64_t size = 0;
+	uint64_t i;
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	for (i = 0; i < num_blocks; i++)
+	{
+		size += blocks[i].size;
+	}
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD
+	    | UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read or write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc),__GFP_HARDWALL | GFP_KERNEL);
+
+	if (NULL == alloc)
+	{
+		goto out1;
+	}
+
+	alloc->block_array = kzalloc(sizeof(ump_dd_physical_block_64) * num_blocks,__GFP_HARDWALL | GFP_KERNEL);
+	if (NULL == alloc->block_array)
+	{
+		goto out2;
+	}
+
+	memcpy(alloc->block_array, blocks, sizeof(ump_dd_physical_block_64) * num_blocks);
+
+	alloc->size = size;
+	alloc->blocksCount = num_blocks;
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+	/* all set up, allocate an ID */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	alloc->management_flags |= UMP_MGMT_EXTERNAL;
+
+	return alloc;
+
+out3:
+	kfree(alloc->block_array);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+
+/*
+ * UMP v1 API
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks)
+{
+	ump_dd_handle mem;
+	ump_dd_physical_block_64 *block_64_array;
+	ump_alloc_flags flags = UMP_V1_API_DEFAULT_ALLOCATION_FLAGS;
+	unsigned long i;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	block_64_array = kzalloc(num_blocks * sizeof(*block_64_array), __GFP_HARDWALL | GFP_KERNEL);
+
+	if(block_64_array == NULL)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/* copy physical blocks */
+	for( i = 0; i < num_blocks; i++)
+	{
+		block_64_array[i].addr = blocks[i].addr;
+		block_64_array[i].size = blocks[i].size;
+	}
+
+	mem = ump_dd_create_from_phys_blocks_64(block_64_array, num_blocks, flags, NULL, NULL, NULL);
+
+	kfree(block_64_array);
+
+	return mem;
+
+}
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h b/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h
new file mode 100644
index 0000000..314af87
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h
@@ -0,0 +1,221 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_CORE_H_
+#define _UMP_KERNEL_CORE_H_
+
+
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <asm/atomic.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/cred.h>
+#include <asm/mmu_context.h>
+
+#include <linux/ump-common.h>
+#include <common/ump_kernel_descriptor_mapping.h>
+
+/* forward decl */
+struct umpp_session;
+
+/**
+ * UMP handle metadata.
+ * Tracks various data about a handle not of any use to user space
+ */
+typedef enum
+{
+	UMP_MGMT_EXTERNAL = (1ul << 0) /**< Handle created via the ump_dd_create_from_phys_blocks interface */
+	/* (1ul << 31) not to be used */
+} umpp_management_flags;
+
+/**
+ * Structure tracking the single global UMP device.
+ * Holds global data like the ID map
+ */
+typedef struct umpp_device
+{
+	struct mutex secure_id_map_lock; /**< Lock protecting access to the map */
+	umpp_descriptor_mapping * secure_id_map; /**< Map of all known secure IDs on the system */
+} umpp_device;
+
+/**
+ * Structure tracking all memory allocations of a UMP allocation.
+ * Tracks info about an mapping so we can verify cache maintenace
+ * operations and help in the unmap cleanup.
+ */
+typedef struct umpp_cpu_mapping
+{
+	struct list_head        link; /**< link to list of mappings for an allocation */
+	void                  *vaddr_start; /**< CPU VA start of the mapping */
+	size_t                nr_pages; /**< Size (in pages) of the mapping */
+	uint64_t              page_off; /**< Offset (in pages) from start of the allocation where the mapping starts */
+	ump_dd_handle         handle; /**< Which handle this mapping is linked to */
+	struct umpp_session * session; /**< Which session created the mapping */
+} umpp_cpu_mapping;
+
+/**
+ * Structure tracking UMP allocation.
+ * Represent a memory allocation with its ID.
+ * Tracks all needed meta-data about an allocation.
+ * */
+typedef struct umpp_allocation
+{
+	ump_secure_id id; /**< Secure ID of the allocation */
+	atomic_t refcount; /**< Usage count */
+
+	ump_alloc_flags flags; /**< Flags for all supported devices */
+	uint32_t management_flags; /**< Managment flags tracking */
+
+	pid_t owner; /**< The process ID owning the memory if not sharable */
+
+	ump_dd_security_filter filter_func; /**< Hook to verify use, called during retains from new clients */
+	ump_dd_final_release_callback final_release_func; /**< Hook called when the last reference is removed */
+	void* callback_data; /**< Additional data given to release hook */
+
+	uint64_t size; /**< Size (in bytes) of the allocation */
+	uint64_t blocksCount; /**< Number of physsical blocks the allocation is built up of */
+	ump_dd_physical_block_64 * block_array; /**< Array, one entry per block, describing block start and length */
+
+	struct mutex     map_list_lock; /**< Lock protecting the map_list */
+	struct list_head map_list; /**< Tracks all CPU VA mappings of this allocation */
+
+	void * backendData; /**< Physical memory backend meta-data */
+} umpp_allocation;
+
+/**
+ * Structure tracking use of UMP memory by a session.
+ * Tracks the use of an allocation by a session so session termination can clean up any outstanding references.
+ * Also protects agains non-matched release calls from user space.
+ */
+typedef struct umpp_session_memory_usage
+{
+	ump_secure_id id; /**< ID being used. For quick look-up */
+	ump_dd_handle mem; /**< Handle being used. */
+
+	/**
+	 * Track how many times has the process retained this handle in the kernel.
+	 * This should usually just be 1(allocated or resolved) or 2(mapped),
+	 * but could be more if someone is playing with the low-level API
+	 * */
+	atomic_t process_usage_count;
+
+	struct list_head link; /**< link to other usage trackers for a session */
+} umpp_session_memory_usage;
+
+/**
+ * Structure representing a session/client.
+ * Tracks the UMP allocations being used by this client.
+ */
+typedef struct umpp_session
+{
+	struct mutex session_lock; /**< Lock for memory usage manipulation */
+	struct list_head memory_usage; /**< list of memory currently being used by the this session */
+	void*  import_handler_data[UMPP_EXTERNAL_MEM_COUNT]; /**< Import modules per-session data pointer */
+} umpp_session;
+
+/**
+ * UMP core setup.
+ * Called by any OS specific startup function to initialize the common part.
+ * @return UMP_OK if core initialized correctly, any other value for errors
+ */
+ump_result umpp_core_constructor(void);
+
+/**
+ * UMP core teardown.
+ * Called by any OS specific unload function to clean up the common part.
+ */
+void umpp_core_destructor(void);
+
+/**
+ * UMP session start.
+ * Called by any OS specific session handler when a new session is detected
+ * @return Non-NULL if a matching core session could be set up. NULL on failure
+ */
+umpp_session *umpp_core_session_start(void);
+
+/**
+ * UMP session end.
+ * Called by any OS specific session handler when a session is ended/terminated.
+ * @param session The core session object returned by ump_core_session_start
+ */
+void umpp_core_session_end(umpp_session *session);
+
+/**
+ * Find a mapping object (if any) for this allocation.
+ * Called by any function needing to identify a mapping from a user virtual address.
+ * Verifies that the whole range to be within a mapping object.
+ * @param alloc The UMP allocation to find a matching mapping object of
+ * @param uaddr User mapping address to find the mapping object for
+ * @param size Length of the mapping
+ * @return NULL on error (no match found), pointer to mapping object if match found
+ */
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void* uaddr, size_t size);
+
+/**
+ * Register a new mapping of an allocation.
+ * Called by functions creating a new mapping of an allocation, typically OS specific handlers.
+ * @param alloc The allocation object which has been mapped
+ * @param map Info about the mapping
+ */
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map);
+
+/**
+ * Remove and free mapping object from an allocation.
+ * @param alloc The allocation object to remove the mapping info from
+ * @param target The mapping object to remove
+ */
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target);
+
+/**
+ * Helper to find a block in the blockArray which holds a given byte offset.
+ * @param alloc The allocation object to find the block in
+ * @param offset Offset (in bytes) from allocation start to find the block of
+ * @param[out] block_index Pointer to the index of the block matching
+ * @param[out] block_internal_offset Offset within the returned block of the searched offset
+ * @return 0 if a matching block was found, any other value for error
+ */
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const block_index, uint64_t * const block_internal_offset);
+
+/**
+ * Cache maintenance helper.
+ * Performs the requested cache operation on the given handle.
+ * @param mem Allocation handle
+ * @param op Cache maintenance operation to perform
+ * @param address User mapping at which to do the operation
+ * @param size Length (in bytes) of the range to do the operation on
+ */
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size);
+
+/**
+ * Import module session early init.
+ * Calls session_begin on all installed import modules.
+ * @param session The core session object to initialized the import handler for
+ * */
+void umpp_import_handlers_init(umpp_session * session);
+
+/**
+ * Import module session cleanup.
+ * Calls session_end on all import modules bound to the session.
+ * @param session The core session object to initialized the import handler for
+ */
+void umpp_import_handlers_term(umpp_session * session);
+
+#endif /* _UMP_KERNEL_CORE_H_ */
+
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c b/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
new file mode 100644
index 0000000..c5b0d74
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
@@ -0,0 +1,162 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <common/ump_kernel_priv.h>
+
+#define MALI_PAD_INT(x) (((x) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1))
+
+/**
+ * Allocate a descriptor table capable of holding 'count' mappings
+ * @param count Number of mappings in the table
+ * @return Pointer to a new table, NULL on error
+ */
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count);
+
+/**
+ * Free a descriptor table
+ * @param table The table to free
+ */
+static void descriptor_table_free(umpp_descriptor_table * table);
+
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries)
+{
+	umpp_descriptor_mapping * map = kzalloc(sizeof(umpp_descriptor_mapping), GFP_KERNEL);
+
+	init_entries = MALI_PAD_INT(init_entries);
+	max_entries = MALI_PAD_INT(max_entries);
+
+	if (NULL != map)
+	{
+		map->table = descriptor_table_alloc(init_entries);
+		if (NULL != map->table)
+		{
+			init_rwsem( &map->lock);
+			set_bit(0, map->table->usage);
+			map->max_nr_mappings_allowed = max_entries;
+			map->current_nr_mappings = init_entries;
+			return map;
+
+			descriptor_table_free(map->table);
+		}
+		kfree(map);
+	}
+	return NULL;
+}
+
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map)
+{
+	UMP_ASSERT(NULL != map);
+	descriptor_table_free(map->table);
+	kfree(map);
+}
+
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target)
+{
+ 	int descriptor = 0;
+	UMP_ASSERT(NULL != map);
+	down_write( &map->lock);
+	descriptor = find_first_zero_bit(map->table->usage, map->current_nr_mappings);
+	if (descriptor == map->current_nr_mappings)
+	{
+		/* no free descriptor, try to expand the table */
+		umpp_descriptor_table * new_table;
+		umpp_descriptor_table * old_table = map->table;
+		int nr_mappings_new = map->current_nr_mappings + BITS_PER_LONG;
+
+		if (map->current_nr_mappings >= map->max_nr_mappings_allowed)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+		new_table = descriptor_table_alloc(nr_mappings_new);
+		if (NULL == new_table)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+ 		memcpy(new_table->usage, old_table->usage, (sizeof(unsigned long)*map->current_nr_mappings) / BITS_PER_LONG);
+ 		memcpy(new_table->mappings, old_table->mappings, map->current_nr_mappings * sizeof(void*));
+
+ 		map->table = new_table;
+		map->current_nr_mappings = nr_mappings_new;
+		descriptor_table_free(old_table);
+	}
+
+	/* we have found a valid descriptor, set the value and usage bit */
+	set_bit(descriptor, map->table->usage);
+	map->table->mappings[descriptor] = target;
+
+unlock_and_exit:
+	up_write(&map->lock);
+	return descriptor;
+}
+
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target)
+{
+	int result = -EINVAL;
+ 	UMP_ASSERT(map);
+	UMP_ASSERT(target);
+ 	down_read(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		*target = map->table->mappings[descriptor];
+		result = 0;
+	}
+	/* keep target untouched if the descriptor was not found */
+	up_read(&map->lock);
+	return result;
+}
+
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor)
+{
+	UMP_ASSERT(map);
+ 	down_write(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		map->table->mappings[descriptor] = NULL;
+		clear_bit(descriptor, map->table->usage);
+	}
+	up_write(&map->lock);
+}
+
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count)
+{
+	umpp_descriptor_table * table;
+
+	table = kzalloc(sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG) + (sizeof(void*) * count), __GFP_HARDWALL | GFP_KERNEL );
+
+	if (NULL != table)
+	{
+		table->usage = (unsigned long*)((u8*)table + sizeof(umpp_descriptor_table));
+		table->mappings = (void**)((u8*)table + sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG));
+	}
+
+	return table;
+}
+
+static void descriptor_table_free(umpp_descriptor_table * table)
+{
+ 	UMP_ASSERT(table);
+	kfree(table);
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h b/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
new file mode 100644
index 0000000..d06c145
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_kernel_descriptor_mapping.h
+ */
+
+#ifndef _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+#define _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+/**
+ * The actual descriptor mapping table, never directly accessed by clients
+ */
+typedef struct umpp_descriptor_table
+{
+	/* keep as a unsigned long to rely on the OS's bitops support */
+	unsigned long * usage; /**< Pointer to bitpattern indicating if a descriptor is valid/used(1) or not(0) */
+	void** mappings; /**< Array of the pointers the descriptors map to */
+} umpp_descriptor_table;
+
+/**
+ * The descriptor mapping object
+ * Provides a separate namespace where we can map an integer to a pointer
+ */
+typedef struct umpp_descriptor_mapping
+{
+	struct rw_semaphore lock; /**< Lock protecting access to the mapping object */
+	unsigned int max_nr_mappings_allowed; /**< Max number of mappings to support in this namespace */
+	unsigned int current_nr_mappings; /**< Current number of possible mappings */
+	umpp_descriptor_table * table; /**< Pointer to the current mapping table */
+} umpp_descriptor_mapping;
+
+/**
+ * Create a descriptor mapping object.
+ * Create a descriptor mapping capable of holding init_entries growable to max_entries.
+ * ID 0 is reserved so the number of available entries will be max - 1.
+ * @param init_entries Number of entries to preallocate memory for
+ * @param max_entries Number of entries to max support
+ * @return Pointer to a descriptor mapping object, NULL on failure
+ */
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries);
+
+/**
+ * Destroy a descriptor mapping object
+ * @param[in] map The map to free
+ */
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map);
+
+/**
+ * Allocate a new mapping entry (descriptor ID)
+ * Allocates a new entry in the map.
+ * @param[in] map The map to allocate a new entry in
+ * @param[in] target The value to map to
+ * @return The descriptor allocated, ID 0 on failure.
+ */
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target);
+
+/**
+ * Get the value mapped to by a descriptor ID
+ * @param[in] map The map to lookup the descriptor id in
+ * @param[in] descriptor The descriptor ID to lookup
+ * @param[out] target Pointer to a pointer which will receive the stored value
+ *
+ * @return 0 on success lookup, -EINVAL on lookup failure.
+ */
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target);
+
+/**
+ * Free the descriptor ID
+ * For the descriptor to be reused it has to be freed
+ * @param[in] map The map to free the descriptor from
+ * @param descriptor The descriptor ID to free
+ */
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor);
+
+#endif /* _UMP_KERNEL_DESCRIPTOR_MAPPING_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h b/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
new file mode 100644
index 0000000..38b6f1b
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_PRIV_H_
+#define _UMP_KERNEL_PRIV_H_
+
+#ifdef __KERNEL__
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <asm/cacheflush.h>
+#endif
+
+
+#define UMP_EXPECTED_IDS 64
+#define UMP_MAX_IDS 32768
+
+#ifdef __KERNEL__
+#define UMP_ASSERT(expr) \
+		if (!(expr)) { \
+			printk(KERN_ERR "UMP: Assertion failed! %s,%s,%s,line=%d\n",\
+					#expr,__FILE__,__func__,__LINE__); \
+					BUG(); \
+		}
+
+static inline void ump_sync_to_memory(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/*TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_TO_DEVICE);
+	mb(); /* for outer_sync (if needed) */
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+
+static inline void ump_sync_to_cpu(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/* TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_FROM_DEVICE);
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+#endif /* __KERNEL__*/
+#endif /* _UMP_KERNEL_PRIV_H_ */
+
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/imports/ion/Makefile b/bifrost/r9p0/kernel/drivers/base/ump/src/imports/ion/Makefile
new file mode 100644
index 0000000..ef74b27
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/imports/ion/Makefile
@@ -0,0 +1,53 @@
+#
+# (C) COPYRIGHT 2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+
+ifneq ($(KERNELRELEASE),)
+# Inside the kernel build system
+
+EXTRA_CFLAGS += -I$(KBUILD_EXTMOD) -I$(KBUILD_EXTMOD)/../../../../..
+KBUILD_EXTRA_SYMBOLS += "$(KBUILD_EXTMOD)/../../Module.symvers"
+
+SRC += ump_kernel_import_ion.c
+
+MODULE:=ump_ion_import.ko
+
+obj-m := $(MODULE:.ko=.o)
+$(MODULE:.ko=-y) := $(SRC:.c=.o)
+$(MODULE:.ko=-objs) := $(SRC:.c=.o)
+
+else
+# Outside the kernel build system
+#
+#
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR)
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+endif
+
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/imports/ion/sconscript b/bifrost/r9p0/kernel/drivers/base/ump/src/imports/ion/sconscript
new file mode 100644
index 0000000..cff24c8
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/imports/ion/sconscript
@@ -0,0 +1,49 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ion = env.Clone()
+
+if env_ion['ump_ion'] != '1':
+	Return()
+
+# Source files required for UMP.
+ion_src = [Glob('#kernel/drivers/base/ump/src/imports/ion/*.c')]
+
+# Note: cleaning via the Linux kernel build system does not yet work
+if env_ion.GetOption('clean') :
+	makeAction=Action("cd ${SOURCE.dir} && make clean", '$MAKECOMSTR')
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make PLATFORM=${platform} && cp ump_ion_import.ko $STATIC_LIB_PATH/ump_ion_import.ko", '$MAKECOMSTR')
+# The target is ump_import_ion.ko, built from the source in ion_src, via the action makeAction
+# ump_import_ion.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+# kernel build system, after which it can be installed to the directory specified if
+# "libs_install" is set; this is done by LibTarget.
+cmd = env_ion.Command('$STATIC_LIB_PATH/ump_ion_import.ko', ion_src, [makeAction])
+
+# Until we fathom out how the invoke the Linux build system to clean, we can use Clean
+# to remove generated files.
+
+patterns = ['*.mod.c', '*.o', '*.ko', '*.a', '.*.cmd', 'modules.order', '.tmp_versions', 'Module.symvers']
+
+for p in patterns:
+	Clean(cmd, Glob('#kernel/drivers/base/ump/src/imports/ion/%s' % p))
+
+env_ion.Depends('$STATIC_LIB_PATH/ump_ion_import.ko', '$STATIC_LIB_PATH/ump.ko')
+env_ion.KernelObjTarget('ump', cmd)
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c b/bifrost/r9p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
new file mode 100644
index 0000000..12b2e32
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/dma-mapping.h>
+#include "ion.h"
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+struct ion_wrapping_info
+{
+	struct ion_client *   ion_client;
+	struct ion_handle *   ion_handle;
+	int                   num_phys_blocks;
+	struct scatterlist *  sglist;
+};
+
+static struct ion_device * ion_device_get(void)
+{
+	/* < Customer to provide implementation >
+	 * Return a pointer to the global ion_device on the system
+	 */
+	return NULL;
+}
+
+static int import_ion_client_create(void** const custom_session_data)
+{
+	struct ion_client ** ion_client;
+
+	ion_client = (struct ion_client**)custom_session_data;
+
+	*ion_client = ion_client_create(ion_device_get(), "ump");
+
+	return PTR_RET(*ion_client);
+}
+
+
+static void import_ion_client_destroy(void* custom_session_data)
+{
+	struct ion_client * ion_client;
+
+	ion_client = (struct ion_client*)custom_session_data;
+	BUG_ON(!ion_client);
+
+	ion_client_destroy(ion_client);
+}
+
+
+static void import_ion_final_release_callback(const ump_dd_handle handle, void * info)
+{
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!info);
+
+	(void)handle;
+	ion_info = (struct ion_wrapping_info*)info;
+
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+	kfree(ion_info);
+	module_put(THIS_MODULE);
+}
+
+static ump_dd_handle import_ion_import(void * custom_session_data, void * pfd, ump_alloc_flags flags)
+{
+	int fd;
+	ump_dd_handle ump_handle;
+	struct scatterlist * sg;
+	int num_dma_blocks;
+	ump_dd_physical_block_64 * phys_blocks;
+	unsigned long i;
+	struct sg_table * sgt;
+
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!custom_session_data);
+	BUG_ON(!pfd);
+
+	ion_info = kzalloc(GFP_KERNEL, sizeof(*ion_info));
+	if (NULL == ion_info)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	ion_info->ion_client = (struct ion_client*)custom_session_data;
+
+	if (get_user(fd, (int*)pfd))
+	{
+		goto out;
+	}
+
+	ion_info->ion_handle = ion_import_dma_buf(ion_info->ion_client, fd);
+
+	if (IS_ERR_OR_NULL(ion_info->ion_handle))
+	{
+		goto out;
+	}
+
+	sgt = ion_sg_table(ion_info->ion_client, ion_info->ion_handle);
+	if (IS_ERR_OR_NULL(sgt))
+	{
+		goto ion_dma_map_failed;
+	}
+
+	ion_info->sglist = sgt->sgl;
+
+	sg = ion_info->sglist;
+	while (sg)
+	{
+		ion_info->num_phys_blocks++;
+		sg = sg_next(sg);
+	}
+
+	num_dma_blocks = dma_map_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	if (0 == num_dma_blocks)
+	{
+		goto linux_dma_map_failed;
+	}
+
+	phys_blocks = vmalloc(num_dma_blocks * sizeof(*phys_blocks));
+	if (NULL == phys_blocks)
+	{
+		goto vmalloc_failed;
+	}
+
+	for_each_sg(ion_info->sglist, sg, num_dma_blocks, i)
+	{
+		phys_blocks[i].addr = sg_phys(sg);
+		phys_blocks[i].size = sg_dma_len(sg);
+	}
+
+	ump_handle = ump_dd_create_from_phys_blocks_64(phys_blocks, num_dma_blocks, flags, NULL, import_ion_final_release_callback, ion_info);
+
+	vfree(phys_blocks);
+
+	if (ump_handle != UMP_DD_INVALID_MEMORY_HANDLE)
+	{
+		/*
+		 * As we have a final release callback installed
+		 * we must keep the module locked until
+		 * the callback has been triggered
+		 * */
+		__module_get(THIS_MODULE);
+		return ump_handle;
+	}
+
+	/* failed*/
+vmalloc_failed:
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+linux_dma_map_failed:
+ion_dma_map_failed:
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+out:
+	kfree(ion_info);
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+struct ump_import_handler import_handler_ion =
+{
+	.linux_module =  THIS_MODULE,
+	.session_begin = import_ion_client_create,
+	.session_end =   import_ion_client_destroy,
+	.import =        import_ion_import
+};
+
+static int __init import_ion_initialize_module(void)
+{
+	/* register with UMP */
+	return ump_import_module_register(UMP_EXTERNAL_MEM_TYPE_ION, &import_handler_ion);
+}
+
+static void __exit import_ion_cleanup_module(void)
+{
+	/* unregister import handler */
+	ump_import_module_unregister(UMP_EXTERNAL_MEM_TYPE_ION);
+}
+
+/* Setup init and exit functions for this module */
+module_init(import_ion_initialize_module);
+module_exit(import_ion_cleanup_module);
+
+/* And some module information */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/imports/sconscript b/bifrost/r9p0/kernel/drivers/base/ump/src/imports/sconscript
new file mode 100644
index 0000000..8f50683
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/imports/sconscript
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os, sys
+Import('env')
+
+import_modules = [ os.path.join( path, 'sconscript' ) for path in sorted(os.listdir( os.getcwd() )) ]
+
+for m in import_modules:
+	if os.path.exists(m):
+		SConscript( m, variant_dir=os.path.join( env['BUILD_DIR_PATH'], os.path.dirname(m) ), duplicate=0 )
+
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c b/bifrost/r9p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
new file mode 100644
index 0000000..0ec4ba6
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
@@ -0,0 +1,829 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014,2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump-ioctl.h>
+#include <linux/ump.h>
+
+#include <asm/uaccess.h>	         /* copy_*_user */
+#include <linux/compat.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/device.h>            /* class registration support */
+#include <linux/uaccess.h>
+
+#include <common/ump_kernel_core.h>
+
+#include "ump_kernel_linux_mem.h"
+#include <ump_arch.h>
+
+
+struct ump_linux_device
+{
+	struct cdev cdev;
+	struct class * ump_class;
+};
+
+/* Name of the UMP device driver */
+static char ump_dev_name[] = "ump"; /* should be const, but the functions we call requires non-cost */
+
+/* Module parameter to control log level */
+int ump_debug_level = 2;
+module_param(ump_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(ump_debug_level, "Higher number, more dmesg output");
+
+/* By default the module uses any available major, but it's possible to set it at load time to a specific number */
+int ump_major = 0;
+module_param(ump_major, int, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_major, "Device major number");
+
+#define UMP_REV_STRING "1.0"
+
+char * ump_revision = UMP_REV_STRING;
+module_param(ump_revision, charp, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_revision, "Revision info");
+
+static int umpp_linux_open(struct inode *inode, struct file *filp);
+static int umpp_linux_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+/* This variable defines the file operations this UMP device driver offers */
+static struct file_operations ump_fops =
+{
+	.owner   = THIS_MODULE,
+	.open    = umpp_linux_open,
+	.release = umpp_linux_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = umpp_linux_ioctl,
+#else
+	.ioctl   = umpp_linux_ioctl,
+#endif
+	.compat_ioctl = umpp_linux_ioctl,
+	.mmap = umpp_linux_mmap
+};
+
+/* import module handling */
+DEFINE_MUTEX(import_list_lock);
+struct ump_import_handler *  import_handlers[UMPP_EXTERNAL_MEM_COUNT];
+
+/* The global variable containing the global device data */
+static struct ump_linux_device ump_linux_device;
+
+#define DBG_MSG(level, ...) do { \
+if ((level) <=  ump_debug_level)\
+{\
+printk(KERN_DEBUG "UMP<" #level ">:\n" __VA_ARGS__);\
+} \
+} while (0)
+
+#define MSG_ERR(...) do{ \
+printk(KERN_ERR "UMP: ERR: %s\n           %s()%4d\n", __FILE__, __func__  , __LINE__) ; \
+printk(KERN_ERR __VA_ARGS__); \
+printk(KERN_ERR "\n"); \
+} while(0)
+
+#define MSG(...) do{ \
+printk(KERN_INFO "UMP: " __VA_ARGS__);\
+} while (0)
+
+/*
+ * This function is called by Linux to initialize this module.
+ * All we do is initialize the UMP device driver.
+ */
+static int __init umpp_linux_initialize_module(void)
+{
+	ump_result err;
+
+	err = umpp_core_constructor();
+	if (UMP_OK != err)
+	{
+		MSG_ERR("UMP device driver init failed\n");
+		return -ENOTTY;
+	}
+
+	MSG("UMP device driver %s loaded\n", UMP_REV_STRING);
+	return 0;
+}
+
+
+
+/*
+ * This function is called by Linux to unload/terminate/exit/cleanup this module.
+ * All we do is terminate the UMP device driver.
+ */
+static void __exit umpp_linux_cleanup_module(void)
+{
+	DBG_MSG(2, "Unloading UMP device driver\n");
+	umpp_core_destructor();
+	DBG_MSG(2, "Module unloaded\n");
+}
+
+
+
+/*
+ * Initialize the UMP device driver.
+ */
+ump_result umpp_device_initialize(void)
+{
+	int err;
+	dev_t dev = 0;
+
+	if (0 == ump_major)
+	{
+		/* auto select a major */
+		err = alloc_chrdev_region(&dev, 0, 1, ump_dev_name);
+		ump_major = MAJOR(dev);
+	}
+	else
+	{
+		/* use load time defined major number */
+		dev = MKDEV(ump_major, 0);
+		err = register_chrdev_region(dev, 1, ump_dev_name);
+	}
+
+	if (0 == err)
+	{
+		memset(&ump_linux_device, 0, sizeof(ump_linux_device));
+
+		/* initialize our char dev data */
+		cdev_init(&ump_linux_device.cdev, &ump_fops);
+		ump_linux_device.cdev.owner = THIS_MODULE;
+		ump_linux_device.cdev.ops = &ump_fops;
+
+		/* register char dev with the kernel */
+		err = cdev_add(&ump_linux_device.cdev, dev, 1/*count*/);
+		if (0 == err)
+		{
+
+			ump_linux_device.ump_class = class_create(THIS_MODULE, ump_dev_name);
+			if (IS_ERR(ump_linux_device.ump_class))
+			{
+				err = PTR_ERR(ump_linux_device.ump_class);
+			}
+			else
+			{
+				struct device * mdev;
+				mdev = device_create(ump_linux_device.ump_class, NULL, dev, NULL, ump_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return UMP_OK;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(ump_linux_device.ump_class);
+			}
+			cdev_del(&ump_linux_device.cdev);
+		}
+
+		unregister_chrdev_region(dev, 1);
+	}
+
+	return UMP_ERROR;
+}
+
+
+
+/*
+ * Terminate the UMP device driver
+ */
+void umpp_device_terminate(void)
+{
+	dev_t dev = MKDEV(ump_major, 0);
+
+	device_destroy(ump_linux_device.ump_class, dev);
+	class_destroy(ump_linux_device.ump_class);
+
+	/* unregister char device */
+	cdev_del(&ump_linux_device.cdev);
+
+	/* free major */
+	unregister_chrdev_region(dev, 1);
+}
+
+
+static int umpp_linux_open(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = umpp_core_session_start();
+	if (NULL == session)
+	{
+		return -EFAULT;
+	}
+	
+	filp->private_data = session;
+
+	return 0;
+}
+
+static int umpp_linux_release(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = filp->private_data;
+
+	umpp_core_session_end(session);
+
+	filp->private_data = NULL;
+
+	return 0;
+}
+
+/**************************/
+/*ioctl specific functions*/
+/**************************/
+static int do_ump_dd_allocate(umpp_session * session, ump_k_allocate * params)
+{
+	ump_dd_handle new_allocation;
+	new_allocation = ump_dd_allocate_64(params->size, params->alloc_flags, NULL, NULL, NULL);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	printk(KERN_WARNING "UMP: Allocation FAILED\n");
+	return -ENOMEM;
+}
+
+static int do_ump_dd_retain(umpp_session * session, ump_k_retain * params)
+{
+	umpp_session_memory_usage * it;
+
+	mutex_lock(&session->session_lock);
+
+	/* try to find it on the session usage list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found to already be in use */
+			/* check for overflow */
+			while(1)
+			{
+				int refcnt = atomic_read(&it->process_usage_count);
+				if (refcnt + 1 > 0)
+				{
+					/* add a process local ref */
+					if(atomic_cmpxchg(&it->process_usage_count, refcnt, refcnt + 1) == refcnt)
+					{
+						mutex_unlock(&session->session_lock);
+						return 0;
+					}
+				}
+				else
+				{
+					/* maximum usage cap reached */
+					mutex_unlock(&session->session_lock);
+					return -EBUSY;
+				}
+			}
+		}
+	}
+	/* try to look it up globally */
+
+	it = kmalloc(sizeof(*it), GFP_KERNEL);
+
+	if (NULL != it)
+	{
+		it->mem = ump_dd_from_secure_id(params->secure_id);
+		if (UMP_DD_INVALID_MEMORY_HANDLE != it->mem)
+		{
+			/* found, add it to the session usage list */
+			it->id = params->secure_id;
+			atomic_set(&it->process_usage_count, 1);
+			list_add(&it->link, &session->memory_usage);
+		}
+		else
+		{
+			/* not found */
+			kfree(it);
+			it = NULL;
+		}
+	}
+
+	mutex_unlock(&session->session_lock);
+
+	return (NULL != it) ? 0 : -ENODEV;
+}
+
+
+static int do_ump_dd_release(umpp_session * session, ump_k_release * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only do a release if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			result = 0;
+
+			if (0 == atomic_sub_return(1, &it->process_usage_count))
+			{
+				/* last ref in this process remove from the usage list and remove the underlying ref */
+				list_del(&it->link);
+				ump_dd_release(it->mem);
+				kfree(it);
+			}
+
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_sizequery(umpp_session * session, ump_k_sizequery * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->size = ump_dd_size_get_64(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_allocation_flags_get(umpp_session * session, ump_k_allocation_flags * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->alloc_flags = ump_dd_allocation_flags_get(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_msync_now(umpp_session * session, ump_k_msync * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, do the cache op */
+#ifdef CONFIG_COMPAT
+			if (is_compat_task())
+			{
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, compat_ptr(params->mapped_ptr.compat_value), params->size);
+				result = 0;
+			}
+			else
+			{
+#endif
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, params->mapped_ptr.value, params->size);
+				result = 0;
+#ifdef CONFIG_COMPAT
+			}
+#endif
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+
+void umpp_import_handlers_init(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		if (import_handlers[i])
+		{
+			import_handlers[i]->session_begin(&session->import_handler_data[i]);
+			/* It is OK if session_begin returned an error.
+			 * We won't do any import calls if so */
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+void umpp_import_handlers_term(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		/* only call if session_begin succeeded */
+		if (session->import_handler_data[i] != NULL)
+		{
+			/* if session_beging succeeded the handler
+			 * should not have unregistered with us */
+			BUG_ON(!import_handlers[i]);
+			import_handlers[i]->session_end(session->import_handler_data[i]);
+			session->import_handler_data[i] = NULL;
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler)
+{
+	int res = -EEXIST;
+
+	/* validate input */
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+	BUG_ON(!handler);
+	BUG_ON(!handler->linux_module);
+	BUG_ON(!handler->session_begin);
+	BUG_ON(!handler->session_end);
+	BUG_ON(!handler->import);
+
+	mutex_lock(&import_list_lock);
+
+	if (!import_handlers[type])
+	{
+		import_handlers[type] = handler;
+		res = 0;
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return res;
+}
+
+void ump_import_module_unregister(enum ump_external_memory_type type)
+{
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+
+	mutex_lock(&import_list_lock);
+	/* an error to call this if ump_import_module_register didn't succeed */
+	BUG_ON(!import_handlers[type]);
+	import_handlers[type] = NULL;
+	mutex_unlock(&import_list_lock);
+}
+
+static struct ump_import_handler * import_handler_get(unsigned int type_id)
+{
+	enum ump_external_memory_type type;
+	struct ump_import_handler * handler;
+
+	/* validate and convert input */
+	/* handle bad data here, not just BUG_ON */
+	if (type_id == 0 || type_id >= UMPP_EXTERNAL_MEM_COUNT)
+		return NULL;
+
+	type = (enum ump_external_memory_type)type_id;
+
+	/* find the handler */
+	mutex_lock(&import_list_lock);
+
+	handler = import_handlers[type];
+
+	if (handler)
+	{
+		if (!try_module_get(handler->linux_module))
+		{
+			handler = NULL;
+		}
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return handler;
+}
+
+static void import_handler_put(struct ump_import_handler * handler)
+{
+	module_put(handler->linux_module);
+}
+
+static int do_ump_dd_import(umpp_session * session, ump_k_import * params)
+{
+	ump_dd_handle new_allocation = UMP_DD_INVALID_MEMORY_HANDLE;
+	struct ump_import_handler * handler;
+
+	handler = import_handler_get(params->type);
+
+	if (handler)
+	{
+		/* try late binding if not already bound */
+		if (!session->import_handler_data[params->type])
+		{
+			handler->session_begin(&session->import_handler_data[params->type]);
+		}
+
+		/* do we have a bound session? */
+		if (session->import_handler_data[params->type])
+		{
+			new_allocation = handler->import( session->import_handler_data[params->type],
+		                                      params->phandle.value,
+		                                      params->alloc_flags);
+		}
+
+		/* done with the handler */
+		import_handler_put(handler);
+	}
+
+	/* did the import succeed? */
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	return -ENOMEM;
+
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	int ret;
+	uint64_t msg[(UMP_CALL_MAX_SIZE+7)>>3]; /* alignment fixup */
+	uint32_t size = _IOC_SIZE(cmd);
+	struct umpp_session *session = filp->private_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+	(void)inode; /* unused arg */
+#endif
+
+	/*
+	 * extract the type and number bitfields, and don't decode
+	 * wrong cmds: return ENOTTY (inappropriate ioctl) before access_ok()
+	 */
+	if (_IOC_TYPE(cmd) != UMP_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if (_IOC_NR(cmd) > UMP_IOC_MAXNR)
+	{
+		return -ENOTTY;
+	}
+
+	switch(cmd)
+	{
+		case UMP_FUNC_ALLOCATE:
+			if (size != sizeof(ump_k_allocate))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocate(session, (ump_k_allocate *)&msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_SIZEQUERY:
+			if (size != sizeof(ump_k_sizequery))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_sizequery(session,(ump_k_sizequery*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_MSYNC:
+			if (size != sizeof(ump_k_msync))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_msync_now(session,(ump_k_msync*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_IMPORT:
+			if (size != sizeof(ump_k_import))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user*)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_import(session, (ump_k_import*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		/* used only by v1 API */
+		case UMP_FUNC_ALLOCATION_FLAGS_GET:
+			if (size != sizeof(ump_k_allocation_flags))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocation_flags_get(session,(ump_k_allocation_flags*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_RETAIN:
+			if (size != sizeof(ump_k_retain))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_retain(session,(ump_k_retain*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		case UMP_FUNC_RELEASE:
+			if (size != sizeof(ump_k_release))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_release(session,(ump_k_release*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		default:
+			/* not ours */
+			return -ENOTTY;
+	}
+	/*redundant below*/
+	return -ENOTTY;
+}
+
+
+/* Export UMP kernel space API functions */
+EXPORT_SYMBOL(ump_dd_allocate_64);
+EXPORT_SYMBOL(ump_dd_allocation_flags_get);
+EXPORT_SYMBOL(ump_dd_secure_id_get);
+EXPORT_SYMBOL(ump_dd_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get_64);
+EXPORT_SYMBOL(ump_dd_size_get_64);
+EXPORT_SYMBOL(ump_dd_retain);
+EXPORT_SYMBOL(ump_dd_release);
+EXPORT_SYMBOL(ump_dd_create_from_phys_blocks_64);
+
+/* import API */
+EXPORT_SYMBOL(ump_import_module_register);
+EXPORT_SYMBOL(ump_import_module_unregister);
+
+
+
+/* V1 API */
+EXPORT_SYMBOL(ump_dd_handle_create_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_block_count_get);
+EXPORT_SYMBOL(ump_dd_phys_block_get);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get);
+EXPORT_SYMBOL(ump_dd_size_get);
+EXPORT_SYMBOL(ump_dd_reference_add);
+EXPORT_SYMBOL(ump_dd_reference_release);
+EXPORT_SYMBOL(ump_dd_handle_create_from_phys_blocks);
+
+
+/* Setup init and exit functions for this module */
+module_init(umpp_linux_initialize_module);
+module_exit(umpp_linux_cleanup_module);
+
+/* And some module informatio */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(UMP_REV_STRING);
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c b/bifrost/r9p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
new file mode 100644
index 0000000..43e28ee
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
@@ -0,0 +1,258 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+#include <linux/version.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/mm.h> /* memory mananger definitions */
+#include <linux/pfn.h>
+#include <linux/highmem.h> /*kmap*/
+
+#include <linux/compat.h> /* is_compat_task */
+
+#include <common/ump_kernel_core.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)
+#define phys_to_pfn_t(phys, flags) (phys >> PAGE_SHIFT)
+#else
+#include <linux/pfn_t.h>
+#endif
+
+static void umpp_vm_close(struct vm_area_struct *vma)
+{
+	umpp_cpu_mapping * mapping;
+	umpp_session * session;
+	ump_dd_handle handle;
+
+	mapping = (umpp_cpu_mapping*)vma->vm_private_data;
+	UMP_ASSERT(mapping);
+	
+	session = mapping->session;
+	handle = mapping->handle;
+
+	umpp_dd_remove_cpu_mapping(mapping->handle, mapping); /* will free the mapping object */
+	ump_dd_release(handle);
+}
+
+
+static const struct vm_operations_struct umpp_vm_ops = {
+	.close = umpp_vm_close
+};
+
+int umpp_phys_commit(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	/* round up to a page boundary */
+	alloc->size = (alloc->size + PAGE_SIZE - 1) & ~((uint64_t)PAGE_SIZE-1) ;
+	/* calculate number of pages */
+	alloc->blocksCount = alloc->size >> PAGE_SHIFT;
+
+	if( (sizeof(ump_dd_physical_block_64) * alloc->blocksCount) > ((size_t)-1))
+	{
+		printk(KERN_WARNING "UMP: umpp_phys_commit - trying to allocate more than possible\n");
+		return -ENOMEM;
+	}
+
+	alloc->block_array = kmalloc(sizeof(ump_dd_physical_block_64) * alloc->blocksCount, __GFP_HARDWALL | GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (NULL == alloc->block_array)
+	{
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		void * mp;
+		struct page * page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | __GFP_NOWARN | __GFP_COLD);
+		if (NULL == page)
+		{
+			break;
+		}
+
+		alloc->block_array[i].addr = PFN_PHYS(page_to_pfn(page));
+		alloc->block_array[i].size = PAGE_SIZE;
+
+		mp = kmap(page);
+		if (NULL == mp)
+		{
+			__free_page(page);
+			break;
+		}
+
+		memset(mp, 0x00, PAGE_SIZE); /* instead of __GFP_ZERO, so we can do cache maintenance */
+		ump_sync_to_memory(PFN_PHYS(page_to_pfn(page)), mp, PAGE_SIZE);
+		kunmap(page);
+	}
+
+	if (i == alloc->blocksCount)
+	{
+		return 0;
+	}
+	else
+	{
+		uint64_t j;
+		for (j = 0; j < i; j++)
+		{
+			struct page * page;
+			page = pfn_to_page(alloc->block_array[j].addr >> PAGE_SHIFT);
+			__free_page(page);
+		}
+		
+		kfree(alloc->block_array);
+
+		return -ENOMEM;
+	}
+}
+
+void umpp_phys_free(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		__free_page(pfn_to_page(alloc->block_array[i].addr >> PAGE_SHIFT));
+	}
+
+	kfree(alloc->block_array);
+}
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma)
+{
+	ump_secure_id id;
+	ump_dd_handle h;
+	size_t offset;
+	int err = -EINVAL;
+	size_t length = vma->vm_end - vma->vm_start;
+
+	umpp_cpu_mapping * map = NULL;
+	umpp_session *session = filp->private_data;
+
+	if ( 0 == length )
+	{
+		return -EINVAL;
+	}
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (NULL == map)
+	{
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* unpack our arg */
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	if (is_compat_task())
+	{
+#endif
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_32;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_32;
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	}
+	else
+	{
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_64;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_64;
+	}
+#endif
+
+	h = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE != h)
+	{
+		uint64_t i;
+		uint64_t block_idx;
+		uint64_t block_offset;
+		uint64_t paddr;
+		umpp_allocation * alloc;
+		uint64_t last_byte;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0))
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO | VM_MIXEDMAP | VM_DONTDUMP;
+#else
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO | VM_MIXEDMAP;
+#endif
+		vma->vm_ops = &umpp_vm_ops;
+		vma->vm_private_data = map;
+
+		alloc = (umpp_allocation*)h;
+
+		if( (alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+		{
+			/* cache disabled flag set, disable caching for cpu mappings */
+			vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		}
+
+		last_byte = length + (offset << PAGE_SHIFT) - 1;
+		if (last_byte >= alloc->size || last_byte < (offset << PAGE_SHIFT))
+		{
+			goto err_out;
+		}
+
+		if (umpp_dd_find_start_block(alloc, offset << PAGE_SHIFT, &block_idx, &block_offset))
+		{
+			goto err_out;
+		}
+
+		paddr = alloc->block_array[block_idx].addr + block_offset;
+
+		for (i = 0; i < (length >> PAGE_SHIFT); i++)
+		{
+			/* check if we've overrrun the current block, if so move to the next block */
+			if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+			{
+				block_idx++;
+				UMP_ASSERT(block_idx < alloc->blocksCount);
+				paddr = alloc->block_array[block_idx].addr;
+			}
+
+			err = vm_insert_mixed(vma,
+					vma->vm_start + (i << PAGE_SHIFT),
+					phys_to_pfn_t(paddr, 0));
+			paddr += PAGE_SIZE;
+		}
+
+		map->vaddr_start = (void*)vma->vm_start;
+		map->nr_pages = length >> PAGE_SHIFT;
+		map->page_off = offset;
+		map->handle = h;
+		map->session = session;
+
+		umpp_dd_add_cpu_mapping(h, map);
+
+		return 0;
+
+		err_out:
+
+		ump_dd_release(h);
+	}
+
+	kfree(map);
+
+out:
+
+	return err;
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h b/bifrost/r9p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
new file mode 100644
index 0000000..4fbf3b2
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_LINUX_MEM_H_
+#define _UMP_KERNEL_LINUX_MEM_H_
+
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma);
+
+#endif /* _UMP_KERNEL_LINUX_MEM_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/sconscript b/bifrost/r9p0/kernel/drivers/base/ump/src/sconscript
new file mode 100644
index 0000000..8fba351
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/sconscript
@@ -0,0 +1,38 @@
+#
+# (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ump = env.Clone()
+
+# Source files required for UMP.
+ump_src = [
+    Glob('common/*.c'),
+    Glob('imports/*/*.c'),
+    Glob('linux/*.c'),
+]
+
+make_args = env_ump.kernel_get_config_defines(ret_list = True) + [
+    'PLATFORM=%s' % env_ump['platform'],
+    'MALI_UNIT_TEST=%s' % env_ump['unit'],
+]
+
+mod = env_ump.BuildKernelModule('$STATIC_LIB_PATH/ump.ko', ump_src,
+                                make_args = make_args)
+env_ump.KernelObjTarget('ump', mod)
+
+SConscript( 'imports/sconscript' )
+
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/src/ump_arch.h b/bifrost/r9p0/kernel/drivers/base/ump/src/ump_arch.h
new file mode 100644
index 0000000..2303d56
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/src/ump_arch.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_ARCH_H_
+#define _UMP_ARCH_H_
+
+#include <common/ump_kernel_core.h>
+
+/**
+ * Device specific setup.
+ * Called by the UMP core code to to host OS/device specific setup.
+ * Typical use case is device node creation for talking to user space.
+ * @return UMP_OK on success, any other value on failure
+ */
+extern ump_result umpp_device_initialize(void);
+
+/**
+ * Device specific teardown.
+ * Undo any things done by ump_device_initialize.
+ */
+extern void umpp_device_terminate(void);
+
+extern int umpp_phys_commit(umpp_allocation * alloc);
+extern void umpp_phys_free(umpp_allocation * alloc);
+
+#endif /* _UMP_ARCH_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/base/ump/ump_ref_drv.h b/bifrost/r9p0/kernel/drivers/base/ump/ump_ref_drv.h
new file mode 100644
index 0000000..9a265fe
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/base/ump/ump_ref_drv.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_ref_drv.h
+ *
+ * This file contains the link to user space part of the UMP API for usage by MALI 400 gralloc.
+ *
+ */
+
+#ifndef _UMP_REF_DRV_H_
+#define _UMP_REF_DRV_H_
+
+#include <ump/ump.h>
+
+
+#endif /* _UMP_REF_DRV_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/Kbuild b/bifrost/r9p0/kernel/drivers/gpu/arm/Kbuild
new file mode 100644
index 0000000..19c7e9a
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/Kbuild
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/Kconfig b/bifrost/r9p0/kernel/drivers/gpu/arm/Kconfig
new file mode 100644
index 0000000..1f30eb5
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/Kconfig
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menu "ARM GPU Configuration"
+source "drivers/gpu/arm/midgard/Kconfig"
+endmenu
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_context.o.d b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_context.o.d
new file mode 100644
index 0000000..4a8bd2c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_context.o.d
@@ -0,0 +1,271 @@
+mali_kbase_context.o: \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c \
+ include/linux/kconfig.h include/generated/autoconf.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h \
+ include/generated/uapi/linux/version.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h \
+ include/linux/bug.h arch/arm64/include/asm/bug.h \
+ arch/arm64/include/asm/brk-imm.h include/asm-generic/bug.h \
+ include/linux/compiler.h include/linux/compiler-gcc.h \
+ include/uapi/linux/types.h arch/arm64/include/generated/asm/types.h \
+ include/uapi/asm-generic/types.h include/asm-generic/int-ll64.h \
+ include/uapi/asm-generic/int-ll64.h \
+ arch/arm64/include/uapi/asm/bitsperlong.h \
+ include/asm-generic/bitsperlong.h include/uapi/asm-generic/bitsperlong.h \
+ include/uapi/linux/posix_types.h include/linux/stddef.h \
+ include/uapi/linux/stddef.h arch/arm64/include/uapi/asm/posix_types.h \
+ include/uapi/asm-generic/posix_types.h include/linux/kernel.h \
+ /opt/gcc-linaro-aarch64-linux-gnu-4.9-2014.09_linux/lib/gcc/aarch64-linux-gnu/4.9.2/include/stdarg.h \
+ include/linux/linkage.h include/linux/stringify.h include/linux/export.h \
+ arch/arm64/include/asm/linkage.h include/linux/types.h \
+ include/linux/bitops.h arch/arm64/include/asm/bitops.h \
+ arch/arm64/include/asm/barrier.h include/asm-generic/barrier.h \
+ include/asm-generic/bitops/builtin-__ffs.h \
+ include/asm-generic/bitops/builtin-ffs.h \
+ include/asm-generic/bitops/builtin-__fls.h \
+ include/asm-generic/bitops/builtin-fls.h \
+ include/asm-generic/bitops/ffz.h include/asm-generic/bitops/fls64.h \
+ include/asm-generic/bitops/find.h include/asm-generic/bitops/sched.h \
+ include/asm-generic/bitops/hweight.h \
+ include/asm-generic/bitops/arch_hweight.h \
+ include/asm-generic/bitops/const_hweight.h \
+ include/asm-generic/bitops/lock.h \
+ include/asm-generic/bitops/non-atomic.h include/asm-generic/bitops/le.h \
+ arch/arm64/include/uapi/asm/byteorder.h \
+ include/linux/byteorder/little_endian.h \
+ include/uapi/linux/byteorder/little_endian.h include/linux/swab.h \
+ include/uapi/linux/swab.h arch/arm64/include/generated/asm/swab.h \
+ include/uapi/asm-generic/swab.h include/linux/byteorder/generic.h \
+ include/linux/log2.h include/linux/typecheck.h include/linux/printk.h \
+ include/linux/init.h include/linux/kern_levels.h include/linux/cache.h \
+ include/uapi/linux/kernel.h include/uapi/linux/sysinfo.h \
+ arch/arm64/include/asm/cache.h arch/arm64/include/asm/cachetype.h \
+ arch/arm64/include/asm/cputype.h arch/arm64/include/asm/sysreg.h \
+ arch/arm64/include/asm/opcodes.h \
+ arch/arm64/include/../../arm/include/asm/opcodes.h \
+ arch/arm64/include/asm/page.h include/uapi/linux/const.h \
+ include/linux/personality.h include/uapi/linux/personality.h \
+ arch/arm64/include/asm/pgtable-types.h \
+ include/asm-generic/pgtable-nopud.h arch/arm64/include/asm/memory.h \
+ arch/arm64/include/generated/asm/sizes.h include/asm-generic/sizes.h \
+ include/linux/sizes.h include/linux/mmdebug.h \
+ include/asm-generic/memory_model.h include/linux/pfn.h \
+ include/asm-generic/getorder.h include/linux/atomic.h \
+ arch/arm64/include/asm/atomic.h arch/arm64/include/asm/lse.h \
+ arch/arm64/include/asm/atomic_ll_sc.h arch/arm64/include/asm/cmpxchg.h \
+ include/asm-generic/atomic-long.h include/linux/highmem.h \
+ include/linux/fs.h include/linux/wait.h include/linux/list.h \
+ include/linux/poison.h include/linux/spinlock.h include/linux/preempt.h \
+ arch/arm64/include/generated/asm/preempt.h include/asm-generic/preempt.h \
+ include/linux/thread_info.h arch/arm64/include/asm/thread_info.h \
+ include/linux/irqflags.h arch/arm64/include/asm/irqflags.h \
+ arch/arm64/include/asm/ptrace.h arch/arm64/include/uapi/asm/ptrace.h \
+ arch/arm64/include/asm/hwcap.h arch/arm64/include/uapi/asm/hwcap.h \
+ include/asm-generic/ptrace.h include/linux/bottom_half.h \
+ include/linux/spinlock_types.h arch/arm64/include/asm/spinlock_types.h \
+ include/linux/lockdep.h include/linux/rwlock_types.h \
+ arch/arm64/include/asm/spinlock.h arch/arm64/include/asm/processor.h \
+ include/linux/string.h include/uapi/linux/string.h \
+ arch/arm64/include/asm/string.h arch/arm64/include/asm/alternative.h \
+ arch/arm64/include/asm/cpucaps.h arch/arm64/include/asm/insn.h \
+ arch/arm64/include/asm/fpsimd.h arch/arm64/include/asm/hw_breakpoint.h \
+ arch/arm64/include/asm/cpufeature.h include/linux/jump_label.h \
+ arch/arm64/include/asm/jump_label.h arch/arm64/include/asm/virt.h \
+ arch/arm64/include/asm/sections.h include/asm-generic/sections.h \
+ arch/arm64/include/asm/pgtable-hwdef.h include/linux/rwlock.h \
+ include/linux/spinlock_api_smp.h include/linux/rwlock_api_smp.h \
+ arch/arm64/include/generated/asm/current.h include/asm-generic/current.h \
+ include/uapi/linux/wait.h include/linux/kdev_t.h \
+ include/uapi/linux/kdev_t.h include/linux/dcache.h \
+ include/linux/rculist.h include/linux/rcupdate.h include/linux/threads.h \
+ include/linux/cpumask.h include/linux/bitmap.h include/linux/seqlock.h \
+ include/linux/completion.h include/linux/debugobjects.h \
+ include/linux/ktime.h include/linux/time.h include/linux/math64.h \
+ arch/arm64/include/generated/asm/div64.h include/asm-generic/div64.h \
+ include/linux/time64.h include/uapi/linux/time.h include/linux/jiffies.h \
+ include/linux/timex.h include/uapi/linux/timex.h \
+ include/uapi/linux/param.h arch/arm64/include/uapi/asm/param.h \
+ include/asm-generic/param.h include/uapi/asm-generic/param.h \
+ arch/arm64/include/asm/timex.h arch/arm64/include/asm/arch_timer.h \
+ include/clocksource/arm_arch_timer.h include/linux/timecounter.h \
+ include/asm-generic/timex.h include/generated/timeconst.h \
+ include/linux/timekeeping.h include/linux/errno.h \
+ include/uapi/linux/errno.h arch/arm64/include/generated/asm/errno.h \
+ include/uapi/asm-generic/errno.h include/uapi/asm-generic/errno-base.h \
+ include/linux/rcutree.h include/linux/rculist_bl.h \
+ include/linux/list_bl.h include/linux/bit_spinlock.h \
+ include/linux/lockref.h include/generated/bounds.h \
+ include/linux/stringhash.h include/linux/hash.h include/linux/path.h \
+ include/linux/stat.h arch/arm64/include/asm/stat.h \
+ arch/arm64/include/uapi/asm/stat.h include/uapi/asm-generic/stat.h \
+ arch/arm64/include/asm/compat.h include/linux/sched.h \
+ include/uapi/linux/sched.h include/linux/sched/prio.h \
+ include/linux/capability.h include/uapi/linux/capability.h \
+ include/linux/plist.h include/linux/rbtree.h include/linux/nodemask.h \
+ include/linux/numa.h include/linux/mm_types.h include/linux/auxvec.h \
+ include/uapi/linux/auxvec.h arch/arm64/include/uapi/asm/auxvec.h \
+ include/linux/rwsem.h include/linux/err.h include/linux/osq_lock.h \
+ arch/arm64/include/generated/asm/rwsem.h include/asm-generic/rwsem.h \
+ include/linux/uprobes.h include/linux/page-flags-layout.h \
+ arch/arm64/include/asm/sparsemem.h include/linux/workqueue.h \
+ include/linux/timer.h include/linux/sysctl.h include/linux/uidgid.h \
+ include/linux/highuid.h include/uapi/linux/sysctl.h \
+ arch/arm64/include/asm/mmu.h include/linux/cputime.h \
+ arch/arm64/include/generated/asm/cputime.h include/asm-generic/cputime.h \
+ include/asm-generic/cputime_jiffies.h include/linux/smp.h \
+ include/linux/llist.h arch/arm64/include/asm/smp.h include/linux/sem.h \
+ include/uapi/linux/sem.h include/linux/ipc.h include/uapi/linux/ipc.h \
+ arch/arm64/include/generated/asm/ipcbuf.h \
+ include/uapi/asm-generic/ipcbuf.h \
+ arch/arm64/include/generated/asm/sembuf.h \
+ include/uapi/asm-generic/sembuf.h include/linux/shm.h \
+ include/uapi/linux/shm.h arch/arm64/include/generated/asm/shmbuf.h \
+ include/uapi/asm-generic/shmbuf.h arch/arm64/include/asm/shmparam.h \
+ include/uapi/asm-generic/shmparam.h include/linux/signal.h \
+ include/uapi/linux/signal.h arch/arm64/include/uapi/asm/signal.h \
+ include/asm-generic/signal.h include/uapi/asm-generic/signal.h \
+ include/uapi/asm-generic/signal-defs.h \
+ arch/arm64/include/uapi/asm/sigcontext.h \
+ arch/arm64/include/uapi/asm/siginfo.h include/asm-generic/siginfo.h \
+ include/uapi/asm-generic/siginfo.h include/linux/pid.h \
+ include/linux/percpu.h arch/arm64/include/asm/percpu.h \
+ include/asm-generic/percpu.h include/linux/percpu-defs.h \
+ include/linux/topology.h include/linux/mmzone.h \
+ include/linux/pageblock-flags.h include/linux/memory_hotplug.h \
+ include/linux/notifier.h include/linux/mutex.h include/linux/srcu.h \
+ arch/arm64/include/asm/topology.h include/asm-generic/topology.h \
+ include/linux/seccomp.h include/uapi/linux/seccomp.h \
+ arch/arm64/include/asm/seccomp.h arch/arm64/include/asm/unistd.h \
+ arch/arm64/include/uapi/asm/unistd.h include/asm-generic/unistd.h \
+ include/uapi/asm-generic/unistd.h include/asm-generic/seccomp.h \
+ include/uapi/linux/unistd.h include/linux/rtmutex.h \
+ include/linux/resource.h include/uapi/linux/resource.h \
+ arch/arm64/include/generated/asm/resource.h \
+ include/asm-generic/resource.h include/uapi/asm-generic/resource.h \
+ include/linux/hrtimer.h include/linux/timerqueue.h include/linux/kcov.h \
+ include/uapi/linux/kcov.h include/linux/task_io_accounting.h \
+ include/linux/latencytop.h include/linux/cred.h include/linux/key.h \
+ include/linux/assoc_array.h include/linux/selinux.h include/linux/gfp.h \
+ include/uapi/linux/magic.h include/linux/cgroup-defs.h \
+ include/uapi/linux/limits.h include/linux/idr.h \
+ include/linux/percpu-refcount.h include/linux/percpu-rwsem.h \
+ include/linux/rcu_sync.h include/linux/bpf-cgroup.h \
+ include/uapi/linux/bpf.h include/uapi/linux/bpf_common.h \
+ include/linux/cgroup_subsys.h include/uapi/linux/stat.h \
+ include/linux/list_lru.h include/linux/shrinker.h \
+ include/linux/radix-tree.h include/linux/semaphore.h \
+ include/uapi/linux/fiemap.h include/linux/migrate_mode.h \
+ include/linux/blk_types.h include/linux/bvec.h \
+ include/linux/delayed_call.h include/uapi/linux/fs.h \
+ include/uapi/linux/ioctl.h arch/arm64/include/generated/asm/ioctl.h \
+ include/asm-generic/ioctl.h include/uapi/asm-generic/ioctl.h \
+ include/linux/quota.h include/linux/percpu_counter.h \
+ include/uapi/linux/dqblk_xfs.h include/linux/dqblk_v1.h \
+ include/linux/dqblk_v2.h include/linux/dqblk_qtree.h \
+ include/linux/projid.h include/uapi/linux/quota.h \
+ include/linux/nfs_fs_i.h include/linux/fcntl.h \
+ include/uapi/linux/fcntl.h arch/arm64/include/uapi/asm/fcntl.h \
+ include/uapi/asm-generic/fcntl.h include/linux/mm.h \
+ include/linux/debug_locks.h include/linux/range.h \
+ include/linux/page_ext.h include/linux/stacktrace.h \
+ include/linux/stackdepot.h include/linux/page_ref.h \
+ include/linux/page-flags.h include/linux/tracepoint-defs.h \
+ include/linux/static_key.h arch/arm64/include/asm/pgtable.h \
+ arch/arm64/include/asm/proc-fns.h arch/arm64/include/asm/pgtable-prot.h \
+ arch/arm64/include/asm/fixmap.h arch/arm64/include/asm/boot.h \
+ include/asm-generic/fixmap.h include/asm-generic/pgtable.h \
+ include/linux/huge_mm.h include/linux/vmstat.h \
+ include/linux/vm_event_item.h include/linux/uaccess.h \
+ arch/arm64/include/asm/uaccess.h arch/arm64/include/asm/kernel-pgtable.h \
+ include/linux/kasan-checks.h arch/arm64/include/asm/compiler.h \
+ include/linux/hardirq.h include/linux/ftrace_irq.h include/linux/vtime.h \
+ include/linux/context_tracking_state.h arch/arm64/include/asm/hardirq.h \
+ arch/arm64/include/asm/irq.h include/asm-generic/irq.h \
+ include/linux/irq_cpustat.h arch/arm64/include/asm/cacheflush.h \
+ arch/arm64/include/generated/asm/kmap_types.h \
+ include/asm-generic/kmap_types.h include/linux/slab.h \
+ include/linux/kmemleak.h include/linux/kasan.h include/linux/vmalloc.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h \
+ include/linux/platform_device.h include/linux/device.h \
+ include/linux/ioport.h include/linux/kobject.h include/linux/sysfs.h \
+ include/linux/kernfs.h include/linux/kobject_ns.h include/linux/kref.h \
+ include/linux/klist.h include/linux/pinctrl/devinfo.h \
+ include/linux/pinctrl/consumer.h include/linux/seq_file.h \
+ include/linux/pinctrl/pinctrl-state.h include/linux/pm.h \
+ include/linux/ratelimit.h arch/arm64/include/asm/device.h \
+ include/linux/pm_wakeup.h include/linux/mod_devicetable.h \
+ include/linux/uuid.h include/uapi/linux/uuid.h \
+ include/linux/miscdevice.h include/uapi/linux/major.h \
+ include/linux/module.h include/linux/kmod.h include/linux/elf.h \
+ arch/arm64/include/asm/elf.h arch/arm64/include/generated/asm/user.h \
+ include/asm-generic/user.h include/uapi/linux/elf.h \
+ include/uapi/linux/elf-em.h include/linux/moduleparam.h \
+ include/linux/extable.h include/linux/rbtree_latch.h \
+ arch/arm64/include/asm/module.h include/asm-generic/module.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h \
+ include/linux/dma-mapping.h include/linux/dma-debug.h \
+ include/linux/dma-direction.h include/linux/scatterlist.h \
+ arch/arm64/include/asm/io.h \
+ arch/arm64/include/generated/asm/early_ioremap.h \
+ include/asm-generic/early_ioremap.h include/xen/xen.h \
+ include/asm-generic/io.h include/asm-generic/pci_iomap.h \
+ include/linux/kmemcheck.h arch/arm64/include/asm/dma-mapping.h \
+ arch/arm64/include/asm/xen/hypervisor.h \
+ arch/arm64/include/../../arm/include/asm/xen/hypervisor.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h \
+ include/linux/mempool.h include/linux/file.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h \
+ include/linux/fence.h include/linux/debugfs.h include/linux/clk.h \
+ include/linux/regulator/consumer.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_core_linux.o.d b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_core_linux.o.d
new file mode 100644
index 0000000..a0d5b9b
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_core_linux.o.d
@@ -0,0 +1,324 @@
+mali_kbase_core_linux.o: \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c \
+ include/linux/kconfig.h include/generated/autoconf.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h \
+ include/generated/uapi/linux/version.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h \
+ include/linux/bug.h arch/arm64/include/asm/bug.h \
+ arch/arm64/include/asm/brk-imm.h include/asm-generic/bug.h \
+ include/linux/compiler.h include/linux/compiler-gcc.h \
+ include/uapi/linux/types.h arch/arm64/include/generated/asm/types.h \
+ include/uapi/asm-generic/types.h include/asm-generic/int-ll64.h \
+ include/uapi/asm-generic/int-ll64.h \
+ arch/arm64/include/uapi/asm/bitsperlong.h \
+ include/asm-generic/bitsperlong.h include/uapi/asm-generic/bitsperlong.h \
+ include/uapi/linux/posix_types.h include/linux/stddef.h \
+ include/uapi/linux/stddef.h arch/arm64/include/uapi/asm/posix_types.h \
+ include/uapi/asm-generic/posix_types.h include/linux/kernel.h \
+ /opt/gcc-linaro-aarch64-linux-gnu-4.9-2014.09_linux/lib/gcc/aarch64-linux-gnu/4.9.2/include/stdarg.h \
+ include/linux/linkage.h include/linux/stringify.h include/linux/export.h \
+ arch/arm64/include/asm/linkage.h include/linux/types.h \
+ include/linux/bitops.h arch/arm64/include/asm/bitops.h \
+ arch/arm64/include/asm/barrier.h include/asm-generic/barrier.h \
+ include/asm-generic/bitops/builtin-__ffs.h \
+ include/asm-generic/bitops/builtin-ffs.h \
+ include/asm-generic/bitops/builtin-__fls.h \
+ include/asm-generic/bitops/builtin-fls.h \
+ include/asm-generic/bitops/ffz.h include/asm-generic/bitops/fls64.h \
+ include/asm-generic/bitops/find.h include/asm-generic/bitops/sched.h \
+ include/asm-generic/bitops/hweight.h \
+ include/asm-generic/bitops/arch_hweight.h \
+ include/asm-generic/bitops/const_hweight.h \
+ include/asm-generic/bitops/lock.h \
+ include/asm-generic/bitops/non-atomic.h include/asm-generic/bitops/le.h \
+ arch/arm64/include/uapi/asm/byteorder.h \
+ include/linux/byteorder/little_endian.h \
+ include/uapi/linux/byteorder/little_endian.h include/linux/swab.h \
+ include/uapi/linux/swab.h arch/arm64/include/generated/asm/swab.h \
+ include/uapi/asm-generic/swab.h include/linux/byteorder/generic.h \
+ include/linux/log2.h include/linux/typecheck.h include/linux/printk.h \
+ include/linux/init.h include/linux/kern_levels.h include/linux/cache.h \
+ include/uapi/linux/kernel.h include/uapi/linux/sysinfo.h \
+ arch/arm64/include/asm/cache.h arch/arm64/include/asm/cachetype.h \
+ arch/arm64/include/asm/cputype.h arch/arm64/include/asm/sysreg.h \
+ arch/arm64/include/asm/opcodes.h \
+ arch/arm64/include/../../arm/include/asm/opcodes.h \
+ arch/arm64/include/asm/page.h include/uapi/linux/const.h \
+ include/linux/personality.h include/uapi/linux/personality.h \
+ arch/arm64/include/asm/pgtable-types.h \
+ include/asm-generic/pgtable-nopud.h arch/arm64/include/asm/memory.h \
+ arch/arm64/include/generated/asm/sizes.h include/asm-generic/sizes.h \
+ include/linux/sizes.h include/linux/mmdebug.h \
+ include/asm-generic/memory_model.h include/linux/pfn.h \
+ include/asm-generic/getorder.h include/linux/atomic.h \
+ arch/arm64/include/asm/atomic.h arch/arm64/include/asm/lse.h \
+ arch/arm64/include/asm/atomic_ll_sc.h arch/arm64/include/asm/cmpxchg.h \
+ include/asm-generic/atomic-long.h include/linux/highmem.h \
+ include/linux/fs.h include/linux/wait.h include/linux/list.h \
+ include/linux/poison.h include/linux/spinlock.h include/linux/preempt.h \
+ arch/arm64/include/generated/asm/preempt.h include/asm-generic/preempt.h \
+ include/linux/thread_info.h arch/arm64/include/asm/thread_info.h \
+ include/linux/irqflags.h arch/arm64/include/asm/irqflags.h \
+ arch/arm64/include/asm/ptrace.h arch/arm64/include/uapi/asm/ptrace.h \
+ arch/arm64/include/asm/hwcap.h arch/arm64/include/uapi/asm/hwcap.h \
+ include/asm-generic/ptrace.h include/linux/bottom_half.h \
+ include/linux/spinlock_types.h arch/arm64/include/asm/spinlock_types.h \
+ include/linux/lockdep.h include/linux/rwlock_types.h \
+ arch/arm64/include/asm/spinlock.h arch/arm64/include/asm/processor.h \
+ include/linux/string.h include/uapi/linux/string.h \
+ arch/arm64/include/asm/string.h arch/arm64/include/asm/alternative.h \
+ arch/arm64/include/asm/cpucaps.h arch/arm64/include/asm/insn.h \
+ arch/arm64/include/asm/fpsimd.h arch/arm64/include/asm/hw_breakpoint.h \
+ arch/arm64/include/asm/cpufeature.h include/linux/jump_label.h \
+ arch/arm64/include/asm/jump_label.h arch/arm64/include/asm/virt.h \
+ arch/arm64/include/asm/sections.h include/asm-generic/sections.h \
+ arch/arm64/include/asm/pgtable-hwdef.h include/linux/rwlock.h \
+ include/linux/spinlock_api_smp.h include/linux/rwlock_api_smp.h \
+ arch/arm64/include/generated/asm/current.h include/asm-generic/current.h \
+ include/uapi/linux/wait.h include/linux/kdev_t.h \
+ include/uapi/linux/kdev_t.h include/linux/dcache.h \
+ include/linux/rculist.h include/linux/rcupdate.h include/linux/threads.h \
+ include/linux/cpumask.h include/linux/bitmap.h include/linux/seqlock.h \
+ include/linux/completion.h include/linux/debugobjects.h \
+ include/linux/ktime.h include/linux/time.h include/linux/math64.h \
+ arch/arm64/include/generated/asm/div64.h include/asm-generic/div64.h \
+ include/linux/time64.h include/uapi/linux/time.h include/linux/jiffies.h \
+ include/linux/timex.h include/uapi/linux/timex.h \
+ include/uapi/linux/param.h arch/arm64/include/uapi/asm/param.h \
+ include/asm-generic/param.h include/uapi/asm-generic/param.h \
+ arch/arm64/include/asm/timex.h arch/arm64/include/asm/arch_timer.h \
+ include/clocksource/arm_arch_timer.h include/linux/timecounter.h \
+ include/asm-generic/timex.h include/generated/timeconst.h \
+ include/linux/timekeeping.h include/linux/errno.h \
+ include/uapi/linux/errno.h arch/arm64/include/generated/asm/errno.h \
+ include/uapi/asm-generic/errno.h include/uapi/asm-generic/errno-base.h \
+ include/linux/rcutree.h include/linux/rculist_bl.h \
+ include/linux/list_bl.h include/linux/bit_spinlock.h \
+ include/linux/lockref.h include/generated/bounds.h \
+ include/linux/stringhash.h include/linux/hash.h include/linux/path.h \
+ include/linux/stat.h arch/arm64/include/asm/stat.h \
+ arch/arm64/include/uapi/asm/stat.h include/uapi/asm-generic/stat.h \
+ arch/arm64/include/asm/compat.h include/linux/sched.h \
+ include/uapi/linux/sched.h include/linux/sched/prio.h \
+ include/linux/capability.h include/uapi/linux/capability.h \
+ include/linux/plist.h include/linux/rbtree.h include/linux/nodemask.h \
+ include/linux/numa.h include/linux/mm_types.h include/linux/auxvec.h \
+ include/uapi/linux/auxvec.h arch/arm64/include/uapi/asm/auxvec.h \
+ include/linux/rwsem.h include/linux/err.h include/linux/osq_lock.h \
+ arch/arm64/include/generated/asm/rwsem.h include/asm-generic/rwsem.h \
+ include/linux/uprobes.h include/linux/page-flags-layout.h \
+ arch/arm64/include/asm/sparsemem.h include/linux/workqueue.h \
+ include/linux/timer.h include/linux/sysctl.h include/linux/uidgid.h \
+ include/linux/highuid.h include/uapi/linux/sysctl.h \
+ arch/arm64/include/asm/mmu.h include/linux/cputime.h \
+ arch/arm64/include/generated/asm/cputime.h include/asm-generic/cputime.h \
+ include/asm-generic/cputime_jiffies.h include/linux/smp.h \
+ include/linux/llist.h arch/arm64/include/asm/smp.h include/linux/sem.h \
+ include/uapi/linux/sem.h include/linux/ipc.h include/uapi/linux/ipc.h \
+ arch/arm64/include/generated/asm/ipcbuf.h \
+ include/uapi/asm-generic/ipcbuf.h \
+ arch/arm64/include/generated/asm/sembuf.h \
+ include/uapi/asm-generic/sembuf.h include/linux/shm.h \
+ include/uapi/linux/shm.h arch/arm64/include/generated/asm/shmbuf.h \
+ include/uapi/asm-generic/shmbuf.h arch/arm64/include/asm/shmparam.h \
+ include/uapi/asm-generic/shmparam.h include/linux/signal.h \
+ include/uapi/linux/signal.h arch/arm64/include/uapi/asm/signal.h \
+ include/asm-generic/signal.h include/uapi/asm-generic/signal.h \
+ include/uapi/asm-generic/signal-defs.h \
+ arch/arm64/include/uapi/asm/sigcontext.h \
+ arch/arm64/include/uapi/asm/siginfo.h include/asm-generic/siginfo.h \
+ include/uapi/asm-generic/siginfo.h include/linux/pid.h \
+ include/linux/percpu.h arch/arm64/include/asm/percpu.h \
+ include/asm-generic/percpu.h include/linux/percpu-defs.h \
+ include/linux/topology.h include/linux/mmzone.h \
+ include/linux/pageblock-flags.h include/linux/memory_hotplug.h \
+ include/linux/notifier.h include/linux/mutex.h include/linux/srcu.h \
+ arch/arm64/include/asm/topology.h include/asm-generic/topology.h \
+ include/linux/seccomp.h include/uapi/linux/seccomp.h \
+ arch/arm64/include/asm/seccomp.h arch/arm64/include/asm/unistd.h \
+ arch/arm64/include/uapi/asm/unistd.h include/asm-generic/unistd.h \
+ include/uapi/asm-generic/unistd.h include/asm-generic/seccomp.h \
+ include/uapi/linux/unistd.h include/linux/rtmutex.h \
+ include/linux/resource.h include/uapi/linux/resource.h \
+ arch/arm64/include/generated/asm/resource.h \
+ include/asm-generic/resource.h include/uapi/asm-generic/resource.h \
+ include/linux/hrtimer.h include/linux/timerqueue.h include/linux/kcov.h \
+ include/uapi/linux/kcov.h include/linux/task_io_accounting.h \
+ include/linux/latencytop.h include/linux/cred.h include/linux/key.h \
+ include/linux/assoc_array.h include/linux/selinux.h include/linux/gfp.h \
+ include/uapi/linux/magic.h include/linux/cgroup-defs.h \
+ include/uapi/linux/limits.h include/linux/idr.h \
+ include/linux/percpu-refcount.h include/linux/percpu-rwsem.h \
+ include/linux/rcu_sync.h include/linux/bpf-cgroup.h \
+ include/uapi/linux/bpf.h include/uapi/linux/bpf_common.h \
+ include/linux/cgroup_subsys.h include/uapi/linux/stat.h \
+ include/linux/list_lru.h include/linux/shrinker.h \
+ include/linux/radix-tree.h include/linux/semaphore.h \
+ include/uapi/linux/fiemap.h include/linux/migrate_mode.h \
+ include/linux/blk_types.h include/linux/bvec.h \
+ include/linux/delayed_call.h include/uapi/linux/fs.h \
+ include/uapi/linux/ioctl.h arch/arm64/include/generated/asm/ioctl.h \
+ include/asm-generic/ioctl.h include/uapi/asm-generic/ioctl.h \
+ include/linux/quota.h include/linux/percpu_counter.h \
+ include/uapi/linux/dqblk_xfs.h include/linux/dqblk_v1.h \
+ include/linux/dqblk_v2.h include/linux/dqblk_qtree.h \
+ include/linux/projid.h include/uapi/linux/quota.h \
+ include/linux/nfs_fs_i.h include/linux/fcntl.h \
+ include/uapi/linux/fcntl.h arch/arm64/include/uapi/asm/fcntl.h \
+ include/uapi/asm-generic/fcntl.h include/linux/mm.h \
+ include/linux/debug_locks.h include/linux/range.h \
+ include/linux/page_ext.h include/linux/stacktrace.h \
+ include/linux/stackdepot.h include/linux/page_ref.h \
+ include/linux/page-flags.h include/linux/tracepoint-defs.h \
+ include/linux/static_key.h arch/arm64/include/asm/pgtable.h \
+ arch/arm64/include/asm/proc-fns.h arch/arm64/include/asm/pgtable-prot.h \
+ arch/arm64/include/asm/fixmap.h arch/arm64/include/asm/boot.h \
+ include/asm-generic/fixmap.h include/asm-generic/pgtable.h \
+ include/linux/huge_mm.h include/linux/vmstat.h \
+ include/linux/vm_event_item.h include/linux/uaccess.h \
+ arch/arm64/include/asm/uaccess.h arch/arm64/include/asm/kernel-pgtable.h \
+ include/linux/kasan-checks.h arch/arm64/include/asm/compiler.h \
+ include/linux/hardirq.h include/linux/ftrace_irq.h include/linux/vtime.h \
+ include/linux/context_tracking_state.h arch/arm64/include/asm/hardirq.h \
+ arch/arm64/include/asm/irq.h include/asm-generic/irq.h \
+ include/linux/irq_cpustat.h arch/arm64/include/asm/cacheflush.h \
+ arch/arm64/include/generated/asm/kmap_types.h \
+ include/asm-generic/kmap_types.h include/linux/slab.h \
+ include/linux/kmemleak.h include/linux/kasan.h include/linux/vmalloc.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h \
+ include/linux/platform_device.h include/linux/device.h \
+ include/linux/ioport.h include/linux/kobject.h include/linux/sysfs.h \
+ include/linux/kernfs.h include/linux/kobject_ns.h include/linux/kref.h \
+ include/linux/klist.h include/linux/pinctrl/devinfo.h \
+ include/linux/pinctrl/consumer.h include/linux/seq_file.h \
+ include/linux/pinctrl/pinctrl-state.h include/linux/pm.h \
+ include/linux/ratelimit.h arch/arm64/include/asm/device.h \
+ include/linux/pm_wakeup.h include/linux/mod_devicetable.h \
+ include/linux/uuid.h include/uapi/linux/uuid.h \
+ include/linux/miscdevice.h include/uapi/linux/major.h \
+ include/linux/module.h include/linux/kmod.h include/linux/elf.h \
+ arch/arm64/include/asm/elf.h arch/arm64/include/generated/asm/user.h \
+ include/asm-generic/user.h include/uapi/linux/elf.h \
+ include/uapi/linux/elf-em.h include/linux/moduleparam.h \
+ include/linux/extable.h include/linux/rbtree_latch.h \
+ arch/arm64/include/asm/module.h include/asm-generic/module.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h \
+ include/linux/dma-mapping.h include/linux/dma-debug.h \
+ include/linux/dma-direction.h include/linux/scatterlist.h \
+ arch/arm64/include/asm/io.h \
+ arch/arm64/include/generated/asm/early_ioremap.h \
+ include/asm-generic/early_ioremap.h include/xen/xen.h \
+ include/asm-generic/io.h include/asm-generic/pci_iomap.h \
+ include/linux/kmemcheck.h arch/arm64/include/asm/dma-mapping.h \
+ arch/arm64/include/asm/xen/hypervisor.h \
+ arch/arm64/include/../../arm/include/asm/xen/hypervisor.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h \
+ include/linux/mempool.h include/linux/file.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h \
+ include/linux/fence.h include/linux/debugfs.h include/linux/clk.h \
+ include/linux/regulator/consumer.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h \
+ include/linux/poll.h include/uapi/linux/poll.h \
+ arch/arm64/include/generated/asm/poll.h include/uapi/asm-generic/poll.h \
+ include/linux/of.h include/linux/property.h include/linux/fwnode.h \
+ include/linux/of_platform.h include/linux/of_device.h \
+ include/linux/cpu.h include/linux/node.h include/linux/cpuhotplug.h \
+ include/linux/interrupt.h include/linux/irqreturn.h \
+ include/linux/irqnr.h include/uapi/linux/irqnr.h include/linux/compat.h \
+ include/linux/socket.h arch/arm64/include/generated/asm/socket.h \
+ include/uapi/asm-generic/socket.h \
+ arch/arm64/include/generated/asm/sockios.h \
+ include/uapi/asm-generic/sockios.h include/uapi/linux/sockios.h \
+ include/linux/uio.h include/uapi/linux/uio.h include/uapi/linux/socket.h \
+ include/uapi/linux/if.h include/uapi/linux/libc-compat.h \
+ include/uapi/linux/hdlc/ioctl.h include/uapi/linux/aio_abi.h \
+ include/linux/mman.h include/uapi/linux/mman.h \
+ arch/arm64/include/generated/asm/mman.h include/uapi/asm-generic/mman.h \
+ include/uapi/asm-generic/mman-common.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h \
+ include/linux/syscalls.h include/trace/syscall.h \
+ include/linux/tracepoint.h include/linux/trace_events.h \
+ include/linux/ring_buffer.h include/linux/trace_seq.h \
+ include/linux/seq_buf.h include/linux/perf_event.h \
+ include/uapi/linux/perf_event.h arch/arm64/include/asm/perf_event.h \
+ arch/arm64/include/generated/asm/local64.h include/asm-generic/local64.h \
+ arch/arm64/include/generated/asm/local.h include/asm-generic/local.h \
+ include/linux/pid_namespace.h include/linux/nsproxy.h \
+ include/linux/ns_common.h include/linux/ftrace.h \
+ include/linux/trace_clock.h \
+ arch/arm64/include/generated/asm/trace_clock.h \
+ include/asm-generic/trace_clock.h include/linux/kallsyms.h \
+ include/linux/ptrace.h include/uapi/linux/ptrace.h \
+ arch/arm64/include/asm/ftrace.h include/linux/irq_work.h \
+ arch/arm64/include/asm/irq_work.h include/linux/jump_label_ratelimit.h \
+ include/linux/perf_regs.h arch/arm64/include/uapi/asm/perf_regs.h \
+ include/linux/cgroup.h include/uapi/linux/cgroupstats.h \
+ include/uapi/linux/taskstats.h include/linux/user_namespace.h \
+ include/linux/sync_file.h include/linux/fence-array.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h \
+ include/linux/delay.h arch/arm64/include/generated/asm/delay.h \
+ include/asm-generic/delay.h include/linux/pm_opp.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_debug.o.d b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_debug.o.d
new file mode 100644
index 0000000..aab4b85
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_debug.o.d
@@ -0,0 +1,268 @@
+mali_kbase_debug.o: \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c \
+ include/linux/kconfig.h include/generated/autoconf.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h \
+ include/generated/uapi/linux/version.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h \
+ include/linux/bug.h arch/arm64/include/asm/bug.h \
+ arch/arm64/include/asm/brk-imm.h include/asm-generic/bug.h \
+ include/linux/compiler.h include/linux/compiler-gcc.h \
+ include/uapi/linux/types.h arch/arm64/include/generated/asm/types.h \
+ include/uapi/asm-generic/types.h include/asm-generic/int-ll64.h \
+ include/uapi/asm-generic/int-ll64.h \
+ arch/arm64/include/uapi/asm/bitsperlong.h \
+ include/asm-generic/bitsperlong.h include/uapi/asm-generic/bitsperlong.h \
+ include/uapi/linux/posix_types.h include/linux/stddef.h \
+ include/uapi/linux/stddef.h arch/arm64/include/uapi/asm/posix_types.h \
+ include/uapi/asm-generic/posix_types.h include/linux/kernel.h \
+ /opt/gcc-linaro-aarch64-linux-gnu-4.9-2014.09_linux/lib/gcc/aarch64-linux-gnu/4.9.2/include/stdarg.h \
+ include/linux/linkage.h include/linux/stringify.h include/linux/export.h \
+ arch/arm64/include/asm/linkage.h include/linux/types.h \
+ include/linux/bitops.h arch/arm64/include/asm/bitops.h \
+ arch/arm64/include/asm/barrier.h include/asm-generic/barrier.h \
+ include/asm-generic/bitops/builtin-__ffs.h \
+ include/asm-generic/bitops/builtin-ffs.h \
+ include/asm-generic/bitops/builtin-__fls.h \
+ include/asm-generic/bitops/builtin-fls.h \
+ include/asm-generic/bitops/ffz.h include/asm-generic/bitops/fls64.h \
+ include/asm-generic/bitops/find.h include/asm-generic/bitops/sched.h \
+ include/asm-generic/bitops/hweight.h \
+ include/asm-generic/bitops/arch_hweight.h \
+ include/asm-generic/bitops/const_hweight.h \
+ include/asm-generic/bitops/lock.h \
+ include/asm-generic/bitops/non-atomic.h include/asm-generic/bitops/le.h \
+ arch/arm64/include/uapi/asm/byteorder.h \
+ include/linux/byteorder/little_endian.h \
+ include/uapi/linux/byteorder/little_endian.h include/linux/swab.h \
+ include/uapi/linux/swab.h arch/arm64/include/generated/asm/swab.h \
+ include/uapi/asm-generic/swab.h include/linux/byteorder/generic.h \
+ include/linux/log2.h include/linux/typecheck.h include/linux/printk.h \
+ include/linux/init.h include/linux/kern_levels.h include/linux/cache.h \
+ include/uapi/linux/kernel.h include/uapi/linux/sysinfo.h \
+ arch/arm64/include/asm/cache.h arch/arm64/include/asm/cachetype.h \
+ arch/arm64/include/asm/cputype.h arch/arm64/include/asm/sysreg.h \
+ arch/arm64/include/asm/opcodes.h \
+ arch/arm64/include/../../arm/include/asm/opcodes.h \
+ arch/arm64/include/asm/page.h include/uapi/linux/const.h \
+ include/linux/personality.h include/uapi/linux/personality.h \
+ arch/arm64/include/asm/pgtable-types.h \
+ include/asm-generic/pgtable-nopud.h arch/arm64/include/asm/memory.h \
+ arch/arm64/include/generated/asm/sizes.h include/asm-generic/sizes.h \
+ include/linux/sizes.h include/linux/mmdebug.h \
+ include/asm-generic/memory_model.h include/linux/pfn.h \
+ include/asm-generic/getorder.h include/linux/atomic.h \
+ arch/arm64/include/asm/atomic.h arch/arm64/include/asm/lse.h \
+ arch/arm64/include/asm/atomic_ll_sc.h arch/arm64/include/asm/cmpxchg.h \
+ include/asm-generic/atomic-long.h include/linux/highmem.h \
+ include/linux/fs.h include/linux/wait.h include/linux/list.h \
+ include/linux/poison.h include/linux/spinlock.h include/linux/preempt.h \
+ arch/arm64/include/generated/asm/preempt.h include/asm-generic/preempt.h \
+ include/linux/thread_info.h arch/arm64/include/asm/thread_info.h \
+ include/linux/irqflags.h arch/arm64/include/asm/irqflags.h \
+ arch/arm64/include/asm/ptrace.h arch/arm64/include/uapi/asm/ptrace.h \
+ arch/arm64/include/asm/hwcap.h arch/arm64/include/uapi/asm/hwcap.h \
+ include/asm-generic/ptrace.h include/linux/bottom_half.h \
+ include/linux/spinlock_types.h arch/arm64/include/asm/spinlock_types.h \
+ include/linux/lockdep.h include/linux/rwlock_types.h \
+ arch/arm64/include/asm/spinlock.h arch/arm64/include/asm/processor.h \
+ include/linux/string.h include/uapi/linux/string.h \
+ arch/arm64/include/asm/string.h arch/arm64/include/asm/alternative.h \
+ arch/arm64/include/asm/cpucaps.h arch/arm64/include/asm/insn.h \
+ arch/arm64/include/asm/fpsimd.h arch/arm64/include/asm/hw_breakpoint.h \
+ arch/arm64/include/asm/cpufeature.h include/linux/jump_label.h \
+ arch/arm64/include/asm/jump_label.h arch/arm64/include/asm/virt.h \
+ arch/arm64/include/asm/sections.h include/asm-generic/sections.h \
+ arch/arm64/include/asm/pgtable-hwdef.h include/linux/rwlock.h \
+ include/linux/spinlock_api_smp.h include/linux/rwlock_api_smp.h \
+ arch/arm64/include/generated/asm/current.h include/asm-generic/current.h \
+ include/uapi/linux/wait.h include/linux/kdev_t.h \
+ include/uapi/linux/kdev_t.h include/linux/dcache.h \
+ include/linux/rculist.h include/linux/rcupdate.h include/linux/threads.h \
+ include/linux/cpumask.h include/linux/bitmap.h include/linux/seqlock.h \
+ include/linux/completion.h include/linux/debugobjects.h \
+ include/linux/ktime.h include/linux/time.h include/linux/math64.h \
+ arch/arm64/include/generated/asm/div64.h include/asm-generic/div64.h \
+ include/linux/time64.h include/uapi/linux/time.h include/linux/jiffies.h \
+ include/linux/timex.h include/uapi/linux/timex.h \
+ include/uapi/linux/param.h arch/arm64/include/uapi/asm/param.h \
+ include/asm-generic/param.h include/uapi/asm-generic/param.h \
+ arch/arm64/include/asm/timex.h arch/arm64/include/asm/arch_timer.h \
+ include/clocksource/arm_arch_timer.h include/linux/timecounter.h \
+ include/asm-generic/timex.h include/generated/timeconst.h \
+ include/linux/timekeeping.h include/linux/errno.h \
+ include/uapi/linux/errno.h arch/arm64/include/generated/asm/errno.h \
+ include/uapi/asm-generic/errno.h include/uapi/asm-generic/errno-base.h \
+ include/linux/rcutree.h include/linux/rculist_bl.h \
+ include/linux/list_bl.h include/linux/bit_spinlock.h \
+ include/linux/lockref.h include/generated/bounds.h \
+ include/linux/stringhash.h include/linux/hash.h include/linux/path.h \
+ include/linux/stat.h arch/arm64/include/asm/stat.h \
+ arch/arm64/include/uapi/asm/stat.h include/uapi/asm-generic/stat.h \
+ arch/arm64/include/asm/compat.h include/linux/sched.h \
+ include/uapi/linux/sched.h include/linux/sched/prio.h \
+ include/linux/capability.h include/uapi/linux/capability.h \
+ include/linux/plist.h include/linux/rbtree.h include/linux/nodemask.h \
+ include/linux/numa.h include/linux/mm_types.h include/linux/auxvec.h \
+ include/uapi/linux/auxvec.h arch/arm64/include/uapi/asm/auxvec.h \
+ include/linux/rwsem.h include/linux/err.h include/linux/osq_lock.h \
+ arch/arm64/include/generated/asm/rwsem.h include/asm-generic/rwsem.h \
+ include/linux/uprobes.h include/linux/page-flags-layout.h \
+ arch/arm64/include/asm/sparsemem.h include/linux/workqueue.h \
+ include/linux/timer.h include/linux/sysctl.h include/linux/uidgid.h \
+ include/linux/highuid.h include/uapi/linux/sysctl.h \
+ arch/arm64/include/asm/mmu.h include/linux/cputime.h \
+ arch/arm64/include/generated/asm/cputime.h include/asm-generic/cputime.h \
+ include/asm-generic/cputime_jiffies.h include/linux/smp.h \
+ include/linux/llist.h arch/arm64/include/asm/smp.h include/linux/sem.h \
+ include/uapi/linux/sem.h include/linux/ipc.h include/uapi/linux/ipc.h \
+ arch/arm64/include/generated/asm/ipcbuf.h \
+ include/uapi/asm-generic/ipcbuf.h \
+ arch/arm64/include/generated/asm/sembuf.h \
+ include/uapi/asm-generic/sembuf.h include/linux/shm.h \
+ include/uapi/linux/shm.h arch/arm64/include/generated/asm/shmbuf.h \
+ include/uapi/asm-generic/shmbuf.h arch/arm64/include/asm/shmparam.h \
+ include/uapi/asm-generic/shmparam.h include/linux/signal.h \
+ include/uapi/linux/signal.h arch/arm64/include/uapi/asm/signal.h \
+ include/asm-generic/signal.h include/uapi/asm-generic/signal.h \
+ include/uapi/asm-generic/signal-defs.h \
+ arch/arm64/include/uapi/asm/sigcontext.h \
+ arch/arm64/include/uapi/asm/siginfo.h include/asm-generic/siginfo.h \
+ include/uapi/asm-generic/siginfo.h include/linux/pid.h \
+ include/linux/percpu.h arch/arm64/include/asm/percpu.h \
+ include/asm-generic/percpu.h include/linux/percpu-defs.h \
+ include/linux/topology.h include/linux/mmzone.h \
+ include/linux/pageblock-flags.h include/linux/memory_hotplug.h \
+ include/linux/notifier.h include/linux/mutex.h include/linux/srcu.h \
+ arch/arm64/include/asm/topology.h include/asm-generic/topology.h \
+ include/linux/seccomp.h include/uapi/linux/seccomp.h \
+ arch/arm64/include/asm/seccomp.h arch/arm64/include/asm/unistd.h \
+ arch/arm64/include/uapi/asm/unistd.h include/asm-generic/unistd.h \
+ include/uapi/asm-generic/unistd.h include/asm-generic/seccomp.h \
+ include/uapi/linux/unistd.h include/linux/rtmutex.h \
+ include/linux/resource.h include/uapi/linux/resource.h \
+ arch/arm64/include/generated/asm/resource.h \
+ include/asm-generic/resource.h include/uapi/asm-generic/resource.h \
+ include/linux/hrtimer.h include/linux/timerqueue.h include/linux/kcov.h \
+ include/uapi/linux/kcov.h include/linux/task_io_accounting.h \
+ include/linux/latencytop.h include/linux/cred.h include/linux/key.h \
+ include/linux/assoc_array.h include/linux/selinux.h include/linux/gfp.h \
+ include/uapi/linux/magic.h include/linux/cgroup-defs.h \
+ include/uapi/linux/limits.h include/linux/idr.h \
+ include/linux/percpu-refcount.h include/linux/percpu-rwsem.h \
+ include/linux/rcu_sync.h include/linux/bpf-cgroup.h \
+ include/uapi/linux/bpf.h include/uapi/linux/bpf_common.h \
+ include/linux/cgroup_subsys.h include/uapi/linux/stat.h \
+ include/linux/list_lru.h include/linux/shrinker.h \
+ include/linux/radix-tree.h include/linux/semaphore.h \
+ include/uapi/linux/fiemap.h include/linux/migrate_mode.h \
+ include/linux/blk_types.h include/linux/bvec.h \
+ include/linux/delayed_call.h include/uapi/linux/fs.h \
+ include/uapi/linux/ioctl.h arch/arm64/include/generated/asm/ioctl.h \
+ include/asm-generic/ioctl.h include/uapi/asm-generic/ioctl.h \
+ include/linux/quota.h include/linux/percpu_counter.h \
+ include/uapi/linux/dqblk_xfs.h include/linux/dqblk_v1.h \
+ include/linux/dqblk_v2.h include/linux/dqblk_qtree.h \
+ include/linux/projid.h include/uapi/linux/quota.h \
+ include/linux/nfs_fs_i.h include/linux/fcntl.h \
+ include/uapi/linux/fcntl.h arch/arm64/include/uapi/asm/fcntl.h \
+ include/uapi/asm-generic/fcntl.h include/linux/mm.h \
+ include/linux/debug_locks.h include/linux/range.h \
+ include/linux/page_ext.h include/linux/stacktrace.h \
+ include/linux/stackdepot.h include/linux/page_ref.h \
+ include/linux/page-flags.h include/linux/tracepoint-defs.h \
+ include/linux/static_key.h arch/arm64/include/asm/pgtable.h \
+ arch/arm64/include/asm/proc-fns.h arch/arm64/include/asm/pgtable-prot.h \
+ arch/arm64/include/asm/fixmap.h arch/arm64/include/asm/boot.h \
+ include/asm-generic/fixmap.h include/asm-generic/pgtable.h \
+ include/linux/huge_mm.h include/linux/vmstat.h \
+ include/linux/vm_event_item.h include/linux/uaccess.h \
+ arch/arm64/include/asm/uaccess.h arch/arm64/include/asm/kernel-pgtable.h \
+ include/linux/kasan-checks.h arch/arm64/include/asm/compiler.h \
+ include/linux/hardirq.h include/linux/ftrace_irq.h include/linux/vtime.h \
+ include/linux/context_tracking_state.h arch/arm64/include/asm/hardirq.h \
+ arch/arm64/include/asm/irq.h include/asm-generic/irq.h \
+ include/linux/irq_cpustat.h arch/arm64/include/asm/cacheflush.h \
+ arch/arm64/include/generated/asm/kmap_types.h \
+ include/asm-generic/kmap_types.h include/linux/slab.h \
+ include/linux/kmemleak.h include/linux/kasan.h include/linux/vmalloc.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h \
+ include/linux/platform_device.h include/linux/device.h \
+ include/linux/ioport.h include/linux/kobject.h include/linux/sysfs.h \
+ include/linux/kernfs.h include/linux/kobject_ns.h include/linux/kref.h \
+ include/linux/klist.h include/linux/pinctrl/devinfo.h \
+ include/linux/pinctrl/consumer.h include/linux/seq_file.h \
+ include/linux/pinctrl/pinctrl-state.h include/linux/pm.h \
+ include/linux/ratelimit.h arch/arm64/include/asm/device.h \
+ include/linux/pm_wakeup.h include/linux/mod_devicetable.h \
+ include/linux/uuid.h include/uapi/linux/uuid.h \
+ include/linux/miscdevice.h include/uapi/linux/major.h \
+ include/linux/module.h include/linux/kmod.h include/linux/elf.h \
+ arch/arm64/include/asm/elf.h arch/arm64/include/generated/asm/user.h \
+ include/asm-generic/user.h include/uapi/linux/elf.h \
+ include/uapi/linux/elf-em.h include/linux/moduleparam.h \
+ include/linux/extable.h include/linux/rbtree_latch.h \
+ arch/arm64/include/asm/module.h include/asm-generic/module.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h \
+ include/linux/dma-mapping.h include/linux/dma-debug.h \
+ include/linux/dma-direction.h include/linux/scatterlist.h \
+ arch/arm64/include/asm/io.h \
+ arch/arm64/include/generated/asm/early_ioremap.h \
+ include/asm-generic/early_ioremap.h include/xen/xen.h \
+ include/asm-generic/io.h include/asm-generic/pci_iomap.h \
+ include/linux/kmemcheck.h arch/arm64/include/asm/dma-mapping.h \
+ arch/arm64/include/asm/xen/hypervisor.h \
+ arch/arm64/include/../../arm/include/asm/xen/hypervisor.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h \
+ include/linux/mempool.h include/linux/file.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h \
+ include/linux/fence.h include/linux/debugfs.h include/linux/clk.h \
+ include/linux/regulator/consumer.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_device.o.d b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_device.o.d
new file mode 100644
index 0000000..c5f163c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_device.o.d
@@ -0,0 +1,277 @@
+mali_kbase_device.o: \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c \
+ include/linux/kconfig.h include/generated/autoconf.h \
+ include/linux/debugfs.h include/linux/fs.h include/linux/linkage.h \
+ include/linux/compiler.h include/linux/compiler-gcc.h \
+ include/uapi/linux/types.h arch/arm64/include/generated/asm/types.h \
+ include/uapi/asm-generic/types.h include/asm-generic/int-ll64.h \
+ include/uapi/asm-generic/int-ll64.h \
+ arch/arm64/include/uapi/asm/bitsperlong.h \
+ include/asm-generic/bitsperlong.h include/uapi/asm-generic/bitsperlong.h \
+ include/uapi/linux/posix_types.h include/linux/stddef.h \
+ include/uapi/linux/stddef.h arch/arm64/include/uapi/asm/posix_types.h \
+ include/uapi/asm-generic/posix_types.h include/linux/stringify.h \
+ include/linux/export.h arch/arm64/include/asm/linkage.h \
+ include/linux/wait.h include/linux/list.h include/linux/types.h \
+ include/linux/poison.h include/uapi/linux/const.h include/linux/kernel.h \
+ /opt/gcc-linaro-aarch64-linux-gnu-4.9-2014.09_linux/lib/gcc/aarch64-linux-gnu/4.9.2/include/stdarg.h \
+ include/linux/bitops.h arch/arm64/include/asm/bitops.h \
+ arch/arm64/include/asm/barrier.h include/asm-generic/barrier.h \
+ include/asm-generic/bitops/builtin-__ffs.h \
+ include/asm-generic/bitops/builtin-ffs.h \
+ include/asm-generic/bitops/builtin-__fls.h \
+ include/asm-generic/bitops/builtin-fls.h \
+ include/asm-generic/bitops/ffz.h include/asm-generic/bitops/fls64.h \
+ include/asm-generic/bitops/find.h include/asm-generic/bitops/sched.h \
+ include/asm-generic/bitops/hweight.h \
+ include/asm-generic/bitops/arch_hweight.h \
+ include/asm-generic/bitops/const_hweight.h \
+ include/asm-generic/bitops/lock.h \
+ include/asm-generic/bitops/non-atomic.h include/asm-generic/bitops/le.h \
+ arch/arm64/include/uapi/asm/byteorder.h \
+ include/linux/byteorder/little_endian.h \
+ include/uapi/linux/byteorder/little_endian.h include/linux/swab.h \
+ include/uapi/linux/swab.h arch/arm64/include/generated/asm/swab.h \
+ include/uapi/asm-generic/swab.h include/linux/byteorder/generic.h \
+ include/linux/log2.h include/linux/typecheck.h include/linux/printk.h \
+ include/linux/init.h include/linux/kern_levels.h include/linux/cache.h \
+ include/uapi/linux/kernel.h include/uapi/linux/sysinfo.h \
+ arch/arm64/include/asm/cache.h arch/arm64/include/asm/cachetype.h \
+ arch/arm64/include/asm/cputype.h arch/arm64/include/asm/sysreg.h \
+ arch/arm64/include/asm/opcodes.h \
+ arch/arm64/include/../../arm/include/asm/opcodes.h \
+ include/linux/spinlock.h include/linux/preempt.h \
+ arch/arm64/include/generated/asm/preempt.h include/asm-generic/preempt.h \
+ include/linux/thread_info.h include/linux/bug.h \
+ arch/arm64/include/asm/bug.h arch/arm64/include/asm/brk-imm.h \
+ include/asm-generic/bug.h arch/arm64/include/asm/thread_info.h \
+ include/linux/irqflags.h arch/arm64/include/asm/irqflags.h \
+ arch/arm64/include/asm/ptrace.h arch/arm64/include/uapi/asm/ptrace.h \
+ arch/arm64/include/asm/hwcap.h arch/arm64/include/uapi/asm/hwcap.h \
+ include/asm-generic/ptrace.h include/linux/bottom_half.h \
+ include/linux/spinlock_types.h arch/arm64/include/asm/spinlock_types.h \
+ include/linux/lockdep.h include/linux/rwlock_types.h \
+ arch/arm64/include/asm/spinlock.h arch/arm64/include/asm/lse.h \
+ arch/arm64/include/asm/processor.h include/linux/string.h \
+ include/uapi/linux/string.h arch/arm64/include/asm/string.h \
+ arch/arm64/include/asm/alternative.h arch/arm64/include/asm/cpucaps.h \
+ arch/arm64/include/asm/insn.h arch/arm64/include/asm/fpsimd.h \
+ arch/arm64/include/asm/hw_breakpoint.h \
+ arch/arm64/include/asm/cpufeature.h include/linux/jump_label.h \
+ arch/arm64/include/asm/jump_label.h arch/arm64/include/asm/virt.h \
+ arch/arm64/include/asm/sections.h include/asm-generic/sections.h \
+ arch/arm64/include/asm/pgtable-hwdef.h include/linux/rwlock.h \
+ include/linux/spinlock_api_smp.h include/linux/rwlock_api_smp.h \
+ include/linux/atomic.h arch/arm64/include/asm/atomic.h \
+ arch/arm64/include/asm/atomic_ll_sc.h arch/arm64/include/asm/cmpxchg.h \
+ include/asm-generic/atomic-long.h \
+ arch/arm64/include/generated/asm/current.h include/asm-generic/current.h \
+ include/uapi/linux/wait.h include/linux/kdev_t.h \
+ include/uapi/linux/kdev_t.h include/linux/dcache.h \
+ include/linux/rculist.h include/linux/rcupdate.h include/linux/threads.h \
+ include/linux/cpumask.h include/linux/bitmap.h include/linux/seqlock.h \
+ include/linux/completion.h include/linux/debugobjects.h \
+ include/linux/ktime.h include/linux/time.h include/linux/math64.h \
+ arch/arm64/include/generated/asm/div64.h include/asm-generic/div64.h \
+ include/linux/time64.h include/uapi/linux/time.h include/linux/jiffies.h \
+ include/linux/timex.h include/uapi/linux/timex.h \
+ include/uapi/linux/param.h arch/arm64/include/uapi/asm/param.h \
+ include/asm-generic/param.h include/uapi/asm-generic/param.h \
+ arch/arm64/include/asm/timex.h arch/arm64/include/asm/arch_timer.h \
+ include/clocksource/arm_arch_timer.h include/linux/timecounter.h \
+ include/asm-generic/timex.h include/generated/timeconst.h \
+ include/linux/timekeeping.h include/linux/errno.h \
+ include/uapi/linux/errno.h arch/arm64/include/generated/asm/errno.h \
+ include/uapi/asm-generic/errno.h include/uapi/asm-generic/errno-base.h \
+ include/linux/rcutree.h include/linux/rculist_bl.h \
+ include/linux/list_bl.h include/linux/bit_spinlock.h \
+ include/linux/lockref.h include/generated/bounds.h \
+ include/linux/stringhash.h include/linux/hash.h include/linux/path.h \
+ include/linux/stat.h arch/arm64/include/asm/stat.h \
+ arch/arm64/include/uapi/asm/stat.h include/uapi/asm-generic/stat.h \
+ arch/arm64/include/asm/compat.h include/linux/sched.h \
+ include/uapi/linux/sched.h include/linux/sched/prio.h \
+ include/linux/capability.h include/uapi/linux/capability.h \
+ include/linux/plist.h include/linux/rbtree.h include/linux/nodemask.h \
+ include/linux/numa.h include/linux/mm_types.h include/linux/auxvec.h \
+ include/uapi/linux/auxvec.h arch/arm64/include/uapi/asm/auxvec.h \
+ include/linux/rwsem.h include/linux/err.h include/linux/osq_lock.h \
+ arch/arm64/include/generated/asm/rwsem.h include/asm-generic/rwsem.h \
+ include/linux/uprobes.h include/linux/page-flags-layout.h \
+ arch/arm64/include/asm/sparsemem.h include/linux/workqueue.h \
+ include/linux/timer.h include/linux/sysctl.h include/linux/uidgid.h \
+ include/linux/highuid.h include/uapi/linux/sysctl.h \
+ arch/arm64/include/asm/page.h include/linux/personality.h \
+ include/uapi/linux/personality.h arch/arm64/include/asm/pgtable-types.h \
+ include/asm-generic/pgtable-nopud.h arch/arm64/include/asm/memory.h \
+ arch/arm64/include/generated/asm/sizes.h include/asm-generic/sizes.h \
+ include/linux/sizes.h include/linux/mmdebug.h \
+ include/asm-generic/memory_model.h include/linux/pfn.h \
+ include/asm-generic/getorder.h arch/arm64/include/asm/mmu.h \
+ include/linux/cputime.h arch/arm64/include/generated/asm/cputime.h \
+ include/asm-generic/cputime.h include/asm-generic/cputime_jiffies.h \
+ include/linux/smp.h include/linux/llist.h arch/arm64/include/asm/smp.h \
+ include/linux/sem.h include/uapi/linux/sem.h include/linux/ipc.h \
+ include/uapi/linux/ipc.h arch/arm64/include/generated/asm/ipcbuf.h \
+ include/uapi/asm-generic/ipcbuf.h \
+ arch/arm64/include/generated/asm/sembuf.h \
+ include/uapi/asm-generic/sembuf.h include/linux/shm.h \
+ include/uapi/linux/shm.h arch/arm64/include/generated/asm/shmbuf.h \
+ include/uapi/asm-generic/shmbuf.h arch/arm64/include/asm/shmparam.h \
+ include/uapi/asm-generic/shmparam.h include/linux/signal.h \
+ include/uapi/linux/signal.h arch/arm64/include/uapi/asm/signal.h \
+ include/asm-generic/signal.h include/uapi/asm-generic/signal.h \
+ include/uapi/asm-generic/signal-defs.h \
+ arch/arm64/include/uapi/asm/sigcontext.h \
+ arch/arm64/include/uapi/asm/siginfo.h include/asm-generic/siginfo.h \
+ include/uapi/asm-generic/siginfo.h include/linux/pid.h \
+ include/linux/percpu.h arch/arm64/include/asm/percpu.h \
+ include/asm-generic/percpu.h include/linux/percpu-defs.h \
+ include/linux/topology.h include/linux/mmzone.h \
+ include/linux/pageblock-flags.h include/linux/memory_hotplug.h \
+ include/linux/notifier.h include/linux/mutex.h include/linux/srcu.h \
+ arch/arm64/include/asm/topology.h include/asm-generic/topology.h \
+ include/linux/seccomp.h include/uapi/linux/seccomp.h \
+ arch/arm64/include/asm/seccomp.h arch/arm64/include/asm/unistd.h \
+ arch/arm64/include/uapi/asm/unistd.h include/asm-generic/unistd.h \
+ include/uapi/asm-generic/unistd.h include/asm-generic/seccomp.h \
+ include/uapi/linux/unistd.h include/linux/rtmutex.h \
+ include/linux/resource.h include/uapi/linux/resource.h \
+ arch/arm64/include/generated/asm/resource.h \
+ include/asm-generic/resource.h include/uapi/asm-generic/resource.h \
+ include/linux/hrtimer.h include/linux/timerqueue.h include/linux/kcov.h \
+ include/uapi/linux/kcov.h include/linux/task_io_accounting.h \
+ include/linux/latencytop.h include/linux/cred.h include/linux/key.h \
+ include/linux/assoc_array.h include/linux/selinux.h include/linux/gfp.h \
+ include/uapi/linux/magic.h include/linux/cgroup-defs.h \
+ include/uapi/linux/limits.h include/linux/idr.h \
+ include/linux/percpu-refcount.h include/linux/percpu-rwsem.h \
+ include/linux/rcu_sync.h include/linux/bpf-cgroup.h \
+ include/uapi/linux/bpf.h include/uapi/linux/bpf_common.h \
+ include/linux/cgroup_subsys.h include/uapi/linux/stat.h \
+ include/linux/list_lru.h include/linux/shrinker.h \
+ include/linux/radix-tree.h include/linux/semaphore.h \
+ include/uapi/linux/fiemap.h include/linux/migrate_mode.h \
+ include/linux/blk_types.h include/linux/bvec.h \
+ include/linux/delayed_call.h include/uapi/linux/fs.h \
+ include/uapi/linux/ioctl.h arch/arm64/include/generated/asm/ioctl.h \
+ include/asm-generic/ioctl.h include/uapi/asm-generic/ioctl.h \
+ include/linux/quota.h include/linux/percpu_counter.h \
+ include/uapi/linux/dqblk_xfs.h include/linux/dqblk_v1.h \
+ include/linux/dqblk_v2.h include/linux/dqblk_qtree.h \
+ include/linux/projid.h include/uapi/linux/quota.h \
+ include/linux/nfs_fs_i.h include/linux/fcntl.h \
+ include/uapi/linux/fcntl.h arch/arm64/include/uapi/asm/fcntl.h \
+ include/uapi/asm-generic/fcntl.h include/linux/seq_file.h \
+ include/linux/dma-mapping.h include/linux/device.h \
+ include/linux/ioport.h include/linux/kobject.h include/linux/sysfs.h \
+ include/linux/kernfs.h include/linux/kobject_ns.h include/linux/kref.h \
+ include/linux/klist.h include/linux/pinctrl/devinfo.h \
+ include/linux/pinctrl/consumer.h include/linux/pinctrl/pinctrl-state.h \
+ include/linux/pm.h include/linux/ratelimit.h \
+ arch/arm64/include/asm/device.h include/linux/pm_wakeup.h \
+ include/linux/dma-debug.h include/linux/dma-direction.h \
+ include/linux/scatterlist.h include/linux/mm.h \
+ include/linux/debug_locks.h include/linux/range.h \
+ include/linux/page_ext.h include/linux/stacktrace.h \
+ include/linux/stackdepot.h include/linux/page_ref.h \
+ include/linux/page-flags.h include/linux/tracepoint-defs.h \
+ include/linux/static_key.h arch/arm64/include/asm/pgtable.h \
+ arch/arm64/include/asm/proc-fns.h arch/arm64/include/asm/pgtable-prot.h \
+ arch/arm64/include/asm/fixmap.h arch/arm64/include/asm/boot.h \
+ include/asm-generic/fixmap.h include/asm-generic/pgtable.h \
+ include/linux/huge_mm.h include/linux/vmstat.h \
+ include/linux/vm_event_item.h arch/arm64/include/asm/io.h \
+ arch/arm64/include/generated/asm/early_ioremap.h \
+ include/asm-generic/early_ioremap.h include/xen/xen.h \
+ include/asm-generic/io.h include/asm-generic/pci_iomap.h \
+ include/linux/vmalloc.h include/linux/kmemcheck.h \
+ arch/arm64/include/asm/dma-mapping.h \
+ arch/arm64/include/asm/xen/hypervisor.h \
+ arch/arm64/include/../../arm/include/asm/xen/hypervisor.h \
+ include/linux/module.h include/linux/kmod.h include/linux/elf.h \
+ arch/arm64/include/asm/elf.h arch/arm64/include/generated/asm/user.h \
+ include/asm-generic/user.h include/uapi/linux/elf.h \
+ include/uapi/linux/elf-em.h include/linux/moduleparam.h \
+ include/linux/extable.h include/linux/rbtree_latch.h \
+ arch/arm64/include/asm/module.h include/asm-generic/module.h \
+ include/linux/of_platform.h include/linux/mod_devicetable.h \
+ include/linux/uuid.h include/uapi/linux/uuid.h include/linux/of_device.h \
+ include/linux/cpu.h include/linux/node.h include/linux/cpuhotplug.h \
+ include/linux/platform_device.h include/linux/of.h \
+ include/linux/property.h include/linux/fwnode.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h \
+ include/generated/uapi/linux/version.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h \
+ include/linux/highmem.h include/linux/uaccess.h \
+ arch/arm64/include/asm/uaccess.h arch/arm64/include/asm/kernel-pgtable.h \
+ include/linux/kasan-checks.h arch/arm64/include/asm/compiler.h \
+ include/linux/hardirq.h include/linux/ftrace_irq.h include/linux/vtime.h \
+ include/linux/context_tracking_state.h arch/arm64/include/asm/hardirq.h \
+ arch/arm64/include/asm/irq.h include/asm-generic/irq.h \
+ include/linux/irq_cpustat.h arch/arm64/include/asm/cacheflush.h \
+ arch/arm64/include/generated/asm/kmap_types.h \
+ include/asm-generic/kmap_types.h include/linux/slab.h \
+ include/linux/kmemleak.h include/linux/kasan.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h \
+ include/linux/miscdevice.h include/uapi/linux/major.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h \
+ include/linux/mempool.h include/linux/file.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h \
+ include/linux/fence.h include/linux/clk.h \
+ include/linux/regulator/consumer.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_gator_api.o.d b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_gator_api.o.d
new file mode 100644
index 0000000..6daa727
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_gator_api.o.d
@@ -0,0 +1,276 @@
+mali_kbase_gator_api.o: \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c \
+ include/linux/kconfig.h include/generated/autoconf.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h \
+ include/generated/uapi/linux/version.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h \
+ include/linux/bug.h arch/arm64/include/asm/bug.h \
+ arch/arm64/include/asm/brk-imm.h include/asm-generic/bug.h \
+ include/linux/compiler.h include/linux/compiler-gcc.h \
+ include/uapi/linux/types.h arch/arm64/include/generated/asm/types.h \
+ include/uapi/asm-generic/types.h include/asm-generic/int-ll64.h \
+ include/uapi/asm-generic/int-ll64.h \
+ arch/arm64/include/uapi/asm/bitsperlong.h \
+ include/asm-generic/bitsperlong.h include/uapi/asm-generic/bitsperlong.h \
+ include/uapi/linux/posix_types.h include/linux/stddef.h \
+ include/uapi/linux/stddef.h arch/arm64/include/uapi/asm/posix_types.h \
+ include/uapi/asm-generic/posix_types.h include/linux/kernel.h \
+ /opt/gcc-linaro-aarch64-linux-gnu-4.9-2014.09_linux/lib/gcc/aarch64-linux-gnu/4.9.2/include/stdarg.h \
+ include/linux/linkage.h include/linux/stringify.h include/linux/export.h \
+ arch/arm64/include/asm/linkage.h include/linux/types.h \
+ include/linux/bitops.h arch/arm64/include/asm/bitops.h \
+ arch/arm64/include/asm/barrier.h include/asm-generic/barrier.h \
+ include/asm-generic/bitops/builtin-__ffs.h \
+ include/asm-generic/bitops/builtin-ffs.h \
+ include/asm-generic/bitops/builtin-__fls.h \
+ include/asm-generic/bitops/builtin-fls.h \
+ include/asm-generic/bitops/ffz.h include/asm-generic/bitops/fls64.h \
+ include/asm-generic/bitops/find.h include/asm-generic/bitops/sched.h \
+ include/asm-generic/bitops/hweight.h \
+ include/asm-generic/bitops/arch_hweight.h \
+ include/asm-generic/bitops/const_hweight.h \
+ include/asm-generic/bitops/lock.h \
+ include/asm-generic/bitops/non-atomic.h include/asm-generic/bitops/le.h \
+ arch/arm64/include/uapi/asm/byteorder.h \
+ include/linux/byteorder/little_endian.h \
+ include/uapi/linux/byteorder/little_endian.h include/linux/swab.h \
+ include/uapi/linux/swab.h arch/arm64/include/generated/asm/swab.h \
+ include/uapi/asm-generic/swab.h include/linux/byteorder/generic.h \
+ include/linux/log2.h include/linux/typecheck.h include/linux/printk.h \
+ include/linux/init.h include/linux/kern_levels.h include/linux/cache.h \
+ include/uapi/linux/kernel.h include/uapi/linux/sysinfo.h \
+ arch/arm64/include/asm/cache.h arch/arm64/include/asm/cachetype.h \
+ arch/arm64/include/asm/cputype.h arch/arm64/include/asm/sysreg.h \
+ arch/arm64/include/asm/opcodes.h \
+ arch/arm64/include/../../arm/include/asm/opcodes.h \
+ arch/arm64/include/asm/page.h include/uapi/linux/const.h \
+ include/linux/personality.h include/uapi/linux/personality.h \
+ arch/arm64/include/asm/pgtable-types.h \
+ include/asm-generic/pgtable-nopud.h arch/arm64/include/asm/memory.h \
+ arch/arm64/include/generated/asm/sizes.h include/asm-generic/sizes.h \
+ include/linux/sizes.h include/linux/mmdebug.h \
+ include/asm-generic/memory_model.h include/linux/pfn.h \
+ include/asm-generic/getorder.h include/linux/atomic.h \
+ arch/arm64/include/asm/atomic.h arch/arm64/include/asm/lse.h \
+ arch/arm64/include/asm/atomic_ll_sc.h arch/arm64/include/asm/cmpxchg.h \
+ include/asm-generic/atomic-long.h include/linux/highmem.h \
+ include/linux/fs.h include/linux/wait.h include/linux/list.h \
+ include/linux/poison.h include/linux/spinlock.h include/linux/preempt.h \
+ arch/arm64/include/generated/asm/preempt.h include/asm-generic/preempt.h \
+ include/linux/thread_info.h arch/arm64/include/asm/thread_info.h \
+ include/linux/irqflags.h arch/arm64/include/asm/irqflags.h \
+ arch/arm64/include/asm/ptrace.h arch/arm64/include/uapi/asm/ptrace.h \
+ arch/arm64/include/asm/hwcap.h arch/arm64/include/uapi/asm/hwcap.h \
+ include/asm-generic/ptrace.h include/linux/bottom_half.h \
+ include/linux/spinlock_types.h arch/arm64/include/asm/spinlock_types.h \
+ include/linux/lockdep.h include/linux/rwlock_types.h \
+ arch/arm64/include/asm/spinlock.h arch/arm64/include/asm/processor.h \
+ include/linux/string.h include/uapi/linux/string.h \
+ arch/arm64/include/asm/string.h arch/arm64/include/asm/alternative.h \
+ arch/arm64/include/asm/cpucaps.h arch/arm64/include/asm/insn.h \
+ arch/arm64/include/asm/fpsimd.h arch/arm64/include/asm/hw_breakpoint.h \
+ arch/arm64/include/asm/cpufeature.h include/linux/jump_label.h \
+ arch/arm64/include/asm/jump_label.h arch/arm64/include/asm/virt.h \
+ arch/arm64/include/asm/sections.h include/asm-generic/sections.h \
+ arch/arm64/include/asm/pgtable-hwdef.h include/linux/rwlock.h \
+ include/linux/spinlock_api_smp.h include/linux/rwlock_api_smp.h \
+ arch/arm64/include/generated/asm/current.h include/asm-generic/current.h \
+ include/uapi/linux/wait.h include/linux/kdev_t.h \
+ include/uapi/linux/kdev_t.h include/linux/dcache.h \
+ include/linux/rculist.h include/linux/rcupdate.h include/linux/threads.h \
+ include/linux/cpumask.h include/linux/bitmap.h include/linux/seqlock.h \
+ include/linux/completion.h include/linux/debugobjects.h \
+ include/linux/ktime.h include/linux/time.h include/linux/math64.h \
+ arch/arm64/include/generated/asm/div64.h include/asm-generic/div64.h \
+ include/linux/time64.h include/uapi/linux/time.h include/linux/jiffies.h \
+ include/linux/timex.h include/uapi/linux/timex.h \
+ include/uapi/linux/param.h arch/arm64/include/uapi/asm/param.h \
+ include/asm-generic/param.h include/uapi/asm-generic/param.h \
+ arch/arm64/include/asm/timex.h arch/arm64/include/asm/arch_timer.h \
+ include/clocksource/arm_arch_timer.h include/linux/timecounter.h \
+ include/asm-generic/timex.h include/generated/timeconst.h \
+ include/linux/timekeeping.h include/linux/errno.h \
+ include/uapi/linux/errno.h arch/arm64/include/generated/asm/errno.h \
+ include/uapi/asm-generic/errno.h include/uapi/asm-generic/errno-base.h \
+ include/linux/rcutree.h include/linux/rculist_bl.h \
+ include/linux/list_bl.h include/linux/bit_spinlock.h \
+ include/linux/lockref.h include/generated/bounds.h \
+ include/linux/stringhash.h include/linux/hash.h include/linux/path.h \
+ include/linux/stat.h arch/arm64/include/asm/stat.h \
+ arch/arm64/include/uapi/asm/stat.h include/uapi/asm-generic/stat.h \
+ arch/arm64/include/asm/compat.h include/linux/sched.h \
+ include/uapi/linux/sched.h include/linux/sched/prio.h \
+ include/linux/capability.h include/uapi/linux/capability.h \
+ include/linux/plist.h include/linux/rbtree.h include/linux/nodemask.h \
+ include/linux/numa.h include/linux/mm_types.h include/linux/auxvec.h \
+ include/uapi/linux/auxvec.h arch/arm64/include/uapi/asm/auxvec.h \
+ include/linux/rwsem.h include/linux/err.h include/linux/osq_lock.h \
+ arch/arm64/include/generated/asm/rwsem.h include/asm-generic/rwsem.h \
+ include/linux/uprobes.h include/linux/page-flags-layout.h \
+ arch/arm64/include/asm/sparsemem.h include/linux/workqueue.h \
+ include/linux/timer.h include/linux/sysctl.h include/linux/uidgid.h \
+ include/linux/highuid.h include/uapi/linux/sysctl.h \
+ arch/arm64/include/asm/mmu.h include/linux/cputime.h \
+ arch/arm64/include/generated/asm/cputime.h include/asm-generic/cputime.h \
+ include/asm-generic/cputime_jiffies.h include/linux/smp.h \
+ include/linux/llist.h arch/arm64/include/asm/smp.h include/linux/sem.h \
+ include/uapi/linux/sem.h include/linux/ipc.h include/uapi/linux/ipc.h \
+ arch/arm64/include/generated/asm/ipcbuf.h \
+ include/uapi/asm-generic/ipcbuf.h \
+ arch/arm64/include/generated/asm/sembuf.h \
+ include/uapi/asm-generic/sembuf.h include/linux/shm.h \
+ include/uapi/linux/shm.h arch/arm64/include/generated/asm/shmbuf.h \
+ include/uapi/asm-generic/shmbuf.h arch/arm64/include/asm/shmparam.h \
+ include/uapi/asm-generic/shmparam.h include/linux/signal.h \
+ include/uapi/linux/signal.h arch/arm64/include/uapi/asm/signal.h \
+ include/asm-generic/signal.h include/uapi/asm-generic/signal.h \
+ include/uapi/asm-generic/signal-defs.h \
+ arch/arm64/include/uapi/asm/sigcontext.h \
+ arch/arm64/include/uapi/asm/siginfo.h include/asm-generic/siginfo.h \
+ include/uapi/asm-generic/siginfo.h include/linux/pid.h \
+ include/linux/percpu.h arch/arm64/include/asm/percpu.h \
+ include/asm-generic/percpu.h include/linux/percpu-defs.h \
+ include/linux/topology.h include/linux/mmzone.h \
+ include/linux/pageblock-flags.h include/linux/memory_hotplug.h \
+ include/linux/notifier.h include/linux/mutex.h include/linux/srcu.h \
+ arch/arm64/include/asm/topology.h include/asm-generic/topology.h \
+ include/linux/seccomp.h include/uapi/linux/seccomp.h \
+ arch/arm64/include/asm/seccomp.h arch/arm64/include/asm/unistd.h \
+ arch/arm64/include/uapi/asm/unistd.h include/asm-generic/unistd.h \
+ include/uapi/asm-generic/unistd.h include/asm-generic/seccomp.h \
+ include/uapi/linux/unistd.h include/linux/rtmutex.h \
+ include/linux/resource.h include/uapi/linux/resource.h \
+ arch/arm64/include/generated/asm/resource.h \
+ include/asm-generic/resource.h include/uapi/asm-generic/resource.h \
+ include/linux/hrtimer.h include/linux/timerqueue.h include/linux/kcov.h \
+ include/uapi/linux/kcov.h include/linux/task_io_accounting.h \
+ include/linux/latencytop.h include/linux/cred.h include/linux/key.h \
+ include/linux/assoc_array.h include/linux/selinux.h include/linux/gfp.h \
+ include/uapi/linux/magic.h include/linux/cgroup-defs.h \
+ include/uapi/linux/limits.h include/linux/idr.h \
+ include/linux/percpu-refcount.h include/linux/percpu-rwsem.h \
+ include/linux/rcu_sync.h include/linux/bpf-cgroup.h \
+ include/uapi/linux/bpf.h include/uapi/linux/bpf_common.h \
+ include/linux/cgroup_subsys.h include/uapi/linux/stat.h \
+ include/linux/list_lru.h include/linux/shrinker.h \
+ include/linux/radix-tree.h include/linux/semaphore.h \
+ include/uapi/linux/fiemap.h include/linux/migrate_mode.h \
+ include/linux/blk_types.h include/linux/bvec.h \
+ include/linux/delayed_call.h include/uapi/linux/fs.h \
+ include/uapi/linux/ioctl.h arch/arm64/include/generated/asm/ioctl.h \
+ include/asm-generic/ioctl.h include/uapi/asm-generic/ioctl.h \
+ include/linux/quota.h include/linux/percpu_counter.h \
+ include/uapi/linux/dqblk_xfs.h include/linux/dqblk_v1.h \
+ include/linux/dqblk_v2.h include/linux/dqblk_qtree.h \
+ include/linux/projid.h include/uapi/linux/quota.h \
+ include/linux/nfs_fs_i.h include/linux/fcntl.h \
+ include/uapi/linux/fcntl.h arch/arm64/include/uapi/asm/fcntl.h \
+ include/uapi/asm-generic/fcntl.h include/linux/mm.h \
+ include/linux/debug_locks.h include/linux/range.h \
+ include/linux/page_ext.h include/linux/stacktrace.h \
+ include/linux/stackdepot.h include/linux/page_ref.h \
+ include/linux/page-flags.h include/linux/tracepoint-defs.h \
+ include/linux/static_key.h arch/arm64/include/asm/pgtable.h \
+ arch/arm64/include/asm/proc-fns.h arch/arm64/include/asm/pgtable-prot.h \
+ arch/arm64/include/asm/fixmap.h arch/arm64/include/asm/boot.h \
+ include/asm-generic/fixmap.h include/asm-generic/pgtable.h \
+ include/linux/huge_mm.h include/linux/vmstat.h \
+ include/linux/vm_event_item.h include/linux/uaccess.h \
+ arch/arm64/include/asm/uaccess.h arch/arm64/include/asm/kernel-pgtable.h \
+ include/linux/kasan-checks.h arch/arm64/include/asm/compiler.h \
+ include/linux/hardirq.h include/linux/ftrace_irq.h include/linux/vtime.h \
+ include/linux/context_tracking_state.h arch/arm64/include/asm/hardirq.h \
+ arch/arm64/include/asm/irq.h include/asm-generic/irq.h \
+ include/linux/irq_cpustat.h arch/arm64/include/asm/cacheflush.h \
+ arch/arm64/include/generated/asm/kmap_types.h \
+ include/asm-generic/kmap_types.h include/linux/slab.h \
+ include/linux/kmemleak.h include/linux/kasan.h include/linux/vmalloc.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h \
+ include/linux/platform_device.h include/linux/device.h \
+ include/linux/ioport.h include/linux/kobject.h include/linux/sysfs.h \
+ include/linux/kernfs.h include/linux/kobject_ns.h include/linux/kref.h \
+ include/linux/klist.h include/linux/pinctrl/devinfo.h \
+ include/linux/pinctrl/consumer.h include/linux/seq_file.h \
+ include/linux/pinctrl/pinctrl-state.h include/linux/pm.h \
+ include/linux/ratelimit.h arch/arm64/include/asm/device.h \
+ include/linux/pm_wakeup.h include/linux/mod_devicetable.h \
+ include/linux/uuid.h include/uapi/linux/uuid.h \
+ include/linux/miscdevice.h include/uapi/linux/major.h \
+ include/linux/module.h include/linux/kmod.h include/linux/elf.h \
+ arch/arm64/include/asm/elf.h arch/arm64/include/generated/asm/user.h \
+ include/asm-generic/user.h include/uapi/linux/elf.h \
+ include/uapi/linux/elf-em.h include/linux/moduleparam.h \
+ include/linux/extable.h include/linux/rbtree_latch.h \
+ arch/arm64/include/asm/module.h include/asm-generic/module.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h \
+ include/linux/dma-mapping.h include/linux/dma-debug.h \
+ include/linux/dma-direction.h include/linux/scatterlist.h \
+ arch/arm64/include/asm/io.h \
+ arch/arm64/include/generated/asm/early_ioremap.h \
+ include/asm-generic/early_ioremap.h include/xen/xen.h \
+ include/asm-generic/io.h include/asm-generic/pci_iomap.h \
+ include/linux/kmemcheck.h arch/arm64/include/asm/dma-mapping.h \
+ arch/arm64/include/asm/xen/hypervisor.h \
+ arch/arm64/include/../../arm/include/asm/xen/hypervisor.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h \
+ include/linux/mempool.h include/linux/file.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h \
+ include/linux/fence.h include/linux/debugfs.h include/linux/clk.h \
+ include/linux/regulator/consumer.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_mem_linux.o.d b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_mem_linux.o.d
new file mode 100644
index 0000000..e11d8c8
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.mali_kbase_mem_linux.o.d
@@ -0,0 +1,284 @@
+mali_kbase_mem_linux.o: \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c \
+ include/linux/kconfig.h include/generated/autoconf.h \
+ include/linux/compat.h include/linux/types.h include/uapi/linux/types.h \
+ arch/arm64/include/generated/asm/types.h \
+ include/uapi/asm-generic/types.h include/asm-generic/int-ll64.h \
+ include/uapi/asm-generic/int-ll64.h \
+ arch/arm64/include/uapi/asm/bitsperlong.h \
+ include/asm-generic/bitsperlong.h include/uapi/asm-generic/bitsperlong.h \
+ include/uapi/linux/posix_types.h include/linux/stddef.h \
+ include/uapi/linux/stddef.h include/linux/compiler.h \
+ include/linux/compiler-gcc.h arch/arm64/include/uapi/asm/posix_types.h \
+ include/uapi/asm-generic/posix_types.h include/linux/stat.h \
+ arch/arm64/include/asm/stat.h arch/arm64/include/uapi/asm/stat.h \
+ include/uapi/asm-generic/stat.h arch/arm64/include/asm/compat.h \
+ include/linux/sched.h include/uapi/linux/sched.h \
+ include/linux/sched/prio.h arch/arm64/include/uapi/asm/param.h \
+ include/asm-generic/param.h include/uapi/asm-generic/param.h \
+ include/linux/capability.h include/uapi/linux/capability.h \
+ include/linux/threads.h include/linux/kernel.h \
+ /opt/gcc-linaro-aarch64-linux-gnu-4.9-2014.09_linux/lib/gcc/aarch64-linux-gnu/4.9.2/include/stdarg.h \
+ include/linux/linkage.h include/linux/stringify.h include/linux/export.h \
+ arch/arm64/include/asm/linkage.h include/linux/bitops.h \
+ arch/arm64/include/asm/bitops.h arch/arm64/include/asm/barrier.h \
+ include/asm-generic/barrier.h include/asm-generic/bitops/builtin-__ffs.h \
+ include/asm-generic/bitops/builtin-ffs.h \
+ include/asm-generic/bitops/builtin-__fls.h \
+ include/asm-generic/bitops/builtin-fls.h \
+ include/asm-generic/bitops/ffz.h include/asm-generic/bitops/fls64.h \
+ include/asm-generic/bitops/find.h include/asm-generic/bitops/sched.h \
+ include/asm-generic/bitops/hweight.h \
+ include/asm-generic/bitops/arch_hweight.h \
+ include/asm-generic/bitops/const_hweight.h \
+ include/asm-generic/bitops/lock.h \
+ include/asm-generic/bitops/non-atomic.h include/asm-generic/bitops/le.h \
+ arch/arm64/include/uapi/asm/byteorder.h \
+ include/linux/byteorder/little_endian.h \
+ include/uapi/linux/byteorder/little_endian.h include/linux/swab.h \
+ include/uapi/linux/swab.h arch/arm64/include/generated/asm/swab.h \
+ include/uapi/asm-generic/swab.h include/linux/byteorder/generic.h \
+ include/linux/log2.h include/linux/typecheck.h include/linux/printk.h \
+ include/linux/init.h include/linux/kern_levels.h include/linux/cache.h \
+ include/uapi/linux/kernel.h include/uapi/linux/sysinfo.h \
+ arch/arm64/include/asm/cache.h arch/arm64/include/asm/cachetype.h \
+ arch/arm64/include/asm/cputype.h arch/arm64/include/asm/sysreg.h \
+ arch/arm64/include/asm/opcodes.h \
+ arch/arm64/include/../../arm/include/asm/opcodes.h include/linux/timex.h \
+ include/uapi/linux/timex.h include/linux/time.h include/linux/seqlock.h \
+ include/linux/spinlock.h include/linux/preempt.h include/linux/list.h \
+ include/linux/poison.h include/uapi/linux/const.h \
+ arch/arm64/include/generated/asm/preempt.h include/asm-generic/preempt.h \
+ include/linux/thread_info.h include/linux/bug.h \
+ arch/arm64/include/asm/bug.h arch/arm64/include/asm/brk-imm.h \
+ include/asm-generic/bug.h arch/arm64/include/asm/thread_info.h \
+ include/linux/irqflags.h arch/arm64/include/asm/irqflags.h \
+ arch/arm64/include/asm/ptrace.h arch/arm64/include/uapi/asm/ptrace.h \
+ arch/arm64/include/asm/hwcap.h arch/arm64/include/uapi/asm/hwcap.h \
+ include/asm-generic/ptrace.h include/linux/bottom_half.h \
+ include/linux/spinlock_types.h arch/arm64/include/asm/spinlock_types.h \
+ include/linux/lockdep.h include/linux/rwlock_types.h \
+ arch/arm64/include/asm/spinlock.h arch/arm64/include/asm/lse.h \
+ arch/arm64/include/asm/processor.h include/linux/string.h \
+ include/uapi/linux/string.h arch/arm64/include/asm/string.h \
+ arch/arm64/include/asm/alternative.h arch/arm64/include/asm/cpucaps.h \
+ arch/arm64/include/asm/insn.h arch/arm64/include/asm/fpsimd.h \
+ arch/arm64/include/asm/hw_breakpoint.h \
+ arch/arm64/include/asm/cpufeature.h include/linux/jump_label.h \
+ arch/arm64/include/asm/jump_label.h arch/arm64/include/asm/virt.h \
+ arch/arm64/include/asm/sections.h include/asm-generic/sections.h \
+ arch/arm64/include/asm/pgtable-hwdef.h include/linux/rwlock.h \
+ include/linux/spinlock_api_smp.h include/linux/rwlock_api_smp.h \
+ include/linux/atomic.h arch/arm64/include/asm/atomic.h \
+ arch/arm64/include/asm/atomic_ll_sc.h arch/arm64/include/asm/cmpxchg.h \
+ include/asm-generic/atomic-long.h include/linux/math64.h \
+ arch/arm64/include/generated/asm/div64.h include/asm-generic/div64.h \
+ include/linux/time64.h include/uapi/linux/time.h \
+ include/uapi/linux/param.h arch/arm64/include/asm/timex.h \
+ arch/arm64/include/asm/arch_timer.h include/clocksource/arm_arch_timer.h \
+ include/linux/timecounter.h include/asm-generic/timex.h \
+ include/linux/jiffies.h include/generated/timeconst.h \
+ include/linux/plist.h include/linux/rbtree.h include/linux/rcupdate.h \
+ include/linux/cpumask.h include/linux/bitmap.h \
+ include/linux/completion.h include/linux/wait.h \
+ arch/arm64/include/generated/asm/current.h include/asm-generic/current.h \
+ include/uapi/linux/wait.h include/linux/debugobjects.h \
+ include/linux/ktime.h include/linux/timekeeping.h include/linux/errno.h \
+ include/uapi/linux/errno.h arch/arm64/include/generated/asm/errno.h \
+ include/uapi/asm-generic/errno.h include/uapi/asm-generic/errno-base.h \
+ include/linux/rcutree.h include/linux/nodemask.h include/linux/numa.h \
+ include/linux/mm_types.h include/linux/auxvec.h \
+ include/uapi/linux/auxvec.h arch/arm64/include/uapi/asm/auxvec.h \
+ include/linux/rwsem.h include/linux/err.h include/linux/osq_lock.h \
+ arch/arm64/include/generated/asm/rwsem.h include/asm-generic/rwsem.h \
+ include/linux/uprobes.h include/linux/page-flags-layout.h \
+ include/generated/bounds.h arch/arm64/include/asm/sparsemem.h \
+ include/linux/workqueue.h include/linux/timer.h include/linux/sysctl.h \
+ include/linux/uidgid.h include/linux/highuid.h \
+ include/uapi/linux/sysctl.h arch/arm64/include/asm/page.h \
+ include/linux/personality.h include/uapi/linux/personality.h \
+ arch/arm64/include/asm/pgtable-types.h \
+ include/asm-generic/pgtable-nopud.h arch/arm64/include/asm/memory.h \
+ arch/arm64/include/generated/asm/sizes.h include/asm-generic/sizes.h \
+ include/linux/sizes.h include/linux/mmdebug.h \
+ include/asm-generic/memory_model.h include/linux/pfn.h \
+ include/asm-generic/getorder.h arch/arm64/include/asm/mmu.h \
+ include/linux/cputime.h arch/arm64/include/generated/asm/cputime.h \
+ include/asm-generic/cputime.h include/asm-generic/cputime_jiffies.h \
+ include/linux/smp.h include/linux/llist.h arch/arm64/include/asm/smp.h \
+ include/linux/sem.h include/uapi/linux/sem.h include/linux/ipc.h \
+ include/uapi/linux/ipc.h arch/arm64/include/generated/asm/ipcbuf.h \
+ include/uapi/asm-generic/ipcbuf.h \
+ arch/arm64/include/generated/asm/sembuf.h \
+ include/uapi/asm-generic/sembuf.h include/linux/shm.h \
+ include/uapi/linux/shm.h arch/arm64/include/generated/asm/shmbuf.h \
+ include/uapi/asm-generic/shmbuf.h arch/arm64/include/asm/shmparam.h \
+ include/uapi/asm-generic/shmparam.h include/linux/signal.h \
+ include/uapi/linux/signal.h arch/arm64/include/uapi/asm/signal.h \
+ include/asm-generic/signal.h include/uapi/asm-generic/signal.h \
+ include/uapi/asm-generic/signal-defs.h \
+ arch/arm64/include/uapi/asm/sigcontext.h \
+ arch/arm64/include/uapi/asm/siginfo.h include/asm-generic/siginfo.h \
+ include/uapi/asm-generic/siginfo.h include/linux/pid.h \
+ include/linux/percpu.h arch/arm64/include/asm/percpu.h \
+ include/asm-generic/percpu.h include/linux/percpu-defs.h \
+ include/linux/topology.h include/linux/mmzone.h \
+ include/linux/pageblock-flags.h include/linux/memory_hotplug.h \
+ include/linux/notifier.h include/linux/mutex.h include/linux/srcu.h \
+ arch/arm64/include/asm/topology.h include/asm-generic/topology.h \
+ include/linux/seccomp.h include/uapi/linux/seccomp.h \
+ arch/arm64/include/asm/seccomp.h arch/arm64/include/asm/unistd.h \
+ arch/arm64/include/uapi/asm/unistd.h include/asm-generic/unistd.h \
+ include/uapi/asm-generic/unistd.h include/asm-generic/seccomp.h \
+ include/uapi/linux/unistd.h include/linux/rculist.h \
+ include/linux/rtmutex.h include/linux/resource.h \
+ include/uapi/linux/resource.h \
+ arch/arm64/include/generated/asm/resource.h \
+ include/asm-generic/resource.h include/uapi/asm-generic/resource.h \
+ include/linux/hrtimer.h include/linux/timerqueue.h include/linux/kcov.h \
+ include/uapi/linux/kcov.h include/linux/task_io_accounting.h \
+ include/linux/latencytop.h include/linux/cred.h include/linux/key.h \
+ include/linux/assoc_array.h include/linux/selinux.h include/linux/gfp.h \
+ include/uapi/linux/magic.h include/linux/cgroup-defs.h \
+ include/uapi/linux/limits.h include/linux/idr.h \
+ include/linux/percpu-refcount.h include/linux/percpu-rwsem.h \
+ include/linux/rcu_sync.h include/linux/bpf-cgroup.h \
+ include/uapi/linux/bpf.h include/uapi/linux/bpf_common.h \
+ include/linux/cgroup_subsys.h include/uapi/linux/stat.h \
+ include/linux/socket.h arch/arm64/include/generated/asm/socket.h \
+ include/uapi/asm-generic/socket.h \
+ arch/arm64/include/generated/asm/sockios.h \
+ include/uapi/asm-generic/sockios.h include/uapi/linux/sockios.h \
+ include/linux/uio.h include/uapi/linux/uio.h include/uapi/linux/socket.h \
+ include/uapi/linux/if.h include/uapi/linux/libc-compat.h \
+ include/uapi/linux/hdlc/ioctl.h include/linux/fs.h \
+ include/linux/kdev_t.h include/uapi/linux/kdev_t.h \
+ include/linux/dcache.h include/linux/rculist_bl.h \
+ include/linux/list_bl.h include/linux/bit_spinlock.h \
+ include/linux/lockref.h include/linux/stringhash.h include/linux/hash.h \
+ include/linux/path.h include/linux/list_lru.h include/linux/shrinker.h \
+ include/linux/radix-tree.h include/linux/semaphore.h \
+ include/uapi/linux/fiemap.h include/linux/migrate_mode.h \
+ include/linux/blk_types.h include/linux/bvec.h \
+ include/linux/delayed_call.h include/uapi/linux/fs.h \
+ include/uapi/linux/ioctl.h arch/arm64/include/generated/asm/ioctl.h \
+ include/asm-generic/ioctl.h include/uapi/asm-generic/ioctl.h \
+ include/linux/quota.h include/linux/percpu_counter.h \
+ include/uapi/linux/dqblk_xfs.h include/linux/dqblk_v1.h \
+ include/linux/dqblk_v2.h include/linux/dqblk_qtree.h \
+ include/linux/projid.h include/uapi/linux/quota.h \
+ include/linux/nfs_fs_i.h include/linux/fcntl.h \
+ include/uapi/linux/fcntl.h arch/arm64/include/uapi/asm/fcntl.h \
+ include/uapi/asm-generic/fcntl.h include/uapi/linux/aio_abi.h \
+ include/linux/mm.h include/linux/debug_locks.h include/linux/range.h \
+ include/linux/page_ext.h include/linux/stacktrace.h \
+ include/linux/stackdepot.h include/linux/page_ref.h \
+ include/linux/page-flags.h include/linux/tracepoint-defs.h \
+ include/linux/static_key.h arch/arm64/include/asm/pgtable.h \
+ arch/arm64/include/asm/proc-fns.h arch/arm64/include/asm/pgtable-prot.h \
+ arch/arm64/include/asm/fixmap.h arch/arm64/include/asm/boot.h \
+ include/asm-generic/fixmap.h include/asm-generic/pgtable.h \
+ include/linux/huge_mm.h include/linux/vmstat.h \
+ include/linux/vm_event_item.h include/linux/mman.h \
+ include/uapi/linux/mman.h arch/arm64/include/generated/asm/mman.h \
+ include/uapi/asm-generic/mman.h include/uapi/asm-generic/mman-common.h \
+ include/generated/uapi/linux/version.h include/linux/dma-mapping.h \
+ include/linux/device.h include/linux/ioport.h include/linux/kobject.h \
+ include/linux/sysfs.h include/linux/kernfs.h include/linux/kobject_ns.h \
+ include/linux/kref.h include/linux/klist.h \
+ include/linux/pinctrl/devinfo.h include/linux/pinctrl/consumer.h \
+ include/linux/seq_file.h include/linux/pinctrl/pinctrl-state.h \
+ include/linux/pm.h include/linux/ratelimit.h \
+ arch/arm64/include/asm/device.h include/linux/pm_wakeup.h \
+ include/linux/dma-debug.h include/linux/dma-direction.h \
+ include/linux/scatterlist.h arch/arm64/include/asm/io.h \
+ arch/arm64/include/generated/asm/early_ioremap.h \
+ include/asm-generic/early_ioremap.h include/xen/xen.h \
+ include/asm-generic/io.h include/asm-generic/pci_iomap.h \
+ include/linux/vmalloc.h include/linux/kmemcheck.h \
+ arch/arm64/include/asm/dma-mapping.h \
+ arch/arm64/include/asm/xen/hypervisor.h \
+ arch/arm64/include/../../arm/include/asm/xen/hypervisor.h \
+ include/linux/dma-buf.h include/linux/file.h include/linux/fence.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h \
+ include/linux/highmem.h include/linux/uaccess.h \
+ arch/arm64/include/asm/uaccess.h arch/arm64/include/asm/kernel-pgtable.h \
+ include/linux/kasan-checks.h arch/arm64/include/asm/compiler.h \
+ include/linux/hardirq.h include/linux/ftrace_irq.h include/linux/vtime.h \
+ include/linux/context_tracking_state.h arch/arm64/include/asm/hardirq.h \
+ arch/arm64/include/asm/irq.h include/asm-generic/irq.h \
+ include/linux/irq_cpustat.h arch/arm64/include/asm/cacheflush.h \
+ arch/arm64/include/generated/asm/kmap_types.h \
+ include/asm-generic/kmap_types.h include/linux/slab.h \
+ include/linux/kmemleak.h include/linux/kasan.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h \
+ include/linux/platform_device.h include/linux/mod_devicetable.h \
+ include/linux/uuid.h include/uapi/linux/uuid.h \
+ include/linux/miscdevice.h include/uapi/linux/major.h \
+ include/linux/module.h include/linux/kmod.h include/linux/elf.h \
+ arch/arm64/include/asm/elf.h arch/arm64/include/generated/asm/user.h \
+ include/asm-generic/user.h include/uapi/linux/elf.h \
+ include/uapi/linux/elf-em.h include/linux/moduleparam.h \
+ include/linux/extable.h include/linux/rbtree_latch.h \
+ arch/arm64/include/asm/module.h include/asm-generic/module.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h \
+ include/linux/mempool.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h \
+ include/linux/debugfs.h include/linux/clk.h \
+ include/linux/regulator/consumer.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.tmp_versions/mali_kbase.mod b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.tmp_versions/mali_kbase.mod
new file mode 100644
index 0000000..7ab0ba0
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/.tmp_versions/mali_kbase.mod
@@ -0,0 +1,3 @@
+/home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.ko
+/home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_device.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_event.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_context.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.o /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.o
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/Kbuild b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/Kbuild
new file mode 100755
index 0000000..09f9eb9
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/Kbuild
@@ -0,0 +1,173 @@
+#
+# (C) COPYRIGHT 2012-2016, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r9p0-01rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+ifeq ($(CONFIG_MALI_ERROR_INJECT),y)
+MALI_ERROR_INJECT_ON = 1
+endif
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_ERROR_INJECT_ON ?= 0
+MALI_MOCK_TEST ?= 0
+MALI_COVERAGE ?= 0
+CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
+# This workaround is for what seems to be a compiler bug we observed in
+# GCC 4.7 on AOSP 4.3.  The bug caused an intermittent failure compiling
+# the "_Pragma" syntax, where an error message is returned:
+#
+# "internal compiler error: unspellable token PRAGMA"
+#
+# This regression has thus far only been seen on the GCC 4.7 compiler bundled
+# with AOSP 4.3.0.  So this makefile, intended for in-tree kernel builds
+# which are not known to be used with AOSP, is hardcoded to disable the
+# workaround, i.e. set the define to 0.
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
+	-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
+	-DMALI_COVERAGE=$(MALI_COVERAGE) \
+	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+	-DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+
+# Use our defines when compiling
+ldflags-y += --strip-debug
+ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+	mali_kbase_device.c \
+	mali_kbase_cache_policy.c \
+	mali_kbase_mem.c \
+	mali_kbase_mmu.c \
+	mali_kbase_ctx_sched.c \
+	mali_kbase_jd.c \
+	mali_kbase_jd_debugfs.c \
+	mali_kbase_jm.c \
+	mali_kbase_gpuprops.c \
+	mali_kbase_js.c \
+	mali_kbase_js_ctx_attr.c \
+	mali_kbase_event.c \
+	mali_kbase_context.c \
+	mali_kbase_pm.c \
+	mali_kbase_config.c \
+	mali_kbase_vinstr.c \
+	mali_kbase_softjobs.c \
+	mali_kbase_10969_workaround.c \
+	mali_kbase_hw.c \
+	mali_kbase_utility.c \
+	mali_kbase_debug.c \
+	mali_kbase_trace_timeline.c \
+	mali_kbase_gpu_memory_debugfs.c \
+	mali_kbase_mem_linux.c \
+	mali_kbase_core_linux.c \
+	mali_kbase_replay.c \
+	mali_kbase_mem_profile_debugfs.c \
+	mali_kbase_mmu_mode_lpae.c \
+	mali_kbase_mmu_mode_aarch64.c \
+	mali_kbase_disjoint_events.c \
+	mali_kbase_gator_api.c \
+	mali_kbase_debug_mem_view.c \
+	mali_kbase_debug_job_fault.c \
+	mali_kbase_smc.c \
+	mali_kbase_mem_pool.c \
+	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_tlstream.c \
+	mali_kbase_strings.c \
+	mali_kbase_as_fault_debugfs.c \
+	mali_kbase_regs_history_debugfs.c
+
+
+
+
+ifeq ($(MALI_UNIT_TEST),1)
+	SRC += mali_kbase_tlstream_test.c
+endif
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+	SRC += mali_kbase_regs_dump_debugfs.c
+endif
+
+
+ccflags-y += -I$(KBASE_PATH)
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+mali_kbase-y := $(SRC:.c=.o)
+
+# Kconfig passes in the name with quotes for in-tree builds - remove them.
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_NAME))
+MALI_PLATFORM_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_DIR)
+include $(src)/$(MALI_PLATFORM_DIR)/Kbuild
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+  ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
+    include $(src)/ipa/Kbuild
+  endif
+endif
+
+mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \
+	mali_kbase_dma_fence.o \
+	mali_kbase_fence.o
+mali_kbase-$(CONFIG_SYNC) += \
+	mali_kbase_sync_android.o \
+	mali_kbase_sync_common.o
+mali_kbase-$(CONFIG_SYNC_FILE) += \
+	mali_kbase_sync_file.o \
+	mali_kbase_sync_common.o \
+	mali_kbase_fence.o
+
+ifeq ($(MALI_MOCK_TEST),1)
+# Test functionality
+mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
+endif
+
+include  $(src)/backend/gpu/Kbuild
+mali_kbase-y += $(BACKEND:.c=.o)
+
+
+ccflags-y += -I$(src)/backend/gpu
+subdir-ccflags-y += -I$(src)/backend/gpu
+
+# For kutf and mali_kutf_irq_latency_test
+obj-$(CONFIG_MALI_KUTF) += tests/
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/Kconfig b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/Kconfig
new file mode 100644
index 0000000..bcc2d4a
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/Kconfig
@@ -0,0 +1,201 @@
+#
+# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menuconfig MALI_MIDGARD
+	tristate "Mali Midgard series support"
+	select GPU_TRACEPOINTS if ANDROID
+	default n
+	help
+	  Enable this option to build support for a ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+	  You will need the Gator device driver already loaded before loading this driver when enabling
+	  Streamline debug support.
+	  This is a legacy interface required by older versions of Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD && PM_DEVFREQ
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if the Linux Kernel has built in
+	  support for DMA_BUF fences.
+
+config MALI_PLATFORM_NAME
+	depends on MALI_MIDGARD
+	string "Platform name"
+	default "devicetree"
+	help
+	  Enter the name of the desired platform configuration directory to
+	  include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
+	  exist.
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default n
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_CORESTACK
+	bool "Support controlling power to the GPU core stack"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enabling this feature on supported GPUs will let the driver powering
+	  on/off the GPU core stack independently without involving the Power
+	  Domain Controller. This should only be enabled on platforms which
+	  integration of the PDC to the Mali GPU is known to be problematic.
+	  This feature is currently only supported on t-Six and t-HEx GPUs.
+
+	  If unsure, say N.
+
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_FENCE_DEBUG
+	bool "Debug sync fence usage"
+	depends on MALI_MIDGARD && MALI_EXPERT && (SYNC || SYNC_FILE)
+	default y if MALI_DEBUG
+	help
+	  Select this option to enable additional checking and reporting on the
+	  use of sync fences in the Mali driver.
+
+	  This will add a 3s timeout to all sync fence waits in the Mali
+	  driver, so that when work for Mali has been waiting on a sync fence
+	  for a long time a debug message will be printed, detailing what fence
+	  is causing the block, and which dependent Mali atoms are blocked as a
+	  result of this.
+
+	  The timeout can be changed at runtime through the js_soft_timeout
+	  device attribute, where the timeout is specified in milliseconds.
+
+config MALI_NO_MALI
+	bool "No Mali"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This can be used to test the driver in a simulated environment
+	  whereby the hardware is not physically present. If the hardware is physically
+	  present it will not be used. This can be used to test the majority of the
+	  driver without needing actual hardware or for software benchmarking.
+	  All calls to the simulated hardware will complete immediately as if the hardware
+	  completed the task.
+
+config MALI_ERROR_INJECT
+	bool "Error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+	default n
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_TRACE_TIMELINE
+	bool "Timeline tracing"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enables timeline tracing through the kernel tracepoint system.
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event.	This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_2MB_ALLOC
+	bool "Attempt to allocate 2MB pages"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Rather than allocating all GPU memory page-by-page, attempt to
+	  allocate 2MB pages from the kernel. This reduces TLB pressure and
+	  helps to prevent memory fragmentation.
+
+	  If in doubt, say N
+
+config MALI_PWRSOFT_765
+	bool "PWRSOFT-765 ticket"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged
+	  in kernel v4.10, however if backported into the kernel then this
+	  option must be manually selected.
+
+	  If using kernel >= v4.10 then say N, otherwise if devfreq cooling
+	  changes have been backported say Y to avoid compilation errors.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
+source "drivers/gpu/arm/midgard/tests/Kconfig"
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/Makefile b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/Makefile
new file mode 100644
index 0000000..b7b261a
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/Makefile
@@ -0,0 +1,38 @@
+#
+# (C) COPYRIGHT 2010-2016, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
+UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump
+KBASE_PATH_RELATIVE = $(CURDIR)
+EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers
+
+ifeq ($(MALI_UNIT_TEST), 1)
+	EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+
+ifeq ($(CONFIG_MALI_FPGA_BUS_LOGGER),y)
+#Add bus logger symbols
+EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
+endif
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
new file mode 100644
index 0000000..2bef9c2
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/.mali_kbase_instr_backend.o.d b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/.mali_kbase_instr_backend.o.d
new file mode 100644
index 0000000..6b2ce47
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/.mali_kbase_instr_backend.o.d
@@ -0,0 +1,275 @@
+mali_kbase_instr_backend.o: \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c \
+ include/linux/kconfig.h include/generated/autoconf.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h \
+ include/generated/uapi/linux/version.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h \
+ include/linux/bug.h arch/arm64/include/asm/bug.h \
+ arch/arm64/include/asm/brk-imm.h include/asm-generic/bug.h \
+ include/linux/compiler.h include/linux/compiler-gcc.h \
+ include/uapi/linux/types.h arch/arm64/include/generated/asm/types.h \
+ include/uapi/asm-generic/types.h include/asm-generic/int-ll64.h \
+ include/uapi/asm-generic/int-ll64.h \
+ arch/arm64/include/uapi/asm/bitsperlong.h \
+ include/asm-generic/bitsperlong.h include/uapi/asm-generic/bitsperlong.h \
+ include/uapi/linux/posix_types.h include/linux/stddef.h \
+ include/uapi/linux/stddef.h arch/arm64/include/uapi/asm/posix_types.h \
+ include/uapi/asm-generic/posix_types.h include/linux/kernel.h \
+ /opt/gcc-linaro-aarch64-linux-gnu-4.9-2014.09_linux/lib/gcc/aarch64-linux-gnu/4.9.2/include/stdarg.h \
+ include/linux/linkage.h include/linux/stringify.h include/linux/export.h \
+ arch/arm64/include/asm/linkage.h include/linux/types.h \
+ include/linux/bitops.h arch/arm64/include/asm/bitops.h \
+ arch/arm64/include/asm/barrier.h include/asm-generic/barrier.h \
+ include/asm-generic/bitops/builtin-__ffs.h \
+ include/asm-generic/bitops/builtin-ffs.h \
+ include/asm-generic/bitops/builtin-__fls.h \
+ include/asm-generic/bitops/builtin-fls.h \
+ include/asm-generic/bitops/ffz.h include/asm-generic/bitops/fls64.h \
+ include/asm-generic/bitops/find.h include/asm-generic/bitops/sched.h \
+ include/asm-generic/bitops/hweight.h \
+ include/asm-generic/bitops/arch_hweight.h \
+ include/asm-generic/bitops/const_hweight.h \
+ include/asm-generic/bitops/lock.h \
+ include/asm-generic/bitops/non-atomic.h include/asm-generic/bitops/le.h \
+ arch/arm64/include/uapi/asm/byteorder.h \
+ include/linux/byteorder/little_endian.h \
+ include/uapi/linux/byteorder/little_endian.h include/linux/swab.h \
+ include/uapi/linux/swab.h arch/arm64/include/generated/asm/swab.h \
+ include/uapi/asm-generic/swab.h include/linux/byteorder/generic.h \
+ include/linux/log2.h include/linux/typecheck.h include/linux/printk.h \
+ include/linux/init.h include/linux/kern_levels.h include/linux/cache.h \
+ include/uapi/linux/kernel.h include/uapi/linux/sysinfo.h \
+ arch/arm64/include/asm/cache.h arch/arm64/include/asm/cachetype.h \
+ arch/arm64/include/asm/cputype.h arch/arm64/include/asm/sysreg.h \
+ arch/arm64/include/asm/opcodes.h \
+ arch/arm64/include/../../arm/include/asm/opcodes.h \
+ arch/arm64/include/asm/page.h include/uapi/linux/const.h \
+ include/linux/personality.h include/uapi/linux/personality.h \
+ arch/arm64/include/asm/pgtable-types.h \
+ include/asm-generic/pgtable-nopud.h arch/arm64/include/asm/memory.h \
+ arch/arm64/include/generated/asm/sizes.h include/asm-generic/sizes.h \
+ include/linux/sizes.h include/linux/mmdebug.h \
+ include/asm-generic/memory_model.h include/linux/pfn.h \
+ include/asm-generic/getorder.h include/linux/atomic.h \
+ arch/arm64/include/asm/atomic.h arch/arm64/include/asm/lse.h \
+ arch/arm64/include/asm/atomic_ll_sc.h arch/arm64/include/asm/cmpxchg.h \
+ include/asm-generic/atomic-long.h include/linux/highmem.h \
+ include/linux/fs.h include/linux/wait.h include/linux/list.h \
+ include/linux/poison.h include/linux/spinlock.h include/linux/preempt.h \
+ arch/arm64/include/generated/asm/preempt.h include/asm-generic/preempt.h \
+ include/linux/thread_info.h arch/arm64/include/asm/thread_info.h \
+ include/linux/irqflags.h arch/arm64/include/asm/irqflags.h \
+ arch/arm64/include/asm/ptrace.h arch/arm64/include/uapi/asm/ptrace.h \
+ arch/arm64/include/asm/hwcap.h arch/arm64/include/uapi/asm/hwcap.h \
+ include/asm-generic/ptrace.h include/linux/bottom_half.h \
+ include/linux/spinlock_types.h arch/arm64/include/asm/spinlock_types.h \
+ include/linux/lockdep.h include/linux/rwlock_types.h \
+ arch/arm64/include/asm/spinlock.h arch/arm64/include/asm/processor.h \
+ include/linux/string.h include/uapi/linux/string.h \
+ arch/arm64/include/asm/string.h arch/arm64/include/asm/alternative.h \
+ arch/arm64/include/asm/cpucaps.h arch/arm64/include/asm/insn.h \
+ arch/arm64/include/asm/fpsimd.h arch/arm64/include/asm/hw_breakpoint.h \
+ arch/arm64/include/asm/cpufeature.h include/linux/jump_label.h \
+ arch/arm64/include/asm/jump_label.h arch/arm64/include/asm/virt.h \
+ arch/arm64/include/asm/sections.h include/asm-generic/sections.h \
+ arch/arm64/include/asm/pgtable-hwdef.h include/linux/rwlock.h \
+ include/linux/spinlock_api_smp.h include/linux/rwlock_api_smp.h \
+ arch/arm64/include/generated/asm/current.h include/asm-generic/current.h \
+ include/uapi/linux/wait.h include/linux/kdev_t.h \
+ include/uapi/linux/kdev_t.h include/linux/dcache.h \
+ include/linux/rculist.h include/linux/rcupdate.h include/linux/threads.h \
+ include/linux/cpumask.h include/linux/bitmap.h include/linux/seqlock.h \
+ include/linux/completion.h include/linux/debugobjects.h \
+ include/linux/ktime.h include/linux/time.h include/linux/math64.h \
+ arch/arm64/include/generated/asm/div64.h include/asm-generic/div64.h \
+ include/linux/time64.h include/uapi/linux/time.h include/linux/jiffies.h \
+ include/linux/timex.h include/uapi/linux/timex.h \
+ include/uapi/linux/param.h arch/arm64/include/uapi/asm/param.h \
+ include/asm-generic/param.h include/uapi/asm-generic/param.h \
+ arch/arm64/include/asm/timex.h arch/arm64/include/asm/arch_timer.h \
+ include/clocksource/arm_arch_timer.h include/linux/timecounter.h \
+ include/asm-generic/timex.h include/generated/timeconst.h \
+ include/linux/timekeeping.h include/linux/errno.h \
+ include/uapi/linux/errno.h arch/arm64/include/generated/asm/errno.h \
+ include/uapi/asm-generic/errno.h include/uapi/asm-generic/errno-base.h \
+ include/linux/rcutree.h include/linux/rculist_bl.h \
+ include/linux/list_bl.h include/linux/bit_spinlock.h \
+ include/linux/lockref.h include/generated/bounds.h \
+ include/linux/stringhash.h include/linux/hash.h include/linux/path.h \
+ include/linux/stat.h arch/arm64/include/asm/stat.h \
+ arch/arm64/include/uapi/asm/stat.h include/uapi/asm-generic/stat.h \
+ arch/arm64/include/asm/compat.h include/linux/sched.h \
+ include/uapi/linux/sched.h include/linux/sched/prio.h \
+ include/linux/capability.h include/uapi/linux/capability.h \
+ include/linux/plist.h include/linux/rbtree.h include/linux/nodemask.h \
+ include/linux/numa.h include/linux/mm_types.h include/linux/auxvec.h \
+ include/uapi/linux/auxvec.h arch/arm64/include/uapi/asm/auxvec.h \
+ include/linux/rwsem.h include/linux/err.h include/linux/osq_lock.h \
+ arch/arm64/include/generated/asm/rwsem.h include/asm-generic/rwsem.h \
+ include/linux/uprobes.h include/linux/page-flags-layout.h \
+ arch/arm64/include/asm/sparsemem.h include/linux/workqueue.h \
+ include/linux/timer.h include/linux/sysctl.h include/linux/uidgid.h \
+ include/linux/highuid.h include/uapi/linux/sysctl.h \
+ arch/arm64/include/asm/mmu.h include/linux/cputime.h \
+ arch/arm64/include/generated/asm/cputime.h include/asm-generic/cputime.h \
+ include/asm-generic/cputime_jiffies.h include/linux/smp.h \
+ include/linux/llist.h arch/arm64/include/asm/smp.h include/linux/sem.h \
+ include/uapi/linux/sem.h include/linux/ipc.h include/uapi/linux/ipc.h \
+ arch/arm64/include/generated/asm/ipcbuf.h \
+ include/uapi/asm-generic/ipcbuf.h \
+ arch/arm64/include/generated/asm/sembuf.h \
+ include/uapi/asm-generic/sembuf.h include/linux/shm.h \
+ include/uapi/linux/shm.h arch/arm64/include/generated/asm/shmbuf.h \
+ include/uapi/asm-generic/shmbuf.h arch/arm64/include/asm/shmparam.h \
+ include/uapi/asm-generic/shmparam.h include/linux/signal.h \
+ include/uapi/linux/signal.h arch/arm64/include/uapi/asm/signal.h \
+ include/asm-generic/signal.h include/uapi/asm-generic/signal.h \
+ include/uapi/asm-generic/signal-defs.h \
+ arch/arm64/include/uapi/asm/sigcontext.h \
+ arch/arm64/include/uapi/asm/siginfo.h include/asm-generic/siginfo.h \
+ include/uapi/asm-generic/siginfo.h include/linux/pid.h \
+ include/linux/percpu.h arch/arm64/include/asm/percpu.h \
+ include/asm-generic/percpu.h include/linux/percpu-defs.h \
+ include/linux/topology.h include/linux/mmzone.h \
+ include/linux/pageblock-flags.h include/linux/memory_hotplug.h \
+ include/linux/notifier.h include/linux/mutex.h include/linux/srcu.h \
+ arch/arm64/include/asm/topology.h include/asm-generic/topology.h \
+ include/linux/seccomp.h include/uapi/linux/seccomp.h \
+ arch/arm64/include/asm/seccomp.h arch/arm64/include/asm/unistd.h \
+ arch/arm64/include/uapi/asm/unistd.h include/asm-generic/unistd.h \
+ include/uapi/asm-generic/unistd.h include/asm-generic/seccomp.h \
+ include/uapi/linux/unistd.h include/linux/rtmutex.h \
+ include/linux/resource.h include/uapi/linux/resource.h \
+ arch/arm64/include/generated/asm/resource.h \
+ include/asm-generic/resource.h include/uapi/asm-generic/resource.h \
+ include/linux/hrtimer.h include/linux/timerqueue.h include/linux/kcov.h \
+ include/uapi/linux/kcov.h include/linux/task_io_accounting.h \
+ include/linux/latencytop.h include/linux/cred.h include/linux/key.h \
+ include/linux/assoc_array.h include/linux/selinux.h include/linux/gfp.h \
+ include/uapi/linux/magic.h include/linux/cgroup-defs.h \
+ include/uapi/linux/limits.h include/linux/idr.h \
+ include/linux/percpu-refcount.h include/linux/percpu-rwsem.h \
+ include/linux/rcu_sync.h include/linux/bpf-cgroup.h \
+ include/uapi/linux/bpf.h include/uapi/linux/bpf_common.h \
+ include/linux/cgroup_subsys.h include/uapi/linux/stat.h \
+ include/linux/list_lru.h include/linux/shrinker.h \
+ include/linux/radix-tree.h include/linux/semaphore.h \
+ include/uapi/linux/fiemap.h include/linux/migrate_mode.h \
+ include/linux/blk_types.h include/linux/bvec.h \
+ include/linux/delayed_call.h include/uapi/linux/fs.h \
+ include/uapi/linux/ioctl.h arch/arm64/include/generated/asm/ioctl.h \
+ include/asm-generic/ioctl.h include/uapi/asm-generic/ioctl.h \
+ include/linux/quota.h include/linux/percpu_counter.h \
+ include/uapi/linux/dqblk_xfs.h include/linux/dqblk_v1.h \
+ include/linux/dqblk_v2.h include/linux/dqblk_qtree.h \
+ include/linux/projid.h include/uapi/linux/quota.h \
+ include/linux/nfs_fs_i.h include/linux/fcntl.h \
+ include/uapi/linux/fcntl.h arch/arm64/include/uapi/asm/fcntl.h \
+ include/uapi/asm-generic/fcntl.h include/linux/mm.h \
+ include/linux/debug_locks.h include/linux/range.h \
+ include/linux/page_ext.h include/linux/stacktrace.h \
+ include/linux/stackdepot.h include/linux/page_ref.h \
+ include/linux/page-flags.h include/linux/tracepoint-defs.h \
+ include/linux/static_key.h arch/arm64/include/asm/pgtable.h \
+ arch/arm64/include/asm/proc-fns.h arch/arm64/include/asm/pgtable-prot.h \
+ arch/arm64/include/asm/fixmap.h arch/arm64/include/asm/boot.h \
+ include/asm-generic/fixmap.h include/asm-generic/pgtable.h \
+ include/linux/huge_mm.h include/linux/vmstat.h \
+ include/linux/vm_event_item.h include/linux/uaccess.h \
+ arch/arm64/include/asm/uaccess.h arch/arm64/include/asm/kernel-pgtable.h \
+ include/linux/kasan-checks.h arch/arm64/include/asm/compiler.h \
+ include/linux/hardirq.h include/linux/ftrace_irq.h include/linux/vtime.h \
+ include/linux/context_tracking_state.h arch/arm64/include/asm/hardirq.h \
+ arch/arm64/include/asm/irq.h include/asm-generic/irq.h \
+ include/linux/irq_cpustat.h arch/arm64/include/asm/cacheflush.h \
+ arch/arm64/include/generated/asm/kmap_types.h \
+ include/asm-generic/kmap_types.h include/linux/slab.h \
+ include/linux/kmemleak.h include/linux/kasan.h include/linux/vmalloc.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h \
+ include/linux/platform_device.h include/linux/device.h \
+ include/linux/ioport.h include/linux/kobject.h include/linux/sysfs.h \
+ include/linux/kernfs.h include/linux/kobject_ns.h include/linux/kref.h \
+ include/linux/klist.h include/linux/pinctrl/devinfo.h \
+ include/linux/pinctrl/consumer.h include/linux/seq_file.h \
+ include/linux/pinctrl/pinctrl-state.h include/linux/pm.h \
+ include/linux/ratelimit.h arch/arm64/include/asm/device.h \
+ include/linux/pm_wakeup.h include/linux/mod_devicetable.h \
+ include/linux/uuid.h include/uapi/linux/uuid.h \
+ include/linux/miscdevice.h include/uapi/linux/major.h \
+ include/linux/module.h include/linux/kmod.h include/linux/elf.h \
+ arch/arm64/include/asm/elf.h arch/arm64/include/generated/asm/user.h \
+ include/asm-generic/user.h include/uapi/linux/elf.h \
+ include/uapi/linux/elf-em.h include/linux/moduleparam.h \
+ include/linux/extable.h include/linux/rbtree_latch.h \
+ arch/arm64/include/asm/module.h include/asm-generic/module.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h \
+ include/linux/dma-mapping.h include/linux/dma-debug.h \
+ include/linux/dma-direction.h include/linux/scatterlist.h \
+ arch/arm64/include/asm/io.h \
+ arch/arm64/include/generated/asm/early_ioremap.h \
+ include/asm-generic/early_ioremap.h include/xen/xen.h \
+ include/asm-generic/io.h include/asm-generic/pci_iomap.h \
+ include/linux/kmemcheck.h arch/arm64/include/asm/dma-mapping.h \
+ arch/arm64/include/asm/xen/hypervisor.h \
+ arch/arm64/include/../../arm/include/asm/xen/hypervisor.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h \
+ include/linux/mempool.h include/linux/file.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h \
+ include/linux/fence.h include/linux/debugfs.h include/linux/clk.h \
+ include/linux/regulator/consumer.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h \
+ /home/jiyu.yang/bak/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
new file mode 100644
index 0000000..5f700e9
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -0,0 +1,60 @@
+#
+# (C) COPYRIGHT 2014,2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+BACKEND += \
+	backend/gpu/mali_kbase_cache_policy_backend.c \
+	backend/gpu/mali_kbase_device_hw.c \
+	backend/gpu/mali_kbase_gpu.c \
+	backend/gpu/mali_kbase_gpuprops_backend.c \
+	backend/gpu/mali_kbase_debug_job_fault_backend.c \
+	backend/gpu/mali_kbase_irq_linux.c \
+	backend/gpu/mali_kbase_instr_backend.c \
+	backend/gpu/mali_kbase_jm_as.c \
+	backend/gpu/mali_kbase_jm_hw.c \
+	backend/gpu/mali_kbase_jm_rb.c \
+	backend/gpu/mali_kbase_js_affinity.c \
+	backend/gpu/mali_kbase_js_backend.c \
+	backend/gpu/mali_kbase_mmu_hw_direct.c \
+	backend/gpu/mali_kbase_pm_backend.c \
+	backend/gpu/mali_kbase_pm_driver.c \
+	backend/gpu/mali_kbase_pm_metrics.c \
+	backend/gpu/mali_kbase_pm_ca.c \
+	backend/gpu/mali_kbase_pm_ca_fixed.c \
+	backend/gpu/mali_kbase_pm_always_on.c \
+	backend/gpu/mali_kbase_pm_coarse_demand.c \
+	backend/gpu/mali_kbase_pm_demand.c \
+	backend/gpu/mali_kbase_pm_policy.c \
+	backend/gpu/mali_kbase_time.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+BACKEND += \
+	backend/gpu/mali_kbase_pm_ca_random.c \
+	backend/gpu/mali_kbase_pm_demand_always_powered.c \
+	backend/gpu/mali_kbase_pm_fast_start.c
+endif
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+BACKEND += \
+	backend/gpu/mali_kbase_devfreq.c \
+	backend/gpu/mali_kbase_pm_ca_devfreq.c
+endif
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+	# Dummy model
+	BACKEND += backend/gpu/mali_kbase_model_dummy.c
+	BACKEND += backend/gpu/mali_kbase_model_linux.c
+	# HW error simulation
+	BACKEND += backend/gpu/mali_kbase_model_error_generator.c
+endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
new file mode 100644
index 0000000..c8ae87e
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100644
index 0000000..fef9a2c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode)
+{
+	kbdev->current_gpu_coherency_mode = mode;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100644
index 0000000..fe98691
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+  * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+  *			in the GPU.
+  * @kbdev:	Device pointer
+  * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+  */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
new file mode 100644
index 0000000..7851ea6
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_debug_job_fault.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+/*GPU_CONTROL_REG(r)*/
+static int gpu_control_reg_snapshot[] = {
+	GPU_ID,
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI
+};
+
+/* JOB_CONTROL_REG(r) */
+static int job_control_reg_snapshot[] = {
+	JOB_IRQ_MASK,
+	JOB_IRQ_STATUS
+};
+
+/* JOB_SLOT_REG(n,r) */
+static int job_slot_reg_snapshot[] = {
+	JS_HEAD_LO,
+	JS_HEAD_HI,
+	JS_TAIL_LO,
+	JS_TAIL_HI,
+	JS_AFFINITY_LO,
+	JS_AFFINITY_HI,
+	JS_CONFIG,
+	JS_STATUS,
+	JS_HEAD_NEXT_LO,
+	JS_HEAD_NEXT_HI,
+	JS_AFFINITY_NEXT_LO,
+	JS_AFFINITY_NEXT_HI,
+	JS_CONFIG_NEXT
+};
+
+/*MMU_REG(r)*/
+static int mmu_reg_snapshot[] = {
+	MMU_IRQ_MASK,
+	MMU_IRQ_STATUS
+};
+
+/* MMU_AS_REG(n,r) */
+static int as_reg_snapshot[] = {
+	AS_TRANSTAB_LO,
+	AS_TRANSTAB_HI,
+	AS_MEMATTR_LO,
+	AS_MEMATTR_HI,
+	AS_FAULTSTATUS,
+	AS_FAULTADDRESS_LO,
+	AS_FAULTADDRESS_HI,
+	AS_STATUS
+};
+
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range)
+{
+	int i, j;
+	int offset = 0;
+	int slot_number;
+	int as_number;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	slot_number = kctx->kbdev->gpu_props.num_job_slots;
+	as_number = kctx->kbdev->gpu_props.num_address_spaces;
+
+	/* get the GPU control registers*/
+	for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job control registers*/
+	for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				JOB_CONTROL_REG(job_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job Slot registers*/
+	for (j = 0; j < slot_number; j++)	{
+		for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+			JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	/* get the MMU registers*/
+	for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Address space registers*/
+	for (j = 0; j < as_number; j++) {
+		for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+					MMU_AS_REG(j, as_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	WARN_ON(offset >= (reg_range*2/4));
+
+	/* set the termination flag*/
+	kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
+	kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
+			offset);
+
+	return true;
+}
+
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
+{
+	int offset = 0;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
+		kctx->reg_dump[offset+1] =
+				kbase_reg_read(kctx->kbdev,
+						kctx->reg_dump[offset], NULL);
+		offset += 2;
+	}
+	return true;
+}
+
+
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
new file mode 100644
index 0000000..e280322
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -0,0 +1,413 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#define dev_pm_opp_find_freq_floor opp_find_freq_floor
+#endif /* Linux >= 3.13 */
+
+/**
+ * opp_translate - Translate nominal OPP frequency from devicetree into real
+ *                 frequency and core mask
+ * @kbdev:     Device pointer
+ * @freq:      Nominal frequency
+ * @core_mask: Pointer to u64 to store core mask to
+ *
+ * Return: Real target frequency
+ *
+ * This function will only perform translation if an operating-points-v2-mali
+ * table is present in devicetree. If one is not present then it will return an
+ * untranslated frequency and all cores enabled.
+ */
+static unsigned long opp_translate(struct kbase_device *kbdev,
+		unsigned long freq, u64 *core_mask)
+{
+	int i;
+
+	for (i = 0; i < kbdev->num_opps; i++) {
+		if (kbdev->opp_table[i].opp_freq == freq) {
+			*core_mask = kbdev->opp_table[i].core_mask;
+			return kbdev->opp_table[i].real_freq;
+		}
+	}
+
+	/* Failed to find OPP - return all cores enabled & nominal frequency */
+	*core_mask = kbdev->gpu_props.props.raw_props.shader_present;
+
+	return freq;
+}
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long nominal_freq;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err;
+	u64 core_mask;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+	if (IS_ERR_OR_NULL(opp)) {
+		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+
+	nominal_freq = freq;
+
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (kbdev->current_nominal_freq == nominal_freq) {
+		*target_freq = nominal_freq;
+		return 0;
+	}
+
+	freq = opp_translate(kbdev, nominal_freq, &core_mask);
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq < freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to increase voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(kbdev->clock, freq);
+	if (err) {
+		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+				freq, *target_freq);
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq > freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	if (kbdev->pm.backend.ca_current_policy->id ==
+			KBASE_PM_CA_POLICY_ID_DEVFREQ)
+		kbase_devfreq_set_core_mask(kbdev, core_mask);
+
+	*target_freq = nominal_freq;
+	kbdev->current_voltage = voltage;
+	kbdev->current_nominal_freq = nominal_freq;
+	kbdev->current_freq = freq;
+	kbdev->current_core_mask = core_mask;
+
+	KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)nominal_freq);
+
+	kbase_pm_reset_dvfs_utilisation(kbdev);
+
+	return err;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	*freq = kbdev->current_nominal_freq;
+
+	return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	stat->current_frequency = kbdev->current_nominal_freq;
+
+	kbase_pm_get_dvfs_utilisation(kbdev,
+			&stat->total_time, &stat->busy_time);
+
+	stat->private_data = NULL;
+
+	return 0;
+}
+
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+		struct devfreq_dev_profile *dp)
+{
+	int count;
+	int i = 0;
+	unsigned long freq;
+	struct dev_pm_opp *opp;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	if (count < 0) {
+		rcu_read_unlock();
+		return count;
+	}
+	rcu_read_unlock();
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) {
+		opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
+				count, i);
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	kbase_devfreq_term_freq_table(kbdev);
+}
+
+static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
+{
+	struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node,
+			"operating-points-v2", 0);
+	struct device_node *node;
+	int i = 0;
+	int count;
+
+	if (!opp_node)
+		return 0;
+	if (!of_device_is_compatible(opp_node, "operating-points-v2-mali"))
+		return 0;
+
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	kbdev->opp_table = kmalloc_array(count,
+			sizeof(struct kbase_devfreq_opp), GFP_KERNEL);
+	if (!kbdev->opp_table)
+		return -ENOMEM;
+
+	for_each_available_child_of_node(opp_node, node) {
+		u64 core_mask;
+		u64 opp_freq, real_freq;
+		const void *core_count_p;
+
+		if (of_property_read_u64(node, "opp-hz", &opp_freq)) {
+			dev_warn(kbdev->dev, "OPP is missing required opp-hz property\n");
+			continue;
+		}
+		if (of_property_read_u64(node, "opp-hz-real", &real_freq))
+			real_freq = opp_freq;
+		if (of_property_read_u64(node, "opp-core-mask", &core_mask))
+			core_mask =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		core_count_p = of_get_property(node, "opp-core-count", NULL);
+		if (core_count_p) {
+			u64 remaining_core_mask =
+				kbdev->gpu_props.props.raw_props.shader_present;
+			int core_count = be32_to_cpup(core_count_p);
+
+			core_mask = 0;
+
+			for (; core_count > 0; core_count--) {
+				int core = ffs(remaining_core_mask);
+
+				if (!core) {
+					dev_err(kbdev->dev, "OPP has more cores than GPU\n");
+					return -ENODEV;
+				}
+
+				core_mask |= (1ull << (core-1));
+				remaining_core_mask &= ~(1ull << (core-1));
+			}
+		}
+
+		if (!core_mask) {
+			dev_err(kbdev->dev, "OPP has invalid core mask of 0\n");
+			return -ENODEV;
+		}
+
+		kbdev->opp_table[i].opp_freq = opp_freq;
+		kbdev->opp_table[i].real_freq = real_freq;
+		kbdev->opp_table[i].core_mask = core_mask;
+
+		dev_info(kbdev->dev, "OPP %d : opp_freq=%llu real_freq=%llu core_mask=%llx\n",
+				i, opp_freq, real_freq, core_mask);
+
+		i++;
+	}
+
+	kbdev->num_opps = i;
+
+	return 0;
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	int err;
+
+	if (!kbdev->clock) {
+		dev_err(kbdev->dev, "Clock not available for devfreq\n");
+		return -ENODEV;
+	}
+
+	kbdev->current_freq = clk_get_rate(kbdev->clock);
+	kbdev->current_nominal_freq = kbdev->current_freq;
+
+	dp = &kbdev->devfreq_profile;
+
+	dp->initial_freq = kbdev->current_freq;
+	dp->polling_ms = 100;
+	dp->target = kbase_devfreq_target;
+	dp->get_dev_status = kbase_devfreq_status;
+	dp->get_cur_freq = kbase_devfreq_cur_freq;
+	dp->exit = kbase_devfreq_exit;
+
+	if (kbase_devfreq_init_freq_table(kbdev, dp))
+		return -EFAULT;
+
+	err = kbase_devfreq_init_core_mask_table(kbdev);
+	if (err)
+		return err;
+
+	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+				"simple_ondemand", NULL);
+	if (IS_ERR(kbdev->devfreq)) {
+		kbase_devfreq_term_freq_table(kbdev);
+		return PTR_ERR(kbdev->devfreq);
+	}
+
+	/* devfreq_add_device only copies a few of kbdev->dev's fields, so
+	 * set drvdata explicitly so IPA models can access kbdev. */
+	dev_set_drvdata(&kbdev->devfreq->dev, kbdev);
+
+	err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to register OPP notifier (%d)\n", err);
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	err = kbase_ipa_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "IPA initialization failed\n");
+		goto cooling_failed;
+	}
+
+	kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+			kbdev->dev->of_node,
+			kbdev->devfreq,
+			&kbase_ipa_power_model_ops);
+	if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+		err = PTR_ERR(kbdev->devfreq_cooling);
+		dev_err(kbdev->dev,
+			"Failed to register cooling device (%d)\n",
+			err);
+		goto cooling_failed;
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	if (devfreq_remove_device(kbdev->devfreq))
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	return err;
+}
+
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+	int err;
+
+	dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->devfreq_cooling)
+		devfreq_cooling_unregister(kbdev->devfreq_cooling);
+
+	kbase_ipa_term(kbdev);
+#endif
+
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+	err = devfreq_remove_device(kbdev->devfreq);
+	if (err)
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	kfree(kbdev->opp_table);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
new file mode 100644
index 0000000..c0bf8b1
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+int kbase_devfreq_init(struct kbase_device *kbdev);
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _BASE_DEVFREQ_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
new file mode 100644
index 0000000..dcdf15c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -0,0 +1,255 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ *
+ */
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+
+#ifdef CONFIG_DEBUG_FS
+
+
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
+{
+	struct kbase_io_access *old_buf;
+	struct kbase_io_access *new_buf;
+	unsigned long flags;
+
+	if (!new_size)
+		goto out_err; /* The new size must not be 0 */
+
+	new_buf = vmalloc(new_size * sizeof(*h->buf));
+	if (!new_buf)
+		goto out_err;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	old_buf = h->buf;
+
+	/* Note: we won't bother with copying the old data over. The dumping
+	 * logic wouldn't work properly as it relies on 'count' both as a
+	 * counter and as an index to the buffer which would have changed with
+	 * the new array. This is a corner case that we don't need to support.
+	 */
+	h->count = 0;
+	h->size = new_size;
+	h->buf = new_buf;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+	vfree(old_buf);
+
+	return 0;
+
+out_err:
+	return -1;
+}
+
+
+int kbase_io_history_init(struct kbase_io_history *h, u16 n)
+{
+	h->enabled = false;
+	spin_lock_init(&h->lock);
+	h->count = 0;
+	h->size = 0;
+	h->buf = NULL;
+	if (kbase_io_history_resize(h, n))
+		return -1;
+
+	return 0;
+}
+
+
+void kbase_io_history_term(struct kbase_io_history *h)
+{
+	vfree(h->buf);
+	h->buf = NULL;
+}
+
+
+/* kbase_io_history_add - add new entry to the register access history
+ *
+ * @h: Pointer to the history data structure
+ * @addr: Register address
+ * @value: The value that is either read from or written to the register
+ * @write: 1 if it's a register write, 0 if it's a read
+ */
+static void kbase_io_history_add(struct kbase_io_history *h,
+		void __iomem const *addr, u32 value, u8 write)
+{
+	struct kbase_io_access *io;
+	unsigned long flags;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	io = &h->buf[h->count % h->size];
+	io->addr = (uintptr_t)addr | write;
+	io->value = value;
+	++h->count;
+	/* If count overflows, move the index by the buffer size so the entire
+	 * buffer will still be dumped later */
+	if (unlikely(!h->count))
+		h->count = h->size;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+void kbase_io_history_dump(struct kbase_device *kbdev)
+{
+	struct kbase_io_history *const h = &kbdev->io_history;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!unlikely(h->enabled))
+		return;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	dev_err(kbdev->dev, "Register IO History:");
+	iters = (h->size > h->count) ? h->count : h->size;
+	dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	writel(value, kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				value, 1);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_WRITE, offset,
+									value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx)
+{
+	u32 val;
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	val = readl(kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				val, 0);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_READ, offset, val);
+	return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
+
+/**
+ * kbase_report_gpu_fault - Report a GPU fault.
+ * @kbdev:    Kbase device pointer
+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
+ *            was also set
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using dev_warn().
+ */
+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+	u32 status;
+	u64 address;
+
+	status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+	address = (u64) kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
+	address |= kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+
+	dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+			status & 0xFF,
+			kbase_exception_name(kbdev, status),
+			address);
+	if (multiple)
+		dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+	if (val & GPU_FAULT)
+		kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+	if (val & RESET_COMPLETED)
+		kbase_pm_reset_done(kbdev);
+
+	if (val & PRFCNT_SAMPLE_COMPLETED)
+		kbase_instr_hwcnt_sample_done(kbdev);
+
+	if (val & CLEAN_CACHES_COMPLETED)
+		kbase_clean_caches_done(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+
+	/* kbase_pm_check_transitions must be called after the IRQ has been
+	 * cleared. This is because it might trigger further power transitions
+	 * and we don't want to miss the interrupt raised to notify us that
+	 * these further transitions have finished.
+	 */
+	if (val & POWER_CHANGED_ALL)
+		kbase_pm_power_changed(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
new file mode 100644
index 0000000..5b20445
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access device APIs
+ */
+
+#ifndef _KBASE_DEVICE_INTERNAL_H_
+#define _KBASE_DEVICE_INTERNAL_H_
+
+/**
+ * kbase_reg_write - write to GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @value:  Value to write
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_reg_read - read from GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ *
+ * Return: Value in desired register
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+
+/**
+ * kbase_gpu_interrupt - GPU interrupt handler
+ * @kbdev: Kbase device pointer
+ * @val:   The value of the GPU IRQ status register which triggered the call
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be
+ * handled.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+#endif /* _KBASE_DEVICE_INTERNAL_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
new file mode 100644
index 0000000..6bceba1
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbasep_platform_device_init(kbdev);
+	if (err)
+		return err;
+
+	err = kbase_pm_runtime_init(kbdev);
+	if (err)
+		goto fail_runtime_pm;
+
+	/* Ensure we can access the GPU registers */
+	kbase_pm_register_access_enable(kbdev);
+
+	/* Find out GPU properties based on the GPU feature registers */
+	kbase_gpuprops_set(kbdev);
+
+	/* We're done accessing the GPU registers for now. */
+	kbase_pm_register_access_disable(kbdev);
+
+	err = kbase_install_interrupts(kbdev);
+	if (err)
+		goto fail_interrupts;
+
+	err = kbase_hwaccess_pm_init(kbdev);
+	if (err)
+		goto fail_pm;
+
+	return 0;
+
+fail_pm:
+	kbase_release_interrupts(kbdev);
+fail_interrupts:
+	kbase_pm_runtime_term(kbdev);
+fail_runtime_pm:
+	kbasep_platform_device_term(kbdev);
+
+	return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_term(kbdev);
+	kbase_release_interrupts(kbdev);
+	kbase_pm_runtime_term(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		return err;
+
+	err = kbase_backend_timer_init(kbdev);
+	if (err)
+		goto fail_timer;
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+		dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+		err = -EINVAL;
+		goto fail_interrupt_test;
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = kbase_job_slot_init(kbdev);
+	if (err)
+		goto fail_job_slot;
+
+	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+	return 0;
+
+fail_job_slot:
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	kbase_backend_timer_term(kbdev);
+fail_timer:
+	kbase_hwaccess_pm_halt(kbdev);
+
+	return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+	kbase_job_slot_halt(kbdev);
+	kbase_job_slot_term(kbdev);
+	kbase_backend_timer_term(kbdev);
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100644
index 0000000..b395325
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,110 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query backend APIs
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	int i;
+
+	/* Fill regdump with the content of the relevant registers */
+	regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
+
+	regdump->l2_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_FEATURES), NULL);
+	regdump->suspend_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SUSPEND_SIZE), NULL);
+	regdump->tiler_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_FEATURES), NULL);
+	regdump->mem_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MEM_FEATURES), NULL);
+	regdump->mmu_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MMU_FEATURES), NULL);
+	regdump->as_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(AS_PRESENT), NULL);
+	regdump->js_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_PRESENT), NULL);
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		regdump->js_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		regdump->texture_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
+
+	regdump->thread_max_threads = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
+	regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
+									NULL);
+	regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
+	regdump->thread_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_FEATURES), NULL);
+
+	regdump->shader_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
+	regdump->shader_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
+
+	regdump->tiler_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
+	regdump->tiler_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
+
+	regdump->l2_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
+	regdump->l2_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
+
+	regdump->stack_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(STACK_PRESENT_LO), NULL);
+	regdump->stack_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(STACK_PRESENT_HI), NULL);
+}
+
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
+		/* Ensure we can access the GPU registers */
+		kbase_pm_register_access_enable(kbdev);
+
+		regdump->coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+		/* We're done accessing the GPU registers for now. */
+		kbase_pm_register_access_disable(kbdev);
+	} else {
+		/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
+		regdump->coherency_features =
+				COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+	}
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
new file mode 100644
index 0000000..7ad309e
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -0,0 +1,492 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * GPU backend instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+
+/**
+ * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
+ * hardware
+ *
+ * @kbdev: Kbase device
+ */
+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long pm_flags;
+	u32 irq_mask;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask | CLEAN_CACHES_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	int ret;
+	u64 shader_cores_needed;
+	u32 prfcnt_config;
+
+	shader_cores_needed = kbase_pm_get_present_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* alignment failure */
+	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
+		goto out_err;
+
+	/* Override core availability policy to ensure all cores are available
+	 */
+	kbase_pm_ca_instr_enable(kbdev);
+
+	/* Request the cores early on synchronously - we'll release them on any
+	 * errors (e.g. instrumentation already active) */
+	kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		/* Instrumentation is already enabled */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		goto out_unrequest_cores;
+	}
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
+						PRFCNT_SAMPLE_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* In use, this context is the owner */
+	kbdev->hwcnt.kctx = kctx;
+	/* Remember the dump address so we can reprogram it later */
+	kbdev->hwcnt.addr = setup->dump_buffer;
+
+	/* Request the clean */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+	kbdev->hwcnt.backend.triggered = 0;
+	/* Clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+	KBASE_DEBUG_ASSERT(ret);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Wait for cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	/* Configure */
+	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	{
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+	}
+#endif
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					setup->dump_buffer & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					setup->dump_buffer >> 32,        kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+					setup->jm_bm,                    kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+					setup->shader_bm,                kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+					setup->mmu_l2_bm,                kctx);
+	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
+	 * HW counter dump. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
+									kctx);
+	else
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	err = 0;
+
+	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+	return err;
+ out_unrequest_cores:
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ out_err:
+	return err;
+}
+
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	while (1) {
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
+			/* Instrumentation is not enabled */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.kctx != kctx) {
+			/* Instrumentation has been setup for another context */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
+			break;
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+		/* Ongoing dump/setup - wait for its completion */
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Disable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+
+	/* Disable the counters */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+
+	kbdev->hwcnt.kctx = NULL;
+	kbdev->hwcnt.addr = 0ULL;
+
+	kbase_pm_ca_instr_disable(kbdev);
+
+	kbase_pm_unrequest_cores(kbdev, true,
+		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+
+	kbase_pm_release_l2_caches(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+									kctx);
+
+	err = 0;
+
+ out:
+	return err;
+}
+
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.kctx != kctx) {
+		/* The instrumentation has been setup for another context */
+		goto unlock;
+	}
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
+		/* HW counters are disabled or another dump is ongoing, or we're
+		 * resetting */
+		goto unlock;
+	}
+
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Mark that we're dumping - the PF handler can signal that we faulted
+	 */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+	/* Reconfigure the dump address */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					kbdev->hwcnt.addr >> 32, NULL);
+
+	/* Start dumping */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
+					kbdev->hwcnt.addr, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+
+	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+
+	err = 0;
+
+ unlock:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
+
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success)
+{
+	unsigned long flags;
+	bool complete = false;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+		*success = true;
+		complete = true;
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		*success = false;
+		complete = true;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
+
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(data, struct kbase_device,
+						hwcnt.backend.cache_clean_work);
+
+	mutex_lock(&kbdev->cacheclean_lock);
+	kbasep_instr_hwcnt_cacheclean(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	/* Wait for our condition, and any reset to complete */
+	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+				kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_CLEANED);
+
+	/* All finished and idle */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
+		int ret;
+		/* Always clean and invalidate the cache after a successful dump
+		 */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+		ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+		KBASE_DEBUG_ASSERT(ret);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		unsigned long flags;
+		unsigned long pm_flags;
+
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+		/* Disable interrupt */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+									NULL);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+		/* Wakeup... */
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+			/* Only wake if we weren't resetting */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+			wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+		}
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	}
+}
+
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long flags;
+	int err;
+
+	/* Wait for dump & cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		err = -EINVAL;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	} else {
+		/* Dump done */
+		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+		err = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return err;
+}
+
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* Check it's the context previously set up and we're not already
+	 * dumping */
+	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
+							KBASE_INSTR_STATE_IDLE)
+		goto out;
+
+	/* Clear the counters */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_PRFCNT_CLEAR, kctx);
+
+	err = 0;
+
+out:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+int kbase_instr_backend_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
+	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
+	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
+						kbasep_cache_clean_worker);
+	kbdev->hwcnt.backend.triggered = 0;
+
+	kbdev->hwcnt.backend.cache_clean_wq =
+			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
+	if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+void kbase_instr_backend_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
new file mode 100644
index 0000000..4794672
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+	/* State where instrumentation is not active */
+	KBASE_INSTR_STATE_DISABLED = 0,
+	/* State machine is active and ready for a command. */
+	KBASE_INSTR_STATE_IDLE,
+	/* Hardware is currently dumping a frame. */
+	KBASE_INSTR_STATE_DUMPING,
+	/* We've requested a clean to occur on a workqueue */
+	KBASE_INSTR_STATE_REQUEST_CLEAN,
+	/* Hardware is currently cleaning and invalidating caches. */
+	KBASE_INSTR_STATE_CLEANING,
+	/* Cache clean completed, and either a) a dump is complete, or
+	 * b) instrumentation can now be setup. */
+	KBASE_INSTR_STATE_CLEANED,
+	/* An error has occured during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+	wait_queue_head_t wait;
+	int triggered;
+
+	enum kbase_instr_state state;
+	wait_queue_head_t cache_clean_wait;
+	struct workqueue_struct *cache_clean_wq;
+	struct work_struct  cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100644
index 0000000..e96aeae
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100644
index 0000000..8781561
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific IRQ APIs
+ */
+
+#ifndef _KBASE_IRQ_INTERNAL_H_
+#define _KBASE_IRQ_INTERNAL_H_
+
+int kbase_install_interrupts(struct kbase_device *kbdev);
+
+void kbase_release_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
+ *                          execution
+ * @kbdev: The kbase device
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev);
+
+#endif /* _KBASE_IRQ_INTERNAL_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
new file mode 100644
index 0000000..8416b80
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -0,0 +1,469 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+
+#include <linux/interrupt.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static void *kbase_tag(void *ptr, u32 tag)
+{
+	return (void *)(((uintptr_t) ptr) | tag);
+}
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_job_done(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	atomic_inc(&kbdev->faults_pending);
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val) {
+		atomic_dec(&kbdev->faults_pending);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_mmu_interrupt(kbdev, val);
+
+	atomic_dec(&kbdev->faults_pending);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_gpu_interrupt(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler);
+
+static irq_handler_t kbase_handler_table[] = {
+	[JOB_IRQ_TAG] = kbase_job_irq_handler,
+	[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+	[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
+
+#ifdef CONFIG_MALI_DEBUG
+#define  JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define  MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define  GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * kbase_set_custom_irq_handler - Set a custom IRQ handler
+ * @kbdev: Device for which the handler is to be registered
+ * @custom_handler: Handler to be registered
+ * @irq_type: Interrupt type
+ *
+ * Registers given interrupt handler for requested interrupt type
+ * In the case where irq handler is not specified, the default handler shall be
+ * registered
+ *
+ * Return: 0 case success, error code otherwise
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+					irq_handler_t custom_handler,
+					int irq_type)
+{
+	int result = 0;
+	irq_handler_t requested_irq_handler = NULL;
+
+	KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
+						(GPU_IRQ_HANDLER >= irq_type));
+
+	/* Release previous handler */
+	if (kbdev->irqs[irq_type].irq)
+		free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
+
+	requested_irq_handler = (NULL != custom_handler) ? custom_handler :
+						kbase_handler_table[irq_type];
+
+	if (0 != request_irq(kbdev->irqs[irq_type].irq,
+			requested_irq_handler,
+			kbdev->irqs[irq_type].flags | IRQF_SHARED,
+			dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+		result = -EINVAL;
+		dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+					kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+		dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/* test correct interrupt assigment and reception by cpu */
+struct kbasep_irq_test {
+	struct hrtimer timer;
+	wait_queue_head_t wait;
+	int triggered;
+	u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT    500
+
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+	struct kbasep_irq_test *test_data = container_of(timer,
+						struct kbasep_irq_test, timer);
+
+	test_data->timeout = 1;
+	test_data->triggered = 1;
+	wake_up(&test_data->wait);
+	return HRTIMER_NORESTART;
+}
+
+static int kbasep_common_test_interrupt(
+				struct kbase_device * const kbdev, u32 tag)
+{
+	int err = 0;
+	irq_handler_t test_handler;
+
+	u32 old_mask_val;
+	u16 mask_offset;
+	u16 rawstat_offset;
+
+	switch (tag) {
+	case JOB_IRQ_TAG:
+		test_handler = kbase_job_irq_test_handler;
+		rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+		mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+		break;
+	case MMU_IRQ_TAG:
+		test_handler = kbase_mmu_irq_test_handler;
+		rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+		mask_offset = MMU_REG(MMU_IRQ_MASK);
+		break;
+	case GPU_IRQ_TAG:
+		/* already tested by pm_driver - bail out */
+	default:
+		return 0;
+	}
+
+	/* store old mask */
+	old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+	/* mask interrupts */
+	kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+	if (kbdev->irqs[tag].irq) {
+		/* release original handler and install test handler */
+		if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
+			err = -EINVAL;
+		} else {
+			kbasep_irq_test_data.timeout = 0;
+			hrtimer_init(&kbasep_irq_test_data.timer,
+					CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+			kbasep_irq_test_data.timer.function =
+						kbasep_test_interrupt_timeout;
+
+			/* trigger interrupt */
+			kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
+			kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+
+			hrtimer_start(&kbasep_irq_test_data.timer,
+					HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
+					HRTIMER_MODE_REL);
+
+			wait_event(kbasep_irq_test_data.wait,
+					kbasep_irq_test_data.triggered != 0);
+
+			if (kbasep_irq_test_data.timeout != 0) {
+				dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+				err = -EINVAL;
+			} else {
+				dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+			}
+
+			hrtimer_cancel(&kbasep_irq_test_data.timer);
+			kbasep_irq_test_data.triggered = 0;
+
+			/* mask interrupts */
+			kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+			/* release test handler */
+			free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+		}
+
+		/* restore original interrupt */
+		if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
+				kbdev->irqs[tag].flags | IRQF_SHARED,
+				dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+			dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
+						kbdev->irqs[tag].irq, tag);
+			err = -EINVAL;
+		}
+	}
+	/* restore old mask */
+	kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+
+	return err;
+}
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev)
+{
+	int err;
+
+	init_waitqueue_head(&kbasep_irq_test_data.wait);
+	kbasep_irq_test_data.triggered = 0;
+
+	/* A suspend won't happen during startup/insmod */
+	kbase_pm_context_active(kbdev);
+
+	err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+
+ out:
+	kbase_pm_context_idle(kbdev);
+
+	return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	int err;
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
+				kbdev->irqs[i].flags | IRQF_SHARED,
+				dev_name(kbdev->dev),
+				kbase_tag(kbdev, i));
+		if (err) {
+			dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+							kbdev->irqs[i].irq, i);
+#ifdef CONFIG_SPARSE_IRQ
+			dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+			goto release;
+		}
+	}
+
+	return 0;
+
+ release:
+	while (i-- > 0)
+		free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+
+	return err;
+}
+
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+	}
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			synchronize_irq(kbdev->irqs[i].irq);
+	}
+}
+
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
new file mode 100644
index 0000000..c660c80
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -0,0 +1,235 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register backend context / address space management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+
+/**
+ * assign_and_activate_kctx_addr_space - Assign an AS to a context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @current_as: Address Space to assign
+ *
+ * Assign an Address Space (AS) to a context, and add the context to the Policy.
+ *
+ * This includes
+ *   setting up the global runpool_irq structure and the context on the AS,
+ *   Activating the MMU on the AS,
+ *   Allowing jobs to be submitted on the AS.
+ *
+ * Context:
+ *   kbasep_js_kctx_info.jsctx_mutex held,
+ *   kbasep_js_device_data.runpool_mutex held,
+ *   AS transaction mutex held,
+ *   Runpool IRQ lock held
+ */
+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_as *current_as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Attribute handling */
+	kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+	/* Allow it to run jobs */
+	kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+	kbase_js_runpool_inc_context_count(kbdev, kctx);
+}
+
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	int i;
+
+	if (kbdev->hwaccess.active_kctx == kctx) {
+		/* Context is already active */
+		return true;
+	}
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		if (kbdev->as_to_kctx[i] == kctx) {
+			/* Context already has ASID - mark as active */
+			return true;
+		}
+	}
+
+	/* Context does not have address space assigned */
+	return false;
+}
+
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	int as_nr = kctx->as_nr;
+
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Attempting to release context without ASID\n");
+		return;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (atomic_read(&kctx->refcount) != 1) {
+		WARN(1, "Attempting to release active ASID\n");
+		return;
+	}
+
+	kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
+
+	kbase_ctx_sched_release_ctx(kctx);
+	kbase_js_runpool_dec_context_count(kbdev, kctx);
+}
+
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+}
+
+int kbase_backend_find_and_release_free_address_space(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	int i;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_kctx_info *as_js_kctx_info;
+		struct kbase_context *as_kctx;
+
+		as_kctx = kbdev->as_to_kctx[i];
+		as_js_kctx_info = &as_kctx->jctx.sched_info;
+
+		/* Don't release privileged or active contexts, or contexts with
+		 * jobs running.
+		 * Note that a context will have at least 1 reference (which
+		 * was previously taken by kbasep_js_schedule_ctx()) until
+		 * descheduled.
+		 */
+		if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) &&
+			atomic_read(&as_kctx->refcount) == 1) {
+			if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
+								as_kctx)) {
+				WARN(1, "Failed to retain active context\n");
+
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				return KBASEP_AS_NR_INVALID;
+			}
+
+			kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
+
+			/* Drop and retake locks to take the jsctx_mutex on the
+			 * context we're about to release without violating lock
+			 * ordering
+			 */
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+
+			/* Release context from address space */
+			mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+
+			kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
+
+			if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) {
+				kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
+								as_kctx,
+								true);
+
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+				return i;
+			}
+
+			/* Context was retained while locks were dropped,
+			 * continue looking for free AS */
+
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_as *new_address_space = NULL;
+
+	js_devdata = &kbdev->js_data;
+
+	if (kbdev->hwaccess.active_kctx == kctx) {
+		WARN(1, "Context is already scheduled in\n");
+		return false;
+	}
+
+	new_address_space = &kbdev->as[as_nr];
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
+
+	if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+		/* We need to retain it to keep the corresponding address space
+		 */
+		kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	}
+
+	return true;
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
new file mode 100644
index 0000000..08a7400
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific definitions
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
+#define _KBASE_HWACCESS_GPU_DEFS_H_
+
+/* SLOT_RB_SIZE must be < 256 */
+#define SLOT_RB_SIZE 2
+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
+
+/**
+ * struct rb_entry - Ringbuffer entry
+ * @katom:	Atom associated with this entry
+ */
+struct rb_entry {
+	struct kbase_jd_atom *katom;
+};
+
+/**
+ * struct slot_rb - Slot ringbuffer
+ * @entries:		Ringbuffer entries
+ * @last_context:	The last context to submit a job on this slot
+ * @read_idx:		Current read index of buffer
+ * @write_idx:		Current write index of buffer
+ * @job_chain_flag:	Flag used to implement jobchain disambiguation
+ */
+struct slot_rb {
+	struct rb_entry entries[SLOT_RB_SIZE];
+
+	struct kbase_context *last_context;
+
+	u8 read_idx;
+	u8 write_idx;
+
+	u8 job_chain_flag;
+};
+
+/**
+ * struct kbase_backend_data - GPU backend specific data for HW access layer
+ * @slot_rb:			Slot ringbuffers
+ * @rmu_workaround_flag:	When PRLAM-8987 is present, this flag determines
+ *				whether slots 0/1 or slot 2 are currently being
+ *				pulled from
+ * @scheduling_timer:		The timer tick used for rescheduling jobs
+ * @timer_running:		Is the timer running? The runpool_mutex must be
+ *				held whilst modifying this.
+ * @suspend_timer:              Is the timer suspended? Set when a suspend
+ *                              occurs and cleared on resume. The runpool_mutex
+ *                              must be held whilst modifying this.
+ * @reset_gpu:			Set to a KBASE_RESET_xxx value (see comments)
+ * @reset_workq:		Work queue for performing the reset
+ * @reset_work:			Work item for performing the reset
+ * @reset_wait:			Wait event signalled when the reset is complete
+ * @reset_timer:		Timeout for soft-stops before the reset
+ * @timeouts_updated:           Have timeout values just been updated?
+ *
+ * The hwaccess_lock (a spinlock) must be held when accessing this structure
+ */
+struct kbase_backend_data {
+	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
+
+	bool rmu_workaround_flag;
+
+	struct hrtimer scheduling_timer;
+
+	bool timer_running;
+	bool suspend_timer;
+
+	atomic_t reset_gpu;
+
+/* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_NOT_PENDING     0
+/* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_PREPARED        1
+/* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_COMMITTED       2
+/* The GPU reset process is currently occuring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+#define KBASE_RESET_GPU_HAPPENING       3
+/* Reset the GPU silently, used when resetting the GPU as part of normal
+ * behavior (e.g. when exiting protected mode). */
+#define KBASE_RESET_GPU_SILENT          4
+	struct workqueue_struct *reset_workq;
+	struct work_struct reset_work;
+	wait_queue_head_t reset_wait;
+	struct hrtimer reset_timer;
+
+	bool timeouts_updated;
+};
+
+/**
+ * struct kbase_jd_atom_backend - GPU backend specific katom data
+ */
+struct kbase_jd_atom_backend {
+};
+
+/**
+ * struct kbase_context_backend - GPU backend specific context data
+ */
+struct kbase_context_backend {
+};
+
+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
new file mode 100644
index 0000000..0a2a0b7
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -0,0 +1,1459 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel job manager APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_vinstr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+#define beenthere(kctx, f, a...) \
+			dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if KBASE_GPU_RESET_EN
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
+static void kbasep_reset_timeout_worker(struct work_struct *data);
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
+#endif /* KBASE_GPU_RESET_EN */
+
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
+						struct kbase_context *kctx)
+{
+	return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
+}
+
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js)
+{
+	struct kbase_context *kctx;
+	u32 cfg;
+	u64 jc_head = katom->jc;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	/* Command register must be available */
+	KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+	/* Affinity is not violating */
+	kbase_js_debug_log_current_affinities(kbdev);
+	KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
+							katom->affinity));
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
+						jc_head & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
+						jc_head >> 32, kctx);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+					katom->affinity & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+					katom->affinity >> 32, kctx);
+
+	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
+	 * start */
+	cfg = kctx->as_nr;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) &&
+			!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
+		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START))
+		cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) &&
+			!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
+		cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649))
+		cfg |= JS_CONFIG_START_MMU;
+
+	cfg |= JS_CONFIG_THREAD_PRI(8);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) &&
+		(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED))
+		cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+		if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+			cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								true;
+		} else {
+			katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								false;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
+				katom->flush_id, kctx);
+
+	/* Write an approximate start timestamp.
+	 * It's approximate because there might be a job in the HEAD register.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* GO ! */
+	dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx",
+				katom, kctx, js, jc_head, katom->affinity);
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
+							(u32) katom->affinity);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(
+				GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
+				kctx, kbase_jd_atom_id(kctx, katom));
+#endif
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head,
+			katom->affinity, cfg);
+	KBASE_TLSTREAM_TL_RET_CTX_LPU(
+		kctx,
+		&kbdev->gpu_props.props.raw_props.js_features[
+			katom->slot_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_LPU(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[js],
+			"ctx_nr,atom_nr");
+#ifdef CONFIG_GPU_TRACEPOINTS
+	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
+		/* If this is the only job on the slot, trace it as starting */
+		char js_string[16];
+
+		trace_gpu_sched_switch(
+				kbasep_make_job_slot_string(js, js_string,
+						sizeof(js_string)),
+				ktime_to_ns(katom->start_timestamp),
+				(u32)katom->kctx->id, 0, katom->work_id);
+		kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
+	}
+#endif
+	kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+						JS_COMMAND_START, katom->kctx);
+}
+
+/**
+ * kbasep_job_slot_update_head_start_timestamp - Update timestamp
+ * @kbdev: kbase device
+ * @js: job slot
+ * @end_timestamp: timestamp
+ *
+ * Update the start_timestamp of the job currently in the HEAD, based on the
+ * fact that we got an IRQ for the previous set of completed jobs.
+ *
+ * The estimate also takes into account the time the job was submitted, to
+ * work out the best estimate (which might still result in an over-estimate to
+ * the calculated time spent)
+ */
+static void kbasep_job_slot_update_head_start_timestamp(
+						struct kbase_device *kbdev,
+						int js,
+						ktime_t end_timestamp)
+{
+	if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) {
+		struct kbase_jd_atom *katom;
+		ktime_t timestamp_diff;
+		/* The atom in the HEAD */
+		katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		KBASE_DEBUG_ASSERT(katom != NULL);
+
+		timestamp_diff = ktime_sub(end_timestamp,
+				katom->start_timestamp);
+		if (ktime_to_ns(timestamp_diff) >= 0) {
+			/* Only update the timestamp if it's a better estimate
+			 * than what's currently stored. This is because our
+			 * estimate that accounts for the throttle time may be
+			 * too much of an overestimate */
+			katom->start_timestamp = end_timestamp;
+		}
+	}
+}
+
+/**
+ * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline
+ * tracepoint
+ * @kbdev: kbase device
+ * @js: job slot
+ *
+ * Make a tracepoint call to the instrumentation module informing that
+ * softstop happened on given lpu (job slot).
+ */
+static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
+					int js)
+{
+	KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(
+		&kbdev->gpu_props.props.raw_props.js_features[js]);
+}
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done)
+{
+	unsigned long flags;
+	int i;
+	u32 count = 0;
+	ktime_t end_timestamp = ktime_get();
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
+
+	memset(&kbdev->slot_submit_count_irq[0], 0,
+					sizeof(kbdev->slot_submit_count_irq));
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	while (done) {
+		u32 failed = done >> 16;
+
+		/* treat failed slots as finished slots */
+		u32 finished = (done & 0xFFFF) | failed;
+
+		/* Note: This is inherently unfair, as we always check
+		 * for lower numbered interrupts before the higher
+		 * numbered ones.*/
+		i = ffs(finished) - 1;
+		KBASE_DEBUG_ASSERT(i >= 0);
+
+		do {
+			int nr_done;
+			u32 active;
+			u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */
+			u64 job_tail = 0;
+
+			if (failed & (1u << i)) {
+				/* read out the job slot status code if the job
+				 * slot reported failure */
+				completion_code = kbase_reg_read(kbdev,
+					JOB_SLOT_REG(i, JS_STATUS), NULL);
+
+				switch (completion_code) {
+				case BASE_JD_EVENT_STOPPED:
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+					kbase_trace_mali_job_slots_event(
+						GATOR_MAKE_EVENT(
+						GATOR_JOB_SLOT_SOFT_STOPPED, i),
+								NULL, 0);
+#endif
+
+					kbasep_trace_tl_event_lpu_softstop(
+						kbdev, i);
+
+					/* Soft-stopped job - read the value of
+					 * JS<n>_TAIL so that the job chain can
+					 * be resumed */
+					job_tail = (u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_LO),
+									NULL) |
+						((u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_HI),
+								NULL) << 32);
+					break;
+				case BASE_JD_EVENT_NOT_STARTED:
+					/* PRLAM-10673 can cause a TERMINATED
+					 * job to come back as NOT_STARTED, but
+					 * the error interrupt helps us detect
+					 * it */
+					completion_code =
+						BASE_JD_EVENT_TERMINATED;
+					/* fall through */
+				default:
+					dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+							i, completion_code,
+							kbase_exception_name
+							(kbdev,
+							completion_code));
+				}
+
+				kbase_gpu_irq_evict(kbdev, i);
+			}
+
+			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+					done & ((1 << i) | (1 << (i + 16))),
+					NULL);
+			active = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_JS_STATE),
+					NULL);
+
+			if (((active >> i) & 1) == 0 &&
+					(((done >> (i + 16)) & 1) == 0)) {
+				/* There is a potential race we must work
+				 * around:
+				 *
+				 *  1. A job slot has a job in both current and
+				 *     next registers
+				 *  2. The job in current completes
+				 *     successfully, the IRQ handler reads
+				 *     RAWSTAT and calls this function with the
+				 *     relevant bit set in "done"
+				 *  3. The job in the next registers becomes the
+				 *     current job on the GPU
+				 *  4. Sometime before the JOB_IRQ_CLEAR line
+				 *     above the job on the GPU _fails_
+				 *  5. The IRQ_CLEAR clears the done bit but not
+				 *     the failed bit. This atomically sets
+				 *     JOB_IRQ_JS_STATE. However since both jobs
+				 *     have now completed the relevant bits for
+				 *     the slot are set to 0.
+				 *
+				 * If we now did nothing then we'd incorrectly
+				 * assume that _both_ jobs had completed
+				 * successfully (since we haven't yet observed
+				 * the fail bit being set in RAWSTAT).
+				 *
+				 * So at this point if there are no active jobs
+				 * left we check to see if RAWSTAT has a failure
+				 * bit set for the job slot. If it does we know
+				 * that there has been a new failure that we
+				 * didn't previously know about, so we make sure
+				 * that we record this in active (but we wait
+				 * for the next loop to deal with it).
+				 *
+				 * If we were handling a job failure (i.e. done
+				 * has the relevant high bit set) then we know
+				 * that the value read back from
+				 * JOB_IRQ_JS_STATE is the correct number of
+				 * remaining jobs because the failed job will
+				 * have prevented any futher jobs from starting
+				 * execution.
+				 */
+				u32 rawstat = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+				if ((rawstat >> (i + 16)) & 1) {
+					/* There is a failed job that we've
+					 * missed - add it back to active */
+					active |= (1u << i);
+				}
+			}
+
+			dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+							completion_code);
+
+			nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+			nr_done -= (active >> i) & 1;
+			nr_done -= (active >> (i + 16)) & 1;
+
+			if (nr_done <= 0) {
+				dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+									i);
+
+				goto spurious;
+			}
+
+			count += nr_done;
+
+			while (nr_done) {
+				if (nr_done == 1) {
+					kbase_gpu_complete_hw(kbdev, i,
+								completion_code,
+								job_tail,
+								&end_timestamp);
+					kbase_jm_try_kick_all(kbdev);
+				} else {
+					/* More than one job has completed.
+					 * Since this is not the last job being
+					 * reported this time it must have
+					 * passed. This is because the hardware
+					 * will not allow further jobs in a job
+					 * slot to complete until the failed job
+					 * is cleared from the IRQ status.
+					 */
+					kbase_gpu_complete_hw(kbdev, i,
+							BASE_JD_EVENT_DONE,
+							0,
+							&end_timestamp);
+				}
+				nr_done--;
+			}
+ spurious:
+			done = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
+				/* Workaround for missing interrupt caused by
+				 * PRLAM-10883 */
+				if (((active >> i) & 1) && (0 ==
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(i,
+							JS_STATUS), NULL))) {
+					/* Force job slot to be processed again
+					 */
+					done |= (1u << i);
+				}
+			}
+
+			failed = done >> 16;
+			finished = (done & 0xFFFF) | failed;
+			if (done)
+				end_timestamp = ktime_get();
+		} while (finished & (1 << i));
+
+		kbasep_job_slot_update_head_start_timestamp(kbdev, i,
+								end_timestamp);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#if KBASE_GPU_RESET_EN
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_COMMITTED) {
+		/* If we're trying to reset the GPU then we might be able to do
+		 * it early (without waiting for a timeout) because some jobs
+		 * have completed
+		 */
+		kbasep_try_reset_gpu_early(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
+}
+KBASE_EXPORT_TEST_API(kbase_job_done);
+
+static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	bool soft_stops_allowed = true;
+
+	if (kbase_jd_katom_is_protected(katom)) {
+		soft_stops_allowed = false;
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+		if ((katom->core_req & BASE_JD_REQ_T) != 0)
+			soft_stops_allowed = false;
+	}
+	return soft_stops_allowed;
+}
+
+static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
+						base_jd_core_req core_reqs)
+{
+	bool hard_stops_allowed = true;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
+		if ((core_reqs & BASE_JD_REQ_T) != 0)
+			hard_stops_allowed = false;
+	}
+	return hard_stops_allowed;
+}
+
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom)
+{
+	struct kbase_context *kctx = target_katom->kctx;
+#if KBASE_TRACE_ENABLE
+	u32 status_reg_before;
+	u64 job_in_head_before;
+	u32 status_reg_after;
+
+	KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+
+	/* Check the head pointer */
+	job_in_head_before = ((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
+			| (((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_HI), NULL))
+									<< 32);
+	status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+#endif
+
+	if (action == JS_COMMAND_SOFT_STOP) {
+		bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
+								target_katom);
+
+		if (!soft_stop_allowed) {
+#ifdef CONFIG_MALI_DEBUG
+			dev_dbg(kbdev->dev,
+					"Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+
+		/* We are about to issue a soft stop, so mark the atom as having
+		 * been soft stopped */
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+
+		/* Mark the point where we issue the soft-stop command */
+		KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(target_katom);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+			int i;
+
+			for (i = 0;
+			     i < kbase_backend_nr_atoms_submitted(kbdev, js);
+			     i++) {
+				struct kbase_jd_atom *katom;
+
+				katom = kbase_gpu_inspect(kbdev, js, i);
+
+				KBASE_DEBUG_ASSERT(katom);
+
+				/* For HW_ISSUE_8316, only 'bad' jobs attacking
+				 * the system can cause this issue: normally,
+				 * all memory should be allocated in multiples
+				 * of 4 pages, and growable memory should be
+				 * changed size in multiples of 4 pages.
+				 *
+				 * Whilst such 'bad' jobs can be cleared by a
+				 * GPU reset, the locking up of a uTLB entry
+				 * caused by the bad job could also stall other
+				 * ASs, meaning that other ASs' jobs don't
+				 * complete in the 'grace' period before the
+				 * reset. We don't want to lose other ASs' jobs
+				 * when they would normally complete fine, so we
+				 * must 'poke' the MMU regularly to help other
+				 * ASs complete */
+				kbase_as_poking_timer_retain_atom(
+						kbdev, katom->kctx, katom);
+			}
+		}
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_SOFT_STOP_1 :
+				JS_COMMAND_SOFT_STOP_0;
+		}
+	} else if (action == JS_COMMAND_HARD_STOP) {
+		bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
+								core_reqs);
+
+		if (!hard_stop_allowed) {
+			/* Jobs can be hard-stopped for the following reasons:
+			 *  * CFS decides the job has been running too long (and
+			 *    soft-stop has not occurred). In this case the GPU
+			 *    will be reset by CFS if the job remains on the
+			 *    GPU.
+			 *
+			 *  * The context is destroyed, kbase_jd_zap_context
+			 *    will attempt to hard-stop the job. However it also
+			 *    has a watchdog which will cause the GPU to be
+			 *    reset if the job remains on the GPU.
+			 *
+			 *  * An (unhandled) MMU fault occurred. As long as
+			 *    BASE_HW_ISSUE_8245 is defined then the GPU will be
+			 *    reset.
+			 *
+			 * All three cases result in the GPU being reset if the
+			 * hard-stop fails, so it is safe to just return and
+			 * ignore the hard-stop request.
+			 */
+			dev_warn(kbdev->dev,
+					"Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+			return;
+		}
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_HARD_STOP_1 :
+				JS_COMMAND_HARD_STOP_0;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
+
+#if KBASE_TRACE_ENABLE
+	status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+	if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
+		struct kbase_jd_atom *head;
+		struct kbase_context *head_kctx;
+
+		head = kbase_gpu_inspect(kbdev, js, 0);
+		head_kctx = head->kctx;
+
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx,
+						head, job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+						0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	} else {
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+#endif
+}
+
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Cancel any remaining running jobs for this kctx  */
+	mutex_lock(&kctx->jctx.lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Invalidate all jobs in context, to prevent re-submitting */
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		if (!work_pending(&kctx->jctx.atoms[i].work))
+			kctx->jctx.atoms[i].event_code =
+						BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_hardstop(kctx, i, NULL);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev;
+	int js = target_katom->slot_nr;
+	int priority = target_katom->sched_priority;
+	int i;
+	bool stop_sent = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		if (katom->kctx != kctx)
+			continue;
+
+		if (katom->sched_priority > priority) {
+			if (!stop_sent)
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE(
+						target_katom);
+
+			kbase_job_slot_softstop(kbdev, js, katom);
+			stop_sent = true;
+		}
+	}
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long timeout = msecs_to_jiffies(ZAP_TIMEOUT);
+
+	timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait,
+			kctx->jctx.job_nr == 0, timeout);
+
+	if (timeout != 0)
+		timeout = wait_event_timeout(
+			kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			!kbase_ctx_flag(kctx, KCTX_SCHEDULED),
+			timeout);
+
+	/* Neither wait timed out; all done! */
+	if (timeout != 0)
+		goto exit;
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_prepare_to_reset_gpu(kbdev)) {
+		dev_err(kbdev->dev,
+			"Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+			ZAP_TIMEOUT);
+		kbase_reset_gpu(kbdev);
+	}
+
+	/* Wait for the reset to complete */
+	wait_event(kbdev->hwaccess.backend.reset_wait,
+			atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+			== KBASE_RESET_GPU_NOT_PENDING);
+#else
+	dev_warn(kbdev->dev,
+		"Jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+		ZAP_TIMEOUT);
+
+#endif
+exit:
+	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+	/* Ensure that the signallers of the waitqs have finished */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
+{
+	u32 flush_id = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.backend.gpu_powered)
+			flush_id = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return flush_id;
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (NULL == kbdev->hwaccess.backend.reset_workq)
+		return -EINVAL;
+
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+#endif
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
+ * @kbdev: kbase device pointer
+ * @kctx:  context to check against
+ * @js:	   slot to check
+ * @target_katom: An atom to check, or NULL if all atoms from @kctx on
+ *                slot @js should be checked
+ *
+ * This checks are based upon parameters that would normally be passed to
+ * kbase_job_slot_hardstop().
+ *
+ * In the event of @target_katom being NULL, this will check the last jobs that
+ * are likely to be running on the slot to see if a) they belong to kctx, and
+ * so would be stopped, and b) whether they have AFBC
+ *
+ * In that case, It's guaranteed that a job currently executing on the HW with
+ * AFBC will be detected. However, this is a conservative check because it also
+ * detects jobs that have just completed too.
+ *
+ * Return: true when hard-stop _might_ stop an afbc atom, else false.
+ */
+static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
+		struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom)
+{
+	bool ret = false;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* When we have an atom the decision can be made straight away. */
+	if (target_katom)
+		return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC);
+
+	/* Otherwise, we must chweck the hardware to see if it has atoms from
+	 * this context with AFBC. */
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		/* Ignore atoms from other contexts, they won't be stopped when
+		 * we use this for checking if we should hard-stop them */
+		if (katom->kctx != kctx)
+			continue;
+
+		/* An atom on this slot and this context: check for AFBC */
+		if (katom->core_req & BASE_JD_REQ_FS_AFBC) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
+}
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * @sw_flags:      Flags to pass in about the soft-stop
+ *
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+			struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+	KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+	kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
+			JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool stopped;
+#if KBASE_GPU_RESET_EN
+	/* We make the check for AFBC before evicting/stopping atoms.  Note
+	 * that no other thread can modify the slots whilst we have the
+	 * hwaccess_lock. */
+	int needs_workaround_for_afbc =
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
+			&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
+					 target_katom);
+#endif
+
+	stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
+							target_katom,
+							JS_COMMAND_HARD_STOP);
+#if KBASE_GPU_RESET_EN
+	if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+			needs_workaround_for_afbc)) {
+		/* MIDBASE-2916 if a fragment job with AFBC encoding is
+		 * hardstopped, ensure to do a soft reset also in order to
+		 * clear the GPU status.
+		 * Workaround for HW issue 8401 has an issue,so after
+		 * hard-stopping just reset the GPU. This will ensure that the
+		 * jobs leave the GPU.*/
+		if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+			dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue");
+			kbase_reset_gpu_locked(kbdev);
+		}
+	}
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	/* For hard-stop, don't enter if hard-stop not allowed */
+	if (hw_action == JS_COMMAND_HARD_STOP &&
+			!kbasep_hard_stop_allowed(kbdev, core_reqs))
+		return;
+
+	/* For soft-stop, don't enter if soft-stop not allowed, or isn't
+	 * causing disjoint */
+	if (hw_action == JS_COMMAND_SOFT_STOP &&
+			!(kbasep_soft_stop_allowed(kbdev, target_katom) &&
+			  (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+		return;
+
+	/* Nothing to do if already logged disjoint state on this atom */
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+		return;
+
+	target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+	kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom)
+{
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+		target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+		kbase_disjoint_state_down(kbdev);
+	}
+}
+
+
+#if KBASE_GPU_RESET_EN
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+	int i;
+
+	kbase_io_history_dump(kbdev);
+
+	dev_err(kbdev->dev, "Register state:");
+	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+	dev_err(kbdev->dev, "  JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x",
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL));
+	for (i = 0; i < 3; i++) {
+		dev_err(kbdev->dev, "  JS%d_STATUS=0x%08x      JS%d_HEAD_LO=0x%08x",
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
+					NULL),
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
+					NULL));
+	}
+	dev_err(kbdev->dev, "  MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+	dev_err(kbdev->dev, "  GPU_IRQ_MASK=0x%08x    JOB_IRQ_MASK=0x%08x     MMU_IRQ_MASK=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+	dev_err(kbdev->dev, "  PWR_OVERRIDE0=0x%08x   PWR_OVERRIDE1=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+	dev_err(kbdev->dev, "  SHADER_CONFIG=0x%08x   L2_MMU_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+	dev_err(kbdev->dev, "  TILER_CONFIG=0x%08x    JM_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG), NULL));
+}
+
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+	bool try_schedule = false;
+	bool silent = false;
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	KBASE_DEBUG_ASSERT(data);
+
+	kbdev = container_of(data, struct kbase_device,
+						hwaccess.backend.reset_work);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_SILENT)
+		silent = true;
+
+	KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	/* Make sure the timer has completed - this cannot be done from
+	 * interrupt context, so this cannot be done within
+	 * kbasep_try_reset_gpu_early. */
+	hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* This would re-activate the GPU. Since it's already idle,
+		 * there's no need to reset it */
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+		kbase_disjoint_state_down(kbdev);
+		wake_up(&kbdev->hwaccess.backend.reset_wait);
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	spin_lock(&kbdev->hwaccess_lock);
+	spin_lock(&kbdev->mmu_mask_change);
+	/* We're about to flush out the IRQs and their bottom half's */
+	kbdev->irq_reset_flush = true;
+
+	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
+	 * spinlock; this also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts_nolock(kbdev);
+
+	spin_unlock(&kbdev->mmu_mask_change);
+	spin_unlock(&kbdev->hwaccess_lock);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Ensure that any IRQ handlers have finished
+	 * Must be done without any locks IRQ handlers will take */
+	kbase_synchronize_irqs(kbdev);
+
+	/* Flush out any in-flight work items */
+	kbase_flush_mmu_wqs(kbdev);
+
+	/* The flush has completed so reset the active indicator */
+	kbdev->irq_reset_flush = false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
+		/* Ensure that L2 is not transitioning when we send the reset
+		 * command */
+		while (--max_loops && kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_L2))
+			;
+
+		WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
+	}
+
+	mutex_lock(&kbdev->pm.lock);
+	/* We hold the pm lock, so there ought to be a current policy */
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+	/* All slot have been soft-stopped and we've waited
+	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
+	 * assume that anything that is still left on the GPU is stuck there and
+	 * we'll kill it when we reset the GPU */
+
+	if (!silent)
+		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+								RESET_TIMEOUT);
+
+	/* Output the state of some interesting registers to help in the
+	 * debugging of GPU resets */
+	if (!silent)
+		kbase_debug_dump_registers(kbdev);
+
+	/* Complete any jobs that were still on the GPU */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->protected_mode = false;
+	kbase_backend_reset(kbdev, &end_timestamp);
+	kbase_pm_metrics_update(kbdev, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Reset the GPU */
+	kbase_pm_init_hw(kbdev, 0);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_restore_all_as(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_pm_enable_interrupts(kbdev);
+
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+
+	kbase_disjoint_state_down(kbdev);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	if (!silent)
+		dev_err(kbdev->dev, "Reset complete");
+
+	if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
+		try_schedule = true;
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Find out what cores are required now */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* Synchronously request and wait for those cores, because if
+	 * instrumentation is enabled it would need them immediately. */
+	kbase_pm_check_transitions_sync(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Try submitting some jobs to restart processing */
+	if (try_schedule) {
+		KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
+									0);
+		kbase_js_sched_all(kbdev);
+	}
+
+	/* Process any pending slot updates */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_context_idle(kbdev);
+
+	/* Release vinstr */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+						hwaccess.backend.reset_timer);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Reset still pending? */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+						KBASE_RESET_GPU_COMMITTED)
+		queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+	int i;
+	int pending_jobs = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Count the number of jobs */
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+	if (pending_jobs > 0) {
+		/* There are still jobs on the GPU - wait */
+		return;
+	}
+
+	/* To prevent getting incorrect registers when dumping failed job,
+	 * skip early reset.
+	 */
+	if (kbdev->job_fault_debug != false)
+		return;
+
+	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+	 * been called), and that no other thread beat this thread to starting
+	 * the reset */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+						KBASE_RESET_GPU_COMMITTED) {
+		/* Reset has already occurred */
+		return;
+	}
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_try_reset_gpu_early_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ *   The function returns a boolean which should be interpreted as follows:
+ *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
+ *   false - Another thread is performing a reset, kbase_reset_gpu should
+ *   not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_PREPARED) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return false;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_softstop(kbdev, i, NULL);
+
+	return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	bool ret;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early_locked(kbdev);
+}
+
+void kbase_reset_gpu_silent(struct kbase_device *kbdev)
+{
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_SILENT) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+			&kbdev->hwaccess.backend.reset_work);
+}
+
+bool kbase_reset_gpu_active(struct kbase_device *kbdev)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_NOT_PENDING)
+		return false;
+
+	return true;
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100644
index 0000000..1f382b3
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev:		Device pointer
+ * @s:			Job slot
+ * @completion_code:	Completion code of job reported by GPU
+ * @job_tail:		Job tail address reported by GPU
+ * @end_timestamp:	Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+					u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static inline char *kbasep_make_job_slot_string(int js, char *js_string,
+						size_t js_size)
+{
+	snprintf(js_string, js_size, "job_slot_%i", js);
+	return js_string;
+}
+#endif
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ *						   on the specified atom
+ * @kbdev:		Device pointer
+ * @js:			Job slot to stop on
+ * @action:		The action to perform, either JSn_COMMAND_HARD_STOP or
+ *			JSn_COMMAND_SOFT_STOP
+ * @core_reqs:		Core requirements of atom to stop
+ * @target_katom:	Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ *					 slot belonging to a given context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @katom:	Specific atom to stop. May be NULL
+ * @js:		Job slot to hard stop
+ * @action:	The action to perform, either JSn_COMMAND_HARD_STOP or
+ *		JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_cacheclean - Cause a GPU cache clean & flush
+ * @kbdev: Device pointer
+ *
+ * Caller must not be in IRQ context
+ */
+void kbase_gpu_cacheclean(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
new file mode 100644
index 0000000..a41e7b5
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -0,0 +1,1947 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_10969_workaround.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Return whether the specified ringbuffer is empty. HW access lock must be
+ * held */
+#define SLOT_RB_EMPTY(rb)   (rb->write_idx == rb->read_idx)
+/* Return number of atoms currently in the specified ringbuffer. HW access lock
+ * must be held */
+#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer
+ * @kbdev: Device pointer
+ * @katom: Atom to enqueue
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr];
+
+	WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
+	rb->write_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+/**
+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once
+ * it has been completed
+ * @kbdev:         Device pointer
+ * @js:            Job slot to remove atom from
+ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in
+ *                 which case current time will be used.
+ *
+ * Context: Caller must hold the HW access lock
+ *
+ * Return: Atom removed from ringbuffer
+ */
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
+						int js,
+						ktime_t *end_timestamp)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+	struct kbase_jd_atom *katom;
+
+	if (SLOT_RB_EMPTY(rb)) {
+		WARN(1, "GPU ringbuffer unexpectedly empty\n");
+		return NULL;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
+
+	kbase_gpu_release_atom(kbdev, katom, end_timestamp);
+
+	rb->read_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
+
+	kbase_js_debug_log_current_affinities(kbdev);
+
+	return katom;
+}
+
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
+		return NULL; /* idx out of range */
+
+	return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js)
+{
+	return kbase_gpu_inspect(kbdev, js, 0);
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	if (SLOT_RB_EMPTY(rb))
+		return NULL;
+
+	return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom;
+}
+
+/**
+ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently
+ * on the GPU
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ *
+ * Return: true if there are atoms on the GPU for slot js,
+ *         false otherwise
+ */
+static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (!katom)
+			return false;
+		if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED ||
+				katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
+ * currently on the GPU
+ * @kbdev:  Device pointer
+ *
+ * Return: true if there are any atoms on the GPU, false otherwise
+ */
+static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
+{
+	int js;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+			if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+				return true;
+		}
+	}
+	return false;
+}
+
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED))
+			nr++;
+	}
+
+	return nr;
+}
+
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		if (kbase_gpu_inspect(kbdev, js, i))
+			nr++;
+	}
+
+	return nr;
+}
+
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
+				enum kbase_atom_gpu_rb_state min_rb_state)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state >= min_rb_state))
+			nr++;
+	}
+
+	return nr;
+}
+
+/**
+ * check_secure_atom - Check if the given atom is in the given secure state and
+ *                     has a ringbuffer state of at least
+ *                     KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @katom:  Atom pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if atom is in the given state, false otherwise
+ */
+static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure)
+{
+	if (katom->gpu_rb_state >=
+			KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION &&
+			((kbase_jd_katom_is_protected(katom) && secure) ||
+			(!kbase_jd_katom_is_protected(katom) && !secure)))
+		return true;
+
+	return false;
+}
+
+/**
+ * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given
+ *                                secure state in the ringbuffers of at least
+ *                                state
+ *                                KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE
+ * @kbdev:  Device pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if any atoms are in the given state, false otherwise
+ */
+static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
+		bool secure)
+{
+	int js, i;
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, i);
+
+			if (katom) {
+				if (check_secure_atom(katom, secure))
+					return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* The GPU is being reset - so prevent submission */
+		return 0;
+	}
+
+	return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
+}
+
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom);
+
+static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
+						int js,
+						struct kbase_jd_atom *katom)
+{
+	/* The most recently checked affinity. Having this at this scope allows
+	 * us to guarantee that we've checked the affinity in this function
+	 * call.
+	 */
+	u64 recently_chosen_affinity = 0;
+	bool chosen_affinity = false;
+	bool retry;
+
+	do {
+		retry = false;
+
+		/* NOTE: The following uses a number of FALLTHROUGHs to optimize
+		 * the calls to this function. Ending of the function is
+		 * indicated by BREAK OUT */
+		switch (katom->coreref_state) {
+			/* State when job is first attempted to be run */
+		case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+			KBASE_DEBUG_ASSERT(katom->affinity == 0);
+
+			/* Compute affinity */
+			if (false == kbase_js_choose_affinity(
+					&recently_chosen_affinity, kbdev, katom,
+									js)) {
+				/* No cores are currently available */
+				/* *** BREAK OUT: No state transition *** */
+				break;
+			}
+
+			chosen_affinity = true;
+
+			/* Request the cores */
+			kbase_pm_request_cores(kbdev,
+					katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+			katom->affinity = recently_chosen_affinity;
+
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+			{
+				enum kbase_pm_cores_ready cores_ready;
+
+				KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+				cores_ready = kbase_pm_register_inuse_cores(
+						kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						katom->affinity);
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Stay in this state and return, to
+					 * retry at this state later */
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: No state transition
+					 * *** */
+					break;
+				}
+				/* Proceed to next state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+			}
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+			/* Optimize out choosing the affinity twice in the same
+			 * function call */
+			if (chosen_affinity == false) {
+				/* See if the affinity changed since a previous
+				 * call. */
+				if (false == kbase_js_choose_affinity(
+						&recently_chosen_affinity,
+							kbdev, katom, js)) {
+					/* No cores are currently available */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REQUEST_ON_RECHECK_FAILED,
+						katom->kctx, katom,
+						katom->jc, js,
+						(u32) recently_chosen_affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+				chosen_affinity = true;
+			}
+
+			/* Now see if this requires a different set of cores */
+			if (recently_chosen_affinity != katom->affinity) {
+				enum kbase_pm_cores_ready cores_ready;
+
+				kbase_pm_request_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+				/* Register new cores whilst we still hold the
+				 * old ones, to minimize power transitions */
+				cores_ready =
+					kbase_pm_register_inuse_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+
+				/* Fixup the state that was reduced by
+				 * deref_cores: */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				katom->affinity = recently_chosen_affinity;
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				/* Now might be waiting for powerup again, with
+				 * a new affinity */
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Return to previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_ON_RECHECK_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+			}
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+		case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+			KBASE_DEBUG_ASSERT(katom->affinity ==
+						recently_chosen_affinity);
+
+			/* Note: this is where the caller must've taken the
+			 * hwaccess_lock */
+
+			/* Check for affinity violations - if there are any,
+			 * then we just ask the caller to requeue and try again
+			 * later */
+			if (kbase_js_affinity_would_violate(kbdev, js,
+					katom->affinity) != false) {
+				/* Return to previous state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				/* *** BREAK OUT: Transition to lower state ***
+				 */
+				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_AFFINITY_WOULD_VIOLATE,
+					katom->kctx, katom, katom->jc, js,
+					(u32) katom->affinity);
+				break;
+			}
+
+			/* No affinity violations would result, so the cores are
+			 * ready */
+			katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
+			/* *** BREAK OUT: Cores Ready *** */
+			break;
+
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false,
+					"Unhandled kbase_atom_coreref_state %d",
+							katom->coreref_state);
+			break;
+		}
+	} while (retry != false);
+
+	return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
+}
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	switch (katom->coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(katom->affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							katom->coreref_state);
+		break;
+	}
+
+	katom->affinity = 0;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+}
+
+static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							coreref_state);
+		break;
+	}
+}
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	switch (katom->gpu_rb_state) {
+	case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+		/* Should be impossible */
+		WARN(1, "Attempting to release atom not in ringbuffer\n");
+		break;
+
+	case KBASE_ATOM_GPU_RB_SUBMITTED:
+		/* Inform power management at start/finish of atom so it can
+		 * update its GPU utilisation metrics. Mark atom as not
+		 * submitted beforehand. */
+		katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		kbase_pm_metrics_update(kbdev, end_timestamp);
+
+		if (katom->core_req & BASE_JD_REQ_PERMON)
+			kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+		KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
+			&kbdev->gpu_props.props.raw_props.js_features
+				[katom->slot_nr]);
+		KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
+		KBASE_TLSTREAM_TL_NRET_CTX_LPU(kctx,
+			&kbdev->gpu_props.props.raw_props.js_features
+				[katom->slot_nr]);
+
+	case KBASE_ATOM_GPU_RB_READY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+		kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
+							katom->affinity);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+		break;
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+		if (katom->protected_state.enter !=
+				KBASE_ATOM_ENTER_PROTECTED_CHECK ||
+				katom->protected_state.exit !=
+				KBASE_ATOM_EXIT_PROTECTED_CHECK)
+			kbdev->protected_mode_transition = false;
+
+		if (kbase_jd_katom_is_protected(katom) &&
+				(katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) {
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+			/* Go back to configured model for IPA */
+			kbase_ipa_model_use_configured_locked(kbdev);
+		}
+
+
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+		break;
+	}
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+}
+
+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	kbase_gpu_release_atom(kbdev, katom, NULL);
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+}
+
+static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	bool slot_busy[3];
+
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+	slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+
+	if ((js == 2 && !(slot_busy[0] || slot_busy[1])) ||
+		(js != 2 && !slot_busy[2]))
+		return true;
+
+	/* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */
+	if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1) ||
+			backend->rmu_workaround_flag))
+		return false;
+
+	/* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */
+	if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) ||
+			!backend->rmu_workaround_flag))
+		return false;
+
+	backend->rmu_workaround_flag = !backend->rmu_workaround_flag;
+
+	return true;
+}
+
+/**
+ * other_slots_busy - Determine if any job slots other than @js are currently
+ *                    running atoms
+ * @kbdev: Device pointer
+ * @js:    Job slot
+ *
+ * Return: true if any slots other than @js are busy, false otherwise
+ */
+static inline bool other_slots_busy(struct kbase_device *kbdev, int js)
+{
+	int slot;
+
+	for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) {
+		if (slot == js)
+			continue;
+
+		if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot,
+				KBASE_ATOM_GPU_RB_SUBMITTED))
+			return true;
+	}
+
+	return false;
+}
+
+static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev)
+{
+	return kbdev->protected_mode;
+}
+
+static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot enter protected mode: protected callbacks not specified.\n");
+
+	/*
+	 * When entering into protected mode, we must ensure that the
+	 * GPU is not operating in coherent mode as well. This is to
+	 * ensure that no protected memory can be leaked.
+	 */
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE);
+
+	if (kbdev->protected_ops) {
+		/* Switch GPU to protected mode */
+		err = kbdev->protected_ops->protected_mode_enable(
+				kbdev->protected_dev);
+
+		if (err)
+			dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n",
+					err);
+		else
+			kbdev->protected_mode = true;
+	}
+
+	return err;
+}
+
+static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot exit protected mode: protected callbacks not specified.\n");
+
+	if (!kbdev->protected_ops)
+		return -EINVAL;
+
+	/* The protected mode disable callback will be called as part of reset
+	 */
+	kbase_reset_gpu_silent(kbdev);
+
+	return 0;
+}
+
+static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	switch (katom[idx]->protected_state.enter) {
+	case KBASE_ATOM_ENTER_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		kbdev->protected_mode_transition = true;
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_VINSTR;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_VINSTR:
+		if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
+			/*
+			 * We can't switch now because
+			 * the vinstr core state switch
+			 * is not done yet.
+			 */
+			return -EAGAIN;
+		}
+
+		/* Use generic model for IPA in protected mode */
+		kbase_ipa_model_use_fallback_locked(kbdev);
+
+		/* Once reaching this point GPU must be
+		 * switched to protected mode or vinstr
+		 * re-enabled. */
+
+		/*
+		 * Not in correct mode, begin protected mode switch.
+		 * Entering protected mode requires us to power down the L2,
+		 * and drop out of fully coherent mode.
+		 */
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
+		/* Avoid unnecessary waiting on non-ACE platforms. */
+		if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) {
+			if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+				/*
+				* The L2 is still powered, wait for all the users to
+				* finish with it before doing the actual reset.
+				*/
+				return -EAGAIN;
+			}
+		}
+
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_FINISHED;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
+
+		/* No jobs running, so we can switch GPU mode right now. */
+		err = kbase_gpu_protected_mode_enter(kbdev);
+
+		/*
+		 * Regardless of result, we are no longer transitioning
+		 * the GPU.
+		 */
+		kbdev->protected_mode_transition = false;
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev);
+		if (err) {
+			/*
+			 * Failed to switch into protected mode, resume
+			 * vinstr core and fail atom.
+			 */
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order. */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			/* Go back to configured model for IPA */
+			kbase_ipa_model_use_configured_locked(kbdev);
+
+			return -EINVAL;
+		}
+
+		/* Protected mode sanity checks. */
+		KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) ==
+			kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]),
+			kbase_gpu_in_protected_mode(kbdev));
+		katom[idx]->gpu_rb_state =
+			KBASE_ATOM_GPU_RB_READY;
+	}
+
+	return 0;
+}
+
+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+
+	switch (katom[idx]->protected_state.exit) {
+	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		/*
+		 * Exiting protected mode requires a reset, but first the L2
+		 * needs to be powered down to ensure it's not active when the
+		 * reset is issued.
+		 */
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
+
+		kbdev->protected_mode_transition = true;
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+	case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/*
+			 * The L2 is still powered, wait for all the users to
+			 * finish with it before doing the actual reset.
+			 */
+			return -EAGAIN;
+		}
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET:
+		/* Issue the reset to the GPU */
+		err = kbase_gpu_protected_mode_reset(kbdev);
+
+		if (err) {
+			kbdev->protected_mode_transition = false;
+
+			/* Failed to exit protected mode, fail atom */
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+			/* Use generic model for IPA in protected mode */
+			kbase_ipa_model_use_fallback_locked(kbdev);
+
+			return -EINVAL;
+		}
+
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT:
+		/* A GPU reset is issued when exiting protected mode. Once the
+		 * reset is done all atoms' state will also be reset. For this
+		 * reason, if the atom is still in this state we can safely
+		 * say that the reset has not completed i.e., we have not
+		 * finished exiting protected mode yet.
+		 */
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+void kbase_backend_slot_update(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_jd_atom *katom[2];
+		int idx;
+
+		katom[0] = kbase_gpu_inspect(kbdev, js, 0);
+		katom[1] = kbase_gpu_inspect(kbdev, js, 1);
+		WARN_ON(katom[1] && !katom[0]);
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			bool cores_ready;
+			int ret;
+
+			if (!katom[idx])
+				continue;
+
+			switch (katom[idx]->gpu_rb_state) {
+			case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+				/* Should be impossible */
+				WARN(1, "Attempting to update atom not in ringbuffer\n");
+				break;
+
+			case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+				if (katom[idx]->atom_flags &
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED)
+					break;
+
+				katom[idx]->gpu_rb_state =
+				KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+				if (kbase_gpu_check_secure_atoms(kbdev,
+						!kbase_jd_katom_is_protected(
+						katom[idx])))
+					break;
+
+				if ((idx == 1) && (kbase_jd_katom_is_protected(
+								katom[0]) !=
+						kbase_jd_katom_is_protected(
+								katom[1])))
+					break;
+
+				if (kbdev->protected_mode_transition)
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+
+				/*
+				 * Exiting protected mode must be done before
+				 * the references on the cores are taken as
+				 * a power down the L2 is required which
+				 * can't happen after the references for this
+				 * atom are taken.
+				 */
+
+				if (!kbase_gpu_in_protected_mode(kbdev) &&
+					kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition into protected mode. */
+					ret = kbase_jm_enter_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				} else if (kbase_gpu_in_protected_mode(kbdev) &&
+					!kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition out of protected mode. */
+					ret = kbase_jm_exit_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				}
+				katom[idx]->protected_state.exit =
+						KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+				/* Atom needs no protected mode transition. */
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+				if (katom[idx]->will_fail_event_code) {
+					kbase_gpu_mark_atom_for_return(kbdev,
+							katom[idx]);
+					/* Set EVENT_DONE so this atom will be
+					   completed, not unpulled. */
+					katom[idx]->event_code =
+						BASE_JD_EVENT_DONE;
+					/* Only return if head atom or previous
+					 * atom already removed - as atoms must
+					 * be returned in order. */
+					if (idx == 0 ||	katom[0]->gpu_rb_state ==
+							KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+						kbase_gpu_dequeue_atom(kbdev, js, NULL);
+						kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+					}
+					break;
+				}
+
+				cores_ready =
+					kbasep_js_job_check_ref_cores(kbdev, js,
+								katom[idx]);
+
+				if (katom[idx]->event_code ==
+						BASE_JD_EVENT_PM_EVENT) {
+					katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+					break;
+				}
+
+				if (!cores_ready)
+					break;
+
+				kbase_js_affinity_retain_slot_cores(kbdev, js,
+							katom[idx]->affinity);
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+				if (!kbase_gpu_rmu_workaround(kbdev, js))
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_READY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_READY:
+
+				if (idx == 1) {
+					/* Only submit if head atom or previous
+					 * atom already submitted */
+					if ((katom[0]->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED &&
+						katom[0]->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+						break;
+
+					/* If intra-slot serialization in use
+					 * then don't submit atom to NEXT slot
+					 */
+					if (kbdev->serialize_jobs &
+						KBASE_SERIALIZE_INTRA_SLOT)
+						break;
+				}
+
+				/* If inter-slot serialization in use then don't
+				 * submit atom if any other slots are in use */
+				if ((kbdev->serialize_jobs &
+						KBASE_SERIALIZE_INTER_SLOT) &&
+						other_slots_busy(kbdev, js))
+					break;
+
+				if ((kbdev->serialize_jobs &
+						KBASE_SERIALIZE_RESET) &&
+						kbase_reset_gpu_active(kbdev))
+					break;
+
+				/* Check if this job needs the cycle counter
+				 * enabled before submission */
+				if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+					kbase_pm_request_gpu_cycle_counter_l2_is_on(
+									kbdev);
+
+				kbase_job_hw_submit(kbdev, katom[idx], js);
+				katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_SUBMITTED;
+
+				/* Inform power management at start/finish of
+				 * atom so it can update its GPU utilisation
+				 * metrics. */
+				kbase_pm_metrics_update(kbdev,
+						&katom[idx]->start_timestamp);
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_SUBMITTED:
+				/* Atom submitted to HW, nothing else to do */
+				break;
+
+			case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+				/* Only return if head atom or previous atom
+				 * already removed - as atoms must be returned
+				 * in order */
+				if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					kbase_gpu_dequeue_atom(kbdev, js, NULL);
+					kbase_jm_return_atom_to_js(kbdev,
+								katom[idx]);
+				}
+				break;
+			}
+		}
+	}
+
+	/* Warn if PRLAM-8987 affinity restrictions are violated */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1)) &&
+			kbase_gpu_atoms_submitted(kbdev, 2));
+}
+
+
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbase_gpu_enqueue_atom(kbdev, katom);
+	kbase_backend_slot_update(kbdev);
+}
+
+#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \
+	(KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER))
+
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_atom *next_katom;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = kbase_gpu_inspect(kbdev, js, 0);
+	next_katom = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (next_katom && katom->kctx == next_katom->kctx &&
+		next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+		HAS_DEP(next_katom) &&
+		(kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+		kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL)
+									!= 0)) {
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+				JS_COMMAND_NOP, NULL);
+		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+
+		KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
+				&kbdev->gpu_props.props.raw_props.js_features
+					[katom->slot_nr]);
+		KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as
+					[katom->kctx->as_nr]);
+		KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx,
+				&kbdev->gpu_props.props.raw_props.js_features
+					[katom->slot_nr]);
+
+		return true;
+	}
+
+	return false;
+}
+
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/*
+	 * When a hard-stop is followed close after a soft-stop, the completion
+	 * code may be set to STOPPED, even though the job is terminated
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) {
+		if (completion_code == BASE_JD_EVENT_STOPPED &&
+				(katom->atom_flags &
+				KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) {
+			completion_code = BASE_JD_EVENT_TERMINATED;
+		}
+	}
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) || (katom->core_req &
+					BASE_JD_REQ_SKIP_CACHE_END)) &&
+			completion_code != BASE_JD_EVENT_DONE &&
+			!(completion_code & BASE_JD_SW_EVENT)) {
+		/* When a job chain fails, on a T60x or when
+		 * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not
+		 * flushed. To prevent future evictions causing possible memory
+		 * corruption we need to flush the cache manually before any
+		 * affected memory gets reused. */
+		katom->need_cache_flush_cores_retained = katom->affinity;
+		kbase_pm_request_cores(kbdev, false, katom->affinity);
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
+		if (kbdev->gpu_props.num_core_groups > 1 &&
+			!(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[0].core_mask
+									) &&
+			(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[1].core_mask
+									)) {
+			dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
+			katom->need_cache_flush_cores_retained =
+								katom->affinity;
+			kbase_pm_request_cores(kbdev, false,
+							katom->affinity);
+		}
+	}
+
+	katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+	kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED) {
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		/*
+		 * Dequeue next atom from ringbuffers on same slot if required.
+		 * This atom will already have been removed from the NEXT
+		 * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
+		 * the atoms on this slot are returned in the correct order.
+		 */
+		if (next_katom && katom->kctx == next_katom->kctx &&
+				next_katom->sched_priority ==
+				katom->sched_priority) {
+			kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+			kbase_jm_return_atom_to_js(kbdev, next_katom);
+		}
+	} else if (completion_code != BASE_JD_EVENT_DONE) {
+		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+		int i;
+
+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
+		KBASE_TRACE_DUMP(kbdev);
+#endif
+		kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+		/*
+		 * Remove all atoms on the same context from ringbuffers. This
+		 * will not remove atoms that are already on the GPU, as these
+		 * are guaranteed not to have fail dependencies on the failed
+		 * atom.
+		 */
+		for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+			struct kbase_jd_atom *katom_idx0 =
+						kbase_gpu_inspect(kbdev, i, 0);
+			struct kbase_jd_atom *katom_idx1 =
+						kbase_gpu_inspect(kbdev, i, 1);
+
+			if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
+					HAS_DEP(katom_idx0) &&
+					katom_idx0->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Dequeue katom_idx0 from ringbuffer */
+				kbase_gpu_dequeue_atom(kbdev, i, end_timestamp);
+
+				if (katom_idx1 &&
+						katom_idx1->kctx == katom->kctx
+						&& HAS_DEP(katom_idx1) &&
+						katom_idx0->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+					/* Dequeue katom_idx1 from ringbuffer */
+					kbase_gpu_dequeue_atom(kbdev, i,
+							end_timestamp);
+
+					katom_idx1->event_code =
+							BASE_JD_EVENT_STOPPED;
+					kbase_jm_return_atom_to_js(kbdev,
+								katom_idx1);
+				}
+				katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+
+			} else if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					HAS_DEP(katom_idx1) &&
+					katom_idx1->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Can not dequeue this atom yet - will be
+				 * dequeued when atom at idx0 completes */
+				katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_gpu_mark_atom_for_return(kbdev,
+								katom_idx1);
+			}
+		}
+	}
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
+					js, completion_code);
+
+	if (job_tail != 0 && job_tail != katom->jc) {
+		bool was_updated = (job_tail != katom->jc);
+
+		/* Some of the job has been executed, so we update the job chain
+		 * address to where we should resume from */
+		katom->jc = job_tail;
+		if (was_updated)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+						katom, job_tail, js);
+	}
+
+	/* Only update the event code for jobs that weren't cancelled */
+	if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+		katom->event_code = (base_jd_event_code)completion_code;
+
+	kbase_device_trace_register_access(kctx, REG_WRITE,
+						JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+						1 << js);
+
+	/* Complete the job, and start new ones
+	 *
+	 * Also defer remaining work onto the workqueue:
+	 * - Re-queue Soft-stopped jobs
+	 * - For any other jobs, queue the job back into the dependency system
+	 * - Schedule out the parent context if necessary, and schedule a new
+	 *   one in.
+	 */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	{
+		/* The atom in the HEAD */
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		if (next_katom && next_katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+							js_string,
+							sizeof(js_string)),
+						ktime_to_ns(*end_timestamp),
+						(u32)next_katom->kctx->id, 0,
+						next_katom->work_id);
+			kbdev->hwaccess.backend.slot_rb[js].last_context =
+							next_katom->kctx;
+		} else {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+							js_string,
+							sizeof(js_string)),
+						ktime_to_ns(ktime_get()), 0, 0,
+						0);
+			kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
+		}
+	}
+#endif
+
+	if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)
+		kbase_reset_gpu_silent(kbdev);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED)
+		katom = kbase_jm_return_atom_to_js(kbdev, katom);
+	else
+		katom = kbase_jm_complete(kbdev, katom, end_timestamp);
+
+	if (katom) {
+		/* Cross-slot dependency has now become runnable. Try to submit
+		 * it. */
+
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		kbase_jm_try_kick(kbdev, 1 << katom->slot_nr);
+	}
+
+	/* Job completion may have unblocked other atoms. Try to update all job
+	 * slots */
+	kbase_backend_slot_update(kbdev);
+}
+
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Reset should always take the GPU out of protected mode */
+	WARN_ON(kbase_gpu_in_protected_mode(kbdev));
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int atom_idx = 0;
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, atom_idx);
+			bool keep_in_jm_rb = false;
+
+			if (!katom)
+				break;
+			if (katom->protected_state.exit ==
+					KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)
+			{
+				KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev);
+
+				kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+				/* protected mode sanity checks */
+				KBASE_DEBUG_ASSERT_MSG(
+					kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev),
+					"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+					kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev));
+				KBASE_DEBUG_ASSERT_MSG(
+					(kbase_jd_katom_is_protected(katom) && js == 0) ||
+					!kbase_jd_katom_is_protected(katom),
+					"Protected atom on JS%d not supported", js);
+			}
+			if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED)
+				keep_in_jm_rb = true;
+
+			kbase_gpu_release_atom(kbdev, katom, NULL);
+
+			/*
+			 * If the atom wasn't on HW when the reset was issued
+			 * then leave it in the RB and next time we're kicked
+			 * it will be processed again from the starting state.
+			 */
+			if (keep_in_jm_rb) {
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+				katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+				katom->affinity = 0;
+				katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+				/* As the atom was not removed, increment the
+				 * index so that we read the correct atom in the
+				 * next iteration. */
+				atom_idx++;
+				continue;
+			}
+
+			/*
+			 * The atom was on the HW when the reset was issued
+			 * all we can do is fail the atom.
+			 */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			kbase_jm_complete(kbdev, katom, end_timestamp);
+		}
+	}
+
+	kbdev->protected_mode_transition = false;
+}
+
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
+	kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
+							katom->core_req, katom);
+	katom->kctx->blocked_js[js][katom->sched_priority] = true;
+}
+
+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom,
+						u32 action,
+						bool disjoint)
+{
+	katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+	kbase_gpu_mark_atom_for_return(kbdev, katom);
+	katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true;
+
+	if (disjoint)
+		kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
+									katom);
+}
+
+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
+{
+	if (katom->x_post_dep) {
+		struct kbase_jd_atom *dep_atom = katom->x_post_dep;
+
+		if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB &&
+			dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_RETURN_TO_JS)
+			return dep_atom->slot_nr;
+	}
+	return -1;
+}
+
+static void kbase_job_evicted(struct kbase_jd_atom *katom)
+{
+	kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom,
+			katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
+}
+
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbase_jd_atom *katom_idx0;
+	struct kbase_jd_atom *katom_idx1;
+
+	bool katom_idx0_valid, katom_idx1_valid;
+
+	bool ret = false;
+
+	int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
+	int prio_idx0 = 0, prio_idx1 = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
+	katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (katom_idx0)
+		prio_idx0 = katom_idx0->sched_priority;
+	if (katom_idx1)
+		prio_idx1 = katom_idx1->sched_priority;
+
+	if (katom) {
+		katom_idx0_valid = (katom_idx0 == katom);
+		/* If idx0 is to be removed and idx1 is on the same context,
+		 * then idx1 must also be removed otherwise the atoms might be
+		 * returned out of order */
+		if (katom_idx1)
+			katom_idx1_valid = (katom_idx1 == katom) ||
+						(katom_idx0_valid &&
+							(katom_idx0->kctx ==
+							katom_idx1->kctx));
+		else
+			katom_idx1_valid = false;
+	} else {
+		katom_idx0_valid = (katom_idx0 &&
+				(!kctx || katom_idx0->kctx == kctx));
+		katom_idx1_valid = (katom_idx1 &&
+				(!kctx || katom_idx1->kctx == kctx) &&
+				prio_idx0 == prio_idx1);
+	}
+
+	if (katom_idx0_valid)
+		stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
+	if (katom_idx1_valid)
+		stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1);
+
+	if (katom_idx0_valid) {
+		if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Simple case - just dequeue and return */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			if (katom_idx1_valid) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				katom_idx1->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx1);
+				katom_idx1->kctx->blocked_js[js][prio_idx1] =
+						true;
+			}
+
+			katom_idx0->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+			kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+			katom_idx0->kctx->blocked_js[js][prio_idx0] = true;
+		} else {
+			/* katom_idx0 is on GPU */
+			if (katom_idx1 && katom_idx1->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* katom_idx0 and katom_idx1 are on GPU */
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+					/* idx0 has already completed - stop
+					 * idx1 if needed*/
+					if (katom_idx1_valid) {
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				} else {
+					/* idx1 is in NEXT registers - attempt
+					 * to remove */
+					kbase_reg_write(kbdev,
+							JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+					if (kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_HI), NULL)
+									!= 0) {
+						/* idx1 removed successfully,
+						 * will be handled in IRQ */
+						kbase_job_evicted(katom_idx1);
+						kbase_gpu_remove_atom(kbdev,
+								katom_idx1,
+								action, true);
+						stop_x_dep_idx1 =
+					should_stop_x_dep_slot(katom_idx1);
+
+						/* stop idx0 if still on GPU */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx0,
+								action);
+						ret = true;
+					} else if (katom_idx1_valid) {
+						/* idx0 has already completed,
+						 * stop idx1 if needed */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				}
+			} else if (katom_idx1_valid) {
+				/* idx1 not on GPU but must be dequeued*/
+
+				/* idx1 will be handled in IRQ */
+				kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+				/* stop idx0 */
+				/* This will be repeated for anything removed
+				 * from the next registers, since their normal
+				 * flow was also interrupted, and this function
+				 * might not enter disjoint state e.g. if we
+				 * don't actually do a hard stop on the head
+				 * atom */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			} else {
+				/* no atom in idx1 */
+				/* just stop idx0 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			}
+		}
+	} else if (katom_idx1_valid) {
+		if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Mark for return */
+			/* idx1 will be returned once idx0 completes */
+			kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+		} else {
+			/* idx1 is on GPU */
+			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+				/* idx0 has already completed - stop idx1 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx1,
+									action);
+				ret = true;
+			} else {
+				/* idx1 is in NEXT registers - attempt to
+				 * remove */
+				kbase_reg_write(kbdev, JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_LO), NULL) != 0 ||
+				    kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_HI), NULL) != 0) {
+					/* idx1 removed successfully, will be
+					 * handled in IRQ once idx0 completes */
+					kbase_job_evicted(katom_idx1);
+					kbase_gpu_remove_atom(kbdev, katom_idx1,
+									action,
+									false);
+				} else {
+					/* idx0 has already completed - stop
+					 * idx1 */
+					kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+					ret = true;
+				}
+			}
+		}
+	}
+
+
+	if (stop_x_dep_idx0 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0,
+								NULL, action);
+
+	if (stop_x_dep_idx1 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1,
+								NULL, action);
+
+	return ret;
+}
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev)
+{
+	/* Limit the number of loops to avoid a hang if the interrupt is missed
+	 */
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	mutex_lock(&kbdev->cacheclean_lock);
+
+	/* use GPU_COMMAND completion solution */
+	/* clean & invalidate the caches */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+	/* wait for cache flush to complete before continuing */
+	while (--max_loops &&
+		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+						CLEAN_CACHES_COMPLETED) == 0)
+		;
+
+	/* clear the CLEAN_CACHES_COMPLETED irq */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
+							CLEAN_CACHES_COMPLETED);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+						CLEAN_CACHES_COMPLETED, NULL);
+	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING,
+	    "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_backend_cacheclean(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->need_cache_flush_cores_retained) {
+		unsigned long flags;
+
+		kbase_gpu_cacheclean(kbdev);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_pm_unrequest_cores(kbdev, false,
+					katom->need_cache_flush_cores_retained);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		katom->need_cache_flush_cores_retained = 0;
+	}
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	/*
+	 * If cache flush required due to HW workaround then perform the flush
+	 * now
+	 */
+	kbase_backend_cacheclean(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969)            &&
+	    (katom->core_req & BASE_JD_REQ_FS)                        &&
+	    katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT       &&
+	    (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+	    !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+		dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+		if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after was soft-stopped
+			 * Due to an HW issue we try to execute the job again.
+			 */
+			dev_dbg(kbdev->dev,
+				"Clamping has been executed, try to rerun the job\n"
+			);
+			katom->event_code = BASE_JD_EVENT_STOPPED;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+		}
+	}
+
+	/* Clear the coreref_state now - while check_deref_cores() may not have
+	 * been called yet, the caller will have taken a copy of this field. If
+	 * this is not done, then if the atom is re-scheduled (following a soft
+	 * stop) then the core reference would not be retaken. */
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->affinity = 0;
+}
+
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity,
+			coreref_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!kbdev->pm.active_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		mutex_lock(&kbdev->pm.lock);
+		kbase_pm_update_active(kbdev);
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+void kbase_gpu_dump_slots(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	int js;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+									js,
+									idx);
+
+			if (katom)
+				dev_info(kbdev->dev,
+				"  js%d idx%d : katom=%p gpu_rb_state=%d\n",
+				js, idx, katom, katom->gpu_rb_state);
+			else
+				dev_info(kbdev->dev, "  js%d idx%d : empty\n",
+								js, idx);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
new file mode 100644
index 0000000..1e0e05a
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_H_
+#define _KBASE_HWACCESS_GPU_H_
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/**
+ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot
+ *
+ * @kbdev:         Device pointer
+ * @js:            Job slot to evict from
+ *
+ * Evict the atom in the NEXT slot for the specified job slot. This function is
+ * called from the job complete IRQ handler when the previous job has failed.
+ *
+ * Return: true if job evicted from NEXT registers, false otherwise
+ */
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_gpu_complete_hw - Complete an atom on job slot js
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot that has completed
+ * @completion_code: Event code from job that has completed
+ * @job_tail:        The tail address from the hardware if the job has partially
+ *                   completed
+ * @end_timestamp:   Time of completion
+ */
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
+ *
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ * @idx:    Index into ringbuffer. 0 is the job currently running on
+ *          the slot, 1 is the job waiting, all other values are invalid.
+ * Return:  The atom at that position in the ringbuffer
+ *          or NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx);
+
+/**
+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
new file mode 100644
index 0000000..54d8ddd
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
@@ -0,0 +1,303 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+#include "mali_kbase_hw.h"
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+									int js)
+{
+	/*
+	 * Here are the reasons for using job slot 2:
+	 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+	 * - In absence of the above, then:
+	 *  - Atoms with BASE_JD_REQ_COHERENT_GROUP
+	 *  - But, only when there aren't contexts with
+	 *  KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+	 *  all cores on slot 1 could be blocked by those using a coherent group
+	 *  on slot 2
+	 *  - And, only when you actually have 2 or more coregroups - if you
+	 *  only have 1 coregroup, then having jobs for slot 2 implies they'd
+	 *  also be for slot 1, meaning you'll get interference from them. Jobs
+	 *  able to run on slot 2 could also block jobs that can only run on
+	 *  slot 1 (tiler jobs)
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+
+	if (js != 2)
+		return true;
+
+	/* Only deal with js==2 now: */
+	if (kbdev->gpu_props.num_core_groups > 1) {
+		/* Only use slot 2 in the 2+ coregroup case */
+		if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+					KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
+								false) {
+			/* ...But only when we *don't* have atoms that run on
+			 * all cores */
+
+			/* No specific check for BASE_JD_REQ_COHERENT_GROUP
+			 * atoms - the policy will sort that out */
+			return true;
+		}
+	}
+
+	/* Above checks failed mean we shouldn't use slot 2 */
+	return false;
+}
+
+/*
+ * As long as it has been decided to have a deeper modification of
+ * what job scheduler, power manager and affinity manager will
+ * implement, this function is just an intermediate step that
+ * assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ *   and high cores in a core split will be evently distributed.
+ * - Odd combinations of core requirements have been filtered out
+ *   and do not get to this function (e.g. CS+T+NSS is not
+ *   supported here).
+ * - This function is frequently called and can be optimized,
+ *   (see notes in loops), but as the functionallity will likely
+ *   be modified, optimization has not been addressed.
+*/
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js)
+{
+	base_jd_core_req core_req = katom->core_req;
+	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+	u64 core_availability_mask;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+	/*
+	 * If no cores are currently available (core availability policy is
+	 * transitioning) then fail.
+	 */
+	if (0 == core_availability_mask) {
+		*affinity = 0;
+		return false;
+	}
+
+	KBASE_DEBUG_ASSERT(js >= 0);
+
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+								BASE_JD_REQ_T) {
+		 /* If the hardware supports XAFFINITY then we'll only enable
+		  * the tiler (which is the default so this is a no-op),
+		  * otherwise enable shader core 0. */
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = 1;
+		else
+			*affinity = 0;
+
+		return true;
+	}
+
+	if (1 == kbdev->gpu_props.num_cores) {
+		/* trivial case only one core, nothing to do */
+		*affinity = core_availability_mask &
+				kbdev->pm.debug_core_mask[js];
+	} else {
+		if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+			if (js == 0 || num_core_groups == 1) {
+				/* js[0] and single-core-group systems just get
+				 * the first core group */
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[0].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+			} else {
+				/* js[1], js[2] use core groups 0, 1 for
+				 * dual-core-group systems */
+				u32 core_group_idx = ((u32) js) - 1;
+
+				KBASE_DEBUG_ASSERT(core_group_idx <
+							num_core_groups);
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+
+				/* If the job is specifically targeting core
+				 * group 1 and the core availability policy is
+				 * keeping that core group off, then fail */
+				if (*affinity == 0 && core_group_idx == 1 &&
+						kbdev->pm.backend.cg1_disabled
+								== true)
+					katom->event_code =
+							BASE_JD_EVENT_PM_EVENT;
+			}
+		} else {
+			/* All cores are available when no core split is
+			 * required */
+			*affinity = core_availability_mask &
+					kbdev->pm.debug_core_mask[js];
+		}
+	}
+
+	/*
+	 * If no cores are currently available in the desired core group(s)
+	 * (core availability policy is transitioning) then fail.
+	 */
+	if (*affinity == 0)
+		return false;
+
+	/* Enable core 0 if tiler required for hardware without XAFFINITY
+	 * support (notes above) */
+	if (core_req & BASE_JD_REQ_T) {
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = *affinity | 1;
+	}
+
+	return true;
+}
+
+static inline bool kbase_js_affinity_is_violating(
+						struct kbase_device *kbdev,
+								u64 *affinities)
+{
+	/* This implementation checks whether the two slots involved in Generic
+	 * thread creation have intersecting affinity. This is due to micro-
+	 * architectural issues where a job in slot A targetting cores used by
+	 * slot B could prevent the job in slot B from making progress until the
+	 * job in slot A has completed.
+	 */
+	u64 affinity_set_left;
+	u64 affinity_set_right;
+	u64 intersection;
+
+	KBASE_DEBUG_ASSERT(affinities != NULL);
+
+	affinity_set_left = affinities[1];
+
+	affinity_set_right = affinities[2];
+
+	/* A violation occurs when any bit in the left_set is also in the
+	 * right_set */
+	intersection = affinity_set_left & affinity_set_right;
+
+	return (bool) (intersection != (u64) 0u);
+}
+
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
+			sizeof(js_devdata->runpool_irq.slot_affinities));
+
+	new_affinities[js] |= affinity;
+
+	return kbase_js_affinity_is_violating(kbdev, new_affinities);
+}
+
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
+								== false);
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		cnt =
+		++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (cnt == 1)
+			js_devdata->runpool_irq.slot_affinities[js] |= bit;
+
+		cores &= ~bit;
+	}
+}
+
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		KBASE_DEBUG_ASSERT(
+		js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
+
+		cnt =
+		--(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (0 == cnt)
+			js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
+
+		cores &= ~bit;
+	}
+}
+
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int slot_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	for (slot_nr = 0; slot_nr < 3; ++slot_nr)
+		KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
+							NULL, 0u, slot_nr,
+			(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
new file mode 100644
index 0000000..35d9781
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Affinity Manager internal APIs.
+ */
+
+#ifndef _KBASE_JS_AFFINITY_H_
+#define _KBASE_JS_AFFINITY_H_
+
+/**
+ * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
+ * submit a job to a particular job slot in the current status
+ *
+ * @kbdev: The kbase device structure of the device
+ * @js:    Job slot number to check for allowance
+ *
+ * Will check if submitting to the given job slot is allowed in the current
+ * status.  For example using job slot 2 while in soft-stoppable state and only
+ * having 1 coregroup is not allowed by the policy. This function should be
+ * called prior to submitting a job to a slot to make sure policy rules are not
+ * violated.
+ *
+ * The following locking conditions are made on the caller
+ * - it must hold hwaccess_lock
+ */
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_js_choose_affinity - Compute affinity for a given job.
+ *
+ * @affinity: Affinity bitmap computed
+ * @kbdev:    The kbase device structure of the device
+ * @katom:    Job chain of which affinity is going to be found
+ * @js:       Slot the job chain is being submitted
+ *
+ * Currently assumes an all-on/all-off power management policy.
+ * Also assumes there is at least one core with tiler available.
+ *
+ * Returns true if a valid affinity was chosen, false if
+ * no cores were available.
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					int js);
+
+/**
+ * kbase_js_affinity_would_violate - Determine whether a proposed affinity on
+ * job slot @js would cause a violation of affinity restrictions.
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot to test
+ * @affinity: The affinity mask to test
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ *
+ * Return: true if the affinity would violate the restrictions
+ */
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
+ *                                       a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot retaining the cores
+ * @affinity: The cores to retain
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
+ *                                        by a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       Job slot
+ * @affinity: Bit mask of core to be released
+ *
+ * Cores must be released as soon as a job is dequeued from a slot's 'submit
+ * slots', and before another job is submitted to those slots. Otherwise, the
+ * refcount could exceed the maximum number submittable to a slot,
+ * %BASE_JM_SUBMIT_SLOTS.
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_debug_log_current_affinities - log the current affinities
+ *
+ * @kbdev:  Kbase device structure
+ *
+ * Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else				/*  KBASE_TRACE_ENABLE  */
+static inline void
+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+}
+#endif				/*  KBASE_TRACE_ENABLE  */
+
+#endif				/* _KBASE_JS_AFFINITY_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100644
index 0000000..63d048f
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,348 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	s8 nr_running_ctxs;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* Timer must stop if we are suspending */
+	if (backend->suspend_timer)
+		return false;
+
+	/* nr_contexts_pullable is updated with the runpool_mutex. However, the
+	 * locking in the caller gives us a barrier that ensures
+	 * nr_contexts_pullable is up-to-date for reading */
+	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (kbdev->js_data.softstop_always) {
+		/* Debug support for allowing soft-stop on a single context */
+		return true;
+	}
+#endif				/* CONFIG_MALI_DEBUG */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+		/* Timeouts would have to be 4x longer (due to micro-
+		 * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there's:
+		 * - 2 or more CL contexts
+		 * - 1 or more GLES contexts
+		 *
+		 * NOTE: We will treat a context that has both Compute and Non-
+		 * Compute jobs will be treated as an OpenCL context (hence, we
+		 * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+		 */
+		{
+			s8 nr_compute_ctxs =
+				kbasep_js_ctx_attr_count_on_runpool(kbdev,
+						KBASEP_JS_CTX_ATTR_COMPUTE);
+			s8 nr_noncompute_ctxs = nr_running_ctxs -
+							nr_compute_ctxs;
+
+			return (bool) (nr_compute_ctxs >= 2 ||
+							nr_noncompute_ctxs > 0);
+		}
+	} else {
+		/* Run the timer callback whenever you have at least 1 context
+		 */
+		return (bool) (nr_running_ctxs > 0);
+	}
+}
+
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_backend_data *backend;
+	int s;
+	bool reset_needed = false;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	backend = container_of(timer, struct kbase_backend_data,
+							scheduling_timer);
+	kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
+	js_devdata = &kbdev->js_data;
+
+	/* Loop through the slots */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+		struct kbase_jd_atom *atom = NULL;
+
+		if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
+			atom = kbase_gpu_inspect(kbdev, s, 0);
+			KBASE_DEBUG_ASSERT(atom != NULL);
+		}
+
+		if (atom != NULL) {
+			/* The current version of the model doesn't support
+			 * Soft-Stop */
+			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+				u32 ticks = atom->ticks++;
+
+#ifndef CONFIG_MALI_JOB_DUMP
+				u32 soft_stop_ticks, hard_stop_ticks,
+								gpu_reset_ticks;
+				if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks_cl;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_cl;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_cl;
+				} else {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_ss;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_ss;
+				}
+
+				/* If timeouts have been changed then ensure
+				 * that atom tick count is not greater than the
+				 * new soft_stop timeout. This ensures that
+				 * atoms do not miss any of the timeouts due to
+				 * races between this worker and the thread
+				 * changing the timeouts. */
+				if (backend->timeouts_updated &&
+						ticks > soft_stop_ticks)
+					ticks = atom->ticks = soft_stop_ticks;
+
+				/* Job is Soft-Stoppable */
+				if (ticks == soft_stop_ticks) {
+					int disjoint_threshold =
+		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+					u32 softstop_flags = 0u;
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks ticks.
+					 * Soft stop the slot so we can run
+					 * other jobs.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+					/* nr_user_contexts_running is updated
+					 * with the runpool_mutex, but we can't
+					 * take that here.
+					 *
+					 * However, if it's about to be
+					 * increased then the new context can't
+					 * run any jobs until they take the
+					 * hwaccess_lock, so it's OK to observe
+					 * the older value.
+					 *
+					 * Similarly, if it's about to be
+					 * decreased, the last job from another
+					 * context has already finished, so it's
+					 * not too bad that we observe the older
+					 * value and register a disjoint event
+					 * when we try soft-stopping */
+					if (js_devdata->nr_user_contexts_running
+							>= disjoint_threshold)
+						softstop_flags |=
+						JS_COMMAND_SW_CAUSES_DISJOINT;
+
+					kbase_job_slot_softstop_swflags(kbdev,
+						s, atom, softstop_flags);
+#endif
+				} else if (ticks == hard_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_ss ticks.
+					 * It should have been soft-stopped by
+					 * now. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks == gpu_reset_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_ss ticks.
+					 * It should have left the GPU by now.
+					 * Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#else				/* !CONFIG_MALI_JOB_DUMP */
+				/* NOTE: During CONFIG_MALI_JOB_DUMP, we use
+				 * the alternate timeouts, which makes the hard-
+				 * stop and GPU reset timeout much longer. We
+				 * also ensure that we don't soft-stop at all.
+				 */
+				if (ticks == js_devdata->soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks. We do
+					 * not soft-stop during
+					 * CONFIG_MALI_JOB_DUMP, however.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+				} else if (ticks ==
+					js_devdata->hard_stop_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_dumping
+					 * ticks. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks ==
+					js_devdata->gpu_reset_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_dumping
+					 * ticks. It should have left the GPU by
+					 * now. Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#endif				/* !CONFIG_MALI_JOB_DUMP */
+			}
+		}
+	}
+#if KBASE_GPU_RESET_EN
+	if (reset_needed) {
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* the timer is re-issued if there is contexts in the run-pool */
+
+	if (backend->timer_running)
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+			HRTIMER_MODE_REL);
+
+	backend->timeouts_updated = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	unsigned long flags;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	if (!timer_callback_should_run(kbdev)) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = false;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/* From now on, return value of timer_callback_should_run() will
+		 * also cause the timer to not requeue itself. Its return value
+		 * cannot change, because it depends on variables updated with
+		 * the runpool_mutex held, which the caller of this must also
+		 * hold */
+		hrtimer_cancel(&backend->scheduling_timer);
+	}
+
+	if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = true;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+							HRTIMER_MODE_REL);
+
+		KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
+									0u);
+	}
+}
+
+int kbase_backend_timer_init(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	backend->scheduling_timer.function = timer_callback;
+
+	backend->timer_running = false;
+
+	return 0;
+}
+
+void kbase_backend_timer_term(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_cancel(&backend->scheduling_timer);
+}
+
+void kbase_backend_timer_suspend(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = true;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timer_resume(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = false;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->timeouts_updated = true;
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
new file mode 100644
index 0000000..3f53779
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#ifndef _KBASE_JS_BACKEND_H_
+#define _KBASE_JS_BACKEND_H_
+
+/**
+ * kbase_backend_timer_init() - Initialise the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_term() - Terminate the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver termination
+ */
+void kbase_backend_timer_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
+ *                               timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on suspend, after the active count has reached
+ * zero. This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ *                              scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that is is not guaranteed to
+ * re-start the timer, only evalute whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100644
index 0000000..aa1817c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,401 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+		u32 num_pages)
+{
+	u64 region;
+
+	/* can't lock a zero sized range */
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	region = pfn << PAGE_SHIFT;
+	/*
+	 * fls returns (given the ASSERT above):
+	 * 1 .. 32
+	 *
+	 * 10 + fls(num_pages)
+	 * results in the range (11 .. 42)
+	 */
+
+	/* gracefully handle num_pages being zero */
+	if (0 == num_pages) {
+		region |= 11;
+	} else {
+		u8 region_width;
+
+		region_width = 10 + fls(num_pages);
+		if (num_pages != (1ul << (region_width - 11))) {
+			/* not pow2, so must go up to the next pow2 */
+			region_width += 1;
+		}
+		KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+		KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+		region |= region_width;
+	}
+
+	return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+		unsigned int as_nr, struct kbase_context *kctx)
+{
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending. Do not log remaining register accesses. */
+	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
+
+	if (max_loops == 0) {
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		return -1;
+	}
+
+	/* If waiting in loop was performed, log last read value. */
+	if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
+		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	return 0;
+}
+
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
+		struct kbase_context *kctx)
+{
+	int status;
+
+	/* write AS_COMMAND when MMU is ready to accept another command */
+	status = wait_ready(kbdev, as_nr, kctx);
+	if (status == 0)
+		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
+									kctx);
+
+	return status;
+}
+
+static void validate_protected_page_fault(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	/* GPUs which support (native) protected mode shall not report page
+	 * fault addresses unless it has protected debug mode and protected
+	 * debug mode is turned on */
+	u32 protected_debug_mode = 0;
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+		return;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		protected_debug_mode = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS),
+				kctx) & GPU_DBGEN;
+	}
+
+	if (!protected_debug_mode) {
+		/* fault_addr should never be reported in protected mode.
+		 * However, we just continue by printing an error message */
+		dev_err(kbdev->dev, "Fault address reported in protected mode\n");
+	}
+}
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+	const int num_as = 16;
+	const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+	const int pf_shift = 0;
+	const unsigned long as_bit_mask = (1UL << num_as) - 1;
+	unsigned long flags;
+	u32 new_mask;
+	u32 tmp;
+
+	/* bus faults */
+	u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+	/* page faults (note: Ignore ASes with both pf and bf) */
+	u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	/* remember current mask */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	/* mask interrupts for now */
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+	while (bf_bits | pf_bits) {
+		struct kbase_as *as;
+		int as_no;
+		struct kbase_context *kctx;
+
+		/*
+		 * the while logic ensures we have a bit set, no need to check
+		 * for not-found here
+		 */
+		as_no = ffs(bf_bits | pf_bits) - 1;
+		as = &kbdev->as[as_no];
+
+		/*
+		 * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+		 * Bus/Page faults _should_ only occur whilst jobs are running,
+		 * and a job causing the Bus/Page fault shouldn't complete until
+		 * the MMU is updated
+		 */
+		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+
+
+		/* find faulting address */
+		as->fault_addr = kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_HI),
+						kctx);
+		as->fault_addr <<= 32;
+		as->fault_addr |= kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_LO),
+						kctx);
+
+		/* Mark the fault protected or not */
+		as->protected_mode = kbdev->protected_mode;
+
+		if (kbdev->protected_mode && as->fault_addr)
+		{
+			/* check if address reporting is allowed */
+			validate_protected_page_fault(kbdev, kctx);
+		}
+
+		/* report the fault to debugfs */
+		kbase_as_fault_debugfs_new(kbdev, as_no);
+
+		/* record the fault status */
+		as->fault_status = kbase_reg_read(kbdev,
+						  MMU_AS_REG(as_no,
+							AS_FAULTSTATUS),
+						  kctx);
+
+		/* find the fault type */
+		as->fault_type = (bf_bits & (1 << as_no)) ?
+				KBASE_MMU_FAULT_TYPE_BUS :
+				KBASE_MMU_FAULT_TYPE_PAGE;
+
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
+			as->fault_extra_addr = kbase_reg_read(kbdev,
+					MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
+					kctx);
+			as->fault_extra_addr <<= 32;
+			as->fault_extra_addr |= kbase_reg_read(kbdev,
+					MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
+					kctx);
+		}
+
+		if (kbase_as_has_bus_fault(as)) {
+			/* Mark bus fault as handled.
+			 * Note that a bus fault is processed first in case
+			 * where both a bus fault and page fault occur.
+			 */
+			bf_bits &= ~(1UL << as_no);
+
+			/* remove the queued BF (and PF) from the mask */
+			new_mask &= ~(MMU_BUS_ERROR(as_no) |
+					MMU_PAGE_FAULT(as_no));
+		} else {
+			/* Mark page fault as handled */
+			pf_bits &= ~(1UL << as_no);
+
+			/* remove the queued PF from the mask */
+			new_mask &= ~MMU_PAGE_FAULT(as_no);
+		}
+
+		/* Process the interrupt for this address space */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_interrupt_process(kbdev, kctx, as);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+
+	/* reenable interrupts */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	new_mask |= tmp;
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx)
+{
+	struct kbase_mmu_setup *current_setup = &as->current_setup;
+	u32 transcfg = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
+		transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+
+		/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
+		/* Clear PTW_MEMATTR bits */
+		transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+		/* Enable correct PTW_MEMATTR bits */
+		transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+
+		if (kbdev->system_coherency == COHERENCY_ACE) {
+			/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
+			/* Clear PTW_SH bits */
+			transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+			/* Enable correct PTW_SH bits */
+			transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
+		}
+
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+				transcfg, kctx);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+				(current_setup->transcfg >> 32) & 0xFFFFFFFFUL,
+				kctx);
+	} else {
+		if (kbdev->system_coherency == COHERENCY_ACE)
+			current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+	}
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+			current_setup->transtab & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+			(current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+			current_setup->memattr & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+			(current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+
+	KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as,
+			current_setup->transtab,
+			current_setup->memattr,
+			transcfg);
+
+	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+}
+
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+		unsigned int handling_irq)
+{
+	int ret;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	if (op == AS_COMMAND_UNLOCK) {
+		/* Unlock doesn't require a lock first */
+		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+	} else {
+		u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+		/* Lock the region that needs to be updated */
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+				lock_addr & 0xFFFFFFFFUL, kctx);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+				(lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
+		write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+
+		/* Run the MMU operation */
+		write_cmd(kbdev, as->number, op, kctx);
+
+		/* Wait for the flush to complete */
+		ret = wait_ready(kbdev, as->number, kctx);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+			/* Issue an UNLOCK command to ensure that valid page
+			   tables are re-read by the GPU after an update.
+			   Note that, the FLUSH command should perform all the
+			   actions necessary, however the bus logs show that if
+			   multiple page faults occur within an 8 page region
+			   the MMU does not always re-read the updated page
+			   table entries for later faults or is only partially
+			   read, it subsequently raises the page fault IRQ for
+			   the same addresses, the unlock ensures that the MMU
+			   cache is flushed, so updates can be re-read.  As the
+			   region is now unlocked we need to issue 2 UNLOCK
+			   commands in order to flush the MMU/uTLB,
+			   see PRLAM-8812.
+			 */
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+		}
+	}
+
+	return ret;
+}
+
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 pf_bf_mask;
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	/* Clear the page (and bus fault IRQ as well in case one occurred) */
+	pf_bf_mask = MMU_PAGE_FAULT(as->number);
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		pf_bf_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 irq_mask;
+
+	/* Enable the page fault IRQ (and bus fault IRQ as well in case one
+	 * occurred) */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
+			MMU_PAGE_FAULT(as->number);
+
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		irq_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
new file mode 100644
index 0000000..c02253c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Interface file for the direct implementation for MMU hardware access
+ *
+ * Direct MMU hardware interface
+ *
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
+#define _MALI_KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * kbase_mmu_interrupt - Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the &kbase_device.
+ *
+ * @kbdev:          kbase context to clear the fault from.
+ * @irq_stat:       Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+#endif	/* _MALI_KBASE_MMU_HW_DIRECT_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
new file mode 100644
index 0000000..0614348
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static void always_on_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void always_on_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+	"always_on",			/* name */
+	always_on_init,			/* init */
+	always_on_term,			/* term */
+	always_on_get_core_mask,	/* get_core_mask */
+	always_on_get_core_active,	/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_ALWAYS_ON,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
new file mode 100644
index 0000000..f9d244b
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -0,0 +1,77 @@
+
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * DOC:
+ * The "Always on" power management policy has the following
+ * characteristics:
+ *
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *    All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *    All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *    The Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed. The GPU itself is also kept powered, even though it is not
+ *    needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * Note:
+ *
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data
+ * @dummy: unused dummy variable
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_always_on {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
new file mode 100644
index 0000000..a871eae
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -0,0 +1,496 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * GPU backend implementation of base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_pm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
+
+int kbase_pm_runtime_init(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+	if (callbacks) {
+		kbdev->pm.backend.callback_power_on =
+					callbacks->power_on_callback;
+		kbdev->pm.backend.callback_power_off =
+					callbacks->power_off_callback;
+		kbdev->pm.backend.callback_power_suspend =
+					callbacks->power_suspend_callback;
+		kbdev->pm.backend.callback_power_resume =
+					callbacks->power_resume_callback;
+		kbdev->pm.callback_power_runtime_init =
+					callbacks->power_runtime_init_callback;
+		kbdev->pm.callback_power_runtime_term =
+					callbacks->power_runtime_term_callback;
+		kbdev->pm.backend.callback_power_runtime_on =
+					callbacks->power_runtime_on_callback;
+		kbdev->pm.backend.callback_power_runtime_off =
+					callbacks->power_runtime_off_callback;
+		kbdev->pm.backend.callback_power_runtime_idle =
+					callbacks->power_runtime_idle_callback;
+
+		if (callbacks->power_runtime_init_callback)
+			return callbacks->power_runtime_init_callback(kbdev);
+		else
+			return 0;
+	}
+
+	kbdev->pm.backend.callback_power_on = NULL;
+	kbdev->pm.backend.callback_power_off = NULL;
+	kbdev->pm.backend.callback_power_suspend = NULL;
+	kbdev->pm.backend.callback_power_resume = NULL;
+	kbdev->pm.callback_power_runtime_init = NULL;
+	kbdev->pm.callback_power_runtime_term = NULL;
+	kbdev->pm.backend.callback_power_runtime_on = NULL;
+	kbdev->pm.backend.callback_power_runtime_off = NULL;
+	kbdev->pm.backend.callback_power_runtime_idle = NULL;
+
+	return 0;
+}
+
+void kbase_pm_runtime_term(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.callback_power_runtime_term) {
+		kbdev->pm.callback_power_runtime_term(kbdev);
+	}
+}
+
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_on_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = true;
+}
+
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_off_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = false;
+}
+
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_init(&kbdev->pm.lock);
+
+	kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!kbdev->pm.backend.gpu_poweroff_wait_wq)
+		return -ENOMEM;
+
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
+			kbase_pm_gpu_poweroff_wait_wq);
+
+	kbdev->pm.backend.gpu_powered = false;
+	kbdev->pm.suspending = false;
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->pm.backend.driver_ready_for_irqs = false;
+#endif /* CONFIG_MALI_DEBUG */
+	kbdev->pm.backend.gpu_in_desired_state = true;
+	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+
+	/* Initialise the metrics subsystem */
+	ret = kbasep_pm_metrics_init(kbdev);
+	if (ret)
+		return ret;
+
+	init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
+	kbdev->pm.backend.l2_powered = 0;
+
+	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
+	kbdev->pm.backend.reset_done = false;
+
+	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+	kbdev->pm.active_count = 0;
+
+	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+	init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
+
+	if (kbase_pm_ca_init(kbdev) != 0)
+		goto workq_fail;
+
+	if (kbase_pm_policy_init(kbdev) != 0)
+		goto pm_policy_fail;
+
+	return 0;
+
+pm_policy_fail:
+	kbase_pm_ca_term(kbdev);
+workq_fail:
+	kbasep_pm_metrics_term(kbdev);
+	return -EINVAL;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Turn clocks and interrupts on - no-op if we haven't done a previous
+	 * kbase_pm_clock_off() */
+	kbase_pm_clock_on(kbdev, is_resume);
+
+	/* Update core status as required by the policy */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+	kbase_pm_update_cores_state(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+	/* NOTE: We don't wait to reach the desired state, since running atoms
+	 * will wait for that state to be reached anyway */
+}
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+			pm.backend.gpu_poweroff_wait_work);
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+#if !PLATFORM_POWER_DOWN_ONLY
+	/* Wait for power transitions to complete. We do this with no locks held
+	 * so that we don't deadlock with any pending workqueues */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+	kbase_pm_check_transitions_sync(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+#if PLATFORM_POWER_DOWN_ONLY
+	if (kbdev->pm.backend.gpu_powered) {
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/* If L2 cache is powered then we must flush it before
+			 * we power off the GPU. Normally this would have been
+			 * handled when the L2 was powered off. */
+			kbase_gpu_cacheclean(kbdev);
+		}
+	}
+#endif /* PLATFORM_POWER_DOWN_ONLY */
+
+	if (!backend->poweron_required) {
+#if !PLATFORM_POWER_DOWN_ONLY
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		WARN_ON(kbdev->l2_available_bitmap ||
+				kbdev->shader_available_bitmap ||
+				kbdev->tiler_available_bitmap);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+
+		/* Consume any change-state events */
+		kbase_timeline_pm_check_handle_event(kbdev,
+					KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		/* Disable interrupts and turn the clock off */
+		if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
+			/*
+			 * Page/bus faults are pending, must drop locks to
+			 * process.  Interrupts are disabled so no more faults
+			 * should be generated at this point.
+			 */
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			kbase_flush_mmu_wqs(kbdev);
+			mutex_lock(&js_devdata->runpool_mutex);
+			mutex_lock(&kbdev->pm.lock);
+
+			/* Turn off clock now that fault have been handled. We
+			 * dropped locks so poweron_required may have changed -
+			 * power back on if this is the case.*/
+			if (backend->poweron_required)
+				kbase_pm_clock_on(kbdev, false);
+			else
+				WARN_ON(!kbase_pm_clock_off(kbdev,
+						backend->poweroff_is_suspend));
+		}
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	backend->poweroff_wait_in_progress = false;
+	if (backend->poweron_required) {
+		backend->poweron_required = false;
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_backend_slot_update(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	wake_up(&kbdev->pm.backend.poweroff_wait);
+}
+
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (!kbdev->pm.backend.poweroff_wait_in_progress) {
+		/* Force all cores off */
+		kbdev->pm.backend.desired_shader_state = 0;
+		kbdev->pm.backend.desired_tiler_state = 0;
+
+		/* Force all cores to be unavailable, in the situation where
+		 * transitions are in progress for some cores but not others,
+		 * and kbase_pm_check_transitions_nolock can not immediately
+		 * power off the cores */
+		kbdev->shader_available_bitmap = 0;
+		kbdev->tiler_available_bitmap = 0;
+		kbdev->l2_available_bitmap = 0;
+
+		kbdev->pm.backend.poweroff_wait_in_progress = true;
+		kbdev->pm.backend.poweroff_is_suspend = is_suspend;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/*Kick off wq here. Callers will have to wait*/
+		queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
+				&kbdev->pm.backend.gpu_poweroff_wait_work);
+	} else {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+}
+
+static bool is_poweroff_in_progress(struct kbase_device *kbdev)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
+{
+	wait_event_killable(kbdev->pm.backend.poweroff_wait,
+			is_poweroff_in_progress(kbdev));
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, don't enable IRQs as we are not ready to receive
+	 * them. */
+	ret = kbase_pm_init_hw(kbdev, flags);
+	if (ret) {
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		return ret;
+	}
+
+	kbasep_pm_init_core_use_bitmaps(kbdev);
+
+	kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
+			kbdev->pm.debug_core_mask[1] =
+			kbdev->pm.debug_core_mask[2] =
+			kbdev->gpu_props.props.raw_props.shader_present;
+
+	/* Pretend the GPU is active to prevent a power policy turning the GPU
+	 * cores off */
+	kbdev->pm.active_count = 1;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+	/* Ensure cycle counter is off */
+	kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+	/* We are ready to receive IRQ's now as power policy is set up, so
+	 * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+	kbdev->pm.backend.driver_ready_for_irqs = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+#endif
+	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the GPU and any cores needed by the policy */
+	kbase_pm_do_poweron(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	kbase_pm_context_idle(kbdev);
+
+	return 0;
+}
+
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
+
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
+
+	/* Free any resources the policy allocated */
+	kbase_pm_policy_term(kbdev);
+	kbase_pm_ca_term(kbdev);
+
+	/* Shut down the metrics subsystem */
+	kbasep_pm_metrics_term(kbdev);
+
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
+}
+
+void kbase_pm_power_changed(struct kbase_device *kbdev)
+{
+	bool cores_are_available;
+	unsigned long flags;
+
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
+
+	if (cores_are_available) {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		kbase_backend_slot_update(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2)
+{
+	kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
+	kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
+	kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
+	kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
+			new_core_mask_js2;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	/* Force power off the GPU and all cores (regardless of policy), only
+	 * after the PM active count reaches zero (otherwise, we risk turning it
+	 * off prematurely) */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, true);
+
+	kbase_backend_timer_suspend(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbdev->pm.suspending = false;
+	kbase_pm_do_poweron(kbdev, true);
+
+	kbase_backend_timer_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100644
index 0000000..85890f1
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,182 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+	&kbase_pm_ca_fixed_policy_ops,
+#ifdef CONFIG_MALI_DEVFREQ
+	&kbase_pm_ca_devfreq_policy_ops,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_ca_random_policy_ops
+#endif
+};
+
+/**
+ * POLICY_COUNT - The number of policies available in the system.
+ *
+ * This is derived from the number of functions listed in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.ca_current_policy = policy_list[0];
+
+	kbdev->pm.backend.ca_current_policy->init(kbdev);
+
+	return 0;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_current_policy->term(kbdev);
+}
+
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
+
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.ca_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
+
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *new_policy)
+{
+	const struct kbase_pm_ca_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
+								new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.ca_current_policy;
+	kbdev->pm.backend.ca_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.ca_current_policy = new_policy;
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+					kbdev->shader_ready_bitmap,
+					kbdev->shader_transitioning_bitmap);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
+
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* All cores must be enabled when instrumentation is in use */
+	if (kbdev->pm.backend.instr_enabled)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	if (kbdev->pm.backend.ca_current_policy == NULL)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
+						kbdev->pm.debug_core_mask_all;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
+
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+							u64 cores_transitioning)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.ca_current_policy != NULL)
+		kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+							cores_ready,
+							cores_transitioning);
+}
+
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.instr_enabled = true;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbdev->pm.backend.instr_enabled = false;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
new file mode 100644
index 0000000..ee9e751
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+/**
+ * kbase_pm_ca_init - Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the core availability framework was successfully initialized,
+ *         -errno otherwise
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_term - Terminate core availability framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the currently available shader cores.
+ * Calls into the core availability policy
+ *
+ * Return: The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_update_core_status - Update core status
+ *
+ * @kbdev:               The kbase device structure for the device (must be
+ *                       a valid pointer)
+ * @cores_ready:         The bit mask of cores ready for job submission
+ * @cores_transitioning: The bit mask of cores that are transitioning power
+ *                       state
+ *
+ * Update core availability policy with current core power status
+ *
+ * Calls into the core availability policy
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+/**
+ * kbase_pm_ca_instr_enable - Enable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This overrides the output of the core availability policy, ensuring that all
+ * cores are available
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_instr_disable - Disable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This disables any previously enabled override, and resumes normal policy
+ * functionality
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_CA_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
new file mode 100644
index 0000000..19f1f73
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
@@ -0,0 +1,246 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation.
+ *
+ * This policy periodically selects a new core mask at demand. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/printk.h>
+
+#ifndef MALI_CA_TIMER
+static struct workqueue_struct *mali_ca_wq = NULL;
+#endif
+static int num_shader_cores_to_be_enabled = 0;
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+#ifndef MALI_CA_TIMER
+	struct kbase_device *kbdev = (struct kbase_device *)priv;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+#endif
+
+    num_shader_cores_to_be_enabled = cores -1;
+    printk("pp num=%d\n", num_shader_cores_to_be_enabled);
+    if (num_shader_cores_to_be_enabled < 1)
+        num_shader_cores_to_be_enabled = 1;
+
+#ifndef MALI_CA_TIMER
+    queue_work(mali_ca_wq, &data->wq_work);
+#endif
+
+    return 0;
+}
+
+#ifdef MALI_CA_TIMER
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void demand_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+#if 0
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+#endif
+    data->cores_desired = num_shader_cores_to_be_enabled;
+
+	if (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+            dev_info(kbdev->dev, "error, ca demand policy : new core mask=%llX\n",
+				data->cores_desired);
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "ca demand policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(demand_timer_callback);
+#else
+static void do_ca_scaling(struct work_struct *work)
+{
+    //unsigned long flags;
+	struct kbase_device *kbdev = = container_of(work,
+            struct kbasep_pm_ca_policy_demand, wq_work);
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+    data->cores_desired =  num_shader_cores_to_be_enabled;
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+
+}
+#endif
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+#ifdef MALI_CA_TIMER
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = demand_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+#else
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+	mali_ca_wq = alloc_workqueue("gpu_ca_wq", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+	mali_ca_wq = create_workqueue("gpu_ca_wq");
+#endif
+	INIT_WORK(&data->wq_work, do_ca_scaling);
+    if (mali_ca_wq == NULL) printk("Unable to create gpu scaling workqueue\n");
+#endif
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+#ifdef MALI_CA_TIMER
+	del_timer(&data->core_change_timer);
+#else
+    if (mali_ca_wq) {
+        flush_workqueue(mali_ca_wq);
+        destroy_workqueue(mali_ca_wq);
+        mali_ca_wq = NULL;
+    }
+#endif
+}
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.demand.cores_enabled;
+}
+
+static void demand_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the demand core availability
+ * policy.
+ *
+ * This is the static structure that defines the demand core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_demand_policy_ops);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
new file mode 100644
index 0000000..a3dfe2a
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_DEMAND_H
+#define MALI_KBASE_PM_CA_DEMAND_H
+#include <linux/workqueue.h>
+/**
+ * struct kbasep_pm_ca_policy_demand - Private structure for demand ca policy
+ *
+ * This contains data that is private to the demand core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+#define MALI_CA_TIMER 1
+struct kbasep_pm_ca_policy_demand {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+#ifdef MALI_CA_TIMER
+	struct timer_list core_change_timer;
+#else
+	struct work_struct wq_work;
+#endif
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops;
+extern int mali_perf_set_num_pp_cores(int cores);
+
+#endif /* MALI_KBASE_PM_CA_DEMAND_H */
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c
new file mode 100644
index 0000000..66bf660
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A core availability policy implementing core mask selection from devfreq OPPs
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/version.h>
+
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
+{
+	struct kbasep_pm_ca_policy_devfreq *data =
+				&kbdev->pm.backend.ca_policy_data.devfreq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	data->cores_desired = core_mask;
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+static void devfreq_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_devfreq *data =
+				&kbdev->pm.backend.ca_policy_data.devfreq;
+
+	if (kbdev->current_core_mask) {
+		data->cores_enabled = kbdev->current_core_mask;
+		data->cores_desired = kbdev->current_core_mask;
+	} else {
+		data->cores_enabled =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		data->cores_desired =
+				kbdev->gpu_props.props.raw_props.shader_present;
+	}
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void devfreq_term(struct kbase_device *kbdev)
+{
+}
+
+static u64 devfreq_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.devfreq.cores_enabled;
+}
+
+static void devfreq_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_devfreq *data =
+				&kbdev->pm.backend.ca_policy_data.devfreq;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the devfreq core availability
+ * policy.
+ *
+ * This is the static structure that defines the devfreq core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops = {
+	"devfreq",			/* name */
+	devfreq_init,			/* init */
+	devfreq_term,			/* term */
+	devfreq_get_core_mask,		/* get_core_mask */
+	devfreq_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_DEVFREQ,	/* id */
+};
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h
new file mode 100644
index 0000000..7ab3cd4
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h
@@ -0,0 +1,55 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A core availability policy for use with devfreq, where core masks are
+ * associated with OPPs.
+ */
+
+#ifndef MALI_KBASE_PM_CA_DEVFREQ_H
+#define MALI_KBASE_PM_CA_DEVFREQ_H
+
+/**
+ * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy
+ *
+ * This contains data that is private to the devfreq core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ */
+struct kbasep_pm_ca_policy_devfreq {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops;
+
+/**
+ * kbase_devfreq_set_core_mask - Set core mask for policy to use
+ * @kbdev: Device pointer
+ * @core_mask: New core mask
+ *
+ * The new core mask will have immediate effect if the GPU is powered, or will
+ * take effect when it is next powered on.
+ */
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask);
+
+#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
new file mode 100644
index 0000000..864612d
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
@@ -0,0 +1,65 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static void fixed_init(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev,
+					u64 cores_ready,
+					u64 cores_transitioning)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(cores_ready);
+	CSTD_UNUSED(cores_transitioning);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the fixed power policy.
+ *
+ * This is the static structure that defines the fixed power policy's callback
+ * and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+	"fixed",			/* name */
+	fixed_init,			/* init */
+	fixed_term,			/* term */
+	fixed_get_core_mask,		/* get_core_mask */
+	fixed_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_FIXED,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
new file mode 100644
index 0000000..a763155
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Dummy member - no state is needed
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+	int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
new file mode 100644
index 0000000..0d1b220
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation.
+ *
+ * This policy periodically selects a new core mask at random. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void random_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(random_timer_callback);
+
+static void random_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = random_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+}
+
+static void random_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	del_timer(&data->core_change_timer);
+}
+
+static u64 random_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.random.cores_enabled;
+}
+
+static void random_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the random core availability
+ * policy.
+ *
+ * This is the static structure that defines the random core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops = {
+	"random",			/* name */
+	random_init,			/* init */
+	random_term,			/* term */
+	random_get_core_mask,		/* get_core_mask */
+	random_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_RANDOM,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_random_policy_ops);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
new file mode 100644
index 0000000..a8c9b15
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_RANDOM_H
+#define MALI_KBASE_PM_CA_RANDOM_H
+
+/**
+ * struct kbasep_pm_ca_policy_random - Private structure for random ca policy
+ *
+ * This contains data that is private to the random core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+struct kbasep_pm_ca_policy_random {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+	struct timer_list core_change_timer;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_RANDOM_H */
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100644
index 0000000..f891fa2
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count == 0)
+		return 0;
+
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	"coarse_demand",			/* name */
+	coarse_demand_init,			/* init */
+	coarse_demand_term,			/* term */
+	coarse_demand_get_core_mask,		/* get_core_mask */
+	coarse_demand_get_core_active,		/* get_core_active */
+	0u,					/* flags */
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100644
index 0000000..749d305
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * DOC:
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
+ *                                         policy
+ *
+ * This contains data that is private to the coarse demand power policy.
+ *
+ * @dummy: Dummy member - no state needed
+ */
+struct kbasep_pm_policy_coarse_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100644
index 0000000..352744e
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,519 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#include "mali_kbase_pm_ca_devfreq.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/**
+ * enum kbase_pm_core_type - The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to
+ * - kbase_pm_get_present_cores()
+ * - kbase_pm_get_active_cores()
+ * - kbase_pm_get_trans_cores()
+ * - kbase_pm_get_ready_cores().
+ *
+ * They specify which type of core should be acted on.  These values are set in
+ * a manner that allows core_type_to_reg() function to be simpler and more
+ * efficient.
+ *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ * @KBASE_PM_CORE_STACK: Core stacks
+ */
+enum kbase_pm_core_type {
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO,
+	KBASE_PM_CORE_STACK = STACK_PRESENT_LO
+};
+
+/**
+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power
+ *                                 management framework.
+ *
+ *  @time_period_start: time at which busy/idle measurements started
+ *  @time_busy: number of ns the GPU was busy executing jobs since the
+ *          @time_period_start timestamp.
+ *  @time_idle: number of ns since time_period_start the GPU was not executing
+ *          jobs since the @time_period_start timestamp.
+ *  @prev_busy: busy time in ns of previous time period.
+ *           Updated when metrics are reset.
+ *  @prev_idle: idle time in ns of previous time period
+ *           Updated when metrics are reset.
+ *  @gpu_active: true when the GPU is executing jobs. false when
+ *           not. Updated when the job scheduler informs us a job in submitted
+ *           or removed from a GPU slot.
+ *  @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
+ *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
+ *           GL jobs never run on slot 2 this slot is not recorded.
+ *  @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ *  @timer: timer to regularly make DVFS decisions based on the power
+ *           management metrics.
+ *  @timer_active: boolean indicating @timer is running
+ *  @platform_data: pointer to data controlled by platform specific code
+ *  @kbdev: pointer to kbase device for which metrics are collected
+ *
+ */
+struct kbasep_pm_metrics_data {
+	ktime_t time_period_start;
+	u32 time_busy;
+	u32 time_idle;
+	u32 prev_busy;
+	u32 prev_idle;
+	bool gpu_active;
+	u32 busy_cl[2];
+	u32 busy_gl;
+	u32 active_cl_ctx[2];
+	u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
+	spinlock_t lock;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	struct hrtimer timer;
+	bool timer_active;
+#endif
+
+	void *platform_data;
+	struct kbase_device *kbdev;
+};
+
+union kbase_pm_policy_data {
+	struct kbasep_pm_policy_always_on always_on;
+	struct kbasep_pm_policy_coarse_demand coarse_demand;
+	struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+	struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+union kbase_pm_ca_policy_data {
+	struct kbasep_pm_ca_policy_fixed fixed;
+	struct kbasep_pm_ca_policy_devfreq devfreq;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_ca_policy_random random;
+#endif
+};
+
+/**
+ * struct kbase_pm_backend_data - Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ *
+ * @ca_current_policy: The policy that is currently actively controlling core
+ *                     availability.
+ * @pm_current_policy: The policy that is currently actively controlling the
+ *                     power state.
+ * @ca_policy_data:    Private data for current CA policy
+ * @pm_policy_data:    Private data for current PM policy
+ * @ca_in_transition:  Flag indicating when core availability policy is
+ *                     transitioning cores. The core availability policy must
+ *                     set this when a change in core availability is occurring.
+ *                     power_change_lock must be held when accessing this.
+ * @reset_done:        Flag when a reset is complete
+ * @reset_done_wait:   Wait queue to wait for changes to @reset_done
+ * @l2_powered_wait:   Wait queue for whether the l2 cache has been powered as
+ *                     requested
+ * @l2_powered:        State indicating whether all the l2 caches are powered.
+ *                     Non-zero indicates they're *all* powered
+ *                     Zero indicates that some (or all) are not powered
+ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
+ *                              users
+ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
+ * @desired_shader_state: A bit mask identifying the shader cores that the
+ *                        power policy would like to be on. The current state
+ *                        of the cores may be different, but there should be
+ *                        transitions in progress that will eventually achieve
+ *                        this state (assuming that the policy doesn't change
+ *                        its mind in the mean time).
+ * @powering_on_shader_state: A bit mask indicating which shader cores are
+ *                            currently in a power-on transition
+ * @desired_tiler_state: A bit mask identifying the tiler cores that the power
+ *                       policy would like to be on. See @desired_shader_state
+ * @powering_on_tiler_state: A bit mask indicating which tiler core are
+ *                           currently in a power-on transition
+ * @powering_on_l2_state: A bit mask indicating which l2-caches are currently
+ *                        in a power-on transition
+ * @powering_on_stack_state: A bit mask indicating which core stacks are
+ *                           currently in a power-on transition
+ * @gpu_in_desired_state: This flag is set if the GPU is powered as requested
+ *                        by the desired_xxx_state variables
+ * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
+ * @gpu_powered:       Set to true when the GPU is powered and register
+ *                     accesses are possible, false otherwise
+ * @instr_enabled:     Set to true when instrumentation is enabled,
+ *                     false otherwise
+ * @cg1_disabled:      Set if the policy wants to keep the second core group
+ *                     powered off
+ * @driver_ready_for_irqs: Debug state indicating whether sufficient
+ *                         initialization of the driver has occurred to handle
+ *                         IRQs
+ * @gpu_powered_lock:  Spinlock that must be held when writing @gpu_powered or
+ *                     accessing @driver_ready_for_irqs
+ * @metrics:           Structure to hold metrics for the GPU
+ * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
+ *                        powered off
+ * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
+ *                        and/or timers are powered off
+ * @gpu_poweroff_timer: Timer for powering off GPU
+ * @gpu_poweroff_wq:   Workqueue to power off GPU on when timer fires
+ * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
+ * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
+ *                           timer callback
+ * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
+ *                          callback
+ * @poweroff_timer_needed: true if the poweroff timer is currently required,
+ *                         false otherwise
+ * @poweroff_timer_running: true if the poweroff timer is currently running,
+ *                          false otherwise
+ *                          power_change_lock should be held when accessing,
+ *                          unless there is no way the timer can be running (eg
+ *                          hrtimer_cancel() was called immediately before)
+ * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
+ *                             hwaccess_lock must be held when accessing
+ * @poweron_required: true if a GPU power on is required. Should only be set
+ *                    when poweroff_wait_in_progress is true, and therefore the
+ *                    GPU can not immediately be powered on. pm.lock must be
+ *                    held when accessing
+ * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend
+ *                       request. pm.lock must be held when accessing
+ * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off
+ * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq
+ * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete
+ * @callback_power_on: Callback when the GPU needs to be turned on. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_off: Callback when the GPU may be turned off. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to
+ *                          be turned off. See &struct kbase_pm_callback_conf
+ * @callback_power_resume: Callback when a resume occurs and the GPU needs to
+ *                          be turned on. See &struct kbase_pm_callback_conf
+ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See
+ *                             &struct kbase_pm_callback_conf
+ * @callback_power_runtime_off: Callback when the GPU may be turned off. See
+ *                              &struct kbase_pm_callback_conf
+ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
+ *                              &struct kbase_pm_callback_conf
+ *
+ * Note:
+ * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
+ * policy is being changed with kbase_pm_ca_set_policy() or
+ * kbase_pm_set_policy(). The change is protected under
+ * kbase_device.pm.power_change_lock. Direct access to this
+ * from IRQ context must therefore check for NULL. If NULL, then
+ * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
+ * functions that would have been done under IRQ.
+ */
+struct kbase_pm_backend_data {
+	const struct kbase_pm_ca_policy *ca_current_policy;
+	const struct kbase_pm_policy *pm_current_policy;
+	union kbase_pm_ca_policy_data ca_policy_data;
+	union kbase_pm_policy_data pm_policy_data;
+	bool ca_in_transition;
+	bool reset_done;
+	wait_queue_head_t reset_done_wait;
+	wait_queue_head_t l2_powered_wait;
+	int l2_powered;
+	int gpu_cycle_counter_requests;
+	spinlock_t gpu_cycle_counter_requests_lock;
+
+	u64 desired_shader_state;
+	u64 powering_on_shader_state;
+	u64 desired_tiler_state;
+	u64 powering_on_tiler_state;
+	u64 powering_on_l2_state;
+#ifdef CONFIG_MALI_CORESTACK
+	u64 powering_on_stack_state;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	bool gpu_in_desired_state;
+	wait_queue_head_t gpu_in_desired_state_wait;
+
+	bool gpu_powered;
+
+	bool instr_enabled;
+
+	bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+	bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+	spinlock_t gpu_powered_lock;
+
+
+	struct kbasep_pm_metrics_data metrics;
+
+	int gpu_poweroff_pending;
+	int shader_poweroff_pending_time;
+
+	struct hrtimer gpu_poweroff_timer;
+	struct workqueue_struct *gpu_poweroff_wq;
+	struct work_struct gpu_poweroff_work;
+
+	u64 shader_poweroff_pending;
+	u64 tiler_poweroff_pending;
+
+	bool poweroff_timer_needed;
+	bool poweroff_timer_running;
+
+	bool poweroff_wait_in_progress;
+	bool poweron_required;
+	bool poweroff_is_suspend;
+
+	struct workqueue_struct *gpu_poweroff_wait_wq;
+	struct work_struct gpu_poweroff_wait_work;
+
+	wait_queue_head_t poweroff_wait;
+
+	int (*callback_power_on)(struct kbase_device *kbdev);
+	void (*callback_power_off)(struct kbase_device *kbdev);
+	void (*callback_power_suspend)(struct kbase_device *kbdev);
+	void (*callback_power_resume)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
+};
+
+
+/* List of policy IDs */
+enum kbase_pm_policy_id {
+	KBASE_PM_POLICY_ID_DEMAND = 1,
+	KBASE_PM_POLICY_ID_ALWAYS_ON,
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
+	KBASE_PM_POLICY_ID_FAST_START
+#endif
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+/**
+ * struct kbase_pm_policy - Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core mask
+ * @get_core_active:    Function called to get the current overall GPU power
+ *                      state
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.pm_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core mask
+	 *
+	 * The returned mask should meet or exceed (kbdev->shader_needed_bitmap
+	 * | kbdev->shader_inuse_bitmap).
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: The mask of shader cores to be powered
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current overall GPU power state
+	 *
+	 * This function should consider the state of kbdev->pm.active_count. If
+	 * this count is greater than 0 then there is at least one active
+	 * context on the device and the GPU should be powered. If it is equal
+	 * to 0 then there are no active contexts and the GPU could be powered
+	 * off if desired.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: true if the GPU should be powered, false otherwise
+	 */
+	bool (*get_core_active)(struct kbase_device *kbdev);
+
+	kbase_pm_policy_flags flags;
+	enum kbase_pm_policy_id id;
+};
+
+
+enum kbase_pm_ca_policy_id {
+	KBASE_PM_CA_POLICY_ID_FIXED = 1,
+	KBASE_PM_CA_POLICY_ID_DEVFREQ,
+	KBASE_PM_CA_POLICY_ID_RANDOM
+};
+
+typedef u32 kbase_pm_ca_policy_flags;
+
+/**
+ * Maximum length of a CA policy names
+ */
+#define KBASE_PM_CA_MAX_POLICY_NAME_LEN 15
+
+/**
+ * struct kbase_pm_ca_policy - Core availability policy structure.
+ *
+ * Each core availability policy exposes a (static) instance of this structure
+ * which contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core
+ *                      availability mask
+ * @update_core_status: Function called to update the current core status
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_ca_policy {
+	char name[KBASE_PM_CA_MAX_POLICY_NAME_LEN + 1];
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.ca_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core availability mask
+	 *
+	 * When a change in core availability is occurring, the policy must set
+	 * kbdev->pm.ca_in_transition to true. This is to indicate that
+	 * reporting changes in power state cannot be optimized out, even if
+	 * kbdev->pm.desired_shader_state remains unchanged. This must be done
+	 * by any functions internal to the Core Availability Policy that change
+	 * the return value of kbase_pm_ca_policy::get_core_mask.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *              valid pointer)
+	 *
+	 * Return: The current core availability mask
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to update the current core status
+	 *
+	 * If none of the cores in core group 0 are ready or transitioning, then
+	 * the policy must ensure that the next call to get_core_mask does not
+	 * return 0 for all cores in core group 0. It is an error to disable
+	 * core group 0 through the core availability policy.
+	 *
+	 * When a change in core availability has finished, the policy must set
+	 * kbdev->pm.ca_in_transition to false. This is to indicate that
+	 * changes in power state can once again be optimized out when
+	 * kbdev->pm.desired_shader_state is unchanged.
+	 *
+	 * @kbdev:               The kbase device structure for the device
+	 *                       (must be a valid pointer)
+	 * @cores_ready:         The mask of cores currently powered and
+	 *                       ready to run jobs
+	 * @cores_transitioning: The mask of cores currently transitioning
+	 *                       power state
+	 */
+	void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+	kbase_pm_ca_policy_flags flags;
+
+	/**
+	 * Field indicating an ID for this policy. This is not necessarily the
+	 * same as its index in the list returned by kbase_pm_list_policies().
+	 * It is used purely for debugging.
+	 */
+	enum kbase_pm_ca_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
new file mode 100644
index 0000000..81322fd
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
+
+	if (0 == kbdev->pm.active_count)
+		return 0;
+
+	return desired;
+}
+
+static bool demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_get_core_active,		/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
new file mode 100644
index 0000000..c0c84b6
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#ifndef MALI_KBASE_PM_DEMAND_H
+#define MALI_KBASE_PM_DEMAND_H
+
+/**
+ * DOC: Demand power management policy
+ *
+ * The demand power management policy has the following characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - The Shader Cores are not powered up
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - Only those Shader Cores are powered up
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * Note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_demand - Private structure for policy instance data
+ *
+ * @dummy: No state is needed, a dummy variable
+ *
+ * This contains data that is private to the demand power policy.
+ */
+struct kbasep_pm_policy_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_DEMAND_H */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
new file mode 100755
index 0000000..26802e4
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -0,0 +1,1682 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel Power Management hardware control
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_smc.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+
+#if MALI_MOCK_TEST
+#define MOCKABLE(function) function##_original
+#else
+#define MOCKABLE(function) function
+#endif				/* MALI_MOCK_TEST */
+
+/**
+ * enum kbasep_pm_action - Actions that can be performed on a core.
+ *
+ * This enumeration is private to the file. Its values are set to allow
+ * core_type_to_reg() function, which decodes this enumeration, to be simpler
+ * and more efficient.
+ *
+ * @ACTION_PRESENT: The cores that are present
+ * @ACTION_READY: The cores that are ready
+ * @ACTION_PWRON: Power on the cores specified
+ * @ACTION_PWROFF: Power off the cores specified
+ * @ACTION_PWRTRANS: The cores that are transitioning
+ * @ACTION_PWRACTIVE: The cores that are active
+ */
+enum kbasep_pm_action {
+	ACTION_PRESENT = 0,
+	ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO),
+	ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO),
+	ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO),
+	ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO),
+	ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO)
+};
+
+static u64 kbase_pm_get_state(
+		struct kbase_device *kbdev,
+		enum kbase_pm_core_type core_type,
+		enum kbasep_pm_action action);
+
+/**
+ * core_type_to_reg - Decode a core type and action to a register.
+ *
+ * Given a core type (defined by kbase_pm_core_type) and an action (defined
+ * by kbasep_pm_action) this function will return the register offset that
+ * will perform the action on the core type. The register returned is the _LO
+ * register and an offset must be applied to use the _HI register.
+ *
+ * @core_type: The type of core
+ * @action:    The type of action
+ *
+ * Return: The register offset of the _LO register that performs an action of
+ * type @action on a core of type @core_type.
+ */
+static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
+						enum kbasep_pm_action action)
+{
+#ifdef CONFIG_MALI_CORESTACK
+	if (core_type == KBASE_PM_CORE_STACK) {
+		switch (action) {
+		case ACTION_PRESENT:
+			return STACK_PRESENT_LO;
+		case ACTION_READY:
+			return STACK_READY_LO;
+		case ACTION_PWRON:
+			return STACK_PWRON_LO;
+		case ACTION_PWROFF:
+			return STACK_PWROFF_LO;
+		case ACTION_PWRTRANS:
+			return STACK_PWRTRANS_LO;
+		default:
+			BUG();
+		}
+	}
+#endif /* CONFIG_MALI_CORESTACK */
+
+	return (u32)core_type + (u32)action;
+}
+
+#ifdef CONFIG_ARM64
+static void mali_cci_flush_l2(struct kbase_device *kbdev)
+{
+	const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED;
+	u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+	u32 raw;
+
+	/*
+	 * Note that we don't take the cache flush mutex here since
+	 * we expect to be the last user of the L2, all other L2 users
+	 * would have dropped their references, to initiate L2 power
+	 * down, L2 power down being the only valid place for this
+	 * to be called from.
+	 */
+
+	kbase_reg_write(kbdev,
+			GPU_CONTROL_REG(GPU_COMMAND),
+			GPU_COMMAND_CLEAN_INV_CACHES,
+			NULL);
+
+	raw = kbase_reg_read(kbdev,
+		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+		NULL);
+
+	/* Wait for cache flush to complete before continuing, exit on
+	 * gpu resets or loop expiry. */
+	while (((raw & mask) == 0) && --loops) {
+		raw = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+					NULL);
+	}
+}
+#endif
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+static void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo = cores & 0xFFFFFFFF;
+	u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	if (cores) {
+		if (action == ACTION_PWRON)
+			kbase_trace_mali_pm_power_on(core_type, cores);
+		else if (action == ACTION_PWROFF)
+			kbase_trace_mali_pm_power_off(core_type, cores);
+	}
+#endif
+
+	if (cores) {
+		u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
+
+		if (action == ACTION_PWRON)
+			state |= cores;
+		else if (action == ACTION_PWROFF)
+			state &= ~cores;
+		KBASE_TLSTREAM_AUX_PM_STATE(core_type, state);
+	}
+
+	/* Tracing */
+	if (cores) {
+		if (action == ACTION_PWRON)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u,
+									lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL,
+									0u, lo);
+				break;
+			default:
+				break;
+			}
+		else if (action == ACTION_PWROFF)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL,
+									0u, lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
+									0u, lo);
+				/* disable snoops before L2 is turned off */
+				kbase_pm_cache_snoop_disable(kbdev);
+				break;
+			default:
+				break;
+			}
+	}
+
+	if (lo != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+	if (hi != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
+
+/**
+ * kbase_pm_get_state - Get information about a core set
+ *
+ * This function gets information (chosen by @action) about a set of cores of
+ * a type given by @core_type. It is a static function used by
+ * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and
+ * kbase_pm_get_ready_cores().
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the should be queried
+ * @action:    The property of the cores to query
+ *
+ * Return: A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo, hi;
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+
+	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+	return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev)
+{
+	kbdev->shader_inuse_bitmap = 0;
+	kbdev->shader_needed_bitmap = 0;
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_users_count = 0;
+	kbdev->l2_available_bitmap = 0;
+	kbdev->tiler_needed_cnt = 0;
+	kbdev->tiler_inuse_cnt = 0;
+
+	memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+/**
+ * kbase_pm_get_present_cores - Get the cores that are present
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (type) {
+	case KBASE_PM_CORE_L2:
+		return kbdev->gpu_props.props.raw_props.l2_present;
+	case KBASE_PM_CORE_SHADER:
+		return kbdev->gpu_props.props.raw_props.shader_present;
+	case KBASE_PM_CORE_TILER:
+		return kbdev->gpu_props.props.raw_props.tiler_present;
+	case KBASE_PM_CORE_STACK:
+		return kbdev->gpu_props.props.raw_props.stack_present;
+	default:
+		break;
+	}
+	KBASE_DEBUG_ASSERT(0);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * kbase_pm_get_active_cores - Get the cores that are "active"
+ *                             (busy processing work)
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are active
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between
+ *                            power states
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are transitioning
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * kbase_pm_get_ready_cores - Get the cores that are powered on
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are ready (powered on)
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	u64 result;
+
+	result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+	switch (type) {
+	case KBASE_PM_CORE_SHADER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_TILER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_L2:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * kbase_pm_transition_core_type - Perform power transitions for a particular
+ *                                 core type.
+ *
+ * This function will perform any available power transitions to make the actual
+ * hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that call cannot be made
+ * until the transition has finished. Cores which are not present in the
+ * hardware are ignored if they are specified in the desired_state bitmask,
+ * however the return value will always be 0 in this case.
+ *
+ * @kbdev:             The kbase device
+ * @type:              The core type to perform transitions for
+ * @desired_state:     A bit mask of the desired state of the cores
+ * @in_use:            A bit mask of the cores that are currently running
+ *                     jobs. These cores have to be kept powered up because
+ *                     there are jobs running (or about to run) on them.
+ * @available:         Receives a bit mask of the cores that the job
+ *                     scheduler can use to submit jobs to. May be NULL if
+ *                     this is not needed.
+ * @powering_on:       Bit mask to update with cores that are
+ *                    transitioning to a power-on state.
+ *
+ * Return: true if the desired state has been reached, false otherwise
+ */
+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type,
+						u64 desired_state,
+						u64 in_use,
+						u64 * const available,
+						u64 *powering_on)
+{
+	u64 present;
+	u64 ready;
+	u64 trans;
+	u64 powerup;
+	u64 powerdown;
+	u64 powering_on_trans;
+	u64 desired_state_in_use;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Get current state */
+	present = kbase_pm_get_present_cores(kbdev, type);
+	trans = kbase_pm_get_trans_cores(kbdev, type);
+	ready = kbase_pm_get_ready_cores(kbdev, type);
+	/* mask off ready from trans in case transitions finished between the
+	 * register reads */
+	trans &= ~ready;
+
+	if (trans) /* Do not progress if any cores are transitioning */
+		return false;
+
+	powering_on_trans = trans & *powering_on;
+	*powering_on = powering_on_trans;
+
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	/* Update desired state to include the in-use cores. These have to be
+	 * kept powered up because there are jobs running or about to run on
+	 * these cores
+	 */
+	desired_state_in_use = desired_state | in_use;
+
+	/* Update state of whether l2 caches are powered */
+	if (type == KBASE_PM_CORE_L2) {
+		if ((ready == present) && (desired_state_in_use == ready) &&
+								(trans == 0)) {
+			/* All are ready, none will be turned off, and none are
+			 * transitioning */
+			kbdev->pm.backend.l2_powered = 1;
+			/*
+			 * Ensure snoops are enabled after L2 is powered up,
+			 * note that kbase keeps track of the snoop state, so
+			 * safe to repeatedly call.
+			 */
+			kbase_pm_cache_snoop_enable(kbdev);
+			if (kbdev->l2_users_count > 0) {
+				/* Notify any registered l2 cache users
+				 * (optimized out when no users waiting) */
+				wake_up(&kbdev->pm.backend.l2_powered_wait);
+			}
+		} else
+			kbdev->pm.backend.l2_powered = 0;
+	}
+
+	if (desired_state == ready && (trans == 0))
+		return true;
+
+	/* Restrict the cores to those that are actually present */
+	powerup = desired_state_in_use & present;
+	powerdown = (~desired_state_in_use) & present;
+
+	/* Restrict to cores that are not already in the desired state */
+	powerup &= ~ready;
+	powerdown &= ready;
+
+	/* Don't transition any cores that are already transitioning, except for
+	 * Mali cores that support the following case:
+	 *
+	 * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
+	 * a core that is currently transitioning to power off, then this is
+	 * remembered and the shader core is automatically powered up again once
+	 * the original transition completes. Once the automatic power on is
+	 * complete any job scheduled on the shader core should start.
+	 */
+	powerdown &= ~trans;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
+		if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
+			trans = powering_on_trans; /* for exception cases, only
+						    * mask off cores in power on
+						    * transitions */
+
+	powerup &= ~trans;
+
+	/* Perform transitions if any */
+	kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
+#if !PLATFORM_POWER_DOWN_ONLY
+	kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
+#endif
+
+	/* Recalculate cores transitioning on, and re-evaluate our state */
+	powering_on_trans |= powerup;
+	*powering_on = powering_on_trans;
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	return false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
+
+/**
+ * get_desired_cache_status - Determine which caches should be on for a
+ *                            particular core state
+ *
+ * This function takes a bit mask of the present caches and the cores (or
+ * caches) that are attached to the caches that will be powered. It then
+ * computes which caches should be turned on to allow the cores requested to be
+ * powered up.
+ *
+ * @present:       The bit mask of present caches
+ * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
+ *                 be powered
+ * @tilers_powered: The bit mask of tilers that are desired to be powered
+ *
+ * Return: A bit mask of the caches that should be turned on
+ */
+static u64 get_desired_cache_status(u64 present, u64 cores_powered,
+		u64 tilers_powered)
+{
+	u64 desired = 0;
+
+	while (present) {
+		/* Find out which is the highest set bit */
+		u64 bit = fls64(present) - 1;
+		u64 bit_mask = 1ull << bit;
+		/* Create a mask which has all bits from 'bit' upwards set */
+
+		u64 mask = ~(bit_mask - 1);
+
+		/* If there are any cores powered at this bit or above (that
+		 * haven't previously been processed) then we need this core on
+		 */
+		if (cores_powered & mask)
+			desired |= bit_mask;
+
+		/* Remove bits from cores_powered and present */
+		cores_powered &= ~mask;
+		present &= ~bit_mask;
+	}
+
+	/* Power up the required L2(s) for the tiler */
+	if (tilers_powered)
+		desired |= 1;
+
+	return desired;
+}
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status);
+
+#ifdef CONFIG_MALI_CORESTACK
+u64 kbase_pm_core_stack_mask(u64 cores)
+{
+	u64 stack_mask = 0;
+	size_t const MAX_CORE_ID = 31;
+	size_t const NUM_CORES_PER_STACK = 4;
+	size_t i;
+
+	for (i = 0; i <= MAX_CORE_ID; ++i) {
+		if (test_bit(i, (unsigned long *)&cores)) {
+			/* Every core which ID >= 16 is filled to stacks 4-7
+			 * instead of 0-3 */
+			size_t const stack_num = (i >= 16) ?
+				(i % NUM_CORES_PER_STACK) + 4 :
+				(i % NUM_CORES_PER_STACK);
+			set_bit(stack_num, (unsigned long *)&stack_mask);
+		}
+	}
+
+	return stack_mask;
+}
+#endif /* CONFIG_MALI_CORESTACK */
+
+bool
+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+	bool cores_are_available = false;
+	bool in_desired_state = true;
+	u64 desired_l2_state;
+#ifdef CONFIG_MALI_CORESTACK
+	u64 desired_stack_state;
+	u64 stacks_powered;
+#endif /* CONFIG_MALI_CORESTACK */
+	u64 cores_powered;
+	u64 tilers_powered;
+	u64 tiler_available_bitmap;
+	u64 tiler_transitioning_bitmap;
+	u64 shader_available_bitmap;
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+	u64 l2_available_bitmap;
+	u64 prev_l2_available_bitmap;
+	u64 l2_inuse_bitmap;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+	if (kbdev->pm.backend.gpu_powered == false) {
+		spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+		if (kbdev->pm.backend.desired_shader_state == 0 &&
+				kbdev->pm.backend.desired_tiler_state == 0)
+			return true;
+		return false;
+	}
+
+	/* Trace that a change-state is being requested, and that it took
+	 * (effectively) no time to start it. This is useful for counting how
+	 * many state changes occurred, in a way that's backwards-compatible
+	 * with processing the trace data */
+	kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+	kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+
+	/* If any cores are already powered then, we must keep the caches on */
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+	cores_powered |= kbdev->pm.backend.desired_shader_state;
+
+#ifdef CONFIG_MALI_CORESTACK
+	/* Work out which core stacks want to be powered */
+	desired_stack_state = kbase_pm_core_stack_mask(cores_powered);
+	stacks_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK) |
+		desired_stack_state;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	/* Work out which tilers want to be powered */
+	tiler_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_TILER);
+	tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+	tilers_powered |= kbdev->pm.backend.desired_tiler_state;
+
+	/* If there are l2 cache users registered, keep all l2s powered even if
+	 * all other cores are off. */
+	if (kbdev->l2_users_count > 0)
+		cores_powered |= kbdev->gpu_props.props.raw_props.l2_present;
+
+	desired_l2_state = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered, tilers_powered);
+
+	l2_inuse_bitmap = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered | shader_transitioning_bitmap,
+			tilers_powered | tiler_transitioning_bitmap);
+
+#ifdef CONFIG_MALI_CORESTACK
+	if (stacks_powered)
+		desired_l2_state |= 1;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	/* If any l2 cache is on, then enable l2 #0, for use by job manager */
+	if (0 != desired_l2_state)
+		desired_l2_state |= 1;
+
+	prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+	in_desired_state &= kbase_pm_transition_core_type(kbdev,
+			KBASE_PM_CORE_L2, desired_l2_state, l2_inuse_bitmap,
+			&l2_available_bitmap,
+			&kbdev->pm.backend.powering_on_l2_state);
+
+	if (kbdev->l2_available_bitmap != l2_available_bitmap)
+		KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
+
+	kbdev->l2_available_bitmap = l2_available_bitmap;
+
+
+#ifdef CONFIG_MALI_CORESTACK
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_STACK, desired_stack_state, 0,
+				&kbdev->stack_available_bitmap,
+				&kbdev->pm.backend.powering_on_stack_state);
+	}
+#endif /* CONFIG_MALI_CORESTACK */
+
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_TILER,
+				kbdev->pm.backend.desired_tiler_state,
+				0, &tiler_available_bitmap,
+				&kbdev->pm.backend.powering_on_tiler_state);
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_SHADER,
+				kbdev->pm.backend.desired_shader_state,
+				kbdev->shader_inuse_bitmap,
+				&shader_available_bitmap,
+				&kbdev->pm.backend.powering_on_shader_state);
+
+		if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u,
+						(u32) shader_available_bitmap);
+			KBASE_TIMELINE_POWER_SHADER(kbdev,
+						shader_available_bitmap);
+		}
+
+		kbdev->shader_available_bitmap = shader_available_bitmap;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u,
+						(u32) tiler_available_bitmap);
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+		}
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+	} else if ((l2_available_bitmap &
+			kbdev->gpu_props.props.raw_props.tiler_present) !=
+			kbdev->gpu_props.props.raw_props.tiler_present) {
+		tiler_available_bitmap = 0;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+	}
+
+	/* State updated for slow-path waiters */
+	kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+
+	shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* Determine whether the cores are now available (even if the set of
+	 * available cores is empty). Note that they can be available even if
+	 * we've not finished transitioning to the desired state */
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state) {
+		cores_are_available = true;
+
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+				(u32)(kbdev->shader_available_bitmap &
+				kbdev->pm.backend.desired_shader_state));
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+				(u32)(kbdev->tiler_available_bitmap &
+				kbdev->pm.backend.desired_tiler_state));
+
+		/* Log timelining information about handling events that power
+		 * up cores, to match up either with immediate submission either
+		 * because cores already available, or from PM IRQ */
+		if (!in_desired_state)
+			kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+
+	if (in_desired_state) {
+		KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_L2));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+#endif
+
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_L2,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_L2));
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_SHADER,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_SHADER));
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_TILER,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_STACK,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+				kbdev->pm.backend.gpu_in_desired_state,
+				(u32)kbdev->pm.backend.desired_shader_state);
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+				(u32)kbdev->pm.backend.desired_tiler_state);
+
+		/* Log timelining information for synchronous waiters */
+		kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+		/* Wake slow-path waiters. Job scheduler does not use this. */
+		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+
+		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
+	}
+
+	spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+
+	/* kbase_pm_ca_update_core_status can cause one-level recursion into
+	 * this function, so it must only be called once all changes to kbdev
+	 * have been committed, and after the gpu_powered_lock has been
+	 * dropped. */
+	if (kbdev->shader_ready_bitmap != shader_ready_bitmap ||
+	    kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) {
+		kbdev->shader_ready_bitmap = shader_ready_bitmap;
+		kbdev->shader_transitioning_bitmap =
+						shader_transitioning_bitmap;
+
+		kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap,
+						shader_transitioning_bitmap);
+	}
+
+	/* The core availability policy is not allowed to keep core group 0
+	 * turned off (unless it was changing the l2 power state) */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
+		(prev_l2_available_bitmap == desired_l2_state) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+		BUG();
+
+	/* The core availability policy is allowed to keep core group 1 off,
+	 * but all jobs specifically targeting CG1 must fail */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask))
+		kbdev->pm.backend.cg1_disabled = true;
+	else
+		kbdev->pm.backend.cg1_disabled = false;
+
+	return cores_are_available;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock);
+
+/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has
+ * aborted due to a fatal signal. If the time spent waiting has exceeded this
+ * threshold then there is most likely a hardware issue. */
+#define PM_TIMEOUT (5*HZ) /* 5s */
+
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long timeout;
+	bool cores_are_available;
+	int ret;
+
+	/* Force the transition to be checked and reported - the cores may be
+	 * 'available' (for job submission) but not fully powered up. */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	timeout = jiffies + PM_TIMEOUT;
+
+	/* Wait for cores */
+	ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+			kbdev->pm.backend.gpu_in_desired_state);
+
+	if (ret < 0 && time_after(jiffies, timeout)) {
+		dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+		dev_err(kbdev->dev, "Desired state :\n");
+		dev_err(kbdev->dev, "\tShader=%016llx\n",
+				kbdev->pm.backend.desired_shader_state);
+		dev_err(kbdev->dev, "\tTiler =%016llx\n",
+				kbdev->pm.backend.desired_tiler_state);
+		dev_err(kbdev->dev, "Current state :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_LO),
+					NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_LO), NULL));
+		dev_err(kbdev->dev, "Cores transitioning :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_LO), NULL));
+#if KBASE_GPU_RESET_EN
+		dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+		if (kbase_prepare_to_reset_gpu(kbdev))
+			kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+	} else {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
+
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Clear all interrupts,
+	 * and unmask them all.
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
+									NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
+
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Mask all interrupts,
+	 * and clear them all.
+	 */
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+}
+
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_disable_interrupts_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
+
+
+/*
+ * pmu layout:
+ * 0x0000: PMU TAG (RO) (0xCAFECAFE)
+ * 0x0004: PMU VERSION ID (RO) (0x00000000)
+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE)
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
+{
+	bool reset_required = is_resume;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_powered) {
+		/* Already turned on */
+		if (kbdev->poweroff_pending)
+			kbase_pm_enable_interrupts(kbdev);
+		kbdev->poweroff_pending = false;
+		KBASE_DEBUG_ASSERT(!is_resume);
+		return;
+	}
+
+	kbdev->poweroff_pending = false;
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u);
+
+	if (is_resume && kbdev->pm.backend.callback_power_resume) {
+		kbdev->pm.backend.callback_power_resume(kbdev);
+		return;
+	} else if (kbdev->pm.backend.callback_power_on) {
+		kbdev->pm.backend.callback_power_on(kbdev);
+		/* If your platform properly keeps the GPU state you may use the
+		 * return value of the callback_power_on function to
+		 * conditionally reset the GPU on power up. Currently we are
+		 * conservative and always reset the GPU. */
+		reset_required = true;
+	}
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	kbdev->pm.backend.gpu_powered = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (reset_required) {
+		/* GPU state was lost, reset GPU to ensure it is in a
+		 * consistent state */
+		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
+	}
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_restore_all_as(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Lastly, enable the interrupts */
+	kbase_pm_enable_interrupts(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
+
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* ASSERT that the cores should now be unavailable. No lock needed. */
+	KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+
+	kbdev->poweroff_pending = true;
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* Already turned off */
+		if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+			kbdev->pm.backend.callback_power_suspend(kbdev);
+		return true;
+	}
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u);
+
+	/* Disable interrupts. This also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure that any IRQ handlers have finished */
+	kbase_synchronize_irqs(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (atomic_read(&kbdev->faults_pending)) {
+		/* Page/bus faults are still being processed. The GPU can not
+		 * be powered off until they have completed */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return false;
+	}
+
+	kbase_pm_cache_snoop_disable(kbdev);
+
+	/* The GPU power may be turned off from this point */
+	kbdev->pm.backend.gpu_powered = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+		kbdev->pm.backend.callback_power_suspend(kbdev);
+	else if (kbdev->pm.backend.callback_power_off)
+		kbdev->pm.backend.callback_power_off(kbdev);
+	return true;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_off);
+
+struct kbasep_reset_timeout_data {
+	struct hrtimer timer;
+	bool timed_out;
+	struct kbase_device *kbdev;
+};
+
+void kbase_pm_reset_done(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	kbdev->pm.backend.reset_done = true;
+	wake_up(&kbdev->pm.backend.reset_done_wait);
+}
+
+/**
+ * kbase_pm_wait_for_reset - Wait for a reset to happen
+ *
+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
+ *
+ * @kbdev: Kbase device
+ */
+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	wait_event(kbdev->pm.backend.reset_done_wait,
+						(kbdev->pm.backend.reset_done));
+	kbdev->pm.backend.reset_done = false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done);
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+	struct kbasep_reset_timeout_data *rtdata =
+		container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+	rtdata->timed_out = 1;
+
+	/* Set the wait queue to wake up kbase_pm_init_hw even though the reset
+	 * hasn't completed */
+	kbase_pm_reset_done(rtdata->kbdev);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+	u32 jm_values[4];
+	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >>
+		GPU_ID_VERSION_MAJOR_SHIFT;
+
+	kbdev->hw_quirks_sc = 0;
+
+	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443.
+	 * and needed due to MIDGLES-3539. See PRLAM-11035 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+		kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
+
+	/* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+		kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	/* Enable alternative hardware counter selection if configured. */
+	if (!GPU_ID_IS_NEW_FORMAT(prod_id))
+		kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+#endif
+
+	/* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+		kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
+
+	if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
+		if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */
+			kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
+		else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */
+			kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
+	}
+
+	if (!kbdev->hw_quirks_sc)
+		kbdev->hw_quirks_sc = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_CONFIG), NULL);
+
+	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(TILER_CONFIG), NULL);
+
+	/* Set tiler clock gate override if required */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+		kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+
+	/* Limit the GPU bus bandwidth if the platform needs this. */
+	kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(L2_MMU_CONFIG), NULL);
+
+
+	/* Limit read & write ID width for AXI */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) {
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS);
+		kbdev->hw_quirks_mmu |= (DEFAULT_3BIT_ARID_LIMIT & 0x7) <<
+				L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT;
+
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES);
+		kbdev->hw_quirks_mmu |= (DEFAULT_3BIT_AWID_LIMIT & 0x7) <<
+				L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT;
+	} else {
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS);
+		kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT;
+
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+		kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
+	}
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Allow memory configuration disparity to be ignored, we
+		 * optimize the use of shared memory and thus we expect
+		 * some disparity in the memory configuration */
+		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
+	}
+
+	kbdev->hw_quirks_jm = 0;
+	/* Only for T86x/T88x-based products after r2p0 */
+	if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) {
+
+		if (of_property_read_u32_array(np,
+					"jm_config",
+					&jm_values[0],
+					ARRAY_SIZE(jm_values))) {
+			/* Entry not in device tree, use defaults  */
+			jm_values[0] = 0;
+			jm_values[1] = 0;
+			jm_values[2] = 0;
+			jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT;
+		}
+
+		/* Limit throttle limit to 6 bits*/
+		if (jm_values[3] > JM_MAX_JOB_THROTTLE_LIMIT) {
+			dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63).");
+			jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT;
+		}
+
+		/* Aggregate to one integer. */
+		kbdev->hw_quirks_jm |= (jm_values[0] ?
+				JM_TIMESTAMP_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[1] ?
+				JM_CLOCK_GATE_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[2] ?
+				JM_JOB_THROTTLE_ENABLE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[3] <<
+				JM_JOB_THROTTLE_LIMIT_SHIFT);
+
+	} else if (GPU_ID_IS_NEW_FORMAT(prod_id) &&
+			   (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
+					   GPU_ID2_PRODUCT_TMIX)) {
+		/* Only for tMIx */
+		u32 coherency_features;
+
+		coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+		/* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
+		 * documented for tMIx so force correct value here.
+		 */
+		if (coherency_features ==
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE)) {
+			kbdev->hw_quirks_jm |=
+				(COHERENCY_ACE_LITE | COHERENCY_ACE) <<
+				JM_FORCE_COHERENCY_FEATURES_SHIFT;
+		}
+	}
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING))
+		kbdev->hw_quirks_sc |= SC_TLS_HASH_ENABLE;
+
+	if (!kbdev->hw_quirks_jm)
+		kbdev->hw_quirks_jm = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JM_CONFIG), NULL);
+
+#ifdef CONFIG_MALI_CORESTACK
+#define MANUAL_POWER_CONTROL ((u32)(1 << 8))
+	kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL;
+#endif /* CONFIG_MALI_CORESTACK */
+}
+
+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
+{
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
+			kbdev->hw_quirks_sc, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
+			kbdev->hw_quirks_tiler, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
+			kbdev->hw_quirks_mmu, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
+			kbdev->hw_quirks_jm, NULL);
+
+}
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
+{
+	if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) &&
+		!kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_enable_smc != 0)
+			kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0);
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n");
+		kbdev->cci_snoop_enabled = true;
+	}
+}
+
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
+{
+	if (kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_disable_smc != 0) {
+			mali_cci_flush_l2(kbdev);
+			kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0);
+		}
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n");
+		kbdev->cci_snoop_enabled = false;
+	}
+}
+
+static int kbase_pm_do_reset(struct kbase_device *kbdev)
+{
+	struct kbasep_reset_timeout_data rtdata;
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+
+	KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_SOFT_RESET, NULL);
+
+	/* Unmask the reset complete interrupt only */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED,
+									NULL);
+
+	/* Initialize a structure for tracking the status of the reset */
+	rtdata.kbdev = kbdev;
+	rtdata.timed_out = 0;
+
+	/* Create a timer to use as a timeout on the reset */
+	hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rtdata.timer.function = kbasep_reset_timeout;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	/* No interrupt has been received - check if the RAWSTAT register says
+	 * the reset has completed */
+	if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+							RESET_COMPLETED) {
+		/* The interrupt is set in the RAWSTAT; this suggests that the
+		 * interrupts are not getting to the CPU */
+		dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+		/* If interrupts aren't working we can't continue. */
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return -EINVAL;
+	}
+
+	/* The GPU doesn't seem to be responding to the reset so try a hard
+	 * reset */
+	dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+								RESET_TIMEOUT);
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET, NULL);
+
+	/* Restart the timer to wait for the hard reset to complete */
+	rtdata.timed_out = 0;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	destroy_hrtimer_on_stack(&rtdata.timer);
+
+	dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+								RESET_TIMEOUT);
+
+	return -EINVAL;
+}
+
+static int kbasep_protected_mode_enable(struct protected_mode_device *pdev)
+{
+	struct kbase_device *kbdev = pdev->data;
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+		GPU_COMMAND_SET_PROTECTED_MODE, NULL);
+	return 0;
+}
+
+static int kbasep_protected_mode_disable(struct protected_mode_device *pdev)
+{
+	struct kbase_device *kbdev = pdev->data;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	return kbase_pm_do_reset(kbdev);
+}
+
+struct protected_mode_ops kbase_native_protected_ops = {
+	.protected_mode_enable = kbasep_protected_mode_enable,
+	.protected_mode_disable = kbasep_protected_mode_disable
+};
+
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+{
+	unsigned long irq_flags;
+	int err;
+	bool resume_vinstr = false;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Ensure the clock is on before attempting to access the hardware */
+	if (!kbdev->pm.backend.gpu_powered) {
+		if (kbdev->pm.backend.callback_power_on)
+			kbdev->pm.backend.callback_power_on(kbdev);
+
+		spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+		kbdev->pm.backend.gpu_powered = true;
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+	}
+
+	/* Ensure interrupts are off to begin with, this also clears any
+	 * outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure cache snoops are disabled before reset. */
+	kbase_pm_cache_snoop_disable(kbdev);
+	/* Prepare for the soft-reset */
+	kbdev->pm.backend.reset_done = false;
+
+	/* The cores should be made unavailable due to the reset */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->shader_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u, (u32)0u);
+	if (kbdev->tiler_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u, (u32)0u);
+	kbdev->shader_available_bitmap = 0u;
+	kbdev->tiler_available_bitmap = 0u;
+	kbdev->l2_available_bitmap = 0u;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	/* Soft reset the GPU */
+	if (kbdev->protected_mode_support)
+		err = kbdev->protected_ops->protected_mode_disable(
+				kbdev->protected_dev);
+	else
+		err = kbase_pm_do_reset(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->protected_mode)
+		resume_vinstr = true;
+	kbdev->protected_mode = false;
+	kbase_ipa_model_use_configured_locked(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	if (err)
+		goto exit;
+
+	if (flags & PM_HW_ISSUES_DETECT)
+		kbase_pm_hw_issues_detect(kbdev);
+
+	kbase_pm_hw_issues_apply(kbdev);
+	kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
+
+	/* Sanity check protected mode was left after reset */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		u32 gpu_status = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS), NULL);
+
+		WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE);
+	}
+
+	/* If cycle counter was in use re-enable it, enable_irqs will only be
+	 * false when called from kbase_pm_powerup */
+	if (kbdev->pm.backend.gpu_cycle_counter_requests &&
+						(flags & PM_ENABLE_IRQS)) {
+		/* enable interrupts as the L2 may have to be powered on */
+		kbase_pm_enable_interrupts(kbdev);
+		kbase_pm_request_l2_caches(kbdev);
+
+		/* Re-enable the counters if we need to */
+		spin_lock_irqsave(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+		if (kbdev->pm.backend.gpu_cycle_counter_requests)
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+		spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+		kbase_pm_release_l2_caches(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+		kbase_pm_disable_interrupts(kbdev);
+	}
+
+	if (flags & PM_ENABLE_IRQS)
+		kbase_pm_enable_interrupts(kbdev);
+
+exit:
+	/* If GPU is leaving protected mode resume vinstr operation. */
+	if (kbdev->vinstr_ctx && resume_vinstr)
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	return err;
+}
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ *
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * kbase_pm_request_gpu_cycle_counter() or
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only
+ *
+ * When this function is called the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to (
+ * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() )
+ *
+ * @kbdev:     The kbase device structure of the device
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+	--kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	kbase_pm_release_l2_caches(kbdev);
+}
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100644
index 0000000..7b77823
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,563 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * kbase_pm_dev_idle - The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_dev_activate - The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_get_present_cores - Get details of the cores that are present in
+ *                              the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid
+ *         pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_active_cores - Get details of the cores that are currently
+ *                             active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_trans_cores - Get details of the cores that are currently
+ *                            transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_ready_cores - Get details of the cores that are currently
+ *                            powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are powered and ready for jobs (they may
+ * or may not be currently executing jobs).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_clock_on - Turn the clock for the device on, and enable device
+ *                     interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if clock on due to resume after suspend, false otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
+ *                      device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU
+ * off. It should be modified during integration to perform the necessary
+ * actions to turn the clock off (if this is possible in the integration).
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if clock off due to suspend, false otherwise
+ *
+ * Return: true  if clock was turned off, or
+ *         false if clock can not be turned off due to pending page/bus fault
+ *               workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
+
+/**
+ * kbase_pm_enable_interrupts - Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts - Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
+ *                                      that does not take the hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * kbase_pm_reset_done - The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_pm_check_transitions_nolock - Check if there are any power transitions
+ *                                     to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state has no
+ * cores. That is: of the no cores desired, none were *un*available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation)
+ *
+ * The caller must hold kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return:      non-zero when all desired cores are available. That is,
+ *              it's worthwhile for the caller to submit a job.
+ *              false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_check_transitions_sync - Synchronous and locking variant of
+ *                                   kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device.pm.power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ *                                      where the caller must hold
+ *                                      kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ *                               the Power Policy, and begin any power
+ *                               transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware acheive the desired shader
+ * core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
+ *                                     the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required
+ *                                   and used cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init()) after calling this function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to
+ *                         update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of @buffer_updated differs from a previous call.
+ *
+ * @kbdev:          The kbase device structure for the device (must be a
+ *                  valid pointer)
+ * @buffer_updated: True if the buffer has been updated on this VSync,
+ *                  false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change
+ *                            the clock speed of the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating.
+ */
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
+ *                                      needed
+ *
+ * If the caller is the first caller then the GPU cycle counters will be enabled
+ * along with the l2 cache
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
+ *                                               needed (l2 cache already on)
+ *
+ * This is a version of the above function
+ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
+ * l2 cache is known to be on and assured to be on until the subsequent call of
+ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does
+ * not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
+ *                                      longer in use
+ *
+ * If the caller is the last caller then the GPU cycle counters will be
+ * disabled. A request must have been made before a call to this.
+ *
+ * Caller must not hold the hwaccess_lock, as it will be taken in this function.
+ * If the caller is already holding this lock then
+ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
+ *                                             that does not take hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
+ *                                       complete
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device
+ *
+ * Setup the power management callbacks and initialize/enable the runtime-pm
+ * for the Mali GPU platform device, using the callback function. This must be
+ * called before the kbase_pm_register_access_enable() function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+int kbase_pm_runtime_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_runtime_term - Disable runtime-pm for Mali GPU platform device
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_runtime_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ *                              collection is active.
+ *
+ * Note that this returns if the power management metrics collection was
+ * active at the time of calling, it is possible that after the call the metrics
+ * collection enable may have changed state.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if power on due to resume after suspend,
+ *             false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been
+ *                        requested.
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if power off due to suspend,
+ *              false otherwise
+ */
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+
+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total, unsigned long *busy);
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
+#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @kbdev:         The kbase device structure for the device (must be a
+ *                 valid pointer)
+ * @utilisation:   The current calculated utilisation by the metrics system.
+ * @util_gl_share: The current calculated gl share of utilisation.
+ * @util_cl_share: The current calculated cl share of utilisation per core
+ *                 group.
+ * Return:         Returns 0 on failure and non zero on success.
+ */
+
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2]);
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_metrics_update - Inform the metrics system that an atom is either
+ *                           about to be run or has just completed.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @now:   Pointer to the timestamp of the change, or NULL to use current time
+ *
+ * Caller must hold hwaccess_lock
+ */
+void kbase_pm_metrics_update(struct kbase_device *kbdev,
+				ktime_t *now);
+
+/**
+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called after L2 power up.
+ */
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called before L2 power off.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
new file mode 100644
index 0000000..024248c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -0,0 +1,401 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION          70
+#define KBASE_PM_VSYNC_MAX_UTILISATION          90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION       10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION       40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
+ * under 11s. Exceeding this will cause overflow */
+#define KBASE_PM_TIME_SHIFT			8
+
+/* Maximum time between sampling of utilization data, without resetting the
+ * counters. */
+#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbasep_pm_metrics_data *metrics;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	metrics = container_of(timer, struct kbasep_pm_metrics_data, timer);
+	kbase_pm_get_dvfs_action(metrics->kbdev);
+
+	spin_lock_irqsave(&metrics->lock, flags);
+
+	if (metrics->timer_active)
+		hrtimer_start(timer,
+			HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&metrics->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+int kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.metrics.kbdev = kbdev;
+
+	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.prev_busy = 0;
+	kbdev->pm.backend.metrics.prev_idle = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+
+	spin_lock_init(&kbdev->pm.backend.metrics.lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbdev->pm.backend.metrics.timer_active = true;
+	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
+
+	hrtimer_start(&kbdev->pm.backend.metrics.timer,
+			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
+
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbdev->pm.backend.metrics.timer_active = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	ktime_t diff;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+	if (ktime_to_ns(diff) < 0)
+		return;
+
+	if (kbdev->pm.backend.metrics.gpu_active) {
+		u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+
+		kbdev->pm.backend.metrics.time_busy += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[0])
+			kbdev->pm.backend.metrics.busy_cl[0] += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[1])
+			kbdev->pm.backend.metrics.busy_cl[1] += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[0])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+	} else {
+		kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff)
+							>> KBASE_PM_TIME_SHIFT);
+	}
+
+	kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function.
+ */
+static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	/* Store previous value */
+	kbdev->pm.backend.metrics.prev_idle =
+					kbdev->pm.backend.metrics.time_idle;
+	kbdev->pm.backend.metrics.prev_busy =
+					kbdev->pm.backend.metrics.time_busy;
+
+	/* Reset current values */
+	kbdev->pm.backend.metrics.time_period_start = now;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+}
+
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get());
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total_out, unsigned long *busy_out)
+{
+	ktime_t now = ktime_get();
+	unsigned long flags, busy, total;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	busy = kbdev->pm.backend.metrics.time_busy;
+	total = busy + kbdev->pm.backend.metrics.time_idle;
+
+	/* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
+	 * 100ms) */
+	if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+		kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+	} else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+		total += kbdev->pm.backend.metrics.prev_idle +
+				kbdev->pm.backend.metrics.prev_busy;
+		busy += kbdev->pm.backend.metrics.prev_busy;
+	}
+
+	*total_out = total;
+	*busy_out = busy;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev,
+					int *util_gl_share,
+					int util_cl_share[2],
+					ktime_t now)
+{
+	int utilisation;
+	int busy;
+
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	if (kbdev->pm.backend.metrics.time_idle +
+				kbdev->pm.backend.metrics.time_busy == 0) {
+		/* No data - so we return NOP */
+		utilisation = -1;
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+		goto out;
+	}
+
+	utilisation = (100 * kbdev->pm.backend.metrics.time_busy) /
+			(kbdev->pm.backend.metrics.time_idle +
+			 kbdev->pm.backend.metrics.time_busy);
+
+	busy = kbdev->pm.backend.metrics.busy_gl +
+		kbdev->pm.backend.metrics.busy_cl[0] +
+		kbdev->pm.backend.metrics.busy_cl[1];
+
+	if (busy != 0) {
+		if (util_gl_share)
+			*util_gl_share =
+				(100 * kbdev->pm.backend.metrics.busy_gl) /
+									busy;
+		if (util_cl_share) {
+			util_cl_share[0] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[0]) /
+									busy;
+			util_cl_share[1] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[1]) /
+									busy;
+		}
+	} else {
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+	}
+
+out:
+	return utilisation;
+}
+
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	int utilisation, util_gl_share;
+	int util_cl_share[2];
+	ktime_t now;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	now = ktime_get();
+
+	utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share,
+			util_cl_share, now);
+
+	if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 ||
+							util_cl_share[1] < 0) {
+		utilisation = 0;
+		util_gl_share = 0;
+		util_cl_share[0] = 0;
+		util_cl_share[1] = 0;
+		goto out;
+	}
+
+out:
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
+								util_cl_share);
+#endif				/*CONFIG_MALI_MIDGARD_DVFS */
+
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+	bool isactive;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	isactive = kbdev->pm.backend.metrics.timer_active;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	return isactive;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/**
+ * kbase_pm_metrics_active_calc - Update PM active counts based on currently
+ *                                running atoms
+ * @kbdev: Device pointer
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ */
+static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		/* Head atom may have just completed, so if it isn't running
+		 * then try the next atom */
+		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
+			katom = kbase_gpu_inspect(kbdev, js, 1);
+
+		if (katom && katom->gpu_rb_state ==
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+			if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+				int device_nr = (katom->core_req &
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+						? katom->device_nr : 0;
+				if (!WARN_ON(device_nr >= 2))
+					kbdev->pm.backend.metrics.
+						active_cl_ctx[device_nr] = 1;
+			} else {
+				/* Slot 2 should not be running non-compute
+				 * atoms */
+				if (!WARN_ON(js >= 2))
+					kbdev->pm.backend.metrics.
+						active_gl_ctx[js] = 1;
+			}
+			kbdev->pm.backend.metrics.gpu_active = true;
+		}
+	}
+}
+
+/* called when job is submitted to or removed from a GPU slot */
+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
+{
+	unsigned long flags;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	if (!timestamp) {
+		now = ktime_get();
+		timestamp = &now;
+	}
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
+
+	kbase_pm_metrics_active_calc(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100755
index 0000000..075f020
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,973 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#else				/* CONFIG_MALI_NO_MALI */
+#if !PLATFORM_POWER_DOWN_ONLY
+	&kbase_pm_demand_policy_ops,
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+	&kbase_pm_coarse_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+#if !PLATFORM_POWER_DOWN_ONLY
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/* The number of policies available in the system.
+ * This is derived from the number of functions listed in policy_get_functions.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+	 * expect to hit it nor tend to hit it very much anyway. We can detect
+	 * whether we need more instrumentation by a difference between
+	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
+
+	/* Must be the last */
+	KBASE_PM_FUNC_ID_COUNT
+};
+
+
+/* State changes during request/unrequest/release-ing cores */
+enum {
+	KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
+	KBASE_PM_CHANGE_STATE_TILER  = (1u << 1),
+
+	/* These two must be last */
+	KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
+						KBASE_PM_CHANGE_STATE_SHADER),
+	KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
+};
+typedef u32 kbase_pm_change_state;
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+/* Timeline Trace code lookups for each function */
+static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
+					[KBASE_PM_CHANGE_STATE_COUNT] = {
+	/* kbase_pm_request_cores */
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
+
+	/* kbase_pm_release_cores */
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
+};
+
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id,
+		kbase_pm_change_state state)
+{
+	int trace_code;
+
+	KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
+	KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
+									state);
+
+	trace_code = kbase_pm_change_state_trace_code[func_id][state];
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
+}
+
+#else /* CONFIG_MALI_TRACE_TIMELINE */
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id, kbase_pm_change_state state)
+{
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+/**
+ * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
+ *                               requested shader cores
+ * @kbdev: Device pointer
+ */
+static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
+{
+	u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
+	u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->pm.backend.desired_shader_state &=
+			~kbdev->pm.backend.shader_poweroff_pending;
+	kbdev->pm.backend.desired_tiler_state &=
+			~kbdev->pm.backend.tiler_poweroff_pending;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+
+	if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
+			prev_tiler_state !=
+				kbdev->pm.backend.desired_tiler_state ||
+			kbdev->pm.backend.ca_in_transition) {
+		bool cores_are_available;
+
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+		/* Don't need 'cores_are_available',
+		 * because we don't return anything */
+		CSTD_UNUSED(cores_are_available);
+	}
+}
+
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(timer, struct kbase_device,
+						pm.backend.gpu_poweroff_timer);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* It is safe for this call to do nothing if the work item is already
+	 * queued. The worker function will read the must up-to-date state of
+	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
+	 *
+	 * If a state change occurs while the worker function is processing,
+	 * this call will succeed as a work item can be requeued once it has
+	 * started processing.
+	 */
+	if (kbdev->pm.backend.gpu_poweroff_pending)
+		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+					&kbdev->pm.backend.gpu_poweroff_work);
+
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending_time--;
+
+		KBASE_DEBUG_ASSERT(
+				kbdev->pm.backend.shader_poweroff_pending_time
+									>= 0);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending_time)
+			kbasep_pm_do_poweroff_cores(kbdev);
+	}
+
+	if (kbdev->pm.backend.poweroff_timer_needed) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+		return HRTIMER_RESTART;
+	}
+
+	kbdev->pm.backend.poweroff_timer_running = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	bool do_poweroff = false;
+
+	kbdev = container_of(data, struct kbase_device,
+						pm.backend.gpu_poweroff_work);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	kbdev->pm.backend.gpu_poweroff_pending--;
+
+	if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Only power off the GPU if a request is still pending */
+	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+		do_poweroff = true;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (do_poweroff) {
+		kbdev->pm.backend.poweroff_timer_needed = false;
+		hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+		kbdev->pm.backend.poweroff_timer_running = false;
+
+		/* Power off the GPU */
+		kbase_pm_do_poweroff(kbdev, false);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("kbase_pm_do_poweroff",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!wq)
+		return -ENOMEM;
+
+	kbdev->pm.backend.gpu_poweroff_wq = wq;
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
+			kbasep_pm_do_gpu_poweroff_wq);
+	hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	kbdev->pm.backend.gpu_poweroff_timer.function =
+			kbasep_pm_do_gpu_poweroff_callback;
+	kbdev->pm.backend.pm_current_policy = policy_list[0];
+	kbdev->pm.backend.pm_current_policy->init(kbdev);
+	kbdev->pm.gpu_poweroff_time =
+			HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+	kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+	kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
+
+	return 0;
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.pm_current_policy->term(kbdev);
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
+}
+
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	kbdev->pm.backend.poweroff_timer_needed = false;
+	hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.poweroff_timer_running = false;
+
+	/* If wq is already running but is held off by pm.lock, make sure it has
+	 * no effect */
+	kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+	kbdev->pm.backend.shader_poweroff_pending_time = 0;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+	bool active;
+
+	lockdep_assert_held(&pm->lock);
+
+	/* pm_current_policy will never be NULL while pm.lock is held */
+	KBASE_DEBUG_ASSERT(backend->pm_current_policy);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	active = backend->pm_current_policy->get_core_active(kbdev);
+
+	if (active) {
+		if (backend->gpu_poweroff_pending) {
+			/* Cancel any pending power off request */
+			backend->gpu_poweroff_pending = 0;
+
+			/* If a request was pending then the GPU was still
+			 * powered, so no need to continue */
+			if (!kbdev->poweroff_pending) {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				return;
+			}
+		}
+
+		if (!backend->poweroff_timer_running && !backend->gpu_powered &&
+				(pm->poweroff_gpu_ticks ||
+				pm->poweroff_shader_ticks)) {
+			backend->poweroff_timer_needed = true;
+			backend->poweroff_timer_running = true;
+			hrtimer_start(&backend->gpu_poweroff_timer,
+					pm->gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+
+		/* Power on the GPU and any cores requested by the policy */
+		if (pm->backend.poweroff_wait_in_progress) {
+			pm->backend.poweron_required = true;
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			kbase_pm_do_poweron(kbdev, false);
+		}
+	} else {
+		/* It is an error for the power policy to power off the GPU
+		 * when there are contexts active */
+		KBASE_DEBUG_ASSERT(pm->active_count == 0);
+
+		if (backend->shader_poweroff_pending ||
+				backend->tiler_poweroff_pending) {
+			backend->shader_poweroff_pending = 0;
+			backend->tiler_poweroff_pending = 0;
+			backend->shader_poweroff_pending_time = 0;
+		}
+
+		/* Request power off */
+		if (pm->backend.gpu_powered) {
+			if (pm->poweroff_gpu_ticks) {
+				backend->gpu_poweroff_pending =
+						pm->poweroff_gpu_ticks;
+				backend->poweroff_timer_needed = true;
+				if (!backend->poweroff_timer_running) {
+					/* Start timer if not running (eg if
+					 * power policy has been changed from
+					 * always_on to something else). This
+					 * will ensure the GPU is actually
+					 * powered off */
+					backend->poweroff_timer_running
+							= true;
+					hrtimer_start(
+						&backend->gpu_poweroff_timer,
+						pm->gpu_poweroff_time,
+						HRTIMER_MODE_REL);
+				}
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+			} else {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+
+				/* Power off the GPU immediately */
+				kbase_pm_do_poweroff(kbdev, false);
+			}
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+	}
+}
+
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+	u64 desired_bitmap;
+	u64 desired_tiler_bitmap;
+	bool cores_are_available;
+	bool do_poweroff = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.pm_current_policy == NULL)
+		return;
+	if (kbdev->pm.backend.poweroff_wait_in_progress)
+		return;
+
+	if (kbdev->protected_mode_transition &&	!kbdev->shader_needed_bitmap &&
+			!kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt) {
+		/* We are trying to change in/out of protected mode - force all
+		 * cores off so that the L2 powers down */
+		desired_bitmap = 0;
+		desired_tiler_bitmap = 0;
+	} else {
+		desired_bitmap =
+		kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
+		desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+
+		if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+			desired_tiler_bitmap = 1;
+		else
+			desired_tiler_bitmap = 0;
+
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
+			/* Unless XAFFINITY is supported, enable core 0 if tiler
+			 * required, regardless of core availability */
+			if (kbdev->tiler_needed_cnt > 0 ||
+					kbdev->tiler_inuse_cnt > 0)
+				desired_bitmap |= 1;
+		}
+	}
+
+	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
+							(u32)desired_bitmap);
+	/* Are any cores being powered on? */
+	if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
+	    ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
+	    kbdev->pm.backend.ca_in_transition) {
+		/* Check if we are powering off any cores before updating shader
+		 * state */
+		if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+			/* Start timer to power off cores */
+			kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+			kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+
+			if (kbdev->pm.poweroff_shader_ticks &&
+					!kbdev->protected_mode_transition)
+				kbdev->pm.backend.shader_poweroff_pending_time =
+						kbdev->pm.poweroff_shader_ticks;
+			else
+				do_poweroff = true;
+		}
+
+		kbdev->pm.backend.desired_shader_state = desired_bitmap;
+		kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
+
+		/* If any cores are being powered on, transition immediately */
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+		/* Start timer to power off cores */
+		kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+		if (kbdev->pm.poweroff_shader_ticks &&
+				!kbdev->protected_mode_transition)
+			kbdev->pm.backend.shader_poweroff_pending_time =
+					kbdev->pm.poweroff_shader_ticks;
+		else
+			kbasep_pm_do_poweroff_cores(kbdev);
+	} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
+			desired_tiler_bitmap != 0 &&
+			kbdev->pm.backend.poweroff_timer_needed) {
+		/* If power policy is keeping cores on despite there being no
+		 * active contexts then disable poweroff timer as it isn't
+		 * required.
+		 * Only reset poweroff_timer_needed if we're not in the middle
+		 * of the power off callback */
+		kbdev->pm.backend.poweroff_timer_needed = false;
+	}
+
+	/* Ensure timer does not power off wanted cores and make sure to power
+	 * off unwanted cores */
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending &=
+				~(kbdev->pm.backend.desired_shader_state &
+								desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending &=
+				~(kbdev->pm.backend.desired_tiler_state &
+				desired_tiler_bitmap);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending &&
+				!kbdev->pm.backend.tiler_poweroff_pending)
+			kbdev->pm.backend.shader_poweroff_pending_time = 0;
+	}
+
+	/* Shader poweroff is deferred to the end of the function, to eliminate
+	 * issues caused by the core availability policy recursing into this
+	 * function */
+	if (do_poweroff)
+		kbasep_pm_do_poweroff_cores(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+}
+
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
+
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
+
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_policy *new_policy)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	const struct kbase_pm_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.pm_current_policy;
+	kbdev->pm.backend.pm_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
+								old_policy->id);
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
+								new_policy->id);
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.pm_current_policy = new_policy;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_active(kbdev);
+	kbase_pm_update_cores_state(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
+
+/* Check whether a state change has finished, and trace it as completed */
+static void
+kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
+{
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state)
+		kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 cores;
+
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	cores = shader_cores;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+
+		/* It should be almost impossible for this to overflow. It would
+		 * require 2^32 atoms to request a particular core, which would
+		 * require 2^24 contexts to submit. This would require an amount
+		 * of memory that is impossible on a 32-bit system and extremely
+		 * unlikely on a 64-bit system. */
+		int cnt = ++kbdev->shader_needed_cnt[bitnum];
+
+		if (1 == cnt) {
+			kbdev->shader_needed_bitmap |= bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt = ++kbdev->tiler_needed_cnt;
+
+		if (1 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+							change_gpu_state);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
+
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_needed_bitmap &= ~bit;
+
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		cnt = --kbdev->tiler_needed_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* Trace that any state change effectively completes immediately
+		 * - no-one will wait on the state change */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
+
+enum kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 prev_shader_needed;	/* Just for tracing */
+	u64 prev_shader_inuse;	/* Just for tracing */
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	prev_shader_needed = kbdev->shader_needed_bitmap;
+	prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+	/* If desired_shader_state does not contain the requested cores, then
+	 * power management is not attempting to powering those cores (most
+	 * likely due to core availability policy) and a new job affinity must
+	 * be chosen */
+	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+							shader_cores) {
+		return (kbdev->pm.backend.poweroff_wait_in_progress ||
+				kbdev->pm.backend.pm_current_policy == NULL) ?
+				KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
+	}
+
+	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+	    (tiler_required && !kbdev->tiler_available_bitmap)) {
+		/* Trace ongoing core transition */
+		kbase_timeline_pm_l2_transition_start(kbdev);
+		return KBASE_CORES_NOT_READY;
+	}
+
+	/* If we started to trace a state change, then trace it has being
+	 * finished by now, at the very latest */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+	/* Trace core transition done */
+	kbase_timeline_pm_l2_transition_done(kbdev);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt)
+			kbdev->shader_needed_bitmap &= ~bit;
+
+		/* shader_inuse_cnt should not overflow because there can only
+		 * be a very limited number of jobs on the h/w at one time */
+
+		kbdev->shader_inuse_cnt[bitnum]++;
+		kbdev->shader_inuse_bitmap |= bit;
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		--kbdev->tiler_needed_cnt;
+
+		kbdev->tiler_inuse_cnt++;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+	}
+
+	if (prev_shader_needed != kbdev->shader_needed_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+	return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_inuse_bitmap &= ~bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+		cnt = --kbdev->tiler_inuse_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+							change_gpu_state);
+
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
+
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+					bool tiler_required,
+					u64 shader_cores)
+{
+	unsigned long flags;
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_check_transitions_sync(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
+
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 prior_l2_users_count;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	prior_l2_users_count = kbdev->l2_users_count++;
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
+
+	/* if the GPU is reset while the l2 is on, l2 will be off but
+	 * prior_l2_users_count will be > 0. l2_available_bitmap will have been
+	 * set to 0 though by kbase_pm_init_hw */
+	if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
+		kbase_pm_check_transitions_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	wait_event(kbdev->pm.backend.l2_powered_wait,
+					kbdev->pm.backend.l2_powered == 1);
+
+	/* Trace that any state change completed immediately */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
+
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->l2_users_count++;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
+
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
+
+	--kbdev->l2_users_count;
+
+	if (!kbdev->l2_users_count) {
+		kbase_pm_check_transitions_nolock(kbdev);
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
new file mode 100644
index 0000000..611a90e
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -0,0 +1,227 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ *         initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+	KBASE_CORES_NOT_READY = 0,
+	KBASE_NEW_AFFINITY = 1,
+	KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ *                          for jobs to be submitted
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete/the cores might not be available
+ * until a Power Management IRQ.
+ *
+ * Return: 0 if the cores were successfully requested, or -errno otherwise.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ *                            jobs to be submitted.
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job is
+ * cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and if so update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ *
+ *         %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
+ *
+ *         %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+						struct kbase_device *kbdev,
+						bool tiler_required,
+						u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_register_inuse_cores() )
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
+ * may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups, power up, wait and prevent
+ * the power manager from powering down the l2 caches.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2
+ *
+ * It is the callers responsibility to ensure that the l2 is already powered up
+ * and to eventually call kbase_pm_release_l2_caches()
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power manager
+ * to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100644
index 0000000..d992989
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,103 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush -  Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367
+ *
+ * Note : If GPU resets occur then the counters are reset to zero, the delay may
+ * not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+	u32 base_count = 0;
+
+	/*
+	 * The caller must be holding onto the kctx or the call is from
+	 * userspace.
+	 */
+	kbase_pm_context_active(kctx->kbdev);
+	kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+	while (true) {
+		u32 new_count;
+
+		new_count = kbase_reg_read(kctx->kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		/* First time around, just store the count. */
+		if (base_count == 0) {
+			base_count = new_count;
+			continue;
+		}
+
+		/* No need to handle wrapping, unsigned maths works for this. */
+		if ((new_count - base_count) > 1000)
+			break;
+	}
+
+	kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+	kbase_pm_context_idle(kctx->kbdev);
+}
+#endif				/* CONFIG_MALI_NO_MALI */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
new file mode 100644
index 0000000..35088ab
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
new file mode 100644
index 0000000..35ff2f1
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -0,0 +1,126 @@
+#
+# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/drivers/gpu/arm/midgard/ 
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE                += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           += ../../kernel/drivers/gpu/arm/midgard/docs
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
new file mode 100644
index 0000000..7ae05c2
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR;
+	size="12,8";
+	compound=true;
+
+	node [ shape = box ];
+
+	subgraph cluster_policy_queues {
+		low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+		queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+		rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+		label = "Policy's Queue(s)";
+	}
+
+	call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+	{
+		rank=same;
+		ordering=out;
+		call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+		call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+		call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+	}
+
+	subgraph cluster_runpool {
+
+		as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+		as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+		as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+		as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+		label = "Policy's Run Pool";
+	}
+
+	{
+		rank=same;
+		call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+		sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+	}
+
+	{
+		rank=same;
+		ordering=out;
+		sstop [ shape=ellipse label="SS-Timer expires" ]
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		irq [ label="IRQ" shape=ellipse ];
+
+		job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+	}
+
+	hstop [ shape=ellipse label="HS-Timer expires" ]
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+	low_queue:qr -> call_dequeue:w;
+	rt_queue:qr -> call_dequeue:w;
+
+	call_dequeue -> as1 [lhead=cluster_runpool];
+
+	as1->call_jdequeue         [ltail=cluster_runpool];
+	call_jdequeue->jobslots:0;
+	call_jdequeue->sstop_dotfixup [ arrowhead=none];
+	sstop_dotfixup->sstop      [label="Spawn SS-Timer"];
+	sstop->jobslots            [label="SoftStop"];
+	sstop->hstop               [label="Spawn HS-Timer"];
+	hstop->jobslots:ne            [label="HardStop"];
+
+
+	as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+	call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+	call_ctxfinish->call_ctxdone [constraint=false];
+
+	call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+	{
+	jobslots->irq   [constraint=false];
+
+	irq->job_finish [constraint=false];
+	}
+
+	irq->as2  [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
new file mode 100644
index 0000000..159b993
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR
+	size="6,6"
+	compound=true;
+
+	node [ shape = box ];
+
+	call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+	policy_queue [ label="Policy's Queue" ];
+
+	{
+		rank=same;
+		runpool [ label="Policy's Run Pool" ];
+
+		ctx_finish [ label="ctx finished" ];
+	}
+
+	{
+		rank=same;
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		job_finish [ label="Job finished" ];
+	}
+
+
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> policy_queue;
+
+	policy_queue->runpool [label="dequeue ctx" weight=0.1];
+	runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+	runpool->ctx_finish [ style=dotted ];
+
+	runpool->jobslots  [label="dequeue job" weight=0.1];
+	jobslots->runpool  [label="requeue job" weight=0.1];
+
+	jobslots->job_finish [ style=dotted ];
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild
new file mode 100644
index 0000000..8e37f40
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild
@@ -0,0 +1,27 @@
+#
+# (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+mali_kbase-y += \
+	ipa/mali_kbase_ipa_simple.o \
+	ipa/mali_kbase_ipa.o
+
+mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
+
+ifneq ($(wildcard $(src)/ipa/mali_kbase_ipa_vinstr_g71.c),)
+  mali_kbase-y += \
+	ipa/mali_kbase_ipa_vinstr_g71.o \
+	ipa/mali_kbase_ipa_vinstr_common.o
+
+endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
new file mode 100644
index 0000000..1450c2c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
@@ -0,0 +1,590 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/thermal.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/of.h>
+#include "mali_kbase.h"
+#include "mali_kbase_ipa.h"
+#include "mali_kbase_ipa_debugfs.h"
+#include "mali_kbase_ipa_simple.h"
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#define dev_pm_opp_find_freq_exact opp_find_freq_exact
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp opp
+#endif
+
+#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model"
+#define KBASE_IPA_G71_MODEL_NAME      "mali-g71-power-model"
+
+static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
+	&kbase_simple_ipa_model_ops,
+	&kbase_g71_ipa_model_ops
+};
+
+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
+{
+	int err = 0;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (model->ops->recalculate) {
+		err = model->ops->recalculate(model);
+		if (err) {
+			dev_err(model->kbdev->dev,
+				"recalculation of power model %s returned error %d\n",
+				model->ops->name, err);
+		}
+	}
+
+	return err;
+}
+
+static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
+							    const char *name)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) {
+		struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i];
+
+		if (!strcmp(ops->name, name))
+			return ops;
+	}
+
+	dev_err(kbdev->dev, "power model \'%s\' not found\n", name);
+
+	return NULL;
+}
+
+void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
+{
+	atomic_set(&kbdev->ipa_use_configured_model, false);
+}
+
+void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
+{
+	atomic_set(&kbdev->ipa_use_configured_model, true);
+}
+
+const char *kbase_ipa_model_name_from_id(u32 gpu_id)
+{
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+			GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(prod_id)) {
+		switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
+		case GPU_ID2_PRODUCT_TMIX:
+			return KBASE_IPA_G71_MODEL_NAME;
+		default:
+			return KBASE_IPA_FALLBACK_MODEL_NAME;
+		}
+	}
+
+	return KBASE_IPA_FALLBACK_MODEL_NAME;
+}
+
+static struct device_node *get_model_dt_node(struct kbase_ipa_model *model)
+{
+	struct device_node *model_dt_node;
+	char compat_string[64];
+
+	snprintf(compat_string, sizeof(compat_string), "arm,%s",
+		 model->ops->name);
+
+	model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node,
+						NULL, compat_string);
+	if (!model_dt_node && !model->missing_dt_node_warning) {
+		dev_warn(model->kbdev->dev,
+			 "Couldn't find power_model DT node matching \'%s\'\n",
+			 compat_string);
+		model->missing_dt_node_warning = true;
+	}
+
+	return model_dt_node;
+}
+
+int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
+				  const char *name, s32 *addr,
+				  size_t num_elems, bool dt_required)
+{
+	int err, i;
+	struct device_node *model_dt_node = get_model_dt_node(model);
+	char *origin;
+
+	err = of_property_read_u32_array(model_dt_node, name, addr, num_elems);
+
+	if (err && dt_required) {
+		memset(addr, 0, sizeof(s32) * num_elems);
+		dev_warn(model->kbdev->dev,
+			 "Error %d, no DT entry: %s.%s = %zu*[0]\n",
+			 err, model->ops->name, name, num_elems);
+		origin = "zero";
+	} else if (err && !dt_required) {
+		origin = "default";
+	} else /* !err */ {
+		origin = "DT";
+	}
+
+	/* Create a unique debugfs entry for each element */
+	for (i = 0; i < num_elems; ++i) {
+		char elem_name[32];
+
+		if (num_elems == 1)
+			snprintf(elem_name, sizeof(elem_name), "%s", name);
+		else
+			snprintf(elem_name, sizeof(elem_name), "%s.%d",
+				name, i);
+
+		dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n",
+			model->ops->name, elem_name, addr[i], origin);
+
+		err = kbase_ipa_model_param_add(model, elem_name,
+						&addr[i], sizeof(s32),
+						PARAM_TYPE_S32);
+		if (err)
+			goto exit;
+	}
+exit:
+	return err;
+}
+
+int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
+				     const char *name, char *addr,
+				     size_t size, bool dt_required)
+{
+	int err;
+	struct device_node *model_dt_node = get_model_dt_node(model);
+	const char *string_prop_value;
+	char *origin;
+
+	err = of_property_read_string(model_dt_node, name,
+				      &string_prop_value);
+	if (err && dt_required) {
+		strncpy(addr, "", size - 1);
+		dev_warn(model->kbdev->dev,
+			 "Error %d, no DT entry: %s.%s = \'%s\'\n",
+			 err, model->ops->name, name, addr);
+		err = 0;
+		origin = "zero";
+	} else if (err && !dt_required) {
+		origin = "default";
+	} else /* !err */ {
+		strncpy(addr, string_prop_value, size - 1);
+		origin = "DT";
+	}
+
+	addr[size - 1] = '\0';
+
+	dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n",
+		model->ops->name, name, string_prop_value, origin);
+
+	err = kbase_ipa_model_param_add(model, name, addr, size,
+					PARAM_TYPE_STRING);
+
+	return err;
+}
+
+void kbase_ipa_term_model(struct kbase_ipa_model *model)
+{
+	if (!model)
+		return;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (model->ops->term)
+		model->ops->term(model);
+
+	kbase_ipa_model_param_free_all(model);
+
+	kfree(model);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_term_model);
+
+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
+					     struct kbase_ipa_model_ops *ops)
+{
+	struct kbase_ipa_model *model;
+	int err;
+
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	if (!ops || !ops->name)
+		return NULL;
+
+	model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL);
+	if (!model)
+		return NULL;
+
+	model->kbdev = kbdev;
+	model->ops = ops;
+	INIT_LIST_HEAD(&model->params);
+
+	err = model->ops->init(model);
+	if (err) {
+		dev_err(kbdev->dev,
+			"init of power model \'%s\' returned error %d\n",
+			ops->name, err);
+		kfree(model);
+		return NULL;
+	}
+
+	err = kbase_ipa_model_recalculate(model);
+	if (err) {
+		kbase_ipa_term_model(model);
+		return NULL;
+	}
+
+	return model;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_init_model);
+
+static void kbase_ipa_term_locked(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	/* Clean up the models */
+	if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model)
+		kbase_ipa_term_model(kbdev->ipa.configured_model);
+	kbase_ipa_term_model(kbdev->ipa.fallback_model);
+
+	kbdev->ipa.configured_model = NULL;
+	kbdev->ipa.fallback_model = NULL;
+}
+
+int kbase_ipa_init(struct kbase_device *kbdev)
+{
+
+	const char *model_name;
+	struct kbase_ipa_model_ops *ops;
+	struct kbase_ipa_model *default_model = NULL;
+	int err;
+
+	mutex_init(&kbdev->ipa.lock);
+	/*
+	 * Lock during init to avoid warnings from lockdep_assert_held (there
+	 * shouldn't be any concurrent access yet).
+	 */
+	mutex_lock(&kbdev->ipa.lock);
+
+	/* The simple IPA model must *always* be present.*/
+	ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME);
+
+	if (!ops->do_utilization_scaling_in_framework) {
+		dev_err(kbdev->dev,
+			"Fallback IPA model %s should not account for utilization\n",
+			ops->name);
+		err = -EINVAL;
+		goto end;
+	}
+
+	default_model = kbase_ipa_init_model(kbdev, ops);
+	if (!default_model) {
+		err = -EINVAL;
+		goto end;
+	}
+
+	kbdev->ipa.fallback_model = default_model;
+	err = of_property_read_string(kbdev->dev->of_node,
+				      "ipa-model",
+				      &model_name);
+	if (err) {
+		/* Attempt to load a match from GPU-ID */
+		u32 gpu_id;
+
+		gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		model_name = kbase_ipa_model_name_from_id(gpu_id);
+		dev_dbg(kbdev->dev,
+			"Inferring model from GPU ID 0x%x: \'%s\'\n",
+			gpu_id, model_name);
+		err = 0;
+	} else {
+		dev_dbg(kbdev->dev,
+			"Using ipa-model parameter from DT: \'%s\'\n",
+			model_name);
+	}
+
+	if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) {
+		ops = kbase_ipa_model_ops_find(kbdev, model_name);
+		kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops);
+		if (!kbdev->ipa.configured_model) {
+			err = -EINVAL;
+			goto end;
+		}
+	} else {
+		kbdev->ipa.configured_model = default_model;
+	}
+
+	kbase_ipa_model_use_configured_locked(kbdev);
+
+end:
+	if (err)
+		kbase_ipa_term_locked(kbdev);
+	else
+		dev_info(kbdev->dev,
+			 "Using configured power model %s, and fallback %s\n",
+			 kbdev->ipa.configured_model->ops->name,
+			 kbdev->ipa.fallback_model->ops->name);
+
+	mutex_unlock(&kbdev->ipa.lock);
+	return err;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_init);
+
+void kbase_ipa_term(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->ipa.lock);
+	kbase_ipa_term_locked(kbdev);
+	mutex_unlock(&kbdev->ipa.lock);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_term);
+
+/**
+ * kbase_scale_dynamic_power() - Scale a dynamic power coefficient to an OPP
+ * @c:		Dynamic model coefficient, in pW/(Hz V^2). Should be in range
+ *		0 < c < 2^26 to prevent overflow.
+ * @freq:	Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz)
+ * @voltage:	Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
+ *
+ * Keep a record of the approximate range of each value at every stage of the
+ * calculation, to ensure we don't overflow. This makes heavy use of the
+ * approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual
+ * calculations in decimal for increased accuracy.
+ *
+ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
+ */
+static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq,
+				     const u32 voltage)
+{
+	/* Range: 2^8 < v2 < 2^16 m(V^2) */
+	const u32 v2 = (voltage * voltage) / 1000;
+
+	/* Range: 2^3 < f_MHz < 2^10 MHz */
+	const u32 f_MHz = freq / 1000000;
+
+	/* Range: 2^11 < v2f_big < 2^26 kHz V^2 */
+	const u32 v2f_big = v2 * f_MHz;
+
+	/* Range: 2^1 < v2f < 2^16 MHz V^2 */
+	const u32 v2f = v2f_big / 1000;
+
+	/* Range (working backwards from next line): 0 < v2fc < 2^23 uW.
+	 * Must be < 2^42 to avoid overflowing the return value. */
+	const u64 v2fc = (u64) c * (u64) v2f;
+
+	/* Range: 0 < v2fc / 1000 < 2^13 mW */
+	return div_u64(v2fc, 1000);
+}
+
+/**
+ * kbase_scale_static_power() - Scale a static power coefficient to an OPP
+ * @c:		Static model coefficient, in uW/V^3. Should be in range
+ *		0 < c < 2^32 to prevent overflow.
+ * @voltage:	Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
+ *
+ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
+ */
+u32 kbase_scale_static_power(const u32 c, const u32 voltage)
+{
+	/* Range: 2^8 < v2 < 2^16 m(V^2) */
+	const u32 v2 = (voltage * voltage) / 1000;
+
+	/* Range: 2^17 < v3_big < 2^29 m(V^2) mV */
+	const u32 v3_big = v2 * voltage;
+
+	/* Range: 2^7 < v3 < 2^19 m(V^3) */
+	const u32 v3 = v3_big / 1000;
+
+	/*
+	 * Range (working backwards from next line): 0 < v3c_big < 2^33 nW.
+	 * The result should be < 2^52 to avoid overflowing the return value.
+	 */
+	const u64 v3c_big = (u64) c * (u64) v3;
+
+	/* Range: 0 < v3c_big / 1000000 < 2^13 mW */
+	return div_u64(v3c_big, 1000000);
+}
+
+static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	if (atomic_read(&kbdev->ipa_use_configured_model))
+		return kbdev->ipa.configured_model;
+	else
+		return kbdev->ipa.fallback_model;
+}
+
+static u32 get_static_power_locked(struct kbase_device *kbdev,
+				   struct kbase_ipa_model *model,
+				   unsigned long voltage)
+{
+	u32 power = 0;
+	int err;
+	u32 power_coeff;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (!model->ops->get_static_coeff)
+		model = kbdev->ipa.fallback_model;
+
+	if (model->ops->get_static_coeff) {
+		err = model->ops->get_static_coeff(model, &power_coeff);
+		if (!err)
+			power = kbase_scale_static_power(power_coeff,
+							 (u32) voltage);
+	}
+
+	return power;
+}
+
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static unsigned long kbase_get_static_power(struct devfreq *df,
+					    unsigned long voltage)
+#else
+static unsigned long kbase_get_static_power(unsigned long voltage)
+#endif
+{
+	struct kbase_ipa_model *model;
+	u32 power = 0;
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+#else
+	struct kbase_device *kbdev = kbase_find_device(-1);
+#endif
+
+	mutex_lock(&kbdev->ipa.lock);
+
+	model = get_current_model(kbdev);
+	power = get_static_power_locked(kbdev, model, voltage);
+
+	mutex_unlock(&kbdev->ipa.lock);
+
+#if !(defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
+	kbase_release_device(kbdev);
+#endif
+
+	return power;
+}
+
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static unsigned long kbase_get_dynamic_power(struct devfreq *df,
+					     unsigned long freq,
+					     unsigned long voltage)
+#else
+static unsigned long kbase_get_dynamic_power(unsigned long freq,
+					     unsigned long voltage)
+#endif
+{
+	struct kbase_ipa_model *model;
+	u32 power_coeff = 0, power = 0;
+	int err = 0;
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+#else
+	struct kbase_device *kbdev = kbase_find_device(-1);
+#endif
+
+	mutex_lock(&kbdev->ipa.lock);
+
+	model = kbdev->ipa.fallback_model;
+
+	err = model->ops->get_dynamic_coeff(model, &power_coeff, freq);
+
+	if (!err)
+		power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
+	else
+		dev_err_ratelimited(kbdev->dev,
+				    "Model %s returned error code %d\n",
+				    model->ops->name, err);
+
+	mutex_unlock(&kbdev->ipa.lock);
+
+#if !(defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
+	kbase_release_device(kbdev);
+#endif
+
+	return power;
+}
+
+int kbase_get_real_power(struct devfreq *df, u32 *power,
+				unsigned long freq,
+				unsigned long voltage)
+{
+	struct kbase_ipa_model *model;
+	u32 power_coeff = 0;
+	int err = 0;
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+
+	mutex_lock(&kbdev->ipa.lock);
+
+	model = get_current_model(kbdev);
+
+	err = model->ops->get_dynamic_coeff(model, &power_coeff, freq);
+
+	/* If we switch to protected model between get_current_model() and
+	 * get_dynamic_coeff(), counter reading could fail. If that happens
+	 * (unlikely, but possible), revert to the fallback model. */
+	if (err && model != kbdev->ipa.fallback_model) {
+		model = kbdev->ipa.fallback_model;
+		err = model->ops->get_dynamic_coeff(model, &power_coeff, freq);
+	}
+
+	if (err)
+		goto exit_unlock;
+
+	*power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
+
+	if (model->ops->do_utilization_scaling_in_framework) {
+		struct devfreq_dev_status *status = &df->last_status;
+		unsigned long total_time = max(status->total_time, 1ul);
+		u64 busy_time = min(status->busy_time, total_time);
+
+		*power = div_u64((u64) *power * (u64) busy_time, total_time);
+	}
+
+	*power += get_static_power_locked(kbdev, model, voltage);
+
+exit_unlock:
+	mutex_unlock(&kbdev->ipa.lock);
+
+	return err;
+}
+KBASE_EXPORT_TEST_API(kbase_get_real_power);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+struct devfreq_cooling_ops kbase_ipa_power_model_ops = {
+#else
+struct devfreq_cooling_power kbase_ipa_power_model_ops = {
+#endif
+	.get_static_power = &kbase_get_static_power,
+	.get_dynamic_power = &kbase_get_dynamic_power,
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	.get_real_power = &kbase_get_real_power,
+#endif
+};
+KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
new file mode 100644
index 0000000..469f33c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
@@ -0,0 +1,165 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_IPA_H_
+#define _KBASE_IPA_H_
+
+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
+
+struct devfreq;
+
+struct kbase_ipa_model {
+	struct list_head link;
+	struct kbase_device *kbdev;
+	void *model_data;
+	struct kbase_ipa_model_ops *ops;
+	struct list_head params;
+	bool missing_dt_node_warning;
+};
+
+/**
+ * kbase_ipa_model_add_param_s32 - Add an integer model parameter
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @addr:	address where the value is stored
+ * @num_elems:	number of elements (1 if not an array)
+ * @dt_required: if false, a corresponding devicetree entry is not required,
+ *		 and the current value will be used. If true, a warning is
+ *		 output and the data is zeroed
+ *
+ * Return: 0 on success, or an error code
+ */
+int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
+				  const char *name, s32 *addr,
+				  size_t num_elems, bool dt_required);
+
+/**
+ * kbase_ipa_model_add_param_string - Add a string model parameter
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @addr:	address where the value is stored
+ * @size:	size, in bytes, of the value storage (so the maximum string
+ *		length is size - 1)
+ * @dt_required: if false, a corresponding devicetree entry is not required,
+ *		 and the current value will be used. If true, a warning is
+ *		 output and the data is zeroed
+ *
+ * Return: 0 on success, or an error code
+ */
+int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
+				     const char *name, char *addr,
+				     size_t size, bool dt_required);
+
+struct kbase_ipa_model_ops {
+	char *name;
+	/* The init, recalculate and term ops on the default model are always
+	 * called.  However, all the other models are only invoked if the model
+	 * is selected in the device tree. Otherwise they are never
+	 * initialized. Additional resources can be acquired by models in
+	 * init(), however they must be terminated in the term().
+	 */
+	int (*init)(struct kbase_ipa_model *model);
+	/* Called immediately after init(), or when a parameter is changed, so
+	 * that any coefficients derived from model parameters can be
+	 * recalculated. */
+	int (*recalculate)(struct kbase_ipa_model *model);
+	void (*term)(struct kbase_ipa_model *model);
+	/*
+	 * get_dynamic_coeff() - calculate dynamic power coefficient
+	 * @model:		pointer to model
+	 * @coeffp:		pointer to return value location
+	 * @current_freq:	frequency the GPU has been running at for the
+	 *			previous sampling period.
+	 *
+	 * Calculate a dynamic power coefficient, with units pW/(Hz V^2), which
+	 * is then scaled by the IPA framework according to the current OPP's
+	 * frequency and voltage.
+	 *
+	 * Return: 0 on success, or an error code.
+	 */
+	int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp,
+				 u32 current_freq);
+	/*
+	 * get_static_coeff() - calculate static power coefficient
+	 * @model:		pointer to model
+	 * @coeffp:		pointer to return value location
+	 *
+	 * Calculate a static power coefficient, with units uW/(V^3), which is
+	 * scaled by the IPA framework according to the current OPP's voltage.
+	 *
+	 * Return: 0 on success, or an error code.
+	 */
+	int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
+	/* If false, the model's get_dynamic_coeff() method accounts for how
+	 * long the GPU was active over the sample period. If true, the
+	 * framework will scale the calculated power according to the
+	 * utilization stats recorded by devfreq in get_real_power(). */
+	bool do_utilization_scaling_in_framework;
+};
+
+/* Models can be registered only in the platform's platform_init_func call */
+int kbase_ipa_model_ops_register(struct kbase_device *kbdev,
+			     struct kbase_ipa_model_ops *new_model_ops);
+struct kbase_ipa_model *kbase_ipa_get_model(struct kbase_device *kbdev,
+					    const char *name);
+
+int kbase_ipa_init(struct kbase_device *kbdev);
+void kbase_ipa_term(struct kbase_device *kbdev);
+void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev);
+void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev);
+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
+					     struct kbase_ipa_model_ops *ops);
+void kbase_ipa_term_model(struct kbase_ipa_model *model);
+
+extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_get_real_power() - get the real power consumption of the GPU
+ * @df: dynamic voltage and frequency scaling information for the GPU.
+ * @power: where to store the power consumption, in mW.
+ * @freq: a frequency, in HZ.
+ * @voltage: a voltage, in mV.
+ *
+ * This function is only exposed for use by unit tests. The returned value
+ * incorporates both static and dynamic power consumption.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_get_real_power(struct devfreq *df, u32 *power,
+				unsigned long freq,
+				unsigned long voltage);
+#endif /* MALI_UNIT_TEST */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+extern struct devfreq_cooling_ops kbase_ipa_power_model_ops;
+#else
+extern struct devfreq_cooling_power kbase_ipa_power_model_ops;
+#endif
+
+#else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+static inline void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
+{ }
+
+static inline void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
+{ }
+
+#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
new file mode 100644
index 0000000..d3ac7c3
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
@@ -0,0 +1,249 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+#include "mali_kbase.h"
+#include "mali_kbase_ipa.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0))
+#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
+#endif
+
+struct kbase_ipa_model_param {
+	char *name;
+	union {
+		void *voidp;
+		s32 *s32p;
+		char *str;
+	} addr;
+	size_t size;
+	enum kbase_ipa_model_param_type type;
+	struct kbase_ipa_model *model;
+	struct list_head link;
+};
+
+static int param_int_get(void *data, u64 *val)
+{
+	struct kbase_ipa_model_param *param = data;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	*(s64 *) val = *param->addr.s32p;
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return 0;
+}
+
+static int param_int_set(void *data, u64 val)
+{
+	struct kbase_ipa_model_param *param = data;
+	struct kbase_ipa_model *model = param->model;
+	s64 sval = (s64) val;
+	int err = 0;
+
+	if (sval < S32_MIN || sval > S32_MAX)
+		return -ERANGE;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	*param->addr.s32p = val;
+	err = kbase_ipa_model_recalculate(model);
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return err;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_s32, param_int_get, param_int_set, "%lld\n");
+
+static ssize_t param_string_get(struct file *file, char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct kbase_ipa_model_param *param = file->private_data;
+	ssize_t ret;
+	size_t len;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	len = strnlen(param->addr.str, param->size - 1) + 1;
+	ret = simple_read_from_buffer(user_buf, count, ppos,
+				      param->addr.str, len);
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return ret;
+}
+
+static ssize_t param_string_set(struct file *file, const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct kbase_ipa_model_param *param = file->private_data;
+	struct kbase_ipa_model *model = param->model;
+	ssize_t ret = count;
+	size_t buf_size;
+	int err;
+
+	mutex_lock(&model->kbdev->ipa.lock);
+
+	if (count > param->size) {
+		ret = -EINVAL;
+		goto end;
+	}
+
+	buf_size = min(param->size - 1, count);
+	if (copy_from_user(param->addr.str, user_buf, buf_size)) {
+		ret = -EFAULT;
+		goto end;
+	}
+
+	param->addr.str[buf_size] = '\0';
+
+	err = kbase_ipa_model_recalculate(model);
+	if (err < 0)
+		ret = err;
+
+end:
+	mutex_unlock(&model->kbdev->ipa.lock);
+
+	return ret;
+}
+
+static const struct file_operations fops_string = {
+	.read = param_string_get,
+	.write = param_string_set,
+	.open = simple_open,
+	.llseek = default_llseek,
+};
+
+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name,
+			      void *addr, size_t size,
+			      enum kbase_ipa_model_param_type type)
+{
+	struct kbase_ipa_model_param *param;
+
+	param = kzalloc(sizeof(*param), GFP_KERNEL);
+
+	if (!param)
+		return -ENOMEM;
+
+	/* 'name' is stack-allocated for array elements, so copy it into
+	 * heap-allocated storage */
+	param->name = kstrdup(name, GFP_KERNEL);
+
+	if (!param->name) {
+		kfree(param);
+		return -ENOMEM;
+	}
+
+	param->addr.voidp = addr;
+	param->size = size;
+	param->type = type;
+	param->model = model;
+
+	list_add(&param->link, &model->params);
+
+	return 0;
+}
+
+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_param *param_p, *param_n;
+
+	list_for_each_entry_safe(param_p, param_n, &model->params, link) {
+		list_del(&param_p->link);
+		kfree(param_p->name);
+		kfree(param_p);
+	}
+}
+
+static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
+{
+	struct list_head *it;
+	struct dentry *dir;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	dir = debugfs_create_dir(model->ops->name,
+				 model->kbdev->mali_debugfs_directory);
+
+	if (!dir) {
+		dev_err(model->kbdev->dev,
+			"Couldn't create mali debugfs %s directory",
+			model->ops->name);
+		return;
+	}
+
+	list_for_each(it, &model->params) {
+		struct kbase_ipa_model_param *param =
+				list_entry(it,
+					   struct kbase_ipa_model_param,
+					   link);
+		const struct file_operations *fops = NULL;
+
+		switch (param->type) {
+		case PARAM_TYPE_S32:
+			fops = &fops_s32;
+			break;
+		case PARAM_TYPE_STRING:
+			fops = &fops_string;
+			break;
+		}
+
+		if (unlikely(!fops)) {
+			dev_err(model->kbdev->dev,
+				"Type not set for %s parameter %s\n",
+				model->ops->name, param->name);
+		} else {
+			debugfs_create_file(param->name, S_IRUGO | S_IWUSR,
+					    dir, param, fops);
+		}
+	}
+}
+
+void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model,
+	const char *name, s32 val)
+{
+	struct kbase_ipa_model_param *param;
+
+	mutex_lock(&model->kbdev->ipa.lock);
+
+	list_for_each_entry(param, &model->params, link) {
+		if (!strcmp(param->name, name)) {
+			if (param->type == PARAM_TYPE_S32) {
+				*param->addr.s32p = val;
+			} else {
+				dev_err(model->kbdev->dev,
+					"Wrong type for %s parameter %s\n",
+					model->ops->name, param->name);
+			}
+			break;
+		}
+	}
+
+	mutex_unlock(&model->kbdev->ipa.lock);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_model_param_set_s32);
+
+void kbase_ipa_debugfs_init(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->ipa.lock);
+
+	if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model)
+		kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model);
+	kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model);
+
+	mutex_unlock(&kbdev->ipa.lock);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h
new file mode 100644
index 0000000..f624de9
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_IPA_DEBUGFS_H_
+#define _KBASE_IPA_DEBUGFS_H_
+
+enum kbase_ipa_model_param_type {
+	PARAM_TYPE_S32 = 1,
+	PARAM_TYPE_STRING,
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+void kbase_ipa_debugfs_init(struct kbase_device *kbdev);
+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name,
+			      void *addr, size_t size,
+			      enum kbase_ipa_model_param_type type);
+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_model_param_set_s32 - Set an integer model parameter
+ *
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @val:	new value of the parameter
+ *
+ * This function is only exposed for use by unit tests running in
+ * kernel space. Normally it is expected that parameter values will
+ * instead be set via debugfs.
+ */
+void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model,
+	const char *name, s32 val);
+
+#else /* CONFIG_DEBUG_FS */
+
+static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model,
+					    const char *name, void *addr,
+					    size_t size,
+					    enum kbase_ipa_model_param_type type)
+{
+	return 0;
+}
+
+static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
+{ }
+
+#endif /* CONFIG_DEBUG_FS */
+
+#endif /* _KBASE_IPA_DEBUGFS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
new file mode 100644
index 0000000..70e08b3
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
@@ -0,0 +1,323 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/thermal.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+#include <linux/of.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+
+#include "mali_kbase.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_ipa_simple.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+#if MALI_UNIT_TEST
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+static unsigned long dummy_temp;
+
+static int kbase_simple_power_model_get_dummy_temp(
+	struct thermal_zone_device *tz,
+	unsigned long *temp)
+{
+	*temp = ACCESS_ONCE(dummy_temp);
+	return 0;
+}
+
+#else
+static int dummy_temp;
+
+static int kbase_simple_power_model_get_dummy_temp(
+	struct thermal_zone_device *tz,
+	int *temp)
+{
+	*temp = ACCESS_ONCE(dummy_temp);
+	return 0;
+}
+#endif
+
+/* Intercept calls to the kernel function using a macro */
+#ifdef thermal_zone_get_temp
+#undef thermal_zone_get_temp
+#endif
+#define thermal_zone_get_temp(tz, temp) \
+	kbase_simple_power_model_get_dummy_temp(tz, temp)
+
+void kbase_simple_power_model_set_dummy_temp(int temp)
+{
+	ACCESS_ONCE(dummy_temp) = temp;
+}
+KBASE_EXPORT_TEST_API(kbase_simple_power_model_set_dummy_temp);
+
+#endif /* MALI_UNIT_TEST */
+
+/*
+ * This model is primarily designed for the Juno platform. It may not be
+ * suitable for other platforms. The additional resources in this model
+ * should preferably be minimal, as this model is rarely used when a dynamic
+ * model is available.
+ */
+
+/**
+ * struct kbase_ipa_model_simple_data - IPA context per device
+ * @dynamic_coefficient: dynamic coefficient of the model
+ * @static_coefficient:  static coefficient of the model
+ * @ts:                  Thermal scaling coefficients of the model
+ * @tz_name:             Thermal zone name
+ * @gpu_tz:              thermal zone device
+ * @poll_temperature_thread: Handle for temperature polling thread
+ * @current_temperature: Most recent value of polled temperature
+ * @temperature_poll_interval_ms: How often temperature should be checked, in ms
+ */
+
+struct kbase_ipa_model_simple_data {
+	u32 dynamic_coefficient;
+	u32 static_coefficient;
+	s32 ts[4];
+	char tz_name[16];
+	struct thermal_zone_device *gpu_tz;
+	struct task_struct *poll_temperature_thread;
+	int current_temperature;
+	int temperature_poll_interval_ms;
+};
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+/**
+ * calculate_temp_scaling_factor() - Calculate temperature scaling coefficient
+ * @ts:		Signed coefficients, in order t^0 to t^3, with units Deg^-N
+ * @t:		Temperature, in mDeg C. Range: -2^17 < t < 2^17
+ *
+ * Scale the temperature according to a cubic polynomial whose coefficients are
+ * provided in the device tree. The result is used to scale the static power
+ * coefficient, where 1000000 means no change.
+ *
+ * Return: Temperature scaling factor. Range 0 <= ret <= 10,000,000.
+ */
+static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t)
+{
+	/* Range: -2^24 < t2 < 2^24 m(Deg^2) */
+	const s64 t2 = div_s64((t * t), 1000);
+
+	/* Range: -2^31 < t3 < 2^31 m(Deg^3) */
+	const s64 t3 = div_s64((t * t2), 1000);
+
+	/*
+	 * Sum the parts. t^[1-3] are in m(Deg^N), but the coefficients are in
+	 * Deg^-N, so we need to multiply the last coefficient by 1000.
+	 * Range: -2^63 < res_big < 2^63
+	 */
+	const s64 res_big = ts[3] * t3    /* +/- 2^62 */
+			  + ts[2] * t2    /* +/- 2^55 */
+			  + ts[1] * t     /* +/- 2^48 */
+			  + ts[0] * 1000; /* +/- 2^41 */
+
+	/* Range: -2^60 < res_unclamped < 2^60 */
+	s64 res_unclamped = div_s64(res_big, 1000);
+
+	/* Clamp to range of 0x to 10x the static power */
+	return clamp(res_unclamped, (s64) 0, (s64) 10000000);
+}
+
+/* We can't call thermal_zone_get_temp() directly in model_static_coeff(),
+ * because we don't know if tz->lock is held in the same thread. So poll it in
+ * a separate thread to get around this. */
+static int poll_temperature(void *data)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *) data;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	unsigned long temp;
+#else
+	int temp;
+#endif
+
+	while (!kthread_should_stop()) {
+		struct thermal_zone_device *tz = ACCESS_ONCE(model_data->gpu_tz);
+
+		if (tz) {
+			int ret;
+
+			ret = thermal_zone_get_temp(tz, &temp);
+			if (ret) {
+				pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+						    ret);
+				temp = FALLBACK_STATIC_TEMPERATURE;
+			}
+		} else {
+			temp = FALLBACK_STATIC_TEMPERATURE;
+		}
+
+		ACCESS_ONCE(model_data->current_temperature) = temp;
+
+		msleep_interruptible(ACCESS_ONCE(model_data->temperature_poll_interval_ms));
+	}
+
+	return 0;
+}
+
+static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp)
+{
+	u32 temp_scaling_factor;
+	struct kbase_ipa_model_simple_data *model_data =
+		(struct kbase_ipa_model_simple_data *) model->model_data;
+	u64 coeff_big;
+	int temp;
+
+	temp = ACCESS_ONCE(model_data->current_temperature);
+
+	/* Range: 0 <= temp_scaling_factor < 2^24 */
+	temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts,
+							    temp);
+
+	/*
+	 * Range: 0 <= coeff_big < 2^52 to avoid overflowing *coeffp. This
+	 * means static_coefficient must be in range
+	 * 0 <= static_coefficient < 2^28.
+	 */
+	coeff_big = (u64) model_data->static_coefficient * (u64) temp_scaling_factor;
+	*coeffp = div_u64(coeff_big, 1000000);
+
+	return 0;
+}
+
+static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
+			       u32 current_freq)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+		(struct kbase_ipa_model_simple_data *) model->model_data;
+
+	*coeffp = model_data->dynamic_coefficient;
+
+	return 0;
+}
+
+static int add_params(struct kbase_ipa_model *model)
+{
+	int err = 0;
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+
+	err = kbase_ipa_model_add_param_s32(model, "static-coefficient",
+					    &model_data->static_coefficient,
+					    1, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient",
+					    &model_data->dynamic_coefficient,
+					    1, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_s32(model, "ts",
+					    model_data->ts, 4, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_string(model, "thermal-zone",
+					       model_data->tz_name,
+					       sizeof(model_data->tz_name), true);
+	if (err)
+		goto end;
+
+	model_data->temperature_poll_interval_ms = 200;
+	err = kbase_ipa_model_add_param_s32(model, "temp-poll-interval-ms",
+					    &model_data->temperature_poll_interval_ms,
+					    1, false);
+
+end:
+	return err;
+}
+
+static int kbase_simple_power_model_init(struct kbase_ipa_model *model)
+{
+	int err;
+	struct kbase_ipa_model_simple_data *model_data;
+
+	model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data),
+			     GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model->model_data = (void *) model_data;
+
+	model_data->current_temperature = FALLBACK_STATIC_TEMPERATURE;
+	model_data->poll_temperature_thread = kthread_run(poll_temperature,
+							  (void *) model_data,
+							  "mali-simple-power-model-temp-poll");
+	if (IS_ERR(model_data->poll_temperature_thread)) {
+		kfree(model_data);
+		return PTR_ERR(model_data->poll_temperature_thread);
+	}
+
+	err = add_params(model);
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kthread_stop(model_data->poll_temperature_thread);
+		kfree(model_data);
+	}
+
+	return err;
+}
+
+static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+	struct thermal_zone_device *tz;
+
+	if (!strnlen(model_data->tz_name, sizeof(model_data->tz_name))) {
+		tz = NULL;
+	} else {
+		tz = thermal_zone_get_zone_by_name(model_data->tz_name);
+
+		if (IS_ERR_OR_NULL(tz)) {
+			pr_warn_ratelimited("Error %ld getting thermal zone \'%s\', not yet ready?\n",
+					    PTR_ERR(tz), model_data->tz_name);
+			tz = NULL;
+			return -EPROBE_DEFER;
+		}
+	}
+
+	ACCESS_ONCE(model_data->gpu_tz) = tz;
+
+	return 0;
+}
+
+static void kbase_simple_power_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+
+	kthread_stop(model_data->poll_temperature_thread);
+
+	kfree(model_data);
+}
+
+struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = {
+		.name = "mali-simple-power-model",
+		.init = &kbase_simple_power_model_init,
+		.recalculate = &kbase_simple_power_model_recalculate,
+		.term = &kbase_simple_power_model_term,
+		.get_dynamic_coeff = &model_dynamic_coeff,
+		.get_static_coeff = &model_static_coeff,
+		.do_utilization_scaling_in_framework = true,
+};
+KBASE_EXPORT_TEST_API(kbase_simple_ipa_model_ops);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h
new file mode 100644
index 0000000..e78d617
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_IPA_SIMPLE_H_
+#define _KBASE_IPA_SIMPLE_H_
+
+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
+
+extern struct kbase_ipa_model_ops kbase_simple_ipa_model_ops;
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_simple_power_model_set_dummy_temp() - set a dummy temperature value
+ * @temp: Temperature of the thermal zone, in millidegrees celsius.
+ *
+ * This is only intended for use in unit tests, to ensure that the temperature
+ * values used by the simple power model are predictable. Deterministic
+ * behavior is necessary to allow validation of the static power values
+ * computed by this model.
+ */
+void kbase_simple_power_model_set_dummy_temp(int temp);
+#endif /* MALI_UNIT_TEST */
+
+#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+#endif /* _KBASE_IPA_SIMPLE_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
new file mode 100644
index 0000000..9b9fa0e
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
@@ -0,0 +1,229 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_ipa_vinstr_common.h"
+
+#if MALI_UNIT_TEST
+static ktime_t dummy_time;
+
+/* Intercept calls to the kernel function using a macro */
+#ifdef ktime_get
+#undef ktime_get
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+#define ktime_get() (ACCESS_ONCE(dummy_time))
+
+void kbase_ipa_set_dummy_time(ktime_t t)
+{
+	ACCESS_ONCE(dummy_time) = t;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_set_dummy_time);
+#else
+#define ktime_get() (READ_ONCE(dummy_time))
+
+void kbase_ipa_set_dummy_time(ktime_t t)
+{
+	WRITE_ONCE(dummy_time, t);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_set_dummy_time);
+
+#endif
+
+#endif /* MALI_UNIT_TEST */
+
+/**
+ * read_hwcnt() - read a counter value
+ * @model_data:		pointer to model data
+ * @offset:		offset, in bytes, into vinstr buffer
+ *
+ * Return: A 32-bit counter value. Range: 0 < value < 2^27 (worst case would be
+ * incrementing every cycle over a ~100ms sample period at a high frequency,
+ * e.g. 1 GHz: 2^30 * 0.1seconds ~= 2^27.
+ */
+static inline u32 kbase_ipa_read_hwcnt(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	u32 offset)
+{
+	u8 *p = model_data->vinstr_buffer;
+
+	return *(u32 *)&p[offset];
+}
+
+static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
+{
+	if (S64_MAX - a < b)
+		return S64_MAX;
+	return a + b;
+}
+
+s64 kbase_ipa_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+	u64 core_mask;
+	u32 base = 0;
+	s64 ret = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			/* 0 < counter_value < 2^27 */
+			u32 counter_value = kbase_ipa_read_hwcnt(model_data,
+						       base + counter);
+
+			/* 0 < ret < 2^27 * max_num_cores = 2^32 */
+			ret = kbase_ipa_add_saturate(ret, counter_value);
+		}
+		base += KBASE_IPA_NR_BYTES_PER_BLOCK;
+		core_mask >>= 1;
+	}
+
+	/* Range: -2^54 < ret < 2^54 */
+	ret *= coeff;
+
+	return div_s64(ret, 1000000);
+}
+
+s64 kbase_ipa_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter)
+{
+	/* Range: 0 < counter_value < 2^27 */
+	const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter);
+
+	/* Range: -2^49 < ret < 2^49 */
+	const s64 multiplied = (s64) counter_value * (s64) coeff;
+
+	/* Range: -2^29 < return < 2^29 */
+	return div_s64(multiplied, 1000000);
+}
+
+int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	size_t dump_size;
+
+	dump_size = kbase_vinstr_dump_size(kbdev);
+	model_data->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!model_data->vinstr_buffer) {
+		dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
+		return -1;
+	}
+
+	setup.jm_bm = ~0u;
+	setup.shader_bm = ~0u;
+	setup.tiler_bm = ~0u;
+	setup.mmu_l2_bm = ~0u;
+	model_data->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx,
+			&setup, model_data->vinstr_buffer);
+	if (!model_data->vinstr_cli) {
+		dev_err(kbdev->dev, "Failed to register IPA with vinstr core");
+		kfree(model_data->vinstr_buffer);
+		model_data->vinstr_buffer = NULL;
+		return -1;
+	}
+
+	model_data->last_sample_read_time = ktime_get();
+	kbase_vinstr_hwc_clear(model_data->vinstr_cli);
+
+	return 0;
+}
+
+void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	if (model_data->vinstr_cli)
+		kbase_vinstr_detach_client(model_data->vinstr_cli);
+	model_data->vinstr_cli = NULL;
+	kfree(model_data->vinstr_buffer);
+	model_data->vinstr_buffer = NULL;
+}
+
+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
+	u32 current_freq)
+{
+	struct kbase_ipa_model_vinstr_data *model_data =
+			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+	s64 energy = 0;
+	size_t i;
+	ktime_t now = ktime_get();
+	ktime_t time_since_last_sample =
+			ktime_sub(now, model_data->last_sample_read_time);
+	/* Range: 2^0 < time_since_last_sample_ms < 2^10 (1-1000ms) */
+	s64 time_since_last_sample_ms = ktime_to_ms(time_since_last_sample);
+	u64 coeff = 0;
+	u64 num_cycles;
+	int err = 0;
+
+	err = kbase_vinstr_hwc_dump(model_data->vinstr_cli,
+				    BASE_HWCNT_READER_EVENT_MANUAL);
+	if (err)
+		goto err0;
+
+	model_data->last_sample_read_time = now;
+
+	/* Range of 'energy' is +/- 2^34 * number of IPA groups, so around
+	 * -2^38 < energy < 2^38 */
+	for (i = 0; i < model_data->groups_def_num; i++) {
+		const struct kbase_ipa_group *group = &model_data->groups_def[i];
+		s32 coeff, group_energy;
+
+		coeff = model_data->group_values[i];
+		group_energy = group->op(model_data, coeff, group->counter_block_offset);
+
+		energy = kbase_ipa_add_saturate(energy, group_energy);
+	}
+
+	/* Range: 0 <= coeff < 2^38 */
+	if (energy > 0)
+		coeff = energy;
+
+	/* Scale by user-specified factor and divide by 1000. But actually
+	 * cancel the division out, because we want the num_cycles in KHz and
+	 * don't want to lose precision. */
+
+	/* Range: 0 < coeff < 2^53 */
+	coeff = coeff * model_data->scaling_factor;
+
+	if (time_since_last_sample_ms == 0) {
+		time_since_last_sample_ms = 1;
+	} else if (time_since_last_sample_ms < 0) {
+		err = -ERANGE;
+		goto err0;
+	}
+
+	/* Range: 2^20 < num_cycles < 2^40 mCycles */
+	num_cycles = (u64) current_freq * (u64) time_since_last_sample_ms;
+	/* Range: 2^10 < num_cycles < 2^30 Cycles */
+	num_cycles = div_u64(num_cycles, 1000000);
+
+	/* num_cycles should never be 0 in _normal_ usage (because we expect
+	 * frequencies on the order of MHz and >10ms polling intervals), but
+	 * protect against divide-by-zero anyway. */
+	if (num_cycles == 0)
+		num_cycles = 1;
+
+	/* Range: 0 < coeff < 2^43 */
+	coeff = div_u64(coeff, num_cycles);
+
+err0:
+	/* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
+	*coeffp = clamp(coeff, (u64) 0, (u64) 1 << 16);
+	return err;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
new file mode 100644
index 0000000..d212c87
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
@@ -0,0 +1,163 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_IPA_VINSTR_COMMON_H_
+#define _KBASE_IPA_VINSTR_COMMON_H_
+
+#include "mali_kbase.h"
+
+/* Maximum length for the name of an IPA group. */
+#define KBASE_IPA_MAX_GROUP_NAME_LEN 15
+
+/* Maximum number of IPA groups for an IPA model. */
+#define KBASE_IPA_MAX_GROUP_DEF_NUM  16
+
+/* Number of bytes per hardware counter in a vinstr_buffer. */
+#define KBASE_IPA_NR_BYTES_PER_CNT    4
+
+/* Number of hardware counters per block in a vinstr_buffer. */
+#define KBASE_IPA_NR_CNT_PER_BLOCK   64
+
+/* Number of bytes per block in a vinstr_buffer. */
+#define KBASE_IPA_NR_BYTES_PER_BLOCK \
+	(KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT)
+
+
+
+/**
+ * struct kbase_ipa_model_vinstr_data - IPA context per device
+ * @kbdev:               pointer to kbase device
+ * @groups_def:          Array of IPA groups.
+ * @groups_def_num:      Number of elements in the array of IPA groups.
+ * @vinstr_cli:          vinstr client handle
+ * @vinstr_buffer:       buffer to dump hardware counters onto
+ * @last_sample_read_time: timestamp of last vinstr buffer read
+ * @scaling_factor:      user-specified power scaling factor. This is
+ *                       interpreted as a fraction where the denominator is
+ *                       1000. Range approx 0.0-32.0:
+ *                       0 < scaling_factor < 2^15
+ */
+struct kbase_ipa_model_vinstr_data {
+	struct kbase_device *kbdev;
+	s32 group_values[KBASE_IPA_MAX_GROUP_DEF_NUM];
+	const struct kbase_ipa_group *groups_def;
+	size_t groups_def_num;
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	ktime_t last_sample_read_time;
+	s32 scaling_factor;
+};
+
+/**
+ * struct ipa_group - represents a single IPA group
+ * @name:               name of the IPA group
+ * @default_value:      default value of coefficient for IPA group.
+ *                      Coefficients are interpreted as fractions where the
+ *                      denominator is 1000000.
+ * @op:                 which operation to be performed on the counter values
+ * @counter_block_offset:  block offset in bytes of the counter used to calculate energy for IPA group
+ */
+struct kbase_ipa_group {
+	char name[KBASE_IPA_MAX_GROUP_NAME_LEN + 1];
+	s32 default_value;
+	s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32);
+	u32 counter_block_offset;
+};
+
+/**
+ * sum_all_shader_cores() - sum a counter over all cores
+ * @model_data		pointer to model data
+ * @coeff		model coefficient. Unity is ~2^20, so range approx
+ * +/- 4.0: -2^22 < coeff < 2^22
+ * @counter     offset in bytes of the counter used to calculate energy for IPA group
+ *
+ * Calculate energy estimation based on hardware counter `counter'
+ * across all shader cores.
+ *
+ * Return: Sum of counter values. Range: -2^34 < ret < 2^34
+ */
+s64 kbase_ipa_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter);
+
+/**
+ * sum_single_counter() - sum a single counter
+ * @model_data		pointer to model data
+ * @coeff		model coefficient. Unity is ~2^20, so range approx
+ * +/- 4.0: -2^22 < coeff < 2^22
+ * @counter     offset in bytes of the counter used to calculate energy for IPA group
+ *
+ * Calculate energy estimation based on hardware counter `counter'.
+ *
+ * Return: Counter value. Range: -2^34 < ret < 2^34
+ */
+s64 kbase_ipa_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter);
+
+/**
+ * attach_vinstr() - attach a vinstr_buffer to an IPA model.
+ * @model_data		pointer to model data
+ *
+ * Attach a vinstr_buffer to an IPA model. The vinstr_buffer
+ * allows access to the hardware counters used to calculate
+ * energy consumption.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
+
+/**
+ * detach_vinstr() - detach a vinstr_buffer from an IPA model.
+ * @model_data		pointer to model data
+ *
+ * Detach a vinstr_buffer from an IPA model.
+ */
+void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
+
+/**
+ * kbase_ipa_vinstr_dynamic_coeff() - calculate dynamic power based on HW counters
+ * @model:		pointer to instantiated model
+ * @coeffp:		pointer to location where calculated power, in
+ *			pW/(Hz V^2), is stored.
+ * @current_freq:	frequency the GPU has been running at over the sample
+ *			period. In Hz. Range: 10 MHz < 1GHz,
+ *			2^20 < current_freq < 2^30
+ *
+ * This is a GPU-agnostic implementation of the get_dynamic_coeff()
+ * function of an IPA model. It relies on the model being populated
+ * with GPU-specific attributes at initialization time.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
+	u32 current_freq);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_ipa_set_dummy_time() - set a dummy monotonic time value
+ * @t: a monotonic time value
+ *
+ * This is only intended for use in unit tests, to ensure that the kernel time
+ * values used by a power model are predictable. Deterministic behavior is
+ * necessary to allow validation of the dynamic power values computed by the
+ * model.
+ */
+void kbase_ipa_set_dummy_time(ktime_t t);
+#endif /* MALI_UNIT_TEST */
+
+#endif /* _KBASE_IPA_VINSTR_COMMON_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g71.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g71.c
new file mode 100644
index 0000000..4e4c059
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g71.c
@@ -0,0 +1,251 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/thermal.h>
+
+#include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+
+/* Performance counter blocks base offsets */
+#define JM_BASE             (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define TILER_BASE          (1 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define MEMSYS_BASE         (2 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define SC0_BASE_ONE_MEMSYS (3 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define SC0_BASE_TWO_MEMSYS (4 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+
+/* JM counter block offsets */
+#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT *  6)
+
+/* Tiler counter block offsets */
+#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45)
+
+/* MEMSYS counter block offsets */
+#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25)
+
+/* SC counter block offsets */
+#define SC_FRAG_ACTIVE      (KBASE_IPA_NR_BYTES_PER_CNT *  4)
+#define SC_EXEC_CORE_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 26)
+#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28)
+#define SC_TEX_COORD_ISSUE  (KBASE_IPA_NR_BYTES_PER_CNT * 40)
+#define SC_VARY_SLOT_32     (KBASE_IPA_NR_BYTES_PER_CNT * 50)
+#define SC_VARY_SLOT_16     (KBASE_IPA_NR_BYTES_PER_CNT * 51)
+#define SC_BEATS_RD_LSC     (KBASE_IPA_NR_BYTES_PER_CNT * 56)
+#define SC_BEATS_WR_LSC     (KBASE_IPA_NR_BYTES_PER_CNT * 61)
+#define SC_BEATS_WR_TIB     (KBASE_IPA_NR_BYTES_PER_CNT * 62)
+
+/** Maximum number of cores for which a single Memory System block of performance counters is present. */
+#define KBASE_G71_SINGLE_MEMSYS_MAX_NUM_CORES ((u8)4)
+
+
+/**
+ * get_jm_counter() - get performance counter offset inside the Job Manager block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the Job Manager block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g71_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data,
+                                                u32 counter_block_offset)
+{
+	return JM_BASE + counter_block_offset;
+}
+
+/**
+ * get_memsys_counter() - get peformance counter offset inside the Memory System block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Memory System block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g71_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data,
+                                                    u32 counter_block_offset)
+{
+	/* The base address of Memory System performance counters is always the same, although their number
+	 * may vary based on the number of cores. For the moment it's ok to return a constant.
+	 */
+	return MEMSYS_BASE + counter_block_offset;
+}
+
+/**
+ * get_sc_counter() - get performance counter offset inside the Shader Cores block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Shader Cores block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g71_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
+                                                u32 counter_block_offset)
+{
+	const u32 sc_base = model_data->kbdev->gpu_props.num_cores <= KBASE_G71_SINGLE_MEMSYS_MAX_NUM_CORES ?
+	                    SC0_BASE_ONE_MEMSYS :
+	                    SC0_BASE_TWO_MEMSYS;
+
+	return sc_base + counter_block_offset;
+}
+
+/**
+ * memsys_single_counter() - calculate energy for a single Memory System performance counter.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Memory System performance counter.
+ */
+static s64 kbase_g71_memsys_single_counter(
+    struct kbase_ipa_model_vinstr_data *model_data,
+    s32 coeff,
+    u32 counter_block_offset)
+{
+	return kbase_ipa_single_counter(model_data, coeff,
+	                                kbase_g71_power_model_get_memsys_counter(model_data, counter_block_offset));
+}
+
+/**
+ * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a Shader Cores performance counter for all cores.
+ */
+static s64 kbase_g71_sum_all_shader_cores(
+    struct kbase_ipa_model_vinstr_data *model_data,
+    s32 coeff,
+    u32 counter_block_offset)
+{
+	return kbase_ipa_sum_all_shader_cores(model_data, coeff,
+	                                      kbase_g71_power_model_get_sc_counter(model_data, counter_block_offset));
+}
+
+/**
+ * jm_single_counter() - calculate energy for a single Job Manager performance counter.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Job Manager performance counter.
+ */
+static s64 kbase_g71_jm_single_counter(
+    struct kbase_ipa_model_vinstr_data *model_data,
+    s32 coeff,
+    u32 counter_block_offset)
+{
+	return kbase_ipa_single_counter(model_data, coeff,
+	                                kbase_g71_power_model_get_jm_counter(model_data, counter_block_offset));
+}
+
+/** Table of IPA group definitions.
+ *
+ * For each IPA group, this table defines a function to access the given performance block counter (or counters,
+ * if the operation needs to be iterated on multiple blocks) and calculate energy estimation.
+ */
+static const struct kbase_ipa_group ipa_groups_def[] = {
+	{
+		.name = "l2_access",
+		.default_value = 526300,
+		.op = kbase_g71_memsys_single_counter,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 301100,
+		.op = kbase_g71_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "tex_issue",
+		.default_value = 197400,
+		.op = kbase_g71_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_COORD_ISSUE,
+	},
+	{
+		.name = "tile_wb",
+		.default_value = -156400,
+		.op = kbase_g71_sum_all_shader_cores,
+		.counter_block_offset = SC_BEATS_WR_TIB,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 115800,
+		.op = kbase_g71_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+static int kbase_g71_power_model_init(struct kbase_ipa_model *model)
+{
+	int i, err = 0;
+	struct kbase_ipa_model_vinstr_data *model_data;
+
+	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model_data->kbdev = model->kbdev;
+	model_data->groups_def = ipa_groups_def;
+	BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def) > KBASE_IPA_MAX_GROUP_DEF_NUM);
+	model_data->groups_def_num = ARRAY_SIZE(ipa_groups_def);
+
+	model->model_data = (void *) model_data;
+
+	for (i = 0; i < ARRAY_SIZE(ipa_groups_def); ++i) {
+		const struct kbase_ipa_group *group = &ipa_groups_def[i];
+
+		model_data->group_values[i] = group->default_value;
+		err = kbase_ipa_model_add_param_s32(model, group->name,
+					&model_data->group_values[i],
+					1, false);
+		if (err)
+			goto exit;
+	}
+
+	model_data->scaling_factor = 5;
+	err = kbase_ipa_model_add_param_s32(model, "scale",
+					    &model_data->scaling_factor,
+					    1, false);
+	if (err)
+		goto exit;
+
+	err = kbase_ipa_attach_vinstr(model_data);
+
+exit:
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kfree(model_data);
+	}
+	return err;
+}
+
+static void kbase_g71_power_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_vinstr_data *model_data =
+			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+
+	kbase_ipa_detach_vinstr(model_data);
+	kfree(model_data);
+}
+
+
+struct kbase_ipa_model_ops kbase_g71_ipa_model_ops = {
+		.name = "mali-g71-power-model",
+		.init = kbase_g71_power_model_init,
+		.term = kbase_g71_power_model_term,
+		.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff,
+		.do_utilization_scaling_in_framework = false,
+};
+KBASE_EXPORT_TEST_API(kbase_g71_ipa_model_ops);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
new file mode 100644
index 0000000..c077461
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -0,0 +1,432 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t83x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t82x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tMIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tHEx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tSIx[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tDVx[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tNOx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tGOx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tKAx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tTRx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tBOx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
new file mode 100644
index 0000000..58710f6
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -0,0 +1,1193 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_ISSUES_H_
+#define _BASE_HWCONFIG_ISSUES_H_
+
+enum base_hw_issue {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8879,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_generic[] = {
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t72x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t76x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t60x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t62x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t86x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t83x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t82x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tDVx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tNOx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tKAx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tKAx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tBOx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tBOx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
new file mode 100644
index 0000000..94d1d74
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,1802 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _BASE_KERNEL_H_
+#define _BASE_KERNEL_H_
+
+typedef struct base_mem_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_mem_handle;
+
+#include "mali_base_mem_priv.h"
+#include "mali_kbase_profiling_gator_api.h"
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT              256
+
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET             ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET           ((unsigned char)0)
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined CDBG_ASSERT
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined KBASE_DEBUG_ASSERT
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#error assert macro not defined!
+#endif
+
+#if defined PAGE_MASK
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/**
+ * typedef base_mem_alloc_flags - Memory allocation, access/hint flags.
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD),
+ * which defines a write-only region on the CPU side, which is
+ * heavily read by the CPU...
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/* Memory allocation, access/hint flags.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* IN */
+/* Read access CPU side
+ */
+#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
+
+/* Write access CPU side
+ */
+#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
+
+/* Read access GPU side
+ */
+#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
+
+/* Write access GPU side
+ */
+#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
+
+/* Execute allowed on the GPU side
+ */
+#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
+
+	/* BASE_MEM_HINT flags have been removed, but their values are reserved
+	 * for backwards compatibility with older user-space drivers. The values
+	 * can be re-used once support for r5p0 user-space drivers is removed,
+	 * presumably in r7p0.
+	 *
+	 * RESERVED: (1U << 5)
+	 * RESERVED: (1U << 6)
+	 * RESERVED: (1U << 7)
+	 * RESERVED: (1U << 8)
+	 */
+
+/* Grow backing store on GPU Page Fault
+ */
+#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
+
+/* Page coherence Outer shareable, if available
+ */
+#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
+
+/* Page coherence Inner shareable
+ */
+#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
+
+/* Should be cached on the CPU
+ */
+#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
+
+/* IN/OUT */
+/* Must have same VA on both the GPU and the CPU
+ */
+#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
+
+/* OUT */
+/* Must call mmap to acquire a GPU address for the alloc
+ */
+#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
+
+/* IN */
+/* Page coherence Outer shareable, required.
+ */
+#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
+
+/* Secure memory
+ */
+#define BASE_MEM_SECURE ((base_mem_alloc_flags)1 << 16)
+
+/* Not needed physical memory
+ */
+#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
+
+/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
+ * addresses to be the same
+ */
+#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
+
+/**
+ * Bit 19 is reserved.
+ *
+ * Do not remove, use the next unreserved bit for new flags
+ **/
+#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
+
+/* Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 20
+
+/* A mask for all output bits, excluding IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/* A mask for all input bits, including IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/* A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+	 BASE_MEM_COHERENT_LOCAL)
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMP: UMP import. Handle type is ump_secure_id.
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	BASE_MEM_IMPORT_TYPE_UMP = 1,
+	BASE_MEM_IMPORT_TYPE_UMM = 2,
+	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+} base_mem_import_type;
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:	address of imported user buffer
+ * @length:	length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+	u64 ptr;
+	u64 length;
+};
+
+/**
+ * @brief Invalid memory handle.
+ *
+ * Return value from functions returning @ref base_mem_handle on error.
+ *
+ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} })
+
+/**
+ * @brief Special write-alloc memory handle.
+ *
+ * A special handle is used to represent a region where a special page is mapped
+ * with a write-alloc cache setup, typically used when the write result of the
+ * GPU isn't needed, but the GPU must write anyway.
+ *
+ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} })
+
+#define BASEP_MEM_INVALID_HANDLE               (0ull  << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE               (1ull  << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
+/* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
+						BASE_MEM_COOKIE_BASE)
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB  0xfffff000UL
+
+
+/* Bit mask of cookies used for for memory allocation setup */
+#define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
+
+
+/**
+ * @brief Result codes of changing the size of the backing store allocated to a tmem region
+ */
+typedef enum base_backing_threshold_status {
+	BASE_BACKING_THRESHOLD_OK = 0,			    /**< Resize successful */
+	BASE_BACKING_THRESHOLD_ERROR_OOM = -2,		    /**< Increase failed due to an out-of-memory condition */
+	BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
+} base_backing_threshold_status;
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief a basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+	struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent imported memory object.
+ * Simple opague handle to imported memory, can't be used
+ * with anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+	struct {
+		int fd;
+	} basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
+ */
+typedef struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completely unused
+ * by the Base driver. It can be used to store things such as callback
+ * function pointer, data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+	u64 blob[2];	 /**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ *         Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *         specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+	base_mem_handle handle;
+	u64 offset;
+	u64 length;
+};
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extent:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ */
+struct base_jit_alloc_info {
+	u64 gpu_alloc_addr;
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	u8 id;
+};
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
+ * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without
+ * changing the structure size).
+ * When the flag is set for a particular dependency to signal that it is an ordering only dependency then
+ * errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID  (0)       /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA     (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER    (1U << 1) /**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly.  By not specifying the
+ * correct settings can/will cause an early job termination.  Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u32 base_jd_core_req;
+
+/* Requirements that come from the HW */
+
+/**
+ * No requirement, dependency only
+ */
+#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
+
+/**
+ * Requires fragment shaders
+ */
+#define BASE_JD_REQ_FS  ((base_jd_core_req)1 << 0)
+
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS  ((base_jd_core_req)1 << 1)
+#define BASE_JD_REQ_T   ((base_jd_core_req)1 << 2)   /**< Requires tiling */
+#define BASE_JD_REQ_CF  ((base_jd_core_req)1 << 3)   /**< Requires cache flushes */
+#define BASE_JD_REQ_V   ((base_jd_core_req)1 << 4)   /**< Requires value writeback */
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC  ((base_jd_core_req)1 << 13)
+
+/**
+ * SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP  ((base_jd_core_req)1 << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON               ((base_jd_core_req)1 << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced no syncsets can be bundled with the atom
+ * but should instead be part of a NULL jobs inserted into the dependency tree.
+ * The first pre_dep object must be configured for the external resouces to use,
+ * the second pre_dep object can be used to create other dependencies.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES   ((base_jd_core_req)1 << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB        ((base_jd_core_req)1 << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME      (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER          (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/**
+ * SW Only requirement : Replay job.
+ *
+ * If the preceding job fails, the replay job will cause the jobs specified in
+ * the list of base_jd_replay_payload pointed to by the jc pointer to be
+ * replayed.
+ *
+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
+ * job is failed, as well as any following dependencies.
+ *
+ * The replayed jobs will require a number of atom IDs. If there are not enough
+ * free atom IDs then the replay job will fail.
+ *
+ * If the preceding job does not fail, then the replay job is returned as
+ * completed.
+ *
+ * The replayed jobs will never be returned to userspace. The preceding failed
+ * job will be returned to userspace as failed; the status of this job should
+ * be ignored. Completion should be determined by the status of the replay soft
+ * job.
+ *
+ * In order for the jobs to be replayed, the job headers will have to be
+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type
+ * field indicates a Vertex Shader Job then it will be changed to Null Job.
+ *
+ * The replayed jobs have the following assumptions :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - Pre-dependencies are created based on job order.
+ * - Atom numbers are automatically assigned.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ */
+#define BASE_JD_REQ_SOFT_REPLAY                 (BASE_JD_REQ_SOFT_JOB | 0x4)
+/**
+ * SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ *   other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ *   possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET              (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET            (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY             (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/**
+ * SW only requirement: Just In Time allocation
+ *
+ * This job requests a JIT allocation based on the request in the
+ * @base_jit_alloc_info structure which is passed via the jc element of
+ * the atom.
+ *
+ * It should be noted that the id entry in @base_jit_alloc_info must not
+ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job to free the JIT allocation is still made.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC              (BASE_JD_REQ_SOFT_JOB | 0x9)
+/**
+ * SW only requirement: Just In Time free
+ *
+ * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC
+ * to be freed. The ID of the JIT allocation is passed via the jc element of
+ * the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE               (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/**
+ * SW only requirement: Map external resource
+ *
+ * This job requests external resource(s) are mapped once the dependencies
+ * of the job have been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP            (BASE_JD_REQ_SOFT_JOB | 0xb)
+/**
+ * SW only requirement: Unmap external resource
+ *
+ * This job requests external resource(s) are unmapped once the dependencies
+ * of the job has been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP          (BASE_JD_REQ_SOFT_JOB | 0xc)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE    ((base_jd_core_req)1 << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE   ((base_jd_core_req)1 << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job starts which does not have this bit set or a job completes
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if
+ * the CPU may have written to memory addressed by the job since the last job
+ * without this bit set was submitted.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job completes which does not have this bit set or a job starts
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if
+ * the CPU may read from or partially overwrite memory addressed by the job
+ * before the next job without this bit set completes.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
+
+/**
+ * These requirement bits are currently unused in base_jd_core_req
+ */
+#define BASEP_JD_REQ_RESERVED \
+	(~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
+	BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
+	BASE_JD_REQ_EVENT_COALESCE | \
+	BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
+	BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
+	BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END))
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASE_JD_REQ_ATOM_TYPE \
+	(BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
+	BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of a soft job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
+
+/*
+ * Returns non-zero value if core requirements passed define a soft job or
+ * a dependency only job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \
+	((core_req & BASE_JD_REQ_SOFT_JOB) || \
+	(core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
+
+/**
+ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
+ * handles retaining cores for power management and affinity management.
+ *
+ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
+ * where lots of atoms could be submitted before powerup, and each has an
+ * affinity chosen that causes other atoms to have an affinity
+ * violation. Whilst the affinity was not causing violations at the time it
+ * was chosen, it could cause violations thereafter. For example, 1000 jobs
+ * could have had their affinity chosen during the powerup time, so any of
+ * those 1000 jobs could cause an affinity violation later on.
+ *
+ * The attack would otherwise occur because other atoms/contexts have to wait for:
+ * -# the currently running atoms (which are causing the violation) to
+ * finish
+ * -# and, the atoms that had their affinity chosen during powerup to
+ * finish. These are run preferentially because they don't cause a
+ * violation, but instead continue to cause the violation in others.
+ * -# or, the attacker is scheduled out (which might not happen for just 2
+ * contexts)
+ *
+ * By re-choosing the affinity (which is designed to avoid violations at the
+ * time it's chosen), we break condition (2) of the wait, which minimizes the
+ * problem to just waiting for current jobs to finish (which can be bounded if
+ * the Job Scheduling Policy has a timer).
+ */
+enum kbase_atom_coreref_state {
+	/** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
+	KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
+	/** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
+	KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
+	/** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
+	KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
+	/** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
+	KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
+	/** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
+	KBASE_ATOM_COREREF_STATE_READY
+};
+
+/*
+ * Base Atom priority
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling between atoms of the same type within
+ * a base context, and only after the atoms have had dependencies resolved.
+ * Fragment atoms does not affect non-frament atoms with lower priorities, and
+ * the other way around. For example, a low priority atom that has had its
+ * dependencies resolved might run before a higher priority atom that has not
+ * had its dependencies resolved.
+ *
+ * The scheduling between base contexts/processes and between atoms from
+ * different base contexts/processes is unaffected by atom priority.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
+ *   resolved, and atom 'X' has a higher priority than atom 'Y'
+ * - If atom 'Y' is currently running on the HW, then it is interrupted to
+ *   allow atom 'X' to run soon after
+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ *   the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * - Any two atoms that have the same priority could run in any order with
+ *   respect to each other. That is, there is no ordering constraint between
+ *   atoms of the same priority.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM  ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_HIGH    ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW     ((base_jd_prio)2)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+enum kbase_jd_atom_state {
+	/** Atom is not used */
+	KBASE_JD_ATOM_STATE_UNUSED,
+	/** Atom is queued in JD */
+	KBASE_JD_ATOM_STATE_QUEUED,
+	/** Atom has been given to JS (is runnable/running) */
+	KBASE_JD_ATOM_STATE_IN_JS,
+	/** Atom has been completed, but not yet handed back to job dispatcher
+	 *  for dependency resolution */
+	KBASE_JD_ATOM_STATE_HW_COMPLETED,
+	/** Atom has been completed, but not yet handed back to userspace */
+	KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+	base_atom_id  atom_id;               /**< An atom number */
+	base_jd_dep_type dependency_type;    /**< Dependency type */
+};
+
+/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value.
+ * In order to keep the size of the structure same, padding field has been adjusted
+ * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines)
+ * is added at the end of the structure. Place in the structure previously occupied by u16 core_req
+ * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission
+ * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left
+ * for possible future use. */
+typedef struct base_jd_atom_v2 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	u64 extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	u16 compat_core_req;	            /**< core requirements which correspond to the legacy support for UK 10.2 */
+	struct base_dependency pre_dep[2];  /**< pre-dependencies, one need to use SETTER function to assign this field,
+	this is done in order to reduce possibility of improper assigment of a dependency field */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;                  /**< Atom priority. Refer to @ref base_jd_prio for more details */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[1];
+	base_jd_core_req core_req;          /**< core requirements */
+} base_jd_atom_v2;
+
+typedef enum base_external_resource_access {
+	BASE_EXT_RES_ACCESS_SHARED,
+	BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+	u64 ext_resource;
+} base_external_resource;
+
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+	u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	u64 address;
+	u64 size;
+	struct base_external_resource extres;
+};
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] dep          The kbase jd atom dependency to be initialized.
+ * @param     id           The atom_id to be assigned.
+ * @param     dep_type     The dep_type to be assigned.
+ *
+ */
+static inline void base_jd_atom_dep_set(struct base_dependency *dep,
+		base_atom_id id, base_jd_dep_type dep_type)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	/*
+	 * make sure we don't set not allowed combinations
+	 * of atom_id/dependency_type.
+	 */
+	LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+			(id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
+
+	dep->atom_id = id;
+	dep->dependency_type = dep_type;
+}
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] dep          The kbase jd atom dependency to be written.
+ * @param[in]     from         The dependency to make a copy from.
+ *
+ */
+static inline void base_jd_atom_dep_copy(struct base_dependency *dep,
+		const struct base_dependency *from)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type);
+}
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @ref base_jd_submit and @ref base_jd_submit
+ * has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ *
+ * @pre @p fence must reference a @ref base_fence successfully initialized by
+ *      calling @ref base_fence_init.
+ * @pre @p fence was @e not initialized by calling @ref base_fence_import, nor
+ *      is it associated with a fence-trigger job that was already submitted
+ *      by calling @ref base_jd_submit.
+ * @post @p atom can be submitted by calling @ref base_jd_submit.
+ */
+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+	LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @ref base_jd_submit and
+ * @ref base_jd_submit has returned.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ *
+ * @pre @p fence must reference a @ref base_fence successfully initialized by
+ *      calling @ref base_fence_import, or it must be associated with a
+ *      fence-trigger job that was already submitted by calling
+ *      @ref base_jd_submit.
+ * @post @p atom can be submitted by calling @ref base_jd_submit.
+ */
+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up an external resource object to reference
+ * a memory allocation and the type of access requested.
+ *
+ * @param[in] res     The resource object to initialize
+ * @param     handle  The handle to the imported memory object, must be
+ *                    obtained by calling @ref base_mem_as_import_handle().
+ * @param     access  The type of access requested
+ */
+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
+{
+	u64 address;
+
+	address = handle.basep.handle;
+
+	LOCAL_ASSERT(res != NULL);
+	LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+	LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+	res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+	BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
+	BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
+	BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */
+	BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
+	BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
+	BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
+	BASE_JD_SW_EVENT_RESERVED = (3u << 11),	/**< Reserved event type */
+	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)	    /**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0  - subtype
+ * @li 12:11 - type
+ * @li 13    - SW success (only valid if the SW bit is set)
+ * @li 14    - SW event (HW event if not set)
+ * @li 15    - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+typedef enum base_jd_event_code {
+	/* HW defined exceptions */
+
+	/** Start of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+	/* non-fatal exceptions */
+	BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
+	BASE_JD_EVENT_DONE = 0x01,
+	BASE_JD_EVENT_STOPPED = 0x03,	  /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
+	BASE_JD_EVENT_TERMINATED = 0x04,  /**< This is actually a fault status code - the job was hard stopped */
+	BASE_JD_EVENT_ACTIVE = 0x08,	  /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
+
+	/** End of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+
+	/** Start of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+	/* job exceptions */
+	BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+	BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+	BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+	BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+	BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+	BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+	BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+	BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+	BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+	BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+	BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+	BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+	BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+	BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+	BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+	BASE_JD_EVENT_STATE_FAULT = 0x5A,
+	BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+	BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+	/* GPU exceptions */
+	BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+	BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+	/* MMU exceptions */
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+	BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+	BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+	/* SW defined exceptions */
+	BASE_JD_EVENT_MEM_GROWTH_FAILED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_TIMED_OUT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+	BASE_JD_EVENT_JOB_CANCELLED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+	BASE_JD_EVENT_JOB_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+	BASE_JD_EVENT_PM_EVENT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+	BASE_JD_EVENT_FORCE_REPLAY	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
+
+	BASE_JD_EVENT_BAG_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+	/** End of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+	BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
+	BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+	/** End of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of Kernel-only status codes. Such codes are never returned to user-space */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+	BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+	/** End of Kernel-only status codes. */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. The can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (ie all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+	base_jd_event_code event_code;  /**< event code */
+	base_atom_id atom_number;       /**< the atom number that has completed */
+	struct base_jd_udata udata;     /**< user data */
+} base_jd_event_v2;
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field
+ * of @ref base_jd_atom.
+ *
+ * It must not occupy the same CPU cache line(s) as any neighboring data.
+ * This is to avoid cases where access to pages containing the structure
+ * is shared between cached and un-cached memory regions, which would
+ * cause memory corruption.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+	u64 system_time;
+	u64 cycle_counter;
+	u64 sec;
+	u32 usec;
+	u8 padding[36];
+} base_dump_cpu_gpu_counters;
+
+/** @} end group base_user_api_job_dispatch */
+
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistent guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ *  - Part of a critical inner-loop
+ *  - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU  registers themselves.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * _mali_base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to _mali_base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties
+ *
+ * The Platform Config File sets up gpu properties that are specific to a
+ * certain platform. Properties that are 'Implementation Defined' in the
+ * Midgard Architecture spec are placed here.
+ *
+ * @note Reference configurations are provided for Midgard Implementations, such as
+ * the Mali-T600 family. The customer need not repeat this information, and can select one of
+ * these reference configurations. For example, VA_BITS, PA_BITS and the
+ * maximum number of samples per pixel might vary between Midgard Implementations, but
+ * \b not for platforms using the Mali-T604. This information is placed in
+ * the reference configuration files.
+ *
+ * The System Integrator creates the following structure:
+ * - platform_XYZ
+ * - platform_XYZ/plat
+ * - platform_XYZ/plat/plat_config.h
+ *
+ * They then edit plat_config.h, using the example plat_config.h files as a
+ * guide.
+ *
+ * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will
+ * receive a helpful \#error message if they do not do this correctly. This
+ * selects the Reference Configuration for the Midgard Implementation. The rationale
+ * behind this decision (against asking the customer to write \#include
+ * <gpus/mali_t600.h> in their plat_config.h) is as follows:
+ * - This mechanism 'looks' like a regular config file (such as Linux's
+ * .config)
+ * - It is difficult to get wrong in a way that will produce strange build
+ * errors:
+ *  - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and
+ *  so they won't accidentally pick another file with 'mali_t600' in its name
+ *  - When the build doesn't work, the System Integrator may think the DDK is
+ *  doesn't work, and attempt to fix it themselves:
+ *   - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the
+ *   error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells
+ *   you.
+ *   - For a \#include mechanism, checks must still be made elsewhere, which the
+ *   System Integrator may try working around by setting \#defines (such as
+ *   VA_BITS) themselves in their plat_config.h. In the  worst case, they may
+ *   set the prevention-mechanism \#define of
+ *   "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN".
+ *   - In this case, they would believe they are on the right track, because
+ *   the build progresses with their fix, but with errors elsewhere.
+ *
+ * However, there is nothing to prevent the customer using \#include to organize
+ * their own configurations files hierarchically.
+ *
+ * The mechanism for the header file processing is as follows:
+ *
+ * @dot
+   digraph plat_config_mechanism {
+	   rankdir=BT
+	   size="6,6"
+
+       "mali_base.h";
+	   "gpu/mali_gpu.h";
+
+	   node [ shape=box ];
+	   {
+	       rank = same; ordering = out;
+
+		   "gpu/mali_gpu_props.h";
+		   "base/midg_gpus/mali_t600.h";
+		   "base/midg_gpus/other_midg_gpu.h";
+	   }
+	   { rank = same; "plat/plat_config.h"; }
+	   {
+	       rank = same;
+		   "gpu/mali_gpu.h" [ shape=box ];
+		   gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ];
+		   select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ;
+	   }
+	   node [ shape=box ];
+	   { rank = same; "plat/plat_config.h"; }
+	   { rank = same; "mali_base.h"; }
+
+	   "mali_base.h" -> "gpu/mali_gpu.h" -> "gpu/mali_gpu_props.h";
+	   "mali_base.h" -> "plat/plat_config.h" ;
+	   "mali_base.h" -> select_gpu ;
+
+	   "plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ;
+	   gpu_chooser -> select_gpu [style="dotted,bold"] ;
+
+	   select_gpu -> "base/midg_gpus/mali_t600.h" ;
+	   select_gpu -> "base/midg_gpus/other_midg_gpu.h" ;
+   }
+   @enddot
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided the entire processed @ref base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algorithm can be determined either by a u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * required for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+struct mali_base_gpu_core_props {
+	/**
+	 * Product specific value.
+	 */
+	u32 product_id;
+
+	/**
+	 * Status of the GPU release.
+	 * No defined values, but starts at 0 and increases by one for each
+	 * release status (alpha, beta, EAC, etc.).
+	 * 4 bit values (0-15).
+	 */
+	u16 version_status;
+
+	/**
+	 * Minor release number of the GPU. "P" part of an "RnPn" release number.
+     * 8 bit values (0-255).
+	 */
+	u16 minor_revision;
+
+	/**
+	 * Major release number of the GPU. "R" part of an "RnPn" release number.
+     * 4 bit values (0-15).
+	 */
+	u16 major_revision;
+
+	u16 padding;
+
+	/**
+	 * This property is deprecated since it has not contained the real current
+	 * value of GPU clock speed. It is kept here only for backwards compatibility.
+	 * For the new ioctl interface, it is ignored and is treated as a padding
+	 * to keep the structure of the same size and retain the placement of its
+	 * members.
+	 */
+	u32 gpu_speed_mhz;
+
+	/**
+	 * @usecase GPU clock max/min speed is required for computing best/worst case
+	 * in tasks as job scheduling ant irq_throttling. (It is not specified in the
+	 *  Midgard Architecture).
+	 * Also, GPU clock max speed is used for OpenCL's clGetDeviceInfo() function.
+	 */
+	u32 gpu_freq_khz_max;
+	u32 gpu_freq_khz_min;
+
+	/**
+	 * Size of the shader program counter, in bits.
+	 */
+	u32 log2_program_counter_size;
+
+	/**
+	 * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+	 * bitpattern where a set bit indicates that the format is supported.
+	 *
+	 * Before using a texture format, it is recommended that the corresponding
+	 * bit be checked.
+	 */
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	/**
+	 * Theoretical maximum memory available to the GPU. It is unlikely that a
+	 * client will be able to allocate all of this memory for their own
+	 * purposes, but this at least provides an upper bound on the memory
+	 * available to the GPU.
+	 *
+	 * This is required for OpenCL's clGetDeviceInfo() call when
+	 * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+	 * client will not be expecting to allocate anywhere near this value.
+	 */
+	u64 gpu_available_memory_size;
+};
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+	u8 log2_line_size;
+	u8 log2_cache_size;
+	u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+	u32 bin_size_bytes;	/* Max is 4*2^15 */
+	u32 max_active_levels;	/* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+	u32 max_threads;            /* Max. number of threads per core */
+	u32 max_workgroup_size;     /* Max. number of threads per workgroup */
+	u32 max_barrier_size;       /* Max. number of threads that can synchronize on a simple barrier */
+	u16 max_registers;          /* Total size [1..65535] of the register file available per core. */
+	u8  max_task_queue;         /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+	u8  max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+	u8  impl_tech;              /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+	u8  padding[7];
+};
+
+/**
+ * @brief descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+	u64 core_mask;	       /**< Core restriction mask required for the group */
+	u16 num_cores;	       /**< Number of cores in the group */
+	u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+	u32 num_groups;
+
+	/**
+	 * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+	 *
+	 * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+	 * whether the core groups are coherent or not. Hence this member is needed
+	 * to calculate how much memory is required for dumping.
+	 *
+	 * @note Do not use it to work out how many valid elements are in the
+	 * group[] member. Use num_groups instead.
+	 */
+	u32 num_core_groups;
+
+	/**
+	 * Coherency features of the memory, accessed by @ref gpu_mem_features
+	 * methods
+	 */
+	u32 coherency;
+
+	u32 padding;
+
+	/**
+	 * Descriptors of coherent groups
+	 */
+	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+	u64 shader_present;
+	u64 tiler_present;
+	u64 l2_present;
+	u64 stack_present;
+
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 mem_features;
+	u32 mmu_features;
+
+	u32 as_present;
+
+	u32 js_present;
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 tiler_features;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	u32 gpu_id;
+
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+
+	/*
+	 * Note: This is the _selected_ coherency mode rather than the
+	 * available modes as exposed in the coherency_features register.
+	 */
+	u32 coherency_mode;
+};
+
+/**
+ * Return structure for _mali_base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this data structure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct mali_base_gpu_props {
+	struct mali_base_gpu_core_props core_props;
+	struct mali_base_gpu_l2_cache_props l2_props;
+	u64 unused_1; /* keep for backwards compatibility */
+	struct mali_base_gpu_tiler_props tiler_props;
+	struct mali_base_gpu_thread_props thread_props;
+
+	/** This member is large, likely to be 128 bytes */
+	struct gpu_raw_gpu_props raw_props;
+
+	/** This must be last member of the structure */
+	struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * \enum base_context_create_flags
+ *
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+enum base_context_create_flags {
+	/** No flags set */
+	BASE_CONTEXT_CREATE_FLAG_NONE = 0,
+
+	/** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */
+	BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0),
+
+	/** Base context is a 'System Monitor' context for Hardware counters.
+	 *
+	 * One important side effect of this is that job submission is disabled. */
+	BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1)
+};
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init()
+ */
+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
+	(((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
+	  ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
+ */
+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
+	((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
+
+/*
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+/** Private flag tracking whether job descriptor dumping is disabled */
+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31))
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C Pre-processor macros are exposed here to do with Platform
+ * Config.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revison.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/**
+ * @brief The payload for a replay job. This must be in GPU memory.
+ */
+typedef struct base_jd_replay_payload {
+	/**
+	 * Pointer to the first entry in the base_jd_replay_jc list.  These
+	 * will be replayed in @b reverse order (so that extra ones can be added
+	 * to the head in future soft jobs without affecting this soft job)
+	 */
+	u64 tiler_jc_list;
+
+	/**
+	 * Pointer to the fragment job chain.
+	 */
+	u64 fragment_jc;
+
+	/**
+	 * Pointer to the tiler heap free FBD field to be modified.
+	 */
+	u64 tiler_heap_free;
+
+	/**
+	 * Hierarchy mask for the replayed fragment jobs. May be zero.
+	 */
+	u16 fragment_hierarchy_mask;
+
+	/**
+	 * Hierarchy mask for the replayed tiler jobs. May be zero.
+	 */
+	u16 tiler_hierarchy_mask;
+
+	/**
+	 * Default weight to be used for hierarchy levels not in the original
+	 * mask.
+	 */
+	u32 hierarchy_default_weight;
+
+	/**
+	 * Core requirements for the tiler job chain
+	 */
+	base_jd_core_req tiler_core_req;
+
+	/**
+	 * Core requirements for the fragment job chain
+	 */
+	base_jd_core_req fragment_core_req;
+} base_jd_replay_payload;
+
+/**
+ * @brief An entry in the linked list of job chains to be replayed. This must
+ *        be in GPU memory.
+ */
+typedef struct base_jd_replay_jc {
+	/**
+	 * Pointer to next entry in the list. A setting of NULL indicates the
+	 * end of the list.
+	 */
+	u64 next;
+
+	/**
+	 * Pointer to the job chain.
+	 */
+	u64 jc;
+
+} base_jd_replay_jc;
+
+/* Maximum number of jobs allowed in a fragment chain in the payload of a
+ * replay job */
+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
+
+/** @} end group base_api */
+
+typedef struct base_profiling_controls {
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+} base_profiling_controls;
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
+
+/* Indicate that job dumping is enabled. This could affect certain timers
+ * to account for the performance impact. */
+#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
+
+#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
+		BASE_TLSTREAM_JOB_DUMPING_ENABLED)
+
+#endif				/* _BASE_KERNEL_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100644
index 0000000..4a98a72
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC	(1U << 0)
+#define BASE_SYNCSET_OP_CSYNC	(1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	base_mem_handle mem_handle;
+	u64 user_addr;
+	u64 size;
+	u8 type;
+	u8 padding[7];
+};
+
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
new file mode 100644
index 0000000..be454a2
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif	/*_BASE_VENDOR_SPEC_FUNC_H_*/
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100644
index 0000000..11113a9
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,611 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <asm/page.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
+#include <linux/sched/mm.h>
+#endif
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_linux.h>
+
+/*
+ * Include mali_kbase_defs.h first as this provides types needed by other local
+ * header files.
+ */
+#include "mali_kbase_defs.h"
+
+#include "mali_kbase_context.h"
+#include "mali_kbase_strings.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_trace_timeline.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_debug_job_fault.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_vinstr.h"
+
+#include "ipa/mali_kbase_ipa.h"
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+
+#ifndef u64_to_user_ptr
+/* Introduced in Linux v4.6 */
+#define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x)
+#endif
+
+/*
+ * Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: configuration attributes member of kbdev needs to have
+* been setup before calling kbase_device_init
+*/
+
+/*
+* API to acquire device list semaphore and return pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+void kbase_destroy_context(struct kbase_context *kctx);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+
+/**
+ * kbase_jd_submit - Submit atoms to the job dispatcher
+ *
+ * @kctx: The kbase context to submit to
+ * @user_addr: The address in user space of the struct base_jd_atom_v2 array
+ * @nr_atoms: The number of atoms in the array
+ * @stride: sizeof(struct base_jd_atom_v2)
+ * @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6)
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_jd_submit(struct kbase_context *kctx,
+		void __user *user_addr, u32 nr_atoms, u32 stride,
+		bool uk6_atom);
+
+/**
+ * kbase_jd_done_worker - Handle a job completion
+ * @data: a &struct work_struct
+ *
+ * This function requeues the job from the runpool (if it was soft-stopped or
+ * removed from NEXT registers).
+ *
+ * Removes it from the system if it finished/failed/was cancelled.
+ *
+ * Resolves dependencies to add dependent jobs to the context, potentially
+ * starting them if necessary (which may add more references to the context)
+ *
+ * Releases the reference to the context from the no-longer-running job.
+ *
+ * Handles retrying submission outside of IRQ context if it failed from within
+ * IRQ context.
+ */
+void kbase_jd_done_worker(struct work_struct *data);
+
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+		kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+bool jd_submit_atom(struct kbase_context *kctx,
+			 const struct base_jd_atom_v2 *user_atom,
+			 struct kbase_jd_atom *katom);
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ *                                               and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The hwaccess_lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom);
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom);
+#endif
+int kbase_soft_event_update(struct kbase_context *kctx,
+			    u64 event,
+			    unsigned char new_status);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+void kbasep_soft_job_timeout_worker(unsigned long data);
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
+
+/* api used internally for register access. Contains validation and tracing */
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size);
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev     The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code  exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+		u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or has already been suspended
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+	return kbdev->pm.suspending;
+}
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int result;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+	result = katom - &kctx->jctx.atoms[0];
+	KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
+	return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id:   ID of atom to retrieve
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+		struct kbase_context *kctx, int id)
+{
+	return &kctx->jctx.atoms[id];
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just increment
+ * the counter during disjoint events we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down
+ * should be called
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if !defined(UINT64_MAX)
+	#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#endif
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)     \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+	kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+	kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+	trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(subcode);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_DEBUG
+/**
+ * kbase_set_driver_inactive - Force driver to go inactive
+ * @kbdev:    Device pointer
+ * @inactive: true if driver should go inactive, false otherwise
+ *
+ * Forcing the driver inactive will cause all future IOCTLs to wait until the
+ * driver is made active again. This is intended solely for the use of tests
+ * which require that no jobs are running while the test executes.
+ */
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/* kbase_io_history_init - initialize data struct for register access history
+ *
+ * @kbdev The register history to initialize
+ * @n The number of register accesses that the buffer could hold
+ *
+ * @return 0 if successfully initialized, failure otherwise
+ */
+int kbase_io_history_init(struct kbase_io_history *h, u16 n);
+
+/* kbase_io_history_term - uninit all resources for the register access history
+ *
+ * @h The register history to terminate
+ */
+void kbase_io_history_term(struct kbase_io_history *h);
+
+/* kbase_io_history_dump - print the register history to the kernel ring buffer
+ *
+ * @kbdev Pointer to kbase_device containing the register history to dump
+ */
+void kbase_io_history_dump(struct kbase_device *kbdev);
+
+/**
+ * kbase_io_history_resize - resize the register access history buffer.
+ *
+ * @h: Pointer to a valid register history to resize
+ * @new_size: Number of accesses the buffer could hold
+ *
+ * A successful resize will clear all recent register accesses.
+ * If resizing fails for any reason (e.g., could not allocate memory, invalid
+ * buffer size) then the original buffer will be kept intact.
+ *
+ * @return 0 if the buffer was resized, failure otherwise
+ */
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbase_io_history_init(...) ((int)0)
+
+#define kbase_io_history_term CSTD_NOP
+
+#define kbase_io_history_dump CSTD_NOP
+
+#define kbase_io_history_resize CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+#endif
+
+
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100644
index 0000000..6b3559d
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015,2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* This function is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the
+ * restart index is out of bounds and the rerun causes a tile range fault. If this happens
+ * we try to clamp the restart index to a correct value and rerun the job.
+ */
+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+	struct device *dev = katom->kctx->kbdev->dev;
+	u32   clamped = 0;
+	struct kbase_va_region *region;
+	struct tagged_addr *page_array;
+	u64 page_index;
+	u32 offset = katom->jc & (~PAGE_MASK);
+	u32 *page_1 = NULL;
+	u32 *page_2 = NULL;
+	u32   job_header[JOB_HEADER_SIZE_IN_WORDS];
+	void *dst = job_header;
+	u32 minX, minY, maxX, maxY;
+	u32 restartX, restartY;
+	struct page *p;
+	u32 copy_size;
+
+	dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+	if (!(katom->core_req & BASE_JD_REQ_FS))
+		return 0;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+			katom->jc);
+	if (!region || (region->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(region);
+	if (!page_array)
+		goto out_unlock;
+
+	page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+	p = phys_to_page(as_phys_addr_t(page_array[page_index]));
+
+	/* we need the first 10 words of the fragment shader job descriptor.
+	 * We need to check that the offset + 10 words is less that the page
+	 * size otherwise we need to load the next page.
+	 * page_size_overflow will be equal to 0 in case the whole descriptor
+	 * is within the page > 0 otherwise.
+	 */
+	copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+	page_1 = kmap_atomic(p);
+
+	/* page_1 is a u32 pointer, offset is expressed in bytes */
+	page_1 += offset>>2;
+
+	kbase_sync_single_for_cpu(katom->kctx->kbdev,
+			kbase_dma_addr(p) + offset,
+			copy_size, DMA_BIDIRECTIONAL);
+
+	memcpy(dst, page_1, copy_size);
+
+	/* The data needed overflows page the dimension,
+	 * need to map the subsequent page */
+	if (copy_size < JOB_HEADER_SIZE) {
+		p = phys_to_page(as_phys_addr_t(page_array[page_index + 1]));
+		page_2 = kmap_atomic(p);
+
+		kbase_sync_single_for_cpu(katom->kctx->kbdev,
+				kbase_dma_addr(p),
+				JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+		memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+	}
+
+	/* We managed to correctly map one or two pages (in case of overflow) */
+	/* Get Bounding Box data and restart index from fault address low word */
+	minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+	restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+	dev_warn(dev, "Before Clamping:\n"
+			"Jobstatus: %08x\n"
+			"restartIdx: %08x\n"
+			"Fault_addr_low: %08x\n"
+			"minCoordsX: %08x minCoordsY: %08x\n"
+			"maxCoordsX: %08x maxCoordsY: %08x\n",
+			job_header[JOB_DESC_STATUS_WORD],
+			job_header[JOB_DESC_RESTART_INDEX_WORD],
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+			minX, minY,
+			maxX, maxY);
+
+	/* Set the restart index to the one which generated the fault*/
+	job_header[JOB_DESC_RESTART_INDEX_WORD] =
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+	if (restartX < minX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to minimum. %08x clamped to %08x\n",
+			restartX, minX);
+		clamped =  1;
+	}
+	if (restartY < minY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to minimum. %08x clamped to %08x\n",
+			restartY, minY);
+		clamped =  1;
+	}
+	if (restartX > maxX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to maximum. %08x clamped to %08x\n",
+			restartX, maxX);
+		clamped =  1;
+	}
+	if (restartY > maxY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to maximum. %08x clamped to %08x\n",
+			restartY, maxY);
+		clamped =  1;
+	}
+
+	if (clamped) {
+		/* Reset the fault address low word
+		 * and set the job status to STOPPED */
+		job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+		job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+		dev_warn(dev, "After Clamping:\n"
+				"Jobstatus: %08x\n"
+				"restartIdx: %08x\n"
+				"Fault_addr_low: %08x\n"
+				"minCoordsX: %08x minCoordsY: %08x\n"
+				"maxCoordsX: %08x maxCoordsY: %08x\n",
+				job_header[JOB_DESC_STATUS_WORD],
+				job_header[JOB_DESC_RESTART_INDEX_WORD],
+				job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+				minX, minY,
+				maxX, maxY);
+
+		/* Flush CPU cache to update memory for future GPU reads*/
+		memcpy(page_1, dst, copy_size);
+		p = phys_to_page(as_phys_addr_t(page_array[page_index]));
+
+		kbase_sync_single_for_device(katom->kctx->kbdev,
+				kbase_dma_addr(p) + offset,
+				copy_size, DMA_TO_DEVICE);
+
+		if (copy_size < JOB_HEADER_SIZE) {
+			memcpy(page_2, dst + copy_size,
+					JOB_HEADER_SIZE - copy_size);
+			p = phys_to_page(as_phys_addr_t(page_array[page_index +
+								   1]));
+
+			kbase_sync_single_for_device(katom->kctx->kbdev,
+					kbase_dma_addr(p),
+					JOB_HEADER_SIZE - copy_size,
+					DMA_TO_DEVICE);
+		}
+	}
+	if (copy_size < JOB_HEADER_SIZE)
+		kunmap_atomic(page_2);
+
+	kunmap_atomic(page_1);
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+	return clamped;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100644
index 0000000..099a298
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
new file mode 100644
index 0000000..f910fe9
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+
+static int kbase_as_fault_read(struct seq_file *sfile, void *data)
+{
+	uintptr_t as_no = (uintptr_t) sfile->private;
+
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+	struct kbase_device *kbdev = NULL;
+
+	kbdev_list = kbase_dev_list_get();
+
+	list_for_each(entry, kbdev_list) {
+		kbdev = list_entry(entry, struct kbase_device, entry);
+
+		if(kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) {
+
+			/* don't show this one again until another fault occors */
+			kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no);
+
+			/* output the last page fault addr */
+			seq_printf(sfile, "%llu\n", (u64) kbdev->as[as_no].fault_addr);
+		}
+
+	}
+
+	kbase_dev_list_put(kbdev_list);
+
+	return 0;
+}
+
+static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbase_as_fault_read , in->i_private);
+}
+
+static const struct file_operations as_fault_fops = {
+	.open = kbase_as_fault_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ *  Initialize debugfs entry for each address space
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	uint i;
+	char as_name[64];
+	struct dentry *debugfs_directory;
+
+	kbdev->debugfs_as_read_bitmap = 0ULL;
+
+	KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces);
+	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));
+
+	debugfs_directory = debugfs_create_dir("address_spaces",
+		kbdev->mali_debugfs_directory);
+
+	if(debugfs_directory) {
+		for(i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+			snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
+			debugfs_create_file(as_name, S_IRUGO,
+				debugfs_directory, (void*) ((uintptr_t) i), &as_fault_fops);
+		}
+	}
+	else
+		dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory");
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+	return;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
new file mode 100644
index 0000000..3ed2248
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_AS_FAULT_DEBUG_FS_H
+#define _KBASE_AS_FAULT_DEBUG_FS_H
+
+/**
+ * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults
+ *
+ * @kbdev: Pointer to kbase_device
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_as_fault_debugfs_new() - make the last fault available on debugfs
+ *
+ * @kbdev: Pointer to kbase_device
+ * @as_no: The address space the fault occurred on
+ */
+static inline void
+kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
+#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_MALI_DEBUG */
+	return;
+}
+
+#endif  /*_KBASE_AS_FAULT_DEBUG_FS_H*/
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100644
index 0000000..1d11de6
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+	u32 cache_flags = 0;
+
+	CSTD_UNUSED(nr_pages);
+
+	if (flags & BASE_MEM_CACHED_CPU)
+		cache_flags |= KBASE_REG_CPU_CACHED;
+
+	return cache_flags;
+}
+
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	dma_sync_single_for_device(kbdev->dev, handle, size, dir);
+}
+
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
new file mode 100644
index 0000000..0c18bdb
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * kbase_cache_enabled - Choose the cache policy for a specific region
+ * @flags:    flags describing attributes of the region
+ * @nr_pages: total number of pages (backed or not) for the region
+ *
+ * Tells whether the CPU and GPU caches should be enabled or not for a specific
+ * region.
+ * This function can be modified to customize the cache policy depending on the
+ * flags and size of the region.
+ *
+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED
+ *         depending on the cache policy
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
new file mode 100644
index 0000000..fb615ae
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
@@ -0,0 +1,51 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+int kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_init_func)
+		return platform_funcs_p->platform_init_func(kbdev);
+
+	return 0;
+}
+
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_term_func)
+		platform_funcs_p->platform_term_func(kbdev);
+}
+
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed)
+{
+	KBASE_DEBUG_ASSERT(NULL != clock_speed);
+
+	*clock_speed = 100;
+	return 0;
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
new file mode 100644
index 0000000..212e3b1
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
@@ -0,0 +1,343 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <asm/page.h>
+
+#include <mali_malisw.h>
+#include <mali_kbase_backend_config.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+#include <linux/rbtree.h>
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/**
+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ *
+ * Specifies the functions pointers for platform specific initialization and
+ * termination. By default no functions are required. No additional platform
+ * specific control is necessary.
+ */
+struct kbase_platform_funcs_conf {
+	/**
+	 * platform_init_func - platform specific init function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Returns 0 on success, negative error code otherwise.
+	 *
+	 * Function pointer for platform specific initialization or NULL if no
+	 * initialization function is required. At the point this the GPU is
+	 * not active and its power and clocks are in unknown (platform specific
+	 * state) as kbase doesn't yet have control of power and clocks.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly initialized) in here.
+	 */
+	int (*platform_init_func)(struct kbase_device *kbdev);
+	/**
+	 * platform_term_func - platform specific termination function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Function pointer for platform specific termination or NULL if no
+	 * termination function is required. At the point this the GPU will be
+	 * idle but still powered and clocked.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly terminated) in here.
+	 */
+	void (*platform_term_func)(struct kbase_device *kbdev);
+};
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+	/** Callback for when the GPU is idle and the power to it can be switched off.
+	 *
+	 * The system integrator can decide whether to either do nothing, just switch off
+	 * the clocks to the GPU, or to completely power down the GPU.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the GPU is about to become active and power must be supplied.
+	 *
+	 * This function must not return until the GPU is powered and clocked sufficiently for register access to
+	 * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
+	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 *
+	 * The return value of the first call to this function is ignored.
+	 *
+	 * @return 1 if the GPU state may have been lost, 0 otherwise.
+	 */
+	int (*power_on_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is requesting a suspend and GPU power
+	 * must be switched off.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a preceding call to power_off_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_off_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is resuming from a suspend and GPU
+	 * power must be switched on.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a following call to power_on_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_on_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_resume_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management initialization.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * will become active from calls made to the OS from within this function.
+	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else int error code.
+	 */
+	 int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management termination.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * should no longer be called by the OS on completion of this function.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-off power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_suspend callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
+	 */
+	void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-on power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_resume callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+
+	/*
+	 * Optional callback for checking if GPU can be suspended when idle
+	 *
+	 * This callback will be called by the runtime power management core
+	 * when the reference count goes to 0 to provide notification that the
+	 * GPU now seems idle.
+	 *
+	 * If this callback finds that the GPU can't be powered off, or handles
+	 * suspend by powering off directly or queueing up a power off, a
+	 * non-zero value must be returned to prevent the runtime PM core from
+	 * also triggering a suspend.
+	 *
+	 * Returning 0 will cause the runtime PM core to conduct a regular
+	 * autosuspend.
+	 *
+	 * This callback is optional and if not provided regular autosuspend
+	 * will be triggered.
+	 *
+	 * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+	 * this feature.
+	 *
+	 * Return 0 if GPU can be suspended, positive value if it can not be
+	 * suspeneded by runtime PM, else OS error code
+	 */
+	int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
+};
+
+/**
+ * kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC
+ * @clock_speed - see  kbase_cpu_clk_speed_func for details on the parameters
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * Default implementation of CPU_SPEED_FUNC. This function sets clock_speed
+ * to 100, so will be an underestimate for any real system.
+ */
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed);
+
+/**
+ * kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current CPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ *
+ * This is mainly used to implement OpenCL's clGetDeviceInfo().
+ */
+typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed);
+
+/**
+ * kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current GPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ * When an error is returned the caller assumes maximum GPU speed stored in
+ * gpu_freq_khz_max.
+ *
+ * If the system timer is not available then this function is required
+ * for the OpenCL queue profiling to return correct timing information.
+ *
+ */
+typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed);
+
+#ifdef CONFIG_OF
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+	u64 start;
+	u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ */
+struct kbase_io_resources {
+	u32                      job_irq_number;
+	u32                      mmu_irq_number;
+	u32                      gpu_irq_number;
+	struct kbase_io_memory_region io_memory_region;
+};
+
+struct kbase_platform_config {
+	const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init: - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes.  The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ *         Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that are outside of the Power Management
+ * callbacks.
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_platform_early_init - Early initialisation of the platform code
+ *
+ * This function will be called when the module is loaded to perform any
+ * early initialisation required by the platform code. Such as reading
+ * platform specific device tree entries for the GPU.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_early_init(void);
+
+#ifndef CONFIG_OF
+/**
+ * kbase_platform_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_register(void);
+
+/**
+ * kbase_platform_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_register()
+ */
+void kbase_platform_unregister(void);
+#endif
+
+	  /** @} *//* end group kbase_config */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_CONFIG_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100644
index 0000000..aee6b0c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,271 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+* Boolean indicating whether the driver is configured to be secure at
+* a potential loss of performance.
+*
+* This currently affects only r0p0-15dev0 HW and earlier.
+*
+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+* performance:
+*
+* - When this is set to true, the driver remains fully secure,
+* but potentially loses performance compared with setting this to
+* false.
+* - When set to false, the driver is open to certain security
+* attacks.
+*
+* From r0p0-00rel0 and onwards, there is no security loss by setting
+* this to false, and no performance loss by setting it to
+* true.
+*/
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 */
+	KBASE_AID_32 = 0x0,
+
+	/**
+	 * Restrict GPU to a half of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_16 = 0x3,
+
+	/**
+	 * Restrict GPU to a quarter of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_8  = 0x2,
+
+	/**
+	 * Restrict GPU to an eighth of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_4  = 0x1
+};
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 * Restricting ID width will reduce performance & bus load due to GPU.
+	 */
+	KBASE_3BIT_AID_32 = 0x0,
+
+	/* Restrict GPU to 7/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_28 = 0x1,
+
+	/* Restrict GPU to 3/4 of maximum Address ID count. */
+	KBASE_3BIT_AID_24 = 0x2,
+
+	/* Restrict GPU to 5/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_20 = 0x3,
+
+	/* Restrict GPU to 1/2 of maximum Address ID count.  */
+	KBASE_3BIT_AID_16 = 0x4,
+
+	/* Restrict GPU to 3/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_12 = 0x5,
+
+	/* Restrict GPU to 1/4 of maximum Address ID count. */
+	KBASE_3BIT_AID_8  = 0x6,
+
+	/* Restrict GPU to 1/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_4  = 0x7
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_3BIT_ARID_LIMIT KBASE_3BIT_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_3BIT_AWID_LIMIT KBASE_3BIT_AID_32
+
+/**
+ * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
+ * defines which UMP device this GPU should be mapped to.
+ */
+#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
+
+/*
+ * Default period for DVFS sampling
+ */
+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+
+/*
+ * Power Management poweroff tick granuality. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/*
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/*
+ * Power Manager number of ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
+
+/*
+ * Default scheduling tick granuality
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
+
+/*
+ * Default timeout for some software jobs, after which the software event wait
+ * jobs will be cancelled.
+ */
+#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
+ */
+#define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */
+
+/*
+ * Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
+/*
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
+
+/*
+ * Perform GPU power down using only platform specific code, skipping DDK power
+ * management.
+ *
+ * If this is non-zero then kbase will avoid powering down shader cores, the
+ * tiler, and the L2 cache, instead just powering down the entire GPU through
+ * platform specific code. This may be required for certain platform
+ * integrations.
+ *
+ * Note that as this prevents kbase from powering down shader cores, this limits
+ * the available power policies to coarse_demand and always_on.
+ */
+#define PLATFORM_POWER_DOWN_ONLY (0)
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
new file mode 100644
index 0000000..f43db48
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -0,0 +1,359 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_dma_fence.h>
+#include <mali_kbase_ctx_sched.h>
+
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context.
+ *
+ * Return: new kbase context
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+{
+	struct kbase_context *kctx;
+	int err;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* zero-inited as lot of code assume it's zero'ed out on create */
+	kctx = vzalloc(sizeof(*kctx));
+
+	if (!kctx)
+		goto out;
+
+	/* creating a context is considered a disjoint event */
+	kbase_disjoint_event(kbdev);
+
+	kctx->kbdev = kbdev;
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+	atomic_set(&kctx->refcount, 0);
+	if (is_compat)
+		kbase_ctx_flag_set(kctx, KCTX_COMPAT);
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kctx->timeline.owner_tgid = task_tgid_nr(current);
+#endif
+	atomic_set(&kctx->setup_complete, 0);
+	atomic_set(&kctx->setup_in_progress, 0);
+	spin_lock_init(&kctx->mm_update_lock);
+	kctx->process_mm = NULL;
+	atomic_set(&kctx->nonmapped_pages, 0);
+	kctx->slots_pullable = 0;
+	kctx->tgid = current->tgid;
+	kctx->pid = current->pid;
+
+	err = kbase_mem_pool_init(&kctx->mem_pool,
+				  kbdev->mem_pool_max_size_default,
+				  KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
+				  kctx->kbdev,
+				  &kbdev->mem_pool);
+	if (err)
+		goto free_kctx;
+
+	err = kbase_mem_pool_init(&kctx->lp_mem_pool,
+				  (kbdev->mem_pool_max_size_default >> 9),
+				  KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
+				  kctx->kbdev,
+				  &kbdev->lp_mem_pool);
+	if (err)
+		goto free_mem_pool;
+
+	err = kbase_mem_evictable_init(kctx);
+	if (err)
+		goto free_both_pools;
+
+	atomic_set(&kctx->used_pages, 0);
+
+	err = kbase_jd_init(kctx);
+	if (err)
+		goto deinit_evictable;
+
+	err = kbasep_js_kctx_init(kctx);
+	if (err)
+		goto free_jd;	/* safe to call kbasep_js_kctx_term  in this case */
+
+	err = kbase_event_init(kctx);
+	if (err)
+		goto free_jd;
+
+	atomic_set(&kctx->drain_pending, 0);
+
+	mutex_init(&kctx->reg_lock);
+
+	mutex_init(&kctx->mem_partials_lock);
+	INIT_LIST_HEAD(&kctx->mem_partials);
+
+	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+	spin_lock_init(&kctx->waiting_soft_jobs_lock);
+	err = kbase_dma_fence_init(kctx);
+	if (err)
+		goto free_event;
+
+	err = kbase_mmu_init(kctx);
+	if (err)
+		goto term_dma_fence;
+
+	do {
+		err = kbase_mem_pool_grow(&kctx->mem_pool,
+				MIDGARD_MMU_BOTTOMLEVEL);
+		if (err)
+			goto pgd_no_mem;
+
+		mutex_lock(&kctx->mmu_lock);
+		kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+		mutex_unlock(&kctx->mmu_lock);
+	} while (!kctx->pgd);
+
+	p = kbase_mem_alloc_page(&kctx->mem_pool);
+	if (!p)
+		goto no_sink_page;
+	kctx->aliasing_sink_page = as_tagged(page_to_phys(p));
+
+	init_waitqueue_head(&kctx->event_queue);
+
+	kctx->cookies = KBASE_COOKIE_MASK;
+
+	/* Make sure page 0 is not used... */
+	err = kbase_region_tracker_init(kctx);
+	if (err)
+		goto no_region_tracker;
+
+	err = kbase_sticky_resource_init(kctx);
+	if (err)
+		goto no_sticky;
+
+	err = kbase_jit_init(kctx);
+	if (err)
+		goto no_jit;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_set(&kctx->jctx.work_id, 0);
+#endif
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
+#endif
+
+	kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+	mutex_init(&kctx->vinstr_cli_lock);
+
+	setup_timer(&kctx->soft_job_timeout,
+		    kbasep_soft_job_timeout_worker,
+		    (uintptr_t)kctx);
+
+	return kctx;
+
+no_jit:
+	kbase_gpu_vm_lock(kctx);
+	kbase_sticky_resource_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+no_sticky:
+	kbase_region_tracker_term(kctx);
+no_region_tracker:
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+no_sink_page:
+	/* VM lock needed for the call to kbase_mmu_free_pgd */
+	kbase_gpu_vm_lock(kctx);
+	kbase_mmu_free_pgd(kctx);
+	kbase_gpu_vm_unlock(kctx);
+pgd_no_mem:
+	kbase_mmu_term(kctx);
+term_dma_fence:
+	kbase_dma_fence_term(kctx);
+free_event:
+	kbase_event_cleanup(kctx);
+free_jd:
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+	kbase_jd_exit(kctx);
+deinit_evictable:
+	kbase_mem_evictable_deinit(kctx);
+free_both_pools:
+	kbase_mem_pool_term(&kctx->lp_mem_pool);
+free_mem_pool:
+	kbase_mem_pool_term(&kctx->mem_pool);
+free_kctx:
+	vfree(kctx);
+out:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context);
+
+static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+{
+	dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+}
+
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Calls kbase_destroy_os_context() to free OS specific structures.
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	int pages;
+	unsigned long pending_regions_to_clean;
+	unsigned long flags;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+	/* Ensure the core is powered up for the destroy process */
+	/* A suspend won't happen here, because we're in a syscall from a userspace
+	 * thread. */
+	kbase_pm_context_active(kbdev);
+
+	kbase_jd_zap_context(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	/* Removing the rest of the debugfs entries here as we want to keep the
+	 * atom debugfs interface alive until all atoms have completed. This
+	 * is useful for debugging hung contexts. */
+	debugfs_remove_recursive(kctx->kctx_dentry);
+#endif
+
+	kbase_event_cleanup(kctx);
+
+	/*
+	 * JIT must be terminated before the code below as it must be called
+	 * without the region lock being held.
+	 * The code above ensures no new JIT allocations can be made by
+	 * by the time we get to this point of context tear down.
+	 */
+	kbase_jit_term(kctx);
+
+	kbase_gpu_vm_lock(kctx);
+
+	kbase_sticky_resource_term(kctx);
+
+	/* MMU is disabled as part of scheduling out the context */
+	kbase_mmu_free_pgd(kctx);
+
+	/* drop the aliasing sink page now that it can't be mapped anymore */
+	p = phys_to_page(as_phys_addr_t(kctx->aliasing_sink_page));
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+
+	/* free pending region setups */
+	pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+	while (pending_regions_to_clean) {
+		unsigned int cookie = __ffs(pending_regions_to_clean);
+
+		BUG_ON(!kctx->pending_regions[cookie]);
+
+		kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+
+		kctx->pending_regions[cookie] = NULL;
+		pending_regions_to_clean &= ~(1UL << cookie);
+	}
+
+	kbase_region_tracker_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+
+	kbase_jd_exit(kctx);
+
+	kbase_dma_fence_term(kctx);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_remove_ctx(kctx);
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_mmu_term(kctx);
+
+	pages = atomic_read(&kctx->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_evictable_deinit(kctx);
+	kbase_mem_pool_term(&kctx->mem_pool);
+	kbase_mem_pool_term(&kctx->lp_mem_pool);
+	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+	vfree(kctx);
+
+	kbase_pm_context_idle(kbdev);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context);
+
+/**
+ * kbase_context_set_create_flags - Set creation flags on a context
+ * @kctx: Kbase context
+ * @flags: Flags to set
+ *
+ * Return: 0 on success
+ */
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
+{
+	int err = 0;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long irq_flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Validate flags */
+	if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+
+	/* Translate the flags */
+	if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+		kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* Latch the initial attributes into the Job Scheduler */
+	kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
+
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ out:
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h
new file mode 100644
index 0000000..a3f5bb0
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h
@@ -0,0 +1,90 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_CONTEXT_H_
+#define _KBASE_CONTEXT_H_
+
+#include <linux/atomic.h>
+
+
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+/**
+ * kbase_ctx_flag - Check if @flag is set on @kctx
+ * @kctx: Pointer to kbase context to check
+ * @flag: Flag to check
+ *
+ * Return: true if @flag is set on @kctx, false if not.
+ */
+static inline bool kbase_ctx_flag(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	return atomic_read(&kctx->flags) & flag;
+}
+
+/**
+ * kbase_ctx_flag_clear - Clear @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Clear the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
+					enum kbase_context_flags flag)
+{
+#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
+	/*
+	 * Earlier kernel versions doesn't have atomic_andnot() or
+	 * atomic_and(). atomic_clear_mask() was only available on some
+	 * architectures and removed on arm in v3.13 on arm and arm64.
+	 *
+	 * Use a compare-exchange loop to clear the flag on pre 4.3 kernels,
+	 * when atomic_andnot() becomes available.
+	 */
+	int old, new;
+
+	do {
+		old = atomic_read(&kctx->flags);
+		new = old & ~flag;
+
+	} while (atomic_cmpxchg(&kctx->flags, old, new) != old);
+#else
+	atomic_andnot(flag, &kctx->flags);
+#endif
+}
+
+/**
+ * kbase_ctx_flag_set - Set @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Set the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	atomic_or(flag, &kctx->flags);
+}
+#endif /* _KBASE_CONTEXT_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
new file mode 100644
index 0000000..a6b0ac7
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -0,0 +1,4184 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_mem_linux.h>
+#ifdef CONFIG_MALI_DEVFREQ
+#include <linux/devfreq.h>
+#include <backend/gpu/mali_kbase_devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <ipa/mali_kbase_ipa_debugfs.h>
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+#ifdef CONFIG_MALI_NO_MALI
+#include "mali_kbase_model_linux.h"
+#endif /* CONFIG_MALI_NO_MALI */
+#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_debugfs.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_regs_dump_debugfs.h"
+#endif /* !MALI_CUSTOMER_RELEASE */
+#include "mali_kbase_regs_history_debugfs.h"
+#include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_ioctl.h"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/compat.h>	/* is_compat_task */
+#include <linux/mman.h>
+#include <linux/version.h>
+#include <mali_kbase_hw.h>
+#include <platform/mali_kbase_platform_common.h>
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+#include <linux/clk.h>
+#include <linux/delay.h>
+
+#include <mali_kbase_config.h>
+
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif
+
+#include <mali_kbase_tlstream.h>
+
+#include <mali_kbase_as_fault_debugfs.h>
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static int kbase_dev_nr;
+
+static DEFINE_MUTEX(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+
+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
+
+static int kbase_api_handshake(struct kbase_context *kctx,
+		struct kbase_ioctl_version_check *version)
+{
+	switch (version->major) {
+	case BASE_UK_VERSION_MAJOR:
+		/* set minor to be the lowest common */
+		version->minor = min_t(int, BASE_UK_VERSION_MINOR,
+				(int)version->minor);
+		break;
+	default:
+		/* We return our actual version regardless if it
+		 * matches the version returned by userspace -
+		 * userspace can bail if it can't handle this
+		 * version */
+		version->major = BASE_UK_VERSION_MAJOR;
+		version->minor = BASE_UK_VERSION_MINOR;
+		break;
+	}
+
+	/* save the proposed version number for later use */
+	kctx->api_version = KBASE_API_VERSION(version->major, version->minor);
+
+	return 0;
+}
+
+/**
+ * enum mali_error - Mali error codes shared with userspace
+ *
+ * This is subset of those common Mali errors that can be returned to userspace.
+ * Values of matching user and kernel space enumerators MUST be the same.
+ * MALI_ERROR_NONE is guaranteed to be 0.
+ *
+ * @MALI_ERROR_NONE: Success
+ * @MALI_ERROR_OUT_OF_GPU_MEMORY: Not used in the kernel driver
+ * @MALI_ERROR_OUT_OF_MEMORY: Memory allocation failure
+ * @MALI_ERROR_FUNCTION_FAILED: Generic error code
+ */
+enum mali_error {
+	MALI_ERROR_NONE = 0,
+	MALI_ERROR_OUT_OF_GPU_MEMORY,
+	MALI_ERROR_OUT_OF_MEMORY,
+	MALI_ERROR_FUNCTION_FAILED,
+};
+
+enum {
+	inited_mem = (1u << 0),
+	inited_js = (1u << 1),
+	/* Bit number 2 was earlier assigned to the runtime-pm initialization
+	 * stage (which has been merged with the backend_early stage).
+	 */
+#ifdef CONFIG_MALI_DEVFREQ
+	inited_devfreq = (1u << 3),
+#endif /* CONFIG_MALI_DEVFREQ */
+	inited_tlstream = (1u << 4),
+	inited_backend_early = (1u << 5),
+	inited_backend_late = (1u << 6),
+	inited_device = (1u << 7),
+	inited_vinstr = (1u << 8),
+
+	inited_job_fault = (1u << 10),
+	inited_sysfs_group = (1u << 11),
+	inited_misc_register = (1u << 12),
+	inited_get_device = (1u << 13),
+	inited_dev_list = (1u << 14),
+	inited_debugfs = (1u << 15),
+	inited_gpu_device = (1u << 16),
+	inited_registers_map = (1u << 17),
+	inited_io_history = (1u << 18),
+	inited_power_control = (1u << 19),
+	inited_buslogger = (1u << 20),
+	inited_protected = (1u << 21),
+	inited_ctx_sched = (1u << 22)
+};
+
+
+#ifdef CONFIG_MALI_DEBUG
+#define INACTIVE_WAIT_MS (5000)
+
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive)
+{
+	kbdev->driver_inactive = inactive;
+	wake_up(&kbdev->driver_inactive_wait);
+
+	/* Wait for any running IOCTLs to complete */
+	if (inactive)
+		msleep(INACTIVE_WAIT_MS);
+}
+KBASE_EXPORT_TEST_API(kbase_set_driver_inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+static struct kbase_device *to_kbase_device(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
+
+static int assign_irqs(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int i;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* 3 IRQ resources */
+	for (i = 0; i < 3; i++) {
+		struct resource *irq_res;
+		int irqtag;
+
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res) {
+			dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+			return -ENOENT;
+		}
+
+#ifdef CONFIG_OF
+		if (!strncmp(irq_res->name, "JOB", 4)) {
+			irqtag = JOB_IRQ_TAG;
+		} else if (!strncmp(irq_res->name, "MMU", 4)) {
+			irqtag = MMU_IRQ_TAG;
+		} else if (!strncmp(irq_res->name, "GPU", 4)) {
+			irqtag = GPU_IRQ_TAG;
+		} else {
+			dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+				irq_res->name);
+			return -EINVAL;
+		}
+#else
+		irqtag = i;
+#endif /* CONFIG_OF */
+		kbdev->irqs[irqtag].irq = irq_res->start;
+		kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK;
+	}
+
+	return 0;
+}
+
+/*
+ * API to acquire device list mutex and
+ * return pointer to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void)
+{
+	mutex_lock(&kbase_dev_list_lock);
+	return &kbase_dev_list;
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_get);
+
+/* API to release the device list mutex */
+void kbase_dev_list_put(const struct list_head *dev_list)
+{
+	mutex_unlock(&kbase_dev_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_put);
+
+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */
+struct kbase_device *kbase_find_device(int minor)
+{
+	struct kbase_device *kbdev = NULL;
+	struct list_head *entry;
+	const struct list_head *dev_list = kbase_dev_list_get();
+
+	list_for_each(entry, dev_list) {
+		struct kbase_device *tmp;
+
+		tmp = list_entry(entry, struct kbase_device, entry);
+		if (tmp->mdev.minor == minor || minor == -1) {
+			kbdev = tmp;
+			get_device(kbdev->dev);
+			break;
+		}
+	}
+	kbase_dev_list_put(dev_list);
+
+	return kbdev;
+}
+EXPORT_SYMBOL(kbase_find_device);
+
+void kbase_release_device(struct kbase_device *kbdev)
+{
+	put_device(kbdev->dev);
+}
+EXPORT_SYMBOL(kbase_release_device);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && \
+		!(LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 28) && \
+		LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0))
+/*
+ * Older versions, before v4.6, of the kernel doesn't have
+ * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28
+ */
+static int kstrtobool_from_user(const char __user *s, size_t count, bool *res)
+{
+	char buf[32];
+
+	count = min(sizeof(buf), count);
+
+	if (copy_from_user(buf, s, count))
+		return -EFAULT;
+	buf[count] = '\0';
+
+	return strtobool(buf, res);
+}
+#endif
+
+static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	int err;
+	bool value;
+
+	err = kstrtobool_from_user(ubuf, size, &value);
+	if (err)
+		return err;
+
+	if (value)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+	else
+		kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE);
+
+	return size;
+}
+
+static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	char buf[32];
+	int count;
+	bool value;
+
+	value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE);
+
+	count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N");
+
+	return simple_read_from_buffer(ubuf, size, off, buf, count);
+}
+
+static const struct file_operations kbase_infinite_cache_fops = {
+	.open = simple_open,
+	.write = write_ctx_infinite_cache,
+	.read = read_ctx_infinite_cache,
+};
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+	struct kbase_device *kbdev = NULL;
+	struct kbase_context *kctx;
+	int ret = 0;
+#ifdef CONFIG_DEBUG_FS
+	char kctx_name[64];
+#endif
+
+	kbdev = kbase_find_device(iminor(inode));
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kctx = kbase_create_context(kbdev, is_compat_task());
+	if (!kctx) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	init_waitqueue_head(&kctx->event_queue);
+	filp->private_data = kctx;
+	kctx->filp = filp;
+
+	if (kbdev->infinite_cache_active_default)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+
+#ifdef CONFIG_DEBUG_FS
+	snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+
+	kctx->kctx_dentry = debugfs_create_dir(kctx_name,
+			kbdev->debugfs_ctx_directory);
+
+	if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry,
+			    kctx, &kbase_infinite_cache_fops);
+
+	mutex_init(&kctx->mem_profile_lock);
+
+	kbasep_jd_debugfs_ctx_init(kctx);
+	kbase_debug_mem_view_init(filp);
+
+	kbase_debug_job_fault_context_init(kctx);
+
+	kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool, &kctx->lp_mem_pool);
+
+	kbase_jit_debugfs_init(kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+	dev_dbg(kbdev->dev, "created base context\n");
+
+	{
+		struct kbasep_kctx_list_element *element;
+
+		element = kzalloc(sizeof(*element), GFP_KERNEL);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			element->kctx = kctx;
+			list_add(&element->link, &kbdev->kctx_list);
+			KBASE_TLSTREAM_TL_NEW_CTX(
+					element->kctx,
+					element->kctx->id,
+					(u32)(element->kctx->tgid));
+			mutex_unlock(&kbdev->kctx_list_lock);
+		} else {
+			/* we don't treat this as a fail - just warn about it */
+			dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n");
+		}
+	}
+	return 0;
+
+ out:
+	kbase_release_device(kbdev);
+	return ret;
+}
+
+static int kbase_release(struct inode *inode, struct file *filp)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_kctx_list_element *element, *tmp;
+	bool found_element = false;
+
+	KBASE_TLSTREAM_TL_DEL_CTX(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	kbasep_mem_profile_debugfs_remove(kctx);
+	kbase_debug_job_fault_context_term(kctx);
+#endif
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found_element = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+	if (!found_element)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	filp->private_data = NULL;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	/* If this client was performing hwcnt dumping and did not explicitly
+	 * detach itself, remove it from the vinstr core now */
+	if (kctx->vinstr_cli) {
+		struct kbase_uk_hwcnt_setup setup;
+
+		setup.dump_buffer = 0llu;
+		kbase_vinstr_legacy_hwc_setup(
+				kbdev->vinstr_ctx, &kctx->vinstr_cli, &setup);
+	}
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	kbase_destroy_context(kctx);
+
+	dev_dbg(kbdev->dev, "deleted base context\n");
+	kbase_release_device(kbdev);
+	return 0;
+}
+
+static int kbase_api_set_flags(struct kbase_context *kctx,
+		struct kbase_ioctl_set_flags *flags)
+{
+	int err;
+
+	/* setup pending, try to signal that we'll do the setup,
+	 * if setup was already in progress, err this call
+	 */
+	if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0)
+		return -EINVAL;
+
+	err = kbase_context_set_create_flags(kctx, flags->create_flags);
+	/* if bad flags, will stay stuck in setup mode */
+	if (err)
+		return err;
+
+	atomic_set(&kctx->setup_complete, 1);
+	return 0;
+}
+
+static int kbase_api_job_submit(struct kbase_context *kctx,
+		struct kbase_ioctl_job_submit *submit)
+{
+	return kbase_jd_submit(kctx, u64_to_user_ptr(submit->addr),
+			submit->nr_atoms,
+			submit->stride, false);
+}
+
+static int kbase_api_get_gpuprops(struct kbase_context *kctx,
+		struct kbase_ioctl_get_gpuprops *get_props)
+{
+	struct kbase_gpu_props *kprops = &kctx->kbdev->gpu_props;
+	int err;
+
+	if (get_props->flags != 0) {
+		dev_err(kctx->kbdev->dev, "Unsupported flags to get_gpuprops");
+		return -EINVAL;
+	}
+
+	if (get_props->size == 0)
+		return kprops->prop_buffer_size;
+	if (get_props->size < kprops->prop_buffer_size)
+		return -EINVAL;
+
+	err = copy_to_user(u64_to_user_ptr(get_props->buffer),
+			kprops->prop_buffer,
+			kprops->prop_buffer_size);
+	if (err)
+		return -EFAULT;
+	return kprops->prop_buffer_size;
+}
+
+static int kbase_api_post_term(struct kbase_context *kctx)
+{
+	kbase_event_close(kctx);
+	return 0;
+}
+
+static int kbase_api_mem_alloc(struct kbase_context *kctx,
+		union kbase_ioctl_mem_alloc *alloc)
+{
+	struct kbase_va_region *reg;
+	u64 flags = alloc->in.flags;
+	u64 gpu_va;
+
+#if defined(CONFIG_64BIT)
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* force SAME_VA if a 64-bit client */
+		flags |= BASE_MEM_SAME_VA;
+	}
+#endif
+
+	reg = kbase_mem_alloc(kctx, alloc->in.va_pages,
+			alloc->in.commit_pages,
+			alloc->in.extent,
+			&flags, &gpu_va);
+
+	if (!reg)
+		return -ENOMEM;
+
+	alloc->out.flags = flags;
+	alloc->out.gpu_va = gpu_va;
+
+	return 0;
+}
+
+static int kbase_api_mem_query(struct kbase_context *kctx,
+		union kbase_ioctl_mem_query *query)
+{
+	return kbase_mem_query(kctx, query->in.gpu_addr,
+			query->in.query, &query->out.value);
+}
+
+static int kbase_api_mem_free(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_free *free)
+{
+	return kbase_mem_free(kctx, free->gpu_addr);
+}
+
+static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_reader_setup *setup)
+{
+	int ret;
+	struct kbase_uk_hwcnt_reader_setup args = {
+		.buffer_count = setup->buffer_count,
+		.jm_bm = setup->jm_bm,
+		.shader_bm = setup->shader_bm,
+		.tiler_bm = setup->tiler_bm,
+		.mmu_l2_bm = setup->mmu_l2_bm
+	};
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, &args);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	if (ret)
+		return ret;
+	return args.fd;
+}
+
+static int kbase_api_hwcnt_enable(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_enable *enable)
+{
+	int ret;
+	struct kbase_uk_hwcnt_setup args = {
+		.dump_buffer = enable->dump_buffer,
+		.jm_bm = enable->jm_bm,
+		.shader_bm = enable->shader_bm,
+		.tiler_bm = enable->tiler_bm,
+		.mmu_l2_bm = enable->mmu_l2_bm
+	};
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_legacy_hwc_setup(kctx->kbdev->vinstr_ctx,
+			&kctx->vinstr_cli, &args);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	return ret;
+}
+
+static int kbase_api_hwcnt_dump(struct kbase_context *kctx)
+{
+	int ret;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_hwc_dump(kctx->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	return ret;
+}
+
+static int kbase_api_hwcnt_clear(struct kbase_context *kctx)
+{
+	int ret;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_hwc_clear(kctx->vinstr_cli);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	return ret;
+}
+
+static int kbase_api_disjoint_query(struct kbase_context *kctx,
+		struct kbase_ioctl_disjoint_query *query)
+{
+	query->counter = kbase_disjoint_event_get(kctx->kbdev);
+
+	return 0;
+}
+
+static int kbase_api_get_ddk_version(struct kbase_context *kctx,
+		struct kbase_ioctl_get_ddk_version *version)
+{
+	int ret;
+	int len = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+
+	if (version->version_buffer == 0)
+		return len;
+
+	if (version->size < len)
+		return -EOVERFLOW;
+
+	ret = copy_to_user(u64_to_user_ptr(version->version_buffer),
+			KERNEL_SIDE_DDK_VERSION_STRING,
+			sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+
+	if (ret)
+		return -EFAULT;
+
+	return len;
+}
+
+static int kbase_api_mem_jit_init(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_jit_init *jit_init)
+{
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages);
+}
+
+static int kbase_api_mem_sync(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_sync *sync)
+{
+	struct basep_syncset sset = {
+		.mem_handle.basep.handle = sync->handle,
+		.user_addr = sync->user_addr,
+		.size = sync->size,
+		.type = sync->type
+	};
+
+	return kbase_sync_now(kctx, &sset);
+}
+
+static int kbase_api_mem_find_cpu_offset(struct kbase_context *kctx,
+		union kbase_ioctl_mem_find_cpu_offset *find)
+{
+	return kbasep_find_enclosing_cpu_mapping_offset(
+			kctx,
+			find->in.cpu_addr,
+			find->in.size,
+			&find->out.offset);
+}
+
+static int kbase_api_get_context_id(struct kbase_context *kctx,
+		struct kbase_ioctl_get_context_id *info)
+{
+	info->id = kctx->id;
+
+	return 0;
+}
+
+static int kbase_api_tlstream_acquire(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_acquire *acquire)
+{
+	return kbase_tlstream_acquire(kctx, acquire->flags);
+}
+
+static int kbase_api_tlstream_flush(struct kbase_context *kctx)
+{
+	kbase_tlstream_flush_streams();
+
+	return 0;
+}
+
+static int kbase_api_mem_commit(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_commit *commit)
+{
+	return kbase_mem_commit(kctx, commit->gpu_addr, commit->pages);
+}
+
+static int kbase_api_mem_alias(struct kbase_context *kctx,
+		union kbase_ioctl_mem_alias *alias)
+{
+	struct base_mem_aliasing_info *ai;
+	u64 flags;
+	int err;
+
+	if (alias->in.nents == 0 || alias->in.nents > 2048)
+		return -EINVAL;
+
+	ai = vmalloc(sizeof(*ai) * alias->in.nents);
+	if (!ai)
+		return -ENOMEM;
+
+	err = copy_from_user(ai,
+			u64_to_user_ptr(alias->in.aliasing_info),
+			sizeof(*ai) * alias->in.nents);
+	if (err) {
+		vfree(ai);
+		return -EFAULT;
+	}
+
+	flags = alias->in.flags;
+
+	alias->out.gpu_va = kbase_mem_alias(kctx, &flags,
+			alias->in.stride, alias->in.nents,
+			ai, &alias->out.va_pages);
+
+	alias->out.flags = flags;
+
+	vfree(ai);
+
+	if (alias->out.gpu_va == 0)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int kbase_api_mem_import(struct kbase_context *kctx,
+		union kbase_ioctl_mem_import *import)
+{
+	int ret;
+	u64 flags = import->in.flags;
+
+	ret = kbase_mem_import(kctx,
+			import->in.type,
+			u64_to_user_ptr(import->in.phandle),
+			import->in.padding,
+			&import->out.gpu_va,
+			&import->out.va_pages,
+			&flags);
+
+	import->out.flags = flags;
+
+	return ret;
+}
+
+static int kbase_api_mem_flags_change(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_flags_change *change)
+{
+	return kbase_mem_flags_change(kctx, change->gpu_va,
+			change->flags, change->mask);
+}
+
+static int kbase_api_stream_create(struct kbase_context *kctx,
+		struct kbase_ioctl_stream_create *stream)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	int fd, ret;
+
+	/* Name must be NULL-terminated and padded with NULLs, so check last
+	 * character is NULL
+	 */
+	if (stream->name[sizeof(stream->name)-1] != 0)
+		return -EINVAL;
+
+	ret = kbase_sync_fence_stream_create(stream->name, &fd);
+
+	if (ret)
+		return ret;
+	return fd;
+#else
+	return -ENOENT;
+#endif
+}
+
+static int kbase_api_fence_validate(struct kbase_context *kctx,
+		struct kbase_ioctl_fence_validate *validate)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	return kbase_sync_fence_validate(validate->fd);
+#else
+	return -ENOENT;
+#endif
+}
+
+static int kbase_api_get_profiling_controls(struct kbase_context *kctx,
+		struct kbase_ioctl_get_profiling_controls *controls)
+{
+	int ret;
+
+	if (controls->count > (FBDUMP_CONTROL_MAX - FBDUMP_CONTROL_MIN))
+		return -EINVAL;
+
+	ret = copy_to_user(u64_to_user_ptr(controls->buffer),
+			&kctx->kbdev->kbase_profiling_controls[
+				FBDUMP_CONTROL_MIN],
+			controls->count * sizeof(u32));
+
+	if (ret)
+		return -EFAULT;
+	return 0;
+}
+
+static int kbase_api_mem_profile_add(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_profile_add *data)
+{
+	char *buf;
+	int err;
+
+	if (data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+		dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big\n");
+		return -EINVAL;
+	}
+
+	buf = kmalloc(data->len, GFP_KERNEL);
+	if (ZERO_OR_NULL_PTR(buf))
+		return -ENOMEM;
+
+	err = copy_from_user(buf, u64_to_user_ptr(data->buffer),
+			data->len);
+	if (err) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	return kbasep_mem_profile_debugfs_insert(kctx, buf, data->len);
+}
+
+static int kbase_api_soft_event_update(struct kbase_context *kctx,
+		struct kbase_ioctl_soft_event_update *update)
+{
+	if (update->flags != 0)
+		return -EINVAL;
+
+	return kbase_soft_event_update(kctx, update->event, update->new_status);
+}
+
+#if MALI_UNIT_TEST
+static int kbase_api_tlstream_test(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_test *test)
+{
+	kbase_tlstream_test(
+			test->tpw_count,
+			test->msg_delay,
+			test->msg_count,
+			test->aux_msg);
+
+	return 0;
+}
+
+static int kbase_api_tlstream_stats(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_stats *stats)
+{
+	kbase_tlstream_stats(
+			&stats->bytes_collected,
+			&stats->bytes_generated);
+
+	return 0;
+}
+#endif /* MALI_UNIT_TEST */
+
+#define KBASE_HANDLE_IOCTL(cmd, function)                          \
+	case cmd:                                                  \
+	do {                                                       \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE);          \
+		return function(kctx);                             \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_IN(cmd, function, type)                 \
+	case cmd:                                                  \
+	do {                                                       \
+		type param;                                        \
+		int err;                                           \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE);         \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));     \
+		err = copy_from_user(&param, uarg, sizeof(param)); \
+		if (err)                                           \
+			return -EFAULT;                            \
+		return function(kctx, &param);                     \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type)                \
+	case cmd:                                                  \
+	do {                                                       \
+		type param;                                        \
+		int ret, err;                                      \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ);          \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));     \
+		ret = function(kctx, &param);                      \
+		err = copy_to_user(uarg, &param, sizeof(param));   \
+		if (err)                                           \
+			return -EFAULT;                            \
+		return ret;                                        \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type)                  \
+	case cmd:                                                      \
+	do {                                                           \
+		type param;                                            \
+		int ret, err;                                          \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE|_IOC_READ)); \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));         \
+		err = copy_from_user(&param, uarg, sizeof(param));     \
+		if (err)                                               \
+			return -EFAULT;                                \
+		ret = function(kctx, &param);                          \
+		err = copy_to_user(uarg, &param, sizeof(param));       \
+		if (err)                                               \
+			return -EFAULT;                                \
+		return ret;                                            \
+	} while (0)
+
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct kbase_device *kbdev = kctx->kbdev;
+	void __user *uarg = (void __user *)arg;
+
+	/* Only these ioctls are available until setup is complete */
+	switch (cmd) {
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK,
+				kbase_api_handshake,
+				struct kbase_ioctl_version_check);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS,
+				kbase_api_set_flags,
+				struct kbase_ioctl_set_flags);
+	}
+
+	/* Block call until version handshake and setup is complete */
+	if (kctx->api_version == 0 || !atomic_read(&kctx->setup_complete))
+		return -EINVAL;
+
+	/* Normal ioctls */
+	switch (cmd) {
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT,
+				kbase_api_job_submit,
+				struct kbase_ioctl_job_submit);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS,
+				kbase_api_get_gpuprops,
+				struct kbase_ioctl_get_gpuprops);
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM,
+				kbase_api_post_term);
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC,
+				kbase_api_mem_alloc,
+				union kbase_ioctl_mem_alloc);
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY,
+				kbase_api_mem_query,
+				union kbase_ioctl_mem_query);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE,
+				kbase_api_mem_free,
+				struct kbase_ioctl_mem_free);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP,
+				kbase_api_hwcnt_reader_setup,
+				struct kbase_ioctl_hwcnt_reader_setup);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE,
+				kbase_api_hwcnt_enable,
+				struct kbase_ioctl_hwcnt_enable);
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP,
+				kbase_api_hwcnt_dump);
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR,
+				kbase_api_hwcnt_clear);
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY,
+				kbase_api_disjoint_query,
+				struct kbase_ioctl_disjoint_query);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION,
+				kbase_api_get_ddk_version,
+				struct kbase_ioctl_get_ddk_version);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT,
+				kbase_api_mem_jit_init,
+				struct kbase_ioctl_mem_jit_init);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC,
+				kbase_api_mem_sync,
+				struct kbase_ioctl_mem_sync);
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET,
+				kbase_api_mem_find_cpu_offset,
+				union kbase_ioctl_mem_find_cpu_offset);
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID,
+				kbase_api_get_context_id,
+				struct kbase_ioctl_get_context_id);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE,
+				kbase_api_tlstream_acquire,
+				struct kbase_ioctl_tlstream_acquire);
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH,
+				kbase_api_tlstream_flush);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT,
+				kbase_api_mem_commit,
+				struct kbase_ioctl_mem_commit);
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS,
+				kbase_api_mem_alias,
+				union kbase_ioctl_mem_alias);
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT,
+				kbase_api_mem_import,
+				union kbase_ioctl_mem_import);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE,
+				kbase_api_mem_flags_change,
+				struct kbase_ioctl_mem_flags_change);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE,
+				kbase_api_stream_create,
+				struct kbase_ioctl_stream_create);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE,
+				kbase_api_fence_validate,
+				struct kbase_ioctl_fence_validate);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_PROFILING_CONTROLS,
+				kbase_api_get_profiling_controls,
+				struct kbase_ioctl_get_profiling_controls);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD,
+				kbase_api_mem_profile_add,
+				struct kbase_ioctl_mem_profile_add);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE,
+				kbase_api_soft_event_update,
+				struct kbase_ioctl_soft_event_update);
+
+#if MALI_UNIT_TEST
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST,
+				kbase_api_tlstream_test,
+				struct kbase_ioctl_tlstream_test);
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS,
+				kbase_api_tlstream_stats,
+				struct kbase_ioctl_tlstream_stats);
+#endif
+	}
+
+	dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd));
+
+	return -ENOIOCTLCMD;
+}
+
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct base_jd_event_v2 uevent;
+	int out_count = 0;
+
+	if (count < sizeof(uevent))
+		return -ENOBUFS;
+
+	do {
+		while (kbase_event_dequeue(kctx, &uevent)) {
+			if (out_count > 0)
+				goto out;
+
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			if (wait_event_interruptible(kctx->event_queue,
+					kbase_event_pending(kctx)) != 0)
+				return -ERESTARTSYS;
+		}
+		if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+			if (out_count == 0)
+				return -EPIPE;
+			goto out;
+		}
+
+		if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0)
+			return -EFAULT;
+
+		buf += sizeof(uevent);
+		out_count++;
+		count -= sizeof(uevent);
+	} while (count >= sizeof(uevent));
+
+ out:
+	return out_count * sizeof(uevent);
+}
+
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+	struct kbase_context *kctx = filp->private_data;
+
+	poll_wait(filp, &kctx->event_queue, wait);
+	if (kbase_event_pending(kctx))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup);
+
+static int kbase_check_flags(int flags)
+{
+	/* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
+	 * closes the file descriptor in a child process.
+	 */
+	if (0 == (flags & O_CLOEXEC))
+		return -EINVAL;
+
+	return 0;
+}
+
+
+/**
+ * align_and_check - Align the specified pointer to the provided alignment and
+ *                   check that it is still in range.
+ * @gap_end:        Highest possible start address for allocation (end of gap in
+ *                  address space)
+ * @gap_start:      Start address of current memory area / gap in address space
+ * @info:           vm_unmapped_area_info structure passed to caller, containing
+ *                  alignment, length and limits for the allocation
+ * @is_shader_code: True if the allocation is for shader code (which has
+ *                  additional alignment requirements)
+ *
+ * Return: true if gap_end is now aligned correctly and is still in range,
+ *         false otherwise
+ */
+static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
+		struct vm_unmapped_area_info *info, bool is_shader_code)
+{
+	/* Compute highest gap address at the desired alignment */
+	(*gap_end) -= info->length;
+	(*gap_end) -= (*gap_end - info->align_offset) & info->align_mask;
+
+	if (is_shader_code) {
+		/* Check for 4GB boundary */
+		if (0 == (*gap_end & BASE_MEM_MASK_4GB))
+			(*gap_end) -= (info->align_offset ? info->align_offset :
+					info->length);
+		if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB))
+			(*gap_end) -= (info->align_offset ? info->align_offset :
+					info->length);
+
+		if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end +
+				info->length) & BASE_MEM_MASK_4GB))
+			return false;
+	}
+
+
+	if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
+		return false;
+
+
+	return true;
+}
+
+/* The following function is taken from the kernel and just
+ * renamed. As it's not exported to modules we must copy-paste it here.
+ */
+
+static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
+		*info, bool is_shader_code)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+
+	/* Adjust search length to account for worst case alignment overhead */
+	length = info->length + info->align_mask;
+	if (length < info->length)
+		return -ENOMEM;
+
+	/*
+	 * Adjust search limits by the desired length.
+	 * See implementation comment at top of unmapped_area().
+	 */
+	gap_end = info->high_limit;
+	if (gap_end < length)
+		return -ENOMEM;
+	high_limit = gap_end - length;
+
+	if (info->low_limit > high_limit)
+		return -ENOMEM;
+	low_limit = info->low_limit + length;
+
+	/* Check highest gap, which does not precede any rbtree node */
+	gap_start = mm->highest_vm_end;
+	if (gap_start <= high_limit) {
+		if (align_and_check(&gap_end, gap_start, info, is_shader_code))
+			return gap_end;
+	}
+
+	/* Check if rbtree root looks promising */
+	if (RB_EMPTY_ROOT(&mm->mm_rb))
+		return -ENOMEM;
+	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+	if (vma->rb_subtree_gap < length)
+		return -ENOMEM;
+
+	while (true) {
+		/* Visit right subtree if it looks promising */
+		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+			struct vm_area_struct *right =
+				rb_entry(vma->vm_rb.rb_right,
+					 struct vm_area_struct, vm_rb);
+			if (right->rb_subtree_gap >= length) {
+				vma = right;
+				continue;
+			}
+		}
+
+check_current:
+		/* Check if current node has a suitable gap */
+		gap_end = vma->vm_start;
+		if (gap_end < low_limit)
+			return -ENOMEM;
+		if (gap_start <= high_limit && gap_end - gap_start >= length) {
+			/* We found a suitable gap. Clip it with the original
+			 * high_limit. */
+			if (gap_end > info->high_limit)
+				gap_end = info->high_limit;
+
+			if (align_and_check(&gap_end, gap_start, info,
+					is_shader_code))
+				return gap_end;
+		}
+
+		/* Visit left subtree if it looks promising */
+		if (vma->vm_rb.rb_left) {
+			struct vm_area_struct *left =
+				rb_entry(vma->vm_rb.rb_left,
+					 struct vm_area_struct, vm_rb);
+			if (left->rb_subtree_gap >= length) {
+				vma = left;
+				continue;
+			}
+		}
+
+		/* Go back up the rbtree to find next candidate node */
+		while (true) {
+			struct rb_node *prev = &vma->vm_rb;
+			if (!rb_parent(prev))
+				return -ENOMEM;
+			vma = rb_entry(rb_parent(prev),
+				       struct vm_area_struct, vm_rb);
+			if (prev == vma->vm_rb.rb_right) {
+				gap_start = vma->vm_prev ?
+					vma->vm_prev->vm_end : 0;
+				goto check_current;
+			}
+		}
+	}
+
+	return -ENOMEM;
+}
+
+static unsigned long kbase_get_unmapped_area(struct file *filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	/* based on get_unmapped_area, but simplified slightly due to that some
+	 * values are known in advance */
+	struct kbase_context *kctx = filp->private_data;
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+	unsigned long align_offset = 0;
+	unsigned long align_mask = 0;
+	unsigned long high_limit = mm->mmap_base;
+	unsigned long low_limit = PAGE_SIZE;
+	int cpu_va_bits = BITS_PER_LONG;
+	int gpu_pc_bits =
+	      kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	bool is_shader_code = false;
+	unsigned long ret;
+
+	/* err on fixed address */
+	if ((flags & MAP_FIXED) || addr)
+		return -EINVAL;
+
+#ifdef CONFIG_64BIT
+	/* too big? */
+	if (len > TASK_SIZE - SZ_2M)
+		return -ENOMEM;
+
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+
+		if (kbase_hw_has_feature(kctx->kbdev,
+						BASE_HW_FEATURE_33BIT_VA)) {
+			high_limit = kctx->same_va_end << PAGE_SHIFT;
+		} else {
+			high_limit = min_t(unsigned long, mm->mmap_base,
+					(kctx->same_va_end << PAGE_SHIFT));
+			if (len >= SZ_2M) {
+				align_offset = SZ_2M;
+				align_mask = SZ_2M - 1;
+			}
+		}
+
+		low_limit = SZ_2M;
+	} else {
+		cpu_va_bits = 32;
+	}
+#endif /* CONFIG_64BIT */
+	if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) &&
+		(PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) {
+			int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+
+			if (!kctx->pending_regions[cookie])
+				return -EINVAL;
+
+			if (!(kctx->pending_regions[cookie]->flags &
+							KBASE_REG_GPU_NX)) {
+				if (cpu_va_bits > gpu_pc_bits) {
+					align_offset = 1ULL << gpu_pc_bits;
+					align_mask = align_offset - 1;
+					is_shader_code = true;
+				}
+			}
+#ifndef CONFIG_64BIT
+	} else {
+		return current->mm->get_unmapped_area(filp, addr, len, pgoff,
+						      flags);
+#endif
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = low_limit;
+	info.high_limit = high_limit;
+	info.align_offset = align_offset;
+	info.align_mask = align_mask;
+
+	ret = kbase_unmapped_area_topdown(&info, is_shader_code);
+
+	if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
+			high_limit < (kctx->same_va_end << PAGE_SHIFT)) {
+		/* Retry above mmap_base */
+		info.low_limit = mm->mmap_base;
+		info.high_limit = min_t(u64, TASK_SIZE,
+					(kctx->same_va_end << PAGE_SHIFT));
+
+		ret = kbase_unmapped_area_topdown(&info, is_shader_code);
+	}
+
+	return ret;
+}
+
+static const struct file_operations kbase_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_open,
+	.release = kbase_release,
+	.read = kbase_read,
+	.poll = kbase_poll,
+	.unlocked_ioctl = kbase_ioctl,
+	.compat_ioctl = kbase_ioctl,
+	.mmap = kbase_mmap,
+	.check_flags = kbase_check_flags,
+	.get_unmapped_area = kbase_get_unmapped_area,
+};
+
+/**
+ * show_policy - Show callback for the power_policy sysfs file.
+ *
+ * This function is called to get the contents of the power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *current_policy;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_get_policy(kbdev);
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * set_policy - Store callback for the power_policy sysfs file.
+ *
+ * This function is called when the power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls kbase_pm_set_policy() to change the
+ * policy.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "power_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/*
+ * The sysfs file power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+
+/**
+ * show_ca_policy - Show callback for the core_availability_policy sysfs file.
+ *
+ * This function is called to get the contents of the core_availability_policy
+ * sysfs file. This is a list of the available policies with the currently
+ * active one surrounded by square brackets.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *current_policy;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_ca_get_policy(kbdev);
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * set_ca_policy - Store callback for the core_availability_policy sysfs file.
+ *
+ * This function is called when the core_availability_policy sysfs file is
+ * written to. It matches the requested policy against the available policies
+ * and if a matching policy is found calls kbase_pm_set_policy() to change
+ * the policy.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *new_policy = NULL;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "core_availability_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_ca_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/*
+ * The sysfs file core_availability_policy
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy);
+
+/*
+ * show_core_mask - Show callback for the core_mask sysfs file.
+ *
+ * This function is called to get the contents of the core_mask sysfs file.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS0) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[0]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS1) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[1]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS2) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[2]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Available core mask : 0x%llX\n",
+			kbdev->gpu_props.props.raw_props.shader_present);
+
+	return ret;
+}
+
+/**
+ * set_core_mask - Store callback for the core_mask sysfs file.
+ *
+ * This function is called when the core_mask sysfs file is written to.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	u64 new_core_mask[3];
+	int items;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llx %llx %llx",
+			&new_core_mask[0], &new_core_mask[1],
+			&new_core_mask[2]);
+
+	if (items == 1)
+		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+
+	if (items == 1 || items == 3) {
+		u64 shader_present =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		u64 group0_core_mask =
+				kbdev->gpu_props.props.coherency_info.group[0].
+				core_mask;
+
+		if ((new_core_mask[0] & shader_present) != new_core_mask[0] ||
+				!(new_core_mask[0] & group0_core_mask) ||
+			(new_core_mask[1] & shader_present) !=
+						new_core_mask[1] ||
+				!(new_core_mask[1] & group0_core_mask) ||
+			(new_core_mask[2] & shader_present) !=
+						new_core_mask[2] ||
+				!(new_core_mask[2] & group0_core_mask)) {
+			dev_err(dev, "power_policy: invalid core specification\n");
+			return -EINVAL;
+		}
+
+		if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
+				kbdev->pm.debug_core_mask[1] !=
+						new_core_mask[1] ||
+				kbdev->pm.debug_core_mask[2] !=
+						new_core_mask[2]) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+					new_core_mask[1], new_core_mask[2]);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n"
+		"Use format <core_mask>\n"
+		"or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+	return -EINVAL;
+}
+
+/*
+ * The sysfs file core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+
+/**
+ * set_soft_job_timeout - Store callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This allows setting the timeout for software jobs. Waiting soft event wait
+ * jobs will be cancelled after this period expires, while soft fence wait jobs
+ * will print debug information if the fence debug feature is enabled.
+ *
+ * This is expressed in milliseconds.
+ *
+ * Return: count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_soft_job_timeout(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int soft_job_timeout_ms;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) ||
+	    (soft_job_timeout_ms <= 0))
+		return -EINVAL;
+
+	atomic_set(&kbdev->js_data.soft_job_timeout_ms,
+		   soft_job_timeout_ms);
+
+	return count;
+}
+
+/**
+ * show_soft_job_timeout - Show callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * This will return the timeout for the software jobs.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_soft_job_timeout(struct device *dev,
+				       struct device_attribute *attr,
+				       char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n",
+			 atomic_read(&kbdev->js_data.soft_job_timeout_ms));
+}
+
+static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR,
+		   show_soft_job_timeout, set_soft_job_timeout);
+
+static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms,
+				int default_ticks, u32 old_ticks)
+{
+	if (timeout_ms > 0) {
+		u64 ticks = timeout_ms * 1000000ULL;
+		do_div(ticks, kbdev->js_data.scheduling_period_ns);
+		if (!ticks)
+			return 1;
+		return ticks;
+	} else if (timeout_ms < 0) {
+		return default_ticks;
+	} else {
+		return old_ticks;
+	}
+}
+
+/**
+ * set_js_timeouts - Store callback for the js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the js_timeouts sysfs
+ * file. This file contains five values separated by whitespace. The values
+ * are basically the same as %JS_SOFT_STOP_TICKS, %JS_HARD_STOP_TICKS_SS,
+ * %JS_HARD_STOP_TICKS_DUMPING, %JS_RESET_TICKS_SS, %JS_RESET_TICKS_DUMPING
+ * configuration values (in that order), with the difference that the js_timeout
+ * values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfile file allows the current values in
+ * use by the job scheduler to get override. Note that a value needs to
+ * be other than 0 for it to override the current job scheduler value.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	long js_soft_stop_ms;
+	long js_soft_stop_ms_cl;
+	long js_hard_stop_ms_ss;
+	long js_hard_stop_ms_cl;
+	long js_hard_stop_ms_dumping;
+	long js_reset_ms_ss;
+	long js_reset_ms_cl;
+	long js_reset_ms_dumping;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld",
+			&js_soft_stop_ms, &js_soft_stop_ms_cl,
+			&js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+			&js_hard_stop_ms_dumping, &js_reset_ms_ss,
+			&js_reset_ms_cl, &js_reset_ms_dumping);
+
+	if (items == 8) {
+		struct kbasep_js_device_data *js_data = &kbdev->js_data;
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\
+	js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \
+			default, js_data->ticks_name); \
+	dev_dbg(kbdev->dev, "Overriding " #ticks_name \
+			" with %lu ticks (%lu ms)\n", \
+			(unsigned long)js_data->ticks_name, \
+			ms_name); \
+	} while (0)
+
+		UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms,
+				DEFAULT_JS_SOFT_STOP_TICKS);
+		UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl,
+				DEFAULT_JS_SOFT_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_HARD_STOP_TICKS_SS_8408 :
+				DEFAULT_JS_HARD_STOP_TICKS_SS);
+		UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl,
+				DEFAULT_JS_HARD_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_dumping,
+				js_hard_stop_ms_dumping,
+				DEFAULT_JS_HARD_STOP_TICKS_DUMPING);
+		UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_RESET_TICKS_SS_8408 :
+				DEFAULT_JS_RESET_TICKS_SS);
+		UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl,
+				DEFAULT_JS_RESET_TICKS_CL);
+		UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping,
+				DEFAULT_JS_RESET_TICKS_DUMPING);
+
+		kbase_js_set_timeouts(kbdev);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n"
+			"Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n"
+			"Write 0 for no change, -1 to restore default timeout\n");
+	return -EINVAL;
+}
+
+static unsigned long get_js_timeout_in_ms(
+		u32 scheduling_period_ns,
+		u32 ticks)
+{
+	u64 ms = (u64)ticks * scheduling_period_ns;
+
+	do_div(ms, 1000000UL);
+	return ms;
+}
+
+/**
+ * show_js_timeouts - Show callback for the js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the js_timeouts sysfs
+ * file. It returns the last set values written to the js_timeouts sysfs file.
+ * If the file didn't get written yet, the values will be current setting in
+ * use.
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+	unsigned long js_soft_stop_ms;
+	unsigned long js_soft_stop_ms_cl;
+	unsigned long js_hard_stop_ms_ss;
+	unsigned long js_hard_stop_ms_cl;
+	unsigned long js_hard_stop_ms_dumping;
+	unsigned long js_reset_ms_ss;
+	unsigned long js_reset_ms_cl;
+	unsigned long js_reset_ms_dumping;
+	u32 scheduling_period_ns;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+
+#define GET_TIMEOUT(name) get_js_timeout_in_ms(\
+		scheduling_period_ns, \
+		kbdev->js_data.name)
+
+	js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks);
+	js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl);
+	js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss);
+	js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl);
+	js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping);
+	js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss);
+	js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl);
+	js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef GET_TIMEOUT
+
+	ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+			js_soft_stop_ms, js_soft_stop_ms_cl,
+			js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+			js_hard_stop_ms_dumping, js_reset_ms_ss,
+			js_reset_ms_cl, js_reset_ms_dumping);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * The sysfs file js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * JS_STOP_STOP_TICKS_SS
+ * JS_STOP_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
+
+static u32 get_new_js_timeout(
+		u32 old_period,
+		u32 old_ticks,
+		u32 new_scheduling_period_ns)
+{
+	u64 ticks = (u64)old_period * (u64)old_ticks;
+	do_div(ticks, new_scheduling_period_ns);
+	return ticks?ticks:1;
+}
+
+/**
+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ *                            file
+ * @dev:   The device the sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the js_scheduling_period sysfs file is written
+ * to. It checks the data written, and if valid updates the js_scheduling_period
+ * value
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	unsigned int js_scheduling_period;
+	u32 new_scheduling_period_ns;
+	u32 old_period;
+	struct kbasep_js_device_data *js_data;
+	unsigned long flags;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	js_data = &kbdev->js_data;
+
+	ret = kstrtouint(buf, 0, &js_scheduling_period);
+	if (ret || !js_scheduling_period) {
+		dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
+				"Use format <js_scheduling_period_ms>\n");
+		return -EINVAL;
+	}
+
+	new_scheduling_period_ns = js_scheduling_period * 1000000;
+
+	/* Update scheduling timeouts */
+	mutex_lock(&js_data->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* If no contexts have been scheduled since js_timeouts was last written
+	 * to, the new timeouts might not have been latched yet. So check if an
+	 * update is pending and use the new values if necessary. */
+
+	/* Use previous 'new' scheduling period as a base if present. */
+	old_period = js_data->scheduling_period_ns;
+
+#define SET_TIMEOUT(name) \
+		(js_data->name = get_new_js_timeout(\
+				old_period, \
+				kbdev->js_data.name, \
+				new_scheduling_period_ns))
+
+	SET_TIMEOUT(soft_stop_ticks);
+	SET_TIMEOUT(soft_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_ss);
+	SET_TIMEOUT(hard_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_dumping);
+	SET_TIMEOUT(gpu_reset_ticks_ss);
+	SET_TIMEOUT(gpu_reset_ticks_cl);
+	SET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef SET_TIMEOUT
+
+	js_data->scheduling_period_ns = new_scheduling_period_ns;
+
+	kbase_js_set_timeouts(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&js_data->runpool_mutex);
+
+	dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
+			js_scheduling_period);
+
+	return count;
+}
+
+/**
+ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs
+ *                             entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the JS scheduling
+ * period.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	u32 period;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	period = kbdev->js_data.scheduling_period_ns;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+			period / 1000000);
+
+	return ret;
+}
+
+static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
+		show_js_scheduling_period, set_js_scheduling_period);
+
+#if !MALI_CUSTOMER_RELEASE
+/**
+ * set_force_replay - Store callback for the force_replay sysfs file.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (!strncmp("limit=", buf, MIN(6, count))) {
+		int force_replay_limit;
+		int items = sscanf(buf, "limit=%u", &force_replay_limit);
+
+		if (items == 1) {
+			kbdev->force_replay_random = false;
+			kbdev->force_replay_limit = force_replay_limit;
+			kbdev->force_replay_count = 0;
+
+			return count;
+		}
+	} else if (!strncmp("random_limit", buf, MIN(12, count))) {
+		kbdev->force_replay_random = true;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("norandom_limit", buf, MIN(14, count))) {
+		kbdev->force_replay_random = false;
+		kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("core_req=", buf, MIN(9, count))) {
+		unsigned int core_req;
+		int items = sscanf(buf, "core_req=%x", &core_req);
+
+		if (items == 1) {
+			kbdev->force_replay_core_req = (base_jd_core_req)core_req;
+
+			return count;
+		}
+	}
+	dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n");
+	return -EINVAL;
+}
+
+/**
+ * show_force_replay - Show callback for the force_replay sysfs file.
+ *
+ * This function is called to get the contents of the force_replay sysfs
+ * file. It returns the last set value written to the force_replay sysfs file.
+ * If the file didn't get written yet, the values will be 0.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_force_replay(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->force_replay_random)
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=0\nrandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_core_req);
+	else
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=%u\nnorandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_limit,
+				kbdev->force_replay_core_req);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * The sysfs file force_replay.
+ */
+static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay,
+		set_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG
+static ssize_t set_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int softstop_always;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &softstop_always);
+	if (ret || ((softstop_always != 0) && (softstop_always != 1))) {
+		dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n"
+				"Use format <soft_stop_always>\n");
+		return -EINVAL;
+	}
+
+	kbdev->js_data.softstop_always = (bool) softstop_always;
+	dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n",
+			(kbdev->js_data.softstop_always) ?
+			"Enabled" : "Disabled");
+	return count;
+}
+
+static ssize_t show_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * By default, soft-stops are disabled when only a single context is present.
+ * The ability to enable soft-stop when only a single context is present can be
+ * used for debug and unit-testing purposes.
+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.)
+ */
+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG
+typedef void (kbasep_debug_command_func) (struct kbase_device *);
+
+enum kbasep_debug_command_code {
+	KBASEP_DEBUG_COMMAND_DUMPTRACE,
+
+	/* This must be the last enum */
+	KBASEP_DEBUG_COMMAND_COUNT
+};
+
+struct kbasep_debug_command {
+	char *str;
+	kbasep_debug_command_func *func;
+};
+
+/* Debug commands supported by the driver */
+static const struct kbasep_debug_command debug_commands[] = {
+	{
+	 .str = "dumptrace",
+	 .func = &kbasep_trace_dump,
+	 }
+};
+
+/**
+ * show_debug - Show callback for the debug_command sysfs file.
+ *
+ * This function is called to get the contents of the debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * issue_debug - Store callback for the debug_command sysfs file.
+ *
+ * This function is called when the debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @debug_commands to issue the command.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) {
+		if (sysfs_streq(debug_commands[i].str, buf)) {
+			debug_commands[i].func(kbdev);
+			return count;
+		}
+	}
+
+	/* Debug Command not found */
+	dev_err(dev, "debug_command: command not known\n");
+	return -EINVAL;
+}
+
+/* The sysfs file debug_command.
+ *
+ * This is used to issue general debug commands to the device driver.
+ * Reading it will produce a list of debug commands, separated by newlines.
+ * Writing to it with one of those commands will issue said command.
+ */
+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
+#endif /* CONFIG_MALI_DEBUG */
+
+/**
+ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get a description of the present Mali
+ * GPU via the gpuinfo sysfs entry.  This includes the GPU family, the
+ * number of cores, the hardware version and the raw product id.  For
+ * example
+ *
+ *    Mali-T60x MP4 r0p0 0x6956
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t kbase_show_gpuinfo(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	static const struct gpu_product_id_name {
+		unsigned id;
+		char *name;
+	} gpu_product_id_names[] = {
+		{ .id = GPU_ID_PI_T60X, .name = "Mali-T60x" },
+		{ .id = GPU_ID_PI_T62X, .name = "Mali-T62x" },
+		{ .id = GPU_ID_PI_T72X, .name = "Mali-T72x" },
+		{ .id = GPU_ID_PI_T76X, .name = "Mali-T76x" },
+		{ .id = GPU_ID_PI_T82X, .name = "Mali-T82x" },
+		{ .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
+		{ .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
+		{ .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+		{ .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G71" },
+		{ .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G72" },
+		{ .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G51" },
+		{ .id = GPU_ID2_PRODUCT_TNOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-TNOx" },
+	};
+	const char *product_name = "(Unknown Mali GPU)";
+	struct kbase_device *kbdev;
+	u32 gpu_id;
+	unsigned product_id, product_id_mask;
+	unsigned i;
+	bool is_new_format;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	is_new_format = GPU_ID_IS_NEW_FORMAT(product_id);
+	product_id_mask =
+		(is_new_format ?
+			GPU_ID2_PRODUCT_MODEL :
+			GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
+		const struct gpu_product_id_name *p = &gpu_product_id_names[i];
+
+		if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) &&
+		    (p->id & product_id_mask) ==
+		    (product_id & product_id_mask)) {
+			product_name = p->name;
+			break;
+		}
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n",
+		product_name, kbdev->gpu_props.num_cores,
+		(gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT,
+		(gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT,
+		product_id);
+}
+static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
+
+/**
+ * set_dvfs_period - Store callback for the dvfs_period sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the dvfs_period sysfs file is written to. It
+ * checks the data written, and if valid updates the DVFS period variable,
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_dvfs_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int dvfs_period;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &dvfs_period);
+	if (ret || dvfs_period <= 0) {
+		dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n"
+				"Use format <dvfs_period_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.dvfs_period = dvfs_period;
+	dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period);
+
+	return count;
+}
+
+/**
+ * show_dvfs_period - Show callback for the dvfs_period sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_dvfs_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period);
+
+	return ret;
+}
+
+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
+		set_dvfs_period);
+
+/**
+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the pm_poweroff sysfs file is written to.
+ *
+ * This file contains three values separated by whitespace. The values
+ * are gpu_poweroff_time (the period of the poweroff timer, in ns),
+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle
+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer
+ * ticks before the GPU is powered off), in that order.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	s64 gpu_poweroff_time;
+	int poweroff_shader_ticks, poweroff_gpu_ticks;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time,
+			&poweroff_shader_ticks,
+			&poweroff_gpu_ticks);
+	if (items != 3) {
+		dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n"
+				"Use format <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+	kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
+	kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+
+	return count;
+}
+
+/**
+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
+			ktime_to_ns(kbdev->pm.gpu_poweroff_time),
+			kbdev->pm.poweroff_shader_ticks,
+			kbdev->pm.poweroff_gpu_ticks);
+
+	return ret;
+}
+
+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
+		set_pm_poweroff);
+
+/**
+ * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the reset_timeout sysfs file is written to. It
+ * checks the data written, and if valid updates the reset timeout.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_reset_timeout(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int reset_timeout;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &reset_timeout);
+	if (ret || reset_timeout <= 0) {
+		dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n"
+				"Use format <reset_timeout_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->reset_timeout_ms = reset_timeout;
+	dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout);
+
+	return count;
+}
+
+/**
+ * show_reset_timeout - Show callback for the reset_timeout sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current reset timeout.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_reset_timeout(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms);
+
+	return ret;
+}
+
+static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout,
+		set_reset_timeout);
+
+
+
+static ssize_t show_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_size);
+	if (err)
+		return err;
+
+	kbase_mem_pool_trim(&kbdev->mem_pool, new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size,
+		set_mem_pool_size);
+
+static ssize_t show_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_max_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_max_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_max_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
+		set_mem_pool_max_size);
+
+/**
+ * show_lp_mem_pool_size - Show size of the large memory pages pool.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the pool size.
+ *
+ * This function is called to get the number of large memory pages which currently populate the kbdev pool.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_lp_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%zu\n", kbase_mem_pool_size(&kbdev->lp_mem_pool));
+}
+
+/**
+ * set_lp_mem_pool_size - Set size of the large memory pages pool.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called to set the number of large memory pages which should populate the kbdev pool.
+ * This may cause existing pages to be removed from the pool, or new pages to be created and then added to the pool.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_lp_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	unsigned long new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, &new_size);
+	if (err)
+		return err;
+
+	kbase_mem_pool_trim(&kbdev->lp_mem_pool, new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(lp_mem_pool_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_size,
+		set_lp_mem_pool_size);
+
+/**
+ * show_lp_mem_pool_max_size - Show maximum size of the large memory pages pool.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the pool size.
+ *
+ * This function is called to get the maximum number of large memory pages that the kbdev pool can possibly contain.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_lp_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%zu\n", kbase_mem_pool_max_size(&kbdev->lp_mem_pool));
+}
+
+/**
+ * set_lp_mem_pool_max_size - Set maximum size of the large memory pages pool.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called to set the maximum number of large memory pages that the kbdev pool can possibly contain.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_lp_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	unsigned long new_max_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, &new_max_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_set_max_size(&kbdev->lp_mem_pool, new_max_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size,
+		set_lp_mem_pool_max_size);
+
+#ifdef CONFIG_DEBUG_FS
+
+/* Number of entries in serialize_jobs_settings[] */
+#define NR_SERIALIZE_JOBS_SETTINGS 5
+/* Maximum string length in serialize_jobs_settings[].name */
+#define MAX_SERIALIZE_JOBS_NAME_LEN 16
+
+static struct
+{
+	char *name;
+	u8 setting;
+} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = {
+	{"none", 0},
+	{"intra-slot", KBASE_SERIALIZE_INTRA_SLOT},
+	{"inter-slot", KBASE_SERIALIZE_INTER_SLOT},
+	{"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT},
+	{"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT |
+			KBASE_SERIALIZE_RESET}
+};
+
+/**
+ * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs
+ *                                  file
+ * @sfile: seq_file pointer
+ * @data:  Private callback data
+ *
+ * This function is called to get the contents of the serialize_jobs debugfs
+ * file. This is a list of the available settings with the currently active one
+ * surrounded by square brackets.
+ *
+ * Return: 0 on success, or an error code on error
+ */
+static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_device *kbdev = sfile->private;
+	int i;
+
+	CSTD_UNUSED(data);
+
+	for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+		if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting)
+			seq_printf(sfile, "[%s] ",
+					serialize_jobs_settings[i].name);
+		else
+			seq_printf(sfile, "%s ",
+					serialize_jobs_settings[i].name);
+	}
+
+	seq_puts(sfile, "\n");
+
+	return 0;
+}
+
+/**
+ * kbasep_serialize_jobs_debugfs_write - Store callback for the serialize_jobs
+ *                                       debugfs file.
+ * @file:  File pointer
+ * @ubuf:  User buffer containing data to store
+ * @count: Number of bytes in user buffer
+ * @ppos:  File position
+ *
+ * This function is called when the serialize_jobs debugfs file is written to.
+ * It matches the requested setting against the available settings and if a
+ * matching setting is found updates kbdev->serialize_jobs.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	struct seq_file *s = file->private_data;
+	struct kbase_device *kbdev = s->private;
+	char buf[MAX_SERIALIZE_JOBS_NAME_LEN];
+	int i;
+	bool valid = false;
+
+	CSTD_UNUSED(ppos);
+
+	count = min_t(size_t, sizeof(buf) - 1, count);
+	if (copy_from_user(buf, ubuf, count))
+		return -EFAULT;
+
+	buf[count] = 0;
+
+	for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+		if (sysfs_streq(serialize_jobs_settings[i].name, buf)) {
+			kbdev->serialize_jobs =
+					serialize_jobs_settings[i].setting;
+			valid = true;
+			break;
+		}
+	}
+
+	if (!valid) {
+		dev_err(kbdev->dev, "serialize_jobs: invalid setting\n");
+		return -EINVAL;
+	}
+
+	return count;
+}
+
+/**
+ * kbasep_serialize_jobs_debugfs_open - Open callback for the serialize_jobs
+ *                                     debugfs file
+ * @in:   inode pointer
+ * @file: file pointer
+ *
+ * Return: Zero on success, error code on failure
+ */
+static int kbasep_serialize_jobs_debugfs_open(struct inode *in,
+		struct file *file)
+{
+	return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_serialize_jobs_debugfs_fops = {
+	.open = kbasep_serialize_jobs_debugfs_open,
+	.read = seq_read,
+	.write = kbasep_serialize_jobs_debugfs_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_DEBUG_FS */
+
+static int kbasep_protected_mode_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_OF
+	struct device_node *protected_node;
+	struct platform_device *pdev;
+	struct protected_mode_device *protected_dev;
+#endif
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		/* Use native protected ops */
+		kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev),
+				GFP_KERNEL);
+		if (!kbdev->protected_dev)
+			return -ENOMEM;
+		kbdev->protected_dev->data = kbdev;
+		kbdev->protected_ops = &kbase_native_protected_ops;
+		kbdev->protected_mode_support = true;
+		return 0;
+	}
+
+	kbdev->protected_mode_support = false;
+
+#ifdef CONFIG_OF
+	protected_node = of_parse_phandle(kbdev->dev->of_node,
+			"protected-mode-switcher", 0);
+
+	if (!protected_node)
+		protected_node = of_parse_phandle(kbdev->dev->of_node,
+				"secure-mode-switcher", 0);
+
+	if (!protected_node) {
+		/* If protected_node cannot be looked up then we assume
+		 * protected mode is not supported on this platform. */
+		dev_info(kbdev->dev, "Protected mode not available\n");
+		return 0;
+	}
+
+	pdev = of_find_device_by_node(protected_node);
+	if (!pdev)
+		return -EINVAL;
+
+	protected_dev = platform_get_drvdata(pdev);
+	if (!protected_dev)
+		return -EPROBE_DEFER;
+
+	kbdev->protected_ops = &protected_dev->ops;
+	kbdev->protected_dev = protected_dev;
+
+	if (kbdev->protected_ops) {
+		int err;
+
+		/* Make sure protected mode is disabled on startup */
+		mutex_lock(&kbdev->pm.lock);
+		err = kbdev->protected_ops->protected_mode_disable(
+				kbdev->protected_dev);
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* protected_mode_disable() returns -EINVAL if not supported */
+		kbdev->protected_mode_support = (err != -EINVAL);
+	}
+#endif
+	return 0;
+}
+
+static void kbasep_protected_mode_term(struct kbase_device *kbdev)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+		kfree(kbdev->protected_dev);
+}
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	int err = 0;
+
+	if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) {
+		dev_err(kbdev->dev, "Register window unavailable\n");
+		err = -EIO;
+		goto out_region;
+	}
+
+	kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+	if (!kbdev->reg) {
+		dev_err(kbdev->dev, "Can't remap register window\n");
+		err = -EINVAL;
+		goto out_ioremap;
+	}
+
+	return err;
+
+ out_ioremap:
+	release_mem_region(kbdev->reg_start, kbdev->reg_size);
+ out_region:
+	return err;
+}
+
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+	if (kbdev->reg) {
+		iounmap(kbdev->reg);
+		release_mem_region(kbdev->reg_start, kbdev->reg_size);
+		kbdev->reg = NULL;
+		kbdev->reg_start = 0;
+		kbdev->reg_size = 0;
+	}
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+static int registers_map(struct kbase_device * const kbdev)
+{
+
+		/* the first memory resource is the physical address of the GPU
+		 * registers */
+		struct platform_device *pdev = to_platform_device(kbdev->dev);
+		struct resource *reg_res;
+		int err;
+
+		reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!reg_res) {
+			dev_err(kbdev->dev, "Invalid register resource\n");
+			return -ENOENT;
+		}
+
+		kbdev->reg_start = reg_res->start;
+		kbdev->reg_size = resource_size(reg_res);
+
+		err = kbase_common_reg_map(kbdev);
+		if (err) {
+			dev_err(kbdev->dev, "Failed to map registers\n");
+			return err;
+		}
+
+	return 0;
+}
+
+static void registers_unmap(struct kbase_device *kbdev)
+{
+	kbase_common_reg_unmap(kbdev);
+}
+
+static int power_control_init(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int err = 0;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
+	if (IS_ERR_OR_NULL(kbdev->regulator)) {
+		err = PTR_ERR(kbdev->regulator);
+		kbdev->regulator = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get regulator\n");
+			return err;
+		}
+		dev_info(kbdev->dev,
+			"Continuing without Mali regulator control\n");
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+	kbdev->clock = clk_get(kbdev->dev, "clk_mali");
+	if (IS_ERR_OR_NULL(kbdev->clock)) {
+		err = PTR_ERR(kbdev->clock);
+		kbdev->clock = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get clock\n");
+			goto fail;
+		}
+		dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare_enable(kbdev->clock);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Failed to prepare and enable clock (%d)\n",
+				err);
+			goto fail;
+		}
+	}
+
+#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) \
+	|| defined(LSK_OPPV2_BACKPORT)
+	err = dev_pm_opp_of_add_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	err = of_init_opp_table(kbdev->dev);
+#else
+	err = 0;
+#endif /* LINUX_VERSION_CODE */
+	if (err)
+		dev_dbg(kbdev->dev, "OPP table not found\n");
+#endif /* CONFIG_OF && CONFIG_PM_OPP */
+
+	return 0;
+
+fail:
+
+if (kbdev->clock != NULL) {
+	clk_put(kbdev->clock);
+	kbdev->clock = NULL;
+}
+
+#ifdef CONFIG_REGULATOR
+	if (NULL != kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif
+
+	return err;
+}
+
+static void power_control_term(struct kbase_device *kbdev)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) || \
+		defined(LSK_OPPV2_BACKPORT)
+	dev_pm_opp_of_remove_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+	of_free_opp_table(kbdev->dev);
+#endif
+
+	if (kbdev->clock) {
+		clk_disable_unprepare(kbdev->clock);
+		clk_put(kbdev->clock);
+		kbdev->clock = NULL;
+	}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	if (kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#if KBASE_GPU_RESET_EN
+#include <mali_kbase_hwaccess_jm.h>
+
+static void trigger_quirks_reload(struct kbase_device *kbdev)
+{
+	kbase_pm_context_active(kbdev);
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	kbase_pm_context_idle(kbdev);
+}
+
+#define MAKE_QUIRK_ACCESSORS(type) \
+static int type##_quirks_set(void *data, u64 val) \
+{ \
+	struct kbase_device *kbdev; \
+	kbdev = (struct kbase_device *)data; \
+	kbdev->hw_quirks_##type = (u32)val; \
+	trigger_quirks_reload(kbdev); \
+	return 0;\
+} \
+\
+static int type##_quirks_get(void *data, u64 *val) \
+{ \
+	struct kbase_device *kbdev;\
+	kbdev = (struct kbase_device *)data;\
+	*val = kbdev->hw_quirks_##type;\
+	return 0;\
+} \
+DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
+		type##_quirks_set, "%llu\n")
+
+MAKE_QUIRK_ACCESSORS(sc);
+MAKE_QUIRK_ACCESSORS(tiler);
+MAKE_QUIRK_ACCESSORS(mmu);
+MAKE_QUIRK_ACCESSORS(jm);
+
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read
+ * @file: File object to read is for
+ * @buf:  User buffer to populate with data
+ * @len:  Length of user buffer
+ * @ppos: Offset within file object
+ *
+ * Retrieves the current status of protected debug mode
+ * (0 = disabled, 1 = enabled)
+ *
+ * Return: Number of bytes added to user buffer
+ */
+static ssize_t debugfs_protected_debug_mode_read(struct file *file,
+				char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)file->private_data;
+	u32 gpu_status;
+	ssize_t ret_val;
+
+	kbase_pm_context_active(kbdev);
+	gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL);
+	kbase_pm_context_idle(kbdev);
+
+	if (gpu_status & GPU_DBGEN)
+		ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2);
+	else
+		ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2);
+
+	return ret_val;
+}
+
+/*
+ * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops
+ *
+ * Contains the file operations for the "protected_debug_mode" debugfs file
+ */
+static const struct file_operations fops_protected_debug_mode = {
+	.open = simple_open,
+	.read = debugfs_protected_debug_mode_read,
+	.llseek = default_llseek,
+};
+
+static int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	struct dentry *debugfs_ctx_defaults_directory;
+	int err;
+
+	kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
+			NULL);
+	if (!kbdev->mali_debugfs_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
+			kbdev->mali_debugfs_directory);
+	if (!kbdev->debugfs_ctx_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
+			kbdev->debugfs_ctx_directory);
+	if (!debugfs_ctx_defaults_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+#if !MALI_CUSTOMER_RELEASE
+	kbasep_regs_dump_debugfs_init(kbdev);
+#endif /* !MALI_CUSTOMER_RELEASE */
+	kbasep_regs_history_debugfs_init(kbdev);
+
+	kbase_debug_job_fault_debugfs_init(kbdev);
+	kbasep_gpu_memory_debugfs_init(kbdev);
+	kbase_as_fault_debugfs_init(kbdev);
+#if KBASE_GPU_RESET_EN
+	/* fops_* variables created by invocations of macro
+	 * MAKE_QUIRK_ACCESSORS() above. */
+	debugfs_create_file("quirks_sc", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_sc_quirks);
+	debugfs_create_file("quirks_tiler", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_tiler_quirks);
+	debugfs_create_file("quirks_mmu", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_mmu_quirks);
+	debugfs_create_file("quirks_jm", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_jm_quirks);
+#endif /* KBASE_GPU_RESET_EN */
+
+	debugfs_create_bool("infinite_cache", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->infinite_cache_active_default);
+
+	debugfs_create_size_t("mem_pool_max_size", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->mem_pool_max_size_default);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		debugfs_create_file("protected_debug_mode", S_IRUGO,
+				kbdev->mali_debugfs_directory, kbdev,
+				&fops_protected_debug_mode);
+	}
+
+#if KBASE_TRACE_ENABLE
+	kbasep_trace_debugfs_init(kbdev);
+#endif /* KBASE_TRACE_ENABLE */
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kbasep_trace_timeline_debugfs_init(kbdev);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+#ifdef CONFIG_MALI_DEVFREQ
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->inited_subsys & inited_devfreq)
+		kbase_ipa_debugfs_init(kbdev);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+
+#ifdef CONFIG_DEBUG_FS
+	debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_serialize_jobs_debugfs_fops);
+#endif /* CONFIG_DEBUG_FS */
+
+	return 0;
+
+out:
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+	return err;
+}
+
+static void kbase_device_debugfs_term(struct kbase_device *kbdev)
+{
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+}
+
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	return 0;
+}
+
+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
+#endif /* CONFIG_DEBUG_FS */
+
+static void kbase_device_coherency_init(struct kbase_device *kbdev,
+		unsigned prod_id)
+{
+#ifdef CONFIG_OF
+	u32 supported_coherency_bitmap =
+		kbdev->gpu_props.props.raw_props.coherency_mode;
+	const void *coherency_override_dts;
+	u32 override_coherency;
+
+	/* Only for tMIx :
+	 * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
+	 * documented for tMIx so force correct value here.
+	 */
+	if (GPU_ID_IS_NEW_FORMAT(prod_id) &&
+		   (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
+				   GPU_ID2_PRODUCT_TMIX))
+		if (supported_coherency_bitmap ==
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE))
+			supported_coherency_bitmap |=
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+
+#endif /* CONFIG_OF */
+
+	kbdev->system_coherency = COHERENCY_NONE;
+
+	/* device tree may override the coherency */
+#ifdef CONFIG_OF
+	coherency_override_dts = of_get_property(kbdev->dev->of_node,
+						"system-coherency",
+						NULL);
+	if (coherency_override_dts) {
+
+		override_coherency = be32_to_cpup(coherency_override_dts);
+
+		if ((override_coherency <= COHERENCY_NONE) &&
+			(supported_coherency_bitmap &
+			 COHERENCY_FEATURE_BIT(override_coherency))) {
+
+			kbdev->system_coherency = override_coherency;
+
+			dev_info(kbdev->dev,
+				"Using coherency mode %u set from dtb",
+				override_coherency);
+		} else
+			dev_warn(kbdev->dev,
+				"Ignoring unsupported coherency mode %u set from dtb",
+				override_coherency);
+	}
+
+#endif /* CONFIG_OF */
+
+	kbdev->gpu_props.props.raw_props.coherency_mode =
+		kbdev->system_coherency;
+}
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+
+/* Callback used by the kbase bus logger client, to initiate a GPU reset
+ * when the bus log is restarted.  GPU reset is used as reference point
+ * in HW bus log analyses.
+ */
+static void kbase_logging_started_cb(void *data)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)data;
+
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	dev_info(kbdev->dev, "KBASE - Bus logger restarted\n");
+}
+#endif
+
+static struct attribute *kbase_attrs[] = {
+#ifdef CONFIG_MALI_DEBUG
+	&dev_attr_debug_command.attr,
+	&dev_attr_js_softstop_always.attr,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&dev_attr_force_replay.attr,
+#endif
+	&dev_attr_js_timeouts.attr,
+	&dev_attr_soft_job_timeout.attr,
+	&dev_attr_gpuinfo.attr,
+	&dev_attr_dvfs_period.attr,
+	&dev_attr_pm_poweroff.attr,
+	&dev_attr_reset_timeout.attr,
+	&dev_attr_js_scheduling_period.attr,
+	&dev_attr_power_policy.attr,
+	&dev_attr_core_availability_policy.attr,
+	&dev_attr_core_mask.attr,
+	&dev_attr_mem_pool_size.attr,
+	&dev_attr_mem_pool_max_size.attr,
+	&dev_attr_lp_mem_pool_size.attr,
+	&dev_attr_lp_mem_pool_max_size.attr,
+	NULL
+};
+
+static const struct attribute_group kbase_attr_group = {
+	.attrs = kbase_attrs,
+};
+
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	const struct list_head *dev_list;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kfree(kbdev->gpu_props.prop_buffer);
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	if (kbdev->inited_subsys & inited_buslogger) {
+		bl_core_client_unregister(kbdev->buslogger);
+		kbdev->inited_subsys &= ~inited_buslogger;
+	}
+#endif
+
+
+	if (kbdev->inited_subsys & inited_dev_list) {
+		dev_list = kbase_dev_list_get();
+		list_del(&kbdev->entry);
+		kbase_dev_list_put(dev_list);
+		kbdev->inited_subsys &= ~inited_dev_list;
+	}
+
+	if (kbdev->inited_subsys & inited_misc_register) {
+		misc_deregister(&kbdev->mdev);
+		kbdev->inited_subsys &= ~inited_misc_register;
+	}
+
+	if (kbdev->inited_subsys & inited_sysfs_group) {
+		sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+		kbdev->inited_subsys &= ~inited_sysfs_group;
+	}
+
+	if (kbdev->inited_subsys & inited_get_device) {
+		put_device(kbdev->dev);
+		kbdev->inited_subsys &= ~inited_get_device;
+	}
+
+	if (kbdev->inited_subsys & inited_debugfs) {
+		kbase_device_debugfs_term(kbdev);
+		kbdev->inited_subsys &= ~inited_debugfs;
+	}
+
+	if (kbdev->inited_subsys & inited_job_fault) {
+		kbase_debug_job_fault_dev_term(kbdev);
+		kbdev->inited_subsys &= ~inited_job_fault;
+	}
+	if (kbdev->inited_subsys & inited_vinstr) {
+		kbase_vinstr_term(kbdev->vinstr_ctx);
+		kbdev->inited_subsys &= ~inited_vinstr;
+	}
+
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_term(kbdev);
+		kbdev->inited_subsys &= ~inited_devfreq;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_backend_late) {
+		kbase_backend_late_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_late;
+	}
+
+	if (kbdev->inited_subsys & inited_tlstream) {
+		kbase_tlstream_term();
+		kbdev->inited_subsys &= ~inited_tlstream;
+	}
+
+	/* Bring job and mem sys to a halt before we continue termination */
+
+	if (kbdev->inited_subsys & inited_js)
+		kbasep_js_devdata_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_mem)
+		kbase_mem_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_protected) {
+		kbasep_protected_mode_term(kbdev);
+		kbdev->inited_subsys &= ~inited_protected;
+	}
+
+	if (kbdev->inited_subsys & inited_js) {
+		kbasep_js_devdata_term(kbdev);
+		kbdev->inited_subsys &= ~inited_js;
+	}
+
+	if (kbdev->inited_subsys & inited_mem) {
+		kbase_mem_term(kbdev);
+		kbdev->inited_subsys &= ~inited_mem;
+	}
+
+	if (kbdev->inited_subsys & inited_ctx_sched) {
+		kbase_ctx_sched_term(kbdev);
+		kbdev->inited_subsys &= ~inited_ctx_sched;
+	}
+
+	if (kbdev->inited_subsys & inited_device) {
+		kbase_device_term(kbdev);
+		kbdev->inited_subsys &= ~inited_device;
+	}
+
+	if (kbdev->inited_subsys & inited_backend_early) {
+		kbase_backend_early_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_early;
+	}
+
+	if (kbdev->inited_subsys & inited_io_history) {
+		kbase_io_history_term(&kbdev->io_history);
+		kbdev->inited_subsys &= ~inited_io_history;
+	}
+
+	if (kbdev->inited_subsys & inited_power_control) {
+		power_control_term(kbdev);
+		kbdev->inited_subsys &= ~inited_power_control;
+	}
+
+	if (kbdev->inited_subsys & inited_registers_map) {
+		registers_unmap(kbdev);
+		kbdev->inited_subsys &= ~inited_registers_map;
+	}
+
+#ifdef CONFIG_MALI_NO_MALI
+	if (kbdev->inited_subsys & inited_gpu_device) {
+		gpu_device_destroy(kbdev);
+		kbdev->inited_subsys &= ~inited_gpu_device;
+	}
+#endif /* CONFIG_MALI_NO_MALI */
+
+	if (kbdev->inited_subsys != 0)
+		dev_err(kbdev->dev, "Missing sub system termination\n");
+
+	kbase_device_free(kbdev);
+
+	return 0;
+}
+
+
+/* Number of register accesses for the buffer that we allocate during
+ * initialization time. The buffer size can be changed later via debugfs. */
+#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512)
+
+static int kbase_platform_device_probe(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev;
+	struct mali_base_gpu_core_props *core_props;
+	u32 gpu_id;
+	unsigned prod_id;
+	const struct list_head *dev_list;
+	int err = 0;
+
+#ifdef CONFIG_OF
+	err = kbase_platform_early_init();
+	if (err) {
+		dev_err(&pdev->dev, "Early platform initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+#endif
+	kbdev = kbase_device_alloc();
+	if (!kbdev) {
+		dev_err(&pdev->dev, "Allocate device failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+
+	kbdev->dev = &pdev->dev;
+	dev_set_drvdata(kbdev->dev, kbdev);
+
+#ifdef CONFIG_MALI_NO_MALI
+	err = gpu_device_create(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Dummy model initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_gpu_device;
+#endif /* CONFIG_MALI_NO_MALI */
+
+	err = assign_irqs(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "IRQ search failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	err = registers_map(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Register map failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_registers_map;
+
+	err = power_control_init(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Power control initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_power_control;
+
+	err = kbase_io_history_init(&kbdev->io_history,
+			KBASEP_DEFAULT_REGISTER_HISTORY_SIZE);
+	if (err) {
+		dev_err(&pdev->dev, "Register access history initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+	kbdev->inited_subsys |= inited_io_history;
+
+	err = kbase_backend_early_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Early backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_early;
+
+	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+			kbase_dev_nr);
+
+	kbase_disjoint_init(kbdev);
+
+	/* obtain min/max configured gpu frequencies */
+	core_props = &(kbdev->gpu_props.props.core_props);
+	core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+	core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+
+	err = kbase_device_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Device initialization failed (%d)\n", err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_device;
+
+	err = kbase_ctx_sched_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Context scheduler initialization failed (%d)\n",
+				err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_ctx_sched;
+
+	err = kbase_mem_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Memory subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_mem;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+	prod_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	kbase_device_coherency_init(kbdev, prod_id);
+
+	err = kbasep_protected_mode_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Protected mode subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_protected;
+
+	dev_list = kbase_dev_list_get();
+	list_add(&kbdev->entry, &kbase_dev_list);
+	kbase_dev_list_put(dev_list);
+	kbdev->inited_subsys |= inited_dev_list;
+
+	err = kbasep_js_devdata_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job JS devdata initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_js;
+
+	err = kbase_tlstream_init();
+	if (err) {
+		dev_err(kbdev->dev, "Timeline stream initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_tlstream;
+
+	err = kbase_backend_late_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Late backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_late;
+
+	/* Initialize the kctx list. This is used by vinstr. */
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
+
+	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+	if (!kbdev->vinstr_ctx) {
+		dev_err(kbdev->dev,
+			"Virtual instrumentation initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+	kbdev->inited_subsys |= inited_vinstr;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	/* Devfreq uses vinstr, so must be initialized after it. */
+	err = kbase_devfreq_init(kbdev);
+	if (!err)
+		kbdev->inited_subsys |= inited_devfreq;
+	else
+		dev_err(kbdev->dev, "Continuing without devfreq\n");
+#endif /* CONFIG_MALI_DEVFREQ */
+
+	err = kbase_debug_job_fault_dev_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job fault debug initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_job_fault;
+
+	err = kbase_device_debugfs_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "DebugFS initialization failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_debugfs;
+
+	kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+	kbdev->mdev.name = kbdev->devname;
+	kbdev->mdev.fops = &kbase_fops;
+	kbdev->mdev.parent = get_device(kbdev->dev);
+	kbdev->inited_subsys |= inited_get_device;
+
+	/* This needs to happen before registering the device with misc_register(),
+	 * otherwise it causes a race condition between registering the device and a
+	 * uevent event being generated for userspace, causing udev rules to run
+	 * which might expect certain sysfs attributes present. As a result of the
+	 * race condition we avoid, some Mali sysfs entries may have appeared to
+	 * udev to not exist.
+
+	 * For more information, see
+	 * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the
+	 * paragraph that starts with "Word of warning", currently the second-last
+	 * paragraph.
+	 */
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+	if (err) {
+		dev_err(&pdev->dev, "SysFS group creation failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_sysfs_group;
+
+	err = misc_register(&kbdev->mdev);
+	if (err) {
+		dev_err(kbdev->dev, "Misc device registration failed for %s\n",
+			kbdev->devname);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_misc_register;
+
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	err = bl_core_client_register(kbdev->devname,
+						kbase_logging_started_cb,
+						kbdev, &kbdev->buslogger,
+						THIS_MODULE, NULL);
+	if (err == 0) {
+		kbdev->inited_subsys |= inited_buslogger;
+		bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
+	} else {
+		dev_warn(kbdev->dev, "Bus log client registration failed\n");
+		err = 0;
+	}
+#endif
+
+	err = kbase_gpuprops_populate_user_buffer(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "GPU property population failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	dev_info(kbdev->dev,
+			"Probed as %s\n", dev_name(kbdev->mdev.this_device));
+
+	kbase_dev_nr++;
+
+	return err;
+}
+
+#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE
+
+/**
+ * kbase_device_suspend - Suspend callback from the OS.
+ *
+ * This is called by Linux when the device should suspend.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+static int kbase_device_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_suspend_device(kbdev->devfreq);
+#endif
+
+	kbase_pm_suspend(kbdev);
+	return 0;
+}
+
+/**
+ * kbase_device_resume - Resume callback from the OS.
+ *
+ * This is called by Linux when the device should resume from suspension.
+ *
+ * @dev:  The device to resume
+ *
+ * Return: A standard Linux error code
+ */
+static int kbase_device_resume(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kbase_pm_resume(kbdev);
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_resume_device(kbdev->devfreq);
+#endif
+	return 0;
+}
+
+/**
+ * kbase_device_runtime_suspend - Runtime suspend callback from the OS.
+ *
+ * This is called by Linux when the device should prepare for a condition in
+ * which it will not be able to communicate with the CPU(s) and RAM due to
+ * power management.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_suspend_device(kbdev->devfreq);
+#endif
+
+	if (kbdev->pm.backend.callback_power_runtime_off) {
+		kbdev->pm.backend.callback_power_runtime_off(kbdev);
+		dev_dbg(dev, "runtime suspend\n");
+	}
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/**
+ * kbase_device_runtime_resume - Runtime resume callback from the OS.
+ *
+ * This is called by Linux when the device should go into a fully active state.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_resume(struct device *dev)
+{
+	int ret = 0;
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->pm.backend.callback_power_runtime_on) {
+		ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
+		dev_dbg(dev, "runtime resume\n");
+	}
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_resume_device(kbdev->devfreq);
+#endif
+
+	return ret;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_device_runtime_idle - Runtime idle callback from the OS.
+ * @dev: The device to suspend
+ *
+ * This is called by Linux when the device appears to be inactive and it might
+ * be placed into a low power state.
+ *
+ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend,
+ * otherwise a standard Linux error code
+ */
+static int kbase_device_runtime_idle(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* Use platform specific implementation if it exists. */
+	if (kbdev->pm.backend.callback_power_runtime_idle)
+		return kbdev->pm.backend.callback_power_runtime_idle(kbdev);
+
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/* The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+	.suspend = kbase_device_suspend,
+	.resume = kbase_device_resume,
+#ifdef KBASE_PM_RUNTIME
+	.runtime_suspend = kbase_device_runtime_suspend,
+	.runtime_resume = kbase_device_runtime_resume,
+	.runtime_idle = kbase_device_runtime_idle,
+#endif /* KBASE_PM_RUNTIME */
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+	{ .compatible = "arm,malit6xx" },
+	{ .compatible = "arm,mali-midgard" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
+
+static struct platform_driver kbase_platform_driver = {
+	.probe = kbase_platform_device_probe,
+	.remove = kbase_platform_device_remove,
+	.driver = {
+		   .name = kbase_drv_name,
+		   .owner = THIS_MODULE,
+		   .pm = &kbase_pm_ops,
+		   .of_match_table = of_match_ptr(kbase_dt_ids),
+	},
+};
+
+/*
+ * The driver will not provide a shortcut to create the Mali platform device
+ * anymore when using Device Tree.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else
+
+static int __init kbase_driver_init(void)
+{
+	int ret;
+
+	ret = kbase_platform_early_init();
+	if (ret)
+		return ret;
+
+	ret = kbase_platform_register();
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&kbase_platform_driver);
+
+	if (ret)
+		kbase_platform_unregister();
+
+	return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+	platform_driver_unregister(&kbase_platform_driver);
+	kbase_platform_unregister();
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
+		__stringify(BASE_UK_VERSION_MAJOR) "." \
+		__stringify(BASE_UK_VERSION_MINOR) ")");
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
+#define CREATE_TRACE_POINTS
+#endif
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Create the trace points (otherwise we just get code to call a tracepoint) */
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
+
+void kbase_trace_mali_pm_status(u32 event, u64 value)
+{
+	trace_mali_pm_status(event, value);
+}
+
+void kbase_trace_mali_pm_power_off(u32 event, u64 value)
+{
+	trace_mali_pm_power_off(event, value);
+}
+
+void kbase_trace_mali_pm_power_on(u32 event, u64 value)
+{
+	trace_mali_pm_power_on(event, value);
+}
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id)
+{
+	trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id);
+}
+
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value)
+{
+	trace_mali_page_fault_insert_pages(event, value);
+}
+
+void kbase_trace_mali_mmu_as_in_use(int event)
+{
+	trace_mali_mmu_as_in_use(event);
+}
+
+void kbase_trace_mali_mmu_as_released(int event)
+{
+	trace_mali_mmu_as_released(event);
+}
+
+void kbase_trace_mali_total_alloc_pages_change(long long int event)
+{
+	trace_mali_total_alloc_pages_change(event);
+}
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_MALI_SYSTEM_TRACE
+#include "mali_linux_kbase_trace.h"
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
new file mode 100644
index 0000000..e2f7baa
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
@@ -0,0 +1,203 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_ctx_sched.h"
+
+int kbase_ctx_sched_init(struct kbase_device *kbdev)
+{
+	int as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
+
+	/* These two must be recalculated if nr_hw_address_spaces changes
+	 * (e.g. for HW workarounds) */
+	kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+		bool use_workaround;
+
+		use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
+		if (use_workaround) {
+			dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
+			kbdev->nr_user_address_spaces = 1;
+		}
+	}
+
+	kbdev->as_free = as_present; /* All ASs initially free */
+
+	memset(kbdev->as_to_kctx, 0, sizeof(kbdev->as_to_kctx));
+
+	return 0;
+}
+
+void kbase_ctx_sched_term(struct kbase_device *kbdev)
+{
+	s8 i;
+
+	/* Sanity checks */
+	for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
+		WARN_ON(kbdev->as_to_kctx[i] != NULL);
+		WARN_ON(!(kbdev->as_free & (1u << i)));
+	}
+}
+
+/* kbasep_ctx_sched_find_as_for_ctx - Find a free address space
+ *
+ * @kbdev: The context for which to find a free address space
+ *
+ * Return: A valid AS if successful, otherwise KBASEP_AS_NR_INVALID
+ *
+ * This function returns an address space available for use. It would prefer
+ * returning an AS that has been previously assigned to the context to
+ * avoid having to reprogram the MMU.
+ */
+static int kbasep_ctx_sched_find_as_for_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+	int free_as;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* First check if the previously assigned AS is available */
+	if ((kctx->as_nr != KBASEP_AS_NR_INVALID) &&
+			(kbdev->as_free & (1u << kctx->as_nr)))
+		return kctx->as_nr;
+
+	/* The previously assigned AS was taken, we'll be returning any free
+	 * AS at this point.
+	 */
+	free_as = ffs(kbdev->as_free) - 1;
+	if (free_as >= 0 && free_as < kbdev->nr_hw_address_spaces)
+		return free_as;
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(!kbdev->pm.backend.gpu_powered);
+
+	if (atomic_inc_return(&kctx->refcount) == 1) {
+		int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx);
+
+		if (free_as != KBASEP_AS_NR_INVALID) {
+			kbdev->as_free &= ~(1u << free_as);
+			/* Only program the MMU if the context has not been
+			 * assigned the same address space before.
+			 */
+			if (free_as != kctx->as_nr) {
+				struct kbase_context *const prev_kctx =
+					kbdev->as_to_kctx[free_as];
+
+				if (prev_kctx) {
+					WARN_ON(atomic_read(&prev_kctx->refcount) != 0);
+					kbase_mmu_disable(prev_kctx);
+					prev_kctx->as_nr = KBASEP_AS_NR_INVALID;
+				}
+
+				kctx->as_nr = free_as;
+				kbdev->as_to_kctx[free_as] = kctx;
+				kbase_mmu_update(kctx);
+			}
+		} else {
+			atomic_dec(&kctx->refcount);
+
+			/* Failed to find an available address space, we must
+			 * be returning an error at this point.
+			 */
+			WARN_ON(kctx->as_nr != KBASEP_AS_NR_INVALID);
+		}
+	}
+
+	return kctx->as_nr;
+}
+
+void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	WARN_ON(atomic_read(&kctx->refcount) == 0);
+	WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID);
+	WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx);
+
+	atomic_inc(&kctx->refcount);
+}
+
+void kbase_ctx_sched_release_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (atomic_dec_return(&kctx->refcount) == 0)
+		kbdev->as_free |= (1u << kctx->as_nr);
+}
+
+void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(atomic_read(&kctx->refcount) != 0);
+
+	if (kctx->as_nr != KBASEP_AS_NR_INVALID) {
+		if (kbdev->pm.backend.gpu_powered)
+			kbase_mmu_disable(kctx);
+
+		kbdev->as_to_kctx[kctx->as_nr] = NULL;
+		kctx->as_nr = KBASEP_AS_NR_INVALID;
+	}
+}
+
+void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
+{
+	s8 i;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(!kbdev->pm.backend.gpu_powered);
+
+	for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
+		struct kbase_context *kctx;
+
+		kctx = kbdev->as_to_kctx[i];
+		if (kctx) {
+			if (atomic_read(&kctx->refcount)) {
+				WARN_ON(kctx->as_nr != i);
+
+				kbase_mmu_update(kctx);
+			} else {
+				/* This context might have been assigned an
+				 * AS before, clear it.
+				 */
+				kbdev->as_to_kctx[kctx->as_nr] = NULL;
+				kctx->as_nr = KBASEP_AS_NR_INVALID;
+			}
+		} else {
+			kbase_mmu_disable_as(kbdev, i);
+		}
+	}
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h
new file mode 100644
index 0000000..77d2232
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_CTX_SCHED_H_
+#define _KBASE_CTX_SCHED_H_
+
+#include <mali_kbase.h>
+
+/* The Context Scheduler manages address space assignment and reference
+ * counting to kbase_context. The interface has been designed to minimise
+ * interactions between the Job Scheduler and Power Management/MMU to support
+ * the existing Job Scheduler interface.
+ *
+ * The initial implementation of the Context Scheduler does not schedule
+ * contexts. Instead it relies on the Job Scheduler to make decisions of
+ * when to schedule/evict contexts if address spaces are starved. In the
+ * future, once an interface between the CS and JS has been devised to
+ * provide enough information about how each context is consuming GPU resources,
+ * those decisions can be made in the CS itself, thereby reducing duplicated
+ * code.
+ */
+
+/* base_ctx_sched_init - Initialise the context scheduler
+ *
+ * @kbdev: The device for which the context scheduler needs to be
+ *         initialised
+ *
+ * Return: 0 for success, otherwise failure
+ *
+ * This must be called during device initilisation. The number of hardware
+ * address spaces must already be established before calling this function.
+ */
+int kbase_ctx_sched_init(struct kbase_device *kbdev);
+
+/* base_ctx_sched_term - Terminate the context scheduler
+ *
+ * @kbdev: The device for which the context scheduler needs to be
+ *         terminated
+ *
+ * This must be called during device termination after all contexts have been
+ * destroyed.
+ */
+void kbase_ctx_sched_term(struct kbase_device *kbdev);
+
+/* kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context
+ *
+ * @kctx: The context to which to retain a reference
+ *
+ * Return: The address space that the context has been assigned to or
+ *         KBASEP_AS_NR_INVALID if no address space was available.
+ *
+ * This function should be called whenever an address space should be assigned
+ * to a context and programmed onto the MMU. It should typically be called
+ * when jobs are ready to be submitted to the GPU.
+ *
+ * It can be called as many times as necessary. The address space will be
+ * assigned to the context for as long as there is a reference to said context.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ */
+int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx);
+
+/* kbase_ctx_sched_retain_ctx_refcount
+ *
+ * @kctx: The context to which to retain a reference
+ *
+ * This function only retains a reference to the context. It must be called
+ * only when the context already has a reference.
+ *
+ * This is typically called inside an atomic session where we know the context
+ * is already scheduled in but want to take an extra reference to ensure that
+ * it doesn't get descheduled.
+ *
+ * The kbase_device::hwaccess_lock must be held whilst calling this function
+ */
+void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx);
+
+/* kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context
+ *
+ * @kctx: The context from which to release a reference
+ *
+ * This function should be called whenever an address space could be unassigned
+ * from a context. When there are no more references to said context, the
+ * address space previously assigned to this context shall be reassigned to
+ * other contexts as needed.
+ *
+ * The kbase_device::hwaccess_lock must be held whilst calling this function
+ */
+void kbase_ctx_sched_release_ctx(struct kbase_context *kctx);
+
+/* kbase_ctx_sched_remove_ctx - Unassign previously assigned address space
+ *
+ * @kctx: The context to be removed
+ *
+ * This function should be called when a context is being destroyed. The
+ * context must no longer have any reference. If it has been assigned an
+ * address space before then the AS will be unprogrammed.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ */
+void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx);
+
+/* kbase_ctx_sched_restore_all_as - Reprogram all address spaces
+ *
+ * @kbdev: The device for which address spaces to be reprogrammed
+ *
+ * This function shall reprogram all address spaces previously assigned to
+ * contexts. It can be used after the GPU is reset.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ */
+void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev);
+
+#endif /* _KBASE_CTX_SCHED_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100644
index 0000000..fb57ac2
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+	NULL,
+	NULL
+};
+
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+	kbasep_debug_assert_registered_cb.func = func;
+	kbasep_debug_assert_registered_cb.param = param;
+}
+
+void kbasep_debug_assert_call_hook(void)
+{
+	if (kbasep_debug_assert_registered_cb.func != NULL)
+		kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
new file mode 100644
index 0000000..d7873c5
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If different from 0, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+struct kbasep_debug_assert_cb {
+	kbase_debug_assert_hook *func;
+	void *param;
+};
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+		"In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION __func__
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated to the @ref KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+		do { \
+			pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+			pr_err(__VA_ARGS__);\
+			pr_err("\n");\
+		} while (false)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @ref KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+	KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+	/**
+	 * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+	 * @brief Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+	 *
+	 * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+	 *
+	 * @param expr Boolean expression
+	 * @param ...  Message to display when @a expr is false, as a format string followed by format arguments.
+	 */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+		do { \
+			if (!(expr)) { \
+				KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+				KBASE_CALL_ASSERT_HOOK();\
+				BUG();\
+			} \
+		} while (false)
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif				/* CONFIG_MALI_DEBUG */
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif				/* _KBASE_DEBUG_H */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
new file mode 100644
index 0000000..f29430d
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
@@ -0,0 +1,499 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/spinlock.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+	bool             ret;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	ret = !list_empty(event_list);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return ret;
+}
+
+static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
+	struct base_job_fault_event *event;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		return true;
+	}
+	list_for_each_entry(event, event_list, head) {
+		if (event->katom->kctx == kctx) {
+			spin_unlock_irqrestore(&kbdev->job_fault_event_lock,
+					flags);
+			return false;
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+	return true;
+}
+
+/* wait until the fault happen and copy the event */
+static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
+		struct base_job_fault_event *event)
+{
+	struct list_head            *event_list = &kbdev->job_fault_event_list;
+	struct base_job_fault_event *event_in;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		if (wait_event_interruptible(kbdev->job_fault_wq,
+				 kbase_is_job_fault_event_pending(kbdev)))
+			return -ERESTARTSYS;
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+
+	event_in = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	event->event_code = event_in->event_code;
+	event->katom = event_in->katom;
+
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return 0;
+
+}
+
+/* remove the event from the queue */
+static struct base_job_fault_event *kbase_job_fault_event_dequeue(
+		struct kbase_device *kbdev, struct list_head *event_list)
+{
+	struct base_job_fault_event *event;
+
+	event = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	list_del(event_list->next);
+
+	return event;
+
+}
+
+/* Remove all the following atoms after the failed atom in the same context
+ * Call the postponed bottom half of job done.
+ * Then, this context could be rescheduled.
+ */
+static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
+{
+	struct list_head *event_list = &kctx->job_fault_resume_event_list;
+
+	while (!list_empty(event_list)) {
+		struct base_job_fault_event *event;
+
+		event = kbase_job_fault_event_dequeue(kctx->kbdev,
+				&kctx->job_fault_resume_event_list);
+		kbase_jd_done_worker(&event->katom->work);
+	}
+
+}
+
+/* Remove all the failed atoms that belong to different contexts
+ * Resume all the contexts that were suspend due to failed job
+ */
+static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	while (!list_empty(event_list)) {
+		kbase_job_fault_event_dequeue(kbdev, event_list);
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		wake_up(&kbdev->job_fault_resume_wq);
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+}
+
+static void kbase_job_fault_resume_worker(struct work_struct *data)
+{
+	struct base_job_fault_event *event = container_of(data,
+			struct base_job_fault_event, job_fault_work);
+	struct kbase_context *kctx;
+	struct kbase_jd_atom *katom;
+
+	katom = event->katom;
+	kctx = katom->kctx;
+
+	dev_info(kctx->kbdev->dev, "Job dumping wait\n");
+
+	/* When it was waked up, it need to check if queue is empty or the
+	 * failed atom belongs to different context. If yes, wake up. Both
+	 * of them mean the failed job has been dumped. Please note, it
+	 * should never happen that the job_fault_event_list has the two
+	 * atoms belong to the same context.
+	 */
+	wait_event(kctx->kbdev->job_fault_resume_wq,
+			 kbase_ctx_has_no_event_pending(kctx));
+
+	atomic_set(&kctx->job_fault_count, 0);
+	kbase_jd_done_worker(&katom->work);
+
+	/* In case the following atoms were scheduled during failed job dump
+	 * the job_done_worker was held. We need to rerun it after the dump
+	 * was finished
+	 */
+	kbase_job_fault_resume_event_cleanup(kctx);
+
+	dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
+}
+
+static struct base_job_fault_event *kbase_job_fault_event_queue(
+		struct list_head *event_list,
+		struct kbase_jd_atom *atom,
+		u32 completion_code)
+{
+	struct base_job_fault_event *event;
+
+	event = &atom->fault_event;
+
+	event->katom = atom;
+	event->event_code = completion_code;
+
+	list_add_tail(&event->head, event_list);
+
+	return event;
+
+}
+
+static void kbase_job_fault_event_post(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, u32 completion_code)
+{
+	struct base_job_fault_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
+				katom, completion_code);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	wake_up_interruptible(&kbdev->job_fault_wq);
+
+	INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
+	queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
+
+	dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
+			katom->kctx->tgid, katom->kctx->id);
+
+}
+
+/*
+ * This function will process the job fault
+ * Get the register copy
+ * Send the failed job dump event
+ * Create a Wait queue to wait until the job dump finish
+ */
+
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Check if dumping is in the process
+	 * only one atom of each context can be dumped at the same time
+	 * If the atom belongs to different context, it can be dumped
+	 */
+	if (atomic_read(&kctx->job_fault_count) > 0) {
+		kbase_job_fault_event_queue(
+				&kctx->job_fault_resume_event_list,
+				katom, completion_code);
+		dev_info(kctx->kbdev->dev, "queue:%d\n",
+				kbase_jd_atom_id(kctx, katom));
+		return true;
+	}
+
+	if (kctx->kbdev->job_fault_debug == true) {
+
+		if (completion_code != BASE_JD_EVENT_DONE) {
+
+			if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
+				dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
+				return false;
+			}
+
+			kbase_job_fault_event_post(kctx->kbdev, katom,
+					completion_code);
+			atomic_inc(&kctx->job_fault_count);
+			dev_info(kctx->kbdev->dev, "post:%d\n",
+					kbase_jd_atom_id(kctx, katom));
+			return true;
+
+		}
+	}
+	return false;
+
+}
+
+static int debug_job_fault_show(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+	struct kbase_context *kctx = event->katom->kctx;
+	int i;
+
+	dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
+			kctx->tgid, kctx->id, event->reg_offset);
+
+	if (kctx->reg_dump == NULL) {
+		dev_warn(kbdev->dev, "reg dump is NULL");
+		return -1;
+	}
+
+	if (kctx->reg_dump[event->reg_offset] ==
+			REGISTER_DUMP_TERMINATION_FLAG) {
+		/* Return the error here to stop the read. And the
+		 * following next() will not be called. The stop can
+		 * get the real event resource and release it
+		 */
+		return -1;
+	}
+
+	if (event->reg_offset == 0)
+		seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
+
+	for (i = 0; i < 50; i++) {
+		if (kctx->reg_dump[event->reg_offset] ==
+				REGISTER_DUMP_TERMINATION_FLAG) {
+			break;
+		}
+		seq_printf(m, "%08x: %08x\n",
+				kctx->reg_dump[event->reg_offset],
+				kctx->reg_dump[1+event->reg_offset]);
+		event->reg_offset += 2;
+
+	}
+
+
+	return 0;
+}
+static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+
+	dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
+			event->reg_offset, (int)*pos);
+
+	return event;
+}
+
+static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event;
+
+	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);
+
+	/* The condition is trick here. It needs make sure the
+	 * fault hasn't happened and the dumping hasn't been started,
+	 * or the dumping has finished
+	 */
+	if (*pos == 0) {
+		event = kmalloc(sizeof(*event), GFP_KERNEL);
+		if (!event)
+			return NULL;
+		event->reg_offset = 0;
+		if (kbase_job_fault_event_wait(kbdev, event)) {
+			kfree(event);
+			return NULL;
+		}
+
+		/* The cache flush workaround is called in bottom half of
+		 * job done but we delayed it. Now we should clean cache
+		 * earlier. Then the GPU memory dump should be correct.
+		 */
+		kbase_backend_cacheclean(kbdev, event->katom);
+	} else
+		return NULL;
+
+	return event;
+}
+
+static void debug_job_fault_stop(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+
+	/* here we wake up the kbase_jd_done_worker after stop, it needs
+	 * get the memory dump before the register dump in debug daemon,
+	 * otherwise, the memory dump may be incorrect.
+	 */
+
+	if (v != NULL) {
+		kfree(v);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 1");
+
+	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+		if (!list_empty(&kbdev->job_fault_event_list)) {
+			kbase_job_fault_event_dequeue(kbdev,
+				&kbdev->job_fault_event_list);
+			wake_up(&kbdev->job_fault_resume_wq);
+		}
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
+	}
+
+}
+
+static const struct seq_operations ops = {
+	.start = debug_job_fault_start,
+	.next = debug_job_fault_next,
+	.stop = debug_job_fault_stop,
+	.show = debug_job_fault_show,
+};
+
+static int debug_job_fault_open(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_open(file, &ops);
+
+	((struct seq_file *)file->private_data)->private = kbdev;
+	dev_info(kbdev->dev, "debug job fault seq open");
+
+	kbdev->job_fault_debug = true;
+
+	return 0;
+
+}
+
+static int debug_job_fault_release(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_release(in, file);
+
+	kbdev->job_fault_debug = false;
+
+	/* Clean the unprocessed job fault. After that, all the suspended
+	 * contexts could be rescheduled.
+	 */
+	kbase_job_fault_event_cleanup(kbdev);
+
+	dev_info(kbdev->dev, "debug job fault seq close");
+
+	return 0;
+}
+
+static const struct file_operations kbasep_debug_job_fault_fops = {
+	.open = debug_job_fault_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = debug_job_fault_release,
+};
+
+/*
+ *  Initialize debugfs entry for job fault dump
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("job_fault", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_debug_job_fault_fops);
+}
+
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+
+	INIT_LIST_HEAD(&kbdev->job_fault_event_list);
+
+	init_waitqueue_head(&(kbdev->job_fault_wq));
+	init_waitqueue_head(&(kbdev->job_fault_resume_wq));
+	spin_lock_init(&kbdev->job_fault_event_lock);
+
+	kbdev->job_fault_resume_workq = alloc_workqueue(
+			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
+	if (!kbdev->job_fault_resume_workq)
+		return -ENOMEM;
+
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+/*
+ * Release the relevant resource per device
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->job_fault_resume_workq);
+}
+
+
+/*
+ *  Initialize the relevant data structure per context
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
+{
+
+	/* We need allocate double size register range
+	 * Because this memory will keep the register address and value
+	 */
+	kctx->reg_dump = vmalloc(0x4000 * 2);
+	if (kctx->reg_dump == NULL)
+		return;
+
+	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
+		vfree(kctx->reg_dump);
+		kctx->reg_dump = NULL;
+	}
+	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
+	atomic_set(&kctx->job_fault_count, 0);
+
+}
+
+/*
+ *  release the relevant resource per context
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
+{
+	vfree(kctx->reg_dump);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
new file mode 100644
index 0000000..a2bf898
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
@@ -0,0 +1,96 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_JOB_FAULT_H
+#define _KBASE_DEBUG_JOB_FAULT_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
+
+/**
+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue
+ *		per device and initialize the required lists.
+ * @kbdev:	Device pointer
+ *
+ * Return: Zero on success or a negative error code.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_dev_term - Clean up resources created in
+ *		kbase_debug_job_fault_dev_init.
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_context_init - Initialize the relevant
+ *		data structure per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_context_term - Release the relevant
+ *		resource per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_process - Process the failed job.
+ *      It will send a event and wake up the job fault waiting queue
+ *      Then create a work queue to wait for job dump finish
+ *      This function should be called in the interrupt handler and before
+ *      jd_done that make sure the jd_done_worker will be delayed until the
+ *      job dump finish
+ * @katom: The failed atom pointer
+ * @completion_code: the job status
+ * @return true if dump is going on
+ */
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code);
+
+
+/**
+ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
+ *      address during the job fault process, the relevant registers will
+ *      be saved when a job fault happen
+ * @kctx: KBase context pointer
+ * @reg_range: Maximum register address space
+ * @return true if initializing successfully
+ */
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range);
+
+/**
+ * kbase_job_fault_get_reg_snapshot - Read the interested registers for
+ *      failed job dump
+ * @kctx: KBase context pointer
+ * @return true if getting registers successfully
+ */
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
+
+#endif  /*_KBASE_DEBUG_JOB_FAULT_H*/
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
new file mode 100644
index 0000000..c65b4df
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -0,0 +1,306 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+struct debug_mem_mapping {
+	struct list_head node;
+
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long flags;
+
+	u64 start_pfn;
+	size_t nr_pages;
+};
+
+struct debug_mem_data {
+	struct list_head mapping_list;
+	struct kbase_context *kctx;
+};
+
+struct debug_mem_seq_off {
+	struct list_head *lh;
+	size_t offset;
+};
+
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data;
+	struct debug_mem_mapping *map;
+	loff_t pos = *_pos;
+
+	list_for_each_entry(map, &mem_data->mapping_list, node) {
+		if (pos >= map->nr_pages) {
+			pos -= map->nr_pages;
+		} else {
+			data = kmalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				return NULL;
+			data->lh = &map->node;
+			data->offset = pos;
+			return data;
+		}
+	}
+
+	/* Beyond the end */
+	return NULL;
+}
+
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+	kfree(v);
+}
+
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	if (data->offset < map->nr_pages - 1) {
+		data->offset++;
+		++*pos;
+		return data;
+	}
+
+	if (list_is_last(data->lh, &mem_data->mapping_list)) {
+		kfree(data);
+		return NULL;
+	}
+
+	data->lh = data->lh->next;
+	data->offset = 0;
+	++*pos;
+
+	return data;
+}
+
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+	int i, j;
+	struct page *page;
+	uint32_t *mapping;
+	pgprot_t prot = PAGE_KERNEL;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	kbase_gpu_vm_lock(mem_data->kctx);
+
+	if (data->offset >= map->alloc->nents) {
+		seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+				data->offset) << PAGE_SHIFT);
+		goto out;
+	}
+
+	if (!(map->flags & KBASE_REG_CPU_CACHED))
+		prot = pgprot_writecombine(prot);
+
+	page = phys_to_page(as_phys_addr_t(map->alloc->pages[data->offset]));
+	mapping = vmap(&page, 1, VM_MAP, prot);
+	if (!mapping)
+		goto out;
+
+	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+		seq_printf(m, "%016llx:", i + ((map->start_pfn +
+				data->offset) << PAGE_SHIFT));
+
+		for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+			seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+		seq_putc(m, '\n');
+	}
+
+	vunmap(mapping);
+
+	seq_putc(m, '\n');
+
+out:
+	kbase_gpu_vm_unlock(mem_data->kctx);
+	return 0;
+}
+
+static const struct seq_operations ops = {
+	.start = debug_mem_start,
+	.next = debug_mem_next,
+	.stop = debug_mem_stop,
+	.show = debug_mem_show,
+};
+
+static int debug_mem_zone_open(struct rb_root *rbtree,
+						struct debug_mem_data *mem_data)
+{
+	int ret = 0;
+	struct rb_node *p;
+	struct kbase_va_region *reg;
+	struct debug_mem_mapping *mapping;
+
+	for (p = rb_first(rbtree); p; p = rb_next(p)) {
+		reg = rb_entry(p, struct kbase_va_region, rblink);
+
+		if (reg->gpu_alloc == NULL)
+			/* Empty region - ignore */
+			continue;
+
+		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		mapping->start_pfn = reg->start_pfn;
+		mapping->nr_pages = reg->nr_pages;
+		mapping->flags = reg->flags;
+		list_add_tail(&mapping->node, &mem_data->mapping_list);
+	}
+
+out:
+	return ret;
+}
+
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+	struct file *kctx_file = i->i_private;
+	struct kbase_context *kctx = kctx_file->private_data;
+	struct debug_mem_data *mem_data;
+	int ret;
+
+	ret = seq_open(file, &ops);
+	if (ret)
+		return ret;
+
+	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	if (!mem_data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mem_data->kctx = kctx;
+
+	INIT_LIST_HEAD(&mem_data->mapping_list);
+
+	get_file(kctx_file);
+
+	kbase_gpu_vm_lock(kctx);
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	((struct seq_file *)file->private_data)->private = mem_data;
+
+	return 0;
+
+out:
+	if (mem_data) {
+		while (!list_empty(&mem_data->mapping_list)) {
+			struct debug_mem_mapping *mapping;
+
+			mapping = list_first_entry(&mem_data->mapping_list,
+					struct debug_mem_mapping, node);
+			kbase_mem_phy_alloc_put(mapping->alloc);
+			list_del(&mapping->node);
+			kfree(mapping);
+		}
+		fput(kctx_file);
+		kfree(mem_data);
+	}
+	seq_release(i, file);
+	return ret;
+}
+
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+	struct file *kctx_file = inode->i_private;
+	struct seq_file *sfile = file->private_data;
+	struct debug_mem_data *mem_data = sfile->private;
+	struct debug_mem_mapping *mapping;
+
+	seq_release(inode, file);
+
+	while (!list_empty(&mem_data->mapping_list)) {
+		mapping = list_first_entry(&mem_data->mapping_list,
+				struct debug_mem_mapping, node);
+		kbase_mem_phy_alloc_put(mapping->alloc);
+		list_del(&mapping->node);
+		kfree(mapping);
+	}
+
+	kfree(mem_data);
+
+	fput(kctx_file);
+
+	return 0;
+}
+
+static const struct file_operations kbase_debug_mem_view_fops = {
+	.open = debug_mem_open,
+	.release = debug_mem_release,
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+/**
+ * kbase_debug_mem_view_init - Initialise the mem_view sysfs file
+ * @kctx_file: The /dev/mali0 file instance for the context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct file *kctx_file)
+{
+	struct kbase_context *kctx = kctx_file->private_data;
+
+	debugfs_create_file("mem_view", S_IRUSR, kctx->kctx_dentry, kctx_file,
+			&kbase_debug_mem_view_fops);
+}
+
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
new file mode 100644
index 0000000..20ab51a
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -0,0 +1,25 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+void kbase_debug_mem_view_init(struct file *kctx_file);
+
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100644
index 0000000..09415b3
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,1625 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Defintions (types, defines, etcs) common to Kbase. They are placed here to
+ * allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_gpuprops_types.h>
+#include <protected_mode_switcher.h>
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/sizes.h>
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#include <linux/bus_logger.h>
+#endif
+
+
+#if defined(CONFIG_SYNC)
+#include <sync.h>
+#else
+#include "mali_kbase_fence_defs.h"
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif				/* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_MALI_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_MALI_DEVFREQ */
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#if defined(CONFIG_PM_RUNTIME) || \
+	(defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+#define KBASE_PM_RUNTIME 1
+#endif
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif				/* CONFIG_MALI_DEBUG */
+#endif				/* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT             1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT           500
+
+/**
+ * Prevent soft-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * Therefore, soft stop may still be disabled due to HW issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+
+/**
+ * Prevent hard-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS        3
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS              16
+
+/* mmu */
+#define MIDGARD_MMU_VA_BITS 48
+
+#define MIDGARD_MMU_LEVEL(x) (x)
+
+#if MIDGARD_MMU_VA_BITS > 39
+#define MIDGARD_MMU_TOPLEVEL    MIDGARD_MMU_LEVEL(0)
+#else
+#define MIDGARD_MMU_TOPLEVEL    MIDGARD_MMU_LEVEL(1)
+#endif
+
+#define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3)
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID     (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8	/* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_hwaccess_defs.h"
+
+#define KBASEP_FORCE_REPLAY_DISABLED 0
+
+/* Maximum force replay limit when randomization is enabled */
+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+
+/** Atom has been previously soft-stoppped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has been previously retried to execute */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has fail dependency on cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom is currently in the list of atoms blocked on cross-slot dependencies */
+#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in protected mode */
+#define KBASE_KATOM_FLAG_PROTECTED (1<<11)
+/* Atom has been stored in runnable_tree */
+#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS  (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ *  entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+		(JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+/* Serialize atoms within a slot (ie only one atom per job slot) */
+#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0)
+/* Serialize atoms between slots (ie only one job slot running at any time) */
+#define KBASE_SERIALIZE_INTER_SLOT (1 << 1)
+/* Reset the GPU after each atom completion */
+#define KBASE_SERIALIZE_RESET (1 << 2)
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_device;
+struct kbase_as;
+struct kbase_mmu_setup;
+
+#ifdef CONFIG_DEBUG_FS
+struct base_job_fault_event {
+
+	u32 event_code;
+	struct kbase_jd_atom *katom;
+	struct work_struct job_fault_work;
+	struct list_head head;
+	int reg_offset;
+};
+
+#endif
+
+struct kbase_jd_atom_dependency {
+	struct kbase_jd_atom *atom;
+	u8 dep_type;
+};
+
+/**
+ * struct kbase_io_access - holds information about 1 register access
+ *
+ * @addr: first bit indicates r/w (r=0, w=1)
+ * @value: value written or read
+ */
+struct kbase_io_access {
+	uintptr_t addr;
+	u32 value;
+};
+
+/**
+ * struct kbase_io_history - keeps track of all recent register accesses
+ *
+ * @enabled: true if register accesses are recorded, false otherwise
+ * @lock: spinlock protecting kbase_io_access array
+ * @count: number of registers read/written
+ * @size: number of elements in kbase_io_access array
+ * @buf: array of kbase_io_access
+ */
+struct kbase_io_history {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool enabled;
+#else
+	u32 enabled;
+#endif
+
+	spinlock_t lock;
+	size_t count;
+	u16 size;
+	struct kbase_io_access *buf;
+};
+
+/**
+ * @brief The function retrieves a read-only reference to the atom field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return readonly reference to dependent ATOM.
+ */
+static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return (const struct kbase_jd_atom *)(dep->atom);
+}
+
+/**
+ * @brief The function retrieves a read-only reference to the dependency type field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return A dependency type value.
+ */
+static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return dep->dep_type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency.
+ * @param[in] a      The ATOM to be set as a dependency.
+ * @param     type   The ATOM dependency type to be set.
+ *
+ */
+static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
+		struct kbase_jd_atom *a, u8 type)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = a;
+	dep->dep_type = type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency to be cleared.
+ *
+ */
+static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = NULL;
+	dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
+
+enum kbase_atom_gpu_rb_state {
+	/* Atom is not currently present in slot ringbuffer */
+	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
+	/* Atom is in slot ringbuffer but is blocked on a previous atom */
+	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+	/* Atom is in slot ringbuffer but is waiting for a previous protected
+	 * mode transition to complete */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
+	/* Atom is in slot ringbuffer but is waiting for proected mode
+	 * transition */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
+	/* Atom is in slot ringbuffer but is waiting for cores to become
+	 * available */
+	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
+	/* Atom is in slot ringbuffer but is blocked on affinity */
+	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
+	/* Atom is in slot ringbuffer and ready to run */
+	KBASE_ATOM_GPU_RB_READY,
+	/* Atom is in slot ringbuffer and has been submitted to the GPU */
+	KBASE_ATOM_GPU_RB_SUBMITTED,
+	/* Atom must be returned to JS as soon as it reaches the head of the
+	 * ringbuffer due to a previous failure */
+	KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1
+};
+
+enum kbase_atom_enter_protected_state {
+	/*
+	 * Starting state:
+	 * Check if a transition into protected mode is required.
+	 *
+	 * NOTE: The integer value of this must
+	 *       match KBASE_ATOM_EXIT_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
+	/* Wait for vinstr to suspend. */
+	KBASE_ATOM_ENTER_PROTECTED_VINSTR,
+	/* Wait for the L2 to become idle in preparation for
+	 * the coherency change. */
+	KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
+	/* End state;
+	 * Prepare coherency change. */
+	KBASE_ATOM_ENTER_PROTECTED_FINISHED,
+};
+
+enum kbase_atom_exit_protected_state {
+	/*
+	 * Starting state:
+	 * Check if a transition out of protected mode is required.
+	 *
+	 * NOTE: The integer value of this must
+	 *       match KBASE_ATOM_ENTER_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
+	/* Wait for the L2 to become idle in preparation
+	 * for the reset. */
+	KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
+	/* Issue the protected reset. */
+	KBASE_ATOM_EXIT_PROTECTED_RESET,
+	/* End state;
+	 * Wait for the reset to complete. */
+	KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
+};
+
+struct kbase_ext_res {
+	u64 gpu_address;
+	struct kbase_mem_phy_alloc *alloc;
+};
+
+struct kbase_jd_atom {
+	struct work_struct work;
+	ktime_t start_timestamp;
+
+	struct base_jd_udata udata;
+	struct kbase_context *kctx;
+
+	struct list_head dep_head[2];
+	struct list_head dep_item[2];
+	const struct kbase_jd_atom_dependency dep[2];
+	/* List head used during job dispatch job_done processing - as
+	 * dependencies may not be entirely resolved at this point, we need to
+	 * use a separate list head. */
+	struct list_head jd_item;
+	/* true if atom's jd_item is currently on a list. Prevents atom being
+	 * processed twice. */
+	bool in_jd_list;
+
+	u16 nr_extres;
+	struct kbase_ext_res *extres;
+
+	u32 device_nr;
+	u64 affinity;
+	u64 jc;
+	enum kbase_atom_coreref_state coreref_state;
+#if defined(CONFIG_SYNC)
+	/* Stores either an input or output fence, depending on soft-job type */
+	struct sync_fence *fence;
+	struct sync_fence_waiter sync_waiter;
+#endif				/* CONFIG_SYNC */
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+	struct {
+		/* Use the functions/API defined in mali_kbase_fence.h to
+		 * when working with this sub struct */
+#if defined(CONFIG_SYNC_FILE)
+		/* Input fence */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+		struct fence *fence_in;
+#else
+		struct dma_fence *fence_in;
+#endif
+#endif
+		/* This points to the dma-buf output fence for this atom. If
+		 * this is NULL then there is no fence for this atom and the
+		 * following fields related to dma_fence may have invalid data.
+		 *
+		 * The context and seqno fields contain the details for this
+		 * fence.
+		 *
+		 * This fence is signaled when the katom is completed,
+		 * regardless of the event_code of the katom (signal also on
+		 * failure).
+		 */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+		struct fence *fence;
+#else
+		struct dma_fence *fence;
+#endif
+		/* The dma-buf fence context number for this atom. A unique
+		 * context number is allocated to each katom in the context on
+		 * context creation.
+		 */
+		unsigned int context;
+		/* The dma-buf fence sequence number for this atom. This is
+		 * increased every time this katom uses dma-buf fence.
+		 */
+		atomic_t seqno;
+		/* This contains a list of all callbacks set up to wait on
+		 * other fences.  This atom must be held back from JS until all
+		 * these callbacks have been called and dep_count have reached
+		 * 0. The initial value of dep_count must be equal to the
+		 * number of callbacks on this list.
+		 *
+		 * This list is protected by jctx.lock. Callbacks are added to
+		 * this list when the atom is built and the wait are set up.
+		 * All the callbacks then stay on the list until all callbacks
+		 * have been called and the atom is queued, or cancelled, and
+		 * then all callbacks are taken off the list and freed.
+		 */
+		struct list_head callbacks;
+		/* Atomic counter of number of outstandind dma-buf fence
+		 * dependencies for this atom. When dep_count reaches 0 the
+		 * atom may be queued.
+		 *
+		 * The special value "-1" may only be set after the count
+		 * reaches 0, while holding jctx.lock. This indicates that the
+		 * atom has been handled, either queued in JS or cancelled.
+		 *
+		 * If anyone but the dma-fence worker sets this to -1 they must
+		 * ensure that any potentially queued worker must have
+		 * completed before allowing the atom to be marked as unused.
+		 * This can be done by flushing the fence work queue:
+		 * kctx->dma_fence.wq.
+		 */
+		atomic_t dep_count;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE*/
+
+	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
+	enum base_jd_event_code event_code;
+	base_jd_core_req core_req;	    /**< core requirements */
+	/** Job Slot to retry submitting to if submission from IRQ handler failed
+	 *
+	 * NOTE: see if this can be unified into the another member e.g. the event */
+	int retry_submit_on_slot;
+
+	u32 ticks;
+	/* JS atom priority with respect to other atoms on its kctx. */
+	int sched_priority;
+
+	int poking;		/* BASE_HW_ISSUE_8316 */
+
+	wait_queue_head_t completed;
+	enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	int work_id;
+#endif
+	/* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
+	int slot_nr;
+
+	u32 atom_flags;
+
+	/* Number of times this atom has been retried. Used by replay soft job.
+	 */
+	int retry_count;
+
+	enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+	u64 need_cache_flush_cores_retained;
+
+	atomic_t blocked;
+
+	/* Pointer to atom that this atom has same-slot dependency on */
+	struct kbase_jd_atom *pre_dep;
+	/* Pointer to atom that has same-slot dependency on this atom */
+	struct kbase_jd_atom *post_dep;
+
+	/* Pointer to atom that this atom has cross-slot dependency on */
+	struct kbase_jd_atom *x_pre_dep;
+	/* Pointer to atom that has cross-slot dependency on this atom */
+	struct kbase_jd_atom *x_post_dep;
+
+	/* The GPU's flush count recorded at the time of submission, used for
+	 * the cache flush optimisation */
+	u32 flush_id;
+
+	struct kbase_jd_atom_backend backend;
+#ifdef CONFIG_DEBUG_FS
+	struct base_job_fault_event fault_event;
+#endif
+
+	/* List head used for three different purposes:
+	 *  1. Overflow list for JS ring buffers. If an atom is ready to run,
+	 *     but there is no room in the JS ring buffer, then the atom is put
+	 *     on the ring buffer's overflow list using this list node.
+	 *  2. List of waiting soft jobs.
+	 */
+	struct list_head queue;
+
+	/* Used to keep track of all JIT free/alloc jobs in submission order
+	 */
+	struct list_head jit_node;
+	bool jit_blocked;
+
+	/* If non-zero, this indicates that the atom will fail with the set
+	 * event_code when the atom is processed. */
+	enum base_jd_event_code will_fail_event_code;
+
+	/* Atoms will only ever be transitioning into, or out of
+	 * protected mode so we do not need two separate fields.
+	 */
+	union {
+		enum kbase_atom_enter_protected_state enter;
+		enum kbase_atom_exit_protected_state exit;
+	} protected_state;
+
+	struct rb_node runnable_tree_node;
+
+	/* 'Age' of atom relative to other atoms in the context. */
+	u32 age;
+};
+
+static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
+{
+	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+struct kbase_jd_context {
+	struct mutex lock;
+	struct kbasep_js_kctx_info sched_info;
+	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+	/** Tracks all job-dispatch jobs.  This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	u32 job_nr;
+
+	/** Waitq that reflects whether there are no jobs (including SW-only
+	 * dependency jobs). This is set when no jobs are present on the ctx,
+	 * and clear when there are jobs.
+	 *
+	 * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
+	 * the Job Scheduler is unaware of jobs that are blocked on dependencies,
+	 * and SW-only dependency jobs.
+	 *
+	 * This waitq can be waited upon to find out when the context jobs are all
+	 * done/cancelled (including those that might've been blocked on
+	 * dependencies) - and so, whether it can be terminated. However, it should
+	 * only be terminated once it is not present in the run-pool (see
+	 * kbasep_js_kctx_info::ctx::is_scheduled).
+	 *
+	 * Since the waitq is only set under kbase_jd_context::lock,
+	 * the waiter should also briefly obtain and drop kbase_jd_context::lock to
+	 * guarentee that the setter has completed its work on the kbase_context
+	 *
+	 * This must be updated atomically with:
+	 * - kbase_jd_context::job_nr */
+	wait_queue_head_t zero_jobs_wait;
+
+	/** Job Done workqueue. */
+	struct workqueue_struct *job_done_wq;
+
+	spinlock_t tb_lock;
+	u32 *tb;
+	size_t tb_wrap_offset;
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_t work_id;
+#endif
+};
+
+struct kbase_device_info {
+	u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+enum {
+	KBASE_AS_POKE_STATE_IN_FLIGHT     = 1<<0,
+	KBASE_AS_POKE_STATE_KILLING_POKE  = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+	u64	transtab;
+	u64	memattr;
+	u64	transcfg;
+};
+
+/**
+ * Important: Our code makes assumptions that a struct kbase_as structure is always at
+ * kbase_device->as[number]. This is used to recover the containing
+ * struct kbase_device from a struct kbase_as structure.
+ *
+ * Therefore, struct kbase_as structures must not be allocated anywhere else.
+ */
+struct kbase_as {
+	int number;
+
+	struct workqueue_struct *pf_wq;
+	struct work_struct work_pagefault;
+	struct work_struct work_busfault;
+	enum kbase_mmu_fault_type fault_type;
+	bool protected_mode;
+	u32 fault_status;
+	u64 fault_addr;
+	u64 fault_extra_addr;
+
+	struct kbase_mmu_setup current_setup;
+
+	/* BASE_HW_ISSUE_8316  */
+	struct workqueue_struct *poke_wq;
+	struct work_struct poke_work;
+	/** Protected by hwaccess_lock */
+	int poke_refcount;
+	/** Protected by hwaccess_lock */
+	kbase_as_poke_state poke_state;
+	struct hrtimer poke_timer;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+struct kbasep_mem_device {
+	atomic_t used_pages;   /* Tracks usage of OS shared memory. Updated
+				   when OS memory is allocated/freed. */
+
+};
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+	/* Comma on its own, to extend the list */
+	,
+	/* Must be the last in the enum */
+	KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT  (((u8)1) << 1)
+
+struct kbase_trace {
+	struct timespec timestamp;
+	u32 thread_id;
+	u32 cpu;
+	void *ctx;
+	bool katom;
+	int atom_number;
+	u64 atom_udata[2];
+	u64 gpu_addr;
+	unsigned long info_val;
+	u8 code;
+	u8 jobslot;
+	u8 refcount;
+	u8 flags;
+};
+
+/** Event IDs for the power management framework.
+ *
+ * Any of these events might be missed, so they should not be relied upon to
+ * find the precise state of the GPU at a particular time in the
+ * trace. Overall, we should get a high percentage of these events for
+ * statisical purposes, and so a few missing should not be a problem */
+enum kbase_timeline_pm_event {
+	/* helper for tests */
+	KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** Event reserved for backwards compatibility with 'init' events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** The power state of the device has changed.
+	 *
+	 * Specifically, the device has reached a desired or available state.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
+
+	/** The GPU is becoming active.
+	 *
+	 * This event is sent when the first context is about to use the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
+
+	/** The GPU is becoming idle.
+	 *
+	 * This event is sent when the last context has finished using the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
+
+	/** Event reserved for backwards compatibility with 'policy_change'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_4,
+
+	/** Event reserved for backwards compatibility with 'system_suspend'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_5,
+
+	/** Event reserved for backwards compatibility with 'system_resume'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_6,
+
+	/** The job scheduler is requesting to power up/down cores.
+	 *
+	 * This event is sent when:
+	 * - powered down cores are needed to complete a job
+	 * - powered up cores are not needed anymore
+	 */
+	KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+
+	KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+};
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+struct kbase_trace_kctx_timeline {
+	atomic_t jd_atoms_in_flight;
+	u32 owner_tgid;
+};
+
+struct kbase_trace_kbdev_timeline {
+	/* Note: strictly speaking, not needed, because it's in sync with
+	 * kbase_device::jm_slots[]::submitted_nr
+	 *
+	 * But it's kept as an example of how to add global timeline tracking
+	 * information
+	 *
+	 * The caller must hold hwaccess_lock when accessing this */
+	u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
+
+	/* Last UID for each PM event */
+	atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
+	/* Counter for generating PM event UIDs */
+	atomic_t pm_event_uid_counter;
+	/*
+	 * L2 transition state - true indicates that the transition is ongoing
+	 * Expected to be protected by hwaccess_lock */
+	bool l2_transitioning;
+};
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+
+struct kbasep_kctx_list_element {
+	struct list_head link;
+	struct kbase_context *kctx;
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_device_data {
+	/**
+	 * The lock protecting Power Management structures accessed outside of
+	 * IRQ.
+	 *
+	 * This lock must also be held whenever the GPU is being powered on or
+	 * off.
+	 */
+	struct mutex lock;
+
+	/** The reference count of active contexts on this device. */
+	int active_count;
+	/** Flag indicating suspending/suspended */
+	bool suspending;
+	/* Wait queue set when active_count == 0 */
+	wait_queue_head_t zero_active_count_wait;
+
+	/**
+	 * Bit masks identifying the available shader cores that are specified
+	 * via sysfs. One mask per job slot.
+	 */
+	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
+	u64 debug_core_mask_all;
+
+	/**
+	 * Callback for initializing the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 *
+	 * @return 0 on success, else error code
+	 */
+	 int (*callback_power_runtime_init)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback for terminating the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 */
+	void (*callback_power_runtime_term)(struct kbase_device *kbdev);
+
+	/* Time in milliseconds between each dvfs sample */
+	u32 dvfs_period;
+
+	/* Period of GPU poweroff timer */
+	ktime_t gpu_poweroff_time;
+
+	/* Number of ticks of GPU poweroff timer before shader is powered off */
+	int poweroff_shader_ticks;
+
+	/* Number of ticks of GPU poweroff timer before GPU is powered off */
+	int poweroff_gpu_ticks;
+
+	struct kbase_pm_backend_data backend;
+};
+
+/**
+ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
+ * @kbdev:     Kbase device where memory is used
+ * @cur_size:  Number of free pages currently in the pool (may exceed @max_size
+ *             in some corner cases)
+ * @max_size:  Maximum number of free pages in the pool
+ * @order:     order = 0 refers to a pool of 4 KB pages
+ *             order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
+ * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size
+ *             and @page_list
+ * @page_list: List of free pages in the pool
+ * @reclaim:   Shrinker for kernel reclaim of free pages
+ * @next_pool: Pointer to next pool where pages can be allocated when this pool
+ *             is empty. Pages will spill over to the next pool when this pool
+ *             is full. Can be NULL if there is no next pool.
+ */
+struct kbase_mem_pool {
+	struct kbase_device *kbdev;
+	size_t              cur_size;
+	size_t              max_size;
+	size_t		    order;
+	spinlock_t          pool_lock;
+	struct list_head    page_list;
+	struct shrinker     reclaim;
+
+	struct kbase_mem_pool *next_pool;
+};
+
+/**
+ * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP
+ *                            frequency, and real frequency and core mask
+ * @opp_freq:  Nominal OPP frequency
+ * @real_freq: Real GPU frequency
+ * @core_mask: Shader core mask
+ */
+struct kbase_devfreq_opp {
+	u64 opp_freq;
+	u64 real_freq;
+	u64 core_mask;
+};
+
+struct kbase_mmu_mode {
+	void (*update)(struct kbase_context *kctx);
+	void (*get_as_setup)(struct kbase_context *kctx,
+			struct kbase_mmu_setup * const setup);
+	void (*disable_as)(struct kbase_device *kbdev, int as_nr);
+	phys_addr_t (*pte_to_phy_addr)(u64 entry);
+	int (*ate_is_valid)(u64 ate, unsigned int level);
+	int (*pte_is_valid)(u64 pte, unsigned int level);
+	void (*entry_set_ate)(u64 *entry, struct tagged_addr phy,
+			unsigned long flags, unsigned int level);
+	void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+	void (*entry_invalidate)(u64 *entry);
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
+
+
+#define DEVNAME_SIZE	16
+
+struct kbase_device {
+	s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];
+
+	u32 hw_quirks_sc;
+	u32 hw_quirks_tiler;
+	u32 hw_quirks_mmu;
+	u32 hw_quirks_jm;
+
+	struct list_head entry;
+	struct device *dev;
+	struct miscdevice mdev;
+	u64 reg_start;
+	size_t reg_size;
+	void __iomem *reg;
+
+	struct {
+		int irq;
+		int flags;
+	} irqs[3];
+
+	struct clk *clock;
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulator;
+#endif
+	char devname[DEVNAME_SIZE];
+
+#ifdef CONFIG_MALI_NO_MALI
+	void *model;
+	struct kmem_cache *irq_slab;
+	struct workqueue_struct *irq_workq;
+	atomic_t serving_job_irq;
+	atomic_t serving_gpu_irq;
+	atomic_t serving_mmu_irq;
+	spinlock_t reg_op_lock;
+#endif	/* CONFIG_MALI_NO_MALI */
+
+	struct kbase_pm_device_data pm;
+	struct kbasep_js_device_data js_data;
+	struct kbase_mem_pool mem_pool;
+	struct kbase_mem_pool lp_mem_pool;
+	struct kbasep_mem_device memdev;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	struct kbase_as as[BASE_MAX_NR_AS];
+	/* The below variables (as_free and as_to_kctx) are managed by the
+	 * Context Scheduler. The kbasep_js_device_data::runpool_irq::lock must
+	 * be held whilst accessing these.
+	 */
+	u16 as_free; /* Bitpattern of free Address Spaces */
+	/* Mapping from active Address Spaces to kbase_context */
+	struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
+
+
+	spinlock_t mmu_mask_change;
+
+	struct kbase_gpu_props gpu_props;
+
+	/** List of SW workarounds for HW issues */
+	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+	/** List of features available */
+	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+
+	/* Bitmaps of cores that are currently in use (running jobs).
+	 * These should be kept up to date by the job scheduler.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 *
+	 * kbase_pm_check_transitions_nolock() should be called when bits are
+	 * cleared to update the power management system and allow transitions to
+	 * occur. */
+	u64 shader_inuse_bitmap;
+
+	/* Refcount for cores in use */
+	u32 shader_inuse_cnt[64];
+
+	/* Bitmaps of cores the JS needs for jobs ready to run */
+	u64 shader_needed_bitmap;
+
+	/* Refcount for cores needed */
+	u32 shader_needed_cnt[64];
+
+	u32 tiler_inuse_cnt;
+
+	u32 tiler_needed_cnt;
+
+	/* struct for keeping track of the disjoint information
+	 *
+	 * The state  is > 0 if the GPU is in a disjoint state. Otherwise 0
+	 * The count is the number of disjoint events that have occurred on the GPU
+	 */
+	struct {
+		atomic_t count;
+		atomic_t state;
+	} disjoint_event;
+
+	/* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
+	u32 l2_users_count;
+
+	/* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be
+	 * submitted to these cores. These are updated by the power management code. The job scheduler should avoid
+	 * submitting new jobs to any cores that are not marked as available.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 */
+	u64 shader_available_bitmap;
+	u64 tiler_available_bitmap;
+	u64 l2_available_bitmap;
+	u64 stack_available_bitmap;
+
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+
+	s8 nr_hw_address_spaces;			  /**< Number of address spaces in the GPU (constant after driver initialisation) */
+	s8 nr_user_address_spaces;			  /**< Number of address spaces available to user contexts */
+
+	/* Structure used for instrumentation and HW counters dumping */
+	struct kbase_hwcnt {
+		/* The lock should be used when accessing any of the following members */
+		spinlock_t lock;
+
+		struct kbase_context *kctx;
+		u64 addr;
+
+		struct kbase_instr_backend backend;
+	} hwcnt;
+
+	struct kbase_vinstr_context *vinstr_ctx;
+
+#if KBASE_TRACE_ENABLE
+	spinlock_t              trace_lock;
+	u16                     trace_first_out;
+	u16                     trace_next_in;
+	struct kbase_trace            *trace_rbuf;
+#endif
+
+	u32 reset_timeout_ms;
+
+	struct mutex cacheclean_lock;
+
+	/* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
+	void *platform_context;
+
+	/* List of kbase_contexts created */
+	struct list_head        kctx_list;
+	struct mutex            kctx_list_lock;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freq;
+	unsigned long current_nominal_freq;
+	unsigned long current_voltage;
+	u64 current_core_mask;
+	struct kbase_devfreq_opp *opp_table;
+	int num_opps;
+#ifdef CONFIG_DEVFREQ_THERMAL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+	struct devfreq_cooling_device *devfreq_cooling;
+#else
+	struct thermal_cooling_device *devfreq_cooling;
+#endif
+	/* Current IPA model - true for configured model, false for fallback */
+	atomic_t ipa_use_configured_model;
+	struct {
+		/* Access to this struct must be with ipa.lock held */
+		struct mutex lock;
+		struct kbase_ipa_model *configured_model;
+		struct kbase_ipa_model *fallback_model;
+	} ipa;
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kbdev_timeline timeline;
+#endif
+
+	/*
+	 * Control for enabling job dump on failure, set when control debugfs
+	 * is opened.
+	 */
+	bool job_fault_debug;
+
+#ifdef CONFIG_DEBUG_FS
+	/* directory for debugfs entries */
+	struct dentry *mali_debugfs_directory;
+	/* Root directory for per context entry */
+	struct dentry *debugfs_ctx_directory;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* bit for each as, set if there is new data to report */
+	u64 debugfs_as_read_bitmap;
+#endif /* CONFIG_MALI_DEBUG */
+
+	/* failed job dump, used for separate debug process */
+	wait_queue_head_t job_fault_wq;
+	wait_queue_head_t job_fault_resume_wq;
+	struct workqueue_struct *job_fault_resume_workq;
+	struct list_head job_fault_event_list;
+	spinlock_t job_fault_event_lock;
+	struct kbase_context *kctx_fault;
+
+#if !MALI_CUSTOMER_RELEASE
+	/* Per-device data for register dumping interface */
+	struct {
+		u16 reg_offset; /* Offset of a GPU_CONTROL register to be
+				   dumped upon request */
+	} regs_dump_debugfs_data;
+#endif /* !MALI_CUSTOMER_RELEASE */
+#endif /* CONFIG_DEBUG_FS */
+
+	/* fbdump profiling controls set by gator */
+	u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
+
+
+#if MALI_CUSTOMER_RELEASE == 0
+	/* Number of jobs that are run before a job is forced to fail and
+	 * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
+	 * failures. */
+	int force_replay_limit;
+	/* Count of jobs between forced failures. Incremented on each job. A
+	 * job is forced to fail once this is greater than or equal to
+	 * force_replay_limit. */
+	int force_replay_count;
+	/* Core requirement for jobs to be failed and replayed. May be zero. */
+	base_jd_core_req force_replay_core_req;
+	/* true if force_replay_limit should be randomized. The random
+	 * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+	 */
+	bool force_replay_random;
+#endif
+
+	/* Total number of created contexts */
+	atomic_t ctx_num;
+
+#ifdef CONFIG_DEBUG_FS
+	/* Holds the most recent register accesses */
+	struct kbase_io_history io_history;
+#endif /* CONFIG_DEBUG_FS */
+
+	struct kbase_hwaccess_data hwaccess;
+
+	/* Count of page/bus faults waiting for workqueues to process */
+	atomic_t faults_pending;
+
+	/* true if GPU is powered off or power off operation is in progress */
+	bool poweroff_pending;
+
+
+	/* defaults for new context created for this device */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active_default;
+#else
+	u32 infinite_cache_active_default;
+#endif
+	size_t mem_pool_max_size_default;
+
+	/* current gpu coherency mode */
+	u32 current_gpu_coherency_mode;
+	/* system coherency mode  */
+	u32 system_coherency;
+	/* Flag to track when cci snoops have been enabled on the interface */
+	bool cci_snoop_enabled;
+
+	/* SMC function IDs to call into Trusted firmware to enable/disable
+	 * cache snooping. Value of 0 indicates that they are not used
+	 */
+	u32 snoop_enable_smc;
+	u32 snoop_disable_smc;
+
+	/* Protected mode operations */
+	struct protected_mode_ops *protected_ops;
+
+	/* Protected device attached to this kbase device */
+	struct protected_mode_device *protected_dev;
+
+	/*
+	 * true when GPU is put into protected mode
+	 */
+	bool protected_mode;
+
+	/*
+	 * true when GPU is transitioning into or out of protected mode
+	 */
+	bool protected_mode_transition;
+
+	/*
+	 * true if protected mode is supported
+	 */
+	bool protected_mode_support;
+
+
+#ifdef CONFIG_MALI_DEBUG
+	wait_queue_head_t driver_inactive_wait;
+	bool driver_inactive;
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	/*
+	 * Bus logger integration.
+	 */
+	struct bus_logger_client *buslogger;
+#endif
+	/* Boolean indicating if an IRQ flush during reset is in progress. */
+	bool irq_reset_flush;
+
+	/* list of inited sub systems. Used during terminate/error recovery */
+	u32 inited_subsys;
+
+	spinlock_t hwaccess_lock;
+
+	/* Protects access to MMU operations */
+	struct mutex mmu_hw_mutex;
+
+	/* Current serialization mode. See KBASE_SERIALIZE_* for details */
+	u8 serialize_jobs;
+};
+
+/**
+ * struct jsctx_queue - JS context atom queue
+ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this
+ *                 job slot.
+ * @x_dep_head:    Head item of the linked list of atoms blocked on cross-slot
+ *                 dependencies. Atoms on this list will be moved to the
+ *                 runnable_tree when the blocking atom completes.
+ *
+ * hwaccess_lock must be held when accessing this structure.
+ */
+struct jsctx_queue {
+	struct rb_root runnable_tree;
+	struct list_head x_dep_head;
+};
+
+
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
+					 (((minor) & 0xFFF) << 8) | \
+					 ((0 & 0xFF) << 0))
+
+/**
+ * enum kbase_context_flags - Flags for kbase contexts
+ *
+ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
+ * process on a 64-bit kernel.
+ *
+ * @KCTX_RUNNABLE_REF: Set when context is counted in
+ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
+ *
+ * @KCTX_ACTIVE: Set when the context is active.
+ *
+ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
+ * context.
+ *
+ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
+ * initialized.
+ *
+ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
+ * allocations. Existing allocations will not change.
+ *
+ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
+ *
+ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept
+ * scheduled in.
+ *
+ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
+ * This is only ever updated whilst the jsctx_mutex is held.
+ *
+ * @KCTX_DYING: Set when the context process is in the process of being evicted.
+ *
+ * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this
+ * context, to disable use of implicit dma-buf fences. This is used to avoid
+ * potential synchronization deadlocks.
+ *
+ * All members need to be separate bits. This enum is intended for use in a
+ * bitmask where multiple values get OR-ed together.
+ */
+enum kbase_context_flags {
+	KCTX_COMPAT = 1U << 0,
+	KCTX_RUNNABLE_REF = 1U << 1,
+	KCTX_ACTIVE = 1U << 2,
+	KCTX_PULLED = 1U << 3,
+	KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
+	KCTX_INFINITE_CACHE = 1U << 5,
+	KCTX_SUBMIT_DISABLED = 1U << 6,
+	KCTX_PRIVILEGED = 1U << 7,
+	KCTX_SCHEDULED = 1U << 8,
+	KCTX_DYING = 1U << 9,
+	KCTX_NO_IMPLICIT_SYNC = 1U << 10,
+};
+
+struct kbase_sub_alloc {
+	struct list_head link;
+	struct page *page;
+	DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K);
+};
+
+struct kbase_context {
+	struct file *filp;
+	struct kbase_device *kbdev;
+	u32 id; /* System wide unique id */
+	unsigned long api_version;
+	phys_addr_t pgd;
+	struct list_head event_list;
+	struct list_head event_coalesce_list;
+	struct mutex event_mutex;
+	atomic_t event_closed;
+	struct workqueue_struct *event_workq;
+	atomic_t event_count;
+	int event_coalesce_count;
+
+	atomic_t flags;
+
+	atomic_t                setup_complete;
+	atomic_t                setup_in_progress;
+
+	u64 *mmu_teardown_pages;
+
+	struct tagged_addr aliasing_sink_page;
+
+	struct mutex            mem_partials_lock;
+	struct list_head        mem_partials;
+
+	struct mutex            mmu_lock;
+	struct mutex            reg_lock; /* To be converted to a rwlock? */
+	struct rb_root reg_rbtree_same; /* RB tree of GPU (live) regions,
+					 * SAME_VA zone */
+	struct rb_root reg_rbtree_exec; /* RB tree of GPU (live) regions,
+					 * EXEC zone */
+	struct rb_root reg_rbtree_custom; /* RB tree of GPU (live) regions,
+					 * CUSTOM_VA zone */
+
+	unsigned long    cookies;
+	struct kbase_va_region *pending_regions[BITS_PER_LONG];
+
+	wait_queue_head_t event_queue;
+	pid_t tgid;
+	pid_t pid;
+
+	struct kbase_jd_context jctx;
+	atomic_t used_pages;
+	atomic_t         nonmapped_pages;
+
+	struct kbase_mem_pool mem_pool;
+	struct kbase_mem_pool lp_mem_pool;
+
+	struct shrinker         reclaim;
+	struct list_head        evict_list;
+
+	struct list_head waiting_soft_jobs;
+	spinlock_t waiting_soft_jobs_lock;
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		struct list_head waiting_resource;
+		struct workqueue_struct *wq;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+	/** This is effectively part of the Run Pool, because it only has a valid
+	 * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
+	 *
+	 * The hwaccess_lock must be held whilst accessing this.
+	 *
+	 * If the context relating to this as_nr is required, you must use
+	 * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
+	 * whilst you're using it. Alternatively, just hold the hwaccess_lock
+	 * to ensure the context doesn't disappear (but this has restrictions on what other locks
+	 * you can take whilst doing this) */
+	int as_nr;
+
+	/* Keeps track of the number of users of this context. A user can be a
+	 * job that is available for execution, instrumentation needing to 'pin'
+	 * a context for counter collection, etc. If the refcount reaches 0 then
+	 * this context is considered inactive and the previously programmed
+	 * AS might be cleared at any point.
+	 */
+	atomic_t refcount;
+
+	/* NOTE:
+	 *
+	 * Flags are in jctx.sched_info.ctx.flags
+	 * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
+	 *
+	 * All other flags must be added there */
+	spinlock_t         mm_update_lock;
+	struct mm_struct *process_mm;
+	/* End of the SAME_VA zone */
+	u64 same_va_end;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kctx_timeline timeline;
+#endif
+#ifdef CONFIG_DEBUG_FS
+	/* Content of mem_profile file */
+	char *mem_profile_data;
+	/* Size of @c mem_profile_data */
+	size_t mem_profile_size;
+	/* Mutex guarding memory profile state */
+	struct mutex mem_profile_lock;
+	/* Memory profile directory under debugfs */
+	struct dentry *kctx_dentry;
+
+	/* for job fault debug */
+	unsigned int *reg_dump;
+	atomic_t job_fault_count;
+	/* This list will keep the following atoms during the dump
+	 * in the same context
+	 */
+	struct list_head job_fault_resume_event_list;
+
+#endif /* CONFIG_DEBUG_FS */
+
+	struct jsctx_queue jsctx_queue
+		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+
+	/* Number of atoms currently pulled from this context */
+	atomic_t atoms_pulled;
+	/* Number of atoms currently pulled from this context, per slot */
+	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+	/* Number of atoms currently pulled from this context, per slot and
+	 * priority. Hold hwaccess_lock when accessing */
+	int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][
+			KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	/* true if slot is blocked on the given priority. This will be set on a
+	 * soft-stop */
+	bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	/* Bitmask of slots that can be pulled from */
+	u32 slots_pullable;
+
+	/* Backend specific data */
+	struct kbase_context_backend backend;
+
+	/* Work structure used for deferred ASID assignment */
+	struct work_struct work;
+
+	/* Only one userspace vinstr client per kbase context */
+	struct kbase_vinstr_client *vinstr_cli;
+	struct mutex vinstr_cli_lock;
+
+	/* List of completed jobs waiting for events to be posted */
+	struct list_head completed_jobs;
+	/* Number of work items currently pending on job_done_wq */
+	atomic_t work_count;
+
+	/* Waiting soft-jobs will fail when this timer expires */
+	struct timer_list soft_job_timeout;
+
+	/* JIT allocation management */
+	struct kbase_va_region *jit_alloc[256];
+	struct list_head jit_active_head;
+	struct list_head jit_pool_head;
+	struct list_head jit_destroy_head;
+	struct mutex jit_evict_lock;
+	struct work_struct jit_work;
+
+	/* A list of the JIT soft-jobs in submission order
+	 * (protected by kbase_jd_context.lock)
+	 */
+	struct list_head jit_atoms_head;
+	/* A list of pending JIT alloc soft-jobs (using the 'queue' list_head)
+	 * (protected by kbase_jd_context.lock)
+	 */
+	struct list_head jit_pending_alloc;
+
+	/* External sticky resource management */
+	struct list_head ext_res_meta_head;
+
+	/* Used to record that a drain was requested from atomic context */
+	atomic_t drain_pending;
+
+	/* Current age count, used to determine age for newly submitted atoms */
+	u32 age_count;
+};
+
+/**
+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
+ *                                 to a @kbase_context.
+ * @ext_res_node:                  List head for adding the metadata to a
+ *                                 @kbase_context.
+ * @alloc:                         The physical memory allocation structure
+ *                                 which is mapped.
+ * @gpu_addr:                      The GPU virtual address the resource is
+ *                                 mapped to.
+ *
+ * External resources can be mapped into multiple contexts as well as the same
+ * context multiple times.
+ * As kbase_va_region itself isn't refcounted we can't attach our extra
+ * information to it as it could be removed under our feet leaving external
+ * resources pinned.
+ * This metadata structure binds a single external resource to a single
+ * context, ensuring that per context mapping is tracked separately so it can
+ * be overridden when needed and abuses by the application (freeing the resource
+ * multiple times) don't effect the refcount of the physical allocation.
+ */
+struct kbase_ctx_ext_res_meta {
+	struct list_head ext_res_node;
+	struct kbase_mem_phy_alloc *alloc;
+	u64 gpu_addr;
+};
+
+enum kbase_reg_access_type {
+	REG_READ,
+	REG_WRITE
+};
+
+enum kbase_share_attr_bits {
+	/* (1ULL << 8) bit is reserved */
+	SHARE_BOTH_BITS = (2ULL << 8),	/* inner and outer shareable coherency */
+	SHARE_INNER_BITS = (3ULL << 8)	/* inner shareable coherency */
+};
+
+/**
+ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
+ * @kbdev: kbase device
+ *
+ * Return: true if the device access are coherent, false if not.
+ */
+static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
+			(kbdev->system_coherency == COHERENCY_ACE))
+		return true;
+
+	return false;
+}
+
+/* Conversion helpers for setting up high resolution timers */
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
+
+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
+#define KBASE_CLEAN_CACHE_MAX_LOOPS     100000
+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
+#define KBASE_AS_INACTIVE_MAX_LOOPS     100000
+
+/* Maximum number of times a job can be replayed */
+#define BASEP_JD_REPLAY_LIMIT 15
+
+/* JobDescriptorHeader - taken from the architecture specifications, the layout
+ * is currently identical for all GPU archs. */
+struct job_descriptor_header {
+	u32 exception_status;
+	u32 first_incomplete_task;
+	u64 fault_pointer;
+	u8 job_descriptor_size : 1;
+	u8 job_type : 7;
+	u8 job_barrier : 1;
+	u8 _reserved_01 : 1;
+	u8 _reserved_1 : 1;
+	u8 _reserved_02 : 1;
+	u8 _reserved_03 : 1;
+	u8 _reserved_2 : 1;
+	u8 _reserved_04 : 1;
+	u8 _reserved_05 : 1;
+	u16 job_index;
+	u16 job_dependency_index_1;
+	u16 job_dependency_index_2;
+	union {
+		u64 _64;
+		u32 _32;
+	} next_job;
+};
+
+#endif				/* _KBASE_DEFS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
new file mode 100644
index 0000000..717795b
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -0,0 +1,673 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_profiling_gator_api.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports tracing feature provided in the base module.
+ * Please keep it in sync with the value of base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if KBASE_TRACE_ENABLE
+static const char *kbasep_trace_code_string[] = {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+static int kbasep_trace_init(struct kbase_device *kbdev);
+static void kbasep_trace_term(struct kbase_device *kbdev);
+static void kbasep_trace_hook_wrapper(void *param);
+
+struct kbase_device *kbase_device_alloc(void)
+{
+	return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+	const char format[] = "mali_mmu%d";
+	char name[sizeof(format)];
+	const char poke_format[] = "mali_mmu%d_poker";
+	char poke_name[sizeof(poke_format)];
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		snprintf(poke_name, sizeof(poke_name), poke_format, i);
+
+	snprintf(name, sizeof(name), format, i);
+
+	kbdev->as[i].number = i;
+	kbdev->as[i].fault_addr = 0ULL;
+
+	kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+	if (!kbdev->as[i].pf_wq)
+		return -EINVAL;
+
+	INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+	INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+		struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
+		struct work_struct *poke_work = &kbdev->as[i].poke_work;
+
+		kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+		if (!kbdev->as[i].poke_wq) {
+			destroy_workqueue(kbdev->as[i].pf_wq);
+			return -EINVAL;
+		}
+		INIT_WORK(poke_work, kbasep_as_do_poke);
+
+		hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+		poke_timer->function = kbasep_as_poke_timer_callback;
+
+		kbdev->as[i].poke_refcount = 0;
+		kbdev->as[i].poke_state = 0u;
+	}
+
+	return 0;
+}
+
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+	destroy_workqueue(kbdev->as[i].pf_wq);
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		destroy_workqueue(kbdev->as[i].poke_wq);
+}
+
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+	int i, err;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		err = kbase_device_as_init(kbdev, i);
+		if (err)
+			goto free_workqs;
+	}
+
+	return 0;
+
+free_workqs:
+	for (; i > 0; i--)
+		kbase_device_as_term(kbdev, i);
+
+	return err;
+}
+
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+		kbase_device_as_term(kbdev, i);
+}
+
+int kbase_device_init(struct kbase_device * const kbdev)
+{
+	int i, err;
+#ifdef CONFIG_ARM64
+	struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
+
+	spin_lock_init(&kbdev->mmu_mask_change);
+	mutex_init(&kbdev->mmu_hw_mutex);
+#ifdef CONFIG_ARM64
+	kbdev->cci_snoop_enabled = false;
+	np = kbdev->dev->of_node;
+	if (np != NULL) {
+		if (of_property_read_u32(np, "snoop_enable_smc",
+					&kbdev->snoop_enable_smc))
+			kbdev->snoop_enable_smc = 0;
+		if (of_property_read_u32(np, "snoop_disable_smc",
+					&kbdev->snoop_disable_smc))
+			kbdev->snoop_disable_smc = 0;
+		/* Either both or none of the calls should be provided. */
+		if (!((kbdev->snoop_disable_smc == 0
+			&& kbdev->snoop_enable_smc == 0)
+			|| (kbdev->snoop_disable_smc != 0
+			&& kbdev->snoop_enable_smc != 0))) {
+			WARN_ON(1);
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+#endif /* CONFIG_ARM64 */
+	/* Get the list of workarounds for issues on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	err = kbase_hw_set_issues_mask(kbdev);
+	if (err)
+		goto fail;
+
+	/* Set the list of features available on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	kbase_hw_set_features_mask(kbdev);
+
+	kbase_gpuprops_set_features(kbdev);
+
+	/* On Linux 4.0+, dma coherency is determined from device tree */
+#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+	set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif
+
+	/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+	 * device structure was created by device-tree
+	 */
+	if (!kbdev->dev->dma_mask)
+		kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+	err = dma_set_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	err = dma_set_coherent_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+	err = kbase_device_all_as_init(kbdev);
+	if (err)
+		goto as_init_failed;
+
+	spin_lock_init(&kbdev->hwcnt.lock);
+
+	err = kbasep_trace_init(kbdev);
+	if (err)
+		goto term_as;
+
+	mutex_init(&kbdev->cacheclean_lock);
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		kbdev->timeline.slot_atoms_submitted[i] = 0;
+
+	for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
+		atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+	/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
+	for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
+		kbdev->kbase_profiling_controls[i] = 0;
+
+	kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+	atomic_set(&kbdev->ctx_num, 0);
+
+	err = kbase_instr_backend_init(kbdev);
+	if (err)
+		goto term_trace;
+
+	kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+		kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+	else
+		kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+
+#ifdef CONFIG_MALI_DEBUG
+	init_waitqueue_head(&kbdev->driver_inactive_wait);
+#endif /* CONFIG_MALI_DEBUG */
+
+	return 0;
+term_trace:
+	kbasep_trace_term(kbdev);
+term_as:
+	kbase_device_all_as_term(kbdev);
+as_init_failed:
+dma_set_mask_failed:
+fail:
+	return err;
+}
+
+void kbase_device_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+#if KBASE_TRACE_ENABLE
+	kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+	kbase_instr_backend_term(kbdev);
+
+	kbasep_trace_term(kbdev);
+
+	kbase_device_all_as_term(kbdev);
+}
+
+void kbase_device_free(struct kbase_device *kbdev)
+{
+	kfree(kbdev);
+}
+
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(tb);
+
+	/* Interface uses 16-bit value to track last accessed entry. Each entry
+	 * is composed of two 32-bit words.
+	 * This limits the size that can be handled without an overflow. */
+	if (0xFFFF * (2 * sizeof(u32)) < size)
+		return -EINVAL;
+
+	/* set up the header */
+	/* magic number in the first 4 bytes */
+	tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
+	/* Store (write offset = 0, wrap counter = 0, transaction active = no)
+	 * write offset 0 means never written.
+	 * Offsets 1 to (wrap_offset - 1) used to store values when trace started
+	 */
+	tb[1] = 0;
+
+	/* install trace buffer */
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb_wrap_offset = size / 8;
+	kctx->jctx.tb = tb;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+
+	return 0;
+}
+
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb = NULL;
+	kctx->jctx.tb_wrap_offset = 0;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	if (kctx->jctx.tb) {
+		u16 wrap_count;
+		u16 write_offset;
+		u32 *tb = kctx->jctx.tb;
+		u32 header_word;
+
+		header_word = tb[1];
+		KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
+
+		wrap_count = (header_word >> 1) & 0x7FFF;
+		write_offset = (header_word >> 16) & 0xFFFF;
+
+		/* mark as transaction in progress */
+		tb[1] |= 0x1;
+		mb();
+
+		/* calculate new offset */
+		write_offset++;
+		if (write_offset == kctx->jctx.tb_wrap_offset) {
+			/* wrap */
+			write_offset = 1;
+			wrap_count++;
+			wrap_count &= 0x7FFF;	/* 15bit wrap counter */
+		}
+
+		/* store the trace entry at the selected offset */
+		tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
+		tb[write_offset * 2 + 1] = reg_value;
+		mb();
+
+		/* new header word */
+		header_word = (write_offset << 16) | (wrap_count << 1) | 0x0;	/* transaction complete */
+		tb[1] = header_word;
+	}
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	struct kbase_trace *rbuf;
+
+	rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+	if (!rbuf)
+		return -EINVAL;
+
+	kbdev->trace_rbuf = rbuf;
+	spin_lock_init(&kbdev->trace_lock);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	kfree(kbdev->trace_rbuf);
+}
+
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+	s32 written = 0;
+
+	/* Initial part of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
+
+	if (trace_msg->katom)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
+
+	/* NOTE: Could add function callbacks to handle different message types */
+	/* Jobslot present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Refcount present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Rest of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
+}
+
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	dev_dbg(kbdev->dev, "%s", buffer);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+	unsigned long irqflags;
+	struct kbase_trace *trace_msg;
+
+	spin_lock_irqsave(&kbdev->trace_lock, irqflags);
+
+	trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+	/* Fill the message */
+	trace_msg->thread_id = task_pid_nr(current);
+	trace_msg->cpu = task_cpu(current);
+
+	getnstimeofday(&trace_msg->timestamp);
+
+	trace_msg->code = code;
+	trace_msg->ctx = ctx;
+
+	if (NULL == katom) {
+		trace_msg->katom = false;
+	} else {
+		trace_msg->katom = true;
+		trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+		trace_msg->atom_udata[0] = katom->udata.blob[0];
+		trace_msg->atom_udata[1] = katom->udata.blob[1];
+	}
+
+	trace_msg->gpu_addr = gpu_addr;
+	trace_msg->jobslot = jobslot;
+	trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
+	trace_msg->info_val = info_val;
+	trace_msg->flags = flags;
+
+	/* Update the ringbuffer indices */
+	kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+	if (kbdev->trace_next_in == kbdev->trace_first_out)
+		kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+	/* Done */
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	kbdev->trace_first_out = kbdev->trace_next_in;
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 start;
+	u32 end;
+
+	dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	start = kbdev->trace_first_out;
+	end = kbdev->trace_next_in;
+
+	while (start != end) {
+		struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
+
+		kbasep_trace_dump_msg(kbdev, trace_msg);
+
+		start = (start + 1) & KBASE_TRACE_MASK;
+	}
+	dev_dbg(kbdev->dev, "TRACE_END");
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	KBASE_TRACE_CLEAR(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)param;
+
+	kbasep_trace_dump(kbdev);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct trace_seq_state {
+	struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+	u32 start;
+	u32 end;
+};
+
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	if (*pos > KBASE_TRACE_SIZE)
+		return NULL;
+	i = state->start + *pos;
+	if ((state->end >= state->start && i >= state->end) ||
+			i >= state->end + KBASE_TRACE_SIZE)
+		return NULL;
+
+	i &= KBASE_TRACE_MASK;
+
+	return &state->trace_buf[i];
+}
+
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	(*pos)++;
+
+	i = (state->start + *pos) & KBASE_TRACE_MASK;
+	if (i == state->end)
+		return NULL;
+
+	return &state->trace_buf[i];
+}
+
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace *trace_msg = data;
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	seq_printf(s, "%s\n", buffer);
+	return 0;
+}
+
+static const struct seq_operations kbasep_trace_seq_ops = {
+	.start = kbasep_trace_seq_start,
+	.next = kbasep_trace_seq_next,
+	.stop = kbasep_trace_seq_stop,
+	.show = kbasep_trace_seq_show,
+};
+
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct kbase_device *kbdev = inode->i_private;
+	unsigned long flags;
+
+	struct trace_seq_state *state;
+
+	state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
+	if (!state)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	state->start = kbdev->trace_first_out;
+	state->end = kbdev->trace_next_in;
+	memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	return 0;
+}
+
+static const struct file_operations kbasep_trace_debugfs_fops = {
+	.open = kbasep_trace_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_trace", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_trace_debugfs_fops);
+}
+
+#else
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_DEBUG_FS */
+
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	CSTD_UNUSED(param);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
+{
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		kbdev->kbase_profiling_controls[control] = value;
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+}
+
+/*
+ * Called by gator to control the production of
+ * profiling information at runtime
+ * */
+
+void _mali_profiling_control(u32 action, u32 value)
+{
+	struct kbase_device *kbdev = NULL;
+
+	/* find the first i.e. call with -1 */
+	kbdev = kbase_find_device(-1);
+
+	if (NULL != kbdev)
+		kbase_set_profiling_control(kbdev, action, value);
+}
+KBASE_EXPORT_SYMBOL(_mali_profiling_control);
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
new file mode 100644
index 0000000..f70bccc
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_set(&kbdev->disjoint_event.count, 0);
+	atomic_set(&kbdev->disjoint_event.state, 0);
+}
+
+/* increment the disjoint event count */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* increment the state and the event counter */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.state);
+
+	kbase_disjoint_event(kbdev);
+}
+
+/* decrement the state */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+	kbase_disjoint_event(kbdev);
+
+	atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* increments the count only if the state is > 0 */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	if (atomic_read(&kbdev->disjoint_event.state))
+		kbase_disjoint_event(kbdev);
+}
+
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
new file mode 100644
index 0000000..9197743
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
@@ -0,0 +1,449 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as
+ * it will be set there.
+ */
+#include "mali_kbase_dma_fence.h"
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/reservation.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/ww_mutex.h>
+
+#include <mali_kbase.h>
+
+static void
+kbase_dma_fence_work(struct work_struct *pwork);
+
+static void
+kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource);
+}
+
+static void
+kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom)
+{
+	list_del(&katom->queue);
+}
+
+static int
+kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
+				  struct ww_acquire_ctx *ctx)
+{
+	struct reservation_object *content_res = NULL;
+	unsigned int content_res_idx = 0;
+	unsigned int r;
+	int err = 0;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < info->dma_fence_resv_count; r++) {
+		if (info->resv_objs[r] == content_res) {
+			content_res = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	content_res_idx = r;
+
+	/* Unlock the locked one ones */
+	while (r--)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+
+	if (content_res)
+		ww_mutex_unlock(&content_res->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+		content_res = info->resv_objs[content_res_idx];
+		ww_mutex_lock_slow(&content_res->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+static void
+kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
+				    struct ww_acquire_ctx *ctx)
+{
+	unsigned int r;
+
+	for (r = 0; r < info->dma_fence_resv_count; r++)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+	ww_acquire_fini(ctx);
+}
+
+/**
+ * kbase_dma_fence_queue_work() - Queue work to handle @katom
+ * @katom: Pointer to atom for which to queue work
+ *
+ * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and
+ * submit the atom.
+ */
+static void
+kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	bool ret;
+
+	INIT_WORK(&katom->work, kbase_dma_fence_work);
+	ret = queue_work(kctx->dma_fence.wq, &katom->work);
+	/* Warn if work was already queued, that should not happen. */
+	WARN_ON(!ret);
+}
+
+/**
+ * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom
+ * @katom:	Katom to cancel
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Cancel callbacks and clean up. */
+	kbase_fence_free_callbacks(katom);
+
+	/* Mark the atom as handled in case all fences signaled just before
+	 * canceling the callbacks and the worker was queued.
+	 */
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 * there is a race between job completion and cancellation.
+	 */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+
+/**
+ * kbase_dma_fence_work() - Worker thread called when a fence is signaled
+ * @pwork:	work_struct containing a pointer to a katom
+ *
+ * This function will clean and mark all dependencies as satisfied
+ */
+static void
+kbase_dma_fence_work(struct work_struct *pwork)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = container_of(pwork, struct kbase_jd_atom, work);
+	ctx = &katom->kctx->jctx;
+
+	mutex_lock(&ctx->lock);
+	if (kbase_fence_dep_count_read(katom) != 0)
+		goto out;
+
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Remove atom from list of dma-fence waiting atoms. */
+	kbase_dma_fence_waiters_remove(katom);
+	/* Cleanup callbacks. */
+	kbase_fence_free_callbacks(katom);
+	/*
+	 * Queue atom on GPU, unless it has already completed due to a failing
+	 * dependency. Run jd_done_nolock() on the katom if it is completed.
+	 */
+	if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
+		jd_done_nolock(katom, NULL);
+	else
+		kbase_jd_dep_clear_locked(katom);
+
+out:
+	mutex_unlock(&ctx->lock);
+}
+
+static void
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
+#else
+kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+#endif
+{
+	struct kbase_fence_cb *kcb = container_of(cb,
+				struct kbase_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+
+	/* If the atom is zapped dep_count will be forced to a negative number
+	 * preventing this callback from ever scheduling work. Which in turn
+	 * would reschedule the atom.
+	 */
+
+	if (kbase_fence_dep_count_dec_and_test(katom))
+		kbase_dma_fence_queue_work(katom);
+}
+
+static int
+kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
+					 struct reservation_object *resv,
+					 bool exclusive)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+#else
+	struct dma_fence *excl_fence = NULL;
+	struct dma_fence **shared_fences = NULL;
+#endif
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = reservation_object_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = kbase_fence_add_callback(katom,
+						excl_fence,
+						kbase_dma_fence_cb);
+
+		/* Release our reference, taken by reservation_object_get_fences_rcu(),
+		 * to the fence. We have set up our callback (if that was possible),
+		 * and it's the fence's owner is responsible for singling the fence
+		 * before allowing it to disappear.
+		 */
+		dma_fence_put(excl_fence);
+
+		if (err)
+			goto out;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = kbase_fence_add_callback(katom,
+							shared_fences[i],
+							kbase_dma_fence_cb);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Release all our references to the shared fences, taken by
+	 * reservation_object_get_fences_rcu(). We have set up our callback (if
+	 * that was possible), and it's the fence's owner is responsible for
+	 * signaling the fence before allowing it to disappear.
+	 */
+out:
+	for (i = 0; i < shared_count; i++)
+		dma_fence_put(shared_fences[i]);
+	kfree(shared_fences);
+
+	if (err) {
+		/*
+		 * On error, cancel and clean up all callbacks that was set up
+		 * before the error.
+		 */
+		kbase_fence_free_callbacks(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive)
+{
+	unsigned int i;
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		/* Duplicate resource, ignore */
+		if (info->resv_objs[i] == resv)
+			return;
+	}
+
+	info->resv_objs[info->dma_fence_resv_count] = resv;
+	if (exclusive)
+		set_bit(info->dma_fence_resv_count,
+			info->dma_fence_excl_bitmap);
+	(info->dma_fence_resv_count)++;
+}
+
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info)
+{
+	int err, i;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+	struct ww_acquire_ctx ww_ctx;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	fence = kbase_fence_out_new(katom);
+	if (!fence) {
+		err = -ENOMEM;
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d creating fence.\n", err);
+		return err;
+	}
+
+	kbase_fence_dep_count_set(katom, 1);
+
+	err = kbase_dma_fence_lock_reservations(info, &ww_ctx);
+	if (err) {
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d locking reservations.\n", err);
+		kbase_fence_dep_count_set(katom, -1);
+		kbase_fence_out_remove(katom);
+		return err;
+	}
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		struct reservation_object *obj = info->resv_objs[i];
+
+		if (!test_bit(i, info->dma_fence_excl_bitmap)) {
+			err = reservation_object_reserve_shared(obj);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d reserving space for shared fence.\n", err);
+				goto end;
+			}
+
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, false);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_shared_fence(obj, fence);
+		} else {
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, true);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_excl_fence(obj, fence);
+		}
+	}
+
+end:
+	kbase_dma_fence_unlock_reservations(info, &ww_ctx);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (kbase_fence_dep_count_dec_and_test(katom)) {
+			kbase_fence_dep_count_set(katom, -1);
+			kbase_fence_free_callbacks(katom);
+		} else {
+			/* Add katom to the list of dma-buf fence waiting atoms
+			 * only if it is still waiting.
+			 */
+			kbase_dma_fence_waiters_add(katom);
+		}
+	} else {
+		/* There was an error, cancel callbacks, set dep_count to -1 to
+		 * indicate that the atom has been handled (the caller will
+		 * kill it for us), signal the fence, free callbacks and the
+		 * fence.
+		 */
+		kbase_fence_free_callbacks(katom);
+		kbase_fence_dep_count_set(katom, -1);
+		kbase_dma_fence_signal(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
+{
+	struct list_head *list = &kctx->dma_fence.waiting_resource;
+
+	while (!list_empty(list)) {
+		struct kbase_jd_atom *katom;
+
+		katom = list_first_entry(list, struct kbase_jd_atom, queue);
+		kbase_dma_fence_waiters_remove(katom);
+		kbase_dma_fence_cancel_atom(katom);
+	}
+}
+
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
+{
+	/* Cancel callbacks and clean up. */
+	if (kbase_fence_free_callbacks(katom))
+		kbase_dma_fence_queue_work(katom);
+}
+
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
+{
+	if (!katom->dma_fence.fence)
+		return;
+
+	/* Signal the atom's fence. */
+	dma_fence_signal(katom->dma_fence.fence);
+
+	kbase_fence_out_remove(katom);
+
+	kbase_fence_free_callbacks(katom);
+}
+
+void kbase_dma_fence_term(struct kbase_context *kctx)
+{
+	destroy_workqueue(kctx->dma_fence.wq);
+	kctx->dma_fence.wq = NULL;
+}
+
+int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);
+
+	kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d",
+					     WQ_UNBOUND, 1, kctx->pid);
+	if (!kctx->dma_fence.wq)
+		return -ENOMEM;
+
+	return 0;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
new file mode 100644
index 0000000..c9ab403
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DMA_FENCE_H_
+#define _KBASE_DMA_FENCE_H_
+
+#ifdef CONFIG_MALI_DMA_FENCE
+
+#include <linux/list.h>
+#include <linux/reservation.h>
+#include <mali_kbase_fence.h>
+
+
+/* Forward declaration from mali_kbase_defs.h */
+struct kbase_jd_atom;
+struct kbase_context;
+
+/**
+ * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
+ * @resv_objs:             Array of reservation objects to attach the
+ *                         new fence to.
+ * @dma_fence_resv_count:  Number of reservation objects in the array.
+ * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive.
+ *
+ * This is used by some functions to pass around a collection of data about
+ * reservation objects.
+ */
+struct kbase_dma_fence_resv_info {
+	struct reservation_object **resv_objs;
+	unsigned int dma_fence_resv_count;
+	unsigned long *dma_fence_excl_bitmap;
+};
+
+/**
+ * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
+ * @resv:      Reservation object to add to the array.
+ * @info:      Pointer to struct with current reservation info
+ * @exclusive: Boolean indicating if exclusive access is needed
+ *
+ * The function adds a new reservation_object to an existing array of
+ * reservation_objects. At the same time keeps track of which objects require
+ * exclusive access in dma_fence_excl_bitmap.
+ */
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive);
+
+/**
+ * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
+ * @katom: Katom with the external dependency.
+ * @info:  Pointer to struct with current reservation info
+ *
+ * Return: An error code or 0 if succeeds
+ */
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info);
+
+/**
+ * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx
+ * @kctx: Pointer to kbase context
+ *
+ * This function will cancel and clean up all katoms on @kctx that is waiting
+ * on dma-buf fences.
+ *
+ * Locking: jctx.lock needs to be held when calling this function.
+ */
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
+ * @katom: Pointer to katom whose callbacks are to be canceled
+ *
+ * This function cancels all dma-buf fence callbacks on @katom, but does not
+ * cancel the katom itself.
+ *
+ * The caller is responsible for ensuring that jd_done_nolock is called on
+ * @katom.
+ *
+ * Locking: jctx.lock must be held when calling this function.
+ */
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
+ * @katom: Pointer to katom to signal and clean up
+ *
+ * This function will signal the @katom's fence, if it has one, and clean up
+ * the callback data from the katom's wait on earlier fences.
+ *
+ * Locking: jctx.lock must be held while calling this function.
+ */
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_term() - Terminate Mali dma-fence context
+ * @kctx: kbase context to terminate
+ */
+void kbase_dma_fence_term(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_init() - Initialize Mali dma-fence context
+ * @kctx: kbase context to initialize
+ */
+int kbase_dma_fence_init(struct kbase_context *kctx);
+
+
+#else /* CONFIG_MALI_DMA_FENCE */
+/* Dummy functions for when dma-buf fence isn't enabled. */
+
+static inline int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	return 0;
+}
+
+static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
new file mode 100644
index 0000000..1881486
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -0,0 +1,259 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+#include <mali_kbase_tlstream.h>
+
+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct base_jd_udata data;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+	data = katom->udata;
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
+
+	KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx);
+	KBASE_TLSTREAM_TL_DEL_ATOM(katom);
+
+	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+
+	wake_up(&katom->completed);
+
+	return data;
+}
+
+int kbase_event_pending(struct kbase_context *ctx)
+{
+	KBASE_DEBUG_ASSERT(ctx);
+
+	return (atomic_read(&ctx->event_count) != 0) ||
+			(atomic_read(&ctx->event_closed) != 0);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+	struct kbase_jd_atom *atom;
+
+	KBASE_DEBUG_ASSERT(ctx);
+
+	mutex_lock(&ctx->event_mutex);
+
+	if (list_empty(&ctx->event_list)) {
+		if (!atomic_read(&ctx->event_closed)) {
+			mutex_unlock(&ctx->event_mutex);
+			return -1;
+		}
+
+		/* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+		mutex_unlock(&ctx->event_mutex);
+		uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+		memset(&uevent->udata, 0, sizeof(uevent->udata));
+		dev_dbg(ctx->kbdev->dev,
+				"event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+				BASE_JD_EVENT_DRV_TERMINATED);
+		return 0;
+	}
+
+	/* normal event processing */
+	atomic_dec(&ctx->event_count);
+	atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
+	list_del(ctx->event_list.next);
+
+	mutex_unlock(&ctx->event_mutex);
+
+	dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+	uevent->event_code = atom->event_code;
+	uevent->atom_number = (atom - ctx->jctx.atoms);
+
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(atom);
+
+	mutex_lock(&ctx->jctx.lock);
+	uevent->udata = kbase_event_process(ctx, atom);
+	mutex_unlock(&ctx->jctx.lock);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue);
+
+/**
+ * kbase_event_process_noreport_worker - Worker for processing atoms that do not
+ *                                       return an event but do have external
+ *                                       resources
+ * @data:  Work structure
+ */
+static void kbase_event_process_noreport_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(katom);
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_event_process(kctx, katom);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+/**
+ * kbase_event_process_noreport - Process atoms that do not return an event
+ * @kctx:  Context pointer
+ * @katom: Atom to be processed
+ *
+ * Atoms that do not have external resources will be processed immediately.
+ * Atoms that do have external resources will be processed on a workqueue, in
+ * order to avoid locking issues.
+ */
+static void kbase_event_process_noreport(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		INIT_WORK(&katom->work, kbase_event_process_noreport_worker);
+		queue_work(kctx->event_workq, &katom->work);
+	} else {
+		kbase_event_process(kctx, katom);
+	}
+}
+
+/**
+ * kbase_event_coalesce - Move pending events to the main event list
+ * @kctx:  Context pointer
+ *
+ * kctx->event_list and kctx->event_coalesce_count must be protected
+ * by a lock unless this is the last thread using them
+ * (and we're about to terminate the lock).
+ *
+ * Return: The number of pending events moved to the main event list
+ */
+static int kbase_event_coalesce(struct kbase_context *kctx)
+{
+	const int event_count = kctx->event_coalesce_count;
+
+	/* Join the list of pending events onto the tail of the main list
+	   and reset it */
+	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
+	kctx->event_coalesce_count = 0;
+
+	/* Return the number of events moved */
+	return event_count;
+}
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+	if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
+		if (atom->event_code == BASE_JD_EVENT_DONE) {
+			/* Don't report the event */
+			kbase_event_process_noreport(ctx, atom);
+			return;
+		}
+	}
+
+	if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
+		/* Don't report the event */
+		kbase_event_process_noreport(ctx, atom);
+		return;
+	}
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_POSTED);
+	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
+		/* Don't report the event until other event(s) have completed */
+		mutex_lock(&ctx->event_mutex);
+		list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
+		++ctx->event_coalesce_count;
+		mutex_unlock(&ctx->event_mutex);
+	} else {
+		/* Report the event and any pending events now */
+		int event_count = 1;
+
+		mutex_lock(&ctx->event_mutex);
+		event_count += kbase_event_coalesce(ctx);
+		list_add_tail(&atom->dep_item[0], &ctx->event_list);
+		atomic_add(event_count, &ctx->event_count);
+		mutex_unlock(&ctx->event_mutex);
+
+		kbase_event_wakeup(ctx);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_event_post);
+
+void kbase_event_close(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->event_mutex);
+	atomic_set(&kctx->event_closed, true);
+	mutex_unlock(&kctx->event_mutex);
+	kbase_event_wakeup(kctx);
+}
+
+int kbase_event_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	INIT_LIST_HEAD(&kctx->event_list);
+	INIT_LIST_HEAD(&kctx->event_coalesce_list);
+	mutex_init(&kctx->event_mutex);
+	atomic_set(&kctx->event_count, 0);
+	kctx->event_coalesce_count = 0;
+	atomic_set(&kctx->event_closed, false);
+	kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+	if (NULL == kctx->event_workq)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init);
+
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+	int event_count;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+	flush_workqueue(kctx->event_workq);
+	destroy_workqueue(kctx->event_workq);
+
+	/* We use kbase_event_dequeue to remove the remaining events as that
+	 * deals with all the cleanup needed for the atoms.
+	 *
+	 * Note: use of kctx->event_list without a lock is safe because this must be the last
+	 * thread using it (because we're about to terminate the lock)
+	 */
+	event_count = kbase_event_coalesce(kctx);
+	atomic_add(event_count, &kctx->event_count);
+
+	while (!list_empty(&kctx->event_list)) {
+		struct base_jd_event_v2 event;
+
+		kbase_event_dequeue(kctx, &event);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c
new file mode 100644
index 0000000..17b5621
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <mali_kbase_fence_defs.h>
+#include <mali_kbase_fence.h>
+#include <mali_kbase.h>
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(kbase_fence_lock);
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_get_driver_name(struct fence *fence)
+#else
+kbase_fence_get_driver_name(struct dma_fence *fence)
+#endif
+{
+	return kbase_drv_name;
+}
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_get_timeline_name(struct fence *fence)
+#else
+kbase_fence_get_timeline_name(struct dma_fence *fence)
+#endif
+{
+	return kbase_timeline_name;
+}
+
+static bool
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_enable_signaling(struct fence *fence)
+#else
+kbase_fence_enable_signaling(struct dma_fence *fence)
+#endif
+{
+	return true;
+}
+
+static void
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_fence_value_str(struct fence *fence, char *str, int size)
+#else
+kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size)
+#endif
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+const struct fence_ops kbase_fence_ops = {
+	.wait = fence_default_wait,
+#else
+const struct dma_fence_ops kbase_fence_ops = {
+	.wait = dma_fence_default_wait,
+#endif
+	.get_driver_name = kbase_fence_get_driver_name,
+	.get_timeline_name = kbase_fence_get_timeline_name,
+	.enable_signaling = kbase_fence_enable_signaling,
+	.fence_value_str = kbase_fence_fence_value_str
+};
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+struct fence *
+kbase_fence_out_new(struct kbase_jd_atom *katom)
+#else
+struct dma_fence *
+kbase_fence_out_new(struct kbase_jd_atom *katom)
+#endif
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	WARN_ON(katom->dma_fence.fence);
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	dma_fence_init(fence,
+		       &kbase_fence_ops,
+		       &kbase_fence_lock,
+		       katom->dma_fence.context,
+		       atomic_inc_return(&katom->dma_fence.seqno));
+
+	katom->dma_fence.fence = fence;
+
+	return fence;
+}
+
+bool
+kbase_fence_free_callbacks(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_cb *cb, *tmp;
+	bool res = false;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Clean up and free callbacks. */
+	list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) {
+		bool ret;
+
+		/* Cancel callbacks that hasn't been called yet. */
+		ret = dma_fence_remove_callback(cb->fence, &cb->fence_cb);
+		if (ret) {
+			int ret;
+
+			/* Fence had not signaled, clean up after
+			 * canceling.
+			 */
+			ret = atomic_dec_return(&katom->dma_fence.dep_count);
+
+			if (unlikely(ret == 0))
+				res = true;
+		}
+
+		/*
+		 * Release the reference taken in
+		 * kbase_fence_add_callback().
+		 */
+		dma_fence_put(cb->fence);
+		list_del(&cb->node);
+		kfree(cb);
+	}
+
+	return res;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+int
+kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			 struct fence *fence,
+			 fence_func_t callback)
+#else
+int
+kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			 struct dma_fence *fence,
+			 dma_fence_func_t callback)
+#endif
+{
+	int err = 0;
+	struct kbase_fence_cb *kbase_fence_cb;
+
+	if (!fence)
+		return -EINVAL;
+
+	kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL);
+	if (!kbase_fence_cb)
+		return -ENOMEM;
+
+	kbase_fence_cb->fence = fence;
+	kbase_fence_cb->katom = katom;
+	INIT_LIST_HEAD(&kbase_fence_cb->node);
+
+	err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb,
+				     callback);
+	if (err == -ENOENT) {
+		/* Fence signaled, get the completion result */
+		err = dma_fence_get_status(fence);
+
+		/* remap success completion to err code */
+		if (err == 1)
+			err = 0;
+
+		kfree(kbase_fence_cb);
+	} else if (err) {
+		kfree(kbase_fence_cb);
+	} else {
+		/*
+		 * Get reference to fence that will be kept until callback gets
+		 * cleaned up in kbase_fence_free_callbacks().
+		 */
+		dma_fence_get(fence);
+		atomic_inc(&katom->dma_fence.dep_count);
+		/* Add callback to katom's list of callbacks */
+		list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks);
+	}
+
+	return err;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h
new file mode 100644
index 0000000..d187e50
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h
@@ -0,0 +1,270 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_FENCE_H_
+#define _KBASE_FENCE_H_
+
+/*
+ * mali_kbase_fence.[hc] has common fence code used by both
+ * - CONFIG_MALI_DMA_FENCE - implicit DMA fences
+ * - CONFIG_SYNC_FILE      - explicit fences beginning with 4.9 kernel
+ */
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+
+#include <linux/list.h>
+#include "mali_kbase_fence_defs.h"
+#include "mali_kbase.h"
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+extern const struct fence_ops kbase_fence_ops;
+#else
+extern const struct dma_fence_ops kbase_fence_ops;
+#endif
+
+/**
+* struct kbase_fence_cb - Mali dma-fence callback data struct
+* @fence_cb: Callback function
+* @katom:    Pointer to katom that is waiting on this callback
+* @fence:    Pointer to the fence object on which this callback is waiting
+* @node:     List head for linking this callback to the katom
+*/
+struct kbase_fence_cb {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence_cb fence_cb;
+	struct fence *fence;
+#else
+	struct dma_fence_cb fence_cb;
+	struct dma_fence *fence;
+#endif
+	struct kbase_jd_atom *katom;
+	struct list_head node;
+};
+
+/**
+ * kbase_fence_out_new() - Creates a new output fence and puts it on the atom
+ * @katom: Atom to create an output fence for
+ *
+ * return: A new fence object on success, NULL on failure.
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
+#else
+struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
+#endif
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_fence_in_set() - Assign input fence to atom
+ * @katom: Atom to assign input fence to
+ * @fence: Input fence to assign to atom
+ *
+ * This function will take ownership of one fence reference!
+ */
+#define kbase_fence_fence_in_set(katom, fence) \
+	do { \
+		WARN_ON((katom)->dma_fence.fence_in); \
+		(katom)->dma_fence.fence_in = fence; \
+	} while (0)
+#endif
+
+/**
+ * kbase_fence_out_remove() - Removes the output fence from atom
+ * @katom: Atom to remove output fence for
+ *
+ * This will also release the reference to this fence which the atom keeps
+ */
+static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->dma_fence.fence) {
+		dma_fence_put(katom->dma_fence.fence);
+		katom->dma_fence.fence = NULL;
+	}
+}
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_out_remove() - Removes the input fence from atom
+ * @katom: Atom to remove input fence for
+ *
+ * This will also release the reference to this fence which the atom keeps
+ */
+static inline void kbase_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->dma_fence.fence_in) {
+		dma_fence_put(katom->dma_fence.fence_in);
+		katom->dma_fence.fence_in = NULL;
+	}
+}
+#endif
+
+/**
+ * kbase_fence_out_is_ours() - Check if atom has a valid fence created by us
+ * @katom: Atom to check output fence for
+ *
+ * Return: true if fence exists and is valid, otherwise false
+ */
+static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom)
+{
+	return katom->dma_fence.fence &&
+				katom->dma_fence.fence->ops == &kbase_fence_ops;
+}
+
+/**
+ * kbase_fence_out_signal() - Signal output fence of atom
+ * @katom: Atom to signal output fence for
+ * @status: Status to signal with (0 for success, < 0 for error)
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom,
+					 int status)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 68))
+	katom->dma_fence.fence->error = status;
+#else
+	katom->dma_fence.fence->status = status;
+#endif
+	return dma_fence_signal(katom->dma_fence.fence);
+}
+
+/**
+ * kbase_fence_add_callback() - Add callback on @fence to block @katom
+ * @katom: Pointer to katom that will be blocked by @fence
+ * @fence: Pointer to fence on which to set up the callback
+ * @callback: Pointer to function to be called when fence is signaled
+ *
+ * Caller needs to hold a reference to @fence when calling this function, and
+ * the caller is responsible for releasing that reference.  An additional
+ * reference to @fence will be taken when the callback was successfully set up
+ * and @fence needs to be kept valid until the callback has been called and
+ * cleanup have been done.
+ *
+ * Return: 0 on success: fence was either already signaled, or callback was
+ * set up. Negative error code is returned on error.
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+int kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct fence *fence,
+			     fence_func_t callback);
+#else
+int kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct dma_fence *fence,
+			     dma_fence_func_t callback);
+#endif
+
+/**
+ * kbase_fence_dep_count_set() - Set dep_count value on atom to specified value
+ * @katom: Atom to set dep_count for
+ * @val: value to set dep_count to
+ *
+ * The dep_count is available to the users of this module so that they can
+ * synchronize completion of the wait with cancellation and adding of more
+ * callbacks. For instance, a user could do the following:
+ *
+ * dep_count set to 1
+ * callback #1 added, dep_count is increased to 2
+ *                             callback #1 happens, dep_count decremented to 1
+ *                             since dep_count > 0, no completion is done
+ * callback #2 is added, dep_count is increased to 2
+ * dep_count decremented to 1
+ *                             callback #2 happens, dep_count decremented to 0
+ *                             since dep_count now is zero, completion executes
+ *
+ * The dep_count can also be used to make sure that the completion only
+ * executes once. This is typically done by setting dep_count to -1 for the
+ * thread that takes on this responsibility.
+ */
+static inline void
+kbase_fence_dep_count_set(struct kbase_jd_atom *katom, int val)
+{
+	atomic_set(&katom->dma_fence.dep_count, val);
+}
+
+/**
+ * kbase_fence_dep_count_dec_and_test() - Decrements dep_count
+ * @katom: Atom to decrement dep_count for
+ *
+ * See @kbase_fence_dep_count_set for general description about dep_count
+ *
+ * Return: true if value was decremented to zero, otherwise false
+ */
+static inline bool
+kbase_fence_dep_count_dec_and_test(struct kbase_jd_atom *katom)
+{
+	return atomic_dec_and_test(&katom->dma_fence.dep_count);
+}
+
+/**
+ * kbase_fence_dep_count_read() - Returns the current dep_count value
+ * @katom: Pointer to katom
+ *
+ * See @kbase_fence_dep_count_set for general description about dep_count
+ *
+ * Return: The current dep_count value
+ */
+static inline int kbase_fence_dep_count_read(struct kbase_jd_atom *katom)
+{
+	return atomic_read(&katom->dma_fence.dep_count);
+}
+
+/**
+ * kbase_fence_free_callbacks() - Free dma-fence callbacks on a katom
+ * @katom: Pointer to katom
+ *
+ * This function will free all fence callbacks on the katom's list of
+ * callbacks. Callbacks that have not yet been called, because their fence
+ * hasn't yet signaled, will first be removed from the fence.
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ *
+ * Return: true if dep_count reached 0, otherwise false.
+ */
+bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom);
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_in_get() - Retrieve input fence for atom.
+ * @katom: Atom to get input fence from
+ *
+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it
+ *
+ * Return: The fence, or NULL if there is no input fence for atom
+ */
+#define kbase_fence_in_get(katom) dma_fence_get((katom)->dma_fence.fence_in)
+#endif
+
+/**
+ * kbase_fence_out_get() - Retrieve output fence for atom.
+ * @katom: Atom to get output fence from
+ *
+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it
+ *
+ * Return: The fence, or NULL if there is no output fence for atom
+ */
+#define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence)
+
+/**
+ * kbase_fence_put() - Releases a reference to a fence
+ * @fence: Fence to release reference for.
+ */
+#define kbase_fence_put(fence) dma_fence_put(fence)
+
+
+#endif /* CONFIG_MALI_DMA_FENCE || defined(CONFIG_SYNC_FILE */
+
+#endif /* _KBASE_FENCE_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h
new file mode 100644
index 0000000..32243a9
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h
@@ -0,0 +1,62 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_FENCE_DEFS_H_
+#define _KBASE_FENCE_DEFS_H_
+
+/*
+ * There was a big rename in the 4.10 kernel (fence* -> dma_fence*)
+ * This file hides the compatibility issues with this for the rest the driver
+ */
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+
+#include <linux/fence.h>
+
+#define dma_fence_context_alloc(a) fence_context_alloc(a)
+#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e)
+#define dma_fence_get(a) fence_get(a)
+#define dma_fence_put(a) fence_put(a)
+#define dma_fence_signal(a) fence_signal(a)
+#define dma_fence_is_signaled(a) fence_is_signaled(a)
+#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c)
+#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b)
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 68))
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0)
+#else
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0)
+#endif
+
+#else
+
+#include <linux/dma-fence.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
+#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \
+	(a)->status ?: 1 \
+	: 0)
+#endif
+
+#endif /* < 4.10.0 */
+
+#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */
+
+#endif /* _KBASE_FENCE_DEFS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
new file mode 100644
index 0000000..ce65b55
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* NB taken from gator  */
+/*
+ * List of possible actions to be controlled by DS-5 Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping
+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
+ * they are unknown inside gator.
+ */
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP  2
+#define GATOR_JOB_SLOT_SOFT_STOPPED  3
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 event, u64 value);
+void kbase_trace_mali_pm_power_off(u32 event, u64 value);
+void kbase_trace_mali_pm_power_on(u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
+void kbase_trace_mali_mmu_as_in_use(int event);
+void kbase_trace_mali_mmu_as_released(int event);
+void kbase_trace_mali_total_alloc_pages_change(long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif  /* _KBASE_GATOR_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
new file mode 100644
index 0000000..fac4d94
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -0,0 +1,338 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_gator_api.h"
+#include "mali_kbase_gator_hwcnt_names.h"
+
+#define MALI_MAX_CORES_PER_GROUP		4
+#define MALI_MAX_NUM_BLOCKS_PER_GROUP	8
+#define MALI_COUNTERS_PER_BLOCK			64
+#define MALI_BYTES_PER_COUNTER			4
+
+struct kbase_gator_hwcnt_handles {
+	struct kbase_device *kbdev;
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct work_struct dump_work;
+	int dump_complete;
+	spinlock_t dump_lock;
+};
+
+static void dump_worker(struct work_struct *work);
+
+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
+{
+	const char * const *hardware_counters;
+	struct kbase_device *kbdev;
+	uint32_t product_id;
+	uint32_t count;
+
+	if (!total_counters)
+		return NULL;
+
+	/* Get the first device - it doesn't matter in this case */
+	kbdev = kbase_find_device(-1);
+	if (!kbdev)
+		return NULL;
+
+	product_id = kbdev->gpu_props.props.core_props.product_id;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) {
+		case GPU_ID2_PRODUCT_TMIX:
+			hardware_counters = hardware_counters_mali_tMIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tMIx);
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			hardware_counters = hardware_counters_mali_tHEx;
+			count = ARRAY_SIZE(hardware_counters_mali_tHEx);
+			break;
+		case GPU_ID2_PRODUCT_TSIX:
+			hardware_counters = hardware_counters_mali_tSIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tSIx);
+			break;
+		case GPU_ID2_PRODUCT_TNOX:
+			hardware_counters = hardware_counters_mali_tNOx;
+			count = ARRAY_SIZE(hardware_counters_mali_tNOx);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	} else {
+		switch (product_id) {
+			/* If we are using a Mali-T60x device */
+		case GPU_ID_PI_T60X:
+			hardware_counters = hardware_counters_mali_t60x;
+			count = ARRAY_SIZE(hardware_counters_mali_t60x);
+			break;
+			/* If we are using a Mali-T62x device */
+		case GPU_ID_PI_T62X:
+			hardware_counters = hardware_counters_mali_t62x;
+			count = ARRAY_SIZE(hardware_counters_mali_t62x);
+			break;
+			/* If we are using a Mali-T72x device */
+		case GPU_ID_PI_T72X:
+			hardware_counters = hardware_counters_mali_t72x;
+			count = ARRAY_SIZE(hardware_counters_mali_t72x);
+			break;
+			/* If we are using a Mali-T76x device */
+		case GPU_ID_PI_T76X:
+			hardware_counters = hardware_counters_mali_t76x;
+			count = ARRAY_SIZE(hardware_counters_mali_t76x);
+			break;
+			/* If we are using a Mali-T82x device */
+		case GPU_ID_PI_T82X:
+			hardware_counters = hardware_counters_mali_t82x;
+			count = ARRAY_SIZE(hardware_counters_mali_t82x);
+			break;
+			/* If we are using a Mali-T83x device */
+		case GPU_ID_PI_T83X:
+			hardware_counters = hardware_counters_mali_t83x;
+			count = ARRAY_SIZE(hardware_counters_mali_t83x);
+			break;
+			/* If we are using a Mali-T86x device */
+		case GPU_ID_PI_T86X:
+			hardware_counters = hardware_counters_mali_t86x;
+			count = ARRAY_SIZE(hardware_counters_mali_t86x);
+			break;
+			/* If we are using a Mali-T88x device */
+		case GPU_ID_PI_TFRX:
+			hardware_counters = hardware_counters_mali_t88x;
+			count = ARRAY_SIZE(hardware_counters_mali_t88x);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	}
+
+	/* Release the kbdev reference. */
+	kbase_release_device(kbdev);
+
+	*total_counters = count;
+
+	/* If we return a string array take a reference on the module (or fail). */
+	if (hardware_counters && !try_module_get(THIS_MODULE))
+		return NULL;
+
+	return hardware_counters;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
+
+void kbase_gator_hwcnt_term_names(void)
+{
+	/* Release the module reference. */
+	module_put(THIS_MODULE);
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
+
+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	uint32_t dump_size = 0, i = 0;
+
+	if (!in_out_info)
+		return NULL;
+
+	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
+	if (!hand)
+		return NULL;
+
+	INIT_WORK(&hand->dump_work, dump_worker);
+	spin_lock_init(&hand->dump_lock);
+
+	/* Get the first device */
+	hand->kbdev = kbase_find_device(-1);
+	if (!hand->kbdev)
+		goto free_hand;
+
+	dump_size = kbase_vinstr_dump_size(hand->kbdev);
+	hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!hand->vinstr_buffer)
+		goto release_device;
+	in_out_info->kernel_dump_buffer = hand->vinstr_buffer;
+
+	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
+	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
+	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
+
+	/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
+	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
+		uint32_t cg, j;
+		uint64_t core_mask;
+
+		/* There are 8 hardware counters blocks per core group */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			in_out_info->nr_core_groups, GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = in_out_info->nr_core_groups *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			MALI_COUNTERS_PER_BLOCK *
+			MALI_BYTES_PER_COUNTER;
+
+		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
+			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
+
+			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
+				if (core_mask & (1u << j))
+					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+				else
+					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			}
+
+			in_out_info->hwc_layout[i++] = TILER_BLOCK;
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+
+			if (0 == cg)
+				in_out_info->hwc_layout[i++] = JM_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+		}
+	/* If we are using any other device */
+	} else {
+		uint32_t nr_l2, nr_sc_bits, j;
+		uint64_t core_mask;
+
+		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
+
+		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+		nr_sc_bits = fls64(core_mask);
+
+		/* The job manager and tiler sets of counters
+		 * are always present */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+
+		in_out_info->hwc_layout[i++] = JM_BLOCK;
+		in_out_info->hwc_layout[i++] = TILER_BLOCK;
+
+		for (j = 0; j < nr_l2; j++)
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+		while (core_mask != 0ull) {
+			if ((core_mask & 1ull) != 0ull)
+				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			core_mask >>= 1;
+		}
+	}
+
+	in_out_info->nr_hwc_blocks = i;
+	in_out_info->size = dump_size;
+
+	setup.jm_bm = in_out_info->bitmask[0];
+	setup.tiler_bm = in_out_info->bitmask[1];
+	setup.shader_bm = in_out_info->bitmask[2];
+	setup.mmu_l2_bm = in_out_info->bitmask[3];
+	hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx,
+			&setup, hand->vinstr_buffer);
+	if (!hand->vinstr_cli) {
+		dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core");
+		goto free_layout;
+	}
+
+	return hand;
+
+free_layout:
+	kfree(in_out_info->hwc_layout);
+
+free_vinstr_buffer:
+	kfree(hand->vinstr_buffer);
+
+release_device:
+	kbase_release_device(hand->kbdev);
+
+free_hand:
+	kfree(hand);
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
+
+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (in_out_info)
+		kfree(in_out_info->hwc_layout);
+
+	if (opaque_handles) {
+		cancel_work_sync(&opaque_handles->dump_work);
+		kbase_vinstr_detach_client(opaque_handles->vinstr_cli);
+		kfree(opaque_handles->vinstr_buffer);
+		kbase_release_device(opaque_handles->kbdev);
+		kfree(opaque_handles);
+	}
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
+
+static void dump_worker(struct work_struct *work)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+
+	hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work);
+	if (!kbase_vinstr_hwc_dump(hand->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL)) {
+		spin_lock_bh(&hand->dump_lock);
+		hand->dump_complete = 1;
+		spin_unlock_bh(&hand->dump_lock);
+	} else {
+		schedule_work(&hand->dump_work);
+	}
+}
+
+uint32_t kbase_gator_instr_hwcnt_dump_complete(
+		struct kbase_gator_hwcnt_handles *opaque_handles,
+		uint32_t * const success)
+{
+
+	if (opaque_handles && success) {
+		*success = opaque_handles->dump_complete;
+		opaque_handles->dump_complete = 0;
+		return *success;
+	}
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
+
+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (opaque_handles)
+		schedule_work(&opaque_handles->dump_work);
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
new file mode 100644
index 0000000..ef9ac0f
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
@@ -0,0 +1,219 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to fetch hardware counters.
+ */
+
+/* This define is used by the gator kernel module compile to select which DDK
+ * API calling convention to use. If not defined (legacy DDK) gator assumes
+ * version 1. The version to DDK release mapping is:
+ *     Version 1 API: DDK versions r1px, r2px
+ *     Version 2 API: DDK versions r3px, r4px
+ *     Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a
+ * counter dump. If this returns a non-zero value the request has been queued,
+ * otherwise the driver has been unable to do so (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the  previously
+ * requested dump has been succesful. If this returns non-zero the counter dump
+ * has resolved, but the value of *success must also be tested as the dump
+ * may have not been successful. If it returns zero the counter dump was
+ * abandoned due to the device being busy (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ *        kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ *    In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ *        hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ *        hwcnt_name # pointer to name list
+ *
+ *        u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ *        # Iterate over each 64-counter block in this GPU configuration
+ *        for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ *            hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ *            # Skip reserved type blocks - they contain no counters at all
+ *            if( type == RESERVED_BLOCK ) {
+ *                continue;
+ *            }
+ *
+ *            size_t name_offset = type * 64;
+ *            size_t data_offset = i * 64;
+ *
+ *            # Iterate over the names of the counters in this block type
+ *            for( j = 0; j < 64; j++) {
+ *                const char * name = hwcnt_name[name_offset+j];
+ *
+ *                # Skip empty name strings - there is no counter here
+ *                if( name[0] == '\0' ) {
+ *                    continue;
+ *                }
+ *
+ *                u32 data = hwcnt_data[data_offset+j];
+ *
+ *                printk( "COUNTER: %s DATA: %u\n", name, data );
+ *            }
+ *        }
+ *
+ *
+ *     Note that in most implementations you typically want to either SUM or
+ *     AVERAGE multiple instances of the same counter if, for example, you have
+ *     multiple shader cores or multiple L2 caches. The most sensible view for
+ *     analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ *     counters.
+ *
+ * 7] Goto 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling
+ *    kbase_gator_hwcnt_term_names(). This function must only be called if
+ *    init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+enum hwc_type {
+	JM_BLOCK = 0,
+	TILER_BLOCK,
+	SHADER_BLOCK,
+	MMU_L2_BLOCK,
+	RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+	/* Passed from Gator to kbase */
+
+	/* the bitmask of enabled hardware counters for each counter block */
+	uint16_t bitmask[4];
+
+	/* Passed from kbase to Gator */
+
+	/* ptr to counter dump memory */
+	void *kernel_dump_buffer;
+
+	/* size of counter dump memory */
+	uint32_t size;
+
+	/* the ID of the Mali device */
+	uint32_t gpu_id;
+
+	/* the number of shader cores in the GPU */
+	uint32_t nr_cores;
+
+	/* the number of core groups */
+	uint32_t nr_core_groups;
+
+	/* the memory layout of the performance counters */
+	enum hwc_type *hwc_layout;
+
+	/* the total number of hardware couter blocks */
+	uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info   A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ *                      specific information that will be returned to Gator. On entry Gator must have populated the
+ *                      'bitmask' field with the counters it wishes to enable for each class of counter block.
+ *                      Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ *                      enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ *                      the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ *                      kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU.
+ *
+ * @return              Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info       A pointer to a structure containing the enabled counters passed from Gator and all the
+ *                          Mali specific information that will be returned to Gator.
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ * @param[out] success      Non-zero on success, zero on failure.
+ *
+ * @return                  Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ *                          completed dump may not have dumped succesfully, so the caller must test for both
+ *                          a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ *
+ * @return                  Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_counters The total number of counters short names in the Mali devices' list.
+ *
+ * @return                    Pointer to an array of strings of length *total_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
new file mode 100644
index 0000000..6fe6530
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -0,0 +1,2169 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_H_
+#define _KBASE_GATOR_HWCNT_NAMES_H_
+
+/*
+ * "Short names" for hardware counters used by Streamline. Counters names are
+ * stored in accordance with their memory layout in the binary counter block
+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
+ * of 64 counters, and each GPU implements the same set of "masters" although
+ * the counters each master exposes within its block of 64 may vary.
+ *
+ * Counters which are an empty string are simply "holes" in the counter memory
+ * where no counter exists.
+ */
+
+static const char * const hardware_counters_mali_t60x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MESSAGES_SENT",
+	"T60x_MESSAGES_RECEIVED",
+	"T60x_GPU_ACTIVE",
+	"T60x_IRQ_ACTIVE",
+	"T60x_JS0_JOBS",
+	"T60x_JS0_TASKS",
+	"T60x_JS0_ACTIVE",
+	"",
+	"T60x_JS0_WAIT_READ",
+	"T60x_JS0_WAIT_ISSUE",
+	"T60x_JS0_WAIT_DEPEND",
+	"T60x_JS0_WAIT_FINISH",
+	"T60x_JS1_JOBS",
+	"T60x_JS1_TASKS",
+	"T60x_JS1_ACTIVE",
+	"",
+	"T60x_JS1_WAIT_READ",
+	"T60x_JS1_WAIT_ISSUE",
+	"T60x_JS1_WAIT_DEPEND",
+	"T60x_JS1_WAIT_FINISH",
+	"T60x_JS2_JOBS",
+	"T60x_JS2_TASKS",
+	"T60x_JS2_ACTIVE",
+	"",
+	"T60x_JS2_WAIT_READ",
+	"T60x_JS2_WAIT_ISSUE",
+	"T60x_JS2_WAIT_DEPEND",
+	"T60x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T60x_TI_JOBS_PROCESSED",
+	"T60x_TI_TRIANGLES",
+	"T60x_TI_QUADS",
+	"T60x_TI_POLYGONS",
+	"T60x_TI_POINTS",
+	"T60x_TI_LINES",
+	"T60x_TI_VCACHE_HIT",
+	"T60x_TI_VCACHE_MISS",
+	"T60x_TI_FRONT_FACING",
+	"T60x_TI_BACK_FACING",
+	"T60x_TI_PRIM_VISIBLE",
+	"T60x_TI_PRIM_CULLED",
+	"T60x_TI_PRIM_CLIPPED",
+	"T60x_TI_LEVEL0",
+	"T60x_TI_LEVEL1",
+	"T60x_TI_LEVEL2",
+	"T60x_TI_LEVEL3",
+	"T60x_TI_LEVEL4",
+	"T60x_TI_LEVEL5",
+	"T60x_TI_LEVEL6",
+	"T60x_TI_LEVEL7",
+	"T60x_TI_COMMAND_1",
+	"T60x_TI_COMMAND_2",
+	"T60x_TI_COMMAND_3",
+	"T60x_TI_COMMAND_4",
+	"T60x_TI_COMMAND_4_7",
+	"T60x_TI_COMMAND_8_15",
+	"T60x_TI_COMMAND_16_63",
+	"T60x_TI_COMMAND_64",
+	"T60x_TI_COMPRESS_IN",
+	"T60x_TI_COMPRESS_OUT",
+	"T60x_TI_COMPRESS_FLUSH",
+	"T60x_TI_TIMESTAMPS",
+	"T60x_TI_PCACHE_HIT",
+	"T60x_TI_PCACHE_MISS",
+	"T60x_TI_PCACHE_LINE",
+	"T60x_TI_PCACHE_STALL",
+	"T60x_TI_WRBUF_HIT",
+	"T60x_TI_WRBUF_MISS",
+	"T60x_TI_WRBUF_LINE",
+	"T60x_TI_WRBUF_PARTIAL",
+	"T60x_TI_WRBUF_STALL",
+	"T60x_TI_ACTIVE",
+	"T60x_TI_LOADING_DESC",
+	"T60x_TI_INDEX_WAIT",
+	"T60x_TI_INDEX_RANGE_WAIT",
+	"T60x_TI_VERTEX_WAIT",
+	"T60x_TI_PCACHE_WAIT",
+	"T60x_TI_WRBUF_WAIT",
+	"T60x_TI_BUS_READ",
+	"T60x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_TI_UTLB_STALL",
+	"T60x_TI_UTLB_REPLAY_MISS",
+	"T60x_TI_UTLB_REPLAY_FULL",
+	"T60x_TI_UTLB_NEW_MISS",
+	"T60x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T60x_FRAG_ACTIVE",
+	"T60x_FRAG_PRIMITIVES",
+	"T60x_FRAG_PRIMITIVES_DROPPED",
+	"T60x_FRAG_CYCLES_DESC",
+	"T60x_FRAG_CYCLES_PLR",
+	"T60x_FRAG_CYCLES_VERT",
+	"T60x_FRAG_CYCLES_TRISETUP",
+	"T60x_FRAG_CYCLES_RAST",
+	"T60x_FRAG_THREADS",
+	"T60x_FRAG_DUMMY_THREADS",
+	"T60x_FRAG_QUADS_RAST",
+	"T60x_FRAG_QUADS_EZS_TEST",
+	"T60x_FRAG_QUADS_EZS_KILLED",
+	"T60x_FRAG_THREADS_LZS_TEST",
+	"T60x_FRAG_THREADS_LZS_KILLED",
+	"T60x_FRAG_CYCLES_NO_TILE",
+	"T60x_FRAG_NUM_TILES",
+	"T60x_FRAG_TRANS_ELIM",
+	"T60x_COMPUTE_ACTIVE",
+	"T60x_COMPUTE_TASKS",
+	"T60x_COMPUTE_THREADS",
+	"T60x_COMPUTE_CYCLES_DESC",
+	"T60x_TRIPIPE_ACTIVE",
+	"T60x_ARITH_WORDS",
+	"T60x_ARITH_CYCLES_REG",
+	"T60x_ARITH_CYCLES_L0",
+	"T60x_ARITH_FRAG_DEPEND",
+	"T60x_LS_WORDS",
+	"T60x_LS_ISSUES",
+	"T60x_LS_RESTARTS",
+	"T60x_LS_REISSUES_MISS",
+	"T60x_LS_REISSUES_VD",
+	"T60x_LS_REISSUE_ATTRIB_MISS",
+	"T60x_LS_NO_WB",
+	"T60x_TEX_WORDS",
+	"T60x_TEX_BUBBLES",
+	"T60x_TEX_WORDS_L0",
+	"T60x_TEX_WORDS_DESC",
+	"T60x_TEX_ISSUES",
+	"T60x_TEX_RECIRC_FMISS",
+	"T60x_TEX_RECIRC_DESC",
+	"T60x_TEX_RECIRC_MULTI",
+	"T60x_TEX_RECIRC_PMISS",
+	"T60x_TEX_RECIRC_CONF",
+	"T60x_LSC_READ_HITS",
+	"T60x_LSC_READ_MISSES",
+	"T60x_LSC_WRITE_HITS",
+	"T60x_LSC_WRITE_MISSES",
+	"T60x_LSC_ATOMIC_HITS",
+	"T60x_LSC_ATOMIC_MISSES",
+	"T60x_LSC_LINE_FETCHES",
+	"T60x_LSC_DIRTY_LINE",
+	"T60x_LSC_SNOOPS",
+	"T60x_AXI_TLB_STALL",
+	"T60x_AXI_TLB_MISS",
+	"T60x_AXI_TLB_TRANSACTION",
+	"T60x_LS_TLB_MISS",
+	"T60x_LS_TLB_HIT",
+	"T60x_AXI_BEATS_READ",
+	"T60x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MMU_HIT",
+	"T60x_MMU_NEW_MISS",
+	"T60x_MMU_REPLAY_FULL",
+	"T60x_MMU_REPLAY_MISS",
+	"T60x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_UTLB_HIT",
+	"T60x_UTLB_NEW_MISS",
+	"T60x_UTLB_REPLAY_FULL",
+	"T60x_UTLB_REPLAY_MISS",
+	"T60x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_L2_EXT_WRITE_BEATS",
+	"T60x_L2_EXT_READ_BEATS",
+	"T60x_L2_ANY_LOOKUP",
+	"T60x_L2_READ_LOOKUP",
+	"T60x_L2_SREAD_LOOKUP",
+	"T60x_L2_READ_REPLAY",
+	"T60x_L2_READ_SNOOP",
+	"T60x_L2_READ_HIT",
+	"T60x_L2_CLEAN_MISS",
+	"T60x_L2_WRITE_LOOKUP",
+	"T60x_L2_SWRITE_LOOKUP",
+	"T60x_L2_WRITE_REPLAY",
+	"T60x_L2_WRITE_SNOOP",
+	"T60x_L2_WRITE_HIT",
+	"T60x_L2_EXT_READ_FULL",
+	"T60x_L2_EXT_READ_HALF",
+	"T60x_L2_EXT_WRITE_FULL",
+	"T60x_L2_EXT_WRITE_HALF",
+	"T60x_L2_EXT_READ",
+	"T60x_L2_EXT_READ_LINE",
+	"T60x_L2_EXT_WRITE",
+	"T60x_L2_EXT_WRITE_LINE",
+	"T60x_L2_EXT_WRITE_SMALL",
+	"T60x_L2_EXT_BARRIER",
+	"T60x_L2_EXT_AR_STALL",
+	"T60x_L2_EXT_R_BUF_FULL",
+	"T60x_L2_EXT_RD_BUF_FULL",
+	"T60x_L2_EXT_R_RAW",
+	"T60x_L2_EXT_W_STALL",
+	"T60x_L2_EXT_W_BUF_FULL",
+	"T60x_L2_EXT_R_W_HAZARD",
+	"T60x_L2_TAG_HAZARD",
+	"T60x_L2_SNOOP_FULL",
+	"T60x_L2_REPLAY_FULL"
+};
+static const char * const hardware_counters_mali_t62x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MESSAGES_SENT",
+	"T62x_MESSAGES_RECEIVED",
+	"T62x_GPU_ACTIVE",
+	"T62x_IRQ_ACTIVE",
+	"T62x_JS0_JOBS",
+	"T62x_JS0_TASKS",
+	"T62x_JS0_ACTIVE",
+	"",
+	"T62x_JS0_WAIT_READ",
+	"T62x_JS0_WAIT_ISSUE",
+	"T62x_JS0_WAIT_DEPEND",
+	"T62x_JS0_WAIT_FINISH",
+	"T62x_JS1_JOBS",
+	"T62x_JS1_TASKS",
+	"T62x_JS1_ACTIVE",
+	"",
+	"T62x_JS1_WAIT_READ",
+	"T62x_JS1_WAIT_ISSUE",
+	"T62x_JS1_WAIT_DEPEND",
+	"T62x_JS1_WAIT_FINISH",
+	"T62x_JS2_JOBS",
+	"T62x_JS2_TASKS",
+	"T62x_JS2_ACTIVE",
+	"",
+	"T62x_JS2_WAIT_READ",
+	"T62x_JS2_WAIT_ISSUE",
+	"T62x_JS2_WAIT_DEPEND",
+	"T62x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T62x_TI_JOBS_PROCESSED",
+	"T62x_TI_TRIANGLES",
+	"T62x_TI_QUADS",
+	"T62x_TI_POLYGONS",
+	"T62x_TI_POINTS",
+	"T62x_TI_LINES",
+	"T62x_TI_VCACHE_HIT",
+	"T62x_TI_VCACHE_MISS",
+	"T62x_TI_FRONT_FACING",
+	"T62x_TI_BACK_FACING",
+	"T62x_TI_PRIM_VISIBLE",
+	"T62x_TI_PRIM_CULLED",
+	"T62x_TI_PRIM_CLIPPED",
+	"T62x_TI_LEVEL0",
+	"T62x_TI_LEVEL1",
+	"T62x_TI_LEVEL2",
+	"T62x_TI_LEVEL3",
+	"T62x_TI_LEVEL4",
+	"T62x_TI_LEVEL5",
+	"T62x_TI_LEVEL6",
+	"T62x_TI_LEVEL7",
+	"T62x_TI_COMMAND_1",
+	"T62x_TI_COMMAND_2",
+	"T62x_TI_COMMAND_3",
+	"T62x_TI_COMMAND_4",
+	"T62x_TI_COMMAND_5_7",
+	"T62x_TI_COMMAND_8_15",
+	"T62x_TI_COMMAND_16_63",
+	"T62x_TI_COMMAND_64",
+	"T62x_TI_COMPRESS_IN",
+	"T62x_TI_COMPRESS_OUT",
+	"T62x_TI_COMPRESS_FLUSH",
+	"T62x_TI_TIMESTAMPS",
+	"T62x_TI_PCACHE_HIT",
+	"T62x_TI_PCACHE_MISS",
+	"T62x_TI_PCACHE_LINE",
+	"T62x_TI_PCACHE_STALL",
+	"T62x_TI_WRBUF_HIT",
+	"T62x_TI_WRBUF_MISS",
+	"T62x_TI_WRBUF_LINE",
+	"T62x_TI_WRBUF_PARTIAL",
+	"T62x_TI_WRBUF_STALL",
+	"T62x_TI_ACTIVE",
+	"T62x_TI_LOADING_DESC",
+	"T62x_TI_INDEX_WAIT",
+	"T62x_TI_INDEX_RANGE_WAIT",
+	"T62x_TI_VERTEX_WAIT",
+	"T62x_TI_PCACHE_WAIT",
+	"T62x_TI_WRBUF_WAIT",
+	"T62x_TI_BUS_READ",
+	"T62x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_TI_UTLB_STALL",
+	"T62x_TI_UTLB_REPLAY_MISS",
+	"T62x_TI_UTLB_REPLAY_FULL",
+	"T62x_TI_UTLB_NEW_MISS",
+	"T62x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"T62x_SHADER_CORE_ACTIVE",
+	"T62x_FRAG_ACTIVE",
+	"T62x_FRAG_PRIMITIVES",
+	"T62x_FRAG_PRIMITIVES_DROPPED",
+	"T62x_FRAG_CYCLES_DESC",
+	"T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T62x_FRAG_CYCLES_VERT",
+	"T62x_FRAG_CYCLES_TRISETUP",
+	"T62x_FRAG_CYCLES_EZS_ACTIVE",
+	"T62x_FRAG_THREADS",
+	"T62x_FRAG_DUMMY_THREADS",
+	"T62x_FRAG_QUADS_RAST",
+	"T62x_FRAG_QUADS_EZS_TEST",
+	"T62x_FRAG_QUADS_EZS_KILLED",
+	"T62x_FRAG_THREADS_LZS_TEST",
+	"T62x_FRAG_THREADS_LZS_KILLED",
+	"T62x_FRAG_CYCLES_NO_TILE",
+	"T62x_FRAG_NUM_TILES",
+	"T62x_FRAG_TRANS_ELIM",
+	"T62x_COMPUTE_ACTIVE",
+	"T62x_COMPUTE_TASKS",
+	"T62x_COMPUTE_THREADS",
+	"T62x_COMPUTE_CYCLES_DESC",
+	"T62x_TRIPIPE_ACTIVE",
+	"T62x_ARITH_WORDS",
+	"T62x_ARITH_CYCLES_REG",
+	"T62x_ARITH_CYCLES_L0",
+	"T62x_ARITH_FRAG_DEPEND",
+	"T62x_LS_WORDS",
+	"T62x_LS_ISSUES",
+	"T62x_LS_RESTARTS",
+	"T62x_LS_REISSUES_MISS",
+	"T62x_LS_REISSUES_VD",
+	"T62x_LS_REISSUE_ATTRIB_MISS",
+	"T62x_LS_NO_WB",
+	"T62x_TEX_WORDS",
+	"T62x_TEX_BUBBLES",
+	"T62x_TEX_WORDS_L0",
+	"T62x_TEX_WORDS_DESC",
+	"T62x_TEX_ISSUES",
+	"T62x_TEX_RECIRC_FMISS",
+	"T62x_TEX_RECIRC_DESC",
+	"T62x_TEX_RECIRC_MULTI",
+	"T62x_TEX_RECIRC_PMISS",
+	"T62x_TEX_RECIRC_CONF",
+	"T62x_LSC_READ_HITS",
+	"T62x_LSC_READ_MISSES",
+	"T62x_LSC_WRITE_HITS",
+	"T62x_LSC_WRITE_MISSES",
+	"T62x_LSC_ATOMIC_HITS",
+	"T62x_LSC_ATOMIC_MISSES",
+	"T62x_LSC_LINE_FETCHES",
+	"T62x_LSC_DIRTY_LINE",
+	"T62x_LSC_SNOOPS",
+	"T62x_AXI_TLB_STALL",
+	"T62x_AXI_TLB_MISS",
+	"T62x_AXI_TLB_TRANSACTION",
+	"T62x_LS_TLB_MISS",
+	"T62x_LS_TLB_HIT",
+	"T62x_AXI_BEATS_READ",
+	"T62x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MMU_HIT",
+	"T62x_MMU_NEW_MISS",
+	"T62x_MMU_REPLAY_FULL",
+	"T62x_MMU_REPLAY_MISS",
+	"T62x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_UTLB_HIT",
+	"T62x_UTLB_NEW_MISS",
+	"T62x_UTLB_REPLAY_FULL",
+	"T62x_UTLB_REPLAY_MISS",
+	"T62x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_L2_EXT_WRITE_BEATS",
+	"T62x_L2_EXT_READ_BEATS",
+	"T62x_L2_ANY_LOOKUP",
+	"T62x_L2_READ_LOOKUP",
+	"T62x_L2_SREAD_LOOKUP",
+	"T62x_L2_READ_REPLAY",
+	"T62x_L2_READ_SNOOP",
+	"T62x_L2_READ_HIT",
+	"T62x_L2_CLEAN_MISS",
+	"T62x_L2_WRITE_LOOKUP",
+	"T62x_L2_SWRITE_LOOKUP",
+	"T62x_L2_WRITE_REPLAY",
+	"T62x_L2_WRITE_SNOOP",
+	"T62x_L2_WRITE_HIT",
+	"T62x_L2_EXT_READ_FULL",
+	"T62x_L2_EXT_READ_HALF",
+	"T62x_L2_EXT_WRITE_FULL",
+	"T62x_L2_EXT_WRITE_HALF",
+	"T62x_L2_EXT_READ",
+	"T62x_L2_EXT_READ_LINE",
+	"T62x_L2_EXT_WRITE",
+	"T62x_L2_EXT_WRITE_LINE",
+	"T62x_L2_EXT_WRITE_SMALL",
+	"T62x_L2_EXT_BARRIER",
+	"T62x_L2_EXT_AR_STALL",
+	"T62x_L2_EXT_R_BUF_FULL",
+	"T62x_L2_EXT_RD_BUF_FULL",
+	"T62x_L2_EXT_R_RAW",
+	"T62x_L2_EXT_W_STALL",
+	"T62x_L2_EXT_W_BUF_FULL",
+	"T62x_L2_EXT_R_W_HAZARD",
+	"T62x_L2_TAG_HAZARD",
+	"T62x_L2_SNOOP_FULL",
+	"T62x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t72x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T72x_GPU_ACTIVE",
+	"T72x_IRQ_ACTIVE",
+	"T72x_JS0_JOBS",
+	"T72x_JS0_TASKS",
+	"T72x_JS0_ACTIVE",
+	"T72x_JS1_JOBS",
+	"T72x_JS1_TASKS",
+	"T72x_JS1_ACTIVE",
+	"T72x_JS2_JOBS",
+	"T72x_JS2_TASKS",
+	"T72x_JS2_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T72x_TI_JOBS_PROCESSED",
+	"T72x_TI_TRIANGLES",
+	"T72x_TI_QUADS",
+	"T72x_TI_POLYGONS",
+	"T72x_TI_POINTS",
+	"T72x_TI_LINES",
+	"T72x_TI_FRONT_FACING",
+	"T72x_TI_BACK_FACING",
+	"T72x_TI_PRIM_VISIBLE",
+	"T72x_TI_PRIM_CULLED",
+	"T72x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T72x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T72x_FRAG_ACTIVE",
+	"T72x_FRAG_PRIMITIVES",
+	"T72x_FRAG_PRIMITIVES_DROPPED",
+	"T72x_FRAG_THREADS",
+	"T72x_FRAG_DUMMY_THREADS",
+	"T72x_FRAG_QUADS_RAST",
+	"T72x_FRAG_QUADS_EZS_TEST",
+	"T72x_FRAG_QUADS_EZS_KILLED",
+	"T72x_FRAG_THREADS_LZS_TEST",
+	"T72x_FRAG_THREADS_LZS_KILLED",
+	"T72x_FRAG_CYCLES_NO_TILE",
+	"T72x_FRAG_NUM_TILES",
+	"T72x_FRAG_TRANS_ELIM",
+	"T72x_COMPUTE_ACTIVE",
+	"T72x_COMPUTE_TASKS",
+	"T72x_COMPUTE_THREADS",
+	"T72x_TRIPIPE_ACTIVE",
+	"T72x_ARITH_WORDS",
+	"T72x_ARITH_CYCLES_REG",
+	"T72x_LS_WORDS",
+	"T72x_LS_ISSUES",
+	"T72x_LS_RESTARTS",
+	"T72x_LS_REISSUES_MISS",
+	"T72x_TEX_WORDS",
+	"T72x_TEX_BUBBLES",
+	"T72x_TEX_ISSUES",
+	"T72x_LSC_READ_HITS",
+	"T72x_LSC_READ_MISSES",
+	"T72x_LSC_WRITE_HITS",
+	"T72x_LSC_WRITE_MISSES",
+	"T72x_LSC_ATOMIC_HITS",
+	"T72x_LSC_ATOMIC_MISSES",
+	"T72x_LSC_LINE_FETCHES",
+	"T72x_LSC_DIRTY_LINE",
+	"T72x_LSC_SNOOPS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T72x_L2_EXT_WRITE_BEAT",
+	"T72x_L2_EXT_READ_BEAT",
+	"T72x_L2_READ_SNOOP",
+	"T72x_L2_READ_HIT",
+	"T72x_L2_WRITE_SNOOP",
+	"T72x_L2_WRITE_HIT",
+	"T72x_L2_EXT_WRITE_SMALL",
+	"T72x_L2_EXT_BARRIER",
+	"T72x_L2_EXT_AR_STALL",
+	"T72x_L2_EXT_W_STALL",
+	"T72x_L2_SNOOP_FULL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	""
+};
+
+static const char * const hardware_counters_mali_t76x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MESSAGES_SENT",
+	"T76x_MESSAGES_RECEIVED",
+	"T76x_GPU_ACTIVE",
+	"T76x_IRQ_ACTIVE",
+	"T76x_JS0_JOBS",
+	"T76x_JS0_TASKS",
+	"T76x_JS0_ACTIVE",
+	"",
+	"T76x_JS0_WAIT_READ",
+	"T76x_JS0_WAIT_ISSUE",
+	"T76x_JS0_WAIT_DEPEND",
+	"T76x_JS0_WAIT_FINISH",
+	"T76x_JS1_JOBS",
+	"T76x_JS1_TASKS",
+	"T76x_JS1_ACTIVE",
+	"",
+	"T76x_JS1_WAIT_READ",
+	"T76x_JS1_WAIT_ISSUE",
+	"T76x_JS1_WAIT_DEPEND",
+	"T76x_JS1_WAIT_FINISH",
+	"T76x_JS2_JOBS",
+	"T76x_JS2_TASKS",
+	"T76x_JS2_ACTIVE",
+	"",
+	"T76x_JS2_WAIT_READ",
+	"T76x_JS2_WAIT_ISSUE",
+	"T76x_JS2_WAIT_DEPEND",
+	"T76x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T76x_TI_JOBS_PROCESSED",
+	"T76x_TI_TRIANGLES",
+	"T76x_TI_QUADS",
+	"T76x_TI_POLYGONS",
+	"T76x_TI_POINTS",
+	"T76x_TI_LINES",
+	"T76x_TI_VCACHE_HIT",
+	"T76x_TI_VCACHE_MISS",
+	"T76x_TI_FRONT_FACING",
+	"T76x_TI_BACK_FACING",
+	"T76x_TI_PRIM_VISIBLE",
+	"T76x_TI_PRIM_CULLED",
+	"T76x_TI_PRIM_CLIPPED",
+	"T76x_TI_LEVEL0",
+	"T76x_TI_LEVEL1",
+	"T76x_TI_LEVEL2",
+	"T76x_TI_LEVEL3",
+	"T76x_TI_LEVEL4",
+	"T76x_TI_LEVEL5",
+	"T76x_TI_LEVEL6",
+	"T76x_TI_LEVEL7",
+	"T76x_TI_COMMAND_1",
+	"T76x_TI_COMMAND_2",
+	"T76x_TI_COMMAND_3",
+	"T76x_TI_COMMAND_4",
+	"T76x_TI_COMMAND_5_7",
+	"T76x_TI_COMMAND_8_15",
+	"T76x_TI_COMMAND_16_63",
+	"T76x_TI_COMMAND_64",
+	"T76x_TI_COMPRESS_IN",
+	"T76x_TI_COMPRESS_OUT",
+	"T76x_TI_COMPRESS_FLUSH",
+	"T76x_TI_TIMESTAMPS",
+	"T76x_TI_PCACHE_HIT",
+	"T76x_TI_PCACHE_MISS",
+	"T76x_TI_PCACHE_LINE",
+	"T76x_TI_PCACHE_STALL",
+	"T76x_TI_WRBUF_HIT",
+	"T76x_TI_WRBUF_MISS",
+	"T76x_TI_WRBUF_LINE",
+	"T76x_TI_WRBUF_PARTIAL",
+	"T76x_TI_WRBUF_STALL",
+	"T76x_TI_ACTIVE",
+	"T76x_TI_LOADING_DESC",
+	"T76x_TI_INDEX_WAIT",
+	"T76x_TI_INDEX_RANGE_WAIT",
+	"T76x_TI_VERTEX_WAIT",
+	"T76x_TI_PCACHE_WAIT",
+	"T76x_TI_WRBUF_WAIT",
+	"T76x_TI_BUS_READ",
+	"T76x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_TI_UTLB_HIT",
+	"T76x_TI_UTLB_NEW_MISS",
+	"T76x_TI_UTLB_REPLAY_FULL",
+	"T76x_TI_UTLB_REPLAY_MISS",
+	"T76x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T76x_FRAG_ACTIVE",
+	"T76x_FRAG_PRIMITIVES",
+	"T76x_FRAG_PRIMITIVES_DROPPED",
+	"T76x_FRAG_CYCLES_DESC",
+	"T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T76x_FRAG_CYCLES_VERT",
+	"T76x_FRAG_CYCLES_TRISETUP",
+	"T76x_FRAG_CYCLES_EZS_ACTIVE",
+	"T76x_FRAG_THREADS",
+	"T76x_FRAG_DUMMY_THREADS",
+	"T76x_FRAG_QUADS_RAST",
+	"T76x_FRAG_QUADS_EZS_TEST",
+	"T76x_FRAG_QUADS_EZS_KILLED",
+	"T76x_FRAG_THREADS_LZS_TEST",
+	"T76x_FRAG_THREADS_LZS_KILLED",
+	"T76x_FRAG_CYCLES_NO_TILE",
+	"T76x_FRAG_NUM_TILES",
+	"T76x_FRAG_TRANS_ELIM",
+	"T76x_COMPUTE_ACTIVE",
+	"T76x_COMPUTE_TASKS",
+	"T76x_COMPUTE_THREADS",
+	"T76x_COMPUTE_CYCLES_DESC",
+	"T76x_TRIPIPE_ACTIVE",
+	"T76x_ARITH_WORDS",
+	"T76x_ARITH_CYCLES_REG",
+	"T76x_ARITH_CYCLES_L0",
+	"T76x_ARITH_FRAG_DEPEND",
+	"T76x_LS_WORDS",
+	"T76x_LS_ISSUES",
+	"T76x_LS_REISSUE_ATTR",
+	"T76x_LS_REISSUES_VARY",
+	"T76x_LS_VARY_RV_MISS",
+	"T76x_LS_VARY_RV_HIT",
+	"T76x_LS_NO_UNPARK",
+	"T76x_TEX_WORDS",
+	"T76x_TEX_BUBBLES",
+	"T76x_TEX_WORDS_L0",
+	"T76x_TEX_WORDS_DESC",
+	"T76x_TEX_ISSUES",
+	"T76x_TEX_RECIRC_FMISS",
+	"T76x_TEX_RECIRC_DESC",
+	"T76x_TEX_RECIRC_MULTI",
+	"T76x_TEX_RECIRC_PMISS",
+	"T76x_TEX_RECIRC_CONF",
+	"T76x_LSC_READ_HITS",
+	"T76x_LSC_READ_OP",
+	"T76x_LSC_WRITE_HITS",
+	"T76x_LSC_WRITE_OP",
+	"T76x_LSC_ATOMIC_HITS",
+	"T76x_LSC_ATOMIC_OP",
+	"T76x_LSC_LINE_FETCHES",
+	"T76x_LSC_DIRTY_LINE",
+	"T76x_LSC_SNOOPS",
+	"T76x_AXI_TLB_STALL",
+	"T76x_AXI_TLB_MISS",
+	"T76x_AXI_TLB_TRANSACTION",
+	"T76x_LS_TLB_MISS",
+	"T76x_LS_TLB_HIT",
+	"T76x_AXI_BEATS_READ",
+	"T76x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MMU_HIT",
+	"T76x_MMU_NEW_MISS",
+	"T76x_MMU_REPLAY_FULL",
+	"T76x_MMU_REPLAY_MISS",
+	"T76x_MMU_TABLE_WALK",
+	"T76x_MMU_REQUESTS",
+	"",
+	"",
+	"T76x_UTLB_HIT",
+	"T76x_UTLB_NEW_MISS",
+	"T76x_UTLB_REPLAY_FULL",
+	"T76x_UTLB_REPLAY_MISS",
+	"T76x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_L2_EXT_WRITE_BEATS",
+	"T76x_L2_EXT_READ_BEATS",
+	"T76x_L2_ANY_LOOKUP",
+	"T76x_L2_READ_LOOKUP",
+	"T76x_L2_SREAD_LOOKUP",
+	"T76x_L2_READ_REPLAY",
+	"T76x_L2_READ_SNOOP",
+	"T76x_L2_READ_HIT",
+	"T76x_L2_CLEAN_MISS",
+	"T76x_L2_WRITE_LOOKUP",
+	"T76x_L2_SWRITE_LOOKUP",
+	"T76x_L2_WRITE_REPLAY",
+	"T76x_L2_WRITE_SNOOP",
+	"T76x_L2_WRITE_HIT",
+	"T76x_L2_EXT_READ_FULL",
+	"",
+	"T76x_L2_EXT_WRITE_FULL",
+	"T76x_L2_EXT_R_W_HAZARD",
+	"T76x_L2_EXT_READ",
+	"T76x_L2_EXT_READ_LINE",
+	"T76x_L2_EXT_WRITE",
+	"T76x_L2_EXT_WRITE_LINE",
+	"T76x_L2_EXT_WRITE_SMALL",
+	"T76x_L2_EXT_BARRIER",
+	"T76x_L2_EXT_AR_STALL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_EXT_RD_BUF_FULL",
+	"T76x_L2_EXT_R_RAW",
+	"T76x_L2_EXT_W_STALL",
+	"T76x_L2_EXT_W_BUF_FULL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_TAG_HAZARD",
+	"T76x_L2_SNOOP_FULL",
+	"T76x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t82x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MESSAGES_SENT",
+	"T82x_MESSAGES_RECEIVED",
+	"T82x_GPU_ACTIVE",
+	"T82x_IRQ_ACTIVE",
+	"T82x_JS0_JOBS",
+	"T82x_JS0_TASKS",
+	"T82x_JS0_ACTIVE",
+	"",
+	"T82x_JS0_WAIT_READ",
+	"T82x_JS0_WAIT_ISSUE",
+	"T82x_JS0_WAIT_DEPEND",
+	"T82x_JS0_WAIT_FINISH",
+	"T82x_JS1_JOBS",
+	"T82x_JS1_TASKS",
+	"T82x_JS1_ACTIVE",
+	"",
+	"T82x_JS1_WAIT_READ",
+	"T82x_JS1_WAIT_ISSUE",
+	"T82x_JS1_WAIT_DEPEND",
+	"T82x_JS1_WAIT_FINISH",
+	"T82x_JS2_JOBS",
+	"T82x_JS2_TASKS",
+	"T82x_JS2_ACTIVE",
+	"",
+	"T82x_JS2_WAIT_READ",
+	"T82x_JS2_WAIT_ISSUE",
+	"T82x_JS2_WAIT_DEPEND",
+	"T82x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T82x_TI_JOBS_PROCESSED",
+	"T82x_TI_TRIANGLES",
+	"T82x_TI_QUADS",
+	"T82x_TI_POLYGONS",
+	"T82x_TI_POINTS",
+	"T82x_TI_LINES",
+	"T82x_TI_FRONT_FACING",
+	"T82x_TI_BACK_FACING",
+	"T82x_TI_PRIM_VISIBLE",
+	"T82x_TI_PRIM_CULLED",
+	"T82x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T82x_FRAG_ACTIVE",
+	"T82x_FRAG_PRIMITIVES",
+	"T82x_FRAG_PRIMITIVES_DROPPED",
+	"T82x_FRAG_CYCLES_DESC",
+	"T82x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T82x_FRAG_CYCLES_VERT",
+	"T82x_FRAG_CYCLES_TRISETUP",
+	"T82x_FRAG_CYCLES_EZS_ACTIVE",
+	"T82x_FRAG_THREADS",
+	"T82x_FRAG_DUMMY_THREADS",
+	"T82x_FRAG_QUADS_RAST",
+	"T82x_FRAG_QUADS_EZS_TEST",
+	"T82x_FRAG_QUADS_EZS_KILLED",
+	"T82x_FRAG_THREADS_LZS_TEST",
+	"T82x_FRAG_THREADS_LZS_KILLED",
+	"T82x_FRAG_CYCLES_NO_TILE",
+	"T82x_FRAG_NUM_TILES",
+	"T82x_FRAG_TRANS_ELIM",
+	"T82x_COMPUTE_ACTIVE",
+	"T82x_COMPUTE_TASKS",
+	"T82x_COMPUTE_THREADS",
+	"T82x_COMPUTE_CYCLES_DESC",
+	"T82x_TRIPIPE_ACTIVE",
+	"T82x_ARITH_WORDS",
+	"T82x_ARITH_CYCLES_REG",
+	"T82x_ARITH_CYCLES_L0",
+	"T82x_ARITH_FRAG_DEPEND",
+	"T82x_LS_WORDS",
+	"T82x_LS_ISSUES",
+	"T82x_LS_REISSUE_ATTR",
+	"T82x_LS_REISSUES_VARY",
+	"T82x_LS_VARY_RV_MISS",
+	"T82x_LS_VARY_RV_HIT",
+	"T82x_LS_NO_UNPARK",
+	"T82x_TEX_WORDS",
+	"T82x_TEX_BUBBLES",
+	"T82x_TEX_WORDS_L0",
+	"T82x_TEX_WORDS_DESC",
+	"T82x_TEX_ISSUES",
+	"T82x_TEX_RECIRC_FMISS",
+	"T82x_TEX_RECIRC_DESC",
+	"T82x_TEX_RECIRC_MULTI",
+	"T82x_TEX_RECIRC_PMISS",
+	"T82x_TEX_RECIRC_CONF",
+	"T82x_LSC_READ_HITS",
+	"T82x_LSC_READ_OP",
+	"T82x_LSC_WRITE_HITS",
+	"T82x_LSC_WRITE_OP",
+	"T82x_LSC_ATOMIC_HITS",
+	"T82x_LSC_ATOMIC_OP",
+	"T82x_LSC_LINE_FETCHES",
+	"T82x_LSC_DIRTY_LINE",
+	"T82x_LSC_SNOOPS",
+	"T82x_AXI_TLB_STALL",
+	"T82x_AXI_TLB_MISS",
+	"T82x_AXI_TLB_TRANSACTION",
+	"T82x_LS_TLB_MISS",
+	"T82x_LS_TLB_HIT",
+	"T82x_AXI_BEATS_READ",
+	"T82x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MMU_HIT",
+	"T82x_MMU_NEW_MISS",
+	"T82x_MMU_REPLAY_FULL",
+	"T82x_MMU_REPLAY_MISS",
+	"T82x_MMU_TABLE_WALK",
+	"T82x_MMU_REQUESTS",
+	"",
+	"",
+	"T82x_UTLB_HIT",
+	"T82x_UTLB_NEW_MISS",
+	"T82x_UTLB_REPLAY_FULL",
+	"T82x_UTLB_REPLAY_MISS",
+	"T82x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_L2_EXT_WRITE_BEATS",
+	"T82x_L2_EXT_READ_BEATS",
+	"T82x_L2_ANY_LOOKUP",
+	"T82x_L2_READ_LOOKUP",
+	"T82x_L2_SREAD_LOOKUP",
+	"T82x_L2_READ_REPLAY",
+	"T82x_L2_READ_SNOOP",
+	"T82x_L2_READ_HIT",
+	"T82x_L2_CLEAN_MISS",
+	"T82x_L2_WRITE_LOOKUP",
+	"T82x_L2_SWRITE_LOOKUP",
+	"T82x_L2_WRITE_REPLAY",
+	"T82x_L2_WRITE_SNOOP",
+	"T82x_L2_WRITE_HIT",
+	"T82x_L2_EXT_READ_FULL",
+	"",
+	"T82x_L2_EXT_WRITE_FULL",
+	"T82x_L2_EXT_R_W_HAZARD",
+	"T82x_L2_EXT_READ",
+	"T82x_L2_EXT_READ_LINE",
+	"T82x_L2_EXT_WRITE",
+	"T82x_L2_EXT_WRITE_LINE",
+	"T82x_L2_EXT_WRITE_SMALL",
+	"T82x_L2_EXT_BARRIER",
+	"T82x_L2_EXT_AR_STALL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_EXT_RD_BUF_FULL",
+	"T82x_L2_EXT_R_RAW",
+	"T82x_L2_EXT_W_STALL",
+	"T82x_L2_EXT_W_BUF_FULL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_TAG_HAZARD",
+	"T82x_L2_SNOOP_FULL",
+	"T82x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t83x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MESSAGES_SENT",
+	"T83x_MESSAGES_RECEIVED",
+	"T83x_GPU_ACTIVE",
+	"T83x_IRQ_ACTIVE",
+	"T83x_JS0_JOBS",
+	"T83x_JS0_TASKS",
+	"T83x_JS0_ACTIVE",
+	"",
+	"T83x_JS0_WAIT_READ",
+	"T83x_JS0_WAIT_ISSUE",
+	"T83x_JS0_WAIT_DEPEND",
+	"T83x_JS0_WAIT_FINISH",
+	"T83x_JS1_JOBS",
+	"T83x_JS1_TASKS",
+	"T83x_JS1_ACTIVE",
+	"",
+	"T83x_JS1_WAIT_READ",
+	"T83x_JS1_WAIT_ISSUE",
+	"T83x_JS1_WAIT_DEPEND",
+	"T83x_JS1_WAIT_FINISH",
+	"T83x_JS2_JOBS",
+	"T83x_JS2_TASKS",
+	"T83x_JS2_ACTIVE",
+	"",
+	"T83x_JS2_WAIT_READ",
+	"T83x_JS2_WAIT_ISSUE",
+	"T83x_JS2_WAIT_DEPEND",
+	"T83x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T83x_TI_JOBS_PROCESSED",
+	"T83x_TI_TRIANGLES",
+	"T83x_TI_QUADS",
+	"T83x_TI_POLYGONS",
+	"T83x_TI_POINTS",
+	"T83x_TI_LINES",
+	"T83x_TI_FRONT_FACING",
+	"T83x_TI_BACK_FACING",
+	"T83x_TI_PRIM_VISIBLE",
+	"T83x_TI_PRIM_CULLED",
+	"T83x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T83x_FRAG_ACTIVE",
+	"T83x_FRAG_PRIMITIVES",
+	"T83x_FRAG_PRIMITIVES_DROPPED",
+	"T83x_FRAG_CYCLES_DESC",
+	"T83x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T83x_FRAG_CYCLES_VERT",
+	"T83x_FRAG_CYCLES_TRISETUP",
+	"T83x_FRAG_CYCLES_EZS_ACTIVE",
+	"T83x_FRAG_THREADS",
+	"T83x_FRAG_DUMMY_THREADS",
+	"T83x_FRAG_QUADS_RAST",
+	"T83x_FRAG_QUADS_EZS_TEST",
+	"T83x_FRAG_QUADS_EZS_KILLED",
+	"T83x_FRAG_THREADS_LZS_TEST",
+	"T83x_FRAG_THREADS_LZS_KILLED",
+	"T83x_FRAG_CYCLES_NO_TILE",
+	"T83x_FRAG_NUM_TILES",
+	"T83x_FRAG_TRANS_ELIM",
+	"T83x_COMPUTE_ACTIVE",
+	"T83x_COMPUTE_TASKS",
+	"T83x_COMPUTE_THREADS",
+	"T83x_COMPUTE_CYCLES_DESC",
+	"T83x_TRIPIPE_ACTIVE",
+	"T83x_ARITH_WORDS",
+	"T83x_ARITH_CYCLES_REG",
+	"T83x_ARITH_CYCLES_L0",
+	"T83x_ARITH_FRAG_DEPEND",
+	"T83x_LS_WORDS",
+	"T83x_LS_ISSUES",
+	"T83x_LS_REISSUE_ATTR",
+	"T83x_LS_REISSUES_VARY",
+	"T83x_LS_VARY_RV_MISS",
+	"T83x_LS_VARY_RV_HIT",
+	"T83x_LS_NO_UNPARK",
+	"T83x_TEX_WORDS",
+	"T83x_TEX_BUBBLES",
+	"T83x_TEX_WORDS_L0",
+	"T83x_TEX_WORDS_DESC",
+	"T83x_TEX_ISSUES",
+	"T83x_TEX_RECIRC_FMISS",
+	"T83x_TEX_RECIRC_DESC",
+	"T83x_TEX_RECIRC_MULTI",
+	"T83x_TEX_RECIRC_PMISS",
+	"T83x_TEX_RECIRC_CONF",
+	"T83x_LSC_READ_HITS",
+	"T83x_LSC_READ_OP",
+	"T83x_LSC_WRITE_HITS",
+	"T83x_LSC_WRITE_OP",
+	"T83x_LSC_ATOMIC_HITS",
+	"T83x_LSC_ATOMIC_OP",
+	"T83x_LSC_LINE_FETCHES",
+	"T83x_LSC_DIRTY_LINE",
+	"T83x_LSC_SNOOPS",
+	"T83x_AXI_TLB_STALL",
+	"T83x_AXI_TLB_MISS",
+	"T83x_AXI_TLB_TRANSACTION",
+	"T83x_LS_TLB_MISS",
+	"T83x_LS_TLB_HIT",
+	"T83x_AXI_BEATS_READ",
+	"T83x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MMU_HIT",
+	"T83x_MMU_NEW_MISS",
+	"T83x_MMU_REPLAY_FULL",
+	"T83x_MMU_REPLAY_MISS",
+	"T83x_MMU_TABLE_WALK",
+	"T83x_MMU_REQUESTS",
+	"",
+	"",
+	"T83x_UTLB_HIT",
+	"T83x_UTLB_NEW_MISS",
+	"T83x_UTLB_REPLAY_FULL",
+	"T83x_UTLB_REPLAY_MISS",
+	"T83x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_L2_EXT_WRITE_BEATS",
+	"T83x_L2_EXT_READ_BEATS",
+	"T83x_L2_ANY_LOOKUP",
+	"T83x_L2_READ_LOOKUP",
+	"T83x_L2_SREAD_LOOKUP",
+	"T83x_L2_READ_REPLAY",
+	"T83x_L2_READ_SNOOP",
+	"T83x_L2_READ_HIT",
+	"T83x_L2_CLEAN_MISS",
+	"T83x_L2_WRITE_LOOKUP",
+	"T83x_L2_SWRITE_LOOKUP",
+	"T83x_L2_WRITE_REPLAY",
+	"T83x_L2_WRITE_SNOOP",
+	"T83x_L2_WRITE_HIT",
+	"T83x_L2_EXT_READ_FULL",
+	"",
+	"T83x_L2_EXT_WRITE_FULL",
+	"T83x_L2_EXT_R_W_HAZARD",
+	"T83x_L2_EXT_READ",
+	"T83x_L2_EXT_READ_LINE",
+	"T83x_L2_EXT_WRITE",
+	"T83x_L2_EXT_WRITE_LINE",
+	"T83x_L2_EXT_WRITE_SMALL",
+	"T83x_L2_EXT_BARRIER",
+	"T83x_L2_EXT_AR_STALL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_EXT_RD_BUF_FULL",
+	"T83x_L2_EXT_R_RAW",
+	"T83x_L2_EXT_W_STALL",
+	"T83x_L2_EXT_W_BUF_FULL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_TAG_HAZARD",
+	"T83x_L2_SNOOP_FULL",
+	"T83x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t86x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MESSAGES_SENT",
+	"T86x_MESSAGES_RECEIVED",
+	"T86x_GPU_ACTIVE",
+	"T86x_IRQ_ACTIVE",
+	"T86x_JS0_JOBS",
+	"T86x_JS0_TASKS",
+	"T86x_JS0_ACTIVE",
+	"",
+	"T86x_JS0_WAIT_READ",
+	"T86x_JS0_WAIT_ISSUE",
+	"T86x_JS0_WAIT_DEPEND",
+	"T86x_JS0_WAIT_FINISH",
+	"T86x_JS1_JOBS",
+	"T86x_JS1_TASKS",
+	"T86x_JS1_ACTIVE",
+	"",
+	"T86x_JS1_WAIT_READ",
+	"T86x_JS1_WAIT_ISSUE",
+	"T86x_JS1_WAIT_DEPEND",
+	"T86x_JS1_WAIT_FINISH",
+	"T86x_JS2_JOBS",
+	"T86x_JS2_TASKS",
+	"T86x_JS2_ACTIVE",
+	"",
+	"T86x_JS2_WAIT_READ",
+	"T86x_JS2_WAIT_ISSUE",
+	"T86x_JS2_WAIT_DEPEND",
+	"T86x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T86x_TI_JOBS_PROCESSED",
+	"T86x_TI_TRIANGLES",
+	"T86x_TI_QUADS",
+	"T86x_TI_POLYGONS",
+	"T86x_TI_POINTS",
+	"T86x_TI_LINES",
+	"T86x_TI_VCACHE_HIT",
+	"T86x_TI_VCACHE_MISS",
+	"T86x_TI_FRONT_FACING",
+	"T86x_TI_BACK_FACING",
+	"T86x_TI_PRIM_VISIBLE",
+	"T86x_TI_PRIM_CULLED",
+	"T86x_TI_PRIM_CLIPPED",
+	"T86x_TI_LEVEL0",
+	"T86x_TI_LEVEL1",
+	"T86x_TI_LEVEL2",
+	"T86x_TI_LEVEL3",
+	"T86x_TI_LEVEL4",
+	"T86x_TI_LEVEL5",
+	"T86x_TI_LEVEL6",
+	"T86x_TI_LEVEL7",
+	"T86x_TI_COMMAND_1",
+	"T86x_TI_COMMAND_2",
+	"T86x_TI_COMMAND_3",
+	"T86x_TI_COMMAND_4",
+	"T86x_TI_COMMAND_5_7",
+	"T86x_TI_COMMAND_8_15",
+	"T86x_TI_COMMAND_16_63",
+	"T86x_TI_COMMAND_64",
+	"T86x_TI_COMPRESS_IN",
+	"T86x_TI_COMPRESS_OUT",
+	"T86x_TI_COMPRESS_FLUSH",
+	"T86x_TI_TIMESTAMPS",
+	"T86x_TI_PCACHE_HIT",
+	"T86x_TI_PCACHE_MISS",
+	"T86x_TI_PCACHE_LINE",
+	"T86x_TI_PCACHE_STALL",
+	"T86x_TI_WRBUF_HIT",
+	"T86x_TI_WRBUF_MISS",
+	"T86x_TI_WRBUF_LINE",
+	"T86x_TI_WRBUF_PARTIAL",
+	"T86x_TI_WRBUF_STALL",
+	"T86x_TI_ACTIVE",
+	"T86x_TI_LOADING_DESC",
+	"T86x_TI_INDEX_WAIT",
+	"T86x_TI_INDEX_RANGE_WAIT",
+	"T86x_TI_VERTEX_WAIT",
+	"T86x_TI_PCACHE_WAIT",
+	"T86x_TI_WRBUF_WAIT",
+	"T86x_TI_BUS_READ",
+	"T86x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_TI_UTLB_HIT",
+	"T86x_TI_UTLB_NEW_MISS",
+	"T86x_TI_UTLB_REPLAY_FULL",
+	"T86x_TI_UTLB_REPLAY_MISS",
+	"T86x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T86x_FRAG_ACTIVE",
+	"T86x_FRAG_PRIMITIVES",
+	"T86x_FRAG_PRIMITIVES_DROPPED",
+	"T86x_FRAG_CYCLES_DESC",
+	"T86x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T86x_FRAG_CYCLES_VERT",
+	"T86x_FRAG_CYCLES_TRISETUP",
+	"T86x_FRAG_CYCLES_EZS_ACTIVE",
+	"T86x_FRAG_THREADS",
+	"T86x_FRAG_DUMMY_THREADS",
+	"T86x_FRAG_QUADS_RAST",
+	"T86x_FRAG_QUADS_EZS_TEST",
+	"T86x_FRAG_QUADS_EZS_KILLED",
+	"T86x_FRAG_THREADS_LZS_TEST",
+	"T86x_FRAG_THREADS_LZS_KILLED",
+	"T86x_FRAG_CYCLES_NO_TILE",
+	"T86x_FRAG_NUM_TILES",
+	"T86x_FRAG_TRANS_ELIM",
+	"T86x_COMPUTE_ACTIVE",
+	"T86x_COMPUTE_TASKS",
+	"T86x_COMPUTE_THREADS",
+	"T86x_COMPUTE_CYCLES_DESC",
+	"T86x_TRIPIPE_ACTIVE",
+	"T86x_ARITH_WORDS",
+	"T86x_ARITH_CYCLES_REG",
+	"T86x_ARITH_CYCLES_L0",
+	"T86x_ARITH_FRAG_DEPEND",
+	"T86x_LS_WORDS",
+	"T86x_LS_ISSUES",
+	"T86x_LS_REISSUE_ATTR",
+	"T86x_LS_REISSUES_VARY",
+	"T86x_LS_VARY_RV_MISS",
+	"T86x_LS_VARY_RV_HIT",
+	"T86x_LS_NO_UNPARK",
+	"T86x_TEX_WORDS",
+	"T86x_TEX_BUBBLES",
+	"T86x_TEX_WORDS_L0",
+	"T86x_TEX_WORDS_DESC",
+	"T86x_TEX_ISSUES",
+	"T86x_TEX_RECIRC_FMISS",
+	"T86x_TEX_RECIRC_DESC",
+	"T86x_TEX_RECIRC_MULTI",
+	"T86x_TEX_RECIRC_PMISS",
+	"T86x_TEX_RECIRC_CONF",
+	"T86x_LSC_READ_HITS",
+	"T86x_LSC_READ_OP",
+	"T86x_LSC_WRITE_HITS",
+	"T86x_LSC_WRITE_OP",
+	"T86x_LSC_ATOMIC_HITS",
+	"T86x_LSC_ATOMIC_OP",
+	"T86x_LSC_LINE_FETCHES",
+	"T86x_LSC_DIRTY_LINE",
+	"T86x_LSC_SNOOPS",
+	"T86x_AXI_TLB_STALL",
+	"T86x_AXI_TLB_MISS",
+	"T86x_AXI_TLB_TRANSACTION",
+	"T86x_LS_TLB_MISS",
+	"T86x_LS_TLB_HIT",
+	"T86x_AXI_BEATS_READ",
+	"T86x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MMU_HIT",
+	"T86x_MMU_NEW_MISS",
+	"T86x_MMU_REPLAY_FULL",
+	"T86x_MMU_REPLAY_MISS",
+	"T86x_MMU_TABLE_WALK",
+	"T86x_MMU_REQUESTS",
+	"",
+	"",
+	"T86x_UTLB_HIT",
+	"T86x_UTLB_NEW_MISS",
+	"T86x_UTLB_REPLAY_FULL",
+	"T86x_UTLB_REPLAY_MISS",
+	"T86x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_L2_EXT_WRITE_BEATS",
+	"T86x_L2_EXT_READ_BEATS",
+	"T86x_L2_ANY_LOOKUP",
+	"T86x_L2_READ_LOOKUP",
+	"T86x_L2_SREAD_LOOKUP",
+	"T86x_L2_READ_REPLAY",
+	"T86x_L2_READ_SNOOP",
+	"T86x_L2_READ_HIT",
+	"T86x_L2_CLEAN_MISS",
+	"T86x_L2_WRITE_LOOKUP",
+	"T86x_L2_SWRITE_LOOKUP",
+	"T86x_L2_WRITE_REPLAY",
+	"T86x_L2_WRITE_SNOOP",
+	"T86x_L2_WRITE_HIT",
+	"T86x_L2_EXT_READ_FULL",
+	"",
+	"T86x_L2_EXT_WRITE_FULL",
+	"T86x_L2_EXT_R_W_HAZARD",
+	"T86x_L2_EXT_READ",
+	"T86x_L2_EXT_READ_LINE",
+	"T86x_L2_EXT_WRITE",
+	"T86x_L2_EXT_WRITE_LINE",
+	"T86x_L2_EXT_WRITE_SMALL",
+	"T86x_L2_EXT_BARRIER",
+	"T86x_L2_EXT_AR_STALL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_EXT_RD_BUF_FULL",
+	"T86x_L2_EXT_R_RAW",
+	"T86x_L2_EXT_W_STALL",
+	"T86x_L2_EXT_W_BUF_FULL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_TAG_HAZARD",
+	"T86x_L2_SNOOP_FULL",
+	"T86x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t88x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MESSAGES_SENT",
+	"T88x_MESSAGES_RECEIVED",
+	"T88x_GPU_ACTIVE",
+	"T88x_IRQ_ACTIVE",
+	"T88x_JS0_JOBS",
+	"T88x_JS0_TASKS",
+	"T88x_JS0_ACTIVE",
+	"",
+	"T88x_JS0_WAIT_READ",
+	"T88x_JS0_WAIT_ISSUE",
+	"T88x_JS0_WAIT_DEPEND",
+	"T88x_JS0_WAIT_FINISH",
+	"T88x_JS1_JOBS",
+	"T88x_JS1_TASKS",
+	"T88x_JS1_ACTIVE",
+	"",
+	"T88x_JS1_WAIT_READ",
+	"T88x_JS1_WAIT_ISSUE",
+	"T88x_JS1_WAIT_DEPEND",
+	"T88x_JS1_WAIT_FINISH",
+	"T88x_JS2_JOBS",
+	"T88x_JS2_TASKS",
+	"T88x_JS2_ACTIVE",
+	"",
+	"T88x_JS2_WAIT_READ",
+	"T88x_JS2_WAIT_ISSUE",
+	"T88x_JS2_WAIT_DEPEND",
+	"T88x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T88x_TI_JOBS_PROCESSED",
+	"T88x_TI_TRIANGLES",
+	"T88x_TI_QUADS",
+	"T88x_TI_POLYGONS",
+	"T88x_TI_POINTS",
+	"T88x_TI_LINES",
+	"T88x_TI_VCACHE_HIT",
+	"T88x_TI_VCACHE_MISS",
+	"T88x_TI_FRONT_FACING",
+	"T88x_TI_BACK_FACING",
+	"T88x_TI_PRIM_VISIBLE",
+	"T88x_TI_PRIM_CULLED",
+	"T88x_TI_PRIM_CLIPPED",
+	"T88x_TI_LEVEL0",
+	"T88x_TI_LEVEL1",
+	"T88x_TI_LEVEL2",
+	"T88x_TI_LEVEL3",
+	"T88x_TI_LEVEL4",
+	"T88x_TI_LEVEL5",
+	"T88x_TI_LEVEL6",
+	"T88x_TI_LEVEL7",
+	"T88x_TI_COMMAND_1",
+	"T88x_TI_COMMAND_2",
+	"T88x_TI_COMMAND_3",
+	"T88x_TI_COMMAND_4",
+	"T88x_TI_COMMAND_5_7",
+	"T88x_TI_COMMAND_8_15",
+	"T88x_TI_COMMAND_16_63",
+	"T88x_TI_COMMAND_64",
+	"T88x_TI_COMPRESS_IN",
+	"T88x_TI_COMPRESS_OUT",
+	"T88x_TI_COMPRESS_FLUSH",
+	"T88x_TI_TIMESTAMPS",
+	"T88x_TI_PCACHE_HIT",
+	"T88x_TI_PCACHE_MISS",
+	"T88x_TI_PCACHE_LINE",
+	"T88x_TI_PCACHE_STALL",
+	"T88x_TI_WRBUF_HIT",
+	"T88x_TI_WRBUF_MISS",
+	"T88x_TI_WRBUF_LINE",
+	"T88x_TI_WRBUF_PARTIAL",
+	"T88x_TI_WRBUF_STALL",
+	"T88x_TI_ACTIVE",
+	"T88x_TI_LOADING_DESC",
+	"T88x_TI_INDEX_WAIT",
+	"T88x_TI_INDEX_RANGE_WAIT",
+	"T88x_TI_VERTEX_WAIT",
+	"T88x_TI_PCACHE_WAIT",
+	"T88x_TI_WRBUF_WAIT",
+	"T88x_TI_BUS_READ",
+	"T88x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_TI_UTLB_HIT",
+	"T88x_TI_UTLB_NEW_MISS",
+	"T88x_TI_UTLB_REPLAY_FULL",
+	"T88x_TI_UTLB_REPLAY_MISS",
+	"T88x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T88x_FRAG_ACTIVE",
+	"T88x_FRAG_PRIMITIVES",
+	"T88x_FRAG_PRIMITIVES_DROPPED",
+	"T88x_FRAG_CYCLES_DESC",
+	"T88x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T88x_FRAG_CYCLES_VERT",
+	"T88x_FRAG_CYCLES_TRISETUP",
+	"T88x_FRAG_CYCLES_EZS_ACTIVE",
+	"T88x_FRAG_THREADS",
+	"T88x_FRAG_DUMMY_THREADS",
+	"T88x_FRAG_QUADS_RAST",
+	"T88x_FRAG_QUADS_EZS_TEST",
+	"T88x_FRAG_QUADS_EZS_KILLED",
+	"T88x_FRAG_THREADS_LZS_TEST",
+	"T88x_FRAG_THREADS_LZS_KILLED",
+	"T88x_FRAG_CYCLES_NO_TILE",
+	"T88x_FRAG_NUM_TILES",
+	"T88x_FRAG_TRANS_ELIM",
+	"T88x_COMPUTE_ACTIVE",
+	"T88x_COMPUTE_TASKS",
+	"T88x_COMPUTE_THREADS",
+	"T88x_COMPUTE_CYCLES_DESC",
+	"T88x_TRIPIPE_ACTIVE",
+	"T88x_ARITH_WORDS",
+	"T88x_ARITH_CYCLES_REG",
+	"T88x_ARITH_CYCLES_L0",
+	"T88x_ARITH_FRAG_DEPEND",
+	"T88x_LS_WORDS",
+	"T88x_LS_ISSUES",
+	"T88x_LS_REISSUE_ATTR",
+	"T88x_LS_REISSUES_VARY",
+	"T88x_LS_VARY_RV_MISS",
+	"T88x_LS_VARY_RV_HIT",
+	"T88x_LS_NO_UNPARK",
+	"T88x_TEX_WORDS",
+	"T88x_TEX_BUBBLES",
+	"T88x_TEX_WORDS_L0",
+	"T88x_TEX_WORDS_DESC",
+	"T88x_TEX_ISSUES",
+	"T88x_TEX_RECIRC_FMISS",
+	"T88x_TEX_RECIRC_DESC",
+	"T88x_TEX_RECIRC_MULTI",
+	"T88x_TEX_RECIRC_PMISS",
+	"T88x_TEX_RECIRC_CONF",
+	"T88x_LSC_READ_HITS",
+	"T88x_LSC_READ_OP",
+	"T88x_LSC_WRITE_HITS",
+	"T88x_LSC_WRITE_OP",
+	"T88x_LSC_ATOMIC_HITS",
+	"T88x_LSC_ATOMIC_OP",
+	"T88x_LSC_LINE_FETCHES",
+	"T88x_LSC_DIRTY_LINE",
+	"T88x_LSC_SNOOPS",
+	"T88x_AXI_TLB_STALL",
+	"T88x_AXI_TLB_MISS",
+	"T88x_AXI_TLB_TRANSACTION",
+	"T88x_LS_TLB_MISS",
+	"T88x_LS_TLB_HIT",
+	"T88x_AXI_BEATS_READ",
+	"T88x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MMU_HIT",
+	"T88x_MMU_NEW_MISS",
+	"T88x_MMU_REPLAY_FULL",
+	"T88x_MMU_REPLAY_MISS",
+	"T88x_MMU_TABLE_WALK",
+	"T88x_MMU_REQUESTS",
+	"",
+	"",
+	"T88x_UTLB_HIT",
+	"T88x_UTLB_NEW_MISS",
+	"T88x_UTLB_REPLAY_FULL",
+	"T88x_UTLB_REPLAY_MISS",
+	"T88x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_L2_EXT_WRITE_BEATS",
+	"T88x_L2_EXT_READ_BEATS",
+	"T88x_L2_ANY_LOOKUP",
+	"T88x_L2_READ_LOOKUP",
+	"T88x_L2_SREAD_LOOKUP",
+	"T88x_L2_READ_REPLAY",
+	"T88x_L2_READ_SNOOP",
+	"T88x_L2_READ_HIT",
+	"T88x_L2_CLEAN_MISS",
+	"T88x_L2_WRITE_LOOKUP",
+	"T88x_L2_SWRITE_LOOKUP",
+	"T88x_L2_WRITE_REPLAY",
+	"T88x_L2_WRITE_SNOOP",
+	"T88x_L2_WRITE_HIT",
+	"T88x_L2_EXT_READ_FULL",
+	"",
+	"T88x_L2_EXT_WRITE_FULL",
+	"T88x_L2_EXT_R_W_HAZARD",
+	"T88x_L2_EXT_READ",
+	"T88x_L2_EXT_READ_LINE",
+	"T88x_L2_EXT_WRITE",
+	"T88x_L2_EXT_WRITE_LINE",
+	"T88x_L2_EXT_WRITE_SMALL",
+	"T88x_L2_EXT_BARRIER",
+	"T88x_L2_EXT_AR_STALL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_EXT_RD_BUF_FULL",
+	"T88x_L2_EXT_R_RAW",
+	"T88x_L2_EXT_W_STALL",
+	"T88x_L2_EXT_W_BUF_FULL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_TAG_HAZARD",
+	"T88x_L2_SNOOP_FULL",
+	"T88x_L2_REPLAY_FULL"
+};
+
+#include "mali_kbase_gator_hwcnt_names_tmix.h"
+
+#include "mali_kbase_gator_hwcnt_names_thex.h"
+
+#include "mali_kbase_gator_hwcnt_names_tsix.h"
+
+#include "mali_kbase_gator_hwcnt_names_tnox.h"
+
+#include "mali_kbase_gator_hwcnt_names_tkax.h"
+
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
new file mode 100644
index 0000000..15fd4ef
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+
+static const char * const hardware_counters_mali_tHEx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MESSAGES_SENT",
+	"THEx_MESSAGES_RECEIVED",
+	"THEx_GPU_ACTIVE",
+	"THEx_IRQ_ACTIVE",
+	"THEx_JS0_JOBS",
+	"THEx_JS0_TASKS",
+	"THEx_JS0_ACTIVE",
+	"",
+	"THEx_JS0_WAIT_READ",
+	"THEx_JS0_WAIT_ISSUE",
+	"THEx_JS0_WAIT_DEPEND",
+	"THEx_JS0_WAIT_FINISH",
+	"THEx_JS1_JOBS",
+	"THEx_JS1_TASKS",
+	"THEx_JS1_ACTIVE",
+	"",
+	"THEx_JS1_WAIT_READ",
+	"THEx_JS1_WAIT_ISSUE",
+	"THEx_JS1_WAIT_DEPEND",
+	"THEx_JS1_WAIT_FINISH",
+	"THEx_JS2_JOBS",
+	"THEx_JS2_TASKS",
+	"THEx_JS2_ACTIVE",
+	"",
+	"THEx_JS2_WAIT_READ",
+	"THEx_JS2_WAIT_ISSUE",
+	"THEx_JS2_WAIT_DEPEND",
+	"THEx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"THEx_TILER_ACTIVE",
+	"THEx_JOBS_PROCESSED",
+	"THEx_TRIANGLES",
+	"THEx_LINES",
+	"THEx_POINTS",
+	"THEx_FRONT_FACING",
+	"THEx_BACK_FACING",
+	"THEx_PRIM_VISIBLE",
+	"THEx_PRIM_CULLED",
+	"THEx_PRIM_CLIPPED",
+	"THEx_PRIM_SAT_CULLED",
+	"THEx_BIN_ALLOC_INIT",
+	"THEx_BIN_ALLOC_OVERFLOW",
+	"THEx_BUS_READ",
+	"",
+	"THEx_BUS_WRITE",
+	"THEx_LOADING_DESC",
+	"THEx_IDVS_POS_SHAD_REQ",
+	"THEx_IDVS_POS_SHAD_WAIT",
+	"THEx_IDVS_POS_SHAD_STALL",
+	"THEx_IDVS_POS_FIFO_FULL",
+	"THEx_PREFETCH_STALL",
+	"THEx_VCACHE_HIT",
+	"THEx_VCACHE_MISS",
+	"THEx_VCACHE_LINE_WAIT",
+	"THEx_VFETCH_POS_READ_WAIT",
+	"THEx_VFETCH_VERTEX_WAIT",
+	"THEx_VFETCH_STALL",
+	"THEx_PRIMASSY_STALL",
+	"THEx_BBOX_GEN_STALL",
+	"THEx_IDVS_VBU_HIT",
+	"THEx_IDVS_VBU_MISS",
+	"THEx_IDVS_VBU_LINE_DEALLOCATE",
+	"THEx_IDVS_VAR_SHAD_REQ",
+	"THEx_IDVS_VAR_SHAD_STALL",
+	"THEx_BINNER_STALL",
+	"THEx_ITER_STALL",
+	"THEx_COMPRESS_MISS",
+	"THEx_COMPRESS_STALL",
+	"THEx_PCACHE_HIT",
+	"THEx_PCACHE_MISS",
+	"THEx_PCACHE_MISS_STALL",
+	"THEx_PCACHE_EVICT_STALL",
+	"THEx_PMGR_PTR_WR_STALL",
+	"THEx_PMGR_PTR_RD_STALL",
+	"THEx_PMGR_CMD_WR_STALL",
+	"THEx_WRBUF_ACTIVE",
+	"THEx_WRBUF_HIT",
+	"THEx_WRBUF_MISS",
+	"THEx_WRBUF_NO_FREE_LINE_STALL",
+	"THEx_WRBUF_NO_AXI_ID_STALL",
+	"THEx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"THEx_UTLB_TRANS",
+	"THEx_UTLB_TRANS_HIT",
+	"THEx_UTLB_TRANS_STALL",
+	"THEx_UTLB_TRANS_MISS_DELAY",
+	"THEx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"THEx_FRAG_ACTIVE",
+	"THEx_FRAG_PRIMITIVES",
+	"THEx_FRAG_PRIM_RAST",
+	"THEx_FRAG_FPK_ACTIVE",
+	"THEx_FRAG_STARVING",
+	"THEx_FRAG_WARPS",
+	"THEx_FRAG_PARTIAL_WARPS",
+	"THEx_FRAG_QUADS_RAST",
+	"THEx_FRAG_QUADS_EZS_TEST",
+	"THEx_FRAG_QUADS_EZS_UPDATE",
+	"THEx_FRAG_QUADS_EZS_KILL",
+	"THEx_FRAG_LZS_TEST",
+	"THEx_FRAG_LZS_KILL",
+	"",
+	"THEx_FRAG_PTILES",
+	"THEx_FRAG_TRANS_ELIM",
+	"THEx_QUAD_FPK_KILLER",
+	"",
+	"THEx_COMPUTE_ACTIVE",
+	"THEx_COMPUTE_TASKS",
+	"THEx_COMPUTE_WARPS",
+	"THEx_COMPUTE_STARVING",
+	"THEx_EXEC_CORE_ACTIVE",
+	"THEx_EXEC_ACTIVE",
+	"THEx_EXEC_INSTR_COUNT",
+	"THEx_EXEC_INSTR_DIVERGED",
+	"THEx_EXEC_INSTR_STARVING",
+	"THEx_ARITH_INSTR_SINGLE_FMA",
+	"THEx_ARITH_INSTR_DOUBLE",
+	"THEx_ARITH_INSTR_MSG",
+	"THEx_ARITH_INSTR_MSG_ONLY",
+	"THEx_TEX_INSTR",
+	"THEx_TEX_INSTR_MIPMAP",
+	"THEx_TEX_INSTR_COMPRESSED",
+	"THEx_TEX_INSTR_3D",
+	"THEx_TEX_INSTR_TRILINEAR",
+	"THEx_TEX_COORD_ISSUE",
+	"THEx_TEX_COORD_STALL",
+	"THEx_TEX_STARVE_CACHE",
+	"THEx_TEX_STARVE_FILTER",
+	"THEx_LS_MEM_READ_FULL",
+	"THEx_LS_MEM_READ_SHORT",
+	"THEx_LS_MEM_WRITE_FULL",
+	"THEx_LS_MEM_WRITE_SHORT",
+	"THEx_LS_MEM_ATOMIC",
+	"THEx_VARY_INSTR",
+	"THEx_VARY_SLOT_32",
+	"THEx_VARY_SLOT_16",
+	"THEx_ATTR_INSTR",
+	"THEx_ARITH_INSTR_FP_MUL",
+	"THEx_BEATS_RD_FTC",
+	"THEx_BEATS_RD_FTC_EXT",
+	"THEx_BEATS_RD_LSC",
+	"THEx_BEATS_RD_LSC_EXT",
+	"THEx_BEATS_RD_TEX",
+	"THEx_BEATS_RD_TEX_EXT",
+	"THEx_BEATS_RD_OTHER",
+	"THEx_BEATS_WR_LSC",
+	"THEx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"THEx_L2_RD_MSG_IN",
+	"THEx_L2_RD_MSG_IN_STALL",
+	"THEx_L2_WR_MSG_IN",
+	"THEx_L2_WR_MSG_IN_STALL",
+	"THEx_L2_SNP_MSG_IN",
+	"THEx_L2_SNP_MSG_IN_STALL",
+	"THEx_L2_RD_MSG_OUT",
+	"THEx_L2_RD_MSG_OUT_STALL",
+	"THEx_L2_WR_MSG_OUT",
+	"THEx_L2_ANY_LOOKUP",
+	"THEx_L2_READ_LOOKUP",
+	"THEx_L2_WRITE_LOOKUP",
+	"THEx_L2_EXT_SNOOP_LOOKUP",
+	"THEx_L2_EXT_READ",
+	"THEx_L2_EXT_READ_NOSNP",
+	"THEx_L2_EXT_READ_UNIQUE",
+	"THEx_L2_EXT_READ_BEATS",
+	"THEx_L2_EXT_AR_STALL",
+	"THEx_L2_EXT_AR_CNT_Q1",
+	"THEx_L2_EXT_AR_CNT_Q2",
+	"THEx_L2_EXT_AR_CNT_Q3",
+	"THEx_L2_EXT_RRESP_0_127",
+	"THEx_L2_EXT_RRESP_128_191",
+	"THEx_L2_EXT_RRESP_192_255",
+	"THEx_L2_EXT_RRESP_256_319",
+	"THEx_L2_EXT_RRESP_320_383",
+	"THEx_L2_EXT_WRITE",
+	"THEx_L2_EXT_WRITE_NOSNP_FULL",
+	"THEx_L2_EXT_WRITE_NOSNP_PTL",
+	"THEx_L2_EXT_WRITE_SNP_FULL",
+	"THEx_L2_EXT_WRITE_SNP_PTL",
+	"THEx_L2_EXT_WRITE_BEATS",
+	"THEx_L2_EXT_W_STALL",
+	"THEx_L2_EXT_AW_CNT_Q1",
+	"THEx_L2_EXT_AW_CNT_Q2",
+	"THEx_L2_EXT_AW_CNT_Q3",
+	"THEx_L2_EXT_SNOOP",
+	"THEx_L2_EXT_SNOOP_STALL",
+	"THEx_L2_EXT_SNOOP_RESP_CLEAN",
+	"THEx_L2_EXT_SNOOP_RESP_DATA",
+	"THEx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h
new file mode 100644
index 0000000..a131d45
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TKAX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TKAX_H_
+
+static const char * const hardware_counters_mali_tKAx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_MESSAGES_SENT",
+	"TKAx_MESSAGES_RECEIVED",
+	"TKAx_GPU_ACTIVE",
+	"TKAx_IRQ_ACTIVE",
+	"TKAx_JS0_JOBS",
+	"TKAx_JS0_TASKS",
+	"TKAx_JS0_ACTIVE",
+	"",
+	"TKAx_JS0_WAIT_READ",
+	"TKAx_JS0_WAIT_ISSUE",
+	"TKAx_JS0_WAIT_DEPEND",
+	"TKAx_JS0_WAIT_FINISH",
+	"TKAx_JS1_JOBS",
+	"TKAx_JS1_TASKS",
+	"TKAx_JS1_ACTIVE",
+	"",
+	"TKAx_JS1_WAIT_READ",
+	"TKAx_JS1_WAIT_ISSUE",
+	"TKAx_JS1_WAIT_DEPEND",
+	"TKAx_JS1_WAIT_FINISH",
+	"TKAx_JS2_JOBS",
+	"TKAx_JS2_TASKS",
+	"TKAx_JS2_ACTIVE",
+	"",
+	"TKAx_JS2_WAIT_READ",
+	"TKAx_JS2_WAIT_ISSUE",
+	"TKAx_JS2_WAIT_DEPEND",
+	"TKAx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_TILER_ACTIVE",
+	"TKAx_JOBS_PROCESSED",
+	"TKAx_TRIANGLES",
+	"TKAx_LINES",
+	"TKAx_POINTS",
+	"TKAx_FRONT_FACING",
+	"TKAx_BACK_FACING",
+	"TKAx_PRIM_VISIBLE",
+	"TKAx_PRIM_CULLED",
+	"TKAx_PRIM_CLIPPED",
+	"TKAx_PRIM_SAT_CULLED",
+	"TKAx_BIN_ALLOC_INIT",
+	"TKAx_BIN_ALLOC_OVERFLOW",
+	"TKAx_BUS_READ",
+	"",
+	"TKAx_BUS_WRITE",
+	"TKAx_LOADING_DESC",
+	"TKAx_IDVS_POS_SHAD_REQ",
+	"TKAx_IDVS_POS_SHAD_WAIT",
+	"TKAx_IDVS_POS_SHAD_STALL",
+	"TKAx_IDVS_POS_FIFO_FULL",
+	"TKAx_PREFETCH_STALL",
+	"TKAx_VCACHE_HIT",
+	"TKAx_VCACHE_MISS",
+	"TKAx_VCACHE_LINE_WAIT",
+	"TKAx_VFETCH_POS_READ_WAIT",
+	"TKAx_VFETCH_VERTEX_WAIT",
+	"TKAx_VFETCH_STALL",
+	"TKAx_PRIMASSY_STALL",
+	"TKAx_BBOX_GEN_STALL",
+	"TKAx_IDVS_VBU_HIT",
+	"TKAx_IDVS_VBU_MISS",
+	"TKAx_IDVS_VBU_LINE_DEALLOCATE",
+	"TKAx_IDVS_VAR_SHAD_REQ",
+	"TKAx_IDVS_VAR_SHAD_STALL",
+	"TKAx_BINNER_STALL",
+	"TKAx_ITER_STALL",
+	"TKAx_COMPRESS_MISS",
+	"TKAx_COMPRESS_STALL",
+	"TKAx_PCACHE_HIT",
+	"TKAx_PCACHE_MISS",
+	"TKAx_PCACHE_MISS_STALL",
+	"TKAx_PCACHE_EVICT_STALL",
+	"TKAx_PMGR_PTR_WR_STALL",
+	"TKAx_PMGR_PTR_RD_STALL",
+	"TKAx_PMGR_CMD_WR_STALL",
+	"TKAx_WRBUF_ACTIVE",
+	"TKAx_WRBUF_HIT",
+	"TKAx_WRBUF_MISS",
+	"TKAx_WRBUF_NO_FREE_LINE_STALL",
+	"TKAx_WRBUF_NO_AXI_ID_STALL",
+	"TKAx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TKAx_UTLB_TRANS",
+	"TKAx_UTLB_TRANS_HIT",
+	"TKAx_UTLB_TRANS_STALL",
+	"TKAx_UTLB_TRANS_MISS_DELAY",
+	"TKAx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_FRAG_ACTIVE",
+	"TKAx_FRAG_PRIMITIVES",
+	"TKAx_FRAG_PRIM_RAST",
+	"TKAx_FRAG_FPK_ACTIVE",
+	"TKAx_FRAG_STARVING",
+	"TKAx_FRAG_WARPS",
+	"TKAx_FRAG_PARTIAL_WARPS",
+	"TKAx_FRAG_QUADS_RAST",
+	"TKAx_FRAG_QUADS_EZS_TEST",
+	"TKAx_FRAG_QUADS_EZS_UPDATE",
+	"TKAx_FRAG_QUADS_EZS_KILL",
+	"TKAx_FRAG_LZS_TEST",
+	"TKAx_FRAG_LZS_KILL",
+	"",
+	"TKAx_FRAG_PTILES",
+	"TKAx_FRAG_TRANS_ELIM",
+	"TKAx_QUAD_FPK_KILLER",
+	"",
+	"TKAx_COMPUTE_ACTIVE",
+	"TKAx_COMPUTE_TASKS",
+	"TKAx_COMPUTE_WARPS",
+	"TKAx_COMPUTE_STARVING",
+	"TKAx_EXEC_CORE_ACTIVE",
+	"TKAx_EXEC_ACTIVE",
+	"TKAx_EXEC_INSTR_COUNT",
+	"TKAx_EXEC_INSTR_DIVERGED",
+	"TKAx_EXEC_INSTR_STARVING",
+	"TKAx_ARITH_INSTR_SINGLE_FMA",
+	"TKAx_ARITH_INSTR_DOUBLE",
+	"TKAx_ARITH_INSTR_MSG",
+	"TKAx_ARITH_INSTR_MSG_ONLY",
+	"TKAx_TEX_MSGI_NUM_QUADS",
+	"TKAx_TEX_DFCH_NUM_PASSES",
+	"TKAx_TEX_DFCH_NUM_PASSES_MISS",
+	"TKAx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TKAx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TKAx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TKAx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TKAx_TEX_TFCH_NUM_OPERATIONS",
+	"TKAx_TEX_FILT_NUM_OPERATIONS",
+	"TKAx_LS_MEM_READ_FULL",
+	"TKAx_LS_MEM_READ_SHORT",
+	"TKAx_LS_MEM_WRITE_FULL",
+	"TKAx_LS_MEM_WRITE_SHORT",
+	"TKAx_LS_MEM_ATOMIC",
+	"TKAx_VARY_INSTR",
+	"TKAx_VARY_SLOT_32",
+	"TKAx_VARY_SLOT_16",
+	"TKAx_ATTR_INSTR",
+	"TKAx_ARITH_INSTR_FP_MUL",
+	"TKAx_BEATS_RD_FTC",
+	"TKAx_BEATS_RD_FTC_EXT",
+	"TKAx_BEATS_RD_LSC",
+	"TKAx_BEATS_RD_LSC_EXT",
+	"TKAx_BEATS_RD_TEX",
+	"TKAx_BEATS_RD_TEX_EXT",
+	"TKAx_BEATS_RD_OTHER",
+	"TKAx_BEATS_WR_LSC_WB",
+	"TKAx_BEATS_WR_TIB",
+	"TKAx_BEATS_WR_LSC_OTHER",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TKAx_L2_RD_MSG_IN",
+	"TKAx_L2_RD_MSG_IN_STALL",
+	"TKAx_L2_WR_MSG_IN",
+	"TKAx_L2_WR_MSG_IN_STALL",
+	"TKAx_L2_SNP_MSG_IN",
+	"TKAx_L2_SNP_MSG_IN_STALL",
+	"TKAx_L2_RD_MSG_OUT",
+	"TKAx_L2_RD_MSG_OUT_STALL",
+	"TKAx_L2_WR_MSG_OUT",
+	"TKAx_L2_ANY_LOOKUP",
+	"TKAx_L2_READ_LOOKUP",
+	"TKAx_L2_WRITE_LOOKUP",
+	"TKAx_L2_EXT_SNOOP_LOOKUP",
+	"TKAx_L2_EXT_READ",
+	"TKAx_L2_EXT_READ_NOSNP",
+	"TKAx_L2_EXT_READ_UNIQUE",
+	"TKAx_L2_EXT_READ_BEATS",
+	"TKAx_L2_EXT_AR_STALL",
+	"TKAx_L2_EXT_AR_CNT_Q1",
+	"TKAx_L2_EXT_AR_CNT_Q2",
+	"TKAx_L2_EXT_AR_CNT_Q3",
+	"TKAx_L2_EXT_RRESP_0_127",
+	"TKAx_L2_EXT_RRESP_128_191",
+	"TKAx_L2_EXT_RRESP_192_255",
+	"TKAx_L2_EXT_RRESP_256_319",
+	"TKAx_L2_EXT_RRESP_320_383",
+	"TKAx_L2_EXT_WRITE",
+	"TKAx_L2_EXT_WRITE_NOSNP_FULL",
+	"TKAx_L2_EXT_WRITE_NOSNP_PTL",
+	"TKAx_L2_EXT_WRITE_SNP_FULL",
+	"TKAx_L2_EXT_WRITE_SNP_PTL",
+	"TKAx_L2_EXT_WRITE_BEATS",
+	"TKAx_L2_EXT_W_STALL",
+	"TKAx_L2_EXT_AW_CNT_Q1",
+	"TKAx_L2_EXT_AW_CNT_Q2",
+	"TKAx_L2_EXT_AW_CNT_Q3",
+	"TKAx_L2_EXT_SNOOP",
+	"TKAx_L2_EXT_SNOOP_STALL",
+	"TKAx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TKAx_L2_EXT_SNOOP_RESP_DATA",
+	"TKAx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TKAX_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
new file mode 100644
index 0000000..8a215f7
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+
+static const char * const hardware_counters_mali_tMIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MESSAGES_SENT",
+	"TMIx_MESSAGES_RECEIVED",
+	"TMIx_GPU_ACTIVE",
+	"TMIx_IRQ_ACTIVE",
+	"TMIx_JS0_JOBS",
+	"TMIx_JS0_TASKS",
+	"TMIx_JS0_ACTIVE",
+	"",
+	"TMIx_JS0_WAIT_READ",
+	"TMIx_JS0_WAIT_ISSUE",
+	"TMIx_JS0_WAIT_DEPEND",
+	"TMIx_JS0_WAIT_FINISH",
+	"TMIx_JS1_JOBS",
+	"TMIx_JS1_TASKS",
+	"TMIx_JS1_ACTIVE",
+	"",
+	"TMIx_JS1_WAIT_READ",
+	"TMIx_JS1_WAIT_ISSUE",
+	"TMIx_JS1_WAIT_DEPEND",
+	"TMIx_JS1_WAIT_FINISH",
+	"TMIx_JS2_JOBS",
+	"TMIx_JS2_TASKS",
+	"TMIx_JS2_ACTIVE",
+	"",
+	"TMIx_JS2_WAIT_READ",
+	"TMIx_JS2_WAIT_ISSUE",
+	"TMIx_JS2_WAIT_DEPEND",
+	"TMIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_TILER_ACTIVE",
+	"TMIx_JOBS_PROCESSED",
+	"TMIx_TRIANGLES",
+	"TMIx_LINES",
+	"TMIx_POINTS",
+	"TMIx_FRONT_FACING",
+	"TMIx_BACK_FACING",
+	"TMIx_PRIM_VISIBLE",
+	"TMIx_PRIM_CULLED",
+	"TMIx_PRIM_CLIPPED",
+	"TMIx_PRIM_SAT_CULLED",
+	"TMIx_BIN_ALLOC_INIT",
+	"TMIx_BIN_ALLOC_OVERFLOW",
+	"TMIx_BUS_READ",
+	"",
+	"TMIx_BUS_WRITE",
+	"TMIx_LOADING_DESC",
+	"TMIx_IDVS_POS_SHAD_REQ",
+	"TMIx_IDVS_POS_SHAD_WAIT",
+	"TMIx_IDVS_POS_SHAD_STALL",
+	"TMIx_IDVS_POS_FIFO_FULL",
+	"TMIx_PREFETCH_STALL",
+	"TMIx_VCACHE_HIT",
+	"TMIx_VCACHE_MISS",
+	"TMIx_VCACHE_LINE_WAIT",
+	"TMIx_VFETCH_POS_READ_WAIT",
+	"TMIx_VFETCH_VERTEX_WAIT",
+	"TMIx_VFETCH_STALL",
+	"TMIx_PRIMASSY_STALL",
+	"TMIx_BBOX_GEN_STALL",
+	"TMIx_IDVS_VBU_HIT",
+	"TMIx_IDVS_VBU_MISS",
+	"TMIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TMIx_IDVS_VAR_SHAD_REQ",
+	"TMIx_IDVS_VAR_SHAD_STALL",
+	"TMIx_BINNER_STALL",
+	"TMIx_ITER_STALL",
+	"TMIx_COMPRESS_MISS",
+	"TMIx_COMPRESS_STALL",
+	"TMIx_PCACHE_HIT",
+	"TMIx_PCACHE_MISS",
+	"TMIx_PCACHE_MISS_STALL",
+	"TMIx_PCACHE_EVICT_STALL",
+	"TMIx_PMGR_PTR_WR_STALL",
+	"TMIx_PMGR_PTR_RD_STALL",
+	"TMIx_PMGR_CMD_WR_STALL",
+	"TMIx_WRBUF_ACTIVE",
+	"TMIx_WRBUF_HIT",
+	"TMIx_WRBUF_MISS",
+	"TMIx_WRBUF_NO_FREE_LINE_STALL",
+	"TMIx_WRBUF_NO_AXI_ID_STALL",
+	"TMIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TMIx_UTLB_TRANS",
+	"TMIx_UTLB_TRANS_HIT",
+	"TMIx_UTLB_TRANS_STALL",
+	"TMIx_UTLB_TRANS_MISS_DELAY",
+	"TMIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_FRAG_ACTIVE",
+	"TMIx_FRAG_PRIMITIVES",
+	"TMIx_FRAG_PRIM_RAST",
+	"TMIx_FRAG_FPK_ACTIVE",
+	"TMIx_FRAG_STARVING",
+	"TMIx_FRAG_WARPS",
+	"TMIx_FRAG_PARTIAL_WARPS",
+	"TMIx_FRAG_QUADS_RAST",
+	"TMIx_FRAG_QUADS_EZS_TEST",
+	"TMIx_FRAG_QUADS_EZS_UPDATE",
+	"TMIx_FRAG_QUADS_EZS_KILL",
+	"TMIx_FRAG_LZS_TEST",
+	"TMIx_FRAG_LZS_KILL",
+	"",
+	"TMIx_FRAG_PTILES",
+	"TMIx_FRAG_TRANS_ELIM",
+	"TMIx_QUAD_FPK_KILLER",
+	"",
+	"TMIx_COMPUTE_ACTIVE",
+	"TMIx_COMPUTE_TASKS",
+	"TMIx_COMPUTE_WARPS",
+	"TMIx_COMPUTE_STARVING",
+	"TMIx_EXEC_CORE_ACTIVE",
+	"TMIx_EXEC_ACTIVE",
+	"TMIx_EXEC_INSTR_COUNT",
+	"TMIx_EXEC_INSTR_DIVERGED",
+	"TMIx_EXEC_INSTR_STARVING",
+	"TMIx_ARITH_INSTR_SINGLE_FMA",
+	"TMIx_ARITH_INSTR_DOUBLE",
+	"TMIx_ARITH_INSTR_MSG",
+	"TMIx_ARITH_INSTR_MSG_ONLY",
+	"TMIx_TEX_INSTR",
+	"TMIx_TEX_INSTR_MIPMAP",
+	"TMIx_TEX_INSTR_COMPRESSED",
+	"TMIx_TEX_INSTR_3D",
+	"TMIx_TEX_INSTR_TRILINEAR",
+	"TMIx_TEX_COORD_ISSUE",
+	"TMIx_TEX_COORD_STALL",
+	"TMIx_TEX_STARVE_CACHE",
+	"TMIx_TEX_STARVE_FILTER",
+	"TMIx_LS_MEM_READ_FULL",
+	"TMIx_LS_MEM_READ_SHORT",
+	"TMIx_LS_MEM_WRITE_FULL",
+	"TMIx_LS_MEM_WRITE_SHORT",
+	"TMIx_LS_MEM_ATOMIC",
+	"TMIx_VARY_INSTR",
+	"TMIx_VARY_SLOT_32",
+	"TMIx_VARY_SLOT_16",
+	"TMIx_ATTR_INSTR",
+	"TMIx_ARITH_INSTR_FP_MUL",
+	"TMIx_BEATS_RD_FTC",
+	"TMIx_BEATS_RD_FTC_EXT",
+	"TMIx_BEATS_RD_LSC",
+	"TMIx_BEATS_RD_LSC_EXT",
+	"TMIx_BEATS_RD_TEX",
+	"TMIx_BEATS_RD_TEX_EXT",
+	"TMIx_BEATS_RD_OTHER",
+	"TMIx_BEATS_WR_LSC",
+	"TMIx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TMIx_L2_RD_MSG_IN",
+	"TMIx_L2_RD_MSG_IN_STALL",
+	"TMIx_L2_WR_MSG_IN",
+	"TMIx_L2_WR_MSG_IN_STALL",
+	"TMIx_L2_SNP_MSG_IN",
+	"TMIx_L2_SNP_MSG_IN_STALL",
+	"TMIx_L2_RD_MSG_OUT",
+	"TMIx_L2_RD_MSG_OUT_STALL",
+	"TMIx_L2_WR_MSG_OUT",
+	"TMIx_L2_ANY_LOOKUP",
+	"TMIx_L2_READ_LOOKUP",
+	"TMIx_L2_WRITE_LOOKUP",
+	"TMIx_L2_EXT_SNOOP_LOOKUP",
+	"TMIx_L2_EXT_READ",
+	"TMIx_L2_EXT_READ_NOSNP",
+	"TMIx_L2_EXT_READ_UNIQUE",
+	"TMIx_L2_EXT_READ_BEATS",
+	"TMIx_L2_EXT_AR_STALL",
+	"TMIx_L2_EXT_AR_CNT_Q1",
+	"TMIx_L2_EXT_AR_CNT_Q2",
+	"TMIx_L2_EXT_AR_CNT_Q3",
+	"TMIx_L2_EXT_RRESP_0_127",
+	"TMIx_L2_EXT_RRESP_128_191",
+	"TMIx_L2_EXT_RRESP_192_255",
+	"TMIx_L2_EXT_RRESP_256_319",
+	"TMIx_L2_EXT_RRESP_320_383",
+	"TMIx_L2_EXT_WRITE",
+	"TMIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TMIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TMIx_L2_EXT_WRITE_SNP_FULL",
+	"TMIx_L2_EXT_WRITE_SNP_PTL",
+	"TMIx_L2_EXT_WRITE_BEATS",
+	"TMIx_L2_EXT_W_STALL",
+	"TMIx_L2_EXT_AW_CNT_Q1",
+	"TMIx_L2_EXT_AW_CNT_Q2",
+	"TMIx_L2_EXT_AW_CNT_Q3",
+	"TMIx_L2_EXT_SNOOP",
+	"TMIx_L2_EXT_SNOOP_STALL",
+	"TMIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TMIx_L2_EXT_SNOOP_RESP_DATA",
+	"TMIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h
new file mode 100644
index 0000000..b6175a5
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TNOX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TNOX_H_
+
+static const char * const hardware_counters_mali_tNOx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_MESSAGES_SENT",
+	"TNOx_MESSAGES_RECEIVED",
+	"TNOx_GPU_ACTIVE",
+	"TNOx_IRQ_ACTIVE",
+	"TNOx_JS0_JOBS",
+	"TNOx_JS0_TASKS",
+	"TNOx_JS0_ACTIVE",
+	"",
+	"TNOx_JS0_WAIT_READ",
+	"TNOx_JS0_WAIT_ISSUE",
+	"TNOx_JS0_WAIT_DEPEND",
+	"TNOx_JS0_WAIT_FINISH",
+	"TNOx_JS1_JOBS",
+	"TNOx_JS1_TASKS",
+	"TNOx_JS1_ACTIVE",
+	"",
+	"TNOx_JS1_WAIT_READ",
+	"TNOx_JS1_WAIT_ISSUE",
+	"TNOx_JS1_WAIT_DEPEND",
+	"TNOx_JS1_WAIT_FINISH",
+	"TNOx_JS2_JOBS",
+	"TNOx_JS2_TASKS",
+	"TNOx_JS2_ACTIVE",
+	"",
+	"TNOx_JS2_WAIT_READ",
+	"TNOx_JS2_WAIT_ISSUE",
+	"TNOx_JS2_WAIT_DEPEND",
+	"TNOx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_TILER_ACTIVE",
+	"TNOx_JOBS_PROCESSED",
+	"TNOx_TRIANGLES",
+	"TNOx_LINES",
+	"TNOx_POINTS",
+	"TNOx_FRONT_FACING",
+	"TNOx_BACK_FACING",
+	"TNOx_PRIM_VISIBLE",
+	"TNOx_PRIM_CULLED",
+	"TNOx_PRIM_CLIPPED",
+	"TNOx_PRIM_SAT_CULLED",
+	"TNOx_BIN_ALLOC_INIT",
+	"TNOx_BIN_ALLOC_OVERFLOW",
+	"TNOx_BUS_READ",
+	"",
+	"TNOx_BUS_WRITE",
+	"TNOx_LOADING_DESC",
+	"TNOx_IDVS_POS_SHAD_REQ",
+	"TNOx_IDVS_POS_SHAD_WAIT",
+	"TNOx_IDVS_POS_SHAD_STALL",
+	"TNOx_IDVS_POS_FIFO_FULL",
+	"TNOx_PREFETCH_STALL",
+	"TNOx_VCACHE_HIT",
+	"TNOx_VCACHE_MISS",
+	"TNOx_VCACHE_LINE_WAIT",
+	"TNOx_VFETCH_POS_READ_WAIT",
+	"TNOx_VFETCH_VERTEX_WAIT",
+	"TNOx_VFETCH_STALL",
+	"TNOx_PRIMASSY_STALL",
+	"TNOx_BBOX_GEN_STALL",
+	"TNOx_IDVS_VBU_HIT",
+	"TNOx_IDVS_VBU_MISS",
+	"TNOx_IDVS_VBU_LINE_DEALLOCATE",
+	"TNOx_IDVS_VAR_SHAD_REQ",
+	"TNOx_IDVS_VAR_SHAD_STALL",
+	"TNOx_BINNER_STALL",
+	"TNOx_ITER_STALL",
+	"TNOx_COMPRESS_MISS",
+	"TNOx_COMPRESS_STALL",
+	"TNOx_PCACHE_HIT",
+	"TNOx_PCACHE_MISS",
+	"TNOx_PCACHE_MISS_STALL",
+	"TNOx_PCACHE_EVICT_STALL",
+	"TNOx_PMGR_PTR_WR_STALL",
+	"TNOx_PMGR_PTR_RD_STALL",
+	"TNOx_PMGR_CMD_WR_STALL",
+	"TNOx_WRBUF_ACTIVE",
+	"TNOx_WRBUF_HIT",
+	"TNOx_WRBUF_MISS",
+	"TNOx_WRBUF_NO_FREE_LINE_STALL",
+	"TNOx_WRBUF_NO_AXI_ID_STALL",
+	"TNOx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TNOx_UTLB_TRANS",
+	"TNOx_UTLB_TRANS_HIT",
+	"TNOx_UTLB_TRANS_STALL",
+	"TNOx_UTLB_TRANS_MISS_DELAY",
+	"TNOx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_FRAG_ACTIVE",
+	"TNOx_FRAG_PRIMITIVES",
+	"TNOx_FRAG_PRIM_RAST",
+	"TNOx_FRAG_FPK_ACTIVE",
+	"TNOx_FRAG_STARVING",
+	"TNOx_FRAG_WARPS",
+	"TNOx_FRAG_PARTIAL_WARPS",
+	"TNOx_FRAG_QUADS_RAST",
+	"TNOx_FRAG_QUADS_EZS_TEST",
+	"TNOx_FRAG_QUADS_EZS_UPDATE",
+	"TNOx_FRAG_QUADS_EZS_KILL",
+	"TNOx_FRAG_LZS_TEST",
+	"TNOx_FRAG_LZS_KILL",
+	"",
+	"TNOx_FRAG_PTILES",
+	"TNOx_FRAG_TRANS_ELIM",
+	"TNOx_QUAD_FPK_KILLER",
+	"",
+	"TNOx_COMPUTE_ACTIVE",
+	"TNOx_COMPUTE_TASKS",
+	"TNOx_COMPUTE_WARPS",
+	"TNOx_COMPUTE_STARVING",
+	"TNOx_EXEC_CORE_ACTIVE",
+	"TNOx_EXEC_ACTIVE",
+	"TNOx_EXEC_INSTR_COUNT",
+	"TNOx_EXEC_INSTR_DIVERGED",
+	"TNOx_EXEC_INSTR_STARVING",
+	"TNOx_ARITH_INSTR_SINGLE_FMA",
+	"TNOx_ARITH_INSTR_DOUBLE",
+	"TNOx_ARITH_INSTR_MSG",
+	"TNOx_ARITH_INSTR_MSG_ONLY",
+	"TNOx_TEX_MSGI_NUM_QUADS",
+	"TNOx_TEX_DFCH_NUM_PASSES",
+	"TNOx_TEX_DFCH_NUM_PASSES_MISS",
+	"TNOx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TNOx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TNOx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TNOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TNOx_TEX_TFCH_NUM_OPERATIONS",
+	"TNOx_TEX_FILT_NUM_OPERATIONS",
+	"TNOx_LS_MEM_READ_FULL",
+	"TNOx_LS_MEM_READ_SHORT",
+	"TNOx_LS_MEM_WRITE_FULL",
+	"TNOx_LS_MEM_WRITE_SHORT",
+	"TNOx_LS_MEM_ATOMIC",
+	"TNOx_VARY_INSTR",
+	"TNOx_VARY_SLOT_32",
+	"TNOx_VARY_SLOT_16",
+	"TNOx_ATTR_INSTR",
+	"TNOx_ARITH_INSTR_FP_MUL",
+	"TNOx_BEATS_RD_FTC",
+	"TNOx_BEATS_RD_FTC_EXT",
+	"TNOx_BEATS_RD_LSC",
+	"TNOx_BEATS_RD_LSC_EXT",
+	"TNOx_BEATS_RD_TEX",
+	"TNOx_BEATS_RD_TEX_EXT",
+	"TNOx_BEATS_RD_OTHER",
+	"TNOx_BEATS_WR_LSC_WB",
+	"TNOx_BEATS_WR_TIB",
+	"TNOx_BEATS_WR_LSC_OTHER",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TNOx_L2_RD_MSG_IN",
+	"TNOx_L2_RD_MSG_IN_STALL",
+	"TNOx_L2_WR_MSG_IN",
+	"TNOx_L2_WR_MSG_IN_STALL",
+	"TNOx_L2_SNP_MSG_IN",
+	"TNOx_L2_SNP_MSG_IN_STALL",
+	"TNOx_L2_RD_MSG_OUT",
+	"TNOx_L2_RD_MSG_OUT_STALL",
+	"TNOx_L2_WR_MSG_OUT",
+	"TNOx_L2_ANY_LOOKUP",
+	"TNOx_L2_READ_LOOKUP",
+	"TNOx_L2_WRITE_LOOKUP",
+	"TNOx_L2_EXT_SNOOP_LOOKUP",
+	"TNOx_L2_EXT_READ",
+	"TNOx_L2_EXT_READ_NOSNP",
+	"TNOx_L2_EXT_READ_UNIQUE",
+	"TNOx_L2_EXT_READ_BEATS",
+	"TNOx_L2_EXT_AR_STALL",
+	"TNOx_L2_EXT_AR_CNT_Q1",
+	"TNOx_L2_EXT_AR_CNT_Q2",
+	"TNOx_L2_EXT_AR_CNT_Q3",
+	"TNOx_L2_EXT_RRESP_0_127",
+	"TNOx_L2_EXT_RRESP_128_191",
+	"TNOx_L2_EXT_RRESP_192_255",
+	"TNOx_L2_EXT_RRESP_256_319",
+	"TNOx_L2_EXT_RRESP_320_383",
+	"TNOx_L2_EXT_WRITE",
+	"TNOx_L2_EXT_WRITE_NOSNP_FULL",
+	"TNOx_L2_EXT_WRITE_NOSNP_PTL",
+	"TNOx_L2_EXT_WRITE_SNP_FULL",
+	"TNOx_L2_EXT_WRITE_SNP_PTL",
+	"TNOx_L2_EXT_WRITE_BEATS",
+	"TNOx_L2_EXT_W_STALL",
+	"TNOx_L2_EXT_AW_CNT_Q1",
+	"TNOx_L2_EXT_AW_CNT_Q2",
+	"TNOx_L2_EXT_AW_CNT_Q3",
+	"TNOx_L2_EXT_SNOOP",
+	"TNOx_L2_EXT_SNOOP_STALL",
+	"TNOx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TNOx_L2_EXT_SNOOP_RESP_DATA",
+	"TNOx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TNOX_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
new file mode 100644
index 0000000..fb6a143
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TSIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TSIX_H_
+
+static const char * const hardware_counters_mali_tSIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_MESSAGES_SENT",
+	"TSIx_MESSAGES_RECEIVED",
+	"TSIx_GPU_ACTIVE",
+	"TSIx_IRQ_ACTIVE",
+	"TSIx_JS0_JOBS",
+	"TSIx_JS0_TASKS",
+	"TSIx_JS0_ACTIVE",
+	"",
+	"TSIx_JS0_WAIT_READ",
+	"TSIx_JS0_WAIT_ISSUE",
+	"TSIx_JS0_WAIT_DEPEND",
+	"TSIx_JS0_WAIT_FINISH",
+	"TSIx_JS1_JOBS",
+	"TSIx_JS1_TASKS",
+	"TSIx_JS1_ACTIVE",
+	"",
+	"TSIx_JS1_WAIT_READ",
+	"TSIx_JS1_WAIT_ISSUE",
+	"TSIx_JS1_WAIT_DEPEND",
+	"TSIx_JS1_WAIT_FINISH",
+	"TSIx_JS2_JOBS",
+	"TSIx_JS2_TASKS",
+	"TSIx_JS2_ACTIVE",
+	"",
+	"TSIx_JS2_WAIT_READ",
+	"TSIx_JS2_WAIT_ISSUE",
+	"TSIx_JS2_WAIT_DEPEND",
+	"TSIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_TILER_ACTIVE",
+	"TSIx_JOBS_PROCESSED",
+	"TSIx_TRIANGLES",
+	"TSIx_LINES",
+	"TSIx_POINTS",
+	"TSIx_FRONT_FACING",
+	"TSIx_BACK_FACING",
+	"TSIx_PRIM_VISIBLE",
+	"TSIx_PRIM_CULLED",
+	"TSIx_PRIM_CLIPPED",
+	"TSIx_PRIM_SAT_CULLED",
+	"TSIx_BIN_ALLOC_INIT",
+	"TSIx_BIN_ALLOC_OVERFLOW",
+	"TSIx_BUS_READ",
+	"",
+	"TSIx_BUS_WRITE",
+	"TSIx_LOADING_DESC",
+	"TSIx_IDVS_POS_SHAD_REQ",
+	"TSIx_IDVS_POS_SHAD_WAIT",
+	"TSIx_IDVS_POS_SHAD_STALL",
+	"TSIx_IDVS_POS_FIFO_FULL",
+	"TSIx_PREFETCH_STALL",
+	"TSIx_VCACHE_HIT",
+	"TSIx_VCACHE_MISS",
+	"TSIx_VCACHE_LINE_WAIT",
+	"TSIx_VFETCH_POS_READ_WAIT",
+	"TSIx_VFETCH_VERTEX_WAIT",
+	"TSIx_VFETCH_STALL",
+	"TSIx_PRIMASSY_STALL",
+	"TSIx_BBOX_GEN_STALL",
+	"TSIx_IDVS_VBU_HIT",
+	"TSIx_IDVS_VBU_MISS",
+	"TSIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TSIx_IDVS_VAR_SHAD_REQ",
+	"TSIx_IDVS_VAR_SHAD_STALL",
+	"TSIx_BINNER_STALL",
+	"TSIx_ITER_STALL",
+	"TSIx_COMPRESS_MISS",
+	"TSIx_COMPRESS_STALL",
+	"TSIx_PCACHE_HIT",
+	"TSIx_PCACHE_MISS",
+	"TSIx_PCACHE_MISS_STALL",
+	"TSIx_PCACHE_EVICT_STALL",
+	"TSIx_PMGR_PTR_WR_STALL",
+	"TSIx_PMGR_PTR_RD_STALL",
+	"TSIx_PMGR_CMD_WR_STALL",
+	"TSIx_WRBUF_ACTIVE",
+	"TSIx_WRBUF_HIT",
+	"TSIx_WRBUF_MISS",
+	"TSIx_WRBUF_NO_FREE_LINE_STALL",
+	"TSIx_WRBUF_NO_AXI_ID_STALL",
+	"TSIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TSIx_UTLB_TRANS",
+	"TSIx_UTLB_TRANS_HIT",
+	"TSIx_UTLB_TRANS_STALL",
+	"TSIx_UTLB_TRANS_MISS_DELAY",
+	"TSIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_FRAG_ACTIVE",
+	"TSIx_FRAG_PRIMITIVES",
+	"TSIx_FRAG_PRIM_RAST",
+	"TSIx_FRAG_FPK_ACTIVE",
+	"TSIx_FRAG_STARVING",
+	"TSIx_FRAG_WARPS",
+	"TSIx_FRAG_PARTIAL_WARPS",
+	"TSIx_FRAG_QUADS_RAST",
+	"TSIx_FRAG_QUADS_EZS_TEST",
+	"TSIx_FRAG_QUADS_EZS_UPDATE",
+	"TSIx_FRAG_QUADS_EZS_KILL",
+	"TSIx_FRAG_LZS_TEST",
+	"TSIx_FRAG_LZS_KILL",
+	"",
+	"TSIx_FRAG_PTILES",
+	"TSIx_FRAG_TRANS_ELIM",
+	"TSIx_QUAD_FPK_KILLER",
+	"",
+	"TSIx_COMPUTE_ACTIVE",
+	"TSIx_COMPUTE_TASKS",
+	"TSIx_COMPUTE_WARPS",
+	"TSIx_COMPUTE_STARVING",
+	"TSIx_EXEC_CORE_ACTIVE",
+	"TSIx_EXEC_ACTIVE",
+	"TSIx_EXEC_INSTR_COUNT",
+	"TSIx_EXEC_INSTR_DIVERGED",
+	"TSIx_EXEC_INSTR_STARVING",
+	"TSIx_ARITH_INSTR_SINGLE_FMA",
+	"TSIx_ARITH_INSTR_DOUBLE",
+	"TSIx_ARITH_INSTR_MSG",
+	"TSIx_ARITH_INSTR_MSG_ONLY",
+	"TSIx_TEX_MSGI_NUM_QUADS",
+	"TSIx_TEX_DFCH_NUM_PASSES",
+	"TSIx_TEX_DFCH_NUM_PASSES_MISS",
+	"TSIx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TSIx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TSIx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TSIx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TSIx_TEX_TFCH_NUM_OPERATIONS",
+	"TSIx_TEX_FILT_NUM_OPERATIONS",
+	"TSIx_LS_MEM_READ_FULL",
+	"TSIx_LS_MEM_READ_SHORT",
+	"TSIx_LS_MEM_WRITE_FULL",
+	"TSIx_LS_MEM_WRITE_SHORT",
+	"TSIx_LS_MEM_ATOMIC",
+	"TSIx_VARY_INSTR",
+	"TSIx_VARY_SLOT_32",
+	"TSIx_VARY_SLOT_16",
+	"TSIx_ATTR_INSTR",
+	"TSIx_ARITH_INSTR_FP_MUL",
+	"TSIx_BEATS_RD_FTC",
+	"TSIx_BEATS_RD_FTC_EXT",
+	"TSIx_BEATS_RD_LSC",
+	"TSIx_BEATS_RD_LSC_EXT",
+	"TSIx_BEATS_RD_TEX",
+	"TSIx_BEATS_RD_TEX_EXT",
+	"TSIx_BEATS_RD_OTHER",
+	"TSIx_BEATS_WR_LSC",
+	"TSIx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TSIx_L2_RD_MSG_IN",
+	"TSIx_L2_RD_MSG_IN_STALL",
+	"TSIx_L2_WR_MSG_IN",
+	"TSIx_L2_WR_MSG_IN_STALL",
+	"TSIx_L2_SNP_MSG_IN",
+	"TSIx_L2_SNP_MSG_IN_STALL",
+	"TSIx_L2_RD_MSG_OUT",
+	"TSIx_L2_RD_MSG_OUT_STALL",
+	"TSIx_L2_WR_MSG_OUT",
+	"TSIx_L2_ANY_LOOKUP",
+	"TSIx_L2_READ_LOOKUP",
+	"TSIx_L2_WRITE_LOOKUP",
+	"TSIx_L2_EXT_SNOOP_LOOKUP",
+	"TSIx_L2_EXT_READ",
+	"TSIx_L2_EXT_READ_NOSNP",
+	"TSIx_L2_EXT_READ_UNIQUE",
+	"TSIx_L2_EXT_READ_BEATS",
+	"TSIx_L2_EXT_AR_STALL",
+	"TSIx_L2_EXT_AR_CNT_Q1",
+	"TSIx_L2_EXT_AR_CNT_Q2",
+	"TSIx_L2_EXT_AR_CNT_Q3",
+	"TSIx_L2_EXT_RRESP_0_127",
+	"TSIx_L2_EXT_RRESP_128_191",
+	"TSIx_L2_EXT_RRESP_192_255",
+	"TSIx_L2_EXT_RRESP_256_319",
+	"TSIx_L2_EXT_RRESP_320_383",
+	"TSIx_L2_EXT_WRITE",
+	"TSIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TSIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TSIx_L2_EXT_WRITE_SNP_FULL",
+	"TSIx_L2_EXT_WRITE_SNP_PTL",
+	"TSIx_L2_EXT_WRITE_BEATS",
+	"TSIx_L2_EXT_W_STALL",
+	"TSIx_L2_EXT_AW_CNT_Q1",
+	"TSIx_L2_EXT_AW_CNT_Q2",
+	"TSIx_L2_EXT_AW_CNT_Q3",
+	"TSIx_L2_EXT_SNOOP",
+	"TSIx_L2_EXT_SNOOP_STALL",
+	"TSIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TSIx_L2_EXT_SNOOP_RESP_DATA",
+	"TSIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
new file mode 100644
index 0000000..32b9513
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#ifndef _KBASE_GPU_ID_H_
+#define _KBASE_GPU_ID_H_
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT       0
+#define GPU_ID_VERSION_MINOR_SHIFT        4
+#define GPU_ID_VERSION_MAJOR_SHIFT        12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
+#define GPU_ID_VERSION_STATUS             (0xF  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xF  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X                    0x6956
+#define GPU_ID_PI_T62X                    0x0620
+#define GPU_ID_PI_T76X                    0x0750
+#define GPU_ID_PI_T72X                    0x0720
+#define GPU_ID_PI_TFRX                    0x0880
+#define GPU_ID_PI_T86X                    0x0860
+#define GPU_ID_PI_T82X                    0x0820
+#define GPU_ID_PI_T83X                    0x0830
+
+/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
+#define GPU_ID_PI_NEW_FORMAT_START        0x1000
+#define GPU_ID_IS_NEW_FORMAT(product_id)  ((product_id) != GPU_ID_PI_T60X && \
+						(product_id) >= \
+						GPU_ID_PI_NEW_FORMAT_START)
+
+#define GPU_ID2_VERSION_STATUS_SHIFT      0
+#define GPU_ID2_VERSION_MINOR_SHIFT       4
+#define GPU_ID2_VERSION_MAJOR_SHIFT       12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT       16
+#define GPU_ID2_ARCH_REV_SHIFT            20
+#define GPU_ID2_ARCH_MINOR_SHIFT          24
+#define GPU_ID2_ARCH_MAJOR_SHIFT          28
+#define GPU_ID2_VERSION_STATUS            (0xF << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFF << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xF << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xF << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xF << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xF << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+#define GPU_ID2_VERSION        (GPU_ID2_VERSION_MAJOR | \
+								GPU_ID2_VERSION_MINOR | \
+								GPU_ID2_VERSION_STATUS)
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+   a product ignoring its version. */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+   revision (major, minor, status) of a product */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+		(((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
+	version_major, version_minor, version_status) \
+		(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+			product_major) | \
+		 GPU_ID2_VERSION_MAKE(version_major, version_minor,     \
+			version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+   a particular GPU model by its arch_major and product_major. */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+   for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+   model. */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+		(((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		    GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6u, 0)
+#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6u, 1)
+#define GPU_ID2_PRODUCT_TSIX              GPU_ID2_MODEL_MAKE(7u, 0)
+#define GPU_ID2_PRODUCT_TDVX              GPU_ID2_MODEL_MAKE(7u, 3)
+#define GPU_ID2_PRODUCT_TNOX              GPU_ID2_MODEL_MAKE(7u, 1)
+#define GPU_ID2_PRODUCT_TGOX              GPU_ID2_MODEL_MAKE(7u, 2)
+#define GPU_ID2_PRODUCT_TKAX              GPU_ID2_MODEL_MAKE(8u, 0)
+#define GPU_ID2_PRODUCT_TTRX              GPU_ID2_MODEL_MAKE(8u, 1)
+#define GPU_ID2_PRODUCT_TBOX              GPU_ID2_MODEL_MAKE(8u, 2)
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0                   0x1
+#define GPU_ID_S_EAC                      0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major,
+   minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+		(((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		((major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		((minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		((status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+#endif /* _KBASE_GPU_ID_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
new file mode 100644
index 0000000..6df0a1c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+/** Show callback for the @c gpu_memory debugfs file.
+ *
+ * This function is called to get the contents of the @c gpu_memory debugfs
+ * file. This is a report of current gpu memory usage.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if successfully prints data in debugfs entry file
+ *         -1 if it encountered an error
+ */
+
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+
+	kbdev_list = kbase_dev_list_get();
+	list_for_each(entry, kbdev_list) {
+		struct kbase_device *kbdev = NULL;
+		struct kbasep_kctx_list_element *element;
+
+		kbdev = list_entry(entry, struct kbase_device, entry);
+		/* output the total memory usage and cap for this device */
+		seq_printf(sfile, "%-16s  %10u\n",
+				kbdev->devname,
+				atomic_read(&(kbdev->memdev.used_pages)));
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_for_each_entry(element, &kbdev->kctx_list, link) {
+			/* output the memory usage and cap for each kctx
+			* opened on this device */
+			seq_printf(sfile, "  %s-0x%p %10u\n",
+				"kctx",
+				element->kctx,
+				atomic_read(&(element->kctx->used_pages)));
+		}
+		mutex_unlock(&kbdev->kctx_list_lock);
+	}
+	kbase_dev_list_put(kbdev_list);
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for gpu_memory
+ */
+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_gpu_memory_seq_show , NULL);
+}
+
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+	.open = kbasep_gpu_memory_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ *  Initialize debugfs entry for gpu_memory
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("gpu_memory", S_IRUGO,
+			kbdev->mali_debugfs_directory, NULL,
+			&kbasep_gpu_memory_debugfs_fops);
+	return;
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	return;
+}
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
new file mode 100644
index 0000000..7045693
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H
+#define _KBASE_GPU_MEMORY_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+#endif  /*_KBASE_GPU_MEMORY_DEBUGFS_H*/
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
new file mode 100644
index 0000000..624d078
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -0,0 +1,467 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include "mali_kbase_ioctl.h"
+#include <linux/clk.h>
+
+/**
+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
+ * @value:  The value from which to extract bits.
+ * @offset: The first bit to extract (0 being the LSB).
+ * @size:   The number of bits to extract.
+ *
+ * Context: @offset + @size <= 32.
+ *
+ * Return: Bits [@offset, @offset + @size) from @value.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+	(((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
+
+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+	struct mali_base_gpu_coherent_group *current_group;
+	u64 group_present;
+	u64 group_mask;
+	u64 first_set, first_set_prev;
+	u32 num_groups = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != props);
+
+	props->coherency_info.coherency = props->raw_props.mem_features;
+	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+	if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+		/* Group is l2 coherent */
+		group_present = props->raw_props.l2_present;
+	} else {
+		/* Group is l1 coherent */
+		group_present = props->raw_props.shader_present;
+	}
+
+	/*
+	 * The coherent group mask can be computed from the l2 present
+	 * register.
+	 *
+	 * For the coherent group n:
+	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
+	 * where first_set is group_present with only its nth set-bit kept
+	 * (i.e. the position from where a new group starts).
+	 *
+	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
+	 * The first mask is:
+	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
+	 *               = (0x0..010     - 1) & ~(0x0..01      - 1)
+	 *               =  0x0..00f
+	 * The second mask is:
+	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
+	 *               = (0x0..100     - 1) & ~(0x0..010     - 1)
+	 *               =  0x0..0f0
+	 * And so on until all the bits from group_present have been cleared
+	 * (i.e. there is no group left).
+	 */
+
+	current_group = props->coherency_info.group;
+	first_set = group_present & ~(group_present - 1);
+
+	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
+		group_present -= first_set;	/* Clear the current group bit */
+		first_set_prev = first_set;
+
+		first_set = group_present & ~(group_present - 1);
+		group_mask = (first_set - 1) & ~(first_set_prev - 1);
+
+		/* Populate the coherent_group structure for each group */
+		current_group->core_mask = group_mask & props->raw_props.shader_present;
+		current_group->num_cores = hweight64(current_group->core_mask);
+
+		num_groups++;
+		current_group++;
+	}
+
+	if (group_present != 0)
+		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+	props->coherency_info.num_groups = num_groups;
+}
+
+/**
+ * kbase_gpuprops_get_props - Get the GPU configuration
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values from the GPU configuration
+ * registers. Only the raw properties are filled in this function
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	struct kbase_gpuprops_regdump regdump;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get(kbdev, &regdump);
+
+	gpu_props->raw_props.gpu_id = regdump.gpu_id;
+	gpu_props->raw_props.tiler_features = regdump.tiler_features;
+	gpu_props->raw_props.mem_features = regdump.mem_features;
+	gpu_props->raw_props.mmu_features = regdump.mmu_features;
+	gpu_props->raw_props.l2_features = regdump.l2_features;
+	gpu_props->raw_props.suspend_size = regdump.suspend_size;
+
+	gpu_props->raw_props.as_present = regdump.as_present;
+	gpu_props->raw_props.js_present = regdump.js_present;
+	gpu_props->raw_props.shader_present =
+		((u64) regdump.shader_present_hi << 32) +
+		regdump.shader_present_lo;
+	gpu_props->raw_props.tiler_present =
+		((u64) regdump.tiler_present_hi << 32) +
+		regdump.tiler_present_lo;
+	gpu_props->raw_props.l2_present =
+		((u64) regdump.l2_present_hi << 32) +
+		regdump.l2_present_lo;
+	gpu_props->raw_props.stack_present =
+		((u64) regdump.stack_present_hi << 32) +
+		regdump.stack_present_lo;
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		gpu_props->raw_props.js_features[i] = regdump.js_features[i];
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
+
+	gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
+	gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
+	gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
+	gpu_props->raw_props.thread_features = regdump.thread_features;
+}
+
+void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props)
+{
+	gpu_props->core_props.version_status =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+	gpu_props->core_props.minor_revision =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+	gpu_props->core_props.major_revision =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+	gpu_props->core_props.product_id =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+}
+
+/**
+ * kbase_gpuprops_calculate_props - Calculate the derived properties
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev:     The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values derived from the GPU
+ * configuration registers
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	int i;
+
+	/* Populate the base_gpu_props structure */
+	kbase_gpuprops_update_core_props_gpu_id(gpu_props);
+	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+
+	/* Field with number of l2 slices is added to MEM_FEATURES register
+	 * since t76x. Below code assumes that for older GPU reserved bits will
+	 * be read as zero. */
+	gpu_props->l2_props.num_l2_slices =
+		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+
+	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+	if (gpu_props->raw_props.thread_max_threads == 0)
+		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+	else
+		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+	else
+		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+	if (gpu_props->raw_props.thread_max_barrier_size == 0)
+		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+	else
+		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+	gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+
+	/* If values are not specified, then use defaults */
+	if (gpu_props->thread_props.max_registers == 0) {
+		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+	}
+	/* Initialize the coherent_group structure for each group */
+	kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *gpu_props;
+	struct gpu_raw_gpu_props *raw;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	gpu_props = &kbdev->gpu_props;
+	raw = &gpu_props->props.raw_props;
+
+	/* Initialize the base_gpu_props structure from the hardware */
+	kbase_gpuprops_get_props(&gpu_props->props, kbdev);
+
+	/* Populate the derived properties */
+	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
+
+	/* Populate kbase-only fields */
+	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
+	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
+
+	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
+
+	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
+	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
+
+	gpu_props->num_cores = hweight64(raw->shader_present);
+	gpu_props->num_core_groups = hweight64(raw->l2_present);
+	gpu_props->num_address_spaces = hweight32(raw->as_present);
+	gpu_props->num_job_slots = hweight32(raw->js_present);
+}
+
+void kbase_gpuprops_set_features(struct kbase_device *kbdev)
+{
+	base_gpu_props *gpu_props;
+	struct kbase_gpuprops_regdump regdump;
+
+	gpu_props = &kbdev->gpu_props.props;
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get_features(kbdev, &regdump);
+
+	/*
+	 * Copy the raw value from the register, later this will get turned
+	 * into the selected coherency mode.
+	 * Additionally, add non-coherent mode, as this is always supported.
+	 */
+	gpu_props->raw_props.coherency_mode = regdump.coherency_features |
+		COHERENCY_FEATURE_BIT(COHERENCY_NONE);
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT))
+		gpu_props->thread_props.max_thread_group_split = 0;
+}
+
+static struct {
+	u32 type;
+	size_t offset;
+	int size;
+} gpu_property_mapping[] = {
+#define PROP(name, member) \
+	{KBASE_GPUPROP_ ## name, offsetof(struct mali_base_gpu_props, member), \
+		sizeof(((struct mali_base_gpu_props *)0)->member)}
+	PROP(PRODUCT_ID,                  core_props.product_id),
+	PROP(VERSION_STATUS,              core_props.version_status),
+	PROP(MINOR_REVISION,              core_props.minor_revision),
+	PROP(MAJOR_REVISION,              core_props.major_revision),
+	PROP(GPU_SPEED_MHZ,               core_props.gpu_speed_mhz),
+	PROP(GPU_FREQ_KHZ_MAX,            core_props.gpu_freq_khz_max),
+	PROP(GPU_FREQ_KHZ_MIN,            core_props.gpu_freq_khz_min),
+	PROP(LOG2_PROGRAM_COUNTER_SIZE,   core_props.log2_program_counter_size),
+	PROP(TEXTURE_FEATURES_0,          core_props.texture_features[0]),
+	PROP(TEXTURE_FEATURES_1,          core_props.texture_features[1]),
+	PROP(TEXTURE_FEATURES_2,          core_props.texture_features[2]),
+	PROP(TEXTURE_FEATURES_3,          core_props.texture_features[3]),
+	PROP(GPU_AVAILABLE_MEMORY_SIZE,   core_props.gpu_available_memory_size),
+
+	PROP(L2_LOG2_LINE_SIZE,           l2_props.log2_line_size),
+	PROP(L2_LOG2_CACHE_SIZE,          l2_props.log2_cache_size),
+	PROP(L2_NUM_L2_SLICES,            l2_props.num_l2_slices),
+
+	PROP(TILER_BIN_SIZE_BYTES,        tiler_props.bin_size_bytes),
+	PROP(TILER_MAX_ACTIVE_LEVELS,     tiler_props.max_active_levels),
+
+	PROP(MAX_THREADS,                 thread_props.max_threads),
+	PROP(MAX_WORKGROUP_SIZE,          thread_props.max_workgroup_size),
+	PROP(MAX_BARRIER_SIZE,            thread_props.max_barrier_size),
+	PROP(MAX_REGISTERS,               thread_props.max_registers),
+	PROP(MAX_TASK_QUEUE,              thread_props.max_task_queue),
+	PROP(MAX_THREAD_GROUP_SPLIT,      thread_props.max_thread_group_split),
+	PROP(IMPL_TECH,                   thread_props.impl_tech),
+
+	PROP(RAW_SHADER_PRESENT,          raw_props.shader_present),
+	PROP(RAW_TILER_PRESENT,           raw_props.tiler_present),
+	PROP(RAW_L2_PRESENT,              raw_props.l2_present),
+	PROP(RAW_STACK_PRESENT,           raw_props.stack_present),
+	PROP(RAW_L2_FEATURES,             raw_props.l2_features),
+	PROP(RAW_SUSPEND_SIZE,            raw_props.suspend_size),
+	PROP(RAW_MEM_FEATURES,            raw_props.mem_features),
+	PROP(RAW_MMU_FEATURES,            raw_props.mmu_features),
+	PROP(RAW_AS_PRESENT,              raw_props.as_present),
+	PROP(RAW_JS_PRESENT,              raw_props.js_present),
+	PROP(RAW_JS_FEATURES_0,           raw_props.js_features[0]),
+	PROP(RAW_JS_FEATURES_1,           raw_props.js_features[1]),
+	PROP(RAW_JS_FEATURES_2,           raw_props.js_features[2]),
+	PROP(RAW_JS_FEATURES_3,           raw_props.js_features[3]),
+	PROP(RAW_JS_FEATURES_4,           raw_props.js_features[4]),
+	PROP(RAW_JS_FEATURES_5,           raw_props.js_features[5]),
+	PROP(RAW_JS_FEATURES_6,           raw_props.js_features[6]),
+	PROP(RAW_JS_FEATURES_7,           raw_props.js_features[7]),
+	PROP(RAW_JS_FEATURES_8,           raw_props.js_features[8]),
+	PROP(RAW_JS_FEATURES_9,           raw_props.js_features[9]),
+	PROP(RAW_JS_FEATURES_10,          raw_props.js_features[10]),
+	PROP(RAW_JS_FEATURES_11,          raw_props.js_features[11]),
+	PROP(RAW_JS_FEATURES_12,          raw_props.js_features[12]),
+	PROP(RAW_JS_FEATURES_13,          raw_props.js_features[13]),
+	PROP(RAW_JS_FEATURES_14,          raw_props.js_features[14]),
+	PROP(RAW_JS_FEATURES_15,          raw_props.js_features[15]),
+	PROP(RAW_TILER_FEATURES,          raw_props.tiler_features),
+	PROP(RAW_TEXTURE_FEATURES_0,      raw_props.texture_features[0]),
+	PROP(RAW_TEXTURE_FEATURES_1,      raw_props.texture_features[1]),
+	PROP(RAW_TEXTURE_FEATURES_2,      raw_props.texture_features[2]),
+	PROP(RAW_TEXTURE_FEATURES_3,      raw_props.texture_features[3]),
+	PROP(RAW_GPU_ID,                  raw_props.gpu_id),
+	PROP(RAW_THREAD_MAX_THREADS,      raw_props.thread_max_threads),
+	PROP(RAW_THREAD_MAX_WORKGROUP_SIZE,
+			raw_props.thread_max_workgroup_size),
+	PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size),
+	PROP(RAW_THREAD_FEATURES,         raw_props.thread_features),
+	PROP(RAW_COHERENCY_MODE,          raw_props.coherency_mode),
+
+	PROP(COHERENCY_NUM_GROUPS,        coherency_info.num_groups),
+	PROP(COHERENCY_NUM_CORE_GROUPS,   coherency_info.num_core_groups),
+	PROP(COHERENCY_COHERENCY,         coherency_info.coherency),
+	PROP(COHERENCY_GROUP_0,           coherency_info.group[0].core_mask),
+	PROP(COHERENCY_GROUP_1,           coherency_info.group[1].core_mask),
+	PROP(COHERENCY_GROUP_2,           coherency_info.group[2].core_mask),
+	PROP(COHERENCY_GROUP_3,           coherency_info.group[3].core_mask),
+	PROP(COHERENCY_GROUP_4,           coherency_info.group[4].core_mask),
+	PROP(COHERENCY_GROUP_5,           coherency_info.group[5].core_mask),
+	PROP(COHERENCY_GROUP_6,           coherency_info.group[6].core_mask),
+	PROP(COHERENCY_GROUP_7,           coherency_info.group[7].core_mask),
+	PROP(COHERENCY_GROUP_8,           coherency_info.group[8].core_mask),
+	PROP(COHERENCY_GROUP_9,           coherency_info.group[9].core_mask),
+	PROP(COHERENCY_GROUP_10,          coherency_info.group[10].core_mask),
+	PROP(COHERENCY_GROUP_11,          coherency_info.group[11].core_mask),
+	PROP(COHERENCY_GROUP_12,          coherency_info.group[12].core_mask),
+	PROP(COHERENCY_GROUP_13,          coherency_info.group[13].core_mask),
+	PROP(COHERENCY_GROUP_14,          coherency_info.group[14].core_mask),
+	PROP(COHERENCY_GROUP_15,          coherency_info.group[15].core_mask),
+
+#undef PROP
+};
+
+int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *kprops = &kbdev->gpu_props;
+	struct mali_base_gpu_props *props = &kprops->props;
+	u32 count = ARRAY_SIZE(gpu_property_mapping);
+	u32 i;
+	u32 size = 0;
+	u8 *p;
+
+	for (i = 0; i < count; i++) {
+		/* 4 bytes for the ID, and the size of the property */
+		size += 4 + gpu_property_mapping[i].size;
+	}
+
+	kprops->prop_buffer_size = size;
+	kprops->prop_buffer = kmalloc(size, GFP_KERNEL);
+
+	if (!kprops->prop_buffer) {
+		kprops->prop_buffer_size = 0;
+		return -ENOMEM;
+	}
+
+	p = kprops->prop_buffer;
+
+#define WRITE_U8(v) (*p++ = (v) & 0xFF)
+#define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0)
+#define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0)
+#define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0)
+
+	for (i = 0; i < count; i++) {
+		u32 type = gpu_property_mapping[i].type;
+		u8 type_size;
+		void *field = ((u8 *)props) + gpu_property_mapping[i].offset;
+
+		switch (gpu_property_mapping[i].size) {
+		case 1:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U8;
+			break;
+		case 2:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U16;
+			break;
+		case 4:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U32;
+			break;
+		case 8:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U64;
+			break;
+		default:
+			dev_err(kbdev->dev,
+				"Invalid gpu_property_mapping type=%d size=%d",
+				type, gpu_property_mapping[i].size);
+			return -EINVAL;
+		}
+
+		WRITE_U32((type<<2) | type_size);
+
+		switch (type_size) {
+		case KBASE_GPUPROP_VALUE_SIZE_U8:
+			WRITE_U8(*((u8 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U16:
+			WRITE_U16(*((u16 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U32:
+			WRITE_U32(*((u32 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U64:
+			WRITE_U64(*((u64 *)field));
+			break;
+		default: /* Cannot be reached */
+			WARN_ON(1);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
new file mode 100644
index 0000000..e88af25
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev		The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_set_features - Set up Kbase GPU properties
+ * @kbdev:   Device pointer
+ *
+ * This function sets up GPU properties that are dependent on the hardware
+ * features bitmask. This function must be preceeded by a call to
+ * kbase_hw_set_features_mask().
+ */
+void kbase_gpuprops_set_features(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_populate_user_buffer - Populate the GPU properties buffer
+ * @kbdev: The kbase device
+ *
+ * Fills kbdev->gpu_props->prop_buffer with the GPU properties for user
+ * space to read.
+ */
+int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_update_core_props_gpu_id - break down gpu id value
+ * @gpu_props: the &base_gpu_props structure
+ *
+ * Break down gpu_id value stored in base_gpu_props::raw_props.gpu_id into
+ * separate fields (version_status, minor_revision, major_revision, product_id)
+ * stored in base_gpu_props::core_props.
+ */
+void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props);
+
+
+#endif				/* _KBASE_GPUPROPS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
new file mode 100644
index 0000000..10794fc
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops_types.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_TYPES_H_
+#define _KBASE_GPUPROPS_TYPES_H_
+
+#include "mali_base_kernel.h"
+
+#define KBASE_GPU_SPEED_MHZ    123
+#define KBASE_GPU_PC_SIZE_LOG2 24U
+
+struct kbase_gpuprops_regdump {
+	u32 gpu_id;
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 tiler_features;
+	u32 mem_features;
+	u32 mmu_features;
+	u32 as_present;
+	u32 js_present;
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 shader_present_lo;
+	u32 shader_present_hi;
+	u32 tiler_present_lo;
+	u32 tiler_present_hi;
+	u32 l2_present_lo;
+	u32 l2_present_hi;
+	u32 stack_present_lo;
+	u32 stack_present_hi;
+	u32 coherency_features;
+};
+
+struct kbase_gpu_cache_props {
+	u8 associativity;
+	u8 external_bus_width;
+};
+
+struct kbase_gpu_mem_props {
+	u8 core_group;
+};
+
+struct kbase_gpu_mmu_props {
+	u8 va_bits;
+	u8 pa_bits;
+};
+
+struct kbase_gpu_props {
+	/* kernel-only properties */
+	u8 num_cores;
+	u8 num_core_groups;
+	u8 num_address_spaces;
+	u8 num_job_slots;
+
+	struct kbase_gpu_cache_props l2_props;
+
+	struct kbase_gpu_mem_props mem;
+	struct kbase_gpu_mmu_props mmu;
+
+	/* Properties shared with userspace */
+	base_gpu_props props;
+
+	u32 prop_buffer_size;
+	void *prop_buffer;
+};
+
+#endif				/* _KBASE_GPUPROPS_TYPES_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
new file mode 100644
index 0000000..89342e1
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -0,0 +1,492 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Run-time work-arounds helpers
+ */
+
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_midg_regmap.h>
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+
+void kbase_hw_set_features_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_feature *features;
+	u32 gpu_id;
+	u32 product_id;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			features = base_hw_features_tMIx;
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			features = base_hw_features_tHEx;
+			break;
+		case GPU_ID2_PRODUCT_TSIX:
+			features = base_hw_features_tSIx;
+			break;
+		case GPU_ID2_PRODUCT_TDVX:
+			features = base_hw_features_tDVx;
+			break;
+		case GPU_ID2_PRODUCT_TNOX:
+			features = base_hw_features_tNOx;
+			break;
+		case GPU_ID2_PRODUCT_TGOX:
+			features = base_hw_features_tGOx;
+			break;
+		case GPU_ID2_PRODUCT_TKAX:
+			features = base_hw_features_tKAx;
+			break;
+		case GPU_ID2_PRODUCT_TTRX:
+			features = base_hw_features_tTRx;
+			break;
+		case GPU_ID2_PRODUCT_TBOX:
+			features = base_hw_features_tBOx;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	} else {
+		switch (product_id) {
+		case GPU_ID_PI_TFRX:
+			/* FALLTHROUGH */
+		case GPU_ID_PI_T86X:
+			features = base_hw_features_tFxx;
+			break;
+		case GPU_ID_PI_T83X:
+			features = base_hw_features_t83x;
+			break;
+		case GPU_ID_PI_T82X:
+			features = base_hw_features_t82x;
+			break;
+		case GPU_ID_PI_T76X:
+			features = base_hw_features_t76x;
+			break;
+		case GPU_ID_PI_T72X:
+			features = base_hw_features_t72x;
+			break;
+		case GPU_ID_PI_T62X:
+			features = base_hw_features_t62x;
+			break;
+		case GPU_ID_PI_T60X:
+			features = base_hw_features_t60x;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	}
+
+	for (; *features != BASE_HW_FEATURE_END; features++)
+		set_bit(*features, &kbdev->hw_features_mask[0]);
+}
+
+/**
+ * kbase_hw_get_issues_for_new_id - Get the hardware issues for a new GPU ID
+ * @kbdev: Device pointer
+ *
+ * Return: pointer to an array of hardware issues, terminated by
+ * BASE_HW_ISSUE_END.
+ *
+ * This function can only be used on new-format GPU IDs, i.e. those for which
+ * GPU_ID_IS_NEW_FORMAT evaluates as true. The GPU ID is read from the @kbdev.
+ *
+ * In debugging versions of the driver, unknown versions of a known GPU will
+ * be treated as the most recent known version not later than the actual
+ * version. In such circumstances, the GPU ID in @kbdev will also be replaced
+ * with the most recent known version.
+ *
+ * Note: The GPU configuration must have been read by kbase_gpuprops_get_props()
+ * before calling this function.
+ */
+static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
+					struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues = NULL;
+
+	struct base_hw_product {
+		u32 product_model;
+		struct {
+			u32 version;
+			const enum base_hw_issue *issues;
+		} map[7];
+	};
+
+	static const struct base_hw_product base_hw_products[] = {
+		{GPU_ID2_PRODUCT_TMIX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 1),
+		   base_hw_issues_tMIx_r0p0_05dev0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1},
+		  {U32_MAX /* sentinel value */, NULL} } },
+
+		{GPU_ID2_PRODUCT_THEX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2},
+		  {GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TSIX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0},
+		  {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TDVX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TNOX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TGOX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TKAX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tKAx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TTRX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TBOX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBOx_r0p0},
+		  {U32_MAX, NULL} } },
+	};
+
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 product_model = gpu_id & GPU_ID2_PRODUCT_MODEL;
+	const struct base_hw_product *product = NULL;
+	size_t p;
+
+	/* Stop when we reach the end of the products array. */
+	for (p = 0; p < ARRAY_SIZE(base_hw_products); ++p) {
+		if (product_model == base_hw_products[p].product_model) {
+			product = &base_hw_products[p];
+			break;
+		}
+	}
+
+	if (product != NULL) {
+		/* Found a matching product. */
+		const u32 version = gpu_id & GPU_ID2_VERSION;
+#if !MALI_CUSTOMER_RELEASE
+		u32 fallback_version = 0;
+		const enum base_hw_issue *fallback_issues = NULL;
+#endif
+		size_t v;
+
+		/* Stop when we reach the end of the map. */
+		for (v = 0; product->map[v].version != U32_MAX; ++v) {
+
+			if (version == product->map[v].version) {
+				/* Exact match so stop. */
+				issues = product->map[v].issues;
+				break;
+			}
+
+#if !MALI_CUSTOMER_RELEASE
+			/* Check whether this is a candidate for most recent
+				known version not later than the actual
+				version. */
+			if ((version > product->map[v].version) &&
+				(product->map[v].version >= fallback_version)) {
+				fallback_version = product->map[v].version;
+				fallback_issues = product->map[v].issues;
+			}
+#endif
+		}
+
+#if !MALI_CUSTOMER_RELEASE
+		if ((issues == NULL) && (fallback_issues != NULL)) {
+			/* Fall back to the issue set of the most recent known
+				version not later than the actual version. */
+			issues = fallback_issues;
+
+			dev_info(kbdev->dev,
+				"r%dp%d status %d is unknown; treating as r%dp%d status %d",
+				(gpu_id & GPU_ID2_VERSION_MAJOR) >>
+					GPU_ID2_VERSION_MAJOR_SHIFT,
+				(gpu_id & GPU_ID2_VERSION_MINOR) >>
+					GPU_ID2_VERSION_MINOR_SHIFT,
+				(gpu_id & GPU_ID2_VERSION_STATUS) >>
+					GPU_ID2_VERSION_STATUS_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_MAJOR) >>
+					GPU_ID2_VERSION_MAJOR_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_MINOR) >>
+					GPU_ID2_VERSION_MINOR_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_STATUS) >>
+					GPU_ID2_VERSION_STATUS_SHIFT);
+
+			gpu_id &= ~GPU_ID2_VERSION;
+			gpu_id |= fallback_version;
+			kbdev->gpu_props.props.raw_props.gpu_id = gpu_id;
+
+			kbase_gpuprops_update_core_props_gpu_id(
+				&kbdev->gpu_props.props);
+		}
+#endif
+	}
+	return issues;
+}
+
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues;
+	u32 gpu_id;
+	u32 product_id;
+	u32 impl_tech;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
+
+	if (impl_tech != IMPLEMENTATION_MODEL) {
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			issues = kbase_hw_get_issues_for_new_id(kbdev);
+			if (issues == NULL) {
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+
+#if !MALI_CUSTOMER_RELEASE
+			/* The GPU ID might have been replaced with the last
+			   known version of the same GPU. */
+			gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+#endif
+
+		} else {
+			switch (gpu_id) {
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+				issues = base_hw_issues_t60x_r0p0_15dev0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+				issues = base_hw_issues_t60x_r0p0_eac;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+				issues = base_hw_issues_t60x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+				issues = base_hw_issues_t62x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+				issues = base_hw_issues_t62x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+				issues = base_hw_issues_t62x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+				issues = base_hw_issues_t76x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+				issues = base_hw_issues_t76x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+				issues = base_hw_issues_t76x_r0p1_50rel0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+				issues = base_hw_issues_t76x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+				issues = base_hw_issues_t76x_r0p3;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+				issues = base_hw_issues_t76x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+				issues = base_hw_issues_t72x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+				issues = base_hw_issues_t72x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+				issues = base_hw_issues_t72x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+				issues = base_hw_issues_tFRx_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+				issues = base_hw_issues_tFRx_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8):
+				issues = base_hw_issues_tFRx_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
+				issues = base_hw_issues_tFRx_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+				issues = base_hw_issues_t86x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8):
+				issues = base_hw_issues_t86x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
+				issues = base_hw_issues_t86x_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
+				issues = base_hw_issues_t83x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8):
+				issues = base_hw_issues_t83x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
+				issues = base_hw_issues_t82x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
+				issues = base_hw_issues_t82x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8):
+				issues = base_hw_issues_t82x_r1p0;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		}
+	} else {
+		/* Software model */
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+			case GPU_ID2_PRODUCT_TMIX:
+				issues = base_hw_issues_model_tMIx;
+				break;
+			case GPU_ID2_PRODUCT_THEX:
+				issues = base_hw_issues_model_tHEx;
+				break;
+			case GPU_ID2_PRODUCT_TSIX:
+				issues = base_hw_issues_model_tSIx;
+				break;
+			case GPU_ID2_PRODUCT_TDVX:
+				issues = base_hw_issues_model_tDVx;
+				break;
+			case GPU_ID2_PRODUCT_TNOX:
+				issues = base_hw_issues_model_tNOx;
+				break;
+			case GPU_ID2_PRODUCT_TGOX:
+				issues = base_hw_issues_model_tGOx;
+				break;
+			case GPU_ID2_PRODUCT_TKAX:
+				issues = base_hw_issues_model_tKAx;
+				break;
+			case GPU_ID2_PRODUCT_TTRX:
+				issues = base_hw_issues_model_tTRx;
+				break;
+			case GPU_ID2_PRODUCT_TBOX:
+				issues = base_hw_issues_model_tBOx;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		} else {
+			switch (product_id) {
+			case GPU_ID_PI_T60X:
+				issues = base_hw_issues_model_t60x;
+				break;
+			case GPU_ID_PI_T62X:
+				issues = base_hw_issues_model_t62x;
+				break;
+			case GPU_ID_PI_T72X:
+				issues = base_hw_issues_model_t72x;
+				break;
+			case GPU_ID_PI_T76X:
+				issues = base_hw_issues_model_t76x;
+				break;
+			case GPU_ID_PI_TFRX:
+				issues = base_hw_issues_model_tFRx;
+				break;
+			case GPU_ID_PI_T86X:
+				issues = base_hw_issues_model_t86x;
+				break;
+			case GPU_ID_PI_T83X:
+				issues = base_hw_issues_model_t83x;
+				break;
+			case GPU_ID_PI_T82X:
+				issues = base_hw_issues_model_t82x;
+				break;
+			default:
+				dev_err(kbdev->dev, "Unknown GPU ID %x",
+					gpu_id);
+				return -EINVAL;
+			}
+		}
+	}
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		dev_info(kbdev->dev,
+			"GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d",
+			(gpu_id & GPU_ID2_PRODUCT_MAJOR) >>
+				GPU_ID2_PRODUCT_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_MAJOR) >>
+				GPU_ID2_ARCH_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_MINOR) >>
+				GPU_ID2_ARCH_MINOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_REV) >>
+				GPU_ID2_ARCH_REV_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_MAJOR) >>
+				GPU_ID2_VERSION_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_MINOR) >>
+				GPU_ID2_VERSION_MINOR_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_STATUS) >>
+				GPU_ID2_VERSION_STATUS_SHIFT);
+	} else {
+		dev_info(kbdev->dev,
+			"GPU identified as 0x%04x r%dp%d status %d",
+			(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+				GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+			(gpu_id & GPU_ID_VERSION_MAJOR) >>
+				GPU_ID_VERSION_MAJOR_SHIFT,
+			(gpu_id & GPU_ID_VERSION_MINOR) >>
+				GPU_ID_VERSION_MINOR_SHIFT,
+			(gpu_id & GPU_ID_VERSION_STATUS) >>
+				GPU_ID_VERSION_STATUS_SHIFT);
+	}
+
+	for (; *issues != BASE_HW_ISSUE_END; issues++)
+		set_bit(*issues, &kbdev->hw_issues_mask[0]);
+
+	return 0;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
new file mode 100644
index 0000000..754250c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
@@ -0,0 +1,65 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Run-time work-arounds helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+	test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+	test_bit(feature, &(kbdev)->hw_features_mask[0])
+
+/**
+ * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID
+ * @kbdev: Device pointer
+ *
+ * Return: 0 if the GPU ID was recognized, otherwise -EINVAL.
+ *
+ * The GPU ID is read from the @kbdev.
+ *
+ * In debugging versions of the driver, unknown versions of a known GPU with a
+ * new-format ID will be treated as the most recent known version not later
+ * than the actual version. In such circumstances, the GPU ID in @kbdev will
+ * also be replaced with the most recent known version.
+ *
+ * Note: The GPU configuration must have been read by
+ * kbase_gpuprops_get_props() before calling this function.
+ */
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif				/* _KBASE_HW_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
new file mode 100644
index 0000000..b09be99
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access backend common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_BACKEND_H_
+#define _KBASE_HWACCESS_BACKEND_H_
+
+/**
+ * kbase_backend_early_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_early_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
new file mode 100644
index 0000000..0acf297
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_gpu_defs.h
+ * HW access common definitions
+ */
+
+#ifndef _KBASE_HWACCESS_DEFS_H_
+#define _KBASE_HWACCESS_DEFS_H_
+
+#include <mali_kbase_jm_defs.h>
+
+/* The hwaccess_lock (a spinlock) must be held when accessing this structure */
+struct kbase_hwaccess_data {
+	struct kbase_context *active_kctx;
+
+	struct kbase_backend_data backend;
+};
+
+#endif /* _KBASE_HWACCESS_DEFS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
new file mode 100644
index 0000000..cf8a813
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ *				  GPU
+ * @kbdev:	Device pointer
+ * @regdump:	Pointer to struct kbase_gpuprops_regdump structure
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+/**
+ * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU
+ * @kbdev:   Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads GPU properties that are dependent on the hardware
+ * features bitmask
+ */
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
new file mode 100644
index 0000000..5de2b75
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include <mali_kbase_instr_defs.h>
+
+/**
+ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * @kbdev:	Kbase device
+ * @kctx:	Kbase context
+ * @setup:	HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx:	Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true,
+ * of call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ *				       completed.
+ * @kctx:	Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ *				     completed
+ * @kctx:	Kbase context
+ * @success:	Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx:	Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_init() - Terminate the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
new file mode 100644
index 0000000..750fda2
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -0,0 +1,381 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_JM_H_
+#define _KBASE_HWACCESS_JM_H_
+
+/**
+ * kbase_backend_run_atom() - Run an atom on the GPU
+ * @kbdev:	Device pointer
+ * @atom:	Atom to run
+ *
+ * Caller must hold the HW access lock
+ */
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_slot_update - Update state based on slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ *
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ */
+void kbase_backend_slot_update(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_find_and_release_free_address_space() - Release a free AS
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * This function can evict an idle context from the runpool, freeing up the
+ * address space it was using.
+ *
+ * The address space is marked as in use. The caller must either assign a
+ * context using kbase_gpu_use_ctx(), or release it using
+ * kbase_ctx_sched_release()
+ *
+ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none
+ *	   available
+ */
+int kbase_backend_find_and_release_free_address_space(
+		struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the
+ *			     provided address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @as_nr:	Free address space to use
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * Return: true if successful, false if ASID not assigned.
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr);
+
+/**
+ * kbase_backend_use_ctx_sched() - Activate a context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * The context must already be scheduled and assigned to an address space. If
+ * the context is not scheduled, then kbase_gpu_use_ctx() should be used
+ * instead.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if context is now active, false otherwise (ie if context does
+ *	   not have an address space assigned)
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+					struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
+ *                                 de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+				struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will
+ *                                   de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex
+ *
+ * This function must perform any operations that could not be performed in IRQ
+ * context by kbase_backend_release_ctx_irq().
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_cacheclean - Perform a cache clean if the given atom requires
+ *                            one
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the failed atom
+ *
+ * On some GPUs, the GPU cache must be cleaned following a failed atom. This
+ * function performs a clean if it is required by @katom.
+ */
+void kbase_backend_cacheclean(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom);
+
+
+/**
+ * kbase_backend_complete_wq() - Perform backend-specific actions required on
+ *				 completing an atom.
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the atom to complete
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ *
+ * Return: true if atom has completed, false if atom should be re-submitted
+ */
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions
+ *                                        required on completing an atom, after
+ *                                        any scheduling has taken place.
+ * @kbdev:         Device pointer
+ * @core_req:      Core requirements of atom
+ * @affinity:      Affinity of atom
+ * @coreref_state: Coreref state of atom
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ */
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state);
+
+/**
+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
+ *			   and remove any others from the ringbuffers.
+ * @kbdev:		Device pointer
+ * @end_timestamp:	Timestamp of reset
+ */
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
+
+/**
+ * kbase_backend_inspect_head() - Return the atom currently at the head of slot
+ *				  @js
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
+ *                              @js
+ * @kbdev: Device pointer
+ * @js:    Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
+ *				      slot.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot
+ */
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
+ *					that are currently on the GPU.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot @js that are currently on the GPU.
+ */
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
+ *				       has changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg starting/stopping
+ * scheduling timers).
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg updating timeouts of
+ * currently running atoms).
+ */
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_slot_free() - Return the number of jobs that can be currently
+ *			       submitted to slot @js.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of jobs that can be submitted.
+ */
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
+ *                                        running from a context
+ * @kctx: Context pointer
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting
+ * context from the hardware.
+ */
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+
+/**
+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and
+ *                               to be descheduled.
+ * @kctx: Context pointer
+ *
+ * This should be called following kbase_js_zap_context(), to ensure the context
+ * can be safely destroyed.
+ */
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
+
+/**
+ * kbase_backend_get_current_flush_id - Return the current flush ID
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: the current flush ID to be recorded for each job chain
+ */
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it returns
+ * true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_locked - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_silent - Reset the GPU silently
+ * @kbdev: Device pointer
+ *
+ * Reset the GPU without trying to cancel jobs and don't emit messages into
+ * the kernel log while doing the reset.
+ *
+ * This function should be used in cases where we are doing a controlled reset
+ * of the GPU as part of normal processing (e.g. exiting protected mode) where
+ * the driver will have ensured the scheduler has been idled and all other
+ * users of the GPU (e.g. instrumentation) have been suspended.
+ */
+void kbase_reset_gpu_silent(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_active - Reports if the GPU is being reset
+ * @kbdev: Device pointer
+ *
+ * Return: True if the GPU is in the process of being reset.
+ */
+bool kbase_reset_gpu_active(struct kbase_device *kbdev);
+#endif
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom);
+
+extern struct protected_mode_ops kbase_native_protected_ops;
+
+#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
new file mode 100644
index 0000000..71c7d49
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -0,0 +1,209 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_pm.h
+ * HW access power manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_PM_H_
+#define _KBASE_HWACCESS_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+#include <mali_kbase_pm_defs.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/* Functions common to all HW access backends */
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return 0 if the power management framework was successfully
+ *         initialized.
+ */
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_hwaccess_pm_powerup - Power up the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
+ *
+ * Power up GPU after all modules have been initialized and interrupt handlers
+ * installed.
+ *
+ * Return: 0 if powerup was successful.
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ *
+ * Should ensure that no new interrupts are generated, but allow any currently
+ * running interrupt handlers to complete successfully. The GPU is forced off by
+ * the time this function returns, regardless of whether or not the active power
+ * policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to suspend the GPU
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to resume the GPU from a suspend
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for activating the GPU. Called when the first
+ * context goes active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for idling the GPU. Called when the last
+ * context goes idle.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev         The kbase device structure for the device (must be a
+ *                      valid pointer)
+ * @param new_core_mask_js0 The core mask to use for job slot 0
+ * @param new_core_mask_js0 The core mask to use for job slot 1
+ * @param new_core_mask_js0 The core mask to use for job slot 2
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+					const struct kbase_pm_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
+
+#endif /* _KBASE_HWACCESS_PM_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
new file mode 100644
index 0000000..89d26ea
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
new file mode 100644
index 0000000..cf7bf1b
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,66 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_HWCNT_READER_H_
+#define _KBASE_HWCNT_READER_H_
+
+/* The ids of ioctl commands. */
+#define KBASE_HWCNT_READER 0xBE
+#define KBASE_HWCNT_READER_GET_HWVER       _IOR(KBASE_HWCNT_READER, 0x00, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32)
+#define KBASE_HWCNT_READER_DUMP            _IOW(KBASE_HWCNT_READER, 0x10, u32)
+#define KBASE_HWCNT_READER_CLEAR           _IOW(KBASE_HWCNT_READER, 0x11, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER      _IOR(KBASE_HWCNT_READER, 0x20,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_PUT_BUFFER      _IOW(KBASE_HWCNT_READER, 0x21,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_SET_INTERVAL    _IOW(KBASE_HWCNT_READER, 0x30, u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT    _IOW(KBASE_HWCNT_READER, 0x40, u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT   _IOW(KBASE_HWCNT_READER, 0x41, u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32)
+
+/**
+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
+ * @timestamp:  time when sample was collected
+ * @event_id:   id of an event that triggered sample collection
+ * @buffer_idx: position in sampling area where sample buffer was stored
+ */
+struct kbase_hwcnt_reader_metadata {
+	u64 timestamp;
+	u32 event_id;
+	u32 buffer_idx;
+};
+
+/**
+ * enum base_hwcnt_reader_event - hwcnt dumping events
+ * @BASE_HWCNT_READER_EVENT_MANUAL:   manual request for dump
+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
+ * @BASE_HWCNT_READER_EVENT_PREJOB:   prejob dump request
+ * @BASE_HWCNT_READER_EVENT_POSTJOB:  postjob dump request
+ * @BASE_HWCNT_READER_EVENT_COUNT:    number of supported events
+ */
+enum base_hwcnt_reader_event {
+	BASE_HWCNT_READER_EVENT_MANUAL,
+	BASE_HWCNT_READER_EVENT_PERIODIC,
+	BASE_HWCNT_READER_EVENT_PREJOB,
+	BASE_HWCNT_READER_EVENT_POSTJOB,
+
+	BASE_HWCNT_READER_EVENT_COUNT
+};
+
+#endif /* _KBASE_HWCNT_READER_H_ */
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
new file mode 100644
index 0000000..041c61a
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
@@ -0,0 +1,668 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_IOCTL_H_
+#define _KBASE_IOCTL_H_
+
+#ifdef __cpluscplus
+extern "C" {
+#endif
+
+#include <linux/types.h>
+
+#define KBASE_IOCTL_TYPE 0x80
+
+#define BASE_UK_VERSION_MAJOR 11
+#define BASE_UK_VERSION_MINOR 0
+
+#ifdef ANDROID
+/* Android's definition of ioctl is incorrect, specifying the type argument as
+ * 'int'. This creates a warning when using _IOWR (as the top bit is set). Work
+ * round this by redefining _IOC to include a case to 'int'.
+ */
+#undef _IOC
+#define _IOC(dir, type, nr, size) \
+	((int)(((dir) << _IOC_DIRSHIFT) | ((type) << _IOC_TYPESHIFT) | \
+	((nr) << _IOC_NRSHIFT) | ((size) << _IOC_SIZESHIFT)))
+#endif
+
+/**
+ * struct kbase_ioctl_version_check - Check version compatibility with kernel
+ *
+ * @major: Major version number
+ * @minor: Minor version number
+ */
+struct kbase_ioctl_version_check {
+	__u16 major;
+	__u16 minor;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK \
+	_IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
+
+/**
+ * struct kbase_ioctl_set_flags - Set kernel context creation flags
+ *
+ * @create_flags: Flags - see base_context_create_flags
+ */
+struct kbase_ioctl_set_flags {
+	__u32 create_flags;
+};
+
+#define KBASE_IOCTL_SET_FLAGS \
+	_IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags)
+
+/**
+ * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
+ *
+ * @addr: Memory address of an array of struct base_jd_atom_v2
+ * @nr_atoms: Number of entries in the array
+ * @stride: sizeof(struct base_jd_atom_v2)
+ */
+struct kbase_ioctl_job_submit {
+	__u64 addr;
+	__u32 nr_atoms;
+	__u32 stride;
+};
+
+#define KBASE_IOCTL_JOB_SUBMIT \
+	_IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit)
+
+/**
+ * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel
+ *
+ * @buffer: Pointer to the buffer to store properties into
+ * @size: Size of the buffer
+ * @flags: Flags - must be zero for now
+ *
+ * The ioctl will return the number of bytes stored into @buffer or an error
+ * on failure (e.g. @size is too small). If @size is specified as 0 then no
+ * data will be written but the return value will be the number of bytes needed
+ * for all the properties.
+ *
+ * @flags may be used in the future to request a different format for the
+ * buffer. With @flags == 0 the following format is used.
+ *
+ * The buffer will be filled with pairs of values, a u32 key identifying the
+ * property followed by the value. The size of the value is identified using
+ * the bottom bits of the key. The value then immediately followed the key and
+ * is tightly packed (there is no padding). All keys and values are
+ * little-endian.
+ *
+ * 00 = u8
+ * 01 = u16
+ * 10 = u32
+ * 11 = u64
+ */
+struct kbase_ioctl_get_gpuprops {
+	__u64 buffer;
+	__u32 size;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_GET_GPUPROPS \
+	_IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops)
+
+#define KBASE_IOCTL_POST_TERM \
+	_IO(KBASE_IOCTL_TYPE, 4)
+
+/**
+ * union kbase_ioctl_mem_alloc - Allocate memory on the GPU
+ *
+ * @va_pages: The number of pages of virtual address space to reserve
+ * @commit_pages: The number of physical pages to allocate
+ * @extent: The number of extra pages to allocate on each GPU fault which grows
+ *          the region
+ * @flags: Flags
+ * @gpu_va: The GPU virtual address which is allocated
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_alloc {
+	struct {
+		__u64 va_pages;
+		__u64 commit_pages;
+		__u64 extent;
+		__u64 flags;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALLOC \
+	_IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc)
+
+/**
+ * struct kbase_ioctl_mem_query - Query properties of a GPU memory region
+ * @gpu_addr: A GPU address contained within the region
+ * @query: The type of query
+ * @value: The result of the query
+ *
+ * Use a %KBASE_MEM_QUERY_xxx flag as input for @query.
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_query {
+	struct {
+		__u64 gpu_addr;
+		__u64 query;
+	} in;
+	struct {
+		__u64 value;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_QUERY \
+	_IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query)
+
+#define KBASE_MEM_QUERY_COMMIT_SIZE	1
+#define KBASE_MEM_QUERY_VA_SIZE		2
+#define KBASE_MEM_QUERY_FLAGS		3
+
+/**
+ * struct kbase_ioctl_mem_free - Free a memory region
+ * @gpu_addr: Handle to the region to free
+ */
+struct kbase_ioctl_mem_free {
+	__u64 gpu_addr;
+};
+
+#define KBASE_IOCTL_MEM_FREE \
+	_IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free)
+
+/**
+ * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader
+ * @buffer_count: requested number of dumping buffers
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ *
+ * A fd is returned from the ioctl if successful, or a negative value on error
+ */
+struct kbase_ioctl_hwcnt_reader_setup {
+	__u32 buffer_count;
+	__u32 jm_bm;
+	__u32 shader_bm;
+	__u32 tiler_bm;
+	__u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_READER_SETUP \
+	_IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup)
+
+/**
+ * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection
+ * @dump_buffer:  GPU address to write counters to
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ */
+struct kbase_ioctl_hwcnt_enable {
+	__u64 dump_buffer;
+	__u32 jm_bm;
+	__u32 shader_bm;
+	__u32 tiler_bm;
+	__u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_ENABLE \
+	_IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable)
+
+#define KBASE_IOCTL_HWCNT_DUMP \
+	_IO(KBASE_IOCTL_TYPE, 10)
+
+#define KBASE_IOCTL_HWCNT_CLEAR \
+	_IO(KBASE_IOCTL_TYPE, 11)
+
+/**
+ * struct kbase_ioctl_disjoint_query - Query the disjoint counter
+ * @counter:   A counter of disjoint events in the kernel
+ */
+struct kbase_ioctl_disjoint_query {
+	__u32 counter;
+};
+
+#define KBASE_IOCTL_DISJOINT_QUERY \
+	_IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query)
+
+/**
+ * struct kbase_ioctl_get_ddk_version - Query the kernel version
+ * @version_buffer: Buffer to receive the kernel version string
+ * @size: Size of the buffer
+ * @padding: Padding
+ *
+ * The ioctl will return the number of bytes written into version_buffer
+ * (which includes a NULL byte) or a negative error code
+ */
+struct kbase_ioctl_get_ddk_version {
+	__u64 version_buffer;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_GET_DDK_VERSION \
+	_IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
+
+/**
+ * struct kbase_ioctl_mem_jit_init - Initialise the JIT memory allocator
+ *
+ * @va_pages: Number of VA pages to reserve for JIT
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ */
+struct kbase_ioctl_mem_jit_init {
+	__u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT \
+	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init)
+
+/**
+ * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory
+ *
+ * @handle: GPU memory handle (GPU VA)
+ * @user_addr: The address where it is mapped in user space
+ * @size: The number of bytes to synchronise
+ * @type: The direction to synchronise: 0 is sync to memory (clean),
+ * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants.
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_mem_sync {
+	__u64 handle;
+	__u64 user_addr;
+	__u64 size;
+	__u8 type;
+	__u8 padding[7];
+};
+
+#define KBASE_IOCTL_MEM_SYNC \
+	_IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync)
+
+/**
+ * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer
+ *
+ * @gpu_addr: The GPU address of the memory region
+ * @cpu_addr: The CPU address to locate
+ * @size: A size in bytes to validate is contained within the region
+ * @offset: The offset from the start of the memory region to @cpu_addr
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_find_cpu_offset {
+	struct {
+		__u64 gpu_addr;
+		__u64 cpu_addr;
+		__u64 size;
+	} in;
+	struct {
+		__u64 offset;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \
+	_IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset)
+
+/**
+ * struct kbase_ioctl_get_context_id - Get the kernel context ID
+ *
+ * @id: The kernel context ID
+ */
+struct kbase_ioctl_get_context_id {
+	__u32 id;
+};
+
+#define KBASE_IOCTL_GET_CONTEXT_ID \
+	_IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id)
+
+/**
+ * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd
+ *
+ * @flags: Flags
+ *
+ * The ioctl returns a file descriptor when successful
+ */
+struct kbase_ioctl_tlstream_acquire {
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_TLSTREAM_ACQUIRE \
+	_IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire)
+
+#define KBASE_IOCTL_TLSTREAM_FLUSH \
+	_IO(KBASE_IOCTL_TYPE, 19)
+
+/**
+ * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region
+ *
+ * @gpu_addr: The memory region to modify
+ * @pages:    The number of physical pages that should be present
+ *
+ * The ioctl may return on the following error codes or 0 for success:
+ *   -ENOMEM: Out of memory
+ *   -EINVAL: Invalid arguments
+ */
+struct kbase_ioctl_mem_commit {
+	__u64 gpu_addr;
+	__u64 pages;
+};
+
+#define KBASE_IOCTL_MEM_COMMIT \
+	_IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit)
+
+/**
+ * union kbase_ioctl_mem_alias - Create an alias of memory regions
+ * @flags: Flags, see BASE_MEM_xxx
+ * @stride: Bytes between start of each memory region
+ * @nents: The number of regions to pack together into the alias
+ * @aliasing_info: Pointer to an array of struct base_mem_aliasing_info
+ * @gpu_va: Address of the new alias
+ * @va_pages: Size of the new alias
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_alias {
+	struct {
+		__u64 flags;
+		__u64 stride;
+		__u64 nents;
+		__u64 aliasing_info;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALIAS \
+	_IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias)
+
+/**
+ * union kbase_ioctl_mem_import - Import memory for use by the GPU
+ * @flags: Flags, see BASE_MEM_xxx
+ * @phandle: Handle to the external memory
+ * @type: Type of external memory, see base_mem_import_type
+ * @padding: Amount of extra VA pages to append to the imported buffer
+ * @gpu_va: Address of the new alias
+ * @va_pages: Size of the new alias
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_import {
+	struct {
+		__u64 flags;
+		__u64 phandle;
+		__u32 type;
+		__u32 padding;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_IMPORT \
+	_IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import)
+
+/**
+ * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region
+ * @gpu_va: The GPU region to modify
+ * @flags: The new flags to set
+ * @mask: Mask of the flags to modify
+ */
+struct kbase_ioctl_mem_flags_change {
+	__u64 gpu_va;
+	__u64 flags;
+	__u64 mask;
+};
+
+#define KBASE_IOCTL_MEM_FLAGS_CHANGE \
+	_IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change)
+
+/**
+ * struct kbase_ioctl_stream_create - Create a synchronisation stream
+ * @name: A name to identify this stream. Must be NULL-terminated.
+ *
+ * Note that this is also called a "timeline", but is named stream to avoid
+ * confusion with other uses of the word.
+ *
+ * Unused bytes in @name (after the first NULL byte) must be also be NULL bytes.
+ *
+ * The ioctl returns a file descriptor.
+ */
+struct kbase_ioctl_stream_create {
+	char name[32];
+};
+
+#define KBASE_IOCTL_STREAM_CREATE \
+	_IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create)
+
+/**
+ * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence
+ * @fd: The file descriptor to validate
+ */
+struct kbase_ioctl_fence_validate {
+	int fd;
+};
+
+#define KBASE_IOCTL_FENCE_VALIDATE \
+	_IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
+
+/**
+ * struct kbase_ioctl_get_profiling_controls - Get the profiling controls
+ * @count: The size of @buffer in u32 words
+ * @buffer: The buffer to receive the profiling controls
+ * @padding: Padding
+ */
+struct kbase_ioctl_get_profiling_controls {
+	__u64 buffer;
+	__u32 count;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_GET_PROFILING_CONTROLS \
+	_IOW(KBASE_IOCTL_TYPE, 26, struct kbase_ioctl_get_profiling_controls)
+
+/**
+ * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel
+ * @buffer: Pointer to the information
+ * @len: Length
+ * @padding: Padding
+ *
+ * The data provided is accessible through a debugfs file
+ */
+struct kbase_ioctl_mem_profile_add {
+	__u64 buffer;
+	__u32 len;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_MEM_PROFILE_ADD \
+	_IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
+
+/**
+ * struct kbase_ioctl_soft_event_update - Update the status of a soft-event
+ * @event: GPU address of the event which has been updated
+ * @new_status: The new status to set
+ * @flags: Flags for future expansion
+ */
+struct kbase_ioctl_soft_event_update {
+	__u64 event;
+	__u32 new_status;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_SOFT_EVENT_UPDATE \
+	_IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
+
+/* IOCTLs 29-32 are reserved */
+
+/***************
+ * test ioctls *
+ ***************/
+#if MALI_UNIT_TEST
+/* These ioctls are purely for test purposes and are not used in the production
+ * driver, they therefore may change without notice
+ */
+
+#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1)
+
+/**
+ * struct kbase_ioctl_tlstream_test - Start a timeline stream test
+ *
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay between tracepoints from one writer in milliseconds
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ */
+struct kbase_ioctl_tlstream_test {
+	__u32 tpw_count;
+	__u32 msg_delay;
+	__u32 msg_count;
+	__u32 aux_msg;
+};
+
+#define KBASE_IOCTL_TLSTREAM_TEST \
+	_IOW(KBASE_IOCTL_TEST_TYPE, 1, struct kbase_ioctl_tlstream_test)
+
+/**
+ * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ */
+struct kbase_ioctl_tlstream_stats {
+	__u32 bytes_collected;
+	__u32 bytes_generated;
+};
+
+#define KBASE_IOCTL_TLSTREAM_STATS \
+	_IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
+
+#endif
+
+/**********************************
+ * Definitions for GPU properties *
+ **********************************/
+#define KBASE_GPUPROP_VALUE_SIZE_U8	(0x0)
+#define KBASE_GPUPROP_VALUE_SIZE_U16	(0x1)
+#define KBASE_GPUPROP_VALUE_SIZE_U32	(0x2)
+#define KBASE_GPUPROP_VALUE_SIZE_U64	(0x3)
+
+#define KBASE_GPUPROP_PRODUCT_ID			1
+#define KBASE_GPUPROP_VERSION_STATUS			2
+#define KBASE_GPUPROP_MINOR_REVISION			3
+#define KBASE_GPUPROP_MAJOR_REVISION			4
+#define KBASE_GPUPROP_GPU_SPEED_MHZ			5
+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX			6
+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MIN			7
+#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE		8
+#define KBASE_GPUPROP_TEXTURE_FEATURES_0		9
+#define KBASE_GPUPROP_TEXTURE_FEATURES_1		10
+#define KBASE_GPUPROP_TEXTURE_FEATURES_2		11
+#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE		12
+
+#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE			13
+#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE		14
+#define KBASE_GPUPROP_L2_NUM_L2_SLICES			15
+
+#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES		16
+#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS		17
+
+#define KBASE_GPUPROP_MAX_THREADS			18
+#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE		19
+#define KBASE_GPUPROP_MAX_BARRIER_SIZE			20
+#define KBASE_GPUPROP_MAX_REGISTERS			21
+#define KBASE_GPUPROP_MAX_TASK_QUEUE			22
+#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT		23
+#define KBASE_GPUPROP_IMPL_TECH				24
+
+#define KBASE_GPUPROP_RAW_SHADER_PRESENT		25
+#define KBASE_GPUPROP_RAW_TILER_PRESENT			26
+#define KBASE_GPUPROP_RAW_L2_PRESENT			27
+#define KBASE_GPUPROP_RAW_STACK_PRESENT			28
+#define KBASE_GPUPROP_RAW_L2_FEATURES			29
+#define KBASE_GPUPROP_RAW_SUSPEND_SIZE			30
+#define KBASE_GPUPROP_RAW_MEM_FEATURES			31
+#define KBASE_GPUPROP_RAW_MMU_FEATURES			32
+#define KBASE_GPUPROP_RAW_AS_PRESENT			33
+#define KBASE_GPUPROP_RAW_JS_PRESENT			34
+#define KBASE_GPUPROP_RAW_JS_FEATURES_0			35
+#define KBASE_GPUPROP_RAW_JS_FEATURES_1			36
+#define KBASE_GPUPROP_RAW_JS_FEATURES_2			37
+#define KBASE_GPUPROP_RAW_JS_FEATURES_3			38
+#define KBASE_GPUPROP_RAW_JS_FEATURES_4			39
+#define KBASE_GPUPROP_RAW_JS_FEATURES_5			40
+#define KBASE_GPUPROP_RAW_JS_FEATURES_6			41
+#define KBASE_GPUPROP_RAW_JS_FEATURES_7			42
+#define KBASE_GPUPROP_RAW_JS_FEATURES_8			43
+#define KBASE_GPUPROP_RAW_JS_FEATURES_9			44
+#define KBASE_GPUPROP_RAW_JS_FEATURES_10		45
+#define KBASE_GPUPROP_RAW_JS_FEATURES_11		46
+#define KBASE_GPUPROP_RAW_JS_FEATURES_12		47
+#define KBASE_GPUPROP_RAW_JS_FEATURES_13		48
+#define KBASE_GPUPROP_RAW_JS_FEATURES_14		49
+#define KBASE_GPUPROP_RAW_JS_FEATURES_15		50
+#define KBASE_GPUPROP_RAW_TILER_FEATURES		51
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0		52
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1		53
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2		54
+#define KBASE_GPUPROP_RAW_GPU_ID			55
+#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS		56
+#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE	57
+#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE	58
+#define KBASE_GPUPROP_RAW_THREAD_FEATURES		59
+#define KBASE_GPUPROP_RAW_COHERENCY_MODE		60
+
+#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS		61
+#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS		62
+#define KBASE_GPUPROP_COHERENCY_COHERENCY		63
+#define KBASE_GPUPROP_COHERENCY_GROUP_0			64
+#define KBASE_GPUPROP_COHERENCY_GROUP_1			65
+#define KBASE_GPUPROP_COHERENCY_GROUP_2			66
+#define KBASE_GPUPROP_COHERENCY_GROUP_3			67
+#define KBASE_GPUPROP_COHERENCY_GROUP_4			68
+#define KBASE_GPUPROP_COHERENCY_GROUP_5			69
+#define KBASE_GPUPROP_COHERENCY_GROUP_6			70
+#define KBASE_GPUPROP_COHERENCY_GROUP_7			71
+#define KBASE_GPUPROP_COHERENCY_GROUP_8			72
+#define KBASE_GPUPROP_COHERENCY_GROUP_9			73
+#define KBASE_GPUPROP_COHERENCY_GROUP_10		74
+#define KBASE_GPUPROP_COHERENCY_GROUP_11		75
+#define KBASE_GPUPROP_COHERENCY_GROUP_12		76
+#define KBASE_GPUPROP_COHERENCY_GROUP_13		77
+#define KBASE_GPUPROP_COHERENCY_GROUP_14		78
+#define KBASE_GPUPROP_COHERENCY_GROUP_15		79
+
+#define KBASE_GPUPROP_TEXTURE_FEATURES_3		80
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3		81
+
+#ifdef __cpluscplus
+}
+#endif
+
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100644
index 0000000..aa545fd
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1607 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <mali_kbase.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/ratelimit.h>
+
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_tlstream.h>
+
+#include "mali_kbase_dma_fence.h"
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/* Return whether katom will run on the GPU or not. Currently only soft jobs and
+ * dependency-only atoms do not run on the GPU */
+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) ||  \
+			((katom->core_req & BASE_JD_REQ_ATOM_TYPE) ==    \
+							BASE_JD_REQ_DEP)))
+/*
+ * This is the kernel side of the API. Only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ *   completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const u64 p)
+{
+#ifdef CONFIG_COMPAT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return compat_ptr(p);
+#endif
+	return u64_to_user_ptr(p);
+}
+
+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
+ */
+static int jd_run_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+	if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+		/* Dependency only atom */
+		katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		return 0;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		/* Soft-job */
+		if (katom->will_fail_event_code) {
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			return 0;
+		}
+		if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+			if (!kbase_replay_process(katom))
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		} else if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		}
+		return 0;
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+	/* Queue an action about whether we should try scheduling a context */
+	return kbasep_js_add_job(kctx, katom);
+}
+
+#if defined(CONFIG_MALI_DMA_FENCE)
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kbdev = katom->kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Check whether the atom's other dependencies were already met. If
+	 * katom is a GPU atom then the job scheduler may be able to represent
+	 * the dependencies, hence we may attempt to submit it before they are
+	 * met. Other atoms must have had both dependencies resolved.
+	 */
+	if (IS_GPU_ATOM(katom) ||
+			(!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+			!kbase_jd_katom_dep_atom(&katom->dep[1]))) {
+		/* katom dep complete, attempt to run it */
+		bool resched = false;
+
+		resched = jd_run_atom(katom);
+
+		if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+			/* The atom has already finished */
+			resched |= jd_done_nolock(katom, NULL);
+		}
+
+		if (resched)
+			kbase_js_sched_all(kbdev);
+	}
+}
+#endif
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 * Any successfully completed atom would have had all it's callbacks
+	 * completed before the atom was run, so only flush for failed atoms.
+	 */
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		flush_workqueue(katom->kctx->dma_fence.wq);
+#endif /* CONFIG_MALI_DMA_FENCE */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_signal(katom);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	kbase_gpu_vm_lock(katom->kctx);
+	/* only roll back if extres is non-NULL */
+	if (katom->extres) {
+		u32 res_no;
+
+		res_no = katom->nr_extres;
+		while (res_no-- > 0) {
+			struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+			struct kbase_va_region *reg;
+
+			reg = kbase_region_tracker_find_region_base_address(
+					katom->kctx,
+					katom->extres[res_no].gpu_address);
+			kbase_unmap_external_resource(katom->kctx, reg, alloc);
+		}
+		kfree(katom->extres);
+		katom->extres = NULL;
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+}
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ */
+
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+	int err_ret_val = -EINVAL;
+	u32 res_no;
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct kbase_dma_fence_resv_info info = {
+		.dma_fence_resv_count = 0,
+	};
+#ifdef CONFIG_SYNC
+	/*
+	 * When both dma-buf fence and Android native sync is enabled, we
+	 * disable dma-buf fence for contexts that are using Android native
+	 * fences.
+	 */
+	const bool implicit_sync = !kbase_ctx_flag(katom->kctx,
+						   KCTX_NO_IMPLICIT_SYNC);
+#else /* CONFIG_SYNC */
+	const bool implicit_sync = true;
+#endif /* CONFIG_SYNC */
+#endif /* CONFIG_MALI_DMA_FENCE */
+	struct base_external_resource *input_extres;
+
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+	/* no resources encoded, early out */
+	if (!katom->nr_extres)
+		return -EINVAL;
+
+	katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+	if (NULL == katom->extres) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	/* copy user buffer to the end of our real buffer.
+	 * Make sure the struct sizes haven't changed in a way
+	 * we don't support */
+	BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+	input_extres = (struct base_external_resource *)
+			(((unsigned char *)katom->extres) +
+			(sizeof(*katom->extres) - sizeof(*input_extres)) *
+			katom->nr_extres);
+
+	if (copy_from_user(input_extres,
+			get_compat_pointer(katom->kctx, user_atom->extres_list),
+			sizeof(*input_extres) * katom->nr_extres) != 0) {
+		err_ret_val = -EINVAL;
+		goto early_err_out;
+	}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		info.resv_objs = kmalloc_array(katom->nr_extres,
+					sizeof(struct reservation_object *),
+					GFP_KERNEL);
+		if (!info.resv_objs) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+
+		info.dma_fence_excl_bitmap =
+				kcalloc(BITS_TO_LONGS(katom->nr_extres),
+					sizeof(unsigned long), GFP_KERNEL);
+		if (!info.dma_fence_excl_bitmap) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Take the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* need to keep the GPU VM locked while we set up UMM buffers */
+	kbase_gpu_vm_lock(katom->kctx);
+	for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+		struct base_external_resource *res;
+		struct kbase_va_region *reg;
+		struct kbase_mem_phy_alloc *alloc;
+		bool exclusive;
+
+		res = &input_extres[res_no];
+		exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
+				? true : false;
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx,
+				res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+		/* did we find a matching region object? */
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE)) {
+			/* roll back */
+			goto failed_loop;
+		}
+
+		if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
+				(reg->flags & KBASE_REG_SECURE)) {
+			katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED;
+		}
+
+		alloc = kbase_map_external_resource(katom->kctx, reg,
+				current->mm);
+		if (!alloc) {
+			err_ret_val = -EINVAL;
+			goto failed_loop;
+		}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		if (implicit_sync &&
+		    reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+			struct reservation_object *resv;
+
+			resv = reg->gpu_alloc->imported.umm.dma_buf->resv;
+			if (resv)
+				kbase_dma_fence_add_reservation(resv, &info,
+								exclusive);
+		}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+		/* finish with updating out array with the data we found */
+		/* NOTE: It is important that this is the last thing we do (or
+		 * at least not before the first write) as we overwrite elements
+		 * as we loop and could be overwriting ourself, so no writes
+		 * until the last read for an element.
+		 * */
+		katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+		katom->extres[res_no].alloc = alloc;
+	}
+	/* successfully parsed the extres array */
+	/* drop the vm lock now */
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		if (info.dma_fence_resv_count) {
+			int ret;
+
+			ret = kbase_dma_fence_wait(katom, &info);
+			if (ret < 0)
+				goto failed_dma_fence_setup;
+		}
+
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* all done OK */
+	return 0;
+
+/* error handling section */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+failed_dma_fence_setup:
+	/* Lock the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* lock before we unmap */
+	kbase_gpu_vm_lock(katom->kctx);
+#endif
+
+ failed_loop:
+	/* undo the loop work */
+	while (res_no-- > 0) {
+		struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+
+		kbase_unmap_external_resource(katom->kctx, NULL, alloc);
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+ early_err_out:
+	kfree(katom->extres);
+	katom->extres = NULL;
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif
+	return err_ret_val;
+}
+
+static inline void jd_resolve_dep(struct list_head *out_list,
+					struct kbase_jd_atom *katom,
+					u8 d, bool ctx_is_dying)
+{
+	u8 other_d = !d;
+
+	while (!list_empty(&katom->dep_head[d])) {
+		struct kbase_jd_atom *dep_atom;
+		struct kbase_jd_atom *other_dep_atom;
+		u8 dep_type;
+
+		dep_atom = list_entry(katom->dep_head[d].next,
+				struct kbase_jd_atom, dep_item[d]);
+		list_del(katom->dep_head[d].next);
+
+		dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+		kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+		if (katom->event_code != BASE_JD_EVENT_DONE &&
+			(dep_type != BASE_JD_DEP_TYPE_ORDER)) {
+#ifdef CONFIG_MALI_DMA_FENCE
+			kbase_dma_fence_cancel_callbacks(dep_atom);
+#endif
+
+			dep_atom->event_code = katom->event_code;
+			KBASE_DEBUG_ASSERT(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED);
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY)
+					!= BASE_JD_REQ_SOFT_REPLAY) {
+				dep_atom->will_fail_event_code =
+					dep_atom->event_code;
+			} else {
+				dep_atom->status =
+					KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+		}
+		other_dep_atom = (struct kbase_jd_atom *)
+			kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]);
+
+		if (!dep_atom->in_jd_list && (!other_dep_atom ||
+				(IS_GPU_ATOM(dep_atom) && !ctx_is_dying &&
+				!dep_atom->will_fail_event_code &&
+				!other_dep_atom->will_fail_event_code))) {
+			bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+			int dep_count;
+
+			dep_count = kbase_fence_dep_count_read(dep_atom);
+			if (likely(dep_count == -1)) {
+				dep_satisfied = true;
+			} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+				dep_satisfied = false;
+			}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+			if (dep_satisfied) {
+				dep_atom->in_jd_list = true;
+				list_add_tail(&dep_atom->jd_item, out_list);
+			}
+		}
+	}
+}
+
+KBASE_EXPORT_TEST_API(jd_resolve_dep);
+
+#if MALI_CUSTOMER_RELEASE == 0
+static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	kbdev->force_replay_count++;
+
+	if (kbdev->force_replay_count >= kbdev->force_replay_limit) {
+		kbdev->force_replay_count = 0;
+		katom->event_code = BASE_JD_EVENT_FORCE_REPLAY;
+
+		if (kbdev->force_replay_random)
+			kbdev->force_replay_limit =
+			   (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1;
+
+		dev_info(kbdev->dev, "force_replay : promoting to error\n");
+	}
+}
+
+/** Test to see if atom should be forced to fail.
+ *
+ * This function will check if an atom has a replay job as a dependent. If so
+ * then it will be considered for forced failure. */
+static void jd_check_force_failure(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int i;
+
+	if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) ||
+	    (katom->core_req & BASEP_JD_REQ_EVENT_NEVER))
+		return;
+
+	for (i = 1; i < BASE_JD_ATOM_COUNT; i++) {
+		if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom ||
+		    kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
+			struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
+						     BASE_JD_REQ_SOFT_REPLAY &&
+			    (dep_atom->core_req & kbdev->force_replay_core_req)
+					     == kbdev->force_replay_core_req) {
+				jd_force_failure(kbdev, katom);
+				return;
+			}
+		}
+	}
+}
+#endif
+
+/**
+ * is_dep_valid - Validate that a dependency is valid for early dependency
+ *                submission
+ * @katom: Dependency atom to validate
+ *
+ * A dependency is valid if any of the following are true :
+ * - It does not exist (a non-existent dependency does not block submission)
+ * - It is in the job scheduler
+ * - It has completed, does not have a failure event code, and has not been
+ *   marked to fail in the future
+ *
+ * Return: true if valid, false otherwise
+ */
+static bool is_dep_valid(struct kbase_jd_atom *katom)
+{
+	/* If there's no dependency then this is 'valid' from the perspective of
+	 * early dependency submission */
+	if (!katom)
+		return true;
+
+	/* Dependency must have reached the job scheduler */
+	if (katom->status < KBASE_JD_ATOM_STATE_IN_JS)
+		return false;
+
+	/* If dependency has completed and has failed or will fail then it is
+	 * not valid */
+	if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED &&
+			(katom->event_code != BASE_JD_EVENT_DONE ||
+			katom->will_fail_event_code))
+		return false;
+
+	return true;
+}
+
+static void jd_try_submitting_deps(struct list_head *out_list,
+		struct kbase_jd_atom *node)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct list_head *pos;
+
+		list_for_each(pos, &node->dep_head[i]) {
+			struct kbase_jd_atom *dep_atom = list_entry(pos,
+					struct kbase_jd_atom, dep_item[i]);
+
+			if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) {
+				/*Check if atom deps look sane*/
+				bool dep0_valid = is_dep_valid(
+						dep_atom->dep[0].atom);
+				bool dep1_valid = is_dep_valid(
+						dep_atom->dep[1].atom);
+				bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+				int dep_count;
+
+				dep_count = kbase_fence_dep_count_read(
+								dep_atom);
+				if (likely(dep_count == -1)) {
+					dep_satisfied = true;
+				} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+					dep_satisfied = false;
+				}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+				if (dep0_valid && dep1_valid && dep_satisfied) {
+					dep_atom->in_jd_list = true;
+					list_add(&dep_atom->jd_item, out_list);
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head completed_jobs;
+	struct list_head runnable_jobs;
+	bool need_to_try_schedule_context = false;
+	int i;
+
+	INIT_LIST_HEAD(&completed_jobs);
+	INIT_LIST_HEAD(&runnable_jobs);
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+#if MALI_CUSTOMER_RELEASE == 0
+	jd_check_force_failure(katom);
+#endif
+
+	/* This is needed in case an atom is failed due to being invalid, this
+	 * can happen *before* the jobs that the atom depends on have completed */
+	for (i = 0; i < 2; i++) {
+		if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
+			list_del(&katom->dep_item[i]);
+			kbase_jd_katom_dep_clear(&katom->dep[i]);
+		}
+	}
+
+	/* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with
+	 * BASE_JD_EVENT_TILE_RANGE_FAULT.
+	 *
+	 * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the
+	 * error code to BASE_JD_EVENT_DONE
+	 */
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) &&
+		  katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) {
+		if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) {
+			/* Promote the failure to job done */
+			katom->event_code = BASE_JD_EVENT_DONE;
+			katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED);
+		}
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+	list_add_tail(&katom->jd_item, &completed_jobs);
+
+	while (!list_empty(&completed_jobs)) {
+		katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item);
+		list_del(completed_jobs.prev);
+		KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+		for (i = 0; i < 2; i++)
+			jd_resolve_dep(&runnable_jobs, katom, i,
+					kbase_ctx_flag(kctx, KCTX_DYING));
+
+		if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+			kbase_jd_post_external_resources(katom);
+
+		while (!list_empty(&runnable_jobs)) {
+			struct kbase_jd_atom *node;
+
+			node = list_entry(runnable_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(runnable_jobs.next);
+			node->in_jd_list = false;
+
+			KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+			if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+				need_to_try_schedule_context |= jd_run_atom(node);
+			} else {
+				node->event_code = katom->event_code;
+
+				if ((node->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE) ==
+					BASE_JD_REQ_SOFT_REPLAY) {
+					if (kbase_replay_process(node))
+						/* Don't complete this atom */
+						continue;
+				} else if (node->core_req &
+							BASE_JD_REQ_SOFT_JOB) {
+					/* If this is a fence wait soft job
+					 * then remove it from the list of sync
+					 * waiters.
+					 */
+					if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
+						kbasep_remove_waiting_soft_job(node);
+
+					kbase_finish_soft_job(node);
+				}
+				node->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+
+			if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+				list_add_tail(&node->jd_item, &completed_jobs);
+			} else if (node->status == KBASE_JD_ATOM_STATE_IN_JS &&
+					!node->will_fail_event_code) {
+				/* Node successfully submitted, try submitting
+				 * dependencies as they may now be representable
+				 * in JS */
+				jd_try_submitting_deps(&runnable_jobs, node);
+			}
+		}
+
+		/* Register a completed job as a disjoint event when the GPU
+		 * is in a disjoint state (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kctx->kbdev);
+		if (completed_jobs_ctx)
+			list_add_tail(&katom->jd_item, completed_jobs_ctx);
+		else
+			kbase_event_post(kctx, katom);
+
+		/* Decrement and check the TOTAL number of jobs. This includes
+		 * those not tracked by the scheduler: 'not ready to run' and
+		 * 'dependency-only' jobs. */
+		if (--kctx->jctx.job_nr == 0)
+			wake_up(&kctx->jctx.zero_jobs_wait);	/* All events are safely queued now, and we can signal any waiter
+								 * that we've got no more jobs (so we can be safely terminated) */
+	}
+
+	return need_to_try_schedule_context;
+}
+
+KBASE_EXPORT_TEST_API(jd_done_nolock);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+enum {
+	CORE_REQ_DEP_ONLY,
+	CORE_REQ_SOFT,
+	CORE_REQ_COMPUTE,
+	CORE_REQ_FRAGMENT,
+	CORE_REQ_VERTEX,
+	CORE_REQ_TILER,
+	CORE_REQ_FRAGMENT_VERTEX,
+	CORE_REQ_FRAGMENT_VERTEX_TILER,
+	CORE_REQ_FRAGMENT_TILER,
+	CORE_REQ_VERTEX_TILER,
+	CORE_REQ_UNKNOWN
+};
+static const char * const core_req_strings[] = {
+	"Dependency Only Job",
+	"Soft Job",
+	"Compute Shader Job",
+	"Fragment Shader Job",
+	"Vertex/Geometry Shader Job",
+	"Tiler Job",
+	"Fragment Shader + Vertex/Geometry Shader Job",
+	"Fragment Shader + Vertex/Geometry Shader Job + Tiler Job",
+	"Fragment Shader + Tiler Job",
+	"Vertex/Geometry Shader Job + Tiler Job",
+	"Unknown Job"
+};
+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
+{
+	if (core_req & BASE_JD_REQ_SOFT_JOB)
+		return core_req_strings[CORE_REQ_SOFT];
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		return core_req_strings[CORE_REQ_COMPUTE];
+	switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) {
+	case BASE_JD_REQ_DEP:
+		return core_req_strings[CORE_REQ_DEP_ONLY];
+	case BASE_JD_REQ_FS:
+		return core_req_strings[CORE_REQ_FRAGMENT];
+	case BASE_JD_REQ_CS:
+		return core_req_strings[CORE_REQ_VERTEX];
+	case BASE_JD_REQ_T:
+		return core_req_strings[CORE_REQ_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_TILER];
+	case (BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_VERTEX_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER];
+	}
+	return core_req_strings[CORE_REQ_UNKNOWN];
+}
+#endif
+
+bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int queued = 0;
+	int i;
+	int sched_prio;
+	bool ret;
+	bool will_fail = false;
+
+	/* Update the TOTAL number of jobs. This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	jctx->job_nr++;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	katom->start_timestamp.tv64 = 0;
+#else
+	katom->start_timestamp = 0;
+#endif
+	katom->udata = user_atom->udata;
+	katom->kctx = kctx;
+	katom->nr_extres = user_atom->nr_extres;
+	katom->extres = NULL;
+	katom->device_nr = user_atom->device_nr;
+	katom->affinity = 0;
+	katom->jc = user_atom->jc;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->core_req = user_atom->core_req;
+	katom->atom_flags = 0;
+	katom->retry_count = 0;
+	katom->need_cache_flush_cores_retained = 0;
+	katom->pre_dep = NULL;
+	katom->post_dep = NULL;
+	katom->x_pre_dep = NULL;
+	katom->x_post_dep = NULL;
+	katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED;
+
+	/* Implicitly sets katom->protected_state.enter as well. */
+	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+	katom->age = kctx->age_count++;
+
+	INIT_LIST_HEAD(&katom->jd_item);
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_fence_dep_count_set(katom, -1);
+#endif
+
+	/* Don't do anything if there is a mess up with dependencies.
+	   This is done in a separate cycle to check both the dependencies at ones, otherwise
+	   it will be extra complexity to deal with 1st dependency ( just added to the list )
+	   if only the 2nd one has invalid config.
+	 */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+		if (dep_atom_number) {
+			if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+				katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+
+				/* Wrong dependency setup. Atom will be sent
+				 * back to user space. Do not record any
+				 * dependencies. */
+				KBASE_TLSTREAM_TL_NEW_ATOM(
+						katom,
+						kbase_jd_atom_id(kctx, katom));
+				KBASE_TLSTREAM_TL_RET_ATOM_CTX(
+						katom, kctx);
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom,
+						TL_ATOM_STATE_IDLE);
+
+				ret = jd_done_nolock(katom, NULL);
+				goto out;
+			}
+		}
+	}
+
+	/* Add dependencies */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type;
+		struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number];
+
+		dep_atom_type = user_atom->pre_dep[i].dependency_type;
+		kbase_jd_katom_dep_clear(&katom->dep[i]);
+
+		if (!dep_atom_number)
+			continue;
+
+		if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED ||
+				dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+
+			if (dep_atom->event_code == BASE_JD_EVENT_DONE)
+				continue;
+			/* don't stop this atom if it has an order dependency
+			 * only to the failed one, try to submit it through
+			 * the normal path
+			 */
+			if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom->event_code > BASE_JD_EVENT_ACTIVE) {
+				continue;
+			}
+
+			/* Atom has completed, propagate the error code if any */
+			katom->event_code = dep_atom->event_code;
+			katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+			/* This atom is going through soft replay or
+			 * will be sent back to user space. Do not record any
+			 * dependencies. */
+			KBASE_TLSTREAM_TL_NEW_ATOM(
+					katom,
+					kbase_jd_atom_id(kctx, katom));
+			KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx);
+			KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom,
+					TL_ATOM_STATE_IDLE);
+
+			if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					 == BASE_JD_REQ_SOFT_REPLAY) {
+				if (kbase_replay_process(katom)) {
+					ret = false;
+					goto out;
+				}
+			}
+			will_fail = true;
+
+		} else {
+			/* Atom is in progress, add this atom to the list */
+			list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
+			kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type);
+			queued = 1;
+		}
+	}
+
+	if (will_fail) {
+		if (!queued) {
+			ret = jd_done_nolock(katom, NULL);
+
+			goto out;
+		} else {
+			katom->will_fail_event_code = katom->event_code;
+			ret = false;
+
+			goto out;
+		}
+	} else {
+		/* These must occur after the above loop to ensure that an atom
+		 * that depends on a previous atom with the same number behaves
+		 * as expected */
+		katom->event_code = BASE_JD_EVENT_DONE;
+		katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	}
+
+	/* For invalid priority, be most lenient and choose the default */
+	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+	katom->sched_priority = sched_prio;
+
+	/* Create a new atom recording all dependencies it was set up with. */
+	KBASE_TLSTREAM_TL_NEW_ATOM(
+			katom,
+			kbase_jd_atom_id(kctx, katom));
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_IDLE);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(katom, katom->sched_priority);
+	KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx);
+	for (i = 0; i < 2; i++)
+		if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
+					&katom->dep[i])) {
+			KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(
+					(void *)kbase_jd_katom_dep_atom(
+						&katom->dep[i]),
+					(void *)katom);
+		} else if (BASE_JD_DEP_TYPE_INVALID !=
+				user_atom->pre_dep[i].dependency_type) {
+			/* Resolved dependency. */
+			int dep_atom_number =
+				user_atom->pre_dep[i].atom_id;
+			struct kbase_jd_atom *dep_atom =
+				&jctx->atoms[dep_atom_number];
+
+			KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(
+					(void *)dep_atom,
+					(void *)katom);
+		}
+
+	/* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
+	if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with an invalid device_nr */
+	if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
+	    (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid device_nr %d",
+				katom->device_nr);
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with invalid core requirements */
+	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
+			(katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid core requirements");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		/* handle what we need to do to access the external resources */
+		if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
+			/* setup failed (no access, bad resource, unknown resource types, etc.) */
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+	/* Validate the atom. Function will return error if the atom is
+	 * malformed.
+	 *
+	 * Soft-jobs never enter the job scheduler but have their own initialize method.
+	 *
+	 * If either fail then we immediately complete the atom with an error.
+	 */
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
+		if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	} else {
+		/* Soft-job */
+		if (kbase_prepare_soft_job(katom) != 0) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	katom->work_id = atomic_inc_return(&jctx->work_id);
+	trace_gpu_job_enqueue(kctx->id, katom->work_id,
+			kbasep_map_core_reqs_to_string(katom->core_req));
+#endif
+
+	if (queued && !IS_GPU_ATOM(katom)) {
+		ret = false;
+		goto out;
+	}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (kbase_fence_dep_count_read(katom) != -1) {
+		ret = false;
+		goto out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+		if (kbase_replay_process(katom))
+			ret = false;
+		else
+			ret = jd_done_nolock(katom, NULL);
+
+		goto out;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+
+		ret = false;
+	} else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		ret = kbasep_js_add_job(kctx, katom);
+		/* If job was cancelled then resolve immediately */
+		if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED)
+			ret = jd_done_nolock(katom, NULL);
+	} else {
+		/* This is a pure dependency. Resolve it immediately */
+		ret = jd_done_nolock(katom, NULL);
+	}
+
+ out:
+	return ret;
+}
+
+int kbase_jd_submit(struct kbase_context *kctx,
+		void __user *user_addr, u32 nr_atoms, u32 stride,
+		bool uk6_atom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int err = 0;
+	int i;
+	bool need_to_try_schedule_context = false;
+	struct kbase_device *kbdev;
+	u32 latest_flush;
+
+	/*
+	 * kbase_jd_submit isn't expected to fail and so all errors with the
+	 * jobs are reported by immediately failing them (through event system)
+	 */
+	kbdev = kctx->kbdev;
+
+	beenthere(kctx, "%s", "Enter");
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+		return -EINVAL;
+	}
+
+	if (stride != sizeof(base_jd_atom_v2)) {
+		dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+		return -EINVAL;
+	}
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(nr_atoms,
+				&kctx->timeline.jd_atoms_in_flight));
+
+	/* All atoms submitted in this call have the same flush ID */
+	latest_flush = kbase_backend_get_current_flush_id(kbdev);
+
+	for (i = 0; i < nr_atoms; i++) {
+		struct base_jd_atom_v2 user_atom;
+		struct kbase_jd_atom *katom;
+
+		if (copy_from_user(&user_atom, user_addr,
+					sizeof(user_atom)) != 0) {
+			err = -EINVAL;
+			KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx,
+				atomic_sub_return(nr_atoms - i,
+				&kctx->timeline.jd_atoms_in_flight));
+			break;
+		}
+
+		user_addr = (void __user *)((uintptr_t) user_addr + stride);
+
+		mutex_lock(&jctx->lock);
+#ifndef compiletime_assert
+#define compiletime_assert_defined
+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \
+while (false)
+#endif
+		compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) ==
+					BASE_JD_ATOM_COUNT,
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+		compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) ==
+					sizeof(user_atom.atom_number),
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+#ifdef compiletime_assert_defined
+#undef compiletime_assert
+#undef compiletime_assert_defined
+#endif
+		katom = &jctx->atoms[user_atom.atom_number];
+
+		/* Record the flush ID for the cache flush optimisation */
+		katom->flush_id = latest_flush;
+
+		while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
+			/* Atom number is already in use, wait for the atom to
+			 * complete
+			 */
+			mutex_unlock(&jctx->lock);
+
+			/* This thread will wait for the atom to complete. Due
+			 * to thread scheduling we are not sure that the other
+			 * thread that owns the atom will also schedule the
+			 * context, so we force the scheduler to be active and
+			 * hence eventually schedule this context at some point
+			 * later.
+			 */
+			kbase_js_sched_all(kbdev);
+
+			if (wait_event_killable(katom->completed,
+					katom->status ==
+					KBASE_JD_ATOM_STATE_UNUSED) != 0) {
+				/* We're being killed so the result code
+				 * doesn't really matter
+				 */
+				return 0;
+			}
+			mutex_lock(&jctx->lock);
+		}
+
+		need_to_try_schedule_context |=
+				       jd_submit_atom(kctx, &user_atom, katom);
+
+		/* Register a completed job as a disjoint event when the GPU is in a disjoint state
+		 * (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kbdev);
+
+		mutex_unlock(&jctx->lock);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kbdev);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_submit);
+
+void kbase_jd_done_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	u64 cache_jc = katom->jc;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool context_idle;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+	kbdev = kctx->kbdev;
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	/*
+	 * Begin transaction on JD context and JS context
+	 */
+	mutex_lock(&jctx->lock);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_DONE);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* This worker only gets called on contexts that are scheduled *in*. This is
+	 * because it only happens in response to an IRQ from a job that was
+	 * running.
+	 */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (katom->event_code == BASE_JD_EVENT_STOPPED) {
+		/* Atom has been promoted to stopped */
+		unsigned long flags;
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		kbase_js_unpull(kctx, katom);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&jctx->lock);
+
+		return;
+	}
+
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		dev_err(kbdev->dev,
+			"t6xx: GPU fault 0x%02lx from job slot %d\n",
+					(unsigned long)katom->event_code,
+								katom->slot_nr);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	/* Retain state before the katom disappears */
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+
+	context_idle = kbase_js_complete_atom_wq(kctx, katom);
+
+	KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
+
+	kbasep_js_remove_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+	jd_done_nolock(katom, &kctx->completed_jobs);
+
+	/* katom may have been freed now, do not use! */
+
+	if (context_idle) {
+		unsigned long flags;
+
+		context_idle = false;
+		mutex_lock(&js_devdata->queue_mutex);
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* If kbase_sched() has scheduled this context back in then
+		 * KCTX_ACTIVE will have been set after we marked it as
+		 * inactive, and another pm reference will have been taken, so
+		 * drop our reference. But do not call kbase_jm_idle_ctx(), as
+		 * the context is active and fast-starting is allowed.
+		 *
+		 * If an atom has been fast-started then kctx->atoms_pulled will
+		 * be non-zero but KCTX_ACTIVE will still be false (as the
+		 * previous pm reference has been inherited). Do NOT drop our
+		 * reference, as it has been re-used, and leave the context as
+		 * active.
+		 *
+		 * If no new atoms have been started then KCTX_ACTIVE will still
+		 * be false and atoms_pulled will be zero, so drop the reference
+		 * and call kbase_jm_idle_ctx().
+		 *
+		 * As the checks are done under both the queue_mutex and
+		 * hwaccess_lock is should be impossible for this to race
+		 * with the scheduler code.
+		 */
+		if (kbase_ctx_flag(kctx, KCTX_ACTIVE) ||
+		    !atomic_read(&kctx->atoms_pulled)) {
+			/* Calling kbase_jm_idle_ctx() here will ensure that
+			 * atoms are not fast-started when we drop the
+			 * hwaccess_lock. This is not performed if
+			 * KCTX_ACTIVE is set as in that case another pm
+			 * reference has been taken and a fast-start would be
+			 * valid.
+			 */
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE))
+				kbase_jm_idle_ctx(kbdev, kctx);
+			context_idle = true;
+		} else {
+			kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+		}
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+
+	/*
+	 * Transaction complete
+	 */
+	mutex_unlock(&jctx->lock);
+
+	/* Job is now no longer running, so can now safely release the context
+	 * reference, and handle any actions that were logged against the atom's retained state */
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	if (!atomic_dec_return(&kctx->work_count)) {
+		/* If worker now idle then post all events that jd_done_nolock()
+		 * has queued */
+		mutex_lock(&jctx->lock);
+		while (!list_empty(&kctx->completed_jobs)) {
+			struct kbase_jd_atom *atom = list_entry(
+					kctx->completed_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(kctx->completed_jobs.next);
+
+			kbase_event_post(kctx, atom);
+		}
+		mutex_unlock(&jctx->lock);
+	}
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+
+	if (context_idle)
+		kbase_pm_context_idle(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+}
+
+/**
+ * jd_cancel_worker - Work queue job cancel function.
+ * @data: a &struct work_struct
+ *
+ * Only called as part of 'Zapping' a context (which occurs on termination).
+ * Operates serially with the kbase_jd_done_worker() on the work queue.
+ *
+ * This can only be called on contexts that aren't scheduled.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be
+ * running (by virtue of only being called on contexts that aren't
+ * scheduled).
+ */
+static void jd_cancel_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool need_to_try_schedule_context;
+	bool attr_state_changed;
+	struct kbase_device *kbdev;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	kbdev = kctx->kbdev;
+	jctx = &kctx->jctx;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+	/* This only gets called on contexts that are scheduled out. Hence, we must
+	 * make sure we don't de-ref the number of running jobs (there aren't
+	 * any), nor must we try to schedule out the context (it's already
+	 * scheduled out).
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	/* Scheduler: Remove the job from the system */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&jctx->lock);
+
+	need_to_try_schedule_context = jd_done_nolock(katom, NULL);
+	/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
+	 * schedule the context. There's also no need for the jsctx_mutex to have been taken
+	 * around this too. */
+	KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
+
+	/* katom may have been freed now, do not use! */
+	mutex_unlock(&jctx->lock);
+
+	if (attr_state_changed)
+		kbase_js_sched_all(kbdev);
+}
+
+/**
+ * kbase_jd_done - Complete a job that has been removed from the Hardware
+ * @katom: atom which has been completed
+ * @slot_nr: slot the atom was on
+ * @end_timestamp: completion time
+ * @done_code: completion code
+ *
+ * This must be used whenever a job has been removed from the Hardware, e.g.:
+ * An IRQ indicates that the job finished (for both error and 'done' codes), or
+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop.
+ *
+ * Some work is carried out immediately, and the rest is deferred onto a
+ * workqueue
+ *
+ * Context:
+ *   This can be called safely from atomic context.
+ *   The caller must hold kbdev->hwaccess_lock
+ */
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
+		ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
+{
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(kctx);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
+		katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0);
+
+	kbase_job_check_leave_disjoint(kbdev, katom);
+
+	katom->slot_nr = slot_nr;
+
+	atomic_inc(&kctx->work_count);
+
+#ifdef CONFIG_DEBUG_FS
+	/* a failed job happened and is waiting for dumping*/
+	if (!katom->will_fail_event_code &&
+			kbase_debug_job_fault_process(katom, katom->event_code))
+		return;
+#endif
+
+	WARN_ON(work_pending(&katom->work));
+	INIT_WORK(&katom->work, kbase_jd_done_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_done);
+
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+	/* This should only be done from a context that is not scheduled */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	WARN_ON(work_pending(&katom->work));
+
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	INIT_WORK(&katom->work, jd_cancel_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+
+void kbase_jd_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_jd_atom *katom;
+	struct list_head *entry, *tmp;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u);
+
+	kbase_js_zap_context(kctx);
+
+	mutex_lock(&kctx->jctx.lock);
+
+	/*
+	 * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are
+	 * queued outside the job scheduler.
+	 */
+
+	del_timer_sync(&kctx->soft_job_timeout);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		katom = list_entry(entry, struct kbase_jd_atom, queue);
+		kbase_cancel_soft_job(katom);
+	}
+
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_cancel_all_atoms(kctx);
+#endif
+
+	mutex_unlock(&kctx->jctx.lock);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 */
+	flush_workqueue(kctx->dma_fence.wq);
+#endif
+
+	kbase_jm_wait_for_zero_jobs(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_zap_context);
+
+int kbase_jd_init(struct kbase_context *kctx)
+{
+	int i;
+	int mali_err = 0;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kctx->jctx.job_done_wq = alloc_workqueue("mali_jd",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (NULL == kctx->jctx.job_done_wq) {
+		mali_err = -ENOMEM;
+		goto out1;
+	}
+
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		init_waitqueue_head(&kctx->jctx.atoms[i].completed);
+
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]);
+
+		/* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
+		kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
+		kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+		kctx->jctx.atoms[i].dma_fence.context =
+						dma_fence_context_alloc(1);
+		atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks);
+#endif
+	}
+
+	mutex_init(&kctx->jctx.lock);
+
+	init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
+
+	spin_lock_init(&kctx->jctx.tb_lock);
+
+	kctx->jctx.job_nr = 0;
+	INIT_LIST_HEAD(&kctx->completed_jobs);
+	atomic_set(&kctx->work_count, 0);
+
+	return 0;
+
+ out1:
+	return mali_err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_init);
+
+void kbase_jd_exit(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	/* Work queue is emptied by this */
+	destroy_workqueue(kctx->jctx.job_done_wq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_exit);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
new file mode 100644
index 0000000..25f1ed5
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -0,0 +1,236 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/seq_file.h>
+#include <mali_kbase.h>
+#include <mali_kbase_jd_debugfs.h>
+#include <mali_kbase_dma_fence.h>
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif
+#include <mali_kbase_ioctl.h>
+
+struct kbase_jd_debugfs_depinfo {
+	u8 id;
+	char type;
+};
+
+static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
+					struct seq_file *sfile)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	struct kbase_sync_fence_info info;
+	int res;
+
+	switch (atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		res = kbase_sync_fence_out_info_get(atom, &info);
+		if (0 == res) {
+			seq_printf(sfile, "Sa([%p]%d) ",
+				   info.fence, info.status);
+			break;
+		}
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		res = kbase_sync_fence_in_info_get(atom, &info);
+		if (0 == res) {
+			seq_printf(sfile, "Wa([%p]%d) ",
+				   info.fence, info.status);
+			break;
+		}
+	default:
+		break;
+	}
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		struct kbase_fence_cb *cb;
+
+		if (atom->dma_fence.fence) {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+			struct fence *fence = atom->dma_fence.fence;
+#else
+			struct dma_fence *fence = atom->dma_fence.fence;
+#endif
+
+			seq_printf(sfile,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+					"Sd(%u#%u: %s) ",
+#else
+					"Sd(%llu#%u: %s) ",
+#endif
+					fence->context,
+					fence->seqno,
+					dma_fence_is_signaled(fence) ?
+						"signaled" : "active");
+		}
+
+		list_for_each_entry(cb, &atom->dma_fence.callbacks,
+				    node) {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+			struct fence *fence = cb->fence;
+#else
+			struct dma_fence *fence = cb->fence;
+#endif
+
+			seq_printf(sfile,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+					"Wd(%u#%u: %s) ",
+#else
+					"Wd(%llu#%u: %s) ",
+#endif
+					fence->context,
+					fence->seqno,
+					dma_fence_is_signaled(fence) ?
+						"signaled" : "active");
+		}
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+}
+
+static void kbasep_jd_debugfs_atom_deps(
+		struct kbase_jd_debugfs_depinfo *deps,
+		struct kbase_jd_atom *atom)
+{
+	struct kbase_context *kctx = atom->kctx;
+	int i;
+
+	for (i = 0; i < 2; i++)	{
+		deps[i].id = (unsigned)(atom->dep[i].atom ?
+				kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0);
+
+		switch (atom->dep[i].dep_type) {
+		case BASE_JD_DEP_TYPE_INVALID:
+			deps[i].type = ' ';
+			break;
+		case BASE_JD_DEP_TYPE_DATA:
+			deps[i].type = 'D';
+			break;
+		case BASE_JD_DEP_TYPE_ORDER:
+			deps[i].type = '>';
+			break;
+		default:
+			deps[i].type = '?';
+			break;
+		}
+	}
+}
+/**
+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file.
+ * @sfile: The debugfs entry
+ * @data:  Data associated with the entry
+ *
+ * This function is called to get the contents of the JD atoms debugfs file.
+ * This is a report of all atoms managed by kbase_jd_context.atoms
+ *
+ * Return: 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+	struct kbase_jd_atom *atoms;
+	unsigned long irq_flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Print version */
+	seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION);
+
+	/* Print U/K API version */
+	seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR,
+			BASE_UK_VERSION_MINOR);
+
+	/* Print table heading */
+	seq_puts(sfile, " ID, Core req, St, CR,   Predeps,           Start time, Additional info...\n");
+
+	atoms = kctx->jctx.atoms;
+	/* General atom states */
+	mutex_lock(&kctx->jctx.lock);
+	/* JS-related states */
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+	for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
+		struct kbase_jd_atom *atom = &atoms[i];
+		s64 start_timestamp = 0;
+		struct kbase_jd_debugfs_depinfo deps[2];
+
+		if (atom->status == KBASE_JD_ATOM_STATE_UNUSED)
+			continue;
+
+		/* start_timestamp is cleared as soon as the atom leaves UNUSED state
+		 * and set before a job is submitted to the h/w, a non-zero value means
+		 * it is valid */
+		if (ktime_to_ns(atom->start_timestamp))
+			start_timestamp = ktime_to_ns(
+					ktime_sub(ktime_get(), atom->start_timestamp));
+
+		kbasep_jd_debugfs_atom_deps(deps, atom);
+
+		seq_printf(sfile,
+				"%3u, %8x, %2u, %2u, %c%3u %c%3u, %20lld, ",
+				i, atom->core_req, atom->status,
+				atom->coreref_state,
+				deps[0].type, deps[0].id,
+				deps[1].type, deps[1].id,
+				start_timestamp);
+
+
+		kbase_jd_debugfs_fence_info(atom, sfile);
+
+		seq_puts(sfile, "\n");
+	}
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&kctx->jctx.lock);
+
+	return 0;
+}
+
+
+/**
+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * Return: file descriptor
+ */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private);
+}
+
+static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+	.open = kbasep_jd_debugfs_atoms_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Expose all atoms */
+	debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx,
+			&kbasep_jd_debugfs_atoms_fops);
+
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
new file mode 100644
index 0000000..fae3291
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_jd_debugfs.h
+ * Header file for job dispatcher-related entries in debugfs
+ */
+
+#ifndef _KBASE_JD_DEBUGFS_H
+#define _KBASE_JD_DEBUGFS_H
+
+#include <linux/debugfs.h>
+
+#define MALI_JD_DEBUGFS_VERSION 2
+
+/* Forward declarations */
+struct kbase_context;
+
+/**
+ * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system
+ *
+ * @kctx Pointer to kbase_context
+ */
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx);
+
+#endif  /*_KBASE_JD_DEBUGFS_H*/
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
new file mode 100644
index 0000000..0c5c6a6
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_hwaccess_jm.h"
+#include "mali_kbase_jm.h"
+
+/**
+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot
+ *			 @js on the active context.
+ * @kbdev:		Device pointer
+ * @js:			Job slot to run on
+ * @nr_jobs_to_submit:	Number of jobs to attempt to submit
+ *
+ * Return: true if slot can still be submitted on, false if slot is now full.
+ */
+static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
+				int nr_jobs_to_submit)
+{
+	struct kbase_context *kctx;
+	int i;
+
+	kctx = kbdev->hwaccess.active_kctx;
+
+	if (!kctx)
+		return true;
+
+	for (i = 0; i < nr_jobs_to_submit; i++) {
+		struct kbase_jd_atom *katom = kbase_js_pull(kctx, js);
+
+		if (!katom)
+			return true; /* Context has no jobs on this slot */
+
+		kbase_backend_run_atom(kbdev, katom);
+	}
+
+	return false; /* Slot ringbuffer should now be full */
+}
+
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	u32 ret_mask = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (js_mask) {
+		int js = ffs(js_mask) - 1;
+		int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
+
+		if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
+			ret_mask |= (1 << js);
+
+		js_mask &= ~(1 << js);
+	}
+
+	return ret_mask;
+}
+
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick(kbdev, js_mask);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_try_kick_all(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick_all(kbdev);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->hwaccess.active_kctx == kctx)
+		kbdev->hwaccess.active_kctx = NULL;
+}
+
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->event_code != BASE_JD_EVENT_STOPPED &&
+			katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
+		return kbase_js_complete_atom(katom, NULL);
+	} else {
+		kbase_js_unpull(katom->kctx, katom);
+		return NULL;
+	}
+}
+
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return kbase_js_complete_atom(katom, end_timestamp);
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
new file mode 100644
index 0000000..a74ee24
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -0,0 +1,110 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Job manager common APIs
+ */
+
+#ifndef _KBASE_JM_H_
+#define _KBASE_JM_H_
+
+/**
+ * kbase_jm_kick() - Indicate that there are jobs ready to run.
+ * @kbdev:	Device pointer
+ * @js_mask:	Mask of the job slots that can be pulled from.
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job
+ *			 slots.
+ * @kbdev:	Device pointer
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
+{
+	return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+/**
+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick
+ * @kbdev:   Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
+ * @kbdev:  Device pointer
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick_all() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick_all(struct kbase_device *kbdev);
+
+/**
+ * kbase_jm_idle_ctx() - Mark a context as idle.
+ * @kbdev:	Device pointer
+ * @kctx:	Context to mark as idle
+ *
+ * No more atoms will be pulled from this context until it is marked as active
+ * by kbase_js_use_ctx().
+ *
+ * The context should have no atoms currently pulled from it
+ * (kctx->atoms_pulled == 0).
+ *
+ * Caller must hold the hwaccess_lock
+ */
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
+ *				  been soft-stopped or will fail due to a
+ *				  dependency
+ * @kbdev:	Device pointer
+ * @katom:	Atom that has been stopped or will be failed
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+			struct kbase_jd_atom *katom);
+
+/**
+ * kbase_jm_complete() - Complete an atom
+ * @kbdev:		Device pointer
+ * @katom:		Atom that has completed
+ * @end_timestamp:	Timestamp of atom completion
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp);
+
+#endif /* _KBASE_JM_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
new file mode 100644
index 0000000..a3e8248
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -0,0 +1,2789 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_ctx_sched.h>
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_jm.h"
+#include "mali_kbase_hwaccess_jm.h"
+
+/*
+ * Private types
+ */
+
+/* Bitpattern indicating the result of releasing a context */
+enum {
+	/* The context was descheduled - caller should try scheduling in a new
+	 * one to keep the runpool full */
+	KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+	/* Ctx attributes were changed - caller should try scheduling all
+	 * contexts */
+	KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
+};
+
+typedef u32 kbasep_js_release_result;
+
+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
+	KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
+	KBASE_JS_ATOM_SCHED_PRIO_LOW  /* BASE_JD_PRIO_LOW */
+};
+
+const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
+	BASE_JD_PRIO_HIGH,   /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+	BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+	BASE_JD_PRIO_LOW     /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+};
+
+
+/*
+ * Private function prototypes
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state);
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_ctx_job_cb callback);
+
+/* Helper for trace subcodes */
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	return atomic_read(&kctx->refcount);
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+/*
+ * Private functions
+ */
+
+/**
+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
+ * @features: JSn_FEATURE register value
+ *
+ * Given a JSn_FEATURE register value returns the core requirements that match
+ *
+ * Return: Core requirement bit mask
+ */
+static base_jd_core_req core_reqs_from_jsn_features(u16 features)
+{
+	base_jd_core_req core_req = 0u;
+
+	if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+		core_req |= BASE_JD_REQ_V;
+
+	if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+		core_req |= BASE_JD_REQ_CF;
+
+	if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+		core_req |= BASE_JD_REQ_CS;
+
+	if ((features & JS_FEATURE_TILER_JOB) != 0)
+		core_req |= BASE_JD_REQ_T;
+
+	if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+		core_req |= BASE_JD_REQ_FS;
+
+	return core_req;
+}
+
+static void kbase_js_sync_timers(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+	kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
+
+/* Hold the mmu_hw_mutex and hwaccess_lock for this */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	bool result = false;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	as_nr = kctx->as_nr;
+	if (atomic_read(&kctx->refcount) > 0) {
+		KBASE_DEBUG_ASSERT(as_nr >= 0);
+
+		kbase_ctx_sched_retain_ctx_refcount(kctx);
+		KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
+				NULL, 0u, atomic_read(&kctx->refcount));
+		result = true;
+	}
+
+	return result;
+}
+
+/**
+ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority to check.
+ *
+ * Return true if there are no atoms to pull. There may be running atoms in the
+ * ring buffer even if there are no atoms to pull. It is also possible for the
+ * ring buffer to be full (with running atoms) when this functions returns
+ * true.
+ *
+ * Return: true if there are no atoms to pull, false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	return RB_EMPTY_ROOT(&rb->runnable_tree);
+}
+
+/**
+ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no
+ * pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if the ring buffers for all priorities have no pullable atoms,
+ *	   false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue.
+ * @kctx:     Pointer to kbase context with the queue.
+ * @js:       Job slot id to iterate.
+ * @prio:     Priority id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over a queue and invoke @callback for each entry in the queue, and
+ * remove the entry from the queue.
+ *
+ * If entries are added to the queue while this is running those entries may, or
+ * may not be covered. To ensure that all entries in the buffer have been
+ * enumerated when this function returns jsctx->lock must be held when calling
+ * this function.
+ *
+ * The HW access lock must always be held when calling this function.
+ */
+static void
+jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
+		kbasep_js_ctx_job_cb callback)
+{
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (!RB_EMPTY_ROOT(&queue->runnable_tree)) {
+		struct rb_node *node = rb_first(&queue->runnable_tree);
+		struct kbase_jd_atom *entry = rb_entry(node,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		rb_erase(node, &queue->runnable_tree);
+		callback(kctx->kbdev, entry);
+	}
+
+	while (!list_empty(&queue->x_dep_head)) {
+		struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next,
+				struct kbase_jd_atom, queue);
+
+		list_del(queue->x_dep_head.next);
+
+		callback(kctx->kbdev, entry);
+	}
+}
+
+/**
+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue
+ * @kctx:     Pointer to kbase context with queue.
+ * @js:       Job slot id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over all the different priorities, and for each call
+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
+ * for each entry, and remove the entry from the queue.
+ */
+static inline void
+jsctx_queue_foreach(struct kbase_context *kctx, int js,
+		kbasep_js_ctx_job_cb callback)
+{
+	int prio;
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+		jsctx_queue_foreach_prio(kctx, js, prio, callback);
+}
+
+/**
+ * jsctx_rb_peek_prio(): - Check buffer and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a pointer to
+ * the next atom, unless the ring buffer is empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	struct rb_node *node;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	node = rb_first(&rb->runnable_tree);
+	if (!node)
+		return NULL;
+
+	return rb_entry(node, struct kbase_jd_atom, runnable_tree_node);
+}
+
+/**
+ * jsctx_rb_peek(): - Check all priority buffers and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Check the ring buffers for all priorities, starting from
+ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
+ * pointer to the next atom, unless all the priority's ring buffers are empty.
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		struct kbase_jd_atom *katom;
+
+		katom = jsctx_rb_peek_prio(kctx, js, prio);
+		if (katom)
+			return katom;
+	}
+
+	return NULL;
+}
+
+/**
+ * jsctx_rb_pull(): - Mark atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to pull.
+ *
+ * Mark an atom previously obtained from jsctx_rb_peek() as running.
+ *
+ * @katom must currently be at the head of the ring buffer.
+ */
+static inline void
+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/* Atoms must be pulled in the correct order. */
+	WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
+
+	rb_erase(&katom->runnable_tree_node, &rb->runnable_tree);
+}
+
+#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0)
+
+static void
+jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+	struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (*new) {
+		struct kbase_jd_atom *entry = container_of(*new,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		parent = *new;
+		if (LESS_THAN_WRAP(katom->age, entry->age))
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&katom->runnable_tree_node, parent, new);
+	rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree);
+}
+
+/**
+ * jsctx_rb_unpull(): - Undo marking of atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to unpull.
+ *
+ * Undo jsctx_rb_pull() and put @katom back in the queue.
+ *
+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
+ * pulled.
+ */
+static inline void
+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_tree_add(kctx, katom);
+}
+
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
+					int js,
+					bool is_scheduled);
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+
+/*
+ * Functions private to KBase ('Protected' functions)
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev)
+{
+	struct kbasep_js_device_data *jsdd;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	jsdd = &kbdev->js_data;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Soft-stop will be disabled on a single context by default unless
+	 * softstop_always is set */
+	jsdd->softstop_always = false;
+#endif				/* CONFIG_MALI_DEBUG */
+	jsdd->nr_all_contexts_running = 0;
+	jsdd->nr_user_contexts_running = 0;
+	jsdd->nr_contexts_pullable = 0;
+	atomic_set(&jsdd->nr_contexts_runnable, 0);
+	/* No ctx allowed to submit */
+	jsdd->runpool_irq.submit_allowed = 0u;
+	memset(jsdd->runpool_irq.ctx_attr_ref_count, 0,
+			sizeof(jsdd->runpool_irq.ctx_attr_ref_count));
+	memset(jsdd->runpool_irq.slot_affinities, 0,
+			sizeof(jsdd->runpool_irq.slot_affinities));
+	memset(jsdd->runpool_irq.slot_affinity_refcount, 0,
+			sizeof(jsdd->runpool_irq.slot_affinity_refcount));
+	INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list);
+
+	/* Config attributes */
+	jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+	jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+	jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+	else
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS;
+	jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+	jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408;
+	else
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+	jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+	jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
+	jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
+	atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
+
+	dev_dbg(kbdev->dev, "JS Config Attribs: ");
+	dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
+			jsdd->scheduling_period_ns);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u",
+			jsdd->soft_stop_ticks);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u",
+			jsdd->soft_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u",
+			jsdd->hard_stop_ticks_ss);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u",
+			jsdd->hard_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u",
+			jsdd->hard_stop_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u",
+			jsdd->gpu_reset_ticks_ss);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u",
+			jsdd->gpu_reset_ticks_cl);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u",
+			jsdd->gpu_reset_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u",
+			jsdd->ctx_timeslice_ns);
+	dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i",
+		atomic_read(&jsdd->soft_job_timeout_ms));
+
+	if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
+			jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
+			jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping &&
+			jsdd->hard_stop_ticks_dumping <
+			jsdd->gpu_reset_ticks_dumping)) {
+		dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n");
+		return -EINVAL;
+	}
+
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.",
+			jsdd->soft_stop_ticks,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.",
+			jsdd->hard_stop_ticks_ss,
+			jsdd->hard_stop_ticks_dumping,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing.");
+#endif
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i)
+		jsdd->js_reqs[i] = core_reqs_from_jsn_features(
+			kbdev->gpu_props.props.raw_props.js_features[i]);
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+
+	mutex_init(&jsdd->runpool_mutex);
+	mutex_init(&jsdd->queue_mutex);
+	spin_lock_init(&kbdev->hwaccess_lock);
+	sema_init(&jsdd->schedule_sem, 1);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
+		INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]);
+		INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]);
+	}
+
+	return 0;
+}
+
+void kbasep_js_devdata_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbasep_js_devdata_term(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	/* The caller must de-register all contexts before calling this
+	 */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
+	KBASE_DEBUG_ASSERT(memcmp(
+	        js_devdata->runpool_irq.ctx_attr_ref_count,
+	        zero_ctx_attr_ref_count,
+	        sizeof(zero_ctx_attr_ref_count)) == 0);
+	CSTD_UNUSED(zero_ctx_attr_ref_count);
+}
+
+int kbasep_js_kctx_init(struct kbase_context * const kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int i, j;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	js_kctx_info->ctx.nr_jobs = 0;
+	kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+	kbase_ctx_flag_clear(kctx, KCTX_DYING);
+	memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
+			sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
+
+	/* Initially, the context is disabled from submission until the create
+	 * flags are set */
+	kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+	mutex_init(&js_kctx_info->ctx.jsctx_mutex);
+
+	init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
+
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
+			INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head);
+			kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT;
+		}
+	}
+
+	return 0;
+}
+
+void kbasep_js_kctx_term(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int js;
+	bool update_ctx_count = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* The caller must de-register all jobs before calling this */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
+
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) {
+		WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
+		atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		update_ctx_count = true;
+		kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+	}
+
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if (update_ctx_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		kbase_backend_ctx_count_changed(kbdev);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_nolock - Variant of
+ *                                         kbase_jd_ctx_list_add_pullable()
+ *                                         where the caller must hold
+ *                                         hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head_nolock - Variant of
+ *                                              kbase_js_ctx_list_add_pullable_head()
+ *                                              where the caller must hold
+ *                                              hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head_nolock(
+		struct kbase_device *kbdev, struct kbase_context *kctx, int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
+ *                                       per-slot pullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the head.
+ *
+ * This function should be used when a context has been scheduled, but no jobs
+ * can currently be pulled from it.
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the
+ *                                           per-slot unpullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on the per-slot pullable queue. It will be
+ * removed from the pullable queue before being added to the unpullable queue.
+ *
+ * This function should be used when a context has been pulled from, and there
+ * are no jobs remaining on the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+				&kbdev->js_data.ctx_list_unpullable[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable
+ *                                   or unpullable context queues
+ * @kbdev:  Device pointer
+ * @kctx:   Context to remove from queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on one of the queues.
+ *
+ * This function should be used when a context has no jobs on the GPU, and no
+ * jobs remaining for the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head()
+ *                                     where the caller must hold
+ *                                     hwaccess_lock
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
+						struct kbase_device *kbdev,
+						int js)
+{
+	struct kbase_context *kctx;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (list_empty(&kbdev->js_data.ctx_list_pullable[js]))
+		return NULL;
+
+	kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next,
+					struct kbase_context,
+					jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
+ *                              queue.
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head(
+		struct kbase_device *kbdev, int js)
+{
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the
+ *                         specified slot
+ * @kctx:          Context pointer
+ * @js:            Job slot to use
+ * @is_scheduled:  true if the context is currently scheduled
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:         true if context can be pulled from on specified slot
+ *                 false otherwise
+ */
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
+					bool is_scheduled)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_jd_atom *katom;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	js_devdata = &kctx->kbdev->js_data;
+
+	if (is_scheduled) {
+		if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+			return false;
+	}
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return false; /* No pullable atoms */
+	if (kctx->blocked_js[js][katom->sched_priority])
+		return false;
+	if (atomic_read(&katom->blocked))
+		return false; /* next atom blocked */
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return false;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return false;
+	}
+
+	return true;
+}
+
+static bool kbase_js_dep_validate(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool ret = true;
+	bool has_dep = false, has_x_dep = false;
+	int js = kbase_js_get_slot(kbdev, katom);
+	int prio = katom->sched_priority;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+		if (dep_atom) {
+			int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+			int dep_prio = dep_atom->sched_priority;
+
+			/* Dependent atom must already have been submitted */
+			if (!(dep_atom->atom_flags &
+					KBASE_KATOM_FLAG_JSCTX_IN_TREE)) {
+				ret = false;
+				break;
+			}
+
+			/* Dependencies with different priorities can't
+			  be represented in the ringbuffer */
+			if (prio != dep_prio) {
+				ret = false;
+				break;
+			}
+
+			if (js == dep_js) {
+				/* Only one same-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * same-slot dependency */
+				if (dep_atom->post_dep) {
+					ret = false;
+					break;
+				}
+				has_dep = true;
+			} else {
+				/* Only one cross-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_x_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * cross-slot dependency */
+				if (dep_atom->x_post_dep) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already be in the
+				 * HW access ringbuffer */
+				if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already have
+				 * completed */
+				if (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_IN_JS) {
+					ret = false;
+					break;
+				}
+				/* Cross-slot dependencies must not violate
+				 * PRLAM-8987 affinity restrictions */
+				if (kbase_hw_has_issue(kbdev,
+							BASE_HW_ISSUE_8987) &&
+						(js == 2 || dep_js == 2)) {
+					ret = false;
+					break;
+				}
+				has_x_dep = true;
+			}
+
+			/* Dependency can be represented in ringbuffers */
+		}
+	}
+
+	/* If dependencies can be represented by ringbuffer then clear them from
+	 * atom structure */
+	if (ret) {
+		for (i = 0; i < 2; i++) {
+			struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+			if (dep_atom) {
+				int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+
+				if ((js != dep_js) &&
+					(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_COMPLETED)
+					&& (dep_atom->status !=
+					KBASE_JD_ATOM_STATE_HW_COMPLETED)
+					&& (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED)) {
+
+					katom->atom_flags |=
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+					katom->x_pre_dep = dep_atom;
+					dep_atom->x_post_dep = katom;
+					if (kbase_jd_katom_dep_type(
+							&katom->dep[i]) ==
+							BASE_JD_DEP_TYPE_DATA)
+						katom->atom_flags |=
+						KBASE_KATOM_FLAG_FAIL_BLOCKER;
+				}
+				if ((kbase_jd_katom_dep_type(&katom->dep[i])
+						== BASE_JD_DEP_TYPE_DATA) &&
+						(js == dep_js)) {
+					katom->pre_dep = dep_atom;
+					dep_atom->post_dep = katom;
+				}
+
+				list_del(&katom->dep_item[i]);
+				kbase_jd_katom_dep_clear(&katom->dep[i]);
+			}
+		}
+	}
+
+	return ret;
+}
+
+bool kbasep_js_add_job(struct kbase_context *kctx,
+		struct kbase_jd_atom *atom)
+{
+	unsigned long flags;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+	bool enqueue_required = false;
+	bool timer_sync = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/*
+	 * Begin Runpool transaction
+	 */
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
+	++(js_kctx_info->ctx.nr_jobs);
+
+	/* Setup any scheduling information */
+	kbasep_js_clear_job_retry_submit(atom);
+
+	/* Lock for state available during IRQ */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (!kbase_js_dep_validate(kctx, atom)) {
+		/* Dependencies could not be represented */
+		--(js_kctx_info->ctx.nr_jobs);
+
+		/* Setting atom status back to queued as it still has unresolved
+		 * dependencies */
+		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		goto out_unlock;
+	}
+
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_READY);
+	KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
+
+	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* Context Attribute Refcounting */
+	kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom);
+
+	if (enqueue_required) {
+		if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false))
+			timer_sync = kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+		else
+			timer_sync = kbase_js_ctx_list_add_unpullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+	}
+	/* If this context is active and the atom is the first on its slot,
+	 * kick the job manager to attempt to fast-start the atom */
+	if (enqueue_required && kctx == kbdev->hwaccess.active_kctx)
+		kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+	/* End runpool transaction */
+
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+			/* A job got added while/after kbase_job_zap_context()
+			 * was called on a non-scheduled context. Kill that job
+			 * by killing the context. */
+			kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx,
+					false);
+		} else if (js_kctx_info->ctx.nr_jobs == 1) {
+			/* Handle Refcount going from 0 to 1: schedule the
+			 * context on the Queue */
+			KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+			dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+
+			/* Queue was updated - caller must try to
+			 * schedule the head context */
+			WARN_ON(!enqueue_required);
+		}
+	}
+out_unlock:
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return enqueue_required;
+}
+
+void kbasep_js_remove_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc,
+			kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* De-refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
+	--(js_kctx_info->ctx.nr_jobs);
+}
+
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	unsigned long flags;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool attr_state_changed;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+	kbasep_js_remove_job(kbdev, kctx, katom);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* The atom has 'finished' (will not be re-run), so no need to call
+	 * kbasep_js_has_atom_finished().
+	 *
+	 * This is because it returns false for soft-stopped atoms, but we
+	 * want to override that, because we're cancelling an atom regardless of
+	 * whether it was soft-stopped or not */
+	attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
+			&katom_retained_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return attr_state_changed;
+}
+
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	bool result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	return result;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+		int as_nr)
+{
+	unsigned long flags;
+	struct kbase_context *found_kctx = NULL;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	found_kctx = kbdev->as_to_kctx[as_nr];
+
+	if (found_kctx != NULL)
+		kbase_ctx_sched_retain_ctx_refcount(found_kctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return found_kctx;
+}
+
+/**
+ * kbasep_js_release_result - Try running more jobs after releasing a context
+ *                            and/or atom
+ *
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed
+ *
+ * This collates a set of actions that must happen whilst hwaccess_lock is held.
+ *
+ * This includes running more jobs when:
+ * - The previously released kctx caused a ctx attribute change,
+ * - The released atom caused a ctx attribute change,
+ * - Slots were previously blocked due to affinity restrictions,
+ * - Submission during IRQ handling failed.
+ *
+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were
+ *         changed. The caller should try scheduling all contexts
+ */
+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state,
+		bool runpool_ctx_attr_change)
+{
+	struct kbasep_js_device_data *js_devdata;
+	kbasep_js_release_result result = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom_retained_state != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (js_devdata->nr_user_contexts_running != 0) {
+		bool retry_submit = false;
+		int retry_jobslot = 0;
+
+		if (katom_retained_state)
+			retry_submit = kbasep_js_get_atom_retry_submit_slot(
+					katom_retained_state, &retry_jobslot);
+
+		if (runpool_ctx_attr_change || retry_submit) {
+			/* A change in runpool ctx attributes might mean we can
+			 * run more jobs than before  */
+			result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+
+			KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB,
+						kctx, NULL, 0u, retry_jobslot);
+		}
+	}
+	return result;
+}
+
+/*
+ * Internal function to release the reference on a ctx and an atom's "retained
+ * state", only taking the runpool and as transaction mutexes
+ *
+ * This also starts more jobs running in the case of an ctx-attribute state
+ * change
+ *
+ * This does none of the followup actions for scheduling:
+ * - It does not schedule in a new context
+ * - It does not requeue or handle dying contexts
+ *
+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead
+ *
+ * Requires:
+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
+ * - Context has a non-zero refcount
+ * - Caller holds js_kctx_info->ctx.jsctx_mutex
+ * - Caller holds js_devdata->runpool_mutex
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	kbasep_js_release_result release_result = 0u;
+	bool runpool_ctx_attr_change = false;
+	int kctx_as_nr;
+	int new_ref_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	kctx_as_nr = kctx->as_nr;
+	KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/*
+	 * Transaction begins on AS and runpool_irq
+	 *
+	 * Assert about out calling contract
+	 */
+	mutex_lock(&kbdev->pm.lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/* Update refcount */
+	kbase_ctx_sched_release_ctx(kctx);
+	new_ref_count = atomic_read(&kctx->refcount);
+
+	/* Release the atom if it finished (i.e. wasn't soft-stopped) */
+	if (kbasep_js_has_atom_finished(katom_retained_state))
+		runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(
+				kbdev, kctx, katom_retained_state);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u,
+			new_ref_count);
+
+	if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) &&
+			!kbase_pm_is_suspending(kbdev)) {
+		/* Context is kept scheduled into an address space even when
+		 * there are no jobs, in this case we have to handle the
+		 * situation where all jobs have been evicted from the GPU and
+		 * submission is disabled.
+		 *
+		 * At this point we re-enable submission to allow further jobs
+		 * to be executed
+		 */
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+	}
+
+	/* Make a set of checks to see if the context should be scheduled out.
+	 * Note that there'll always be at least 1 reference to the context
+	 * which was previously acquired by kbasep_js_schedule_ctx(). */
+	if (new_ref_count == 1 &&
+		(!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
+							kbdev->pm.suspending)) {
+		int num_slots = kbdev->gpu_props.num_job_slots;
+		int slot;
+
+		/* Last reference, and we've been told to remove this context
+		 * from the Run Pool */
+		dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d",
+				kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
+				kbasep_js_is_submit_allowed(js_devdata, kctx));
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_mmu_as_released(kctx->as_nr);
+#endif
+		KBASE_TLSTREAM_TL_NRET_AS_CTX(&kbdev->as[kctx->as_nr], kctx);
+
+		kbase_backend_release_ctx_irq(kbdev, kctx);
+
+		if (kbdev->hwaccess.active_kctx == kctx)
+			kbdev->hwaccess.active_kctx = NULL;
+
+		/* Ctx Attribute handling
+		 *
+		 * Releasing atoms attributes must either happen before this, or
+		 * after the KCTX_SHEDULED flag is changed, otherwise we
+		 * double-decount the attributes
+		 */
+		runpool_ctx_attr_change |=
+			kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
+
+		/* Releasing the context and katom retained state can allow
+		 * more jobs to run */
+		release_result |=
+			kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev,
+						kctx, katom_retained_state,
+						runpool_ctx_attr_change);
+
+		/*
+		 * Transaction ends on AS and runpool_irq:
+		 *
+		 * By this point, the AS-related data is now clear and ready
+		 * for re-use.
+		 *
+		 * Since releases only occur once for each previous successful
+		 * retain, and no more retains are allowed on this context, no
+		 * other thread will be operating in this
+		 * code whilst we are
+		 */
+
+		/* Recalculate pullable status for all slots */
+		for (slot = 0; slot < num_slots; slot++) {
+			if (kbase_js_ctx_pullable(kctx, slot, false))
+				kbase_js_ctx_list_add_pullable_nolock(kbdev,
+						kctx, slot);
+		}
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		kbase_backend_release_ctx_noirq(kbdev, kctx);
+
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* Note: Don't reuse kctx_as_nr now */
+
+		/* Synchronize with any timers */
+		kbase_backend_ctx_count_changed(kbdev);
+
+		/* update book-keeping info */
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+		/* Signal any waiter that the context is not scheduled, so is
+		 * safe for termination - once the jsctx_mutex is also dropped,
+		 * and jobs have finished. */
+		wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+		/* Queue an action to occur after we've dropped the lock */
+		release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED |
+			KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+	} else {
+		kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
+				katom_retained_state, runpool_ctx_attr_change);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return release_result;
+}
+
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	/* Setup a dummy katom_retained_state */
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+							&katom_retained_state);
+}
+
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx, bool has_pm_ref)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* This is called if and only if you've you've detached the context from
+	 * the Runpool Queue, and not added it back to the Runpool
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Dying: don't requeue, but kill all jobs on the context. This
+		 * happens asynchronously */
+		dev_dbg(kbdev->dev,
+			"JS: ** Killing Context %p on RunPool Remove **", kctx);
+		kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
+	}
+}
+
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL)
+		kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+			&katom_retained_state);
+}
+
+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into
+ * kbase_js_sched_all() */
+static void kbasep_js_runpool_release_ctx_no_schedule(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+	struct kbasep_js_atom_retained_state katom_retained_state_struct;
+	struct kbasep_js_atom_retained_state *katom_retained_state =
+		&katom_retained_state_struct;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	kbasep_js_atom_retained_state_init_invalid(katom_retained_state);
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* NOTE: could return release_result if the caller would like to know
+	 * whether it should schedule a new context, but currently no callers do
+	 */
+}
+
+void kbase_js_set_timeouts(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_backend_timeouts_changed(kbdev);
+}
+
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	bool kctx_suspended = false;
+	int as_nr;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Pick available address space for this context */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	as_nr = kbase_ctx_sched_retain_ctx(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		as_nr = kbase_backend_find_and_release_free_address_space(
+				kbdev, kctx);
+		if (as_nr != KBASEP_AS_NR_INVALID) {
+			/* Attempt to retain the context again, this should
+			 * succeed */
+			mutex_lock(&kbdev->mmu_hw_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			as_nr = kbase_ctx_sched_retain_ctx(kctx);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&kbdev->mmu_hw_mutex);
+
+			WARN_ON(as_nr == KBASEP_AS_NR_INVALID);
+		}
+	}
+	if (as_nr == KBASEP_AS_NR_INVALID)
+		return false; /* No address spaces currently available */
+
+	/*
+	 * Atomic transaction on the Context and Run Pool begins
+	 */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Check to see if context is dying due to kbase_job_zap_context() */
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Roll back the transaction so far and return */
+		kbase_ctx_sched_release_ctx(kctx);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL,
+				0u,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	kbase_ctx_flag_set(kctx, KCTX_SCHEDULED);
+
+	/* Assign context to previously chosen address space */
+	if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
+		/* Roll back the transaction so far and return */
+		kbase_ctx_sched_release_ctx(kctx);
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	kbdev->hwaccess.active_kctx = kctx;
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
+#endif
+	KBASE_TLSTREAM_TL_RET_AS_CTX(&kbdev->as[kctx->as_nr], kctx);
+
+	/* Cause any future waiter-on-termination to wait until the context is
+	 * descheduled */
+	wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+	/* Re-check for suspending: a suspend could've occurred, and all the
+	 * contexts could've been removed from the runpool before we took this
+	 * lock. In this case, we don't want to allow this context to run jobs,
+	 * we just want it out immediately.
+	 *
+	 * The DMB required to read the suspend flag was issued recently as part
+	 * of the hwaccess_lock locking. If a suspend occurs *after* that lock
+	 * was taken (i.e. this condition doesn't execute), then the
+	 * kbasep_js_suspend() code will cleanup this context instead (by virtue
+	 * of it being called strictly after the suspend flag is set, and will
+	 * wait for this lock to drop) */
+	if (kbase_pm_is_suspending(kbdev)) {
+		/* Cause it to leave at some later point */
+		bool retained;
+
+		retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+		KBASE_DEBUG_ASSERT(retained);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+		kctx_suspended = true;
+	}
+
+	/* Transaction complete */
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Synchronize with any timers */
+	kbase_backend_ctx_count_changed(kbdev);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	/* Note: after this point, the context could potentially get scheduled
+	 * out immediately */
+
+	if (kctx_suspended) {
+		/* Finishing forcing out the context due to a suspend. Use a
+		 * variant of kbasep_js_runpool_release_ctx() that doesn't
+		 * schedule a new context, to prevent a risk of recursion back
+		 * into this function */
+		kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx);
+		return false;
+	}
+	return true;
+}
+
+static bool kbase_js_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+			kbase_backend_use_ctx_sched(kbdev, kctx)) {
+		/* Context already has ASID - mark as active */
+		kbdev->hwaccess.active_kctx = kctx;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return true; /* Context already scheduled */
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return kbasep_js_schedule_ctx(kbdev, kctx);
+}
+
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	bool is_scheduled;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* This must never be attempted whilst suspending - i.e. it should only
+	 * happen in response to a syscall from a user-space thread */
+	BUG_ON(kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Mark the context as privileged */
+	kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED);
+
+	is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED);
+	if (!is_scheduled) {
+		/* Add the context to the pullable list */
+		if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0))
+			kbase_js_sync_timers(kbdev);
+
+		/* Fast-starting requires the jsctx_mutex to be dropped,
+		 * because it works on multiple ctxs */
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		/* Try to schedule the context in */
+		kbase_js_sched_all(kbdev);
+
+		/* Wait for the context to be scheduled in */
+		wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			   kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	} else {
+		/* Already scheduled in - We need to retain it to keep the
+		 * corresponding address space */
+		kbasep_js_runpool_retain_ctx(kbdev, kctx);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+}
+KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx);
+
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* We don't need to use the address space anymore */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Release the context - it will be scheduled out */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	kbase_js_sched_all(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx);
+
+void kbasep_js_suspend(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+	u16 retained = 0u;
+	int nr_privileged_ctx = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Prevent all contexts from submitting */
+	js_devdata->runpool_irq.submit_allowed = 0;
+
+	/* Retain each of the contexts, so we can cause it to leave even if it
+	 * had no refcount to begin with */
+	for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
+		struct kbase_context *kctx = kbdev->as_to_kctx[i];
+
+		retained = retained << 1;
+
+		if (kctx) {
+			kbase_ctx_sched_retain_ctx_refcount(kctx);
+			retained |= 1u;
+			/* We can only cope with up to 1 privileged context -
+			 * the instrumented context. It'll be suspended by
+			 * disabling instrumentation */
+			if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+				++nr_privileged_ctx;
+				WARN_ON(nr_privileged_ctx != 1);
+			}
+		}
+	}
+	CSTD_UNUSED(nr_privileged_ctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* De-ref the previous retain to ensure each context gets pulled out
+	 * sometime later. */
+	for (i = 0;
+		 i < BASE_MAX_NR_AS;
+		 ++i, retained = retained >> 1) {
+		struct kbase_context *kctx = kbdev->as_to_kctx[i];
+
+		if (retained & 1u)
+			kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	/* Caller must wait for all Power Manager active references to be
+	 * dropped */
+}
+
+void kbasep_js_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int js;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_context *kctx, *n;
+
+		list_for_each_entry_safe(kctx, n,
+				&kbdev->js_data.ctx_list_unpullable[js],
+				jctx.sched_info.ctx.ctx_list_entry[js]) {
+			struct kbasep_js_kctx_info *js_kctx_info;
+			unsigned long flags;
+			bool timer_sync = false;
+
+			js_kctx_info = &kctx->jctx.sched_info;
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+				kbase_js_ctx_pullable(kctx, js, false))
+				timer_sync =
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			if (timer_sync)
+				kbase_backend_ctx_count_changed(kbdev);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		}
+	}
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	/* Restart atom processing */
+	kbase_js_sched_all(kbdev);
+
+	/* JS Resume complete */
+}
+
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if ((katom->core_req & BASE_JD_REQ_FS) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE |
+								BASE_JD_REQ_T)))
+		return false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) &&
+	    (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T)))
+		return false;
+
+	return true;
+}
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_FS)
+		return 0;
+
+	if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+		if (katom->device_nr == 1 &&
+				kbdev->gpu_props.num_core_groups == 2)
+			return 2;
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+			return 2;
+	}
+
+	return 1;
+}
+
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom)
+{
+	bool enqueue_required;
+
+	katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	/* If slot will transition from unpullable to pullable then add to
+	 * pullable list */
+	if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
+		enqueue_required = true;
+	} else {
+		enqueue_required = false;
+	}
+	if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
+			(katom->pre_dep && (katom->pre_dep->atom_flags &
+			KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		int prio = katom->sched_priority;
+		int js = katom->slot_nr;
+		struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+		list_add_tail(&katom->queue, &queue->x_dep_head);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+		enqueue_required = false;
+	} else {
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		/* Add atom to ring buffer. */
+		jsctx_tree_add(kctx, katom);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+	}
+
+	return enqueue_required;
+}
+
+/**
+ * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the
+ *                         runnable_tree, ready for execution
+ * @katom: Atom to submit
+ *
+ * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set,
+ * but is still present in the x_dep list. If @katom has a same-slot dependent
+ * atom then that atom (and any dependents) will also be moved.
+ */
+static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock);
+
+	while (katom) {
+		WARN_ON(!(katom->atom_flags &
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
+
+		if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+			list_del(&katom->queue);
+			katom->atom_flags &=
+					~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+			jsctx_tree_add(katom->kctx, katom);
+			katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+		} else {
+			break;
+		}
+
+		katom = katom->post_dep;
+	}
+}
+
+
+/**
+ * kbase_js_evict_deps - Evict dependencies of a failed atom.
+ * @kctx:       Context pointer
+ * @katom:      Pointer to the atom that has failed.
+ * @js:         The job slot the katom was run on.
+ * @prio:       Priority of the katom.
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propogated to all dependent atoms.
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom, int js, int prio)
+{
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+	struct kbase_jd_atom *next_katom = katom->post_dep;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (next_katom) {
+		KBASE_DEBUG_ASSERT(next_katom->status !=
+				KBASE_JD_ATOM_STATE_HW_COMPLETED);
+		next_katom->will_fail_event_code = katom->event_code;
+
+	}
+
+	/* Has cross slot depenency. */
+	if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE |
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		/* Remove dependency.*/
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+		/* Fail if it had a data dependency. */
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
+			x_dep->will_fail_event_code = katom->event_code;
+		}
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)
+			kbase_js_move_to_tree(x_dep);
+	}
+}
+
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	int pulled;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+		return NULL;
+	if (kbase_pm_is_suspending(kbdev))
+		return NULL;
+
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return NULL;
+	if (kctx->blocked_js[js][katom->sched_priority])
+		return NULL;
+	if (atomic_read(&katom->blocked))
+		return NULL;
+
+	/* Due to ordering restrictions when unpulling atoms on failure, we do
+	 * not allow multiple runs of fail-dep atoms from the same context to be
+	 * present on the same slot */
+	if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) {
+		struct kbase_jd_atom *prev_atom =
+				kbase_backend_inspect_tail(kbdev, js);
+
+		if (prev_atom && prev_atom->kctx != kctx)
+			return NULL;
+	}
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return NULL;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kbdev, js))
+			return NULL;
+	}
+
+	kbase_ctx_flag_set(kctx, KCTX_PULLED);
+
+	pulled = atomic_inc_return(&kctx->atoms_pulled);
+	if (pulled == 1 && !kctx->slots_pullable) {
+		WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+		kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+		atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+	}
+	atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
+	kctx->atoms_pulled_slot_pri[katom->slot_nr][katom->sched_priority]++;
+	jsctx_rb_pull(kctx, katom);
+
+	kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+	katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+
+	katom->ticks = 0;
+
+	return katom;
+}
+
+
+static void js_return_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+									work);
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	struct kbasep_js_atom_retained_state retained_state;
+	int js = katom->slot_nr;
+	int prio = katom->sched_priority;
+	bool timer_sync = false;
+	bool context_idle = false;
+	unsigned long flags;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	kbasep_js_atom_retained_state_copy(&retained_state, katom);
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	atomic_dec(&kctx->atoms_pulled);
+	atomic_dec(&kctx->atoms_pulled_slot[js]);
+
+	atomic_dec(&katom->blocked);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kctx->atoms_pulled_slot_pri[js][katom->sched_priority]--;
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
+			jsctx_rb_none_to_pull(kctx, js))
+		timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js);
+
+	/* If this slot has been blocked due to soft-stopped atoms, and all
+	 * atoms have now been processed, then unblock the slot */
+	if (!kctx->atoms_pulled_slot_pri[js][prio] &&
+			kctx->blocked_js[js][prio]) {
+		kctx->blocked_js[js][prio] = false;
+
+		/* Only mark the slot as pullable if the context is not idle -
+		 * that case is handled below */
+		if (atomic_read(&kctx->atoms_pulled) &&
+				kbase_js_ctx_pullable(kctx, js, true))
+			timer_sync |= kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, js);
+	}
+
+	if (!atomic_read(&kctx->atoms_pulled)) {
+		if (!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
+				!kbase_ctx_flag(kctx, KCTX_DYING)) {
+			int num_slots = kbdev->gpu_props.num_job_slots;
+			int slot;
+
+			if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+				kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+			for (slot = 0; slot < num_slots; slot++) {
+				if (kbase_js_ctx_pullable(kctx, slot, true))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, slot);
+			}
+		}
+
+		kbase_jm_idle_ctx(kbdev, kctx);
+
+		context_idle = true;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (context_idle) {
+		WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+		kbase_pm_context_idle(kbdev);
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+							&retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+}
+
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_rb_unpull(kctx, katom);
+
+	WARN_ON(work_pending(&katom->work));
+
+	/* Block re-submission until workqueue has run */
+	atomic_inc(&katom->blocked);
+
+	kbase_job_check_leave_disjoint(kctx->kbdev, katom);
+
+	INIT_WORK(&katom->work, js_return_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+						struct kbase_jd_atom *katom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	bool timer_sync = false;
+	int atom_slot;
+	bool context_idle = false;
+	int prio = katom->sched_priority;
+
+	kbdev = kctx->kbdev;
+	atom_slot = katom->slot_nr;
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
+		context_idle = !atomic_dec_return(&kctx->atoms_pulled);
+		atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
+		kctx->atoms_pulled_slot_pri[atom_slot][prio]--;
+
+		if (!atomic_read(&kctx->atoms_pulled) &&
+				!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		/* If this slot has been blocked due to soft-stopped atoms, and
+		 * all atoms have now been processed, then unblock the slot */
+		if (!kctx->atoms_pulled_slot_pri[atom_slot][prio]
+				&& kctx->blocked_js[atom_slot][prio]) {
+			kctx->blocked_js[atom_slot][prio] = false;
+			if (kbase_js_ctx_pullable(kctx, atom_slot, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+						kbdev, kctx, atom_slot);
+		}
+	}
+	WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE));
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
+			jsctx_rb_none_to_pull(kctx, atom_slot)) {
+		if (!list_empty(
+			&kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot]))
+			timer_sync |= kbase_js_ctx_list_remove_nolock(
+					kctx->kbdev, kctx, atom_slot);
+	}
+
+	/*
+	 * If submission is disabled on this context (most likely due to an
+	 * atom failure) and there are now no atoms left in the system then
+	 * re-enable submission so that context can be scheduled again.
+	 */
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
+					!atomic_read(&kctx->atoms_pulled) &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+		int js;
+
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	} else if (katom->x_post_dep &&
+			kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+		int js;
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	}
+
+	/* Mark context as inactive. The pm reference will be dropped later in
+	 * jd_done_worker().
+	 */
+	if (context_idle)
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return context_idle;
+}
+
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp)
+{
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+
+	kbdev = kctx->kbdev;
+
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (katom->will_fail_event_code)
+		katom->event_code = katom->will_fail_event_code;
+
+	katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
+
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_js_evict_deps(kctx, katom, katom->slot_nr,
+				katom->sched_priority);
+	}
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
+				katom->slot_nr), NULL, 0);
+#endif
+
+	kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
+
+	/* Unblock cross dependency if present */
+	if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
+			!(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) &&
+			(x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+		bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false);
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+		kbase_js_move_to_tree(x_dep);
+		if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false))
+			kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx,
+					x_dep->slot_nr);
+
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)
+			return x_dep;
+	}
+
+	return NULL;
+}
+
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *last_active;
+	bool timer_sync = false;
+	bool ctx_waiting = false;
+
+	js_devdata = &kbdev->js_data;
+
+	down(&js_devdata->schedule_sem);
+	mutex_lock(&js_devdata->queue_mutex);
+
+	last_active = kbdev->hwaccess.active_kctx;
+
+	while (js_mask) {
+		int js;
+
+		js = ffs(js_mask) - 1;
+
+		while (1) {
+			struct kbase_context *kctx;
+			unsigned long flags;
+			bool context_idle = false;
+
+			kctx = kbase_js_ctx_list_pop_head(kbdev, js);
+
+			if (!kctx) {
+				js_mask &= ~(1 << js);
+				break; /* No contexts on pullable list */
+			}
+
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) {
+				context_idle = true;
+
+				if (kbase_pm_context_active_handle_suspend(
+									kbdev,
+				      KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+					/* Suspend pending - return context to
+					 * queue and stop scheduling */
+					mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					if (kbase_js_ctx_list_add_pullable_head(
+						kctx->kbdev, kctx, js))
+						kbase_js_sync_timers(kbdev);
+					mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					mutex_unlock(&js_devdata->queue_mutex);
+					up(&js_devdata->schedule_sem);
+					return;
+				}
+				kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+			}
+
+			if (!kbase_js_use_ctx(kbdev, kctx)) {
+				mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				/* Context can not be used at this time */
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+				if (kbase_js_ctx_pullable(kctx, js, false)
+				    || kbase_ctx_flag(kctx, KCTX_PRIVILEGED))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev, kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				if (context_idle) {
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				}
+
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+				break;
+			}
+			mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_ctx_flag_clear(kctx, KCTX_PULLED);
+
+			if (!kbase_jm_kick(kbdev, 1 << js))
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+
+			if (!kbase_ctx_flag(kctx, KCTX_PULLED)) {
+				bool pullable = kbase_js_ctx_pullable(kctx, js,
+						true);
+
+				/* Failed to pull jobs - push to head of list.
+				 * Unless this context is already 'active', in
+				 * which case it's effectively already scheduled
+				 * so push it to the back of the list. */
+				if (pullable && kctx == last_active)
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev,
+							kctx, js);
+				else if (pullable)
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev,
+							kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+								kctx->kbdev,
+								kctx, js);
+
+				/* If this context is not the active context,
+				 * but the active context is pullable on this
+				 * slot, then we need to remove the active
+				 * marker to prevent it from submitting atoms in
+				 * the IRQ handler, which would prevent this
+				 * context from making progress. */
+				if (last_active && kctx != last_active &&
+						kbase_js_ctx_pullable(
+						last_active, js, true))
+					ctx_waiting = true;
+
+				if (context_idle) {
+					kbase_jm_idle_ctx(kbdev, kctx);
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				} else {
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+				}
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+				js_mask &= ~(1 << js);
+				break; /* Could not run atoms on this slot */
+			}
+
+			/* Push to back of list */
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev, kctx, js);
+			else
+				timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		}
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	if (kbdev->hwaccess.active_kctx == last_active && ctx_waiting)
+		kbdev->hwaccess.active_kctx = NULL;
+
+	mutex_unlock(&js_devdata->queue_mutex);
+	up(&js_devdata->schedule_sem);
+}
+
+void kbase_js_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	int js;
+
+	/*
+	 * Critical assumption: No more submission is possible outside of the
+	 * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs)
+	 * whilst the struct kbase_context is terminating.
+	 */
+
+	/* First, atomically do the following:
+	 * - mark the context as dying
+	 * - try to evict it from the queue */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_set(kctx, KCTX_DYING);
+
+	dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+
+	/*
+	 * At this point we know:
+	 * - If eviction succeeded, it was in the queue, but now no
+	 *   longer is
+	 *  - We must cancel the jobs here. No Power Manager active reference to
+	 *    release.
+	 *  - This happens asynchronously - kbase_jd_zap_context() will wait for
+	 *    those jobs to be killed.
+	 * - If eviction failed, then it wasn't in the queue. It is one
+	 *   of the following:
+	 *  - a. it didn't have any jobs, and so is not in the Queue or
+	 *       the Run Pool (not scheduled)
+	 *   - Hence, no more work required to cancel jobs. No Power Manager
+	 *     active reference to release.
+	 *  - b. it was in the middle of a scheduling transaction (and thus must
+	 *       have at least 1 job). This can happen from a syscall or a
+	 *       kernel thread. We still hold the jsctx_mutex, and so the thread
+	 *       must be waiting inside kbasep_js_try_schedule_head_ctx(),
+	 *       before checking whether the runpool is full. That thread will
+	 *       continue after we drop the mutex, and will notice the context
+	 *       is dying. It will rollback the transaction, killing all jobs at
+	 *       the same time. kbase_jd_zap_context() will wait for those jobs
+	 *       to be killed.
+	 *   - Hence, no more work required to cancel jobs, or to release the
+	 *     Power Manager active reference.
+	 *  - c. it is scheduled, and may or may not be running jobs
+	 * - We must cause it to leave the runpool by stopping it from
+	 * submitting any more jobs. When it finally does leave,
+	 * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs
+	 * (because it is dying), release the Power Manager active reference,
+	 * and will not requeue the context in the queue.
+	 * kbase_jd_zap_context() will wait for those jobs to be killed.
+	 *  - Hence, work required just to make it leave the runpool. Cancelling
+	 *    jobs and releasing the Power manager active reference will be
+	 *    handled when it leaves the runpool.
+	 */
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (!list_empty(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+				list_del_init(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+		}
+
+		/* The following events require us to kill off remaining jobs
+		 * and update PM book-keeping:
+		 * - we evicted it correctly (it must have jobs to be in the
+		 *   Queue)
+		 *
+		 * These events need no action, but take this path anyway:
+		 * - Case a: it didn't have any jobs, and was never in the Queue
+		 * - Case b: scheduling transaction will be partially rolled-
+		 *           back (this already cancels the jobs)
+		 */
+
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+
+		/* Only cancel jobs when we evicted from the
+		 * queue. No Power Manager active reference was held.
+		 *
+		 * Having is_dying set ensures that this kills, and
+		 * doesn't requeue */
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+	} else {
+		unsigned long flags;
+		bool was_retained;
+
+		/* Case c: didn't evict, but it is scheduled - it's in the Run
+		 * Pool */
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+		dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+
+		/* Disable the ctx from submitting any more jobs */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		/* Retain and (later) release the context whilst it is is now
+		 * disallowed from submitting jobs - ensures that someone
+		 * somewhere will be removing the context later on */
+		was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+		/* Since it's scheduled and we have the jsctx_mutex, it must be
+		 * retained successfully */
+		KBASE_DEBUG_ASSERT(was_retained);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+
+		/* Cancel any remaining running jobs for this kctx - if any.
+		 * Submit is disallowed which takes effect immediately, so no
+		 * more new jobs will appear after we do this. */
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+			kbase_job_slot_hardstop(kctx, js, NULL);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+									kctx);
+
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u);
+
+	/* After this, you must wait on both the
+	 * kbase_jd_context::zero_jobs_wait and the
+	 * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs
+	 * to be destroyed, and the context to be de-scheduled (if it was on the
+	 * runpool).
+	 *
+	 * kbase_jd_zap_context() will do this. */
+}
+
+static inline int trace_get_refcnt(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	return atomic_read(&kctx->refcount);
+}
+
+/**
+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context
+ * @kctx:     Pointer to context.
+ * @callback: Pointer to function to call for each job.
+ *
+ * Call a function on all jobs belonging to a non-queued, non-running
+ * context, and detach the jobs from the context as it goes.
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * Atoms will be removed from the queue, so this must only be called when
+ * cancelling jobs (which occurs as part of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_ctx_job_cb callback)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	u32 js;
+
+	kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL,
+					0u, trace_get_refcnt(kbdev, kctx));
+
+	/* Invoke callback on jobs on each slot in turn */
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		jsctx_queue_foreach(kctx, js, callback);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
new file mode 100644
index 0000000..ddada8e
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -0,0 +1,925 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler APIs.
+ */
+
+#ifndef _KBASE_JS_H_
+#define _KBASE_JS_H_
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_context.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_debug.h"
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js Job Scheduler Internal APIs
+ * @{
+ *
+ * These APIs are Internal to KBase.
+ */
+
+/**
+ * @brief Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * @brief Halt the Job Scheduler.
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero intitialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the struct kbase_context must be zero intitialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the context is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guarenteed not to update the Policy Queue. And so, the caller is
+ * guarenteed to not need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is false.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold hwaccess_lock (as this will be obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return true indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return false indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
+ *   the atom
+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ *   remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions.
+ *
+ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Completely remove a job chain from the Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it is not being killed with kbasep_jd_cancel()
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold the hwaccess_lock, (as this will be obtained
+ *   internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * @return true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs
+ * @return false otherwise
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_jd_atom *katom);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold mmu_hw_mutex and hwaccess_lock, because they will be
+ *   used internally.
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locks must be held by the caller:
+ * - mmu_hw_mutex, hwaccess_lock
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that is stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *   If the hwaccess_lock is already held, then the caller should use
+ *   kbasep_js_runpool_lookup_ctx_nolock() instead.
+ *
+ * @return a valid struct kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
+
+/**
+ * @brief Handling the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.  \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero and the context \em might be scheduled out
+ * (depending on whether the Scheudling Policy has deemed it so, or if it has run
+ * out of jobs).
+ *
+ * If the context does get scheduled out, then The following actions will be
+ * taken as part of deschduling a context:
+ * - For the context being descheduled:
+ *  - If the context is in the processing of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ *  - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ *  - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ *  - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * - Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * - Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * - Attempt scheduling in a new context (if one is available), and if necessary,
+ * running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function \b cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a \a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * - Releasing the atom's context attributes
+ * - Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * @brief Variant of kbase_js_runpool_release_ctx() that assumes that
+ * kbasep_js_device_data::runpool_mutex and
+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
+ * attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * @brief Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the function will wait
+ * for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space reserved) until
+ * kbasep_js_release_privileged_ctx is called).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Release a privileged context, allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - hwaccess_lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Mangement active refcount to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduler's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time.
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * @brief Resume the Job Scheduler after a Power Management Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * - Schedules contexts back into the runpool
+ * - Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * @brief Submit an atom to the job scheduler.
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to submit
+ *
+ * @return Whether the context requires to be enqueued. */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom);
+
+/**
+  * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
+  * @kctx:  Context Pointer
+  * @prio:  Priority (specifies the queue together with js).
+  * @js:    Job slot (specifies the queue together with prio).
+  *
+  * Pushes all possible atoms from the linked list to the ringbuffer.
+  * Number of atoms are limited to free space in the ringbuffer and
+  * number of available atoms in the linked list.
+  *
+  */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
+/**
+ * @brief Pull an atom from a context in the job scheduler for execution.
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context to pull from
+ * @param[in] js    Job slot to pull from
+ * @return          Pointer to an atom, or NULL if there are no atoms for this
+ *                  slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * @brief Return an atom to the job scheduler ringbuffer.
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to unpull
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom from jd_done_worker(), removing it from the job
+ * scheduler ringbuffer.
+ *
+ * If the atom failed then all dependee atoms marked for failure propagation
+ * will also fail.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] katom Pointer to the atom to complete
+ * @return true if the context is now idle (no jobs pulled)
+ *         false otherwise
+ */
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom.
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] katom         Pointer to the atom to complete
+ * @param[in] end_timestamp The time that the atom completed (may be NULL)
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp);
+
+/**
+ * @brief Submit atoms from all available contexts.
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ *
+ * @param[in] kbdev    Device pointer
+ * @param[in] js_mask  Mask of job slots to submit to
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_jd_zap_context - Attempt to deschedule a context that is being
+ *                        destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove a context from any internal job scheduler queues
+ * and perform any other actions to ensure a context will not be submitted
+ * from.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * @brief Validate an atom
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * @param[in] kbdev  Device pointer
+ * @param[in] katom  Atom to validate
+ * @return           true if atom is valid
+ *                   false otherwise
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user specified data
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used, if a timeout
+ * is negative then the default is set.
+ */
+void kbase_js_set_timeouts(struct kbase_device *kbdev);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * @brief Check that a context is allowed to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * As with any bool, never test the return value with true.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 test_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	test_bit = (u16) (1u << kctx->as_nr);
+
+	return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+}
+
+/**
+ * @brief Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 set_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	set_bit = (u16) (1u << kctx->as_nr);
+
+	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * @brief Prevent a context from submitting more jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 clear_bit;
+	u16 clear_mask;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	clear_bit = (u16) (1u << kctx->as_nr);
+	clear_mask = ~clear_bit;
+
+	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
+
+/**
+ * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom
+ */
+static inline void kbasep_js_clear_job_retry_submit(struct kbase_jd_atom *atom)
+{
+	atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Mark a slot as requiring resubmission by carrying that information on a
+ * completing atom.
+ *
+ * @note This can ASSERT in debug builds if the submit slot has been set to
+ * something other than the current value for @a js. This is because you might
+ * be unintentionally stopping more jobs being submitted on the old submit
+ * slot, and that might cause a scheduling-hang.
+ *
+ * @note If you can guarantee that the atoms for the original slot will be
+ * submitted on some other slot, then call kbasep_js_clear_job_retry_submit()
+ * first to silence the ASSERT.
+ */
+static inline void kbasep_js_set_job_retry_submit_slot(struct kbase_jd_atom *atom, int js)
+{
+	KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS);
+	KBASE_DEBUG_ASSERT((atom->retry_submit_on_slot ==
+					KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID)
+				|| (atom->retry_submit_on_slot == js));
+
+	atom->retry_submit_on_slot = js;
+}
+
+/**
+ * Create an initial 'invalid' atom retained state, that requires no
+ * atom-related work to be done on releasing with
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
+{
+	retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+	retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+	retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Copy atom state that can be made available after jd_done_nolock() is called
+ * on that atom.
+ */
+static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
+{
+	retained_state->event_code = katom->event_code;
+	retained_state->core_req = katom->core_req;
+	retained_state->retry_submit_on_slot = katom->retry_submit_on_slot;
+	retained_state->sched_priority = katom->sched_priority;
+	retained_state->device_nr = katom->device_nr;
+}
+
+/**
+ * @brief Determine whether an atom has finished (given its retained state),
+ * and so should be given back to userspace/removed from the system.
+ *
+ * Reasons for an atom not finishing include:
+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ *
+ * @param[in] katom_retained_state the retained state of the atom to check
+ * @return    false if the atom has not finished
+ * @return    !=false if the atom has finished
+ */
+static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
+}
+
+/**
+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
+ * code should just ignore it.
+ *
+ * @param[in] katom_retained_state the atom's retained state to check
+ * @return    false if the retained state is invalid, and can be ignored
+ * @return    !=false if the retained state is valid
+ */
+static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_atom_retained_state *katom_retained_state, int *res)
+{
+	int js = katom_retained_state->retry_submit_on_slot;
+
+	*res = js;
+	return (bool) (js >= 0);
+}
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guaranteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guaranteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_context *found_kctx;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	found_kctx = kbdev->as_to_kctx[as_nr];
+	KBASE_DEBUG_ASSERT(found_kctx == NULL ||
+			atomic_read(&found_kctx->refcount) > 0);
+
+	return found_kctx;
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+	++(js_devdata->nr_all_contexts_running);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+									S8_MAX);
+		++(js_devdata->nr_user_contexts_running);
+	}
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	--(js_devdata->nr_all_contexts_running);
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		--(js_devdata->nr_user_contexts_running);
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+	}
+}
+
+
+/**
+ * @brief Submit atoms from all available contexts to all job slots.
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ *
+ * @param[in] kbdev    Device pointer
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+	kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
+ *                                        to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treates priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ *         0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ *         KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+	if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+		return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+	return kbasep_js_atom_priority_to_relative[atom_prio];
+}
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+	unsigned int prio_idx;
+
+	KBASE_DEBUG_ASSERT(0 <= sched_prio
+			&& sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+	prio_idx = (unsigned int)sched_prio;
+
+	return kbasep_js_relative_priority_to_atom[prio_idx];
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100644
index 0000000..321506a
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,301 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+
+/*
+ * Private functions follow
+ */
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
+		++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) {
+			/* First refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+		--(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+			/* Last de-refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+	++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		/* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+	}
+
+	return runpool_state_changed;
+}
+
+/*
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		lockdep_assert_held(&kbdev->hwaccess_lock);
+		/* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+	}
+
+	/* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */
+	--(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* This context never submits, so don't track any scheduling attributes */
+		return;
+	}
+
+	/* Transfer attributes held in the context flags for contexts that have submit enabled */
+
+	/* ... More attributes can be added here ... */
+
+	/* The context should not have been scheduled yet, so ASSERT if this caused
+	 * runpool state changes (note that other threads *can't* affect the value
+	 * of runpool_state_changed, due to how it's calculated) */
+	KBASE_DEBUG_ASSERT(runpool_state_changed == false);
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed;
+	int i;
+
+	/* Retain any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled in, so update the runpool with the new attributes */
+			runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+
+			/* We don't need to know about state changed, because retaining a
+			 * context occurs on scheduling it, and that itself will also try
+			 * to run new atoms */
+			CSTD_UNUSED(runpool_state_changed);
+		}
+	}
+}
+
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+	int i;
+
+	/* Release any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled out, so update the runpool on the removed attributes */
+			runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom);
+	core_req = katom->core_req;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	/* We don't need to know about state changed, because retaining an
+	 * atom occurs on adding it, and that itself will also try to run
+	 * new atoms */
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom_retained_state);
+	core_req = katom_retained_state->core_req;
+
+	/* No-op for invalid atoms */
+	if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false)
+		return false;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	return runpool_state_changed;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
new file mode 100644
index 0000000..ce91833
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -0,0 +1,158 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_ctx_attr.h
+ * Job Scheduler Context Attribute APIs
+ */
+
+#ifndef _KBASE_JS_CTX_ATTR_H_
+#define _KBASE_JS_CTX_ATTR_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+
+/**
+ * Set the initial attributes of a context (when context create flags are set)
+ *
+ * Requires:
+ * - Hold the jsctx_mutex
+ */
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of a context
+ *
+ * This occurs on scheduling in the context on the runpool (but after
+ * is_scheduled is set)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ */
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Release all attributes of a context
+ *
+ * This occurs on scheduling out the context from the runpool (but before
+ * is_scheduled is cleared)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of an atom
+ *
+ * This occurs on adding an atom to a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ */
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * Release all attributes of an atom, given its retained state.
+ *
+ * This occurs after (permanently) removing an atom from a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * This is a no-op when \a katom_retained_state is invalid.
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+
+	return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	/* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+	return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+	return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100644
index 0000000..ba8b644
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,386 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+	base_jd_core_req core_req;
+	kbase_context_flags ctx_req;
+	u32 device_nr;
+};
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affect scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accomodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ */
+enum kbasep_js_ctx_attr {
+	/** Attribute indicating a context that contains Compute jobs. That is,
+	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE,
+
+	/** Attribute indicating a context that contains Non-Compute jobs. That is,
+	 * the context has some jobs that are \b not of type @ref
+	 * BASE_JD_REQ_ONLY_COMPUTE.
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+	/** Attribute indicating that a context contains compute-job atoms that
+	 * aren't restricted to a coherent group, and can run on all cores.
+	 *
+	 * Specifically, this is when the atom's \a core_req satisfy:
+	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
+	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+	 *
+	 * Such atoms could be blocked from running if one of the coherent groups
+	 * is being used by another job slot, so tracking this context attribute
+	 * allows us to prevent such situations.
+	 *
+	 * @note This doesn't take into account the 1-coregroup case, where all
+	 * compute atoms would effectively be able to run on 'all cores', but
+	 * contexts will still not always get marked with this attribute. Instead,
+	 * it is the caller's responsibility to take into account the number of
+	 * coregroups when interpreting this attribute.
+	 *
+	 * @note Whilst Tiler atoms are normally combined with
+	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+	 * enough to handle anyway.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+	/** Must be the last in the enum */
+	KBASEP_JS_CTX_ATTR_COUNT
+};
+
+enum {
+	/** Bit indicating that new atom should be started because this atom completed */
+	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
+	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not rollover (which would be undefined behavior), and so under
+ * the Total License model, it is free to make optimizations based on that (i.e.
+ * to remove masking).
+ */
+struct kbasep_js_device_data {
+	/* Sub-structure to collect together Job Scheduling data used in IRQ
+	 * context. The hwaccess_lock must be held when accessing. */
+	struct runpool_irq {
+		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
+		 * When bit 'N' is set in this, it indicates whether the context bound to address space
+		 * 'N' is allowed to submit jobs.
+		 */
+		u16 submit_allowed;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of contexts
+		 * that can fit into the runpool. This is currently BASE_MAX_NR_AS
+		 *
+		 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+		 * the refcount. Hence, it's not worthwhile reducing this to
+		 * bit-manipulation on u32s to save space (where in contrast, 4 bit
+		 * sub-fields would be easy to do and would save space).
+		 *
+		 * Whilst this must not become negative, the sign bit is used for:
+		 * - error detection in debug builds
+		 * - Optimization: it is undefined for a signed int to overflow, and so
+		 * the compiler can optimize for that never happening (thus, no masking
+		 * is required on updating the variable) */
+		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/*
+		 * Affinity management and tracking
+		 */
+		/** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
+		 * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
+		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
+		/** Refcount for each core owned by each slot. Used to generate the
+		 * slot_affinities array of bitvectors
+		 *
+		 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+		 * because it is refcounted only when a job is definitely about to be
+		 * submitted to a slot, and is de-refcounted immediately after a job
+		 * finishes */
+		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
+	} runpool_irq;
+
+	/**
+	 * Run Pool mutex, for managing contexts within the runpool.
+	 * Unless otherwise specified, you must hold this lock whilst accessing any
+	 * members that follow
+	 *
+	 * In addition, this is used to access:
+	 * - the kbasep_js_kctx_info::runpool substructure
+	 */
+	struct mutex runpool_mutex;
+
+	/**
+	 * Queue Lock, used to access the Policy's queue of contexts independently
+	 * of the Run Pool.
+	 *
+	 * Of course, you don't need the Run Pool lock to access this.
+	 */
+	struct mutex queue_mutex;
+
+	/**
+	 * Scheduling semaphore. This must be held when calling
+	 * kbase_jm_kick()
+	 */
+	struct semaphore schedule_sem;
+
+	/**
+	 * List of contexts that can currently be pulled from
+	 */
+	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
+	/**
+	 * List of contexts that can not currently be pulled from, but have
+	 * jobs currently running.
+	 */
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
+
+	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+	s8 nr_user_contexts_running;
+	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+	s8 nr_all_contexts_running;
+
+	/** Core Requirements to match up with base_js_atom's core_req memeber
+	 * @note This is a write-once member, and so no locking is required to read */
+	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+	u32 scheduling_period_ns;    /*< Value for JS_SCHEDULING_PERIOD_NS */
+	u32 soft_stop_ticks;	     /*< Value for JS_SOFT_STOP_TICKS */
+	u32 soft_stop_ticks_cl;	     /*< Value for JS_SOFT_STOP_TICKS_CL */
+	u32 hard_stop_ticks_ss;	     /*< Value for JS_HARD_STOP_TICKS_SS */
+	u32 hard_stop_ticks_cl;	     /*< Value for JS_HARD_STOP_TICKS_CL */
+	u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */
+	u32 gpu_reset_ticks_ss;	     /*< Value for JS_RESET_TICKS_SS */
+	u32 gpu_reset_ticks_cl;	     /*< Value for JS_RESET_TICKS_CL */
+	u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
+	u32 ctx_timeslice_ns;		 /**< Value for JS_CTX_TIMESLICE_NS */
+
+	/**< Value for JS_SOFT_JOB_TIMEOUT */
+	atomic_t soft_job_timeout_ms;
+
+	/** List of suspended soft jobs */
+	struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Support soft-stop on a single context */
+	bool softstop_always;
+#endif				/* CONFIG_MALI_DEBUG */
+
+	/** The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths).
+	 * @note This is a write-once member, and so no locking is required to read */
+	int init_status;
+
+	/* Number of contexts that can currently be pulled from */
+	u32 nr_contexts_pullable;
+
+	/* Number of contexts that can either be pulled from or are currently
+	 * running */
+	atomic_t nr_contexts_runnable;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+
+	/**
+	 * Job Scheduler Context information sub-structure. These members are
+	 * accessed regardless of whether the context is:
+	 * - In the Policy's Run Pool
+	 * - In the Policy's Queue
+	 * - Not queued nor in the Run Pool.
+	 *
+	 * You must obtain the jsctx_mutex before accessing any other members of
+	 * this substructure.
+	 *
+	 * You may not access any of these members from IRQ context.
+	 */
+	struct kbase_jsctx {
+		struct mutex jsctx_mutex;		    /**< Job Scheduler Context lock */
+
+		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+		 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
+		 * for such jobs*/
+		u32 nr_jobs;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of atoms on
+		 * the context. **/
+		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/**
+		 * Wait queue to wait for KCTX_SHEDULED flag state changes.
+		 * */
+		wait_queue_head_t is_scheduled_wait;
+
+		/** Link implementing JS queues. Context can be present on one
+		 * list per job slot
+		 */
+		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
+	} ctx;
+
+	/* The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths) */
+	int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+	/** Event code - to determine whether the atom has finished */
+	enum base_jd_event_code event_code;
+	/** core requirements */
+	base_jd_core_req core_req;
+	/* priority */
+	int sched_priority;
+	/** Job Slot to retry submitting to if submission from IRQ handler failed */
+	int retry_submit_on_slot;
+	/* Core group atom was executed on */
+	u32 device_nr;
+
+};
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbase_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
+ *
+ * @see kbase_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_MED,
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,
+	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100644
index 0000000..6d1e61f
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port. */
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+
+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+	#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+#else
+	#define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100644
index 0000000..375e484
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,2822 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/compat.h>
+#include <linux/version.h>
+
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_tlstream.h>
+
+/* This function finds out which RB tree the given GPU VA region belongs to
+ * based on the region zone */
+static struct rb_root *kbase_reg_flags_to_rbtree(struct kbase_context *kctx,
+						    struct kbase_va_region *reg)
+{
+	struct rb_root *rbtree = NULL;
+
+	switch (reg->flags & KBASE_REG_ZONE_MASK) {
+	case KBASE_REG_ZONE_CUSTOM_VA:
+		rbtree = &kctx->reg_rbtree_custom;
+		break;
+	case KBASE_REG_ZONE_EXEC:
+		rbtree = &kctx->reg_rbtree_exec;
+		break;
+	case KBASE_REG_ZONE_SAME_VA:
+		rbtree = &kctx->reg_rbtree_same;
+		/* fall through */
+	default:
+		rbtree = &kctx->reg_rbtree_same;
+		break;
+	}
+
+	return rbtree;
+}
+
+/* This function finds out which RB tree the given pfn from the GPU VA belongs
+ * to based on the memory zone the pfn refers to */
+static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
+								    u64 gpu_pfn)
+{
+	struct rb_root *rbtree = NULL;
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif /* CONFIG_64BIT */
+		if (gpu_pfn >= KBASE_REG_ZONE_CUSTOM_VA_BASE)
+			rbtree = &kctx->reg_rbtree_custom;
+		else if (gpu_pfn >= KBASE_REG_ZONE_EXEC_BASE)
+			rbtree = &kctx->reg_rbtree_exec;
+		else
+			rbtree = &kctx->reg_rbtree_same;
+#ifdef CONFIG_64BIT
+	} else {
+		if (gpu_pfn >= kctx->same_va_end)
+			rbtree = &kctx->reg_rbtree_custom;
+		else
+			rbtree = &kctx->reg_rbtree_same;
+	}
+#endif /* CONFIG_64BIT */
+
+	return rbtree;
+}
+
+/* This function inserts a region into the tree. */
+static void kbase_region_tracker_insert(struct kbase_context *kctx,
+						struct kbase_va_region *new_reg)
+{
+	u64 start_pfn = new_reg->start_pfn;
+	struct rb_node **link = NULL;
+	struct rb_node *parent = NULL;
+	struct rb_root *rbtree = NULL;
+
+	rbtree = kbase_reg_flags_to_rbtree(kctx, new_reg);
+
+	link = &(rbtree->rb_node);
+	/* Find the right place in the tree using tree search */
+	while (*link) {
+		struct kbase_va_region *old_reg;
+
+		parent = *link;
+		old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+		/* RBTree requires no duplicate entries. */
+		KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+		if (old_reg->start_pfn > start_pfn)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Put the new node there, and rebalance tree */
+	rb_link_node(&(new_reg->rblink), parent, link);
+
+	rb_insert_color(&(new_reg->rblink), rbtree);
+}
+
+/* Find allocated region enclosing free range. */
+static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
+		struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	u64 end_pfn = start_pfn + nr_pages;
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, start_pfn);
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (start_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (end_pfn > tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_root *rbtree = NULL;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (gpu_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (gpu_pfn >= tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->start_pfn > gpu_pfn)
+			rbnode = rbnode->rb_left;
+		else if (reg->start_pfn < gpu_pfn)
+			rbnode = rbnode->rb_right;
+		else
+			return reg;
+
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align)
+{
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	/* Note that this search is a linear search, as we do not have a target
+	   address in mind, so does not benefit from the rbtree search */
+
+	rbtree = kbase_reg_flags_to_rbtree(kctx, reg_reqs);
+
+	rbnode = rb_first(rbtree);
+
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if ((reg->nr_pages >= nr_pages) &&
+				(reg->flags & KBASE_REG_FREE)) {
+			/* Check alignment */
+			u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1);
+
+			if ((start_pfn >= reg->start_pfn) &&
+					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1)))
+				return reg;
+		}
+		rbnode = rb_next(rbnode);
+	}
+
+	return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions.  It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ */
+static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	struct rb_node *rbprev;
+	struct kbase_va_region *prev = NULL;
+	struct rb_node *rbnext;
+	struct kbase_va_region *next = NULL;
+	struct rb_root *reg_rbtree = NULL;
+
+	int merged_front = 0;
+	int merged_back = 0;
+	int err = 0;
+
+	reg_rbtree = kbase_reg_flags_to_rbtree(kctx, reg);
+
+	/* Try to merge with the previous block first */
+	rbprev = rb_prev(&(reg->rblink));
+	if (rbprev) {
+		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+		if (prev->flags & KBASE_REG_FREE) {
+			/* We're compatible with the previous VMA,
+			 * merge with it */
+			WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) !=
+					    (reg->flags & KBASE_REG_ZONE_MASK));
+			prev->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), reg_rbtree);
+			reg = prev;
+			merged_front = 1;
+		}
+	}
+
+	/* Try to merge with the next block second */
+	/* Note we do the lookup here as the tree may have been rebalanced. */
+	rbnext = rb_next(&(reg->rblink));
+	if (rbnext) {
+		/* We're compatible with the next VMA, merge with it */
+		next = rb_entry(rbnext, struct kbase_va_region, rblink);
+		if (next->flags & KBASE_REG_FREE) {
+			WARN_ON((next->flags & KBASE_REG_ZONE_MASK) !=
+					    (reg->flags & KBASE_REG_ZONE_MASK));
+			next->start_pfn = reg->start_pfn;
+			next->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), reg_rbtree);
+			merged_back = 1;
+			if (merged_front) {
+				/* We already merged with prev, free it */
+				kbase_free_alloced_region(reg);
+			}
+		}
+	}
+
+	/* If we failed to merge then we need to add a new block */
+	if (!(merged_front || merged_back)) {
+		/*
+		 * We didn't merge anything. Add a new free
+		 * placeholder and remove the original one.
+		 */
+		struct kbase_va_region *free_reg;
+
+		free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+		if (!free_reg) {
+			err = -ENOMEM;
+			goto out;
+		}
+		rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
+	}
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * @brief Insert a VA region to the list, replacing the current at_reg.
+ */
+static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_root *reg_rbtree = NULL;
+	int err = 0;
+
+	reg_rbtree = kbase_reg_flags_to_rbtree(kctx, at_reg);
+
+	/* Must be a free region */
+	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+	/* start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+	/* at least nr_pages from start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	/* Regions are a whole use, so swap and delete old one. */
+	if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+		rb_replace_node(&(at_reg->rblink), &(new_reg->rblink),
+								reg_rbtree);
+		kbase_free_alloced_region(at_reg);
+	}
+	/* New region replaces the start of the old one, so insert before. */
+	else if (at_reg->start_pfn == start_pfn) {
+		at_reg->start_pfn += nr_pages;
+		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region replaces the end of the old one, so insert after. */
+	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region splits the old one, so insert and create new */
+	else {
+		struct kbase_va_region *new_front_reg;
+
+		new_front_reg = kbase_alloc_free_region(kctx,
+				at_reg->start_pfn,
+				start_pfn - at_reg->start_pfn,
+				at_reg->flags & KBASE_REG_ZONE_MASK);
+
+		if (new_front_reg) {
+			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+			at_reg->start_pfn = start_pfn + nr_pages;
+
+			kbase_region_tracker_insert(kctx, new_front_reg);
+			kbase_region_tracker_insert(kctx, new_reg);
+		} else {
+			err = -ENOMEM;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * @brief Add a VA region to the list.
+ */
+int kbase_add_va_region(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 addr,
+		size_t nr_pages, size_t align)
+{
+	struct kbase_va_region *tmp;
+	u64 gpu_pfn = addr >> PAGE_SHIFT;
+	int err = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!align)
+		align = 1;
+
+	/* must be a power of 2 */
+	KBASE_DEBUG_ASSERT((align & (align - 1)) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+	/* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
+	if (gpu_pfn) {
+		struct device *dev = kctx->kbdev->dev;
+
+		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+		tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
+		if (!tmp) {
+			dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+		if (!(tmp->flags & KBASE_REG_FREE)) {
+			dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
+			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
+			dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
+		if (err) {
+			dev_warn(dev, "Failed to insert va region");
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		goto exit;
+	}
+
+	/* Path 2: Map any free address which meets the requirements.  */
+	{
+		u64 start_pfn;
+
+		/*
+		 * Depending on the zone the allocation request is for
+		 * we might need to retry it.
+		 */
+		do {
+			tmp = kbase_region_tracker_find_region_meeting_reqs(
+					kctx, reg, nr_pages, align);
+			if (tmp) {
+				start_pfn = (tmp->start_pfn + align - 1) &
+						~(align - 1);
+				err = kbase_insert_va_region_nolock(kctx, reg,
+						tmp, start_pfn, nr_pages);
+				break;
+			}
+
+			/*
+			 * If the allocation is not from the same zone as JIT
+			 * then don't retry, we're out of VA and there is
+			 * nothing which can be done about it.
+			 */
+			if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+					KBASE_REG_ZONE_CUSTOM_VA)
+				break;
+		} while (kbase_jit_evict(kctx));
+
+		if (!tmp)
+			err = -ENOMEM;
+	}
+
+ exit:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+		struct kbase_va_region *same_va_reg,
+		struct kbase_va_region *exec_reg,
+		struct kbase_va_region *custom_va_reg)
+{
+	kctx->reg_rbtree_same = RB_ROOT;
+	kbase_region_tracker_insert(kctx, same_va_reg);
+
+	/* Although exec and custom_va_reg don't always exist,
+	 * initialize unconditionally because of the mem_view debugfs
+	 * implementation which relies on these being empty */
+	kctx->reg_rbtree_exec = RB_ROOT;
+	kctx->reg_rbtree_custom = RB_ROOT;
+
+	if (exec_reg)
+		kbase_region_tracker_insert(kctx, exec_reg);
+	if (custom_va_reg)
+		kbase_region_tracker_insert(kctx, custom_va_reg);
+}
+
+static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	do {
+		rbnode = rb_first(rbtree);
+		if (rbnode) {
+			rb_erase(rbnode, rbtree);
+			reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+			kbase_free_alloced_region(reg);
+		}
+	} while (rbnode);
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
+}
+
+/**
+ * Initialize the region tracker data structure.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+	struct kbase_va_region *same_va_reg;
+	struct kbase_va_region *exec_reg = NULL;
+	struct kbase_va_region *custom_va_reg = NULL;
+	size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+	u64 same_va_pages;
+	int err;
+
+	/* Take the lock as kbase_free_alloced_region requires it */
+	kbase_gpu_vm_lock(kctx);
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		same_va_bits = 32;
+	else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+#endif
+
+	if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) {
+		err = -EINVAL;
+		goto fail_unlock;
+	}
+
+	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+	/* all have SAME_VA */
+	same_va_reg = kbase_alloc_free_region(kctx, 1,
+			same_va_pages,
+			KBASE_REG_ZONE_SAME_VA);
+
+	if (!same_va_reg) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+#ifdef CONFIG_64BIT
+	/* 32-bit clients have exec and custom VA zones */
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif
+		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+			err = -EINVAL;
+			goto fail_free_same_va;
+		}
+		/* If the current size of TMEM is out of range of the
+		 * virtual address space addressable by the MMU then
+		 * we should shrink it to fit
+		 */
+		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+		exec_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_EXEC_BASE,
+				KBASE_REG_ZONE_EXEC_SIZE,
+				KBASE_REG_ZONE_EXEC);
+
+		if (!exec_reg) {
+			err = -ENOMEM;
+			goto fail_free_same_va;
+		}
+
+		custom_va_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_CUSTOM_VA_BASE,
+				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+		if (!custom_va_reg) {
+			err = -ENOMEM;
+			goto fail_free_exec;
+		}
+#ifdef CONFIG_64BIT
+	}
+#endif
+
+	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+
+	kctx->same_va_end = same_va_pages + 1;
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_free_exec:
+	kbase_free_alloced_region(exec_reg);
+fail_free_same_va:
+	kbase_free_alloced_region(same_va_reg);
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
+{
+#ifdef CONFIG_64BIT
+	struct kbase_va_region *same_va;
+	struct kbase_va_region *custom_va_reg;
+	u64 same_va_bits;
+	u64 total_va_size;
+	int err;
+
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return 0;
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+
+	total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/*
+	 * Modify the same VA free region after creation. Be careful to ensure
+	 * that allocations haven't been made as they could cause an overlap
+	 * to happen with existing same VA allocations and the custom VA zone.
+	 */
+	same_va = kbase_region_tracker_find_region_base_address(kctx,
+			PAGE_SIZE);
+	if (!same_va) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* The region flag or region size has changed since creation so bail. */
+	if ((!(same_va->flags & KBASE_REG_FREE)) ||
+			(same_va->nr_pages != total_va_size)) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	if (same_va->nr_pages < jit_va_pages ||
+			kctx->same_va_end < jit_va_pages) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* It's safe to adjust the same VA zone now */
+	same_va->nr_pages -= jit_va_pages;
+	kctx->same_va_end -= jit_va_pages;
+
+	/*
+	 * Create a custom VA zone at the end of the VA for allocations which
+	 * JIT can use so it doesn't have to allocate VA from the kernel.
+	 */
+	custom_va_reg = kbase_alloc_free_region(kctx,
+				kctx->same_va_end,
+				jit_va_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+
+	if (!custom_va_reg) {
+		/*
+		 * The context will be destroyed if we fail here so no point
+		 * reverting the change we made to same_va.
+		 */
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	kbase_region_tracker_insert(kctx, custom_va_reg);
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+#else
+	return 0;
+#endif
+}
+
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+	kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX;
+
+	/* Initialize memory usage */
+	atomic_set(&memdev->used_pages, 0);
+
+	ret = kbase_mem_pool_init(&kbdev->mem_pool,
+			KBASE_MEM_POOL_MAX_SIZE_KBDEV,
+			KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
+			kbdev,
+			NULL);
+	if (ret)
+		return ret;
+
+	ret = kbase_mem_pool_init(&kbdev->lp_mem_pool,
+			(KBASE_MEM_POOL_MAX_SIZE_KBDEV >> 9),
+			KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
+			kbdev,
+			NULL);
+	if (ret)
+		kbase_mem_pool_term(&kbdev->mem_pool);
+
+	return ret;
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int pages;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	pages = atomic_read(&memdev->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_pool_term(&kbdev->mem_pool);
+	kbase_mem_pool_term(&kbdev->lp_mem_pool);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_term);
+
+
+
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+{
+	struct kbase_va_region *new_reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* zone argument should only contain zone related region flags */
+	KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
+
+	new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+	if (!new_reg)
+		return NULL;
+
+	new_reg->cpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->gpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->kctx = kctx;
+	new_reg->flags = zone | KBASE_REG_FREE;
+
+	new_reg->flags |= KBASE_REG_GROWABLE;
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions.
+ *
+ */
+void kbase_free_alloced_region(struct kbase_va_region *reg)
+{
+	if (!(reg->flags & KBASE_REG_FREE)) {
+		/*
+		 * The physical allocation should have been removed from the
+		 * eviction list before this function is called. However, in the
+		 * case of abnormal process termination or the app leaking the
+		 * memory kbase_mem_free_region is not called so it can still be
+		 * on the list at termination time of the region tracker.
+		 */
+		if (!list_empty(&reg->gpu_alloc->evict_node)) {
+			/*
+			 * Unlink the physical allocation before unmaking it
+			 * evictable so that the allocation isn't grown back to
+			 * its last backed size as we're going to unmap it
+			 * anyway.
+			 */
+			reg->cpu_alloc->reg = NULL;
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				reg->gpu_alloc->reg = NULL;
+
+			/*
+			 * If a region has been made evictable then we must
+			 * unmake it before trying to free it.
+			 * If the memory hasn't been reclaimed it will be
+			 * unmapped and freed below, if it has been reclaimed
+			 * then the operations below are no-ops.
+			 */
+			if (reg->flags & KBASE_REG_DONT_NEED) {
+				KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+						   KBASE_MEM_TYPE_NATIVE);
+				kbase_mem_evictable_unmake(reg->gpu_alloc);
+			}
+		}
+
+		/*
+		 * Remove the region from the sticky resource metadata
+		 * list should it be there.
+		 */
+		kbase_sticky_resource_release(reg->kctx, NULL,
+				reg->start_pfn << PAGE_SHIFT);
+
+		kbase_mem_phy_alloc_put(reg->cpu_alloc);
+		kbase_mem_phy_alloc_put(reg->gpu_alloc);
+		/* To detect use-after-free in debug builds */
+		KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
+	}
+	kfree(reg);
+}
+
+KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+{
+	int err;
+	size_t i = 0;
+	unsigned long attr;
+	unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+
+	if ((kctx->kbdev->system_coherency == COHERENCY_ACE) &&
+		(reg->flags & KBASE_REG_SHARE_BOTH))
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA);
+	else
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
+	if (err)
+		return err;
+
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+		struct kbase_mem_phy_alloc *alloc;
+
+		alloc = reg->gpu_alloc;
+		stride = alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
+		for (i = 0; i < alloc->imported.alias.nents; i++) {
+			if (alloc->imported.alias.aliased[i].alloc) {
+				err = kbase_mmu_insert_pages(kctx,
+						reg->start_pfn + (i * stride),
+						alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
+						alloc->imported.alias.aliased[i].length,
+						reg->flags);
+				if (err)
+					goto bad_insert;
+
+				kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc);
+			} else {
+				err = kbase_mmu_insert_single_page(kctx,
+					reg->start_pfn + i * stride,
+					kctx->aliasing_sink_page,
+					alloc->imported.alias.aliased[i].length,
+					(reg->flags & mask) | attr);
+
+				if (err)
+					goto bad_insert;
+			}
+		}
+	} else {
+		err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+				kbase_get_gpu_phy_pages(reg),
+				kbase_reg_current_backed_size(reg),
+				reg->flags);
+		if (err)
+			goto bad_insert;
+		kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
+	}
+
+	return err;
+
+bad_insert:
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+
+		stride = reg->gpu_alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		while (i--)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
+				kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length);
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+			}
+	}
+
+	kbase_remove_va_region(kctx, reg);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable);
+
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	if (reg->start_pfn == 0)
+		return 0;
+
+	if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		size_t i;
+
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+	} else {
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
+	}
+
+	if (reg->gpu_alloc && reg->gpu_alloc->type ==
+			KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		struct kbase_alloc_import_user_buf *user_buf =
+			&reg->gpu_alloc->imported.user_buf;
+
+		if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
+			user_buf->current_mapping_usage_count &=
+				~PINNED_ON_IMPORT;
+
+			kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc,
+					(reg->flags & KBASE_REG_GPU_WR));
+		}
+	}
+
+	if (err)
+		return err;
+
+	err = kbase_remove_va_region(kctx, reg);
+	return err;
+}
+
+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset)
+{
+	struct vm_area_struct *vma;
+	struct kbase_cpu_mapping *map;
+	unsigned long vm_pgoff_in_region;
+	unsigned long vm_off_in_region;
+	unsigned long map_start;
+	size_t map_size;
+
+	lockdep_assert_held(&current->mm->mmap_sem);
+
+	if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
+		return NULL;
+
+	vma = find_vma_intersection(current->mm, uaddr, uaddr+size);
+
+	if (!vma || vma->vm_start > uaddr)
+		return NULL;
+	if (vma->vm_ops != &kbase_vm_ops)
+		/* Not ours! */
+		return NULL;
+
+	map = vma->vm_private_data;
+
+	if (map->kctx != kctx)
+		/* Not from this context! */
+		return NULL;
+
+	vm_pgoff_in_region = vma->vm_pgoff - map->region->start_pfn;
+	vm_off_in_region = vm_pgoff_in_region << PAGE_SHIFT;
+	map_start = vma->vm_start - vm_off_in_region;
+	map_size = map->region->nr_pages << PAGE_SHIFT;
+
+	if ((uaddr + size) > (map_start + map_size))
+		/* Not within the CPU mapping */
+		return NULL;
+
+	*offset = (uaddr - vma->vm_start) + vm_off_in_region;
+
+	return map;
+}
+
+int kbasep_find_enclosing_cpu_mapping_offset(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset)
+{
+	struct kbase_cpu_mapping *map;
+
+	kbase_os_mem_map_lock(kctx);
+
+	map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset);
+
+	kbase_os_mem_map_unlock(kctx);
+
+	if (!map)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
+
+void kbase_sync_single(struct kbase_context *kctx,
+		struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa,
+		off_t offset, size_t size, enum kbase_sync_type sync_fn)
+{
+	struct page *cpu_page;
+	phys_addr_t cpu_pa = as_phys_addr_t(t_cpu_pa);
+	phys_addr_t gpu_pa = as_phys_addr_t(t_gpu_pa);
+
+	cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
+
+	if (likely(cpu_pa == gpu_pa)) {
+		dma_addr_t dma_addr;
+
+		BUG_ON(!cpu_page);
+		BUG_ON(offset + size > PAGE_SIZE);
+
+		dma_addr = kbase_dma_addr(cpu_page) + offset;
+		if (sync_fn == KBASE_SYNC_TO_CPU)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+		else if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+	} else {
+		void *src = NULL;
+		void *dst = NULL;
+		struct page *gpu_page;
+
+		if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
+			return;
+
+		gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+
+		if (sync_fn == KBASE_SYNC_TO_DEVICE) {
+			src = ((unsigned char *)kmap(cpu_page)) + offset;
+			dst = ((unsigned char *)kmap(gpu_page)) + offset;
+		} else if (sync_fn == KBASE_SYNC_TO_CPU) {
+			dma_sync_single_for_cpu(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+			src = ((unsigned char *)kmap(gpu_page)) + offset;
+			dst = ((unsigned char *)kmap(cpu_page)) + offset;
+		}
+		memcpy(dst, src, size);
+		kunmap(gpu_page);
+		kunmap(cpu_page);
+		if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+	}
+}
+
+static int kbase_do_syncset(struct kbase_context *kctx,
+		struct basep_syncset *sset, enum kbase_sync_type sync_fn)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+	struct kbase_cpu_mapping *map;
+	unsigned long start;
+	size_t size;
+	struct tagged_addr *cpu_pa;
+	struct tagged_addr *gpu_pa;
+	u64 page_off, page_count;
+	u64 i;
+	u64 offset;
+
+	kbase_os_mem_map_lock(kctx);
+	kbase_gpu_vm_lock(kctx);
+
+	/* find the region where the virtual address is contained */
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			sset->mem_handle.basep.handle);
+	if (!reg) {
+		dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX",
+				sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) ||
+			kbase_mem_is_imported(reg->gpu_alloc->type))
+		goto out_unlock;
+
+	start = (uintptr_t)sset->user_addr;
+	size = (size_t)sset->size;
+
+	map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset);
+	if (!map) {
+		dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
+				start, sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	page_off = offset >> PAGE_SHIFT;
+	offset &= ~PAGE_MASK;
+	page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	cpu_pa = kbase_get_cpu_phy_pages(reg);
+	gpu_pa = kbase_get_gpu_phy_pages(reg);
+
+	if (page_off > reg->nr_pages ||
+			page_off + page_count > reg->nr_pages) {
+		/* Sync overflows the region */
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* Sync first page */
+	if (as_phys_addr_t(cpu_pa[page_off])) {
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+
+		kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
+				offset, sz, sync_fn);
+	}
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		/* we grow upwards, so bail on first non-present page */
+		if (!as_phys_addr_t(cpu_pa[page_off + i]))
+			break;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + i],
+				gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1 &&
+	    as_phys_addr_t(cpu_pa[page_off + page_count - 1])) {
+		size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
+				gpu_pa[page_off + page_count - 1], 0, sz,
+				sync_fn);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_os_mem_map_unlock(kctx);
+	return err;
+}
+
+int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset)
+{
+	int err = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(sset != NULL);
+
+	if (sset->mem_handle.basep.handle & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev,
+				"mem_handle: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	switch (sset->type) {
+	case BASE_SYNCSET_OP_MSYNC:
+		err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_DEVICE);
+		break;
+
+	case BASE_SYNCSET_OP_CSYNC:
+		err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_CPU);
+		break;
+
+	default:
+		dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
+		break;
+	}
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_sync_now);
+
+/* vm lock must be held */
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Unlink the physical allocation before unmaking it evictable so
+	 * that the allocation isn't grown back to its last backed size
+	 * as we're going to unmap it anyway.
+	 */
+	reg->cpu_alloc->reg = NULL;
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		reg->gpu_alloc->reg = NULL;
+
+	/*
+	 * If a region has been made evictable then we must unmake it
+	 * before trying to free it.
+	 * If the memory hasn't been reclaimed it will be unmapped and freed
+	 * below, if it has been reclaimed then the operations below are no-ops.
+	 */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+				   KBASE_MEM_TYPE_NATIVE);
+		kbase_mem_evictable_unmake(reg->gpu_alloc);
+	}
+
+	err = kbase_gpu_munmap(kctx, reg);
+	if (err) {
+		dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+		goto out;
+	}
+
+	/* This will also free the physical pages */
+	kbase_free_alloced_region(reg);
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free_region);
+
+/**
+ * @brief Free the region from the GPU and unregister it.
+ *
+ * This function implements the free operation on a memory segment.
+ * It will loudly fail if called with outstanding mappings.
+ */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) {
+		dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid");
+		return -EINVAL;
+	}
+
+	if (0 == gpu_addr) {
+		dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+		return -EINVAL;
+	}
+	kbase_gpu_vm_lock(kctx);
+
+	if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
+	    gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
+		int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
+
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* ask to unlink the cookie as we'll free it */
+
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		kbase_free_alloced_region(reg);
+	} else {
+		/* A real GPU va */
+		/* Validate the region */
+		reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+		if (!reg || (reg->flags & KBASE_REG_FREE)) {
+			dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+			/* SAME_VA must be freed through munmap */
+			dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		err = kbase_mem_free_region(kctx, reg);
+	}
+
+ out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free);
+
+int kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
+
+	reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
+	/* all memory is now growable */
+	reg->flags |= KBASE_REG_GROWABLE;
+
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		reg->flags |= KBASE_REG_PF_GROW;
+
+	if (flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (0 == (flags & BASE_MEM_PROT_GPU_EX))
+		reg->flags |= KBASE_REG_GPU_NX;
+
+	if (!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED)
+			return -EINVAL;
+	} else if (flags & (BASE_MEM_COHERENT_SYSTEM |
+			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
+		reg->flags |= KBASE_REG_SHARE_BOTH;
+	}
+
+	if (!(reg->flags & KBASE_REG_SHARE_BOTH) &&
+			flags & BASE_MEM_COHERENT_LOCAL) {
+		reg->flags |= KBASE_REG_SHARE_IN;
+	}
+
+	/* Set up default MEMATTR usage */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
+		(reg->flags & KBASE_REG_SHARE_BOTH)) {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
+	} else {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
+	}
+
+	return 0;
+}
+
+int kbase_alloc_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_requested)
+{
+	int new_page_count __maybe_unused;
+	size_t old_page_count = alloc->nents;
+	size_t nr_left = nr_pages_requested;
+	int res;
+	struct kbase_context *kctx;
+	struct tagged_addr *tp;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+	if (alloc->reg) {
+		if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
+			goto invalid_request;
+	}
+
+	kctx = alloc->imported.kctx;
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = kbase_atomic_add_pages(
+			nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer */
+	kbase_process_page_usage_inc(kctx, nr_pages_requested);
+
+	tp = alloc->pages + old_page_count;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	/* Check if we have enough pages requested so we can allocate a large
+	 * page (512 * 4KB = 2MB )
+	 */
+	if (nr_left >= (SZ_2M / SZ_4K)) {
+		int nr_lp = nr_left / (SZ_2M / SZ_4K);
+
+		res = kbase_mem_pool_alloc_pages(&kctx->lp_mem_pool,
+						 nr_lp * (SZ_2M / SZ_4K),
+						 tp,
+						 true);
+
+		if (res > 0) {
+			nr_left -= res;
+			tp += res;
+		}
+
+		if (nr_left) {
+			struct kbase_sub_alloc *sa, *temp_sa;
+
+			mutex_lock(&kctx->mem_partials_lock);
+
+			list_for_each_entry_safe(sa, temp_sa,
+						 &kctx->mem_partials, link) {
+				int pidx = 0;
+
+				while (nr_left) {
+					pidx = find_next_zero_bit(sa->sub_pages,
+								  SZ_2M / SZ_4K,
+								  pidx);
+					bitmap_set(sa->sub_pages, pidx, 1);
+					*tp++ = as_tagged_tag(page_to_phys(sa->page +
+									   pidx),
+							      FROM_PARTIAL);
+					nr_left--;
+
+					if (bitmap_full(sa->sub_pages, SZ_2M / SZ_4K)) {
+						/* unlink from partial list when full */
+						list_del_init(&sa->link);
+						break;
+					}
+				}
+			}
+			mutex_unlock(&kctx->mem_partials_lock);
+		}
+
+		/* only if we actually have a chunk left <512. If more it indicates
+		 * that we couldn't allocate a 2MB above, so no point to retry here.
+		 */
+		if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
+			/* create a new partial and suballocate the rest from it */
+			struct page *np = NULL;
+
+			do {
+				int err;
+
+				np = kbase_mem_pool_alloc(&kctx->lp_mem_pool);
+				if (np)
+					break;
+				err = kbase_mem_pool_grow(&kctx->lp_mem_pool, 1);
+				if (err)
+					break;
+			} while (1);
+
+			if (np) {
+				int i;
+				struct kbase_sub_alloc *sa;
+				struct page *p;
+
+				sa = kmalloc(sizeof(*sa), GFP_KERNEL);
+				if (!sa) {
+					kbase_mem_pool_free(&kctx->lp_mem_pool, np, false);
+					goto no_new_partial;
+				}
+
+				/* store pointers back to the control struct */
+				np->lru.next = (void *)sa;
+				for (p = np; p < np + SZ_2M / SZ_4K; p++)
+					p->lru.prev = (void *)np;
+				INIT_LIST_HEAD(&sa->link);
+				bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
+				sa->page = np;
+
+				for (i = 0; i < nr_left; i++)
+					*tp++ = as_tagged_tag(page_to_phys(np + i), FROM_PARTIAL);
+
+				bitmap_set(sa->sub_pages, 0, nr_left);
+				nr_left = 0;
+
+				/* expose for later use */
+				mutex_lock(&kctx->mem_partials_lock);
+				list_add(&sa->link, &kctx->mem_partials);
+				mutex_unlock(&kctx->mem_partials_lock);
+			}
+		}
+	}
+no_new_partial:
+#endif
+
+	if (nr_left) {
+		res = kbase_mem_pool_alloc_pages(&kctx->mem_pool,
+						 nr_left,
+						 tp,
+						 false);
+		if (res <= 0)
+			goto alloc_failed;
+	}
+
+	/*
+	 * Request a zone cache update, this scans only the new pages an
+	 * appends their information to the zone cache. if the update
+	 * fails then clear the cache so we fall-back to doing things
+	 * page by page.
+	 */
+	if (kbase_zone_cache_update(alloc, old_page_count) != 0)
+		kbase_zone_cache_clear(alloc);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return 0;
+
+alloc_failed:
+	/* rollback needed if got one or more 2MB but failed later */
+	if (nr_left != nr_pages_requested)
+		kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
+				  nr_pages_requested - nr_left,
+				  alloc->pages + old_page_count,
+				  false,
+				  false);
+
+	kbase_process_page_usage_dec(kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+invalid_request:
+	return -ENOMEM;
+}
+
+static void free_partial(struct kbase_context *kctx, struct tagged_addr tp)
+{
+	struct page *p, *head_page;
+	struct kbase_sub_alloc *sa;
+
+	p = phys_to_page(as_phys_addr_t(tp));
+	head_page = (struct page *)p->lru.prev;
+	sa = (struct kbase_sub_alloc *)head_page->lru.next;
+	mutex_lock(&kctx->mem_partials_lock);
+	clear_bit(p - head_page, sa->sub_pages);
+	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
+		list_del(&sa->link);
+		kbase_mem_pool_free(&kctx->lp_mem_pool, head_page, true);
+		kfree(sa);
+	} else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
+		   SZ_2M / SZ_4K - 1) {
+		/* expose the partial again */
+		list_add(&sa->link, &kctx->mem_partials);
+	}
+	mutex_unlock(&kctx->mem_partials_lock);
+}
+
+int kbase_free_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	struct tagged_addr *start_free;
+	int new_page_count __maybe_unused;
+	size_t freed = 0;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	/* early out if nothing to do */
+	if (0 == nr_pages_to_free)
+		return 0;
+
+	start_free = alloc->pages + alloc->nents - nr_pages_to_free;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/* pad start_free to a valid start location */
+	while (nr_pages_to_free && is_huge(*start_free) &&
+	       !is_huge_head(*start_free)) {
+		nr_pages_to_free--;
+		start_free++;
+	}
+
+	/*
+	 * Clear the zone cache, we don't expect JIT allocations to be
+	 * shrunk in parts so there is no point trying to optimize for that
+	 * by scanning for the changes caused by freeing this memory and
+	 * updating the existing cache entries.
+	 */
+	kbase_zone_cache_clear(alloc);
+
+
+	while (nr_pages_to_free) {
+		if (is_huge_head(*start_free)) {
+			/* This is a 2MB entry, so free all the 512 pages that
+			 * it points to
+			 */
+			kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
+					512,
+					start_free,
+					syncback,
+					reclaimed);
+			nr_pages_to_free -= 512;
+			start_free += 512;
+			freed += 512;
+		} else if (is_partial(*start_free)) {
+			free_partial(kctx, *start_free);
+			nr_pages_to_free--;
+			start_free++;
+			freed++;
+		} else {
+			struct tagged_addr *local_end_free;
+
+			local_end_free = start_free;
+			while (nr_pages_to_free &&
+			       !is_huge(*local_end_free) &&
+			       !is_partial(*local_end_free)) {
+				local_end_free++;
+				nr_pages_to_free--;
+			}
+			kbase_mem_pool_free_pages(&kctx->mem_pool,
+					local_end_free - start_free,
+					start_free,
+					syncback,
+					reclaimed);
+			freed += local_end_free - start_free;
+			start_free += local_end_free - start_free;
+		}
+	}
+
+	alloc->nents -= freed;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		kbase_process_page_usage_dec(kctx, freed);
+		new_page_count = kbase_atomic_sub_pages(freed,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(freed,
+				       &kctx->kbdev->memdev.used_pages);
+
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+				kctx->id,
+				(u64)new_page_count);
+	}
+
+	return 0;
+}
+
+void kbase_mem_kref_free(struct kref *kref)
+{
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
+
+	switch (alloc->type) {
+	case KBASE_MEM_TYPE_NATIVE: {
+		WARN_ON(!alloc->imported.kctx);
+		/*
+		 * The physical allocation must have been removed from the
+		 * eviction list before trying to free it.
+		 */
+		WARN_ON(!list_empty(&alloc->evict_node));
+		kbase_free_phy_pages_helper(alloc, alloc->nents);
+		break;
+	}
+	case KBASE_MEM_TYPE_ALIAS: {
+		/* just call put on the underlying phy allocs */
+		size_t i;
+		struct kbase_aliased *aliased;
+
+		aliased = alloc->imported.alias.aliased;
+		if (aliased) {
+			for (i = 0; i < alloc->imported.alias.nents; i++)
+				if (aliased[i].alloc)
+					kbase_mem_phy_alloc_put(aliased[i].alloc);
+			vfree(aliased);
+		}
+		break;
+	}
+	case KBASE_MEM_TYPE_RAW:
+		/* raw pages, external cleanup */
+		break;
+ #ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ump_dd_release(alloc->imported.ump_handle);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		dma_buf_detach(alloc->imported.umm.dma_buf,
+			       alloc->imported.umm.dma_attachment);
+		dma_buf_put(alloc->imported.umm.dma_buf);
+		break;
+#endif
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		if (alloc->imported.user_buf.mm)
+			mmdrop(alloc->imported.user_buf.mm);
+		kfree(alloc->imported.user_buf.pages);
+		break;
+	case KBASE_MEM_TYPE_TB:{
+		void *tb;
+
+		tb = alloc->imported.kctx->jctx.tb;
+		kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
+		vfree(tb);
+		break;
+	}
+	default:
+		WARN(1, "Unexecpted free of type %d\n", alloc->type);
+		break;
+	}
+
+	/* Free based on allocation type */
+	if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+		vfree(alloc);
+	else
+		kfree(alloc);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(vsize > 0);
+
+	/* validate user provided arguments */
+	if (size > vsize || vsize > reg->nr_pages)
+		goto out_term;
+
+	/* Prevent vsize*sizeof from wrapping around.
+	 * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail.
+	 */
+	if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
+		goto out_term;
+
+	KBASE_DEBUG_ASSERT(0 != vsize);
+
+	if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
+		goto out_term;
+
+	reg->cpu_alloc->reg = reg;
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
+			goto out_rollback;
+		reg->gpu_alloc->reg = reg;
+	}
+
+	return 0;
+
+out_rollback:
+	kbase_free_phy_pages_helper(reg->cpu_alloc, size);
+out_term:
+	return -1;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+
+bool kbase_check_alloc_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Either the GPU or CPU must be reading from the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
+		return false;
+
+	/* Either the GPU or CPU must be writing to the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for allocating. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* BASE_MEM_IMPORT_SHARED is only valid for imported memory */
+	if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
+		return false;
+
+	return true;
+}
+
+bool kbase_check_import_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Imported memory cannot be GPU executable */
+	if (flags & BASE_MEM_PROT_GPU_EX)
+		return false;
+
+	/* Imported memory cannot grow on page fault */
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for importing. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* Secure memory cannot be read by the CPU */
+	if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD))
+		return false;
+
+	return true;
+}
+
+/**
+ * @brief Acquire the per-context region list lock
+ */
+void kbase_gpu_vm_lock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_lock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
+
+/**
+ * @brief Release the per-context region list lock
+ */
+void kbase_gpu_vm_unlock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_unlock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
+
+#ifdef CONFIG_DEBUG_FS
+struct kbase_jit_debugfs_data {
+	int (*func)(struct kbase_jit_debugfs_data *);
+	struct mutex lock;
+	struct kbase_context *kctx;
+	u64 active_value;
+	u64 pool_value;
+	u64 destroy_value;
+	char buffer[50];
+};
+
+static int kbase_jit_debugfs_common_open(struct inode *inode,
+		struct file *file, int (*func)(struct kbase_jit_debugfs_data *))
+{
+	struct kbase_jit_debugfs_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->func = func;
+	mutex_init(&data->lock);
+	data->kctx = (struct kbase_context *) inode->i_private;
+
+	file->private_data = data;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t kbase_jit_debugfs_common_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_jit_debugfs_data *data;
+	size_t size;
+	int ret;
+
+	data = (struct kbase_jit_debugfs_data *) file->private_data;
+	mutex_lock(&data->lock);
+
+	if (*ppos) {
+		size = strnlen(data->buffer, sizeof(data->buffer));
+	} else {
+		if (!data->func) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		if (data->func(data)) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		size = scnprintf(data->buffer, sizeof(data->buffer),
+				"%llu,%llu,%llu", data->active_value,
+				data->pool_value, data->destroy_value);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size);
+
+out_unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+static int kbase_jit_debugfs_common_release(struct inode *inode,
+		struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+	return kbase_jit_debugfs_common_open(inode, file, __func); \
+} \
+static const struct file_operations __fops = { \
+	.owner = THIS_MODULE, \
+	.open = __fops ## _open, \
+	.release = kbase_jit_debugfs_common_release, \
+	.read = kbase_jit_debugfs_common_read, \
+	.write = NULL, \
+	.llseek = generic_file_llseek, \
+}
+
+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct list_head *tmp;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each(tmp, &kctx->jit_active_head) {
+		data->active_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_pool_head) {
+		data->pool_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_destroy_head) {
+		data->destroy_value++;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
+		kbase_jit_debugfs_count_get);
+
+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->nr_pages;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops,
+		kbase_jit_debugfs_vm_get);
+
+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->gpu_alloc->nents;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
+		kbase_jit_debugfs_phys_get);
+
+void kbase_jit_debugfs_init(struct kbase_context *kctx)
+{
+	/* Debugfs entry for getting the number of JIT allocations. */
+	debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_count_fops);
+
+	/*
+	 * Debugfs entry for getting the total number of virtual pages
+	 * used by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_vm_fops);
+
+	/*
+	 * Debugfs entry for getting the number of physical pages used
+	 * by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_phys_fops);
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations
+ * @work: Work item
+ *
+ * This function does the work of freeing JIT allocations whose physical
+ * backing has been released.
+ */
+static void kbase_jit_destroy_worker(struct work_struct *work)
+{
+	struct kbase_context *kctx;
+	struct kbase_va_region *reg;
+
+	kctx = container_of(work, struct kbase_context, jit_work);
+	do {
+		mutex_lock(&kctx->jit_evict_lock);
+		if (list_empty(&kctx->jit_destroy_head)) {
+			mutex_unlock(&kctx->jit_evict_lock);
+			break;
+		}
+
+		reg = list_first_entry(&kctx->jit_destroy_head,
+				struct kbase_va_region, jit_node);
+
+		list_del(&reg->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		kbase_gpu_vm_lock(kctx);
+		kbase_mem_free_region(kctx, reg);
+		kbase_gpu_vm_unlock(kctx);
+	} while (1);
+}
+
+int kbase_jit_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->jit_active_head);
+	INIT_LIST_HEAD(&kctx->jit_pool_head);
+	INIT_LIST_HEAD(&kctx->jit_destroy_head);
+	INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
+
+	INIT_LIST_HEAD(&kctx->jit_pending_alloc);
+	INIT_LIST_HEAD(&kctx->jit_atoms_head);
+
+	return 0;
+}
+
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info)
+{
+	struct kbase_va_region *reg = NULL;
+	struct kbase_va_region *walker;
+	struct kbase_va_region *temp;
+	size_t current_diff = SIZE_MAX;
+
+	int ret;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	/*
+	 * Scan the pool for an existing allocation which meets our
+	 * requirements and remove it.
+	 */
+	list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) {
+
+		if (walker->nr_pages >= info->va_pages) {
+			size_t min_size, max_size, diff;
+
+			/*
+			 * The JIT allocations VA requirements have been
+			 * meet, it's suitable but other allocations
+			 * might be a better fit.
+			 */
+			min_size = min_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			max_size = max_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			diff = max_size - min_size;
+
+			if (current_diff > diff) {
+				current_diff = diff;
+				reg = walker;
+			}
+
+			/* The allocation is an exact match, stop looking */
+			if (current_diff == 0)
+				break;
+		}
+	}
+
+	if (reg) {
+		/*
+		 * Remove the found region from the pool and add it to the
+		 * active list.
+		 */
+		list_move(&reg->jit_node, &kctx->jit_active_head);
+
+		/*
+		 * Remove the allocation from the eviction list as it's no
+		 * longer eligible for eviction. This must be done before
+		 * dropping the jit_evict_lock
+		 */
+		list_del_init(&reg->gpu_alloc->evict_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		kbase_gpu_vm_lock(kctx);
+
+		/* Make the physical backing no longer reclaimable */
+		if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+			goto update_failed;
+
+		/* Grow the backing if required */
+		if (reg->gpu_alloc->nents < info->commit_pages) {
+			size_t delta;
+			size_t old_size = reg->gpu_alloc->nents;
+
+			/* Allocate some more pages */
+			delta = info->commit_pages - reg->gpu_alloc->nents;
+			if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta)
+					!= 0)
+				goto update_failed;
+
+			if (reg->cpu_alloc != reg->gpu_alloc) {
+				if (kbase_alloc_phy_pages_helper(
+						reg->cpu_alloc, delta) != 0) {
+					kbase_free_phy_pages_helper(
+							reg->gpu_alloc, delta);
+					goto update_failed;
+				}
+			}
+
+			ret = kbase_mem_grow_gpu_mapping(kctx, reg,
+					info->commit_pages, old_size);
+			/*
+			 * The grow failed so put the allocation back in the
+			 * pool and return failure.
+			 */
+			if (ret)
+				goto update_failed;
+		}
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* No suitable JIT allocation was found so create a new one */
+		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+				BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+				BASE_MEM_COHERENT_LOCAL;
+		u64 gpu_addr;
+
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
+				info->extent, &flags, &gpu_addr);
+		if (!reg)
+			goto out_unlocked;
+
+		mutex_lock(&kctx->jit_evict_lock);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+		mutex_unlock(&kctx->jit_evict_lock);
+	}
+
+	return reg;
+
+update_failed:
+	/*
+	 * An update to an allocation from the pool failed, chances
+	 * are slim a new allocation would fair any better so return
+	 * the allocation to the pool and return the function with failure.
+	 */
+	kbase_gpu_vm_unlock(kctx);
+	mutex_lock(&kctx->jit_evict_lock);
+	list_move(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+out_unlocked:
+	return NULL;
+}
+
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	/* The physical backing of memory in the pool is always reclaimable */
+	kbase_gpu_vm_lock(kctx);
+	kbase_mem_evictable_make(reg->gpu_alloc);
+	kbase_gpu_vm_unlock(kctx);
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_move(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+}
+
+void kbase_jit_backing_lost(struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = reg->kctx;
+
+	lockdep_assert_held(&kctx->jit_evict_lock);
+
+	/*
+	 * JIT allocations will always be on a list, if the region
+	 * is not on a list then it's not a JIT allocation.
+	 */
+	if (list_empty(&reg->jit_node))
+		return;
+
+	/*
+	 * Freeing the allocation requires locks we might not be able
+	 * to take now, so move the allocation to the free list and kick
+	 * the worker which will do the freeing.
+	 */
+	list_move(&reg->jit_node, &kctx->jit_destroy_head);
+
+	schedule_work(&kctx->jit_work);
+}
+
+bool kbase_jit_evict(struct kbase_context *kctx)
+{
+	struct kbase_va_region *reg = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Free the oldest allocation from the pool */
+	mutex_lock(&kctx->jit_evict_lock);
+	if (!list_empty(&kctx->jit_pool_head)) {
+		reg = list_entry(kctx->jit_pool_head.prev,
+				struct kbase_va_region, jit_node);
+		list_del(&reg->jit_node);
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	if (reg)
+		kbase_mem_free_region(kctx, reg);
+
+	return (reg != NULL);
+}
+
+void kbase_jit_term(struct kbase_context *kctx)
+{
+	struct kbase_va_region *walker;
+
+	/* Free all allocations for this context */
+
+	/*
+	 * Flush the freeing of allocations whose backing has been freed
+	 * (i.e. everything in jit_destroy_head).
+	 */
+	cancel_work_sync(&kctx->jit_work);
+
+	kbase_gpu_vm_lock(kctx);
+	mutex_lock(&kctx->jit_evict_lock);
+	/* Free all allocations from the pool */
+	while (!list_empty(&kctx->jit_pool_head)) {
+		walker = list_first_entry(&kctx->jit_pool_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		kbase_mem_free_region(kctx, walker);
+		mutex_lock(&kctx->jit_evict_lock);
+	}
+
+	/* Free all allocations from active list */
+	while (!list_empty(&kctx->jit_active_head)) {
+		walker = list_first_entry(&kctx->jit_active_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		kbase_mem_free_region(kctx, walker);
+		mutex_lock(&kctx->jit_evict_lock);
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+	kbase_gpu_vm_unlock(kctx);
+}
+
+static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	long pinned_pages;
+	struct kbase_mem_phy_alloc *alloc;
+	struct page **pages;
+	struct tagged_addr *pa;
+	long i;
+	int err = -ENOMEM;
+	unsigned long address;
+	struct mm_struct *mm;
+	struct device *dev;
+	unsigned long offset;
+	unsigned long local_size;
+
+	alloc = reg->gpu_alloc;
+	pa = kbase_get_gpu_phy_pages(reg);
+	address = alloc->imported.user_buf.address;
+	mm = alloc->imported.user_buf.mm;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+
+	pages = alloc->imported.user_buf.pages;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	pinned_pages = get_user_pages(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#else
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL, NULL);
+#endif
+
+	if (pinned_pages <= 0)
+		return pinned_pages;
+
+	if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+		for (i = 0; i < pinned_pages; i++)
+			put_page(pages[i]);
+		return -ENOMEM;
+	}
+
+	dev = kctx->kbdev->dev;
+	offset = address & ~PAGE_MASK;
+	local_size = alloc->imported.user_buf.size;
+
+	for (i = 0; i < pinned_pages; i++) {
+		dma_addr_t dma_addr;
+		unsigned long min;
+
+		min = MIN(PAGE_SIZE - offset, local_size);
+		dma_addr = dma_map_page(dev, pages[i],
+				offset, min,
+				DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, dma_addr))
+			goto unwind;
+
+		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
+		pa[i] = as_tagged(page_to_phys(pages[i]));
+
+		local_size -= min;
+		offset = 0;
+	}
+
+	alloc->nents = pinned_pages;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
+			kbase_reg_current_backed_size(reg),
+			reg->flags);
+	if (err == 0)
+		return 0;
+
+	alloc->nents = 0;
+	/* fall down */
+unwind:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				alloc->imported.user_buf.dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+
+	while (++i < pinned_pages) {
+		put_page(pages[i]);
+		pages[i] = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable)
+{
+	long i;
+	struct page **pages;
+	unsigned long size = alloc->imported.user_buf.size;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	pages = alloc->imported.user_buf.pages;
+	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
+		unsigned long local_size;
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
+		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+				DMA_BIDIRECTIONAL);
+		if (writeable)
+			set_page_dirty_lock(pages[i]);
+		put_page(pages[i]);
+		pages[i] = NULL;
+
+		size -= local_size;
+	}
+	alloc->nents = 0;
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kbase_jd_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	struct sg_table *sgt;
+	struct scatterlist *s;
+	int i;
+	struct tagged_addr *pa;
+	int err;
+	size_t count = 0;
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = reg->gpu_alloc;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+	KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
+	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
+			DMA_BIDIRECTIONAL);
+
+	if (IS_ERR_OR_NULL(sgt))
+		return -EINVAL;
+
+	/* save for later */
+	alloc->imported.umm.sgt = sgt;
+
+	pa = kbase_get_gpu_phy_pages(reg);
+	KBASE_DEBUG_ASSERT(pa);
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		size_t j, pages = PFN_UP(sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+		"sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+		sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+		(unsigned long long) sg_dma_address(s));
+
+		for (j = 0; (j < pages) && (count < reg->nr_pages); j++,
+				count++)
+			*pa++ = as_tagged(sg_dma_address(s) +
+				(j << PAGE_SHIFT));
+		WARN_ONCE(j < pages,
+			  "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+		alloc->imported.umm.dma_buf->size);
+	}
+
+	if (!(reg->flags & KBASE_REG_IMPORT_PAD) &&
+			WARN_ONCE(count < reg->nr_pages,
+			"sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+			alloc->imported.umm.dma_buf->size)) {
+		err = -EINVAL;
+		goto err_unmap_attachment;
+	}
+
+	/* Update nents as we now have pages to map */
+	alloc->nents = reg->nr_pages;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+			kbase_get_gpu_phy_pages(reg),
+			count,
+			reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+	if (err)
+		goto err_unmap_attachment;
+
+	if (reg->flags & KBASE_REG_IMPORT_PAD) {
+		err = kbase_mmu_insert_single_page(kctx,
+				reg->start_pfn + count,
+				kctx->aliasing_sink_page,
+				reg->nr_pages - count,
+				(reg->flags | KBASE_REG_GPU_RD) &
+				~KBASE_REG_GPU_WR);
+		if (err)
+			goto err_teardown_orig_pages;
+	}
+
+	return 0;
+
+err_teardown_orig_pages:
+	kbase_mmu_teardown_pages(kctx, reg->start_pfn, count);
+err_unmap_attachment:
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+			alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+
+	return err;
+}
+
+static void kbase_jd_umm_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(alloc);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+	    alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+	alloc->nents = 0;
+}
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm)
+{
+	int err;
+
+	/* decide what needs to happen for this resource */
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		if (reg->gpu_alloc->imported.user_buf.mm != locked_mm)
+			goto exit;
+
+		reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
+			err = kbase_jd_user_buf_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+	}
+	break;
+	case KBASE_MEM_TYPE_IMPORTED_UMP: {
+		break;
+	}
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+			err = kbase_jd_umm_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+		break;
+	}
+#endif
+	default:
+		goto exit;
+	}
+
+	return kbase_mem_phy_alloc_get(reg->gpu_alloc);
+exit:
+	return NULL;
+}
+
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+	switch (alloc->type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		alloc->imported.umm.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.umm.current_mapping_usage_count) {
+			if (reg && reg->gpu_alloc == alloc) {
+				int err;
+
+				err = kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						alloc->nents);
+				WARN_ON(err);
+			}
+
+			kbase_jd_umm_unmap(kctx, alloc);
+		}
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		alloc->imported.user_buf.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
+			bool writeable = true;
+
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+				writeable = false;
+
+			kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+		}
+	}
+	break;
+	default:
+	break;
+	}
+	kbase_mem_phy_alloc_put(alloc);
+}
+
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *meta = NULL;
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Walk the per context external resource metadata list for the
+	 * metadata which matches the region which is being acquired.
+	 */
+	list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
+		if (walker->gpu_addr == gpu_addr) {
+			meta = walker;
+			break;
+		}
+	}
+
+	/* No metadata exists so create one. */
+	if (!meta) {
+		struct kbase_va_region *reg;
+
+		/* Find the region */
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				kctx, gpu_addr);
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+			goto failed;
+
+		/* Allocate the metadata object */
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			goto failed;
+
+		/*
+		 * Fill in the metadata object and acquire a reference
+		 * for the physical resource.
+		 */
+		meta->alloc = kbase_map_external_resource(kctx, reg, NULL);
+
+		if (!meta->alloc)
+			goto fail_map;
+
+		meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+
+		list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
+	}
+
+	return meta;
+
+fail_map:
+	kfree(meta);
+failed:
+	return NULL;
+}
+
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+	struct kbase_va_region *reg;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Search of the metadata if one isn't provided. */
+	if (!meta) {
+		/*
+		 * Walk the per context external resource metadata list for the
+		 * metadata which matches the region which is being released.
+		 */
+		list_for_each_entry(walker, &kctx->ext_res_meta_head,
+				ext_res_node) {
+			if (walker->gpu_addr == gpu_addr) {
+				meta = walker;
+				break;
+			}
+		}
+	}
+
+	/* No metadata so just return. */
+	if (!meta)
+		return false;
+
+	/* Drop the physical memory reference and free the metadata. */
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx,
+			meta->gpu_addr);
+
+	kbase_unmap_external_resource(kctx, reg, meta->alloc);
+	list_del(&meta->ext_res_node);
+	kfree(meta);
+
+	return true;
+}
+
+int kbase_sticky_resource_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->ext_res_meta_head);
+
+	return 0;
+}
+
+void kbase_sticky_resource_term(struct kbase_context *kctx)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Free any sticky resources which haven't been unmapped.
+	 *
+	 * Note:
+	 * We don't care about refcounts at this point as no future
+	 * references to the meta data will be made.
+	 * Region termination would find these if we didn't free them
+	 * here, but it's more efficient if we do the clean up here.
+	 */
+	while (!list_empty(&kctx->ext_res_meta_head)) {
+		walker = list_first_entry(&kctx->ext_res_meta_head,
+				struct kbase_ctx_ext_res_meta, ext_res_node);
+
+		kbase_sticky_resource_release(kctx, walker, 0);
+	}
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
new file mode 100644
index 0000000..815fab4
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -0,0 +1,1127 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/kref.h>
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include "mali_kbase_gator.h"
+#endif
+/* Required for kbase_mem_evictable_unmake */
+#include "mali_kbase_mem_linux.h"
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)	/* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
+The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and
+page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table
+updates and generates duplicate page faults as the page table information used by the MMU is not valid.   */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3)	/* round to 8 pages */
+
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0)	/* round to 1 page */
+
+/* This must always be a power of 2 */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/**
+ * A CPU mapping
+ */
+struct kbase_cpu_mapping {
+	struct   list_head mappings_list;
+	struct   kbase_mem_phy_alloc *alloc;
+	struct   kbase_context *kctx;
+	struct   kbase_va_region *region;
+	int      count;
+	int      free_on_close;
+};
+
+enum kbase_memory_type {
+	KBASE_MEM_TYPE_NATIVE,
+	KBASE_MEM_TYPE_IMPORTED_UMP,
+	KBASE_MEM_TYPE_IMPORTED_UMM,
+	KBASE_MEM_TYPE_IMPORTED_USER_BUF,
+	KBASE_MEM_TYPE_ALIAS,
+	KBASE_MEM_TYPE_TB,
+	KBASE_MEM_TYPE_RAW
+};
+
+/* internal structure, mirroring base_mem_aliasing_info,
+ * but with alloc instead of a gpu va (handle) */
+struct kbase_aliased {
+	struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
+	u64 offset; /* in pages */
+	u64 length; /* in pages */
+};
+
+/**
+ * @brief Physical pages tracking object properties
+  */
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED  (1ul << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE            (1ul << 1)
+
+/* physical pages tracking object.
+ * Set up to track N pages.
+ * N not stored here, the creator holds that info.
+ * This object only tracks how many elements are actually valid (present).
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not
+ * shared with another region or client. CPU mappings are OK to exist when changing, as
+ * long as the tracked mappings objects are updated as part of the change.
+ */
+struct kbase_mem_phy_alloc {
+	struct kref           kref; /* number of users of this alloc */
+	atomic_t              gpu_mappings;
+	size_t                nents; /* 0..N */
+	struct tagged_addr    *pages; /* N elements, only 0..nents are valid */
+
+	/* kbase_cpu_mappings */
+	struct list_head      mappings;
+
+	/* Node used to store this allocation on the eviction list */
+	struct list_head      evict_node;
+	/* Physical backing size when the pages where evicted */
+	size_t                evicted;
+	/*
+	 * Back reference to the region structure which created this
+	 * allocation, or NULL if it has been freed.
+	 */
+	struct kbase_va_region *reg;
+
+	/* type of buffer */
+	enum kbase_memory_type type;
+
+	unsigned long properties;
+
+	struct list_head       zone_cache;
+
+	/* member in union valid based on @a type */
+	union {
+#ifdef CONFIG_UMP
+		ump_dd_handle ump_handle;
+#endif /* CONFIG_UMP */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		struct {
+			struct dma_buf *dma_buf;
+			struct dma_buf_attachment *dma_attachment;
+			unsigned int current_mapping_usage_count;
+			struct sg_table *sgt;
+		} umm;
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+		struct {
+			u64 stride;
+			size_t nents;
+			struct kbase_aliased *aliased;
+		} alias;
+		/* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
+		struct kbase_context *kctx;
+		struct kbase_alloc_import_user_buf {
+			unsigned long address;
+			unsigned long size;
+			unsigned long nr_pages;
+			struct page **pages;
+			/* top bit (1<<31) of current_mapping_usage_count
+			 * specifies that this import was pinned on import
+			 * See PINNED_ON_IMPORT
+			 */
+			u32 current_mapping_usage_count;
+			struct mm_struct *mm;
+			dma_addr_t *dma_addrs;
+		} user_buf;
+	} imported;
+};
+
+/* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is
+ * used to signify that a buffer was pinned when it was imported. Since the
+ * reference count is limited by the number of atoms that can be submitted at
+ * once there should be no danger of overflowing into this bit.
+ * Stealing the top bit also has the benefit that
+ * current_mapping_usage_count != 0 if and only if the buffer is mapped.
+ */
+#define PINNED_ON_IMPORT	(1<<31)
+
+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		atomic_inc(&alloc->gpu_mappings);
+}
+
+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+			pr_err("Mismatched %s:\n", __func__);
+			dump_stack();
+		}
+}
+
+/**
+ * kbase_mem_is_imported - Indicate whether a memory type is imported
+ *
+ * @type: the memory type
+ *
+ * Return: true if the memory type is imported, false otherwise
+ */
+static inline bool kbase_mem_is_imported(enum kbase_memory_type type)
+{
+	return (type == KBASE_MEM_TYPE_IMPORTED_UMP) ||
+		(type == KBASE_MEM_TYPE_IMPORTED_UMM) ||
+		(type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+}
+
+void kbase_mem_kref_free(struct kref *kref);
+
+int kbase_mem_init(struct kbase_device *kbdev);
+void kbase_mem_halt(struct kbase_device *kbdev);
+void kbase_mem_term(struct kbase_device *kbdev);
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_get(&alloc->kref);
+	return alloc;
+}
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_put(&alloc->kref, kbase_mem_kref_free);
+	return NULL;
+}
+
+/**
+ * A GPU memory region, and attributes for CPU mappings.
+ */
+struct kbase_va_region {
+	struct rb_node rblink;
+	struct list_head link;
+
+	struct kbase_context *kctx;	/* Backlink to base context */
+
+	u64 start_pfn;		/* The PFN in GPU space */
+	size_t nr_pages;
+
+/* Free region */
+#define KBASE_REG_FREE              (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR            (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR            (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX            (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED        (1ul << 4)
+/* Is GPU cached? */
+#define KBASE_REG_GPU_CACHED        (1ul << 5)
+
+#define KBASE_REG_GROWABLE          (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW           (1ul << 7)
+
+/* Bit 8 is unused */
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN          (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH        (1ul << 10)
+
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_MASK         (3ul << 11)
+#define KBASE_REG_ZONE(x)           (((x) & 3) << 11)
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD            (1ul<<13)
+/* CPU read access */
+#define KBASE_REG_CPU_RD            (1ul<<14)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK      (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x)  (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x)  (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_SECURE            (1ul << 19)
+
+#define KBASE_REG_DONT_NEED         (1ul << 20)
+
+/* Imported buffer is padded? */
+#define KBASE_REG_IMPORT_PAD        (1ul << 21)
+
+/* Bit 22 is reserved.
+ *
+ * Do not remove, use the next unreserved bit for new flags */
+#define KBASE_REG_RESERVED_BIT_22   (1ul << 22)
+
+#define KBASE_REG_ZONE_SAME_VA      KBASE_REG_ZONE(0)
+
+/* only used with 32-bit clients */
+/*
+ * On a 32bit platform, custom VA should be wired from (4GB + shader region)
+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink
+ * it down, if required by the GPU, during initialization.
+ */
+
+/*
+ * Dedicated 16MB region for shader code:
+ * VA range 0x101000000-0x102000000
+ */
+#define KBASE_REG_ZONE_EXEC         KBASE_REG_ZONE(1)
+#define KBASE_REG_ZONE_EXEC_BASE    (0x101000000ULL >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_SIZE    ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
+
+#define KBASE_REG_ZONE_CUSTOM_VA         KBASE_REG_ZONE(2)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE    (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE    (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
+
+	unsigned long flags;
+
+	size_t extent; /* nr of pages alloc'd on PF */
+
+	struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */
+	struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */
+
+	/* List head used to store the region in the JIT allocation pool */
+	struct list_head jit_node;
+};
+
+/* Common functions */
+static inline struct tagged_addr *kbase_get_cpu_phy_pages(
+		struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->pages;
+}
+
+static inline struct tagged_addr *kbase_get_gpu_phy_pages(
+		struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->gpu_alloc->pages;
+}
+
+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	/* if no alloc object the backed size naturally is 0 */
+	if (!reg->cpu_alloc)
+		return 0;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->nents;
+}
+
+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
+
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
+{
+	struct kbase_mem_phy_alloc *alloc;
+	size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+	size_t per_page_size = sizeof(*alloc->pages);
+
+	/* Imported pages may have page private data already in use */
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		alloc_size += nr_pages *
+				sizeof(*alloc->imported.user_buf.dma_addrs);
+		per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
+	}
+
+	/*
+	 * Prevent nr_pages*per_page_size + sizeof(*alloc) from
+	 * wrapping around.
+	 */
+	if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+			/ per_page_size))
+		return ERR_PTR(-ENOMEM);
+
+	/* Allocate based on the size to reduce internal fragmentation of vmem */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc = vzalloc(alloc_size);
+	else
+		alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+	if (!alloc)
+		return ERR_PTR(-ENOMEM);
+
+	/* Store allocation method */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+	kref_init(&alloc->kref);
+	atomic_set(&alloc->gpu_mappings, 0);
+	alloc->nents = 0;
+	alloc->pages = (void *)(alloc + 1);
+	INIT_LIST_HEAD(&alloc->mappings);
+	alloc->type = type;
+	INIT_LIST_HEAD(&alloc->zone_cache);
+
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
+		alloc->imported.user_buf.dma_addrs =
+				(void *) (alloc->pages + nr_pages);
+
+	return alloc;
+}
+
+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
+		struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+	reg->cpu_alloc = kbase_alloc_create(reg->nr_pages,
+			KBASE_MEM_TYPE_NATIVE);
+	if (IS_ERR(reg->cpu_alloc))
+		return PTR_ERR(reg->cpu_alloc);
+	else if (!reg->cpu_alloc)
+		return -ENOMEM;
+	reg->cpu_alloc->imported.kctx = kctx;
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+	if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE)
+	    && (reg->flags & KBASE_REG_CPU_CACHED)) {
+		reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
+				KBASE_MEM_TYPE_NATIVE);
+		reg->gpu_alloc->imported.kctx = kctx;
+		INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+	} else {
+		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	}
+
+	INIT_LIST_HEAD(&reg->jit_node);
+	reg->flags &= ~KBASE_REG_FREE;
+	return 0;
+}
+
+static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_add_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_sub_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+/*
+ * Max size for kbdev memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * Max size for kctx memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KCTX  (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * The order required for a 2MB page allocation (2^order * 4KB = 2MB)
+ */
+#define KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER	9
+
+/*
+ * The order required for a 4KB page allocation
+ */
+#define KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER	0
+
+/**
+ * kbase_mem_pool_init - Create a memory pool for a kbase device
+ * @pool:      Memory pool to initialize
+ * @max_size:  Maximum number of free pages the pool can hold
+ * @order:     Page order for physical page size (order=0=>4kB, order=9=>2MB)
+ * @kbdev:     Kbase device where memory is used
+ * @next_pool: Pointer to the next pool or NULL.
+ *
+ * Allocations from @pool are in whole pages. Each @pool has a free list where
+ * pages can be quickly allocated from. The free list is initially empty and
+ * filled whenever pages are freed back to the pool. The number of free pages
+ * in the pool will in general not exceed @max_size, but the pool may in
+ * certain corner cases grow above @max_size.
+ *
+ * If @next_pool is not NULL, we will allocate from @next_pool before going to
+ * the kernel allocator. Similarily pages can spill over to @next_pool when
+ * @pool is full. Pages are zeroed before they spill over to another pool, to
+ * prevent leaking information between applications.
+ *
+ * A shrinker is registered so that Linux mm can reclaim pages from the pool as
+ * needed.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		size_t order,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool);
+
+/**
+ * kbase_mem_pool_term - Destroy a memory pool
+ * @pool:  Memory pool to destroy
+ *
+ * Pages in the pool will spill over to @next_pool (if available) or freed to
+ * the kernel.
+ */
+void kbase_mem_pool_term(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_alloc - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * Allocations from the pool are made as follows:
+ * 1. If there are free pages in the pool, allocate a page from @pool.
+ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page
+ *    from @next_pool.
+ * 3. Return NULL if no memory in the pool
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ */
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_free - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @page:  Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * Pages are freed to the pool as follows:
+ * 1. If @pool is not full, add @page to @pool.
+ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to
+ *    @next_pool.
+ * 3. Finally, free @page to the kernel.
+ */
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page,
+		bool dirty);
+
+/**
+ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool
+ * @pool:     Memory pool to allocate from
+ * @nr_pages: Number of pages to allocate
+ * @pages:    Pointer to array where the physical address of the allocated
+ *            pages will be stored.
+ * @partial_allowed: If fewer pages allocated is allowed
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages.
+ *
+ * Return:
+ * On success number of pages allocated (could be less than nr_pages if
+ * partial_allowed).
+ * On error an error code.
+ */
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		struct tagged_addr *pages, bool partial_allowed);
+
+/**
+ * kbase_mem_pool_free_pages - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		struct tagged_addr *pages, bool dirty, bool reclaimed);
+
+/**
+ * kbase_mem_pool_size - Get number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Note: the size of the pool may in certain corner cases exceed @max_size!
+ *
+ * Return: Number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool)
+{
+	return ACCESS_ONCE(pool->cur_size);
+}
+
+/**
+ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Return: Maximum number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool)
+{
+	return pool->max_size;
+}
+
+
+/**
+ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool
+ * @pool:     Memory pool to inspect
+ * @max_size: Maximum number of free pages the pool can hold
+ *
+ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit.
+ * For details see kbase_mem_pool_shrink().
+ */
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
+
+/**
+ * kbase_mem_pool_grow - Grow the pool
+ * @pool:       Memory pool to grow
+ * @nr_to_grow: Number of pages to add to the pool
+ *
+ * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
+ * become larger than the maximum size specified.
+ *
+ * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages
+ */
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+
+/**
+ * kbase_mem_pool_trim - Grow or shrink the pool to a new size
+ * @pool:     Memory pool to trim
+ * @new_size: New number of pages in the pool
+ *
+ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but
+ * not above the max_size for the pool.
+ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel.
+ */
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
+
+/**
+ * kbase_mem_alloc_page - Allocate a new page for a device
+ * @pool:  Memory pool to allocate a page from
+ *
+ * Most uses should use kbase_mem_pool_alloc to allocate a page. However that
+ * function can fail in the event the pool is empty.
+ *
+ * Return: A new page or NULL if no memory
+ */
+struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool);
+
+int kbase_region_tracker_init(struct kbase_context *kctx);
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages);
+void kbase_region_tracker_term(struct kbase_context *kctx);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * @brief Check that a pointer is actually a valid region.
+ *
+ * Must be called with context lock held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr);
+
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone);
+void kbase_free_alloced_region(struct kbase_va_region *reg);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+bool kbase_check_alloc_flags(unsigned long flags);
+bool kbase_check_import_flags(unsigned long flags);
+
+/**
+ * kbase_update_region_flags - Convert user space flags to kernel region flags
+ *
+ * @kctx:  kbase context
+ * @reg:   The region to update the flags on
+ * @flags: The flags passed from user space
+ *
+ * The user space flag BASE_MEM_COHERENT_SYSTEM_REQUIRED will be rejected and
+ * this function will fail if the system does not support system coherency.
+ *
+ * Return: 0 if successful, -EINVAL if the flags are not supported
+ */
+int kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags);
+
+void kbase_gpu_vm_lock(struct kbase_context *kctx);
+void kbase_gpu_vm_unlock(struct kbase_context *kctx);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
+
+int kbase_mmu_init(struct kbase_context *kctx);
+void kbase_mmu_term(struct kbase_context *kctx);
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
+void kbase_mmu_free_pgd(struct kbase_context *kctx);
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+				  struct tagged_addr *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  struct tagged_addr *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr phys, size_t nr,
+					unsigned long flags);
+
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr);
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
+			   struct tagged_addr *phys, size_t nr,
+			   unsigned long flags);
+
+/**
+ * @brief Register region and map it on the GPU.
+ *
+ * Call kbase_add_va_region() and map the region on the GPU.
+ */
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+/**
+ * @brief Remove the region from the GPU and unregister it.
+ *
+ * Must be called with context lock held.
+ */
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ */
+void kbase_mmu_update(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable() - Disable the MMU for a previously active kbase context.
+ * @kctx:	Kbase context
+ *
+ * Disable and perform the required cache maintenance to remove the all
+ * data from provided kbase context from the GPU caches.
+ *
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ */
+void kbase_mmu_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified
+ * address space.
+ * @kbdev:	Kbase device
+ * @as_nr:	The address space number to set to unmapped.
+ *
+ * This function must only be called during reset/power-up and it used to
+ * ensure the registers are in a known state.
+ *
+ * The caller must hold kbdev->mmu_hw_mutex.
+ */
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr);
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** Dump the MMU tables to a buffer
+ *
+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the
+ * buffer is too small then the return value will be NULL.
+ *
+ * The GPU vm lock must be held when calling this function.
+ *
+ * The buffer returned should be freed with @ref vfree when it is no longer required.
+ *
+ * @param[in]   kctx        The kbase context to dump
+ * @param[in]   nr_pages    The number of pages to allocate for the buffer.
+ *
+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too
+ * small)
+ */
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages);
+
+/**
+ * kbase_sync_now - Perform cache maintenance on a memory region
+ *
+ * @kctx: The kbase context of the region
+ * @sset: A syncset structure describing the region and direction of the
+ *        synchronisation required
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset);
+void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr cpu_pa,
+		struct tagged_addr gpu_pa, off_t offset, size_t size,
+		enum kbase_sync_type sync_fn);
+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+
+/* OS specific functions */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr);
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg);
+void kbase_os_mem_map_lock(struct kbase_context *kctx);
+void kbase_os_mem_map_unlock(struct kbase_context *kctx);
+
+/**
+ * @brief Update the memory allocation counters for the current process
+ *
+ * OS specific call to updates the current memory allocation counters for the current process with
+ * the supplied delta.
+ *
+ * @param[in] kctx  The kbase context
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
+
+/**
+ * @brief Add to the memory allocation counters for the current process
+ *
+ * OS specific call to add to the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, pages);
+}
+
+/**
+ * @brief Subtract from the memory allocation counters for the current process
+ *
+ * OS specific call to subtract from the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, 0 - pages);
+}
+
+/**
+ * kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU
+ * mapping of a memory allocation containing a given address range
+ *
+ * Searches for a CPU mapping of any part of any region that fully encloses the
+ * CPU virtual address range specified by @uaddr and @size. Returns a failure
+ * indication if only part of the address range lies within a CPU mapping.
+ *
+ * @kctx:      The kernel base context used for the allocation.
+ * @uaddr:     Start of the CPU virtual address range.
+ * @size:      Size of the CPU virtual address range (in bytes).
+ * @offset:    The offset from the start of the allocation to the specified CPU
+ *             virtual address.
+ *
+ * Return: 0 if offset was obtained successfully. Error code otherwise.
+ */
+int kbasep_find_enclosing_cpu_mapping_offset(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset);
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer);
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+* @brief Allocates physical pages.
+*
+* Allocates \a nr_pages_requested and updates the alloc object.
+*
+* @param[in] alloc allocation object to add pages to
+* @param[in] nr_pages_requested number of physical pages to allocate
+*
+* @return 0 if all pages have been successfully allocated. Error code otherwise
+*/
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested);
+
+/**
+* @brief Free physical pages.
+*
+* Frees \a nr_pages and updates the alloc object.
+*
+* @param[in] alloc allocation object to free pages from
+* @param[in] nr_pages_to_free number of physical pages to free
+*/
+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
+
+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+{
+	SetPagePrivate(p);
+	if (sizeof(dma_addr_t) > sizeof(p->private)) {
+		/* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
+		 * private field stays the same. So we have to be clever and
+		 * use the fact that we only store DMA addresses of whole pages,
+		 * so the low bits should be zero */
+		KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
+		set_page_private(p, dma_addr >> PAGE_SHIFT);
+	} else {
+		set_page_private(p, dma_addr);
+	}
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+	if (sizeof(dma_addr_t) > sizeof(p->private))
+		return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
+
+	return (dma_addr_t)page_private(p);
+}
+
+static inline void kbase_clear_dma_addr(struct page *p)
+{
+	ClearPagePrivate(p);
+}
+
+/**
+* @brief Process a bus or page fault.
+*
+* This function will process a fault on a specific address space
+*
+* @param[in] kbdev   The @ref kbase_device the fault happened on
+* @param[in] kctx    The @ref kbase_context for the faulting address space if
+*                    one was found.
+* @param[in] as      The address space that has the fault
+*/
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_as *as);
+
+/**
+ * @brief Process a page fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void page_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Process a bus fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void bus_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Flush MMU workqueues.
+ *
+ * This function will cause any outstanding page or bus faults to be processed.
+ * It should be called prior to powering off the GPU.
+ *
+ * @param[in] kbdev   Device pointer
+ */
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev);
+
+/**
+ * kbase_sync_single_for_device - update physical memory and give GPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * kbase_jit_debugfs_init - Add per context debugfs entry for JIT.
+ * @kctx: kbase context
+ */
+void kbase_jit_debugfs_init(struct kbase_context *kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_init - Initialize the JIT memory pool management
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_jit_init(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_allocate - Allocate JIT memory
+ * @kctx: kbase context
+ * @info: JIT allocation information
+ *
+ * Return: JIT allocation on success or NULL on failure.
+ */
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info);
+
+/**
+ * kbase_jit_free - Free a JIT allocation
+ * @kctx: kbase context
+ * @reg: JIT allocation
+ *
+ * Frees a JIT allocation and places it into the free pool for later reuse.
+ */
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing
+ * @reg: JIT allocation
+ */
+void kbase_jit_backing_lost(struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_evict - Evict a JIT allocation from the pool
+ * @kctx: kbase context
+ *
+ * Evict the least recently used JIT allocation from the pool. This can be
+ * required if normal VA allocations are failing due to VA exhaustion.
+ *
+ * Return: True if a JIT allocation was freed, false otherwise.
+ */
+bool kbase_jit_evict(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_term - Terminate the JIT memory pool management
+ * @kctx: kbase context
+ */
+void kbase_jit_term(struct kbase_context *kctx);
+
+/**
+ * kbase_map_external_resource - Map an external resource to the GPU.
+ * @kctx:              kbase context.
+ * @reg:               The region to map.
+ * @locked_mm:         The mm_struct which has been locked for this operation.
+ *
+ * Return: The physical allocation which backs the region on success or NULL
+ * on failure.
+ */
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm);
+
+/**
+ * kbase_unmap_external_resource - Unmap an external resource from the GPU.
+ * @kctx:  kbase context.
+ * @reg:   The region to unmap or NULL if it has already been released.
+ * @alloc: The physical allocation being unmapped.
+ */
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_sticky_resource_init - Initialize sticky resource management.
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_sticky_resource_init(struct kbase_context *kctx);
+
+/**
+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @gpu_addr: The GPU address of the external resource.
+ *
+ * Return: The metadata object which represents the binding between the
+ * external resource and the kbase context on success or NULL on failure.
+ */
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_release - Release a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @meta:     Binding metadata.
+ * @gpu_addr: GPU address of the external resource.
+ *
+ * If meta is NULL then gpu_addr will be used to scan the metadata list and
+ * find the matching metadata (if any), otherwise the provided meta will be
+ * used and gpu_addr will be ignored.
+ *
+ * Return: True if the release found the metadata and the reference was dropped.
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_term - Terminate sticky resource management.
+ * @kctx: kbase context
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx);
+
+/**
+ * kbase_zone_cache_update - Update the memory zone cache after new pages have
+ * been added.
+ * @alloc:        The physical memory allocation to build the cache for.
+ * @start_offset: Offset to where the new pages start.
+ *
+ * Updates an existing memory zone cache, updating the counters for the
+ * various zones.
+ * If the memory allocation doesn't already have a zone cache assume that
+ * one isn't created and thus don't do anything.
+ *
+ * Return: Zero cache was updated, negative error code on error.
+ */
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset);
+
+/**
+ * kbase_zone_cache_build - Build the memory zone cache.
+ * @alloc:        The physical memory allocation to build the cache for.
+ *
+ * Create a new zone cache for the provided physical memory allocation if
+ * one doesn't already exist, if one does exist then just return.
+ *
+ * Return: Zero if the zone cache was created, negative error code on error.
+ */
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_zone_cache_clear - Clear the memory zone cache.
+ * @alloc:        The physical memory allocation to clear the cache on.
+ */
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc);
+
+#endif				/* _KBASE_MEM_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
new file mode 100644
index 0000000..37f7a6a
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -0,0 +1,2683 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.c
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/shrinker.h>
+#include <linux/cache.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_ioctl.h>
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ *
+ * Note: Caller must be holding the processes mmap_sem lock.
+ */
+static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
+		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
+		u64 *gpu_va)
+{
+	int zone;
+	int gpu_pc_bits;
+	struct kbase_va_region *reg;
+	struct device *dev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(gpu_va);
+
+	dev = kctx->kbdev->dev;
+	*gpu_va = 0; /* return 0 on failure */
+
+	gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+
+	if (0 == va_pages) {
+		dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!");
+		goto bad_size;
+	}
+
+	if (va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	if (!kbase_check_alloc_flags(*flags)) {
+		dev_warn(dev,
+				"kbase_mem_alloc called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	/* Limit GPU executable allocs to GPU PC size */
+	if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+	    (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT)))
+		goto bad_ex_size;
+
+	/* find out which VA zone to use */
+	if (*flags & BASE_MEM_SAME_VA)
+		zone = KBASE_REG_ZONE_SAME_VA;
+	else if (*flags & BASE_MEM_PROT_GPU_EX)
+		zone = KBASE_REG_ZONE_EXEC;
+	else
+		zone = KBASE_REG_ZONE_CUSTOM_VA;
+
+	reg = kbase_alloc_free_region(kctx, 0, va_pages, zone);
+	if (!reg) {
+		dev_err(dev, "Failed to allocate free region");
+		goto no_region;
+	}
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	if (kbase_reg_prepare_native(reg, kctx) != 0) {
+		dev_err(dev, "Failed to prepare region");
+		goto prepare_failed;
+	}
+
+	if (*flags & BASE_MEM_GROW_ON_GPF) {
+		reg->extent = extent;
+		if (reg->extent == 0)
+			goto invalid_extent;
+	}
+	else
+		reg->extent = 0;
+
+	if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) {
+		dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)",
+				(unsigned long long)commit_pages,
+				(unsigned long long)va_pages);
+		goto no_mem;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & BASE_MEM_SAME_VA) {
+		unsigned long prot = PROT_NONE;
+		unsigned long va_size = va_pages << PAGE_SHIFT;
+		unsigned long va_map = va_size;
+		unsigned long cookie, cookie_nr;
+		unsigned long cpu_addr;
+
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(dev, "No cookies available for allocation!");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_cookie;
+		}
+		/* return a cookie */
+		cookie_nr = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << cookie_nr);
+		BUG_ON(kctx->pending_regions[cookie_nr]);
+		kctx->pending_regions[cookie_nr] = reg;
+
+		kbase_gpu_vm_unlock(kctx);
+
+		/* relocate to correct base */
+		cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		cookie <<= PAGE_SHIFT;
+
+		/*
+		 * 10.1-10.4 UKU userland relies on the kernel to call mmap.
+		 * For all other versions we can just return the cookie
+		 */
+		if (kctx->api_version < KBASE_API_VERSION(10, 1) ||
+		    kctx->api_version > KBASE_API_VERSION(10, 4)) {
+			*gpu_va = (u64) cookie;
+			return reg;
+		}
+		if (*flags & BASE_MEM_PROT_CPU_RD)
+			prot |= PROT_READ;
+		if (*flags & BASE_MEM_PROT_CPU_WR)
+			prot |= PROT_WRITE;
+
+		cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot,
+				MAP_SHARED, cookie);
+
+		if (IS_ERR_VALUE(cpu_addr)) {
+			kbase_gpu_vm_lock(kctx);
+			kctx->pending_regions[cookie_nr] = NULL;
+			kctx->cookies |= (1UL << cookie_nr);
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+
+		*gpu_va = (u64) cpu_addr;
+	} else /* we control the VA */ {
+		if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
+			dev_warn(dev, "Failed to map memory on GPU");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+
+		kbase_gpu_vm_unlock(kctx);
+	}
+
+	return reg;
+
+no_mmap:
+no_cookie:
+no_mem:
+invalid_extent:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+invalid_flags:
+prepare_failed:
+	kfree(reg);
+no_region:
+bad_ex_size:
+bad_flags:
+bad_size:
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_alloc);
+
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(out);
+
+	if (gpu_addr & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev, "mem_query: gpu_addr: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	switch (query) {
+	case KBASE_MEM_QUERY_COMMIT_SIZE:
+		if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) {
+			*out = kbase_reg_current_backed_size(reg);
+		} else {
+			size_t i;
+			struct kbase_aliased *aliased;
+			*out = 0;
+			aliased = reg->cpu_alloc->imported.alias.aliased;
+			for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++)
+				*out += aliased[i].length;
+		}
+		break;
+	case KBASE_MEM_QUERY_VA_SIZE:
+		*out = reg->nr_pages;
+		break;
+	case KBASE_MEM_QUERY_FLAGS:
+	{
+		*out = 0;
+		if (KBASE_REG_CPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_WR;
+		if (KBASE_REG_CPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_RD;
+		if (KBASE_REG_CPU_CACHED & reg->flags)
+			*out |= BASE_MEM_CACHED_CPU;
+		if (KBASE_REG_GPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_WR;
+		if (KBASE_REG_GPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_RD;
+		if (!(KBASE_REG_GPU_NX & reg->flags))
+			*out |= BASE_MEM_PROT_GPU_EX;
+		if (KBASE_REG_SHARE_BOTH & reg->flags)
+			*out |= BASE_MEM_COHERENT_SYSTEM;
+		if (KBASE_REG_SHARE_IN & reg->flags)
+			*out |= BASE_MEM_COHERENT_LOCAL;
+		break;
+	}
+	default:
+		*out = 0;
+		goto out_unlock;
+	}
+
+	ret = 0;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the
+ * Ephemeral memory eviction list.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages which can be freed.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long pages = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+
+	mutex_lock(&kctx->jit_evict_lock);
+
+	list_for_each_entry(alloc, &kctx->evict_list, evict_node)
+		pages += alloc->nents;
+
+	mutex_unlock(&kctx->jit_evict_lock);
+	return pages;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction
+ * list for pages and try to reclaim them.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages freed (can be less then requested) or -1 if the
+ * shrinker failed to free pages in its pool.
+ *
+ * Note:
+ * This function accesses region structures without taking the region lock,
+ * this is required as the OOM killer can call the shrinker after the region
+ * lock has already been held.
+ * This is safe as we can guarantee that a region on the eviction list will
+ * not be freed (kbase_mem_free_region removes the allocation from the list
+ * before destroying it), or modified by other parts of the driver.
+ * The eviction list itself is guarded by the eviction lock and the MMU updates
+ * are protected by their own lock.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	struct kbase_mem_phy_alloc *tmp;
+	unsigned long freed = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+	mutex_lock(&kctx->jit_evict_lock);
+
+	list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
+		int err;
+
+		err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
+				0, alloc->nents);
+		if (err != 0) {
+			/*
+			 * Failed to remove GPU mapping, tell the shrinker
+			 * to stop trying to shrink our slab even though we
+			 * have pages in it.
+			 */
+			freed = -1;
+			goto out_unlock;
+		}
+
+		/*
+		 * Update alloc->evicted before freeing the backing so the
+		 * helper can determine that it needs to bypass the accounting
+		 * and memory pool.
+		 */
+		alloc->evicted = alloc->nents;
+
+		kbase_free_phy_pages_helper(alloc, alloc->evicted);
+		freed += alloc->evicted;
+		list_del_init(&alloc->evict_node);
+
+		/*
+		 * Inform the JIT allocator this region has lost backing
+		 * as it might need to free the allocation.
+		 */
+		kbase_jit_backing_lost(alloc->reg);
+
+		/* Enough pages have been freed so stop now */
+		if (freed > sc->nr_to_scan)
+			break;
+	}
+out_unlock:
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_evictable_reclaim_count_objects(s, sc);
+
+	return kbase_mem_evictable_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_evictable_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->evict_list);
+	mutex_init(&kctx->jit_evict_lock);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink;
+#else
+	kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
+	kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+#endif
+	kctx->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	kctx->reclaim.batch = 0;
+#endif
+	register_shrinker(&kctx->reclaim);
+	return 0;
+}
+
+void kbase_mem_evictable_deinit(struct kbase_context *kctx)
+{
+	unregister_shrinker(&kctx->reclaim);
+}
+
+struct kbase_mem_zone_cache_entry {
+	/* List head used to link the cache entry to the memory allocation. */
+	struct list_head zone_node;
+	/* The zone the cacheline is for. */
+	struct zone *zone;
+	/* The number of pages in the allocation which belong to this zone. */
+	u64 count;
+};
+
+static bool kbase_zone_cache_builder(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	struct kbase_mem_zone_cache_entry *cache = NULL;
+	size_t i;
+	int ret = 0;
+
+	for (i = start_offset; i < alloc->nents; i++) {
+		struct page *p = phys_to_page(as_phys_addr_t(alloc->pages[i]));
+		struct zone *zone = page_zone(p);
+		bool create = true;
+
+		if (cache && (cache->zone == zone)) {
+			/*
+			 * Fast path check as most of the time adjacent
+			 * pages come from the same zone.
+			 */
+			create = false;
+		} else {
+			/*
+			 * Slow path check, walk all the cache entries to see
+			 * if we already know about this zone.
+			 */
+			list_for_each_entry(cache, &alloc->zone_cache, zone_node) {
+				if (cache->zone == zone) {
+					create = false;
+					break;
+				}
+			}
+		}
+
+		/* This zone wasn't found in the cache, create an entry for it */
+		if (create) {
+			cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+			if (!cache) {
+				ret = -ENOMEM;
+				goto bail;
+			}
+			cache->zone = zone;
+			cache->count = 0;
+			list_add(&cache->zone_node, &alloc->zone_cache);
+		}
+
+		cache->count++;
+	}
+	return 0;
+
+bail:
+	return ret;
+}
+
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	/*
+	 * Bail if the zone cache is empty, only update the cache if it
+	 * existed in the first place.
+	 */
+	if (list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, start_offset);
+}
+
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc)
+{
+	/* Bail if the zone cache already exists */
+	if (!list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, 0);
+}
+
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_mem_zone_cache_entry *walker;
+
+	while(!list_empty(&alloc->zone_cache)){
+		walker = list_first_entry(&alloc->zone_cache,
+				struct kbase_mem_zone_cache_entry, zone_node);
+		list_del(&walker->zone_node);
+		kfree(walker);
+	}
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p;
+			struct zone *zone;
+
+			p = phys_to_page(as_phys_addr_t(alloc->pages[i]));
+			zone = page_zone(p);
+
+			zone_page_state_add(1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	kbase_process_page_usage_dec(kctx, alloc->nents);
+	new_page_count = kbase_atomic_sub_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+}
+
+/**
+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable.
+ * @alloc: The physical allocation
+ */
+static
+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	new_page_count = kbase_atomic_add_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters so that the allocation is accounted for
+	 * against the process and thus is visible to the OOM killer,
+	 * then remove it from the reclaimable accounting. */
+	kbase_process_page_usage_inc(kctx, alloc->nents);
+
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(-zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p;
+			struct zone *zone;
+
+			p = phys_to_page(as_phys_addr_t(alloc->pages[i]));
+			zone = page_zone(p);
+			zone_page_state_add(-1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+}
+
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* This alloction can't already be on a list. */
+	WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
+	kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
+			0, gpu_alloc->nents);
+
+	/*
+	 * Add the allocation to the eviction list, after this point the shrink
+	 * can reclaim it.
+	 */
+	mutex_lock(&kctx->jit_evict_lock);
+	list_add(&gpu_alloc->evict_node, &kctx->evict_list);
+	mutex_unlock(&kctx->jit_evict_lock);
+	kbase_mem_evictable_mark_reclaim(gpu_alloc);
+
+	gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED;
+	return 0;
+}
+
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * First remove the allocation from the eviction list as it's no
+	 * longer eligible for eviction.
+	 */
+	list_del_init(&gpu_alloc->evict_node);
+
+	if (gpu_alloc->evicted == 0) {
+		/*
+		 * The backing is still present, update the VM stats as it's
+		 * in use again.
+		 */
+		kbase_mem_evictable_unmark_reclaim(gpu_alloc);
+	} else {
+		/* If the region is still alive ... */
+		if (gpu_alloc->reg) {
+			/* ... allocate replacement backing ... */
+			err = kbase_alloc_phy_pages_helper(gpu_alloc,
+					gpu_alloc->evicted);
+
+			/*
+			 * ... and grow the mapping back to its
+			 * pre-eviction size.
+			 */
+			if (!err)
+				err = kbase_mem_grow_gpu_mapping(kctx,
+						gpu_alloc->reg,
+						gpu_alloc->evicted, 0);
+
+			gpu_alloc->evicted = 0;
+		}
+	}
+
+	/* If the region is still alive remove the DONT_NEED attribute. */
+	if (gpu_alloc->reg)
+		gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED;
+
+	return (err == 0);
+}
+
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+	unsigned int real_flags = 0;
+	unsigned int prev_flags = 0;
+	bool prev_needed, new_needed;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (!gpu_addr)
+		return -EINVAL;
+
+	if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE))
+		return -EINVAL;
+
+	/* nuke other bits */
+	flags &= mask;
+
+	/* check for only supported flags */
+	if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* mask covers bits we don't support? */
+	if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* convert flags */
+	if (BASE_MEM_COHERENT_SYSTEM & flags)
+		real_flags |= KBASE_REG_SHARE_BOTH;
+	else if (BASE_MEM_COHERENT_LOCAL & flags)
+		real_flags |= KBASE_REG_SHARE_IN;
+
+	/* now we can lock down the context, and find the region */
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	/* Is the region being transitioning between not needed and needed? */
+	prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
+	new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
+	if (prev_needed != new_needed) {
+		/* Aliased allocations can't be made ephemeral */
+		if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1)
+			goto out_unlock;
+
+		if (new_needed) {
+			/* Only native allocations can be marked not needed */
+			if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			ret = kbase_mem_evictable_make(reg->gpu_alloc);
+			if (ret)
+				goto out_unlock;
+		} else {
+			kbase_mem_evictable_unmake(reg->gpu_alloc);
+		}
+	}
+
+	/* limit to imported memory */
+	if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) &&
+	     (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
+		goto out_unlock;
+
+	/* no change? */
+	if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/* save for roll back */
+	prev_flags = reg->flags;
+	reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+	reg->flags |= real_flags;
+
+	/* Currently supporting only imported memory */
+	switch (reg->gpu_alloc->type) {
+#ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ret = kbase_mmu_update_pages(kctx, reg->start_pfn,
+					     kbase_get_gpu_phy_pages(reg),
+				             reg->gpu_alloc->nents, reg->flags);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		/* Future use will use the new flags, existing mapping will NOT be updated
+		 * as memory should not be in use by the GPU when updating the flags.
+		 */
+		ret = 0;
+		WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+		break;
+#endif
+	default:
+		break;
+	}
+
+	/* roll back on error, i.e. not UMP */
+	if (ret)
+		reg->flags = prev_flags;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	up_write(&current->mm->mmap_sem);
+out:
+	return ret;
+}
+
+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
+
+#ifdef CONFIG_UMP
+static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	ump_dd_handle umph;
+	u64 block_count;
+	const ump_dd_physical_block_64 *block_array;
+	u64 i, j;
+	int page = 0;
+	ump_alloc_flags ump_flags;
+	ump_alloc_flags cpu_flags;
+	ump_alloc_flags gpu_flags;
+
+	if (*flags & BASE_MEM_SECURE)
+		goto bad_flags;
+
+	umph = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE == umph)
+		goto bad_id;
+
+	ump_flags = ump_dd_allocation_flags_get(umph);
+	cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK;
+	gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) &
+			UMP_DEVICE_MASK;
+
+	*va_pages = ump_dd_size_get_64(umph);
+	*va_pages >>= PAGE_SHIFT;
+
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	if (*flags & BASE_MEM_SAME_VA)
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	else
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+
+	if (!reg)
+		goto no_region;
+
+	/* we've got pages to map now, and support SAME_VA */
+	*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	reg->gpu_alloc->imported.ump_handle = umph;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMP is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMP cannot be grown */
+
+	/* Override import flags based on UMP flags */
+	*flags &= ~(BASE_MEM_CACHED_CPU);
+	*flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR);
+	*flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR);
+
+	if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) {
+		reg->flags |= KBASE_REG_CPU_CACHED;
+		*flags |= BASE_MEM_CACHED_CPU;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_WR) {
+		reg->flags |= KBASE_REG_CPU_WR;
+		*flags |= BASE_MEM_PROT_CPU_WR;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_RD) {
+		reg->flags |= KBASE_REG_CPU_RD;
+		*flags |= BASE_MEM_PROT_CPU_RD;
+	}
+
+	if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR))
+		reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (gpu_flags & UMP_PROT_DEVICE_WR) {
+		reg->flags |= KBASE_REG_GPU_WR;
+		*flags |= BASE_MEM_PROT_GPU_WR;
+	}
+
+	if (gpu_flags & UMP_PROT_DEVICE_RD) {
+		reg->flags |= KBASE_REG_GPU_RD;
+		*flags |= BASE_MEM_PROT_GPU_RD;
+	}
+
+	/* ump phys block query */
+	ump_dd_phys_blocks_get_64(umph, &block_count, &block_array);
+
+	for (i = 0; i < block_count; i++) {
+		for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) {
+			struct tagged_addr tagged;
+
+			tagged = as_tagged(block_array[i].addr +
+					   (j << PAGE_SHIFT));
+			reg->gpu_alloc->pages[page] = tagged;
+			page++;
+		}
+	}
+	reg->gpu_alloc->nents = *va_pages;
+	reg->extent = 0;
+
+	return reg;
+
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	ump_dd_release(umph);
+bad_id:
+bad_flags:
+	return NULL;
+}
+#endif				/* CONFIG_UMP */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
+		int fd, u64 *va_pages, u64 *flags, u32 padding)
+{
+	struct kbase_va_region *reg;
+	struct dma_buf *dma_buf;
+	struct dma_buf_attachment *dma_attachment;
+	bool shared_zone = false;
+
+	dma_buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(dma_buf))
+		goto no_buf;
+
+	dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+	if (!dma_attachment)
+		goto no_attachment;
+
+	*va_pages = (PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT) + padding;
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* ignore SAME_VA */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	} else {
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	/* No pages to map yet */
+	reg->gpu_alloc->nents = 0;
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMM is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMM cannot be grown */
+	reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (*flags & BASE_MEM_SECURE)
+		reg->flags |= KBASE_REG_SECURE;
+
+	if (padding)
+		reg->flags |= KBASE_REG_IMPORT_PAD;
+
+	reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM;
+	reg->gpu_alloc->imported.umm.sgt = NULL;
+	reg->gpu_alloc->imported.umm.dma_buf = dma_buf;
+	reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
+	reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
+	reg->extent = 0;
+
+	return reg;
+
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	dma_buf_detach(dma_buf, dma_attachment);
+no_attachment:
+	dma_buf_put(dma_buf);
+no_buf:
+	return NULL;
+}
+#endif  /* CONFIG_DMA_SHARED_BUFFER */
+
+static u32 kbase_get_cache_line_alignment(struct kbase_context *kctx)
+{
+	u32 cpu_cache_line_size = cache_line_size();
+	u32 gpu_cache_line_size =
+		(1UL << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
+
+	return ((cpu_cache_line_size > gpu_cache_line_size) ?
+				cpu_cache_line_size :
+				gpu_cache_line_size);
+}
+
+static struct kbase_va_region *kbase_mem_from_user_buffer(
+		struct kbase_context *kctx, unsigned long address,
+		unsigned long size, u64 *va_pages, u64 *flags)
+{
+	long i;
+	struct kbase_va_region *reg;
+	long faulted_pages;
+	int zone = KBASE_REG_ZONE_CUSTOM_VA;
+	bool shared_zone = false;
+	u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx);
+	struct kbase_alloc_import_user_buf *user_buf;
+	struct page **pages = NULL;
+
+	if ((address & (cache_line_alignment - 1)) != 0 ||
+			(size & (cache_line_alignment - 1)) != 0) {
+		/* Coherency must be enabled to handle partial cache lines */
+		if (*flags & (BASE_MEM_COHERENT_SYSTEM |
+			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
+			/* Force coherent system required flag, import will
+			 * then fail if coherency isn't available
+			 */
+			*flags |= BASE_MEM_COHERENT_SYSTEM_REQUIRED;
+		} else {
+			dev_warn(kctx->kbdev->dev,
+					"User buffer is not cache line aligned and no coherency enabled\n");
+			goto bad_size;
+		}
+	}
+
+	*va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) -
+		PFN_DOWN(address);
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (UINT64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* SAME_VA generally not supported with imported memory (no known use cases) */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		zone = KBASE_REG_ZONE_SAME_VA;
+	}
+
+	reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone);
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages,
+			KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */
+
+	user_buf = &reg->gpu_alloc->imported.user_buf;
+
+	user_buf->size = size;
+	user_buf->address = address;
+	user_buf->nr_pages = *va_pages;
+	user_buf->mm = current->mm;
+	user_buf->pages = kmalloc_array(*va_pages, sizeof(struct page *),
+			GFP_KERNEL);
+
+	if (!user_buf->pages)
+		goto no_page_array;
+
+	/* If the region is coherent with the CPU then the memory is imported
+	 * and mapped onto the GPU immediately.
+	 * Otherwise get_user_pages is called as a sanity check, but with
+	 * NULL as the pages argument which will fault the pages, but not
+	 * pin them. The memory will then be pinned only around the jobs that
+	 * specify the region as an external resource.
+	 */
+	if (reg->flags & KBASE_REG_SHARE_BOTH) {
+		pages = user_buf->pages;
+		*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+	}
+
+	down_read(&current->mm->mmap_sem);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+#else
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#endif
+
+	up_read(&current->mm->mmap_sem);
+
+	if (faulted_pages != *va_pages)
+		goto fault_mismatch;
+
+	atomic_inc(&current->mm->mm_count);
+
+	reg->gpu_alloc->nents = 0;
+	reg->extent = 0;
+
+	if (pages) {
+		struct device *dev = kctx->kbdev->dev;
+		unsigned long local_size = user_buf->size;
+		unsigned long offset = user_buf->address & ~PAGE_MASK;
+		struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
+
+		/* Top bit signifies that this was pinned on import */
+		user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
+
+		for (i = 0; i < faulted_pages; i++) {
+			dma_addr_t dma_addr;
+			unsigned long min;
+
+			min = MIN(PAGE_SIZE - offset, local_size);
+			dma_addr = dma_map_page(dev, pages[i],
+					offset, min,
+					DMA_BIDIRECTIONAL);
+			if (dma_mapping_error(dev, dma_addr))
+				goto unwind_dma_map;
+
+			user_buf->dma_addrs[i] = dma_addr;
+			pa[i] = as_tagged(page_to_phys(pages[i]));
+
+			local_size -= min;
+			offset = 0;
+		}
+
+		reg->gpu_alloc->nents = faulted_pages;
+	}
+
+	return reg;
+
+unwind_dma_map:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				user_buf->dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+fault_mismatch:
+	if (pages) {
+		for (i = 0; i < faulted_pages; i++)
+			put_page(pages[i]);
+	}
+	kfree(user_buf->pages);
+no_page_array:
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	return NULL;
+
+}
+
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+		    u64 nents, struct base_mem_aliasing_info *ai,
+		    u64 *num_pages)
+{
+	struct kbase_va_region *reg;
+	u64 gpu_va;
+	size_t i;
+	bool coherent;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(ai);
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	/* mask to only allowed flags */
+	*flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+		   BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
+		   BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+
+	if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_alias called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+	coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 ||
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0;
+
+	if (!stride)
+		goto bad_stride;
+
+	if (!nents)
+		goto bad_nents;
+
+	if ((nents * stride) > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* calculate the number of pages this alias will cover */
+	*num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* 64-bit tasks must MMAP anyway, but not expose this address to
+		 * clients */
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_SAME_VA);
+	} else {
+#else
+	if (1) {
+#endif
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	/* zero-sized page array, as we don't need one/can support one */
+	reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->gpu_alloc->imported.alias.nents = nents;
+	reg->gpu_alloc->imported.alias.stride = stride;
+	reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents);
+	if (!reg->gpu_alloc->imported.alias.aliased)
+		goto no_aliased_array;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* validate and add src handles */
+	for (i = 0; i < nents; i++) {
+		if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+			if (ai[i].handle.basep.handle !=
+			    BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE)
+				goto bad_handle; /* unsupported magic handle */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+		} else {
+			struct kbase_va_region *aliasing_reg;
+			struct kbase_mem_phy_alloc *alloc;
+
+			aliasing_reg = kbase_region_tracker_find_region_base_address(
+				kctx,
+				(ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT);
+
+			/* validate found region */
+			if (!aliasing_reg)
+				goto bad_handle; /* Not found */
+			if (aliasing_reg->flags & KBASE_REG_FREE)
+				goto bad_handle; /* Free region */
+			if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+				goto bad_handle; /* Ephemeral region */
+			if (!aliasing_reg->gpu_alloc)
+				goto bad_handle; /* No alloc */
+			if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+				goto bad_handle; /* Not a native alloc */
+			if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0))
+				goto bad_handle;
+				/* Non-coherent memory cannot alias
+				   coherent memory, and vice versa.*/
+
+			/* check size against stride */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+
+			alloc = aliasing_reg->gpu_alloc;
+
+			/* check against the alloc's size */
+			if (ai[i].offset > alloc->nents)
+				goto bad_handle; /* beyond end */
+			if (ai[i].offset + ai[i].length > alloc->nents)
+				goto bad_handle; /* beyond end */
+
+			reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+			reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset;
+		}
+	}
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
+			goto no_cookie;
+		}
+		/* return a cookie */
+		gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << gpu_va);
+		BUG_ON(kctx->pending_regions[gpu_va]);
+		kctx->pending_regions[gpu_va] = reg;
+
+		/* relocate to correct base */
+		gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		gpu_va <<= PAGE_SHIFT;
+	} else /* we control the VA */ {
+#else
+	if (1) {
+#endif
+		if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+			dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags &= ~KBASE_REG_GROWABLE;
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return gpu_va;
+
+#ifdef CONFIG_64BIT
+no_cookie:
+#endif
+no_mmap:
+bad_handle:
+	kbase_gpu_vm_unlock(kctx);
+no_aliased_array:
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_reg:
+bad_size:
+bad_nents:
+bad_stride:
+bad_flags:
+	return 0;
+}
+
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages,
+		u64 *flags)
+{
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_pages);
+	KBASE_DEBUG_ASSERT(flags);
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+		*flags |= BASE_MEM_SAME_VA;
+#endif
+
+	if (!kbase_check_import_flags(*flags)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
+		dev_warn(kctx->kbdev->dev,
+				"padding is only supported for UMM");
+		goto bad_flags;
+	}
+
+	switch (type) {
+#ifdef CONFIG_UMP
+	case BASE_MEM_IMPORT_TYPE_UMP: {
+		ump_secure_id id;
+
+		if (get_user(id, (ump_secure_id __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_ump(kctx, id, va_pages, flags);
+	}
+	break;
+#endif /* CONFIG_UMP */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case BASE_MEM_IMPORT_TYPE_UMM: {
+		int fd;
+
+		if (get_user(fd, (int __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_umm(kctx, fd, va_pages, flags,
+					padding);
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
+		struct base_mem_import_user_buffer user_buffer;
+		void __user *uptr;
+
+		if (copy_from_user(&user_buffer, phandle,
+				sizeof(user_buffer))) {
+			reg = NULL;
+		} else {
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				uptr = compat_ptr(user_buffer.ptr);
+			else
+#endif
+				uptr = u64_to_user_ptr(user_buffer.ptr);
+
+			reg = kbase_mem_from_user_buffer(kctx,
+					(unsigned long)uptr, user_buffer.length,
+					va_pages, flags);
+		}
+		break;
+	}
+	default: {
+		reg = NULL;
+		break;
+	}
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies)
+			goto no_cookie;
+		/* return a cookie */
+		*gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << *gpu_va);
+		BUG_ON(kctx->pending_regions[*gpu_va]);
+		kctx->pending_regions[*gpu_va] = reg;
+
+		/* relocate to correct base */
+		*gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		*gpu_va <<= PAGE_SHIFT;
+
+	} else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES)  {
+		/* we control the VA, mmap now to the GPU */
+		if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	} else {
+		/* we control the VA, but nothing to mmap yet */
+		if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	/* clear out private flags */
+	*flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+
+no_gpu_va:
+no_cookie:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+no_reg:
+bad_flags:
+	*gpu_va = 0;
+	*va_pages = 0;
+	*flags = 0;
+	return -ENOMEM;
+}
+
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	struct tagged_addr *phy_pages;
+	u64 delta = new_pages - old_pages;
+	int ret = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Map the new pages into the GPU */
+	phy_pages = kbase_get_gpu_phy_pages(reg);
+	ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
+			phy_pages + old_pages, delta, reg->flags);
+
+	return ret;
+}
+
+static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 gpu_va_start = reg->start_pfn;
+
+	if (new_pages == old_pages)
+		/* Nothing to do */
+		return;
+
+	unmap_mapping_range(kctx->filp->f_inode->i_mapping,
+			(gpu_va_start + new_pages)<<PAGE_SHIFT,
+			(old_pages - new_pages)<<PAGE_SHIFT, 1);
+}
+
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 delta = old_pages - new_pages;
+	int ret = 0;
+
+	ret = kbase_mmu_teardown_pages(kctx,
+			reg->start_pfn + new_pages, delta);
+
+	return ret;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
+{
+	u64 old_pages;
+	u64 delta;
+	int res = -EINVAL;
+	struct kbase_va_region *reg;
+	bool read_locked = false;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+	if (gpu_addr & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev, "kbase:mem_commit: gpu_addr: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+		goto out_unlock;
+
+	if (0 == (reg->flags & KBASE_REG_GROWABLE))
+		goto out_unlock;
+
+	/* Would overflow the VA region */
+	if (new_pages > reg->nr_pages)
+		goto out_unlock;
+
+	/* can't be mapped more than once on the GPU */
+	if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1)
+		goto out_unlock;
+	/* can't grow regions which are ephemeral */
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+	if (new_pages == reg->gpu_alloc->nents) {
+		/* no change */
+		res = 0;
+		goto out_unlock;
+	}
+
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (new_pages > old_pages) {
+		delta = new_pages - old_pages;
+
+		/*
+		 * No update to the mm so downgrade the writer lock to a read
+		 * lock so other readers aren't blocked after this point.
+		 */
+		downgrade_write(&current->mm->mmap_sem);
+		read_locked = true;
+
+		/* Allocate some more pages */
+		if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+		if (reg->cpu_alloc != reg->gpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					reg->gpu_alloc, delta) != 0) {
+				res = -ENOMEM;
+				kbase_free_phy_pages_helper(reg->cpu_alloc,
+						delta);
+				goto out_unlock;
+			}
+		}
+
+		/* No update required for CPU mappings, that's done on fault. */
+
+		/* Update GPU mapping. */
+		res = kbase_mem_grow_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* On error free the new pages */
+		if (res) {
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+	} else {
+		delta = old_pages - new_pages;
+
+		/* Update all CPU mapping(s) */
+		kbase_mem_shrink_cpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* Update the GPU mapping */
+		res = kbase_mem_shrink_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+
+		kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+		if (reg->cpu_alloc != reg->gpu_alloc)
+			kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	if (read_locked)
+		up_read(&current->mm->mmap_sem);
+	else
+		up_write(&current->mm->mmap_sem);
+
+	return res;
+}
+
+static void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* non-atomic as we're under Linux' mm lock */
+	map->count++;
+}
+
+static void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* non-atomic as we're under Linux' mm lock */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	kbase_gpu_vm_lock(map->kctx);
+
+	if (map->free_on_close) {
+		KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
+				KBASE_REG_ZONE_SAME_VA);
+		/* Avoid freeing memory on the process death which results in
+		 * GPU Page Fault. Memory will be freed in kbase_destroy_context
+		 */
+		if (!(current->flags & PF_EXITING))
+			kbase_mem_free_region(map->kctx, map->region);
+	}
+
+	list_del(&map->mappings_list);
+
+	kbase_gpu_vm_unlock(map->kctx);
+
+	kbase_mem_phy_alloc_put(map->alloc);
+	kfree(map);
+}
+
+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
+
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
+static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+#else
+static int kbase_cpu_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+#endif
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+	pgoff_t rel_pgoff;
+	size_t i;
+	pgoff_t addr;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	rel_pgoff = vmf->pgoff - map->region->start_pfn;
+
+	kbase_gpu_vm_lock(map->kctx);
+	if (rel_pgoff >= map->alloc->nents)
+		goto locked_bad_fault;
+
+	/* Fault on access to DONT_NEED regions */
+	if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED))
+		goto locked_bad_fault;
+
+	/* insert all valid pages from the fault location */
+	i = rel_pgoff;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	addr = (pgoff_t)((uintptr_t)vmf->virtual_address >> PAGE_SHIFT);
+#else
+	addr = (pgoff_t)(vmf->address >> PAGE_SHIFT);
+#endif
+	while (i < map->alloc->nents && (addr < vma->vm_end >> PAGE_SHIFT)) {
+		int ret = vm_insert_pfn(vma, addr << PAGE_SHIFT,
+		    PFN_DOWN(as_phys_addr_t(map->alloc->pages[i])));
+		if (ret < 0 && ret != -EBUSY)
+			goto locked_bad_fault;
+
+		i++; addr++;
+	}
+
+	kbase_gpu_vm_unlock(map->kctx);
+	/* we resolved it, nothing for VM to do */
+	return VM_FAULT_NOPAGE;
+
+locked_bad_fault:
+	kbase_gpu_vm_unlock(map->kctx);
+	return VM_FAULT_SIGBUS;
+}
+
+const struct vm_operations_struct kbase_vm_ops = {
+	.open  = kbase_cpu_vm_open,
+	.close = kbase_cpu_vm_close,
+	.fault = kbase_cpu_vm_fault
+};
+
+static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close)
+{
+	struct kbase_cpu_mapping *map;
+	struct tagged_addr *page_array;
+	int err = 0;
+	int i;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+	if (!map) {
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+	 * VM_DONTEXPAND - disable mremap on this region
+	 * VM_IO - disables paging
+	 * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+	 * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+	 *               This is needed to support using the dedicated and
+	 *               the OS based memory backends together.
+	 */
+	/*
+	 * This will need updating to propagate coherency flags
+	 * See MIDBASE-1057
+	 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_ops;
+	vma->vm_private_data = map;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+	    (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+		/* We can't map vmalloc'd memory uncached.
+		 * Other memory will have been returned from
+		 * kbase_mem_pool which would be
+		 * suitable for mapping uncached.
+		 */
+		BUG_ON(kaddr);
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	if (!kaddr) {
+		unsigned long addr = vma->vm_start + aligned_offset;
+		u64 start_off = vma->vm_pgoff - reg->start_pfn +
+			(aligned_offset>>PAGE_SHIFT);
+
+		vma->vm_flags |= VM_PFNMAP;
+		for (i = 0; i < nr_pages; i++) {
+			phys_addr_t phys;
+
+			phys = as_phys_addr_t(page_array[i + start_off]);
+			err = vm_insert_pfn(vma, addr, PFN_DOWN(phys));
+			if (WARN_ON(err))
+				break;
+
+			addr += PAGE_SIZE;
+		}
+	} else {
+		WARN_ON(aligned_offset);
+		/* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+		vma->vm_flags |= VM_MIXEDMAP;
+		/* vmalloc remaping is easy... */
+		err = remap_vmalloc_range(vma, kaddr, 0);
+		WARN_ON(err);
+	}
+
+	if (err) {
+		kfree(map);
+		goto out;
+	}
+
+	map->region = reg;
+	map->free_on_close = free_on_close;
+	map->kctx = reg->kctx;
+	map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->count = 1; /* start with one ref */
+
+	if (reg->flags & KBASE_REG_CPU_CACHED)
+		map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+	return err;
+}
+
+static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
+{
+	struct kbase_va_region *new_reg;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+	u32 *tb;
+	int owns_tb = 1;
+
+	dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	if (!kctx->jctx.tb) {
+		KBASE_DEBUG_ASSERT(0 != size);
+		tb = vmalloc_user(size);
+
+		if (NULL == tb) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = kbase_device_trace_buffer_install(kctx, tb, size);
+		if (err) {
+			vfree(tb);
+			goto out;
+		}
+	} else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	*kaddr = kctx->jctx.tb;
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_region;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->cpu_alloc->imported.kctx = kctx;
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+
+	/* alloc now owns the tb */
+	owns_tb = 0;
+
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_va_region;
+	}
+
+	*reg = new_reg;
+
+	/* map read only, noexec */
+	vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+	/* the rest of the flags is added by the cpu_mmap handler */
+
+	dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
+	return 0;
+
+out_no_va_region:
+out_no_alloc:
+	kbase_free_alloced_region(new_reg);
+out_no_region:
+	if (owns_tb) {
+		kbase_device_trace_buffer_uninstall(kctx);
+		vfree(tb);
+	}
+out:
+	return err;
+}
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
+{
+	struct kbase_va_region *new_reg;
+	void *kaddr;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	kaddr = kbase_mmu_dump(kctx, nr_pages);
+
+	if (!kaddr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_va_region;
+	}
+
+	*kmap_addr = kaddr;
+	*reg = new_reg;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+	return 0;
+
+out_no_alloc:
+out_va_region:
+	kbase_free_alloced_region(new_reg);
+out:
+	return err;
+}
+
+
+void kbase_os_mem_map_lock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	down_read(&mm->mmap_sem);
+}
+
+void kbase_os_mem_map_unlock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	up_read(&mm->mmap_sem);
+}
+
+static int kbasep_reg_mmap(struct kbase_context *kctx,
+			   struct vm_area_struct *vma,
+			   struct kbase_va_region **regm,
+			   size_t *nr_pages, size_t *aligned_offset)
+
+{
+	int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+	struct kbase_va_region *reg;
+	int err = 0;
+
+	*aligned_offset = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n");
+
+	/* SAME_VA stuff, fetch the right region */
+	reg = kctx->pending_regions[cookie];
+	if (!reg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if ((reg->flags & KBASE_REG_GPU_NX) && (reg->nr_pages != *nr_pages)) {
+		/* incorrect mmap size */
+		/* leave the cookie for a potential later
+		 * mapping, or to be reclaimed later when the
+		 * context is freed */
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) ||
+	    (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) {
+		/* VM flags inconsistent with region flags */
+		err = -EPERM;
+		dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n",
+							__FILE__, __LINE__);
+		goto out;
+	}
+
+	/* adjust down nr_pages to what we have physically */
+	*nr_pages = kbase_reg_current_backed_size(reg);
+
+	if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset,
+						reg->nr_pages, 1) != 0) {
+		dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__);
+		/* Unable to map in GPU space. */
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+	/* no need for the cookie anymore */
+	kctx->pending_regions[cookie] = NULL;
+	kctx->cookies |= (1UL << cookie);
+
+	/*
+	 * Overwrite the offset with the region start_pfn, so we effectively
+	 * map from offset 0 in the region. However subtract the aligned
+	 * offset so that when user space trims the mapping the beginning of
+	 * the trimmed VMA has the correct vm_pgoff;
+	 */
+	vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT);
+out:
+	*regm = reg;
+	dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n");
+
+	return err;
+}
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx = file->private_data;
+	struct kbase_va_region *reg = NULL;
+	void *kaddr = NULL;
+	size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int err = 0;
+	int free_on_close = 0;
+	struct device *dev = kctx->kbdev->dev;
+	size_t aligned_offset = 0;
+
+	dev_dbg(dev, "kbase_mmap\n");
+
+	/* strip away corresponding VM_MAY% flags to the VM_% flags requested */
+	vma->vm_flags &= ~((vma->vm_flags & (VM_READ | VM_WRITE)) << 4);
+
+	if (0 == nr_pages) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
+		/* The non-mapped tracking helper page */
+		err = kbase_tracking_page_setup(kctx, vma);
+		goto out_unlock;
+	}
+
+	/* if not the MTP, verify that the MTP has been mapped */
+	rcu_read_lock();
+	/* catches both when the special page isn't present or
+	 * when we've forked */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		err = -EINVAL;
+		rcu_read_unlock();
+		goto out_unlock;
+	}
+	rcu_read_unlock();
+
+	switch (vma->vm_pgoff) {
+	case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
+	case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE):
+		/* Illegal handle for direct map */
+		err = -EINVAL;
+		goto out_unlock;
+	case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE):
+		err = kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+		/* MMU dump */
+		err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+	     PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+		err = kbasep_reg_mmap(kctx, vma, &reg, &nr_pages,
+							&aligned_offset);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	}
+	default: {
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+					(u64)vma->vm_pgoff << PAGE_SHIFT);
+
+		if (reg && !(reg->flags & KBASE_REG_FREE)) {
+			/* will this mapping overflow the size of the region? */
+			if (nr_pages > (reg->nr_pages -
+					(vma->vm_pgoff - reg->start_pfn))) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
+			if ((vma->vm_flags & VM_READ &&
+			     !(reg->flags & KBASE_REG_CPU_RD)) ||
+			    (vma->vm_flags & VM_WRITE &&
+			     !(reg->flags & KBASE_REG_CPU_WR))) {
+				/* VM flags inconsistent with region flags */
+				err = -EPERM;
+				dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+				goto out_unlock;
+			}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+			if (KBASE_MEM_TYPE_IMPORTED_UMM ==
+							reg->cpu_alloc->type) {
+				err = dma_buf_mmap(
+					reg->cpu_alloc->imported.umm.dma_buf,
+					vma, vma->vm_pgoff - reg->start_pfn);
+				goto out_unlock;
+			}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+			/* limit what we map to the amount currently backed */
+			if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+				if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
+					nr_pages = 0;
+				else
+					nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn);
+			}
+		} else {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+	} /* default */
+	} /* switch */
+
+	err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
+		/* MMU dump - userspace should now have a reference on
+		 * the pages, so we can now free the kernel mapping */
+		vfree(kaddr);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+out:
+	if (err)
+		dev_err(dev, "mmap failed %d\n", err);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmap);
+
+static void kbasep_sync_mem_regions(struct kbase_context *kctx,
+		struct kbase_vmap_struct *map, enum kbase_sync_type dest)
+{
+	size_t i;
+	off_t const offset = (uintptr_t)map->gpu_addr & ~PAGE_MASK;
+	size_t const page_count = PFN_UP(offset + map->size);
+
+	/* Sync first page */
+	size_t sz = MIN(((size_t) PAGE_SIZE - offset), map->size);
+	struct tagged_addr cpu_pa = map->cpu_pages[0];
+	struct tagged_addr gpu_pa = map->gpu_pages[0];
+
+	kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, dest);
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		cpu_pa = map->cpu_pages[i];
+		gpu_pa = map->gpu_pages[i];
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, dest);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1) {
+		cpu_pa = map->cpu_pages[page_count - 1];
+		gpu_pa = map->gpu_pages[page_count - 1];
+		sz = ((offset + map->size - 1) & ~PAGE_MASK) + 1;
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, dest);
+	}
+}
+
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map)
+{
+	struct kbase_va_region *reg;
+	unsigned long page_index;
+	unsigned int offset = gpu_addr & ~PAGE_MASK;
+	size_t page_count = PFN_UP(offset + size);
+	struct tagged_addr *page_array;
+	struct page **pages;
+	void *cpu_addr = NULL;
+	pgprot_t prot;
+	size_t i;
+
+	if (!size || !map)
+		return NULL;
+
+	/* check if page_count calculation will wrap */
+	if (size > ((size_t)-1 / PAGE_SIZE))
+		return NULL;
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn;
+
+	/* check if page_index + page_count will wrap */
+	if (-1UL - page_count < page_index)
+		goto out_unlock;
+
+	if (page_index + page_count > kbase_reg_current_backed_size(reg))
+		goto out_unlock;
+
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+	/* check access permissions can be satisfied
+	 * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */
+	if ((reg->flags & prot_request) != prot_request)
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+	if (!page_array)
+		goto out_unlock;
+
+	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out_unlock;
+
+	for (i = 0; i < page_count; i++)
+		pages[i] = phys_to_page(as_phys_addr_t(page_array[page_index +
+								  i]));
+
+	prot = PAGE_KERNEL;
+	if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
+		/* Map uncached */
+		prot = pgprot_writecombine(prot);
+	}
+	/* Note: enforcing a RO prot_request onto prot is not done, since:
+	 * - CPU-arch-specific integration required
+	 * - kbase_vmap() requires no access checks to be made/enforced */
+
+	cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+
+	kfree(pages);
+
+	if (!cpu_addr)
+		goto out_unlock;
+
+	map->gpu_addr = gpu_addr;
+	map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
+	map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+	map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
+	map->addr = (void *)((uintptr_t)cpu_addr + offset);
+	map->size = size;
+	map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) &&
+		!kbase_mem_is_imported(map->gpu_alloc->type);
+
+	if (map->sync_needed)
+		kbasep_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU);
+	kbase_gpu_vm_unlock(kctx);
+
+	return map->addr;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return NULL;
+}
+
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map)
+{
+	/* 0 is specified for prot_request to indicate no access checks should
+	 * be made.
+	 *
+	 * As mentioned in kbase_vmap_prot() this means that a kernel-side
+	 * CPU-RO mapping is not enforced to allow this to work */
+	return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map);
+}
+KBASE_EXPORT_TEST_API(kbase_vmap);
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+	void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+	vunmap(addr);
+
+	if (map->sync_needed)
+		kbasep_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE);
+	map->gpu_addr = 0;
+	map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+	map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+	map->cpu_pages = NULL;
+	map->gpu_pages = NULL;
+	map->addr = NULL;
+	map->size = 0;
+	map->sync_needed = false;
+}
+KBASE_EXPORT_TEST_API(kbase_vunmap);
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
+{
+	struct mm_struct *mm;
+
+	rcu_read_lock();
+	mm = rcu_dereference(kctx->process_mm);
+	if (mm) {
+		atomic_add(pages, &kctx->nonmapped_pages);
+#ifdef SPLIT_RSS_COUNTING
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+#else
+		spin_lock(&mm->page_table_lock);
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+		spin_unlock(&mm->page_table_lock);
+#endif
+	}
+	rcu_read_unlock();
+}
+
+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
+{
+	int pages;
+	struct mm_struct *mm;
+
+	spin_lock(&kctx->mm_update_lock);
+	mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
+	if (!mm) {
+		spin_unlock(&kctx->mm_update_lock);
+		return;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, NULL);
+	spin_unlock(&kctx->mm_update_lock);
+	synchronize_rcu();
+
+	pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+#ifdef SPLIT_RSS_COUNTING
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+#else
+	spin_lock(&mm->page_table_lock);
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+	spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx;
+
+	kctx = vma->vm_private_data;
+	kbasep_os_process_page_usage_drain(kctx);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+	.close = kbase_special_vm_close,
+};
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
+{
+	/* check that this is the only tracking page */
+	spin_lock(&kctx->mm_update_lock);
+	if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
+		spin_unlock(&kctx->mm_update_lock);
+		return -EFAULT;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, current->mm);
+
+	spin_unlock(&kctx->mm_update_lock);
+
+	/* no real access */
+	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_special_ops;
+	vma->vm_private_data = kctx;
+
+	return 0;
+}
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
+{
+	int res;
+	void *va;
+	dma_addr_t  dma_pa;
+	struct kbase_va_region *reg;
+	struct tagged_addr *page_array;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	unsigned long attrs = DMA_ATTR_WRITE_COMBINE;
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
+	u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+		    BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+	u32 i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(0 != size);
+	KBASE_DEBUG_ASSERT(0 != pages);
+
+	if (size == 0)
+		goto err;
+
+	/* All the alloc calls return zeroed memory */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+			     attrs);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+			     &attrs);
+#else
+	va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
+#endif
+	if (!va)
+		goto err;
+
+	/* Store the state so we can free it later. */
+	handle->cpu_va = va;
+	handle->dma_pa = dma_pa;
+	handle->size   = size;
+
+
+	reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA);
+	if (!reg)
+		goto no_reg;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	if (kbase_update_region_flags(kctx, reg, flags) != 0)
+		goto invalid_flags;
+
+	reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(reg->cpu_alloc))
+		goto no_alloc;
+
+	reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	for (i = 0; i < pages; i++)
+		page_array[i] = as_tagged(dma_pa + ((dma_addr_t)i << PAGE_SHIFT));
+
+	reg->cpu_alloc->nents = pages;
+
+	kbase_gpu_vm_lock(kctx);
+	res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
+	kbase_gpu_vm_unlock(kctx);
+	if (res)
+		goto no_mmap;
+
+	return va;
+
+no_mmap:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc:
+invalid_flags:
+	kfree(reg);
+no_reg:
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
+#endif
+err:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_va_alloc);
+
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
+{
+	struct kbase_va_region *reg;
+	int err;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
+	KBASE_DEBUG_ASSERT(reg);
+	err = kbase_gpu_munmap(kctx, reg);
+	kbase_gpu_vm_unlock(kctx);
+	KBASE_DEBUG_ASSERT(!err);
+
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+		       handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+			handle->cpu_va, handle->dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, handle->size,
+				handle->cpu_va, handle->dma_pa);
+#endif
+}
+KBASE_EXPORT_SYMBOL(kbase_va_free);
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
new file mode 100644
index 0000000..db35f62
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -0,0 +1,240 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.h
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_MEM_LINUX_H_
+#define _KBASE_MEM_LINUX_H_
+
+/** A HWC dump mapping */
+struct kbase_hwc_dma_mapping {
+	void       *cpu_va;
+	dma_addr_t  dma_pa;
+	size_t      size;
+};
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
+		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
+		u64 *gpu_va);
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages);
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages,
+		u64 *flags);
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
+
+/**
+ * kbase_mem_commit - Change the physical backing size of a region
+ *
+ * @kctx: The kernel context
+ * @gpu_addr: Handle to the memory region
+ * @new_pages: Number of physical pages to back the region with
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages);
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction
+ * mechanism.
+ * @kctx: The kbase context to initialize.
+ *
+ * Return: Zero on success or -errno on failure.
+ */
+int kbase_mem_evictable_init(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to de-initialize.
+ */
+void kbase_mem_evictable_deinit(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the grow
+ * @old_pages: The number of pages before the grow
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Expand the GPU mapping to encompass the new psychical pages which have
+ * been added to the allocation.
+ *
+ * Note: Caller must be holding the region lock.
+ */
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction
+ * @gpu_alloc: The physical allocation to make evictable
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Take the provided region and make all the physical pages within it
+ * reclaimable by the kernel, updating the per-process VM stats as well.
+ * Remove any CPU mappings (as these can't be removed in the shrinker callback
+ * as mmap_sem might already be taken) but leave the GPU mapping intact as
+ * and until the shrinker reclaims the allocation.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc);
+
+/**
+ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for
+ * eviction.
+ * @alloc: The physical allocation to remove eviction eligibility from.
+ *
+ * Return: True if the allocation had its backing restored and false if
+ * it hasn't.
+ *
+ * Make the physical pages in the region no longer reclaimable and update the
+ * per-process stats, if the shrinker has already evicted the memory then
+ * re-allocate it if the region is still alive.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
+
+struct kbase_vmap_struct {
+	u64 gpu_addr;
+	struct kbase_mem_phy_alloc *cpu_alloc;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+	struct tagged_addr *cpu_pages;
+	struct tagged_addr *gpu_pages;
+	void *addr;
+	size_t size;
+	bool sync_needed;
+};
+
+
+/**
+ * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the
+ * requested access permissions are supported
+ * @kctx:         Context the VA range belongs to
+ * @gpu_addr:     Start address of VA range
+ * @size:         Size of VA range
+ * @prot_request: Flags indicating how the caller will then access the memory
+ * @map:          Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check
+ * whether the region should allow the intended access, and return an error if
+ * disallowed. This is essential for security of imported memory, particularly
+ * a user buf from SHM mapped into the process as RO. In that case, write
+ * access must be checked if the intention is for kernel to write to the
+ * memory.
+ *
+ * The checks are also there to help catch access errors on memory where
+ * security is not a concern: imported memory that is always RW, and memory
+ * that was allocated and owned by the process attached to @kctx. In this case,
+ * it helps to identify memory that was was mapped with the wrong access type.
+ *
+ * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases
+ * where either the security of memory is solely dependent on those flags, or
+ * when userspace code was expecting only the GPU to access the memory (e.g. HW
+ * workarounds).
+ *
+ * All cache maintenance operations shall be ignored if the
+ * memory region has been imported.
+ *
+ */
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vmap - Map a GPU VA range into the kernel safely
+ * @kctx:     Context the VA range belongs to
+ * @gpu_addr: Start address of VA range
+ * @size:     Size of VA range
+ * @map:      Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no
+ * checks to ensure the security of e.g. imported user bufs from RO SHM.
+ *
+ * Note: All cache maintenance operations shall be ignored if the memory region
+ * has been imported.
+ */
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vunmap - Unmap a GPU VA range from the kernel
+ * @kctx: Context the VA range belongs to
+ * @map:  Structure describing the mapping from the corresponding kbase_vmap()
+ *        call
+ *
+ * Unmaps a GPU VA range from the kernel, given its @map structure obtained
+ * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * The reference taken on pages during kbase_vmap() is released.
+ *
+ * Note: All cache maintenance operations shall be ignored if the memory region
+ * has been imported.
+ */
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
+
+/** @brief Allocate memory from kernel space and map it onto the GPU
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param size   The size of the allocation in bytes
+ * @param handle An opaque structure used to contain the state needed to free the memory
+ * @return the VA for kernel space and GPU MMU
+ */
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
+
+/** @brief Free/unmap memory allocated by kbase_va_alloc
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param handle An opaque structure returned by the kbase_va_alloc function.
+ */
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
+
+extern const struct vm_operations_struct kbase_vm_ops;
+
+#endif				/* _KBASE_MEM_LINUX_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
new file mode 100644
index 0000000..f4e8849
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -0,0 +1,89 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014,2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_MEM_LOWLEVEL_H
+#define _KBASE_MEM_LOWLEVEL_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/dma-mapping.h>
+
+/**
+ * @brief Flags for kbase_phy_allocator_pages_alloc
+ */
+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0)	/** Default allocation flag */
+#define KBASE_PHY_PAGES_FLAG_CLEAR   (1 << 0)	/** Clear the pages after allocation */
+#define KBASE_PHY_PAGES_FLAG_POISON  (1 << 1)	/** Fill the memory with a poison value */
+
+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON)
+
+#define KBASE_PHY_PAGES_POISON_VALUE  0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */
+
+enum kbase_sync_type {
+	KBASE_SYNC_TO_CPU,
+	KBASE_SYNC_TO_DEVICE
+};
+
+struct tagged_addr { phys_addr_t tagged_addr; };
+
+#define HUGE_PAGE    (1u << 0)
+#define HUGE_HEAD    (1u << 1)
+#define FROM_PARTIAL (1u << 2)
+
+static inline phys_addr_t as_phys_addr_t(struct tagged_addr t)
+{
+	return t.tagged_addr & PAGE_MASK;
+}
+
+static inline struct tagged_addr as_tagged(phys_addr_t phys)
+{
+	struct tagged_addr t;
+
+	t.tagged_addr = phys & PAGE_MASK;
+	return t;
+}
+
+static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag)
+{
+	struct tagged_addr t;
+
+	t.tagged_addr = (phys & PAGE_MASK) | (tag & ~PAGE_MASK);
+	return t;
+}
+
+static inline bool is_huge(struct tagged_addr t)
+{
+	return t.tagged_addr & HUGE_PAGE;
+}
+
+static inline bool is_huge_head(struct tagged_addr t)
+{
+	int mask = HUGE_HEAD | HUGE_PAGE;
+
+	return mask == (t.tagged_addr & mask);
+}
+
+static inline bool is_partial(struct tagged_addr t)
+{
+	return t.tagged_addr & FROM_PARTIAL;
+}
+
+#endif /* _KBASE_LOWLEVEL_H */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
new file mode 100644
index 0000000..696730a
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -0,0 +1,651 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/shrinker.h>
+#include <linux/atomic.h>
+#include <linux/version.h>
+
+#define pool_dbg(pool, format, ...) \
+	dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format,	\
+		(pool->next_pool) ? "kctx" : "kbdev",	\
+		kbase_mem_pool_size(pool),	\
+		kbase_mem_pool_max_size(pool),	\
+		##__VA_ARGS__)
+
+#define NOT_DIRTY false
+#define NOT_RECLAIMED false
+
+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
+{
+	spin_lock(&pool->pool_lock);
+}
+
+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
+{
+	spin_unlock(&pool->pool_lock);
+}
+
+static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
+{
+	ssize_t max_size = kbase_mem_pool_max_size(pool);
+	ssize_t cur_size = kbase_mem_pool_size(pool);
+
+	return max(max_size - cur_size, (ssize_t)0);
+}
+
+static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool);
+}
+
+static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) == 0;
+}
+
+static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_add(&p->lru, &pool->page_list);
+	pool->cur_size++;
+
+	zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "added page\n");
+}
+
+static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_locked(pool, p);
+	kbase_mem_pool_unlock(pool);
+}
+
+static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_for_each_entry(p, page_list, lru) {
+		zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+	}
+
+	list_splice(page_list, &pool->page_list);
+	pool->cur_size += nr_pages;
+
+	pool_dbg(pool, "added %zu pages\n", nr_pages);
+}
+
+static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_list_locked(pool, page_list, nr_pages);
+	kbase_mem_pool_unlock(pool);
+}
+
+static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (kbase_mem_pool_is_empty(pool))
+		return NULL;
+
+	p = list_first_entry(&pool->page_list, struct page, lru);
+	list_del_init(&p->lru);
+	pool->cur_size--;
+
+	zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "removed page\n");
+
+	return p;
+}
+
+static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	kbase_mem_pool_lock(pool);
+	p = kbase_mem_pool_remove_locked(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return p;
+}
+
+static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_sync_single_for_device(dev, kbase_dma_addr(p),
+			(PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL);
+}
+
+static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	int i;
+
+	for (i = 0; i < (1U << pool->order); i++)
+		clear_highpage(p+i);
+
+	kbase_mem_pool_sync_page(pool, p);
+}
+
+static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool,
+		struct page *p)
+{
+	/* Zero page before spilling */
+	kbase_mem_pool_zero_page(next_pool, p);
+
+	kbase_mem_pool_add(next_pool, p);
+}
+
+struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+	gfp_t gfp;
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr;
+	int i;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \
+	LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+	/* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+	gfp = GFP_USER | __GFP_ZERO;
+#else
+	gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+	if (current->flags & PF_KTHREAD) {
+		/* Don't trigger OOM killer from kernel threads, e.g. when
+		 * growing memory on GPU page fault */
+		gfp |= __GFP_NORETRY;
+	}
+
+	/* don't warn on higer order failures */
+	if (pool->order)
+		gfp |= __GFP_NOWARN;
+
+	p = alloc_pages(gfp, pool->order);
+	if (!p)
+		return NULL;
+
+	dma_addr = dma_map_page(dev, p, 0, (PAGE_SIZE << pool->order),
+				DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, dma_addr)) {
+		__free_pages(p, pool->order);
+		return NULL;
+	}
+
+	WARN_ON(dma_addr != page_to_phys(p));
+	for (i = 0; i < (1u << pool->order); i++)
+		kbase_set_dma_addr(p+i, dma_addr + PAGE_SIZE * i);
+
+	return p;
+}
+
+static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr = kbase_dma_addr(p);
+	int i;
+
+	dma_unmap_page(dev, dma_addr, (PAGE_SIZE << pool->order),
+		       DMA_BIDIRECTIONAL);
+	for (i = 0; i < (1u << pool->order); i++)
+		kbase_clear_dma_addr(p+i);
+	__free_pages(p, pool->order);
+
+	pool_dbg(pool, "freed page to kernel\n");
+}
+
+static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	struct page *p;
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	return i;
+}
+
+static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	size_t nr_freed;
+
+	kbase_mem_pool_lock(pool);
+	nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	kbase_mem_pool_unlock(pool);
+
+	return nr_freed;
+}
+
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
+		size_t nr_to_grow)
+{
+	struct page *p;
+	size_t i;
+
+	for (i = 0; i < nr_to_grow; i++) {
+		p = kbase_mem_alloc_page(pool);
+		if (!p)
+			return -ENOMEM;
+		kbase_mem_pool_add(pool, p);
+	}
+
+	return 0;
+}
+
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
+{
+	size_t cur_size;
+	int err = 0;
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	if (new_size > pool->max_size)
+		new_size = pool->max_size;
+
+	if (new_size < cur_size)
+		kbase_mem_pool_shrink(pool, cur_size - new_size);
+	else if (new_size > cur_size)
+		err = kbase_mem_pool_grow(pool, new_size - cur_size);
+
+	if (err) {
+		size_t grown_size = kbase_mem_pool_size(pool);
+
+		dev_warn(pool->kbdev->dev,
+			 "Mem pool not grown to the required size of %zu bytes, grown for additional %zu bytes instead!\n",
+			 (new_size - cur_size), (grown_size - cur_size));
+	}
+}
+
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size)
+{
+	size_t cur_size;
+	size_t nr_to_shrink;
+
+	kbase_mem_pool_lock(pool);
+
+	pool->max_size = max_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+	if (max_size < cur_size) {
+		nr_to_shrink = cur_size - max_size;
+		kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	}
+
+	kbase_mem_pool_unlock(pool);
+}
+
+
+static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+	pool_dbg(pool, "reclaim count: %zu\n", kbase_mem_pool_size(pool));
+	return kbase_mem_pool_size(pool);
+}
+
+static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+	unsigned long freed;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+
+	pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan);
+
+	freed = kbase_mem_pool_shrink(pool, sc->nr_to_scan);
+
+	pool_dbg(pool, "reclaim freed %ld pages\n", freed);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_pool_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_pool_reclaim_count_objects(s, sc);
+
+	return kbase_mem_pool_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		size_t order,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool)
+{
+	pool->cur_size = 0;
+	pool->max_size = max_size;
+	pool->order = order;
+	pool->kbdev = kbdev;
+	pool->next_pool = next_pool;
+
+	spin_lock_init(&pool->pool_lock);
+	INIT_LIST_HEAD(&pool->page_list);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink;
+#else
+	pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects;
+	pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects;
+#endif
+	pool->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	pool->reclaim.batch = 0;
+#endif
+	register_shrinker(&pool->reclaim);
+
+	pool_dbg(pool, "initialized\n");
+
+	return 0;
+}
+
+void kbase_mem_pool_term(struct kbase_mem_pool *pool)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_spill = 0;
+	LIST_HEAD(spill_list);
+	int i;
+
+	pool_dbg(pool, "terminate()\n");
+
+	unregister_shrinker(&pool->reclaim);
+
+	kbase_mem_pool_lock(pool);
+	pool->max_size = 0;
+
+	if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool (may overspill) */
+		nr_to_spill = kbase_mem_pool_capacity(next_pool);
+		nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill);
+
+		/* Zero pages first without holding the next_pool lock */
+		for (i = 0; i < nr_to_spill; i++) {
+			p = kbase_mem_pool_remove_locked(pool);
+			kbase_mem_pool_zero_page(pool, p);
+			list_add(&p->lru, &spill_list);
+		}
+	}
+
+	while (!kbase_mem_pool_is_empty(pool)) {
+		/* Free remaining pages to kernel */
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	kbase_mem_pool_unlock(pool);
+
+	if (next_pool && nr_to_spill) {
+		/* Add new page list to next_pool */
+		kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill);
+
+		pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill);
+	}
+
+	pool_dbg(pool, "terminated\n");
+}
+
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	do {
+		pool_dbg(pool, "alloc()\n");
+		p = kbase_mem_pool_remove(pool);
+
+		if (p)
+			return p;
+
+		pool = pool->next_pool;
+	} while (pool);
+
+	return NULL;
+}
+
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+
+	pool_dbg(pool, "free()\n");
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add(pool, p);
+	} else if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool */
+		kbase_mem_pool_spill(next_pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
+		struct tagged_addr *pages, bool partial_allowed)
+{
+	struct page *p;
+	size_t nr_from_pool;
+	size_t i = 0;
+	int err = -ENOMEM;
+	size_t nr_pages_internal;
+
+	nr_pages_internal = nr_4k_pages / (1u << (pool->order));
+
+	if (nr_pages_internal * (1u << pool->order) != nr_4k_pages)
+		return -EINVAL;
+
+	pool_dbg(pool, "alloc_pages(4k=%zu):\n", nr_4k_pages);
+	pool_dbg(pool, "alloc_pages(internal=%zu):\n", nr_pages_internal);
+
+	/* Get pages from this pool */
+	kbase_mem_pool_lock(pool);
+	nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool));
+	while (nr_from_pool--) {
+		int j;
+		p = kbase_mem_pool_remove_locked(pool);
+		if (pool->order) {
+			pages[i++] = as_tagged_tag(page_to_phys(p),
+						   HUGE_HEAD | HUGE_PAGE);
+			for (j = 1; j < (1u << pool->order); j++)
+				pages[i++] = as_tagged_tag(page_to_phys(p) +
+							   PAGE_SIZE * j,
+							   HUGE_PAGE);
+		} else {
+			pages[i++] = as_tagged(page_to_phys(p));
+		}
+	}
+	kbase_mem_pool_unlock(pool);
+
+	if (i != nr_4k_pages && pool->next_pool) {
+		/* Allocate via next pool */
+		err = kbase_mem_pool_alloc_pages(pool->next_pool,
+				nr_4k_pages - i, pages + i, partial_allowed);
+
+		if (err < 0)
+			goto err_rollback;
+
+		i += err;
+	} else {
+		/* Get any remaining pages from kernel */
+		while (i != nr_4k_pages) {
+			p = kbase_mem_alloc_page(pool);
+			if (!p) {
+				if (partial_allowed)
+					goto done;
+				else
+					goto err_rollback;
+			}
+
+			if (pool->order) {
+				int j;
+
+				pages[i++] = as_tagged_tag(page_to_phys(p),
+							   HUGE_PAGE |
+							   HUGE_HEAD);
+				for (j = 1; j < (1u << pool->order); j++) {
+					phys_addr_t phys;
+
+					phys = page_to_phys(p) + PAGE_SIZE * j;
+					pages[i++] = as_tagged_tag(phys,
+								   HUGE_PAGE);
+				}
+			} else {
+				pages[i++] = as_tagged(page_to_phys(p));
+			}
+		}
+	}
+
+done:
+	pool_dbg(pool, "alloc_pages(%zu) done\n", i);
+
+	return i;
+
+err_rollback:
+	kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED);
+	return err;
+}
+
+static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
+				     size_t nr_pages, struct tagged_addr *pages,
+				     bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first without holding the pool lock */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge_head(pages[i]) || !is_huge(pages[i])) {
+			p = phys_to_page(as_phys_addr_t(pages[i]));
+			if (zero)
+				kbase_mem_pool_zero_page(pool, p);
+			else if (sync)
+				kbase_mem_pool_sync_page(pool, p);
+
+			list_add(&p->lru, &new_page_list);
+			nr_to_pool++;
+		}
+		pages[i] = as_tagged(0);
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		struct tagged_addr *pages, bool dirty, bool reclaimed)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	pool_dbg(pool, "free_pages(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
+
+		i += nr_to_pool;
+
+		if (i != nr_pages && next_pool) {
+			/* Spill to next pool (may overspill) */
+			nr_to_pool = kbase_mem_pool_capacity(next_pool);
+			nr_to_pool = min(nr_pages - i, nr_to_pool);
+
+			kbase_mem_pool_add_array(next_pool, nr_to_pool,
+					pages + i, true, dirty);
+			i += nr_to_pool;
+		}
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge(pages[i]) && !is_huge_head(pages[i])) {
+			pages[i] = as_tagged(0);
+			continue;
+		}
+
+		p = phys_to_page(as_phys_addr_t(pages[i]));
+
+		if (reclaimed)
+			zone_page_state_add(-1, page_zone(p),
+					NR_SLAB_RECLAIMABLE);
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = as_tagged(0);
+	}
+
+	pool_dbg(pool, "free_pages(%zu) done\n", nr_pages);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
new file mode 100644
index 0000000..319cf25
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
@@ -0,0 +1,88 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <mali_kbase_mem_pool_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_trim(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops,
+		kbase_mem_pool_debugfs_size_get,
+		kbase_mem_pool_debugfs_size_set,
+		"%llu\n");
+
+static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_max_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_set_max_size(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops,
+		kbase_mem_pool_debugfs_max_size_get,
+		kbase_mem_pool_debugfs_max_size_set,
+		"%llu\n");
+
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_mem_pool *pool,
+		struct kbase_mem_pool *lp_pool)
+{
+	debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_size_fops);
+
+	debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_max_size_fops);
+
+	debugfs_create_file("lp_mem_pool_size", S_IRUGO | S_IWUSR, parent,
+			lp_pool, &kbase_mem_pool_debugfs_size_fops);
+
+	debugfs_create_file("lp_mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+			lp_pool, &kbase_mem_pool_debugfs_max_size_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
new file mode 100644
index 0000000..496eaf3
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_MEM_POOL_DEBUGFS_H
+#define _KBASE_MEM_POOL_DEBUGFS_H
+
+#include <mali_kbase.h>
+
+/**
+ * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool
+ * @parent:  Parent debugfs dentry
+ * @pool:    Memory pool of small pages to control
+ * @lp_pool: Memory pool of large pages to control
+ *
+ * Adds four debugfs files under @parent:
+ * - mem_pool_size: get/set the current size of @pool
+ * - mem_pool_max_size: get/set the max size of @pool
+ * - lp_mem_pool_size: get/set the current size of @lp_pool
+ * - lp_mem_pool_max_size: get/set the max size of @lp_pool
+ */
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_mem_pool *pool,
+		struct kbase_mem_pool *lp_pool);
+
+#endif  /*_KBASE_MEM_POOL_DEBUGFS_H*/
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
new file mode 100644
index 0000000..d58fd8d
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,121 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Show callback for the @c mem_profile debugfs file.
+ *
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise
+ */
+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+
+	seq_putc(sfile, '\n');
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for mem_profile
+ */
+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_mem_profile_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+	.open = kbasep_mem_profile_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+		kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		if (!debugfs_create_file("mem_profile", S_IRUGO,
+					kctx->kctx_dentry, kctx,
+					&kbasep_mem_profile_debugfs_fops)) {
+			err = -EAGAIN;
+		} else {
+			kbase_ctx_flag_set(kctx,
+					   KCTX_MEM_PROFILE_INITIALIZED);
+		}
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		kfree(kctx->mem_profile_data);
+		kctx->mem_profile_data = data;
+		kctx->mem_profile_size = size;
+	} else {
+		kfree(data);
+	}
+
+	dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d",
+		err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return err;
+}
+
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+				kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	kfree(kctx->mem_profile_data);
+	kctx->mem_profile_data = NULL;
+	kctx->mem_profile_size = 0;
+
+	mutex_unlock(&kctx->mem_profile_lock);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	kfree(data);
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
new file mode 100644
index 0000000..a1dc2e0
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
@@ -0,0 +1,59 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs.h
+ * Header file for mem profiles entries in debugfs
+ *
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
+#define _KBASE_MEM_PROFILE_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Remove entry from Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
+
+/**
+ * @brief Insert @p data to the debugfs file so it can be read by userspace
+ *
+ * The function takes ownership of @p data and frees it later when new data
+ * is inserted.
+ *
+ * If the debugfs entry corresponding to the @p kctx doesn't exist,
+ * an attempt will be made to create it.
+ *
+ * @param kctx The context whose debugfs file @p data should be inserted to
+ * @param data A NULL-terminated string to be inserted to the debugfs file,
+ *             without the trailing new line character
+ * @param size The length of the @p data string
+ * @return 0 if @p data inserted correctly
+ *         -EAGAIN in case of error
+ * @post @ref mem_profile_initialized will be set to @c true
+ *       the first time this function succeeds.
+ */
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size);
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
new file mode 100644
index 0000000..82f0702
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs_buf_size.h
+ * Header file for the size of the buffer to accumulate the histogram report text in
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * The size of the buffer to accumulate the histogram report text in
+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56))
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
new file mode 100644
index 0000000..1694779
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -0,0 +1,2149 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mmu.c
+ * Base kernel MMU management.
+ */
+
+/* #define DEBUG    1 */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_debug.h>
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_time.h>
+#include <mali_kbase_mem.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+/**
+ * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs.
+ *
+ * If sync is not set then transactions still in flight when the flush is issued
+ * may use the old page tables and the data they write will not be written out
+ * to memory, this function returns after the flush has been issued but
+ * before all accesses which might effect the flushed region have completed.
+ *
+ * If sync is set then accesses in the flushed region will be drained
+ * before data is flush and invalidated through L1, L2 and into memory,
+ * after which point this function will return.
+ */
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync);
+
+/**
+ * kbase_mmu_sync_pgd - sync page directory to memory
+ * @kbdev:	Device pointer.
+ * @handle:	Address of DMA region.
+ * @size:       Size of the region to sync.
+ *
+ * This should be called after each page directory update.
+ */
+
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
+		dma_addr_t handle, size_t size)
+{
+	/* If page table is not coherent then ensure the gpu can read
+	 * the pages from memory
+	 */
+	if (kbdev->system_coherency != COHERENCY_ACE)
+		dma_sync_single_for_device(kbdev->dev, handle, size,
+				DMA_TO_DEVICE);
+}
+
+/*
+ * Definitions:
+ * - PGD: Page Directory.
+ * - PTE: Page Table Entry. A 64bit value pointing to the next
+ *        level of translation
+ * - ATE: Address Transation Entry. A 64bit value pointing to
+ *        a 4kB physical page.
+ */
+
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str);
+
+
+/**
+ * make_multiple() - Calculate the nearest integral multiple of a given number.
+ *
+ * @minimum: The number to round up.
+ * @multiple: The number of which the function shall calculate an integral multiple.
+ * @result: The number rounded up to the nearest integral multiple in case of success,
+ *          or just the number itself in case of failure.
+ *
+ * Return: 0 in case of success, or -1 in case of failure.
+ */
+static int make_multiple(size_t minimum, size_t multiple, size_t *result)
+{
+	int err = -1;
+
+	*result = minimum;
+
+	if (multiple > 0) {
+		size_t remainder = minimum % multiple;
+
+		if (remainder != 0)
+			*result = minimum + multiple - remainder;
+		err = 0;
+	}
+
+	return err;
+}
+
+void page_fault_worker(struct work_struct *data)
+{
+	u64 fault_pfn;
+	u32 fault_status;
+	size_t new_pages;
+	size_t fault_rel_pfn;
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_va_region *region;
+	int err;
+	bool grown = false;
+
+	faulting_as = container_of(data, struct kbase_as, work_pagefault);
+	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+
+	if (unlikely(faulting_as->protected_mode))
+	{
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Protected mode fault");
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+
+		goto fault_done;
+	}
+
+	fault_status = faulting_as->fault_status;
+	switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
+		/* need to check against the region to handle this one */
+		break;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Translation table bus fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
+		/* nothing to do, but we don't expect this fault currently */
+		dev_warn(kbdev->dev, "Access flag unexpectedly set");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Address size fault");
+		else
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Unknown fault code");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Memory attributes fault");
+		else
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Unknown fault code");
+		goto fault_done;
+
+	default:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Unknown fault code");
+		goto fault_done;
+	}
+
+	/* so we have a translation fault, let's see if it is for growable
+	 * memory */
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx,
+			faulting_as->fault_addr);
+	if (!region || region->flags & KBASE_REG_FREE) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"DMA-BUF is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if ((region->flags & GROWABLE_FLAGS_REQUIRED)
+			!= GROWABLE_FLAGS_REQUIRED) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not growable");
+		goto fault_done;
+	}
+
+	if ((region->flags & KBASE_REG_DONT_NEED)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Don't need memory can't be grown");
+		goto fault_done;
+	}
+
+	/* find the size we need to grow it by */
+	/* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address
+	 * validating the fault_adress to be within a size_t from the start_pfn */
+	fault_rel_pfn = fault_pfn - region->start_pfn;
+
+	if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
+		dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+				faulting_as->fault_addr, region->start_pfn,
+				region->start_pfn +
+				kbase_reg_current_backed_size(region));
+
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* [1] in case another page fault occurred while we were
+		 * handling the (duplicate) page fault we need to ensure we
+		 * don't loose the other page fault as result of us clearing
+		 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
+		 * an UNLOCK command that will retry any stalled memory
+		 * transaction (which should cause the other page fault to be
+		 * raised again).
+		 */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+
+		goto fault_done;
+	}
+
+	err = make_multiple(fault_rel_pfn - kbase_reg_current_backed_size(region) + 1,
+			region->extent,
+			&new_pages);
+	WARN_ON(err);
+
+	/* cap to max vsize */
+	new_pages = min(new_pages, region->nr_pages - kbase_reg_current_backed_size(region));
+
+	if (0 == new_pages) {
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Duplicate of a fault we've already handled, nothing to do */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* See comment [1] about UNLOCK usage */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+		goto fault_done;
+	}
+
+	if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) {
+		if (region->gpu_alloc != region->cpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					region->cpu_alloc, new_pages) == 0) {
+				grown = true;
+			} else {
+				kbase_free_phy_pages_helper(region->gpu_alloc,
+						new_pages);
+			}
+		} else {
+			grown = true;
+		}
+	}
+
+
+	if (grown) {
+		u64 pfn_offset;
+		u32 op;
+
+		/* alloc success */
+		KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
+
+		/* set up the new pages */
+		pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
+		/*
+		 * Note:
+		 * Issuing an MMU operation will unlock the MMU and cause the
+		 * translation to be replayed. If the page insertion fails then
+		 * rather then trying to continue the context should be killed
+		 * so the no_flush version of insert_pages is used which allows
+		 * us to unlock the MMU as we see fit.
+		 */
+		err = kbase_mmu_insert_pages_no_flush(kctx,
+				region->start_pfn + pfn_offset,
+				&kbase_get_gpu_phy_pages(region)[pfn_offset],
+				new_pages, region->flags);
+		if (err) {
+			kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
+			if (region->gpu_alloc != region->cpu_alloc)
+				kbase_free_phy_pages_helper(region->cpu_alloc,
+						new_pages);
+			kbase_gpu_vm_unlock(kctx);
+			/* The locked VA region will be unlocked and the cache invalidated in here */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page table update failure");
+			goto fault_done;
+		}
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
+#endif
+		KBASE_TLSTREAM_AUX_PAGEFAULT(kctx->id, (u64)new_pages);
+
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* flush L2 and unlock the VA (resumes the MMU) */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+			op = AS_COMMAND_FLUSH;
+		else
+			op = AS_COMMAND_FLUSH_PT;
+
+		/* clear MMU interrupt - this needs to be done after updating
+		 * the page tables but before issuing a FLUSH command. The
+		 * FLUSH cmd has a side effect that it restarts stalled memory
+		 * transactions in other address spaces which may cause
+		 * another fault to occur. If we didn't clear the interrupt at
+		 * this stage a new IRQ might not be raised when the GPU finds
+		 * a MMU IRQ is already pending.
+		 */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
+					  faulting_as->fault_addr >> PAGE_SHIFT,
+					  new_pages,
+					  op, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		/* reenable this in the mask */
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* failed to extend, handle as a normal PF */
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Page allocation failure");
+	}
+
+fault_done:
+	/*
+	 * By this point, the fault was handled in some way,
+	 * so release the ctx refcount
+	 */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
+{
+	u64 *page;
+	int i;
+	struct page *p;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
+	kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	p = kbase_mem_pool_alloc(&kctx->mem_pool);
+	if (!p)
+		goto sub_pages;
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+
+	page = kmap(p);
+	if (NULL == page)
+		goto alloc_free;
+
+	kbase_process_page_usage_inc(kctx, 1);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
+		kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
+
+	kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+	kunmap(p);
+	return page_to_phys(p);
+
+alloc_free:
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+sub_pages:
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
+
+/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
+ * new table from the pool if needed and possible
+ */
+static int mmu_get_next_pgd(struct kbase_context *kctx,
+		phys_addr_t *pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(*pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	p = pfn_to_page(PFN_DOWN(*pgd));
+	page = kmap(p);
+	if (NULL == page) {
+		dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
+		return -EINVAL;
+	}
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+
+	if (!target_pgd) {
+		target_pgd = kbase_mmu_alloc_pgd(kctx);
+		if (!target_pgd) {
+			dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+			kunmap(p);
+			return -ENOMEM;
+		}
+
+		kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+		/* Rely on the caller to update the address space flags. */
+	}
+
+	kunmap(p);
+	*pgd = target_pgd;
+
+	return 0;
+}
+
+/*
+ * Returns the PGD for the specified level of translation
+ */
+static int mmu_get_pgd_at_level(struct kbase_context *kctx,
+					u64 vpfn,
+					unsigned int level,
+					phys_addr_t *out_pgd)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+	pgd = kctx->pgd;
+
+	for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) {
+		int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l);
+		/* Handle failure condition */
+		if (err) {
+			dev_dbg(kctx->kbdev->dev,
+				 "%s: mmu_get_next_pgd failure at level %d\n",
+				 __func__, l);
+			return err;
+		}
+	}
+
+	*out_pgd = pgd;
+
+	return 0;
+}
+
+#define mmu_get_bottom_pgd(kctx, vpfn, out_pgd) \
+	mmu_get_pgd_at_level((kctx), (vpfn), MIDGARD_MMU_BOTTOMLEVEL, (out_pgd))
+
+
+static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx,
+					      u64 from_vpfn, u64 to_vpfn)
+{
+	phys_addr_t pgd;
+	u64 vpfn = from_vpfn;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+	KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	while (vpfn < to_vpfn) {
+		unsigned int i;
+		unsigned int idx = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx;
+		unsigned int pcount = 0;
+		unsigned int left = to_vpfn - vpfn;
+		unsigned int level;
+		u64 *page;
+
+		if (count > left)
+			count = left;
+
+		/* need to check if this is a 2MB page or a 4kB */
+		pgd = kctx->pgd;
+
+		for (level = MIDGARD_MMU_TOPLEVEL;
+				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
+			idx = (vpfn >> ((3 - level) * 9)) & 0x1FF;
+			page = kmap(phys_to_page(pgd));
+			if (mmu_mode->ate_is_valid(page[idx], level))
+				break; /* keep the mapping */
+			kunmap(phys_to_page(pgd));
+			pgd = mmu_mode->pte_to_phy_addr(page[idx]);
+		}
+
+		switch (level) {
+		case MIDGARD_MMU_LEVEL(2):
+			/* remap to single entry to update */
+			pcount = 1;
+			break;
+		case MIDGARD_MMU_BOTTOMLEVEL:
+			/* page count is the same as the logical count */
+			pcount = count;
+			break;
+		default:
+			dev_warn(kctx->kbdev->dev, "%sNo support for ATEs at level %d\n",
+			       __func__, level);
+			goto next;
+		}
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < pcount; i++)
+			mmu_mode->entry_invalidate(&page[idx + i]);
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				   kbase_dma_addr(phys_to_page(pgd)) + 8 * idx,
+				   8 * pcount);
+		kunmap(phys_to_page(pgd));
+
+next:
+		vpfn += count;
+	}
+}
+
+/*
+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr phys, size_t nr,
+					unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_single_page only partially completes we need to be
+	 * able to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > remain)
+			count = remain;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_vpfn +
+								  recover_count
+								  );
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_vpfn +
+								  recover_count
+								  );
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			/* Fail if the current page is a valid ATE entry */
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+
+			mmu_mode->entry_set_ate(&pgd_page[ofs],
+						phys, flags,
+						MIDGARD_MMU_BOTTOMLEVEL);
+		}
+
+		vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table.
+		 * If further pages need inserting and fail we need to undo what
+		 * has already taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+static inline void cleanup_empty_pte(struct kbase_context *kctx, u64 *pte)
+{
+	phys_addr_t tmp_pgd;
+	struct page *tmp_p;
+
+	tmp_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(*pte);
+	tmp_p = phys_to_page(tmp_pgd);
+	kbase_mem_pool_free(&kctx->mem_pool, tmp_p, false);
+	kbase_process_page_usage_dec(kctx, 1);
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+}
+
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx,
+				    const u64 start_vpfn,
+				    struct tagged_addr *phys, size_t nr,
+				    unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	u64 insert_vpfn = start_vpfn;
+	size_t remain = nr;
+	int err;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(start_vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int vindex = insert_vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
+		struct page *p;
+		unsigned int cur_level;
+
+		if (count > remain)
+			count = remain;
+
+		if (!vindex && is_huge_head(*phys))
+			cur_level = MIDGARD_MMU_LEVEL(2);
+		else
+			cur_level = MIDGARD_MMU_BOTTOMLEVEL;
+
+		/*
+		 * Repeatedly calling mmu_get_pgd_at_level() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_pgd_at_level(kctx, insert_vpfn, cur_level,
+						   &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					cur_level);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+
+		if (err) {
+			dev_warn(kctx->kbdev->dev,
+				 "%s: mmu_get_bottom_pgd failure\n", __func__);
+			if (insert_vpfn != start_vpfn) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  start_vpfn,
+								  insert_vpfn);
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "%s: kmap failure\n",
+				 __func__);
+			if (insert_vpfn != start_vpfn) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  start_vpfn,
+								  insert_vpfn);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		if (cur_level == MIDGARD_MMU_LEVEL(2)) {
+			unsigned int level_index = (insert_vpfn >> 9) & 0x1FF;
+			u64 *target = &pgd_page[level_index];
+
+			if (mmu_mode->pte_is_valid(*target, cur_level))
+				cleanup_empty_pte(kctx, target);
+			mmu_mode->entry_set_ate(target, *phys, flags,
+						cur_level);
+		} else {
+			for (i = 0; i < count; i++) {
+				unsigned int ofs = vindex + i;
+				u64 *target = &pgd_page[ofs];
+
+				/* Fail if the current page is a valid ATE entry
+				 */
+				KBASE_DEBUG_ASSERT(0 == (*target & 1UL));
+
+				kctx->kbdev->mmu_mode->entry_set_ate(target,
+						phys[i], flags, cur_level);
+			}
+		}
+
+		phys += count;
+		insert_vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (vindex * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  struct tagged_addr *phys, size_t nr,
+				  unsigned long flags)
+{
+	int err;
+
+	err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+
+/**
+ * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
+ * without retaining the kbase context.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
+ * other locking.
+ */
+static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+	u32 op;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	if (sync)
+		op = AS_COMMAND_FLUSH_MEM;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	err = kbase_mmu_hw_do_operation(kbdev,
+				&kbdev->as[kctx->as_nr],
+				kctx, vpfn, nr, op, 0);
+#if KBASE_GPU_RESET_EN
+	if (err) {
+		/* Flush failed to complete, assume the
+		 * GPU has hung and perform a reset to
+		 * recover */
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_NO_MALI
+	/*
+	 * As this function could be called in interrupt context the sync
+	 * request can't block. Instead log the request and the next flush
+	 * request will pick it up.
+	 */
+	if ((!err) && sync &&
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367))
+		atomic_set(&kctx->drain_pending, 1);
+#endif /* !CONFIG_MALI_NO_MALI */
+}
+
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev;
+	bool ctx_is_in_runpool;
+#ifndef CONFIG_MALI_NO_MALI
+	bool drain_pending = false;
+
+	if (atomic_xchg(&kctx->drain_pending, 0))
+		drain_pending = true;
+#endif /* !CONFIG_MALI_NO_MALI */
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	kbdev = kctx->kbdev;
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if (ctx_is_in_runpool) {
+		KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+		if (!kbase_pm_context_active_handle_suspend(kbdev,
+			KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+			int err;
+			u32 op;
+
+			/* AS transaction begin */
+			mutex_lock(&kbdev->mmu_hw_mutex);
+
+			if (sync)
+				op = AS_COMMAND_FLUSH_MEM;
+			else
+				op = AS_COMMAND_FLUSH_PT;
+
+			err = kbase_mmu_hw_do_operation(kbdev,
+						&kbdev->as[kctx->as_nr],
+						kctx, vpfn, nr, op, 0);
+
+#if KBASE_GPU_RESET_EN
+			if (err) {
+				/* Flush failed to complete, assume the
+				 * GPU has hung and perform a reset to
+				 * recover */
+				dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
+
+				if (kbase_prepare_to_reset_gpu(kbdev))
+					kbase_reset_gpu(kbdev);
+			}
+#endif /* KBASE_GPU_RESET_EN */
+
+			mutex_unlock(&kbdev->mmu_hw_mutex);
+			/* AS transaction end */
+
+#ifndef CONFIG_MALI_NO_MALI
+			/*
+			 * The transaction lock must be dropped before here
+			 * as kbase_wait_write_flush could take it if
+			 * the GPU was powered down (static analysis doesn't
+			 * know this can't happen).
+			 */
+			drain_pending |= (!err) && sync &&
+					kbase_hw_has_issue(kctx->kbdev,
+							BASE_HW_ISSUE_6367);
+			if (drain_pending) {
+				/* Wait for GPU to flush write buffer */
+				kbase_wait_write_flush(kctx);
+			}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+			kbase_pm_context_idle(kbdev);
+		}
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+}
+
+void kbase_mmu_update(struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the hwaccess_lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	kctx->kbdev->mmu_mode->update(kctx);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_update);
+
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	kbdev->mmu_mode->disable_as(kbdev, as_nr);
+}
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the hwaccess_lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/*
+	 * The address space is being disabled, drain all knowledge of it out
+	 * from the caches as pages and page tables might be freed after this.
+	 *
+	 * The job scheduler code will already be holding the locks and context
+	 * so just do the flush.
+	 */
+	kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
+
+	kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+
+/*
+ * We actually only discard the ATE, and not the page table
+ * pages. There is a potential DoS here, as we'll leak memory by
+ * having PTEs that are potentially unused.  Will require physical
+ * page accounting, so MMU pages are part of the process allocation.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+	phys_addr_t pgd;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err = -EFAULT;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
+
+	if (0 == nr) {
+		/* early out if nothing to do */
+		return 0;
+	}
+
+	mutex_lock(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		unsigned int pcount;
+		unsigned int level;
+		u64 *page;
+
+		if (count > nr)
+			count = nr;
+
+		/* need to check if this is a 2MB or a 4kB page */
+		pgd = kctx->pgd;
+
+		for (level = MIDGARD_MMU_TOPLEVEL;
+				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
+			phys_addr_t next_pgd;
+
+			index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
+			page = kmap(phys_to_page(pgd));
+			if (mmu_mode->ate_is_valid(page[index], level))
+				break; /* keep the mapping */
+			else if (!mmu_mode->pte_is_valid(page[index], level)) {
+				/* nothing here, advance */
+				switch (level) {
+				case MIDGARD_MMU_LEVEL(0):
+					count = 134217728;
+					break;
+				case MIDGARD_MMU_LEVEL(1):
+					count = 262144;
+					break;
+				case MIDGARD_MMU_LEVEL(2):
+					count = 512;
+					break;
+				case MIDGARD_MMU_LEVEL(3):
+					count = 1;
+					break;
+				}
+				if (count > nr)
+					count = nr;
+				goto next;
+			}
+			next_pgd = mmu_mode->pte_to_phy_addr(page[index]);
+			kunmap(phys_to_page(pgd));
+			pgd = next_pgd;
+		}
+
+		switch (level) {
+		case MIDGARD_MMU_LEVEL(0):
+		case MIDGARD_MMU_LEVEL(1):
+			dev_warn(kctx->kbdev->dev,
+				 "%s: No support for ATEs at level %d\n",
+				 __func__, level);
+			kunmap(phys_to_page(pgd));
+			goto out;
+		case MIDGARD_MMU_LEVEL(2):
+			/* can only teardown if count >= 512 */
+			if (count >= 512) {
+				pcount = 1;
+			} else {
+				dev_warn(kctx->kbdev->dev,
+					 "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n",
+					 __func__, count);
+				pcount = 0;
+			}
+			break;
+		case MIDGARD_MMU_BOTTOMLEVEL:
+			/* page count is the same as the logical count */
+			pcount = count;
+			break;
+		default:
+			dev_err(kctx->kbdev->dev,
+				"%s: found non-mapped memory, early out\n",
+				__func__);
+			vpfn += count;
+			nr -= count;
+			continue;
+		}
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < pcount; i++)
+			mmu_mode->entry_invalidate(&page[index + i]);
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				   kbase_dma_addr(phys_to_page(pgd)) +
+				   8 * index, 8*pcount);
+
+next:
+		kunmap(phys_to_page(pgd));
+		vpfn += count;
+		nr -= count;
+	}
+	err = 0;
+out:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+/**
+ * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
+ * This call is being triggered as a response to the changes of the mem attributes
+ *
+ * @pre : The caller is responsible for validating the memory attributes
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
+			   struct tagged_addr *phys, size_t nr,
+			   unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags on GPU PFN 0x%llx from phys %p, %zu pages",
+			vpfn, phys, nr);
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		size_t count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev,
+				 "mmu_get_bottom_pgd failure\n");
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i],
+						flags, MIDGARD_MMU_BOTTOMLEVEL);
+
+		phys += count;
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(pfn_to_page(PFN_DOWN(pgd)));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
+}
+
+static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd,
+			       int level, u64 *pgd_page_buffer)
+{
+	phys_addr_t target_pgd;
+	struct page *p;
+	u64 *pgd_page;
+	int i;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != pgd_page);
+	/* Copy the page to our preallocated buffer so that we can minimize
+	 * kmap_atomic usage */
+	memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+	kunmap_atomic(pgd_page);
+	pgd_page = pgd_page_buffer;
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+		if (target_pgd) {
+			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
+				mmu_teardown_level(kctx,
+						   target_pgd,
+						   level + 1,
+						   pgd_page_buffer +
+						   (PAGE_SIZE / sizeof(u64)));
+			}
+		}
+	}
+
+	p = pfn_to_page(PFN_DOWN(pgd));
+	kbase_mem_pool_free(&kctx->mem_pool, p, true);
+	kbase_process_page_usage_dec(kctx, 1);
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+}
+
+int kbase_mmu_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
+
+	mutex_init(&kctx->mmu_lock);
+
+	/* Preallocate MMU depth of four pages for mmu_teardown_level to use */
+	kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+
+	if (NULL == kctx->mmu_teardown_pages)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void kbase_mmu_term(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	kfree(kctx->mmu_teardown_pages);
+	kctx->mmu_teardown_pages = NULL;
+}
+
+void kbase_mmu_free_pgd(struct kbase_context *kctx)
+{
+	int new_page_count = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	mutex_lock(&kctx->mmu_lock);
+	mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL,
+			   kctx->mmu_teardown_pages);
+	mutex_unlock(&kctx->mmu_lock);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
+
+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
+	size_t dump_size;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+	if (!pgd_page) {
+		dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
+		return 0;
+	}
+
+	if (*size_left >= size) {
+		/* A modified physical address that contains the page table level */
+		u64 m_pgd = pgd | level;
+
+		/* Put the modified physical address in the output buffer */
+		memcpy(*buffer, &m_pgd, sizeof(m_pgd));
+		*buffer += sizeof(m_pgd);
+
+		/* Followed by the page table itself */
+		memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
+		*buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
+
+		*size_left -= size;
+	}
+
+	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
+		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
+				target_pgd = mmu_mode->pte_to_phy_addr(
+						pgd_page[i]);
+
+				dump_size = kbasep_mmu_dump_level(kctx,
+						target_pgd, level + 1,
+						buffer, size_left);
+				if (!dump_size) {
+					kunmap(pfn_to_page(PFN_DOWN(pgd)));
+					return 0;
+				}
+				size += dump_size;
+			}
+		}
+	}
+
+	kunmap(pfn_to_page(PFN_DOWN(pgd)));
+
+	return size;
+}
+
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
+{
+	void *kaddr;
+	size_t size_left;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (0 == nr_pages) {
+		/* can't dump in a 0 sized buffer, early out */
+		return NULL;
+	}
+
+	size_left = nr_pages * PAGE_SIZE;
+
+	KBASE_DEBUG_ASSERT(0 != size_left);
+	kaddr = vmalloc_user(size_left);
+
+	mutex_lock(&kctx->mmu_lock);
+
+	if (kaddr) {
+		u64 end_marker = 0xFFULL;
+		char *buffer;
+		char *mmu_dump_buffer;
+		u64 config[3];
+		size_t dump_size, size = 0;
+
+		buffer = (char *)kaddr;
+		mmu_dump_buffer = buffer;
+
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
+			struct kbase_mmu_setup as_setup;
+
+			kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup);
+			config[0] = as_setup.transtab;
+			config[1] = as_setup.memattr;
+			config[2] = as_setup.transcfg;
+			memcpy(buffer, &config, sizeof(config));
+			mmu_dump_buffer += sizeof(config);
+			size_left -= sizeof(config);
+			size += sizeof(config);
+		}
+
+		dump_size = kbasep_mmu_dump_level(kctx,
+				kctx->pgd,
+				MIDGARD_MMU_TOPLEVEL,
+				&mmu_dump_buffer,
+				&size_left);
+
+		if (!dump_size)
+			goto fail_free;
+
+		size += dump_size;
+
+		/* Add on the size for the end marker */
+		size += sizeof(u64);
+
+		if (size > (nr_pages * PAGE_SIZE)) {
+			/* The buffer isn't big enough - free the memory and return failure */
+			goto fail_free;
+		}
+
+		/* Add the end marker */
+		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return kaddr;
+
+fail_free:
+	vfree(kaddr);
+	mutex_unlock(&kctx->mmu_lock);
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_dump);
+
+void bus_fault_worker(struct work_struct *data)
+{
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif /* KBASE_GPU_RESET_EN */
+
+	faulting_as = container_of(data, struct kbase_as, work_busfault);
+
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	if (unlikely(faulting_as->protected_mode))
+	{
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+		atomic_dec(&kbdev->faults_pending);
+		return;
+
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
+	if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		unsigned long flags;
+
+		/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Set the MMU into unmapped mode */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_disable(kctx);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+
+		kbase_pm_context_idle(kbdev);
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
+{
+	const char *e;
+
+	switch (exception_code) {
+		/* Non-Fault Status code */
+	case 0x00:
+		e = "NOT_STARTED/IDLE/OK";
+		break;
+	case 0x01:
+		e = "DONE";
+		break;
+	case 0x02:
+		e = "INTERRUPTED";
+		break;
+	case 0x03:
+		e = "STOPPED";
+		break;
+	case 0x04:
+		e = "TERMINATED";
+		break;
+	case 0x08:
+		e = "ACTIVE";
+		break;
+		/* Job exceptions */
+	case 0x40:
+		e = "JOB_CONFIG_FAULT";
+		break;
+	case 0x41:
+		e = "JOB_POWER_FAULT";
+		break;
+	case 0x42:
+		e = "JOB_READ_FAULT";
+		break;
+	case 0x43:
+		e = "JOB_WRITE_FAULT";
+		break;
+	case 0x44:
+		e = "JOB_AFFINITY_FAULT";
+		break;
+	case 0x48:
+		e = "JOB_BUS_FAULT";
+		break;
+	case 0x50:
+		e = "INSTR_INVALID_PC";
+		break;
+	case 0x51:
+		e = "INSTR_INVALID_ENC";
+		break;
+	case 0x52:
+		e = "INSTR_TYPE_MISMATCH";
+		break;
+	case 0x53:
+		e = "INSTR_OPERAND_FAULT";
+		break;
+	case 0x54:
+		e = "INSTR_TLS_FAULT";
+		break;
+	case 0x55:
+		e = "INSTR_BARRIER_FAULT";
+		break;
+	case 0x56:
+		e = "INSTR_ALIGN_FAULT";
+		break;
+	case 0x58:
+		e = "DATA_INVALID_FAULT";
+		break;
+	case 0x59:
+		e = "TILE_RANGE_FAULT";
+		break;
+	case 0x5A:
+		e = "ADDR_RANGE_FAULT";
+		break;
+	case 0x60:
+		e = "OUT_OF_MEMORY";
+		break;
+		/* GPU exceptions */
+	case 0x80:
+		e = "DELAYED_BUS_FAULT";
+		break;
+	case 0x88:
+		e = "SHAREABILITY_FAULT";
+		break;
+		/* MMU exceptions */
+	case 0xC0:
+	case 0xC1:
+	case 0xC2:
+	case 0xC3:
+	case 0xC4:
+	case 0xC5:
+	case 0xC6:
+	case 0xC7:
+		e = "TRANSLATION_FAULT";
+		break;
+	case 0xC8:
+		e = "PERMISSION_FAULT";
+		break;
+	case 0xC9:
+	case 0xCA:
+	case 0xCB:
+	case 0xCC:
+	case 0xCD:
+	case 0xCE:
+	case 0xCF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "PERMISSION_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xD0:
+	case 0xD1:
+	case 0xD2:
+	case 0xD3:
+	case 0xD4:
+	case 0xD5:
+	case 0xD6:
+	case 0xD7:
+		e = "TRANSTAB_BUS_FAULT";
+		break;
+	case 0xD8:
+		e = "ACCESS_FLAG";
+		break;
+	case 0xD9:
+	case 0xDA:
+	case 0xDB:
+	case 0xDC:
+	case 0xDD:
+	case 0xDE:
+	case 0xDF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "ACCESS_FLAG";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xE0:
+	case 0xE1:
+	case 0xE2:
+	case 0xE3:
+	case 0xE4:
+	case 0xE5:
+	case 0xE6:
+	case 0xE7:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "ADDRESS_SIZE_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xE8:
+	case 0xE9:
+	case 0xEA:
+	case 0xEB:
+	case 0xEC:
+	case 0xED:
+	case 0xEE:
+	case 0xEF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "MEMORY_ATTRIBUTES_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	default:
+		e = "UNKNOWN";
+		break;
+	};
+
+	return e;
+}
+
+static const char *access_type_name(struct kbase_device *kbdev,
+		u32 fault_status)
+{
+	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			return "ATOMIC";
+		else
+			return "UNKNOWN";
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		return "READ";
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		return "WRITE";
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		return "EXECUTE";
+	default:
+		WARN_ON(1);
+		return NULL;
+	}
+}
+
+/**
+ * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on.
+ */
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str)
+{
+	unsigned long flags;
+	int exception_type;
+	int access_type;
+	int source_id;
+	int as_no;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif
+
+	as_no = as->number;
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	/* ASSERT that the context won't leave the runpool */
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/* decode the fault status */
+	exception_type = as->fault_status & 0xFF;
+	access_type = (as->fault_status >> 8) & 0x3;
+	source_id = (as->fault_status >> 16);
+
+	/* terminal fault, print info about the fault */
+	dev_err(kbdev->dev,
+		"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+		"Reason: %s\n"
+		"raw fault status: 0x%X\n"
+		"decoded fault status: %s\n"
+		"exception type 0x%X: %s\n"
+		"access type 0x%X: %s\n"
+		"source id 0x%X\n"
+		"pid: %d\n",
+		as_no, as->fault_addr,
+		reason_str,
+		as->fault_status,
+		(as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+		exception_type, kbase_exception_name(kbdev, exception_type),
+		access_type, access_type_name(kbdev, as->fault_status),
+		source_id,
+		kctx->pid);
+
+	/* hardware counters dump fault handling */
+	if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
+			(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_DUMPING)) {
+		unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+
+		if ((as->fault_addr >= kbdev->hwcnt.addr) &&
+				(as->fault_addr < (kbdev->hwcnt.addr +
+						(num_core_groups * 2048))))
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+	}
+
+	/* Stop the kctx from submitting more jobs and cause it to be scheduled
+	 * out/rescheduled - this will occur on releasing the context's refcount */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_clear_submit_allowed(js_devdata, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
+	 * context can appear in the job slots from this point on */
+	kbase_backend_jm_kill_jobs_from_kctx(kctx);
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_mmu_disable(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+	/* Clear down the fault */
+	kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+	kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+}
+
+void kbasep_as_do_poke(struct work_struct *work)
+{
+	struct kbase_as *as;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(work);
+	as = container_of(work, struct kbase_as, poke_work);
+	kbdev = container_of(as, struct kbase_device, as[as->number]);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	/* GPU power will already be active by virtue of the caller holding a JS
+	 * reference on the address space, and will not release it until this worker
+	 * has finished */
+
+	/* Further to the comment above, we know that while this function is running
+	 * the AS will not be released as before the atom is released this workqueue
+	 * is flushed (in kbase_as_poking_timer_release_atom)
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
+
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	/* Force a uTLB invalidate */
+	kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
+				  AS_COMMAND_UNLOCK, 0);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (as->poke_refcount &&
+		!(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
+		/* Only queue up the timer if we need it, and we're not trying to kill it */
+		hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_as *as;
+	int queue_work_ret;
+
+	KBASE_DEBUG_ASSERT(NULL != timer);
+	as = container_of(timer, struct kbase_as, poke_timer);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+	KBASE_DEBUG_ASSERT(queue_work_ret);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * Retain the poking timer on an atom's context (if the atom hasn't already
+ * done so), and start the timer (if it's not already started).
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that's running on the GPU.
+ *
+ * The caller must hold hwaccess_lock
+ *
+ * This can be called safely from atomic context
+ */
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->poking)
+		return;
+
+	katom->poking = 1;
+
+	/* It's safe to work on the as/as_nr without an explicit reference,
+	 * because the caller holds the hwaccess_lock, and the atom itself
+	 * was also running and had already taken a reference  */
+	as = &kbdev->as[kctx->as_nr];
+
+	if (++(as->poke_refcount) == 1) {
+		/* First refcount for poke needed: check if not already in flight */
+		if (!as->poke_state) {
+			/* need to start poking */
+			as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
+			queue_work(as->poke_wq, &as->poke_work);
+		}
+	}
+}
+
+/**
+ * If an atom holds a poking timer, release it and wait for it to finish
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that still has a JS reference on the context
+ *
+ * This must \b not be called from atomic context, since it can sleep.
+ */
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	if (!katom->poking)
+		return;
+
+	as = &kbdev->as[kctx->as_nr];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	if (--(as->poke_refcount) == 0) {
+		as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_cancel(&as->poke_timer);
+		flush_workqueue(as->poke_wq);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* Re-check whether it's still needed */
+		if (as->poke_refcount) {
+			int queue_work_ret;
+			/* Poking still needed:
+			 * - Another retain will not be starting the timer or queueing work,
+			 * because it's still marked as in-flight
+			 * - The hrtimer has finished, and has not started a new timer or
+			 * queued work because it's been marked as killing
+			 *
+			 * So whatever happens now, just queue the work again */
+			as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
+			queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+			KBASE_DEBUG_ASSERT(queue_work_ret);
+		} else {
+			/* It isn't - so mark it as not in flight, and not killing */
+			as->poke_state = 0u;
+
+			/* The poke associated with the atom has now finished. If this is
+			 * also the last atom on the context, then we can guarentee no more
+			 * pokes (and thus no more poking register accesses) will occur on
+			 * the context until new atoms are run */
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	katom->poking = 0;
+}
+
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kctx) {
+		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n",
+				 kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
+				 as->number, as->fault_addr);
+
+		/* Since no ctx was found, the MMU must be disabled. */
+		WARN_ON(as->current_setup.transtab);
+
+		if (kbase_as_has_bus_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		} else if (kbase_as_has_page_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+		}
+
+#if KBASE_GPU_RESET_EN
+		if (kbase_as_has_bus_fault(as) &&
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+			bool reset_status;
+			/*
+			 * Reset the GPU, like in bus_fault_worker, in case an
+			 * earlier error hasn't been properly cleared by this
+			 * point.
+			 */
+			dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+			reset_status = kbase_prepare_to_reset_gpu_locked(kbdev);
+			if (reset_status)
+				kbase_reset_gpu_locked(kbdev);
+		}
+#endif /* KBASE_GPU_RESET_EN */
+
+		return;
+	}
+
+	if (kbase_as_has_bus_fault(as)) {
+		/*
+		 * hw counters dumping in progress, signal the
+		 * other thread that it failed
+		 */
+		if ((kbdev->hwcnt.kctx == kctx) &&
+		    (kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_DUMPING))
+			kbdev->hwcnt.backend.state =
+						KBASE_INSTR_STATE_FAULT;
+
+		/*
+		 * Stop the kctx from submitting more jobs and cause it
+		 * to be scheduled out/rescheduled when all references
+		 * to it are released
+		 */
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			dev_warn(kbdev->dev,
+					"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+					as->number, as->fault_addr,
+					as->fault_extra_addr);
+		else
+			dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+					as->number, as->fault_addr);
+
+		/*
+		 * We need to switch to UNMAPPED mode - but we do this in a
+		 * worker so that we can sleep
+		 */
+		WARN_ON(!queue_work(as->pf_wq, &as->work_busfault));
+		atomic_inc(&kbdev->faults_pending);
+	} else {
+		WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault));
+		atomic_inc(&kbdev->faults_pending);
+	}
+}
+
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		flush_workqueue(as->pf_wq);
+	}
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
new file mode 100644
index 0000000..986e959
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Interface file for accessing MMU hardware functionality
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_page MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_intro_sec Introduction
+ * This module provides an abstraction for accessing the functionality provided
+ * by the midgard MMU and thus allows all MMU HW access to be contained within
+ * one common place and allows for different backends (implementations) to
+ * be provided.
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_H_
+#define _MALI_KBASE_MMU_HW_H_
+
+/* Forward declarations */
+struct kbase_device;
+struct kbase_as;
+struct kbase_context;
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw  MMU access APIs
+ * @{
+ */
+
+/** @brief MMU fault type descriptor.
+ */
+enum kbase_mmu_fault_type {
+	KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
+	KBASE_MMU_FAULT_TYPE_PAGE,
+	KBASE_MMU_FAULT_TYPE_BUS,
+	KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED,
+	KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED
+};
+
+/** @brief Configure an address space for use.
+ *
+ * Configure the MMU using the address space details setup in the
+ * @ref kbase_context structure.
+ *
+ * @param[in]  kbdev          kbase device to configure.
+ * @param[in]  as             address space to configure.
+ * @param[in]  kctx           kbase context to configure.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev,
+		struct kbase_as *as, struct kbase_context *kctx);
+
+/** @brief Issue an operation to the MMU.
+ *
+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
+ * is associated with the provided @ref kbase_context over the specified range
+ *
+ * @param[in]  kbdev         kbase device to issue the MMU operation on.
+ * @param[in]  as            address space to issue the MMU operation on.
+ * @param[in]  kctx          kbase context to issue the MMU operation on.
+ * @param[in]  vpfn          MMU Virtual Page Frame Number to start the
+ *                           operation on.
+ * @param[in]  nr            Number of pages to work on.
+ * @param[in]  type          Operation type (written to ASn_COMMAND).
+ * @param[in]  handling_irq  Is this operation being called during the handling
+ *                           of an interrupt?
+ *
+ * @return Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type,
+		unsigned int handling_irq);
+
+/** @brief Clear a fault that has been previously reported by the MMU.
+ *
+ * Clear a bus error or page fault that has been reported by the MMU.
+ *
+ * @param[in]  kbdev         kbase device to  clear the fault from.
+ * @param[in]  as            address space to  clear the fault from.
+ * @param[in]  kctx          kbase context to clear the fault from or NULL.
+ * @param[in]  type          The type of fault that needs to be cleared.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @brief Enable fault that has been previously reported by the MMU.
+ *
+ * After a page fault or bus error has been reported by the MMU these
+ * will be disabled. After these are handled this function needs to be
+ * called to enable the page fault or bus error fault again.
+ *
+ * @param[in]  kbdev         kbase device to again enable the fault from.
+ * @param[in]  as            address space to again enable the fault from.
+ * @param[in]  kctx          kbase context to again enable the fault from.
+ * @param[in]  type          The type of fault that needs to be enabled again.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @} *//* end group mali_kbase_mmu_hw */
+/** @} *//* end group base_kbase_api */
+
+#endif	/* _MALI_KBASE_MMU_HW_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
new file mode 100644
index 0000000..0fb717b
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
@@ -0,0 +1,214 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014, 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+#include "mali_kbase_defs.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+/* For valid ATEs bit 1 = ((level == 3) ? 1 : 0).
+ * Valid ATE entries at level 3 are flagged with the value 3.
+ * Valid ATE entries at level 0-2 are flagged with the value 1.
+ */
+#define ENTRY_IS_ATE_L3		3ULL
+#define ENTRY_IS_ATE_L02	1ULL
+#define ENTRY_IS_INVAL		2ULL
+#define ENTRY_IS_PTE		3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_ACCESS_RW (1ULL << 6)     /* bits 6:7 */
+#define ENTRY_ACCESS_RO (3ULL << 6)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+	*pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler keeping cached copies of
+	 * memory, we have to explicitly say that we have updated memory.
+	 *
+	 * Note: We could manually move the data ourselves into R0 and
+	 * R1 by specifying register variables that are explicitly
+	 * given registers assignments, the down side of this is that
+	 * we have to assume cpu endianness.  To avoid this we can use
+	 * the ldrd to read the data from memory into R0 and R1 which
+	 * will respect the cpu endianness, we then use strd to make
+	 * the 64 bit assignment to the page table entry.
+	 */
+	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+			"strd r0, r1, [%[pte]]\n\t"
+			: "=m" (*pte)
+			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+			: "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register.
+	 */
+	setup->memattr =
+		(AS_MEMATTR_IMPL_DEF_CACHE_POLICY <<
+			(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_FORCE_TO_CACHE_ALL    <<
+			(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+		(AS_MEMATTR_WRITE_ALLOC           <<
+			(AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_IMPL_DEF   <<
+			(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_WA         <<
+			(AS_MEMATTR_INDEX_OUTER_WA * 8));
+
+	setup->transtab = (u64)kctx->pgd & AS_TRANSTAB_BASE_MASK;
+	setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K;
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = 0ULL;
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED;
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate, unsigned int level)
+{
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L3);
+	else
+		return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L02);
+}
+
+static int pte_is_valid(u64 pte, unsigned int level)
+{
+	/* PTEs cannot exist at the bottom level */
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		return false;
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* Set access flags - note that AArch64 stage 1 does not support
+	 * write-only access, so we use read/write instead
+	 */
+	if (flags & KBASE_REG_GPU_WR)
+		mmu_flags |= ENTRY_ACCESS_RW;
+	else if (flags & KBASE_REG_GPU_RD)
+		mmu_flags |= ENTRY_ACCESS_RO;
+
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry,
+		struct tagged_addr phy,
+		unsigned long flags,
+		unsigned int level)
+{
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		page_table_entry_set(entry, as_phys_addr_t(phy) |
+				get_mmu_flags(flags) |
+				ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3);
+	else
+		page_table_entry_set(entry, as_phys_addr_t(phy) |
+				get_mmu_flags(flags) |
+				ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & PAGE_MASK) |
+			ENTRY_ACCESS_BIT | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const aarch64_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void)
+{
+	return &aarch64_mode;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
new file mode 100644
index 0000000..f080fdc
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -0,0 +1,199 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+#include "mali_kbase_defs.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+#define ENTRY_IS_ATE        1ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_RD_BIT (1ULL << 6)
+#define ENTRY_WR_BIT (1ULL << 7)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
+		ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+	*pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler keeping cached copies of
+	 * memory, we have to explicitly say that we have updated
+	 * memory.
+	 *
+	 * Note: We could manually move the data ourselves into R0 and
+	 * R1 by specifying register variables that are explicitly
+	 * given registers assignments, the down side of this is that
+	 * we have to assume cpu endianness.  To avoid this we can use
+	 * the ldrd to read the data from memory into R0 and R1 which
+	 * will respect the cpu endianness, we then use strd to make
+	 * the 64 bit assignment to the page table entry.
+	 */
+	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+			"strd r0, r1, [%[pte]]\n\t"
+			: "=m" (*pte)
+			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+			: "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register. */
+	setup->memattr =
+		(AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
+		(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    <<
+		(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8))    |
+		(AS_MEMATTR_LPAE_WRITE_ALLOC           <<
+		(AS_MEMATTR_INDEX_WRITE_ALLOC * 8))           |
+		(AS_MEMATTR_LPAE_OUTER_IMPL_DEF        <<
+		(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8))        |
+		(AS_MEMATTR_LPAE_OUTER_WA              <<
+		(AS_MEMATTR_INDEX_OUTER_WA * 8))              |
+		0; /* The other indices are unused for now */
+
+	setup->transtab = ((u64)kctx->pgd &
+		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
+		AS_TRANSTAB_LPAE_ADRMODE_TABLE |
+		AS_TRANSTAB_LPAE_READ_INNER;
+
+	setup->transcfg = 0;
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate, unsigned int level)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte, unsigned int level)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* write perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
+	/* read perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry,
+		struct tagged_addr phy,
+		unsigned long flags,
+		unsigned int level)
+{
+	page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) |
+			     ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const lpae_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
+{
+	return &lpae_mode;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
new file mode 100644
index 0000000..0152b35
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
@@ -0,0 +1,119 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+
+/*
+ * This file is included only for type definitions and functions belonging to
+ * specific platform folders. Do not add dependencies with symbols that are
+ * defined somewhere else.
+ */
+#include <mali_kbase_config.h>
+
+#define PLATFORM_CONFIG_RESOURCE_COUNT 4
+#define PLATFORM_CONFIG_IRQ_RES_COUNT  3
+
+static struct platform_device *mali_device;
+
+#ifndef CONFIG_OF
+/**
+ * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources
+ *
+ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function
+ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT.
+ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
+ *
+ * @param[in]  io_resource      Input IO resource data
+ * @param[out] linux_resources  Pointer to output array of Linux resource structures
+ */
+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
+{
+	if (!io_resources || !linux_resources) {
+		pr_err("%s: couldn't find proper resources\n", __func__);
+		return;
+	}
+
+	memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource));
+
+	linux_resources[0].start = io_resources->io_memory_region.start;
+	linux_resources[0].end   = io_resources->io_memory_region.end;
+	linux_resources[0].flags = IORESOURCE_MEM;
+
+	linux_resources[1].start = io_resources->job_irq_number;
+	linux_resources[1].end   = io_resources->job_irq_number;
+	linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[2].start = io_resources->mmu_irq_number;
+	linux_resources[2].end   = io_resources->mmu_irq_number;
+	linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[3].start = io_resources->gpu_irq_number;
+	linux_resources[3].end   = io_resources->gpu_irq_number;
+	linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+}
+#endif /* CONFIG_OF */
+
+int kbase_platform_register(void)
+{
+	struct kbase_platform_config *config;
+#ifndef CONFIG_OF
+	struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
+#endif
+	int err;
+
+	config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
+	if (config == NULL) {
+		pr_err("%s: couldn't get platform config\n", __func__);
+		return -ENODEV;
+	}
+
+	mali_device = platform_device_alloc("mali", 0);
+	if (mali_device == NULL)
+		return -ENOMEM;
+
+#ifndef CONFIG_OF
+	kbasep_config_parse_io_resources(config->io_resources, resources);
+	err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
+	if (err) {
+		platform_device_put(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+#endif /* CONFIG_OF */
+
+	err = platform_device_add(mali_device);
+	if (err) {
+		platform_device_unregister(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kbase_platform_register);
+
+void kbase_platform_unregister(void)
+{
+	if (mali_device)
+		platform_device_unregister(mali_device);
+}
+EXPORT_SYMBOL(kbase_platform_unregister);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
new file mode 100644
index 0000000..97d5434
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -0,0 +1,205 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.c
+ * Base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_vinstr.h>
+
+#include <mali_kbase_pm.h>
+
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
+{
+	return kbase_hwaccess_pm_powerup(kbdev, flags);
+}
+
+void kbase_pm_halt(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
+void kbase_pm_context_active(struct kbase_device *kbdev)
+{
+	(void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+}
+
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerup. Sometimes the event might be missed due to reading the count
+	 * outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+	if (kbase_pm_is_suspending(kbdev)) {
+		switch (suspend_handler) {
+		case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+			if (kbdev->pm.active_count != 0)
+				break;
+			/* FALLTHROUGH */
+		case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			if (old_count == 0)
+				kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+			return 1;
+
+		case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
+			/* FALLTHROUGH */
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false, "unreachable");
+			break;
+		}
+	}
+	c = ++kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	if (c == 1)
+		/* First context active: Power on the GPU and any cores requested by
+		 * the policy */
+		kbase_hwaccess_pm_gpu_active(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_active);
+
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerdown. Sometimes the event might be missed due to reading the
+	 * count outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	c = --kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
+
+	KBASE_DEBUG_ASSERT(c >= 0);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	if (c == 0) {
+		/* Last context has gone idle */
+		kbase_hwaccess_pm_gpu_idle(kbdev);
+
+		/* Wake up anyone waiting for this to become 0 (e.g. suspend). The
+		 * waiters must synchronize with us by locking the pm.lock after
+		 * waiting */
+		wake_up(&kbdev->pm.zero_active_count_wait);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
+
+void kbase_pm_suspend(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	mutex_lock(&kbdev->pm.lock);
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+	kbdev->pm.suspending = true;
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* From now on, the active count will drop towards zero. Sometimes, it'll
+	 * go up briefly before going down again. However, once it reaches zero it
+	 * will stay there - guaranteeing that we've idled all pm references */
+
+	/* Suspend job scheduler and associated components, so that it releases all
+	 * the PM active count references */
+	kbasep_js_suspend(kbdev);
+
+	/* Wait for the active count to reach zero. This is not the same as
+	 * waiting for a power down, since not all policies power down when this
+	 * reaches zero. */
+	wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0);
+
+	/* NOTE: We synchronize with anything that was just finishing a
+	 * kbase_pm_context_idle() call by locking the pm.lock below */
+
+	kbase_hwaccess_pm_suspend(kbdev);
+}
+
+void kbase_pm_resume(struct kbase_device *kbdev)
+{
+	/* MUST happen before any pm_context_active calls occur */
+	kbase_hwaccess_pm_resume(kbdev);
+
+	/* Initial active call, to power on the GPU/cores if needed */
+	kbase_pm_context_active(kbdev);
+
+	/* Resume any blocked atoms (which may cause contexts to be scheduled in
+	 * and dependent atoms to run) */
+	kbase_resume_suspended_soft_jobs(kbdev);
+
+	/* Resume the Job Scheduler and associated components, and start running
+	 * atoms */
+	kbasep_js_resume(kbdev);
+
+	/* Matching idle call, to power off the GPU/cores if we didn't actually
+	 * need it and the policy doesn't want it on */
+	kbase_pm_context_idle(kbdev);
+
+	/* Resume vinstr operation */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
new file mode 100644
index 0000000..37fa247
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -0,0 +1,171 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include "mali_kbase_hwaccess_pm.h"
+
+#define PM_ENABLE_IRQS       0x01
+#define PM_HW_ISSUES_DETECT  0x02
+
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return 0 if the power management framework was successfully initialized.
+ */
+int kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @param flags     Flags to pass on to kbase_pm_init_hw
+ *
+ * @return 0 if powerup was successful.
+ */
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ * Should ensure that no new interrupts are generated,
+ * but allow any currently running interrupt handlers to complete successfully.
+ * The GPU is forced off by the time this function returns, regardless of
+ * whether or not the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init)
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It informs the active power policy that the
+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occuring/has occurred whilst this is
+ * in use. Use kbase_pm_contect_active_unless_suspending() instead.
+ *
+ * @note a Suspend is only visible to Kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+	/** A suspend is not expected/not possible - this is the same as
+	 * kbase_pm_context_active() */
+	KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+	/** If we're suspending, fail and don't increase the active count */
+	KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+	/** If we're suspending, succeed and allow the active count to increase iff
+	 * it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+	 *
+	 * This should only be used when there is a bounded time on the activation
+	 * (e.g. guarantee it's going to be idled very soon after) */
+	KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * We returns a status code indicating whether we're allowed to keep the GPU
+ * active during the suspend, depending on the handler code. If the status code
+ * indicates a failure, the caller must abort whatever operation it was
+ * attempting, and potentially queue it up for after the OS has resumed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero     Indicates success
+ * @return non-zero Indicates failure due to the system being suspending/suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power
+ * policy so the calling code should ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from Kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note the mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU e.g. via atom submission.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_vsync_callback - vsync callback
+ *
+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise
+ * @data: Pointer to the kbase device as returned by kbase_find_device()
+ *
+ * Callback function used to notify the power management code that a vsync has
+ * occurred on the display.
+ */
+void kbase_pm_vsync_callback(int buffer_updated, void *data);
+
+#endif				/* _KBASE_PM_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
new file mode 100644
index 0000000..7fb674e
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_profiling_gator_api.h
+ * Model interface
+ */
+
+#ifndef _KBASE_PROFILING_GATOR_API_H_
+#define _KBASE_PROFILING_GATOR_API_H_
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control
+ * the frame buffer dumping and s/w counter reporting.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define FBDUMP_CONTROL_MAX (5)
+#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE
+
+void _mali_profiling_control(u32 action, u32 value);
+
+#endif				/* _KBASE_PROFILING_GATOR_API */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
new file mode 100644
index 0000000..c970650
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+
+#include "mali_kbase_regs_history_debugfs.h"
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+#include <linux/debugfs.h>
+
+
+static int regs_history_size_get(void *data, u64 *val)
+{
+	struct kbase_io_history *const h = data;
+
+	*val = h->size;
+
+	return 0;
+}
+
+static int regs_history_size_set(void *data, u64 val)
+{
+	struct kbase_io_history *const h = data;
+
+	return kbase_io_history_resize(h, (u16)val);
+}
+
+
+DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
+		regs_history_size_get,
+		regs_history_size_set,
+		"%llu\n");
+
+
+/**
+ * regs_history_show - show callback for the register access history file.
+ *
+ * @sfile: The debugfs entry
+ * @data: Data associated with the entry
+ *
+ * This function is called to dump all recent accesses to the GPU registers.
+ *
+ * @return 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int regs_history_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_io_history *const h = sfile->private;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!h->enabled) {
+		seq_puts(sfile, "The register access history is disabled\n");
+		goto out;
+	}
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	iters = (h->size > h->count) ? h->count : h->size;
+	seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+out:
+	return 0;
+}
+
+
+/**
+ * regs_history_open - open operation for regs_history debugfs file
+ *
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * @return file descriptor
+ */
+static int regs_history_open(struct inode *in, struct file *file)
+{
+	return single_open(file, &regs_history_show, in->i_private);
+}
+
+
+static const struct file_operations regs_history_fops = {
+	.open = &regs_history_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history.enabled);
+	debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history, &regs_history_size_fops);
+	debugfs_create_file("regs_history", S_IRUGO,
+			kbdev->mali_debugfs_directory, &kbdev->io_history,
+			&regs_history_fops);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
new file mode 100644
index 0000000..f108370
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Header file for register access history support via debugfs
+ *
+ * This interface is made available via /sys/kernel/debug/mali#/regs_history*.
+ *
+ * Usage:
+ * - regs_history_enabled: whether recording of register accesses is enabled.
+ *   Write 'y' to enable, 'n' to disable.
+ * - regs_history_size: size of the register history buffer, must be > 0
+ * - regs_history: return the information about last accesses to the registers.
+ */
+
+#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H
+#define _KBASE_REGS_HISTORY_DEBUGFS_H
+
+struct kbase_device;
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/**
+ * kbasep_regs_history_debugfs_init - add debugfs entries for register history
+ *
+ * @kbdev: Pointer to kbase_device containing the register history
+ */
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_regs_history_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+#endif  /*_KBASE_REGS_HISTORY_DEBUGFS_H*/
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
new file mode 100644
index 0000000..203a065
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -0,0 +1,1150 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_replay.c
+ * Replay soft job handlers
+ */
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mem_linux.h>
+
+#define JOB_NOT_STARTED 0
+#define JOB_TYPE_NULL      (1)
+#define JOB_TYPE_VERTEX    (5)
+#define JOB_TYPE_TILER     (7)
+#define JOB_TYPE_FUSED     (8)
+#define JOB_TYPE_FRAGMENT  (9)
+
+#define JOB_HEADER_32_FBD_OFFSET (31*4)
+#define JOB_HEADER_64_FBD_OFFSET (44*4)
+
+#define FBD_POINTER_MASK (~0x3f)
+
+#define SFBD_TILER_OFFSET (48*4)
+
+#define MFBD_TILER_OFFSET       (14*4)
+
+#define FBD_HIERARCHY_WEIGHTS 8
+#define FBD_HIERARCHY_MASK_MASK 0x1fff
+
+#define FBD_TYPE 1
+
+#define HIERARCHY_WEIGHTS 13
+
+#define JOB_HEADER_ID_MAX                 0xffff
+
+#define JOB_SOURCE_ID(status)		(((status) >> 16) & 0xFFFF)
+#define JOB_POLYGON_LIST		(0x03)
+
+struct fragment_job {
+	struct job_descriptor_header header;
+
+	u32 x[2];
+	union {
+		u64 _64;
+		u32 _32;
+	} fragment_fbd;
+};
+
+static void dump_job_head(struct kbase_context *kctx, char *head_str,
+		struct job_descriptor_header *job)
+{
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
+	dev_dbg(kctx->kbdev->dev,
+			"addr                  = %p\n"
+			"exception_status      = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n"
+			"first_incomplete_task = %x\n"
+			"fault_pointer         = %llx\n"
+			"job_descriptor_size   = %x\n"
+			"job_type              = %x\n"
+			"job_barrier           = %x\n"
+			"_reserved_01          = %x\n"
+			"_reserved_02          = %x\n"
+			"_reserved_03          = %x\n"
+			"_reserved_04/05       = %x,%x\n"
+			"job_index             = %x\n"
+			"dependencies          = %x,%x\n",
+			job, job->exception_status,
+			JOB_SOURCE_ID(job->exception_status),
+			(job->exception_status >> 8) & 0x3,
+			job->exception_status  & 0xFF,
+			job->first_incomplete_task,
+			job->fault_pointer, job->job_descriptor_size,
+			job->job_type, job->job_barrier, job->_reserved_01,
+			job->_reserved_02, job->_reserved_03,
+			job->_reserved_04, job->_reserved_05,
+			job->job_index,
+			job->job_dependency_index_1,
+			job->job_dependency_index_2);
+
+	if (job->job_descriptor_size)
+		dev_dbg(kctx->kbdev->dev, "next               = %llx\n",
+				job->next_job._64);
+	else
+		dev_dbg(kctx->kbdev->dev, "next               = %x\n",
+				job->next_job._32);
+#endif
+}
+
+static int kbasep_replay_reset_sfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct {
+		u32 padding_1[1];
+		u32 flags;
+		u64 padding_2[2];
+		u64 heap_free_address;
+		u32 padding[8];
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev,
+		"FBD tiler:\n"
+		"flags = %x\n"
+		"heap_free_address = %llx\n",
+		fbd_tiler->flags, fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = fbd_tiler->flags &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n",
+						i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n",
+			fbd_tiler->heap_free_address, fbd_tiler->flags);
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_replay_reset_mfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct kbase_vmap_struct map;
+	struct {
+		u32 padding_0;
+		u32 flags;
+		u64 padding_1[2];
+		u64 heap_free_address;
+		u64 padding_2;
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev,
+			       "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "FBD tiler:\n"
+			"flags = %x\n"
+			"heap_free_address = %llx\n",
+			fbd_tiler->flags,
+			fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = (fbd_tiler->flags) &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev,
+				" Writing hierarchy level %02d (%08x) to %d\n",
+							     i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of an FBD pointed to by a tiler job
+ *
+ * This performs two functions :
+ * - Set the hierarchy mask
+ * - Reset the tiler free heap address
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] job_header        Address of job header to reset.
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] job_64            true if this job is using 64-bit
+ *                              descriptors
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx,
+		u64 job_header,	u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight,	bool job_64)
+{
+	struct kbase_vmap_struct map;
+	u64 fbd_address;
+
+	if (job_64) {
+		u64 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_64_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	} else {
+		u32 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_32_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	if (fbd_address & FBD_TYPE) {
+		return kbasep_replay_reset_mfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	} else {
+		return kbasep_replay_reset_sfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	}
+}
+
+/**
+ * @brief Reset the status of a job
+ *
+ * This performs the following functions :
+ *
+ * - Reset the Job Status field of each job to NOT_STARTED.
+ * - Set the Job Type field of any Vertex Jobs to Null Job.
+ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of
+ *   the tiler_heap_free parameter, and set the hierarchy level mask to the
+ *   hier_mask parameter.
+ * - Offset HW dependencies by the hw_job_id_offset parameter
+ * - Set the Perform Job Barrier flag if this job is the first in the chain
+ * - Read the address of the next job header
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in,out] job_header    Address of job header to reset. Set to address
+ *                              of next job header on exit.
+ * @param[in] prev_jc           Previous job chain to link to, if this job is
+ *                              the last in the chain.
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] first_in_chain    true if this job is the first in the chain
+ * @param[in] fragment_chain    true if this job is in the fragment chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_job(struct kbase_context *kctx,
+		u64 *job_header, u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool first_in_chain, bool fragment_chain)
+{
+	struct fragment_job *frag_job;
+	struct job_descriptor_header *job;
+	u64 new_job_header;
+	struct kbase_vmap_struct map;
+
+	frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map);
+	if (!frag_job) {
+		dev_err(kctx->kbdev->dev,
+				 "kbasep_replay_parse_jc: failed to map jc\n");
+		return -EINVAL;
+	}
+	job = &frag_job->header;
+
+	dump_job_head(kctx, "Job header:", job);
+
+	if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) {
+		dev_err(kctx->kbdev->dev, "Job already not started\n");
+		goto out_unmap;
+	}
+	job->exception_status = JOB_NOT_STARTED;
+
+	if (job->job_type == JOB_TYPE_VERTEX)
+		job->job_type = JOB_TYPE_NULL;
+
+	if (job->job_type == JOB_TYPE_FUSED) {
+		dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
+		goto out_unmap;
+	}
+
+	if (first_in_chain)
+		job->job_barrier = 1;
+
+	if ((job->job_dependency_index_1 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_dependency_index_2 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+		dev_err(kctx->kbdev->dev,
+			     "Job indicies/dependencies out of valid range\n");
+		goto out_unmap;
+	}
+
+	if (job->job_dependency_index_1)
+		job->job_dependency_index_1 += hw_job_id_offset;
+	if (job->job_dependency_index_2)
+		job->job_dependency_index_2 += hw_job_id_offset;
+
+	job->job_index += hw_job_id_offset;
+
+	if (job->job_descriptor_size) {
+		new_job_header = job->next_job._64;
+		if (!job->next_job._64)
+			job->next_job._64 = prev_jc;
+	} else {
+		new_job_header = job->next_job._32;
+		if (!job->next_job._32)
+			job->next_job._32 = prev_jc;
+	}
+	dump_job_head(kctx, "Updated to:", job);
+
+	if (job->job_type == JOB_TYPE_TILER) {
+		bool job_64 = job->job_descriptor_size != 0;
+
+		if (kbasep_replay_reset_tiler_job(kctx, *job_header,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, job_64) != 0)
+			goto out_unmap;
+
+	} else if (job->job_type == JOB_TYPE_FRAGMENT) {
+		u64 fbd_address;
+
+		if (job->job_descriptor_size)
+			fbd_address = frag_job->fragment_fbd._64;
+		else
+			fbd_address = (u64)frag_job->fragment_fbd._32;
+
+		if (fbd_address & FBD_TYPE) {
+			if (kbasep_replay_reset_mfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		} else {
+			if (kbasep_replay_reset_sfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		}
+	}
+
+	kbase_vunmap(kctx, &map);
+
+	*job_header = new_job_header;
+
+	return 0;
+
+out_unmap:
+	kbase_vunmap(kctx, &map);
+	return -EINVAL;
+}
+
+/**
+ * @brief Find the highest job ID in a job chain
+ *
+ * @param[in] kctx        Context pointer
+ * @param[in] jc          Job chain start address
+ * @param[out] hw_job_id  Highest job ID in chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
+		u64 jc,	u16 *hw_job_id)
+{
+	while (jc) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		dev_dbg(kctx->kbdev->dev,
+			"kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc);
+
+		job = kbase_vmap(kctx, jc, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev, "failed to map jc\n");
+
+			return -EINVAL;
+		}
+
+		if (job->job_index > *hw_job_id)
+			*hw_job_id = job->job_index;
+
+		if (job->job_descriptor_size)
+			jc = job->next_job._64;
+		else
+			jc = job->next_job._32;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a number of jobs
+ *
+ * This function walks the provided job chain, and calls
+ * kbasep_replay_reset_job for each job. It also links the job chain to the
+ * provided previous job chain.
+ *
+ * The function will fail if any of the jobs passed already have status of
+ * NOT_STARTED.
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] jc                Job chain to be processed
+ * @param[in] prev_jc           Job chain to be added to. May be NULL
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] fragment_chain    true if this chain is the fragment chain
+ *
+ * @return 0 on success, error code otherwise
+ */
+static int kbasep_replay_parse_jc(struct kbase_context *kctx,
+		u64 jc,	u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool fragment_chain)
+{
+	bool first_in_chain = true;
+	int nr_jobs = 0;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n",
+			jc, hw_job_id_offset);
+
+	while (jc) {
+		dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc);
+
+		if (kbasep_replay_reset_job(kctx, &jc, prev_jc,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, hw_job_id_offset,
+				first_in_chain, fragment_chain) != 0)
+			return -EINVAL;
+
+		first_in_chain = false;
+
+		nr_jobs++;
+		if (fragment_chain &&
+		    nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) {
+			dev_err(kctx->kbdev->dev,
+				"Exceeded maximum number of jobs in fragment chain\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a replay job, and set up dependencies
+ *
+ * This performs the actions to allow the replay job to be re-run following
+ * completion of the passed dependency.
+ *
+ * @param[in] katom     The atom to be reset
+ * @param[in] dep_atom  The dependency to be attached to the atom
+ */
+static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom,
+		struct kbase_jd_atom *dep_atom)
+{
+	katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+	list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]);
+}
+
+/**
+ * @brief Allocate an unused katom
+ *
+ * This will search the provided context for an unused katom, and will mark it
+ * as KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * If no atoms are available then the function will fail.
+ *
+ * @param[in] kctx      Context pointer
+ * @return An atom ID, or -1 on failure
+ */
+static int kbasep_allocate_katom(struct kbase_context *kctx)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int i;
+
+	for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) {
+		if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) {
+			jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED;
+			dev_dbg(kctx->kbdev->dev,
+				  "kbasep_allocate_katom: Allocated atom %d\n",
+									    i);
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * @brief Release a katom
+ *
+ * This will mark the provided atom as available, and remove any dependencies.
+ *
+ * For use on error path.
+ *
+ * @param[in] kctx      Context pointer
+ * @param[in] atom_id   ID of atom to release
+ */
+static void kbasep_release_katom(struct kbase_context *kctx, int atom_id)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n",
+			atom_id);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[0]))
+		list_del(jctx->atoms[atom_id].dep_head[0].next);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[1]))
+		list_del(jctx->atoms[atom_id].dep_head[1].next);
+
+	jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED;
+}
+
+static void kbasep_replay_create_atom(struct kbase_context *kctx,
+				      struct base_jd_atom_v2 *atom,
+				      int atom_nr,
+				      base_jd_prio prio)
+{
+	atom->nr_extres = 0;
+	atom->extres_list = 0;
+	atom->device_nr = 0;
+	atom->prio = prio;
+	atom->atom_number = atom_nr;
+
+	base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID);
+
+	atom->udata.blob[0] = 0;
+	atom->udata.blob[1] = 0;
+}
+
+/**
+ * @brief Create two atoms for the purpose of replaying jobs
+ *
+ * Two atoms are allocated and created. The jc pointer is not set at this
+ * stage. The second atom has a dependency on the first. The remaining fields
+ * are set up as follows :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ *
+ * @param[out] t_atom      Atom to use for tiler jobs
+ * @param[out] f_atom      Atom to use for fragment jobs
+ * @param[in]  prio        Priority of new atom (inherited from replay soft
+ *                         job)
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_create_atoms(struct kbase_context *kctx,
+		struct base_jd_atom_v2 *t_atom,
+		struct base_jd_atom_v2 *f_atom,
+		base_jd_prio prio)
+{
+	int t_atom_nr, f_atom_nr;
+
+	t_atom_nr = kbasep_allocate_katom(kctx);
+	if (t_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		return -EINVAL;
+	}
+
+	f_atom_nr = kbasep_allocate_katom(kctx);
+	if (f_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		kbasep_release_katom(kctx, t_atom_nr);
+		return -EINVAL;
+	}
+
+	kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
+	kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
+
+	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA);
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_DEBUG
+static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload)
+{
+	u64 next;
+
+	dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n");
+	next = payload->tiler_jc_list;
+
+	while (next) {
+		struct kbase_vmap_struct map;
+		base_jd_replay_jc *jc_struct;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map);
+
+		if (!jc_struct)
+			return;
+
+		dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n",
+				jc_struct, jc_struct->jc, jc_struct->next);
+
+		next = jc_struct->next;
+
+		kbase_vunmap(kctx, &map);
+	}
+}
+#endif
+
+/**
+ * @brief Parse a base_jd_replay_payload provided by userspace
+ *
+ * This will read the payload from userspace, and parse the job chains.
+ *
+ * @param[in] kctx         Context pointer
+ * @param[in] replay_atom  Replay soft job atom
+ * @param[in] t_atom       Atom to use for tiler jobs
+ * @param[in] f_atom       Atom to use for fragment jobs
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_parse_payload(struct kbase_context *kctx,
+					      struct kbase_jd_atom *replay_atom,
+					      struct base_jd_atom_v2 *t_atom,
+					      struct base_jd_atom_v2 *f_atom)
+{
+	base_jd_replay_payload *payload = NULL;
+	u64 next;
+	u64 prev_jc = 0;
+	u16 hw_job_id_offset = 0;
+	int ret = -EINVAL;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n",
+			replay_atom->jc, sizeof(payload));
+
+	payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload);
+	dev_dbg(kctx->kbdev->dev, "Payload structure:\n"
+				  "tiler_jc_list            = %llx\n"
+				  "fragment_jc              = %llx\n"
+				  "tiler_heap_free          = %llx\n"
+				  "fragment_hierarchy_mask  = %x\n"
+				  "tiler_hierarchy_mask     = %x\n"
+				  "hierarchy_default_weight = %x\n"
+				  "tiler_core_req           = %x\n"
+				  "fragment_core_req        = %x\n",
+							payload->tiler_jc_list,
+							  payload->fragment_jc,
+						      payload->tiler_heap_free,
+					      payload->fragment_hierarchy_mask,
+						 payload->tiler_hierarchy_mask,
+					     payload->hierarchy_default_weight,
+						       payload->tiler_core_req,
+						   payload->fragment_core_req);
+	payload_dump(kctx, payload);
+#endif
+	t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER;
+	f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER;
+
+	/* Sanity check core requirements*/
+	if ((t_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T ||
+	    (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS ||
+	     t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES ||
+	     f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+
+		int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP;
+		int f_atom_type = f_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC;
+		int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+		int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+
+		if (t_atom_type != BASE_JD_REQ_T) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. Was: 0x%x\n Expected: 0x%x",
+			    t_atom_type, BASE_JD_REQ_T);
+		}
+		if (f_atom_type != BASE_JD_REQ_FS) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. Was 0x%x Expected: 0x%x\n",
+			    f_atom_type, BASE_JD_REQ_FS);
+		}
+		if (t_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n");
+		}
+		if (f_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n");
+		}
+
+		goto out;
+	}
+
+	/* Process tiler job chains */
+	next = payload->tiler_jc_list;
+	if (!next) {
+		dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n");
+		goto out;
+	}
+
+	while (next) {
+		base_jd_replay_jc *jc_struct;
+		struct kbase_vmap_struct jc_map;
+		u64 jc;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map);
+
+		if (!jc_struct) {
+			dev_err(kctx->kbdev->dev, "Failed to map jc struct\n");
+			goto out;
+		}
+
+		jc = jc_struct->jc;
+		next = jc_struct->next;
+		if (next)
+			jc_struct->jc = 0;
+
+		kbase_vunmap(kctx, &jc_map);
+
+		if (jc) {
+			u16 max_hw_job_id = 0;
+
+			if (kbasep_replay_find_hw_job_id(kctx, jc,
+					&max_hw_job_id) != 0)
+				goto out;
+
+			if (kbasep_replay_parse_jc(kctx, jc, prev_jc,
+					payload->tiler_heap_free,
+					payload->tiler_hierarchy_mask,
+					payload->hierarchy_default_weight,
+					hw_job_id_offset, false) != 0) {
+				goto out;
+			}
+
+			hw_job_id_offset += max_hw_job_id;
+
+			prev_jc = jc;
+		}
+	}
+	t_atom->jc = prev_jc;
+
+	/* Process fragment job chain */
+	f_atom->jc = payload->fragment_jc;
+	if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0,
+			payload->tiler_heap_free,
+			payload->fragment_hierarchy_mask,
+			payload->hierarchy_default_weight, 0,
+			true) != 0) {
+		goto out;
+	}
+
+	if (!t_atom->jc || !f_atom->jc) {
+		dev_err(kctx->kbdev->dev, "Invalid payload\n");
+		goto out;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n",
+			t_atom->jc, f_atom->jc);
+	ret = 0;
+
+out:
+	kbase_vunmap(kctx, &map);
+
+	return ret;
+}
+
+static void kbase_replay_process_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+	struct kbase_jd_context *jctx;
+	bool need_to_try_schedule_context = false;
+
+	struct base_jd_atom_v2 t_atom, f_atom;
+	struct kbase_jd_atom *t_katom, *f_katom;
+	base_jd_prio atom_prio;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+
+	mutex_lock(&jctx->lock);
+
+	atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority);
+
+	if (kbasep_replay_create_atoms(
+			kctx, &t_atom, &f_atom, atom_prio) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	t_katom = &jctx->atoms[t_atom.atom_number];
+	f_katom = &jctx->atoms[f_atom.atom_number];
+
+	if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) {
+		kbasep_release_katom(kctx, t_atom.atom_number);
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	kbasep_replay_reset_softjob(katom, f_katom);
+
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom);
+	if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom);
+	if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+out:
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_disjoint_state_down(kctx->kbdev);
+
+		need_to_try_schedule_context |= jd_done_nolock(katom, NULL);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kctx->kbdev);
+
+	mutex_unlock(&jctx->lock);
+}
+
+/**
+ * @brief Check job replay fault
+ *
+ * This will read the job payload, checks fault type and source, then decides
+ * whether replay is required.
+ *
+ * @param[in] katom       The atom to be processed
+ * @return  true (success) if replay required or false on failure.
+ */
+static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	base_jd_replay_payload *payload;
+	u64 job_header;
+	u64 job_loop_detect;
+	struct job_descriptor_header *job;
+	struct kbase_vmap_struct job_map;
+	struct kbase_vmap_struct map;
+	bool err = false;
+
+	/* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or
+	 * if force_replay is enabled.
+	 */
+	if (BASE_JD_EVENT_TERMINATED == katom->event_code) {
+		return false;
+	} else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) {
+		return true;
+	} else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) {
+		katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT;
+		return true;
+	} else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) {
+		/* No replay for faults of type other than
+		 * BASE_JD_EVENT_DATA_INVALID_FAULT.
+		 */
+		return false;
+	}
+
+	/* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc
+	 * to find out whether the source of exception is POLYGON_LIST. Replay
+	 * is required if the source of fault is POLYGON_LIST.
+	 */
+	payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n");
+		return false;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload);
+	dev_dbg(dev, "\nPayload structure:\n"
+		     "fragment_jc              = 0x%llx\n"
+		     "fragment_hierarchy_mask  = 0x%x\n"
+		     "fragment_core_req        = 0x%x\n",
+		     payload->fragment_jc,
+		     payload->fragment_hierarchy_mask,
+		     payload->fragment_core_req);
+#endif
+	/* Process fragment job chain */
+	job_header      = (u64) payload->fragment_jc;
+	job_loop_detect = job_header;
+	while (job_header) {
+		job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map);
+		if (!job) {
+			dev_err(dev, "failed to map jc\n");
+			/* unmap payload*/
+			kbase_vunmap(kctx, &map);
+			return false;
+		}
+
+
+		dump_job_head(kctx, "\njob_head structure:\n", job);
+
+		/* Replay only when the polygon list reader caused the
+		 * DATA_INVALID_FAULT */
+		if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
+		   (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) {
+			err = true;
+			kbase_vunmap(kctx, &job_map);
+			break;
+		}
+
+		/* Move on to next fragment job in the list */
+		if (job->job_descriptor_size)
+			job_header = job->next_job._64;
+		else
+			job_header = job->next_job._32;
+
+		kbase_vunmap(kctx, &job_map);
+
+		/* Job chain loop detected */
+		if (job_header == job_loop_detect)
+			break;
+	}
+
+	/* unmap payload*/
+	kbase_vunmap(kctx, &map);
+
+	return err;
+}
+
+
+/**
+ * @brief Process a replay job
+ *
+ * Called from kbase_process_soft_job.
+ *
+ * On exit, if the job has completed, katom->event_code will have been updated.
+ * If the job has not completed, and is replaying jobs, then the atom status
+ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * @param[in] katom  The atom to be processed
+ * @return           false if the atom has completed
+ *                   true if the atom is replaying jobs
+ */
+bool kbase_replay_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	/* Don't replay this atom if these issues are not present in the
+	 * hardware */
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11020) &&
+			!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11024)) {
+		dev_dbg(kbdev->dev, "Hardware does not need replay workaround");
+
+		/* Signal failure to userspace */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+
+		return false;
+	}
+
+	if (katom->event_code == BASE_JD_EVENT_DONE) {
+		dev_dbg(kbdev->dev, "Previous job succeeded - not replaying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		dev_dbg(kbdev->dev, "Not replaying; context is dying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	/* Check job exception type and source before replaying. */
+	if (!kbase_replay_fault_check(katom)) {
+		dev_dbg(kbdev->dev,
+			"Replay cancelled on event %x\n", katom->event_code);
+		/* katom->event_code is already set to the failure code of the
+		 * previous job.
+		 */
+		return false;
+	}
+
+	dev_warn(kbdev->dev, "Replaying jobs retry=%d\n",
+			katom->retry_count);
+
+	katom->retry_count++;
+
+	if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) {
+		dev_err(kbdev->dev, "Replay exceeded limit - failing jobs\n");
+
+		kbase_disjoint_state_down(kbdev);
+
+		/* katom->event_code is already set to the failure code of the
+		   previous job */
+		return false;
+	}
+
+	/* only enter the disjoint state once for the whole time while the replay is ongoing */
+	if (katom->retry_count == 1)
+		kbase_disjoint_state_up(kbdev);
+
+	INIT_WORK(&katom->work, kbase_replay_process_worker);
+	queue_work(kctx->event_workq, &katom->work);
+
+	return true;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
new file mode 100644
index 0000000..43175c8
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
@@ -0,0 +1,74 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+#include <mali_kbase_smc.h>
+
+#include <linux/compiler.h>
+
+static noinline u64 invoke_smc_fid(u64 function_id,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	register u64 x0 asm("x0") = function_id;
+	register u64 x1 asm("x1") = arg0;
+	register u64 x2 asm("x2") = arg1;
+	register u64 x3 asm("x3") = arg2;
+
+	asm volatile(
+			__asmeq("%0", "x0")
+			__asmeq("%1", "x1")
+			__asmeq("%2", "x2")
+			__asmeq("%3", "x3")
+			"smc    #0\n"
+			: "+r" (x0)
+			: "r" (x1), "r" (x2), "r" (x3));
+
+	return x0;
+}
+
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2)
+{
+	/* Is fast call (bit 31 set) */
+	KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL);
+	/* bits 16-23 must be zero for fast calls */
+	KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0);
+
+	return invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	/* Only the six bits allowed should be used. */
+	KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0);
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return kbase_invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+#endif /* CONFIG_ARM64 */
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
new file mode 100644
index 0000000..9bff3d2
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_SMC_H_
+#define _KBASE_SMC_H_
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+
+#define SMC_FAST_CALL (1 << 31)
+#define SMC_64 (1 << 30)
+
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */
+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET)
+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET)
+
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @fid: The SMC function to call, see SMC Calling convention.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC.
+  */
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2);
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @oen: Owning Entity number (SIP, STD etc).
+  * @function_number: The function number within the OEN.
+  * @smc64: use SMC64 calling convention instead of SMC32.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC call.
+  */
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2);
+
+#endif /* CONFIG_ARM64 */
+
+#endif /* _KBASE_SMC_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
new file mode 100644
index 0000000..127ada0
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -0,0 +1,1513 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#include <asm/cacheflush.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif
+#include <linux/dma-mapping.h>
+#include <mali_base_kernel.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+#include <linux/version.h>
+#include <linux/ktime.h>
+#include <linux/pfn.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/cache.h>
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+static void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_add_tail(&katom->queue, &kctx->waiting_soft_jobs);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_del(&katom->queue);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Record the start time of this atom so we could cancel it at
+	 * the right time.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* Add the atom to the waiting list before the timer is
+	 * (re)started to make sure that it gets processed.
+	 */
+	kbasep_add_waiting_soft_job(katom);
+
+	/* Schedule timeout of this atom after a period if it is not active */
+	if (!timer_pending(&kctx->soft_job_timeout)) {
+		int timeout_ms = atomic_read(
+				&kctx->kbdev->js_data.soft_job_timeout_ms);
+		mod_timer(&kctx->soft_job_timeout,
+			  jiffies + msecs_to_jiffies(timeout_ms));
+	}
+}
+
+static int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*status = *mapped_evt;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	if ((new_status != BASE_JD_SOFT_EVENT_SET) &&
+	    (new_status != BASE_JD_SOFT_EVENT_RESET))
+		return -EINVAL;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*mapped_evt = new_status;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+	struct kbase_vmap_struct map;
+	void *user_result;
+	struct timespec ts;
+	struct base_dump_cpu_gpu_counters data;
+	u64 system_time;
+	u64 cycle_counter;
+	u64 jc = katom->jc;
+	struct kbase_context *kctx = katom->kctx;
+	int pm_active_err;
+
+	memset(&data, 0, sizeof(data));
+
+	/* Take the PM active reference as late as possible - otherwise, it could
+	 * delay suspend until we process the atom (which may be at the end of a
+	 * long chain of dependencies */
+	pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+	if (pm_active_err) {
+		struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+		/* We're suspended - queue this on the list of suspended jobs
+		 * Use dep_item[1], because dep_item[0] was previously in use
+		 * for 'waiting_soft_jobs'.
+		 */
+		mutex_lock(&js_devdata->runpool_mutex);
+		list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		/* Also adding this to the list of waiting soft job */
+		kbasep_add_waiting_soft_job(katom);
+
+		return pm_active_err;
+	}
+
+	kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
+									&ts);
+
+	kbase_pm_context_idle(kctx->kbdev);
+
+	data.sec = ts.tv_sec;
+	data.usec = ts.tv_nsec / 1000;
+	data.system_time = system_time;
+	data.cycle_counter = cycle_counter;
+
+	/* Assume this atom will be cancelled until we know otherwise */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* GPU_WR access is checked on the range for returning the result to
+	 * userspace for the following reasons:
+	 * - security, this is currently how imported user bufs are checked.
+	 * - userspace ddk guaranteed to assume region was mapped as GPU_WR */
+	user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map);
+	if (!user_result)
+		return 0;
+
+	memcpy(user_result, &data, sizeof(data));
+
+	kbase_vunmap(kctx, &map);
+
+	/* Atom was fine - mark it as done */
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	return 0;
+}
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+/* Called by the explicit fence mechanism when a fence wait has completed */
+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(kctx->kbdev);
+	mutex_unlock(&kctx->jctx.lock);
+}
+#endif
+
+static void kbasep_soft_event_complete_job(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt)
+{
+	int cancel_timer = 1;
+	struct list_head *entry, *tmp;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, queue);
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			if (katom->jc == evt) {
+				list_del(&katom->queue);
+
+				katom->event_code = BASE_JD_EVENT_DONE;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				/* There are still other waiting jobs, we cannot
+				 * cancel the timer yet.
+				 */
+				cancel_timer = 0;
+			}
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			/* Keep the timer running if fence debug is enabled and
+			 * there are waiting fence jobs.
+			 */
+			cancel_timer = 0;
+			break;
+#endif
+		}
+	}
+
+	if (cancel_timer)
+		del_timer(&kctx->soft_job_timeout);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep;
+
+		list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) {
+			if (dep->status == KBASE_JD_ATOM_STATE_UNUSED ||
+			    dep->status == KBASE_JD_ATOM_STATE_COMPLETED)
+				continue;
+
+			if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					== BASE_JD_REQ_SOFT_FENCE_TRIGGER) {
+				/* Found blocked trigger fence. */
+				struct kbase_sync_fence_info info;
+
+				if (!kbase_sync_fence_in_info_get(dep, &info)) {
+					dev_warn(dev,
+						 "\tVictim trigger atom %d fence [%p] %s: %s\n",
+						 kbase_jd_atom_id(kctx, dep),
+						 info.fence,
+						 info.name,
+						 kbase_sync_status_string(info.status));
+				 }
+			}
+
+			kbase_fence_debug_check_atom(dep);
+		}
+	}
+}
+
+static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = katom->kctx->kbdev->dev;
+	int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms);
+	unsigned long lflags;
+	struct kbase_sync_fence_info info;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+
+	if (kbase_sync_fence_in_info_get(katom, &info)) {
+		/* Fence must have signaled just after timeout. */
+		spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+		return;
+	}
+
+	dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n",
+		 kctx->tgid, kctx->id,
+		 kbase_jd_atom_id(kctx, katom),
+		 info.fence, timeout_ms);
+	dev_warn(dev, "\tGuilty fence [%p] %s: %s\n",
+		 info.fence, info.name,
+		 kbase_sync_status_string(info.status));
+
+	/* Search for blocked trigger atoms */
+	kbase_fence_debug_check_atom(katom);
+
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+
+	kbase_sync_fence_in_dump(katom);
+}
+
+struct kbase_fence_debug_work {
+	struct kbase_jd_atom *katom;
+	struct work_struct work;
+};
+
+static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work)
+{
+	struct kbase_fence_debug_work *w = container_of(work,
+			struct kbase_fence_debug_work, work);
+	struct kbase_jd_atom *katom = w->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_fence_debug_wait_timeout(katom);
+	mutex_unlock(&kctx->jctx.lock);
+
+	kfree(w);
+}
+
+static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_debug_work *work;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Enqueue fence debug worker. Use job_done_wq to get
+	 * debug print ordered with job completion.
+	 */
+	work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC);
+	/* Ignore allocation failure. */
+	if (work) {
+		work->katom = katom;
+		INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker);
+		queue_work(kctx->jctx.job_done_wq, &work->work);
+	}
+}
+#endif /* CONFIG_MALI_FENCE_DEBUG */
+
+void kbasep_soft_job_timeout_worker(unsigned long data)
+{
+	struct kbase_context *kctx = (struct kbase_context *)data;
+	u32 timeout_ms = (u32)atomic_read(
+			&kctx->kbdev->js_data.soft_job_timeout_ms);
+	struct timer_list *timer = &kctx->soft_job_timeout;
+	ktime_t cur_time = ktime_get();
+	bool restarting = false;
+	unsigned long lflags;
+	struct list_head *entry, *tmp;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(entry,
+				struct kbase_jd_atom, queue);
+		s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time,
+					katom->start_timestamp));
+
+		if (elapsed_time < (s64)timeout_ms) {
+			restarting = true;
+			continue;
+		}
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			/* Take it out of the list to ensure that it
+			 * will be cancelled in all cases
+			 */
+			list_del(&katom->queue);
+
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			INIT_WORK(&katom->work, kbasep_soft_event_complete_job);
+			queue_work(kctx->jctx.job_done_wq, &katom->work);
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			kbase_fence_debug_timeout(katom);
+			break;
+#endif
+		}
+	}
+
+	if (restarting)
+		mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms));
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned char status;
+
+	/* The status of this soft-job is stored in jc */
+	if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return 0;
+	}
+
+	if (status == BASE_JD_SOFT_EVENT_SET)
+		return 0; /* Event already set, nothing to do */
+
+	kbasep_add_waiting_with_timeout(katom);
+
+	return 1;
+}
+
+static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom,
+				     unsigned char new_status)
+{
+	/* Complete jobs waiting on the same event */
+	struct kbase_context *kctx = katom->kctx;
+
+	if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, katom->jc);
+}
+
+/**
+ * kbase_soft_event_update() - Update soft event state
+ * @kctx: Pointer to context
+ * @event: Event to update
+ * @new_status: New status value of event
+ *
+ * Update the event, and wake up any atoms waiting for the event.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+int kbase_soft_event_update(struct kbase_context *kctx,
+			     u64 event,
+			     unsigned char new_status)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->jctx.lock);
+
+	if (kbasep_write_soft_event_status(kctx, event, new_status)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, event);
+
+out:
+	mutex_unlock(&kctx->jctx.lock);
+
+	return err;
+}
+
+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
+{
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+struct kbase_debug_copy_buffer {
+	size_t size;
+	struct page **pages;
+	int nr_pages;
+	size_t offset;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+
+	struct page **extres_pages;
+	int nr_extres_pages;
+};
+
+static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer)
+{
+	struct page **pages = buffer->extres_pages;
+	int nr_pages = buffer->nr_extres_pages;
+
+	if (pages) {
+		int i;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *pg = pages[i];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(pages);
+	}
+}
+
+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+
+	if (!buffers)
+		return;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	for (i = 0; i < nr; i++) {
+		int p;
+		struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc;
+
+		if (!buffers[i].pages)
+			break;
+		for (p = 0; p < buffers[i].nr_pages; p++) {
+			struct page *pg = buffers[i].pages[p];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(buffers[i].pages);
+		if (gpu_alloc) {
+			switch (gpu_alloc->type) {
+			case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+			{
+				free_user_buffer(&buffers[i]);
+				break;
+			}
+			default:
+				/* Nothing to be done. */
+				break;
+			}
+			kbase_mem_phy_alloc_put(gpu_alloc);
+		}
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+	kfree(buffers);
+
+	katom->jc = 0;
+}
+
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers;
+	struct base_jd_debug_copy_buffer *user_buffers = NULL;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+	int ret = 0;
+	void __user *user_structs = (void __user *)(uintptr_t)katom->jc;
+
+	if (!user_structs)
+		return -EINVAL;
+
+	buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers) {
+		ret = -ENOMEM;
+		katom->jc = 0;
+		goto out_cleanup;
+	}
+	katom->jc = (u64)(uintptr_t)buffers;
+
+	user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL);
+
+	if (!user_buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+
+	ret = copy_from_user(user_buffers, user_structs,
+			sizeof(*user_buffers)*nr);
+	if (ret) {
+		ret = -EFAULT;
+		goto out_cleanup;
+	}
+
+	for (i = 0; i < nr; i++) {
+		u64 addr = user_buffers[i].address;
+		u64 page_addr = addr & PAGE_MASK;
+		u64 end_page_addr = addr + user_buffers[i].size - 1;
+		u64 last_page_addr = end_page_addr & PAGE_MASK;
+		int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1;
+		int pinned_pages;
+		struct kbase_va_region *reg;
+		struct base_external_resource user_extres;
+
+		if (!addr)
+			continue;
+
+		buffers[i].nr_pages = nr_pages;
+		buffers[i].offset = addr & ~PAGE_MASK;
+		if (buffers[i].offset >= PAGE_SIZE) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+		buffers[i].size = user_buffers[i].size;
+
+		buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *),
+				GFP_KERNEL);
+		if (!buffers[i].pages) {
+			ret = -ENOMEM;
+			goto out_cleanup;
+		}
+
+		pinned_pages = get_user_pages_fast(page_addr,
+					nr_pages,
+					1, /* Write */
+					buffers[i].pages);
+		if (pinned_pages < 0) {
+			ret = pinned_pages;
+			goto out_cleanup;
+		}
+		if (pinned_pages != nr_pages) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		user_extres = user_buffers[i].extres;
+		if (user_extres.ext_resource == 0ULL) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		kbase_gpu_vm_lock(katom->kctx);
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, user_extres.ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+		if (NULL == reg || NULL == reg->gpu_alloc ||
+				(reg->flags & KBASE_REG_FREE)) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		buffers[i].nr_extres_pages = reg->nr_pages;
+
+		if (reg->nr_pages*PAGE_SIZE != buffers[i].size)
+			dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n");
+
+		switch (reg->gpu_alloc->type) {
+		case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		{
+			struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+			unsigned long nr_pages =
+				alloc->imported.user_buf.nr_pages;
+
+			if (alloc->imported.user_buf.mm != current->mm) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			buffers[i].extres_pages = kcalloc(nr_pages,
+					sizeof(struct page *), GFP_KERNEL);
+			if (!buffers[i].extres_pages) {
+				ret = -ENOMEM;
+				goto out_unlock;
+			}
+
+			ret = get_user_pages_fast(
+					alloc->imported.user_buf.address,
+					nr_pages, 0,
+					buffers[i].extres_pages);
+			if (ret != nr_pages)
+				goto out_unlock;
+			ret = 0;
+			break;
+		}
+		case KBASE_MEM_TYPE_IMPORTED_UMP:
+		{
+			dev_warn(katom->kctx->kbdev->dev,
+					"UMP is not supported for debug_copy jobs\n");
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		default:
+			/* Nothing to be done. */
+			break;
+		}
+		kbase_gpu_vm_unlock(katom->kctx);
+	}
+	kfree(user_buffers);
+
+	return ret;
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+
+out_cleanup:
+	/* Frees allocated memory for kbase_debug_copy_job struct, including
+	 * members, and sets jc to 0 */
+	kbase_debug_copy_finish(katom);
+	kfree(user_buffers);
+
+	return ret;
+}
+
+static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+		void *extres_page, struct page **pages, unsigned int nr_pages,
+		unsigned int *target_page_nr, size_t offset, size_t *to_copy)
+{
+	void *target_page = kmap(pages[*target_page_nr]);
+	size_t chunk = PAGE_SIZE-offset;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	chunk = min(chunk, *to_copy);
+
+	memcpy(target_page + offset, extres_page, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+
+	*target_page_nr += 1;
+	if (*target_page_nr >= nr_pages)
+		return;
+
+	target_page = kmap(pages[*target_page_nr]);
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(target_page);
+
+	chunk = min(offset, *to_copy);
+	memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+}
+
+static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data)
+{
+	unsigned int i;
+	unsigned int target_page_nr = 0;
+	struct page **pages = buf_data->pages;
+	u64 offset = buf_data->offset;
+	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
+	size_t to_copy = min(extres_size, buf_data->size);
+	size_t dma_to_copy;
+	struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc;
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(pages != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	if (!gpu_alloc) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	switch (gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+	{
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+			struct page *pg = buf_data->extres_pages[i];
+			void *extres_page = kmap(pg);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			kunmap(pg);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		break;
+	}
+	break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf;
+
+		KBASE_DEBUG_ASSERT(dma_buf != NULL);
+		if (dma_buf->size > buf_data->nr_extres_pages * PAGE_SIZE)
+			dev_warn(kctx->kbdev->dev, "External resources buffer size mismatch");
+
+		dma_to_copy = min(dma_buf->size,
+			(size_t)(buf_data->nr_extres_pages * PAGE_SIZE));
+		ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, dma_to_copy,
+#endif
+				DMA_FROM_DEVICE);
+		if (ret)
+			goto out_unlock;
+
+		for (i = 0; i < dma_to_copy/PAGE_SIZE; i++) {
+
+			void *extres_page = dma_buf_kmap(dma_buf, i);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			dma_buf_kunmap(dma_buf, i, extres_page);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, dma_to_copy,
+#endif
+				DMA_FROM_DEVICE);
+		break;
+	}
+#endif
+	default:
+		ret = -EINVAL;
+	}
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+
+}
+
+static int kbase_debug_copy(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+
+	for (i = 0; i < katom->nr_extres; i++) {
+		int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]);
+
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	struct base_jit_alloc_info *info;
+	struct kbase_context *kctx = katom->kctx;
+	int ret;
+
+	/* Fail the job if there is no info structure */
+	if (!data) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(info, data, sizeof(*info)) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* If the ID is zero then fail the job */
+	if (info->id == 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Sanity check that the PA fits within the VA */
+	if (info->va_pages < info->commit_pages) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Ensure the GPU address is correctly aligned */
+	if ((info->gpu_alloc_addr & 0x7) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Replace the user pointer with our kernel allocated info structure */
+	katom->jc = (u64)(uintptr_t) info;
+	katom->jit_blocked = false;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
+
+	/*
+	 * Note:
+	 * The provided info->gpu_alloc_addr isn't validated here as
+	 * userland can cache allocations which means that even
+	 * though the region is valid it doesn't represent the
+	 * same thing it used to.
+	 *
+	 * Complete validation of va_pages, commit_pages and extent
+	 * isn't done here as it will be done during the call to
+	 * kbase_mem_alloc.
+	 */
+	return 0;
+
+free_info:
+	kfree(info);
+fail:
+	katom->jc = 0;
+	return ret;
+}
+
+static u8 kbase_jit_free_get_id(struct kbase_jd_atom *katom)
+{
+	if (WARN_ON(katom->core_req != BASE_JD_REQ_SOFT_JIT_FREE))
+		return 0;
+
+	return (u8) katom->jc;
+}
+
+static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct base_jit_alloc_info *info;
+	struct kbase_va_region *reg;
+	struct kbase_vmap_struct mapping;
+	u64 *ptr, new_addr;
+
+	if (katom->jit_blocked) {
+		list_del(&katom->queue);
+		katom->jit_blocked = false;
+	}
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+
+	/* The JIT ID is still in use so fail the allocation */
+	if (kctx->jit_alloc[info->id]) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return 0;
+	}
+
+	/* Create a JIT allocation */
+	reg = kbase_jit_allocate(kctx, info);
+	if (!reg) {
+		struct kbase_jd_atom *jit_atom;
+		bool can_block = false;
+
+		lockdep_assert_held(&kctx->jctx.lock);
+
+		jit_atom = list_first_entry(&kctx->jit_atoms_head,
+				struct kbase_jd_atom, jit_node);
+
+		list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) {
+			if (jit_atom == katom)
+				break;
+			if (jit_atom->core_req == BASE_JD_REQ_SOFT_JIT_FREE) {
+				u8 free_id = kbase_jit_free_get_id(jit_atom);
+
+				if (free_id && kctx->jit_alloc[free_id]) {
+					/* A JIT free which is active and
+					 * submitted before this atom
+					 */
+					can_block = true;
+					break;
+				}
+			}
+		}
+
+		if (!can_block) {
+			/* Mark the allocation so we know it's in use even if
+			 * the allocation itself fails.
+			 */
+			kctx->jit_alloc[info->id] =
+				(struct kbase_va_region *) -1;
+
+			katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+			return 0;
+		}
+
+		/* There are pending frees for an active allocation
+		 * so we should wait to see whether they free the memory.
+		 * Add to the beginning of the list to ensure that the atom is
+		 * processed only once in kbase_jit_free_finish
+		 */
+		list_add(&katom->queue, &kctx->jit_pending_alloc);
+		katom->jit_blocked = true;
+
+		return 1;
+	}
+
+	/*
+	 * Write the address of the JIT allocation to the user provided
+	 * GPU allocation.
+	 */
+	ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+			&mapping);
+	if (!ptr) {
+		/*
+		 * Leave the allocation "live" as the JIT free jit will be
+		 * submitted anyway.
+		 */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return 0;
+	}
+
+	new_addr = reg->start_pfn << PAGE_SHIFT;
+	*ptr = new_addr;
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(
+			katom, info->gpu_alloc_addr, new_addr);
+	kbase_vunmap(kctx, &mapping);
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	/*
+	 * Bind it to the user provided ID. Do this last so we can check for
+	 * the JIT free racing this JIT alloc job.
+	 */
+	kctx->jit_alloc[info->id] = reg;
+
+	return 0;
+}
+
+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom)
+{
+	struct base_jit_alloc_info *info;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Remove atom from jit_atoms_head list */
+	list_del(&katom->jit_node);
+
+	if (katom->jit_blocked) {
+		list_del(&katom->queue);
+		katom->jit_blocked = false;
+	}
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(info);
+}
+
+static int kbase_jit_free_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
+
+	return 0;
+}
+
+static void kbase_jit_free_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u8 id = kbase_jit_free_get_id(katom);
+
+	/*
+	 * If the ID is zero or it is not in use yet then fail the job.
+	 */
+	if ((id == 0) || (kctx->jit_alloc[id] == NULL)) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	/*
+	 * If the ID is valid but the allocation request failed still succeed
+	 * this soft job but don't try and free the allocation.
+	 */
+	if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1)
+		kbase_jit_free(kctx, kctx->jit_alloc[id]);
+
+	kctx->jit_alloc[id] = NULL;
+}
+
+static void kbasep_jit_free_finish_worker(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_finish_soft_job(katom);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+static void kbase_jit_free_finish(struct kbase_jd_atom *katom)
+{
+	struct list_head *i, *tmp;
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	/* Remove this atom from the kctx->jit_atoms_head list */
+	list_del(&katom->jit_node);
+
+	list_for_each_safe(i, tmp, &kctx->jit_pending_alloc) {
+		struct kbase_jd_atom *pending_atom = list_entry(i,
+				struct kbase_jd_atom, queue);
+		if (kbase_jit_allocate_process(pending_atom) == 0) {
+			/* Atom has completed */
+			INIT_WORK(&pending_atom->work,
+					kbasep_jit_free_finish_worker);
+			queue_work(kctx->jctx.job_done_wq, &pending_atom->work);
+		}
+	}
+}
+
+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
+{
+	__user struct base_external_resource_list *user_ext_res;
+	struct base_external_resource_list *ext_res;
+	u64 count = 0;
+	size_t copy_size;
+	int ret;
+
+	user_ext_res = (__user struct base_external_resource_list *)
+			(uintptr_t) katom->jc;
+
+	/* Fail the job if there is no info structure */
+	if (!user_ext_res) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Is the number of external resources in range? */
+	if (!count || count > BASE_EXT_RES_COUNT_MAX) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	copy_size = sizeof(*ext_res);
+	copy_size += sizeof(struct base_external_resource) * (count - 1);
+	ext_res = kzalloc(copy_size, GFP_KERNEL);
+	if (!ext_res) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/*
+	 * Overwrite the count with the first value incase it was changed
+	 * after the fact.
+	 */
+	ext_res->count = count;
+
+	/*
+	 * Replace the user pointer with our kernel allocated
+	 * ext_res structure.
+	 */
+	katom->jc = (u64)(uintptr_t) ext_res;
+
+	return 0;
+
+free_info:
+	kfree(ext_res);
+fail:
+	return ret;
+}
+
+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
+{
+	struct base_external_resource_list *ext_res;
+	int i;
+	bool failed = false;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	if (!ext_res)
+		goto failed_jc;
+
+	kbase_gpu_vm_lock(katom->kctx);
+
+	for (i = 0; i < ext_res->count; i++) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		if (map) {
+			if (!kbase_sticky_resource_acquire(katom->kctx,
+					gpu_addr))
+				goto failed_loop;
+		} else
+			if (!kbase_sticky_resource_release(katom->kctx, NULL,
+					gpu_addr))
+				failed = true;
+	}
+
+	/*
+	 * In the case of unmap we continue unmapping other resources in the
+	 * case of failure but will always report failure if _any_ unmap
+	 * request fails.
+	 */
+	if (failed)
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	else
+		katom->event_code = BASE_JD_EVENT_DONE;
+
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	return;
+
+failed_loop:
+	while (i > 0) {
+		u64 const gpu_addr = ext_res->ext_res[i - 1].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+
+		kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr);
+
+		--i;
+	}
+
+	katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	kbase_gpu_vm_unlock(katom->kctx);
+
+failed_jc:
+	return;
+}
+
+static void kbase_ext_res_finish(struct kbase_jd_atom *katom)
+{
+	struct base_external_resource_list *ext_res;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(ext_res);
+}
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		return kbase_dump_cpu_gpu_time(katom);
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		katom->event_code = kbase_sync_fence_out_trigger(katom,
+				katom->event_code == BASE_JD_EVENT_DONE ?
+								0 : -EFAULT);
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+	{
+		int ret = kbase_sync_fence_in_wait(katom);
+
+		if (ret == 1) {
+#ifdef CONFIG_MALI_FENCE_DEBUG
+			kbasep_add_waiting_with_timeout(katom);
+#else
+			kbasep_add_waiting_soft_job(katom);
+#endif
+		}
+		return ret;
+	}
+#endif
+
+	case BASE_JD_REQ_SOFT_REPLAY:
+		return kbase_replay_process(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		return kbasep_soft_event_wait(katom);
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET);
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+	{
+		int res = kbase_debug_copy(katom);
+
+		if (res)
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		break;
+	}
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_process(katom);
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_process(katom, true);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_process(katom, false);
+		break;
+	}
+
+	/* Atom is complete */
+	return 0;
+}
+
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		kbase_sync_fence_in_cancel_wait(katom);
+		break;
+#endif
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		kbasep_soft_event_cancel_job(katom);
+		break;
+	default:
+		/* This soft-job doesn't support cancellation! */
+		KBASE_DEBUG_ASSERT(0);
+	}
+}
+
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		{
+			if (!IS_ALIGNED(katom->jc, cache_line_size()))
+				return -EINVAL;
+		}
+		break;
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		{
+			struct base_fence fence;
+			int fd;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			fd = kbase_sync_fence_out_create(katom,
+							 fence.basep.stream_fd);
+			if (fd < 0)
+				return -EINVAL;
+
+			fence.basep.fd = fd;
+			if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+				kbase_sync_fence_out_remove(katom);
+				kbase_sync_fence_close_fd(fd);
+				fence.basep.fd = -EINVAL;
+				return -EINVAL;
+			}
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		{
+			struct base_fence fence;
+			int ret;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			/* Get a reference to the fence object */
+			ret = kbase_sync_fence_in_from_fd(katom,
+							  fence.basep.fd);
+			if (ret < 0)
+				return ret;
+
+#ifdef CONFIG_MALI_DMA_FENCE
+			/*
+			 * Set KCTX_NO_IMPLICIT_FENCE in the context the first
+			 * time a soft fence wait job is observed. This will
+			 * prevent the implicit dma-buf fence to conflict with
+			 * the Android native sync fences.
+			 */
+			if (!kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC))
+				kbase_ctx_flag_set(katom->kctx, KCTX_NO_IMPLICIT_SYNC);
+#endif /* CONFIG_MALI_DMA_FENCE */
+		}
+		break;
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_prepare(katom);
+	case BASE_JD_REQ_SOFT_REPLAY:
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		return kbase_jit_free_prepare(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		if (katom->jc == 0)
+			return -EINVAL;
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		return kbase_debug_copy_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		return kbase_ext_res_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		return kbase_ext_res_prepare(katom);
+	default:
+		/* Unsupported soft-job */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void kbase_finish_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		/* Nothing to do */
+		break;
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		/* If fence has not yet been signaled, do it now */
+		kbase_sync_fence_out_trigger(katom, katom->event_code ==
+				BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		/* Release katom's reference to fence object */
+		kbase_sync_fence_in_remove(katom);
+		break;
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		kbase_debug_copy_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_finish(katom);
+		break;
+	}
+}
+
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
+{
+	LIST_HEAD(local_suspended_soft_jobs);
+	struct kbase_jd_atom *tmp_iter;
+	struct kbase_jd_atom *katom_iter;
+	struct kbasep_js_device_data *js_devdata;
+	bool resched = false;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	js_devdata = &kbdev->js_data;
+
+	/* Move out the entire list */
+	mutex_lock(&js_devdata->runpool_mutex);
+	list_splice_init(&js_devdata->suspended_soft_jobs_list,
+			&local_suspended_soft_jobs);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/*
+	 * Each atom must be detached from the list and ran separately -
+	 * it could be re-added to the old list, but this is unlikely
+	 */
+	list_for_each_entry_safe(katom_iter, tmp_iter,
+			&local_suspended_soft_jobs, dep_item[1]) {
+		struct kbase_context *kctx = katom_iter->kctx;
+
+		mutex_lock(&kctx->jctx.lock);
+
+		/* Remove from the global list */
+		list_del(&katom_iter->dep_item[1]);
+		/* Remove from the context's list of waiting soft jobs */
+		kbasep_remove_waiting_soft_job(katom_iter);
+
+		if (kbase_process_soft_job(katom_iter) == 0) {
+			kbase_finish_soft_job(katom_iter);
+			resched |= jd_done_nolock(katom_iter, NULL);
+		} else {
+			KBASE_DEBUG_ASSERT((katom_iter->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE)
+					!= BASE_JD_REQ_SOFT_REPLAY);
+		}
+
+		mutex_unlock(&kctx->jctx.lock);
+	}
+
+	if (resched)
+		kbase_js_sched_all(kbdev);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
new file mode 100644
index 0000000..c98762c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
@@ -0,0 +1,23 @@
+ /*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include "mali_kbase_strings.h"
+
+#define KBASE_DRV_NAME "mali"
+#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline"
+
+const char kbase_drv_name[] = KBASE_DRV_NAME;
+const char kbase_timeline_name[] = KBASE_TIMELINE_NAME;
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
new file mode 100644
index 0000000..41b8fdb
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
@@ -0,0 +1,19 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+extern const char kbase_drv_name[];
+extern const char kbase_timeline_name[];
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
new file mode 100644
index 0000000..de72147
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -0,0 +1,203 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_sync.h
+ *
+ * This file contains our internal "API" for explicit fences.
+ * It hides the implementation details of the actual explicit fence mechanism
+ * used (Android fences or sync file with DMA fences).
+ */
+
+#ifndef MALI_KBASE_SYNC_H
+#define MALI_KBASE_SYNC_H
+
+#include <linux/syscalls.h>
+#ifdef CONFIG_SYNC
+#include <sync.h>
+#endif
+#ifdef CONFIG_SYNC_FILE
+#include "mali_kbase_fence_defs.h"
+#include <linux/sync_file.h>
+#endif
+
+#include "mali_kbase.h"
+
+/**
+ * struct kbase_sync_fence_info - Information about a fence
+ * @fence: Pointer to fence (type is void*, as underlaying struct can differ)
+ * @name: The name given to this fence when it was created
+ * @status: < 0 means error, 0 means active, 1 means signaled
+ *
+ * Use kbase_sync_fence_in_info_get() or kbase_sync_fence_out_info_get()
+ * to get the information.
+ */
+struct kbase_sync_fence_info {
+	void *fence;
+	char name[32];
+	int status;
+};
+
+/**
+ * kbase_sync_fence_stream_create() - Create a stream object
+ * @name: Name of stream (only used to ease debugging/visualization)
+ * @out_fd: A file descriptor representing the created stream object
+ *
+ * Can map down to a timeline implementation in some implementations.
+ * Exposed as a file descriptor.
+ * Life-time controlled via the file descriptor:
+ * - dup to add a ref
+ * - close to remove a ref
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd);
+
+/**
+ * kbase_sync_fence_out_create Create an explicit output fence to specified atom
+ * @katom: Atom to assign the new explicit fence to
+ * @stream_fd: File descriptor for stream object to create fence on
+ *
+ * return: Valid file descriptor to fence or < 0 on error
+ */
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd);
+
+/**
+ * kbase_sync_fence_in_from_fd() Assigns an existing fence to specified atom
+ * @katom: Atom to assign the existing explicit fence to
+ * @fd: File descriptor to an existing fence
+ *
+ * Assigns an explicit input fence to atom.
+ * This can later be waited for by calling @kbase_sync_fence_in_wait
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd);
+
+/**
+ * kbase_sync_fence_validate() - Validate a fd to be a valid fence
+ * @fd: File descriptor to check
+ *
+ * This function is only usable to catch unintentional user errors early,
+ * it does not stop malicious code changing the fd after this function returns.
+ *
+ * return 0: if fd is for a valid fence, < 0 if invalid
+ */
+int kbase_sync_fence_validate(int fd);
+
+/**
+ * kbase_sync_fence_out_trigger - Signal explicit output fence attached on katom
+ * @katom: Atom with an explicit fence to signal
+ * @result: < 0 means signal with error, 0 >= indicates success
+ *
+ * Signal output fence attached on katom and remove the fence from the atom.
+ *
+ * return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE
+ */
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result);
+
+/**
+ * kbase_sync_fence_in_wait() - Wait for explicit input fence to be signaled
+ * @katom: Atom with explicit fence to wait for
+ *
+ * If the fence is already signaled, then 0 is returned, and the caller must
+ * continue processing of the katom.
+ *
+ * If the fence isn't already signaled, then this kbase_sync framework will
+ * take responsibility to continue the processing once the fence is signaled.
+ *
+ * return: 0 if already signaled, otherwise 1
+ */
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_in_cancel_wait() - Cancel explicit input fence waits
+ * @katom: Atom to cancel wait for
+ *
+ * This function is fully responsible for continuing processing of this atom
+ * (remove_waiting_soft_job + finish_soft_job + jd_done + js_sched_all)
+ */
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_in_remove() - Remove the input fence from the katom
+ * @katom: Atom to remove explicit input fence for
+ *
+ * This will also release the corresponding reference.
+ */
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_out_remove() - Remove the output fence from the katom
+ * @katom: Atom to remove explicit output fence for
+ *
+ * This will also release the corresponding reference.
+ */
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_close_fd() - Close a file descriptor representing a fence
+ * @fd: File descriptor to close
+ */
+static inline void kbase_sync_fence_close_fd(int fd)
+{
+	sys_close(fd);
+}
+
+/**
+ * kbase_sync_fence_in_info_get() - Retrieves information about input fence
+ * @katom: Atom to get fence information from
+ * @info: Struct to be filled with fence information
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info);
+
+/**
+ * kbase_sync_fence_out_info_get() - Retrieves information about output fence
+ * @katom: Atom to get fence information from
+ * @info: Struct to be filled with fence information
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				  struct kbase_sync_fence_info *info);
+
+/**
+ * kbase_sync_status_string() - Get string matching @status
+ * @status: Value of fence status.
+ *
+ * return: Pointer to string describing @status.
+ */
+const char *kbase_sync_status_string(int status);
+
+/*
+ * Internal worker used to continue processing of atom.
+ */
+void kbase_sync_fence_wait_worker(struct work_struct *data);
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+/**
+ * kbase_sync_fence_in_dump() Trigger a debug dump of atoms input fence state
+ * @katom: Atom to trigger fence debug dump for
+ */
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom);
+#endif
+
+#endif /* MALI_KBASE_SYNC_H */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c
new file mode 100644
index 0000000..d7349dc
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c
@@ -0,0 +1,537 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Code for supporting explicit Android fences (CONFIG_SYNC)
+ * Known to be good for kernels 4.5 and earlier.
+ * Replaced with CONFIG_SYNC_FILE for 4.9 and later kernels
+ * (see mali_kbase_sync_file.c)
+ */
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include "sync.h"
+#include <mali_kbase.h>
+#include <mali_kbase_sync.h>
+
+struct mali_sync_timeline {
+	struct sync_timeline timeline;
+	atomic_t counter;
+	atomic_t signaled;
+};
+
+struct mali_sync_pt {
+	struct sync_pt pt;
+	int order;
+	int result;
+};
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+/* For backwards compatibility with kernels before 3.17. After 3.17
+ * sync_pt_parent is included in the kernel. */
+static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
+{
+	return pt->parent;
+}
+#endif
+
+static struct mali_sync_timeline *to_mali_sync_timeline(
+						struct sync_timeline *timeline)
+{
+	return container_of(timeline, struct mali_sync_timeline, timeline);
+}
+
+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, pt);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_pt *new_mpt;
+	struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt),
+						sizeof(struct mali_sync_pt));
+
+	if (!new_pt)
+		return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+	new_mpt->order = mpt->order;
+	new_mpt->result = mpt->result;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(
+							sync_pt_parent(pt));
+	int result = mpt->result;
+
+	int diff = atomic_read(&mtl->signaled) - mpt->order;
+
+	if (diff >= 0)
+		return (result < 0) ? result : 1;
+
+	return 0;
+}
+
+static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
+	struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
+
+	int diff = ma->order - mb->order;
+
+	if (diff == 0)
+		return 0;
+
+	return (diff < 0) ? -1 : 1;
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str,
+			       int size)
+{
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
+
+	snprintf(str, size, "%d", atomic_read(&mtl->signaled));
+}
+
+static void pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+
+	snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
+}
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name = "Mali",
+	.dup = timeline_dup,
+	.has_signaled = timeline_has_signaled,
+	.compare = timeline_compare,
+	.timeline_value_str = timeline_value_str,
+	.pt_value_str       = pt_value_str,
+};
+
+/* Allocates a timeline for Mali
+ *
+ * One timeline should be allocated per API context.
+ */
+static struct sync_timeline *mali_sync_timeline_alloc(const char *name)
+{
+	struct sync_timeline *tl;
+	struct mali_sync_timeline *mtl;
+
+	tl = sync_timeline_create(&mali_timeline_ops,
+				  sizeof(struct mali_sync_timeline), name);
+	if (!tl)
+		return NULL;
+
+	/* Set the counter in our private struct */
+	mtl = to_mali_sync_timeline(tl);
+	atomic_set(&mtl->counter, 0);
+	atomic_set(&mtl->signaled, 0);
+
+	return tl;
+}
+
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+	struct sync_timeline *tl;
+
+	tl = (struct sync_timeline *)file->private_data;
+	sync_timeline_destroy(tl);
+	return 0;
+}
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE,
+	.release = kbase_stream_close,
+};
+
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
+{
+	struct sync_timeline *tl;
+
+	if (!out_fd)
+		return -EINVAL;
+
+	tl = mali_sync_timeline_alloc(name);
+	if (!tl)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY|O_CLOEXEC);
+
+	if (*out_fd < 0) {
+		sync_timeline_destroy(tl);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by kbase_sync_timeline_alloc
+ *
+ * Sync points must be triggered in *exactly* the same order as they are
+ * allocated.
+ */
+static struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+	struct sync_pt *pt = sync_pt_create(parent,
+					    sizeof(struct mali_sync_pt));
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+	struct mali_sync_pt *mpt;
+
+	if (!pt)
+		return NULL;
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->order = atomic_inc_return(&mtl->counter);
+	mpt->result = 0;
+
+	return pt;
+}
+
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd)
+{
+	struct sync_timeline *tl;
+	struct sync_pt *pt;
+	struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+	struct files_struct *files;
+	struct fdtable *fdt;
+#endif
+	int fd;
+	struct file *tl_file;
+
+	tl_file = fget(tl_fd);
+	if (tl_file == NULL)
+		return -EBADF;
+
+	if (tl_file->f_op != &stream_fops) {
+		fd = -EBADF;
+		goto out;
+	}
+
+	tl = tl_file->private_data;
+
+	pt = kbase_sync_pt_alloc(tl);
+	if (!pt) {
+		fd = -EFAULT;
+		goto out;
+	}
+
+	fence = sync_fence_create("mali_fence", pt);
+	if (!fence) {
+		sync_pt_free(pt);
+		fd = -EFAULT;
+		goto out;
+	}
+
+	/* from here the fence owns the sync_pt */
+
+	/* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+#else
+	fd = get_unused_fd();
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+
+	files = current->files;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	__set_close_on_exec(fd, fdt);
+#else
+	FD_SET(fd, fdt->close_on_exec);
+#endif
+	spin_unlock(&files->file_lock);
+#endif  /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+	/* bind fence to the new fd */
+	sync_fence_install(fence, fd);
+
+	katom->fence = sync_fence_fdget(fd);
+	if (katom->fence == NULL) {
+		/* The only way the fence can be NULL is if userspace closed it
+		 * for us, so we don't need to clear it up */
+		fd = -EINVAL;
+		goto out;
+	}
+
+out:
+	fput(tl_file);
+
+	return fd;
+}
+
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
+{
+	katom->fence = sync_fence_fdget(fd);
+	return katom->fence ? 0 : -ENOENT;
+}
+
+int kbase_sync_fence_validate(int fd)
+{
+	struct sync_fence *fence;
+
+	fence = sync_fence_fdget(fd);
+	if (!fence)
+		return -EINVAL;
+
+	sync_fence_put(fence);
+	return 0;
+}
+
+/* Returns true if the specified timeline is allocated by Mali */
+static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
+{
+	return timeline->ops == &mali_timeline_ops;
+}
+
+/* Signals a particular sync point
+ *
+ * Sync points must be triggered in *exactly* the same order as they are
+ * allocated.
+ *
+ * If they are signaled in the wrong order then a message will be printed in
+ * debug builds and otherwise attempts to signal order sync_pts will be ignored.
+ *
+ * result can be negative to indicate error, any other value is interpreted as
+ * success.
+ */
+static void kbase_sync_signal_pt(struct sync_pt *pt, int result)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(
+							sync_pt_parent(pt));
+	int signaled;
+	int diff;
+
+	mpt->result = result;
+
+	do {
+		signaled = atomic_read(&mtl->signaled);
+
+		diff = signaled - mpt->order;
+
+		if (diff > 0) {
+			/* The timeline is already at or ahead of this point.
+			 * This should not happen unless userspace has been
+			 * signaling fences out of order, so warn but don't
+			 * violate the sync_pt API.
+			 * The warning is only in debug builds to prevent
+			 * a malicious user being able to spam dmesg.
+			 */
+#ifdef CONFIG_MALI_DEBUG
+			pr_err("Fences were triggered in a different order to allocation!");
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+	} while (atomic_cmpxchg(&mtl->signaled,
+				signaled, mpt->order) != signaled);
+}
+
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result)
+{
+	struct sync_pt *pt;
+	struct sync_timeline *timeline;
+
+	if (!katom->fence)
+		return BASE_JD_EVENT_JOB_CANCELLED;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	if (!list_is_singular(&katom->fence->pt_list_head)) {
+#else
+	if (katom->fence->num_fences != 1) {
+#endif
+		/* Not exactly one item in the list - so it didn't (directly)
+		 * come from us */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	pt = list_first_entry(&katom->fence->pt_list_head,
+			      struct sync_pt, pt_list);
+#else
+	pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
+#endif
+	timeline = sync_pt_parent(pt);
+
+	if (!kbase_sync_timeline_is_ours(timeline)) {
+		/* Fence has a sync_pt which isn't ours! */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	kbase_sync_signal_pt(pt, result);
+
+	sync_timeline_signal(timeline);
+
+	kbase_sync_fence_out_remove(katom);
+
+	return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+static inline int kbase_fence_get_status(struct sync_fence *fence)
+{
+	if (!fence)
+		return -ENOENT;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	return fence->status;
+#else
+	return atomic_read(&fence->status);
+#endif
+}
+
+static void kbase_fence_wait_callback(struct sync_fence *fence,
+				      struct sync_fence_waiter *waiter)
+{
+	struct kbase_jd_atom *katom = container_of(waiter,
+					struct kbase_jd_atom, sync_waiter);
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Propagate the fence status to the atom.
+	 * If negative then cancel this atom and its dependencies.
+	 */
+	if (kbase_fence_get_status(fence) < 0)
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* To prevent a potential deadlock we schedule the work onto the
+	 * job_done_wq workqueue
+	 *
+	 * The issue is that we may signal the timeline while holding
+	 * kctx->jctx.lock and the callbacks are run synchronously from
+	 * sync_timeline_signal. So we simply defer the work.
+	 */
+
+	INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
+{
+	int ret;
+
+	sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+	ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+	if (ret == 1) {
+		/* Already signaled */
+		return 0;
+	}
+
+	if (ret < 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+	return 1;
+}
+
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+		/* The wait wasn't cancelled - leave the cleanup for
+		 * kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->fence) {
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+	}
+}
+
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->fence) {
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+	}
+}
+
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+	if (!katom->fence)
+		return -ENOENT;
+
+	info->fence = katom->fence;
+	info->status = kbase_fence_get_status(katom->fence);
+	strlcpy(info->name, katom->fence->name, sizeof(info->name));
+
+	return 0;
+}
+
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+	if (!katom->fence)
+		return -ENOENT;
+
+	info->fence = katom->fence;
+	info->status = kbase_fence_get_status(katom->fence);
+	strlcpy(info->name, katom->fence->name, sizeof(info->name));
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom)
+{
+	/* Dump out the full state of all the Android sync fences.
+	 * The function sync_dump() isn't exported to modules, so force
+	 * sync_fence_wait() to time out to trigger sync_dump().
+	 */
+	if (katom->fence)
+		sync_fence_wait(katom->fence, 1);
+}
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
new file mode 100644
index 0000000..457def2
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * @file mali_kbase_sync_common.c
+ *
+ * Common code for our explicit fence functionality
+ */
+
+#include <linux/workqueue.h>
+#include "mali_kbase.h"
+
+void kbase_sync_fence_wait_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kbase_soft_event_wait_callback(katom);
+}
+
+const char *kbase_sync_status_string(int status)
+{
+	if (status == 0)
+		return "signaled";
+	else if (status > 0)
+		return "active";
+	else
+		return "error";
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
new file mode 100644
index 0000000..cb00c33
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
@@ -0,0 +1,348 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Code for supporting explicit Linux fences (CONFIG_SYNC_FILE)
+ * Introduced in kernel 4.9.
+ * Android explicit fences (CONFIG_SYNC) can be used for older kernels
+ * (see mali_kbase_sync_android.c)
+ */
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/sync_file.h>
+#include <linux/slab.h>
+#include "mali_kbase_fence_defs.h"
+#include "mali_kbase_sync.h"
+#include "mali_kbase_fence.h"
+#include "mali_kbase.h"
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE
+};
+
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
+{
+	if (!out_fd)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, NULL,
+				   O_RDONLY | O_CLOEXEC);
+	if (*out_fd < 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+	struct sync_file *sync_file;
+	int fd;
+
+	fence = kbase_fence_out_new(katom);
+	if (!fence)
+		return -ENOMEM;
+
+	/* Take an extra reference to the fence on behalf of the katom.
+	 * This is needed because sync_file_create() will take ownership of
+	 * one of these refs */
+	dma_fence_get(fence);
+
+	/* create a sync_file fd representing the fence */
+	sync_file = sync_file_create(fence);
+	if (!sync_file) {
+		dma_fence_put(fence);
+		kbase_fence_out_remove(katom);
+		return -ENOMEM;
+	}
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		fput(sync_file->file);
+		kbase_fence_out_remove(katom);
+		return fd;
+	}
+
+	fd_install(fd, sync_file->file);
+
+	return fd;
+}
+
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence = sync_file_get_fence(fd);
+#else
+	struct dma_fence *fence = sync_file_get_fence(fd);
+#endif
+
+	if (!fence)
+		return -ENOENT;
+
+	kbase_fence_fence_in_set(katom, fence);
+
+	return 0;
+}
+
+int kbase_sync_fence_validate(int fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence = sync_file_get_fence(fd);
+#else
+	struct dma_fence *fence = sync_file_get_fence(fd);
+#endif
+
+	if (!fence)
+		return -EINVAL;
+
+	dma_fence_put(fence);
+
+	return 0; /* valid */
+}
+
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result)
+{
+	int res;
+
+	if (!kbase_fence_out_is_ours(katom)) {
+		/* Not our fence */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	res = kbase_fence_out_signal(katom, result);
+	if (unlikely(res < 0)) {
+		dev_warn(katom->kctx->kbdev->dev,
+				"fence_signal() failed with %d\n", res);
+	}
+
+	kbase_sync_fence_out_remove(katom);
+
+	return (result != 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+static void kbase_fence_wait_callback(struct fence *fence,
+				      struct fence_cb *cb)
+#else
+static void kbase_fence_wait_callback(struct dma_fence *fence,
+				      struct dma_fence_cb *cb)
+#endif
+{
+	struct kbase_fence_cb *kcb = container_of(cb,
+				struct kbase_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Cancel atom if fence is erroneous */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 68))
+	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error)
+#else
+	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0)
+#endif
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	if (kbase_fence_dep_count_dec_and_test(katom)) {
+		/* We take responsibility of handling this */
+		kbase_fence_dep_count_set(katom, -1);
+
+		/* To prevent a potential deadlock we schedule the work onto the
+		 * job_done_wq workqueue
+		 *
+		 * The issue is that we may signal the timeline while holding
+		 * kctx->jctx.lock and the callbacks are run synchronously from
+		 * sync_timeline_signal. So we simply defer the work.
+		 */
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(kctx->jctx.job_done_wq, &katom->work);
+	}
+}
+
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
+{
+	int err;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_in_get(katom);
+	if (!fence)
+		return 0; /* no input fence to wait for, good to go! */
+
+	kbase_fence_dep_count_set(katom, 1);
+
+	err = kbase_fence_add_callback(katom, fence, kbase_fence_wait_callback);
+
+	kbase_fence_put(fence);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (kbase_fence_dep_count_dec_and_test(katom)) {
+			kbase_fence_free_callbacks(katom);
+			kbase_fence_dep_count_set(katom, -1);
+			return 0; /* Already signaled, good to go right now */
+		}
+
+		/* Callback installed, so we just need to wait for it... */
+	} else {
+		/* Failure */
+		kbase_fence_free_callbacks(katom);
+		kbase_fence_dep_count_set(katom, -1);
+
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+	return 1; /* completion to be done later by callback/worker */
+}
+
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (!kbase_fence_free_callbacks(katom)) {
+		/* The wait wasn't cancelled -
+		 * leave the cleanup for kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Take responsibility of completion */
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	kbase_fence_out_remove(katom);
+}
+
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	kbase_fence_free_callbacks(katom);
+	kbase_fence_in_remove(katom);
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+static void kbase_sync_fence_info_get(struct fence *fence,
+				      struct kbase_sync_fence_info *info)
+#else
+static void kbase_sync_fence_info_get(struct dma_fence *fence,
+				      struct kbase_sync_fence_info *info)
+#endif
+{
+	info->fence = fence;
+
+	/* translate into CONFIG_SYNC status:
+	 * < 0 : error
+	 * 0 : active
+	 * 1 : signaled
+	 */
+	if (dma_fence_is_signaled(fence)) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 68))
+		int status = fence->error;
+#else
+		int status = fence->status;
+#endif
+		if (status < 0)
+			info->status = status; /* signaled with error */
+		else
+			info->status = 1; /* signaled with success */
+	} else  {
+		info->status = 0; /* still active (unsignaled) */
+	}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+	scnprintf(info->name, sizeof(info->name), "%u#%u",
+		  fence->context, fence->seqno);
+#else
+	scnprintf(info->name, sizeof(info->name), "%llu#%u",
+		  fence->context, fence->seqno);
+#endif
+}
+
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_in_get(katom);
+	if (!fence)
+		return -ENOENT;
+
+	kbase_sync_fence_info_get(fence, info);
+
+	kbase_fence_put(fence);
+
+	return 0;
+}
+
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				  struct kbase_sync_fence_info *info)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_out_get(katom);
+	if (!fence)
+		return -ENOENT;
+
+	kbase_sync_fence_info_get(fence, info);
+
+	kbase_fence_put(fence);
+
+	return 0;
+}
+
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom)
+{
+	/* Not implemented */
+}
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
new file mode 100644
index 0000000..d01aa23
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -0,0 +1,2572 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* The version of swtrace protocol used in timeline stream. */
+#define SWTRACE_VERSION    3
+
+/* The maximum expected length of string in tracepoint descriptor. */
+#define STRLEN_MAX         64 /* bytes */
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC       1000000000ull /* ns */
+
+/* The period of autoflush checker execution in milliseconds. */
+#define AUTOFLUSH_INTERVAL 1000 /* ms */
+
+/* The maximum size of a single packet used by timeline. */
+#define PACKET_SIZE        4096 /* bytes */
+
+/* The number of packets used by one timeline stream. */
+#define PACKET_COUNT       16
+
+/* The number of bytes reserved for packet header.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_HEADER_SIZE 8 /* bytes */
+
+/* The number of bytes reserved for packet sequence number.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_NUMBER_SIZE 4 /* bytes */
+
+/* Packet header - first word.
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_STREAMID_POS  0
+#define PACKET_STREAMID_LEN  8
+#define PACKET_RSVD1_POS     (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
+#define PACKET_RSVD1_LEN     8
+#define PACKET_TYPE_POS      (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
+#define PACKET_TYPE_LEN      3
+#define PACKET_CLASS_POS     (PACKET_TYPE_POS + PACKET_TYPE_LEN)
+#define PACKET_CLASS_LEN     7
+#define PACKET_FAMILY_POS    (PACKET_CLASS_POS + PACKET_CLASS_LEN)
+#define PACKET_FAMILY_LEN    6
+
+/* Packet header - second word
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_LENGTH_POS    0
+#define PACKET_LENGTH_LEN    24
+#define PACKET_SEQBIT_POS    (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
+#define PACKET_SEQBIT_LEN    1
+#define PACKET_RSVD2_POS     (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
+#define PACKET_RSVD2_LEN     7
+
+/* Types of streams generated by timeline.
+ * Order is significant! Header streams must precede respective body streams. */
+enum tl_stream_type {
+	TL_STREAM_TYPE_OBJ_HEADER,
+	TL_STREAM_TYPE_OBJ_SUMMARY,
+	TL_STREAM_TYPE_OBJ,
+	TL_STREAM_TYPE_AUX_HEADER,
+	TL_STREAM_TYPE_AUX,
+
+	TL_STREAM_TYPE_COUNT
+};
+
+/* Timeline packet family ids.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_family {
+	TL_PACKET_FAMILY_CTRL = 0, /* control packets */
+	TL_PACKET_FAMILY_TL   = 1, /* timeline packets */
+
+	TL_PACKET_FAMILY_COUNT
+};
+
+/* Packet classes used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_class {
+	TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
+	TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
+};
+
+/* Packet types used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_type {
+	TL_PACKET_TYPE_HEADER  = 0, /* stream's header/directory */
+	TL_PACKET_TYPE_BODY    = 1, /* stream's body */
+	TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
+};
+
+/* Message ids of trace events that are recorded in the timeline stream. */
+enum tl_msg_id_obj {
+	/* Timeline object events. */
+	KBASE_TL_NEW_CTX,
+	KBASE_TL_NEW_GPU,
+	KBASE_TL_NEW_LPU,
+	KBASE_TL_NEW_ATOM,
+	KBASE_TL_NEW_AS,
+	KBASE_TL_DEL_CTX,
+	KBASE_TL_DEL_ATOM,
+	KBASE_TL_LIFELINK_LPU_GPU,
+	KBASE_TL_LIFELINK_AS_GPU,
+	KBASE_TL_RET_CTX_LPU,
+	KBASE_TL_RET_ATOM_CTX,
+	KBASE_TL_RET_ATOM_LPU,
+	KBASE_TL_NRET_CTX_LPU,
+	KBASE_TL_NRET_ATOM_CTX,
+	KBASE_TL_NRET_ATOM_LPU,
+	KBASE_TL_RET_AS_CTX,
+	KBASE_TL_NRET_AS_CTX,
+	KBASE_TL_RET_ATOM_AS,
+	KBASE_TL_NRET_ATOM_AS,
+	KBASE_TL_DEP_ATOM_ATOM,
+	KBASE_TL_NDEP_ATOM_ATOM,
+	KBASE_TL_RDEP_ATOM_ATOM,
+	KBASE_TL_ATTRIB_ATOM_CONFIG,
+	KBASE_TL_ATTRIB_ATOM_PRIORITY,
+	KBASE_TL_ATTRIB_ATOM_STATE,
+	KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE,
+	KBASE_TL_ATTRIB_ATOM_JIT,
+	KBASE_TL_ATTRIB_AS_CONFIG,
+	KBASE_TL_EVENT_LPU_SOFTSTOP,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+
+	/* Job dump specific events. */
+	KBASE_JD_GPU_SOFT_RESET
+};
+
+/* Message ids of trace events that are recorded in the auxiliary stream. */
+enum tl_msg_id_aux {
+	KBASE_AUX_PM_STATE,
+	KBASE_AUX_PAGEFAULT,
+	KBASE_AUX_PAGESALLOC,
+	KBASE_AUX_DEVFREQ_TARGET,
+	KBASE_AUX_PROTECTED_ENTER_START,
+	KBASE_AUX_PROTECTED_ENTER_END,
+	KBASE_AUX_PROTECTED_LEAVE_START,
+	KBASE_AUX_PROTECTED_LEAVE_END
+};
+
+/*****************************************************************************/
+
+/**
+ * struct tl_stream - timeline stream structure
+ * @lock: message order lock
+ * @buffer: array of buffers
+ * @wbi: write buffer index
+ * @rbi: read buffer index
+ * @numbered: if non-zero stream's packets are sequentially numbered
+ * @autoflush_counter: counter tracking stream's autoflush state
+ *
+ * This structure holds information needed to construct proper packets in the
+ * timeline stream. Each message in sequence must bear timestamp that is greater
+ * to one in previous message in the same stream. For this reason lock is held
+ * throughout the process of message creation. Each stream contains set of
+ * buffers. Each buffer will hold one MIPE packet. In case there is no free
+ * space required to store incoming message the oldest buffer is discarded.
+ * Each packet in timeline body stream has sequence number embedded (this value
+ * must increment monotonically and is used by packets receiver to discover
+ * buffer overflows.
+ * Autoflush counter is set to negative number when there is no data pending
+ * for flush and it is set to zero on every update of the buffer. Autoflush
+ * timer will increment the counter by one on every expiry. In case there will
+ * be no activity on the buffer during two consecutive timer expiries, stream
+ * buffer will be flushed.
+ */
+struct tl_stream {
+	spinlock_t lock;
+
+	struct {
+		atomic_t size;              /* number of bytes in buffer */
+		char     data[PACKET_SIZE]; /* buffer's data */
+	} buffer[PACKET_COUNT];
+
+	atomic_t wbi;
+	atomic_t rbi;
+
+	int      numbered;
+	atomic_t autoflush_counter;
+};
+
+/**
+ * struct tp_desc - tracepoint message descriptor structure
+ * @id:        tracepoint ID identifying message in stream
+ * @id_str:    human readable version of tracepoint ID
+ * @name:      tracepoint description
+ * @arg_types: tracepoint's arguments types declaration
+ * @arg_names: comma separated list of tracepoint's arguments names
+ */
+struct tp_desc {
+	u32        id;
+	const char *id_str;
+	const char *name;
+	const char *arg_types;
+	const char *arg_names;
+};
+
+/*****************************************************************************/
+
+/* Configuration of timeline streams generated by kernel.
+ * Kernel emit only streams containing either timeline object events or
+ * auxiliary events. All streams have stream id value of 1 (as opposed to user
+ * space streams that have value of 0). */
+static const struct {
+	enum tl_packet_family pkt_family;
+	enum tl_packet_class  pkt_class;
+	enum tl_packet_type   pkt_type;
+	unsigned int          stream_id;
+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY,    1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY,    1}
+};
+
+/* The timeline streams generated by kernel. */
+static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT];
+
+/* Autoflush timer. */
+static struct timer_list autoflush_timer;
+
+/* If non-zero autoflush timer is active. */
+static atomic_t autoflush_timer_active;
+
+/* Reader lock. Only one reader is allowed to have access to the timeline
+ * streams at any given time. */
+static DEFINE_MUTEX(tl_reader_lock);
+
+/* Timeline stream event queue. */
+static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
+
+/* The timeline stream file operations functions. */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos);
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait);
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations kbasep_tlstream_fops = {
+	.release = kbasep_tlstream_release,
+	.read    = kbasep_tlstream_read,
+	.poll    = kbasep_tlstream_poll,
+};
+
+/* Descriptors of timeline messages transmitted in object events stream. */
+static const struct tp_desc tp_desc_obj[] = {
+	{
+		KBASE_TL_NEW_CTX,
+		__stringify(KBASE_TL_NEW_CTX),
+		"object ctx is created",
+		"@pII",
+		"ctx,ctx_nr,tgid"
+	},
+	{
+		KBASE_TL_NEW_GPU,
+		__stringify(KBASE_TL_NEW_GPU),
+		"object gpu is created",
+		"@pII",
+		"gpu,gpu_id,core_count"
+	},
+	{
+		KBASE_TL_NEW_LPU,
+		__stringify(KBASE_TL_NEW_LPU),
+		"object lpu is created",
+		"@pII",
+		"lpu,lpu_nr,lpu_fn"
+	},
+	{
+		KBASE_TL_NEW_ATOM,
+		__stringify(KBASE_TL_NEW_ATOM),
+		"object atom is created",
+		"@pI",
+		"atom,atom_nr"
+	},
+	{
+		KBASE_TL_NEW_AS,
+		__stringify(KBASE_TL_NEW_AS),
+		"address space object is created",
+		"@pI",
+		"address_space,as_nr"
+	},
+	{
+		KBASE_TL_DEL_CTX,
+		__stringify(KBASE_TL_DEL_CTX),
+		"context is destroyed",
+		"@p",
+		"ctx"
+	},
+	{
+		KBASE_TL_DEL_ATOM,
+		__stringify(KBASE_TL_DEL_ATOM),
+		"atom is destroyed",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_LIFELINK_LPU_GPU,
+		__stringify(KBASE_TL_LIFELINK_LPU_GPU),
+		"lpu is deleted with gpu",
+		"@pp",
+		"lpu,gpu"
+	},
+	{
+		KBASE_TL_LIFELINK_AS_GPU,
+		__stringify(KBASE_TL_LIFELINK_AS_GPU),
+		"address space is deleted with gpu",
+		"@pp",
+		"address_space,gpu"
+	},
+	{
+		KBASE_TL_RET_CTX_LPU,
+		__stringify(KBASE_TL_RET_CTX_LPU),
+		"context is retained by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_RET_ATOM_CTX,
+		__stringify(KBASE_TL_RET_ATOM_CTX),
+		"atom is retained by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_LPU,
+		__stringify(KBASE_TL_RET_ATOM_LPU),
+		"atom is retained by lpu",
+		"@pps",
+		"atom,lpu,attrib_match_list"
+	},
+	{
+		KBASE_TL_NRET_CTX_LPU,
+		__stringify(KBASE_TL_NRET_CTX_LPU),
+		"context is released by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_NRET_ATOM_CTX,
+		__stringify(KBASE_TL_NRET_ATOM_CTX),
+		"atom is released by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_NRET_ATOM_LPU,
+		__stringify(KBASE_TL_NRET_ATOM_LPU),
+		"atom is released by lpu",
+		"@pp",
+		"atom,lpu"
+	},
+	{
+		KBASE_TL_RET_AS_CTX,
+		__stringify(KBASE_TL_RET_AS_CTX),
+		"address space is retained by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_NRET_AS_CTX,
+		__stringify(KBASE_TL_NRET_AS_CTX),
+		"address space is released by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_AS,
+		__stringify(KBASE_TL_RET_ATOM_AS),
+		"atom is retained by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_NRET_ATOM_AS,
+		__stringify(KBASE_TL_NRET_ATOM_AS),
+		"atom is released by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_DEP_ATOM_ATOM,
+		__stringify(KBASE_TL_DEP_ATOM_ATOM),
+		"atom2 depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_NDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_NDEP_ATOM_ATOM),
+		"atom2 no longer depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_RDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_RDEP_ATOM_ATOM),
+		"resolved dependecy of atom2 depending on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_ATOM_CONFIG),
+		"atom job slot attributes",
+		"@pLLI",
+		"atom,descriptor,affinity,config"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_PRIORITY,
+		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY),
+		"atom priority",
+		"@pI",
+		"atom,prio"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_STATE,
+		__stringify(KBASE_TL_ATTRIB_ATOM_STATE),
+		"atom state",
+		"@pI",
+		"atom,state"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE,
+		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE),
+		"atom caused priority change",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_JIT,
+		__stringify(KBASE_TL_ATTRIB_ATOM_JIT),
+		"jit done for atom",
+		"@pLL",
+		"atom,edit_addr,new_addr"
+	},
+	{
+		KBASE_TL_ATTRIB_AS_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_AS_CONFIG),
+		"address space attributes",
+		"@pLLL",
+		"address_space,transtab,memattr,transcfg"
+	},
+	{
+		KBASE_TL_EVENT_LPU_SOFTSTOP,
+		__stringify(KBASE_TL_EVENT_LPU_SOFTSTOP),
+		"softstop event on given lpu",
+		"@p",
+		"lpu"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX),
+		"atom softstopped",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+		__stringify(KBASE_TL_EVENT_SOFTSTOP_ISSUE),
+		"atom softstop issued",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_JD_GPU_SOFT_RESET,
+		__stringify(KBASE_JD_GPU_SOFT_RESET),
+		"gpu soft reset",
+		"@p",
+		"gpu"
+	},
+};
+
+/* Descriptors of timeline messages transmitted in auxiliary events stream. */
+static const struct tp_desc tp_desc_aux[] = {
+	{
+		KBASE_AUX_PM_STATE,
+		__stringify(KBASE_AUX_PM_STATE),
+		"PM state",
+		"@IL",
+		"core_type,core_state_bitset"
+	},
+	{
+		KBASE_AUX_PAGEFAULT,
+		__stringify(KBASE_AUX_PAGEFAULT),
+		"Page fault",
+		"@IL",
+		"ctx_nr,page_cnt_change"
+	},
+	{
+		KBASE_AUX_PAGESALLOC,
+		__stringify(KBASE_AUX_PAGESALLOC),
+		"Total alloc pages change",
+		"@IL",
+		"ctx_nr,page_cnt"
+	},
+	{
+		KBASE_AUX_DEVFREQ_TARGET,
+		__stringify(KBASE_AUX_DEVFREQ_TARGET),
+		"New device frequency target",
+		"@L",
+		"target_freq"
+	},
+	{
+		KBASE_AUX_PROTECTED_ENTER_START,
+		__stringify(KBASE_AUX_PROTECTED_ENTER_START),
+		"enter protected mode start",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_ENTER_END,
+		__stringify(KBASE_AUX_PROTECTED_ENTER_END),
+		"enter protected mode end",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_LEAVE_START,
+		__stringify(KBASE_AUX_PROTECTED_LEAVE_START),
+		"leave protected mode start",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_LEAVE_END,
+		__stringify(KBASE_AUX_PROTECTED_LEAVE_END),
+		"leave protected mode end",
+		"@p",
+		"gpu"
+	}
+};
+
+#if MALI_UNIT_TEST
+/* Number of bytes read by user. */
+static atomic_t tlstream_bytes_collected = {0};
+
+/* Number of bytes generated by tracepoint messages. */
+static atomic_t tlstream_bytes_generated = {0};
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+/* Indicator of whether the timeline stream file descriptor is used. */
+atomic_t kbase_tlstream_enabled = {0};
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ * Return: timestamp value
+ */
+static u64 kbasep_tlstream_get_timestamp(void)
+{
+	struct timespec ts;
+	u64             timestamp;
+
+	getrawmonotonic(&ts);
+	timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+	return timestamp;
+}
+
+/**
+ * kbasep_tlstream_write_bytes - write data to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ * @bytes:  pointer to buffer holding data
+ * @len:    length of data to be written
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_bytes(
+		char       *buffer,
+		size_t     pos,
+		const void *bytes,
+		size_t     len)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(bytes);
+
+	memcpy(&buffer[pos], bytes, len);
+
+	return pos + len;
+}
+
+/**
+ * kbasep_tlstream_write_string - write string to message buffer
+ * @buffer:         buffer where data will be written
+ * @pos:            position in the buffer where to place data
+ * @string:         pointer to buffer holding the source string
+ * @max_write_size: number of bytes that can be stored in buffer
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_string(
+		char       *buffer,
+		size_t     pos,
+		const char *string,
+		size_t     max_write_size)
+{
+	u32 string_len;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(string);
+	/* Timeline string consists of at least string length and nul
+	 * terminator. */
+	KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
+	max_write_size -= sizeof(string_len);
+
+	string_len = strlcpy(
+			&buffer[pos + sizeof(string_len)],
+			string,
+			max_write_size);
+	string_len += sizeof(char);
+
+	/* Make sure that the source string fit into the buffer. */
+	KBASE_DEBUG_ASSERT(string_len <= max_write_size);
+
+	/* Update string length. */
+	memcpy(&buffer[pos], &string_len, sizeof(string_len));
+
+	return pos + sizeof(string_len) + string_len;
+}
+
+/**
+ * kbasep_tlstream_write_timestamp - write timestamp to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos)
+{
+	u64 timestamp = kbasep_tlstream_get_timestamp();
+
+	return kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&timestamp, sizeof(timestamp));
+}
+
+/**
+ * kbasep_tlstream_put_bits - put bits in a word
+ * @word:   pointer to the words being modified
+ * @value:  value that shall be written to given position
+ * @bitpos: position where value shall be written (in bits)
+ * @bitlen: length of value (in bits)
+ */
+static void kbasep_tlstream_put_bits(
+		u32          *word,
+		u32          value,
+		unsigned int bitpos,
+		unsigned int bitlen)
+{
+	const u32 mask = ((1 << bitlen) - 1) << bitpos;
+
+	KBASE_DEBUG_ASSERT(word);
+	KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen));
+	KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32);
+
+	*word &= ~mask;
+	*word |= ((value << bitpos) & mask);
+}
+
+/**
+ * kbasep_tlstream_packet_header_setup - setup the packet header
+ * @buffer:     pointer to the buffer
+ * @pkt_family: packet's family
+ * @pkt_type:   packet's type
+ * @pkt_class:  packet's class
+ * @stream_id:  stream id
+ * @numbered:   non-zero if this stream is numbered
+ *
+ * Function sets up immutable part of packet header in the given buffer.
+ */
+static void kbasep_tlstream_packet_header_setup(
+		char                  *buffer,
+		enum tl_packet_family pkt_family,
+		enum tl_packet_class  pkt_class,
+		enum tl_packet_type   pkt_type,
+		unsigned int          stream_id,
+		int                   numbered)
+{
+	u32 word0 = 0;
+	u32 word1 = 0;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL);
+	KBASE_DEBUG_ASSERT(
+			(pkt_type == TL_PACKET_TYPE_HEADER)  ||
+			(pkt_type == TL_PACKET_TYPE_SUMMARY) ||
+			(pkt_type == TL_PACKET_TYPE_BODY));
+	KBASE_DEBUG_ASSERT(
+			(pkt_class == TL_PACKET_CLASS_OBJ) ||
+			(pkt_class == TL_PACKET_CLASS_AUX));
+
+	kbasep_tlstream_put_bits(
+			&word0, pkt_family,
+			PACKET_FAMILY_POS, PACKET_FAMILY_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_class,
+			PACKET_CLASS_POS, PACKET_CLASS_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_type,
+			PACKET_TYPE_POS, PACKET_TYPE_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, stream_id,
+			PACKET_STREAMID_POS, PACKET_STREAMID_LEN);
+
+	if (numbered)
+		kbasep_tlstream_put_bits(
+				&word1, 1,
+				PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN);
+
+	memcpy(&buffer[0],             &word0, sizeof(word0));
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_header_update - update the packet header
+ * @buffer:    pointer to the buffer
+ * @data_size: amount of data carried in this packet
+ *
+ * Function updates mutable part of packet header in the given buffer.
+ * Note that value of data_size must not including size of the header.
+ */
+static void kbasep_tlstream_packet_header_update(
+		char   *buffer,
+		size_t data_size)
+{
+	u32 word0;
+	u32 word1;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	CSTD_UNUSED(word0);
+
+	memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1));
+
+	kbasep_tlstream_put_bits(
+			&word1, data_size,
+			PACKET_LENGTH_POS, PACKET_LENGTH_LEN);
+
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_number_update - update the packet number
+ * @buffer:  pointer to the buffer
+ * @counter: value of packet counter for this packet's stream
+ *
+ * Function updates packet number embedded within the packet placed in the
+ * given buffer.
+ */
+static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+
+	memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter));
+}
+
+/**
+ * kbasep_timeline_stream_reset - reset stream
+ * @stream:  pointer to the stream structure
+ *
+ * Function discards all pending messages and resets packet counters.
+ */
+static void kbasep_timeline_stream_reset(struct tl_stream *stream)
+{
+	unsigned int i;
+
+	for (i = 0; i < PACKET_COUNT; i++) {
+		if (stream->numbered)
+			atomic_set(
+					&stream->buffer[i].size,
+					PACKET_HEADER_SIZE +
+					PACKET_NUMBER_SIZE);
+		else
+			atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE);
+	}
+
+	atomic_set(&stream->wbi, 0);
+	atomic_set(&stream->rbi, 0);
+}
+
+/**
+ * kbasep_timeline_stream_init - initialize timeline stream
+ * @stream:      pointer to the stream structure
+ * @stream_type: stream type
+ */
+static void kbasep_timeline_stream_init(
+		struct tl_stream    *stream,
+		enum tl_stream_type stream_type)
+{
+	unsigned int i;
+
+	KBASE_DEBUG_ASSERT(stream);
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	spin_lock_init(&stream->lock);
+
+	/* All packets carrying tracepoints shall be numbered. */
+	if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type)
+		stream->numbered = 1;
+	else
+		stream->numbered = 0;
+
+	for (i = 0; i < PACKET_COUNT; i++)
+		kbasep_tlstream_packet_header_setup(
+				stream->buffer[i].data,
+				tl_stream_cfg[stream_type].pkt_family,
+				tl_stream_cfg[stream_type].pkt_class,
+				tl_stream_cfg[stream_type].pkt_type,
+				tl_stream_cfg[stream_type].stream_id,
+				stream->numbered);
+
+	kbasep_timeline_stream_reset(tl_stream[stream_type]);
+}
+
+/**
+ * kbasep_timeline_stream_term - terminate timeline stream
+ * @stream: pointer to the stream structure
+ */
+static void kbasep_timeline_stream_term(struct tl_stream *stream)
+{
+	KBASE_DEBUG_ASSERT(stream);
+}
+
+/**
+ * kbasep_tlstream_msgbuf_submit - submit packet to the user space
+ * @stream:     pointer to the stream structure
+ * @wb_idx_raw: write buffer index
+ * @wb_size:    length of data stored in current buffer
+ *
+ * Function updates currently written buffer with packet header. Then write
+ * index is incremented and buffer is handled to user space. Parameters
+ * of new buffer are returned using provided arguments.
+ *
+ * Return: length of data in new buffer
+ *
+ * Warning:  User must update the stream structure with returned value.
+ */
+static size_t kbasep_tlstream_msgbuf_submit(
+		struct tl_stream *stream,
+		unsigned int      wb_idx_raw,
+		unsigned int      wb_size)
+{
+	unsigned int rb_idx_raw = atomic_read(&stream->rbi);
+	unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
+
+	/* Set stream as flushed. */
+	atomic_set(&stream->autoflush_counter, -1);
+
+	kbasep_tlstream_packet_header_update(
+			stream->buffer[wb_idx].data,
+			wb_size - PACKET_HEADER_SIZE);
+
+	if (stream->numbered)
+		kbasep_tlstream_packet_number_update(
+				stream->buffer[wb_idx].data,
+				wb_idx_raw);
+
+	/* Increasing write buffer index will expose this packet to the reader.
+	 * As stream->lock is not taken on reader side we must make sure memory
+	 * is updated correctly before this will happen. */
+	smp_wmb();
+	wb_idx_raw++;
+	atomic_set(&stream->wbi, wb_idx_raw);
+
+	/* Inform user that packets are ready for reading. */
+	wake_up_interruptible(&tl_event_queue);
+
+	/* Detect and mark overflow in this stream. */
+	if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) {
+		/* Reader side depends on this increment to correctly handle
+		 * overflows. The value shall be updated only if it was not
+		 * modified by the reader. The data holding buffer will not be
+		 * updated before stream->lock is released, however size of the
+		 * buffer will. Make sure this increment is globally visible
+		 * before information about selected write buffer size. */
+		atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1);
+	}
+
+	wb_size = PACKET_HEADER_SIZE;
+	if (stream->numbered)
+		wb_size += PACKET_NUMBER_SIZE;
+
+	return wb_size;
+}
+
+/**
+ * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer
+ * @stream_type: type of the stream that shall be locked
+ * @msg_size:    message size
+ * @flags:       pointer to store flags passed back on stream release
+ *
+ * Function will lock the stream and reserve the number of bytes requested
+ * in msg_size for the user.
+ *
+ * Return: pointer to the buffer where message can be stored
+ *
+ * Warning: Stream must be released with kbasep_tlstream_msgbuf_release().
+ *          Only atomic operations are allowed while stream is locked
+ *          (i.e. do not use any operation that may sleep).
+ */
+static char *kbasep_tlstream_msgbuf_acquire(
+		enum tl_stream_type stream_type,
+		size_t              msg_size,
+		unsigned long       *flags) __acquires(&stream->lock)
+{
+	struct tl_stream *stream;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(
+			PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
+			msg_size);
+
+	stream = tl_stream[stream_type];
+
+	spin_lock_irqsave(&stream->lock, *flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	/* Select next buffer if data will not fit into current one. */
+	if (PACKET_SIZE < wb_size + msg_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx  = (wb_idx_raw + 1) % PACKET_COUNT;
+	}
+
+	/* Reserve space in selected buffer. */
+	atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size);
+
+#if MALI_UNIT_TEST
+	atomic_add(msg_size, &tlstream_bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+	return &stream->buffer[wb_idx].data[wb_size];
+}
+
+/**
+ * kbasep_tlstream_msgbuf_release - unlock selected stream
+ * @stream_type:  type of the stream that shall be locked
+ * @flags:        value obtained during stream acquire
+ *
+ * Function releases stream that has been previously locked with a call to
+ * kbasep_tlstream_msgbuf_acquire().
+ */
+static void kbasep_tlstream_msgbuf_release(
+		enum tl_stream_type stream_type,
+		unsigned long       flags) __releases(&stream->lock)
+{
+	struct tl_stream *stream;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	stream = tl_stream[stream_type];
+
+	/* Mark stream as containing unflushed data. */
+	atomic_set(&stream->autoflush_counter, 0);
+
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_flush_stream - flush stream
+ * @stype:  type of stream to be flushed
+ *
+ * Flush pending data in timeline stream.
+ */
+static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
+{
+	struct tl_stream *stream = tl_stream[stype];
+	unsigned long    flags;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+	size_t           min_size = PACKET_HEADER_SIZE;
+
+	if (stream->numbered)
+		min_size += PACKET_NUMBER_SIZE;
+
+	spin_lock_irqsave(&stream->lock, flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	if (wb_size > min_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+		atomic_set(&stream->buffer[wb_idx].size, wb_size);
+	}
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/**
+ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
+ * @data:  unused
+ *
+ * Timer is executed periodically to check if any of the stream contains
+ * buffer ready to be submitted to user space.
+ */
+static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
+{
+	enum tl_stream_type stype;
+	int                 rcode;
+
+	CSTD_UNUSED(data);
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
+		struct tl_stream *stream = tl_stream[stype];
+		unsigned long    flags;
+		unsigned int     wb_idx_raw;
+		unsigned int     wb_idx;
+		size_t           wb_size;
+		size_t           min_size = PACKET_HEADER_SIZE;
+
+		int af_cnt = atomic_read(&stream->autoflush_counter);
+
+		/* Check if stream contain unflushed data. */
+		if (0 > af_cnt)
+			continue;
+
+		/* Check if stream should be flushed now. */
+		if (af_cnt != atomic_cmpxchg(
+					&stream->autoflush_counter,
+					af_cnt,
+					af_cnt + 1))
+			continue;
+		if (!af_cnt)
+			continue;
+
+		/* Autoflush this stream. */
+		if (stream->numbered)
+			min_size += PACKET_NUMBER_SIZE;
+
+		spin_lock_irqsave(&stream->lock, flags);
+
+		wb_idx_raw = atomic_read(&stream->wbi);
+		wb_idx     = wb_idx_raw % PACKET_COUNT;
+		wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+		if (wb_size > min_size) {
+			wb_size = kbasep_tlstream_msgbuf_submit(
+					stream, wb_idx_raw, wb_size);
+			wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+			atomic_set(&stream->buffer[wb_idx].size,
+					wb_size);
+		}
+		spin_unlock_irqrestore(&stream->lock, flags);
+	}
+
+	if (atomic_read(&autoflush_timer_active))
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+	CSTD_UNUSED(rcode);
+}
+
+/**
+ * kbasep_tlstream_packet_pending - check timeline streams for pending packets
+ * @stype:      pointer to variable where stream type will be placed
+ * @rb_idx_raw: pointer to variable where read buffer index will be placed
+ *
+ * Function checks all streams for pending packets. It will stop as soon as
+ * packet ready to be submitted to user space is detected. Variables under
+ * pointers, passed as the parameters to this function will be updated with
+ * values pointing to right stream and buffer.
+ *
+ * Return: non-zero if any of timeline streams has at last one packet ready
+ */
+static int kbasep_tlstream_packet_pending(
+		enum tl_stream_type *stype,
+		unsigned int        *rb_idx_raw)
+{
+	int pending = 0;
+
+	KBASE_DEBUG_ASSERT(stype);
+	KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+	for (
+			*stype = 0;
+			(*stype < TL_STREAM_TYPE_COUNT) && !pending;
+			(*stype)++) {
+		if (NULL != tl_stream[*stype]) {
+			*rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi);
+			/* Read buffer index may be updated by writer in case of
+			 * overflow. Read and write buffer indexes must be
+			 * loaded in correct order. */
+			smp_rmb();
+			if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw)
+				pending = 1;
+		}
+	}
+	(*stype)--;
+
+	return pending;
+}
+
+/**
+ * kbasep_tlstream_read - copy data from streams to buffer provided by user
+ * @filp:   pointer to file structure (unused)
+ * @buffer: pointer to the buffer provided by user
+ * @size:   maximum amount of data that can be stored in the buffer
+ * @f_pos:  pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer
+ */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos)
+{
+	ssize_t copy_len = 0;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(f_pos);
+
+	if (!buffer)
+		return -EINVAL;
+
+	if ((0 > *f_pos) || (PACKET_SIZE > size))
+		return -EINVAL;
+
+	mutex_lock(&tl_reader_lock);
+
+	while (copy_len < size) {
+		enum tl_stream_type stype;
+		unsigned int        rb_idx_raw = 0;
+		unsigned int        rb_idx;
+		size_t              rb_size;
+
+		/* If we don't have any data yet, wait for packet to be
+		 * submitted. If we already read some packets and there is no
+		 * packet pending return back to user. */
+		if (0 < copy_len) {
+			if (!kbasep_tlstream_packet_pending(
+						&stype,
+						&rb_idx_raw))
+				break;
+		} else {
+			if (wait_event_interruptible(
+						tl_event_queue,
+						kbasep_tlstream_packet_pending(
+							&stype,
+							&rb_idx_raw))) {
+				copy_len = -ERESTARTSYS;
+				break;
+			}
+		}
+
+		/* Check if this packet fits into the user buffer.
+		 * If so copy its content. */
+		rb_idx = rb_idx_raw % PACKET_COUNT;
+		rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size);
+		if (rb_size > size - copy_len)
+			break;
+		if (copy_to_user(
+					&buffer[copy_len],
+					tl_stream[stype]->buffer[rb_idx].data,
+					rb_size)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* If the rbi still points to the packet we just processed
+		 * then there was no overflow so we add the copied size to
+		 * copy_len and move rbi on to the next packet
+		 */
+		smp_rmb();
+		if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
+			copy_len += rb_size;
+			atomic_inc(&tl_stream[stype]->rbi);
+
+#if MALI_UNIT_TEST
+			atomic_add(rb_size, &tlstream_bytes_collected);
+#endif /* MALI_UNIT_TEST */
+		}
+	}
+
+	mutex_unlock(&tl_reader_lock);
+
+	return copy_len;
+}
+
+/**
+ * kbasep_tlstream_poll - poll timeline stream for packets
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait)
+{
+	enum tl_stream_type stream_type;
+	unsigned int        rb_idx;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	poll_wait(filp, &tl_event_queue, wait);
+	if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_release - release timeline stream descriptor
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ *
+ * Return always return zero
+ */
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
+{
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+	CSTD_UNUSED(inode);
+	CSTD_UNUSED(filp);
+
+	/* Stop autoflush timer before releasing access to streams. */
+	atomic_set(&autoflush_timer_active, 0);
+	del_timer_sync(&autoflush_timer);
+
+	atomic_set(&kbase_tlstream_enabled, 0);
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_timeline_header - prepare timeline header stream packet
+ * @stream_type: type of the stream that will carry header data
+ * @tp_desc:     pointer to array with tracepoint descriptors
+ * @tp_count:    number of descriptors in the given array
+ *
+ * Functions fills in information about tracepoints stored in body stream
+ * associated with this header stream.
+ */
+static void kbasep_tlstream_timeline_header(
+		enum tl_stream_type  stream_type,
+		const struct tp_desc *tp_desc,
+		u32                  tp_count)
+{
+	const u8      tv = SWTRACE_VERSION; /* protocol version */
+	const u8      ps = sizeof(void *); /* pointer size */
+	size_t        msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
+	char          *buffer;
+	size_t        pos = 0;
+	unsigned long flags;
+	unsigned int  i;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(tp_desc);
+
+	/* Calculate the size of the timeline message. */
+	for (i = 0; i < tp_count; i++) {
+		msg_size += sizeof(tp_desc[i].id);
+		msg_size +=
+			strnlen(tp_desc[i].id_str,    STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].name,      STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_types, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_names, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+	}
+
+	KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size);
+
+	buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tp_count, sizeof(tp_count));
+
+	for (i = 0; i < tp_count; i++) {
+		pos = kbasep_tlstream_write_bytes(
+				buffer, pos,
+				&tp_desc[i].id, sizeof(tp_desc[i].id));
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].id_str, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].name, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_types, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_names, msg_size - pos);
+	}
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(stream_type, flags);
+
+	/* We don't expect any more data to be read in this stream.
+	 * As header stream must be read before its associated body stream,
+	 * make this packet visible to the user straightaway. */
+	kbasep_tlstream_flush_stream(stream_type);
+}
+
+/*****************************************************************************/
+
+int kbase_tlstream_init(void)
+{
+	enum tl_stream_type i;
+
+	/* Prepare stream structures. */
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL);
+		if (!tl_stream[i])
+			break;
+		kbasep_timeline_stream_init(tl_stream[i], i);
+	}
+	if (TL_STREAM_TYPE_COUNT > i) {
+		for (; i > 0; i--) {
+			kbasep_timeline_stream_term(tl_stream[i - 1]);
+			kfree(tl_stream[i - 1]);
+		}
+		return -ENOMEM;
+	}
+
+	/* Initialize autoflush timer. */
+	atomic_set(&autoflush_timer_active, 0);
+	setup_timer(&autoflush_timer,
+			kbasep_tlstream_autoflush_timer_callback,
+			0);
+
+	return 0;
+}
+
+void kbase_tlstream_term(void)
+{
+	enum tl_stream_type i;
+
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		kbasep_timeline_stream_term(tl_stream[i]);
+		kfree(tl_stream[i]);
+	}
+}
+
+static void kbase_create_timeline_objects(struct kbase_context *kctx)
+{
+	struct kbase_device             *kbdev = kctx->kbdev;
+	unsigned int                    lpu_id;
+	unsigned int                    as_nr;
+	struct kbasep_kctx_list_element *element;
+
+	/* Create LPU objects. */
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		u32 *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, lpu_id, *lpu);
+	}
+
+	/* Create Address Space objects. */
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(&kbdev->as[as_nr], as_nr);
+
+	/* Create GPU object and make it retain all LPUs and address spaces. */
+	KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(
+			kbdev,
+			kbdev->gpu_props.props.raw_props.gpu_id,
+			kbdev->gpu_props.num_cores);
+
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		void *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, kbdev);
+	}
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(
+				&kbdev->as[as_nr],
+				kbdev);
+
+	/* Create object for each known context. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry(element, &kbdev->kctx_list, link) {
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(
+				element->kctx,
+				element->kctx->id,
+				(u32)(element->kctx->tgid));
+	}
+	/* Before releasing the lock, reset body stream buffers.
+	 * This will prevent context creation message to be directed to both
+	 * summary and body stream.
+	 */
+	kbase_tlstream_reset_body_streams();
+	mutex_unlock(&kbdev->kctx_list_lock);
+	/* Static object are placed into summary packet that needs to be
+	 * transmitted first. Flush all streams to make it available to
+	 * user space.
+	 */
+	kbase_tlstream_flush_streams();
+}
+
+int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags)
+{
+	int ret;
+	u32 tlstream_enabled = TLSTREAM_ENABLED | flags;
+
+	if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) {
+		int rcode;
+
+		ret = anon_inode_getfd(
+				"[mali_tlstream]",
+				&kbasep_tlstream_fops,
+				kctx,
+				O_RDONLY | O_CLOEXEC);
+		if (ret < 0) {
+			atomic_set(&kbase_tlstream_enabled, 0);
+			return ret;
+		}
+
+		/* Reset and initialize header streams. */
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_HEADER]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_AUX_HEADER]);
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_OBJ_HEADER,
+				tp_desc_obj,
+				ARRAY_SIZE(tp_desc_obj));
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_AUX_HEADER,
+				tp_desc_aux,
+				ARRAY_SIZE(tp_desc_aux));
+
+		/* Start autoflush timer. */
+		atomic_set(&autoflush_timer_active, 1);
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+		CSTD_UNUSED(rcode);
+
+		/* If job dumping is enabled, readjust the software event's
+		 * timeout as the default value of 3 seconds is often
+		 * insufficient. */
+		if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) {
+			dev_info(kctx->kbdev->dev,
+					"Job dumping is enabled, readjusting the software event's timeout\n");
+			atomic_set(&kctx->kbdev->js_data.soft_job_timeout_ms,
+					1800000);
+		}
+
+		/* Summary stream was cleared during acquire.
+		 * Create static timeline objects that will be
+		 * read by client.
+		 */
+		kbase_create_timeline_objects(kctx);
+
+	} else {
+		ret = -EBUSY;
+	}
+
+	return ret;
+}
+
+void kbase_tlstream_flush_streams(void)
+{
+	enum tl_stream_type stype;
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
+		kbasep_tlstream_flush_stream(stype);
+}
+
+void kbase_tlstream_reset_body_streams(void)
+{
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_OBJ]);
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_AUX]);
+}
+
+#if MALI_UNIT_TEST
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
+{
+	KBASE_DEBUG_ASSERT(bytes_collected);
+	KBASE_DEBUG_ASSERT(bytes_generated);
+	*bytes_collected = atomic_read(&tlstream_bytes_collected);
+	*bytes_generated = atomic_read(&tlstream_bytes_generated);
+}
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
+{
+	const u32     msg_id = KBASE_TL_NEW_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) +
+		sizeof(core_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &id, sizeof(id));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_count, sizeof(core_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
+{
+	const u32     msg_id = KBASE_TL_NEW_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) +
+		sizeof(fn);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &fn, sizeof(fn));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_LPU_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_AS_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_ATOM;
+	const size_t  msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+			sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_ctx(void *context)
+{
+	const u32     msg_id = KBASE_TL_DEL_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_atom(void *atom)
+{
+	const u32     msg_id = KBASE_TL_DEL_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_RET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_LPU;
+	const size_t  msg_s0 = sizeof(u32) + sizeof(char) +
+			strnlen(attrib_match_list, STRLEN_MAX);
+	const size_t  msg_size =
+			sizeof(msg_id) + sizeof(u64) +
+			sizeof(atom) + sizeof(lpu) + msg_s0;
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_string(
+			buffer, pos, attrib_match_list, msg_s0);
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_DEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_NDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_RDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_RET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_NRET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+		sizeof(jd) + sizeof(affinity) + sizeof(config);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &jd, sizeof(jd));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &affinity, sizeof(affinity));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &config, sizeof(config));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(prio);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &prio, sizeof(prio));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_jit(
+		void *atom, u64 edit_addr, u64 new_addr)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_JIT;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom)
+		+ sizeof(edit_addr) + sizeof(new_addr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &edit_addr, sizeof(edit_addr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &new_addr, sizeof(new_addr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) +
+		sizeof(transtab) + sizeof(memattr) + sizeof(transcfg);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transtab, sizeof(transtab));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &memattr, sizeof(memattr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transcfg, sizeof(transcfg));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu)
+{
+	const u32     msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
+{
+	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+{
+	const u32     msg_id = KBASE_AUX_PM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
+		sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_type, sizeof(core_type));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
+{
+	const u32     msg_id = KBASE_AUX_PAGEFAULT;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count_change);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&page_count_change, sizeof(page_count_change));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
+{
+	const u32     msg_id = KBASE_AUX_PAGESALLOC;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &page_count, sizeof(page_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_devfreq_target(u64 target_freq)
+{
+	const u32       msg_id = KBASE_AUX_DEVFREQ_TARGET;
+	const size_t    msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(target_freq);
+	unsigned long   flags;
+	char            *buffer;
+	size_t          pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &target_freq, sizeof(target_freq));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_protected_enter_start(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_ENTER_START;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+void __kbase_tlstream_aux_protected_enter_end(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_ENTER_END;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_protected_leave_start(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_LEAVE_START;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+void __kbase_tlstream_aux_protected_leave_end(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_LEAVE_END;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
new file mode 100644
index 0000000..c0a1117
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -0,0 +1,623 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_KBASE_TLSTREAM_H)
+#define _KBASE_TLSTREAM_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_init - initialize timeline infrastructure in kernel
+ * Return: zero on success, negative number on error
+ */
+int kbase_tlstream_init(void);
+
+/**
+ * kbase_tlstream_term - terminate timeline infrastructure in kernel
+ *
+ * Timeline need have to been previously enabled with kbase_tlstream_init().
+ */
+void kbase_tlstream_term(void);
+
+/**
+ * kbase_tlstream_acquire - acquire timeline stream file descriptor
+ * @kctx:  kernel common context
+ * @flags: timeline stream flags
+ *
+ * This descriptor is meant to be used by userspace timeline to gain access to
+ * kernel timeline stream. This stream is later broadcasted by user space to the
+ * timeline client.
+ * Only one entity can own the descriptor at any given time. Descriptor shall be
+ * closed if unused. If descriptor cannot be obtained (i.e. when it is already
+ * being used) return will be a negative value.
+ *
+ * Return: file descriptor on success, negative number on error
+ */
+int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags);
+
+/**
+ * kbase_tlstream_flush_streams - flush timeline streams.
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_tlstream_flush_streams(void);
+
+/**
+ * kbase_tlstream_reset_body_streams - reset timeline body streams.
+ *
+ * Function will discard pending data in all timeline body streams.
+ */
+void kbase_tlstream_reset_body_streams(void);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_tlstream_test - start timeline stream data generator
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay in milliseconds between trace points written by one
+ *             writer
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This test starts a requested number of asynchronous writers in both IRQ and
+ * thread context. Each writer will generate required number of test
+ * tracepoints (tracepoints with embedded information about writer that
+ * should be verified by user space reader). Tracepoints will be emitted in
+ * all timeline body streams. If aux_msg is non-zero writer will also
+ * generate not testable tracepoints (tracepoints without information about
+ * writer). These tracepoints are used to check correctness of remaining
+ * timeline message generating functions. Writer will wait requested time
+ * between generating another set of messages. This call blocks until all
+ * writers finish.
+ */
+void kbase_tlstream_test(
+		unsigned int tpw_count,
+		unsigned int msg_delay,
+		unsigned int msg_count,
+		int          aux_msg);
+
+/**
+ * kbase_tlstream_stats - read timeline stream statistics
+ * @bytes_collected: will hold number of bytes read by the user
+ * @bytes_generated: will hold number of bytes generated by trace points
+ */
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+#define TL_ATOM_STATE_IDLE 0
+#define TL_ATOM_STATE_READY 1
+#define TL_ATOM_STATE_DONE 2
+#define TL_ATOM_STATE_POSTED 3
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+void __kbase_tlstream_tl_del_ctx(void *context);
+void __kbase_tlstream_tl_del_atom(void *atom);
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list);
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config);
+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio);
+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state);
+void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom);
+void __kbase_tlstream_tl_attrib_atom_jit(
+		void *atom, u64 edit_addr, u64 new_addr);
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg);
+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom);
+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu);
+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom);
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
+void __kbase_tlstream_aux_devfreq_target(u64 target_freq);
+void __kbase_tlstream_aux_protected_enter_start(void *gpu);
+void __kbase_tlstream_aux_protected_enter_end(void *gpu);
+void __kbase_tlstream_aux_protected_leave_start(void *gpu);
+void __kbase_tlstream_aux_protected_leave_end(void *gpu);
+
+#define TLSTREAM_ENABLED (1 << 31)
+
+extern atomic_t kbase_tlstream_enabled;
+
+#define __TRACE_IF_ENABLED(trace_name, ...)                         \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled & TLSTREAM_ENABLED)                     \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...)                     \
+	do {                                                            \
+		int enabled = atomic_read(&kbase_tlstream_enabled);     \
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
+			__kbase_tlstream_##trace_name(__VA_ARGS__);     \
+	} while (0)
+
+#define __TRACE_IF_ENABLED_JD(trace_name, ...)                      \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED)    \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+/*****************************************************************************/
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX - create context object in timeline
+ *                                     summary
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU - create GPU object in timeline summary
+ * @gpu:        name of the GPU object
+ * @id:         id value of this GPU
+ * @core_count: number of cores this GPU hosts
+ *
+ * Function emits a timeline message informing about GPU creation. GPU is
+ * created with two attributes: id and core count.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(gpu, id, core_count) \
+	__TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU - create LPU object in timeline summary
+ * @lpu: name of the Logical Processing Unit object
+ * @nr:  sequential number assigned to this LPU
+ * @fn:  property describing this LPU's functional abilities
+ *
+ * Function emits a timeline message informing about LPU creation. LPU is
+ * created with two attributes: number linking this LPU with GPU's job slot
+ * and function bearing information about this LPU abilities.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, nr, fn) \
+	__TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU - lifelink LPU object to GPU
+ * @lpu: name of the Logical Processing Unit object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that LPU object shall be deleted
+ * along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_AS - create address space object in timeline summary
+ * @as: name of the address space object
+ * @nr: sequential number assigned to this address space
+ *
+ * Function emits a timeline message informing about address space creation.
+ * Address space is created with one attribute: number identifying this
+ * address space.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(as, nr) \
+	__TRACE_IF_ENABLED(tl_summary_new_as, as, nr)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU - lifelink address space object to GPU
+ * @as:  name of the address space object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that address space object
+ * shall be deleted along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(as, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_CTX - create context object in timeline
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ */
+#define KBASE_TLSTREAM_TL_NEW_CTX(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_ATOM - create atom object in timeline
+ * @atom: name of the atom object
+ * @nr:   sequential number assigned to this atom
+ *
+ * Function emits a timeline message informing about atom creation. Atom is
+ * created with atom number (its attribute) that links it with actual work
+ * bucket id understood by hardware.
+ */
+#define KBASE_TLSTREAM_TL_NEW_ATOM(atom, nr) \
+	__TRACE_IF_ENABLED(tl_new_atom, atom, nr)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_CTX - destroy context object in timeline
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that context object ceased to
+ * exist.
+ */
+#define KBASE_TLSTREAM_TL_DEL_CTX(context) \
+	__TRACE_IF_ENABLED(tl_del_ctx, context)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_ATOM - destroy atom object in timeline
+ * @atom: name of the atom object
+ *
+ * Function emits a timeline message informing that atom object ceased to
+ * exist.
+ */
+#define KBASE_TLSTREAM_TL_DEL_ATOM(atom) \
+	__TRACE_IF_ENABLED(tl_del_atom, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_CTX_LPU - retain context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_CTX_LPU(context, lpu) \
+	__TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_CTX - retain atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by context and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_CTX(atom, context) \
+	__TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_LPU - retain atom by LPU
+ * @atom:              name of the atom object
+ * @lpu:               name of the Logical Processing Unit object
+ * @attrib_match_list: list containing match operator attributes
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_LPU(atom, lpu, attrib_match_list) \
+	__TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_CTX_LPU - release context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being released
+ * by LPU object.
+ */
+#define KBASE_TLSTREAM_TL_NRET_CTX_LPU(context, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - release atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by context.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX(atom, context) \
+	__TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - release atom by LPU
+ * @atom: name of the atom object
+ * @lpu:  name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by LPU.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU(atom, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_AS_CTX - lifelink address space object to context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being held by the context object.
+ */
+#define KBASE_TLSTREAM_TL_RET_AS_CTX(as, ctx) \
+	__TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_AS_CTX - release address space by context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being released by atom.
+ */
+#define KBASE_TLSTREAM_TL_NRET_AS_CTX(as, ctx) \
+	__TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_AS - retain atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by address space and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_AS(atom, as) \
+	__TRACE_IF_ENABLED(tl_ret_atom_as, atom, as)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_AS - release atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by address space.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_AS(atom, as) \
+	__TRACE_IF_ENABLED(tl_nret_atom_as, atom, as)
+
+/**
+ * KBASE_TLSTREAM_TL_DEP_ATOM_ATOM - parent atom depends on child atom
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depends on child atom
+ *
+ * Function emits a timeline message informing that parent atom waits for
+ * child atom object to be completed before start its execution.
+ */
+#define KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM - dependency between atoms resolved
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM - information about already resolved dependency between atoms
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes
+ * @atom:     name of the atom object
+ * @jd:       job descriptor address
+ * @affinity: job affinity
+ * @config:   job config
+ *
+ * Function emits a timeline message containing atom attributes.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(atom, jd, affinity, config) \
+	__TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - atom priority
+ * @atom: name of the atom object
+ * @prio: atom priority
+ *
+ * Function emits a timeline message containing atom priority.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(atom, prio) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - atom state
+ * @atom:  name of the atom object
+ * @state: atom state
+ *
+ * Function emits a timeline message containing atom state.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, state) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE - atom caused priority change
+ * @atom:  name of the atom object
+ *
+ * Function emits a timeline message signalling priority change
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE(atom) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority_change, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - jit happened on atom
+ * @atom:       atom identifier
+ * @edit_addr:  address edited by jit
+ * @new_addr:   address placed into the edited location
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr) \
+	__TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, new_addr)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes
+ * @as:       assigned address space
+ * @transtab: configuration of the TRANSTAB register
+ * @memattr:  configuration of the MEMATTR register
+ * @transcfg: configuration of the TRANSCFG register (or zero if not present)
+ *
+ * Function emits a timeline message containing address space attributes.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, transtab, memattr, transcfg) \
+	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ex
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_LPU_softstop
+ * @lpu:        name of the LPU object
+ */
+#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(lpu) \
+	__TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_issue
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom)
+
+/**
+ * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - The GPU is being soft reset
+ * @gpu:        name of the GPU object
+ *
+ * This imperative tracepoint is specific to job dumping.
+ * Function emits a timeline message indicating GPU soft reset.
+ */
+#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET(gpu) \
+	__TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu)
+
+
+/**
+ * KBASE_TLSTREAM_AUX_PM_STATE - timeline message: power management state
+ * @core_type: core type (shader, tiler, l2 cache, l3 cache)
+ * @state:     64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
+ */
+#define KBASE_TLSTREAM_AUX_PM_STATE(core_type, state) \
+	__TRACE_IF_ENABLED(aux_pm_state, core_type, state)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGEFAULT - timeline message: MMU page fault event
+ *                                resulting in new pages being mapped
+ * @ctx_nr:            kernel context number
+ * @page_count_change: number of pages to be added
+ */
+#define KBASE_TLSTREAM_AUX_PAGEFAULT(ctx_nr, page_count_change) \
+	__TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGESALLOC - timeline message: total number of allocated
+ *                                 pages is changed
+ * @ctx_nr:     kernel context number
+ * @page_count: number of pages used by the context
+ */
+#define KBASE_TLSTREAM_AUX_PAGESALLOC(ctx_nr, page_count) \
+	__TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count)
+
+/**
+ * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - timeline message: new target DVFS
+ *                                     frequency
+ * @target_freq: new target frequency
+ */
+#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(target_freq) \
+	__TRACE_IF_ENABLED(aux_devfreq_target, target_freq)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - The GPU has started transitioning
+ *                                            to protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU is starting to
+ * transition to protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_enter_start, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - The GPU has finished transitioning
+ *                                          to protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU has finished
+ * transitioning to protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_enter_end, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - The GPU has started transitioning
+ *                                            to non-protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU is starting to
+ * transition to non-protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_leave_start, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - The GPU has finished transitioning
+ *                                          to non-protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU has finished
+ * transitioning to non-protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_leave_end, gpu)
+
+#endif /* _KBASE_TLSTREAM_H */
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100644
index 0000000..e2e0544
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,264 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code idenitifers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE.
+ *
+ * e.g.:
+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X
+ * typedef enum
+ * {
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X )
+ * #include "mali_kbase_trace_defs.h"
+ * #undef  KBASE_TRACE_CODE_MAKE_CODE
+ * } kbase_trace_code;
+ *
+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE
+ *
+ *
+ * The use of the macro here is:
+ * - KBASE_TRACE_CODE_MAKE_CODE( X )
+ *
+ * Which produces:
+ * - For an enum, KBASE_TRACE_CODE_X
+ * - For a string, "X"
+ *
+ *
+ * For example:
+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to:
+ *  - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum
+ *  - "JM_JOB_COMPLETE" for the string
+ * - To use it to trace an event, do:
+ *  - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val );
+ */
+
+#if 0 /* Dummy section to avoid breaking formatting */
+int dummy_array[] = {
+#endif
+
+/*
+ * Core events
+ */
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ),
+	/* info_val == bits cleared */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR),
+	/* GPU addr==dump address */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES),
+/*
+ * Job Slot management events
+ */
+	/* info_val==irq rawstat at start */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ),
+	/* info_val==jobs processed */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END),
+/* In the following:
+ *
+ * - ctx is set if a corresponding job found (NULL otherwise, e.g. some soft-stop cases)
+ * - uatom==kernel-side mapped uatom address (for correlation with user-side)
+ */
+	/* info_val==exit code; gpu_addr==chain gpuaddr */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+	/* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT),
+	/* gpu_addr is as follows:
+	 * - If JS_STATUS active after soft-stop, val==gpu addr written to
+	 *   JS_HEAD on submit
+	 * - otherwise gpu_addr==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1),
+	/* gpu_addr==JS_TAIL read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD),
+/* gpu_addr is as follows:
+ * - If JS_STATUS active before soft-stop, val==JS_HEAD
+ * - otherwise gpu_addr==0
+ */
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE),
+	/* info_val == nr jobs submitted */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP),
+	/* gpu_addr==JS_HEAD_NEXT last written */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER),
+/*
+ * Job dispatch events
+ */
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==0, info_val==0, uatom==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER),
+/*
+ * Scheduler Core events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB),
+	/* gpu_addr==last value written/would be written to JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
+	/* kctx is the one being evicted, info_val == kctx to put in  */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE),
+	/* info_val == the ctx attribute now on ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX),
+	/* info_val == the ctx attribute now on runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL),
+	/* info_val == the ctx attribute now off ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
+	/* info_val == the ctx attribute now off runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+/*
+ * Scheduler Policy events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX),
+	/* info_val == whether it was evicted */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ),
+	/* gpu_addr==JS_HEAD to write if the job were run */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END),
+/*
+ * Power Management Events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
+	/* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF),
+	/* info_val == policy number, or -1 for "Already changing" */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+/* Unused code just to make it easier to not have a comma at the end.
+ * All other codes MUST come before this */
+	KBASE_TRACE_CODE_MAKE_CODE(DUMMY)
+
+#if 0 /* Dummy section to avoid breaking formatting */
+};
+#endif
+
+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
new file mode 100644
index 0000000..5830e87
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
@@ -0,0 +1,236 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define CREATE_TRACE_POINTS
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+#include "mali_timeline.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active);
+
+struct kbase_trace_timeline_desc {
+	char *enum_str;
+	char *desc;
+	char *format;
+	char *format_desc;
+};
+
+static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc }
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table)
+
+static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos)
+{
+	if (*pos >= KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	(*pos)++;
+
+	if (*pos == KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace_timeline_desc *trace_desc = data;
+
+	seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc);
+	return 0;
+}
+
+
+static const struct seq_operations kbasep_trace_timeline_seq_ops = {
+	.start = kbasep_trace_timeline_seq_start,
+	.next = kbasep_trace_timeline_seq_next,
+	.stop = kbasep_trace_timeline_seq_stop,
+	.show = kbasep_trace_timeline_seq_show,
+};
+
+static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &kbasep_trace_timeline_seq_ops);
+}
+
+static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
+	.open = kbasep_trace_timeline_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_timeline_defs",
+			S_IRUGO, kbdev->mali_debugfs_directory, NULL,
+			&kbasep_trace_timeline_debugfs_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
+	} else {
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1);
+		KBASE_TIMELINE_JOB_START(kctx, js, atom_number);
+	}
+	++kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
+	} else {
+		/* Job finished in JS_HEAD */
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0);
+		KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number);
+
+		/* see if we need to trace the job in JS_NEXT moving to JS_HEAD */
+		if (kbase_backend_nr_atoms_submitted(kbdev, js)) {
+			struct kbase_jd_atom *next_katom;
+			struct kbase_context *next_kctx;
+
+			/* Peek the next atom - note that the atom in JS_HEAD will already
+			 * have been dequeued */
+			next_katom = kbase_backend_inspect_head(kbdev, js);
+			WARN_ON(!next_katom);
+			next_kctx = next_katom->kctx;
+			KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0);
+			KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1);
+			KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom));
+		}
+	}
+
+	--kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+	int uid = 0;
+	int old_uid;
+
+	/* If a producer already exists for the event, try to use their UID (multiple-producers) */
+	uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]);
+	old_uid = uid;
+
+	/* Get a new non-zero UID if we don't have one yet */
+	while (!uid)
+		uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter);
+
+	/* Try to use this UID */
+	if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid))
+		/* If it changed, raced with another producer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid);
+}
+
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != 0) {
+		if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+			/* If it changed, raced with another consumer: we've lost this UID */
+			uid = 0;
+
+		KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+	}
+}
+
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+		/* If it changed, raced with another consumer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+}
+
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	/* Simply log the start of the transition */
+	kbdev->timeline.l2_transitioning = true;
+	KBASE_TIMELINE_POWERING_L2(kbdev);
+}
+
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	/* Simply log the end of the transition */
+	if (kbdev->timeline.l2_transitioning) {
+		kbdev->timeline.l2_transitioning = false;
+		KBASE_TIMELINE_POWERED_L2(kbdev);
+	}
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
new file mode 100644
index 0000000..619072f
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
@@ -0,0 +1,363 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_KBASE_TRACE_TIMELINE_H)
+#define _KBASE_TRACE_TIMELINE_H
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+
+enum kbase_trace_timeline_code {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Initialize Timeline DebugFS entries */
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_trace_timeline_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
+ * functions.
+ * Output is timestamped by either sched_clock() (default), local_clock(), or
+ * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */
+#include "mali_timeline.h"
+
+/* Trace number of atoms in flight for kctx (atoms either not completed, or in
+   process of being returned to user */
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec,   \
+				(int)kctx->timeline.owner_tgid,              \
+				count);                                      \
+	} while (0)
+
+/* Trace atom_id being Ready to Run */
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id)                             \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec,              \
+				CTX_FLOW_ATOM_READY,                         \
+				(int)kctx->timeline.owner_tgid,              \
+				atom_id);                                    \
+	} while (0)
+
+/* Trace number of atoms submitted to job slot js
+ *
+ * NOTE: This uses a different tracepoint to the head/next/soft-stop actions,
+ * so that those actions can be filtered out separately from this
+ *
+ * This is because this is more useful, as we can use it to calculate general
+ * utilization easily and accurately */
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_ACTIVE,                      \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+
+/* Trace atoms present in JS_NEXT */
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_NEXT,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace atoms present in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_HEAD,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace that a soft stop/evict from next is being attempted on a slot */
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_STOPPING,                    \
+				(kctx) ? (int)kctx->timeline.owner_tgid : 0, \
+				js, count);                                  \
+	} while (0)
+
+
+
+/* Trace state of overall GPU power */
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active)                              \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_ACTIVE, active);            \
+	} while (0)
+
+/* Trace state of tiler power */
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap)                            \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_TILER_ACTIVE,               \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace number of shaders currently powered */
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap)                           \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_SHADER_ACTIVE,              \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 power */
+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap)                               \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_L2_ACTIVE,                  \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 cache*/
+#define KBASE_TIMELINE_POWERING_L2(kbdev)                                    \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_POWERING,               \
+				1);                                          \
+	} while (0)
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)                                     \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_ACTIVE,                 \
+				1);                                          \
+	} while (0)
+
+/* Trace kbase_pm_send_event message send */
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id)         \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_SEND_EVENT,                       \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+/* Trace kbase_pm_worker message receive */
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id)       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_HANDLE_EVENT,                     \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+
+/* Trace atom_id starting in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number)          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_START_GPU_JOB_CHAIN_SW_APPROX,            \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _consumerof_atom_number);                \
+	} while (0)
+
+/* Trace atom_id stopping on JS_HEAD */
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_STOP_GPU_JOB_CHAIN_SW_APPROX,             \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _producerof_atom_number_completed);      \
+	} while (0)
+
+/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
+ * certin caller */
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec,     \
+				trace_code, 1);                              \
+	} while (0)
+
+/* Trace number of contexts active */
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec,    \
+				count);                                      \
+	} while (0)
+
+/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
+
+/**
+ * Trace that an atom is starting on a job slot
+ *
+ * The caller must be holding hwaccess_lock
+ */
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js);
+
+/**
+ * Trace that an atom has done on a job slot
+ *
+ * 'Done' in this sense can occur either because:
+ * - the atom in JS_HEAD finished
+ * - the atom in JS_NEXT was evicted
+ *
+ * Whether the atom finished or was evicted is passed in @a done_code
+ *
+ * It is assumed that the atom has already been removed from the submit slot,
+ * with either:
+ * - kbasep_jm_dequeue_submit_slot()
+ * - kbasep_jm_dequeue_tail_submit_slot()
+ *
+ * The caller must be holding hwaccess_lock
+ */
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code);
+
+
+/** Trace a pm event starting */
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
+		enum kbase_timeline_pm_event event_sent);
+
+/** Trace a pm event finishing */
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Check whether a pm event was present, and if so trace finishing it */
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Trace L2 power-up start */
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
+
+/** Trace L2 power-up done */
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
+
+#else
+
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)  CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
+
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
+
+static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+}
+
+static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+}
+
+static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+}
+
+static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_MALI_TRACE_TIMELINE */
+
+#endif				/* _KBASE_TRACE_TIMELINE_H */
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
new file mode 100644
index 0000000..156a95a
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
@@ -0,0 +1,140 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * Conventions on Event Names:
+ *
+ * - The prefix determines something about how the timeline should be
+ *   displayed, and is split up into various parts, separated by underscores:
+ *  - 'SW' and 'HW' as the first part will be used to determine whether a
+ *     timeline is to do with Software or Hardware - effectively, separate
+ *     'channels' for Software and Hardware
+ *  - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
+ *    signify related pairs of events - these are optional.
+ *  - 'FLOW' indicates a generic event, which can use dependencies
+ * - This gives events such as:
+ *  - 'SW_ENTER_FOO'
+ *  - 'SW_LEAVE_FOO'
+ *  - 'SW_FLOW_BAR_1'
+ *  - 'SW_FLOW_BAR_2'
+ *  - 'HW_START_BAZ'
+ *  - 'HW_STOP_BAZ'
+ * - And an unadorned HW event:
+ *  - 'HW_BAZ_FROZBOZ'
+ */
+
+/*
+ * Conventions on parameter names:
+ * - anything with 'instance' in the name will have a separate timeline based
+ *   on that instances.
+ * - underscored-prefixed parameters will by hidden by default on timelines
+ *
+ * Hence:
+ * - Different job slots have their own 'instance', based on the instance value
+ * - Per-context info (e.g. atoms on a context) have their own 'instance'
+ *   (i.e. each context should be on a different timeline)
+ *
+ * Note that globally-shared resources can be tagged with a tgid, but we don't
+ * want an instance per context:
+ * - There's no point having separate Job Slot timelines for each context, that
+ *   would be confusing - there's only really 3 job slots!
+ * - There's no point having separate Shader-powered timelines for each
+ *   context, that would be confusing - all shader cores (whether it be 4, 8,
+ *   etc) are shared in the system.
+ */
+
+	/*
+	 * CTX events
+	 */
+	/* Separate timelines for each context 'instance'*/
+	KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT,     "CTX: Atoms in flight",            "%d,%d",    "_instance_tgid,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY,            "CTX: Atoms Ready to Run",         "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
+
+	/*
+	 * SW Events
+	 */
+	/* Separate timelines for each slot 'instance' */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE,         "SW: GPU slot active",             "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT,           "SW: GPU atom in NEXT",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD,           "SW: GPU atom in HEAD",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING,       "SW: Try Soft-Stop on GPU slot",   "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
+	/* Shader and overall power is shared - can't have separate instances of
+	 * it, just tagging with the context */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE,        "SW: GPU power active",            "%d,%d",    "_tgid,_value_is_power_active"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE,  "SW: GPU tiler powered",           "%d,%d",    "_tgid,_value_number_of_tilers"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered",         "%d,%d",    "_tgid,_value_number_of_shaders"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE,     "SW: GPU L2 powered",              "%d,%d",    "_tgid,_value_number_of_l2"),
+
+	/* SW Power event messaging. _event_type is one from the kbase_pm_event enum  */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT,          "SW: PM Send Event",               "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT,        "SW: PM Handle Event",             "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
+	/* SW L2 power events */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING,  "SW: GPU L2 powering",             "%d,%d", "_tgid,_writerof_l2_transitioning"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE,	  "SW: GPU L2 powering done",        "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
+
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE,          "SW: Context Active",              "%d,%d",    "_tgid,_value_active"),
+
+	/*
+	 * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END,   "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END,   "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END,   "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
+
+	/*
+	 * Significant Indirect callers of kbase_pm_check_transitions_nolock()
+	 */
+	/* kbase_pm_request_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	/* kbase_pm_release_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END,   "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	/*
+	 * END: SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+
+	/*
+	 * HW Events
+	 */
+	KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT,
+"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"),
+	KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain start (SW approximated)", "%d,%d,%d",
+"_tgid,job_slot,_consumerof_atom_number_ready"),
+	KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain stop (SW approximated)",  "%d,%d,%d",
+"_tgid,job_slot,_producerof_atom_number_completed")
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
new file mode 100644
index 0000000..be474ff
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
+{
+	struct list_head *pos = base->next;
+
+	while (pos != base) {
+		if (pos == entry)
+			return true;
+
+		pos = pos->next;
+	}
+	return false;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100644
index 0000000..fd7252d
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+/** Test whether the given list entry is a member of the given list.
+ *
+ * @param base      The head of the list to be tested
+ * @param entry     The list entry to be tested
+ *
+ * @return          true if entry is a member of base
+ *                  false otherwise
+ */
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
+
+#endif				/* _KBASE_UTILITY_H */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
new file mode 100644
index 0000000..c0d4292
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -0,0 +1,2075 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/hrtimer.h>
+#include <linux/jiffies.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwcnt_reader.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+#ifdef CONFIG_MALI_NO_MALI
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif
+
+/*****************************************************************************/
+
+/* Hwcnt reader API version */
+#define HWCNT_READER_API        1
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC            1000000000ull /* ns */
+
+/* The time resolution of dumping service. */
+#define DUMPING_RESOLUTION      500000ull /* ns */
+
+/* The maximal supported number of dumping buffers. */
+#define MAX_BUFFER_COUNT        32
+
+/* Size and number of hw counters blocks. */
+#define NR_CNT_BLOCKS_PER_GROUP 8
+#define NR_CNT_PER_BLOCK        64
+#define NR_BYTES_PER_CNT        4
+#define NR_BYTES_PER_HDR        16
+#define PRFCNT_EN_MASK_OFFSET   0x8
+
+/*****************************************************************************/
+
+enum {
+	SHADER_HWCNT_BM,
+	TILER_HWCNT_BM,
+	MMU_L2_HWCNT_BM,
+	JM_HWCNT_BM
+};
+
+enum vinstr_state {
+	VINSTR_IDLE,
+	VINSTR_DUMPING,
+	VINSTR_SUSPENDING,
+	VINSTR_SUSPENDED,
+	VINSTR_RESUMING
+};
+
+/**
+ * struct kbase_vinstr_context - vinstr context per device
+ * @lock:              protects the entire vinstr context
+ * @kbdev:             pointer to kbase device
+ * @kctx:              pointer to kbase context
+ * @vmap:              vinstr vmap for mapping hwcnt dump buffer
+ * @gpu_va:            GPU hwcnt dump buffer address
+ * @cpu_va:            the CPU side mapping of the hwcnt dump buffer
+ * @dump_size:         size of the dump buffer in bytes
+ * @bitmap:            current set of counters monitored, not always in sync
+ *                     with hardware
+ * @reprogram:         when true, reprogram hwcnt block with the new set of
+ *                     counters
+ * @state:             vinstr state
+ * @state_lock:        protects information about vinstr state
+ * @suspend_waitq:     notification queue to trigger state re-validation
+ * @suspend_cnt:       reference counter of vinstr's suspend state
+ * @suspend_work:      worker to execute on entering suspended state
+ * @resume_work:       worker to execute on leaving suspended state
+ * @nclients:          number of attached clients, pending or otherwise
+ * @waiting_clients:   head of list of clients being periodically sampled
+ * @idle_clients:      head of list of clients being idle
+ * @suspended_clients: head of list of clients being suspended
+ * @thread:            periodic sampling thread
+ * @waitq:             notification queue of sampling thread
+ * @request_pending:   request for action for sampling thread
+ * @clients_present:   when true, we have at least one client
+ *                     Note: this variable is in sync. with nclients and is
+ *                     present to preserve simplicity. Protected by state_lock.
+ */
+struct kbase_vinstr_context {
+	struct mutex             lock;
+	struct kbase_device      *kbdev;
+	struct kbase_context     *kctx;
+
+	struct kbase_vmap_struct vmap;
+	u64                      gpu_va;
+	void                     *cpu_va;
+	size_t                   dump_size;
+	u32                      bitmap[4];
+	bool                     reprogram;
+
+	enum vinstr_state        state;
+	struct spinlock          state_lock;
+	wait_queue_head_t        suspend_waitq;
+	unsigned int             suspend_cnt;
+	struct work_struct       suspend_work;
+	struct work_struct       resume_work;
+
+	u32                      nclients;
+	struct list_head         waiting_clients;
+	struct list_head         idle_clients;
+	struct list_head         suspended_clients;
+
+	struct task_struct       *thread;
+	wait_queue_head_t        waitq;
+	atomic_t                 request_pending;
+
+	bool                     clients_present;
+};
+
+/**
+ * struct kbase_vinstr_client - a vinstr client attached to a vinstr context
+ * @vinstr_ctx:    vinstr context client is attached to
+ * @list:          node used to attach this client to list in vinstr context
+ * @buffer_count:  number of buffers this client is using
+ * @event_mask:    events this client reacts to
+ * @dump_size:     size of one dump buffer in bytes
+ * @bitmap:        bitmap request for JM, TILER, SHADER and MMU counters
+ * @legacy_buffer: userspace hwcnt dump buffer (legacy interface)
+ * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface)
+ * @accum_buffer:  temporary accumulation buffer for preserving counters
+ * @dump_time:     next time this clients shall request hwcnt dump
+ * @dump_interval: interval between periodic hwcnt dumps
+ * @dump_buffers:  kernel hwcnt dump buffers allocated by this client
+ * @dump_buffers_meta: metadata of dump buffers
+ * @meta_idx:      index of metadata being accessed by userspace
+ * @read_idx:      index of buffer read by userspace
+ * @write_idx:     index of buffer being written by dumping service
+ * @waitq:         client's notification queue
+ * @pending:       when true, client has attached but hwcnt not yet updated
+ */
+struct kbase_vinstr_client {
+	struct kbase_vinstr_context        *vinstr_ctx;
+	struct list_head                   list;
+	unsigned int                       buffer_count;
+	u32                                event_mask;
+	size_t                             dump_size;
+	u32                                bitmap[4];
+	void __user                        *legacy_buffer;
+	void                               *kernel_buffer;
+	void                               *accum_buffer;
+	u64                                dump_time;
+	u32                                dump_interval;
+	char                               *dump_buffers;
+	struct kbase_hwcnt_reader_metadata *dump_buffers_meta;
+	atomic_t                           meta_idx;
+	atomic_t                           read_idx;
+	atomic_t                           write_idx;
+	wait_queue_head_t                  waitq;
+	bool                               pending;
+};
+
+/**
+ * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer
+ * @hrtimer:    high resolution timer
+ * @vinstr_ctx: vinstr context
+ */
+struct kbasep_vinstr_wake_up_timer {
+	struct hrtimer              hrtimer;
+	struct kbase_vinstr_context *vinstr_ctx;
+};
+
+/*****************************************************************************/
+
+static int kbasep_vinstr_service_task(void *data);
+
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+		struct file *filp,
+		poll_table  *wait);
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+		struct file   *filp,
+		unsigned int  cmd,
+		unsigned long arg);
+static int kbasep_vinstr_hwcnt_reader_mmap(
+		struct file           *filp,
+		struct vm_area_struct *vma);
+static int kbasep_vinstr_hwcnt_reader_release(
+		struct inode *inode,
+		struct file  *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations vinstr_client_fops = {
+	.poll           = kbasep_vinstr_hwcnt_reader_poll,
+	.unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,
+	.compat_ioctl   = kbasep_vinstr_hwcnt_reader_ioctl,
+	.mmap           = kbasep_vinstr_hwcnt_reader_mmap,
+	.release        = kbasep_vinstr_hwcnt_reader_release,
+};
+
+/*****************************************************************************/
+
+static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_uk_hwcnt_setup setup;
+	int err;
+
+	setup.dump_buffer = vinstr_ctx->gpu_va;
+	setup.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
+	setup.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
+	setup.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
+	setup.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
+
+	/* Mark the context as active so the GPU is kept turned on */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread. */
+	kbase_pm_context_active(kbdev);
+
+	/* Schedule the context in */
+	kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup);
+	if (err) {
+		/* Release the context. This had its own Power Manager Active
+		 * reference */
+		kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+		/* Also release our Power Manager Active reference */
+		kbase_pm_context_idle(kbdev);
+	}
+
+	return err;
+}
+
+static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+
+	err = kbase_instr_hwcnt_disable_internal(kctx);
+	if (err) {
+		dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)",
+				kctx);
+		return;
+	}
+
+	/* Release the context. This had its own Power Manager Active reference. */
+	kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+	/* Also release our Power Manager Active reference. */
+	kbase_pm_context_idle(kbdev);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx);
+}
+
+static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	disable_hwcnt(vinstr_ctx);
+	return enable_hwcnt(vinstr_ctx);
+}
+
+static void hwcnt_bitmap_set(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     = src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  = src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM];
+}
+
+static void hwcnt_bitmap_union(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     |= src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  |= src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM];
+}
+
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev)
+{
+	size_t dump_size;
+
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+		u32 nr_cg;
+
+		nr_cg = kbdev->gpu_props.num_core_groups;
+		dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	} else
+#endif /* CONFIG_MALI_NO_MALI */
+	{
+		/* assume v5 for now */
+		base_gpu_props *props = &kbdev->gpu_props.props;
+		u32 nr_l2 = props->l2_props.num_l2_slices;
+		u64 core_mask = props->coherency_info.group[0].core_mask;
+		u32 nr_blocks = fls64(core_mask);
+
+		/* JM and tiler counter blocks are always present */
+		dump_size = (2 + nr_l2 + nr_blocks) *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	}
+	return dump_size;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size);
+
+static size_t kbasep_vinstr_dump_size_ctx(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev);
+}
+
+static int kbasep_vinstr_map_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_va_region *reg;
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	u64 flags, nr_pages;
+
+	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+	vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	nr_pages = PFN_UP(vinstr_ctx->dump_size);
+
+	reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+			&vinstr_ctx->gpu_va);
+	if (!reg)
+		return -ENOMEM;
+
+	vinstr_ctx->cpu_va = kbase_vmap(
+			kctx,
+			vinstr_ctx->gpu_va,
+			vinstr_ctx->dump_size,
+			&vinstr_ctx->vmap);
+	if (!vinstr_ctx->cpu_va) {
+		kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void kbasep_vinstr_unmap_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+
+	kbase_vunmap(kctx, &vinstr_ctx->vmap);
+	kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+}
+
+/**
+ * kbasep_vinstr_create_kctx - create kernel context for vinstr
+ * @vinstr_ctx: vinstr context
+ * Return: zero on success
+ */
+static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element = NULL;
+	unsigned long flags;
+	bool enable_backend = false;
+	int err;
+
+	vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true);
+	if (!vinstr_ctx->kctx)
+		return -ENOMEM;
+
+	/* Map the master kernel dump buffer.  The HW dumps the counters
+	 * into this memory region. */
+	err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx);
+	if (err)
+		goto failed_map;
+
+	/* Add kernel context to list of contexts associated with device. */
+	element = kzalloc(sizeof(*element), GFP_KERNEL);
+	if (element) {
+		element->kctx = vinstr_ctx->kctx;
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_add(&element->link, &kbdev->kctx_list);
+
+		/* Inform timeline client about new context.
+		 * Do this while holding the lock to avoid tracepoint
+		 * being created in both body and summary stream. */
+		KBASE_TLSTREAM_TL_NEW_CTX(
+				vinstr_ctx->kctx,
+				vinstr_ctx->kctx->id,
+				(u32)(vinstr_ctx->kctx->tgid));
+
+		mutex_unlock(&kbdev->kctx_list_lock);
+	} else {
+		/* Don't treat this as a fail - just warn about it. */
+		dev_warn(kbdev->dev,
+				"couldn't add kctx to kctx_list\n");
+	}
+
+	/* Don't enable hardware counters if vinstr is suspended.
+	 * Note that vinstr resume code is run under vinstr context lock,
+	 * lower layer will be enabled as needed on resume. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE == vinstr_ctx->state)
+		enable_backend = true;
+	vinstr_ctx->clients_present = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (enable_backend)
+		err = enable_hwcnt(vinstr_ctx);
+	if (err)
+		goto failed_enable;
+
+	vinstr_ctx->thread = kthread_run(
+			kbasep_vinstr_service_task,
+			vinstr_ctx,
+			"mali_vinstr_service");
+	if (IS_ERR(vinstr_ctx->thread)) {
+		err = PTR_ERR(vinstr_ctx->thread);
+		goto failed_kthread;
+	}
+
+	return 0;
+
+failed_kthread:
+	disable_hwcnt(vinstr_ctx);
+failed_enable:
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->clients_present = false;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+	if (element) {
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_del(&element->link);
+		kfree(element);
+		mutex_unlock(&kbdev->kctx_list_lock);
+		KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+	}
+failed_map:
+	kbase_destroy_context(vinstr_ctx->kctx);
+	vinstr_ctx->kctx = NULL;
+	return err;
+}
+
+/**
+ * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device             *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	struct kbasep_kctx_list_element *tmp;
+	bool                            found = false;
+	unsigned long                   flags;
+
+	/* Release hw counters dumping resources. */
+	vinstr_ctx->thread = NULL;
+	disable_hwcnt(vinstr_ctx);
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+
+	/* Simplify state transitions by specifying that we have no clients. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->clients_present = false;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Remove kernel context from the device's contexts list. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == vinstr_ctx->kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	if (!found)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	/* Destroy context. */
+	kbase_destroy_context(vinstr_ctx->kctx);
+
+	/* Inform timeline client about context destruction. */
+	KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+
+	vinstr_ctx->kctx = NULL;
+}
+
+/**
+ * kbasep_vinstr_attach_client - Attach a client to the vinstr core
+ * @vinstr_ctx:    vinstr context
+ * @buffer_count:  requested number of dump buffers
+ * @bitmap:        bitmaps describing which counters should be enabled
+ * @argp:          pointer where notification descriptor shall be stored
+ * @kernel_buffer: pointer to kernel side buffer
+ *
+ * Return: vinstr opaque client handle or NULL on failure
+ */
+static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
+		struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count,
+		u32 bitmap[4], void *argp, void *kernel_buffer)
+{
+	struct task_struct         *thread = NULL;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	if (buffer_count > MAX_BUFFER_COUNT
+	    || (buffer_count & (buffer_count - 1)))
+		return NULL;
+
+	cli = kzalloc(sizeof(*cli), GFP_KERNEL);
+	if (!cli)
+		return NULL;
+
+	cli->vinstr_ctx   = vinstr_ctx;
+	cli->buffer_count = buffer_count;
+	cli->event_mask   =
+		(1 << BASE_HWCNT_READER_EVENT_MANUAL) |
+		(1 << BASE_HWCNT_READER_EVENT_PERIODIC);
+	cli->pending      = true;
+
+	hwcnt_bitmap_set(cli->bitmap, bitmap);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap);
+	vinstr_ctx->reprogram = true;
+
+	/* If this is the first client, create the vinstr kbase
+	 * context. This context is permanently resident until the
+	 * last client exits. */
+	if (!vinstr_ctx->nclients) {
+		hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap);
+		if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0)
+			goto error;
+
+		vinstr_ctx->reprogram = false;
+		cli->pending = false;
+	}
+
+	/* The GPU resets the counter block every time there is a request
+	 * to dump it. We need a per client kernel buffer for accumulating
+	 * the counters. */
+	cli->dump_size    = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
+	if (!cli->accum_buffer)
+		goto error;
+
+	/* Prepare buffers. */
+	if (cli->buffer_count) {
+		int *fd = (int *)argp;
+		size_t tmp;
+
+		/* Allocate area for buffers metadata storage. */
+		tmp = sizeof(struct kbase_hwcnt_reader_metadata) *
+			cli->buffer_count;
+		cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL);
+		if (!cli->dump_buffers_meta)
+			goto error;
+
+		/* Allocate required number of dumping buffers. */
+		cli->dump_buffers = (char *)__get_free_pages(
+				GFP_KERNEL | __GFP_ZERO,
+				get_order(cli->dump_size * cli->buffer_count));
+		if (!cli->dump_buffers)
+			goto error;
+
+		/* Create descriptor for user-kernel data exchange. */
+		*fd = anon_inode_getfd(
+				"[mali_vinstr_desc]",
+				&vinstr_client_fops,
+				cli,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd)
+			goto error;
+	} else if (kernel_buffer) {
+		cli->kernel_buffer = kernel_buffer;
+	} else {
+		cli->legacy_buffer = (void __user *)argp;
+	}
+
+	atomic_set(&cli->read_idx, 0);
+	atomic_set(&cli->meta_idx, 0);
+	atomic_set(&cli->write_idx, 0);
+	init_waitqueue_head(&cli->waitq);
+
+	vinstr_ctx->nclients++;
+	list_add(&cli->list, &vinstr_ctx->idle_clients);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return cli;
+
+error:
+	kfree(cli->dump_buffers_meta);
+	if (cli->dump_buffers)
+		free_pages(
+				(unsigned long)cli->dump_buffers,
+				get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	if (!vinstr_ctx->nclients && vinstr_ctx->kctx) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+	kfree(cli);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+
+	return NULL;
+}
+
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	struct kbase_vinstr_client  *iter, *tmp;
+	struct task_struct          *thread = NULL;
+	u32 zerobitmap[4] = { 0 };
+	int cli_found = 0;
+
+	KBASE_DEBUG_ASSERT(cli);
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) {
+		if (iter == cli) {
+			vinstr_ctx->reprogram = true;
+			cli_found = 1;
+			list_del(&iter->list);
+			break;
+		}
+	}
+	if (!cli_found) {
+		list_for_each_entry_safe(
+				iter, tmp, &vinstr_ctx->waiting_clients, list) {
+			if (iter == cli) {
+				vinstr_ctx->reprogram = true;
+				cli_found = 1;
+				list_del(&iter->list);
+				break;
+			}
+		}
+	}
+	KBASE_DEBUG_ASSERT(cli_found);
+
+	kfree(cli->dump_buffers_meta);
+	free_pages(
+			(unsigned long)cli->dump_buffers,
+			get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	kfree(cli);
+
+	vinstr_ctx->nclients--;
+	if (!vinstr_ctx->nclients) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+
+	/* Rebuild context bitmap now that the client has detached */
+	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client);
+
+/* Accumulate counters in the dump buffer */
+static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
+{
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+	u32 *d = dst;
+	u32 *s = src;
+	size_t i, j;
+
+	for (i = 0; i < dump_size; i += block_size) {
+		/* skip over the header block */
+		d += NR_BYTES_PER_HDR / sizeof(u32);
+		s += NR_BYTES_PER_HDR / sizeof(u32);
+		for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
+			/* saturate result if addition would result in wraparound */
+			if (U32_MAX - *d < *s)
+				*d = U32_MAX;
+			else
+				*d += *s;
+			d++;
+			s++;
+		}
+	}
+}
+
+/* This is the Midgard v4 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v4(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups;
+	size_t i, group_size, group;
+	enum {
+		SC0_BASE    = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC1_BASE    = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC2_BASE    = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC3_BASE    = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		TILER_BASE  = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		JM_BASE     = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
+	};
+
+	group_size = NR_CNT_BLOCKS_PER_GROUP *
+			NR_CNT_PER_BLOCK *
+			NR_BYTES_PER_CNT;
+	for (i = 0; i < nr_cg; i++) {
+		group = i * group_size;
+		/* copy shader core headers */
+		memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE],
+		      NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy tiler header */
+		memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy mmu header */
+		memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy job manager header */
+		memcpy(&dst[group + JM_BASE], &src[group + JM_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* patch the shader core enable mask */
+		mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+
+		/* patch the tiler core enable mask */
+		mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[TILER_HWCNT_BM];
+
+		/* patch the mmu core enable mask */
+		mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+
+		/* patch the job manager enable mask */
+		mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[JM_HWCNT_BM];
+	}
+}
+
+/* This is the Midgard v5 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v5(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev;
+	u32 i, nr_l2;
+	u64 core_mask;
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+
+	/* copy and patch job manager header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[JM_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch tiler header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[TILER_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch MMU/L2C headers */
+	nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+	for (i = 0; i < nr_l2; i++) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+		dst += block_size;
+		src += block_size;
+	}
+
+	/* copy and patch shader core headers */
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (0ull != core_mask) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		if (0ull != (core_mask & 1ull)) {
+			/* if block is not reserved update header */
+			mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+			*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		}
+		dst += block_size;
+		src += block_size;
+
+		core_mask >>= 1;
+	}
+}
+
+/**
+ * accum_clients - accumulate dumped hw counters for all known clients
+ * @vinstr_ctx: vinstr context
+ */
+static void accum_clients(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *iter;
+	int v4 = 0;
+
+#ifndef CONFIG_MALI_NO_MALI
+	v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4);
+#endif
+
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ *
+ * Return: timestamp value
+ */
+static u64 kbasep_vinstr_get_timestamp(void)
+{
+	struct timespec ts;
+
+	getrawmonotonic(&ts);
+	return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+}
+
+/**
+ * kbasep_vinstr_add_dump_request - register client's dumping request
+ * @cli:             requesting client
+ * @waiting_clients: list of pending dumping requests
+ */
+static void kbasep_vinstr_add_dump_request(
+		struct kbase_vinstr_client *cli,
+		struct list_head *waiting_clients)
+{
+	struct kbase_vinstr_client *tmp;
+
+	if (list_empty(waiting_clients)) {
+		list_add(&cli->list, waiting_clients);
+		return;
+	}
+	list_for_each_entry(tmp, waiting_clients, list) {
+		if (tmp->dump_time > cli->dump_time) {
+			list_add_tail(&cli->list, &tmp->list);
+			return;
+		}
+	}
+	list_add_tail(&cli->list, waiting_clients);
+}
+
+/**
+ * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level
+ *                                        dump and accumulate them for known
+ *                                        clients
+ * @vinstr_ctx: vinstr context
+ * @timestamp: pointer where collection timestamp will be recorded
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_collect_and_accumulate(
+		struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp)
+{
+	unsigned long flags;
+	int rcode;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* The dummy model needs the CPU mapping. */
+	gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va);
+#endif
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state) {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		return -EAGAIN;
+	} else {
+		vinstr_ctx->state = VINSTR_DUMPING;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Request HW counters dump.
+	 * Disable preemption to make dump timestamp more accurate. */
+	preempt_disable();
+	*timestamp = kbasep_vinstr_get_timestamp();
+	rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx);
+	preempt_enable();
+
+	if (!rcode)
+		rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
+	WARN_ON(rcode);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state)
+	{
+	case VINSTR_SUSPENDING:
+		schedule_work(&vinstr_ctx->suspend_work);
+		break;
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_IDLE;
+		wake_up_all(&vinstr_ctx->suspend_waitq);
+		break;
+	default:
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Accumulate values of collected counters. */
+	if (!rcode)
+		accum_clients(vinstr_ctx);
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel
+ *                                  buffer
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_fill_dump_buffer(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	unsigned int write_idx = atomic_read(&cli->write_idx);
+	unsigned int read_idx  = atomic_read(&cli->read_idx);
+
+	struct kbase_hwcnt_reader_metadata *meta;
+	void                               *buffer;
+
+	/* Check if there is a place to copy HWC block into. */
+	if (write_idx - read_idx == cli->buffer_count)
+		return -1;
+	write_idx %= cli->buffer_count;
+
+	/* Fill in dump buffer and its metadata. */
+	buffer = &cli->dump_buffers[write_idx * cli->dump_size];
+	meta   = &cli->dump_buffers_meta[write_idx];
+	meta->timestamp  = timestamp;
+	meta->event_id   = event_id;
+	meta->buffer_idx = write_idx;
+	memcpy(buffer, cli->accum_buffer, cli->dump_size);
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer
+ *                                         allocated in userspace
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of legacy ioctl interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_legacy(
+		struct kbase_vinstr_client *cli)
+{
+	void __user  *buffer = cli->legacy_buffer;
+	int          rcode;
+
+	/* Copy data to user buffer. */
+	rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size);
+	if (rcode) {
+		pr_warn("error while copying buffer to user\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer
+ *                                         allocated in kernel space
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of the kernel client interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_kernel(
+		struct kbase_vinstr_client *cli)
+{
+	memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_reprogram(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	bool suspended = false;
+
+	/* Don't enable hardware counters if vinstr is suspended. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state)
+		suspended = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (suspended)
+		return;
+
+	/* Change to suspended state is done while holding vinstr context
+	 * lock. Below code will then no re-enable the instrumentation. */
+
+	if (vinstr_ctx->reprogram) {
+		struct kbase_vinstr_client *iter;
+
+		if (!reprogram_hwcnt(vinstr_ctx)) {
+			vinstr_ctx->reprogram = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->idle_clients,
+					list)
+				iter->pending = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->waiting_clients,
+					list)
+				iter->pending = false;
+		}
+	}
+}
+
+/**
+ * kbasep_vinstr_update_client - copy accumulated counters to user readable
+ *                               buffer and notify the user
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_update_client(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	int rcode = 0;
+
+	/* Copy collected counters to user readable buffer. */
+	if (cli->buffer_count)
+		rcode = kbasep_vinstr_fill_dump_buffer(
+				cli, timestamp, event_id);
+	else if (cli->kernel_buffer)
+		rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli);
+	else
+		rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
+
+	if (rcode)
+		goto exit;
+
+
+	/* Notify client. Make sure all changes to memory are visible. */
+	wmb();
+	atomic_inc(&cli->write_idx);
+	wake_up_interruptible(&cli->waitq);
+
+	/* Prepare for next request. */
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+exit:
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function
+ *
+ * @hrtimer: high resolution timer
+ *
+ * Return: High resolution timer restart enum.
+ */
+static enum hrtimer_restart kbasep_vinstr_wake_up_callback(
+		struct hrtimer *hrtimer)
+{
+	struct kbasep_vinstr_wake_up_timer *timer =
+		container_of(
+			hrtimer,
+			struct kbasep_vinstr_wake_up_timer,
+			hrtimer);
+
+	KBASE_DEBUG_ASSERT(timer);
+
+	atomic_set(&timer->vinstr_ctx->request_pending, 1);
+	wake_up_all(&timer->vinstr_ctx->waitq);
+
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * kbasep_vinstr_service_task - HWC dumping service thread
+ *
+ * @data: Pointer to vinstr context structure.
+ *
+ * Return: 0 on success; -ENOMEM if timer allocation fails
+ */
+static int kbasep_vinstr_service_task(void *data)
+{
+	struct kbase_vinstr_context        *vinstr_ctx = data;
+	struct kbasep_vinstr_wake_up_timer *timer;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	timer = kmalloc(sizeof(*timer), GFP_KERNEL);
+
+	if (!timer) {
+		dev_warn(vinstr_ctx->kbdev->dev, "Timer allocation failed!\n");
+		return -ENOMEM;
+	}
+
+	hrtimer_init(&timer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+	timer->hrtimer.function = kbasep_vinstr_wake_up_callback;
+	timer->vinstr_ctx       = vinstr_ctx;
+
+	while (!kthread_should_stop()) {
+		struct kbase_vinstr_client *cli = NULL;
+		struct kbase_vinstr_client *tmp;
+		int                        rcode;
+
+		u64              timestamp = kbasep_vinstr_get_timestamp();
+		u64              dump_time = 0;
+		struct list_head expired_requests;
+
+		/* Hold lock while performing operations on lists of clients. */
+		mutex_lock(&vinstr_ctx->lock);
+
+		/* Closing thread must not interact with client requests. */
+		if (current == vinstr_ctx->thread) {
+			atomic_set(&vinstr_ctx->request_pending, 0);
+
+			if (!list_empty(&vinstr_ctx->waiting_clients)) {
+				cli = list_first_entry(
+						&vinstr_ctx->waiting_clients,
+						struct kbase_vinstr_client,
+						list);
+				dump_time = cli->dump_time;
+			}
+		}
+
+		if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) {
+			mutex_unlock(&vinstr_ctx->lock);
+
+			/* Sleep until next dumping event or service request. */
+			if (cli) {
+				u64 diff = dump_time - timestamp;
+
+				hrtimer_start(
+						&timer->hrtimer,
+						ns_to_ktime(diff),
+						HRTIMER_MODE_REL);
+			}
+			wait_event(
+					vinstr_ctx->waitq,
+					atomic_read(
+						&vinstr_ctx->request_pending) ||
+					kthread_should_stop());
+			hrtimer_cancel(&timer->hrtimer);
+			continue;
+		}
+
+		rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx,
+				&timestamp);
+
+		INIT_LIST_HEAD(&expired_requests);
+
+		/* Find all expired requests. */
+		list_for_each_entry_safe(
+				cli,
+				tmp,
+				&vinstr_ctx->waiting_clients,
+				list) {
+			s64 tdiff =
+				(s64)(timestamp + DUMPING_RESOLUTION) -
+				(s64)cli->dump_time;
+			if (tdiff >= 0ll) {
+				list_del(&cli->list);
+				list_add(&cli->list, &expired_requests);
+			} else {
+				break;
+			}
+		}
+
+		/* Fill data for each request found. */
+		list_for_each_entry_safe(cli, tmp, &expired_requests, list) {
+			/* Ensure that legacy buffer will not be used from
+			 * this kthread context. */
+			BUG_ON(0 == cli->buffer_count);
+			/* Expect only periodically sampled clients. */
+			BUG_ON(0 == cli->dump_interval);
+
+			if (!rcode)
+				kbasep_vinstr_update_client(
+						cli,
+						timestamp,
+						BASE_HWCNT_READER_EVENT_PERIODIC);
+
+			/* Set new dumping time. Drop missed probing times. */
+			do {
+				cli->dump_time += cli->dump_interval;
+			} while (cli->dump_time < timestamp);
+
+			list_del(&cli->list);
+			kbasep_vinstr_add_dump_request(
+					cli,
+					&vinstr_ctx->waiting_clients);
+		}
+
+		/* Reprogram counters set if required. */
+		kbasep_vinstr_reprogram(vinstr_ctx);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	}
+
+	kfree(timer);
+
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers
+ * @cli: pointer to vinstr client structure
+ *
+ * Return: non-zero if client has at least one dumping buffer filled that was
+ *         not notified to user yet
+ */
+static int kbasep_vinstr_hwcnt_reader_buffer_ready(
+		struct kbase_vinstr_client *cli)
+{
+	KBASE_DEBUG_ASSERT(cli);
+	return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int meta_idx = atomic_read(&cli->meta_idx);
+	unsigned int idx = meta_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx];
+
+	/* Metadata sanity check. */
+	KBASE_DEBUG_ASSERT(idx == meta->buffer_idx);
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if there is any buffer available. */
+	if (atomic_read(&cli->write_idx) == meta_idx)
+		return -EAGAIN;
+
+	/* Check if previously taken buffer was put back. */
+	if (atomic_read(&cli->read_idx) != meta_idx)
+		return -EBUSY;
+
+	/* Copy next available buffer's metadata to user. */
+	if (copy_to_user(buffer, meta, size))
+		return -EFAULT;
+
+	atomic_inc(&cli->meta_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int read_idx = atomic_read(&cli->read_idx);
+	unsigned int idx = read_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata meta;
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if any buffer was taken. */
+	if (atomic_read(&cli->meta_idx) == read_idx)
+		return -EPERM;
+
+	/* Check if correct buffer is put back. */
+	if (copy_from_user(&meta, buffer, size))
+		return -EFAULT;
+	if (idx != meta.buffer_idx)
+		return -EINVAL;
+
+	atomic_inc(&cli->read_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @interval: periodic dumping interval (disable periodic dumping if zero)
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+		struct kbase_vinstr_client *cli, u32 interval)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	list_del(&cli->list);
+
+	cli->dump_interval = interval;
+
+	/* If interval is non-zero, enable periodic dumping for this client. */
+	if (cli->dump_interval) {
+		if (DUMPING_RESOLUTION > cli->dump_interval)
+			cli->dump_interval = DUMPING_RESOLUTION;
+		cli->dump_time =
+			kbasep_vinstr_get_timestamp() + cli->dump_interval;
+
+		kbasep_vinstr_add_dump_request(
+				cli, &vinstr_ctx->waiting_clients);
+
+		atomic_set(&vinstr_ctx->request_pending, 1);
+		wake_up_all(&vinstr_ctx->waitq);
+	} else {
+		list_add(&cli->list, &vinstr_ctx->idle_clients);
+	}
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id
+ * @event_id: id of event
+ * Return: event_mask or zero if event is not supported or maskable
+ */
+static u32 kbasep_vinstr_hwcnt_reader_event_mask(
+		enum base_hwcnt_reader_event event_id)
+{
+	u32 event_mask = 0;
+
+	switch (event_id) {
+	case BASE_HWCNT_READER_EVENT_PREJOB:
+	case BASE_HWCNT_READER_EVENT_POSTJOB:
+		/* These event are maskable. */
+		event_mask = (1 << event_id);
+		break;
+
+	case BASE_HWCNT_READER_EVENT_MANUAL:
+	case BASE_HWCNT_READER_EVENT_PERIODIC:
+		/* These event are non-maskable. */
+	default:
+		/* These event are not supported. */
+		break;
+	}
+
+	return event_mask;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to enable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask |= event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to disable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask &= ~event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command
+ * @cli:   pointer to vinstr client structure
+ * @hwver: pointer to user buffer where hw version will be stored
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+		struct kbase_vinstr_client *cli, u32 __user *hwver)
+{
+#ifndef CONFIG_MALI_NO_MALI
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+#endif
+
+	u32                         ver = 5;
+
+#ifndef CONFIG_MALI_NO_MALI
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4))
+		ver = 4;
+#endif
+
+	return put_user(ver, hwver);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl
+ * @filp:   pointer to file structure
+ * @cmd:    user command
+ * @arg:    command's argument
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	long                       rcode = 0;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd)))
+		return -EINVAL;
+
+	switch (cmd) {
+	case KBASE_HWCNT_READER_GET_API_VERSION:
+		rcode = put_user(HWCNT_READER_API, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_HWVER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+				cli, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER_SIZE:
+		KBASE_DEBUG_ASSERT(cli->vinstr_ctx);
+		rcode = put_user(
+				(u32)cli->vinstr_ctx->dump_size,
+				(u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_DUMP:
+		rcode = kbase_vinstr_hwc_dump(
+				cli, BASE_HWCNT_READER_EVENT_MANUAL);
+		break;
+	case KBASE_HWCNT_READER_CLEAR:
+		rcode = kbase_vinstr_hwc_clear(cli);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_PUT_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_SET_INTERVAL:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+				cli, (u32)arg);
+		break;
+	case KBASE_HWCNT_READER_ENABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	case KBASE_HWCNT_READER_DISABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	default:
+		rcode = -EINVAL;
+		break;
+	}
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp,
+		poll_table *wait)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	poll_wait(filp, &cli->waitq, wait);
+	if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap
+ * @filp: pointer to file structure
+ * @vma:  pointer to vma structure
+ * Return: zero on success
+ */
+static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
+		struct vm_area_struct *vma)
+{
+	struct kbase_vinstr_client *cli;
+	unsigned long size, addr, pfn, offset;
+	unsigned long vm_size = vma->vm_end - vma->vm_start;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(vma);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	size = cli->buffer_count * cli->dump_size;
+
+	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
+		return -EINVAL;
+
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	if (vm_size > size - offset)
+		return -EINVAL;
+
+	addr = __pa((unsigned long)cli->dump_buffers + offset);
+	pfn = addr >> PAGE_SHIFT;
+
+	return remap_pfn_range(
+			vma,
+			vma->vm_start,
+			pfn,
+			vm_size,
+			vma->vm_page_prot);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ * Return always return zero
+ */
+static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
+		struct file *filp)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	kbase_vinstr_detach_client(cli);
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_kick_scheduler - trigger scheduler cycle
+ * @kbdev: pointer to kbase device structure
+ */
+static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	down(&js_devdata->schedule_sem);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	up(&js_devdata->schedule_sem);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker suspending vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_suspend_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			suspend_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		disable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_SUSPENDED;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Kick GPU scheduler to allow entering protected mode.
+	 * This must happen after vinstr was suspended. */
+	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker resuming vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_resume_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			resume_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		enable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_IDLE;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Kick GPU scheduler to allow entering protected mode.
+	 * Note that scheduler state machine might requested re-entry to
+	 * protected mode before vinstr was resumed.
+	 * This must happen after vinstr was release. */
+	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+}
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL);
+	if (!vinstr_ctx)
+		return NULL;
+
+	INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
+	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
+	mutex_init(&vinstr_ctx->lock);
+	spin_lock_init(&vinstr_ctx->state_lock);
+	vinstr_ctx->kbdev = kbdev;
+	vinstr_ctx->thread = NULL;
+	vinstr_ctx->state = VINSTR_IDLE;
+	vinstr_ctx->suspend_cnt = 0;
+	INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker);
+	INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker);
+	init_waitqueue_head(&vinstr_ctx->suspend_waitq);
+
+	atomic_set(&vinstr_ctx->request_pending, 0);
+	init_waitqueue_head(&vinstr_ctx->waitq);
+
+	return vinstr_ctx;
+}
+
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *cli;
+
+	/* Stop service thread first. */
+	if (vinstr_ctx->thread)
+		kthread_stop(vinstr_ctx->thread);
+
+	/* Wait for workers. */
+	flush_work(&vinstr_ctx->suspend_work);
+	flush_work(&vinstr_ctx->resume_work);
+
+	while (1) {
+		struct list_head *list = &vinstr_ctx->idle_clients;
+
+		if (list_empty(list)) {
+			list = &vinstr_ctx->waiting_clients;
+			if (list_empty(list))
+				break;
+		}
+
+		cli = list_first_entry(list, struct kbase_vinstr_client, list);
+		list_del(&cli->list);
+		kfree(cli->accum_buffer);
+		kfree(cli);
+		vinstr_ctx->nclients--;
+	}
+	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients);
+	if (vinstr_ctx->kctx)
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	kfree(vinstr_ctx);
+}
+
+int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup)
+{
+	struct kbase_vinstr_client  *cli;
+	u32                         bitmap[4];
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(setup->buffer_count);
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	cli = kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			setup->buffer_count,
+			bitmap,
+			&setup->fd,
+			NULL);
+
+	if (!cli)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup)
+{
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (setup->dump_buffer) {
+		u32 bitmap[4];
+
+		bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+		bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+		bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+		bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+		if (*cli)
+			return -EBUSY;
+
+		*cli = kbasep_vinstr_attach_client(
+				vinstr_ctx,
+				0,
+				bitmap,
+				(void *)(long)setup->dump_buffer,
+				NULL);
+
+		if (!(*cli))
+			return -ENOMEM;
+	} else {
+		if (!*cli)
+			return -EINVAL;
+
+		kbase_vinstr_detach_client(*cli);
+		*cli = NULL;
+	}
+
+	return 0;
+}
+
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer)
+{
+	u32 bitmap[4];
+
+	if (!vinstr_ctx || !setup || !kernel_buffer)
+		return NULL;
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	return kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			0,
+			bitmap,
+			NULL,
+			kernel_buffer);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
+
+int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	int                         rcode = 0;
+	struct kbase_vinstr_context *vinstr_ctx;
+	u64                         timestamp;
+	u32                         event_mask;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT);
+	event_mask = 1 << event_id;
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (event_mask & cli->event_mask) {
+		rcode = kbasep_vinstr_collect_and_accumulate(
+				vinstr_ctx,
+				&timestamp);
+		if (rcode)
+			goto exit;
+
+		rcode = kbasep_vinstr_update_client(cli, timestamp, event_id);
+		if (rcode)
+			goto exit;
+
+		kbasep_vinstr_reprogram(vinstr_ctx);
+	}
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump);
+
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	int                         rcode;
+	u64                         unused;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
+	if (rcode)
+		goto exit;
+	rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx);
+	if (rcode)
+		goto exit;
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+	kbasep_vinstr_reprogram(vinstr_ctx);
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	int ret = -EAGAIN;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+		vinstr_ctx->suspend_cnt++;
+		/* overflow shall not happen */
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		ret = 0;
+		break;
+
+	case VINSTR_IDLE:
+		if (vinstr_ctx->clients_present) {
+			vinstr_ctx->state = VINSTR_SUSPENDING;
+			schedule_work(&vinstr_ctx->suspend_work);
+		} else {
+			vinstr_ctx->state = VINSTR_SUSPENDED;
+
+			vinstr_ctx->suspend_cnt++;
+			/* overflow shall not happen */
+			WARN_ON(0 == vinstr_ctx->suspend_cnt);
+			ret = 0;
+		}
+		break;
+
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		break;
+
+	case VINSTR_SUSPENDING:
+		/* fall through */
+	case VINSTR_RESUMING:
+		break;
+
+	default:
+		BUG();
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	return ret;
+}
+
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	wait_event(vinstr_ctx->suspend_waitq,
+			(0 == kbase_vinstr_try_suspend(vinstr_ctx)));
+}
+
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state);
+	if (VINSTR_SUSPENDED == vinstr_ctx->state) {
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		vinstr_ctx->suspend_cnt--;
+		if (0 == vinstr_ctx->suspend_cnt) {
+			if (vinstr_ctx->clients_present) {
+				vinstr_ctx->state = VINSTR_RESUMING;
+				schedule_work(&vinstr_ctx->resume_work);
+			} else {
+				vinstr_ctx->state = VINSTR_IDLE;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
new file mode 100644
index 0000000..4dbf7ee
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -0,0 +1,177 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+#include <mali_kbase_hwcnt_reader.h>
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context;
+struct kbase_vinstr_client;
+
+struct kbase_uk_hwcnt_setup {
+	/* IN */
+	u64 dump_buffer;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 unused_1; /* keep for backwards compatibility */
+	u32 mmu_l2_bm;
+	u32 padding;
+	/* OUT */
+};
+
+struct kbase_uk_hwcnt_reader_setup {
+	/* IN */
+	u32 buffer_count;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 mmu_l2_bm;
+
+	/* OUT */
+	s32 fd;
+};
+/*****************************************************************************/
+
+/**
+ * kbase_vinstr_init() - initialize the vinstr core
+ * @kbdev: kbase device
+ *
+ * Return: pointer to the vinstr context on success or NULL on failure
+ */
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_term() - terminate the vinstr core
+ * @vinstr_ctx: vinstr context
+ */
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader
+ * @vinstr_ctx: vinstr context
+ * @setup:      reader's configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwcnt_reader_setup(
+		struct kbase_vinstr_context        *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup);
+
+/**
+ * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping
+ * @vinstr_ctx: vinstr context
+ * @cli:        pointer where to store pointer to new vinstr client structure
+ * @setup:      hwc configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
+ *                                   client
+ * @vinstr_ctx:    vinstr context
+ * @setup:         reader's configuration
+ * @kernel_buffer: pointer to dump buffer
+ *
+ * setup->buffer_count and setup->fd are not used for kernel side clients.
+ *
+ * Return: pointer to client structure, or NULL on failure
+ */
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer);
+
+/**
+ * kbase_vinstr_hwc_dump - issue counter dump for vinstr client
+ * @cli:      pointer to vinstr client
+ * @event_id: id of event that triggered hwcnt dump
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_dump(
+		struct kbase_vinstr_client   *cli,
+		enum base_hwcnt_reader_event event_id);
+
+/**
+ * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for
+ *                          a given kbase context
+ * @cli: pointer to vinstr client
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Return: 0 on success, or negative if state change is in progress
+ *
+ * Warning: This API call is non-generic. It is meant to be used only by
+ *          job scheduler state machine.
+ *
+ * Function initiates vinstr switch to suspended state. Once it was called
+ * vinstr enters suspending state. If function return non-zero value, it
+ * indicates that state switch is not complete and function must be called
+ * again. On state switch vinstr will trigger job scheduler state machine
+ * cycle.
+ */
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_suspend - suspends operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function initiates vinstr switch to suspended state. Then it blocks until
+ * operation is completed.
+ */
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_resume - resumes operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function can be called only if it was preceded by a successful call
+ * to kbase_vinstr_suspend.
+ */
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_dump_size - Return required size of dump buffer
+ * @kbdev: device pointer
+ *
+ * Return : buffer size in bytes
+ */
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_detach_client - Detach a client from the vinstr core
+ * @cli: pointer to vinstr client
+ */
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
+
+#endif /* _KBASE_VINSTR_H_ */
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100644
index 0000000..5d6b402
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_KBASE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mali_slot_template,
+	TP_PROTO(int jobslot, unsigned int info_val),
+	TP_ARGS(jobslot, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, jobslot)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->jobslot = jobslot;
+		__entry->info_val = info_val;
+	),
+	TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val)
+);
+
+#define DEFINE_MALI_SLOT_EVENT(name) \
+DEFINE_EVENT(mali_slot_template, mali_##name, \
+	TP_PROTO(int jobslot, unsigned int info_val), \
+	TP_ARGS(jobslot, info_val))
+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT);
+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE);
+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT);
+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT);
+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+#undef DEFINE_MALI_SLOT_EVENT
+
+DECLARE_EVENT_CLASS(mali_refcount_template,
+	TP_PROTO(int refcount, unsigned int info_val),
+	TP_ARGS(refcount, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, refcount)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->refcount = refcount;
+		__entry->info_val = info_val;
+	),
+	TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val)
+);
+
+#define DEFINE_MALI_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(mali_refcount_template, mali_##name, \
+	TP_PROTO(int refcount, unsigned int info_val), \
+	TP_ARGS(refcount, info_val))
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK);
+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE);
+#undef DEFINE_MALI_REFCOUNT_EVENT
+
+DECLARE_EVENT_CLASS(mali_add_template,
+	TP_PROTO(int gpu_addr, unsigned int info_val),
+	TP_ARGS(gpu_addr, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, gpu_addr)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->gpu_addr = gpu_addr;
+		__entry->info_val = info_val;
+	),
+	TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val)
+);
+
+#define DEFINE_MALI_ADD_EVENT(name) \
+DEFINE_EVENT(mali_add_template, mali_##name, \
+	TP_PROTO(int gpu_addr, unsigned int info_val), \
+	TP_ARGS(gpu_addr, info_val))
+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY);
+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL);
+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT);
+DEFINE_MALI_ADD_EVENT(JM_IRQ);
+DEFINE_MALI_ADD_EVENT(JM_IRQ_END);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP);
+DEFINE_MALI_ADD_EVENT(PM_PWRON);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM);
+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+#undef DEFINE_MALI_ADD_EVENT
+
+#endif /* _TRACE_MALI_KBASE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100644
index 0000000..2be06a5
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,189 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#include <linux/tracepoint.h>
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - called from mali_kbase_core_linux.c
+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event,
+	TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid,
+			unsigned char job_id),
+	TP_ARGS(event_id, tgid, pid, job_id),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned int, tgid)
+		__field(unsigned int, pid)
+		__field(unsigned char, job_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->tgid = tgid;
+		__entry->pid = pid;
+		__entry->job_id = job_id;
+	),
+	TP_printk("event=%u tgid=%u pid=%u job_id=%u",
+		__entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+);
+
+/**
+ * mali_pm_status - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF)
+ */
+TRACE_EVENT(mali_pm_status,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power up
+ */
+TRACE_EVENT(mali_pm_power_on,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power down
+ */
+TRACE_EVENT(mali_pm_power_off,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_page_fault_insert_pages - Called by page_fault_worker()
+ * it reports an MMU page fault resulting in new pages being mapped.
+ * @event_id: MMU address space number.
+ * @value: number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages,
+	TP_PROTO(int event_id, unsigned long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+		__field(unsigned long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space()
+ * it reports that a certain MMU address space is in use now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_in_use,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal()
+ * it reports that a certain MMU address space has been released now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_released,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
+ *                                 and by kbase_atomic_sub_pages()
+ * it reports that the total number of allocated pages is changed.
+ * @event_id: number of pages to be added or subtracted (according to the sign).
+ */
+TRACE_EVENT(mali_total_alloc_pages_change,
+	TP_PROTO(long long int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(long long int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%lld", __entry->event_id)
+);
+
+#endif				/*  _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
new file mode 100644
index 0000000..9945293
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Kernel-wide include for common macros and types.
+ */
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define U8_MAX          ((u8)~0U)
+#define S8_MAX          ((s8)(U8_MAX>>1))
+#define S8_MIN          ((s8)(-S8_MAX - 1))
+#define U16_MAX         ((u16)~0U)
+#define S16_MAX         ((s16)(U16_MAX>>1))
+#define S16_MIN         ((s16)(-S16_MAX - 1))
+#define U32_MAX         ((u32)~0U)
+#define S32_MAX         ((s32)(U32_MAX>>1))
+#define S32_MIN         ((s32)(-S32_MAX - 1))
+#define U64_MAX         ((u64)~0ULL)
+#define S64_MAX         ((s64)(U64_MAX>>1))
+#define S64_MIN         ((s64)(-S64_MAX - 1))
+#endif /* LINUX_VERSION_CODE */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#define SIZE_MAX        (~(size_t)0)
+#endif /* LINUX_VERSION_CODE */
+
+/**
+ * MIN - Return the lesser of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * Refer to MAX macro for more details
+ */
+#define MIN(x, y)	((x) < (y) ? (x) : (y))
+
+/**
+ * MAX -  Return the greater of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as MIN it is guaranteed to return
+ * the one that MIN didn't return. This is significant for types where not
+ * all values are comparable e.g. NaNs in floating-point types. But if you want
+ * to retrieve the min and max of two values, consider using a conditional swap
+ * instead.
+ */
+#define MAX(x, y)	((x) < (y) ? (y) : (x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * much support API backwards compatibility.
+ */
+#define CSTD_UNUSED(x)	((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile time macros turn a function-like macro in to a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP(...)	((void)#__VA_ARGS__)
+
+/**
+ * Function-like macro for converting a pointer in to a u64 for storing into
+ * an external data structure. This is commonly used when pairing a 32-bit
+ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
+ * is complex and a straight cast does not work reliably as pointers are
+ * often considered as signed.
+ */
+#define PTR_TO_U64(x)	((uint64_t)((uintptr_t)(x)))
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1(x)	#x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be used
+ * if the macro is defined in a way which may have no value; use the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2(x)	CSTD_STR1(x)
+
+/**
+ * Specify an assertion value which is evaluated at compile time. Recommended
+ * usage is specification of a @c static @c INLINE function containing all of
+ * the assertions thus:
+ *
+ * @code
+ * static INLINE [module]_compile_time_assertions( void )
+ * {
+ *     COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
+ * }
+ * @endcode
+ *
+ * @note Use @c static not @c STATIC. We never want to turn off this @c static
+ * specification for testing purposes.
+ */
+#define CSTD_COMPILE_TIME_ASSERT(expr) \
+	do { switch (0) { case 0: case (expr):; } } while (false)
+
+#endif /* _MALISW_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
new file mode 100644
index 0000000..a509cbd
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDG_COHERENCY_H_
+#define _MIDG_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE      1
+#define COHERENCY_NONE     31
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+
+#endif /* _MIDG_COHERENCY_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100644
index 0000000..59adfb8
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,619 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Begin Register Offsets
+ */
+
+#define GPU_CONTROL_BASE        0x0000
+#define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
+#define GPU_ID                  0x000	/* (RO) GPU and revision identifier */
+#define L2_FEATURES             0x004	/* (RO) Level 2 cache features */
+#define SUSPEND_SIZE            0x008   /* (RO) Fixed-function suspend buffer
+						size */
+#define TILER_FEATURES          0x00C	/* (RO) Tiler Features */
+#define MEM_FEATURES            0x010	/* (RO) Memory system features */
+#define MMU_FEATURES            0x014	/* (RO) MMU features */
+#define AS_PRESENT              0x018	/* (RO) Address space slots present */
+#define JS_PRESENT              0x01C	/* (RO) Job slots present */
+#define GPU_IRQ_RAWSTAT         0x020	/* (RW) */
+#define GPU_IRQ_CLEAR           0x024	/* (WO) */
+#define GPU_IRQ_MASK            0x028	/* (RW) */
+#define GPU_IRQ_STATUS          0x02C	/* (RO) */
+
+/* IRQ flags */
+#define GPU_FAULT               (1 << 0)	/* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS     (1 << 7)	/* More than one GPU Fault occurred. */
+#define RESET_COMPLETED         (1 << 8)	/* Set when a reset has completed. Intended to use with SOFT_RESET
+						   commands which may take time. */
+#define POWER_CHANGED_SINGLE    (1 << 9)	/* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL       (1 << 10)	/* Set when all cores have finished powering up or down
+						   and the power manager is idle. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16)	/* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED  (1 << 17)	/* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+			| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#define GPU_COMMAND             0x030	/* (WO) */
+#define GPU_STATUS              0x034	/* (RO) */
+#define LATEST_FLUSH            0x038	/* (RO) */
+
+#define GROUPS_L2_COHERENT      (1 << 0)	/* Cores groups are l2 coherent */
+#define GPU_DBGEN               (1 << 8)	/* DBGEN wire status */
+
+#define GPU_FAULTSTATUS         0x03C	/* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO     0x040	/* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI     0x044	/* (RO) GPU exception fault address, high word */
+
+#define PWR_KEY                 0x050	/* (WO) Power manager key register */
+#define PWR_OVERRIDE0           0x054	/* (RW) Power manager override settings */
+#define PWR_OVERRIDE1           0x058	/* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO          0x060	/* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI          0x064	/* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG           0x068	/* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN            0x06C	/* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN        0x070	/* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN         0x074	/* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN        0x07C	/* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO          0x090	/* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI          0x094	/* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO            0x098	/* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI            0x09C	/* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS		0x0A0	/* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4	/* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8	/* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES         0x0AC	/* (RO) Thread features */
+
+#define TEXTURE_FEATURES_0      0x0B0	/* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1      0x0B4	/* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2      0x0B8	/* (RO) Support flags for indexed texture formats 64..95 */
+#define TEXTURE_FEATURES_3      0x0BC	/* (RO) Support flags for texture order */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define JS0_FEATURES            0x0C0	/* (RO) Features of job slot 0 */
+#define JS1_FEATURES            0x0C4	/* (RO) Features of job slot 1 */
+#define JS2_FEATURES            0x0C8	/* (RO) Features of job slot 2 */
+#define JS3_FEATURES            0x0CC	/* (RO) Features of job slot 3 */
+#define JS4_FEATURES            0x0D0	/* (RO) Features of job slot 4 */
+#define JS5_FEATURES            0x0D4	/* (RO) Features of job slot 5 */
+#define JS6_FEATURES            0x0D8	/* (RO) Features of job slot 6 */
+#define JS7_FEATURES            0x0DC	/* (RO) Features of job slot 7 */
+#define JS8_FEATURES            0x0E0	/* (RO) Features of job slot 8 */
+#define JS9_FEATURES            0x0E4	/* (RO) Features of job slot 9 */
+#define JS10_FEATURES           0x0E8	/* (RO) Features of job slot 10 */
+#define JS11_FEATURES           0x0EC	/* (RO) Features of job slot 11 */
+#define JS12_FEATURES           0x0F0	/* (RO) Features of job slot 12 */
+#define JS13_FEATURES           0x0F4	/* (RO) Features of job slot 13 */
+#define JS14_FEATURES           0x0F8	/* (RO) Features of job slot 14 */
+#define JS15_FEATURES           0x0FC	/* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n)      GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define SHADER_PRESENT_LO       0x100	/* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI       0x104	/* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO        0x110	/* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI        0x114	/* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO           0x120	/* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI           0x124	/* (RO) Level 2 cache present bitmap, high word */
+
+#define STACK_PRESENT_LO        0xE00   /* (RO) Core stack present bitmap, low word */
+#define STACK_PRESENT_HI        0xE04   /* (RO) Core stack present bitmap, high word */
+
+
+#define SHADER_READY_LO         0x140	/* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI         0x144	/* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO          0x150	/* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI          0x154	/* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO             0x160	/* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI             0x164	/* (RO) Level 2 cache ready bitmap, high word */
+
+#define STACK_READY_LO          0xE10   /* (RO) Core stack ready bitmap, low word */
+#define STACK_READY_HI          0xE14   /* (RO) Core stack ready bitmap, high word */
+
+
+#define SHADER_PWRON_LO         0x180	/* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI         0x184	/* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO          0x190	/* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI          0x194	/* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO             0x1A0	/* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI             0x1A4	/* (WO) Level 2 cache power on bitmap, high word */
+
+#define STACK_PWRON_LO          0xE20   /* (RO) Core stack power on bitmap, low word */
+#define STACK_PWRON_HI          0xE24   /* (RO) Core stack power on bitmap, high word */
+
+
+#define SHADER_PWROFF_LO        0x1C0	/* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI        0x1C4	/* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO         0x1D0	/* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI         0x1D4	/* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO            0x1E0	/* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI            0x1E4	/* (WO) Level 2 cache power off bitmap, high word */
+
+#define STACK_PWROFF_LO         0xE30   /* (RO) Core stack power off bitmap, low word */
+#define STACK_PWROFF_HI         0xE34   /* (RO) Core stack power off bitmap, high word */
+
+
+#define SHADER_PWRTRANS_LO      0x200	/* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI      0x204	/* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO       0x210	/* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI       0x214	/* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO          0x220	/* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI          0x224	/* (RO) Level 2 cache power transition bitmap, high word */
+
+#define STACK_PWRTRANS_LO       0xE40   /* (RO) Core stack power transition bitmap, low word */
+#define STACK_PWRTRANS_HI       0xE44   /* (RO) Core stack power transition bitmap, high word */
+
+
+#define SHADER_PWRACTIVE_LO     0x240	/* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI     0x244	/* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO      0x250	/* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI      0x254	/* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO         0x260	/* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI         0x264	/* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES      0x300	/* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304	/* (RW) Coherency enable */
+
+#define JM_CONFIG               0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
+#define SHADER_CONFIG           0xF04	/* (RW) Shader core configuration settings (Implementation specific register) */
+#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define L2_MMU_CONFIG           0xF0C	/* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+#define JOB_CONTROL_BASE        0x1000
+
+#define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT         0x000	/* Raw interrupt status register */
+#define JOB_IRQ_CLEAR           0x004	/* Interrupt clear register */
+#define JOB_IRQ_MASK            0x008	/* Interrupt mask register */
+#define JOB_IRQ_STATUS          0x00C	/* Interrupt status register */
+#define JOB_IRQ_JS_STATE        0x010	/* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE        0x014	/* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
+
+#define JOB_SLOT0               0x800	/* Configuration registers for job slot 0 */
+#define JOB_SLOT1               0x880	/* Configuration registers for job slot 1 */
+#define JOB_SLOT2               0x900	/* Configuration registers for job slot 2 */
+#define JOB_SLOT3               0x980	/* Configuration registers for job slot 3 */
+#define JOB_SLOT4               0xA00	/* Configuration registers for job slot 4 */
+#define JOB_SLOT5               0xA80	/* Configuration registers for job slot 5 */
+#define JOB_SLOT6               0xB00	/* Configuration registers for job slot 6 */
+#define JOB_SLOT7               0xB80	/* Configuration registers for job slot 7 */
+#define JOB_SLOT8               0xC00	/* Configuration registers for job slot 8 */
+#define JOB_SLOT9               0xC80	/* Configuration registers for job slot 9 */
+#define JOB_SLOT10              0xD00	/* Configuration registers for job slot 10 */
+#define JOB_SLOT11              0xD80	/* Configuration registers for job slot 11 */
+#define JOB_SLOT12              0xE00	/* Configuration registers for job slot 12 */
+#define JOB_SLOT13              0xE80	/* Configuration registers for job slot 13 */
+#define JOB_SLOT14              0xF00	/* Configuration registers for job slot 14 */
+#define JOB_SLOT15              0xF80	/* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r)      (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO             0x00	/* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI             0x04	/* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO             0x08	/* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI             0x0C	/* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
+#define JS_XAFFINITY           0x1C	/* (RO) Extended affinity mask for job
+					   slot n */
+
+#define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
+#define JS_STATUS              0x24	/* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO        0x40	/* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI        0x44	/* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
+#define JS_XAFFINITY_NEXT      0x5C	/* (RW) Next extended affinity mask for
+					   job slot n */
+
+#define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
+
+#define MEMORY_MANAGEMENT_BASE  0x2000
+#define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT         0x000	/* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR           0x004	/* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK            0x008	/* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS          0x00C	/* (RO) Interrupt status register */
+
+#define MMU_AS0                 0x400	/* Configuration registers for address space 0 */
+#define MMU_AS1                 0x440	/* Configuration registers for address space 1 */
+#define MMU_AS2                 0x480	/* Configuration registers for address space 2 */
+#define MMU_AS3                 0x4C0	/* Configuration registers for address space 3 */
+#define MMU_AS4                 0x500	/* Configuration registers for address space 4 */
+#define MMU_AS5                 0x540	/* Configuration registers for address space 5 */
+#define MMU_AS6                 0x580	/* Configuration registers for address space 6 */
+#define MMU_AS7                 0x5C0	/* Configuration registers for address space 7 */
+#define MMU_AS8                 0x600	/* Configuration registers for address space 8 */
+#define MMU_AS9                 0x640	/* Configuration registers for address space 9 */
+#define MMU_AS10                0x680	/* Configuration registers for address space 10 */
+#define MMU_AS11                0x6C0	/* Configuration registers for address space 11 */
+#define MMU_AS12                0x700	/* Configuration registers for address space 12 */
+#define MMU_AS13                0x740	/* Configuration registers for address space 13 */
+#define MMU_AS14                0x780	/* Configuration registers for address space 14 */
+#define MMU_AS15                0x7C0	/* Configuration registers for address space 15 */
+
+#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO         0x00	/* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI         0x04	/* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO          0x08	/* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI          0x0C	/* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO         0x10	/* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI         0x14	/* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND             0x18	/* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS         0x1C	/* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO     0x20	/* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
+#define AS_STATUS              0x28	/* (RO) Status flags for address space n */
+
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
+
+/* End Register Offsets */
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+   MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS   16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n)      (1UL << (n))
+#define MMU_BUS_ERROR(n)       (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK   0xfffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED  (0u << 0)
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY  (1u << 1)
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE     (3u << 0)
+#define AS_TRANSTAB_LPAE_READ_INNER        (1u << 2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER       (1u << 4)
+
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK      0x00000003
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                    (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT       (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT        (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT      (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG             (0x3<<3)
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT      (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK                  (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC                (0x0<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX                    (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ                  (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE                 (0x3<<8)
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
+
+/*
+ * Begin Command Values
+ */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP         0x00	/* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START       0x01	/* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP   0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK        0x07    /* Mask of bits currently in use by the HW */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP         0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK        0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK      0x03	/* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH       0x04	/* Flush all L2 caches then issue a flush region command to all MMUs
+					   (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT    0x04	/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM   0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
+					   flush all L2 caches then issue a flush region command to all MMUs */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION        (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN            (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU                    (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG               (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
+
+/* JS_XAFFINITY register values */
+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
+#define JS_XAFFINITY_TILER_ENABLE     (1u << 8)
+#define JS_XAFFINITY_CACHE_ENABLE     (1u << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
+ */
+
+/* Group of values representing the job status insead a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE   0x00
+#define JS_STATUS_INTERRUPTED         (JS_STATUS_NO_EXCEPTION_BASE + 0x02)	/* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED             (JS_STATUS_NO_EXCEPTION_BASE + 0x03)	/* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED          (JS_STATUS_NO_EXCEPTION_BASE + 0x04)	/* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE          0x40
+#define JS_STATUS_CONFIG_FAULT        (JS_STATUS_FAULT_BASE)	/* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT         (JS_STATUS_FAULT_BASE + 0x01)	/* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT          (JS_STATUS_FAULT_BASE + 0x02)	/* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT         (JS_STATUS_FAULT_BASE + 0x03)	/* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT      (JS_STATUS_FAULT_BASE + 0x04)	/* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT           (JS_STATUS_FAULT_BASE + 0x08)	/* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE  0x50
+#define JS_STATUS_INSTR_INVALID_PC        (JS_STATUS_INSTRUCTION_FAULT_BASE)	/* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01)	/* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02)	/* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03)	/* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT         (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04)	/* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05)	/* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06)	/* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT      (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08)	/* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT        (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09)	/* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A)	/* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE   0x60
+#define JS_STATUS_OUT_OF_MEMORY       (JS_STATUS_MEMORY_FAULT_BASE)	/* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN             0x7F	/* 0x7F means UNKNOWN */
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP                0x00	/* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET         0x01	/* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET         0x02	/* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR       0x03	/* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE      0x04	/* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START  0x05	/* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP   0x06	/* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES       0x07	/* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES   0x08	/* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09	/* Places the GPU in protected mode */
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE           (1 << 2)	/* Set if the performance counters are active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE   (1 << 7)	/* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT      0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT        4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+
+#define PRFCNT_CONFIG_MODE_OFF    0	/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_MANUAL 1	/* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
+#define PRFCNT_CONFIG_MODE_TILE   2	/* The performance counters are enabled, and are written out each time a tile finishes rendering. */
+
+/* AS<n>_MEMATTR values: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
+
+/* Use GPU implementation-defined  caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC           0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
+
+/* Symbols for default MEMATTR to use
+ * Default is - HW implementation defined caching */
+#define AS_MEMATTR_INDEX_DEFAULT               0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE           3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL    1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC           2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF        3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA              4
+
+/* JS<n>_FEATURES register */
+
+#define JS_FEATURE_NULL_JOB              (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB         (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB       (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB           (1u << 4)
+#define JS_FEATURE_VERTEX_JOB            (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB          (1u << 6)
+#define JS_FEATURE_TILER_JOB             (1u << 7)
+#define JS_FEATURE_FUSED_JOB             (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB          (1u << 9)
+
+/* End JS<n>_FEATURES register */
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT        (24)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS              (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT       (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER      (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF         (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT       (26)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES             (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT      (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER     (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF        (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT      (12)
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS            (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT     (15)
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES           (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED  0
+#define IMPLEMENTATION_SILICON      1
+#define IMPLEMENTATION_FPGA         2
+#define IMPLEMENTATION_MODEL        3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT     256
+#define THREAD_MWS_DEFAULT    256
+#define THREAD_MBS_DEFAULT    256
+#define THREAD_MR_DEFAULT     1024
+#define THREAD_MTQ_DEFAULT    4
+#define THREAD_MTGS_DEFAULT   10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+
+#define SC_ALT_COUNTERS             (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL  (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD   (1ul << 6)
+#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
+#define SC_LS_PAUSEBUFFER_DISABLE   (1ul << 16)
+#define SC_TLS_HASH_ENABLE          (1ul << 17)
+#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
+#define SC_ENABLE_TEXGRD_FLAGS      (1ul << 25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+
+#define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
+
+/* End TILER_CONFIG register */
+
+/* JM_CONFIG register */
+
+#define JM_TIMESTAMP_OVERRIDE  (1ul << 0)
+#define JM_CLOCK_GATE_OVERRIDE (1ul << 1)
+#define JM_JOB_THROTTLE_ENABLE (1ul << 2)
+#define JM_JOB_THROTTLE_LIMIT_SHIFT (3)
+#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F)
+#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2)
+#define JM_IDVS_GROUP_SIZE_SHIFT (16)
+#define JM_MAX_IDVS_GROUP_SIZE (0x3F)
+/* End JM_CONFIG register */
+
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
new file mode 100644
index 0000000..bd5f661
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
@@ -0,0 +1,396 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali_timeline
+
+#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _MALI_TIMELINE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mali_timeline_atoms_in_flight,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int tgid,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		tgid,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, tgid)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->tgid = tgid;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT,
+				(int)__entry->ts_sec,
+				(int)__entry->ts_nsec,
+				__entry->tgid,
+				__entry->count)
+);
+
+
+TRACE_EVENT(mali_timeline_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->atom_id,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_action,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int active),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		active),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, active)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->active = active;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->active)
+
+);
+
+TRACE_EVENT(mali_timeline_l2_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int state),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		state),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, state)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->state = state;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->state)
+
+);
+TRACE_EVENT(mali_timeline_pm_event,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int pm_event_type,
+		unsigned int pm_event_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		pm_event_type,
+		pm_event_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, pm_event_type)
+			__field(unsigned int, pm_event_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->pm_event_type = pm_event_type;
+		__entry->pm_event_id = pm_event_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->pm_event_type, __entry->pm_event_id)
+
+);
+
+TRACE_EVENT(mali_timeline_slot_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_pm_checktrans,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int trans_code,
+		int trans_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		trans_code,
+		trans_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, trans_code)
+			__field(int, trans_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->trans_code = trans_code;
+		__entry->trans_id = trans_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->trans_id)
+
+);
+
+TRACE_EVENT(mali_timeline_context_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->count)
+);
+
+#endif /* _MALI_TIMELINE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_uk.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
new file mode 100644
index 0000000..841d03f
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,141 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif				/* __cplusplus */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
+ * identifier to select a UKK client to the uku_open() function.
+ *
+ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
+ * enumeration and the uku_open() implemenation for the various OS ports need to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+	/**
+	 * Value used to identify the Base driver UK client.
+	 */
+	UK_CLIENT_MALI_T600_BASE,
+
+	/** The number of uk clients supported. This must be the last member of the enum */
+	UK_CLIENT_COUNT
+};
+
+/**
+ * Each function callable through the UK interface has a unique number.
+ * Functions provided by UK clients start from number UK_FUNC_ID.
+ * Numbers below UK_FUNC_ID are used for internal UK functions.
+ */
+enum uk_func {
+	UKP_FUNC_ID_CHECK_VERSION,   /**< UKK Core internal function */
+	/**
+	 * Each UK client numbers the functions they provide starting from
+	 * number UK_FUNC_ID. This number is then eventually assigned to the
+	 * id field of the union uk_header structure when preparing to make a
+	 * UK call. See your UK client for a list of their function numbers.
+	 */
+	UK_FUNC_ID = 512
+};
+
+/**
+ * Arguments for a UK call are stored in a structure. This structure consists
+ * of a fixed size header and a payload. The header carries a 32-bit number
+ * identifying the UK function to be called (see uk_func). When the UKK client
+ * receives this header and executed the requested UK function, it will use
+ * the same header to store the result of the function in the form of a
+ * int return code. The size of this structure is such that the
+ * first member of the payload following the header can be accessed efficiently
+ * on a 32 and 64-bit kernel and the structure has the same size regardless
+ * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
+ * accordingly in the OS specific mali_uk_os.h header file.
+ */
+union uk_header {
+	/**
+	 * 32-bit number identifying the UK function to be called.
+	 * Also see uk_func.
+	 */
+	u32 id;
+	/**
+	 * The int return code returned by the called UK function.
+	 * See the specification of the particular UK function you are
+	 * calling for the meaning of the error codes returned. All
+	 * UK functions return 0 on success.
+	 */
+	u32 ret;
+	/*
+	 * Used to ensure 64-bit alignment of this union. Do not remove.
+	 * This field is used for padding and does not need to be initialized.
+	 */
+	u64 sizer;
+};
+
+/**
+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
+ */
+struct uku_version_check_args {
+	union uk_header header;
+		  /**< UK call header */
+	u16 major;
+	   /**< This field carries the user-side major version on input and the kernel-side major version on output */
+	u16 minor;
+	   /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
+	u8 padding[4];
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif				/* __cplusplus */
+#endif				/* _UK_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100644
index 0000000..38835d3
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name is the name set in MALI_PLATFORM_NAME
+#
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
new file mode 100644
index 0000000..5043558
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
@@ -0,0 +1,31 @@
+#
+# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+USE_GPPLL?=0
+ifdef CONFIG_AM_VIDEO
+    USE_GPPLL:=1
+endif
+
+ccflags-y += -DAMLOGIC_GPU_USE_GPPLL=$(USE_GPPLL)
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \
+	$(MALI_PLATFORM_DIR)/mali_clock.o \
+	$(MALI_PLATFORM_DIR)/mpgpu.o \
+	$(MALI_PLATFORM_DIR)/meson_main2.o \
+	$(MALI_PLATFORM_DIR)/platform_gx.o \
+	$(MALI_PLATFORM_DIR)/scaling.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
new file mode 100644
index 0000000..fa31485
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
@@ -0,0 +1,650 @@
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+#ifndef AML_CLK_LOCK_ERROR
+#define AML_CLK_LOCK_ERROR 1
+#endif
+
+static unsigned gpu_dbg_level = 0;
+module_param(gpu_dbg_level, uint, 0644);
+MODULE_PARM_DESC(gpu_dbg_level, "gpu debug level");
+
+#define gpu_dbg(level, fmt, arg...)			\
+	do {			\
+		if (gpu_dbg_level >= (level))		\
+		printk("gpu_debug"fmt , ## arg); 	\
+	} while (0)
+
+#define GPU_CLK_DBG(fmt, arg...)
+
+//disable print
+//#define _dev_info(...)
+
+//static DEFINE_SPINLOCK(lock);
+static mali_plat_info_t* pmali_plat = NULL;
+//static u32 mali_extr_backup = 0;
+//static u32 mali_extr_sample_backup = 0;
+struct timeval start;
+struct timeval end;
+int mali_pm_statue = 0;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali_0_parent = dvfs_tbl->clkp_handle;
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+#ifdef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+#endif
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_set_parent(clk_mali_0, clk_mali_0_parent);
+
+	clk_prepare_enable(clk_mali_0);
+
+	clk_set_parent(clk_mali, clk_mali_0);
+
+#ifdef AML_CLK_LOCK_ERROR
+	clk_set_parent(clk_mali_1, clk_mali_0_parent);
+	clk_prepare_enable(clk_mali_1);
+#endif
+
+	GPU_CLK_DBG("%s:enable(%d), %s:enable(%d)\n",
+	  clk_mali_0->name,  clk_mali_0->enable_count,
+	  clk_mali_0_parent->name, clk_mali_0_parent->enable_count);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+	struct clk *clk_mali_x   = NULL;
+	struct clk *clk_mali_x_parent = NULL;
+	struct clk *clk_mali_x_old = NULL;
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+	clk_mali_x_old  = clk_get_parent(clk_mali);
+
+	if (!clk_mali_x_old) {
+		printk("gpu: could not get clk_mali_x_old or clk_mali_x_old\n");
+		return 0;
+	}
+	if (clk_mali_x_old == clk_mali_0) {
+		clk_mali_x = clk_mali_1;
+	} else if (clk_mali_x_old == clk_mali_1) {
+		clk_mali_x = clk_mali_0;
+	} else {
+		printk("gpu: unmatched clk_mali_x_old\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG("idx=%d, clk_freq=%d\n", idx, dvfs_tbl->clk_freq);
+	clk_mali_x_parent = dvfs_tbl->clkp_handle;
+	if (!clk_mali_x_parent) {
+		printk("gpu: could not get clk_mali_x_parent\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x_parent, dvfs_tbl->clkp_freq);
+	GPU_CLK_DBG();
+	ret = clk_set_parent(clk_mali_x, clk_mali_x_parent);
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x, dvfs_tbl->clk_freq);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	ret = clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG("new %s:enable(%d)\n", clk_mali_x->name,  clk_mali_x->enable_count);
+	do_gettimeofday(&start);
+	udelay(1);// delay 10ns
+	do_gettimeofday(&end);
+	ret = clk_set_parent(clk_mali, clk_mali_x);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	GPU_CLK_DBG("old %s:enable(%d)\n", clk_mali_x_old->name,  clk_mali_x_old->enable_count);
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "ao io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	mpdata->dvfs_table_size = 0;
+
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+	dev_notice(&pdev->dev, "dvfs table is %d\n", mpdata->dvfs_table_size);
+	dev_notice(&pdev->dev, "dvfs table addr %p, ele size=%zd\n",
+			mpdata->dvfs_table,
+			sizeof(mpdata->dvfs_table[0]));
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "clk_gpu");
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_gpu_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_gpu_1");
+	if (IS_ERR(mpdata->clk_mali) || IS_ERR(mpdata->clk_mali_0) || IS_ERR(mpdata->clk_mali_1)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+#else
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	//mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+    clk_set_rate(clk_mali, 500000000);
+    clk_set_rate(clk_mali, 667000000);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+
+	GPU_CLK_DBG();
+	do_gettimeofday(&start);
+	ret = clk_set_rate(clk_mali, dvfs_tbl->clk_freq);
+	do_gettimeofday(&end);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	GPU_CLK_DBG();
+	clk_disable_unprepare(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+				&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		} else if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+			dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+			if (IS_ERR(dvfs_tbl->clkp_handle)) {
+				dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+			}
+			ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+			if (ret) {
+				dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+			}
+		}
+
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	if (mpdata->def_clock > mpdata->scale_info.maxclk)
+		mpdata->def_clock = mpdata->scale_info.maxclk;
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+	_dev_info(&pdev->dev, "clock dvfs table size is %d\n", mpdata->dvfs_table_size);
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "gpu_mux");
+#if 0
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_mali_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_mali_1");
+#endif
+	if (IS_ERR(mpdata->clk_mali)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
new file mode 100644
index 0000000..9b8b392
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
@@ -0,0 +1,37 @@
+#ifndef __MALI_CLOCK_H__
+#define __MALI_CLOCK_H__
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <linux/clk.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 29))
+#include <linux/amlogic/iomap.h>
+#endif
+
+#ifndef HHI_MALI_CLK_CNTL
+#define HHI_MALI_CLK_CNTL   0x6C
+#define mplt_read(r)        readl((pmali_plat->reg_base_hiubus) + ((r)<<2))
+#define mplt_write(r, v)    writel((v), ((pmali_plat->reg_base_hiubus) + ((r)<<2)))
+#define mplt_setbits(r, m)  mplt_write((r), (mplt_read(r) | (m)));
+#define mplt_clrbits(r, m)  mplt_write((r), (mplt_read(r) & (~(m))));
+#endif
+
+//extern int mali_clock_init(struct platform_device *dev);
+int mali_clock_init_clk_tree(struct platform_device *pdev);
+
+typedef int (*critical_t)(size_t param);
+int mali_clock_critical(critical_t critical, size_t param);
+
+int mali_clock_init(mali_plat_info_t*);
+int mali_clock_set(unsigned int index);
+void disable_clock(void);
+void enable_clock(void);
+u32 get_mali_freq(u32 idx);
+void set_str_src(u32 data);
+int mali_dt_info(struct platform_device *pdev,
+        struct mali_plat_info_t *mpdata);
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
new file mode 100644
index 0000000..8d762b0
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
@@ -0,0 +1,120 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/version.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static unsigned long t83x_static_power(unsigned long voltage)
+{
+#if 0
+	struct thermal_zone_device *tz;
+	unsigned long temperature, temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10);
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	tz = thermal_zone_get_zone_by_name("gpu");
+	if (IS_ERR(tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(tz));
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	} else {
+		int ret;
+
+		ret = tz->ops->get_temp(tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(2 * temp_cubed)
+			- (80 * temp_squared)
+			+ (4700 * temp)
+			+ 32000;
+
+	return (((coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+#else
+	return 0;
+#endif
+}
+
+static unsigned long t83x_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+	const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */
+
+	return (coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+struct devfreq_cooling_ops t83x_model_ops = {
+#else
+struct devfreq_cooling_power t83x_model_ops = {
+#endif
+	.get_static_power = t83x_static_power,
+	.get_dynamic_power = t83x_dynamic_power,
+};
+
+#endif
+
+#include <mali_kbase_config.h>
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &dummy_platform_config;
+}
+
+#ifndef CONFIG_OF
+int kbase_platform_register(void)
+{
+	return 0;
+}
+
+void kbase_platform_unregister(void)
+{
+}
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
new file mode 100755
index 0000000..3ab8dfd
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (750000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (100000)
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (NULL)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+extern struct kbase_platform_funcs_conf dt_funcs_conf;
+#define PLATFORM_FUNCS (&dt_funcs_conf)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#ifdef CONFIG_DEVFREQ_THERMAL
+#define POWER_MODEL_CALLBACKS (&t83x_model_ops)
+extern struct devfreq_cooling_ops t83x_model_ops;
+#else
+#define POWER_MODEL_CALLBACKS (NULL)
+#endif
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+/**
+ * Autosuspend delay
+ *
+ * The delay time (in milliseconds) to be used for autosuspend
+ */
+#define AUTO_SUSPEND_DELAY (100)
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.c
new file mode 100755
index 0000000..dc1654f
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.c
@@ -0,0 +1,336 @@
+/**
+ ** Meson hardware specific initialization
+ **/
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include <mali_kbase.h>
+#include "mali_kbase_meson.h"
+
+int meson_gpu_reset(struct kbase_device *kbdev)
+{
+	struct meson_context *platform = kbdev->platform_context;
+    void __iomem *reg_base_reset = platform->reg_base_reset;
+    u32 value;
+
+    //JOHNT
+    // Level reset mail
+ 
+    // Level reset mail
+    //writel(~(0x1<<14), reg_base_reset + P_RESET2_MASK * 4);
+    //writel(~(0x1<<14), reg_base_reset + P_RESET2_LEVEL * 4);
+
+    //writel(0xffffffff, reg_base_reset + P_RESET2_LEVEL * 4);
+    //writel(0xffffffff, reg_base_reset + P_RESET0_LEVEL * 4);
+
+    MESON_PRINT("%s, %d\n", __func__, __LINE__);
+    MESON_PRINT("reg_base=%p, reset0=%p\n", reg_base_reset, reg_base_reset + RESET0_MASK * 4);
+    udelay(100);
+#if 0
+    value = readl(reg_base_reset + RESET0_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET0_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(100);
+    MESON_PRINT("%s, %d\n", __func__, __LINE__);
+    value = readl(reg_base_reset + RESET0_MASK * 4);
+    value = value & (~(0x1<<20));
+    writel(value, reg_base_reset + RESET0_MASK * 4);
+
+    udelay(100);
+#if 0
+    value = readl(reg_base_reset + RESET0_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET0_REGISTER=%x\n", __LINE__, value);
+    udelay(100);
+#endif
+
+    value = readl(reg_base_reset + RESET0_LEVEL * 4);
+    value = value & (~(0x1<<20));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET0_LEVEL * 4);
+    udelay(100);
+
+#if 0
+    value = readl(reg_base_reset + RESET0_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET0_REGISTER=%x\n", __LINE__, value);
+    udelay(100);
+#endif
+
+///////////////
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(100);
+    value = readl(reg_base_reset + RESET2_MASK * 4);
+    value = value & (~(0x1<<14));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET2_MASK * 4);
+
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    value = readl(reg_base_reset + RESET2_LEVEL * 4);
+    value = value & (~(0x1<<14));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET2_LEVEL * 4);
+    udelay(100);
+
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(100);
+    value = readl(reg_base_reset + RESET0_LEVEL * 4);
+    value = value | ((0x1<<20));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET0_LEVEL * 4);
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(100);
+    value = readl(reg_base_reset + RESET2_LEVEL * 4);
+    value = value | ((0x1<<14));
+    //MESON_PRINT("line(%d), value=%x\n", __LINE__, value);
+    writel(value, reg_base_reset + RESET2_LEVEL * 4);
+#if 0
+    value = readl(reg_base_reset + RESET2_REGISTER * 4);
+    MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value);
+#endif
+
+    udelay(10); // OR POLL for reset done
+
+    return 0;
+}
+
+void  meson_gpu_pwr_on(struct kbase_device *kbdev, u32  mask)
+{
+    u32    part1_done = 0;
+    u32     value = 0;
+    u32     count = 0;
+
+    kbdev->pm.backend.gpu_powered = true;
+    MESON_PRINT("%s, %d begin\n", __func__,__LINE__);
+
+#if 0
+    value = 0x10 | (0x1<<16);
+#else
+    value = 0xfff | (0x20<<16);
+#endif
+    while (part1_done != value) {
+        Mali_WrReg(GPU_CONTROL_REG(PWR_KEY), 0x2968A819);
+        Mali_WrReg(GPU_CONTROL_REG(PWR_OVERRIDE1), value);
+        part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+        MESON_PRINT("write again, count=%d, overrider1=%x\n", count, part1_done);
+        udelay(20);
+        count ++;
+        if (0 == (count %100)) MESON_PRINT("write again, count%d\n", count);
+    }
+    part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+    MESON_PRINT("write again, count=%d, overrider1=%x\n", count, part1_done);
+
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+
+    MESON_PRINT("%s, %d\n", __func__,__LINE__);
+    if ((mask & 0x1) != 0 ) {
+        MESON_PRINT("%s, %d\n", __func__,__LINE__);
+        Mali_WrReg(0x00000190, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x00000194, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x000001a0, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x000001a4, 0xffffffff);    // Power on all cores
+    }
+    MESON_PRINT("power on %d\n", __LINE__);
+
+    if ( (mask >> 1) != 0 ) {
+        Mali_WrReg(0x00000180, mask >> 1);    // Power on all cores
+        Mali_WrReg(0x00000184, 0x0);    // Power on all cores
+        MESON_PRINT("%s, %d\n", __func__,__LINE__);
+    }
+
+    MESON_PRINT("%s, %d\n", __func__,__LINE__);
+    if ( mask != 0 ) {
+        MESON_PRINT("%s, %d\n", __func__,__LINE__);
+        udelay(10);
+        part1_done = Mali_RdReg(0x0000020);
+        while(part1_done ==0) {
+            part1_done = Mali_RdReg(0x00000020);
+            udelay(10);
+        }
+
+        MESON_PRINT("%s, %d\n", __func__,__LINE__);
+        Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+    }
+    MESON_PRINT("%s, %d end\n", __func__,__LINE__);
+}
+
+void  meson_gpu_pwr_off(struct kbase_device *kbdev, u32  mask)
+{
+#if 1
+    u32 part1_done;
+    part1_done = 0;
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+
+    if ( (mask >> 1) != 0 ) {
+        Mali_WrReg(0x000001C0, mask >> 1);    // Power off all cores
+        Mali_WrReg(0x000001C4, 0x0);          // Power off all cores
+    }
+
+    if (  (mask & 0x1) != 0 ) {
+        Mali_WrReg(0x000001D0, 0xffffffff);    // Power off all cores
+        Mali_WrReg(0x000001D4, 0xffffffff);    // Power off all cores
+        Mali_WrReg(0x000001E0, 0xffffffff);    // Power off all cores
+        Mali_WrReg(0x000001E4, 0xffffffff);    // Power off all cores
+    }
+
+    if ( mask != 0 ) {
+        part1_done = Mali_RdReg(0x0000020);
+        while((part1_done ==0)) { part1_done = Mali_RdReg(0x00000020); }
+        MESON_PRINT("Mali_pwr_off:gpu_irq : %x\n", part1_done);
+        Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+    }
+#endif
+}
+
+
+
+
+static int kbase_platform_meson_init(struct kbase_device *kbdev)
+{
+#if 0
+	int err;
+#endif
+	struct device_node *gpu_dn = kbdev->dev->of_node;
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	unsigned long flags;
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+	struct meson_context *platform;
+    u32 part1_done = 0;
+
+	platform = kmalloc(sizeof(struct meson_context), GFP_KERNEL);
+
+	if (!platform)
+		return -ENOMEM;
+
+	memset(platform, 0, sizeof(struct meson_context));
+
+	kbdev->platform_context = (void *) platform;
+
+    platform->reg_base_reset = of_iomap(gpu_dn, 1);
+	_dev_info(kbdev->dev, "reset io source  0x%p\n",platform->reg_base_reset);
+
+	platform->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(kbdev->dev, "ao io source  0x%p\n", platform->reg_base_aobus);
+
+	platform->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(kbdev->dev, "hiu io source  0x%p\n", platform->reg_base_hiubus);
+
+    platform->clk_mali = devm_clk_get(kbdev->dev, "gpu_mux");
+	if (IS_ERR(platform->clk_mali)) {
+		dev_err(kbdev->dev, "failed to get clock pointer\n");
+	} else {
+        clk_prepare_enable(platform->clk_mali);
+        clk_set_rate(platform->clk_mali, 285000000);
+    }
+    MESON_PRINT("%s, %d begin\n", __func__, __LINE__);
+    meson_gpu_reset(kbdev);
+    meson_gpu_pwr_on(kbdev, 0xe);
+
+    part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+    MESON_PRINT("line%d, overrider1=%x\n", __LINE__, part1_done);
+    meson_gpu_pwr_off(kbdev, 0xe);
+#if 1
+    part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+    MESON_PRINT("line%d, overrider1=%x\n", __LINE__, part1_done);
+
+    meson_gpu_reset(kbdev);
+    meson_gpu_pwr_on(kbdev, 0xe);
+    part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
+    MESON_PRINT("line%d, overrider1=%x\n", __LINE__, part1_done);
+#endif
+#if 0
+	platform->cmu_pmu_status = 0;
+	platform->dvfs_wq = NULL;
+	platform->polling_speed = 100;
+	gpu_debug_level = DVFS_WARNING;
+#endif
+
+	mutex_init(&platform->gpu_clock_lock);
+	mutex_init(&platform->gpu_dvfs_handler_lock);
+	spin_lock_init(&platform->gpu_dvfs_spinlock);
+#if 0
+	err = gpu_control_module_init(kbdev);
+	if (err)
+		goto clock_init_fail;
+
+	/* dvfs gobernor init*/
+	gpu_dvfs_governor_init(kbdev, G3D_DVFS_GOVERNOR_DEFAULT);
+#endif
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags);
+	platform->wakeup_lock = 0;
+	spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+#if 0
+	/* dvfs handler init*/
+	gpu_dvfs_handler_init(kbdev);
+
+	err = gpu_notifier_init(kbdev);
+	if (err)
+		goto notifier_init_fail;
+
+	err = gpu_create_sysfs_file(kbdev->dev);
+	if (err)
+		goto sysfs_init_fail;
+#endif
+
+    MESON_PRINT("%s, %d end\n", __func__, __LINE__);
+	return 0;
+#if 0
+clock_init_fail:
+notifier_init_fail:
+sysfs_init_fail:
+	kfree(platform);
+
+	return err;
+#endif
+}
+
+/**
+ ** Meson hardware specific termination
+ **/
+static void kbase_platform_meson_term(struct kbase_device *kbdev)
+{
+	struct meson_context *platform;
+	platform = (struct meson_context *) kbdev->platform_context;
+#if 0
+	gpu_notifier_term();
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	gpu_dvfs_handler_deinit(kbdev);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	gpu_control_module_term(kbdev);
+#endif
+
+	kfree(kbdev->platform_context);
+	kbdev->platform_context = 0;
+
+#if 0
+	gpu_remove_sysfs_file(kbdev->dev);
+#endif
+}
+
+struct kbase_platform_funcs_conf platform_funcs = {
+	.platform_init_func = &kbase_platform_meson_init,
+	.platform_term_func = &kbase_platform_meson_term,
+};
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.h
new file mode 100644
index 0000000..89ff21b
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.h
@@ -0,0 +1,65 @@
+
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+
+#define RESET0_MASK     0x00
+#define RESET1_MASK     0x01
+#define RESET2_MASK     0x02
+
+#define RESET0_LEVEL    0x10
+#define RESET1_LEVEL    0x11
+#define RESET2_LEVEL    0x12
+
+#define Mali_WrReg(regnum, value)   writel((value),(kbdev->reg + (regnum)))
+#define Mali_RdReg(regnum)          readl(kbdev->reg + (regnum))
+#define MESON_PRINT(...)            
+
+struct meson_context {
+	struct mutex gpu_clock_lock;
+	struct mutex gpu_dvfs_handler_lock;
+	spinlock_t gpu_dvfs_spinlock;
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	int utilization;
+	int util_gl_share;
+	int util_cl_share[2];
+#ifdef CONFIG_CPU_THERMAL_IPA
+	int norm_utilisation;
+	int freq_for_normalisation;
+	unsigned long long power;
+#endif /* CONFIG_CPU_THERMAL_IPA */
+	int max_lock;
+	int min_lock;
+#if 0
+	int user_max_lock[NUMBER_LOCK];
+	int user_min_lock[NUMBER_LOCK];
+#endif
+	int target_lock_type;
+	int down_requirement;
+	bool wakeup_lock;
+	int governor_num;
+	int governor_type;
+	char governor_list[100];
+	bool dvfs_status;
+#ifdef CONFIG_CPU_THERMAL_IPA
+	int time_tick;
+	u32 time_busy;
+	u32 time_idle;
+#endif /* CONFIG_CPU_THERMAL_IPA */
+#endif
+	int cur_clock;
+	int cur_voltage;
+	int voltage_margin;
+	bool tmu_status;
+	int debug_level;
+	int polling_speed;
+	struct workqueue_struct *dvfs_wq;
+    void __iomem *reg_base_reset;
+    void __iomem *reg_base_aobus;
+    void __iomem *reg_base_hiubus;
+    struct clk  *clk_mali;
+    struct clk  *clk_gp;
+};
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
new file mode 100755
index 0000000..ece08be
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
@@ -0,0 +1,303 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <linux/pm_runtime.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_config_platform.h"
+
+void *reg_base_hiubus = NULL;
+u32  override_value_aml = 0;
+static int first = 1;
+
+#define RESET0_MASK    0x10
+#define RESET1_MASK    0x11
+#define RESET2_MASK    0x12
+
+#define RESET0_LEVEL    0x20
+#define RESET1_LEVEL    0x21
+#define RESET2_LEVEL    0x22
+#define Rd(r)                           readl((reg_base_hiubus) + ((r)<<2))
+#define Wr(r, v)                        writel((v), ((reg_base_hiubus) + ((r)<<2)))
+#define Mali_WrReg(regnum, value)   kbase_reg_write(kbdev, (regnum), (value), NULL)
+#define Mali_RdReg(regnum)          kbase_reg_read(kbdev, (regnum), NULL)
+#define stimulus_print   printk
+#define stimulus_display printk
+#define Mali_pwr_off(x)  Mali_pwr_off_with_kdev(kbdev, (x))
+
+extern u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type);
+
+//[0]:CG [1]:SC0 [2]:SC2
+static void  Mali_pwr_on_with_kdev ( struct kbase_device *kbdev, uint32_t  mask)
+{
+    uint32_t    part1_done;
+    uint32_t    shader_present;
+    uint32_t    tiler_present;
+    uint32_t    l2_present;
+
+    part1_done = 0;
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+
+    shader_present = Mali_RdReg(0x100);
+    tiler_present  = Mali_RdReg(0x110);
+    l2_present     = Mali_RdReg(0x120);
+    printk("shader_present=%d, tiler_present=%d, l2_present=%d\n",
+            shader_present, tiler_present, l2_present);
+
+    if (  mask == 0 ) {
+        Mali_WrReg(0x00000180, 0xffffffff);   // Power on all cores (shader low)
+        Mali_WrReg(0x00000184, 0xffffffff);   // Power on all cores (shader high)
+        Mali_WrReg(0x00000190, 0xffffffff);   // Power on all cores (tiler low)
+        Mali_WrReg(0x00000194, 0xffffffff);   // Power on all cores (tiler high)
+        Mali_WrReg(0x000001a0, 0xffffffff);   // Power on all cores (l2 low)
+        Mali_WrReg(0x000001a4, 0xffffffff);   // Power on all cores (l2 high)
+    } else {
+        Mali_WrReg(0x00000180, mask);    // Power on all cores (shader low)
+        Mali_WrReg(0x00000184, 0);       // Power on all cores (shader high)
+        Mali_WrReg(0x00000190, mask);    // Power on all cores (tiler low)
+        Mali_WrReg(0x00000194, 0);       // Power on all cores (tiler high)
+        Mali_WrReg(0x000001a0, mask);    // Power on all cores (l2 low)
+        Mali_WrReg(0x000001a4, 0);       // Power on all cores (l2 high)
+    }
+
+    part1_done = Mali_RdReg(0x0000020);
+    while((part1_done ==0)) { part1_done = Mali_RdReg(0x00000020); }
+    stimulus_display("Mali_pwr_on:gpu_irq : %x\n", part1_done);
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+}
+
+//[0]:CG [1]:SC0 [2]:SC2
+#if 0
+static void  Mali_pwr_off_with_kdev( struct kbase_device *kbdev, uint32_t  mask)
+{
+    uint32_t    part1_done;
+    part1_done = 0;
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+
+    if ( mask == 0 ) {
+        Mali_WrReg(0x000001C0, 0xffffffff);    // Power off all cores (tiler low)
+        Mali_WrReg(0x000001C4, 0xffffffff);    // Power off all cores (tiler high)
+        Mali_WrReg(0x000001D0, 0xffffffff);    // Power off all cores (l2 low)
+        Mali_WrReg(0x000001D4, 0xffffffff);    // Power off all cores (l2 high)
+        Mali_WrReg(0x000001E0, 0xffffffff);    // Power off all cores (shader low)
+        Mali_WrReg(0x000001E4, 0xffffffff);    // Power off all cores (shader high)
+    } else {
+        Mali_WrReg(0x000001C0, mask);    // Power off all cores  (tiler low)
+        Mali_WrReg(0x000001C4, 0x0);     // Power off all cores  (tiler high)
+        Mali_WrReg(0x000001D0, mask);    // Power off all cores  (l2 low)
+        Mali_WrReg(0x000001D4, 0x0);     // Power off all cores  (l2 high)
+        Mali_WrReg(0x000001E0, mask);    // Power off all cores  (shader low)
+        Mali_WrReg(0x000001E4, 0x0);     // Power off all cores  (shader high)
+    }
+
+    part1_done = Mali_RdReg(0x0000020);
+    while((part1_done ==0)) { part1_done = Mali_RdReg(0x00000020); }
+    stimulus_display("Mali_pwr_off:gpu_irq : %x\n", part1_done);
+    Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts
+}
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	int ret = 1; /* Assume GPU has been powered off */
+	int error;
+	struct platform_device *pdev = to_platform_device(kbdev->dev);
+	struct resource *reg_res;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+    //printk("20151013, %s, %d\n", __FILE__, __LINE__);
+    if (first == 0) goto ret;
+
+    reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+    if (!reg_res) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) 
+        reg_base_hiubus = ioremap(reg_res->start, resource_size(reg_res));
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+
+//JOHNT
+    // Level reset mail
+
+    // Level reset mail
+    //Wr(P_RESET2_MASK, ~(0x1<<14));
+    //Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    //Wr(P_RESET2_LEVEL, 0xffffffff);
+    //Wr(P_RESET0_LEVEL, 0xffffffff);
+
+    value = Rd(RESET0_MASK);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_MASK, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+///////////////
+    value = Rd(RESET2_MASK);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_MASK, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value | ((0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value | ((0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    udelay(10); // OR POLL for reset done
+
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+
+    Mali_pwr_on_with_kdev(kbdev, 0x1);
+    //printk("set PWR_ORRIDE, reg=%p, reg_start=%llx, reg_size=%zx, reg_mapped=%p\n",
+    //        kbdev->reg, kbdev->reg_start, kbdev->reg_size, reg_base_hiubus);
+	dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+			(void *)kbdev->dev->pm_domain);
+
+    first = 0;
+    //printk("%s, %d\n", __FILE__, __LINE__);
+ret:
+	error = pm_runtime_get_sync(kbdev->dev);
+	if (error == 1) {
+		/*
+		 * Let core know that the chip has not been
+		 * powered off, so we can save on re-initialization.
+		 */
+		ret = 0;
+	}
+	udelay(100);
+#if 1
+
+    core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+    l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+    tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+    //printk("core_ready=%llx, l2_ready=%llx, tiler_ready=%llx\n", core_ready, l2_ready, tiler_ready);
+#endif
+	dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error);
+
+	return ret;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+    //printk("%s, %d\n", __FILE__, __LINE__);
+#if 0
+    iounmap(reg_base_hiubus);
+    reg_base_hiubus = NULL;
+#endif
+	pm_runtime_mark_last_busy(kbdev->dev);
+	pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+	
+	pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY);
+	pm_runtime_use_autosuspend(kbdev->dev);
+	pm_runtime_set_active(kbdev->dev);
+	
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+	pm_runtime_enable(kbdev->dev);
+
+	if (!pm_runtime_enabled(kbdev->dev)) {
+		dev_warn(kbdev->dev, "pm_runtime not enabled");
+		ret = -ENOSYS;
+	}
+
+	return ret;
+}
+
+static void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+	pm_runtime_disable(kbdev->dev);
+}
+#endif
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+	return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+	int ret = pm_callback_runtime_on(kbdev);
+
+	WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+	pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback = pm_callback_suspend,
+	.power_resume_callback = pm_callback_resume,
+#ifdef KBASE_PM_RUNTIME
+	.power_runtime_init_callback = kbase_device_runtime_init,
+	.power_runtime_term_callback = kbase_device_runtime_disable,
+	.power_runtime_on_callback = pm_callback_runtime_on,
+	.power_runtime_off_callback = pm_callback_runtime_off,
+#else				/* KBASE_PM_RUNTIME */
+	.power_runtime_init_callback = NULL,
+	.power_runtime_term_callback = NULL,
+	.power_runtime_on_callback = NULL,
+	.power_runtime_off_callback = NULL,
+#endif				/* KBASE_PM_RUNTIME */
+};
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
new file mode 100644
index 0000000..41185d0
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
@@ -0,0 +1,15 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/kernel.h>
+#ifndef MALI_PLATFORM_H_
+#define MALI_PLATFORM_H_
+
+extern u32 mali_gp_reset_fail;
+extern u32 mali_core_timeout;
+
+#endif /* MALI_PLATFORM_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
new file mode 100644
index 0000000..b541090
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.h
+ * Example core scaling policy.
+ */
+
+#ifndef __ARM_CORE_SCALING_H__
+#define __ARM_CORE_SCALING_H__
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+
+enum mali_scale_mode_t {
+	MALI_PP_SCALING = 0,
+	MALI_PP_FS_SCALING,
+	MALI_SCALING_DISABLE,
+	MALI_TURBO_MODE,
+	MALI_SCALING_MODE_MAX
+};
+
+typedef struct mali_dvfs_threshold_table {
+	uint32_t    freq_index;
+	uint32_t    voltage;
+	uint32_t    keep_count;
+	uint32_t    downthreshold;
+	uint32_t    upthreshold;
+    uint32_t    clk_freq;
+    const char  *clk_parent;
+    struct clk  *clkp_handle;
+    uint32_t    clkp_freq;
+} mali_dvfs_threshold_table;
+
+/**
+ *  restrictions on frequency and number of pp.
+ */
+typedef struct mali_scale_info_t {
+	u32 minpp;
+	u32 maxpp;
+	u32 minclk;
+	u32 maxclk;
+} mali_scale_info_t;
+
+/**
+ * Platform spesific data for meson chips.
+ */
+typedef struct mali_plat_info_t {
+	u32 cfg_pp; /* number of pp. */
+	u32 cfg_min_pp;
+	u32 turbo_clock; /* reserved clock src. */
+	u32 def_clock; /* gpu clock used most of time.*/
+	u32 cfg_clock; /* max clock could be used.*/
+	u32 cfg_clock_bkup; /* same as cfg_clock, for backup. */
+	u32 cfg_min_clock;
+
+	u32  sc_mpp; /* number of pp used most of time.*/
+	u32  bst_gpu; /* threshold for boosting gpu. */
+	u32  bst_pp; /* threshold for boosting PP. */
+
+	u32 *clk;
+	u32 *clk_sample;
+	u32 clk_len;
+	u32 have_switch; /* have clock gate switch or not. */
+
+	mali_dvfs_threshold_table *dvfs_table;
+	u32 dvfs_table_size;
+
+	mali_scale_info_t scale_info;
+	u32 maxclk_sysfs;
+	u32 maxpp_sysfs;
+
+	/* set upper limit of pp or frequency, for THERMAL thermal or band width saving.*/
+	u32 limit_on;
+
+	/* for boost up gpu by user. */
+	void (*plat_preheat)(void);
+
+    struct platform_device *pdev;
+    void __iomem *reg_base_hiubus;
+    void __iomem *reg_base_aobus;
+	struct work_struct wq_work;
+    struct clk *clk_mali;
+    struct clk *clk_mali_0;
+    struct clk *clk_mali_1;
+} mali_plat_info_t;
+mali_plat_info_t* get_mali_plat_data(void);
+
+/**
+ * Initialize core scaling policy.
+ *
+ * @note The core scaling policy will assume that all PP cores are on initially.
+ *
+ * @param num_pp_cores Total number of PP cores.
+ */
+int mali_core_scaling_init(mali_plat_info_t*);
+
+/**
+ * Terminate core scaling policy.
+ */
+void mali_core_scaling_term(void);
+
+/**
+ * cancel and flush scaling job queue.
+ */
+void flush_scaling_job(void);
+
+/* get current state(pp, clk). */
+void get_mali_rt_clkpp(u32* clk, u32* pp);
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush);
+void revise_mali_rt(void);
+/* get max gpu clk level of this chip*/
+int get_gpu_max_clk_level(void);
+
+/* get or set the scale mode. */
+u32 get_mali_schel_mode(void);
+void set_mali_schel_mode(u32 mode);
+
+/* for frequency reporter in DS-5 streamline. */
+u32 get_current_frequency(void);
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+#endif /* __ARM_CORE_SCALING_H__ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
new file mode 100644
index 0000000..7687f92
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2010, 2012-2014 Amlogic Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ */
+
+/**
+ * @file mali_platform.c
+ * Platform specific Mali driver functions for:
+ * meson8m2 and the newer chip
+ */
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#include <asm/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/moduleparam.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_defs.h>
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+extern void mali_post_init(void);
+struct kbase_device;
+//static int gpu_dvfs_probe(struct platform_device *pdev)
+int platform_dt_init_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	int err = -1;
+
+	err = mali_meson_init_start(pdev);
+    mali_meson_init_finish(pdev);
+    mpgpu_class_init();
+    mali_post_init();
+    return err;
+}
+
+//static int gpu_dvfs_remove(struct platform_device *pdev)
+void platform_dt_term_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	printk("%s, %d\n", __FILE__, __LINE__);
+
+    mpgpu_class_exit();
+	mali_meson_uninit(pdev);
+
+}
+
+inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2])
+{
+    mali_gpu_utilization_callback(utilisation*255/100);
+    return 1;
+}
+
+struct kbase_platform_funcs_conf dt_funcs_conf = {
+    .platform_init_func = platform_dt_init_func,
+    .platform_term_func = platform_dt_term_func,
+};
+#if 0
+static const struct of_device_id gpu_dvfs_ids[] = {
+	{ .compatible = "meson, gpu-dvfs-1.00.a" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, gpu_dvfs_ids);
+
+static struct platform_driver gpu_dvfs_driver = {
+	.driver = {
+		.name = "meson-gpu-dvfs",
+		.owner = THIS_MODULE,
+		.of_match_table = gpu_dvfs_ids,
+	},
+	.probe = gpu_dvfs_probe,
+	.remove = gpu_dvfs_remove,
+};
+module_platform_driver(gpu_dvfs_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Amlogic SH, MM");
+MODULE_DESCRIPTION("Driver for the Meson GPU dvfs");
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
new file mode 100644
index 0000000..ca79752
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
@@ -0,0 +1,33 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#ifndef MESON_MAIN_H_
+#define MESON_MAIN_H_
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+u32 set_max_mali_freq(u32 idx);
+u32 get_max_mali_freq(void);
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev);
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev);
+int mali_meson_uninit(struct platform_device* ptr_plt_dev);
+int mpgpu_class_init(void);
+void mpgpu_class_exit(void);
+void mali_gpu_utilization_callback(int utilization_pp);
+
+#endif /* MESON_MAIN_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
new file mode 100644
index 0000000..0cc5035
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
@@ -0,0 +1,359 @@
+/*******************************************************************
+ *
+ *  Copyright C 2013 by Amlogic, Inc. All Rights Reserved.
+ *
+ *  Description:
+ *
+ *  Author: Amlogic Software
+ *  Created: 2010/4/1   19:46
+ *
+ *******************************************************************/
+/* Standard Linux headers */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/io.h>
+#include <plat/io.h>
+#include <asm/io.h>
+#endif
+
+#include "meson_main2.h"
+
+static ssize_t domain_stat_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	unsigned int val;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+	return sprintf(buf, "%x\n", val>>4);
+	return 0;
+}
+
+#define PREHEAT_CMD "preheat"
+#define PLL2_CMD "mpl2"  /* mpl2 [11] or [0xxxxxxx] */
+#define SCMPP_CMD "scmpp"  /* scmpp [number of pp your want in most of time]. */
+#define BSTGPU_CMD "bstgpu"  /* bstgpu [0-256] */
+#define BSTPP_CMD "bstpp"  /* bstpp [0-256] */
+#define LIMIT_CMD "lmt"  /* lmt [0 or 1] */
+#define MAX_TOKEN 20
+#define FULL_UTILIZATION 256
+
+static ssize_t mpgpu_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	char *pstart, *cprt = NULL;
+	u32 val = 0;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	cprt = skip_spaces(buf);
+	pstart = strsep(&cprt," ");
+	if (strlen(pstart) < 1)
+		goto quit;
+
+	if (!strncmp(pstart, PREHEAT_CMD, MAX_TOKEN)) {
+		if (pmali_plat->plat_preheat) {
+			pmali_plat->plat_preheat();
+		}
+	} else if (!strncmp(pstart, PLL2_CMD, MAX_TOKEN)) {
+		int base = 10;
+		if ((strlen(cprt) > 2) && (cprt[0] == '0') &&
+				(cprt[1] == 'x' || cprt[1] == 'X'))
+			base = 16;
+		if (kstrtouint(cprt, base, &val) <0)
+			goto quit;
+		if (val < 11)
+			pmali_plat->cfg_clock = pmali_plat->cfg_clock_bkup;
+		else
+			pmali_plat->cfg_clock = pmali_plat->turbo_clock;
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		set_str_src(val);
+	} else if (!strncmp(pstart, SCMPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < pmali_plat->cfg_pp)) {
+			pmali_plat->sc_mpp = val;
+		}
+	} else if (!strncmp(pstart, BSTGPU_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_gpu = val;
+		}
+	} else if (!strncmp(pstart, BSTPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_pp = val;
+		}
+	} else if (!strncmp(pstart, LIMIT_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+
+		if (val < 2) {
+			pmali_plat->limit_on = val;
+			if (val == 0) {
+				pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+				pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+				revise_mali_rt();
+			}
+		}
+	}
+quit:
+	return count;
+}
+
+static ssize_t scale_mode_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_mali_schel_mode());
+}
+
+static ssize_t scale_mode_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	set_mali_schel_mode(val);
+
+	return count;
+}
+
+#if 0
+static ssize_t max_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxpp:%d, maxpp_sysfs:%d, total=%d\n",
+			pmali_plat->scale_info.maxpp, pmali_plat->maxpp_sysfs,
+			pmali_plat->cfg_pp);
+	return sprintf(buf, "%d\n", pmali_plat->cfg_pp);
+}
+
+static ssize_t max_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_pp) || (val < pinfo->minpp))
+		return -EINVAL;
+
+	pmali_plat->maxpp_sysfs = val;
+	pinfo->maxpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minpp);
+}
+
+static ssize_t min_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxpp) || (val < 1))
+		return -EINVAL;
+
+	pinfo->minpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+#endif
+
+static ssize_t max_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxclk:%d, maxclk_sys:%d, max gpu level=%d\n",
+			pmali_plat->scale_info.maxclk, pmali_plat->maxclk_sysfs, get_gpu_max_clk_level());
+	return sprintf(buf, "%d\n", get_gpu_max_clk_level());
+}
+
+static ssize_t max_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_clock) || (val < pinfo->minclk))
+		return -EINVAL;
+
+	pmali_plat->maxclk_sysfs = val;
+	pinfo->maxclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minclk);
+}
+
+static ssize_t min_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxclk))
+		return -EINVAL;
+
+	pinfo->minclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_current_frequency());
+}
+
+static ssize_t freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(val, pp, 1);
+
+	return count;
+}
+
+#if 0
+static ssize_t current_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+	return sprintf(buf, "%d\n", pp);
+}
+
+static ssize_t current_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+
+	get_mali_rt_clkpp(&clk, &pp);
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(clk, val, 1);
+
+	return count;
+}
+#endif
+
+static struct class_attribute mali_class_attrs[] = {
+	__ATTR(domain_stat,	0644, domain_stat_read, NULL),
+	__ATTR(mpgpucmd,	0644, NULL,		mpgpu_write),
+	__ATTR(scale_mode,	0644, scale_mode_read,  scale_mode_write),
+	__ATTR(min_freq,	0644, min_freq_read,  	min_freq_write),
+	__ATTR(max_freq,	0644, max_freq_read,	max_freq_write),
+#if 0
+	__ATTR(min_pp,		0644, min_pp_read,	min_pp_write),
+	__ATTR(max_pp,		0644, max_pp_read,	max_pp_write),
+#endif
+	__ATTR(cur_freq,	0644, freq_read,	freq_write),
+#if 0
+	__ATTR(cur_pp,		0644, current_pp_read,	current_pp_write),
+#endif
+};
+
+static struct class mpgpu_class = {
+	.name = "mpgpu",
+};
+
+int mpgpu_class_init(void)
+{
+	int ret = 0;
+	int i;
+	int attr_num =  ARRAY_SIZE(mali_class_attrs);
+
+	ret = class_register(&mpgpu_class);
+	if (ret) {
+		printk(KERN_ERR "%s: class_register failed\n", __func__);
+		return ret;
+	}
+	for (i = 0; i< attr_num; i++) {
+		ret = class_create_file(&mpgpu_class, &mali_class_attrs[i]);
+		if (ret) {
+			printk(KERN_ERR "%d ST: class item failed to register\n", i);
+		}
+	}
+	return ret;
+}
+
+void  mpgpu_class_exit(void)
+{
+	class_unregister(&mpgpu_class);
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
new file mode 100644
index 0000000..26a56f6
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
@@ -0,0 +1,245 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include <asm/io.h>
+#ifdef CONFIG_GPU_THERMAL
+#include <linux/gpu_cooling.h>
+#include <linux/gpucore_cooling.h>
+#include <linux/amlogic/aml_thermal_hw.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+/*
+ *    For Meson 8 M2.
+ *
+ */
+static void mali_plat_preheat(void);
+static mali_plat_info_t mali_plat_data = {
+    .bst_gpu = 210, /* threshold for boosting gpu. */
+    .bst_pp = 160, /* threshold for boosting PP. */
+    .have_switch = 1,
+    .limit_on = 1,
+    .plat_preheat = mali_plat_preheat,
+};
+
+static void mali_plat_preheat(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    u32 pre_fs;
+    u32 clk, pp;
+
+    if (get_mali_schel_mode() != MALI_PP_FS_SCALING)
+        return;
+
+    get_mali_rt_clkpp(&clk, &pp);
+    pre_fs = mali_plat_data.def_clock + 1;
+    if (clk < pre_fs)
+        clk = pre_fs;
+    if (pp < mali_plat_data.sc_mpp)
+        pp = mali_plat_data.sc_mpp;
+    set_mali_rt_clkpp(clk, pp, 1);
+#endif
+}
+
+mali_plat_info_t* get_mali_plat_data(void) {
+    return &mali_plat_data;
+}
+
+int get_mali_freq_level(int freq)
+{
+    int i = 0, level = -1;
+    int mali_freq_num;
+
+    if (freq < 0)
+        return level;
+
+    mali_freq_num = mali_plat_data.dvfs_table_size - 1;
+    if (freq <= mali_plat_data.clk_sample[0])
+        level = mali_freq_num-1;
+    else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1])
+        level = 0;
+    else {
+        for (i=0; i<mali_freq_num - 1 ;i++) {
+            if (freq >= mali_plat_data.clk_sample[i] && freq <= mali_plat_data.clk_sample[i + 1]) {
+                level = i;
+                level = mali_freq_num-level - 1;
+            }
+        }
+    }
+    return level;
+}
+
+unsigned int get_mali_max_level(void)
+{
+    return mali_plat_data.dvfs_table_size - 1;
+}
+
+int get_gpu_max_clk_level(void)
+{
+    return mali_plat_data.cfg_clock;
+}
+
+#ifdef CONFIG_GPU_THERMAL
+static void set_limit_mali_freq(u32 idx)
+{
+    if (mali_plat_data.limit_on == 0)
+        return;
+    if (idx > mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk)
+        return;
+    if (idx > mali_plat_data.maxclk_sysfs) {
+        printk("idx > max freq\n");
+        return;
+    }
+    mali_plat_data.scale_info.maxclk= idx;
+    revise_mali_rt();
+}
+
+static u32 get_limit_mali_freq(void)
+{
+    return mali_plat_data.scale_info.maxclk;
+}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 get_mali_utilization(void)
+{
+#ifndef MESON_DRV_BRING
+    return 55;
+#else
+    return (_mali_ukk_utilization_pp() * 100) / 256;
+#endif
+}
+#endif
+#endif
+
+#ifdef CONFIG_GPU_THERMAL
+static u32 set_limit_pp_num(u32 num)
+{
+    u32 ret = -1;
+    if (mali_plat_data.limit_on == 0)
+        goto quit;
+    if (num > mali_plat_data.cfg_pp ||
+            num < mali_plat_data.scale_info.minpp)
+        goto quit;
+
+    if (num > mali_plat_data.maxpp_sysfs) {
+        printk("pp > sysfs set pp\n");
+        goto quit;
+    }
+
+    mali_plat_data.scale_info.maxpp = num;
+    revise_mali_rt();
+    ret = 0;
+quit:
+    return ret;
+}
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 mali_get_online_pp(void)
+{
+    unsigned int val;
+    mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+    val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+    if (val == 0x07)    /* No pp is working */
+        return 0;
+
+#ifndef MESON_DRV_BRING
+    return 2;
+#else
+    return mali_executor_get_num_cores_enabled();
+#endif
+}
+#endif
+#endif
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+    //dev_set_drvdata(&ptr_plt_dev->dev, &mali_plat_data);
+    mali_dt_info(ptr_plt_dev, &mali_plat_data);
+    mali_clock_init_clk_tree(ptr_plt_dev);
+    return 0;
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+    if (mali_core_scaling_init(&mali_plat_data) < 0)
+        return -1;
+    return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+    mali_core_scaling_term();
+    return 0;
+}
+
+void mali_post_init(void)
+{
+#ifdef CONFIG_GPU_THERMAL
+    int err;
+    struct gpufreq_cooling_device *gcdev = NULL;
+    struct gpucore_cooling_device *gccdev = NULL;
+
+    gcdev = gpufreq_cooling_alloc();
+    register_gpu_freq_info(get_current_frequency);
+    if (IS_ERR(gcdev))
+        printk("malloc gpu cooling buffer error!!\n");
+    else if (!gcdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gcdev->get_gpu_freq_level = get_mali_freq_level;
+        gcdev->get_gpu_max_level = get_mali_max_level;
+        gcdev->set_gpu_freq_idx = set_limit_mali_freq;
+        gcdev->get_gpu_current_max_level = get_limit_mali_freq;
+#ifdef CONFIG_DEVFREQ_THERMAL
+        gcdev->get_gpu_freq = get_mali_freq;
+        gcdev->get_gpu_loading = get_mali_utilization;
+        gcdev->get_online_pp = mali_get_online_pp;
+#endif
+        err = gpufreq_cooling_register(gcdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gcdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu cooling register okay with err=%d\n",err);
+    }
+
+    gccdev = gpucore_cooling_alloc();
+    if (IS_ERR(gccdev))
+        printk("malloc gpu core cooling buffer error!!\n");
+    else if (!gccdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gccdev->max_gpu_core_num=mali_plat_data.cfg_pp;
+        gccdev->set_max_pp_num=set_limit_pp_num;
+        err = (int)gpucore_cooling_register(gccdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gccdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu core cooling register okay with err=%d\n",err);
+    }
+#endif
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
new file mode 100644
index 0000000..4fa0773
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
@@ -0,0 +1,584 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.c
+ * Example core scaling policy.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+
+#if AMLOGIC_GPU_USE_GPPLL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16)
+#include <linux/amlogic/amports/gp_pll.h>
+#else
+#include <linux/amlogic/media/clk/gp_pll.h>
+#endif
+#endif
+
+#define LOG_MALI_SCALING 1
+#include "meson_main2.h"
+#include "mali_clock.h"
+
+static int currentStep;
+#ifndef CONFIG_MALI_DVFS
+static int num_cores_enabled;
+static int lastStep;
+static struct work_struct wq_work;
+static mali_plat_info_t* pmali_plat = NULL;
+#endif
+static int  scaling_mode = MALI_PP_FS_SCALING;
+extern int  mali_pm_statue;
+//static int  scaling_mode = MALI_SCALING_DISABLE;
+//static int  scaling_mode = MALI_PP_SCALING;
+
+#if AMLOGIC_GPU_USE_GPPLL
+static struct gp_pll_user_handle_s *gp_pll_user_gpu;
+static int is_gp_pll_get;
+static int is_gp_pll_put;
+#endif
+static unsigned scaling_dbg_level = 0;
+module_param(scaling_dbg_level, uint, 0644);
+MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level");
+
+#define scalingdbg(level, fmt, arg...)				   \
+	do {											   \
+		if (scaling_dbg_level >= (level))			   \
+		printk(fmt , ## arg);						   \
+	} while (0)
+
+#ifndef CONFIG_MALI_DVFS
+static inline void mali_clk_exected(void)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	uint32_t execStep = currentStep;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[currentStep];
+
+	//if (pdvfs[currentStep].freq_index == pdvfs[lastStep].freq_index) return;
+	if ((pdvfs[execStep].freq_index == pdvfs[lastStep].freq_index) ||
+		(pdvfs[execStep].clk_freq == pdvfs[lastStep].clk_freq)){
+		return;
+	}
+
+#if AMLOGIC_GPU_USE_GPPLL
+	if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+		gp_pll_request(gp_pll_user_gpu);
+		if (!is_gp_pll_get) {
+			//printk("not get pll\n");
+			execStep = currentStep - 1;
+		}
+	} else {
+		//not get the gp pll, do need put
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+		gp_pll_release(gp_pll_user_gpu);
+	}
+#else
+	if ((0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) &&
+			!IS_ERR(dvfs_tbl->clkp_handle) &&
+			(0 != dvfs_tbl->clkp_freq)) {
+		clk_prepare_enable(dvfs_tbl->clkp_handle);
+		clk_set_rate(dvfs_tbl->clkp_handle, dvfs_tbl->clkp_freq);
+	}
+
+#endif
+	//mali_dev_pause();
+	mali_clock_set(pdvfs[execStep].freq_index);
+	//mali_dev_resume();
+#if AMLOGIC_GPU_USE_GPPLL==0
+	if ((0 == strcmp(pdvfs[lastStep].clk_parent,"gp0_pll")) &&
+		(0 != strcmp(pdvfs[execStep].clk_parent, "gp0_pll"))) {
+			clk_disable_unprepare(pdvfs[lastStep].clkp_handle);
+	}
+#endif
+
+	lastStep = execStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	if (is_gp_pll_put) {
+		//printk("release gp0 pll\n");
+		gp_pll_release(gp_pll_user_gpu);
+		gp_pll_request(gp_pll_user_gpu);
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+	}
+#endif
+
+}
+#if AMLOGIC_GPU_USE_GPPLL
+static int gp_pll_user_cb_gpu(struct gp_pll_user_handle_s *user,
+		int event)
+{
+	if (event == GP_PLL_USER_EVENT_GRANT) {
+		//printk("granted\n");
+		is_gp_pll_get = 1;
+		is_gp_pll_put = 0;
+		schedule_work(&wq_work);
+	} else if (event == GP_PLL_USER_EVENT_YIELD) {
+		//printk("ask for yield\n");
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 1;
+		schedule_work(&wq_work);
+	}
+
+	return 0;
+}
+#endif
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+    cores = cores;
+    return 0;
+}
+
+static void do_scaling(struct work_struct *work)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	int err = mali_perf_set_num_pp_cores(num_cores_enabled);
+    if (err < 0) scalingdbg(1, "set pp failed");
+
+	scalingdbg(1, "set pp cores to %d\n", num_cores_enabled);
+	scalingdbg(1, "pdvfs[%d].freq_index=%d, pdvfs[%d].freq_index=%d\n",
+			currentStep, pdvfs[currentStep].freq_index,
+			lastStep, pdvfs[lastStep].freq_index);
+	mali_clk_exected();
+#ifdef CONFIG_MALI400_PROFILING
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+			MALI_PROFILING_EVENT_CHANNEL_GPU |
+			MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+			get_current_frequency(),
+			0,	0,	0,	0);
+#endif
+}
+#endif
+
+u32 revise_set_clk(u32 val, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+
+	pinfo = &pmali_plat->scale_info;
+
+	if (val < pinfo->minclk)
+		val = pinfo->minclk;
+	else if (val >  pinfo->maxclk)
+		val =  pinfo->maxclk;
+
+	if (val != currentStep) {
+		currentStep = val;
+		if (flush)
+			schedule_work(&wq_work);
+		else
+			ret = 1;
+	}
+#endif
+	return ret;
+}
+
+void get_mali_rt_clkpp(u32* clk, u32* pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	*clk = currentStep;
+	*pp = num_cores_enabled;
+#endif
+}
+
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+	u32 flush_work = 0;
+
+	pinfo = &pmali_plat->scale_info;
+	if (clk < pinfo->minclk)
+		clk = pinfo->minclk;
+	else if (clk >  pinfo->maxclk)
+		clk =  pinfo->maxclk;
+
+	if (clk != currentStep) {
+		currentStep = clk;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+	if (pp < pinfo->minpp)
+		pp = pinfo->minpp;
+	else if (pp > pinfo->maxpp)
+		pp = pinfo->maxpp;
+
+	if (pp != num_cores_enabled) {
+		num_cores_enabled = pp;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+
+	if (flush_work)
+		schedule_work(&wq_work);
+#endif
+	return ret;
+}
+
+void revise_mali_rt(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	set_mali_rt_clkpp(currentStep, num_cores_enabled, 1);
+#endif
+}
+
+void flush_scaling_job(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	cancel_work_sync(&wq_work);
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 enable_one_core(void)
+{
+	scalingdbg(2, "meson:	 one more pp, curent has %d pp cores\n",  num_cores_enabled + 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled + 1, 0);
+}
+
+static u32 disable_one_core(void)
+{
+	scalingdbg(2, "meson: disable one pp, current has %d pp cores\n",  num_cores_enabled - 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled - 1, 0);
+}
+
+static u32 enable_max_num_cores(void)
+{
+	return set_mali_rt_clkpp(currentStep, pmali_plat->scale_info.maxpp, 0);
+}
+
+static u32 enable_pp_cores(u32 val)
+{
+	scalingdbg(2, "meson: enable %d pp cores\n", val);
+	return set_mali_rt_clkpp(currentStep, val, 0);
+}
+#endif
+
+int mali_core_scaling_init(mali_plat_info_t *mali_plat)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_plat == NULL) {
+		scalingdbg(2, " Mali platform data is NULL!!!\n");
+		return -1;
+	}
+
+	pmali_plat = mali_plat;
+    printk("mali_plat=%p\n", mali_plat);
+	num_cores_enabled = pmali_plat->sc_mpp;
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_gpu = gp_pll_user_register("gpu", 1,
+		gp_pll_user_cb_gpu);
+	//not get the gp pll, do need put
+	is_gp_pll_get = 0;
+	is_gp_pll_put = 0;
+	if (gp_pll_user_gpu == NULL) printk("register gp pll user for gpu failed\n");
+#endif
+
+	currentStep = pmali_plat->def_clock;
+	lastStep = currentStep;
+	INIT_WORK(&wq_work, do_scaling);
+#endif
+	return 0;
+	/* NOTE: Mali is not fully initialized at this point. */
+}
+
+void mali_core_scaling_term(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	flush_scheduled_work();
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_unregister(gp_pll_user_gpu);
+#endif
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 mali_threshold [] = {
+	40, /* 40% */
+	50, /* 50% */
+	90, /* 90% */
+};
+#endif
+
+void mali_pp_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+
+	if (mali_threshold[2] < utilization_pp)
+		ret = enable_max_num_cores();
+	else if (mali_threshold[1]< utilization_pp)
+		ret = enable_one_core();
+	else if (0 < utilization_pp)
+		ret = disable_one_core();
+	if (ret == 1)
+		schedule_work(&wq_work);
+#endif
+}
+
+#if LOG_MALI_SCALING
+void trace_utilization(int utilization_gpu, u32 current_idx, u32 next,
+		u32 current_pp, u32 next_pp)
+{
+	char direction;
+	if (next > current_idx)
+		direction = '>';
+	else if ((current_idx > pmali_plat->scale_info.minpp) && (next < current_idx))
+		direction = '<';
+	else
+		direction = '~';
+
+	scalingdbg(2, "[SCALING]%c (%3d-->%3d)@%3d{%3d - %3d}. pp:(%d-->%d)\n",
+			direction,
+			get_mali_freq(current_idx),
+			get_mali_freq(next),
+			utilization_gpu,
+			pmali_plat->dvfs_table[current_idx].downthreshold,
+			pmali_plat->dvfs_table[current_idx].upthreshold,
+			current_pp, next_pp);
+}
+#endif
+
+#ifndef CONFIG_MALI_DVFS
+static int mali_stay_count = 0;
+static void mali_decide_next_status(int utilization_pp, int* next_fs_idx,
+		int* pp_change_flag)
+{
+	u32 mali_up_limit, decided_fs_idx;
+	u32 ld_left, ld_right;
+	u32 ld_up, ld_down;
+	u32 change_mode;
+
+	*pp_change_flag = 0;
+	change_mode = 0;
+
+	scalingdbg(5, "line(%d), scaling_mode=%d, MALI_TURBO_MODE=%d, turbo=%d, maxclk=%d\n",
+			__LINE__,  scaling_mode, MALI_TURBO_MODE,
+		    pmali_plat->turbo_clock, pmali_plat->scale_info.maxclk);
+
+	mali_up_limit = (scaling_mode ==  MALI_TURBO_MODE) ?
+		pmali_plat->turbo_clock : pmali_plat->scale_info.maxclk;
+	decided_fs_idx = currentStep;
+
+	ld_up = pmali_plat->dvfs_table[currentStep].upthreshold;
+	ld_down = pmali_plat->dvfs_table[currentStep].downthreshold;
+
+	scalingdbg(2, "utilization=%d,  ld_up=%d\n ", utilization_pp,  ld_up);
+	if (utilization_pp >= ld_up) { /* go up */
+
+		scalingdbg(2, "currentStep=%d,  mali_up_limit=%d\n ", currentStep, mali_up_limit);
+		if (currentStep < mali_up_limit) {
+			change_mode = 1;
+			if ((currentStep < pmali_plat->def_clock) && (utilization_pp > pmali_plat->bst_gpu))
+				decided_fs_idx = pmali_plat->def_clock;
+			else
+				decided_fs_idx++;
+		}
+		if ((utilization_pp >= ld_up) &&
+				(num_cores_enabled < pmali_plat->scale_info.maxpp)) {
+			if ((num_cores_enabled < pmali_plat->sc_mpp) && (utilization_pp >= pmali_plat->bst_pp)) {
+				*pp_change_flag = 1;
+				change_mode = 1;
+			} else if (change_mode == 0) {
+				*pp_change_flag = 2;
+				change_mode = 1;
+			}
+		}
+#if LOG_MALI_SCALING
+		scalingdbg(2, "[nexting..] [LD:%d]-> FS[CRNT:%d LMT:%d NEXT:%d] PP[NUM:%d LMT:%d MD:%d][F:%d]\n",
+				utilization_pp, currentStep, mali_up_limit, decided_fs_idx,
+				num_cores_enabled, pmali_plat->scale_info.maxpp, *pp_change_flag, change_mode);
+#endif
+	} else if (utilization_pp <= ld_down) { /* go down */
+		if (mali_stay_count > 0) {
+			*next_fs_idx = decided_fs_idx;
+			mali_stay_count--;
+			return;
+		}
+
+		if (num_cores_enabled > pmali_plat->sc_mpp) {
+			change_mode = 1;
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		} else if (currentStep > pmali_plat->scale_info.minclk) {
+			change_mode = 1;
+		} else if (num_cores_enabled > 1) { /* decrease PPS */
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				scalingdbg(2, "ld_left=%d, ld_right=%d\n", ld_left, ld_right);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		}
+
+		if (change_mode == 1) {
+			decided_fs_idx--;
+		} else if (change_mode == 2) { /* decrease PPS */
+			*pp_change_flag = -1;
+		}
+	}
+
+	if (decided_fs_idx < 0 ) {
+		printk("gpu debug, next index below 0\n");
+		decided_fs_idx = 0;
+	}
+	if (decided_fs_idx > pmali_plat->scale_info.maxclk) {
+		decided_fs_idx = pmali_plat->scale_info.maxclk;
+		printk("gpu debug, next index above max-1, set to %d\n", decided_fs_idx);
+	}
+
+	if (change_mode)
+		mali_stay_count = pmali_plat->dvfs_table[decided_fs_idx].keep_count;
+
+	*next_fs_idx = decided_fs_idx;
+}
+#endif
+
+void mali_pp_fs_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+	int pp_change_flag = 0;
+	u32 next_idx = 0;
+
+#if LOG_MALI_SCALING
+	u32 last_pp = num_cores_enabled;
+#endif
+	mali_decide_next_status(utilization_pp, &next_idx, &pp_change_flag);
+
+	if (pp_change_flag == 1)
+		ret = enable_pp_cores(pmali_plat->sc_mpp);
+	else if (pp_change_flag == 2)
+		ret = enable_one_core();
+	else if (pp_change_flag == -1) {
+		ret = disable_one_core();
+	}
+
+#if LOG_MALI_SCALING
+	if (pp_change_flag || (next_idx != currentStep))
+		trace_utilization(utilization_pp, currentStep, next_idx, last_pp, num_cores_enabled);
+#endif
+
+	if (next_idx != currentStep) {
+		ret = 1;
+		currentStep = next_idx;
+	}
+
+	if (ret == 1)
+		schedule_work(&wq_work);
+#ifdef CONFIG_MALI400_PROFILING
+	else
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				MALI_PROFILING_EVENT_CHANNEL_GPU |
+				MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				get_current_frequency(),
+				0,	0,	0,	0);
+#endif
+#endif
+}
+
+u32 get_mali_schel_mode(void)
+{
+	return scaling_mode;
+}
+
+void set_mali_schel_mode(u32 mode)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mode >= MALI_SCALING_MODE_MAX)
+		return;
+	scaling_mode = mode;
+
+	//disable thermal in turbo mode
+	if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->limit_on = 0;
+	} else {
+		pmali_plat->limit_on = 1;
+	}
+	/* set default performance range. */
+	pmali_plat->scale_info.minclk = pmali_plat->cfg_min_clock;
+	pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+	pmali_plat->scale_info.minpp = pmali_plat->cfg_min_pp;
+	pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+
+	/* set current status and tune max freq */
+	if (scaling_mode == MALI_PP_FS_SCALING) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_pp_cores(pmali_plat->sc_mpp);
+	} else if (scaling_mode == MALI_SCALING_DISABLE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_max_num_cores();
+	} else if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->turbo_clock;
+		enable_max_num_cores();
+	}
+	currentStep = pmali_plat->scale_info.maxclk;
+	schedule_work(&wq_work);
+#endif
+}
+
+u32 get_current_frequency(void)
+{
+	return get_mali_freq(currentStep);
+}
+
+void mali_gpu_utilization_callback(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_pm_statue)
+		return;
+
+	switch (scaling_mode) {
+		case MALI_PP_FS_SCALING:
+			mali_pp_fs_scaling_update(utilization_pp);
+			break;
+		case MALI_PP_SCALING:
+			mali_pp_scaling_update(utilization_pp);
+			break;
+		default:
+			break;
+	}
+#endif
+}
+static u32 clk_cntl_save = 0;
+void mali_dev_freeze(void)
+{
+	clk_cntl_save = mplt_read(HHI_MALI_CLK_CNTL);
+}
+
+void mali_dev_restore(void)
+{
+
+	mplt_write(HHI_MALI_CLK_CNTL, clk_cntl_save);
+	if (pmali_plat && pmali_plat->pdev) {
+		mali_clock_init_clk_tree(pmali_plat->pdev);
+	} else {
+		printk("error: init clock failed, pmali_plat=%p, pmali_plat->pdev=%p\n",
+				pmali_plat, pmali_plat == NULL ? NULL: pmali_plat->pdev);
+	}
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
new file mode 100644
index 0000000..7cb3be7
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @brief Entry point to transfer control to a platform for early initialization
+ *
+ * This function is called early on in the initialization during execution of
+ * @ref kbase_driver_init.
+ *
+ * @return Zero to indicate success non-zero for failure.
+ */
+int kbase_platform_early_init(void);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/meson/Kbuild b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/meson/Kbuild
new file mode 100644
index 0000000..d40d798
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/meson/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_config_devicetree.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_config_devicetree.c
new file mode 100644
index 0000000..299d0e7
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_config_devicetree.c
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase_config.h>
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &dummy_platform_config;
+}
+
+#ifndef CONFIG_OF
+int kbase_platform_register(void)
+{
+	return 0;
+}
+
+void kbase_platform_unregister(void)
+{
+}
+#endif
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_config_platform.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_config_platform.h
new file mode 100644
index 0000000..2ceca34
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_config_platform.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (5000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (5000)
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (NULL)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+/**
+ * Autosuspend delay
+ *
+ * The delay time (in milliseconds) to be used for autosuspend
+ */
+#define AUTO_SUSPEND_DELAY (100)
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_runtime_pm.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_runtime_pm.c
new file mode 100644
index 0000000..8e2fd74
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/meson/mali_kbase_runtime_pm.c
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <linux/pm_runtime.h>
+#include "mali_kbase_config_platform.h"
+
+inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2])
+{
+    trace_printk("utilisation=%d, util_gl_share=%d\n", utilisation, util_gl_share);
+    return 1;
+}
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	int ret = 1; /* Assume GPU has been powered off */
+	int error;
+
+	dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+			(void *)kbdev->dev->pm_domain);
+
+	error = pm_runtime_get_sync(kbdev->dev);
+	if (error == 1) {
+		/*
+		 * Let core know that the chip has not been
+		 * powered off, so we can save on re-initialization.
+		 */
+		ret = 0;
+	}
+
+	dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error);
+
+	return ret;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+
+	pm_runtime_mark_last_busy(kbdev->dev);
+	pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+
+	pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY);
+	pm_runtime_use_autosuspend(kbdev->dev);
+
+	pm_runtime_set_active(kbdev->dev);
+	pm_runtime_enable(kbdev->dev);
+
+	if (!pm_runtime_enabled(kbdev->dev)) {
+		dev_warn(kbdev->dev, "pm_runtime not enabled");
+		ret = -ENOSYS;
+	}
+
+	return ret;
+}
+
+static void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+	pm_runtime_disable(kbdev->dev);
+}
+#endif
+
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+	return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+	int ret = pm_callback_runtime_on(kbdev);
+
+	WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+	pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback = pm_callback_suspend,
+	.power_resume_callback = pm_callback_resume,
+#ifdef KBASE_PM_RUNTIME
+	.power_runtime_init_callback = kbase_device_runtime_init,
+	.power_runtime_term_callback = kbase_device_runtime_disable,
+	.power_runtime_on_callback = pm_callback_runtime_on,
+	.power_runtime_off_callback = pm_callback_runtime_off,
+#else				/* KBASE_PM_RUNTIME */
+	.power_runtime_init_callback = NULL,
+	.power_runtime_term_callback = NULL,
+	.power_runtime_on_callback = NULL,
+	.power_runtime_off_callback = NULL,
+#endif				/* KBASE_PM_RUNTIME */
+};
+
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
new file mode 100644
index 0000000..d9d5e90
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
new file mode 100644
index 0000000..02835f1
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -0,0 +1,75 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq()
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq()
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000..745884f
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+#include "mali_kbase_config_platform.h"
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+	.start = 0xFC010000,
+	.end = 0xFC010000 + (4096 * 4) - 1
+	}
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
new file mode 100644
index 0000000..4665f98
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,279 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START	(0x10000000)
+#define SYS_CFGDATA_OFFSET		(0x000000A0)
+#define SYS_CFGCTRL_OFFSET		(0x000000A4)
+#define SYS_CFGSTAT_OFFSET		(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		(1 << 31)
+#define READ_REG_BIT_VALUE			(0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			(0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		(1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			(1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE	(0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE		(2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		(1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			(1 <<  1)
+
+#define FEED_REG_BIT_MASK			(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+/* the following three values used for reading
+ * HBI value of the LogicTile daughterboard */
+#define VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 (0x10000000)
+#define VE_SYS_PROC_ID1_OFFSET (0x00000088)
+#define VE_LOGIC_TILE_HBI_MASK (0x00000FFF)
+
+#define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos))
+
+/**
+ * Values used for determining the GPU frequency based on the LogicTile type
+ * Used by the function kbase_get_platform_logic_tile_type
+ */
+#define VE_VIRTEX6_GPU_FREQ_MIN 5000
+#define VE_VIRTEX6_GPU_FREQ_MAX 5000
+#define VE_VIRTEX7_GPU_FREQ_MIN 40000
+#define VE_VIRTEX7_GPU_FREQ_MAX 40000
+#define VE_DEFAULT_GPU_FREQ_MIN 5000
+#define VE_DEFAULT_GPU_FREQ_MAX 5000
+
+
+#define CPU_CLOCK_SPEED_UNDEFINED (0)
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed -Retrieves the CPU clock speed
+ * @cpu_clock - the value of CPU clock speed in MHz
+ *
+ * Returns 0 on success, error code otherwise.
+ *
+ * The implementation is platform specific.
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	int err = 0;
+	u32 reg_val = 0;
+	u32 osc2_value = 0;
+	u32 pa_divide = 0;
+	u32 pb_divide = 0;
+	u32 pc_divide = 0;
+	void __iomem *syscfg_reg = NULL;
+	void __iomem *scc_reg = NULL;
+
+	if (CPU_CLOCK_SPEED_UNDEFINED != cpu_clock_speed) {
+		*cpu_clock = cpu_clock_speed;
+		return 0;
+	}
+
+	/* Init the value in case something goes wrong */
+	*cpu_clock = 0;
+
+	/* Map CPU register into virtual memory */
+	syscfg_reg = ioremap(MOTHERBOARD_SYS_CFG_START, 0x1000);
+	if (syscfg_reg == NULL) {
+		err = -EIO;
+		goto syscfg_reg_map_failed;
+	}
+
+	scc_reg = ioremap(CORETILE_EXPRESS_A9X4_SCC_START, 0x1000);
+	if (scc_reg == NULL) {
+		err = -EIO;
+		goto scc_reg_map_failed;
+	}
+
+	raw_spin_lock(&syscfg_lock);
+
+	/* Read SYS regs - OSC2 */
+	reg_val = readl(syscfg_reg + SYS_CFGCTRL_OFFSET);
+
+	/* Check if there is any other undergoing request */
+	if (reg_val & SYS_CFGCTRL_START_BIT_VALUE) {
+		err = -EBUSY;
+		goto ongoing_request;
+	}
+	/* Reset the CGFGSTAT reg */
+	writel(0, (syscfg_reg + SYS_CFGSTAT_OFFSET));
+
+	writel(SYS_CFGCTRL_START_BIT_VALUE | READ_REG_BIT_VALUE |
+			DCC_DEFAULT_BIT_VALUE |
+			SYS_CFG_OSC_FUNC_BIT_VALUE |
+			SITE_DEFAULT_BIT_VALUE |
+			BOARD_STACK_POS_DEFAULT_BIT_VALUE |
+			DEVICE_DEFAULT_BIT_VALUE,
+			(syscfg_reg + SYS_CFGCTRL_OFFSET));
+	/* Wait for the transaction to complete */
+	while (!(readl(syscfg_reg + SYS_CFGSTAT_OFFSET) &
+			SYS_CFG_COMPLETE_BIT_VALUE))
+		;
+	/* Read SYS_CFGSTAT Register to get the status of submitted
+	 * transaction */
+	reg_val = readl(syscfg_reg + SYS_CFGSTAT_OFFSET);
+
+	if (reg_val & SYS_CFG_ERROR_BIT_VALUE) {
+		/* Error while setting register */
+		err = -EIO;
+		goto set_reg_error;
+	}
+
+	osc2_value = readl(syscfg_reg + SYS_CFGDATA_OFFSET);
+	/* Read the SCC CFGRW0 register */
+	reg_val = readl(scc_reg);
+
+	/*
+	 * Select the appropriate feed:
+	 * CFGRW0[0] - CLKOB
+	 * CFGRW0[1] - CLKOC
+	 * CFGRW0[2] - FACLK (CLK)B FROM AXICLK PLL)
+	 */
+	/* Calculate the  FCLK */
+	if (IS_SINGLE_BIT_SET(reg_val, 0)) {
+		/* CFGRW0[0] - CLKOB */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[10:7] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PB_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 1)) {
+		/* CFGRW0[1] - CLKOC */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[14:11] */
+		pc_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PC_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PC_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 2)) {
+		/* CFGRW0[2] - FACLK */
+		/* CFGRW0[18:15] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PA_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[22:19] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PB_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else {
+		err = -EIO;
+	}
+
+set_reg_error:
+ongoing_request:
+	raw_spin_unlock(&syscfg_lock);
+	*cpu_clock /= HZ_IN_MHZ;
+
+	if (!err)
+		cpu_clock_speed = *cpu_clock;
+
+	iounmap(scc_reg);
+
+scc_reg_map_failed:
+	iounmap(syscfg_reg);
+
+syscfg_reg_map_failed:
+
+	return err;
+}
+
+/**
+ * kbase_get_platform_logic_tile_type -  determines which LogicTile type
+ * is used by Versatile Express
+ *
+ * When platform_config build parameter is specified as vexpress, i.e.,
+ * platform_config=vexpress, GPU frequency may vary dependent on the
+ * particular platform. The GPU frequency depends on the LogicTile type.
+ *
+ * This function determines which LogicTile type is used by the platform by
+ * reading the HBI value of the daughterboard which holds the LogicTile:
+ *
+ * 0x217 HBI0217 Virtex-6
+ * 0x192 HBI0192 Virtex-5
+ * 0x247 HBI0247 Virtex-7
+ *
+ * Return: HBI value of the logic tile daughterboard, zero if not accessible
+ */
+static u32 kbase_get_platform_logic_tile_type(void)
+{
+	void __iomem *syscfg_reg = NULL;
+	u32 sys_procid1 = 0;
+
+	syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 + VE_SYS_PROC_ID1_OFFSET, 4);
+	if (NULL != syscfg_reg) {
+		sys_procid1 = readl(syscfg_reg);
+		iounmap(syscfg_reg);
+	}
+
+	return sys_procid1 & VE_LOGIC_TILE_HBI_MASK;
+}
+
+u32 kbase_get_platform_min_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MIN;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MIN;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MIN;
+	}
+}
+
+u32 kbase_get_platform_max_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MAX;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MAX;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MAX;
+	}
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
new file mode 100644
index 0000000..da86569
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+/**
+ * Get the minimum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_min_freq(void);
+
+/**
+ * Get the maximum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_max_freq(void);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
new file mode 100644
index 0000000..df87c74
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2013-2014, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
new file mode 100644
index 0000000..0efbf39
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 5000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 5000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000..5a69758
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -0,0 +1,66 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+			     .start = 0x2f010000,
+			     .end = 0x2f010000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
new file mode 100644
index 0000000..d9d5e90
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
new file mode 100644
index 0000000..dbdf21e
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -0,0 +1,75 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 10000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 10000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000..5d8ec2d
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 75,
+	.mmu_irq_number = 76,
+	.gpu_irq_number = 77,
+	.io_memory_region = {
+			     .start = 0x2F000000,
+			     .end = 0x2F000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
new file mode 100644
index 0000000..816dff4
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ					    (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START		(0x10000000)
+#define SYS_CFGDATA_OFFSET				(0x000000A0)
+#define SYS_CFGCTRL_OFFSET				(0x000000A4)
+#define SYS_CFGSTAT_OFFSET				(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		  (1 << 31)
+#define READ_REG_BIT_VALUE				  (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			  (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		  (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			  (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE	      (2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		  (1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			  (1 <<  1)
+
+#define FEED_REG_BIT_MASK				(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+#define IS_SINGLE_BIT_SET(val, pos)		(val&(1<<pos))
+
+#define CPU_CLOCK_SPEED_UNDEFINED 0
+
+#define CPU_CLOCK_SPEED_6XV7 50
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed
+ * @brief  Retrieves the CPU clock speed.
+ *         The implementation is platform specific.
+ * @param[out]    cpu_clock - the value of CPU clock speed in MHz
+ * @return        0 on success, 1 otherwise
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	/* TODO: MIDBASE-2873 - Provide runtime detection of CPU clock freq for 6XV7 board */
+	*cpu_clock = CPU_CLOCK_SPEED_6XV7;
+
+	return 0;
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
new file mode 100644
index 0000000..23647cc
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
new file mode 100644
index 0000000..5fa9b39
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_ukk_os.h
+ * Types and definitions that are common for Linux OSs for the kernel side of the
+ * User-Kernel interface.
+ */
+
+#ifndef _UKK_OS_H_ /* Linux version */
+#define _UKK_OS_H_
+
+#include <linux/fs.h>
+
+/**
+ * @addtogroup uk_api User-Kernel Interface API
+ * @{
+ */
+
+/**
+ * @addtogroup uk_api_kernel UKK (Kernel side)
+ * @{
+ */
+
+/**
+ * Internal OS specific data structure associated with each UKK session. Part
+ * of a ukk_session object.
+ */
+typedef struct ukkp_session {
+	int dummy;     /**< No internal OS specific data at this time */
+} ukkp_session;
+
+/** @} end group uk_api_kernel */
+
+/** @} end group uk_api */
+
+#endif /* _UKK_OS_H__ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h
new file mode 100644
index 0000000..5dc2f3b
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _PROTECTED_MODE_SWITCH_H_
+#define _PROTECTED_MODE_SWITCH_H_
+
+struct protected_mode_device;
+
+/**
+ * struct protected_mode_ops - Callbacks for protected mode switch operations
+ *
+ * @protected_mode_enable:  Callback to enable protected mode for device
+ * @protected_mode_disable: Callback to disable protected mode for device
+ */
+struct protected_mode_ops {
+	/**
+	 * protected_mode_enable() - Enable protected mode on device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enable)(
+			struct protected_mode_device *protected_dev);
+
+	/**
+	 * protected_mode_disable() - Disable protected mode on device, and
+	 *                            reset device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_disable)(
+			struct protected_mode_device *protected_dev);
+};
+
+/**
+ * struct protected_mode_device - Device structure for protected mode devices
+ *
+ * @ops  - Callbacks associated with this device
+ * @data - Pointer to device private data
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct protected_mode_device {
+	struct protected_mode_ops ops;
+	void *data;
+};
+
+#endif /* _PROTECTED_MODE_SWITCH_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/sconscript b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/sconscript
new file mode 100644
index 0000000..f11933a
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/sconscript
@@ -0,0 +1,64 @@
+#
+# (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import sys
+Import('env')
+
+SConscript( 'tests/sconscript' )
+
+mock_test = 0
+
+# Source files required for kbase.
+kbase_src = [
+	Glob('*.c'),
+	Glob('backend/*/*.c'),
+	Glob('internal/*/*.c'),
+	Glob('ipa/*.c'),
+	Glob('platform/%s/*.c' % env['platform_config']),
+]
+
+if env['platform_config']=='juno_soc':
+	kbase_src += [Glob('platform/devicetree/*.c')]
+else:
+	kbase_src += [Glob('platform/%s/*.c' % env['platform_config'])]
+
+if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1':
+	kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')]
+	mock_test = 1
+
+make_args = env.kernel_get_config_defines(ret_list = True) + [
+	'PLATFORM=%s' % env['platform'],
+	'MALI_ERROR_INJECT_ON=%s' % env['error_inject'],
+	'MALI_KERNEL_TEST_API=%s' % env['debug'],
+	'MALI_UNIT_TEST=%s' % env['unit'],
+	'MALI_RELEASE_NAME=%s' % env['mali_release_name'],
+	'MALI_MOCK_TEST=%s' % mock_test,
+	'MALI_CUSTOMER_RELEASE=%s' % env['release'],
+	'MALI_COVERAGE=%s' % env['coverage'],
+]
+
+kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src,
+                              make_args = make_args)
+
+# need Module.symvers from ump.ko build
+if int(env['ump']) == 1:
+	env.Depends(kbase, '$STATIC_LIB_PATH/ump.ko')
+
+if 'smc_protected_mode_switcher' in env:
+	env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/smc_protected_mode_switcher.ko')
+
+env.KernelObjTarget('kbase', kbase)
+
+env.AppendUnique(BASE=['cutils_linked_list'])
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/Kbuild b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/Kbuild
new file mode 100644
index 0000000..b4bed04
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/Kbuild
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-$(CONFIG_MALI_KUTF) += kutf/
+obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/Kconfig b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/Kconfig
new file mode 100644
index 0000000..da0515c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/Kconfig
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+source "drivers/gpu/arm/midgard/tests/kutf/Kconfig"
+source "drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig"
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h
new file mode 100644
index 0000000..3667687
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KERNEL_UTF_HELPERS_H_
+#define _KERNEL_UTF_HELPERS_H_
+
+/* kutf_helpers.h
+ * Test helper functions for the kernel UTF test infrastructure.
+ *
+ * These functions provide methods for enqueuing/dequeuing lines of text sent
+ * by user space. They are used to implement the transfer of "userdata" from
+ * user space to kernel.
+ */
+
+#include <kutf/kutf_suite.h>
+
+/**
+ * kutf_helper_input_dequeue() - Dequeue a line sent by user space
+ * @context:    KUTF context
+ * @str_size:   Pointer to an integer to receive the size of the string
+ *
+ * If no line is available then this function will wait (interruptibly) until
+ * a line is available.
+ *
+ * Return: The line dequeued, ERR_PTR(-EINTR) if interrupted or NULL on end
+ * of data.
+ */
+char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size);
+
+/**
+ * kutf_helper_input_enqueue() - Enqueue a line sent by user space
+ * @context:   KUTF context
+ * @str:       The user space address of the line
+ * @size:      The length in bytes of the string
+ *
+ * This function will use copy_from_user to copy the string out of user space.
+ * The string need not be NULL-terminated (@size should not include the NULL
+ * termination).
+ *
+ * As a special case @str==NULL and @size==0 is valid to mark the end of input,
+ * but callers should use kutf_helper_input_enqueue_end_of_data() instead.
+ *
+ * Return: 0 on success, -EFAULT if the line cannot be copied from user space,
+ * -ENOMEM if out of memory.
+ */
+int kutf_helper_input_enqueue(struct kutf_context *context,
+		const char __user *str, size_t size);
+
+/**
+ * kutf_helper_input_enqueue_end_of_data() - Signal no more data is to be sent
+ * @context:    KUTF context
+ *
+ * After this function has been called, kutf_helper_input_dequeue() will always
+ * return NULL.
+ */
+void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context);
+
+#endif	/* _KERNEL_UTF_HELPERS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h
new file mode 100644
index 0000000..c7b5263
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h
@@ -0,0 +1,174 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KERNEL_UTF_HELPERS_USER_H_
+#define _KERNEL_UTF_HELPERS_USER_H_
+
+/* kutf_helpers.h
+ * Test helper functions for the kernel UTF test infrastructure, whose
+ * implementation mirrors that of similar functions for kutf-userside
+ */
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_helpers.h>
+
+
+#define KUTF_HELPER_MAX_VAL_NAME_LEN 255
+
+enum kutf_helper_valtype {
+	KUTF_HELPER_VALTYPE_INVALID,
+	KUTF_HELPER_VALTYPE_U64,
+	KUTF_HELPER_VALTYPE_STR,
+
+	KUTF_HELPER_VALTYPE_COUNT /* Must be last */
+};
+
+struct kutf_helper_named_val {
+	enum kutf_helper_valtype type;
+	char *val_name;
+	union {
+		u64 val_u64;
+		char *val_str;
+	} u;
+};
+
+/* Extra error values for certain helpers when we want to distinguish between
+ * Linux's own error values too.
+ *
+ * These can only be used on certain functions returning an int type that are
+ * documented as returning one of these potential values, they cannot be used
+ * from functions return a ptr type, since we can't decode it with PTR_ERR
+ *
+ * No negative values are used - Linux error codes should be used instead, and
+ * indicate a problem in accessing the data file itself (are generally
+ * unrecoverable)
+ *
+ * Positive values indicate correct access but invalid parsing (can be
+ * recovered from assuming data in the future is correct) */
+enum kutf_helper_err {
+	/* No error - must be zero */
+	KUTF_HELPER_ERR_NONE = 0,
+	/* Named value parsing encountered an invalid name */
+	KUTF_HELPER_ERR_INVALID_NAME,
+	/* Named value parsing of string or u64 type encountered extra
+	 * characters after the value (after the last digit for a u64 type or
+	 * after the string end delimiter for string type) */
+	KUTF_HELPER_ERR_CHARS_AFTER_VAL,
+	/* Named value parsing of string type couldn't find the string end
+	 * delimiter.
+	 *
+	 * This cannot be encountered when the NAME="value" message exceeds the
+	 * textbuf's maximum line length, because such messages are not checked
+	 * for an end string delimiter */
+	KUTF_HELPER_ERR_NO_END_DELIMITER,
+	/* Named value didn't parse as any of the known types */
+	KUTF_HELPER_ERR_INVALID_VALUE,
+};
+
+
+/* Send named NAME=value pair, u64 value
+ *
+ * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long
+ *
+ * Any failure will be logged on the suite's current test fixture
+ *
+ * Returns 0 on success, non-zero on failure
+ */
+int kutf_helper_send_named_u64(struct kutf_context *context,
+		const char *val_name, u64 val);
+
+/* Get the maximum length of a string that can be represented as a particular
+ * NAME="value" pair without string-value truncation in the kernel's buffer
+ *
+ * Given val_name and the kernel buffer's size, this can be used to determine
+ * the maximum length of a string that can be sent as val_name="value" pair
+ * without having the string value truncated. Any string longer than this will
+ * be truncated at some point during communication to this size.
+ *
+ * It is assumed that val_name is a valid name for
+ * kutf_helper_send_named_str(), and no checking will be made to
+ * ensure this.
+ *
+ * Returns the maximum string length that can be represented, or a negative
+ * value if the NAME="value" encoding itself wouldn't fit in kern_buf_sz
+ */
+int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz);
+
+/* Send named NAME="str" pair
+ *
+ * no escaping allowed in str. Any of the following characters will terminate
+ * the string: '"' '\\' '\n'
+ *
+ * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long
+ *
+ * Any failure will be logged on the suite's current test fixture
+ *
+ * Returns 0 on success, non-zero on failure */
+int kutf_helper_send_named_str(struct kutf_context *context,
+		const char *val_name, const char *val_str);
+
+/* Receive named NAME=value pair
+ *
+ * This can receive u64 and string values - check named_val->type
+ *
+ * If you are not planning on dynamic handling of the named value's name and
+ * type, then kutf_helper_receive_check_val() is more useful as a
+ * convenience function.
+ *
+ * String members of named_val will come from memory allocated on the fixture's mempool
+ *
+ * Returns 0 on success. Negative value on failure to receive from the 'run'
+ * file, positive value indicates an enum kutf_helper_err value for correct
+ * reception of data but invalid parsing */
+int kutf_helper_receive_named_val(
+		struct kutf_context *context,
+		struct kutf_helper_named_val *named_val);
+
+/* Receive and validate NAME=value pair
+ *
+ * As with kutf_helper_receive_named_val, but validate that the
+ * name and type are as expected, as a convenience for a common pattern found
+ * in tests.
+ *
+ * NOTE: this only returns an error value if there was actually a problem
+ * receiving data.
+ *
+ * NOTE: If the underlying data was received correctly, but:
+ * - isn't of the expected name
+ * - isn't the expected type
+ * - isn't correctly parsed for the type
+ * then the following happens:
+ * - failure result is recorded
+ * - named_val->type will be KUTF_HELPER_VALTYPE_INVALID
+ * - named_val->u will contain some default value that should be relatively
+ *   harmless for the test, including being writable in the case of string
+ *   values
+ * - return value will be 0 to indicate success
+ *
+ * The rationale behind this is that we'd prefer to continue the rest of the
+ * test with failures propagated, rather than hitting a timeout */
+int kutf_helper_receive_check_val(
+		struct kutf_helper_named_val *named_val,
+		struct kutf_context *context,
+		const char *expect_val_name,
+		enum kutf_helper_valtype expect_val_type);
+
+/* Output a named value to kmsg */
+void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val);
+
+
+#endif	/* _KERNEL_UTF_HELPERS_USER_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h
new file mode 100644
index 0000000..584c9dd
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KERNEL_UTF_MEM_H_
+#define _KERNEL_UTF_MEM_H_
+
+/* kutf_mem.h
+ * Functions for management of memory pools in the kernel.
+ *
+ * This module implements a memory pool allocator, allowing a test
+ * implementation to allocate linked allocations which can then be freed by a
+ * single free which releases all of the resources held by the entire pool.
+ *
+ * Note that it is not possible to free single resources within the pool once
+ * allocated.
+ */
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+/**
+ * struct kutf_mempool - the memory pool context management structure
+ * @head:	list head on which the allocations in this context are added to
+ * @lock:	mutex for concurrent allocation from multiple threads
+ *
+ */
+struct kutf_mempool {
+	struct list_head head;
+	struct mutex lock;
+};
+
+/**
+ * kutf_mempool_init() - Initialize a memory pool.
+ * @pool:	Memory pool structure to initialize, provided by the user
+ *
+ * Return:	zero on success
+ */
+int kutf_mempool_init(struct kutf_mempool *pool);
+
+/**
+ * kutf_mempool_alloc() - Allocate memory from a pool
+ * @pool:	Memory pool to allocate from
+ * @size:	Size of memory wanted in number of bytes
+ *
+ * Return:	Pointer to memory on success, NULL on failure.
+ */
+void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size);
+
+/**
+ * kutf_mempool_destroy() - Destroy a memory pool, freeing all memory within it.
+ * @pool:	The memory pool to free
+ */
+void kutf_mempool_destroy(struct kutf_mempool *pool);
+#endif	/* _KERNEL_UTF_MEM_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h
new file mode 100644
index 0000000..6787f71
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h
@@ -0,0 +1,176 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KERNEL_UTF_RESULTSET_H_
+#define _KERNEL_UTF_RESULTSET_H_
+
+/* kutf_resultset.h
+ * Functions and structures for handling test results and result sets.
+ *
+ * This section of the kernel UTF contains structures and functions used for the
+ * management of Results and Result Sets.
+ */
+
+/**
+ * enum kutf_result_status - Status values for a single Test error.
+ * @KUTF_RESULT_BENCHMARK:	Result is a meta-result containing benchmark
+ *                              results.
+ * @KUTF_RESULT_SKIP:		The test was skipped.
+ * @KUTF_RESULT_UNKNOWN:	The test has an unknown result.
+ * @KUTF_RESULT_PASS:		The test result passed.
+ * @KUTF_RESULT_DEBUG:		The test result passed, but raised a debug
+ *                              message.
+ * @KUTF_RESULT_INFO:		The test result passed, but raised
+ *                              an informative message.
+ * @KUTF_RESULT_WARN:		The test result passed, but raised a warning
+ *                              message.
+ * @KUTF_RESULT_FAIL:		The test result failed with a non-fatal error.
+ * @KUTF_RESULT_FATAL:		The test result failed with a fatal error.
+ * @KUTF_RESULT_ABORT:		The test result failed due to a non-UTF
+ *                              assertion failure.
+ * @KUTF_RESULT_USERDATA:	User data is ready to be read,
+ *                              this is not seen outside the kernel
+ * @KUTF_RESULT_USERDATA_WAIT:	Waiting for user data to be sent,
+ *                              this is not seen outside the kernel
+ * @KUTF_RESULT_TEST_FINISHED:	The test has finished, no more results will
+ *                              be produced. This is not seen outside kutf
+ */
+enum kutf_result_status {
+	KUTF_RESULT_BENCHMARK = -3,
+	KUTF_RESULT_SKIP    = -2,
+	KUTF_RESULT_UNKNOWN = -1,
+
+	KUTF_RESULT_PASS    = 0,
+	KUTF_RESULT_DEBUG   = 1,
+	KUTF_RESULT_INFO    = 2,
+	KUTF_RESULT_WARN    = 3,
+	KUTF_RESULT_FAIL    = 4,
+	KUTF_RESULT_FATAL   = 5,
+	KUTF_RESULT_ABORT   = 6,
+
+	KUTF_RESULT_USERDATA      = 7,
+	KUTF_RESULT_USERDATA_WAIT = 8,
+	KUTF_RESULT_TEST_FINISHED = 9
+};
+
+/* The maximum size of a kutf_result_status result when
+ * converted to a string
+ */
+#define KUTF_ERROR_MAX_NAME_SIZE 21
+
+#ifdef __KERNEL__
+
+#include <kutf/kutf_mem.h>
+#include <linux/wait.h>
+
+struct kutf_context;
+
+/**
+ * struct kutf_result - Represents a single test result.
+ * @node:	Next result in the list of results.
+ * @status:	The status summary (pass / warn / fail / etc).
+ * @message:	A more verbose status message.
+ */
+struct kutf_result {
+	struct list_head            node;
+	enum kutf_result_status     status;
+	const char                  *message;
+};
+
+/**
+ * KUTF_RESULT_SET_WAITING_FOR_INPUT - Test is waiting for user data
+ *
+ * This flag is set within a struct kutf_result_set whenever the test is blocked
+ * waiting for user data. Attempts to dequeue results when this flag is set
+ * will cause a dummy %KUTF_RESULT_USERDATA_WAIT result to be produced. This
+ * is used to output a warning message and end of file.
+ */
+#define KUTF_RESULT_SET_WAITING_FOR_INPUT 1
+
+/**
+ * struct kutf_result_set - Represents a set of results.
+ * @results:	List head of a struct kutf_result list for storing the results
+ * @waitq:	Wait queue signalled whenever new results are added.
+ * @flags:	Flags see %KUTF_RESULT_SET_WAITING_FOR_INPUT
+ */
+struct kutf_result_set {
+	struct list_head          results;
+	wait_queue_head_t         waitq;
+	int                       flags;
+};
+
+/**
+ * kutf_create_result_set() - Create a new result set
+ *                            to which results can be added.
+ *
+ * Return: The created result set.
+ */
+struct kutf_result_set *kutf_create_result_set(void);
+
+/**
+ * kutf_add_result() - Add a result to the end of an existing result set.
+ *
+ * @context:	The kutf context
+ * @status:	The result status to add.
+ * @message:	The result message to add.
+ *
+ * Return: 0 if the result is successfully added. -ENOMEM if allocation fails.
+ */
+int kutf_add_result(struct kutf_context *context,
+		enum kutf_result_status status, const char *message);
+
+/**
+ * kutf_remove_result() - Remove a result from the head of a result set.
+ * @set:	The result set.
+ *
+ * This function will block until there is a result to read. The wait is
+ * interruptible, so this function will return with an ERR_PTR if interrupted.
+ *
+ * Return: result or ERR_PTR if interrupted
+ */
+struct kutf_result *kutf_remove_result(
+		struct kutf_result_set *set);
+
+/**
+ * kutf_destroy_result_set() - Free a previously created result set.
+ *
+ * @results:	The result set whose resources to free.
+ */
+void kutf_destroy_result_set(struct kutf_result_set *results);
+
+/**
+ * kutf_set_waiting_for_input() - The test is waiting for userdata
+ *
+ * @set: The result set to update
+ *
+ * Causes the result set to always have results and return a fake
+ * %KUTF_RESULT_USERDATA_WAIT result.
+ */
+void kutf_set_waiting_for_input(struct kutf_result_set *set);
+
+/**
+ * kutf_clear_waiting_for_input() - The test is no longer waiting for userdata
+ *
+ * @set: The result set to update
+ *
+ * Cancels the effect of kutf_set_waiting_for_input()
+ */
+void kutf_clear_waiting_for_input(struct kutf_result_set *set);
+
+#endif	/* __KERNEL__ */
+
+#endif	/* _KERNEL_UTF_RESULTSET_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h
new file mode 100644
index 0000000..ca30e57
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h
@@ -0,0 +1,564 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KERNEL_UTF_SUITE_H_
+#define _KERNEL_UTF_SUITE_H_
+
+/* kutf_suite.h
+ * Functions for management of test suites.
+ *
+ * This collection of data structures, macros, and functions are used to
+ * create Test Suites, Tests within those Test Suites, and Fixture variants
+ * of each test.
+ */
+
+#include <linux/kref.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+
+#include <kutf/kutf_mem.h>
+#include <kutf/kutf_resultset.h>
+
+/* Arbitrary maximum size to prevent user space allocating too much kernel
+ * memory
+ */
+#define KUTF_MAX_LINE_LENGTH (1024u)
+
+/**
+ * Pseudo-flag indicating an absence of any specified test class. Note that
+ * tests should not be annotated with this constant as it is simply a zero
+ * value; tests without a more specific class must be marked with the flag
+ * KUTF_F_TEST_GENERIC.
+ */
+#define KUTF_F_TEST_NONE                ((unsigned int)(0))
+
+/**
+ * Class indicating this test is a smoke test.
+ * A given set of smoke tests should be quick to run, enabling rapid turn-around
+ * of "regress-on-commit" test runs.
+ */
+#define KUTF_F_TEST_SMOKETEST           ((unsigned int)(1 << 1))
+
+/**
+ * Class indicating this test is a performance test.
+ * These tests typically produce a performance metric, such as "time to run" or
+ * "frames per second",
+ */
+#define KUTF_F_TEST_PERFORMANCE         ((unsigned int)(1 << 2))
+
+/**
+ * Class indicating that this test is a deprecated test.
+ * These tests have typically been replaced by an alternative test which is
+ * more efficient, or has better coverage.
+ */
+#define KUTF_F_TEST_DEPRECATED          ((unsigned int)(1 << 3))
+
+/**
+ * Class indicating that this test is a known failure.
+ * These tests have typically been run and failed, but marking them as a known
+ * failure means it is easier to triage results.
+ *
+ * It is typically more convenient to triage known failures using the
+ * results database and web UI, as this means there is no need to modify the
+ * test code.
+ */
+#define KUTF_F_TEST_EXPECTED_FAILURE    ((unsigned int)(1 << 4))
+
+/**
+ * Class indicating that this test is a generic test, which is not a member of
+ * a more specific test class. Tests which are not created with a specific set
+ * of filter flags by the user are assigned this test class by default.
+ */
+#define KUTF_F_TEST_GENERIC             ((unsigned int)(1 << 5))
+
+/**
+ * Class indicating this test is a resource allocation failure test.
+ * A resource allocation failure test will test that an error code is
+ * correctly propagated when an allocation fails.
+ */
+#define KUTF_F_TEST_RESFAIL             ((unsigned int)(1 << 6))
+
+/**
+ * Additional flag indicating that this test is an expected failure when
+ * run in resource failure mode. These tests are never run when running
+ * the low resource mode.
+ */
+#define KUTF_F_TEST_EXPECTED_FAILURE_RF ((unsigned int)(1 << 7))
+
+/**
+ * Flag reserved for user-defined filter zero.
+ */
+#define KUTF_F_TEST_USER_0 ((unsigned int)(1 << 24))
+
+/**
+ * Flag reserved for user-defined filter one.
+ */
+#define KUTF_F_TEST_USER_1 ((unsigned int)(1 << 25))
+
+/**
+ * Flag reserved for user-defined filter two.
+ */
+#define KUTF_F_TEST_USER_2 ((unsigned int)(1 << 26))
+
+/**
+ * Flag reserved for user-defined filter three.
+ */
+#define KUTF_F_TEST_USER_3 ((unsigned int)(1 << 27))
+
+/**
+ * Flag reserved for user-defined filter four.
+ */
+#define KUTF_F_TEST_USER_4 ((unsigned int)(1 << 28))
+
+/**
+ * Flag reserved for user-defined filter five.
+ */
+#define KUTF_F_TEST_USER_5 ((unsigned int)(1 << 29))
+
+/**
+ * Flag reserved for user-defined filter six.
+ */
+#define KUTF_F_TEST_USER_6 ((unsigned int)(1 << 30))
+
+/**
+ * Flag reserved for user-defined filter seven.
+ */
+#define KUTF_F_TEST_USER_7 ((unsigned int)(1 << 31))
+
+/**
+ * Pseudo-flag indicating that all test classes should be executed.
+ */
+#define KUTF_F_TEST_ALL                 ((unsigned int)(0xFFFFFFFFU))
+
+/**
+ * union kutf_callback_data - Union used to store test callback data
+ * @ptr_value:		pointer to the location where test callback data
+ *                      are stored
+ * @u32_value:		a number which represents test callback data
+ */
+union kutf_callback_data {
+	void *ptr_value;
+	u32  u32_value;
+};
+
+/**
+ * struct kutf_userdata_line - A line of user data to be returned to the user
+ * @node:   struct list_head to link this into a list
+ * @str:    The line of user data to return to user space
+ * @size:   The number of bytes within @str
+ */
+struct kutf_userdata_line {
+	struct list_head node;
+	char *str;
+	size_t size;
+};
+
+/**
+ * KUTF_USERDATA_WARNING_OUTPUT - Flag specifying that a warning has been output
+ *
+ * If user space reads the "run" file while the test is waiting for user data,
+ * then the framework will output a warning message and set this flag within
+ * struct kutf_userdata. A subsequent read will then simply return an end of
+ * file condition rather than outputting the warning again. The upshot of this
+ * is that simply running 'cat' on a test which requires user data will produce
+ * the warning followed by 'cat' exiting due to EOF - which is much more user
+ * friendly than blocking indefinitely waiting for user data.
+ */
+#define KUTF_USERDATA_WARNING_OUTPUT  1
+
+/**
+ * struct kutf_userdata - Structure holding user data
+ * @flags:       See %KUTF_USERDATA_WARNING_OUTPUT
+ * @input_head:  List of struct kutf_userdata_line containing user data
+ *               to be read by the kernel space test.
+ * @input_waitq: Wait queue signalled when there is new user data to be
+ *               read by the kernel space test.
+ */
+struct kutf_userdata {
+	unsigned long flags;
+	struct list_head input_head;
+	wait_queue_head_t input_waitq;
+};
+
+/**
+ * struct kutf_context - Structure representing a kernel test context
+ * @kref:		Refcount for number of users of this context
+ * @suite:		Convenience pointer to the suite this context
+ *                      is running
+ * @test_fix:		The fixture that is being run in this context
+ * @fixture_pool:	The memory pool used for the duration of
+ *                      the fixture/text context.
+ * @fixture:		The user provided fixture structure.
+ * @fixture_index:	The index (id) of the current fixture.
+ * @fixture_name:	The name of the current fixture (or NULL if unnamed).
+ * @test_data:		Any user private data associated with this test
+ * @result_set:		All the results logged by this test context
+ * @status:		The status of the currently running fixture.
+ * @expected_status:	The expected status on exist of the currently
+ *                      running fixture.
+ * @work:		Work item to enqueue onto the work queue to run the test
+ * @userdata:		Structure containing the user data for the test to read
+ */
+struct kutf_context {
+	struct kref                     kref;
+	struct kutf_suite               *suite;
+	struct kutf_test_fixture        *test_fix;
+	struct kutf_mempool             fixture_pool;
+	void                            *fixture;
+	unsigned int                    fixture_index;
+	const char                      *fixture_name;
+	union kutf_callback_data        test_data;
+	struct kutf_result_set          *result_set;
+	enum kutf_result_status         status;
+	enum kutf_result_status         expected_status;
+
+	struct work_struct              work;
+	struct kutf_userdata            userdata;
+};
+
+/**
+ * struct kutf_suite - Structure representing a kernel test suite
+ * @app:			The application this suite belongs to.
+ * @name:			The name of this suite.
+ * @suite_data:			Any user private data associated with this
+ *                              suite.
+ * @create_fixture:		Function used to create a new fixture instance
+ * @remove_fixture:		Function used to destroy a new fixture instance
+ * @fixture_variants:		The number of variants (must be at least 1).
+ * @suite_default_flags:	Suite global filter flags which are set on
+ *                              all tests.
+ * @node:			List node for suite_list
+ * @dir:			The debugfs directory for this suite
+ * @test_list:			List head to store all the tests which are
+ *                              part of this suite
+ */
+struct kutf_suite {
+	struct kutf_application        *app;
+	const char                     *name;
+	union kutf_callback_data       suite_data;
+	void *(*create_fixture)(struct kutf_context *context);
+	void  (*remove_fixture)(struct kutf_context *context);
+	unsigned int                   fixture_variants;
+	unsigned int                   suite_default_flags;
+	struct list_head               node;
+	struct dentry                  *dir;
+	struct list_head               test_list;
+};
+
+/* ============================================================================
+	Application functions
+============================================================================ */
+
+/**
+ * kutf_create_application() - Create an in kernel test application.
+ * @name:	The name of the test application.
+ *
+ * Return: pointer to the kutf_application  on success or NULL
+ * on failure
+ */
+struct kutf_application *kutf_create_application(const char *name);
+
+/**
+ * kutf_destroy_application() - Destroy an in kernel test application.
+ *
+ * @app:	The test application to destroy.
+ */
+void kutf_destroy_application(struct kutf_application *app);
+
+/* ============================================================================
+	Suite functions
+============================================================================ */
+
+/**
+ * kutf_create_suite() - Create a kernel test suite.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *                      functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *                      is stored in the fixture pointer in the context for
+ *                      use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ *
+ * Suite names must be unique. Should two suites with the same name be
+ * registered with the same application then this function will fail, if they
+ * are registered with different applications then the function will not detect
+ * this and the call will succeed.
+ *
+ * Return: pointer to the created kutf_suite on success or NULL
+ * on failure
+ */
+struct kutf_suite *kutf_create_suite(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context));
+
+/**
+ * kutf_create_suite_with_filters() - Create a kernel test suite with user
+ *                                    defined default filters.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *                      functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *			is stored in the fixture pointer in the context for
+ *			use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ * @filters:		Filters to apply to a test if it doesn't provide its own
+ *
+ * Suite names must be unique. Should two suites with the same name be
+ * registered with the same application then this function will fail, if they
+ * are registered with different applications then the function will not detect
+ * this and the call will succeed.
+ *
+ * Return: pointer to the created kutf_suite on success or NULL on failure
+ */
+struct kutf_suite *kutf_create_suite_with_filters(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters);
+
+/**
+ * kutf_create_suite_with_filters_and_data() - Create a kernel test suite with
+ *                                             user defined default filters.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *			functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *			is stored in the fixture pointer in the context for
+ *			use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ * @filters:		Filters to apply to a test if it doesn't provide its own
+ * @suite_data:		Suite specific callback data, provided during the
+ *			running of the test in the kutf_context
+ *
+ * Return: pointer to the created kutf_suite on success or NULL
+ * on failure
+ */
+struct kutf_suite *kutf_create_suite_with_filters_and_data(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data suite_data);
+
+/**
+ * kutf_add_test() - Add a test to a kernel test suite.
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ *
+ * Note: As no filters are provided the test will use the suite filters instead
+ */
+void kutf_add_test(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context));
+
+/**
+ * kutf_add_test_with_filters() - Add a test to a kernel test suite with filters
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ * @filters:	A set of filtering flags, assigning test categories.
+ */
+void kutf_add_test_with_filters(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters);
+
+/**
+ * kutf_add_test_with_filters_and_data() - Add a test to a kernel test suite
+ *					   with filters.
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ * @filters:	A set of filtering flags, assigning test categories.
+ * @test_data:	Test specific callback data, provided during the
+ *		running of the test in the kutf_context
+ */
+void kutf_add_test_with_filters_and_data(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data test_data);
+
+
+/* ============================================================================
+	Test functions
+============================================================================ */
+/**
+ * kutf_test_log_result_external() - Log a result which has been created
+ *                                   externally into a in a standard form
+ *                                   recognized by the log parser.
+ * @context:	The test context the test is running in
+ * @message:	The message for this result
+ * @new_status:	The result status of this log message
+ */
+void kutf_test_log_result_external(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status);
+
+/**
+ * kutf_test_expect_abort() - Tell the kernel that you expect the current
+ *                            fixture to produce an abort.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_abort(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_fatal() - Tell the kernel that you expect the current
+ *                            fixture to produce a fatal error.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_fatal(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_fail() - Tell the kernel that you expect the current
+ *                           fixture to fail.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_fail(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_warn() - Tell the kernel that you expect the current
+ *                           fixture to produce a warning.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_warn(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_pass() - Tell the kernel that you expect the current
+ *                           fixture to pass.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_pass(struct kutf_context *context);
+
+/**
+ * kutf_test_skip() - Tell the kernel that the test should be skipped.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_skip(struct kutf_context *context);
+
+/**
+ * kutf_test_skip_msg() - Tell the kernel that this test has been skipped,
+ *                        supplying a reason string.
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the reason for the skip.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a prebaked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_skip_msg(struct kutf_context *context, const char *message);
+
+/**
+ * kutf_test_pass() - Tell the kernel that this test has passed.
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the reason for the pass.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_pass(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_debug() - Send a debug message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the debug information.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_debug(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_info() - Send an information message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the information message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_info(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_warn() - Send a warning message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the warning message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_warn(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_fail() - Tell the kernel that a test has failed
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the failure message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_fail(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_fatal() - Tell the kernel that a test has triggered a fatal error
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the fatal error message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_fatal(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_abort() - Tell the kernel that a test triggered an abort in the test
+ *
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_abort(struct kutf_context *context);
+
+#endif	/* _KERNEL_UTF_SUITE_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h
new file mode 100644
index 0000000..c458c1f
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h
@@ -0,0 +1,55 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KERNEL_UTF_UTILS_H_
+#define _KERNEL_UTF_UTILS_H_
+
+/* kutf_utils.h
+ * Utilities for the kernel UTF test infrastructure.
+ *
+ * This collection of library functions are provided for use by kernel UTF
+ * and users of kernel UTF which don't directly fit within the other
+ * code modules.
+ */
+
+#include <kutf/kutf_mem.h>
+
+/**
+ * Maximum size of the message strings within kernel UTF, messages longer then
+ * this will be truncated.
+ */
+#define KUTF_MAX_DSPRINTF_LEN	1024
+
+/**
+ * kutf_dsprintf() - dynamic sprintf
+ * @pool:	memory pool to allocate from
+ * @fmt:	The format string describing the string to document.
+ * @...		The parameters to feed in to the format string.
+ *
+ * This function implements sprintf which dynamically allocates memory to store
+ * the string. The library will free the memory containing the string when the
+ * result set is cleared or destroyed.
+ *
+ * Note The returned string may be truncated to fit an internal temporary
+ * buffer, which is KUTF_MAX_DSPRINTF_LEN bytes in length.
+ *
+ * Return: Returns pointer to allocated string, or NULL on error.
+ */
+const char *kutf_dsprintf(struct kutf_mempool *pool,
+		const char *fmt, ...);
+
+#endif	/* _KERNEL_UTF_UTILS_H_ */
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild
new file mode 100644
index 0000000..97f8005
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ccflags-y += -I$(src)/../include
+
+obj-$(CONFIG_MALI_KUTF) += kutf.o
+
+kutf-y := kutf_mem.o kutf_resultset.o kutf_suite.o kutf_utils.o kutf_helpers.o kutf_helpers_user.o
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig
new file mode 100644
index 0000000..6a87bdb
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig
@@ -0,0 +1,22 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config MALI_KUTF
+ tristate "Mali Kernel Unit Test Framework"
+ default m
+ help
+   Enables MALI testing framework. To compile it as a module,
+   choose M here - this will generate a single module called kutf.
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile
new file mode 100644
index 0000000..010c92c
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile
@@ -0,0 +1,29 @@
+#
+# (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS=-I$(CURDIR)/../include modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c
new file mode 100644
index 0000000..bf887a5
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* Kernel UTF test helpers */
+#include <kutf/kutf_helpers.h>
+
+#include <linux/err.h>
+#include <linux/jiffies.h>
+#include <linux/sched.h>
+#include <linux/preempt.h>
+#include <linux/wait.h>
+#include <linux/uaccess.h>
+
+static DEFINE_SPINLOCK(kutf_input_lock);
+
+static bool pending_input(struct kutf_context *context)
+{
+	bool input_pending;
+
+	spin_lock(&kutf_input_lock);
+
+	input_pending = !list_empty(&context->userdata.input_head);
+
+	spin_unlock(&kutf_input_lock);
+
+	return input_pending;
+}
+
+char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size)
+{
+	struct kutf_userdata_line *line;
+
+	spin_lock(&kutf_input_lock);
+
+	while (list_empty(&context->userdata.input_head)) {
+		int err;
+
+		kutf_set_waiting_for_input(context->result_set);
+
+		spin_unlock(&kutf_input_lock);
+
+		err = wait_event_interruptible(context->userdata.input_waitq,
+				pending_input(context));
+
+		if (err)
+			return ERR_PTR(-EINTR);
+
+		spin_lock(&kutf_input_lock);
+	}
+
+	line = list_first_entry(&context->userdata.input_head,
+			struct kutf_userdata_line, node);
+	if (line->str) {
+		/*
+		 * Unless it is the end-of-input marker,
+		 * remove it from the list
+		 */
+		list_del(&line->node);
+	}
+
+	spin_unlock(&kutf_input_lock);
+
+	if (str_size)
+		*str_size = line->size;
+	return line->str;
+}
+
+int kutf_helper_input_enqueue(struct kutf_context *context,
+		const char __user *str, size_t size)
+{
+	struct kutf_userdata_line *line;
+
+	line = kutf_mempool_alloc(&context->fixture_pool,
+			sizeof(*line) + size + 1);
+	if (!line)
+		return -ENOMEM;
+	if (str) {
+		unsigned long bytes_not_copied;
+
+		line->size = size;
+		line->str = (void *)(line + 1);
+		bytes_not_copied = copy_from_user(line->str, str, size);
+		if (bytes_not_copied != 0)
+			return -EFAULT;
+		/* Zero terminate the string */
+		line->str[size] = '\0';
+	} else {
+		/* This is used to mark the end of input */
+		WARN_ON(size);
+		line->size = 0;
+		line->str = NULL;
+	}
+
+	spin_lock(&kutf_input_lock);
+
+	list_add_tail(&line->node, &context->userdata.input_head);
+
+	kutf_clear_waiting_for_input(context->result_set);
+
+	spin_unlock(&kutf_input_lock);
+
+	wake_up(&context->userdata.input_waitq);
+
+	return 0;
+}
+
+void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context)
+{
+	kutf_helper_input_enqueue(context, NULL, 0);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c
new file mode 100644
index 0000000..9e8ab99
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c
@@ -0,0 +1,462 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* Kernel UTF test helpers that mirror those for kutf-userside */
+#include <kutf/kutf_helpers_user.h>
+#include <kutf/kutf_helpers.h>
+#include <kutf/kutf_utils.h>
+
+#include <linux/err.h>
+#include <linux/slab.h>
+
+const char *valtype_names[] = {
+	"INVALID",
+	"U64",
+	"STR",
+};
+
+static const char *get_val_type_name(enum kutf_helper_valtype valtype)
+{
+	/* enums can be signed or unsigned (implementation dependant), so
+	 * enforce it to prevent:
+	 * a) "<0 comparison on unsigned type" warning - if we did both upper
+	 *    and lower bound check
+	 * b) incorrect range checking if it was a signed type - if we did
+	 *    upper bound check only */
+	unsigned int type_idx = (unsigned int)valtype;
+
+	if (type_idx >= (unsigned int)KUTF_HELPER_VALTYPE_COUNT)
+		type_idx = (unsigned int)KUTF_HELPER_VALTYPE_INVALID;
+
+	return valtype_names[type_idx];
+}
+
+/* Check up to str_len chars of val_str to see if it's a valid value name:
+ *
+ * - Has between 1 and KUTF_HELPER_MAX_VAL_NAME_LEN characters before the \0 terminator
+ * - And, each char is in the character set [A-Z0-9_] */
+static int validate_val_name(const char *val_str, int str_len)
+{
+	int i = 0;
+
+	for (i = 0; str_len && i <= KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'; ++i, --str_len) {
+		char val_chr = val_str[i];
+
+		if (val_chr >= 'A' && val_chr <= 'Z')
+			continue;
+		if (val_chr >= '0' && val_chr <= '9')
+			continue;
+		if (val_chr == '_')
+			continue;
+
+		/* Character not in the set [A-Z0-9_] - report error */
+		return 1;
+	}
+
+	/* Names of 0 length are not valid */
+	if (i == 0)
+		return 1;
+	/* Length greater than KUTF_HELPER_MAX_VAL_NAME_LEN not allowed */
+	if (i > KUTF_HELPER_MAX_VAL_NAME_LEN || (i == KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'))
+		return 1;
+
+	return 0;
+}
+
+/* Find the length of the valid part of the string when it will be in quotes
+ * e.g. "str"
+ *
+ * That is, before any '\\', '\n' or '"' characters. This is so we don't have
+ * to escape the string */
+static int find_quoted_string_valid_len(const char *str)
+{
+	char *ptr;
+	const char *check_chars = "\\\n\"";
+
+	ptr = strpbrk(str, check_chars);
+	if (ptr)
+		return (int)(ptr-str);
+
+	return (int)strlen(str);
+}
+
+static int kutf_helper_userdata_enqueue(struct kutf_context *context,
+		const char *str)
+{
+	char *str_copy;
+	size_t len;
+	int err;
+
+	len = strlen(str)+1;
+
+	str_copy = kutf_mempool_alloc(&context->fixture_pool, len);
+	if (!str_copy)
+		return -ENOMEM;
+
+	strcpy(str_copy, str);
+
+	err = kutf_add_result(context, KUTF_RESULT_USERDATA, str_copy);
+
+	return err;
+}
+
+#define MAX_U64_HEX_LEN 16
+/* (Name size) + ("=0x" size) + (64-bit hex value size) + (terminator) */
+#define NAMED_U64_VAL_BUF_SZ (KUTF_HELPER_MAX_VAL_NAME_LEN + 3 + MAX_U64_HEX_LEN + 1)
+
+int kutf_helper_send_named_u64(struct kutf_context *context,
+		const char *val_name, u64 val)
+{
+	int ret = 1;
+	char msgbuf[NAMED_U64_VAL_BUF_SZ];
+	const char *errmsg = NULL;
+
+	if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': Invalid value name", val_name);
+		goto out_err;
+	}
+
+	ret = snprintf(msgbuf, NAMED_U64_VAL_BUF_SZ, "%s=0x%llx", val_name, val);
+	if (ret >= NAMED_U64_VAL_BUF_SZ || ret < 0) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': snprintf() problem buffer size==%d ret=%d",
+				val_name, NAMED_U64_VAL_BUF_SZ, ret);
+		goto out_err;
+	}
+
+	ret = kutf_helper_userdata_enqueue(context, msgbuf);
+	if (ret) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': send returned %d",
+				val_name, ret);
+		goto out_err;
+	}
+
+	return ret;
+out_err:
+	kutf_test_fail(context, errmsg);
+	return ret;
+}
+EXPORT_SYMBOL(kutf_helper_send_named_u64);
+
+#define NAMED_VALUE_SEP "="
+#define NAMED_STR_START_DELIM NAMED_VALUE_SEP "\""
+#define NAMED_STR_END_DELIM "\""
+
+int kutf_helper_max_str_len_for_kern(const char *val_name,
+		int kern_buf_sz)
+{
+	const int val_name_len = strlen(val_name);
+	const int start_delim_len = strlen(NAMED_STR_START_DELIM);
+	const int end_delim_len = strlen(NAMED_STR_END_DELIM);
+	int max_msg_len = kern_buf_sz;
+	int max_str_len;
+
+	max_str_len = max_msg_len - val_name_len - start_delim_len -
+		end_delim_len;
+
+	return max_str_len;
+}
+EXPORT_SYMBOL(kutf_helper_max_str_len_for_kern);
+
+int kutf_helper_send_named_str(struct kutf_context *context,
+		const char *val_name,
+		const char *val_str)
+{
+	int val_str_len;
+	int str_buf_sz;
+	char *str_buf = NULL;
+	int ret = 1;
+	char *copy_ptr;
+	int val_name_len;
+	int start_delim_len = strlen(NAMED_STR_START_DELIM);
+	int end_delim_len = strlen(NAMED_STR_END_DELIM);
+	const char *errmsg = NULL;
+
+	if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': Invalid value name", val_name);
+		goto out_err;
+	}
+	val_name_len = strlen(val_name);
+
+	val_str_len = find_quoted_string_valid_len(val_str);
+
+	/* (name length) + ("=\"" length) + (val_str len) + ("\"" length) + terminator */
+	str_buf_sz = val_name_len + start_delim_len + val_str_len + end_delim_len + 1;
+
+	/* Using kmalloc() here instead of mempool since we know we need to free
+	 * before we return */
+	str_buf = kmalloc(str_buf_sz, GFP_KERNEL);
+	if (!str_buf) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send str value named '%s': kmalloc failed, str_buf_sz=%d",
+				val_name, str_buf_sz);
+		goto out_err;
+	}
+	copy_ptr = str_buf;
+
+	/* Manually copy each string component instead of snprintf because
+	 * val_str may need to end early, and less error path handling */
+
+	/* name */
+	memcpy(copy_ptr, val_name, val_name_len);
+	copy_ptr += val_name_len;
+
+	/* str start delimiter */
+	memcpy(copy_ptr, NAMED_STR_START_DELIM, start_delim_len);
+	copy_ptr += start_delim_len;
+
+	/* str value */
+	memcpy(copy_ptr, val_str, val_str_len);
+	copy_ptr += val_str_len;
+
+	/* str end delimiter */
+	memcpy(copy_ptr, NAMED_STR_END_DELIM, end_delim_len);
+	copy_ptr += end_delim_len;
+
+	/* Terminator */
+	*copy_ptr = '\0';
+
+	ret = kutf_helper_userdata_enqueue(context, str_buf);
+
+	if (ret) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send str value named '%s': send returned %d",
+				val_name, ret);
+		goto out_err;
+	}
+
+	kfree(str_buf);
+	return ret;
+
+out_err:
+	kutf_test_fail(context, errmsg);
+	kfree(str_buf);
+	return ret;
+}
+EXPORT_SYMBOL(kutf_helper_send_named_str);
+
+int kutf_helper_receive_named_val(
+		struct kutf_context *context,
+		struct kutf_helper_named_val *named_val)
+{
+	size_t recv_sz;
+	char *recv_str;
+	char *search_ptr;
+	char *name_str = NULL;
+	int name_len;
+	int strval_len;
+	enum kutf_helper_valtype type = KUTF_HELPER_VALTYPE_INVALID;
+	char *strval = NULL;
+	u64 u64val = 0;
+	int err = KUTF_HELPER_ERR_INVALID_VALUE;
+
+	recv_str = kutf_helper_input_dequeue(context, &recv_sz);
+	if (!recv_str)
+		return -EBUSY;
+	else if (IS_ERR(recv_str))
+		return PTR_ERR(recv_str);
+
+	/* Find the '=', grab the name and validate it */
+	search_ptr = strnchr(recv_str, recv_sz, NAMED_VALUE_SEP[0]);
+	if (search_ptr) {
+		name_len = search_ptr - recv_str;
+		if (!validate_val_name(recv_str, name_len)) {
+			/* no need to reallocate - just modify string in place */
+			name_str = recv_str;
+			name_str[name_len] = '\0';
+
+			/* Move until after the '=' */
+			recv_str += (name_len + 1);
+			recv_sz -= (name_len + 1);
+		}
+	}
+	if (!name_str) {
+		pr_err("Invalid name part for received string '%s'\n",
+				recv_str);
+		return KUTF_HELPER_ERR_INVALID_NAME;
+	}
+
+	/* detect value type */
+	if (*recv_str == NAMED_STR_START_DELIM[1]) {
+		/* string delimiter start*/
+		++recv_str;
+		--recv_sz;
+
+		/* Find end of string */
+		search_ptr = strnchr(recv_str, recv_sz, NAMED_STR_END_DELIM[0]);
+		if (search_ptr) {
+			strval_len = search_ptr - recv_str;
+			/* Validate the string to ensure it contains no quotes */
+			if (strval_len == find_quoted_string_valid_len(recv_str)) {
+				/* no need to reallocate - just modify string in place */
+				strval = recv_str;
+				strval[strval_len] = '\0';
+
+				/* Move until after the end delimiter */
+				recv_str += (strval_len + 1);
+				recv_sz -= (strval_len + 1);
+				type = KUTF_HELPER_VALTYPE_STR;
+			} else {
+				pr_err("String value contains invalid characters in rest of received string '%s'\n", recv_str);
+				err = KUTF_HELPER_ERR_CHARS_AFTER_VAL;
+			}
+		} else {
+			pr_err("End of string delimiter not found in rest of received string '%s'\n", recv_str);
+			err = KUTF_HELPER_ERR_NO_END_DELIMITER;
+		}
+	} else {
+		/* possibly a number value - strtoull will parse it */
+		err = kstrtoull(recv_str, 0, &u64val);
+		/* unlike userspace can't get an end ptr, but if kstrtoull()
+		 * reads characters after the number it'll report -EINVAL */
+		if (!err) {
+			int len_remain = strnlen(recv_str, recv_sz);
+
+			type = KUTF_HELPER_VALTYPE_U64;
+			recv_str += len_remain;
+			recv_sz -= len_remain;
+		} else {
+			/* special case: not a number, report as such */
+			pr_err("Rest of received string was not a numeric value or quoted string value: '%s'\n", recv_str);
+		}
+	}
+
+	if (type == KUTF_HELPER_VALTYPE_INVALID)
+		return err;
+
+	/* Any remaining characters - error */
+	if (strnlen(recv_str, recv_sz) != 0) {
+		pr_err("Characters remain after value of type %s: '%s'\n",
+				get_val_type_name(type), recv_str);
+		return KUTF_HELPER_ERR_CHARS_AFTER_VAL;
+	}
+
+	/* Success - write into the output structure */
+	switch (type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		named_val->u.val_u64 = u64val;
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		named_val->u.val_str = strval;
+		break;
+	default:
+		pr_err("Unreachable, fix kutf_helper_receive_named_val\n");
+		/* Coding error, report as though 'run' file failed */
+		return -EINVAL;
+	}
+
+	named_val->val_name = name_str;
+	named_val->type = type;
+
+	return KUTF_HELPER_ERR_NONE;
+}
+EXPORT_SYMBOL(kutf_helper_receive_named_val);
+
+#define DUMMY_MSG "<placeholder due to test fail>"
+int kutf_helper_receive_check_val(
+		struct kutf_helper_named_val *named_val,
+		struct kutf_context *context,
+		const char *expect_val_name,
+		enum kutf_helper_valtype expect_val_type)
+{
+	int err;
+
+	err = kutf_helper_receive_named_val(context, named_val);
+	if (err < 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to receive value named '%s'",
+				expect_val_name);
+		kutf_test_fail(context, msg);
+		return err;
+	} else if (err > 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Named-value parse error when expecting value named '%s'",
+				expect_val_name);
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+	if (strcmp(named_val->val_name, expect_val_name) != 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Expecting to receive value named '%s' but got '%s'",
+				expect_val_name, named_val->val_name);
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+
+	if (named_val->type != expect_val_type) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Expecting value named '%s' to be of type %s but got %s",
+				expect_val_name, get_val_type_name(expect_val_type),
+				get_val_type_name(named_val->type));
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+	return err;
+
+out_fail_and_fixup:
+	/* Produce a valid but incorrect value */
+	switch (expect_val_type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		named_val->u.val_u64 = 0ull;
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		{
+			char *str = kutf_mempool_alloc(&context->fixture_pool, sizeof(DUMMY_MSG));
+
+			if (!str)
+				return -1;
+
+			strcpy(str, DUMMY_MSG);
+			named_val->u.val_str = str;
+			break;
+		}
+	default:
+		break;
+	}
+
+	/* Indicate that this is invalid */
+	named_val->type = KUTF_HELPER_VALTYPE_INVALID;
+
+	/* But at least allow the caller to continue in the test with failures */
+	return 0;
+}
+EXPORT_SYMBOL(kutf_helper_receive_check_val);
+
+void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val)
+{
+	switch (named_val->type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		pr_warn("%s=0x%llx\n", named_val->val_name, named_val->u.val_u64);
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		pr_warn("%s=\"%s\"\n", named_val->val_name, named_val->u.val_str);
+		break;
+	case KUTF_HELPER_VALTYPE_INVALID:
+		pr_warn("%s is invalid\n", named_val->val_name);
+		break;
+	default:
+		pr_warn("%s has unknown type %d\n", named_val->val_name, named_val->type);
+		break;
+	}
+}
+EXPORT_SYMBOL(kutf_helper_output_named_val);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c
new file mode 100644
index 0000000..a75e15f
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* Kernel UTF memory management functions */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include <kutf/kutf_mem.h>
+
+
+/**
+ * struct kutf_alloc_entry - Structure representing an allocation.
+ * @node:	List node for use with kutf_mempool.
+ * @data:	Data area of the allocation
+ */
+struct kutf_alloc_entry {
+	struct list_head node;
+	u8 data[0];
+};
+
+int kutf_mempool_init(struct kutf_mempool *pool)
+{
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		return -1;
+	}
+
+	INIT_LIST_HEAD(&pool->head);
+	mutex_init(&pool->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(kutf_mempool_init);
+
+void kutf_mempool_destroy(struct kutf_mempool *pool)
+{
+	struct list_head *remove;
+	struct list_head *tmp;
+
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		return;
+	}
+
+	mutex_lock(&pool->lock);
+	list_for_each_safe(remove, tmp, &pool->head) {
+		struct kutf_alloc_entry *remove_alloc;
+
+		remove_alloc = list_entry(remove, struct kutf_alloc_entry, node);
+		list_del(&remove_alloc->node);
+		kfree(remove_alloc);
+	}
+	mutex_unlock(&pool->lock);
+
+}
+EXPORT_SYMBOL(kutf_mempool_destroy);
+
+void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size)
+{
+	struct kutf_alloc_entry *ret;
+
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		goto fail_pool;
+	}
+
+	mutex_lock(&pool->lock);
+
+	ret = kmalloc(sizeof(*ret) + size, GFP_KERNEL);
+	if (!ret) {
+		pr_err("Failed to allocate memory\n");
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&ret->node);
+	list_add(&ret->node, &pool->head);
+
+	mutex_unlock(&pool->lock);
+
+	return &ret->data[0];
+
+fail_alloc:
+	mutex_unlock(&pool->lock);
+fail_pool:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_mempool_alloc);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c
new file mode 100644
index 0000000..41645a4
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c
@@ -0,0 +1,159 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* Kernel UTF result management functions */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/err.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_resultset.h>
+
+/* Lock to protect all result structures */
+static DEFINE_SPINLOCK(kutf_result_lock);
+
+struct kutf_result_set *kutf_create_result_set(void)
+{
+	struct kutf_result_set *set;
+
+	set = kmalloc(sizeof(*set), GFP_KERNEL);
+	if (!set) {
+		pr_err("Failed to allocate resultset");
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&set->results);
+	init_waitqueue_head(&set->waitq);
+	set->flags = 0;
+
+	return set;
+
+fail_alloc:
+	return NULL;
+}
+
+int kutf_add_result(struct kutf_context *context,
+		enum kutf_result_status status,
+		const char *message)
+{
+	struct kutf_mempool *mempool = &context->fixture_pool;
+	struct kutf_result_set *set = context->result_set;
+	/* Create the new result */
+	struct kutf_result *new_result;
+
+	BUG_ON(set == NULL);
+
+	new_result = kutf_mempool_alloc(mempool, sizeof(*new_result));
+	if (!new_result) {
+		pr_err("Result allocation failed\n");
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&new_result->node);
+	new_result->status = status;
+	new_result->message = message;
+
+	spin_lock(&kutf_result_lock);
+
+	list_add_tail(&new_result->node, &set->results);
+
+	spin_unlock(&kutf_result_lock);
+
+	wake_up(&set->waitq);
+
+	return 0;
+}
+
+void kutf_destroy_result_set(struct kutf_result_set *set)
+{
+	if (!list_empty(&set->results))
+		pr_err("kutf_destroy_result_set: Unread results from test\n");
+
+	kfree(set);
+}
+
+static bool kutf_has_result(struct kutf_result_set *set)
+{
+	bool has_result;
+
+	spin_lock(&kutf_result_lock);
+	if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT)
+		/* Pretend there are results if waiting for input */
+		has_result = true;
+	else
+		has_result = !list_empty(&set->results);
+	spin_unlock(&kutf_result_lock);
+
+	return has_result;
+}
+
+struct kutf_result *kutf_remove_result(struct kutf_result_set *set)
+{
+	struct kutf_result *result = NULL;
+	int ret;
+
+	do {
+		ret = wait_event_interruptible(set->waitq,
+				kutf_has_result(set));
+
+		if (ret)
+			return ERR_PTR(ret);
+
+		spin_lock(&kutf_result_lock);
+
+		if (!list_empty(&set->results)) {
+			result = list_first_entry(&set->results,
+					struct kutf_result,
+					node);
+			list_del(&result->node);
+		} else if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) {
+			/* Return a fake result */
+			static struct kutf_result waiting = {
+				.status = KUTF_RESULT_USERDATA_WAIT
+			};
+			result = &waiting;
+		}
+		/* If result == NULL then there was a race with the event
+		 * being removed between the check in kutf_has_result and
+		 * the lock being obtained. In this case we retry
+		 */
+
+		spin_unlock(&kutf_result_lock);
+	} while (result == NULL);
+
+	return result;
+}
+
+void kutf_set_waiting_for_input(struct kutf_result_set *set)
+{
+	spin_lock(&kutf_result_lock);
+	set->flags |= KUTF_RESULT_SET_WAITING_FOR_INPUT;
+	spin_unlock(&kutf_result_lock);
+
+	wake_up(&set->waitq);
+}
+
+void kutf_clear_waiting_for_input(struct kutf_result_set *set)
+{
+	spin_lock(&kutf_result_lock);
+	set->flags &= ~KUTF_RESULT_SET_WAITING_FOR_INPUT;
+	spin_unlock(&kutf_result_lock);
+}
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c
new file mode 100644
index 0000000..4968f24
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c
@@ -0,0 +1,1220 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* Kernel UTF suite, test and fixture management including user to kernel
+ * interaction */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/atomic.h>
+#include <linux/sched.h>
+
+#include <generated/autoconf.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_resultset.h>
+#include <kutf/kutf_utils.h>
+#include <kutf/kutf_helpers.h>
+
+#if defined(CONFIG_DEBUG_FS)
+
+/**
+ * struct kutf_application - Structure which represents kutf application
+ * @name:	The name of this test application.
+ * @dir:	The debugfs directory for this test
+ * @suite_list:	List head to store all the suites which are part of this
+ *              application
+ */
+struct kutf_application {
+	const char         *name;
+	struct dentry      *dir;
+	struct list_head   suite_list;
+};
+
+/**
+ * struct kutf_test_function - Structure which represents kutf test function
+ * @suite:		Back reference to the suite this test function
+ *                      belongs to
+ * @filters:		Filters that apply to this test function
+ * @test_id:		Test ID
+ * @execute:		Function to run for this test
+ * @test_data:		Static data for this test
+ * @node:		List node for test_list
+ * @variant_list:	List head to store all the variants which can run on
+ *                      this function
+ * @dir:		debugfs directory for this test function
+ */
+struct kutf_test_function {
+	struct kutf_suite  *suite;
+	unsigned int       filters;
+	unsigned int       test_id;
+	void (*execute)(struct kutf_context *context);
+	union kutf_callback_data test_data;
+	struct list_head   node;
+	struct list_head   variant_list;
+	struct dentry      *dir;
+};
+
+/**
+ * struct kutf_test_fixture - Structure which holds information on the kutf
+ *                            test fixture
+ * @test_func:		Test function this fixture belongs to
+ * @fixture_index:	Index of this fixture
+ * @node:		List node for variant_list
+ * @dir:		debugfs directory for this test fixture
+ */
+struct kutf_test_fixture {
+	struct kutf_test_function *test_func;
+	unsigned int              fixture_index;
+	struct list_head          node;
+	struct dentry             *dir;
+};
+
+static struct dentry *base_dir;
+static struct workqueue_struct *kutf_workq;
+
+/**
+ * struct kutf_convert_table - Structure which keeps test results
+ * @result_name:	Status of the test result
+ * @result:		Status value for a single test
+ */
+struct kutf_convert_table {
+	char                    result_name[50];
+	enum kutf_result_status result;
+};
+
+struct kutf_convert_table kutf_convert[] = {
+#define ADD_UTF_RESULT(_name) \
+{ \
+	#_name, \
+	_name, \
+},
+ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK)
+ADD_UTF_RESULT(KUTF_RESULT_SKIP)
+ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN)
+ADD_UTF_RESULT(KUTF_RESULT_PASS)
+ADD_UTF_RESULT(KUTF_RESULT_DEBUG)
+ADD_UTF_RESULT(KUTF_RESULT_INFO)
+ADD_UTF_RESULT(KUTF_RESULT_WARN)
+ADD_UTF_RESULT(KUTF_RESULT_FAIL)
+ADD_UTF_RESULT(KUTF_RESULT_FATAL)
+ADD_UTF_RESULT(KUTF_RESULT_ABORT)
+};
+
+#define UTF_CONVERT_SIZE (ARRAY_SIZE(kutf_convert))
+
+/**
+ * kutf_create_context() - Create a test context in which a specific fixture
+ *                         of an application will be run and its results
+ *                         reported back to the user
+ * @test_fix:	Test fixture to be run.
+ *
+ * The context's refcount will be initialized to 1.
+ *
+ * Return: Returns the created test context on success or NULL on failure
+ */
+static struct kutf_context *kutf_create_context(
+		struct kutf_test_fixture *test_fix);
+
+/**
+ * kutf_destroy_context() - Destroy a previously created test context, only
+ *                          once its refcount has become zero
+ * @kref:	pointer to kref member within the context
+ *
+ * This should only be used via a kref_put() call on the context's kref member
+ */
+static void kutf_destroy_context(struct kref *kref);
+
+/**
+ * kutf_context_get() - increment refcount on a context
+ * @context:	the kutf context
+ *
+ * This must be used when the lifetime of the context might exceed that of the
+ * thread creating @context
+ */
+static void kutf_context_get(struct kutf_context *context);
+
+/**
+ * kutf_context_put() - decrement refcount on a context, destroying it when it
+ *                      reached zero
+ * @context:	the kutf context
+ *
+ * This must be used only after a corresponding kutf_context_get() call on
+ * @context, and the caller no longer needs access to @context.
+ */
+static void kutf_context_put(struct kutf_context *context);
+
+/**
+ * kutf_set_result() - Set the test result against the specified test context
+ * @context:	Test context
+ * @status:	Result status
+ */
+static void kutf_set_result(struct kutf_context *context,
+		enum kutf_result_status status);
+
+/**
+ * kutf_set_expected_result() - Set the expected test result for the specified
+ *                              test context
+ * @context:		Test context
+ * @expected_status:	Expected result status
+ */
+static void kutf_set_expected_result(struct kutf_context *context,
+		enum kutf_result_status expected_status);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0))
+/* Pre 3.4.0 kernels don't have the simple_open helper */
+
+/**
+ * simple_open() - Helper for file opening which stores the inode private data
+ *                 into the file private data
+ * @inode:	File entry representation
+ * @file:	A specific opening of the file
+ *
+ * Return: always 0; if inode private data do not exist, the file will not
+ *         be assigned private data
+ */
+static int simple_open(struct inode *inode, struct file *file)
+{
+	if (inode->i_private)
+		file->private_data = inode->i_private;
+	return 0;
+}
+#endif
+
+/**
+ * kutf_result_to_string() - Converts a KUTF result into a string
+ * @result_str:      Output result string
+ * @result:          Result status to convert
+ *
+ * Return: 1 if test result was successfully converted to string, 0 otherwise
+ */
+static int kutf_result_to_string(char **result_str,
+		enum kutf_result_status result)
+{
+	int i;
+	int ret = 0;
+
+	for (i = 0; i < UTF_CONVERT_SIZE; i++) {
+		if (result == kutf_convert[i].result) {
+			*result_str = kutf_convert[i].result_name;
+			ret = 1;
+		}
+	}
+	return ret;
+}
+
+/**
+ * kutf_debugfs_const_string_read() - Simple debugfs read callback which
+ *                                    returns a constant string
+ * @file:	Opened file to read from
+ * @buf:	User buffer to write the data into
+ * @len:	Amount of data to read
+ * @ppos:	Offset into file to read from
+ *
+ * Return: On success, the number of bytes read and offset @ppos advanced by
+ *         this number; on error, negative value
+ */
+static ssize_t kutf_debugfs_const_string_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	char *str = file->private_data;
+
+	return simple_read_from_buffer(buf, len, ppos, str, strlen(str));
+}
+
+static const struct file_operations kutf_debugfs_const_string_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = kutf_debugfs_const_string_read,
+	.llseek  = default_llseek,
+};
+
+/**
+ * kutf_add_explicit_result() - Check if an explicit result needs to be added
+ * @context:	KUTF test context
+ */
+static void kutf_add_explicit_result(struct kutf_context *context)
+{
+	switch (context->expected_status) {
+	case KUTF_RESULT_UNKNOWN:
+		if (context->status == KUTF_RESULT_UNKNOWN)
+			kutf_test_pass(context, "(implicit pass)");
+		break;
+
+	case KUTF_RESULT_WARN:
+		if (context->status == KUTF_RESULT_WARN)
+			kutf_test_pass(context,
+					"Pass (expected warn occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected warn missing)");
+		break;
+
+	case KUTF_RESULT_FAIL:
+		if (context->status == KUTF_RESULT_FAIL)
+			kutf_test_pass(context,
+					"Pass (expected fail occurred)");
+		else if (context->status != KUTF_RESULT_SKIP) {
+			/* Force the expected status so the fail gets logged */
+			context->expected_status = KUTF_RESULT_PASS;
+			kutf_test_fail(context,
+					"Fail (expected fail missing)");
+		}
+		break;
+
+	case KUTF_RESULT_FATAL:
+		if (context->status == KUTF_RESULT_FATAL)
+			kutf_test_pass(context,
+					"Pass (expected fatal occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected fatal missing)");
+		break;
+
+	case KUTF_RESULT_ABORT:
+		if (context->status == KUTF_RESULT_ABORT)
+			kutf_test_pass(context,
+					"Pass (expected abort occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected abort missing)");
+		break;
+	default:
+		break;
+	}
+}
+
+static void kutf_run_test(struct work_struct *data)
+{
+	struct kutf_context *test_context = container_of(data,
+			struct kutf_context, work);
+	struct kutf_suite *suite = test_context->suite;
+	struct kutf_test_function *test_func;
+
+	test_func = test_context->test_fix->test_func;
+
+	/*
+	 * Call the create fixture function if required before the
+	 * fixture is run
+	 */
+	if (suite->create_fixture)
+		test_context->fixture = suite->create_fixture(test_context);
+
+	/* Only run the test if the fixture was created (if required) */
+	if ((suite->create_fixture && test_context->fixture) ||
+			(!suite->create_fixture)) {
+		/* Run this fixture */
+		test_func->execute(test_context);
+
+		if (suite->remove_fixture)
+			suite->remove_fixture(test_context);
+
+		kutf_add_explicit_result(test_context);
+	}
+
+	kutf_add_result(test_context, KUTF_RESULT_TEST_FINISHED, NULL);
+
+	kutf_context_put(test_context);
+}
+
+/**
+ * kutf_debugfs_run_open() Debugfs open callback for the "run" entry.
+ * @inode:	inode of the opened file
+ * @file:	Opened file to read from
+ *
+ * This function creates a KUTF context and queues it onto a workqueue to be
+ * run asynchronously. The resulting file descriptor can be used to communicate
+ * userdata to the test and to read back the results of the test execution.
+ *
+ * Return: 0 on success
+ */
+static int kutf_debugfs_run_open(struct inode *inode, struct file *file)
+{
+	struct kutf_test_fixture *test_fix = inode->i_private;
+	struct kutf_context *test_context;
+	int err = 0;
+
+	test_context = kutf_create_context(test_fix);
+	if (!test_context) {
+		err = -ENOMEM;
+		goto finish;
+	}
+
+	file->private_data = test_context;
+
+	/* This reference is release by the kutf_run_test */
+	kutf_context_get(test_context);
+
+	queue_work(kutf_workq, &test_context->work);
+
+finish:
+	return err;
+}
+
+#define USERDATA_WARNING_MESSAGE "WARNING: This test requires userdata\n"
+
+/**
+ * kutf_debugfs_run_read() - Debugfs read callback for the "run" entry.
+ * @file:	Opened file to read from
+ * @buf:	User buffer to write the data into
+ * @len:	Amount of data to read
+ * @ppos:	Offset into file to read from
+ *
+ * This function emits the results of the test, blocking until they are
+ * available.
+ *
+ * If the test involves user data then this will also return user data records
+ * to user space. If the test is waiting for user data then this function will
+ * output a message (to make the likes of 'cat' display it), followed by
+ * returning 0 to mark the end of file.
+ *
+ * Results will be emitted one at a time, once all the results have been read
+ * 0 will be returned to indicate there is no more data.
+ *
+ * Return: Number of bytes read.
+ */
+static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct kutf_context *test_context = file->private_data;
+	struct kutf_result *res;
+	unsigned long bytes_not_copied;
+	ssize_t bytes_copied = 0;
+	char *kutf_str_ptr = NULL;
+	size_t kutf_str_len = 0;
+	size_t message_len = 0;
+	char separator = ':';
+	char terminator = '\n';
+
+	res = kutf_remove_result(test_context->result_set);
+
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	/*
+	 * Handle 'fake' results - these results are converted to another
+	 * form before being returned from the kernel
+	 */
+	switch (res->status) {
+	case KUTF_RESULT_TEST_FINISHED:
+		return 0;
+	case KUTF_RESULT_USERDATA_WAIT:
+		if (test_context->userdata.flags &
+				KUTF_USERDATA_WARNING_OUTPUT) {
+			/*
+			 * Warning message already output,
+			 * signal end-of-file
+			 */
+			return 0;
+		}
+
+		message_len = sizeof(USERDATA_WARNING_MESSAGE)-1;
+		if (message_len > len)
+			message_len = len;
+
+		bytes_not_copied = copy_to_user(buf,
+				USERDATA_WARNING_MESSAGE,
+				message_len);
+		if (bytes_not_copied != 0)
+			return -EFAULT;
+		test_context->userdata.flags |= KUTF_USERDATA_WARNING_OUTPUT;
+		return message_len;
+	case KUTF_RESULT_USERDATA:
+		message_len = strlen(res->message);
+		if (message_len > len-1) {
+			message_len = len-1;
+			pr_warn("User data truncated, read not long enough\n");
+		}
+		bytes_not_copied = copy_to_user(buf, res->message,
+				message_len);
+		if (bytes_not_copied != 0) {
+			pr_warn("Failed to copy data to user space buffer\n");
+			return -EFAULT;
+		}
+		/* Finally the terminator */
+		bytes_not_copied = copy_to_user(&buf[message_len],
+				&terminator, 1);
+		if (bytes_not_copied != 0) {
+			pr_warn("Failed to copy data to user space buffer\n");
+			return -EFAULT;
+		}
+		return message_len+1;
+	default:
+		/* Fall through - this is a test result */
+		break;
+	}
+
+	/* Note: This code assumes a result is read completely */
+	kutf_result_to_string(&kutf_str_ptr, res->status);
+	if (kutf_str_ptr)
+		kutf_str_len = strlen(kutf_str_ptr);
+
+	if (res->message)
+		message_len = strlen(res->message);
+
+	if ((kutf_str_len + 1 + message_len + 1) > len) {
+		pr_err("Not enough space in user buffer for a single result");
+		return 0;
+	}
+
+	/* First copy the result string */
+	if (kutf_str_ptr) {
+		bytes_not_copied = copy_to_user(&buf[0], kutf_str_ptr,
+						kutf_str_len);
+		bytes_copied += kutf_str_len - bytes_not_copied;
+		if (bytes_not_copied)
+			goto exit;
+	}
+
+	/* Then the separator */
+	bytes_not_copied = copy_to_user(&buf[bytes_copied],
+					&separator, 1);
+	bytes_copied += 1 - bytes_not_copied;
+	if (bytes_not_copied)
+		goto exit;
+
+	/* Finally Next copy the result string */
+	if (res->message) {
+		bytes_not_copied = copy_to_user(&buf[bytes_copied],
+						res->message, message_len);
+		bytes_copied += message_len - bytes_not_copied;
+		if (bytes_not_copied)
+			goto exit;
+	}
+
+	/* Finally the terminator */
+	bytes_not_copied = copy_to_user(&buf[bytes_copied],
+					&terminator, 1);
+	bytes_copied += 1 - bytes_not_copied;
+
+exit:
+	return bytes_copied;
+}
+
+/**
+ * kutf_debugfs_run_write() Debugfs write callback for the "run" entry.
+ * @file:	Opened file to write to
+ * @buf:	User buffer to read the data from
+ * @len:	Amount of data to write
+ * @ppos:	Offset into file to write to
+ *
+ * This function allows user and kernel to exchange extra data necessary for
+ * the test fixture.
+ *
+ * The data is added to the first struct kutf_context running the fixture
+ *
+ * Return: Number of bytes written
+ */
+static ssize_t kutf_debugfs_run_write(struct file *file,
+		const char __user *buf, size_t len, loff_t *ppos)
+{
+	int ret = 0;
+	struct kutf_context *test_context = file->private_data;
+
+	if (len > KUTF_MAX_LINE_LENGTH)
+		return -EINVAL;
+
+	ret = kutf_helper_input_enqueue(test_context, buf, len);
+	if (ret < 0)
+		return ret;
+
+	return len;
+}
+
+/**
+ * kutf_debugfs_run_release() - Debugfs release callback for the "run" entry.
+ * @inode:	File entry representation
+ * @file:	A specific opening of the file
+ *
+ * Release any resources that were created during the opening of the file
+ *
+ * Note that resources may not be released immediately, that might only happen
+ * later when other users of the kutf_context release their refcount.
+ *
+ * Return: 0 on success
+ */
+static int kutf_debugfs_run_release(struct inode *inode, struct file *file)
+{
+	struct kutf_context *test_context = file->private_data;
+
+	kutf_helper_input_enqueue_end_of_data(test_context);
+
+	kutf_context_put(test_context);
+	return 0;
+}
+
+static const struct file_operations kutf_debugfs_run_ops = {
+	.owner = THIS_MODULE,
+	.open = kutf_debugfs_run_open,
+	.read = kutf_debugfs_run_read,
+	.write = kutf_debugfs_run_write,
+	.release = kutf_debugfs_run_release,
+	.llseek  = default_llseek,
+};
+
+/**
+ * create_fixture_variant() - Creates a fixture variant for the specified
+ *                            test function and index and the debugfs entries
+ *                            that represent it.
+ * @test_func:		Test function
+ * @fixture_index:	Fixture index
+ *
+ * Return: 0 on success, negative value corresponding to error code in failure
+ */
+static int create_fixture_variant(struct kutf_test_function *test_func,
+		unsigned int fixture_index)
+{
+	struct kutf_test_fixture *test_fix;
+	char name[11];	/* Enough to print the MAX_UINT32 + the null terminator */
+	struct dentry *tmp;
+	int err;
+
+	test_fix = kmalloc(sizeof(*test_fix), GFP_KERNEL);
+	if (!test_fix) {
+		pr_err("Failed to create debugfs directory when adding fixture\n");
+		err = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	test_fix->test_func = test_func;
+	test_fix->fixture_index = fixture_index;
+
+	snprintf(name, sizeof(name), "%d", fixture_index);
+	test_fix->dir = debugfs_create_dir(name, test_func->dir);
+	if (!test_func->dir) {
+		pr_err("Failed to create debugfs directory when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_dir;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, test_fix->dir, "fixture\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_file;
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	tmp = debugfs_create_file_unsafe(
+#else
+	tmp = debugfs_create_file(
+#endif
+			"run", 0600, test_fix->dir,
+			test_fix,
+			&kutf_debugfs_run_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"run\" when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_file;
+	}
+
+	list_add(&test_fix->node, &test_func->variant_list);
+	return 0;
+
+fail_file:
+	debugfs_remove_recursive(test_fix->dir);
+fail_dir:
+	kfree(test_fix);
+fail_alloc:
+	return err;
+}
+
+/**
+ * kutf_remove_test_variant() - Destroy a previously created fixture variant.
+ * @test_fix:	Test fixture
+ */
+static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix)
+{
+	debugfs_remove_recursive(test_fix->dir);
+	kfree(test_fix);
+}
+
+void kutf_add_test_with_filters_and_data(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data test_data)
+{
+	struct kutf_test_function *test_func;
+	struct dentry *tmp;
+	unsigned int i;
+
+	test_func = kmalloc(sizeof(*test_func), GFP_KERNEL);
+	if (!test_func) {
+		pr_err("Failed to allocate memory when adding test %s\n", name);
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&test_func->variant_list);
+
+	test_func->dir = debugfs_create_dir(name, suite->dir);
+	if (!test_func->dir) {
+		pr_err("Failed to create debugfs directory when adding test %s\n", name);
+		goto fail_dir;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, test_func->dir, "test\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	test_func->filters = filters;
+	tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir,
+				 &test_func->filters);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	test_func->test_id = id;
+	tmp = debugfs_create_u32("test_id", S_IROTH, test_func->dir,
+				 &test_func->test_id);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	for (i = 0; i < suite->fixture_variants; i++) {
+		if (create_fixture_variant(test_func, i)) {
+			pr_err("Failed to create fixture %d when adding test %s\n", i, name);
+			goto fail_file;
+		}
+	}
+
+	test_func->suite = suite;
+	test_func->execute = execute;
+	test_func->test_data = test_data;
+
+	list_add(&test_func->node, &suite->test_list);
+	return;
+
+fail_file:
+	debugfs_remove_recursive(test_func->dir);
+fail_dir:
+	kfree(test_func);
+fail_alloc:
+	return;
+}
+EXPORT_SYMBOL(kutf_add_test_with_filters_and_data);
+
+void kutf_add_test_with_filters(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters)
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+
+	kutf_add_test_with_filters_and_data(suite,
+					    id,
+					    name,
+					    execute,
+					    suite->suite_default_flags,
+					    data);
+}
+EXPORT_SYMBOL(kutf_add_test_with_filters);
+
+void kutf_add_test(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context))
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+
+	kutf_add_test_with_filters_and_data(suite,
+					    id,
+					    name,
+					    execute,
+					    suite->suite_default_flags,
+					    data);
+}
+EXPORT_SYMBOL(kutf_add_test);
+
+/**
+ * kutf_remove_test(): Remove a previously added test function.
+ * @test_func: Test function
+ */
+static void kutf_remove_test(struct kutf_test_function *test_func)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &test_func->variant_list) {
+		struct kutf_test_fixture *test_fix;
+
+		test_fix = list_entry(pos, struct kutf_test_fixture, node);
+		kutf_remove_test_variant(test_fix);
+	}
+
+	list_del(&test_func->node);
+	debugfs_remove_recursive(test_func->dir);
+	kfree(test_func);
+}
+
+struct kutf_suite *kutf_create_suite_with_filters_and_data(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data suite_data)
+{
+	struct kutf_suite *suite;
+	struct dentry *tmp;
+
+	suite = kmalloc(sizeof(*suite), GFP_KERNEL);
+	if (!suite) {
+		pr_err("Failed to allocate memory when creating suite %s\n", name);
+		goto fail_kmalloc;
+	}
+
+	suite->dir = debugfs_create_dir(name, app->dir);
+	if (!suite->dir) {
+		pr_err("Failed to create debugfs directory when adding test %s\n", name);
+		goto fail_debugfs;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, suite->dir, "suite\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	INIT_LIST_HEAD(&suite->test_list);
+	suite->app = app;
+	suite->name = name;
+	suite->fixture_variants = fixture_count;
+	suite->create_fixture = create_fixture;
+	suite->remove_fixture = remove_fixture;
+	suite->suite_default_flags = filters;
+	suite->suite_data = suite_data;
+
+	list_add(&suite->node, &app->suite_list);
+
+	return suite;
+
+fail_file:
+	debugfs_remove_recursive(suite->dir);
+fail_debugfs:
+	kfree(suite);
+fail_kmalloc:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_create_suite_with_filters_and_data);
+
+struct kutf_suite *kutf_create_suite_with_filters(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters)
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+	return kutf_create_suite_with_filters_and_data(app,
+						       name,
+						       fixture_count,
+						       create_fixture,
+						       remove_fixture,
+						       filters,
+						       data);
+}
+EXPORT_SYMBOL(kutf_create_suite_with_filters);
+
+struct kutf_suite *kutf_create_suite(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context))
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+	return kutf_create_suite_with_filters_and_data(app,
+						       name,
+						       fixture_count,
+						       create_fixture,
+						       remove_fixture,
+						       KUTF_F_TEST_GENERIC,
+						       data);
+}
+EXPORT_SYMBOL(kutf_create_suite);
+
+/**
+ * kutf_destroy_suite() - Destroy a previously added test suite.
+ * @suite:	Test suite
+ */
+static void kutf_destroy_suite(struct kutf_suite *suite)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &suite->test_list) {
+		struct kutf_test_function *test_func;
+
+		test_func = list_entry(pos, struct kutf_test_function, node);
+		kutf_remove_test(test_func);
+	}
+
+	list_del(&suite->node);
+	debugfs_remove_recursive(suite->dir);
+	kfree(suite);
+}
+
+struct kutf_application *kutf_create_application(const char *name)
+{
+	struct kutf_application *app;
+	struct dentry *tmp;
+
+	app = kmalloc(sizeof(*app), GFP_KERNEL);
+	if (!app) {
+		pr_err("Failed to create allocate memory when creating application %s\n", name);
+		goto fail_kmalloc;
+	}
+
+	app->dir = debugfs_create_dir(name, base_dir);
+	if (!app->dir) {
+		pr_err("Failed to create debugfs direcotry when creating application %s\n", name);
+		goto fail_debugfs;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, app->dir, "application\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name);
+		goto fail_file;
+	}
+
+	INIT_LIST_HEAD(&app->suite_list);
+	app->name = name;
+
+	return app;
+
+fail_file:
+	debugfs_remove_recursive(app->dir);
+fail_debugfs:
+	kfree(app);
+fail_kmalloc:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_create_application);
+
+void kutf_destroy_application(struct kutf_application *app)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &app->suite_list) {
+		struct kutf_suite *suite;
+
+		suite = list_entry(pos, struct kutf_suite, node);
+		kutf_destroy_suite(suite);
+	}
+
+	debugfs_remove_recursive(app->dir);
+	kfree(app);
+}
+EXPORT_SYMBOL(kutf_destroy_application);
+
+static struct kutf_context *kutf_create_context(
+		struct kutf_test_fixture *test_fix)
+{
+	struct kutf_context *new_context;
+
+	new_context = kmalloc(sizeof(*new_context), GFP_KERNEL);
+	if (!new_context) {
+		pr_err("Failed to allocate test context");
+		goto fail_alloc;
+	}
+
+	new_context->result_set = kutf_create_result_set();
+	if (!new_context->result_set) {
+		pr_err("Failed to create result set");
+		goto fail_result_set;
+	}
+
+	new_context->test_fix = test_fix;
+	/* Save the pointer to the suite as the callbacks will require it */
+	new_context->suite = test_fix->test_func->suite;
+	new_context->status = KUTF_RESULT_UNKNOWN;
+	new_context->expected_status = KUTF_RESULT_UNKNOWN;
+
+	kutf_mempool_init(&new_context->fixture_pool);
+	new_context->fixture = NULL;
+	new_context->fixture_index = test_fix->fixture_index;
+	new_context->fixture_name = NULL;
+	new_context->test_data = test_fix->test_func->test_data;
+
+	new_context->userdata.flags = 0;
+	INIT_LIST_HEAD(&new_context->userdata.input_head);
+	init_waitqueue_head(&new_context->userdata.input_waitq);
+
+	INIT_WORK(&new_context->work, kutf_run_test);
+
+	kref_init(&new_context->kref);
+
+	return new_context;
+
+fail_result_set:
+	kfree(new_context);
+fail_alloc:
+	return NULL;
+}
+
+static void kutf_destroy_context(struct kref *kref)
+{
+	struct kutf_context *context;
+
+	context = container_of(kref, struct kutf_context, kref);
+	kutf_destroy_result_set(context->result_set);
+	kutf_mempool_destroy(&context->fixture_pool);
+	kfree(context);
+}
+
+static void kutf_context_get(struct kutf_context *context)
+{
+	kref_get(&context->kref);
+}
+
+static void kutf_context_put(struct kutf_context *context)
+{
+	kref_put(&context->kref, kutf_destroy_context);
+}
+
+
+static void kutf_set_result(struct kutf_context *context,
+		enum kutf_result_status status)
+{
+	context->status = status;
+}
+
+static void kutf_set_expected_result(struct kutf_context *context,
+		enum kutf_result_status expected_status)
+{
+	context->expected_status = expected_status;
+}
+
+/**
+ * kutf_test_log_result() - Log a result for the specified test context
+ * @context:	Test context
+ * @message:	Result string
+ * @new_status:	Result status
+ */
+static void kutf_test_log_result(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status)
+{
+	if (context->status < new_status)
+		context->status = new_status;
+
+	if (context->expected_status != new_status)
+		kutf_add_result(context, new_status, message);
+}
+
+void kutf_test_log_result_external(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status)
+{
+	kutf_test_log_result(context, message, new_status);
+}
+EXPORT_SYMBOL(kutf_test_log_result_external);
+
+void kutf_test_expect_abort(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_ABORT);
+}
+EXPORT_SYMBOL(kutf_test_expect_abort);
+
+void kutf_test_expect_fatal(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_FATAL);
+}
+EXPORT_SYMBOL(kutf_test_expect_fatal);
+
+void kutf_test_expect_fail(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_FAIL);
+}
+EXPORT_SYMBOL(kutf_test_expect_fail);
+
+void kutf_test_expect_warn(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_WARN);
+}
+EXPORT_SYMBOL(kutf_test_expect_warn);
+
+void kutf_test_expect_pass(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_PASS);
+}
+EXPORT_SYMBOL(kutf_test_expect_pass);
+
+void kutf_test_skip(struct kutf_context *context)
+{
+	kutf_set_result(context, KUTF_RESULT_SKIP);
+	kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN);
+
+	kutf_test_log_result(context, "Test skipped", KUTF_RESULT_SKIP);
+}
+EXPORT_SYMBOL(kutf_test_skip);
+
+void kutf_test_skip_msg(struct kutf_context *context, const char *message)
+{
+	kutf_set_result(context, KUTF_RESULT_SKIP);
+	kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN);
+
+	kutf_test_log_result(context, kutf_dsprintf(&context->fixture_pool,
+			     "Test skipped: %s", message), KUTF_RESULT_SKIP);
+	kutf_test_log_result(context, "!!!Test skipped!!!", KUTF_RESULT_SKIP);
+}
+EXPORT_SYMBOL(kutf_test_skip_msg);
+
+void kutf_test_debug(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_DEBUG);
+}
+EXPORT_SYMBOL(kutf_test_debug);
+
+void kutf_test_pass(struct kutf_context *context, char const *message)
+{
+	static const char explicit_message[] = "(explicit pass)";
+
+	if (!message)
+		message = explicit_message;
+
+	kutf_test_log_result(context, message, KUTF_RESULT_PASS);
+}
+EXPORT_SYMBOL(kutf_test_pass);
+
+void kutf_test_info(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_INFO);
+}
+EXPORT_SYMBOL(kutf_test_info);
+
+void kutf_test_warn(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_WARN);
+}
+EXPORT_SYMBOL(kutf_test_warn);
+
+void kutf_test_fail(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_FAIL);
+}
+EXPORT_SYMBOL(kutf_test_fail);
+
+void kutf_test_fatal(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_FATAL);
+}
+EXPORT_SYMBOL(kutf_test_fatal);
+
+void kutf_test_abort(struct kutf_context *context)
+{
+	kutf_test_log_result(context, "", KUTF_RESULT_ABORT);
+}
+EXPORT_SYMBOL(kutf_test_abort);
+
+/**
+ * init_kutf_core() - Module entry point.
+ *
+ * Create the base entry point in debugfs.
+ */
+static int __init init_kutf_core(void)
+{
+	kutf_workq = alloc_workqueue("kutf workq", WQ_UNBOUND, 1);
+	if (!kutf_workq)
+		return -ENOMEM;
+
+	base_dir = debugfs_create_dir("kutf_tests", NULL);
+	if (!base_dir) {
+		destroy_workqueue(kutf_workq);
+		kutf_workq = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * exit_kutf_core() - Module exit point.
+ *
+ * Remove the base entry point in debugfs.
+ */
+static void __exit exit_kutf_core(void)
+{
+	debugfs_remove_recursive(base_dir);
+
+	if (kutf_workq)
+		destroy_workqueue(kutf_workq);
+}
+
+#else	/* defined(CONFIG_DEBUG_FS) */
+
+/**
+ * init_kutf_core() - Module entry point.
+ *
+ * Stub for when build against a kernel without debugfs support
+ */
+static int __init init_kutf_core(void)
+{
+	pr_debug("KUTF requires a kernel with debug fs support");
+
+	return -ENODEV;
+}
+
+/**
+ * exit_kutf_core() - Module exit point.
+ *
+ * Stub for when build against a kernel without debugfs support
+ */
+static void __exit exit_kutf_core(void)
+{
+}
+#endif	/* defined(CONFIG_DEBUG_FS) */
+
+MODULE_LICENSE("GPL");
+
+module_init(init_kutf_core);
+module_exit(exit_kutf_core);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c
new file mode 100644
index 0000000..a429a2d
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* Kernel UTF utility functions */
+
+#include <linux/mutex.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+#include <kutf/kutf_utils.h>
+#include <kutf/kutf_mem.h>
+
+static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN];
+
+DEFINE_MUTEX(buffer_lock);
+
+const char *kutf_dsprintf(struct kutf_mempool *pool,
+		const char *fmt, ...)
+{
+	va_list args;
+	int len;
+	int size;
+	void *buffer;
+
+	mutex_lock(&buffer_lock);
+	va_start(args, fmt);
+	len = vsnprintf(tmp_buffer, sizeof(tmp_buffer), fmt, args);
+	va_end(args);
+
+	if (len < 0) {
+		pr_err("kutf_dsprintf: Bad format dsprintf format %s\n", fmt);
+		goto fail_format;
+	}
+
+	if (len >= sizeof(tmp_buffer)) {
+		pr_warn("kutf_dsprintf: Truncated dsprintf message %s\n", fmt);
+		size = sizeof(tmp_buffer);
+	} else {
+		size = len + 1;
+	}
+
+	buffer = kutf_mempool_alloc(pool, size);
+	if (!buffer)
+		goto fail_alloc;
+
+	memcpy(buffer, tmp_buffer, size);
+	mutex_unlock(&buffer_lock);
+
+	return buffer;
+
+fail_alloc:
+fail_format:
+	mutex_unlock(&buffer_lock);
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_dsprintf);
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/sconscript b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/sconscript
new file mode 100644
index 0000000..d7f1124
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/kutf/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2014-2016, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('kutf_env')
+
+make_args = kutf_env.kernel_get_config_defines(ret_list = True)
+
+mod = kutf_env.BuildKernelModule('$STATIC_LIB_PATH/kutf.ko', Glob('*.c'), make_args = make_args)
+kutf_env.KernelObjTarget('kutf', mod)
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild
new file mode 100644
index 0000000..0cd9ceb
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android
+
+obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test.o
+
+mali_kutf_irq_test-y := mali_kutf_irq_test_main.o
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig
new file mode 100644
index 0000000..f4553d3
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+config MALI_IRQ_LATENCY
+ tristate "Mali GPU IRQ latency measurement"
+ depends on MALI_MIDGARD && MALI_DEBUG && MALI_KUTF
+ default m
+ help
+   This option will build a test module mali_kutf_irq_test that
+   can determine the latency of the Mali GPU IRQ on your system.
+   Choosing M here will generate a single module called mali_kutf_irq_test.
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
new file mode 100644
index 0000000..2ac4f97
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
@@ -0,0 +1,47 @@
+#
+# (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+TEST_CCFLAGS := \
+	-DMALI_DEBUG=$(MALI_DEBUG) \
+	-DMALI_BACKEND_KERNEL=$(MALI_BACKEND_KERNEL) \
+	-DMALI_NO_MALI=$(MALI_NO_MALI) \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_USE_UMP=$(MALI_USE_UMP) \
+	-DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	$(SCONS_CFLAGS) \
+	-I$(CURDIR)/../include \
+	-I$(CURDIR)/../../../../../../include \
+	-I$(CURDIR)/../../../ \
+	-I$(CURDIR)/../../ \
+	-I$(CURDIR)/../../backend/gpu \
+	-I$(CURDIR)/ \
+	-I$(srctree)/drivers/staging/android \
+	-I$(srctree)/include/linux
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS="$(TEST_CCFLAGS)" KBUILD_EXTRA_SYMBOLS="$(CURDIR)/../kutf/Module.symvers $(CURDIR)/../../Module.symvers" modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
new file mode 100644
index 0000000..c9cc444
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
@@ -0,0 +1,269 @@
+/*
+ *
+ * (C) COPYRIGHT 2016, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+
+#include "mali_kbase.h"
+#include <midgard/backend/gpu/mali_kbase_device_internal.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_utils.h>
+
+/*
+ * This file contains the code which is used for measuring interrupt latency
+ * of the Mali GPU IRQ. In particular, function mali_kutf_irq_latency() is
+ * used with this purpose and it is called within KUTF framework - a kernel
+ * unit test framework. The measured latency provided by this test should
+ * be representative for the latency of the Mali JOB/MMU IRQs as well.
+ */
+
+/* KUTF test application pointer for this test */
+struct kutf_application *irq_app;
+
+/**
+ * struct kutf_irq_fixture data - test fixture used by the test functions.
+ * @kbdev:	kbase device for the GPU.
+ *
+ */
+struct kutf_irq_fixture_data {
+	struct kbase_device *kbdev;
+};
+
+#define SEC_TO_NANO(s)	      ((s)*1000000000LL)
+
+/* ID for the GPU IRQ */
+#define GPU_IRQ_HANDLER 2
+
+#define NR_TEST_IRQS 1000000
+
+/* IRQ for the test to trigger. Currently MULTIPLE_GPU_FAULTS as we would not
+ * expect to see this in normal use (e.g., when Android is running). */
+#define TEST_IRQ MULTIPLE_GPU_FAULTS
+
+#define IRQ_TIMEOUT HZ
+
+/* Kernel API for setting irq throttle hook callback and irq time in us*/
+extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+		irq_handler_t custom_handler,
+		int irq_type);
+extern irqreturn_t kbase_gpu_irq_handler(int irq, void *data);
+
+static DECLARE_WAIT_QUEUE_HEAD(wait);
+static bool triggered;
+static u64 irq_time;
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+/**
+ * kbase_gpu_irq_custom_handler - Custom IRQ throttle handler
+ * @irq:  IRQ number
+ * @data: Data associated with this IRQ
+ *
+ * Return: state of the IRQ
+ */
+static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data)
+{
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+	if (val & TEST_IRQ) {
+		struct timespec tval;
+
+		getnstimeofday(&tval);
+		irq_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec);
+
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val,
+				NULL);
+
+		triggered = true;
+		wake_up(&wait);
+
+		return IRQ_HANDLED;
+	}
+
+	/* Trigger main irq handler */
+	return kbase_gpu_irq_handler(irq, data);
+}
+
+/**
+ * mali_kutf_irq_default_create_fixture() - Creates the fixture data required
+ *                                          for all the tests in the irq suite.
+ * @context:             KUTF context.
+ *
+ * Return: Fixture data created on success or NULL on failure
+ */
+static void *mali_kutf_irq_default_create_fixture(
+		struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data;
+
+	data = kutf_mempool_alloc(&context->fixture_pool,
+			sizeof(struct kutf_irq_fixture_data));
+
+	if (!data)
+		goto fail;
+
+	/* Acquire the kbase device */
+	data->kbdev = kbase_find_device(-1);
+	if (data->kbdev == NULL) {
+		kutf_test_fail(context, "Failed to find kbase device");
+		goto fail;
+	}
+
+	return data;
+
+fail:
+	return NULL;
+}
+
+/**
+ * mali_kutf_irq_default_remove_fixture() - Destroy fixture data previously
+ *                          created by mali_kutf_irq_default_create_fixture.
+ *
+ * @context:             KUTF context.
+ */
+static void mali_kutf_irq_default_remove_fixture(
+		struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data = context->fixture;
+	struct kbase_device *kbdev = data->kbdev;
+
+	kbase_release_device(kbdev);
+}
+
+/**
+ * mali_kutf_irq_latency() - measure GPU IRQ latency
+ * @context:		kutf context within which to perform the test
+ *
+ * The test triggers IRQs manually, and measures the
+ * time between triggering the IRQ and the IRQ handler being executed.
+ *
+ * This is not a traditional test, in that the pass/fail status has little
+ * meaning (other than indicating that the IRQ handler executed at all). Instead
+ * the results are in the latencies provided with the test result. There is no
+ * meaningful pass/fail result that can be obtained here, instead the latencies
+ * are provided for manual analysis only.
+ */
+static void mali_kutf_irq_latency(struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data = context->fixture;
+	struct kbase_device *kbdev = data->kbdev;
+	u64 min_time = U64_MAX, max_time = 0, average_time = 0;
+	int i;
+	bool test_failed = false;
+
+	/* Force GPU to be powered */
+	kbase_pm_context_active(kbdev);
+
+	kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler,
+			GPU_IRQ_HANDLER);
+
+	for (i = 0; i < NR_TEST_IRQS; i++) {
+		struct timespec tval;
+		u64 start_time;
+		int ret;
+
+		triggered = false;
+		getnstimeofday(&tval);
+		start_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec);
+
+		/* Trigger fake IRQ */
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+				TEST_IRQ, NULL);
+
+		ret = wait_event_timeout(wait, triggered != false, IRQ_TIMEOUT);
+
+		if (ret == 0) {
+			kutf_test_fail(context, "Timed out waiting for IRQ\n");
+			test_failed = true;
+			break;
+		}
+
+		if ((irq_time - start_time) < min_time)
+			min_time = irq_time - start_time;
+		if ((irq_time - start_time) > max_time)
+			max_time = irq_time - start_time;
+		average_time += irq_time - start_time;
+
+		udelay(10);
+	}
+
+	/* Go back to default handler */
+	kbase_set_custom_irq_handler(kbdev, NULL, GPU_IRQ_HANDLER);
+
+	kbase_pm_context_idle(kbdev);
+
+	if (!test_failed) {
+		const char *results;
+
+		do_div(average_time, NR_TEST_IRQS);
+		results = kutf_dsprintf(&context->fixture_pool,
+				"Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n",
+				min_time, max_time, average_time);
+		kutf_test_pass(context, results);
+	}
+}
+
+/**
+ * Module entry point for this test.
+ */
+int mali_kutf_irq_test_main_init(void)
+{
+	struct kutf_suite *suite;
+
+	irq_app = kutf_create_application("irq");
+
+	if (NULL == irq_app) {
+		pr_warn("Creation of test application failed!\n");
+		return -ENOMEM;
+	}
+
+	suite = kutf_create_suite(irq_app, "irq_default",
+			1, mali_kutf_irq_default_create_fixture,
+			mali_kutf_irq_default_remove_fixture);
+
+	if (NULL == suite) {
+		pr_warn("Creation of test suite failed!\n");
+		kutf_destroy_application(irq_app);
+		return -ENOMEM;
+	}
+
+	kutf_add_test(suite, 0x0, "irq_latency",
+			mali_kutf_irq_latency);
+	return 0;
+}
+
+/**
+ * Module exit point for this test.
+ */
+void mali_kutf_irq_test_main_exit(void)
+{
+	kutf_destroy_application(irq_app);
+}
+
+module_init(mali_kutf_irq_test_main_init);
+module_exit(mali_kutf_irq_test_main_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript
new file mode 100644
index 0000000..280fa98
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+Import('env')
+
+src = [Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/*.c'), Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile')]
+
+if env.GetOption('clean') :
+	env.Execute(Action("make clean", '[CLEAN] mali_kutf_irq_test'))
+	cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [])
+	env.KernelObjTarget('mali_kutf_irq_test', cmd)
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make MALI_DEBUG=${debug} MALI_BACKEND_KERNEL=1 MALI_ERROR_INJECT_ON=${error_inject} MALI_NO_MALI=${no_mali} MALI_UNIT_TEST=${unit} MALI_USE_UMP=${ump} MALI_CUSTOMER_RELEASE=${release} %s && ( ( [ -f mali_kutf_irq_test.ko ] && cp mali_kutf_irq_test.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/mali_kutf_irq_test.ko)" % env.kernel_get_config_defines(), '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [makeAction])
+	env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/kutf.ko')
+	env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/mali_kbase.ko')
+	env.KernelObjTarget('mali_kutf_irq_test', cmd)
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/sconscript b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/sconscript
new file mode 100644
index 0000000..0458411
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/midgard/tests/sconscript
@@ -0,0 +1,38 @@
+#
+# (C) COPYRIGHT 2010-2011, 2013, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import ('env')
+
+kutf_env = env.Clone()
+kutf_env.Append(CPPPATH = '#kernel/drivers/gpu/arm/midgard/tests/include')
+Export('kutf_env')
+
+if Glob('internal/sconscript'):
+	SConscript('internal/sconscript')
+
+if kutf_env['debug'] == '1':
+	SConscript('kutf/sconscript')
+	SConscript('mali_kutf_irq_test/sconscript')
+
+	if Glob('kutf_test/sconscript'):
+		SConscript('kutf_test/sconscript')
+
+	if Glob('kutf_test_runner/sconscript'):
+		SConscript('kutf_test_runner/sconscript')
+
+if env['unit'] == '1':
+	SConscript('mali_kutf_ipa_test/sconscript')
+	SConscript('mali_kutf_ipa_unit_test/sconscript')
+	SConscript('mali_kutf_vinstr_test/sconscript')
diff --git a/bifrost/r9p0/kernel/drivers/gpu/arm/sconscript b/bifrost/r9p0/kernel/drivers/gpu/arm/sconscript
new file mode 100644
index 0000000..924511b
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/arm/sconscript
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import glob
+
+
+SConscript('midgard/sconscript')
+
diff --git a/bifrost/r9p0/kernel/drivers/gpu/sconscript b/bifrost/r9p0/kernel/drivers/gpu/sconscript
new file mode 100644
index 0000000..b81fb4d
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/gpu/sconscript
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2010-2013,2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+SConscript('arm/sconscript')
+
diff --git a/bifrost/r9p0/kernel/drivers/sconscript b/bifrost/r9p0/kernel/drivers/sconscript
new file mode 100644
index 0000000..87e231a
--- /dev/null
+++ b/bifrost/r9p0/kernel/drivers/sconscript
@@ -0,0 +1,22 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+if Glob('base/sconscript'):
+	SConscript('base/sconscript')
+
+if Glob('video/sconscript'):
+	SConscript('video/sconscript')
+
+SConscript('gpu/sconscript')
diff --git a/bifrost/r9p0/kernel/include/linux/dma-buf-test-exporter.h b/bifrost/r9p0/kernel/include/linux/dma-buf-test-exporter.h
new file mode 100644
index 0000000..98984ba
--- /dev/null
+++ b/bifrost/r9p0/kernel/include/linux/dma-buf-test-exporter.h
@@ -0,0 +1,78 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_
+#define _LINUX_DMA_BUF_TEST_EXPORTER_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+#define DMA_BUF_TE_VER_MAJOR 1
+#define DMA_BUF_TE_VER_MINOR 0
+#define DMA_BUF_TE_ENQ 0x642d7465
+#define DMA_BUF_TE_ACK 0x68692100
+
+struct dma_buf_te_ioctl_version
+{
+	int op;    /**< Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */
+	int major; /**< Major version */
+	int minor; /**< Minor version */
+};
+
+struct dma_buf_te_ioctl_alloc
+{
+	__u64 size; /* size of buffer to allocate, in pages */
+};
+
+struct dma_buf_te_ioctl_status
+{
+	/* in */
+	int fd; /* the dma_buf to query, only dma_buf objects exported by this driver is supported */
+	/* out */
+	int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */
+	int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */
+	int cpu_mappings;    /* number of cpu mappings (active 'mmap's) */
+};
+
+struct dma_buf_te_ioctl_set_failing
+{
+	/* in */
+	int fd; /* the dma_buf to set failure mode for, only dma_buf objects exported by this driver is supported */
+
+	/* zero = no fail injection, non-zero = inject failure */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+};
+
+struct dma_buf_te_ioctl_fill
+{
+	int fd;
+	unsigned int value;
+};
+
+#define DMA_BUF_TE_IOCTL_BASE 'E'
+/* Below all returning 0 if successful or -errcode except DMA_BUF_TE_ALLOC which will return fd or -errcode */
+#define DMA_BUF_TE_VERSION         _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version)
+#define DMA_BUF_TE_ALLOC           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_QUERY           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status)
+#define DMA_BUF_TE_SET_FAILING     _IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing)
+#define DMA_BUF_TE_ALLOC_CONT      _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_FILL            _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill)
+
+#endif /* _LINUX_DMA_BUF_TEST_EXPORTER_H_ */
diff --git a/bifrost/r9p0/kernel/include/linux/protected_mode_switcher.h b/bifrost/r9p0/kernel/include/linux/protected_mode_switcher.h
new file mode 100644
index 0000000..5dc2f3b
--- /dev/null
+++ b/bifrost/r9p0/kernel/include/linux/protected_mode_switcher.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _PROTECTED_MODE_SWITCH_H_
+#define _PROTECTED_MODE_SWITCH_H_
+
+struct protected_mode_device;
+
+/**
+ * struct protected_mode_ops - Callbacks for protected mode switch operations
+ *
+ * @protected_mode_enable:  Callback to enable protected mode for device
+ * @protected_mode_disable: Callback to disable protected mode for device
+ */
+struct protected_mode_ops {
+	/**
+	 * protected_mode_enable() - Enable protected mode on device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enable)(
+			struct protected_mode_device *protected_dev);
+
+	/**
+	 * protected_mode_disable() - Disable protected mode on device, and
+	 *                            reset device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_disable)(
+			struct protected_mode_device *protected_dev);
+};
+
+/**
+ * struct protected_mode_device - Device structure for protected mode devices
+ *
+ * @ops  - Callbacks associated with this device
+ * @data - Pointer to device private data
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct protected_mode_device {
+	struct protected_mode_ops ops;
+	void *data;
+};
+
+#endif /* _PROTECTED_MODE_SWITCH_H_ */
diff --git a/bifrost/r9p0/kernel/include/linux/ump-common.h b/bifrost/r9p0/kernel/include/linux/ump-common.h
new file mode 100644
index 0000000..0015bda
--- /dev/null
+++ b/bifrost/r9p0/kernel/include/linux/ump-common.h
@@ -0,0 +1,252 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump-common.h
+ *
+ * This file contains some common enum values used both in both the user and kernel space side of UMP.
+ */
+
+#ifndef _UMP_COMMON_H_
+#define _UMP_COMMON_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#define UMP_UINT32_MAX (4294967295U)
+#define UMP_UINT64_MAX (18446744073709551615ULL)
+
+#ifdef __GNUC__
+	#define CHECK_RESULT        __attribute__((__warn_unused_result__))
+	#define INLINE	__inline__
+#else
+	#define CHECK_RESULT
+	#define INLINE __inline
+#endif
+
+#ifndef STATIC
+	#define STATIC static
+#endif
+
+/**
+ * Values to identify major and minor version of UMP
+ */
+#define UMP_VERSION_MAJOR 2
+#define UMP_VERSION_MINOR 0
+
+/**
+ * Typedef for a secure ID, a system wide identifier for UMP memory buffers.
+ */
+typedef int32_t ump_secure_id;
+
+/**
+ * Value to indicate an invalid secure Id.
+ */
+#define UMP_INVALID_SECURE_ID  ((ump_secure_id)0)
+
+/**
+ * UMP error codes.
+ */
+typedef enum
+{
+	UMP_OK    = 0, /**< indicates success */
+	UMP_ERROR = 1  /**< indicates failure */
+} ump_result;
+
+/**
+ * Allocation flag bits.
+ *
+ * ump_allocate accepts zero or more flags to specify the type of memory to allocate and how to expose it to devices.
+ *
+ * For each supported device there are 4 flags to control access permissions and give usage characteristic hints to optimize the allocation/mapping.
+ * They are;
+ * @li @a UMP_PROT_<device>_RD   read permission
+ * @li @a UMP_PROT_<device>_WR   write permission
+ * @li @a UMP_HINT_<device>_RD   read often
+ * @li @a UMP_HINT_<device>_WR   written often
+ *
+ * 5 devices are currently supported, with a device being the CPU itself.
+ * The other 4 devices will be mapped to real devices per SoC design.
+ * They are just named W,X,Y,Z by UMP as it has no knowledge of their real names/identifiers.
+ * As an example device W could be a camera device while device Z could be an ARM GPU device, leaving X and Y unused.
+ *
+ * 2 additional flags control the allocation;
+ * @li @a UMP_CONSTRAINT_PHYSICALLY_LINEAR   the allocation must be physical linear. Typical for devices without an MMU and no IOMMU to help it.
+ * @li @a UMP_PROT_SHAREABLE                 the allocation can be shared with other processes on the system. Without this flag the returned allocation won't be resolvable in other processes.
+ *
+ * All UMP allocation are growable unless they're @a UMP_PROT_SHAREABLE.
+ * The hint bits should be used to indicate the access pattern so the driver can select the most optimal memory type and cache settings based on the what the system supports.
+ */
+typedef enum
+{
+	/* Generic helpers */
+	UMP_PROT_DEVICE_RD = (1u << 0),
+	UMP_PROT_DEVICE_WR = (1u << 1),
+	UMP_HINT_DEVICE_RD = (1u << 2),
+	UMP_HINT_DEVICE_WR = (1u << 3),
+	UMP_DEVICE_MASK = 0xF,
+	UMP_DEVICE_CPU_SHIFT = 0,
+	UMP_DEVICE_W_SHIFT = 4,
+	UMP_DEVICE_X_SHIFT = 8,
+	UMP_DEVICE_Y_SHIFT = 12,
+	UMP_DEVICE_Z_SHIFT = 16,
+
+	/* CPU protection and hints. */
+	UMP_PROT_CPU_RD = (1u <<  0),
+	UMP_PROT_CPU_WR = (1u <<  1),
+	UMP_HINT_CPU_RD = (1u <<  2),
+	UMP_HINT_CPU_WR = (1u <<  3),
+
+	/* device W */
+	UMP_PROT_W_RD = (1u <<  4),
+	UMP_PROT_W_WR = (1u <<  5),
+	UMP_HINT_W_RD = (1u <<  6),
+	UMP_HINT_W_WR = (1u <<  7),
+
+	/* device X */
+	UMP_PROT_X_RD = (1u <<  8),
+	UMP_PROT_X_WR = (1u <<  9),
+	UMP_HINT_X_RD = (1u << 10),
+	UMP_HINT_X_WR = (1u << 11),
+	
+	/* device Y */
+	UMP_PROT_Y_RD = (1u << 12),
+	UMP_PROT_Y_WR = (1u << 13),
+	UMP_HINT_Y_RD = (1u << 14),
+	UMP_HINT_Y_WR = (1u << 15),
+
+	/* device Z */
+	UMP_PROT_Z_RD = (1u << 16),
+	UMP_PROT_Z_WR = (1u << 17),
+	UMP_HINT_Z_RD = (1u << 18),
+	UMP_HINT_Z_WR = (1u << 19),
+
+	/* 20-26 reserved for future use */
+	UMPP_ALLOCBITS_UNUSED = (0x7Fu << 20),
+	/** Allocations marked as @ UMP_CONSTRAINT_UNCACHED won't be mapped as cached by the cpu  */
+	UMP_CONSTRAINT_UNCACHED = (1u << 27),
+	/** Require 32-bit physically addressable memory */
+	UMP_CONSTRAINT_32BIT_ADDRESSABLE = (1u << 28),
+	/** For devices without an MMU and with no IOMMU assistance. */
+	UMP_CONSTRAINT_PHYSICALLY_LINEAR = (1u << 29),
+	/** Shareable must be set to allow the allocation to be used by other processes, the default is non-shared */
+	UMP_PROT_SHAREABLE = (1u << 30)
+	/* (1u << 31) should not be used to ensure compiler portability */
+} ump_allocation_bits;
+
+/**
+ * ump_allocation_bits combination argument type.
+ *
+ * Type used to pass zero or more bits from the @ref ump_allocation_bits enum
+ */
+typedef uint32_t ump_alloc_flags;
+
+
+/**
+ *  Default allocation flags for UMP v1 compatible allocations.
+ */
+#define UMP_V1_API_DEFAULT_ALLOCATION_FLAGS		UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | \
+												UMP_PROT_W_RD | UMP_PROT_W_WR |	\
+												UMP_PROT_X_RD | UMP_PROT_X_WR |	\
+												UMP_PROT_Y_RD | UMP_PROT_Y_WR |	\
+												UMP_PROT_Z_RD | UMP_PROT_Z_WR |	\
+												UMP_PROT_SHAREABLE |	\
+												UMP_CONSTRAINT_32BIT_ADDRESSABLE
+
+/**
+ * CPU cache sync operations.
+ *
+ * Cache synchronization operations to pass to @ref ump_cpu_msync_now
+ */
+enum
+{
+	/**
+	 * Cleans any dirty cache lines to main memory, but the data will still be available in the cache.
+	 * After a clean the contents of memory is considered to be "owned" by the device.
+	 * */
+	UMP_MSYNC_CLEAN = 1,
+
+	/** Cleans any dirty cache lines to main memory and Ejects all lines from the cache.
+	 * After an clean&invalidate the contents of memory is considered to be "owned" by the CPU.
+	 * Any subsequent access will fetch data from main memory.
+	 *
+	 * @note Due to CPUs doing speculative prefetching a UMP_MSYNC_CLEAN_AND_INVALIDATE must be done before and after interacting with hardware.
+	 * */
+	UMP_MSYNC_CLEAN_AND_INVALIDATE
+
+};
+
+typedef uint32_t ump_cpu_msync_op;
+
+/**
+ * Memory import types supported.
+ * If new import types are added they will appear here.
+ * They must be added before UMPP_EXTERNAL_MEM_COUNT and
+ * must be assigned an explicit sequantial number.
+ *
+ * @li UMP_EXTERNAL_MEM_TYPE_ION - Import an ION allocation
+ *                                 Takes a int* (pointer to a file descriptor)
+ *                                 Another ION reference is taken which is released on the final ump_release
+ */
+enum ump_external_memory_type
+{
+	UMPP_EXTERNAL_MEM_TYPE_UNUSED = 0, /* reserve type 0 */
+	UMP_EXTERNAL_MEM_TYPE_ION = 1,
+	UMPP_EXTERNAL_MEM_COUNT
+};
+
+/** @name UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Allocation constraints.
+ *
+ * Allocation flags to pass @ref ump_ref_drv_allocate
+ *
+ * UMP v1 API only.
+ */
+typedef enum
+{
+	/** the allocation is mapped as noncached. */
+	UMP_REF_DRV_CONSTRAINT_NONE = 0,
+	/** not supported. */
+	UMP_REF_DRV_CONSTRAINT_PHYSICALLY_LINEAR = 1,
+	/** the allocation is mapped as cached by the cpu. */
+	UMP_REF_DRV_CONSTRAINT_USE_CACHE = 4
+} ump_alloc_constraints;
+
+/* @} */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _UMP_COMMON_H_ */
diff --git a/bifrost/r9p0/kernel/include/linux/ump-import.h b/bifrost/r9p0/kernel/include/linux/ump-import.h
new file mode 100644
index 0000000..89ce727
--- /dev/null
+++ b/bifrost/r9p0/kernel/include/linux/ump-import.h
@@ -0,0 +1,99 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IMPORT_H_
+#define _UMP_IMPORT_H_
+
+#include <linux/ump.h>
+#include <linux/module.h>
+
+/**
+ * UMP import module info.
+ * Contains information about the Linux module providing the import module,
+ * used to block unloading of the Linux module while imported memory exists.
+ * Lists the functions implementing the UMP import functions.
+ */
+struct ump_import_handler
+{
+	/**
+	 * Linux module of the import handler
+	 */
+	struct module * linux_module;
+
+	/**
+	 * UMP session usage begin.
+	 *
+	 * Called when a UMP session first is bound to the handler.
+	 * Typically used to set up any import module specific per-session data.
+	 * The function returns a pointer to this data in the output pointer custom_session_data
+	 * which will be passed to \a session_end and \a import.
+	 *
+	 * Note: custom_session_data must be set to non-NULL if successful.
+	 * If no pointer is needed set it a magic value to validate instead.
+	 *
+	 * @param[out] custom_session_data Pointer to a generic pointer where any data can be stored
+	 * @return 0 on success, error code if the session could not be initiated.
+	 */
+	int (*session_begin)(void ** custom_session_data);
+
+	/**
+	 * UMP session usage end.
+	 *
+	 * Called when a UMP session is no longer using the handler.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 */
+	void (*session_end)(void * custom_session_data);
+
+	/**
+	 * Import request.
+	 *
+	 * Called when a client has asked to import a resource of the type the import module was installed for.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * The requested flags must be verified to be valid to apply to the imported memory.
+	 * If not valid return UMP_DD_INVALID_MEMORY_HANDLE.
+	 * If the flags are found to be valid call \a ump_dd_create_from_phys_blocks_64 to create a handle.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 * @param[in] phandle Pointer to the handle to import
+	 * @param     flags The requested UMPv2 flags to assign to the imported handle
+	 * @return UMP_DD_INVALID_MEMORY_HANDLE if the import failed, a valid ump handle on success
+	 */
+	ump_dd_handle (*import)(void * custom_session_data, void * phandle, ump_alloc_flags flags);
+};
+
+/**
+ * Import module registration.
+ * Registers a ump_import_handler structure for a memory type.
+ * @param     type    Type of the memory to register a handler for
+ * @param[in] handler Handler strcture to install
+ * @return 0 on success, a Linux error code on failure
+ */
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler);
+
+/**
+ * Import module deregistration.
+ * Uninstalls the handler for the given memory type.
+ * @param type Type of the memory to unregister the handler for
+ */
+void ump_import_module_unregister(enum ump_external_memory_type type);
+
+#endif /* _UMP_IMPORT_H_ */
diff --git a/bifrost/r9p0/kernel/include/linux/ump-ioctl.h b/bifrost/r9p0/kernel/include/linux/ump-ioctl.h
new file mode 100644
index 0000000..caf4c0b
--- /dev/null
+++ b/bifrost/r9p0/kernel/include/linux/ump-ioctl.h
@@ -0,0 +1,152 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IOCTL_H_
+#define _UMP_IOCTL_H_
+
+#include <linux/ump-common.h>
+
+/*
+ * The order and size of the members of these have been chosen so the structures look the same in 32-bit and 64-bit builds.
+ * If any changes are done build the ump_struct_size_checker test for 32-bit and 64-bit targets. Both must compile successfully to commit.
+ */
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union ump_pointer
+{
+	void * value; /**< client should store their pointers here */
+	uint32_t compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	uint64_t sizer; /**< Force 64-bit storage for all clients regardless */
+} ump_pointer;
+
+/**
+ * UMP allocation request.
+ * Used when performing ump_allocate
+ */
+typedef struct ump_k_allocate
+{
+	uint64_t size; /**< [in] Size in bytes to allocate */
+	ump_secure_id secure_id; /**< [out] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [in] Flags to use when allocating */
+} ump_k_allocate;
+
+/**
+ * UMP size query request.
+ * Used when performing ump_size_get
+ */
+typedef struct ump_k_sizequery
+{
+	uint64_t size; /**< [out] Size of allocation */
+	ump_secure_id secure_id; /**< [in] ID of allocation to query the size of */
+	uint32_t padding; /* don't remove */
+} ump_k_sizequery;
+
+/**
+ * UMP cache synchronization request.
+ * Used when performing ump_cpu_msync_now
+ */
+typedef struct ump_k_msync
+{
+	ump_pointer mapped_ptr; /**< [in] CPU VA to perform cache operation on */
+	ump_secure_id secure_id; /**< [in] ID of allocation to perform cache operation on */
+	ump_cpu_msync_op cache_operation; /**< [in] Cache operation to perform */
+	uint64_t size; /**< [in] Size in bytes of the range to synchronize */
+} ump_k_msync;
+
+/**
+ * UMP memory retain request.
+ * Used when performing ump_retain
+ */
+typedef struct ump_k_retain
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to retain a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_retain;
+
+/**
+ * UMP memory release request.
+ * Used when performing ump_release
+ */
+typedef struct ump_k_release
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to release a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_release;
+
+typedef struct ump_k_import
+{
+	ump_pointer phandle;                /**< [in]  Pointer to handle to import */
+	uint32_t type;                           /**< [in]  Type of handle to import */
+	ump_alloc_flags alloc_flags;        /**< [in]  Flags to assign to the imported memory */
+	ump_secure_id secure_id;            /**< [out] UMP ID representing the imported memory */
+	uint32_t padding;                        /* don't remove */
+} ump_k_import;
+
+/**
+ * UMP allocation flags request.
+ * Used when performing umpp_get_allocation_flags
+ *
+ * used only by v1 API
+ */
+typedef struct ump_k_allocation_flags
+{
+	ump_secure_id secure_id; /**< [in] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [out] Flags to use when allocating */
+} ump_k_allocation_flags;
+
+#define UMP_CALL_MAX_SIZE 512
+/*
+ * Ioctl definitions
+ */
+
+/* Use '~' as magic number */
+
+#define UMP_IOC_MAGIC  '~'
+
+#define UMP_FUNC_ALLOCATE _IOWR(UMP_IOC_MAGIC,  1, ump_k_allocate)
+#define UMP_FUNC_SIZEQUERY _IOWR(UMP_IOC_MAGIC,  2, ump_k_sizequery)
+#define UMP_FUNC_MSYNC _IOWR(UMP_IOC_MAGIC,  3, ump_k_msync)
+#define UMP_FUNC_RETAIN _IOW(UMP_IOC_MAGIC,  4, ump_k_retain)
+#define UMP_FUNC_RELEASE _IOW(UMP_IOC_MAGIC,  5, ump_k_release)
+#define UMP_FUNC_ALLOCATION_FLAGS_GET _IOWR(UMP_IOC_MAGIC,  6, ump_k_allocation_flags)
+#define UMP_FUNC_IMPORT _IOWR(UMP_IOC_MAGIC, 7, ump_k_import)
+
+/*max ioctl sequential number*/
+#define UMP_IOC_MAXNR 7
+
+/* 15 bits for the UMP ID (allowing 32768 IDs) */
+#define UMP_LINUX_ID_BITS 15
+#define UMP_LINUX_ID_MASK ((1ULL << UMP_LINUX_ID_BITS) - 1ULL)
+
+/* 64-bit (really 52 bits) encoding: 15 bits for the ID, 37 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_64 37
+#define UMP_LINUX_OFFSET_MASK_64 ((1ULL << UMP_LINUX_OFFSET_BITS_64)-1)
+/* 32-bit encoding: 15 bits for the ID, 17 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_32 17
+#define UMP_LINUX_OFFSET_MASK_32 ((1ULL << UMP_LINUX_OFFSET_BITS_32)-1)
+
+#if __SIZEOF_LONG__ == 8
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_64
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_64
+#else
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_32
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_32
+#endif
+
+#endif /* _UMP_IOCTL_H_ */
diff --git a/bifrost/r9p0/kernel/include/linux/ump.h b/bifrost/r9p0/kernel/include/linux/ump.h
new file mode 100644
index 0000000..16f9c39
--- /dev/null
+++ b/bifrost/r9p0/kernel/include/linux/ump.h
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ *
+ * This file contains the kernel space part of the UMP API.
+ *
+ */
+
+#ifndef _UMP_KERNEL_INTERFACE_H_
+#define _UMP_KERNEL_INTERFACE_H_
+
+/**
+ * @addtogroup ump_api
+ * @{
+ */
+
+/** @defgroup ump_kernel_space_api UMP Kernel Space API
+ * @{ */
+
+/**
+ * External representation of a UMP handle in kernel space.
+ */
+typedef void * ump_dd_handle;
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+
+#include <linux/ump-common.h>
+
+#define UMP_KERNEL_API_EXPORT
+
+#if defined(__KERNEL__)
+#include <linux/ump-import.h>
+#else
+#include <ump/src/library/common/ump_user.h>
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_INVALID_MEMORY_HANDLE ((ump_dd_handle)0)
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block_64
+{
+	uint64_t addr; /**< The physical address of the block */
+	uint64_t size; /**< The length of the block, in bytes, typically page aligned */
+} ump_dd_physical_block_64;
+
+/**
+ * Security filter hook.
+ *
+ * Each allocation can have a security filter attached to it.@n
+ * The hook receives
+ * @li the secure ID
+ * @li a handle to the allocation
+ * @li  the callback_data argument provided to @ref ump_dd_allocate_64 or @ref ump_dd_create_from_phys_blocks_64
+ *
+ * The hook must return @a true to indicate that access to the handle is allowed or @n
+ * @a false to state that no access is permitted.@n
+ * This hook is guaranteed to be called in the context of the requesting process/address space.
+ *
+ * The arguments provided to the hook are;
+ * @li the secure ID
+ * @li handle to the allocation
+ * @li the callback_data set when registering the hook
+ *
+ * Return value;
+ * @li @a true to permit access
+ * @li @a false to deny access
+ */
+typedef bool (*ump_dd_security_filter)(ump_secure_id, ump_dd_handle, void *);
+
+/**
+ * Final release notify hook.
+ *
+ * Allocations can have a hook attached to them which is called when the last reference to the allocation is released.
+ * No reference count manipulation is allowed on the provided handle, just property querying (ID get, size get, phys block get).
+ * This is similar to finalizers in OO languages.
+ *
+ * The arguments provided to the hook are;
+ * * handle to the allocation
+ * * the callback_data set when registering the hook
+ */
+typedef void (*ump_dd_final_release_callback)(const ump_dd_handle, void *);
+
+/**
+ * Allocate a buffer.
+ * The lifetime of the allocation is controlled by a reference count.
+ * The reference count of the returned buffer is set to 1.
+ * The memory will be freed once the reference count reaches 0.
+ * Use @ref ump_dd_retain and @ref ump_dd_release to control the reference count.
+ * @param size Number of bytes to allocate. Will be padded up to a multiple of the page size.
+ * @param flags Bit-wise OR of zero or more of the allocation flag bits.
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a
+ * secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed,
+ * just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the new allocation, or @a UMP_DD_INVALID_MEMORY_HANDLE on allocation failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/**
+ * Allocation bits getter.
+ * Retrieves the allocation flags used when instantiating the given handle.
+ * Just a copy of the flag given to @ref ump_dd_allocate_64 and @ref ump_dd_create_from_phys_blocks_64
+ * @param mem The handle retrieve the bits for
+ * @return The allocation bits used to instantiate the allocation
+ */
+UMP_KERNEL_API_EXPORT ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem);
+
+
+/**
+ * Retrieves the secure ID for the specified UMP memory.
+ *
+ * This identifier is unique across the entire system, and uniquely identifies
+ * the specified UMP memory allocation. This identifier can later be used through the
+ * @ref ump_dd_from_secure_id or
+ * @ref ump_from_secure_id
+ * functions in order to access this UMP memory, for instance from another process (if shared of course).
+ * Unless the allocation was marked as shared the returned ID will only be resolvable in the same process as did the allocation.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE will result in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_secure_id_get
+ *
+ * @see ump_dd_from_secure_id
+ * @see ump_from_secure_id
+ * @see ump_secure_id_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the secure ID for the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem);
+
+#ifdef CONFIG_KDS
+/**
+ * Retrieve the KDS resource for the specified UMP memory.
+ *
+ * The KDS resource should be used to synchronize access to the UMP allocation.
+ * See the KDS API for how to do that.
+ *
+ * @param mem Handle to the UMP memory to query.
+ * @return Pointer to the KDS resource controlling access to the UMP memory.
+ */
+UMP_KERNEL_API_EXPORT struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem);
+#endif
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_release when there is no longer any
+ * use for the retrieved handle.
+ *
+ * If called on an non-shared allocation and this is a different process @a UMP_DD_INVALID_MEMORY_HANDLE will be returned.
+ *
+ * Calling on an @a UMP_INVALID_SECURE_ID will return @a UMP_DD_INVALID_MEMORY_HANDLE
+ *
+ * @note There is a user space equivalent function called @ref ump_from_secure_id
+ *
+ * @see ump_dd_release
+ * @see ump_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get function.
+ *
+ * @return @a UMP_DD_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ * This function will return a pointer to an array of @ref ump_dd_physical_block_64 in @a pArray and the number of array elements in @a pCount
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @param mem Handle to UMP memory.
+ * @param[out] pCount Pointer to where to store the number of items in the returned array
+ * @param[out] pArray Pointer to where to store a pointer to the physical blocks array
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray);
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT uint64_t ump_dd_size_get_64(const ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory allocation.
+ *
+ * The function @ref ump_dd_release must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note You are not required to call @ref ump_dd_retain
+ * for UMP handles returned from
+ * @ref ump_dd_from_secure_id,
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_retain
+ *
+ * @see ump_retain
+ *
+ * @param mem Handle to UMP memory.
+ * @return 0 indicates success, any other value indicates failure.
+ */
+UMP_KERNEL_API_EXPORT int ump_dd_retain(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function must be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * One can only call ump_release when matched with a successful ump_dd_retain, ump_dd_allocate_64 or ump_dd_from_secure_id
+ * If called on an @a UMP_DD_INVALID_MEMORY_HANDLE the function will early out.
+ *
+ * @note There is a user space equivalent function called @ref ump_release
+ *
+ * @see ump_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_release(ump_dd_handle mem);
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ * Once wrapped the memory acts just like a normal allocation coming from @ref ump_dd_allocate_64.
+ * The only exception is that the freed physical memory is not put into the pool of free memory, but instead considered returned to the caller once @a final_release_func returns.
+ * The blocks array will be copied, so no need to hold on to it after this function returns.
+ * @param[in] blocks Array of @ref ump_dd_physical_block_64
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ * @param flags Allocation flags to mark the handle with
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed, just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/** @name UMP v1 API
+ * Functions provided to support compatibility with UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_HANDLE_INVALID UMP_DD_INVALID_MEMORY_HANDLE
+
+/**
+ * UMP error codes for kernel space.
+ */
+typedef enum
+{
+	UMP_DD_SUCCESS, /**< indicates success */
+	UMP_DD_INVALID /**< indicates failure */
+} ump_dd_status_code;
+
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block
+{
+	unsigned long addr; /**< The physical address of the block */
+	unsigned long size; /**< The length of the block, typically page aligned */
+} ump_dd_physical_block;
+
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_reference_release "ump_dd_reference_release" when there is no longer any
+ * use for the retrieved handle.
+ *
+ * @note There is a user space equivalent function called @ref ump_handle_create_from_secure_id "ump_handle_create_from_secure_id"
+ *
+ * @see ump_dd_reference_release
+ * @see ump_handle_create_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get "ump_secure_id_get " function.
+ *
+ * @return UMP_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id);
+
+
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ *
+ * @param[in] blocks Array of @ref ump_dd_physical_block
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ *
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the number of physical blocks used by the specified UMP memory.
+ *
+ * This function retrieves the number of @ref ump_dd_physical_block "ump_dd_physical_block" structs needed
+ * to describe the physical memory layout of the given UMP memory. This can later be used when calling
+ * the functions @ref ump_dd_phys_blocks_get "ump_dd_phys_blocks_get" and
+ * @ref ump_dd_phys_block_get "ump_dd_phys_block_get".
+ *
+ * @see ump_dd_phys_blocks_get
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return The number of ump_dd_physical_block structs required to describe the physical memory layout of the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will fail if the num_blocks parameter is either to large or to small.
+ *
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param blocks An array of @ref ump_dd_physical_block "ump_dd_physical_block" structs that will receive the physical description.
+ * @param num_blocks The number of blocks to return in the blocks array. Use the function
+ *                   @ref ump_dd_phys_block_count_get "ump_dd_phys_block_count_get" first to determine the number of blocks required.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the physical memory block information for specified block for the specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will return UMP_DD_INVALID if the specified index is out of range.
+ *
+ * @see ump_dd_phys_blocks_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param index Which physical info block to retrieve.
+ * @param block Pointer to a @ref ump_dd_physical_block "ump_dd_physical_block" struct which will receive the requested information.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block);
+
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get "ump_size_get"
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory.
+ *
+ * This function adds an extra reference to the specified UMP memory. This function should
+ * be used every time a UMP memory handle is duplicated, that is, assigned to another ump_dd_handle
+ * variable. The function @ref ump_dd_reference_release "ump_dd_reference_release" must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * @note You are not required to call @ref ump_dd_reference_add "ump_dd_reference_add"
+ * for UMP handles returned from
+ * @ref ump_dd_handle_create_from_secure_id "ump_dd_handle_create_from_secure_id",
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_add "ump_reference_add"
+ *
+ * @see ump_reference_add
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function should be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_release "ump_reference_release"
+ *
+ * @see ump_reference_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle mem);
+
+/* @} */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/** @} */ /* end group ump_kernel_space_api */
+
+/** @} */ /* end group ump_api */
+
+#endif /* _UMP_KERNEL_INTERFACE_H_ */
diff --git a/bifrost/r9p0/kernel/license.txt b/bifrost/r9p0/kernel/license.txt
new file mode 100644
index 0000000..77c14bd
--- /dev/null
+++ b/bifrost/r9p0/kernel/license.txt
@@ -0,0 +1,198 @@
+GPLV2 LICENCE AGREEMENT FOR MALI GPUS LINUX KERNEL DEVICE DRIVERS SOURCE CODE
+
+THE USE OF THE SOFTWARE ACCOMPANYING THIS DOCUMENT IS EXPRESSLY SUBJECT TO THE TERMS OF THE GNU GENERAL PUBLIC LICENSE VERSION 2 AS PUBLISHED BY THE FREE SOFTWARE FOUNDATION AND SET OUT BELOW FOR REFERENCE (?GPL LICENCE?). ARM IS ONLY WILLING TO DISTRIBUTE THE SOFTWARE TO YOU ON CONDITION THAT YOU ACCEPT ALL OF THE TERMS IN THE GPL LICENCE PRIOR TO MODIFYING OR DISTRIBUTING THE SOFTWARE.
+
+
+
+Further for the period of three (3) years, ARM hereby offers to make available the source code of any part of the software program that is supplied as object code or in executable form.
+
+
+
+GPL Licence
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+Version 2, June 1991
+
+
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+
+
+
+Preamble
+
+
+
+The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too.
+
+
+
+When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things.
+
+
+
+To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it.
+
+
+
+For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
+
+
+
+We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
+
+
+
+Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations.
+
+
+
+Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all.
+
+
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+
+
+0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you".
+
+
+
+Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program).
+
+Whether that is true depends on what the Program does.
+
+
+
+1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty;
+
+and give any other recipients of the Program a copy of this License along with the Program.
+
+
+
+You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee.
+
+
+
+2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
+
+
+
+a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change.
+
+
+
+b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License.
+
+
+
+c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.)
+
+
+
+These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.
+
+
+
+Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program.
+
+
+
+In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.
+
+
+
+3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following:
+
+
+
+a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.)
+
+
+
+The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.
+
+
+
+If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code.
+
+
+
+4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.
+
+
+
+5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the
+
+Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it.
+
+
+
+6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein.
+
+You are not responsible for enforcing compliance by third parties to this License.
+
+
+
+7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program.
+
+
+
+If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances.
+
+
+
+It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.
+
+
+
+This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
+
+
+
+8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.
+
+
+
+9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+
+
+
+Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation.
+
+
+
+10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.
+
+
+
+NO WARRANTY
+
+
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+/end
+
diff --git a/bifrost/r9p0/kernel/sconscript b/bifrost/r9p0/kernel/sconscript
new file mode 100644
index 0000000..79d94b8
--- /dev/null
+++ b/bifrost/r9p0/kernel/sconscript
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2010-2014, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+if env['backend'] == 'kernel' and int(env['kernel_modules']) == 1:
+	SConscript('drivers/sconscript')
diff --git a/bindings/arm/mali-utgard.txt b/bindings/arm/mali-utgard.txt
new file mode 100644
index 0000000..2c5879b
--- /dev/null
+++ b/bindings/arm/mali-utgard.txt
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+* ARM Mali-300/400/450 GPU
+
+Required properties:
+- compatible:
+	At least one of these: "arm,mali-300", "arm,mali-400", "arm,mali-450"
+	Always: "arm,mali-utgard"
+	Mali-450 can also include "arm,mali-400" as it is compatible.
+	- "arm,mali-400", "arm,mali-utgard" for any Mali-400 GPU.
+	- "arm,mali-450", "arm,mali-400", "arm,mali-utgard" for any Mali-450 GPU.
+- reg:
+	Physical base address and length of the GPU's registers.
+- interrupts:
+	- List of all Mali interrupts.
+	- This list must match the number of and the order of entries in
+		interrupt-names.
+- interrupt-names:
+	- IRQPP<X> - Name for PP interrupts.
+	- IRQPPMMU<X> -  Name for interrupts from the PP MMU.
+	- IRQPP - Name for the PP broadcast interrupt (Mali-450 only).
+	- IRQGP - Name for the GP interrupt.
+	- IRQGPMMU - Name for the interrupt from the GP MMU.
+	- IRQPMU - Name for the PMU interrupt (If pmu is implemented in HW, it must be contained).
+
+Optional properties:
+- pmu_domain_config:
+	- If the Mali internal PMU is present and the PMU IRQ is specified in
+		interrupt/interrupt-names ("IRQPMU").This contains the mapping of
+		Mali HW units to the PMU power domain.
+	-Mali Dynamic power domain configuration in sequence from 0-11, like:
+		<GP PP0 PP1 PP2 PP3  PP4 PP5 PP6 PP7 L2$0 L2$1 L2$2>.
+- pmu-switch-delay:
+	- Only needed if the power gates are connected to the PMU in a high fanout
+		network. This value is the number of Mali clock cycles it takes to
+		enable the power gates and turn on the power mesh. This value will
+		have no effect if a daisy chain implementation is used.
+
+Platform related properties:
+- clocks: Phandle to clock for Mali utgard device.
+- clock-names: the corresponding names of clock in clocks property.
+- regulator: Phandle to regulator which is power supplier of mali device.
+
+Example for a Mali400_MP1_PMU device:
+
+/ {
+	...
+
+	gpu@12300000 {
+		compatible = "arm,mali-400", "arm,mali-utgard";
+		reg = <0x12300000 0x30000>;
+		interrupts = <0 55 4>, <0 56 4>, <0 57 4>, <0 58 4>, <0 59 4>;
+		interrupt-names = "IRQGP", "IRQGPMMU", "IRQPP0", "IRQPPMMU0", "IRQPMU";
+
+		pmu_domain_config = <0x1 0x4 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x2 0x0 0x0>;
+		pmu_switch_delay = <0xff>;
+		clocks = <clock 122>, <clock 123>;
+		clock-names = "mali_parent", "mali";
+		vdd_g3d-supply = <regulator_Phandle>;
+	};
+}
diff --git a/build_mali.sh b/build_mali.sh
new file mode 100755
index 0000000..2b6c3c9
--- /dev/null
+++ b/build_mali.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+exec_name=$0
+
+set -o errtrace
+trap 'echo Fatal error: script ${exec_name} aborting at line $LINENO, command \"$BASH_COMMAND\" returned $?; exit 1' ERR
+
+KERNEL_ARCH=arm64
+LOCAL_PATH=$(pwd)
+CROSS_COMPILE=${LOCAL_PATH}/../prebuilt/toolchain/aarch64/bin/aarch64-cros-linux-gnu-
+COMPILE_PATH=${LOCAL_PATH}/../prebuilt/toolchain/aarch64/bin/
+cc_clang=${LOCAL_PATH}/../prebuilt/toolchain/aarch64/bin/aarch64-cros-linux-gnu-clang
+KDIR=${LOCAL_PATH}/../kernel
+cpu_num=$(grep -c processor /proc/cpuinfo)
+
+GPU_VERSION_UTGARD=r7p0
+GPU_VERSION_BIFROST=r19p0
+
+BIFROST_PATH="${LOCAL_PATH}/bifrost/${GPU_VERSION_BIFROST}"
+
+echo "make mali module KERNEL_ARCH is ${KERNEL_ARCH}"
+
+function usage(){
+  echo "Usage: ${exec_name} <board> [workspace path]"
+  echo "supported boards: elaine-(p0 , p1)"
+  echo "                  estelle-(p1, p2, b1, b3, b4)"
+  echo "                  lucy-(p0)"
+  echo "                  newman-(p1, p2, p2_1, b1, b3, b4)"
+  echo "                  puddy-(p0, pdfc, p1)"
+}
+
+function run_module_make(){
+  pushd .
+  echo "***** building $1 *****"
+  case $1 in
+    utgard)
+      echo "GPU_DRV_VERSION is ${GPU_VERSION_UTGARD}"
+      make -C ${LOCAL_PATH}/utgard KDIR=${KDIR} \
+            ARCH=${KERNEL_ARCH} CROSS_COMPILE=${CROSS_COMPILE} \
+	    GPU_DRV_VERSION=${GPU_VERSION_UTGARD} -j$cpu_num gpu
+    ;;
+    bifrost)
+      echo "GPU_DRV_VERSION is ${GPU_VERSION_BIFROST}"
+	  make CC=${cc_clang} LD=${CROSS_COMPILE}ld.lld NM=${COMPILE_PATH}llvm-nm OBJCOPY=${COMPILE_PATH}llvm-objcopy -C ${KDIR} \
+			ARCH=${KERNEL_ARCH} M=${BIFROST_PATH}/kernel/drivers/gpu/arm/midgard \
+			EXTRA_CFLAGS="-DCONFIG_MALI_PLATFORM_DEVICETREE -DCONFIG_MALI_MIDGARD_DVFS -DCONFIG_MALI_BACKEND=gpu -I${BIFROST_PATH}/kernel/include \
+			-DCONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=1 -DCONFIG_MALI_DMA_BUF_LEGACY_COMPAT=0 -Wno-error" \
+			CONFIG_MALI_MIDGARD=m CONFIG_MALI_PLATFORM_DEVICETREE=y CONFIG_MALI_MIDGARD_DVFS=y CONFIG_MALI_BACKEND=gpu \
+			-j$cpu_num modules
+    ;;
+  esac
+  echo "***** completed building *****"
+  popd
+}
+
+if (( $# < 1 ))
+then
+  usage
+  exit 2
+fi
+
+readonly board=$1
+readonly workspace_path=$2
+readonly product=`echo ${board} | cut -d "-" -f1`
+
+case $product in
+  elaine|estelle|lucy|newman|puddy)
+    run_module_make bifrost
+    if [ ! -z $workspace_path ]; then
+      prebuilt_path=${workspace_path}/vendor/amlogic/${product}/prebuilt
+      mali_ko_path=${prebuilt_path}/kernel/modules
+      fct_mali_ko_path=${prebuilt_path}/factory/kernel/modules
+      mkdir -p ${mali_ko_path}
+      mkdir -p ${fct_mali_ko_path}
+      cp ${BIFROST_PATH}/kernel/drivers/gpu/arm/midgard/mali_kbase.ko ${mali_ko_path}/mali.${board}.ko
+      cp ${BIFROST_PATH}/kernel/drivers/gpu/arm/midgard/mali_kbase.ko ${fct_mali_ko_path}/mali.${board}.ko
+    fi
+    ;;
+  *)
+    echo "unknown product: $product"
+    exit 1
+esac
diff --git a/egl/x11/drm_module/mali_drm/Makefile b/egl/x11/drm_module/mali_drm/Makefile
new file mode 100644
index 0000000..bf0ec73
--- /dev/null
+++ b/egl/x11/drm_module/mali_drm/Makefile
@@ -0,0 +1,77 @@
+#
+# Copyright (C) 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+MALI_DRM_FILE_PREFIX =
+
+# For each arch check: CROSS_COMPILE , KDIR , CFLAGS += -DARCH
+
+ARCH ?= arm
+## @note Should allow overriding of building MALI DRM for non-debug:
+EXTRA_CFLAGS += -DDEBUG
+
+DRM_SYMVERS_FILE = ../drm/Module.symvers
+KBUILD_EXTRA_SYMBOLS = $(KBUILD_EXTMOD)/$(DRM_SYMVERS_FILE)
+
+
+# linux build system integration
+
+ifneq ($(KERNELRELEASE),)
+# Inside the kernel build system
+
+EXTRA_CFLAGS += -I$(KBUILD_EXTMOD) -I$(KBUILD_EXTMOD)/include -I$(KBUILD_EXTMOD)/../drm/include/
+
+SRC +=	$(MALI_DRM_FILE_PREFIX)mali/mali_drv.c \
+	$(MALI_DRM_FILE_PREFIX)mali/mali_mm.c
+
+# Selecting files to compile by parsing the config file
+
+MODULE:=mali_drm.ko
+
+obj-m := $(MODULE:.ko=.o)
+$(MODULE:.ko=-y) := $(SRC:.c=.o)
+
+else
+# Outside the kernel build system
+
+# Get any user defined KDIR-<names> or maybe even a hardcoded KDIR
+-include KDIR_CONFIGURATION
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+ifeq ($(ARCH), arm)
+	# when compiling for ARM we're cross compiling
+	export CROSS_COMPILE ?= arm-none-linux-gnueabi-
+	# default to Virtex5
+	CONFIG ?= pb-virtex5
+else
+	# Compiling for the host
+	CONFIG ?= $(shell uname -m)
+endif
+
+# default cpu to select
+CPU ?= ct11mp
+
+# look up KDIR based om CPU selection
+KDIR ?= $(KDIR-$(CPU))
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(CPU))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) modules
+
+kernelrelease:
+	$(MAKE) -C $(KDIR) kernelrelease
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+endif
diff --git a/egl/x11/drm_module/mali_drm/include/Kbuild b/egl/x11/drm_module/mali_drm/include/Kbuild
new file mode 100755
index 0000000..c921ab1
--- /dev/null
+++ b/egl/x11/drm_module/mali_drm/include/Kbuild
@@ -0,0 +1,11 @@
+unifdef-y += drm.h drm_sarea.h
+unifdef-y += i810_drm.h
+unifdef-y += i830_drm.h
+unifdef-y += i915_drm.h
+unifdef-y += mga_drm.h
+unifdef-y += r128_drm.h
+unifdef-y += radeon_drm.h
+unifdef-y += sis_drm.h
+unifdef-y += savage_drm.h
+unifdef-y += via_drm.h
+unifdef-y += mali_drm.h
diff --git a/egl/x11/drm_module/mali_drm/include/mali_drm.h b/egl/x11/drm_module/mali_drm/include/mali_drm.h
new file mode 100644
index 0000000..f59bdc8
--- /dev/null
+++ b/egl/x11/drm_module/mali_drm/include/mali_drm.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_DRM_H__
+#define __MALI_DRM_H__
+
+/* Mali specific ioctls */
+#define NOT_USED_0_3
+#define DRM_MALI_FB_ALLOC   0x04
+#define DRM_MALI_FB_FREE            0x05
+#define NOT_USED_6_12
+#define DRM_MALI_MEM_INIT   0x13
+#define DRM_MALI_MEM_ALLOC  0x14
+#define DRM_MALI_MEM_FREE   0x15
+#define DRM_MALI_FB_INIT            0x16
+
+#define DRM_IOCTL_MALI_FB_ALLOC     DRM_IOWR(DRM_COMMAND_BASE + DRM_MALI_FB_ALLOC, drm_mali_mem_t)
+#define DRM_IOCTL_MALI_FB_FREE      DRM_IOW( DRM_COMMAND_BASE + DRM_MALI_FB_FREE, drm_mali_mem_t)
+#define DRM_IOCTL_MALI_MEM_INIT     DRM_IOWR(DRM_COMMAND_BASE + DRM_MALI_MEM_INIT, drm_mali_mem_t)
+#define DRM_IOCTL_MALI_MEM_ALLOC    DRM_IOWR(DRM_COMMAND_BASE + DRM_MALI_MEM_ALLOC, drm_mali_mem_t)
+#define DRM_IOCTL_MALI_MEM_FREE     DRM_IOW( DRM_COMMAND_BASE + DRM_MALI_MEM_FREE, drm_mali_mem_t)
+#define DRM_IOCTL_MALI_FB_INIT      DRM_IOW( DRM_COMMAND_BASE + DRM_MALI_FB_INIT, drm_mali_fb_t)
+
+typedef struct
+{
+	int context;
+	unsigned int offset;
+	unsigned int size;
+	unsigned long free;
+} drm_mali_mem_t;
+
+typedef struct
+{
+	unsigned int offset, size;
+} drm_mali_fb_t;
+
+#endif /* __MALI_DRM_H__ */
diff --git a/egl/x11/drm_module/mali_drm/mali/Makefile b/egl/x11/drm_module/mali_drm/mali/Makefile
new file mode 100644
index 0000000..f861f8c
--- /dev/null
+++ b/egl/x11/drm_module/mali_drm/mali/Makefile
@@ -0,0 +1,19 @@
+#
+# Copyright (C) 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+#
+# Makefile for the drm device driver.  This driver provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+ccflags-y = -Iinclude/drm
+mali_drm-y := mali_drv.o mali_mm.o
+
+obj-$(CONFIG_DRM_MALI)   += mali_drm.o
+
+
diff --git a/egl/x11/drm_module/mali_drm/mali/mali_drv.c b/egl/x11/drm_module/mali_drm/mali/mali_drv.c
new file mode 100644
index 0000000..cef4551
--- /dev/null
+++ b/egl/x11/drm_module/mali_drm/mali/mali_drv.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/vermagic.h>
+#include <linux/version.h>
+#include "drmP.h"
+#include "mali_drm.h"
+#include "mali_drv.h"
+
+static struct platform_device *pdev;
+
+static int mali_platform_drm_probe(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static int mali_platform_drm_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static int mali_platform_drm_suspend(struct platform_device *dev, pm_message_t state)
+{
+	return 0;
+}
+
+static int mali_platform_drm_resume(struct platform_device *dev)
+{
+	return 0;
+}
+
+
+static char mali_drm_device_name[] = "mali_drm";
+static struct platform_driver platform_drm_driver =
+{
+	.probe = mali_platform_drm_probe,
+	.remove = mali_platform_drm_remove,
+	.suspend = mali_platform_drm_suspend,
+	.resume = mali_platform_drm_resume,
+	.driver = {
+		.name = mali_drm_device_name,
+		.owner = THIS_MODULE,
+	},
+};
+
+#if 0
+static const struct drm_device_id dock_device_ids[] =
+{
+	{"MALIDRM", 0},
+	{"", 0},
+};
+#endif
+
+static int mali_driver_load(struct drm_device *dev, unsigned long chipset)
+{
+	int ret;
+	unsigned long base, size;
+	drm_mali_private_t *dev_priv;
+	printk(KERN_ERR "DRM: mali_driver_load start\n");
+
+	dev_priv = drm_calloc(1, sizeof(drm_mali_private_t), DRM_MEM_DRIVER);
+
+	if (dev_priv == NULL)
+	{
+		return -ENOMEM;
+	}
+
+	dev->dev_private = (void *)dev_priv;
+
+	if (NULL == dev->platformdev)
+	{
+		dev->platformdev = platform_device_register_simple(mali_drm_device_name, 0, NULL, 0);
+		pdev = dev->platformdev;
+	}
+
+#if 0
+	base = drm_get_resource_start(dev, 1);
+	size = drm_get_resource_len(dev, 1);
+#endif
+	ret = drm_sman_init(&dev_priv->sman, 2, 12, 8);
+
+	if (ret)
+	{
+		drm_free(dev_priv, sizeof(dev_priv), DRM_MEM_DRIVER);
+	}
+
+	//if ( ret ) kfree( dev_priv );
+
+	printk(KERN_ERR "DRM: mali_driver_load done\n");
+
+	return ret;
+}
+
+static int mali_driver_unload(struct drm_device *dev)
+{
+	drm_mali_private_t *dev_priv = dev->dev_private;
+	printk(KERN_ERR "DRM: mali_driver_unload start\n");
+
+	drm_sman_takedown(&dev_priv->sman);
+	drm_free(dev_priv, sizeof(*dev_priv), DRM_MEM_DRIVER);
+	//kfree( dev_priv );
+	printk(KERN_ERR "DRM: mali_driver_unload done\n");
+
+	return 0;
+}
+
+static struct drm_driver driver =
+{
+	.driver_features = DRIVER_USE_PLATFORM_DEVICE,
+	.load = mali_driver_load,
+	.unload = mali_driver_unload,
+	.context_dtor = NULL,
+	.dma_quiescent = mali_idle,
+	.reclaim_buffers = NULL,
+	.reclaim_buffers_idlelocked = mali_reclaim_buffers_locked,
+	.lastclose = mali_lastclose,
+	.get_map_ofs = drm_core_get_map_ofs,
+	.get_reg_ofs = drm_core_get_reg_ofs,
+	.ioctls = mali_ioctls,
+	.fops = {
+		.owner = THIS_MODULE,
+		.open = drm_open,
+		.release = drm_release,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)
+		.ioctl = drm_ioctl,
+#else
+		.unlocked_ioctl = drm_ioctl,
+#endif
+		.mmap = drm_mmap,
+		.poll = drm_poll,
+		.fasync = drm_fasync,
+	},
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static int __init mali_init(void)
+{
+	driver.num_ioctls = mali_max_ioctl;
+	return drm_init(&driver);
+}
+
+static void __exit mali_exit(void)
+{
+	platform_device_unregister(pdev);
+	drm_exit(&driver);
+}
+
+module_init(mali_init);
+module_exit(mali_exit);
+
+MODULE_INFO(vermagic, VERMAGIC_STRING);
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL and additional rights");
diff --git a/egl/x11/drm_module/mali_drm/mali/mali_drv.h b/egl/x11/drm_module/mali_drm/mali/mali_drv.h
new file mode 100644
index 0000000..df86ab8
--- /dev/null
+++ b/egl/x11/drm_module/mali_drm/mali/mali_drv.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _MALI_DRV_H_
+#define _MALI_DRV_H_
+
+#define DRIVER_AUTHOR       "ARM"
+#define DRIVER_NAME     "mali_drm"
+#define DRIVER_DESC     "DRM module for Mali-200, Mali-400"
+#define DRIVER_DATE     "20100520"
+#define DRIVER_MAJOR        0
+#define DRIVER_MINOR        1
+#define DRIVER_PATCHLEVEL   0
+
+#include "drm_sman.h"
+
+typedef struct drm_mali_private
+{
+	drm_local_map_t *mmio;
+	unsigned int idle_fault;
+	struct drm_sman sman;
+	int vram_initialized;
+	unsigned long vram_offset;
+} drm_mali_private_t;
+
+extern int mali_idle(struct drm_device *dev);
+extern void mali_reclaim_buffers_locked(struct drm_device *dev, struct drm_file *file_priv);
+extern void mali_lastclose(struct drm_device *dev);
+extern struct drm_ioctl_desc mali_ioctls[];
+extern int mali_max_ioctl;
+
+#endif /* _MALI_DRV_H_ */
diff --git a/egl/x11/drm_module/mali_drm/mali/mali_mm.c b/egl/x11/drm_module/mali_drm/mali/mali_mm.c
new file mode 100644
index 0000000..fe289fc
--- /dev/null
+++ b/egl/x11/drm_module/mali_drm/mali/mali_mm.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright (C) 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "drmP.h"
+#include "mali_drm.h"
+#include "mali_drv.h"
+
+#define VIDEO_TYPE 0
+#define MEM_TYPE 1
+
+#define MALI_MM_ALIGN_SHIFT 4
+#define MALI_MM_ALIGN_MASK ( (1 << MALI_MM_ALIGN_SHIFT) - 1)
+
+
+static void *mali_sman_mm_allocate(void *private, unsigned long size, unsigned alignment)
+{
+	printk(KERN_ERR "DRM: %s\n", __func__);
+	return NULL;
+}
+
+static void mali_sman_mm_free(void *private, void *ref)
+{
+	printk(KERN_ERR "DRM: %s\n", __func__);
+}
+
+static void mali_sman_mm_destroy(void *private)
+{
+	printk(KERN_ERR "DRM: %s\n", __func__);
+}
+
+static unsigned long mali_sman_mm_offset(void *private, void *ref)
+{
+	printk(KERN_ERR "DRM: %s\n", __func__);
+	return ~((unsigned long)ref);
+}
+
+static int mali_fb_init(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+	drm_mali_private_t *dev_priv = dev->dev_private;
+	drm_mali_fb_t *fb = data;
+	int ret;
+	printk(KERN_ERR "DRM: %s\n", __func__);
+
+	mutex_lock(&dev->struct_mutex);
+	{
+		struct drm_sman_mm sman_mm;
+		sman_mm.private = (void *)0xFFFFFFFF;
+		sman_mm.allocate = mali_sman_mm_allocate;
+		sman_mm.free = mali_sman_mm_free;
+		sman_mm.destroy = mali_sman_mm_destroy;
+		sman_mm.offset = mali_sman_mm_offset;
+		ret = drm_sman_set_manager(&dev_priv->sman, VIDEO_TYPE, &sman_mm);
+	}
+
+	if (ret)
+	{
+		DRM_ERROR("VRAM memory manager initialisation error\n");
+		mutex_unlock(&dev->struct_mutex);
+		return ret;
+	}
+
+	dev_priv->vram_initialized = 1;
+	dev_priv->vram_offset = fb->offset;
+
+	mutex_unlock(&dev->struct_mutex);
+	DRM_DEBUG("offset = %u, size = %u\n", fb->offset, fb->size);
+
+	return 0;
+}
+
+static int mali_drm_alloc(struct drm_device *dev, struct drm_file *file_priv, void *data, int pool)
+{
+	drm_mali_private_t *dev_priv = dev->dev_private;
+	drm_mali_mem_t *mem = data;
+	int retval = 0;
+	struct drm_memblock_item *item;
+	printk(KERN_ERR "DRM: %s\n", __func__);
+
+	mutex_lock(&dev->struct_mutex);
+
+	if (0 == dev_priv->vram_initialized)
+	{
+		DRM_ERROR("Attempt to allocate from uninitialized memory manager.\n");
+		mutex_unlock(&dev->struct_mutex);
+		return -EINVAL;
+	}
+
+	mem->size = (mem->size + MALI_MM_ALIGN_MASK) >> MALI_MM_ALIGN_SHIFT;
+	item = drm_sman_alloc(&dev_priv->sman, pool, mem->size, 0,
+	                      (unsigned long)file_priv);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	if (item)
+	{
+		mem->offset = dev_priv->vram_offset + (item->mm->offset(item->mm, item->mm_info) << MALI_MM_ALIGN_SHIFT);
+		mem->free = item->user_hash.key;
+		mem->size = mem->size << MALI_MM_ALIGN_SHIFT;
+	}
+	else
+	{
+		mem->offset = 0;
+		mem->size = 0;
+		mem->free = 0;
+		retval = -ENOMEM;
+	}
+
+	DRM_DEBUG("alloc %d, size = %d, offset = %d\n", pool, mem->size, mem->offset);
+
+	return retval;
+}
+
+static int mali_drm_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+	drm_mali_private_t *dev_priv = dev->dev_private;
+	drm_mali_mem_t *mem = data;
+	int ret;
+	printk(KERN_ERR "DRM: %s\n", __func__);
+
+	mutex_lock(&dev->struct_mutex);
+	ret = drm_sman_free_key(&dev_priv->sman, mem->free);
+	mutex_unlock(&dev->struct_mutex);
+	DRM_DEBUG("free = 0x%lx\n", mem->free);
+
+	return ret;
+}
+
+static int mali_fb_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+	printk(KERN_ERR "DRM: %s\n", __func__);
+	return mali_drm_alloc(dev, file_priv, data, VIDEO_TYPE);
+}
+
+static int mali_ioctl_mem_init(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+	drm_mali_private_t *dev_priv = dev->dev_private;
+	drm_mali_mem_t *mem = data;
+	int ret;
+	dev_priv = dev->dev_private;
+	printk(KERN_ERR "DRM: %s\n", __func__);
+
+	mutex_lock(&dev->struct_mutex);
+	ret = drm_sman_set_range(&dev_priv->sman, MEM_TYPE, 0, mem->size >> MALI_MM_ALIGN_SHIFT);
+
+	if (ret)
+	{
+		DRM_ERROR("MEM memory manager initialisation error\n");
+		mutex_unlock(&dev->struct_mutex);
+		return ret;
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
+static int mali_ioctl_mem_alloc(struct drm_device *dev, void *data,
+                                struct drm_file *file_priv)
+{
+
+	printk(KERN_ERR "DRM: %s\n", __func__);
+	return mali_drm_alloc(dev, file_priv, data, MEM_TYPE);
+}
+
+static drm_local_map_t *mem_reg_init(struct drm_device *dev)
+{
+	struct drm_map_list *entry;
+	drm_local_map_t *map;
+	printk(KERN_ERR "DRM: %s\n", __func__);
+
+	list_for_each_entry(entry, &dev->maplist, head)
+	{
+		map = entry->map;
+
+		if (!map)
+		{
+			continue;
+		}
+
+		if (map->type == _DRM_REGISTERS)
+		{
+			return map;
+		}
+	}
+	return NULL;
+}
+
+int mali_idle(struct drm_device *dev)
+{
+	drm_mali_private_t *dev_priv = dev->dev_private;
+	uint32_t idle_reg;
+	unsigned long end;
+	int i;
+	printk(KERN_ERR "DRM: %s\n", __func__);
+
+	if (dev_priv->idle_fault)
+	{
+		return 0;
+	}
+
+	return 0;
+}
+
+
+void mali_lastclose(struct drm_device *dev)
+{
+	drm_mali_private_t *dev_priv = dev->dev_private;
+	printk(KERN_ERR "DRM: %s\n", __func__);
+
+	if (!dev_priv)
+	{
+		return;
+	}
+
+	mutex_lock(&dev->struct_mutex);
+	drm_sman_cleanup(&dev_priv->sman);
+	dev_priv->vram_initialized = 0;
+	dev_priv->mmio = NULL;
+	mutex_unlock(&dev->struct_mutex);
+}
+
+void mali_reclaim_buffers_locked(struct drm_device *dev, struct drm_file *file_priv)
+{
+	drm_mali_private_t *dev_priv = dev->dev_private;
+	printk(KERN_ERR "DRM: %s\n", __func__);
+
+	mutex_lock(&dev->struct_mutex);
+
+	if (drm_sman_owner_clean(&dev_priv->sman, (unsigned long)file_priv))
+	{
+		mutex_unlock(&dev->struct_mutex);
+		return;
+	}
+
+	if (dev->driver->dma_quiescent)
+	{
+		dev->driver->dma_quiescent(dev);
+	}
+
+	drm_sman_owner_cleanup(&dev_priv->sman, (unsigned long)file_priv);
+	mutex_unlock(&dev->struct_mutex);
+	return;
+}
+
+struct drm_ioctl_desc mali_ioctls[] =
+{
+	DRM_IOCTL_DEF(DRM_MALI_FB_ALLOC, mali_fb_alloc, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_MALI_FB_FREE, mali_drm_free, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_MALI_MEM_INIT, mali_ioctl_mem_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_MALI_MEM_ALLOC, mali_ioctl_mem_alloc, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_MALI_MEM_FREE, mali_drm_free, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_MALI_FB_INIT, mali_fb_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY),
+};
+
+int mali_max_ioctl = DRM_ARRAY_SIZE(mali_ioctls);
diff --git a/egl/x11/drm_module/readme b/egl/x11/drm_module/readme
new file mode 100755
index 0000000..578d725
--- /dev/null
+++ b/egl/x11/drm_module/readme
@@ -0,0 +1,8 @@
+How to build:
+KDIR=/work/kernel-2.6.35.7 make
+
+How to install:
+insmod drm/drm.ko
+insmod mali_drm/mali_drm.ko
+insmod trunk/src/devicedrv/mali/mali.ko
+
diff --git a/gpu.mk b/gpu.mk
new file mode 100644
index 0000000..0764a0a
--- /dev/null
+++ b/gpu.mk
@@ -0,0 +1,98 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -le 25 && echo OK),OK)
+ifneq (,$(wildcard device/amlogic/$(TARGET_PRODUCT)/BoardConfig.mk))
+include device/amlogic/$(TARGET_PRODUCT)/BoardConfig.mk
+else
+ifneq (,$(wildcard device/customer/$(TARGET_PRODUCT)/BoardConfig.mk))
+include device/customer/$(TARGET_PRODUCT)/BoardConfig.mk
+else
+ifneq (,$(wildcard $(TARGET_DEVICE_DIR)/BoardConfig.mk))
+include $(TARGET_DEVICE_DIR)/BoardConfig.mk
+else
+$(error "find the BoardConfig file, then include it")
+endif
+endif
+endif
+endif
+
+LOCAL_KK=0
+ifeq ($(GPU_TYPE), t82x)
+	LOCAL_KK=1
+endif
+
+ifeq ($(GPU_TYPE), t83x)
+	LOCAL_KK=1
+endif
+
+GPU_MODS_OUT?=system/lib
+ifeq ($(LOCAL_KK),0)
+GPU_DRV_VERSION?=r6p1
+GPU_DRV_VERSION:=$(strip $(GPU_DRV_VERSION))
+MALI_PLAT=hardware/arm/gpu/utgard/platform
+MALI=hardware/arm/gpu/utgard/${GPU_DRV_VERSION}
+MALI_OUT=$(PRODUCT_OUT)/obj/mali
+KERNEL_ARCH ?= arm
+#TODO rm shell cmd
+define gpu-modules
+
+$(MALI_KO):
+	rm $(MALI_OUT) -rf
+	mkdir -p $(MALI_OUT)
+	cp $(MALI)/* $(MALI_OUT)/ -airf
+	cp $(MALI_PLAT) $(MALI_OUT)/ -airf
+	@echo "make mali module KERNEL_ARCH is $(KERNEL_ARCH)"
+	@echo "make mali module MALI_OUT is $(MALI_OUT)"
+	@echo "make mali module MAKE is $(MAKE)"
+	@echo "GPU_DRV_VERSION is ${GPU_DRV_VERSION}"
+	$(MAKE) -C $(shell pwd)/$(PRODUCT_OUT)/obj/KERNEL_OBJ M=$(shell pwd)/$(MALI_OUT)/ \
+	ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(PREFIX_CROSS_COMPILE) CONFIG_MALI400=m  CONFIG_MALI450=m    \
+	EXTRA_CFLAGS="-DCONFIG_MALI400=m -DCONFIG_MALI450=m" \
+	CONFIG_GPU_THERMAL=y CONFIG_AM_VDEC_H264_4K2K=y modules
+
+	@echo "GPU_MODS_OUT is $(GPU_MODS_OUT)"
+	mkdir -p $(PRODUCT_OUT)/$(GPU_MODS_OUT)
+	cp $(MALI_OUT)/mali.ko $(PRODUCT_OUT)/$(GPU_MODS_OUT)/mali.ko
+endef
+
+else
+
+GPU_DRV_VERSION?=r11p0
+GPU_DRV_VERSION:=$(strip $(GPU_DRV_VERSION))
+MALI=hardware/arm/gpu/midgard/${GPU_DRV_VERSION}
+MALI_OUT=$(PRODUCT_OUT)/obj/t83x
+KERNEL_ARCH ?= arm
+
+define gpu-modules
+
+$(MALI_KO):
+	rm $(MALI_OUT) -rf
+	mkdir -p $(MALI_OUT)
+	cp $(MALI)/* $(MALI_OUT)/ -airf
+	@echo "make mali module KERNEL_ARCH is $(KERNEL_ARCH) current dir is $(shell pwd)"
+	@echo "MALI is $(MALI), MALI_OUT is $(MALI_OUT)"
+	@echo "GPU_DRV_VERSION is ${GPU_DRV_VERSION}"
+	$(MAKE) -C $(shell pwd)/$(PRODUCT_OUT)/obj/KERNEL_OBJ M=$(shell pwd)/$(MALI_OUT)/kernel/drivers/gpu/arm/midgard \
+	ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(PREFIX_CROSS_COMPILE) \
+	EXTRA_CFLAGS="-DCONFIG_MALI_PLATFORM_DEVICETREE -DCONFIG_MALI_MIDGARD_DVFS -DCONFIG_MALI_BACKEND=gpu" \
+	CONFIG_MALI_MIDGARD=m CONFIG_MALI_PLATFORM_DEVICETREE=y CONFIG_MALI_MIDGARD_DVFS=y CONFIG_MALI_BACKEND=gpu modules
+
+	mkdir -p $(PRODUCT_OUT)/$(GPU_MODS_OUT)
+	@echo "GPU_MODS_OUT is $(GPU_MODS_OUT)"
+	cp $(MALI_OUT)/kernel/drivers/gpu/arm/midgard/mali_kbase.ko $(PRODUCT_OUT)/$(GPU_MODS_OUT)/mali.ko
+	@echo "make mali module finished current dir is $(shell pwd)"
+endef
+endif
diff --git a/lib/Android.mk b/lib/Android.mk
new file mode 120000
index 0000000..35cfd1d
--- /dev/null
+++ b/lib/Android.mk
@@ -0,0 +1 @@
+lib.mk
\ No newline at end of file
diff --git a/lib/END_USER_LICENCE_AGREEMENT.txt b/lib/END_USER_LICENCE_AGREEMENT.txt
new file mode 100644
index 0000000..5875318
--- /dev/null
+++ b/lib/END_USER_LICENCE_AGREEMENT.txt
@@ -0,0 +1,194 @@
+LES-PRE-20769
+SP-Version: 1.0
+25 November 2015
+
+END USER LICENCE AGREEMENT FOR THE MALI USERSPACE DRIVER ("Mali DRIVER")
+
+THIS END USER LICENCE AGREEMENT ("LICENCE") IS A LEGAL AGREEMENT
+BETWEEN YOU (EITHER A SINGLE INDIVIDUAL, OR SINGLE LEGAL ENTITY) AND
+ARM LIMITED ("ARM") FOR THE USE OF THE SOFTWARE ACCOMPANYING THIS
+LICENCE. ARM IS ONLY WILLING TO LICENSE THE SOFTWARE TO YOU ON
+CONDITION THAT YOU ACCEPT ALL OF THE TERMS IN THIS LICENCE. BY
+INSTALLING OR OTHERWISE USING OR COPYING THE SOFTWARE YOU INDICATE
+THAT YOU AGREE TO BE BOUND BY ALL OF THE TERMS OF THIS LICENCE. IF YOU
+DO NOT AGREE TO THE TERMS OF THIS LICENCE, ARM IS UNWILLING TO LICENSE
+THE SOFTWARE TO YOU AND YOU MAY NOT INSTALL, USE OR COPY THE SOFTWARE,
+AND YOU SHOULD PROMPTLY RETURN THE SOFTWARE TO YOUR SUPPLIER.
+
+"Applications" means applications for use solely in conjunction with
+Mali-based products manufactured under licence from ARM.
+
+"Output" means data resulting from your use of the Software and all
+direct and indirect derivatives thereof.
+
+"Software" means any software, firmware and data accompanying this
+Licence, any printed, electronic or online documentation supplied with
+it under the terms of this Licence for the Mali Driver.
+
+1. LICENCE GRANTS TO YOU.
+
+1.1 ARM hereby grants to you, subject to the terms and conditions of
+this Licence, a non-exclusive, non-transferable, revocable, worldwide
+licence to:
+
+(i)	use and copy the Software or certain components or optional
+	functionality in the Software, as applicable, solely for the
+	purposes of running, designing or developing Applications; and
+
+(ii)	subject to Clause 1.2, distribute the whole of the Software;
+	and/or (b) the whole or any part of the Software together
+	with, or as incorporated into, Applications; and
+
+1.2 If you choose to redistribute the whole or any part of the
+Software pursuant to the licences granted in Clause 1.1(ii), you
+agree: (i) not to use ARM's or any of its licensors names, logos or
+trademarks to market Applications; (ii) to retain any and all
+copyright notices and other notices (whether ARM's or its licensor's)
+which are included with the Software; and (iii) include a copy of this
+Licence with such redistribution.
+
+2. RESTRICTIONS ON USE OF THE SOFTWARE.
+
+BENCHMARKING: This Licence does not prevent you from using the
+Software for benchmarking purposes. However, you shall ensure that any
+and all benchmarking data relating to the Software, and any other
+results of your use or testing of the Software which are indicative of
+its performance, efficacy, reliability or quality, shall not be used
+to disparage ARM, its products or services, or in a manner that, in
+ARM's reasonable judgment, may diminish or otherwise damage the
+reputation of ARM.
+
+COPYRIGHT AND RESERVATION OF RIGHTS: The Software is owned by ARM or
+its licensors and is protected by copyright and other intellectual
+property laws and international treaties. The Software is licensed not
+sold. You acquire no rights to the Software other than as expressly
+provided by this Licence. You shall not remove from the Software any
+copyright notice or other notice and shall ensure that any such notice
+is reproduced in any copies of the whole or any part of the Software
+made by you or other permitted users.
+
+REVERSE ENGINEERING: Except to the extent that such activity is
+permitted by applicable law you shall not reverse engineer, decompile
+or disassemble any of the Software. If the Software was provided to
+you in Europe you shall not reverse engineer, decompile or disassemble
+any of the Software for the purposes of error correction.
+
+RESTRICTED USE: You agree that you shall not use the Software or the
+Output other than pursuant to and in accordance with the exercise of
+any of the licences granted under this Licence. Without limiting the
+generality of the foregoing, you shall not use the Software or any
+Output: (a) for determining if any features, functions or processes
+provided by the Software are covered by any patents or patent
+applications owned by you or a third party; or (b) for developing
+technology, applications or products which avoid any of ARM's
+intellectual property in the Software licensed hereunder; or (c) as a
+reference for modifying existing patents or patent applications or
+creating any continuation, continuation in part, or extension of
+existing patents or patent applications.
+
+3. SUPPORT.
+
+ARM is not under an obligation to provide support, but it may do so at
+its own discretion, and if it does, it will only be in respect of the
+Software as delivered.
+
+4. NO WARRANTIES.
+
+YOU AGREE THAT THE SOFTWARE IS LICENSED "AS IS", AND THAT ARM
+EXPRESSLY DISCLAIMS ALL REPRESENTATIONS, WARRANTIES, CONDITIONS OR
+OTHER TERMS, EXPRESS OR IMPLIED OR STATUTORY, INCLUDING WITHOUT
+LIMITATION THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, SATISFACTORY
+QUALITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+
+YOU EXPRESSLY ASSUME ALL LIABILITIES AND RISKS, FOR USE OR OPERATION
+OF APPLICATIONS, INCLUDING WITHOUT LIMITATION, APPLICATIONS DESIGNED
+OR INTENDED FOR MISSION CRITICAL APPLICATIONS, SUCH AS PACEMAKERS,
+WEAPONRY, AIRCRAFT NAVIGATION, FACTORY CONTROL SYSTEMS, ETC. SHOULD
+THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE ENTIRE COST OF ALL
+NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+5. LIMITATION OF LIABILITY.
+
+TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL
+ARM BE LIABLE FOR ANY INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL
+DAMAGES (INCLUDING LOSS OF PROFITS) ARISING OUT OF THE USE OR
+INABILITY TO USE THE SOFTWARE WHETHER BASED ON A CLAIM UNDER CONTRACT,
+TORT OR OTHER LEGAL THEORY, EVEN IF ARM WAS ADVISED OF THE POSSIBILITY
+OF SUCH DAMAGES.
+
+ARM does not seek to limit or exclude liability for death or personal
+injury arising from ARM's negligence or ARM's fraud and because some
+jurisdictions do not permit the exclusion or limitation of liability
+for consequential or incidental damages the above limitation relating
+to liability for consequential damages may not apply to you.
+
+NOTWITHSTANDING ANYTHING TO THE CONTRARY CONTAINED IN THIS LICENCE,
+THE MAXIMUM LIABILITY OF ARM TO YOU IN AGGREGATE FOR ALL CLAIMS MADE
+AGAINST ARM IN CONTRACT TORT OR OTHERWISE UNDER OR IN CONNECTION WITH
+THE SUBJECT MATTER OF THIS LICENCE SHALL NOT EXCEED THE GREATER OF:
+(I) THE TOTAL OF SUMS PAID BY YOU TO ARM (IF ANY) FOR THIS LICENCE;
+AND (II) $10.00 USD. THE EXISTENCE OF MORE THAN ONE CLAIM WILL NOT
+ENLARGE OR EXTEND THE LIMIT.
+
+6. U.S. GOVERNMENT END USERS.
+
+US Government Restrictions: Use, duplication, reproduction, release,
+modification, disclosure or transfer of the Software is restricted in
+accordance with the terms of this Licence.
+
+7. TERM AND TERMINATION.
+
+This Licence shall remain in force until terminated by you or by ARM.
+Without prejudice to any of its other rights if you are in breach of
+any of the terms and conditions of this Licence then ARM may terminate
+this Licence immediately upon giving written notice to you or on
+thirty (30) days written notice without cause. You may terminate this
+Licence at any time. Upon termination of this Licence by you or by ARM
+, you shall stop using the Software and destroy all copies of the
+Software in your possession, together with all documentation and
+related materials. The provisions of clauses 2, 3, 4, 5, 6, 7, and 8
+shall survive termination of this Licence.
+
+8. GENERAL.
+
+This Licence is governed by English Law. Except where ARM agrees
+otherwise in: (i) a written contract signed by you and ARM; or (ii) a
+written contract provided by ARM and accepted by you, this is the only
+agreement between you and ARM relating to the Software and it may only
+be modified by written agreement between you and ARM. Except as
+expressly agreed in writing, this Licence may not be modified by
+purchase orders, advertising or other representation by any person. If
+any clause or sentence in this Licence is held by a court of law to be
+illegal or unenforceable the remaining provisions of this Licence
+shall not be affected thereby. The failure by ARM to enforce any of
+the provisions of this Licence, unless waived in writing, shall not
+constitute a waiver of ARM's rights to enforce such provision or any
+other provision of this Licence in the future.
+
+At ARM's request, you agree to check your computers for installations
+of the Software and any other information requested by ARM relating to
+Software installation and to provide this information to ARM. You
+agree that auditors nominated by ARM may also perform such checking
+and reporting on behalf of ARM by prior appointment during your normal
+business hours on seven (7) days' notice. ARM shall bear the auditors'
+costs for that audit unless it reveals unlicensed usage in which case
+you shall promptly reimburse ARM for all reasonable costs and
+expenses, including professional fees, relating to such audit. Any
+information which is disclosed to ARM or such auditors during checking
+or audit shall be treated as your confidential information and shall
+only be used by ARM for licence management, compliance and enforcement
+purposes.
+
+The Software provided under this Agreement is subject to U.K.,
+European Union, and U.S. export control laws and regulations,
+including the U.S. Export Administration Act and its associated
+regulations (hereafter collectively referred to as "Export
+Regulations").  LICENSEE agrees to comply fully with all such Export
+Regulations and LICENSEE agrees that it shall not, either directly or
+indirectly, export in breach of the Export Regulations, any Software
+received under this Agreement, nor any direct products thereof; (i) to
+any country, company or person subject to export restrictions or
+sanctions under the Export Regulations; or (ii) for any prohibited end
+use, which at the time of export requires an export license or other
+governmental approval, without first obtaining such license or
+approval.
diff --git a/lib/README b/lib/README
new file mode 100644
index 0000000..5c30c35
--- /dev/null
+++ b/lib/README
@@ -0,0 +1,17 @@
+this is Android GPU Library
+
+GPU_TARGET_PLATFORM  is the same as the Kernel.
+   cpu      kernel    android     GPU_TARGET_PLATFORM
+cortex-a9    32bit     32bit          default_7a
+cortex-a53   32bit     32bit          default_7a
+cortex-a53   64bit     32bit          default_8a
+cortex-a53   64bit     64bit          default_8a
+
+GPU_TARGET_PLATFORM?= default_7a
+
+library symbols was put at
+ftp://ftp-china.amlogic.com/mbox/gpu-lib/
+
+ddk git
+
+git clone ssh://android@10.8.9.5/platform/hardware/gpu/mali/ddk -b aml-64bit-r5p1
diff --git a/lib/dvalin_ion/libGLES_mali_default_7a_32-o-r10p0.so b/lib/dvalin_ion/libGLES_mali_default_7a_32-o-r10p0.so
new file mode 120000
index 0000000..d27ac75
--- /dev/null
+++ b/lib/dvalin_ion/libGLES_mali_default_7a_32-o-r10p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-o-r10p0.so
\ No newline at end of file
diff --git a/lib/dvalin_ion/libGLES_mali_default_7a_32-o-r9p0.so b/lib/dvalin_ion/libGLES_mali_default_7a_32-o-r9p0.so
new file mode 120000
index 0000000..b424238
--- /dev/null
+++ b/lib/dvalin_ion/libGLES_mali_default_7a_32-o-r9p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-o-r9p0.so
\ No newline at end of file
diff --git a/lib/dvalin_ion/libGLES_mali_default_8a_32-o-r10p0.so b/lib/dvalin_ion/libGLES_mali_default_8a_32-o-r10p0.so
new file mode 100755
index 0000000..c90549b
--- /dev/null
+++ b/lib/dvalin_ion/libGLES_mali_default_8a_32-o-r10p0.so
Binary files differ
diff --git a/lib/dvalin_ion/libGLES_mali_default_8a_32-o-r9p0.so b/lib/dvalin_ion/libGLES_mali_default_8a_32-o-r9p0.so
new file mode 100644
index 0000000..d7e60ec
--- /dev/null
+++ b/lib/dvalin_ion/libGLES_mali_default_8a_32-o-r9p0.so
Binary files differ
diff --git a/lib/dvalin_ion/libGLES_mali_default_8a_64-o-r10p0.so b/lib/dvalin_ion/libGLES_mali_default_8a_64-o-r10p0.so
new file mode 100755
index 0000000..8aae6e1
--- /dev/null
+++ b/lib/dvalin_ion/libGLES_mali_default_8a_64-o-r10p0.so
Binary files differ
diff --git a/lib/dvalin_ion/libGLES_mali_default_8a_64-o-r9p0.so b/lib/dvalin_ion/libGLES_mali_default_8a_64-o-r9p0.so
new file mode 100644
index 0000000..1460631
--- /dev/null
+++ b/lib/dvalin_ion/libGLES_mali_default_8a_64-o-r9p0.so
Binary files differ
diff --git a/lib/lib.mk b/lib/lib.mk
new file mode 100755
index 0000000..136ca45
--- /dev/null
+++ b/lib/lib.mk
@@ -0,0 +1,77 @@
+
+MALI_LIB_PREBUILT=true
+#build in hardware/amlogic/ddk
+ifneq (,$(wildcard vendor/arm))
+MALI_LIB_PREBUILT=false
+endif
+ifneq (,$(wildcard hardware/amlogic/ddk))
+MALI_LIB_PREBUILT=false
+endif
+#build in hardware/arm/gpu/ddk
+ifneq (,$(wildcard hardware/arm/gpu/ddk))
+MALI_LIB_PREBUILT=false
+endif
+ifneq (,$(wildcard vendor/arm/t83x))
+MALI_LIB_PREBUILT=false
+endif
+#already in hardware/arm/gpu/lib
+
+ifeq ($(MALI_LIB_PREBUILT),true)
+LOCAL_PATH:= $(call my-dir)
+include $(CLEAR_VARS)
+
+TARGET:=$(GPU_TYPE)
+TARGET?=mali400
+ifeq ($(USING_MALI450), true)
+TARGET=mali450
+endif
+
+TARGET:=$(TARGET)_ion
+GPU_TARGET_PLATFORM ?= default_7a
+$(info Android.mk GPU_DRV_VERSION is ${GPU_DRV_VERSION})
+ifeq ($(GPU_ARCH),midgard)
+GPU_DRV_VERSION?=r11p0
+else
+GPU_DRV_VERSION?=r6p1
+endif
+
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 26 && echo OK),OK)
+LOCAL_ANDROID_VERSION_NUM:=o-${GPU_DRV_VERSION}
+else
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 24 && echo OK),OK)
+LOCAL_ANDROID_VERSION_NUM:=n-${GPU_DRV_VERSION}
+else
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 23 && echo OK),OK)
+LOCAL_ANDROID_VERSION_NUM:=m-${GPU_DRV_VERSION}
+else
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 22 && echo OK),OK)
+LOCAL_ANDROID_VERSION_NUM:=l-${GPU_DRV_VERSION}
+else
+LOCAL_ANDROID_VERSION_NUM:=k-${GPU_DRV_VERSION}
+endif
+endif
+endif
+endif
+LOCAL_MODULE := libGLES_mali
+LOCAL_MULTILIB := both
+LOCAL_MODULE_SUFFIX := .so
+LOCAL_MODULE_TAGS := optional
+LOCAL_MODULE_CLASS := SHARED_LIBRARIES
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 26 && echo OK),OK)
+LOCAL_MODULE_PATH    := $(TARGET_OUT_VENDOR)/egl
+LOCAL_MODULE_PATH_32 := $(TARGET_OUT_VENDOR)/lib/egl
+LOCAL_MODULE_PATH_64 := $(TARGET_OUT_VENDOR)/lib64/egl
+else
+LOCAL_MODULE_PATH    := $(TARGET_OUT_SHARED_LIBRARIES)/egl
+LOCAL_MODULE_PATH_32 := $(TARGET_OUT)/lib/egl
+LOCAL_MODULE_PATH_64 := $(TARGET_OUT)/lib64/egl
+endif
+ifeq ($(TARGET_2ND_ARCH),)
+LOCAL_SRC_FILES    	 := $(TARGET)/libGLES_mali_$(GPU_TARGET_PLATFORM)_32-$(LOCAL_ANDROID_VERSION_NUM).so
+else
+LOCAL_SRC_FILES_32   := $(TARGET)/libGLES_mali_$(GPU_TARGET_PLATFORM)_32-$(LOCAL_ANDROID_VERSION_NUM).so
+LOCAL_SRC_FILES_64	 := $(TARGET)/libGLES_mali_$(GPU_TARGET_PLATFORM)_64-$(LOCAL_ANDROID_VERSION_NUM).so
+endif
+include $(BUILD_PREBUILT)
+
+endif
diff --git a/lib/mali450_ion/libGLES_mali_default_7a_32-k-r5p1.so b/lib/mali450_ion/libGLES_mali_default_7a_32-k-r5p1.so
new file mode 100644
index 0000000..56f051a
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_7a_32-k-r5p1.so
Binary files differ
diff --git a/lib/mali450_ion/libGLES_mali_default_7a_32-k-r6p1.so b/lib/mali450_ion/libGLES_mali_default_7a_32-k-r6p1.so
new file mode 100644
index 0000000..e73bf7e
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_7a_32-k-r6p1.so
Binary files differ
diff --git a/lib/mali450_ion/libGLES_mali_default_7a_32-l-r6p1.so b/lib/mali450_ion/libGLES_mali_default_7a_32-l-r6p1.so
new file mode 120000
index 0000000..c11bbb7
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_7a_32-l-r6p1.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-l-r6p1.so
\ No newline at end of file
diff --git a/lib/mali450_ion/libGLES_mali_default_7a_32-m-r6p1.so b/lib/mali450_ion/libGLES_mali_default_7a_32-m-r6p1.so
new file mode 120000
index 0000000..02339f8
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_7a_32-m-r6p1.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-m-r6p1.so
\ No newline at end of file
diff --git a/lib/mali450_ion/libGLES_mali_default_7a_32-m-r7p0.so b/lib/mali450_ion/libGLES_mali_default_7a_32-m-r7p0.so
new file mode 120000
index 0000000..e5bb71f
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_7a_32-m-r7p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-m-r7p0.so
\ No newline at end of file
diff --git a/lib/mali450_ion/libGLES_mali_default_7a_32-n-r7p0.so b/lib/mali450_ion/libGLES_mali_default_7a_32-n-r7p0.so
new file mode 120000
index 0000000..079b4b0
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_7a_32-n-r7p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-n-r7p0.so
\ No newline at end of file
diff --git a/lib/mali450_ion/libGLES_mali_default_7a_32-o-r8p0.so b/lib/mali450_ion/libGLES_mali_default_7a_32-o-r8p0.so
new file mode 120000
index 0000000..6853e0d
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_7a_32-o-r8p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-o-r8p0.so
\ No newline at end of file
diff --git a/lib/mali450_ion/libGLES_mali_default_8a_32-l-r6p1.so b/lib/mali450_ion/libGLES_mali_default_8a_32-l-r6p1.so
new file mode 100644
index 0000000..23dfd32
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_8a_32-l-r6p1.so
Binary files differ
diff --git a/lib/mali450_ion/libGLES_mali_default_8a_32-l-r7p0.so b/lib/mali450_ion/libGLES_mali_default_8a_32-l-r7p0.so
new file mode 100644
index 0000000..502739a
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_8a_32-l-r7p0.so
Binary files differ
diff --git a/lib/mali450_ion/libGLES_mali_default_8a_32-m-r6p1.so b/lib/mali450_ion/libGLES_mali_default_8a_32-m-r6p1.so
new file mode 100644
index 0000000..1ced703
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_8a_32-m-r6p1.so
Binary files differ
diff --git a/lib/mali450_ion/libGLES_mali_default_8a_32-m-r7p0.so b/lib/mali450_ion/libGLES_mali_default_8a_32-m-r7p0.so
new file mode 100644
index 0000000..291dda0
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_8a_32-m-r7p0.so
Binary files differ
diff --git a/lib/mali450_ion/libGLES_mali_default_8a_32-n-r7p0.so b/lib/mali450_ion/libGLES_mali_default_8a_32-n-r7p0.so
new file mode 100644
index 0000000..a66cf47
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_8a_32-n-r7p0.so
Binary files differ
diff --git a/lib/mali450_ion/libGLES_mali_default_8a_32-o-r8p0.so b/lib/mali450_ion/libGLES_mali_default_8a_32-o-r8p0.so
new file mode 100644
index 0000000..c805d1b
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_8a_32-o-r8p0.so
Binary files differ
diff --git a/lib/mali450_ion/libGLES_mali_default_8a_64-l-r6p1.so b/lib/mali450_ion/libGLES_mali_default_8a_64-l-r6p1.so
new file mode 100644
index 0000000..35cf6b0
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_8a_64-l-r6p1.so
Binary files differ
diff --git a/lib/mali450_ion/libGLES_mali_default_8a_64-l-r7p0.so b/lib/mali450_ion/libGLES_mali_default_8a_64-l-r7p0.so
new file mode 100644
index 0000000..4a4fcac
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_8a_64-l-r7p0.so
Binary files differ
diff --git a/lib/mali450_ion/libGLES_mali_default_8a_64-m-r6p1.so b/lib/mali450_ion/libGLES_mali_default_8a_64-m-r6p1.so
new file mode 100644
index 0000000..aaa44d2
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_8a_64-m-r6p1.so
Binary files differ
diff --git a/lib/mali450_ion/libGLES_mali_default_8a_64-m-r7p0.so b/lib/mali450_ion/libGLES_mali_default_8a_64-m-r7p0.so
new file mode 100644
index 0000000..c216162
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_8a_64-m-r7p0.so
Binary files differ
diff --git a/lib/mali450_ion/libGLES_mali_default_8a_64-n-r7p0.so b/lib/mali450_ion/libGLES_mali_default_8a_64-n-r7p0.so
new file mode 100644
index 0000000..8d0904c
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_8a_64-n-r7p0.so
Binary files differ
diff --git a/lib/mali450_ion/libGLES_mali_default_8a_64-o-r8p0.so b/lib/mali450_ion/libGLES_mali_default_8a_64-o-r8p0.so
new file mode 100644
index 0000000..f07f081
--- /dev/null
+++ b/lib/mali450_ion/libGLES_mali_default_8a_64-o-r8p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_7a_32-k-r11p0.so b/lib/t82x_ion/libGLES_mali_default_7a_32-k-r11p0.so
new file mode 120000
index 0000000..fc7d316
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_7a_32-k-r11p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-k-r11p0.so
\ No newline at end of file
diff --git a/lib/t82x_ion/libGLES_mali_default_7a_32-l-r11p0.so b/lib/t82x_ion/libGLES_mali_default_7a_32-l-r11p0.so
new file mode 120000
index 0000000..de29fa0
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_7a_32-l-r11p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-l-r11p0.so
\ No newline at end of file
diff --git a/lib/t82x_ion/libGLES_mali_default_7a_32-l-r12p0.so b/lib/t82x_ion/libGLES_mali_default_7a_32-l-r12p0.so
new file mode 120000
index 0000000..ab2d10b
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_7a_32-l-r12p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-l-r12p0.so
\ No newline at end of file
diff --git a/lib/t82x_ion/libGLES_mali_default_7a_32-l-r13p0.so b/lib/t82x_ion/libGLES_mali_default_7a_32-l-r13p0.so
new file mode 120000
index 0000000..45c4f15
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_7a_32-l-r13p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-l-r13p0.so
\ No newline at end of file
diff --git a/lib/t82x_ion/libGLES_mali_default_7a_32-m-r11p0.so b/lib/t82x_ion/libGLES_mali_default_7a_32-m-r11p0.so
new file mode 120000
index 0000000..3401fcd
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_7a_32-m-r11p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-m-r11p0.so
\ No newline at end of file
diff --git a/lib/t82x_ion/libGLES_mali_default_7a_32-m-r12p0.so b/lib/t82x_ion/libGLES_mali_default_7a_32-m-r12p0.so
new file mode 120000
index 0000000..01d52fb
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_7a_32-m-r12p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-m-r12p0.so
\ No newline at end of file
diff --git a/lib/t82x_ion/libGLES_mali_default_7a_32-m-r16p0.so b/lib/t82x_ion/libGLES_mali_default_7a_32-m-r16p0.so
new file mode 120000
index 0000000..a57a1c1
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_7a_32-m-r16p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-m-r16p0.so
\ No newline at end of file
diff --git a/lib/t82x_ion/libGLES_mali_default_7a_32-n-r11p0.so b/lib/t82x_ion/libGLES_mali_default_7a_32-n-r11p0.so
new file mode 120000
index 0000000..9f89f41
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_7a_32-n-r11p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_7a_32-m-r11p0.so
\ No newline at end of file
diff --git a/lib/t82x_ion/libGLES_mali_default_7a_32-n-r16p0.so b/lib/t82x_ion/libGLES_mali_default_7a_32-n-r16p0.so
new file mode 120000
index 0000000..8b4bede
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_7a_32-n-r16p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-n-r16p0.so
\ No newline at end of file
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_32-k-r11p0.so b/lib/t82x_ion/libGLES_mali_default_8a_32-k-r11p0.so
new file mode 100644
index 0000000..1d67516
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_32-k-r11p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_32-l-r11p0.so b/lib/t82x_ion/libGLES_mali_default_8a_32-l-r11p0.so
new file mode 100644
index 0000000..61b0176
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_32-l-r11p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_32-l-r12p0.so b/lib/t82x_ion/libGLES_mali_default_8a_32-l-r12p0.so
new file mode 100644
index 0000000..52942da
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_32-l-r12p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_32-l-r13p0.so b/lib/t82x_ion/libGLES_mali_default_8a_32-l-r13p0.so
new file mode 100644
index 0000000..2375861
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_32-l-r13p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_32-m-r11p0.so b/lib/t82x_ion/libGLES_mali_default_8a_32-m-r11p0.so
new file mode 100644
index 0000000..08e62df
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_32-m-r11p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_32-m-r12p0.so b/lib/t82x_ion/libGLES_mali_default_8a_32-m-r12p0.so
new file mode 100644
index 0000000..52858f7
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_32-m-r12p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_32-m-r13p0.so b/lib/t82x_ion/libGLES_mali_default_8a_32-m-r13p0.so
new file mode 100644
index 0000000..e1a157a
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_32-m-r13p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_32-m-r16p0.so b/lib/t82x_ion/libGLES_mali_default_8a_32-m-r16p0.so
new file mode 100644
index 0000000..bd5d560
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_32-m-r16p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_32-n-r11p0.so b/lib/t82x_ion/libGLES_mali_default_8a_32-n-r11p0.so
new file mode 120000
index 0000000..3401fcd
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_32-n-r11p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-m-r11p0.so
\ No newline at end of file
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_32-n-r16p0.so b/lib/t82x_ion/libGLES_mali_default_8a_32-n-r16p0.so
new file mode 100644
index 0000000..d39af2c
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_32-n-r16p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_32-o-r21p0.so b/lib/t82x_ion/libGLES_mali_default_8a_32-o-r21p0.so
new file mode 100644
index 0000000..b328603
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_32-o-r21p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_64-l-r11p0.so b/lib/t82x_ion/libGLES_mali_default_8a_64-l-r11p0.so
new file mode 100644
index 0000000..8a264d8
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_64-l-r11p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_64-l-r12p0.so b/lib/t82x_ion/libGLES_mali_default_8a_64-l-r12p0.so
new file mode 100644
index 0000000..48a5f6f
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_64-l-r12p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_64-l-r13p0.so b/lib/t82x_ion/libGLES_mali_default_8a_64-l-r13p0.so
new file mode 100644
index 0000000..271d8b6
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_64-l-r13p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_64-m-r11p0.so b/lib/t82x_ion/libGLES_mali_default_8a_64-m-r11p0.so
new file mode 100644
index 0000000..835a5b9
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_64-m-r11p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_64-m-r12p0.so b/lib/t82x_ion/libGLES_mali_default_8a_64-m-r12p0.so
new file mode 120000
index 0000000..01d52fb
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_64-m-r12p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-m-r12p0.so
\ No newline at end of file
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_64-m-r14p0.so b/lib/t82x_ion/libGLES_mali_default_8a_64-m-r14p0.so
new file mode 100644
index 0000000..a660528
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_64-m-r14p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_64-m-r16p0.so b/lib/t82x_ion/libGLES_mali_default_8a_64-m-r16p0.so
new file mode 100644
index 0000000..70ec2ae
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_64-m-r16p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_64-n-r11p0.so b/lib/t82x_ion/libGLES_mali_default_8a_64-n-r11p0.so
new file mode 120000
index 0000000..e79d849
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_64-n-r11p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_64-m-r11p0.so
\ No newline at end of file
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_64-n-r16p0.so b/lib/t82x_ion/libGLES_mali_default_8a_64-n-r16p0.so
new file mode 100644
index 0000000..7ad3cb6
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_64-n-r16p0.so
Binary files differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_64-o-r21p0.so b/lib/t82x_ion/libGLES_mali_default_8a_64-o-r21p0.so
new file mode 100644
index 0000000..46666a2
--- /dev/null
+++ b/lib/t82x_ion/libGLES_mali_default_8a_64-o-r21p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_7a_32-l-r11p0.so b/lib/t83x_ion/libGLES_mali_default_7a_32-l-r11p0.so
new file mode 120000
index 0000000..de29fa0
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_7a_32-l-r11p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-l-r11p0.so
\ No newline at end of file
diff --git a/lib/t83x_ion/libGLES_mali_default_7a_32-l-r12p0.so b/lib/t83x_ion/libGLES_mali_default_7a_32-l-r12p0.so
new file mode 120000
index 0000000..ab2d10b
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_7a_32-l-r12p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-l-r12p0.so
\ No newline at end of file
diff --git a/lib/t83x_ion/libGLES_mali_default_7a_32-l-r13p0.so b/lib/t83x_ion/libGLES_mali_default_7a_32-l-r13p0.so
new file mode 120000
index 0000000..45c4f15
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_7a_32-l-r13p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-l-r13p0.so
\ No newline at end of file
diff --git a/lib/t83x_ion/libGLES_mali_default_7a_32-m-r11p0.so b/lib/t83x_ion/libGLES_mali_default_7a_32-m-r11p0.so
new file mode 120000
index 0000000..3401fcd
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_7a_32-m-r11p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-m-r11p0.so
\ No newline at end of file
diff --git a/lib/t83x_ion/libGLES_mali_default_7a_32-m-r12p0.so b/lib/t83x_ion/libGLES_mali_default_7a_32-m-r12p0.so
new file mode 120000
index 0000000..01d52fb
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_7a_32-m-r12p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-m-r12p0.so
\ No newline at end of file
diff --git a/lib/t83x_ion/libGLES_mali_default_7a_32-m-r16p0.so b/lib/t83x_ion/libGLES_mali_default_7a_32-m-r16p0.so
new file mode 120000
index 0000000..a57a1c1
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_7a_32-m-r16p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-m-r16p0.so
\ No newline at end of file
diff --git a/lib/t83x_ion/libGLES_mali_default_7a_32-n-r11p0.so b/lib/t83x_ion/libGLES_mali_default_7a_32-n-r11p0.so
new file mode 120000
index 0000000..9f89f41
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_7a_32-n-r11p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_7a_32-m-r11p0.so
\ No newline at end of file
diff --git a/lib/t83x_ion/libGLES_mali_default_7a_32-n-r16p0.so b/lib/t83x_ion/libGLES_mali_default_7a_32-n-r16p0.so
new file mode 120000
index 0000000..8b4bede
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_7a_32-n-r16p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-n-r16p0.so
\ No newline at end of file
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_32-l-r11p0.so b/lib/t83x_ion/libGLES_mali_default_8a_32-l-r11p0.so
new file mode 100644
index 0000000..d36e31d
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_32-l-r11p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_32-l-r12p0.so b/lib/t83x_ion/libGLES_mali_default_8a_32-l-r12p0.so
new file mode 100644
index 0000000..13a7656
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_32-l-r12p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_32-l-r13p0.so b/lib/t83x_ion/libGLES_mali_default_8a_32-l-r13p0.so
new file mode 100644
index 0000000..4f4258a
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_32-l-r13p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_32-m-r11p0.so b/lib/t83x_ion/libGLES_mali_default_8a_32-m-r11p0.so
new file mode 100644
index 0000000..4537e5a
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_32-m-r11p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_32-m-r12p0.so b/lib/t83x_ion/libGLES_mali_default_8a_32-m-r12p0.so
new file mode 100644
index 0000000..13fbfcc
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_32-m-r12p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_32-m-r13p0.so b/lib/t83x_ion/libGLES_mali_default_8a_32-m-r13p0.so
new file mode 100644
index 0000000..af71a1a
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_32-m-r13p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_32-m-r16p0.so b/lib/t83x_ion/libGLES_mali_default_8a_32-m-r16p0.so
new file mode 100644
index 0000000..f64f168
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_32-m-r16p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_32-n-r11p0.so b/lib/t83x_ion/libGLES_mali_default_8a_32-n-r11p0.so
new file mode 120000
index 0000000..3401fcd
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_32-n-r11p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-m-r11p0.so
\ No newline at end of file
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_32-n-r16p0.so b/lib/t83x_ion/libGLES_mali_default_8a_32-n-r16p0.so
new file mode 100644
index 0000000..b649dba
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_32-n-r16p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_32-o-r21p0.so b/lib/t83x_ion/libGLES_mali_default_8a_32-o-r21p0.so
new file mode 100644
index 0000000..8c70bce
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_32-o-r21p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_64-l-r11p0.so b/lib/t83x_ion/libGLES_mali_default_8a_64-l-r11p0.so
new file mode 100644
index 0000000..521a2d5
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_64-l-r11p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_64-l-r12p0.so b/lib/t83x_ion/libGLES_mali_default_8a_64-l-r12p0.so
new file mode 100644
index 0000000..77a72f1
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_64-l-r12p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_64-l-r13p0.so b/lib/t83x_ion/libGLES_mali_default_8a_64-l-r13p0.so
new file mode 100644
index 0000000..2c1236e
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_64-l-r13p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_64-m-r11p0.so b/lib/t83x_ion/libGLES_mali_default_8a_64-m-r11p0.so
new file mode 100644
index 0000000..2b1687a
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_64-m-r11p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_64-m-r12p0.so b/lib/t83x_ion/libGLES_mali_default_8a_64-m-r12p0.so
new file mode 120000
index 0000000..01d52fb
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_64-m-r12p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_32-m-r12p0.so
\ No newline at end of file
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_64-m-r14p0.so b/lib/t83x_ion/libGLES_mali_default_8a_64-m-r14p0.so
new file mode 100644
index 0000000..0635244
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_64-m-r14p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_64-m-r16p0.so b/lib/t83x_ion/libGLES_mali_default_8a_64-m-r16p0.so
new file mode 100644
index 0000000..829aebf
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_64-m-r16p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_64-n-r11p0.so b/lib/t83x_ion/libGLES_mali_default_8a_64-n-r11p0.so
new file mode 120000
index 0000000..e79d849
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_64-n-r11p0.so
@@ -0,0 +1 @@
+libGLES_mali_default_8a_64-m-r11p0.so
\ No newline at end of file
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_64-n-r16p0.so b/lib/t83x_ion/libGLES_mali_default_8a_64-n-r16p0.so
new file mode 100644
index 0000000..8246a84
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_64-n-r16p0.so
Binary files differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_64-o-r21p0.so b/lib/t83x_ion/libGLES_mali_default_8a_64-o-r21p0.so
new file mode 100644
index 0000000..ff7ce4a
--- /dev/null
+++ b/lib/t83x_ion/libGLES_mali_default_8a_64-o-r21p0.so
Binary files differ
diff --git a/mali/.version b/mali/.version
new file mode 100755
index 0000000..f84a6cb
--- /dev/null
+++ b/mali/.version
@@ -0,0 +1 @@
+r8p0-01rel0
diff --git a/mali/Kbuild b/mali/Kbuild
new file mode 100755
index 0000000..a2f8357
--- /dev/null
+++ b/mali/Kbuild
@@ -0,0 +1,262 @@
+#
+# Copyright (C) 2010-2011 ARM Limited. All rights reserved.
+# 
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+# 
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+# This file is called by the Linux build system.
+
+# set up defaults if not defined by the user
+include $(src)/platform/Kbuild.amlogic
+
+TIMESTAMP ?= default
+OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB ?= 16
+USING_GPU_UTILIZATION ?= 0
+PROFILING_SKIP_PP_JOBS ?= 0
+PROFILING_SKIP_PP_AND_GP_JOBS ?= 0
+MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP ?= 0
+MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED ?= 0
+MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS ?= 0
+MALI_UPPER_HALF_SCHEDULING ?= 1
+MALI_ENABLE_CPU_CYCLES ?= 0
+
+# For customer releases the Linux Device Drivers will be provided as ARM proprietary and GPL releases:
+# The ARM proprietary product will only include the license/proprietary directory
+# The GPL product will only include the license/gpl directory
+ifeq ($(wildcard $(src)/linux/license/gpl/*),)
+    ccflags-y += -I$(src)/linux/license/proprietary
+    ifeq ($(CONFIG_MALI400_PROFILING),y)
+        $(error Profiling is incompatible with non-GPL license)
+    endif
+    ifeq ($(CONFIG_PM_RUNTIME),y)
+        $(error Runtime PM is incompatible with non-GPL license)
+    endif
+    ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
+        $(error DMA-BUF is incompatible with non-GPL license)
+    endif
+    $(error Linux Device integration is incompatible with non-GPL license)
+else
+    ccflags-y += -I$(src)/linux/license/gpl
+endif
+
+ifeq ($(USING_GPU_UTILIZATION), 1)
+    ifeq ($(USING_DVFS), 1)
+        $(error USING_GPU_UTILIZATION conflict with USING_DVFS you can read the Integration Guide to choose which one do you need)
+    endif
+endif
+
+ifeq ($(MALI_PLATFORM_FILES),)
+ifeq ($(CONFIG_ARCH_EXYNOS4),y)
+EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1
+export MALI_PLATFORM=exynos4
+export MALI_PLATFORM_FILES_BUILDIN = $(notdir $(wildcard $(src)/platform/$(MALI_PLATFORM)/*.c))
+export MALI_PLATFORM_FILES_ADD_PREFIX = $(addprefix platform/$(MALI_PLATFORM)/,$(MALI_PLATFORM_FILES_BUILDIN)) 
+endif
+endif
+
+mali-y += \
+	linux/mali_osk_atomics.o \
+	linux/mali_osk_irq.o \
+	linux/mali_osk_wq.o \
+	linux/mali_osk_locks.o \
+	linux/mali_osk_wait_queue.o \
+	linux/mali_osk_low_level_mem.o \
+	linux/mali_osk_math.o \
+	linux/mali_osk_memory.o \
+	linux/mali_osk_misc.o \
+	linux/mali_osk_mali.o \
+	linux/mali_osk_notification.o \
+	linux/mali_osk_time.o \
+	linux/mali_osk_timers.o \
+	linux/mali_osk_bitmap.o
+
+mali-y += linux/mali_memory.o linux/mali_memory_os_alloc.o
+mali-y += linux/mali_memory_external.o
+mali-y += linux/mali_memory_block_alloc.o
+mali-y += linux/mali_memory_swap_alloc.o
+
+mali-y += \
+	linux/mali_memory_manager.o \
+	linux/mali_memory_virtual.o \
+	linux/mali_memory_util.o \
+	linux/mali_memory_cow.o \
+	linux/mali_memory_defer_bind.o
+
+mali-y += \
+	linux/mali_ukk_mem.o \
+	linux/mali_ukk_gp.o \
+	linux/mali_ukk_pp.o \
+	linux/mali_ukk_core.o \
+	linux/mali_ukk_soft_job.o \
+	linux/mali_ukk_timeline.o
+
+mali-$(CONFIG_MALI_DEVFREQ) += \
+	linux/mali_devfreq.o \
+	common/mali_pm_metrics.o
+
+# Source files which always are included in a build
+mali-y += \
+	common/mali_kernel_core.o \
+	linux/mali_kernel_linux.o \
+	common/mali_session.o \
+	linux/mali_device_pause_resume.o \
+	common/mali_kernel_vsync.o \
+	linux/mali_ukk_vsync.o \
+	linux/mali_kernel_sysfs.o \
+	common/mali_mmu.o \
+	common/mali_mmu_page_directory.o \
+	common/mali_mem_validation.o \
+	common/mali_hw_core.o \
+	common/mali_gp.o \
+	common/mali_pp.o \
+	common/mali_pp_job.o \
+	common/mali_gp_job.o \
+	common/mali_soft_job.o \
+	common/mali_scheduler.o \
+	common/mali_executor.o \
+	common/mali_group.o \
+	common/mali_dlbu.o \
+	common/mali_broadcast.o \
+	common/mali_pm.o \
+	common/mali_pmu.o \
+	common/mali_user_settings_db.o \
+	common/mali_kernel_utilization.o \
+	common/mali_control_timer.o \
+	common/mali_l2_cache.o \
+	common/mali_timeline.o \
+	common/mali_timeline_fence_wait.o \
+	common/mali_timeline_sync_fence.o \
+	common/mali_spinlock_reentrant.o \
+	common/mali_pm_domain.o \
+	linux/mali_osk_pm.o \
+	linux/mali_pmu_power_up_down.o \
+	__malidrv_build_info.o
+
+ifneq ($(wildcard $(src)/linux/mali_slp_global_lock.c),)
+	mali-y += linux/mali_slp_global_lock.o
+endif
+
+ifneq ($(MALI_PLATFORM_FILES),)
+	mali-y += $(MALI_PLATFORM_FILES:.c=.o)
+endif
+
+ifneq ($(MALI_PLATFORM_FILES_ADD_PREFIX),)
+	mali-y += $(MALI_PLATFORM_FILES_ADD_PREFIX:.c=.o)
+endif
+
+mali-$(CONFIG_MALI400_PROFILING) += linux/mali_ukk_profiling.o
+mali-$(CONFIG_MALI400_PROFILING) += linux/mali_osk_profiling.o
+
+mali-$(CONFIG_MALI400_INTERNAL_PROFILING) += linux/mali_profiling_internal.o timestamp-$(TIMESTAMP)/mali_timestamp.o
+ccflags-$(CONFIG_MALI400_INTERNAL_PROFILING) += -I$(src)/timestamp-$(TIMESTAMP)
+
+mali-$(CONFIG_DMA_SHARED_BUFFER) += linux/mali_memory_dma_buf.o
+mali-$(CONFIG_DMA_SHARED_BUFFER) += linux/mali_memory_secure.o
+mali-$(CONFIG_SYNC) += linux/mali_sync.o
+mali-$(CONFIG_SYNC) += linux/mali_internal_sync.o
+mali-$(CONFIG_SYNC_FILE) += linux/mali_sync.o
+mali-$(CONFIG_SYNC_FILE) += linux/mali_internal_sync.o
+mali-$(CONFIG_MALI_DMA_BUF_FENCE) += linux/mali_dma_fence.o
+ccflags-$(CONFIG_SYNC) += -Idrivers/staging/android
+ccflags-$(CONFIG_SYNC_FILE) += -Idrivers/staging/android
+
+mali-$(CONFIG_MALI400_UMP) += linux/mali_memory_ump.o
+
+mali-$(CONFIG_MALI_DVFS) += common/mali_dvfs_policy.o
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI400) := mali.o
+
+ccflags-y += $(EXTRA_DEFINES)
+
+# Set up our defines, which will be passed to gcc
+ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP)
+ccflags-y += -DMALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED=$(MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED)
+ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS)
+ifdef CONFIG_MALI400_DEBUG
+ccflags-y += -DMALI_STATE_TRACKING=1
+else
+ccflags-y += -DMALI_STATE_TRACKING=0
+endif
+ccflags-y += -DMALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB)
+ccflags-y += -DUSING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION)
+ccflags-y += -DMALI_ENABLE_CPU_CYCLES=$(MALI_ENABLE_CPU_CYCLES)
+
+ifeq ($(MALI_UPPER_HALF_SCHEDULING),1)
+	ccflags-y += -DMALI_UPPER_HALF_SCHEDULING
+endif
+
+#build-in include path is different
+ifeq ($(MALI_PLATFORM_FILES),)
+ccflags-$(CONFIG_MALI400_UMP) += -I$(src)/../ump/include/
+else
+ccflags-$(CONFIG_MALI400_UMP) += -I$(src)/../../ump/include/ump
+endif
+ccflags-$(CONFIG_MALI400_DEBUG) += -DDEBUG
+
+# Use our defines when compiling
+ccflags-y += -I$(src) -I$(src)/include -I$(src)/common -I$(src)/linux -I$(src)/platform
+
+# Get subversion revision number, fall back to only ${MALI_RELEASE_NAME} if no svn info is available
+MALI_RELEASE_NAME=$(shell cat $(src)/.version 2> /dev/null)
+
+SVN_INFO = (cd $(src); svn info 2>/dev/null)
+
+ifneq ($(shell $(SVN_INFO) 2>/dev/null),)
+# SVN detected
+SVN_REV := $(shell $(SVN_INFO) | grep '^Revision: '| sed -e 's/^Revision: //' 2>/dev/null)
+DRIVER_REV := $(MALI_RELEASE_NAME)-r$(SVN_REV)
+CHANGE_DATE := $(shell $(SVN_INFO) | grep '^Last Changed Date: ' | cut -d: -f2- | cut -b2-)
+CHANGED_REVISION := $(shell $(SVN_INFO) | grep '^Last Changed Rev: ' | cut -d: -f2- | cut -b2-)
+REPO_URL := $(shell $(SVN_INFO) | grep '^URL: ' | cut -d: -f2- | cut -b2-)
+
+else # SVN
+GIT_REV := $(shell cd $(src); git describe --always 2>/dev/null)
+ifneq ($(GIT_REV),)
+# Git detected
+DRIVER_REV := $(MALI_RELEASE_NAME)-$(GIT_REV)
+CHANGE_DATE := $(shell cd $(src); git log -1 --format="%ci")
+CHANGED_REVISION := $(GIT_REV)
+REPO_URL := $(shell cd $(src); git describe --all --always 2>/dev/null)
+
+else # Git
+# No Git or SVN detected
+DRIVER_REV := $(MALI_RELEASE_NAME)
+CHANGE_DATE := $(MALI_RELEASE_NAME)
+CHANGED_REVISION := $(MALI_RELEASE_NAME)
+endif
+endif
+
+ccflags-y += -DSVN_REV_STRING=\"$(DRIVER_REV)\"
+
+VERSION_STRINGS :=
+VERSION_STRINGS += API_VERSION=$(shell cd $(src); grep "\#define _MALI_API_VERSION" $(FILES_PREFIX)include/linux/mali/mali_utgard_uk_types.h | cut -d' ' -f 3 )
+VERSION_STRINGS += REPO_URL=$(REPO_URL)
+VERSION_STRINGS += REVISION=$(DRIVER_REV)
+VERSION_STRINGS += CHANGED_REVISION=$(CHANGED_REVISION)
+VERSION_STRINGS += CHANGE_DATE=$(CHANGE_DATE)
+VERSION_STRINGS += BUILD_DATE=$(shell date)
+ifdef CONFIG_MALI400_DEBUG
+VERSION_STRINGS += BUILD=debug
+else
+VERSION_STRINGS += BUILD=release
+endif
+VERSION_STRINGS += TARGET_PLATFORM=$(TARGET_PLATFORM)
+VERSION_STRINGS += MALI_PLATFORM=$(MALI_PLATFORM)
+VERSION_STRINGS += KDIR=$(KDIR)
+VERSION_STRINGS += OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB)
+VERSION_STRINGS += USING_UMP=$(CONFIG_MALI400_UMP)
+VERSION_STRINGS += USING_PROFILING=$(CONFIG_MALI400_PROFILING)
+VERSION_STRINGS += USING_INTERNAL_PROFILING=$(CONFIG_MALI400_INTERNAL_PROFILING)
+VERSION_STRINGS += USING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION)
+VERSION_STRINGS += USING_DVFS=$(CONFIG_MALI_DVFS)
+VERSION_STRINGS += USING_DMA_BUF_FENCE = $(CONFIG_MALI_DMA_BUF_FENCE)
+VERSION_STRINGS += MALI_UPPER_HALF_SCHEDULING=$(MALI_UPPER_HALF_SCHEDULING)
+
+# Create file with Mali driver configuration
+$(src)/__malidrv_build_info.c:
+	@echo 'const char *__malidrv_build_info(void) { return "malidrv: $(VERSION_STRINGS)";}' > $(src)/__malidrv_build_info.c
diff --git a/mali/Kconfig b/mali/Kconfig
new file mode 100755
index 0000000..fbf08de
--- /dev/null
+++ b/mali/Kconfig
@@ -0,0 +1,136 @@
+menu "Mali GPU OpenGL device driver"
+config MALI400
+	tristate "Mali-300/400/450 support"
+	depends on ARM || ARM64
+	default m
+	select DMA_SHARED_BUFFER
+	---help---
+	  This enables support for the ARM Mali-300, Mali-400, and Mali-450
+	  GPUs.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called mali.
+
+config MALI470
+	bool "Enable Mali-470 support"
+	depends on MALI400
+	---help---
+	  This enables support for Mali-470 specific features.
+
+config MALI400_DEBUG
+	bool "Enable debug in Mali driver"
+	depends on MALI400
+	default n
+	---help---
+	  This enabled extra debug checks and messages in the Mali driver.
+
+config MALI400_PROFILING_EXTRA_SUPPORT
+	bool "Select other items in kernel to support PROFILING."
+	depends on MALI400_PROFILING
+	select PROFILING
+	select FTRACE
+	select PERF_EVENTS
+	select ENABLE_DEFAULT_TRACERS
+	select DEBUG_MUTEXES
+	select HIGH_RES_TIMERS
+	select HW_PERF_EVENTS
+	select CPU_FREQ
+	select MALI400_DEBUG
+
+config MALI400_PROFILING
+	bool "Enable Mali profiling"
+	depends on MALI400
+	select TRACEPOINTS
+	default n
+	---help---
+	  This enables gator profiling of Mali GPU events.
+
+config MALI400_INTERNAL_PROFILING
+	bool "Enable internal Mali profiling API"
+	depends on MALI400_PROFILING
+	default n
+	---help---
+	  This enables the internal legacy Mali profiling API.
+
+config MALI400_UMP
+	bool "Enable UMP support"
+	depends on MALI400
+	---help---
+	  This enables support for the UMP memory sharing API in the Mali driver.
+
+config MALI_DVFS
+	bool "Enable Mali dynamically frequency change"
+	depends on MALI400 && !MALI_DEVFREQ
+	default n
+	---help---
+	  This enables support for dynamic change frequency of Mali with the goal of lowering power consumption.
+
+config MALI_DMA_BUF_MAP_ON_ATTACH
+	bool "Map dma-buf attachments on attach"
+	depends on MALI400 && DMA_SHARED_BUFFER
+	default y
+	---help---
+	  This makes the Mali driver map dma-buf attachments after doing
+	  attach. If this is not set the dma-buf attachments will be mapped for
+	  every time the GPU need to access the buffer.
+
+	  Mapping for each access can cause lower performance.
+
+config MALI_SHARED_INTERRUPTS
+	bool "Support for shared interrupts"
+	depends on MALI400
+	default n
+	---help---
+	  Adds functionality required to properly support shared interrupts.  Without this support,
+	  the device driver will fail during insmod if it detects shared interrupts.  This also
+	  works when the GPU is not using shared interrupts, but might have a slight performance
+	  impact.
+
+if ARCH_MESON6
+config	MESON6_GPU_EXTRA
+	bool "M6 fix"
+	depends on MALI400
+	default y
+	select MALI_SHARED_INTERRUPTS
+endif
+
+config MALI_PMU_PARALLEL_POWER_UP
+	bool "Power up Mali PMU domains in parallel"
+	depends on MALI400
+	default n
+	---help---
+	  This makes the Mali driver power up all PMU power domains in parallel, instead of
+	  powering up domains one by one, with a slight delay in between. Powering on all power
+	  domains at the same time may cause peak currents higher than what some systems can handle.
+	  These systems must not enable this option.
+
+config MALI_DT
+	bool "Using device tree to initialize module"
+	depends on MALI400 && OF
+	default n
+	---help---
+	  This enable the Mali driver to choose the device tree path to get platform resoures
+	  and disable the old config method. Mali driver could run on the platform which the
+	  device tree is enabled in kernel and corresponding hardware description is implemented
+	  properly in device DTS file.
+
+config MALI_DEVFREQ
+	bool "Using devfreq to tuning frequency"
+	depends on MALI400 && PM_DEVFREQ
+	default n
+	---help---
+	Support devfreq for Mali.
+
+	Using the devfreq framework and, by default, the simpleondemand
+	governor, the frequency of Mali will be dynamically selected from the
+	available OPPs.
+
+config MALI_QUIET
+	bool "Make Mali driver very quiet"
+	depends on MALI400 && !MALI400_DEBUG
+	default n
+	---help---
+	  This forces the Mali driver to never print any messages.
+
+	  If unsure, say N.
+endmenu
diff --git a/mali/Makefile b/mali/Makefile
new file mode 100755
index 0000000..5a259fe
--- /dev/null
+++ b/mali/Makefile
@@ -0,0 +1,216 @@
+#
+# Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+# 
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+# 
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+USE_UMPV2=0
+USING_PROFILING ?= 1
+USING_INTERNAL_PROFILING ?= 0
+USING_DVFS ?= 1
+USING_DMA_BUF_FENCE ?= 0
+MALI_HEATMAPS_ENABLED ?= 0
+MALI_DMA_BUF_MAP_ON_ATTACH ?= 1
+MALI_DMA_BUF_LAZY_MAP ?= 0
+MALI_PMU_PARALLEL_POWER_UP ?= 0
+USING_DT ?= 0
+MALI_MEM_SWAP_TRACKING ?= 0
+USING_DEVFREQ ?= 0
+
+# The Makefile sets up "arch" based on the CONFIG, creates the version info
+# string and the __malidrv_build_info.c file, and then call the Linux build
+# system to actually build the driver. After that point the Kbuild file takes
+# over.
+
+# set up defaults if not defined by the user
+ARCH ?= arm
+
+OSKOS=linux
+FILES_PREFIX=
+
+check_cc2 = \
+	$(shell if $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; \
+	then \
+		echo "$(2)"; \
+	else \
+		echo "$(3)"; \
+	fi ;)
+
+# This conditional makefile exports the global definition ARM_INTERNAL_BUILD. Customer releases will not include arm_internal.mak
+-include ../../../arm_internal.mak
+
+# Give warning of old config parameters are used
+ifneq ($(CONFIG),)
+$(warning "You have specified the CONFIG variable which is no longer in used. Use TARGET_PLATFORM instead.")
+endif
+
+ifneq ($(CPU),)
+$(warning "You have specified the CPU variable which is no longer in used. Use TARGET_PLATFORM instead.")
+endif
+
+# Include the mapping between TARGET_PLATFORM and KDIR + MALI_PLATFORM
+-include MALI_CONFIGURATION
+export KDIR ?= $(KDIR-$(TARGET_PLATFORM))
+export MALI_PLATFORM ?= $(MALI_PLATFORM-$(TARGET_PLATFORM))
+
+ifneq ($(TARGET_PLATFORM),)
+ifeq ($(MALI_PLATFORM),)
+$(error "Invalid TARGET_PLATFORM: $(TARGET_PLATFORM)")
+endif
+endif
+
+# validate lookup result
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(TARGET_PLATFORM))
+endif
+
+ifeq ($(USING_GPU_UTILIZATION), 1)
+    ifeq ($(USING_DVFS), 1)
+        $(error USING_GPU_UTILIZATION conflict with USING_DVFS you can read the Integration Guide to choose which one do you need)
+    endif
+endif
+
+ifeq ($(USING_UMP),1)
+export CONFIG_MALI400_UMP=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_UMP=1
+ifeq ($(USE_UMPV2),1)
+UMP_SYMVERS_FILE ?= ../umpv2/Module.symvers
+else
+UMP_SYMVERS_FILE ?= ../ump/Module.symvers
+endif
+KBUILD_EXTRA_SYMBOLS = $(realpath $(UMP_SYMVERS_FILE))
+$(warning $(KBUILD_EXTRA_SYMBOLS))
+endif
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+include $(KDIR)/.config
+
+ifeq ($(ARCH), arm)
+# when compiling for ARM we're cross compiling
+export CROSS_COMPILE ?= $(call check_cc2, arm-linux-gnueabi-gcc, arm-linux-gnueabi-, arm-none-linux-gnueabi-)
+endif
+
+# report detected/selected settings
+ifdef ARM_INTERNAL_BUILD
+$(warning TARGET_PLATFORM $(TARGET_PLATFORM))
+$(warning KDIR $(KDIR))
+$(warning MALI_PLATFORM $(MALI_PLATFORM))
+endif
+
+# Set up build config
+export CONFIG_MALI400=m
+export CONFIG_MALI450=y
+export CONFIG_MALI470=y
+
+export EXTRA_DEFINES += -DCONFIG_MALI400=1
+export EXTRA_DEFINES += -DCONFIG_MALI450=1
+export EXTRA_DEFINES += -DCONFIG_MALI470=1
+
+ifneq ($(MALI_PLATFORM),)
+export EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1
+export MALI_PLATFORM_FILES = $(wildcard platform/$(MALI_PLATFORM)/*.c)
+endif
+
+ifeq ($(USING_PROFILING),1)
+ifeq ($(CONFIG_TRACEPOINTS),)
+$(warning CONFIG_TRACEPOINTS required for profiling)
+else
+export CONFIG_MALI400_PROFILING=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_PROFILING=1
+ifeq ($(USING_INTERNAL_PROFILING),1)
+export CONFIG_MALI400_INTERNAL_PROFILING=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_INTERNAL_PROFILING=1
+endif
+ifeq ($(MALI_HEATMAPS_ENABLED),1)
+export MALI_HEATMAPS_ENABLED=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_HEATMAPS_ENABLED
+endif
+endif
+endif
+
+ifeq ($(MALI_DMA_BUF_MAP_ON_ATTACH),1)
+export CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DMA_BUF_MAP_ON_ATTACH
+endif
+
+ifeq ($(MALI_DMA_BUF_LAZY_MAP),1)
+ifeq ($(MALI_DMA_BUF_MAP_ON_ATTACH),0)
+export CONFIG_MALI_DMA_BUF_LAZY_MAP=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DMA_BUF_LAZY_MAP
+else
+$(warning "You want to enable MALI_DMA_BUF_LAZY_MAP but MALI_DMA_BUF_MAP_ON_ATTACH was enabled.")
+endif
+endif
+
+ifeq ($(MALI_SHARED_INTERRUPTS),1)
+export CONFIG_MALI_SHARED_INTERRUPTS=y
+export EXTRA_DEFINES += -DCONFIG_MALI_SHARED_INTERRUPTS
+endif
+
+ifeq ($(USING_DVFS),1)
+export CONFIG_MALI_DVFS=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DVFS
+endif
+
+ifeq ($(USING_DMA_BUF_FENCE),1)
+export CONFIG_MALI_DMA_BUF_FENCE=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DMA_BUF_FENCE
+endif
+
+ifeq ($(MALI_PMU_PARALLEL_POWER_UP),1)
+export CONFIG_MALI_PMU_PARALLEL_POWER_UP=y
+export EXTRA_DEFINES += -DCONFIG_MALI_PMU_PARALLEL_POWER_UP
+endif
+
+ifdef CONFIG_OF
+ifeq ($(USING_DT),1)
+export CONFIG_MALI_DT=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DT
+endif
+endif
+
+ifeq ($(USING_DEVFREQ), 1)
+ifdef CONFIG_PM_DEVFREQ
+export CONFIG_MALI_DEVFREQ=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DEVFREQ=1
+else
+$(warning "You want to support DEVFREQ but kernel didn't support DEVFREQ.")
+endif
+endif
+
+ifneq ($(BUILD),release)
+# Debug
+export CONFIG_MALI400_DEBUG=y
+else
+# Release
+ifeq ($(MALI_QUIET),1)
+export CONFIG_MALI_QUIET=y
+export EXTRA_DEFINES += -DCONFIG_MALI_QUIET
+endif
+endif
+
+ifeq ($(MALI_SKIP_JOBS),1)
+EXTRA_DEFINES += -DPROFILING_SKIP_PP_JOBS=1 -DPROFILING_SKIP_GP_JOBS=1
+endif
+
+ifeq ($(MALI_MEM_SWAP_TRACKING),1)
+EXTRA_DEFINES += -DMALI_MEM_SWAP_TRACKING=1
+endif
+
+all: $(UMP_SYMVERS_FILE)
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) modules
+	@rm $(FILES_PREFIX)__malidrv_build_info.c $(FILES_PREFIX)__malidrv_build_info.o
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+kernelrelease:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) kernelrelease
+
+export CONFIG KBUILD_EXTRA_SYMBOLS
diff --git a/mali/clean.sh b/mali/clean.sh
new file mode 100755
index 0000000..130fd73
--- /dev/null
+++ b/mali/clean.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# rm *.o, *.cmd, *~
+find . -name "*.o" -o -name "*.cmd" -o -name "*~" | xargs rm -rf
+rm	Module.symvers
+rm	mali.ko
+rm	mali.mod.c
+rm	modules.order
diff --git a/mali/common/mali_broadcast.c b/mali/common/mali_broadcast.c
new file mode 100755
index 0000000..4c4b2bc
--- /dev/null
+++ b/mali/common/mali_broadcast.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_broadcast.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+#define MALI_BROADCAST_REGISTER_SIZE      0x1000
+#define MALI_BROADCAST_REG_BROADCAST_MASK    0x0
+#define MALI_BROADCAST_REG_INTERRUPT_MASK    0x4
+
+struct mali_bcast_unit {
+	struct mali_hw_core hw_core;
+	u32 current_mask;
+};
+
+struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource)
+{
+	struct mali_bcast_unit *bcast_unit = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(resource);
+	MALI_DEBUG_PRINT(2, ("Broadcast: Creating Mali Broadcast unit: %s\n",
+			     resource->description));
+
+	bcast_unit = _mali_osk_malloc(sizeof(struct mali_bcast_unit));
+	if (NULL == bcast_unit) {
+		MALI_PRINT_ERROR(("Broadcast: Failed to allocate memory for Broadcast unit\n"));
+		return NULL;
+	}
+
+	if (_MALI_OSK_ERR_OK == mali_hw_core_create(&bcast_unit->hw_core,
+			resource, MALI_BROADCAST_REGISTER_SIZE)) {
+		bcast_unit->current_mask = 0;
+		mali_bcast_reset(bcast_unit);
+
+		return bcast_unit;
+	} else {
+		MALI_PRINT_ERROR(("Broadcast: Failed map broadcast unit\n"));
+	}
+
+	_mali_osk_free(bcast_unit);
+
+	return NULL;
+}
+
+void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	mali_hw_core_delete(&bcast_unit->hw_core);
+	_mali_osk_free(bcast_unit);
+}
+
+/* Call this function to add the @group's id into bcast mask
+ * Note: redundant calling this function with same @group
+ * doesn't make any difference as calling it once
+ */
+void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit,
+			  struct mali_group *group)
+{
+	u32 bcast_id;
+	u32 broadcast_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group));
+
+	broadcast_mask = bcast_unit->current_mask;
+
+	broadcast_mask |= (bcast_id); /* add PP core to broadcast */
+	broadcast_mask |= (bcast_id << 16); /* add MMU to broadcast */
+
+	/* store mask so we can restore on reset */
+	bcast_unit->current_mask = broadcast_mask;
+}
+
+/* Call this function to remove @group's id from bcast mask
+ * Note: redundant calling this function with same @group
+ * doesn't make any difference as calling it once
+ */
+void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit,
+			     struct mali_group *group)
+{
+	u32 bcast_id;
+	u32 broadcast_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group));
+
+	broadcast_mask = bcast_unit->current_mask;
+
+	broadcast_mask &= ~((bcast_id << 16) | bcast_id);
+
+	/* store mask so we can restore on reset */
+	bcast_unit->current_mask = broadcast_mask;
+}
+
+void mali_bcast_reset(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+
+	MALI_DEBUG_PRINT(4,
+			 ("Broadcast: setting mask 0x%08X + 0x%08X (reset)\n",
+			  bcast_unit->current_mask,
+			  bcast_unit->current_mask & 0xFF));
+
+	/* set broadcast mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_BROADCAST_MASK,
+				    bcast_unit->current_mask);
+
+	/* set IRQ override mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_INTERRUPT_MASK,
+				    bcast_unit->current_mask & 0xFF);
+}
+
+void mali_bcast_disable(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+
+	MALI_DEBUG_PRINT(4, ("Broadcast: setting mask 0x0 + 0x0 (disable)\n"));
+
+	/* set broadcast mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_BROADCAST_MASK,
+				    0x0);
+
+	/* set IRQ override mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_INTERRUPT_MASK,
+				    0x0);
+}
diff --git a/mali/common/mali_broadcast.h b/mali/common/mali_broadcast.h
new file mode 100755
index 0000000..e12e8a2
--- /dev/null
+++ b/mali/common/mali_broadcast.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_BROADCAST_H__
+#define __MALI_BROADCAST_H__
+
+/*
+ *  Interface for the broadcast unit on Mali-450.
+ *
+ * - Represents up to 8 × (MMU + PP) pairs.
+ * - Supports dynamically changing which (MMU + PP) pairs receive the broadcast by
+ *   setting a mask.
+ */
+
+#include "mali_hw_core.h"
+#include "mali_group.h"
+
+struct mali_bcast_unit;
+
+struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource);
+void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit);
+
+/* Add a group to the list of (MMU + PP) pairs broadcasts go out to. */
+void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group);
+
+/* Remove a group to the list of (MMU + PP) pairs broadcasts go out to. */
+void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group);
+
+/* Re-set cached mask. This needs to be called after having been suspended. */
+void mali_bcast_reset(struct mali_bcast_unit *bcast_unit);
+
+/**
+ * Disable broadcast unit
+ *
+ * mali_bcast_enable must be called to re-enable the unit. Cores may not be
+ * added or removed when the unit is disabled.
+ */
+void mali_bcast_disable(struct mali_bcast_unit *bcast_unit);
+
+/**
+ * Re-enable broadcast unit
+ *
+ * This resets the masks to include the cores present when mali_bcast_disable was called.
+ */
+MALI_STATIC_INLINE void mali_bcast_enable(struct mali_bcast_unit *bcast_unit)
+{
+	mali_bcast_reset(bcast_unit);
+}
+
+#endif /* __MALI_BROADCAST_H__ */
diff --git a/mali/common/mali_control_timer.c b/mali/common/mali_control_timer.c
new file mode 100755
index 0000000..fc6ceb4
--- /dev/null
+++ b/mali/common/mali_control_timer.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2010-2012, 2014-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_utilization.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_dvfs_policy.h"
+#include "mali_control_timer.h"
+
+static u64 period_start_time = 0;
+
+static _mali_osk_timer_t *mali_control_timer = NULL;
+static mali_bool timer_running = MALI_FALSE;
+
+static u32 mali_control_timeout = 1000;
+
+void mali_control_timer_add(u32 timeout)
+{
+	_mali_osk_timer_add(mali_control_timer, _mali_osk_time_mstoticks(timeout));
+}
+
+static void mali_control_timer_callback(void *arg)
+{
+	if (mali_utilization_enabled()) {
+		struct mali_gpu_utilization_data *util_data = NULL;
+		u64 time_period = 0;
+		mali_bool need_add_timer = MALI_TRUE;
+
+		/* Calculate gpu utilization */
+		util_data = mali_utilization_calculate(&period_start_time, &time_period, &need_add_timer);
+
+		if (util_data) {
+#if defined(CONFIG_MALI_DVFS)
+			mali_dvfs_policy_realize(util_data, time_period);
+#else
+			mali_utilization_platform_realize(util_data);
+#endif
+
+			if (MALI_TRUE == need_add_timer) {
+				mali_control_timer_add(mali_control_timeout);
+			}
+		}
+	}
+}
+
+/* Init a timer (for now it is used for GPU utilization and dvfs) */
+_mali_osk_errcode_t mali_control_timer_init(void)
+{
+	_mali_osk_device_data data;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		/* Use device specific settings (if defined) */
+		if (0 != data.control_interval) {
+			mali_control_timeout = data.control_interval;
+			MALI_DEBUG_PRINT(2, ("Mali GPU Timer: %u\n", mali_control_timeout));
+		}
+	}
+
+	mali_control_timer = _mali_osk_timer_init();
+	if (NULL == mali_control_timer) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+	_mali_osk_timer_setcallback(mali_control_timer, mali_control_timer_callback, NULL);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_control_timer_term(void)
+{
+	if (NULL != mali_control_timer) {
+		_mali_osk_timer_del(mali_control_timer);
+		timer_running = MALI_FALSE;
+		_mali_osk_timer_term(mali_control_timer);
+		mali_control_timer = NULL;
+	}
+}
+
+mali_bool mali_control_timer_resume(u64 time_now)
+{
+	mali_utilization_data_assert_locked();
+
+	if (timer_running != MALI_TRUE) {
+		timer_running = MALI_TRUE;
+
+		period_start_time = time_now;
+
+		mali_utilization_reset();
+
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+void mali_control_timer_pause(void)
+{
+	mali_utilization_data_assert_locked();
+	if (timer_running == MALI_TRUE) {
+		timer_running = MALI_FALSE;
+	}
+}
+
+void mali_control_timer_suspend(mali_bool suspend)
+{
+	mali_utilization_data_lock();
+
+	if (timer_running == MALI_TRUE) {
+		timer_running = MALI_FALSE;
+
+		mali_utilization_data_unlock();
+
+		if (suspend == MALI_TRUE) {
+			_mali_osk_timer_del(mali_control_timer);
+			mali_utilization_reset();
+		}
+	} else {
+		mali_utilization_data_unlock();
+	}
+}
diff --git a/mali/common/mali_control_timer.h b/mali/common/mali_control_timer.h
new file mode 100755
index 0000000..1265390
--- /dev/null
+++ b/mali/common/mali_control_timer.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2010-2012, 2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_CONTROL_TIMER_H__
+#define __MALI_CONTROL_TIMER_H__
+
+#include "mali_osk.h"
+
+_mali_osk_errcode_t mali_control_timer_init(void);
+
+void mali_control_timer_term(void);
+
+mali_bool mali_control_timer_resume(u64 time_now);
+
+void mali_control_timer_suspend(mali_bool suspend);
+void mali_control_timer_pause(void);
+
+void mali_control_timer_add(u32 timeout);
+
+#endif /* __MALI_CONTROL_TIMER_H__ */
+
diff --git a/mali/common/mali_dlbu.c b/mali/common/mali_dlbu.c
new file mode 100755
index 0000000..4f2a121
--- /dev/null
+++ b/mali/common/mali_dlbu.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_dlbu.h"
+#include "mali_memory.h"
+#include "mali_pp.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "mali_hw_core.h"
+
+/**
+ * Size of DLBU registers in bytes
+ */
+#define MALI_DLBU_SIZE 0x400
+
+mali_dma_addr mali_dlbu_phys_addr = 0;
+static mali_io_address mali_dlbu_cpu_addr = NULL;
+
+/**
+ * DLBU register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_dlbu_register {
+	MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR = 0x0000, /**< Master tile list physical base address;
+                                                             31:12 Physical address to the page used for the DLBU
+                                                             0 DLBU enable - set this bit to 1 enables the AXI bus
+                                                             between PPs and L2s, setting to 0 disables the router and
+                                                             no further transactions are sent to DLBU */
+	MALI_DLBU_REGISTER_MASTER_TLLIST_VADDR     = 0x0004, /**< Master tile list virtual base address;
+                                                             31:12 Virtual address to the page used for the DLBU */
+	MALI_DLBU_REGISTER_TLLIST_VBASEADDR     = 0x0008, /**< Tile list virtual base address;
+                                                             31:12 Virtual address to the tile list. This address is used when
+                                                             calculating the call address sent to PP.*/
+	MALI_DLBU_REGISTER_FB_DIM                 = 0x000C, /**< Framebuffer dimension;
+                                                             23:16 Number of tiles in Y direction-1
+                                                             7:0 Number of tiles in X direction-1 */
+	MALI_DLBU_REGISTER_TLLIST_CONF       = 0x0010, /**< Tile list configuration;
+                                                             29:28 select the size of each allocated block: 0=128 bytes, 1=256, 2=512, 3=1024
+                                                             21:16 2^n number of tiles to be binned to one tile list in Y direction
+                                                             5:0 2^n number of tiles to be binned to one tile list in X direction */
+	MALI_DLBU_REGISTER_START_TILE_POS         = 0x0014, /**< Start tile positions;
+                                                             31:24 start position in Y direction for group 1
+                                                             23:16 start position in X direction for group 1
+                                                             15:8 start position in Y direction for group 0
+                                                             7:0 start position in X direction for group 0 */
+	MALI_DLBU_REGISTER_PP_ENABLE_MASK         = 0x0018, /**< PP enable mask;
+                                                             7 enable PP7 for load balancing
+                                                             6 enable PP6 for load balancing
+                                                             5 enable PP5 for load balancing
+                                                             4 enable PP4 for load balancing
+                                                             3 enable PP3 for load balancing
+                                                             2 enable PP2 for load balancing
+                                                             1 enable PP1 for load balancing
+                                                             0 enable PP0 for load balancing */
+} mali_dlbu_register;
+
+typedef enum {
+	PP0ENABLE = 0,
+	PP1ENABLE,
+	PP2ENABLE,
+	PP3ENABLE,
+	PP4ENABLE,
+	PP5ENABLE,
+	PP6ENABLE,
+	PP7ENABLE
+} mali_dlbu_pp_enable;
+
+struct mali_dlbu_core {
+	struct mali_hw_core     hw_core;           /**< Common for all HW cores */
+	u32                     pp_cores_mask;     /**< This is a mask for the PP cores whose operation will be controlled by LBU
+                                                      see MALI_DLBU_REGISTER_PP_ENABLE_MASK register */
+};
+
+_mali_osk_errcode_t mali_dlbu_initialize(void)
+{
+	MALI_DEBUG_PRINT(2, ("Mali DLBU: Initializing\n"));
+
+	if (_MALI_OSK_ERR_OK ==
+	    mali_mmu_get_table_page(&mali_dlbu_phys_addr,
+				    &mali_dlbu_cpu_addr)) {
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_dlbu_terminate(void)
+{
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: terminating\n"));
+
+	if (0 != mali_dlbu_phys_addr && 0 != mali_dlbu_cpu_addr) {
+		mali_mmu_release_table_page(mali_dlbu_phys_addr,
+					    mali_dlbu_cpu_addr);
+		mali_dlbu_phys_addr = 0;
+		mali_dlbu_cpu_addr = 0;
+	}
+}
+
+struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource)
+{
+	struct mali_dlbu_core *core = NULL;
+
+	MALI_DEBUG_PRINT(2, ("Mali DLBU: Creating Mali dynamic load balancing unit: %s\n", resource->description));
+
+	core = _mali_osk_malloc(sizeof(struct mali_dlbu_core));
+	if (NULL != core) {
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALI_DLBU_SIZE)) {
+			core->pp_cores_mask = 0;
+			if (_MALI_OSK_ERR_OK == mali_dlbu_reset(core)) {
+				return core;
+			}
+			MALI_PRINT_ERROR(("Failed to reset DLBU %s\n", core->hw_core.description));
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Mali DLBU: Failed to allocate memory for DLBU core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_dlbu_delete(struct mali_dlbu_core *dlbu)
+{
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	mali_hw_core_delete(&dlbu->hw_core);
+	_mali_osk_free(dlbu);
+}
+
+_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu)
+{
+	u32 dlbu_registers[7];
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT;
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+
+	MALI_DEBUG_PRINT(4, ("Mali DLBU: mali_dlbu_reset: %s\n", dlbu->hw_core.description));
+
+	dlbu_registers[0] = mali_dlbu_phys_addr | 1; /* bit 0 enables the whole core */
+	dlbu_registers[1] = MALI_DLBU_VIRT_ADDR;
+	dlbu_registers[2] = 0;
+	dlbu_registers[3] = 0;
+	dlbu_registers[4] = 0;
+	dlbu_registers[5] = 0;
+	dlbu_registers[6] = dlbu->pp_cores_mask;
+
+	/* write reset values to core registers */
+	mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR, dlbu_registers, 7);
+
+	err = _MALI_OSK_ERR_OK;
+
+	return err;
+}
+
+void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu)
+{
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+
+	mali_hw_core_register_write(&dlbu->hw_core, MALI_DLBU_REGISTER_PP_ENABLE_MASK, dlbu->pp_cores_mask);
+}
+
+void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group)
+{
+	struct mali_pp_core *pp_core;
+	u32 bcast_id;
+
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	pp_core = mali_group_get_pp_core(group);
+	bcast_id = mali_pp_core_get_bcast_id(pp_core);
+
+	dlbu->pp_cores_mask |= bcast_id;
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: Adding core[%d] New mask= 0x%02x\n", bcast_id , dlbu->pp_cores_mask));
+}
+
+/* Remove a group from the DLBU */
+void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group)
+{
+	struct mali_pp_core *pp_core;
+	u32 bcast_id;
+
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	pp_core = mali_group_get_pp_core(group);
+	bcast_id = mali_pp_core_get_bcast_id(pp_core);
+
+	dlbu->pp_cores_mask &= ~bcast_id;
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: Removing core[%d] New mask= 0x%02x\n", bcast_id, dlbu->pp_cores_mask));
+}
+
+/* Configure the DLBU for \a job. This needs to be done before the job is started on the groups in the DLBU. */
+void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job)
+{
+	u32 *registers;
+	MALI_DEBUG_ASSERT(job);
+	registers = mali_pp_job_get_dlbu_registers(job);
+	MALI_DEBUG_PRINT(4, ("Mali DLBU: Starting job\n"));
+
+	/* Writing 4 registers:
+	 * DLBU registers except the first two (written once at DLBU initialisation / reset) and the PP_ENABLE_MASK register */
+	mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_TLLIST_VBASEADDR, registers, 4);
+
+}
diff --git a/mali/common/mali_dlbu.h b/mali/common/mali_dlbu.h
new file mode 100755
index 0000000..c031f11
--- /dev/null
+++ b/mali/common/mali_dlbu.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_DLBU_H__
+#define __MALI_DLBU_H__
+
+#define MALI_DLBU_VIRT_ADDR 0xFFF00000 /* master tile virtual address fixed at this value and mapped into every session */
+
+#include "mali_osk.h"
+
+struct mali_pp_job;
+struct mali_group;
+struct mali_dlbu_core;
+
+extern mali_dma_addr mali_dlbu_phys_addr;
+
+_mali_osk_errcode_t mali_dlbu_initialize(void);
+void mali_dlbu_terminate(void);
+
+struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource);
+void mali_dlbu_delete(struct mali_dlbu_core *dlbu);
+
+_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu);
+
+void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group);
+void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group);
+
+/** @brief Called to update HW after DLBU state changed
+ *
+ * This function must be called after \a mali_dlbu_add_group or \a
+ * mali_dlbu_remove_group to write the updated mask to hardware, unless the
+ * same is accomplished by calling \a mali_dlbu_reset.
+ */
+void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu);
+
+void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job);
+
+#endif /* __MALI_DLBU_H__ */
diff --git a/mali/common/mali_dvfs_policy.c b/mali/common/mali_dvfs_policy.c
new file mode 100755
index 0000000..1094f9d
--- /dev/null
+++ b/mali/common/mali_dvfs_policy.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2010-2012, 2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include "mali_scheduler.h"
+#include "mali_dvfs_policy.h"
+#include "mali_osk_mali.h"
+#include "mali_osk_profiling.h"
+
+#define CLOCK_TUNING_TIME_DEBUG 0
+
+#define MAX_PERFORMANCE_VALUE 256
+#define MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(percent) ((int) ((percent)*(MAX_PERFORMANCE_VALUE)/100.0 + 0.5))
+
+/** The max fps the same as display vsync default 60, can set by module insert parameter */
+int mali_max_system_fps = 60;
+/** A lower limit on their desired FPS default 58, can set by module insert parameter */
+int mali_desired_fps = 58;
+
+static int mali_fps_step1 = 0;
+static int mali_fps_step2 = 0;
+
+static int clock_step = -1;
+static int cur_clk_step = -1;
+static struct mali_gpu_clock *gpu_clk = NULL;
+
+/*Function prototype */
+static int (*mali_gpu_set_freq)(int) = NULL;
+static int (*mali_gpu_get_freq)(void) = NULL;
+
+static mali_bool mali_dvfs_enabled = MALI_FALSE;
+
+#define NUMBER_OF_NANOSECONDS_PER_SECOND  1000000000ULL
+static u32 calculate_window_render_fps(u64 time_period)
+{
+	u32 max_window_number;
+	u64 tmp;
+	u64 max = time_period;
+	u32 leading_zeroes;
+	u32 shift_val;
+	u32 time_period_shift;
+	u32 max_window_number_shift;
+	u32 ret_val;
+
+	max_window_number = mali_session_max_window_num();
+
+	/* To avoid float division, extend the dividend to ns unit */
+	tmp = (u64)max_window_number * NUMBER_OF_NANOSECONDS_PER_SECOND;
+	if (tmp > time_period) {
+		max = tmp;
+	}
+
+	/*
+	 * We may have 64-bit values, a dividend or a divisor or both
+	 * To avoid dependencies to a 64-bit divider, we shift down the two values
+	 * equally first.
+	 */
+	leading_zeroes = _mali_osk_clz((u32)(max >> 32));
+	shift_val = 32 - leading_zeroes;
+
+	time_period_shift = (u32)(time_period >> shift_val);
+	max_window_number_shift = (u32)(tmp >> shift_val);
+
+	ret_val = max_window_number_shift / time_period_shift;
+
+	return ret_val;
+}
+
+static bool mali_pickup_closest_avail_clock(int target_clock_mhz, mali_bool pick_clock_up)
+{
+	int i = 0;
+	bool clock_changed = false;
+
+	/* Round up the closest available frequency step for target_clock_hz */
+	for (i = 0; i < gpu_clk->num_of_steps; i++) {
+		/* Find the first item > target_clock_hz */
+		if (((int)(gpu_clk->item[i].clock) - target_clock_mhz) > 0) {
+			break;
+		}
+	}
+
+	/* If the target clock greater than the maximum clock just pick the maximum one*/
+	if (i == gpu_clk->num_of_steps) {
+		i = gpu_clk->num_of_steps - 1;
+	} else {
+		if ((!pick_clock_up) && (i > 0)) {
+			i = i - 1;
+		}
+	}
+
+	clock_step = i;
+	if (cur_clk_step != clock_step) {
+		clock_changed = true;
+	}
+
+	return clock_changed;
+}
+
+void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period)
+{
+	int under_perform_boundary_value = 0;
+	int over_perform_boundary_value = 0;
+	int current_fps = 0;
+	int current_gpu_util = 0;
+	bool clock_changed = false;
+#if CLOCK_TUNING_TIME_DEBUG
+	struct timeval start;
+	struct timeval stop;
+	unsigned int elapse_time;
+	do_gettimeofday(&start);
+#endif
+	u32 window_render_fps;
+
+	if (NULL == gpu_clk) {
+		MALI_DEBUG_PRINT(2, ("Enable DVFS but patform doesn't Support freq change. \n"));
+		return;
+	}
+
+	window_render_fps = calculate_window_render_fps(time_period);
+
+	current_fps = window_render_fps;
+	current_gpu_util = data->utilization_gpu;
+
+	/* Get the specific under_perform_boundary_value and over_perform_boundary_value */
+	if ((mali_desired_fps <= current_fps) && (current_fps < mali_max_system_fps)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(90);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70);
+	} else if ((mali_fps_step1 <= current_fps) && (current_fps < mali_desired_fps)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35);
+	} else if ((mali_fps_step2 <= current_fps) && (current_fps < mali_fps_step1)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(50);
+	} else {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35);
+	}
+
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: gpu util = %d \n", current_gpu_util));
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: under_perform = %d,  over_perform = %d \n", under_perform_boundary_value, over_perform_boundary_value));
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: render fps = %d,  pressure render fps = %d \n", current_fps, window_render_fps));
+
+	/* Get current clock value */
+	cur_clk_step = mali_gpu_get_freq();
+
+	/* Consider offscreen */
+	if (0 == current_fps) {
+		/* GP or PP under perform, need to give full power */
+		if (current_gpu_util > over_perform_boundary_value) {
+			if (cur_clk_step != gpu_clk->num_of_steps - 1) {
+				clock_changed = true;
+				clock_step = gpu_clk->num_of_steps - 1;
+			}
+		}
+
+		/* If GPU is idle, use lowest power */
+		if (0 == current_gpu_util) {
+			if (cur_clk_step != 0) {
+				clock_changed = true;
+				clock_step = 0;
+			}
+		}
+
+		goto real_setting;
+	}
+
+	/* 2. Calculate target clock if the GPU clock can be tuned */
+	if (-1 != cur_clk_step) {
+		int target_clk_mhz = -1;
+		mali_bool pick_clock_up = MALI_TRUE;
+
+		if (current_gpu_util > under_perform_boundary_value) {
+			/* when under perform, need to consider the fps part */
+			target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util * mali_desired_fps / under_perform_boundary_value / current_fps;
+			pick_clock_up = MALI_TRUE;
+		} else if (current_gpu_util < over_perform_boundary_value) {
+			/* when over perform, did't need to consider fps, system didn't want to reach desired fps */
+			target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util / under_perform_boundary_value;
+			pick_clock_up = MALI_FALSE;
+		}
+
+		if (-1 != target_clk_mhz) {
+			clock_changed = mali_pickup_closest_avail_clock(target_clk_mhz, pick_clock_up);
+		}
+	}
+
+real_setting:
+	if (clock_changed) {
+		mali_gpu_set_freq(clock_step);
+
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+					      gpu_clk->item[clock_step].clock,
+					      gpu_clk->item[clock_step].vol / 1000,
+					      0, 0, 0);
+	}
+
+#if CLOCK_TUNING_TIME_DEBUG
+	do_gettimeofday(&stop);
+
+	elapse_time = timeval_to_ns(&stop) - timeval_to_ns(&start);
+	MALI_DEBUG_PRINT(2, ("Using ARM power policy:  eclapse time = %d\n", elapse_time));
+#endif
+}
+
+_mali_osk_errcode_t mali_dvfs_policy_init(void)
+{
+	_mali_osk_device_data data;
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		if ((NULL != data.get_clock_info) && (NULL != data.set_freq) && (NULL != data.get_freq)) {
+			MALI_DEBUG_PRINT(2, ("Mali DVFS init: using arm dvfs policy \n"));
+
+
+			mali_fps_step1 = mali_max_system_fps / 3;
+			mali_fps_step2 = mali_max_system_fps / 5;
+
+			data.get_clock_info(&gpu_clk);
+
+			if (gpu_clk != NULL) {
+#ifdef DEBUG
+				int i;
+				for (i = 0; i < gpu_clk->num_of_steps; i++) {
+					MALI_DEBUG_PRINT(5, ("mali gpu clock info: step%d clock(%d)Hz,vol(%d) \n",
+							     i, gpu_clk->item[i].clock, gpu_clk->item[i].vol));
+				}
+#endif
+			} else {
+				MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform didn't define enough info for ddk to do DVFS \n"));
+			}
+
+			mali_gpu_get_freq = data.get_freq;
+			mali_gpu_set_freq = data.set_freq;
+
+			if ((NULL != gpu_clk) && (gpu_clk->num_of_steps > 0)
+			    && (NULL != mali_gpu_get_freq) && (NULL != mali_gpu_set_freq)) {
+				mali_dvfs_enabled = MALI_TRUE;
+			}
+		} else {
+			MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform function callback incomplete, need check mali_gpu_device_data in platform .\n"));
+		}
+	} else {
+		err = _MALI_OSK_ERR_FAULT;
+		MALI_DEBUG_PRINT(2, ("Mali DVFS init: get platform data error .\n"));
+	}
+
+	return err;
+}
+
+/*
+ * Always give full power when start a new period,
+ * if mali dvfs enabled, for performance consideration
+ */
+void mali_dvfs_policy_new_period(void)
+{
+	/* Always give full power when start a new period */
+	unsigned int cur_clk_step = 0;
+
+	cur_clk_step = mali_gpu_get_freq();
+
+	if (cur_clk_step != (gpu_clk->num_of_steps - 1)) {
+		mali_gpu_set_freq(gpu_clk->num_of_steps - 1);
+
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, gpu_clk->item[gpu_clk->num_of_steps - 1].clock,
+					      gpu_clk->item[gpu_clk->num_of_steps - 1].vol / 1000, 0, 0, 0);
+	}
+}
+
+mali_bool mali_dvfs_policy_enabled(void)
+{
+	return mali_dvfs_enabled;
+}
+
+#if defined(CONFIG_MALI400_PROFILING)
+void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item)
+{
+	if (mali_platform_device != NULL) {
+
+		struct mali_gpu_device_data *device_data = NULL;
+		device_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data;
+
+		if ((NULL != device_data->get_clock_info) && (NULL != device_data->get_freq)) {
+
+			int cur_clk_step = device_data->get_freq();
+			struct mali_gpu_clock *mali_gpu_clk = NULL;
+
+			device_data->get_clock_info(&mali_gpu_clk);
+			clk_item->clock = mali_gpu_clk->item[cur_clk_step].clock;
+			clk_item->vol = mali_gpu_clk->item[cur_clk_step].vol;
+		} else {
+			MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: platform function callback incomplete, need check mali_gpu_device_data in platform .\n"));
+		}
+	}
+}
+#endif
+
diff --git a/mali/common/mali_dvfs_policy.h b/mali/common/mali_dvfs_policy.h
new file mode 100755
index 0000000..1770426
--- /dev/null
+++ b/mali/common/mali_dvfs_policy.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2010-2012, 2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_DVFS_POLICY_H__
+#define __MALI_DVFS_POLICY_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period);
+
+_mali_osk_errcode_t mali_dvfs_policy_init(void);
+
+void mali_dvfs_policy_new_period(void);
+
+mali_bool mali_dvfs_policy_enabled(void);
+
+#if defined(CONFIG_MALI400_PROFILING)
+void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif/* __MALI_DVFS_POLICY_H__ */
diff --git a/mali/common/mali_executor.c b/mali/common/mali_executor.c
new file mode 100755
index 0000000..0359c77
--- /dev/null
+++ b/mali/common/mali_executor.c
@@ -0,0 +1,2697 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_executor.h"
+#include "mali_scheduler.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_pp.h"
+#include "mali_pp_job.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+#include "mali_timeline.h"
+#include "mali_osk_profiling.h"
+#include "mali_session.h"
+#include "mali_osk_mali.h"
+
+/*
+ * If dma_buf with map on demand is used, we defer job deletion and job queue
+ * if in atomic context, since both might sleep.
+ */
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_DELETE 1
+#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_QUEUE 1
+#endif /* !defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) */
+
+/*
+ * ---------- static type definitions (structs, enums, etc) ----------
+ */
+
+enum mali_executor_state_t {
+	EXEC_STATE_NOT_PRESENT, /* Virtual group on Mali-300/400 (do not use) */
+	EXEC_STATE_DISABLED,    /* Disabled by core scaling (do not use) */
+	EXEC_STATE_EMPTY,       /* No child groups for virtual group (do not use) */
+	EXEC_STATE_INACTIVE,    /* Can be used, but must be activate first */
+	EXEC_STATE_IDLE,        /* Active and ready to be used */
+	EXEC_STATE_WORKING,     /* Executing a job */
+};
+
+/*
+ * ---------- global variables (exported due to inline functions) ----------
+ */
+
+/* Lock for this module (protecting all HW access except L2 caches) */
+_mali_osk_spinlock_irq_t *mali_executor_lock_obj = NULL;
+
+mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX];
+
+/*
+ * ---------- static variables ----------
+ */
+
+/* Used to defer job scheduling */
+static _mali_osk_wq_work_t *executor_wq_high_pri = NULL;
+
+/* Store version from GP and PP (user space wants to know this) */
+static u32 pp_version = 0;
+static u32 gp_version = 0;
+
+/* List of physical PP groups which are disabled by some external source */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_disabled);
+static u32 group_list_disabled_count = 0;
+
+/* List of groups which can be used, but activate first */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_inactive);
+static u32 group_list_inactive_count = 0;
+
+/* List of groups which are active and ready to be used */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_idle);
+static u32 group_list_idle_count = 0;
+
+/* List of groups which are executing a job */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_working);
+static u32 group_list_working_count = 0;
+
+/* Virtual group (if any) */
+static struct mali_group *virtual_group = NULL;
+
+/* Virtual group state is tracked with a state variable instead of 4 lists */
+static enum mali_executor_state_t virtual_group_state = EXEC_STATE_NOT_PRESENT;
+
+/* GP group */
+static struct mali_group *gp_group = NULL;
+
+/* GP group state is tracked with a state variable instead of 4 lists */
+static enum mali_executor_state_t gp_group_state = EXEC_STATE_NOT_PRESENT;
+
+static u32 gp_returned_cookie = 0;
+
+/* Total number of physical PP cores present */
+static u32 num_physical_pp_cores_total = 0;
+
+/* Number of physical cores which are enabled */
+static u32 num_physical_pp_cores_enabled = 0;
+
+/* Enable or disable core scaling */
+static mali_bool core_scaling_enabled = MALI_TRUE;
+
+/* Variables to allow safe pausing of the scheduler */
+static _mali_osk_wait_queue_t *executor_working_wait_queue = NULL;
+static u32 pause_count = 0;
+
+/* PP cores haven't been enabled because of some pp cores haven't been disabled. */
+static int core_scaling_delay_up_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+
+/* Variables used to implement notify pp core changes to userspace when core scaling
+ * is finished in mali_executor_complete_group() function. */
+static _mali_osk_wq_work_t *executor_wq_notify_core_change = NULL;
+static _mali_osk_wait_queue_t *executor_notify_core_change_wait_queue = NULL;
+
+/*
+ * ---------- Forward declaration of static functions ----------
+ */
+static mali_bool mali_executor_is_suspended(void *data);
+static mali_bool mali_executor_is_working(void);
+static void mali_executor_disable_empty_virtual(void);
+static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group);
+static mali_bool mali_executor_has_virtual_group(void);
+static mali_bool mali_executor_virtual_group_is_usable(void);
+static void mali_executor_schedule(void);
+static void mali_executor_wq_schedule(void *arg);
+static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job);
+static void mali_executor_complete_group(struct mali_group *group,
+		mali_bool success,
+		struct mali_gp_job **gp_job_done,
+		struct mali_pp_job **pp_job_done);
+static void mali_executor_change_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *old_list,
+		u32 *old_count,
+		_mali_osk_list_t *new_list,
+		u32 *new_count);
+static mali_bool mali_executor_group_is_in_state(struct mali_group *group,
+		enum mali_executor_state_t state);
+
+static void mali_executor_group_enable_internal(struct mali_group *group);
+static void mali_executor_group_disable_internal(struct mali_group *group);
+static void mali_executor_core_scale(unsigned int target_core_nr);
+static void mali_executor_core_scale_in_group_complete(struct mali_group *group);
+static void mali_executor_notify_core_change(u32 num_cores);
+static void mali_executor_wq_notify_core_change(void *arg);
+static void mali_executor_change_group_status_disabled(struct mali_group *group);
+static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group);
+static void mali_executor_set_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *new_list,
+		u32 *new_count);
+
+/*
+ * ---------- Actual implementation ----------
+ */
+
+_mali_osk_errcode_t mali_executor_initialize(void)
+{
+	mali_executor_lock_obj = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_EXECUTOR);
+	if (NULL == mali_executor_lock_obj) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_wq_high_pri = _mali_osk_wq_create_work_high_pri(mali_executor_wq_schedule, NULL);
+	if (NULL == executor_wq_high_pri) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_working_wait_queue = _mali_osk_wait_queue_init();
+	if (NULL == executor_working_wait_queue) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_wq_notify_core_change = _mali_osk_wq_create_work(mali_executor_wq_notify_core_change, NULL);
+	if (NULL == executor_wq_notify_core_change) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_notify_core_change_wait_queue = _mali_osk_wait_queue_init();
+	if (NULL == executor_notify_core_change_wait_queue) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_executor_terminate(void)
+{
+	if (NULL != executor_notify_core_change_wait_queue) {
+		_mali_osk_wait_queue_term(executor_notify_core_change_wait_queue);
+		executor_notify_core_change_wait_queue = NULL;
+	}
+
+	if (NULL != executor_wq_notify_core_change) {
+		_mali_osk_wq_delete_work(executor_wq_notify_core_change);
+		executor_wq_notify_core_change = NULL;
+	}
+
+	if (NULL != executor_working_wait_queue) {
+		_mali_osk_wait_queue_term(executor_working_wait_queue);
+		executor_working_wait_queue = NULL;
+	}
+
+	if (NULL != executor_wq_high_pri) {
+		_mali_osk_wq_delete_work(executor_wq_high_pri);
+		executor_wq_high_pri = NULL;
+	}
+
+	if (NULL != mali_executor_lock_obj) {
+		_mali_osk_spinlock_irq_term(mali_executor_lock_obj);
+		mali_executor_lock_obj = NULL;
+	}
+}
+
+void mali_executor_populate(void)
+{
+	u32 num_groups;
+	u32 i;
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	/* Do we have a virtual group? */
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (mali_group_is_virtual(group)) {
+			virtual_group = group;
+			virtual_group_state = EXEC_STATE_INACTIVE;
+			break;
+		}
+	}
+
+	/* Find all the available physical GP and PP cores */
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (NULL != group) {
+			struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+			struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+
+			if (!mali_group_is_virtual(group)) {
+				if (NULL != pp_core) {
+					if (0 == pp_version) {
+						/* Retrieve PP version from the first available PP core */
+						pp_version = mali_pp_core_get_version(pp_core);
+					}
+
+					if (NULL != virtual_group) {
+						mali_executor_lock();
+						mali_group_add_group(virtual_group, group);
+						mali_executor_unlock();
+					} else {
+						_mali_osk_list_add(&group->executor_list, &group_list_inactive);
+						group_list_inactive_count++;
+					}
+
+					num_physical_pp_cores_total++;
+				} else {
+					MALI_DEBUG_ASSERT_POINTER(gp_core);
+
+					if (0 == gp_version) {
+						/* Retrieve GP version */
+						gp_version = mali_gp_core_get_version(gp_core);
+					}
+
+					gp_group = group;
+					gp_group_state = EXEC_STATE_INACTIVE;
+				}
+
+			}
+		}
+	}
+
+	num_physical_pp_cores_enabled = num_physical_pp_cores_total;
+}
+
+void mali_executor_depopulate(void)
+{
+	struct mali_group *group;
+	struct mali_group *temp;
+
+	MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != gp_group_state);
+
+	if (NULL != gp_group) {
+		mali_group_delete(gp_group);
+		gp_group = NULL;
+	}
+
+	MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != virtual_group_state);
+
+	if (NULL != virtual_group) {
+		mali_group_delete(virtual_group);
+		virtual_group = NULL;
+	}
+
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&group_list_working));
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+}
+
+void mali_executor_suspend(void)
+{
+	mali_executor_lock();
+
+	/* Increment the pause_count so that no more jobs will be scheduled */
+	pause_count++;
+
+	mali_executor_unlock();
+
+	_mali_osk_wait_queue_wait_event(executor_working_wait_queue,
+					mali_executor_is_suspended, NULL);
+
+	/*
+	 * mali_executor_complete_XX() leaves jobs in idle state.
+	 * deactivate option is used when we are going to power down
+	 * the entire GPU (OS suspend) and want a consistent SW vs HW
+	 * state.
+	 */
+	mali_executor_lock();
+
+	mali_executor_deactivate_list_idle(MALI_TRUE);
+
+	/*
+	 * The following steps are used to deactive all of activated
+	 * (MALI_GROUP_STATE_ACTIVE) and activating (MALI_GROUP
+	 * _STAET_ACTIVATION_PENDING) groups, to make sure the variable
+	 * pd_mask_wanted is equal with 0. */
+	if (MALI_GROUP_STATE_INACTIVE != mali_group_get_state(gp_group)) {
+		gp_group_state = EXEC_STATE_INACTIVE;
+		mali_group_deactivate(gp_group);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		if (MALI_GROUP_STATE_INACTIVE
+		    != mali_group_get_state(virtual_group)) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+			mali_group_deactivate(virtual_group);
+		}
+	}
+
+	if (0 < group_list_inactive_count) {
+		struct mali_group *group;
+		struct mali_group *temp;
+
+		_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+					    &group_list_inactive,
+					    struct mali_group, executor_list) {
+			if (MALI_GROUP_STATE_ACTIVATION_PENDING
+			    == mali_group_get_state(group)) {
+				mali_group_deactivate(group);
+			}
+
+			/*
+			 * On mali-450 platform, we may have physical group in the group inactive
+			 * list, and its state is MALI_GROUP_STATE_ACTIVATION_PENDING, so we only
+			 * deactivate it is not enough, we still also need add it back to virtual group.
+			 * And now, virtual group must be in INACTIVE state, so it's safe to add
+			 * physical group to virtual group at this point.
+			 */
+			if (NULL != virtual_group) {
+				_mali_osk_list_delinit(&group->executor_list);
+				group_list_inactive_count--;
+
+				mali_group_add_group(virtual_group, group);
+			}
+		}
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_resume(void)
+{
+	mali_executor_lock();
+
+	/* Decrement pause_count to allow scheduling again (if it reaches 0) */
+	pause_count--;
+	if (0 == pause_count) {
+		mali_executor_schedule();
+	}
+
+	mali_executor_unlock();
+}
+
+u32 mali_executor_get_num_cores_total(void)
+{
+	return num_physical_pp_cores_total;
+}
+
+u32 mali_executor_get_num_cores_enabled(void)
+{
+	return num_physical_pp_cores_enabled;
+}
+
+struct mali_pp_core *mali_executor_get_virtual_pp(void)
+{
+	MALI_DEBUG_ASSERT_POINTER(virtual_group);
+	MALI_DEBUG_ASSERT_POINTER(virtual_group->pp_core);
+	return virtual_group->pp_core;
+}
+
+struct mali_group *mali_executor_get_virtual_group(void)
+{
+	return virtual_group;
+}
+
+void mali_executor_zap_all_active(struct mali_session_data *session)
+{
+	struct mali_group *group;
+	struct mali_group *temp;
+	mali_bool ret;
+
+	mali_executor_lock();
+
+	/*
+	 * This function is a bit complicated because
+	 * mali_group_zap_session() can fail. This only happens because the
+	 * group is in an unhandled page fault status.
+	 * We need to make sure this page fault is handled before we return,
+	 * so that we know every single outstanding MMU transactions have
+	 * completed. This will allow caller to safely remove physical pages
+	 * when we have returned.
+	 */
+
+	MALI_DEBUG_ASSERT(NULL != gp_group);
+	ret = mali_group_zap_session(gp_group, session);
+	if (MALI_FALSE == ret) {
+		struct mali_gp_job *gp_job = NULL;
+
+		mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL);
+
+		MALI_DEBUG_ASSERT_POINTER(gp_job);
+
+		/* GP job completed, make sure it is freed */
+		mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		ret = mali_group_zap_session(virtual_group, session);
+		if (MALI_FALSE == ret) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job);
+
+			if (NULL != pp_job) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_TRUE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working,
+				    struct mali_group, executor_list) {
+		ret = mali_group_zap_session(group, session);
+		if (MALI_FALSE == ret) {
+			ret = mali_group_zap_session(group, session);
+			if (MALI_FALSE == ret) {
+				struct mali_pp_job *pp_job = NULL;
+
+				mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job);
+
+				if (NULL != pp_job) {
+					/* PP job completed, free it */
+					mali_scheduler_complete_pp_job(pp_job,
+								       0, MALI_TRUE,
+								       MALI_TRUE);
+				}
+			}
+		}
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule)
+{
+	if (MALI_SCHEDULER_MASK_EMPTY != mask) {
+		if (MALI_TRUE == deferred_schedule) {
+			_mali_osk_wq_schedule_work_high_pri(executor_wq_high_pri);
+		} else {
+			/* Schedule from this thread*/
+			mali_executor_lock();
+			mali_executor_schedule();
+			mali_executor_unlock();
+		}
+	}
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+	mali_bool time_out = MALI_FALSE;
+
+	MALI_DEBUG_PRINT(4, ("Executor: GP interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+
+	if (mali_group_has_timed_out(group)) {
+		int_result = MALI_INTERRUPT_RESULT_ERROR;
+		time_out = MALI_TRUE;
+		MALI_PRINT(("Executor GP: Job %d Timeout on %s\n",
+			    mali_gp_job_get_id(group->gp_running_job),
+			    mali_group_core_description(group)));
+	} else {
+		int_result = mali_group_get_interrupt_result_gp(group);
+		if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#else
+	MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_NONE != int_result);
+#endif
+
+	mali_group_mask_all_interrupts_gp(group);
+
+	if (MALI_INTERRUPT_RESULT_SUCCESS_VS == int_result) {
+		if (mali_group_gp_is_active(group)) {
+			/* Only VS completed so far, while PLBU is still active */
+
+			/* Enable all but the current interrupt */
+			mali_group_enable_interrupts_gp(group, int_result);
+
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_OK;
+		}
+	} else if (MALI_INTERRUPT_RESULT_SUCCESS_PLBU == int_result) {
+		if (mali_group_gp_is_active(group)) {
+			/* Only PLBU completed so far, while VS is still active */
+
+			/* Enable all but the current interrupt */
+			mali_group_enable_interrupts_gp(group, int_result);
+
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_OK;
+		}
+	} else if (MALI_INTERRUPT_RESULT_OOM == int_result) {
+		struct mali_gp_job *job = mali_group_get_running_gp_job(group);
+
+		/* PLBU out of mem */
+		MALI_DEBUG_PRINT(3, ("Executor: PLBU needs more heap memory\n"));
+
+#if defined(CONFIG_MALI400_PROFILING)
+		/* Give group a chance to generate a SUSPEND event */
+		mali_group_oom(group);
+#endif
+
+		/*
+		 * no need to hold interrupt raised while
+		 * waiting for more memory.
+		 */
+		mali_executor_send_gp_oom_to_user(job);
+
+		mali_executor_unlock();
+
+		return _MALI_OSK_ERR_OK;
+	}
+
+	/* We should now have a real interrupt to handle */
+
+	MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n",
+			     mali_group_core_description(group),
+			     (MALI_INTERRUPT_RESULT_ERROR == int_result) ?
+			     "ERROR" : "success"));
+
+	if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) {
+		/* Don't bother to do processing of errors in upper half */
+		mali_executor_unlock();
+
+		if (MALI_FALSE == time_out) {
+			mali_group_schedule_bottom_half_gp(group);
+		}
+	} else {
+		struct mali_gp_job *job;
+		mali_bool success;
+
+		if (MALI_TRUE == time_out) {
+			mali_group_dump_status(group);
+		}
+
+		success = (int_result != MALI_INTERRUPT_RESULT_ERROR) ?
+			  MALI_TRUE : MALI_FALSE;
+
+		mali_executor_complete_group(group, success, &job, NULL);
+
+		mali_executor_unlock();
+
+		/* GP jobs always fully complete */
+		MALI_DEBUG_ASSERT(NULL != job);
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_gp_job(job, success,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+	mali_bool time_out = MALI_FALSE;
+
+	MALI_DEBUG_PRINT(4, ("Executor: PP interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (in_upper_half) {
+		if (mali_group_is_in_virtual(group)) {
+			/* Child groups should never handle PP interrupts */
+			MALI_DEBUG_ASSERT(!mali_group_has_timed_out(group));
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+	MALI_DEBUG_ASSERT(!mali_group_is_in_virtual(group));
+
+	if (mali_group_has_timed_out(group)) {
+		int_result = MALI_INTERRUPT_RESULT_ERROR;
+		time_out = MALI_TRUE;
+		MALI_PRINT(("Executor PP: Job %d Timeout on %s\n",
+			    mali_pp_job_get_id(group->pp_running_job),
+			    mali_group_core_description(group)));
+	} else {
+		int_result = mali_group_get_interrupt_result_pp(group);
+		if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	} else if (MALI_INTERRUPT_RESULT_SUCCESS == int_result) {
+		if (mali_group_is_virtual(group) && mali_group_pp_is_active(group)) {
+			/* Some child groups are still working, so nothing to do right now */
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+#else
+	MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_NONE != int_result);
+#endif
+
+	/* We should now have a real interrupt to handle */
+
+	MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n",
+			     mali_group_core_description(group),
+			     (MALI_INTERRUPT_RESULT_ERROR == int_result) ?
+			     "ERROR" : "success"));
+
+	if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) {
+		/* Don't bother to do processing of errors in upper half */
+		mali_group_mask_all_interrupts_pp(group);
+		mali_executor_unlock();
+
+		if (MALI_FALSE == time_out) {
+			mali_group_schedule_bottom_half_pp(group);
+		}
+	} else {
+		struct mali_pp_job *job = NULL;
+		mali_bool success;
+
+		if (MALI_TRUE == time_out) {
+			mali_group_dump_status(group);
+		}
+
+		success = (int_result == MALI_INTERRUPT_RESULT_SUCCESS) ?
+			  MALI_TRUE : MALI_FALSE;
+
+		mali_executor_complete_group(group, success, NULL, &job);
+
+		mali_executor_unlock();
+
+		if (NULL != job) {
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(job,
+						       num_physical_pp_cores_total,
+						       MALI_TRUE, MALI_TRUE);
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+
+	MALI_DEBUG_PRINT(4, ("Executor: MMU interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+
+	int_result = mali_group_get_interrupt_result_mmu(group);
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#else
+	MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_ERROR == int_result);
+#endif
+
+	/* We should now have a real interrupt to handle */
+
+	if (in_upper_half) {
+		/* Don't bother to do processing of errors in upper half */
+
+		struct mali_group *parent = group->parent_group;
+
+		mali_mmu_mask_all_interrupts(group->mmu);
+
+		mali_executor_unlock();
+
+		if (NULL == parent) {
+			mali_group_schedule_bottom_half_mmu(group);
+		} else {
+			mali_group_schedule_bottom_half_mmu(parent);
+		}
+
+	} else {
+		struct mali_gp_job *gp_job = NULL;
+		struct mali_pp_job *pp_job = NULL;
+
+#ifdef DEBUG
+
+		u32 fault_address = mali_mmu_get_page_fault_addr(group->mmu);
+		u32 status = mali_mmu_get_status(group->mmu);
+		MALI_DEBUG_PRINT(2, ("Executor: Mali page fault detected at 0x%x from bus id %d of type %s on %s\n",
+				     (void *)(uintptr_t)fault_address,
+				     (status >> 6) & 0x1F,
+				     (status & 32) ? "write" : "read",
+				     group->mmu->hw_core.description));
+		MALI_DEBUG_PRINT(3, ("Executor: MMU rawstat = 0x%08X, MMU status = 0x%08X\n",
+				     mali_mmu_get_rawstat(group->mmu), status));
+		mali_mmu_pagedir_diag(mali_session_get_page_directory(group->session), fault_address);
+#endif
+
+		mali_executor_complete_group(group, MALI_FALSE, &gp_job, &pp_job);
+
+		mali_executor_unlock();
+
+		if (NULL != gp_job) {
+			MALI_DEBUG_ASSERT(NULL == pp_job);
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+						       MALI_TRUE, MALI_TRUE);
+		} else if (NULL != pp_job) {
+			MALI_DEBUG_ASSERT(NULL == gp_job);
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(pp_job,
+						       num_physical_pp_cores_total,
+						       MALI_TRUE, MALI_TRUE);
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups)
+{
+	u32 i;
+	mali_bool child_groups_activated = MALI_FALSE;
+	mali_bool do_schedule = MALI_FALSE;
+#if defined(DEBUG)
+	u32 num_activated = 0;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(groups);
+	MALI_DEBUG_ASSERT(0 < num_groups);
+
+	mali_executor_lock();
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups\n", num_groups));
+
+	for (i = 0; i < num_groups; i++) {
+		MALI_DEBUG_PRINT(3, ("Executor: powering up group %s\n",
+				     mali_group_core_description(groups[i])));
+
+		mali_group_power_up(groups[i]);
+
+		if ((MALI_GROUP_STATE_ACTIVATION_PENDING != mali_group_get_state(groups[i]) ||
+		     (MALI_TRUE != mali_executor_group_is_in_state(groups[i], EXEC_STATE_INACTIVE)))) {
+			/* nothing more to do for this group */
+			continue;
+		}
+
+		MALI_DEBUG_PRINT(3, ("Executor: activating group %s\n",
+				     mali_group_core_description(groups[i])));
+
+#if defined(DEBUG)
+		num_activated++;
+#endif
+
+		if (mali_group_is_in_virtual(groups[i])) {
+			/*
+			 * At least one child group of virtual group is powered on.
+			 */
+			child_groups_activated = MALI_TRUE;
+		} else if (MALI_FALSE == mali_group_is_virtual(groups[i])) {
+			/* Set gp and pp not in virtual to active. */
+			mali_group_set_active(groups[i]);
+		}
+
+		/* Move group from inactive to idle list */
+		if (groups[i] == gp_group) {
+			MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE ==
+					  gp_group_state);
+			gp_group_state = EXEC_STATE_IDLE;
+		} else if (MALI_FALSE == mali_group_is_in_virtual(groups[i])
+			   && MALI_FALSE == mali_group_is_virtual(groups[i])) {
+			MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_group_is_in_state(groups[i],
+					  EXEC_STATE_INACTIVE));
+
+			mali_executor_change_state_pp_physical(groups[i],
+							       &group_list_inactive,
+							       &group_list_inactive_count,
+							       &group_list_idle,
+							       &group_list_idle_count);
+		}
+
+		do_schedule = MALI_TRUE;
+	}
+
+	if (mali_executor_has_virtual_group() &&
+	    MALI_TRUE == child_groups_activated &&
+	    MALI_GROUP_STATE_ACTIVATION_PENDING ==
+	    mali_group_get_state(virtual_group)) {
+		/*
+		 * Try to active virtual group while it may be not sucessful every time,
+		 * because there is one situation that not all of child groups are powered on
+		 * in one time and virtual group is in activation pending state.
+		 */
+		if (mali_group_set_active(virtual_group)) {
+			/* Move group from inactive to idle */
+			MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE ==
+					  virtual_group_state);
+			virtual_group_state = EXEC_STATE_IDLE;
+
+			MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u  physical activated, 1 virtual activated.\n", num_groups, num_activated));
+		} else {
+			MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated));
+		}
+	} else {
+		MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated));
+	}
+
+	if (MALI_TRUE == do_schedule) {
+		/* Trigger a schedule */
+		mali_executor_schedule();
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_group_power_down(struct mali_group *groups[],
+				    u32 num_groups)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(groups);
+	MALI_DEBUG_ASSERT(0 < num_groups);
+
+	mali_executor_lock();
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups\n", num_groups));
+
+	for (i = 0; i < num_groups; i++) {
+		/* Groups must be either disabled or inactive. while for virtual group,
+		 * it maybe in empty state, because when we meet pm_runtime_suspend,
+		 * virtual group could be powered off, and before we acquire mali_executor_lock,
+		 * we must release mali_pm_state_lock, if there is a new physical job was queued,
+		 * all of physical groups in virtual group could be pulled out, so we only can
+		 * powered down an empty virtual group. Those physical groups will be powered
+		 * up in following pm_runtime_resume callback function.
+		 */
+		MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(groups[i],
+				  EXEC_STATE_DISABLED) ||
+				  mali_executor_group_is_in_state(groups[i],
+						  EXEC_STATE_INACTIVE) ||
+				  mali_executor_group_is_in_state(groups[i],
+						  EXEC_STATE_EMPTY));
+
+		MALI_DEBUG_PRINT(3, ("Executor: powering down group %s\n",
+				     mali_group_core_description(groups[i])));
+
+		mali_group_power_down(groups[i]);
+	}
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups completed\n", num_groups));
+
+	mali_executor_unlock();
+}
+
+void mali_executor_abort_session(struct mali_session_data *session)
+{
+	struct mali_group *group;
+	struct mali_group *tmp_group;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(session->is_aborting);
+
+	MALI_DEBUG_PRINT(3,
+			 ("Executor: Aborting all jobs from session 0x%08X.\n",
+			  session));
+
+	mali_executor_lock();
+
+	if (mali_group_get_session(gp_group) == session) {
+		if (EXEC_STATE_WORKING == gp_group_state) {
+			struct mali_gp_job *gp_job = NULL;
+
+			mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL);
+
+			MALI_DEBUG_ASSERT_POINTER(gp_job);
+
+			/* GP job completed, make sure it is freed */
+			mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+						       MALI_TRUE, MALI_TRUE);
+		} else {
+			/* Same session, but not working, so just clear it */
+			mali_group_clear_session(gp_group);
+		}
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		if (EXEC_STATE_WORKING == virtual_group_state
+		    && mali_group_get_session(virtual_group) == session) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job);
+
+			if (NULL != pp_job) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_TRUE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_working,
+				    struct mali_group, executor_list) {
+		if (mali_group_get_session(group) == session) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job);
+
+			if (NULL != pp_job) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_TRUE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_idle, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_inactive, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_disabled, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	mali_executor_unlock();
+}
+
+
+void mali_executor_core_scaling_enable(void)
+{
+	/* PS: Core scaling is by default enabled */
+	core_scaling_enabled = MALI_TRUE;
+}
+
+void mali_executor_core_scaling_disable(void)
+{
+	core_scaling_enabled = MALI_FALSE;
+}
+
+mali_bool mali_executor_core_scaling_is_enabled(void)
+{
+	return core_scaling_enabled;
+}
+
+void mali_executor_group_enable(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+
+	if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group))
+	    && (mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) {
+		mali_executor_group_enable_internal(group);
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+}
+
+/*
+ * If a physical group is inactive or idle, we should disable it immediately,
+ * if group is in virtual, and virtual group is idle, disable given physical group in it.
+ */
+void mali_executor_group_disable(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+
+	if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group))
+	    && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) {
+		mali_executor_group_disable_internal(group);
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+}
+
+mali_bool mali_executor_group_is_disabled(struct mali_group *group)
+{
+	/* NB: This function is not optimized for time critical usage */
+
+	mali_bool ret;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+	ret = mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED);
+	mali_executor_unlock();
+
+	return ret;
+}
+
+int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override)
+{
+	if (target_core_nr == num_physical_pp_cores_enabled) return 0;
+	if (MALI_FALSE == core_scaling_enabled && MALI_FALSE == override) return -EPERM;
+	if (target_core_nr > num_physical_pp_cores_total) return -EINVAL;
+	if (0 == target_core_nr) return -EINVAL;
+
+	mali_executor_core_scale(target_core_nr);
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+
+	return 0;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_executor_dump_state(char *buf, u32 size)
+{
+	int n = 0;
+	struct mali_group *group;
+	struct mali_group *temp;
+
+	mali_executor_lock();
+
+	switch (gp_group_state) {
+	case EXEC_STATE_INACTIVE:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state INACTIVE\n");
+		break;
+	case EXEC_STATE_IDLE:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state IDLE\n");
+		break;
+	case EXEC_STATE_WORKING:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state WORKING\n");
+		break;
+	default:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in unknown/illegal state %u\n",
+					gp_group_state);
+		break;
+	}
+
+	n += mali_group_dump_state(gp_group, buf + n, size - n);
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in WORKING state (count = %u):\n",
+				group_list_working_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in IDLE state (count = %u):\n",
+				group_list_idle_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in INACTIVE state (count = %u):\n",
+				group_list_inactive_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in DISABLED state (count = %u):\n",
+				group_list_disabled_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		switch (virtual_group_state) {
+		case EXEC_STATE_EMPTY:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state EMPTY\n");
+			break;
+		case EXEC_STATE_INACTIVE:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state INACTIVE\n");
+			break;
+		case EXEC_STATE_IDLE:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state IDLE\n");
+			break;
+		case EXEC_STATE_WORKING:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state WORKING\n");
+			break;
+		default:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in unknown/illegal state %u\n",
+						virtual_group_state);
+			break;
+		}
+
+		n += mali_group_dump_state(virtual_group, buf + n, size - n);
+	}
+
+	mali_executor_unlock();
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\n");
+
+	return n;
+}
+#endif
+
+_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->number_of_total_cores = num_physical_pp_cores_total;
+	args->number_of_enabled_cores = num_physical_pp_cores_enabled;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->version = pp_version;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->number_of_cores = 1;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->version = gp_version;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args)
+{
+	struct mali_session_data *session;
+	struct mali_gp_job *job;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (_MALIGP_JOB_RESUME_WITH_NEW_HEAP == args->code) {
+		_mali_osk_notification_t *new_notification = NULL;
+
+		new_notification = _mali_osk_notification_create(
+					   _MALI_NOTIFICATION_GP_STALLED,
+					   sizeof(_mali_uk_gp_job_suspended_s));
+
+		if (NULL != new_notification) {
+			MALI_DEBUG_PRINT(3, ("Executor: Resuming job %u with new heap; 0x%08X - 0x%08X\n",
+					     args->cookie, args->arguments[0], args->arguments[1]));
+
+			mali_executor_lock();
+
+			/* Resume the job in question if it is still running */
+			job = mali_group_get_running_gp_job(gp_group);
+			if (NULL != job &&
+			    args->cookie == mali_gp_job_get_id(job) &&
+			    session == mali_gp_job_get_session(job)) {
+				/*
+				 * Correct job is running, resume with new heap
+				 */
+
+				mali_gp_job_set_oom_notification(job,
+								 new_notification);
+
+				/* This will also re-enable interrupts */
+				mali_group_resume_gp_with_new_heap(gp_group,
+								   args->cookie,
+								   args->arguments[0],
+								   args->arguments[1]);
+
+				mali_executor_unlock();
+				return _MALI_OSK_ERR_OK;
+			} else {
+				MALI_DEBUG_PRINT(2, ("Executor: Unable to resume  gp job becasue gp time out or any other unexpected reason!\n"));
+
+				_mali_osk_notification_delete(new_notification);
+
+				mali_executor_unlock();
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_PRINT_ERROR(("Executor: Failed to allocate notification object. Will abort GP job.\n"));
+		}
+	} else {
+		MALI_DEBUG_PRINT(2, ("Executor: Aborting job %u, no new heap provided\n", args->cookie));
+	}
+
+	mali_executor_lock();
+
+	/* Abort the job in question if it is still running */
+	job = mali_group_get_running_gp_job(gp_group);
+	if (NULL != job &&
+	    args->cookie == mali_gp_job_get_id(job) &&
+	    session == mali_gp_job_get_session(job)) {
+		/* Correct job is still running */
+		struct mali_gp_job *job_done = NULL;
+
+		mali_executor_complete_group(gp_group, MALI_FALSE, &job_done, NULL);
+
+		/* The same job should have completed */
+		MALI_DEBUG_ASSERT(job_done == job);
+
+		/* GP job completed, make sure it is freed */
+		mali_scheduler_complete_gp_job(job_done, MALI_FALSE,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	mali_executor_unlock();
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+/*
+ * ---------- Implementation of static functions ----------
+ */
+
+void mali_executor_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_executor_lock_obj);
+	MALI_DEBUG_PRINT(5, ("Executor: lock taken\n"));
+}
+
+void mali_executor_unlock(void)
+{
+	MALI_DEBUG_PRINT(5, ("Executor: Releasing lock\n"));
+	_mali_osk_spinlock_irq_unlock(mali_executor_lock_obj);
+}
+
+static mali_bool mali_executor_is_suspended(void *data)
+{
+	mali_bool ret;
+
+	/* This callback does not use the data pointer. */
+	MALI_IGNORE(data);
+
+	mali_executor_lock();
+
+	ret = pause_count > 0 && !mali_executor_is_working();
+
+	mali_executor_unlock();
+
+	return ret;
+}
+
+static mali_bool mali_executor_is_working()
+{
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	return (0 != group_list_working_count ||
+		EXEC_STATE_WORKING == gp_group_state ||
+		EXEC_STATE_WORKING == virtual_group_state);
+}
+
+static void mali_executor_disable_empty_virtual(void)
+{
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_EMPTY);
+	MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_WORKING);
+
+	if (mali_group_is_empty(virtual_group)) {
+		virtual_group_state = EXEC_STATE_EMPTY;
+	}
+}
+
+static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group)
+{
+	mali_bool trigger_pm_update = MALI_FALSE;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	/* Only rejoining after job has completed (still active) */
+	MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE ==
+			  mali_group_get_state(group));
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_has_virtual_group());
+	MALI_DEBUG_ASSERT(MALI_FALSE == mali_group_is_virtual(group));
+
+	/* Make sure group and virtual group have same status */
+
+	if (MALI_GROUP_STATE_INACTIVE == mali_group_get_state(virtual_group)) {
+		if (mali_group_deactivate(group)) {
+			trigger_pm_update = MALI_TRUE;
+		}
+
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+		}
+	} else if (MALI_GROUP_STATE_ACTIVATION_PENDING ==
+		   mali_group_get_state(virtual_group)) {
+		/*
+		 * Activation is pending for virtual group, leave
+		 * this child group as active.
+		 */
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE ==
+				  mali_group_get_state(virtual_group));
+
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_IDLE;
+		}
+	}
+
+	/* Remove group from idle list */
+	MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group,
+			  EXEC_STATE_IDLE));
+	_mali_osk_list_delinit(&group->executor_list);
+	group_list_idle_count--;
+
+	/*
+	 * And finally rejoin the virtual group
+	 * group will start working on same job as virtual_group,
+	 * if virtual_group is working on a job
+	 */
+	mali_group_add_group(virtual_group, group);
+
+	return trigger_pm_update;
+}
+
+static mali_bool mali_executor_has_virtual_group(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	return (NULL != virtual_group) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+}
+
+static mali_bool mali_executor_virtual_group_is_usable(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return ((EXEC_STATE_INACTIVE == virtual_group_state ||
+		EXEC_STATE_IDLE == virtual_group_state) && (virtual_group->state != MALI_GROUP_STATE_ACTIVATION_PENDING)) ?
+	       MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+}
+
+static mali_bool mali_executor_tackle_gp_bound(void)
+{
+	struct mali_pp_job *job;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	job = mali_scheduler_job_pp_physical_peek();
+
+	if (NULL != job && MALI_TRUE == mali_is_mali400()) {
+		if (0 < group_list_working_count &&
+		    mali_pp_job_is_large_and_unstarted(job)) {
+			return MALI_TRUE;
+		}
+	}
+
+	return MALI_FALSE;
+}
+
+static mali_bool mali_executor_schedule_is_early_out(mali_bool *gpu_secure_mode_is_needed)
+{
+	struct mali_pp_job *next_pp_job_to_start = NULL;
+	struct mali_group *group;
+	struct mali_group *tmp_group;
+	struct mali_pp_job *physical_pp_job_working = NULL;
+	struct mali_pp_job *virtual_pp_job_working = NULL;
+	mali_bool gpu_working_in_protected_mode = MALI_FALSE;
+	mali_bool gpu_working_in_non_protected_mode = MALI_FALSE;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	*gpu_secure_mode_is_needed = MALI_FALSE;
+
+	/* Check if the gpu secure mode is supported, exit if not.*/
+	if (MALI_FALSE == _mali_osk_gpu_secure_mode_is_supported()) {
+		return MALI_FALSE;
+	}
+
+	/* Check if need to set gpu secure mode for the next pp job,
+	 * get the next pp job that will be scheduled  if exist.
+	 */
+	next_pp_job_to_start = mali_scheduler_job_pp_next();
+
+	/* Check current pp physical/virtual running job is protected job or not if exist.*/
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_working,
+				    struct mali_group, executor_list) {
+		physical_pp_job_working = group->pp_running_job;
+		break;
+	}
+
+	if (EXEC_STATE_WORKING == virtual_group_state) {
+		virtual_pp_job_working = virtual_group->pp_running_job;
+	}
+
+	if (NULL != physical_pp_job_working) {
+		if (MALI_TRUE == mali_pp_job_is_protected_job(physical_pp_job_working)) {
+			gpu_working_in_protected_mode = MALI_TRUE;
+		} else {
+			gpu_working_in_non_protected_mode = MALI_TRUE;
+		}
+	} else if (NULL != virtual_pp_job_working) {
+		if (MALI_TRUE == mali_pp_job_is_protected_job(virtual_pp_job_working)) {
+			gpu_working_in_protected_mode = MALI_TRUE;
+		} else {
+			gpu_working_in_non_protected_mode = MALI_TRUE;
+		}
+	} else if (EXEC_STATE_WORKING == gp_group_state) {
+		gpu_working_in_non_protected_mode = MALI_TRUE;
+	}
+
+	/* If the next pp job is the protected pp job.*/
+	if ((NULL != next_pp_job_to_start) && MALI_TRUE == mali_pp_job_is_protected_job(next_pp_job_to_start)) {
+		/* if gp is working or any non-protected pp job is working now, unable to schedule protected pp job. */
+		if (MALI_TRUE == gpu_working_in_non_protected_mode)
+			return MALI_TRUE;
+
+		*gpu_secure_mode_is_needed = MALI_TRUE;
+		return MALI_FALSE;
+
+	}
+
+	if (MALI_TRUE == gpu_working_in_protected_mode) {
+		/* Unable to schedule non-protected pp job/gp job if exist protected pp running jobs*/
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+/*
+ * This is where jobs are actually started.
+ */
+static void mali_executor_schedule(void)
+{
+	u32 i;
+	u32 num_physical_needed = 0;
+	u32 num_physical_to_process = 0;
+	mali_bool trigger_pm_update = MALI_FALSE;
+	mali_bool deactivate_idle_group = MALI_TRUE;
+	mali_bool gpu_secure_mode_is_needed = MALI_FALSE;
+	mali_bool is_gpu_secure_mode = MALI_FALSE;
+	/* Physical groups + jobs to start in this function */
+	struct mali_group *groups_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	struct mali_pp_job *jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	u32 sub_jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	int num_jobs_to_start = 0;
+
+	/* Virtual job to start in this function */
+	struct mali_pp_job *virtual_job_to_start = NULL;
+
+	/* GP job to start in this function */
+	struct mali_gp_job *gp_job_to_start = NULL;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (pause_count > 0) {
+		/* Execution is suspended, don't schedule any jobs. */
+		return;
+	}
+
+	/* Lock needed in order to safely handle the job queues */
+	mali_scheduler_lock();
+
+	/* 1. Check the schedule if need to early out. */
+	if (MALI_TRUE == mali_executor_schedule_is_early_out(&gpu_secure_mode_is_needed)) {
+		mali_scheduler_unlock();
+		return;
+	}
+
+	/* 2. Activate gp firstly if have gp job queued. */
+	if ((EXEC_STATE_INACTIVE == gp_group_state)
+	    && (0 < mali_scheduler_job_gp_count())
+	    && (gpu_secure_mode_is_needed == MALI_FALSE)) {
+
+		enum mali_group_state state =
+			mali_group_activate(gp_group);
+		if (MALI_GROUP_STATE_ACTIVE == state) {
+			/* Set GP group state to idle */
+			gp_group_state = EXEC_STATE_IDLE;
+		} else {
+			trigger_pm_update = MALI_TRUE;
+		}
+	}
+
+	/* 3. Prepare as many physical groups as needed/possible */
+
+	num_physical_needed = mali_scheduler_job_physical_head_count(gpu_secure_mode_is_needed);
+
+	/* On mali-450 platform, we don't need to enter in this block frequently. */
+	if (0 < num_physical_needed) {
+
+		if (num_physical_needed <= group_list_idle_count) {
+			/* We have enough groups on idle list already */
+			num_physical_to_process = num_physical_needed;
+			num_physical_needed = 0;
+		} else {
+			/* We need to get a hold of some more groups */
+			num_physical_to_process = group_list_idle_count;
+			num_physical_needed -= group_list_idle_count;
+		}
+
+		if (0 < num_physical_needed) {
+
+			/* 3.1. Activate groups which are inactive */
+
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive,
+						    struct mali_group, executor_list) {
+				enum mali_group_state state =
+					mali_group_activate(group);
+				if (MALI_GROUP_STATE_ACTIVE == state) {
+					/* Move from inactive to idle */
+					mali_executor_change_state_pp_physical(group,
+									       &group_list_inactive,
+									       &group_list_inactive_count,
+									       &group_list_idle,
+									       &group_list_idle_count);
+					num_physical_to_process++;
+				} else {
+					trigger_pm_update = MALI_TRUE;
+				}
+
+				num_physical_needed--;
+				if (0 == num_physical_needed) {
+					/* We have activated all the groups we need */
+					break;
+				}
+			}
+		}
+
+		if (mali_executor_virtual_group_is_usable()) {
+
+			/*
+			 * 3.2. And finally, steal and activate groups
+			 * from virtual group if we need even more
+			 */
+			while (0 < num_physical_needed) {
+				struct mali_group *group;
+
+				group = mali_group_acquire_group(virtual_group);
+				if (NULL != group) {
+					enum mali_group_state state;
+
+					mali_executor_disable_empty_virtual();
+
+					state = mali_group_activate(group);
+					if (MALI_GROUP_STATE_ACTIVE == state) {
+						/* Group is ready, add to idle list */
+						_mali_osk_list_add(
+							&group->executor_list,
+							&group_list_idle);
+						group_list_idle_count++;
+						num_physical_to_process++;
+					} else {
+						/*
+						 * Group is not ready yet,
+						 * add to inactive list
+						 */
+						_mali_osk_list_add(
+							&group->executor_list,
+							&group_list_inactive);
+						group_list_inactive_count++;
+
+						trigger_pm_update = MALI_TRUE;
+					}
+					num_physical_needed--;
+				} else {
+					/*
+					 * We could not get enough groups
+					 * from the virtual group.
+					 */
+					break;
+				}
+			}
+		}
+
+		/* 3.3. Assign physical jobs to groups */
+
+		if (0 < num_physical_to_process) {
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle,
+						    struct mali_group, executor_list) {
+				struct mali_pp_job *job = NULL;
+				u32 sub_job = MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+
+				MALI_DEBUG_ASSERT(num_jobs_to_start <
+						  MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS);
+
+				MALI_DEBUG_ASSERT(0 <
+						  mali_scheduler_job_physical_head_count(gpu_secure_mode_is_needed));
+
+				/* If the next pp job is non-protected, check if gp bound now. */
+				if ((MALI_FALSE == gpu_secure_mode_is_needed)
+				    && (mali_executor_hint_is_enabled(MALI_EXECUTOR_HINT_GP_BOUND))
+				    && (MALI_TRUE == mali_executor_tackle_gp_bound())) {
+					/*
+					* We're gp bound,
+					* don't start this right now.
+					*/
+					deactivate_idle_group = MALI_FALSE;
+					num_physical_to_process = 0;
+					break;
+				}
+
+				job = mali_scheduler_job_pp_physical_get(
+					      &sub_job);
+
+				if (MALI_FALSE == gpu_secure_mode_is_needed) {
+					MALI_DEBUG_ASSERT(MALI_FALSE == mali_pp_job_is_protected_job(job));
+				} else {
+					MALI_DEBUG_ASSERT(MALI_TRUE == mali_pp_job_is_protected_job(job));
+				}
+
+				MALI_DEBUG_ASSERT_POINTER(job);
+				MALI_DEBUG_ASSERT(sub_job <= MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS);
+
+				/* Put job + group on list of jobs to start later on */
+
+				groups_to_start[num_jobs_to_start] = group;
+				jobs_to_start[num_jobs_to_start] = job;
+				sub_jobs_to_start[num_jobs_to_start] = sub_job;
+				num_jobs_to_start++;
+
+				/* Move group from idle to working */
+				mali_executor_change_state_pp_physical(group,
+								       &group_list_idle,
+								       &group_list_idle_count,
+								       &group_list_working,
+								       &group_list_working_count);
+
+				num_physical_to_process--;
+				if (0 == num_physical_to_process) {
+					/* Got all we needed */
+					break;
+				}
+			}
+		}
+	}
+
+	/* 4. Deactivate idle pp group , must put deactive here before active vitual group
+	 *    for cover case first only has physical job in normal queue but group inactive,
+	 *    so delay the job start go to active group, when group activated,
+	 *    call scheduler again, but now if we get high queue virtual job,
+	 *    we will do nothing in schedule cause executor schedule stop
+	 */
+
+	if (MALI_TRUE == mali_executor_deactivate_list_idle(deactivate_idle_group
+			&& (!mali_timeline_has_physical_pp_job()))) {
+		trigger_pm_update = MALI_TRUE;
+	}
+
+	/* 5. Activate virtual group, if needed */
+	if (EXEC_STATE_INACTIVE == virtual_group_state &&
+	    MALI_TRUE ==  mali_scheduler_job_next_is_virtual()) {
+		struct mali_pp_job *virtual_job = mali_scheduler_job_pp_virtual_peek();
+		if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == mali_pp_job_is_protected_job(virtual_job))
+		    || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == mali_pp_job_is_protected_job(virtual_job))) {
+			enum mali_group_state state =
+				mali_group_activate(virtual_group);
+			if (MALI_GROUP_STATE_ACTIVE == state) {
+				/* Set virtual group state to idle */
+				virtual_group_state = EXEC_STATE_IDLE;
+			} else {
+				trigger_pm_update = MALI_TRUE;
+			}
+		}
+	}
+
+	/* 6. To power up group asap,  trigger pm update only when no need to swith the gpu mode. */
+
+	is_gpu_secure_mode = _mali_osk_gpu_secure_mode_is_enabled();
+
+	if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == is_gpu_secure_mode)
+	    || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == is_gpu_secure_mode)) {
+		if (MALI_TRUE == trigger_pm_update) {
+			trigger_pm_update = MALI_FALSE;
+			mali_pm_update_async();
+		}
+	}
+
+	/* 7. Assign jobs to idle virtual group (or deactivate if no job) */
+
+	if (EXEC_STATE_IDLE == virtual_group_state) {
+		if (MALI_TRUE == mali_scheduler_job_next_is_virtual()) {
+			struct mali_pp_job *virtual_job = mali_scheduler_job_pp_virtual_peek();
+			if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == mali_pp_job_is_protected_job(virtual_job))
+			    || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == mali_pp_job_is_protected_job(virtual_job))) {
+				virtual_job_to_start =
+					mali_scheduler_job_pp_virtual_get();
+				virtual_group_state = EXEC_STATE_WORKING;
+			}
+		} else if (!mali_timeline_has_virtual_pp_job()) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+
+			if (mali_group_deactivate(virtual_group)) {
+				trigger_pm_update = MALI_TRUE;
+			}
+		}
+	}
+
+	/* 8. Assign job to idle GP group (or deactivate if no job) */
+
+	if (EXEC_STATE_IDLE == gp_group_state && MALI_FALSE == gpu_secure_mode_is_needed) {
+		if (0 < mali_scheduler_job_gp_count()) {
+			gp_job_to_start = mali_scheduler_job_gp_get();
+			gp_group_state = EXEC_STATE_WORKING;
+		} else if (!mali_timeline_has_gp_job()) {
+			gp_group_state = EXEC_STATE_INACTIVE;
+			if (mali_group_deactivate(gp_group)) {
+				trigger_pm_update = MALI_TRUE;
+			}
+		}
+	}
+
+	/* 9. We no longer need the schedule/queue lock */
+
+	mali_scheduler_unlock();
+
+	/* 10. start jobs */
+	if (NULL != virtual_job_to_start) {
+		MALI_DEBUG_ASSERT(!mali_group_pp_is_active(virtual_group));
+		mali_group_start_pp_job(virtual_group,
+					virtual_job_to_start, 0, is_gpu_secure_mode);
+	}
+
+	for (i = 0; i < num_jobs_to_start; i++) {
+		MALI_DEBUG_ASSERT(!mali_group_pp_is_active(
+					  groups_to_start[i]));
+		mali_group_start_pp_job(groups_to_start[i],
+					jobs_to_start[i],
+					sub_jobs_to_start[i], is_gpu_secure_mode);
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(gp_group);
+
+	if (NULL != gp_job_to_start) {
+		MALI_DEBUG_ASSERT(!mali_group_gp_is_active(gp_group));
+		mali_group_start_gp_job(gp_group, gp_job_to_start, is_gpu_secure_mode);
+	}
+
+	/* 11. Trigger any pending PM updates */
+	if (MALI_TRUE == trigger_pm_update) {
+		mali_pm_update_async();
+	}
+}
+
+/* Handler for deferred schedule requests */
+static void mali_executor_wq_schedule(void *arg)
+{
+	MALI_IGNORE(arg);
+	mali_executor_lock();
+	mali_executor_schedule();
+	mali_executor_unlock();
+}
+
+static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job)
+{
+	_mali_uk_gp_job_suspended_s *jobres;
+	_mali_osk_notification_t *notification;
+
+	notification = mali_gp_job_get_oom_notification(job);
+
+	/*
+	 * Remember the id we send to user space, so we have something to
+	 * verify when we get a response
+	 */
+	gp_returned_cookie = mali_gp_job_get_id(job);
+
+	jobres = (_mali_uk_gp_job_suspended_s *)notification->result_buffer;
+	jobres->user_job_ptr = mali_gp_job_get_user_id(job);
+	jobres->cookie = gp_returned_cookie;
+
+	mali_session_send_notification(mali_gp_job_get_session(job),
+				       notification);
+}
+static struct mali_gp_job *mali_executor_complete_gp(struct mali_group *group,
+		mali_bool success)
+{
+	struct mali_gp_job *job;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* Extracts the needed HW status from core and reset */
+	job = mali_group_complete_gp(group, success);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Core is now ready to go into idle list */
+	gp_group_state = EXEC_STATE_IDLE;
+
+	/* This will potentially queue more GP and PP jobs */
+	mali_timeline_tracker_release(&job->tracker);
+
+	/* Signal PP job */
+	mali_gp_job_signal_pp_tracker(job, success);
+
+	return job;
+}
+
+static struct mali_pp_job *mali_executor_complete_pp(struct mali_group *group,
+		mali_bool success)
+{
+	struct mali_pp_job *job;
+	u32 sub_job;
+	mali_bool job_is_done;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* Extracts the needed HW status from core and reset */
+	job = mali_group_complete_pp(group, success, &sub_job);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Core is now ready to go into idle list */
+	if (mali_group_is_virtual(group)) {
+		virtual_group_state = EXEC_STATE_IDLE;
+	} else {
+		/* Move from working to idle state */
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_working,
+						       &group_list_working_count,
+						       &group_list_idle,
+						       &group_list_idle_count);
+	}
+
+	/* It is the executor module which owns the jobs themselves by now */
+	mali_pp_job_mark_sub_job_completed(job, success);
+	job_is_done = mali_pp_job_is_complete(job);
+
+	if (job_is_done) {
+		/* This will potentially queue more GP and PP jobs */
+		mali_timeline_tracker_release(&job->tracker);
+	}
+
+	return job;
+}
+
+static void mali_executor_complete_group(struct mali_group *group,
+		mali_bool success,
+		struct mali_gp_job **gp_job_done,
+		struct mali_pp_job **pp_job_done)
+{
+	struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+	struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+	struct mali_gp_job *gp_job = NULL;
+	struct mali_pp_job *pp_job = NULL;
+	mali_bool pp_job_is_done = MALI_TRUE;
+
+	if (NULL != gp_core) {
+		gp_job = mali_executor_complete_gp(group, success);
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(pp_core);
+		MALI_IGNORE(pp_core);
+		pp_job = mali_executor_complete_pp(group, success);
+
+		pp_job_is_done = mali_pp_job_is_complete(pp_job);
+	}
+
+	if (pause_count > 0) {
+		/* Execution has been suspended */
+
+		if (!mali_executor_is_working()) {
+			/* Last job completed, wake up sleepers */
+			_mali_osk_wait_queue_wake_up(
+				executor_working_wait_queue);
+		}
+	} else if (MALI_TRUE == mali_group_disable_requested(group)) {
+		mali_executor_core_scale_in_group_complete(group);
+
+		mali_executor_schedule();
+	} else {
+		/* try to schedule new jobs */
+		mali_executor_schedule();
+	}
+
+	if (NULL != gp_job) {
+		MALI_DEBUG_ASSERT_POINTER(gp_job_done);
+		*gp_job_done = gp_job;
+	} else if (pp_job_is_done) {
+		MALI_DEBUG_ASSERT_POINTER(pp_job);
+		MALI_DEBUG_ASSERT_POINTER(pp_job_done);
+		*pp_job_done = pp_job;
+	}
+}
+
+static void mali_executor_change_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *old_list,
+		u32 *old_count,
+		_mali_osk_list_t *new_list,
+		u32 *new_count)
+{
+	/*
+	 * It's a bit more complicated to change the state for the physical PP
+	 * groups since their state is determined by the list they are on.
+	 */
+#if defined(DEBUG)
+	mali_bool found = MALI_FALSE;
+	struct mali_group *group_iter;
+	struct mali_group *temp;
+	u32 old_counted = 0;
+	u32 new_counted = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(old_list);
+	MALI_DEBUG_ASSERT_POINTER(old_count);
+	MALI_DEBUG_ASSERT_POINTER(new_list);
+	MALI_DEBUG_ASSERT_POINTER(new_count);
+
+	/*
+	 * Verify that group is present on old list,
+	 * and that the count is correct
+	 */
+
+	_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, old_list,
+				    struct mali_group, executor_list) {
+		old_counted++;
+		if (group == group_iter) {
+			found = MALI_TRUE;
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, new_list,
+				    struct mali_group, executor_list) {
+		new_counted++;
+	}
+
+	if (MALI_FALSE == found) {
+		if (old_list == &group_list_idle) {
+			MALI_DEBUG_PRINT(1, (" old Group list is idle,"));
+		} else if (old_list == &group_list_inactive) {
+			MALI_DEBUG_PRINT(1, (" old Group list is inactive,"));
+		} else if (old_list == &group_list_working) {
+			MALI_DEBUG_PRINT(1, (" old Group list is working,"));
+		} else if (old_list == &group_list_disabled) {
+			MALI_DEBUG_PRINT(1, (" old Group list is disable,"));
+		}
+
+		if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_WORKING)) {
+			MALI_DEBUG_PRINT(1, (" group in working \n"));
+		} else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_INACTIVE)) {
+			MALI_DEBUG_PRINT(1, (" group in inactive \n"));
+		} else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_IDLE)) {
+			MALI_DEBUG_PRINT(1, (" group in idle \n"));
+		} else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)) {
+			MALI_DEBUG_PRINT(1, (" but group in disabled \n"));
+		}
+	}
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == found);
+	MALI_DEBUG_ASSERT(0 < (*old_count));
+	MALI_DEBUG_ASSERT((*old_count) == old_counted);
+	MALI_DEBUG_ASSERT((*new_count) == new_counted);
+#endif
+
+	_mali_osk_list_move(&group->executor_list, new_list);
+	(*old_count)--;
+	(*new_count)++;
+}
+
+static void mali_executor_set_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *new_list,
+		u32 *new_count)
+{
+	_mali_osk_list_add(&group->executor_list, new_list);
+	(*new_count)++;
+}
+
+static mali_bool mali_executor_group_is_in_state(struct mali_group *group,
+		enum mali_executor_state_t state)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (gp_group == group) {
+		if (gp_group_state == state) {
+			return MALI_TRUE;
+		}
+	} else if (virtual_group == group || mali_group_is_in_virtual(group)) {
+		if (virtual_group_state == state) {
+			return MALI_TRUE;
+		}
+	} else {
+		/* Physical PP group */
+		struct mali_group *group_iter;
+		struct mali_group *temp;
+		_mali_osk_list_t *list;
+
+		if (EXEC_STATE_DISABLED == state) {
+			list = &group_list_disabled;
+		} else if (EXEC_STATE_INACTIVE == state) {
+			list = &group_list_inactive;
+		} else if (EXEC_STATE_IDLE == state) {
+			list = &group_list_idle;
+		} else {
+			MALI_DEBUG_ASSERT(EXEC_STATE_WORKING == state);
+			list = &group_list_working;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, list,
+					    struct mali_group, executor_list) {
+			if (group_iter == group) {
+				return MALI_TRUE;
+			}
+		}
+	}
+
+	/* group not in correct state */
+	return MALI_FALSE;
+}
+
+static void mali_executor_group_enable_internal(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED));
+
+	/* Put into inactive state (== "lowest" enabled state) */
+	if (group == gp_group) {
+		MALI_DEBUG_ASSERT(EXEC_STATE_DISABLED == gp_group_state);
+		gp_group_state = EXEC_STATE_INACTIVE;
+	} else {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_disabled,
+						       &group_list_disabled_count,
+						       &group_list_inactive,
+						       &group_list_inactive_count);
+
+		++num_physical_pp_cores_enabled;
+		MALI_DEBUG_PRINT(4, ("Enabling group id %d \n", group->pp_core->core_id));
+	}
+
+	if (MALI_GROUP_STATE_ACTIVE == mali_group_activate(group)) {
+		MALI_DEBUG_ASSERT(MALI_TRUE == mali_group_power_is_on(group));
+
+		/* Move from inactive to idle */
+		if (group == gp_group) {
+			gp_group_state = EXEC_STATE_IDLE;
+		} else {
+			mali_executor_change_state_pp_physical(group,
+							       &group_list_inactive,
+							       &group_list_inactive_count,
+							       &group_list_idle,
+							       &group_list_idle_count);
+
+			if (mali_executor_has_virtual_group()) {
+				if (mali_executor_physical_rejoin_virtual(group)) {
+					mali_pm_update_async();
+				}
+			}
+		}
+	} else {
+		mali_pm_update_async();
+	}
+}
+
+static void mali_executor_group_disable_internal(struct mali_group *group)
+{
+	mali_bool working;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED));
+
+	working = mali_executor_group_is_in_state(group, EXEC_STATE_WORKING);
+	if (MALI_TRUE == working) {
+		/** Group to be disabled once it completes current work,
+		 * when virtual group completes, also check child groups for this flag */
+		mali_group_set_disable_request(group, MALI_TRUE);
+		return;
+	}
+
+	/* Put into disabled state */
+	if (group == gp_group) {
+		/* GP group */
+		MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != gp_group_state);
+		gp_group_state = EXEC_STATE_DISABLED;
+	} else {
+		if (mali_group_is_in_virtual(group)) {
+			/* A child group of virtual group. move the specific group from virtual group */
+			MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != virtual_group_state);
+
+			mali_executor_set_state_pp_physical(group,
+							    &group_list_disabled,
+							    &group_list_disabled_count);
+
+			mali_group_remove_group(virtual_group, group);
+			mali_executor_disable_empty_virtual();
+		} else {
+			mali_executor_change_group_status_disabled(group);
+		}
+
+		--num_physical_pp_cores_enabled;
+		MALI_DEBUG_PRINT(4, ("Disabling group id %d \n", group->pp_core->core_id));
+	}
+
+	if (MALI_GROUP_STATE_INACTIVE != group->state) {
+		if (MALI_TRUE == mali_group_deactivate(group)) {
+			mali_pm_update_async();
+		}
+	}
+}
+
+static void mali_executor_notify_core_change(u32 num_cores)
+{
+	mali_bool done = MALI_FALSE;
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		return;
+	}
+
+	/*
+	 * This function gets a bit complicated because we can't hold the session lock while
+	 * allocating notification objects.
+	 */
+	while (!done) {
+		u32 i;
+		u32 num_sessions_alloc;
+		u32 num_sessions_with_lock;
+		u32 used_notification_objects = 0;
+		_mali_osk_notification_t **notobjs;
+
+		/* Pre allocate the number of notifications objects we need right now (might change after lock has been taken) */
+		num_sessions_alloc = mali_session_get_count();
+		if (0 == num_sessions_alloc) {
+			/* No sessions to report to */
+			return;
+		}
+
+		notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc);
+		if (NULL == notobjs) {
+			MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n"));
+			/* there is probably no point in trying again, system must be really low on memory and probably unusable now anyway */
+			return;
+		}
+
+		for (i = 0; i < num_sessions_alloc; i++) {
+			notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_NUM_CORE_CHANGE, sizeof(_mali_uk_pp_num_cores_changed_s));
+			if (NULL != notobjs[i]) {
+				_mali_uk_pp_num_cores_changed_s *data = notobjs[i]->result_buffer;
+				data->number_of_enabled_cores = num_cores;
+			} else {
+				MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure %u)\n", i));
+			}
+		}
+
+		mali_session_lock();
+
+		/* number of sessions will not change while we hold the lock */
+		num_sessions_with_lock = mali_session_get_count();
+
+		if (num_sessions_alloc >= num_sessions_with_lock) {
+			/* We have allocated enough notification objects for all the sessions atm */
+			struct mali_session_data *session, *tmp;
+			MALI_SESSION_FOREACH(session, tmp, link) {
+				MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc);
+				if (NULL != notobjs[used_notification_objects]) {
+					mali_session_send_notification(session, notobjs[used_notification_objects]);
+					notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */
+				}
+				used_notification_objects++;
+			}
+			done = MALI_TRUE;
+		}
+
+		mali_session_unlock();
+
+		/* Delete any remaining/unused notification objects */
+		for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) {
+			if (NULL != notobjs[used_notification_objects]) {
+				_mali_osk_notification_delete(notobjs[used_notification_objects]);
+			}
+		}
+
+		_mali_osk_free(notobjs);
+	}
+}
+
+static mali_bool mali_executor_core_scaling_is_done(void *data)
+{
+	u32 i;
+	u32 num_groups;
+	mali_bool ret = MALI_TRUE;
+
+	MALI_IGNORE(data);
+
+	mali_executor_lock();
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (NULL != group) {
+			if (MALI_TRUE == group->disable_requested && NULL != mali_group_get_pp_core(group)) {
+				ret = MALI_FALSE;
+				break;
+			}
+		}
+	}
+	mali_executor_unlock();
+
+	return ret;
+}
+
+static void mali_executor_wq_notify_core_change(void *arg)
+{
+	MALI_IGNORE(arg);
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		return;
+	}
+
+	_mali_osk_wait_queue_wait_event(executor_notify_core_change_wait_queue,
+					mali_executor_core_scaling_is_done, NULL);
+
+	mali_executor_notify_core_change(num_physical_pp_cores_enabled);
+}
+
+/**
+ * Clear all disable request from the _last_ core scaling behavior.
+ */
+static void mali_executor_core_scaling_reset(void)
+{
+	u32 i;
+	u32 num_groups;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (NULL != group) {
+			group->disable_requested = MALI_FALSE;
+		}
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		core_scaling_delay_up_mask[i] = 0;
+	}
+}
+
+static void mali_executor_core_scale(unsigned int target_core_nr)
+{
+	int current_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	int target_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	int i;
+
+	MALI_DEBUG_ASSERT(0 < target_core_nr);
+	MALI_DEBUG_ASSERT(num_physical_pp_cores_total >= target_core_nr);
+
+	mali_executor_lock();
+
+	if (target_core_nr < num_physical_pp_cores_enabled) {
+		MALI_DEBUG_PRINT(2, ("Requesting %d cores: disabling %d cores\n", target_core_nr, num_physical_pp_cores_enabled - target_core_nr));
+	} else {
+		MALI_DEBUG_PRINT(2, ("Requesting %d cores: enabling %d cores\n", target_core_nr, target_core_nr - num_physical_pp_cores_enabled));
+	}
+
+	/* When a new core scaling request is comming,  we should remove the un-doing
+	 * part of the last core scaling request.  It's safe because we have only one
+	 * lock(executor lock) protection. */
+	mali_executor_core_scaling_reset();
+
+	mali_pm_get_best_power_cost_mask(num_physical_pp_cores_enabled, current_core_scaling_mask);
+	mali_pm_get_best_power_cost_mask(target_core_nr, target_core_scaling_mask);
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		target_core_scaling_mask[i] = target_core_scaling_mask[i] - current_core_scaling_mask[i];
+		MALI_DEBUG_PRINT(5, ("target_core_scaling_mask[%d] = %d\n", i, target_core_scaling_mask[i]));
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0 > target_core_scaling_mask[i]) {
+			struct mali_pm_domain *domain;
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(group) && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))
+					    && (!mali_group_is_virtual(group))) {
+						mali_executor_group_disable_internal(group);
+						target_core_scaling_mask[i]++;
+						if ((0 == target_core_scaling_mask[i])) {
+							break;
+						}
+
+					}
+				}
+			}
+		}
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		/**
+		 * Target_core_scaling_mask[i] is bigger than 0,
+		 * means we need to enable some pp cores in
+		 * this domain whose domain index is i.
+		 */
+		if (0 < target_core_scaling_mask[i]) {
+			struct mali_pm_domain *domain;
+
+			if (num_physical_pp_cores_enabled >= target_core_nr) {
+				break;
+			}
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(group) && mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)
+					    && (!mali_group_is_virtual(group))) {
+						mali_executor_group_enable_internal(group);
+						target_core_scaling_mask[i]--;
+
+						if ((0 == target_core_scaling_mask[i]) || num_physical_pp_cores_enabled == target_core_nr) {
+							break;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	/**
+	 * Here, we may still have some pp cores not been enabled because of some
+	 * pp cores need to be disabled are still in working state.
+	 */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0 < target_core_scaling_mask[i]) {
+			core_scaling_delay_up_mask[i] = target_core_scaling_mask[i];
+		}
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+}
+
+static void mali_executor_core_scale_in_group_complete(struct mali_group *group)
+{
+	int num_pp_cores_disabled = 0;
+	int num_pp_cores_to_enable = 0;
+	int i;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_group_disable_requested(group));
+
+	/* Disable child group of virtual group */
+	if (mali_group_is_virtual(group)) {
+		struct mali_group *child;
+		struct mali_group *temp;
+
+		_MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+			if (MALI_TRUE == mali_group_disable_requested(child)) {
+				mali_group_set_disable_request(child, MALI_FALSE);
+				mali_executor_group_disable_internal(child);
+				num_pp_cores_disabled++;
+			}
+		}
+		mali_group_set_disable_request(group, MALI_FALSE);
+	} else {
+		mali_executor_group_disable_internal(group);
+		mali_group_set_disable_request(group, MALI_FALSE);
+		if (NULL != mali_group_get_pp_core(group)) {
+			num_pp_cores_disabled++;
+		}
+	}
+
+	num_pp_cores_to_enable = num_pp_cores_disabled;
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0 < core_scaling_delay_up_mask[i]) {
+			struct mali_pm_domain *domain;
+
+			if (0 == num_pp_cores_to_enable) {
+				break;
+			}
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *disabled_group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(disabled_group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(disabled_group) && mali_executor_group_is_in_state(disabled_group, EXEC_STATE_DISABLED)) {
+						mali_executor_group_enable_internal(disabled_group);
+						core_scaling_delay_up_mask[i]--;
+						num_pp_cores_to_enable--;
+
+						if ((0 == core_scaling_delay_up_mask[i]) || 0 == num_pp_cores_to_enable) {
+							break;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	_mali_osk_wait_queue_wake_up(executor_notify_core_change_wait_queue);
+}
+
+static void mali_executor_change_group_status_disabled(struct mali_group *group)
+{
+	/* Physical PP group */
+	mali_bool idle;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	idle = mali_executor_group_is_in_state(group, EXEC_STATE_IDLE);
+	if (MALI_TRUE == idle) {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_idle,
+						       &group_list_idle_count,
+						       &group_list_disabled,
+						       &group_list_disabled_count);
+	} else {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_inactive,
+						       &group_list_inactive_count,
+						       &group_list_disabled,
+						       &group_list_disabled_count);
+	}
+}
+
+static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group)
+{
+	mali_bool trigger_pm_update = MALI_FALSE;
+
+	if (group_list_idle_count > 0) {
+		if (mali_executor_has_virtual_group()) {
+
+			/* Rejoin virtual group on Mali-450 */
+
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+						    &group_list_idle,
+						    struct mali_group, executor_list) {
+				if (mali_executor_physical_rejoin_virtual(
+					    group)) {
+					trigger_pm_update = MALI_TRUE;
+				}
+			}
+		} else if (deactivate_idle_group) {
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			/* Deactivate group on Mali-300/400 */
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+						    &group_list_idle,
+						    struct mali_group, executor_list) {
+				if (mali_group_deactivate(group)) {
+					trigger_pm_update = MALI_TRUE;
+				}
+
+				/* Move from idle to inactive */
+				mali_executor_change_state_pp_physical(group,
+								       &group_list_idle,
+								       &group_list_idle_count,
+								       &group_list_inactive,
+								       &group_list_inactive_count);
+			}
+		}
+	}
+
+	return trigger_pm_update;
+}
+
+void mali_executor_running_status_print(void)
+{
+	struct mali_group *group = NULL;
+	struct mali_group *temp = NULL;
+
+	MALI_PRINT(("GP running job: %p\n", gp_group->gp_running_job));
+	if ((gp_group->gp_core) && (gp_group->is_working)) {
+		mali_group_dump_status(gp_group);
+	}
+	MALI_PRINT(("Physical PP groups in WORKING state (count = %u):\n", group_list_working_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) {
+		MALI_PRINT(("PP running job: %p, subjob %d \n", group->pp_running_job, group->pp_running_sub_job));
+		mali_group_dump_status(group);
+	}
+	MALI_PRINT(("Physical PP groups in INACTIVE state (count = %u):\n", group_list_inactive_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+		MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+		MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+	}
+	MALI_PRINT(("Physical PP groups in IDLE state (count = %u):\n", group_list_idle_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+		MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+		MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+	}
+	MALI_PRINT(("Physical PP groups in DISABLED state (count = %u):\n", group_list_disabled_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+		MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+		MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		MALI_PRINT(("Virtual group running job: %p\n", virtual_group->pp_running_job));
+		MALI_PRINT(("Virtual group status: %d\n", virtual_group_state));
+		MALI_PRINT(("Virtual group->status: %d\n", virtual_group->state));
+		MALI_PRINT(("\tSW power: %s\n", virtual_group->power_is_on ? "On" : "Off"));
+		_MALI_OSK_LIST_FOREACHENTRY(group, temp, &virtual_group->group_list,
+					    struct mali_group, group_list) {
+			int i = 0;
+			MALI_PRINT(("\tchild group(%s) running job: %p\n", group->pp_core->hw_core.description, group->pp_running_job));
+			MALI_PRINT(("\tchild group(%s)->status: %d\n", group->pp_core->hw_core.description, group->state));
+			MALI_PRINT(("\tchild group(%s) SW power: %s\n", group->pp_core->hw_core.description, group->power_is_on ? "On" : "Off"));
+#if MALI_STATE_TRACKING
+			if (group->pm_domain) {
+				MALI_PRINT(("\tPower domain: id %u\n", mali_pm_domain_get_id(group->pm_domain)));
+				MALI_PRINT(("\tMask:0x%04x \n", mali_pm_domain_get_mask(group->pm_domain)));
+				MALI_PRINT(("\tUse-count:%u \n", mali_pm_domain_get_use_count(group->pm_domain)));
+				MALI_PRINT(("\tCurrent power status:%s \n", (mali_pm_domain_get_mask(group->pm_domain)& mali_pm_get_current_mask()) ? "On" : "Off"));
+				MALI_PRINT(("\tWanted  power status:%s \n", (mali_pm_domain_get_mask(group->pm_domain)& mali_pm_get_wanted_mask()) ? "On" : "Off"));
+			}
+#endif
+
+			for (i = 0; i < 2; i++) {
+				if (NULL != group->l2_cache_core[i]) {
+					struct mali_pm_domain *domain;
+					domain = mali_l2_cache_get_pm_domain(group->l2_cache_core[i]);
+					MALI_PRINT(("\t L2(index %d) group SW power: %s\n", i, group->l2_cache_core[i]->power_is_on ? "On" : "Off"));
+#if MALI_STATE_TRACKING
+					if (domain) {
+						MALI_PRINT(("\tL2 Power domain: id %u\n", mali_pm_domain_get_id(domain)));
+						MALI_PRINT(("\tL2 Mask:0x%04x \n", mali_pm_domain_get_mask(domain)));
+						MALI_PRINT(("\tL2 Use-count:%u \n", mali_pm_domain_get_use_count(domain)));
+						MALI_PRINT(("\tL2 Current power status:%s \n", (mali_pm_domain_get_mask(domain) & mali_pm_get_current_mask()) ? "On" : "Off"));
+						MALI_PRINT(("\tL2 Wanted  power status:%s \n", (mali_pm_domain_get_mask(domain) & mali_pm_get_wanted_mask()) ? "On" : "Off"));
+					}
+#endif
+				}
+			}
+		}
+		if (EXEC_STATE_WORKING == virtual_group_state) {
+			mali_group_dump_status(virtual_group);
+		}
+	}
+}
+
+void mali_executor_status_dump(void)
+{
+	mali_executor_lock();
+	mali_scheduler_lock();
+
+	/* print schedule queue status */
+	mali_scheduler_gp_pp_job_queue_print();
+
+	mali_scheduler_unlock();
+	mali_executor_unlock();
+}
diff --git a/mali/common/mali_executor.h b/mali/common/mali_executor.h
new file mode 100755
index 0000000..1d69dc3
--- /dev/null
+++ b/mali/common/mali_executor.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2012, 2014-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_EXECUTOR_H__
+#define __MALI_EXECUTOR_H__
+
+#include "mali_osk.h"
+#include "mali_scheduler_types.h"
+#include "mali_kernel_common.h"
+
+typedef enum {
+	MALI_EXECUTOR_HINT_GP_BOUND = 0
+#define MALI_EXECUTOR_HINT_MAX        1
+} mali_executor_hint;
+
+extern mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX];
+
+/* forward declare struct instead of using include */
+struct mali_session_data;
+struct mali_group;
+struct mali_pp_core;
+
+extern _mali_osk_spinlock_irq_t *mali_executor_lock_obj;
+
+#define MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj);
+
+_mali_osk_errcode_t mali_executor_initialize(void);
+void mali_executor_terminate(void);
+
+void mali_executor_populate(void);
+void mali_executor_depopulate(void);
+
+void mali_executor_suspend(void);
+void mali_executor_resume(void);
+
+u32 mali_executor_get_num_cores_total(void);
+u32 mali_executor_get_num_cores_enabled(void);
+struct mali_pp_core *mali_executor_get_virtual_pp(void);
+struct mali_group *mali_executor_get_virtual_group(void);
+
+void mali_executor_zap_all_active(struct mali_session_data *session);
+
+/**
+ * Schedule GP and PP according to bitmask.
+ *
+ * @param mask A scheduling bitmask.
+ * @param deferred_schedule MALI_TRUE if schedule should be deferred, MALI_FALSE if not.
+ */
+void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule);
+
+_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group, mali_bool in_upper_half);
+_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group, mali_bool in_upper_half);
+_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group, mali_bool in_upper_half);
+void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups);
+void mali_executor_group_power_down(struct mali_group *groups[], u32 num_groups);
+
+void mali_executor_abort_session(struct mali_session_data *session);
+
+void mali_executor_core_scaling_enable(void);
+void mali_executor_core_scaling_disable(void);
+mali_bool mali_executor_core_scaling_is_enabled(void);
+
+void mali_executor_group_enable(struct mali_group *group);
+void mali_executor_group_disable(struct mali_group *group);
+mali_bool mali_executor_group_is_disabled(struct mali_group *group);
+
+int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override);
+
+#if MALI_STATE_TRACKING
+u32 mali_executor_dump_state(char *buf, u32 size);
+#endif
+
+MALI_STATIC_INLINE void mali_executor_hint_enable(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	mali_executor_hints[hint] = MALI_TRUE;
+}
+
+MALI_STATIC_INLINE void mali_executor_hint_disable(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	mali_executor_hints[hint] = MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_executor_hint_is_enabled(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	return mali_executor_hints[hint];
+}
+
+void mali_executor_running_status_print(void);
+void mali_executor_status_dump(void);
+void mali_executor_lock(void);
+void mali_executor_unlock(void);
+#endif /* __MALI_EXECUTOR_H__ */
diff --git a/mali/common/mali_gp.c b/mali/common/mali_gp.c
new file mode 100755
index 0000000..60da8ac
--- /dev/null
+++ b/mali/common/mali_gp.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_gp.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "regs/mali_gp_regs.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+
+#include <mali_platform.h>
+
+static struct mali_gp_core *mali_global_gp_core = NULL;
+
+/* Interrupt handlers */
+static void mali_gp_irq_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data);
+
+struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group)
+{
+	struct mali_gp_core *core = NULL;
+
+	MALI_DEBUG_ASSERT(NULL == mali_global_gp_core);
+	MALI_DEBUG_PRINT(2, ("Mali GP: Creating Mali GP core: %s\n", resource->description));
+
+	core = _mali_osk_malloc(sizeof(struct mali_gp_core));
+	if (NULL != core) {
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALIGP2_REGISTER_ADDRESS_SPACE_SIZE)) {
+			_mali_osk_errcode_t ret;
+
+			ret = mali_gp_reset(core);
+
+			if (_MALI_OSK_ERR_OK == ret) {
+				ret = mali_group_add_gp_core(group, core);
+				if (_MALI_OSK_ERR_OK == ret) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					core->irq = _mali_osk_irq_init(resource->irq,
+								       mali_group_upper_half_gp,
+								       group,
+								       mali_gp_irq_probe_trigger,
+								       mali_gp_irq_probe_ack,
+								       core,
+								       resource->description);
+					if (NULL != core->irq) {
+						MALI_DEBUG_PRINT(4, ("Mali GP: set global gp core from 0x%08X to 0x%08X\n", mali_global_gp_core, core));
+						mali_global_gp_core = core;
+
+						return core;
+					} else {
+						MALI_PRINT_ERROR(("Mali GP: Failed to setup interrupt handlers for GP core %s\n", core->hw_core.description));
+					}
+					mali_group_remove_gp_core(group);
+				} else {
+					MALI_PRINT_ERROR(("Mali GP: Failed to add core %s to group\n", core->hw_core.description));
+				}
+			}
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Failed to allocate memory for GP core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_gp_delete(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	_mali_osk_irq_term(core->irq);
+	mali_hw_core_delete(&core->hw_core);
+	mali_global_gp_core = NULL;
+	_mali_osk_free(core);
+}
+
+void mali_gp_stop_bus(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_STOP_BUS);
+}
+
+_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core)
+{
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Send the stop bus command. */
+	mali_gp_stop_bus(core);
+
+	/* Wait for bus to be stopped */
+	for (i = 0; i < MALI_REG_POLL_COUNT_SLOW; i++) {
+		if (mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS) & MALIGP2_REG_VAL_STATUS_BUS_STOPPED) {
+			break;
+		}
+	}
+
+	if (MALI_REG_POLL_COUNT_SLOW == i) {
+		MALI_PRINT_ERROR(("Mali GP: Failed to stop bus on %s\n", core->hw_core.description));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_gp_hard_reset(struct mali_gp_core *core)
+{
+	const u32 reset_wait_target_register = MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_LIMIT;
+	const u32 reset_invalid_value = 0xC0FFE000;
+	const u32 reset_check_value = 0xC01A0000;
+	const u32 reset_default_value = 0;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+	MALI_DEBUG_PRINT(4, ("Mali GP: Hard reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_invalid_value);
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_RESET);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value);
+		if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) {
+			break;
+		}
+	}
+
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Mali GP: The hard reset loop didn't work, unable to recover\n"));
+	}
+
+	mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_default_value); /* set it back to the default */
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+
+}
+
+void mali_gp_reset_async(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	MALI_DEBUG_PRINT(4, ("Mali GP: Reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALI400GP_REG_VAL_IRQ_RESET_COMPLETED);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALI400GP_REG_VAL_CMD_SOFT_RESET);
+
+}
+
+_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core)
+{
+	int i;
+	u32 rawstat = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		rawstat = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+		if (rawstat & MALI400GP_REG_VAL_IRQ_RESET_COMPLETED) {
+			break;
+		}
+	}
+
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Mali GP: Failed to reset core %s, rawstat: 0x%08x\n",
+				  core->hw_core.description, rawstat));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core)
+{
+	mali_gp_reset_async(core);
+	return mali_gp_reset_wait(core);
+}
+
+void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job)
+{
+	u32 startcmd = 0;
+	u32 *frame_registers = mali_gp_job_get_frame_registers(job);
+	u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job);
+	u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job);
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	if (mali_gp_job_has_vs_job(job)) {
+		startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_VS;
+	}
+
+	if (mali_gp_job_has_plbu_job(job)) {
+		startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_PLBU;
+	}
+
+	MALI_DEBUG_ASSERT(0 != startcmd);
+
+	mali_hw_core_register_write_array_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR, frame_registers, MALIGP2_NUM_REGS_FRAME);
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0);
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE);
+	}
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1);
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE);
+	}
+
+	MALI_DEBUG_PRINT(3, ("Mali GP: Starting job (0x%08x) on core %s with command 0x%08X\n", job, core->hw_core.description, startcmd));
+
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC);
+
+	/* Barrier to make sure the previous register write is finished */
+	_mali_osk_write_mem_barrier();
+
+	/* This is the command that starts the core.
+	 *
+	 * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just
+	 * force core to assert the completion interrupt.
+	 */
+#if !defined(PROFILING_SKIP_GP_JOBS)
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, startcmd);
+#else
+	{
+		u32 bits = 0;
+
+		if (mali_gp_job_has_vs_job(job))
+			bits = MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST;
+		if (mali_gp_job_has_plbu_job(job))
+			bits |= MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST;
+
+		mali_hw_core_register_write_relaxed(&core->hw_core,
+						    MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, bits);
+	}
+#endif
+
+	/* Barrier to make sure the previous register write is finished */
+	_mali_osk_write_mem_barrier();
+}
+
+void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr)
+{
+	u32 irq_readout;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+
+	if (irq_readout & MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM | MALIGP2_REG_VAL_IRQ_HANG));
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED); /* re-enable interrupts */
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR, start_addr);
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR, end_addr);
+
+		MALI_DEBUG_PRINT(3, ("Mali GP: Resuming job\n"));
+
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC);
+		_mali_osk_write_mem_barrier();
+	}
+	/*
+	 * else: core has been reset between PLBU_OUT_OF_MEM interrupt and this new heap response.
+	 * A timeout or a page fault on Mali-200 PP core can cause this behaviour.
+	 */
+}
+
+u32 mali_gp_core_get_version(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VERSION);
+}
+
+struct mali_gp_core *mali_gp_get_global_gp_core(void)
+{
+	return mali_global_gp_core;
+}
+
+/* ------------- interrupt handling below ------------------ */
+static void mali_gp_irq_probe_trigger(void *data)
+{
+	struct mali_gp_core *core = (struct mali_gp_core *)data;
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR);
+	_mali_osk_mem_barrier();
+}
+
+static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data)
+{
+	struct mali_gp_core *core = (struct mali_gp_core *)data;
+	u32 irq_readout;
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT);
+	if (MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR & irq_readout) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR);
+		_mali_osk_mem_barrier();
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+/* ------ local helper functions below --------- */
+#if MALI_STATE_TRACKING
+u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\tGP: %s\n", core->hw_core.description);
+
+	return n;
+}
+#endif
+
+void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job)
+{
+	u32 val0 = 0;
+	u32 val1 = 0;
+	u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job);
+	u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job);
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		val0 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE);
+		mali_gp_job_set_perf_counter_value0(job, val0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C0, val0);
+		_mali_osk_profiling_record_global_counters(COUNTER_VP_0_C0, val0);
+#endif
+
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		val1 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE);
+		mali_gp_job_set_perf_counter_value1(job, val1);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C1, val1);
+		_mali_osk_profiling_record_global_counters(COUNTER_VP_0_C1, val1);
+#endif
+	}
+}
diff --git a/mali/common/mali_gp.h b/mali/common/mali_gp.h
new file mode 100755
index 0000000..ecbe70e
--- /dev/null
+++ b/mali/common/mali_gp.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GP_H__
+#define __MALI_GP_H__
+
+#include "mali_osk.h"
+#include "mali_gp_job.h"
+#include "mali_hw_core.h"
+#include "regs/mali_gp_regs.h"
+
+struct mali_group;
+
+/**
+ * Definition of the GP core struct
+ * Used to track a GP core in the system.
+ */
+struct mali_gp_core {
+	struct mali_hw_core  hw_core;           /**< Common for all HW cores */
+	_mali_osk_irq_t     *irq;               /**< IRQ handler */
+};
+
+_mali_osk_errcode_t mali_gp_initialize(void);
+void mali_gp_terminate(void);
+
+struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group);
+void mali_gp_delete(struct mali_gp_core *core);
+
+void mali_gp_stop_bus(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core);
+void mali_gp_reset_async(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core);
+void mali_gp_hard_reset(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core);
+
+void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job);
+void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr);
+
+u32 mali_gp_core_get_version(struct mali_gp_core *core);
+
+struct mali_gp_core *mali_gp_get_global_gp_core(void);
+
+#if MALI_STATE_TRACKING
+u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size);
+#endif
+
+void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job);
+
+MALI_STATIC_INLINE const char *mali_gp_core_description(struct mali_gp_core *core)
+{
+	return core->hw_core.description;
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_gp_get_interrupt_result(struct mali_gp_core *core)
+{
+	u32 stat_used = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT) &
+			MALIGP2_REG_VAL_IRQ_MASK_USED;
+
+	if (0 == stat_used) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	} else if ((MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST |
+		    MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST) == stat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS;
+	} else if (MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST == stat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS_VS;
+	} else if (MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST == stat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS_PLBU;
+	} else if (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM & stat_used) {
+		return MALI_INTERRUPT_RESULT_OOM;
+	}
+
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_get_rawstat(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core,
+					  MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+}
+
+MALI_STATIC_INLINE u32 mali_gp_is_active(struct mali_gp_core *core)
+{
+	u32 status = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS);
+	return (status & MALIGP2_REG_VAL_STATUS_MASK_ACTIVE) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_gp_mask_all_interrupts(struct mali_gp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_NONE);
+}
+
+MALI_STATIC_INLINE void mali_gp_enable_interrupts(struct mali_gp_core *core, enum mali_interrupt_result exceptions)
+{
+	/* Enable all interrupts, except those specified in exceptions */
+	u32 value;
+
+	if (MALI_INTERRUPT_RESULT_SUCCESS_VS == exceptions) {
+		/* Enable all used except VS complete */
+		value = MALIGP2_REG_VAL_IRQ_MASK_USED &
+			~MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST;
+	} else {
+		MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_SUCCESS_PLBU ==
+				  exceptions);
+		/* Enable all used except PLBU complete */
+		value = MALIGP2_REG_VAL_IRQ_MASK_USED &
+			~MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST;
+	}
+
+	mali_hw_core_register_write(&core->hw_core,
+				    MALIGP2_REG_ADDR_MGMT_INT_MASK,
+				    value);
+}
+
+MALI_STATIC_INLINE u32 mali_gp_read_plbu_alloc_start_addr(struct mali_gp_core *core)
+{
+	return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR);
+}
+
+#endif /* __MALI_GP_H__ */
diff --git a/mali/common/mali_gp_job.c b/mali/common/mali_gp_job.c
new file mode 100755
index 0000000..fb8dcd8
--- /dev/null
+++ b/mali/common/mali_gp_job.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_gp_job.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_defer_bind.h"
+
+static u32 gp_counter_src0 = MALI_HW_CORE_NO_COUNTER;      /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+static u32 gp_counter_src1 = MALI_HW_CORE_NO_COUNTER;           /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+static void _mali_gp_del_varying_allocations(struct mali_gp_job *job);
+
+
+static int _mali_gp_add_varying_allocations(struct mali_session_data *session,
+		struct mali_gp_job *job,
+		u32 *alloc,
+		u32 num)
+{
+	int i = 0;
+	struct mali_gp_allocation_node *alloc_node;
+	mali_mem_allocation *mali_alloc = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+
+	for (i = 0 ; i < num ; i++) {
+		MALI_DEBUG_ASSERT(alloc[i]);
+		alloc_node = _mali_osk_calloc(1, sizeof(struct mali_gp_allocation_node));
+		if (alloc_node) {
+			INIT_LIST_HEAD(&alloc_node->node);
+			/* find mali allocation structure by vaddress*/
+			mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, alloc[i], 0);
+
+			if (likely(mali_vma_node)) {
+				mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+				MALI_DEBUG_ASSERT(alloc[i] == mali_vma_node->vm_node.start);
+			} else {
+				MALI_DEBUG_PRINT(1, ("ERROE!_mali_gp_add_varying_allocations,can't find allocation %d by address =0x%x, num=%d\n", i, alloc[i], num));
+				_mali_osk_free(alloc_node);
+				goto fail;
+			}
+			alloc_node->alloc = mali_alloc;
+			/* add to gp job varying alloc list*/
+			list_move(&alloc_node->node, &job->varying_alloc);
+		} else
+			goto fail;
+	}
+
+	return 0;
+fail:
+	MALI_DEBUG_PRINT(1, ("ERROE!_mali_gp_add_varying_allocations,failed to alloc memory!\n"));
+	_mali_gp_del_varying_allocations(job);
+	return -1;
+}
+
+
+static void _mali_gp_del_varying_allocations(struct mali_gp_job *job)
+{
+	struct mali_gp_allocation_node *alloc_node, *tmp_node;
+
+	list_for_each_entry_safe(alloc_node, tmp_node, &job->varying_alloc, node) {
+		list_del(&alloc_node->node);
+		kfree(alloc_node);
+	}
+	INIT_LIST_HEAD(&job->varying_alloc);
+}
+
+struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker)
+{
+	struct mali_gp_job *job;
+	u32 perf_counter_flag;
+	u32 __user *memory_list = NULL;
+	struct mali_gp_allocation_node *alloc_node, *tmp_node;
+
+	job = _mali_osk_calloc(1, sizeof(struct mali_gp_job));
+	if (NULL != job) {
+		job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_FINISHED, sizeof(_mali_uk_gp_job_finished_s));
+		if (NULL == job->finished_notification) {
+			goto fail3;
+		}
+
+		job->oom_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_STALLED, sizeof(_mali_uk_gp_job_suspended_s));
+		if (NULL == job->oom_notification) {
+			goto fail2;
+		}
+
+		if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_gp_start_job_s))) {
+			goto fail1;
+		}
+
+		perf_counter_flag = mali_gp_job_get_perf_counter_flag(job);
+
+		/* case when no counters came from user space
+		 * so pass the debugfs / DS-5 provided global ones to the job object */
+		if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) ||
+		      (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) {
+			mali_gp_job_set_perf_counter_src0(job, mali_gp_job_get_gp_counter_src0());
+			mali_gp_job_set_perf_counter_src1(job, mali_gp_job_get_gp_counter_src1());
+		}
+
+		_mali_osk_list_init(&job->list);
+		job->session = session;
+		job->id = id;
+		job->heap_current_addr = job->uargs.frame_registers[4];
+		job->perf_counter_value0 = 0;
+		job->perf_counter_value1 = 0;
+		job->pid = _mali_osk_get_pid();
+		job->tid = _mali_osk_get_tid();
+
+
+		INIT_LIST_HEAD(&job->varying_alloc);
+		INIT_LIST_HEAD(&job->vary_todo);
+		job->dmem = NULL;
+
+		if (job->uargs.deferred_mem_num > session->allocation_mgr.mali_allocation_num) {
+			MALI_PRINT_ERROR(("Mali GP job: The number of  varying buffer to defer bind  is invalid !\n"));
+			goto fail1;
+		}
+
+		/* add varying allocation list*/
+		if (job->uargs.deferred_mem_num > 0) {
+			/* copy varying list from user space*/
+			job->varying_list = _mali_osk_calloc(1, sizeof(u32) * job->uargs.deferred_mem_num);
+			if (!job->varying_list) {
+				MALI_PRINT_ERROR(("Mali GP job: allocate varying_list failed varying_alloc_num = %d !\n", job->uargs.deferred_mem_num));
+				goto fail1;
+			}
+
+			memory_list = (u32 __user *)(uintptr_t)job->uargs.deferred_mem_list;
+
+			if (0 != _mali_osk_copy_from_user(job->varying_list, memory_list, sizeof(u32) * job->uargs.deferred_mem_num)) {
+				MALI_PRINT_ERROR(("Mali GP job: Failed to copy varying list from user space!\n"));
+				goto fail;
+			}
+
+			if (unlikely(_mali_gp_add_varying_allocations(session, job, job->varying_list,
+					job->uargs.deferred_mem_num))) {
+				MALI_PRINT_ERROR(("Mali GP job: _mali_gp_add_varying_allocations failed!\n"));
+				goto fail;
+			}
+
+			/* do preparetion for each allocation */
+			list_for_each_entry_safe(alloc_node, tmp_node, &job->varying_alloc, node) {
+				if (unlikely(_MALI_OSK_ERR_OK != mali_mem_defer_bind_allocation_prepare(alloc_node->alloc, &job->vary_todo, &job->required_varying_memsize))) {
+					MALI_PRINT_ERROR(("Mali GP job: mali_mem_defer_bind_allocation_prepare failed!\n"));
+					goto fail;
+				}
+			}
+
+			_mali_gp_del_varying_allocations(job);
+
+			/* bind varying here, to avoid memory latency issue. */
+			{
+				struct mali_defer_mem_block dmem_block;
+
+				INIT_LIST_HEAD(&dmem_block.free_pages);
+				atomic_set(&dmem_block.num_free_pages, 0);
+
+				if (mali_mem_prepare_mem_for_job(job, &dmem_block)) {
+					MALI_PRINT_ERROR(("Mali GP job: mali_mem_prepare_mem_for_job failed!\n"));
+					goto fail;
+				}
+				if (_MALI_OSK_ERR_OK != mali_mem_defer_bind(job, &dmem_block)) {
+					MALI_PRINT_ERROR(("gp job create, mali_mem_defer_bind failed! GP %x fail!", job));
+					goto fail;
+				}
+			}
+
+			if (job->uargs.varying_memsize > MALI_UK_BIG_VARYING_SIZE) {
+				job->big_job = 1;
+			}
+		}
+		job->pp_tracker = pp_tracker;
+		if (NULL != job->pp_tracker) {
+			/* Take a reference on PP job's tracker that will be released when the GP
+			   job is done. */
+			mali_timeline_system_tracker_get(session->timeline_system, pp_tracker);
+		}
+
+		mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_GP, NULL, job);
+		mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence));
+
+		return job;
+	} else {
+		MALI_PRINT_ERROR(("Mali GP job: _mali_osk_calloc failed!\n"));
+		return NULL;
+	}
+
+
+fail:
+	_mali_osk_free(job->varying_list);
+	/* Handle allocate fail here, free all varying node */
+	{
+		struct mali_backend_bind_list *bkn, *bkn_tmp;
+		list_for_each_entry_safe(bkn, bkn_tmp , &job->vary_todo, node) {
+			list_del(&bkn->node);
+			_mali_osk_free(bkn);
+		}
+	}
+fail1:
+	_mali_osk_notification_delete(job->oom_notification);
+fail2:
+	_mali_osk_notification_delete(job->finished_notification);
+fail3:
+	_mali_osk_free(job);
+	return NULL;
+}
+
+void mali_gp_job_delete(struct mali_gp_job *job)
+{
+	struct mali_backend_bind_list *bkn, *bkn_tmp;
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(NULL == job->pp_tracker);
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list));
+	_mali_osk_free(job->varying_list);
+
+	/* Handle allocate fail here, free all varying node */
+	list_for_each_entry_safe(bkn, bkn_tmp , &job->vary_todo, node) {
+		list_del(&bkn->node);
+		_mali_osk_free(bkn);
+	}
+
+	mali_mem_defer_dmem_free(job);
+
+	/* de-allocate the pre-allocated oom notifications */
+	if (NULL != job->oom_notification) {
+		_mali_osk_notification_delete(job->oom_notification);
+		job->oom_notification = NULL;
+	}
+	if (NULL != job->finished_notification) {
+		_mali_osk_notification_delete(job->finished_notification);
+		job->finished_notification = NULL;
+	}
+
+	_mali_osk_free(job);
+}
+
+void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list)
+{
+	struct mali_gp_job *iter;
+	struct mali_gp_job *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Find position in list/queue where job should be added. */
+	_MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list,
+					    struct mali_gp_job, list) {
+
+		/* A span is used to handle job ID wrapping. */
+		bool job_is_after = (mali_gp_job_get_id(job) -
+				     mali_gp_job_get_id(iter)) <
+				    MALI_SCHEDULER_JOB_ID_SPAN;
+
+		if (job_is_after) {
+			break;
+		}
+	}
+
+	_mali_osk_list_add(&job->list, &iter->list);
+}
+
+u32 mali_gp_job_get_gp_counter_src0(void)
+{
+	return gp_counter_src0;
+}
+
+void mali_gp_job_set_gp_counter_src0(u32 counter)
+{
+	gp_counter_src0 = counter;
+}
+
+u32 mali_gp_job_get_gp_counter_src1(void)
+{
+	return gp_counter_src1;
+}
+
+void mali_gp_job_set_gp_counter_src1(u32 counter)
+{
+	gp_counter_src1 = counter;
+}
+
+mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (NULL != job->pp_tracker) {
+		schedule_mask |= mali_timeline_system_tracker_put(job->session->timeline_system, job->pp_tracker, MALI_FALSE == success);
+		job->pp_tracker = NULL;
+	}
+
+	return schedule_mask;
+}
diff --git a/mali/common/mali_gp_job.h b/mali/common/mali_gp_job.h
new file mode 100755
index 0000000..6a67543
--- /dev/null
+++ b/mali/common/mali_gp_job.h
@@ -0,0 +1,324 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GP_JOB_H__
+#define __MALI_GP_JOB_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_session.h"
+#include "mali_timeline.h"
+#include "mali_scheduler_types.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+#include "mali_timeline.h"
+
+struct mali_defer_mem;
+/**
+ * This structure represents a GP job
+ *
+ * The GP job object itself is not protected by any single lock,
+ * but relies on other locks instead (scheduler, executor and timeline lock).
+ * Think of the job object as moving between these sub systems through-out
+ * its lifetime. Different part of the GP job struct is used by different
+ * subsystems. Accessor functions ensure that correct lock is taken.
+ * Do NOT access any data members directly from outside this module!
+ */
+struct mali_gp_job {
+	/*
+	 * These members are typically only set at creation,
+	 * and only read later on.
+	 * They do not require any lock protection.
+	 */
+	_mali_uk_gp_start_job_s uargs;                     /**< Arguments from user space */
+	struct mali_session_data *session;                 /**< Session which submitted this job */
+	u32 pid;                                           /**< Process ID of submitting process */
+	u32 tid;                                           /**< Thread ID of submitting thread */
+	u32 id;                                            /**< Identifier for this job in kernel space (sequential numbering) */
+	u32 cache_order;                                   /**< Cache order used for L2 cache flushing (sequential numbering) */
+	struct mali_timeline_tracker tracker;              /**< Timeline tracker for this job */
+	struct mali_timeline_tracker *pp_tracker;          /**< Pointer to Timeline tracker for PP job that depends on this job. */
+	_mali_osk_notification_t *finished_notification;   /**< Notification sent back to userspace on job complete */
+
+	/*
+	 * These members are used by the scheduler,
+	 * protected by scheduler lock
+	 */
+	_mali_osk_list_t list;                             /**< Used to link jobs together in the scheduler queue */
+
+	/*
+	 * These members are used by the executor and/or group,
+	 * protected by executor lock
+	 */
+	_mali_osk_notification_t *oom_notification;        /**< Notification sent back to userspace on OOM */
+
+	/*
+	 * Set by executor/group on job completion, read by scheduler when
+	 * returning job to user. Hold executor lock when setting,
+	 * no lock needed when reading
+	 */
+	u32 heap_current_addr;                             /**< Holds the current HEAP address when the job has completed */
+	u32 perf_counter_value0;                           /**< Value of performance counter 0 (to be returned to user space) */
+	u32 perf_counter_value1;                           /**< Value of performance counter 1 (to be returned to user space) */
+	struct mali_defer_mem *dmem;                                          /** < used for defer bind to store dmem info */
+	struct list_head varying_alloc;                    /**< hold the list of varying allocations */
+	u32 bind_flag;                                     /** < flag for deferbind*/
+	u32 *varying_list;                                 /**< varying memory list need to to defer bind*/
+	struct list_head vary_todo;                        /**< list of backend list need to do defer bind*/
+	u32 required_varying_memsize;                      /** < size of varying memory to reallocate*/
+	u32 big_job;                                       /** < if the gp job have large varying output and may take long time*/
+};
+
+#define MALI_DEFER_BIND_MEMORY_PREPARED (0x1 << 0)
+#define MALI_DEFER_BIND_MEMORY_BINDED (0x1 << 2)
+
+struct mali_gp_allocation_node {
+	struct list_head node;
+	mali_mem_allocation *alloc;
+};
+
+struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker);
+void mali_gp_job_delete(struct mali_gp_job *job);
+
+u32 mali_gp_job_get_gp_counter_src0(void);
+void mali_gp_job_set_gp_counter_src0(u32 counter);
+u32 mali_gp_job_get_gp_counter_src1(void);
+void mali_gp_job_set_gp_counter_src1(u32 counter);
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->id;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_cache_order(struct mali_gp_job *job,
+		u32 cache_order)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	job->cache_order = cache_order;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_cache_order(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->cache_order;
+}
+
+MALI_STATIC_INLINE u64 mali_gp_job_get_user_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.user_job_ptr;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_frame_builder_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_builder_id;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_flush_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.flush_id;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_pid(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->pid;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_tid(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->tid;
+}
+
+MALI_STATIC_INLINE u32 *mali_gp_job_get_frame_registers(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_registers;
+}
+
+MALI_STATIC_INLINE struct mali_session_data *mali_gp_job_get_session(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->session;
+}
+
+MALI_STATIC_INLINE mali_bool mali_gp_job_has_vs_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.frame_registers[0] != job->uargs.frame_registers[1]) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_gp_job_has_plbu_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.frame_registers[2] != job->uargs.frame_registers[3]) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_current_heap_addr(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->heap_current_addr;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_current_heap_addr(struct mali_gp_job *job, u32 heap_addr)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->heap_current_addr = heap_addr;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_flag(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_flag;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src0(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_src0;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src1(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_src1;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value0(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value0;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value1(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value1;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src0(struct mali_gp_job *job, u32 src)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.perf_counter_src0 = src;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src1(struct mali_gp_job *job, u32 src)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.perf_counter_src1 = src;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value0(struct mali_gp_job *job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value0 = value;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value1(struct mali_gp_job *job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value1 = value;
+}
+
+void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list);
+
+MALI_STATIC_INLINE void mali_gp_job_list_move(struct mali_gp_job *job,
+		_mali_osk_list_t *list)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list));
+	_mali_osk_list_move(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_gp_job_list_remove(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->list);
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *
+mali_gp_job_get_finished_notification(struct mali_gp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->finished_notification);
+
+	notification = job->finished_notification;
+	job->finished_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *mali_gp_job_get_oom_notification(
+	struct mali_gp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job->oom_notification);
+
+	notification = job->oom_notification;
+	job->oom_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_oom_notification(
+	struct mali_gp_job *job,
+	_mali_osk_notification_t *notification)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(NULL == job->oom_notification);
+	job->oom_notification = notification;
+}
+
+MALI_STATIC_INLINE struct mali_timeline_tracker *mali_gp_job_get_tracker(
+	struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return &(job->tracker);
+}
+
+
+MALI_STATIC_INLINE u32 *mali_gp_job_get_timeline_point_ptr(
+	struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr;
+}
+
+
+/**
+ * Release reference on tracker for PP job that depends on this GP job.
+ *
+ * @note If GP job has a reference on tracker, this function MUST be called before the GP job is
+ * deleted.
+ *
+ * @param job GP job that is done.
+ * @param success MALI_TRUE if job completed successfully, MALI_FALSE if not.
+ * @return A scheduling bitmask indicating whether scheduling needs to be done.
+ */
+mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success);
+
+#endif /* __MALI_GP_JOB_H__ */
diff --git a/mali/common/mali_group.c b/mali/common/mali_group.c
new file mode 100755
index 0000000..2878639
--- /dev/null
+++ b/mali/common/mali_group.c
@@ -0,0 +1,1949 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+#include "mali_kernel_common.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_mmu.h"
+#include "mali_dlbu.h"
+#include "mali_broadcast.h"
+#include "mali_scheduler.h"
+#include "mali_osk_profiling.h"
+#include "mali_osk_mali.h"
+#include "mali_pm_domain.h"
+#include "mali_pm.h"
+#include "mali_executor.h"
+#include <mali_platform.h>
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+#include <linux/sched.h>
+#include <trace/events/gpu.h>
+#endif
+
+#define MALI_MAX_NUM_DOMAIN_REFS (MALI_MAX_NUMBER_OF_GROUPS * 2)
+
+#if defined(CONFIG_MALI400_PROFILING)
+static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num);
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+static struct mali_group *mali_global_groups[MALI_MAX_NUMBER_OF_GROUPS] = { NULL, };
+static u32 mali_global_num_groups = 0;
+
+/* SW timer for job execution */
+int mali_max_job_runtime = MALI_MAX_JOB_RUNTIME_DEFAULT;
+
+/* local helper functions */
+static void mali_group_bottom_half_mmu(void *data);
+static void mali_group_bottom_half_gp(void *data);
+static void mali_group_bottom_half_pp(void *data);
+static void mali_group_timeout(void *data);
+static void mali_group_reset_pp(struct mali_group *group);
+static void mali_group_reset_mmu(struct mali_group *group);
+
+static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session, mali_bool is_reload);
+static void mali_group_recovery_reset(struct mali_group *group);
+
+struct mali_group *mali_group_create(struct mali_l2_cache_core *core,
+        struct mali_dlbu_core *dlbu,
+        struct mali_bcast_unit *bcast,
+        u32 domain_index)
+{
+    struct mali_group *group = NULL;
+
+    if (mali_global_num_groups >= MALI_MAX_NUMBER_OF_GROUPS) {
+        MALI_PRINT_ERROR(("Mali group: Too many group objects created\n"));
+        return NULL;
+    }
+
+    group = _mali_osk_calloc(1, sizeof(struct mali_group));
+    if (NULL != group) {
+        group->timeout_timer = _mali_osk_timer_init();
+        if (NULL != group->timeout_timer) {
+            _mali_osk_timer_setcallback(group->timeout_timer, mali_group_timeout, (void *)group);
+
+            group->l2_cache_core[0] = core;
+            _mali_osk_list_init(&group->group_list);
+            _mali_osk_list_init(&group->executor_list);
+            _mali_osk_list_init(&group->pm_domain_list);
+            group->bcast_core = bcast;
+            group->dlbu_core = dlbu;
+
+            /* register this object as a part of the correct power domain */
+            if ((NULL != core) || (NULL != dlbu) || (NULL != bcast))
+                group->pm_domain = mali_pm_register_group(domain_index, group);
+
+            mali_global_groups[mali_global_num_groups] = group;
+            mali_global_num_groups++;
+
+            return group;
+        }
+        _mali_osk_free(group);
+    }
+
+    return NULL;
+}
+
+void mali_group_delete(struct mali_group *group)
+{
+    u32 i;
+
+    MALI_DEBUG_PRINT(4, ("Deleting group %s\n",
+                mali_group_core_description(group)));
+
+    MALI_DEBUG_ASSERT(NULL == group->parent_group);
+    MALI_DEBUG_ASSERT((MALI_GROUP_STATE_INACTIVE == group->state) || ((MALI_GROUP_STATE_ACTIVATION_PENDING == group->state)));
+
+    /* Delete the resources that this group owns */
+    if (NULL != group->gp_core) {
+        mali_gp_delete(group->gp_core);
+    }
+
+    if (NULL != group->pp_core) {
+        mali_pp_delete(group->pp_core);
+    }
+
+    if (NULL != group->mmu) {
+        mali_mmu_delete(group->mmu);
+    }
+
+    if (mali_group_is_virtual(group)) {
+        /* Remove all groups from virtual group */
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            child->parent_group = NULL;
+            mali_group_delete(child);
+        }
+
+        mali_dlbu_delete(group->dlbu_core);
+
+        if (NULL != group->bcast_core) {
+            mali_bcast_unit_delete(group->bcast_core);
+        }
+    }
+
+    for (i = 0; i < mali_global_num_groups; i++) {
+        if (mali_global_groups[i] == group) {
+            mali_global_groups[i] = NULL;
+            mali_global_num_groups--;
+
+            if (i != mali_global_num_groups) {
+                /* We removed a group from the middle of the array -- move the last
+                 * group to the current position to close the gap */
+                mali_global_groups[i] = mali_global_groups[mali_global_num_groups];
+                mali_global_groups[mali_global_num_groups] = NULL;
+            }
+
+            break;
+        }
+    }
+
+    if (NULL != group->timeout_timer) {
+        _mali_osk_timer_del(group->timeout_timer);
+        _mali_osk_timer_term(group->timeout_timer);
+    }
+
+    if (NULL != group->bottom_half_work_mmu) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_mmu);
+    }
+
+    if (NULL != group->bottom_half_work_gp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_gp);
+    }
+
+    if (NULL != group->bottom_half_work_pp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_pp);
+    }
+
+    _mali_osk_free(group);
+}
+
+_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group, struct mali_mmu_core *mmu_core)
+{
+    /* This group object now owns the MMU core object */
+    group->mmu = mmu_core;
+    group->bottom_half_work_mmu = _mali_osk_wq_create_work(mali_group_bottom_half_mmu, group);
+    if (NULL == group->bottom_half_work_mmu) {
+        return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_mmu_core(struct mali_group *group)
+{
+    /* This group object no longer owns the MMU core object */
+    group->mmu = NULL;
+    if (NULL != group->bottom_half_work_mmu) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_mmu);
+    }
+}
+
+_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group, struct mali_gp_core *gp_core)
+{
+    /* This group object now owns the GP core object */
+    group->gp_core = gp_core;
+    group->bottom_half_work_gp = _mali_osk_wq_create_work(mali_group_bottom_half_gp, group);
+    if (NULL == group->bottom_half_work_gp) {
+        return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_gp_core(struct mali_group *group)
+{
+    /* This group object no longer owns the GP core object */
+    group->gp_core = NULL;
+    if (NULL != group->bottom_half_work_gp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_gp);
+    }
+}
+
+_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group, struct mali_pp_core *pp_core)
+{
+    /* This group object now owns the PP core object */
+    group->pp_core = pp_core;
+    group->bottom_half_work_pp = _mali_osk_wq_create_work(mali_group_bottom_half_pp, group);
+    if (NULL == group->bottom_half_work_pp) {
+        return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_pp_core(struct mali_group *group)
+{
+    /* This group object no longer owns the PP core object */
+    group->pp_core = NULL;
+    if (NULL != group->bottom_half_work_pp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_pp);
+    }
+}
+
+enum mali_group_state mali_group_activate(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(4, ("Group: Activating group %s\n",
+                mali_group_core_description(group)));
+
+    if (MALI_GROUP_STATE_INACTIVE == group->state) {
+        /* Group is inactive, get PM refs in order to power up */
+
+        /*
+         * We'll take a maximum of 2 power domain references pr group,
+         * one for the group itself, and one for it's L2 cache.
+         */
+        struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS];
+        struct mali_group *groups[MALI_MAX_NUM_DOMAIN_REFS];
+        u32 num_domains = 0;
+        mali_bool all_groups_on;
+
+        /* Deal with child groups first */
+        if (mali_group_is_virtual(group)) {
+            /*
+             * The virtual group might have 0, 1 or 2 L2s in
+             * its l2_cache_core array, but we ignore these and
+             * let the child groups take the needed L2 cache ref
+             * on behalf of the virtual group.
+             * In other words; The L2 refs are taken in pair with
+             * the physical group which the L2 is attached to.
+             */
+            struct mali_group *child;
+            struct mali_group *temp;
+
+            /*
+             * Child group is inactive, get PM
+             * refs in order to power up.
+             */
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp,
+                    &group->group_list,
+                    struct mali_group, group_list) {
+                MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE
+                        == child->state);
+
+                child->state = MALI_GROUP_STATE_ACTIVATION_PENDING;
+
+                MALI_DEBUG_ASSERT_POINTER(
+                        child->pm_domain);
+                domains[num_domains] = child->pm_domain;
+                groups[num_domains] = child;
+                num_domains++;
+
+                /*
+                 * Take L2 domain ref for child group.
+                 */
+                MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS
+                        > num_domains);
+                domains[num_domains] = mali_l2_cache_get_pm_domain(
+                        child->l2_cache_core[0]);
+                groups[num_domains] = NULL;
+                MALI_DEBUG_ASSERT(NULL ==
+                        child->l2_cache_core[1]);
+                num_domains++;
+            }
+        } else {
+            /* Take L2 domain ref for physical groups. */
+            MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+                    num_domains);
+
+            domains[num_domains] = mali_l2_cache_get_pm_domain(
+                    group->l2_cache_core[0]);
+            groups[num_domains] = NULL;
+            MALI_DEBUG_ASSERT(NULL == group->l2_cache_core[1]);
+            num_domains++;
+        }
+
+        /* Do the group itself last (it's dependencies first) */
+
+        group->state = MALI_GROUP_STATE_ACTIVATION_PENDING;
+
+        MALI_DEBUG_ASSERT_POINTER(group->pm_domain);
+        domains[num_domains] = group->pm_domain;
+        groups[num_domains] = group;
+        num_domains++;
+
+        all_groups_on = mali_pm_get_domain_refs(domains, groups,
+                num_domains);
+
+        /*
+         * Complete activation for group, include
+         * virtual group or physical group.
+         */
+        if (MALI_TRUE == all_groups_on) {
+
+            mali_group_set_active(group);
+        }
+    } else if (MALI_GROUP_STATE_ACTIVE == group->state) {
+        /* Already active */
+        MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+    } else {
+        /*
+         * Activation already pending, group->power_is_on could
+         * be both true or false. We need to wait for power up
+         * notification anyway.
+         */
+        MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING
+                == group->state);
+    }
+
+    MALI_DEBUG_PRINT(4, ("Group: group %s activation result: %s\n",
+                mali_group_core_description(group),
+                MALI_GROUP_STATE_ACTIVE == group->state ?
+                "ACTIVE" : "PENDING"));
+
+    return group->state;
+}
+
+mali_bool mali_group_set_active(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING == group->state);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+
+    MALI_DEBUG_PRINT(4, ("Group: Activation completed for %s\n",
+                mali_group_core_description(group)));
+
+    if (mali_group_is_virtual(group)) {
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list,
+                struct mali_group, group_list) {
+            if (MALI_TRUE != child->power_is_on) {
+                return MALI_FALSE;
+            }
+
+            child->state = MALI_GROUP_STATE_ACTIVE;
+        }
+
+        mali_group_reset(group);
+    }
+
+    /* Go to ACTIVE state */
+    group->state = MALI_GROUP_STATE_ACTIVE;
+
+    return MALI_TRUE;
+}
+
+mali_bool mali_group_deactivate(struct mali_group *group)
+{
+    struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS];
+    u32 num_domains = 0;
+    mali_bool power_down = MALI_FALSE;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE != group->state);
+
+    MALI_DEBUG_PRINT(3, ("Group: Deactivating group %s\n",
+                mali_group_core_description(group)));
+
+    group->state = MALI_GROUP_STATE_INACTIVE;
+
+    MALI_DEBUG_ASSERT_POINTER(group->pm_domain);
+    domains[num_domains] = group->pm_domain;
+    num_domains++;
+
+    if (mali_group_is_virtual(group)) {
+        /* Release refs for all child groups */
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp,
+                &group->group_list,
+                struct mali_group, group_list) {
+            child->state = MALI_GROUP_STATE_INACTIVE;
+
+            MALI_DEBUG_ASSERT_POINTER(child->pm_domain);
+            domains[num_domains] = child->pm_domain;
+            num_domains++;
+
+            /* Release L2 cache domain for child groups */
+            MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+                    num_domains);
+            domains[num_domains] = mali_l2_cache_get_pm_domain(
+                    child->l2_cache_core[0]);
+            MALI_DEBUG_ASSERT(NULL == child->l2_cache_core[1]);
+            num_domains++;
+        }
+
+        /*
+         * Must do mali_group_power_down() steps right here for
+         * virtual group, because virtual group itself is likely to
+         * stay powered on, however child groups are now very likely
+         * to be powered off (and thus lose their state).
+         */
+
+        mali_group_clear_session(group);
+        /*
+         * Disable the broadcast unit (clear it's mask).
+         * This is needed in case the GPU isn't actually
+         * powered down at this point and groups are
+         * removed from an inactive virtual group.
+         * If not, then the broadcast unit will intercept
+         * their interrupts!
+         */
+        mali_bcast_disable(group->bcast_core);
+    } else {
+        /* Release L2 cache domain for physical groups */
+        MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+                num_domains);
+        domains[num_domains] = mali_l2_cache_get_pm_domain(
+                group->l2_cache_core[0]);
+        MALI_DEBUG_ASSERT(NULL == group->l2_cache_core[1]);
+        num_domains++;
+    }
+
+    power_down = mali_pm_put_domain_refs(domains, num_domains);
+
+    return power_down;
+}
+
+void mali_group_power_up(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Power up for %s\n",
+                mali_group_core_description(group)));
+
+    group->power_is_on = MALI_TRUE;
+
+    if (MALI_FALSE == mali_group_is_virtual(group)
+            && MALI_FALSE == mali_group_is_in_virtual(group)) {
+        mali_group_reset(group);
+    }
+
+    /*
+     * When we just acquire only one physical group form virt group,
+     * we should remove the bcast&dlbu mask from virt group and
+     * reset bcast and dlbu core, although part of pp cores in virt
+     * group maybe not be powered on.
+     */
+    if (MALI_TRUE == mali_group_is_virtual(group)) {
+        mali_bcast_reset(group->bcast_core);
+        mali_dlbu_update_mask(group->dlbu_core);
+    }
+}
+
+void mali_group_power_down(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Power down for %s\n",
+                mali_group_core_description(group)));
+
+    group->power_is_on = MALI_FALSE;
+
+    if (mali_group_is_virtual(group)) {
+        /*
+         * What we do for physical jobs in this function should
+         * already have been done in mali_group_deactivate()
+         * for virtual group.
+         */
+        MALI_DEBUG_ASSERT(NULL == group->session);
+    } else {
+        mali_group_clear_session(group);
+    }
+}
+
+MALI_DEBUG_CODE(static void mali_group_print_virtual(struct mali_group *vgroup)
+        {
+        u32 i;
+        struct mali_group *group;
+        struct mali_group *temp;
+
+        MALI_DEBUG_PRINT(4, ("Virtual group %s (%p)\n",
+                mali_group_core_description(vgroup),
+                vgroup));
+        MALI_DEBUG_PRINT(4, ("l2_cache_core[0] = %p, ref = %d\n", vgroup->l2_cache_core[0], vgroup->l2_cache_core_ref_count[0]));
+        MALI_DEBUG_PRINT(4, ("l2_cache_core[1] = %p, ref = %d\n", vgroup->l2_cache_core[1], vgroup->l2_cache_core_ref_count[1]));
+
+        i = 0;
+        _MALI_OSK_LIST_FOREACHENTRY(group, temp, &vgroup->group_list, struct mali_group, group_list) {
+        MALI_DEBUG_PRINT(4, ("[%d] %s (%p), l2_cache_core[0] = %p\n",
+                i, mali_group_core_description(group),
+                group, group->l2_cache_core[0]));
+        i++;
+        }
+        })
+
+static void mali_group_dump_core_status(struct mali_group *group)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT(NULL != group->gp_core || (NULL != group->pp_core && !mali_group_is_virtual(group)));
+
+	if (NULL != group->gp_core) {
+		MALI_PRINT(("Dump Group %s\n", group->gp_core->hw_core.description));
+
+		for (i = 0; i < 0xA8; i += 0x10) {
+			MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->gp_core->hw_core, i),
+				    mali_hw_core_register_read(&group->gp_core->hw_core, i + 4),
+				    mali_hw_core_register_read(&group->gp_core->hw_core, i + 8),
+				    mali_hw_core_register_read(&group->gp_core->hw_core, i + 12)));
+		}
+
+
+	} else {
+		MALI_PRINT(("Dump Group %s\n", group->pp_core->hw_core.description));
+
+		for (i = 0; i < 0x5c; i += 0x10) {
+			MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->pp_core->hw_core, i),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 4),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 8),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 12)));
+		}
+
+		/* Ignore some minor registers */
+		for (i = 0x1000; i < 0x1068; i += 0x10) {
+			MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->pp_core->hw_core, i),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 4),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 8),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 12)));
+		}
+	}
+
+	MALI_PRINT(("Dump Group MMU\n"));
+	for (i = 0; i < 0x24; i += 0x10) {
+		MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->mmu->hw_core, i),
+			    mali_hw_core_register_read(&group->mmu->hw_core, i + 4),
+			    mali_hw_core_register_read(&group->mmu->hw_core, i + 8),
+			    mali_hw_core_register_read(&group->mmu->hw_core, i + 12)));
+	}
+}
+
+
+/**
+ * @Dump group status
+ */
+void mali_group_dump_status(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	if (mali_group_is_virtual(group)) {
+		struct mali_group *group_c;
+		struct mali_group *temp;
+		_MALI_OSK_LIST_FOREACHENTRY(group_c, temp, &group->group_list, struct mali_group, group_list) {
+			mali_group_dump_core_status(group_c);
+		}
+	} else {
+		mali_group_dump_core_status(group);
+	}
+}
+
+/**
+ * @brief Add child group to virtual group parent
+ */
+void mali_group_add_group(struct mali_group *parent, struct mali_group *child)
+{
+    mali_bool found;
+    u32 i;
+
+    MALI_DEBUG_PRINT(3, ("Adding group %s to virtual group %s\n",
+                mali_group_core_description(child),
+                mali_group_core_description(parent)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+    MALI_DEBUG_ASSERT(!mali_group_is_virtual(child));
+    MALI_DEBUG_ASSERT(NULL == child->parent_group);
+
+    _mali_osk_list_addtail(&child->group_list, &parent->group_list);
+
+    child->parent_group = parent;
+
+    MALI_DEBUG_ASSERT_POINTER(child->l2_cache_core[0]);
+
+    MALI_DEBUG_PRINT(4, ("parent->l2_cache_core: [0] = %p, [1] = %p\n", parent->l2_cache_core[0], parent->l2_cache_core[1]));
+    MALI_DEBUG_PRINT(4, ("child->l2_cache_core: [0] = %p, [1] = %p\n", child->l2_cache_core[0], child->l2_cache_core[1]));
+
+    /* Keep track of the L2 cache cores of child groups */
+    found = MALI_FALSE;
+    for (i = 0; i < 2; i++) {
+        if (parent->l2_cache_core[i] == child->l2_cache_core[0]) {
+            MALI_DEBUG_ASSERT(parent->l2_cache_core_ref_count[i] > 0);
+            parent->l2_cache_core_ref_count[i]++;
+            found = MALI_TRUE;
+        }
+    }
+
+    if (!found) {
+        /* First time we see this L2 cache, add it to our list */
+        i = (NULL == parent->l2_cache_core[0]) ? 0 : 1;
+
+        MALI_DEBUG_PRINT(4, ("First time we see l2_cache %p. Adding to [%d] = %p\n", child->l2_cache_core[0], i, parent->l2_cache_core[i]));
+
+        MALI_DEBUG_ASSERT(NULL == parent->l2_cache_core[i]);
+
+        parent->l2_cache_core[i] = child->l2_cache_core[0];
+        parent->l2_cache_core_ref_count[i]++;
+    }
+
+    /* Update Broadcast Unit and DLBU */
+    mali_bcast_add_group(parent->bcast_core, child);
+    mali_dlbu_add_group(parent->dlbu_core, child);
+
+    if (MALI_TRUE == parent->power_is_on) {
+        mali_bcast_reset(parent->bcast_core);
+        mali_dlbu_update_mask(parent->dlbu_core);
+    }
+
+    if (MALI_TRUE == child->power_is_on) {
+        if (NULL == parent->session) {
+            if (NULL != child->session) {
+                /*
+                 * Parent has no session, so clear
+                 * child session as well.
+                 */
+                mali_mmu_activate_empty_page_directory(child->mmu);
+            }
+        } else {
+            if (parent->session == child->session) {
+                /* We already have same session as parent,
+                 * so a simple zap should be enough.
+                 */
+                mali_mmu_zap_tlb(child->mmu);
+            } else {
+                /*
+                 * Parent has a different session, so we must
+                 * switch to that sessions page table
+                 */
+                mali_mmu_activate_page_directory(child->mmu, mali_session_get_page_directory(parent->session));
+            }
+
+            /* It is the parent which keeps the session from now on */
+            child->session = NULL;
+        }
+    } else {
+        /* should have been cleared when child was powered down */
+        MALI_DEBUG_ASSERT(NULL == child->session);
+    }
+
+    /* Start job on child when parent is active */
+    if (NULL != parent->pp_running_job) {
+        struct mali_pp_job *job = parent->pp_running_job;
+
+        MALI_DEBUG_PRINT(3, ("Group %x joining running job %d on virtual group %x\n",
+                    child, mali_pp_job_get_id(job), parent));
+
+        /* Only allowed to add active child to an active parent */
+        MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == parent->state);
+        MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == child->state);
+
+        mali_pp_job_start(child->pp_core, job, mali_pp_core_get_id(child->pp_core), MALI_TRUE);
+
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+                mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+        trace_gpu_sched_switch(
+                mali_pp_core_description(group->pp_core),
+                sched_clock(), mali_pp_job_get_tid(job),
+                0, mali_pp_job_get_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+        trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+    }
+
+    MALI_DEBUG_CODE(mali_group_print_virtual(parent);)
+}
+
+/**
+ * @brief Remove child group from virtual group parent
+ */
+void mali_group_remove_group(struct mali_group *parent, struct mali_group *child)
+{
+    u32 i;
+
+    MALI_DEBUG_PRINT(3, ("Removing group %s from virtual group %s\n",
+                mali_group_core_description(child),
+                mali_group_core_description(parent)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+    MALI_DEBUG_ASSERT(!mali_group_is_virtual(child));
+    MALI_DEBUG_ASSERT(parent == child->parent_group);
+
+    /* Update Broadcast Unit and DLBU */
+    mali_bcast_remove_group(parent->bcast_core, child);
+    mali_dlbu_remove_group(parent->dlbu_core, child);
+
+    if (MALI_TRUE == parent->power_is_on) {
+        mali_bcast_reset(parent->bcast_core);
+        mali_dlbu_update_mask(parent->dlbu_core);
+    }
+
+    child->session = parent->session;
+    child->parent_group = NULL;
+
+    _mali_osk_list_delinit(&child->group_list);
+    if (_mali_osk_list_empty(&parent->group_list)) {
+        parent->session = NULL;
+    }
+
+    /* Keep track of the L2 cache cores of child groups */
+    i = (child->l2_cache_core[0] == parent->l2_cache_core[0]) ? 0 : 1;
+
+    MALI_DEBUG_ASSERT(child->l2_cache_core[0] == parent->l2_cache_core[i]);
+
+    parent->l2_cache_core_ref_count[i]--;
+    if (parent->l2_cache_core_ref_count[i] == 0) {
+        parent->l2_cache_core[i] = NULL;
+    }
+
+    MALI_DEBUG_CODE(mali_group_print_virtual(parent));
+}
+
+struct mali_group *mali_group_acquire_group(struct mali_group *parent)
+{
+    struct mali_group *child = NULL;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+
+    if (!_mali_osk_list_empty(&parent->group_list)) {
+        child = _MALI_OSK_LIST_ENTRY(parent->group_list.prev, struct mali_group, group_list);
+        mali_group_remove_group(parent, child);
+    }
+
+    if (NULL != child) {
+        if (MALI_GROUP_STATE_ACTIVE != parent->state
+                && MALI_TRUE == child->power_is_on) {
+            mali_group_reset(child);
+        }
+    }
+
+    return child;
+}
+
+void mali_group_reset(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(NULL == group->gp_running_job);
+    MALI_DEBUG_ASSERT(NULL == group->pp_running_job);
+    MALI_DEBUG_ASSERT(NULL == group->session);
+
+    MALI_DEBUG_PRINT(3, ("Group: reset of %s\n",
+                mali_group_core_description(group)));
+
+    if (NULL != group->dlbu_core) {
+        mali_dlbu_reset(group->dlbu_core);
+    }
+
+    if (NULL != group->bcast_core) {
+        mali_bcast_reset(group->bcast_core);
+    }
+
+    MALI_DEBUG_ASSERT(NULL != group->mmu);
+    mali_group_reset_mmu(group);
+
+    if (NULL != group->gp_core) {
+        MALI_DEBUG_ASSERT(NULL == group->pp_core);
+        mali_gp_reset(group->gp_core);
+    } else {
+        MALI_DEBUG_ASSERT(NULL != group->pp_core);
+        mali_group_reset_pp(group);
+    }
+}
+
+void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job, mali_bool gpu_secure_mode_pre_enabled)
+{
+    struct mali_session_data *session;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Starting GP job 0x%08X on group %s\n",
+                job,
+                mali_group_core_description(group)));
+
+    session = mali_gp_job_get_session(job);
+
+    MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]);
+    mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_gp_job_get_cache_order(job));
+
+	/* Reset GPU and disable gpu secure mode if needed. */
+	if (MALI_TRUE == _mali_osk_gpu_secure_mode_is_enabled()) {
+		struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+		_mali_osk_gpu_reset_and_secure_mode_disable();
+		/* Need to disable the pmu interrupt mask register */
+		if (NULL != pmu) {
+			mali_pmu_reset(pmu);
+		}
+	}
+
+	/* Reload mmu page table if needed */
+	if (MALI_TRUE == gpu_secure_mode_pre_enabled) {
+		mali_group_reset(group);
+		mali_group_activate_page_directory(group, session, MALI_TRUE);
+	} else {
+		mali_group_activate_page_directory(group, session, MALI_FALSE);
+	}
+
+    mali_gp_job_start(group->gp_core, job);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+            MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0) |
+            MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+            mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job), 0, 0, 0);
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+            mali_gp_job_get_pid(job), mali_gp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+    trace_mali_core_active(mali_gp_job_get_pid(job), 1 /* active */, 1 /* GP */,  0 /* core */,
+            mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+    if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+            (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+        mali_group_report_l2_cache_counters_per_core(group, 0);
+    }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+    trace_gpu_sched_switch(mali_gp_core_description(group->gp_core),
+            sched_clock(), mali_gp_job_get_tid(job),
+            0, mali_gp_job_get_id(job));
+#endif
+
+    group->gp_running_job = job;
+    group->is_working = MALI_TRUE;
+
+    /* Setup SW timer and record start time */
+    group->start_time = _mali_osk_time_tickcount();
+    _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime));
+
+    MALI_DEBUG_PRINT(4, ("Group: Started GP job 0x%08X on group %s at %u\n",
+                job,
+                mali_group_core_description(group),
+                group->start_time));
+}
+
+/* Used to set all the registers except frame renderer list address and fragment shader stack address
+ * It means the caller must set these two registers properly before calling this function
+ */
+void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job, mali_bool gpu_secure_mode_pre_enabled)
+{
+    struct mali_session_data *session;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Starting PP job 0x%08X part %u/%u on group %s\n",
+                job, sub_job + 1,
+                mali_pp_job_get_sub_job_count(job),
+                mali_group_core_description(group)));
+
+    session = mali_pp_job_get_session(job);
+
+    if (NULL != group->l2_cache_core[0]) {
+        mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_pp_job_get_cache_order(job));
+    }
+
+    if (NULL != group->l2_cache_core[1]) {
+        mali_l2_cache_invalidate_conditional(group->l2_cache_core[1], mali_pp_job_get_cache_order(job));
+    }
+
+	/* Reset GPU and change gpu secure mode if needed. */
+	if (MALI_TRUE == mali_pp_job_is_protected_job(job) && MALI_FALSE == _mali_osk_gpu_secure_mode_is_enabled()) {
+		struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+		_mali_osk_gpu_reset_and_secure_mode_enable();
+		/* Need to disable the pmu interrupt mask register */
+		if (NULL != pmu) {
+			mali_pmu_reset(pmu);
+		}
+	} else if (MALI_FALSE == mali_pp_job_is_protected_job(job) && MALI_TRUE == _mali_osk_gpu_secure_mode_is_enabled()) {
+		struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+		_mali_osk_gpu_reset_and_secure_mode_disable();
+		/* Need to disable the pmu interrupt mask register */
+		if (NULL != pmu) {
+			mali_pmu_reset(pmu);
+		}
+	}
+
+	/* Reload the mmu page table if needed */
+	if ((MALI_TRUE == mali_pp_job_is_protected_job(job) && MALI_FALSE == gpu_secure_mode_pre_enabled)
+	    || (MALI_FALSE == mali_pp_job_is_protected_job(job) && MALI_TRUE == gpu_secure_mode_pre_enabled)) {
+		mali_group_reset(group);
+		mali_group_activate_page_directory(group, session, MALI_TRUE);
+	} else {
+		mali_group_activate_page_directory(group, session, MALI_FALSE);
+	}
+
+    if (mali_group_is_virtual(group)) {
+        struct mali_group *child;
+        struct mali_group *temp;
+        u32 core_num = 0;
+
+        MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job));
+
+        /* Configure DLBU for the job */
+        mali_dlbu_config_job(group->dlbu_core, job);
+
+        /* Write stack address for each child group */
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            mali_pp_write_addr_stack(child->pp_core, job);
+            core_num++;
+        }
+
+        mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE);
+    } else {
+        mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE);
+    }
+
+    /* if the group is virtual, loop through physical groups which belong to this group
+     * and call profiling events for its cores as virtual */
+    if (MALI_TRUE == mali_group_is_virtual(group)) {
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+                    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                    MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+                    mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+            _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                    MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+                    mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+            trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+                    mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+        }
+
+#if defined(CONFIG_MALI400_PROFILING)
+        if (0 != group->l2_cache_core_ref_count[0]) {
+            if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                    (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+                mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+            }
+        }
+        if (0 != group->l2_cache_core_ref_count[1]) {
+            if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1])) &&
+                    (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]))) {
+                mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1]));
+            }
+        }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+    } else { /* group is physical - call profiling events for physical cores */
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL,
+                mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+        trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core),
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+        if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+            mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+        }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+    }
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+    trace_gpu_sched_switch(mali_pp_core_description(group->pp_core),
+            sched_clock(), mali_pp_job_get_tid(job),
+            0, mali_pp_job_get_id(job));
+#endif
+
+    group->pp_running_job = job;
+    group->pp_running_sub_job = sub_job;
+    group->is_working = MALI_TRUE;
+
+    /* Setup SW timer and record start time */
+    group->start_time = _mali_osk_time_tickcount();
+    _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime));
+
+    MALI_DEBUG_PRINT(4, ("Group: Started PP job 0x%08X part %u/%u on group %s at %u\n",
+                job, sub_job + 1,
+                mali_pp_job_get_sub_job_count(job),
+                mali_group_core_description(group),
+                group->start_time));
+
+}
+
+void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr)
+{
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]);
+    mali_l2_cache_invalidate(group->l2_cache_core[0]);
+
+    mali_mmu_zap_tlb_without_stall(group->mmu);
+
+    mali_gp_resume_with_new_heap(group->gp_core, start_addr, end_addr);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME |
+            MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+            0, 0, 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+    trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 1 /* active */, 1 /* GP */,  0 /* core */,
+            mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job));
+#endif
+}
+
+static void mali_group_reset_mmu(struct mali_group *group)
+{
+    struct mali_group *child;
+    struct mali_group *temp;
+    _mali_osk_errcode_t err;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    if (!mali_group_is_virtual(group)) {
+        /* This is a physical group or an idle virtual group -- simply wait for
+         * the reset to complete. */
+        err = mali_mmu_reset(group->mmu);
+        MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+    } else { /* virtual group */
+        /* Loop through all members of this virtual group and wait
+         * until they are done resetting.
+         */
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            err = mali_mmu_reset(child->mmu);
+            MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+        }
+    }
+}
+
+static void mali_group_reset_pp(struct mali_group *group)
+{
+    struct mali_group *child;
+    struct mali_group *temp;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    mali_pp_reset_async(group->pp_core);
+
+    if (!mali_group_is_virtual(group) || NULL == group->pp_running_job) {
+        /* This is a physical group or an idle virtual group -- simply wait for
+         * the reset to complete. */
+        mali_pp_reset_wait(group->pp_core);
+    } else {
+        /* Loop through all members of this virtual group and wait until they
+         * are done resetting.
+         */
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            mali_pp_reset_wait(child->pp_core);
+        }
+    }
+}
+
+struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job)
+{
+    struct mali_pp_job *pp_job_to_return;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_running_job);
+    MALI_DEBUG_ASSERT_POINTER(sub_job);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->is_working);
+
+    /* Stop/clear the timeout timer. */
+    _mali_osk_timer_del_async(group->timeout_timer);
+
+    if (NULL != group->pp_running_job) {
+
+        /* Deal with HW counters and profiling */
+
+        if (MALI_TRUE == mali_group_is_virtual(group)) {
+            struct mali_group *child;
+            struct mali_group *temp;
+
+            /* update performance counters from each physical pp core within this virtual group */
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+                mali_pp_update_performance_counters(group->pp_core, child->pp_core, group->pp_running_job, mali_pp_core_get_id(child->pp_core));
+            }
+
+#if defined(CONFIG_MALI400_PROFILING)
+            /* send profiling data per physical core */
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+                _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                        MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                        MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+                        mali_pp_job_get_perf_counter_value0(group->pp_running_job, mali_pp_core_get_id(child->pp_core)),
+                        mali_pp_job_get_perf_counter_value1(group->pp_running_job, mali_pp_core_get_id(child->pp_core)),
+                        mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8),
+                        0, 0);
+
+                trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job),
+                        0 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+                        mali_pp_job_get_frame_builder_id(group->pp_running_job),
+                        mali_pp_job_get_flush_id(group->pp_running_job));
+            }
+            if (0 != group->l2_cache_core_ref_count[0]) {
+                if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                        (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+                    mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+                }
+            }
+            if (0 != group->l2_cache_core_ref_count[1]) {
+                if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1])) &&
+                        (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]))) {
+                    mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1]));
+                }
+            }
+
+#endif
+        } else {
+            /* update performance counters for a physical group's pp core */
+            mali_pp_update_performance_counters(group->pp_core, group->pp_core, group->pp_running_job, group->pp_running_sub_job);
+
+#if defined(CONFIG_MALI400_PROFILING)
+            _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+                    MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL,
+                    mali_pp_job_get_perf_counter_value0(group->pp_running_job, group->pp_running_sub_job),
+                    mali_pp_job_get_perf_counter_value1(group->pp_running_job, group->pp_running_sub_job),
+                    mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8),
+                    0, 0);
+
+            trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job),
+                    0 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core),
+                    mali_pp_job_get_frame_builder_id(group->pp_running_job),
+                    mali_pp_job_get_flush_id(group->pp_running_job));
+
+            if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                    (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+                mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+            }
+#endif
+        }
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+        trace_gpu_sched_switch(
+                mali_gp_core_description(group->gp_core),
+                sched_clock(), 0, 0, 0);
+#endif
+
+    }
+
+    if (success) {
+        /* Only do soft reset for successful jobs, a full recovery
+         * reset will be done for failed jobs. */
+        mali_pp_reset_async(group->pp_core);
+    }
+
+    pp_job_to_return = group->pp_running_job;
+    group->pp_running_job = NULL;
+    group->is_working = MALI_FALSE;
+    *sub_job = group->pp_running_sub_job;
+
+    if (!success) {
+        MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n"));
+        mali_group_recovery_reset(group);
+    } else if (_MALI_OSK_ERR_OK != mali_pp_reset_wait(group->pp_core)) {
+        MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n"));
+        mali_group_recovery_reset(group);
+    }
+
+    return pp_job_to_return;
+}
+
+struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success)
+{
+    struct mali_gp_job *gp_job_to_return;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_running_job);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->is_working);
+
+    /* Stop/clear the timeout timer. */
+    _mali_osk_timer_del_async(group->timeout_timer);
+
+    if (NULL != group->gp_running_job) {
+        mali_gp_update_performance_counters(group->gp_core, group->gp_running_job);
+
+#if defined(CONFIG_MALI400_PROFILING)
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+                mali_gp_job_get_perf_counter_value0(group->gp_running_job),
+                mali_gp_job_get_perf_counter_value1(group->gp_running_job),
+                mali_gp_job_get_perf_counter_src0(group->gp_running_job) | (mali_gp_job_get_perf_counter_src1(group->gp_running_job) << 8),
+                0, 0);
+
+        if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0])))
+            mali_group_report_l2_cache_counters_per_core(group, 0);
+#endif
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+        trace_gpu_sched_switch(
+                mali_pp_core_description(group->pp_core),
+                sched_clock(), 0, 0, 0);
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+        trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 0 /* active */, 1 /* GP */,  0 /* core */,
+                mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job));
+#endif
+
+        mali_gp_job_set_current_heap_addr(group->gp_running_job,
+                mali_gp_read_plbu_alloc_start_addr(group->gp_core));
+    }
+
+    if (success) {
+        /* Only do soft reset for successful jobs, a full recovery
+         * reset will be done for failed jobs. */
+        mali_gp_reset_async(group->gp_core);
+    }
+
+    gp_job_to_return = group->gp_running_job;
+    group->gp_running_job = NULL;
+    group->is_working = MALI_FALSE;
+
+    if (!success) {
+        MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n"));
+        mali_group_recovery_reset(group);
+    } else if (_MALI_OSK_ERR_OK != mali_gp_reset_wait(group->gp_core)) {
+        MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n"));
+        mali_group_recovery_reset(group);
+    }
+
+    return gp_job_to_return;
+}
+
+struct mali_group *mali_group_get_glob_group(u32 index)
+{
+    if (mali_global_num_groups > index) {
+        return mali_global_groups[index];
+    }
+
+    return NULL;
+}
+
+u32 mali_group_get_glob_num_groups(void)
+{
+    return mali_global_num_groups;
+}
+
+static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session, mali_bool is_reload)
+{
+    MALI_DEBUG_PRINT(5, ("Mali group: Activating page directory 0x%08X from session 0x%08X on group %s\n",
+                mali_session_get_page_directory(session), session,
+                mali_group_core_description(group)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (group->session != session || MALI_TRUE == is_reload) {
+		/* Different session than last time, so we need to do some work */
+		MALI_DEBUG_PRINT(5, ("Mali group: Activate session: %08x previous: %08x on group %s\n",
+				     session, group->session,
+				     mali_group_core_description(group)));
+		mali_mmu_activate_page_directory(group->mmu, mali_session_get_page_directory(session));
+		group->session = session;
+	} else {
+		/* Same session as last time, so no work required */
+		MALI_DEBUG_PRINT(4, ("Mali group: Activate existing session 0x%08X on group %s\n",
+				     session->page_directory,
+				     mali_group_core_description(group)));
+		mali_mmu_zap_tlb_without_stall(group->mmu);
+	}
+}
+
+static void mali_group_recovery_reset(struct mali_group *group)
+{
+    _mali_osk_errcode_t err;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    /* Stop cores, bus stop */
+    if (NULL != group->pp_core) {
+        mali_pp_stop_bus(group->pp_core);
+    } else {
+        mali_gp_stop_bus(group->gp_core);
+    }
+
+    /* Flush MMU and clear page fault (if any) */
+    mali_mmu_activate_fault_flush_page_directory(group->mmu);
+    mali_mmu_page_fault_done(group->mmu);
+
+    /* Wait for cores to stop bus, then do a hard reset on them */
+    if (NULL != group->pp_core) {
+        if (mali_group_is_virtual(group)) {
+            struct mali_group *child, *temp;
+
+            /* Disable the broadcast unit while we do reset directly on the member cores. */
+            mali_bcast_disable(group->bcast_core);
+
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+                mali_pp_stop_bus_wait(child->pp_core);
+                mali_pp_hard_reset(child->pp_core);
+            }
+
+            mali_bcast_enable(group->bcast_core);
+        } else {
+            mali_pp_stop_bus_wait(group->pp_core);
+            mali_pp_hard_reset(group->pp_core);
+        }
+    } else {
+        mali_gp_stop_bus_wait(group->gp_core);
+        mali_gp_hard_reset(group->gp_core);
+    }
+
+    /* Reset MMU */
+    err = mali_mmu_reset(group->mmu);
+    MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+    MALI_IGNORE(err);
+
+    group->session = NULL;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size)
+{
+    int n = 0;
+    int i;
+    struct mali_group *child;
+    struct mali_group *temp;
+
+    if (mali_group_is_virtual(group)) {
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "Virtual PP Group: %p\n", group);
+    } else if (mali_group_is_in_virtual(group)) {
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "Child PP Group: %p\n", group);
+    } else if (NULL != group->pp_core) {
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "Physical PP Group: %p\n", group);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "GP Group: %p\n", group);
+    }
+
+    switch (group->state) {
+        case MALI_GROUP_STATE_INACTIVE:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: INACTIVE\n");
+            break;
+        case MALI_GROUP_STATE_ACTIVATION_PENDING:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: ACTIVATION_PENDING\n");
+            break;
+        case MALI_GROUP_STATE_ACTIVE:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: MALI_GROUP_STATE_ACTIVE\n");
+            break;
+        default:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: UNKNOWN (%d)\n", group->state);
+            MALI_DEBUG_ASSERT(0);
+            break;
+    }
+
+    n += _mali_osk_snprintf(buf + n, size - n,
+            "\tSW power: %s\n",
+            group->power_is_on ? "On" : "Off");
+
+    n += mali_pm_dump_state_domain(group->pm_domain, buf + n, size - n);
+
+    for (i = 0; i < 2; i++) {
+        if (NULL != group->l2_cache_core[i]) {
+            struct mali_pm_domain *domain;
+            domain = mali_l2_cache_get_pm_domain(
+                    group->l2_cache_core[i]);
+            n += mali_pm_dump_state_domain(domain,
+                    buf + n, size - n);
+        }
+    }
+
+    if (group->gp_core) {
+        n += mali_gp_dump_state(group->gp_core, buf + n, size - n);
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "\tGP running job: %p\n", group->gp_running_job);
+    }
+
+    if (group->pp_core) {
+        n += mali_pp_dump_state(group->pp_core, buf + n, size - n);
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "\tPP running job: %p, subjob %d \n",
+                group->pp_running_job,
+                group->pp_running_sub_job);
+    }
+
+    _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list,
+            struct mali_group, group_list) {
+        n += mali_group_dump_state(child, buf + n, size - n);
+    }
+
+    return n;
+}
+#endif
+
+/* Kasin added. */
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+#include <platform/meson_m400/mali_fix.h>
+#define INT_MALI_PP2_MMU ( 6+32)
+struct _mali_osk_irq_t_struct;
+u32 get_irqnum(struct _mali_osk_irq_t_struct* irq);
+#endif
+
+_mali_osk_errcode_t mali_group_upper_half_mmu(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_mmu_core *mmu = group->mmu;
+#endif
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (MALI_FALSE == group->power_is_on)
+        MALI_SUCCESS;
+    if (get_irqnum(mmu->irq) == INT_MALI_PP2_MMU)
+    {
+        if (group == NULL || group->pp_core == NULL)
+            MALI_SUCCESS;
+        if (group->pp_core->core_id == 0) {
+            if (malifix_get_mmu_int_process_state(0) == MMU_INT_HIT)
+                malifix_set_mmu_int_process_state(0, MMU_INT_TOP);
+            else
+                MALI_SUCCESS;
+        }
+        else if (group->pp_core->core_id == 1) {
+            if (malifix_get_mmu_int_process_state(1) == MMU_INT_HIT)
+                malifix_set_mmu_int_process_state(1, MMU_INT_TOP);
+            else
+                MALI_SUCCESS;
+        } else
+            MALI_SUCCESS;
+    }
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+
+    ret = mali_executor_interrupt_mmu(group, MALI_TRUE);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) {
+		/* group complete and on job shedule on it, it already power off */
+		if (NULL != group->gp_core) {
+			_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+						      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+						      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+						      0, 0, /* No pid and tid for interrupt handler */
+						      MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+						      0xFFFFFFFF, 0);
+		} else {
+			_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+						      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+						      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+						      0, 0, /* No pid and tid for interrupt handler */
+						      MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+							      mali_pp_core_get_id(group->pp_core)),
+						      0xFFFFFFFF, 0);
+		}
+
+		mali_executor_unlock();
+		return ret;
+	}
+#endif
+
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+
+    return ret;
+}
+
+static void mali_group_bottom_half_mmu(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_mmu_core *mmu = group->mmu;
+#endif
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+
+    mali_executor_interrupt_mmu(group, MALI_FALSE);
+
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (get_irqnum(mmu->irq) == INT_MALI_PP2_MMU)
+    {
+        if (group->pp_core->core_id == 0) {
+            if (malifix_get_mmu_int_process_state(0) == MMU_INT_TOP)
+                malifix_set_mmu_int_process_state(0, MMU_INT_NONE);
+        }
+        else if (group->pp_core->core_id == 1) {
+            if (malifix_get_mmu_int_process_state(1) == MMU_INT_TOP)
+                malifix_set_mmu_int_process_state(1, MMU_INT_NONE);
+        }
+    }
+#endif
+}
+
+_mali_osk_errcode_t mali_group_upper_half_gp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+
+    MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n",
+                mali_gp_get_rawstat(group->gp_core),
+                mali_group_core_description(group)));
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+    ret = mali_executor_interrupt_gp(group, MALI_TRUE);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) {
+		/* group complete and on job shedule on it, it already power off */
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+					      0, 0, /* No pid and tid for interrupt handler */
+					      MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+					      0xFFFFFFFF, 0);
+		mali_executor_unlock();
+		return ret;
+	}
+#endif
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+    return ret;
+}
+
+static void mali_group_bottom_half_gp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+
+    mali_executor_interrupt_gp(group, MALI_FALSE);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+}
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+int PP0_int_cnt = 0;
+int mali_PP0_int_cnt(void)
+{
+    return PP0_int_cnt;
+}
+EXPORT_SYMBOL(mali_PP0_int_cnt);
+
+int PP1_int_cnt = 0;
+int mali_PP1_int_cnt(void)
+{
+    return PP1_int_cnt;
+}
+EXPORT_SYMBOL(mali_PP1_int_cnt);
+#endif
+
+_mali_osk_errcode_t mali_group_upper_half_pp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_pp_core *core = group->pp_core;
+#endif
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+
+    MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n",
+                mali_pp_get_rawstat(group->pp_core),
+                mali_group_core_description(group)));
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+
+    ret = mali_executor_interrupt_pp(group, MALI_TRUE);
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (core->core_id == 0)
+        PP0_int_cnt++;
+    else if (core->core_id == 1)
+        PP1_int_cnt++;
+#endif
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) {
+		/* group complete and on job shedule on it, it already power off */
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+					      0, 0, /* No pid and tid for interrupt handler */
+					      MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+						      mali_pp_core_get_id(group->pp_core)),
+					      0xFFFFFFFF, 0);
+		mali_executor_unlock();
+		return ret;
+	}
+#endif
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+    return ret;
+}
+
+static void mali_group_bottom_half_pp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+
+    mali_executor_interrupt_pp(group, MALI_FALSE);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+}
+
+static void mali_group_timeout(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+    MALI_DEBUG_ASSERT_POINTER(group);
+
+    MALI_DEBUG_PRINT(2, ("Group: timeout handler for %s at %u\n",
+                mali_group_core_description(group),
+                _mali_osk_time_tickcount()));
+
+    if (mali_core_timeout < 65533)
+        mali_core_timeout++;
+    if (NULL != group->gp_core) {
+        mali_group_schedule_bottom_half_gp(group);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+        mali_group_schedule_bottom_half_pp(group);
+    }
+}
+
+mali_bool mali_group_zap_session(struct mali_group *group,
+        struct mali_session_data *session)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(session);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    if (group->session != session) {
+        /* not running from this session */
+        return MALI_TRUE; /* success */
+    }
+
+    if (group->is_working) {
+        /* The Zap also does the stall and disable_stall */
+        mali_bool zap_success = mali_mmu_zap_tlb(group->mmu);
+        return zap_success;
+    } else {
+        /* Just remove the session instead of zapping */
+        mali_group_clear_session(group);
+        return MALI_TRUE; /* success */
+    }
+}
+
+#if defined(CONFIG_MALI400_PROFILING)
+static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num)
+{
+    u32 source0 = 0;
+    u32 value0 = 0;
+    u32 source1 = 0;
+    u32 value1 = 0;
+    u32 profiling_channel = 0;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    switch (core_num) {
+        case 0:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS;
+            break;
+        case 1:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS;
+            break;
+        case 2:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS;
+            break;
+        default:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS;
+            break;
+    }
+
+    if (0 == core_num) {
+        mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+    }
+    if (1 == core_num) {
+        if (1 == mali_l2_cache_get_id(group->l2_cache_core[0])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+        } else if (1 == mali_l2_cache_get_id(group->l2_cache_core[1])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1);
+        }
+    }
+    if (2 == core_num) {
+        if (2 == mali_l2_cache_get_id(group->l2_cache_core[0])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+        } else if (2 == mali_l2_cache_get_id(group->l2_cache_core[1])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1);
+        }
+    }
+
+    _mali_osk_profiling_add_event(profiling_channel, source1 << 8 | source0, value0, value1, 0, 0);
+}
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
diff --git a/mali/common/mali_group.h b/mali/common/mali_group.h
new file mode 100755
index 0000000..afe966f
--- /dev/null
+++ b/mali/common/mali_group.h
@@ -0,0 +1,460 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GROUP_H__
+#define __MALI_GROUP_H__
+
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_mmu.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_session.h"
+#include "mali_osk_profiling.h"
+
+/**
+ * @brief Default max runtime [ms] for a core job - used by timeout timers
+ */
+#define MALI_MAX_JOB_RUNTIME_DEFAULT 5000
+
+extern int mali_max_job_runtime;
+
+#define MALI_MAX_NUMBER_OF_GROUPS 10
+#define MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS 8
+
+enum mali_group_state {
+	MALI_GROUP_STATE_INACTIVE,
+	MALI_GROUP_STATE_ACTIVATION_PENDING,
+	MALI_GROUP_STATE_ACTIVE,
+};
+
+/**
+ * The structure represents a render group
+ * A render group is defined by all the cores that share the same Mali MMU
+ */
+
+struct mali_group {
+	struct mali_mmu_core        *mmu;
+	struct mali_session_data    *session;
+
+	enum mali_group_state        state;
+	mali_bool                    power_is_on;
+
+	mali_bool                    is_working;
+	unsigned long                start_time; /* in ticks */
+
+	struct mali_gp_core         *gp_core;
+	struct mali_gp_job          *gp_running_job;
+
+	struct mali_pp_core         *pp_core;
+	struct mali_pp_job          *pp_running_job;
+	u32                         pp_running_sub_job;
+
+	struct mali_pm_domain       *pm_domain;
+
+	struct mali_l2_cache_core   *l2_cache_core[2];
+	u32                         l2_cache_core_ref_count[2];
+
+	/* Parent virtual group (if any) */
+	struct mali_group           *parent_group;
+
+	struct mali_dlbu_core       *dlbu_core;
+	struct mali_bcast_unit      *bcast_core;
+
+	/* Used for working groups which needs to be disabled */
+	mali_bool                    disable_requested;
+
+	/* Used by group to link child groups (for virtual group) */
+	_mali_osk_list_t            group_list;
+
+	/* Used by executor module in order to link groups of same state */
+	_mali_osk_list_t            executor_list;
+
+	/* Used by PM domains to link groups of same domain */
+	_mali_osk_list_t             pm_domain_list;
+
+	_mali_osk_wq_work_t         *bottom_half_work_mmu;
+	_mali_osk_wq_work_t         *bottom_half_work_gp;
+	_mali_osk_wq_work_t         *bottom_half_work_pp;
+
+	_mali_osk_timer_t           *timeout_timer;
+};
+
+/** @brief Create a new Mali group object
+ *
+ * @return A pointer to a new group object
+ */
+struct mali_group *mali_group_create(struct mali_l2_cache_core *core,
+				     struct mali_dlbu_core *dlbu,
+				     struct mali_bcast_unit *bcast,
+				     u32 domain_index);
+
+void mali_group_dump_status(struct mali_group *group);
+
+void mali_group_delete(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group,
+		struct mali_mmu_core *mmu_core);
+void mali_group_remove_mmu_core(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group,
+		struct mali_gp_core *gp_core);
+void mali_group_remove_gp_core(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group,
+		struct mali_pp_core *pp_core);
+void mali_group_remove_pp_core(struct mali_group *group);
+
+MALI_STATIC_INLINE const char *mali_group_core_description(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	if (NULL != group->pp_core) {
+		return mali_pp_core_description(group->pp_core);
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+		return mali_gp_core_description(group->gp_core);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_is_virtual(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	return (NULL != group->dlbu_core);
+#else
+	return MALI_FALSE;
+#endif
+}
+
+/** @brief Check if a group is a part of a virtual group or not
+ */
+MALI_STATIC_INLINE mali_bool mali_group_is_in_virtual(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	return (NULL != group->parent_group) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif
+}
+
+/** @brief Reset group
+ *
+ * This function will reset the entire group,
+ * including all the cores present in the group.
+ *
+ * @param group Pointer to the group to reset
+ */
+void mali_group_reset(struct mali_group *group);
+
+MALI_STATIC_INLINE struct mali_session_data *mali_group_get_session(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	return group->session;
+}
+
+MALI_STATIC_INLINE void mali_group_clear_session(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (NULL != group->session) {
+		mali_mmu_activate_empty_page_directory(group->mmu);
+		group->session = NULL;
+	}
+}
+
+enum mali_group_state mali_group_activate(struct mali_group *group);
+
+/*
+ * Change state from ACTIVATION_PENDING to ACTIVE
+ * For virtual group, all childs need to be ACTIVE first
+ */
+mali_bool mali_group_set_active(struct mali_group *group);
+
+/*
+ * @return MALI_TRUE means one or more domains can now be powered off,
+ * and caller should call either mali_pm_update_async() or
+ * mali_pm_update_sync() in order to do so.
+ */
+mali_bool mali_group_deactivate(struct mali_group *group);
+
+MALI_STATIC_INLINE enum mali_group_state mali_group_get_state(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->state;
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_power_is_on(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->power_is_on;
+}
+
+void mali_group_power_up(struct mali_group *group);
+void mali_group_power_down(struct mali_group *group);
+
+MALI_STATIC_INLINE void mali_group_set_disable_request(
+	struct mali_group *group, mali_bool disable)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	group->disable_requested = disable;
+
+	/**
+	 * When one of child group's disable_requeset is set TRUE, then
+	 * the disable_request of parent group should also be set to TRUE.
+	 * While, the disable_request of parent group should only be set to FALSE
+	 * only when all of its child group's disable_request are set to FALSE.
+	 */
+	if (NULL != group->parent_group && MALI_TRUE == disable) {
+		group->parent_group->disable_requested = disable;
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_disable_requested(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->disable_requested;
+}
+
+/** @brief Virtual groups */
+void mali_group_add_group(struct mali_group *parent, struct mali_group *child);
+struct mali_group *mali_group_acquire_group(struct mali_group *parent);
+void mali_group_remove_group(struct mali_group *parent, struct mali_group *child);
+
+/** @brief Checks if the group is working.
+ */
+MALI_STATIC_INLINE mali_bool mali_group_is_working(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	if (mali_group_is_in_virtual(group)) {
+		struct mali_group *tmp_group = mali_executor_get_virtual_group();
+		return tmp_group->is_working;
+	}
+	return group->is_working;
+}
+
+MALI_STATIC_INLINE struct mali_gp_job *mali_group_get_running_gp_job(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->gp_running_job;
+}
+
+/** @brief Zap MMU TLB on all groups
+ *
+ * Zap TLB on group if \a session is active.
+ */
+mali_bool mali_group_zap_session(struct mali_group *group,
+				 struct mali_session_data *session);
+
+/** @brief Get pointer to GP core object
+ */
+MALI_STATIC_INLINE struct mali_gp_core *mali_group_get_gp_core(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->gp_core;
+}
+
+/** @brief Get pointer to PP core object
+ */
+MALI_STATIC_INLINE struct mali_pp_core *mali_group_get_pp_core(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->pp_core;
+}
+
+/** @brief Start GP job
+ */
+void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job, mali_bool gpu_secure_mode_pre_enabled);
+
+void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job, mali_bool gpu_secure_mode_pre_enabled);
+
+/** @brief Start virtual group Job on a virtual group
+*/
+void mali_group_start_job_on_virtual(struct mali_group *group, struct mali_pp_job *job, u32 first_subjob, u32 last_subjob);
+
+
+/** @brief Start a subjob from a particular on a specific PP group
+*/
+void mali_group_start_job_on_group(struct mali_group *group, struct mali_pp_job *job, u32 subjob);
+
+
+/** @brief remove all the unused groups in tmp_unused group  list, so that the group is in consistent status.
+ */
+void mali_group_non_dlbu_job_done_virtual(struct mali_group *group);
+
+
+/** @brief Resume GP job that suspended waiting for more heap memory
+ */
+void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr);
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_get_interrupt_result(group->gp_core);
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_get_interrupt_result(group->pp_core);
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_mmu(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->mmu);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_mmu_get_interrupt_result(group->mmu);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_gp_is_active(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_is_active(group->gp_core);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_pp_is_active(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_is_active(group->pp_core);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_has_timed_out(struct mali_group *group)
+{
+	unsigned long time_cost;
+	struct mali_group *tmp_group = group;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* if the group is in virtual need to use virtual_group's start time */
+	if (mali_group_is_in_virtual(group)) {
+		tmp_group = mali_executor_get_virtual_group();
+	}
+
+	time_cost = _mali_osk_time_tickcount() - tmp_group->start_time;
+	if (_mali_osk_time_mstoticks(mali_max_job_runtime) <= time_cost) {
+		/*
+		 * current tick is at or after timeout end time,
+		 * so this is a valid timeout
+		 */
+		return MALI_TRUE;
+	} else {
+		/*
+		 * Not a valid timeout. A HW interrupt probably beat
+		 * us to it, and the timer wasn't properly deleted
+		 * (async deletion used due to atomic context).
+		 */
+		return MALI_FALSE;
+	}
+}
+
+MALI_STATIC_INLINE void mali_group_mask_all_interrupts_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_mask_all_interrupts(group->gp_core);
+}
+
+MALI_STATIC_INLINE void mali_group_mask_all_interrupts_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_mask_all_interrupts(group->pp_core);
+}
+
+MALI_STATIC_INLINE void mali_group_enable_interrupts_gp(
+	struct mali_group *group,
+	enum mali_interrupt_result exceptions)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	mali_gp_enable_interrupts(group->gp_core, exceptions);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_gp);
+}
+
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_pp);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_mmu(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->mmu);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_mmu);
+}
+
+struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job);
+
+struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success);
+
+#if defined(CONFIG_MALI400_PROFILING)
+MALI_STATIC_INLINE void mali_group_oom(struct mali_group *group)
+{
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND |
+				      MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+				      0, 0, 0, 0, 0);
+}
+#endif
+
+struct mali_group *mali_group_get_glob_group(u32 index);
+u32 mali_group_get_glob_num_groups(void);
+
+u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size);
+
+
+_mali_osk_errcode_t mali_group_upper_half_mmu(void *data);
+_mali_osk_errcode_t mali_group_upper_half_gp(void *data);
+_mali_osk_errcode_t mali_group_upper_half_pp(void *data);
+
+MALI_STATIC_INLINE mali_bool mali_group_is_empty(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT(mali_group_is_virtual(group));
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return _mali_osk_list_empty(&group->group_list);
+}
+
+#endif /* __MALI_GROUP_H__ */
diff --git a/mali/common/mali_hw_core.c b/mali/common/mali_hw_core.c
new file mode 100755
index 0000000..dd10cfe
--- /dev/null
+++ b/mali/common/mali_hw_core.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_hw_core.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_osk_mali.h"
+
+_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size)
+{
+	core->phys_addr = resource->base;
+	core->phys_offset = resource->base - _mali_osk_resource_base_address();
+	core->description = resource->description;
+	core->size = reg_size;
+
+	MALI_DEBUG_ASSERT(core->phys_offset < core->phys_addr);
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_mem_reqregion(core->phys_addr, core->size, core->description)) {
+		core->mapped_registers = _mali_osk_mem_mapioregion(core->phys_addr, core->size, core->description);
+		if (NULL != core->mapped_registers) {
+			return _MALI_OSK_ERR_OK;
+		} else {
+			MALI_PRINT_ERROR(("Failed to map memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr));
+		}
+		_mali_osk_mem_unreqregion(core->phys_addr, core->size);
+	} else {
+		MALI_PRINT_ERROR(("Failed to request memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr));
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_hw_core_delete(struct mali_hw_core *core)
+{
+	if (NULL != core->mapped_registers) {
+		_mali_osk_mem_unmapioregion(core->phys_addr, core->size, core->mapped_registers);
+		core->mapped_registers = NULL;
+	}
+	_mali_osk_mem_unreqregion(core->phys_addr, core->size);
+}
diff --git a/mali/common/mali_hw_core.h b/mali/common/mali_hw_core.h
new file mode 100755
index 0000000..e435725
--- /dev/null
+++ b/mali/common/mali_hw_core.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_HW_CORE_H__
+#define __MALI_HW_CORE_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/**
+ * The common parts for all Mali HW cores (GP, PP, MMU, L2 and PMU)
+ * This struct is embedded inside all core specific structs.
+ */
+struct mali_hw_core {
+	uintptr_t phys_addr;              /**< Physical address of the registers */
+	u32 phys_offset;                  /**< Offset from start of Mali to registers */
+	u32 size;                         /**< Size of registers */
+	mali_io_address mapped_registers; /**< Virtual mapping of the registers */
+	const char *description;          /**< Name of unit (as specified in device configuration) */
+};
+
+#define MALI_REG_POLL_COUNT_FAST 1000000
+#define MALI_REG_POLL_COUNT_SLOW 1000000
+
+/*
+ * GP and PP core translate their int_stat/rawstat into one of these
+ */
+enum mali_interrupt_result {
+	MALI_INTERRUPT_RESULT_NONE,
+	MALI_INTERRUPT_RESULT_SUCCESS,
+	MALI_INTERRUPT_RESULT_SUCCESS_VS,
+	MALI_INTERRUPT_RESULT_SUCCESS_PLBU,
+	MALI_INTERRUPT_RESULT_OOM,
+	MALI_INTERRUPT_RESULT_ERROR
+};
+
+_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size);
+void mali_hw_core_delete(struct mali_hw_core *core);
+
+MALI_STATIC_INLINE u32 mali_hw_core_register_read(struct mali_hw_core *core, u32 relative_address)
+{
+	u32 read_val;
+	read_val = _mali_osk_mem_ioread32(core->mapped_registers, relative_address);
+	MALI_DEBUG_PRINT(6, ("register_read for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, read_val));
+	return read_val;
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed(struct mali_hw_core *core, u32 relative_address, u32 new_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	_mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val);
+}
+
+/* Conditionally write a register.
+ * The register will only be written if the new value is different from the old_value.
+ * If the new value is different, the old value will also be updated */
+MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 new_val, const u32 old_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	if (old_val != new_val) {
+		_mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val);
+	}
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write(struct mali_hw_core *core, u32 relative_address, u32 new_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	_mali_osk_mem_iowrite32(core->mapped_registers, relative_address, new_val);
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs)
+{
+	u32 i;
+	MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n",
+			     core->description, relative_address, nr_of_regs));
+
+	/* Do not use burst writes against the registers */
+	for (i = 0; i < nr_of_regs; i++) {
+		mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]);
+	}
+}
+
+/* Conditionally write a set of registers.
+ * The register will only be written if the new value is different from the old_value.
+ * If the new value is different, the old value will also be updated */
+MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs, const u32 *old_array)
+{
+	u32 i;
+	MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n",
+			     core->description, relative_address, nr_of_regs));
+
+	/* Do not use burst writes against the registers */
+	for (i = 0; i < nr_of_regs; i++) {
+		if (old_array[i] != write_array[i]) {
+			mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]);
+		}
+	}
+}
+
+#endif /* __MALI_HW_CORE_H__ */
diff --git a/mali/common/mali_kernel_common.h b/mali/common/mali_kernel_common.h
new file mode 100755
index 0000000..9bae265
--- /dev/null
+++ b/mali/common/mali_kernel_common.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_COMMON_H__
+#define __MALI_KERNEL_COMMON_H__
+
+#include "mali_osk.h"
+
+/* Make sure debug is defined when it should be */
+#ifndef DEBUG
+#if defined(_DEBUG)
+#define DEBUG
+#endif
+#endif
+
+/* The file include several useful macros for error checking, debugging and printing.
+ * - MALI_PRINTF(...)           Do not use this function: Will be included in Release builds.
+ * - MALI_DEBUG_PRINT(nr, (X) ) Prints the second argument if nr<=MALI_DEBUG_LEVEL.
+ * - MALI_DEBUG_ERROR( (X) )    Prints an errortext, a source trace, and the given error message.
+ * - MALI_DEBUG_ASSERT(exp,(X)) If the asserted expr is false, the program will exit.
+ * - MALI_DEBUG_ASSERT_POINTER(pointer)  Triggers if the pointer is a zero pointer.
+ * - MALI_DEBUG_CODE( X )       The code inside the macro is only compiled in Debug builds.
+ *
+ * The (X) means that you must add an extra parenthesis around the argumentlist.
+ *
+ * The  printf function: MALI_PRINTF(...) is routed to _mali_osk_debugmsg
+ *
+ * Suggested range for the DEBUG-LEVEL is [1:6] where
+ * [1:2] Is messages with highest priority, indicate possible errors.
+ * [3:4] Is messages with medium priority, output important variables.
+ * [5:6] Is messages with low priority, used during extensive debugging.
+ */
+
+/**
+*  Fundamental error macro. Reports an error code. This is abstracted to allow us to
+*  easily switch to a different error reporting method if we want, and also to allow
+*  us to search for error returns easily.
+*
+*  Note no closing semicolon - this is supplied in typical usage:
+*
+*  MALI_ERROR(MALI_ERROR_OUT_OF_MEMORY);
+*/
+#define MALI_ERROR(error_code) return (error_code)
+
+/**
+ *  Basic error macro, to indicate success.
+ *  Note no closing semicolon - this is supplied in typical usage:
+ *
+ *  MALI_SUCCESS;
+ */
+#define MALI_SUCCESS MALI_ERROR(_MALI_OSK_ERR_OK)
+
+/**
+ *  Basic error macro. This checks whether the given condition is true, and if not returns
+ *  from this function with the supplied error code. This is a macro so that we can override it
+ *  for stress testing.
+ *
+ *  Note that this uses the do-while-0 wrapping to ensure that we don't get problems with dangling
+ *  else clauses. Note also no closing semicolon - this is supplied in typical usage:
+ *
+ *  MALI_CHECK((p!=NULL), ERROR_NO_OBJECT);
+ */
+#define MALI_CHECK(condition, error_code) do { if(!(condition)) MALI_ERROR(error_code); } while(0)
+
+/**
+ *  Error propagation macro. If the expression given is anything other than
+ *  _MALI_OSK_NO_ERROR, then the value is returned from the enclosing function
+ *  as an error code. This effectively acts as a guard clause, and propagates
+ *  error values up the call stack. This uses a temporary value to ensure that
+ *  the error expression is not evaluated twice.
+ *  If the counter for forcing a failure has been set using _mali_force_error,
+ *  this error will be returned without evaluating the expression in
+ *  MALI_CHECK_NO_ERROR
+ */
+#define MALI_CHECK_NO_ERROR(expression) \
+	do { _mali_osk_errcode_t _check_no_error_result=(expression); \
+		if(_check_no_error_result != _MALI_OSK_ERR_OK) \
+			MALI_ERROR(_check_no_error_result); \
+	} while(0)
+
+/**
+ *  Pointer check macro. Checks non-null pointer.
+ */
+#define MALI_CHECK_NON_NULL(pointer, error_code) MALI_CHECK( ((pointer)!=NULL), (error_code) )
+
+/**
+ *  Error macro with goto. This checks whether the given condition is true, and if not jumps
+ *  to the specified label using a goto. The label must therefore be local to the function in
+ *  which this macro appears. This is most usually used to execute some clean-up code before
+ *  exiting with a call to ERROR.
+ *
+ *  Like the other macros, this is a macro to allow us to override the condition if we wish,
+ *  e.g. to force an error during stress testing.
+ */
+#define MALI_CHECK_GOTO(condition, label) do { if(!(condition)) goto label; } while(0)
+
+/**
+ *  Explicitly ignore a parameter passed into a function, to suppress compiler warnings.
+ *  Should only be used with parameter names.
+ */
+#define MALI_IGNORE(x) x=x
+
+#if defined(CONFIG_MALI_QUIET)
+#define MALI_PRINTF(args)
+#else
+#define MALI_PRINTF(args) _mali_osk_dbgmsg args;
+#endif
+
+#define MALI_PRINT_ERROR(args) do{ \
+		MALI_PRINTF(("Mali: ERR: %s\n" ,__FILE__)); \
+		MALI_PRINTF(("           %s()%4d\n           ", __FUNCTION__, __LINE__)) ; \
+		MALI_PRINTF(args); \
+		MALI_PRINTF(("\n")); \
+	} while(0)
+
+#define MALI_PRINT(args) do{ \
+		MALI_PRINTF(("Mali: ")); \
+		MALI_PRINTF(args); \
+	} while (0)
+
+#ifdef DEBUG
+#ifndef mali_debug_level
+extern int mali_debug_level;
+#endif
+
+#define MALI_DEBUG_CODE(code) code
+#define MALI_DEBUG_PRINT(level, args)  do { \
+		if((level) <=  mali_debug_level)\
+		{MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); } \
+	} while (0)
+
+#define MALI_DEBUG_PRINT_ERROR(args) MALI_PRINT_ERROR(args)
+
+#define MALI_DEBUG_PRINT_IF(level,condition,args)  \
+	if((condition)&&((level) <=  mali_debug_level))\
+	{MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); }
+
+#define MALI_DEBUG_PRINT_ELSE(level, args)\
+	else if((level) <=  mali_debug_level)\
+	{ MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); }
+
+/**
+ * @note these variants of DEBUG ASSERTS will cause a debugger breakpoint
+ * to be entered (see _mali_osk_break() ). An alternative would be to call
+ * _mali_osk_abort(), on OSs that support it.
+ */
+#define MALI_DEBUG_PRINT_ASSERT(condition, args) do  {if( !(condition)) { MALI_PRINT_ERROR(args); _mali_osk_break(); } } while(0)
+#define MALI_DEBUG_ASSERT_POINTER(pointer) do  {if( (pointer)== NULL) {MALI_PRINT_ERROR(("NULL pointer " #pointer)); _mali_osk_break();} } while(0)
+#define MALI_DEBUG_ASSERT(condition) do  {if( !(condition)) {MALI_PRINT_ERROR(("ASSERT failed: " #condition )); _mali_osk_break();} } while(0)
+
+#else /* DEBUG */
+
+#define MALI_DEBUG_CODE(code)
+#define MALI_DEBUG_PRINT(string,args) do {} while(0)
+#define MALI_DEBUG_PRINT_ERROR(args) do {} while(0)
+#define MALI_DEBUG_PRINT_IF(level,condition,args) do {} while(0)
+#define MALI_DEBUG_PRINT_ELSE(level,condition,args) do {} while(0)
+#define MALI_DEBUG_PRINT_ASSERT(condition,args) do {} while(0)
+#define MALI_DEBUG_ASSERT_POINTER(pointer) do {} while(0)
+#define MALI_DEBUG_ASSERT(condition) do {} while(0)
+
+#endif /* DEBUG */
+
+/**
+ * variables from user space cannot be dereferenced from kernel space; tagging them
+ * with __user allows the GCC compiler to generate a warning. Other compilers may
+ * not support this so we define it here as an empty macro if the compiler doesn't
+ * define it.
+ */
+#ifndef __user
+#define __user
+#endif
+
+#endif /* __MALI_KERNEL_COMMON_H__ */
diff --git a/mali/common/mali_kernel_core.c b/mali/common/mali_kernel_core.c
new file mode 100755
index 0000000..bee4033
--- /dev/null
+++ b/mali/common/mali_kernel_core.c
@@ -0,0 +1,1349 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_ukk.h"
+#include "mali_kernel_core.h"
+#include "mali_memory.h"
+#include "mali_mem_validation.h"
+#include "mali_mmu.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_dlbu.h"
+#include "mali_broadcast.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_executor.h"
+#include "mali_pp_job.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+#include "mali_pmu.h"
+#include "mali_scheduler.h"
+#include "mali_kernel_utilization.h"
+#include "mali_l2_cache.h"
+#include "mali_timeline.h"
+#include "mali_soft_job.h"
+#include "mali_pm_domain.h"
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include "mali_profiling_internal.h"
+#endif
+#include "mali_control_timer.h"
+#include "mali_dvfs_policy.h"
+#include <linux/sched.h>
+#include <linux/atomic.h>
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+#include <linux/dma-fence.h>
+#else
+#include <linux/fence.h>
+#endif
+#endif
+
+#define MALI_SHARED_MEMORY_DEFAULT_SIZE 0xffffffff
+
+/* Mali GPU memory. Real values come from module parameter or from device specific data */
+unsigned int mali_dedicated_mem_start = 0;
+unsigned int mali_dedicated_mem_size = 0;
+
+/* Default shared memory size is set to 4G. */
+unsigned int mali_shared_mem_size = MALI_SHARED_MEMORY_DEFAULT_SIZE;
+
+/* Frame buffer memory to be accessible by Mali GPU */
+int mali_fb_start = 0;
+int mali_fb_size = 0;
+
+/* Mali max job runtime */
+extern int mali_max_job_runtime;
+
+/** Start profiling from module load? */
+int mali_boot_profiling = 0;
+
+/** Limits for the number of PP cores behind each L2 cache. */
+int mali_max_pp_cores_group_1 = 0xFF;
+int mali_max_pp_cores_group_2 = 0xFF;
+
+int mali_inited_pp_cores_group_1 = 0;
+int mali_inited_pp_cores_group_2 = 0;
+
+static _mali_product_id_t global_product_id = _MALI_PRODUCT_ID_UNKNOWN;
+static uintptr_t global_gpu_base_address = 0;
+static u32 global_gpu_major_version = 0;
+static u32 global_gpu_minor_version = 0;
+
+mali_bool mali_gpu_class_is_mali450 = MALI_FALSE;
+mali_bool mali_gpu_class_is_mali470 = MALI_FALSE;
+
+static _mali_osk_errcode_t mali_set_global_gpu_base_address(void)
+{
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+	global_gpu_base_address = _mali_osk_resource_base_address();
+	if (0 == global_gpu_base_address) {
+		err = _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	return err;
+}
+
+static u32 mali_get_bcast_id(_mali_osk_resource_t *resource_pp)
+{
+	switch (resource_pp->base - global_gpu_base_address) {
+	case 0x08000:
+	case 0x20000: /* fall-through for aliased mapping */
+		return 0x01;
+	case 0x0A000:
+	case 0x22000: /* fall-through for aliased mapping */
+		return 0x02;
+	case 0x0C000:
+	case 0x24000: /* fall-through for aliased mapping */
+		return 0x04;
+	case 0x0E000:
+	case 0x26000: /* fall-through for aliased mapping */
+		return 0x08;
+	case 0x28000:
+		return 0x10;
+	case 0x2A000:
+		return 0x20;
+	case 0x2C000:
+		return 0x40;
+	case 0x2E000:
+		return 0x80;
+	default:
+		return 0;
+	}
+}
+
+static _mali_osk_errcode_t mali_parse_product_info(void)
+{
+	_mali_osk_resource_t first_pp_resource;
+
+	/* Find the first PP core resource (again) */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_PP0, &first_pp_resource)) {
+		/* Create a dummy PP object for this core so that we can read the version register */
+		struct mali_group *group = mali_group_create(NULL, NULL, NULL, MALI_DOMAIN_INDEX_PP0);
+		if (NULL != group) {
+			struct mali_pp_core *pp_core = mali_pp_create(&first_pp_resource, group, MALI_FALSE, mali_get_bcast_id(&first_pp_resource));
+			if (NULL != pp_core) {
+				u32 pp_version;
+
+				pp_version = mali_pp_core_get_version(pp_core);
+
+				mali_group_delete(group);
+
+				global_gpu_major_version = (pp_version >> 8) & 0xFF;
+				global_gpu_minor_version = pp_version & 0xFF;
+
+				switch (pp_version >> 16) {
+				case MALI200_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI200;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-200 r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					MALI_PRINT_ERROR(("Mali-200 is not supported by this driver.\n"));
+					_mali_osk_abort();
+					break;
+				case MALI300_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI300;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-300 r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				case MALI400_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI400;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-400 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				case MALI450_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI450;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-450 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				case MALI470_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI470;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-470 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				default:
+					MALI_DEBUG_PRINT(2, ("Found unknown Mali GPU (r%up%u)\n", global_gpu_major_version, global_gpu_minor_version));
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				return _MALI_OSK_ERR_OK;
+			} else {
+				MALI_PRINT_ERROR(("Failed to create initial PP object\n"));
+			}
+		} else {
+			MALI_PRINT_ERROR(("Failed to create initial group object\n"));
+		}
+	} else {
+		MALI_PRINT_ERROR(("First PP core not specified in config file\n"));
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+static void mali_delete_groups(void)
+{
+	struct mali_group *group;
+
+	group = mali_group_get_glob_group(0);
+	while (NULL != group) {
+		mali_group_delete(group);
+		group = mali_group_get_glob_group(0);
+	}
+
+	MALI_DEBUG_ASSERT(0 == mali_group_get_glob_num_groups());
+}
+
+static void mali_delete_l2_cache_cores(void)
+{
+	struct mali_l2_cache_core *l2;
+
+	l2 = mali_l2_cache_core_get_glob_l2_core(0);
+	while (NULL != l2) {
+		mali_l2_cache_delete(l2);
+		l2 = mali_l2_cache_core_get_glob_l2_core(0);
+	}
+
+	MALI_DEBUG_ASSERT(0 == mali_l2_cache_core_get_glob_num_l2_cores());
+}
+
+static struct mali_l2_cache_core *mali_create_l2_cache_core(_mali_osk_resource_t *resource, u32 domain_index)
+{
+	struct mali_l2_cache_core *l2_cache = NULL;
+
+	if (NULL != resource) {
+
+		MALI_DEBUG_PRINT(3, ("Found L2 cache %s\n", resource->description));
+
+		l2_cache = mali_l2_cache_create(resource, domain_index);
+		if (NULL == l2_cache) {
+			MALI_PRINT_ERROR(("Failed to create L2 cache object\n"));
+			return NULL;
+		}
+	}
+	MALI_DEBUG_PRINT(3, ("Created L2 cache core object\n"));
+
+	return l2_cache;
+}
+
+static _mali_osk_errcode_t mali_parse_config_l2_cache(void)
+{
+	struct mali_l2_cache_core *l2_cache = NULL;
+
+	if (mali_is_mali400()) {
+		_mali_osk_resource_t l2_resource;
+		if (_MALI_OSK_ERR_OK != _mali_osk_resource_find(MALI400_OFFSET_L2_CACHE0, &l2_resource)) {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		l2_cache = mali_create_l2_cache_core(&l2_resource, MALI_DOMAIN_INDEX_L20);
+		if (NULL == l2_cache) {
+			return _MALI_OSK_ERR_FAULT;
+		}
+	} else if (mali_is_mali450()) {
+		/*
+		 * L2 for GP    at 0x10000
+		 * L2 for PP0-3 at 0x01000
+		 * L2 for PP4-7 at 0x11000 (optional)
+		 */
+
+		_mali_osk_resource_t l2_gp_resource;
+		_mali_osk_resource_t l2_pp_grp0_resource;
+		_mali_osk_resource_t l2_pp_grp1_resource;
+
+		/* Make cluster for GP's L2 */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE0, &l2_gp_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for GP\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_gp_resource, MALI_DOMAIN_INDEX_L20);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for GP in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		/* Find corresponding l2 domain */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE1, &l2_pp_grp0_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 0\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_pp_grp0_resource, MALI_DOMAIN_INDEX_L21);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for PP group 0 in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		/* Second PP core group is optional, don't fail if we don't find it */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE2, &l2_pp_grp1_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 1\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_pp_grp1_resource, MALI_DOMAIN_INDEX_L22);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		}
+	} else if (mali_is_mali470()) {
+		_mali_osk_resource_t l2c1_resource;
+
+		/* Make cluster for L2C1 */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI470_OFFSET_L2_CACHE1, &l2c1_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-470 L2 cache 1\n"));
+			l2_cache = mali_create_l2_cache_core(&l2c1_resource, MALI_DOMAIN_INDEX_L21);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for L2C1\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static struct mali_group *mali_create_group(struct mali_l2_cache_core *cache,
+		_mali_osk_resource_t *resource_mmu,
+		_mali_osk_resource_t *resource_gp,
+		_mali_osk_resource_t *resource_pp,
+		u32 domain_index)
+{
+	struct mali_mmu_core *mmu;
+	struct mali_group *group;
+
+	MALI_DEBUG_PRINT(3, ("Starting new group for MMU %s\n", resource_mmu->description));
+
+	/* Create the group object */
+	group = mali_group_create(cache, NULL, NULL, domain_index);
+	if (NULL == group) {
+		MALI_PRINT_ERROR(("Failed to create group object for MMU %s\n", resource_mmu->description));
+		return NULL;
+	}
+
+	/* Create the MMU object inside group */
+	mmu = mali_mmu_create(resource_mmu, group, MALI_FALSE);
+	if (NULL == mmu) {
+		MALI_PRINT_ERROR(("Failed to create MMU object\n"));
+		mali_group_delete(group);
+		return NULL;
+	}
+
+	if (NULL != resource_gp) {
+		/* Create the GP core object inside this group */
+		struct mali_gp_core *gp_core = mali_gp_create(resource_gp, group);
+		if (NULL == gp_core) {
+			/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+			MALI_PRINT_ERROR(("Failed to create GP object\n"));
+			mali_group_delete(group);
+			return NULL;
+		}
+	}
+
+	if (NULL != resource_pp) {
+		struct mali_pp_core *pp_core;
+
+		/* Create the PP core object inside this group */
+		pp_core = mali_pp_create(resource_pp, group, MALI_FALSE, mali_get_bcast_id(resource_pp));
+		if (NULL == pp_core) {
+			/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+			MALI_PRINT_ERROR(("Failed to create PP object\n"));
+			mali_group_delete(group);
+			return NULL;
+		}
+	}
+
+	return group;
+}
+
+static _mali_osk_errcode_t mali_create_virtual_group(_mali_osk_resource_t *resource_mmu_pp_bcast,
+		_mali_osk_resource_t *resource_pp_bcast,
+		_mali_osk_resource_t *resource_dlbu,
+		_mali_osk_resource_t *resource_bcast)
+{
+	struct mali_mmu_core *mmu_pp_bcast_core;
+	struct mali_pp_core *pp_bcast_core;
+	struct mali_dlbu_core *dlbu_core;
+	struct mali_bcast_unit *bcast_core;
+	struct mali_group *group;
+
+	MALI_DEBUG_PRINT(2, ("Starting new virtual group for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description));
+
+	/* Create the DLBU core object */
+	dlbu_core = mali_dlbu_create(resource_dlbu);
+	if (NULL == dlbu_core) {
+		MALI_PRINT_ERROR(("Failed to create DLBU object \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the Broadcast unit core */
+	bcast_core = mali_bcast_unit_create(resource_bcast);
+	if (NULL == bcast_core) {
+		MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n"));
+		mali_dlbu_delete(dlbu_core);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the group object */
+#if defined(DEBUG)
+	/* Get a physical PP group to temporarily add to broadcast unit.  IRQ
+	 * verification needs a physical group in the broadcast unit to test
+	 * the broadcast unit interrupt line. */
+	{
+		struct mali_group *phys_group = NULL;
+		int i;
+		for (i = 0; i < mali_group_get_glob_num_groups(); i++) {
+			phys_group = mali_group_get_glob_group(i);
+			if (NULL != mali_group_get_pp_core(phys_group)) break;
+		}
+		MALI_DEBUG_ASSERT(NULL != mali_group_get_pp_core(phys_group));
+
+		/* Add the group temporarily to the broadcast, and update the
+		 * broadcast HW. Since the HW is not updated when removing the
+		 * group the IRQ check will work when the virtual PP is created
+		 * later.
+		 *
+		 * When the virtual group gets populated, the actually used
+		 * groups will be added to the broadcast unit and the HW will
+		 * be updated.
+		 */
+		mali_bcast_add_group(bcast_core, phys_group);
+		mali_bcast_reset(bcast_core);
+		mali_bcast_remove_group(bcast_core, phys_group);
+	}
+#endif /* DEBUG */
+	group = mali_group_create(NULL, dlbu_core, bcast_core, MALI_DOMAIN_INDEX_DUMMY);
+	if (NULL == group) {
+		MALI_PRINT_ERROR(("Failed to create group object for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description));
+		mali_bcast_unit_delete(bcast_core);
+		mali_dlbu_delete(dlbu_core);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the MMU object inside group */
+	mmu_pp_bcast_core = mali_mmu_create(resource_mmu_pp_bcast, group, MALI_TRUE);
+	if (NULL == mmu_pp_bcast_core) {
+		MALI_PRINT_ERROR(("Failed to create MMU PP broadcast object\n"));
+		mali_group_delete(group);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the PP core object inside this group */
+	pp_bcast_core = mali_pp_create(resource_pp_bcast, group, MALI_TRUE, 0);
+	if (NULL == pp_bcast_core) {
+		/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+		MALI_PRINT_ERROR(("Failed to create PP object\n"));
+		mali_group_delete(group);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_groups(void)
+{
+	struct mali_group *group;
+	int cluster_id_gp = 0;
+	int cluster_id_pp_grp0 = 0;
+	int cluster_id_pp_grp1 = 0;
+	int i;
+
+	_mali_osk_resource_t resource_gp;
+	_mali_osk_resource_t resource_gp_mmu;
+	_mali_osk_resource_t resource_pp[8];
+	_mali_osk_resource_t resource_pp_mmu[8];
+	_mali_osk_resource_t resource_pp_mmu_bcast;
+	_mali_osk_resource_t resource_pp_bcast;
+	_mali_osk_resource_t resource_dlbu;
+	_mali_osk_resource_t resource_bcast;
+	_mali_osk_errcode_t resource_gp_found;
+	_mali_osk_errcode_t resource_gp_mmu_found;
+	_mali_osk_errcode_t resource_pp_found[8];
+	_mali_osk_errcode_t resource_pp_mmu_found[8];
+	_mali_osk_errcode_t resource_pp_mmu_bcast_found;
+	_mali_osk_errcode_t resource_pp_bcast_found;
+	_mali_osk_errcode_t resource_dlbu_found;
+	_mali_osk_errcode_t resource_bcast_found;
+
+	if (!(mali_is_mali400() || mali_is_mali450() || mali_is_mali470())) {
+		/* No known HW core */
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (MALI_MAX_JOB_RUNTIME_DEFAULT == mali_max_job_runtime) {
+		/* Group settings are not overridden by module parameters, so use device settings */
+		_mali_osk_device_data data = { 0, };
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+			/* Use device specific settings (if defined) */
+			if (0 != data.max_job_runtime) {
+				mali_max_job_runtime = data.max_job_runtime;
+			}
+		}
+	}
+
+	if (mali_is_mali450()) {
+		/* Mali-450 have separate L2s for GP, and PP core group(s) */
+		cluster_id_pp_grp0 = 1;
+		cluster_id_pp_grp1 = 2;
+	}
+
+	resource_gp_found = _mali_osk_resource_find(MALI_OFFSET_GP, &resource_gp);
+	resource_gp_mmu_found = _mali_osk_resource_find(MALI_OFFSET_GP_MMU, &resource_gp_mmu);
+	resource_pp_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0, &(resource_pp[0]));
+	resource_pp_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1, &(resource_pp[1]));
+	resource_pp_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2, &(resource_pp[2]));
+	resource_pp_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3, &(resource_pp[3]));
+	resource_pp_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4, &(resource_pp[4]));
+	resource_pp_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5, &(resource_pp[5]));
+	resource_pp_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6, &(resource_pp[6]));
+	resource_pp_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7, &(resource_pp[7]));
+	resource_pp_mmu_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0_MMU, &(resource_pp_mmu[0]));
+	resource_pp_mmu_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1_MMU, &(resource_pp_mmu[1]));
+	resource_pp_mmu_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2_MMU, &(resource_pp_mmu[2]));
+	resource_pp_mmu_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3_MMU, &(resource_pp_mmu[3]));
+	resource_pp_mmu_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4_MMU, &(resource_pp_mmu[4]));
+	resource_pp_mmu_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5_MMU, &(resource_pp_mmu[5]));
+	resource_pp_mmu_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6_MMU, &(resource_pp_mmu[6]));
+	resource_pp_mmu_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7_MMU, &(resource_pp_mmu[7]));
+
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		resource_bcast_found = _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast);
+		resource_dlbu_found = _mali_osk_resource_find(MALI_OFFSET_DLBU, &resource_dlbu);
+		resource_pp_mmu_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST_MMU, &resource_pp_mmu_bcast);
+		resource_pp_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST, &resource_pp_bcast);
+
+		if (_MALI_OSK_ERR_OK != resource_bcast_found ||
+		    _MALI_OSK_ERR_OK != resource_dlbu_found ||
+		    _MALI_OSK_ERR_OK != resource_pp_mmu_bcast_found ||
+		    _MALI_OSK_ERR_OK != resource_pp_bcast_found) {
+			/* Missing mandatory core(s) for Mali-450 or Mali-470 */
+			MALI_DEBUG_PRINT(2, ("Missing mandatory resources, Mali-450 needs DLBU, Broadcast unit, virtual PP core and virtual MMU\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK != resource_gp_found ||
+	    _MALI_OSK_ERR_OK != resource_gp_mmu_found ||
+	    _MALI_OSK_ERR_OK != resource_pp_found[0] ||
+	    _MALI_OSK_ERR_OK != resource_pp_mmu_found[0]) {
+		/* Missing mandatory core(s) */
+		MALI_DEBUG_PRINT(2, ("Missing mandatory resource, need at least one GP and one PP, both with a separate MMU\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT(1 <= mali_l2_cache_core_get_glob_num_l2_cores());
+	group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_gp), &resource_gp_mmu, &resource_gp, NULL, MALI_DOMAIN_INDEX_GP);
+	if (NULL == group) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create group for first (and mandatory) PP core */
+	MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= (cluster_id_pp_grp0 + 1)); /* >= 1 on Mali-300 and Mali-400, >= 2 on Mali-450 */
+	group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[0], NULL, &resource_pp[0], MALI_DOMAIN_INDEX_PP0);
+	if (NULL == group) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	mali_inited_pp_cores_group_1++;
+
+	/* Create groups for rest of the cores in the first PP core group */
+	for (i = 1; i < 4; i++) { /* First half of the PP cores belong to first core group */
+		if (mali_inited_pp_cores_group_1 < mali_max_pp_cores_group_1) {
+			if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) {
+				group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i);
+				if (NULL == group) {
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				mali_inited_pp_cores_group_1++;
+			}
+		}
+	}
+
+	/* Create groups for cores in the second PP core group */
+	for (i = 4; i < 8; i++) { /* Second half of the PP cores belong to second core group */
+		if (mali_inited_pp_cores_group_2 < mali_max_pp_cores_group_2) {
+			if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) {
+				MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= 2); /* Only Mali-450 have a second core group */
+				group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp1), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i);
+				if (NULL == group) {
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				mali_inited_pp_cores_group_2++;
+			}
+		}
+	}
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		_mali_osk_errcode_t err = mali_create_virtual_group(&resource_pp_mmu_bcast, &resource_pp_bcast, &resource_dlbu, &resource_bcast);
+		if (_MALI_OSK_ERR_OK != err) {
+			return err;
+		}
+	}
+
+	mali_max_pp_cores_group_1 = mali_inited_pp_cores_group_1;
+	mali_max_pp_cores_group_2 = mali_inited_pp_cores_group_2;
+	MALI_DEBUG_PRINT(2, ("%d+%d PP cores initialized\n", mali_inited_pp_cores_group_1, mali_inited_pp_cores_group_2));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_check_shared_interrupts(void)
+{
+#if !defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_TRUE == _mali_osk_shared_interrupts()) {
+		MALI_PRINT_ERROR(("Shared interrupts detected, but driver support is not enabled\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* !defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	/* It is OK to compile support for shared interrupts even if Mali is not using it. */
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_pmu(void)
+{
+	_mali_osk_resource_t resource_pmu;
+
+	MALI_DEBUG_ASSERT(0 != global_gpu_base_address);
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_PMU, &resource_pmu)) {
+		struct mali_pmu_core *pmu;
+
+		pmu = mali_pmu_create(&resource_pmu);
+		if (NULL == pmu) {
+			MALI_PRINT_ERROR(("Failed to create PMU\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	/* It's ok if the PMU doesn't exist */
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_memory(void)
+{
+	_mali_osk_device_data data = { 0, };
+	_mali_osk_errcode_t ret;
+
+	/* The priority of setting the value of mali_shared_mem_size,
+	 * mali_dedicated_mem_start and mali_dedicated_mem_size:
+	 * 1. module parameter;
+	 * 2. platform data;
+	 * 3. default value;
+	 **/
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		/* Memory settings are not overridden by module parameters, so use device settings */
+		if (0 == mali_dedicated_mem_start && 0 == mali_dedicated_mem_size) {
+			/* Use device specific settings (if defined) */
+			mali_dedicated_mem_start = data.dedicated_mem_start;
+			mali_dedicated_mem_size = data.dedicated_mem_size;
+		}
+
+		if (MALI_SHARED_MEMORY_DEFAULT_SIZE == mali_shared_mem_size &&
+		    0 != data.shared_mem_size) {
+			mali_shared_mem_size = data.shared_mem_size;
+		}
+	}
+
+	if (0 < mali_dedicated_mem_size && 0 != mali_dedicated_mem_start) {
+		MALI_DEBUG_PRINT(2, ("Mali memory settings (dedicated: 0x%08X@0x%08X)\n",
+				     mali_dedicated_mem_size, mali_dedicated_mem_start));
+
+		/* Dedicated memory */
+		ret = mali_memory_core_resource_dedicated_memory(mali_dedicated_mem_start, mali_dedicated_mem_size);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to register dedicated memory\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	if (0 < mali_shared_mem_size) {
+		MALI_DEBUG_PRINT(2, ("Mali memory settings (shared: 0x%08X)\n", mali_shared_mem_size));
+
+		/* Shared OS memory */
+		ret = mali_memory_core_resource_os_memory(mali_shared_mem_size);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to register shared OS memory\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	if (0 == mali_fb_start && 0 == mali_fb_size) {
+		/* Frame buffer settings are not overridden by module parameters, so use device settings */
+		_mali_osk_device_data data = { 0, };
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+			/* Use device specific settings (if defined) */
+			mali_fb_start = data.fb_start;
+			mali_fb_size = data.fb_size;
+		}
+
+		MALI_DEBUG_PRINT(2, ("Using device defined frame buffer settings (0x%08X@0x%08X)\n",
+				     mali_fb_size, mali_fb_start));
+	} else {
+		MALI_DEBUG_PRINT(2, ("Using module defined frame buffer settings (0x%08X@0x%08X)\n",
+				     mali_fb_size, mali_fb_start));
+	}
+
+	if (0 != mali_fb_size) {
+		/* Register frame buffer */
+		ret = mali_mem_validation_add_range(mali_fb_start, mali_fb_size);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to register frame buffer memory region\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_detect_gpu_class(void)
+{
+	if (_mali_osk_identify_gpu_resource() == 0x450)
+		mali_gpu_class_is_mali450 = MALI_TRUE;
+
+	if (_mali_osk_identify_gpu_resource() == 0x470)
+		mali_gpu_class_is_mali470 = MALI_TRUE;
+}
+
+static _mali_osk_errcode_t mali_init_hw_reset(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	_mali_osk_resource_t resource_bcast;
+
+	/* Ensure broadcast unit is in a good state before we start creating
+	 * groups and cores.
+	 */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast)) {
+		struct mali_bcast_unit *bcast_core;
+
+		bcast_core = mali_bcast_unit_create(&resource_bcast);
+		if (NULL == bcast_core) {
+			MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+		mali_bcast_unit_delete(bcast_core);
+	}
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_initialize_subsystems(void)
+{
+	_mali_osk_errcode_t err;
+
+#ifdef CONFIG_MALI_DT
+	err = _mali_osk_resource_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+#endif
+
+	mali_pp_job_initialize();
+
+	err = mali_timeline_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_session_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/*Try to init gpu secure mode */
+	_mali_osk_gpu_secure_mode_init();
+
+#if defined(CONFIG_MALI400_PROFILING)
+	err = _mali_osk_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE);
+	if (_MALI_OSK_ERR_OK != err) {
+		/* No biggie if we weren't able to initialize the profiling */
+		MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n"));
+	}
+#endif
+
+	err = mali_memory_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_executor_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_scheduler_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Configure memory early, needed by mali_mmu_initialize. */
+	err = mali_parse_config_memory();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_set_global_gpu_base_address();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Detect GPU class (uses L2 cache count) */
+	mali_detect_gpu_class();
+
+	err = mali_check_shared_interrupts();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the MALI PMU (will not touch HW!) */
+	err = mali_parse_config_pmu();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the power management module */
+	err = mali_pm_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Make sure the entire GPU stays on for the rest of this function */
+	mali_pm_init_begin();
+
+	/* Ensure HW is in a good state before starting to access cores. */
+	err = mali_init_hw_reset();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Detect which Mali GPU we are dealing with */
+	err = mali_parse_product_info();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* The global_product_id is now populated with the correct Mali GPU */
+
+	/* Start configuring the actual Mali hardware. */
+
+	err = mali_mmu_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		err = mali_dlbu_initialize();
+		if (_MALI_OSK_ERR_OK != err) {
+			mali_pm_init_end();
+			mali_terminate_subsystems();
+			return err;
+		}
+	}
+
+	err = mali_parse_config_l2_cache();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_parse_config_groups();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Move groups into executor */
+	mali_executor_populate();
+
+	/* Need call after all group has assigned a domain */
+	mali_pm_power_cost_setup();
+
+	/* Initialize the GPU timer */
+	err = mali_control_timer_init();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the GPU utilization tracking */
+	err = mali_utilization_init();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+#if defined(CONFIG_MALI_DVFS)
+	err = mali_dvfs_policy_init();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+#endif
+
+	/* Allowing the system to be turned off */
+	mali_pm_init_end();
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+void mali_terminate_subsystems(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	MALI_DEBUG_PRINT(2, ("terminate_subsystems() called\n"));
+
+	mali_utilization_term();
+	mali_control_timer_term();
+
+	mali_executor_depopulate();
+	mali_delete_groups(); /* Delete groups not added to executor */
+	mali_executor_terminate();
+
+	mali_scheduler_terminate();
+	mali_pp_job_terminate();
+	mali_delete_l2_cache_cores();
+	mali_mmu_terminate();
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		mali_dlbu_terminate();
+	}
+
+	mali_pm_terminate();
+
+	if (NULL != pmu) {
+		mali_pmu_delete(pmu);
+	}
+
+#if defined(CONFIG_MALI400_PROFILING)
+	_mali_osk_profiling_term();
+#endif
+
+	_mali_osk_gpu_secure_mode_deinit();
+
+	mali_memory_terminate();
+
+	mali_session_terminate();
+
+	mali_timeline_terminate();
+
+	global_gpu_base_address = 0;
+}
+
+_mali_product_id_t mali_kernel_core_get_product_id(void)
+{
+	return global_product_id;
+}
+
+u32 mali_kernel_core_get_gpu_major_version(void)
+{
+	return global_gpu_major_version;
+}
+
+u32 mali_kernel_core_get_gpu_minor_version(void)
+{
+	return global_gpu_minor_version;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	/* check compatability */
+	if (args->version == _MALI_UK_API_VERSION) {
+		args->compatible = 1;
+	} else {
+		args->compatible = 0;
+	}
+
+	args->version = _MALI_UK_API_VERSION; /* report our version */
+
+	/* success regardless of being compatible or not */
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	/* check compatability */
+	if (args->version == _MALI_UK_API_VERSION) {
+		args->compatible = 1;
+	} else {
+		args->compatible = 0;
+	}
+
+	args->version = _MALI_UK_API_VERSION; /* report our version */
+
+	/* success regardless of being compatible or not */
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args)
+{
+	_mali_osk_errcode_t err;
+	_mali_osk_notification_t *notification;
+	_mali_osk_notification_queue_t *queue;
+	struct mali_session_data *session;
+
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	queue = session->ioctl_queue;
+
+	/* if the queue does not exist we're currently shutting down */
+	if (NULL == queue) {
+		MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. Asking userspace to stop querying\n"));
+		args->type = _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS;
+		return _MALI_OSK_ERR_OK;
+	}
+
+	/* receive a notification, might sleep */
+	err = _mali_osk_notification_queue_receive(queue, &notification);
+	if (_MALI_OSK_ERR_OK != err) {
+		MALI_ERROR(err); /* errcode returned, pass on to caller */
+	}
+
+	/* copy the buffer to the user */
+	args->type = (_mali_uk_notification_type)notification->notification_type;
+	_mali_osk_memcpy(&args->data, notification->result_buffer, notification->result_buffer_size);
+
+	/* finished with the notification */
+	_mali_osk_notification_delete(notification);
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args)
+{
+	_mali_osk_notification_t *notification;
+	_mali_osk_notification_queue_t *queue;
+	struct mali_session_data *session;
+
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	queue = session->ioctl_queue;
+
+	/* if the queue does not exist we're currently shutting down */
+	if (NULL == queue) {
+		MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. Asking userspace to stop querying\n"));
+		return _MALI_OSK_ERR_OK;
+	}
+
+	notification = _mali_osk_notification_create(args->type, 0);
+	if (NULL == notification) {
+		MALI_PRINT_ERROR(("Failed to create notification object\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	_mali_osk_notification_queue_send(queue, notification);
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+_mali_osk_errcode_t _mali_ukk_pending_submit(_mali_uk_pending_submit_s *args)
+{
+	wait_queue_head_t *queue;
+
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	queue = mali_session_get_wait_queue();
+
+	/* check pending big job number, might sleep if larger than MAX allowed number */
+	if (wait_event_interruptible(*queue, MALI_MAX_PENDING_BIG_JOB > mali_scheduler_job_gp_big_job_count())) {
+		return _MALI_OSK_ERR_RESTARTSYSCALL;
+	}
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+
+_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (!session->use_high_priority_job_queue) {
+		session->use_high_priority_job_queue = MALI_TRUE;
+		MALI_DEBUG_PRINT(2, ("Session 0x%08X with pid %d was granted higher priority.\n", session, _mali_osk_get_pid()));
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_open(void **context)
+{
+	u32 i;
+	struct mali_session_data *session;
+
+	/* allocated struct to track this session */
+	session = (struct mali_session_data *)_mali_osk_calloc(1, sizeof(struct mali_session_data));
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_NOMEM);
+
+	MALI_DEBUG_PRINT(3, ("Session starting\n"));
+
+	/* create a response queue for this session */
+	session->ioctl_queue = _mali_osk_notification_queue_init();
+	if (NULL == session->ioctl_queue) {
+		goto err;
+	}
+
+	/*create a wait queue for this session */
+	session->wait_queue = _mali_osk_wait_queue_init();
+	if (NULL == session->wait_queue) {
+		goto err_wait_queue;
+	}
+
+	session->page_directory = mali_mmu_pagedir_alloc();
+	if (NULL == session->page_directory) {
+		goto err_mmu;
+	}
+
+	if (_MALI_OSK_ERR_OK != mali_mmu_pagedir_map(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE)) {
+		MALI_PRINT_ERROR(("Failed to map DLBU page into session\n"));
+		goto err_mmu;
+	}
+
+	if (0 != mali_dlbu_phys_addr) {
+		mali_mmu_pagedir_update(session->page_directory, MALI_DLBU_VIRT_ADDR, mali_dlbu_phys_addr,
+					_MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+	}
+
+	if (_MALI_OSK_ERR_OK != mali_memory_session_begin(session)) {
+		goto err_session;
+	}
+
+	/* Create soft system. */
+	session->soft_job_system = mali_soft_job_system_create(session);
+	if (NULL == session->soft_job_system) {
+		goto err_soft;
+	}
+
+	/* Initialize the dma fence context.*/
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	session->fence_context = dma_fence_context_alloc(1);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+	session->fence_context = fence_context_alloc(1);
+	_mali_osk_atomic_init(&session->fence_seqno, 0);
+#else
+	MALI_PRINT_ERROR(("The kernel version not support dma fence!\n"));
+	goto err_time_line;
+#endif
+#endif
+
+	/* Create timeline system. */
+	session->timeline_system = mali_timeline_system_create(session);
+	if (NULL == session->timeline_system) {
+		goto err_time_line;
+	}
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_init(&session->number_of_window_jobs, 0);
+#endif
+
+	_mali_osk_atomic_init(&session->number_of_pp_jobs, 0);
+
+	session->use_high_priority_job_queue = MALI_FALSE;
+
+	/* Initialize list of PP jobs on this session. */
+	_MALI_OSK_INIT_LIST_HEAD(&session->pp_job_list);
+
+	/* Initialize the pp_job_fb_lookup_list array used to quickly lookup jobs from a given frame builder */
+	for (i = 0; i < MALI_PP_JOB_FB_LOOKUP_LIST_SIZE; ++i) {
+		_MALI_OSK_INIT_LIST_HEAD(&session->pp_job_fb_lookup_list[i]);
+	}
+
+	session->pid = _mali_osk_get_pid();
+	session->comm = _mali_osk_get_comm();
+	session->max_mali_mem_allocated_size = 0;
+	for (i = 0; i < MALI_MEM_TYPE_MAX; i ++) {
+		atomic_set(&session->mali_mem_array[i], 0);
+	}
+	atomic_set(&session->mali_mem_allocated_pages, 0);
+	*context = (void *)session;
+
+	/* Add session to the list of all sessions. */
+	mali_session_add(session);
+
+	MALI_DEBUG_PRINT(3, ("Session started\n"));
+	return _MALI_OSK_ERR_OK;
+
+err_time_line:
+	mali_soft_job_system_destroy(session->soft_job_system);
+err_soft:
+	mali_memory_session_end(session);
+err_session:
+	mali_mmu_pagedir_free(session->page_directory);
+err_mmu:
+	_mali_osk_wait_queue_term(session->wait_queue);
+err_wait_queue:
+	_mali_osk_notification_queue_term(session->ioctl_queue);
+err:
+	_mali_osk_free(session);
+	MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+}
+
+#if defined(DEBUG)
+/* parameter used for debug */
+extern u32 num_pm_runtime_resume;
+extern u32 num_pm_updates;
+extern u32 num_pm_updates_up;
+extern u32 num_pm_updates_down;
+#endif
+
+_mali_osk_errcode_t _mali_ukk_close(void **context)
+{
+	struct mali_session_data *session;
+	MALI_CHECK_NON_NULL(context, _MALI_OSK_ERR_INVALID_ARGS);
+	session = (struct mali_session_data *)*context;
+
+	MALI_DEBUG_PRINT(3, ("Session ending\n"));
+
+	MALI_DEBUG_ASSERT_POINTER(session->soft_job_system);
+	MALI_DEBUG_ASSERT_POINTER(session->timeline_system);
+
+	/* Remove session from list of all sessions. */
+	mali_session_remove(session);
+
+	/* This flag is used to prevent queueing of jobs due to activation. */
+	session->is_aborting = MALI_TRUE;
+
+	/* Stop the soft job timer. */
+	mali_timeline_system_stop_timer(session->timeline_system);
+
+	/* Abort queued jobs */
+	mali_scheduler_abort_session(session);
+
+	/* Abort executing jobs */
+	mali_executor_abort_session(session);
+
+	/* Abort the soft job system. */
+	mali_soft_job_system_abort(session->soft_job_system);
+
+	/* Force execution of all pending bottom half processing for GP and PP. */
+	_mali_osk_wq_flush();
+
+	/* The session PP list should now be empty. */
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&session->pp_job_list));
+
+	/* At this point the GP and PP scheduler no longer has any jobs queued or running from this
+	 * session, and all soft jobs in the soft job system has been destroyed. */
+
+	/* Any trackers left in the timeline system are directly or indirectly waiting on external
+	 * sync fences.  Cancel all sync fence waiters to trigger activation of all remaining
+	 * trackers.  This call will sleep until all timelines are empty. */
+	mali_timeline_system_abort(session->timeline_system);
+
+	/* Flush pending work.
+	 * Needed to make sure all bottom half processing related to this
+	 * session has been completed, before we free internal data structures.
+	 */
+	_mali_osk_wq_flush();
+
+	/* Destroy timeline system. */
+	mali_timeline_system_destroy(session->timeline_system);
+	session->timeline_system = NULL;
+
+	/* Destroy soft system. */
+	mali_soft_job_system_destroy(session->soft_job_system);
+	session->soft_job_system = NULL;
+
+	/*Wait for the session job lists become empty.*/
+	_mali_osk_wait_queue_wait_event(session->wait_queue, mali_session_pp_job_is_empty, (void *) session);
+
+	/* Free remaining memory allocated to this session */
+	mali_memory_session_end(session);
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_term(&session->number_of_window_jobs);
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+	_mali_osk_profiling_stop_sampling(session->pid);
+#endif
+
+	/* Free session data structures */
+	mali_mmu_pagedir_unmap(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE);
+	mali_mmu_pagedir_free(session->page_directory);
+	_mali_osk_wait_queue_term(session->wait_queue);
+	_mali_osk_notification_queue_term(session->ioctl_queue);
+	_mali_osk_free(session);
+
+	*context = NULL;
+
+	MALI_DEBUG_PRINT(3, ("Session has ended\n"));
+
+#if defined(DEBUG)
+	MALI_DEBUG_PRINT(3, ("Stats: # runtime resumes: %u\n", num_pm_runtime_resume));
+	MALI_DEBUG_PRINT(3, ("       # PM updates: .... %u (up %u, down %u)\n", num_pm_updates, num_pm_updates_up, num_pm_updates_down));
+
+	num_pm_runtime_resume = 0;
+	num_pm_updates = 0;
+	num_pm_updates_up = 0;
+	num_pm_updates_down = 0;
+#endif
+
+	return _MALI_OSK_ERR_OK;;
+}
+
+#if MALI_STATE_TRACKING
+u32 _mali_kernel_core_dump_state(char *buf, u32 size)
+{
+	int n = 0; /* Number of bytes written to buf */
+
+	n += mali_scheduler_dump_state(buf + n, size - n);
+	n += mali_executor_dump_state(buf + n, size - n);
+
+	return n;
+}
+#endif
diff --git a/mali/common/mali_kernel_core.h b/mali/common/mali_kernel_core.h
new file mode 100644
index 0000000..cf6af32
--- /dev/null
+++ b/mali/common/mali_kernel_core.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_CORE_H__
+#define __MALI_KERNEL_CORE_H__
+
+#include "mali_osk.h"
+
+typedef enum {
+	_MALI_PRODUCT_ID_UNKNOWN,
+	_MALI_PRODUCT_ID_MALI200,
+	_MALI_PRODUCT_ID_MALI300,
+	_MALI_PRODUCT_ID_MALI400,
+	_MALI_PRODUCT_ID_MALI450,
+	_MALI_PRODUCT_ID_MALI470,
+} _mali_product_id_t;
+
+extern mali_bool mali_gpu_class_is_mali450;
+extern mali_bool mali_gpu_class_is_mali470;
+
+_mali_osk_errcode_t mali_initialize_subsystems(void);
+
+void mali_terminate_subsystems(void);
+
+_mali_product_id_t mali_kernel_core_get_product_id(void);
+
+u32 mali_kernel_core_get_gpu_major_version(void);
+
+u32 mali_kernel_core_get_gpu_minor_version(void);
+
+u32 _mali_kernel_core_dump_state(char *buf, u32 size);
+
+MALI_STATIC_INLINE mali_bool mali_is_mali470(void)
+{
+	return mali_gpu_class_is_mali470;
+}
+
+MALI_STATIC_INLINE mali_bool mali_is_mali450(void)
+{
+	return mali_gpu_class_is_mali450;
+}
+
+MALI_STATIC_INLINE mali_bool mali_is_mali400(void)
+{
+	if (mali_gpu_class_is_mali450 || mali_gpu_class_is_mali470)
+		return MALI_FALSE;
+
+	return MALI_TRUE;
+}
+#endif /* __MALI_KERNEL_CORE_H__ */
diff --git a/mali/common/mali_kernel_utilization.c b/mali/common/mali_kernel_utilization.c
new file mode 100755
index 0000000..ca7ebea
--- /dev/null
+++ b/mali/common/mali_kernel_utilization.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_utilization.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_scheduler.h"
+
+#include "mali_executor.h"
+#include "mali_dvfs_policy.h"
+#include "mali_control_timer.h"
+
+/* Thresholds for GP bound detection. */
+#define MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD 240
+#define MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD 250
+
+static _mali_osk_spinlock_irq_t *utilization_data_lock;
+
+static u32 num_running_gp_cores = 0;
+static u32 num_running_pp_cores = 0;
+
+static u64 work_start_time_gpu = 0;
+static u64 work_start_time_gp = 0;
+static u64 work_start_time_pp = 0;
+static u64 accumulated_work_time_gpu = 0;
+static u64 accumulated_work_time_gp = 0;
+static u64 accumulated_work_time_pp = 0;
+
+static u32 last_utilization_gpu = 0 ;
+static u32 last_utilization_gp = 0 ;
+static u32 last_utilization_pp = 0 ;
+
+void (*mali_utilization_callback)(struct mali_gpu_utilization_data *data) = NULL;
+
+/* Define the first timer control timer timeout in milliseconds */
+static u32 mali_control_first_timeout = 100;
+static struct mali_gpu_utilization_data mali_util_data = {0, };
+
+struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer)
+{
+	u64 time_now;
+	u32 leading_zeroes;
+	u32 shift_val;
+	u32 work_normalized_gpu;
+	u32 work_normalized_gp;
+	u32 work_normalized_pp;
+	u32 period_normalized;
+	u32 utilization_gpu;
+	u32 utilization_gp;
+	u32 utilization_pp;
+
+	mali_utilization_data_lock();
+
+	time_now = _mali_osk_time_get_ns();
+
+	*time_period = time_now - *start_time;
+
+	if (accumulated_work_time_gpu == 0 && work_start_time_gpu == 0) {
+		mali_control_timer_pause();
+		/*
+		 * No work done for this period
+		 * - No need to reschedule timer
+		 * - Report zero usage
+		 */
+		last_utilization_gpu = 0;
+		last_utilization_gp = 0;
+		last_utilization_pp = 0;
+
+		mali_util_data.utilization_gpu = last_utilization_gpu;
+		mali_util_data.utilization_gp = last_utilization_gp;
+		mali_util_data.utilization_pp = last_utilization_pp;
+
+		mali_utilization_data_unlock();
+
+		*need_add_timer = MALI_FALSE;
+
+		mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
+
+		MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
+		MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
+		MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));
+
+		return &mali_util_data;
+	}
+
+	/* If we are currently busy, update working period up to now */
+	if (work_start_time_gpu != 0) {
+		accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+		work_start_time_gpu = time_now;
+
+		/* GP and/or PP will also be busy if the GPU is busy at this point */
+
+		if (work_start_time_gp != 0) {
+			accumulated_work_time_gp += (time_now - work_start_time_gp);
+			work_start_time_gp = time_now;
+		}
+
+		if (work_start_time_pp != 0) {
+			accumulated_work_time_pp += (time_now - work_start_time_pp);
+			work_start_time_pp = time_now;
+		}
+	}
+
+	/*
+	 * We have two 64-bit values, a dividend and a divisor.
+	 * To avoid dependencies to a 64-bit divider, we shift down the two values
+	 * equally first.
+	 * We shift the dividend up and possibly the divisor down, making the result X in 256.
+	 */
+
+	/* Shift the 64-bit values down so they fit inside a 32-bit integer */
+	leading_zeroes = _mali_osk_clz((u32)(*time_period >> 32));
+	shift_val = 32 - leading_zeroes;
+	work_normalized_gpu = (u32)(accumulated_work_time_gpu >> shift_val);
+	work_normalized_gp = (u32)(accumulated_work_time_gp >> shift_val);
+	work_normalized_pp = (u32)(accumulated_work_time_pp >> shift_val);
+	period_normalized = (u32)(*time_period >> shift_val);
+
+	/*
+	 * Now, we should report the usage in parts of 256
+	 * this means we must shift up the dividend or down the divisor by 8
+	 * (we could do a combination, but we just use one for simplicity,
+	 * but the end result should be good enough anyway)
+	 */
+	if (period_normalized > 0x00FFFFFF) {
+		/* The divisor is so big that it is safe to shift it down */
+		period_normalized >>= 8;
+	} else {
+		/*
+		 * The divisor is so small that we can shift up the dividend, without loosing any data.
+		 * (dividend is always smaller than the divisor)
+		 */
+		work_normalized_gpu <<= 8;
+		work_normalized_gp <<= 8;
+		work_normalized_pp <<= 8;
+	}
+
+	utilization_gpu = work_normalized_gpu / period_normalized;
+	utilization_gp = work_normalized_gp / period_normalized;
+	utilization_pp = work_normalized_pp / period_normalized;
+
+	last_utilization_gpu = utilization_gpu;
+	last_utilization_gp = utilization_gp;
+	last_utilization_pp = utilization_pp;
+
+	if ((MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD < last_utilization_gp) &&
+	    (MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD > last_utilization_pp)) {
+		mali_executor_hint_enable(MALI_EXECUTOR_HINT_GP_BOUND);
+	} else {
+		mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
+	}
+
+	/* starting a new period */
+	accumulated_work_time_gpu = 0;
+	accumulated_work_time_gp = 0;
+	accumulated_work_time_pp = 0;
+
+	*start_time = time_now;
+
+	mali_util_data.utilization_gp = last_utilization_gp;
+	mali_util_data.utilization_gpu = last_utilization_gpu;
+	mali_util_data.utilization_pp = last_utilization_pp;
+
+	mali_utilization_data_unlock();
+
+	*need_add_timer = MALI_TRUE;
+
+	MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
+	MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
+	MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));
+
+	return &mali_util_data;
+}
+
+_mali_osk_errcode_t mali_utilization_init(void)
+{
+#if USING_GPU_UTILIZATION
+	_mali_osk_device_data data;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		if (NULL != data.utilization_callback) {
+			mali_utilization_callback = data.utilization_callback;
+			MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Utilization handler installed \n"));
+		}
+	}
+#endif /* defined(USING_GPU_UTILIZATION) */
+
+	if (NULL == mali_utilization_callback) {
+		MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: No platform utilization handler installed\n"));
+	}
+
+	utilization_data_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_UTILIZATION);
+	if (NULL == utilization_data_lock) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	num_running_gp_cores = 0;
+	num_running_pp_cores = 0;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_utilization_term(void)
+{
+	if (NULL != utilization_data_lock) {
+		_mali_osk_spinlock_irq_term(utilization_data_lock);
+	}
+}
+
+void mali_utilization_gp_start(void)
+{
+	mali_utilization_data_lock();
+
+	++num_running_gp_cores;
+	if (1 == num_running_gp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* First GP core started, consider GP busy from now and onwards */
+		work_start_time_gp = time_now;
+
+		if (0 == num_running_pp_cores) {
+			mali_bool is_resume = MALI_FALSE;
+			/*
+			 * There are no PP cores running, so this is also the point
+			 * at which we consider the GPU to be busy as well.
+			 */
+			work_start_time_gpu = time_now;
+
+			is_resume  = mali_control_timer_resume(time_now);
+
+			mali_utilization_data_unlock();
+
+			if (is_resume) {
+				/* Do some policy in new period for performance consideration */
+#if defined(CONFIG_MALI_DVFS)
+				/* Clear session->number_of_window_jobs, prepare parameter for dvfs */
+				mali_session_max_window_num();
+				if (0 == last_utilization_gpu) {
+					/*
+					 * for mali_dev_pause is called in set clock,
+					 * so each time we change clock, we will set clock to
+					 * highest step even if under down clock case,
+					 * it is not nessesary, so we only set the clock under
+					 * last time utilization equal 0, we stop the timer then
+					 * start the GPU again case
+					 */
+					mali_dvfs_policy_new_period();
+				}
+#endif
+				/*
+				 * First timeout using short interval for power consideration
+				 * because we give full power in the new period, but if the
+				 * job loading is light, finish in 10ms, the other time all keep
+				 * in high freq it will wast time.
+				 */
+				mali_control_timer_add(mali_control_first_timeout);
+			}
+		} else {
+			mali_utilization_data_unlock();
+		}
+
+	} else {
+		/* Nothing to do */
+		mali_utilization_data_unlock();
+	}
+}
+
+void mali_utilization_pp_start(void)
+{
+	mali_utilization_data_lock();
+
+	++num_running_pp_cores;
+	if (1 == num_running_pp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* First PP core started, consider PP busy from now and onwards */
+		work_start_time_pp = time_now;
+
+		if (0 == num_running_gp_cores) {
+			mali_bool is_resume = MALI_FALSE;
+			/*
+			 * There are no GP cores running, so this is also the point
+			 * at which we consider the GPU to be busy as well.
+			 */
+			work_start_time_gpu = time_now;
+
+			/* Start a new period if stoped */
+			is_resume = mali_control_timer_resume(time_now);
+
+			mali_utilization_data_unlock();
+
+			if (is_resume) {
+#if defined(CONFIG_MALI_DVFS)
+				/* Clear session->number_of_window_jobs, prepare parameter for dvfs */
+				mali_session_max_window_num();
+				if (0 == last_utilization_gpu) {
+					/*
+					 * for mali_dev_pause is called in set clock,
+					 * so each time we change clock, we will set clock to
+					 * highest step even if under down clock case,
+					 * it is not nessesary, so we only set the clock under
+					 * last time utilization equal 0, we stop the timer then
+					 * start the GPU again case
+					 */
+					mali_dvfs_policy_new_period();
+				}
+#endif
+
+				/*
+				 * First timeout using short interval for power consideration
+				 * because we give full power in the new period, but if the
+				 * job loading is light, finish in 10ms, the other time all keep
+				 * in high freq it will wast time.
+				 */
+				mali_control_timer_add(mali_control_first_timeout);
+			}
+		} else {
+			mali_utilization_data_unlock();
+		}
+	} else {
+		/* Nothing to do */
+		mali_utilization_data_unlock();
+	}
+}
+
+void mali_utilization_gp_end(void)
+{
+	mali_utilization_data_lock();
+
+	--num_running_gp_cores;
+	if (0 == num_running_gp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* Last GP core ended, consider GP idle from now and onwards */
+		accumulated_work_time_gp += (time_now - work_start_time_gp);
+		work_start_time_gp = 0;
+
+		if (0 == num_running_pp_cores) {
+			/*
+			 * There are no PP cores running, so this is also the point
+			 * at which we consider the GPU to be idle as well.
+			 */
+			accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+			work_start_time_gpu = 0;
+		}
+	}
+
+	mali_utilization_data_unlock();
+}
+
+void mali_utilization_pp_end(void)
+{
+	mali_utilization_data_lock();
+
+	--num_running_pp_cores;
+	if (0 == num_running_pp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* Last PP core ended, consider PP idle from now and onwards */
+		accumulated_work_time_pp += (time_now - work_start_time_pp);
+		work_start_time_pp = 0;
+
+		if (0 == num_running_gp_cores) {
+			/*
+			 * There are no GP cores running, so this is also the point
+			 * at which we consider the GPU to be idle as well.
+			 */
+			accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+			work_start_time_gpu = 0;
+		}
+	}
+
+	mali_utilization_data_unlock();
+}
+
+mali_bool mali_utilization_enabled(void)
+{
+#if defined(CONFIG_MALI_DVFS)
+	return mali_dvfs_policy_enabled();
+#else
+	return (NULL != mali_utilization_callback);
+#endif /* defined(CONFIG_MALI_DVFS) */
+}
+
+void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data)
+{
+	MALI_DEBUG_ASSERT_POINTER(mali_utilization_callback);
+
+	mali_utilization_callback(util_data);
+}
+
+void mali_utilization_reset(void)
+{
+	accumulated_work_time_gpu = 0;
+	accumulated_work_time_gp = 0;
+	accumulated_work_time_pp = 0;
+
+	last_utilization_gpu = 0;
+	last_utilization_gp = 0;
+	last_utilization_pp = 0;
+}
+
+void mali_utilization_data_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(utilization_data_lock);
+}
+
+void mali_utilization_data_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(utilization_data_lock);
+}
+
+void mali_utilization_data_assert_locked(void)
+{
+	MALI_DEBUG_ASSERT_LOCK_HELD(utilization_data_lock);
+}
+
+u32 _mali_ukk_utilization_gp_pp(void)
+{
+	return last_utilization_gpu;
+}
+
+u32 _mali_ukk_utilization_gp(void)
+{
+	return last_utilization_gp;
+}
+
+u32 _mali_ukk_utilization_pp(void)
+{
+	return last_utilization_pp;
+}
diff --git a/mali/common/mali_kernel_utilization.h b/mali/common/mali_kernel_utilization.h
new file mode 100755
index 0000000..5206225
--- /dev/null
+++ b/mali/common/mali_kernel_utilization.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_UTILIZATION_H__
+#define __MALI_KERNEL_UTILIZATION_H__
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_osk.h"
+
+/**
+ * Initialize/start the Mali GPU utilization metrics reporting.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_utilization_init(void);
+
+/**
+ * Terminate the Mali GPU utilization metrics reporting
+ */
+void mali_utilization_term(void);
+
+/**
+ * Check if Mali utilization is enabled
+ */
+mali_bool mali_utilization_enabled(void);
+
+/**
+ * Should be called when a job is about to execute a GP job
+ */
+void mali_utilization_gp_start(void);
+
+/**
+ * Should be called when a job has completed executing a GP job
+ */
+void mali_utilization_gp_end(void);
+
+/**
+ * Should be called when a job is about to execute a PP job
+ */
+void mali_utilization_pp_start(void);
+
+/**
+ * Should be called when a job has completed executing a PP job
+ */
+void mali_utilization_pp_end(void);
+
+/**
+ * Should be called to calcution the GPU utilization
+ */
+struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer);
+
+_mali_osk_spinlock_irq_t *mali_utilization_get_lock(void);
+
+void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data);
+
+void mali_utilization_data_lock(void);
+
+void mali_utilization_data_unlock(void);
+
+void mali_utilization_data_assert_locked(void);
+
+void mali_utilization_reset(void);
+
+
+#endif /* __MALI_KERNEL_UTILIZATION_H__ */
diff --git a/mali/common/mali_kernel_vsync.c b/mali/common/mali_kernel_vsync.c
new file mode 100755
index 0000000..3b2b108
--- /dev/null
+++ b/mali/common/mali_kernel_vsync.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+
+#include "mali_osk_profiling.h"
+
+_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args)
+{
+	_mali_uk_vsync_event event = (_mali_uk_vsync_event)args->event;
+	MALI_IGNORE(event); /* event is not used for release code, and that is OK */
+
+	/*
+	 * Manually generate user space events in kernel space.
+	 * This saves user space from calling kernel space twice in this case.
+	 * We just need to remember to add pid and tid manually.
+	 */
+	if (event == _MALI_UK_VSYNC_EVENT_BEGIN_WAIT) {
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC,
+					      _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0);
+	}
+
+	if (event == _MALI_UK_VSYNC_EVENT_END_WAIT) {
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC,
+					      _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0);
+	}
+
+
+	MALI_DEBUG_PRINT(4, ("Received VSYNC event: %d\n", event));
+	MALI_SUCCESS;
+}
+
diff --git a/mali/common/mali_l2_cache.c b/mali/common/mali_l2_cache.c
new file mode 100755
index 0000000..60c3957
--- /dev/null
+++ b/mali/common/mali_l2_cache.c
@@ -0,0 +1,540 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_hw_core.h"
+#include "mali_scheduler.h"
+#include "mali_pm.h"
+#include "mali_pm_domain.h"
+
+/**
+ * Size of the Mali L2 cache registers in bytes
+ */
+#define MALI400_L2_CACHE_REGISTERS_SIZE 0x30
+
+/**
+ * Mali L2 cache register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_l2_cache_register {
+	MALI400_L2_CACHE_REGISTER_SIZE         = 0x0004,
+	MALI400_L2_CACHE_REGISTER_STATUS       = 0x0008,
+	/*unused                               = 0x000C */
+	MALI400_L2_CACHE_REGISTER_COMMAND      = 0x0010,
+	MALI400_L2_CACHE_REGISTER_CLEAR_PAGE   = 0x0014,
+	MALI400_L2_CACHE_REGISTER_MAX_READS    = 0x0018,
+	MALI400_L2_CACHE_REGISTER_ENABLE       = 0x001C,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0 = 0x0020,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0 = 0x0024,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1 = 0x0028,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1 = 0x002C,
+} mali_l2_cache_register;
+
+/**
+ * Mali L2 cache commands
+ * These are the commands that can be sent to the Mali L2 cache unit
+ */
+typedef enum mali_l2_cache_command {
+	MALI400_L2_CACHE_COMMAND_CLEAR_ALL = 0x01,
+} mali_l2_cache_command;
+
+/**
+ * Mali L2 cache commands
+ * These are the commands that can be sent to the Mali L2 cache unit
+ */
+typedef enum mali_l2_cache_enable {
+	MALI400_L2_CACHE_ENABLE_DEFAULT = 0x0, /* Default */
+	MALI400_L2_CACHE_ENABLE_ACCESS = 0x01,
+	MALI400_L2_CACHE_ENABLE_READ_ALLOCATE = 0x02,
+} mali_l2_cache_enable;
+
+/**
+ * Mali L2 cache status bits
+ */
+typedef enum mali_l2_cache_status {
+	MALI400_L2_CACHE_STATUS_COMMAND_BUSY = 0x01,
+	MALI400_L2_CACHE_STATUS_DATA_BUSY    = 0x02,
+} mali_l2_cache_status;
+
+#define MALI400_L2_MAX_READS_NOT_SET -1
+
+static struct mali_l2_cache_core *
+	mali_global_l2s[MALI_MAX_NUMBER_OF_L2_CACHE_CORES] = { NULL, };
+static u32 mali_global_num_l2s = 0;
+
+int mali_l2_max_reads = MALI400_L2_MAX_READS_NOT_SET;
+
+
+/* Local helper functions */
+
+static void mali_l2_cache_reset(struct mali_l2_cache_core *cache);
+
+static _mali_osk_errcode_t mali_l2_cache_send_command(
+	struct mali_l2_cache_core *cache, u32 reg, u32 val);
+
+static void mali_l2_cache_lock(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	_mali_osk_spinlock_irq_lock(cache->lock);
+}
+
+static void mali_l2_cache_unlock(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	_mali_osk_spinlock_irq_unlock(cache->lock);
+}
+
+/* Implementation of the L2 cache interface */
+
+struct mali_l2_cache_core *mali_l2_cache_create(
+	_mali_osk_resource_t *resource, u32 domain_index)
+{
+	struct mali_l2_cache_core *cache = NULL;
+#if defined(DEBUG)
+	u32 cache_size;
+#endif
+
+	MALI_DEBUG_PRINT(4, ("Mali L2 cache: Creating Mali L2 cache: %s\n",
+			     resource->description));
+
+	if (mali_global_num_l2s >= MALI_MAX_NUMBER_OF_L2_CACHE_CORES) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Too many L2 caches\n"));
+		return NULL;
+	}
+
+	cache = _mali_osk_malloc(sizeof(struct mali_l2_cache_core));
+	if (NULL == cache) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Failed to allocate memory for L2 cache core\n"));
+		return NULL;
+	}
+
+	cache->core_id =  mali_global_num_l2s;
+	cache->counter_src0 = MALI_HW_CORE_NO_COUNTER;
+	cache->counter_src1 = MALI_HW_CORE_NO_COUNTER;
+	cache->counter_value0_base = 0;
+	cache->counter_value1_base = 0;
+	cache->pm_domain = NULL;
+	cache->power_is_on = MALI_FALSE;
+	cache->last_invalidated_id = 0;
+
+	if (_MALI_OSK_ERR_OK != mali_hw_core_create(&cache->hw_core,
+			resource, MALI400_L2_CACHE_REGISTERS_SIZE)) {
+		_mali_osk_free(cache);
+		return NULL;
+	}
+
+#if defined(DEBUG)
+	cache_size = mali_hw_core_register_read(&cache->hw_core,
+						MALI400_L2_CACHE_REGISTER_SIZE);
+	MALI_DEBUG_PRINT(2, ("Mali L2 cache: Created %s: % 3uK, %u-way, % 2ubyte cache line, % 3ubit external bus\n",
+			     resource->description,
+			     1 << (((cache_size >> 16) & 0xff) - 10),
+			     1 << ((cache_size >> 8) & 0xff),
+			     1 << (cache_size & 0xff),
+			     1 << ((cache_size >> 24) & 0xff)));
+#endif
+
+	cache->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED,
+			_MALI_OSK_LOCK_ORDER_L2);
+	if (NULL == cache->lock) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Failed to create counter lock for L2 cache core %s\n",
+				  cache->hw_core.description));
+		mali_hw_core_delete(&cache->hw_core);
+		_mali_osk_free(cache);
+		return NULL;
+	}
+
+	/* register with correct power domain */
+	cache->pm_domain = mali_pm_register_l2_cache(
+				   domain_index, cache);
+
+	mali_global_l2s[mali_global_num_l2s] = cache;
+	mali_global_num_l2s++;
+
+	return cache;
+}
+
+void mali_l2_cache_delete(struct mali_l2_cache_core *cache)
+{
+	u32 i;
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		if (mali_global_l2s[i] != cache) {
+			continue;
+		}
+
+		mali_global_l2s[i] = NULL;
+		mali_global_num_l2s--;
+
+		if (i == mali_global_num_l2s) {
+			/* Removed last element, nothing more to do */
+			break;
+		}
+
+		/*
+		 * We removed a l2 cache from the middle of the array,
+		 * so move the last l2 cache to current position
+		 */
+		mali_global_l2s[i] = mali_global_l2s[mali_global_num_l2s];
+		mali_global_l2s[mali_global_num_l2s] = NULL;
+
+		/* All good */
+		break;
+	}
+
+	_mali_osk_spinlock_irq_term(cache->lock);
+	mali_hw_core_delete(&cache->hw_core);
+	_mali_osk_free(cache);
+}
+
+void mali_l2_cache_power_up(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	mali_l2_cache_lock(cache);
+
+	mali_l2_cache_reset(cache);
+
+	if ((1 << MALI_DOMAIN_INDEX_DUMMY) != cache->pm_domain->pmu_mask)
+		MALI_DEBUG_ASSERT(MALI_FALSE == cache->power_is_on);
+	cache->power_is_on = MALI_TRUE;
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_power_down(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	mali_l2_cache_lock(cache);
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == cache->power_is_on);
+
+	/*
+	 * The HW counters will start from zero again when we resume,
+	 * but we should report counters as always increasing.
+	 * Take a copy of the HW values now in order to add this to
+	 * the values we report after being powered up.
+	 *
+	 * The physical power off of the L2 cache might be outside our
+	 * own control (e.g. runtime PM). That is why we must manually
+	 * set set the counter value to zero as well.
+	 */
+
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+		cache->counter_value0_base += mali_hw_core_register_read(
+						      &cache->hw_core,
+						      MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0);
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0, 0);
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		cache->counter_value1_base += mali_hw_core_register_read(
+						      &cache->hw_core,
+						      MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1);
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1, 0);
+	}
+
+
+	cache->power_is_on = MALI_FALSE;
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_core_set_counter_src(
+	struct mali_l2_cache_core *cache, u32 source_id, u32 counter)
+{
+	u32 reg_offset_src;
+	u32 reg_offset_val;
+
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT(source_id >= 0 && source_id <= 1);
+
+	mali_l2_cache_lock(cache);
+
+	if (0 == source_id) {
+		/* start counting from 0 */
+		cache->counter_value0_base = 0;
+		cache->counter_src0 = counter;
+		reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0;
+		reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0;
+	} else {
+		/* start counting from 0 */
+		cache->counter_value1_base = 0;
+		cache->counter_src1 = counter;
+		reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1;
+		reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1;
+	}
+
+	if (cache->power_is_on) {
+		u32 hw_src;
+
+		if (MALI_HW_CORE_NO_COUNTER != counter) {
+			hw_src = counter;
+		} else {
+			hw_src = 0; /* disable value for HW */
+		}
+
+		/* Set counter src */
+		mali_hw_core_register_write(&cache->hw_core,
+					    reg_offset_src, hw_src);
+
+		/* Make sure the HW starts counting from 0 again */
+		mali_hw_core_register_write(&cache->hw_core,
+					    reg_offset_val, 0);
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_core_get_counter_values(
+	struct mali_l2_cache_core *cache,
+	u32 *src0, u32 *value0, u32 *src1, u32 *value1)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT(NULL != src0);
+	MALI_DEBUG_ASSERT(NULL != value0);
+	MALI_DEBUG_ASSERT(NULL != src1);
+	MALI_DEBUG_ASSERT(NULL != value1);
+
+	mali_l2_cache_lock(cache);
+
+	*src0 = cache->counter_src0;
+	*src1 = cache->counter_src1;
+
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+		if (MALI_TRUE == cache->power_is_on) {
+			*value0 = mali_hw_core_register_read(&cache->hw_core,
+							     MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0);
+		} else {
+			*value0 = 0;
+		}
+
+		/* Add base offset value (in case we have been power off) */
+		*value0 += cache->counter_value0_base;
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		if (MALI_TRUE == cache->power_is_on) {
+			*value1 = mali_hw_core_register_read(&cache->hw_core,
+							     MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1);
+		} else {
+			*value1 = 0;
+		}
+
+		/* Add base offset value (in case we have been power off) */
+		*value1 += cache->counter_value1_base;
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index)
+{
+	if (mali_global_num_l2s > index) {
+		return mali_global_l2s[index];
+	}
+
+	return NULL;
+}
+
+u32 mali_l2_cache_core_get_glob_num_l2_cores(void)
+{
+	return mali_global_num_l2s;
+}
+
+void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	if (NULL == cache) {
+		return;
+	}
+
+	mali_l2_cache_lock(cache);
+
+	cache->last_invalidated_id = mali_scheduler_get_new_cache_order();
+	mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND,
+				   MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_invalidate_conditional(
+	struct mali_l2_cache_core *cache, u32 id)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	if (NULL == cache) {
+		return;
+	}
+
+	/*
+	 * If the last cache invalidation was done by a job with a higher id we
+	 * don't have to flush. Since user space will store jobs w/ their
+	 * corresponding memory in sequence (first job #0, then job #1, ...),
+	 * we don't have to flush for job n-1 if job n has already invalidated
+	 * the cache since we know for sure that job n-1's memory was already
+	 * written when job n was started.
+	 */
+
+	mali_l2_cache_lock(cache);
+
+	if (((s32)id) > ((s32)cache->last_invalidated_id)) {
+		/* Set latest invalidated id to current "point in time" */
+		cache->last_invalidated_id =
+			mali_scheduler_get_new_cache_order();
+		mali_l2_cache_send_command(cache,
+					   MALI400_L2_CACHE_REGISTER_COMMAND,
+					   MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_invalidate_all(void)
+{
+	u32 i;
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		struct mali_l2_cache_core *cache = mali_global_l2s[i];
+		_mali_osk_errcode_t ret;
+
+		MALI_DEBUG_ASSERT_POINTER(cache);
+
+		mali_l2_cache_lock(cache);
+
+		if (MALI_TRUE != cache->power_is_on) {
+			mali_l2_cache_unlock(cache);
+			continue;
+		}
+
+		cache->last_invalidated_id =
+			mali_scheduler_get_new_cache_order();
+
+		ret = mali_l2_cache_send_command(cache,
+						 MALI400_L2_CACHE_REGISTER_COMMAND,
+						 MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to invalidate cache\n"));
+		}
+
+		mali_l2_cache_unlock(cache);
+	}
+}
+
+void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages)
+{
+	u32 i;
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		struct mali_l2_cache_core *cache = mali_global_l2s[i];
+		u32 j;
+
+		MALI_DEBUG_ASSERT_POINTER(cache);
+
+		mali_l2_cache_lock(cache);
+
+		if (MALI_TRUE != cache->power_is_on) {
+			mali_l2_cache_unlock(cache);
+			continue;
+		}
+
+		for (j = 0; j < num_pages; j++) {
+			_mali_osk_errcode_t ret;
+
+			ret = mali_l2_cache_send_command(cache,
+							 MALI400_L2_CACHE_REGISTER_CLEAR_PAGE,
+							 pages[j]);
+			if (_MALI_OSK_ERR_OK != ret) {
+				MALI_PRINT_ERROR(("Failed to invalidate cache (page)\n"));
+			}
+		}
+
+		mali_l2_cache_unlock(cache);
+	}
+}
+
+/* -------- local helper functions below -------- */
+
+static void mali_l2_cache_reset(struct mali_l2_cache_core *cache)
+{
+    MALI_DEBUG_ASSERT_POINTER(cache);
+    MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock);
+
+    /* Kasin Added, skip off power domain. */
+    if (cache && cache->pm_domain && cache->pm_domain->power_is_on == MALI_FALSE) {
+        printk("===========%s, %d skip off power domain?\n", __FUNCTION__, __LINE__);
+    }
+
+
+    /* Invalidate cache (just to keep it in a known state at startup) */
+    mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND,
+            MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+
+    /* Enable cache */
+    mali_hw_core_register_write(&cache->hw_core,
+            MALI400_L2_CACHE_REGISTER_ENABLE,
+            (u32)MALI400_L2_CACHE_ENABLE_ACCESS |
+            (u32)MALI400_L2_CACHE_ENABLE_READ_ALLOCATE);
+
+	if (MALI400_L2_MAX_READS_NOT_SET != mali_l2_max_reads) {
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_MAX_READS,
+					    (u32)mali_l2_max_reads);
+	}
+
+	/* Restart any performance counters (if enabled) */
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0,
+					    cache->counter_src0);
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1,
+					    cache->counter_src1);
+	}
+}
+
+static _mali_osk_errcode_t mali_l2_cache_send_command(
+	struct mali_l2_cache_core *cache, u32 reg, u32 val)
+{
+	int i = 0;
+	const int loop_count = 100000;
+
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock);
+
+	/*
+	 * First, wait for L2 cache command handler to go idle.
+	 * (Commands received while processing another command will be ignored)
+	 */
+	for (i = 0; i < loop_count; i++) {
+		if (!(mali_hw_core_register_read(&cache->hw_core,
+						 MALI400_L2_CACHE_REGISTER_STATUS) &
+		      (u32)MALI400_L2_CACHE_STATUS_COMMAND_BUSY)) {
+			break;
+		}
+	}
+
+	if (i == loop_count) {
+		MALI_DEBUG_PRINT(1, ("Mali L2 cache: aborting wait for command interface to go idle\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* then issue the command */
+	mali_hw_core_register_write(&cache->hw_core, reg, val);
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/mali/common/mali_l2_cache.h b/mali/common/mali_l2_cache.h
new file mode 100755
index 0000000..de92ad6
--- /dev/null
+++ b/mali/common/mali_l2_cache.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_L2_CACHE_H__
+#define __MALI_KERNEL_L2_CACHE_H__
+
+#include "mali_osk.h"
+#include "mali_hw_core.h"
+
+#define MALI_MAX_NUMBER_OF_L2_CACHE_CORES  3
+/* Maximum 1 GP and 4 PP for an L2 cache core (Mali-400 MP4) */
+#define MALI_MAX_NUMBER_OF_GROUPS_PER_L2_CACHE 5
+
+/**
+ * Definition of the L2 cache core struct
+ * Used to track a L2 cache unit in the system.
+ * Contains information about the mapping of the registers
+ */
+struct mali_l2_cache_core {
+	/* Common HW core functionality */
+	struct mali_hw_core hw_core;
+
+	/* Synchronize L2 cache access */
+	_mali_osk_spinlock_irq_t *lock;
+
+	/* Unique core ID */
+	u32 core_id;
+
+	/* The power domain this L2 cache belongs to */
+	struct mali_pm_domain *pm_domain;
+
+	/* MALI_TRUE if power is on for this L2 cache */
+	mali_bool power_is_on;
+
+	/* A "timestamp" to avoid unnecessary flushes */
+	u32 last_invalidated_id;
+
+	/* Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+	u32 counter_src0;
+
+	/* Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+	u32 counter_src1;
+
+	/*
+	 * Performance counter 0 value base/offset
+	 * (allows accumulative reporting even after power off)
+	 */
+	u32 counter_value0_base;
+
+	/*
+	 * Performance counter 0 value base/offset
+	 * (allows accumulative reporting even after power off)
+	 */
+	u32 counter_value1_base;
+
+	/* Used by PM domains to link L2 caches of same domain */
+	_mali_osk_list_t pm_domain_list;
+};
+
+_mali_osk_errcode_t mali_l2_cache_initialize(void);
+void mali_l2_cache_terminate(void);
+
+struct mali_l2_cache_core *mali_l2_cache_create(
+	_mali_osk_resource_t *resource, u32 domain_index);
+void mali_l2_cache_delete(struct mali_l2_cache_core *cache);
+
+MALI_STATIC_INLINE u32 mali_l2_cache_get_id(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->core_id;
+}
+
+MALI_STATIC_INLINE struct mali_pm_domain *mali_l2_cache_get_pm_domain(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->pm_domain;
+}
+
+void mali_l2_cache_power_up(struct mali_l2_cache_core *cache);
+void mali_l2_cache_power_down(struct mali_l2_cache_core *cache);
+
+void mali_l2_cache_core_set_counter_src(
+	struct mali_l2_cache_core *cache, u32 source_id, u32 counter);
+
+MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src0(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->counter_src0;
+}
+
+MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src1(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->counter_src1;
+}
+
+void mali_l2_cache_core_get_counter_values(
+	struct mali_l2_cache_core *cache,
+	u32 *src0, u32 *value0, u32 *src1, u32 *value1);
+
+struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index);
+u32 mali_l2_cache_core_get_glob_num_l2_cores(void);
+
+struct mali_group *mali_l2_cache_get_group(
+	struct mali_l2_cache_core *cache, u32 index);
+
+void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache);
+void mali_l2_cache_invalidate_conditional(
+	struct mali_l2_cache_core *cache, u32 id);
+
+void mali_l2_cache_invalidate_all(void);
+void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages);
+
+#endif /* __MALI_KERNEL_L2_CACHE_H__ */
diff --git a/mali/common/mali_mem_validation.c b/mali/common/mali_mem_validation.c
new file mode 100644
index 0000000..dddfb58
--- /dev/null
+++ b/mali/common/mali_mem_validation.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_mem_validation.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#define MALI_INVALID_MEM_ADDR 0xFFFFFFFF
+
+typedef struct {
+	u32 phys_base;        /**< Mali physical base of the memory, page aligned */
+	u32 size;             /**< size in bytes of the memory, multiple of page size */
+} _mali_mem_validation_t;
+
+static _mali_mem_validation_t mali_mem_validator = { MALI_INVALID_MEM_ADDR, MALI_INVALID_MEM_ADDR };
+
+_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size)
+{
+	/* Check that no other MEM_VALIDATION resources exist */
+	if (MALI_INVALID_MEM_ADDR != mali_mem_validator.phys_base) {
+		MALI_PRINT_ERROR(("Failed to add frame buffer memory; another range is already specified\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Check restrictions on page alignment */
+	if ((0 != (start & (~_MALI_OSK_CPU_PAGE_MASK))) ||
+	    (0 != (size & (~_MALI_OSK_CPU_PAGE_MASK)))) {
+		MALI_PRINT_ERROR(("Failed to add frame buffer memory; incorrect alignment\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	mali_mem_validator.phys_base = start;
+	mali_mem_validator.size = size;
+	MALI_DEBUG_PRINT(2, ("Memory Validator installed for Mali physical address base=0x%08X, size=0x%08X\n",
+			     mali_mem_validator.phys_base, mali_mem_validator.size));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size)
+{
+#if 0
+	if (phys_addr < (phys_addr + size)) { /* Don't allow overflow (or zero size) */
+		if ((0 == (phys_addr & (~_MALI_OSK_CPU_PAGE_MASK))) &&
+		    (0 == (size & (~_MALI_OSK_CPU_PAGE_MASK)))) {
+			if ((phys_addr          >= mali_mem_validator.phys_base) &&
+			    ((phys_addr + (size - 1)) >= mali_mem_validator.phys_base) &&
+			    (phys_addr          <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1))) &&
+			    ((phys_addr + (size - 1)) <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1)))) {
+				MALI_DEBUG_PRINT(3, ("Accepted range 0x%08X + size 0x%08X (= 0x%08X)\n", phys_addr, size, (phys_addr + size - 1)));
+				return _MALI_OSK_ERR_OK;
+			}
+		}
+	}
+
+	MALI_PRINT_ERROR(("MALI PHYSICAL RANGE VALIDATION ERROR: The range supplied was: phys_base=0x%08X, size=0x%08X\n", phys_addr, size));
+
+	return _MALI_OSK_ERR_FAULT;
+#else
+	return _MALI_OSK_ERR_OK;
+#endif
+}
diff --git a/mali/common/mali_mem_validation.h b/mali/common/mali_mem_validation.h
new file mode 100644
index 0000000..f7e6cb1
--- /dev/null
+++ b/mali/common/mali_mem_validation.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2011-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEM_VALIDATION_H__
+#define __MALI_MEM_VALIDATION_H__
+
+#include "mali_osk.h"
+
+_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size);
+_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size);
+
+#endif /* __MALI_MEM_VALIDATION_H__ */
diff --git a/mali/common/mali_mmu.c b/mali/common/mali_mmu.c
new file mode 100755
index 0000000..c08ce1f
--- /dev/null
+++ b/mali/common/mali_mmu.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_ukk.h"
+
+#include "mali_mmu.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "mali_mmu_page_directory.h"
+
+/**
+ * Size of the MMU registers in bytes
+ */
+#define MALI_MMU_REGISTERS_SIZE 0x24
+
+/**
+ * MMU commands
+ * These are the commands that can be sent
+ * to the MMU unit.
+ */
+typedef enum mali_mmu_command {
+	MALI_MMU_COMMAND_ENABLE_PAGING = 0x00, /**< Enable paging (memory translation) */
+	MALI_MMU_COMMAND_DISABLE_PAGING = 0x01, /**< Disable paging (memory translation) */
+	MALI_MMU_COMMAND_ENABLE_STALL = 0x02, /**<  Enable stall on page fault */
+	MALI_MMU_COMMAND_DISABLE_STALL = 0x03, /**< Disable stall on page fault */
+	MALI_MMU_COMMAND_ZAP_CACHE = 0x04, /**< Zap the entire page table cache */
+	MALI_MMU_COMMAND_PAGE_FAULT_DONE = 0x05, /**< Page fault processed */
+	MALI_MMU_COMMAND_HARD_RESET = 0x06 /**< Reset the MMU back to power-on settings */
+} mali_mmu_command;
+
+static void mali_mmu_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_mmu_probe_ack(void *data);
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu);
+
+/* page fault queue flush helper pages
+ * note that the mapping pointers are currently unused outside of the initialization functions */
+static mali_dma_addr mali_page_fault_flush_page_directory = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_page_directory_mapping = NULL;
+static mali_dma_addr mali_page_fault_flush_page_table = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_page_table_mapping = NULL;
+static mali_dma_addr mali_page_fault_flush_data_page = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_data_page_mapping = NULL;
+
+/* an empty page directory (no address valid) which is active on any MMU not currently marked as in use */
+static mali_dma_addr mali_empty_page_directory_phys   = MALI_INVALID_PAGE;
+static mali_io_address mali_empty_page_directory_virt = NULL;
+
+
+_mali_osk_errcode_t mali_mmu_initialize(void)
+{
+	/* allocate the helper pages */
+	mali_empty_page_directory_phys = mali_allocate_empty_page(&mali_empty_page_directory_virt);
+	if (0 == mali_empty_page_directory_phys) {
+		MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate empty page directory.\n"));
+		mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	if (_MALI_OSK_ERR_OK != mali_create_fault_flush_pages(&mali_page_fault_flush_page_directory,
+			&mali_page_fault_flush_page_directory_mapping,
+			&mali_page_fault_flush_page_table,
+			&mali_page_fault_flush_page_table_mapping,
+			&mali_page_fault_flush_data_page,
+			&mali_page_fault_flush_data_page_mapping)) {
+		MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate fault flush pages\n"));
+		mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt);
+		mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+		mali_empty_page_directory_virt = NULL;
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mmu_terminate(void)
+{
+	MALI_DEBUG_PRINT(3, ("Mali MMU: terminating\n"));
+
+	/* Free global helper pages */
+	mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt);
+	mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+	mali_empty_page_directory_virt = NULL;
+
+	/* Free the page fault flush pages */
+	mali_destroy_fault_flush_pages(&mali_page_fault_flush_page_directory,
+				       &mali_page_fault_flush_page_directory_mapping,
+				       &mali_page_fault_flush_page_table,
+				       &mali_page_fault_flush_page_table_mapping,
+				       &mali_page_fault_flush_data_page,
+				       &mali_page_fault_flush_data_page_mapping);
+}
+
+struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual)
+{
+	struct mali_mmu_core *mmu = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(resource);
+
+	MALI_DEBUG_PRINT(2, ("Mali MMU: Creating Mali MMU: %s\n", resource->description));
+
+	mmu = _mali_osk_calloc(1, sizeof(struct mali_mmu_core));
+	if (NULL != mmu) {
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&mmu->hw_core, resource, MALI_MMU_REGISTERS_SIZE)) {
+			if (_MALI_OSK_ERR_OK == mali_group_add_mmu_core(group, mmu)) {
+				if (is_virtual) {
+					/* Skip reset and IRQ setup for virtual MMU */
+					return mmu;
+				}
+
+				if (_MALI_OSK_ERR_OK == mali_mmu_reset(mmu)) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					mmu->irq = _mali_osk_irq_init(resource->irq,
+								      mali_group_upper_half_mmu,
+								      group,
+								      mali_mmu_probe_trigger,
+								      mali_mmu_probe_ack,
+								      mmu,
+								      resource->description);
+					if (NULL != mmu->irq) {
+						return mmu;
+					} else {
+						MALI_PRINT_ERROR(("Mali MMU: Failed to setup interrupt handlers for MMU %s\n", mmu->hw_core.description));
+					}
+				}
+				mali_group_remove_mmu_core(group);
+			} else {
+				MALI_PRINT_ERROR(("Mali MMU: Failed to add core %s to group\n", mmu->hw_core.description));
+			}
+			mali_hw_core_delete(&mmu->hw_core);
+		}
+
+		_mali_osk_free(mmu);
+	} else {
+		MALI_PRINT_ERROR(("Failed to allocate memory for MMU\n"));
+	}
+
+	return NULL;
+}
+
+void mali_mmu_delete(struct mali_mmu_core *mmu)
+{
+	if (NULL != mmu->irq) {
+		_mali_osk_irq_term(mmu->irq);
+	}
+
+	mali_hw_core_delete(&mmu->hw_core);
+	_mali_osk_free(mmu);
+}
+
+static void mali_mmu_enable_paging(struct mali_mmu_core *mmu)
+{
+	int i;
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_PAGING);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS) & MALI_MMU_STATUS_BIT_PAGING_ENABLED) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Enable paging request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+	}
+}
+
+/**
+ * Issues the enable stall command to the MMU and waits for HW to complete the request
+ * @param mmu The MMU to enable paging for
+ * @return MALI_TRUE if HW stall was successfully engaged, otherwise MALI_FALSE (req timed out)
+ */
+static mali_bool mali_mmu_enable_stall(struct mali_mmu_core *mmu)
+{
+	int i;
+	u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+
+	if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+		MALI_DEBUG_PRINT(4, ("MMU stall is implicit when Paging is not enabled.\n"));
+		return MALI_TRUE;
+	}
+
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(3, ("Aborting MMU stall request since it is in pagefault state.\n"));
+		return MALI_FALSE;
+	}
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_STALL);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+		if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+			break;
+		}
+		if ((mmu_status & MALI_MMU_STATUS_BIT_STALL_ACTIVE) && (0 == (mmu_status & MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE))) {
+			break;
+		}
+		if (0 == (mmu_status & (MALI_MMU_STATUS_BIT_PAGING_ENABLED))) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_DEBUG_PRINT(2, ("Enable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+		return MALI_FALSE;
+	}
+
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(2, ("Aborting MMU stall request since it has a pagefault.\n"));
+		return MALI_FALSE;
+	}
+
+	return MALI_TRUE;
+}
+
+/**
+ * Issues the disable stall command to the MMU and waits for HW to complete the request
+ * @param mmu The MMU to enable paging for
+ */
+static void mali_mmu_disable_stall(struct mali_mmu_core *mmu)
+{
+	int i;
+	u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+
+	if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+		MALI_DEBUG_PRINT(3, ("MMU disable skipped since it was not enabled.\n"));
+		return;
+	}
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(2, ("Aborting MMU disable stall request since it is in pagefault state.\n"));
+		return;
+	}
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_DISABLE_STALL);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		u32 status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+		if (0 == (status & MALI_MMU_STATUS_BIT_STALL_ACTIVE)) {
+			break;
+		}
+		if (status &  MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+			break;
+		}
+		if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) MALI_DEBUG_PRINT(1, ("Disable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+}
+
+void mali_mmu_page_fault_done(struct mali_mmu_core *mmu)
+{
+	MALI_DEBUG_PRINT(4, ("Mali MMU: %s: Leaving page fault mode\n", mmu->hw_core.description));
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_PAGE_FAULT_DONE);
+}
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu)
+{
+	int i;
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, 0xCAFEBABE);
+	MALI_DEBUG_ASSERT(0xCAFEB000 == mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR));
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_HARD_RESET);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR) == 0) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Reset request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu)
+{
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT;
+	mali_bool stall_success;
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	stall_success = mali_mmu_enable_stall(mmu);
+	if (!stall_success) {
+		err = _MALI_OSK_ERR_BUSY;
+	}
+
+	MALI_DEBUG_PRINT(3, ("Mali MMU: mali_kernel_mmu_reset: %s\n", mmu->hw_core.description));
+
+	if (_MALI_OSK_ERR_OK == mali_mmu_raw_reset(mmu)) {
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+		/* no session is active, so just activate the empty page directory */
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, mali_empty_page_directory_phys);
+		mali_mmu_enable_paging(mmu);
+		err = _MALI_OSK_ERR_OK;
+	}
+	mali_mmu_disable_stall(mmu);
+
+	return err;
+}
+
+mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success = mali_mmu_enable_stall(mmu);
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+
+	if (MALI_FALSE == stall_success) {
+		/* False means that it is in Pagefault state. Not possible to disable_stall then */
+		return MALI_FALSE;
+	}
+
+	mali_mmu_disable_stall(mmu);
+	return MALI_TRUE;
+}
+
+void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+}
+
+
+void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_ZAP_ONE_LINE, MALI_MMU_PDE_ENTRY(mali_address));
+}
+
+static void mali_mmu_activate_address_space(struct mali_mmu_core *mmu, u32 page_directory)
+{
+	/* The MMU must be in stalled or page fault mode, for this writing to work */
+	MALI_DEBUG_ASSERT(0 != (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)
+				& (MALI_MMU_STATUS_BIT_STALL_ACTIVE | MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE)));
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, page_directory);
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+
+}
+
+void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir)
+{
+	mali_bool stall_success;
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	MALI_DEBUG_PRINT(5, ("Asked to activate page directory 0x%x on MMU %s\n", pagedir, mmu->hw_core.description));
+
+	stall_success = mali_mmu_enable_stall(mmu);
+	MALI_DEBUG_ASSERT(stall_success);
+	MALI_IGNORE(stall_success);
+	mali_mmu_activate_address_space(mmu, pagedir->page_directory);
+	mali_mmu_disable_stall(mmu);
+}
+
+void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success;
+
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+	MALI_DEBUG_PRINT(3, ("Activating the empty page directory on MMU %s\n", mmu->hw_core.description));
+
+	stall_success = mali_mmu_enable_stall(mmu);
+
+	/* This function can only be called when the core is idle, so it could not fail. */
+	MALI_DEBUG_ASSERT(stall_success);
+	MALI_IGNORE(stall_success);
+
+	mali_mmu_activate_address_space(mmu, mali_empty_page_directory_phys);
+	mali_mmu_disable_stall(mmu);
+}
+
+void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success;
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	MALI_DEBUG_PRINT(3, ("Activating the page fault flush page directory on MMU %s\n", mmu->hw_core.description));
+	stall_success = mali_mmu_enable_stall(mmu);
+	/* This function is expect to fail the stalling, since it might be in PageFault mode when it is called */
+	mali_mmu_activate_address_space(mmu, mali_page_fault_flush_page_directory);
+	if (MALI_TRUE == stall_success) mali_mmu_disable_stall(mmu);
+}
+
+/* Is called when we want the mmu to give an interrupt */
+static void mali_mmu_probe_trigger(void *data)
+{
+	struct mali_mmu_core *mmu = (struct mali_mmu_core *)data;
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+}
+
+/* Is called when the irq probe wants the mmu to acknowledge an interrupt from the hw */
+extern int mali_page_fault;
+static _mali_osk_errcode_t mali_mmu_probe_ack(void *data)
+{
+	struct mali_mmu_core *mmu = (struct mali_mmu_core *)data;
+	u32 int_stat;
+
+	int_stat = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS);
+
+	MALI_DEBUG_PRINT(2, ("mali_mmu_probe_irq_acknowledge: intstat 0x%x\n", int_stat));
+	if (int_stat & MALI_MMU_INTERRUPT_PAGE_FAULT) {
+		MALI_DEBUG_PRINT(2, ("Probe: Page fault detect: PASSED\n"));
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_PAGE_FAULT);
+	} else {
+		MALI_DEBUG_PRINT(1, ("Probe: Page fault detect: FAILED\n"));
+		mali_page_fault++;
+	}
+
+	if (int_stat & MALI_MMU_INTERRUPT_READ_BUS_ERROR) {
+		MALI_DEBUG_PRINT(2, ("Probe: Bus read error detect: PASSED\n"));
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+	} else {
+		MALI_DEBUG_PRINT(1, ("Probe: Bus read error detect: FAILED\n"));
+		mali_page_fault++;
+	}
+
+	if ((int_stat & (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) ==
+	    (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) {
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+#if 0
+void mali_mmu_print_state(struct mali_mmu_core *mmu)
+{
+	MALI_DEBUG_PRINT(2, ("MMU: State of %s is 0x%08x\n", mmu->hw_core.description, mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+}
+#endif
diff --git a/mali/common/mali_mmu.h b/mali/common/mali_mmu.h
new file mode 100755
index 0000000..ac31c7a
--- /dev/null
+++ b/mali/common/mali_mmu.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MMU_H__
+#define __MALI_MMU_H__
+
+#include "mali_osk.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_hw_core.h"
+
+/* Forward declaration from mali_group.h */
+struct mali_group;
+
+/**
+ * MMU register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_mmu_register {
+	MALI_MMU_REGISTER_DTE_ADDR = 0x0000, /**< Current Page Directory Pointer */
+	MALI_MMU_REGISTER_STATUS = 0x0004, /**< Status of the MMU */
+	MALI_MMU_REGISTER_COMMAND = 0x0008, /**< Command register, used to control the MMU */
+	MALI_MMU_REGISTER_PAGE_FAULT_ADDR = 0x000C, /**< Logical address of the last page fault */
+	MALI_MMU_REGISTER_ZAP_ONE_LINE = 0x010, /**< Used to invalidate the mapping of a single page from the MMU */
+	MALI_MMU_REGISTER_INT_RAWSTAT = 0x0014, /**< Raw interrupt status, all interrupts visible */
+	MALI_MMU_REGISTER_INT_CLEAR = 0x0018, /**< Indicate to the MMU that the interrupt has been received */
+	MALI_MMU_REGISTER_INT_MASK = 0x001C, /**< Enable/disable types of interrupts */
+	MALI_MMU_REGISTER_INT_STATUS = 0x0020 /**< Interrupt status based on the mask */
+} mali_mmu_register;
+
+/**
+ * MMU interrupt register bits
+ * Each cause of the interrupt is reported
+ * through the (raw) interrupt status registers.
+ * Multiple interrupts can be pending, so multiple bits
+ * can be set at once.
+ */
+typedef enum mali_mmu_interrupt {
+	MALI_MMU_INTERRUPT_PAGE_FAULT = 0x01, /**< A page fault occured */
+	MALI_MMU_INTERRUPT_READ_BUS_ERROR = 0x02 /**< A bus read error occured */
+} mali_mmu_interrupt;
+
+typedef enum mali_mmu_status_bits {
+	MALI_MMU_STATUS_BIT_PAGING_ENABLED      = 1 << 0,
+	MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE   = 1 << 1,
+	MALI_MMU_STATUS_BIT_STALL_ACTIVE        = 1 << 2,
+	MALI_MMU_STATUS_BIT_IDLE                = 1 << 3,
+	MALI_MMU_STATUS_BIT_REPLAY_BUFFER_EMPTY = 1 << 4,
+	MALI_MMU_STATUS_BIT_PAGE_FAULT_IS_WRITE = 1 << 5,
+	MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE    = 1 << 31,
+} mali_mmu_status_bits;
+
+/**
+ * Definition of the MMU struct
+ * Used to track a MMU unit in the system.
+ * Contains information about the mapping of the registers
+ */
+struct mali_mmu_core {
+	struct mali_hw_core hw_core; /**< Common for all HW cores */
+	_mali_osk_irq_t *irq;        /**< IRQ handler */
+};
+
+_mali_osk_errcode_t mali_mmu_initialize(void);
+
+void mali_mmu_terminate(void);
+
+struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual);
+void mali_mmu_delete(struct mali_mmu_core *mmu);
+
+_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu);
+mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu);
+void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu);
+void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address);
+
+void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir);
+void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu);
+void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu);
+
+void mali_mmu_page_fault_done(struct mali_mmu_core *mmu);
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_mmu_get_interrupt_result(struct mali_mmu_core *mmu)
+{
+	u32 rawstat_used = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT);
+	if (0 == rawstat_used) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	}
+
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+
+MALI_STATIC_INLINE u32 mali_mmu_get_int_status(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_rawstat(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT);
+}
+
+MALI_STATIC_INLINE void mali_mmu_mask_all_interrupts(struct mali_mmu_core *mmu)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, 0);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_status(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_page_fault_addr(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_PAGE_FAULT_ADDR);
+}
+
+#endif /* __MALI_MMU_H__ */
diff --git a/mali/common/mali_mmu_page_directory.c b/mali/common/mali_mmu_page_directory.c
new file mode 100755
index 0000000..74d8cc8
--- /dev/null
+++ b/mali/common/mali_mmu_page_directory.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_memory.h"
+#include "mali_l2_cache.h"
+
+static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data);
+
+u32 mali_allocate_empty_page(mali_io_address *virt_addr)
+{
+	_mali_osk_errcode_t err;
+	mali_io_address mapping;
+	mali_dma_addr address;
+
+	if (_MALI_OSK_ERR_OK != mali_mmu_get_table_page(&address, &mapping)) {
+		/* Allocation failed */
+		MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to get table page for empty pgdir\n"));
+		return 0;
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(mapping);
+
+	err = fill_page(mapping, 0);
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_mmu_release_table_page(address, mapping);
+		MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to zero page\n"));
+		return 0;
+	}
+
+	*virt_addr = mapping;
+	return address;
+}
+
+void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr)
+{
+	if (MALI_INVALID_PAGE != address) {
+		mali_mmu_release_table_page(address, virt_addr);
+	}
+}
+
+_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory,
+		mali_io_address *page_directory_mapping,
+		mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+		mali_dma_addr *data_page, mali_io_address *data_page_mapping)
+{
+	_mali_osk_errcode_t err;
+
+	err = mali_mmu_get_table_page(data_page, data_page_mapping);
+	if (_MALI_OSK_ERR_OK == err) {
+		err = mali_mmu_get_table_page(page_table, page_table_mapping);
+		if (_MALI_OSK_ERR_OK == err) {
+			err = mali_mmu_get_table_page(page_directory, page_directory_mapping);
+			if (_MALI_OSK_ERR_OK == err) {
+				fill_page(*data_page_mapping, 0);
+				fill_page(*page_table_mapping, *data_page | MALI_MMU_FLAGS_DEFAULT);
+				fill_page(*page_directory_mapping, *page_table | MALI_MMU_FLAGS_PRESENT);
+				MALI_SUCCESS;
+			}
+			mali_mmu_release_table_page(*page_table, *page_table_mapping);
+			*page_table = MALI_INVALID_PAGE;
+		}
+		mali_mmu_release_table_page(*data_page, *data_page_mapping);
+		*data_page = MALI_INVALID_PAGE;
+	}
+	return err;
+}
+
+void mali_destroy_fault_flush_pages(
+	mali_dma_addr *page_directory, mali_io_address *page_directory_mapping,
+	mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+	mali_dma_addr *data_page, mali_io_address *data_page_mapping)
+{
+	if (MALI_INVALID_PAGE != *page_directory) {
+		mali_mmu_release_table_page(*page_directory, *page_directory_mapping);
+		*page_directory = MALI_INVALID_PAGE;
+		*page_directory_mapping = NULL;
+	}
+
+	if (MALI_INVALID_PAGE != *page_table) {
+		mali_mmu_release_table_page(*page_table, *page_table_mapping);
+		*page_table = MALI_INVALID_PAGE;
+		*page_table_mapping = NULL;
+	}
+
+	if (MALI_INVALID_PAGE != *data_page) {
+		mali_mmu_release_table_page(*data_page, *data_page_mapping);
+		*data_page = MALI_INVALID_PAGE;
+		*data_page_mapping = NULL;
+	}
+}
+
+static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data)
+{
+	int i;
+	MALI_DEBUG_ASSERT_POINTER(mapping);
+
+	for (i = 0; i < MALI_MMU_PAGE_SIZE / 4; i++) {
+		_mali_osk_mem_iowrite32_relaxed(mapping, i * sizeof(u32), data);
+	}
+	_mali_osk_mem_barrier();
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size)
+{
+	const int first_pde = MALI_MMU_PDE_ENTRY(mali_address);
+	const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1);
+	_mali_osk_errcode_t err;
+	mali_io_address pde_mapping;
+	mali_dma_addr pde_phys;
+	int i, page_count;
+	u32 start_address;
+	if (last_pde < first_pde)
+		return _MALI_OSK_ERR_INVALID_ARGS;
+
+	for (i = first_pde; i <= last_pde; i++) {
+		if (0 == (_mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+						 i * sizeof(u32)) & MALI_MMU_FLAGS_PRESENT)) {
+			/* Page table not present */
+			MALI_DEBUG_ASSERT(0 == pagedir->page_entries_usage_count[i]);
+			MALI_DEBUG_ASSERT(NULL == pagedir->page_entries_mapped[i]);
+
+			err = mali_mmu_get_table_page(&pde_phys, &pde_mapping);
+			if (_MALI_OSK_ERR_OK != err) {
+				MALI_PRINT_ERROR(("Failed to allocate page table page.\n"));
+				return err;
+			}
+			pagedir->page_entries_mapped[i] = pde_mapping;
+
+			/* Update PDE, mark as present */
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32),
+							pde_phys | MALI_MMU_FLAGS_PRESENT);
+
+			MALI_DEBUG_ASSERT(0 == pagedir->page_entries_usage_count[i]);
+		}
+
+		if (first_pde == last_pde) {
+			pagedir->page_entries_usage_count[i] += size / MALI_MMU_PAGE_SIZE;
+		} else if (i == first_pde) {
+			start_address = i * MALI_MMU_VIRTUAL_PAGE_SIZE;
+			page_count = (start_address + MALI_MMU_VIRTUAL_PAGE_SIZE - mali_address) / MALI_MMU_PAGE_SIZE;
+			pagedir->page_entries_usage_count[i] += page_count;
+		} else if (i == last_pde) {
+			start_address = i * MALI_MMU_VIRTUAL_PAGE_SIZE;
+			page_count = (mali_address + size - start_address) / MALI_MMU_PAGE_SIZE;
+			pagedir->page_entries_usage_count[i] += page_count;
+		} else {
+			pagedir->page_entries_usage_count[i] = 1024;
+		}
+	}
+	_mali_osk_write_mem_barrier();
+
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE void mali_mmu_zero_pte(mali_io_address page_table, u32 mali_address, u32 size)
+{
+	int i;
+	const int first_pte = MALI_MMU_PTE_ENTRY(mali_address);
+	const int last_pte = MALI_MMU_PTE_ENTRY(mali_address + size - 1);
+
+	for (i = first_pte; i <= last_pte; i++) {
+		_mali_osk_mem_iowrite32_relaxed(page_table, i * sizeof(u32), 0);
+	}
+}
+
+static u32 mali_page_directory_get_phys_address(struct mali_page_directory *pagedir, u32 index)
+{
+	return (_mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+				       index * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK);
+}
+
+
+_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size)
+{
+	const int first_pde = MALI_MMU_PDE_ENTRY(mali_address);
+	const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1);
+	u32 left = size;
+	int i;
+	mali_bool pd_changed = MALI_FALSE;
+	u32 pages_to_invalidate[3]; /* hard-coded to 3: max two pages from the PT level plus max one page from PD level */
+	u32 num_pages_inv = 0;
+	mali_bool invalidate_all = MALI_FALSE; /* safety mechanism in case page_entries_usage_count is unreliable */
+
+	/* For all page directory entries in range. */
+	for (i = first_pde; i <= last_pde; i++) {
+		u32 size_in_pde, offset;
+
+		MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[i]);
+		MALI_DEBUG_ASSERT(0 != pagedir->page_entries_usage_count[i]);
+
+		/* Offset into page table, 0 if mali_address is 4MiB aligned */
+		offset = (mali_address & (MALI_MMU_VIRTUAL_PAGE_SIZE - 1));
+		if (left < MALI_MMU_VIRTUAL_PAGE_SIZE - offset) {
+			size_in_pde = left;
+		} else {
+			size_in_pde = MALI_MMU_VIRTUAL_PAGE_SIZE - offset;
+		}
+
+		pagedir->page_entries_usage_count[i] -= size_in_pde / MALI_MMU_PAGE_SIZE;
+
+		/* If entire page table is unused, free it */
+		if (0 == pagedir->page_entries_usage_count[i]) {
+			u32 page_phys;
+			void *page_virt;
+			MALI_DEBUG_PRINT(4, ("Releasing page table as this is the last reference\n"));
+			/* last reference removed, no need to zero out each PTE  */
+
+			page_phys = MALI_MMU_ENTRY_ADDRESS(_mali_osk_mem_ioread32(pagedir->page_directory_mapped, i * sizeof(u32)));
+			page_virt = pagedir->page_entries_mapped[i];
+			pagedir->page_entries_mapped[i] = NULL;
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0);
+
+			mali_mmu_release_table_page(page_phys, page_virt);
+			pd_changed = MALI_TRUE;
+		} else {
+			MALI_DEBUG_ASSERT(num_pages_inv < 2);
+			if (num_pages_inv < 2) {
+				pages_to_invalidate[num_pages_inv] = mali_page_directory_get_phys_address(pagedir, i);
+				num_pages_inv++;
+			} else {
+				invalidate_all = MALI_TRUE;
+			}
+
+			/* If part of the page table is still in use, zero the relevant PTEs */
+			mali_mmu_zero_pte(pagedir->page_entries_mapped[i], mali_address, size_in_pde);
+		}
+
+		left -= size_in_pde;
+		mali_address += size_in_pde;
+	}
+	_mali_osk_write_mem_barrier();
+
+	/* L2 pages invalidation */
+	if (MALI_TRUE == pd_changed) {
+		MALI_DEBUG_ASSERT(num_pages_inv < 3);
+		if (num_pages_inv < 3) {
+			pages_to_invalidate[num_pages_inv] = pagedir->page_directory;
+			num_pages_inv++;
+		} else {
+			invalidate_all = MALI_TRUE;
+		}
+	}
+
+	if (invalidate_all) {
+		mali_l2_cache_invalidate_all();
+	} else {
+		mali_l2_cache_invalidate_all_pages(pages_to_invalidate, num_pages_inv);
+	}
+
+	MALI_SUCCESS;
+}
+
+struct mali_page_directory *mali_mmu_pagedir_alloc(void)
+{
+	struct mali_page_directory *pagedir;
+	_mali_osk_errcode_t err;
+	mali_dma_addr phys;
+
+	pagedir = _mali_osk_calloc(1, sizeof(struct mali_page_directory));
+	if (NULL == pagedir) {
+		return NULL;
+	}
+
+	err = mali_mmu_get_table_page(&phys, &pagedir->page_directory_mapped);
+	if (_MALI_OSK_ERR_OK != err) {
+		_mali_osk_free(pagedir);
+		return NULL;
+	}
+
+	pagedir->page_directory = (u32)phys;
+
+	/* Zero page directory */
+	fill_page(pagedir->page_directory_mapped, 0);
+
+	return pagedir;
+}
+
+void mali_mmu_pagedir_free(struct mali_page_directory *pagedir)
+{
+	const int num_page_table_entries = sizeof(pagedir->page_entries_mapped) / sizeof(pagedir->page_entries_mapped[0]);
+	int i;
+
+	/* Free referenced page tables and zero PDEs. */
+	for (i = 0; i < num_page_table_entries; i++) {
+		if (pagedir->page_directory_mapped && (_mali_osk_mem_ioread32(
+				pagedir->page_directory_mapped,
+				sizeof(u32)*i) & MALI_MMU_FLAGS_PRESENT)) {
+			mali_dma_addr phys = _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+					     i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK;
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0);
+			mali_mmu_release_table_page(phys, pagedir->page_entries_mapped[i]);
+		}
+	}
+	_mali_osk_write_mem_barrier();
+
+	/* Free the page directory page. */
+	mali_mmu_release_table_page(pagedir->page_directory, pagedir->page_directory_mapped);
+
+	_mali_osk_free(pagedir);
+}
+
+
+void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address,
+			     mali_dma_addr phys_address, u32 size, u32 permission_bits)
+{
+	u32 end_address = mali_address + size;
+	u32 mali_phys = (u32)phys_address;
+
+	/* Map physical pages into MMU page tables */
+	for (; mali_address < end_address; mali_address += MALI_MMU_PAGE_SIZE, mali_phys += MALI_MMU_PAGE_SIZE) {
+		MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)]);
+		_mali_osk_mem_iowrite32_relaxed(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)],
+						MALI_MMU_PTE_ENTRY(mali_address) * sizeof(u32),
+						mali_phys | permission_bits);
+	}
+}
+
+void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr)
+{
+#if defined(DEBUG)
+	u32 pde_index, pte_index;
+	u32 pde, pte;
+
+	pde_index = MALI_MMU_PDE_ENTRY(fault_addr);
+	pte_index = MALI_MMU_PTE_ENTRY(fault_addr);
+
+
+	pde = _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+				     pde_index * sizeof(u32));
+
+
+	if (pde & MALI_MMU_FLAGS_PRESENT) {
+		u32 pte_addr = MALI_MMU_ENTRY_ADDRESS(pde);
+
+		pte = _mali_osk_mem_ioread32(pagedir->page_entries_mapped[pde_index],
+					     pte_index * sizeof(u32));
+
+		MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table present: %08x\n"
+				     "\t\tPTE: %08x, page %08x is %s\n",
+				     fault_addr, pte_addr, pte,
+				     MALI_MMU_ENTRY_ADDRESS(pte),
+				     pte & MALI_MMU_FLAGS_DEFAULT ? "rw" : "not present"));
+	} else {
+		MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table not present: %08x\n",
+				     fault_addr, pde));
+	}
+#else
+	MALI_IGNORE(pagedir);
+	MALI_IGNORE(fault_addr);
+#endif
+}
+
+/* For instrumented */
+struct dump_info {
+	u32 buffer_left;
+	u32 register_writes_size;
+	u32 page_table_dump_size;
+	u32 *buffer;
+};
+
+static _mali_osk_errcode_t writereg(u32 where, u32 what, const char *comment, struct dump_info *info)
+{
+	if (NULL != info) {
+		info->register_writes_size += sizeof(u32) * 2; /* two 32-bit words */
+
+		if (NULL != info->buffer) {
+			/* check that we have enough space */
+			if (info->buffer_left < sizeof(u32) * 2) MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+			*info->buffer = where;
+			info->buffer++;
+
+			*info->buffer = what;
+			info->buffer++;
+
+			info->buffer_left -= sizeof(u32) * 2;
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t mali_mmu_dump_page(mali_io_address page, u32 phys_addr, struct dump_info *info)
+{
+	if (NULL != info) {
+		/* 4096 for the page and 4 bytes for the address */
+		const u32 page_size_in_elements = MALI_MMU_PAGE_SIZE / 4;
+		const u32 page_size_in_bytes = MALI_MMU_PAGE_SIZE;
+		const u32 dump_size_in_bytes = MALI_MMU_PAGE_SIZE + 4;
+
+		info->page_table_dump_size += dump_size_in_bytes;
+
+		if (NULL != info->buffer) {
+			if (info->buffer_left < dump_size_in_bytes) MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+			*info->buffer = phys_addr;
+			info->buffer++;
+
+			_mali_osk_memcpy(info->buffer, page, page_size_in_bytes);
+			info->buffer += page_size_in_elements;
+
+			info->buffer_left -= dump_size_in_bytes;
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t dump_mmu_page_table(struct mali_page_directory *pagedir, struct dump_info *info)
+{
+	MALI_DEBUG_ASSERT_POINTER(pagedir);
+	MALI_DEBUG_ASSERT_POINTER(info);
+
+	if (NULL != pagedir->page_directory_mapped) {
+		int i;
+
+		MALI_CHECK_NO_ERROR(
+			mali_mmu_dump_page(pagedir->page_directory_mapped, pagedir->page_directory, info)
+		);
+
+		for (i = 0; i < 1024; i++) {
+			if (NULL != pagedir->page_entries_mapped[i]) {
+				MALI_CHECK_NO_ERROR(
+					mali_mmu_dump_page(pagedir->page_entries_mapped[i],
+							   _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+									   i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK, info)
+				);
+			}
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t dump_mmu_registers(struct mali_page_directory *pagedir, struct dump_info *info)
+{
+	MALI_CHECK_NO_ERROR(writereg(0x00000000, pagedir->page_directory,
+				     "set the page directory address", info));
+	MALI_CHECK_NO_ERROR(writereg(0x00000008, 4, "zap???", info));
+	MALI_CHECK_NO_ERROR(writereg(0x00000008, 0, "enable paging", info));
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args)
+{
+	struct dump_info info = { 0, 0, 0, NULL };
+	struct mali_session_data *session_data;
+
+	session_data = (struct mali_session_data *)(uintptr_t)(args->ctx);
+	MALI_DEBUG_ASSERT_POINTER(session_data);
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info));
+	MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info));
+	args->size = info.register_writes_size + info.page_table_dump_size;
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args)
+{
+	struct dump_info info = { 0, 0, 0, NULL };
+	struct mali_session_data *session_data;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	session_data = (struct mali_session_data *)(uintptr_t)(args->ctx);
+	MALI_DEBUG_ASSERT_POINTER(session_data);
+
+	info.buffer_left = args->size;
+	info.buffer = (u32 *)(uintptr_t)args->buffer;
+
+	args->register_writes = (uintptr_t)info.buffer;
+	MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info));
+
+	args->page_table_dump = (uintptr_t)info.buffer;
+	MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info));
+
+	args->register_writes_size = info.register_writes_size;
+	args->page_table_dump_size = info.page_table_dump_size;
+
+	MALI_SUCCESS;
+}
diff --git a/mali/common/mali_mmu_page_directory.h b/mali/common/mali_mmu_page_directory.h
new file mode 100755
index 0000000..4fc1058
--- /dev/null
+++ b/mali/common/mali_mmu_page_directory.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MMU_PAGE_DIRECTORY_H__
+#define __MALI_MMU_PAGE_DIRECTORY_H__
+
+#include "mali_osk.h"
+
+/**
+ * Size of an MMU page in bytes
+ */
+#define MALI_MMU_PAGE_SIZE 0x1000
+
+/*
+ * Size of the address space referenced by a page table page
+ */
+#define MALI_MMU_VIRTUAL_PAGE_SIZE 0x400000 /* 4 MiB */
+
+/**
+ * Page directory index from address
+ * Calculates the page directory index from the given address
+ */
+#define MALI_MMU_PDE_ENTRY(address) (((address)>>22) & 0x03FF)
+
+/**
+ * Page table index from address
+ * Calculates the page table index from the given address
+ */
+#define MALI_MMU_PTE_ENTRY(address) (((address)>>12) & 0x03FF)
+
+/**
+ * Extract the memory address from an PDE/PTE entry
+ */
+#define MALI_MMU_ENTRY_ADDRESS(value) ((value) & 0xFFFFFC00)
+
+#define MALI_INVALID_PAGE ((u32)(~0))
+
+/**
+ *
+ */
+typedef enum mali_mmu_entry_flags {
+	MALI_MMU_FLAGS_PRESENT = 0x01,
+	MALI_MMU_FLAGS_READ_PERMISSION = 0x02,
+	MALI_MMU_FLAGS_WRITE_PERMISSION = 0x04,
+	MALI_MMU_FLAGS_OVERRIDE_CACHE  = 0x8,
+	MALI_MMU_FLAGS_WRITE_CACHEABLE  = 0x10,
+	MALI_MMU_FLAGS_WRITE_ALLOCATE  = 0x20,
+	MALI_MMU_FLAGS_WRITE_BUFFERABLE  = 0x40,
+	MALI_MMU_FLAGS_READ_CACHEABLE  = 0x80,
+	MALI_MMU_FLAGS_READ_ALLOCATE  = 0x100,
+	MALI_MMU_FLAGS_MASK = 0x1FF,
+} mali_mmu_entry_flags;
+
+
+#define MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE ( \
+		MALI_MMU_FLAGS_PRESENT | \
+		MALI_MMU_FLAGS_READ_PERMISSION |  \
+		MALI_MMU_FLAGS_WRITE_PERMISSION | \
+		MALI_MMU_FLAGS_OVERRIDE_CACHE | \
+		MALI_MMU_FLAGS_WRITE_CACHEABLE | \
+		MALI_MMU_FLAGS_WRITE_BUFFERABLE | \
+		MALI_MMU_FLAGS_READ_CACHEABLE | \
+		MALI_MMU_FLAGS_READ_ALLOCATE )
+
+#define MALI_MMU_FLAGS_DEFAULT ( \
+				 MALI_MMU_FLAGS_PRESENT | \
+				 MALI_MMU_FLAGS_READ_PERMISSION |  \
+				 MALI_MMU_FLAGS_WRITE_PERMISSION )
+
+
+struct mali_page_directory {
+	u32 page_directory; /**< Physical address of the memory session's page directory */
+	mali_io_address page_directory_mapped; /**< Pointer to the mapped version of the page directory into the kernel's address space */
+
+	mali_io_address page_entries_mapped[1024]; /**< Pointers to the page tables which exists in the page directory mapped into the kernel's address space */
+	u32   page_entries_usage_count[1024]; /**< Tracks usage count of the page table pages, so they can be releases on the last reference */
+};
+
+/* Map Mali virtual address space (i.e. ensure page tables exist for the virtual range)  */
+_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size);
+_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size);
+
+/* Back virtual address space with actual pages. Assumes input is contiguous and 4k aligned. */
+void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address,
+			     mali_dma_addr phys_address, u32 size, u32 permission_bits);
+
+u32 mali_allocate_empty_page(mali_io_address *virtual);
+void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr);
+_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory,
+		mali_io_address *page_directory_mapping,
+		mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+		mali_dma_addr *data_page, mali_io_address *data_page_mapping);
+void mali_destroy_fault_flush_pages(
+	mali_dma_addr *page_directory, mali_io_address *page_directory_mapping,
+	mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+	mali_dma_addr *data_page, mali_io_address *data_page_mapping);
+
+struct mali_page_directory *mali_mmu_pagedir_alloc(void);
+void mali_mmu_pagedir_free(struct mali_page_directory *pagedir);
+
+void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr);
+
+#endif /* __MALI_MMU_PAGE_DIRECTORY_H__ */
diff --git a/mali/common/mali_osk.h b/mali/common/mali_osk.h
new file mode 100755
index 0000000..5c20fc3
--- /dev/null
+++ b/mali/common/mali_osk.h
@@ -0,0 +1,1389 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk.h
+ * Defines the OS abstraction layer for the kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_H__
+#define __MALI_OSK_H__
+
+#include <linux/seq_file.h>
+#include "mali_osk_types.h"
+#include "mali_osk_specific.h"           /* include any per-os specifics */
+#include "mali_osk_locks.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs
+ *
+ * @{
+ */
+
+/** @addtogroup _mali_osk_lock OSK Mutual Exclusion Locks
+ * @{ */
+
+#ifdef DEBUG
+/** @brief Macro for asserting that the current thread holds a given lock
+ */
+#define MALI_DEBUG_ASSERT_LOCK_HELD(l) MALI_DEBUG_ASSERT(_mali_osk_lock_get_owner((_mali_osk_lock_debug_t *)l) == _mali_osk_get_tid());
+
+/** @brief returns a lock's owner (thread id) if debugging is enabled
+ */
+#else
+#define MALI_DEBUG_ASSERT_LOCK_HELD(l) do {} while(0)
+#endif
+
+#define _mali_osk_ctxprintf     seq_printf
+
+/** @} */ /* end group _mali_osk_lock */
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Find the containing structure of another structure
+ *
+ * This is the reverse of the operation 'offsetof'. This means that the
+ * following condition is satisfied:
+ *
+ *   ptr == _MALI_OSK_CONTAINER_OF( &ptr->member, type, member )
+ *
+ * When ptr is of type 'type'.
+ *
+ * Its purpose it to recover a larger structure that has wrapped a smaller one.
+ *
+ * @note no type or memory checking occurs to ensure that a wrapper structure
+ * does in fact exist, and that it is being recovered with respect to the
+ * correct member.
+ *
+ * @param ptr the pointer to the member that is contained within the larger
+ * structure
+ * @param type the type of the structure that contains the member
+ * @param member the name of the member in the structure that ptr points to.
+ * @return a pointer to a \a type object which contains \a member, as pointed
+ * to by \a ptr.
+ */
+#define _MALI_OSK_CONTAINER_OF(ptr, type, member) \
+	((type *)( ((char *)ptr) - offsetof(type,member) ))
+
+/** @addtogroup _mali_osk_wq
+ * @{ */
+
+/** @brief Initialize work queues (for deferred work)
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_wq_init(void);
+
+/** @brief Terminate work queues (for deferred work)
+ */
+void _mali_osk_wq_term(void);
+
+/** @brief Create work in the work queue
+ *
+ * Creates a work object which can be scheduled in the work queue. When
+ * scheduled, \a handler will be called with \a data as the argument.
+ *
+ * Refer to \ref _mali_osk_wq_schedule_work() for details on how work
+ * is scheduled in the queue.
+ *
+ * The returned pointer must be freed with \ref _mali_osk_wq_delete_work()
+ * when no longer needed.
+ */
+_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief A high priority version of \a _mali_osk_wq_create_work()
+ *
+ * Creates a work object which can be scheduled in the high priority work queue.
+ *
+ * This is unfortunately needed to get low latency scheduling of the Mali cores.  Normally we would
+ * schedule the next job in hw_irq or tasklet, but often we can't since we need to synchronously map
+ * and unmap shared memory when a job is connected to external fences (timelines). And this requires
+ * taking a mutex.
+ *
+ * We do signal a lot of other (low priority) work also as part of the job being finished, and if we
+ * don't set this Mali scheduling thread as high priority, we see that the CPU scheduler often runs
+ * random things instead of starting the next GPU job when the GPU is idle.  So setting the gpu
+ * scheduler to high priority does give a visually more responsive system.
+ *
+ * Start the high priority work with: \a _mali_osk_wq_schedule_work_high_pri()
+ */
+_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief Delete a work object
+ *
+ * This will flush the work queue to ensure that the work handler will not
+ * be called after deletion.
+ */
+void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work);
+
+/** @brief Delete a work object
+ *
+ * This will NOT flush the work queue, so only call this if you are sure that the work handler will
+ * not be called after deletion.
+ */
+void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work);
+
+/** @brief Cause a queued, deferred call of the work handler
+ *
+ * _mali_osk_wq_schedule_work provides a mechanism for enqueuing deferred calls
+ * to the work handler. After calling \ref _mali_osk_wq_schedule_work(), the
+ * work handler will be scheduled to run at some point in the future.
+ *
+ * Typically this is called by the IRQ upper-half to defer further processing of
+ * IRQ-related work to the IRQ bottom-half handler. This is necessary for work
+ * that cannot be done in an IRQ context by the IRQ upper-half handler. Timer
+ * callbacks also use this mechanism, because they are treated as though they
+ * operate in an IRQ context. Refer to \ref _mali_osk_timer_t for more
+ * information.
+ *
+ * Code that operates in a kernel-process context (with no IRQ context
+ * restrictions) may also enqueue deferred calls to the IRQ bottom-half. The
+ * advantage over direct calling is that deferred calling allows the caller and
+ * IRQ bottom half to hold the same mutex, with a guarantee that they will not
+ * deadlock just by using this mechanism.
+ *
+ * _mali_osk_wq_schedule_work() places deferred call requests on a queue, to
+ * allow for more than one thread to make a deferred call. Therfore, if it is
+ * called 'K' times, then the IRQ bottom-half will be scheduled 'K' times too.
+ * 'K' is a number that is implementation-specific.
+ *
+ * _mali_osk_wq_schedule_work() is guaranteed to not block on:
+ * - enqueuing a deferred call request.
+ * - the completion of the work handler.
+ *
+ * This is to prevent deadlock. For example, if _mali_osk_wq_schedule_work()
+ * blocked, then it would cause a deadlock when the following two conditions
+ * hold:
+ * - The work handler callback (of type _mali_osk_wq_work_handler_t) locks
+ * a mutex
+ * - And, at the same time, the caller of _mali_osk_wq_schedule_work() also
+ * holds the same mutex
+ *
+ * @note care must be taken to not overflow the queue that
+ * _mali_osk_wq_schedule_work() operates on. Code must be structured to
+ * ensure that the number of requests made to the queue is bounded. Otherwise,
+ * work will be lost.
+ *
+ * The queue that _mali_osk_wq_schedule_work implements is a FIFO of N-writer,
+ * 1-reader type. The writers are the callers of _mali_osk_wq_schedule_work
+ * (all OSK-registered IRQ upper-half handlers in the system, watchdog timers,
+ * callers from a Kernel-process context). The reader is a single thread that
+ * handles all OSK-registered work.
+ *
+ * @param work a pointer to the _mali_osk_wq_work_t object corresponding to the
+ * work to begin processing.
+ */
+void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work);
+
+/** @brief Cause a queued, deferred call of the high priority work handler
+ *
+ * Function is the same as \a _mali_osk_wq_schedule_work() with the only
+ * difference that it runs in a high (real time) priority on the system.
+ *
+ * Should only be used as a substitue for doing the same work in interrupts.
+ *
+ * This is allowed to sleep, but the work should be small since it will block
+ * all other applications.
+*/
+void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work);
+
+/** @brief Flush the work queue
+ *
+ * This will flush the OSK work queue, ensuring all work in the queue has
+ * completed before returning.
+ *
+ * Since this blocks on the completion of work in the work-queue, the
+ * caller of this function \b must \b not hold any mutexes that are taken by
+ * any registered work handler. To do so may cause a deadlock.
+ *
+ */
+void _mali_osk_wq_flush(void);
+
+/** @brief Create work in the delayed work queue
+ *
+ * Creates a work object which can be scheduled in the work queue. When
+ * scheduled, a timer will be start and the \a handler will be called with
+ * \a data as the argument when timer out
+ *
+ * Refer to \ref _mali_osk_wq_delayed_schedule_work() for details on how work
+ * is scheduled in the queue.
+ *
+ * The returned pointer must be freed with \ref _mali_osk_wq_delayed_delete_work_nonflush()
+ * when no longer needed.
+ */
+_mali_osk_wq_delayed_work_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief Delete a work object
+ *
+ * This will NOT flush the work queue, so only call this if you are sure that the work handler will
+ * not be called after deletion.
+ */
+void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Cancel a delayed work without waiting for it to finish
+ *
+ * Note that the \a work callback function may still be running on return from
+ * _mali_osk_wq_delayed_cancel_work_async().
+ *
+ * @param work The delayed work to be cancelled
+ */
+void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Cancel a delayed work and wait for it to finish
+ *
+ * When this function returns, the \a work was either cancelled or it finished running.
+ *
+ * @param work The delayed work to be cancelled
+ */
+void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Put \a work task in global workqueue after delay
+ *
+ * After waiting for a given time this puts a job in the kernel-global
+ * workqueue.
+ *
+ * If \a work was already on a queue, this function will return without doing anything
+ *
+ * @param work job to be done
+ * @param delay number of jiffies to wait or 0 for immediate execution
+ */
+void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay);
+
+/** @} */ /* end group _mali_osk_wq */
+
+
+/** @addtogroup _mali_osk_irq
+ * @{ */
+
+/** @brief Initialize IRQ handling for a resource
+ *
+ * Registers an interrupt handler \a uhandler for the given IRQ number \a irqnum.
+ * \a data will be passed as argument to the handler when an interrupt occurs.
+ *
+ * If \a irqnum is -1, _mali_osk_irq_init will probe for the IRQ number using
+ * the supplied \a trigger_func and \a ack_func. These functions will also
+ * receive \a data as their argument.
+ *
+ * @param irqnum The IRQ number that the resource uses, as seen by the CPU.
+ * The value -1 has a special meaning which indicates the use of probing, and
+ * trigger_func and ack_func must be non-NULL.
+ * @param uhandler The interrupt handler, corresponding to a ISR handler for
+ * the resource
+ * @param int_data resource specific data, which will be passed to uhandler
+ * @param trigger_func Optional: a function to trigger the resource's irq, to
+ * probe for the interrupt. Use NULL if irqnum != -1.
+ * @param ack_func Optional: a function to acknowledge the resource's irq, to
+ * probe for the interrupt. Use NULL if irqnum != -1.
+ * @param probe_data resource-specific data, which will be passed to
+ * (if present) trigger_func and ack_func
+ * @param description textual description of the IRQ resource.
+ * @return on success, a pointer to a _mali_osk_irq_t object, which represents
+ * the IRQ handling on this resource. NULL on failure.
+ */
+_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description);
+
+/** @brief Terminate IRQ handling on a resource.
+ *
+ * This will disable the interrupt from the device, and then waits for any
+ * currently executing IRQ handlers to complete.
+ *
+ * @note If work is deferred to an IRQ bottom-half handler through
+ * \ref _mali_osk_wq_schedule_work(), be sure to flush any remaining work
+ * with \ref _mali_osk_wq_flush() or (implicitly) with \ref _mali_osk_wq_delete_work()
+ *
+ * @param irq a pointer to the _mali_osk_irq_t object corresponding to the
+ * resource whose IRQ handling is to be terminated.
+ */
+void _mali_osk_irq_term(_mali_osk_irq_t *irq);
+
+/** @} */ /* end group _mali_osk_irq */
+
+
+/** @addtogroup _mali_osk_atomic
+ * @{ */
+
+/** @brief Decrement an atomic counter
+ *
+ * @note It is an error to decrement the counter beyond -(1<<23)
+ *
+ * @param atom pointer to an atomic counter */
+void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom);
+
+/** @brief Decrement an atomic counter, return new value
+ *
+ * @param atom pointer to an atomic counter
+ * @return The new value, after decrement */
+u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom);
+
+/** @brief Increment an atomic counter
+ *
+ * @note It is an error to increment the counter beyond (1<<23)-1
+ *
+ * @param atom pointer to an atomic counter */
+void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom);
+
+/** @brief Increment an atomic counter, return new value
+ *
+ * @param atom pointer to an atomic counter */
+u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom);
+
+/** @brief Initialize an atomic counter
+ *
+ * @note the parameter required is a u32, and so signed integers should be
+ * cast to u32.
+ *
+ * @param atom pointer to an atomic counter
+ * @param val the value to initialize the atomic counter.
+ */
+void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val);
+
+/** @brief Read a value from an atomic counter
+ *
+ * This can only be safely used to determine the value of the counter when it
+ * is guaranteed that other threads will not be modifying the counter. This
+ * makes its usefulness limited.
+ *
+ * @param atom pointer to an atomic counter
+ */
+u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom);
+
+/** @brief Terminate an atomic counter
+ *
+ * @param atom pointer to an atomic counter
+ */
+void _mali_osk_atomic_term(_mali_osk_atomic_t *atom);
+
+/** @brief Assign a new val to atomic counter, and return the old atomic counter
+ *
+ * @param atom pointer to an atomic counter
+ * @param val the new value assign to the atomic counter
+ * @return the old value of the atomic counter
+ */
+u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val);
+/** @} */  /* end group _mali_osk_atomic */
+
+
+/** @defgroup _mali_osk_memory OSK Memory Allocation
+ * @{ */
+
+/** @brief Allocate zero-initialized memory.
+ *
+ * Returns a buffer capable of containing at least \a n elements of \a size
+ * bytes each. The buffer is initialized to zero.
+ *
+ * If there is a need for a bigger block of memory (16KB or bigger), then
+ * consider to use _mali_osk_vmalloc() instead, as this function might
+ * map down to a OS function with size limitations.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * @param n Number of elements to allocate
+ * @param size Size of each element
+ * @return On success, the zero-initialized buffer allocated. NULL on failure
+ */
+void *_mali_osk_calloc(u32 n, u32 size);
+
+/** @brief Allocate memory.
+ *
+ * Returns a buffer capable of containing at least \a size bytes. The
+ * contents of the buffer are undefined.
+ *
+ * If there is a need for a bigger block of memory (16KB or bigger), then
+ * consider to use _mali_osk_vmalloc() instead, as this function might
+ * map down to a OS function with size limitations.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * Remember to free memory using _mali_osk_free().
+ * @param size Number of bytes to allocate
+ * @return On success, the buffer allocated. NULL on failure.
+ */
+void *_mali_osk_malloc(u32 size);
+
+/** @brief Free memory.
+ *
+ * Reclaims the buffer pointed to by the parameter \a ptr for the system.
+ * All memory returned from _mali_osk_malloc() and _mali_osk_calloc()
+ * must be freed before the application exits. Otherwise,
+ * a memory leak will occur.
+ *
+ * Memory must be freed once. It is an error to free the same non-NULL pointer
+ * more than once.
+ *
+ * It is legal to free the NULL pointer.
+ *
+ * @param ptr Pointer to buffer to free
+ */
+void _mali_osk_free(void *ptr);
+
+/** @brief Allocate memory.
+ *
+ * Returns a buffer capable of containing at least \a size bytes. The
+ * contents of the buffer are undefined.
+ *
+ * This function is potentially slower than _mali_osk_malloc() and _mali_osk_calloc(),
+ * but do support bigger sizes.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * Remember to free memory using _mali_osk_free().
+ * @param size Number of bytes to allocate
+ * @return On success, the buffer allocated. NULL on failure.
+ */
+void *_mali_osk_valloc(u32 size);
+
+/** @brief Free memory.
+ *
+ * Reclaims the buffer pointed to by the parameter \a ptr for the system.
+ * All memory returned from _mali_osk_valloc() must be freed before the
+ * application exits. Otherwise a memory leak will occur.
+ *
+ * Memory must be freed once. It is an error to free the same non-NULL pointer
+ * more than once.
+ *
+ * It is legal to free the NULL pointer.
+ *
+ * @param ptr Pointer to buffer to free
+ */
+void _mali_osk_vfree(void *ptr);
+
+/** @brief Copies memory.
+ *
+ * Copies the \a len bytes from the buffer pointed by the parameter \a src
+ * directly to the buffer pointed by \a dst.
+ *
+ * It is an error for \a src to overlap \a dst anywhere in \a len bytes.
+ *
+ * @param dst Pointer to the destination array where the content is to be
+ * copied.
+ * @param src Pointer to the source of data to be copied.
+ * @param len Number of bytes to copy.
+ * @return \a dst is always passed through unmodified.
+ */
+void *_mali_osk_memcpy(void *dst, const void *src, u32 len);
+
+/** @brief Fills memory.
+ *
+ * Sets the first \a n bytes of the block of memory pointed to by \a s to
+ * the specified value
+ * @param s Pointer to the block of memory to fill.
+ * @param c Value to be set, passed as u32. Only the 8 Least Significant Bits (LSB)
+ * are used.
+ * @param n Number of bytes to be set to the value.
+ * @return \a s is always passed through unmodified
+ */
+void *_mali_osk_memset(void *s, u32 c, u32 n);
+/** @} */ /* end group _mali_osk_memory */
+
+
+/** @brief Checks the amount of memory allocated
+ *
+ * Checks that not more than \a max_allocated bytes are allocated.
+ *
+ * Some OS bring up an interactive out of memory dialogue when the
+ * system runs out of memory. This can stall non-interactive
+ * apps (e.g. automated test runs). This function can be used to
+ * not trigger the OOM dialogue by keeping allocations
+ * within a certain limit.
+ *
+ * @return MALI_TRUE when \a max_allocated bytes are not in use yet. MALI_FALSE
+ * when at least \a max_allocated bytes are in use.
+ */
+mali_bool _mali_osk_mem_check_allocated(u32 max_allocated);
+
+
+/** @addtogroup _mali_osk_low_level_memory
+ * @{ */
+
+/** @brief Issue a memory barrier
+ *
+ * This defines an arbitrary memory barrier operation, which forces an ordering constraint
+ * on memory read and write operations.
+ */
+void _mali_osk_mem_barrier(void);
+
+/** @brief Issue a write memory barrier
+ *
+ * This defines an write memory barrier operation which forces an ordering constraint
+ * on memory write operations.
+ */
+void _mali_osk_write_mem_barrier(void);
+
+/** @brief Map a physically contiguous region into kernel space
+ *
+ * This is primarily used for mapping in registers from resources, and Mali-MMU
+ * page tables. The mapping is only visable from kernel-space.
+ *
+ * Access has to go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32
+ *
+ * @param phys CPU-physical base address of the memory to map in. This must
+ * be aligned to the system's page size, which is assumed to be 4K.
+ * @param size the number of bytes of physically contiguous address space to
+ * map in
+ * @param description A textual description of the memory being mapped in.
+ * @return On success, a Mali IO address through which the mapped-in
+ * memory/registers can be accessed. NULL on failure.
+ */
+mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description);
+
+/** @brief Unmap a physically contiguous address range from kernel space.
+ *
+ * The address range should be one previously mapped in through
+ * _mali_osk_mem_mapioregion.
+ *
+ * It is a programming error to do (but not limited to) the following:
+ * - attempt an unmap twice
+ * - unmap only part of a range obtained through _mali_osk_mem_mapioregion
+ * - unmap more than the range obtained through  _mali_osk_mem_mapioregion
+ * - unmap an address range that was not successfully mapped using
+ * _mali_osk_mem_mapioregion
+ * - provide a mapping that does not map to phys.
+ *
+ * @param phys CPU-physical base address of the memory that was originally
+ * mapped in. This must be aligned to the system's page size, which is assumed
+ * to be 4K
+ * @param size The number of bytes that were originally mapped in.
+ * @param mapping The Mali IO address through which the mapping is
+ * accessed.
+ */
+void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address mapping);
+
+/** @brief Allocate and Map a physically contiguous region into kernel space
+ *
+ * This is used for allocating physically contiguous regions (such as Mali-MMU
+ * page tables) and mapping them into kernel space. The mapping is only
+ * visible from kernel-space.
+ *
+ * The alignment of the returned memory is guaranteed to be at least
+ * _MALI_OSK_CPU_PAGE_SIZE.
+ *
+ * Access must go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32
+ *
+ * @note This function is primarily to provide support for OSs that are
+ * incapable of separating the tasks 'allocate physically contiguous memory'
+ * and 'map it into kernel space'
+ *
+ * @param[out] phys CPU-physical base address of memory that was allocated.
+ * (*phys) will be guaranteed to be aligned to at least
+ * _MALI_OSK_CPU_PAGE_SIZE on success.
+ *
+ * @param[in] size the number of bytes of physically contiguous memory to
+ * allocate. This must be a multiple of _MALI_OSK_CPU_PAGE_SIZE.
+ *
+ * @return On success, a Mali IO address through which the mapped-in
+ * memory/registers can be accessed. NULL on failure, and (*phys) is unmodified.
+ */
+mali_io_address _mali_osk_mem_allocioregion(u32 *phys, u32 size);
+
+/** @brief Free a physically contiguous address range from kernel space.
+ *
+ * The address range should be one previously mapped in through
+ * _mali_osk_mem_allocioregion.
+ *
+ * It is a programming error to do (but not limited to) the following:
+ * - attempt a free twice on the same ioregion
+ * - free only part of a range obtained through _mali_osk_mem_allocioregion
+ * - free more than the range obtained through  _mali_osk_mem_allocioregion
+ * - free an address range that was not successfully mapped using
+ * _mali_osk_mem_allocioregion
+ * - provide a mapping that does not map to phys.
+ *
+ * @param phys CPU-physical base address of the memory that was originally
+ * mapped in, which was aligned to _MALI_OSK_CPU_PAGE_SIZE.
+ * @param size The number of bytes that were originally mapped in, which was
+ * a multiple of _MALI_OSK_CPU_PAGE_SIZE.
+ * @param mapping The Mali IO address through which the mapping is
+ * accessed.
+ */
+void _mali_osk_mem_freeioregion(u32 phys, u32 size, mali_io_address mapping);
+
+/** @brief Request a region of physically contiguous memory
+ *
+ * This is used to ensure exclusive access to a region of physically contigous
+ * memory.
+ *
+ * It is acceptable to implement this as a stub. However, it is then the job
+ * of the System Integrator to ensure that no other device driver will be using
+ * the physical address ranges used by Mali, while the Mali device driver is
+ * loaded.
+ *
+ * @param phys CPU-physical base address of the memory to request. This must
+ * be aligned to the system's page size, which is assumed to be 4K.
+ * @param size the number of bytes of physically contiguous address space to
+ * request.
+ * @param description A textual description of the memory being requested.
+ * @return _MALI_OSK_ERR_OK on success. Otherwise, a suitable
+ * _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description);
+
+/** @brief Un-request a region of physically contiguous memory
+ *
+ * This is used to release a regious of physically contiguous memory previously
+ * requested through _mali_osk_mem_reqregion, so that other device drivers may
+ * use it. This will be called at time of Mali device driver termination.
+ *
+ * It is a programming error to attempt to:
+ * - unrequest a region twice
+ * - unrequest only part of a range obtained through _mali_osk_mem_reqregion
+ * - unrequest more than the range obtained through  _mali_osk_mem_reqregion
+ * - unrequest an address range that was not successfully requested using
+ * _mali_osk_mem_reqregion
+ *
+ * @param phys CPU-physical base address of the memory to un-request. This must
+ * be aligned to the system's page size, which is assumed to be 4K
+ * @param size the number of bytes of physically contiguous address space to
+ * un-request.
+ */
+void _mali_osk_mem_unreqregion(uintptr_t phys, u32 size);
+
+/** @brief Read from a location currently mapped in through
+ * _mali_osk_mem_mapioregion
+ *
+ * This reads a 32-bit word from a 32-bit aligned location. It is a programming
+ * error to provide unaligned locations, or to read from memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to read from
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @return the 32-bit word from the specified location.
+ */
+u32 _mali_osk_mem_ioread32(volatile mali_io_address mapping, u32 offset);
+
+/** @brief Write to a location currently mapped in through
+ * _mali_osk_mem_mapioregion without memory barriers
+ *
+ * This write a 32-bit word to a 32-bit aligned location without using memory barrier.
+ * It is a programming error to provide unaligned locations, or to write to memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to write to
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @param val the 32-bit word to write.
+ */
+void _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val);
+
+/** @brief Write to a location currently mapped in through
+ * _mali_osk_mem_mapioregion with write memory barrier
+ *
+ * This write a 32-bit word to a 32-bit aligned location. It is a programming
+ * error to provide unaligned locations, or to write to memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to write to
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @param val the 32-bit word to write.
+ */
+void _mali_osk_mem_iowrite32(volatile mali_io_address mapping, u32 offset, u32 val);
+
+/** @brief Flush all CPU caches
+ *
+ * This should only be implemented if flushing of the cache is required for
+ * memory mapped in through _mali_osk_mem_mapregion.
+ */
+void _mali_osk_cache_flushall(void);
+
+/** @brief Flush any caches necessary for the CPU and MALI to have the same view of a range of uncached mapped memory
+ *
+ * This should only be implemented if your OS doesn't do a full cache flush (inner & outer)
+ * after allocating uncached mapped memory.
+ *
+ * Some OS do not perform a full cache flush (including all outer caches) for uncached mapped memory.
+ * They zero the memory through a cached mapping, then flush the inner caches but not the outer caches.
+ * This is required for MALI to have the correct view of the memory.
+ */
+void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size);
+
+/** @brief Safely copy as much data as possible from src to dest
+ *
+ * Do not crash if src or dest isn't available.
+ *
+ * @param dest Destination buffer (limited to user space mapped Mali memory)
+ * @param src Source buffer
+ * @param size Number of bytes to copy
+ * @return Number of bytes actually copied
+ */
+u32 _mali_osk_mem_write_safe(void *dest, const void *src, u32 size);
+
+/** @} */ /* end group _mali_osk_low_level_memory */
+
+
+/** @addtogroup _mali_osk_notification
+ *
+ * User space notification framework
+ *
+ * Communication with user space of asynchronous events is performed through a
+ * synchronous call to the \ref u_k_api.
+ *
+ * Since the events are asynchronous, the events have to be queued until a
+ * synchronous U/K API call can be made by user-space. A U/K API call might also
+ * be received before any event has happened. Therefore the notifications the
+ * different subsystems wants to send to user space has to be queued for later
+ * reception, or a U/K API call has to be blocked until an event has occured.
+ *
+ * Typical uses of notifications are after running of jobs on the hardware or
+ * when changes to the system is detected that needs to be relayed to user
+ * space.
+ *
+ * After an event has occured user space has to be notified using some kind of
+ * message. The notification framework supports sending messages to waiting
+ * threads or queueing of messages until a U/K API call is made.
+ *
+ * The notification queue is a FIFO. There are no restrictions on the numbers
+ * of readers or writers in the queue.
+ *
+ * A message contains what user space needs to identifiy how to handle an
+ * event. This includes a type field and a possible type specific payload.
+ *
+ * A notification to user space is represented by a
+ * \ref _mali_osk_notification_t object. A sender gets hold of such an object
+ * using _mali_osk_notification_create(). The buffer given by the
+ * _mali_osk_notification_t::result_buffer field in the object is used to store
+ * any type specific data. The other fields are internal to the queue system
+ * and should not be touched.
+ *
+ * @{ */
+
+/** @brief Create a notification object
+ *
+ * Returns a notification object which can be added to the queue of
+ * notifications pending for user space transfer.
+ *
+ * The implementation will initialize all members of the
+ * \ref _mali_osk_notification_t object. In particular, the
+ * _mali_osk_notification_t::result_buffer member will be initialized to point
+ * to \a size bytes of storage, and that storage will be suitably aligned for
+ * storage of any structure. That is, the created buffer meets the same
+ * requirements as _mali_osk_malloc().
+ *
+ * The notification object must be deleted when not in use. Use
+ * _mali_osk_notification_delete() for deleting it.
+ *
+ * @note You \b must \b not call _mali_osk_free() on a \ref _mali_osk_notification_t,
+ * object, or on a _mali_osk_notification_t::result_buffer. You must only use
+ * _mali_osk_notification_delete() to free the resources assocaited with a
+ * \ref _mali_osk_notification_t object.
+ *
+ * @param type The notification type
+ * @param size The size of the type specific buffer to send
+ * @return Pointer to a notification object with a suitable buffer, or NULL on error.
+ */
+_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size);
+
+/** @brief Delete a notification object
+ *
+ * This must be called to reclaim the resources of a notification object. This
+ * includes:
+ * - The _mali_osk_notification_t::result_buffer
+ * - The \ref _mali_osk_notification_t itself.
+ *
+ * A notification object \b must \b not be used after it has been deleted by
+ * _mali_osk_notification_delete().
+ *
+ * In addition, the notification object may not be deleted while it is in a
+ * queue. That is, if it has been placed on a queue with
+ * _mali_osk_notification_queue_send(), then it must not be deleted until
+ * it has been received by a call to _mali_osk_notification_queue_receive().
+ * Otherwise, the queue may be corrupted.
+ *
+ * @param object the notification object to delete.
+ */
+void _mali_osk_notification_delete(_mali_osk_notification_t *object);
+
+/** @brief Create a notification queue
+ *
+ * Creates a notification queue which can be used to queue messages for user
+ * delivery and get queued messages from
+ *
+ * The queue is a FIFO, and has no restrictions on the numbers of readers or
+ * writers.
+ *
+ * When the queue is no longer in use, it must be terminated with
+ * \ref _mali_osk_notification_queue_term(). Failure to do so will result in a
+ * memory leak.
+ *
+ * @return Pointer to a new notification queue or NULL on error.
+ */
+_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void);
+
+/** @brief Destroy a notification queue
+ *
+ * Destroys a notification queue and frees associated resources from the queue.
+ *
+ * A notification queue \b must \b not be destroyed in the following cases:
+ * - while there are \ref _mali_osk_notification_t objects in the queue.
+ * - while there are writers currently acting upon the queue. That is, while
+ * a thread is currently calling \ref _mali_osk_notification_queue_send() on
+ * the queue, or while a thread may call
+ * \ref _mali_osk_notification_queue_send() on the queue in the future.
+ * - while there are readers currently waiting upon the queue. That is, while
+ * a thread is currently calling \ref _mali_osk_notification_queue_receive() on
+ * the queue, or while a thread may call
+ * \ref _mali_osk_notification_queue_receive() on the queue in the future.
+ *
+ * Therefore, all \ref _mali_osk_notification_t objects must be flushed and
+ * deleted by the code that makes use of the notification queues, since only
+ * they know the structure of the _mali_osk_notification_t::result_buffer
+ * (even if it may only be a flat sturcture).
+ *
+ * @note Since the queue is a FIFO, the code using notification queues may
+ * create its own 'flush' type of notification, to assist in flushing the
+ * queue.
+ *
+ * Once the queue has been destroyed, it must not be used again.
+ *
+ * @param queue The queue to destroy
+ */
+void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue);
+
+/** @brief Schedule notification for delivery
+ *
+ * When a \ref _mali_osk_notification_t object has been created successfully
+ * and set up, it may be added to the queue of objects waiting for user space
+ * transfer.
+ *
+ * The sending will not block if the queue is full.
+ *
+ * A \ref _mali_osk_notification_t object \b must \b not be put on two different
+ * queues at the same time, or enqueued twice onto a single queue before
+ * reception. However, it is acceptable for it to be requeued \em after reception
+ * from a call to _mali_osk_notification_queue_receive(), even onto the same queue.
+ *
+ * Again, requeuing must also not enqueue onto two different queues at the same
+ * time, or enqueue onto the same queue twice before reception.
+ *
+ * @param queue The notification queue to add this notification to
+ * @param object The entry to add
+ */
+void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object);
+
+/** @brief Receive a notification from a queue
+ *
+ * Receives a single notification from the given queue.
+ *
+ * If no notifciations are ready the thread will sleep until one becomes ready.
+ * Therefore, notifications may not be received into an
+ * IRQ or 'atomic' context (that is, a context where sleeping is disallowed).
+ *
+ * @param queue The queue to receive from
+ * @param result Pointer to storage of a pointer of type
+ * \ref _mali_osk_notification_t*. \a result will be written to such that the
+ * expression \a (*result) will evaluate to a pointer to a valid
+ * \ref _mali_osk_notification_t object, or NULL if none were received.
+ * @return _MALI_OSK_ERR_OK on success. _MALI_OSK_ERR_RESTARTSYSCALL if the sleep was interrupted.
+ */
+_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result);
+
+/** @brief Dequeues a notification from a queue
+ *
+ * Receives a single notification from the given queue.
+ *
+ * If no notifciations are ready the function call will return an error code.
+ *
+ * @param queue The queue to receive from
+ * @param result Pointer to storage of a pointer of type
+ * \ref _mali_osk_notification_t*. \a result will be written to such that the
+ * expression \a (*result) will evaluate to a pointer to a valid
+ * \ref _mali_osk_notification_t object, or NULL if none were received.
+ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if queue was empty.
+ */
+_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result);
+
+/** @} */ /* end group _mali_osk_notification */
+
+
+/** @addtogroup _mali_osk_timer
+ *
+ * Timers use the OS's representation of time, which are 'ticks'. This is to
+ * prevent aliasing problems between the internal timer time, and the time
+ * asked for.
+ *
+ * @{ */
+
+/** @brief Initialize a timer
+ *
+ * Allocates resources for a new timer, and initializes them. This does not
+ * start the timer.
+ *
+ * @return a pointer to the allocated timer object, or NULL on failure.
+ */
+_mali_osk_timer_t *_mali_osk_timer_init(void);
+
+/** @brief Start a timer
+ *
+ * It is an error to start a timer without setting the callback via
+ * _mali_osk_timer_setcallback().
+ *
+ * It is an error to use this to start an already started timer.
+ *
+ * The timer will expire in \a ticks_to_expire ticks, at which point, the
+ * callback function will be invoked with the callback-specific data,
+ * as registered by _mali_osk_timer_setcallback().
+ *
+ * @param tim the timer to start
+ * @param ticks_to_expire the amount of time in ticks for the timer to run
+ * before triggering.
+ */
+void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire);
+
+/** @brief Modify a timer
+ *
+ * Set the relative time at which a timer will expire, and start it if it is
+ * stopped. If \a ticks_to_expire 0 the timer fires immediately.
+ *
+ * It is an error to modify a timer without setting the callback via
+ *  _mali_osk_timer_setcallback().
+ *
+ * The timer will expire at \a ticks_to_expire from the time of the call, at
+ * which point, the callback function will be invoked with the
+ * callback-specific data, as set by _mali_osk_timer_setcallback().
+ *
+ * @param tim the timer to modify, and start if necessary
+ * @param ticks_to_expire the \em absolute time in ticks at which this timer
+ * should trigger.
+ *
+ */
+void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire);
+
+/** @brief Stop a timer, and block on its completion.
+ *
+ * Stop the timer. When the function returns, it is guaranteed that the timer's
+ * callback will not be running on any CPU core.
+ *
+ * Since stoping the timer blocks on compeletion of the callback, the callback
+ * may not obtain any mutexes that the caller holds. Otherwise, a deadlock will
+ * occur.
+ *
+ * @note While the callback itself is guaranteed to not be running, work
+ * enqueued on the work-queue by the timer (with
+ * \ref _mali_osk_wq_schedule_work()) may still run. The timer callback and
+ * work handler must take this into account.
+ *
+ * It is legal to stop an already stopped timer.
+ *
+ * @param tim the timer to stop.
+ *
+ */
+void _mali_osk_timer_del(_mali_osk_timer_t *tim);
+
+/** @brief Stop a timer.
+ *
+ * Stop the timer. When the function returns, the timer's callback may still be
+ * running on any CPU core.
+ *
+ * It is legal to stop an already stopped timer.
+ *
+ * @param tim the timer to stop.
+ */
+void _mali_osk_timer_del_async(_mali_osk_timer_t *tim);
+
+/** @brief Check if timer is pending.
+ *
+ * Check if timer is active.
+ *
+ * @param tim the timer to check
+ * @return MALI_TRUE if time is active, MALI_FALSE if it is not active
+ */
+mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim);
+
+/** @brief Set a timer's callback parameters.
+ *
+ * This must be called at least once before a timer is started/modified.
+ *
+ * After a timer has been stopped or expires, the callback remains set. This
+ * means that restarting the timer will call the same function with the same
+ * parameters on expiry.
+ *
+ * @param tim the timer to set callback on.
+ * @param callback Function to call when timer expires
+ * @param data Function-specific data to supply to the function on expiry.
+ */
+void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data);
+
+/** @brief Terminate a timer, and deallocate resources.
+ *
+ * The timer must first be stopped by calling _mali_osk_timer_del().
+ *
+ * It is a programming error for _mali_osk_timer_term() to be called on:
+ * - timer that is currently running
+ * - a timer that is currently executing its callback.
+ *
+ * @param tim the timer to deallocate.
+ */
+void _mali_osk_timer_term(_mali_osk_timer_t *tim);
+/** @} */ /* end group _mali_osk_timer */
+
+
+/** @defgroup _mali_osk_time OSK Time functions
+ *
+ * \ref _mali_osk_time use the OS's representation of time, which are
+ * 'ticks'. This is to prevent aliasing problems between the internal timer
+ * time, and the time asked for.
+ *
+ * OS tick time is measured as a u32. The time stored in a u32 may either be
+ * an absolute time, or a time delta between two events. Whilst it is valid to
+ * use math opeartors to \em change the tick value represented as a u32, it
+ * is often only meaningful to do such operations on time deltas, rather than
+ * on absolute time. However, it is meaningful to add/subtract time deltas to
+ * absolute times.
+ *
+ * Conversion between tick time and milliseconds (ms) may not be loss-less,
+ * and are \em implementation \em depenedant.
+ *
+ * Code use OS time must take this into account, since:
+ * - a small OS time may (or may not) be rounded
+ * - a large time may (or may not) overflow
+ *
+ * @{ */
+
+/** @brief Return whether ticka occurs after or at the same time as  tickb
+ *
+ * Systems where ticks can wrap must handle that.
+ *
+ * @param ticka ticka
+ * @param tickb tickb
+ * @return MALI_TRUE if ticka represents a time that occurs at or after tickb.
+ */
+mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb);
+
+/** @brief Convert milliseconds to OS 'ticks'
+ *
+ * @param ms time interval in milliseconds
+ * @return the corresponding time interval in OS ticks.
+ */
+unsigned long _mali_osk_time_mstoticks(u32 ms);
+
+/** @brief Convert OS 'ticks' to milliseconds
+ *
+ * @param ticks time interval in OS ticks.
+ * @return the corresponding time interval in milliseconds
+ */
+u32 _mali_osk_time_tickstoms(unsigned long ticks);
+
+
+/** @brief Get the current time in OS 'ticks'.
+ * @return the current time in OS 'ticks'.
+ */
+unsigned long _mali_osk_time_tickcount(void);
+
+/** @brief Cause a microsecond delay
+ *
+ * The delay will have microsecond resolution, and is necessary for correct
+ * operation of the driver. At worst, the delay will be \b at least \a usecs
+ * microseconds, and so may be (significantly) more.
+ *
+ * This function may be implemented as a busy-wait, which is the most sensible
+ * implementation. On OSs where there are situations in which a thread must not
+ * sleep, this is definitely implemented as a busy-wait.
+ *
+ * @param usecs the number of microseconds to wait for.
+ */
+void _mali_osk_time_ubusydelay(u32 usecs);
+
+/** @brief Return time in nano seconds, since any given reference.
+ *
+ * @return Time in nano seconds
+ */
+u64 _mali_osk_time_get_ns(void);
+
+/** @brief Return time in nano seconds, since boot time.
+ *
+ * @return Time in nano seconds
+ */
+u64 _mali_osk_boot_time_get_ns(void);
+
+/** @} */ /* end group _mali_osk_time */
+
+/** @defgroup _mali_osk_math OSK Math
+ * @{ */
+
+/** @brief Count Leading Zeros (Little-endian)
+ *
+ * @note This function must be implemented to support the reference
+ * implementation of _mali_osk_find_first_zero_bit, as defined in
+ * mali_osk_bitops.h.
+ *
+ * @param val 32-bit words to count leading zeros on
+ * @return the number of leading zeros.
+ */
+u32 _mali_osk_clz(u32 val);
+
+/** @brief find last (most-significant) bit set
+ *
+ * @param val 32-bit words to count last bit set on
+ * @return last bit set.
+ */
+u32 _mali_osk_fls(u32 val);
+
+/** @} */ /* end group _mali_osk_math */
+
+/** @addtogroup _mali_osk_wait_queue OSK Wait Queue functionality
+ * @{ */
+
+/** @brief Initialize an empty Wait Queue */
+_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void);
+
+/** @brief Sleep if condition is false
+ *
+ * @param queue the queue to use
+ * @param condition function pointer to a boolean function
+ * @param data data parameter for condition function
+ *
+ * Put thread to sleep if the given \a condition function returns false. When
+ * being asked to wake up again, the condition will be re-checked and the
+ * thread only woken up if the condition is now true.
+ */
+void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data);
+
+/** @brief Sleep if condition is false
+ *
+ * @param queue the queue to use
+ * @param condition function pointer to a boolean function
+ * @param data data parameter for condition function
+ * @param timeout timeout in ms
+ *
+ * Put thread to sleep if the given \a condition function returns false. When
+ * being asked to wake up again, the condition will be re-checked and the
+ * thread only woken up if the condition is now true.  Will return if time
+ * exceeds timeout.
+ */
+void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout);
+
+/** @brief Wake up all threads in wait queue if their respective conditions are
+ * true
+ *
+ * @param queue the queue whose threads should be woken up
+ *
+ * Wake up all threads in wait queue \a queue whose condition is now true.
+ */
+void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue);
+
+/** @brief terminate a wait queue
+ *
+ * @param queue the queue to terminate.
+ */
+void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue);
+/** @} */ /* end group _mali_osk_wait_queue */
+
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Output a device driver debug message.
+ *
+ * The interpretation of \a fmt is the same as the \c format parameter in
+ * _mali_osu_vsnprintf().
+ *
+ * @param fmt a _mali_osu_vsnprintf() style format string
+ * @param ... a variable-number of parameters suitable for \a fmt
+ */
+void _mali_osk_dbgmsg(const char *fmt, ...);
+
+/** @brief Print fmt into buf.
+ *
+ * The interpretation of \a fmt is the same as the \c format parameter in
+ * _mali_osu_vsnprintf().
+ *
+ * @param buf a pointer to the result buffer
+ * @param size the total number of bytes allowed to write to \a buf
+ * @param fmt a _mali_osu_vsnprintf() style format string
+ * @param ... a variable-number of parameters suitable for \a fmt
+ * @return The number of bytes written to \a buf
+ */
+u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...);
+
+/** @brief Abnormal process abort.
+ *
+ * Terminates the caller-process if this function is called.
+ *
+ * This function will be called from Debug assert-macros in mali_kernel_common.h.
+ *
+ * This function will never return - because to continue from a Debug assert
+ * could cause even more problems, and hinder debugging of the initial problem.
+ *
+ * This function is only used in Debug builds, and is not used in Release builds.
+ */
+void _mali_osk_abort(void);
+
+/** @brief Sets breakpoint at point where function is called.
+ *
+ * This function will be called from Debug assert-macros in mali_kernel_common.h,
+ * to assist in debugging. If debugging at this level is not required, then this
+ * function may be implemented as a stub.
+ *
+ * This function is only used in Debug builds, and is not used in Release builds.
+ */
+void _mali_osk_break(void);
+
+/** @brief Return an identificator for calling process.
+ *
+ * @return Identificator for calling process.
+ */
+u32 _mali_osk_get_pid(void);
+
+/** @brief Return an name for calling process.
+ *
+ * @return name for calling process.
+ */
+char *_mali_osk_get_comm(void);
+
+/** @brief Return an identificator for calling thread.
+ *
+ * @return Identificator for calling thread.
+ */
+u32 _mali_osk_get_tid(void);
+
+
+/** @brief Take a reference to the power manager system for the Mali device (synchronously).
+ *
+ * When function returns successfully, Mali is ON.
+ *
+ * @note Call \a _mali_osk_pm_dev_ref_put() to release this reference.
+ */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void);
+
+/** @brief Take a reference to the external power manager system for the Mali device (asynchronously).
+ *
+ * Mali might not yet be on after this function as returned.
+ * Please use \a _mali_osk_pm_dev_barrier() or \a _mali_osk_pm_dev_ref_get_sync()
+ * to wait for Mali to be powered on.
+ *
+ * @note Call \a _mali_osk_pm_dev_ref_dec() to release this reference.
+ */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void);
+
+/** @brief Release the reference to the external power manger system for the Mali device.
+ *
+ * When reference count reach zero, the cores can be off.
+ *
+ * @note This must be used to release references taken with
+ * \a _mali_osk_pm_dev_ref_get_sync() or \a _mali_osk_pm_dev_ref_get_sync().
+ */
+void _mali_osk_pm_dev_ref_put(void);
+
+/** @brief Block until pending PM operations are done
+ */
+void _mali_osk_pm_dev_barrier(void);
+
+/** @} */ /* end group  _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_bitmap OSK Bitmap
+ * @{ */
+
+/** @brief Allocate a unique number from the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @return An unique existence in the bitmap object.
+ */
+u32 _mali_osk_bitmap_alloc(struct _mali_osk_bitmap *bitmap);
+
+/** @brief Free a interger to the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @param obj An number allocated from bitmap object.
+ */
+void _mali_osk_bitmap_free(struct _mali_osk_bitmap *bitmap, u32 obj);
+
+/** @brief Allocate continuous number from the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @return start number of the continuous number block.
+ */
+u32 _mali_osk_bitmap_alloc_range(struct _mali_osk_bitmap *bitmap, int cnt);
+
+/** @brief Free a block of continuous number block to the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @param obj Start number.
+ * @param cnt The size of the continuous number block.
+ */
+void _mali_osk_bitmap_free_range(struct _mali_osk_bitmap *bitmap, u32 obj, int cnt);
+
+/** @brief Available count could be used to allocate in the given bitmap object.
+ *
+ */
+u32 _mali_osk_bitmap_avail(struct _mali_osk_bitmap *bitmap);
+
+/** @brief Initialize an bitmap object..
+ *
+ * @param bitmap An poiter of uninitialized bitmap object.
+ * @param num Size of thei bitmap object and decide the memory size allocated.
+ * @param reserve start number used to allocate.
+ */
+int _mali_osk_bitmap_init(struct _mali_osk_bitmap *bitmap, u32 num, u32 reserve);
+
+/** @brief Free the given bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ */
+void _mali_osk_bitmap_term(struct _mali_osk_bitmap *bitmap);
+/** @} */ /* end group  _mali_osk_bitmap */
+
+/** @} */ /* end group osuapi */
+
+/** @} */ /* end group uddapi */
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Check standard inlines */
+#ifndef MALI_STATIC_INLINE
+#error MALI_STATIC_INLINE not defined on your OS
+#endif
+
+#ifndef MALI_NON_STATIC_INLINE
+#error MALI_NON_STATIC_INLINE not defined on your OS
+#endif
+
+#endif /* __MALI_OSK_H__ */
diff --git a/mali/common/mali_osk_bitops.h b/mali/common/mali_osk_bitops.h
new file mode 100755
index 0000000..61e0a91
--- /dev/null
+++ b/mali/common/mali_osk_bitops.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_bitops.h
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#ifndef __MALI_OSK_BITOPS_H__
+#define __MALI_OSK_BITOPS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+MALI_STATIC_INLINE void _mali_internal_clear_bit(u32 bit, u32 *addr)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	MALI_DEBUG_ASSERT(NULL != addr);
+
+	(*addr) &= ~(1 << bit);
+}
+
+MALI_STATIC_INLINE void _mali_internal_set_bit(u32 bit, u32 *addr)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	MALI_DEBUG_ASSERT(NULL != addr);
+
+	(*addr) |= (1 << bit);
+}
+
+MALI_STATIC_INLINE u32 _mali_internal_test_bit(u32 bit, u32 value)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	return value & (1 << bit);
+}
+
+MALI_STATIC_INLINE int _mali_internal_find_first_zero_bit(u32 value)
+{
+	u32 inverted;
+	u32 negated;
+	u32 isolated;
+	u32 leading_zeros;
+
+	/* Begin with xxx...x0yyy...y, where ys are 1, number of ys is in range  0..31 */
+	inverted = ~value; /* zzz...z1000...0 */
+	/* Using count_trailing_zeros on inverted value -
+	 * See ARM System Developers Guide for details of count_trailing_zeros */
+
+	/* Isolate the zero: it is preceeded by a run of 1s, so add 1 to it */
+	negated = (u32) - inverted ; /* -a == ~a + 1 (mod 2^n) for n-bit numbers */
+	/* negated = xxx...x1000...0 */
+
+	isolated = negated & inverted ; /* xxx...x1000...0 & zzz...z1000...0, zs are ~xs */
+	/* And so the first zero bit is in the same position as the 1 == number of 1s that preceeded it
+	 * Note that the output is zero if value was all 1s */
+
+	leading_zeros = _mali_osk_clz(isolated);
+
+	return 31 - leading_zeros;
+}
+
+
+/** @defgroup _mali_osk_bitops OSK Non-atomic Bit-operations
+ * @{ */
+
+/**
+ * These bit-operations do not work atomically, and so locks must be used if
+ * atomicity is required.
+ *
+ * Reference implementations for Little Endian are provided, and so it should
+ * not normally be necessary to re-implement these. Efficient bit-twiddling
+ * techniques are used where possible, implemented in portable C.
+ *
+ * Note that these reference implementations rely on _mali_osk_clz() being
+ * implemented.
+ */
+
+/** @brief Clear a bit in a sequence of 32-bit words
+ * @param nr bit number to clear, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ */
+MALI_STATIC_INLINE void _mali_osk_clear_nonatomic_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	_mali_internal_clear_bit(nr, addr);
+}
+
+/** @brief Set a bit in a sequence of 32-bit words
+ * @param nr bit number to set, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ */
+MALI_STATIC_INLINE void _mali_osk_set_nonatomic_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	_mali_internal_set_bit(nr, addr);
+}
+
+/** @brief Test a bit in a sequence of 32-bit words
+ * @param nr bit number to test, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ * @return zero if bit was clear, non-zero if set. Do not rely on the return
+ * value being related to the actual word under test.
+ */
+MALI_STATIC_INLINE u32 _mali_osk_test_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	return _mali_internal_test_bit(nr, *addr);
+}
+
+/* Return maxbit if not found */
+/** @brief Find the first zero bit in a sequence of 32-bit words
+ * @param addr starting point for search.
+ * @param maxbit the maximum number of bits to search
+ * @return the number of the first zero bit found, or maxbit if none were found
+ * in the specified range.
+ */
+MALI_STATIC_INLINE u32 _mali_osk_find_first_zero_bit(const u32 *addr, u32 maxbit)
+{
+	u32 total;
+
+	for (total = 0; total < maxbit; total += 32, ++addr) {
+		int result;
+		result = _mali_internal_find_first_zero_bit(*addr);
+
+		/* non-negative signifies the bit was found */
+		if (result >= 0) {
+			total += (u32)result;
+			break;
+		}
+	}
+
+	/* Now check if we reached maxbit or above */
+	if (total >= maxbit) {
+		total = maxbit;
+	}
+
+	return total; /* either the found bit nr, or maxbit if not found */
+}
+/** @} */ /* end group _mali_osk_bitops */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_BITOPS_H__ */
diff --git a/mali/common/mali_osk_list.h b/mali/common/mali_osk_list.h
new file mode 100755
index 0000000..a41f5ab
--- /dev/null
+++ b/mali/common/mali_osk_list.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_list.h
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#ifndef __MALI_OSK_LIST_H__
+#define __MALI_OSK_LIST_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+MALI_STATIC_INLINE void __mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *prev, _mali_osk_list_t *next)
+{
+	next->prev = new_entry;
+	new_entry->next = next;
+	new_entry->prev = prev;
+	prev->next = new_entry;
+}
+
+MALI_STATIC_INLINE void __mali_osk_list_del(_mali_osk_list_t *prev, _mali_osk_list_t *next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists
+ * @{ */
+
+/** Reference implementations of Doubly-linked Circular Lists are provided.
+ * There is often no need to re-implement these.
+ *
+ * @note The implementation may differ subtly from any lists the OS provides.
+ * For this reason, these lists should not be mixed with OS-specific lists
+ * inside the OSK/UKK implementation. */
+
+/** @brief Initialize a list to be a head of an empty list
+ * @param exp the list to initialize. */
+#define _MALI_OSK_INIT_LIST_HEAD(exp) _mali_osk_list_init(exp)
+
+/** @brief Define a list variable, which is uninitialized.
+ * @param exp the name of the variable that the list will be defined as. */
+#define _MALI_OSK_LIST_HEAD(exp) _mali_osk_list_t exp
+
+/** @brief Define a list variable, which is initialized.
+ * @param exp the name of the variable that the list will be defined as. */
+#define _MALI_OSK_LIST_HEAD_STATIC_INIT(exp) _mali_osk_list_t exp = { &exp, &exp }
+
+/** @brief Initialize a list element.
+ *
+ * All list elements must be initialized before use.
+ *
+ * Do not use on any list element that is present in a list without using
+ * _mali_osk_list_del first, otherwise this will break the list.
+ *
+ * @param list the list element to initialize
+ */
+MALI_STATIC_INLINE void _mali_osk_list_init(_mali_osk_list_t *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+/** @brief Insert a single list element after an entry in a list
+ *
+ * As an example, if this is inserted to the head of a list, then this becomes
+ * the first element of the list.
+ *
+ * Do not use to move list elements from one list to another, as it will break
+ * the originating list.
+ *
+ *
+ * @param newlist the list element to insert
+ * @param list the list in which to insert. The new element will be the next
+ * entry in this list
+ */
+MALI_STATIC_INLINE void _mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_add(new_entry, list, list->next);
+}
+
+/** @brief Insert a single list element before an entry in a list
+ *
+ * As an example, if this is inserted to the head of a list, then this becomes
+ * the last element of the list.
+ *
+ * Do not use to move list elements from one list to another, as it will break
+ * the originating list.
+ *
+ * @param newlist the list element to insert
+ * @param list the list in which to insert. The new element will be the previous
+ * entry in this list
+ */
+MALI_STATIC_INLINE void _mali_osk_list_addtail(_mali_osk_list_t *new_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_add(new_entry, list->prev, list);
+}
+
+/** @brief Remove a single element from a list
+ *
+ * The element will no longer be present in the list. The removed list element
+ * will be uninitialized, and so should not be traversed. It must be
+ * initialized before further use.
+ *
+ * @param list the list element to remove.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_del(_mali_osk_list_t *list)
+{
+	__mali_osk_list_del(list->prev, list->next);
+}
+
+/** @brief Remove a single element from a list, and re-initialize it
+ *
+ * The element will no longer be present in the list. The removed list element
+ * will initialized, and so can be used as normal.
+ *
+ * @param list the list element to remove and initialize.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_delinit(_mali_osk_list_t *list)
+{
+	__mali_osk_list_del(list->prev, list->next);
+	_mali_osk_list_init(list);
+}
+
+/** @brief Determine whether a list is empty.
+ *
+ * An empty list is one that contains a single element that points to itself.
+ *
+ * @param list the list to check.
+ * @return non-zero if the list is empty, and zero otherwise.
+ */
+MALI_STATIC_INLINE mali_bool _mali_osk_list_empty(_mali_osk_list_t *list)
+{
+	return list->next == list;
+}
+
+/** @brief Move a list element from one list to another.
+ *
+ * The list element must be initialized.
+ *
+ * As an example, moving a list item to the head of a new list causes this item
+ * to be the first element in the new list.
+ *
+ * @param move the list element to move
+ * @param list the new list into which the element will be inserted, as the next
+ * element in the list.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_move(_mali_osk_list_t *move_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_del(move_entry->prev, move_entry->next);
+	_mali_osk_list_add(move_entry, list);
+}
+
+/** @brief Move an entire list
+ *
+ * The list element must be initialized.
+ *
+ * Allows you to move a list from one list head to another list head
+ *
+ * @param old_list The existing list head
+ * @param new_list The new list head (must be an empty list)
+ */
+MALI_STATIC_INLINE void _mali_osk_list_move_list(_mali_osk_list_t *old_list, _mali_osk_list_t *new_list)
+{
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(new_list));
+	if (!_mali_osk_list_empty(old_list)) {
+		new_list->next = old_list->next;
+		new_list->prev = old_list->prev;
+		new_list->next->prev = new_list;
+		new_list->prev->next = new_list;
+		old_list->next = old_list;
+		old_list->prev = old_list;
+	}
+}
+
+/** @brief Find the containing structure of a list
+ *
+ * When traversing a list, this is used to recover the containing structure,
+ * given that is contains a _mali_osk_list_t member.
+ *
+ * Each list must be of structures of one type, and must link the same members
+ * together, otherwise it will not be possible to correctly recover the
+ * sturctures that the lists link.
+ *
+ * @note no type or memory checking occurs to ensure that a structure does in
+ * fact exist for the list entry, and that it is being recovered with respect
+ * to the correct list member.
+ *
+ * @param ptr the pointer to the _mali_osk_list_t member in this structure
+ * @param type the type of the structure that contains the member
+ * @param member the member of the structure that ptr points to.
+ * @return a pointer to a \a type object which contains the _mali_osk_list_t
+ * \a member, as pointed to by the _mali_osk_list_t \a *ptr.
+ */
+#define _MALI_OSK_LIST_ENTRY(ptr, type, member) \
+	_MALI_OSK_CONTAINER_OF(ptr, type, member)
+
+/** @brief Enumerate a list safely
+ *
+ * With this macro, lists can be enumerated in a 'safe' manner. That is,
+ * entries can be deleted from the list without causing an error during
+ * enumeration. To achieve this, a 'temporary' pointer is required, which must
+ * be provided to the macro.
+ *
+ * Use it like a 'for()', 'while()' or 'do()' construct, and so it must be
+ * followed by a statement or compound-statement which will be executed for
+ * each list entry.
+ *
+ * Upon loop completion, providing that an early out was not taken in the
+ * loop body, then it is guaranteed that ptr->member == list, even if the loop
+ * body never executed.
+ *
+ * @param ptr a pointer to an object of type 'type', which points to the
+ * structure that contains the currently enumerated list entry.
+ * @param tmp a pointer to an object of type 'type', which must not be used
+ * inside the list-execution statement.
+ * @param list a pointer to a _mali_osk_list_t, from which enumeration will
+ * begin
+ * @param type the type of the structure that contains the _mali_osk_list_t
+ * member that is part of the list to be enumerated.
+ * @param member the _mali_osk_list_t member of the structure that is part of
+ * the list to be enumerated.
+ */
+#define _MALI_OSK_LIST_FOREACHENTRY(ptr, tmp, list, type, member)         \
+	for (ptr = _MALI_OSK_LIST_ENTRY((list)->next, type, member),      \
+	     tmp = _MALI_OSK_LIST_ENTRY(ptr->member.next, type, member);  \
+	     &ptr->member != (list);                                      \
+	     ptr = tmp,                                                   \
+	     tmp = _MALI_OSK_LIST_ENTRY(tmp->member.next, type, member))
+
+/** @brief Enumerate a list in reverse order safely
+ *
+ * This macro is identical to @ref _MALI_OSK_LIST_FOREACHENTRY, except that
+ * entries are enumerated in reverse order.
+ *
+ * @param ptr a pointer to an object of type 'type', which points to the
+ * structure that contains the currently enumerated list entry.
+ * @param tmp a pointer to an object of type 'type', which must not be used
+ * inside the list-execution statement.
+ * @param list a pointer to a _mali_osk_list_t, from which enumeration will
+ * begin
+ * @param type the type of the structure that contains the _mali_osk_list_t
+ * member that is part of the list to be enumerated.
+ * @param member the _mali_osk_list_t member of the structure that is part of
+ * the list to be enumerated.
+ */
+#define _MALI_OSK_LIST_FOREACHENTRY_REVERSE(ptr, tmp, list, type, member) \
+	for (ptr = _MALI_OSK_LIST_ENTRY((list)->prev, type, member),      \
+	     tmp = _MALI_OSK_LIST_ENTRY(ptr->member.prev, type, member);  \
+	     &ptr->member != (list);                                      \
+	     ptr = tmp,                                                   \
+	     tmp = _MALI_OSK_LIST_ENTRY(tmp->member.prev, type, member))
+
+/** @} */ /* end group _mali_osk_list */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_LIST_H__ */
diff --git a/mali/common/mali_osk_mali.h b/mali/common/mali_osk_mali.h
new file mode 100755
index 0000000..f242cd4
--- /dev/null
+++ b/mali/common/mali_osk_mali.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_mali.h
+ * Defines the OS abstraction layer which is specific for the Mali kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_MALI_H__
+#define __MALI_OSK_MALI_H__
+
+#include <linux/mali/mali_utgard.h>
+#include <mali_osk.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef CONFIG_MALI_DEVFREQ
+struct mali_device {
+	struct device *dev;
+#ifdef CONFIG_HAVE_CLK
+	struct clk *clock;
+#endif
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulator;
+#endif
+#ifdef CONFIG_PM_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freq;
+	unsigned long current_voltage;
+#ifdef CONFIG_DEVFREQ_THERMAL
+	struct thermal_cooling_device *devfreq_cooling;
+#endif
+#endif
+	struct mali_pm_metrics_data mali_metrics;
+};
+#endif
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Struct with device specific configuration data
+ */
+typedef struct mali_gpu_device_data _mali_osk_device_data;
+
+#ifdef CONFIG_MALI_DT
+/** @brief Initialize those device resources when we use device tree
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_resource_initialize(void);
+#endif
+
+/** @brief Find Mali GPU HW resource
+ *
+ * @param addr Address of Mali GPU resource to find
+ * @param res Storage for resource information if resource is found.
+ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if resource is not found
+ */
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res);
+
+
+/** @brief Find Mali GPU HW base address
+ *
+ * @return 0 if resources are found, otherwise the Mali GPU component with lowest address.
+ */
+uintptr_t _mali_osk_resource_base_address(void);
+
+/** @brief Find the specific GPU resource.
+ *
+ * @return value
+ * 0x400 if Mali 400 specific GPU resource identified
+ * 0x450 if Mali 450 specific GPU resource identified
+ * 0x470 if Mali 470 specific GPU resource identified
+ *
+ */
+u32 _mali_osk_identify_gpu_resource(void);
+
+/** @brief Retrieve the Mali GPU specific data
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data);
+
+/** @brief Find the pmu domain config from device data.
+ *
+ * @param domain_config_array used to store pmu domain config found in device data.
+ * @param array_size is the size of array domain_config_array.
+ */
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size);
+
+/** @brief Get Mali PMU switch delay
+ *
+ *@return pmu switch delay if it is configured
+ */
+u32 _mali_osk_get_pmu_switch_delay(void);
+
+/** @brief Determines if Mali GPU has been configured with shared interrupts.
+ *
+ * @return MALI_TRUE if shared interrupts, MALI_FALSE if not.
+ */
+mali_bool _mali_osk_shared_interrupts(void);
+
+/** @brief Initialize the gpu secure mode.
+ * The gpu secure mode will initially be in a disabled state.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_init(void);
+
+/** @brief Deinitialize the gpu secure mode.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_deinit(void);
+
+/** @brief Reset GPU and enable the gpu secure mode.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_enable(void);
+
+/** @brief Reset GPU and disable the gpu secure mode.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_disable(void);
+
+/** @brief Check if the gpu secure mode has been enabled.
+ * @return MALI_TRUE if enabled, otherwise MALI_FALSE.
+ */
+mali_bool _mali_osk_gpu_secure_mode_is_enabled(void);
+
+/** @brief Check if the gpu secure mode is supported.
+ * @return MALI_TRUE if supported, otherwise MALI_FALSE.
+ */
+mali_bool _mali_osk_gpu_secure_mode_is_supported(void);
+
+
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_MALI_H__ */
diff --git a/mali/common/mali_osk_profiling.h b/mali/common/mali_osk_profiling.h
new file mode 100755
index 0000000..eca6ad4
--- /dev/null
+++ b/mali/common/mali_osk_profiling.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_OSK_PROFILING_H__
+#define __MALI_OSK_PROFILING_H__
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+
+#include "mali_linux_trace.h"
+#include "mali_profiling_events.h"
+#include "mali_profiling_gator_api.h"
+
+#define MALI_PROFILING_MAX_BUFFER_ENTRIES 1048576
+
+#define MALI_PROFILING_NO_HW_COUNTER = ((u32)-1)
+
+/** @defgroup _mali_osk_profiling External profiling connectivity
+ * @{ */
+
+/**
+ * Initialize the profiling module.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start);
+
+/*
+ * Terminate the profiling module.
+ */
+void _mali_osk_profiling_term(void);
+
+/**
+ * Stop the profile sampling operation.
+ */
+void _mali_osk_profiling_stop_sampling(u32 pid);
+
+/**
+ * Start recording profiling data
+ *
+ * The specified limit will determine how large the capture buffer is.
+ * MALI_PROFILING_MAX_BUFFER_ENTRIES determines the maximum size allowed by the device driver.
+ *
+ * @param limit The desired maximum number of events to record on input, the actual maximum on output.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_start(u32 *limit);
+
+/**
+ * Add an profiling event
+ *
+ * @param event_id The event identificator.
+ * @param data0 First data parameter, depending on event_id specified.
+ * @param data1 Second data parameter, depending on event_id specified.
+ * @param data2 Third data parameter, depending on event_id specified.
+ * @param data3 Fourth data parameter, depending on event_id specified.
+ * @param data4 Fifth data parameter, depending on event_id specified.
+ */
+void    _mali_osk_profiling_add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4);
+
+/**
+ * Report a hardware counter event.
+ *
+ * @param counter_id The ID of the counter.
+ * @param value The value of the counter.
+ */
+
+/* Call Linux tracepoint directly */
+#define _mali_osk_profiling_report_hw_counter(counter_id, value) trace_mali_hw_counter(counter_id, value)
+
+/**
+ * Report SW counters
+ *
+ * @param counters array of counter values
+ */
+void _mali_osk_profiling_report_sw_counters(u32 *counters);
+
+void _mali_osk_profiling_record_global_counters(int counter_id, u32 value);
+
+/**
+ * Stop recording profiling data
+ *
+ * @param count Returns the number of recorded events.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_stop(u32 *count);
+
+/**
+ * Retrieves the number of events that can be retrieved
+ *
+ * @return The number of recorded events that can be retrieved.
+ */
+u32 _mali_osk_profiling_get_count(void);
+
+/**
+ * Retrieve an event
+ *
+ * @param index Event index (start with 0 and continue until this function fails to retrieve all events)
+ * @param timestamp The timestamp for the retrieved event will be stored here.
+ * @param event_id The event ID for the retrieved event will be stored here.
+ * @param data The 5 data values for the retrieved event will be stored here.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]);
+
+/**
+ * Clear the recorded buffer.
+ *
+ * This is needed in order to start another recording.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_clear(void);
+
+/**
+ * Checks if a recording of profiling data is in progress
+ *
+ * @return MALI_TRUE if recording of profiling data is in progress, MALI_FALSE if not
+ */
+mali_bool _mali_osk_profiling_is_recording(void);
+
+/**
+ * Checks if profiling data is available for retrival
+ *
+ * @return MALI_TRUE if profiling data is avaiable, MALI_FALSE if not
+ */
+mali_bool _mali_osk_profiling_have_recording(void);
+
+/** @} */ /* end group _mali_osk_profiling */
+
+#else /* defined(CONFIG_MALI400_PROFILING)  && defined(CONFIG_TRACEPOINTS) */
+
+/* Dummy add_event, for when profiling is disabled. */
+
+#define _mali_osk_profiling_add_event(event_id, data0, data1, data2, data3, data4)
+
+#endif /* defined(CONFIG_MALI400_PROFILING)  && defined(CONFIG_TRACEPOINTS) */
+
+#endif /* __MALI_OSK_PROFILING_H__ */
+
+
diff --git a/mali/common/mali_osk_types.h b/mali/common/mali_osk_types.h
new file mode 100755
index 0000000..6e9a133
--- /dev/null
+++ b/mali/common/mali_osk_types.h
@@ -0,0 +1,471 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_types.h
+ * Defines types of the OS abstraction layer for the kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_TYPES_H__
+#define __MALI_OSK_TYPES_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs
+ *
+ * @{
+ */
+
+/** @defgroup _mali_osk_miscellaneous OSK Miscellaneous functions, constants and types
+ * @{ */
+
+/* Define integer types used by OSK. Note: these currently clash with Linux so we only define them if not defined already */
+#ifndef __KERNEL__
+typedef unsigned char      u8;
+typedef signed char        s8;
+typedef unsigned short     u16;
+typedef signed short       s16;
+typedef unsigned int       u32;
+typedef signed int         s32;
+typedef unsigned long long u64;
+#define BITS_PER_LONG (sizeof(long)*8)
+#else
+/* Ensure Linux types u32, etc. are defined */
+#include <linux/types.h>
+#endif
+
+/** @brief Mali Boolean type which uses MALI_TRUE and MALI_FALSE
+  */
+typedef unsigned long mali_bool;
+
+#ifndef MALI_TRUE
+#define MALI_TRUE ((mali_bool)1)
+#endif
+
+#ifndef MALI_FALSE
+#define MALI_FALSE ((mali_bool)0)
+#endif
+
+#define MALI_HW_CORE_NO_COUNTER     ((u32)-1)
+
+
+#define MALI_S32_MAX 0x7fffffff
+
+/**
+ * @brief OSK Error codes
+ *
+ * Each OS may use its own set of error codes, and may require that the
+ * User/Kernel interface take certain error code. This means that the common
+ * error codes need to be sufficiently rich to pass the correct error code
+ * thorugh from the OSK to U/K layer, across all OSs.
+ *
+ * The result is that some error codes will appear redundant on some OSs.
+ * Under all OSs, the OSK layer must translate native OS error codes to
+ * _mali_osk_errcode_t codes. Similarly, the U/K layer must translate from
+ * _mali_osk_errcode_t codes to native OS error codes.
+ */
+typedef enum {
+	_MALI_OSK_ERR_OK = 0, /**< Success. */
+	_MALI_OSK_ERR_FAULT = -1, /**< General non-success */
+	_MALI_OSK_ERR_INVALID_FUNC = -2, /**< Invalid function requested through User/Kernel interface (e.g. bad IOCTL number) */
+	_MALI_OSK_ERR_INVALID_ARGS = -3, /**< Invalid arguments passed through User/Kernel interface */
+	_MALI_OSK_ERR_NOMEM = -4, /**< Insufficient memory */
+	_MALI_OSK_ERR_TIMEOUT = -5, /**< Timeout occurred */
+	_MALI_OSK_ERR_RESTARTSYSCALL = -6, /**< Special: On certain OSs, must report when an interruptable mutex is interrupted. Ignore otherwise. */
+	_MALI_OSK_ERR_ITEM_NOT_FOUND = -7, /**< Table Lookup failed */
+	_MALI_OSK_ERR_BUSY = -8, /**< Device/operation is busy. Try again later */
+	_MALI_OSK_ERR_UNSUPPORTED = -9, /**< Optional part of the interface used, and is unsupported */
+} _mali_osk_errcode_t;
+
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_wq OSK work queues
+ * @{ */
+
+/** @brief Private type for work objects */
+typedef struct _mali_osk_wq_work_s _mali_osk_wq_work_t;
+typedef struct _mali_osk_wq_delayed_work_s _mali_osk_wq_delayed_work_t;
+
+/** @brief Work queue handler function
+ *
+ * This function type is called when the work is scheduled by the work queue,
+ * e.g. as an IRQ bottom-half handler.
+ *
+ * Refer to \ref _mali_osk_wq_schedule_work() for more information on the
+ * work-queue and work handlers.
+ *
+ * @param arg resource-specific data
+ */
+typedef void (*_mali_osk_wq_work_handler_t)(void *arg);
+
+/* @} */ /* end group _mali_osk_wq */
+
+/** @defgroup _mali_osk_irq OSK IRQ handling
+ * @{ */
+
+/** @brief Private type for IRQ handling objects */
+typedef struct _mali_osk_irq_t_struct _mali_osk_irq_t;
+
+/** @brief Optional function to trigger an irq from a resource
+ *
+ * This function is implemented by the common layer to allow probing of a resource's IRQ.
+ * @param arg resource-specific data */
+typedef void (*_mali_osk_irq_trigger_t)(void *arg);
+
+/** @brief Optional function to acknowledge an irq from a resource
+ *
+ * This function is implemented by the common layer to allow probing of a resource's IRQ.
+ * @param arg resource-specific data
+ * @return _MALI_OSK_ERR_OK if the IRQ was successful, or a suitable _mali_osk_errcode_t on failure. */
+typedef _mali_osk_errcode_t (*_mali_osk_irq_ack_t)(void *arg);
+
+/** @brief IRQ 'upper-half' handler callback.
+ *
+ * This function is implemented by the common layer to do the initial handling of a
+ * resource's IRQ. This maps on to the concept of an ISR that does the minimum
+ * work necessary before handing off to an IST.
+ *
+ * The communication of the resource-specific data from the ISR to the IST is
+ * handled by the OSK implementation.
+ *
+ * On most systems, the IRQ upper-half handler executes in IRQ context.
+ * Therefore, the system may have restrictions about what can be done in this
+ * context
+ *
+ * If an IRQ upper-half handler requires more work to be done than can be
+ * acheived in an IRQ context, then it may defer the work with
+ * _mali_osk_wq_schedule_work(). Refer to \ref _mali_osk_wq_create_work() for
+ * more information.
+ *
+ * @param arg resource-specific data
+ * @return _MALI_OSK_ERR_OK if the IRQ was correctly handled, or a suitable
+ * _mali_osk_errcode_t otherwise.
+ */
+typedef _mali_osk_errcode_t (*_mali_osk_irq_uhandler_t)(void *arg);
+
+
+/** @} */ /* end group _mali_osk_irq */
+
+
+/** @defgroup _mali_osk_atomic OSK Atomic counters
+ * @{ */
+
+/** @brief Public type of atomic counters
+ *
+ * This is public for allocation on stack. On systems that support it, this is just a single 32-bit value.
+ * On others, it could be encapsulating an object stored elsewhere.
+ *
+ * Regardless of implementation, the \ref _mali_osk_atomic functions \b must be used
+ * for all accesses to the variable's value, even if atomicity is not required.
+ * Do not access u.val or u.obj directly.
+ */
+typedef struct {
+	union {
+		u32 val;
+		void *obj;
+	} u;
+} _mali_osk_atomic_t;
+/** @} */ /* end group _mali_osk_atomic */
+
+
+/** @defgroup _mali_osk_lock OSK Mutual Exclusion Locks
+ * @{ */
+
+
+/** @brief OSK Mutual Exclusion Lock ordered list
+ *
+ * This lists the various types of locks in the system and is used to check
+ * that locks are taken in the correct order.
+ *
+ * - Holding more than one lock of the same order at the same time is not
+ *   allowed.
+ * - Taking a lock of a lower order than the highest-order lock currently held
+ *   is not allowed.
+ *
+ */
+typedef enum {
+	/*  ||    Locks    ||  */
+	/*  ||   must be   ||  */
+	/* _||_  taken in _||_ */
+	/* \  /    this   \  / */
+	/*  \/    order!   \/  */
+
+	_MALI_OSK_LOCK_ORDER_FIRST = 0,
+
+	_MALI_OSK_LOCK_ORDER_SESSIONS,
+	_MALI_OSK_LOCK_ORDER_MEM_SESSION,
+	_MALI_OSK_LOCK_ORDER_MEM_INFO,
+	_MALI_OSK_LOCK_ORDER_MEM_PT_CACHE,
+	_MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP,
+	_MALI_OSK_LOCK_ORDER_PM_EXECUTION,
+	_MALI_OSK_LOCK_ORDER_EXECUTOR,
+	_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM,
+	_MALI_OSK_LOCK_ORDER_SCHEDULER,
+	_MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED,
+	_MALI_OSK_LOCK_ORDER_PROFILING,
+	_MALI_OSK_LOCK_ORDER_L2,
+	_MALI_OSK_LOCK_ORDER_L2_COMMAND,
+	_MALI_OSK_LOCK_ORDER_UTILIZATION,
+	_MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS,
+	_MALI_OSK_LOCK_ORDER_PM_STATE,
+
+	_MALI_OSK_LOCK_ORDER_LAST,
+} _mali_osk_lock_order_t;
+
+
+/** @brief OSK Mutual Exclusion Lock flags type
+ *
+ * - Any lock can use the order parameter.
+ */
+typedef enum {
+	_MALI_OSK_LOCKFLAG_UNORDERED        = 0x1, /**< Indicate that the order of this lock should not be checked */
+	_MALI_OSK_LOCKFLAG_ORDERED          = 0x2,
+	/** @enum _mali_osk_lock_flags_t
+	 *
+	 * Flags from 0x10000--0x80000000 are RESERVED for User-mode */
+
+} _mali_osk_lock_flags_t;
+
+/** @brief Mutual Exclusion Lock Mode Optimization hint
+ *
+ * The lock mode is used to implement the read/write locking of locks when we call
+ * functions _mali_osk_mutex_rw_init/wait/signal/term/. In this case, the RO mode can
+ * be used to allow multiple concurrent readers, but no writers. The RW mode is used for
+ * writers, and so will wait for all readers to release the lock (if any present).
+ * Further readers and writers will wait until the writer releases the lock.
+ *
+ * The mode is purely an optimization hint: for example, it is permissible for
+ * all locks to behave in RW mode, regardless of that supplied.
+ *
+ * It is an error to attempt to use locks in anything other that RW mode when
+ * call functions _mali_osk_mutex_rw_wait/signal().
+ *
+ */
+typedef enum {
+	_MALI_OSK_LOCKMODE_UNDEF = -1,  /**< Undefined lock mode. For internal use only */
+	_MALI_OSK_LOCKMODE_RW    = 0x0, /**< Read-write mode, default. All readers and writers are mutually-exclusive */
+	_MALI_OSK_LOCKMODE_RO,          /**< Read-only mode, to support multiple concurrent readers, but mutual exclusion in the presence of writers. */
+	/** @enum _mali_osk_lock_mode_t
+	 *
+	 * Lock modes 0x40--0x7F are RESERVED for User-mode */
+} _mali_osk_lock_mode_t;
+
+/** @brief Private types for Mutual Exclusion lock objects */
+typedef struct _mali_osk_lock_debug_s _mali_osk_lock_debug_t;
+typedef struct _mali_osk_spinlock_s _mali_osk_spinlock_t;
+typedef struct _mali_osk_spinlock_irq_s _mali_osk_spinlock_irq_t;
+typedef struct _mali_osk_mutex_s _mali_osk_mutex_t;
+typedef struct _mali_osk_mutex_rw_s _mali_osk_mutex_rw_t;
+
+/** @} */ /* end group _mali_osk_lock */
+
+/** @defgroup _mali_osk_low_level_memory OSK Low-level Memory Operations
+ * @{ */
+
+/**
+ * @brief Private data type for use in IO accesses to/from devices.
+ *
+ * This represents some range that is accessible from the device. Examples
+ * include:
+ * - Device Registers, which could be readable and/or writeable.
+ * - Memory that the device has access to, for storing configuration structures.
+ *
+ * Access to this range must be made through the _mali_osk_mem_ioread32() and
+ * _mali_osk_mem_iowrite32() functions.
+ */
+typedef struct _mali_io_address *mali_io_address;
+
+/** @defgroup _MALI_OSK_CPU_PAGE CPU Physical page size macros.
+ *
+ * The order of the page size is supplied for
+ * ease of use by algorithms that might require it, since it is easier to know
+ * it ahead of time rather than calculating it.
+ *
+ * The Mali Page Mask macro masks off the lower bits of a physical address to
+ * give the start address of the page for that physical address.
+ *
+ * @note The Mali device driver code is designed for systems with 4KB page size.
+ * Changing these macros will not make the entire Mali device driver work with
+ * page sizes other than 4KB.
+ *
+ * @note The CPU Physical Page Size has been assumed to be the same as the Mali
+ * Physical Page Size.
+ *
+ * @{
+ */
+
+/** CPU Page Order, as log to base 2 of the Page size. @see _MALI_OSK_CPU_PAGE_SIZE */
+#define _MALI_OSK_CPU_PAGE_ORDER ((u32)12)
+/** CPU Page Size, in bytes.               */
+#define _MALI_OSK_CPU_PAGE_SIZE (((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER))
+/** CPU Page Mask, which masks off the offset within a page */
+#define _MALI_OSK_CPU_PAGE_MASK (~((((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER)) - ((u32)1)))
+/** @} */ /* end of group _MALI_OSK_CPU_PAGE */
+
+/** @defgroup _MALI_OSK_MALI_PAGE Mali Physical Page size macros
+ *
+ * Mali Physical page size macros. The order of the page size is supplied for
+ * ease of use by algorithms that might require it, since it is easier to know
+ * it ahead of time rather than calculating it.
+ *
+ * The Mali Page Mask macro masks off the lower bits of a physical address to
+ * give the start address of the page for that physical address.
+ *
+ * @note The Mali device driver code is designed for systems with 4KB page size.
+ * Changing these macros will not make the entire Mali device driver work with
+ * page sizes other than 4KB.
+ *
+ * @note The Mali Physical Page Size has been assumed to be the same as the CPU
+ * Physical Page Size.
+ *
+ * @{
+ */
+
+/** Mali Page Order, as log to base 2 of the Page size. @see _MALI_OSK_MALI_PAGE_SIZE */
+#define _MALI_OSK_MALI_PAGE_ORDER PAGE_SHIFT
+/** Mali Page Size, in bytes.               */
+#define _MALI_OSK_MALI_PAGE_SIZE PAGE_SIZE
+/** Mali Page Mask, which masks off the offset within a page */
+#define _MALI_OSK_MALI_PAGE_MASK PAGE_MASK
+/** @} */ /* end of group _MALI_OSK_MALI_PAGE*/
+
+/** @brief flags for mapping a user-accessible memory range
+ *
+ * Where a function with prefix '_mali_osk_mem_mapregion' accepts flags as one
+ * of the function parameters, it will use one of these. These allow per-page
+ * control over mappings. Compare with the mali_memory_allocation_flag type,
+ * which acts over an entire range
+ *
+ * These may be OR'd together with bitwise OR (|), but must be cast back into
+ * the type after OR'ing.
+ */
+typedef enum {
+	_MALI_OSK_MEM_MAPREGION_FLAG_OS_ALLOCATED_PHYSADDR = 0x1, /**< Physical address is OS Allocated */
+} _mali_osk_mem_mapregion_flags_t;
+/** @} */ /* end group _mali_osk_low_level_memory */
+
+/** @defgroup _mali_osk_notification OSK Notification Queues
+ * @{ */
+
+/** @brief Private type for notification queue objects */
+typedef struct _mali_osk_notification_queue_t_struct _mali_osk_notification_queue_t;
+
+/** @brief Public notification data object type */
+typedef struct _mali_osk_notification_t_struct {
+	u32 notification_type;   /**< The notification type */
+	u32 result_buffer_size; /**< Size of the result buffer to copy to user space */
+	void *result_buffer;    /**< Buffer containing any type specific data */
+} _mali_osk_notification_t;
+
+/** @} */ /* end group _mali_osk_notification */
+
+
+/** @defgroup _mali_osk_timer OSK Timer Callbacks
+ * @{ */
+
+/** @brief Function to call when a timer expires
+ *
+ * When a timer expires, this function is called. Note that on many systems,
+ * a timer callback will be executed in IRQ context. Therefore, restrictions
+ * may apply on what can be done inside the timer callback.
+ *
+ * If a timer requires more work to be done than can be acheived in an IRQ
+ * context, then it may defer the work with a work-queue. For example, it may
+ * use \ref _mali_osk_wq_schedule_work() to make use of a bottom-half handler
+ * to carry out the remaining work.
+ *
+ * Stopping the timer with \ref _mali_osk_timer_del() blocks on compeletion of
+ * the callback. Therefore, the callback may not obtain any mutexes also held
+ * by any callers of _mali_osk_timer_del(). Otherwise, a deadlock may occur.
+ *
+ * @param arg Function-specific data */
+typedef void (*_mali_osk_timer_callback_t)(void *arg);
+
+/** @brief Private type for Timer Callback Objects */
+typedef struct _mali_osk_timer_t_struct _mali_osk_timer_t;
+/** @} */ /* end group _mali_osk_timer */
+
+
+/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists
+ * @{ */
+
+/** @brief Public List objects.
+ *
+ * To use, add a _mali_osk_list_t member to the structure that may become part
+ * of a list. When traversing the _mali_osk_list_t objects, use the
+ * _MALI_OSK_CONTAINER_OF() macro to recover the structure from its
+ *_mali_osk_list_t member
+ *
+ * Each structure may have multiple _mali_osk_list_t members, so that the
+ * structure is part of multiple lists. When traversing lists, ensure that the
+ * correct _mali_osk_list_t member is used, because type-checking will be
+ * lost by the compiler.
+ */
+typedef struct _mali_osk_list_s {
+	struct _mali_osk_list_s *next;
+	struct _mali_osk_list_s *prev;
+} _mali_osk_list_t;
+/** @} */ /* end group _mali_osk_list */
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief resource description struct
+ *
+ * Platform independent representation of a Mali HW resource
+ */
+typedef struct _mali_osk_resource {
+	const char *description;        /**< short description of the resource */
+	uintptr_t base;                 /**< Physical base address of the resource, as seen by Mali resources. */
+	const char *irq_name;           /**< Name of irq belong to this resource */
+	u32 irq;                        /**< IRQ number delivered to the CPU, or -1 to tell the driver to probe for it (if possible) */
+} _mali_osk_resource_t;
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_wait_queue OSK Wait Queue functionality
+ * @{ */
+/** @brief Private type for wait queue objects */
+typedef struct _mali_osk_wait_queue_t_struct _mali_osk_wait_queue_t;
+/** @} */ /* end group _mali_osk_wait_queue */
+
+/** @} */ /* end group osuapi */
+
+/** @} */ /* end group uddapi */
+
+/** @brief Mali print ctx type which uses seq_file
+  */
+typedef struct seq_file _mali_osk_print_ctx;
+
+#define _MALI_OSK_BITMAP_INVALIDATE_INDEX -1
+
+typedef struct _mali_osk_bitmap {
+	u32         reserve;
+	u32         last;
+	u32         max;
+	u32         avail;
+	_mali_osk_spinlock_t   *lock;
+	unsigned long          *table;
+} _mali_osk_bitmap_t;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_TYPES_H__ */
diff --git a/mali/common/mali_pm.c b/mali/common/mali_pm.c
new file mode 100755
index 0000000..1ef03a6
--- /dev/null
+++ b/mali/common/mali_pm.c
@@ -0,0 +1,1362 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pm.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_scheduler.h"
+#include "mali_group.h"
+#include "mali_pm_domain.h"
+#include "mali_pmu.h"
+
+#include "mali_executor.h"
+#include "mali_control_timer.h"
+
+#if defined(DEBUG)
+u32 num_pm_runtime_resume = 0;
+u32 num_pm_updates = 0;
+u32 num_pm_updates_up = 0;
+u32 num_pm_updates_down = 0;
+#endif
+
+#define MALI_PM_DOMAIN_DUMMY_MASK (1 << MALI_DOMAIN_INDEX_DUMMY)
+
+/* lock protecting power state (including pm_domains) */
+static _mali_osk_spinlock_irq_t *pm_lock_state = NULL;
+
+/* the wanted domain mask (protected by pm_lock_state) */
+static u32 pd_mask_wanted = 0;
+
+/* used to deferring the actual power changes */
+static _mali_osk_wq_work_t *pm_work = NULL;
+
+/* lock protecting power change execution */
+static _mali_osk_mutex_t *pm_lock_exec = NULL;
+
+/* PMU domains which are actually powered on (protected by pm_lock_exec) */
+static u32 pmu_mask_current = 0;
+
+/*
+ * domains which marked as powered on (protected by pm_lock_exec)
+ * This can be different from pmu_mask_current right after GPU power on
+ * if the PMU domains default to powered up.
+ */
+static u32 pd_mask_current = 0;
+
+static u16 domain_config[MALI_MAX_NUMBER_OF_DOMAINS] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	1 << MALI_DOMAIN_INDEX_DUMMY
+};
+
+/* The relative core power cost */
+#define MALI_GP_COST 3
+#define MALI_PP_COST 6
+#define MALI_L2_COST 1
+
+/*
+ *We have MALI_MAX_NUMBER_OF_PP_PHYSICAL_CORES + 1 rows in this matrix
+ *because we mush store the mask of different pp cores: 0, 1, 2, 3, 4, 5, 6, 7, 8.
+ */
+static int mali_pm_domain_power_cost_result[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1][MALI_MAX_NUMBER_OF_DOMAINS];
+/*
+ * Keep track of runtime PM state, so that we know
+ * how to resume during OS resume.
+ */
+#ifdef CONFIG_PM_RUNTIME
+static mali_bool mali_pm_runtime_active = MALI_FALSE;
+#else
+/* when kernel don't enable PM_RUNTIME, set the flag always true,
+ * for GPU will not power off by runtime */
+static mali_bool mali_pm_runtime_active = MALI_TRUE;
+#endif
+
+static void mali_pm_state_lock(void);
+static void mali_pm_state_unlock(void);
+static _mali_osk_errcode_t mali_pm_create_pm_domains(void);
+static void mali_pm_set_pmu_domain_config(void);
+static u32 mali_pm_get_registered_cores_mask(void);
+static void mali_pm_update_sync_internal(void);
+static mali_bool mali_pm_common_suspend(void);
+static void mali_pm_update_work(void *data);
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask);
+const char *mali_pm_group_stats_to_string(void);
+#endif
+
+_mali_osk_errcode_t mali_pm_initialize(void)
+{
+	_mali_osk_errcode_t err;
+	struct mali_pmu_core *pmu;
+
+	pm_lock_state = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED,
+			_MALI_OSK_LOCK_ORDER_PM_STATE);
+	if (NULL == pm_lock_state) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pm_lock_exec = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED,
+					    _MALI_OSK_LOCK_ORDER_PM_STATE);
+	if (NULL == pm_lock_exec) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pm_work = _mali_osk_wq_create_work(mali_pm_update_work, NULL);
+	if (NULL == pm_work) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pmu = mali_pmu_get_global_pmu_core();
+	if (NULL != pmu) {
+		/*
+		 * We have a Mali PMU, set the correct domain
+		 * configuration (default or custom)
+		 */
+
+		u32 registered_cores_mask;
+
+		mali_pm_set_pmu_domain_config();
+
+		registered_cores_mask = mali_pm_get_registered_cores_mask();
+		mali_pmu_set_registered_cores_mask(pmu, registered_cores_mask);
+
+		MALI_DEBUG_ASSERT(0 == pd_mask_wanted);
+	}
+
+	/* Create all power domains needed (at least one dummy domain) */
+	err = mali_pm_create_pm_domains();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_terminate();
+		return err;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_pm_terminate(void)
+{
+	if (NULL != pm_work) {
+		_mali_osk_wq_delete_work(pm_work);
+		pm_work = NULL;
+	}
+
+	mali_pm_domain_terminate();
+
+	if (NULL != pm_lock_exec) {
+		_mali_osk_mutex_term(pm_lock_exec);
+		pm_lock_exec = NULL;
+	}
+
+	if (NULL != pm_lock_state) {
+		_mali_osk_spinlock_irq_term(pm_lock_state);
+		pm_lock_state = NULL;
+	}
+}
+
+struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index,
+		struct mali_l2_cache_core *l2_cache)
+{
+	struct mali_pm_domain *domain;
+
+	domain = mali_pm_domain_get_from_mask(domain_config[domain_index]);
+	if (NULL == domain) {
+		MALI_DEBUG_ASSERT(0 == domain_config[domain_index]);
+		domain = mali_pm_domain_get_from_index(
+				 MALI_DOMAIN_INDEX_DUMMY);
+		domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK;
+	} else {
+		MALI_DEBUG_ASSERT(0 != domain_config[domain_index]);
+	}
+
+	MALI_DEBUG_ASSERT(NULL != domain);
+
+	mali_pm_domain_add_l2_cache(domain, l2_cache);
+
+	return domain; /* return the actual domain this was registered in */
+}
+
+struct mali_pm_domain *mali_pm_register_group(u32 domain_index,
+		struct mali_group *group)
+{
+	struct mali_pm_domain *domain;
+
+	domain = mali_pm_domain_get_from_mask(domain_config[domain_index]);
+	if (NULL == domain) {
+		MALI_DEBUG_ASSERT(0 == domain_config[domain_index]);
+		domain = mali_pm_domain_get_from_index(
+				 MALI_DOMAIN_INDEX_DUMMY);
+		domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK;
+	} else {
+		MALI_DEBUG_ASSERT(0 != domain_config[domain_index]);
+	}
+
+	MALI_DEBUG_ASSERT(NULL != domain);
+
+	mali_pm_domain_add_group(domain, group);
+
+	return domain; /* return the actual domain this was registered in */
+}
+
+mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains,
+				  struct mali_group **groups,
+				  u32 num_domains)
+{
+	mali_bool ret = MALI_TRUE; /* Assume all is powered on instantly */
+	u32 i;
+
+	mali_pm_state_lock();
+
+	for (i = 0; i < num_domains; i++) {
+		MALI_DEBUG_ASSERT_POINTER(domains[i]);
+		pd_mask_wanted |= mali_pm_domain_ref_get(domains[i]);
+		if (MALI_FALSE == mali_pm_domain_power_is_on(domains[i])) {
+			/*
+			 * Tell caller that the corresponding group
+			 * was not already powered on.
+			 */
+			ret = MALI_FALSE;
+		} else {
+			/*
+			 * There is a time gap between we power on the domain and
+			 * set the power state of the corresponding groups to be on.
+			 */
+			if (NULL != groups[i] &&
+			    MALI_FALSE == mali_group_power_is_on(groups[i])) {
+				ret = MALI_FALSE;
+			}
+		}
+	}
+
+	MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (get refs)\n", pd_mask_wanted));
+
+	mali_pm_state_unlock();
+
+	return ret;
+}
+
+mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains,
+				  u32 num_domains)
+{
+	u32 mask = 0;
+	mali_bool ret;
+	u32 i;
+
+	mali_pm_state_lock();
+
+	for (i = 0; i < num_domains; i++) {
+		MALI_DEBUG_ASSERT_POINTER(domains[i]);
+		mask |= mali_pm_domain_ref_put(domains[i]);
+	}
+
+	if (0 == mask) {
+		/* return false, all domains should still stay on */
+		ret = MALI_FALSE;
+	} else {
+		/* Assert that we are dealing with a change */
+		MALI_DEBUG_ASSERT((pd_mask_wanted & mask) == mask);
+
+		/* Update our desired domain mask */
+		pd_mask_wanted &= ~mask;
+
+		/* return true; one or more domains can now be powered down */
+		ret = MALI_TRUE;
+	}
+
+	MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (put refs)\n", pd_mask_wanted));
+
+	mali_pm_state_unlock();
+
+	return ret;
+}
+
+void mali_pm_init_begin(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	_mali_osk_pm_dev_ref_get_sync();
+
+	/* Ensure all PMU domains are on */
+	if (NULL != pmu) {
+		mali_pmu_power_up_all(pmu);
+	}
+}
+
+void mali_pm_init_end(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	/* Ensure all PMU domains are off */
+	if (NULL != pmu) {
+		mali_pmu_power_down_all(pmu);
+	}
+
+	_mali_osk_pm_dev_ref_put();
+}
+
+void mali_pm_update_sync(void)
+{
+	mali_pm_exec_lock();
+
+	if (MALI_TRUE == mali_pm_runtime_active) {
+		/*
+		 * Only update if GPU is powered on.
+		 * Deactivation of the last group will result in both a
+		 * deferred runtime PM suspend operation and
+		 * deferred execution of this function.
+		 * mali_pm_runtime_active will be false if runtime PM
+		 * executed first and thus the GPU is now fully powered off.
+		 */
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pm_update_async(void)
+{
+	_mali_osk_wq_schedule_work(pm_work);
+}
+
+void mali_pm_os_suspend(mali_bool os_suspend)
+{
+	int ret;
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: OS suspend\n"));
+
+	/* Suspend execution of all jobs, and go to inactive state */
+	mali_executor_suspend();
+
+	if (os_suspend) {
+		mali_control_timer_suspend(MALI_TRUE);
+	}
+
+	mali_pm_exec_lock();
+
+	ret = mali_pm_common_suspend();
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == ret);
+	MALI_IGNORE(ret);
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pm_os_resume(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: OS resume\n"));
+
+	mali_pm_exec_lock();
+
+#if defined(DEBUG)
+	mali_pm_state_lock();
+
+	/* Assert that things are as we left them in os_suspend(). */
+	MALI_DEBUG_ASSERT(0 == pd_mask_wanted);
+	MALI_DEBUG_ASSERT(0 == pd_mask_current);
+	MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+	mali_pm_state_unlock();
+#endif
+
+	if (MALI_TRUE == mali_pm_runtime_active) {
+		/* Runtime PM was active, so reset PMU */
+		if (NULL != pmu) {
+			mali_pmu_reset(pmu);
+			pmu_mask_current = mali_pmu_get_mask(pmu);
+
+			MALI_DEBUG_PRINT(3, ("Mali PM: OS resume 0x%x \n", pmu_mask_current));
+		}
+
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+
+	/* Start executing jobs again */
+	mali_executor_resume();
+}
+
+mali_bool mali_pm_runtime_suspend(void)
+{
+	mali_bool ret;
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: Runtime suspend\n"));
+
+	mali_pm_exec_lock();
+
+	/*
+	 * Put SW state directly into "off" state, and do not bother to power
+	 * down each power domain, because entire GPU will be powered off
+	 * when we return.
+	 * For runtime PM suspend, in contrast to OS suspend, there is a race
+	 * between this function and the mali_pm_update_sync_internal(), which
+	 * is fine...
+	 */
+	ret = mali_pm_common_suspend();
+	if (MALI_TRUE == ret) {
+		mali_pm_runtime_active = MALI_FALSE;
+	} else {
+		/*
+		 * Process the "power up" instead,
+		 * which could have been "lost"
+		 */
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+
+	return ret;
+}
+
+void mali_pm_runtime_resume(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	mali_pm_exec_lock();
+
+	mali_pm_runtime_active = MALI_TRUE;
+
+#if defined(DEBUG)
+	++num_pm_runtime_resume;
+
+	mali_pm_state_lock();
+
+	/*
+	 * Assert that things are as we left them in runtime_suspend(),
+	 * except for pd_mask_wanted which normally will be the reason we
+	 * got here (job queued => domains wanted)
+	 */
+	MALI_DEBUG_ASSERT(0 == pd_mask_current);
+	MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+	mali_pm_state_unlock();
+#endif
+
+	if (NULL != pmu) {
+		mali_pmu_reset(pmu);
+		pmu_mask_current = mali_pmu_get_mask(pmu);
+		MALI_DEBUG_PRINT(3, ("Mali PM: Runtime resume 0x%x \n", pmu_mask_current));
+	}
+
+	/*
+	 * Normally we are resumed because a job has just been queued.
+	 * pd_mask_wanted should thus be != 0.
+	 * It is however possible for others to take a Mali Runtime PM ref
+	 * without having a job queued.
+	 * We should however always call mali_pm_update_sync_internal(),
+	 * because this will take care of any potential mismatch between
+	 * pmu_mask_current and pd_mask_current.
+	 */
+	mali_pm_update_sync_internal();
+
+	mali_pm_exec_unlock();
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain,
+			      char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tPower domain: id %u\n",
+				mali_pm_domain_get_id(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tMask: 0x%04x\n",
+				mali_pm_domain_get_mask(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tUse count: %u\n",
+				mali_pm_domain_get_use_count(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tCurrent power state: %s\n",
+				(mali_pm_domain_get_mask(domain) & pd_mask_current) ?
+				"On" : "Off");
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tWanted power state: %s\n",
+				(mali_pm_domain_get_mask(domain) & pd_mask_wanted) ?
+				"On" : "Off");
+
+	return n;
+}
+#endif
+
+static void mali_pm_state_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(pm_lock_state);
+}
+
+static void mali_pm_state_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(pm_lock_state);
+}
+
+void mali_pm_exec_lock(void)
+{
+	_mali_osk_mutex_wait(pm_lock_exec);
+}
+
+void mali_pm_exec_unlock(void)
+{
+	_mali_osk_mutex_signal(pm_lock_exec);
+}
+
+static void mali_pm_domain_power_up(u32 power_up_mask,
+				    struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS],
+				    u32 *num_groups_up,
+				    struct mali_l2_cache_core *l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES],
+				    u32 *num_l2_up)
+{
+	u32 domain_bit;
+	u32 notify_mask = power_up_mask;
+
+	MALI_DEBUG_ASSERT(0 != power_up_mask);
+	MALI_DEBUG_ASSERT_POINTER(groups_up);
+	MALI_DEBUG_ASSERT_POINTER(num_groups_up);
+	MALI_DEBUG_ASSERT(0 == *num_groups_up);
+	MALI_DEBUG_ASSERT_POINTER(l2_up);
+	MALI_DEBUG_ASSERT_POINTER(num_l2_up);
+	MALI_DEBUG_ASSERT(0 == *num_l2_up);
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state);
+
+	MALI_DEBUG_PRINT(5,
+			 ("PM update:      Powering up domains: . [%s]\n",
+			  mali_pm_mask_to_string(power_up_mask)));
+
+	pd_mask_current |= power_up_mask;
+
+	domain_bit = _mali_osk_fls(notify_mask);
+	while (0 != domain_bit) {
+		u32 domain_id = domain_bit - 1;
+		struct mali_pm_domain *domain =
+			mali_pm_domain_get_from_index(
+				domain_id);
+		struct mali_l2_cache_core *l2_cache;
+		struct mali_l2_cache_core *l2_cache_tmp;
+		struct mali_group *group;
+		struct mali_group *group_tmp;
+
+		/* Mark domain as powered up */
+		mali_pm_domain_set_power_on(domain, MALI_TRUE);
+
+		/*
+		 * Make a note of the L2 and/or group(s) to notify
+		 * (need to release the PM state lock before doing so)
+		 */
+
+		_MALI_OSK_LIST_FOREACHENTRY(l2_cache,
+					    l2_cache_tmp,
+					    mali_pm_domain_get_l2_cache_list(
+						    domain),
+					    struct mali_l2_cache_core,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_l2_up <
+					  MALI_MAX_NUMBER_OF_L2_CACHE_CORES);
+			l2_up[*num_l2_up] = l2_cache;
+			(*num_l2_up)++;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group,
+					    group_tmp,
+					    mali_pm_domain_get_group_list(domain),
+					    struct mali_group,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_groups_up <
+					  MALI_MAX_NUMBER_OF_GROUPS);
+			groups_up[*num_groups_up] = group;
+
+			(*num_groups_up)++;
+		}
+
+		/* Remove current bit and find next */
+		notify_mask &= ~(1 << (domain_id));
+		domain_bit = _mali_osk_fls(notify_mask);
+	}
+}
+static void mali_pm_domain_power_down(u32 power_down_mask,
+				      struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS],
+				      u32 *num_groups_down,
+				      struct mali_l2_cache_core *l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES],
+				      u32 *num_l2_down)
+{
+	u32 domain_bit;
+	u32 notify_mask = power_down_mask;
+
+	MALI_DEBUG_ASSERT(0 != power_down_mask);
+	MALI_DEBUG_ASSERT_POINTER(groups_down);
+	MALI_DEBUG_ASSERT_POINTER(num_groups_down);
+	MALI_DEBUG_ASSERT(0 == *num_groups_down);
+	MALI_DEBUG_ASSERT_POINTER(l2_down);
+	MALI_DEBUG_ASSERT_POINTER(num_l2_down);
+	MALI_DEBUG_ASSERT(0 == *num_l2_down);
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state);
+
+	MALI_DEBUG_PRINT(5,
+			 ("PM update:      Powering down domains: [%s]\n",
+			  mali_pm_mask_to_string(power_down_mask)));
+
+	pd_mask_current &= ~power_down_mask;
+
+	domain_bit = _mali_osk_fls(notify_mask);
+	while (0 != domain_bit) {
+		u32 domain_id = domain_bit - 1;
+		struct mali_pm_domain *domain =
+			mali_pm_domain_get_from_index(domain_id);
+		struct mali_l2_cache_core *l2_cache;
+		struct mali_l2_cache_core *l2_cache_tmp;
+		struct mali_group *group;
+		struct mali_group *group_tmp;
+
+		/* Mark domain as powered down */
+		mali_pm_domain_set_power_on(domain, MALI_FALSE);
+
+		/*
+		 * Make a note of the L2s and/or groups to notify
+		 * (need to release the PM state lock before doing so)
+		 */
+
+		_MALI_OSK_LIST_FOREACHENTRY(l2_cache,
+					    l2_cache_tmp,
+					    mali_pm_domain_get_l2_cache_list(domain),
+					    struct mali_l2_cache_core,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_l2_down <
+					  MALI_MAX_NUMBER_OF_L2_CACHE_CORES);
+			l2_down[*num_l2_down] = l2_cache;
+			(*num_l2_down)++;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group,
+					    group_tmp,
+					    mali_pm_domain_get_group_list(domain),
+					    struct mali_group,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_groups_down <
+					  MALI_MAX_NUMBER_OF_GROUPS);
+			groups_down[*num_groups_down] = group;
+			(*num_groups_down)++;
+		}
+
+		/* Remove current bit and find next */
+		notify_mask &= ~(1 << (domain_id));
+		domain_bit = _mali_osk_fls(notify_mask);
+	}
+}
+
+/*
+ * Execute pending power domain changes
+ * pm_lock_exec lock must be taken by caller.
+ */
+static void mali_pm_update_sync_internal(void)
+{
+	/*
+	 * This should only be called in non-atomic context
+	 * (normally as deferred work)
+	 *
+	 * Look at the pending power domain changes, and execute these.
+	 * Make sure group and schedulers are notified about changes.
+	 */
+
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	u32 power_down_mask;
+	u32 power_up_mask;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+
+#if defined(DEBUG)
+	++num_pm_updates;
+#endif
+
+	/* Hold PM state lock while we look at (and obey) the wanted state */
+	mali_pm_state_lock();
+
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Wanted domain mask: .. [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_wanted)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	/* Figure out which cores we need to power on */
+	power_up_mask = pd_mask_wanted &
+			(pd_mask_wanted ^ pd_mask_current);
+
+	if (0 != power_up_mask) {
+		u32 power_up_mask_pmu;
+		struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_up = 0;
+		struct mali_l2_cache_core *
+			l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_up = 0;
+		u32 i;
+
+#if defined(DEBUG)
+		++num_pm_updates_up;
+#endif
+
+		/*
+		 * Make sure dummy/global domain is always included when
+		 * powering up, since this is controlled by runtime PM,
+		 * and device power is on at this stage.
+		 */
+		power_up_mask |= MALI_PM_DOMAIN_DUMMY_MASK;
+
+		/* Power up only real PMU domains */
+		power_up_mask_pmu = power_up_mask & ~MALI_PM_DOMAIN_DUMMY_MASK;
+
+		/* But not those that happen to be powered on already */
+		power_up_mask_pmu &= (power_up_mask ^ pmu_mask_current) &
+				     power_up_mask;
+
+		if (0 != power_up_mask_pmu) {
+			MALI_DEBUG_ASSERT(NULL != pmu);
+			pmu_mask_current |= power_up_mask_pmu;
+			mali_pmu_power_up(pmu, power_up_mask_pmu);
+		}
+
+		/*
+		 * Put the domains themselves in power up state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_up(power_up_mask,
+					groups_up, &num_groups_up,
+					l2_up, &num_l2_up);
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/* Notify each L2 cache that we have be powered up */
+		for (i = 0; i < num_l2_up; i++) {
+			mali_l2_cache_power_up(l2_up[i]);
+		}
+
+		/*
+		 * Tell execution module about all the groups we have
+		 * powered up. Groups will be notified as a result of this.
+		 */
+		mali_executor_group_power_up(groups_up, num_groups_up);
+
+		/* Lock state again before checking for power down */
+		mali_pm_state_lock();
+	}
+
+	/* Figure out which cores we need to power off */
+	power_down_mask = pd_mask_current &
+			  (pd_mask_wanted ^ pd_mask_current);
+
+	/*
+	 * Never power down the dummy/global domain here. This is to be done
+	 * from a suspend request (since this domain is only physicall powered
+	 * down at that point)
+	 */
+	power_down_mask &= ~MALI_PM_DOMAIN_DUMMY_MASK;
+
+	if (0 != power_down_mask) {
+		u32 power_down_mask_pmu;
+		struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_down = 0;
+		struct mali_l2_cache_core *
+			l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_down = 0;
+		u32 i;
+
+#if defined(DEBUG)
+		++num_pm_updates_down;
+#endif
+
+		/*
+		 * Put the domains themselves in power down state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_down(power_down_mask,
+					  groups_down, &num_groups_down,
+					  l2_down, &num_l2_down);
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/*
+		 * Tell execution module about all the groups we will be
+		 * powering down. Groups will be notified as a result of this.
+		 */
+		if (0 < num_groups_down) {
+			mali_executor_group_power_down(groups_down, num_groups_down);
+		}
+
+		/* Notify each L2 cache that we will be powering down */
+		for (i = 0; i < num_l2_down; i++) {
+			mali_l2_cache_power_down(l2_down[i]);
+		}
+
+		/*
+		 * Power down only PMU domains which should not stay on
+		 * Some domains might for instance currently be incorrectly
+		 * powered up if default domain power state is all on.
+		 */
+		power_down_mask_pmu = pmu_mask_current & (~pd_mask_current);
+
+		if (0 != power_down_mask_pmu) {
+			MALI_DEBUG_ASSERT(NULL != pmu);
+			pmu_mask_current &= ~power_down_mask_pmu;
+			mali_pmu_power_down(pmu, power_down_mask_pmu);
+
+		}
+	} else {
+		/*
+		 * Power down only PMU domains which should not stay on
+		 * Some domains might for instance currently be incorrectly
+		 * powered up if default domain power state is all on.
+		 */
+		u32 power_down_mask_pmu;
+
+		/* No need for state lock since we'll only update PMU */
+		mali_pm_state_unlock();
+
+		power_down_mask_pmu = pmu_mask_current & (~pd_mask_current);
+
+		if (0 != power_down_mask_pmu) {
+			MALI_DEBUG_ASSERT(NULL != pmu);
+			pmu_mask_current &= ~power_down_mask_pmu;
+			mali_pmu_power_down(pmu, power_down_mask_pmu);
+		}
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM update post: Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update post: Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update post: Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+}
+
+static mali_bool mali_pm_common_suspend(void)
+{
+	mali_pm_state_lock();
+
+	if (0 != pd_mask_wanted) {
+		MALI_DEBUG_PRINT(5, ("PM: Aborting suspend operation\n\n\n"));
+		mali_pm_state_unlock();
+		return MALI_FALSE;
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Wanted domain mask: .. [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_wanted)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	if (0 != pd_mask_current) {
+		/*
+		 * We have still some domains powered on.
+		 * It is for instance very normal that at least the
+		 * dummy/global domain is marked as powered on at this point.
+		 * (because it is physically powered on until this function
+		 * returns)
+		 */
+
+		struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_down = 0;
+		struct mali_l2_cache_core *
+			l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_down = 0;
+		u32 i;
+
+		/*
+		 * Put the domains themselves in power down state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_down(pd_mask_current,
+					  groups_down,
+					  &num_groups_down,
+					  l2_down,
+					  &num_l2_down);
+
+		MALI_DEBUG_ASSERT(0 == pd_mask_current);
+		MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/*
+		 * Tell execution module about all the groups we will be
+		 * powering down. Groups will be notified as a result of this.
+		 */
+		if (0 < num_groups_down) {
+			mali_executor_group_power_down(groups_down, num_groups_down);
+		}
+
+		/* Notify each L2 cache that we will be powering down */
+		for (i = 0; i < num_l2_down; i++) {
+			mali_l2_cache_power_down(l2_down[i]);
+		}
+
+		pmu_mask_current = 0;
+	} else {
+		MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+		MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+		mali_pm_state_unlock();
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Current domain mask:  [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Current PMU mask: ... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Group power stats: .. <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	return MALI_TRUE;
+}
+
+static void mali_pm_update_work(void *data)
+{
+	MALI_IGNORE(data);
+	mali_pm_update_sync();
+}
+
+static _mali_osk_errcode_t mali_pm_create_pm_domains(void)
+{
+	int i;
+
+	/* Create all domains (including dummy domain) */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0x0 == domain_config[i]) continue;
+
+		if (NULL == mali_pm_domain_create(domain_config[i])) {
+			return _MALI_OSK_ERR_NOMEM;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_pm_set_default_pm_domain_config(void)
+{
+	MALI_DEBUG_ASSERT(0 != _mali_osk_resource_base_address());
+
+	/* GP core */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_GP, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_GP] = 0x01;
+	}
+
+	/* PP0 - PP3 core */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP0, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 2;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 1;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 0;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP1, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 3;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 2;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 1;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP2, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 4;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 2;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 1;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP3, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 5;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 2;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 1;
+		}
+	}
+
+	/* PP4 - PP7 */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP4, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP4] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP5, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP5] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP6, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP6] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP7, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP7] = 0x01 << 3;
+	}
+
+	/* L2gp/L2PP0/L2PP4 */
+	if (mali_is_mali400()) {
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI400_OFFSET_L2_CACHE0, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 1;
+		}
+	} else if (mali_is_mali450()) {
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE0, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 0;
+		}
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE1, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 1;
+		}
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE2, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L22] = 0x01 << 3;
+		}
+	} else if (mali_is_mali470()) {
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI470_OFFSET_L2_CACHE1, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 0;
+		}
+	}
+}
+
+static u32 mali_pm_get_registered_cores_mask(void)
+{
+	int i = 0;
+	u32 mask = 0;
+
+	for (i = 0; i < MALI_DOMAIN_INDEX_DUMMY; i++) {
+		mask |= domain_config[i];
+	}
+
+	return mask;
+}
+
+static void mali_pm_set_pmu_domain_config(void)
+{
+	int i = 0;
+
+	_mali_osk_device_data_pmu_config_get(domain_config, MALI_MAX_NUMBER_OF_DOMAINS - 1);
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) {
+		if (0 != domain_config[i]) {
+			MALI_DEBUG_PRINT(2, ("Using customer pmu config:\n"));
+			break;
+		}
+	}
+
+	if (MALI_MAX_NUMBER_OF_DOMAINS - 1 == i) {
+		MALI_DEBUG_PRINT(2, ("Using hw detect pmu config:\n"));
+		mali_pm_set_default_pm_domain_config();
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) {
+		if (domain_config[i]) {
+			MALI_DEBUG_PRINT(2, ("domain_config[%d] = 0x%x \n", i, domain_config[i]));
+		}
+	}
+	/* Can't override dummy domain mask */
+	domain_config[MALI_DOMAIN_INDEX_DUMMY] =
+		1 << MALI_DOMAIN_INDEX_DUMMY;
+}
+
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask)
+{
+	static char bit_str[MALI_MAX_NUMBER_OF_DOMAINS + 1];
+	int bit;
+	int str_pos = 0;
+
+	/* Must be protected by lock since we use shared string buffer */
+	if (NULL != pm_lock_exec) {
+		MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	}
+
+	for (bit = MALI_MAX_NUMBER_OF_DOMAINS - 1; bit >= 0; bit--) {
+		if (mask & (1 << bit)) {
+			bit_str[str_pos] = 'X';
+		} else {
+			bit_str[str_pos] = '-';
+		}
+		str_pos++;
+	}
+
+	bit_str[MALI_MAX_NUMBER_OF_DOMAINS] = '\0';
+
+	return bit_str;
+}
+
+const char *mali_pm_group_stats_to_string(void)
+{
+	static char bit_str[MALI_MAX_NUMBER_OF_GROUPS + 1];
+	u32 num_groups = mali_group_get_glob_num_groups();
+	u32 i;
+
+	/* Must be protected by lock since we use shared string buffer */
+	if (NULL != pm_lock_exec) {
+		MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	}
+
+	for (i = 0; i < num_groups && i < MALI_MAX_NUMBER_OF_GROUPS; i++) {
+		struct mali_group *group;
+
+		group = mali_group_get_glob_group(i);
+
+		if (MALI_TRUE == mali_group_power_is_on(group)) {
+			bit_str[i] = 'X';
+		} else {
+			bit_str[i] = '-';
+		}
+	}
+
+	bit_str[i] = '\0';
+
+	return bit_str;
+}
+#endif
+
+/*
+ * num_pp is the number of PP cores which will be powered on given this mask
+ * cost is the total power cost of cores which will be powered on given this mask
+ */
+static void mali_pm_stat_from_mask(u32 mask, u32 *num_pp, u32 *cost)
+{
+	u32 i;
+
+	/* loop through all cores */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (!(domain_config[i] & mask)) {
+			continue;
+		}
+
+		switch (i) {
+		case MALI_DOMAIN_INDEX_GP:
+			*cost += MALI_GP_COST;
+
+			break;
+		case MALI_DOMAIN_INDEX_PP0: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP1: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP2: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP3:
+			if (mali_is_mali400()) {
+				if ((domain_config[MALI_DOMAIN_INDEX_L20] & mask)
+				    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+					== domain_config[MALI_DOMAIN_INDEX_L20])) {
+					*num_pp += 1;
+				}
+			} else {
+				if ((domain_config[MALI_DOMAIN_INDEX_L21] & mask)
+				    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+					== domain_config[MALI_DOMAIN_INDEX_L21])) {
+					*num_pp += 1;
+				}
+			}
+
+			*cost += MALI_PP_COST;
+			break;
+		case MALI_DOMAIN_INDEX_PP4: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP5: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP6: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP7:
+			MALI_DEBUG_ASSERT(mali_is_mali450());
+
+			if ((domain_config[MALI_DOMAIN_INDEX_L22] & mask)
+			    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+				== domain_config[MALI_DOMAIN_INDEX_L22])) {
+				*num_pp += 1;
+			}
+
+			*cost += MALI_PP_COST;
+			break;
+		case MALI_DOMAIN_INDEX_L20: /* Fall through */
+		case MALI_DOMAIN_INDEX_L21: /* Fall through */
+		case MALI_DOMAIN_INDEX_L22:
+			*cost += MALI_L2_COST;
+
+			break;
+		}
+	}
+}
+
+void mali_pm_power_cost_setup(void)
+{
+	/*
+	 * Two parallel arrays which store the best domain mask and its cost
+	 * The index is the number of PP cores, E.g. Index 0 is for 1 PP option,
+	 * might have mask 0x2 and with cost of 1, lower cost is better
+	 */
+	u32 best_mask[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 };
+	u32 best_cost[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 };
+	/* Array cores_in_domain is used to store the total pp cores in each pm domain. */
+	u32 cores_in_domain[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	/* Domain_count is used to represent the max domain we have.*/
+	u32 max_domain_mask = 0;
+	u32 max_domain_id = 0;
+	u32 always_on_pp_cores = 0;
+
+	u32 num_pp, cost, mask;
+	u32 i, j , k;
+
+	/* Initialize statistics */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; i++) {
+		best_mask[i] = 0;
+		best_cost[i] = 0xFFFFFFFF; /* lower cost is better */
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1; i++) {
+		for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) {
+			mali_pm_domain_power_cost_result[i][j] = 0;
+		}
+	}
+
+	/* Caculate number of pp cores of a given domain config. */
+	for (i = MALI_DOMAIN_INDEX_PP0; i <= MALI_DOMAIN_INDEX_PP7; i++) {
+		if (0 < domain_config[i]) {
+			/* Get the max domain mask value used to caculate power cost
+			 * and we don't count in always on pp cores. */
+			if (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[i]
+			    && max_domain_mask < domain_config[i]) {
+				max_domain_mask = domain_config[i];
+			}
+
+			if (MALI_PM_DOMAIN_DUMMY_MASK == domain_config[i]) {
+				always_on_pp_cores++;
+			}
+		}
+	}
+	max_domain_id = _mali_osk_fls(max_domain_mask);
+
+	/*
+	 * Try all combinations of power domains and check how many PP cores
+	 * they have and their power cost.
+	 */
+	for (mask = 0; mask < (1 << max_domain_id); mask++) {
+		num_pp = 0;
+		cost = 0;
+
+		mali_pm_stat_from_mask(mask, &num_pp, &cost);
+
+		/* This mask is usable for all MP1 up to num_pp PP cores, check statistics for all */
+		for (i = 0; i < num_pp; i++) {
+			if (best_cost[i] >= cost) {
+				best_cost[i] = cost;
+				best_mask[i] = mask;
+			}
+		}
+	}
+
+	/*
+	 * If we want to enable x pp cores, if x is less than number of always_on pp cores,
+	 * all of pp cores we will enable must be always_on pp cores.
+	 */
+	for (i = 0; i < mali_executor_get_num_cores_total(); i++) {
+		if (i < always_on_pp_cores) {
+			mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1]
+				= i + 1;
+		} else {
+			mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1]
+				= always_on_pp_cores;
+		}
+	}
+
+	/* In this loop, variable i represent for the number of non-always on pp cores we want to enabled. */
+	for (i = 0; i < (mali_executor_get_num_cores_total() - always_on_pp_cores); i++) {
+		if (best_mask[i] == 0) {
+			/* This MP variant is not available */
+			continue;
+		}
+
+		for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) {
+			cores_in_domain[j] = 0;
+		}
+
+		for (j = MALI_DOMAIN_INDEX_PP0; j <= MALI_DOMAIN_INDEX_PP7; j++) {
+			if (0 < domain_config[j]
+			    && (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[i])) {
+				cores_in_domain[_mali_osk_fls(domain_config[j]) - 1]++;
+			}
+		}
+
+		/* In this loop, j represent for the number we have already enabled.*/
+		for (j = 0; j <= i;) {
+			/* j used to visit all of domain to get the number of pp cores remained in it. */
+			for (k = 0; k < max_domain_id; k++) {
+				/* If domain k in best_mask[i] is enabled and this domain has extra pp cores,
+				 * we know we must pick at least one pp core from this domain.
+				 * And then we move to next enabled pm domain. */
+				if ((best_mask[i] & (0x1 << k)) && (0 < cores_in_domain[k])) {
+					cores_in_domain[k]--;
+					mali_pm_domain_power_cost_result[always_on_pp_cores + i + 1][k]++;
+					j++;
+					if (j > i) {
+						break;
+					}
+				}
+			}
+		}
+	}
+}
+
+/*
+ * When we are doing core scaling,
+ * this function is called to return the best mask to
+ * achieve the best pp group power cost.
+ */
+void mali_pm_get_best_power_cost_mask(int num_requested, int *dst)
+{
+	MALI_DEBUG_ASSERT((mali_executor_get_num_cores_total() >= num_requested) && (0 <= num_requested));
+
+	_mali_osk_memcpy(dst, mali_pm_domain_power_cost_result[num_requested], MALI_MAX_NUMBER_OF_DOMAINS * sizeof(int));
+}
+
+u32 mali_pm_get_current_mask(void)
+{
+	return pd_mask_current;
+}
+
+u32 mali_pm_get_wanted_mask(void)
+{
+	return pd_mask_wanted;
+}
diff --git a/mali/common/mali_pm.h b/mali/common/mali_pm.h
new file mode 100755
index 0000000..3c11977
--- /dev/null
+++ b/mali/common/mali_pm.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_H__
+#define __MALI_PM_H__
+
+#include "mali_osk.h"
+#include "mali_pm_domain.h"
+
+#define MALI_DOMAIN_INDEX_GP        0
+#define MALI_DOMAIN_INDEX_PP0       1
+#define MALI_DOMAIN_INDEX_PP1       2
+#define MALI_DOMAIN_INDEX_PP2       3
+#define MALI_DOMAIN_INDEX_PP3       4
+#define MALI_DOMAIN_INDEX_PP4       5
+#define MALI_DOMAIN_INDEX_PP5       6
+#define MALI_DOMAIN_INDEX_PP6       7
+#define MALI_DOMAIN_INDEX_PP7       8
+#define MALI_DOMAIN_INDEX_L20       9
+#define MALI_DOMAIN_INDEX_L21      10
+#define MALI_DOMAIN_INDEX_L22      11
+/*
+ * The dummy domain is used when there is no physical power domain
+ * (e.g. no PMU or always on cores)
+ */
+#define MALI_DOMAIN_INDEX_DUMMY    12
+#define MALI_MAX_NUMBER_OF_DOMAINS 13
+
+/**
+ * Initialize the Mali PM module
+ *
+ * PM module covers Mali PM core, PM domains and Mali PMU
+ */
+_mali_osk_errcode_t mali_pm_initialize(void);
+
+/**
+ * Terminate the Mali PM module
+ */
+void mali_pm_terminate(void);
+
+void mali_pm_exec_lock(void);
+void mali_pm_exec_unlock(void);
+
+
+struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index,
+		struct mali_l2_cache_core *l2_cache);
+struct mali_pm_domain *mali_pm_register_group(u32 domain_index,
+		struct mali_group *group);
+
+mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains,
+				  struct mali_group **groups,
+				  u32 num_domains);
+mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains,
+				  u32 num_domains);
+
+void mali_pm_init_begin(void);
+void mali_pm_init_end(void);
+
+void mali_pm_update_sync(void);
+void mali_pm_update_async(void);
+
+/* Callback functions for system power management */
+void mali_pm_os_suspend(mali_bool os_suspend);
+void mali_pm_os_resume(void);
+
+mali_bool mali_pm_runtime_suspend(void);
+void mali_pm_runtime_resume(void);
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain,
+			      char *buf, u32 size);
+#endif
+
+void mali_pm_power_cost_setup(void);
+
+void mali_pm_get_best_power_cost_mask(int num_requested, int *dst);
+
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask);
+#endif
+
+u32 mali_pm_get_current_mask(void);
+u32 mali_pm_get_wanted_mask(void);
+#endif /* __MALI_PM_H__ */
diff --git a/mali/common/mali_pm_domain.c b/mali/common/mali_pm_domain.c
new file mode 100755
index 0000000..9db8488
--- /dev/null
+++ b/mali/common/mali_pm_domain.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_pm_domain.h"
+#include "mali_pmu.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+
+static struct mali_pm_domain *mali_pm_domains[MALI_MAX_NUMBER_OF_DOMAINS] =
+{ NULL, };
+
+void mali_pm_domain_initialize(void)
+{
+	/* Domains will be initialized/created on demand */
+}
+
+void mali_pm_domain_terminate(void)
+{
+	int i;
+
+	/* Delete all domains that has been created */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		mali_pm_domain_delete(mali_pm_domains[i]);
+		mali_pm_domains[i] = NULL;
+	}
+}
+
+struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask)
+{
+	struct mali_pm_domain *domain = NULL;
+	u32 domain_id = 0;
+
+	domain = mali_pm_domain_get_from_mask(pmu_mask);
+	if (NULL != domain) return domain;
+
+	MALI_DEBUG_PRINT(2,
+			 ("Mali PM domain: Creating Mali PM domain (mask=0x%08X)\n",
+			  pmu_mask));
+
+	domain = (struct mali_pm_domain *)_mali_osk_malloc(
+			 sizeof(struct mali_pm_domain));
+	if (NULL != domain) {
+		domain->power_is_on = MALI_FALSE;
+		domain->pmu_mask = pmu_mask;
+		domain->use_count = 0;
+		_mali_osk_list_init(&domain->group_list);
+		_mali_osk_list_init(&domain->l2_cache_list);
+
+		domain_id = _mali_osk_fls(pmu_mask) - 1;
+		/* Verify the domain_id */
+		MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > domain_id);
+		/* Verify that pmu_mask only one bit is set */
+		MALI_DEBUG_ASSERT((1 << domain_id) == pmu_mask);
+		mali_pm_domains[domain_id] = domain;
+
+		return domain;
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Unable to create PM domain\n"));
+	}
+
+	return NULL;
+}
+
+void mali_pm_domain_delete(struct mali_pm_domain *domain)
+{
+	if (NULL == domain) {
+		return;
+	}
+
+	_mali_osk_list_delinit(&domain->group_list);
+	_mali_osk_list_delinit(&domain->l2_cache_list);
+
+	_mali_osk_free(domain);
+}
+
+void mali_pm_domain_add_group(struct mali_pm_domain *domain,
+			      struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	/*
+	 * Use addtail because virtual group is created last and it needs
+	 * to be at the end of the list (in order to be activated after
+	 * all children.
+	 */
+	_mali_osk_list_addtail(&group->pm_domain_list, &domain->group_list);
+}
+
+void mali_pm_domain_add_l2_cache(struct mali_pm_domain *domain,
+				 struct mali_l2_cache_core *l2_cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT_POINTER(l2_cache);
+	_mali_osk_list_add(&l2_cache->pm_domain_list, &domain->l2_cache_list);
+}
+
+struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask)
+{
+	u32 id = 0;
+
+	if (0 == mask) {
+		return NULL;
+	}
+
+	id = _mali_osk_fls(mask) - 1;
+
+	MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+	/* Verify that pmu_mask only one bit is set */
+	MALI_DEBUG_ASSERT((1 << id) == mask);
+
+	return mali_pm_domains[id];
+}
+
+struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id)
+{
+	MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+
+	return mali_pm_domains[id];
+}
+
+u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+
+	if (0 == domain->use_count) {
+		_mali_osk_pm_dev_ref_get_async();
+	}
+
+	++domain->use_count;
+	MALI_DEBUG_PRINT(4, ("PM domain %p: ref_get, use_count => %u\n", domain, domain->use_count));
+
+	/* Return our mask so caller can check this against wanted mask */
+	return domain->pmu_mask;
+}
+
+u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+
+	--domain->use_count;
+	MALI_DEBUG_PRINT(4, ("PM domain %p: ref_put, use_count => %u\n", domain, domain->use_count));
+
+	if (0 == domain->use_count) {
+		_mali_osk_pm_dev_ref_put();
+	}
+
+	/*
+	 * Return the PMU mask which now could be be powered down
+	 * (the bit for this domain).
+	 * This is the responsibility of the caller (mali_pm)
+	 */
+	return (0 == domain->use_count ? domain->pmu_mask : 0);
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_domain_get_id(struct mali_pm_domain *domain)
+{
+	u32 id = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT(0 != domain->pmu_mask);
+
+	id = _mali_osk_fls(domain->pmu_mask) - 1;
+
+	MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+	/* Verify that pmu_mask only one bit is set */
+	MALI_DEBUG_ASSERT((1 << id) == domain->pmu_mask);
+	/* Verify that we have stored the domain at right id/index */
+	MALI_DEBUG_ASSERT(domain == mali_pm_domains[id]);
+
+	return id;
+}
+#endif
+
+#if defined(DEBUG)
+mali_bool mali_pm_domain_all_unused(void)
+{
+	int i;
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (NULL == mali_pm_domains[i]) {
+			/* Nothing to check */
+			continue;
+		}
+
+		if (MALI_TRUE == mali_pm_domains[i]->power_is_on) {
+			/* Not ready for suspend! */
+			return MALI_FALSE;
+		}
+
+		if (0 != mali_pm_domains[i]->use_count) {
+			/* Not ready for suspend! */
+			return MALI_FALSE;
+		}
+	}
+
+	return MALI_TRUE;
+}
+#endif
diff --git a/mali/common/mali_pm_domain.h b/mali/common/mali_pm_domain.h
new file mode 100755
index 0000000..2ac8c0d
--- /dev/null
+++ b/mali/common/mali_pm_domain.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_DOMAIN_H__
+#define __MALI_PM_DOMAIN_H__
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+#include "mali_l2_cache.h"
+#include "mali_group.h"
+#include "mali_pmu.h"
+
+/* Instances are protected by PM state lock */
+struct mali_pm_domain {
+	mali_bool power_is_on;
+	s32 use_count;
+	u32 pmu_mask;
+
+	/* Zero or more groups can belong to this domain */
+	_mali_osk_list_t group_list;
+
+	/* Zero or more L2 caches can belong to this domain */
+	_mali_osk_list_t l2_cache_list;
+};
+
+
+void mali_pm_domain_initialize(void);
+void mali_pm_domain_terminate(void);
+
+struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask);
+void mali_pm_domain_delete(struct mali_pm_domain *domain);
+
+void mali_pm_domain_add_l2_cache(
+	struct mali_pm_domain *domain,
+	struct mali_l2_cache_core *l2_cache);
+void mali_pm_domain_add_group(struct mali_pm_domain *domain,
+			      struct mali_group *group);
+
+struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask);
+struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id);
+
+/* Ref counting */
+u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain);
+u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain);
+
+MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_group_list(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return &domain->group_list;
+}
+
+MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_l2_cache_list(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return &domain->l2_cache_list;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pm_domain_power_is_on(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->power_is_on;
+}
+
+MALI_STATIC_INLINE void mali_pm_domain_set_power_on(
+	struct mali_pm_domain *domain,
+	mali_bool power_is_on)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	domain->power_is_on = power_is_on;
+}
+
+MALI_STATIC_INLINE u32 mali_pm_domain_get_use_count(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->use_count;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_domain_get_id(struct mali_pm_domain *domain);
+
+MALI_STATIC_INLINE u32 mali_pm_domain_get_mask(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->pmu_mask;
+}
+#endif
+
+#if defined(DEBUG)
+mali_bool mali_pm_domain_all_unused(void);
+#endif
+
+#endif /* __MALI_PM_DOMAIN_H__ */
diff --git a/mali/common/mali_pm_metrics.c b/mali/common/mali_pm_metrics.c
new file mode 100644
index 0000000..981ec81
--- /dev/null
+++ b/mali/common/mali_pm_metrics.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "mali_pm_metrics.h"
+#include "mali_osk_locks.h"
+#include "mali_osk_mali.h"
+#include <linux/ktime.h>
+
+#define MALI_PM_TIME_SHIFT 0
+#define MALI_UTILIZATION_MAX_PERIOD 80000000/* ns = 100ms */
+
+_mali_osk_errcode_t mali_pm_metrics_init(struct mali_device *mdev)
+{
+	int i = 0;
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	mdev->mali_metrics.time_period_start = ktime_get();
+	mdev->mali_metrics.time_period_start_gp = mdev->mali_metrics.time_period_start;
+	mdev->mali_metrics.time_period_start_pp = mdev->mali_metrics.time_period_start;
+
+	mdev->mali_metrics.time_busy = 0;
+	mdev->mali_metrics.time_idle = 0;
+	mdev->mali_metrics.prev_busy = 0;
+	mdev->mali_metrics.prev_idle = 0;
+	mdev->mali_metrics.num_running_gp_cores = 0;
+	mdev->mali_metrics.num_running_pp_cores = 0;
+	mdev->mali_metrics.time_busy_gp = 0;
+	mdev->mali_metrics.time_idle_gp = 0;
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; i++) {
+		mdev->mali_metrics.time_busy_pp[i] = 0;
+		mdev->mali_metrics.time_idle_pp[i] = 0;
+	}
+	mdev->mali_metrics.gpu_active = MALI_FALSE;
+
+	mdev->mali_metrics.lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST);
+	if (NULL == mdev->mali_metrics.lock) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_pm_metrics_term(struct mali_device *mdev)
+{
+	_mali_osk_spinlock_irq_term(mdev->mali_metrics.lock);
+}
+
+/*caller needs to hold mdev->mali_metrics.lock before calling this function*/
+void mali_pm_record_job_status(struct mali_device *mdev)
+{
+	ktime_t now;
+	ktime_t diff;
+	u64 ns_time;
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	now = ktime_get();
+	diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+
+	ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+	mdev->mali_metrics.time_busy += ns_time;
+	mdev->mali_metrics.time_period_start = now;
+}
+
+void mali_pm_record_gpu_idle(mali_bool is_gp)
+{
+	ktime_t now;
+	ktime_t diff;
+	u64 ns_time;
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+	now = ktime_get();
+
+	if (MALI_TRUE == is_gp) {
+		--mdev->mali_metrics.num_running_gp_cores;
+		if (0 == mdev->mali_metrics.num_running_gp_cores) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_gp);
+			ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_busy_gp += ns_time;
+			mdev->mali_metrics.time_period_start_gp = now;
+
+			if (0 == mdev->mali_metrics.num_running_pp_cores) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_busy += ns_time;
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_FALSE;
+			}
+		}
+	} else {
+		--mdev->mali_metrics.num_running_pp_cores;
+		if (0 == mdev->mali_metrics.num_running_pp_cores) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_pp);
+			ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_busy_pp[0] += ns_time;
+			mdev->mali_metrics.time_period_start_pp = now;
+
+			if (0 == mdev->mali_metrics.num_running_gp_cores) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_busy += ns_time;
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_FALSE;
+			}
+		}
+	}
+
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_record_gpu_active(mali_bool is_gp)
+{
+	ktime_t now;
+	ktime_t diff;
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+	now = ktime_get();
+
+	if (MALI_TRUE == is_gp) {
+		mdev->mali_metrics.num_running_gp_cores++;
+		if (1 == mdev->mali_metrics.num_running_gp_cores) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_gp);
+			mdev->mali_metrics.time_idle_gp += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_period_start_gp = now;
+			if (0 == mdev->mali_metrics.num_running_pp_cores) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_FALSE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_TRUE;
+			}
+		} else {
+			MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+		}
+	} else {
+		mdev->mali_metrics.num_running_pp_cores++;
+		if (1 == mdev->mali_metrics.num_running_pp_cores) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_pp);
+			mdev->mali_metrics.time_idle_pp[0] += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_period_start_pp = now;
+			if (0 == mdev->mali_metrics.num_running_gp_cores) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_FALSE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_TRUE;
+			}
+		} else {
+			MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+		}
+	}
+
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+
+/*caller needs to hold mdev->mali_metrics.lock before calling this function*/
+static void mali_pm_get_dvfs_utilisation_calc(struct mali_device *mdev, ktime_t now)
+{
+	ktime_t diff;
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+
+	if (mdev->mali_metrics.gpu_active) {
+		mdev->mali_metrics.time_busy += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+	} else {
+		mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+	}
+}
+
+/* Caller needs to hold mdev->mali_metrics.lock before calling this function. */
+static void mali_pm_reset_dvfs_utilisation_unlocked(struct mali_device *mdev, ktime_t now)
+{
+	/* Store previous value */
+	mdev->mali_metrics.prev_idle = mdev->mali_metrics.time_idle;
+	mdev->mali_metrics.prev_busy = mdev->mali_metrics.time_busy;
+
+	/* Reset current values */
+	mdev->mali_metrics.time_period_start = now;
+	mdev->mali_metrics.time_period_start_gp = now;
+	mdev->mali_metrics.time_period_start_pp = now;
+	mdev->mali_metrics.time_idle = 0;
+	mdev->mali_metrics.time_busy = 0;
+
+	mdev->mali_metrics.time_busy_gp = 0;
+	mdev->mali_metrics.time_idle_gp = 0;
+	mdev->mali_metrics.time_busy_pp[0] = 0;
+	mdev->mali_metrics.time_idle_pp[0] = 0;
+}
+
+void mali_pm_reset_dvfs_utilisation(struct mali_device *mdev)
+{
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+	mali_pm_reset_dvfs_utilisation_unlocked(mdev, ktime_get());
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_get_dvfs_utilisation(struct mali_device *mdev,
+				  unsigned long *total_out, unsigned long *busy_out)
+{
+	ktime_t now = ktime_get();
+	u64 busy = 0;
+	u64 total = 0;
+
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+
+	mali_pm_get_dvfs_utilisation_calc(mdev, now);
+
+	busy = mdev->mali_metrics.time_busy;
+	total = busy + mdev->mali_metrics.time_idle;
+
+	/* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
+	 * 100ms) */
+	if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+		mali_pm_reset_dvfs_utilisation_unlocked(mdev, now);
+	} else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+		total += mdev->mali_metrics.prev_idle +
+			 mdev->mali_metrics.prev_busy;
+		busy += mdev->mali_metrics.prev_busy;
+	}
+
+	*total_out = (unsigned long)total;
+	*busy_out = (unsigned long)busy;
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_metrics_spin_lock(void)
+{
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_metrics_spin_unlock(void)
+{
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
diff --git a/mali/common/mali_pm_metrics.h b/mali/common/mali_pm_metrics.h
new file mode 100644
index 0000000..256f448
--- /dev/null
+++ b/mali/common/mali_pm_metrics.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_METRICS_H__
+#define __MALI_PM_METRICS_H__
+
+#ifdef CONFIG_MALI_DEVFREQ
+#include "mali_osk_locks.h"
+#include "mali_group.h"
+
+struct mali_device;
+
+/**
+ * Metrics data collected for use by the power management framework.
+ */
+struct mali_pm_metrics_data {
+	ktime_t time_period_start;
+	u64 time_busy;
+	u64 time_idle;
+	u64 prev_busy;
+	u64 prev_idle;
+	u32 num_running_gp_cores;
+	u32 num_running_pp_cores;
+	ktime_t time_period_start_gp;
+	u64 time_busy_gp;
+	u64 time_idle_gp;
+	ktime_t time_period_start_pp;
+	u64 time_busy_pp[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	u64 time_idle_pp[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	mali_bool gpu_active;
+	_mali_osk_spinlock_irq_t *lock;
+};
+
+/**
+ * Initialize/start the Mali GPU pm_metrics metrics reporting.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_pm_metrics_init(struct mali_device *mdev);
+
+/**
+ * Terminate the Mali GPU pm_metrics metrics reporting
+ */
+void mali_pm_metrics_term(struct mali_device *mdev);
+
+/**
+ * Should be called when a job is about to execute a GPU job
+ */
+void mali_pm_record_gpu_active(mali_bool is_gp);
+
+/**
+ * Should be called when a job is finished
+ */
+void mali_pm_record_gpu_idle(mali_bool is_gp);
+
+void mali_pm_reset_dvfs_utilisation(struct mali_device *mdev);
+
+void mali_pm_get_dvfs_utilisation(struct mali_device *mdev, unsigned long *total_out, unsigned long *busy_out);
+
+void mali_pm_metrics_spin_lock(void);
+
+void mali_pm_metrics_spin_unlock(void);
+#else
+void mali_pm_record_gpu_idle(mali_bool is_gp) {}
+void mali_pm_record_gpu_active(mali_bool is_gp) {}
+#endif
+#endif /* __MALI_PM_METRICS_H__ */
diff --git a/mali/common/mali_pmu.c b/mali/common/mali_pmu.c
new file mode 100755
index 0000000..bf0d413
--- /dev/null
+++ b/mali/common/mali_pmu.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_pmu.c
+ * Mali driver functions for Mali 400 PMU hardware
+ */
+#include "mali_hw_core.h"
+#include "mali_pmu.h"
+#include "mali_pp.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_pm.h"
+#include "mali_osk_mali.h"
+
+struct mali_pmu_core *mali_global_pmu_core = NULL;
+
+static _mali_osk_errcode_t mali_pmu_wait_for_command_finish(
+	struct mali_pmu_core *pmu);
+
+struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource)
+{
+	struct mali_pmu_core *pmu;
+
+	MALI_DEBUG_ASSERT(NULL == mali_global_pmu_core);
+	MALI_DEBUG_PRINT(2, ("Mali PMU: Creating Mali PMU core\n"));
+
+	pmu = (struct mali_pmu_core *)_mali_osk_malloc(
+		      sizeof(struct mali_pmu_core));
+	if (NULL != pmu) {
+		pmu->registered_cores_mask = 0; /* to be set later */
+
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&pmu->hw_core,
+				resource, PMU_REGISTER_ADDRESS_SPACE_SIZE)) {
+
+			pmu->switch_delay = _mali_osk_get_pmu_switch_delay();
+
+			mali_global_pmu_core = pmu;
+
+			return pmu;
+		}
+		_mali_osk_free(pmu);
+	}
+
+	return NULL;
+}
+
+void mali_pmu_delete(struct mali_pmu_core *pmu)
+{
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu == mali_global_pmu_core);
+
+	MALI_DEBUG_PRINT(2, ("Mali PMU: Deleting Mali PMU core\n"));
+
+	mali_global_pmu_core = NULL;
+
+	mali_hw_core_delete(&pmu->hw_core);
+	_mali_osk_free(pmu);
+}
+
+void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask)
+{
+	pmu->registered_cores_mask = mask;
+}
+
+void mali_pmu_reset(struct mali_pmu_core *pmu)
+{
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	/* Setup the desired defaults */
+	mali_hw_core_register_write_relaxed(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_INT_MASK, 0);
+	mali_hw_core_register_write_relaxed(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_SW_DELAY, pmu->switch_delay);
+}
+
+void mali_pmu_power_up_all(struct mali_pmu_core *pmu)
+{
+	u32 stat;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	mali_pm_exec_lock();
+
+	mali_pmu_reset(pmu);
+
+	/* Now simply power up the domains which are marked as powered down */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	mali_pmu_power_up(pmu, stat);
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pmu_power_down_all(struct mali_pmu_core *pmu)
+{
+	u32 stat;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	mali_pm_exec_lock();
+
+	/* Now simply power down the domains which are marked as powered up */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	mali_pmu_power_down(pmu, (~stat) & pmu->registered_cores_mask);
+
+	mali_pm_exec_unlock();
+}
+
+_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask)
+{
+	u32 stat;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+	MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask);
+	MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core,
+				PMU_REG_ADDR_MGMT_INT_RAWSTAT) &
+				PMU_REG_VAL_IRQ));
+
+	MALI_DEBUG_PRINT(3,
+			 ("PMU power down: ...................... [%s]\n",
+			  mali_pm_mask_to_string(mask)));
+
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+
+	/*
+	 * Assert that we are not powering down domains which are already
+	 * powered down.
+	 */
+	MALI_DEBUG_ASSERT(0 == (stat & mask));
+
+	mask  &= ~(0x1 << MALI_DOMAIN_INDEX_DUMMY);
+
+	if (0 == mask || 0 == ((~stat) & mask)) return _MALI_OSK_ERR_OK;
+
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_POWER_DOWN, mask);
+
+	/*
+	 * Do not wait for interrupt on Mali-300/400 if all domains are
+	 * powered off by our power down command, because the HW will simply
+	 * not generate an interrupt in this case.
+	 */
+	if (mali_is_mali450() || mali_is_mali470() || pmu->registered_cores_mask != (mask | stat)) {
+		err = mali_pmu_wait_for_command_finish(pmu);
+		if (_MALI_OSK_ERR_OK != err) {
+			return err;
+		}
+	} else {
+		mali_hw_core_register_write(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ);
+	}
+
+#if defined(DEBUG)
+	/* Verify power status of domains after power down */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	MALI_DEBUG_ASSERT(mask == (stat & mask));
+#endif
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask)
+{
+	u32 stat;
+	_mali_osk_errcode_t err;
+#if !defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP)
+	u32 current_domain;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+	MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask);
+	MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core,
+				PMU_REG_ADDR_MGMT_INT_RAWSTAT) &
+				PMU_REG_VAL_IRQ));
+
+	MALI_DEBUG_PRINT(3,
+			 ("PMU power up: ........................ [%s]\n",
+			  mali_pm_mask_to_string(mask)));
+
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	stat &= pmu->registered_cores_mask;
+
+	mask  &= ~(0x1 << MALI_DOMAIN_INDEX_DUMMY);
+	if (0 == mask || 0 == (stat & mask)) return _MALI_OSK_ERR_OK;
+
+	/*
+	 * Assert that we are only powering up domains which are currently
+	 * powered down.
+	 */
+	MALI_DEBUG_ASSERT(mask == (stat & mask));
+
+#if defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP)
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_POWER_UP, mask);
+
+	err = mali_pmu_wait_for_command_finish(pmu);
+	if (_MALI_OSK_ERR_OK != err) {
+		return err;
+	}
+#else
+	for (current_domain = 1;
+	     current_domain <= pmu->registered_cores_mask;
+	     current_domain <<= 1) {
+		if (current_domain & mask & stat) {
+			mali_hw_core_register_write(&pmu->hw_core,
+						    PMU_REG_ADDR_MGMT_POWER_UP,
+						    current_domain);
+
+			err = mali_pmu_wait_for_command_finish(pmu);
+			if (_MALI_OSK_ERR_OK != err) {
+				return err;
+			}
+		}
+	}
+#endif
+
+#if defined(DEBUG)
+	/* Verify power status of domains after power up */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	MALI_DEBUG_ASSERT(0 == (stat & mask));
+#endif /* defined(DEBUG) */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_pmu_wait_for_command_finish(
+	struct mali_pmu_core *pmu)
+{
+	u32 rawstat;
+	u32 timeout = MALI_REG_POLL_COUNT_SLOW;
+
+	MALI_DEBUG_ASSERT(pmu);
+
+	/* Wait for the command to complete */
+	do {
+		rawstat = mali_hw_core_register_read(&pmu->hw_core,
+						     PMU_REG_ADDR_MGMT_INT_RAWSTAT);
+		--timeout;
+	} while (0 == (rawstat & PMU_REG_VAL_IRQ) && 0 < timeout);
+
+	MALI_DEBUG_ASSERT(0 < timeout);
+
+	if (0 == timeout) {
+		return _MALI_OSK_ERR_TIMEOUT;
+	}
+
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ);
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/mali/common/mali_pmu.h b/mali/common/mali_pmu.h
new file mode 100755
index 0000000..36ff58e
--- /dev/null
+++ b/mali/common/mali_pmu.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_platform.h
+ * Platform specific Mali driver functions
+ */
+
+#ifndef __MALI_PMU_H__
+#define __MALI_PMU_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_hw_core.h"
+
+/** @brief MALI inbuilt PMU hardware info and PMU hardware has knowledge of cores power mask
+ */
+struct mali_pmu_core {
+	struct mali_hw_core hw_core;
+	u32 registered_cores_mask;
+	u32 switch_delay;
+};
+
+/** @brief Register layout for hardware PMU
+ */
+typedef enum {
+	PMU_REG_ADDR_MGMT_POWER_UP                  = 0x00,     /*< Power up register */
+	PMU_REG_ADDR_MGMT_POWER_DOWN                = 0x04,     /*< Power down register */
+	PMU_REG_ADDR_MGMT_STATUS                    = 0x08,     /*< Core sleep status register */
+	PMU_REG_ADDR_MGMT_INT_MASK                  = 0x0C,     /*< Interrupt mask register */
+	PMU_REG_ADDR_MGMT_INT_RAWSTAT               = 0x10,     /*< Interrupt raw status register */
+	PMU_REG_ADDR_MGMT_INT_CLEAR                 = 0x18,     /*< Interrupt clear register */
+	PMU_REG_ADDR_MGMT_SW_DELAY                  = 0x1C,     /*< Switch delay register */
+	PMU_REGISTER_ADDRESS_SPACE_SIZE             = 0x28,     /*< Size of register space */
+} pmu_reg_addr_mgmt_addr;
+
+#define PMU_REG_VAL_IRQ 1
+
+extern struct mali_pmu_core *mali_global_pmu_core;
+
+/** @brief Initialisation of MALI PMU
+ *
+ * This is called from entry point of the driver in order to create and intialize the PMU resource
+ *
+ * @param resource it will be a pointer to a PMU resource
+ * @param number_of_pp_cores Number of found PP resources in configuration
+ * @param number_of_l2_caches Number of found L2 cache resources in configuration
+ * @return The created PMU object, or NULL in case of failure.
+ */
+struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource);
+
+/** @brief It deallocates the PMU resource
+ *
+ * This is called on the exit of the driver to terminate the PMU resource
+ *
+ * @param pmu Pointer to PMU core object to delete
+ */
+void mali_pmu_delete(struct mali_pmu_core *pmu);
+
+/** @brief Set registered cores mask
+ *
+ * @param pmu Pointer to PMU core object
+ * @param mask All available/valid domain bits
+ */
+void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask);
+
+/** @brief Retrieves the Mali PMU core object (if any)
+ *
+ * @return The Mali PMU object, or NULL if no PMU exists.
+ */
+MALI_STATIC_INLINE struct mali_pmu_core *mali_pmu_get_global_pmu_core(void)
+{
+	return mali_global_pmu_core;
+}
+
+/** @brief Reset PMU core
+ *
+ * @param pmu Pointer to PMU core object to reset
+ */
+void mali_pmu_reset(struct mali_pmu_core *pmu);
+
+void mali_pmu_power_up_all(struct mali_pmu_core *pmu);
+
+void mali_pmu_power_down_all(struct mali_pmu_core *pmu);
+
+/** @brief Returns a mask of the currently powered up domains
+ *
+ * @param pmu Pointer to PMU core object
+ */
+MALI_STATIC_INLINE u32 mali_pmu_get_mask(struct mali_pmu_core *pmu)
+{
+	u32 stat = mali_hw_core_register_read(&pmu->hw_core, PMU_REG_ADDR_MGMT_STATUS);
+	return ((~stat) & pmu->registered_cores_mask);
+}
+
+/** @brief MALI GPU power down using MALI in-built PMU
+ *
+ * Called to power down the specified cores.
+ *
+ * @param pmu Pointer to PMU core object to power down
+ * @param mask Mask specifying which power domains to power down
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask);
+
+/** @brief MALI GPU power up using MALI in-built PMU
+ *
+ * Called to power up the specified cores.
+ *
+ * @param pmu Pointer to PMU core object to power up
+ * @param mask Mask specifying which power domains to power up
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask);
+
+#endif /* __MALI_PMU_H__ */
diff --git a/mali/common/mali_pp.c b/mali/common/mali_pp.c
new file mode 100755
index 0000000..e1cd470
--- /dev/null
+++ b/mali/common/mali_pp.c
@@ -0,0 +1,508 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pp_job.h"
+#include "mali_pp.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "regs/mali_200_regs.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+
+#include <mali_platform.h>
+
+/* Number of frame registers on Mali-200 */
+#define MALI_PP_MALI200_NUM_FRAME_REGISTERS ((0x04C/4)+1)
+/* Number of frame registers on Mali-300 and later */
+#define MALI_PP_MALI400_NUM_FRAME_REGISTERS ((0x058/4)+1)
+
+static struct mali_pp_core *mali_global_pp_cores[MALI_MAX_NUMBER_OF_PP_CORES] = { NULL };
+static u32 mali_global_num_pp_cores = 0;
+
+/* Interrupt handlers */
+static void mali_pp_irq_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data);
+
+struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id)
+{
+	struct mali_pp_core *core = NULL;
+
+	MALI_DEBUG_PRINT(2, ("Mali PP: Creating Mali PP core: %s\n", resource->description));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Base address of PP core: 0x%x\n", resource->base));
+
+	if (mali_global_num_pp_cores >= MALI_MAX_NUMBER_OF_PP_CORES) {
+		MALI_PRINT_ERROR(("Mali PP: Too many PP core objects created\n"));
+		return NULL;
+	}
+
+	core = _mali_osk_calloc(1, sizeof(struct mali_pp_core));
+	if (NULL != core) {
+		core->core_id = mali_global_num_pp_cores;
+		core->bcast_id = bcast_id;
+
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALI200_REG_SIZEOF_REGISTER_BANK)) {
+			_mali_osk_errcode_t ret;
+
+			if (!is_virtual) {
+				ret = mali_pp_reset(core);
+			} else {
+				ret = _MALI_OSK_ERR_OK;
+			}
+
+			if (_MALI_OSK_ERR_OK == ret) {
+				ret = mali_group_add_pp_core(group, core);
+				if (_MALI_OSK_ERR_OK == ret) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					MALI_DEBUG_ASSERT(!is_virtual || -1 != resource->irq);
+
+					core->irq = _mali_osk_irq_init(resource->irq,
+								       mali_group_upper_half_pp,
+								       group,
+								       mali_pp_irq_probe_trigger,
+								       mali_pp_irq_probe_ack,
+								       core,
+								       resource->description);
+					if (NULL != core->irq) {
+						mali_global_pp_cores[mali_global_num_pp_cores] = core;
+						mali_global_num_pp_cores++;
+
+						return core;
+					} else {
+						MALI_PRINT_ERROR(("Mali PP: Failed to setup interrupt handlers for PP core %s\n", core->hw_core.description));
+					}
+					mali_group_remove_pp_core(group);
+				} else {
+					MALI_PRINT_ERROR(("Mali PP: Failed to add core %s to group\n", core->hw_core.description));
+				}
+			}
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Mali PP: Failed to allocate memory for PP core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_pp_delete(struct mali_pp_core *core)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	_mali_osk_irq_term(core->irq);
+	mali_hw_core_delete(&core->hw_core);
+
+	/* Remove core from global list */
+	for (i = 0; i < mali_global_num_pp_cores; i++) {
+		if (mali_global_pp_cores[i] == core) {
+			mali_global_pp_cores[i] = NULL;
+			mali_global_num_pp_cores--;
+
+			if (i != mali_global_num_pp_cores) {
+				/* We removed a PP core from the middle of the array -- move the last
+				 * PP core to the current position to close the gap */
+				mali_global_pp_cores[i] = mali_global_pp_cores[mali_global_num_pp_cores];
+				mali_global_pp_cores[mali_global_num_pp_cores] = NULL;
+			}
+
+			break;
+		}
+	}
+
+	_mali_osk_free(core);
+}
+
+void mali_pp_stop_bus(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	/* Will only send the stop bus command, and not wait for it to complete */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_STOP_BUS);
+}
+
+_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core)
+{
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Send the stop bus command. */
+	mali_pp_stop_bus(core);
+
+	/* Wait for bus to be stopped */
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		if (mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS) & MALI200_REG_VAL_STATUS_BUS_STOPPED)
+			break;
+	}
+
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Mali PP: Failed to stop bus on %s. Status: 0x%08x\n", core->hw_core.description, mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/* Frame register reset values.
+ * Taken from the Mali400 TRM, 3.6. Pixel processor control register summary */
+static const u32 mali_frame_registers_reset_values[_MALI_PP_MAX_FRAME_REGISTERS] = {
+	0x0, /* Renderer List Address Register */
+	0x0, /* Renderer State Word Base Address Register */
+	0x0, /* Renderer Vertex Base Register */
+	0x2, /* Feature Enable Register */
+	0x0, /* Z Clear Value Register */
+	0x0, /* Stencil Clear Value Register */
+	0x0, /* ABGR Clear Value 0 Register */
+	0x0, /* ABGR Clear Value 1 Register */
+	0x0, /* ABGR Clear Value 2 Register */
+	0x0, /* ABGR Clear Value 3 Register */
+	0x0, /* Bounding Box Left Right Register */
+	0x0, /* Bounding Box Bottom Register */
+	0x0, /* FS Stack Address Register */
+	0x0, /* FS Stack Size and Initial Value Register */
+	0x0, /* Reserved */
+	0x0, /* Reserved */
+	0x0, /* Origin Offset X Register */
+	0x0, /* Origin Offset Y Register */
+	0x75, /* Subpixel Specifier Register */
+	0x0, /* Tiebreak mode Register */
+	0x0, /* Polygon List Format Register */
+	0x0, /* Scaling Register */
+	0x0 /* Tilebuffer configuration Register */
+};
+
+/* WBx register reset values */
+static const u32 mali_wb_registers_reset_values[_MALI_PP_MAX_WB_REGISTERS] = {
+	0x0, /* WBx Source Select Register */
+	0x0, /* WBx Target Address Register */
+	0x0, /* WBx Target Pixel Format Register */
+	0x0, /* WBx Target AA Format Register */
+	0x0, /* WBx Target Layout */
+	0x0, /* WBx Target Scanline Length */
+	0x0, /* WBx Target Flags Register */
+	0x0, /* WBx MRT Enable Register */
+	0x0, /* WBx MRT Offset Register */
+	0x0, /* WBx Global Test Enable Register */
+	0x0, /* WBx Global Test Reference Value Register */
+	0x0  /* WBx Global Test Compare Function Register */
+};
+
+/* Performance Counter 0 Enable Register reset value */
+static const u32 mali_perf_cnt_enable_reset_value = 0;
+
+extern int pp_hardware_reset;
+_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core)
+{
+	/* Bus must be stopped before calling this function */
+	const u32 reset_wait_target_register = MALI200_REG_ADDR_MGMT_PERF_CNT_0_LIMIT;
+	const u32 reset_invalid_value = 0xC0FFE000;
+	const u32 reset_check_value = 0xC01A0000;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+	MALI_DEBUG_PRINT(2, ("Mali PP: Hard reset of core %s\n", core->hw_core.description));
+	pp_hardware_reset ++;
+
+	/* Set register to a bogus value. The register will be used to detect when reset is complete */
+	mali_hw_core_register_write_relaxed(&core->hw_core, reset_wait_target_register, reset_invalid_value);
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE);
+
+	/* Force core to reset */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET);
+
+	/* Wait for reset to be complete */
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value);
+		if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) {
+			break;
+		}
+	}
+
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Mali PP: The hard reset loop didn't work, unable to recover\n"));
+	}
+
+	mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, 0x00000000); /* set it back to the default */
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_pp_reset_async(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	MALI_DEBUG_PRINT(4, ("Mali PP: Reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET);
+}
+
+_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core)
+{
+	int i;
+	u32 rawstat = 0;
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		u32 status =  mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS);
+		if (!(status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE)) {
+			rawstat = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT);
+			if (rawstat == MALI400PP_REG_VAL_IRQ_RESET_COMPLETED) {
+				break;
+			}
+		}
+	}
+
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Mali PP: Failed to reset core %s, rawstat: 0x%08x\n",
+				  core->hw_core.description, rawstat));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core)
+{
+	mali_pp_reset_async(core);
+	return mali_pp_reset_wait(core);
+}
+
+void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual)
+{
+	u32 relative_address;
+	u32 start_index;
+	u32 nr_of_regs;
+	u32 *frame_registers = mali_pp_job_get_frame_registers(job);
+	u32 *wb0_registers = mali_pp_job_get_wb0_registers(job);
+	u32 *wb1_registers = mali_pp_job_get_wb1_registers(job);
+	u32 *wb2_registers = mali_pp_job_get_wb2_registers(job);
+	u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, sub_job);
+	u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, sub_job);
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Write frame registers */
+
+	/*
+	 * There are two frame registers which are different for each sub job:
+	 * 1. The Renderer List Address Register (MALI200_REG_ADDR_FRAME)
+	 * 2. The FS Stack Address Register (MALI200_REG_ADDR_STACK)
+	 */
+	mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_FRAME, mali_pp_job_get_addr_frame(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_FRAME / sizeof(u32)]);
+
+	/* For virtual jobs, the stack address shouldn't be broadcast but written individually */
+	if (!mali_pp_job_is_virtual(job) || restart_virtual) {
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_STACK, mali_pp_job_get_addr_stack(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_STACK / sizeof(u32)]);
+	}
+
+	/* Write registers between MALI200_REG_ADDR_FRAME and MALI200_REG_ADDR_STACK */
+	relative_address = MALI200_REG_ADDR_RSW;
+	start_index = MALI200_REG_ADDR_RSW / sizeof(u32);
+	nr_of_regs = (MALI200_REG_ADDR_STACK - MALI200_REG_ADDR_RSW) / sizeof(u32);
+
+	mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core,
+			relative_address, &frame_registers[start_index],
+			nr_of_regs, &mali_frame_registers_reset_values[start_index]);
+
+	/* MALI200_REG_ADDR_STACK_SIZE */
+	relative_address = MALI200_REG_ADDR_STACK_SIZE;
+	start_index = MALI200_REG_ADDR_STACK_SIZE / sizeof(u32);
+
+	mali_hw_core_register_write_relaxed_conditional(&core->hw_core,
+			relative_address, frame_registers[start_index],
+			mali_frame_registers_reset_values[start_index]);
+
+	/* Skip 2 reserved registers */
+
+	/* Write remaining registers */
+	relative_address = MALI200_REG_ADDR_ORIGIN_OFFSET_X;
+	start_index = MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32);
+	nr_of_regs = MALI_PP_MALI400_NUM_FRAME_REGISTERS - MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32);
+
+	mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core,
+			relative_address, &frame_registers[start_index],
+			nr_of_regs, &mali_frame_registers_reset_values[start_index]);
+
+	/* Write WBx registers */
+	if (wb0_registers[0]) { /* M200_WB0_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB0, wb0_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (wb1_registers[0]) { /* M200_WB1_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB1, wb1_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (wb2_registers[0]) { /* M200_WB2_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB2, wb2_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0);
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value);
+	}
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1);
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value);
+	}
+
+#ifdef CONFIG_MALI400_HEATMAPS_ENABLED
+	if (job->uargs.perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERFMON_CONTR, ((job->uargs.tilesx & 0x3FF) << 16) | 1);
+		mali_hw_core_register_write_relaxed(&core->hw_core,  MALI200_REG_ADDR_MGMT_PERFMON_BASE, job->uargs.heatmap_mem & 0xFFFFFFF8);
+	}
+#endif /* CONFIG_MALI400_HEATMAPS_ENABLED */
+
+	MALI_DEBUG_PRINT(3, ("Mali PP: Starting job 0x%08X part %u/%u on PP core %s\n", job, sub_job + 1, mali_pp_job_get_sub_job_count(job), core->hw_core.description));
+
+	/* Adding barrier to make sure all rester writes are finished */
+	_mali_osk_write_mem_barrier();
+
+	/* This is the command that starts the core.
+	 *
+	 * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just
+	 * force core to assert the completion interrupt.
+	 */
+#if !defined(PROFILING_SKIP_PP_JOBS)
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_START_RENDERING);
+#else
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_END_OF_FRAME);
+#endif
+
+	/* Adding barrier to make sure previous rester writes is finished */
+	_mali_osk_write_mem_barrier();
+}
+
+u32 mali_pp_core_get_version(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION);
+}
+
+struct mali_pp_core *mali_pp_get_global_pp_core(u32 index)
+{
+	if (mali_global_num_pp_cores > index) {
+		return mali_global_pp_cores[index];
+	}
+
+	return NULL;
+}
+
+u32 mali_pp_get_glob_num_pp_cores(void)
+{
+	return mali_global_num_pp_cores;
+}
+
+/* ------------- interrupt handling below ------------------ */
+static void mali_pp_irq_probe_trigger(void *data)
+{
+	struct mali_pp_core *core = (struct mali_pp_core *)data;
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_BUS_ERROR);
+	_mali_osk_mem_barrier();
+}
+
+static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data)
+{
+	struct mali_pp_core *core = (struct mali_pp_core *)data;
+	u32 irq_readout;
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS);
+	if (MALI200_REG_VAL_IRQ_BUS_ERROR & irq_readout) {
+		mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_BUS_ERROR);
+		_mali_osk_mem_barrier();
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+#if 0
+static void mali_pp_print_registers(struct mali_pp_core *core)
+{
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_VERSION = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_RAWSTAT = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_MASK = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE)));
+}
+#endif
+
+#if 0
+void mali_pp_print_state(struct mali_pp_core *core)
+{
+	MALI_DEBUG_PRINT(2, ("Mali PP: State: 0x%08x\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+}
+#endif
+
+void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob)
+{
+	u32 val0 = 0;
+	u32 val1 = 0;
+	u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, subjob);
+	u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, subjob);
+#if defined(CONFIG_MALI400_PROFILING)
+	int counter_index = COUNTER_FP_0_C0 + (2 * child->core_id);
+#endif
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		val0 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE);
+		mali_pp_job_set_perf_counter_value0(job, subjob, val0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(counter_index, val0);
+		_mali_osk_profiling_record_global_counters(counter_index, val0);
+#endif
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		val1 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE);
+		mali_pp_job_set_perf_counter_value1(job, subjob, val1);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(counter_index + 1, val1);
+		_mali_osk_profiling_record_global_counters(counter_index + 1, val1);
+#endif
+	}
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\tPP #%d: %s\n", core->core_id, core->hw_core.description);
+
+	return n;
+}
+#endif
diff --git a/mali/common/mali_pp.h b/mali/common/mali_pp.h
new file mode 100755
index 0000000..7633ecd
--- /dev/null
+++ b/mali/common/mali_pp.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PP_H__
+#define __MALI_PP_H__
+
+#include "mali_osk.h"
+#include "mali_pp_job.h"
+#include "mali_hw_core.h"
+
+struct mali_group;
+
+#define MALI_MAX_NUMBER_OF_PP_CORES        9
+
+/**
+ * Definition of the PP core struct
+ * Used to track a PP core in the system.
+ */
+struct mali_pp_core {
+	struct mali_hw_core  hw_core;           /**< Common for all HW cores */
+	_mali_osk_irq_t     *irq;               /**< IRQ handler */
+	u32                  core_id;           /**< Unique core ID */
+	u32                  bcast_id;          /**< The "flag" value used by the Mali-450 broadcast and DLBU unit */
+};
+
+_mali_osk_errcode_t mali_pp_initialize(void);
+void mali_pp_terminate(void);
+
+struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id);
+void mali_pp_delete(struct mali_pp_core *core);
+
+void mali_pp_stop_bus(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core);
+void mali_pp_reset_async(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core);
+
+void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual);
+
+u32 mali_pp_core_get_version(struct mali_pp_core *core);
+
+MALI_STATIC_INLINE u32 mali_pp_core_get_id(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return core->core_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_core_get_bcast_id(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return core->bcast_id;
+}
+
+struct mali_pp_core *mali_pp_get_global_pp_core(u32 index);
+u32 mali_pp_get_glob_num_pp_cores(void);
+
+/* Debug */
+u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size);
+
+/**
+ * Put instrumented HW counters from the core(s) to the job object (if enabled)
+ *
+ * parent and child is always the same, except for virtual jobs on Mali-450.
+ * In this case, the counters will be enabled on the virtual core (parent),
+ * but values need to be read from the child cores.
+ *
+ * @param parent The core used to see if the counters was enabled
+ * @param child The core to actually read the values from
+ * @job Job object to update with counter values (if enabled)
+ * @subjob Which subjob the counters are applicable for (core ID for virtual jobs)
+ */
+void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob);
+
+MALI_STATIC_INLINE const char *mali_pp_core_description(struct mali_pp_core *core)
+{
+	return core->hw_core.description;
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_pp_get_interrupt_result(struct mali_pp_core *core)
+{
+	u32 rawstat_used = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT) &
+			   MALI200_REG_VAL_IRQ_MASK_USED;
+	if (0 == rawstat_used) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	} else if (MALI200_REG_VAL_IRQ_END_OF_FRAME == rawstat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS;
+	}
+
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_get_rawstat(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core,
+					  MALI200_REG_ADDR_MGMT_INT_RAWSTAT);
+}
+
+
+MALI_STATIC_INLINE u32 mali_pp_is_active(struct mali_pp_core *core)
+{
+	u32 status = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS);
+	return (status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_pp_mask_all_interrupts(struct mali_pp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE);
+}
+
+MALI_STATIC_INLINE void mali_pp_enable_interrupts(struct mali_pp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+}
+
+MALI_STATIC_INLINE void mali_pp_write_addr_renderer_list(struct mali_pp_core *core,
+		struct mali_pp_job *job, u32 subjob)
+{
+	u32 addr = mali_pp_job_get_addr_frame(job, subjob);
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_FRAME, addr);
+}
+
+
+MALI_STATIC_INLINE void mali_pp_write_addr_stack(struct mali_pp_core *core, struct mali_pp_job *job)
+{
+	u32 addr = mali_pp_job_get_addr_stack(job, core->core_id);
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_STACK, addr);
+}
+
+#endif /* __MALI_PP_H__ */
diff --git a/mali/common/mali_pp_job.c b/mali/common/mali_pp_job.c
new file mode 100755
index 0000000..c2ed210
--- /dev/null
+++ b/mali/common/mali_pp_job.c
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pp.h"
+#include "mali_pp_job.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_executor.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#include "linux/mali_memory_dma_buf.h"
+#endif
+#include "mali_memory_swap_alloc.h"
+#include "mali_scheduler.h"
+
+static u32 pp_counter_src0 = MALI_HW_CORE_NO_COUNTER;   /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+static u32 pp_counter_src1 = MALI_HW_CORE_NO_COUNTER;   /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+static _mali_osk_atomic_t pp_counter_per_sub_job_count; /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */
+static u32 pp_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER };
+static u32 pp_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER };
+
+void mali_pp_job_initialize(void)
+{
+	_mali_osk_atomic_init(&pp_counter_per_sub_job_count, 0);
+}
+
+void mali_pp_job_terminate(void)
+{
+	_mali_osk_atomic_term(&pp_counter_per_sub_job_count);
+}
+
+struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session,
+				       _mali_uk_pp_start_job_s __user *uargs, u32 id)
+{
+	struct mali_pp_job *job;
+	u32 perf_counter_flag;
+
+	job = _mali_osk_calloc(1, sizeof(struct mali_pp_job));
+	if (NULL != job) {
+
+		_mali_osk_list_init(&job->list);
+		_mali_osk_list_init(&job->session_fb_lookup_list);
+		_mali_osk_atomic_inc(&session->number_of_pp_jobs);
+
+		if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_pp_start_job_s))) {
+			goto fail;
+		}
+
+		if (job->uargs.num_cores > _MALI_PP_MAX_SUB_JOBS) {
+			MALI_PRINT_ERROR(("Mali PP job: Too many sub jobs specified in job object\n"));
+			goto fail;
+		}
+
+		if (!mali_pp_job_use_no_notification(job)) {
+			job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_FINISHED, sizeof(_mali_uk_pp_job_finished_s));
+			if (NULL == job->finished_notification) goto fail;
+		}
+
+		perf_counter_flag = mali_pp_job_get_perf_counter_flag(job);
+
+		/* case when no counters came from user space
+		 * so pass the debugfs / DS-5 provided global ones to the job object */
+		if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) ||
+		      (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) {
+			u32 sub_job_count = _mali_osk_atomic_read(&pp_counter_per_sub_job_count);
+
+			/* These counters apply for all virtual jobs, and where no per sub job counter is specified */
+			job->uargs.perf_counter_src0 = pp_counter_src0;
+			job->uargs.perf_counter_src1 = pp_counter_src1;
+
+			/* We only copy the per sub job array if it is enabled with at least one counter */
+			if (0 < sub_job_count) {
+				job->perf_counter_per_sub_job_count = sub_job_count;
+				_mali_osk_memcpy(job->perf_counter_per_sub_job_src0, pp_counter_per_sub_job_src0, sizeof(pp_counter_per_sub_job_src0));
+				_mali_osk_memcpy(job->perf_counter_per_sub_job_src1, pp_counter_per_sub_job_src1, sizeof(pp_counter_per_sub_job_src1));
+			}
+		}
+
+		job->session = session;
+		job->id = id;
+
+		job->sub_jobs_num = job->uargs.num_cores ? job->uargs.num_cores : 1;
+		job->pid = _mali_osk_get_pid();
+		job->tid = _mali_osk_get_tid();
+
+		_mali_osk_atomic_init(&job->sub_jobs_completed, 0);
+		_mali_osk_atomic_init(&job->sub_job_errors, 0);
+		job->swap_status = MALI_NO_SWAP_IN;
+		job->user_notification = MALI_FALSE;
+		job->num_pp_cores_in_virtual = 0;
+
+		if (job->uargs.num_memory_cookies > session->allocation_mgr.mali_allocation_num) {
+			MALI_PRINT_ERROR(("Mali PP job: The number of memory cookies is invalid !\n"));
+			goto fail;
+		}
+
+		if (job->uargs.num_memory_cookies > 0) {
+			u32 size;
+			u32 __user *memory_cookies = (u32 __user *)(uintptr_t)job->uargs.memory_cookies;
+
+			size = sizeof(*memory_cookies) * (job->uargs.num_memory_cookies);
+
+			job->memory_cookies = _mali_osk_malloc(size);
+			if (NULL == job->memory_cookies) {
+				MALI_PRINT_ERROR(("Mali PP job: Failed to allocate %d bytes of memory cookies!\n", size));
+				goto fail;
+			}
+
+			if (0 != _mali_osk_copy_from_user(job->memory_cookies, memory_cookies, size)) {
+				MALI_PRINT_ERROR(("Mali PP job: Failed to copy %d bytes of memory cookies from user!\n", size));
+				goto fail;
+			}
+		}
+
+		if (_MALI_OSK_ERR_OK != mali_pp_job_check(job)) {
+			/* Not a valid job. */
+			goto fail;
+		}
+
+		mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_PP, NULL, job);
+		mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence));
+
+		mali_mem_swap_in_pages(job);
+
+		return job;
+	}
+
+fail:
+	if (NULL != job) {
+		mali_pp_job_delete(job);
+	}
+
+	return NULL;
+}
+
+void mali_pp_job_delete(struct mali_pp_job *job)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list));
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->session_fb_lookup_list));
+
+	session = mali_pp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (NULL != job->memory_cookies) {
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+		/* Unmap buffers attached to job */
+		mali_dma_buf_unmap_job(job);
+#endif
+		if (MALI_NO_SWAP_IN != job->swap_status) {
+			mali_mem_swap_out_pages(job);
+		}
+
+		_mali_osk_free(job->memory_cookies);
+	}
+
+	if (job->user_notification) {
+		mali_scheduler_return_pp_job_to_user(job,
+						     job->num_pp_cores_in_virtual);
+	}
+
+	if (NULL != job->finished_notification) {
+		_mali_osk_notification_delete(job->finished_notification);
+	}
+
+	_mali_osk_atomic_term(&job->sub_jobs_completed);
+	_mali_osk_atomic_term(&job->sub_job_errors);
+	_mali_osk_atomic_dec(&session->number_of_pp_jobs);
+	_mali_osk_free(job);
+
+	_mali_osk_wait_queue_wake_up(session->wait_queue);
+}
+
+void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list)
+{
+	struct mali_pp_job *iter;
+	struct mali_pp_job *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Find position in list/queue where job should be added. */
+	_MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list,
+					    struct mali_pp_job, list) {
+		/* job should be started after iter if iter is in progress. */
+		if (0 < iter->sub_jobs_started) {
+			break;
+		}
+
+		/*
+		 * job should be started after iter if it has a higher
+		 * job id. A span is used to handle job id wrapping.
+		 */
+		if ((mali_pp_job_get_id(job) -
+		     mali_pp_job_get_id(iter)) <
+		    MALI_SCHEDULER_JOB_ID_SPAN) {
+			break;
+		}
+	}
+
+	_mali_osk_list_add(&job->list, &iter->list);
+}
+
+
+u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job)
+{
+	/* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */
+	if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) {
+		return job->uargs.perf_counter_src0;
+	}
+
+	/* Use per sub job counter if enabled... */
+	if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src0[sub_job]) {
+		return job->perf_counter_per_sub_job_src0[sub_job];
+	}
+
+	/* ...else default to global job counter */
+	return job->uargs.perf_counter_src0;
+}
+
+u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job)
+{
+	/* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */
+	if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) {
+		/* Virtual jobs always use the global job counter */
+		return job->uargs.perf_counter_src1;
+	}
+
+	/* Use per sub job counter if enabled... */
+	if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src1[sub_job]) {
+		return job->perf_counter_per_sub_job_src1[sub_job];
+	}
+
+	/* ...else default to global job counter */
+	return job->uargs.perf_counter_src1;
+}
+
+void mali_pp_job_set_pp_counter_global_src0(u32 counter)
+{
+	pp_counter_src0 = counter;
+}
+
+void mali_pp_job_set_pp_counter_global_src1(u32 counter)
+{
+	pp_counter_src1 = counter;
+}
+
+void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+
+	if (MALI_HW_CORE_NO_COUNTER == pp_counter_per_sub_job_src0[sub_job]) {
+		/* increment count since existing counter was disabled */
+		_mali_osk_atomic_inc(&pp_counter_per_sub_job_count);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == counter) {
+		/* decrement count since new counter is disabled */
+		_mali_osk_atomic_dec(&pp_counter_per_sub_job_count);
+	}
+
+	/* PS: A change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will inc and dec, result will be 0 change */
+
+	pp_counter_per_sub_job_src0[sub_job] = counter;
+}
+
+void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+
+	if (MALI_HW_CORE_NO_COUNTER == pp_counter_per_sub_job_src1[sub_job]) {
+		/* increment count since existing counter was disabled */
+		_mali_osk_atomic_inc(&pp_counter_per_sub_job_count);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == counter) {
+		/* decrement count since new counter is disabled */
+		_mali_osk_atomic_dec(&pp_counter_per_sub_job_count);
+	}
+
+	/* PS: A change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will inc and dec, result will be 0 change */
+
+	pp_counter_per_sub_job_src1[sub_job] = counter;
+}
+
+u32 mali_pp_job_get_pp_counter_global_src0(void)
+{
+	return pp_counter_src0;
+}
+
+u32 mali_pp_job_get_pp_counter_global_src1(void)
+{
+	return pp_counter_src1;
+}
+
+u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+	return pp_counter_per_sub_job_src0[sub_job];
+}
+
+u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+	return pp_counter_per_sub_job_src1[sub_job];
+}
diff --git a/mali/common/mali_pp_job.h b/mali/common/mali_pp_job.h
new file mode 100755
index 0000000..0ebd138
--- /dev/null
+++ b/mali/common/mali_pp_job.h
@@ -0,0 +1,594 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PP_JOB_H__
+#define __MALI_PP_JOB_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "regs/mali_200_regs.h"
+#include "mali_kernel_core.h"
+#include "mali_dlbu.h"
+#include "mali_timeline.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#include "linux/mali_memory_dma_buf.h"
+#endif
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+#include "linux/mali_dma_fence.h"
+#endif
+
+typedef enum pp_job_status {
+	MALI_NO_SWAP_IN,
+	MALI_SWAP_IN_FAIL,
+	MALI_SWAP_IN_SUCC,
+} pp_job_status;
+
+/**
+ * This structure represents a PP job, including all sub jobs.
+ *
+ * The PP job object itself is not protected by any single lock,
+ * but relies on other locks instead (scheduler, executor and timeline lock).
+ * Think of the job object as moving between these sub systems through-out
+ * its lifetime. Different part of the PP job struct is used by different
+ * subsystems. Accessor functions ensure that correct lock is taken.
+ * Do NOT access any data members directly from outside this module!
+ */
+struct mali_pp_job {
+	/*
+	 * These members are typically only set at creation,
+	 * and only read later on.
+	 * They do not require any lock protection.
+	 */
+	_mali_uk_pp_start_job_s uargs;                     /**< Arguments from user space */
+	struct mali_session_data *session;                 /**< Session which submitted this job */
+	u32 pid;                                           /**< Process ID of submitting process */
+	u32 tid;                                           /**< Thread ID of submitting thread */
+	u32 id;                                            /**< Identifier for this job in kernel space (sequential numbering) */
+	u32 cache_order;                                   /**< Cache order used for L2 cache flushing (sequential numbering) */
+	struct mali_timeline_tracker tracker;              /**< Timeline tracker for this job */
+	_mali_osk_notification_t *finished_notification;   /**< Notification sent back to userspace on job complete */
+	u32 perf_counter_per_sub_job_count;                /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */
+	u32 perf_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src0 */
+	u32 perf_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src1 */
+	u32 sub_jobs_num;                                  /**< Number of subjobs; set to 1 for Mali-450 if DLBU is used, otherwise equals number of PP cores */
+
+	pp_job_status swap_status;                         /**< Used to track each PP job swap status, if fail, we need to drop them in scheduler part */
+	mali_bool user_notification;                       /**< When we deferred delete PP job, we need to judge if we need to send job finish notification to user space */
+	u32 num_pp_cores_in_virtual;                       /**< How many PP cores we have when job finished */
+
+	/*
+	 * These members are used by both scheduler and executor.
+	 * They are "protected" by atomic operations.
+	 */
+	_mali_osk_atomic_t sub_jobs_completed;                            /**< Number of completed sub-jobs in this superjob */
+	_mali_osk_atomic_t sub_job_errors;                                /**< Bitfield with errors (errors for each single sub-job is or'ed together) */
+
+	/*
+	 * These members are used by scheduler, but only when no one else
+	 * knows about this job object but the working function.
+	 * No lock is thus needed for these.
+	 */
+	u32 *memory_cookies;                               /**< Memory cookies attached to job */
+
+	/*
+	 * These members are used by the scheduler,
+	 * protected by scheduler lock
+	 */
+	_mali_osk_list_t list;                             /**< Used to link jobs together in the scheduler queue */
+	_mali_osk_list_t session_fb_lookup_list;           /**< Used to link jobs together from the same frame builder in the session */
+
+	u32 sub_jobs_started;                              /**< Total number of sub-jobs started (always started in ascending order) */
+
+	/*
+	 * Set by executor/group on job completion, read by scheduler when
+	 * returning job to user. Hold executor lock when setting,
+	 * no lock needed when reading
+	 */
+	u32 perf_counter_value0[_MALI_PP_MAX_SUB_JOBS];    /**< Value of performance counter 0 (to be returned to user space), one for each sub job */
+	u32 perf_counter_value1[_MALI_PP_MAX_SUB_JOBS];    /**< Value of performance counter 1 (to be returned to user space), one for each sub job */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	struct mali_dma_fence_context dma_fence_context; /**< The mali dma fence context to record dma fence waiters that this job wait for */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct dma_fence *rendered_dma_fence; /**< the new dma fence link to this job */
+#else
+	struct fence *rendered_dma_fence; /**< the new dma fence link to this job */
+#endif
+#endif
+};
+
+void mali_pp_job_initialize(void);
+void mali_pp_job_terminate(void);
+
+struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session, _mali_uk_pp_start_job_s *uargs, u32 id);
+void mali_pp_job_delete(struct mali_pp_job *job);
+
+u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job);
+u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job);
+
+void mali_pp_job_set_pp_counter_global_src0(u32 counter);
+void mali_pp_job_set_pp_counter_global_src1(u32 counter);
+void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter);
+void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter);
+
+u32 mali_pp_job_get_pp_counter_global_src0(void);
+u32 mali_pp_job_get_pp_counter_global_src1(void);
+u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job);
+u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job);
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->id;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_cache_order(struct mali_pp_job *job,
+		u32 cache_order)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	job->cache_order = cache_order;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_cache_order(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->cache_order;
+}
+
+MALI_STATIC_INLINE u64 mali_pp_job_get_user_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.user_job_ptr;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_frame_builder_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_builder_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_flush_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.flush_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_pid(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->pid;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_tid(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->tid;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_frame_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_dlbu_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.dlbu_registers;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_virtual(struct mali_pp_job *job)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (0 == job->uargs.num_cores) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_addr_frame(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (mali_pp_job_is_virtual(job)) {
+		return MALI_DLBU_VIRT_ADDR;
+	} else if (0 == sub_job) {
+		return job->uargs.frame_registers[MALI200_REG_ADDR_FRAME / sizeof(u32)];
+	} else if (sub_job < _MALI_PP_MAX_SUB_JOBS) {
+		return job->uargs.frame_registers_addr_frame[sub_job - 1];
+	}
+
+	return 0;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_addr_stack(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (0 == sub_job) {
+		return job->uargs.frame_registers[MALI200_REG_ADDR_STACK / sizeof(u32)];
+	} else if (sub_job < _MALI_PP_MAX_SUB_JOBS) {
+		return job->uargs.frame_registers_addr_stack[sub_job - 1];
+	}
+
+	return 0;
+}
+
+void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list);
+
+MALI_STATIC_INLINE void mali_pp_job_list_addtail(struct mali_pp_job *job,
+		_mali_osk_list_t *list)
+{
+	_mali_osk_list_addtail(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_list_move(struct mali_pp_job *job,
+		_mali_osk_list_t *list)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list));
+	_mali_osk_list_move(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_list_remove(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->list);
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb0_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb0_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb1_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb1_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb2_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb2_registers;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb0_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb1_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb2_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb0(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb1(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb2(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_all_writeback_unit_disabled(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] ||
+	    job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] ||
+	    job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT]
+	   ) {
+		/* At least one output unit active */
+		return MALI_FALSE;
+	}
+
+	/* All outputs are disabled - we can abort the job */
+	return MALI_TRUE;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_fb_lookup_add(struct mali_pp_job *job)
+{
+	u32 fb_lookup_id;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	fb_lookup_id = MALI_PP_JOB_FB_LOOKUP_LIST_MASK & job->uargs.frame_builder_id;
+
+	MALI_DEBUG_ASSERT(MALI_PP_JOB_FB_LOOKUP_LIST_SIZE > fb_lookup_id);
+
+	_mali_osk_list_addtail(&job->session_fb_lookup_list,
+			       &job->session->pp_job_fb_lookup_list[fb_lookup_id]);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_fb_lookup_remove(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->session_fb_lookup_list);
+}
+
+MALI_STATIC_INLINE struct mali_session_data *mali_pp_job_get_session(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->session;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_has_started_sub_jobs(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return (0 < job->sub_jobs_started) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_has_unstarted_sub_jobs(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return (job->sub_jobs_started < job->sub_jobs_num) ? MALI_TRUE : MALI_FALSE;
+}
+
+/* Function used when we are terminating a session with jobs. Return TRUE if it has a rendering job.
+   Makes sure that no new subjobs are started. */
+MALI_STATIC_INLINE void mali_pp_job_mark_unstarted_failed(struct mali_pp_job *job)
+{
+	u32 jobs_remaining;
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	jobs_remaining = job->sub_jobs_num - job->sub_jobs_started;
+	job->sub_jobs_started += jobs_remaining;
+
+	/* Not the most optimal way, but this is only used in error cases */
+	for (i = 0; i < jobs_remaining; i++) {
+		_mali_osk_atomic_inc(&job->sub_jobs_completed);
+		_mali_osk_atomic_inc(&job->sub_job_errors);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_complete(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->sub_jobs_num ==
+		_mali_osk_atomic_read(&job->sub_jobs_completed)) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_first_unstarted_sub_job(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return job->sub_jobs_started;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_sub_job_count(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->sub_jobs_num;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_unstarted_sub_job_count(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(job->sub_jobs_num >= job->sub_jobs_started);
+	return (job->sub_jobs_num - job->sub_jobs_started);
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_num_memory_cookies(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.num_memory_cookies;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_memory_cookie(
+	struct mali_pp_job *job, u32 index)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(index < job->uargs.num_memory_cookies);
+	MALI_DEBUG_ASSERT_POINTER(job->memory_cookies);
+	return job->memory_cookies[index];
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_needs_dma_buf_mapping(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (0 < job->uargs.num_memory_cookies) {
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_started(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Assert that we are marking the "first unstarted sub job" as started */
+	MALI_DEBUG_ASSERT(job->sub_jobs_started == sub_job);
+
+	job->sub_jobs_started++;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_completed(struct mali_pp_job *job, mali_bool success)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_atomic_inc(&job->sub_jobs_completed);
+	if (MALI_FALSE == success) {
+		_mali_osk_atomic_inc(&job->sub_job_errors);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_was_success(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	if (0 == _mali_osk_atomic_read(&job->sub_job_errors)) {
+		return MALI_TRUE;
+	}
+	return MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_use_no_notification(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.flags & _MALI_PP_JOB_FLAG_NO_NOTIFICATION) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_pilot_job(struct mali_pp_job *job)
+{
+	/*
+	 * A pilot job is currently identified as jobs which
+	 * require no callback notification.
+	 */
+	return mali_pp_job_use_no_notification(job);
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *
+mali_pp_job_get_finished_notification(struct mali_pp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->finished_notification);
+
+	notification = job->finished_notification;
+	job->finished_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_window_surface(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.flags & _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE)
+	       ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_protected_job(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.flags & _MALI_PP_JOB_FLAG_PROTECTED)
+	       ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_flag(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_flag;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value0(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value0[sub_job];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value1(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value1[sub_job];
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value0(struct mali_pp_job *job, u32 sub_job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value0[sub_job] = value;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value1(struct mali_pp_job *job, u32 sub_job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value1[sub_job] = value;
+}
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_pp_job_check(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	if (mali_pp_job_is_virtual(job) && job->sub_jobs_num != 1) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/**
+ * Returns MALI_TRUE if this job has more than two sub jobs and all sub jobs are unstarted.
+ *
+ * @param job Job to check.
+ * @return MALI_TRUE if job has more than two sub jobs and all sub jobs are unstarted, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_large_and_unstarted(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!mali_pp_job_is_virtual(job));
+
+	return (0 == job->sub_jobs_started && 2 < job->sub_jobs_num);
+}
+
+/**
+ * Get PP job's Timeline tracker.
+ *
+ * @param job PP job.
+ * @return Pointer to Timeline tracker for the job.
+ */
+MALI_STATIC_INLINE struct mali_timeline_tracker *mali_pp_job_get_tracker(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return &(job->tracker);
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_timeline_point_ptr(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr;
+}
+
+
+#endif /* __MALI_PP_JOB_H__ */
diff --git a/mali/common/mali_scheduler.c b/mali/common/mali_scheduler.c
new file mode 100755
index 0000000..5410f86
--- /dev/null
+++ b/mali/common/mali_scheduler.c
@@ -0,0 +1,1548 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_scheduler.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_profiling.h"
+#include "mali_kernel_utilization.h"
+#include "mali_timeline.h"
+#include "mali_gp_job.h"
+#include "mali_pp_job.h"
+#include "mali_executor.h"
+#include "mali_group.h"
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include "mali_pm_metrics.h"
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+#include "mali_dma_fence.h"
+#include <linux/dma-buf.h>
+#endif
+#endif
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+#include <linux/sched.h>
+#include <trace/events/gpu.h>
+#endif
+/*
+ * ---------- static defines/constants ----------
+ */
+
+/*
+ * If dma_buf with map on demand is used, we defer job queue
+ * if in atomic context, since both might sleep.
+ */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#define MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE 1
+#endif
+#endif
+
+
+/*
+ * ---------- global variables (exported due to inline functions) ----------
+ */
+
+/* Lock protecting this module */
+_mali_osk_spinlock_irq_t *mali_scheduler_lock_obj = NULL;
+
+/* Queue of jobs to be executed on the GP group */
+struct mali_scheduler_job_queue job_queue_gp;
+
+/* Queue of PP jobs */
+struct mali_scheduler_job_queue job_queue_pp;
+
+_mali_osk_atomic_t mali_job_id_autonumber;
+_mali_osk_atomic_t mali_job_cache_order_autonumber;
+/*
+ * ---------- static variables ----------
+ */
+
+_mali_osk_wq_work_t *scheduler_wq_pp_job_delete = NULL;
+_mali_osk_spinlock_irq_t *scheduler_pp_job_delete_lock = NULL;
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_deletion_queue);
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+static _mali_osk_wq_work_t *scheduler_wq_pp_job_queue = NULL;
+static _mali_osk_spinlock_irq_t *scheduler_pp_job_queue_lock = NULL;
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_queue_list);
+#endif
+
+/*
+ * ---------- Forward declaration of static functions ----------
+ */
+
+static mali_timeline_point mali_scheduler_submit_gp_job(
+	struct mali_session_data *session, struct mali_gp_job *job);
+static _mali_osk_errcode_t mali_scheduler_submit_pp_job(
+	struct mali_session_data *session, struct mali_pp_job *job, mali_timeline_point *point);
+
+static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job);
+static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job);
+
+static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job,
+		mali_bool success);
+
+static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job);
+void mali_scheduler_do_pp_job_delete(void *arg);
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job);
+static void mali_scheduler_do_pp_job_queue(void *arg);
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+/*
+ * ---------- Actual implementation ----------
+ */
+
+_mali_osk_errcode_t mali_scheduler_initialize(void)
+{
+	_mali_osk_atomic_init(&mali_job_id_autonumber, 0);
+	_mali_osk_atomic_init(&mali_job_cache_order_autonumber, 0);
+
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.normal_pri);
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.high_pri);
+	job_queue_gp.depth = 0;
+	job_queue_gp.big_job_num = 0;
+
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.normal_pri);
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.high_pri);
+	job_queue_pp.depth = 0;
+	job_queue_pp.big_job_num = 0;
+
+	mali_scheduler_lock_obj = _mali_osk_spinlock_irq_init(
+					  _MALI_OSK_LOCKFLAG_ORDERED,
+					  _MALI_OSK_LOCK_ORDER_SCHEDULER);
+	if (NULL == mali_scheduler_lock_obj) {
+		mali_scheduler_terminate();
+	}
+
+	scheduler_wq_pp_job_delete = _mali_osk_wq_create_work(
+					     mali_scheduler_do_pp_job_delete, NULL);
+	if (NULL == scheduler_wq_pp_job_delete) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	scheduler_pp_job_delete_lock = _mali_osk_spinlock_irq_init(
+					       _MALI_OSK_LOCKFLAG_ORDERED,
+					       _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED);
+	if (NULL == scheduler_pp_job_delete_lock) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	scheduler_wq_pp_job_queue = _mali_osk_wq_create_work(
+					    mali_scheduler_do_pp_job_queue, NULL);
+	if (NULL == scheduler_wq_pp_job_queue) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	scheduler_pp_job_queue_lock = _mali_osk_spinlock_irq_init(
+					      _MALI_OSK_LOCKFLAG_ORDERED,
+					      _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED);
+	if (NULL == scheduler_pp_job_queue_lock) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_scheduler_terminate(void)
+{
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	if (NULL != scheduler_pp_job_queue_lock) {
+		_mali_osk_spinlock_irq_term(scheduler_pp_job_queue_lock);
+		scheduler_pp_job_queue_lock = NULL;
+	}
+
+	if (NULL != scheduler_wq_pp_job_queue) {
+		_mali_osk_wq_delete_work(scheduler_wq_pp_job_queue);
+		scheduler_wq_pp_job_queue = NULL;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+	if (NULL != scheduler_pp_job_delete_lock) {
+		_mali_osk_spinlock_irq_term(scheduler_pp_job_delete_lock);
+		scheduler_pp_job_delete_lock = NULL;
+	}
+
+	if (NULL != scheduler_wq_pp_job_delete) {
+		_mali_osk_wq_delete_work(scheduler_wq_pp_job_delete);
+		scheduler_wq_pp_job_delete = NULL;
+	}
+
+	if (NULL != mali_scheduler_lock_obj) {
+		_mali_osk_spinlock_irq_term(mali_scheduler_lock_obj);
+		mali_scheduler_lock_obj = NULL;
+	}
+
+	_mali_osk_atomic_term(&mali_job_cache_order_autonumber);
+	_mali_osk_atomic_term(&mali_job_id_autonumber);
+}
+
+u32 mali_scheduler_job_physical_head_count(mali_bool gpu_mode_is_secure)
+{
+	/*
+	 * Count how many physical sub jobs are present from the head of queue
+	 * until the first virtual job is present.
+	 * Early out when we have reached maximum number of PP cores (8)
+	 */
+	u32 count = 0;
+	struct mali_pp_job *job;
+	struct mali_pp_job *temp;
+
+	/* Check for partially started normal pri jobs */
+	if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+					   struct mali_pp_job, list);
+
+		MALI_DEBUG_ASSERT_POINTER(job);
+
+		if (MALI_TRUE == mali_pp_job_has_started_sub_jobs(job)) {
+			/*
+			 * Remember; virtual jobs can't be queued and started
+			 * at the same time, so this must be a physical job
+			 */
+			if ((MALI_FALSE  == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job))
+			    || (MALI_TRUE  == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job))) {
+
+				count += mali_pp_job_unstarted_sub_job_count(job);
+				if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) {
+					return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+				}
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.high_pri,
+				    struct mali_pp_job, list) {
+		if ((MALI_FALSE == mali_pp_job_is_virtual(job))
+		    && ((MALI_FALSE  == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job))
+			|| (MALI_TRUE  == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job)))) {
+
+			count += mali_pp_job_unstarted_sub_job_count(job);
+			if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) {
+				return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+			}
+		} else {
+			/* Came across a virtual job, so stop counting */
+			return count;
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.normal_pri,
+				    struct mali_pp_job, list) {
+		if ((MALI_FALSE == mali_pp_job_is_virtual(job))
+		    && (MALI_FALSE == mali_pp_job_has_started_sub_jobs(job))
+		    && ((MALI_FALSE  == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job))
+			|| (MALI_TRUE  == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job)))) {
+
+			count += mali_pp_job_unstarted_sub_job_count(job);
+			if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) {
+				return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+			}
+		} else {
+			/* Came across a virtual job, so stop counting */
+			return count;
+		}
+	}
+	return count;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_next(void)
+{
+	struct mali_pp_job *job;
+	struct mali_pp_job *temp;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	/* Check for partially started normal pri jobs */
+	if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+					   struct mali_pp_job, list);
+
+		MALI_DEBUG_ASSERT_POINTER(job);
+
+		if (MALI_TRUE == mali_pp_job_has_started_sub_jobs(job)) {
+			return job;
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.high_pri,
+				    struct mali_pp_job, list) {
+		return job;
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.normal_pri,
+				    struct mali_pp_job, list) {
+		return job;
+	}
+
+	return NULL;
+}
+
+mali_bool mali_scheduler_job_next_is_virtual(void)
+{
+	struct mali_pp_job *job;
+
+	job = mali_scheduler_job_pp_virtual_peek();
+	if (NULL != job) {
+		MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job));
+
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+struct mali_gp_job *mali_scheduler_job_gp_get(void)
+{
+	_mali_osk_list_t *queue;
+	struct mali_gp_job *job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+	MALI_DEBUG_ASSERT(0 < job_queue_gp.depth);
+	MALI_DEBUG_ASSERT(job_queue_gp.big_job_num <= job_queue_gp.depth);
+
+	if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) {
+		queue = &job_queue_gp.high_pri;
+	} else {
+		queue = &job_queue_gp.normal_pri;
+		MALI_DEBUG_ASSERT(!_mali_osk_list_empty(queue));
+	}
+
+	job = _MALI_OSK_LIST_ENTRY(queue->next, struct mali_gp_job, list);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	mali_gp_job_list_remove(job);
+	job_queue_gp.depth--;
+	if (job->big_job) {
+		job_queue_gp.big_job_num --;
+		if (job_queue_gp.big_job_num < MALI_MAX_PENDING_BIG_JOB) {
+			/* wake up process */
+			wait_queue_head_t *queue = mali_session_get_wait_queue();
+			wake_up(queue);
+		}
+	}
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void)
+{
+	struct mali_pp_job *job = NULL;
+	struct mali_pp_job *tmp_job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	/*
+	 * For PP jobs we favour partially started jobs in normal
+	 * priority queue over unstarted jobs in high priority queue
+	 */
+
+	if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+					       struct mali_pp_job, list);
+		MALI_DEBUG_ASSERT(NULL != tmp_job);
+
+		if (MALI_FALSE == mali_pp_job_is_virtual(tmp_job)) {
+			job = tmp_job;
+		}
+	}
+
+	if (NULL == job ||
+	    MALI_FALSE == mali_pp_job_has_started_sub_jobs(job)) {
+		/*
+		 * There isn't a partially started job in normal queue, so
+		 * look in high priority queue.
+		 */
+		if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+			MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+			tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next,
+						       struct mali_pp_job, list);
+			MALI_DEBUG_ASSERT(NULL != tmp_job);
+
+			if (MALI_FALSE == mali_pp_job_is_virtual(tmp_job)) {
+				job = tmp_job;
+			}
+		}
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void)
+{
+	struct mali_pp_job *job = NULL;
+	struct mali_pp_job *tmp_job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next,
+					       struct mali_pp_job, list);
+
+		if (MALI_TRUE == mali_pp_job_is_virtual(tmp_job)) {
+			job = tmp_job;
+		}
+	}
+
+	if (NULL == job) {
+		if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+			MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+			tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+						       struct mali_pp_job, list);
+
+			if (MALI_TRUE == mali_pp_job_is_virtual(tmp_job)) {
+				job = tmp_job;
+			}
+		}
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job)
+{
+	struct mali_pp_job *job = mali_scheduler_job_pp_physical_peek();
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == mali_pp_job_is_virtual(job));
+
+	if (NULL != job) {
+		*sub_job = mali_pp_job_get_first_unstarted_sub_job(job);
+
+		mali_pp_job_mark_sub_job_started(job, *sub_job);
+		if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(job)) {
+			/* Remove from queue when last sub job has been retrieved */
+			mali_pp_job_list_remove(job);
+		}
+
+		job_queue_pp.depth--;
+
+		/*
+		 * Job about to start so it is no longer be
+		 * possible to discard WB
+		 */
+		mali_pp_job_fb_lookup_remove(job);
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void)
+{
+	struct mali_pp_job *job = mali_scheduler_job_pp_virtual_peek();
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_pp_job_is_virtual(job));
+
+	if (NULL != job) {
+		MALI_DEBUG_ASSERT(0 ==
+				  mali_pp_job_get_first_unstarted_sub_job(job));
+		MALI_DEBUG_ASSERT(1 ==
+				  mali_pp_job_get_sub_job_count(job));
+
+		mali_pp_job_mark_sub_job_started(job, 0);
+
+		mali_pp_job_list_remove(job);
+
+		job_queue_pp.depth--;
+
+		/*
+		 * Job about to start so it is no longer be
+		 * possible to discard WB
+		 */
+		mali_pp_job_fb_lookup_remove(job);
+	}
+
+	return job;
+}
+
+mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Timeline activation for job %u (0x%08X).\n",
+			     mali_gp_job_get_id(job), job));
+
+	mali_scheduler_lock();
+
+	if (!mali_scheduler_queue_gp_job(job)) {
+		/* Failed to enqueue job, release job (with error) */
+
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_gp_job_get_tracker(job));
+		mali_gp_job_signal_pp_tracker(job, MALI_FALSE);
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_gp_job(job, MALI_FALSE,
+					       MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	mali_scheduler_unlock();
+
+	return MALI_SCHEDULER_MASK_GP;
+}
+
+mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Timeline activation for job %u (0x%08X).\n",
+			     mali_pp_job_get_id(job), job));
+
+	if (MALI_TRUE == mali_timeline_tracker_activation_error(
+		    mali_pp_job_get_tracker(job))) {
+		MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Job %u (0x%08X) activated with error, aborting.\n",
+				     mali_pp_job_get_id(job), job));
+
+		mali_scheduler_lock();
+		mali_pp_job_fb_lookup_remove(job);
+		mali_pp_job_mark_unstarted_failed(job);
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(job));
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	if (mali_pp_job_needs_dma_buf_mapping(job)) {
+		mali_scheduler_deferred_pp_job_queue(job);
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+	mali_scheduler_lock();
+
+	if (!mali_scheduler_queue_pp_job(job)) {
+		/* Failed to enqueue job, release job (with error) */
+		mali_pp_job_fb_lookup_remove(job);
+		mali_pp_job_mark_unstarted_failed(job);
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(job));
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	mali_scheduler_unlock();
+	return MALI_SCHEDULER_MASK_PP;
+}
+
+void mali_scheduler_complete_gp_job(struct mali_gp_job *job,
+				    mali_bool success,
+				    mali_bool user_notification,
+				    mali_bool dequeued)
+{
+	if (user_notification) {
+		mali_scheduler_return_gp_job_to_user(job, success);
+	}
+
+	if (dequeued) {
+		_mali_osk_pm_dev_ref_put();
+
+		if (mali_utilization_enabled()) {
+			mali_utilization_gp_end();
+		}
+		mali_pm_record_gpu_idle(MALI_TRUE);
+	}
+
+	mali_gp_job_delete(job);
+}
+
+void mali_scheduler_complete_pp_job(struct mali_pp_job *job,
+				    u32 num_cores_in_virtual,
+				    mali_bool user_notification,
+				    mali_bool dequeued)
+{
+	job->user_notification = user_notification;
+	job->num_pp_cores_in_virtual = num_cores_in_virtual;
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	if (NULL != job->rendered_dma_fence)
+		mali_dma_fence_signal_and_put(&job->rendered_dma_fence);
+#endif
+
+	if (dequeued) {
+#if defined(CONFIG_MALI_DVFS)
+		if (mali_pp_job_is_window_surface(job)) {
+			struct mali_session_data *session;
+			session = mali_pp_job_get_session(job);
+			mali_session_inc_num_window_jobs(session);
+		}
+#endif
+		_mali_osk_pm_dev_ref_put();
+
+		if (mali_utilization_enabled()) {
+			mali_utilization_pp_end();
+		}
+		mali_pm_record_gpu_idle(MALI_FALSE);
+	}
+
+	/* With ZRAM feature enabled, all pp jobs will be force to use deferred delete. */
+	mali_scheduler_deferred_pp_job_delete(job);
+}
+
+void mali_scheduler_abort_session(struct mali_session_data *session)
+{
+	struct mali_gp_job *gp_job;
+	struct mali_gp_job *gp_tmp;
+	struct mali_pp_job *pp_job;
+	struct mali_pp_job *pp_tmp;
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_gp);
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_pp);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali scheduler: Aborting all queued jobs from session 0x%08X.\n",
+			     session));
+
+	mali_scheduler_lock();
+
+	/* Remove from GP normal priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.normal_pri,
+				    struct mali_gp_job, list) {
+		if (mali_gp_job_get_session(gp_job) == session) {
+			mali_gp_job_list_move(gp_job, &removed_jobs_gp);
+			job_queue_gp.depth--;
+			job_queue_gp.big_job_num -= gp_job->big_job ? 1 : 0;
+		}
+	}
+
+	/* Remove from GP high priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.high_pri,
+				    struct mali_gp_job, list) {
+		if (mali_gp_job_get_session(gp_job) == session) {
+			mali_gp_job_list_move(gp_job, &removed_jobs_gp);
+			job_queue_gp.depth--;
+			job_queue_gp.big_job_num -= gp_job->big_job ? 1 : 0;
+		}
+	}
+
+	/* Remove from PP normal priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp,
+				    &job_queue_pp.normal_pri,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_get_session(pp_job) == session) {
+			mali_pp_job_fb_lookup_remove(pp_job);
+
+			job_queue_pp.depth -=
+				mali_pp_job_unstarted_sub_job_count(
+					pp_job);
+			mali_pp_job_mark_unstarted_failed(pp_job);
+
+			if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(pp_job)) {
+				if (mali_pp_job_is_complete(pp_job)) {
+					mali_pp_job_list_move(pp_job,
+							      &removed_jobs_pp);
+				} else {
+					mali_pp_job_list_remove(pp_job);
+				}
+			}
+		}
+	}
+
+	/* Remove from PP high priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp,
+				    &job_queue_pp.high_pri,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_get_session(pp_job) == session) {
+			mali_pp_job_fb_lookup_remove(pp_job);
+
+			job_queue_pp.depth -=
+				mali_pp_job_unstarted_sub_job_count(
+					pp_job);
+			mali_pp_job_mark_unstarted_failed(pp_job);
+
+			if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(pp_job)) {
+				if (mali_pp_job_is_complete(pp_job)) {
+					mali_pp_job_list_move(pp_job,
+							      &removed_jobs_pp);
+				} else {
+					mali_pp_job_list_remove(pp_job);
+				}
+			}
+		}
+	}
+
+	/*
+	 * Release scheduler lock so we can release trackers
+	 * (which will potentially queue new jobs)
+	 */
+	mali_scheduler_unlock();
+
+	/* Release and complete all (non-running) found GP jobs  */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &removed_jobs_gp,
+				    struct mali_gp_job, list) {
+		mali_timeline_tracker_release(mali_gp_job_get_tracker(gp_job));
+		mali_gp_job_signal_pp_tracker(gp_job, MALI_FALSE);
+		_mali_osk_list_delinit(&gp_job->list);
+		mali_scheduler_complete_gp_job(gp_job,
+					       MALI_FALSE, MALI_TRUE, MALI_TRUE);
+	}
+
+	/* Release and complete non-running PP jobs */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp, &removed_jobs_pp,
+				    struct mali_pp_job, list) {
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(pp_job));
+		_mali_osk_list_delinit(&pp_job->list);
+		mali_scheduler_complete_pp_job(pp_job, 0,
+					       MALI_TRUE, MALI_TRUE);
+	}
+}
+
+_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx,
+		_mali_uk_gp_start_job_s *uargs)
+{
+	struct mali_session_data *session;
+	struct mali_gp_job *job;
+	mali_timeline_point point;
+	u32 __user *point_ptr = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)ctx;
+
+	job = mali_gp_job_create(session, uargs, mali_scheduler_get_new_id(),
+				 NULL);
+	if (NULL == job) {
+		MALI_PRINT_ERROR(("Failed to create GP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_gp_job_get_timeline_point_ptr(job);
+
+	point = mali_scheduler_submit_gp_job(session, job);
+
+	if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+		/*
+		 * Let user space know that something failed
+		 * after the job was started.
+		 */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx,
+		_mali_uk_pp_start_job_s *uargs)
+{
+	_mali_osk_errcode_t ret;
+	struct mali_session_data *session;
+	struct mali_pp_job *job;
+	mali_timeline_point point;
+	u32 __user *point_ptr = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)ctx;
+
+	job = mali_pp_job_create(session, uargs, mali_scheduler_get_new_id());
+	if (NULL == job) {
+		MALI_PRINT_ERROR(("Failed to create PP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(job);
+
+	/* Submit PP job. */
+	ret = mali_scheduler_submit_pp_job(session, job, &point);
+	job = NULL;
+
+	if (_MALI_OSK_ERR_OK == ret) {
+		if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+			/*
+			* Let user space know that something failed
+			* after the jobs were started.
+			*/
+			return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+		}
+	}
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx,
+		_mali_uk_pp_and_gp_start_job_s *uargs)
+{
+	_mali_osk_errcode_t ret;
+	struct mali_session_data *session;
+	_mali_uk_pp_and_gp_start_job_s kargs;
+	struct mali_pp_job *pp_job;
+	struct mali_gp_job *gp_job;
+	u32 __user *point_ptr = NULL;
+	mali_timeline_point point;
+	_mali_uk_pp_start_job_s __user *pp_args;
+	_mali_uk_gp_start_job_s __user *gp_args;
+
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+
+	session = (struct mali_session_data *) ctx;
+
+	if (0 != _mali_osk_copy_from_user(&kargs, uargs,
+					  sizeof(_mali_uk_pp_and_gp_start_job_s))) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	pp_args = (_mali_uk_pp_start_job_s __user *)(uintptr_t)kargs.pp_args;
+	gp_args = (_mali_uk_gp_start_job_s __user *)(uintptr_t)kargs.gp_args;
+
+	pp_job = mali_pp_job_create(session, pp_args,
+				    mali_scheduler_get_new_id());
+	if (NULL == pp_job) {
+		MALI_PRINT_ERROR(("Failed to create PP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	gp_job = mali_gp_job_create(session, gp_args,
+				    mali_scheduler_get_new_id(),
+				    mali_pp_job_get_tracker(pp_job));
+	if (NULL == gp_job) {
+		MALI_PRINT_ERROR(("Failed to create GP job.\n"));
+		mali_pp_job_delete(pp_job);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(pp_job);
+
+	/* Submit GP job. */
+	mali_scheduler_submit_gp_job(session, gp_job);
+	gp_job = NULL;
+
+	/* Submit PP job. */
+	ret = mali_scheduler_submit_pp_job(session, pp_job, &point);
+	pp_job = NULL;
+
+	if (_MALI_OSK_ERR_OK == ret) {
+		if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+			/*
+			* Let user space know that something failed
+			* after the jobs were started.
+			*/
+			return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+		}
+	}
+
+	return ret;
+}
+
+void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args)
+{
+	struct mali_session_data *session;
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+	u32 fb_lookup_id;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	fb_lookup_id = args->fb_id & MALI_PP_JOB_FB_LOOKUP_LIST_MASK;
+
+	mali_scheduler_lock();
+
+	/* Iterate over all jobs for given frame builder_id. */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp,
+				    &session->pp_job_fb_lookup_list[fb_lookup_id],
+				    struct mali_pp_job, session_fb_lookup_list) {
+		MALI_DEBUG_CODE(u32 disable_mask = 0);
+
+		if (mali_pp_job_get_frame_builder_id(job) !=
+		    (u32) args->fb_id) {
+			MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Disable WB mismatching FB.\n"));
+			continue;
+		}
+
+		MALI_DEBUG_CODE(disable_mask |= 0xD << (4 * 3));
+
+		if (mali_pp_job_get_wb0_source_addr(job) == args->wb0_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x1 << (4 * 1));
+			mali_pp_job_disable_wb0(job);
+		}
+
+		if (mali_pp_job_get_wb1_source_addr(job) == args->wb1_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x2 << (4 * 2));
+			mali_pp_job_disable_wb1(job);
+		}
+
+		if (mali_pp_job_get_wb2_source_addr(job) == args->wb2_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x3 << (4 * 3));
+			mali_pp_job_disable_wb2(job);
+		}
+		MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Disable WB: 0x%X.\n",
+				     disable_mask));
+	}
+
+	mali_scheduler_unlock();
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_scheduler_dump_state(char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "GP queues\n");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tQueue depth: %u\n", job_queue_gp.depth);
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tNormal priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_gp.normal_pri) ?
+				"empty" : "not empty");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tHigh priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_gp.high_pri) ?
+				"empty" : "not empty");
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"PP queues\n");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tQueue depth: %u\n", job_queue_pp.depth);
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tNormal priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_pp.normal_pri)
+				? "empty" : "not empty");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tHigh priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_pp.high_pri)
+				? "empty" : "not empty");
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\n");
+
+	return n;
+}
+#endif
+
+/*
+ * ---------- Implementation of static functions ----------
+ */
+
+static mali_timeline_point mali_scheduler_submit_gp_job(
+	struct mali_session_data *session, struct mali_gp_job *job)
+{
+	mali_timeline_point point;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Add job to Timeline system. */
+	point = mali_timeline_system_add_tracker(session->timeline_system,
+			mali_gp_job_get_tracker(job), MALI_TIMELINE_GP);
+
+	return point;
+}
+
+static _mali_osk_errcode_t mali_scheduler_submit_pp_job(
+	struct mali_session_data *session, struct mali_pp_job *job, mali_timeline_point *point)
+
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	struct ww_acquire_ctx ww_actx;
+	u32 i;
+	u32 num_memory_cookies = 0;
+	struct reservation_object **reservation_object_list = NULL;
+	unsigned int num_reservation_object = 0;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	mali_scheduler_lock();
+	/*
+	 * Adding job to the lookup list used to quickly discard
+	 * writeback units of queued jobs.
+	 */
+	mali_pp_job_fb_lookup_add(job);
+	mali_scheduler_unlock();
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+
+	/* Allocate the reservation_object_list to list the dma reservation object of dependent dma buffer */
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+	if (0 < num_memory_cookies) {
+		reservation_object_list = kzalloc(sizeof(struct reservation_object *) * num_memory_cookies, GFP_KERNEL);
+		if (NULL == reservation_object_list) {
+			MALI_PRINT_ERROR(("Failed to alloc the reservation object list.\n"));
+			ret = _MALI_OSK_ERR_NOMEM;
+			goto failed_to_alloc_reservation_object_list;
+		}
+	}
+
+	/* Add the dma reservation object into reservation_object_list*/
+	for (i = 0; i < num_memory_cookies; i++) {
+		mali_mem_backend *mem_backend = NULL;
+		struct reservation_object *tmp_reservation_object = NULL;
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+
+		mem_backend = mali_mem_backend_struct_search(session, mali_addr);
+
+		MALI_DEBUG_ASSERT_POINTER(mem_backend);
+
+		if (NULL == mem_backend) {
+			MALI_PRINT_ERROR(("Failed to find the memory backend for memory cookie[%d].\n", i));
+			goto failed_to_find_mem_backend;
+		}
+
+		if (MALI_MEM_DMA_BUF != mem_backend->type)
+			continue;
+
+		tmp_reservation_object = mem_backend->dma_buf.attachment->buf->resv;
+
+		if (NULL != tmp_reservation_object) {
+			mali_dma_fence_add_reservation_object_list(tmp_reservation_object,
+					reservation_object_list, &num_reservation_object);
+		}
+	}
+
+	/*
+	 * Add the mali dma fence callback to wait for all dependent dma buf,
+	 * and extend the timeline system to support dma fence,
+	 * then create the new internal dma fence to replace all last dma fence for dependent dma buf.
+	 */
+	if (0 < num_reservation_object) {
+		int error;
+		int num_dma_fence_waiter = 0;
+		/* Create one new dma fence.*/
+		job->rendered_dma_fence = mali_dma_fence_new(job->session->fence_context,
+					  _mali_osk_atomic_inc_return(&job->session->fence_seqno));
+
+		if (NULL == job->rendered_dma_fence) {
+			MALI_PRINT_ERROR(("Failed to creat one new dma fence.\n"));
+			ret = _MALI_OSK_ERR_FAULT;
+			goto failed_to_create_dma_fence;
+		}
+
+		/* In order to avoid deadlock, wait/wound mutex lock to lock all dma buffers*/
+
+		error = mali_dma_fence_lock_reservation_object_list(reservation_object_list,
+				num_reservation_object, &ww_actx);
+
+		if (0 != error) {
+			MALI_PRINT_ERROR(("Failed to lock all reservation objects.\n"));
+			ret = _MALI_OSK_ERR_FAULT;
+			goto failed_to_lock_reservation_object_list;
+		}
+
+		mali_dma_fence_context_init(&job->dma_fence_context,
+					    mali_timeline_dma_fence_callback, (void *)job);
+
+		/* Add dma fence waiters and dma fence callback. */
+		for (i = 0; i < num_reservation_object; i++) {
+			ret = mali_dma_fence_context_add_waiters(&job->dma_fence_context, reservation_object_list[i]);
+			if (_MALI_OSK_ERR_OK != ret) {
+				MALI_PRINT_ERROR(("Failed to add waiter into mali dma fence context.\n"));
+				goto failed_to_add_dma_fence_waiter;
+			}
+		}
+
+		for (i = 0; i < num_reservation_object; i++) {
+			reservation_object_add_excl_fence(reservation_object_list[i], job->rendered_dma_fence);
+		}
+
+		num_dma_fence_waiter = job->dma_fence_context.num_dma_fence_waiter;
+
+		/* Add job to Timeline system. */
+		(*point) = mali_timeline_system_add_tracker(session->timeline_system,
+				mali_pp_job_get_tracker(job), MALI_TIMELINE_PP);
+
+		if (0 != num_dma_fence_waiter) {
+			mali_dma_fence_context_dec_count(&job->dma_fence_context);
+		}
+
+		/* Unlock all wait/wound mutex lock. */
+		mali_dma_fence_unlock_reservation_object_list(reservation_object_list,
+				num_reservation_object, &ww_actx);
+	} else {
+		/* Add job to Timeline system. */
+		(*point) = mali_timeline_system_add_tracker(session->timeline_system,
+				mali_pp_job_get_tracker(job), MALI_TIMELINE_PP);
+	}
+
+	kfree(reservation_object_list);
+	return ret;
+#else
+	/* Add job to Timeline system. */
+	(*point) = mali_timeline_system_add_tracker(session->timeline_system,
+			mali_pp_job_get_tracker(job), MALI_TIMELINE_PP);
+#endif
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+failed_to_add_dma_fence_waiter:
+	mali_dma_fence_context_term(&job->dma_fence_context);
+	mali_dma_fence_unlock_reservation_object_list(reservation_object_list,
+			num_reservation_object, &ww_actx);
+failed_to_lock_reservation_object_list:
+	mali_dma_fence_signal_and_put(&job->rendered_dma_fence);
+failed_to_create_dma_fence:
+failed_to_find_mem_backend:
+	if (NULL != reservation_object_list)
+		kfree(reservation_object_list);
+failed_to_alloc_reservation_object_list:
+	mali_pp_job_fb_lookup_remove(job);
+#endif
+	return ret;
+}
+
+static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job)
+{
+	struct mali_session_data *session;
+	_mali_osk_list_t *queue;
+
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_gp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (unlikely(session->is_aborting)) {
+		MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Job %u (0x%08X) queued while session is aborting.\n",
+				     mali_gp_job_get_id(job), job));
+		return MALI_FALSE; /* job not queued */
+	}
+
+	mali_gp_job_set_cache_order(job, mali_scheduler_get_new_cache_order());
+
+	/* Determine which queue the job should be added to. */
+	if (session->use_high_priority_job_queue) {
+		queue = &job_queue_gp.high_pri;
+	} else {
+		queue = &job_queue_gp.normal_pri;
+	}
+
+	job_queue_gp.depth += 1;
+	job_queue_gp.big_job_num += (job->big_job) ? 1 : 0;
+
+	/* Add job to queue (mali_gp_job_queue_add find correct place). */
+	mali_gp_job_list_add(job, queue);
+
+	/*
+	 * We hold a PM reference for every job we hold queued (and running)
+	 * It is important that we take this reference after job has been
+	 * added the the queue so that any runtime resume could schedule this
+	 * job right there and then.
+	 */
+	_mali_osk_pm_dev_ref_get_async();
+
+	if (mali_utilization_enabled()) {
+		/*
+		 * We cheat a little bit by counting the GP as busy from the
+		 * time a GP job is queued. This will be fine because we only
+		 * loose the tiny idle gap between jobs, but we will instead
+		 * get less utilization work to do (less locks taken)
+		 */
+		mali_utilization_gp_start();
+	}
+
+	mali_pm_record_gpu_active(MALI_TRUE);
+
+	/* Add profiling events for job enqueued */
+	_mali_osk_profiling_add_event(
+		MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE,
+		mali_gp_job_get_pid(job),
+		mali_gp_job_get_tid(job),
+		mali_gp_job_get_frame_builder_id(job),
+		mali_gp_job_get_flush_id(job),
+		0);
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+	trace_gpu_job_enqueue(mali_gp_job_get_tid(job),
+			      mali_gp_job_get_id(job), "GP");
+#endif
+
+	MALI_DEBUG_PRINT(3, ("Mali GP scheduler: Job %u (0x%08X) queued\n",
+			     mali_gp_job_get_id(job), job));
+
+	return MALI_TRUE; /* job queued */
+}
+
+static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job)
+{
+	struct mali_session_data *session;
+	_mali_osk_list_t *queue = NULL;
+
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_pp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (unlikely(session->is_aborting)) {
+		MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while session is aborting.\n",
+				     mali_pp_job_get_id(job), job));
+		return MALI_FALSE; /* job not queued */
+	} else if (unlikely(MALI_SWAP_IN_FAIL == job->swap_status)) {
+		MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while swap in failed.\n",
+				     mali_pp_job_get_id(job), job));
+		return MALI_FALSE;
+	}
+
+	mali_pp_job_set_cache_order(job, mali_scheduler_get_new_cache_order());
+
+	if (session->use_high_priority_job_queue) {
+		queue = &job_queue_pp.high_pri;
+	} else {
+		queue = &job_queue_pp.normal_pri;
+	}
+
+	job_queue_pp.depth +=
+		mali_pp_job_get_sub_job_count(job);
+
+	/* Add job to queue (mali_gp_job_queue_add find correct place). */
+	mali_pp_job_list_add(job, queue);
+
+	/*
+	 * We hold a PM reference for every job we hold queued (and running)
+	 * It is important that we take this reference after job has been
+	 * added the the queue so that any runtime resume could schedule this
+	 * job right there and then.
+	 */
+	_mali_osk_pm_dev_ref_get_async();
+
+	if (mali_utilization_enabled()) {
+		/*
+		 * We cheat a little bit by counting the PP as busy from the
+		 * time a PP job is queued. This will be fine because we only
+		 * loose the tiny idle gap between jobs, but we will instead
+		 * get less utilization work to do (less locks taken)
+		 */
+		mali_utilization_pp_start();
+	}
+
+	mali_pm_record_gpu_active(MALI_FALSE);
+
+	/* Add profiling events for job enqueued */
+	_mali_osk_profiling_add_event(
+		MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE,
+		mali_pp_job_get_pid(job),
+		mali_pp_job_get_tid(job),
+		mali_pp_job_get_frame_builder_id(job),
+		mali_pp_job_get_flush_id(job),
+		0);
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+	trace_gpu_job_enqueue(mali_pp_job_get_tid(job),
+			      mali_pp_job_get_id(job), "PP");
+#endif
+
+	MALI_DEBUG_PRINT(3, ("Mali PP scheduler: %s job %u (0x%08X) with %u parts queued.\n",
+			     mali_pp_job_is_virtual(job)
+			     ? "Virtual" : "Physical",
+			     mali_pp_job_get_id(job), job,
+			     mali_pp_job_get_sub_job_count(job)));
+
+	return MALI_TRUE; /* job queued */
+}
+
+static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job,
+		mali_bool success)
+{
+	_mali_uk_gp_job_finished_s *jobres;
+	struct mali_session_data *session;
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_gp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	notification = mali_gp_job_get_finished_notification(job);
+	MALI_DEBUG_ASSERT_POINTER(notification);
+
+	jobres = notification->result_buffer;
+	MALI_DEBUG_ASSERT_POINTER(jobres);
+
+	jobres->pending_big_job_num = mali_scheduler_job_gp_big_job_count();
+
+	jobres->user_job_ptr = mali_gp_job_get_user_id(job);
+	if (MALI_TRUE == success) {
+		jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS;
+	} else {
+		jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR;
+	}
+	jobres->heap_current_addr = mali_gp_job_get_current_heap_addr(job);
+	jobres->perf_counter0 = mali_gp_job_get_perf_counter_value0(job);
+	jobres->perf_counter1 = mali_gp_job_get_perf_counter_value1(job);
+
+	mali_session_send_notification(session, notification);
+}
+
+void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job,
+		u32 num_cores_in_virtual)
+{
+	u32 i;
+	u32 num_counters_to_copy;
+	_mali_uk_pp_job_finished_s *jobres;
+	struct mali_session_data *session;
+	_mali_osk_notification_t *notification;
+
+	if (MALI_TRUE == mali_pp_job_use_no_notification(job)) {
+		return;
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_pp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	notification = mali_pp_job_get_finished_notification(job);
+	MALI_DEBUG_ASSERT_POINTER(notification);
+
+	jobres = notification->result_buffer;
+	MALI_DEBUG_ASSERT_POINTER(jobres);
+
+	jobres->user_job_ptr = mali_pp_job_get_user_id(job);
+	if (MALI_TRUE == mali_pp_job_was_success(job)) {
+		jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS;
+	} else {
+		jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR;
+	}
+
+	if (mali_pp_job_is_virtual(job)) {
+		num_counters_to_copy = num_cores_in_virtual;
+	} else {
+		num_counters_to_copy = mali_pp_job_get_sub_job_count(job);
+	}
+
+	for (i = 0; i < num_counters_to_copy; i++) {
+		jobres->perf_counter0[i] =
+			mali_pp_job_get_perf_counter_value0(job, i);
+		jobres->perf_counter1[i] =
+			mali_pp_job_get_perf_counter_value1(job, i);
+		jobres->perf_counter_src0 =
+			mali_pp_job_get_pp_counter_global_src0();
+		jobres->perf_counter_src1 =
+			mali_pp_job_get_pp_counter_global_src1();
+	}
+
+	mali_session_send_notification(session, notification);
+}
+
+static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock);
+	mali_pp_job_list_addtail(job, &scheduler_pp_job_deletion_queue);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock);
+
+	_mali_osk_wq_schedule_work(scheduler_wq_pp_job_delete);
+}
+
+void mali_scheduler_do_pp_job_delete(void *arg)
+{
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(list);
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+
+	MALI_IGNORE(arg);
+
+	/*
+	 * Quickly "unhook" the jobs pending to be deleted, so we can release
+	 * the lock before we start deleting the job objects
+	 * (without any locks held)
+	 */
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock);
+	_mali_osk_list_move_list(&scheduler_pp_job_deletion_queue, &list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+		_mali_osk_list_delinit(&job->list);
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+		mali_dma_fence_context_term(&job->dma_fence_context);
+#endif
+
+		mali_pp_job_delete(job); /* delete the job object itself */
+	}
+}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+
+static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock);
+	mali_pp_job_list_addtail(job, &scheduler_pp_job_queue_list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock);
+
+	_mali_osk_wq_schedule_work(scheduler_wq_pp_job_queue);
+}
+
+static void mali_scheduler_do_pp_job_queue(void *arg)
+{
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(list);
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_IGNORE(arg);
+
+	/*
+	 * Quickly "unhook" the jobs pending to be queued, so we can release
+	 * the lock before we start queueing the job objects
+	 * (without any locks held)
+	 */
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock);
+	_mali_osk_list_move_list(&scheduler_pp_job_queue_list, &list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock);
+
+	/* First loop through all jobs and do the pre-work (no locks needed) */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_needs_dma_buf_mapping(job)) {
+			/*
+			 * This operation could fail, but we continue anyway,
+			 * because the worst that could happen is that this
+			 * job will fail due to a Mali page fault.
+			 */
+			mali_dma_buf_map_job(job);
+		}
+	}
+
+	mali_scheduler_lock();
+
+	/* Then loop through all jobs again to queue them (lock needed) */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+
+		/* Remove from scheduler_pp_job_queue_list before queueing */
+		mali_pp_job_list_remove(job);
+
+		if (mali_scheduler_queue_pp_job(job)) {
+			/* Job queued successfully */
+			schedule_mask |= MALI_SCHEDULER_MASK_PP;
+		} else {
+			/* Failed to enqueue job, release job (with error) */
+			mali_pp_job_fb_lookup_remove(job);
+			mali_pp_job_mark_unstarted_failed(job);
+
+			/* unlock scheduler in this uncommon case */
+			mali_scheduler_unlock();
+
+			schedule_mask |= mali_timeline_tracker_release(
+						 mali_pp_job_get_tracker(job));
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(job, 0, MALI_TRUE,
+						       MALI_FALSE);
+
+			mali_scheduler_lock();
+		}
+	}
+
+	mali_scheduler_unlock();
+
+	/* Trigger scheduling of jobs */
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+void mali_scheduler_gp_pp_job_queue_print(void)
+{
+	struct mali_gp_job *gp_job = NULL;
+	struct mali_gp_job *tmp_gp_job = NULL;
+	struct mali_pp_job *pp_job = NULL;
+	struct mali_pp_job *tmp_pp_job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj);
+
+	/* dump job queup status */
+	if ((0 == job_queue_gp.depth) && (0 == job_queue_pp.depth)) {
+		MALI_PRINT(("No GP&PP job in the job queue.\n"));
+		return;
+	}
+
+	MALI_PRINT(("Total (%d) GP job in the job queue.\n", job_queue_gp.depth));
+	if (job_queue_gp.depth > 0) {
+		if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(gp_job, tmp_gp_job, &job_queue_gp.high_pri,
+						    struct mali_gp_job, list) {
+				MALI_PRINT(("GP job(%p) id = %d tid = %d pid = %d in the gp job high_pri queue\n", gp_job, gp_job->id, gp_job->tid, gp_job->pid));
+			}
+		}
+
+		if (!_mali_osk_list_empty(&job_queue_gp.normal_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(gp_job, tmp_gp_job, &job_queue_gp.normal_pri,
+						    struct mali_gp_job, list) {
+				MALI_PRINT(("GP job(%p) id = %d tid = %d pid = %d in the gp job normal_pri queue\n", gp_job, gp_job->id, gp_job->tid, gp_job->pid));
+			}
+		}
+	}
+
+	MALI_PRINT(("Total (%d) PP job in the job queue.\n", job_queue_pp.depth));
+	if (job_queue_pp.depth > 0) {
+		if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(pp_job, tmp_pp_job, &job_queue_pp.high_pri,
+						    struct mali_pp_job, list) {
+				if (mali_pp_job_is_virtual(pp_job)) {
+					MALI_PRINT(("PP Virtual job(%p) id = %d tid = %d pid = %d in the pp job high_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				} else {
+					MALI_PRINT(("PP Physical job(%p) id = %d tid = %d pid = %d in the pp job high_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				}
+			}
+		}
+
+		if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(pp_job, tmp_pp_job, &job_queue_pp.normal_pri,
+						    struct mali_pp_job, list) {
+				if (mali_pp_job_is_virtual(pp_job)) {
+					MALI_PRINT(("PP Virtual job(%p) id = %d tid = %d pid = %d in the pp job normal_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				} else {
+					MALI_PRINT(("PP Physical job(%p) id = %d tid = %d pid = %d in the pp job normal_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				}
+			}
+		}
+	}
+
+	/* dump group running job status */
+	mali_executor_running_status_print();
+}
diff --git a/mali/common/mali_scheduler.h b/mali/common/mali_scheduler.h
new file mode 100755
index 0000000..8a7ebc9
--- /dev/null
+++ b/mali/common/mali_scheduler.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SCHEDULER_H__
+#define __MALI_SCHEDULER_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_scheduler_types.h"
+#include "mali_session.h"
+
+struct mali_scheduler_job_queue {
+	_MALI_OSK_LIST_HEAD(normal_pri); /* Queued jobs with normal priority */
+	_MALI_OSK_LIST_HEAD(high_pri);   /* Queued jobs with high priority */
+	u32 depth;                       /* Depth of combined queues. */
+	u32 big_job_num;
+};
+
+extern _mali_osk_spinlock_irq_t *mali_scheduler_lock_obj;
+
+/* Queue of jobs to be executed on the GP group */
+extern struct mali_scheduler_job_queue job_queue_gp;
+
+/* Queue of PP jobs */
+extern struct mali_scheduler_job_queue job_queue_pp;
+
+extern _mali_osk_atomic_t mali_job_id_autonumber;
+extern _mali_osk_atomic_t mali_job_cache_order_autonumber;
+
+#define MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+_mali_osk_errcode_t mali_scheduler_initialize(void);
+void mali_scheduler_terminate(void);
+
+MALI_STATIC_INLINE void mali_scheduler_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_scheduler_lock_obj);
+	MALI_DEBUG_PRINT(5, ("Mali scheduler: scheduler lock taken.\n"));
+}
+
+MALI_STATIC_INLINE void mali_scheduler_unlock(void)
+{
+	MALI_DEBUG_PRINT(5, ("Mali scheduler: Releasing scheduler lock.\n"));
+	_mali_osk_spinlock_irq_unlock(mali_scheduler_lock_obj);
+}
+
+MALI_STATIC_INLINE u32 mali_scheduler_job_gp_count(void)
+{
+	return job_queue_gp.depth;
+}
+MALI_STATIC_INLINE u32 mali_scheduler_job_gp_big_job_count(void)
+{
+	return job_queue_gp.big_job_num;
+}
+
+u32 mali_scheduler_job_physical_head_count(mali_bool gpu_mode_is_secure);
+
+mali_bool mali_scheduler_job_next_is_virtual(void);
+struct mali_pp_job *mali_scheduler_job_pp_next(void);
+
+struct mali_gp_job *mali_scheduler_job_gp_get(void);
+struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void);
+struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void);
+struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job);
+struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void);
+
+MALI_STATIC_INLINE u32 mali_scheduler_get_new_id(void)
+{
+	return _mali_osk_atomic_inc_return(&mali_job_id_autonumber);
+}
+
+MALI_STATIC_INLINE u32 mali_scheduler_get_new_cache_order(void)
+{
+	return _mali_osk_atomic_inc_return(&mali_job_cache_order_autonumber);
+}
+
+/**
+ * @brief Used by the Timeline system to queue a GP job.
+ *
+ * @note @ref mali_executor_schedule_from_mask() should be called if this
+ * function returns non-zero.
+ *
+ * @param job The GP job that is being activated.
+ *
+ * @return A scheduling bitmask that can be used to decide if scheduling is
+ * necessary after this call.
+ */
+mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job);
+
+/**
+ * @brief Used by the Timeline system to queue a PP job.
+ *
+ * @note @ref mali_executor_schedule_from_mask() should be called if this
+ * function returns non-zero.
+ *
+ * @param job The PP job that is being activated.
+ *
+ * @return A scheduling bitmask that can be used to decide if scheduling is
+ * necessary after this call.
+ */
+mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job);
+
+void mali_scheduler_complete_gp_job(struct mali_gp_job *job,
+				    mali_bool success,
+				    mali_bool user_notification,
+				    mali_bool dequeued);
+
+void mali_scheduler_complete_pp_job(struct mali_pp_job *job,
+				    u32 num_cores_in_virtual,
+				    mali_bool user_notification,
+				    mali_bool dequeued);
+
+void mali_scheduler_abort_session(struct mali_session_data *session);
+
+void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job,
+		u32 num_cores_in_virtual);
+
+#if MALI_STATE_TRACKING
+u32 mali_scheduler_dump_state(char *buf, u32 size);
+#endif
+
+void mali_scheduler_gp_pp_job_queue_print(void);
+
+#endif /* __MALI_SCHEDULER_H__ */
diff --git a/mali/common/mali_scheduler_types.h b/mali/common/mali_scheduler_types.h
new file mode 100755
index 0000000..0819c59
--- /dev/null
+++ b/mali/common/mali_scheduler_types.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SCHEDULER_TYPES_H__
+#define __MALI_SCHEDULER_TYPES_H__
+
+#include "mali_osk.h"
+
+#define MALI_SCHEDULER_JOB_ID_SPAN 65535
+
+/**
+ * Bitmask used for defered scheduling of subsystems.
+ */
+typedef u32 mali_scheduler_mask;
+
+#define MALI_SCHEDULER_MASK_GP (1<<0)
+#define MALI_SCHEDULER_MASK_PP (1<<1)
+
+#define MALI_SCHEDULER_MASK_EMPTY 0
+#define MALI_SCHEDULER_MASK_ALL (MALI_SCHEDULER_MASK_GP | MALI_SCHEDULER_MASK_PP)
+
+#endif /* __MALI_SCHEDULER_TYPES_H__ */
diff --git a/mali/common/mali_session.c b/mali/common/mali_session.c
new file mode 100755
index 0000000..95819f6
--- /dev/null
+++ b/mali/common/mali_session.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/sched.h>
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_session.h"
+#include "mali_ukk.h"
+#ifdef MALI_MEM_SWAP_TRACKING
+#include "mali_memory_swap_alloc.h"
+#endif
+
+_MALI_OSK_LIST_HEAD(mali_sessions);
+static u32 mali_session_count = 0;
+
+_mali_osk_spinlock_irq_t *mali_sessions_lock = NULL;
+wait_queue_head_t pending_queue;
+
+_mali_osk_errcode_t mali_session_initialize(void)
+{
+	_MALI_OSK_INIT_LIST_HEAD(&mali_sessions);
+	/* init wait queue for big varying job */
+	init_waitqueue_head(&pending_queue);
+
+	mali_sessions_lock = _mali_osk_spinlock_irq_init(
+				     _MALI_OSK_LOCKFLAG_ORDERED,
+				     _MALI_OSK_LOCK_ORDER_SESSIONS);
+	if (NULL == mali_sessions_lock) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_session_terminate(void)
+{
+	if (NULL != mali_sessions_lock) {
+		_mali_osk_spinlock_irq_term(mali_sessions_lock);
+		mali_sessions_lock = NULL;
+	}
+}
+
+void mali_session_add(struct mali_session_data *session)
+{
+	mali_session_lock();
+	_mali_osk_list_add(&session->link, &mali_sessions);
+	mali_session_count++;
+	mali_session_unlock();
+}
+
+void mali_session_remove(struct mali_session_data *session)
+{
+	mali_session_lock();
+	_mali_osk_list_delinit(&session->link);
+	mali_session_count--;
+	mali_session_unlock();
+}
+
+u32 mali_session_get_count(void)
+{
+	return mali_session_count;
+}
+
+mali_bool mali_session_pp_job_is_empty(void *data)
+{
+	struct mali_session_data *session = (struct mali_session_data *)data;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 == _mali_osk_atomic_read(&session->number_of_pp_jobs)) {
+		return MALI_TRUE;
+	}
+	return MALI_FALSE;
+}
+
+wait_queue_head_t *mali_session_get_wait_queue(void)
+{
+	return &pending_queue;
+}
+
+/*
+ * Get the max completed window jobs from all active session,
+ * which will be used in window render frame per sec calculate
+ */
+#if defined(CONFIG_MALI_DVFS)
+u32 mali_session_max_window_num(void)
+{
+	struct mali_session_data *session, *tmp;
+	u32 max_window_num = 0;
+	u32 tmp_number = 0;
+
+	mali_session_lock();
+
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		tmp_number = _mali_osk_atomic_xchg(
+				     &session->number_of_window_jobs, 0);
+		if (max_window_num < tmp_number) {
+			max_window_num = tmp_number;
+		}
+	}
+
+	mali_session_unlock();
+
+	return max_window_num;
+}
+#endif
+void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx)
+{
+	struct mali_session_data *session, *tmp;
+	char task_comm[TASK_COMM_LEN];
+	char task_default[] = "not found";
+	struct task_struct *ttask;
+	u32 mali_mem_usage;
+	u32 total_mali_mem_size;
+#ifdef MALI_MEM_SWAP_TRACKING
+	u32 swap_pool_size;
+	u32 swap_unlock_size;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(print_ctx);
+	mali_session_lock();
+
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		ttask = pid_task(find_vpid(session->pid), PIDTYPE_PID);
+		if (ttask)
+			strncpy(task_comm, ttask->comm, sizeof(ttask->comm));
+		else
+			strncpy(task_comm, task_default, sizeof(task_default));
+#ifdef MALI_MEM_SWAP_TRACKING
+		_mali_osk_ctxprintf(print_ctx, "  %-25s  %-10u %-25s %-10u  %-15u  %-15u  %-10u  %-10u %-10u\n",
+				    session->comm, session->pid, task_comm,
+				    (unsigned int)((atomic_read(&session->mali_mem_allocated_pages)) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)session->max_mali_mem_allocated_size,
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_EXTERNAL])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_UMP])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_DMA_BUF])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_SWAP])) * _MALI_OSK_MALI_PAGE_SIZE)
+				   );
+#else
+		_mali_osk_ctxprintf(print_ctx, "  %-25s  %-10u %-25s %-10u  %-15u  %-15u  %-10u  %-10u\n",
+				    session->comm, session->pid, task_comm,
+				    (unsigned int)((atomic_read(&session->mali_mem_allocated_pages)) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)session->max_mali_mem_allocated_size,
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_EXTERNAL])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_UMP])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_DMA_BUF])) * _MALI_OSK_MALI_PAGE_SIZE)
+				   );
+
+#endif
+	}
+	mali_session_unlock();
+	mali_mem_usage  = _mali_ukk_report_memory_usage();
+	total_mali_mem_size = _mali_ukk_report_total_memory_size();
+	_mali_osk_ctxprintf(print_ctx, "Mali mem usage: %u\nMali mem limit: %u\n", mali_mem_usage, total_mali_mem_size);
+#ifdef MALI_MEM_SWAP_TRACKING
+	mali_mem_swap_tracking(&swap_pool_size, &swap_unlock_size);
+	_mali_osk_ctxprintf(print_ctx, "Mali swap mem pool : %u\nMali swap mem unlock: %u\n", swap_pool_size, swap_unlock_size);
+#endif
+}
diff --git a/mali/common/mali_session.h b/mali/common/mali_session.h
new file mode 100755
index 0000000..f669a31
--- /dev/null
+++ b/mali/common/mali_session.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SESSION_H__
+#define __MALI_SESSION_H__
+
+#include "mali_mmu_page_directory.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_memory_types.h"
+#include "mali_memory_manager.h"
+
+struct mali_timeline_system;
+struct mali_soft_system;
+
+/* Number of frame builder job lists per session. */
+#define MALI_PP_JOB_FB_LOOKUP_LIST_SIZE 16
+#define MALI_PP_JOB_FB_LOOKUP_LIST_MASK (MALI_PP_JOB_FB_LOOKUP_LIST_SIZE - 1)
+/*Max pending big job allowed in kernel*/
+#define MALI_MAX_PENDING_BIG_JOB (2)
+
+struct mali_session_data {
+	_mali_osk_notification_queue_t *ioctl_queue;
+
+	_mali_osk_wait_queue_t *wait_queue; /**The wait queue to wait for the number of pp job become 0.*/
+
+	_mali_osk_mutex_t *memory_lock; /**< Lock protecting the vm manipulation */
+	_mali_osk_mutex_t *cow_lock; /** < Lock protecting the cow memory free manipulation */
+#if 0
+	_mali_osk_list_t memory_head; /**< Track all the memory allocated in this session, for freeing on abnormal termination */
+#endif
+	struct mali_page_directory *page_directory; /**< MMU page directory for this session */
+
+	_MALI_OSK_LIST_HEAD(link); /**< Link for list of all sessions */
+	_MALI_OSK_LIST_HEAD(pp_job_list); /**< List of all PP jobs on this session */
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_t number_of_window_jobs; /**< Record the window jobs completed on this session in a period */
+#endif
+	_mali_osk_atomic_t number_of_pp_jobs; /** < Record the pp jobs on this session */
+
+	_mali_osk_list_t pp_job_fb_lookup_list[MALI_PP_JOB_FB_LOOKUP_LIST_SIZE]; /**< List of PP job lists per frame builder id.  Used to link jobs from same frame builder. */
+	struct mali_soft_job_system *soft_job_system; /**< Soft job system for this session. */
+	struct mali_timeline_system *timeline_system; /**< Timeline system for this session. */
+
+	mali_bool is_aborting; /**< MALI_TRUE if the session is aborting, MALI_FALSE if not. */
+	mali_bool use_high_priority_job_queue; /**< If MALI_TRUE, jobs added from this session will use the high priority job queues. */
+	u32 pid;
+	char *comm;
+	atomic_t mali_mem_array[MALI_MEM_TYPE_MAX]; /**< The array to record mem types' usage for this session. */
+	atomic_t mali_mem_allocated_pages; /** The current allocated mali memory pages, which include mali os memory and mali dedicated memory.*/
+	size_t max_mali_mem_allocated_size; /**< The past max mali memory allocated size, which include mali os memory and mali dedicated memory. */
+	/* Added for new memroy system */
+	struct mali_allocation_manager allocation_mgr;
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	u32 fence_context;      /** <  The execution dma fence context this fence is run on. */
+	_mali_osk_atomic_t fence_seqno; /** < Alinear increasing sequence number for this dma fence context. */
+#endif
+};
+
+_mali_osk_errcode_t mali_session_initialize(void);
+void mali_session_terminate(void);
+
+/* List of all sessions. Actual list head in mali_kernel_core.c */
+extern _mali_osk_list_t mali_sessions;
+/* Lock to protect modification and access to the mali_sessions list */
+extern _mali_osk_spinlock_irq_t *mali_sessions_lock;
+
+MALI_STATIC_INLINE void mali_session_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_sessions_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(mali_sessions_lock);
+}
+
+void mali_session_add(struct mali_session_data *session);
+void mali_session_remove(struct mali_session_data *session);
+u32 mali_session_get_count(void);
+mali_bool mali_session_pp_job_is_empty(void *data);
+wait_queue_head_t *mali_session_get_wait_queue(void);
+
+#define MALI_SESSION_FOREACH(session, tmp, link) \
+	_MALI_OSK_LIST_FOREACHENTRY(session, tmp, &mali_sessions, struct mali_session_data, link)
+
+MALI_STATIC_INLINE struct mali_page_directory *mali_session_get_page_directory(struct mali_session_data *session)
+{
+	return session->page_directory;
+}
+
+MALI_STATIC_INLINE void mali_session_memory_lock(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_mutex_wait(session->memory_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_memory_unlock(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_mutex_signal(session->memory_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_send_notification(struct mali_session_data *session, _mali_osk_notification_t *object)
+{
+	_mali_osk_notification_queue_send(session->ioctl_queue, object);
+}
+
+#if defined(CONFIG_MALI_DVFS)
+
+MALI_STATIC_INLINE void mali_session_inc_num_window_jobs(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_atomic_inc(&session->number_of_window_jobs);
+}
+
+/*
+ * Get the max completed window jobs from all active session,
+ * which will be used in  window render frame per sec calculate
+ */
+u32 mali_session_max_window_num(void);
+
+#endif
+
+void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx);
+
+#endif /* __MALI_SESSION_H__ */
diff --git a/mali/common/mali_soft_job.c b/mali/common/mali_soft_job.c
new file mode 100755
index 0000000..739ec78
--- /dev/null
+++ b/mali/common/mali_soft_job.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_soft_job.h"
+#include "mali_osk.h"
+#include "mali_timeline.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+
+MALI_STATIC_INLINE void mali_soft_job_system_lock(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	_mali_osk_spinlock_irq_lock(system->lock);
+	MALI_DEBUG_PRINT(5, ("Mali Soft Job: soft system %p lock taken\n", system));
+	MALI_DEBUG_ASSERT(0 == system->lock_owner);
+	MALI_DEBUG_CODE(system->lock_owner = _mali_osk_get_tid());
+}
+
+MALI_STATIC_INLINE void mali_soft_job_system_unlock(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_PRINT(5, ("Mali Soft Job: releasing soft system %p lock\n", system));
+	MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner);
+	MALI_DEBUG_CODE(system->lock_owner = 0);
+	_mali_osk_spinlock_irq_unlock(system->lock);
+}
+
+#if defined(DEBUG)
+MALI_STATIC_INLINE void mali_soft_job_system_assert_locked(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner);
+}
+#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system) mali_soft_job_system_assert_locked(system)
+#else
+#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system)
+#endif /* defined(DEBUG) */
+
+struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session)
+{
+	struct mali_soft_job_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	system = (struct mali_soft_job_system *) _mali_osk_calloc(1, sizeof(struct mali_soft_job_system));
+	if (NULL == system) {
+		return NULL;
+	}
+
+	system->session = session;
+
+	system->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_SCHEDULER);
+	if (NULL == system->lock) {
+		mali_soft_job_system_destroy(system);
+		return NULL;
+	}
+	system->lock_owner = 0;
+	system->last_job_id = 0;
+
+	_MALI_OSK_INIT_LIST_HEAD(&(system->jobs_used));
+
+	return system;
+}
+
+void mali_soft_job_system_destroy(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	/* All jobs should be free at this point. */
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&(system->jobs_used)));
+
+	if (NULL != system) {
+		if (NULL != system->lock) {
+			_mali_osk_spinlock_irq_term(system->lock);
+		}
+		_mali_osk_free(system);
+	}
+}
+
+static void mali_soft_job_system_free_job(struct mali_soft_job_system *system, struct mali_soft_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_soft_job_system_lock(job->system);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_INVALID_ID != job->id);
+	MALI_DEBUG_ASSERT(system == job->system);
+
+	_mali_osk_list_del(&(job->system_list));
+
+	mali_soft_job_system_unlock(job->system);
+
+	_mali_osk_free(job);
+}
+
+MALI_STATIC_INLINE struct mali_soft_job *mali_soft_job_system_lookup_job(struct mali_soft_job_system *system, u32 job_id)
+{
+	struct mali_soft_job *job, *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) {
+		if (job->id == job_id)
+			return job;
+	}
+
+	return NULL;
+}
+
+void mali_soft_job_destroy(struct mali_soft_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: destroying soft job %u (0x%08X)\n", job->id, job));
+
+	if (NULL != job) {
+		if (0 < _mali_osk_atomic_dec_return(&job->refcount)) return;
+
+		_mali_osk_atomic_term(&job->refcount);
+
+		if (NULL != job->activated_notification) {
+			_mali_osk_notification_delete(job->activated_notification);
+			job->activated_notification = NULL;
+		}
+
+		mali_soft_job_system_free_job(job->system, job);
+	}
+}
+
+struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job)
+{
+	struct mali_soft_job *job;
+	_mali_osk_notification_t *notification = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT((MALI_SOFT_JOB_TYPE_USER_SIGNALED == type) ||
+			  (MALI_SOFT_JOB_TYPE_SELF_SIGNALED == type));
+
+	notification = _mali_osk_notification_create(_MALI_NOTIFICATION_SOFT_ACTIVATED, sizeof(_mali_uk_soft_job_activated_s));
+	if (unlikely(NULL == notification)) {
+		MALI_PRINT_ERROR(("Mali Soft Job: failed to allocate notification"));
+		return NULL;
+	}
+
+	job = _mali_osk_malloc(sizeof(struct mali_soft_job));
+	if (unlikely(NULL == job)) {
+		MALI_DEBUG_PRINT(2, ("Mali Soft Job: system alloc job failed. \n"));
+		return NULL;
+	}
+
+	mali_soft_job_system_lock(system);
+
+	job->system = system;
+	job->id = system->last_job_id++;
+	job->state = MALI_SOFT_JOB_STATE_ALLOCATED;
+
+	_mali_osk_list_add(&(job->system_list), &(system->jobs_used));
+
+	job->type = type;
+	job->user_job = user_job;
+	job->activated = MALI_FALSE;
+
+	job->activated_notification = notification;
+
+	_mali_osk_atomic_init(&job->refcount, 1);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_ALLOCATED == job->state);
+	MALI_DEBUG_ASSERT(system == job->system);
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_INVALID_ID != job->id);
+
+	mali_soft_job_system_unlock(system);
+
+	return job;
+}
+
+mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence)
+{
+	mali_timeline_point point;
+	struct mali_soft_job_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	system = job->system;
+
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT_POINTER(system->session->timeline_system);
+
+	mali_soft_job_system_lock(system);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_ALLOCATED == job->state);
+	job->state = MALI_SOFT_JOB_STATE_STARTED;
+
+	mali_soft_job_system_unlock(system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: starting soft job %u (0x%08X)\n", job->id, job));
+
+	mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_SOFT, fence, job);
+	point = mali_timeline_system_add_tracker(system->session->timeline_system, &job->tracker, MALI_TIMELINE_SOFT);
+
+	return point;
+}
+
+static mali_bool mali_soft_job_is_activated(void *data)
+{
+	struct mali_soft_job *job;
+
+	job = (struct mali_soft_job *) data;
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	return job->activated;
+}
+
+_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id)
+{
+	struct mali_soft_job *job;
+	struct mali_timeline_system *timeline_system;
+	mali_scheduler_mask schedule_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_soft_job_system_lock(system);
+
+	job = mali_soft_job_system_lookup_job(system, job_id);
+
+	if ((NULL == job) || (MALI_SOFT_JOB_TYPE_USER_SIGNALED != job->type)
+	    || !(MALI_SOFT_JOB_STATE_STARTED == job->state || MALI_SOFT_JOB_STATE_TIMED_OUT == job->state)) {
+		mali_soft_job_system_unlock(system);
+		MALI_PRINT_ERROR(("Mali Soft Job: invalid soft job id %u", job_id));
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	if (MALI_SOFT_JOB_STATE_TIMED_OUT == job->state) {
+		job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+		mali_soft_job_system_unlock(system);
+
+		MALI_DEBUG_ASSERT(MALI_TRUE == job->activated);
+		MALI_DEBUG_PRINT(4, ("Mali Soft Job: soft job %u (0x%08X) was timed out\n", job->id, job));
+		mali_soft_job_destroy(job);
+
+		return _MALI_OSK_ERR_TIMEOUT;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+	job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+	mali_soft_job_system_unlock(system);
+
+	/* Since the job now is in signaled state, timeouts from the timeline system will be
+	 * ignored, and it is not possible to signal this job again. */
+
+	timeline_system = system->session->timeline_system;
+	MALI_DEBUG_ASSERT_POINTER(timeline_system);
+
+	/* Wait until activated. */
+	_mali_osk_wait_queue_wait_event(timeline_system->wait_queue, mali_soft_job_is_activated, (void *) job);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: signaling soft job %u (0x%08X)\n", job->id, job));
+
+	schedule_mask = mali_timeline_tracker_release(&job->tracker);
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+
+	mali_soft_job_destroy(job);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_soft_job_send_activated_notification(struct mali_soft_job *job)
+{
+	if (NULL != job->activated_notification) {
+		_mali_uk_soft_job_activated_s *res = job->activated_notification->result_buffer;
+		res->user_job = job->user_job;
+		mali_session_send_notification(job->system->session, job->activated_notification);
+	}
+	job->activated_notification = NULL;
+}
+
+mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	MALI_DEBUG_ASSERT_POINTER(job->system->session);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline activation for soft job %u (0x%08X).\n", job->id, job));
+
+	mali_soft_job_system_lock(job->system);
+
+	if (unlikely(job->system->session->is_aborting)) {
+		MALI_DEBUG_PRINT(3, ("Mali Soft Job: Soft job %u (0x%08X) activated while session is aborting.\n", job->id, job));
+
+		mali_soft_job_system_unlock(job->system);
+
+		/* Since we are in shutdown, we can ignore the scheduling bitmask. */
+		mali_timeline_tracker_release(&job->tracker);
+		mali_soft_job_destroy(job);
+		return schedule_mask;
+	}
+
+	/* Send activated notification. */
+	mali_soft_job_send_activated_notification(job);
+
+	/* Wake up sleeping signaler. */
+	job->activated = MALI_TRUE;
+
+	/* If job type is self signaled, release tracker, move soft job to free list, and scheduler at once */
+	if (MALI_SOFT_JOB_TYPE_SELF_SIGNALED == job->type) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+		job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+		mali_soft_job_system_unlock(job->system);
+
+		schedule_mask |= mali_timeline_tracker_release(&job->tracker);
+
+		mali_soft_job_destroy(job);
+	} else {
+		_mali_osk_wait_queue_wake_up(job->tracker.system->wait_queue);
+
+		mali_soft_job_system_unlock(job->system);
+	}
+
+	return schedule_mask;
+}
+
+mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	MALI_DEBUG_ASSERT_POINTER(job->system->session);
+	MALI_DEBUG_ASSERT(MALI_TRUE == job->activated);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline timeout for soft job %u (0x%08X).\n", job->id, job));
+
+	mali_soft_job_system_lock(job->system);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED  == job->state ||
+			  MALI_SOFT_JOB_STATE_SIGNALED == job->state);
+
+	if (unlikely(job->system->session->is_aborting)) {
+		/* The session is aborting.  This job will be released and destroyed by @ref
+		 * mali_soft_job_system_abort(). */
+		mali_soft_job_system_unlock(job->system);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	if (MALI_SOFT_JOB_STATE_STARTED != job->state) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED == job->state);
+
+		/* The job is about to be signaled, ignore timeout. */
+		MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeout on soft job %u (0x%08X) in signaled state.\n", job->id, job));
+		mali_soft_job_system_unlock(job->system);
+		return schedule_mask;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+	job->state = MALI_SOFT_JOB_STATE_TIMED_OUT;
+	_mali_osk_atomic_inc(&job->refcount);
+
+	mali_soft_job_system_unlock(job->system);
+
+	schedule_mask = mali_timeline_tracker_release(&job->tracker);
+
+	mali_soft_job_destroy(job);
+
+	return schedule_mask;
+}
+
+void mali_soft_job_system_abort(struct mali_soft_job_system *system)
+{
+	struct mali_soft_job *job, *tmp;
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(jobs);
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting soft job system for session 0x%08X.\n", system->session));
+
+	mali_soft_job_system_lock(system);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED   == job->state ||
+				  MALI_SOFT_JOB_STATE_TIMED_OUT == job->state);
+
+		if (MALI_SOFT_JOB_STATE_STARTED == job->state) {
+			/* If the job has been activated, we have to release the tracker and destroy
+			 * the job.  If not, the tracker will be released and the job destroyed when
+			 * it is activated. */
+			if (MALI_TRUE == job->activated) {
+				MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting unsignaled soft job %u (0x%08X).\n", job->id, job));
+
+				job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+				_mali_osk_list_move(&job->system_list, &jobs);
+			}
+		} else if (MALI_SOFT_JOB_STATE_TIMED_OUT == job->state) {
+			MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting timed out soft job %u (0x%08X).\n", job->id, job));
+
+			/* We need to destroy this soft job. */
+			_mali_osk_list_move(&job->system_list, &jobs);
+		}
+	}
+
+	mali_soft_job_system_unlock(system);
+
+	/* Release and destroy jobs. */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &jobs, struct mali_soft_job, system_list) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED  == job->state ||
+				  MALI_SOFT_JOB_STATE_TIMED_OUT == job->state);
+
+		if (MALI_SOFT_JOB_STATE_SIGNALED == job->state) {
+			mali_timeline_tracker_release(&job->tracker);
+		}
+
+		/* Move job back to used list before destroying. */
+		_mali_osk_list_move(&job->system_list, &system->jobs_used);
+
+		mali_soft_job_destroy(job);
+	}
+
+	mali_executor_schedule_from_mask(MALI_SCHEDULER_MASK_ALL, MALI_FALSE);
+}
diff --git a/mali/common/mali_soft_job.h b/mali/common/mali_soft_job.h
new file mode 100755
index 0000000..4dd0589
--- /dev/null
+++ b/mali/common/mali_soft_job.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SOFT_JOB_H__
+#define __MALI_SOFT_JOB_H__
+
+#include "mali_osk.h"
+
+#include "mali_timeline.h"
+
+struct mali_timeline_fence;
+struct mali_session_data;
+struct mali_soft_job;
+struct mali_soft_job_system;
+
+/**
+ * Soft job types.
+ *
+ * Soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED will only complete after activation if either
+ * they are signaled by user-space (@ref mali_soft_job_system_signaled_job) or if they are timed out
+ * by the Timeline system.
+ * Soft jobs of type MALI_SOFT_JOB_TYPE_SELF_SIGNALED will release job resource automatically
+ * in kernel when the job is activated.
+ */
+typedef enum mali_soft_job_type {
+	MALI_SOFT_JOB_TYPE_SELF_SIGNALED,
+	MALI_SOFT_JOB_TYPE_USER_SIGNALED,
+} mali_soft_job_type;
+
+/**
+ * Soft job state.
+ *
+ * mali_soft_job_system_start_job a job will first be allocated.The job's state set to MALI_SOFT_JOB_STATE_ALLOCATED.
+ * Once the job is added to the timeline system, the state changes to MALI_SOFT_JOB_STATE_STARTED.
+ *
+ * For soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED the state is changed to
+ * MALI_SOFT_JOB_STATE_SIGNALED when @ref mali_soft_job_system_signal_job is called and the soft
+ * job's state is MALI_SOFT_JOB_STATE_STARTED or MALI_SOFT_JOB_STATE_TIMED_OUT.
+ *
+ * If a soft job of type MALI_SOFT_JOB_TYPE_USER_SIGNALED is timed out before being signaled, the
+ * state is changed to MALI_SOFT_JOB_STATE_TIMED_OUT.  This can only happen to soft jobs in state
+ * MALI_SOFT_JOB_STATE_STARTED.
+ *
+ */
+typedef enum mali_soft_job_state {
+	MALI_SOFT_JOB_STATE_ALLOCATED,
+	MALI_SOFT_JOB_STATE_STARTED,
+	MALI_SOFT_JOB_STATE_SIGNALED,
+	MALI_SOFT_JOB_STATE_TIMED_OUT,
+} mali_soft_job_state;
+
+#define MALI_SOFT_JOB_INVALID_ID ((u32) -1)
+
+/**
+ * Soft job struct.
+ *
+ * Soft job can be used to represent any kind of CPU work done in kernel-space.
+ */
+typedef struct mali_soft_job {
+	mali_soft_job_type            type;                   /**< Soft job type.  Must be one of MALI_SOFT_JOB_TYPE_*. */
+	u64                           user_job;               /**< Identifier for soft job in user space. */
+	_mali_osk_atomic_t            refcount;               /**< Soft jobs are reference counted to prevent premature deletion. */
+	struct mali_timeline_tracker  tracker;                /**< Timeline tracker for soft job. */
+	mali_bool                     activated;              /**< MALI_TRUE if the job has been activated, MALI_FALSE if not. */
+	_mali_osk_notification_t     *activated_notification; /**< Pre-allocated notification object for ACTIVATED_NOTIFICATION. */
+
+	/* Protected by soft job system lock. */
+	u32                           id;                     /**< Used by user-space to find corresponding soft job in kernel-space. */
+	mali_soft_job_state           state;                  /**< State of soft job, must be one of MALI_SOFT_JOB_STATE_*. */
+	struct mali_soft_job_system  *system;                 /**< The soft job system this job is in. */
+	_mali_osk_list_t              system_list;            /**< List element used by soft job system. */
+} mali_soft_job;
+
+/**
+ * Per-session soft job system.
+ *
+ * The soft job system is used to manage all soft jobs that belongs to a session.
+ */
+typedef struct mali_soft_job_system {
+	struct mali_session_data *session;                    /**< The session this soft job system belongs to. */
+	_MALI_OSK_LIST_HEAD(jobs_used);                       /**< List of all allocated soft jobs. */
+
+	_mali_osk_spinlock_irq_t *lock;                       /**< Lock used to protect soft job system and its soft jobs. */
+	u32 lock_owner;                                       /**< Contains tid of thread that locked the system or 0, if not locked. */
+	u32 last_job_id;                                      /**< Recored the last job id protected by lock. */
+} mali_soft_job_system;
+
+/**
+ * Create a soft job system.
+ *
+ * @param session The session this soft job system will belong to.
+ * @return The new soft job system, or NULL if unsuccessful.
+ */
+struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session);
+
+/**
+ * Destroy a soft job system.
+ *
+ * @note The soft job must not have any started or activated jobs.  Call @ref
+ * mali_soft_job_system_abort first.
+ *
+ * @param system The soft job system we are destroying.
+ */
+void mali_soft_job_system_destroy(struct mali_soft_job_system *system);
+
+/**
+ * Create a soft job.
+ *
+ * @param system Soft job system to create soft job from.
+ * @param type Type of the soft job.
+ * @param user_job Identifier for soft job in user space.
+ * @return New soft job if successful, NULL if not.
+ */
+struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job);
+
+/**
+ * Destroy soft job.
+ *
+ * @param job Soft job to destroy.
+ */
+void mali_soft_job_destroy(struct mali_soft_job *job);
+
+/**
+ * Start a soft job.
+ *
+ * The soft job will be added to the Timeline system which will then activate it after all
+ * dependencies have been resolved.
+ *
+ * Create soft jobs with @ref mali_soft_job_create before starting them.
+ *
+ * @param job Soft job to start.
+ * @param fence Fence representing dependencies for this soft job.
+ * @return Point on soft job timeline.
+ */
+mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence);
+
+/**
+ * Use by user-space to signal that a soft job has completed.
+ *
+ * @note Only valid for soft jobs with type MALI_SOFT_JOB_TYPE_USER_SIGNALED.
+ *
+ * @note The soft job must be in state MALI_SOFT_JOB_STATE_STARTED for the signal to be successful.
+ *
+ * @note If the soft job was signaled successfully, or it received a time out, the soft job will be
+ * destroyed after this call and should no longer be used.
+ *
+ * @note This function will block until the soft job has been activated.
+ *
+ * @param system The soft job system the job was started in.
+ * @param job_id ID of soft job we are signaling.
+ *
+ * @return _MALI_OSK_ERR_ITEM_NOT_FOUND if the soft job ID was invalid, _MALI_OSK_ERR_TIMEOUT if the
+ * soft job was timed out or _MALI_OSK_ERR_OK if we successfully signaled the soft job.
+ */
+_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id);
+
+/**
+ * Used by the Timeline system to activate a soft job.
+ *
+ * @param job The soft job that is being activated.
+ * @return A scheduling bitmask.
+ */
+mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job);
+
+/**
+ * Used by the Timeline system to timeout a soft job.
+ *
+ * A soft job is timed out if it completes or is signaled later than MALI_TIMELINE_TIMEOUT_HZ after
+ * activation.
+ *
+ * @param job The soft job that is being timed out.
+ * @return A scheduling bitmask.
+ */
+mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job);
+
+/**
+ * Used to cleanup activated soft jobs in the soft job system on session abort.
+ *
+ * @param system The soft job system that is being aborted.
+ */
+void mali_soft_job_system_abort(struct mali_soft_job_system *system);
+
+#endif /* __MALI_SOFT_JOB_H__ */
diff --git a/mali/common/mali_spinlock_reentrant.c b/mali/common/mali_spinlock_reentrant.c
new file mode 100755
index 0000000..28e2e17
--- /dev/null
+++ b/mali/common/mali_spinlock_reentrant.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_spinlock_reentrant.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order)
+{
+	struct mali_spinlock_reentrant *spinlock;
+
+	spinlock = _mali_osk_calloc(1, sizeof(struct mali_spinlock_reentrant));
+	if (NULL == spinlock) {
+		return NULL;
+	}
+
+	spinlock->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, lock_order);
+	if (NULL == spinlock->lock) {
+		mali_spinlock_reentrant_term(spinlock);
+		return NULL;
+	}
+
+	return spinlock;
+}
+
+void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT(0 == spinlock->counter && 0 == spinlock->owner);
+
+	if (NULL != spinlock->lock) {
+		_mali_osk_spinlock_irq_term(spinlock->lock);
+	}
+
+	_mali_osk_free(spinlock);
+}
+
+void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	MALI_DEBUG_ASSERT(0 != tid);
+
+	MALI_DEBUG_PRINT(5, ("%s ^\n", __FUNCTION__));
+
+	if (tid != spinlock->owner) {
+		_mali_osk_spinlock_irq_lock(spinlock->lock);
+		MALI_DEBUG_ASSERT(0 == spinlock->owner && 0 == spinlock->counter);
+		spinlock->owner = tid;
+	}
+
+	MALI_DEBUG_PRINT(5, ("%s v\n", __FUNCTION__));
+
+	++spinlock->counter;
+}
+
+void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	MALI_DEBUG_ASSERT(0 != tid && tid == spinlock->owner);
+
+	--spinlock->counter;
+	if (0 == spinlock->counter) {
+		spinlock->owner = 0;
+		MALI_DEBUG_PRINT(5, ("%s release last\n", __FUNCTION__));
+		_mali_osk_spinlock_irq_unlock(spinlock->lock);
+	}
+}
diff --git a/mali/common/mali_spinlock_reentrant.h b/mali/common/mali_spinlock_reentrant.h
new file mode 100755
index 0000000..793875a
--- /dev/null
+++ b/mali/common/mali_spinlock_reentrant.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SPINLOCK_REENTRANT_H__
+#define __MALI_SPINLOCK_REENTRANT_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/**
+ * Reentrant spinlock.
+ */
+struct mali_spinlock_reentrant {
+	_mali_osk_spinlock_irq_t *lock;
+	u32               owner;
+	u32               counter;
+};
+
+/**
+ * Create a new reentrant spinlock.
+ *
+ * @param lock_order Lock order.
+ * @return New reentrant spinlock.
+ */
+struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order);
+
+/**
+ * Terminate reentrant spinlock and free any associated resources.
+ *
+ * @param spinlock Reentrant spinlock to terminate.
+ */
+void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock);
+
+/**
+ * Wait for reentrant spinlock to be signaled.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ */
+void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid);
+
+/**
+ * Signal reentrant spinlock.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ */
+void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid);
+
+/**
+ * Check if thread is holding reentrant spinlock.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ * @return MALI_TRUE if thread is holding spinlock, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_spinlock_reentrant_is_held(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	return (tid == spinlock->owner && 0 < spinlock->counter);
+}
+
+#endif /* __MALI_SPINLOCK_REENTRANT_H__ */
diff --git a/mali/common/mali_timeline.c b/mali/common/mali_timeline.c
new file mode 100755
index 0000000..4ed5f86
--- /dev/null
+++ b/mali/common/mali_timeline.c
@@ -0,0 +1,1911 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/file.h>
+#include "mali_timeline.h"
+#include "mali_kernel_common.h"
+#include "mali_scheduler.h"
+#include "mali_soft_job.h"
+#include "mali_timeline_fence_wait.h"
+#include "mali_timeline_sync_fence.h"
+#include "mali_executor.h"
+#include "mali_pp_job.h"
+
+#define MALI_TIMELINE_SYSTEM_LOCKED(system) (mali_spinlock_reentrant_is_held((system)->spinlock, _mali_osk_get_tid()))
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+_mali_osk_wq_work_t *sync_fence_callback_work_t = NULL;
+_mali_osk_spinlock_irq_t *sync_fence_callback_list_lock = NULL;
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(sync_fence_callback_queue);
+#endif
+
+/*
+ * Following three elements are used to record how many
+ * gp, physical pp or virtual pp jobs are delayed in the whole
+ * timeline system, we can use these three value to decide
+ * if need to deactivate idle group.
+ */
+_mali_osk_atomic_t gp_tracker_count;
+_mali_osk_atomic_t phy_pp_tracker_count;
+_mali_osk_atomic_t virt_pp_tracker_count;
+
+static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *waiter);
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+
+struct mali_deferred_fence_put_entry {
+	struct hlist_node list;
+	struct sync_fence *fence;
+};
+
+static HLIST_HEAD(mali_timeline_sync_fence_to_free_list);
+static DEFINE_SPINLOCK(mali_timeline_sync_fence_to_free_lock);
+
+static void put_sync_fences(struct work_struct *ignore)
+{
+	struct hlist_head list;
+	struct hlist_node *tmp, *pos;
+	unsigned long flags;
+	struct mali_deferred_fence_put_entry *o;
+
+	spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags);
+	hlist_move_list(&mali_timeline_sync_fence_to_free_list, &list);
+	spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags);
+
+	hlist_for_each_entry_safe(o, pos, tmp, &list, list) {
+		sync_fence_put(o->fence);
+		kfree(o);
+	}
+}
+
+static DECLARE_DELAYED_WORK(delayed_sync_fence_put, put_sync_fences);
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */
+
+/* Callback that is called when a sync fence a tracker is waiting on is signaled. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+static void mali_timeline_sync_fence_callback(struct sync_fence *sync_fence, struct sync_fence_waiter *sync_fence_waiter)
+#else
+static void mali_timeline_sync_fence_callback(struct mali_internal_sync_fence *sync_fence, struct mali_internal_sync_fence_waiter *sync_fence_waiter)
+#endif
+{
+	struct mali_timeline_tracker *tracker;
+
+	MALI_IGNORE(sync_fence);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_waiter);
+
+	tracker = _MALI_OSK_CONTAINER_OF(sync_fence_waiter, struct mali_timeline_tracker, sync_fence_waiter);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	_mali_osk_spinlock_irq_lock(sync_fence_callback_list_lock);
+	_mali_osk_list_addtail(&tracker->sync_fence_signal_list, &sync_fence_callback_queue);
+	_mali_osk_spinlock_irq_unlock(sync_fence_callback_list_lock);
+
+	_mali_osk_wq_schedule_work(sync_fence_callback_work_t);
+}
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+static mali_scheduler_mask mali_timeline_tracker_time_out(struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_SOFT == tracker->type);
+
+	return mali_soft_job_system_timeout_job((struct mali_soft_job *) tracker->job);
+}
+
+static void mali_timeline_timer_callback(void *data)
+{
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *tracker;
+	struct mali_timeline *timeline;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+
+	timeline = (struct mali_timeline *) data;
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	if (!system->timer_enabled) {
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+		return;
+	}
+
+	tracker = timeline->tracker_tail;
+	timeline->timer_active = MALI_FALSE;
+
+	if (NULL != tracker && MALI_TRUE == tracker->timer_active) {
+		/* This is likely the delayed work that has been schedule out before cancelled. */
+		if (MALI_TIMELINE_TIMEOUT_HZ > (_mali_osk_time_tickcount() - tracker->os_tick_activate)) {
+			mali_spinlock_reentrant_signal(system->spinlock, tid);
+			return;
+		}
+
+		schedule_mask = mali_timeline_tracker_time_out(tracker);
+		tracker->timer_active = MALI_FALSE;
+	} else {
+		MALI_PRINT_ERROR(("Mali Timeline: Soft job timer callback without a waiting tracker.\n"));
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+void mali_timeline_system_stop_timer(struct mali_timeline_system *system)
+{
+	u32 i;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+	system->timer_enabled = MALI_FALSE;
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (NULL != timeline->delayed_work) {
+			_mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work);
+			timeline->timer_active = MALI_FALSE;
+		}
+	}
+}
+
+static void mali_timeline_destroy(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	if (NULL != timeline) {
+		/* Assert that the timeline object has been properly cleaned up before destroying it. */
+		MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next);
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_head);
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+		MALI_DEBUG_ASSERT(NULL == timeline->waiter_head);
+		MALI_DEBUG_ASSERT(NULL == timeline->waiter_tail);
+		MALI_DEBUG_ASSERT(NULL != timeline->system);
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_MAX > timeline->id);
+
+		if (NULL != timeline->delayed_work) {
+			_mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work);
+			_mali_osk_wq_delayed_delete_work_nonflush(timeline->delayed_work);
+		}
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+		if (NULL != timeline->sync_tl) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			sync_timeline_destroy(timeline->sync_tl);
+#else
+			mali_internal_sync_timeline_destroy(timeline->sync_tl);
+#endif
+		}
+#else
+		_mali_osk_free(timeline);
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+	}
+}
+
+static struct mali_timeline *mali_timeline_create(struct mali_timeline_system *system, enum mali_timeline_id id)
+{
+	struct mali_timeline *timeline;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(id < MALI_TIMELINE_MAX);
+
+	timeline = (struct mali_timeline *) _mali_osk_calloc(1, sizeof(struct mali_timeline));
+	if (NULL == timeline) {
+		return NULL;
+	}
+
+	/* Initially the timeline is empty. */
+#if defined(MALI_TIMELINE_DEBUG_START_POINT)
+	/* Start the timeline a bit before wrapping when debugging. */
+	timeline->point_next = UINT_MAX - MALI_TIMELINE_MAX_POINT_SPAN - 128;
+#else
+	timeline->point_next = 1;
+#endif
+	timeline->point_oldest = timeline->point_next;
+
+	/* The tracker and waiter lists will initially be empty. */
+
+	timeline->system = system;
+	timeline->id = id;
+
+	timeline->delayed_work = _mali_osk_wq_delayed_create_work(mali_timeline_timer_callback, timeline);
+	if (NULL == timeline->delayed_work) {
+		mali_timeline_destroy(timeline);
+		return NULL;
+	}
+
+	timeline->timer_active = MALI_FALSE;
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	{
+		char timeline_name[32];
+
+		switch (id) {
+		case MALI_TIMELINE_GP:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-gp", _mali_osk_get_pid());
+			break;
+		case MALI_TIMELINE_PP:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-pp", _mali_osk_get_pid());
+			break;
+		case MALI_TIMELINE_SOFT:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-soft", _mali_osk_get_pid());
+			break;
+		default:
+			MALI_PRINT_ERROR(("Mali Timeline: Invalid timeline id %d\n", id));
+			mali_timeline_destroy(timeline);
+			return NULL;
+		}
+
+		timeline->destroyed = MALI_FALSE;
+
+		timeline->sync_tl = mali_sync_timeline_create(timeline, timeline_name);
+		if (NULL == timeline->sync_tl) {
+			mali_timeline_destroy(timeline);
+			return NULL;
+		}
+
+		timeline->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM);
+		if (NULL == timeline->spinlock) {
+			mali_timeline_destroy(timeline);
+			return NULL;
+		}
+	}
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+	return timeline;
+}
+
+static void mali_timeline_insert_tracker(struct mali_timeline *timeline, struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	if (mali_timeline_is_full(timeline)) {
+		/* Don't add tracker if timeline is full. */
+		tracker->point = MALI_TIMELINE_NO_POINT;
+		return;
+	}
+
+	tracker->timeline = timeline;
+	tracker->point    = timeline->point_next;
+
+	/* Find next available point. */
+	timeline->point_next++;
+	if (MALI_TIMELINE_NO_POINT == timeline->point_next) {
+		timeline->point_next++;
+	}
+
+	MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+
+	if (MALI_TIMELINE_TRACKER_GP == tracker->type) {
+		_mali_osk_atomic_inc(&gp_tracker_count);
+	} else if (MALI_TIMELINE_TRACKER_PP == tracker->type) {
+		if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) {
+			_mali_osk_atomic_inc(&virt_pp_tracker_count);
+		} else {
+			_mali_osk_atomic_inc(&phy_pp_tracker_count);
+		}
+	}
+
+	/* Add tracker as new head on timeline's tracker list. */
+	if (NULL == timeline->tracker_head) {
+		/* Tracker list is empty. */
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+
+		timeline->tracker_tail = tracker;
+
+		MALI_DEBUG_ASSERT(NULL == tracker->timeline_next);
+		MALI_DEBUG_ASSERT(NULL == tracker->timeline_prev);
+	} else {
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_head->timeline_next);
+
+		tracker->timeline_prev = timeline->tracker_head;
+		timeline->tracker_head->timeline_next = tracker;
+
+		MALI_DEBUG_ASSERT(NULL == tracker->timeline_next);
+	}
+	timeline->tracker_head = tracker;
+
+	MALI_DEBUG_ASSERT(NULL == timeline->tracker_head->timeline_next);
+	MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail->timeline_prev);
+}
+
+/* Inserting the waiter object into the given timeline */
+static void mali_timeline_insert_waiter(struct mali_timeline *timeline, struct mali_timeline_waiter *waiter_new)
+{
+	struct mali_timeline_waiter *waiter_prev;
+	struct mali_timeline_waiter *waiter_next;
+
+	/* Waiter time must be between timeline head and tail, and there must
+	 * be less than MALI_TIMELINE_MAX_POINT_SPAN elements between */
+	MALI_DEBUG_ASSERT((waiter_new->point - timeline->point_oldest) < MALI_TIMELINE_MAX_POINT_SPAN);
+	MALI_DEBUG_ASSERT((-waiter_new->point + timeline->point_next) < MALI_TIMELINE_MAX_POINT_SPAN);
+
+	/* Finding out where to put this waiter, in the linked waiter list of the given timeline **/
+	waiter_prev = timeline->waiter_head; /* Insert new after  waiter_prev */
+	waiter_next = NULL;                  /* Insert new before waiter_next */
+
+	/* Iterating backwards from head (newest) to tail (oldest) until we
+	 * find the correct spot to insert the new waiter */
+	while (waiter_prev && mali_timeline_point_after(waiter_prev->point, waiter_new->point)) {
+		waiter_next = waiter_prev;
+		waiter_prev = waiter_prev->timeline_prev;
+	}
+
+	if (NULL == waiter_prev && NULL == waiter_next) {
+		/* list is empty */
+		timeline->waiter_head = waiter_new;
+		timeline->waiter_tail = waiter_new;
+	} else if (NULL == waiter_next) {
+		/* insert at head */
+		waiter_new->timeline_prev = timeline->waiter_head;
+		timeline->waiter_head->timeline_next = waiter_new;
+		timeline->waiter_head = waiter_new;
+	} else if (NULL == waiter_prev) {
+		/* insert at tail */
+		waiter_new->timeline_next = timeline->waiter_tail;
+		timeline->waiter_tail->timeline_prev = waiter_new;
+		timeline->waiter_tail = waiter_new;
+	} else {
+		/* insert between */
+		waiter_new->timeline_next = waiter_next;
+		waiter_new->timeline_prev = waiter_prev;
+		waiter_next->timeline_prev = waiter_new;
+		waiter_prev->timeline_next = waiter_new;
+	}
+}
+
+static void mali_timeline_update_delayed_work(struct mali_timeline *timeline)
+{
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *oldest_tracker;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SOFT == timeline->id);
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Timer is disabled, early out. */
+	if (!system->timer_enabled) return;
+
+	oldest_tracker = timeline->tracker_tail;
+	if (NULL != oldest_tracker && 0 == oldest_tracker->trigger_ref_count) {
+		if (MALI_FALSE == oldest_tracker->timer_active) {
+			if (MALI_TRUE == timeline->timer_active) {
+				_mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work);
+			}
+			_mali_osk_wq_delayed_schedule_work(timeline->delayed_work, MALI_TIMELINE_TIMEOUT_HZ);
+			oldest_tracker->timer_active = MALI_TRUE;
+			timeline->timer_active = MALI_TRUE;
+		}
+	} else if (MALI_TRUE == timeline->timer_active) {
+		_mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work);
+		timeline->timer_active = MALI_FALSE;
+	}
+}
+
+static mali_scheduler_mask mali_timeline_update_oldest_point(struct mali_timeline *timeline)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	MALI_DEBUG_CODE({
+		struct mali_timeline_system *system = timeline->system;
+		MALI_DEBUG_ASSERT_POINTER(system);
+
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+	});
+
+	if (NULL != timeline->tracker_tail) {
+		/* Set oldest point to oldest tracker's point */
+		timeline->point_oldest = timeline->tracker_tail->point;
+	} else {
+		/* No trackers, mark point list as empty */
+		timeline->point_oldest = timeline->point_next;
+	}
+
+	/* Release all waiters no longer on the timeline's point list.
+	 * Releasing a waiter can trigger this function to be called again, so
+	 * we do not store any pointers on stack. */
+	while (NULL != timeline->waiter_tail) {
+		u32 waiter_time_relative;
+		u32 time_head_relative;
+		struct mali_timeline_waiter *waiter = timeline->waiter_tail;
+
+		time_head_relative = timeline->point_next - timeline->point_oldest;
+		waiter_time_relative = waiter->point - timeline->point_oldest;
+
+		if (waiter_time_relative < time_head_relative) {
+			/* This and all following waiters are on the point list, so we are done. */
+			break;
+		}
+
+		/* Remove waiter from timeline's waiter list. */
+		if (NULL != waiter->timeline_next) {
+			waiter->timeline_next->timeline_prev = NULL;
+		} else {
+			/* This was the last waiter */
+			timeline->waiter_head = NULL;
+		}
+		timeline->waiter_tail = waiter->timeline_next;
+
+		/* Release waiter.  This could activate a tracker, if this was
+		 * the last waiter for the tracker. */
+		schedule_mask |= mali_timeline_system_release_waiter(timeline->system, waiter);
+	}
+
+	return schedule_mask;
+}
+
+void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker,
+				mali_timeline_tracker_type type,
+				struct mali_timeline_fence *fence,
+				void *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAX > type);
+
+	/* Zero out all tracker members. */
+	_mali_osk_memset(tracker, 0, sizeof(*tracker));
+
+	tracker->type = type;
+	tracker->job = job;
+	tracker->trigger_ref_count = 1;  /* Prevents any callback from trigging while adding it */
+	tracker->os_tick_create = _mali_osk_time_tickcount();
+	MALI_DEBUG_CODE(tracker->magic = MALI_TIMELINE_TRACKER_MAGIC);
+
+	tracker->activation_error = MALI_TIMELINE_ACTIVATION_ERROR_NONE;
+
+	/* Copy fence. */
+	if (NULL != fence) {
+		_mali_osk_memcpy(&tracker->fence, fence, sizeof(struct mali_timeline_fence));
+	}
+}
+
+mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker)
+{
+	struct mali_timeline *timeline;
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *tracker_next, *tracker_prev;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+
+	/* Upon entry a group lock will be held, but not a scheduler lock. */
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+	/* Tracker should have been triggered */
+	MALI_DEBUG_ASSERT(0 == tracker->trigger_ref_count);
+
+	/* All waiters should have been released at this point */
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_head);
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+
+	MALI_DEBUG_PRINT(3, ("Mali Timeline: releasing tracker for job 0x%08X\n", tracker->job));
+
+	timeline = tracker->timeline;
+	if (NULL == timeline) {
+		/* Tracker was not on a timeline, there is nothing to release. */
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Tracker should still be on timeline */
+	MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+	MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, tracker->point));
+
+	/* Tracker is no longer valid. */
+	MALI_DEBUG_CODE(tracker->magic = 0);
+
+	tracker_next = tracker->timeline_next;
+	tracker_prev = tracker->timeline_prev;
+	tracker->timeline_next = NULL;
+	tracker->timeline_prev = NULL;
+
+	/* Removing tracker from timeline's tracker list */
+	if (NULL == tracker_next) {
+		/* This tracker was the head */
+		timeline->tracker_head = tracker_prev;
+	} else {
+		tracker_next->timeline_prev = tracker_prev;
+	}
+
+	if (NULL == tracker_prev) {
+		/* This tracker was the tail */
+		timeline->tracker_tail = tracker_next;
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+		/* Update the timeline's oldest time and release any waiters */
+		schedule_mask |= mali_timeline_update_oldest_point(timeline);
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+	} else {
+		tracker_prev->timeline_next = tracker_next;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Update delayed work only when it is the soft job timeline */
+	if (MALI_TIMELINE_SOFT == tracker->timeline->id) {
+		mali_timeline_update_delayed_work(tracker->timeline);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return schedule_mask;
+}
+
+void mali_timeline_system_release_waiter_list(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *tail,
+		struct mali_timeline_waiter *head)
+{
+	struct mali_timeline_waiter    *waiter = NULL;
+	struct mali_timeline_waiter    *next = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(head);
+	MALI_DEBUG_ASSERT_POINTER(tail);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	head->tracker_next = system->waiter_empty_list;
+	system->waiter_empty_list = tail;
+
+	waiter = system->waiter_empty_list;
+	while (NULL != waiter) {
+		next = waiter->tracker_next;
+		_mali_osk_free(waiter);
+		waiter = next;
+	}
+	system->waiter_empty_list = NULL;
+}
+
+static mali_scheduler_mask mali_timeline_tracker_activate(struct mali_timeline_tracker *tracker)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	struct mali_timeline_system *system;
+	struct mali_timeline *timeline;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+	system = tracker->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	tracker->os_tick_activate = _mali_osk_time_tickcount();
+
+	if (NULL != tracker->waiter_head) {
+		mali_timeline_system_release_waiter_list(system, tracker->waiter_tail, tracker->waiter_head);
+		tracker->waiter_head = NULL;
+		tracker->waiter_tail = NULL;
+	}
+
+	switch (tracker->type) {
+	case MALI_TIMELINE_TRACKER_GP:
+		schedule_mask = mali_scheduler_activate_gp_job((struct mali_gp_job *) tracker->job);
+
+		_mali_osk_atomic_dec(&gp_tracker_count);
+		break;
+	case MALI_TIMELINE_TRACKER_PP:
+		if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) {
+			_mali_osk_atomic_dec(&virt_pp_tracker_count);
+		} else {
+			_mali_osk_atomic_dec(&phy_pp_tracker_count);
+		}
+		schedule_mask = mali_scheduler_activate_pp_job((struct mali_pp_job *) tracker->job);
+		break;
+	case MALI_TIMELINE_TRACKER_SOFT:
+		timeline = tracker->timeline;
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		schedule_mask |= mali_soft_job_system_activate_job((struct mali_soft_job *) tracker->job);
+
+		/* Start a soft timer to make sure the soft job be released in a limited time */
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+		mali_timeline_update_delayed_work(timeline);
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+		break;
+	case MALI_TIMELINE_TRACKER_WAIT:
+		mali_timeline_fence_wait_activate((struct mali_timeline_fence_wait_tracker *) tracker->job);
+		break;
+	case MALI_TIMELINE_TRACKER_SYNC:
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+		mali_timeline_sync_fence_activate((struct mali_timeline_sync_fence_tracker *) tracker->job);
+#else
+		MALI_PRINT_ERROR(("Mali Timeline: sync tracker not supported\n", tracker->type));
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+		break;
+	default:
+		MALI_PRINT_ERROR(("Mali Timeline - Illegal tracker type: %d\n", tracker->type));
+		break;
+	}
+
+	return schedule_mask;
+}
+
+void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker)
+{
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+	tracker->trigger_ref_count++;
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+}
+
+mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error)
+{
+	u32 tid = _mali_osk_get_tid();
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+	tracker->trigger_ref_count--;
+
+	tracker->activation_error |= activation_error;
+
+	if (0 == tracker->trigger_ref_count) {
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+		tracker = NULL;
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return schedule_mask;
+}
+
+void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+	MALI_DEBUG_ASSERT_POINTER(uk_fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		fence->points[i] = uk_fence->points[i];
+	}
+
+	fence->sync_fd = uk_fence->sync_fd;
+}
+
+struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session)
+{
+	u32 i;
+	struct mali_timeline_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: creating timeline system\n"));
+
+	system = (struct mali_timeline_system *) _mali_osk_calloc(1, sizeof(struct mali_timeline_system));
+	if (NULL == system) {
+		return NULL;
+	}
+
+	system->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM);
+	if (NULL == system->spinlock) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		system->timelines[i] = mali_timeline_create(system, (enum mali_timeline_id)i);
+		if (NULL == system->timelines[i]) {
+			mali_timeline_system_destroy(system);
+			return NULL;
+		}
+	}
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	system->signaled_sync_tl = mali_sync_timeline_create(NULL, "mali-always-signaled");
+	if (NULL == system->signaled_sync_tl) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+	system->waiter_empty_list = NULL;
+	system->session = session;
+	system->timer_enabled = MALI_TRUE;
+
+	system->wait_queue = _mali_osk_wait_queue_init();
+	if (NULL == system->wait_queue) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+
+	return system;
+}
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE) ||defined(CONFIG_SYNC) ||defined(CONFIG_SYNC_FILE)
+/**
+ * Check if there are any trackers left on timeline.
+ *
+ * Used as a wait queue conditional.
+ *
+ * @param data Timeline.
+ * @return MALI_TRUE if there are no trackers on timeline, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_has_no_trackers(void *data)
+{
+	struct mali_timeline *timeline = (struct mali_timeline *) data;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	return mali_timeline_is_empty(timeline);
+}
+#if defined(CONFIG_SYNC) ||defined(CONFIG_SYNC_FILE)
+/**
+ * Cancel sync fence waiters waited upon by trackers on all timelines.
+ *
+ * Will return after all timelines have no trackers left.
+ *
+ * @param system Timeline system.
+ */
+static void mali_timeline_cancel_sync_fence_waiters(struct mali_timeline_system *system)
+{
+	u32 i;
+	u32 tid = _mali_osk_get_tid();
+	struct mali_timeline_tracker *tracker, *tracker_next;
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(tracker_list);
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Cancel sync fence waiters. */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		tracker_next = timeline->tracker_tail;
+		while (NULL != tracker_next) {
+			tracker = tracker_next;
+			tracker_next = tracker->timeline_next;
+
+			if (NULL == tracker->sync_fence) continue;
+
+			MALI_DEBUG_PRINT(3, ("Mali Timeline: Cancelling sync fence wait for tracker 0x%08X.\n", tracker));
+
+			/* Cancel sync fence waiter. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			if (0 == sync_fence_cancel_async(tracker->sync_fence, &tracker->sync_fence_waiter)) {
+#else
+			if (0 == mali_internal_sync_fence_cancel_async(tracker->sync_fence, &tracker->sync_fence_waiter)) {
+#endif
+				/* Callback was not called, move tracker to local list. */
+				_mali_osk_list_add(&tracker->sync_fence_cancel_list, &tracker_list);
+			}
+		}
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	/* Manually call sync fence callback in order to release waiter and trigger activation of tracker. */
+	_MALI_OSK_LIST_FOREACHENTRY(tracker, tracker_next, &tracker_list, struct mali_timeline_tracker, sync_fence_cancel_list) {
+		mali_timeline_sync_fence_callback(tracker->sync_fence, &tracker->sync_fence_waiter);
+	}
+
+	/* Sleep until all sync fence callbacks are done and all timelines are empty. */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		_mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_has_no_trackers, (void *) timeline);
+	}
+}
+
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+static void mali_timeline_cancel_dma_fence_waiters(struct mali_timeline_system *system)
+{
+	u32 i, j;
+	u32 tid = _mali_osk_get_tid();
+	struct mali_pp_job *pp_job = NULL;
+	struct mali_pp_job *next_pp_job = NULL;
+	struct mali_timeline *timeline = NULL;
+	struct mali_timeline_tracker *tracker, *tracker_next;
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(pp_job_list);
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Cancel dma fence waiters. */
+	timeline = system->timelines[MALI_TIMELINE_PP];
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	tracker_next = timeline->tracker_tail;
+	while (NULL != tracker_next) {
+		mali_bool fence_is_signaled = MALI_TRUE;
+		tracker = tracker_next;
+		tracker_next = tracker->timeline_next;
+
+		if (NULL == tracker->waiter_dma_fence) continue;
+		pp_job = (struct mali_pp_job *)tracker->job;
+		MALI_DEBUG_ASSERT_POINTER(pp_job);
+		MALI_DEBUG_PRINT(3, ("Mali Timeline: Cancelling dma fence waiter for tracker 0x%08X.\n", tracker));
+
+		for (j = 0; j < pp_job->dma_fence_context.num_dma_fence_waiter; j++) {
+			if (pp_job->dma_fence_context.mali_dma_fence_waiters[j]) {
+				/* Cancel a previously callback from the fence.
+				* This function returns true if the callback is successfully removed,
+				* or false if the fence has already been signaled.
+				*/
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+				bool ret = dma_fence_remove_callback(pp_job->dma_fence_context.mali_dma_fence_waiters[j]->fence,
+								     &pp_job->dma_fence_context.mali_dma_fence_waiters[j]->base);
+
+#else
+				bool ret = fence_remove_callback(pp_job->dma_fence_context.mali_dma_fence_waiters[j]->fence,
+								 &pp_job->dma_fence_context.mali_dma_fence_waiters[j]->base);
+#endif
+				if (ret) {
+					fence_is_signaled = MALI_FALSE;
+				}
+			}
+		}
+
+		/* Callbacks were not called, move pp job to local list. */
+		if (MALI_FALSE == fence_is_signaled)
+			_mali_osk_list_add(&pp_job->list, &pp_job_list);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	/* Manually call dma fence callback in order to release waiter and trigger activation of tracker. */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, next_pp_job, &pp_job_list, struct mali_pp_job, list) {
+		mali_timeline_dma_fence_callback((void *)pp_job);
+	}
+
+	/* Sleep until all dma fence callbacks are done and all timelines are empty. */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+		_mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_has_no_trackers, (void *) timeline);
+	}
+}
+#endif
+#endif
+void mali_timeline_system_abort(struct mali_timeline_system *system)
+{
+	MALI_DEBUG_CODE(u32 tid = _mali_osk_get_tid(););
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali Timeline: Aborting timeline system for session 0x%08X.\n", system->session));
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	mali_timeline_cancel_sync_fence_waiters(system);
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	mali_timeline_cancel_dma_fence_waiters(system);
+#endif
+
+	/* Should not be any waiters or trackers left at this point. */
+	MALI_DEBUG_CODE({
+		u32 i;
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+		for (i = 0; i < MALI_TIMELINE_MAX; ++i)
+		{
+			struct mali_timeline *timeline = system->timelines[i];
+			MALI_DEBUG_ASSERT_POINTER(timeline);
+			MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next);
+			MALI_DEBUG_ASSERT(NULL == timeline->tracker_head);
+			MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+			MALI_DEBUG_ASSERT(NULL == timeline->waiter_head);
+			MALI_DEBUG_ASSERT(NULL == timeline->waiter_tail);
+		}
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+	});
+}
+
+void mali_timeline_system_destroy(struct mali_timeline_system *system)
+{
+	u32 i;
+	struct mali_timeline_waiter *waiter, *next;
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	u32 tid = _mali_osk_get_tid();
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: destroying timeline system\n"));
+
+	if (NULL != system) {
+
+		/* There should be no waiters left on this queue. */
+		if (NULL != system->wait_queue) {
+			_mali_osk_wait_queue_term(system->wait_queue);
+			system->wait_queue = NULL;
+		}
+
+		/* Free all waiters in empty list */
+		waiter = system->waiter_empty_list;
+		while (NULL != waiter) {
+			next = waiter->tracker_next;
+			_mali_osk_free(waiter);
+			waiter = next;
+		}
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+		if (NULL != system->signaled_sync_tl) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			sync_timeline_destroy(system->signaled_sync_tl);
+#else
+			mali_internal_sync_timeline_destroy(system->signaled_sync_tl);
+#endif
+		}
+
+		for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+			if ((NULL != system->timelines[i]) && (NULL != system->timelines[i]->spinlock)) {
+				mali_spinlock_reentrant_wait(system->timelines[i]->spinlock, tid);
+				system->timelines[i]->destroyed = MALI_TRUE;
+				mali_spinlock_reentrant_signal(system->timelines[i]->spinlock, tid);
+			}
+		}
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+		for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+			if (NULL != system->timelines[i]) {
+				mali_timeline_destroy(system->timelines[i]);
+			}
+		}
+
+		if (NULL != system->spinlock) {
+			mali_spinlock_reentrant_term(system->spinlock);
+		}
+
+		_mali_osk_free(system);
+	}
+}
+
+/**
+ * Find how many waiters are needed for a given fence.
+ *
+ * @param fence The fence to check.
+ * @return Number of waiters needed for fence.
+ */
+static u32 mali_timeline_fence_num_waiters(struct mali_timeline_fence *fence)
+{
+	u32 i, num_waiters = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		if (MALI_TIMELINE_NO_POINT != fence->points[i]) {
+			++num_waiters;
+		}
+	}
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	if (-1 != fence->sync_fd) ++num_waiters;
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+	return num_waiters;
+}
+
+static struct mali_timeline_waiter *mali_timeline_system_get_zeroed_waiter(struct mali_timeline_system *system)
+{
+	struct mali_timeline_waiter *waiter;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	waiter = system->waiter_empty_list;
+	if (NULL != waiter) {
+		/* Remove waiter from empty list and zero it */
+		system->waiter_empty_list = waiter->tracker_next;
+		_mali_osk_memset(waiter, 0, sizeof(*waiter));
+	}
+
+	/* Return NULL if list was empty. */
+	return waiter;
+}
+
+static void mali_timeline_system_allocate_waiters(struct mali_timeline_system *system,
+		struct mali_timeline_waiter **tail,
+		struct mali_timeline_waiter **head,
+		int max_num_waiters)
+{
+	u32 i, tid = _mali_osk_get_tid();
+	mali_bool do_alloc;
+	struct mali_timeline_waiter *waiter;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(tail);
+	MALI_DEBUG_ASSERT_POINTER(head);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	*head = *tail = NULL;
+	do_alloc = MALI_FALSE;
+	i = 0;
+	while (i < max_num_waiters) {
+		if (MALI_FALSE == do_alloc) {
+			waiter = mali_timeline_system_get_zeroed_waiter(system);
+			if (NULL == waiter) {
+				do_alloc = MALI_TRUE;
+				mali_spinlock_reentrant_signal(system->spinlock, tid);
+				continue;
+			}
+		} else {
+			waiter = _mali_osk_calloc(1, sizeof(struct mali_timeline_waiter));
+			if (NULL == waiter) break;
+		}
+		++i;
+		if (NULL == *tail) {
+			*tail = waiter;
+			*head = waiter;
+		} else {
+			(*head)->tracker_next = waiter;
+			*head = waiter;
+		}
+	}
+	if (MALI_TRUE == do_alloc) {
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+	}
+}
+
+/**
+ * Create waiters for the given tracker. The tracker is activated when all waiters are release.
+ *
+ * @note Tracker can potentially be activated before this function returns.
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker we will create waiters for.
+ * @param waiter_tail List of pre-allocated waiters.
+ * @param waiter_head List of pre-allocated waiters.
+ */
+static void mali_timeline_system_create_waiters_and_unlock(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		struct mali_timeline_waiter *waiter_tail,
+		struct mali_timeline_waiter *waiter_head)
+{
+	int i;
+	u32 tid = _mali_osk_get_tid();
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	struct sync_fence *sync_fence = NULL;
+#else
+	struct mali_internal_sync_fence *sync_fence = NULL;
+#endif
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_head);
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+	MALI_DEBUG_ASSERT(NULL != tracker->job);
+
+	/* Creating waiter object for all the timelines the fence is put on. Inserting this waiter
+	 * into the timelines sorted list of waiters */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		mali_timeline_point point;
+		struct mali_timeline *timeline;
+		struct mali_timeline_waiter *waiter;
+
+		/* Get point on current timeline from tracker's fence. */
+		point = tracker->fence.points[i];
+
+		if (likely(MALI_TIMELINE_NO_POINT == point)) {
+			/* Fence contains no point on this timeline so we don't need a waiter. */
+			continue;
+		}
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (unlikely(!mali_timeline_is_point_valid(timeline, point))) {
+			MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n",
+					  point, timeline->point_oldest, timeline->point_next));
+			continue;
+		}
+
+		if (likely(mali_timeline_is_point_released(timeline, point))) {
+			/* Tracker representing the point has been released so we don't need a
+			 * waiter. */
+			continue;
+		}
+
+		/* The point is on timeline. */
+		MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, point));
+
+		/* Get a new zeroed waiter object. */
+		if (likely(NULL != waiter_tail)) {
+			waiter = waiter_tail;
+			waiter_tail = waiter_tail->tracker_next;
+		} else {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+			continue;
+		}
+
+		/* Yanking the trigger ref count of the tracker. */
+		tracker->trigger_ref_count++;
+
+		waiter->point   = point;
+		waiter->tracker = tracker;
+
+		/* Insert waiter on tracker's singly-linked waiter list. */
+		if (NULL == tracker->waiter_head) {
+			/* list is empty */
+			MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+			tracker->waiter_tail = waiter;
+		} else {
+			tracker->waiter_head->tracker_next = waiter;
+		}
+		tracker->waiter_head = waiter;
+
+		/* Add waiter to timeline. */
+		mali_timeline_insert_waiter(timeline, waiter);
+	}
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	if (-1 != tracker->fence.sync_fd) {
+		int ret;
+		struct mali_timeline_waiter *waiter;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+		sync_fence = sync_fence_fdget(tracker->fence.sync_fd);
+#else
+		sync_fence = mali_internal_sync_fence_fdget(tracker->fence.sync_fd);
+#endif
+		if (unlikely(NULL == sync_fence)) {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", tracker->fence.sync_fd));
+			goto exit;
+		}
+
+		/* Check if we have a zeroed waiter object available. */
+		if (unlikely(NULL == waiter_tail)) {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+			goto exit;
+		}
+
+		/* Start asynchronous wait that will release waiter when the fence is signaled. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+		sync_fence_waiter_init(&tracker->sync_fence_waiter, mali_timeline_sync_fence_callback);
+		ret = sync_fence_wait_async(sync_fence, &tracker->sync_fence_waiter);
+#else
+		mali_internal_sync_fence_waiter_init(&tracker->sync_fence_waiter, mali_timeline_sync_fence_callback);
+		ret = mali_internal_sync_fence_wait_async(sync_fence, &tracker->sync_fence_waiter);
+#endif
+		if (1 == ret) {
+			/* Fence already signaled, no waiter needed. */
+			tracker->fence.sync_fd = -1;
+			goto exit;
+		} else if (0 != ret) {
+			MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, ret));
+			tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT;
+			goto exit;
+		}
+
+		/* Grab new zeroed waiter object. */
+		waiter = waiter_tail;
+		waiter_tail = waiter_tail->tracker_next;
+
+		/* Increase the trigger ref count of the tracker. */
+		tracker->trigger_ref_count++;
+
+		waiter->point   = MALI_TIMELINE_NO_POINT;
+		waiter->tracker = tracker;
+
+		/* Insert waiter on tracker's singly-linked waiter list. */
+		if (NULL == tracker->waiter_head) {
+			/* list is empty */
+			MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+			tracker->waiter_tail = waiter;
+		} else {
+			tracker->waiter_head->tracker_next = waiter;
+		}
+		tracker->waiter_head = waiter;
+
+		/* Also store waiter in separate field for easy access by sync callback. */
+		tracker->waiter_sync = waiter;
+
+		/* Store the sync fence in tracker so we can retrieve in abort session, if needed. */
+		tracker->sync_fence = sync_fence;
+
+		sync_fence = NULL;
+	}
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)*/
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	if ((NULL != tracker->timeline) && (MALI_TIMELINE_PP == tracker->timeline->id)) {
+
+		struct mali_pp_job *job = (struct mali_pp_job *)tracker->job;
+
+		if (0 < job->dma_fence_context.num_dma_fence_waiter) {
+			struct mali_timeline_waiter *waiter;
+			/* Check if we have a zeroed waiter object available. */
+			if (unlikely(NULL == waiter_tail)) {
+				MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+				goto exit;
+			}
+
+			/* Grab new zeroed waiter object. */
+			waiter = waiter_tail;
+			waiter_tail = waiter_tail->tracker_next;
+
+			/* Increase the trigger ref count of the tracker. */
+			tracker->trigger_ref_count++;
+
+			waiter->point   = MALI_TIMELINE_NO_POINT;
+			waiter->tracker = tracker;
+
+			/* Insert waiter on tracker's singly-linked waiter list. */
+			if (NULL == tracker->waiter_head) {
+				/* list is empty */
+				MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+				tracker->waiter_tail = waiter;
+			} else {
+				tracker->waiter_head->tracker_next = waiter;
+			}
+			tracker->waiter_head = waiter;
+
+			/* Also store waiter in separate field for easy access by sync callback. */
+			tracker->waiter_dma_fence = waiter;
+		}
+	}
+#endif /* defined(CONFIG_MALI_DMA_BUF_FENCE)*/
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE) ||defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+exit:
+#endif /* defined(CONFIG_MALI_DMA_BUF_FENCE) || defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+	if (NULL != waiter_tail) {
+		mali_timeline_system_release_waiter_list(system, waiter_tail, waiter_head);
+	}
+
+	/* Release the initial trigger ref count. */
+	tracker->trigger_ref_count--;
+
+	/* If there were no waiters added to this tracker we activate immediately. */
+	if (0 == tracker->trigger_ref_count) {
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	if (NULL != sync_fence) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+		sync_fence_put(sync_fence);
+#else
+		fput(sync_fence->file);
+#endif
+	}
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		enum mali_timeline_id timeline_id)
+{
+	int num_waiters = 0;
+	struct mali_timeline_waiter *waiter_tail, *waiter_head;
+	u32 tid = _mali_osk_get_tid();
+
+	mali_timeline_point point = MALI_TIMELINE_NO_POINT;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == system->session->is_aborting);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAX > tracker->type);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: adding tracker for job %p, timeline: %d\n", tracker->job, timeline_id));
+
+	MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+	tracker->system = system;
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	num_waiters = mali_timeline_fence_num_waiters(&tracker->fence);
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	if (MALI_TIMELINE_PP == timeline_id) {
+		struct mali_pp_job *job = (struct mali_pp_job *)tracker->job;
+		if (0 < job->dma_fence_context.num_dma_fence_waiter)
+			num_waiters++;
+	}
+#endif
+
+	/* Allocate waiters. */
+	mali_timeline_system_allocate_waiters(system, &waiter_tail, &waiter_head, num_waiters);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Add tracker to timeline.  This will allocate a point for the tracker on the timeline. If
+	 * timeline ID is MALI_TIMELINE_NONE the tracker will NOT be added to a timeline and the
+	 * point will be MALI_TIMELINE_NO_POINT.
+	 *
+	 * NOTE: the tracker can fail to be added if the timeline is full.  If this happens, the
+	 * point will be MALI_TIMELINE_NO_POINT. */
+	MALI_DEBUG_ASSERT(timeline_id < MALI_TIMELINE_MAX || timeline_id == MALI_TIMELINE_NONE);
+	if (likely(timeline_id < MALI_TIMELINE_MAX)) {
+		struct mali_timeline *timeline = system->timelines[timeline_id];
+		mali_timeline_insert_tracker(timeline, tracker);
+		MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+	}
+
+	point = tracker->point;
+
+	/* Create waiters for tracker based on supplied fence.  Each waiter will increase the
+	 * trigger ref count. */
+	mali_timeline_system_create_waiters_and_unlock(system, tracker, waiter_tail, waiter_head);
+	tracker = NULL;
+
+	/* At this point the tracker object might have been freed so we should no longer
+	 * access it. */
+
+
+	/* The tracker will always be activated after calling add_tracker, even if NO_POINT is
+	 * returned. */
+	return point;
+}
+
+static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *waiter)
+{
+	struct mali_timeline_tracker *tracker;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	tracker = waiter->tracker;
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	/* At this point the waiter has been removed from the timeline's waiter list, but it is
+	 * still on the tracker's waiter list.  All of the tracker's waiters will be released when
+	 * the tracker is activated. */
+
+	waiter->point   = MALI_TIMELINE_NO_POINT;
+	waiter->tracker = NULL;
+
+	tracker->trigger_ref_count--;
+	if (0 == tracker->trigger_ref_count) {
+		/* This was the last waiter; activate tracker */
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+		tracker = NULL;
+	}
+
+	return schedule_mask;
+}
+
+mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system,
+		enum mali_timeline_id timeline_id)
+{
+	mali_timeline_point point;
+	struct mali_timeline *timeline;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	if (MALI_TIMELINE_MAX <= timeline_id) {
+		return MALI_TIMELINE_NO_POINT;
+	}
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	timeline = system->timelines[timeline_id];
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	point = MALI_TIMELINE_NO_POINT;
+	if (timeline->point_oldest != timeline->point_next) {
+		point = timeline->point_next - 1;
+		if (MALI_TIMELINE_NO_POINT == point) point--;
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return point;
+}
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+static void mali_timeline_do_sync_fence_callback(void *arg)
+{
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(list);
+	struct mali_timeline_tracker *tracker;
+	struct mali_timeline_tracker *tmp_tracker;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_IGNORE(arg);
+
+	/*
+	 * Quickly "unhook" the jobs pending to be deleted, so we can release
+	 * the lock before we start deleting the job objects
+	 * (without any locks held)
+	 */
+	_mali_osk_spinlock_irq_lock(sync_fence_callback_list_lock);
+	_mali_osk_list_move_list(&sync_fence_callback_queue, &list);
+	_mali_osk_spinlock_irq_unlock(sync_fence_callback_list_lock);
+
+	_MALI_OSK_LIST_FOREACHENTRY(tracker, tmp_tracker, &list,
+				    struct mali_timeline_tracker, sync_fence_signal_list) {
+		mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+		mali_bool is_aborting = MALI_FALSE;
+		int fence_status = 0;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+		struct sync_fence *sync_fence = NULL;
+#else
+		struct mali_internal_sync_fence *sync_fence = NULL;
+#endif
+		struct mali_timeline_system  *system = NULL;
+		struct mali_timeline_waiter  *waiter = NULL;
+
+		_mali_osk_list_delinit(&tracker->sync_fence_signal_list);
+
+		sync_fence = tracker->sync_fence;
+		MALI_DEBUG_ASSERT_POINTER(sync_fence);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+		fence_status = sync_fence->status;
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+		fence_status = atomic_read(&sync_fence->status);
+#else
+		fence_status = sync_fence->fence->ops->signaled(sync_fence->fence);
+#endif
+
+		system = tracker->system;
+		MALI_DEBUG_ASSERT_POINTER(system);
+		MALI_DEBUG_ASSERT_POINTER(system->session);
+
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+		is_aborting = system->session->is_aborting;
+		if (!is_aborting && (0 > fence_status)) {
+			MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, fence_status));
+			tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT;
+		}
+
+		waiter = tracker->waiter_sync;
+		MALI_DEBUG_ASSERT_POINTER(waiter);
+
+		tracker->sync_fence = NULL;
+		tracker->fence.sync_fd = -1;
+
+		schedule_mask |= mali_timeline_system_release_waiter(system, waiter);
+
+		/* If aborting, wake up sleepers that are waiting for sync fence callbacks to complete. */
+		if (is_aborting) {
+			_mali_osk_wait_queue_wake_up(system->wait_queue);
+		}
+
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+		/*
+		 * Older versions of Linux, before 3.5, doesn't support fput() in interrupt
+		 * context. For those older kernels, allocate a list object and put the
+		 * fence object on that and defer the call to sync_fence_put() to a workqueue.
+		 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
+		{
+			struct mali_deferred_fence_put_entry *obj;
+
+			obj = kzalloc(sizeof(struct mali_deferred_fence_put_entry), GFP_ATOMIC);
+			if (obj) {
+				unsigned long flags;
+				mali_bool schedule = MALI_FALSE;
+
+				obj->fence = sync_fence;
+
+				spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags);
+				if (hlist_empty(&mali_timeline_sync_fence_to_free_list))
+					schedule = MALI_TRUE;
+				hlist_add_head(&obj->list, &mali_timeline_sync_fence_to_free_list);
+				spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags);
+
+				if (schedule)
+					schedule_delayed_work(&delayed_sync_fence_put, 0);
+			}
+		}
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+		sync_fence_put(sync_fence);
+#else
+		fput(sync_fence->file);
+#endif
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */
+
+		if (!is_aborting) {
+			mali_executor_schedule_from_mask(schedule_mask, MALI_TRUE);
+		}
+	}
+}
+#endif
+_mali_osk_errcode_t mali_timeline_initialize(void)
+{
+	_mali_osk_atomic_init(&gp_tracker_count, 0);
+	_mali_osk_atomic_init(&phy_pp_tracker_count, 0);
+	_mali_osk_atomic_init(&virt_pp_tracker_count, 0);
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	sync_fence_callback_list_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST);
+	if (NULL == sync_fence_callback_list_lock) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	sync_fence_callback_work_t = _mali_osk_wq_create_work(
+					     mali_timeline_do_sync_fence_callback, NULL);
+
+	if (NULL == sync_fence_callback_work_t) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+	return _MALI_OSK_ERR_OK;
+}
+
+
+void mali_timeline_terminate(void)
+{
+	_mali_osk_atomic_term(&gp_tracker_count);
+	_mali_osk_atomic_term(&phy_pp_tracker_count);
+	_mali_osk_atomic_term(&virt_pp_tracker_count);
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	if (NULL != sync_fence_callback_list_lock) {
+		_mali_osk_spinlock_irq_term(sync_fence_callback_list_lock);
+		sync_fence_callback_list_lock = NULL;
+	}
+
+	if (NULL != sync_fence_callback_work_t) {
+		_mali_osk_wq_delete_work(sync_fence_callback_work_t);
+		sync_fence_callback_work_t = NULL;
+	}
+#endif
+}
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+
+static mali_bool is_waiting_on_timeline(struct mali_timeline_tracker *tracker, enum mali_timeline_id id)
+{
+	struct mali_timeline *timeline;
+	struct mali_timeline_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT_POINTER(tracker->timeline);
+	timeline = tracker->timeline;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline->system);
+	system = timeline->system;
+
+	if (MALI_TIMELINE_MAX > id) {
+		if (MALI_TIMELINE_NO_POINT != tracker->fence.points[id]) {
+			return mali_timeline_is_point_on(system->timelines[id], tracker->fence.points[id]);
+		} else {
+			return MALI_FALSE;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_NONE == id);
+		return MALI_FALSE;
+	}
+}
+
+static const char *timeline_id_to_string(enum mali_timeline_id id)
+{
+	switch (id) {
+	case MALI_TIMELINE_GP:
+		return "GP";
+	case MALI_TIMELINE_PP:
+		return "PP";
+	case MALI_TIMELINE_SOFT:
+		return "SOFT";
+	default:
+		return "NONE";
+	}
+}
+
+static const char *timeline_tracker_type_to_string(enum mali_timeline_tracker_type type)
+{
+	switch (type) {
+	case MALI_TIMELINE_TRACKER_GP:
+		return "GP";
+	case MALI_TIMELINE_TRACKER_PP:
+		return "PP";
+	case MALI_TIMELINE_TRACKER_SOFT:
+		return "SOFT";
+	case MALI_TIMELINE_TRACKER_WAIT:
+		return "WAIT";
+	case MALI_TIMELINE_TRACKER_SYNC:
+		return "SYNC";
+	default:
+		return "INVALID";
+	}
+}
+
+mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker)
+{
+	struct mali_timeline *timeline = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	timeline = tracker->timeline;
+
+	if (0 != tracker->trigger_ref_count) {
+		return MALI_TIMELINE_TS_WAITING;
+	}
+
+	if (timeline && (timeline->tracker_tail == tracker || NULL != tracker->timeline_prev)) {
+		return MALI_TIMELINE_TS_ACTIVE;
+	}
+
+	if (timeline && (MALI_TIMELINE_NO_POINT == tracker->point)) {
+		return MALI_TIMELINE_TS_INIT;
+	}
+
+	return MALI_TIMELINE_TS_FINISH;
+}
+
+void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx)
+{
+	const char *tracker_state = "IWAF";
+	char state_char = 'I';
+	char tracker_type[32] = {0};
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker));
+	_mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type));
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	if (0 != tracker->trigger_ref_count) {
+		if (print_ctx)
+			_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)]  job:(0x%08X)\n",
+					     tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+					     is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+					     is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+					     is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+					     tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job));
+		else
+			MALI_DEBUG_PRINT(2, ("TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)]  job:(0x%08X)\n",
+					    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+					    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+					    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+					    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+					    tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job)));
+	} else {
+		if (print_ctx)
+			_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c  fd:%d  fence:(0x%08X)  job:(0x%08X)\n",
+					    tracker_type, tracker->point, state_char,
+					    tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job));
+		else
+			MALI_DEBUG_PRINT(2, ("TL:  %s %u %c  fd:%d  fence:(0x%08X)  job:(0x%08X)\n",
+					     tracker_type, tracker->point, state_char,
+					     tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job)));
+
+	}
+#else
+	if (0 != tracker->trigger_ref_count) {
+		if (print_ctx)
+			_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)]  job:(0x%08X)\n",
+					    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+					    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+					    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+					    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+					    (unsigned int)(uintptr_t)(tracker->job));
+		else
+			MALI_DEBUG_PRINT(2, ("TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)]  job:(0x%08X)\n",
+					     tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+					     is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+					     is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+					     is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+					     (unsigned int)(uintptr_t)(tracker->job)));
+	} else {
+		if (print_ctx)
+			_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c  job:(0x%08X)\n",
+					     tracker_type, tracker->point, state_char,
+					     (unsigned int)(uintptr_t)(tracker->job)));
+		else
+			MALI_DEBUG_PRINT(2, ("TL:  %s %u %c  job:(0x%08X)\n",
+					    tracker_type, tracker->point, state_char,
+					    (unsigned int)(uintptr_t)(tracker->job)));
+
+	}
+#endif
+}
+
+void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx)
+{
+	struct mali_timeline_tracker *tracker = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	tracker = timeline->tracker_tail;
+	while (NULL != tracker) {
+		mali_timeline_debug_print_tracker(tracker, print_ctx);
+		tracker = tracker->timeline_next;
+	}
+}
+
+#if !(LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0))
+void mali_timeline_debug_direct_print_tracker(struct mali_timeline_tracker *tracker)
+{
+	const char *tracker_state = "IWAF";
+	char state_char = 'I';
+	char tracker_type[32] = {0};
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker));
+	_mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type));
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	if (0 != tracker->trigger_ref_count) {
+		MALI_PRINT(("TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)]  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+			    tracker->fence.sync_fd, tracker->sync_fence, tracker->job));
+	} else {
+		MALI_PRINT(("TL:  %s %u %c  fd:%d  fence:(0x%08X)  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char,
+			    tracker->fence.sync_fd, tracker->sync_fence, tracker->job));
+	}
+#else
+	if (0 != tracker->trigger_ref_count) {
+		MALI_PRINT(("TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)]  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+			    tracker->job));
+	} else {
+		MALI_PRINT(("TL:  %s %u %c  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char,
+			    tracker->job));
+	}
+#endif
+}
+
+void mali_timeline_debug_direct_print_timeline(struct mali_timeline *timeline)
+{
+	struct mali_timeline_tracker *tracker = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	tracker = timeline->tracker_tail;
+	while (NULL != tracker) {
+		mali_timeline_debug_direct_print_tracker(tracker);
+		tracker = tracker->timeline_next;
+	}
+}
+
+#endif
+
+void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx)
+{
+	int i;
+	int num_printed = 0;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Print all timelines */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (NULL == timeline->tracker_head) continue;
+		if (print_ctx)
+			_mali_osk_ctxprintf(print_ctx, "TL: Timeline %s:\n",
+					    timeline_id_to_string((enum mali_timeline_id)i));
+		else
+			MALI_DEBUG_PRINT(2, ("TL: Timeline %s: oldest (%u) next(%u)\n",
+					     timeline_id_to_string((enum mali_timeline_id)i), timeline->point_oldest, timeline->point_next));
+
+		mali_timeline_debug_print_timeline(timeline, print_ctx);
+		num_printed++;
+	}
+
+	if (0 == num_printed) {
+		if (print_ctx)
+			_mali_osk_ctxprintf(print_ctx, "TL: All timelines empty\n");
+		else
+			MALI_DEBUG_PRINT(2, ("TL: All timelines empty\n"));
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+}
+
+#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+void mali_timeline_dma_fence_callback(void *pp_job_ptr)
+{
+	struct mali_timeline_system  *system;
+	struct mali_timeline_waiter  *waiter;
+	struct mali_timeline_tracker *tracker;
+	struct mali_pp_job *pp_job = (struct mali_pp_job *)pp_job_ptr;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+	mali_bool is_aborting = MALI_FALSE;
+
+	MALI_DEBUG_ASSERT_POINTER(pp_job);
+
+	tracker = &pp_job->tracker;
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	system = tracker->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	waiter = tracker->waiter_dma_fence;
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+
+	schedule_mask |= mali_timeline_system_release_waiter(system, waiter);
+
+	is_aborting = system->session->is_aborting;
+
+	/* If aborting, wake up sleepers that are waiting for dma fence callbacks to complete. */
+	if (is_aborting) {
+		_mali_osk_wait_queue_wake_up(system->wait_queue);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	if (!is_aborting) {
+		mali_executor_schedule_from_mask(schedule_mask, MALI_TRUE);
+	}
+}
+#endif
diff --git a/mali/common/mali_timeline.h b/mali/common/mali_timeline.h
new file mode 100755
index 0000000..3aeb75c
--- /dev/null
+++ b/mali/common/mali_timeline.h
@@ -0,0 +1,563 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMELINE_H__
+#define __MALI_TIMELINE_H__
+
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "mali_spinlock_reentrant.h"
+#include "mali_sync.h"
+#include "mali_scheduler_types.h"
+#include <linux/version.h>
+
+/**
+ * Soft job timeout.
+ *
+ * Soft jobs have to be signaled as complete after activation.  Normally this is done by user space,
+ * but in order to guarantee that every soft job is completed, we also have a timer.
+ */
+#define MALI_TIMELINE_TIMEOUT_HZ ((unsigned long) (HZ * 3 / 2)) /* 1500 ms. */
+
+/**
+ * Timeline type.
+ */
+typedef enum mali_timeline_id {
+	MALI_TIMELINE_GP   = MALI_UK_TIMELINE_GP,   /**< GP job timeline. */
+	MALI_TIMELINE_PP   = MALI_UK_TIMELINE_PP,   /**< PP job timeline. */
+	MALI_TIMELINE_SOFT = MALI_UK_TIMELINE_SOFT, /**< Soft job timeline. */
+	MALI_TIMELINE_MAX  = MALI_UK_TIMELINE_MAX
+} mali_timeline_id;
+
+/**
+ * Used by trackers that should not be added to a timeline (@ref mali_timeline_system_add_tracker).
+ */
+#define MALI_TIMELINE_NONE MALI_TIMELINE_MAX
+
+/**
+ * Tracker type.
+ */
+typedef enum mali_timeline_tracker_type {
+	MALI_TIMELINE_TRACKER_GP   = 0, /**< Tracker used by GP jobs. */
+	MALI_TIMELINE_TRACKER_PP   = 1, /**< Tracker used by PP jobs. */
+	MALI_TIMELINE_TRACKER_SOFT = 2, /**< Tracker used by soft jobs. */
+	MALI_TIMELINE_TRACKER_WAIT = 3, /**< Tracker used for fence wait. */
+	MALI_TIMELINE_TRACKER_SYNC = 4, /**< Tracker used for sync fence. */
+	MALI_TIMELINE_TRACKER_MAX  = 5,
+} mali_timeline_tracker_type;
+
+/**
+ * Tracker activation error.
+ */
+typedef u32 mali_timeline_activation_error;
+#define MALI_TIMELINE_ACTIVATION_ERROR_NONE      0
+#define MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT  (1<<1)
+#define MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT (1<<0)
+
+/**
+ * Type used to represent a point on a timeline.
+ */
+typedef u32 mali_timeline_point;
+
+/**
+ * Used to represent that no point on a timeline.
+ */
+#define MALI_TIMELINE_NO_POINT ((mali_timeline_point) 0)
+
+/**
+ * The maximum span of points on a timeline.  A timeline will be considered full if the difference
+ * between the oldest and newest points is equal or larger to this value.
+ */
+#define MALI_TIMELINE_MAX_POINT_SPAN 65536
+
+/**
+ * Magic value used to assert on validity of trackers.
+ */
+#define MALI_TIMELINE_TRACKER_MAGIC 0xabcdabcd
+
+struct mali_timeline;
+struct mali_timeline_waiter;
+struct mali_timeline_tracker;
+
+/**
+ * Timeline fence.
+ */
+struct mali_timeline_fence {
+	mali_timeline_point points[MALI_TIMELINE_MAX]; /**< For each timeline, a point or MALI_TIMELINE_NO_POINT. */
+	s32                 sync_fd;                   /**< A file descriptor representing a sync fence, or -1. */
+};
+
+/**
+ * Timeline system.
+ *
+ * The Timeline system has a set of timelines associated with a session.
+ */
+struct mali_timeline_system {
+	struct mali_spinlock_reentrant *spinlock;   /**< Spin lock protecting the timeline system */
+	struct mali_timeline           *timelines[MALI_TIMELINE_MAX]; /**< The timelines in this system */
+
+	/* Single-linked list of unused waiter objects.  Uses the tracker_next field in tracker. */
+	struct mali_timeline_waiter    *waiter_empty_list;
+
+	struct mali_session_data       *session;    /**< Session that owns this system. */
+
+	mali_bool                       timer_enabled; /**< Set to MALI_TRUE if soft job timer should be enabled, MALI_FALSE if not. */
+
+	_mali_osk_wait_queue_t         *wait_queue; /**< Wait queue. */
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	struct sync_timeline           *signaled_sync_tl; /**< Special sync timeline used to create pre-signaled sync fences */
+#else
+	struct mali_internal_sync_timeline           *signaled_sync_tl; /**< Special sync timeline used to create pre-signaled sync fences */
+#endif
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+};
+
+/**
+ * Timeline.  Each Timeline system will have MALI_TIMELINE_MAX timelines.
+ */
+struct mali_timeline {
+	mali_timeline_point           point_next;   /**< The next available point. */
+	mali_timeline_point           point_oldest; /**< The oldest point not released. */
+
+	/* Double-linked list of trackers.  Sorted in ascending order by tracker->time_number with
+	 * tail pointing to the tracker with the oldest time. */
+	struct mali_timeline_tracker *tracker_head;
+	struct mali_timeline_tracker *tracker_tail;
+
+	/* Double-linked list of waiters.  Sorted in ascending order by waiter->time_number_wait
+	 * with tail pointing to the waiter with oldest wait time. */
+	struct mali_timeline_waiter  *waiter_head;
+	struct mali_timeline_waiter  *waiter_tail;
+
+	struct mali_timeline_system  *system;       /**< Timeline system this timeline belongs to. */
+	enum mali_timeline_id         id;           /**< Timeline type. */
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	struct sync_timeline         *sync_tl;      /**< Sync timeline that corresponds to this timeline. */
+#else
+	struct mali_internal_sync_timeline *sync_tl;
+#endif
+	mali_bool destroyed;
+	struct mali_spinlock_reentrant *spinlock;       /**< Spin lock protecting the timeline system */
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+	/* The following fields are used to time out soft job trackers. */
+	_mali_osk_wq_delayed_work_t  *delayed_work;
+	mali_bool                     timer_active;
+};
+
+/**
+ * Timeline waiter.
+ */
+struct mali_timeline_waiter {
+	mali_timeline_point           point;         /**< Point on timeline we are waiting for to be released. */
+	struct mali_timeline_tracker *tracker;       /**< Tracker that is waiting. */
+
+	struct mali_timeline_waiter  *timeline_next; /**< Next waiter on timeline's waiter list. */
+	struct mali_timeline_waiter  *timeline_prev; /**< Previous waiter on timeline's waiter list. */
+
+	struct mali_timeline_waiter  *tracker_next;  /**< Next waiter on tracker's waiter list. */
+};
+
+/**
+ * Timeline tracker.
+ */
+struct mali_timeline_tracker {
+	MALI_DEBUG_CODE(u32            magic); /**< Should always be MALI_TIMELINE_TRACKER_MAGIC for a valid tracker. */
+
+	mali_timeline_point            point; /**< Point on timeline for this tracker */
+
+	struct mali_timeline_tracker  *timeline_next; /**< Next tracker on timeline's tracker list */
+	struct mali_timeline_tracker  *timeline_prev; /**< Previous tracker on timeline's tracker list */
+
+	u32                            trigger_ref_count; /**< When zero tracker will be activated */
+	mali_timeline_activation_error activation_error;  /**< Activation error. */
+	struct mali_timeline_fence     fence;             /**< Fence used to create this tracker */
+
+	/* Single-linked list of waiters.  Sorted in order of insertions with
+	 * tail pointing to first waiter. */
+	struct mali_timeline_waiter   *waiter_head;
+	struct mali_timeline_waiter   *waiter_tail;
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	/* These are only used if the tracker is waiting on a sync fence. */
+	struct mali_timeline_waiter   *waiter_sync; /**< A direct pointer to timeline waiter representing sync fence. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	struct sync_fence_waiter       sync_fence_waiter; /**< Used to connect sync fence and tracker in sync fence wait callback. */
+	struct sync_fence             *sync_fence;   /**< The sync fence this tracker is waiting on. */
+#else
+	struct mali_internal_sync_fence_waiter       sync_fence_waiter; /**< Used to connect sync fence and tracker in sync fence wait callback. */
+	struct mali_internal_sync_fence             *sync_fence;   /**< The sync fence this tracker is waiting on. */
+#endif
+	_mali_osk_list_t               sync_fence_cancel_list; /**< List node used to cancel sync fence waiters. */
+	_mali_osk_list_t                sync_fence_signal_list; /** < List node used to singal sync fence callback function. */
+
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	struct mali_timeline_waiter   *waiter_dma_fence; /**< A direct pointer to timeline waiter representing dma fence. */
+#endif
+
+	struct mali_timeline_system   *system;       /**< Timeline system. */
+	struct mali_timeline          *timeline;     /**< Timeline, or NULL if not on a timeline. */
+	enum mali_timeline_tracker_type type;        /**< Type of tracker. */
+	void                          *job;          /**< Owner of tracker. */
+
+	/* The following fields are used to time out soft job trackers. */
+	unsigned long                 os_tick_create;
+	unsigned long                 os_tick_activate;
+	mali_bool                     timer_active;
+};
+
+extern _mali_osk_atomic_t gp_tracker_count;
+extern _mali_osk_atomic_t phy_pp_tracker_count;
+extern _mali_osk_atomic_t virt_pp_tracker_count;
+
+/**
+ * What follows is a set of functions to check the state of a timeline and to determine where on a
+ * timeline a given point is.  Most of these checks will translate the timeline so the oldest point
+ * on the timeline is aligned with zero.  Remember that all of these calculation are done on
+ * unsigned integers.
+ *
+ * The following example illustrates the three different states a point can be in.  The timeline has
+ * been translated to put the oldest point at zero:
+ *
+ *
+ *
+ *                               [ point is in forbidden zone ]
+ *                                          64k wide
+ *                                MALI_TIMELINE_MAX_POINT_SPAN
+ *
+ *    [ point is on timeline     )                            ( point is released ]
+ *
+ *    0--------------------------##############################--------------------2^32 - 1
+ *    ^                          ^
+ *    \                          |
+ *     oldest point on timeline  |
+ *                               \
+ *                                next point on timeline
+ */
+
+/**
+ * Compare two timeline points
+ *
+ * Returns true if a is after b, false if a is before or equal to b.
+ *
+ * This funcion ignores MALI_TIMELINE_MAX_POINT_SPAN. Wrapping is supported and
+ * the result will be correct if the points is less then UINT_MAX/2 apart.
+ *
+ * @param a Point on timeline
+ * @param b Point on timeline
+ * @return MALI_TRUE if a is after b
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_point_after(mali_timeline_point a, mali_timeline_point b)
+{
+	return 0 > ((s32)b) - ((s32)a);
+}
+
+/**
+ * Check if a point is on timeline.  A point is on a timeline if it is greater than, or equal to,
+ * the oldest point, and less than the next point.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point is on timeline, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_on(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+	return (point - timeline->point_oldest) < (timeline->point_next - timeline->point_oldest);
+}
+
+/**
+ * Check if a point has been released.  A point is released if it is older than the oldest point on
+ * the timeline, newer than the next point, and also not in the forbidden zone.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point has been release, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_released(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	mali_timeline_point point_normalized;
+	mali_timeline_point next_normalized;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+	point_normalized = point - timeline->point_oldest;
+	next_normalized = timeline->point_next - timeline->point_oldest;
+
+	return point_normalized > (next_normalized + MALI_TIMELINE_MAX_POINT_SPAN);
+}
+
+/**
+ * Check if a point is valid.  A point is valid if is on the timeline or has been released.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point is valid, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_valid(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return mali_timeline_is_point_on(timeline, point) || mali_timeline_is_point_released(timeline, point);
+}
+
+/**
+ * Check if timeline is empty (has no points on it).  A timeline is empty if next == oldest.
+ *
+ * @param timeline Timeline.
+ * @return MALI_TRUE if timeline is empty, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_empty(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return timeline->point_next == timeline->point_oldest;
+}
+
+/**
+ * Check if timeline is full.  A valid timeline cannot span more than 64k points (@ref
+ * MALI_TIMELINE_MAX_POINT_SPAN).
+ *
+ * @param timeline Timeline.
+ * @return MALI_TRUE if timeline is full, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_full(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return MALI_TIMELINE_MAX_POINT_SPAN <= (timeline->point_next - timeline->point_oldest);
+}
+
+/**
+ * Create a new timeline system.
+ *
+ * @param session The session this timeline system will belong to.
+ * @return New timeline system.
+ */
+struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session);
+
+/**
+ * Abort timeline system.
+ *
+ * This will release all pending waiters in the timeline system causing all trackers to be
+ * activated.
+ *
+ * @param system Timeline system to abort all jobs from.
+ */
+void mali_timeline_system_abort(struct mali_timeline_system *system);
+
+/**
+ * Destroy an empty timeline system.
+ *
+ * @note @ref mali_timeline_system_abort() should be called prior to this function.
+ *
+ * @param system Timeline system to destroy.
+ */
+void mali_timeline_system_destroy(struct mali_timeline_system *system);
+
+/**
+ * Stop the soft job timer.
+ *
+ * @param system Timeline system
+ */
+void mali_timeline_system_stop_timer(struct mali_timeline_system *system);
+
+/**
+ * Add a tracker to a timeline system and optionally also on a timeline.
+ *
+ * Once added to the timeline system, the tracker is guaranteed to be activated.  The tracker can be
+ * activated before this function returns.  Thus, it is also possible that the tracker is released
+ * before this function returns, depending on the tracker type.
+ *
+ * @note Tracker must be initialized (@ref mali_timeline_tracker_init) before being added to the
+ * timeline system.
+ *
+ * @param system Timeline system the tracker will be added to.
+ * @param tracker The tracker to be added.
+ * @param timeline_id Id of the timeline the tracker will be added to, or
+ *                    MALI_TIMELINE_NONE if it should not be added on a timeline.
+ * @return Point on timeline identifying this tracker, or MALI_TIMELINE_NO_POINT if not on timeline.
+ */
+mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		enum mali_timeline_id timeline_id);
+
+/**
+ * Get latest point on timeline.
+ *
+ * @param system Timeline system.
+ * @param timeline_id Id of timeline to get latest point from.
+ * @return Latest point on timeline, or MALI_TIMELINE_NO_POINT if the timeline is empty.
+ */
+mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system,
+		enum mali_timeline_id timeline_id);
+
+/**
+ * Initialize tracker.
+ *
+ * Must be called before tracker is added to timeline system (@ref mali_timeline_system_add_tracker).
+ *
+ * @param tracker Tracker to initialize.
+ * @param type Type of tracker.
+ * @param fence Fence used to set up dependencies for tracker.
+ * @param job Pointer to job struct this tracker is associated with.
+ */
+void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker,
+				mali_timeline_tracker_type type,
+				struct mali_timeline_fence *fence,
+				void *job);
+
+/**
+ * Grab trigger ref count on tracker.
+ *
+ * This will prevent tracker from being activated until the trigger ref count reaches zero.
+ *
+ * @note Tracker must have been initialized (@ref mali_timeline_tracker_init).
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker.
+ */
+void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker);
+
+/**
+ * Release trigger ref count on tracker.
+ *
+ * If the trigger ref count reaches zero, the tracker will be activated.
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker.
+ * @param activation_error Error bitmask if activated with error, or MALI_TIMELINE_ACTIVATION_ERROR_NONE if no error.
+ * @return Scheduling bitmask.
+ */
+mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error);
+
+/**
+ * Release a tracker from the timeline system.
+ *
+ * This is used to signal that the job being tracker is finished, either due to normal circumstances
+ * (job complete/abort) or due to a timeout.
+ *
+ * We may need to schedule some subsystems after a tracker has been released and the returned
+ * bitmask will tell us if it is necessary.  If the return value is non-zero, this value needs to be
+ * sent as an input parameter to @ref mali_scheduler_schedule_from_mask() to do the scheduling.
+ *
+ * @note Tracker must have been activated before being released.
+ * @warning Not calling @ref mali_scheduler_schedule_from_mask() after releasing a tracker can lead
+ * to a deadlock.
+ *
+ * @param tracker Tracker being released.
+ * @return Scheduling bitmask.
+ */
+mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker);
+
+MALI_STATIC_INLINE mali_bool mali_timeline_tracker_activation_error(
+	struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	return (MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT &
+		tracker->activation_error) ? MALI_TRUE : MALI_FALSE;
+}
+
+/**
+ * Copy data from a UK fence to a Timeline fence.
+ *
+ * @param fence Timeline fence.
+ * @param uk_fence UK fence.
+ */
+void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence);
+
+_mali_osk_errcode_t mali_timeline_initialize(void);
+
+void mali_timeline_terminate(void);
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_gp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&gp_tracker_count);
+}
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_physical_pp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&phy_pp_tracker_count);
+}
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_virtual_pp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&virt_pp_tracker_count);
+}
+
+#if defined(DEBUG)
+#define MALI_TIMELINE_DEBUG_FUNCTIONS
+#endif /* DEBUG */
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+
+/**
+ * Tracker state.  Used for debug printing.
+ */
+typedef enum mali_timeline_tracker_state {
+	MALI_TIMELINE_TS_INIT    = 0,
+	MALI_TIMELINE_TS_WAITING = 1,
+	MALI_TIMELINE_TS_ACTIVE  = 2,
+	MALI_TIMELINE_TS_FINISH  = 3,
+} mali_timeline_tracker_state;
+
+/**
+ * Get tracker state.
+ *
+ * @param tracker Tracker to check.
+ * @return State of tracker.
+ */
+mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker);
+
+/**
+ * Print debug information about tracker.
+ *
+ * @param tracker Tracker to print.
+ */
+void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx);
+
+/**
+ * Print debug information about timeline.
+ *
+ * @param timeline Timeline to print.
+ */
+void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx);
+
+#if !(LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0))
+void mali_timeline_debug_direct_print_tracker(struct mali_timeline_tracker *tracker);
+void mali_timeline_debug_direct_print_timeline(struct mali_timeline *timeline);
+#endif
+
+/**
+ * Print debug information about timeline system.
+ *
+ * @param system Timeline system to print.
+ */
+void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx);
+
+#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+/**
+ * The timeline dma fence callback when dma fence signal.
+ *
+ * @param pp_job_ptr The pointer to pp job that link to the signaled dma fence.
+ */
+void mali_timeline_dma_fence_callback(void *pp_job_ptr);
+#endif
+
+#endif /* __MALI_TIMELINE_H__ */
diff --git a/mali/common/mali_timeline_fence_wait.c b/mali/common/mali_timeline_fence_wait.c
new file mode 100755
index 0000000..8e709a2
--- /dev/null
+++ b/mali/common/mali_timeline_fence_wait.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/file.h>
+#include "mali_timeline_fence_wait.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_spinlock_reentrant.h"
+
+/**
+ * Allocate a fence waiter tracker.
+ *
+ * @return New fence waiter if successful, NULL if not.
+ */
+static struct mali_timeline_fence_wait_tracker *mali_timeline_fence_wait_tracker_alloc(void)
+{
+	return (struct mali_timeline_fence_wait_tracker *) _mali_osk_calloc(1, sizeof(struct mali_timeline_fence_wait_tracker));
+}
+
+/**
+ * Free fence waiter tracker.
+ *
+ * @param wait Fence wait tracker to free.
+ */
+static void mali_timeline_fence_wait_tracker_free(struct mali_timeline_fence_wait_tracker *wait)
+{
+	MALI_DEBUG_ASSERT_POINTER(wait);
+	_mali_osk_atomic_term(&wait->refcount);
+	_mali_osk_free(wait);
+}
+
+/**
+ * Check if fence wait tracker has been activated.  Used as a wait queue condition.
+ *
+ * @param data Fence waiter.
+ * @return MALI_TRUE if tracker has been activated, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_fence_wait_tracker_is_activated(void *data)
+{
+	struct mali_timeline_fence_wait_tracker *wait;
+
+	wait = (struct mali_timeline_fence_wait_tracker *) data;
+	MALI_DEBUG_ASSERT_POINTER(wait);
+
+	return wait->activated;
+}
+
+/**
+ * Check if fence has been signaled.
+ *
+ * @param system Timeline system.
+ * @param fence Timeline fence.
+ * @return MALI_TRUE if fence is signaled, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_fence_wait_check_status(struct mali_timeline_system *system, struct mali_timeline_fence *fence)
+{
+	int i;
+	u32 tid = _mali_osk_get_tid();
+	mali_bool ret = MALI_TRUE;
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	struct sync_fence *sync_fence = NULL;
+#else
+	struct mali_internal_sync_fence *sync_fence = NULL;
+#endif
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline;
+		mali_timeline_point   point;
+
+		point = fence->points[i];
+
+		if (likely(MALI_TIMELINE_NO_POINT == point)) {
+			/* Fence contains no point on this timeline. */
+			continue;
+		}
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (unlikely(!mali_timeline_is_point_valid(timeline, point))) {
+			MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n", point, timeline->point_oldest, timeline->point_next));
+		}
+
+		if (!mali_timeline_is_point_released(timeline, point)) {
+			ret = MALI_FALSE;
+			goto exit;
+		}
+	}
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	if (-1 != fence->sync_fd) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+		sync_fence = sync_fence_fdget(fence->sync_fd);
+#else
+		sync_fence = mali_internal_sync_fence_fdget(fence->sync_fd);
+#endif
+		if (likely(NULL != sync_fence)) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+			if (0 == sync_fence->status) {
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+			if (0 < atomic_read(&sync_fence->status)) {
+#else
+			if (0 == sync_fence->fence->ops->signaled(sync_fence->fence)) {
+#endif
+				ret = MALI_FALSE;
+
+			} else {
+				ret = MALI_TRUE;
+			}
+		} else {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", fence->sync_fd));
+		}
+	}
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+exit:
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	if (NULL != sync_fence) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+		sync_fence_put(sync_fence);
+#else
+		fput(sync_fence->file);
+#endif
+	}
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+	return ret;
+}
+
+mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout)
+{
+	struct mali_timeline_fence_wait_tracker *wait;
+	mali_timeline_point point;
+	mali_bool ret;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: wait on fence\n"));
+
+	if (MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY == timeout) {
+		return mali_timeline_fence_wait_check_status(system, fence);
+	}
+
+	wait = mali_timeline_fence_wait_tracker_alloc();
+	if (unlikely(NULL == wait)) {
+		MALI_PRINT_ERROR(("Mali Timeline: failed to allocate data for fence wait\n"));
+		return MALI_FALSE;
+	}
+
+	wait->activated = MALI_FALSE;
+	wait->system = system;
+
+	/* Initialize refcount to two references.  The reference first will be released by this
+	 * function after the wait is over.  The second reference will be released when the tracker
+	 * is activated. */
+	_mali_osk_atomic_init(&wait->refcount, 2);
+
+	/* Add tracker to timeline system, but not to a timeline. */
+	mali_timeline_tracker_init(&wait->tracker, MALI_TIMELINE_TRACKER_WAIT, fence, wait);
+	point = mali_timeline_system_add_tracker(system, &wait->tracker, MALI_TIMELINE_NONE);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT == point);
+	MALI_IGNORE(point);
+
+	/* Wait for the tracker to be activated or time out. */
+	if (MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER == timeout) {
+		_mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait);
+	} else {
+		_mali_osk_wait_queue_wait_event_timeout(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait, timeout);
+	}
+
+	ret = wait->activated;
+
+	if (0 == _mali_osk_atomic_dec_return(&wait->refcount)) {
+		mali_timeline_fence_wait_tracker_free(wait);
+	}
+
+	return ret;
+}
+
+void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *wait)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(wait);
+	MALI_DEBUG_ASSERT_POINTER(wait->system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for fence wait tracker\n"));
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == wait->activated);
+	wait->activated = MALI_TRUE;
+
+	_mali_osk_wait_queue_wake_up(wait->system->wait_queue);
+
+	/* Nothing can wait on this tracker, so nothing to schedule after release. */
+	schedule_mask = mali_timeline_tracker_release(&wait->tracker);
+	MALI_DEBUG_ASSERT(MALI_SCHEDULER_MASK_EMPTY == schedule_mask);
+	MALI_IGNORE(schedule_mask);
+
+	if (0 == _mali_osk_atomic_dec_return(&wait->refcount)) {
+		mali_timeline_fence_wait_tracker_free(wait);
+	}
+}
diff --git a/mali/common/mali_timeline_fence_wait.h b/mali/common/mali_timeline_fence_wait.h
new file mode 100755
index 0000000..348652b
--- /dev/null
+++ b/mali/common/mali_timeline_fence_wait.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_timeline_fence_wait.h
+ *
+ * This file contains functions used to wait until a Timeline fence is signaled.
+ */
+
+#ifndef __MALI_TIMELINE_FENCE_WAIT_H__
+#define __MALI_TIMELINE_FENCE_WAIT_H__
+
+#include "mali_osk.h"
+#include "mali_timeline.h"
+
+/**
+ * If used as the timeout argument in @ref mali_timeline_fence_wait, a timer is not used and the
+ * function only returns when the fence is signaled.
+ */
+#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER ((u32) -1)
+
+/**
+ * If used as the timeout argument in @ref mali_timeline_fence_wait, the function will return
+ * immediately with the current state of the fence.
+ */
+#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY 0
+
+/**
+ * Fence wait tracker.
+ *
+ * The fence wait tracker is added to the Timeline system with the fence we are waiting on as a
+ * dependency.  We will then perform a blocking wait, possibly with a timeout, until the tracker is
+ * activated, which happens when the fence is signaled.
+ */
+struct mali_timeline_fence_wait_tracker {
+	mali_bool activated;                  /**< MALI_TRUE if the tracker has been activated, MALI_FALSE if not. */
+	_mali_osk_atomic_t refcount;          /**< Reference count. */
+	struct mali_timeline_system *system;  /**< Timeline system. */
+	struct mali_timeline_tracker tracker; /**< Timeline tracker. */
+};
+
+/**
+ * Wait for a fence to be signaled, or timeout is reached.
+ *
+ * @param system Timeline system.
+ * @param fence Fence to wait on.
+ * @param timeout Timeout in ms, or MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER or
+ * MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY.
+ * @return MALI_TRUE if signaled, MALI_FALSE if timed out.
+ */
+mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout);
+
+/**
+ * Used by the Timeline system to activate a fence wait tracker.
+ *
+ * @param fence_wait_tracker Fence waiter tracker.
+ */
+void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *fence_wait_tracker);
+
+#endif /* __MALI_TIMELINE_FENCE_WAIT_H__ */
diff --git a/mali/common/mali_timeline_sync_fence.c b/mali/common/mali_timeline_sync_fence.c
new file mode 100755
index 0000000..4541ca6
--- /dev/null
+++ b/mali/common/mali_timeline_sync_fence.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/file.h>
+#include "mali_timeline_sync_fence.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_sync.h"
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+
+/**
+ * Creates a sync fence tracker and a sync fence.  Adds sync fence tracker to Timeline system and
+ * returns sync fence.  The sync fence will be signaled when the sync fence tracker is activated.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return Sync fence that will be signaled when tracker is activated.
+ */
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+static struct sync_fence *mali_timeline_sync_fence_create_and_add_tracker(struct mali_timeline *timeline, mali_timeline_point point)
+#else
+static struct mali_internal_sync_fence *mali_timeline_sync_fence_create_and_add_tracker(struct mali_timeline *timeline, mali_timeline_point point)
+#endif
+{
+	struct mali_timeline_sync_fence_tracker *sync_fence_tracker;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	struct sync_fence                       *sync_fence;
+#else
+	struct mali_internal_sync_fence                       *sync_fence;
+#endif
+	struct mali_timeline_fence               fence;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+	/* Allocate sync fence tracker. */
+	sync_fence_tracker = _mali_osk_calloc(1, sizeof(struct mali_timeline_sync_fence_tracker));
+	if (NULL == sync_fence_tracker) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_fence_tracker allocation failed\n"));
+		return NULL;
+	}
+
+	/* Create sync flag. */
+	MALI_DEBUG_ASSERT_POINTER(timeline->sync_tl);
+	sync_fence_tracker->flag = mali_sync_flag_create(timeline->sync_tl, point);
+	if (NULL == sync_fence_tracker->flag) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_flag creation failed\n"));
+		_mali_osk_free(sync_fence_tracker);
+		return NULL;
+	}
+
+	/* Create sync fence from sync flag. */
+	sync_fence = mali_sync_flag_create_fence(sync_fence_tracker->flag);
+	if (NULL == sync_fence) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_fence creation failed\n"));
+		mali_sync_flag_put(sync_fence_tracker->flag);
+		_mali_osk_free(sync_fence_tracker);
+		return NULL;
+	}
+
+	/* Setup fence for tracker. */
+	_mali_osk_memset(&fence, 0, sizeof(struct mali_timeline_fence));
+	fence.sync_fd = -1;
+	fence.points[timeline->id] = point;
+
+	/* Finally, add the tracker to Timeline system. */
+	mali_timeline_tracker_init(&sync_fence_tracker->tracker, MALI_TIMELINE_TRACKER_SYNC, &fence, sync_fence_tracker);
+	point = mali_timeline_system_add_tracker(timeline->system, &sync_fence_tracker->tracker, MALI_TIMELINE_NONE);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT == point);
+
+	return sync_fence;
+}
+
+s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence)
+{
+	u32 i;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	struct sync_fence *sync_fence_acc = NULL;
+#else
+	struct mali_internal_sync_fence *sync_fence_acc = NULL;
+#endif
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+		struct sync_fence *sync_fence;
+#else
+		struct mali_internal_sync_fence *sync_fence;
+#endif
+		if (MALI_TIMELINE_NO_POINT == fence->points[i]) continue;
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		sync_fence = mali_timeline_sync_fence_create_and_add_tracker(timeline, fence->points[i]);
+		if (NULL == sync_fence) goto error;
+
+		if (NULL != sync_fence_acc) {
+			/* Merge sync fences. */
+			sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence);
+			if (NULL == sync_fence_acc) goto error;
+		} else {
+			/* This was the first sync fence created. */
+			sync_fence_acc = sync_fence;
+		}
+	}
+
+	if (-1 != fence->sync_fd) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+		struct sync_fence *sync_fence;
+		sync_fence = sync_fence_fdget(fence->sync_fd);
+#else
+		struct mali_internal_sync_fence *sync_fence;
+		sync_fence = mali_internal_sync_fence_fdget(fence->sync_fd);
+#endif
+
+		if (NULL == sync_fence) goto error;
+
+		if (NULL != sync_fence_acc) {
+			sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence);
+			if (NULL == sync_fence_acc) goto error;
+		} else {
+			sync_fence_acc = sync_fence;
+		}
+	}
+
+	if (NULL == sync_fence_acc) {
+		MALI_DEBUG_ASSERT_POINTER(system->signaled_sync_tl);
+
+		/* There was nothing to wait on, so return an already signaled fence. */
+
+		sync_fence_acc = mali_sync_timeline_create_signaled_fence(system->signaled_sync_tl);
+		if (NULL == sync_fence_acc) goto error;
+	}
+
+	/* Return file descriptor for the accumulated sync fence. */
+	return mali_sync_fence_fd_alloc(sync_fence_acc);
+
+error:
+	if (NULL != sync_fence_acc) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+		sync_fence_put(sync_fence_acc);
+#else
+		fput(sync_fence_acc->file);
+#endif
+	}
+
+	return -1;
+}
+
+void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker->flag);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for sync fence tracker\n"));
+
+	/* Signal flag and release reference. */
+	mali_sync_flag_signal(sync_fence_tracker->flag, 0);
+	mali_sync_flag_put(sync_fence_tracker->flag);
+
+	/* Nothing can wait on this tracker, so nothing to schedule after release. */
+	schedule_mask = mali_timeline_tracker_release(&sync_fence_tracker->tracker);
+	MALI_DEBUG_ASSERT(MALI_SCHEDULER_MASK_EMPTY == schedule_mask);
+
+	_mali_osk_free(sync_fence_tracker);
+}
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
diff --git a/mali/common/mali_timeline_sync_fence.h b/mali/common/mali_timeline_sync_fence.h
new file mode 100755
index 0000000..a88f4fb
--- /dev/null
+++ b/mali/common/mali_timeline_sync_fence.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_timeline_sync_fence.h
+ *
+ * This file contains code related to creating sync fences from timeline fences.
+ */
+
+#ifndef __MALI_TIMELINE_SYNC_FENCE_H__
+#define __MALI_TIMELINE_SYNC_FENCE_H__
+
+#include "mali_timeline.h"
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+
+/**
+ * Sync fence tracker.
+ */
+struct mali_timeline_sync_fence_tracker {
+	struct mali_sync_flag        *flag;    /**< Sync flag used to connect tracker and sync fence. */
+	struct mali_timeline_tracker  tracker; /**< Timeline tracker. */
+};
+
+/**
+ * Create a sync fence that will be signaled when @ref fence is signaled.
+ *
+ * @param system Timeline system.
+ * @param fence Fence to create sync fence from.
+ * @return File descriptor for new sync fence, or -1 on error.
+ */
+s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence);
+
+/**
+ * Used by the Timeline system to activate a sync fence tracker.
+ *
+ * @param sync_fence_tracker Sync fence tracker.
+ *
+ */
+void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker);
+
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+#endif /* __MALI_TIMELINE_SYNC_FENCE_H__ */
diff --git a/mali/common/mali_ukk.h b/mali/common/mali_ukk.h
new file mode 100755
index 0000000..79829b9
--- /dev/null
+++ b/mali/common/mali_ukk.h
@@ -0,0 +1,551 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_ukk.h
+ * Defines the kernel-side interface of the user-kernel interface
+ */
+
+#ifndef __MALI_UKK_H__
+#define __MALI_UKK_H__
+
+#include "mali_osk.h"
+#include "mali_uk_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs
+ *
+ * - The _mali_uk functions are an abstraction of the interface to the device
+ * driver. On certain OSs, this would be implemented via the IOCTL interface.
+ * On other OSs, it could be via extension of some Device Driver Class, or
+ * direct function call for Bare metal/RTOSs.
+ * - It is important to note that:
+ *   -  The Device Driver has implemented the _mali_ukk set of functions
+ *   -  The Base Driver calls the corresponding set of _mali_uku functions.
+ * - What requires porting is solely the calling mechanism from User-side to
+ * Kernel-side, and propagating back the results.
+ * - Each U/K function is associated with a (group, number) pair from
+ * \ref _mali_uk_functions to make it possible for a common function in the
+ * Base Driver and Device Driver to route User/Kernel calls from/to the
+ * correct _mali_uk function. For example, in an IOCTL system, the IOCTL number
+ * would be formed based on the group and number assigned to the _mali_uk
+ * function, as listed in \ref _mali_uk_functions. On the user-side, each
+ * _mali_uku function would just make an IOCTL with the IOCTL-code being an
+ * encoded form of the (group, number) pair. On the kernel-side, the Device
+ * Driver's IOCTL handler decodes the IOCTL-code back into a (group, number)
+ * pair, and uses this to determine which corresponding _mali_ukk should be
+ * called.
+ *   - Refer to \ref _mali_uk_functions for more information about this
+ * (group, number) pairing.
+ * - In a system where there is no distinction between user and kernel-side,
+ * the U/K interface may be implemented as:@code
+ * MALI_STATIC_INLINE _mali_osk_errcode_t _mali_uku_examplefunction( _mali_uk_examplefunction_s *args )
+ * {
+ *     return mali_ukk_examplefunction( args );
+ * }
+ * @endcode
+ * - Therefore, all U/K calls behave \em as \em though they were direct
+ * function calls (but the \b implementation \em need \em not be a direct
+ * function calls)
+ *
+ * @note Naming the _mali_uk functions the same on both User and Kernel sides
+ * on non-RTOS systems causes debugging issues when setting breakpoints. In
+ * this case, it is not clear which function the breakpoint is put on.
+ * Therefore the _mali_uk functions in user space are prefixed with \c _mali_uku
+ * and in kernel space with \c _mali_ukk. The naming for the argument
+ * structures is unaffected.
+ *
+ * - The _mali_uk functions are synchronous.
+ * - Arguments to the _mali_uk functions are passed in a structure. The only
+ * parameter passed to the _mali_uk functions is a pointer to this structure.
+ * This first member of this structure, ctx, is a pointer to a context returned
+ * by _mali_uku_open(). For example:@code
+ * typedef struct
+ * {
+ *     void *ctx;
+ *     u32 number_of_cores;
+ * } _mali_uk_get_gp_number_of_cores_s;
+ * @endcode
+ *
+ * - Each _mali_uk function has its own argument structure named after the
+ *  function. The argument is distinguished by the _s suffix.
+ * - The argument types are defined by the base driver and user-kernel
+ *  interface.
+ * - All _mali_uk functions return a standard \ref _mali_osk_errcode_t.
+ * - Only arguments of type input or input/output need be initialized before
+ * calling a _mali_uk function.
+ * - Arguments of type output and input/output are only valid when the
+ * _mali_uk function returns \ref _MALI_OSK_ERR_OK.
+ * - The \c ctx member is always invalid after it has been used by a
+ * _mali_uk function, except for the context management functions
+ *
+ *
+ * \b Interface \b restrictions
+ *
+ * The requirements of the interface mean that an implementation of the
+ * User-kernel interface may do no 'real' work. For example, the following are
+ * illegal in the User-kernel implementation:
+ * - Calling functions necessary for operation on all systems,  which would
+ * not otherwise get called on RTOS systems.
+ *     - For example, a  U/K interface that calls multiple _mali_ukk functions
+ * during one particular U/K call. This could not be achieved by the same code
+ * which uses direct function calls for the U/K interface.
+ * -  Writing in values to the args members, when otherwise these members would
+ * not hold a useful value for a direct function call U/K interface.
+ *     - For example, U/K interface implementation that take NULL members in
+ * their arguments structure from the user side, but those members are
+ * replaced with non-NULL values in the kernel-side of the U/K interface
+ * implementation. A scratch area for writing data is one such example. In this
+ * case, a direct function call U/K interface would segfault, because no code
+ * would be present to replace the NULL pointer with a meaningful pointer.
+ *     - Note that we discourage the case where the U/K implementation changes
+ * a NULL argument member to non-NULL, and then the Device Driver code (outside
+ * of the U/K layer) re-checks this member for NULL, and corrects it when
+ * necessary. Whilst such code works even on direct function call U/K
+ * intefaces, it reduces the testing coverage of the Device Driver code. This
+ * is because we have no way of testing the NULL == value path on an OS
+ * implementation.
+ *
+ * A number of allowable examples exist where U/K interfaces do 'real' work:
+ * - The 'pointer switching' technique for \ref _mali_ukk_get_system_info
+ *     - In this case, without the pointer switching on direct function call
+ * U/K interface, the Device Driver code still sees the same thing: a pointer
+ * to which it can write memory. This is because such a system has no
+ * distinction between a user and kernel pointer.
+ * - Writing an OS-specific value into the ukk_private member for
+ * _mali_ukk_mem_mmap().
+ *     - In this case, this value is passed around by Device Driver code, but
+ * its actual value is never checked. Device Driver code simply passes it from
+ * the U/K layer to the OSK layer, where it can be acted upon. In this case,
+ * \em some OS implementations of the U/K (_mali_ukk_mem_mmap()) and OSK
+ * (_mali_osk_mem_mapregion_init()) functions will collaborate on the
+ *  meaning of ukk_private member. On other OSs, it may be unused by both
+ * U/K and OSK layers
+ *     - Therefore, on error inside the U/K interface implementation itself,
+ * it will be as though the _mali_ukk function itself had failed, and cleaned
+ * up after itself.
+ *     - Compare this to a direct function call U/K implementation, where all
+ * error cleanup is handled by the _mali_ukk function itself. The direct
+ * function call U/K interface implementation is automatically atomic.
+ *
+ * The last example highlights a consequence of all U/K interface
+ * implementations: they must be atomic with respect to the Device Driver code.
+ * And therefore, should Device Driver code succeed but the U/K implementation
+ * fail afterwards (but before return to user-space), then the U/K
+ * implementation must cause appropriate cleanup actions to preserve the
+ * atomicity of the interface.
+ *
+ * @{
+ */
+
+
+/** @defgroup _mali_uk_context U/K Context management
+ *
+ * These functions allow for initialisation of the user-kernel interface once per process.
+ *
+ * Generally the context will store the OS specific object to communicate with the kernel device driver and further
+ * state information required by the specific implementation. The context is shareable among all threads in the caller process.
+ *
+ * On IOCTL systems, this is likely to be a file descriptor as a result of opening the kernel device driver.
+ *
+ * On a bare-metal/RTOS system with no distinction between kernel and
+ * user-space, the U/K interface simply calls the _mali_ukk variant of the
+ * function by direct function call. In this case, the context returned is the
+ * mali_session_data from _mali_ukk_open().
+ *
+ * The kernel side implementations of the U/K interface expect the first member of the argument structure to
+ * be the context created by _mali_uku_open(). On some OS implementations, the meaning of this context
+ * will be different between user-side and kernel-side. In which case, the kernel-side will need to replace this context
+ * with the kernel-side equivalent, because user-side will not have access to kernel-side data. The context parameter
+ * in the argument structure therefore has to be of type input/output.
+ *
+ * It should be noted that the caller cannot reuse the \c ctx member of U/K
+ * argument structure after a U/K call, because it may be overwritten. Instead,
+ * the context handle must always be stored  elsewhere, and copied into
+ * the appropriate U/K argument structure for each user-side call to
+ * the U/K interface. This is not usually a problem, since U/K argument
+ * structures are usually placed on the stack.
+ *
+ * @{ */
+
+/** @brief Begin a new Mali Device Driver session
+ *
+ * This is used to obtain a per-process context handle for all future U/K calls.
+ *
+ * @param context pointer to storage to return a (void*)context handle.
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_open(void **context);
+
+/** @brief End a Mali Device Driver session
+ *
+ * This should be called when the process no longer requires use of the Mali Device Driver.
+ *
+ * The context handle must not be used after it has been closed.
+ *
+ * @param context pointer to a stored (void*)context handle.
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_close(void **context);
+
+/** @} */ /* end group _mali_uk_context */
+
+
+/** @addtogroup _mali_uk_core U/K Core
+ *
+ * The core functions provide the following functionality:
+ * - verify that the user and kernel API are compatible
+ * - retrieve information about the cores and memory banks in the system
+ * - wait for the result of jobs started on a core
+ *
+ * @{ */
+
+/** @brief Waits for a job notification.
+ *
+ * Sleeps until notified or a timeout occurs. Returns information about the notification.
+ *
+ * @param args see _mali_uk_wait_for_notification_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args);
+
+/** @brief Post a notification to the notification queue of this application.
+ *
+ * @param args see _mali_uk_post_notification_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args);
+
+/** @brief Verifies if the user and kernel side of this API are compatible.
+ *
+ * This function is obsolete, but kept to allow old, incompatible user space
+ * clients to robustly detect the incompatibility.
+ *
+ * @param args see _mali_uk_get_api_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args);
+
+/** @brief Verifies if the user and kernel side of this API are compatible.
+ *
+ * @param args see _mali_uk_get_api_version_v2_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args);
+
+/** @brief Get the user space settings applicable for calling process.
+ *
+ * @param args see _mali_uk_get_user_settings_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args);
+
+/** @brief Get a user space setting applicable for calling process.
+ *
+ * @param args see _mali_uk_get_user_setting_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args);
+
+/* @brief Grant or deny high priority scheduling for this session.
+ *
+ * @param args see _mali_uk_request_high_priority_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args);
+
+/** @brief Make process sleep if the pending big job in kernel  >= MALI_MAX_PENDING_BIG_JOB
+ *
+ */
+_mali_osk_errcode_t _mali_ukk_pending_submit(_mali_uk_pending_submit_s *args);
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @addtogroup _mali_uk_memory U/K Memory
+ *
+ * The memory functions provide functionality with and without a Mali-MMU present.
+ *
+ * For Mali-MMU based systems, the following functionality is provided:
+ * - Initialize and terminate MALI virtual address space
+ * - Allocate/deallocate physical memory to a MALI virtual address range and map into/unmap from the
+ * current process address space
+ * - Map/unmap external physical memory into the MALI virtual address range
+ *
+ * For Mali-nonMMU based systems:
+ * - Allocate/deallocate MALI memory
+ *
+ * @{ */
+
+/** @brief Map Mali Memory into the current user process
+ *
+ * Maps Mali memory into the current user process in a generic way.
+ *
+ * This function is to be used for Mali-MMU mode. The function is available in both Mali-MMU and Mali-nonMMU modes,
+ * but should not be called by a user process in Mali-nonMMU mode.
+ *
+ * The implementation and operation of _mali_ukk_mem_mmap() is dependant on whether the driver is built for Mali-MMU
+ * or Mali-nonMMU:
+ * - In the nonMMU case, _mali_ukk_mem_mmap() requires a physical address to be specified. For this reason, an OS U/K
+ * implementation should not allow this to be called from user-space. In any case, nonMMU implementations are
+ * inherently insecure, and so the overall impact is minimal. Mali-MMU mode should be used if security is desired.
+ * - In the MMU case, _mali_ukk_mem_mmap() the _mali_uk_mem_mmap_s::phys_addr
+ * member is used for the \em Mali-virtual address desired for the mapping. The
+ * implementation of _mali_ukk_mem_mmap() will allocate both the CPU-virtual
+ * and CPU-physical addresses, and can cope with mapping a contiguous virtual
+ * address range to a sequence of non-contiguous physical pages. In this case,
+ * the CPU-physical addresses are not communicated back to the user-side, as
+ * they are unnecsessary; the \em Mali-virtual address range must be used for
+ * programming Mali structures.
+ *
+ * In the second (MMU) case, _mali_ukk_mem_mmap() handles management of
+ * CPU-virtual and CPU-physical ranges, but the \em caller must manage the
+ * \em Mali-virtual address range from the user-side.
+ *
+ * @note Mali-virtual address ranges are entirely separate between processes.
+ * It is not possible for a process to accidentally corrupt another process'
+ * \em Mali-virtual address space.
+ *
+ * @param args see _mali_uk_mem_mmap_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_mmap(_mali_uk_mem_mmap_s *args);
+
+/** @brief Unmap Mali Memory from the current user process
+ *
+ * Unmaps Mali memory from the current user process in a generic way. This only operates on Mali memory supplied
+ * from _mali_ukk_mem_mmap().
+ *
+ * @param args see _mali_uk_mem_munmap_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_munmap(_mali_uk_mem_munmap_s *args);
+
+/** @brief Determine the buffer size necessary for an MMU page table dump.
+ * @param args see _mali_uk_query_mmu_page_table_dump_size_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args);
+/** @brief Dump MMU Page tables.
+ * @param args see _mali_uk_dump_mmu_page_table_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args);
+
+/** @brief Write user data to specified Mali memory without causing segfaults.
+ * @param args see _mali_uk_mem_write_safe_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args);
+
+/** @} */ /* end group _mali_uk_memory */
+
+
+/** @addtogroup _mali_uk_pp U/K Fragment Processor
+ *
+ * The Fragment Processor (aka PP (Pixel Processor)) functions provide the following functionality:
+ * - retrieving version of the fragment processors
+ * - determine number of fragment processors
+ * - starting a job on a fragment processor
+ *
+ * @{ */
+
+/** @brief Issue a request to start a new job on a Fragment Processor.
+ *
+ * If the request fails args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can
+ * try to start the job again.
+ *
+ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job
+ * which the hardware hasn't actually started processing yet. In this case the new job will be started instead and the
+ * existing one returned, otherwise the new job is started and the status field args->status is set to
+ * _MALI_UK_START_JOB_STARTED.
+ *
+ * Job completion can be awaited with _mali_ukk_wait_for_notification().
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_pp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx, _mali_uk_pp_start_job_s *uargs);
+
+/**
+ * @brief Issue a request to start new jobs on both Vertex Processor and Fragment Processor.
+ *
+ * @note Will call into @ref _mali_ukk_pp_start_job and @ref _mali_ukk_gp_start_job.
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_pp_and_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx, _mali_uk_pp_and_gp_start_job_s *uargs);
+
+/** @brief Returns the number of Fragment Processors in the system
+ *
+ * @param args see _mali_uk_get_pp_number_of_cores_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args);
+
+/** @brief Returns the version that all Fragment Processor cores are compatible with.
+ *
+ * This function may only be called when _mali_ukk_get_pp_number_of_cores() indicated at least one Fragment
+ * Processor core is available.
+ *
+ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args);
+
+/** @brief Disable Write-back unit(s) on specified job
+ *
+ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h"
+ */
+void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args);
+
+
+/** @} */ /* end group _mali_uk_pp */
+
+
+/** @addtogroup _mali_uk_gp U/K Vertex Processor
+ *
+ * The Vertex Processor (aka GP (Geometry Processor)) functions provide the following functionality:
+ * - retrieving version of the Vertex Processors
+ * - determine number of Vertex Processors available
+ * - starting a job on a Vertex Processor
+ *
+ * @{ */
+
+/** @brief Issue a request to start a new job on a Vertex Processor.
+ *
+ * If the request fails args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can
+ * try to start the job again.
+ *
+ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job
+ * which the hardware hasn't actually started processing yet. In this case the new job will be started and the
+ * existing one returned, otherwise the new job is started and the status field args->status is set to
+ * _MALI_UK_START_JOB_STARTED.
+ *
+ * Job completion can be awaited with _mali_ukk_wait_for_notification().
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx, _mali_uk_gp_start_job_s *uargs);
+
+/** @brief Returns the number of Vertex Processors in the system.
+ *
+ * @param args see _mali_uk_get_gp_number_of_cores_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args);
+
+/** @brief Returns the version that all Vertex Processor cores are compatible with.
+ *
+ * This function may only be called when _mali_uk_get_gp_number_of_cores() indicated at least one Vertex
+ * Processor core is available.
+ *
+ * @param args see _mali_uk_get_gp_core_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args);
+
+/** @brief Resume or abort suspended Vertex Processor jobs.
+ *
+ * After receiving notification that a Vertex Processor job was suspended from
+ * _mali_ukk_wait_for_notification() you can use this function to resume or abort the job.
+ *
+ * @param args see _mali_uk_gp_suspend_response_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args);
+
+/** @} */ /* end group _mali_uk_gp */
+
+#if defined(CONFIG_MALI400_PROFILING)
+/** @addtogroup _mali_uk_profiling U/K Timeline profiling module
+ * @{ */
+
+/** @brief Add event to profiling buffer.
+ *
+ * @param args see _mali_uk_profiling_add_event_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args);
+
+/** @brief Get profiling stream fd.
+ *
+ * @param args see _mali_uk_profiling_stream_fd_get_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_stream_fd_get(_mali_uk_profiling_stream_fd_get_s *args);
+
+/** @brief Profiling control set.
+ *
+ * @param args see _mali_uk_profiling_control_set_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_control_set(_mali_uk_profiling_control_set_s *args);
+
+/** @} */ /* end group _mali_uk_profiling */
+#endif
+
+/** @addtogroup _mali_uk_vsync U/K VSYNC reporting module
+ * @{ */
+
+/** @brief Report events related to vsync.
+ *
+ * @note Events should be reported when starting to wait for vsync and when the
+ * waiting is finished. This information can then be used in kernel space to
+ * complement the GPU utilization metric.
+ *
+ * @param args see _mali_uk_vsync_event_report_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args);
+
+/** @} */ /* end group _mali_uk_vsync */
+
+/** @addtogroup _mali_sw_counters_report U/K Software counter reporting
+ * @{ */
+
+/** @brief Report software counters.
+ *
+ * @param args see _mali_uk_sw_counters_report_s in "mali_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args);
+
+/** @} */ /* end group _mali_sw_counters_report */
+
+/** @} */ /* end group u_k_api */
+
+/** @} */ /* end group uddapi */
+
+u32 _mali_ukk_report_memory_usage(void);
+
+u32 _mali_ukk_report_total_memory_size(void);
+
+u32 _mali_ukk_utilization_gp_pp(void);
+
+u32 _mali_ukk_utilization_gp(void);
+
+u32 _mali_ukk_utilization_pp(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UKK_H__ */
diff --git a/mali/common/mali_user_settings_db.c b/mali/common/mali_user_settings_db.c
new file mode 100755
index 0000000..a1a613f
--- /dev/null
+++ b/mali/common/mali_user_settings_db.c
@@ -0,0 +1,147 @@
+/**
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_user_settings_db.h"
+#include "mali_session.h"
+
+static u32 mali_user_settings[_MALI_UK_USER_SETTING_MAX];
+const char *_mali_uk_user_setting_descriptions[] = _MALI_UK_USER_SETTING_DESCRIPTIONS;
+
+static void mali_user_settings_notify(_mali_uk_user_setting_t setting, u32 value)
+{
+	mali_bool done = MALI_FALSE;
+
+	/*
+	 * This function gets a bit complicated because we can't hold the session lock while
+	 * allocating notification objects.
+	 */
+
+	while (!done) {
+		u32 i;
+		u32 num_sessions_alloc;
+		u32 num_sessions_with_lock;
+		u32 used_notification_objects = 0;
+		_mali_osk_notification_t **notobjs;
+
+		/* Pre allocate the number of notifications objects we need right now (might change after lock has been taken) */
+		num_sessions_alloc = mali_session_get_count();
+		if (0 == num_sessions_alloc) {
+			/* No sessions to report to */
+			return;
+		}
+
+		notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc);
+		if (NULL == notobjs) {
+			MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n"));
+			return;
+		}
+
+		for (i = 0; i < num_sessions_alloc; i++) {
+			notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_SETTINGS_CHANGED,
+					sizeof(_mali_uk_settings_changed_s));
+			if (NULL != notobjs[i]) {
+				_mali_uk_settings_changed_s *data;
+				data = notobjs[i]->result_buffer;
+
+				data->setting = setting;
+				data->value = value;
+			} else {
+				MALI_PRINT_ERROR(("Failed to notify user space session about setting change (alloc failure %u)\n", i));
+			}
+		}
+
+		mali_session_lock();
+
+		/* number of sessions will not change while we hold the lock */
+		num_sessions_with_lock = mali_session_get_count();
+
+		if (num_sessions_alloc >= num_sessions_with_lock) {
+			/* We have allocated enough notification objects for all the sessions atm */
+			struct mali_session_data *session, *tmp;
+			MALI_SESSION_FOREACH(session, tmp, link) {
+				MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc);
+				if (NULL != notobjs[used_notification_objects]) {
+					mali_session_send_notification(session, notobjs[used_notification_objects]);
+					notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */
+				}
+				used_notification_objects++;
+			}
+			done = MALI_TRUE;
+		}
+
+		mali_session_unlock();
+
+		/* Delete any remaining/unused notification objects */
+		for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) {
+			if (NULL != notobjs[used_notification_objects]) {
+				_mali_osk_notification_delete(notobjs[used_notification_objects]);
+			}
+		}
+
+		_mali_osk_free(notobjs);
+	}
+}
+
+void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value)
+{
+	mali_bool notify = MALI_FALSE;
+
+	if (setting >= _MALI_UK_USER_SETTING_MAX) {
+		MALI_DEBUG_PRINT_ERROR(("Invalid user setting %ud\n"));
+		return;
+	}
+
+	if (mali_user_settings[setting] != value) {
+		notify = MALI_TRUE;
+	}
+
+	mali_user_settings[setting] = value;
+
+	if (notify) {
+		mali_user_settings_notify(setting, value);
+	}
+}
+
+u32 mali_get_user_setting(_mali_uk_user_setting_t setting)
+{
+	if (setting >= _MALI_UK_USER_SETTING_MAX) {
+		return 0;
+	}
+
+	return mali_user_settings[setting];
+}
+
+_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args)
+{
+	_mali_uk_user_setting_t setting;
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	setting = args->setting;
+
+	if (_MALI_UK_USER_SETTING_MAX > setting) {
+		args->value = mali_user_settings[setting];
+		return _MALI_OSK_ERR_OK;
+	} else {
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+}
+
+_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	_mali_osk_memcpy(args->settings, mali_user_settings, sizeof(mali_user_settings));
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/mali/common/mali_user_settings_db.h b/mali/common/mali_user_settings_db.h
new file mode 100755
index 0000000..6828dc7
--- /dev/null
+++ b/mali/common/mali_user_settings_db.h
@@ -0,0 +1,39 @@
+/**
+ * Copyright (C) 2012-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_USER_SETTINGS_DB_H__
+#define __MALI_USER_SETTINGS_DB_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+
+/** @brief Set Mali user setting in DB
+ *
+ * Update the DB with a new value for \a setting. If the value is different from theprevious set value running sessions will be notified of the change.
+ *
+ * @param setting the setting to be changed
+ * @param value the new value to set
+ */
+void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value);
+
+/** @brief Get current Mali user setting value from DB
+ *
+ * @param setting the setting to extract
+ * @return the value of the selected setting
+ */
+u32 mali_get_user_setting(_mali_uk_user_setting_t setting);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  /* __MALI_KERNEL_USER_SETTING__ */
diff --git a/mali/include/linux/mali/mali_utgard.h b/mali/include/linux/mali/mali_utgard.h
new file mode 100755
index 0000000..e58ed1f
--- /dev/null
+++ b/mali/include/linux/mali/mali_utgard.h
@@ -0,0 +1,526 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_utgard.h
+ * Defines types and interface exposed by the Mali Utgard device driver
+ */
+
+#ifndef __MALI_UTGARD_H__
+#define __MALI_UTGARD_H__
+
+#include "mali_osk_types.h"
+#ifdef CONFIG_MALI_DEVFREQ
+#include <linux/devfreq.h>
+#include "mali_pm_metrics.h"
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+#endif
+
+#define MALI_GPU_NAME_UTGARD "mali-utgard"
+
+
+#define MALI_OFFSET_GP                    0x00000
+#define MALI_OFFSET_GP_MMU                0x03000
+
+#define MALI_OFFSET_PP0                   0x08000
+#define MALI_OFFSET_PP0_MMU               0x04000
+#define MALI_OFFSET_PP1                   0x0A000
+#define MALI_OFFSET_PP1_MMU               0x05000
+#define MALI_OFFSET_PP2                   0x0C000
+#define MALI_OFFSET_PP2_MMU               0x06000
+#define MALI_OFFSET_PP3                   0x0E000
+#define MALI_OFFSET_PP3_MMU               0x07000
+
+#define MALI_OFFSET_PP4                   0x28000
+#define MALI_OFFSET_PP4_MMU               0x1C000
+#define MALI_OFFSET_PP5                   0x2A000
+#define MALI_OFFSET_PP5_MMU               0x1D000
+#define MALI_OFFSET_PP6                   0x2C000
+#define MALI_OFFSET_PP6_MMU               0x1E000
+#define MALI_OFFSET_PP7                   0x2E000
+#define MALI_OFFSET_PP7_MMU               0x1F000
+
+#define MALI_OFFSET_L2_RESOURCE0          0x01000
+#define MALI_OFFSET_L2_RESOURCE1          0x10000
+#define MALI_OFFSET_L2_RESOURCE2          0x11000
+
+#define MALI400_OFFSET_L2_CACHE0          MALI_OFFSET_L2_RESOURCE0
+#define MALI450_OFFSET_L2_CACHE0          MALI_OFFSET_L2_RESOURCE1
+#define MALI450_OFFSET_L2_CACHE1          MALI_OFFSET_L2_RESOURCE0
+#define MALI450_OFFSET_L2_CACHE2          MALI_OFFSET_L2_RESOURCE2
+#define MALI470_OFFSET_L2_CACHE1          MALI_OFFSET_L2_RESOURCE0
+
+#define MALI_OFFSET_BCAST                 0x13000
+#define MALI_OFFSET_DLBU                  0x14000
+
+#define MALI_OFFSET_PP_BCAST              0x16000
+#define MALI_OFFSET_PP_BCAST_MMU          0x15000
+
+#define MALI_OFFSET_PMU                   0x02000
+#define MALI_OFFSET_DMA                   0x12000
+
+/* Mali-300 */
+
+#define MALI_GPU_RESOURCES_MALI300(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI300_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq)
+
+/* Mali-400 */
+
+#define MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+	/* Mali-450 */
+#define MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI450_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP4, pp3_irq, base_addr + MALI_OFFSET_PP4_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP5, pp4_irq, base_addr + MALI_OFFSET_PP5_MMU, pp4_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP6, pp5_irq, base_addr + MALI_OFFSET_PP6_MMU, pp5_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP6_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP4, pp4_irq, base_addr + MALI_OFFSET_PP4_MMU, pp4_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP5, pp5_irq, base_addr + MALI_OFFSET_PP5_MMU, pp5_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(6, base_addr + MALI_OFFSET_PP6, pp6_irq, base_addr + MALI_OFFSET_PP6_MMU, pp6_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(7, base_addr + MALI_OFFSET_PP7, pp7_irq, base_addr + MALI_OFFSET_PP7_MMU, pp7_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP8_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+	/* Mali - 470 */
+#define MALI_GPU_RESOURCES_MALI470_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCE_L2(addr) \
+	{ \
+		.name = "Mali_L2", \
+			.flags = IORESOURCE_MEM, \
+				 .start = addr, \
+					  .end   = addr + 0x200, \
+	},
+
+#define MALI_GPU_RESOURCE_GP(gp_addr, gp_irq) \
+	{ \
+		.name = "Mali_GP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_addr, \
+					  .end =   gp_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_irq, \
+					  .end   = gp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_GP_WITH_MMU(gp_addr, gp_irq, gp_mmu_addr, gp_mmu_irq) \
+	{ \
+		.name = "Mali_GP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_addr, \
+					  .end =   gp_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_irq, \
+					  .end   = gp_irq, \
+	}, \
+	{ \
+		.name = "Mali_GP_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_mmu_addr, \
+					  .end =   gp_mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_mmu_irq, \
+					  .end =   gp_mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_PP(pp_addr, pp_irq) \
+	{ \
+		.name = "Mali_PP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_PP_WITH_MMU(id, pp_addr, pp_irq, pp_mmu_addr, pp_mmu_irq) \
+	{ \
+		.name = "Mali_PP" #id, \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_mmu_addr, \
+					  .end =   pp_mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_mmu_irq, \
+					  .end =   pp_mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_MMU(mmu_addr, mmu_irq) \
+	{ \
+		.name = "Mali_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = mmu_addr, \
+					  .end =   mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = mmu_irq, \
+					  .end =   mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_PMU(pmu_addr) \
+	{ \
+		.name = "Mali_PMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pmu_addr, \
+					  .end =   pmu_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_DMA(dma_addr) \
+	{ \
+		.name = "Mali_DMA", \
+			.flags = IORESOURCE_MEM, \
+				 .start = dma_addr, \
+					  .end = dma_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_DLBU(dlbu_addr) \
+	{ \
+		.name = "Mali_DLBU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = dlbu_addr, \
+					  .end = dlbu_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_BCAST(bcast_addr) \
+	{ \
+		.name = "Mali_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = bcast_addr, \
+					  .end = bcast_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_PP_BCAST(pp_addr, pp_irq) \
+	{ \
+		.name = "Mali_PP_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP_Broadcast_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_PP_MMU_BCAST(pp_mmu_bcast_addr) \
+	{ \
+		.name = "Mali_PP_MMU_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_mmu_bcast_addr, \
+					  .end = pp_mmu_bcast_addr + 0x100, \
+	},
+
+	struct mali_gpu_utilization_data {
+		unsigned int utilization_gpu; /* Utilization for GP and all PP cores combined, 0 = no utilization, 256 = full utilization */
+		unsigned int utilization_gp;  /* Utilization for GP core only, 0 = no utilization, 256 = full utilization */
+		unsigned int utilization_pp;  /* Utilization for all PP cores combined, 0 = no utilization, 256 = full utilization */
+	};
+
+	struct mali_gpu_clk_item {
+		unsigned int clock; /* unit(MHz) */
+		unsigned int vol;
+	};
+
+	struct mali_gpu_clock {
+		struct mali_gpu_clk_item *item;
+		unsigned int num_of_steps;
+	};
+
+	struct mali_gpu_device_data {
+		/* Shared GPU memory */
+		unsigned long shared_mem_size;
+
+		/*
+		 * Mali PMU switch delay.
+		 * Only needed if the power gates are connected to the PMU in a high fanout
+		 * network. This value is the number of Mali clock cycles it takes to
+		 * enable the power gates and turn on the power mesh.
+		 * This value will have no effect if a daisy chain implementation is used.
+		 */
+		u32 pmu_switch_delay;
+
+		/* Mali Dynamic power domain configuration in sequence from 0-11
+		 *  GP  PP0 PP1  PP2  PP3  PP4  PP5  PP6  PP7, L2$0 L2$1 L2$2
+		 */
+		u16 pmu_domain_config[12];
+
+		/* Dedicated GPU memory range (physical). */
+		unsigned long dedicated_mem_start;
+		unsigned long dedicated_mem_size;
+
+		/* Frame buffer memory to be accessible by Mali GPU (physical) */
+		unsigned long fb_start;
+		unsigned long fb_size;
+
+		/* Max runtime [ms] for jobs */
+		int max_job_runtime;
+
+		/* Report GPU utilization and related control in this interval (specified in ms) */
+		unsigned long control_interval;
+
+		/* Function that will receive periodic GPU utilization numbers */
+		void (*utilization_callback)(struct mali_gpu_utilization_data *data);
+
+		/* Fuction that platform callback for freq setting, needed when CONFIG_MALI_DVFS enabled */
+		int (*set_freq)(int setting_clock_step);
+		/* Function that platfrom report it's clock info which driver can set, needed when CONFIG_MALI_DVFS enabled */
+		void (*get_clock_info)(struct mali_gpu_clock **data);
+		/* Function that get the current clock info, needed when CONFIG_MALI_DVFS enabled */
+		int (*get_freq)(void);
+		/* Function that init the mali gpu secure mode */
+		int (*secure_mode_init)(void);
+		/* Function that deinit the mali gpu secure mode */
+		void (*secure_mode_deinit)(void);
+		/* Function that reset GPU and enable gpu secure mode */
+		int (*gpu_reset_and_secure_mode_enable)(void);
+		/* Function that Reset GPU and disable gpu secure mode */
+		int (*gpu_reset_and_secure_mode_disable)(void);
+		/* ipa related interface customer need register */
+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
+		struct devfreq_cooling_power *gpu_cooling_ops;
+#endif
+	};
+
+	/**
+	 * Pause the scheduling and power state changes of Mali device driver.
+	 * mali_dev_resume() must always be called as soon as possible after this function
+	 * in order to resume normal operation of the Mali driver.
+	 */
+	void mali_dev_pause(void);
+
+	/**
+	 * Resume scheduling and allow power changes in Mali device driver.
+	 * This must always be called after mali_dev_pause().
+	 */
+	void mali_dev_resume(void);
+
+	/** @brief Set the desired number of PP cores to use.
+	 *
+	 * The internal Mali PMU will be used, if present, to physically power off the PP cores.
+	 *
+	 * @param num_cores The number of desired cores
+	 * @return 0 on success, otherwise error. -EINVAL means an invalid number of cores was specified.
+	 */
+	int mali_perf_set_num_pp_cores(unsigned int num_cores);
+
+#endif
diff --git a/mali/include/linux/mali/mali_utgard_ioctl.h b/mali/include/linux/mali/mali_utgard_ioctl.h
new file mode 100755
index 0000000..dfe152b
--- /dev/null
+++ b/mali/include/linux/mali/mali_utgard_ioctl.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library. 
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules 
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting 
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions 
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify 
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so. 
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef __MALI_UTGARD_IOCTL_H__
+#define __MALI_UTGARD_IOCTL_H__
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <linux/fs.h>       /* file system operations */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file mali_kernel_ioctl.h
+ * Interface to the Linux device driver.
+ * This file describes the interface needed to use the Linux device driver.
+ * Its interface is designed to used by the HAL implementation through a thin arch layer.
+ */
+
+/**
+ * ioctl commands
+ */
+
+#define MALI_IOC_BASE           0x82
+#define MALI_IOC_CORE_BASE      (_MALI_UK_CORE_SUBSYSTEM      + MALI_IOC_BASE)
+#define MALI_IOC_MEMORY_BASE    (_MALI_UK_MEMORY_SUBSYSTEM    + MALI_IOC_BASE)
+#define MALI_IOC_PP_BASE        (_MALI_UK_PP_SUBSYSTEM        + MALI_IOC_BASE)
+#define MALI_IOC_GP_BASE        (_MALI_UK_GP_SUBSYSTEM        + MALI_IOC_BASE)
+#define MALI_IOC_PROFILING_BASE (_MALI_UK_PROFILING_SUBSYSTEM + MALI_IOC_BASE)
+#define MALI_IOC_VSYNC_BASE     (_MALI_UK_VSYNC_SUBSYSTEM + MALI_IOC_BASE)
+
+#define MALI_IOC_WAIT_FOR_NOTIFICATION      _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_WAIT_FOR_NOTIFICATION, _mali_uk_wait_for_notification_s)
+#define MALI_IOC_GET_API_VERSION            _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, u32)
+#define MALI_IOC_GET_API_VERSION_V2         _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, _mali_uk_get_api_version_v2_s)
+#define MALI_IOC_POST_NOTIFICATION          _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_POST_NOTIFICATION, _mali_uk_post_notification_s)
+#define MALI_IOC_GET_USER_SETTING           _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTING, _mali_uk_get_user_setting_s)
+#define MALI_IOC_GET_USER_SETTINGS          _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTINGS, _mali_uk_get_user_settings_s)
+#define MALI_IOC_REQUEST_HIGH_PRIORITY      _IOW (MALI_IOC_CORE_BASE, _MALI_UK_REQUEST_HIGH_PRIORITY, _mali_uk_request_high_priority_s)
+#define MALI_IOC_TIMELINE_GET_LATEST_POINT  _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_GET_LATEST_POINT, _mali_uk_timeline_get_latest_point_s)
+#define MALI_IOC_TIMELINE_WAIT              _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_WAIT, _mali_uk_timeline_wait_s)
+#define MALI_IOC_TIMELINE_CREATE_SYNC_FENCE _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_CREATE_SYNC_FENCE, _mali_uk_timeline_create_sync_fence_s)
+#define MALI_IOC_SOFT_JOB_START             _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_START, _mali_uk_soft_job_start_s)
+#define MALI_IOC_SOFT_JOB_SIGNAL            _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_SIGNAL, _mali_uk_soft_job_signal_s)
+#define MALI_IOC_PENDING_SUBMIT             _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_PENDING_SUBMIT, _mali_uk_pending_submit_s)
+
+#define MALI_IOC_MEM_ALLOC                  _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_ALLOC_MEM, _mali_uk_alloc_mem_s)
+#define MALI_IOC_MEM_FREE                   _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_FREE_MEM, _mali_uk_free_mem_s)
+#define MALI_IOC_MEM_BIND                   _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_BIND_MEM, _mali_uk_bind_mem_s)
+#define MALI_IOC_MEM_UNBIND                 _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_UNBIND_MEM, _mali_uk_unbind_mem_s)
+#define MALI_IOC_MEM_COW                    _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_COW_MEM, _mali_uk_cow_mem_s)
+#define MALI_IOC_MEM_COW_MODIFY_RANGE       _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_COW_MODIFY_RANGE, _mali_uk_cow_modify_range_s)
+#define MALI_IOC_MEM_RESIZE                 _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_RESIZE_MEM, _mali_uk_mem_resize_s)
+#define MALI_IOC_MEM_DMA_BUF_GET_SIZE       _IOR(MALI_IOC_MEMORY_BASE, _MALI_UK_DMA_BUF_GET_SIZE, _mali_uk_dma_buf_get_size_s)
+#define MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE _IOR (MALI_IOC_MEMORY_BASE, _MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, _mali_uk_query_mmu_page_table_dump_size_s)
+#define MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE    _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_DUMP_MMU_PAGE_TABLE, _mali_uk_dump_mmu_page_table_s)
+#define MALI_IOC_MEM_WRITE_SAFE             _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_MEM_WRITE_SAFE, _mali_uk_mem_write_safe_s)
+
+#define MALI_IOC_PP_START_JOB               _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_START_JOB, _mali_uk_pp_start_job_s)
+#define MALI_IOC_PP_AND_GP_START_JOB        _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_AND_GP_START_JOB, _mali_uk_pp_and_gp_start_job_s)
+#define MALI_IOC_PP_NUMBER_OF_CORES_GET     _IOR (MALI_IOC_PP_BASE, _MALI_UK_GET_PP_NUMBER_OF_CORES, _mali_uk_get_pp_number_of_cores_s)
+#define MALI_IOC_PP_CORE_VERSION_GET        _IOR (MALI_IOC_PP_BASE, _MALI_UK_GET_PP_CORE_VERSION, _mali_uk_get_pp_core_version_s)
+#define MALI_IOC_PP_DISABLE_WB              _IOW (MALI_IOC_PP_BASE, _MALI_UK_PP_DISABLE_WB, _mali_uk_pp_disable_wb_s)
+
+#define MALI_IOC_GP2_START_JOB              _IOWR(MALI_IOC_GP_BASE, _MALI_UK_GP_START_JOB, _mali_uk_gp_start_job_s)
+#define MALI_IOC_GP2_NUMBER_OF_CORES_GET    _IOR (MALI_IOC_GP_BASE, _MALI_UK_GET_GP_NUMBER_OF_CORES, _mali_uk_get_gp_number_of_cores_s)
+#define MALI_IOC_GP2_CORE_VERSION_GET       _IOR (MALI_IOC_GP_BASE, _MALI_UK_GET_GP_CORE_VERSION, _mali_uk_get_gp_core_version_s)
+#define MALI_IOC_GP2_SUSPEND_RESPONSE       _IOW (MALI_IOC_GP_BASE, _MALI_UK_GP_SUSPEND_RESPONSE,_mali_uk_gp_suspend_response_s)
+
+#define MALI_IOC_PROFILING_ADD_EVENT        _IOWR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_ADD_EVENT, _mali_uk_profiling_add_event_s)
+#define MALI_IOC_PROFILING_REPORT_SW_COUNTERS  _IOW (MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_REPORT_SW_COUNTERS, _mali_uk_sw_counters_report_s)
+#define MALI_IOC_PROFILING_MEMORY_USAGE_GET _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_MEMORY_USAGE_GET, _mali_uk_profiling_memory_usage_get_s)
+#define MALI_IOC_PROFILING_STREAM_FD_GET        _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_STREAM_FD_GET, _mali_uk_profiling_stream_fd_get_s)
+#define MALI_IOC_PROILING_CONTROL_SET   _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_CONTROL_SET, _mali_uk_profiling_control_set_s)
+
+#define MALI_IOC_VSYNC_EVENT_REPORT         _IOW (MALI_IOC_VSYNC_BASE, _MALI_UK_VSYNC_EVENT_REPORT, _mali_uk_vsync_event_report_s)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_IOCTL_H__ */
diff --git a/mali/include/linux/mali/mali_utgard_profiling_events.h b/mali/include/linux/mali/mali_utgard_profiling_events.h
new file mode 100755
index 0000000..f505e91
--- /dev/null
+++ b/mali/include/linux/mali/mali_utgard_profiling_events.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library. 
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules 
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting 
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions 
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify 
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so. 
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef _MALI_UTGARD_PROFILING_EVENTS_H_
+#define _MALI_UTGARD_PROFILING_EVENTS_H_
+
+/*
+ * The event ID is a 32 bit value consisting of different fields
+ * reserved, 4 bits, for future use
+ * event type, 4 bits, cinstr_profiling_event_type_t
+ * event channel, 8 bits, the source of the event.
+ * event data, 16 bit field, data depending on event type
+ */
+
+/**
+ * Specifies what kind of event this is
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_TYPE_SINGLE  = 0 << 24,
+	MALI_PROFILING_EVENT_TYPE_START   = 1 << 24,
+	MALI_PROFILING_EVENT_TYPE_STOP    = 2 << 24,
+	MALI_PROFILING_EVENT_TYPE_SUSPEND = 3 << 24,
+	MALI_PROFILING_EVENT_TYPE_RESUME  = 4 << 24,
+} cinstr_profiling_event_type_t;
+
+
+/**
+ * Secifies the channel/source of the event
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_CHANNEL_SOFTWARE =  0 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_GP0      =  1 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP0      =  5 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP1      =  6 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP2      =  7 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP3      =  8 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP4      =  9 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP5      = 10 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP6      = 11 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP7      = 12 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_GPU      = 21 << 16,
+} cinstr_profiling_event_channel_t;
+
+
+#define MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(num) (((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) + (num)) << 16)
+#define MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(num) (((MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) + (num)) << 16)
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from software channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_NONE                  = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_NEW_FRAME         = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_FLUSH                 = 2,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SWAP_BUFFERS      = 3,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_FB_EVENT              = 4,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE            = 5,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE            = 6,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_READBACK              = 7,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_WRITEBACK             = 8,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_ENTER_API_FUNC        = 10,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC        = 11,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_DISCARD_ATTACHMENTS   = 13,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_TRY_LOCK          = 53,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_LOCK              = 54,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_UNLOCK            = 55,
+	MALI_PROFILING_EVENT_REASON_SINGLE_LOCK_CONTENDED           = 56,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_MALI_FENCE_DUP    = 57,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SET_PP_JOB_FENCE  = 58,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_WAIT_SYNC         = 59,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_FENCE_SYNC = 60,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_NATIVE_FENCE_SYNC = 61,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FENCE_FLUSH       = 62,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FLUSH_SERVER_WAITS = 63,
+} cinstr_profiling_event_reason_single_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel
+ * to inform whether the core is physical or virtual
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL  = 0,
+	MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL   = 1,
+} cinstr_profiling_event_reason_start_stop_hw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel
+ */
+typedef enum {
+	/*MALI_PROFILING_EVENT_REASON_START_STOP_SW_NONE            = 0,*/
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_MALI            = 1,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_CALLBACK_THREAD = 2,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_WORKER_THREAD   = 3,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF     = 4,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF      = 5,
+} cinstr_profiling_event_reason_start_stop_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SUSPEND/RESUME is used from software channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_NONE                     =  0, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PIPELINE_FULL            =  1, /* NOT used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC                    = 26, /* used in some build configurations */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_WAIT           = 27, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_SYNC           = 28, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_FILTER_CLEANUP   = 29, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_TEXTURE          = 30, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_MIPLEVEL       = 31, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_READPIXELS     = 32, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SWAP_IMMEDIATE  = 33, /* NOT used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_QUEUE_BUFFER         = 34, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_DEQUEUE_BUFFER       = 35, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_UMP_LOCK                 = 36, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_GLOBAL_LOCK          = 37, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_SWAP                 = 38, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_MALI_EGL_IMAGE_SYNC_WAIT = 39, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GP_JOB_HANDLING          = 40, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PP_JOB_HANDLING          = 41, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_MERGE     = 42, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_DUP       = 43,
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_FLUSH_SERVER_WAITS   = 44,
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SYNC            = 45, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_JOBS_WAIT             = 46, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOFRAMES_WAIT         = 47, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOJOBS_WAIT           = 48, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_SUBMIT_LIMITER_WAIT      = 49, /* USED */
+} cinstr_profiling_event_reason_suspend_resume_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from a HW channel (GPx+PPx)
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_NONE          = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_INTERRUPT     = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH         = 2,
+} cinstr_profiling_event_reason_single_hw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from the GPU channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_NONE              = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE  = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS      = 2,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS      = 3,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS      = 4,
+} cinstr_profiling_event_reason_single_gpu_t;
+
+/**
+ * These values are applicable for the 3rd data parameter when
+ * the type MALI_PROFILING_EVENT_TYPE_START is used from the software channel
+ * with the MALI_PROFILING_EVENT_REASON_START_STOP_BOTTOM_HALF reason.
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_DATA_CORE_GP0             =  1,
+	MALI_PROFILING_EVENT_DATA_CORE_PP0             =  5,
+	MALI_PROFILING_EVENT_DATA_CORE_PP1             =  6,
+	MALI_PROFILING_EVENT_DATA_CORE_PP2             =  7,
+	MALI_PROFILING_EVENT_DATA_CORE_PP3             =  8,
+	MALI_PROFILING_EVENT_DATA_CORE_PP4             =  9,
+	MALI_PROFILING_EVENT_DATA_CORE_PP5             = 10,
+	MALI_PROFILING_EVENT_DATA_CORE_PP6             = 11,
+	MALI_PROFILING_EVENT_DATA_CORE_PP7             = 12,
+	MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU         = 22, /* GP0 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU         = 26, /* PP0 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP1_MMU         = 27, /* PP1 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP2_MMU         = 28, /* PP2 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP3_MMU         = 29, /* PP3 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP4_MMU         = 30, /* PP4 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP5_MMU         = 31, /* PP5 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP6_MMU         = 32, /* PP6 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP7_MMU         = 33, /* PP7 + 21 */
+
+} cinstr_profiling_event_data_core_t;
+
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0 + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0 + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU + (num))
+
+
+#endif /*_MALI_UTGARD_PROFILING_EVENTS_H_*/
diff --git a/mali/include/linux/mali/mali_utgard_profiling_gator_api.h b/mali/include/linux/mali/mali_utgard_profiling_gator_api.h
new file mode 100755
index 0000000..c82d8b1
--- /dev/null
+++ b/mali/include/linux/mali/mali_utgard_profiling_gator_api.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2013, 2015-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library. 
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules 
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting 
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions 
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify 
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so. 
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef __MALI_UTGARD_PROFILING_GATOR_API_H__
+#define __MALI_UTGARD_PROFILING_GATOR_API_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MALI_PROFILING_API_VERSION 4
+
+#define MAX_NUM_L2_CACHE_CORES 3
+#define MAX_NUM_FP_CORES 8
+#define MAX_NUM_VP_CORES 1
+
+#define _MALI_SPCIAL_COUNTER_DESCRIPTIONS \
+	{                                           \
+		"Filmstrip_cnt0",                 \
+		"Frequency",       \
+		"Voltage",       \
+		"vertex",     \
+		"fragment",         \
+		"Total_alloc_pages",        \
+	};
+
+#define _MALI_MEM_COUTNER_DESCRIPTIONS \
+	{                                           \
+		"untyped_memory",                 \
+		"vertex_index_buffer",       \
+		"texture_buffer",       \
+		"varying_buffer",     \
+		"render_target",         \
+		"pbuffer_buffer",        \
+		"plbu_heap",            \
+		"pointer_array_buffer",             \
+		"slave_tilelist",          \
+		"untyped_gp_cmdlist",     \
+		"polygon_cmdlist",               \
+		"texture_descriptor",               \
+		"render_state_word",               \
+		"shader",               \
+		"stream_buffer",               \
+		"fragment_stack",               \
+		"uniform",               \
+		"untyped_frame_pool",               \
+		"untyped_surface",               \
+	};
+
+/** The list of events supported by the Mali DDK. */
+typedef enum {
+	/* Vertex processor activity */
+	ACTIVITY_VP_0 = 0,
+
+	/* Fragment processor activity */
+	ACTIVITY_FP_0,
+	ACTIVITY_FP_1,
+	ACTIVITY_FP_2,
+	ACTIVITY_FP_3,
+	ACTIVITY_FP_4,
+	ACTIVITY_FP_5,
+	ACTIVITY_FP_6,
+	ACTIVITY_FP_7,
+
+	/* L2 cache counters */
+	COUNTER_L2_0_C0,
+	COUNTER_L2_0_C1,
+	COUNTER_L2_1_C0,
+	COUNTER_L2_1_C1,
+	COUNTER_L2_2_C0,
+	COUNTER_L2_2_C1,
+
+	/* Vertex processor counters */
+	COUNTER_VP_0_C0,
+	COUNTER_VP_0_C1,
+
+	/* Fragment processor counters */
+	COUNTER_FP_0_C0,
+	COUNTER_FP_0_C1,
+	COUNTER_FP_1_C0,
+	COUNTER_FP_1_C1,
+	COUNTER_FP_2_C0,
+	COUNTER_FP_2_C1,
+	COUNTER_FP_3_C0,
+	COUNTER_FP_3_C1,
+	COUNTER_FP_4_C0,
+	COUNTER_FP_4_C1,
+	COUNTER_FP_5_C0,
+	COUNTER_FP_5_C1,
+	COUNTER_FP_6_C0,
+	COUNTER_FP_6_C1,
+	COUNTER_FP_7_C0,
+	COUNTER_FP_7_C1,
+
+	/*
+	 * If more hardware counters are added, the _mali_osk_hw_counter_table
+	 * below should also be updated.
+	 */
+
+	/* EGL software counters */
+	COUNTER_EGL_BLIT_TIME,
+
+	/* GLES software counters */
+	COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+	COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+	COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+	COUNTER_GLES_DRAW_ARRAYS_CALLS,
+	COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+	COUNTER_GLES_DRAW_POINTS,
+	COUNTER_GLES_DRAW_LINES,
+	COUNTER_GLES_DRAW_LINE_LOOP,
+	COUNTER_GLES_DRAW_LINE_STRIP,
+	COUNTER_GLES_DRAW_TRIANGLES,
+	COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+	COUNTER_GLES_DRAW_TRIANGLE_FAN,
+	COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+	COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+	COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+	COUNTER_GLES_UPLOAD_VBO_TIME,
+	COUNTER_GLES_NUM_FLUSHES,
+	COUNTER_GLES_NUM_VSHADERS_GENERATED,
+	COUNTER_GLES_NUM_FSHADERS_GENERATED,
+	COUNTER_GLES_VSHADER_GEN_TIME,
+	COUNTER_GLES_FSHADER_GEN_TIME,
+	COUNTER_GLES_INPUT_TRIANGLES,
+	COUNTER_GLES_VXCACHE_HIT,
+	COUNTER_GLES_VXCACHE_MISS,
+	COUNTER_GLES_VXCACHE_COLLISION,
+	COUNTER_GLES_CULLED_TRIANGLES,
+	COUNTER_GLES_CULLED_LINES,
+	COUNTER_GLES_BACKFACE_TRIANGLES,
+	COUNTER_GLES_GBCLIP_TRIANGLES,
+	COUNTER_GLES_GBCLIP_LINES,
+	COUNTER_GLES_TRIANGLES_DRAWN,
+	COUNTER_GLES_DRAWCALL_TIME,
+	COUNTER_GLES_TRIANGLES_COUNT,
+	COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+	COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+	COUNTER_GLES_FAN_TRIANGLES_COUNT,
+	COUNTER_GLES_LINES_COUNT,
+	COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+	COUNTER_GLES_STRIP_LINES_COUNT,
+	COUNTER_GLES_LOOP_LINES_COUNT,
+
+	/* Special counter */
+
+	/* Framebuffer capture pseudo-counter */
+	COUNTER_FILMSTRIP,
+	COUNTER_FREQUENCY,
+	COUNTER_VOLTAGE,
+	COUNTER_VP_ACTIVITY,
+	COUNTER_FP_ACTIVITY,
+	COUNTER_TOTAL_ALLOC_PAGES,
+
+	/* Memory usage counter */
+	COUNTER_MEM_UNTYPED,
+	COUNTER_MEM_VB_IB,
+	COUNTER_MEM_TEXTURE,
+	COUNTER_MEM_VARYING,
+	COUNTER_MEM_RT,
+	COUNTER_MEM_PBUFFER,
+	/* memory usages for gp command */
+	COUNTER_MEM_PLBU_HEAP,
+	COUNTER_MEM_POINTER_ARRAY,
+	COUNTER_MEM_SLAVE_TILELIST,
+	COUNTER_MEM_UNTYPE_GP_CMDLIST,
+	/* memory usages for polygon list command */
+	COUNTER_MEM_POLYGON_CMDLIST,
+	/* memory usages for pp command */
+	COUNTER_MEM_TD,
+	COUNTER_MEM_RSW,
+	/* other memory usages */
+	COUNTER_MEM_SHADER,
+	COUNTER_MEM_STREAMS,
+	COUNTER_MEM_FRAGMENT_STACK,
+	COUNTER_MEM_UNIFORM,
+	/* Special mem usage, which is used for mem pool allocation */
+	COUNTER_MEM_UNTYPE_MEM_POOL,
+	COUNTER_MEM_UNTYPE_SURFACE,
+
+	NUMBER_OF_EVENTS
+} _mali_osk_counter_id;
+
+#define FIRST_ACTIVITY_EVENT    ACTIVITY_VP_0
+#define LAST_ACTIVITY_EVENT     ACTIVITY_FP_7
+
+#define FIRST_HW_COUNTER        COUNTER_L2_0_C0
+#define LAST_HW_COUNTER         COUNTER_FP_7_C1
+
+#define FIRST_SW_COUNTER        COUNTER_EGL_BLIT_TIME
+#define LAST_SW_COUNTER         COUNTER_GLES_LOOP_LINES_COUNT
+
+#define FIRST_SPECIAL_COUNTER   COUNTER_FILMSTRIP
+#define LAST_SPECIAL_COUNTER    COUNTER_TOTAL_ALLOC_PAGES
+
+#define FIRST_MEM_COUNTER               COUNTER_MEM_UNTYPED
+#define LAST_MEM_COUNTER                COUNTER_MEM_UNTYPE_SURFACE
+
+#define MALI_PROFILING_MEM_COUNTERS_NUM (LAST_MEM_COUNTER - FIRST_MEM_COUNTER + 1)
+#define MALI_PROFILING_SPECIAL_COUNTERS_NUM     (LAST_SPECIAL_COUNTER - FIRST_SPECIAL_COUNTER + 1)
+#define MALI_PROFILING_SW_COUNTERS_NUM  (LAST_SW_COUNTER - FIRST_SW_COUNTER + 1)
+
+/**
+ * Define the stream header type for porfiling stream.
+ */
+#define  STREAM_HEADER_FRAMEBUFFER 0x05         /* The stream packet header type for framebuffer dumping. */
+#define STREAM_HEADER_COUNTER_VALUE  0x09       /* The stream packet header type for hw/sw/memory counter sampling. */
+#define STREAM_HEADER_CORE_ACTIVITY 0x0a                /* The stream packet header type for activity counter sampling. */
+#define STREAM_HEADER_SIZE      5
+
+/**
+ * Define the packet header type of profiling control packet.
+ */
+#define PACKET_HEADER_ERROR            0x80             /* The response packet header type if error. */
+#define PACKET_HEADER_ACK              0x81             /* The response packet header type if OK. */
+#define PACKET_HEADER_COUNTERS_REQUEST 0x82             /* The control packet header type to request counter information from ddk. */
+#define PACKET_HEADER_COUNTERS_ACK         0x83         /* The response packet header type to send out counter information. */
+#define PACKET_HEADER_COUNTERS_ENABLE  0x84             /* The control packet header type to enable counters. */
+#define PACKET_HEADER_START_CAPTURE_VALUE            0x85               /* The control packet header type to start capture values. */
+
+#define PACKET_HEADER_SIZE      5
+
+/**
+ * Structure to pass performance counter data of a Mali core
+ */
+typedef struct _mali_profiling_core_counters {
+	u32 source0;
+	u32 value0;
+	u32 source1;
+	u32 value1;
+} _mali_profiling_core_counters;
+
+/**
+ * Structure to pass performance counter data of Mali L2 cache cores
+ */
+typedef struct _mali_profiling_l2_counter_values {
+	struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+} _mali_profiling_l2_counter_values;
+
+/**
+ * Structure to pass data defining Mali instance in use:
+ *
+ * mali_product_id - Mali product id
+ * mali_version_major - Mali version major number
+ * mali_version_minor - Mali version minor number
+ * num_of_l2_cores - number of L2 cache cores
+ * num_of_fp_cores - number of fragment processor cores
+ * num_of_vp_cores - number of vertex processor cores
+ */
+typedef struct _mali_profiling_mali_version {
+	u32 mali_product_id;
+	u32 mali_version_major;
+	u32 mali_version_minor;
+	u32 num_of_l2_cores;
+	u32 num_of_fp_cores;
+	u32 num_of_vp_cores;
+} _mali_profiling_mali_version;
+
+/**
+ * Structure to define the mali profiling counter struct.
+ */
+typedef struct mali_profiling_counter {
+	char counter_name[40];
+	u32 counter_id;
+	u32 counter_event;
+	u32 prev_counter_value;
+	u32 current_counter_value;
+	u32 key;
+	int enabled;
+} mali_profiling_counter;
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting.
+ * We cannot use the enums in mali_uk_types.h because they are unknown inside gator.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define MEM_COUNTER_ENABLE (5)
+#define ANNOTATE_PROFILING_ENABLE (6)
+
+void _mali_profiling_control(u32 action, u32 value);
+
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values);
+
+int _mali_profiling_set_event(u32 counter_id, s32 event_id);
+
+u32 _mali_profiling_get_api_version(void);
+
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_PROFILING_GATOR_API_H__ */
diff --git a/mali/include/linux/mali/mali_utgard_uk_types.h b/mali/include/linux/mali/mali_utgard_uk_types.h
new file mode 100755
index 0000000..99835a5
--- /dev/null
+++ b/mali/include/linux/mali/mali_utgard_uk_types.h
@@ -0,0 +1,1100 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library. 
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules 
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting 
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions 
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify 
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so. 
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+/**
+ * @file mali_uk_types.h
+ * Defines the types and constants used in the user-kernel interface
+ */
+
+#ifndef __MALI_UTGARD_UK_TYPES_H__
+#define __MALI_UTGARD_UK_TYPES_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Iteration functions depend on these values being consecutive. */
+#define MALI_UK_TIMELINE_GP   0
+#define MALI_UK_TIMELINE_PP   1
+#define MALI_UK_TIMELINE_SOFT 2
+#define MALI_UK_TIMELINE_MAX  3
+
+#define MALI_UK_BIG_VARYING_SIZE  (1024*1024*2)
+
+typedef struct {
+	u32 points[MALI_UK_TIMELINE_MAX];
+	s32 sync_fd;
+} _mali_uk_fence_t;
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs
+ *
+ * @{
+ */
+
+/** @defgroup _mali_uk_core U/K Core
+ * @{ */
+
+/** Definition of subsystem numbers, to assist in creating a unique identifier
+ * for each U/K call.
+ *
+ * @see _mali_uk_functions */
+typedef enum {
+	_MALI_UK_CORE_SUBSYSTEM,      /**< Core Group of U/K calls */
+	_MALI_UK_MEMORY_SUBSYSTEM,    /**< Memory Group of U/K calls */
+	_MALI_UK_PP_SUBSYSTEM,        /**< Fragment Processor Group of U/K calls */
+	_MALI_UK_GP_SUBSYSTEM,        /**< Vertex Processor Group of U/K calls */
+	_MALI_UK_PROFILING_SUBSYSTEM, /**< Profiling Group of U/K calls */
+	_MALI_UK_VSYNC_SUBSYSTEM,     /**< VSYNC Group of U/K calls */
+} _mali_uk_subsystem_t;
+
+/** Within a function group each function has its unique sequence number
+ * to assist in creating a unique identifier for each U/K call.
+ *
+ * An ordered pair of numbers selected from
+ * ( \ref _mali_uk_subsystem_t,\ref  _mali_uk_functions) will uniquely identify the
+ * U/K call across all groups of functions, and all functions. */
+typedef enum {
+	/** Core functions */
+
+	_MALI_UK_OPEN                    = 0, /**< _mali_ukk_open() */
+	_MALI_UK_CLOSE,                       /**< _mali_ukk_close() */
+	_MALI_UK_WAIT_FOR_NOTIFICATION,       /**< _mali_ukk_wait_for_notification() */
+	_MALI_UK_GET_API_VERSION,             /**< _mali_ukk_get_api_version() */
+	_MALI_UK_POST_NOTIFICATION,           /**< _mali_ukk_post_notification() */
+	_MALI_UK_GET_USER_SETTING,            /**< _mali_ukk_get_user_setting() *//**< [out] */
+	_MALI_UK_GET_USER_SETTINGS,           /**< _mali_ukk_get_user_settings() *//**< [out] */
+	_MALI_UK_REQUEST_HIGH_PRIORITY,       /**< _mali_ukk_request_high_priority() */
+	_MALI_UK_TIMELINE_GET_LATEST_POINT,   /**< _mali_ukk_timeline_get_latest_point() */
+	_MALI_UK_TIMELINE_WAIT,               /**< _mali_ukk_timeline_wait() */
+	_MALI_UK_TIMELINE_CREATE_SYNC_FENCE,  /**< _mali_ukk_timeline_create_sync_fence() */
+	_MALI_UK_SOFT_JOB_START,              /**< _mali_ukk_soft_job_start() */
+	_MALI_UK_SOFT_JOB_SIGNAL,             /**< _mali_ukk_soft_job_signal() */
+	_MALI_UK_PENDING_SUBMIT,             /**< _mali_ukk_pending_submit() */
+
+	/** Memory functions */
+
+	_MALI_UK_ALLOC_MEM                = 0,   /**< _mali_ukk_alloc_mem() */
+	_MALI_UK_FREE_MEM,                       /**< _mali_ukk_free_mem() */
+	_MALI_UK_BIND_MEM,                       /**< _mali_ukk_mem_bind() */
+	_MALI_UK_UNBIND_MEM,                     /**< _mali_ukk_mem_unbind() */
+	_MALI_UK_COW_MEM,                        /**< _mali_ukk_mem_cow() */
+	_MALI_UK_COW_MODIFY_RANGE,               /**< _mali_ukk_mem_cow_modify_range() */
+	_MALI_UK_RESIZE_MEM,                     /**<._mali_ukk_mem_resize() */
+	_MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, /**< _mali_ukk_mem_get_mmu_page_table_dump_size() */
+	_MALI_UK_DUMP_MMU_PAGE_TABLE,            /**< _mali_ukk_mem_dump_mmu_page_table() */
+	_MALI_UK_DMA_BUF_GET_SIZE,               /**< _mali_ukk_dma_buf_get_size() */
+	_MALI_UK_MEM_WRITE_SAFE,                 /**< _mali_uku_mem_write_safe() */
+
+	/** Common functions for each core */
+
+	_MALI_UK_START_JOB           = 0,     /**< Start a Fragment/Vertex Processor Job on a core */
+	_MALI_UK_GET_NUMBER_OF_CORES,         /**< Get the number of Fragment/Vertex Processor cores */
+	_MALI_UK_GET_CORE_VERSION,            /**< Get the Fragment/Vertex Processor version compatible with all cores */
+
+	/** Fragment Processor Functions  */
+
+	_MALI_UK_PP_START_JOB            = _MALI_UK_START_JOB,            /**< _mali_ukk_pp_start_job() */
+	_MALI_UK_GET_PP_NUMBER_OF_CORES  = _MALI_UK_GET_NUMBER_OF_CORES,  /**< _mali_ukk_get_pp_number_of_cores() */
+	_MALI_UK_GET_PP_CORE_VERSION     = _MALI_UK_GET_CORE_VERSION,     /**< _mali_ukk_get_pp_core_version() */
+	_MALI_UK_PP_DISABLE_WB,                                           /**< _mali_ukk_pp_job_disable_wb() */
+	_MALI_UK_PP_AND_GP_START_JOB,                                     /**< _mali_ukk_pp_and_gp_start_job() */
+
+	/** Vertex Processor Functions  */
+
+	_MALI_UK_GP_START_JOB            = _MALI_UK_START_JOB,            /**< _mali_ukk_gp_start_job() */
+	_MALI_UK_GET_GP_NUMBER_OF_CORES  = _MALI_UK_GET_NUMBER_OF_CORES,  /**< _mali_ukk_get_gp_number_of_cores() */
+	_MALI_UK_GET_GP_CORE_VERSION     = _MALI_UK_GET_CORE_VERSION,     /**< _mali_ukk_get_gp_core_version() */
+	_MALI_UK_GP_SUSPEND_RESPONSE,                                     /**< _mali_ukk_gp_suspend_response() */
+
+	/** Profiling functions */
+
+	_MALI_UK_PROFILING_ADD_EVENT     = 0, /**< __mali_uku_profiling_add_event() */
+	_MALI_UK_PROFILING_REPORT_SW_COUNTERS,/**< __mali_uku_profiling_report_sw_counters() */
+	_MALI_UK_PROFILING_MEMORY_USAGE_GET,  /**< __mali_uku_profiling_memory_usage_get() */
+	_MALI_UK_PROFILING_STREAM_FD_GET, /** < __mali_uku_profiling_stream_fd_get() */
+	_MALI_UK_PROFILING_CONTROL_SET, /** < __mali_uku_profiling_control_set() */
+
+	/** VSYNC reporting fuctions */
+	_MALI_UK_VSYNC_EVENT_REPORT      = 0, /**< _mali_ukk_vsync_event_report() */
+} _mali_uk_functions;
+
+/** @defgroup _mali_uk_getsysteminfo U/K Get System Info
+ * @{ */
+
+/**
+ * Type definition for the core version number.
+ * Used when returning the version number read from a core
+ *
+ * Its format is that of the 32-bit Version register for a particular core.
+ * Refer to the "Mali200 and MaliGP2 3D Graphics Processor Technical Reference
+ * Manual", ARM DDI 0415C, for more information.
+ */
+typedef u32 _mali_core_version;
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @defgroup _mali_uk_gp U/K Vertex Processor
+ * @{ */
+
+/** @defgroup _mali_uk_gp_suspend_response_s Vertex Processor Suspend Response
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_gp_suspend_response()
+ *
+ * When _mali_wait_for_notification() receives notification that a
+ * Vertex Processor job was suspended, you need to send a response to indicate
+ * what needs to happen with this job. You can either abort or resume the job.
+ *
+ * - set @c code to indicate response code. This is either @c _MALIGP_JOB_ABORT or
+ * @c _MALIGP_JOB_RESUME_WITH_NEW_HEAP to indicate you will provide a new heap
+ * for the job that will resolve the out of memory condition for the job.
+ * - copy the @c cookie value from the @c _mali_uk_gp_job_suspended_s notification;
+ * this is an identifier for the suspended job
+ * - set @c arguments[0] and @c arguments[1] to zero if you abort the job. If
+ * you resume it, @c argument[0] should specify the Mali start address for the new
+ * heap and @c argument[1] the Mali end address of the heap.
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ *
+ */
+typedef enum _maligp_job_suspended_response_code {
+	_MALIGP_JOB_ABORT,                  /**< Abort the Vertex Processor job */
+	_MALIGP_JOB_RESUME_WITH_NEW_HEAP    /**< Resume the Vertex Processor job with a new heap */
+} _maligp_job_suspended_response_code;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 cookie;                     /**< [in] cookie from the _mali_uk_gp_job_suspended_s notification */
+	_maligp_job_suspended_response_code code; /**< [in] abort or resume response code, see \ref _maligp_job_suspended_response_code */
+	u32 arguments[2];               /**< [in] 0 when aborting a job. When resuming a job, the Mali start and end address for a new heap to resume the job with */
+} _mali_uk_gp_suspend_response_s;
+
+/** @} */ /* end group _mali_uk_gp_suspend_response_s */
+
+/** @defgroup _mali_uk_gpstartjob_s Vertex Processor Start Job
+ * @{ */
+
+/** @brief Status indicating the result of the execution of a Vertex or Fragment processor job  */
+typedef enum {
+	_MALI_UK_JOB_STATUS_END_SUCCESS         = 1 << (16 + 0),
+	_MALI_UK_JOB_STATUS_END_OOM             = 1 << (16 + 1),
+	_MALI_UK_JOB_STATUS_END_ABORT           = 1 << (16 + 2),
+	_MALI_UK_JOB_STATUS_END_TIMEOUT_SW      = 1 << (16 + 3),
+	_MALI_UK_JOB_STATUS_END_HANG            = 1 << (16 + 4),
+	_MALI_UK_JOB_STATUS_END_SEG_FAULT       = 1 << (16 + 5),
+	_MALI_UK_JOB_STATUS_END_ILLEGAL_JOB     = 1 << (16 + 6),
+	_MALI_UK_JOB_STATUS_END_UNKNOWN_ERR     = 1 << (16 + 7),
+	_MALI_UK_JOB_STATUS_END_SHUTDOWN        = 1 << (16 + 8),
+	_MALI_UK_JOB_STATUS_END_SYSTEM_UNUSABLE = 1 << (16 + 9)
+} _mali_uk_job_status;
+
+#define MALIGP2_NUM_REGS_FRAME (6)
+
+/** @brief Arguments for _mali_ukk_gp_start_job()
+ *
+ * To start a Vertex Processor job
+ * - associate the request with a reference to a @c mali_gp_job_info by setting
+ * user_job_ptr to the address of the @c mali_gp_job_info of the job.
+ * - set @c priority to the priority of the @c mali_gp_job_info
+ * - specify a timeout for the job by setting @c watchdog_msecs to the number of
+ * milliseconds the job is allowed to run. Specifying a value of 0 selects the
+ * default timeout in use by the device driver.
+ * - copy the frame registers from the @c mali_gp_job_info into @c frame_registers.
+ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero
+ * for a non-instrumented build. For an instrumented build you can use up
+ * to two performance counters. Set the corresponding bit in @c perf_counter_flag
+ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify
+ * the source of what needs to get counted (e.g. number of vertex loader
+ * cache hits). For source id values, see ARM DDI0415A, Table 3-60.
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ *
+ * When @c _mali_ukk_gp_start_job() returns @c _MALI_OSK_ERR_OK, status contains the
+ * result of the request (see \ref _mali_uk_start_job_status). If the job could
+ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be
+ * tried again.
+ *
+ * After the job has started, @c _mali_wait_for_notification() will be notified
+ * that the job finished or got suspended. It may get suspended due to
+ * resource shortage. If it finished (see _mali_ukk_wait_for_notification())
+ * the notification will contain a @c _mali_uk_gp_job_finished_s result. If
+ * it got suspended the notification will contain a @c _mali_uk_gp_job_suspended_s
+ * result.
+ *
+ * The @c _mali_uk_gp_job_finished_s contains the job status (see \ref _mali_uk_job_status),
+ * the number of milliseconds the job took to render, and values of core registers
+ * when the job finished (irq status, performance counters, renderer list
+ * address). A job has finished succesfully when its status is
+ * @c _MALI_UK_JOB_STATUS_FINISHED. If the hardware detected a timeout while rendering
+ * the job, or software detected the job is taking more than watchdog_msecs to
+ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_HANG.
+ * If the hardware detected a bus error while accessing memory associated with the
+ * job, status will indicate @c _MALI_UK_JOB_STATUS_SEG_FAULT.
+ * status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to
+ * stop the job but the job didn't start on the hardware yet, e.g. when the
+ * driver shutdown.
+ *
+ * In case the job got suspended, @c _mali_uk_gp_job_suspended_s contains
+ * the @c user_job_ptr identifier used to start the job with, the @c reason
+ * why the job stalled (see \ref _maligp_job_suspended_reason) and a @c cookie
+ * to identify the core on which the job stalled.  This @c cookie will be needed
+ * when responding to this nofication by means of _mali_ukk_gp_suspend_response().
+ * (see _mali_ukk_gp_suspend_response()). The response is either to abort or
+ * resume the job. If the job got suspended due to an out of memory condition
+ * you may be able to resolve this by providing more memory and resuming the job.
+ *
+ */
+typedef struct {
+	u64 ctx;                          /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job_ptr;                   /**< [in] identifier for the job in user space, a @c mali_gp_job_info* */
+	u32 priority;                       /**< [in] job priority. A lower number means higher priority */
+	u32 frame_registers[MALIGP2_NUM_REGS_FRAME]; /**< [in] core specific registers associated with this job */
+	u32 perf_counter_flag;              /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */
+	u32 perf_counter_src0;              /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */
+	u32 perf_counter_src1;              /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */
+	u32 frame_builder_id;               /**< [in] id of the originating frame builder */
+	u32 flush_id;                       /**< [in] flush id within the originating frame builder */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u64 timeline_point_ptr;            /**< [in,out] pointer to u32: location where point on gp timeline for this job will be written */
+	u32 varying_memsize;            /** < [in] size of varying memory to use deffer bind*/
+	u32 deferred_mem_num;
+	u64 deferred_mem_list;         /** < [in] memory hanlde list of varying buffer to use deffer bind */
+} _mali_uk_gp_start_job_s;
+
+#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE (1<<0) /**< Enable performance counter SRC0 for a job */
+#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE (1<<1) /**< Enable performance counter SRC1 for a job */
+#define _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE (1<<2) /**< Enable per tile (aka heatmap) generation with for a job (using the enabled counter sources) */
+
+/** @} */ /* end group _mali_uk_gpstartjob_s */
+
+typedef struct {
+	u64 user_job_ptr;               /**< [out] identifier for the job in user space */
+	_mali_uk_job_status status;     /**< [out] status of finished job */
+	u32 heap_current_addr;          /**< [out] value of the GP PLB PL heap start address register */
+	u32 perf_counter0;              /**< [out] value of performance counter 0 (see ARM DDI0415A) */
+	u32 perf_counter1;              /**< [out] value of performance counter 1 (see ARM DDI0415A) */
+	u32 pending_big_job_num;
+} _mali_uk_gp_job_finished_s;
+
+typedef struct {
+	u64 user_job_ptr;                    /**< [out] identifier for the job in user space */
+	u32 cookie;                          /**< [out] identifier for the core in kernel space on which the job stalled */
+} _mali_uk_gp_job_suspended_s;
+
+/** @} */ /* end group _mali_uk_gp */
+
+
+/** @defgroup _mali_uk_pp U/K Fragment Processor
+ * @{ */
+
+#define _MALI_PP_MAX_SUB_JOBS 8
+
+#define _MALI_PP_MAX_FRAME_REGISTERS ((0x058/4)+1)
+
+#define _MALI_PP_MAX_WB_REGISTERS ((0x02C/4)+1)
+
+#define _MALI_DLBU_MAX_REGISTERS 4
+
+/** Flag for _mali_uk_pp_start_job_s */
+#define _MALI_PP_JOB_FLAG_NO_NOTIFICATION (1<<0)
+#define _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE (1<<1)
+#define _MALI_PP_JOB_FLAG_PROTECTED (1<<2)
+
+/** @defgroup _mali_uk_ppstartjob_s Fragment Processor Start Job
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_pp_start_job()
+ *
+ * To start a Fragment Processor job
+ * - associate the request with a reference to a mali_pp_job by setting
+ * @c user_job_ptr to the address of the @c mali_pp_job of the job.
+ * - set @c priority to the priority of the mali_pp_job
+ * - specify a timeout for the job by setting @c watchdog_msecs to the number of
+ * milliseconds the job is allowed to run. Specifying a value of 0 selects the
+ * default timeout in use by the device driver.
+ * - copy the frame registers from the @c mali_pp_job into @c frame_registers.
+ * For MALI200 you also need to copy the write back 0,1 and 2 registers.
+ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero
+ * for a non-instrumented build. For an instrumented build you can use up
+ * to two performance counters. Set the corresponding bit in @c perf_counter_flag
+ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify
+ * the source of what needs to get counted (e.g. number of vertex loader
+ * cache hits). For source id values, see ARM DDI0415A, Table 3-60.
+ * - pass in the user-kernel context in @c ctx that was returned from _mali_ukk_open()
+ *
+ * When _mali_ukk_pp_start_job() returns @c _MALI_OSK_ERR_OK, @c status contains the
+ * result of the request (see \ref _mali_uk_start_job_status). If the job could
+ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be
+ * tried again.
+ *
+ * After the job has started, _mali_wait_for_notification() will be notified
+ * when the job finished. The notification will contain a
+ * @c _mali_uk_pp_job_finished_s result. It contains the @c user_job_ptr
+ * identifier used to start the job with, the job @c status (see \ref _mali_uk_job_status),
+ * the number of milliseconds the job took to render, and values of core registers
+ * when the job finished (irq status, performance counters, renderer list
+ * address). A job has finished succesfully when its status is
+ * @c _MALI_UK_JOB_STATUS_FINISHED. If the hardware detected a timeout while rendering
+ * the job, or software detected the job is taking more than @c watchdog_msecs to
+ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_HANG.
+ * If the hardware detected a bus error while accessing memory associated with the
+ * job, status will indicate @c _MALI_UK_JOB_STATUS_SEG_FAULT.
+ * status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to
+ * stop the job but the job didn't start on the hardware yet, e.g. when the
+ * driver shutdown.
+ *
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job_ptr;               /**< [in] identifier for the job in user space */
+	u32 priority;                   /**< [in] job priority. A lower number means higher priority */
+	u32 frame_registers[_MALI_PP_MAX_FRAME_REGISTERS];         /**< [in] core specific registers associated with first sub job, see ARM DDI0415A */
+	u32 frame_registers_addr_frame[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_FRAME registers for sub job 1-7 */
+	u32 frame_registers_addr_stack[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_STACK registers for sub job 1-7 */
+	u32 wb0_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 wb1_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 wb2_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 dlbu_registers[_MALI_DLBU_MAX_REGISTERS]; /**< [in] Dynamic load balancing unit registers */
+	u32 num_cores;                      /**< [in] Number of cores to set up (valid range: 1-8(M450) or 4(M400)) */
+	u32 perf_counter_flag;              /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */
+	u32 perf_counter_src0;              /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */
+	u32 perf_counter_src1;              /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */
+	u32 frame_builder_id;               /**< [in] id of the originating frame builder */
+	u32 flush_id;                       /**< [in] flush id within the originating frame builder */
+	u32 flags;                          /**< [in] See _MALI_PP_JOB_FLAG_* for a list of avaiable flags */
+	u32 tilesx;                         /**< [in] number of tiles in the x direction (needed for heatmap generation */
+	u32 tilesy;                         /**< [in] number of tiles in y direction (needed for reading the heatmap memory) */
+	u32 heatmap_mem;                    /**< [in] memory address to store counter values per tile (aka heatmap) */
+	u32 num_memory_cookies;             /**< [in] number of memory cookies attached to job */
+	u64 memory_cookies;               /**< [in] pointer to array of u32 memory cookies attached to job */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u64 timeline_point_ptr;           /**< [in,out] pointer to location of u32 where point on pp timeline for this job will be written */
+} _mali_uk_pp_start_job_s;
+
+typedef struct {
+	u64 ctx;       /**< [in,out] user-kernel context (trashed on output) */
+	u64 gp_args;   /**< [in,out] GP uk arguments (see _mali_uk_gp_start_job_s) */
+	u64 pp_args;   /**< [in,out] PP uk arguments (see _mali_uk_pp_start_job_s) */
+} _mali_uk_pp_and_gp_start_job_s;
+
+/** @} */ /* end group _mali_uk_ppstartjob_s */
+
+typedef struct {
+	u64 user_job_ptr;                          /**< [out] identifier for the job in user space */
+	_mali_uk_job_status status;                /**< [out] status of finished job */
+	u32 perf_counter0[_MALI_PP_MAX_SUB_JOBS];  /**< [out] value of perfomance counter 0 (see ARM DDI0415A), one for each sub job */
+	u32 perf_counter1[_MALI_PP_MAX_SUB_JOBS];  /**< [out] value of perfomance counter 1 (see ARM DDI0415A), one for each sub job */
+	u32 perf_counter_src0;
+	u32 perf_counter_src1;
+} _mali_uk_pp_job_finished_s;
+
+typedef struct {
+	u32 number_of_enabled_cores;               /**< [out] the new number of enabled cores */
+} _mali_uk_pp_num_cores_changed_s;
+
+
+
+/**
+ * Flags to indicate write-back units
+ */
+typedef enum {
+	_MALI_UK_PP_JOB_WB0 = 1,
+	_MALI_UK_PP_JOB_WB1 = 2,
+	_MALI_UK_PP_JOB_WB2 = 4,
+} _mali_uk_pp_job_wbx_flag;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 fb_id;                      /**< [in] Frame builder ID of job to disable WB units for */
+	u32 wb0_memory;
+	u32 wb1_memory;
+	u32 wb2_memory;
+} _mali_uk_pp_disable_wb_s;
+
+
+/** @} */ /* end group _mali_uk_pp */
+
+/** @defgroup _mali_uk_soft_job U/K Soft Job
+ * @{ */
+
+typedef struct {
+	u64 ctx;                            /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job;                       /**< [in] identifier for the job in user space */
+	u64 job_id_ptr;                     /**< [in,out] pointer to location of u32 where job id will be written */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u32 point;                          /**< [out] point on soft timeline for this job */
+	u32 type;                           /**< [in] type of soft job */
+} _mali_uk_soft_job_start_s;
+
+typedef struct {
+	u64 user_job;                       /**< [out] identifier for the job in user space */
+} _mali_uk_soft_job_activated_s;
+
+typedef struct {
+	u64 ctx;                          /**< [in,out] user-kernel context (trashed on output) */
+	u32 job_id;                         /**< [in] id for soft job */
+} _mali_uk_soft_job_signal_s;
+
+/** @} */ /* end group _mali_uk_soft_job */
+
+typedef struct {
+	u32 counter_id;
+	u32 key;
+	int enable;
+} _mali_uk_annotate_profiling_mem_counter_s;
+
+typedef struct {
+	u32 sampling_rate;
+	int enable;
+} _mali_uk_annotate_profiling_enable_s;
+
+
+/** @addtogroup _mali_uk_core U/K Core
+ * @{ */
+
+/** @defgroup _mali_uk_waitfornotification_s Wait For Notification
+ * @{ */
+
+/** @brief Notification type encodings
+ *
+ * Each Notification type is an ordered pair of (subsystem,id), and is unique.
+ *
+ * The encoding of subsystem,id into a 32-bit word is:
+ * encoding = (( subsystem << _MALI_NOTIFICATION_SUBSYSTEM_SHIFT ) & _MALI_NOTIFICATION_SUBSYSTEM_MASK)
+ *            | (( id <<  _MALI_NOTIFICATION_ID_SHIFT ) & _MALI_NOTIFICATION_ID_MASK)
+ *
+ * @see _mali_uk_wait_for_notification_s
+ */
+typedef enum {
+	/** core notifications */
+
+	_MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x20,
+	_MALI_NOTIFICATION_APPLICATION_QUIT = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x40,
+	_MALI_NOTIFICATION_SETTINGS_CHANGED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x80,
+	_MALI_NOTIFICATION_SOFT_ACTIVATED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x100,
+
+	/** Fragment Processor notifications */
+
+	_MALI_NOTIFICATION_PP_FINISHED = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x10,
+	_MALI_NOTIFICATION_PP_NUM_CORE_CHANGE = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x20,
+
+	/** Vertex Processor notifications */
+
+	_MALI_NOTIFICATION_GP_FINISHED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x10,
+	_MALI_NOTIFICATION_GP_STALLED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x20,
+
+	/** Profiling notifications */
+	_MALI_NOTIFICATION_ANNOTATE_PROFILING_MEM_COUNTER = (_MALI_UK_PROFILING_SUBSYSTEM << 16) | 0x10,
+	_MALI_NOTIFICATION_ANNOTATE_PROFILING_ENABLE = (_MALI_UK_PROFILING_SUBSYSTEM << 16) | 0x20,
+} _mali_uk_notification_type;
+
+/** to assist in splitting up 32-bit notification value in subsystem and id value */
+#define _MALI_NOTIFICATION_SUBSYSTEM_MASK 0xFFFF0000
+#define _MALI_NOTIFICATION_SUBSYSTEM_SHIFT 16
+#define _MALI_NOTIFICATION_ID_MASK 0x0000FFFF
+#define _MALI_NOTIFICATION_ID_SHIFT 0
+
+
+/** @brief Enumeration of possible settings which match mali_setting_t in user space
+ *
+ *
+ */
+typedef enum {
+	_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE = 0,
+	_MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_DEPTHBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_STENCILBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_PER_TILE_COUNTERS_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_COMPOSITOR,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_WINDOW,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_OTHER,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR,
+	_MALI_UK_USER_SETTING_SW_COUNTER_ENABLED,
+	_MALI_UK_USER_SETTING_MAX,
+} _mali_uk_user_setting_t;
+
+/* See mali_user_settings_db.c */
+extern const char *_mali_uk_user_setting_descriptions[];
+#define _MALI_UK_USER_SETTING_DESCRIPTIONS \
+	{                                           \
+		"sw_events_enable",                 \
+		"colorbuffer_capture_enable",       \
+		"depthbuffer_capture_enable",       \
+		"stencilbuffer_capture_enable",     \
+		"per_tile_counters_enable",         \
+		"buffer_capture_compositor",        \
+		"buffer_capture_window",            \
+		"buffer_capture_other",             \
+		"buffer_capture_n_frames",          \
+		"buffer_capture_resize_factor",     \
+		"sw_counters_enable",               \
+	};
+
+/** @brief struct to hold the value to a particular setting as seen in the kernel space
+ */
+typedef struct {
+	_mali_uk_user_setting_t setting;
+	u32 value;
+} _mali_uk_settings_changed_s;
+
+/** @brief Arguments for _mali_ukk_wait_for_notification()
+ *
+ * On successful return from _mali_ukk_wait_for_notification(), the members of
+ * this structure will indicate the reason for notification.
+ *
+ * Specifically, the source of the notification can be identified by the
+ * subsystem and id fields of the mali_uk_notification_type in the code.type
+ * member. The type member is encoded in a way to divide up the types into a
+ * subsystem field, and a per-subsystem ID field. See
+ * _mali_uk_notification_type for more information.
+ *
+ * Interpreting the data union member depends on the notification type:
+ *
+ * - type == _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS
+ *     - The kernel side is shutting down. No further
+ * _mali_uk_wait_for_notification() calls should be made.
+ *     - In this case, the value of the data union member is undefined.
+ *     - This is used to indicate to the user space client that it should close
+ * the connection to the Mali Device Driver.
+ * - type == _MALI_NOTIFICATION_PP_FINISHED
+ *    - The notification data is of type _mali_uk_pp_job_finished_s. It contains the user_job_ptr
+ * identifier used to start the job with, the job status, the number of milliseconds the job took to render,
+ * and values of core registers when the job finished (irq status, performance counters, renderer list
+ * address).
+ *    - A job has finished succesfully when its status member is _MALI_UK_JOB_STATUS_FINISHED.
+ *    - If the hardware detected a timeout while rendering the job, or software detected the job is
+ * taking more than watchdog_msecs (see _mali_ukk_pp_start_job()) to complete, the status member will
+ * indicate _MALI_UK_JOB_STATUS_HANG.
+ *    - If the hardware detected a bus error while accessing memory associated with the job, status will
+ * indicate _MALI_UK_JOB_STATUS_SEG_FAULT.
+ *    - Status will indicate MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to stop the job but the job
+ * didn't start the hardware yet, e.g. when the driver closes.
+ * - type == _MALI_NOTIFICATION_GP_FINISHED
+ *     - The notification data is of type _mali_uk_gp_job_finished_s. The notification is similar to that of
+ * type == _MALI_NOTIFICATION_PP_FINISHED, except that several other GP core register values are returned.
+ * The status values have the same meaning for type == _MALI_NOTIFICATION_PP_FINISHED.
+ * - type == _MALI_NOTIFICATION_GP_STALLED
+ *     - The nofication data is of type _mali_uk_gp_job_suspended_s. It contains the user_job_ptr
+ * identifier used to start the job with, the reason why the job stalled and a cookie to identify the core on
+ * which the job stalled.
+ *     - The reason member of gp_job_suspended is set to _MALIGP_JOB_SUSPENDED_OUT_OF_MEMORY
+ * when the polygon list builder unit has run out of memory.
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_notification_type type; /**< [out] Type of notification available */
+	union {
+		_mali_uk_gp_job_suspended_s gp_job_suspended;/**< [out] Notification data for _MALI_NOTIFICATION_GP_STALLED notification type */
+		_mali_uk_gp_job_finished_s  gp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_GP_FINISHED notification type */
+		_mali_uk_pp_job_finished_s  pp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_PP_FINISHED notification type */
+		_mali_uk_settings_changed_s setting_changed;/**< [out] Notification data for _MALI_NOTIFICAATION_SETTINGS_CHANGED notification type */
+		_mali_uk_soft_job_activated_s soft_job_activated; /**< [out] Notification data for _MALI_NOTIFICATION_SOFT_ACTIVATED notification type */
+		_mali_uk_annotate_profiling_mem_counter_s profiling_mem_counter;
+		_mali_uk_annotate_profiling_enable_s profiling_enable;
+	} data;
+} _mali_uk_wait_for_notification_s;
+
+/** @brief Arguments for _mali_ukk_post_notification()
+ *
+ * Posts the specified notification to the notification queue for this application.
+ * This is used to send a quit message to the callback thread.
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_notification_type type; /**< [in] Type of notification to post */
+} _mali_uk_post_notification_s;
+
+/** @} */ /* end group _mali_uk_waitfornotification_s */
+
+/** @defgroup _mali_uk_getapiversion_s Get API Version
+ * @{ */
+
+/** helpers for Device Driver API version handling */
+
+/** @brief Encode a version ID from a 16-bit input
+ *
+ * @note the input is assumed to be 16 bits. It must not exceed 16 bits. */
+#define _MAKE_VERSION_ID(x) (((x) << 16UL) | (x))
+
+/** @brief Check whether a 32-bit value is likely to be Device Driver API
+ * version ID. */
+#define _IS_VERSION_ID(x) (((x) & 0xFFFF) == (((x) >> 16UL) & 0xFFFF))
+
+/** @brief Decode a 16-bit version number from a 32-bit Device Driver API version
+ * ID */
+#define _GET_VERSION(x) (((x) >> 16UL) & 0xFFFF)
+
+/** @brief Determine whether two 32-bit encoded version IDs match */
+#define _IS_API_MATCH(x, y) (IS_VERSION_ID((x)) && IS_VERSION_ID((y)) && (GET_VERSION((x)) == GET_VERSION((y))))
+
+/**
+ * API version define.
+ * Indicates the version of the kernel API
+ * The version is a 16bit integer incremented on each API change.
+ * The 16bit integer is stored twice in a 32bit integer
+ * For example, for version 1 the value would be 0x00010001
+ */
+#define _MALI_API_VERSION 900
+#define _MALI_UK_API_VERSION _MAKE_VERSION_ID(_MALI_API_VERSION)
+
+/**
+ * The API version is a 16-bit integer stored in both the lower and upper 16-bits
+ * of a 32-bit value. The 16-bit API version value is incremented on each API
+ * change. Version 1 would be 0x00010001. Used in _mali_uk_get_api_version_s.
+ */
+typedef u32 _mali_uk_api_version;
+
+/** @brief Arguments for _mali_uk_get_api_version()
+ *
+ * The user-side interface version must be written into the version member,
+ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of
+ * the kernel-side interface.
+ *
+ * On successful return, the version member will be the API version of the
+ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version
+ * of the API.
+ *
+ * The compatible member must be checked to see if the version of the user-side
+ * interface is compatible with the kernel-side interface, since future versions
+ * of the interface may be backwards compatible.
+ */
+typedef struct {
+	u32 ctx;                        /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_api_version version;   /**< [in,out] API version of user-side interface. */
+	int compatible;                 /**< [out] @c 1 when @version is compatible, @c 0 otherwise */
+} _mali_uk_get_api_version_s;
+
+/** @brief Arguments for _mali_uk_get_api_version_v2()
+ *
+ * The user-side interface version must be written into the version member,
+ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of
+ * the kernel-side interface.
+ *
+ * On successful return, the version member will be the API version of the
+ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version
+ * of the API.
+ *
+ * The compatible member must be checked to see if the version of the user-side
+ * interface is compatible with the kernel-side interface, since future versions
+ * of the interface may be backwards compatible.
+ */
+typedef struct {
+	u64 ctx;                        /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_api_version version;   /**< [in,out] API version of user-side interface. */
+	int compatible;                 /**< [out] @c 1 when @version is compatible, @c 0 otherwise */
+} _mali_uk_get_api_version_v2_s;
+
+/** @} */ /* end group _mali_uk_getapiversion_s */
+
+/** @defgroup _mali_uk_get_user_settings_s Get user space settings */
+
+/** @brief struct to keep the matching values of the user space settings within certain context
+ *
+ * Each member of the settings array corresponds to a matching setting in the user space and its value is the value
+ * of that particular setting.
+ *
+ * All settings are given reference to the context pointed to by the ctx pointer.
+ *
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	u32 settings[_MALI_UK_USER_SETTING_MAX]; /**< [out] The values for all settings */
+} _mali_uk_get_user_settings_s;
+
+/** @brief struct to hold the value of a particular setting from the user space within a given context
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_user_setting_t setting; /**< [in] setting to get */
+	u32 value;                       /**< [out] value of setting */
+} _mali_uk_get_user_setting_s;
+
+/** @brief Arguments for _mali_ukk_request_high_priority() */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+} _mali_uk_request_high_priority_s;
+
+/** @brief Arguments for _mali_ukk_pending_submit() */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+} _mali_uk_pending_submit_s;
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @defgroup _mali_uk_memory U/K Memory
+ * @{ */
+
+#define _MALI_MEMORY_ALLOCATE_RESIZEABLE  (1<<4) /* BUFFER can trim dow/grow*/
+#define _MALI_MEMORY_ALLOCATE_NO_BIND_GPU (1<<5) /*Not map to GPU when allocate, must call bind later*/
+#define _MALI_MEMORY_ALLOCATE_SWAPPABLE   (1<<6) /* Allocate swappale memory. */
+#define _MALI_MEMORY_ALLOCATE_DEFER_BIND (1<<7) /*Not map to GPU when allocate, must call bind later*/
+#define _MALI_MEMORY_ALLOCATE_SECURE (1<<8) /* Allocate secure memory. */
+
+
+typedef struct {
+	u64 ctx;                                          /**< [in,out] user-kernel context (trashed on output) */
+	u32 gpu_vaddr;                                    /**< [in] GPU virtual address */
+	u32 vsize;                                        /**< [in] vitrual size of the allocation */
+	u32 psize;                                        /**< [in] physical size of the allocation */
+	u32 flags;
+	u64 backend_handle;                               /**< [out] backend handle */
+	s32 secure_shared_fd;                           /** < [in] the mem handle for secure mem */
+} _mali_uk_alloc_mem_s;
+
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 gpu_vaddr;                /**< [in] use as handle to free allocation */
+	u32 free_pages_nr;      /** < [out] record the number of free pages */
+} _mali_uk_free_mem_s;
+
+
+#define _MALI_MEMORY_BIND_BACKEND_UMP             (1<<8)
+#define _MALI_MEMORY_BIND_BACKEND_DMA_BUF         (1<<9)
+#define _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY     (1<<10)
+#define _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY (1<<11)
+#define _MALI_MEMORY_BIND_BACKEND_EXT_COW         (1<<12)
+#define _MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION (1<<13)
+
+
+#define _MALI_MEMORY_BIND_BACKEND_MASK (_MALI_MEMORY_BIND_BACKEND_UMP| \
+					_MALI_MEMORY_BIND_BACKEND_DMA_BUF |\
+					_MALI_MEMORY_BIND_BACKEND_MALI_MEMORY |\
+					_MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY |\
+					_MALI_MEMORY_BIND_BACKEND_EXT_COW |\
+					_MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION)
+
+
+#define _MALI_MEMORY_GPU_READ_ALLOCATE            (1<<16)
+
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 vaddr;                                      /**< [in] mali address to map the physical memory to */
+	u32 size;                                       /**< [in] size */
+	u32 flags;                                      /**< [in] see_MALI_MEMORY_BIND_BACKEND_* */
+	u32 padding;                                    /** padding for 32/64 struct alignment */
+	union {
+		struct {
+			u32 secure_id;                  /**< [in] secure id */
+			u32 rights;                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_ump;
+		struct {
+			u32 mem_fd;                     /**< [in] Memory descriptor */
+			u32 rights;                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_dma_buf;
+		struct {
+			u32 phys_addr;                  /**< [in] physical address */
+			u32 rights;                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_ext_memory;
+	} mem_union;
+} _mali_uk_bind_mem_s;
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 flags;                                      /**< [in] see_MALI_MEMORY_BIND_BACKEND_* */
+	u32 vaddr;                                      /**<  [in] identifier for mapped memory object in kernel space  */
+} _mali_uk_unbind_mem_s;
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 target_handle;                              /**< [in] handle of allocation need to do COW */
+	u32 target_offset;                              /**< [in] offset in target allocation to do COW(for support COW  a memory allocated from memory_bank, PAGE_SIZE align)*/
+	u32 target_size;                                /**< [in] size of target allocation to do COW (for support memory bank, PAGE_SIZE align)(in byte) */
+	u32 range_start;                                /**< [in] re allocate range start offset, offset from the start of allocation (PAGE_SIZE align)*/
+	u32 range_size;                                 /**< [in] re allocate size (PAGE_SIZE align)*/
+	u32 vaddr;                                      /**< [in] mali address for the new allocaiton */
+	u32 backend_handle;                             /**< [out] backend handle */
+	u32 flags;
+} _mali_uk_cow_mem_s;
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 range_start;                                /**< [in] re allocate range start offset, offset from the start of allocation */
+	u32 size;                                       /**< [in] re allocate size*/
+	u32 vaddr;                                      /**< [in] mali address for the new allocaiton */
+	s32 change_pages_nr;                            /**< [out] record the page number change for cow operation */
+} _mali_uk_cow_modify_range_s;
+
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 mem_fd;                     /**< [in] Memory descriptor */
+	u32 size;                       /**< [out] size */
+} _mali_uk_dma_buf_get_size_s;
+
+/** Flag for _mali_uk_map_external_mem_s, _mali_uk_attach_ump_mem_s and _mali_uk_attach_dma_buf_s */
+#define _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE (1<<0)
+
+
+typedef struct {
+	u64 ctx;                                /**< [in,out] user-kernel context (trashed on output) */
+	u64 vaddr;                              /* the buffer to do resize*/
+	u32 psize;                              /* wanted physical size of this memory */
+} _mali_uk_mem_resize_s;
+
+/**
+ * @brief Arguments for _mali_uk[uk]_mem_write_safe()
+ */
+typedef struct {
+	u64 ctx;  /**< [in,out] user-kernel context (trashed on output) */
+	u64 src;  /**< [in] Pointer to source data */
+	u64 dest; /**< [in] Destination Mali buffer */
+	u32 size;   /**< [in,out] Number of bytes to write/copy on input, number of bytes actually written/copied on output */
+} _mali_uk_mem_write_safe_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 size;                       /**< [out] size of MMU page table information (registers + page tables) */
+} _mali_uk_query_mmu_page_table_dump_size_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 size;                       /**< [in] size of buffer to receive mmu page table information */
+	u64 buffer;                   /**< [in,out] buffer to receive mmu page table information */
+	u32 register_writes_size;       /**< [out] size of MMU register dump */
+	u64 register_writes;           /**< [out] pointer within buffer where MMU register dump is stored */
+	u32 page_table_dump_size;       /**< [out] size of MMU page table dump */
+	u64 page_table_dump;           /**< [out] pointer within buffer where MMU page table dump is stored */
+} _mali_uk_dump_mmu_page_table_s;
+
+/** @} */ /* end group _mali_uk_memory */
+
+
+/** @addtogroup _mali_uk_pp U/K Fragment Processor
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_get_pp_number_of_cores()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_pp_number_of_cores(), @c number_of_cores
+ * will contain the number of Fragment Processor cores in the system.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 number_of_total_cores;      /**< [out] Total number of Fragment Processor cores in the system */
+	u32 number_of_enabled_cores;    /**< [out] Number of enabled Fragment Processor cores */
+} _mali_uk_get_pp_number_of_cores_s;
+
+/** @brief Arguments for _mali_ukk_get_pp_core_version()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_pp_core_version(), @c version contains
+ * the version that all Fragment Processor cores are compatible with.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_core_version version;     /**< [out] version returned from core, see \ref _mali_core_version  */
+	u32 padding;
+} _mali_uk_get_pp_core_version_s;
+
+/** @} */ /* end group _mali_uk_pp */
+
+
+/** @addtogroup _mali_uk_gp U/K Vertex Processor
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_get_gp_number_of_cores()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_gp_number_of_cores(), @c number_of_cores
+ * will contain the number of Vertex Processor cores in the system.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 number_of_cores;            /**< [out] number of Vertex Processor cores in the system */
+} _mali_uk_get_gp_number_of_cores_s;
+
+/** @brief Arguments for _mali_ukk_get_gp_core_version()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_gp_core_version(), @c version contains
+ * the version that all Vertex Processor cores are compatible with.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_core_version version;     /**< [out] version returned from core, see \ref _mali_core_version */
+} _mali_uk_get_gp_core_version_s;
+
+/** @} */ /* end group _mali_uk_gp */
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 event_id;                   /**< [in] event id to register (see  enum mali_profiling_events for values) */
+	u32 data[5];                    /**< [in] event specific data */
+} _mali_uk_profiling_add_event_s;
+
+typedef struct {
+	u64 ctx;                     /**< [in,out] user-kernel context (trashed on output) */
+	u32 memory_usage;              /**< [out] total memory usage */
+	u32 vaddr;                                      /**< [in] mali address for the cow allocaiton */
+	s32 change_pages_nr;            /**< [out] record the page number change for cow operation */
+} _mali_uk_profiling_memory_usage_get_s;
+
+
+/** @addtogroup _mali_uk_memory U/K Memory
+ * @{ */
+
+/** @brief Arguments to _mali_ukk_mem_mmap()
+ *
+ * Use of the phys_addr member depends on whether the driver is compiled for
+ * Mali-MMU or nonMMU:
+ * - in the nonMMU case, this is the physical address of the memory as seen by
+ * the CPU (which may be a constant offset from that used by Mali)
+ * - in the MMU case, this is the Mali Virtual base address of the memory to
+ * allocate, and the particular physical pages used to back the memory are
+ * entirely determined by _mali_ukk_mem_mmap(). The details of the physical pages
+ * are not reported to user-space for security reasons.
+ *
+ * The cookie member must be stored for use later when freeing the memory by
+ * calling _mali_ukk_mem_munmap(). In the Mali-MMU case, the cookie is secure.
+ *
+ * The ukk_private word must be set to zero when calling from user-space. On
+ * Kernel-side, the  OS implementation of the U/K interface can use it to
+ * communicate data to the OS implementation of the OSK layer. In particular,
+ * _mali_ukk_get_big_block() directly calls _mali_ukk_mem_mmap directly, and
+ * will communicate its own ukk_private word through the ukk_private member
+ * here. The common code itself will not inspect or modify the ukk_private
+ * word, and so it may be safely used for whatever purposes necessary to
+ * integrate Mali Memory handling into the OS.
+ *
+ * The uku_private member is currently reserved for use by the user-side
+ * implementation of the U/K interface. Its value must be zero.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	void *mapping;                  /**< [out] Returns user-space virtual address for the mapping */
+	u32 size;                       /**< [in] Size of the requested mapping */
+	u32 phys_addr;                  /**< [in] Physical address - could be offset, depending on caller+callee convention */
+	mali_bool writeable;
+} _mali_uk_mem_mmap_s;
+
+/** @brief Arguments to _mali_ukk_mem_munmap()
+ *
+ * The cookie and mapping members must be that returned from the same previous
+ * call to _mali_ukk_mem_mmap(). The size member must correspond to cookie
+ * and mapping - that is, it must be the value originally supplied to a call to
+ * _mali_ukk_mem_mmap that returned the values of mapping and cookie.
+ *
+ * An error will be returned if an attempt is made to unmap only part of the
+ * originally obtained range, or to unmap more than was originally obtained.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	void *mapping;                  /**< [in] The mapping returned from mmap call */
+	u32 size;                       /**< [in] The size passed to mmap call */
+} _mali_uk_mem_munmap_s;
+/** @} */ /* end group _mali_uk_memory */
+
+/** @defgroup _mali_uk_vsync U/K VSYNC Wait Reporting Module
+ * @{ */
+
+/** @brief VSYNC events
+ *
+ * These events are reported when DDK starts to wait for vsync and when the
+ * vsync has occured and the DDK can continue on the next frame.
+ */
+typedef enum _mali_uk_vsync_event {
+	_MALI_UK_VSYNC_EVENT_BEGIN_WAIT = 0,
+	_MALI_UK_VSYNC_EVENT_END_WAIT
+} _mali_uk_vsync_event;
+
+/** @brief Arguments to _mali_ukk_vsync_event()
+ *
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_vsync_event event;     /**< [in] VSYNCH event type */
+} _mali_uk_vsync_event_report_s;
+
+/** @} */ /* end group _mali_uk_vsync */
+
+/** @defgroup _mali_uk_sw_counters_report U/K Software Counter Reporting
+ * @{ */
+
+/** @brief Software counter values
+ *
+ * Values recorded for each of the software counters during a single renderpass.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u64 counters;                  /**< [in] The array of u32 counter values */
+	u32 num_counters;              /**< [in] The number of elements in counters array */
+} _mali_uk_sw_counters_report_s;
+
+/** @} */ /* end group _mali_uk_sw_counters_report */
+
+/** @defgroup _mali_uk_timeline U/K Mali Timeline
+ * @{ */
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 timeline;                   /**< [in] timeline id */
+	u32 point;                      /**< [out] latest point on timeline */
+} _mali_uk_timeline_get_latest_point_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_fence_t fence;         /**< [in] fence */
+	u32 timeout;                    /**< [in] timeout (0 for no wait, -1 for blocking) */
+	u32 status;                     /**< [out] status of fence (1 if signaled, 0 if timeout) */
+} _mali_uk_timeline_wait_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_fence_t fence;         /**< [in] mali fence to create linux sync fence from */
+	s32 sync_fd;                    /**< [out] file descriptor for new linux sync fence */
+} _mali_uk_timeline_create_sync_fence_s;
+
+/** @} */ /* end group _mali_uk_timeline */
+
+/** @} */ /* end group u_k_api */
+
+/** @} */ /* end group uddapi */
+
+typedef struct {
+	u64 ctx;                 /**< [in,out] user-kernel context (trashed on output) */
+	s32 stream_fd;   /**< [in] The profiling kernel base stream fd handle */
+} _mali_uk_profiling_stream_fd_get_s;
+
+typedef struct {
+	u64 ctx;        /**< [in,out] user-kernel context (trashed on output) */
+	u64 control_packet_data; /**< [in] the control packet data for control settings */
+	u32 control_packet_size;  /**< [in] The control packet size */
+	u64 response_packet_data; /** < [out] The response packet data */
+	u32 response_packet_size; /** < [in,out] The response packet data */
+} _mali_uk_profiling_control_set_s;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_UK_TYPES_H__ */
diff --git a/mali/linux/license/gpl/mali_kernel_license.h b/mali/linux/license/gpl/mali_kernel_license.h
new file mode 100755
index 0000000..264555d
--- /dev/null
+++ b/mali/linux/license/gpl/mali_kernel_license.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2010, 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_kernel_license.h
+ * Defines for the macro MODULE_LICENSE.
+ */
+
+#ifndef __MALI_KERNEL_LICENSE_H__
+#define __MALI_KERNEL_LICENSE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MALI_KERNEL_LINUX_LICENSE     "GPL"
+#define MALI_LICENSE_IS_GPL 1
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LICENSE_H__ */
diff --git a/mali/linux/mali_devfreq.c b/mali/linux/mali_devfreq.c
new file mode 100644
index 0000000..0b0ba14
--- /dev/null
+++ b/mali/linux/mali_devfreq.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#include <linux/regulator/consumer.h>
+#include <linux/regulator/driver.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#endif /* Linux >= 3.13 */
+
+#include "mali_pm_metrics.h"
+
+static int
+mali_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+	if (IS_ERR_OR_NULL(opp)) {
+		MALI_PRINT_ERROR(("Failed to get opp (%ld)\n", PTR_ERR(opp)));
+		return PTR_ERR(opp);
+	}
+
+	MALI_DEBUG_PRINT(2, ("mali_devfreq_target:set_freq = %lld flags = 0x%x\n", freq, flags));
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (mdev->current_freq == freq) {
+		*target_freq = freq;
+		mali_pm_reset_dvfs_utilisation(mdev);
+		return 0;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (mdev->regulator && mdev->current_voltage != voltage
+	    && mdev->current_freq < freq) {
+		err = regulator_set_voltage(mdev->regulator, voltage, voltage);
+		if (err) {
+			MALI_PRINT_ERROR(("Failed to increase voltage (%d)\n", err));
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(mdev->clock, freq);
+	if (err) {
+		MALI_PRINT_ERROR(("Failed to set clock %lu (target %lu)\n", freq, *target_freq));
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (mdev->regulator && mdev->current_voltage != voltage
+	    && mdev->current_freq > freq) {
+		err = regulator_set_voltage(mdev->regulator, voltage, voltage);
+		if (err) {
+			MALI_PRINT_ERROR(("Failed to decrease voltage (%d)\n", err));
+			return err;
+		}
+	}
+#endif
+
+	*target_freq = freq;
+	mdev->current_voltage = voltage;
+	mdev->current_freq = freq;
+
+	mali_pm_reset_dvfs_utilisation(mdev);
+
+	return err;
+}
+
+static int
+mali_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	*freq = mdev->current_freq;
+
+	MALI_DEBUG_PRINT(2, ("mali_devfreq_cur_freq: freq = %d \n", *freq));
+	return 0;
+}
+
+static int
+mali_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	stat->current_frequency = mdev->current_freq;
+
+	mali_pm_get_dvfs_utilisation(mdev,
+				     &stat->total_time, &stat->busy_time);
+
+	stat->private_data = NULL;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	memcpy(&mdev->devfreq->last_status, stat, sizeof(*stat));
+#endif
+
+	return 0;
+}
+
+/* setup platform specific opp in platform.c*/
+int __weak setup_opps(void)
+{
+	return 0;
+}
+
+/* term platform specific opp in platform.c*/
+int __weak term_opps(struct device *dev)
+{
+	return 0;
+}
+
+static int mali_devfreq_init_freq_table(struct mali_device *mdev,
+					struct devfreq_dev_profile *dp)
+{
+	int err, count;
+	int i = 0;
+	unsigned long freq = 0;
+	struct dev_pm_opp *opp;
+
+	err = setup_opps();
+	if (err)
+		return err;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(mdev->dev);
+	if (count < 0) {
+		rcu_read_unlock();
+		return count;
+	}
+	rcu_read_unlock();
+
+	MALI_DEBUG_PRINT(2, ("mali devfreq table count %d\n", count));
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				       GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	for (i = 0; i < count; i++, freq++) {
+		opp = dev_pm_opp_find_freq_ceil(mdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+		MALI_DEBUG_PRINT(2, ("mali devfreq table array[%d] = %d\n", i, freq));
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		MALI_PRINT_ERROR(("Unable to enumerate all OPPs (%d!=%d)\n",
+				  count, i));
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void mali_devfreq_term_freq_table(struct mali_device *mdev)
+{
+	struct devfreq_dev_profile *dp = mdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+	term_opps(mdev->dev);
+}
+
+static void mali_devfreq_exit(struct device *dev)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	mali_devfreq_term_freq_table(mdev);
+}
+
+int mali_devfreq_init(struct mali_device *mdev)
+{
+#ifdef CONFIG_DEVFREQ_THERMAL
+	struct devfreq_cooling_power *callbacks = NULL;
+	_mali_osk_device_data data;
+#endif
+	struct devfreq_dev_profile *dp;
+	int err;
+
+	MALI_DEBUG_PRINT(2, ("Init Mali devfreq\n"));
+
+	if (!mdev->clock)
+		return -ENODEV;
+
+	mdev->current_freq = clk_get_rate(mdev->clock);
+
+	dp = &mdev->devfreq_profile;
+
+	dp->initial_freq = mdev->current_freq;
+	dp->polling_ms = 100;
+	dp->target = mali_devfreq_target;
+	dp->get_dev_status = mali_devfreq_status;
+	dp->get_cur_freq = mali_devfreq_cur_freq;
+	dp->exit = mali_devfreq_exit;
+
+	if (mali_devfreq_init_freq_table(mdev, dp))
+		return -EFAULT;
+
+	mdev->devfreq = devfreq_add_device(mdev->dev, dp,
+					   "simple_ondemand", NULL);
+	if (IS_ERR(mdev->devfreq)) {
+		mali_devfreq_term_freq_table(mdev);
+		return PTR_ERR(mdev->devfreq);
+	}
+
+	err = devfreq_register_opp_notifier(mdev->dev, mdev->devfreq);
+	if (err) {
+		MALI_PRINT_ERROR(("Failed to register OPP notifier (%d)\n", err));
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	/* Initilization last_status it will be used when first power allocate called */
+	mdev->devfreq->last_status.current_frequency = mdev->current_freq;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		if (NULL != data.gpu_cooling_ops) {
+			callbacks = data.gpu_cooling_ops;
+			MALI_DEBUG_PRINT(2, ("Mali GPU Thermal: Callback handler installed \n"));
+		}
+	}
+
+	if (callbacks) {
+		mdev->devfreq_cooling = of_devfreq_cooling_register_power(
+						mdev->dev->of_node,
+						mdev->devfreq,
+						callbacks);
+		if (IS_ERR_OR_NULL(mdev->devfreq_cooling)) {
+			err = PTR_ERR(mdev->devfreq_cooling);
+			MALI_PRINT_ERROR(("Failed to register cooling device (%d)\n", err));
+			goto cooling_failed;
+		} else {
+			MALI_DEBUG_PRINT(2, ("Mali GPU Thermal Cooling installed \n"));
+		}
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(mdev->dev, mdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	err = devfreq_remove_device(mdev->devfreq);
+	if (err)
+		MALI_PRINT_ERROR(("Failed to terminate devfreq (%d)\n", err));
+	else
+		mdev->devfreq = NULL;
+
+	return err;
+}
+
+void mali_devfreq_term(struct mali_device *mdev)
+{
+	int err;
+
+	MALI_DEBUG_PRINT(2, ("Term Mali devfreq\n"));
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	devfreq_cooling_unregister(mdev->devfreq_cooling);
+#endif
+
+	devfreq_unregister_opp_notifier(mdev->dev, mdev->devfreq);
+
+	err = devfreq_remove_device(mdev->devfreq);
+	if (err)
+		MALI_PRINT_ERROR(("Failed to terminate devfreq (%d)\n", err));
+	else
+		mdev->devfreq = NULL;
+}
diff --git a/mali/linux/mali_devfreq.h b/mali/linux/mali_devfreq.h
new file mode 100644
index 0000000..faf84f2
--- /dev/null
+++ b/mali/linux/mali_devfreq.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef _MALI_DEVFREQ_H_
+#define _MALI_DEVFREQ_H_
+
+int mali_devfreq_init(struct mali_device *mdev);
+
+void mali_devfreq_term(struct mali_device *mdev);
+
+#endif
diff --git a/mali/linux/mali_device_pause_resume.c b/mali/linux/mali_device_pause_resume.c
new file mode 100755
index 0000000..cb2f702
--- /dev/null
+++ b/mali/linux/mali_device_pause_resume.c
@@ -0,0 +1,36 @@
+/**
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_device_pause_resume.c
+ * Implementation of the Mali pause/resume functionality
+ */
+
+#include <linux/module.h>
+#include <linux/mali/mali_utgard.h>
+#include "mali_pm.h"
+
+void mali_dev_pause(void)
+{
+	/*
+	 * Deactive all groups to prevent hardware being touched
+	 * during the period of mali device pausing
+	 */
+	mali_pm_os_suspend(MALI_FALSE);
+}
+
+EXPORT_SYMBOL(mali_dev_pause);
+
+void mali_dev_resume(void)
+{
+	mali_pm_os_resume();
+}
+
+EXPORT_SYMBOL(mali_dev_resume);
diff --git a/mali/linux/mali_dma_fence.c b/mali/linux/mali_dma_fence.c
new file mode 100755
index 0000000..7db30b8
--- /dev/null
+++ b/mali/linux/mali_dma_fence.c
@@ -0,0 +1,439 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/version.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+#include "mali_dma_fence.h"
+#include <linux/atomic.h>
+#include <linux/workqueue.h>
+#endif
+
+static DEFINE_SPINLOCK(mali_dma_fence_lock);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static bool mali_dma_fence_enable_signaling(struct dma_fence *fence)
+{
+	MALI_IGNORE(fence);
+	return true;
+}
+
+static const char *mali_dma_fence_get_driver_name(struct dma_fence *fence)
+{
+	MALI_IGNORE(fence);
+	return "mali";
+}
+
+static const char *mali_dma_fence_get_timeline_name(struct dma_fence *fence)
+{
+	MALI_IGNORE(fence);
+	return "mali_dma_fence";
+}
+
+static const struct dma_fence_ops mali_dma_fence_ops = {
+	.get_driver_name = mali_dma_fence_get_driver_name,
+	.get_timeline_name = mali_dma_fence_get_timeline_name,
+	.enable_signaling = mali_dma_fence_enable_signaling,
+	.signaled = NULL,
+	.wait = dma_fence_default_wait,
+	.release = NULL
+};
+#else
+static bool mali_dma_fence_enable_signaling(struct fence *fence)
+{
+	MALI_IGNORE(fence);
+	return true;
+}
+
+static const char *mali_dma_fence_get_driver_name(struct fence *fence)
+{
+	MALI_IGNORE(fence);
+	return "mali";
+}
+
+static const char *mali_dma_fence_get_timeline_name(struct fence *fence)
+{
+	MALI_IGNORE(fence);
+	return "mali_dma_fence";
+}
+
+static const struct fence_ops mali_dma_fence_ops = {
+	.get_driver_name = mali_dma_fence_get_driver_name,
+	.get_timeline_name = mali_dma_fence_get_timeline_name,
+	.enable_signaling = mali_dma_fence_enable_signaling,
+	.signaled = NULL,
+	.wait = fence_default_wait,
+	.release = NULL
+};
+#endif
+
+static void mali_dma_fence_context_cleanup(struct mali_dma_fence_context *dma_fence_context)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+
+	for (i = 0; i < dma_fence_context->num_dma_fence_waiter; i++) {
+		if (dma_fence_context->mali_dma_fence_waiters[i]) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+			dma_fence_remove_callback(dma_fence_context->mali_dma_fence_waiters[i]->fence,
+						  &dma_fence_context->mali_dma_fence_waiters[i]->base);
+			dma_fence_put(dma_fence_context->mali_dma_fence_waiters[i]->fence);
+
+#else
+			fence_remove_callback(dma_fence_context->mali_dma_fence_waiters[i]->fence,
+					      &dma_fence_context->mali_dma_fence_waiters[i]->base);
+			fence_put(dma_fence_context->mali_dma_fence_waiters[i]->fence);
+#endif
+			kfree(dma_fence_context->mali_dma_fence_waiters[i]);
+			dma_fence_context->mali_dma_fence_waiters[i] = NULL;
+		}
+	}
+
+	if (NULL != dma_fence_context->mali_dma_fence_waiters)
+		kfree(dma_fence_context->mali_dma_fence_waiters);
+
+	dma_fence_context->mali_dma_fence_waiters = NULL;
+	dma_fence_context->num_dma_fence_waiter = 0;
+}
+
+static void mali_dma_fence_context_work_func(struct work_struct *work_handle)
+{
+	struct mali_dma_fence_context *dma_fence_context;
+
+	MALI_DEBUG_ASSERT_POINTER(work_handle);
+
+	dma_fence_context = container_of(work_handle, struct mali_dma_fence_context, work_handle);
+
+	dma_fence_context->cb_func(dma_fence_context->pp_job_ptr);
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static void mali_dma_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
+#else
+static void mali_dma_fence_callback(struct fence *fence, struct fence_cb *cb)
+#endif
+{
+	struct mali_dma_fence_waiter *dma_fence_waiter = NULL;
+	struct mali_dma_fence_context *dma_fence_context = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+	MALI_DEBUG_ASSERT_POINTER(cb);
+
+	MALI_IGNORE(fence);
+
+	dma_fence_waiter = container_of(cb, struct mali_dma_fence_waiter, base);
+	dma_fence_context = dma_fence_waiter->parent;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+
+	if (atomic_dec_and_test(&dma_fence_context->count))
+		schedule_work(&dma_fence_context->work_handle);
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static _mali_osk_errcode_t mali_dma_fence_add_callback(struct mali_dma_fence_context *dma_fence_context, struct dma_fence *fence)
+#else
+static _mali_osk_errcode_t mali_dma_fence_add_callback(struct mali_dma_fence_context *dma_fence_context, struct fence *fence)
+#endif
+{
+	int ret = 0;
+	struct mali_dma_fence_waiter *dma_fence_waiter;
+	struct mali_dma_fence_waiter **dma_fence_waiters;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	dma_fence_waiters = krealloc(dma_fence_context->mali_dma_fence_waiters,
+				     (dma_fence_context->num_dma_fence_waiter + 1)
+				     * sizeof(struct mali_dma_fence_waiter *),
+				     GFP_KERNEL);
+
+	if (NULL == dma_fence_waiters) {
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to realloc the dma fence waiters.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	dma_fence_context->mali_dma_fence_waiters = dma_fence_waiters;
+
+	dma_fence_waiter = kzalloc(sizeof(struct mali_dma_fence_waiter), GFP_KERNEL);
+
+	if (NULL == dma_fence_waiter) {
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to create mali dma fence waiter.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	dma_fence_get(fence);
+#else
+	fence_get(fence);
+#endif
+	dma_fence_waiter->fence = fence;
+	dma_fence_waiter->parent = dma_fence_context;
+	atomic_inc(&dma_fence_context->count);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	ret = dma_fence_add_callback(fence, &dma_fence_waiter->base,
+				 mali_dma_fence_callback);
+#else
+	ret = fence_add_callback(fence, &dma_fence_waiter->base,
+				 mali_dma_fence_callback);
+#endif
+	if (0 > ret) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+		dma_fence_put(fence);
+#else
+		fence_put(fence);
+#endif
+		kfree(dma_fence_waiter);
+		atomic_dec(&dma_fence_context->count);
+		if (-ENOENT == ret) {
+			/*-ENOENT if fence has already been signaled, return _MALI_OSK_ERR_OK*/
+			return _MALI_OSK_ERR_OK;
+		}
+		/* Failed to add the fence callback into fence, return _MALI_OSK_ERR_FAULT*/
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into fence.\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	dma_fence_context->mali_dma_fence_waiters[dma_fence_context->num_dma_fence_waiter] = dma_fence_waiter;
+	dma_fence_context->num_dma_fence_waiter++;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+ struct dma_fence *mali_dma_fence_new(u32  context, u32 seqno)
+ #else
+struct fence *mali_dma_fence_new(u32  context, u32 seqno)
+#endif
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct dma_fence *fence = NULL;
+	fence = kzalloc(sizeof(struct dma_fence), GFP_KERNEL);
+#else
+	struct fence *fence = NULL;
+	fence = kzalloc(sizeof(struct fence), GFP_KERNEL);
+#endif
+	if (NULL == fence) {
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to create dma fence.\n"));
+		return fence;
+	}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	dma_fence_init(fence,
+		   &mali_dma_fence_ops,
+		   &mali_dma_fence_lock,
+		   context, seqno);
+#else
+	fence_init(fence,
+		   &mali_dma_fence_ops,
+		   &mali_dma_fence_lock,
+		   context, seqno);
+#endif
+	return fence;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+void mali_dma_fence_signal_and_put(struct dma_fence **fence)
+#else
+void mali_dma_fence_signal_and_put(struct fence **fence)
+#endif
+{
+	MALI_DEBUG_ASSERT_POINTER(fence);
+	MALI_DEBUG_ASSERT_POINTER(*fence);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	dma_fence_signal(*fence);
+	dma_fence_put(*fence);
+#else
+	fence_signal(*fence);
+	fence_put(*fence);
+#endif
+	*fence = NULL;
+}
+
+void mali_dma_fence_context_init(struct mali_dma_fence_context *dma_fence_context,
+				 mali_dma_fence_context_callback_func_t  cb_func,
+				 void *pp_job_ptr)
+{
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+
+	INIT_WORK(&dma_fence_context->work_handle, mali_dma_fence_context_work_func);
+	atomic_set(&dma_fence_context->count, 1);
+	dma_fence_context->num_dma_fence_waiter = 0;
+	dma_fence_context->mali_dma_fence_waiters = NULL;
+	dma_fence_context->cb_func = cb_func;
+	dma_fence_context->pp_job_ptr = pp_job_ptr;
+}
+
+_mali_osk_errcode_t mali_dma_fence_context_add_waiters(struct mali_dma_fence_context *dma_fence_context,
+		struct reservation_object *dma_reservation_object)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+	u32 shared_count = 0, i;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct dma_fence *exclusive_fence = NULL;
+	struct dma_fence **shared_fences = NULL;
+#else
+	struct fence *exclusive_fence = NULL;
+	struct fence **shared_fences = NULL;
+#endif
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+	MALI_DEBUG_ASSERT_POINTER(dma_reservation_object);
+
+	/* Get all the shared/exclusive fences in the reservation object of dma buf*/
+	ret = reservation_object_get_fences_rcu(dma_reservation_object, &exclusive_fence,
+						&shared_count, &shared_fences);
+	if (ret < 0) {
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to get  shared or exclusive_fence dma fences from  the reservation object of dma buf.\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (exclusive_fence) {
+		ret = mali_dma_fence_add_callback(dma_fence_context, exclusive_fence);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into exclusive fence.\n"));
+			mali_dma_fence_context_cleanup(dma_fence_context);
+			goto ended;
+		}
+	}
+
+
+	for (i = 0; i < shared_count; i++) {
+		ret = mali_dma_fence_add_callback(dma_fence_context, shared_fences[i]);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into shared fence [%d].\n", i));
+			mali_dma_fence_context_cleanup(dma_fence_context);
+			break;
+		}
+	}
+
+ended:
+
+	if (exclusive_fence)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+		dma_fence_put(exclusive_fence);
+#else
+		fence_put(exclusive_fence);
+#endif
+
+	if (shared_fences) {
+		for (i = 0; i < shared_count; i++) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+			dma_fence_put(shared_fences[i]);
+#else
+			fence_put(shared_fences[i]);
+#endif
+		}
+		kfree(shared_fences);
+	}
+
+	return ret;
+}
+
+
+void mali_dma_fence_context_term(struct mali_dma_fence_context *dma_fence_context)
+{
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+	atomic_set(&dma_fence_context->count, 0);
+	if (dma_fence_context->work_handle.func) {
+		cancel_work_sync(&dma_fence_context->work_handle);
+	}
+	mali_dma_fence_context_cleanup(dma_fence_context);
+}
+
+void mali_dma_fence_context_dec_count(struct mali_dma_fence_context *dma_fence_context)
+{
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+
+	if (atomic_dec_and_test(&dma_fence_context->count))
+		schedule_work(&dma_fence_context->work_handle);
+}
+
+
+void mali_dma_fence_add_reservation_object_list(struct reservation_object *dma_reservation_object,
+		struct reservation_object **dma_reservation_object_list,
+		u32 *num_dma_reservation_object)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_reservation_object);
+	MALI_DEBUG_ASSERT_POINTER(dma_reservation_object_list);
+	MALI_DEBUG_ASSERT_POINTER(num_dma_reservation_object);
+
+	for (i = 0; i < *num_dma_reservation_object; i++) {
+		if (dma_reservation_object_list[i] == dma_reservation_object)
+			return;
+	}
+
+	dma_reservation_object_list[*num_dma_reservation_object] = dma_reservation_object;
+	(*num_dma_reservation_object)++;
+}
+
+int mali_dma_fence_lock_reservation_object_list(struct reservation_object **dma_reservation_object_list,
+		u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx)
+{
+	u32 i;
+
+	struct reservation_object *reservation_object_to_slow_lock = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_reservation_object_list);
+	MALI_DEBUG_ASSERT_POINTER(ww_actx);
+
+	ww_acquire_init(ww_actx, &reservation_ww_class);
+
+again:
+	for (i = 0; i < num_dma_reservation_object; i++) {
+		int ret;
+
+		if (dma_reservation_object_list[i] == reservation_object_to_slow_lock) {
+			reservation_object_to_slow_lock = NULL;
+			continue;
+		}
+
+		ret = ww_mutex_lock(&dma_reservation_object_list[i]->lock, ww_actx);
+
+		if (ret < 0) {
+			u32  slow_lock_index = i;
+
+			/* unlock all pre locks we have already locked.*/
+			while (i > 0) {
+				i--;
+				ww_mutex_unlock(&dma_reservation_object_list[i]->lock);
+			}
+
+			if (NULL != reservation_object_to_slow_lock)
+				ww_mutex_unlock(&reservation_object_to_slow_lock->lock);
+
+			if (ret == -EDEADLK) {
+				reservation_object_to_slow_lock = dma_reservation_object_list[slow_lock_index];
+				ww_mutex_lock_slow(&reservation_object_to_slow_lock->lock, ww_actx);
+				goto again;
+			}
+			ww_acquire_fini(ww_actx);
+			MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to lock all dma reservation objects.\n", i));
+			return ret;
+		}
+	}
+
+	ww_acquire_done(ww_actx);
+	return 0;
+}
+
+void mali_dma_fence_unlock_reservation_object_list(struct reservation_object **dma_reservation_object_list,
+		u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx)
+{
+	u32 i;
+
+	for (i = 0; i < num_dma_reservation_object; i++)
+		ww_mutex_unlock(&dma_reservation_object_list[i]->lock);
+
+	ww_acquire_fini(ww_actx);
+}
diff --git a/mali/linux/mali_dma_fence.h b/mali/linux/mali_dma_fence.h
new file mode 100755
index 0000000..112fdd1
--- /dev/null
+++ b/mali/linux/mali_dma_fence.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_dma_fence.h
+ *
+ * Mali interface for Linux dma buf fence objects.
+ */
+
+#ifndef _MALI_DMA_FENCE_H_
+#define _MALI_DMA_FENCE_H_
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+#include <linux/dma-fence.h>
+#else
+#include <linux/fence.h>
+#endif
+#include <linux/reservation.h>
+#endif
+
+struct mali_dma_fence_context;
+
+/* The mali dma fence context callback function */
+typedef void (*mali_dma_fence_context_callback_func_t)(void *pp_job_ptr);
+
+struct mali_dma_fence_waiter {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct dma_fence *fence;
+	struct dma_fence_cb base;
+#else
+	struct fence_cb base;
+	struct fence *fence;
+#endif
+	struct mali_dma_fence_context *parent;
+};
+
+struct mali_dma_fence_context {
+	struct work_struct work_handle;
+	struct mali_dma_fence_waiter **mali_dma_fence_waiters;
+	u32 num_dma_fence_waiter;
+	atomic_t count;
+	void *pp_job_ptr; /* the mali pp job pointer */;
+	mali_dma_fence_context_callback_func_t cb_func;
+};
+
+/* Create a dma fence
+ * @param context The execution context this fence is run on
+ * @param seqno A linearly increasing sequence number for this context
+ * @return the new dma fence if success, or NULL on failure.
+ */
+ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+ struct dma_fence *mali_dma_fence_new(u32  context, u32 seqno);
+ #else
+struct fence *mali_dma_fence_new(u32  context, u32 seqno);
+#endif
+/* Signal and put dma fence
+ * @param fence The dma fence to signal and put
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+void mali_dma_fence_signal_and_put(struct dma_fence **fence);
+#else
+void mali_dma_fence_signal_and_put(struct fence **fence);
+#endif
+/**
+ * Initialize a mali dma fence context for pp job.
+ * @param dma_fence_context The mali dma fence context to initialize.
+ * @param cb_func The dma fence context callback function to call when all dma fence release.
+ * @param pp_job_ptr The pp_job to call function with.
+ */
+void mali_dma_fence_context_init(struct mali_dma_fence_context *dma_fence_context,
+				 mali_dma_fence_context_callback_func_t  cb_func,
+				 void *pp_job_ptr);
+
+/**
+ * Add new mali dma fence waiter into mali dma fence context
+ * @param dma_fence_context The mali dma fence context
+ * @param dma_reservation_object the reservation object to create new mali dma fence waiters
+ * @return _MALI_OSK_ERR_OK if success, or not.
+ */
+_mali_osk_errcode_t mali_dma_fence_context_add_waiters(struct mali_dma_fence_context *dma_fence_context,
+		struct reservation_object *dma_reservation_object);
+
+/**
+ * Release the dma fence context
+ * @param dma_fence_text The mali dma fence context.
+ */
+void mali_dma_fence_context_term(struct mali_dma_fence_context *dma_fence_context);
+
+/**
+ * Decrease the dma fence context atomic count
+ * @param dma_fence_text The mali dma fence context.
+ */
+void mali_dma_fence_context_dec_count(struct mali_dma_fence_context *dma_fence_context);
+
+/**
+ * Get all reservation object
+ * @param dma_reservation_object The reservation object to add into the reservation object list
+ * @param dma_reservation_object_list The reservation object list to store all reservation object
+ * @param num_dma_reservation_object The number of all reservation object
+ */
+void mali_dma_fence_add_reservation_object_list(struct reservation_object *dma_reservation_object,
+		struct reservation_object **dma_reservation_object_list,
+		u32 *num_dma_reservation_object);
+
+/**
+ * Wait/wound mutex lock to lock all reservation object.
+ */
+int mali_dma_fence_lock_reservation_object_list(struct reservation_object **dma_reservation_object_list,
+		u32  num_dma_reservation_object, struct ww_acquire_ctx *ww_actx);
+
+/**
+ * Wait/wound mutex lock to unlock all reservation object.
+ */
+void mali_dma_fence_unlock_reservation_object_list(struct reservation_object **dma_reservation_object_list,
+		u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx);
+#endif
diff --git a/mali/linux/mali_internal_sync.c b/mali/linux/mali_internal_sync.c
new file mode 100644
index 0000000..43d34f1
--- /dev/null
+++ b/mali/linux/mali_internal_sync.c
@@ -0,0 +1,763 @@
+/*
+ * This confidential and proprietary software may be used only as
+ * authorised by a licensing agreement from ARM Limited
+ * (C) COPYRIGHT 2012-2016 ARM Limited
+ * ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorised
+ * copies and copies may only be made to the extent permitted
+ * by a licensing agreement from ARM Limited.
+ */
+
+#include "mali_internal_sync.h"
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
+#include <linux/ioctl.h>
+#include <linux/export.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/anon_inodes.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#if defined(DEBUG)
+#include "mali_session.h"
+#include "mali_timeline.h"
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static const struct dma_fence_ops fence_ops;
+#else
+static const struct fence_ops fence_ops;
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static struct mali_internal_sync_point *mali_internal_fence_to_sync_pt(struct dma_fence *fence)
+#else
+static struct mali_internal_sync_point *mali_internal_fence_to_sync_pt(struct fence *fence)
+#endif
+{
+	MALI_DEBUG_ASSERT_POINTER(fence);
+	return container_of(fence, struct mali_internal_sync_point, base);
+}
+
+static inline struct mali_internal_sync_timeline *mali_internal_sync_pt_to_sync_timeline(struct mali_internal_sync_point *sync_pt)
+{
+	MALI_DEBUG_ASSERT_POINTER(sync_pt);
+	return container_of(sync_pt->base.lock, struct mali_internal_sync_timeline, sync_pt_list_lock);
+}
+
+static void mali_internal_sync_timeline_free(struct kref *kref_count)
+{
+	struct mali_internal_sync_timeline *sync_timeline;
+
+	MALI_DEBUG_ASSERT_POINTER(kref_count);
+
+	sync_timeline = container_of(kref_count, struct mali_internal_sync_timeline, kref_count);
+
+	if (sync_timeline->ops->release_obj)
+		sync_timeline->ops->release_obj(sync_timeline);
+
+	kfree(sync_timeline);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+static void mali_internal_fence_check_cb_func(struct fence *fence, struct fence_cb *cb)
+#else
+static void mali_internal_fence_check_cb_func(struct dma_fence *fence, struct dma_fence_cb *cb)
+#endif
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	struct mali_internal_sync_fence_cb *check;
+#else
+	struct mali_internal_sync_fence_waiter *waiter;
+#endif
+	struct mali_internal_sync_fence *sync_fence;
+	int ret;
+	MALI_DEBUG_ASSERT_POINTER(cb);
+	MALI_IGNORE(fence);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	check = container_of(cb, struct mali_internal_sync_fence_cb, cb);
+	sync_fence = check->sync_file;
+#else
+	waiter = container_of(cb, struct mali_internal_sync_fence_waiter, cb);
+	sync_fence = (struct mali_internal_sync_fence *)waiter->work.private;
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	ret = atomic_dec_and_test(&sync_fence->status);
+	if (ret)
+		wake_up_all(&sync_fence->wq);
+#else	
+	ret =sync_fence->fence->ops->signaled(sync_fence->fence);
+
+	if (0 > ret)
+		MALI_PRINT_ERROR(("Mali internal sync:Failed to wait fence  0x%x for sync_fence 0x%x.\n", fence, sync_fence));
+	if (1 == ret)
+		wake_up_all(&sync_fence->wq);
+#endif
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+static void mali_internal_sync_fence_add_fence(struct mali_internal_sync_fence *sync_fence, struct fence *sync_pt)
+{
+	int fence_num = 0;
+	MALI_DEBUG_ASSERT_POINTER(sync_fence);
+	MALI_DEBUG_ASSERT_POINTER(sync_pt);
+
+	fence_num = sync_fence->num_fences;
+
+	sync_fence->cbs[fence_num].fence = sync_pt;
+	sync_fence->cbs[fence_num].sync_file = sync_fence;
+
+	if (!fence_add_callback(sync_pt, &sync_fence->cbs[fence_num].cb, mali_internal_fence_check_cb_func)) {
+		fence_get(sync_pt);
+		sync_fence->num_fences++;
+		atomic_inc(&sync_fence->status);
+	}
+}
+#endif
+
+static int mali_internal_sync_fence_wake_up_wq(wait_queue_t *curr, unsigned mode,
+		int wake_flags, void *key)
+{
+	struct mali_internal_sync_fence_waiter *wait;
+	MALI_IGNORE(mode);
+	MALI_IGNORE(wake_flags);
+	MALI_IGNORE(key);
+
+	wait = container_of(curr, struct mali_internal_sync_fence_waiter, work);
+	list_del_init(&wait->work.task_list);
+
+	wait->callback(wait->work.private, wait);
+	return 1;
+}
+
+struct mali_internal_sync_timeline *mali_internal_sync_timeline_create(const struct mali_internal_sync_timeline_ops *ops,
+		int size, const char *name)
+{
+	struct mali_internal_sync_timeline *sync_timeline = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(ops);
+
+	if (size < sizeof(struct mali_internal_sync_timeline)) {
+		MALI_PRINT_ERROR(("Mali internal sync:Invalid size to create the mali internal sync timeline.\n"));
+		goto err;
+	}
+
+	sync_timeline = kzalloc(size, GFP_KERNEL);
+	if (NULL == sync_timeline) {
+		MALI_PRINT_ERROR(("Mali internal sync:Failed to  allocate buffer  for the mali internal sync timeline.\n"));
+		goto err;
+	}
+	kref_init(&sync_timeline->kref_count);
+	sync_timeline->ops = ops;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	sync_timeline->fence_context = dma_fence_context_alloc(1);
+#else
+	sync_timeline->fence_context = fence_context_alloc(1);
+#endif
+	strlcpy(sync_timeline->name, name, sizeof(sync_timeline->name));
+
+	INIT_LIST_HEAD(&sync_timeline->sync_pt_list_head);
+	spin_lock_init(&sync_timeline->sync_pt_list_lock);
+
+	return sync_timeline;
+err:
+	if (NULL != sync_timeline) {
+		kfree(sync_timeline);
+	}
+	return NULL;
+}
+
+void mali_internal_sync_timeline_destroy(struct mali_internal_sync_timeline *sync_timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(sync_timeline);
+
+	sync_timeline->destroyed = MALI_TRUE;
+
+	smp_wmb();
+
+	mali_internal_sync_timeline_signal(sync_timeline);
+	kref_put(&sync_timeline->kref_count, mali_internal_sync_timeline_free);
+}
+
+void mali_internal_sync_timeline_signal(struct mali_internal_sync_timeline *sync_timeline)
+{
+	unsigned long flags;
+	struct mali_internal_sync_point *sync_pt, *next;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_timeline);
+
+	spin_lock_irqsave(&sync_timeline->sync_pt_list_lock, flags);
+
+	list_for_each_entry_safe(sync_pt, next, &sync_timeline->sync_pt_list_head,
+				 sync_pt_list) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+		if (dma_fence_is_signaled_locked(&sync_pt->base))
+#else
+		if (fence_is_signaled_locked(&sync_pt->base))
+#endif
+			list_del_init(&sync_pt->sync_pt_list);
+	}
+
+	spin_unlock_irqrestore(&sync_timeline->sync_pt_list_lock, flags);
+}
+
+struct mali_internal_sync_point *mali_internal_sync_point_create(struct mali_internal_sync_timeline *sync_timeline, int size)
+{
+	unsigned long flags;
+	struct mali_internal_sync_point *sync_pt = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_timeline);
+
+	if (size < sizeof(struct mali_internal_sync_point)) {
+		MALI_PRINT_ERROR(("Mali internal sync:Invalid size to create the mali internal sync point.\n"));
+		goto err;
+	}
+
+	sync_pt = kzalloc(size, GFP_KERNEL);
+	if (NULL == sync_pt) {
+		MALI_PRINT_ERROR(("Mali internal sync:Failed to  allocate buffer  for the mali internal sync point.\n"));
+		goto err;
+	}
+	spin_lock_irqsave(&sync_timeline->sync_pt_list_lock, flags);
+	kref_get(&sync_timeline->kref_count);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	dma_fence_init(&sync_pt->base, &fence_ops, &sync_timeline->sync_pt_list_lock,
+		       sync_timeline->fence_context, ++sync_timeline->value);
+#else
+	fence_init(&sync_pt->base, &fence_ops, &sync_timeline->sync_pt_list_lock,
+		   sync_timeline->fence_context, ++sync_timeline->value);
+#endif
+	INIT_LIST_HEAD(&sync_pt->sync_pt_list);
+	spin_unlock_irqrestore(&sync_timeline->sync_pt_list_lock, flags);
+
+	return sync_pt;
+err:
+	if (NULL != sync_pt) {
+		kfree(sync_pt);
+	}
+	return NULL;
+}
+
+struct mali_internal_sync_fence *mali_internal_sync_fence_fdget(int fd)
+{
+	struct file *file = fget(fd);
+
+	if (NULL == file) {
+		return NULL;
+	}
+
+	return file->private_data;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+struct mali_internal_sync_fence *mali_internal_sync_fence_merge(
+	struct mali_internal_sync_fence *sync_fence1, struct mali_internal_sync_fence *sync_fence2)
+{
+	struct mali_internal_sync_fence *new_sync_fence;
+	int i, j, num_fence1, num_fence2, total_fences;
+	struct fence *fence0 = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence1);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence2);
+
+	num_fence1 = sync_fence1->num_fences;
+	num_fence2 = sync_fence2->num_fences;
+
+	total_fences = num_fence1 + num_fence2;
+
+	i =0;
+	j = 0;
+	
+	if (num_fence1 > 0) {
+		fence0 = sync_fence1->cbs[i].fence;
+		i = 1;
+	}
+	else if(num_fence2 > 0) {
+		fence0 = sync_fence2->cbs[i].fence;
+		j =1;
+	}
+		
+	new_sync_fence = (struct mali_internal_sync_fence *)sync_file_create(fence0);
+	if (NULL == new_sync_fence) {
+		MALI_PRINT_ERROR(("Mali internal sync:Failed to  create the mali internal sync fence when merging sync fence.\n"));
+		return NULL;
+	}
+
+	fence_remove_callback(new_sync_fence->cb[0].fence, &new_sync_fence->cb[0].cb);
+	new_sync_fence->num_fences = 0;
+	atomic_dec(&new_sync_fence->status);
+
+	for (; i < num_fence1 && j < num_fence2;) {
+		struct fence *fence1 = sync_fence1->cbs[i].fence;
+		struct fence *fence2 = sync_fence2->cbs[j].fence;
+
+		if (fence1->context < fence2->context) {
+			mali_internal_sync_fence_add_fence(new_sync_fence, fence1);
+
+			i++;
+		} else if (fence1->context > fence2->context) {
+			mali_internal_sync_fence_add_fence(new_sync_fence, fence2);
+
+			j++;
+		} else {
+			if (fence1->seqno - fence2->seqno <= INT_MAX)
+				mali_internal_sync_fence_add_fence(new_sync_fence, fence1);
+			else
+				mali_internal_sync_fence_add_fence(new_sync_fence, fence2);
+			i++;
+			j++;
+		}
+	}
+
+	for (; i < num_fence1; i++)
+		mali_internal_sync_fence_add_fence(new_sync_fence, sync_fence1->cbs[i].fence);
+
+	for (; j < num_fence2; j++)
+		mali_internal_sync_fence_add_fence(new_sync_fence, sync_fence2->cbs[j].fence);
+
+	return new_sync_fence;
+}
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+static struct fence **mali_internal_get_fences(struct mali_internal_sync_fence *sync_fence, int *num_fences)
+#else
+static struct dma_fence **mali_internal_get_fences(struct mali_internal_sync_fence *sync_fence, int *num_fences)
+#endif
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	if (sync_fence->fence->ops == &fence_array_ops) {
+		struct fence_array *fence_array = container_of(sync_fence->fence, struct fence_array, base);
+		*num_fences = fence_array->num_fences;
+		return fence_array->fences;
+	}
+#else
+	if (sync_fence->fence->ops == &dma_fence_array_ops) {
+		struct dma_fence_array *fence_array = container_of(sync_fence->fence, struct dma_fence_array, base);
+		*num_fences = fence_array->num_fences;
+		return fence_array->fences;
+	}
+#endif
+	*num_fences = 1;
+	return &sync_fence->fence;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+static void mali_internal_add_fence_array(struct fence **fences, int *num_fences, struct fence *fence)
+#else
+static void mali_internal_add_fence_array(struct dma_fence **fences, int *num_fences, struct dma_fence *fence)
+#endif
+{
+	fences[*num_fences] = fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	if (!fence_is_signaled(fence)) {
+		fence_get(fence);
+		(*num_fences)++;
+	}
+#else
+	if (!dma_fence_is_signaled(fence)) {
+		dma_fence_get(fence);
+		(*num_fences)++;
+	}
+#endif
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+static int mali_internal_sync_fence_set_fence_array(struct mali_internal_sync_fence *sync_fence,
+			       struct fence **fences, int num_fences)
+#else
+static int mali_internal_sync_fence_set_fence_array(struct mali_internal_sync_fence *sync_fence,
+			       struct dma_fence **fences, int num_fences)
+#endif
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	struct fence_array *array;
+#else
+	struct dma_fence_array *array;
+#endif
+	MALI_DEBUG_ASSERT(1 != num_fences);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	array = fence_array_create(num_fences, fences,
+					   fence_context_alloc(1), 1, false);
+#else
+	array = dma_fence_array_create(num_fences, fences,
+					   dma_fence_context_alloc(1), 1, false);
+#endif
+	if (!array)
+		return -ENOMEM;
+
+	sync_fence->fence = &array->base;
+
+	return 0;
+}
+
+struct mali_internal_sync_fence *mali_internal_sync_fence_merge(
+	struct mali_internal_sync_fence *sync_fence1, struct mali_internal_sync_fence *sync_fence2)
+{
+		struct mali_internal_sync_fence *sync_fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+		struct fence **fences, **nfences, **fences1, **fences2;
+#else
+		struct dma_fence **fences, **nfences, **fences1, **fences2;
+#endif
+		int real_num_fences, i, j, num_fences, num_fences1, num_fences2;
+
+		fences1 = mali_internal_get_fences(sync_fence1, &num_fences1);
+		fences2 = mali_internal_get_fences(sync_fence2, &num_fences2);
+		
+		num_fences = num_fences1 + num_fences2;
+	
+		fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL);
+		if (!fences) {
+			MALI_PRINT_ERROR(("Mali internal sync:Failed to  alloc buffer for fences.\n"));
+			goto fences_alloc_failed;
+		}
+
+		for (real_num_fences = i = j = 0; i < num_fences1 && j < num_fences2; ) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+			struct fence *fence1 = fences1[i];
+			struct fence *fence2 = fences2[j];
+#else
+			struct dma_fence *fence1 = fences1[i];
+			struct dma_fence *fence2 = fences2[j];
+#endif
+			if (fence1->context < fence2->context) {
+				mali_internal_add_fence_array(fences, &real_num_fences, fence1);
+	
+				i++;
+			} else if (fence1->context > fence2->context) {
+				mali_internal_add_fence_array(fences, &real_num_fences, fence2);
+	
+				j++;
+			} else {
+				if (fence1->seqno - fence2->seqno <= INT_MAX)
+					mali_internal_add_fence_array(fences, &real_num_fences, fence1);
+				else
+					mali_internal_add_fence_array(fences, &real_num_fences, fence2);
+	
+				i++;
+				j++;
+			}
+		}
+	
+		for (; i < num_fences1; i++)
+			mali_internal_add_fence_array(fences, &real_num_fences, fences1[i]);
+	
+		for (; j < num_fences2; j++)
+			mali_internal_add_fence_array(fences, &real_num_fences, fences2[j]);
+
+		if (0 == real_num_fences)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+			fences[real_num_fences++] = fence_get(fences1[0]);
+#else
+			fences[real_num_fences++] = dma_fence_get(fences1[0]);
+#endif
+	
+		if (num_fences > real_num_fences) {
+			nfences = krealloc(fences, real_num_fences * sizeof(*fences),
+					  GFP_KERNEL);
+			if (!nfences)
+				goto nfences_alloc_failed;
+	
+			fences = nfences;
+		}
+
+		sync_fence = (struct mali_internal_sync_fence *)sync_file_create(fences[0]);
+		if (NULL == sync_fence) {
+			MALI_PRINT_ERROR(("Mali internal sync:Failed to  create the mali internal sync fence when merging sync fence.\n"));
+			goto sync_fence_alloc_failed;
+		}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+		fence_put(fences[0]);
+#else
+		dma_fence_put(fences[0]);
+#endif
+
+		if (mali_internal_sync_fence_set_fence_array(sync_fence, fences, real_num_fences) < 0) {
+			MALI_PRINT_ERROR(("Mali internal sync:Failed to  set fence for sync fence.\n"));
+			goto sync_fence_set_failed;
+		}
+	
+		return sync_fence;
+
+sync_fence_set_failed:
+	fput(sync_fence->file);
+sync_fence_alloc_failed:
+	for (i = 0; i < real_num_fences; i++)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+		fence_put(fences[i]);
+#else
+		dma_fence_put(fences[i]);
+#endif
+nfences_alloc_failed:
+	kfree(fences);
+fences_alloc_failed:
+	return NULL;
+}
+#endif
+
+void mali_internal_sync_fence_waiter_init(struct mali_internal_sync_fence_waiter *waiter,
+		mali_internal_sync_callback_t callback)
+{
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+	MALI_DEBUG_ASSERT_POINTER(callback);
+
+	INIT_LIST_HEAD(&waiter->work.task_list);
+	waiter->callback = callback;
+}
+
+int mali_internal_sync_fence_wait_async(struct mali_internal_sync_fence *sync_fence,
+					struct mali_internal_sync_fence_waiter *waiter)
+{
+	int err;
+	unsigned long flags;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence);
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	err = atomic_read(&sync_fence->status);
+
+	if (0 > err)
+		return err;
+
+	if (!err)
+		return 1;
+
+	init_waitqueue_func_entry(&waiter->work, mali_internal_sync_fence_wake_up_wq);
+	waiter->work.private = sync_fence;
+
+	spin_lock_irqsave(&sync_fence->wq.lock, flags);
+	err = atomic_read(&sync_fence->status);
+
+	if (0 < err)
+		__add_wait_queue_tail(&sync_fence->wq, &waiter->work);
+	spin_unlock_irqrestore(&sync_fence->wq.lock, flags);
+
+	if (0 > err)
+		return err;
+
+	return !err;
+#else
+	if ((sync_fence->fence) && (sync_fence->fence->ops) && (sync_fence->fence->ops->signaled))
+		err = sync_fence->fence->ops->signaled(sync_fence->fence);
+	else
+		err = -1;
+
+	if (0 > err)
+		return err;
+
+	if (1 == err)
+		return err;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	err = dma_fence_add_callback(sync_fence->fence, &waiter->cb, mali_internal_fence_check_cb_func);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
+	err = fence_add_callback(sync_fence->fence, &waiter->cb, mali_internal_fence_check_cb_func);
+#endif
+
+	if (0 != err)
+		return err;
+
+	init_waitqueue_func_entry(&waiter->work, mali_internal_sync_fence_wake_up_wq);
+	waiter->work.private = sync_fence;
+
+	spin_lock_irqsave(&sync_fence->wq.lock, flags);
+	err =  sync_fence->fence->ops->signaled(sync_fence->fence);
+
+	if (0 == err)
+		__add_wait_queue_tail(&sync_fence->wq, &waiter->work);
+	spin_unlock_irqrestore(&sync_fence->wq.lock, flags);
+
+	return err;
+#endif
+}
+
+int mali_internal_sync_fence_cancel_async(struct mali_internal_sync_fence *sync_fence,
+		struct mali_internal_sync_fence_waiter *waiter)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence);
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+
+	spin_lock_irqsave(&sync_fence->wq.lock, flags);
+	if (!list_empty(&waiter->work.task_list))
+		list_del_init(&waiter->work.task_list);
+	else
+		ret = -ENOENT;
+	spin_unlock_irqrestore(&sync_fence->wq.lock, flags);
+
+	if (0 == ret) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+        dma_fence_remove_callback(sync_fence->fence, &waiter->cb);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
+        fence_remove_callback(sync_fence->fence, &waiter->cb);
+#endif
+
+	}
+
+	return ret;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static const char *mali_internal_fence_get_driver_name(struct dma_fence *fence)
+#else
+static const char *mali_internal_fence_get_driver_name(struct fence *fence)
+#endif
+{
+	struct mali_internal_sync_point *sync_pt;
+	struct mali_internal_sync_timeline *parent;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	sync_pt = mali_internal_fence_to_sync_pt(fence);
+	parent = mali_internal_sync_pt_to_sync_timeline(sync_pt);
+
+	return parent->ops->driver_name;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static const char *mali_internal_fence_get_timeline_name(struct dma_fence *fence)
+#else
+static const char *mali_internal_fence_get_timeline_name(struct fence *fence)
+#endif
+{
+	struct mali_internal_sync_point *sync_pt;
+	struct mali_internal_sync_timeline *parent;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	sync_pt = mali_internal_fence_to_sync_pt(fence);
+	parent = mali_internal_sync_pt_to_sync_timeline(sync_pt);
+
+	return parent->name;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static void mali_internal_fence_release(struct dma_fence *fence)
+#else
+static void mali_internal_fence_release(struct fence *fence)
+#endif
+{
+	unsigned long flags;
+	struct mali_internal_sync_point *sync_pt;
+	struct mali_internal_sync_timeline *parent;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	sync_pt = mali_internal_fence_to_sync_pt(fence);
+	parent = mali_internal_sync_pt_to_sync_timeline(sync_pt);
+
+
+	spin_lock_irqsave(fence->lock, flags);
+#if 0
+	if (WARN_ON_ONCE(!list_empty(&sync_pt->sync_pt_list)))
+		list_del(&sync_pt->sync_pt_list);
+#else
+	//sync_pt_list empty is possible, dont show warn.
+	if (!list_empty(&sync_pt->sync_pt_list))
+		list_del(&sync_pt->sync_pt_list);
+#endif
+	spin_unlock_irqrestore(fence->lock, flags);
+
+	if (parent->ops->free_pt)
+		parent->ops->free_pt(sync_pt);
+
+	kref_put(&parent->kref_count, mali_internal_sync_timeline_free);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	dma_fence_free(&sync_pt->base);
+#else
+	fence_free(&sync_pt->base);
+#endif
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static bool mali_internal_fence_signaled(struct dma_fence *fence)
+#else
+static bool mali_internal_fence_signaled(struct fence *fence)
+#endif
+{
+	int ret;
+	struct mali_internal_sync_point *sync_pt;
+	struct mali_internal_sync_timeline *parent;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	sync_pt = mali_internal_fence_to_sync_pt(fence);
+	parent = mali_internal_sync_pt_to_sync_timeline(sync_pt);
+
+	ret = parent->ops->has_signaled(sync_pt);
+	if (0 > ret)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+		fence->error = ret;
+#else
+		fence->status = ret;
+#endif
+	return ret;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static bool mali_internal_fence_enable_signaling(struct dma_fence *fence)
+#else
+static bool mali_internal_fence_enable_signaling(struct fence *fence)
+#endif
+{
+	struct mali_internal_sync_point *sync_pt;
+	struct mali_internal_sync_timeline *parent;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	sync_pt = mali_internal_fence_to_sync_pt(fence);
+	parent = mali_internal_sync_pt_to_sync_timeline(sync_pt);
+
+	if (mali_internal_fence_signaled(fence))
+		return false;
+
+	list_add_tail(&sync_pt->sync_pt_list, &parent->sync_pt_list_head);
+	return true;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static void mali_internal_fence_value_str(struct dma_fence *fence, char *str, int size)
+#else
+static void mali_internal_fence_value_str(struct fence *fence, char *str, int size)
+#endif
+{
+	struct mali_internal_sync_point *sync_pt;
+	struct mali_internal_sync_timeline *parent;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+	MALI_IGNORE(str);
+	MALI_IGNORE(size);
+
+	sync_pt = mali_internal_fence_to_sync_pt(fence);
+	parent = mali_internal_sync_pt_to_sync_timeline(sync_pt);
+
+	parent->ops->print_sync_pt(sync_pt);
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static const struct dma_fence_ops fence_ops = {
+#else
+static const struct fence_ops fence_ops = {
+#endif
+	.get_driver_name = mali_internal_fence_get_driver_name,
+	.get_timeline_name = mali_internal_fence_get_timeline_name,
+	.enable_signaling = mali_internal_fence_enable_signaling,
+	.signaled = mali_internal_fence_signaled,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	.wait = dma_fence_default_wait,
+#else
+	.wait = fence_default_wait,
+#endif
+	.release = mali_internal_fence_release,
+	.fence_value_str = mali_internal_fence_value_str,
+};
+#endif
diff --git a/mali/linux/mali_internal_sync.h b/mali/linux/mali_internal_sync.h
new file mode 100755
index 0000000..d801cfc
--- /dev/null
+++ b/mali/linux/mali_internal_sync.h
@@ -0,0 +1,182 @@
+/*
+ * This confidential and proprietary software may be used only as
+ * authorised by a licensing agreement from ARM Limited
+ * (C) COPYRIGHT 2012-2015 ARM Limited
+ * ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorised
+ * copies and copies may only be made to the extent permitted
+ * by a licensing agreement from ARM Limited.
+ */
+
+/**
+ * @file mali_internal_sync.h
+ *
+ * Mali internal structure/interface for sync.
+ */
+
+#ifndef _MALI_INTERNAL_SYNC_H
+#define _MALI_INTERNAL_SYNC_H
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)
+#include <sync.h>
+#else
+#include <linux/sync_file.h>
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+#include <linux/dma-fence.h>
+#else
+#include <linux/fence.h>
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+#include <linux/fence-array.h>
+#else
+#include <linux/dma-fence-array.h>
+#endif
+#endif
+
+struct mali_internal_sync_timeline;
+struct mali_internal_sync_point;
+struct mali_internal_sync_fence;
+
+struct mali_internal_sync_timeline_ops {
+	const char *driver_name;
+	int (*has_signaled)(struct mali_internal_sync_point *pt);
+	void (*free_pt)(struct mali_internal_sync_point *sync_pt);
+	void (*release_obj)(struct mali_internal_sync_timeline *sync_timeline);
+	void (*print_sync_pt)(struct mali_internal_sync_point *sync_pt);
+};
+
+struct mali_internal_sync_timeline {
+	struct kref             kref_count;
+	const struct mali_internal_sync_timeline_ops  *ops;
+	char                    name[32];
+	bool                    destroyed;
+	int                     fence_context;
+	int                     value;
+	spinlock_t              sync_pt_list_lock;
+	struct list_head        sync_pt_list_head;
+};
+
+struct mali_internal_sync_point {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct dma_fence base;
+#else
+	struct fence base;
+#endif
+	struct list_head        sync_pt_list;
+};
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+struct mali_internal_sync_fence_cb {
+	struct fence_cb cb;
+	struct fence *fence;
+	struct mali_internal_sync_fence *sync_file;
+};
+#endif
+
+struct mali_internal_sync_fence {
+	struct file             *file;
+	struct kref             kref;
+	char		name[32];
+#ifdef CONFIG_DEBUG_FS
+	struct list_head	sync_file_list;
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	int num_fences;
+#endif
+	wait_queue_head_t       wq;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	atomic_t                status;
+	struct mali_internal_sync_fence_cb    cbs[];
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	struct fence *fence;
+	struct fence_cb cb;
+#else
+	struct dma_fence *fence;
+	struct dma_fence_cb cb;
+#endif
+};
+
+struct mali_internal_sync_fence_waiter;
+
+typedef void (*mali_internal_sync_callback_t)(struct mali_internal_sync_fence *sync_fence,
+		struct mali_internal_sync_fence_waiter *waiter);
+
+struct mali_internal_sync_fence_waiter {
+	wait_queue_t work;
+	mali_internal_sync_callback_t callback;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	struct fence_cb cb;
+#else
+	struct dma_fence_cb cb;
+#endif
+#endif
+};
+
+/**
+ * Create a mali internal sync timeline.
+ * @param ops The implementation ops for the mali internal sync timeline
+ * @param size The size to allocate
+ * @param name The sync_timeline name
+ * @return The new mali internal sync timeline if successful, NULL if not.
+ */
+struct mali_internal_sync_timeline *mali_internal_sync_timeline_create(const struct mali_internal_sync_timeline_ops *ops,
+		int size, const char *name);
+
+/**
+ * Destroy one mali internal sync timeline.
+ * @param sync_timeline The mali internal sync timeline to destroy.
+ */
+void mali_internal_sync_timeline_destroy(struct mali_internal_sync_timeline *sync_timeline);
+
+/**
+ * Signal one mali internal sync timeline.
+ * @param sync_timeline The mali internal sync timeline to signal.
+ */
+void mali_internal_sync_timeline_signal(struct mali_internal_sync_timeline *sync_timeline);
+
+/**
+ * Create one mali internal sync point.
+ * @param sync_timeline The mali internal sync timeline to add this mali internal sync point.
+  * @return the new mali internal sync point if successful, NULL if not.
+ */
+struct mali_internal_sync_point *mali_internal_sync_point_create(struct mali_internal_sync_timeline *sync_timeline, int size);
+
+/**
+ * Merge mali internal sync fences
+ * @param sync_fence1 The mali internal sync fence to merge
+ * @param sync_fence2 The mali internal sync fence to merge
+ * @return the new mali internal sync fence if successful, NULL if not.
+ */
+struct mali_internal_sync_fence *mali_internal_sync_fence_merge(struct mali_internal_sync_fence *sync_fence1,
+		struct mali_internal_sync_fence *sync_fence2);
+
+/**
+ * Get the mali internal sync fence from sync fd
+ * @param fd The sync handle to get the mali internal sync fence
+ * @return the mali internal sync fence if successful, NULL if not.
+ */
+struct mali_internal_sync_fence *mali_internal_sync_fence_fdget(int fd);
+
+
+void mali_internal_sync_fence_waiter_init(struct mali_internal_sync_fence_waiter *waiter,
+		mali_internal_sync_callback_t callback);
+
+int mali_internal_sync_fence_wait_async(struct mali_internal_sync_fence *sync_fence,
+					struct mali_internal_sync_fence_waiter *waiter);
+
+int mali_internal_sync_fence_cancel_async(struct mali_internal_sync_fence *sync_fence,
+		struct mali_internal_sync_fence_waiter *waiter);
+
+#endif /*LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)*/
+#endif /* _MALI_INTERNAL_SYNC_H */
diff --git a/mali/linux/mali_kernel_linux.c b/mali/linux/mali_kernel_linux.c
new file mode 100755
index 0000000..70dab67
--- /dev/null
+++ b/mali/linux/mali_kernel_linux.c
@@ -0,0 +1,1149 @@
+/**
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_kernel_linux.c
+ * Implementation of the Linux device driver entrypoints
+ */
+#include <linux/module.h>   /* kernel module definitions */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/cdev.h>     /* character device definitions */
+#include <linux/mm.h>       /* memory manager definitions */
+#include <linux/mali/mali_utgard_ioctl.h>
+#include <linux/version.h>
+#include <linux/device.h>
+#include "mali_kernel_license.h"
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/bug.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_core.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_ukk.h"
+#include "mali_ukk_wrappers.h"
+#include "mali_kernel_sysfs.h"
+#include "mali_pm.h"
+#include "mali_kernel_license.h"
+#include "mali_memory.h"
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_swap_alloc.h"
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include "mali_profiling_internal.h"
+#endif
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+#include "mali_osk_profiling.h"
+#include "mali_dvfs_policy.h"
+
+static int is_first_resume = 1;
+/*Store the clk and vol for boot/insmod and mali_resume*/
+static struct mali_gpu_clk_item mali_gpu_clk[2];
+#endif
+
+/* Streamline support for the Mali driver */
+#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_MALI400_PROFILING)
+/* Ask Linux to create the tracepoints */
+#define CREATE_TRACE_POINTS
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_hw_counter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counters);
+#endif /* CONFIG_TRACEPOINTS */
+
+#ifdef CONFIG_MALI_DEVFREQ
+#include "mali_devfreq.h"
+#include "mali_osk_mali.h"
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
+#include <linux/pm_opp.h>
+#else
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif /* Linux >= 3.13*/
+#define dev_pm_opp_of_add_table of_init_opp_table
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
+#define dev_pm_opp_of_remove_table of_free_opp_table
+#endif /* Linux >= 3.19 */
+#endif /* Linux >= 4.4.0 */
+#endif
+
+/* from the __malidrv_build_info.c file that is generated during build */
+extern const char *__malidrv_build_info(void);
+extern void mali_post_init(void);
+extern int mali_pdev_dts_init(struct platform_device* mali_gpu_device);
+extern int mpgpu_class_init(void);
+extern void mpgpu_class_exit(void);
+
+int mali_page_fault = 0;
+module_param(mali_page_fault, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_page_fault, "mali_page_fault");
+
+int pp_hardware_reset = 0;
+module_param(pp_hardware_reset, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(pp_hardware_reset, "mali_hardware_reset");
+/* Module parameter to control log level */
+int mali_debug_level = 2;
+module_param(mali_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_debug_level, "Higher number, more dmesg output");
+
+extern int mali_max_job_runtime;
+module_param(mali_max_job_runtime, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_job_runtime, "Maximum allowed job runtime in msecs.\nJobs will be killed after this no matter what");
+
+extern int mali_l2_max_reads;
+module_param(mali_l2_max_reads, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_l2_max_reads, "Maximum reads for Mali L2 cache");
+
+extern unsigned int mali_dedicated_mem_start;
+module_param(mali_dedicated_mem_start, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_dedicated_mem_start, "Physical start address of dedicated Mali GPU memory.");
+
+extern unsigned int mali_dedicated_mem_size;
+module_param(mali_dedicated_mem_size, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_dedicated_mem_size, "Size of dedicated Mali GPU memory.");
+
+extern unsigned int mali_shared_mem_size;
+module_param(mali_shared_mem_size, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_shared_mem_size, "Size of shared Mali GPU memory.");
+
+#if defined(CONFIG_MALI400_PROFILING)
+extern int mali_boot_profiling;
+module_param(mali_boot_profiling, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_boot_profiling, "Start profiling as a part of Mali driver initialization");
+#endif
+
+extern int mali_max_pp_cores_group_1;
+module_param(mali_max_pp_cores_group_1, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_pp_cores_group_1, "Limit the number of PP cores to use from first PP group.");
+
+extern int mali_max_pp_cores_group_2;
+module_param(mali_max_pp_cores_group_2, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_pp_cores_group_2, "Limit the number of PP cores to use from second PP group (Mali-450 only).");
+
+extern unsigned int mali_mem_swap_out_threshold_value;
+module_param(mali_mem_swap_out_threshold_value, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_mem_swap_out_threshold_value, "Threshold value used to limit how much swappable memory cached in Mali driver.");
+
+#if defined(CONFIG_MALI_DVFS)
+/** the max fps the same as display vsync default 60, can set by module insert parameter */
+extern int mali_max_system_fps;
+module_param(mali_max_system_fps, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_system_fps, "Max system fps the same as display VSYNC.");
+
+/** a lower limit on their desired FPS default 58, can set by module insert parameter*/
+extern int mali_desired_fps;
+module_param(mali_desired_fps, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_desired_fps, "A bit lower than max_system_fps which user desired fps");
+#endif
+
+#if MALI_ENABLE_CPU_CYCLES
+#include <linux/cpumask.h>
+#include <linux/timer.h>
+#include <asm/smp.h>
+static struct timer_list mali_init_cpu_clock_timers[8];
+static u32 mali_cpu_clock_last_value[8] = {0,};
+#endif
+
+/* Export symbols from common code: mali_user_settings.c */
+#include "mali_user_settings_db.h"
+EXPORT_SYMBOL(mali_set_user_setting);
+EXPORT_SYMBOL(mali_get_user_setting);
+
+static char mali_dev_name[] = "mali"; /* should be const, but the functions we call requires non-cost */
+
+/* This driver only supports one Mali device, and this variable stores this single platform device */
+struct platform_device *mali_platform_device = NULL;
+
+/* This driver only supports one Mali device, and this variable stores the exposed misc device (/dev/mali) */
+static struct miscdevice mali_miscdevice = { 0, };
+
+static int mali_miscdevice_register(struct platform_device *pdev);
+static void mali_miscdevice_unregister(void);
+
+static int mali_open(struct inode *inode, struct file *filp);
+static int mali_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int mali_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static int mali_probe(struct platform_device *pdev);
+static int mali_remove(struct platform_device *pdev);
+
+static int mali_driver_suspend_scheduler(struct device *dev);
+static int mali_driver_resume_scheduler(struct device *dev);
+
+#ifdef CONFIG_PM_RUNTIME
+static int mali_driver_runtime_suspend(struct device *dev);
+static int mali_driver_runtime_resume(struct device *dev);
+static int mali_driver_runtime_idle(struct device *dev);
+#endif
+
+#if defined(MALI_FAKE_PLATFORM_DEVICE)
+#if defined(CONFIG_MALI_DT)
+extern int mali_platform_device_init(struct platform_device *device);
+extern int mali_platform_device_deinit(struct platform_device *device);
+#else
+extern int mali_platform_device_register(void);
+extern int mali_platform_device_unregister(void);
+#endif
+#endif
+
+/* Linux power management operations provided by the Mali device driver */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29))
+struct pm_ext_ops mali_dev_ext_pm_ops = {
+	.base =
+	{
+		.suspend = mali_driver_suspend_scheduler,
+		.resume = mali_driver_resume_scheduler,
+		.freeze = mali_driver_suspend_scheduler,
+		.thaw =   mali_driver_resume_scheduler,
+	},
+};
+#else
+static const struct dev_pm_ops mali_dev_pm_ops = {
+#ifdef CONFIG_PM_RUNTIME
+	.runtime_suspend = mali_driver_runtime_suspend,
+	.runtime_resume = mali_driver_runtime_resume,
+	.runtime_idle = mali_driver_runtime_idle,
+#endif
+	.suspend = mali_driver_suspend_scheduler,
+	.resume = mali_driver_resume_scheduler,
+	.freeze = mali_driver_suspend_scheduler,
+	.thaw = mali_driver_resume_scheduler,
+	.poweroff = mali_driver_suspend_scheduler,
+};
+#endif
+
+#ifdef CONFIG_MALI_DT
+static struct of_device_id base_dt_ids[] = {
+	{.compatible = "arm,mali-300"},
+	{.compatible = "arm,mali-400"},
+	{.compatible = "arm,mali-450"},
+	{.compatible = "arm,mali-470"},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, base_dt_ids);
+#endif
+
+/* The Mali device driver struct */
+static struct platform_driver mali_platform_driver = {
+	.probe  = mali_probe,
+	.remove = mali_remove,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29))
+	.pm = &mali_dev_ext_pm_ops,
+#endif
+	.driver =
+	{
+		.name   = MALI_GPU_NAME_UTGARD,
+		.owner  = THIS_MODULE,
+		.bus = &platform_bus_type,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29))
+		.pm = &mali_dev_pm_ops,
+#endif
+#ifdef CONFIG_MALI_DT
+		.of_match_table = of_match_ptr(base_dt_ids),
+#endif
+	},
+};
+
+/* Linux misc device operations (/dev/mali) */
+struct file_operations mali_fops = {
+	.owner = THIS_MODULE,
+	.open = mali_open,
+	.release = mali_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl = mali_ioctl,
+#else
+	.ioctl = mali_ioctl,
+#endif
+	.compat_ioctl = mali_ioctl,
+	.mmap = mali_mmap
+};
+
+#if MALI_ENABLE_CPU_CYCLES
+void mali_init_cpu_time_counters(int reset, int enable_divide_by_64)
+{
+	/* The CPU assembly reference used is: ARM Architecture Reference Manual ARMv7-AR C.b */
+	u32 write_value;
+
+	/* See B4.1.116 PMCNTENSET, Performance Monitors Count Enable Set register, VMSA */
+	/* setting p15 c9 c12 1 to 0x8000000f==CPU_CYCLE_ENABLE |EVENT_3_ENABLE|EVENT_2_ENABLE|EVENT_1_ENABLE|EVENT_0_ENABLE */
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f));
+
+
+	/* See B4.1.117 PMCR, Performance Monitors Control Register. Writing to p15, c9, c12, 0 */
+	write_value = 1 << 0; /* Bit 0 set. Enable counters */
+	if (reset) {
+		write_value |= 1 << 1; /* Reset event counters */
+		write_value |= 1 << 2; /* Reset cycle counter  */
+	}
+	if (enable_divide_by_64) {
+		write_value |= 1 << 3; /* Enable the Clock divider by 64 */
+	}
+	write_value |= 1 << 4; /* Export enable. Not needed */
+	asm volatile("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(write_value));
+
+	/* PMOVSR Overflow Flag Status Register - Clear Clock and Event overflows */
+	asm volatile("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x8000000f));
+
+
+	/* See B4.1.124 PMUSERENR - setting p15 c9 c14 to 1" */
+	/* User mode access to the Performance Monitors enabled. */
+	/* Lets User space read cpu clock cycles */
+	asm volatile("mcr p15, 0, %0, c9, c14, 0" :: "r"(1));
+}
+
+/** A timer function that configures the cycle clock counter on current CPU.
+ * The function \a mali_init_cpu_time_counters_on_all_cpus sets up this
+ * function to trigger on all Cpus during module load.
+ */
+static void mali_init_cpu_clock_timer_func(unsigned long data)
+{
+	int reset_counters, enable_divide_clock_counter_by_64;
+	int current_cpu = raw_smp_processor_id();
+	unsigned int sample0;
+	unsigned int sample1;
+
+	MALI_IGNORE(data);
+
+	reset_counters = 1;
+	enable_divide_clock_counter_by_64 = 0;
+	mali_init_cpu_time_counters(reset_counters, enable_divide_clock_counter_by_64);
+
+	sample0 = mali_get_cpu_cyclecount();
+	sample1 = mali_get_cpu_cyclecount();
+
+	MALI_DEBUG_PRINT(3, ("Init Cpu %d cycle counter- First two samples: %08x %08x \n", current_cpu, sample0, sample1));
+}
+
+/** A timer functions for storing current time on all cpus.
+ * Used for checking if the clocks have similar values or if they are drifting.
+ */
+static void mali_print_cpu_clock_timer_func(unsigned long data)
+{
+	int current_cpu = raw_smp_processor_id();
+	unsigned int sample0;
+
+	MALI_IGNORE(data);
+	sample0 = mali_get_cpu_cyclecount();
+	if (current_cpu < 8) {
+		mali_cpu_clock_last_value[current_cpu] = sample0;
+	}
+}
+
+/** Init the performance registers on all CPUs to count clock cycles.
+ * For init \a print_only should be 0.
+ * If \a print_only is 1, it will intead print the current clock value of all CPUs.
+ */
+void mali_init_cpu_time_counters_on_all_cpus(int print_only)
+{
+	int i = 0;
+	int cpu_number;
+	int jiffies_trigger;
+	int jiffies_wait;
+
+	jiffies_wait = msecs_to_jiffies(20);
+	jiffies_trigger = jiffies + jiffies_wait;
+
+	for (i = 0 ; i < 8 ; i++) {
+		init_timer(&mali_init_cpu_clock_timers[i]);
+		if (print_only) mali_init_cpu_clock_timers[i].function = mali_print_cpu_clock_timer_func;
+		else            mali_init_cpu_clock_timers[i].function = mali_init_cpu_clock_timer_func;
+		mali_init_cpu_clock_timers[i].expires = jiffies_trigger ;
+	}
+	cpu_number = cpumask_first(cpu_online_mask);
+	for (i = 0 ; i < 8 ; i++) {
+		int next_cpu;
+		add_timer_on(&mali_init_cpu_clock_timers[i], cpu_number);
+		next_cpu = cpumask_next(cpu_number, cpu_online_mask);
+		if (next_cpu >= nr_cpu_ids) break;
+		cpu_number = next_cpu;
+	}
+
+	while (jiffies_wait) jiffies_wait = schedule_timeout_uninterruptible(jiffies_wait);
+
+	for (i = 0 ; i < 8 ; i++) {
+		del_timer_sync(&mali_init_cpu_clock_timers[i]);
+	}
+
+	if (print_only) {
+		if ((0 == mali_cpu_clock_last_value[2]) && (0 == mali_cpu_clock_last_value[3])) {
+			/* Diff can be printed if we want to check if the clocks are in sync
+			int diff = mali_cpu_clock_last_value[0] - mali_cpu_clock_last_value[1];*/
+			MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1]));
+		} else {
+			MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1], mali_cpu_clock_last_value[2], mali_cpu_clock_last_value[3]));
+		}
+	}
+}
+#endif
+
+int mali_module_init(void)
+{
+	int err = 0;
+
+	MALI_DEBUG_PRINT(2, ("Inserting Mali v%d device driver. \n", _MALI_API_VERSION));
+	//MALI_DEBUG_PRINT(2, ("Compiled: %s, time: %s.\n", __DATE__, __TIME__));
+	MALI_DEBUG_PRINT(2, ("Driver revision: %s\n", SVN_REV_STRING));
+
+#if MALI_ENABLE_CPU_CYCLES
+	mali_init_cpu_time_counters_on_all_cpus(0);
+	MALI_DEBUG_PRINT(2, ("CPU cycle counter setup complete\n"));
+	/* Printing the current cpu counters */
+	mali_init_cpu_time_counters_on_all_cpus(1);
+#endif
+
+	/* Initialize module wide settings */
+#ifdef MALI_FAKE_PLATFORM_DEVICE
+#ifndef CONFIG_MALI_DT
+	MALI_DEBUG_PRINT(2, ("mali_module_init() registering device\n"));
+	err = mali_platform_device_register();
+	if (0 != err) {
+		return err;
+	}
+#endif
+#endif
+
+	MALI_DEBUG_PRINT(2, ("mali_module_init() registering driver\n"));
+
+	err = platform_driver_register(&mali_platform_driver);
+
+	if (0 != err) {
+		MALI_DEBUG_PRINT(2, ("mali_module_init() Failed to register driver (%d)\n", err));
+#ifdef MALI_FAKE_PLATFORM_DEVICE
+#ifndef CONFIG_MALI_DT
+		mali_platform_device_unregister();
+#endif
+#endif
+		mali_platform_device = NULL;
+		return err;
+	}
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+	err = _mali_internal_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE);
+	if (0 != err) {
+		/* No biggie if we wheren't able to initialize the profiling */
+		MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n"));
+	}
+#endif
+
+	/* Tracing the current frequency and voltage from boot/insmod*/
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item(),to record current clk info.*/
+	mali_get_current_gpu_clk_item(&mali_gpu_clk[0]);
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[0].clock,
+				      mali_gpu_clk[0].vol / 1000,
+				      0, 0, 0);
+#endif
+
+	MALI_PRINT(("Mali device driver loaded\n"));
+
+	mpgpu_class_init();
+
+	return 0; /* Success */
+}
+
+void mali_module_exit(void)
+{
+	MALI_DEBUG_PRINT(2, ("Unloading Mali v%d device driver.\n", _MALI_API_VERSION));
+
+	MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering driver\n"));
+
+	platform_driver_unregister(&mali_platform_driver);
+
+#if defined(MALI_FAKE_PLATFORM_DEVICE)
+#ifndef CONFIG_MALI_DT
+	MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering device\n"));
+	mali_platform_device_unregister();
+#endif
+#endif
+
+	/* Tracing the current frequency and voltage from rmmod*/
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      0,
+				      0,
+				      0, 0, 0);
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+	_mali_internal_profiling_term();
+#endif
+	mpgpu_class_exit();
+
+	MALI_PRINT(("Mali device driver unloaded\n"));
+}
+
+#ifdef CONFIG_MALI_DEVFREQ
+struct mali_device *mali_device_alloc(void)
+{
+	return kzalloc(sizeof(struct mali_device), GFP_KERNEL);
+}
+
+void mali_device_free(struct mali_device *mdev)
+{
+	kfree(mdev);
+}
+#endif
+
+static int mali_probe(struct platform_device *pdev)
+{
+	int err;
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev;
+#endif
+
+	MALI_DEBUG_PRINT(2, ("mali_probe(): Called for platform device %s\n", pdev->name));
+
+	if (NULL != mali_platform_device) {
+		/* Already connected to a device, return error */
+		MALI_PRINT_ERROR(("mali_probe(): The Mali driver is already connected with a Mali device."));
+		return -EEXIST;
+	}
+
+	mali_platform_device = pdev;
+
+#ifdef CONFIG_MALI_DT
+	/* If we use DT to initialize our DDK, we have to prepare somethings. */
+	err = mali_platform_device_init(mali_platform_device);
+	if (0 != err) {
+		MALI_PRINT_ERROR(("mali_probe(): Failed to initialize platform device."));
+		mali_platform_device = NULL;
+		return -EFAULT;
+	}
+#endif
+
+#ifdef CONFIG_MALI_DEVFREQ
+	mdev = mali_device_alloc();
+	if (!mdev) {
+		MALI_PRINT_ERROR(("Can't allocate mali device private data\n"));
+		return -ENOMEM;
+	}
+
+	mdev->dev = &pdev->dev;
+	dev_set_drvdata(mdev->dev, mdev);
+
+	/*Initilization clock and regulator*/
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_REGULATOR)
+	mdev->regulator = regulator_get_optional(mdev->dev, "mali");
+	if (IS_ERR_OR_NULL(mdev->regulator)) {
+		MALI_DEBUG_PRINT(2, ("Continuing without Mali regulator control\n"));
+		mdev->regulator = NULL;
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+	if (dev_pm_opp_of_add_table(mdev->dev) < 0)
+		MALI_DEBUG_PRINT(3, ("OPP table not found\n"));
+#endif
+
+	/* Need to name the gpu clock "clk_mali" in the device tree */
+	mdev->clock = clk_get(mdev->dev, "clk_mali");
+	if (IS_ERR_OR_NULL(mdev->clock)) {
+		MALI_DEBUG_PRINT(2, ("Continuing without Mali clock control\n"));
+		mdev->clock = NULL;
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare_enable(mdev->clock);
+		if (err) {
+			MALI_PRINT_ERROR(("Failed to prepare and enable clock (%d)\n", err));
+			goto clock_prepare_failed;
+		}
+	}
+
+	/* initilize pm metrics related */
+	if (mali_pm_metrics_init(mdev) < 0) {
+		MALI_DEBUG_PRINT(2, ("mali pm metrics init failed\n"));
+		goto pm_metrics_init_failed;
+	}
+
+	if (mali_devfreq_init(mdev) < 0) {
+		MALI_DEBUG_PRINT(2, ("mali devfreq init failed\n"));
+		goto devfreq_init_failed;
+	}
+#endif
+
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_wq_init()) {
+		/* Initialize the Mali GPU HW specified by pdev */
+		if (_MALI_OSK_ERR_OK == mali_initialize_subsystems()) {
+			/* Register a misc device (so we are accessible from user space) */
+			err = mali_miscdevice_register(pdev);
+			if (0 == err) {
+				/* Setup sysfs entries */
+				err = mali_sysfs_register(mali_dev_name);
+
+				if (0 == err) {
+					mali_post_init();
+					MALI_DEBUG_PRINT(2, ("mali_probe(): Successfully initialized driver for platform device %s\n", pdev->name));
+
+					return 0;
+				} else {
+					MALI_PRINT_ERROR(("mali_probe(): failed to register sysfs entries"));
+				}
+				mali_miscdevice_unregister();
+			} else {
+				MALI_PRINT_ERROR(("mali_probe(): failed to register Mali misc device."));
+			}
+			mali_terminate_subsystems();
+		} else {
+			MALI_PRINT_ERROR(("mali_probe(): Failed to initialize Mali device driver."));
+		}
+		_mali_osk_wq_term();
+	}
+
+#ifdef CONFIG_MALI_DEVFREQ
+	mali_devfreq_term(mdev);
+devfreq_init_failed:
+	mali_pm_metrics_term(mdev);
+pm_metrics_init_failed:
+	clk_disable_unprepare(mdev->clock);
+clock_prepare_failed:
+	clk_put(mdev->clock);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_PM_OPP)
+	dev_pm_opp_of_remove_table(mdev->dev);
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_REGULATOR)
+	regulator_put(mdev->regulator);
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+	mali_device_free(mdev);
+#endif
+
+#ifdef CONFIG_MALI_DT
+	mali_platform_device_deinit(mali_platform_device);
+#endif
+	mali_platform_device = NULL;
+	return -EFAULT;
+}
+
+static int mali_remove(struct platform_device *pdev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(&pdev->dev);
+#endif
+
+	MALI_DEBUG_PRINT(2, ("mali_remove() called for platform device %s\n", pdev->name));
+	mali_sysfs_unregister();
+	mali_miscdevice_unregister();
+	mali_terminate_subsystems();
+	_mali_osk_wq_term();
+
+#ifdef CONFIG_MALI_DEVFREQ
+	mali_devfreq_term(mdev);
+
+	mali_pm_metrics_term(mdev);
+
+	if (mdev->clock) {
+		clk_disable_unprepare(mdev->clock);
+		clk_put(mdev->clock);
+		mdev->clock = NULL;
+	}
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_PM_OPP)
+	dev_pm_opp_of_remove_table(mdev->dev);
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_REGULATOR)
+	regulator_put(mdev->regulator);
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+	mali_device_free(mdev);
+#endif
+
+#ifdef CONFIG_MALI_DT
+	mali_platform_device_deinit(mali_platform_device);
+#endif
+	mali_platform_device = NULL;
+	return 0;
+}
+
+static int mali_miscdevice_register(struct platform_device *pdev)
+{
+	int err;
+
+	mali_miscdevice.minor = MISC_DYNAMIC_MINOR;
+	mali_miscdevice.name = mali_dev_name;
+	mali_miscdevice.fops = &mali_fops;
+	mali_miscdevice.parent = get_device(&pdev->dev);
+
+	err = misc_register(&mali_miscdevice);
+	if (0 != err) {
+		MALI_PRINT_ERROR(("Failed to register misc device, misc_register() returned %d\n", err));
+	}
+
+	return err;
+}
+
+static void mali_miscdevice_unregister(void)
+{
+	misc_deregister(&mali_miscdevice);
+}
+
+static int mali_driver_suspend_scheduler(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+                (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(mdev->devfreq);
+#endif
+
+	mali_pm_os_suspend(MALI_TRUE);
+	/* Tracing the frequency and voltage after mali is suspended */
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      0,
+				      0,
+				      0, 0, 0);
+	return 0;
+}
+
+static int mali_driver_resume_scheduler(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+	/* Tracing the frequency and voltage after mali is resumed */
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item() once,to record current clk info.*/
+	if (is_first_resume == 1) {
+		mali_get_current_gpu_clk_item(&mali_gpu_clk[1]);
+		is_first_resume = 0;
+	}
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[1].clock,
+				      mali_gpu_clk[1].vol / 1000,
+				      0, 0, 0);
+#endif
+	mali_pm_os_resume();
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+                (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(mdev->devfreq);
+#endif
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_RUNTIME
+static int mali_driver_runtime_suspend(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+	if (MALI_TRUE == mali_pm_runtime_suspend()) {
+		/* Tracing the frequency and voltage after mali is suspended */
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+					      0,
+					      0,
+					      0, 0, 0);
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+                (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+		MALI_DEBUG_PRINT(4, ("devfreq_suspend_device: stop devfreq monitor\n"));
+		devfreq_suspend_device(mdev->devfreq);
+#endif
+
+		return 0;
+	} else {
+		return -EBUSY;
+	}
+}
+
+static int mali_driver_runtime_resume(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+	/* Tracing the frequency and voltage after mali is resumed */
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item() once,to record current clk info.*/
+	if (is_first_resume == 1) {
+		mali_get_current_gpu_clk_item(&mali_gpu_clk[1]);
+		is_first_resume = 0;
+	}
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[1].clock,
+				      mali_gpu_clk[1].vol / 1000,
+				      0, 0, 0);
+#endif
+
+	mali_pm_runtime_resume();
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+                (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	MALI_DEBUG_PRINT(4, ("devfreq_resume_device: start devfreq monitor\n"));
+	devfreq_resume_device(mdev->devfreq);
+#endif
+	return 0;
+}
+
+static int mali_driver_runtime_idle(struct device *dev)
+{
+	/* Nothing to do */
+	return 0;
+}
+#endif
+
+static int mali_open(struct inode *inode, struct file *filp)
+{
+	struct mali_session_data *session_data;
+	_mali_osk_errcode_t err;
+
+	/* input validation */
+	if (mali_miscdevice.minor != iminor(inode)) {
+		MALI_PRINT_ERROR(("mali_open() Minor does not match\n"));
+		return -ENODEV;
+	}
+
+	/* allocated struct to track this session */
+	err = _mali_ukk_open((void **)&session_data);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	/* initialize file pointer */
+	filp->f_pos = 0;
+
+	/* link in our session data */
+	filp->private_data = (void *)session_data;
+
+	filp->f_mapping = mali_mem_swap_get_global_swap_file()->f_mapping;
+
+	return 0;
+}
+
+static int mali_release(struct inode *inode, struct file *filp)
+{
+	_mali_osk_errcode_t err;
+
+	/* input validation */
+	if (mali_miscdevice.minor != iminor(inode)) {
+		MALI_PRINT_ERROR(("mali_release() Minor does not match\n"));
+		return -ENODEV;
+	}
+
+	err = _mali_ukk_close((void **)&filp->private_data);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int map_errcode(_mali_osk_errcode_t err)
+{
+	switch (err) {
+	case _MALI_OSK_ERR_OK :
+		return 0;
+	case _MALI_OSK_ERR_FAULT:
+		return -EFAULT;
+	case _MALI_OSK_ERR_INVALID_FUNC:
+		return -ENOTTY;
+	case _MALI_OSK_ERR_INVALID_ARGS:
+		return -EINVAL;
+	case _MALI_OSK_ERR_NOMEM:
+		return -ENOMEM;
+	case _MALI_OSK_ERR_TIMEOUT:
+		return -ETIMEDOUT;
+	case _MALI_OSK_ERR_RESTARTSYSCALL:
+		return -ERESTARTSYS;
+	case _MALI_OSK_ERR_ITEM_NOT_FOUND:
+		return -ENOENT;
+	default:
+		return -EFAULT;
+	}
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int mali_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	int err;
+	struct mali_session_data *session_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+	/* inode not used */
+	(void)inode;
+#endif
+
+	MALI_DEBUG_PRINT(7, ("Ioctl received 0x%08X 0x%08lX\n", cmd, arg));
+
+	session_data = (struct mali_session_data *)filp->private_data;
+	if (NULL == session_data) {
+		MALI_DEBUG_PRINT(7, ("filp->private_data was NULL\n"));
+		return -ENOTTY;
+	}
+
+	if (NULL == (void *)arg) {
+		MALI_DEBUG_PRINT(7, ("arg was NULL\n"));
+		return -ENOTTY;
+	}
+
+	switch (cmd) {
+	case MALI_IOC_WAIT_FOR_NOTIFICATION:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_wait_for_notification_s), sizeof(u64)));
+		err = wait_for_notification_wrapper(session_data, (_mali_uk_wait_for_notification_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_API_VERSION_V2:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_api_version_v2_s), sizeof(u64)));
+		err = get_api_version_v2_wrapper(session_data, (_mali_uk_get_api_version_v2_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_API_VERSION:
+		err = get_api_version_wrapper(session_data, (_mali_uk_get_api_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_POST_NOTIFICATION:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_post_notification_s), sizeof(u64)));
+		err = post_notification_wrapper(session_data, (_mali_uk_post_notification_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_USER_SETTINGS:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_user_settings_s), sizeof(u64)));
+		err = get_user_settings_wrapper(session_data, (_mali_uk_get_user_settings_s __user *)arg);
+		break;
+
+	case MALI_IOC_REQUEST_HIGH_PRIORITY:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_request_high_priority_s), sizeof(u64)));
+		err = request_high_priority_wrapper(session_data, (_mali_uk_request_high_priority_s __user *)arg);
+		break;
+
+	case MALI_IOC_PENDING_SUBMIT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pending_submit_s), sizeof(u64)));
+		err = pending_submit_wrapper(session_data, (_mali_uk_pending_submit_s __user *)arg);
+		break;
+
+#if defined(CONFIG_MALI400_PROFILING)
+	case MALI_IOC_PROFILING_ADD_EVENT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_add_event_s), sizeof(u64)));
+		err = profiling_add_event_wrapper(session_data, (_mali_uk_profiling_add_event_s __user *)arg);
+		break;
+
+	case MALI_IOC_PROFILING_REPORT_SW_COUNTERS:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_sw_counters_report_s), sizeof(u64)));
+		err = profiling_report_sw_counters_wrapper(session_data, (_mali_uk_sw_counters_report_s __user *)arg);
+		break;
+
+	case MALI_IOC_PROFILING_STREAM_FD_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_stream_fd_get_s), sizeof(u64)));
+		err = profiling_get_stream_fd_wrapper(session_data, (_mali_uk_profiling_stream_fd_get_s __user *)arg);
+		break;
+
+	case MALI_IOC_PROILING_CONTROL_SET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_control_set_s), sizeof(u64)));
+		err = profiling_control_set_wrapper(session_data, (_mali_uk_profiling_control_set_s __user *)arg);
+		break;
+#else
+
+	case MALI_IOC_PROFILING_ADD_EVENT:          /* FALL-THROUGH */
+	case MALI_IOC_PROFILING_REPORT_SW_COUNTERS: /* FALL-THROUGH */
+		MALI_DEBUG_PRINT(2, ("Profiling not supported\n"));
+		err = -ENOTTY;
+		break;
+#endif
+
+	case MALI_IOC_PROFILING_MEMORY_USAGE_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_memory_usage_get_s), sizeof(u64)));
+		err = mem_usage_get_wrapper(session_data, (_mali_uk_profiling_memory_usage_get_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_ALLOC:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_alloc_mem_s), sizeof(u64)));
+		err = mem_alloc_wrapper(session_data, (_mali_uk_alloc_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_FREE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_free_mem_s), sizeof(u64)));
+		err = mem_free_wrapper(session_data, (_mali_uk_free_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_BIND:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_bind_mem_s), sizeof(u64)));
+		err = mem_bind_wrapper(session_data, (_mali_uk_bind_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_UNBIND:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_unbind_mem_s), sizeof(u64)));
+		err = mem_unbind_wrapper(session_data, (_mali_uk_unbind_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_COW:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_cow_mem_s), sizeof(u64)));
+		err = mem_cow_wrapper(session_data, (_mali_uk_cow_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_COW_MODIFY_RANGE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_cow_modify_range_s), sizeof(u64)));
+		err = mem_cow_modify_range_wrapper(session_data, (_mali_uk_cow_modify_range_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_RESIZE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_resize_s), sizeof(u64)));
+		err = mem_resize_mem_wrapper(session_data, (_mali_uk_mem_resize_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_WRITE_SAFE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_write_safe_s), sizeof(u64)));
+		err = mem_write_safe_wrapper(session_data, (_mali_uk_mem_write_safe_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_query_mmu_page_table_dump_size_s), sizeof(u64)));
+		err = mem_query_mmu_page_table_dump_size_wrapper(session_data, (_mali_uk_query_mmu_page_table_dump_size_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dump_mmu_page_table_s), sizeof(u64)));
+		err = mem_dump_mmu_page_table_wrapper(session_data, (_mali_uk_dump_mmu_page_table_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_DMA_BUF_GET_SIZE:
+#ifdef CONFIG_DMA_SHARED_BUFFER
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dma_buf_get_size_s), sizeof(u64)));
+		err = mali_dma_buf_get_size(session_data, (_mali_uk_dma_buf_get_size_s __user *)arg);
+#else
+		MALI_DEBUG_PRINT(2, ("DMA-BUF not supported\n"));
+		err = -ENOTTY;
+#endif
+		break;
+
+	case MALI_IOC_PP_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_start_job_s), sizeof(u64)));
+		err = pp_start_job_wrapper(session_data, (_mali_uk_pp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_AND_GP_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_and_gp_start_job_s), sizeof(u64)));
+		err = pp_and_gp_start_job_wrapper(session_data, (_mali_uk_pp_and_gp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_NUMBER_OF_CORES_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_number_of_cores_s), sizeof(u64)));
+		err = pp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_pp_number_of_cores_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_CORE_VERSION_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_core_version_s), sizeof(u64)));
+		err = pp_get_core_version_wrapper(session_data, (_mali_uk_get_pp_core_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_DISABLE_WB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_disable_wb_s), sizeof(u64)));
+		err = pp_disable_wb_wrapper(session_data, (_mali_uk_pp_disable_wb_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_start_job_s), sizeof(u64)));
+		err = gp_start_job_wrapper(session_data, (_mali_uk_gp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_NUMBER_OF_CORES_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_number_of_cores_s), sizeof(u64)));
+		err = gp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_gp_number_of_cores_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_CORE_VERSION_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_core_version_s), sizeof(u64)));
+		err = gp_get_core_version_wrapper(session_data, (_mali_uk_get_gp_core_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_SUSPEND_RESPONSE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_suspend_response_s), sizeof(u64)));
+		err = gp_suspend_response_wrapper(session_data, (_mali_uk_gp_suspend_response_s __user *)arg);
+		break;
+
+	case MALI_IOC_VSYNC_EVENT_REPORT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_vsync_event_report_s), sizeof(u64)));
+		err = vsync_event_report_wrapper(session_data, (_mali_uk_vsync_event_report_s __user *)arg);
+		break;
+
+	case MALI_IOC_TIMELINE_GET_LATEST_POINT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_get_latest_point_s), sizeof(u64)));
+		err = timeline_get_latest_point_wrapper(session_data, (_mali_uk_timeline_get_latest_point_s __user *)arg);
+		break;
+	case MALI_IOC_TIMELINE_WAIT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_wait_s), sizeof(u64)));
+		err = timeline_wait_wrapper(session_data, (_mali_uk_timeline_wait_s __user *)arg);
+		break;
+	case MALI_IOC_TIMELINE_CREATE_SYNC_FENCE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_create_sync_fence_s), sizeof(u64)));
+		err = timeline_create_sync_fence_wrapper(session_data, (_mali_uk_timeline_create_sync_fence_s __user *)arg);
+		break;
+	case MALI_IOC_SOFT_JOB_START:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_start_s), sizeof(u64)));
+		err = soft_job_start_wrapper(session_data, (_mali_uk_soft_job_start_s __user *)arg);
+		break;
+	case MALI_IOC_SOFT_JOB_SIGNAL:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_signal_s), sizeof(u64)));
+		err = soft_job_signal_wrapper(session_data, (_mali_uk_soft_job_signal_s __user *)arg);
+		break;
+
+	default:
+		MALI_DEBUG_PRINT(2, ("No handler for ioctl 0x%08X 0x%08lX\n", cmd, arg));
+		err = -ENOTTY;
+	};
+
+	return err;
+}
+
+
+module_init(mali_module_init);
+module_exit(mali_module_exit);
+
+MODULE_LICENSE(MALI_KERNEL_LINUX_LICENSE);
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(SVN_REV_STRING);
diff --git a/mali/linux/mali_kernel_linux.h b/mali/linux/mali_kernel_linux.h
new file mode 100755
index 0000000..9b4307e
--- /dev/null
+++ b/mali/linux/mali_kernel_linux.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_LINUX_H__
+#define __MALI_KERNEL_LINUX_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/cdev.h>     /* character device definitions */
+#include <linux/idr.h>
+#include <linux/rbtree.h>
+#include "mali_kernel_license.h"
+#include "mali_osk_types.h"
+#include <linux/version.h>
+
+extern struct platform_device *mali_platform_device;
+
+/* After 3.19.0 kenrel droped CONFIG_PM_RUNTIME define,define by ourself */
+#if defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
+#define CONFIG_PM_RUNTIME 1
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LINUX_H__ */
diff --git a/mali/linux/mali_kernel_sysfs.c b/mali/linux/mali_kernel_sysfs.c
new file mode 100755
index 0000000..bf0cc88
--- /dev/null
+++ b/mali/linux/mali_kernel_sysfs.c
@@ -0,0 +1,1415 @@
+/**
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+
+/**
+ * @file mali_kernel_sysfs.c
+ * Implementation of some sysfs data exports
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include "mali_kernel_license.h"
+#include "mali_kernel_common.h"
+#include "mali_ukk.h"
+
+#if MALI_LICENSE_IS_GPL
+
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)
+#include <linux/uaccess.h>
+#else
+#include <asm/uaccess.h>
+#endif
+#include <linux/module.h>
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_sysfs.h"
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include <linux/slab.h>
+#include "mali_osk_profiling.h"
+#endif
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_pm.h"
+#include "mali_pmu.h"
+#include "mali_group.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_l2_cache.h"
+#include "mali_hw_core.h"
+#include "mali_kernel_core.h"
+#include "mali_user_settings_db.h"
+#include "mali_profiling_internal.h"
+#include "mali_gp_job.h"
+#include "mali_pp_job.h"
+#include "mali_executor.h"
+
+#define PRIVATE_DATA_COUNTER_MAKE_GP(src) (src)
+#define PRIVATE_DATA_COUNTER_MAKE_PP(src) ((1 << 24) | src)
+#define PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(src, sub_job) ((1 << 24) | (1 << 16) | (sub_job << 8) | src)
+#define PRIVATE_DATA_COUNTER_IS_PP(a) ((((a) >> 24) & 0xFF) ? MALI_TRUE : MALI_FALSE)
+#define PRIVATE_DATA_COUNTER_GET_SRC(a) (a & 0xFF)
+#define PRIVATE_DATA_COUNTER_IS_SUB_JOB(a) ((((a) >> 16) & 0xFF) ? MALI_TRUE : MALI_FALSE)
+#define PRIVATE_DATA_COUNTER_GET_SUB_JOB(a) (((a) >> 8) & 0xFF)
+
+#define POWER_BUFFER_SIZE 3
+
+static struct dentry *mali_debugfs_dir = NULL;
+
+typedef enum {
+	_MALI_DEVICE_SUSPEND,
+	_MALI_DEVICE_RESUME,
+	_MALI_DEVICE_DVFS_PAUSE,
+	_MALI_DEVICE_DVFS_RESUME,
+	_MALI_MAX_EVENTS
+} _mali_device_debug_power_events;
+
+static const char *const mali_power_events[_MALI_MAX_EVENTS] = {
+	[_MALI_DEVICE_SUSPEND] = "suspend",
+	[_MALI_DEVICE_RESUME] = "resume",
+	[_MALI_DEVICE_DVFS_PAUSE] = "dvfs_pause",
+	[_MALI_DEVICE_DVFS_RESUME] = "dvfs_resume",
+};
+
+static mali_bool power_always_on_enabled = MALI_FALSE;
+
+static int open_copy_private_data(struct inode *inode, struct file *filp)
+{
+	filp->private_data = inode->i_private;
+	return 0;
+}
+
+static ssize_t group_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	struct mali_group *group;
+
+	group = (struct mali_group *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	r = snprintf(buffer, 64, "%u\n",
+		     mali_executor_group_is_disabled(group) ? 0 : 1);
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static ssize_t group_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	unsigned long val;
+	struct mali_group *group;
+
+	group = (struct mali_group *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	r = kstrtoul(&buffer[0], 10, &val);
+	if (0 != r) {
+		return -EINVAL;
+	}
+
+	switch (val) {
+	case 1:
+		mali_executor_group_enable(group);
+		break;
+	case 0:
+		mali_executor_group_disable(group);
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static const struct file_operations group_enabled_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read = group_enabled_read,
+	.write = group_enabled_write,
+};
+
+static ssize_t hw_core_base_addr_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	struct mali_hw_core *hw_core;
+
+	hw_core = (struct mali_hw_core *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(hw_core);
+
+	r = snprintf(buffer, 64, "0x%lX\n", hw_core->phys_addr);
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations hw_core_base_addr_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read = hw_core_base_addr_read,
+};
+
+static ssize_t profiling_counter_src_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data);
+	u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data);
+	mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data);
+	u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data);
+	char buf[64];
+	int r;
+	u32 val;
+
+	if (MALI_TRUE == is_pp) {
+		/* PP counter */
+		if (MALI_TRUE == is_sub_job) {
+			/* Get counter for a particular sub job */
+			if (0 == src_id) {
+				val = mali_pp_job_get_pp_counter_sub_job_src0(sub_job);
+			} else {
+				val = mali_pp_job_get_pp_counter_sub_job_src1(sub_job);
+			}
+		} else {
+			/* Get default counter for all PP sub jobs */
+			if (0 == src_id) {
+				val = mali_pp_job_get_pp_counter_global_src0();
+			} else {
+				val = mali_pp_job_get_pp_counter_global_src1();
+			}
+		}
+	} else {
+		/* GP counter */
+		if (0 == src_id) {
+			val = mali_gp_job_get_gp_counter_src0();
+		} else {
+			val = mali_gp_job_get_gp_counter_src1();
+		}
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == val) {
+		r = snprintf(buf, 64, "-1\n");
+	} else {
+		r = snprintf(buf, 64, "%u\n", val);
+	}
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t profiling_counter_src_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data);
+	u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data);
+	mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data);
+	u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data);
+	char buf[64];
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	if (MALI_TRUE == is_pp) {
+		/* PP counter */
+		if (MALI_TRUE == is_sub_job) {
+			/* Set counter for a particular sub job */
+			if (0 == src_id) {
+				mali_pp_job_set_pp_counter_sub_job_src0(sub_job, (u32)val);
+			} else {
+				mali_pp_job_set_pp_counter_sub_job_src1(sub_job, (u32)val);
+			}
+		} else {
+			/* Set default counter for all PP sub jobs */
+			if (0 == src_id) {
+				mali_pp_job_set_pp_counter_global_src0((u32)val);
+			} else {
+				mali_pp_job_set_pp_counter_global_src1((u32)val);
+			}
+		}
+	} else {
+		/* GP counter */
+		if (0 == src_id) {
+			mali_gp_job_set_gp_counter_src0((u32)val);
+		} else {
+			mali_gp_job_set_gp_counter_src1((u32)val);
+		}
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static const struct file_operations profiling_counter_src_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = profiling_counter_src_read,
+	.write = profiling_counter_src_write,
+};
+
+static ssize_t l2_l2x_counter_srcx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	int r;
+	u32 val;
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+
+	if (0 == src_id) {
+		val = mali_l2_cache_core_get_counter_src0(l2_core);
+	} else {
+		val = mali_l2_cache_core_get_counter_src1(l2_core);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == val) {
+		r = snprintf(buf, 64, "-1\n");
+	} else {
+		r = snprintf(buf, 64, "%u\n", val);
+	}
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t l2_l2x_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+	char buf[64];
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	mali_l2_cache_core_set_counter_src(l2_core, src_id, (u32)val);
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t l2_all_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	long val;
+	int ret;
+	u32 l2_id;
+	struct mali_l2_cache_core *l2_cache;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	l2_id = 0;
+	l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+	while (NULL != l2_cache) {
+		mali_l2_cache_core_set_counter_src(l2_cache, src_id, (u32)val);
+
+		/* try next L2 */
+		l2_id++;
+		l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t l2_l2x_counter_src0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_src1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 1);
+}
+
+static ssize_t l2_l2x_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 1);
+}
+
+static ssize_t l2_all_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_all_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 1);
+}
+
+static const struct file_operations l2_l2x_counter_src0_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_src0_read,
+	.write = l2_l2x_counter_src0_write,
+};
+
+static const struct file_operations l2_l2x_counter_src1_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_src1_read,
+	.write = l2_l2x_counter_src1_write,
+};
+
+static const struct file_operations l2_all_counter_src0_fops = {
+	.owner = THIS_MODULE,
+	.write = l2_all_counter_src0_write,
+};
+
+static const struct file_operations l2_all_counter_src1_fops = {
+	.owner = THIS_MODULE,
+	.write = l2_all_counter_src1_write,
+};
+
+static ssize_t l2_l2x_counter_valx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	int r;
+	u32 src0 = 0;
+	u32 val0 = 0;
+	u32 src1 = 0;
+	u32 val1 = 0;
+	u32 val = -1;
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+
+	mali_l2_cache_core_get_counter_values(l2_core, &src0, &val0, &src1, &val1);
+
+	if (0 == src_id) {
+		if (MALI_HW_CORE_NO_COUNTER != val0) {
+			val = val0;
+		}
+	} else {
+		if (MALI_HW_CORE_NO_COUNTER != val1) {
+			val = val1;
+		}
+	}
+
+	r = snprintf(buf, 64, "%u\n", val);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t l2_l2x_counter_val0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_val1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 1);
+}
+
+static const struct file_operations l2_l2x_counter_val0_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_val0_read,
+};
+
+static const struct file_operations l2_l2x_counter_val1_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_val1_read,
+};
+
+static ssize_t power_always_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+	char buf[32];
+
+	cnt = min(cnt, sizeof(buf) - 1);
+	if (copy_from_user(buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+	buf[cnt] = '\0';
+
+	ret = kstrtoul(buf, 10, &val);
+	if (0 != ret) {
+		return ret;
+	}
+
+	/* Update setting (not exactly thread safe) */
+	if (1 == val && MALI_FALSE == power_always_on_enabled) {
+		power_always_on_enabled = MALI_TRUE;
+		_mali_osk_pm_dev_ref_get_sync();
+	} else if (0 == val && MALI_TRUE == power_always_on_enabled) {
+		power_always_on_enabled = MALI_FALSE;
+		_mali_osk_pm_dev_ref_put();
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t power_always_on_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	if (MALI_TRUE == power_always_on_enabled) {
+		return simple_read_from_buffer(ubuf, cnt, ppos, "1\n", 2);
+	} else {
+		return simple_read_from_buffer(ubuf, cnt, ppos, "0\n", 2);
+	}
+}
+
+static const struct file_operations power_always_on_fops = {
+	.owner = THIS_MODULE,
+	.read  = power_always_on_read,
+	.write = power_always_on_write,
+};
+
+static ssize_t power_power_events_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_SUSPEND], strlen(mali_power_events[_MALI_DEVICE_SUSPEND]) - 1)) {
+		mali_pm_os_suspend(MALI_TRUE);
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_RESUME], strlen(mali_power_events[_MALI_DEVICE_RESUME]) - 1)) {
+		mali_pm_os_resume();
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_DVFS_PAUSE], strlen(mali_power_events[_MALI_DEVICE_DVFS_PAUSE]) - 1)) {
+		mali_dev_pause();
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_DVFS_RESUME], strlen(mali_power_events[_MALI_DEVICE_DVFS_RESUME]) - 1)) {
+		mali_dev_resume();
+	}
+	*ppos += cnt;
+	return cnt;
+}
+
+static loff_t power_power_events_seek(struct file *file, loff_t offset, int orig)
+{
+	file->f_pos = offset;
+	return 0;
+}
+
+static const struct file_operations power_power_events_fops = {
+	.owner = THIS_MODULE,
+	.write = power_power_events_write,
+	.llseek = power_power_events_seek,
+};
+
+#if MALI_STATE_TRACKING
+static int mali_seq_internal_state_show(struct seq_file *seq_file, void *v)
+{
+	u32 len = 0;
+	u32 size;
+	char *buf;
+
+	size = seq_get_buf(seq_file, &buf);
+
+	if (!size) {
+		return -ENOMEM;
+	}
+
+	/* Create the internal state dump. */
+	len  = snprintf(buf + len, size - len, "Mali device driver %s\n", SVN_REV_STRING);
+	len += snprintf(buf + len, size - len, "License: %s\n\n", MALI_KERNEL_LINUX_LICENSE);
+
+	len += _mali_kernel_core_dump_state(buf + len, size - len);
+
+	seq_commit(seq_file, len);
+
+	return 0;
+}
+
+static int mali_seq_internal_state_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mali_seq_internal_state_show, NULL);
+}
+
+static const struct file_operations mali_seq_internal_state_fops = {
+	.owner = THIS_MODULE,
+	.open = mali_seq_internal_state_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+#endif /* MALI_STATE_TRACKING */
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+static ssize_t profiling_record_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	int r;
+
+	r = snprintf(buf, 64, "%u\n", _mali_internal_profiling_is_recording() ? 1 : 0);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t profiling_record_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	unsigned long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val != 0) {
+		u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* This can be made configurable at a later stage if we need to */
+
+		/* check if we are already recording */
+		if (MALI_TRUE == _mali_internal_profiling_is_recording()) {
+			MALI_DEBUG_PRINT(3, ("Recording of profiling events already in progress\n"));
+			return -EFAULT;
+		}
+
+		/* check if we need to clear out an old recording first */
+		if (MALI_TRUE == _mali_internal_profiling_have_recording()) {
+			if (_MALI_OSK_ERR_OK != _mali_internal_profiling_clear()) {
+				MALI_DEBUG_PRINT(3, ("Failed to clear existing recording of profiling events\n"));
+				return -EFAULT;
+			}
+		}
+
+		/* start recording profiling data */
+		if (_MALI_OSK_ERR_OK != _mali_internal_profiling_start(&limit)) {
+			MALI_DEBUG_PRINT(3, ("Failed to start recording of profiling events\n"));
+			return -EFAULT;
+		}
+
+		MALI_DEBUG_PRINT(3, ("Profiling recording started (max %u events)\n", limit));
+	} else {
+		/* stop recording profiling data */
+		u32 count = 0;
+		if (_MALI_OSK_ERR_OK != _mali_internal_profiling_stop(&count)) {
+			MALI_DEBUG_PRINT(2, ("Failed to stop recording of profiling events\n"));
+			return -EFAULT;
+		}
+
+		MALI_DEBUG_PRINT(2, ("Profiling recording stopped (recorded %u events)\n", count));
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static const struct file_operations profiling_record_fops = {
+	.owner = THIS_MODULE,
+	.read  = profiling_record_read,
+	.write = profiling_record_write,
+};
+
+static void *profiling_events_start(struct seq_file *s, loff_t *pos)
+{
+	loff_t *spos;
+
+	/* check if we have data avaiable */
+	if (MALI_TRUE != _mali_internal_profiling_have_recording()) {
+		return NULL;
+	}
+
+	spos = kmalloc(sizeof(loff_t), GFP_KERNEL);
+	if (NULL == spos) {
+		return NULL;
+	}
+
+	*spos = *pos;
+	return spos;
+}
+
+static void *profiling_events_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	loff_t *spos = v;
+
+	/* check if we have data avaiable */
+	if (MALI_TRUE != _mali_internal_profiling_have_recording()) {
+		return NULL;
+	}
+
+	/* check if the next entry actually is avaiable */
+	if (_mali_internal_profiling_get_count() <= (u32)(*spos + 1)) {
+		return NULL;
+	}
+
+	*pos = ++*spos;
+	return spos;
+}
+
+static void profiling_events_stop(struct seq_file *s, void *v)
+{
+	kfree(v);
+}
+
+static int profiling_events_show(struct seq_file *seq_file, void *v)
+{
+	loff_t *spos = v;
+	u32 index;
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+
+	index = (u32) * spos;
+
+	/* Retrieve all events */
+	if (_MALI_OSK_ERR_OK == _mali_internal_profiling_get_event(index, &timestamp, &event_id, data)) {
+		seq_printf(seq_file, "%llu %u %u %u %u %u %u\n", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]);
+		return 0;
+	}
+
+	return 0;
+}
+
+static int profiling_events_show_human_readable(struct seq_file *seq_file, void *v)
+{
+#define MALI_EVENT_ID_IS_HW(event_id) (((event_id & 0x00FF0000) >= MALI_PROFILING_EVENT_CHANNEL_GP0) && ((event_id & 0x00FF0000) <= MALI_PROFILING_EVENT_CHANNEL_PP7))
+
+	static u64 start_time = 0;
+	loff_t *spos = v;
+	u32 index;
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+
+	index = (u32) * spos;
+
+	/* Retrieve all events */
+	if (_MALI_OSK_ERR_OK == _mali_internal_profiling_get_event(index, &timestamp, &event_id, data)) {
+		seq_printf(seq_file, "%llu %u %u %u %u %u %u # ", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]);
+
+		if (0 == index) {
+			start_time = timestamp;
+		}
+
+		seq_printf(seq_file, "[%06u] ", index);
+
+		switch (event_id & 0x0F000000) {
+		case MALI_PROFILING_EVENT_TYPE_SINGLE:
+			seq_printf(seq_file, "SINGLE | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_START:
+			seq_printf(seq_file, "START | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_STOP:
+			seq_printf(seq_file, "STOP | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_SUSPEND:
+			seq_printf(seq_file, "SUSPEND | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_RESUME:
+			seq_printf(seq_file, "RESUME | ");
+			break;
+		default:
+			seq_printf(seq_file, "0x%01X | ", (event_id & 0x0F000000) >> 24);
+			break;
+		}
+
+		switch (event_id & 0x00FF0000) {
+		case MALI_PROFILING_EVENT_CHANNEL_SOFTWARE:
+			seq_printf(seq_file, "SW | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_GP0:
+			seq_printf(seq_file, "GP0 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP0:
+			seq_printf(seq_file, "PP0 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP1:
+			seq_printf(seq_file, "PP1 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP2:
+			seq_printf(seq_file, "PP2 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP3:
+			seq_printf(seq_file, "PP3 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP4:
+			seq_printf(seq_file, "PP4 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP5:
+			seq_printf(seq_file, "PP5 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP6:
+			seq_printf(seq_file, "PP6 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP7:
+			seq_printf(seq_file, "PP7 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_GPU:
+			seq_printf(seq_file, "GPU | ");
+			break;
+		default:
+			seq_printf(seq_file, "0x%02X | ", (event_id & 0x00FF0000) >> 16);
+			break;
+		}
+
+		if (MALI_EVENT_ID_IS_HW(event_id)) {
+			if (((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_START) || ((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_STOP)) {
+				switch (event_id & 0x0000FFFF) {
+				case MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL:
+					seq_printf(seq_file, "PHYSICAL | ");
+					break;
+				case MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL:
+					seq_printf(seq_file, "VIRTUAL | ");
+					break;
+				default:
+					seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+					break;
+				}
+			} else {
+				seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+			}
+		} else {
+			seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+		}
+
+		seq_printf(seq_file, "T0 + 0x%016llX\n", timestamp - start_time);
+
+		return 0;
+	}
+
+	return 0;
+}
+
+static const struct seq_operations profiling_events_seq_ops = {
+	.start = profiling_events_start,
+	.next  = profiling_events_next,
+	.stop  = profiling_events_stop,
+	.show  = profiling_events_show
+};
+
+static int profiling_events_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &profiling_events_seq_ops);
+}
+
+static const struct file_operations profiling_events_fops = {
+	.owner = THIS_MODULE,
+	.open = profiling_events_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+static const struct seq_operations profiling_events_human_readable_seq_ops = {
+	.start = profiling_events_start,
+	.next  = profiling_events_next,
+	.stop  = profiling_events_stop,
+	.show  = profiling_events_show_human_readable
+};
+
+static int profiling_events_human_readable_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &profiling_events_human_readable_seq_ops);
+}
+
+static const struct file_operations profiling_events_human_readable_fops = {
+	.owner = THIS_MODULE,
+	.open = profiling_events_human_readable_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+static int memory_debugfs_show(struct seq_file *s, void *private_data)
+{
+#ifdef MALI_MEM_SWAP_TRACKING
+	seq_printf(s, "  %-25s  %-10s %-25s %-10s  %-15s  %-15s  %-10s  %-10s %-10s\n"\
+		   "=======================================================================================================================================\n",
+		   "Name (:bytes)", "pid", "pid-name", "mali_mem", "max_mali_mem",
+		   "external_mem", "ump_mem", "dma_mem", "swap_mem");
+#else
+	seq_printf(s, "  %-25s  %-10s %-25s %-10s  %-15s  %-15s  %-10s  %-10s\n"\
+		   "=======================================================================================================================================\n",
+		   "Name (:bytes)", "pid", "pid-name", "mali_mem", "max_mali_mem",
+		   "external_mem", "ump_mem", "dma_mem");
+#endif
+	mali_session_memory_tracking(s);
+	return 0;
+}
+
+static int memory_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, memory_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations memory_usage_fops = {
+	.owner = THIS_MODULE,
+	.open = memory_debugfs_open,
+	.read  = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static ssize_t utilization_gp_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_gp_pp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t utilization_gp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_gp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t utilization_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_pp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+
+static const struct file_operations utilization_gp_pp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_gp_pp_read,
+};
+
+static const struct file_operations utilization_gp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_gp_read,
+};
+
+static const struct file_operations utilization_pp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_pp_read,
+};
+
+static ssize_t user_settings_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+	_mali_uk_user_setting_t setting;
+	char buf[32];
+
+	cnt = min(cnt, sizeof(buf) - 1);
+	if (copy_from_user(buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+	buf[cnt] = '\0';
+
+	ret = kstrtoul(buf, 10, &val);
+	if (0 != ret) {
+		return ret;
+	}
+
+	/* Update setting */
+	setting = (_mali_uk_user_setting_t)(filp->private_data);
+	mali_set_user_setting(setting, val);
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t user_settings_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 value;
+	_mali_uk_user_setting_t setting;
+
+	setting = (_mali_uk_user_setting_t)(filp->private_data);
+	value = mali_get_user_setting(setting);
+
+	r = snprintf(buf, 64, "%u\n", value);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static const struct file_operations user_settings_fops = {
+	.owner = THIS_MODULE,
+	.open = open_copy_private_data,
+	.read = user_settings_read,
+	.write = user_settings_write,
+};
+
+static int mali_sysfs_user_settings_register(void)
+{
+	struct dentry *mali_user_settings_dir = debugfs_create_dir("userspace_settings", mali_debugfs_dir);
+
+	if (mali_user_settings_dir != NULL) {
+		long i;
+		for (i = 0; i < _MALI_UK_USER_SETTING_MAX; i++) {
+			debugfs_create_file(_mali_uk_user_setting_descriptions[i],
+					    0600, mali_user_settings_dir, (void *)i,
+					    &user_settings_fops);
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t pp_num_cores_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int ret;
+	char buffer[32];
+	unsigned long val;
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	ret = kstrtoul(&buffer[0], 10, &val);
+	if (0 != ret) {
+		return -EINVAL;
+	}
+
+	ret = mali_executor_set_perf_level(val, MALI_TRUE); /* override even if core scaling is disabled */
+	if (ret) {
+		return ret;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static ssize_t pp_num_cores_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+
+	r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_enabled());
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations pp_num_cores_enabled_fops = {
+	.owner = THIS_MODULE,
+	.write = pp_num_cores_enabled_write,
+	.read = pp_num_cores_enabled_read,
+	.llseek = default_llseek,
+};
+
+static ssize_t pp_num_cores_total_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+
+	r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_total());
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations pp_num_cores_total_fops = {
+	.owner = THIS_MODULE,
+	.read = pp_num_cores_total_read,
+};
+
+static ssize_t pp_core_scaling_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int ret;
+	char buffer[32];
+	unsigned long val;
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	ret = kstrtoul(&buffer[0], 10, &val);
+	if (0 != ret) {
+		return -EINVAL;
+	}
+
+	switch (val) {
+	case 1:
+		mali_executor_core_scaling_enable();
+		break;
+	case 0:
+		mali_executor_core_scaling_disable();
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static ssize_t pp_core_scaling_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	return simple_read_from_buffer(buf, count, offp, mali_executor_core_scaling_is_enabled() ? "1\n" : "0\n", 2);
+}
+static const struct file_operations pp_core_scaling_enabled_fops = {
+	.owner = THIS_MODULE,
+	.write = pp_core_scaling_enabled_write,
+	.read = pp_core_scaling_enabled_read,
+	.llseek = default_llseek,
+};
+
+static ssize_t version_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r = 0;
+	char buffer[64];
+
+	switch (mali_kernel_core_get_product_id()) {
+	case _MALI_PRODUCT_ID_MALI200:
+		r = snprintf(buffer, 64, "Mali-200\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI300:
+		r = snprintf(buffer, 64, "Mali-300\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI400:
+		r = snprintf(buffer, 64, "Mali-400 MP\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI450:
+		r = snprintf(buffer, 64, "Mali-450 MP\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI470:
+		r = snprintf(buffer, 64, "Mali-470 MP\n");
+		break;
+	case _MALI_PRODUCT_ID_UNKNOWN:
+		return -EINVAL;
+		break;
+	};
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations version_fops = {
+	.owner = THIS_MODULE,
+	.read = version_read,
+};
+
+#if defined(DEBUG)
+static int timeline_debugfs_show(struct seq_file *s, void *private_data)
+{
+	struct mali_session_data *session, *tmp;
+	u32 session_seq = 1;
+
+	seq_printf(s, "timeline system info: \n=================\n\n");
+
+	mali_session_lock();
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		seq_printf(s, "session %d <%p> start:\n", session_seq, session);
+		mali_timeline_debug_print_system(session->timeline_system, s);
+		seq_printf(s, "session %d end\n\n\n", session_seq++);
+	}
+	mali_session_unlock();
+
+	return 0;
+}
+
+static int timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, timeline_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations timeline_dump_fops = {
+	.owner = THIS_MODULE,
+	.open = timeline_debugfs_open,
+	.read  = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release
+};
+#endif
+
+int mali_sysfs_register(const char *mali_dev_name)
+{
+	mali_debugfs_dir = debugfs_create_dir(mali_dev_name, NULL);
+	if (ERR_PTR(-ENODEV) == mali_debugfs_dir) {
+		/* Debugfs not supported. */
+		mali_debugfs_dir = NULL;
+	} else {
+		if (NULL != mali_debugfs_dir) {
+			/* Debugfs directory created successfully; create files now */
+			struct dentry *mali_power_dir;
+			struct dentry *mali_gp_dir;
+			struct dentry *mali_pp_dir;
+			struct dentry *mali_l2_dir;
+			struct dentry *mali_profiling_dir;
+
+			debugfs_create_file("version", 0400, mali_debugfs_dir, NULL, &version_fops);
+
+			mali_power_dir = debugfs_create_dir("power", mali_debugfs_dir);
+			if (mali_power_dir != NULL) {
+				debugfs_create_file("always_on", 0600, mali_power_dir, NULL, &power_always_on_fops);
+				debugfs_create_file("power_events", 0200, mali_power_dir, NULL, &power_power_events_fops);
+			}
+
+			mali_gp_dir = debugfs_create_dir("gp", mali_debugfs_dir);
+			if (mali_gp_dir != NULL) {
+				u32 num_groups;
+				long i;
+
+				num_groups = mali_group_get_glob_num_groups();
+				for (i = 0; i < num_groups; i++) {
+					struct mali_group *group = mali_group_get_glob_group(i);
+
+					struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+					if (NULL != gp_core) {
+						struct dentry *mali_gp_gpx_dir;
+						mali_gp_gpx_dir = debugfs_create_dir("gp0", mali_gp_dir);
+						if (NULL != mali_gp_gpx_dir) {
+							debugfs_create_file("base_addr", 0400, mali_gp_gpx_dir, &gp_core->hw_core, &hw_core_base_addr_fops);
+							debugfs_create_file("enabled", 0600, mali_gp_gpx_dir, group, &group_enabled_fops);
+						}
+						break; /* no need to look for any other GP cores */
+					}
+
+				}
+			}
+
+			mali_pp_dir = debugfs_create_dir("pp", mali_debugfs_dir);
+			if (mali_pp_dir != NULL) {
+				u32 num_groups;
+				long i;
+
+				debugfs_create_file("num_cores_total", 0400, mali_pp_dir, NULL, &pp_num_cores_total_fops);
+				debugfs_create_file("num_cores_enabled", 0600, mali_pp_dir, NULL, &pp_num_cores_enabled_fops);
+				debugfs_create_file("core_scaling_enabled", 0600, mali_pp_dir, NULL, &pp_core_scaling_enabled_fops);
+
+				num_groups = mali_group_get_glob_num_groups();
+				for (i = 0; i < num_groups; i++) {
+					struct mali_group *group = mali_group_get_glob_group(i);
+
+					struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+					if (NULL != pp_core) {
+						char buf[16];
+						struct dentry *mali_pp_ppx_dir;
+						_mali_osk_snprintf(buf, sizeof(buf), "pp%u", mali_pp_core_get_id(pp_core));
+						mali_pp_ppx_dir = debugfs_create_dir(buf, mali_pp_dir);
+						if (NULL != mali_pp_ppx_dir) {
+							debugfs_create_file("base_addr", 0400, mali_pp_ppx_dir, &pp_core->hw_core, &hw_core_base_addr_fops);
+							if (!mali_group_is_virtual(group)) {
+								debugfs_create_file("enabled", 0600, mali_pp_ppx_dir, group, &group_enabled_fops);
+							}
+						}
+					}
+				}
+			}
+
+			mali_l2_dir = debugfs_create_dir("l2", mali_debugfs_dir);
+			if (mali_l2_dir != NULL) {
+				struct dentry *mali_l2_all_dir;
+				u32 l2_id;
+				struct mali_l2_cache_core *l2_cache;
+
+				mali_l2_all_dir = debugfs_create_dir("all", mali_l2_dir);
+				if (mali_l2_all_dir != NULL) {
+					debugfs_create_file("counter_src0", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src0_fops);
+					debugfs_create_file("counter_src1", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src1_fops);
+				}
+
+				l2_id = 0;
+				l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+				while (NULL != l2_cache) {
+					char buf[16];
+					struct dentry *mali_l2_l2x_dir;
+					_mali_osk_snprintf(buf, sizeof(buf), "l2%u", l2_id);
+					mali_l2_l2x_dir = debugfs_create_dir(buf, mali_l2_dir);
+					if (NULL != mali_l2_l2x_dir) {
+						debugfs_create_file("counter_src0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src0_fops);
+						debugfs_create_file("counter_src1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src1_fops);
+						debugfs_create_file("counter_val0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val0_fops);
+						debugfs_create_file("counter_val1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val1_fops);
+						debugfs_create_file("base_addr", 0400, mali_l2_l2x_dir, &l2_cache->hw_core, &hw_core_base_addr_fops);
+					}
+
+					/* try next L2 */
+					l2_id++;
+					l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+				}
+			}
+
+			debugfs_create_file("gpu_memory", 0444, mali_debugfs_dir, NULL, &memory_usage_fops);
+
+			debugfs_create_file("utilization_gp_pp", 0400, mali_debugfs_dir, NULL, &utilization_gp_pp_fops);
+			debugfs_create_file("utilization_gp", 0400, mali_debugfs_dir, NULL, &utilization_gp_fops);
+			debugfs_create_file("utilization_pp", 0400, mali_debugfs_dir, NULL, &utilization_pp_fops);
+
+			mali_profiling_dir = debugfs_create_dir("profiling", mali_debugfs_dir);
+			if (mali_profiling_dir != NULL) {
+				u32 max_sub_jobs;
+				long i;
+				struct dentry *mali_profiling_gp_dir;
+				struct dentry *mali_profiling_pp_dir;
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+				struct dentry *mali_profiling_proc_dir;
+#endif
+				/*
+				 * Create directory where we can set GP HW counters.
+				 */
+				mali_profiling_gp_dir = debugfs_create_dir("gp", mali_profiling_dir);
+				if (mali_profiling_gp_dir != NULL) {
+					debugfs_create_file("counter_src0", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(0), &profiling_counter_src_fops);
+					debugfs_create_file("counter_src1", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(1), &profiling_counter_src_fops);
+				}
+
+				/*
+				 * Create directory where we can set PP HW counters.
+				 * Possible override with specific HW counters for a particular sub job
+				 * (Disable core scaling before using the override!)
+				 */
+				mali_profiling_pp_dir = debugfs_create_dir("pp", mali_profiling_dir);
+				if (mali_profiling_pp_dir != NULL) {
+					debugfs_create_file("counter_src0", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(0), &profiling_counter_src_fops);
+					debugfs_create_file("counter_src1", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(1), &profiling_counter_src_fops);
+				}
+
+				max_sub_jobs = mali_executor_get_num_cores_total();
+				for (i = 0; i < max_sub_jobs; i++) {
+					char buf[16];
+					struct dentry *mali_profiling_pp_x_dir;
+					_mali_osk_snprintf(buf, sizeof(buf), "%u", i);
+					mali_profiling_pp_x_dir = debugfs_create_dir(buf, mali_profiling_pp_dir);
+					if (NULL != mali_profiling_pp_x_dir) {
+						debugfs_create_file("counter_src0",
+								    0600, mali_profiling_pp_x_dir,
+								    (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(0, i),
+								    &profiling_counter_src_fops);
+						debugfs_create_file("counter_src1",
+								    0600, mali_profiling_pp_x_dir,
+								    (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(1, i),
+								    &profiling_counter_src_fops);
+					}
+				}
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+				mali_profiling_proc_dir = debugfs_create_dir("proc", mali_profiling_dir);
+				if (mali_profiling_proc_dir != NULL) {
+					struct dentry *mali_profiling_proc_default_dir = debugfs_create_dir("default", mali_profiling_proc_dir);
+					if (mali_profiling_proc_default_dir != NULL) {
+						debugfs_create_file("enable", 0600, mali_profiling_proc_default_dir, (void *)_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, &user_settings_fops);
+					}
+				}
+				debugfs_create_file("record", 0600, mali_profiling_dir, NULL, &profiling_record_fops);
+				debugfs_create_file("events", 0400, mali_profiling_dir, NULL, &profiling_events_fops);
+				debugfs_create_file("events_human_readable", 0400, mali_profiling_dir, NULL, &profiling_events_human_readable_fops);
+#endif
+			}
+
+#if MALI_STATE_TRACKING
+			debugfs_create_file("state_dump", 0400, mali_debugfs_dir, NULL, &mali_seq_internal_state_fops);
+#endif
+
+#if defined(DEBUG)
+			debugfs_create_file("timeline_dump", 0400, mali_debugfs_dir, NULL, &timeline_dump_fops);
+#endif
+			if (mali_sysfs_user_settings_register()) {
+				/* Failed to create the debugfs entries for the user settings DB. */
+				MALI_DEBUG_PRINT(2, ("Failed to create user setting debugfs files. Ignoring...\n"));
+			}
+		}
+	}
+
+	/* Success! */
+	return 0;
+}
+
+int mali_sysfs_unregister(void)
+{
+	if (NULL != mali_debugfs_dir) {
+		debugfs_remove_recursive(mali_debugfs_dir);
+	}
+	return 0;
+}
+
+#else /* MALI_LICENSE_IS_GPL */
+
+/* Dummy implementations for non-GPL */
+
+int mali_sysfs_register(struct mali_dev *device, dev_t dev, const char *mali_dev_name)
+{
+	return 0;
+}
+
+int mali_sysfs_unregister(void)
+{
+	return 0;
+}
+
+#endif /* MALI_LICENSE_IS_GPL */
diff --git a/mali/linux/mali_kernel_sysfs.h b/mali/linux/mali_kernel_sysfs.h
new file mode 100755
index 0000000..9dad7f2
--- /dev/null
+++ b/mali/linux/mali_kernel_sysfs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2011-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_SYSFS_H__
+#define __MALI_KERNEL_SYSFS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/device.h>
+
+#define MALI_PROC_DIR "driver/mali"
+
+int mali_sysfs_register(const char *mali_dev_name);
+int mali_sysfs_unregister(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LINUX_H__ */
diff --git a/mali/linux/mali_linux_trace.h b/mali/linux/mali_linux_trace.h
new file mode 100755
index 0000000..08682a7
--- /dev/null
+++ b/mali/linux/mali_linux_trace.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#if !defined (MALI_LINUX_TRACE_H) || defined (TRACE_HEADER_MULTI_READ)
+#define MALI_LINUX_TRACE_H
+
+#include <linux/types.h>
+
+#include <linux/stringify.h>
+#include <linux/tracepoint.h>
+
+#undef  TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#ifndef TRACEPOINTS_ENABLED
+#define TRACE_SYSTEM_STRING __stringfy(TRACE_SYSTEM)
+#endif
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+/**
+ * Define the tracepoint used to communicate the status of a GPU. Called
+ * when a GPU turns on or turns off.
+ *
+ * @param event_id The type of the event. This parameter is a bitfield
+ *  encoding the type of the event.
+ *
+ * @param d0 First data parameter.
+ * @param d1 Second data parameter.
+ * @param d2 Third data parameter.
+ * @param d3 Fourth data parameter.
+ * @param d4 Fifth data parameter.
+ */
+TRACE_EVENT(mali_timeline_event,
+
+	    TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1,
+		     unsigned int d2, unsigned int d3, unsigned int d4),
+
+	    TP_ARGS(event_id, d0, d1, d2, d3, d4),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, event_id)
+		    __field(unsigned int, d0)
+		    __field(unsigned int, d1)
+		    __field(unsigned int, d2)
+		    __field(unsigned int, d3)
+		    __field(unsigned int, d4)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->event_id = event_id;
+		    __entry->d0 = d0;
+		    __entry->d1 = d1;
+		    __entry->d2 = d2;
+		    __entry->d3 = d3;
+		    __entry->d4 = d4;
+	    ),
+
+	    TP_printk("event=%d", __entry->event_id)
+	   );
+
+/**
+ * Define a tracepoint used to regsiter the value of a hardware counter.
+ * Hardware counters belonging to the vertex or fragment processor are
+ * reported via this tracepoint each frame, whilst L2 cache hardware
+ * counters are reported continuously.
+ *
+ * @param counter_id The counter ID.
+ * @param value The value of the counter.
+ */
+TRACE_EVENT(mali_hw_counter,
+
+	    TP_PROTO(unsigned int counter_id, unsigned int value),
+
+	    TP_ARGS(counter_id, value),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, counter_id)
+		    __field(unsigned int, value)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->counter_id = counter_id;
+	    ),
+
+	    TP_printk("event %d = %d", __entry->counter_id, __entry->value)
+	   );
+
+/**
+ * Define a tracepoint used to send a bundle of software counters.
+ *
+ * @param counters The bundle of counters.
+ */
+TRACE_EVENT(mali_sw_counters,
+
+	    TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters),
+
+	    TP_ARGS(pid, tid, surface_id, counters),
+
+	    TP_STRUCT__entry(
+		    __field(pid_t, pid)
+		    __field(pid_t, tid)
+		    __field(void *, surface_id)
+		    __field(unsigned int *, counters)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->pid = pid;
+		    __entry->tid = tid;
+		    __entry->surface_id = surface_id;
+		    __entry->counters = counters;
+	    ),
+
+	    TP_printk("counters were %s", __entry->counters == NULL ? "NULL" : "not NULL")
+	   );
+
+/**
+ * Define a tracepoint used to gather core activity for systrace
+ * @param pid The process id for which the core activity originates from
+ * @param active If the core is active (1) or not (0)
+ * @param core_type The type of core active, either GP (1) or PP (0)
+ * @param core_id The core id that is active for the core_type
+ * @param frame_builder_id The frame builder id associated with this core activity
+ * @param flush_id The flush id associated with this core activity
+ */
+TRACE_EVENT(mali_core_active,
+
+	    TP_PROTO(pid_t pid, unsigned int active, unsigned int core_type, unsigned int core_id, unsigned int frame_builder_id, unsigned int flush_id),
+
+	    TP_ARGS(pid, active, core_type, core_id, frame_builder_id, flush_id),
+
+	    TP_STRUCT__entry(
+		    __field(pid_t, pid)
+		    __field(unsigned int, active)
+		    __field(unsigned int, core_type)
+		    __field(unsigned int, core_id)
+		    __field(unsigned int, frame_builder_id)
+		    __field(unsigned int, flush_id)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->pid = pid;
+		    __entry->active = active;
+		    __entry->core_type = core_type;
+		    __entry->core_id = core_id;
+		    __entry->frame_builder_id = frame_builder_id;
+		    __entry->flush_id = flush_id;
+	    ),
+
+	    TP_printk("%s|%d|%s%i:%x|%d", __entry->active ? "S" : "F", __entry->pid, __entry->core_type ? "GP" : "PP", __entry->core_id, __entry->flush_id, __entry->frame_builder_id)
+	   );
+
+#endif /* MALI_LINUX_TRACE_H */
+
+/* This part must exist outside the header guard. */
+#include <trace/define_trace.h>
+
diff --git a/mali/linux/mali_memory.c b/mali/linux/mali_memory.c
new file mode 100755
index 0000000..78fdd05
--- /dev/null
+++ b/mali/linux/mali_memory.c
@@ -0,0 +1,585 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/platform_device.h>
+#include <linux/idr.h>
+
+#include "mali_osk.h"
+#include "mali_executor.h"
+
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_util.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_swap_alloc.h"
+#include "mali_memory_defer_bind.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_secure.h"
+#endif
+
+extern unsigned int mali_dedicated_mem_size;
+extern unsigned int mali_shared_mem_size;
+
+#define MALI_VM_NUM_FAULT_PREFETCH (0x8)
+
+static void mali_mem_vma_open(struct vm_area_struct *vma)
+{
+	mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+	MALI_DEBUG_PRINT(4, ("Open called on vma %p\n", vma));
+
+	/* If need to share the allocation, add ref_count here */
+	mali_allocation_ref(alloc);
+	return;
+}
+static void mali_mem_vma_close(struct vm_area_struct *vma)
+{
+	/* If need to share the allocation, unref ref_count here */
+	mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+
+	if (NULL        != alloc) {
+		struct file *filp = NULL;
+		struct mali_session_data *session = NULL;
+
+		filp = vma->vm_file;
+		MALI_DEBUG_ASSERT(filp);
+		session = (struct mali_session_data *)filp->private_data;
+		MALI_DEBUG_ASSERT(session);
+
+		mali_session_memory_lock(session);
+		vma->vm_private_data = NULL;
+		mali_session_memory_unlock(session);
+
+		mali_allocation_unref(&alloc);
+	}
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+static int mali_mem_vma_fault(struct vm_fault *vmf)
+#else
+static int mali_mem_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+#endif
+{
+	struct file *filp = NULL;
+	struct mali_session_data *session = NULL;
+	mali_mem_allocation *alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	int ret;
+	int prefetch_num = MALI_VM_NUM_FAULT_PREFETCH;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+	struct vm_area_struct *vma = vmf->vma;
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	unsigned long address = (unsigned long)vmf->address;
+#else
+	unsigned long address = (unsigned long)vmf->virtual_address;
+#endif
+	filp = vma->vm_file;
+	MALI_DEBUG_ASSERT(filp);
+	session = (struct mali_session_data *)filp->private_data;
+	MALI_DEBUG_ASSERT(session);
+	mali_session_memory_lock(session);
+	if (NULL == vma->vm_private_data) {
+		MALI_DEBUG_PRINT(1, ("mali_vma_fault: The memory has been freed!\n"));
+		mali_session_memory_unlock(session);
+		return VM_FAULT_SIGBUS;
+	} else {
+		alloc = (mali_mem_allocation *)vma->vm_private_data;
+		MALI_DEBUG_ASSERT(alloc->backend_handle);
+		MALI_DEBUG_ASSERT(alloc->cpu_mapping.vma == vma);
+		MALI_DEBUG_ASSERT((unsigned long)alloc->cpu_mapping.addr <= address);
+		mali_allocation_ref(alloc);
+	}
+	mali_session_memory_unlock(session);
+
+
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	if (!(mem_bkend = idr_find(&mali_backend_idr, alloc->backend_handle))) {
+		MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n"));
+		mutex_unlock(&mali_idr_mutex);
+		mali_allocation_unref(&alloc);
+		return VM_FAULT_SIGBUS;
+	}
+	mutex_unlock(&mali_idr_mutex);
+	MALI_DEBUG_ASSERT(mem_bkend->type == alloc->type);
+
+	if ((mem_bkend->type == MALI_MEM_COW && (MALI_MEM_BACKEND_FLAG_SWAP_COWED !=
+			(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) &&
+	    (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE)) {
+		/*check if use page fault to do COW*/
+		MALI_DEBUG_PRINT(4, ("mali_vma_fault: do cow allocate on demand!, address=0x%x\n", address));
+		mutex_lock(&mem_bkend->mutex);
+		ret = mali_mem_cow_allocate_on_demand(mem_bkend,
+						      (address - vma->vm_start) / PAGE_SIZE);
+		mutex_unlock(&mem_bkend->mutex);
+
+		if (ret != _MALI_OSK_ERR_OK) {
+			mali_allocation_unref(&alloc);
+			return VM_FAULT_OOM;
+		}
+		prefetch_num = 1;
+
+		/* handle COW modified range cpu mapping
+		 we zap the mapping in cow_modify_range, it will trigger page fault
+		 when CPU access it, so here we map it to CPU*/
+		mutex_lock(&mem_bkend->mutex);
+		ret = mali_mem_cow_cpu_map_pages_locked(mem_bkend, vma, address, prefetch_num);
+		mutex_unlock(&mem_bkend->mutex);
+
+		if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+			mali_allocation_unref(&alloc);
+			return VM_FAULT_SIGBUS;
+		}
+	} else if ((mem_bkend->type == MALI_MEM_SWAP) ||
+		   (mem_bkend->type == MALI_MEM_COW && (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+		u32 offset_in_bkend = (address - vma->vm_start) / PAGE_SIZE;
+		int ret = _MALI_OSK_ERR_OK;
+
+		mutex_lock(&mem_bkend->mutex);
+		if (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE) {
+			ret = mali_mem_swap_cow_page_on_demand(mem_bkend, offset_in_bkend, &vmf->page);
+		} else {
+			ret = mali_mem_swap_allocate_page_on_demand(mem_bkend, offset_in_bkend, &vmf->page);
+		}
+		mutex_unlock(&mem_bkend->mutex);
+
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(2, ("Mali swap memory page fault process failed, address=0x%x\n", address));
+			mali_allocation_unref(&alloc);
+			return VM_FAULT_OOM;
+		} else {
+			mali_allocation_unref(&alloc);
+			return VM_FAULT_LOCKED;
+		}
+	} else {
+		MALI_PRINT_ERROR(("Mali vma fault! It never happen, indicating some logic errors in caller.\n"));
+		mali_allocation_unref(&alloc);
+		/*NOT support yet or OOM*/
+		return VM_FAULT_OOM;
+	}
+
+	mali_allocation_unref(&alloc);
+	return VM_FAULT_NOPAGE;
+}
+
+static struct vm_operations_struct mali_kernel_vm_ops = {
+	.open = mali_mem_vma_open,
+	.close = mali_mem_vma_close,
+	.fault = mali_mem_vma_fault,
+};
+
+
+/** @ map mali allocation to CPU address
+*
+* Supported backend types:
+* --MALI_MEM_OS
+* -- need to add COW?
+ *Not supported backend types:
+* -_MALI_MEMORY_BIND_BACKEND_UMP
+* -_MALI_MEMORY_BIND_BACKEND_DMA_BUF
+* -_MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY
+*
+*/
+int mali_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct mali_session_data *session;
+	mali_mem_allocation *mali_alloc = NULL;
+	u32 mali_addr = vma->vm_pgoff << PAGE_SHIFT;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	int ret = -EFAULT;
+
+	session = (struct mali_session_data *)filp->private_data;
+	if (NULL == session) {
+		MALI_PRINT_ERROR(("mmap called without any session data available\n"));
+		return -EFAULT;
+	}
+
+	MALI_DEBUG_PRINT(4, ("MMap() handler: start=0x%08X, phys=0x%08X, size=0x%08X vma->flags 0x%08x\n",
+			     (unsigned int)vma->vm_start, (unsigned int)(vma->vm_pgoff << PAGE_SHIFT),
+			     (unsigned int)(vma->vm_end - vma->vm_start), vma->vm_flags));
+
+	/* Operations used on any memory system */
+	/* do not need to anything in vm open/close now */
+
+	/* find mali allocation structure by vaddress*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+	if (likely(mali_vma_node)) {
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start);
+		if (unlikely(mali_addr != mali_vma_node->vm_node.start)) {
+			/* only allow to use start address for mmap */
+			MALI_DEBUG_PRINT(1, ("mali_addr != mali_vma_node->vm_node.start\n"));
+			return -EFAULT;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(NULL == mali_vma_node);
+		return -EFAULT;
+	}
+
+	mali_alloc->cpu_mapping.addr = (void __user *)vma->vm_start;
+
+	if (mali_alloc->flags & _MALI_MEMORY_ALLOCATE_DEFER_BIND) {
+		MALI_DEBUG_PRINT(1, ("ERROR : trying to access varying memory by CPU!\n"));
+		return -EFAULT;
+	}
+
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	if (!(mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle))) {
+		MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n"));
+		mutex_unlock(&mali_idr_mutex);
+		return -EFAULT;
+	}
+	mutex_unlock(&mali_idr_mutex);
+
+	if ((vma->vm_start + mem_bkend->size) > vma->vm_end) {
+		MALI_PRINT_ERROR(("mali_mmap: out of memory mapping map_size %d, physical_size %d\n",  vma->vm_end - vma->vm_start, mem_bkend->size));
+		return -EFAULT;
+	}
+
+	if (!(MALI_MEM_SWAP == mali_alloc->type ||
+	      (MALI_MEM_COW == mali_alloc->type && (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)))) {
+		/* Set some bits which indicate that, the memory is IO memory, meaning
+		 * that no paging is to be performed and the memory should not be
+		 * included in crash dumps. And that the memory is reserved, meaning
+		 * that it's present and can never be paged out (see also previous
+		 * entry)
+		 */
+		vma->vm_flags |= VM_IO;
+		vma->vm_flags |= VM_DONTCOPY;
+		vma->vm_flags |= VM_PFNMAP;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+		vma->vm_flags |= VM_RESERVED;
+#else
+		vma->vm_flags |= VM_DONTDUMP;
+		vma->vm_flags |= VM_DONTEXPAND;
+#endif
+	} else if (MALI_MEM_SWAP == mali_alloc->type) {
+		vma->vm_pgoff = mem_bkend->start_idx;
+	}
+
+	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	vma->vm_ops = &mali_kernel_vm_ops;
+
+	mali_alloc->cpu_mapping.addr = (void __user *)vma->vm_start;
+
+	/* If it's a copy-on-write mapping, map to read only */
+	if (!(vma->vm_flags & VM_WRITE)) {
+		MALI_DEBUG_PRINT(4, ("mmap allocation with read only !\n"));
+		/* add VM_WRITE for do_page_fault will check this when a write fault */
+		vma->vm_flags |= VM_WRITE | VM_READ;
+		vma->vm_page_prot = PAGE_READONLY;
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE;
+		goto out;
+	}
+
+	if (mem_bkend->type == MALI_MEM_OS) {
+		ret = mali_mem_os_cpu_map(mem_bkend, vma);
+	} else if (mem_bkend->type == MALI_MEM_COW &&
+		   (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+		ret = mali_mem_cow_cpu_map(mem_bkend, vma);
+	} else if (mem_bkend->type == MALI_MEM_BLOCK) {
+		ret = mali_mem_block_cpu_map(mem_bkend, vma);
+	} else if ((mem_bkend->type == MALI_MEM_SWAP) || (mem_bkend->type == MALI_MEM_COW &&
+			(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)))) {
+		/*For swappable memory, CPU page table will be created by page fault handler. */
+		ret = 0;
+	} else if (mem_bkend->type == MALI_MEM_SECURE) {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		ret = mali_mem_secure_cpu_map(mem_bkend, vma);
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory\n"));
+		return -EFAULT;
+#endif
+	} else {
+		/* Not support yet*/
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of backend memory! \n"));
+		return -EFAULT;
+	}
+
+	if (ret != 0) {
+		MALI_DEBUG_PRINT(1, ("ret != 0\n"));
+		return -EFAULT;
+	}
+out:
+	MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == mali_alloc->magic);
+
+	vma->vm_private_data = (void *)mali_alloc;
+	mali_alloc->cpu_mapping.vma = vma;
+
+	mali_allocation_ref(mali_alloc);
+
+	return 0;
+}
+
+_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor)
+{
+	u32 size = descriptor->psize;
+	struct mali_session_data *session = descriptor->session;
+
+	MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == descriptor->magic);
+
+	/* Map dma-buf into this session's page tables */
+
+	if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		size += MALI_MMU_PAGE_SIZE;
+	}
+
+	return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start, size);
+}
+
+_mali_osk_errcode_t mali_mem_mali_map_resize(mali_mem_allocation *descriptor, u32 new_size)
+{
+	u32 old_size = descriptor->psize;
+	struct mali_session_data *session = descriptor->session;
+
+	MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == descriptor->magic);
+
+	if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		new_size  += MALI_MMU_PAGE_SIZE;
+	}
+
+	if (new_size > old_size) {
+		MALI_DEBUG_ASSERT(new_size <= descriptor->mali_vma_node.vm_node.size);
+		return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start + old_size, new_size - old_size);
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags)
+{
+	if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		size += MALI_MMU_PAGE_SIZE;
+	}
+
+	/* Umap and flush L2 */
+	mali_mmu_pagedir_unmap(session->page_directory, vaddr, size);
+	mali_executor_zap_all_active(session);
+}
+
+u32 _mali_ukk_report_memory_usage(void)
+{
+	u32 sum = 0;
+
+	if (MALI_TRUE == mali_memory_have_dedicated_memory()) {
+		sum += mali_mem_block_allocator_stat();
+	}
+
+	sum += mali_mem_os_stat();
+
+	return sum;
+}
+
+u32 _mali_ukk_report_total_memory_size(void)
+{
+	return mali_dedicated_mem_size + mali_shared_mem_size;
+}
+
+
+/**
+ * Per-session memory descriptor mapping table sizes
+ */
+#define MALI_MEM_DESCRIPTORS_INIT 64
+#define MALI_MEM_DESCRIPTORS_MAX 65536
+
+_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session_data)
+{
+	MALI_DEBUG_PRINT(5, ("Memory session begin\n"));
+
+	session_data->memory_lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED,
+				    _MALI_OSK_LOCK_ORDER_MEM_SESSION);
+
+	if (NULL == session_data->memory_lock) {
+		MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	}
+
+	session_data->cow_lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_UNORDERED, 0);
+	if (NULL == session_data->cow_lock) {
+		_mali_osk_mutex_term(session_data->memory_lock);
+		MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	}
+
+	mali_memory_manager_init(&session_data->allocation_mgr);
+
+	MALI_DEBUG_PRINT(5, ("MMU session begin: success\n"));
+	MALI_SUCCESS;
+}
+
+void mali_memory_session_end(struct mali_session_data *session)
+{
+	MALI_DEBUG_PRINT(3, ("MMU session end\n"));
+
+	if (NULL == session) {
+		MALI_DEBUG_PRINT(1, ("No session data found during session end\n"));
+		return;
+	}
+	/* free allocation */
+	mali_free_session_allocations(session);
+	/* do some check in unint*/
+	mali_memory_manager_uninit(&session->allocation_mgr);
+
+	/* Free the lock */
+	_mali_osk_mutex_term(session->memory_lock);
+	_mali_osk_mutex_term(session->cow_lock);
+	return;
+}
+
+_mali_osk_errcode_t mali_memory_initialize(void)
+{
+	_mali_osk_errcode_t err;
+
+	idr_init(&mali_backend_idr);
+	mutex_init(&mali_idr_mutex);
+
+	err = mali_mem_swap_init();
+	if (err != _MALI_OSK_ERR_OK) {
+		return err;
+	}
+	err = mali_mem_os_init();
+	if (_MALI_OSK_ERR_OK == err) {
+		err = mali_mem_defer_bind_manager_init();
+	}
+
+	return err;
+}
+
+void mali_memory_terminate(void)
+{
+	mali_mem_swap_term();
+	mali_mem_defer_bind_manager_destory();
+	mali_mem_os_term();
+	if (mali_memory_have_dedicated_memory()) {
+		mali_mem_block_allocator_destroy();
+	}
+}
+
+
+struct mali_page_node *_mali_page_node_allocate(mali_page_node_type type)
+{
+	mali_page_node *page_node = NULL;
+
+	page_node = kzalloc(sizeof(mali_page_node), GFP_KERNEL);
+	MALI_DEBUG_ASSERT(NULL != page_node);
+
+	if (page_node) {
+		page_node->type = type;
+		INIT_LIST_HEAD(&page_node->list);
+	}
+
+	return page_node;
+}
+
+void _mali_page_node_ref(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		/* add ref to this page */
+		get_page(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		mali_mem_block_add_ref(node);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		atomic_inc(&node->swap_it->ref_count);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+}
+
+void _mali_page_node_unref(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		/* unref to this page */
+		put_page(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		mali_mem_block_dec_ref(node);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+}
+
+
+void _mali_page_node_add_page(struct mali_page_node *node, struct page *page)
+{
+	MALI_DEBUG_ASSERT(MALI_PAGE_NODE_OS == node->type);
+	node->page = page;
+}
+
+
+void _mali_page_node_add_swap_item(struct mali_page_node *node, struct mali_swap_item *item)
+{
+	MALI_DEBUG_ASSERT(MALI_PAGE_NODE_SWAP == node->type);
+	node->swap_it = item;
+}
+
+void _mali_page_node_add_block_item(struct mali_page_node *node, mali_block_item *item)
+{
+	MALI_DEBUG_ASSERT(MALI_PAGE_NODE_BLOCK == node->type);
+	node->blk_it = item;
+}
+
+
+int _mali_page_node_get_ref_count(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		/* get ref count of this page */
+		return page_count(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		return mali_mem_block_get_ref_count(node);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return atomic_read(&node->swap_it->ref_count);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+	return -1;
+}
+
+
+dma_addr_t _mali_page_node_get_dma_addr(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		return page_private(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		return _mali_blk_item_get_phy_addr(node->blk_it);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return node->swap_it->dma_addr;
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+	return 0;
+}
+
+
+unsigned long _mali_page_node_get_pfn(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		return page_to_pfn(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		/* get phy addr for BLOCK page*/
+		return _mali_blk_item_get_pfn(node->blk_it);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return page_to_pfn(node->swap_it->page);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+	return 0;
+}
+
+
diff --git a/mali/linux/mali_memory.h b/mali/linux/mali_memory.h
new file mode 100755
index 0000000..e5e4f66
--- /dev/null
+++ b/mali/linux/mali_memory.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_H__
+#define __MALI_MEMORY_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include <linux/list.h>
+#include <linux/mm.h>
+
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+
+_mali_osk_errcode_t mali_memory_initialize(void);
+void mali_memory_terminate(void);
+
+/** @brief Allocate a page table page
+ *
+ * Allocate a page for use as a page directory or page table. The page is
+ * mapped into kernel space.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise an error code
+ * @param table_page GPU pointer to the allocated page
+ * @param mapping CPU pointer to the mapping of the allocated page
+ */
+MALI_STATIC_INLINE _mali_osk_errcode_t
+mali_mmu_get_table_page(mali_dma_addr *table_page, mali_io_address *mapping)
+{
+	return mali_mem_os_get_table_page(table_page, mapping);
+}
+
+/** @brief Release a page table page
+ *
+ * Release a page table page allocated through \a mali_mmu_get_table_page
+ *
+ * @param pa the GPU address of the page to release
+ */
+MALI_STATIC_INLINE void
+mali_mmu_release_table_page(mali_dma_addr phys, void *virt)
+{
+	mali_mem_os_release_table_page(phys, virt);
+}
+
+/** @brief mmap function
+ *
+ * mmap syscalls on the Mali device node will end up here.
+ *
+ * This function allocates Mali memory and maps it on CPU and Mali.
+ */
+int mali_mmap(struct file *filp, struct vm_area_struct *vma);
+
+/** @brief Start a new memory session
+ *
+ * Called when a process opens the Mali device node.
+ *
+ * @param session Pointer to session to initialize
+ */
+_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session);
+
+/** @brief Close a memory session
+ *
+ * Called when a process closes the Mali device node.
+ *
+ * Memory allocated by the session will be freed
+ *
+ * @param session Pointer to the session to terminate
+ */
+void mali_memory_session_end(struct mali_session_data *session);
+
+/** @brief Prepare Mali page tables for mapping
+ *
+ * This function will prepare the Mali page tables for mapping the memory
+ * described by \a descriptor.
+ *
+ * Page tables will be reference counted and allocated, if not yet present.
+ *
+ * @param descriptor Pointer to the memory descriptor to the mapping
+ */
+_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor);
+
+/** @brief Resize Mali page tables for mapping
+ *
+ * This function will Resize the Mali page tables for mapping the memory
+ * described by \a descriptor.
+ *
+ * Page tables will be reference counted and allocated, if not yet present.
+ *
+ * @param descriptor Pointer to the memory descriptor to the mapping
+ * @param new_size The new size of descriptor
+ */
+_mali_osk_errcode_t mali_mem_mali_map_resize(mali_mem_allocation *descriptor, u32 new_size);
+
+/** @brief Free Mali page tables for mapping
+ *
+ * This function will unmap pages from Mali memory and free the page tables
+ * that are now unused.
+ *
+ * The updated pages in the Mali L2 cache will be invalidated, and the MMU TLBs will be zapped if necessary.
+ *
+ * @param descriptor Pointer to the memory descriptor to unmap
+ */
+void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags);
+
+/** @brief Parse resource and prepare the OS memory allocator
+ *
+ * @param size Maximum size to allocate for Mali GPU.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size);
+
+/** @brief Parse resource and prepare the dedicated memory allocator
+ *
+ * @param start Physical start address of dedicated Mali GPU memory.
+ * @param size Size of dedicated Mali GPU memory.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size);
+
+
+struct mali_page_node *_mali_page_node_allocate(mali_page_node_type type);
+
+void _mali_page_node_ref(struct mali_page_node *node);
+void _mali_page_node_unref(struct mali_page_node *node);
+void _mali_page_node_add_page(struct mali_page_node *node, struct page *page);
+
+void _mali_page_node_add_block_item(struct mali_page_node *node, mali_block_item *item);
+
+void _mali_page_node_add_swap_item(struct mali_page_node *node, struct mali_swap_item *item);
+
+int _mali_page_node_get_ref_count(struct mali_page_node *node);
+dma_addr_t _mali_page_node_get_dma_addr(struct mali_page_node *node);
+unsigned long _mali_page_node_get_pfn(struct mali_page_node *node);
+
+#endif /* __MALI_MEMORY_H__ */
diff --git a/mali/linux/mali_memory_block_alloc.c b/mali/linux/mali_memory_block_alloc.c
new file mode 100755
index 0000000..af3dd4d
--- /dev/null
+++ b/mali/linux/mali_memory_block_alloc.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_memory.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_osk.h"
+#include <linux/mutex.h>
+
+
+static mali_block_allocator *mali_mem_block_gobal_allocator = NULL;
+
+unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item)
+{
+	return (item->phy_addr & ~(MALI_BLOCK_REF_MASK));
+}
+
+
+unsigned long _mali_blk_item_get_pfn(mali_block_item *item)
+{
+	return (item->phy_addr / MALI_BLOCK_SIZE);
+}
+
+
+u32 mali_mem_block_get_ref_count(mali_page_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	return (node->blk_it->phy_addr & MALI_BLOCK_REF_MASK);
+}
+
+
+/* Increase the refence count
+* It not atomic, so it need to get sp_lock before call this function
+*/
+
+u32 mali_mem_block_add_ref(mali_page_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) < MALI_BLOCK_MAX_REF_COUNT);
+	return (node->blk_it->phy_addr++ & MALI_BLOCK_REF_MASK);
+}
+
+/* Decase the refence count
+* It not atomic, so it need to get sp_lock before call this function
+*/
+u32 mali_mem_block_dec_ref(mali_page_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) > 0);
+	return (node->blk_it->phy_addr-- & MALI_BLOCK_REF_MASK);
+}
+
+
+static mali_block_allocator *mali_mem_block_allocator_create(u32 base_address, u32 size)
+{
+	mali_block_allocator *info;
+	u32 usable_size;
+	u32 num_blocks;
+	mali_page_node *m_node;
+	mali_block_item *mali_blk_items = NULL;
+	int i = 0;
+
+	usable_size = size & ~(MALI_BLOCK_SIZE - 1);
+	MALI_DEBUG_PRINT(3, ("Mali block allocator create for region starting at 0x%08X length 0x%08X\n", base_address, size));
+	MALI_DEBUG_PRINT(4, ("%d usable bytes\n", usable_size));
+	num_blocks = usable_size / MALI_BLOCK_SIZE;
+	MALI_DEBUG_PRINT(4, ("which becomes %d blocks\n", num_blocks));
+
+	if (usable_size == 0) {
+		MALI_DEBUG_PRINT(1, ("Memory block of size %d is unusable\n", size));
+		return NULL;
+	}
+
+	info = _mali_osk_calloc(1, sizeof(mali_block_allocator));
+	if (NULL != info) {
+		INIT_LIST_HEAD(&info->free);
+		spin_lock_init(&info->sp_lock);
+		info->total_num = num_blocks;
+		mali_blk_items = _mali_osk_calloc(1, sizeof(mali_block_item) * num_blocks);
+
+		if (mali_blk_items) {
+			info->items = mali_blk_items;
+			/* add blocks(4k size) to free list*/
+			for (i = 0 ; i < num_blocks ; i++) {
+				/* add block information*/
+				mali_blk_items[i].phy_addr = base_address + (i * MALI_BLOCK_SIZE);
+				/* add  to free list */
+				m_node = _mali_page_node_allocate(MALI_PAGE_NODE_BLOCK);
+				if (m_node == NULL)
+					goto fail;
+				_mali_page_node_add_block_item(m_node, &(mali_blk_items[i]));
+				list_add_tail(&m_node->list, &info->free);
+				atomic_add(1, &info->free_num);
+			}
+			return info;
+		}
+	}
+fail:
+	mali_mem_block_allocator_destroy();
+	return NULL;
+}
+
+void mali_mem_block_allocator_destroy(void)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	MALI_DEBUG_ASSERT_POINTER(info);
+	MALI_DEBUG_PRINT(4, ("Memory block destroy !\n"));
+
+	if (NULL == info)
+		return;
+
+	list_for_each_entry_safe(m_page, m_tmp , &info->free, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		list_del(&m_page->list);
+		kfree(m_page);
+	}
+
+	_mali_osk_free(info->items);
+	_mali_osk_free(info);
+}
+
+u32 mali_mem_block_release(mali_mem_backend *mem_bkend)
+{
+	mali_mem_allocation *alloc = mem_bkend->mali_allocation;
+	u32 free_pages_nr = 0;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK);
+
+	/* Unmap the memory from the mali virtual address space. */
+	mali_mem_block_mali_unmap(alloc);
+	mutex_lock(&mem_bkend->mutex);
+	free_pages_nr = mali_mem_block_free(&mem_bkend->block_mem);
+	mutex_unlock(&mem_bkend->mutex);
+	return free_pages_nr;
+}
+
+
+int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	MALI_DEBUG_ASSERT_POINTER(info);
+
+	MALI_DEBUG_PRINT(4, ("BLOCK Mem: Allocate size = 0x%x\n", size));
+	/*do some init */
+	INIT_LIST_HEAD(&block_mem->pfns);
+
+	spin_lock(&info->sp_lock);
+	/*check if have enough space*/
+	if (atomic_read(&info->free_num) > page_count) {
+		list_for_each_entry_safe(m_page, m_tmp , &info->free, list) {
+			if (page_count > 0) {
+				MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+				MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(m_page) == 0);
+				list_move(&m_page->list, &block_mem->pfns);
+				block_mem->count++;
+				atomic_dec(&info->free_num);
+				_mali_page_node_ref(m_page);
+			} else {
+				break;
+			}
+			page_count--;
+		}
+	} else {
+		/* can't allocate from BLOCK memory*/
+		spin_unlock(&info->sp_lock);
+		return -1;
+	}
+
+	spin_unlock(&info->sp_lock);
+	return 0;
+}
+
+u32 mali_mem_block_free(mali_mem_block_mem *block_mem)
+{
+	u32 free_pages_nr = 0;
+
+	free_pages_nr = mali_mem_block_free_list(&block_mem->pfns);
+	MALI_DEBUG_PRINT(4, ("BLOCK Mem free : allocated size = 0x%x, free size = 0x%x\n", block_mem->count * _MALI_OSK_MALI_PAGE_SIZE,
+			     free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+	block_mem->count = 0;
+	MALI_DEBUG_ASSERT(list_empty(&block_mem->pfns));
+
+	return free_pages_nr;
+}
+
+
+u32 mali_mem_block_free_list(struct list_head *list)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	u32 free_pages_nr = 0;
+
+	if (info) {
+		spin_lock(&info->sp_lock);
+		list_for_each_entry_safe(m_page, m_tmp , list, list) {
+			if (1 == _mali_page_node_get_ref_count(m_page)) {
+				free_pages_nr++;
+			}
+			mali_mem_block_free_node(m_page);
+		}
+		spin_unlock(&info->sp_lock);
+	}
+	return free_pages_nr;
+}
+
+/* free the node,*/
+void mali_mem_block_free_node(struct mali_page_node *node)
+{
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+
+	/* only handle BLOCK node */
+	if (node->type == MALI_PAGE_NODE_BLOCK && info) {
+		/*Need to make this atomic?*/
+		if (1 == _mali_page_node_get_ref_count(node)) {
+			/*Move to free list*/
+			_mali_page_node_unref(node);
+			list_move_tail(&node->list, &info->free);
+			atomic_add(1, &info->free_num);
+		} else {
+			_mali_page_node_unref(node);
+			list_del(&node->list);
+			kfree(node);
+		}
+	}
+}
+
+/* unref the node, but not free it */
+_mali_osk_errcode_t mali_mem_block_unref_node(struct mali_page_node *node)
+{
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	mali_page_node *new_node;
+
+	/* only handle BLOCK node */
+	if (node->type == MALI_PAGE_NODE_BLOCK && info) {
+		/*Need to make this atomic?*/
+		if (1 == _mali_page_node_get_ref_count(node)) {
+			/* allocate a  new node, Add to free list, keep the old node*/
+			_mali_page_node_unref(node);
+			new_node = _mali_page_node_allocate(MALI_PAGE_NODE_BLOCK);
+			if (new_node) {
+				memcpy(new_node, node, sizeof(mali_page_node));
+				list_add(&new_node->list, &info->free);
+				atomic_add(1, &info->free_num);
+			} else
+				return _MALI_OSK_ERR_FAULT;
+
+		} else {
+			_mali_page_node_unref(node);
+		}
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+
+int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+	struct mali_page_directory *pagedir = session->page_directory;
+	struct mali_page_node *m_page;
+	dma_addr_t phys;
+	u32 virt = vaddr;
+	u32 prop = props;
+
+	list_for_each_entry(m_page, &block_mem->pfns, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		phys = _mali_page_node_get_dma_addr(m_page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+		/* Verify that the "physical" address is 32-bit and
+		 * usable for Mali, when on a system with bus addresses
+		 * wider than 32-bit. */
+		MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+		mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+		virt += MALI_MMU_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+void mali_mem_block_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+
+int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+	int ret;
+	mali_mem_block_mem *block_mem = &mem_bkend->block_mem;
+	unsigned long addr = vma->vm_start;
+	struct mali_page_node *m_page;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK);
+
+	list_for_each_entry(m_page, &block_mem->pfns, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		ret = vm_insert_pfn(vma, addr, _mali_page_node_get_pfn(m_page));
+
+		if (unlikely(0 != ret)) {
+			return -EFAULT;
+		}
+		addr += _MALI_OSK_MALI_PAGE_SIZE;
+
+	}
+
+	return 0;
+}
+
+
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size)
+{
+	mali_block_allocator *allocator;
+
+	/* Do the low level linux operation first */
+
+	/* Request ownership of the memory */
+	if (_MALI_OSK_ERR_OK != _mali_osk_mem_reqregion(start, size, "Dedicated Mali GPU memory")) {
+		MALI_DEBUG_PRINT(1, ("Failed to request memory region for frame buffer (0x%08X - 0x%08X)\n", start, start + size - 1));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create generic block allocator object to handle it */
+	allocator = mali_mem_block_allocator_create(start, size);
+
+	if (NULL == allocator) {
+		MALI_DEBUG_PRINT(1, ("Memory bank registration failed\n"));
+		_mali_osk_mem_unreqregion(start, size);
+		MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	}
+
+	mali_mem_block_gobal_allocator = (mali_block_allocator *)allocator;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+mali_bool mali_memory_have_dedicated_memory(void)
+{
+	return mali_mem_block_gobal_allocator ? MALI_TRUE : MALI_FALSE;
+}
+
+u32 mali_mem_block_allocator_stat(void)
+{
+	mali_block_allocator *allocator = mali_mem_block_gobal_allocator;
+	MALI_DEBUG_ASSERT_POINTER(allocator);
+
+
+	return (allocator->total_num - atomic_read(&allocator->free_num)) * _MALI_OSK_MALI_PAGE_SIZE;
+}
diff --git a/mali/linux/mali_memory_block_alloc.h b/mali/linux/mali_memory_block_alloc.h
new file mode 100755
index 0000000..3e11ef2
--- /dev/null
+++ b/mali/linux/mali_memory_block_alloc.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2010, 2013, 2015-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_BLOCK_ALLOCATOR_H__
+#define __MALI_BLOCK_ALLOCATOR_H__
+
+#include "mali_session.h"
+#include "mali_memory.h"
+#include <linux/spinlock.h>
+
+#include "mali_memory_types.h"
+
+#define MALI_BLOCK_SIZE (PAGE_SIZE)  /* 4 kB, manage BLOCK memory as page size */
+#define MALI_BLOCK_REF_MASK (0xFFF)
+#define MALI_BLOCK_MAX_REF_COUNT (0xFFF)
+
+
+
+typedef struct mali_block_allocator {
+	/*
+	* In free list, each node's ref_count is 0,
+	* ref_count added when allocated or referenced in COW
+	*/
+	mali_block_item *items; /* information for each block item*/
+	struct list_head free; /*free list of mali_memory_node*/
+	spinlock_t sp_lock; /*lock for reference count & free list opertion*/
+	u32 total_num; /* Number of total pages*/
+	atomic_t free_num; /*number of free pages*/
+} mali_block_allocator;
+
+unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item);
+unsigned long _mali_blk_item_get_pfn(mali_block_item *item);
+u32 mali_mem_block_get_ref_count(mali_page_node *node);
+u32 mali_mem_block_add_ref(mali_page_node *node);
+u32 mali_mem_block_dec_ref(mali_page_node *node);
+u32 mali_mem_block_release(mali_mem_backend *mem_bkend);
+int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size);
+int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+void mali_mem_block_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size);
+mali_bool mali_memory_have_dedicated_memory(void);
+u32 mali_mem_block_free(mali_mem_block_mem *block_mem);
+u32 mali_mem_block_free_list(struct list_head *list);
+void mali_mem_block_free_node(struct mali_page_node *node);
+void mali_mem_block_allocator_destroy(void);
+_mali_osk_errcode_t mali_mem_block_unref_node(struct mali_page_node *node);
+u32 mali_mem_block_allocator_stat(void);
+
+#endif /* __MALI_BLOCK_ALLOCATOR_H__ */
diff --git a/mali/linux/mali_memory_cow.c b/mali/linux/mali_memory_cow.c
new file mode 100644
index 0000000..ae417db
--- /dev/null
+++ b/mali/linux/mali_memory_cow.c
@@ -0,0 +1,776 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#ifdef CONFIG_ARM
+#include <asm/outercache.h>
+#endif
+#include <asm/dma-mapping.h>
+
+#include "mali_memory.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_swap_alloc.h"
+
+/**
+* allocate pages for COW backend and flush cache
+*/
+static struct page *mali_mem_cow_alloc_page(void)
+
+{
+	mali_mem_os_mem os_mem;
+	struct mali_page_node *node;
+	struct page *new_page;
+
+	int ret = 0;
+	/* allocate pages from os mem */
+	ret = mali_mem_os_alloc_pages(&os_mem, _MALI_OSK_MALI_PAGE_SIZE);
+
+	if (ret) {
+		return NULL;
+	}
+
+	MALI_DEBUG_ASSERT(1 == os_mem.count);
+
+	node = _MALI_OSK_CONTAINER_OF(os_mem.pages.next, struct mali_page_node, list);
+	new_page = node->page;
+	node->page = NULL;
+	list_del(&node->list);
+	kfree(node);
+
+	return new_page;
+}
+
+
+static struct list_head *_mali_memory_cow_get_node_list(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size)
+{
+	MALI_DEBUG_ASSERT(MALI_MEM_OS == target_bk->type || MALI_MEM_COW == target_bk->type ||
+			  MALI_MEM_BLOCK == target_bk->type || MALI_MEM_SWAP == target_bk->type);
+
+	if (MALI_MEM_OS == target_bk->type) {
+		MALI_DEBUG_ASSERT(&target_bk->os_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->os_mem.count);
+		return &target_bk->os_mem.pages;
+	} else if (MALI_MEM_COW == target_bk->type) {
+		MALI_DEBUG_ASSERT(&target_bk->cow_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->cow_mem.count);
+		return  &target_bk->cow_mem.pages;
+	} else if (MALI_MEM_BLOCK == target_bk->type) {
+		MALI_DEBUG_ASSERT(&target_bk->block_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->block_mem.count);
+		return  &target_bk->block_mem.pfns;
+	} else if (MALI_MEM_SWAP == target_bk->type) {
+		MALI_DEBUG_ASSERT(&target_bk->swap_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->swap_mem.count);
+		return  &target_bk->swap_mem.pages;
+	}
+
+	return NULL;
+}
+
+/**
+* Do COW for os memory - support do COW for memory from bank memory
+* The range_start/size can be zero, which means it will call cow_modify_range
+* latter.
+* This function allocate new pages for COW backend from os mem for a modified range
+* It will keep the page which not in the modified range and Add ref to it
+*
+* @target_bk - target allocation's backend(the allocation need to do COW)
+* @target_offset - the offset in target allocation to do COW(for support COW  a memory allocated from memory_bank, 4K align)
+* @target_size - size of target allocation to do COW (for support memory bank)
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range
+*/
+_mali_osk_errcode_t mali_memory_cow_os_memory(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size,
+		mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size)
+{
+	mali_mem_cow *cow = &backend->cow_mem;
+	struct mali_page_node *m_page, *m_tmp, *page_node;
+	int target_page = 0;
+	struct page *new_page;
+	struct list_head *pages = NULL;
+
+	pages = _mali_memory_cow_get_node_list(target_bk, target_offset, target_size);
+
+	if (NULL == pages) {
+		MALI_DEBUG_PRINT_ERROR(("No memory page  need to cow ! \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT(0 == cow->count);
+
+	INIT_LIST_HEAD(&cow->pages);
+	mutex_lock(&target_bk->mutex);
+	list_for_each_entry_safe(m_page, m_tmp, pages, list) {
+		/* add page from (target_offset,target_offset+size) to cow backend */
+		if ((target_page >= target_offset / _MALI_OSK_MALI_PAGE_SIZE) &&
+		    (target_page < ((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE))) {
+
+			/* allocate a new page node, alway use OS memory for COW */
+			page_node = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+
+			if (NULL == page_node) {
+				mutex_unlock(&target_bk->mutex);
+				goto error;
+			}
+
+			INIT_LIST_HEAD(&page_node->list);
+
+			/* check if in the modified range*/
+			if ((cow->count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+			    (cow->count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+				/* need to allocate a new page */
+				/* To simplify the case, All COW memory is allocated from os memory ?*/
+				new_page = mali_mem_cow_alloc_page();
+
+				if (NULL == new_page) {
+					kfree(page_node);
+					mutex_unlock(&target_bk->mutex);
+					goto error;
+				}
+
+				_mali_page_node_add_page(page_node, new_page);
+			} else {
+				/*Add Block memory case*/
+				if (m_page->type != MALI_PAGE_NODE_BLOCK) {
+					_mali_page_node_add_page(page_node, m_page->page);
+				} else {
+					page_node->type = MALI_PAGE_NODE_BLOCK;
+					_mali_page_node_add_block_item(page_node, m_page->blk_it);
+				}
+
+				/* add ref to this page */
+				_mali_page_node_ref(m_page);
+			}
+
+			/* add it to COW backend page list */
+			list_add_tail(&page_node->list, &cow->pages);
+			cow->count++;
+		}
+		target_page++;
+	}
+	mutex_unlock(&target_bk->mutex);
+	return _MALI_OSK_ERR_OK;
+error:
+	mali_mem_cow_release(backend, MALI_FALSE);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+_mali_osk_errcode_t mali_memory_cow_swap_memory(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size,
+		mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size)
+{
+	mali_mem_cow *cow = &backend->cow_mem;
+	struct mali_page_node *m_page, *m_tmp, *page_node;
+	int target_page = 0;
+	struct mali_swap_item *swap_item;
+	struct list_head *pages = NULL;
+
+	pages = _mali_memory_cow_get_node_list(target_bk, target_offset, target_size);
+	if (NULL == pages) {
+		MALI_DEBUG_PRINT_ERROR(("No swap memory page need to cow ! \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT(0 == cow->count);
+
+	INIT_LIST_HEAD(&cow->pages);
+	mutex_lock(&target_bk->mutex);
+
+	backend->flags |= MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN;
+
+	list_for_each_entry_safe(m_page, m_tmp, pages, list) {
+		/* add page from (target_offset,target_offset+size) to cow backend */
+		if ((target_page >= target_offset / _MALI_OSK_MALI_PAGE_SIZE) &&
+		    (target_page < ((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE))) {
+
+			/* allocate a new page node, use swap memory for COW memory swap cowed flag. */
+			page_node = _mali_page_node_allocate(MALI_PAGE_NODE_SWAP);
+
+			if (NULL == page_node) {
+				mutex_unlock(&target_bk->mutex);
+				goto error;
+			}
+
+			/* check if in the modified range*/
+			if ((cow->count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+			    (cow->count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+				/* need to allocate a new page */
+				/* To simplify the case, All COW memory is allocated from os memory ?*/
+				swap_item = mali_mem_swap_alloc_swap_item();
+
+				if (NULL == swap_item) {
+					kfree(page_node);
+					mutex_unlock(&target_bk->mutex);
+					goto error;
+				}
+
+				swap_item->idx = mali_mem_swap_idx_alloc();
+
+				if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == swap_item->idx) {
+					MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW.\n"));
+					kfree(page_node);
+					kfree(swap_item);
+					mutex_unlock(&target_bk->mutex);
+					goto error;
+				}
+
+				_mali_page_node_add_swap_item(page_node, swap_item);
+			} else {
+				_mali_page_node_add_swap_item(page_node, m_page->swap_it);
+
+				/* add ref to this page */
+				_mali_page_node_ref(m_page);
+			}
+
+			list_add_tail(&page_node->list, &cow->pages);
+			cow->count++;
+		}
+		target_page++;
+	}
+	mutex_unlock(&target_bk->mutex);
+
+	return _MALI_OSK_ERR_OK;
+error:
+	mali_mem_swap_release(backend, MALI_FALSE);
+	return _MALI_OSK_ERR_FAULT;
+
+}
+
+
+_mali_osk_errcode_t _mali_mem_put_page_node(mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		return mali_mem_os_put_page(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		return mali_mem_block_unref_node(node);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return _mali_mem_swap_put_page_node(node);
+	} else
+		MALI_DEBUG_ASSERT(0);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+/**
+* Modify a range of a exist COW backend
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range(in byte)
+*/
+_mali_osk_errcode_t mali_memory_cow_modify_range(mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size)
+{
+	mali_mem_allocation *alloc = NULL;
+	struct mali_session_data *session;
+	mali_mem_cow *cow = &backend->cow_mem;
+	struct mali_page_node *m_page, *m_tmp;
+	LIST_HEAD(pages);
+	struct page *new_page;
+	u32 count = 0;
+	s32 change_pages_nr = 0;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+
+	if (range_start % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (range_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	alloc = backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == backend->type);
+	MALI_DEBUG_ASSERT(((range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE) <= cow->count);
+
+	mutex_lock(&backend->mutex);
+
+	/* free pages*/
+	list_for_each_entry_safe(m_page, m_tmp, &cow->pages, list) {
+
+		/* check if in the modified range*/
+		if ((count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+		    (count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+			if (MALI_PAGE_NODE_SWAP != m_page->type) {
+				new_page = mali_mem_cow_alloc_page();
+
+				if (NULL == new_page) {
+					goto error;
+				}
+				if (1 != _mali_page_node_get_ref_count(m_page))
+					change_pages_nr++;
+				/* unref old page*/
+				_mali_osk_mutex_wait(session->cow_lock);
+				if (_mali_mem_put_page_node(m_page)) {
+					__free_page(new_page);
+					_mali_osk_mutex_signal(session->cow_lock);
+					goto error;
+				}
+				_mali_osk_mutex_signal(session->cow_lock);
+				/* add new page*/
+				/* always use OS for COW*/
+				m_page->type = MALI_PAGE_NODE_OS;
+				_mali_page_node_add_page(m_page, new_page);
+			} else {
+				struct mali_swap_item *swap_item;
+
+				swap_item = mali_mem_swap_alloc_swap_item();
+
+				if (NULL == swap_item) {
+					goto error;
+				}
+
+				swap_item->idx = mali_mem_swap_idx_alloc();
+
+				if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == swap_item->idx) {
+					MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW modify range.\n"));
+					kfree(swap_item);
+					goto error;
+				}
+
+				if (1 != _mali_page_node_get_ref_count(m_page)) {
+					change_pages_nr++;
+				}
+
+				if (_mali_mem_put_page_node(m_page)) {
+					mali_mem_swap_free_swap_item(swap_item);
+					goto error;
+				}
+
+				_mali_page_node_add_swap_item(m_page, swap_item);
+			}
+		}
+		count++;
+	}
+	cow->change_pages_nr  = change_pages_nr;
+
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == alloc->type);
+
+	/* ZAP cpu mapping(modified range), and do cpu mapping here if need */
+	if (NULL != alloc->cpu_mapping.vma) {
+		MALI_DEBUG_ASSERT(0 != alloc->backend_handle);
+		MALI_DEBUG_ASSERT(NULL != alloc->cpu_mapping.vma);
+		MALI_DEBUG_ASSERT(alloc->cpu_mapping.vma->vm_end - alloc->cpu_mapping.vma->vm_start >= range_size);
+
+		if (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+			zap_vma_ptes(alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size);
+
+			ret = mali_mem_cow_cpu_map_pages_locked(backend, alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start  + range_start, range_size / _MALI_OSK_MALI_PAGE_SIZE);
+
+			if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+				MALI_DEBUG_PRINT(2, ("mali_memory_cow_modify_range: cpu mapping failed !\n"));
+				ret =  _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			/* used to trigger page fault for swappable cowed memory. */
+			alloc->cpu_mapping.vma->vm_flags |= VM_PFNMAP;
+			alloc->cpu_mapping.vma->vm_flags |= VM_MIXEDMAP;
+
+			zap_vma_ptes(alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size);
+			/* delete this flag to let swappble is ummapped regard to stauct page not page frame. */
+			alloc->cpu_mapping.vma->vm_flags &= ~VM_PFNMAP;
+			alloc->cpu_mapping.vma->vm_flags &= ~VM_MIXEDMAP;
+		}
+	}
+
+error:
+	mutex_unlock(&backend->mutex);
+	return ret;
+
+}
+
+
+/**
+* Allocate pages for COW backend
+* @alloc  -allocation for COW allocation
+* @target_bk - target allocation's backend(the allocation need to do COW)
+* @target_offset - the offset in target allocation to do COW(for support COW  a memory allocated from memory_bank, 4K align)
+* @target_size - size of target allocation to do COW (for support memory bank)(in byte)
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range(in byte)
+*/
+_mali_osk_errcode_t mali_memory_do_cow(mali_mem_backend *target_bk,
+				       u32 target_offset,
+				       u32 target_size,
+				       mali_mem_backend *backend,
+				       u32 range_start,
+				       u32 range_size)
+{
+	struct mali_session_data *session = backend->mali_allocation->session;
+
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+
+	/* size & offset must be a multiple of the system page size */
+	if (target_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (range_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (target_offset % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (range_start % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	/* check backend type */
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == backend->type);
+
+	switch (target_bk->type) {
+	case MALI_MEM_OS:
+	case MALI_MEM_BLOCK:
+		return mali_memory_cow_os_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		break;
+	case MALI_MEM_COW:
+		if (backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED) {
+			return mali_memory_cow_swap_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		} else {
+			return mali_memory_cow_os_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		}
+		break;
+	case MALI_MEM_SWAP:
+		return mali_memory_cow_swap_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		break;
+	case MALI_MEM_EXTERNAL:
+		/*NOT support yet*/
+		MALI_DEBUG_PRINT_ERROR(("External physical memory not supported ! \n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	case MALI_MEM_DMA_BUF:
+		/*NOT support yet*/
+		MALI_DEBUG_PRINT_ERROR(("DMA buffer not supported ! \n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	case MALI_MEM_UMP:
+		/*NOT support yet*/
+		MALI_DEBUG_PRINT_ERROR(("UMP buffer not supported ! \n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	default:
+		/*Not support yet*/
+		MALI_DEBUG_PRINT_ERROR(("Invalid memory type not supported ! \n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/**
+* Map COW backend memory to mali
+* Support OS/BLOCK for mali_page_node
+*/
+int mali_mem_cow_mali_map(mali_mem_backend *mem_bkend, u32 range_start, u32 range_size)
+{
+	mali_mem_allocation *cow_alloc;
+	struct mali_page_node *m_page;
+	struct mali_session_data *session;
+	struct mali_page_directory *pagedir;
+	u32 virt, start;
+
+	cow_alloc = mem_bkend->mali_allocation;
+	virt = cow_alloc->mali_vma_node.vm_node.start;
+	start = virt;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+	MALI_DEBUG_ASSERT_POINTER(cow_alloc);
+
+	session = cow_alloc->session;
+	pagedir = session->page_directory;
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+	list_for_each_entry(m_page, &mem_bkend->cow_mem.pages, list) {
+		if ((virt - start >= range_start) && (virt - start < range_start + range_size)) {
+			dma_addr_t phys = _mali_page_node_get_dma_addr(m_page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+			MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+			mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys,
+						MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+		}
+		virt += MALI_MMU_PAGE_SIZE;
+	}
+	return 0;
+}
+
+/**
+* Map COW backend to cpu
+* support OS/BLOCK memory
+*/
+int mali_mem_cow_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+	mali_mem_cow *cow = &mem_bkend->cow_mem;
+	struct mali_page_node *m_page;
+	int ret;
+	unsigned long addr = vma->vm_start;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+
+	list_for_each_entry(m_page, &cow->pages, list) {
+		/* We should use vm_insert_page, but it does a dcache
+		 * flush which makes it way slower than remap_pfn_range or vm_insert_pfn.
+		ret = vm_insert_page(vma, addr, page);
+		*/
+		ret = vm_insert_pfn(vma, addr, _mali_page_node_get_pfn(m_page));
+
+		if (unlikely(0 != ret)) {
+			return ret;
+		}
+		addr += _MALI_OSK_MALI_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+/**
+* Map some pages(COW backend) to CPU vma@vaddr
+*@ mem_bkend - COW backend
+*@ vma
+*@ vaddr -start CPU vaddr mapped to
+*@ num - max number of pages to map to CPU vaddr
+*/
+_mali_osk_errcode_t mali_mem_cow_cpu_map_pages_locked(mali_mem_backend *mem_bkend,
+		struct vm_area_struct *vma,
+		unsigned long vaddr,
+		int num)
+{
+	mali_mem_cow *cow = &mem_bkend->cow_mem;
+	struct mali_page_node *m_page;
+	int ret;
+	int offset;
+	int count ;
+	unsigned long vstart = vma->vm_start;
+	count = 0;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+	MALI_DEBUG_ASSERT(0 == vaddr % _MALI_OSK_MALI_PAGE_SIZE);
+	MALI_DEBUG_ASSERT(0 == vstart % _MALI_OSK_MALI_PAGE_SIZE);
+	offset = (vaddr - vstart) / _MALI_OSK_MALI_PAGE_SIZE;
+
+	list_for_each_entry(m_page, &cow->pages, list) {
+		if ((count >= offset) && (count < offset + num)) {
+			ret = vm_insert_pfn(vma, vaddr, _mali_page_node_get_pfn(m_page));
+
+			if (unlikely(0 != ret)) {
+				if (count == offset) {
+					return _MALI_OSK_ERR_FAULT;
+				} else {
+					/* ret is EBUSY when page isn't in modify range, but now it's OK*/
+					return _MALI_OSK_ERR_OK;
+				}
+			}
+			vaddr += _MALI_OSK_MALI_PAGE_SIZE;
+		}
+		count++;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/**
+* Release COW backend memory
+* free it directly(put_page--unref page), not put into pool
+*/
+u32 mali_mem_cow_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped)
+{
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	u32 free_pages_nr = 0;
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+	alloc = mem_bkend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (MALI_MEM_BACKEND_FLAG_SWAP_COWED & mem_bkend->flags)) {
+		/* Unmap the memory from the mali virtual address space. */
+		if (MALI_TRUE == is_mali_mapped)
+			mali_mem_os_mali_unmap(alloc);
+		/* free cow backend list*/
+		_mali_osk_mutex_wait(session->cow_lock);
+		free_pages_nr = mali_mem_os_free(&mem_bkend->cow_mem.pages, mem_bkend->cow_mem.count, MALI_TRUE);
+		_mali_osk_mutex_signal(session->cow_lock);
+
+		free_pages_nr += mali_mem_block_free_list(&mem_bkend->cow_mem.pages);
+
+		MALI_DEBUG_ASSERT(list_empty(&mem_bkend->cow_mem.pages));
+	} else {
+		free_pages_nr = mali_mem_swap_release(mem_bkend, is_mali_mapped);
+	}
+
+
+	MALI_DEBUG_PRINT(4, ("COW Mem free : allocated size = 0x%x, free size = 0x%x\n", mem_bkend->cow_mem.count * _MALI_OSK_MALI_PAGE_SIZE,
+			     free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+
+	mem_bkend->cow_mem.count = 0;
+	return free_pages_nr;
+}
+
+
+/* Dst node could os node or swap node. */
+void _mali_mem_cow_copy_page(mali_page_node *src_node, mali_page_node *dst_node)
+{
+	void *dst, *src;
+	struct page *dst_page;
+	dma_addr_t dma_addr;
+
+	MALI_DEBUG_ASSERT(src_node != NULL);
+	MALI_DEBUG_ASSERT(dst_node != NULL);
+	MALI_DEBUG_ASSERT(dst_node->type == MALI_PAGE_NODE_OS
+			  || dst_node->type == MALI_PAGE_NODE_SWAP);
+
+	if (dst_node->type == MALI_PAGE_NODE_OS) {
+		dst_page = dst_node->page;
+	} else {
+		dst_page = dst_node->swap_it->page;
+	}
+
+	dma_unmap_page(&mali_platform_device->dev, _mali_page_node_get_dma_addr(dst_node),
+		       _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+	/* map it , and copy the content*/
+	dst = kmap_atomic(dst_page);
+
+	if (src_node->type == MALI_PAGE_NODE_OS ||
+	    src_node->type == MALI_PAGE_NODE_SWAP) {
+		struct page *src_page;
+
+		if (src_node->type == MALI_PAGE_NODE_OS) {
+			src_page = src_node->page;
+		} else {
+			src_page = src_node->swap_it->page;
+		}
+
+		/* Clear and invaliate cache */
+		/* In ARM architecture, speculative read may pull stale data into L1 cache
+		 * for kernel linear mapping page table. DMA_BIDIRECTIONAL could
+		 * invalidate the L1 cache so that following read get the latest data
+		*/
+		dma_unmap_page(&mali_platform_device->dev, _mali_page_node_get_dma_addr(src_node),
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+		src = kmap_atomic(src_page);
+		memcpy(dst, src , _MALI_OSK_MALI_PAGE_SIZE);
+		kunmap_atomic(src);
+		dma_addr = dma_map_page(&mali_platform_device->dev, src_page,
+					0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+		if (src_node->type == MALI_PAGE_NODE_SWAP) {
+			src_node->swap_it->dma_addr = dma_addr;
+		}
+	} else if (src_node->type == MALI_PAGE_NODE_BLOCK) {
+		/*
+		* use ioremap to map src for BLOCK memory
+		*/
+		src = ioremap_nocache(_mali_page_node_get_dma_addr(src_node), _MALI_OSK_MALI_PAGE_SIZE);
+		memcpy(dst, src , _MALI_OSK_MALI_PAGE_SIZE);
+		iounmap(src);
+	}
+	kunmap_atomic(dst);
+	dma_addr = dma_map_page(&mali_platform_device->dev, dst_page,
+				0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+	if (dst_node->type == MALI_PAGE_NODE_SWAP) {
+		dst_node->swap_it->dma_addr = dma_addr;
+	}
+}
+
+
+/*
+* allocate page on demand when CPU access it,
+* THis used in page fault handler
+*/
+_mali_osk_errcode_t mali_mem_cow_allocate_on_demand(mali_mem_backend *mem_bkend, u32 offset_page)
+{
+	struct page *new_page = NULL;
+	struct mali_page_node *new_node = NULL;
+	int i = 0;
+	struct mali_page_node *m_page, *found_node = NULL;
+	struct  mali_session_data *session = NULL;
+	mali_mem_cow *cow = &mem_bkend->cow_mem;
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+	MALI_DEBUG_ASSERT(offset_page < mem_bkend->size / _MALI_OSK_MALI_PAGE_SIZE);
+	MALI_DEBUG_PRINT(4, ("mali_mem_cow_allocate_on_demand !, offset_page =0x%x\n", offset_page));
+
+	/* allocate new page here */
+	new_page = mali_mem_cow_alloc_page();
+	if (!new_page)
+		return _MALI_OSK_ERR_NOMEM;
+
+	new_node = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+	if (!new_node) {
+		__free_page(new_page);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	/* find the page in backend*/
+	list_for_each_entry(m_page, &cow->pages, list) {
+		if (i == offset_page) {
+			found_node = m_page;
+			break;
+		}
+		i++;
+	}
+	MALI_DEBUG_ASSERT(found_node);
+	if (NULL == found_node) {
+		__free_page(new_page);
+		kfree(new_node);
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	_mali_page_node_add_page(new_node, new_page);
+
+	/* Copy the src page's content to new page */
+	_mali_mem_cow_copy_page(found_node, new_node);
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend->mali_allocation);
+	session = mem_bkend->mali_allocation->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	if (1 != _mali_page_node_get_ref_count(found_node)) {
+		atomic_add(1, &session->mali_mem_allocated_pages);
+		if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+			session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+		mem_bkend->cow_mem.change_pages_nr++;
+	}
+
+	_mali_osk_mutex_wait(session->cow_lock);
+	if (_mali_mem_put_page_node(found_node)) {
+		__free_page(new_page);
+		kfree(new_node);
+		_mali_osk_mutex_signal(session->cow_lock);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	_mali_osk_mutex_signal(session->cow_lock);
+
+	list_replace(&found_node->list, &new_node->list);
+
+	kfree(found_node);
+
+	/* map to GPU side*/
+	_mali_osk_mutex_wait(session->memory_lock);
+	mali_mem_cow_mali_map(mem_bkend, offset_page * _MALI_OSK_MALI_PAGE_SIZE, _MALI_OSK_MALI_PAGE_SIZE);
+	_mali_osk_mutex_signal(session->memory_lock);
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/mali/linux/mali_memory_cow.h b/mali/linux/mali_memory_cow.h
new file mode 100644
index 0000000..9b9e834
--- /dev/null
+++ b/mali/linux/mali_memory_cow.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_COW_H__
+#define __MALI_MEMORY_COW_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+#include "mali_memory_types.h"
+
+int mali_mem_cow_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+_mali_osk_errcode_t mali_mem_cow_cpu_map_pages_locked(mali_mem_backend *mem_bkend,
+		struct vm_area_struct *vma,
+		unsigned long vaddr,
+		int num);
+
+_mali_osk_errcode_t mali_memory_do_cow(mali_mem_backend *target_bk,
+				       u32 target_offset,
+				       u32 target_size,
+				       mali_mem_backend *backend,
+				       u32 range_start,
+				       u32 range_size);
+
+_mali_osk_errcode_t mali_memory_cow_modify_range(mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size);
+
+_mali_osk_errcode_t mali_memory_cow_os_memory(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size,
+		mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size);
+
+void _mali_mem_cow_copy_page(mali_page_node *src_node, mali_page_node *dst_node);
+
+int mali_mem_cow_mali_map(mali_mem_backend *mem_bkend, u32 range_start, u32 range_size);
+u32 mali_mem_cow_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped);
+_mali_osk_errcode_t mali_mem_cow_allocate_on_demand(mali_mem_backend *mem_bkend, u32 offset_page);
+#endif
+
diff --git a/mali/linux/mali_memory_defer_bind.c b/mali/linux/mali_memory_defer_bind.c
new file mode 100644
index 0000000..a13eea0
--- /dev/null
+++ b/mali/linux/mali_memory_defer_bind.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#ifdef CONFIG_ARM
+#include <asm/outercache.h>
+#endif
+#include <asm/dma-mapping.h>
+
+#include "mali_memory.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory_defer_bind.h"
+#include "mali_executor.h"
+#include "mali_osk.h"
+#include "mali_scheduler.h"
+#include "mali_gp_job.h"
+
+mali_defer_bind_manager *mali_dmem_man = NULL;
+
+static u32 mali_dmem_get_gp_varying_size(struct mali_gp_job *gp_job)
+{
+	return gp_job->required_varying_memsize / _MALI_OSK_MALI_PAGE_SIZE;
+}
+
+_mali_osk_errcode_t mali_mem_defer_bind_manager_init(void)
+{
+	mali_dmem_man = _mali_osk_calloc(1, sizeof(struct mali_defer_bind_manager));
+	if (!mali_dmem_man)
+		return _MALI_OSK_ERR_NOMEM;
+
+	atomic_set(&mali_dmem_man->num_used_pages, 0);
+	atomic_set(&mali_dmem_man->num_dmem, 0);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+void mali_mem_defer_bind_manager_destory(void)
+{
+	if (mali_dmem_man) {
+		MALI_DEBUG_ASSERT(0 == atomic_read(&mali_dmem_man->num_dmem));
+		kfree(mali_dmem_man);
+	}
+	mali_dmem_man = NULL;
+}
+
+
+/*allocate pages from OS memory*/
+_mali_osk_errcode_t mali_mem_defer_alloc_mem(u32 require, struct mali_session_data *session, mali_defer_mem_block *dblock)
+{
+	int retval = 0;
+	u32 num_pages = require;
+	mali_mem_os_mem os_mem;
+
+	retval = mali_mem_os_alloc_pages(&os_mem, num_pages * _MALI_OSK_MALI_PAGE_SIZE);
+
+	/* add to free pages list */
+	if (0 == retval) {
+		MALI_DEBUG_PRINT(4, ("mali_mem_defer_alloc_mem ,,*** pages allocate = 0x%x \n", num_pages));
+		list_splice(&os_mem.pages, &dblock->free_pages);
+		atomic_add(os_mem.count, &dblock->num_free_pages);
+		atomic_add(os_mem.count, &session->mali_mem_allocated_pages);
+		if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+			session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+		return _MALI_OSK_ERR_OK;
+	} else
+		return _MALI_OSK_ERR_FAULT;
+}
+
+_mali_osk_errcode_t mali_mem_prepare_mem_for_job(struct mali_gp_job *next_gp_job, mali_defer_mem_block *dblock)
+{
+	u32 require_page;
+
+	if (!next_gp_job)
+		return _MALI_OSK_ERR_FAULT;
+
+	require_page = mali_dmem_get_gp_varying_size(next_gp_job);
+
+	MALI_DEBUG_PRINT(4, ("mali_mem_defer_prepare_mem_work, require alloc page 0x%x\n",
+			     require_page));
+	/* allocate more pages from OS */
+	if (_MALI_OSK_ERR_OK != mali_mem_defer_alloc_mem(require_page, next_gp_job->session, dblock)) {
+		MALI_DEBUG_PRINT(1, ("ERROR##mali_mem_defer_prepare_mem_work, allocate page failed!!"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	next_gp_job->bind_flag = MALI_DEFER_BIND_MEMORY_PREPARED;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/* do preparetion for allocation before defer bind */
+_mali_osk_errcode_t mali_mem_defer_bind_allocation_prepare(mali_mem_allocation *alloc, struct list_head *list, u32 *required_varying_memsize)
+{
+	mali_mem_backend *mem_bkend = NULL;
+	struct mali_backend_bind_list *bk_list = _mali_osk_calloc(1, sizeof(struct mali_backend_bind_list));
+	if (NULL == bk_list)
+		return _MALI_OSK_ERR_FAULT;
+
+	INIT_LIST_HEAD(&bk_list->node);
+	/* Get backend memory */
+	mutex_lock(&mali_idr_mutex);
+	if (!(mem_bkend = idr_find(&mali_backend_idr, alloc->backend_handle))) {
+		MALI_DEBUG_PRINT(1, ("Can't find memory backend in defer bind!\n"));
+		mutex_unlock(&mali_idr_mutex);
+		_mali_osk_free(bk_list);
+		return _MALI_OSK_ERR_FAULT;
+	}
+	mutex_unlock(&mali_idr_mutex);
+
+	/* If the mem backend has already been bound, no need to bind again.*/
+	if (mem_bkend->os_mem.count > 0) {
+		_mali_osk_free(bk_list);
+		return _MALI_OSK_ERR_OK;
+	}
+
+	MALI_DEBUG_PRINT(4, ("bind_allocation_prepare:: allocation =%x vaddr=0x%x!\n", alloc, alloc->mali_vma_node.vm_node.start));
+
+	INIT_LIST_HEAD(&mem_bkend->os_mem.pages);
+
+	bk_list->bkend = mem_bkend;
+	bk_list->vaddr = alloc->mali_vma_node.vm_node.start;
+	bk_list->session = alloc->session;
+	bk_list->page_num = mem_bkend->size / _MALI_OSK_MALI_PAGE_SIZE;
+	*required_varying_memsize +=  mem_bkend->size;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+
+	/* add to job to do list */
+	list_add(&bk_list->node, list);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+
+/* bind phyiscal memory to allocation
+This function will be called in IRQ handler*/
+static _mali_osk_errcode_t mali_mem_defer_bind_allocation(struct mali_backend_bind_list *bk_node,
+		struct list_head *pages)
+{
+	struct mali_session_data *session = bk_node->session;
+	mali_mem_backend *mem_bkend = bk_node->bkend;
+	MALI_DEBUG_PRINT(4, ("mali_mem_defer_bind_allocation, bind bkend = %x page num=0x%x vaddr=%x session=%x\n", mem_bkend, bk_node->page_num, bk_node->vaddr, session));
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+	list_splice(pages, &mem_bkend->os_mem.pages);
+	mem_bkend->os_mem.count = bk_node->page_num;
+
+	if (mem_bkend->type == MALI_MEM_OS) {
+		mali_mem_os_mali_map(&mem_bkend->os_mem, session, bk_node->vaddr, 0,
+				     mem_bkend->os_mem.count, MALI_MMU_FLAGS_DEFAULT);
+	}
+	smp_wmb();
+	bk_node->flag = MALI_DEFER_BIND_MEMORY_BINDED;
+	mem_bkend->flags &= ~MALI_MEM_BACKEND_FLAG_NOT_BINDED;
+	mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_BINDED;
+	return _MALI_OSK_ERR_OK;
+}
+
+
+static struct list_head *mali_mem_defer_get_free_page_list(u32 count, struct list_head *pages, mali_defer_mem_block *dblock)
+{
+	int i = 0;
+	struct mali_page_node *m_page, *m_tmp;
+
+	if (atomic_read(&dblock->num_free_pages) < count) {
+		return NULL;
+	} else {
+		list_for_each_entry_safe(m_page, m_tmp, &dblock->free_pages, list) {
+			if (i < count) {
+				list_move_tail(&m_page->list, pages);
+			} else {
+				break;
+			}
+			i++;
+		}
+		MALI_DEBUG_ASSERT(i == count);
+		atomic_sub(count, &dblock->num_free_pages);
+		return pages;
+	}
+}
+
+
+/* called in job start IOCTL to bind physical memory for each allocations
+@ bk_list backend list to do defer bind
+@ pages page list to do this bind
+@ count number of pages
+*/
+_mali_osk_errcode_t mali_mem_defer_bind(struct mali_gp_job *gp,
+					struct mali_defer_mem_block *dmem_block)
+{
+	struct mali_defer_mem *dmem = NULL;
+	struct mali_backend_bind_list *bkn, *bkn_tmp;
+	LIST_HEAD(pages);
+
+	if (gp->required_varying_memsize != (atomic_read(&dmem_block->num_free_pages) * _MALI_OSK_MALI_PAGE_SIZE)) {
+		MALI_DEBUG_PRINT_ERROR(("#BIND:  The memsize of varying buffer not match to the pagesize of the dmem_block!!## \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_PRINT(4, ("#BIND: GP job=%x## \n", gp));
+	dmem = (mali_defer_mem *)_mali_osk_calloc(1, sizeof(struct mali_defer_mem));
+	if (dmem) {
+		INIT_LIST_HEAD(&dmem->node);
+		gp->dmem = dmem;
+	} else {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	atomic_add(1, &mali_dmem_man->num_dmem);
+	/* for each bk_list backend, do bind */
+	list_for_each_entry_safe(bkn, bkn_tmp , &gp->vary_todo, node) {
+		INIT_LIST_HEAD(&pages);
+		if (likely(mali_mem_defer_get_free_page_list(bkn->page_num, &pages, dmem_block))) {
+			list_del(&bkn->node);
+			mali_mem_defer_bind_allocation(bkn, &pages);
+			_mali_osk_free(bkn);
+		} else {
+			/* not enough memory will not happen */
+			MALI_DEBUG_PRINT_ERROR(("#BIND: NOT enough memory when binded !!## \n"));
+			_mali_osk_free(gp->dmem);
+			return _MALI_OSK_ERR_NOMEM;
+		}
+	}
+
+	if (!list_empty(&gp->vary_todo)) {
+		MALI_DEBUG_PRINT_ERROR(("#BIND:  The deferbind backend list isn't empty !!## \n"));
+		_mali_osk_free(gp->dmem);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	dmem->flag = MALI_DEFER_BIND_MEMORY_BINDED;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_defer_dmem_free(struct mali_gp_job *gp)
+{
+	if (gp->dmem) {
+		atomic_dec(&mali_dmem_man->num_dmem);
+		_mali_osk_free(gp->dmem);
+	}
+}
+
diff --git a/mali/linux/mali_memory_defer_bind.h b/mali/linux/mali_memory_defer_bind.h
new file mode 100644
index 0000000..4cf6f16
--- /dev/null
+++ b/mali/linux/mali_memory_defer_bind.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __MALI_MEMORY_DEFER_BIND_H_
+#define __MALI_MEMORY_DEFER_BIND_H_
+
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_uk_types.h"
+
+struct mali_gp_job;
+
+typedef struct mali_defer_mem {
+	struct list_head node;   /*dlist node in bind manager */
+	u32 flag;
+} mali_defer_mem;
+
+
+typedef struct mali_defer_mem_block {
+	struct list_head free_pages; /* page pool */
+	atomic_t num_free_pages;
+} mali_defer_mem_block;
+
+/* varying memory list need to bind */
+typedef struct mali_backend_bind_list {
+	struct list_head node;
+	struct mali_mem_backend *bkend;
+	u32 vaddr;
+	u32 page_num;
+	struct mali_session_data *session;
+	u32 flag;
+} mali_backend_bind_lists;
+
+
+typedef struct mali_defer_bind_manager {
+	atomic_t num_used_pages;
+	atomic_t num_dmem;
+} mali_defer_bind_manager;
+
+_mali_osk_errcode_t mali_mem_defer_bind_manager_init(void);
+void mali_mem_defer_bind_manager_destory(void);
+_mali_osk_errcode_t mali_mem_defer_bind(struct mali_gp_job *gp, struct mali_defer_mem_block *dmem_block);
+_mali_osk_errcode_t mali_mem_defer_bind_allocation_prepare(mali_mem_allocation *alloc, struct list_head *list,  u32 *required_varying_memsize);
+_mali_osk_errcode_t mali_mem_prepare_mem_for_job(struct mali_gp_job *next_gp_job, mali_defer_mem_block *dblock);
+void mali_mem_defer_dmem_free(struct mali_gp_job *gp);
+
+#endif
diff --git a/mali/linux/mali_memory_dma_buf.c b/mali/linux/mali_memory_dma_buf.c
new file mode 100755
index 0000000..59107fa
--- /dev/null
+++ b/mali/linux/mali_memory_dma_buf.c
@@ -0,0 +1,403 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/fs.h>      /* file system operations */
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)
+#include <linux/uaccess.h>
+#else
+#include <asm/uaccess.h>
+#endif
+#include <linux/dma-buf.h>
+#include <linux/scatterlist.h>
+#include <linux/rbtree.h>
+#include <linux/platform_device.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_linux.h"
+
+#include "mali_memory.h"
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_virtual.h"
+#include "mali_pp_job.h"
+
+/*
+ * Map DMA buf attachment \a mem into \a session at virtual address \a virt.
+ */
+static int mali_dma_buf_map(mali_mem_backend *mem_backend)
+{
+	mali_mem_allocation *alloc;
+	struct mali_dma_buf_attachment *mem;
+	struct  mali_session_data *session;
+	struct mali_page_directory *pagedir;
+	_mali_osk_errcode_t err;
+	struct scatterlist *sg;
+	u32 virt, flags, unmap_dma_size;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	mem = mem_backend->dma_buf.attachment;
+	MALI_DEBUG_ASSERT_POINTER(mem);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	unmap_dma_size = mem->buf->size;
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(mem->session == session);
+
+	virt = alloc->mali_vma_node.vm_node.start;
+	flags = alloc->flags;
+
+	mali_session_memory_lock(session);
+	mem->map_ref++;
+
+	MALI_DEBUG_PRINT(5, ("Mali DMA-buf: map attachment %p, new map_ref = %d\n", mem, mem->map_ref));
+#if (!defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)) && (defined(CONFIG_MALI_DMA_BUF_LAZY_MAP))
+	if (MALI_FALSE == mem->is_mapped)
+#else
+	if (1 == mem->map_ref)
+#endif
+	{
+		/* First reference taken, so we need to map the dma buf */
+		MALI_DEBUG_ASSERT(!mem->is_mapped);
+
+		mem->sgt = dma_buf_map_attachment(mem->attachment, DMA_BIDIRECTIONAL);
+		if (IS_ERR_OR_NULL(mem->sgt)) {
+			MALI_DEBUG_PRINT_ERROR(("Failed to map dma-buf attachment\n"));
+			mem->map_ref--;
+			mali_session_memory_unlock(session);
+			return -EFAULT;
+		}
+
+		err = mali_mem_mali_map_prepare(alloc);
+		if (_MALI_OSK_ERR_OK != err) {
+			MALI_DEBUG_PRINT(1, ("Mapping of DMA memory failed\n"));
+			mem->map_ref--;
+			mali_session_memory_unlock(session);
+			return -ENOMEM;
+		}
+
+		pagedir = mali_session_get_page_directory(session);
+		MALI_DEBUG_ASSERT_POINTER(pagedir);
+
+		for_each_sg(mem->sgt->sgl, sg, mem->sgt->nents, i) {
+			u32 size = sg_dma_len(sg);
+			dma_addr_t phys = sg_dma_address(sg);
+
+			unmap_dma_size -= size;
+			/* sg must be page aligned. */
+			MALI_DEBUG_ASSERT(0 == size % MALI_MMU_PAGE_SIZE);
+			MALI_DEBUG_ASSERT(0 == (phys & ~(uintptr_t)0xFFFFFFFF));
+
+			mali_mmu_pagedir_update(pagedir, virt, phys, size, MALI_MMU_FLAGS_DEFAULT);
+
+			virt += size;
+		}
+
+		if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+			u32 guard_phys;
+			MALI_DEBUG_PRINT(7, ("Mapping in extra guard page\n"));
+
+			guard_phys = sg_dma_address(mem->sgt->sgl);
+			mali_mmu_pagedir_update(pagedir, virt, guard_phys, MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+		}
+
+		mem->is_mapped = MALI_TRUE;
+
+		if (0 != unmap_dma_size) {
+			MALI_DEBUG_PRINT_ERROR(("The dma buf size isn't equal to the total scatterlists' dma length.\n"));
+			mali_session_memory_unlock(session);
+			return -EFAULT;
+		}
+
+		/* Wake up any thread waiting for buffer to become mapped */
+		wake_up_all(&mem->wait_queue);
+
+		mali_session_memory_unlock(session);
+	} else {
+		MALI_DEBUG_ASSERT(mem->is_mapped);
+		mali_session_memory_unlock(session);
+	}
+
+	return 0;
+}
+
+static void mali_dma_buf_unmap(mali_mem_allocation *alloc, struct mali_dma_buf_attachment *mem)
+{
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(mem);
+	MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	MALI_DEBUG_ASSERT_POINTER(alloc->session);
+
+	mali_session_memory_lock(alloc->session);
+	mem->map_ref--;
+
+	MALI_DEBUG_PRINT(5, ("Mali DMA-buf: unmap attachment %p, new map_ref = %d\n", mem, mem->map_ref));
+
+	if (0 == mem->map_ref) {
+		if (NULL != mem->sgt) {
+			dma_buf_unmap_attachment(mem->attachment, mem->sgt, DMA_BIDIRECTIONAL);
+			 mem->sgt = NULL;
+		}
+		if (MALI_TRUE == mem->is_mapped) {
+			mali_mem_mali_map_free(alloc->session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+					       alloc->flags);
+		}
+		mem->is_mapped = MALI_FALSE;
+	}
+
+	/* Wake up any thread waiting for buffer to become unmapped */
+	wake_up_all(&mem->wait_queue);
+
+	mali_session_memory_unlock(alloc->session);
+}
+
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+int mali_dma_buf_map_job(struct mali_pp_job *job)
+{
+	struct mali_dma_buf_attachment *mem;
+	_mali_osk_errcode_t err;
+	int i;
+	int ret = 0;
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	for (i = 0; i < num_memory_cookies; i++) {
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+		MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(NULL != mali_alloc);
+		if (MALI_MEM_DMA_BUF != mali_alloc->type) {
+			continue;
+		}
+
+		/* Get backend memory & Map on CPU */
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+		mem = mem_bkend->dma_buf.attachment;
+
+		MALI_DEBUG_ASSERT_POINTER(mem);
+		MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job));
+
+		err = mali_dma_buf_map(mem_bkend);
+		if (0 != err) {
+			MALI_DEBUG_PRINT_ERROR(("Mali DMA-buf: Failed to map dma-buf for mali address %x\n", mali_addr));
+			ret = -EFAULT;
+			continue;
+		}
+	}
+	return ret;
+}
+
+void mali_dma_buf_unmap_job(struct mali_pp_job *job)
+{
+	struct mali_dma_buf_attachment *mem;
+	int i;
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	for (i = 0; i < num_memory_cookies; i++) {
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+		MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(NULL != mali_alloc);
+		if (MALI_MEM_DMA_BUF != mali_alloc->type) {
+			continue;
+		}
+
+		/* Get backend memory & Map on CPU */
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+		mem = mem_bkend->dma_buf.attachment;
+
+		MALI_DEBUG_ASSERT_POINTER(mem);
+		MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job));
+		mali_dma_buf_unmap(mem_bkend->mali_allocation, mem);
+	}
+}
+#endif /* !CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH */
+
+int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *user_arg)
+{
+	_mali_uk_dma_buf_get_size_s args;
+	int fd;
+	struct dma_buf *buf;
+
+	/* get call arguments from user space. copy_from_user returns how many bytes which where NOT copied */
+	if (0 != copy_from_user(&args, (void __user *)user_arg, sizeof(_mali_uk_dma_buf_get_size_s))) {
+		return -EFAULT;
+	}
+
+	/* Do DMA-BUF stuff */
+	fd = args.mem_fd;
+
+	buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(buf)) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to get dma-buf from fd: %d\n", fd));
+		return PTR_RET(buf);
+	}
+
+	if (0 != put_user(buf->size, &user_arg->size)) {
+		dma_buf_put(buf);
+		return -EFAULT;
+	}
+
+	dma_buf_put(buf);
+
+	return 0;
+}
+
+_mali_osk_errcode_t mali_mem_bind_dma_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		int fd, u32 flags)
+{
+	struct dma_buf *buf;
+	struct mali_dma_buf_attachment *dma_mem;
+	struct  mali_session_data *session = alloc->session;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	/* get dma buffer */
+	buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(buf)) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Currently, mapping of the full buffer are supported. */
+	if (alloc->psize != buf->size) {
+		goto failed_alloc_mem;
+	}
+
+	dma_mem = _mali_osk_calloc(1, sizeof(struct mali_dma_buf_attachment));
+	if (NULL == dma_mem) {
+		goto failed_alloc_mem;
+	}
+
+	dma_mem->buf = buf;
+	dma_mem->session = session;
+#if (!defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)) && (defined(CONFIG_MALI_DMA_BUF_LAZY_MAP))
+	dma_mem->map_ref = 1;
+#else
+	dma_mem->map_ref = 0;
+#endif
+	init_waitqueue_head(&dma_mem->wait_queue);
+
+	dma_mem->attachment = dma_buf_attach(dma_mem->buf, &mali_platform_device->dev);
+	if (NULL == dma_mem->attachment) {
+		goto failed_dma_attach;
+	}
+
+	mem_backend->dma_buf.attachment = dma_mem;
+
+	alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP;
+	if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+	/* Map memory into session's Mali virtual address space. */
+	if (0 != mali_dma_buf_map(mem_backend)) {
+		goto Failed_dma_map;
+	}
+#endif
+
+	return _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+Failed_dma_map:
+	mali_dma_buf_unmap(alloc, dma_mem);
+#endif
+	/* Wait for buffer to become unmapped */
+	wait_event(dma_mem->wait_queue, !dma_mem->is_mapped);
+	MALI_DEBUG_ASSERT(!dma_mem->is_mapped);
+	dma_buf_detach(dma_mem->buf, dma_mem->attachment);
+failed_dma_attach:
+	_mali_osk_free(dma_mem);
+failed_alloc_mem:
+	dma_buf_put(buf);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_mem_unbind_dma_buf(mali_mem_backend *mem_backend)
+{
+	struct mali_dma_buf_attachment *mem;
+	struct  mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_DMA_BUF == mem_backend->type);
+
+	mem = mem_backend->dma_buf.attachment;
+
+	MALI_DEBUG_ASSERT_POINTER(mem);
+	MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	MALI_DEBUG_PRINT(3, ("Mali DMA-buf: release attachment %p\n", mem));
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend->mali_allocation);
+	session = mem_backend->mali_allocation->session;
+
+#if (defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)) ||((!defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)) && (defined(CONFIG_MALI_DMA_BUF_LAZY_MAP)))
+	/* We mapped implicitly on attach, so we need to unmap on release */
+	mali_dma_buf_unmap(mem_backend->mali_allocation, mem);
+#endif
+	/* Wait for buffer to become unmapped */
+	wait_event(mem->wait_queue, !mem->is_mapped);
+	MALI_DEBUG_ASSERT(!mem->is_mapped);
+
+	dma_buf_detach(mem->buf, mem->attachment);
+	dma_buf_put(mem->buf);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_session_memory_lock(session);
+	_mali_osk_free(mem);
+	mali_session_memory_unlock(session);
+}
diff --git a/mali/linux/mali_memory_dma_buf.h b/mali/linux/mali_memory_dma_buf.h
new file mode 100755
index 0000000..37b6f6b
--- /dev/null
+++ b/mali/linux/mali_memory_dma_buf.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_DMA_BUF_H__
+#define __MALI_MEMORY_DMA_BUF_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_memory.h"
+
+struct mali_pp_job;
+
+struct mali_dma_buf_attachment;
+struct mali_dma_buf_attachment {
+	struct dma_buf *buf;
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct mali_session_data *session;
+	int map_ref;
+	struct mutex map_lock;
+	mali_bool is_mapped;
+	wait_queue_head_t wait_queue;
+};
+
+int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *arg);
+
+void mali_mem_unbind_dma_buf(mali_mem_backend *mem_backend);
+
+_mali_osk_errcode_t mali_mem_bind_dma_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		int fd, u32 flags);
+
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+int mali_dma_buf_map_job(struct mali_pp_job *job);
+void mali_dma_buf_unmap_job(struct mali_pp_job *job);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MEMORY_DMA_BUF_H__ */
diff --git a/mali/linux/mali_memory_external.c b/mali/linux/mali_memory_external.c
new file mode 100755
index 0000000..c0097f6
--- /dev/null
+++ b/mali/linux/mali_memory_external.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_memory.h"
+#include "mali_mem_validation.h"
+#include "mali_uk_types.h"
+
+void mali_mem_unbind_ext_buf(mali_mem_backend *mem_backend)
+{
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT(MALI_MEM_EXTERNAL == mem_backend->type);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+_mali_osk_errcode_t mali_mem_bind_ext_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		u32 phys_addr,
+		u32 flag)
+{
+	struct mali_session_data *session;
+	_mali_osk_errcode_t err;
+	u32 virt, phys, size;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	size = alloc->psize;
+	session = (struct mali_session_data *)(uintptr_t)alloc->session;
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+
+	/* check arguments */
+	/* NULL might be a valid Mali address */
+	if (!size) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	/* size must be a multiple of the system page size */
+	if (size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	/* Validate the mali physical range */
+	if (_MALI_OSK_ERR_OK != mali_mem_validation_check(phys_addr, size)) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (flag & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+	mali_session_memory_lock(session);
+
+	virt = alloc->mali_vma_node.vm_node.start;
+	phys = phys_addr;
+
+	err = mali_mem_mali_map_prepare(alloc);
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_session_memory_unlock(session);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	mali_mmu_pagedir_update(session->page_directory, virt, phys, size, MALI_MMU_FLAGS_DEFAULT);
+
+	if (alloc->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		mali_mmu_pagedir_update(session->page_directory, virt + size, phys, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+	}
+	MALI_DEBUG_PRINT(3,
+			 ("Requested to map physical memory 0x%x-0x%x into virtual memory 0x%x\n",
+			  phys_addr, (phys_addr + size - 1),
+			  virt));
+	mali_session_memory_unlock(session);
+
+	MALI_SUCCESS;
+}
+
diff --git a/mali/linux/mali_memory_external.h b/mali/linux/mali_memory_external.h
new file mode 100644
index 0000000..800ac2a
--- /dev/null
+++ b/mali/linux/mali_memory_external.h
@@ -0,0 +1,29 @@
+
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_EXTERNAL_H__
+#define __MALI_MEMORY_EXTERNAL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+_mali_osk_errcode_t mali_mem_bind_ext_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		u32 phys_addr,
+		u32 flag);
+void mali_mem_unbind_ext_buf(mali_mem_backend *mem_backend);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/mali/linux/mali_memory_manager.c b/mali/linux/mali_memory_manager.c
new file mode 100755
index 0000000..afbcf78
--- /dev/null
+++ b/mali/linux/mali_memory_manager.c
@@ -0,0 +1,1033 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+
+#include <linux/platform_device.h>
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif
+#include <linux/idr.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_secure.h"
+#endif
+#if defined(CONFIG_MALI400_UMP)
+#include "mali_memory_ump.h"
+#endif
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_util.h"
+#include "mali_memory_external.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_ukk.h"
+#include "mali_memory_swap_alloc.h"
+
+#if (LINUX_VERSION_CODE == KERNEL_VERSION(3, 14, 0)) || (LINUX_VERSION_CODE == KERNEL_VERSION(4, 9, 0))
+#define AML_MALI_DEBUG 1
+#endif
+
+/*
+* New memory system interface
+*/
+
+/*inti idr for backend memory */
+struct idr mali_backend_idr;
+struct mutex mali_idr_mutex;
+
+extern void show_mem(unsigned int flags);
+
+/* init allocation manager */
+int mali_memory_manager_init(struct mali_allocation_manager *mgr)
+{
+	/* init Locks */
+	rwlock_init(&mgr->vm_lock);
+	mutex_init(&mgr->list_mutex);
+
+	/* init link */
+	INIT_LIST_HEAD(&mgr->head);
+
+	/* init RB tree */
+	mgr->allocation_mgr_rb = RB_ROOT;
+	mgr->mali_allocation_num = 0;
+	return 0;
+}
+
+/* Deinit allocation manager
+* Do some check for debug
+*/
+void mali_memory_manager_uninit(struct mali_allocation_manager *mgr)
+{
+	/* check RB tree is empty */
+	MALI_DEBUG_ASSERT(((void *)(mgr->allocation_mgr_rb.rb_node) == (void *)rb_last(&mgr->allocation_mgr_rb)));
+	/* check allocation List */
+	MALI_DEBUG_ASSERT(list_empty(&mgr->head));
+}
+
+/* Prepare memory descriptor */
+static mali_mem_allocation *mali_mem_allocation_struct_create(struct mali_session_data *session)
+{
+	mali_mem_allocation *mali_allocation;
+
+	/* Allocate memory */
+	mali_allocation = (mali_mem_allocation *)kzalloc(sizeof(mali_mem_allocation), GFP_KERNEL);
+	if (NULL == mali_allocation) {
+#ifdef AML_MALI_DEBUG
+		MALI_PRINT_ERROR(("mali_mem_allocation_struct_create: descriptor was NULL\n"));
+		show_mem(SHOW_MEM_FILTER_NODES);
+#endif
+
+		return NULL;
+	}
+
+	MALI_DEBUG_CODE(mali_allocation->magic = MALI_MEM_ALLOCATION_VALID_MAGIC);
+
+	/* do init */
+	mali_allocation->flags = 0;
+	mali_allocation->session = session;
+
+	INIT_LIST_HEAD(&mali_allocation->list);
+	_mali_osk_atomic_init(&mali_allocation->mem_alloc_refcount, 1);
+
+	/**
+	*add to session list
+	*/
+	mutex_lock(&session->allocation_mgr.list_mutex);
+	list_add_tail(&mali_allocation->list, &session->allocation_mgr.head);
+	session->allocation_mgr.mali_allocation_num++;
+	mutex_unlock(&session->allocation_mgr.list_mutex);
+
+	return mali_allocation;
+}
+
+void  mali_mem_allocation_struct_destory(mali_mem_allocation *alloc)
+{
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(alloc->session);
+	mutex_lock(&alloc->session->allocation_mgr.list_mutex);
+	list_del(&alloc->list);
+	alloc->session->allocation_mgr.mali_allocation_num--;
+	mutex_unlock(&alloc->session->allocation_mgr.list_mutex);
+
+	kfree(alloc);
+}
+
+int mali_mem_backend_struct_create(mali_mem_backend **backend, u32 psize)
+{
+	mali_mem_backend *mem_backend = NULL;
+	s32 ret = -ENOSPC;
+	s32 index = -1;
+	*backend = (mali_mem_backend *)kzalloc(sizeof(mali_mem_backend), GFP_KERNEL);
+	if (NULL == *backend) {
+#ifdef AML_MALI_DEBUG
+		MALI_PRINT_ERROR(("mali_mem_backend_struct_create: backend descriptor was NULL\n"));
+		show_mem(SHOW_MEM_FILTER_NODES);
+#endif
+		return -1;
+	}
+	mem_backend = *backend;
+	mem_backend->size = psize;
+	mutex_init(&mem_backend->mutex);
+	INIT_LIST_HEAD(&mem_backend->list);
+	mem_backend->using_count = 0;
+
+
+	/* link backend with id */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
+again:
+	if (!idr_pre_get(&mali_backend_idr, GFP_KERNEL)) {
+		kfree(mem_backend);
+		return -ENOMEM;
+	}
+	mutex_lock(&mali_idr_mutex);
+	ret = idr_get_new_above(&mali_backend_idr, mem_backend, 1, &index);
+	mutex_unlock(&mali_idr_mutex);
+
+	if (-ENOSPC == ret) {
+		kfree(mem_backend);
+		return -ENOSPC;
+	}
+	if (-EAGAIN == ret)
+		goto again;
+#else
+	mutex_lock(&mali_idr_mutex);
+	ret = idr_alloc(&mali_backend_idr, mem_backend, 1, MALI_S32_MAX, GFP_KERNEL);
+	mutex_unlock(&mali_idr_mutex);
+	index = ret;
+	if (ret < 0) {
+		MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_create: Can't allocate idr for backend! \n"));
+		kfree(mem_backend);
+		return -ENOSPC;
+	}
+#endif
+	return index;
+}
+
+
+static void mali_mem_backend_struct_destory(mali_mem_backend **backend, s32 backend_handle)
+{
+	mali_mem_backend *mem_backend = *backend;
+
+	mutex_lock(&mali_idr_mutex);
+	idr_remove(&mali_backend_idr, backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	kfree(mem_backend);
+	*backend = NULL;
+}
+
+mali_mem_backend *mali_mem_backend_struct_search(struct mali_session_data *session, u32 mali_address)
+{
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_address, 0);
+	if (NULL == mali_vma_node)  {
+		MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_search:vma node was NULL\n"));
+		return NULL;
+	}
+	mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	MALI_DEBUG_ASSERT(NULL != mem_bkend);
+	return mem_bkend;
+}
+
+static _mali_osk_errcode_t mali_mem_resize(struct mali_session_data *session, mali_mem_backend *mem_backend, u32 physical_size)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	int retval = 0;
+	mali_mem_allocation *mali_allocation = NULL;
+	mali_mem_os_mem tmp_os_mem;
+	s32 change_page_count;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_PRINT(4, (" mali_mem_resize_memory called! \n"));
+	MALI_DEBUG_ASSERT(0 == physical_size %  MALI_MMU_PAGE_SIZE);
+
+	mali_allocation = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(mali_allocation);
+
+	MALI_DEBUG_ASSERT(MALI_MEM_FLAG_CAN_RESIZE & mali_allocation->flags);
+	MALI_DEBUG_ASSERT(MALI_MEM_OS == mali_allocation->type);
+
+	mutex_lock(&mem_backend->mutex);
+
+	/* Do resize*/
+	if (physical_size > mem_backend->size) {
+		u32 add_size = physical_size - mem_backend->size;
+
+		MALI_DEBUG_ASSERT(0 == add_size %  MALI_MMU_PAGE_SIZE);
+
+		/* Allocate new pages from os mem */
+		retval = mali_mem_os_alloc_pages(&tmp_os_mem, add_size);
+
+		if (retval) {
+			if (-ENOMEM == retval) {
+				ret = _MALI_OSK_ERR_NOMEM;
+			} else {
+				ret = _MALI_OSK_ERR_FAULT;
+			}
+			MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory allocation failed !\n"));
+			goto failed_alloc_memory;
+		}
+
+		MALI_DEBUG_ASSERT(tmp_os_mem.count == add_size / MALI_MMU_PAGE_SIZE);
+
+		/* Resize the memory of the backend */
+		ret = mali_mem_os_resize_pages(&tmp_os_mem, &mem_backend->os_mem, 0, tmp_os_mem.count);
+
+		if (ret) {
+			MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory	resizing failed !\n"));
+			goto failed_resize_pages;
+		}
+
+		/*Resize cpu mapping */
+		if (NULL != mali_allocation->cpu_mapping.vma) {
+			ret = mali_mem_os_resize_cpu_map_locked(mem_backend, mali_allocation->cpu_mapping.vma, mali_allocation->cpu_mapping.vma->vm_start  + mem_backend->size, add_size);
+			if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+				MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: cpu mapping failed !\n"));
+				goto  failed_cpu_map;
+			}
+		}
+
+		/* Resize mali mapping */
+		_mali_osk_mutex_wait(session->memory_lock);
+		ret = mali_mem_mali_map_resize(mali_allocation, physical_size);
+
+		if (ret) {
+			MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_resize: mali map resize fail !\n"));
+			goto failed_gpu_map;
+		}
+
+		ret = mali_mem_os_mali_map(&mem_backend->os_mem, session, mali_allocation->mali_vma_node.vm_node.start,
+					   mali_allocation->psize / MALI_MMU_PAGE_SIZE, add_size / MALI_MMU_PAGE_SIZE, mali_allocation->mali_mapping.properties);
+		if (ret) {
+			MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: mali mapping failed !\n"));
+			goto failed_gpu_map;
+		}
+
+		_mali_osk_mutex_signal(session->memory_lock);
+	} else {
+		u32 dec_size, page_count;
+		u32 vaddr = 0;
+		INIT_LIST_HEAD(&tmp_os_mem.pages);
+		tmp_os_mem.count = 0;
+
+		dec_size = mem_backend->size - physical_size;
+		MALI_DEBUG_ASSERT(0 == dec_size %  MALI_MMU_PAGE_SIZE);
+
+		page_count = dec_size / MALI_MMU_PAGE_SIZE;
+		vaddr = mali_allocation->mali_vma_node.vm_node.start + physical_size;
+
+		/* Resize the memory of the backend */
+		ret = mali_mem_os_resize_pages(&mem_backend->os_mem, &tmp_os_mem, physical_size / MALI_MMU_PAGE_SIZE, page_count);
+
+		if (ret) {
+			MALI_DEBUG_PRINT(4, ("_mali_ukk_mem_resize: mali map resize failed!\n"));
+			goto failed_resize_pages;
+		}
+
+		/* Resize mali map */
+		_mali_osk_mutex_wait(session->memory_lock);
+		mali_mem_mali_map_free(session, dec_size, vaddr, mali_allocation->flags);
+		_mali_osk_mutex_signal(session->memory_lock);
+
+		/* Zap cpu mapping */
+		if (0 != mali_allocation->cpu_mapping.addr) {
+			MALI_DEBUG_ASSERT(NULL != mali_allocation->cpu_mapping.vma);
+			zap_vma_ptes(mali_allocation->cpu_mapping.vma, mali_allocation->cpu_mapping.vma->vm_start + physical_size, dec_size);
+		}
+
+		/* Free those extra pages */
+		mali_mem_os_free(&tmp_os_mem.pages, tmp_os_mem.count, MALI_FALSE);
+	}
+
+	/* Resize memory allocation and memory backend */
+	change_page_count = (s32)(physical_size - mem_backend->size) / MALI_MMU_PAGE_SIZE;
+	mali_allocation->psize = physical_size;
+	mem_backend->size = physical_size;
+	mutex_unlock(&mem_backend->mutex);
+
+	if (change_page_count > 0) {
+		atomic_add(change_page_count, &session->mali_mem_allocated_pages);
+		if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+			session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+
+	} else {
+		atomic_sub((s32)(-change_page_count), &session->mali_mem_allocated_pages);
+	}
+
+	return _MALI_OSK_ERR_OK;
+
+failed_gpu_map:
+	_mali_osk_mutex_signal(session->memory_lock);
+failed_cpu_map:
+	if (physical_size > mem_backend->size) {
+		mali_mem_os_resize_pages(&mem_backend->os_mem, &tmp_os_mem, mem_backend->size / MALI_MMU_PAGE_SIZE,
+					 (physical_size - mem_backend->size) / MALI_MMU_PAGE_SIZE);
+	} else {
+		mali_mem_os_resize_pages(&tmp_os_mem, &mem_backend->os_mem, 0, tmp_os_mem.count);
+	}
+failed_resize_pages:
+	if (0 != tmp_os_mem.count)
+		mali_mem_os_free(&tmp_os_mem.pages, tmp_os_mem.count, MALI_FALSE);
+failed_alloc_memory:
+
+	mutex_unlock(&mem_backend->mutex);
+	return ret;
+}
+
+
+/* Set GPU MMU properties */
+static void _mali_memory_gpu_map_property_set(u32 *properties, u32 flags)
+{
+	if (_MALI_MEMORY_GPU_READ_ALLOCATE & flags) {
+		*properties = MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE;
+	} else {
+		*properties = MALI_MMU_FLAGS_DEFAULT;
+	}
+}
+
+_mali_osk_errcode_t mali_mem_add_mem_size(struct mali_session_data *session, u32 mali_addr, u32 add_size)
+{
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_allocation *mali_allocation = NULL;
+	u32 new_physical_size;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(0 == add_size %  MALI_MMU_PAGE_SIZE);
+
+	/* Get the memory backend that need to be resize. */
+	mem_backend = mali_mem_backend_struct_search(session, mali_addr);
+
+	if (NULL == mem_backend)  {
+		MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory backend = NULL!\n"));
+		return ret;
+	}
+
+	mali_allocation = mem_backend->mali_allocation;
+
+	MALI_DEBUG_ASSERT_POINTER(mali_allocation);
+
+	new_physical_size = add_size + mem_backend->size;
+
+	if (new_physical_size > (mali_allocation->mali_vma_node.vm_node.size))
+		return ret;
+
+	MALI_DEBUG_ASSERT(new_physical_size != mem_backend->size);
+
+	ret = mali_mem_resize(session, mem_backend, new_physical_size);
+
+	return ret;
+}
+
+/**
+*  function@_mali_ukk_mem_allocate - allocate mali memory
+*/
+_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args)
+{
+	struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	int retval = 0;
+	mali_mem_allocation *mali_allocation = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+
+	MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_allocate, vaddr=0x%x, size =0x%x! \n", args->gpu_vaddr, args->psize));
+
+	if (args->vsize < args->psize) {
+		MALI_PRINT_ERROR(("_mali_ukk_mem_allocate: vsize %d  shouldn't be less than psize %d\n", args->vsize, args->psize));
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	} else if ((args->vsize % _MALI_OSK_MALI_PAGE_SIZE) || (args->psize % _MALI_OSK_MALI_PAGE_SIZE)) {
+		MALI_PRINT_ERROR(("_mali_ukk_mem_allocate: not supported non page aligned size-->pszie %d, vsize %d\n",  args->psize, args->vsize));
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	} else if ((args->vsize != args->psize) && ((args->flags & _MALI_MEMORY_ALLOCATE_SWAPPABLE) || (args->flags & _MALI_MEMORY_ALLOCATE_SECURE))) {
+		MALI_PRINT_ERROR(("_mali_ukk_mem_allocate: not supported mem resizeable for mem flag %d\n",  args->flags));
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	/* Check if the address is allocated
+	*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, args->gpu_vaddr, 0);
+
+	if (unlikely(mali_vma_node)) {
+		MALI_DEBUG_PRINT_ERROR(("The mali virtual address has already been used ! \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+	/**
+	*create mali memory allocation
+	*/
+
+	mali_allocation = mali_mem_allocation_struct_create(session);
+
+	if (mali_allocation == NULL) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_allocate: Failed to create allocation struct! \n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	mali_allocation->psize = args->psize;
+	mali_allocation->vsize = args->vsize;
+
+	/* MALI_MEM_OS if need to support mem resize,
+	 * or MALI_MEM_BLOCK if have dedicated memory,
+	 * or MALI_MEM_OS,
+	 * or MALI_MEM_SWAP.
+	 */
+	if (args->flags & _MALI_MEMORY_ALLOCATE_SWAPPABLE) {
+		mali_allocation->type = MALI_MEM_SWAP;
+	} else if (args->flags & _MALI_MEMORY_ALLOCATE_RESIZEABLE) {
+		mali_allocation->type = MALI_MEM_OS;
+		mali_allocation->flags |= MALI_MEM_FLAG_CAN_RESIZE;
+	} else if (args->flags & _MALI_MEMORY_ALLOCATE_SECURE) {
+		mali_allocation->type = MALI_MEM_SECURE;
+	} else if (MALI_TRUE == mali_memory_have_dedicated_memory()) {
+		mali_allocation->type = MALI_MEM_BLOCK;
+	} else {
+		mali_allocation->type = MALI_MEM_OS;
+	}
+
+	/**
+	*add allocation node to RB tree for index
+	*/
+	mali_allocation->mali_vma_node.vm_node.start = args->gpu_vaddr;
+	mali_allocation->mali_vma_node.vm_node.size = args->vsize;
+
+	mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, args->psize);
+	if (mali_allocation->backend_handle < 0) {
+		ret = _MALI_OSK_ERR_NOMEM;
+		MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0! \n"));
+		goto failed_alloc_backend;
+	}
+
+
+	mem_backend->mali_allocation = mali_allocation;
+	mem_backend->type = mali_allocation->type;
+
+	mali_allocation->mali_mapping.addr = args->gpu_vaddr;
+
+	/* set gpu mmu propery */
+	_mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags);
+	/* do prepare for MALI mapping */
+	if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) {
+		_mali_osk_mutex_wait(session->memory_lock);
+
+		ret = mali_mem_mali_map_prepare(mali_allocation);
+		if (0 != ret) {
+			_mali_osk_mutex_signal(session->memory_lock);
+			goto failed_prepare_map;
+		}
+		_mali_osk_mutex_signal(session->memory_lock);
+	}
+
+	if (mali_allocation->psize == 0) {
+		mem_backend->os_mem.count = 0;
+		INIT_LIST_HEAD(&mem_backend->os_mem.pages);
+		goto done;
+	}
+
+	if (args->flags & _MALI_MEMORY_ALLOCATE_DEFER_BIND) {
+		mali_allocation->flags |= _MALI_MEMORY_ALLOCATE_DEFER_BIND;
+		mem_backend->flags |= MALI_MEM_BACKEND_FLAG_NOT_BINDED;
+		/* init for defer bind backend*/
+		mem_backend->os_mem.count = 0;
+		INIT_LIST_HEAD(&mem_backend->os_mem.pages);
+
+		goto done;
+	}
+
+	if (likely(mali_allocation->psize > 0)) {
+
+		if (MALI_MEM_SECURE == mem_backend->type) {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+			ret = mali_mem_secure_attach_dma_buf(&mem_backend->secure_mem, mem_backend->size, args->secure_shared_fd);
+			if (_MALI_OSK_ERR_OK != ret) {
+				MALI_DEBUG_PRINT(1, ("Failed to attach dma buf for secure memory! \n"));
+				goto failed_alloc_pages;
+			}
+#else
+			ret = _MALI_OSK_ERR_UNSUPPORTED;
+			MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory! \n"));
+			goto failed_alloc_pages;
+#endif
+		} else {
+
+			/**
+			*allocate physical memory
+			*/
+			if (mem_backend->type == MALI_MEM_OS) {
+				retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size);
+			} else if (mem_backend->type == MALI_MEM_BLOCK) {
+				/* try to allocated from BLOCK memory first, then try OS memory if failed.*/
+				if (mali_mem_block_alloc(&mem_backend->block_mem, mem_backend->size)) {
+					retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size);
+					mem_backend->type = MALI_MEM_OS;
+					mali_allocation->type = MALI_MEM_OS;
+				}
+			} else if (MALI_MEM_SWAP == mem_backend->type) {
+				retval = mali_mem_swap_alloc_pages(&mem_backend->swap_mem, mali_allocation->mali_vma_node.vm_node.size, &mem_backend->start_idx);
+			}  else {
+				/* ONLY support mem_os type */
+				MALI_DEBUG_ASSERT(0);
+			}
+
+			if (retval) {
+				ret = _MALI_OSK_ERR_NOMEM;
+				MALI_DEBUG_PRINT(1, (" can't allocate enough pages! \n"));
+				goto failed_alloc_pages;
+			}
+		}
+	}
+
+	/**
+	*map to GPU side
+	*/
+	if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) {
+		_mali_osk_mutex_wait(session->memory_lock);
+		/* Map on Mali */
+
+		if (mem_backend->type == MALI_MEM_OS) {
+			ret = mali_mem_os_mali_map(&mem_backend->os_mem, session, args->gpu_vaddr, 0,
+						   mem_backend->size / MALI_MMU_PAGE_SIZE, mali_allocation->mali_mapping.properties);
+
+		} else if (mem_backend->type == MALI_MEM_BLOCK) {
+			mali_mem_block_mali_map(&mem_backend->block_mem, session, args->gpu_vaddr,
+						mali_allocation->mali_mapping.properties);
+		} else if (mem_backend->type == MALI_MEM_SWAP) {
+			ret = mali_mem_swap_mali_map(&mem_backend->swap_mem, session, args->gpu_vaddr,
+						     mali_allocation->mali_mapping.properties);
+		} else if (mem_backend->type == MALI_MEM_SECURE) {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+			ret = mali_mem_secure_mali_map(&mem_backend->secure_mem, session, args->gpu_vaddr, mali_allocation->mali_mapping.properties);
+#endif
+		} else { /* unsupport type */
+			MALI_DEBUG_ASSERT(0);
+		}
+
+		_mali_osk_mutex_signal(session->memory_lock);
+	}
+done:
+	if (MALI_MEM_OS == mem_backend->type) {
+		atomic_add(mem_backend->os_mem.count, &session->mali_mem_allocated_pages);
+	} else if (MALI_MEM_BLOCK == mem_backend->type) {
+		atomic_add(mem_backend->block_mem.count, &session->mali_mem_allocated_pages);
+	} else if (MALI_MEM_SECURE == mem_backend->type) {
+		atomic_add(mem_backend->secure_mem.count, &session->mali_mem_allocated_pages);
+	} else {
+		MALI_DEBUG_ASSERT(MALI_MEM_SWAP == mem_backend->type);
+		atomic_add(mem_backend->swap_mem.count, &session->mali_mem_allocated_pages);
+		atomic_add(mem_backend->swap_mem.count, &session->mali_mem_array[mem_backend->type]);
+	}
+
+	if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+		session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+	}
+	return _MALI_OSK_ERR_OK;
+
+failed_alloc_pages:
+	mali_mem_mali_map_free(session, mali_allocation->psize, mali_allocation->mali_vma_node.vm_node.start, mali_allocation->flags);
+failed_prepare_map:
+	mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+failed_alloc_backend:
+
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_allocation);
+
+	return ret;
+}
+
+
+_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args)
+{
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	u32 vaddr = args->gpu_vaddr;
+	mali_mem_allocation *mali_alloc = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+
+	/* find mali allocation structure by vaddress*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, vaddr, 0);
+	if (NULL == mali_vma_node) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_free: invalid addr: 0x%x\n", vaddr));
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+
+	if (mali_alloc) {
+		if ((MALI_MEM_UMP == mali_alloc->type) || (MALI_MEM_DMA_BUF == mali_alloc->type)
+		    || (MALI_MEM_EXTERNAL == mali_alloc->type)) {
+			MALI_PRINT_ERROR(("_mali_ukk_mem_free: not supported for memory type %d\n",  mali_alloc->type));
+			return _MALI_OSK_ERR_UNSUPPORTED;
+		}
+		/* check ref_count */
+		args->free_pages_nr = mali_allocation_unref(&mali_alloc);
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/**
+* Function _mali_ukk_mem_bind -- bind a external memory to a new GPU address
+* It will allocate a new mem allocation and bind external memory to it.
+* Supported backend type are:
+* _MALI_MEMORY_BIND_BACKEND_UMP
+* _MALI_MEMORY_BIND_BACKEND_DMA_BUF
+* _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY
+* CPU access is not supported yet
+*/
+_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args)
+{
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_allocation *mali_allocation = NULL;
+	MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_bind, vaddr=0x%x, size =0x%x! \n", args->vaddr, args->size));
+
+	/**
+	* allocate mali allocation.
+	*/
+	mali_allocation = mali_mem_allocation_struct_create(session);
+
+	if (mali_allocation == NULL) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	mali_allocation->psize = args->size;
+	mali_allocation->vsize = args->size;
+	mali_allocation->mali_mapping.addr = args->vaddr;
+
+	/* add allocation node to RB tree for index  */
+	mali_allocation->mali_vma_node.vm_node.start = args->vaddr;
+	mali_allocation->mali_vma_node.vm_node.size = args->size;
+	mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	/* allocate backend*/
+	if (mali_allocation->psize > 0) {
+		mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize);
+		if (mali_allocation->backend_handle < 0) {
+			goto Failed_alloc_backend;
+		}
+
+	} else {
+		goto Failed_alloc_backend;
+	}
+
+	mem_backend->size = mali_allocation->psize;
+	mem_backend->mali_allocation = mali_allocation;
+
+	switch (args->flags & _MALI_MEMORY_BIND_BACKEND_MASK) {
+	case  _MALI_MEMORY_BIND_BACKEND_UMP:
+#if defined(CONFIG_MALI400_UMP)
+		mali_allocation->type = MALI_MEM_UMP;
+		mem_backend->type = MALI_MEM_UMP;
+		ret = mali_mem_bind_ump_buf(mali_allocation, mem_backend,
+					    args->mem_union.bind_ump.secure_id, args->mem_union.bind_ump.flags);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Bind ump buf failed\n"));
+			goto  Failed_bind_backend;
+		}
+#else
+		MALI_DEBUG_PRINT(1, ("UMP not supported\n"));
+		goto Failed_bind_backend;
+#endif
+		break;
+	case  _MALI_MEMORY_BIND_BACKEND_DMA_BUF:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		mali_allocation->type = MALI_MEM_DMA_BUF;
+		mem_backend->type = MALI_MEM_DMA_BUF;
+		ret = mali_mem_bind_dma_buf(mali_allocation, mem_backend,
+					    args->mem_union.bind_dma_buf.mem_fd, args->mem_union.bind_dma_buf.flags);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Bind dma buf failed\n"));
+			goto Failed_bind_backend;
+		}
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported\n"));
+		goto Failed_bind_backend;
+#endif
+		break;
+	case _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY:
+		/* not allowed */
+		MALI_DEBUG_PRINT_ERROR(("Mali internal memory type not supported !\n"));
+		goto Failed_bind_backend;
+		break;
+
+	case _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY:
+		mali_allocation->type = MALI_MEM_EXTERNAL;
+		mem_backend->type = MALI_MEM_EXTERNAL;
+		ret = mali_mem_bind_ext_buf(mali_allocation, mem_backend, args->mem_union.bind_ext_memory.phys_addr,
+					    args->mem_union.bind_ext_memory.flags);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Bind external buf failed\n"));
+			goto Failed_bind_backend;
+		}
+		break;
+
+	case _MALI_MEMORY_BIND_BACKEND_EXT_COW:
+		/* not allowed */
+		MALI_DEBUG_PRINT_ERROR(("External cow memory  type not supported !\n"));
+		goto Failed_bind_backend;
+		break;
+
+	default:
+		MALI_DEBUG_PRINT_ERROR(("Invalid memory type  not supported !\n"));
+		goto Failed_bind_backend;
+		break;
+	}
+	MALI_DEBUG_ASSERT(0 == mem_backend->size % MALI_MMU_PAGE_SIZE);
+	atomic_add(mem_backend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_backend->type]);
+	return _MALI_OSK_ERR_OK;
+
+Failed_bind_backend:
+	mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+
+Failed_alloc_backend:
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_allocation);
+
+	MALI_DEBUG_PRINT(1, (" _mali_ukk_mem_bind, return ERROR! \n"));
+	return ret;
+}
+
+
+/*
+* Function _mali_ukk_mem_unbind -- unbind a external memory to a new GPU address
+* This function unbind the backend memory and free the allocation
+* no ref_count for this type of memory
+*/
+_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args)
+{
+	/**/
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_allocation *mali_allocation = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+	u32 mali_addr = args->vaddr;
+	MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_unbind, vaddr=0x%x! \n", args->vaddr));
+
+	/* find the allocation by vaddr */
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+	if (likely(mali_vma_node)) {
+		MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start);
+		mali_allocation = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+	} else {
+		MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	if (NULL != mali_allocation) {
+
+		if ((MALI_MEM_UMP != mali_allocation->type) && (MALI_MEM_DMA_BUF != mali_allocation->type)
+		    && (MALI_MEM_EXTERNAL != mali_allocation->type)) {
+			MALI_PRINT_ERROR(("_mali_ukk_mem_unbind not supported for memory type %d\n",  mali_allocation->type));
+			return _MALI_OSK_ERR_UNSUPPORTED;
+		}
+
+		/* check ref_count */
+		mali_allocation_unref(&mali_allocation);
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/*
+* Function _mali_ukk_mem_cow --  COW for an allocation
+* This function allocate new pages for  a range (range, range+size) of allocation
+*  And Map it(keep use the not in range pages from target allocation ) to an GPU vaddr
+*/
+_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_backend *target_backend = NULL;
+	mali_mem_backend *mem_backend = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_allocation = NULL;
+
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	/* Get the target backend for cow */
+	target_backend = mali_mem_backend_struct_search(session, args->target_handle);
+
+	if (NULL == target_backend || 0 == target_backend->size) {
+		MALI_DEBUG_ASSERT_POINTER(target_backend);
+		MALI_DEBUG_ASSERT(0 != target_backend->size);
+		return ret;
+	}
+
+	/*Cow not support resized mem */
+	MALI_DEBUG_ASSERT(MALI_MEM_FLAG_CAN_RESIZE != (MALI_MEM_FLAG_CAN_RESIZE & target_backend->mali_allocation->flags));
+
+	/* Check if the new mali address is allocated */
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, args->vaddr, 0);
+
+	if (unlikely(mali_vma_node)) {
+		MALI_DEBUG_PRINT_ERROR(("The mali virtual address has already been used ! \n"));
+		return ret;
+	}
+
+	/* create new alloction for COW*/
+	mali_allocation = mali_mem_allocation_struct_create(session);
+	if (mali_allocation == NULL) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_cow: Failed to create allocation struct!\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	mali_allocation->psize = args->target_size;
+	mali_allocation->vsize = args->target_size;
+	mali_allocation->type = MALI_MEM_COW;
+
+	/*add allocation node to RB tree for index*/
+	mali_allocation->mali_vma_node.vm_node.start = args->vaddr;
+	mali_allocation->mali_vma_node.vm_node.size = mali_allocation->vsize;
+	mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	/* create new backend for COW memory */
+	mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize);
+	if (mali_allocation->backend_handle < 0) {
+		ret = _MALI_OSK_ERR_NOMEM;
+		MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0! \n"));
+		goto failed_alloc_backend;
+	}
+	mem_backend->mali_allocation = mali_allocation;
+	mem_backend->type = mali_allocation->type;
+
+	if (target_backend->type == MALI_MEM_SWAP ||
+	    (MALI_MEM_COW == target_backend->type && (MALI_MEM_BACKEND_FLAG_SWAP_COWED & target_backend->flags))) {
+		mem_backend->flags |= MALI_MEM_BACKEND_FLAG_SWAP_COWED;
+		/**
+		 *     CoWed swap backends couldn't be mapped as non-linear vma, because if one
+		 * vma is set with flag VM_NONLINEAR, the vma->vm_private_data will be used by kernel,
+		 * while in mali driver, we use this variable to store the pointer of mali_allocation, so there
+		 * is a conflict.
+		 *     To resolve this problem, we have to do some fake things, we reserved about 64MB
+		 * space from index 0, there isn't really page's index will be set from 0 to (64MB>>PAGE_SHIFT_NUM),
+		 * and all of CoWed swap memory backends' start_idx will be assigned with 0, and these
+		 * backends will be mapped as linear and will add to priority tree of global swap file, while
+		 * these vmas will never be found by using normal page->index, these pages in those vma
+		 * also couldn't be swapped out.
+		 */
+		mem_backend->start_idx = 0;
+	}
+
+	/* Add the target backend's cow count, also allocate new pages for COW backend from os mem
+	*for a modified range and keep the page which not in the modified range and Add ref to it
+	*/
+	MALI_DEBUG_PRINT(3, ("Cow mapping: target_addr: 0x%x;  cow_addr: 0x%x,  size: %u\n", target_backend->mali_allocation->mali_vma_node.vm_node.start,
+			     mali_allocation->mali_vma_node.vm_node.start, mali_allocation->mali_vma_node.vm_node.size));
+
+	ret = mali_memory_do_cow(target_backend, args->target_offset, args->target_size, mem_backend, args->range_start, args->range_size);
+	if (_MALI_OSK_ERR_OK != ret) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_cow: Failed to cow!\n"));
+		goto failed_do_cow;
+	}
+
+	/**
+	*map to GPU side
+	*/
+	mali_allocation->mali_mapping.addr = args->vaddr;
+	/* set gpu mmu propery */
+	_mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags);
+
+	_mali_osk_mutex_wait(session->memory_lock);
+	/* Map on Mali */
+	ret = mali_mem_mali_map_prepare(mali_allocation);
+	if (0 != ret) {
+		MALI_DEBUG_PRINT(1, (" prepare map fail! \n"));
+		goto failed_gpu_map;
+	}
+
+	if (!(mem_backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+		mali_mem_cow_mali_map(mem_backend, 0, mem_backend->size);
+	}
+
+	_mali_osk_mutex_signal(session->memory_lock);
+
+	mutex_lock(&target_backend->mutex);
+	target_backend->flags |= MALI_MEM_BACKEND_FLAG_COWED;
+	mutex_unlock(&target_backend->mutex);
+
+	atomic_add(args->range_size / MALI_MMU_PAGE_SIZE, &session->mali_mem_allocated_pages);
+	if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+		session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+	}
+	return _MALI_OSK_ERR_OK;
+
+failed_gpu_map:
+	_mali_osk_mutex_signal(session->memory_lock);
+	mali_mem_cow_release(mem_backend, MALI_FALSE);
+	mem_backend->cow_mem.count = 0;
+failed_do_cow:
+	mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+failed_alloc_backend:
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_allocation);
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_cow_modify_range(_mali_uk_cow_modify_range_s *args)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_backend *mem_backend = NULL;
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_cow_modify_range called! \n"));
+	/* Get the backend that need to be modified. */
+	mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+
+	if (NULL == mem_backend || 0 == mem_backend->size) {
+		MALI_DEBUG_ASSERT_POINTER(mem_backend);
+		MALI_DEBUG_ASSERT(0 != mem_backend->size);
+		return ret;
+	}
+
+	if (MALI_MEM_COW  != mem_backend->type) {
+		MALI_PRINT_ERROR(("_mali_ukk_mem_cow_modify_range: not supported for memory type %d !\n", mem_backend->type));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	ret =  mali_memory_cow_modify_range(mem_backend, args->range_start, args->size);
+	args->change_pages_nr = mem_backend->cow_mem.change_pages_nr;
+	if (_MALI_OSK_ERR_OK != ret)
+		return  ret;
+	_mali_osk_mutex_wait(session->memory_lock);
+	if (!(mem_backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+		mali_mem_cow_mali_map(mem_backend, args->range_start, args->size);
+	}
+	_mali_osk_mutex_signal(session->memory_lock);
+
+	atomic_add(args->change_pages_nr, &session->mali_mem_allocated_pages);
+	if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+		session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+_mali_osk_errcode_t _mali_ukk_mem_resize(_mali_uk_mem_resize_s *args)
+{
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_PRINT(4, (" mali_mem_resize_memory called! \n"));
+	MALI_DEBUG_ASSERT(0 == args->psize %  MALI_MMU_PAGE_SIZE);
+
+	/* Get the memory backend that need to be resize. */
+	mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+
+	if (NULL == mem_backend)  {
+		MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory backend = NULL!\n"));
+		return ret;
+	}
+
+	MALI_DEBUG_ASSERT(args->psize != mem_backend->size);
+
+	ret = mali_mem_resize(session, mem_backend, args->psize);
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_usage_get(_mali_uk_profiling_memory_usage_get_s *args)
+{
+	args->memory_usage = _mali_ukk_report_memory_usage();
+	if (0 != args->vaddr) {
+		mali_mem_backend *mem_backend = NULL;
+		struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+		/* Get the backend that need to be modified. */
+		mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+		if (NULL == mem_backend) {
+			MALI_DEBUG_ASSERT_POINTER(mem_backend);
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		if (MALI_MEM_COW == mem_backend->type)
+			args->change_pages_nr = mem_backend->cow_mem.change_pages_nr;
+	}
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/mali/linux/mali_memory_manager.h b/mali/linux/mali_memory_manager.h
new file mode 100644
index 0000000..caccd9e
--- /dev/null
+++ b/mali/linux/mali_memory_manager.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_MANAGER_H__
+#define __MALI_MEMORY_MANAGER_H__
+
+#include "mali_osk.h"
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_uk_types.h"
+
+struct mali_allocation_manager {
+	rwlock_t vm_lock;
+	struct rb_root allocation_mgr_rb;
+	struct list_head head;
+	struct mutex list_mutex;
+	u32 mali_allocation_num;
+};
+
+extern struct idr mali_backend_idr;
+extern struct mutex mali_idr_mutex;
+
+int mali_memory_manager_init(struct mali_allocation_manager *mgr);
+void mali_memory_manager_uninit(struct mali_allocation_manager *mgr);
+
+void  mali_mem_allocation_struct_destory(mali_mem_allocation *alloc);
+
+_mali_osk_errcode_t mali_mem_add_mem_size(struct mali_session_data *session, u32 mali_addr, u32 add_size);
+mali_mem_backend *mali_mem_backend_struct_search(struct mali_session_data *session, u32 mali_address);
+_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_cow_modify_range(_mali_uk_cow_modify_range_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_usage_get(_mali_uk_profiling_memory_usage_get_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_resize(_mali_uk_mem_resize_s *args);
+
+
+#endif
+
diff --git a/mali/linux/mali_memory_os_alloc.c b/mali/linux/mali_memory_os_alloc.c
new file mode 100755
index 0000000..cf037aa
--- /dev/null
+++ b/mali/linux/mali_memory_os_alloc.c
@@ -0,0 +1,844 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/version.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+
+#include "mali_osk.h"
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_kernel_linux.h"
+
+/* Minimum size of allocator page pool */
+#define MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB * 256)
+#define MALI_OS_MEMORY_POOL_TRIM_JIFFIES (10 * CONFIG_HZ) /* Default to 10s */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
+static unsigned long dma_attrs_wc= 0;
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+/* Write combine dma_attrs */
+static DEFINE_DMA_ATTRS(dma_attrs_wc);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask);
+#else
+static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask);
+#endif
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc);
+#else
+static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc);
+static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc);
+#endif
+#endif
+static void mali_mem_os_trim_pool(struct work_struct *work);
+extern void show_mem(unsigned int flags);
+
+struct mali_mem_os_allocator mali_mem_os_allocator = {
+	.pool_lock = __SPIN_LOCK_UNLOCKED(pool_lock),
+	.pool_pages = LIST_HEAD_INIT(mali_mem_os_allocator.pool_pages),
+	.pool_count = 0,
+
+	.allocated_pages = ATOMIC_INIT(0),
+	.allocation_limit = 0,
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	.shrinker.shrink = mali_mem_os_shrink,
+#else
+	.shrinker.count_objects = mali_mem_os_shrink_count,
+	.shrinker.scan_objects = mali_mem_os_shrink,
+#endif
+	.shrinker.seeks = DEFAULT_SEEKS,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	.timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool, TIMER_DEFERRABLE),
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)
+	.timed_shrinker = __DEFERRED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool),
+#else
+	.timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool),
+#endif
+};
+
+u32 mali_mem_os_free(struct list_head *os_pages, u32 pages_count, mali_bool cow_flag)
+{
+	LIST_HEAD(pages);
+	struct mali_page_node *m_page, *m_tmp;
+	u32 free_pages_nr = 0;
+
+	if (MALI_TRUE == cow_flag) {
+		list_for_each_entry_safe(m_page, m_tmp, os_pages, list) {
+			/*only handle OS node here */
+			if (m_page->type == MALI_PAGE_NODE_OS) {
+				if (1 == _mali_page_node_get_ref_count(m_page)) {
+					list_move(&m_page->list, &pages);
+					atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+					free_pages_nr ++;
+				} else {
+					_mali_page_node_unref(m_page);
+					m_page->page = NULL;
+					list_del(&m_page->list);
+					kfree(m_page);
+				}
+			}
+		}
+	} else {
+		list_cut_position(&pages, os_pages, os_pages->prev);
+		atomic_sub(pages_count, &mali_mem_os_allocator.allocated_pages);
+		free_pages_nr = pages_count;
+	}
+
+	/* Put pages on pool. */
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+	list_splice(&pages, &mali_mem_os_allocator.pool_pages);
+	mali_mem_os_allocator.pool_count += free_pages_nr;
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+		MALI_DEBUG_PRINT(5, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count));
+		queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES);
+	}
+	return free_pages_nr;
+}
+
+/**
+* put page without put it into page pool
+*/
+_mali_osk_errcode_t mali_mem_os_put_page(struct page *page)
+{
+	MALI_DEBUG_ASSERT_POINTER(page);
+	if (1 == page_count(page)) {
+		atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+		dma_unmap_page(&mali_platform_device->dev, page_private(page),
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+		ClearPagePrivate(page);
+	}
+	put_page(page);
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mem_os_resize_pages(mali_mem_os_mem *mem_from, mali_mem_os_mem *mem_to, u32 start_page, u32 page_count)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	u32 i = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_from);
+	MALI_DEBUG_ASSERT_POINTER(mem_to);
+
+	if (mem_from->count < start_page + page_count) {
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	list_for_each_entry_safe(m_page, m_tmp, &mem_from->pages, list) {
+		if (i >= start_page && i < start_page + page_count) {
+			list_move_tail(&m_page->list, &mem_to->pages);
+			mem_from->count--;
+			mem_to->count++;
+		}
+		i++;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size)
+{
+	struct page *new_page;
+	LIST_HEAD(pages_list);
+	size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE;
+	size_t remaining = page_count;
+	struct mali_page_node *m_page, *m_tmp;
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(os_mem);
+
+	if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) {
+		MALI_DEBUG_PRINT(2, ("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n",
+				     size,
+				     atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE,
+				     mali_mem_os_allocator.allocation_limit));
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&os_mem->pages);
+	os_mem->count = page_count;
+
+	/* Grab pages from pool. */
+	{
+		size_t pool_pages;
+		spin_lock(&mali_mem_os_allocator.pool_lock);
+		pool_pages = min(remaining, mali_mem_os_allocator.pool_count);
+		for (i = pool_pages; i > 0; i--) {
+			BUG_ON(list_empty(&mali_mem_os_allocator.pool_pages));
+			list_move(mali_mem_os_allocator.pool_pages.next, &pages_list);
+		}
+		mali_mem_os_allocator.pool_count -= pool_pages;
+		remaining -= pool_pages;
+		spin_unlock(&mali_mem_os_allocator.pool_lock);
+	}
+
+	/* Process pages from pool. */
+	i = 0;
+	list_for_each_entry_safe(m_page, m_tmp, &pages_list, list) {
+		BUG_ON(NULL == m_page);
+
+		list_move_tail(&m_page->list, &os_mem->pages);
+	}
+
+	/* Allocate new pages, if needed. */
+	for (i = 0; i < remaining; i++) {
+		dma_addr_t dma_addr;
+		gfp_t flags = __GFP_ZERO | __GFP_REPEAT | __GFP_NOWARN | __GFP_COLD;
+		int err;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE)
+		flags |= GFP_HIGHUSER;
+#else
+#ifdef CONFIG_ZONE_DMA32
+		flags |= GFP_DMA32;
+#else
+#ifdef CONFIG_ZONE_DMA
+		flags |= GFP_DMA;
+#else
+		/* arm64 utgard only work on < 4G, but the kernel
+		 * didn't provide method to allocte memory < 4G
+		 */
+		MALI_DEBUG_ASSERT(0);
+#endif
+#endif
+#endif
+
+		new_page = alloc_page(flags);
+
+		if (unlikely(NULL == new_page)) {
+			/* Calculate the number of pages actually allocated, and free them. */
+#ifdef AML_MALI_DEBUG
+			MALI_PRINT_ERROR(("alloc_page() return NULL\n"));
+			show_mem(SHOW_MEM_FILTER_NODES);
+#endif
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+			return -ENOMEM;
+		}
+
+		/* Ensure page is flushed from CPU caches. */
+		dma_addr = dma_map_page(&mali_platform_device->dev, new_page,
+					0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+		dma_unmap_page(&mali_platform_device->dev, dma_addr,
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+		dma_addr = dma_map_page(&mali_platform_device->dev, new_page,
+					0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+		err = dma_mapping_error(&mali_platform_device->dev, dma_addr);
+		if (unlikely(err)) {
+			MALI_DEBUG_PRINT_ERROR(("OS Mem: Failed to DMA map page %p: %u",
+						new_page, err));
+			__free_page(new_page);
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+			return -EFAULT;
+		}
+
+		/* Store page phys addr */
+		SetPagePrivate(new_page);
+		set_page_private(new_page, dma_addr);
+
+		m_page = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+		if (unlikely(NULL == m_page)) {
+#ifdef AML_MALI_DEBUG
+			show_mem(SHOW_MEM_FILTER_NODES);
+#endif
+			MALI_PRINT_ERROR(("OS Mem: Can't allocate mali_page node! \n"));
+			dma_unmap_page(&mali_platform_device->dev, page_private(new_page),
+				       _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+			ClearPagePrivate(new_page);
+			__free_page(new_page);
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+			return -EFAULT;
+		}
+		m_page->page = new_page;
+
+		list_add_tail(&m_page->list, &os_mem->pages);
+	}
+
+	atomic_add(page_count, &mali_mem_os_allocator.allocated_pages);
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES > mali_mem_os_allocator.pool_count) {
+		MALI_DEBUG_PRINT(4, ("OS Mem: Stopping pool trim timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count));
+		cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker);
+	}
+
+	return 0;
+}
+
+
+_mali_osk_errcode_t mali_mem_os_mali_map(mali_mem_os_mem *os_mem, struct mali_session_data *session, u32 vaddr, u32 start_page, u32 mapping_pgae_num, u32 props)
+{
+	struct mali_page_directory *pagedir = session->page_directory;
+	struct mali_page_node *m_page;
+	u32 virt;
+	u32 prop = props;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(os_mem);
+
+	MALI_DEBUG_ASSERT(start_page <= os_mem->count);
+	MALI_DEBUG_ASSERT((start_page + mapping_pgae_num) <= os_mem->count);
+
+	if ((start_page + mapping_pgae_num) == os_mem->count) {
+
+		virt = vaddr + MALI_MMU_PAGE_SIZE * (start_page + mapping_pgae_num);
+
+		list_for_each_entry_reverse(m_page, &os_mem->pages, list) {
+
+			virt -= MALI_MMU_PAGE_SIZE;
+			if (mapping_pgae_num > 0) {
+				dma_addr_t phys = page_private(m_page->page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+				/* Verify that the "physical" address is 32-bit and
+				* usable for Mali, when on a system with bus addresses
+				* wider than 32-bit. */
+				MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+				mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+			} else {
+				break;
+			}
+			mapping_pgae_num--;
+		}
+
+	} else {
+		u32 i = 0;
+		virt = vaddr;
+		list_for_each_entry(m_page, &os_mem->pages, list) {
+
+			if (i >= start_page) {
+				dma_addr_t phys = page_private(m_page->page);
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+				/* Verify that the "physical" address is 32-bit and
+				* usable for Mali, when on a system with bus addresses
+				* wider than 32-bit. */
+				MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+				mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+			}
+			i++;
+			virt += MALI_MMU_PAGE_SIZE;
+		}
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+
+void mali_mem_os_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+int mali_mem_os_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+	mali_mem_os_mem *os_mem = &mem_bkend->os_mem;
+	struct mali_page_node *m_page;
+	struct page *page;
+	int ret;
+	unsigned long addr = vma->vm_start;
+	MALI_DEBUG_ASSERT(MALI_MEM_OS == mem_bkend->type);
+
+	list_for_each_entry(m_page, &os_mem->pages, list) {
+		/* We should use vm_insert_page, but it does a dcache
+		 * flush which makes it way slower than remap_pfn_range or vm_insert_pfn.
+		ret = vm_insert_page(vma, addr, page);
+		*/
+		page = m_page->page;
+		ret = vm_insert_pfn(vma, addr, page_to_pfn(page));
+
+		if (unlikely(0 != ret)) {
+			return -EFAULT;
+		}
+		addr += _MALI_OSK_MALI_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+_mali_osk_errcode_t mali_mem_os_resize_cpu_map_locked(mali_mem_backend *mem_bkend, struct vm_area_struct *vma, unsigned long start_vaddr, u32 mappig_size)
+{
+	mali_mem_os_mem *os_mem = &mem_bkend->os_mem;
+	struct mali_page_node *m_page;
+	int ret;
+	int offset;
+	int mapping_page_num;
+	int count ;
+
+	unsigned long vstart = vma->vm_start;
+	count = 0;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+	MALI_DEBUG_ASSERT(0 == start_vaddr % _MALI_OSK_MALI_PAGE_SIZE);
+	MALI_DEBUG_ASSERT(0 == vstart % _MALI_OSK_MALI_PAGE_SIZE);
+	offset = (start_vaddr - vstart) / _MALI_OSK_MALI_PAGE_SIZE;
+	MALI_DEBUG_ASSERT(offset <= os_mem->count);
+	mapping_page_num = mappig_size / _MALI_OSK_MALI_PAGE_SIZE;
+	MALI_DEBUG_ASSERT((offset + mapping_page_num) <= os_mem->count);
+
+	if ((offset + mapping_page_num) == os_mem->count) {
+
+		unsigned long vm_end = start_vaddr + mappig_size;
+
+		list_for_each_entry_reverse(m_page, &os_mem->pages, list) {
+
+			vm_end -= _MALI_OSK_MALI_PAGE_SIZE;
+			if (mapping_page_num > 0) {
+				ret = vm_insert_pfn(vma, vm_end, page_to_pfn(m_page->page));
+
+				if (unlikely(0 != ret)) {
+					/*will return -EBUSY If the page has already been mapped into table, but it's OK*/
+					if (-EBUSY == ret) {
+						break;
+					} else {
+						MALI_DEBUG_PRINT(1, ("OS Mem: mali_mem_os_resize_cpu_map_locked failed, ret = %d, offset is %d,page_count is %d\n",
+								     ret,  offset + mapping_page_num, os_mem->count));
+					}
+					return _MALI_OSK_ERR_FAULT;
+				}
+			} else {
+				break;
+			}
+			mapping_page_num--;
+
+		}
+	} else {
+
+		list_for_each_entry(m_page, &os_mem->pages, list) {
+			if (count >= offset) {
+
+				ret = vm_insert_pfn(vma, vstart, page_to_pfn(m_page->page));
+
+				if (unlikely(0 != ret)) {
+					/*will return -EBUSY If the page has already been mapped into table, but it's OK*/
+					if (-EBUSY == ret) {
+						break;
+					} else {
+						MALI_DEBUG_PRINT(1, ("OS Mem: mali_mem_os_resize_cpu_map_locked failed, ret = %d, count is %d, offset is %d,page_count is %d\n",
+								     ret, count, offset, os_mem->count));
+					}
+					return _MALI_OSK_ERR_FAULT;
+				}
+			}
+			count++;
+			vstart += _MALI_OSK_MALI_PAGE_SIZE;
+		}
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+u32 mali_mem_os_release(mali_mem_backend *mem_bkend)
+{
+
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	u32 free_pages_nr = 0;
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	MALI_DEBUG_ASSERT(MALI_MEM_OS == mem_bkend->type);
+
+	alloc = mem_bkend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	/* Unmap the memory from the mali virtual address space. */
+	mali_mem_os_mali_unmap(alloc);
+	mutex_lock(&mem_bkend->mutex);
+	/* Free pages */
+	if (MALI_MEM_BACKEND_FLAG_COWED & mem_bkend->flags) {
+		/* Lock to avoid the free race condition for the cow shared memory page node. */
+		_mali_osk_mutex_wait(session->cow_lock);
+		free_pages_nr = mali_mem_os_free(&mem_bkend->os_mem.pages, mem_bkend->os_mem.count, MALI_TRUE);
+		_mali_osk_mutex_signal(session->cow_lock);
+	} else {
+		free_pages_nr = mali_mem_os_free(&mem_bkend->os_mem.pages, mem_bkend->os_mem.count, MALI_FALSE);
+	}
+	mutex_unlock(&mem_bkend->mutex);
+
+	MALI_DEBUG_PRINT(4, ("OS Mem free : allocated size = 0x%x, free size = 0x%x\n", mem_bkend->os_mem.count * _MALI_OSK_MALI_PAGE_SIZE,
+			     free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+
+	mem_bkend->os_mem.count = 0;
+	return free_pages_nr;
+}
+
+
+#define MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE 128
+static struct {
+	struct {
+		mali_dma_addr phys;
+		mali_io_address mapping;
+	} page[MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE];
+	size_t count;
+	spinlock_t lock;
+} mali_mem_page_table_page_pool = {
+	.count = 0,
+	.lock = __SPIN_LOCK_UNLOCKED(pool_lock),
+};
+
+_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_NOMEM;
+	dma_addr_t tmp_phys;
+
+	spin_lock(&mali_mem_page_table_page_pool.lock);
+	if (0 < mali_mem_page_table_page_pool.count) {
+		u32 i = --mali_mem_page_table_page_pool.count;
+		*phys = mali_mem_page_table_page_pool.page[i].phys;
+		*mapping = mali_mem_page_table_page_pool.page[i].mapping;
+
+		ret = _MALI_OSK_ERR_OK;
+	}
+	spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+	if (_MALI_OSK_ERR_OK != ret) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
+		*mapping = dma_alloc_attrs(&mali_platform_device->dev,
+					   _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys,
+					   GFP_KERNEL, dma_attrs_wc);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		*mapping = dma_alloc_attrs(&mali_platform_device->dev,
+					   _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys,
+					   GFP_KERNEL, &dma_attrs_wc);
+#else
+		*mapping = dma_alloc_writecombine(&mali_platform_device->dev,
+						  _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys, GFP_KERNEL);
+#endif
+		if (NULL != *mapping) {
+			ret = _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+			/* Verify that the "physical" address is 32-bit and
+			 * usable for Mali, when on a system with bus addresses
+			 * wider than 32-bit. */
+			MALI_DEBUG_ASSERT(0 == (tmp_phys >> 32));
+#endif
+
+			*phys = (mali_dma_addr)tmp_phys;
+		}
+	}
+#ifdef AML_MALI_DEBUG
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_PRINT_ERROR(("dma_alloc_attrs() return NULL\n"));
+			show_mem(SHOW_MEM_FILTER_NODES);
+		}
+#endif
+
+	return ret;
+}
+
+void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt)
+{
+	spin_lock(&mali_mem_page_table_page_pool.lock);
+	if (MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE > mali_mem_page_table_page_pool.count) {
+		u32 i = mali_mem_page_table_page_pool.count;
+		mali_mem_page_table_page_pool.page[i].phys = phys;
+		mali_mem_page_table_page_pool.page[i].mapping = virt;
+
+		++mali_mem_page_table_page_pool.count;
+
+		spin_unlock(&mali_mem_page_table_page_pool.lock);
+	} else {
+		spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
+		dma_free_attrs(&mali_platform_device->dev,
+			     _MALI_OSK_MALI_PAGE_SIZE, virt, phys,
+			     dma_attrs_wc);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		dma_free_attrs(&mali_platform_device->dev,
+			       _MALI_OSK_MALI_PAGE_SIZE, virt, phys,
+			       &dma_attrs_wc);
+#else
+		dma_free_writecombine(&mali_platform_device->dev,
+				      _MALI_OSK_MALI_PAGE_SIZE, virt, phys);
+#endif
+	}
+}
+
+void mali_mem_os_free_page_node(struct mali_page_node *m_page)
+{
+	struct page *page = m_page->page;
+	MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_OS);
+
+	if (1  == page_count(page)) {
+		dma_unmap_page(&mali_platform_device->dev, page_private(page),
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+		ClearPagePrivate(page);
+	}
+	__free_page(page);
+	m_page->page = NULL;
+	list_del(&m_page->list);
+	kfree(m_page);
+}
+
+/* The maximum number of page table pool pages to free in one go. */
+#define MALI_MEM_OS_CHUNK_TO_FREE 64UL
+
+/* Free a certain number of pages from the page table page pool.
+ * The pool lock must be held when calling the function, and the lock will be
+ * released before returning.
+ */
+static void mali_mem_os_page_table_pool_free(size_t nr_to_free)
+{
+	mali_dma_addr phys_arr[MALI_MEM_OS_CHUNK_TO_FREE];
+	void *virt_arr[MALI_MEM_OS_CHUNK_TO_FREE];
+	u32 i;
+
+	MALI_DEBUG_ASSERT(nr_to_free <= MALI_MEM_OS_CHUNK_TO_FREE);
+
+	/* Remove nr_to_free pages from the pool and store them locally on stack. */
+	for (i = 0; i < nr_to_free; i++) {
+		u32 pool_index = mali_mem_page_table_page_pool.count - i - 1;
+
+		phys_arr[i] = mali_mem_page_table_page_pool.page[pool_index].phys;
+		virt_arr[i] = mali_mem_page_table_page_pool.page[pool_index].mapping;
+	}
+
+	mali_mem_page_table_page_pool.count -= nr_to_free;
+
+	spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+	/* After releasing the spinlock: free the pages we removed from the pool. */
+	for (i = 0; i < nr_to_free; i++) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
+		dma_free_attrs(&mali_platform_device->dev, _MALI_OSK_MALI_PAGE_SIZE,
+			       virt_arr[i], (dma_addr_t)phys_arr[i], dma_attrs_wc);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		dma_free_attrs(&mali_platform_device->dev, _MALI_OSK_MALI_PAGE_SIZE,
+			       virt_arr[i], (dma_addr_t)phys_arr[i], &dma_attrs_wc);
+#else
+		dma_free_writecombine(&mali_platform_device->dev,
+				      _MALI_OSK_MALI_PAGE_SIZE,
+				      virt_arr[i], (dma_addr_t)phys_arr[i]);
+#endif
+	}
+}
+
+static void mali_mem_os_trim_page_table_page_pool(void)
+{
+	size_t nr_to_free = 0;
+	size_t nr_to_keep;
+
+	/* Keep 2 page table pages for each 1024 pages in the page cache. */
+	nr_to_keep = mali_mem_os_allocator.pool_count / 512;
+	/* And a minimum of eight pages, to accomodate new sessions. */
+	nr_to_keep += 8;
+
+	if (0 == spin_trylock(&mali_mem_page_table_page_pool.lock)) return;
+
+	if (nr_to_keep < mali_mem_page_table_page_pool.count) {
+		nr_to_free = mali_mem_page_table_page_pool.count - nr_to_keep;
+		nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, nr_to_free);
+	}
+
+	/* Pool lock will be released by the callee. */
+	mali_mem_os_page_table_pool_free(nr_to_free);
+}
+
+static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	return mali_mem_os_allocator.pool_count;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask)
+#else
+static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask)
+#endif /* Linux < 2.6.35 */
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc)
+#else
+static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc)
+#endif /* Linux < 3.12.0 */
+#endif /* Linux < 3.0.0 */
+{
+	struct mali_page_node *m_page, *m_tmp;
+	unsigned long flags;
+	struct list_head *le, pages;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+	int nr = nr_to_scan;
+#else
+	int nr = sc->nr_to_scan;
+#endif
+
+	if (0 == nr) {
+		return mali_mem_os_shrink_count(shrinker, sc);
+	}
+
+	if (0 == spin_trylock_irqsave(&mali_mem_os_allocator.pool_lock, flags)) {
+		/* Not able to lock. */
+		return -1;
+	}
+
+	if (0 == mali_mem_os_allocator.pool_count) {
+		/* No pages availble */
+		spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags);
+		return 0;
+	}
+
+	/* Release from general page pool */
+	nr = min((size_t)nr, mali_mem_os_allocator.pool_count);
+	mali_mem_os_allocator.pool_count -= nr;
+	list_for_each(le, &mali_mem_os_allocator.pool_pages) {
+		--nr;
+		if (0 == nr) break;
+	}
+	list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le);
+	spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags);
+
+	list_for_each_entry_safe(m_page, m_tmp, &pages, list) {
+		mali_mem_os_free_page_node(m_page);
+	}
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES > mali_mem_os_allocator.pool_count) {
+		/* Pools are empty, stop timer */
+		MALI_DEBUG_PRINT(5, ("Stopping timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count));
+		cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker);
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	return mali_mem_os_shrink_count(shrinker, sc);
+#else
+	return nr;
+#endif
+}
+
+static void mali_mem_os_trim_pool(struct work_struct *data)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	struct list_head *le;
+	LIST_HEAD(pages);
+	size_t nr_to_free;
+
+	MALI_IGNORE(data);
+
+	MALI_DEBUG_PRINT(3, ("OS Mem: Trimming pool %u\n", mali_mem_os_allocator.pool_count));
+
+	/* Release from general page pool */
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+		size_t count = mali_mem_os_allocator.pool_count - MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES;
+		const size_t min_to_free = min(64, MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES);
+
+		/* Free half the pages on the pool above the static limit. Or 64 pages, 256KB. */
+		nr_to_free = max(count / 2, min_to_free);
+
+		mali_mem_os_allocator.pool_count -= nr_to_free;
+		list_for_each(le, &mali_mem_os_allocator.pool_pages) {
+			--nr_to_free;
+			if (0 == nr_to_free) break;
+		}
+		list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le);
+	}
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	list_for_each_entry_safe(m_page, m_tmp, &pages, list) {
+		mali_mem_os_free_page_node(m_page);
+	}
+
+	/* Release some pages from page table page pool */
+	mali_mem_os_trim_page_table_page_pool();
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+		MALI_DEBUG_PRINT(4, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count));
+		queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES);
+	}
+}
+
+_mali_osk_errcode_t mali_mem_os_init(void)
+{
+	mali_mem_os_allocator.wq = alloc_workqueue("mali-mem", WQ_UNBOUND, 1);
+	if (NULL == mali_mem_os_allocator.wq) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+#if LINUX_VERSION_CODE >=  KERNEL_VERSION(4, 8, 0)
+	dma_attrs_wc = DMA_ATTR_WRITE_COMBINE;
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &dma_attrs_wc);
+#endif
+
+	register_shrinker(&mali_mem_os_allocator.shrinker);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_os_term(void)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	unregister_shrinker(&mali_mem_os_allocator.shrinker);
+	cancel_delayed_work_sync(&mali_mem_os_allocator.timed_shrinker);
+
+	if (NULL != mali_mem_os_allocator.wq) {
+		destroy_workqueue(mali_mem_os_allocator.wq);
+		mali_mem_os_allocator.wq = NULL;
+	}
+
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+	list_for_each_entry_safe(m_page, m_tmp, &mali_mem_os_allocator.pool_pages, list) {
+		mali_mem_os_free_page_node(m_page);
+
+		--mali_mem_os_allocator.pool_count;
+	}
+	BUG_ON(mali_mem_os_allocator.pool_count);
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	/* Release from page table page pool */
+	do {
+		u32 nr_to_free;
+
+		spin_lock(&mali_mem_page_table_page_pool.lock);
+
+		nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, mali_mem_page_table_page_pool.count);
+
+		/* Pool lock will be released by the callee. */
+		mali_mem_os_page_table_pool_free(nr_to_free);
+	} while (0 != mali_mem_page_table_page_pool.count);
+}
+
+_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size)
+{
+	mali_mem_os_allocator.allocation_limit = size;
+
+	MALI_SUCCESS;
+}
+
+u32 mali_mem_os_stat(void)
+{
+	return atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE;
+}
diff --git a/mali/linux/mali_memory_os_alloc.h b/mali/linux/mali_memory_os_alloc.h
new file mode 100755
index 0000000..b92fffe
--- /dev/null
+++ b/mali/linux/mali_memory_os_alloc.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_OS_ALLOC_H__
+#define __MALI_MEMORY_OS_ALLOC_H__
+
+#include "mali_osk.h"
+#include "mali_memory_types.h"
+
+
+/** @brief Release Mali OS memory
+ *
+ * The session memory_lock must be held when calling this function.
+ *
+ * @param mem_bkend Pointer to the mali_mem_backend to release
+ */
+u32 mali_mem_os_release(mali_mem_backend *mem_bkend);
+
+_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping);
+
+void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt);
+
+_mali_osk_errcode_t mali_mem_os_init(void);
+
+void mali_mem_os_term(void);
+
+u32 mali_mem_os_stat(void);
+
+void mali_mem_os_free_page_node(struct mali_page_node *m_page);
+
+int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size);
+
+u32 mali_mem_os_free(struct list_head *os_pages, u32 pages_count, mali_bool cow_flag);
+
+_mali_osk_errcode_t mali_mem_os_put_page(struct page *page);
+
+_mali_osk_errcode_t mali_mem_os_resize_pages(mali_mem_os_mem *mem_from, mali_mem_os_mem *mem_to, u32 start_page, u32 page_count);
+
+_mali_osk_errcode_t mali_mem_os_mali_map(mali_mem_os_mem *os_mem, struct mali_session_data *session, u32 vaddr, u32 start_page, u32 mapping_pgae_num, u32 props);
+
+void mali_mem_os_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_os_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+
+_mali_osk_errcode_t mali_mem_os_resize_cpu_map_locked(mali_mem_backend *mem_bkend, struct vm_area_struct *vma, unsigned long start_vaddr, u32 mappig_size);
+
+#endif /* __MALI_MEMORY_OS_ALLOC_H__ */
diff --git a/mali/linux/mali_memory_secure.c b/mali/linux/mali_memory_secure.c
new file mode 100644
index 0000000..7856ae6
--- /dev/null
+++ b/mali/linux/mali_memory_secure.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_memory.h"
+#include "mali_memory_secure.h"
+#include "mali_osk.h"
+#include <linux/mutex.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-buf.h>
+
+_mali_osk_errcode_t mali_mem_secure_attach_dma_buf(mali_mem_secure *secure_mem, u32 size, int mem_fd)
+{
+	struct dma_buf *buf;
+	MALI_DEBUG_ASSERT_POINTER(secure_mem);
+
+	/* get dma buffer */
+	buf = dma_buf_get(mem_fd);
+	if (IS_ERR_OR_NULL(buf)) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to get dma buf!\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (size != buf->size) {
+		MALI_DEBUG_PRINT_ERROR(("The secure mem size not match to the dma buf size!\n"));
+		goto failed_alloc_mem;
+	}
+
+	secure_mem->buf =  buf;
+	secure_mem->attachment = dma_buf_attach(secure_mem->buf, &mali_platform_device->dev);
+	if (NULL == secure_mem->attachment) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to get dma buf attachment!\n"));
+		goto failed_dma_attach;
+	}
+
+	secure_mem->sgt = dma_buf_map_attachment(secure_mem->attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(secure_mem->sgt)) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to map dma buf attachment\n"));
+		goto  failed_dma_map;
+	}
+
+	secure_mem->count = size / MALI_MMU_PAGE_SIZE;
+
+	return _MALI_OSK_ERR_OK;
+
+failed_dma_map:
+	dma_buf_detach(secure_mem->buf, secure_mem->attachment);
+failed_dma_attach:
+failed_alloc_mem:
+	dma_buf_put(buf);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+_mali_osk_errcode_t mali_mem_secure_mali_map(mali_mem_secure *secure_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+	struct mali_page_directory *pagedir;
+	struct scatterlist *sg;
+	u32 virt = vaddr;
+	u32 prop = props;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(secure_mem);
+	MALI_DEBUG_ASSERT_POINTER(secure_mem->sgt);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	pagedir = session->page_directory;
+
+	for_each_sg(secure_mem->sgt->sgl, sg, secure_mem->sgt->nents, i) {
+		u32 size = sg_dma_len(sg);
+		dma_addr_t phys = sg_dma_address(sg);
+
+		/* sg must be page aligned. */
+		MALI_DEBUG_ASSERT(0 == size % MALI_MMU_PAGE_SIZE);
+		MALI_DEBUG_ASSERT(0 == (phys & ~(uintptr_t)0xFFFFFFFF));
+
+		mali_mmu_pagedir_update(pagedir, virt, phys, size, prop);
+
+		MALI_DEBUG_PRINT(3, ("The secure mem physical address: 0x%x gpu virtual address: 0x%x! \n", phys, virt));
+		virt += size;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_secure_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+
+int mali_mem_secure_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+
+	int ret = 0;
+	struct scatterlist *sg;
+	mali_mem_secure *secure_mem = &mem_bkend->secure_mem;
+	unsigned long addr = vma->vm_start;
+	int i;
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_SECURE);
+
+	for_each_sg(secure_mem->sgt->sgl, sg, secure_mem->sgt->nents, i) {
+		phys_addr_t phys;
+		dma_addr_t dev_addr;
+		u32 size, j;
+		dev_addr = sg_dma_address(sg);
+#if defined(CONFIG_ARM64) ||LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+		phys =  dma_to_phys(&mali_platform_device->dev, dev_addr);
+#else
+		phys = page_to_phys(pfn_to_page(dma_to_pfn(&mali_platform_device->dev, dev_addr)));
+#endif
+		size = sg_dma_len(sg);
+		MALI_DEBUG_ASSERT(0 == size % _MALI_OSK_MALI_PAGE_SIZE);
+
+		for (j = 0; j < size / _MALI_OSK_MALI_PAGE_SIZE; j++) {
+			ret = vm_insert_pfn(vma, addr, PFN_DOWN(phys));
+
+			if (unlikely(0 != ret)) {
+				return -EFAULT;
+			}
+			addr += _MALI_OSK_MALI_PAGE_SIZE;
+			phys += _MALI_OSK_MALI_PAGE_SIZE;
+
+			MALI_DEBUG_PRINT(3, ("The secure mem physical address: 0x%x , cpu virtual address: 0x%x! \n", phys, addr));
+		}
+	}
+	return ret;
+}
+
+u32 mali_mem_secure_release(mali_mem_backend *mem_bkend)
+{
+	struct mali_mem_secure *mem;
+	mali_mem_allocation *alloc = mem_bkend->mali_allocation;
+	u32 free_pages_nr = 0;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_SECURE);
+
+	mem = &mem_bkend->secure_mem;
+	MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	MALI_DEBUG_ASSERT_POINTER(mem->sgt);
+	/* Unmap the memory from the mali virtual address space. */
+	mali_mem_secure_mali_unmap(alloc);
+	mutex_lock(&mem_bkend->mutex);
+	dma_buf_unmap_attachment(mem->attachment, mem->sgt, DMA_BIDIRECTIONAL);
+	dma_buf_detach(mem->buf, mem->attachment);
+	dma_buf_put(mem->buf);
+	mutex_unlock(&mem_bkend->mutex);
+
+	free_pages_nr = mem->count;
+
+	return free_pages_nr;
+}
+
+
diff --git a/mali/linux/mali_memory_secure.h b/mali/linux/mali_memory_secure.h
new file mode 100644
index 0000000..cb85767
--- /dev/null
+++ b/mali/linux/mali_memory_secure.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2010, 2013, 2015-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_SECURE_H__
+#define __MALI_MEMORY_SECURE_H__
+
+#include "mali_session.h"
+#include "mali_memory.h"
+#include <linux/spinlock.h>
+
+#include "mali_memory_types.h"
+
+_mali_osk_errcode_t mali_mem_secure_attach_dma_buf(mali_mem_secure *secure_mem, u32 size, int mem_fd);
+
+_mali_osk_errcode_t mali_mem_secure_mali_map(mali_mem_secure *secure_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+
+void mali_mem_secure_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_secure_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+
+u32 mali_mem_secure_release(mali_mem_backend *mem_bkend);
+
+#endif /* __MALI_MEMORY_SECURE_H__ */
diff --git a/mali/linux/mali_memory_swap_alloc.c b/mali/linux/mali_memory_swap_alloc.c
new file mode 100644
index 0000000..132bad4
--- /dev/null
+++ b/mali/linux/mali_memory_swap_alloc.c
@@ -0,0 +1,950 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/idr.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+#include <linux/shmem_fs.h>
+#include <linux/file.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_memory.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_cow.h"
+#include "mali_ukk.h"
+#include "mali_kernel_utilization.h"
+#include "mali_memory_swap_alloc.h"
+
+
+static struct _mali_osk_bitmap idx_mgr;
+static struct file *global_swap_file;
+static struct address_space *global_swap_space;
+static _mali_osk_wq_work_t *mali_mem_swap_out_workq = NULL;
+static u32 mem_backend_swapped_pool_size;
+#ifdef MALI_MEM_SWAP_TRACKING
+static u32 mem_backend_swapped_unlock_size;
+#endif
+/* Lock order: mem_backend_swapped_pool_lock  > each memory backend's mutex lock.
+ * This lock used to protect mem_backend_swapped_pool_size and mem_backend_swapped_pool. */
+static struct mutex mem_backend_swapped_pool_lock;
+static struct list_head mem_backend_swapped_pool;
+
+extern struct mali_mem_os_allocator mali_mem_os_allocator;
+
+#define MALI_SWAP_LOW_MEM_DEFAULT_VALUE (60*1024*1024)
+#define MALI_SWAP_INVALIDATE_MALI_ADDRESS (0)               /* Used to mark the given memory cookie is invalidate. */
+#define MALI_SWAP_GLOBAL_SWAP_FILE_SIZE (0xFFFFFFFF)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX ((MALI_SWAP_GLOBAL_SWAP_FILE_SIZE) >> PAGE_SHIFT)
+#else
+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX ((MALI_SWAP_GLOBAL_SWAP_FILE_SIZE) >> PAGE_CACHE_SHIFT)
+#endif
+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE (1 << 15) /* Reserved for CoW nonlinear swap backend memory, the space size is 128MB. */
+
+unsigned int mali_mem_swap_out_threshold_value = MALI_SWAP_LOW_MEM_DEFAULT_VALUE;
+
+/**
+ * We have two situations to do shrinking things, one is we met low GPU utilization which shows GPU needn't touch too
+ * swappable backends in short time, and the other one is we add new swappable backends, the total pool size exceed
+ * the threshold value of the swapped pool size.
+ */
+typedef enum {
+	MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION = 100,
+	MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS = 257,
+} _mali_mem_swap_pool_shrink_type_t;
+
+static void mali_mem_swap_swapped_bkend_pool_check_for_low_utilization(void *arg);
+
+_mali_osk_errcode_t mali_mem_swap_init(void)
+{
+	gfp_t flags = __GFP_NORETRY | __GFP_NOWARN;
+
+	if (_MALI_OSK_ERR_OK != _mali_osk_bitmap_init(&idx_mgr, MALI_SWAP_GLOBAL_SWAP_FILE_INDEX, MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE)) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	global_swap_file = shmem_file_setup("mali_swap", MALI_SWAP_GLOBAL_SWAP_FILE_SIZE, VM_NORESERVE);
+	if (IS_ERR(global_swap_file)) {
+		_mali_osk_bitmap_term(&idx_mgr);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	global_swap_space = global_swap_file->f_path.dentry->d_inode->i_mapping;
+
+	mali_mem_swap_out_workq = _mali_osk_wq_create_work(mali_mem_swap_swapped_bkend_pool_check_for_low_utilization, NULL);
+	if (NULL == mali_mem_swap_out_workq) {
+		_mali_osk_bitmap_term(&idx_mgr);
+		fput(global_swap_file);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE)
+	flags |= GFP_HIGHUSER;
+#else
+#ifdef CONFIG_ZONE_DMA32
+	flags |= GFP_DMA32;
+#else
+#ifdef CONFIG_ZONE_DMA
+	flags |= GFP_DMA;
+#else
+	/* arm64 utgard only work on < 4G, but the kernel
+	 * didn't provide method to allocte memory < 4G
+	 */
+	MALI_DEBUG_ASSERT(0);
+#endif
+#endif
+#endif
+
+	/* When we use shmem_read_mapping_page to allocate/swap-in, it will
+	 * use these flags to allocate new page if need.*/
+	mapping_set_gfp_mask(global_swap_space, flags);
+
+	mem_backend_swapped_pool_size = 0;
+#ifdef MALI_MEM_SWAP_TRACKING
+	mem_backend_swapped_unlock_size = 0;
+#endif
+	mutex_init(&mem_backend_swapped_pool_lock);
+	INIT_LIST_HEAD(&mem_backend_swapped_pool);
+
+	MALI_DEBUG_PRINT(2, ("Mali SWAP: Swap out threshold vaule is %uM\n", mali_mem_swap_out_threshold_value >> 20));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_swap_term(void)
+{
+	_mali_osk_bitmap_term(&idx_mgr);
+
+	fput(global_swap_file);
+
+	_mali_osk_wq_delete_work(mali_mem_swap_out_workq);
+
+	MALI_DEBUG_ASSERT(list_empty(&mem_backend_swapped_pool));
+	MALI_DEBUG_ASSERT(0 == mem_backend_swapped_pool_size);
+
+	return;
+}
+
+struct file *mali_mem_swap_get_global_swap_file(void)
+{
+	return  global_swap_file;
+}
+
+/* Judge if swappable backend in swapped pool. */
+static mali_bool mali_memory_swap_backend_in_swapped_pool(mali_mem_backend *mem_bkend)
+{
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+
+	return !list_empty(&mem_bkend->list);
+}
+
+void mali_memory_swap_list_backend_delete(mali_mem_backend *mem_bkend)
+{
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+
+	mutex_lock(&mem_backend_swapped_pool_lock);
+	mutex_lock(&mem_bkend->mutex);
+
+	if (MALI_FALSE == mali_memory_swap_backend_in_swapped_pool(mem_bkend)) {
+		mutex_unlock(&mem_bkend->mutex);
+		mutex_unlock(&mem_backend_swapped_pool_lock);
+		return;
+	}
+
+	MALI_DEBUG_ASSERT(!list_empty(&mem_bkend->list));
+
+	list_del_init(&mem_bkend->list);
+
+	mutex_unlock(&mem_bkend->mutex);
+
+	mem_backend_swapped_pool_size -= mem_bkend->size;
+
+	mutex_unlock(&mem_backend_swapped_pool_lock);
+}
+
+static void mali_mem_swap_out_page_node(mali_page_node *page_node)
+{
+	MALI_DEBUG_ASSERT(page_node);
+
+	dma_unmap_page(&mali_platform_device->dev, page_node->swap_it->dma_addr,
+		       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+	set_page_dirty(page_node->swap_it->page);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
+	put_page(page_node->swap_it->page);
+#else
+	page_cache_release(page_node->swap_it->page);
+#endif
+}
+
+void mali_mem_swap_unlock_single_mem_backend(mali_mem_backend *mem_bkend)
+{
+	mali_page_node *m_page;
+
+	MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_bkend->mutex));
+
+	if (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN)) {
+		return;
+	}
+
+	mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN;
+
+	list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+		mali_mem_swap_out_page_node(m_page);
+	}
+
+	return;
+}
+
+static void mali_mem_swap_unlock_partial_locked_mem_backend(mali_mem_backend *mem_bkend, mali_page_node *page_node)
+{
+	mali_page_node *m_page;
+
+	MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_bkend->mutex));
+
+	list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+		if (m_page == page_node) {
+			break;
+		}
+		mali_mem_swap_out_page_node(m_page);
+	}
+}
+
+static void mali_mem_swap_swapped_bkend_pool_shrink(_mali_mem_swap_pool_shrink_type_t shrink_type)
+{
+	mali_mem_backend *bkend, *tmp_bkend;
+	long system_free_size;
+	u32 last_gpu_utilization, gpu_utilization_threshold_value, temp_swap_out_threshold_value;
+
+	MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_backend_swapped_pool_lock));
+
+	if (MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION == shrink_type) {
+		/**
+		 * When we met that system memory is very low and Mali locked swappable memory size is less than
+		 * threshold value, and at the same time, GPU load is very low and don't need high performance,
+		 * at this condition, we can unlock more swap memory backend from swapped backends pool.
+		 */
+		gpu_utilization_threshold_value = MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION;
+		temp_swap_out_threshold_value = (mali_mem_swap_out_threshold_value >> 2);
+	} else {
+		/* When we add swappable memory backends to swapped pool, we need to think that we couldn't
+		* hold too much swappable backends in Mali driver, and also we need considering performance.
+		* So there is a balance for swapping out memory backend, we should follow the following conditions:
+		* 1. Total memory size in global mem backend swapped pool is more than the defined threshold value.
+		* 2. System level free memory size is less than the defined threshold value.
+		* 3. Please note that GPU utilization problem isn't considered in this condition.
+		*/
+		gpu_utilization_threshold_value = MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS;
+		temp_swap_out_threshold_value = mali_mem_swap_out_threshold_value;
+	}
+
+	/* Get system free pages number. */
+	system_free_size = global_page_state(NR_FREE_PAGES) * PAGE_SIZE;
+	last_gpu_utilization = _mali_ukk_utilization_gp_pp();
+
+	if ((last_gpu_utilization < gpu_utilization_threshold_value)
+	    && (system_free_size < mali_mem_swap_out_threshold_value)
+	    && (mem_backend_swapped_pool_size > temp_swap_out_threshold_value)) {
+		list_for_each_entry_safe(bkend, tmp_bkend, &mem_backend_swapped_pool, list) {
+			if (mem_backend_swapped_pool_size <= temp_swap_out_threshold_value) {
+				break;
+			}
+
+			mutex_lock(&bkend->mutex);
+
+			/* check if backend is in use. */
+			if (0 < bkend->using_count) {
+				mutex_unlock(&bkend->mutex);
+				continue;
+			}
+
+			mali_mem_swap_unlock_single_mem_backend(bkend);
+			list_del_init(&bkend->list);
+			mem_backend_swapped_pool_size -= bkend->size;
+#ifdef MALI_MEM_SWAP_TRACKING
+			mem_backend_swapped_unlock_size += bkend->size;
+#endif
+			mutex_unlock(&bkend->mutex);
+		}
+	}
+
+	return;
+}
+
+static void mali_mem_swap_swapped_bkend_pool_check_for_low_utilization(void *arg)
+{
+	MALI_IGNORE(arg);
+
+	mutex_lock(&mem_backend_swapped_pool_lock);
+
+	mali_mem_swap_swapped_bkend_pool_shrink(MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION);
+
+	mutex_unlock(&mem_backend_swapped_pool_lock);
+}
+
+/**
+ * After PP job finished, we add all of swappable memory backend used by this PP
+ * job to the tail of the global swapped pool, and if the total size of swappable memory is more than threshold
+ * value, we also need to shrink the swapped pool start from the head of the list.
+ */
+void mali_memory_swap_list_backend_add(mali_mem_backend *mem_bkend)
+{
+	mutex_lock(&mem_backend_swapped_pool_lock);
+	mutex_lock(&mem_bkend->mutex);
+
+	if (mali_memory_swap_backend_in_swapped_pool(mem_bkend)) {
+		MALI_DEBUG_ASSERT(!list_empty(&mem_bkend->list));
+
+		list_del_init(&mem_bkend->list);
+		list_add_tail(&mem_bkend->list, &mem_backend_swapped_pool);
+		mutex_unlock(&mem_bkend->mutex);
+		mutex_unlock(&mem_backend_swapped_pool_lock);
+		return;
+	}
+
+	list_add_tail(&mem_bkend->list, &mem_backend_swapped_pool);
+
+	mutex_unlock(&mem_bkend->mutex);
+	mem_backend_swapped_pool_size += mem_bkend->size;
+
+	mali_mem_swap_swapped_bkend_pool_shrink(MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS);
+
+	mutex_unlock(&mem_backend_swapped_pool_lock);
+	return;
+}
+
+
+u32 mali_mem_swap_idx_alloc(void)
+{
+	return _mali_osk_bitmap_alloc(&idx_mgr);
+}
+
+void mali_mem_swap_idx_free(u32 idx)
+{
+	_mali_osk_bitmap_free(&idx_mgr, idx);
+}
+
+static u32 mali_mem_swap_idx_range_alloc(u32 count)
+{
+	u32 index;
+
+	index = _mali_osk_bitmap_alloc_range(&idx_mgr, count);
+
+	return index;
+}
+
+static void mali_mem_swap_idx_range_free(u32 idx, int num)
+{
+	_mali_osk_bitmap_free_range(&idx_mgr, idx, num);
+}
+
+struct mali_swap_item *mali_mem_swap_alloc_swap_item(void)
+{
+	mali_swap_item *swap_item;
+
+	swap_item = kzalloc(sizeof(mali_swap_item), GFP_KERNEL);
+
+	if (NULL == swap_item) {
+		return NULL;
+	}
+
+	atomic_set(&swap_item->ref_count, 1);
+	swap_item->page = NULL;
+	atomic_add(1, &mali_mem_os_allocator.allocated_pages);
+
+	return swap_item;
+}
+
+void mali_mem_swap_free_swap_item(mali_swap_item *swap_item)
+{
+	struct inode *file_node;
+	long long start, end;
+
+	/* If this swap item is shared, we just reduce the reference counter. */
+	if (0 == atomic_dec_return(&swap_item->ref_count)) {
+		file_node = global_swap_file->f_path.dentry->d_inode;
+		start = swap_item->idx;
+		start = start << 12;
+		end = start + PAGE_SIZE;
+
+		shmem_truncate_range(file_node, start, (end - 1));
+
+		mali_mem_swap_idx_free(swap_item->idx);
+
+		atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+
+		kfree(swap_item);
+	}
+}
+
+/* Used to allocate new swap item for new memory allocation and cow page for write. */
+struct mali_page_node *_mali_mem_swap_page_node_allocate(void)
+{
+	struct mali_page_node *m_page;
+
+	m_page = _mali_page_node_allocate(MALI_PAGE_NODE_SWAP);
+
+	if (NULL == m_page) {
+		return NULL;
+	}
+
+	m_page->swap_it = mali_mem_swap_alloc_swap_item();
+
+	if (NULL == m_page->swap_it) {
+		kfree(m_page);
+		return NULL;
+	}
+
+	return m_page;
+}
+
+_mali_osk_errcode_t _mali_mem_swap_put_page_node(struct mali_page_node *m_page)
+{
+
+	mali_mem_swap_free_swap_item(m_page->swap_it);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_mem_swap_page_node_free(struct mali_page_node *m_page)
+{
+	_mali_mem_swap_put_page_node(m_page);
+
+	kfree(m_page);
+
+	return;
+}
+
+u32 mali_mem_swap_free(mali_mem_swap *swap_mem)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(swap_mem);
+
+	list_for_each_entry_safe(m_page, m_tmp, &swap_mem->pages, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_SWAP);
+
+		/* free the page node and release the swap item, if the ref count is 1,
+		 * then need also free the swap item. */
+		list_del(&m_page->list);
+		if (1 == _mali_page_node_get_ref_count(m_page)) {
+			free_pages_nr++;
+		}
+
+		_mali_mem_swap_page_node_free(m_page);
+	}
+
+	return free_pages_nr;
+}
+
+static u32 mali_mem_swap_cow_free(mali_mem_cow *cow_mem)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(cow_mem);
+
+	list_for_each_entry_safe(m_page, m_tmp, &cow_mem->pages, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_SWAP);
+
+		/* free the page node and release the swap item, if the ref count is 1,
+		 * then need also free the swap item. */
+		list_del(&m_page->list);
+		if (1 == _mali_page_node_get_ref_count(m_page)) {
+			free_pages_nr++;
+		}
+
+		_mali_mem_swap_page_node_free(m_page);
+	}
+
+	return free_pages_nr;
+}
+
+u32 mali_mem_swap_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped)
+{
+	mali_mem_allocation *alloc;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	alloc = mem_bkend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	if (is_mali_mapped) {
+		mali_mem_swap_mali_unmap(alloc);
+	}
+
+	mali_memory_swap_list_backend_delete(mem_bkend);
+
+	mutex_lock(&mem_bkend->mutex);
+	/* To make sure the given memory backend was unlocked from Mali side,
+	 * and then free this memory block. */
+	mali_mem_swap_unlock_single_mem_backend(mem_bkend);
+	mutex_unlock(&mem_bkend->mutex);
+
+	if (MALI_MEM_SWAP == mem_bkend->type) {
+		free_pages_nr = mali_mem_swap_free(&mem_bkend->swap_mem);
+	} else {
+		free_pages_nr = mali_mem_swap_cow_free(&mem_bkend->cow_mem);
+	}
+
+	return free_pages_nr;
+}
+
+mali_bool mali_mem_swap_in_page_node(struct mali_page_node *page_node)
+{
+	MALI_DEBUG_ASSERT(NULL != page_node);
+
+	page_node->swap_it->page = shmem_read_mapping_page(global_swap_space, page_node->swap_it->idx);
+
+	if (IS_ERR(page_node->swap_it->page)) {
+		MALI_DEBUG_PRINT_ERROR(("SWAP Mem: failed to swap in page with index: %d.\n", page_node->swap_it->idx));
+		return MALI_FALSE;
+	}
+
+	/* Ensure page is flushed from CPU caches. */
+	page_node->swap_it->dma_addr = dma_map_page(&mali_platform_device->dev, page_node->swap_it->page,
+				       0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+	return MALI_TRUE;
+}
+
+int mali_mem_swap_alloc_pages(mali_mem_swap *swap_mem, u32 size, u32 *bkend_idx)
+{
+	size_t page_count = PAGE_ALIGN(size) / PAGE_SIZE;
+	struct mali_page_node *m_page;
+	long system_free_size;
+	u32 i, index;
+	mali_bool ret;
+
+	MALI_DEBUG_ASSERT(NULL != swap_mem);
+	MALI_DEBUG_ASSERT(NULL != bkend_idx);
+	MALI_DEBUG_ASSERT(page_count <= MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE);
+
+	if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) {
+		MALI_DEBUG_PRINT(2, ("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n",
+				     size,
+				     atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE,
+				     mali_mem_os_allocator.allocation_limit));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	INIT_LIST_HEAD(&swap_mem->pages);
+	swap_mem->count = page_count;
+	index = mali_mem_swap_idx_range_alloc(page_count);
+
+	if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == index) {
+		MALI_PRINT_ERROR(("Mali Swap: Failed to allocate continuous index for swappable Mali memory."));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	for (i = 0; i < page_count; i++) {
+		m_page = _mali_mem_swap_page_node_allocate();
+
+		if (NULL == m_page) {
+			MALI_DEBUG_PRINT_ERROR(("SWAP Mem: Failed to allocate mali page node."));
+			swap_mem->count = i;
+
+			mali_mem_swap_free(swap_mem);
+			mali_mem_swap_idx_range_free(index + i, page_count - i);
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		m_page->swap_it->idx = index + i;
+
+		ret = mali_mem_swap_in_page_node(m_page);
+
+		if (MALI_FALSE == ret) {
+			MALI_DEBUG_PRINT_ERROR(("SWAP Mem: Allocate new page from SHMEM file failed."));
+			_mali_mem_swap_page_node_free(m_page);
+			mali_mem_swap_idx_range_free(index + i + 1, page_count - i - 1);
+
+			swap_mem->count = i;
+			mali_mem_swap_free(swap_mem);
+			return _MALI_OSK_ERR_NOMEM;
+		}
+
+		list_add_tail(&m_page->list, &swap_mem->pages);
+	}
+
+	system_free_size = global_page_state(NR_FREE_PAGES) * PAGE_SIZE;
+
+	if ((system_free_size < mali_mem_swap_out_threshold_value)
+	    && (mem_backend_swapped_pool_size > (mali_mem_swap_out_threshold_value >> 2))
+	    && mali_utilization_enabled()) {
+		_mali_osk_wq_schedule_work(mali_mem_swap_out_workq);
+	}
+
+	*bkend_idx = index;
+	return 0;
+}
+
+void mali_mem_swap_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+
+/* Insert these pages from shmem to mali page table*/
+_mali_osk_errcode_t mali_mem_swap_mali_map(mali_mem_swap *swap_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+	struct mali_page_directory *pagedir = session->page_directory;
+	struct mali_page_node *m_page;
+	dma_addr_t phys;
+	u32 virt = vaddr;
+	u32 prop = props;
+
+	list_for_each_entry(m_page, &swap_mem->pages, list) {
+		MALI_DEBUG_ASSERT(NULL != m_page->swap_it->page);
+		phys = m_page->swap_it->dma_addr;
+
+		mali_mmu_pagedir_update(pagedir, virt, phys, MALI_MMU_PAGE_SIZE, prop);
+		virt += MALI_MMU_PAGE_SIZE;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_in_pages(struct mali_pp_job *job)
+{
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	struct mali_page_node *m_page;
+	mali_bool swap_in_success = MALI_TRUE;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	for (i = 0; i < num_memory_cookies; i++) {
+
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+		if (NULL == mali_vma_node) {
+			job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+			swap_in_success = MALI_FALSE;
+			MALI_PRINT_ERROR(("SWAP Mem: failed to find mali_vma_node through Mali address: 0x%08x.\n", mali_addr));
+			continue;
+		}
+
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(NULL != mali_alloc);
+
+		if (MALI_MEM_SWAP != mali_alloc->type &&
+		    MALI_MEM_COW != mali_alloc->type) {
+			continue;
+		}
+
+		/* Get backend memory & Map on GPU */
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+		/* We neednot hold backend's lock here, race safe.*/
+		if ((MALI_MEM_COW == mem_bkend->type) &&
+		    (!(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+			continue;
+		}
+
+		mutex_lock(&mem_bkend->mutex);
+
+		/* When swap_in_success is MALI_FALSE, it means this job has memory backend that could not be swapped in,
+		 * and it will be aborted in mali scheduler, so here, we just mark those memory cookies which
+		 * should not be swapped out when delete job to invalide */
+		if (MALI_FALSE == swap_in_success) {
+			job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+
+		/* Before swap in, checking if this memory backend has been swapped in by the latest flushed jobs. */
+		++mem_bkend->using_count;
+
+		if (1 < mem_bkend->using_count) {
+			MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN != (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags));
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+
+		if (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN != (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags)) {
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+
+
+		list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+			if (MALI_FALSE == mali_mem_swap_in_page_node(m_page)) {
+				/* Don't have enough memory to swap in page, so release pages have already been swapped
+				 * in and then mark this pp job to be fail. */
+				mali_mem_swap_unlock_partial_locked_mem_backend(mem_bkend, m_page);
+				swap_in_success = MALI_FALSE;
+				break;
+			}
+		}
+
+		if (swap_in_success) {
+#ifdef MALI_MEM_SWAP_TRACKING
+			mem_backend_swapped_unlock_size -= mem_bkend->size;
+#endif
+			_mali_osk_mutex_wait(session->memory_lock);
+			mali_mem_swap_mali_map(&mem_bkend->swap_mem, session, mali_alloc->mali_mapping.addr, mali_alloc->mali_mapping.properties);
+			_mali_osk_mutex_signal(session->memory_lock);
+
+			/* Remove the unlock flag from mem backend flags, mark this backend has been swapped in. */
+			mem_bkend->flags &= ~(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN);
+			mutex_unlock(&mem_bkend->mutex);
+		} else {
+			--mem_bkend->using_count;
+			/* Marking that this backend is not swapped in, need not to be processed anymore. */
+			job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+			mutex_unlock(&mem_bkend->mutex);
+		}
+	}
+
+	job->swap_status = swap_in_success ? MALI_SWAP_IN_SUCC : MALI_SWAP_IN_FAIL;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_out_pages(struct mali_pp_job *job)
+{
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+
+	for (i = 0; i < num_memory_cookies; i++) {
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+
+		if (MALI_SWAP_INVALIDATE_MALI_ADDRESS == mali_addr) {
+			continue;
+		}
+
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+
+		if (NULL == mali_vma_node) {
+			MALI_PRINT_ERROR(("SWAP Mem: failed to find mali_vma_node through Mali address: 0x%08x.\n", mali_addr));
+			continue;
+		}
+
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(NULL != mali_alloc);
+
+		if (MALI_MEM_SWAP != mali_alloc->type &&
+		    MALI_MEM_COW != mali_alloc->type) {
+			continue;
+		}
+
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+		/* We neednot hold backend's lock here, race safe.*/
+		if ((MALI_MEM_COW == mem_bkend->type) &&
+		    (!(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+			continue;
+		}
+
+		mutex_lock(&mem_bkend->mutex);
+
+		MALI_DEBUG_ASSERT(0 < mem_bkend->using_count);
+
+		/* Reducing the using_count of mem backend means less pp job are using this memory backend,
+		 * if this count get to zero, it means no pp job is using it now, could put it to swap out list. */
+		--mem_bkend->using_count;
+
+		if (0 < mem_bkend->using_count) {
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+		mutex_unlock(&mem_bkend->mutex);
+
+		mali_memory_swap_list_backend_add(mem_bkend);
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_allocate_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep)
+{
+	struct mali_page_node *m_page, *found_node = NULL;
+	struct page *found_page;
+	mali_mem_swap *swap = NULL;
+	mali_mem_cow *cow = NULL;
+	dma_addr_t dma_addr;
+	u32 i = 0;
+
+	if (MALI_MEM_SWAP == mem_bkend->type) {
+		swap = &mem_bkend->swap_mem;
+		list_for_each_entry(m_page, &swap->pages, list) {
+			if (i == offset) {
+				found_node = m_page;
+				break;
+			}
+			i++;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+		MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (MALI_MEM_BACKEND_FLAG_SWAP_COWED & mem_bkend->flags));
+
+		cow = &mem_bkend->cow_mem;
+		list_for_each_entry(m_page, &cow->pages, list) {
+			if (i == offset) {
+				found_node = m_page;
+				break;
+			}
+			i++;
+		}
+	}
+
+	if (NULL == found_node) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	found_page = shmem_read_mapping_page(global_swap_space, found_node->swap_it->idx);
+
+	if (!IS_ERR(found_page)) {
+		lock_page(found_page);
+		dma_addr = dma_map_page(&mali_platform_device->dev, found_page,
+					0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+		dma_unmap_page(&mali_platform_device->dev, dma_addr,
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+		*pagep = found_page;
+	} else {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_cow_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep)
+{
+	struct mali_page_node *m_page, *found_node = NULL, *new_node = NULL;
+	mali_mem_cow *cow = NULL;
+	u32 i = 0;
+
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+	MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED));
+	MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN == (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags));
+	MALI_DEBUG_ASSERT(!mali_memory_swap_backend_in_swapped_pool(mem_bkend));
+
+	cow = &mem_bkend->cow_mem;
+	list_for_each_entry(m_page, &cow->pages, list) {
+		if (i == offset) {
+			found_node = m_page;
+			break;
+		}
+		i++;
+	}
+
+	if (NULL == found_node) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	new_node = _mali_mem_swap_page_node_allocate();
+
+	if (NULL == new_node) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	new_node->swap_it->idx = mali_mem_swap_idx_alloc();
+
+	if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == new_node->swap_it->idx) {
+		MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW on demand.\n"));
+		kfree(new_node->swap_it);
+		kfree(new_node);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (MALI_FALSE == mali_mem_swap_in_page_node(new_node)) {
+		_mali_mem_swap_page_node_free(new_node);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* swap in found node for copy in kernel. */
+	if (MALI_FALSE == mali_mem_swap_in_page_node(found_node)) {
+		mali_mem_swap_out_page_node(new_node);
+		_mali_mem_swap_page_node_free(new_node);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	_mali_mem_cow_copy_page(found_node, new_node);
+
+	list_replace(&found_node->list, &new_node->list);
+
+	if (1 != _mali_page_node_get_ref_count(found_node)) {
+		atomic_add(1, &mem_bkend->mali_allocation->session->mali_mem_allocated_pages);
+		if (atomic_read(&mem_bkend->mali_allocation->session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > mem_bkend->mali_allocation->session->max_mali_mem_allocated_size) {
+			mem_bkend->mali_allocation->session->max_mali_mem_allocated_size = atomic_read(&mem_bkend->mali_allocation->session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+		mem_bkend->cow_mem.change_pages_nr++;
+	}
+
+	mali_mem_swap_out_page_node(found_node);
+	_mali_mem_swap_page_node_free(found_node);
+
+	/* When swap in the new page node, we have called dma_map_page for this page.\n */
+	dma_unmap_page(&mali_platform_device->dev, new_node->swap_it->dma_addr,
+		       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+	lock_page(new_node->swap_it->page);
+
+	*pagep = new_node->swap_it->page;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+#ifdef MALI_MEM_SWAP_TRACKING
+void mali_mem_swap_tracking(u32 *swap_pool_size, u32 *unlock_size)
+{
+	*swap_pool_size = mem_backend_swapped_pool_size;
+	*unlock_size =  mem_backend_swapped_unlock_size;
+}
+#endif
diff --git a/mali/linux/mali_memory_swap_alloc.h b/mali/linux/mali_memory_swap_alloc.h
new file mode 100644
index 0000000..3624d71
--- /dev/null
+++ b/mali/linux/mali_memory_swap_alloc.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_SWAP_ALLOC_H__
+#define __MALI_MEMORY_SWAP_ALLOC_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include "mali_memory_types.h"
+#include "mali_pp_job.h"
+
+/**
+ * Initialize memory swapping module.
+ */
+_mali_osk_errcode_t mali_mem_swap_init(void);
+
+void mali_mem_swap_term(void);
+
+/**
+ * Return global share memory file to other modules.
+ */
+struct file *mali_mem_swap_get_global_swap_file(void);
+
+/**
+ * Unlock the given memory backend and pages in it could be swapped out by kernel.
+ */
+void mali_mem_swap_unlock_single_mem_backend(mali_mem_backend *mem_bkend);
+
+/**
+ * Remove the given memory backend from global swap list.
+ */
+void mali_memory_swap_list_backend_delete(mali_mem_backend *mem_bkend);
+
+/**
+ * Add the given memory backend to global swap list.
+ */
+void mali_memory_swap_list_backend_add(mali_mem_backend *mem_bkend);
+
+/**
+ * Allocate 1 index from bitmap used as page index in global swap file.
+ */
+u32 mali_mem_swap_idx_alloc(void);
+
+void mali_mem_swap_idx_free(u32 idx);
+
+/**
+ * Allocate a new swap item without page index.
+ */
+struct mali_swap_item *mali_mem_swap_alloc_swap_item(void);
+
+/**
+ * Free a swap item, truncate the corresponding space in page cache and free index of page.
+ */
+void mali_mem_swap_free_swap_item(mali_swap_item *swap_item);
+
+/**
+ * Allocate a page node with swap item.
+ */
+struct mali_page_node *_mali_mem_swap_page_node_allocate(void);
+
+/**
+ * Reduce the reference count of given page node and if return 0, just free this page node.
+ */
+_mali_osk_errcode_t _mali_mem_swap_put_page_node(struct mali_page_node *m_page);
+
+void _mali_mem_swap_page_node_free(struct mali_page_node *m_page);
+
+/**
+ * Free a swappable memory backend.
+ */
+u32 mali_mem_swap_free(mali_mem_swap *swap_mem);
+
+/**
+ * Ummap and free.
+ */
+u32 mali_mem_swap_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped);
+
+/**
+ * Read in a page from global swap file with the pre-allcated page index.
+ */
+mali_bool mali_mem_swap_in_page_node(struct mali_page_node *page_node);
+
+int mali_mem_swap_alloc_pages(mali_mem_swap *swap_mem, u32 size, u32 *bkend_idx);
+
+_mali_osk_errcode_t mali_mem_swap_mali_map(mali_mem_swap *swap_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+
+void mali_mem_swap_mali_unmap(mali_mem_allocation *alloc);
+
+/**
+ * When pp job created, we need swap in all of memory backend needed by this pp job.
+ */
+int mali_mem_swap_in_pages(struct mali_pp_job *job);
+
+/**
+ * Put all of memory backends used this pp job to the global swap list.
+ */
+int mali_mem_swap_out_pages(struct mali_pp_job *job);
+
+/**
+ * This will be called in page fault to process CPU read&write.
+ */
+int mali_mem_swap_allocate_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep) ;
+
+/**
+ * Used to process cow on demand for swappable memory backend.
+ */
+int mali_mem_swap_cow_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep);
+
+#ifdef MALI_MEM_SWAP_TRACKING
+void mali_mem_swap_tracking(u32 *swap_pool_size, u32 *unlock_size);
+#endif
+#endif /* __MALI_MEMORY_SWAP_ALLOC_H__ */
+
diff --git a/mali/linux/mali_memory_types.h b/mali/linux/mali_memory_types.h
new file mode 100755
index 0000000..60cd9f4
--- /dev/null
+++ b/mali/linux/mali_memory_types.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_TYPES_H__
+#define __MALI_MEMORY_TYPES_H__
+
+#include <linux/mm.h>
+
+#if defined(CONFIG_MALI400_UMP)
+#include "ump_kernel_interface.h"
+#endif
+
+typedef u32 mali_address_t;
+
+typedef enum mali_mem_type {
+	MALI_MEM_OS,
+	MALI_MEM_EXTERNAL,
+	MALI_MEM_SWAP,
+	MALI_MEM_DMA_BUF,
+	MALI_MEM_UMP,
+	MALI_MEM_BLOCK,
+	MALI_MEM_COW,
+	MALI_MEM_SECURE,
+	MALI_MEM_TYPE_MAX,
+} mali_mem_type;
+
+typedef struct mali_block_item {
+	/* for block type, the block_phy is alway page size align
+	* so use low 12bit used for ref_cout.
+	*/
+	unsigned long phy_addr;
+} mali_block_item;
+
+/**
+ * idx is used to locate the given page in the address space of swap file.
+ * ref_count is used to mark how many memory backends are using this item.
+ */
+typedef struct mali_swap_item {
+	u32 idx;
+	atomic_t ref_count;
+	struct page *page;
+	dma_addr_t dma_addr;
+} mali_swap_item;
+
+typedef enum mali_page_node_type {
+	MALI_PAGE_NODE_OS,
+	MALI_PAGE_NODE_BLOCK,
+	MALI_PAGE_NODE_SWAP,
+} mali_page_node_type;
+
+typedef struct mali_page_node {
+	struct list_head list;
+	union {
+		struct page *page;
+		mali_block_item *blk_it; /*pointer to block item*/
+		mali_swap_item *swap_it;
+	};
+
+	u32 type;
+} mali_page_node;
+
+typedef struct mali_mem_os_mem {
+	struct list_head pages;
+	u32 count;
+} mali_mem_os_mem;
+
+typedef struct mali_mem_dma_buf {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+	struct mali_dma_buf_attachment *attachment;
+#endif
+} mali_mem_dma_buf;
+
+typedef struct mali_mem_external {
+	dma_addr_t phys;
+	u32 size;
+} mali_mem_external;
+
+typedef struct mali_mem_ump {
+#if defined(CONFIG_MALI400_UMP)
+	ump_dd_handle handle;
+#endif
+} mali_mem_ump;
+
+typedef struct block_allocator_allocation {
+	/* The list will be released in reverse order */
+	struct block_info *last_allocated;
+	u32 mapping_length;
+	struct block_allocator *info;
+} block_allocator_allocation;
+
+typedef struct mali_mem_block_mem {
+	struct list_head pfns;
+	u32 count;
+} mali_mem_block_mem;
+
+typedef struct mali_mem_virt_mali_mapping {
+	mali_address_t addr; /* Virtual Mali address */
+	u32 properties;      /* MMU Permissions + cache, must match MMU HW */
+} mali_mem_virt_mali_mapping;
+
+typedef struct mali_mem_virt_cpu_mapping {
+	void __user *addr;
+	struct vm_area_struct *vma;
+} mali_mem_virt_cpu_mapping;
+
+#define MALI_MEM_ALLOCATION_VALID_MAGIC 0xdeda110c
+#define MALI_MEM_ALLOCATION_FREED_MAGIC 0x10101010
+
+typedef struct mali_mm_node {
+	/* MALI GPU vaddr start, use u32 for mmu only support 32bit address*/
+	uint32_t start; /* GPU vaddr */
+	uint32_t size;  /* GPU allocation virtual size */
+	unsigned allocated : 1;
+} mali_mm_node;
+
+typedef struct mali_vma_node {
+	struct mali_mm_node vm_node;
+	struct rb_node vm_rb;
+} mali_vma_node;
+
+
+typedef struct mali_mem_allocation {
+	MALI_DEBUG_CODE(u32 magic);
+	mali_mem_type type;                /**< Type of memory */
+	u32 flags;                         /**< Flags for this allocation */
+
+	struct mali_session_data *session; /**< Pointer to session that owns the allocation */
+
+	mali_mem_virt_cpu_mapping cpu_mapping; /**< CPU mapping */
+	mali_mem_virt_mali_mapping mali_mapping; /**< Mali mapping */
+
+	/* add for new memory system */
+	struct mali_vma_node mali_vma_node;
+	u32 vsize; /* virtual size*/
+	u32 psize; /* physical backend memory size*/
+	struct list_head list;
+	s32 backend_handle; /* idr for mem_backend */
+	_mali_osk_atomic_t mem_alloc_refcount;
+} mali_mem_allocation;
+
+struct mali_mem_os_allocator {
+	spinlock_t pool_lock;
+	struct list_head pool_pages;
+	size_t pool_count;
+
+	atomic_t allocated_pages;
+	size_t allocation_limit;
+
+	struct shrinker shrinker;
+	struct delayed_work timed_shrinker;
+	struct workqueue_struct *wq;
+};
+
+/* COW backend memory type */
+typedef struct mali_mem_cow {
+	struct list_head pages;  /**< all pages for this cow backend allocation,
+                                                                including new allocated pages for modified range*/
+	u32 count;               /**< number of pages */
+	s32 change_pages_nr;
+} mali_mem_cow;
+
+typedef struct mali_mem_swap {
+	struct list_head pages;
+	u32 count;
+} mali_mem_swap;
+
+typedef struct mali_mem_secure {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+	struct dma_buf *buf;
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+#endif
+	u32 count;
+} mali_mem_secure;
+
+#define MALI_MEM_BACKEND_FLAG_COWED                   (0x1)  /* COW has happen on this backend */
+#define MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE        (0x2)  /* This is an COW backend, mapped as not allowed cpu to write */
+#define MALI_MEM_BACKEND_FLAG_SWAP_COWED              (0x4)  /* Mark the given backend is cowed from swappable memory. */
+/* Mark this backend is not swapped_in in MALI driver, and before using it,
+ * we should swap it in and set up corresponding page table. */
+#define MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN            (0x8)
+#define MALI_MEM_BACKEND_FLAG_NOT_BINDED              (0x1 << 5) /* this backend it not back with physical memory, used for defer bind */
+#define MALI_MEM_BACKEND_FLAG_BINDED              (0x1 << 6) /* this backend it back with physical memory, used for defer bind */
+
+typedef struct mali_mem_backend {
+	mali_mem_type type;                /**< Type of backend memory */
+	u32 flags;                         /**< Flags for this allocation */
+	u32 size;
+	/* Union selected by type. */
+	union {
+		mali_mem_os_mem os_mem;       /**< MALI_MEM_OS */
+		mali_mem_external ext_mem;    /**< MALI_MEM_EXTERNAL */
+		mali_mem_dma_buf dma_buf;     /**< MALI_MEM_DMA_BUF */
+		mali_mem_ump ump_mem;         /**< MALI_MEM_UMP */
+		mali_mem_block_mem block_mem; /**< MALI_MEM_BLOCK */
+		mali_mem_cow cow_mem;
+		mali_mem_swap swap_mem;
+		mali_mem_secure secure_mem;
+	};
+	mali_mem_allocation *mali_allocation;
+	struct mutex mutex;
+	mali_mem_type cow_type;
+
+	struct list_head list;           /**< Used to link swappable memory backend to the global swappable list */
+	int using_count;                 /**< Mark how many PP jobs are using this memory backend */
+	u32 start_idx;                   /**< If the correspondign vma of this backend is linear, this value will be used to set vma->vm_pgoff */
+} mali_mem_backend;
+
+#define MALI_MEM_FLAG_MALI_GUARD_PAGE (_MALI_MAP_EXTERNAL_MAP_GUARD_PAGE)
+#define MALI_MEM_FLAG_DONT_CPU_MAP    (1 << 1)
+#define MALI_MEM_FLAG_CAN_RESIZE  (_MALI_MEMORY_ALLOCATE_RESIZEABLE)
+#endif /* __MALI_MEMORY_TYPES__ */
diff --git a/mali/linux/mali_memory_ump.c b/mali/linux/mali_memory_ump.c
new file mode 100755
index 0000000..72bef08
--- /dev/null
+++ b/mali/linux/mali_memory_ump.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory.h"
+#include "ump_kernel_interface.h"
+
+static int mali_mem_ump_map(mali_mem_backend *mem_backend)
+{
+	ump_dd_handle ump_mem;
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	u32 nr_blocks;
+	u32 i;
+	ump_dd_physical_block *ump_blocks;
+	struct mali_page_directory *pagedir;
+	u32 offset = 0;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	ump_mem = mem_backend->ump_mem.handle;
+	MALI_DEBUG_ASSERT(UMP_DD_HANDLE_INVALID != ump_mem);
+
+	nr_blocks = ump_dd_phys_block_count_get(ump_mem);
+	if (nr_blocks == 0) {
+		MALI_DEBUG_PRINT(1, ("No block count\n"));
+		return -EINVAL;
+	}
+
+	ump_blocks = _mali_osk_malloc(sizeof(*ump_blocks) * nr_blocks);
+	if (NULL == ump_blocks) {
+		return -ENOMEM;
+	}
+
+	if (UMP_DD_INVALID == ump_dd_phys_blocks_get(ump_mem, ump_blocks, nr_blocks)) {
+		_mali_osk_free(ump_blocks);
+		return -EFAULT;
+	}
+
+	pagedir = session->page_directory;
+
+	mali_session_memory_lock(session);
+
+	err = mali_mem_mali_map_prepare(alloc);
+	if (_MALI_OSK_ERR_OK != err) {
+		MALI_DEBUG_PRINT(1, ("Mapping of UMP memory failed\n"));
+
+		_mali_osk_free(ump_blocks);
+		mali_session_memory_unlock(session);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_blocks; ++i) {
+		u32 virt = alloc->mali_vma_node.vm_node.start + offset;
+
+		MALI_DEBUG_PRINT(7, ("Mapping in 0x%08x size %d\n", ump_blocks[i].addr , ump_blocks[i].size));
+
+		mali_mmu_pagedir_update(pagedir, virt, ump_blocks[i].addr,
+					ump_blocks[i].size, MALI_MMU_FLAGS_DEFAULT);
+
+		offset += ump_blocks[i].size;
+	}
+
+	if (alloc->flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		u32 virt = alloc->mali_vma_node.vm_node.start + offset;
+
+		/* Map in an extra virtual guard page at the end of the VMA */
+		MALI_DEBUG_PRINT(6, ("Mapping in extra guard page\n"));
+
+		mali_mmu_pagedir_update(pagedir, virt, ump_blocks[0].addr, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+
+		offset += _MALI_OSK_MALI_PAGE_SIZE;
+	}
+	mali_session_memory_unlock(session);
+	_mali_osk_free(ump_blocks);
+	return 0;
+}
+
+static void mali_mem_ump_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+int mali_mem_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32  secure_id, u32 flags)
+{
+	ump_dd_handle ump_mem;
+	int ret;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+
+	MALI_DEBUG_PRINT(3,
+			 ("Requested to map ump memory with secure id %d into virtual memory 0x%08X, size 0x%08X\n",
+			  secure_id, alloc->mali_vma_node.vm_node.start, alloc->mali_vma_node.vm_node.size));
+
+	ump_mem = ump_dd_handle_create_from_secure_id(secure_id);
+	if (UMP_DD_HANDLE_INVALID == ump_mem) MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP;
+	if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+	mem_backend->ump_mem.handle = ump_mem;
+
+	ret = mali_mem_ump_map(mem_backend);
+	if (0 != ret) {
+		ump_dd_reference_release(ump_mem);
+		return _MALI_OSK_ERR_FAULT;
+	}
+	MALI_DEBUG_PRINT(3, ("Returning from UMP bind\n"));
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_unbind_ump_buf(mali_mem_backend *mem_backend)
+{
+	ump_dd_handle ump_mem;
+	mali_mem_allocation *alloc;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+	ump_mem = mem_backend->ump_mem.handle;
+	MALI_DEBUG_ASSERT(UMP_DD_HANDLE_INVALID != ump_mem);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	mali_mem_ump_unmap(alloc);
+	ump_dd_reference_release(ump_mem);
+}
+
diff --git a/mali/linux/mali_memory_ump.h b/mali/linux/mali_memory_ump.h
new file mode 100644
index 0000000..23f59ae
--- /dev/null
+++ b/mali/linux/mali_memory_ump.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_UMP_BUF_H__
+#define __MALI_MEMORY_UMP_BUF_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_memory.h"
+
+int mali_mem_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32  secure_id, u32 flags);
+void mali_mem_unbind_ump_buf(mali_mem_backend *mem_backend);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MEMORY_DMA_BUF_H__ */
diff --git a/mali/linux/mali_memory_util.c b/mali/linux/mali_memory_util.c
new file mode 100755
index 0000000..5114388
--- /dev/null
+++ b/mali/linux/mali_memory_util.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_secure.h"
+#endif
+#if defined(CONFIG_MALI400_UMP)
+#include "mali_memory_ump.h"
+#endif
+#include "mali_memory_external.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_swap_alloc.h"
+
+
+
+/**
+*function @_mali_free_allocation_mem - free a memory allocation
+*/
+static u32 _mali_free_allocation_mem(mali_mem_allocation *mali_alloc)
+{
+	mali_mem_backend *mem_bkend = NULL;
+	u32 free_pages_nr = 0;
+
+	struct mali_session_data *session = mali_alloc->session;
+	MALI_DEBUG_PRINT(4, (" _mali_free_allocation_mem, psize =0x%x! \n", mali_alloc->psize));
+	if (0 == mali_alloc->psize)
+		goto out;
+
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+	switch (mem_bkend->type) {
+	case MALI_MEM_OS:
+		free_pages_nr = mali_mem_os_release(mem_bkend);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		break;
+	case MALI_MEM_UMP:
+#if defined(CONFIG_MALI400_UMP)
+		mali_mem_unbind_ump_buf(mem_bkend);
+		atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+#else
+		MALI_DEBUG_PRINT(1, ("UMP not supported\n"));
+#endif
+		break;
+	case MALI_MEM_DMA_BUF:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		mali_mem_unbind_dma_buf(mem_bkend);
+		atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported\n"));
+#endif
+		break;
+	case MALI_MEM_EXTERNAL:
+		mali_mem_unbind_ext_buf(mem_bkend);
+		atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+		break;
+
+	case MALI_MEM_BLOCK:
+		free_pages_nr = mali_mem_block_release(mem_bkend);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		break;
+
+	case MALI_MEM_COW:
+		if (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED) {
+			free_pages_nr = mali_mem_swap_release(mem_bkend, MALI_TRUE);
+		} else {
+			free_pages_nr = mali_mem_cow_release(mem_bkend, MALI_TRUE);
+		}
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		break;
+	case MALI_MEM_SWAP:
+		free_pages_nr = mali_mem_swap_release(mem_bkend, MALI_TRUE);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		atomic_sub(free_pages_nr, &session->mali_mem_array[mem_bkend->type]);
+		break;
+	case MALI_MEM_SECURE:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		free_pages_nr = mali_mem_secure_release(mem_bkend);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory\n"));
+#endif
+		break;
+	default:
+		MALI_DEBUG_PRINT(1, ("mem type %d is not in the mali_mem_type enum.\n", mem_bkend->type));
+		break;
+	}
+
+	if ((NULL != mali_alloc->cpu_mapping.vma) && (mali_alloc == (mali_alloc->cpu_mapping.vma)->vm_private_data))
+		(mali_alloc->cpu_mapping.vma)->vm_private_data = NULL;
+
+	/*Remove backend memory idex */
+	mutex_lock(&mali_idr_mutex);
+	idr_remove(&mali_backend_idr, mali_alloc->backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	kfree(mem_bkend);
+out:
+	/* remove memory allocation  */
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_alloc->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_alloc);
+	return free_pages_nr;
+}
+
+/**
+*  ref_count for allocation
+*/
+u32 mali_allocation_unref(struct mali_mem_allocation **alloc)
+{
+	u32 free_pages_nr = 0;
+	mali_mem_allocation *mali_alloc = *alloc;
+	*alloc = NULL;
+	if (0 == _mali_osk_atomic_dec_return(&mali_alloc->mem_alloc_refcount)) {
+		free_pages_nr = _mali_free_allocation_mem(mali_alloc);
+	}
+	return free_pages_nr;
+}
+
+void mali_allocation_ref(struct mali_mem_allocation *alloc)
+{
+	_mali_osk_atomic_inc(&alloc->mem_alloc_refcount);
+}
+
+void mali_free_session_allocations(struct mali_session_data *session)
+{
+	struct mali_mem_allocation *entry, *next;
+
+	MALI_DEBUG_PRINT(4, (" mali_free_session_allocations! \n"));
+
+	list_for_each_entry_safe(entry, next, &session->allocation_mgr.head, list) {
+		mali_allocation_unref(&entry);
+	}
+}
diff --git a/mali/linux/mali_memory_util.h b/mali/linux/mali_memory_util.h
new file mode 100644
index 0000000..0a23b77
--- /dev/null
+++ b/mali/linux/mali_memory_util.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+
+#ifndef __MALI_MEMORY_UTIL_H__
+#define __MALI_MEMORY_UTIL_H__
+u32 mali_allocation_unref(struct mali_mem_allocation **alloc);
+
+void mali_allocation_ref(struct mali_mem_allocation *alloc);
+
+void mali_free_session_allocations(struct mali_session_data *session);
+
+#endif
diff --git a/mali/linux/mali_memory_virtual.c b/mali/linux/mali_memory_virtual.c
new file mode 100644
index 0000000..a217e43
--- /dev/null
+++ b/mali/linux/mali_memory_virtual.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+
+
+/**
+*internal helper to link node into the rb-tree
+*/
+static inline void _mali_vma_offset_add_rb(struct mali_allocation_manager *mgr,
+		struct mali_vma_node *node)
+{
+	struct rb_node **iter = &mgr->allocation_mgr_rb.rb_node;
+	struct rb_node *parent = NULL;
+	struct mali_vma_node *iter_node;
+
+	while (likely(*iter)) {
+		parent = *iter;
+		iter_node = rb_entry(*iter, struct mali_vma_node, vm_rb);
+
+		if (node->vm_node.start < iter_node->vm_node.start)
+			iter = &(*iter)->rb_left;
+		else if (node->vm_node.start > iter_node->vm_node.start)
+			iter = &(*iter)->rb_right;
+		else
+			MALI_DEBUG_ASSERT(0);
+	}
+
+	rb_link_node(&node->vm_rb, parent, iter);
+	rb_insert_color(&node->vm_rb, &mgr->allocation_mgr_rb);
+}
+
+/**
+ * mali_vma_offset_add() - Add offset node to RB Tree
+ */
+int mali_vma_offset_add(struct mali_allocation_manager *mgr,
+			struct mali_vma_node *node)
+{
+	int ret = 0;
+	write_lock(&mgr->vm_lock);
+
+	if (node->vm_node.allocated) {
+		goto out;
+	}
+
+	_mali_vma_offset_add_rb(mgr, node);
+	/* set to allocated */
+	node->vm_node.allocated = 1;
+
+out:
+	write_unlock(&mgr->vm_lock);
+	return ret;
+}
+
+/**
+ * mali_vma_offset_remove() - Remove offset node from RB tree
+ */
+void mali_vma_offset_remove(struct mali_allocation_manager *mgr,
+			    struct mali_vma_node *node)
+{
+	write_lock(&mgr->vm_lock);
+
+	if (node->vm_node.allocated) {
+		rb_erase(&node->vm_rb, &mgr->allocation_mgr_rb);
+		memset(&node->vm_node, 0, sizeof(node->vm_node));
+	}
+	write_unlock(&mgr->vm_lock);
+}
+
+/**
+* mali_vma_offset_search - Search the node in RB tree
+*/
+struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr,
+		unsigned long start, unsigned long pages)
+{
+	struct mali_vma_node *node, *best;
+	struct rb_node *iter;
+	unsigned long offset;
+	read_lock(&mgr->vm_lock);
+
+	iter = mgr->allocation_mgr_rb.rb_node;
+	best = NULL;
+
+	while (likely(iter)) {
+		node = rb_entry(iter, struct mali_vma_node, vm_rb);
+		offset = node->vm_node.start;
+		if (start >= offset) {
+			iter = iter->rb_right;
+			best = node;
+			if (start == offset)
+				break;
+		} else {
+			iter = iter->rb_left;
+		}
+	}
+
+	if (best) {
+		offset = best->vm_node.start + best->vm_node.size;
+		if (offset <= start + pages)
+			best = NULL;
+	}
+	read_unlock(&mgr->vm_lock);
+
+	return best;
+}
+
diff --git a/mali/linux/mali_memory_virtual.h b/mali/linux/mali_memory_virtual.h
new file mode 100644
index 0000000..2a70cbb
--- /dev/null
+++ b/mali/linux/mali_memory_virtual.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __MALI_GPU_VMEM_H__
+#define __MALI_GPU_VMEM_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_manager.h"
+
+
+
+int mali_vma_offset_add(struct mali_allocation_manager *mgr,
+			struct mali_vma_node *node);
+
+void mali_vma_offset_remove(struct mali_allocation_manager *mgr,
+			    struct mali_vma_node *node);
+
+struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr,
+		unsigned long start,    unsigned long pages);
+
+#endif
diff --git a/mali/linux/mali_osk_atomics.c b/mali/linux/mali_osk_atomics.c
new file mode 100755
index 0000000..03f4421
--- /dev/null
+++ b/mali/linux/mali_osk_atomics.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_atomics.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <asm/atomic.h>
+#include "mali_kernel_common.h"
+
+void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom)
+{
+	atomic_dec((atomic_t *)&atom->u.val);
+}
+
+u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom)
+{
+	return atomic_dec_return((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom)
+{
+	atomic_inc((atomic_t *)&atom->u.val);
+}
+
+u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom)
+{
+	return atomic_inc_return((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val)
+{
+	MALI_DEBUG_ASSERT_POINTER(atom);
+	atomic_set((atomic_t *)&atom->u.val, val);
+}
+
+u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom)
+{
+	return atomic_read((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_term(_mali_osk_atomic_t *atom)
+{
+	MALI_IGNORE(atom);
+}
+
+u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val)
+{
+	return atomic_xchg((atomic_t *)&atom->u.val, val);
+}
diff --git a/mali/linux/mali_osk_bitmap.c b/mali/linux/mali_osk_bitmap.c
new file mode 100644
index 0000000..67cc7e4
--- /dev/null
+++ b/mali/linux/mali_osk_bitmap.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2010, 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_bitmap.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/bitmap.h>
+#include <linux/vmalloc.h>
+#include "common/mali_kernel_common.h"
+#include "mali_osk_types.h"
+#include "mali_osk.h"
+
+u32 _mali_osk_bitmap_alloc(struct _mali_osk_bitmap *bitmap)
+{
+	u32 obj;
+
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	_mali_osk_spinlock_lock(bitmap->lock);
+
+	obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->reserve);
+
+	if (obj < bitmap->max) {
+		set_bit(obj, bitmap->table);
+	} else {
+		obj = -1;
+	}
+
+	if (obj != -1)
+		--bitmap->avail;
+	_mali_osk_spinlock_unlock(bitmap->lock);
+
+	return obj;
+}
+
+void _mali_osk_bitmap_free(struct _mali_osk_bitmap *bitmap, u32 obj)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	_mali_osk_bitmap_free_range(bitmap, obj, 1);
+}
+
+u32 _mali_osk_bitmap_alloc_range(struct _mali_osk_bitmap *bitmap, int cnt)
+{
+	u32 obj;
+
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	if (0 >= cnt) {
+		return -1;
+	}
+
+	if (1 == cnt) {
+		return _mali_osk_bitmap_alloc(bitmap);
+	}
+
+	_mali_osk_spinlock_lock(bitmap->lock);
+	obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max,
+					 bitmap->last, cnt, 0);
+
+	if (obj >= bitmap->max) {
+		obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max,
+						 bitmap->reserve, cnt, 0);
+	}
+
+	if (obj < bitmap->max) {
+		bitmap_set(bitmap->table, obj, cnt);
+
+		bitmap->last = (obj + cnt);
+		if (bitmap->last >= bitmap->max) {
+			bitmap->last = bitmap->reserve;
+		}
+	} else {
+		obj = -1;
+	}
+
+	if (obj != -1) {
+		bitmap->avail -= cnt;
+	}
+
+	_mali_osk_spinlock_unlock(bitmap->lock);
+
+	return obj;
+}
+
+u32 _mali_osk_bitmap_avail(struct _mali_osk_bitmap *bitmap)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	return bitmap->avail;
+}
+
+void _mali_osk_bitmap_free_range(struct _mali_osk_bitmap *bitmap, u32 obj, int cnt)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	_mali_osk_spinlock_lock(bitmap->lock);
+	bitmap_clear(bitmap->table, obj, cnt);
+	bitmap->last = min(bitmap->last, obj);
+
+	bitmap->avail += cnt;
+	_mali_osk_spinlock_unlock(bitmap->lock);
+}
+
+int _mali_osk_bitmap_init(struct _mali_osk_bitmap *bitmap, u32 num, u32 reserve)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+	MALI_DEBUG_ASSERT(reserve <= num);
+
+	bitmap->reserve = reserve;
+	bitmap->last = reserve;
+	bitmap->max  = num;
+	bitmap->avail = num - reserve;
+	bitmap->lock = _mali_osk_spinlock_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST);
+	if (!bitmap->lock) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) *
+				sizeof(long), GFP_KERNEL);
+	if (!bitmap->table) {
+		_mali_osk_spinlock_term(bitmap->lock);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_bitmap_term(struct _mali_osk_bitmap *bitmap)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	if (NULL != bitmap->lock) {
+		_mali_osk_spinlock_term(bitmap->lock);
+	}
+
+	if (NULL != bitmap->table) {
+		kfree(bitmap->table);
+	}
+}
+
diff --git a/mali/linux/mali_osk_irq.c b/mali/linux/mali_osk_irq.c
new file mode 100755
index 0000000..960ed81
--- /dev/null
+++ b/mali/linux/mali_osk_irq.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_irq.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/types.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+#include <linux/slab.h>	/* For memory allocation */
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+typedef struct _mali_osk_irq_t_struct {
+	u32 irqnum;
+	void *data;
+	_mali_osk_irq_uhandler_t uhandler;
+} mali_osk_irq_object_t;
+
+typedef irqreturn_t (*irq_handler_func_t)(int, void *, struct pt_regs *);
+static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id);   /* , struct pt_regs *regs*/
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+u32 get_irqnum(struct _mali_osk_irq_t_struct* irq)
+{
+	if (irq)
+		return irq->irqnum;
+	else
+		return 0;
+}
+#endif
+
+#if defined(DEBUG)
+
+struct test_interrupt_data {
+	_mali_osk_irq_ack_t ack_func;
+	void *probe_data;
+	mali_bool interrupt_received;
+	wait_queue_head_t wq;
+};
+
+static irqreturn_t test_interrupt_upper_half(int port_name, void *dev_id)
+{
+	irqreturn_t ret = IRQ_NONE;
+	struct test_interrupt_data *data = (struct test_interrupt_data *)dev_id;
+
+	if (_MALI_OSK_ERR_OK == data->ack_func(data->probe_data)) {
+		data->interrupt_received = MALI_TRUE;
+		wake_up(&data->wq);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static _mali_osk_errcode_t test_interrupt(u32 irqnum,
+		_mali_osk_irq_trigger_t trigger_func,
+		_mali_osk_irq_ack_t ack_func,
+		void *probe_data,
+		const char *description)
+{
+	unsigned long irq_flags = 0;
+	struct test_interrupt_data data = {
+		.ack_func = ack_func,
+		.probe_data = probe_data,
+		.interrupt_received = MALI_FALSE,
+	};
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	irq_flags |= IRQF_SHARED;
+#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	if (0 != request_irq(irqnum, test_interrupt_upper_half, irq_flags, description, &data)) {
+		MALI_DEBUG_PRINT(2, ("Unable to install test IRQ handler for core '%s'\n", description));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	init_waitqueue_head(&data.wq);
+
+	trigger_func(probe_data);
+	wait_event_timeout(data.wq, data.interrupt_received, 100);
+
+	free_irq(irqnum, &data);
+
+	if (data.interrupt_received) {
+		MALI_DEBUG_PRINT(3, ("%s: Interrupt test OK\n", description));
+		return _MALI_OSK_ERR_OK;
+	} else {
+		MALI_PRINT_ERROR(("%s: Failed interrupt test on %u\n", description, irqnum));
+		return _MALI_OSK_ERR_FAULT;
+	}
+}
+
+#endif /* defined(DEBUG) */
+
+_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description)
+{
+	mali_osk_irq_object_t *irq_object;
+	unsigned long irq_flags = 0;
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	irq_flags |= IRQF_SHARED;
+#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	irq_object = kmalloc(sizeof(mali_osk_irq_object_t), GFP_KERNEL);
+	if (NULL == irq_object) {
+		return NULL;
+	}
+
+	if (-1 == irqnum) {
+		/* Probe for IRQ */
+		if ((NULL != trigger_func) && (NULL != ack_func)) {
+			unsigned long probe_count = 3;
+			_mali_osk_errcode_t err;
+			int irq;
+
+			MALI_DEBUG_PRINT(2, ("Probing for irq\n"));
+
+			do {
+				unsigned long mask;
+
+				mask = probe_irq_on();
+				trigger_func(probe_data);
+
+				_mali_osk_time_ubusydelay(5);
+
+				irq = probe_irq_off(mask);
+				err = ack_func(probe_data);
+			} while (irq < 0 && (err == _MALI_OSK_ERR_OK) && probe_count--);
+
+			if (irq < 0 || (_MALI_OSK_ERR_OK != err)) irqnum = -1;
+			else irqnum = irq;
+		} else irqnum = -1; /* no probe functions, fault */
+
+		if (-1 != irqnum) {
+			/* found an irq */
+			MALI_DEBUG_PRINT(2, ("Found irq %d\n", irqnum));
+		} else {
+			MALI_DEBUG_PRINT(2, ("Probe for irq failed\n"));
+		}
+	}
+
+	irq_object->irqnum = irqnum;
+	irq_object->uhandler = uhandler;
+	irq_object->data = int_data;
+
+	if (-1 == irqnum) {
+		MALI_DEBUG_PRINT(2, ("No IRQ for core '%s' found during probe\n", description));
+		kfree(irq_object);
+		return NULL;
+	}
+
+#if defined(DEBUG)
+	/* Verify that the configured interrupt settings are working */
+	if (_MALI_OSK_ERR_OK != test_interrupt(irqnum, trigger_func, ack_func, probe_data, description)) {
+		MALI_DEBUG_PRINT(2, ("Test of IRQ(%d) handler for core '%s' failed\n", irqnum, description));
+		kfree(irq_object);
+		return NULL;
+	}
+#endif
+
+	if (0 != request_irq(irqnum, irq_handler_upper_half, irq_flags, description, irq_object)) {
+		MALI_DEBUG_PRINT(2, ("Unable to install IRQ handler for core '%s'\n", description));
+		kfree(irq_object);
+		return NULL;
+	}
+
+	return irq_object;
+}
+
+void _mali_osk_irq_term(_mali_osk_irq_t *irq)
+{
+	mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)irq;
+	free_irq(irq_object->irqnum, irq_object);
+	kfree(irq_object);
+}
+
+
+/** This function is called directly in interrupt context from the OS just after
+ * the CPU get the hw-irq from mali, or other devices on the same IRQ-channel.
+ * It is registered one of these function for each mali core. When an interrupt
+ * arrives this function will be called equal times as registered mali cores.
+ * That means that we only check one mali core in one function call, and the
+ * core we check for each turn is given by the \a dev_id variable.
+ * If we detect an pending interrupt on the given core, we mask the interrupt
+ * out by settging the core's IRQ_MASK register to zero.
+ * Then we schedule the mali_core_irq_handler_bottom_half to run as high priority
+ * work queue job.
+ */
+static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id)   /* , struct pt_regs *regs*/
+{
+	irqreturn_t ret = IRQ_NONE;
+	mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)dev_id;
+
+	if (_MALI_OSK_ERR_OK == irq_object->uhandler(irq_object->data)) {
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
diff --git a/mali/linux/mali_osk_locks.c b/mali/linux/mali_osk_locks.c
new file mode 100755
index 0000000..4fa9398
--- /dev/null
+++ b/mali/linux/mali_osk_locks.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_locks.c
+ * Implemenation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk_locks.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+
+#ifdef DEBUG
+#ifdef LOCK_ORDER_CHECKING
+static DEFINE_SPINLOCK(lock_tracking_lock);
+static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid);
+static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid);
+static const char *const lock_order_to_string(_mali_osk_lock_order_t order);
+#endif /* LOCK_ORDER_CHECKING */
+
+void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+{
+	checker->orig_flags = flags;
+	checker->owner = 0;
+
+#ifdef LOCK_ORDER_CHECKING
+	checker->order = order;
+	checker->next = NULL;
+#endif
+}
+
+void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker)
+{
+	checker->owner = _mali_osk_get_tid();
+
+#ifdef LOCK_ORDER_CHECKING
+	if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) {
+		if (!add_lock_to_log_and_check(checker, _mali_osk_get_tid())) {
+			printk(KERN_ERR "%d: ERROR lock %p taken while holding a lock of a higher order.\n",
+			       _mali_osk_get_tid(), checker);
+			dump_stack();
+		}
+	}
+#endif
+}
+
+void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker)
+{
+
+#ifdef LOCK_ORDER_CHECKING
+	if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) {
+		remove_lock_from_log(checker, _mali_osk_get_tid());
+	}
+#endif
+	checker->owner = 0;
+}
+
+
+#ifdef LOCK_ORDER_CHECKING
+/* Lock order checking
+ * -------------------
+ *
+ * To assure that lock ordering scheme defined by _mali_osk_lock_order_t is strictly adhered to, the
+ * following function will, together with a linked list and some extra members in _mali_osk_lock_debug_s,
+ * make sure that a lock that is taken has a higher order than the current highest-order lock a
+ * thread holds.
+ *
+ * This is done in the following manner:
+ * - A linked list keeps track of locks held by a thread.
+ * - A `next' pointer is added to each lock. This is used to chain the locks together.
+ * - When taking a lock, the `add_lock_to_log_and_check' makes sure that taking
+ *   the given lock is legal. It will follow the linked list  to find the last
+ *   lock taken by this thread. If the last lock's order was lower than the
+ *   lock that is to be taken, it appends the new lock to the list and returns
+ *   true, if not, it return false. This return value is assert()'ed on in
+ *   _mali_osk_lock_wait().
+ */
+
+static struct _mali_osk_lock_debug_s *lock_lookup_list;
+
+static void dump_lock_tracking_list(void)
+{
+	struct _mali_osk_lock_debug_s *l;
+	u32 n = 1;
+
+	/* print list for debugging purposes */
+	l = lock_lookup_list;
+
+	while (NULL != l) {
+		printk(" [lock: %p, tid_owner: %d, order: %d] ->", l, l->owner, l->order);
+		l = l->next;
+		MALI_DEBUG_ASSERT(n++ < 100);
+	}
+	printk(" NULL\n");
+}
+
+static int tracking_list_length(void)
+{
+	struct _mali_osk_lock_debug_s *l;
+	u32 n = 0;
+	l = lock_lookup_list;
+
+	while (NULL != l) {
+		l = l->next;
+		n++;
+		MALI_DEBUG_ASSERT(n < 100);
+	}
+	return n;
+}
+
+static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid)
+{
+	mali_bool ret = MALI_FALSE;
+	_mali_osk_lock_order_t highest_order_for_tid = _MALI_OSK_LOCK_ORDER_FIRST;
+	struct _mali_osk_lock_debug_s *highest_order_lock = (struct _mali_osk_lock_debug_s *)0xbeefbabe;
+	struct _mali_osk_lock_debug_s *l;
+	unsigned long local_lock_flag;
+	u32 len;
+
+	spin_lock_irqsave(&lock_tracking_lock, local_lock_flag);
+	len = tracking_list_length();
+
+	l  = lock_lookup_list;
+	if (NULL == l) { /* This is the first lock taken by this thread -- record and return true */
+		lock_lookup_list = lock;
+		spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+		return MALI_TRUE;
+	} else {
+		/* Traverse the locks taken and find the lock of the highest order.
+		 * Since several threads may hold locks, each lock's owner must be
+		 * checked so that locks not owned by this thread can be ignored. */
+		for (;;) {
+			MALI_DEBUG_ASSERT_POINTER(l);
+			if (tid == l->owner && l->order >= highest_order_for_tid) {
+				highest_order_for_tid = l->order;
+				highest_order_lock = l;
+			}
+
+			if (NULL != l->next) {
+				l = l->next;
+			} else {
+				break;
+			}
+		}
+
+		l->next = lock;
+		l->next = NULL;
+	}
+
+	/* We have now found the highest order lock currently held by this thread and can see if it is
+	 * legal to take the requested lock. */
+	ret = highest_order_for_tid < lock->order;
+
+	if (!ret) {
+		printk(KERN_ERR "Took lock of order %d (%s) while holding lock of order %d (%s)\n",
+		       lock->order, lock_order_to_string(lock->order),
+		       highest_order_for_tid, lock_order_to_string(highest_order_for_tid));
+		dump_lock_tracking_list();
+	}
+
+	if (len + 1 != tracking_list_length()) {
+		printk(KERN_ERR "************ lock: %p\n", lock);
+		printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length());
+		dump_lock_tracking_list();
+		MALI_DEBUG_ASSERT_POINTER(NULL);
+	}
+
+	spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+	return ret;
+}
+
+static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid)
+{
+	struct _mali_osk_lock_debug_s *curr;
+	struct _mali_osk_lock_debug_s *prev = NULL;
+	unsigned long local_lock_flag;
+	u32 len;
+	u32 n = 0;
+
+	spin_lock_irqsave(&lock_tracking_lock, local_lock_flag);
+	len = tracking_list_length();
+	curr = lock_lookup_list;
+
+	if (NULL == curr) {
+		printk(KERN_ERR "Error: Lock tracking list was empty on call to remove_lock_from_log\n");
+		dump_lock_tracking_list();
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(curr);
+
+
+	while (lock != curr) {
+		prev = curr;
+
+		MALI_DEBUG_ASSERT_POINTER(curr);
+		curr = curr->next;
+		MALI_DEBUG_ASSERT(n++ < 100);
+	}
+
+	if (NULL == prev) {
+		lock_lookup_list = curr->next;
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(curr);
+		MALI_DEBUG_ASSERT_POINTER(prev);
+		prev->next = curr->next;
+	}
+
+	lock->next = NULL;
+
+	if (len - 1 != tracking_list_length()) {
+		printk(KERN_ERR "************ lock: %p\n", lock);
+		printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length());
+		dump_lock_tracking_list();
+		MALI_DEBUG_ASSERT_POINTER(NULL);
+	}
+
+	spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+}
+
+static const char *const lock_order_to_string(_mali_osk_lock_order_t order)
+{
+	switch (order) {
+	case _MALI_OSK_LOCK_ORDER_SESSIONS:
+		return "_MALI_OSK_LOCK_ORDER_SESSIONS";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_SESSION:
+		return "_MALI_OSK_LOCK_ORDER_MEM_SESSION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_INFO:
+		return "_MALI_OSK_LOCK_ORDER_MEM_INFO";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_PT_CACHE:
+		return "_MALI_OSK_LOCK_ORDER_MEM_PT_CACHE";
+		break;
+	case _MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP:
+		return "_MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PM_EXECUTION:
+		return "_MALI_OSK_LOCK_ORDER_PM_EXECUTION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_EXECUTOR:
+		return "_MALI_OSK_LOCK_ORDER_EXECUTOR";
+		break;
+	case _MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM:
+		return "_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SCHEDULER:
+		return "_MALI_OSK_LOCK_ORDER_SCHEDULER";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED:
+		return "_MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED";
+		break;
+	case _MALI_OSK_LOCK_ORDER_DMA_COMMAND:
+		return "_MALI_OSK_LOCK_ORDER_DMA_COMMAND";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PROFILING:
+		return "_MALI_OSK_LOCK_ORDER_PROFILING";
+		break;
+	case _MALI_OSK_LOCK_ORDER_L2:
+		return "_MALI_OSK_LOCK_ORDER_L2";
+		break;
+	case _MALI_OSK_LOCK_ORDER_L2_COMMAND:
+		return "_MALI_OSK_LOCK_ORDER_L2_COMMAND";
+		break;
+	case _MALI_OSK_LOCK_ORDER_UTILIZATION:
+		return "_MALI_OSK_LOCK_ORDER_UTILIZATION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS:
+		return "_MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PM_STATE:
+		return "_MALI_OSK_LOCK_ORDER_PM_STATE";
+		break;
+	default:
+		return "<UNKNOWN_LOCK_ORDER>";
+	}
+}
+#endif /* LOCK_ORDER_CHECKING */
+#endif /* DEBUG */
diff --git a/mali/linux/mali_osk_locks.h b/mali/linux/mali_osk_locks.h
new file mode 100755
index 0000000..a05586f
--- /dev/null
+++ b/mali/linux/mali_osk_locks.h
@@ -0,0 +1,326 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_locks.h
+ * Defines OS abstraction of lock and mutex
+ */
+#ifndef _MALI_OSK_LOCKS_H
+#define _MALI_OSK_LOCKS_H
+
+#include <linux/spinlock.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+
+#include <linux/slab.h>
+
+#include "mali_osk_types.h"
+
+#ifdef _cplusplus
+extern "C" {
+#endif
+
+	/* When DEBUG is enabled, this struct will be used to track owner, mode and order checking */
+#ifdef DEBUG
+	struct _mali_osk_lock_debug_s {
+		u32 owner;
+		_mali_osk_lock_flags_t orig_flags;
+		_mali_osk_lock_order_t order;
+		struct _mali_osk_lock_debug_s *next;
+	};
+#endif
+
+	/* Anstraction of spinlock_t */
+	struct _mali_osk_spinlock_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+		spinlock_t spinlock;
+	};
+
+	/* Abstration of spinlock_t and lock flag which is used to store register's state before locking */
+	struct _mali_osk_spinlock_irq_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+
+		spinlock_t spinlock;
+		unsigned long flags;
+	};
+
+	/* Abstraction of rw_semaphore in OS */
+	struct _mali_osk_mutex_rw_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+		_mali_osk_lock_mode_t mode;
+#endif
+
+		struct rw_semaphore rw_sema;
+	};
+
+	/* Mutex and mutex_interruptible functions share the same osk mutex struct */
+	struct _mali_osk_mutex_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+		struct mutex mutex;
+	};
+
+#ifdef DEBUG
+	/** @brief _mali_osk_locks_debug_init/add/remove() functions are declared when DEBUG is enabled and
+	 * defined in file mali_osk_locks.c. When LOCK_ORDER_CHECKING is enabled, calling these functions when we
+	 * init/lock/unlock a lock/mutex, we could track lock order of a given tid. */
+	void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order);
+	void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker);
+	void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker);
+
+	/** @brief This function can return a given lock's owner when DEBUG     is enabled. */
+	static inline u32 _mali_osk_lock_get_owner(struct _mali_osk_lock_debug_s *lock)
+	{
+		return lock->owner;
+	}
+#else
+#define _mali_osk_locks_debug_init(x, y, z) do {} while (0)
+#define _mali_osk_locks_debug_add(x) do {} while (0)
+#define _mali_osk_locks_debug_remove(x) do {} while (0)
+#endif
+
+	/** @brief Before use _mali_osk_spin_lock, init function should be used to allocate memory and initial spinlock*/
+	static inline _mali_osk_spinlock_t *_mali_osk_spinlock_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_spinlock_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_spinlock_t), GFP_KERNEL);
+		if (NULL == lock) {
+			return NULL;
+		}
+		spin_lock_init(&lock->spinlock);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief Lock a spinlock */
+	static inline void  _mali_osk_spinlock_lock(_mali_osk_spinlock_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		spin_lock(&lock->spinlock);
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock a spinlock */
+	static inline void _mali_osk_spinlock_unlock(_mali_osk_spinlock_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		spin_unlock(&lock->spinlock);
+	}
+
+	/** @brief Free a memory block which the argument lock pointed to and its type must be
+	 * _mali_osk_spinlock_t *. */
+	static inline void _mali_osk_spinlock_term(_mali_osk_spinlock_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Before _mali_osk_spinlock_irq_lock/unlock/term() is called, init function should be
+	 * called to initial spinlock and flags in struct _mali_osk_spinlock_irq_t. */
+	static inline _mali_osk_spinlock_irq_t *_mali_osk_spinlock_irq_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_spinlock_irq_t *lock = NULL;
+		lock = kmalloc(sizeof(_mali_osk_spinlock_irq_t), GFP_KERNEL);
+
+		if (NULL == lock) {
+			return NULL;
+		}
+
+		lock->flags = 0;
+		spin_lock_init(&lock->spinlock);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief Lock spinlock and save the register's state */
+	static inline void _mali_osk_spinlock_irq_lock(_mali_osk_spinlock_irq_t *lock)
+	{
+		unsigned long tmp_flags;
+
+		BUG_ON(NULL == lock);
+		spin_lock_irqsave(&lock->spinlock, tmp_flags);
+		lock->flags = tmp_flags;
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock spinlock with saved register's state */
+	static inline void _mali_osk_spinlock_irq_unlock(_mali_osk_spinlock_irq_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		spin_unlock_irqrestore(&lock->spinlock, lock->flags);
+	}
+
+	/** @brief Destroy a given memory block which lock pointed to, and the lock type must be
+	 * _mali_osk_spinlock_irq_t *. */
+	static inline void _mali_osk_spinlock_irq_term(_mali_osk_spinlock_irq_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Before _mali_osk_mutex_rw_wait/signal/term() is called, we should call
+	 * _mali_osk_mutex_rw_init() to kmalloc a memory block and initial part of elements in it. */
+	static inline _mali_osk_mutex_rw_t *_mali_osk_mutex_rw_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_mutex_rw_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_mutex_rw_t), GFP_KERNEL);
+
+		if (NULL == lock) {
+			return NULL;
+		}
+
+		init_rwsem(&lock->rw_sema);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief When call _mali_osk_mutex_rw_wait/signal() functions, the second argument mode
+	 * should be assigned with value _MALI_OSK_LOCKMODE_RO or _MALI_OSK_LOCKMODE_RW */
+	static inline void _mali_osk_mutex_rw_wait(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode)
+	{
+		BUG_ON(NULL == lock);
+		BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode));
+
+		if (mode == _MALI_OSK_LOCKMODE_RO) {
+			down_read(&lock->rw_sema);
+		} else {
+			down_write(&lock->rw_sema);
+		}
+
+#ifdef DEBUG
+		if (mode == _MALI_OSK_LOCKMODE_RW) {
+			lock->mode = mode;
+		} else { /* mode == _MALI_OSK_LOCKMODE_RO */
+			lock->mode = mode;
+		}
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+#endif
+	}
+
+	/** @brief Up lock->rw_sema with up_read/write() accordinf argument mode's value. */
+	static inline void  _mali_osk_mutex_rw_signal(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode)
+	{
+		BUG_ON(NULL == lock);
+		BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode));
+#ifdef DEBUG
+		/* make sure the thread releasing the lock actually was the owner */
+		if (mode == _MALI_OSK_LOCKMODE_RW) {
+			_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+			/* This lock now has no owner */
+			lock->checker.owner = 0;
+		}
+#endif
+
+		if (mode == _MALI_OSK_LOCKMODE_RO) {
+			up_read(&lock->rw_sema);
+		} else {
+			up_write(&lock->rw_sema);
+		}
+	}
+
+	/** @brief Free a given memory block which lock pointed to and its type must be
+	 * _mali_sok_mutex_rw_t *. */
+	static inline void _mali_osk_mutex_rw_term(_mali_osk_mutex_rw_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Mutex & mutex_interruptible share the same init and term function, because they have the
+	 * same osk mutex struct, and the difference between them is which locking function they use */
+	static inline _mali_osk_mutex_t *_mali_osk_mutex_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_mutex_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_mutex_t), GFP_KERNEL);
+
+		if (NULL == lock) {
+			return NULL;
+		}
+		mutex_init(&lock->mutex);
+
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief  Lock the lock->mutex with mutex_lock_interruptible function */
+	static inline _mali_osk_errcode_t _mali_osk_mutex_wait_interruptible(_mali_osk_mutex_t *lock)
+	{
+		_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+		BUG_ON(NULL == lock);
+
+		if (mutex_lock_interruptible(&lock->mutex)) {
+			printk(KERN_WARNING "Mali: Can not lock mutex\n");
+			err = _MALI_OSK_ERR_RESTARTSYSCALL;
+		}
+
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+		return err;
+	}
+
+	/** @brief Unlock the lock->mutex which is locked with mutex_lock_interruptible() function. */
+	static inline void _mali_osk_mutex_signal_interruptible(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		mutex_unlock(&lock->mutex);
+	}
+
+	/** @brief Lock the lock->mutex just with mutex_lock() function which could not be interruptted. */
+	static inline void _mali_osk_mutex_wait(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		mutex_lock(&lock->mutex);
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock the lock->mutex which is locked with mutex_lock() function. */
+	static inline void _mali_osk_mutex_signal(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		mutex_unlock(&lock->mutex);
+	}
+
+	/** @brief Free a given memory block which lock point. */
+	static inline void _mali_osk_mutex_term(_mali_osk_mutex_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+#ifdef _cplusplus
+}
+#endif
+
+#endif
diff --git a/mali/linux/mali_osk_low_level_mem.c b/mali/linux/mali_osk_low_level_mem.c
new file mode 100755
index 0000000..bc713a1
--- /dev/null
+++ b/mali/linux/mali_osk_low_level_mem.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_low_level_mem.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <asm/io.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+
+void _mali_osk_mem_barrier(void)
+{
+	mb();
+}
+
+void _mali_osk_write_mem_barrier(void)
+{
+	wmb();
+}
+
+mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description)
+{
+	return (mali_io_address)ioremap_nocache(phys, size);
+}
+
+void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address virt)
+{
+	iounmap((void *)virt);
+}
+
+_mali_osk_errcode_t inline _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description)
+{
+#if MALI_LICENSE_IS_GPL
+	return _MALI_OSK_ERR_OK; /* GPL driver gets the mem region for the resources registered automatically */
+#else
+	return ((NULL == request_mem_region(phys, size, description)) ? _MALI_OSK_ERR_NOMEM : _MALI_OSK_ERR_OK);
+#endif
+}
+
+void inline _mali_osk_mem_unreqregion(uintptr_t phys, u32 size)
+{
+#if !MALI_LICENSE_IS_GPL
+	release_mem_region(phys, size);
+#endif
+}
+
+void inline _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val)
+{
+	__raw_writel(cpu_to_le32(val), ((u8 *)addr) + offset);
+}
+
+u32 inline _mali_osk_mem_ioread32(volatile mali_io_address addr, u32 offset)
+{
+	return ioread32(((u8 *)addr) + offset);
+}
+
+void inline _mali_osk_mem_iowrite32(volatile mali_io_address addr, u32 offset, u32 val)
+{
+	iowrite32(val, ((u8 *)addr) + offset);
+}
+
+void _mali_osk_cache_flushall(void)
+{
+	/** @note Cached memory is not currently supported in this implementation */
+}
+
+void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size)
+{
+	_mali_osk_write_mem_barrier();
+}
+
+u32 _mali_osk_mem_write_safe(void __user *dest, const void __user *src, u32 size)
+{
+#define MALI_MEM_SAFE_COPY_BLOCK_SIZE 4096
+	u32 retval = 0;
+	void *temp_buf;
+
+	temp_buf = kmalloc(MALI_MEM_SAFE_COPY_BLOCK_SIZE, GFP_KERNEL);
+	if (NULL != temp_buf) {
+		u32 bytes_left_to_copy = size;
+		u32 i;
+		for (i = 0; i < size; i += MALI_MEM_SAFE_COPY_BLOCK_SIZE) {
+			u32 size_to_copy;
+			u32 size_copied;
+			u32 bytes_left;
+
+			if (bytes_left_to_copy > MALI_MEM_SAFE_COPY_BLOCK_SIZE) {
+				size_to_copy = MALI_MEM_SAFE_COPY_BLOCK_SIZE;
+			} else {
+				size_to_copy = bytes_left_to_copy;
+			}
+
+			bytes_left = copy_from_user(temp_buf, ((char *)src) + i, size_to_copy);
+			size_copied = size_to_copy - bytes_left;
+
+			bytes_left = copy_to_user(((char *)dest) + i, temp_buf, size_copied);
+			size_copied -= bytes_left;
+
+			bytes_left_to_copy -= size_copied;
+			retval += size_copied;
+
+			if (size_copied != size_to_copy) {
+				break; /* Early out, we was not able to copy this entire block */
+			}
+		}
+
+		kfree(temp_buf);
+	}
+
+	return retval;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args)
+{
+	void __user *src;
+	void __user *dst;
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (NULL == session) {
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	src = (void __user *)(uintptr_t)args->src;
+	dst = (void __user *)(uintptr_t)args->dest;
+
+	/* Return number of bytes actually copied */
+	args->size = _mali_osk_mem_write_safe(dst, src, args->size);
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/mali/linux/mali_osk_mali.c b/mali/linux/mali_osk_mali.c
new file mode 100755
index 0000000..f47b343
--- /dev/null
+++ b/mali/linux/mali_osk_mali.c
@@ -0,0 +1,496 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_mali.c
+ * Implementation of the OS abstraction layer which is specific for the Mali kernel device driver
+ */
+#include <linux/kernel.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)
+#include <linux/uaccess.h>
+#else
+#include <asm/uaccess.h>
+#endif
+#include <linux/platform_device.h>
+#include <linux/mali/mali_utgard.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h" /* MALI_xxx macros */
+#include "mali_osk.h"           /* kernel side OS functions */
+#include "mali_kernel_linux.h"
+
+static mali_bool mali_secure_mode_enabled = MALI_FALSE;
+static mali_bool mali_secure_mode_supported = MALI_FALSE;
+
+/* Function that init the mali gpu secure mode */
+void (*mali_secure_mode_deinit)(void) = NULL;
+/* Function that reset GPU and enable the mali gpu secure mode */
+int (*mali_gpu_reset_and_secure_mode_enable)(void) = NULL;
+/* Function that reset GPU and disable the mali gpu secure mode */
+int (*mali_gpu_reset_and_secure_mode_disable)(void) = NULL;
+
+#ifdef CONFIG_MALI_DT
+
+#define MALI_OSK_INVALID_RESOURCE_ADDRESS 0xFFFFFFFF
+
+/**
+ * Define the max number of resource we could have.
+ */
+#define MALI_OSK_MAX_RESOURCE_NUMBER 27
+
+/**
+ * Define the max number of resource with interrupts, and they are
+ * the first 20 elements in array mali_osk_resource_bank.
+ */
+#define MALI_OSK_RESOURCE_WITH_IRQ_NUMBER 20
+
+/**
+ * pp core start and end location in mali_osk_resource_bank array.
+ */
+#define MALI_OSK_RESOURCE_PP_LOCATION_START 2
+#define MALI_OSK_RESOURCE_PP_LOCATION_END 17
+
+/**
+ * L2 cache start and end location in mali_osk_resource_bank array.
+ */
+#define MALI_OSK_RESOURCE_L2_LOCATION_START 20
+#define MALI_OSK_RESOURCE_l2_LOCATION_END 22
+
+/**
+ * DMA unit location.
+ */
+#define MALI_OSK_RESOURCE_DMA_LOCATION 26
+
+static _mali_osk_resource_t mali_osk_resource_bank[MALI_OSK_MAX_RESOURCE_NUMBER] = {
+	{.description = "Mali_GP", .base = MALI_OFFSET_GP, .irq_name = "IRQGP",},
+	{.description = "Mali_GP_MMU", .base = MALI_OFFSET_GP_MMU, .irq_name = "IRQGPMMU",},
+	{.description = "Mali_PP0", .base = MALI_OFFSET_PP0, .irq_name = "IRQPP0",},
+	{.description = "Mali_PP0_MMU", .base = MALI_OFFSET_PP0_MMU, .irq_name = "IRQPPMMU0",},
+	{.description = "Mali_PP1", .base = MALI_OFFSET_PP1, .irq_name = "IRQPP1",},
+	{.description = "Mali_PP1_MMU", .base = MALI_OFFSET_PP1_MMU, .irq_name = "IRQPPMMU1",},
+	{.description = "Mali_PP2", .base = MALI_OFFSET_PP2, .irq_name = "IRQPP2",},
+	{.description = "Mali_PP2_MMU", .base = MALI_OFFSET_PP2_MMU, .irq_name = "IRQPPMMU2",},
+	{.description = "Mali_PP3", .base = MALI_OFFSET_PP3, .irq_name = "IRQPP3",},
+	{.description = "Mali_PP3_MMU", .base = MALI_OFFSET_PP3_MMU, .irq_name = "IRQPPMMU3",},
+	{.description = "Mali_PP4", .base = MALI_OFFSET_PP4, .irq_name = "IRQPP4",},
+	{.description = "Mali_PP4_MMU", .base = MALI_OFFSET_PP4_MMU, .irq_name = "IRQPPMMU4",},
+	{.description = "Mali_PP5", .base = MALI_OFFSET_PP5, .irq_name = "IRQPP5",},
+	{.description = "Mali_PP5_MMU", .base = MALI_OFFSET_PP5_MMU, .irq_name = "IRQPPMMU5",},
+	{.description = "Mali_PP6", .base = MALI_OFFSET_PP6, .irq_name = "IRQPP6",},
+	{.description = "Mali_PP6_MMU", .base = MALI_OFFSET_PP6_MMU, .irq_name = "IRQPPMMU6",},
+	{.description = "Mali_PP7", .base = MALI_OFFSET_PP7, .irq_name = "IRQPP7",},
+	{.description = "Mali_PP7_MMU", .base = MALI_OFFSET_PP7_MMU, .irq_name = "IRQPPMMU",},
+	{.description = "Mali_PP_Broadcast", .base = MALI_OFFSET_PP_BCAST, .irq_name = "IRQPP",},
+	{.description = "Mali_PMU", .base = MALI_OFFSET_PMU, .irq_name = "IRQPMU",},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE0,},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE1,},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE2,},
+	{.description = "Mali_PP_MMU_Broadcast", .base = MALI_OFFSET_PP_BCAST_MMU,},
+	{.description = "Mali_Broadcast", .base = MALI_OFFSET_BCAST,},
+	{.description = "Mali_DLBU", .base = MALI_OFFSET_DLBU,},
+	{.description = "Mali_DMA", .base = MALI_OFFSET_DMA,},
+};
+
+static int _mali_osk_get_compatible_name(const char **out_string)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+
+	MALI_DEBUG_ASSERT(NULL != node);
+
+	return of_property_read_string(node, "compatible", out_string);
+}
+
+_mali_osk_errcode_t _mali_osk_resource_initialize(void)
+{
+	mali_bool mali_is_450 = MALI_FALSE, mali_is_470 = MALI_FALSE;
+	int i, pp_core_num = 0, l2_core_num = 0;
+	struct resource *res;
+	const char *compatible_name = NULL;
+
+	if (0 == _mali_osk_get_compatible_name(&compatible_name)) {
+		if (0 == strncmp(compatible_name, "arm,mali-450", strlen("arm,mali-450"))) {
+			mali_is_450 = MALI_TRUE;
+			MALI_DEBUG_PRINT(2, ("mali-450 device tree detected."));
+		} else if (0 == strncmp(compatible_name, "arm,mali-470", strlen("arm,mali-470"))) {
+			mali_is_470 = MALI_TRUE;
+			MALI_DEBUG_PRINT(2, ("mali-470 device tree detected."));
+		}
+	}
+
+	for (i = 0; i < MALI_OSK_RESOURCE_WITH_IRQ_NUMBER; i++) {
+		res = platform_get_resource_byname(mali_platform_device, IORESOURCE_IRQ, mali_osk_resource_bank[i].irq_name);
+		if (res) {
+			mali_osk_resource_bank[i].irq = res->start;
+		} else {
+			mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+		}
+	}
+
+	for (i = MALI_OSK_RESOURCE_PP_LOCATION_START; i <= MALI_OSK_RESOURCE_PP_LOCATION_END; i++) {
+		if (MALI_OSK_INVALID_RESOURCE_ADDRESS != mali_osk_resource_bank[i].base) {
+			pp_core_num++;
+		}
+	}
+
+	/* We have to divide by 2, because we caculate twice for only one pp(pp_core and pp_mmu_core). */
+	if (0 != pp_core_num % 2) {
+		MALI_DEBUG_PRINT(2, ("The value of pp core number isn't normal."));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pp_core_num /= 2;
+
+	/**
+	 * we can caculate the number of l2 cache core according the number of pp core number
+	 * and device type(mali400/mali450/mali470).
+	 */
+	l2_core_num = 1;
+	if (mali_is_450) {
+		if (pp_core_num > 4) {
+			l2_core_num = 3;
+		} else if (pp_core_num <= 4) {
+			l2_core_num = 2;
+		}
+	}
+
+	for (i = MALI_OSK_RESOURCE_l2_LOCATION_END; i > MALI_OSK_RESOURCE_L2_LOCATION_START + l2_core_num - 1; i--) {
+		mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+	}
+
+	/* If device is not mali-450 type, we have to remove related resource from resource bank. */
+	if (!(mali_is_450 || mali_is_470)) {
+		for (i = MALI_OSK_RESOURCE_l2_LOCATION_END + 1; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) {
+			mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+		}
+	}
+
+	if (mali_is_470)
+		mali_osk_resource_bank[MALI_OSK_RESOURCE_DMA_LOCATION].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res)
+{
+	int i;
+
+	if (NULL == mali_platform_device) {
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	/* Traverse all of resources in resources bank to find the matching one. */
+	for (i = 0; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) {
+		if (mali_osk_resource_bank[i].base == addr) {
+			if (NULL != res) {
+				res->base = addr + _mali_osk_resource_base_address();
+				res->description = mali_osk_resource_bank[i].description;
+				res->irq = mali_osk_resource_bank[i].irq;
+			}
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+uintptr_t _mali_osk_resource_base_address(void)
+{
+	struct resource *reg_res = NULL;
+	uintptr_t ret = 0;
+
+	reg_res = platform_get_resource(mali_platform_device, IORESOURCE_MEM, 0);
+
+	if (NULL != reg_res) {
+		ret = reg_res->start;
+	}
+
+	return ret;
+}
+
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	MALI_DEBUG_PRINT(2, ("Get pmu config from device tree configuration.\n"));
+
+	MALI_DEBUG_ASSERT(NULL != node);
+
+	if (!of_get_property(node, "pmu_domain_config", &length)) {
+		return;
+	}
+
+	if (array_size != length / sizeof(u32)) {
+		MALI_PRINT_ERROR(("Wrong pmu domain config in device tree."));
+		return;
+	}
+
+	of_property_for_each_u32(node, "pmu_domain_config", prop, p, u) {
+		domain_config_array[i] = (u16)u;
+		i++;
+	}
+
+	return;
+}
+
+u32 _mali_osk_get_pmu_switch_delay(void)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+	u32 switch_delay;
+
+	MALI_DEBUG_ASSERT(NULL != node);
+
+	if (0 == of_property_read_u32(node, "pmu_switch_delay", &switch_delay)) {
+		return switch_delay;
+	} else {
+		MALI_DEBUG_PRINT(2, ("Couldn't find pmu_switch_delay in device tree configuration.\n"));
+	}
+
+	return 0;
+}
+
+#else /* CONFIG_MALI_DT */
+
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res)
+{
+	int i;
+	uintptr_t phys_addr;
+
+	if (NULL == mali_platform_device) {
+		/* Not connected to a device */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	phys_addr = addr + _mali_osk_resource_base_address();
+	for (i = 0; i < mali_platform_device->num_resources; i++) {
+		if (IORESOURCE_MEM == resource_type(&(mali_platform_device->resource[i])) &&
+		    mali_platform_device->resource[i].start == phys_addr) {
+			if (NULL != res) {
+				res->base = phys_addr;
+				res->description = mali_platform_device->resource[i].name;
+
+				/* Any (optional) IRQ resource belonging to this resource will follow */
+				if ((i + 1) < mali_platform_device->num_resources &&
+				    IORESOURCE_IRQ == resource_type(&(mali_platform_device->resource[i + 1]))) {
+					res->irq = mali_platform_device->resource[i + 1].start;
+				} else {
+					res->irq = -1;
+				}
+			}
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+uintptr_t _mali_osk_resource_base_address(void)
+{
+	uintptr_t lowest_addr = (uintptr_t)(0 - 1);
+	uintptr_t ret = 0;
+
+	if (NULL != mali_platform_device) {
+		int i;
+		for (i = 0; i < mali_platform_device->num_resources; i++) {
+			if (mali_platform_device->resource[i].flags & IORESOURCE_MEM &&
+			    mali_platform_device->resource[i].start < lowest_addr) {
+				lowest_addr = mali_platform_device->resource[i].start;
+				ret = lowest_addr;
+			}
+		}
+	}
+
+	return ret;
+}
+
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size)
+{
+	_mali_osk_device_data data = { 0, };
+
+	MALI_DEBUG_PRINT(2, ("Get pmu config from platform device data.\n"));
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		/* Copy the custom customer power domain config */
+		_mali_osk_memcpy(domain_config_array, data.pmu_domain_config, sizeof(data.pmu_domain_config));
+	}
+
+	return;
+}
+
+u32 _mali_osk_get_pmu_switch_delay(void)
+{
+	_mali_osk_errcode_t err;
+	_mali_osk_device_data data = { 0, };
+
+	err = _mali_osk_device_data_get(&data);
+
+	if (_MALI_OSK_ERR_OK == err) {
+		return data.pmu_switch_delay;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_MALI_DT */
+
+_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(data);
+
+	if (NULL != mali_platform_device) {
+		struct mali_gpu_device_data *os_data = NULL;
+
+		os_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data;
+		if (NULL != os_data) {
+			/* Copy data from OS dependant struct to Mali neutral struct (identical!) */
+			BUILD_BUG_ON(sizeof(*os_data) != sizeof(*data));
+			_mali_osk_memcpy(data, os_data, sizeof(*os_data));
+
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+u32 _mali_osk_identify_gpu_resource(void)
+{
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_L2_RESOURCE1, NULL))
+		/* Mali 450 */
+		return 0x450;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_DLBU, NULL))
+		/* Mali 470 */
+		return 0x470;
+
+	/* Mali 400 */
+	return 0x400;
+}
+
+mali_bool _mali_osk_shared_interrupts(void)
+{
+	u32 irqs[128];
+	u32 i, j, irq, num_irqs_found = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	MALI_DEBUG_ASSERT(128 >= mali_platform_device->num_resources);
+
+	for (i = 0; i < mali_platform_device->num_resources; i++) {
+		if (IORESOURCE_IRQ & mali_platform_device->resource[i].flags) {
+			irq = mali_platform_device->resource[i].start;
+
+			for (j = 0; j < num_irqs_found; ++j) {
+				if (irq == irqs[j]) {
+					return MALI_TRUE;
+				}
+			}
+
+			irqs[num_irqs_found++] = irq;
+		}
+	}
+
+	return MALI_FALSE;
+}
+
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_init(void)
+{
+	_mali_osk_device_data data = { 0, };
+
+	if (_MALI_OSK_ERR_OK ==  _mali_osk_device_data_get(&data)) {
+		if ((NULL != data.secure_mode_init) && (NULL != data.secure_mode_deinit)
+		    && (NULL != data.gpu_reset_and_secure_mode_enable) && (NULL != data.gpu_reset_and_secure_mode_disable)) {
+			int err = data.secure_mode_init();
+			if (err) {
+				MALI_DEBUG_PRINT(1, ("Failed to init gpu secure mode.\n"));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			mali_secure_mode_deinit = data.secure_mode_deinit;
+			mali_gpu_reset_and_secure_mode_enable = data.gpu_reset_and_secure_mode_enable;
+			mali_gpu_reset_and_secure_mode_disable = data.gpu_reset_and_secure_mode_disable;
+
+			mali_secure_mode_supported = MALI_TRUE;
+			mali_secure_mode_enabled = MALI_FALSE;
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+	MALI_DEBUG_PRINT(3, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+
+}
+
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_deinit(void)
+{
+	if (NULL !=  mali_secure_mode_deinit) {
+		mali_secure_mode_deinit();
+		mali_secure_mode_enabled = MALI_FALSE;
+		mali_secure_mode_supported = MALI_FALSE;
+		return _MALI_OSK_ERR_OK;
+	}
+	MALI_DEBUG_PRINT(3, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+
+}
+
+
+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_enable(void)
+{
+	/* the mali executor lock must be held before enter this function. */
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == mali_secure_mode_enabled);
+
+	if (NULL !=  mali_gpu_reset_and_secure_mode_enable) {
+		if (mali_gpu_reset_and_secure_mode_enable()) {
+			MALI_DEBUG_PRINT(1, ("Failed to reset GPU or enable gpu secure mode.\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+		mali_secure_mode_enabled = MALI_TRUE;
+		return _MALI_OSK_ERR_OK;
+	}
+	MALI_DEBUG_PRINT(1, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+}
+
+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_disable(void)
+{
+	/* the mali executor lock must be held before enter this function. */
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_secure_mode_enabled);
+
+	if (NULL != mali_gpu_reset_and_secure_mode_disable) {
+		if (mali_gpu_reset_and_secure_mode_disable()) {
+			MALI_DEBUG_PRINT(1, ("Failed to reset GPU or disable gpu secure mode.\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+		mali_secure_mode_enabled = MALI_FALSE;
+
+		return _MALI_OSK_ERR_OK;
+
+	}
+	MALI_DEBUG_PRINT(1, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+
+}
+
+mali_bool _mali_osk_gpu_secure_mode_is_enabled(void)
+{
+	return mali_secure_mode_enabled;
+}
+
+mali_bool _mali_osk_gpu_secure_mode_is_supported(void)
+{
+	return mali_secure_mode_supported;
+}
+
+
diff --git a/mali/linux/mali_osk_math.c b/mali/linux/mali_osk_math.c
new file mode 100755
index 0000000..3f06723
--- /dev/null
+++ b/mali/linux/mali_osk_math.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_math.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/bitops.h>
+
+u32 _mali_osk_clz(u32 input)
+{
+	return 32 - fls(input);
+}
+
+u32 _mali_osk_fls(u32 input)
+{
+	return fls(input);
+}
diff --git a/mali/linux/mali_osk_memory.c b/mali/linux/mali_osk_memory.c
new file mode 100755
index 0000000..ad5494d
--- /dev/null
+++ b/mali/linux/mali_osk_memory.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_memory.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+void inline *_mali_osk_calloc(u32 n, u32 size)
+{
+	return kcalloc(n, size, GFP_KERNEL);
+}
+
+void inline *_mali_osk_malloc(u32 size)
+{
+	return kmalloc(size, GFP_KERNEL);
+}
+
+void inline _mali_osk_free(void *ptr)
+{
+	kfree(ptr);
+}
+
+void inline *_mali_osk_valloc(u32 size)
+{
+	return vmalloc(size);
+}
+
+void inline _mali_osk_vfree(void *ptr)
+{
+	vfree(ptr);
+}
+
+void inline *_mali_osk_memcpy(void *dst, const void *src, u32  len)
+{
+	return memcpy(dst, src, len);
+}
+
+void inline *_mali_osk_memset(void *s, u32 c, u32 n)
+{
+	return memset(s, c, n);
+}
+
+mali_bool _mali_osk_mem_check_allocated(u32 max_allocated)
+{
+	/* No need to prevent an out-of-memory dialogue appearing on Linux,
+	 * so we always return MALI_TRUE.
+	 */
+	return MALI_TRUE;
+}
diff --git a/mali/linux/mali_osk_misc.c b/mali/linux/mali_osk_misc.c
new file mode 100755
index 0000000..2a6ae91
--- /dev/null
+++ b/mali/linux/mali_osk_misc.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_misc.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+#include <linux/kernel.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)
+#include <linux/uaccess.h>
+#else
+#include <asm/uaccess.h>
+#endif
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include "mali_osk.h"
+
+#if !defined(CONFIG_MALI_QUIET)
+void _mali_osk_dbgmsg(const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	vprintk(fmt, args);
+	va_end(args);
+}
+#endif /* !defined(CONFIG_MALI_QUIET) */
+
+u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...)
+{
+	int res;
+	va_list args;
+	va_start(args, fmt);
+
+	res = vscnprintf(buf, (size_t)size, fmt, args);
+
+	va_end(args);
+	return res;
+}
+
+void _mali_osk_abort(void)
+{
+	/* make a simple fault by dereferencing a NULL pointer */
+	dump_stack();
+	*(int *)0 = 0;
+}
+
+void _mali_osk_break(void)
+{
+	_mali_osk_abort();
+}
+
+u32 _mali_osk_get_pid(void)
+{
+	/* Thread group ID is the process ID on Linux */
+	return (u32)current->tgid;
+}
+
+char *_mali_osk_get_comm(void)
+{
+	return (char *)current->comm;
+}
+
+
+u32 _mali_osk_get_tid(void)
+{
+	/* pid is actually identifying the thread on Linux */
+	u32 tid = current->pid;
+
+	/* If the pid is 0 the core was idle.  Instead of returning 0 we return a special number
+	 * identifying which core we are on. */
+	if (0 == tid) {
+		tid = -(1 + raw_smp_processor_id());
+	}
+
+	return tid;
+}
diff --git a/mali/linux/mali_osk_notification.c b/mali/linux/mali_osk_notification.c
new file mode 100755
index 0000000..5867841
--- /dev/null
+++ b/mali/linux/mali_osk_notification.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_notification.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+/**
+ * Declaration of the notification queue object type
+ * Contains a linked list of notification pending delivery to user space.
+ * It also contains a wait queue of exclusive waiters blocked in the ioctl
+ * When a new notification is posted a single thread is resumed.
+ */
+struct _mali_osk_notification_queue_t_struct {
+	spinlock_t mutex; /**< Mutex protecting the list */
+	wait_queue_head_t receive_queue; /**< Threads waiting for new entries to the queue */
+	struct list_head head; /**< List of notifications waiting to be picked up */
+};
+
+typedef struct _mali_osk_notification_wrapper_t_struct {
+	struct list_head list;           /**< Internal linked list variable */
+	_mali_osk_notification_t data;   /**< Notification data */
+} _mali_osk_notification_wrapper_t;
+
+_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void)
+{
+	_mali_osk_notification_queue_t         *result;
+
+	result = (_mali_osk_notification_queue_t *)kmalloc(sizeof(_mali_osk_notification_queue_t), GFP_KERNEL);
+	if (NULL == result) return NULL;
+
+	spin_lock_init(&result->mutex);
+	init_waitqueue_head(&result->receive_queue);
+	INIT_LIST_HEAD(&result->head);
+
+	return result;
+}
+
+_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size)
+{
+	/* OPT Recycling of notification objects */
+	_mali_osk_notification_wrapper_t *notification;
+
+	notification = (_mali_osk_notification_wrapper_t *)kmalloc(sizeof(_mali_osk_notification_wrapper_t) + size,
+			GFP_KERNEL | __GFP_HIGH | __GFP_REPEAT);
+	if (NULL == notification) {
+		MALI_DEBUG_PRINT(1, ("Failed to create a notification object\n"));
+		return NULL;
+	}
+
+	/* Init the list */
+	INIT_LIST_HEAD(&notification->list);
+
+	if (0 != size) {
+		notification->data.result_buffer = ((u8 *)notification) + sizeof(_mali_osk_notification_wrapper_t);
+	} else {
+		notification->data.result_buffer = NULL;
+	}
+
+	/* set up the non-allocating fields */
+	notification->data.notification_type = type;
+	notification->data.result_buffer_size = size;
+
+	/* all ok */
+	return &(notification->data);
+}
+
+void _mali_osk_notification_delete(_mali_osk_notification_t *object)
+{
+	_mali_osk_notification_wrapper_t *notification;
+	MALI_DEBUG_ASSERT_POINTER(object);
+
+	notification = container_of(object, _mali_osk_notification_wrapper_t, data);
+
+	/* Free the container */
+	kfree(notification);
+}
+
+void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue)
+{
+	_mali_osk_notification_t *result;
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	while (_MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, &result)) {
+		_mali_osk_notification_delete(result);
+	}
+
+	/* not much to do, just free the memory */
+	kfree(queue);
+}
+void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object)
+{
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	unsigned long irq_flags;
+#endif
+
+	_mali_osk_notification_wrapper_t *notification;
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_ASSERT_POINTER(object);
+
+	notification = container_of(object, _mali_osk_notification_wrapper_t, data);
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_lock_irqsave(&queue->mutex, irq_flags);
+#else
+	spin_lock(&queue->mutex);
+#endif
+
+	list_add_tail(&notification->list, &queue->head);
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_unlock_irqrestore(&queue->mutex, irq_flags);
+#else
+	spin_unlock(&queue->mutex);
+#endif
+
+	/* and wake up one possible exclusive waiter */
+	wake_up(&queue->receive_queue);
+}
+
+_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result)
+{
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	unsigned long irq_flags;
+#endif
+
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	_mali_osk_notification_wrapper_t *wrapper_object;
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_lock_irqsave(&queue->mutex, irq_flags);
+#else
+	spin_lock(&queue->mutex);
+#endif
+
+	if (!list_empty(&queue->head)) {
+		wrapper_object = list_entry(queue->head.next, _mali_osk_notification_wrapper_t, list);
+		*result = &(wrapper_object->data);
+		list_del_init(&wrapper_object->list);
+		ret = _MALI_OSK_ERR_OK;
+	}
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_unlock_irqrestore(&queue->mutex, irq_flags);
+#else
+	spin_unlock(&queue->mutex);
+#endif
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result)
+{
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_ASSERT_POINTER(result);
+
+	/* default result */
+	*result = NULL;
+
+	if (wait_event_interruptible(queue->receive_queue,
+				     _MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, result))) {
+		return _MALI_OSK_ERR_RESTARTSYSCALL;
+	}
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
diff --git a/mali/linux/mali_osk_pm.c b/mali/linux/mali_osk_pm.c
new file mode 100755
index 0000000..6a579ad
--- /dev/null
+++ b/mali/linux/mali_osk_pm.c
@@ -0,0 +1,83 @@
+/**
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_pm.c
+ * Implementation of the callback functions from common power management
+ */
+
+#include <linux/sched.h>
+
+#include "mali_kernel_linux.h"
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_PM_RUNTIME */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/* Can NOT run in atomic context */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	int err;
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	err = pm_runtime_get_sync(&(mali_platform_device->dev));
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+#endif
+	if (0 > err) {
+		MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get_sync() returned error code %d\n", err));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+	return _MALI_OSK_ERR_OK;
+}
+
+/* Can run in atomic context */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	int err;
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	err = pm_runtime_get(&(mali_platform_device->dev));
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+#endif
+	if (0 > err && -EINPROGRESS != err) {
+		MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get() returned error code %d\n", err));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/* Can run in atomic context */
+void _mali_osk_pm_dev_ref_put(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+	pm_runtime_put_autosuspend(&(mali_platform_device->dev));
+#else
+	pm_runtime_put(&(mali_platform_device->dev));
+#endif
+#endif
+}
+
+void _mali_osk_pm_dev_barrier(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	pm_runtime_barrier(&(mali_platform_device->dev));
+#endif
+}
diff --git a/mali/linux/mali_osk_profiling.c b/mali/linux/mali_osk_profiling.c
new file mode 100755
index 0000000..35d1abc
--- /dev/null
+++ b/mali/linux/mali_osk_profiling.c
@@ -0,0 +1,1282 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/hrtimer.h>
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/sched.h>
+
+#include <mali_profiling_gator_api.h>
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_osk_profiling.h"
+#include "mali_linux_trace.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_l2_cache.h"
+#include "mali_user_settings_db.h"
+#include "mali_executor.h"
+#include "mali_memory_manager.h"
+
+#define MALI_PROFILING_STREAM_DATA_DEFAULT_SIZE 100
+#define MALI_PROFILING_STREAM_HOLD_TIME 1000000         /*1 ms */
+
+#define MALI_PROFILING_STREAM_BUFFER_SIZE       (1 << 12)
+#define MALI_PROFILING_STREAM_BUFFER_NUM        100
+
+/**
+ * Define the mali profiling stream struct.
+ */
+typedef struct mali_profiling_stream {
+	u8 data[MALI_PROFILING_STREAM_BUFFER_SIZE];
+	u32 used_size;
+	struct list_head list;
+} mali_profiling_stream;
+
+typedef struct mali_profiling_stream_list {
+	spinlock_t spin_lock;
+	struct list_head free_list;
+	struct list_head queue_list;
+} mali_profiling_stream_list;
+
+static const char mali_name[] = "4xx";
+static const char utgard_setup_version[] = "ANNOTATE_SETUP 1\n";
+
+static u32 profiling_sample_rate = 0;
+static u32 first_sw_counter_index = 0;
+
+static mali_bool l2_cache_counter_if_enabled = MALI_FALSE;
+static u32 num_counters_enabled = 0;
+static u32 mem_counters_enabled = 0;
+
+static _mali_osk_atomic_t stream_fd_if_used;
+
+static wait_queue_head_t stream_fd_wait_queue;
+static mali_profiling_counter *global_mali_profiling_counters = NULL;
+static u32 num_global_mali_profiling_counters = 0;
+
+static mali_profiling_stream_list *global_mali_stream_list = NULL;
+static mali_profiling_stream *mali_counter_stream = NULL;
+static mali_profiling_stream *mali_core_activity_stream = NULL;
+static u64 mali_core_activity_stream_dequeue_time = 0;
+static spinlock_t mali_activity_lock;
+static u32 mali_activity_cores_num =  0;
+static struct hrtimer profiling_sampling_timer;
+
+const char *_mali_mem_counter_descriptions[] = _MALI_MEM_COUTNER_DESCRIPTIONS;
+const char *_mali_special_counter_descriptions[] = _MALI_SPCIAL_COUNTER_DESCRIPTIONS;
+
+static u32 current_profiling_pid = 0;
+
+static void _mali_profiling_stream_list_destory(mali_profiling_stream_list *profiling_stream_list)
+{
+	mali_profiling_stream *profiling_stream, *tmp_profiling_stream;
+	MALI_DEBUG_ASSERT_POINTER(profiling_stream_list);
+
+	list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &profiling_stream_list->free_list, list) {
+		list_del(&profiling_stream->list);
+		kfree(profiling_stream);
+	}
+
+	list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &profiling_stream_list->queue_list, list) {
+		list_del(&profiling_stream->list);
+		kfree(profiling_stream);
+	}
+
+	kfree(profiling_stream_list);
+}
+
+static void _mali_profiling_global_stream_list_free(void)
+{
+	mali_profiling_stream *profiling_stream, *tmp_profiling_stream;
+	unsigned long irq_flags;
+
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+	list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &global_mali_stream_list->queue_list, list) {
+		profiling_stream->used_size = 0;
+		list_move(&profiling_stream->list, &global_mali_stream_list->free_list);
+	}
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+}
+
+static _mali_osk_errcode_t _mali_profiling_global_stream_list_dequeue(struct list_head *stream_list, mali_profiling_stream **new_mali_profiling_stream)
+{
+	unsigned long irq_flags;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	MALI_DEBUG_ASSERT_POINTER(stream_list);
+
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+
+	if (!list_empty(stream_list)) {
+		*new_mali_profiling_stream = list_entry(stream_list->next, mali_profiling_stream, list);
+		list_del_init(&(*new_mali_profiling_stream)->list);
+	} else {
+		ret = _MALI_OSK_ERR_NOMEM;
+	}
+
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+
+	return ret;
+}
+
+static void _mali_profiling_global_stream_list_queue(struct list_head *stream_list, mali_profiling_stream *current_mali_profiling_stream)
+{
+	unsigned long irq_flags;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	MALI_DEBUG_ASSERT_POINTER(stream_list);
+
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+	list_add_tail(&current_mali_profiling_stream->list, stream_list);
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+}
+
+static mali_bool _mali_profiling_global_stream_queue_list_if_empty(void)
+{
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	return list_empty(&global_mali_stream_list->queue_list);
+}
+
+static u32 _mali_profiling_global_stream_queue_list_next_size(void)
+{
+	unsigned long irq_flags;
+	u32 size = 0;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+	if (!list_empty(&global_mali_stream_list->queue_list)) {
+		mali_profiling_stream *next_mali_profiling_stream =
+			list_entry(global_mali_stream_list->queue_list.next, mali_profiling_stream, list);
+		size = next_mali_profiling_stream->used_size;
+	}
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+	return size;
+}
+
+/* The mali profiling stream file operations functions. */
+static ssize_t _mali_profiling_stream_read(
+	struct file *filp,
+	char __user *buffer,
+	size_t      size,
+	loff_t      *f_pos);
+
+static unsigned int  _mali_profiling_stream_poll(struct file *filp, poll_table *wait);
+
+static int  _mali_profiling_stream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations mali_profiling_stream_fops = {
+	.release = _mali_profiling_stream_release,
+	.read    = _mali_profiling_stream_read,
+	.poll    = _mali_profiling_stream_poll,
+};
+
+static ssize_t _mali_profiling_stream_read(
+	struct file *filp,
+	char __user *buffer,
+	size_t      size,
+	loff_t      *f_pos)
+{
+	u32 copy_len = 0;
+	mali_profiling_stream *current_mali_profiling_stream;
+	u32 used_size;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+	while (!_mali_profiling_global_stream_queue_list_if_empty()) {
+		used_size = _mali_profiling_global_stream_queue_list_next_size();
+		if (used_size <= ((u32)size - copy_len)) {
+			current_mali_profiling_stream = NULL;
+			_mali_profiling_global_stream_list_dequeue(&global_mali_stream_list->queue_list,
+					&current_mali_profiling_stream);
+			MALI_DEBUG_ASSERT_POINTER(current_mali_profiling_stream);
+			if (copy_to_user(&buffer[copy_len], current_mali_profiling_stream->data, current_mali_profiling_stream->used_size)) {
+				current_mali_profiling_stream->used_size = 0;
+				_mali_profiling_global_stream_list_queue(&global_mali_stream_list->free_list, current_mali_profiling_stream);
+				return -EFAULT;
+			}
+			copy_len += current_mali_profiling_stream->used_size;
+			current_mali_profiling_stream->used_size = 0;
+			_mali_profiling_global_stream_list_queue(&global_mali_stream_list->free_list, current_mali_profiling_stream);
+		} else {
+			break;
+		}
+	}
+	return (ssize_t)copy_len;
+}
+
+static unsigned int  _mali_profiling_stream_poll(struct file *filp, poll_table *wait)
+{
+	poll_wait(filp, &stream_fd_wait_queue, wait);
+	if (!_mali_profiling_global_stream_queue_list_if_empty())
+		return POLLIN;
+	return 0;
+}
+
+static int  _mali_profiling_stream_release(struct inode *inode, struct file *filp)
+{
+	_mali_osk_atomic_init(&stream_fd_if_used, 0);
+	return 0;
+}
+
+/* The funs for control packet and stream data.*/
+static void _mali_profiling_set_packet_size(unsigned char *const buf, const u32 size)
+{
+	u32 i;
+
+	for (i = 0; i < sizeof(size); ++i)
+		buf[i] = (size >> 8 * i) & 0xFF;
+}
+
+static u32 _mali_profiling_get_packet_size(unsigned char *const buf)
+{
+	u32 i;
+	u32 size = 0;
+	for (i = 0; i < sizeof(size); ++i)
+		size |= (u32)buf[i] << 8 * i;
+	return size;
+}
+
+static u32 _mali_profiling_read_packet_int(unsigned char *const buf, u32 *const pos, u32 const packet_size)
+{
+	u64 int_value = 0;
+	u8 shift = 0;
+	u8 byte_value = ~0;
+
+	while ((byte_value & 0x80) != 0) {
+		if ((*pos) >= packet_size)
+			return -1;
+		byte_value = buf[*pos];
+		*pos += 1;
+		int_value |= (u32)(byte_value & 0x7f) << shift;
+		shift += 7;
+	}
+
+	if (shift < 8 * sizeof(int_value) && (byte_value & 0x40) != 0) {
+		int_value |= -(1 << shift);
+	}
+
+	return int_value;
+}
+
+static u32 _mali_profiling_pack_int(u8 *const buf, u32 const buf_size, u32 const pos, s32 value)
+{
+	u32 add_bytes = 0;
+	int more = 1;
+	while (more) {
+		/* low order 7 bits of val */
+		char byte_value = value & 0x7f;
+		value >>= 7;
+
+		if ((value == 0 && (byte_value & 0x40) == 0) || (value == -1 && (byte_value & 0x40) != 0)) {
+			more = 0;
+		} else {
+			byte_value |= 0x80;
+		}
+
+		if ((pos + add_bytes) >= buf_size)
+			return 0;
+		buf[pos + add_bytes] = byte_value;
+		add_bytes++;
+	}
+
+	return add_bytes;
+}
+
+static int _mali_profiling_pack_long(uint8_t *const buf, u32 const buf_size, u32 const pos, s64 val)
+{
+	int add_bytes = 0;
+	int more = 1;
+	while (more) {
+		/* low order 7 bits of x */
+		char byte_value = val & 0x7f;
+		val >>= 7;
+
+		if ((val == 0 && (byte_value & 0x40) == 0) || (val == -1 && (byte_value & 0x40) != 0)) {
+			more = 0;
+		} else {
+			byte_value |= 0x80;
+		}
+
+		MALI_DEBUG_ASSERT((pos + add_bytes) < buf_size);
+		buf[pos + add_bytes] = byte_value;
+		add_bytes++;
+	}
+
+	return add_bytes;
+}
+
+static void _mali_profiling_stream_add_counter(mali_profiling_stream *profiling_stream, s64 current_time, u32 key, u32 counter_value)
+{
+	u32 add_size = STREAM_HEADER_SIZE;
+	MALI_DEBUG_ASSERT_POINTER(profiling_stream);
+	MALI_DEBUG_ASSERT((profiling_stream->used_size) < MALI_PROFILING_STREAM_BUFFER_SIZE);
+
+	profiling_stream->data[profiling_stream->used_size] = STREAM_HEADER_COUNTER_VALUE;
+
+	add_size += _mali_profiling_pack_long(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					      profiling_stream->used_size + add_size, current_time);
+	add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					     profiling_stream->used_size + add_size, (s32)0);
+	add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					     profiling_stream->used_size + add_size, (s32)key);
+	add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					     profiling_stream->used_size + add_size, (s32)counter_value);
+
+	_mali_profiling_set_packet_size(profiling_stream->data + profiling_stream->used_size + 1,
+					add_size - STREAM_HEADER_SIZE);
+
+	profiling_stream->used_size += add_size;
+}
+
+/* The callback function for sampling timer.*/
+static enum hrtimer_restart  _mali_profiling_sampling_counters(struct hrtimer *timer)
+{
+	u32 counter_index;
+	s64 current_time;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_profiling_counters);
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+	MALI_DEBUG_ASSERT(NULL == mali_counter_stream);
+	if (_MALI_OSK_ERR_OK == _mali_profiling_global_stream_list_dequeue(
+		    &global_mali_stream_list->free_list, &mali_counter_stream)) {
+
+		MALI_DEBUG_ASSERT_POINTER(mali_counter_stream);
+		MALI_DEBUG_ASSERT(0 == mali_counter_stream->used_size);
+
+		/* Capture l2 cache counter values if enabled */
+		if (MALI_TRUE == l2_cache_counter_if_enabled) {
+			int i, j = 0;
+			_mali_profiling_l2_counter_values l2_counters_values;
+			_mali_profiling_get_l2_counters(&l2_counters_values);
+
+			for (i  = COUNTER_L2_0_C0; i <= COUNTER_L2_2_C1; i++) {
+				if (0 == (j % 2))
+					_mali_osk_profiling_record_global_counters(i, l2_counters_values.cores[j / 2].value0);
+				else
+					_mali_osk_profiling_record_global_counters(i, l2_counters_values.cores[j / 2].value1);
+				j++;
+			}
+		}
+
+		current_time = (s64)_mali_osk_boot_time_get_ns();
+
+		/* Add all enabled counter values into stream */
+		for (counter_index = 0; counter_index < num_global_mali_profiling_counters; counter_index++) {
+			/* No need to sample these couners here. */
+			if (global_mali_profiling_counters[counter_index].enabled) {
+				if ((global_mali_profiling_counters[counter_index].counter_id >= FIRST_MEM_COUNTER &&
+				     global_mali_profiling_counters[counter_index].counter_id <= LAST_MEM_COUNTER)
+				    || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_VP_ACTIVITY)
+				    || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_FP_ACTIVITY)
+				    || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_FILMSTRIP)) {
+
+					continue;
+				}
+
+				if (global_mali_profiling_counters[counter_index].counter_id >= COUNTER_L2_0_C0 &&
+				    global_mali_profiling_counters[counter_index].counter_id <= COUNTER_L2_2_C1) {
+
+					u32 prev_val = global_mali_profiling_counters[counter_index].prev_counter_value;
+
+					_mali_profiling_stream_add_counter(mali_counter_stream, current_time, global_mali_profiling_counters[counter_index].key,
+									   global_mali_profiling_counters[counter_index].current_counter_value - prev_val);
+
+					prev_val = global_mali_profiling_counters[counter_index].current_counter_value;
+
+					global_mali_profiling_counters[counter_index].prev_counter_value = prev_val;
+				} else {
+
+					if (global_mali_profiling_counters[counter_index].counter_id == COUNTER_TOTAL_ALLOC_PAGES) {
+						u32 total_alloc_mem = _mali_ukk_report_memory_usage();
+						global_mali_profiling_counters[counter_index].current_counter_value = total_alloc_mem / _MALI_OSK_MALI_PAGE_SIZE;
+					}
+					_mali_profiling_stream_add_counter(mali_counter_stream, current_time, global_mali_profiling_counters[counter_index].key,
+									   global_mali_profiling_counters[counter_index].current_counter_value);
+					if (global_mali_profiling_counters[counter_index].counter_id < FIRST_SPECIAL_COUNTER)
+						global_mali_profiling_counters[counter_index].current_counter_value = 0;
+				}
+			}
+		}
+		_mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_counter_stream);
+		mali_counter_stream = NULL;
+	} else {
+		MALI_DEBUG_PRINT(1, ("Not enough mali profiling stream buffer!\n"));
+	}
+
+	wake_up_interruptible(&stream_fd_wait_queue);
+
+	/*Enable the sampling timer again*/
+	if (0 != num_counters_enabled && 0 != profiling_sample_rate) {
+		hrtimer_forward_now(&profiling_sampling_timer, ns_to_ktime(profiling_sample_rate));
+		return HRTIMER_RESTART;
+	}
+	return HRTIMER_NORESTART;
+}
+
+static void _mali_profiling_sampling_core_activity_switch(int counter_id, int core, u32 activity, u32 pid)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&mali_activity_lock, irq_flags);
+	if (activity == 0)
+		mali_activity_cores_num--;
+	else
+		mali_activity_cores_num++;
+	spin_unlock_irqrestore(&mali_activity_lock, irq_flags);
+
+	if (NULL != global_mali_profiling_counters) {
+		int i ;
+		for (i = 0; i < num_global_mali_profiling_counters; i++) {
+			if (counter_id == global_mali_profiling_counters[i].counter_id && global_mali_profiling_counters[i].enabled) {
+				u64 current_time = _mali_osk_boot_time_get_ns();
+				u32 add_size = STREAM_HEADER_SIZE;
+
+				if (NULL != mali_core_activity_stream) {
+					if ((mali_core_activity_stream_dequeue_time +  MALI_PROFILING_STREAM_HOLD_TIME < current_time) ||
+					    (MALI_PROFILING_STREAM_DATA_DEFAULT_SIZE > MALI_PROFILING_STREAM_BUFFER_SIZE
+					     - mali_core_activity_stream->used_size)) {
+						_mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_core_activity_stream);
+						mali_core_activity_stream = NULL;
+						wake_up_interruptible(&stream_fd_wait_queue);
+					}
+				}
+
+				if (NULL == mali_core_activity_stream) {
+					if (_MALI_OSK_ERR_OK == _mali_profiling_global_stream_list_dequeue(
+						    &global_mali_stream_list->free_list, &mali_core_activity_stream)) {
+						mali_core_activity_stream_dequeue_time = current_time;
+					} else {
+						MALI_DEBUG_PRINT(1, ("Not enough mali profiling stream buffer!\n"));
+						wake_up_interruptible(&stream_fd_wait_queue);
+						break;
+					}
+
+				}
+
+				mali_core_activity_stream->data[mali_core_activity_stream->used_size] = STREAM_HEADER_CORE_ACTIVITY;
+
+				add_size += _mali_profiling_pack_long(mali_core_activity_stream->data,
+								      MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, (s64)current_time);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, core);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, (s32)global_mali_profiling_counters[i].key);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, activity);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, pid);
+
+				_mali_profiling_set_packet_size(mali_core_activity_stream->data + mali_core_activity_stream->used_size + 1,
+								add_size - STREAM_HEADER_SIZE);
+
+				mali_core_activity_stream->used_size += add_size;
+
+				if (0 == mali_activity_cores_num) {
+					_mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_core_activity_stream);
+					mali_core_activity_stream = NULL;
+					wake_up_interruptible(&stream_fd_wait_queue);
+				}
+
+				break;
+			}
+		}
+	}
+}
+
+static mali_bool _mali_profiling_global_counters_init(void)
+{
+	int core_id, counter_index, counter_number, counter_id;
+	u32 num_l2_cache_cores;
+	u32 num_pp_cores;
+	u32 num_gp_cores = 1;
+
+	MALI_DEBUG_ASSERT(NULL == global_mali_profiling_counters);
+	num_pp_cores = mali_pp_get_glob_num_pp_cores();
+	num_l2_cache_cores =    mali_l2_cache_core_get_glob_num_l2_cores();
+
+	num_global_mali_profiling_counters = 3 * (num_gp_cores + num_pp_cores) + 2 * num_l2_cache_cores
+					     + MALI_PROFILING_SW_COUNTERS_NUM
+					     + MALI_PROFILING_SPECIAL_COUNTERS_NUM
+					     + MALI_PROFILING_MEM_COUNTERS_NUM;
+	global_mali_profiling_counters = _mali_osk_calloc(num_global_mali_profiling_counters, sizeof(mali_profiling_counter));
+
+	if (NULL == global_mali_profiling_counters)
+		return MALI_FALSE;
+
+	counter_index = 0;
+	/*Vertex processor counters */
+	for (core_id = 0; core_id < num_gp_cores; core_id ++) {
+		global_mali_profiling_counters[counter_index].counter_id = ACTIVITY_VP_0 + core_id;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_VP_%d_active", mali_name, core_id);
+
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			counter_index++;
+			global_mali_profiling_counters[counter_index].counter_id = COUNTER_VP_0_C0 + (2 * core_id) + counter_number;
+			_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+					   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_VP_%d_cnt%d", mali_name, core_id, counter_number);
+		}
+	}
+
+	/* Fragment processors' counters */
+	for (core_id = 0; core_id < num_pp_cores; core_id++) {
+		counter_index++;
+		global_mali_profiling_counters[counter_index].counter_id = ACTIVITY_FP_0 + core_id;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_FP_%d_active", mali_name, core_id);
+
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			counter_index++;
+			global_mali_profiling_counters[counter_index].counter_id = COUNTER_FP_0_C0 + (2 * core_id) + counter_number;
+			_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+					   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_FP_%d_cnt%d", mali_name, core_id, counter_number);
+		}
+	}
+
+	/* L2 Cache counters */
+	for (core_id = 0; core_id < num_l2_cache_cores; core_id++) {
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			counter_index++;
+			global_mali_profiling_counters[counter_index].counter_id = COUNTER_L2_0_C0 + (2 * core_id) + counter_number;
+			_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+					   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_L2_%d_cnt%d", mali_name, core_id, counter_number);
+		}
+	}
+
+	/* Now set up the software counter entries */
+	for (counter_id = FIRST_SW_COUNTER; counter_id <= LAST_SW_COUNTER; counter_id++) {
+		counter_index++;
+
+		if (0 == first_sw_counter_index)
+			first_sw_counter_index = counter_index;
+
+		global_mali_profiling_counters[counter_index].counter_id = counter_id;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_SW_%d", mali_name, counter_id - FIRST_SW_COUNTER);
+	}
+
+	/* Now set up the special counter entries */
+	for (counter_id = FIRST_SPECIAL_COUNTER; counter_id <= LAST_SPECIAL_COUNTER; counter_id++) {
+
+		counter_index++;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_%s",
+				   mali_name, _mali_special_counter_descriptions[counter_id - FIRST_SPECIAL_COUNTER]);
+
+		global_mali_profiling_counters[counter_index].counter_id = counter_id;
+	}
+
+	/* Now set up the mem counter entries*/
+	for (counter_id = FIRST_MEM_COUNTER; counter_id <= LAST_MEM_COUNTER; counter_id++) {
+
+		counter_index++;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_%s",
+				   mali_name, _mali_mem_counter_descriptions[counter_id - FIRST_MEM_COUNTER]);
+
+		global_mali_profiling_counters[counter_index].counter_id = counter_id;
+	}
+
+	MALI_DEBUG_ASSERT((counter_index + 1) == num_global_mali_profiling_counters);
+
+	return MALI_TRUE;
+}
+
+void _mali_profiling_notification_mem_counter(struct mali_session_data *session, u32 counter_id, u32 key, int enable)
+{
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (NULL != session) {
+		_mali_osk_notification_t *notification;
+		_mali_osk_notification_queue_t *queue;
+
+		queue = session->ioctl_queue;
+		MALI_DEBUG_ASSERT(NULL != queue);
+
+		notification = _mali_osk_notification_create(_MALI_NOTIFICATION_ANNOTATE_PROFILING_MEM_COUNTER,
+				sizeof(_mali_uk_annotate_profiling_mem_counter_s));
+
+		if (NULL != notification) {
+			_mali_uk_annotate_profiling_mem_counter_s *data = notification->result_buffer;
+			data->counter_id = counter_id;
+			data->key = key;
+			data->enable = enable;
+
+			_mali_osk_notification_queue_send(queue, notification);
+		} else {
+			MALI_PRINT_ERROR(("Failed to create notification object!\n"));
+		}
+	} else {
+		MALI_PRINT_ERROR(("Failed to find the right session!\n"));
+	}
+}
+
+void _mali_profiling_notification_enable(struct mali_session_data *session, u32 sampling_rate, int enable)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (NULL != session) {
+		_mali_osk_notification_t *notification;
+		_mali_osk_notification_queue_t *queue;
+
+		queue = session->ioctl_queue;
+		MALI_DEBUG_ASSERT(NULL != queue);
+
+		notification = _mali_osk_notification_create(_MALI_NOTIFICATION_ANNOTATE_PROFILING_ENABLE,
+				sizeof(_mali_uk_annotate_profiling_enable_s));
+
+		if (NULL != notification) {
+			_mali_uk_annotate_profiling_enable_s *data = notification->result_buffer;
+			data->sampling_rate = sampling_rate;
+			data->enable = enable;
+
+			_mali_osk_notification_queue_send(queue, notification);
+		} else {
+			MALI_PRINT_ERROR(("Failed to create notification object!\n"));
+		}
+	} else {
+		MALI_PRINT_ERROR(("Failed to find the right session!\n"));
+	}
+}
+
+
+_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start)
+{
+	int i;
+	mali_profiling_stream *new_mali_profiling_stream = NULL;
+	mali_profiling_stream_list *new_mali_profiling_stream_list = NULL;
+	if (MALI_TRUE == auto_start) {
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE);
+	}
+
+	/*Init the global_mali_stream_list*/
+	MALI_DEBUG_ASSERT(NULL == global_mali_stream_list);
+	new_mali_profiling_stream_list = (mali_profiling_stream_list *)kmalloc(sizeof(mali_profiling_stream_list), GFP_KERNEL);
+
+	if (NULL == new_mali_profiling_stream_list) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	spin_lock_init(&new_mali_profiling_stream_list->spin_lock);
+	INIT_LIST_HEAD(&new_mali_profiling_stream_list->free_list);
+	INIT_LIST_HEAD(&new_mali_profiling_stream_list->queue_list);
+
+	spin_lock_init(&mali_activity_lock);
+	mali_activity_cores_num =  0;
+
+	for (i = 0; i < MALI_PROFILING_STREAM_BUFFER_NUM; i++) {
+		new_mali_profiling_stream = (mali_profiling_stream *)kmalloc(sizeof(mali_profiling_stream), GFP_KERNEL);
+		if (NULL == new_mali_profiling_stream) {
+			_mali_profiling_stream_list_destory(new_mali_profiling_stream_list);
+			return _MALI_OSK_ERR_NOMEM;
+		}
+
+		INIT_LIST_HEAD(&new_mali_profiling_stream->list);
+		new_mali_profiling_stream->used_size = 0;
+		list_add_tail(&new_mali_profiling_stream->list, &new_mali_profiling_stream_list->free_list);
+
+	}
+
+	_mali_osk_atomic_init(&stream_fd_if_used, 0);
+	init_waitqueue_head(&stream_fd_wait_queue);
+
+	hrtimer_init(&profiling_sampling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+	profiling_sampling_timer.function = _mali_profiling_sampling_counters;
+
+	global_mali_stream_list = new_mali_profiling_stream_list;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_profiling_term(void)
+{
+	if (0 != profiling_sample_rate) {
+		hrtimer_cancel(&profiling_sampling_timer);
+		profiling_sample_rate = 0;
+	}
+	_mali_osk_atomic_term(&stream_fd_if_used);
+
+	if (NULL != global_mali_profiling_counters) {
+		_mali_osk_free(global_mali_profiling_counters);
+		global_mali_profiling_counters = NULL;
+		num_global_mali_profiling_counters = 0;
+	}
+
+	if (NULL != global_mali_stream_list) {
+		_mali_profiling_stream_list_destory(global_mali_stream_list);
+		global_mali_stream_list = NULL;
+	}
+
+}
+
+void _mali_osk_profiling_stop_sampling(u32 pid)
+{
+	if (pid == current_profiling_pid) {
+
+		int i;
+		/* Reset all counter states when closing connection.*/
+		for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+			_mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, MALI_HW_CORE_NO_COUNTER);
+			global_mali_profiling_counters[i].enabled = 0;
+			global_mali_profiling_counters[i].prev_counter_value = 0;
+			global_mali_profiling_counters[i].current_counter_value = 0;
+		}
+		l2_cache_counter_if_enabled = MALI_FALSE;
+		num_counters_enabled = 0;
+		mem_counters_enabled = 0;
+		_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 0);
+		_mali_profiling_control(SW_COUNTER_ENABLE, 0);
+		/* Delete sampling timer when closing connection. */
+		if (0 != profiling_sample_rate) {
+			hrtimer_cancel(&profiling_sampling_timer);
+			profiling_sample_rate = 0;
+		}
+		current_profiling_pid = 0;
+	}
+}
+
+void    _mali_osk_profiling_add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4)
+{
+	/*Record the freq & volt to global_mali_profiling_counters here. */
+	if (0 != profiling_sample_rate) {
+		u32 channel;
+		u32 state;
+		channel = (event_id >> 16) & 0xFF;
+		state = ((event_id >> 24) & 0xF) << 24;
+
+		switch (state) {
+		case MALI_PROFILING_EVENT_TYPE_SINGLE:
+			if ((MALI_PROFILING_EVENT_CHANNEL_GPU >> 16) == channel) {
+				u32 reason = (event_id & 0xFFFF);
+				if (MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE == reason) {
+					_mali_osk_profiling_record_global_counters(COUNTER_FREQUENCY, data0);
+					_mali_osk_profiling_record_global_counters(COUNTER_VOLTAGE, data1);
+				}
+			}
+			break;
+		case MALI_PROFILING_EVENT_TYPE_START:
+			if ((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) == channel) {
+				_mali_profiling_sampling_core_activity_switch(COUNTER_VP_ACTIVITY, 0, 1, data1);
+			} else if (channel >= (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) &&
+				   (MALI_PROFILING_EVENT_CHANNEL_PP7 >> 16) >= channel) {
+				u32 core_id = channel - (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16);
+				_mali_profiling_sampling_core_activity_switch(COUNTER_FP_ACTIVITY, core_id, 1, data1);
+			}
+			break;
+		case MALI_PROFILING_EVENT_TYPE_STOP:
+			if ((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) == channel) {
+				_mali_profiling_sampling_core_activity_switch(COUNTER_VP_ACTIVITY, 0, 0, 0);
+			} else if (channel >= (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) &&
+				   (MALI_PROFILING_EVENT_CHANNEL_PP7 >> 16) >= channel) {
+				u32 core_id = channel - (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16);
+				_mali_profiling_sampling_core_activity_switch(COUNTER_FP_ACTIVITY, core_id, 0, 0);
+			}
+			break;
+		default:
+			break;
+		}
+	}
+	trace_mali_timeline_event(event_id, data0, data1, data2, data3, data4);
+}
+
+void _mali_osk_profiling_report_sw_counters(u32 *counters)
+{
+	trace_mali_sw_counters(_mali_osk_get_pid(), _mali_osk_get_tid(), NULL, counters);
+}
+
+void _mali_osk_profiling_record_global_counters(int counter_id, u32 value)
+{
+	if (NULL != global_mali_profiling_counters) {
+		int i ;
+		for (i = 0; i < num_global_mali_profiling_counters; i++) {
+			if (counter_id == global_mali_profiling_counters[i].counter_id && global_mali_profiling_counters[i].enabled) {
+				global_mali_profiling_counters[i].current_counter_value = value;
+				break;
+			}
+		}
+	}
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args)
+{
+	/* Always add process and thread identificator in the first two data elements for events from user space */
+	_mali_osk_profiling_add_event(args->event_id, _mali_osk_get_pid(), _mali_osk_get_tid(), args->data[2], args->data[3], args->data[4]);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args)
+{
+	u32 *counters = (u32 *)(uintptr_t)args->counters;
+
+	_mali_osk_profiling_report_sw_counters(counters);
+
+	if (NULL != global_mali_profiling_counters) {
+		int i;
+		for (i = 0; i < MALI_PROFILING_SW_COUNTERS_NUM; i ++) {
+			if (global_mali_profiling_counters[first_sw_counter_index + i].enabled) {
+				global_mali_profiling_counters[first_sw_counter_index + i].current_counter_value = *(counters + i);
+			}
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_stream_fd_get(_mali_uk_profiling_stream_fd_get_s *args)
+{
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (1 == _mali_osk_atomic_inc_return(&stream_fd_if_used)) {
+
+		s32 fd = anon_inode_getfd("[mali_profiling_stream]", &mali_profiling_stream_fops,
+					  session,
+					  O_RDONLY | O_CLOEXEC);
+
+		args->stream_fd = fd;
+		if (0 > fd) {
+			_mali_osk_atomic_dec(&stream_fd_if_used);
+			return _MALI_OSK_ERR_FAULT;
+		}
+		args->stream_fd = fd;
+	} else {
+		_mali_osk_atomic_dec(&stream_fd_if_used);
+		args->stream_fd = -1;
+		return _MALI_OSK_ERR_BUSY;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_control_set(_mali_uk_profiling_control_set_s *args)
+{
+	u32 control_packet_size;
+	u32 output_buffer_size;
+
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (NULL == global_mali_profiling_counters && MALI_FALSE == _mali_profiling_global_counters_init()) {
+		MALI_PRINT_ERROR(("Failed to create global_mali_profiling_counters.\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	control_packet_size = args->control_packet_size;
+	output_buffer_size = args->response_packet_size;
+
+	if (0 != control_packet_size) {
+		u8 control_type;
+		u8 *control_packet_data;
+		u8 *response_packet_data;
+		u32 version_length = sizeof(utgard_setup_version) - 1;
+
+		control_packet_data = (u8 *)(uintptr_t)args->control_packet_data;
+		MALI_DEBUG_ASSERT_POINTER(control_packet_data);
+		response_packet_data = (u8 *)(uintptr_t)args->response_packet_data;
+		MALI_DEBUG_ASSERT_POINTER(response_packet_data);
+
+		/*Decide if need to ignore Utgard setup version.*/
+		if (control_packet_size >= version_length) {
+			if (0 == memcmp(control_packet_data, utgard_setup_version, version_length)) {
+				if (control_packet_size == version_length) {
+					args->response_packet_size = 0;
+					return _MALI_OSK_ERR_OK;
+				} else {
+					control_packet_data += version_length;
+					control_packet_size -= version_length;
+				}
+			}
+		}
+
+		current_profiling_pid = _mali_osk_get_pid();
+
+		control_type = control_packet_data[0];
+		switch (control_type) {
+		case PACKET_HEADER_COUNTERS_REQUEST: {
+			int i;
+
+			if (PACKET_HEADER_SIZE > control_packet_size ||
+			    control_packet_size !=  _mali_profiling_get_packet_size(control_packet_data + 1)) {
+				MALI_PRINT_ERROR(("Wrong control packet  size, type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			/* Send supported counters */
+			if (PACKET_HEADER_SIZE > output_buffer_size)
+				return _MALI_OSK_ERR_FAULT;
+
+			*response_packet_data = PACKET_HEADER_COUNTERS_ACK;
+			args->response_packet_size = PACKET_HEADER_SIZE;
+
+			for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+				u32 name_size = strlen(global_mali_profiling_counters[i].counter_name);
+
+				if ((args->response_packet_size + name_size + 1) > output_buffer_size) {
+					MALI_PRINT_ERROR(("Response packet data is too large..\n"));
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				memcpy(response_packet_data + args->response_packet_size,
+				       global_mali_profiling_counters[i].counter_name, name_size + 1);
+
+				args->response_packet_size += (name_size + 1);
+
+				if (global_mali_profiling_counters[i].counter_id == COUNTER_VP_ACTIVITY) {
+					args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+								      output_buffer_size, args->response_packet_size, (s32)1);
+				} else if (global_mali_profiling_counters[i].counter_id == COUNTER_FP_ACTIVITY) {
+					args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+								      output_buffer_size, args->response_packet_size, (s32)mali_pp_get_glob_num_pp_cores());
+				} else {
+					args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+								      output_buffer_size, args->response_packet_size, (s32) - 1);
+				}
+			}
+
+			_mali_profiling_set_packet_size(response_packet_data + 1, args->response_packet_size);
+			break;
+		}
+
+		case PACKET_HEADER_COUNTERS_ENABLE: {
+			int i;
+			u32 request_pos = PACKET_HEADER_SIZE;
+			mali_bool sw_counter_if_enabled = MALI_FALSE;
+
+			if (PACKET_HEADER_SIZE > control_packet_size ||
+			    control_packet_size !=  _mali_profiling_get_packet_size(control_packet_data + 1)) {
+				MALI_PRINT_ERROR(("Wrong control packet  size , type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			/* Init all counter states before enable requested counters.*/
+			for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+				_mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, MALI_HW_CORE_NO_COUNTER);
+				global_mali_profiling_counters[i].enabled = 0;
+				global_mali_profiling_counters[i].prev_counter_value = 0;
+				global_mali_profiling_counters[i].current_counter_value = 0;
+
+				if (global_mali_profiling_counters[i].counter_id >= FIRST_MEM_COUNTER &&
+				    global_mali_profiling_counters[i].counter_id <= LAST_MEM_COUNTER) {
+					_mali_profiling_notification_mem_counter(session, global_mali_profiling_counters[i].counter_id, 0, 0);
+				}
+			}
+
+			l2_cache_counter_if_enabled = MALI_FALSE;
+			num_counters_enabled = 0;
+			mem_counters_enabled = 0;
+			_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 0);
+			_mali_profiling_control(SW_COUNTER_ENABLE, 0);
+			_mali_profiling_notification_enable(session, 0, 0);
+
+			/* Enable requested counters */
+			while (request_pos < control_packet_size) {
+				u32 begin = request_pos;
+				u32 event;
+				u32 key;
+
+				/* Check the counter name which should be ended with null */
+				while (request_pos < control_packet_size && control_packet_data[request_pos] != '\0') {
+					++request_pos;
+				}
+
+				if (request_pos >= control_packet_size)
+					return _MALI_OSK_ERR_FAULT;
+
+				++request_pos;
+				event = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+				key = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+
+				for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+					u32 name_size = strlen((char *)(control_packet_data + begin));
+
+					if (strncmp(global_mali_profiling_counters[i].counter_name, (char *)(control_packet_data + begin), name_size) == 0) {
+						if (!sw_counter_if_enabled && (FIRST_SW_COUNTER <= global_mali_profiling_counters[i].counter_id
+									       && global_mali_profiling_counters[i].counter_id <= LAST_SW_COUNTER)) {
+							sw_counter_if_enabled = MALI_TRUE;
+							_mali_profiling_control(SW_COUNTER_ENABLE, 1);
+						}
+
+						if (COUNTER_FILMSTRIP == global_mali_profiling_counters[i].counter_id) {
+							_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 1);
+							_mali_profiling_control(FBDUMP_CONTROL_RATE, event & 0xff);
+							_mali_profiling_control(FBDUMP_CONTROL_RESIZE_FACTOR, (event >> 8) & 0xff);
+						}
+
+						if (global_mali_profiling_counters[i].counter_id >= FIRST_MEM_COUNTER &&
+						    global_mali_profiling_counters[i].counter_id <= LAST_MEM_COUNTER) {
+							_mali_profiling_notification_mem_counter(session, global_mali_profiling_counters[i].counter_id,
+									key, 1);
+							mem_counters_enabled++;
+						}
+
+						global_mali_profiling_counters[i].counter_event = event;
+						global_mali_profiling_counters[i].key = key;
+						global_mali_profiling_counters[i].enabled = 1;
+
+						_mali_profiling_set_event(global_mali_profiling_counters[i].counter_id,
+									  global_mali_profiling_counters[i].counter_event);
+						num_counters_enabled++;
+						break;
+					}
+				}
+
+				if (i == num_global_mali_profiling_counters) {
+					MALI_PRINT_ERROR(("Counter name does not match for type %u.\n", control_type));
+					return _MALI_OSK_ERR_FAULT;
+				}
+			}
+
+			if (PACKET_HEADER_SIZE <= output_buffer_size) {
+				*response_packet_data = PACKET_HEADER_ACK;
+				_mali_profiling_set_packet_size(response_packet_data + 1, PACKET_HEADER_SIZE);
+				args->response_packet_size = PACKET_HEADER_SIZE;
+			} else {
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			break;
+		}
+
+		case PACKET_HEADER_START_CAPTURE_VALUE: {
+			u32 live_rate;
+			u32 request_pos = PACKET_HEADER_SIZE;
+
+			if (PACKET_HEADER_SIZE > control_packet_size ||
+			    control_packet_size !=  _mali_profiling_get_packet_size(control_packet_data + 1)) {
+				MALI_PRINT_ERROR(("Wrong control packet  size , type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			/* Read samping rate in nanoseconds and live rate, start capture.*/
+			profiling_sample_rate =  _mali_profiling_read_packet_int(control_packet_data,
+						 &request_pos, control_packet_size);
+
+			live_rate = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+
+			if (PACKET_HEADER_SIZE <= output_buffer_size) {
+				*response_packet_data = PACKET_HEADER_ACK;
+				_mali_profiling_set_packet_size(response_packet_data + 1, PACKET_HEADER_SIZE);
+				args->response_packet_size = PACKET_HEADER_SIZE;
+			} else {
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			if (0 != num_counters_enabled && 0 != profiling_sample_rate) {
+				_mali_profiling_global_stream_list_free();
+				if (mem_counters_enabled > 0) {
+					_mali_profiling_notification_enable(session, profiling_sample_rate, 1);
+				}
+				hrtimer_start(&profiling_sampling_timer,
+					      ktime_set(profiling_sample_rate / 1000000000, profiling_sample_rate % 1000000000),
+					      HRTIMER_MODE_REL_PINNED);
+			}
+
+			break;
+		}
+		default:
+			MALI_PRINT_ERROR(("Unsupported  profiling packet header type %u.\n", control_type));
+			args->response_packet_size  = 0;
+			return _MALI_OSK_ERR_FAULT;
+		}
+	} else {
+		_mali_osk_profiling_stop_sampling(current_profiling_pid);
+		_mali_profiling_notification_enable(session, 0, 0);
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+/**
+ * Called by gator.ko to set HW counters
+ *
+ * @param counter_id The counter ID.
+ * @param event_id Event ID that the counter should count (HW counter value from TRM).
+ *
+ * @return 1 on success, 0 on failure.
+ */
+int _mali_profiling_set_event(u32 counter_id, s32 event_id)
+{
+	if (COUNTER_VP_0_C0 == counter_id) {
+		mali_gp_job_set_gp_counter_src0(event_id);
+	} else if (COUNTER_VP_0_C1 == counter_id) {
+		mali_gp_job_set_gp_counter_src1(event_id);
+	} else if (COUNTER_FP_0_C0 <= counter_id && COUNTER_FP_7_C1 >= counter_id) {
+		/*
+		 * Two compatibility notes for this function:
+		 *
+		 * 1) Previously the DDK allowed per core counters.
+		 *
+		 *    This did not make much sense on Mali-450 with the "virtual PP core" concept,
+		 *    so this option was removed, and only the same pair of HW counters was allowed on all cores,
+		 *    beginning with r3p2 release.
+		 *
+		 *    Starting with r4p0, it is now possible to set different HW counters for the different sub jobs.
+		 *    This should be almost the same, since sub job 0 is designed to run on core 0,
+		 *    sub job 1 on core 1, and so on.
+		 *
+		 *    The scheduling of PP sub jobs is not predictable, and this often led to situations where core 0 ran 2
+		 *    sub jobs, while for instance core 1 ran zero. Having the counters set per sub job would thus increase
+		 *    the predictability of the returned data (as you would be guaranteed data for all the selected HW counters).
+		 *
+		 *    PS: Core scaling needs to be disabled in order to use this reliably (goes for both solutions).
+		 *
+		 *    The framework/#defines with Gator still indicates that the counter is for a particular core,
+		 *    but this is internally used as a sub job ID instead (no translation needed).
+		 *
+		 *  2) Global/default vs per sub job counters
+		 *
+		 *     Releases before r3p2 had only per PP core counters.
+		 *     r3p2 releases had only one set of default/global counters which applied to all PP cores
+		 *     Starting with r4p0, we have both a set of default/global counters,
+		 *     and individual counters per sub job (equal to per core).
+		 *
+		 *     To keep compatibility with Gator/DS-5/streamline, the following scheme is used:
+		 *
+		 *     r3p2 release; only counters set for core 0 is handled,
+		 *     this is applied as the default/global set of counters, and will thus affect all cores.
+		 *
+		 *     r4p0 release; counters set for core 0 is applied as both the global/default set of counters,
+		 *     and counters for sub job 0.
+		 *     Counters set for core 1-7 is only applied for the corresponding sub job.
+		 *
+		 *     This should allow the DS-5/Streamline GUI to have a simple mode where it only allows setting the
+		 *     values for core 0, and thus this will be applied to all PP sub jobs/cores.
+		 *     Advanced mode will also be supported, where individual pairs of HW counters can be selected.
+		 *
+		 *     The GUI will (until it is updated) still refer to cores instead of sub jobs, but this is probably
+		 *     something we can live with!
+		 *
+		 *     Mali-450 note: Each job is not divided into a deterministic number of sub jobs, as the HW DLBU
+		 *     automatically distributes the load between whatever number of cores is available at this particular time.
+		 *     A normal PP job on Mali-450 is thus considered a single (virtual) job, and it will thus only be possible
+		 *     to use a single pair of HW counters (even if the job ran on multiple PP cores).
+		 *     In other words, only the global/default pair of PP HW counters will be used for normal Mali-450 jobs.
+		 */
+		u32 sub_job = (counter_id - COUNTER_FP_0_C0) >> 1;
+		u32 counter_src = (counter_id - COUNTER_FP_0_C0) & 1;
+		if (0 == counter_src) {
+			mali_pp_job_set_pp_counter_sub_job_src0(sub_job, event_id);
+			if (0 == sub_job) {
+				mali_pp_job_set_pp_counter_global_src0(event_id);
+			}
+		} else {
+			mali_pp_job_set_pp_counter_sub_job_src1(sub_job, event_id);
+			if (0 == sub_job) {
+				mali_pp_job_set_pp_counter_global_src1(event_id);
+			}
+		}
+	} else if (COUNTER_L2_0_C0 <= counter_id && COUNTER_L2_2_C1 >= counter_id) {
+		u32 core_id = (counter_id - COUNTER_L2_0_C0) >> 1;
+		struct mali_l2_cache_core *l2_cache_core = mali_l2_cache_core_get_glob_l2_core(core_id);
+
+		if (NULL != l2_cache_core) {
+			u32 counter_src = (counter_id - COUNTER_L2_0_C0) & 1;
+			mali_l2_cache_core_set_counter_src(l2_cache_core,
+							   counter_src, event_id);
+			l2_cache_counter_if_enabled = MALI_TRUE;
+		}
+	} else {
+		return 0; /* Failure, unknown event */
+	}
+
+	return 1; /* success */
+}
+
+/**
+ * Called by gator.ko to retrieve the L2 cache counter values for all L2 cache cores.
+ * The L2 cache counters are unique in that they are polled by gator, rather than being
+ * transmitted via the tracepoint mechanism.
+ *
+ * @param values Pointer to a _mali_profiling_l2_counter_values structure where
+ *               the counter sources and values will be output
+ * @return 0 if all went well; otherwise, return the mask with the bits set for the powered off cores
+ */
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values)
+{
+	u32 l2_cores_num = mali_l2_cache_core_get_glob_num_l2_cores();
+	u32 i;
+
+	MALI_DEBUG_ASSERT(l2_cores_num <= 3);
+
+	for (i = 0; i < l2_cores_num; i++) {
+		struct mali_l2_cache_core *l2_cache = mali_l2_cache_core_get_glob_l2_core(i);
+
+		if (NULL == l2_cache) {
+			continue;
+		}
+
+		mali_l2_cache_core_get_counter_values(l2_cache,
+						      &values->cores[i].source0,
+						      &values->cores[i].value0,
+						      &values->cores[i].source1,
+						      &values->cores[i].value1);
+	}
+
+	return 0;
+}
+
+/**
+ * Called by gator to control the production of profiling information at runtime.
+ */
+void _mali_profiling_control(u32 action, u32 value)
+{
+	switch (action) {
+	case FBDUMP_CONTROL_ENABLE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED, (value == 0 ? MALI_FALSE : MALI_TRUE));
+		break;
+	case FBDUMP_CONTROL_RATE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES, value);
+		break;
+	case SW_COUNTER_ENABLE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_COUNTER_ENABLED, value);
+		break;
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR, value);
+		break;
+	default:
+		break;  /* Ignore unimplemented actions */
+	}
+}
+
+/**
+ * Called by gator to get mali api version.
+ */
+u32 _mali_profiling_get_api_version(void)
+{
+	return MALI_PROFILING_API_VERSION;
+}
+
+/**
+* Called by gator to get the data about Mali instance in use:
+* product id, version, number of cores
+*/
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values)
+{
+	values->mali_product_id = (u32)mali_kernel_core_get_product_id();
+	values->mali_version_major = mali_kernel_core_get_gpu_major_version();
+	values->mali_version_minor = mali_kernel_core_get_gpu_minor_version();
+	values->num_of_l2_cores = mali_l2_cache_core_get_glob_num_l2_cores();
+	values->num_of_fp_cores = mali_executor_get_num_cores_total();
+	values->num_of_vp_cores = 1;
+}
+
+
+EXPORT_SYMBOL(_mali_profiling_set_event);
+EXPORT_SYMBOL(_mali_profiling_get_l2_counters);
+EXPORT_SYMBOL(_mali_profiling_control);
+EXPORT_SYMBOL(_mali_profiling_get_api_version);
+EXPORT_SYMBOL(_mali_profiling_get_mali_version);
diff --git a/mali/linux/mali_osk_specific.h b/mali/linux/mali_osk_specific.h
new file mode 100755
index 0000000..0826b08
--- /dev/null
+++ b/mali/linux/mali_osk_specific.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_specific.h
+ * Defines per-OS Kernel level specifics, such as unusual workarounds for
+ * certain OSs.
+ */
+
+#ifndef __MALI_OSK_SPECIFIC_H__
+#define __MALI_OSK_SPECIFIC_H__
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)
+#include <linux/uaccess.h>
+#else
+#include <asm/uaccess.h>
+#endif
+#include <linux/platform_device.h>
+#include <linux/gfp.h>
+#include <linux/hardirq.h>
+
+
+#include "mali_osk_types.h"
+#include "mali_kernel_linux.h"
+
+#define MALI_STATIC_INLINE static inline
+#define MALI_NON_STATIC_INLINE inline
+
+typedef struct dma_pool *mali_dma_pool;
+
+typedef u32 mali_dma_addr;
+
+#if MALI_ENABLE_CPU_CYCLES
+/* Reads out the clock cycle performance counter of the current cpu.
+   It is useful for cost-free (2 cycle) measuring of the time spent
+   in a code path. Sample before and after, the diff number of cycles.
+   When the CPU is idle it will not increase this clock counter.
+   It means that the counter is accurate if only spin-locks are used,
+   but mutexes may lead to too low values since the cpu might "idle"
+   waiting for the mutex to become available.
+   The clock source is configured on the CPU during mali module load,
+   but will not give useful output after a CPU has been power cycled.
+   It is therefore important to configure the system to not turn of
+   the cpu cores when using this functionallity.*/
+static inline unsigned int mali_get_cpu_cyclecount(void)
+{
+	unsigned int value;
+	/* Reading the CCNT Register - CPU clock counter */
+	asm volatile("MRC p15, 0, %0, c9, c13, 0\t\n": "=r"(value));
+	return value;
+}
+
+void mali_init_cpu_time_counters(int reset, int enable_divide_by_64);
+#endif
+
+
+MALI_STATIC_INLINE u32 _mali_osk_copy_from_user(void *to, void *from, u32 n)
+{
+	return (u32)copy_from_user(to, from, (unsigned long)n);
+}
+
+MALI_STATIC_INLINE mali_bool _mali_osk_in_atomic(void)
+{
+	return in_atomic();
+}
+
+#define _mali_osk_put_user(x, ptr) put_user(x, ptr)
+
+#endif /* __MALI_OSK_SPECIFIC_H__ */
diff --git a/mali/linux/mali_osk_time.c b/mali/linux/mali_osk_time.c
new file mode 100755
index 0000000..76876b6
--- /dev/null
+++ b/mali/linux/mali_osk_time.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_time.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <asm/delay.h>
+
+mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb)
+{
+	return time_after_eq(ticka, tickb) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+unsigned long _mali_osk_time_mstoticks(u32 ms)
+{
+	return msecs_to_jiffies(ms);
+}
+
+u32 _mali_osk_time_tickstoms(unsigned long ticks)
+{
+	return jiffies_to_msecs(ticks);
+}
+
+unsigned long _mali_osk_time_tickcount(void)
+{
+	return jiffies;
+}
+
+void _mali_osk_time_ubusydelay(u32 usecs)
+{
+	udelay(usecs);
+}
+
+u64 _mali_osk_time_get_ns(void)
+{
+	struct timespec tsval;
+	getnstimeofday(&tsval);
+	return (u64)timespec_to_ns(&tsval);
+}
+
+u64 _mali_osk_boot_time_get_ns(void)
+{
+	struct timespec tsval;
+	get_monotonic_boottime(&tsval);
+	return (u64)timespec_to_ns(&tsval);
+}
diff --git a/mali/linux/mali_osk_timers.c b/mali/linux/mali_osk_timers.c
new file mode 100755
index 0000000..8ada2da
--- /dev/null
+++ b/mali/linux/mali_osk_timers.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_timers.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct _mali_osk_timer_t_struct {
+	struct timer_list timer;
+};
+
+typedef void (*timer_timeout_function_t)(unsigned long);
+
+_mali_osk_timer_t *_mali_osk_timer_init(void)
+{
+	_mali_osk_timer_t *t = (_mali_osk_timer_t *)kmalloc(sizeof(_mali_osk_timer_t), GFP_KERNEL);
+	if (NULL != t) init_timer(&t->timer);
+	return t;
+}
+
+void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	tim->timer.expires = jiffies + ticks_to_expire;
+	add_timer(&(tim->timer));
+}
+
+void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	mod_timer(&(tim->timer), jiffies + ticks_to_expire);
+}
+
+void _mali_osk_timer_del(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	del_timer_sync(&(tim->timer));
+}
+
+void _mali_osk_timer_del_async(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	del_timer(&(tim->timer));
+}
+
+mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	return 1 == timer_pending(&(tim->timer));
+}
+
+void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	tim->timer.data = (unsigned long)data;
+	tim->timer.function = (timer_timeout_function_t)callback;
+}
+
+void _mali_osk_timer_term(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	kfree(tim);
+}
diff --git a/mali/linux/mali_osk_wait_queue.c b/mali/linux/mali_osk_wait_queue.c
new file mode 100755
index 0000000..caa3abe
--- /dev/null
+++ b/mali/linux/mali_osk_wait_queue.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_wait_queue.c
+ * Implemenation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct _mali_osk_wait_queue_t_struct {
+	wait_queue_head_t wait_queue;
+};
+
+_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void)
+{
+	_mali_osk_wait_queue_t *ret = NULL;
+
+	ret = kmalloc(sizeof(_mali_osk_wait_queue_t), GFP_KERNEL);
+
+	if (NULL == ret) {
+		return ret;
+	}
+
+	init_waitqueue_head(&ret->wait_queue);
+	MALI_DEBUG_ASSERT(!waitqueue_active(&ret->wait_queue));
+
+	return ret;
+}
+
+void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue));
+	wait_event(queue->wait_queue, condition(data));
+}
+
+void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue));
+	wait_event_timeout(queue->wait_queue, condition(data), _mali_osk_time_mstoticks(timeout));
+}
+
+void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	/* if queue is empty, don't attempt to wake up its elements */
+	if (!waitqueue_active(&queue->wait_queue)) return;
+
+	MALI_DEBUG_PRINT(6, ("Waking up elements in wait queue %p ....\n", queue));
+
+	wake_up_all(&queue->wait_queue);
+
+	MALI_DEBUG_PRINT(6, ("... elements in wait queue %p woken up\n", queue));
+}
+
+void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue)
+{
+	/* Parameter validation  */
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	/* Linux requires no explicit termination of wait queues */
+	kfree(queue);
+}
diff --git a/mali/linux/mali_osk_wq.c b/mali/linux/mali_osk_wq.c
new file mode 100755
index 0000000..06afa04
--- /dev/null
+++ b/mali/linux/mali_osk_wq.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_wq.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/slab.h> /* For memory allocation */
+#include <linux/workqueue.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_license.h"
+#include "mali_kernel_linux.h"
+
+typedef struct _mali_osk_wq_work_s {
+	_mali_osk_wq_work_handler_t handler;
+	void *data;
+	mali_bool high_pri;
+	struct work_struct work_handle;
+} mali_osk_wq_work_object_t;
+
+typedef struct _mali_osk_wq_delayed_work_s {
+	_mali_osk_wq_work_handler_t handler;
+	void *data;
+	struct delayed_work work;
+} mali_osk_wq_delayed_work_object_t;
+
+#if MALI_LICENSE_IS_GPL
+static struct workqueue_struct *mali_wq_normal = NULL;
+static struct workqueue_struct *mali_wq_high = NULL;
+#endif
+
+static void _mali_osk_wq_work_func(struct work_struct *work);
+
+_mali_osk_errcode_t _mali_osk_wq_init(void)
+{
+#if MALI_LICENSE_IS_GPL
+	MALI_DEBUG_ASSERT(NULL == mali_wq_normal);
+	MALI_DEBUG_ASSERT(NULL == mali_wq_high);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+	mali_wq_normal = alloc_workqueue("mali", WQ_UNBOUND, 0);
+	mali_wq_high = alloc_workqueue("mali_high_pri", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+	mali_wq_normal = create_workqueue("mali");
+	mali_wq_high = create_workqueue("mali_high_pri");
+#endif
+	if (NULL == mali_wq_normal || NULL == mali_wq_high) {
+		MALI_PRINT_ERROR(("Unable to create Mali workqueues\n"));
+
+		if (mali_wq_normal) destroy_workqueue(mali_wq_normal);
+		if (mali_wq_high)   destroy_workqueue(mali_wq_high);
+
+		mali_wq_normal = NULL;
+		mali_wq_high   = NULL;
+
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* MALI_LICENSE_IS_GPL */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_wq_flush(void)
+{
+#if MALI_LICENSE_IS_GPL
+	flush_workqueue(mali_wq_high);
+	flush_workqueue(mali_wq_normal);
+#else
+	flush_scheduled_work();
+#endif
+}
+
+void _mali_osk_wq_term(void)
+{
+#if MALI_LICENSE_IS_GPL
+	MALI_DEBUG_ASSERT(NULL != mali_wq_normal);
+	MALI_DEBUG_ASSERT(NULL != mali_wq_high);
+
+	flush_workqueue(mali_wq_normal);
+	destroy_workqueue(mali_wq_normal);
+
+	flush_workqueue(mali_wq_high);
+	destroy_workqueue(mali_wq_high);
+
+	mali_wq_normal = NULL;
+	mali_wq_high   = NULL;
+#else
+	flush_scheduled_work();
+#endif
+}
+
+_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL);
+
+	if (NULL == work) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+	work->high_pri = MALI_FALSE;
+
+	INIT_WORK(&work->work_handle, _mali_osk_wq_work_func);
+
+	return work;
+}
+
+_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL);
+
+	if (NULL == work) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+	work->high_pri = MALI_TRUE;
+
+	INIT_WORK(&work->work_handle, _mali_osk_wq_work_func);
+
+	return work;
+}
+
+void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+	_mali_osk_wq_flush();
+	kfree(work_object);
+}
+
+void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+	kfree(work_object);
+}
+
+void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+#if MALI_LICENSE_IS_GPL
+	queue_work(mali_wq_normal, &work_object->work_handle);
+#else
+	schedule_work(&work_object->work_handle);
+#endif
+}
+
+void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+#if MALI_LICENSE_IS_GPL
+	queue_work(mali_wq_high, &work_object->work_handle);
+#else
+	schedule_work(&work_object->work_handle);
+#endif
+}
+
+static void _mali_osk_wq_work_func(struct work_struct *work)
+{
+	mali_osk_wq_work_object_t *work_object;
+
+	work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_work_object_t, work_handle);
+
+#if MALI_LICENSE_IS_GPL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+	/* We want highest Dynamic priority of the thread so that the Jobs depending
+	** on this thread could be scheduled in time. Without this, this thread might
+	** sometimes need to wait for some threads in user mode to finish its round-robin
+	** time, causing *bubble* in the Mali pipeline. Thanks to the new implementation
+	** of high-priority workqueue in new kernel, this only happens in older kernel.
+	*/
+	if (MALI_TRUE == work_object->high_pri) {
+		set_user_nice(current, -19);
+	}
+#endif
+#endif /* MALI_LICENSE_IS_GPL */
+
+	work_object->handler(work_object->data);
+}
+
+static void _mali_osk_wq_delayed_work_func(struct work_struct *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object;
+
+	work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_delayed_work_object_t, work.work);
+	work_object->handler(work_object->data);
+}
+
+mali_osk_wq_delayed_work_object_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_delayed_work_object_t *work = kmalloc(sizeof(mali_osk_wq_delayed_work_object_t), GFP_KERNEL);
+
+	if (NULL == work) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+
+	INIT_DELAYED_WORK(&work->work, _mali_osk_wq_delayed_work_func);
+
+	return work;
+}
+
+void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+	kfree(work_object);
+}
+
+void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+	cancel_delayed_work(&work_object->work);
+}
+
+void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+	cancel_delayed_work_sync(&work_object->work);
+}
+
+void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+
+#if MALI_LICENSE_IS_GPL
+	queue_delayed_work(mali_wq_normal, &work_object->work, delay);
+#else
+	schedule_delayed_work(&work_object->work, delay);
+#endif
+
+}
diff --git a/mali/linux/mali_pmu_power_up_down.c b/mali/linux/mali_pmu_power_up_down.c
new file mode 100755
index 0000000..6a6c9f8
--- /dev/null
+++ b/mali/linux/mali_pmu_power_up_down.c
@@ -0,0 +1,27 @@
+/**
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_pmu_power_up_down.c
+ */
+
+#include <linux/module.h>
+#include "mali_executor.h"
+
+int mali_perf_set_num_pp_cores(unsigned int num_cores)
+{
+#ifndef CONFIG_MALI_DVFS
+	return mali_executor_set_perf_level(num_cores, MALI_TRUE);
+#else
+	return mali_executor_set_perf_level(num_cores, MALI_FALSE);
+#endif
+}
+
+EXPORT_SYMBOL(mali_perf_set_num_pp_cores);
diff --git a/mali/linux/mali_profiling_events.h b/mali/linux/mali_profiling_events.h
new file mode 100755
index 0000000..5e51095
--- /dev/null
+++ b/mali/linux/mali_profiling_events.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_EVENTS_H__
+#define __MALI_PROFILING_EVENTS_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_profiling_events.h>
+
+#endif /* __MALI_PROFILING_EVENTS_H__ */
diff --git a/mali/linux/mali_profiling_gator_api.h b/mali/linux/mali_profiling_gator_api.h
new file mode 100755
index 0000000..e371971
--- /dev/null
+++ b/mali/linux/mali_profiling_gator_api.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_GATOR_API_H__
+#define __MALI_PROFILING_GATOR_API_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_profiling_gator_api.h>
+
+#endif /* __MALI_PROFILING_GATOR_API_H__ */
diff --git a/mali/linux/mali_profiling_internal.c b/mali/linux/mali_profiling_internal.c
new file mode 100755
index 0000000..918caa0
--- /dev/null
+++ b/mali/linux/mali_profiling_internal.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_ukk.h"
+#include "mali_timestamp.h"
+#include "mali_osk_profiling.h"
+#include "mali_user_settings_db.h"
+#include "mali_profiling_internal.h"
+
+typedef struct mali_profiling_entry {
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+} mali_profiling_entry;
+
+typedef enum mali_profiling_state {
+	MALI_PROFILING_STATE_UNINITIALIZED,
+	MALI_PROFILING_STATE_IDLE,
+	MALI_PROFILING_STATE_RUNNING,
+	MALI_PROFILING_STATE_RETURN,
+} mali_profiling_state;
+
+static _mali_osk_mutex_t *lock = NULL;
+static mali_profiling_state prof_state = MALI_PROFILING_STATE_UNINITIALIZED;
+static mali_profiling_entry *profile_entries = NULL;
+static _mali_osk_atomic_t profile_insert_index;
+static u32 profile_mask = 0;
+
+static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4);
+
+void probe_mali_timeline_event(void *data, TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, unsigned
+			       int d2, unsigned int d3, unsigned int d4))
+{
+	add_event(event_id, d0, d1, d2, d3, d4);
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_init(mali_bool auto_start)
+{
+	profile_entries = NULL;
+	profile_mask = 0;
+	_mali_osk_atomic_init(&profile_insert_index, 0);
+
+	lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_PROFILING);
+	if (NULL == lock) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	prof_state = MALI_PROFILING_STATE_IDLE;
+
+	if (MALI_TRUE == auto_start) {
+		u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* Use maximum buffer size */
+
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE);
+		if (_MALI_OSK_ERR_OK != _mali_internal_profiling_start(&limit)) {
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_internal_profiling_term(void)
+{
+	u32 count;
+
+	/* Ensure profiling is stopped */
+	_mali_internal_profiling_stop(&count);
+
+	prof_state = MALI_PROFILING_STATE_UNINITIALIZED;
+
+	if (NULL != profile_entries) {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	if (NULL != lock) {
+		_mali_osk_mutex_term(lock);
+		lock = NULL;
+	}
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_start(u32 *limit)
+{
+	_mali_osk_errcode_t ret;
+	mali_profiling_entry *new_profile_entries;
+
+	_mali_osk_mutex_wait(lock);
+
+	if (MALI_PROFILING_STATE_RUNNING == prof_state) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_BUSY;
+	}
+
+	new_profile_entries = _mali_osk_valloc(*limit * sizeof(mali_profiling_entry));
+
+	if (NULL == new_profile_entries) {
+		_mali_osk_mutex_signal(lock);
+		_mali_osk_vfree(new_profile_entries);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	if (MALI_PROFILING_MAX_BUFFER_ENTRIES < *limit) {
+		*limit = MALI_PROFILING_MAX_BUFFER_ENTRIES;
+	}
+
+	profile_mask = 1;
+	while (profile_mask <= *limit) {
+		profile_mask <<= 1;
+	}
+	profile_mask >>= 1;
+
+	*limit = profile_mask;
+
+	profile_mask--; /* turns the power of two into a mask of one less */
+
+	if (MALI_PROFILING_STATE_IDLE != prof_state) {
+		_mali_osk_mutex_signal(lock);
+		_mali_osk_vfree(new_profile_entries);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	profile_entries = new_profile_entries;
+
+	ret = _mali_timestamp_reset();
+
+	if (_MALI_OSK_ERR_OK == ret) {
+		prof_state = MALI_PROFILING_STATE_RUNNING;
+	} else {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	register_trace_mali_timeline_event(probe_mali_timeline_event, NULL);
+
+	_mali_osk_mutex_signal(lock);
+	return ret;
+}
+
+static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4)
+{
+	u32 cur_index = (_mali_osk_atomic_inc_return(&profile_insert_index) - 1) & profile_mask;
+
+	profile_entries[cur_index].timestamp = _mali_timestamp_get();
+	profile_entries[cur_index].event_id = event_id;
+	profile_entries[cur_index].data[0] = data0;
+	profile_entries[cur_index].data[1] = data1;
+	profile_entries[cur_index].data[2] = data2;
+	profile_entries[cur_index].data[3] = data3;
+	profile_entries[cur_index].data[4] = data4;
+
+	/* If event is "leave API function", add current memory usage to the event
+	 * as data point 4.  This is used in timeline profiling to indicate how
+	 * much memory was used when leaving a function. */
+	if (event_id == (MALI_PROFILING_EVENT_TYPE_SINGLE | MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC)) {
+		profile_entries[cur_index].data[4] = _mali_ukk_report_memory_usage();
+	}
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_stop(u32 *count)
+{
+	_mali_osk_mutex_wait(lock);
+
+	if (MALI_PROFILING_STATE_RUNNING != prof_state) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	/* go into return state (user to retreive events), no more events will be added after this */
+	prof_state = MALI_PROFILING_STATE_RETURN;
+
+	unregister_trace_mali_timeline_event(probe_mali_timeline_event, NULL);
+
+	_mali_osk_mutex_signal(lock);
+
+	tracepoint_synchronize_unregister();
+
+	*count = _mali_osk_atomic_read(&profile_insert_index);
+	if (*count > profile_mask) *count = profile_mask;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+u32 _mali_internal_profiling_get_count(void)
+{
+	u32 retval = 0;
+
+	_mali_osk_mutex_wait(lock);
+	if (MALI_PROFILING_STATE_RETURN == prof_state) {
+		retval = _mali_osk_atomic_read(&profile_insert_index);
+		if (retval > profile_mask) retval = profile_mask;
+	}
+	_mali_osk_mutex_signal(lock);
+
+	return retval;
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5])
+{
+	u32 raw_index = _mali_osk_atomic_read(&profile_insert_index);
+
+	_mali_osk_mutex_wait(lock);
+
+	if (index < profile_mask) {
+		if ((raw_index & ~profile_mask) != 0) {
+			index += raw_index;
+			index &= profile_mask;
+		}
+
+		if (prof_state != MALI_PROFILING_STATE_RETURN) {
+			_mali_osk_mutex_signal(lock);
+			return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+		}
+
+		if (index >= raw_index) {
+			_mali_osk_mutex_signal(lock);
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		*timestamp = profile_entries[index].timestamp;
+		*event_id = profile_entries[index].event_id;
+		data[0] = profile_entries[index].data[0];
+		data[1] = profile_entries[index].data[1];
+		data[2] = profile_entries[index].data[2];
+		data[3] = profile_entries[index].data[3];
+		data[4] = profile_entries[index].data[4];
+	} else {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	_mali_osk_mutex_signal(lock);
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_clear(void)
+{
+	_mali_osk_mutex_wait(lock);
+
+	if (MALI_PROFILING_STATE_RETURN != prof_state) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	prof_state = MALI_PROFILING_STATE_IDLE;
+	profile_mask = 0;
+	_mali_osk_atomic_init(&profile_insert_index, 0);
+
+	if (NULL != profile_entries) {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	_mali_osk_mutex_signal(lock);
+	return _MALI_OSK_ERR_OK;
+}
+
+mali_bool _mali_internal_profiling_is_recording(void)
+{
+	return prof_state == MALI_PROFILING_STATE_RUNNING ? MALI_TRUE : MALI_FALSE;
+}
+
+mali_bool _mali_internal_profiling_have_recording(void)
+{
+	return prof_state == MALI_PROFILING_STATE_RETURN ? MALI_TRUE : MALI_FALSE;
+}
diff --git a/mali/linux/mali_profiling_internal.h b/mali/linux/mali_profiling_internal.h
new file mode 100755
index 0000000..6e05ffd
--- /dev/null
+++ b/mali/linux/mali_profiling_internal.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_INTERNAL_H__
+#define __MALI_PROFILING_INTERNAL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_osk.h"
+
+int _mali_internal_profiling_init(mali_bool auto_start);
+void _mali_internal_profiling_term(void);
+
+mali_bool _mali_internal_profiling_is_recording(void);
+mali_bool _mali_internal_profiling_have_recording(void);
+_mali_osk_errcode_t _mali_internal_profiling_clear(void);
+_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]);
+u32 _mali_internal_profiling_get_count(void);
+int _mali_internal_profiling_stop(u32 *count);
+int _mali_internal_profiling_start(u32 *limit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_PROFILING_INTERNAL_H__ */
diff --git a/mali/linux/mali_sync.c b/mali/linux/mali_sync.c
new file mode 100755
index 0000000..1712cfd
--- /dev/null
+++ b/mali/linux/mali_sync.c
@@ -0,0 +1,668 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_sync.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_timeline.h"
+#include "mali_executor.h"
+
+#include <linux/file.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <asm-generic/fcntl.h>
+
+struct mali_sync_pt {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	struct sync_pt         sync_pt;
+#else
+	struct mali_internal_sync_point         sync_pt;
+#endif
+	struct mali_sync_flag *flag;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	struct sync_timeline *sync_tl;  /**< Sync timeline this pt is connected to. */
+#else
+	struct mali_internal_sync_timeline *sync_tl;  /**< Sync timeline this pt is connected to. */
+#endif
+};
+
+/**
+ * The sync flag is used to connect sync fences to the Mali Timeline system.  Sync fences can be
+ * created from a sync flag, and when the flag is signaled, the sync fences will also be signaled.
+ */
+struct mali_sync_flag {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	struct sync_timeline *sync_tl;  /**< Sync timeline this flag is connected to. */
+#else
+	struct mali_internal_sync_timeline *sync_tl;  /**< Sync timeline this flag is connected to. */
+#endif
+	u32                   point;    /**< Point on timeline. */
+	int                   status;   /**< 0 if unsignaled, 1 if signaled without error or negative if signaled with error. */
+	struct kref           refcount; /**< Reference count. */
+};
+
+/**
+ * Mali sync timeline is used to connect mali timeline to sync_timeline.
+ * When fence timeout can print more detailed mali timeline system info.
+ */
+struct mali_sync_timeline_container {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	struct sync_timeline sync_timeline;
+#else
+	struct mali_internal_sync_timeline sync_timeline;
+#endif
+	struct mali_timeline *timeline;
+};
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+MALI_STATIC_INLINE struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+#else
+MALI_STATIC_INLINE struct mali_sync_pt *to_mali_sync_pt(struct mali_internal_sync_point *pt)
+#endif
+{
+	return container_of(pt, struct mali_sync_pt, sync_pt);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+MALI_STATIC_INLINE struct mali_sync_timeline_container *to_mali_sync_tl_container(struct sync_timeline *sync_tl)
+#else
+MALI_STATIC_INLINE struct mali_sync_timeline_container *to_mali_sync_tl_container(struct mali_internal_sync_timeline *sync_tl)
+#endif
+{
+	return container_of(sync_tl, struct mali_sync_timeline_container, sync_timeline);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+static int timeline_has_signaled(struct sync_pt *pt)
+#else
+static int timeline_has_signaled(struct mali_internal_sync_point *pt)
+#endif
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(pt);
+	mpt = to_mali_sync_pt(pt);
+
+	MALI_DEBUG_ASSERT_POINTER(mpt->flag);
+
+	return mpt->flag->status;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+static void timeline_free_pt(struct sync_pt *pt)
+#else
+static void timeline_free_pt(struct mali_internal_sync_point *pt)
+#endif
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(pt);
+	mpt = to_mali_sync_pt(pt);
+
+	mali_sync_flag_put(mpt->flag);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+static void timeline_release(struct sync_timeline *sync_timeline)
+#else
+static void timeline_release(struct mali_internal_sync_timeline *sync_timeline)
+#endif
+{
+	struct mali_sync_timeline_container *mali_sync_tl = NULL;
+	struct mali_timeline *mali_tl = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_timeline);
+
+	mali_sync_tl = to_mali_sync_tl_container(sync_timeline);
+	MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+	mali_tl = mali_sync_tl->timeline;
+
+	/* always signaled timeline didn't have mali container */
+	if (mali_tl) {
+		if (NULL != mali_tl->spinlock) {
+			mali_spinlock_reentrant_term(mali_tl->spinlock);
+		}
+		_mali_osk_free(mali_tl);
+	}
+
+	module_put(THIS_MODULE);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt, *new_mpt;
+	struct sync_pt *new_pt;
+	MALI_DEBUG_ASSERT_POINTER(pt);
+
+	mpt = to_mali_sync_pt(pt);
+
+	new_pt = sync_pt_create(mpt->sync_tl, sizeof(struct mali_sync_pt));
+	if (NULL == new_pt) return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+
+	mali_sync_flag_get(mpt->flag);
+	new_mpt->flag = mpt->flag;
+	new_mpt->sync_tl = mpt->sync_tl;
+
+	return new_pt;
+}
+
+static int timeline_compare(struct sync_pt *pta, struct sync_pt *ptb)
+{
+	struct mali_sync_pt *mpta;
+	struct mali_sync_pt *mptb;
+	u32 a, b;
+
+	MALI_DEBUG_ASSERT_POINTER(pta);
+	MALI_DEBUG_ASSERT_POINTER(ptb);
+	mpta = to_mali_sync_pt(pta);
+	mptb = to_mali_sync_pt(ptb);
+
+	MALI_DEBUG_ASSERT_POINTER(mpta->flag);
+	MALI_DEBUG_ASSERT_POINTER(mptb->flag);
+
+	a = mpta->flag->point;
+	b = mptb->flag->point;
+
+	if (a == b) return 0;
+
+	return ((b - a) < (a - b) ? -1 : 1);
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+static void timeline_print_pt(struct seq_file *s, struct sync_pt *sync_pt)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(s);
+	MALI_DEBUG_ASSERT_POINTER(sync_pt);
+
+	mpt = to_mali_sync_pt(sync_pt);
+
+	/* It is possible this sync point is just under construct,
+	 * make sure the flag is valid before accessing it
+	*/
+	if (mpt->flag) {
+		seq_printf(s, "%u", mpt->flag->point);
+	} else {
+		seq_printf(s, "uninitialized");
+	}
+}
+
+static void timeline_print_obj(struct seq_file *s, struct sync_timeline *sync_tl)
+{
+	struct mali_sync_timeline_container *mali_sync_tl = NULL;
+	struct mali_timeline *mali_tl = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_tl);
+
+	mali_sync_tl = to_mali_sync_tl_container(sync_tl);
+	MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+	mali_tl = mali_sync_tl->timeline;
+
+	if (NULL != mali_tl) {
+		seq_printf(s, "oldest (%u) ", mali_tl->point_oldest);
+		seq_printf(s, "next (%u)", mali_tl->point_next);
+		seq_printf(s, "\n");
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+		{
+			u32 tid = _mali_osk_get_tid();
+			struct mali_timeline_system *system = mali_tl->system;
+
+			mali_spinlock_reentrant_wait(mali_tl->spinlock, tid);
+			if (!mali_tl->destroyed) {
+				mali_spinlock_reentrant_wait(system->spinlock, tid);
+				mali_timeline_debug_print_timeline(mali_tl, s);
+				mali_spinlock_reentrant_signal(system->spinlock, tid);
+			}
+			mali_spinlock_reentrant_signal(mali_tl->spinlock, tid);
+
+			/* dump job queue status and group running status */
+			mali_executor_status_dump();
+		}
+#endif
+	}
+}
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+static void timeline_pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(str);
+	MALI_DEBUG_ASSERT_POINTER(pt);
+
+	mpt = to_mali_sync_pt(pt);
+
+	/* It is possible this sync point is just under construct,
+	 * make sure the flag is valid before accessing it
+	*/
+	if (mpt->flag) {
+		_mali_osk_snprintf(str, size, "%u", mpt->flag->point);
+	} else {
+		_mali_osk_snprintf(str, size, "uninitialized");
+	}
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str, int size)
+{
+	struct mali_sync_timeline_container *mali_sync_tl = NULL;
+	struct mali_timeline *mali_tl = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	mali_sync_tl = to_mali_sync_tl_container(timeline);
+	MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+	mali_tl = mali_sync_tl->timeline;
+
+	if (NULL != mali_tl) {
+		_mali_osk_snprintf(str, size, "oldest (%u) ", mali_tl->point_oldest);
+		_mali_osk_snprintf(str, size, "next (%u)", mali_tl->point_next);
+		_mali_osk_snprintf(str, size, "\n");
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+		{
+			u32 tid = _mali_osk_get_tid();
+			struct mali_timeline_system *system = mali_tl->system;
+
+			mali_spinlock_reentrant_wait(mali_tl->spinlock, tid);
+			if (!mali_tl->destroyed) {
+				mali_spinlock_reentrant_wait(system->spinlock, tid);
+				mali_timeline_debug_direct_print_timeline(mali_tl);
+				mali_spinlock_reentrant_signal(system->spinlock, tid);
+			}
+			mali_spinlock_reentrant_signal(mali_tl->spinlock, tid);
+
+			/* dump job queue status and group running status */
+			mali_executor_status_dump();
+		}
+#endif
+	}
+}
+#else
+static void timeline_print_sync_pt(struct mali_internal_sync_point *sync_pt)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_pt);
+
+	mpt = to_mali_sync_pt(sync_pt);
+
+	if (mpt->flag) {
+		MALI_DEBUG_PRINT(2, ("mali_internal_sync_pt: %u\n", mpt->flag->point));
+	} else {
+		MALI_DEBUG_PRINT(2, ("uninitialized\n", mpt->flag->point));
+	}
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name    = "Mali",
+	.dup            = timeline_dup,
+	.has_signaled   = timeline_has_signaled,
+	.compare        = timeline_compare,
+	.free_pt        = timeline_free_pt,
+	.release_obj    = timeline_release,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	.print_pt       = timeline_print_pt,
+	.print_obj      = timeline_print_obj,
+#else
+	.pt_value_str = timeline_pt_value_str,
+	.timeline_value_str = timeline_value_str,
+#endif
+};
+
+struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name)
+{
+	struct sync_timeline *sync_tl;
+	struct mali_sync_timeline_container *mali_sync_tl;
+
+	sync_tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline_container), name);
+	if (NULL == sync_tl) return NULL;
+
+	mali_sync_tl = to_mali_sync_tl_container(sync_tl);
+	mali_sync_tl->timeline = timeline;
+
+	/* Grab a reference on the module to ensure the callbacks are present
+	 * as long some timeline exists. The reference is released when the
+	 * timeline is freed.
+	 * Since this function is called from a ioctl on an open file we know
+	 * we already have a reference, so using __module_get is safe. */
+	__module_get(THIS_MODULE);
+
+	return sync_tl;
+}
+
+s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence)
+{
+	s32 fd = -1;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
+	fd = get_unused_fd();
+#else
+	fd = get_unused_fd_flags(O_CLOEXEC);
+#endif
+
+	if (fd < 0) {
+		sync_fence_put(sync_fence);
+		return -1;
+	}
+	sync_fence_install(sync_fence, fd);
+
+	return fd;
+}
+
+struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2)
+{
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence1);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence1);
+
+	sync_fence = sync_fence_merge("mali_merge_fence", sync_fence1, sync_fence2);
+	sync_fence_put(sync_fence1);
+	sync_fence_put(sync_fence2);
+
+	return sync_fence;
+}
+
+struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl)
+{
+	struct mali_sync_flag *flag;
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_tl);
+
+	flag = mali_sync_flag_create(sync_tl, 0);
+	if (NULL == flag) return NULL;
+
+	sync_fence = mali_sync_flag_create_fence(flag);
+
+	mali_sync_flag_signal(flag, 0);
+	mali_sync_flag_put(flag);
+
+	return sync_fence;
+}
+
+struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, mali_timeline_point point)
+{
+	struct mali_sync_flag *flag;
+
+	if (NULL == sync_tl) return NULL;
+
+	flag = _mali_osk_calloc(1, sizeof(*flag));
+	if (NULL == flag) return NULL;
+
+	flag->sync_tl = sync_tl;
+	flag->point = point;
+
+	flag->status = 0;
+	kref_init(&flag->refcount);
+
+	return flag;
+}
+
+/**
+ * Create a sync point attached to given sync flag.
+ *
+ * @note Sync points must be triggered in *exactly* the same order as they are created.
+ *
+ * @param flag Sync flag.
+ * @return New sync point if successful, NULL if not.
+ */
+static struct sync_pt *mali_sync_flag_create_pt(struct mali_sync_flag *flag)
+{
+	struct sync_pt *pt;
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	MALI_DEBUG_ASSERT_POINTER(flag->sync_tl);
+
+	pt = sync_pt_create(flag->sync_tl, sizeof(struct mali_sync_pt));
+	if (NULL == pt) return NULL;
+
+	mali_sync_flag_get(flag);
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->flag = flag;
+	mpt->sync_tl = flag->sync_tl;
+
+	return pt;
+}
+
+struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag)
+{
+	struct sync_pt    *sync_pt;
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	MALI_DEBUG_ASSERT_POINTER(flag->sync_tl);
+
+	sync_pt = mali_sync_flag_create_pt(flag);
+	if (NULL == sync_pt) return NULL;
+
+	sync_fence = sync_fence_create("mali_flag_fence", sync_pt);
+	if (NULL == sync_fence) {
+		sync_pt_free(sync_pt);
+		return NULL;
+	}
+
+	return sync_fence;
+}
+#else
+static struct mali_internal_sync_timeline_ops mali_timeline_ops = {
+	.driver_name    = "Mali",
+	.has_signaled   = timeline_has_signaled,
+	.free_pt        = timeline_free_pt,
+	.release_obj    = timeline_release,
+	.print_sync_pt = timeline_print_sync_pt,
+};
+
+struct mali_internal_sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name)
+{
+	struct mali_internal_sync_timeline *sync_tl;
+	struct mali_sync_timeline_container *mali_sync_tl;
+
+	sync_tl = mali_internal_sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline_container), name);
+	if (NULL == sync_tl) return NULL;
+
+	mali_sync_tl = to_mali_sync_tl_container(sync_tl);
+	mali_sync_tl->timeline = timeline;
+
+	/* Grab a reference on the module to ensure the callbacks are present
+	 * as long some timeline exists. The reference is released when the
+	 * timeline is freed.
+	 * Since this function is called from a ioctl on an open file we know
+	 * we already have a reference, so using __module_get is safe. */
+	__module_get(THIS_MODULE);
+
+	return sync_tl;
+}
+
+s32 mali_sync_fence_fd_alloc(struct mali_internal_sync_fence *sync_fence)
+{
+	s32 fd = -1;
+
+	fd = get_unused_fd_flags(0);
+
+	if (fd < 0) {
+		fput(sync_fence->file);
+		return -1;
+	}
+	fd_install(fd, sync_fence->file);
+	return fd;
+}
+
+struct mali_internal_sync_fence *mali_sync_fence_merge(struct mali_internal_sync_fence *sync_fence1, struct mali_internal_sync_fence *sync_fence2)
+{
+	struct mali_internal_sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence1);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence1);
+
+	sync_fence = mali_internal_sync_fence_merge(sync_fence1, sync_fence2);
+	fput(sync_fence1->file);
+	fput(sync_fence2->file);
+
+	return sync_fence;
+}
+
+struct mali_internal_sync_fence *mali_sync_timeline_create_signaled_fence(struct mali_internal_sync_timeline *sync_tl)
+{
+	struct mali_sync_flag *flag;
+	struct mali_internal_sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_tl);
+
+	flag = mali_sync_flag_create(sync_tl, 0);
+	if (NULL == flag) return NULL;
+
+	sync_fence = mali_sync_flag_create_fence(flag);
+
+	mali_sync_flag_signal(flag, 0);
+	mali_sync_flag_put(flag);
+
+	return sync_fence;
+}
+
+struct mali_sync_flag *mali_sync_flag_create(struct mali_internal_sync_timeline *sync_tl, mali_timeline_point point)
+{
+	struct mali_sync_flag *flag;
+
+	if (NULL == sync_tl) return NULL;
+
+	flag = _mali_osk_calloc(1, sizeof(*flag));
+	if (NULL == flag) return NULL;
+
+	flag->sync_tl = sync_tl;
+	flag->point = point;
+
+	flag->status = 0;
+	kref_init(&flag->refcount);
+
+	return flag;
+}
+
+/**
+ * Create a sync point attached to given sync flag.
+ *
+ * @note Sync points must be triggered in *exactly* the same order as they are created.
+ *
+ * @param flag Sync flag.
+ * @return New sync point if successful, NULL if not.
+ */
+static struct mali_internal_sync_point *mali_sync_flag_create_pt(struct mali_sync_flag *flag)
+{
+	struct mali_internal_sync_point *pt;
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	MALI_DEBUG_ASSERT_POINTER(flag->sync_tl);
+
+	pt = mali_internal_sync_point_create(flag->sync_tl, sizeof(struct mali_sync_pt));
+
+	if (pt == NULL) {
+		MALI_PRINT_ERROR(("Mali sync: sync_pt creation failed\n"));
+		return NULL;
+	}
+	mali_sync_flag_get(flag);
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->flag = flag;
+	mpt->sync_tl = flag->sync_tl;
+
+	return pt;
+}
+
+struct mali_internal_sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag)
+{
+	struct mali_internal_sync_point    *sync_pt;
+	struct mali_internal_sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	MALI_DEBUG_ASSERT_POINTER(flag->sync_tl);
+
+	sync_pt = mali_sync_flag_create_pt(flag);
+	if (NULL == sync_pt) {
+		MALI_PRINT_ERROR(("Mali sync: sync_pt creation failed\n"));
+		return NULL;
+	}
+	sync_fence = (struct mali_internal_sync_fence *)sync_file_create(&sync_pt->base);
+	if (NULL == sync_fence) {
+		MALI_PRINT_ERROR(("Mali sync: sync_fence creation failed\n"));
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+		dma_fence_put(&sync_pt->base);
+#else
+		fence_put(&sync_pt->base);
+#endif
+		return NULL;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	fence_put(&sync_pt->base);
+#else
+	dma_fence_put(&sync_pt->base);
+#endif
+
+	return sync_fence;
+}
+#endif
+
+void mali_sync_flag_get(struct mali_sync_flag *flag)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	kref_get(&flag->refcount);
+}
+
+/**
+ * Free sync flag.
+ *
+ * @param ref kref object embedded in sync flag that should be freed.
+ */
+static void mali_sync_flag_free(struct kref *ref)
+{
+	struct mali_sync_flag *flag;
+
+	MALI_DEBUG_ASSERT_POINTER(ref);
+	flag = container_of(ref, struct mali_sync_flag, refcount);
+
+	_mali_osk_free(flag);
+}
+
+void mali_sync_flag_put(struct mali_sync_flag *flag)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	kref_put(&flag->refcount, mali_sync_flag_free);
+}
+
+void mali_sync_flag_signal(struct mali_sync_flag *flag, int error)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+
+	MALI_DEBUG_ASSERT(0 == flag->status);
+	flag->status = (0 > error) ? error : 1;
+
+	_mali_osk_write_mem_barrier();
+#if  LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	sync_timeline_signal(flag->sync_tl);
+#else
+	mali_internal_sync_timeline_signal(flag->sync_tl);
+#endif
+}
+
+
diff --git a/mali/linux/mali_sync.h b/mali/linux/mali_sync.h
new file mode 100755
index 0000000..553dba7
--- /dev/null
+++ b/mali/linux/mali_sync.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_sync.h
+ *
+ * Mali interface for Linux sync objects.
+ */
+
+#ifndef _MALI_SYNC_H_
+#define _MALI_SYNC_H_
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+
+#include <linux/seq_file.h>
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
+#include <linux/sync.h>
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+#include <sync.h>
+#else
+#include "mali_internal_sync.h"
+#endif
+
+
+#include "mali_osk.h"
+
+struct mali_sync_flag;
+struct mali_timeline;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+/**
+ * Create a sync timeline.
+ *
+ * @param name Name of the sync timeline.
+ * @return The new sync timeline if successful, NULL if not.
+ */
+struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name);
+
+/**
+ * Creates a file descriptor representing the sync fence.  Will release sync fence if allocation of
+ * file descriptor fails.
+ *
+ * @param sync_fence Sync fence.
+ * @return File descriptor representing sync fence if successful, or -1 if not.
+ */
+s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence);
+
+/**
+ * Merges two sync fences.  Both input sync fences will be released.
+ *
+ * @param sync_fence1 First sync fence.
+ * @param sync_fence2 Second sync fence.
+ * @return New sync fence that is the result of the merger if successful, or NULL if not.
+ */
+struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2);
+
+/**
+ * Create a sync fence that is already signaled.
+ *
+ * @param tl Sync timeline.
+ * @return New signaled sync fence if successful, NULL if not.
+ */
+struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl);
+
+
+/**
+ * Create a sync flag.
+ *
+ * @param sync_tl Sync timeline.
+ * @param point Point on Mali timeline.
+ * @return New sync flag if successful, NULL if not.
+ */
+struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, u32 point);
+
+/**
+ * Create a sync fence attached to given sync flag.
+ *
+ * @param flag Sync flag.
+ * @return New sync fence if successful, NULL if not.
+ */
+struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag);
+#else
+/**
+ * Create a sync timeline.
+ *
+ * @param name Name of the sync timeline.
+ * @return The new sync timeline if successful, NULL if not.
+ */
+struct mali_internal_sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name);
+
+/**
+ * Creates a file descriptor representing the sync fence.  Will release sync fence if allocation of
+ * file descriptor fails.
+ *
+ * @param sync_fence Sync fence.
+ * @return File descriptor representing sync fence if successful, or -1 if not.
+ */
+s32 mali_sync_fence_fd_alloc(struct mali_internal_sync_fence *sync_fence);
+
+/**
+ * Merges two sync fences.  Both input sync fences will be released.
+ *
+ * @param sync_fence1 First sync fence.
+ * @param sync_fence2 Second sync fence.
+ * @return New sync fence that is the result of the merger if successful, or NULL if not.
+ */
+struct mali_internal_sync_fence *mali_sync_fence_merge(struct mali_internal_sync_fence *sync_fence1, struct mali_internal_sync_fence *sync_fence2);
+
+/**
+ * Create a sync fence that is already signaled.
+ *
+ * @param tl Sync timeline.
+ * @return New signaled sync fence if successful, NULL if not.
+ */
+struct mali_internal_sync_fence *mali_sync_timeline_create_signaled_fence(struct mali_internal_sync_timeline *sync_tl);
+
+
+/**
+ * Create a sync flag.
+ *
+ * @param sync_tl Sync timeline.
+ * @param point Point on Mali timeline.
+ * @return New sync flag if successful, NULL if not.
+ */
+struct mali_sync_flag *mali_sync_flag_create(struct mali_internal_sync_timeline *sync_tl, u32 point);
+
+/**
+ * Create a sync fence attached to given sync flag.
+ *
+ * @param flag Sync flag.
+ * @return New sync fence if successful, NULL if not.
+ */
+struct mali_internal_sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag);
+
+#endif
+/**
+ * Grab sync flag reference.
+ *
+ * @param flag Sync flag.
+ */
+void mali_sync_flag_get(struct mali_sync_flag *flag);
+
+/**
+ * Release sync flag reference.  If this was the last reference, the sync flag will be freed.
+ *
+ * @param flag Sync flag.
+ */
+void mali_sync_flag_put(struct mali_sync_flag *flag);
+
+/**
+ * Signal sync flag.  All sync fences created from this flag will be signaled.
+ *
+ * @param flag Sync flag to signal.
+ * @param error Negative error code, or 0 if no error.
+ */
+void mali_sync_flag_signal(struct mali_sync_flag *flag, int error);
+
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+#endif /* _MALI_SYNC_H_ */
diff --git a/mali/linux/mali_uk_types.h b/mali/linux/mali_uk_types.h
new file mode 100755
index 0000000..00ba28b
--- /dev/null
+++ b/mali/linux/mali_uk_types.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_UK_TYPES_H__
+#define __MALI_UK_TYPES_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_uk_types.h>
+
+#endif /* __MALI_UK_TYPES_H__ */
diff --git a/mali/linux/mali_ukk_core.c b/mali/linux/mali_ukk_core.c
new file mode 100755
index 0000000..62e31c9
--- /dev/null
+++ b/mali/linux/mali_ukk_core.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/slab.h>     /* memort allocation functions */
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)
+#include <linux/uaccess.h>
+#else
+#include <asm/uaccess.h>
+#endif
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs)
+{
+	_mali_uk_get_api_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != get_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_api_version(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+	if (0 != put_user(kargs.compatible, &uargs->compatible)) return -EFAULT;
+
+	return 0;
+}
+
+int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs)
+{
+	_mali_uk_get_api_version_v2_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != get_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_api_version_v2(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+	if (0 != put_user(kargs.compatible, &uargs->compatible)) return -EFAULT;
+
+	return 0;
+}
+
+int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs)
+{
+	_mali_uk_wait_for_notification_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_wait_for_notification(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (_MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS != kargs.type) {
+		kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+		if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_wait_for_notification_s))) return -EFAULT;
+	} else {
+		if (0 != put_user(kargs.type, &uargs->type)) return -EFAULT;
+	}
+
+	return 0;
+}
+
+int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs)
+{
+	_mali_uk_post_notification_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	if (0 != get_user(kargs.type, &uargs->type)) {
+		return -EFAULT;
+	}
+
+	err = _mali_ukk_post_notification(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs)
+{
+	_mali_uk_get_user_settings_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_user_settings(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = 0; /* prevent kernel address to be returned to user space */
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_user_settings_s))) return -EFAULT;
+
+	return 0;
+}
+
+int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs)
+{
+	_mali_uk_request_high_priority_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_request_high_priority(&kargs);
+
+	kargs.ctx = 0;
+
+	return map_errcode(err);
+}
+
+int pending_submit_wrapper(struct mali_session_data *session_data, _mali_uk_pending_submit_s __user *uargs)
+{
+	_mali_uk_pending_submit_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_pending_submit(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
diff --git a/mali/linux/mali_ukk_gp.c b/mali/linux/mali_ukk_gp.c
new file mode 100755
index 0000000..6909e52
--- /dev/null
+++ b/mali/linux/mali_ukk_gp.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)
+#include <linux/uaccess.h>
+#else
+#include <asm/uaccess.h>
+#endif
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_gp_start_job(session_data, uargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs)
+{
+	_mali_uk_get_gp_core_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err =  _mali_ukk_get_gp_core_version(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	/* no known transactions to roll-back */
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	return 0;
+}
+
+int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs)
+{
+	_mali_uk_gp_suspend_response_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_gp_suspend_response_s))) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_gp_suspend_response(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.cookie, &uargs->cookie)) return -EFAULT;
+
+	/* no known transactions to roll-back */
+	return 0;
+}
+
+int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs)
+{
+	_mali_uk_get_gp_number_of_cores_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_gp_number_of_cores(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	/* no known transactions to roll-back */
+
+	if (0 != put_user(kargs.number_of_cores, &uargs->number_of_cores)) return -EFAULT;
+
+	return 0;
+}
diff --git a/mali/linux/mali_ukk_mem.c b/mali/linux/mali_ukk_mem.c
new file mode 100755
index 0000000..ea21147
--- /dev/null
+++ b/mali/linux/mali_ukk_mem.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)
+#include <linux/uaccess.h>
+#else
+#include <asm/uaccess.h>
+#endif
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs)
+{
+	_mali_uk_alloc_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_alloc_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_allocate(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.backend_handle, &uargs->backend_handle)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs)
+{
+	_mali_uk_free_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_free_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_free(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.free_pages_nr, &uargs->free_pages_nr)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs)
+{
+	_mali_uk_bind_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_bind_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_bind(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs)
+{
+	_mali_uk_unbind_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_unbind_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_unbind(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+
+int mem_cow_wrapper(struct mali_session_data *session_data, _mali_uk_cow_mem_s __user *uargs)
+{
+	_mali_uk_cow_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_cow_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_cow(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.backend_handle, &uargs->backend_handle)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int mem_cow_modify_range_wrapper(struct mali_session_data *session_data, _mali_uk_cow_modify_range_s __user *uargs)
+{
+	_mali_uk_cow_modify_range_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_cow_modify_range_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_cow_modify_range(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.change_pages_nr, &uargs->change_pages_nr)) {
+		return -EFAULT;
+	}
+	return 0;
+}
+
+
+int mem_resize_mem_wrapper(struct mali_session_data *session_data, _mali_uk_mem_resize_s __user *uargs)
+{
+	_mali_uk_mem_resize_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_resize_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_resize(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs)
+{
+	_mali_uk_mem_write_safe_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_write_safe_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	/* Check if we can access the buffers */
+	if (!access_ok(VERIFY_WRITE, kargs.dest, kargs.size)
+	    || !access_ok(VERIFY_READ, kargs.src, kargs.size)) {
+		return -EINVAL;
+	}
+
+	/* Check if size wraps */
+	if ((kargs.size + kargs.dest) <= kargs.dest
+	    || (kargs.size + kargs.src) <= kargs.src) {
+		return -EINVAL;
+	}
+
+	err = _mali_ukk_mem_write_safe(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.size, &uargs->size)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+
+
+int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs)
+{
+	_mali_uk_query_mmu_page_table_dump_size_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_query_mmu_page_table_dump_size(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.size, &uargs->size)) return -EFAULT;
+
+	return 0;
+}
+
+int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs)
+{
+	_mali_uk_dump_mmu_page_table_s kargs;
+	_mali_osk_errcode_t err;
+	void __user *user_buffer;
+	void *buffer = NULL;
+	int rc = -EFAULT;
+
+	/* validate input */
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	/* the session_data pointer was validated by caller */
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_dump_mmu_page_table_s)))
+		goto err_exit;
+
+	user_buffer = (void __user *)(uintptr_t)kargs.buffer;
+	if (!access_ok(VERIFY_WRITE, user_buffer, kargs.size))
+		goto err_exit;
+
+	/* allocate temporary buffer (kernel side) to store mmu page table info */
+	if (kargs.size <= 0)
+		return -EINVAL;
+	/* Allow at most 8MiB buffers, this is more than enough to dump a fully
+	 * populated page table. */
+	if (kargs.size > SZ_8M)
+		return -EINVAL;
+
+	buffer = (void *)(uintptr_t)_mali_osk_valloc(kargs.size);
+	if (NULL == buffer) {
+		rc = -ENOMEM;
+		goto err_exit;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	kargs.buffer = (uintptr_t)buffer;
+	err = _mali_ukk_dump_mmu_page_table(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		rc = map_errcode(err);
+		goto err_exit;
+	}
+
+	/* copy mmu page table info back to user space and update pointers */
+	if (0 != copy_to_user(user_buffer, buffer, kargs.size))
+		goto err_exit;
+
+	kargs.register_writes = kargs.register_writes -
+				(uintptr_t)buffer + (uintptr_t)user_buffer;
+	kargs.page_table_dump = kargs.page_table_dump -
+				(uintptr_t)buffer + (uintptr_t)user_buffer;
+
+	if (0 != copy_to_user(uargs, &kargs, sizeof(kargs)))
+		goto err_exit;
+
+	rc = 0;
+
+err_exit:
+	if (buffer) _mali_osk_vfree(buffer);
+	return rc;
+}
+
+int mem_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+	_mali_uk_profiling_memory_usage_get_s kargs;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_memory_usage_get_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_mem_usage_get(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_memory_usage_get_s))) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
diff --git a/mali/linux/mali_ukk_pp.c b/mali/linux/mali_ukk_pp.c
new file mode 100755
index 0000000..17ac889
--- /dev/null
+++ b/mali/linux/mali_ukk_pp.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)
+#include <linux/uaccess.h>
+#else
+#include <asm/uaccess.h>
+#endif
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_pp_start_job(session_data, uargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the jobs were started successfully, 0 is returned.  If there was an error, but the
+	 * jobs were started, we return -ENOENT.  For anything else returned, the jobs were not
+	 * started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_pp_and_gp_start_job(session_data, uargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs)
+{
+	_mali_uk_get_pp_number_of_cores_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_get_pp_number_of_cores(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_pp_number_of_cores_s))) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs)
+{
+	_mali_uk_get_pp_core_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_pp_core_version(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	return 0;
+}
+
+int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs)
+{
+	_mali_uk_pp_disable_wb_s kargs;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_pp_disable_wb_s))) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	_mali_ukk_pp_job_disable_wb(&kargs);
+
+	return 0;
+}
diff --git a/mali/linux/mali_ukk_profiling.c b/mali/linux/mali_ukk_profiling.c
new file mode 100755
index 0000000..1c82611
--- /dev/null
+++ b/mali/linux/mali_ukk_profiling.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)
+#include <linux/uaccess.h>
+#else
+#include <asm/uaccess.h>
+#endif
+#include <linux/slab.h>
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs)
+{
+	_mali_uk_profiling_add_event_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_add_event_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_profiling_add_event(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs)
+{
+	_mali_uk_sw_counters_report_s kargs;
+	_mali_osk_errcode_t err;
+	u32 *counter_buffer;
+	u32 __user *counters;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_sw_counters_report_s))) {
+		return -EFAULT;
+	}
+
+	/* make sure that kargs.num_counters is [at least somewhat] sane */
+	if (kargs.num_counters > 10000) {
+		MALI_DEBUG_PRINT(1, ("User space attempted to allocate too many counters.\n"));
+		return -EINVAL;
+	}
+
+	counter_buffer = (u32 *)kmalloc(sizeof(u32) * kargs.num_counters, GFP_KERNEL);
+	if (NULL == counter_buffer) {
+		return -ENOMEM;
+	}
+
+	counters = (u32 *)(uintptr_t)kargs.counters;
+
+	if (0 != copy_from_user(counter_buffer, counters, sizeof(u32) * kargs.num_counters)) {
+		kfree(counter_buffer);
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	kargs.counters = (uintptr_t)counter_buffer;
+
+	err = _mali_ukk_sw_counters_report(&kargs);
+
+	kfree(counter_buffer);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int profiling_get_stream_fd_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_stream_fd_get_s __user *uargs)
+{
+	_mali_uk_profiling_stream_fd_get_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_stream_fd_get_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_profiling_stream_fd_get(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_stream_fd_get_s))) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int profiling_control_set_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_control_set_s __user *uargs)
+{
+	_mali_uk_profiling_control_set_s kargs;
+	_mali_osk_errcode_t err;
+	u8 *kernel_control_data = NULL;
+	u8 *kernel_response_data = NULL;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != get_user(kargs.control_packet_size, &uargs->control_packet_size)) return -EFAULT;
+	if (0 != get_user(kargs.response_packet_size, &uargs->response_packet_size)) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+
+
+	/* Sanity check about the size */
+	if (kargs.control_packet_size > PAGE_SIZE || kargs.response_packet_size > PAGE_SIZE)
+		return -EINVAL;
+
+	if (0 !=  kargs.control_packet_size) {
+
+		if (0 == kargs.response_packet_size)
+			return -EINVAL;
+
+		kernel_control_data = _mali_osk_calloc(1, kargs.control_packet_size);
+		if (NULL == kernel_control_data) {
+			return -ENOMEM;
+		}
+
+		kernel_response_data = _mali_osk_calloc(1, kargs.response_packet_size);
+		if (NULL == kernel_response_data) {
+			_mali_osk_free(kernel_control_data);
+			return -ENOMEM;
+		}
+
+		kargs.control_packet_data = (uintptr_t)kernel_control_data;
+		kargs.response_packet_data = (uintptr_t)kernel_response_data;
+
+		if (0 != copy_from_user((void *)(uintptr_t)kernel_control_data, (void *)(uintptr_t)uargs->control_packet_data, kargs.control_packet_size)) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return -EFAULT;
+		}
+
+		err = _mali_ukk_profiling_control_set(&kargs);
+		if (_MALI_OSK_ERR_OK != err) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return map_errcode(err);
+		}
+
+		if (0 != kargs.response_packet_size && 0 != copy_to_user(((void *)(uintptr_t)uargs->response_packet_data), ((void *)(uintptr_t)kargs.response_packet_data), kargs.response_packet_size)) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return -EFAULT;
+		}
+
+		if (0 != put_user(kargs.response_packet_size, &uargs->response_packet_size)) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return -EFAULT;
+		}
+
+		_mali_osk_free(kernel_control_data);
+		_mali_osk_free(kernel_response_data);
+	} else {
+
+		err = _mali_ukk_profiling_control_set(&kargs);
+		if (_MALI_OSK_ERR_OK != err) {
+			return map_errcode(err);
+		}
+
+	}
+	return 0;
+}
diff --git a/mali/linux/mali_ukk_soft_job.c b/mali/linux/mali_ukk_soft_job.c
new file mode 100755
index 0000000..5d700ea
--- /dev/null
+++ b/mali/linux/mali_ukk_soft_job.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)
+#include <linux/uaccess.h>
+#else
+#include <asm/uaccess.h>
+#endif
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+#include "mali_soft_job.h"
+#include "mali_timeline.h"
+
+int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs)
+{
+	_mali_uk_soft_job_start_s kargs;
+	u32 type, point;
+	u64 user_job;
+	struct mali_timeline_fence fence;
+	struct mali_soft_job *job = NULL;
+	u32 __user *job_id_ptr = NULL;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session, -EINVAL);
+
+	MALI_DEBUG_ASSERT_POINTER(session->soft_job_system);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(kargs))) {
+		return -EFAULT;
+	}
+
+	type = kargs.type;
+	user_job = kargs.user_job;
+	job_id_ptr = (u32 __user *)(uintptr_t)kargs.job_id_ptr;
+
+	mali_timeline_fence_copy_uk_fence(&fence, &kargs.fence);
+
+	if ((MALI_SOFT_JOB_TYPE_USER_SIGNALED != type) && (MALI_SOFT_JOB_TYPE_SELF_SIGNALED != type)) {
+		MALI_DEBUG_PRINT_ERROR(("Invalid soft job type specified\n"));
+		return -EINVAL;
+	}
+
+	/* Create soft job. */
+	job = mali_soft_job_create(session->soft_job_system, (enum mali_soft_job_type)type, user_job);
+	if (unlikely(NULL == job)) {
+		return map_errcode(_MALI_OSK_ERR_NOMEM);
+	}
+
+	/* Write job id back to user space. */
+	if (0 != put_user(job->id, job_id_ptr)) {
+		MALI_PRINT_ERROR(("Mali Soft Job: failed to put job id"));
+		mali_soft_job_destroy(job);
+		return map_errcode(_MALI_OSK_ERR_NOMEM);
+	}
+
+	/* Start soft job. */
+	point = mali_soft_job_start(job, &fence);
+
+	if (0 != put_user(point, &uargs->point)) {
+		/* Let user space know that something failed after the job was started. */
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs)
+{
+	u32 job_id;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != get_user(job_id, &uargs->job_id)) return -EFAULT;
+
+	err = mali_soft_job_system_signal_job(session->soft_job_system, job_id);
+
+	return map_errcode(err);
+}
diff --git a/mali/linux/mali_ukk_timeline.c b/mali/linux/mali_ukk_timeline.c
new file mode 100755
index 0000000..8670d04
--- /dev/null
+++ b/mali/linux/mali_ukk_timeline.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)
+#include <linux/uaccess.h>
+#else
+#include <asm/uaccess.h>
+#endif
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+#include "mali_timeline.h"
+#include "mali_timeline_fence_wait.h"
+#include "mali_timeline_sync_fence.h"
+
+int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs)
+{
+	u32 val;
+	mali_timeline_id timeline;
+	mali_timeline_point point;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != get_user(val, &uargs->timeline)) return -EFAULT;
+
+	if (MALI_UK_TIMELINE_MAX <= val) {
+		return -EINVAL;
+	}
+
+	timeline = (mali_timeline_id)val;
+
+	point = mali_timeline_system_get_latest_point(session->timeline_system, timeline);
+
+	if (0 != put_user(point, &uargs->point)) return -EFAULT;
+
+	return 0;
+}
+
+int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs)
+{
+	u32 timeout, status;
+	mali_bool ret;
+	_mali_uk_fence_t uk_fence;
+	struct mali_timeline_fence fence;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t))) return -EFAULT;
+	if (0 != get_user(timeout, &uargs->timeout)) return -EFAULT;
+
+	mali_timeline_fence_copy_uk_fence(&fence, &uk_fence);
+
+	ret = mali_timeline_fence_wait(session->timeline_system, &fence, timeout);
+	status = (MALI_TRUE == ret ? 1 : 0);
+
+	if (0 != put_user(status, &uargs->status)) return -EFAULT;
+
+	return 0;
+}
+
+int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs)
+{
+	s32 sync_fd = -1;
+	_mali_uk_fence_t uk_fence;
+	struct mali_timeline_fence fence;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t))) return -EFAULT;
+	mali_timeline_fence_copy_uk_fence(&fence, &uk_fence);
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	sync_fd = mali_timeline_sync_fence_create(session->timeline_system, &fence);
+#else
+	sync_fd = -1;
+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
+	if (0 != put_user(sync_fd, &uargs->sync_fd)) return -EFAULT;
+
+	return 0;
+}
diff --git a/mali/linux/mali_ukk_vsync.c b/mali/linux/mali_ukk_vsync.c
new file mode 100755
index 0000000..e3f1d32
--- /dev/null
+++ b/mali/linux/mali_ukk_vsync.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)
+#include <linux/uaccess.h>
+#else
+#include <asm/uaccess.h>
+#endif
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+
+int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs)
+{
+	_mali_uk_vsync_event_report_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_vsync_event_report_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_vsync_event_report(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
diff --git a/mali/linux/mali_ukk_wrappers.h b/mali/linux/mali_ukk_wrappers.h
new file mode 100755
index 0000000..7e59346
--- /dev/null
+++ b/mali/linux/mali_ukk_wrappers.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_ukk_wrappers.h
+ * Defines the wrapper functions for each user-kernel function
+ */
+
+#ifndef __MALI_UKK_WRAPPERS_H__
+#define __MALI_UKK_WRAPPERS_H__
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs);
+int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs);
+int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs);
+int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs);
+int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs);
+int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs);
+int pending_submit_wrapper(struct mali_session_data *session_data, _mali_uk_pending_submit_s __user *uargs);
+
+int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs);
+int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs);
+int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs);
+int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs);
+int mem_cow_wrapper(struct mali_session_data *session_data, _mali_uk_cow_mem_s __user *uargs);
+int mem_cow_modify_range_wrapper(struct mali_session_data *session_data, _mali_uk_cow_modify_range_s __user *uargs);
+int mem_resize_mem_wrapper(struct mali_session_data *session_data, _mali_uk_mem_resize_s __user *uargs);
+int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs);
+int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs);
+int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs);
+int mem_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs);
+
+int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs);
+int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs);
+int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs);
+int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs);
+int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs);
+int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs);
+int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs);
+int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs);
+int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs);
+int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs);
+int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs);
+int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs);
+int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs);
+int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs);
+
+int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs);
+int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs);
+int profiling_get_stream_fd_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_stream_fd_get_s __user *uargs);
+int profiling_control_set_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_control_set_s __user *uargs);
+
+int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs);
+
+
+int map_errcode(_mali_osk_errcode_t err);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UKK_WRAPPERS_H__ */
diff --git a/mali/readme.txt b/mali/readme.txt
new file mode 100755
index 0000000..6785ac9
--- /dev/null
+++ b/mali/readme.txt
@@ -0,0 +1,28 @@
+Building the Mali Device Driver for Linux
+-----------------------------------------
+
+Build the Mali Device Driver for Linux by running the following make command:
+
+KDIR=<kdir_path> USING_UMP=<ump_option> BUILD=<build_option> make
+
+where
+    kdir_path: Path to your Linux Kernel directory
+    ump_option: 1 = Enable UMP support(*)
+                0 = disable UMP support
+    build_option: debug = debug build of driver
+                  release = release build of driver
+
+(*)  For newer Linux Kernels, the Module.symvers file for the UMP device driver
+     must be available. The UMP_SYMVERS_FILE variable in the Makefile should
+     point to this file. This file is generated when the UMP driver is built.
+
+The result will be a mali.ko file, which can be loaded into the Linux kernel
+by using the insmod command.
+
+Use of UMP is not recommended. The dma-buf API in the Linux kernel has
+replaced UMP. The Mali Device Driver will be built with dma-buf support if the
+kernel config includes enabled dma-buf.
+
+The kernel needs to be provided with a platform_device struct for the Mali GPU
+device. See the mali_utgard.h header file for how to set up the Mali GPU
+resources.
diff --git a/mali/regs/mali_200_regs.h b/mali/regs/mali_200_regs.h
new file mode 100755
index 0000000..c904ad2
--- /dev/null
+++ b/mali/regs/mali_200_regs.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2010, 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _MALI200_REGS_H_
+#define _MALI200_REGS_H_
+
+/**
+ *  Enum for management register addresses.
+ */
+enum mali200_mgmt_reg {
+	MALI200_REG_ADDR_MGMT_VERSION                              = 0x1000,
+	MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR               = 0x1004,
+	MALI200_REG_ADDR_MGMT_STATUS                               = 0x1008,
+	MALI200_REG_ADDR_MGMT_CTRL_MGMT                            = 0x100c,
+
+	MALI200_REG_ADDR_MGMT_INT_RAWSTAT                          = 0x1020,
+	MALI200_REG_ADDR_MGMT_INT_CLEAR                            = 0x1024,
+	MALI200_REG_ADDR_MGMT_INT_MASK                             = 0x1028,
+	MALI200_REG_ADDR_MGMT_INT_STATUS                           = 0x102c,
+
+	MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS                     = 0x1050,
+
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE                    = 0x1080,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC                       = 0x1084,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_LIMIT                     = 0x1088,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE                     = 0x108c,
+
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE                    = 0x10a0,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC                       = 0x10a4,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE                     = 0x10ac,
+
+	MALI200_REG_ADDR_MGMT_PERFMON_CONTR                        = 0x10b0,
+	MALI200_REG_ADDR_MGMT_PERFMON_BASE                         = 0x10b4,
+
+	MALI200_REG_SIZEOF_REGISTER_BANK                           = 0x10f0
+
+};
+
+#define MALI200_REG_VAL_PERF_CNT_ENABLE 1
+
+enum mali200_mgmt_ctrl_mgmt {
+	MALI200_REG_VAL_CTRL_MGMT_STOP_BUS         = (1 << 0),
+	MALI200_REG_VAL_CTRL_MGMT_FLUSH_CACHES     = (1 << 3),
+	MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET      = (1 << 5),
+	MALI200_REG_VAL_CTRL_MGMT_START_RENDERING  = (1 << 6),
+	MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET     = (1 << 7), /* Only valid for Mali-300 and later */
+};
+
+enum mali200_mgmt_irq {
+	MALI200_REG_VAL_IRQ_END_OF_FRAME          = (1 << 0),
+	MALI200_REG_VAL_IRQ_END_OF_TILE           = (1 << 1),
+	MALI200_REG_VAL_IRQ_HANG                  = (1 << 2),
+	MALI200_REG_VAL_IRQ_FORCE_HANG            = (1 << 3),
+	MALI200_REG_VAL_IRQ_BUS_ERROR             = (1 << 4),
+	MALI200_REG_VAL_IRQ_BUS_STOP              = (1 << 5),
+	MALI200_REG_VAL_IRQ_CNT_0_LIMIT           = (1 << 6),
+	MALI200_REG_VAL_IRQ_CNT_1_LIMIT           = (1 << 7),
+	MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR  = (1 << 8),
+	MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND = (1 << 9),
+	MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW  = (1 << 10),
+	MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW   = (1 << 11),
+	MALI400PP_REG_VAL_IRQ_RESET_COMPLETED       = (1 << 12),
+};
+
+#define MALI200_REG_VAL_IRQ_MASK_ALL  ((enum mali200_mgmt_irq) (\
+				       MALI200_REG_VAL_IRQ_END_OF_FRAME                           |\
+				       MALI200_REG_VAL_IRQ_END_OF_TILE                            |\
+				       MALI200_REG_VAL_IRQ_HANG                                   |\
+				       MALI200_REG_VAL_IRQ_FORCE_HANG                             |\
+				       MALI200_REG_VAL_IRQ_BUS_ERROR                              |\
+				       MALI200_REG_VAL_IRQ_BUS_STOP                               |\
+				       MALI200_REG_VAL_IRQ_CNT_0_LIMIT                            |\
+				       MALI200_REG_VAL_IRQ_CNT_1_LIMIT                            |\
+				       MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR                   |\
+				       MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND                  |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW                   |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW                    |\
+				       MALI400PP_REG_VAL_IRQ_RESET_COMPLETED))
+
+#define MALI200_REG_VAL_IRQ_MASK_USED ((enum mali200_mgmt_irq) (\
+				       MALI200_REG_VAL_IRQ_END_OF_FRAME                           |\
+				       MALI200_REG_VAL_IRQ_FORCE_HANG                             |\
+				       MALI200_REG_VAL_IRQ_BUS_ERROR                              |\
+				       MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR                   |\
+				       MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND                  |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW                   |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW))
+
+#define MALI200_REG_VAL_IRQ_MASK_NONE ((enum mali200_mgmt_irq)(0))
+
+enum mali200_mgmt_status {
+	MALI200_REG_VAL_STATUS_RENDERING_ACTIVE     = (1 << 0),
+	MALI200_REG_VAL_STATUS_BUS_STOPPED          = (1 << 4),
+};
+
+enum mali200_render_unit {
+	MALI200_REG_ADDR_FRAME = 0x0000,
+	MALI200_REG_ADDR_RSW   = 0x0004,
+	MALI200_REG_ADDR_STACK = 0x0030,
+	MALI200_REG_ADDR_STACK_SIZE = 0x0034,
+	MALI200_REG_ADDR_ORIGIN_OFFSET_X  = 0x0040
+};
+
+enum mali200_wb_unit {
+	MALI200_REG_ADDR_WB0 = 0x0100,
+	MALI200_REG_ADDR_WB1 = 0x0200,
+	MALI200_REG_ADDR_WB2 = 0x0300
+};
+
+enum mali200_wb_unit_regs {
+	MALI200_REG_ADDR_WB_SOURCE_SELECT = 0x0000,
+	MALI200_REG_ADDR_WB_SOURCE_ADDR   = 0x0004,
+};
+
+/* This should be in the top 16 bit of the version register of Mali PP */
+#define MALI200_PP_PRODUCT_ID 0xC807
+#define MALI300_PP_PRODUCT_ID 0xCE07
+#define MALI400_PP_PRODUCT_ID 0xCD07
+#define MALI450_PP_PRODUCT_ID 0xCF07
+#define MALI470_PP_PRODUCT_ID 0xCF08
+
+
+
+#endif /* _MALI200_REGS_H_ */
diff --git a/mali/regs/mali_gp_regs.h b/mali/regs/mali_gp_regs.h
new file mode 100755
index 0000000..435953d
--- /dev/null
+++ b/mali/regs/mali_gp_regs.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2010, 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _MALIGP2_CONROL_REGS_H_
+#define _MALIGP2_CONROL_REGS_H_
+
+/**
+ * These are the different geometry processor control registers.
+ * Their usage is to control and monitor the operation of the
+ * Vertex Shader and the Polygon List Builder in the geometry processor.
+ * Addresses are in 32-bit word relative sizes.
+ * @see [P0081] "Geometry Processor Data Structures" for details
+ */
+
+typedef enum {
+	MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR           = 0x00,
+	MALIGP2_REG_ADDR_MGMT_VSCL_END_ADDR             = 0x04,
+	MALIGP2_REG_ADDR_MGMT_PLBUCL_START_ADDR         = 0x08,
+	MALIGP2_REG_ADDR_MGMT_PLBUCL_END_ADDR           = 0x0c,
+	MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR     = 0x10,
+	MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR       = 0x14,
+	MALIGP2_REG_ADDR_MGMT_CMD                       = 0x20,
+	MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT               = 0x24,
+	MALIGP2_REG_ADDR_MGMT_INT_CLEAR                 = 0x28,
+	MALIGP2_REG_ADDR_MGMT_INT_MASK                  = 0x2C,
+	MALIGP2_REG_ADDR_MGMT_INT_STAT                  = 0x30,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE         = 0x3C,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE         = 0x40,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC            = 0x44,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC            = 0x48,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE          = 0x4C,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE          = 0x50,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_LIMIT          = 0x54,
+	MALIGP2_REG_ADDR_MGMT_STATUS                    = 0x68,
+	MALIGP2_REG_ADDR_MGMT_VERSION                   = 0x6C,
+	MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR_READ      = 0x80,
+	MALIGP2_REG_ADDR_MGMT_PLBCL_START_ADDR_READ     = 0x84,
+	MALIGP2_CONTR_AXI_BUS_ERROR_STAT                = 0x94,
+	MALIGP2_REGISTER_ADDRESS_SPACE_SIZE             = 0x98,
+} maligp_reg_addr_mgmt_addr;
+
+#define MALIGP2_REG_VAL_PERF_CNT_ENABLE 1
+
+/**
+ * Commands to geometry processor.
+ *  @see MALIGP2_CTRL_REG_CMD
+ */
+typedef enum {
+	MALIGP2_REG_VAL_CMD_START_VS                    = (1 << 0),
+	MALIGP2_REG_VAL_CMD_START_PLBU                  = (1 << 1),
+	MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC   = (1 << 4),
+	MALIGP2_REG_VAL_CMD_RESET                               = (1 << 5),
+	MALIGP2_REG_VAL_CMD_FORCE_HANG                  = (1 << 6),
+	MALIGP2_REG_VAL_CMD_STOP_BUS                    = (1 << 9),
+	MALI400GP_REG_VAL_CMD_SOFT_RESET                = (1 << 10), /* only valid for Mali-300 and later */
+} mgp_contr_reg_val_cmd;
+
+
+/**  @defgroup MALIGP2_IRQ
+ * Interrupt status of geometry processor.
+ *  @see MALIGP2_CTRL_REG_INT_RAWSTAT, MALIGP2_REG_ADDR_MGMT_INT_CLEAR,
+ *       MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_ADDR_MGMT_INT_STAT
+ * @{
+ */
+#define MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      (1 << 0)
+#define MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    (1 << 1)
+#define MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     (1 << 2)
+#define MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ          (1 << 3)
+#define MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ        (1 << 4)
+#define MALIGP2_REG_VAL_IRQ_HANG                (1 << 5)
+#define MALIGP2_REG_VAL_IRQ_FORCE_HANG          (1 << 6)
+#define MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT    (1 << 7)
+#define MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT    (1 << 8)
+#define MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     (1 << 9)
+#define MALIGP2_REG_VAL_IRQ_SYNC_ERROR          (1 << 10)
+#define MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       (1 << 11)
+#define MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED     (1 << 12)
+#define MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      (1 << 13)
+#define MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     (1 << 14)
+#define MALI400GP_REG_VAL_IRQ_RESET_COMPLETED     (1 << 19)
+#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW (1 << 20)
+#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  (1 << 21)
+#define MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS  (1 << 22)
+
+/* Mask defining all IRQs in Mali GP */
+#define MALIGP2_REG_VAL_IRQ_MASK_ALL \
+	(\
+	 MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     | \
+	 MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ          | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ        | \
+	 MALIGP2_REG_VAL_IRQ_HANG                | \
+	 MALIGP2_REG_VAL_IRQ_FORCE_HANG          | \
+	 MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT    | \
+	 MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT    | \
+	 MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     | \
+	 MALIGP2_REG_VAL_IRQ_SYNC_ERROR          | \
+	 MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       | \
+	 MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED     | \
+	 MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      | \
+	 MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     | \
+	 MALI400GP_REG_VAL_IRQ_RESET_COMPLETED     | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  | \
+	 MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+/* Mask defining the IRQs in Mali GP which we use */
+#define MALIGP2_REG_VAL_IRQ_MASK_USED \
+	(\
+	 MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     | \
+	 MALIGP2_REG_VAL_IRQ_FORCE_HANG          | \
+	 MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     | \
+	 MALIGP2_REG_VAL_IRQ_SYNC_ERROR          | \
+	 MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       | \
+	 MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      | \
+	 MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  | \
+	 MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+/* Mask defining non IRQs on MaliGP2*/
+#define MALIGP2_REG_VAL_IRQ_MASK_NONE 0
+
+/** }@ defgroup MALIGP2_IRQ*/
+
+/** @defgroup MALIGP2_STATUS
+ * The different Status values to the geometry processor.
+ *  @see MALIGP2_CTRL_REG_STATUS
+ * @{
+ */
+#define MALIGP2_REG_VAL_STATUS_VS_ACTIVE         0x0002
+#define MALIGP2_REG_VAL_STATUS_BUS_STOPPED       0x0004
+#define MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE       0x0008
+#define MALIGP2_REG_VAL_STATUS_BUS_ERROR         0x0040
+#define MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR   0x0100
+/** }@ defgroup MALIGP2_STATUS*/
+
+#define MALIGP2_REG_VAL_STATUS_MASK_ACTIVE (\
+		MALIGP2_REG_VAL_STATUS_VS_ACTIVE|\
+		MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE)
+
+
+#define MALIGP2_REG_VAL_STATUS_MASK_ERROR (\
+		MALIGP2_REG_VAL_STATUS_BUS_ERROR |\
+		MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR )
+
+/* This should be in the top 16 bit of the version register of gp.*/
+#define MALI200_GP_PRODUCT_ID 0xA07
+#define MALI300_GP_PRODUCT_ID 0xC07
+#define MALI400_GP_PRODUCT_ID 0xB07
+#define MALI450_GP_PRODUCT_ID 0xD07
+
+/**
+ * The different sources for instrumented on the geometry processor.
+ *  @see MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC
+ */
+
+enum MALIGP2_cont_reg_perf_cnt_src {
+	MALIGP2_REG_VAL_PERF_CNT1_SRC_NUMBER_OF_VERTICES_PROCESSED = 0x0a,
+};
+
+#endif
diff --git a/mali/timestamp-arm11-cc/mali_timestamp.c b/mali/timestamp-arm11-cc/mali_timestamp.c
new file mode 100755
index 0000000..f7f7baf
--- /dev/null
+++ b/mali/timestamp-arm11-cc/mali_timestamp.c
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2010-2011, 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timestamp.h"
+
+/* This file is intentionally left empty, as all functions are inlined in mali_profiling_sampler.h */
diff --git a/mali/timestamp-arm11-cc/mali_timestamp.h b/mali/timestamp-arm11-cc/mali_timestamp.h
new file mode 100755
index 0000000..757f643
--- /dev/null
+++ b/mali/timestamp-arm11-cc/mali_timestamp.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMESTAMP_H__
+#define __MALI_TIMESTAMP_H__
+
+#include "mali_osk.h"
+
+MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void)
+{
+	/*
+	 * reset counters and overflow flags
+	 */
+
+	u32 mask = (1 << 0) | /* enable all three counters */
+		   (0 << 1) | /* reset both Count Registers to 0x0 */
+		   (1 << 2) | /* reset the Cycle Counter Register to 0x0 */
+		   (0 << 3) | /* 1 = Cycle Counter Register counts every 64th processor clock cycle */
+		   (0 << 4) | /* Count Register 0 interrupt enable */
+		   (0 << 5) | /* Count Register 1 interrupt enable */
+		   (0 << 6) | /* Cycle Counter interrupt enable */
+		   (0 << 8) | /* Count Register 0 overflow flag (clear or write, flag on read) */
+		   (0 << 9) | /* Count Register 1 overflow flag (clear or write, flag on read) */
+		   (1 << 10); /* Cycle Counter Register overflow flag (clear or write, flag on read) */
+
+	__asm__ __volatile__("MCR    p15, 0, %0, c15, c12, 0" : : "r"(mask));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE u64 _mali_timestamp_get(void)
+{
+	u32 result;
+
+	/* this is for the clock cycles */
+	__asm__ __volatile__("MRC    p15, 0, %0, c15, c12, 1" : "=r"(result));
+
+	return (u64)result;
+}
+
+#endif /* __MALI_TIMESTAMP_H__ */
diff --git a/mali/timestamp-default/mali_timestamp.c b/mali/timestamp-default/mali_timestamp.c
new file mode 100755
index 0000000..f7f7baf
--- /dev/null
+++ b/mali/timestamp-default/mali_timestamp.c
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2010-2011, 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timestamp.h"
+
+/* This file is intentionally left empty, as all functions are inlined in mali_profiling_sampler.h */
diff --git a/mali/timestamp-default/mali_timestamp.h b/mali/timestamp-default/mali_timestamp.h
new file mode 100755
index 0000000..21700fe
--- /dev/null
+++ b/mali/timestamp-default/mali_timestamp.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMESTAMP_H__
+#define __MALI_TIMESTAMP_H__
+
+#include "mali_osk.h"
+
+MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void)
+{
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE u64 _mali_timestamp_get(void)
+{
+	return _mali_osk_boot_time_get_ns();
+}
+
+#endif /* __MALI_TIMESTAMP_H__ */
diff --git a/midgard/r11p0/android/patches/K/android-k.sh b/midgard/r11p0/android/patches/K/android-k.sh
new file mode 100755
index 0000000..4e44d5e
--- /dev/null
+++ b/midgard/r11p0/android/patches/K/android-k.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+
+#
+# Copyright (C) 2014-2015 ARM Limited. All rights reserved.
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Android K script for LLVM 3.5 and above support
+#
+# This script applies two minor patches and checks-out libcxx and libcxxabi libraries
+# adding C++11 support in Android K required for LLVM3.5 and above.
+
+
+if [ -z "$ANDROID_BUILD_TOP" ]; then
+    echo "[ANDROID-PATCH] This script must be executed in an Android build environment"
+    echo "                ANDROID_BUILD_TOP is undefined"
+    echo "                please do \"source build/envseup.sh; lunch\"."
+    exit 1
+fi
+
+ANDROID_REPOSITORY_URL="https://android.googlesource.com/"
+
+REFERENCE_ARGUMENT=""
+
+# Proccess arguments. Valid arguments are:
+#        -u ARG      Override the Android repository url
+#        -r ARG      Provide a path to a reference git repository
+while getopts "u:r:" opt; do
+  case $opt in
+    u)
+      ANDROID_REPOSITORY_URL=$OPTARG
+      ;;
+
+    r)
+      REFERENCE_ARGUMENT="--reference "$OPTARG
+      ;;
+
+    \?)
+      echo "Invalid option: -$OPTARG" >&2
+      exit 1
+
+      ;;
+    :)
+      echo "Option -$OPTARG requires an argument." >&2
+      exit 1
+      ;;
+
+  esac
+done
+
+MIDGARD_DDK_WD=$(pwd)
+
+cd $ANDROID_BUILD_TOP/external
+
+#Checkout libcxx from Android Master required by LLVM due to c++11
+git clone $ANDROID_REPOSITORY_URL/platform/external/libcxx $REFERENCE_ARGUMENT
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] git clone of libcxx failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+cd libcxx
+git checkout 27ae7cb782821a4f2d3813522ee411cd978bcd85 -q
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] checkout of libcxx@27ae7cb782821a4f2d3813522ee411cd978bcd85 revision failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/libcxx.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Patching libcxx failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Checkout libcxxabi from Android Master required by LLVM due to c++11
+cd $ANDROID_BUILD_TOP/external
+git clone $ANDROID_REPOSITORY_URL/platform/external/libcxxabi $REFERENCE_ARGUMENT
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] git clone of libcxxabi failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+cd libcxxabi
+git checkout 285d67f35f6044cf733091e36248405ca967c62c -q
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] checkout of libcxxabi@285d67f35f6044cf733091e36248405ca967c62c revision failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+
+#Apply patch to bioncs
+cd $ANDROID_BUILD_TOP/bionic/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/bionics.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Patching bionics failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Add locale support to bioncs
+cd $ANDROID_BUILD_TOP/bionic/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/locale.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Adding locale support failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Add LLVM backend ARM fix to support -O0 compilation
+cd $ANDROID_BUILD_TOP/external/llvm/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/llvm.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Fixing ARM LLVM backend failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+echo "[ANDROID-PATCH] Patching completed successfully"
+cd $MIDGARD_DDK_WD
diff --git a/midgard/r11p0/android/patches/K/bionics.diff b/midgard/r11p0/android/patches/K/bionics.diff
new file mode 100755
index 0000000..b86cedf
--- /dev/null
+++ b/midgard/r11p0/android/patches/K/bionics.diff
@@ -0,0 +1,202 @@
+diff --git a/libc/Android.mk b/libc/Android.mk
+index 9610c14..d3dbe81 100644
+--- a/libc/Android.mk
++++ b/libc/Android.mk
+@@ -214,6 +215,7 @@ libc_bionic_src_:= \
+     bionic/libc_init_common.cpp \
+     bionic/libc_logging.cpp \
+     bionic/libgen.cpp \
++    bionic/locale.cpp \
+     bionic/mmap.cpp \
+     bionic/pthread_attr.cpp \
+     bionic/pthread_detach.cpp \
+@@ -232,7 +232,6 @@ libc_bionic_src_files := \
+     bionic/scandir.cpp \
+     bionic/sched_getaffinity.cpp \
+     bionic/__set_errno.cpp \
+-    bionic/setlocale.cpp \
+     bionic/signalfd.cpp \
+     bionic/sigwait.cpp \
+     bionic/statvfs.cpp \
+diff --git a/libc/bionic/wchar.cpp b/libc/bionic/wchar.cpp
+index 50a3875..cc830b7 100644
+--- a/libc/bionic/wchar.cpp
++++ b/libc/bionic/wchar.cpp
+@@ -96,6 +96,7 @@ int swscanf(const wchar_t* /*s*/, const wchar_t* /*format*/, ... ) {
+
+ int iswalnum(wint_t wc) { return isalnum(wc); }
+ int iswalpha(wint_t wc) { return isalpha(wc); }
++int iswblank(wint_t wc) { return isblank(wc); }
+ int iswcntrl(wint_t wc) { return iscntrl(wc); }
+ int iswdigit(wint_t wc) { return isdigit(wc); }
+ int iswgraph(wint_t wc) { return isgraph(wc); }
+diff --git a/libc/include/assert.h b/libc/include/assert.h
+index 62470f5..361a5ff 100644
+--- a/libc/include/assert.h
++++ b/libc/include/assert.h
+@@ -60,6 +60,6 @@
+ #endif
+
+ __BEGIN_DECLS
+-__dead void __assert(const char *, int, const char *);
+-__dead void __assert2(const char *, int, const char *, const char *);
++__dead void __assert(const char *, int, const char *) __noreturn;
++__dead void __assert2(const char *, int, const char *, const char *) __noreturn;
+ __END_DECLS
+diff --git a/libc/include/ctype.h b/libc/include/ctype.h
+index 5557e31..a66df12 100644
+--- a/libc/include/ctype.h
++++ b/libc/include/ctype.h
+@@ -45,11 +45,14 @@
+ #define	_CTYPE_U	0x01
+ #define	_CTYPE_L	0x02
+ #define	_CTYPE_N	0x04
++#define	_CTYPE_D	0x04
+ #define	_CTYPE_S	0x08
+ #define	_CTYPE_P	0x10
+ #define	_CTYPE_C	0x20
+ #define	_CTYPE_X	0x40
+ #define	_CTYPE_B	0x80
++#define	_CTYPE_R	(_CTYPE_P|_CTYPE_U|_CTYPE_L|_CTYPE_D|_CTYPE_B)
++#define	_CTYPE_A	(_CTYPE_L|_CTYPE_U)
+
+ __BEGIN_DECLS
+
+diff --git a/libc/include/locale.h b/libc/include/locale.h
+index 65b5c7d..6989851 100644
+--- a/libc/include/locale.h
++++ b/libc/include/locale.h
+@@ -29,6 +29,7 @@
+ #define _LOCALE_H_
+
+ #include <sys/cdefs.h>
++#include <xlocale.h>
+
+ __BEGIN_DECLS
+
+@@ -43,17 +44,65 @@ enum {
+     LC_PAPER     = 7,
+     LC_NAME      = 8,
+     LC_ADDRESS   = 9,
+-
+     LC_TELEPHONE      = 10,
+     LC_MEASUREMENT    = 11,
+     LC_IDENTIFICATION = 12
+ };
+
+-extern char *setlocale(int category, const char *locale);
++#define LC_CTYPE_MASK          (1 << LC_CTYPE)
++#define LC_NUMERIC_MASK        (1 << LC_NUMERIC)
++#define LC_TIME_MASK           (1 << LC_TIME)
++#define LC_COLLATE_MASK        (1 << LC_COLLATE)
++#define LC_MONETARY_MASK       (1 << LC_MONETARY)
++#define LC_MESSAGES_MASK       (1 << LC_MESSAGES)
++#define LC_PAPER_MASK          (1 << LC_PAPER)
++#define LC_NAME_MASK           (1 << LC_NAME)
++#define LC_ADDRESS_MASK        (1 << LC_ADDRESS)
++#define LC_TELEPHONE_MASK      (1 << LC_TELEPHONE)
++#define LC_MEASUREMENT_MASK    (1 << LC_MEASUREMENT)
++#define LC_IDENTIFICATION_MASK (1 << LC_IDENTIFICATION)
++
++#define LC_ALL_MASK (LC_CTYPE_MASK | LC_NUMERIC_MASK | LC_TIME_MASK | LC_COLLATE_MASK | \
++                     LC_MONETARY_MASK | LC_MESSAGES_MASK | LC_PAPER_MASK | LC_NAME_MASK | \
++                     LC_ADDRESS_MASK | LC_TELEPHONE_MASK | LC_MEASUREMENT_MASK | \
++                     LC_IDENTIFICATION_MASK)
++
++struct lconv {
++    char* decimal_point;
++    char* thousands_sep;
++    char* grouping;
++    char* int_curr_symbol;
++    char* currency_symbol;
++    char* mon_decimal_point;
++    char* mon_thousands_sep;
++    char* mon_grouping;
++    char* positive_sign;
++    char* negative_sign;
++    char  int_frac_digits;
++    char  frac_digits;
++    char  p_cs_precedes;
++    char  p_sep_by_space;
++    char  n_cs_precedes;
++    char  n_sep_by_space;
++    char  p_sign_posn;
++    char  n_sign_posn;
++    char  int_p_cs_precedes;
++    char  int_p_sep_by_space;
++    char  int_n_cs_precedes;
++    char  int_n_sep_by_space;
++    char  int_p_sign_posn;
++    char  int_n_sign_posn;
++};
++
++struct lconv* localeconv(void);
++
++locale_t duplocale(locale_t);
++void freelocale(locale_t);
++locale_t newlocale(int, const char*, locale_t);
++char* setlocale(int, const char*);
++locale_t uselocale(locale_t);
+
+-/* Make libstdc++-v3 happy.  */
+-struct lconv { };
+-struct lconv *localeconv(void);
++#define LC_GLOBAL_LOCALE ((locale_t) -1L)
+
+ __END_DECLS
+
+diff --git a/libc/include/stdio.h b/libc/include/stdio.h
+index 23fc944..28685c7 100644
+--- a/libc/include/stdio.h
++++ b/libc/include/stdio.h
+@@ -469,7 +469,10 @@ int vsprintf(char *dest, const char *format, __va_list ap)
+ }
+
+ #if defined(__clang__)
+-#define snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
++  #if !defined(snprintf)
++    #define __wrap_snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
++    #define snprintf(...) __wrap_snprintf(__VA_ARGS__)
++  #endif
+ #else
+ __BIONIC_FORTIFY_INLINE
+ __printflike(3, 4)
+@@ -481,7 +484,10 @@ int snprintf(char *dest, size_t size, const char *format, ...)
+ #endif
+
+ #if defined(__clang__)
+-#define sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
++  #if !defined(sprintf)
++    #define __wrap_sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
++    #define sprintf(...) __wrap_sprintf(__VA_ARGS__)
++  #endif
+ #else
+ __BIONIC_FORTIFY_INLINE
+ __printflike(2, 3)
+diff --git a/libc/include/stdlib.h b/libc/include/stdlib.h
+index 9fa84c1..188da69 100644
+--- a/libc/include/stdlib.h
++++ b/libc/include/stdlib.h
+@@ -68,6 +68,10 @@ static __inline__ float strtof(const char *nptr, char **endptr)
+     return (float)strtod(nptr, endptr);
+ }
+
++static __inline__ long double strtold(const char* s, char** end_ptr) {
++  return (long double)strtod(s, end_ptr);
++}
++
+ extern int atoi(const char *) __purefunc;
+ extern long atol(const char *) __purefunc;
+ extern long long atoll(const char *) __purefunc;
+diff --git a/libc/include/wchar.h b/libc/include/wchar.h
+index 76ac02c..c413806 100644
+--- a/libc/include/wchar.h
++++ b/libc/include/wchar.h
+@@ -77,6 +77,7 @@ extern int               fwprintf(FILE *, const wchar_t *, ...);
+ extern int               fwscanf(FILE *, const wchar_t *, ...);
+ extern int               iswalnum(wint_t);
+ extern int               iswalpha(wint_t);
++extern int               iswblank(wint_t);
+ extern int               iswcntrl(wint_t);
+ extern int               iswdigit(wint_t);
+ extern int               iswgraph(wint_t);
diff --git a/midgard/r11p0/android/patches/K/libcxx.diff b/midgard/r11p0/android/patches/K/libcxx.diff
new file mode 100755
index 0000000..a10d75c
--- /dev/null
+++ b/midgard/r11p0/android/patches/K/libcxx.diff
@@ -0,0 +1,13 @@
+diff --git a/include/cstdlib b/include/cstdlib
+index 152b891..e45a8fd 100644
+--- a/include/cstdlib
++++ b/include/cstdlib
+@@ -126,7 +126,7 @@ using ::realloc;
+ using ::abort;
+ using ::atexit;
+ using ::exit;
+-using ::_Exit;
++
+ using ::getenv;
+ using ::system;
+ using ::bsearch;
diff --git a/midgard/r11p0/android/patches/K/license.txt b/midgard/r11p0/android/patches/K/license.txt
new file mode 100755
index 0000000..d645695
--- /dev/null
+++ b/midgard/r11p0/android/patches/K/license.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/midgard/r11p0/android/patches/K/llvm.diff b/midgard/r11p0/android/patches/K/llvm.diff
new file mode 100755
index 0000000..d41d279
--- /dev/null
+++ b/midgard/r11p0/android/patches/K/llvm.diff
@@ -0,0 +1,14 @@
+diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
+index 8cdb853..868dd47 100644
+--- a/lib/Target/ARM/ARMInstrInfo.cpp
++++ b/lib/Target/ARM/ARMInstrInfo.cpp
+@@ -130,6 +130,10 @@ namespace {
+         MIB.addImm(0);
+       AddDefaultPred(MIB);
+
++      // Fix the GOT address by adding pc.
++      BuildMI(FirstMBB, MBBI, DL, TII.get(ARM::tPICADD), GlobalBaseReg)
++          .addReg(GlobalBaseReg).addImm(ARMPCLabelIndex);
++
+       return true;
+     }
diff --git a/midgard/r11p0/android/patches/K/locale.diff b/midgard/r11p0/android/patches/K/locale.diff
new file mode 100755
index 0000000..9c4c140
--- /dev/null
+++ b/midgard/r11p0/android/patches/K/locale.diff
@@ -0,0 +1,204 @@
+diff --git a/libc/bionic/locale.cpp b/libc/bionic/locale.cpp
+new file mode 100644
+index 0000000..4fade84
+--- /dev/null
++++ b/libc/bionic/locale.cpp
+@@ -0,0 +1,156 @@
++/*
++ * Copyright (C) 2008 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++#include <locale.h>
++#include <pthread.h>
++#include <stdlib.h>
++
++// We currently support a single locale, the "C" locale (also known as "POSIX").
++
++struct __locale_t {
++  // Because we only support one locale, these are just tokens with no data.
++};
++
++static pthread_once_t gLocaleOnce = PTHREAD_ONCE_INIT;
++static lconv gLocale;
++
++static pthread_once_t gUselocaleKeyOnce = PTHREAD_ONCE_INIT;
++static pthread_key_t gUselocaleKey;
++
++static void __locale_init() {
++  gLocale.decimal_point = const_cast<char*>(".");
++
++  char* not_available = const_cast<char*>("");
++  gLocale.thousands_sep = not_available;
++  gLocale.grouping = not_available;
++  gLocale.int_curr_symbol = not_available;
++  gLocale.currency_symbol = not_available;
++  gLocale.mon_decimal_point = not_available;
++  gLocale.mon_thousands_sep = not_available;
++  gLocale.mon_grouping = not_available;
++  gLocale.positive_sign = not_available;
++  gLocale.negative_sign = not_available;
++
++  gLocale.int_frac_digits = CHAR_MAX;
++  gLocale.frac_digits = CHAR_MAX;
++  gLocale.p_cs_precedes = CHAR_MAX;
++  gLocale.p_sep_by_space = CHAR_MAX;
++  gLocale.n_cs_precedes = CHAR_MAX;
++  gLocale.n_sep_by_space = CHAR_MAX;
++  gLocale.p_sign_posn = CHAR_MAX;
++  gLocale.n_sign_posn = CHAR_MAX;
++  gLocale.int_p_cs_precedes = CHAR_MAX;
++  gLocale.int_p_sep_by_space = CHAR_MAX;
++  gLocale.int_n_cs_precedes = CHAR_MAX;
++  gLocale.int_n_sep_by_space = CHAR_MAX;
++  gLocale.int_p_sign_posn = CHAR_MAX;
++  gLocale.int_n_sign_posn = CHAR_MAX;
++}
++
++static void __uselocale_key_init() {
++  pthread_key_create(&gUselocaleKey, NULL);
++}
++
++static bool __is_supported_locale(const char* locale) {
++  return (strcmp(locale, "") == 0 || strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0);
++}
++
++static locale_t __new_locale() {
++  return reinterpret_cast<locale_t>(malloc(sizeof(__locale_t)));
++}
++
++lconv* localeconv() {
++  pthread_once(&gLocaleOnce, __locale_init);
++  return &gLocale;
++}
++
++locale_t duplocale(locale_t l) {
++  locale_t clone = __new_locale();
++  if (clone != NULL && l != LC_GLOBAL_LOCALE) {
++    *clone = *l;
++  }
++  return clone;
++}
++
++void freelocale(locale_t l) {
++  free(l);
++}
++
++locale_t newlocale(int category_mask, const char* locale_name, locale_t /*base*/) {
++  // Is 'category_mask' valid?
++  if ((category_mask & ~LC_ALL_MASK) != 0) {
++    errno = EINVAL;
++    return NULL;
++  }
++
++  if (!__is_supported_locale(locale_name)) {
++    errno = ENOENT;
++    return NULL;
++  }
++
++  return __new_locale();
++}
++
++char* setlocale(int category, char const* locale_name) {
++  // Is 'category' valid?
++  if (category < LC_CTYPE || category > LC_IDENTIFICATION) {
++    errno = EINVAL;
++    return NULL;
++  }
++
++  // Caller just wants to query the current locale?
++  if (locale_name == NULL) {
++    return const_cast<char*>("C");
++  }
++
++  // Caller wants one of the mandatory POSIX locales?
++  if (__is_supported_locale(locale_name)) {
++    return const_cast<char*>("C");
++  }
++
++  // We don't support any other locales.
++  errno = ENOENT;
++  return NULL;
++}
++
++locale_t uselocale(locale_t new_locale) {
++  pthread_once(&gUselocaleKeyOnce, __uselocale_key_init);
++
++  locale_t old_locale = static_cast<locale_t>(pthread_getspecific(gUselocaleKey));
++
++  // If this is the first call to uselocale(3) on this thread, we return LC_GLOBAL_LOCALE.
++  if (old_locale == NULL) {
++    old_locale = LC_GLOBAL_LOCALE;
++  }
++
++  if (new_locale != NULL) {
++    pthread_setspecific(gUselocaleKey, new_locale);
++  }
++
++  return old_locale;
++}
+diff --git a/libc/include/xlocale.h b/libc/include/xlocale.h
+new file mode 100644
+index 0000000..f7eb8f4
+--- /dev/null
++++ b/libc/include/xlocale.h
+@@ -0,0 +1,36 @@
++/*
++ * Copyright (C) 2014 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++#ifndef _XLOCALE_H_
++#define _XLOCALE_H_
++
++/* If we just use void* here, GCC exposes that in error messages. */
++struct __locale_t;
++typedef struct __locale_t* locale_t;
++
++#endif /* _XLOCALE_H_ */
diff --git a/midgard/r11p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt b/midgard/r11p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
new file mode 100755
index 0000000..46b704b
--- /dev/null
+++ b/midgard/r11p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
@@ -0,0 +1,92 @@
+#
+# (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+* ARM Mali Midgard devices
+
+
+Required properties:
+
+- compatible : Should be mali<chip>, replacing digits with x from the back,
+until malit<Major>xx, ending with arm,mali-midgard, the latter not optional.
+- reg : Physical base address of the device and length of the register area.
+- interrupts : Contains the three IRQ lines required by T-6xx devices
+- interrupt-names : Contains the names of IRQ resources in the order they were
+provided in the interrupts property. Must contain: "JOB, "MMU", "GPU".
+
+Optional:
+
+- clocks : Phandle to clock for the Mali T-6xx device.
+- clock-names : Shall be "clk_mali".
+- mali-supply : Phandle to regulator for the Mali device. Refer to
+Documentation/devicetree/bindings/regulator/regulator.txt for details.
+- operating-points : Refer to Documentation/devicetree/bindings/power/opp.txt
+for details.
+- snoop_enable_smc : SMC function ID to enable CCI snooping on the GPU port(s).
+- snoop_disable_smc : SMC function ID to disable CCI snooping on the GPU port(s).
+- jm_config : For T860/T880. Sets job manager configuration. An array containing:
+	- 1 to override the TIMESTAMP value, 0 otherwise.
+	- 1 to override clock gate, forcing them to be always on, 0 otherwise.
+	- 1 to enable job throttle, limiting the number of cores that can be started
+	  simultaneously, 0 otherwise.
+	- Value between 0 and 63 (including). If job throttle is enabled, this is one
+	  less than the number of cores that can be started simultaneously.
+- power_model : Sets power model parameters. Note that this model was designed for the Juno
+	        platform, and may not be suitable for other platforms. A structure containing :
+	- compatible: Should be arm,mali-simple-power-model
+	- voltage: Voltage at reference point. Specified in mV.
+	- frequency: Frequency at reference point. Specified in MHz.
+	- dynamic-power: Dynamic power at reference frequency and voltage. Specified in mW.
+	- static-power: Static power at reference frequency. Specified in mW.
+	- ts: An array containing coefficients for the temperature scaling factor.
+	  Used as : tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0], where T = temperature
+	- thermal-zone: A string identifying the thermal zone used for the GPU
+- system-coherency : Sets the coherency protocol to be used for coherent
+		     accesses made from the GPU.
+		     If not set then no coherency is used.
+	- 0  : ACE-Lite
+	- 1  : ACE
+	- 31 : No coherency
+
+Example for a Mali-T602:
+
+gpu@0xfc010000 {
+	compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
+	reg = <0xfc010000 0x4000>;
+	interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+	interrupt-names = "JOB", "MMU", "GPU";
+
+	clocks = <&pclk_mali>;
+	clock-names = "clk_mali";
+	mali-supply = <&vdd_mali>;
+	operating-points = <
+		/* KHz   uV */
+		533000 1250000,
+		450000 1150000,
+		400000 1125000,
+		350000 1075000,
+		266000 1025000,
+		160000  925000,
+		100000  912500,
+	>;
+	power_model {
+		compatible = "arm,mali-simple-power-model";
+		voltage = <800>;
+		frequency = <500>;
+		static-power = <500>;
+		dynamic-power = <1500>;
+		ts = <20000 2000 (-20) 2>;
+		thermal-zone = "gpu";
+	};
+};
diff --git a/midgard/r11p0/kernel/Documentation/dma-buf-test-exporter.txt b/midgard/r11p0/kernel/Documentation/dma-buf-test-exporter.txt
new file mode 100755
index 0000000..4208010
--- /dev/null
+++ b/midgard/r11p0/kernel/Documentation/dma-buf-test-exporter.txt
@@ -0,0 +1,40 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+=====================
+dma-buf-test-exporter
+=====================
+
+Overview
+--------
+
+The dma-buf-test-exporter is a simple exporter of dma_buf objects.
+It has a private API to allocate and manipulate the buffers which are represented as dma_buf fds.
+The private API allows:
+* simple allocation of physically non-contiguous buffers
+* simple allocation of physically contiguous buffers
+* query kernel side API usage stats (number of attachments, number of mappings, mmaps)
+* failure mode configuration (fail attach, mapping, mmap)
+* kernel side memset of buffers
+
+The buffers support all of the dma_buf API, including mmap.
+
+It supports being compiled as a module both in-tree and out-of-tree.
+
+See include/linux/dma-buf-test-exporter.h for the ioctl interface.
+See Documentation/dma-buf-sharing.txt for details on dma_buf.
+
+
diff --git a/midgard/r11p0/kernel/Documentation/kds.txt b/midgard/r11p0/kernel/Documentation/kds.txt
new file mode 100755
index 0000000..639288c
--- /dev/null
+++ b/midgard/r11p0/kernel/Documentation/kds.txt
@@ -0,0 +1,268 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+==============================
+kds - Kernel Dependency System
+==============================
+
+Introduction
+------------
+kds provides a mechanism for clients to atomically lock down multiple abstract resources.
+This can be done either synchronously or asynchronously.
+Abstract resources is used to allow a set of clients to use kds to control access to any
+resource, an example is structured memory buffers.
+
+kds supports that buffer is locked for exclusive access and sharing of buffers.
+
+kds can be built as either a integrated feature of the kernel or as a module.
+It supports being compiled as a module both in-tree and out-of-tree.
+
+
+Concepts
+--------
+A core concept in kds is abstract resources.
+A kds resource is just an abstraction for some client object, kds doesn't care what it is.
+Typically EGL will consider UMP buffers as being a resource, thus each UMP buffer has
+a kds resource for synchronization to the buffer.
+
+kds allows a client to create and destroy the abstract resource objects.
+A new resource object is made available asap (it is just a simple malloc with some initializations),
+while destroy it requires some external synchronization.
+
+The other core concept in kds is consumer of resources.
+kds is requested to allow a client to consume a set of resources and the client will be notified when it can consume the resources.
+
+Exclusive access allows only one client to consume a resource.
+Shared access permits multiple consumers to acceess a resource concurrently.
+
+
+APIs
+----
+kds provides simple resource allocate and destroy functions.
+Clients use this to instantiate and control the lifetime of the resources kds manages.
+
+kds provides two ways to wait for resources:
+- Asynchronous wait: the client specifies a function pointer to be called when wait is over
+- Synchronous wait: Function blocks until access is gained.
+
+The synchronous API has a timeout for the wait.
+The call can early out if a signal is delivered.
+
+After a client is done consuming the resource kds must be notified to release the resources and let some other client take ownership.
+This is done via resource set release call.
+
+A Windows comparison:
+kds implements WaitForMultipleObjectsEx(..., bWaitAll = TRUE, ...) but also has an asynchronous version in addition.
+kds resources can be seen as being the same as NT object manager resources.
+
+Internals
+---------
+kds guarantees atomicity when a set of resources is operated on.
+This is implemented via a global resource lock which is taken by kds when it updates resource objects.
+
+Internally a resource in kds is a linked list head with some flags.
+
+When a consumer requests access to a set of resources it is queued on each of the resources.
+The link from the consumer to the resources can be triggered. Once all links are triggered
+the registered callback is called or the blocking function returns.
+A link is considered triggered if it is the first on the list of consumers of a resource,
+or if all the links ahead of it is marked as shared and itself is of the type shared.
+
+When the client is done consuming the consumer object is removed from the linked lists of
+the resources and a potential new consumer becomes the head of the resources.
+As we add and remove consumers atomically across all resources we can guarantee that
+we never introduces a A->B + B->A type of loops/deadlocks.
+
+
+kbase/base implementation
+-------------------------
+A HW job needs access to a set of shared resources.
+EGL tracks this and encodes the set along with the atom in the ringbuffer.
+EGL allocates a (k)base dep object to represent the dependency to the set of resources and encodes that along with the list of resources.
+This dep object is use to create a dependency from a job chain(atom) to the resources it needs to run.
+When kbase decodes the atom in the ringbuffer it finds the set of resources and calls kds to request all the needed resources.
+As EGL needs to know when the kds request is delivered a new base event object is needed: atom enqueued. This event is only delivered for atoms which uses kds.
+The callback kbase registers trigger the dependency object described which would trigger the existing JD system to release the job chain.
+When the atom is done kds resource set release is call to release the resources.
+
+EGL will typically use exclusive access to the render target, while all buffers used as input can be marked as shared.
+
+
+Buffer publish/vsync
+--------------------
+EGL will use a separate ioctl or DRM flip to request the flip.
+If the LCD driver is integrated with kds EGL can do these operations early.
+The LCD driver must then implement the ioctl or DRM flip to be asynchronous with kds async call.
+The LCD driver binds a kds resource to each virtual buffer (2 buffers in case of double-buffering).
+EGL will make a dependency to the target kds resource in the kbase atom.
+After EGL receives a atom enqueued event it can ask the LCD driver to pan to the target kds resource.
+When the atom is completed it'll release the resource and the LCD driver will get its callback.
+In the callback it'll load the target buffer into the DMA unit of the LCD hardware.
+The LCD driver will be the consumer of both buffers for a short period.
+The LCD driver will call kds resource set release on the previous on-screen buffer when the next vsync/dma read end is handled.
+
+===============================================
+Kernel driver kds client design considerations
+=============================================== 
+
+Number of resources
+--------------------
+
+The kds api allows a client to wait for ownership of a number of resources, where by the client does not take on ownership of any of the resources in the resource set 
+until all of the resources in the set are released.  Consideration must be made with respect to performance, as waiting on large number of resources will incur
+a greater overhead and may increase system latency.  It may be worth considering how independent each of the resources are, for example if the same set of resources 
+are waited upon by each of the clients, then it may be possible to aggregate these into one resource that each client waits upon.
+
+Clients with shared access
+---------------------------
+
+The kds api allows a number of clients to gain shared access to a resource simultaneously, consideration must be made with respect to performance, large numbers of clients
+wanting shared access can incur a performance penalty and may increase system latency, specifically when the clients are granted access. Having an excessively high 
+number of clients with shared access should be avoided, consideration should be made to the call back configuration being used. See Callbacks and Scenario 1 below.
+
+Callbacks
+----------
+
+Careful consideration must be made as to which callback type is most appropriate for kds clients, direct callbacks are called immediately from the context in which the
+ownership of the resource is passed to the next waiter in the list.  Where as when using deferred callbacks the callback is deferred and called from outside the context
+that is relinquishing ownership, while this reduces the latency in the releasing clients context it does incur a cost as there is more latency between a resource 
+becoming free and the new client owning the resource callback being executed.  
+
+Obviously direct callbacks have a performance advantage, as the call back is immediate and does not have to wait for the kernel to context switch to schedule in the 
+execution of the callback.  
+
+However as the callback is immediate and within the context that is granting ownership it is important that the callback perform the MINIMUM amount of work necessary, 
+long call backs could cause poor system latency.  Special care and attention must be taken if the direct callbacks can be called from IRQ handlers, such as when 
+kds_resource_set_release is called from an IRQ handler, in this case you have to avoid any calls that may sleep.  
+
+Deferred contexts have the advantage that the call backs are deferred until they are scheduled by the kernel, therefore they are allowed to call functions that may sleep
+and if scheduled from IRQ context not incur as much system latency as would be seen with direct callbacks from within the IRQ.
+
+Once the clients callback has been called, the client is considered to be owning the resource.  Within the callback the client may only need to perform a small amount of work
+before the client need to give up owner ship.  The kds_resource_release function may be called from with in the call back, but consideration must be made when using direct 
+callbacks, with both respect to execution time and stack usage. Consider the example in Scenario 2 with direct callbacks:
+
+Scenario 1 - Shared client access - direct callbacks:
+
+Resources: X
+Clients: A(S), B(S), C(S), D(S), E(E)
+where: (S) = shared user, (E) = exclusive
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+}
+
+Where <client> is either A,B,C,D
+                                        Queue   |Owner 
+1.  E Requests X exclusive                      |
+2.  E Owns     X exclusive                      |E
+3.  A Requests X shared                        A|E
+4.  B Requests X shared                       BA|E
+5.  C Requests X shared                      CBA|E
+6.  D Requests X shared                     DCBA|E
+7.  E Releases X                                |DCBA
+8.  A Owns X shared                             |DCBA
+9.  B Owns X shared                             |DCBA
+10. C Owns X shared                             |DCBA
+11. D Owns X shared                             |DCBA
+
+At point 7 it is important to note that when E releases X; A,B,C and D become the new shared owners of X and the call back for each of the client(A,B,C,D) triggered, so consideration
+must be made as to whether a direct or deferred callback is suitable, using direct callbacks would result in the call graph.
+
+Call graph when E releases X:
+    kds_resource_set_release( .. )
+        +->client_A_cb( .. )
+        +->client_B_cb( .. )
+        +->client_C_cb( .. )
+        +->client_D_cb( .. )
+
+
+Scenario 2 - Immediate resource release - direct callbacks:
+
+Resource: X
+Clients: A, B, C, D
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+    kds_resource_set_release( .. );
+}
+
+Where <client> is either A,B,C,D
+
+1. A Owns     X exclusive
+2. B Requests X exclusive	(direct callback)
+3. C Requests X exclusive	(direct callback)
+4. D Requests X exclusive	(direct callback)
+5. A Releases X
+
+Call graph when A releases X:
+    kds_resource_set_release( .. )
+        +->client_B_cb( .. )
+            +->kds_resource_set_release( .. )
+                +->client_C_cb( .. )
+                    +->kds_resource_set_release( .. )
+                        +->client_D_cb( .. )
+
+As can be seen when a client releases the resource, with direct call backs it is possible to create nested calls
+
+IRQ Considerations
+-------------------
+
+Usage of kds_resource_release in IRQ handlers should be carefully considered.
+
+Things to keep in mind:
+
+1.) Are you using direct or deferred callbacks?
+2.) How many resources are you releasing?
+3.) How many shared waiters are pending on the resource?
+
+Releasing ownership and wait cancellation
+------------------------------------------
+
+Client Wait Cancellation
+-------------------------
+
+It may be necessary in certain circumstances for the client to cancel the wait for kds resources for error handling, process termination etc.  Cancellation is 
+performed using kds_resource_set_release or kds_resource_set_release_sync using the rset that was received from kds_async_waitall, kds_resource_set_release_sync 
+being used for waits which are using deferred callbacks.
+
+It is possible that while the request to cancel the wait is being issued by the client, the client is granted access to the resources.  Normally after the client
+has taken ownership and finishes with that resource, it will release ownership to signal other waiters which are pending, this causes a race with the cancellation.
+To prevent KDS trying to remove a wait twice from the internal list and accessing memory that is potentially freed, it is very important that all releasers use the
+same rset pointer.  Here is a simplified example of bad usage that must be avoided in any client implementation:
+
+Senario 3 - Bad release from multiple contexts:
+
+	This scenaro is highlighting bad usage of the kds API
+
+	kds_resource_set * rset;
+	kds_resource_set * rset_copy;
+
+	kds_async_waitall( &rset, ... ... ... );
+
+	/* Don't do this */
+	rset_copy = rset;
+
+Context A:
+	kds_resource_set_release( &rset )
+
+Context B:
+	kds_resource_set_release( &rset_copy )
+
diff --git a/midgard/r11p0/kernel/drivers/base/dma_buf_lock/sconscript b/midgard/r11p0/kernel/drivers/base/dma_buf_lock/sconscript
new file mode 100755
index 0000000..8212693
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/dma_buf_lock/sconscript
@@ -0,0 +1,27 @@
+#
+# (C) COPYRIGHT 2012, 2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+search_term = '^[\ ]*CONFIG_DMA_SHARED_BUFFER_USES_KDS[\ ]*=[\ ]*y'
+for line in open(linux_config_file, 'r'):
+	if re.search(search_term, line):
+		SConscript( 'src/sconscript' )
+		if env['tests'] and Glob('tests/sconscript'):
+			SConscript( 'tests/sconscript' )
+		break
diff --git a/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/Kbuild b/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
new file mode 100755
index 0000000..68dad07
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-m := dma_buf_lock.o
+endif
diff --git a/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/Makefile b/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/Makefile
new file mode 100755
index 0000000..cf76132
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: dma_buf_lock
+
+dma_buf_lock:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include"
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c b/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
new file mode 100755
index 0000000..9613ffc
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/dma-buf.h>
+#include <linux/kds.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+
+#include "dma_buf_lock.h"
+
+/* Maximum number of buffers that a single handle can address */
+#define DMA_BUF_LOCK_BUF_MAX 32
+
+#define DMA_BUF_LOCK_DEBUG 1
+
+static dev_t dma_buf_lock_dev;
+static struct cdev dma_buf_lock_cdev;
+static struct class *dma_buf_lock_class;
+static char dma_buf_lock_dev_name[] = "dma_buf_lock";
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static struct file_operations dma_buf_lock_fops =
+{
+	.owner   = THIS_MODULE,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = dma_buf_lock_ioctl,
+#else
+	.ioctl   = dma_buf_lock_ioctl,
+#endif
+	.compat_ioctl   = dma_buf_lock_ioctl,
+};
+
+typedef struct dma_buf_lock_resource
+{
+	int *list_of_dma_buf_fds;               /* List of buffers copied from userspace */
+	atomic_t locked;                        /* Status of lock */
+	struct dma_buf **dma_bufs;
+	struct kds_resource **kds_resources;    /* List of KDS resources associated with buffers */
+	struct kds_resource_set *resource_set;
+	unsigned long exclusive;                /* Exclusive access bitmap */
+	wait_queue_head_t wait;
+	struct kds_callback cb;
+	struct kref refcount;
+	struct list_head link;
+	int count;
+} dma_buf_lock_resource;
+
+static LIST_HEAD(dma_buf_lock_resource_list);
+static DEFINE_MUTEX(dma_buf_lock_mutex);
+
+static inline int is_dma_buf_lock_file(struct file *);
+static void dma_buf_lock_dounlock(struct kref *ref);
+
+static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
+{
+	dma_buf_lock_resource *resource;
+
+	if (!is_dma_buf_lock_file(file))
+		return -EINVAL;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_release\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return 0;
+}
+
+static void dma_buf_lock_kds_callback(void *param1, void *param2)
+{
+	dma_buf_lock_resource *resource = param1;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_kds_callback\n");
+#endif
+	atomic_set(&resource->locked, 1);
+
+	wake_up(&resource->wait);
+}
+
+static unsigned int dma_buf_lock_handle_poll(struct file *file,
+                                             struct poll_table_struct *wait)
+{
+	dma_buf_lock_resource *resource;
+	unsigned int ret = 0;
+
+	if (!is_dma_buf_lock_file(file))
+		return POLLERR;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll\n");
+#endif
+	if (1 == atomic_read(&resource->locked))
+	{
+		/* Resources have been locked */
+		ret = POLLIN | POLLRDNORM;
+		if (resource->exclusive)
+		{
+			ret |=  POLLOUT | POLLWRNORM;
+		}
+	}
+	else
+	{
+		if (!poll_does_not_wait(wait)) 
+		{
+			poll_wait(file, &resource->wait, wait);
+		}
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll : return %i\n", ret);
+#endif
+	return ret;
+}
+
+static const struct file_operations dma_buf_lock_handle_fops = {
+	.release	= dma_buf_lock_handle_release,
+	.poll		= dma_buf_lock_handle_poll,
+};
+
+/*
+ * is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock
+ */
+static inline int is_dma_buf_lock_file(struct file *file)
+{
+	return file->f_op == &dma_buf_lock_handle_fops;
+}
+
+
+
+/*
+ * Start requested lock.
+ *
+ * Allocates required memory, copies dma_buf_fd list from userspace,
+ * acquires related KDS resources, and starts the lock.
+ */
+static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
+{
+	dma_buf_lock_resource *resource;
+	int size;
+	int fd;
+	int i;
+	int ret;
+
+	if (NULL == request->list_of_dma_buf_fds)
+	{
+		return -EINVAL;
+	}
+	if (request->count <= 0)
+	{
+		return -EINVAL;
+	}
+	if (request->count > DMA_BUF_LOCK_BUF_MAX)
+	{
+		return -EINVAL;
+	}
+	if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE &&
+	    request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
+	{
+		return -EINVAL;
+	}
+
+	resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
+	if (NULL == resource)
+	{
+		return -ENOMEM;
+	}
+
+	atomic_set(&resource->locked, 0);
+	kref_init(&resource->refcount);
+	INIT_LIST_HEAD(&resource->link);
+	resource->count = request->count;
+
+	/* Allocate space to store dma_buf_fds received from user space */
+	size = request->count * sizeof(int);
+	resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->list_of_dma_buf_fds)
+	{
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Allocate space to store dma_buf pointers associated with dma_buf_fds */
+	size = sizeof(struct dma_buf *) * request->count;
+	resource->dma_bufs = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->dma_bufs)
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+	/* Allocate space to store kds_resources associated with dma_buf_fds */
+	size = sizeof(struct kds_resource *) * request->count;
+	resource->kds_resources = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->kds_resources)
+	{
+		kfree(resource->dma_bufs);
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Copy requested list of dma_buf_fds from user space */
+	size = request->count * sizeof(int);
+	if (0 != copy_from_user(resource->list_of_dma_buf_fds, (void __user *)request->list_of_dma_buf_fds, size))
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource->dma_bufs);
+		kfree(resource->kds_resources);
+		kfree(resource);
+		return -ENOMEM;
+	}
+#if DMA_BUF_LOCK_DEBUG
+	for (i = 0; i < request->count; i++)
+	{
+		printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
+	}
+#endif
+
+	/* Add resource to global list */
+	mutex_lock(&dma_buf_lock_mutex);
+
+	list_add(&resource->link, &dma_buf_lock_resource_list);
+
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	for (i = 0; i < request->count; i++)
+	{
+		/* Convert fd into dma_buf structure */
+		resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
+
+		if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i])))
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+
+		/*Get kds_resource associated with dma_buf */
+		resource->kds_resources[i] = get_dma_buf_kds_resource(resource->dma_bufs[i]);
+
+		if (NULL == resource->kds_resources[i])
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+#if DMA_BUF_LOCK_DEBUG
+		printk("dma_buf_lock_dolock : dma_buf_fd %i dma_buf %X kds_resource %X\n", resource->list_of_dma_buf_fds[i],
+		       (unsigned int)resource->dma_bufs[i], (unsigned int)resource->kds_resources[i]);
+#endif	
+	}
+
+	kds_callback_init(&resource->cb, 1, dma_buf_lock_kds_callback);
+	init_waitqueue_head(&resource->wait);
+
+	kref_get(&resource->refcount);
+
+	/* Create file descriptor associated with lock request */
+	fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops, 
+	                      (void *)resource, 0);
+	if (fd < 0)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+		return fd;
+	}
+
+	resource->exclusive = request->exclusive;
+
+	/* Start locking process */
+	ret = kds_async_waitall(&resource->resource_set,
+	                        &resource->cb, resource, NULL,
+	                        request->count,  &resource->exclusive,
+	                        resource->kds_resources);
+
+	if (IS_ERR_VALUE(ret))
+	{
+		put_unused_fd(fd);
+
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+
+		return ret;
+	}
+
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_dolock : complete\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return fd;
+}
+
+static void dma_buf_lock_dounlock(struct kref *ref)
+{
+	int i;
+	dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
+
+	atomic_set(&resource->locked, 0);
+
+	kds_callback_term(&resource->cb);
+
+	kds_resource_set_release(&resource->resource_set);
+
+	list_del(&resource->link);
+
+	for (i = 0; i < resource->count; i++)
+	{
+		dma_buf_put(resource->dma_bufs[i]);
+	}
+
+	kfree(resource->kds_resources);
+	kfree(resource->dma_bufs);
+	kfree(resource->list_of_dma_buf_fds);
+	kfree(resource);
+}
+
+static int __init dma_buf_lock_init(void)
+{
+	int err;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init\n");
+#endif
+	err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
+
+	if (0 == err)
+	{
+		cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops);
+
+		err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1);
+
+		if (0 == err)
+		{
+			dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
+			if (IS_ERR(dma_buf_lock_class))
+			{
+				err = PTR_ERR(dma_buf_lock_class);
+			}
+			else
+			{
+				struct device *mdev;
+				mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev, NULL, dma_buf_lock_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return 0;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(dma_buf_lock_class);
+			}
+			cdev_del(&dma_buf_lock_cdev);
+		}
+
+		unregister_chrdev_region(dma_buf_lock_dev, 1);
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init failed\n");
+#endif
+	return err;
+}
+
+static void __exit dma_buf_lock_exit(void)
+{
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_exit\n");
+#endif
+
+	/* Unlock all outstanding references */
+	while (1)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		if (list_empty(&dma_buf_lock_resource_list))
+		{
+			mutex_unlock(&dma_buf_lock_mutex);
+			break;
+		}
+		else
+		{
+			dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next, 
+			                                             dma_buf_lock_resource, link);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+		}
+	}
+
+	device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
+
+	class_destroy(dma_buf_lock_class);
+
+	cdev_del(&dma_buf_lock_cdev);
+
+	unregister_chrdev_region(dma_buf_lock_dev, 1);
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	dma_buf_lock_k_request request;
+	int size = _IOC_SIZE(cmd);
+
+	if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
+	{
+		return -ENOTTY;
+	}
+
+	switch (cmd)
+	{
+		case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
+			if (size != sizeof(dma_buf_lock_k_request))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&request, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+#if DMA_BUF_LOCK_DEBUG
+			printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
+#endif
+			return dma_buf_lock_dolock(&request);
+	}
+
+	return -ENOTTY;
+}
+
+module_init(dma_buf_lock_init);
+module_exit(dma_buf_lock_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h b/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
new file mode 100755
index 0000000..e1b9348
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _DMA_BUF_LOCK_H
+#define _DMA_BUF_LOCK_H
+
+typedef enum dma_buf_lock_exclusive
+{
+	DMA_BUF_LOCK_NONEXCLUSIVE = 0,
+	DMA_BUF_LOCK_EXCLUSIVE = -1
+} dma_buf_lock_exclusive;
+
+typedef struct dma_buf_lock_k_request
+{
+	int count;
+	int *list_of_dma_buf_fds;
+	int timeout;
+	dma_buf_lock_exclusive exclusive;
+} dma_buf_lock_k_request;
+
+#define DMA_BUF_LOCK_IOC_MAGIC '~'
+
+#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC       _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
+
+#define DMA_BUF_LOCK_IOC_MINNR 11
+#define DMA_BUF_LOCK_IOC_MAXNR 11
+
+#endif /* _DMA_BUF_LOCK_H */
diff --git a/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/sconscript b/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/sconscript
new file mode 100755
index 0000000..b8724f1
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/dma_buf_lock/src/sconscript
@@ -0,0 +1,33 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+src = [Glob('#kernel/drivers/base/dma_buf_lock/src/*.c'), Glob('#kernel/drivers/base/dma_buf_lock/src/*.h'), Glob('#kernel/drivers/base/dma_buf_lock/src/K*')]
+
+if env.GetOption('clean') :
+	# Clean module
+	env.Execute(Action("make clean", '[CLEAN] dma_buf_lock'))
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
+else:
+	# Build module
+	makeAction=Action("cd ${SOURCE.dir} && make dma_buf_lock && cp dma_buf_lock.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [makeAction])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
diff --git a/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild b/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
new file mode 100755
index 0000000..56b9f86
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma-buf-test-exporter.o
+endif
diff --git a/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig b/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
new file mode 100755
index 0000000..974f0c2
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+config DMA_SHARED_BUFFER_TEST_EXPORTER
+	tristate "Test exporter for the dma-buf framework"
+	depends on DMA_SHARED_BUFFER
+	help
+	  This option enables the test exporter usable to help test importerts.
diff --git a/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/Makefile b/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
new file mode 100755
index 0000000..06e3d5c
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
new file mode 100755
index 0000000..6270a52
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
@@ -0,0 +1,650 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/dma-buf-test-exporter.h>
+#include <linux/dma-buf.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/atomic.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#include <linux/dma-attrs.h>
+#include <linux/dma-mapping.h>
+#endif
+
+struct dma_buf_te_alloc {
+	/* the real alloc */
+	int nr_pages;
+	struct page **pages;
+
+	/* the debug usage tracking */
+	int nr_attached_devices;
+	int nr_device_mappings;
+	int nr_cpu_mappings;
+
+	/* failure simulation */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+
+	bool contiguous;
+	dma_addr_t contig_dma_addr;
+	void *contig_cpu_addr;
+};
+
+static struct miscdevice te_device;
+
+static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc	*alloc;
+	alloc = buf->priv;
+
+	if (alloc->fail_attach)
+		return -EFAULT;
+
+	/* dma_buf is externally locked during call */
+	alloc->nr_attached_devices++;
+	return 0;
+}
+
+static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* dma_buf is externally locked during call */
+
+	alloc->nr_attached_devices--;
+}
+
+static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction)
+{
+	struct sg_table *sg;
+	struct scatterlist *iter;
+	struct dma_buf_te_alloc	*alloc;
+	int i;
+	int ret;
+
+	alloc = attachment->dmabuf->priv;
+
+	if (alloc->fail_map)
+		return ERR_PTR(-ENOMEM);
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* if the ARCH can't chain we can't have allocs larger than a single sg can hold */
+	if (alloc->nr_pages > SG_MAX_SINGLE_ALLOC)
+		return ERR_PTR(-EINVAL);
+#endif
+
+	sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (!sg)
+		return ERR_PTR(-ENOMEM);
+
+	/* from here we access the allocation object, so lock the dmabuf pointing to it */
+	mutex_lock(&attachment->dmabuf->lock);
+
+	if (alloc->contiguous)
+		ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+	else
+		ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL);
+	if (ret) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		kfree(sg);
+		return ERR_PTR(ret);
+	}
+
+	if (alloc->contiguous) {
+		sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE;
+		sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0);
+		sg_dma_address(sg->sgl) = alloc->contig_dma_addr;
+	} else {
+		for_each_sg(sg->sgl, iter, alloc->nr_pages, i)
+			sg_set_page(iter, alloc->pages[i], PAGE_SIZE, 0);
+	}
+
+	if (!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		sg_free_table(sg);
+		kfree(sg);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	alloc->nr_device_mappings++;
+	mutex_unlock(&attachment->dmabuf->lock);
+	return sg;
+}
+
+static void dma_buf_te_unmap(struct dma_buf_attachment *attachment,
+							 struct sg_table *sg, enum dma_data_direction direction)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = attachment->dmabuf->priv;
+
+	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction);
+	sg_free_table(sg);
+	kfree(sg);
+
+	mutex_lock(&attachment->dmabuf->lock);
+	alloc->nr_device_mappings--;
+	mutex_unlock(&attachment->dmabuf->lock);
+}
+
+static void dma_buf_te_release(struct dma_buf *buf)
+{
+	int i;
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* no need for locking */
+
+	if (alloc->contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++)
+			__free_page(alloc->pages[i]);
+	}
+	kfree(alloc->pages);
+	kfree(alloc);
+}
+
+
+static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings++;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	BUG_ON(alloc->nr_cpu_mappings <= 0);
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings--;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static int dma_buf_te_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dmabuf;
+	struct page *pageptr;
+
+	dmabuf = vma->vm_private_data;
+	alloc = dmabuf->priv;
+
+	if (vmf->pgoff > alloc->nr_pages)
+		return VM_FAULT_SIGBUS;
+
+	pageptr = alloc->pages[vmf->pgoff];
+
+	BUG_ON(!pageptr);
+
+	get_page(pageptr);
+	vmf->page = pageptr;
+
+	return 0;
+}
+
+struct vm_operations_struct dma_buf_te_vm_ops = {
+	.open = dma_buf_te_mmap_open,
+	.close = dma_buf_te_mmap_close,
+	.fault = dma_buf_te_mmap_fault
+};
+
+static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = dmabuf->priv;
+
+	if (alloc->fail_mmap)
+		return -ENOMEM;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTEXPAND;
+#endif
+	vma->vm_ops = &dma_buf_te_vm_ops;
+	vma->vm_private_data = dmabuf;
+
+	/*  we fault in the pages on access */
+
+	/* call open to do the ref-counting */
+	dma_buf_te_vm_ops.open(vma);
+
+	return 0;
+}
+
+static void *dma_buf_te_kmap_atomic(struct dma_buf *buf, unsigned long page_num)
+{
+	/* IGNORE */
+	return NULL;
+}
+
+static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long page_num)
+{
+	/* IGNORE */
+	return NULL;
+}
+
+static struct dma_buf_ops dma_buf_te_ops = {
+	/* real handlers */
+	.attach = dma_buf_te_attach,
+	.detach = dma_buf_te_detach,
+	.map_dma_buf = dma_buf_te_map,
+	.unmap_dma_buf = dma_buf_te_unmap,
+	.release = dma_buf_te_release,
+	.mmap = dma_buf_te_mmap,
+
+	/* nop handlers for mandatory functions we ignore */
+	.kmap_atomic = dma_buf_te_kmap_atomic,
+	.kmap = dma_buf_te_kmap
+};
+
+static int do_dma_buf_te_ioctl_version(struct dma_buf_te_ioctl_version __user *buf)
+{
+	struct dma_buf_te_ioctl_version v;
+
+	if (copy_from_user(&v, buf, sizeof(v)))
+		return -EFAULT;
+
+	if (v.op != DMA_BUF_TE_ENQ)
+		return -EFAULT;
+
+	v.op = DMA_BUF_TE_ACK;
+	v.major = DMA_BUF_TE_VER_MAJOR;
+	v.minor = DMA_BUF_TE_VER_MINOR;
+
+	if (copy_to_user(buf, &v, sizeof(v)))
+		return -EFAULT;
+	else
+		return 0;
+}
+
+static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, bool contiguous)
+{
+	struct dma_buf_te_ioctl_alloc alloc_req;
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dma_buf;
+	int i = 0;
+	int fd;
+
+	if (copy_from_user(&alloc_req, buf, sizeof(alloc_req))) {
+		dev_err(te_device.this_device, "%s: couldn't get user data", __func__);
+		goto no_input;
+	}
+
+	if (!alloc_req.size) {
+		dev_err(te_device.this_device, "%s: no size specified", __func__);
+		goto invalid_size;
+	}
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* Whilst it is possible to allocate larger buffer, we won't be able to
+	 * map it during actual usage (mmap() still succeeds). We fail here so
+	 * userspace code can deal with it early than having driver failure
+	 * later on. */
+	if (alloc_req.size > SG_MAX_SINGLE_ALLOC) {
+		dev_err(te_device.this_device, "%s: buffer size of %llu pages exceeded the mapping limit of %lu pages",
+				__func__, alloc_req.size, SG_MAX_SINGLE_ALLOC);
+		goto invalid_size;
+	}
+#endif
+
+	alloc = kzalloc(sizeof(struct dma_buf_te_alloc), GFP_KERNEL);
+	if (NULL == alloc) {
+		dev_err(te_device.this_device, "%s: couldn't alloc object", __func__);
+		goto no_alloc_object;
+	}
+
+	alloc->nr_pages = alloc_req.size;
+	alloc->contiguous = contiguous;
+
+	alloc->pages = kzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL);
+	if (!alloc->pages) {
+		dev_err(te_device.this_device,
+				"%s: couldn't alloc %d page structures", __func__,
+				alloc->nr_pages);
+		goto free_alloc_object;
+	}
+
+	if (contiguous) {
+		dma_addr_t dma_aux;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO, &attrs);
+#else
+		alloc->contig_cpu_addr = dma_alloc_writecombine(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO);
+#endif
+		if (!alloc->contig_cpu_addr) {
+			dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %d pages", __func__, alloc->nr_pages);
+			goto free_page_struct;
+		}
+		dma_aux = alloc->contig_dma_addr;
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = pfn_to_page(PFN_DOWN(dma_aux));
+			dma_aux += PAGE_SIZE;
+		}
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+			if (NULL == alloc->pages[i]) {
+				dev_err(te_device.this_device, "%s: couldn't alloc page", __func__);
+				goto no_page;
+			}
+		}
+	}
+
+	/* alloc ready, let's export it */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0))
+	{
+		struct dma_buf_export_info export_info = {
+			.exp_name = "dma_buf_te",
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0))
+			.owner = THIS_MODULE,
+#endif
+			.ops = &dma_buf_te_ops,
+			.size = alloc->nr_pages << PAGE_SHIFT,
+			.flags = O_CLOEXEC | O_RDWR,
+			.priv = alloc,
+		};
+
+		dma_buf = dma_buf_export(&export_info);
+	}
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0))
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR, NULL);
+#else
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR);
+#endif
+
+	if (IS_ERR_OR_NULL(dma_buf)) {
+		dev_err(te_device.this_device, "%s: couldn't export dma_buf", __func__);
+		goto no_export;
+	}
+
+	/* get fd for buf */
+	fd = dma_buf_fd(dma_buf, O_CLOEXEC);
+
+	if (fd < 0) {
+		dev_err(te_device.this_device, "%s: couldn't get fd from dma_buf", __func__);
+		goto no_fd;
+	}
+
+	return fd;
+
+no_fd:
+	dma_buf_put(dma_buf);
+no_export:
+	/* i still valid */
+no_page:
+	if (contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		while (i-- > 0)
+			__free_page(alloc->pages[i]);
+	}
+free_page_struct:
+	kfree(alloc->pages);
+free_alloc_object:
+	kfree(alloc);
+no_alloc_object:
+invalid_size:
+no_input:
+	return -EFAULT;
+}
+
+static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg)
+{
+	struct dma_buf_te_ioctl_status status;
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&status, arg, sizeof(status)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(status.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, get the current status */
+	alloc = dmabuf->priv;
+
+	/* lock while reading status to take a snapshot */
+	mutex_lock(&dmabuf->lock);
+	status.attached_devices = alloc->nr_attached_devices;
+	status.device_mappings = alloc->nr_device_mappings;
+	status.cpu_mappings = alloc->nr_cpu_mappings;
+	mutex_unlock(&dmabuf->lock);
+
+	if (copy_to_user(arg, &status, sizeof(status)))
+		goto err_have_dmabuf;
+
+	/* All OK */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing __user *arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_set_failing f;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, set the fail modes */
+	alloc = dmabuf->priv;
+	/* lock to set the fail modes atomically */
+	mutex_lock(&dmabuf->lock);
+	alloc->fail_attach = f.fail_attach;
+	alloc->fail_map    = f.fail_map;
+	alloc->fail_mmap   = f.fail_mmap;
+	mutex_unlock(&dmabuf->lock);
+
+	/* success */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
+{
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	unsigned int count;
+	unsigned int offset = 0;
+	int ret = 0;
+	int i;
+
+	attachment = dma_buf_attach(dma_buf, te_device.this_device);
+	if (IS_ERR_OR_NULL(attachment))
+		return -EBUSY;
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto no_import;
+	}
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		ret = dma_buf_begin_cpu_access(dma_buf, offset, sg_dma_len(sg), DMA_BIDIRECTIONAL);
+		if (ret)
+			goto no_cpu_access;
+		for (i = 0; i < sg_dma_len(sg); i = i + PAGE_SIZE) {
+			void *addr;
+
+			addr = dma_buf_kmap(dma_buf, i >> PAGE_SHIFT);
+			if (!addr) {
+				/* dma_buf_kmap is unimplemented in exynos and returns NULL */
+				dma_buf_end_cpu_access(dma_buf, offset, sg_dma_len(sg), DMA_BIDIRECTIONAL);
+				ret = -EPERM;
+				goto no_cpu_access;
+			}
+			memset(addr, value, PAGE_SIZE);
+			dma_buf_kunmap(dma_buf, i >> PAGE_SHIFT, addr);
+		}
+		dma_buf_end_cpu_access(dma_buf, offset, sg_dma_len(sg), DMA_BIDIRECTIONAL);
+		offset += sg_dma_len(sg);
+	}
+
+no_cpu_access:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+no_import:
+	dma_buf_detach(dma_buf, attachment);
+	return ret;
+}
+
+static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
+{
+
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_fill f;
+	int ret;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	ret = dma_te_buf_fill(dmabuf, f.value);
+	dma_buf_put(dmabuf);
+
+	return ret;
+}
+
+static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case DMA_BUF_TE_VERSION:
+		return do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg);
+	case DMA_BUF_TE_ALLOC:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false);
+	case DMA_BUF_TE_ALLOC_CONT:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true);
+	case DMA_BUF_TE_QUERY:
+		return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg);
+	case DMA_BUF_TE_SET_FAILING:
+		return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg);
+	case DMA_BUF_TE_FILL:
+		return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations dma_buf_te_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = dma_buf_te_ioctl,
+	.compat_ioctl = dma_buf_te_ioctl,
+};
+
+static int __init dma_buf_te_init(void)
+{
+	int res;
+	te_device.minor = MISC_DYNAMIC_MINOR;
+	te_device.name = "dma_buf_te";
+	te_device.fops = &dma_buf_te_fops;
+
+	res = misc_register(&te_device);
+	if (res) {
+		printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n");
+		return res;
+	}
+	te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	dev_info(te_device.this_device, "dma_buf_te ready\n");
+	return 0;
+
+}
+
+static void __exit dma_buf_te_exit(void)
+{
+	misc_deregister(&te_device);
+}
+
+module_init(dma_buf_te_init);
+module_exit(dma_buf_te_exit);
+MODULE_LICENSE("GPL");
+
diff --git a/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/sconscript b/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
new file mode 100755
index 0000000..09fe7f3
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
@@ -0,0 +1,33 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+Import('env')
+
+src = [Glob('#kernel/drivers/base/dma_buf_test_exporter/*.c'), Glob('#kernel/include/linux/*.h'), Glob('#kernel/drivers/base/dma_buf_test_exporter/K*')]
+
+env.Append( CPPPATH = '#kernel/include' )
+
+if env.GetOption('clean') :
+	env.Execute(Action("make clean", '[CLEAN] dma-buf-test-exporter'))
+	cmd = env.Command('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', src, [])
+	env.KernelObjTarget('dma-buf-test-exporter', cmd)
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make && ( ( [ -f dma-buf-test-exporter.ko ] && cp dma-buf-test-exporter.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/dma-buf-test-exporter.ko)", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', src, [makeAction])
+	env.KernelObjTarget('dma-buf-test-exporter', cmd)
+
+	
diff --git a/midgard/r11p0/kernel/drivers/base/kds/Kbuild b/midgard/r11p0/kernel/drivers/base/kds/Kbuild
new file mode 100755
index 0000000..a88acd8
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/kds/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_KDS) += kds.o
+obj-$(CONFIG_KDS_TEST) += kds_test.o
diff --git a/midgard/r11p0/kernel/drivers/base/kds/Kconfig b/midgard/r11p0/kernel/drivers/base/kds/Kconfig
new file mode 100755
index 0000000..5f96165
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/kds/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config KDS
+	tristate "Kernel dependency system"
+	help
+	  This option enables the generic kernel dependency system
diff --git a/midgard/r11p0/kernel/drivers/base/kds/Makefile b/midgard/r11p0/kernel/drivers/base/kds/Makefile
new file mode 100755
index 0000000..364d151
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/kds/Makefile
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+CONFIG_KDS_TEST ?= n
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: kds
+
+kds:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS=m
+
+kds_test:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS_TEST=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r11p0/kernel/drivers/base/kds/kds.c b/midgard/r11p0/kernel/drivers/base/kds/kds.c
new file mode 100755
index 0000000..62612d4
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/kds/kds.c
@@ -0,0 +1,559 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/kds.h>
+#include <linux/kref.h>
+
+#include <asm/atomic.h>
+
+#define KDS_LINK_TRIGGERED (1u << 0)
+#define KDS_LINK_EXCLUSIVE (1u << 1)
+
+#define KDS_INVALID (void *)-2
+#define KDS_RESOURCE (void *)-1
+
+struct kds_resource_set
+{
+	unsigned long         num_resources;
+	unsigned long         pending;
+	struct kds_callback  *cb;
+	void                 *callback_parameter;
+	void                 *callback_extra_parameter;
+	struct list_head      callback_link;
+	struct work_struct    callback_work;
+	atomic_t              cb_queued;
+	/* This resource set will be freed when there are no pending
+	 * callbacks */
+	struct kref           refcount;
+
+	/* This is only initted when kds_waitall() is called. */
+	wait_queue_head_t     wake;
+
+	struct kds_link       resources[0];
+
+};
+
+static DEFINE_SPINLOCK(kds_lock);
+
+static void __resource_set_release(struct kref *ref)
+{
+	struct kds_resource_set *rset = container_of(ref,
+			struct kds_resource_set, refcount);
+
+	kfree(rset);
+}
+
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb)
+{
+	int ret = 0;
+
+	cb->direct = direct;
+	cb->user_cb = user_cb;
+
+	if (!direct)
+	{
+		cb->wq = alloc_workqueue("kds", WQ_UNBOUND | WQ_HIGHPRI, WQ_UNBOUND_MAX_ACTIVE);
+		if (!cb->wq)
+			ret = -ENOMEM;
+	}
+	else
+	{
+		cb->wq = NULL;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(kds_callback_init);
+
+void kds_callback_term(struct kds_callback *cb)
+{
+	if (!cb->direct)
+	{
+		BUG_ON(!cb->wq);
+		destroy_workqueue(cb->wq);
+	}
+	else
+	{
+		BUG_ON(cb->wq);
+	}
+}
+
+EXPORT_SYMBOL(kds_callback_term);
+
+static void kds_do_user_callback(struct kds_resource_set *rset)
+{
+	rset->cb->user_cb(rset->callback_parameter, rset->callback_extra_parameter);
+}
+
+static void kds_queued_callback(struct work_struct *work)
+{
+	struct kds_resource_set *rset;
+	rset = container_of(work, struct kds_resource_set, callback_work);
+
+	atomic_dec(&rset->cb_queued);
+
+	kds_do_user_callback(rset);
+}
+
+static void kds_callback_perform(struct kds_resource_set *rset)
+{
+	if (rset->cb->direct)
+		kds_do_user_callback(rset);
+	else
+	{
+		int result;
+
+		atomic_inc(&rset->cb_queued);
+
+		result = queue_work(rset->cb->wq, &rset->callback_work);
+		/* if we got a 0 return it means we've triggered the same rset twice! */
+		WARN_ON(!result);
+	}
+}
+
+void kds_resource_init(struct kds_resource * const res)
+{
+	BUG_ON(!res);
+	INIT_LIST_HEAD(&res->waiters.link);
+	res->waiters.parent = KDS_RESOURCE;
+}
+EXPORT_SYMBOL(kds_resource_init);
+
+int kds_resource_term(struct kds_resource *res)
+{
+	unsigned long lflags;
+	BUG_ON(!res);
+	spin_lock_irqsave(&kds_lock, lflags);
+	if (!list_empty(&res->waiters.link))
+	{
+		spin_unlock_irqrestore(&kds_lock, lflags);
+		printk(KERN_ERR "ERROR: KDS resource is still in use\n");
+		return -EBUSY;
+	}
+	res->waiters.parent = KDS_INVALID;
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	return 0;
+}
+EXPORT_SYMBOL(kds_resource_term);
+
+int kds_async_waitall(
+		struct kds_resource_set ** const pprset,
+		struct kds_callback      *cb,
+		void                     *callback_parameter,
+		void                     *callback_extra_parameter,
+		int                       number_resources,
+		unsigned long            *exclusive_access_bitmap,
+		struct kds_resource     **resource_list)
+{
+	struct kds_resource_set *rset = NULL;
+	unsigned long lflags;
+	int i;
+	int triggered;
+	int err = -EFAULT;
+
+	BUG_ON(!pprset);
+	BUG_ON(!resource_list);
+	BUG_ON(!cb);
+
+	WARN_ONCE(number_resources > 10, "Waiting on a high numbers of resources may increase latency, see documentation.");
+
+	rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+	if (!rset)
+	{
+		return -ENOMEM;
+	}
+
+	rset->num_resources = number_resources;
+	rset->pending = number_resources;
+	rset->cb = cb;
+	rset->callback_parameter = callback_parameter;
+	rset->callback_extra_parameter = callback_extra_parameter;
+	INIT_LIST_HEAD(&rset->callback_link);
+	INIT_WORK(&rset->callback_work, kds_queued_callback);
+	atomic_set(&rset->cb_queued, 0);
+	kref_init(&rset->refcount);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		INIT_LIST_HEAD(&rset->resources[i].link);
+		rset->resources[i].parent = rset;
+	}
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		unsigned long link_state = 0;
+
+		if (test_bit(i, exclusive_access_bitmap))
+		{
+			link_state |= KDS_LINK_EXCLUSIVE;
+		}
+
+		/* no-one else waiting? */
+		if (list_empty(&resource_list[i]->waiters.link))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		/* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+		else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+				(((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		rset->resources[i].state = link_state;
+
+		/* avoid double wait (hang) */
+		if (!list_empty(&resource_list[i]->waiters.link))
+		{
+			/* adding same rset again? */
+			if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+			{
+				goto roll_back;
+			}
+		}
+		list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+	}
+
+	triggered = (rset->pending == 0);
+
+	/* set the pointer before the callback is called so it sees it */
+	*pprset = rset;
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	if (triggered)
+	{
+		/* all resources obtained, trigger callback */
+		kds_callback_perform(rset);
+	}
+
+	return 0;
+
+roll_back:
+	/* roll back */
+	while (i-- > 0)
+	{
+		list_del(&rset->resources[i].link);
+	}
+	err = -EINVAL;
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	kfree(rset);
+	return err;
+}
+EXPORT_SYMBOL(kds_async_waitall);
+
+static void wake_up_sync_call(void *callback_parameter, void *callback_extra_parameter)
+{
+	wait_queue_head_t *wait = (wait_queue_head_t *)callback_parameter;
+	wake_up(wait);
+}
+
+static struct kds_callback sync_cb =
+{
+	wake_up_sync_call,
+	1,
+	NULL,
+};
+
+struct kds_resource_set *kds_waitall(
+		int                   number_resources,
+		unsigned long        *exclusive_access_bitmap,
+		struct kds_resource **resource_list,
+		unsigned long         jiffies_timeout)
+{
+	struct kds_resource_set *rset;
+	unsigned long lflags;
+	int i;
+	int triggered = 0;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+	if (!rset)
+		return rset;
+
+	rset->num_resources = number_resources;
+	rset->pending = number_resources;
+	init_waitqueue_head(&rset->wake);
+	INIT_LIST_HEAD(&rset->callback_link);
+	INIT_WORK(&rset->callback_work, kds_queued_callback);
+	atomic_set(&rset->cb_queued, 0);
+	kref_init(&rset->refcount);
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		unsigned long link_state = 0;
+
+		if (test_bit(i, exclusive_access_bitmap))
+		{
+			link_state |= KDS_LINK_EXCLUSIVE;
+		}
+
+		if (list_empty(&resource_list[i]->waiters.link))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		/* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+		else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+				(((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+
+		INIT_LIST_HEAD(&rset->resources[i].link);
+		rset->resources[i].parent = rset;
+		rset->resources[i].state = link_state;
+
+		/* avoid double wait (hang) */
+		if (!list_empty(&resource_list[i]->waiters.link))
+		{
+			/* adding same rset again? */
+			if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+			{
+				goto roll_back;
+			}
+		}
+
+		list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+	}
+
+	if (rset->pending == 0)
+		triggered = 1;
+	else
+	{
+		rset->cb = &sync_cb;
+		rset->callback_parameter = &rset->wake;
+		rset->callback_extra_parameter = NULL;
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	if (!triggered)
+	{
+		long wait_res = 0;
+		long timeout = (jiffies_timeout == KDS_WAIT_BLOCKING) ?
+				MAX_SCHEDULE_TIMEOUT : jiffies_timeout;
+
+		if (timeout)
+		{
+			wait_res = wait_event_interruptible_timeout(rset->wake,
+					rset->pending == 0, timeout);
+		}
+
+		if ((wait_res == -ERESTARTSYS) || (wait_res == 0))
+		{
+			/* use \a kds_resource_set_release to roll back */
+			kds_resource_set_release(&rset);
+			return ERR_PTR(wait_res);
+		}
+	}
+	return rset;
+
+roll_back:
+	/* roll back */
+	while (i-- > 0)
+	{
+		list_del(&rset->resources[i].link);
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	kfree(rset);
+	return ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL(kds_waitall);
+
+static void trigger_new_rset_owner(struct kds_resource_set *rset,
+		struct list_head *triggered)
+{
+	if (0 == --rset->pending) {
+		/* new owner now triggered, track for callback later */
+		kref_get(&rset->refcount);
+		list_add(&rset->callback_link, triggered);
+	}
+}
+
+static void __kds_resource_set_release_common(struct kds_resource_set *rset)
+{
+	struct list_head triggered = LIST_HEAD_INIT(triggered);
+	struct kds_resource_set *it;
+	unsigned long lflags;
+	int i;
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < rset->num_resources; i++)
+	{
+		struct kds_resource *resource;
+		struct kds_link *it = NULL;
+
+		/* fetch the previous entry on the linked list */
+		it = list_entry(rset->resources[i].link.prev, struct kds_link, link);
+		/* unlink ourself */
+		list_del(&rset->resources[i].link);
+
+		/* any waiters? */
+		if (list_empty(&it->link))
+			continue;
+
+		/* were we the head of the list? (head if prev is a resource) */
+		if (it->parent != KDS_RESOURCE)
+		{
+			if ((it->state & KDS_LINK_TRIGGERED) && !(it->state & KDS_LINK_EXCLUSIVE))
+			{
+				/*
+				 * previous was triggered and not exclusive, so we
+				 * trigger non-exclusive until end-of-list or first
+				 * exclusive
+				 */
+
+				struct kds_link *it_waiting = it;
+
+				list_for_each_entry(it, &it_waiting->link, link)
+				{
+					/* exclusive found, stop triggering */
+					if (it->state & KDS_LINK_EXCLUSIVE)
+						break;
+
+					it->state |= KDS_LINK_TRIGGERED;
+					/* a parent to update? */
+					if (it->parent != KDS_RESOURCE)
+						trigger_new_rset_owner(
+								it->parent,
+								&triggered);
+				}
+			}
+			continue;
+		}
+
+		/* we were the head, find the kds_resource */
+		resource = container_of(it, struct kds_resource, waiters);
+
+		/* we know there is someone waiting from the any-waiters test above */
+
+		/* find the head of the waiting list */
+		it = list_first_entry(&resource->waiters.link, struct kds_link, link);
+
+		/* new exclusive owner? */
+		if (it->state & KDS_LINK_EXCLUSIVE)
+		{
+			/* link now triggered */
+			it->state |= KDS_LINK_TRIGGERED;
+			/* a parent to update? */
+			trigger_new_rset_owner(it->parent, &triggered);
+		}
+		/* exclusive releasing ? */
+		else if (rset->resources[i].state & KDS_LINK_EXCLUSIVE)
+		{
+			/* trigger non-exclusive until end-of-list or first exclusive */
+			list_for_each_entry(it, &resource->waiters.link, link)
+			{
+				/* exclusive found, stop triggering */
+				if (it->state & KDS_LINK_EXCLUSIVE)
+					break;
+
+				it->state |= KDS_LINK_TRIGGERED;
+				/* a parent to update? */
+				trigger_new_rset_owner(it->parent, &triggered);
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	while (!list_empty(&triggered))
+	{
+		it = list_first_entry(&triggered, struct kds_resource_set, callback_link);
+		list_del(&it->callback_link);
+		kds_callback_perform(it);
+
+		/* Free the resource set if no callbacks pending */
+		kref_put(&it->refcount, &__resource_set_release);
+	}
+}
+
+void kds_resource_set_release(struct kds_resource_set **pprset)
+{
+	struct kds_resource_set *rset;
+	int queued;
+
+	rset = cmpxchg(pprset, *pprset, NULL);
+
+	if (!rset)
+	{
+		/* caught a race between a cancelation
+		 * and a completion, nothing to do */
+		return;
+	}
+
+	__kds_resource_set_release_common(rset);
+
+	/*
+	 * Caller is responsible for guaranteeing that callback work is not
+	 * pending (i.e. its running or completed) prior to calling release.
+	 */
+	queued = atomic_read(&rset->cb_queued);
+	BUG_ON(queued);
+
+	kref_put(&rset->refcount, &__resource_set_release);
+}
+EXPORT_SYMBOL(kds_resource_set_release);
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset)
+{
+	struct kds_resource_set *rset;
+
+	rset = cmpxchg(pprset, *pprset, NULL);
+	if (!rset)
+	{
+		/* caught a race between a cancelation
+		 * and a completion, nothing to do */
+		return;
+	}
+
+	__kds_resource_set_release_common(rset);
+
+	/*
+	 * In the case of a kds async wait cancellation ensure the deferred
+	 * call back does not get scheduled if a trigger fired at the same time
+	 * to release the wait.
+	 */
+	cancel_work_sync(&rset->callback_work);
+
+	kref_put(&rset->refcount, &__resource_set_release);
+}
+EXPORT_SYMBOL(kds_resource_set_release_sync);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/midgard/r11p0/kernel/drivers/base/kds/sconscript b/midgard/r11p0/kernel/drivers/base/kds/sconscript
new file mode 100755
index 0000000..91a79fd
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/kds/sconscript
@@ -0,0 +1,63 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+import re
+Import('env')
+
+#Android uses sync_pt to accomplish KDS functionality.
+#Midgard KDS is not used by Android
+if env['os'] == 'android':
+	Return()
+
+# If KDS is built into the kernel already we skip building the module ourselves
+linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+search_term = '^[\ ]*CONFIG_KDS[\ ]*=[\ ]*y'
+build_kds = 1
+for line in open(linux_config_file, 'r'):
+	if re.search(search_term, line):
+		# KDS in kernel. Do not build KDS kernel module but
+		# still allow for building kds_test module.
+		build_kds = 0
+
+src = [Glob('#kernel/drivers/base/kds/*.c'), Glob('#kernel/include/linux/*.h'), Glob('#kernel/drivers/base/kds/K*')]
+
+env.Append( CPPPATH = '#kernel/include' )
+
+if Glob('tests/sconscript'):
+	SConscript( 'tests/sconscript' )
+
+if env.GetOption('clean') :
+	# Clean KDS module
+	if build_kds or (int(env['unit']) == 1):
+		env.Execute(Action("make clean", '[CLEAN] kds'))
+		cmd = env.Command('$STATIC_LIB_PATH/kds.ko', src, [])
+		env.KernelObjTarget('kds', cmd)
+else:
+	# Build KDS module
+	if build_kds:
+		makeAction=Action("cd ${SOURCE.dir} && make kds && cp kds.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+		cmd = env.Command('$STATIC_LIB_PATH/kds.ko', src, [makeAction])
+		env.KernelObjTarget('kds', cmd)
+
+	# Build KDS test module
+	if int(env['unit']) == 1:
+		makeActionTest=Action("cd ${SOURCE.dir} && make kds_test && cp kds_test.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+		cmdTest = env.Command('$STATIC_LIB_PATH/kds_test.ko', src, [makeActionTest])
+		env.KernelObjTarget('kds', cmdTest)
+		if build_kds:
+			env.Depends(cmdTest, cmd)
+
diff --git a/midgard/r11p0/kernel/drivers/base/sconscript b/midgard/r11p0/kernel/drivers/base/sconscript
new file mode 100755
index 0000000..b57cc7f
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/sconscript
@@ -0,0 +1,33 @@
+#
+# (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import( 'env' )
+
+if Glob('bus_logger/sconscript'):
+	if env['buslog'] == '1':
+		SConscript('bus_logger/sconscript')
+
+if Glob('dma_buf_lock/sconscript'):
+	SConscript('dma_buf_lock/sconscript')
+
+if Glob('kds/sconscript'):
+	SConscript('kds/sconscript')
+
+if Glob('ump/sconscript'):
+	SConscript('ump/sconscript')
+
+if Glob('dma_buf_test_exporter/sconscript'):
+	SConscript('dma_buf_test_exporter/sconscript')
+
diff --git a/midgard/r11p0/kernel/drivers/base/ump/Kbuild b/midgard/r11p0/kernel/drivers/base/ump/Kbuild
new file mode 100755
index 0000000..2bbdba2
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += src/
+
diff --git a/midgard/r11p0/kernel/drivers/base/ump/Kconfig b/midgard/r11p0/kernel/drivers/base/ump/Kconfig
new file mode 100755
index 0000000..f7451e6
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/Kconfig
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config UMP
+	tristate "Enable Unified Memory Provider (UMP) support"
+	default n
+    help
+	  Enable this option to build support for the ARM UMP module.
+	  UMP can be used by the Mali T6xx module to improve performance
+	  by reducing the copying of data by sharing memory.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate one module, called ump.
diff --git a/midgard/r11p0/kernel/drivers/base/ump/docs/Doxyfile b/midgard/r11p0/kernel/drivers/base/ump/docs/Doxyfile
new file mode 100755
index 0000000..fbec8eb
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/docs/Doxyfile
@@ -0,0 +1,125 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/include/linux/ump-common.h ../../kernel/include/linux/ump.h
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           += ../../kernel/drivers/base/ump
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           +=
diff --git a/midgard/r11p0/kernel/drivers/base/ump/docs/sconscript b/midgard/r11p0/kernel/drivers/base/ump/docs/sconscript
new file mode 100755
index 0000000..521278d
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/docs/sconscript
@@ -0,0 +1,31 @@
+#
+# (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+doxygen_sources = [
+	'Doxyfile',
+	Glob('*.png'),
+	Glob('../*.h'),
+	Glob('../src/*.h') ]
+
+if env['doc'] == '1':
+        doxygen_target = env.Command('doxygen/html/index.html', doxygen_sources,
+                                     ['cd ${SOURCE.dir} && doxygen'])
+        env.Clean(doxygen_target, './doxygen')
+
+        Alias('doxygen', doxygen_target)
+
diff --git a/midgard/r11p0/kernel/drivers/base/ump/example_kernel_api.c b/midgard/r11p0/kernel/drivers/base/ump/example_kernel_api.c
new file mode 100755
index 0000000..858dd8b
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/example_kernel_api.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Example routine to display information about an UMP allocation
+ * The routine takes an secure_id which can come from a different kernel module
+ * or from a client application (i.e. an ioctl).
+ * It creates a ump handle from the secure id (which validates the secure id)
+ * and if successful dumps the physical memory information.
+ * It follows the API and pins the memory while "using" the physical memory.
+ * Finally it calls the release function to indicate it's finished with the handle.
+ *
+ * If the function can't look up the handle it fails with return value -1.
+ * If the testy succeeds then it return 0.
+ * */
+
+static int display_ump_memory_information(ump_secure_id secure_id)
+{
+	const ump_dd_physical_block_64 * ump_blocks = NULL;
+	ump_dd_handle ump_mem;
+	uint64_t nr_blocks;
+	int i;
+	ump_alloc_flags flags;
+
+	/* get a handle from the secure id */
+	ump_mem = ump_dd_from_secure_id(secure_id);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE == ump_mem)
+	{
+		/* invalid ID received */
+		return -1;
+	}
+
+	/* at this point we know we've added a reference to the ump allocation, so we must release it with ump_dd_release */
+
+	ump_dd_phys_blocks_get_64(ump_mem, &nr_blocks, &ump_blocks);
+	flags = ump_dd_allocation_flags_get(ump_mem);
+
+	printf("UMP allocation with secure ID %u consists of %zd physical block(s):\n", secure_id, nr_blocks);
+
+	for(i=0; i<nr_blocks; ++i)
+	{
+		printf("\tBlock %d: 0x%08zX size 0x%08zX\n", i, ump_blocks[i].addr, ump_blocks[i].size);
+	}
+
+	printf("and was allocated using the following bitflag combo:  0x%lX\n", flags);
+
+	ump_dd_release(ump_mem);
+
+	return 0;
+}
+
diff --git a/midgard/r11p0/kernel/drivers/base/ump/example_user_api.c b/midgard/r11p0/kernel/drivers/base/ump/example_user_api.c
new file mode 100755
index 0000000..d143a64
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/example_user_api.c
@@ -0,0 +1,153 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <ump/ump.h>
+#include <memory.h>
+#include <stdio.h>
+
+/*
+ * Example routine to exercise the user space UMP api.
+ * This routine initializes the UMP api and allocates some CPU+device X memory.
+ * No usage hints are given, so the driver will use the default cacheability policy.
+ * With the allocation it creates a duplicate handle and plays with the reference count.
+ * Then it simulates interacting with a device and contains pseudo code for the device.
+ *
+ * If any error is detected correct cleanup will be performed and -1 will be returned.
+ * If successful then 0 will be returned.
+ */
+
+static int test_ump_user_api(void)
+{
+	/* This is the size we try to allocate*/
+	const size_t alloc_size = 4096;
+
+	ump_handle h = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_copy = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+	void * mapping = NULL;
+
+	ump_result ump_api_res;
+	int result = -1;
+
+	ump_secure_id id;
+
+	size_t size_returned;
+
+	ump_api_res = ump_open();
+	if (UMP_OK != ump_api_res)
+	{
+		/* failed to open an ump session */
+		/* early out */
+		return -1;
+	}
+
+	h = ump_allocate_64(alloc_size, UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | UMP_PROT_X_RD | UMP_PROT_X_WR);
+	/* the refcount is now 1 */
+	if (UMP_INVALID_MEMORY_HANDLE == h)
+	{
+		/* allocation failure */
+		goto cleanup;
+	}
+
+	/* this is how we could share this allocation with another process */
+
+	/* in process A: */
+	id = ump_secure_id_get(h);
+	/* still ref count 1 */
+	/* send the id to process B */
+
+	/* in process B: */
+	/* receive the id from A */
+	h_clone = ump_from_secure_id(id);
+	/* the ref count of the allocation is now 2 (one from each handle to it) */
+	/* do something ... */
+	/* release our clone */
+	ump_release(h_clone); /* safe to call even if ump_from_secure_id failed */
+	h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+
+	/* a simple save-for-future-use logic inside the driver would just copy the handle (but add a ref manually too!) */
+	/*
+	 * void assign_memory_to_job(h)
+	 * {
+	  */
+	h_copy = h;
+	ump_retain(h_copy); /* manual retain needed as we just assigned the handle, now 2 */
+	/*
+	 * }
+	 *
+	 * void job_completed(void)
+	 * {
+	 */
+	 ump_release(h_copy); /* normal handle release as if we got via an ump_allocate */
+	 h_copy = UMP_INVALID_MEMORY_HANDLE;
+	 /*
+	 * }
+	 */
+	
+	/* we're now back at ref count 1, and only h is a valid handle */
+	/* enough handle duplication show-off, let's play with the contents instead */
+
+	mapping = ump_map(h, 0, alloc_size);
+	if (NULL == mapping)
+	{
+		/* mapping failure, either out of address space or some other error */
+		goto cleanup;
+	}
+
+	memset(mapping, 0, alloc_size);
+
+	/* let's pretend we're going to start some hw device on this buffer and read the result afterwards */
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN, mapping, alloc_size);
+ 	/*
+		device cache invalidate
+
+		memory barrier
+
+		start device
+
+		memory barrier
+
+		wait for device
+
+		memory barrier
+
+		device cache clean
+
+		memory barrier
+	*/
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN_AND_INVALIDATE, mapping, alloc_size);
+
+	/* we could now peek at the result produced by the hw device, which is now accessible via our mapping */
+
+	/* unmap the buffer when we're done with it */
+	ump_unmap(h, mapping, alloc_size);
+
+	result = 0;
+
+cleanup:
+	ump_release(h);
+	h = UMP_INVALID_MEMORY_HANDLE;
+
+	ump_close();
+
+	return result;
+}
+
diff --git a/midgard/r11p0/kernel/drivers/base/ump/sconscript b/midgard/r11p0/kernel/drivers/base/ump/sconscript
new file mode 100755
index 0000000..b4aa8b6
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+if Glob('src/sconscript') and int(env['ump']) == 1:
+	SConscript( 'src/sconscript' )
+	SConscript( 'docs/sconscript' )
+
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/Kbuild b/midgard/r11p0/kernel/drivers/base/ump/src/Kbuild
new file mode 100755
index 0000000..de6d307
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/Kbuild
@@ -0,0 +1,50 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Paths required for build
+UMP_PATH = $(src)/../..
+UMP_DEVICEDRV_PATH = $(src)/.
+
+# Set up defaults if not defined by the user
+MALI_UNIT_TEST ?= 0
+
+SRC :=\
+	common/ump_kernel_core.c \
+	common/ump_kernel_descriptor_mapping.c \
+	linux/ump_kernel_linux.c \
+	linux/ump_kernel_linux_mem.c
+
+UNIT_TEST_DEFINES=
+ifeq ($(MALI_UNIT_TEST), 1)
+	MALI_DEBUG ?= 1
+
+	UNIT_TEST_DEFINES = -DMALI_UNIT_TEST=1 \
+	                    -DMALI_DEBUG=$(MALI_DEBUG)
+endif
+
+# Use our defines when compiling
+ccflags-y += -I$(UMP_PATH) -I$(UMP_DEVICEDRV_PATH) $(UNIT_TEST_DEFINES)
+
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_UMP) += ump.o
+ifeq ($(CONFIG_ION),y)
+ccflags-y += -I$(srctree)/drivers/staging/android/ion -I$(srctree)/include/linux
+obj-$(CONFIG_UMP) += imports/ion/ump_kernel_import_ion.o
+endif
+
+# Tell the Linux build system to enable building of our .c files
+ump-y := $(SRC:.c=.o)
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/Makefile b/midgard/r11p0/kernel/drivers/base/ump/src/Makefile
new file mode 100755
index 0000000..45428ad
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/Makefile
@@ -0,0 +1,81 @@
+#
+# (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+ifneq ($(KBUILD_EXTMOD),)
+include $(KBUILD_EXTMOD)/Makefile.common
+else
+include ./Makefile.common
+endif
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+RELATIVE_ROOT=../../../../..
+ROOT = $(CURDIR)/$(RELATIVE_ROOT)
+
+EXTRA_CFLAGS=-I$(CURDIR)/../../../../include
+
+ifeq ($(MALI_UNIT_TEST),1)
+	EXTRA_CFLAGS += -DMALI_UNIT_TEST=$(MALI_UNIT_TEST)
+endif
+
+# Get any user defined KDIR-<names> or maybe even a hardcoded KDIR
+-include KDIR_CONFIGURATION
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+CONFIG ?= $(ARCH)
+
+# default cpu to select
+CPU ?= $(shell uname -m)
+
+# look up KDIR based om CPU selection
+KDIR ?= $(KDIR-$(CPU))
+
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(CPU))
+endif
+
+# Validate selected config
+ifneq ($(shell [ -d arch-$(CONFIG) ] && [ -f arch-$(CONFIG)/config.h ] && echo "OK"), OK)
+$(warning Current directory is $(shell pwd))
+$(error No configuration found for config $(CONFIG). Check that arch-$(CONFIG)/config.h exists)
+else
+# Link arch to the selected arch-config directory
+$(shell [ -L arch ] && rm arch)
+$(shell ln -sf arch-$(CONFIG) arch)
+$(shell touch arch/config.h)
+endif
+
+EXTRA_SYMBOLS=
+
+ifeq ($(MALI_UNIT_TEST),1)
+	KBASE_PATH=$(ROOT)/kernel/drivers/gpu/arm/midgard
+	EXTRA_SYMBOLS+=$(KBASE_PATH)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+KDS_PATH=$(ROOT)/kernel/drivers/base/kds
+EXTRA_SYMBOLS+=$(KDS_PATH)/Module.symvers
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="$(EXTRA_CFLAGS) $(SCONS_CFLAGS)" CONFIG_UMP=m KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+kernelrelease:
+	$(MAKE) -C $(KDIR) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" kernelrelease
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/Makefile.common b/midgard/r11p0/kernel/drivers/base/ump/src/Makefile.common
new file mode 100755
index 0000000..f29a4c1
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/Makefile.common
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2008-2010, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+SRC = $(UMP_FILE_PREFIX)/common/ump_kernel_core.c \
+      $(UMP_FILE_PREFIX)/common/ump_kernel_descriptor_mapping.c
+
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/arch-arm/config.h b/midgard/r11p0/kernel/drivers/base/ump/src/arch-arm/config.h
new file mode 100755
index 0000000..152d98f
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/arch-arm/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/arch-arm64/config.h b/midgard/r11p0/kernel/drivers/base/ump/src/arch-arm64/config.h
new file mode 100755
index 0000000..cfb14ca
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/arch-arm64/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c b/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c
new file mode 100755
index 0000000..07aa077
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c
@@ -0,0 +1,756 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* module headers */
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+/* local headers */
+#include <common/ump_kernel_core.h>
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+#define UMP_FLAGS_RANGE ((UMP_PROT_SHAREABLE<<1) - 1u)
+
+static umpp_device device;
+
+ump_result umpp_core_constructor(void)
+{
+	mutex_init(&device.secure_id_map_lock);
+	device.secure_id_map = umpp_descriptor_mapping_create(UMP_EXPECTED_IDS, UMP_MAX_IDS);
+	if (NULL != device.secure_id_map)
+	{
+		if (UMP_OK == umpp_device_initialize())
+		{
+			return UMP_OK;
+		}
+		umpp_descriptor_mapping_destroy(device.secure_id_map);
+	}
+	mutex_destroy(&device.secure_id_map_lock);
+
+	return UMP_ERROR;
+}
+
+void umpp_core_destructor(void)
+{
+	umpp_device_terminate();
+	umpp_descriptor_mapping_destroy(device.secure_id_map);
+	mutex_destroy(&device.secure_id_map_lock);
+}
+
+umpp_session *umpp_core_session_start(void)
+{
+	umpp_session * session;
+
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
+	if (NULL != session)
+	{
+		mutex_init(&session->session_lock);
+
+		INIT_LIST_HEAD(&session->memory_usage);
+
+		/* try to create import client session, not a failure if they fail to initialize */
+		umpp_import_handlers_init(session);
+	}
+
+	return session;
+}
+
+void umpp_core_session_end(umpp_session *session)
+{
+	umpp_session_memory_usage * usage, *_usage;
+	UMP_ASSERT(session);
+
+	list_for_each_entry_safe(usage, _usage, &session->memory_usage, link)
+	{
+		printk(KERN_WARNING "UMP: Memory usage cleanup, releasing secure ID %d\n", ump_dd_secure_id_get(usage->mem));
+		ump_dd_release(usage->mem);
+		kfree(usage);
+
+	}
+
+	/* we should now not hold any imported memory objects,
+	 * detatch all import handlers */
+	umpp_import_handlers_term(session);
+
+	mutex_destroy(&session->session_lock);
+	kfree(session);
+}
+
+ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	umpp_allocation * alloc;
+	int i;
+
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD ) ) == 0 ||
+	    ( flags & (UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read and one write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL | __GFP_HARDWALL);
+
+	if (NULL == alloc)
+		goto out1;
+
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+	alloc->size = size;
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+#ifdef CONFIG_KDS
+	kds_resource_init(&alloc->kds_res);
+#endif
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	if (0 != umpp_phys_commit(alloc))
+	{
+		goto out2;
+	}
+
+	/* all set up, allocate an ID for it */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	return alloc;
+
+out3:
+	umpp_phys_free(alloc);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+uint64_t ump_dd_size_get_64(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->size;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_size_get(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->size <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->size;
+}
+
+ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->id;
+}
+
+#ifdef CONFIG_KDS
+struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return &alloc->kds_res;
+}
+#endif
+
+ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	return alloc->flags;
+}
+
+ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id)
+{
+	umpp_allocation * alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+
+	mutex_lock(&device.secure_id_map_lock);
+
+	if (0 == umpp_descriptor_mapping_lookup(device.secure_id_map, secure_id, (void**)&alloc))
+	{
+		if (NULL != alloc->filter_func)
+		{
+			if (!alloc->filter_func(secure_id, alloc, alloc->callback_data))
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /* the filter denied access */
+			}
+		}
+
+		/* check permission to access it */
+		if ((UMP_DD_INVALID_MEMORY_HANDLE != alloc) && !(alloc->flags & UMP_PROT_SHAREABLE))
+		{
+			if (alloc->owner != get_current()->pid)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /*no rights for the current process*/
+			}
+		}
+
+		if (UMP_DD_INVALID_MEMORY_HANDLE != alloc)
+		{
+			if( ump_dd_retain(alloc) != UMP_DD_SUCCESS)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+			}
+		}
+	}
+	mutex_unlock(&device.secure_id_map_lock);
+
+	return alloc;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id)
+{
+	return ump_dd_from_secure_id(secure_id);
+}
+
+int ump_dd_retain(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* check for overflow */
+	while(1)
+	{
+		int refcnt = atomic_read(&alloc->refcount);
+		if (refcnt + 1 > 0)
+		{
+			if(atomic_cmpxchg(&alloc->refcount, refcnt, refcnt + 1) == refcnt)
+			{
+				return 0;
+			}
+		}
+		else
+		{
+			return -EBUSY;
+		}
+	}
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_add(ump_dd_handle mem)
+{
+	ump_dd_retain(mem);
+}
+
+
+void ump_dd_release(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+	uint32_t new_cnt;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* secure the id for lookup while releasing */
+	mutex_lock(&device.secure_id_map_lock);
+
+	/* do the actual release */
+	new_cnt = atomic_sub_return(1, &alloc->refcount);
+	if (0 == new_cnt)
+	{
+		/* remove from the table as this was the last ref */
+		umpp_descriptor_mapping_remove(device.secure_id_map, alloc->id);
+	}
+
+	/* release the lock as early as possible */
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if (0 != new_cnt)
+	{
+		/* exit if still have refs */
+		return;
+	}
+
+	UMP_ASSERT(list_empty(&alloc->map_list));
+
+#ifdef CONFIG_KDS
+	if (kds_resource_term(&alloc->kds_res))
+	{
+		printk(KERN_ERR "ump_dd_release: kds_resource_term failed,"
+				"unable to release UMP allocation\n");
+		return;
+	}
+#endif
+	/* cleanup */
+	if (NULL != alloc->final_release_func)
+	{
+		alloc->final_release_func(alloc, alloc->callback_data);
+	}
+
+	if (0 == (alloc->management_flags & UMP_MGMT_EXTERNAL))
+	{
+		umpp_phys_free(alloc);
+	}
+	else
+	{
+		kfree(alloc->block_array);
+	}
+
+	mutex_destroy(&alloc->map_list_lock);
+
+	kfree(alloc);
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_release(ump_dd_handle mem)
+{
+	ump_dd_release(mem);
+}
+
+void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(pCount);
+	UMP_ASSERT(pArray);
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+	*pCount = alloc->blocksCount;
+	*pArray = alloc->block_array;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks)
+{
+	const umpp_allocation * alloc;
+	unsigned long i;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	if((uint64_t)num_blocks != alloc->blocksCount)
+	{
+		return UMP_DD_INVALID;
+	}
+
+	for( i = 0; i < num_blocks; i++)
+	{
+		UMP_ASSERT(alloc->block_array[i].addr <= UMP_UINT32_MAX);
+		UMP_ASSERT(alloc->block_array[i].size <= UMP_UINT32_MAX);
+
+		blocks[i].addr = (unsigned long)alloc->block_array[i].addr;
+		blocks[i].size = (unsigned long)alloc->block_array[i].size;
+	}
+
+	return UMP_DD_SUCCESS;
+}
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(block);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	UMP_ASSERT(alloc->block_array[index].addr <= UMP_UINT32_MAX);
+	UMP_ASSERT(alloc->block_array[index].size <= UMP_UINT32_MAX);
+
+	block->addr = (unsigned long)alloc->block_array[index].addr;
+	block->size = (unsigned long)alloc->block_array[index].size;
+
+	return UMP_DD_SUCCESS;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->blocksCount <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->blocksCount;
+}
+
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void *uaddr, size_t size)
+{
+	umpp_cpu_mapping *map;
+
+	void *target_first = uaddr;
+	void *target_last = (void*)((uintptr_t)uaddr - 1 + size);
+
+	if (target_last < target_first) /* wrapped */
+	{
+		return NULL;
+	}
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if ( map->vaddr_start <= target_first &&
+		   (void*)((uintptr_t)map->vaddr_start + (map->nr_pages << PAGE_SHIFT) - 1) >= target_last)
+		{
+			goto out;
+		}
+	}
+	map = NULL;
+out:
+	mutex_unlock(&alloc->map_list_lock);
+
+	return map;
+}
+
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map)
+{
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(map);
+	mutex_lock(&alloc->map_list_lock);
+	list_add(&map->link, &alloc->map_list);
+	mutex_unlock(&alloc->map_list_lock);
+}
+
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target)
+{
+	umpp_cpu_mapping * map;
+
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(target);
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if (map == target)
+		{
+			list_del(&target->link);
+			kfree(target);
+			mutex_unlock(&alloc->map_list_lock);
+			return;
+		}
+	}
+
+	/* not found, error */
+	UMP_ASSERT(0);
+}
+
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const  block_index, uint64_t * const block_internal_offset)
+{
+	uint64_t i;
+
+	for (i = 0 ; i < alloc->blocksCount; i++)
+	{
+		if (offset < alloc->block_array[i].size)
+		{
+			/* found the block_array element containing this offset */
+			*block_index = i;
+			*block_internal_offset = offset;
+			return 0;
+		}
+		offset -= alloc->block_array[i].size;
+	}
+
+	return -ENXIO;
+}
+
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size)
+{
+	umpp_allocation * alloc;
+	void *vaddr;
+	umpp_cpu_mapping * mapping;
+	uint64_t virt_page_off; /* offset of given address from beginning of the virtual mapping */
+	uint64_t phys_page_off; /* offset of the virtual mapping from the beginning of the physical buffer */
+	uint64_t page_count; /* number of pages to sync */
+	uint64_t i;
+	uint64_t block_idx;
+	uint64_t block_offset;
+	uint64_t paddr;
+
+	UMP_ASSERT((UMP_MSYNC_CLEAN == op) || (UMP_MSYNC_CLEAN_AND_INVALIDATE == op));
+
+	alloc = (umpp_allocation*)mem;
+	vaddr = (void*)(uintptr_t)address;
+
+	if((alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+	{
+		/* mapping is not cached */
+		return;
+	}
+
+	mapping = umpp_dd_find_enclosing_mapping(alloc, vaddr, size);
+	if (NULL == mapping)
+	{
+		printk(KERN_WARNING "UMP: Illegal cache sync address %lx\n", (uintptr_t)vaddr);
+		return; /* invalid pointer or size causes out-of-bounds */
+	}
+
+	/* we already know that address + size don't wrap around as umpp_dd_find_enclosing_mapping didn't fail */
+	page_count = ((((((uintptr_t)address + size - 1) & PAGE_MASK) - ((uintptr_t)address & PAGE_MASK))) >> PAGE_SHIFT) + 1;
+	virt_page_off = (vaddr - mapping->vaddr_start) >> PAGE_SHIFT;
+	phys_page_off = mapping->page_off;
+
+	if (umpp_dd_find_start_block(alloc, (virt_page_off + phys_page_off) << PAGE_SHIFT, &block_idx, &block_offset))
+	{
+		/* should not fail as a valid mapping was found, so the phys mem must exists */
+		printk(KERN_WARNING "UMP: Unable to find physical start block with offset %llx\n", virt_page_off + phys_page_off);
+		UMP_ASSERT(0);
+		return;
+	}
+
+	paddr = alloc->block_array[block_idx].addr + block_offset + (((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1));
+
+	for (i = 0; i < page_count; i++)
+	{
+		size_t offset = ((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1);
+		size_t sz = min((size_t)PAGE_SIZE - offset, size);
+
+		/* check if we've overrrun the current block, if so move to the next block */
+		if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+		{
+			block_idx++;
+			UMP_ASSERT(block_idx < alloc->blocksCount);
+			paddr = alloc->block_array[block_idx].addr;
+		}
+
+		if (UMP_MSYNC_CLEAN == op)
+		{
+			ump_sync_to_memory(paddr, vaddr, sz);
+		}
+		else /* (UMP_MSYNC_CLEAN_AND_INVALIDATE == op) already validated on entry */
+		{
+			ump_sync_to_cpu(paddr, vaddr, sz);
+		}
+
+		/* advance to next page  */
+		vaddr = (void*)((uintptr_t)vaddr + sz);
+		size -= sz;
+		paddr += sz;
+	}
+}
+
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	uint64_t size = 0;
+	uint64_t i;
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	for (i = 0; i < num_blocks; i++)
+	{
+		size += blocks[i].size;
+	}
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD
+	    | UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read or write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc),__GFP_HARDWALL | GFP_KERNEL);
+
+	if (NULL == alloc)
+	{
+		goto out1;
+	}
+
+	alloc->block_array = kzalloc(sizeof(ump_dd_physical_block_64) * num_blocks,__GFP_HARDWALL | GFP_KERNEL);
+	if (NULL == alloc->block_array)
+	{
+		goto out2;
+	}
+
+	memcpy(alloc->block_array, blocks, sizeof(ump_dd_physical_block_64) * num_blocks);
+
+#ifdef CONFIG_KDS
+	kds_resource_init(&alloc->kds_res);
+#endif
+	alloc->size = size;
+	alloc->blocksCount = num_blocks;
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+	/* all set up, allocate an ID */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	alloc->management_flags |= UMP_MGMT_EXTERNAL;
+
+	return alloc;
+
+out3:
+	kfree(alloc->block_array);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+
+/*
+ * UMP v1 API
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks)
+{
+	ump_dd_handle mem;
+	ump_dd_physical_block_64 *block_64_array;
+	ump_alloc_flags flags = UMP_V1_API_DEFAULT_ALLOCATION_FLAGS;
+	unsigned long i;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	block_64_array = kzalloc(num_blocks * sizeof(*block_64_array), __GFP_HARDWALL | GFP_KERNEL);
+
+	if(block_64_array == NULL)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/* copy physical blocks */
+	for( i = 0; i < num_blocks; i++)
+	{
+		block_64_array[i].addr = blocks[i].addr;
+		block_64_array[i].size = blocks[i].size;
+	}
+
+	mem = ump_dd_create_from_phys_blocks_64(block_64_array, num_blocks, flags, NULL, NULL, NULL);
+
+	kfree(block_64_array);
+
+	return mem;
+
+}
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h b/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h
new file mode 100755
index 0000000..8cb424d
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h
@@ -0,0 +1,228 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_CORE_H_
+#define _UMP_KERNEL_CORE_H_
+
+
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <asm/atomic.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/cred.h>
+#include <asm/mmu_context.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+#include <linux/ump-common.h>
+#include <ump/src/common/ump_kernel_descriptor_mapping.h>
+
+/* forward decl */
+struct umpp_session;
+
+/**
+ * UMP handle metadata.
+ * Tracks various data about a handle not of any use to user space
+ */
+typedef enum
+{
+	UMP_MGMT_EXTERNAL = (1ul << 0) /**< Handle created via the ump_dd_create_from_phys_blocks interface */
+	/* (1ul << 31) not to be used */
+} umpp_management_flags;
+
+/**
+ * Structure tracking the single global UMP device.
+ * Holds global data like the ID map
+ */
+typedef struct umpp_device
+{
+	struct mutex secure_id_map_lock; /**< Lock protecting access to the map */
+	umpp_descriptor_mapping * secure_id_map; /**< Map of all known secure IDs on the system */
+} umpp_device;
+
+/**
+ * Structure tracking all memory allocations of a UMP allocation.
+ * Tracks info about an mapping so we can verify cache maintenace
+ * operations and help in the unmap cleanup.
+ */
+typedef struct umpp_cpu_mapping
+{
+	struct list_head        link; /**< link to list of mappings for an allocation */
+	void                  *vaddr_start; /**< CPU VA start of the mapping */
+	size_t                nr_pages; /**< Size (in pages) of the mapping */
+	uint64_t              page_off; /**< Offset (in pages) from start of the allocation where the mapping starts */
+	ump_dd_handle         handle; /**< Which handle this mapping is linked to */
+	struct umpp_session * session; /**< Which session created the mapping */
+} umpp_cpu_mapping;
+
+/**
+ * Structure tracking UMP allocation.
+ * Represent a memory allocation with its ID.
+ * Tracks all needed meta-data about an allocation.
+ * */
+typedef struct umpp_allocation
+{
+	ump_secure_id id; /**< Secure ID of the allocation */
+	atomic_t refcount; /**< Usage count */
+
+	ump_alloc_flags flags; /**< Flags for all supported devices */
+	uint32_t management_flags; /**< Managment flags tracking */
+
+	pid_t owner; /**< The process ID owning the memory if not sharable */
+
+	ump_dd_security_filter filter_func; /**< Hook to verify use, called during retains from new clients */
+	ump_dd_final_release_callback final_release_func; /**< Hook called when the last reference is removed */
+	void* callback_data; /**< Additional data given to release hook */
+
+	uint64_t size; /**< Size (in bytes) of the allocation */
+	uint64_t blocksCount; /**< Number of physsical blocks the allocation is built up of */
+	ump_dd_physical_block_64 * block_array; /**< Array, one entry per block, describing block start and length */
+
+	struct mutex     map_list_lock; /**< Lock protecting the map_list */
+	struct list_head map_list; /**< Tracks all CPU VA mappings of this allocation */
+
+#ifdef CONFIG_KDS
+	struct kds_resource kds_res; /**< The KDS resource controlling access to this allocation */
+#endif
+
+	void * backendData; /**< Physical memory backend meta-data */
+} umpp_allocation;
+
+/**
+ * Structure tracking use of UMP memory by a session.
+ * Tracks the use of an allocation by a session so session termination can clean up any outstanding references.
+ * Also protects agains non-matched release calls from user space.
+ */
+typedef struct umpp_session_memory_usage
+{
+	ump_secure_id id; /**< ID being used. For quick look-up */
+	ump_dd_handle mem; /**< Handle being used. */
+
+	/**
+	 * Track how many times has the process retained this handle in the kernel.
+	 * This should usually just be 1(allocated or resolved) or 2(mapped),
+	 * but could be more if someone is playing with the low-level API
+	 * */
+	atomic_t process_usage_count;
+
+	struct list_head link; /**< link to other usage trackers for a session */
+} umpp_session_memory_usage;
+
+/**
+ * Structure representing a session/client.
+ * Tracks the UMP allocations being used by this client.
+ */
+typedef struct umpp_session
+{
+	struct mutex session_lock; /**< Lock for memory usage manipulation */
+	struct list_head memory_usage; /**< list of memory currently being used by the this session */
+	void*  import_handler_data[UMPP_EXTERNAL_MEM_COUNT]; /**< Import modules per-session data pointer */
+} umpp_session;
+
+/**
+ * UMP core setup.
+ * Called by any OS specific startup function to initialize the common part.
+ * @return UMP_OK if core initialized correctly, any other value for errors
+ */
+ump_result umpp_core_constructor(void);
+
+/**
+ * UMP core teardown.
+ * Called by any OS specific unload function to clean up the common part.
+ */
+void umpp_core_destructor(void);
+
+/**
+ * UMP session start.
+ * Called by any OS specific session handler when a new session is detected
+ * @return Non-NULL if a matching core session could be set up. NULL on failure
+ */
+umpp_session *umpp_core_session_start(void);
+
+/**
+ * UMP session end.
+ * Called by any OS specific session handler when a session is ended/terminated.
+ * @param session The core session object returned by ump_core_session_start
+ */
+void umpp_core_session_end(umpp_session *session);
+
+/**
+ * Find a mapping object (if any) for this allocation.
+ * Called by any function needing to identify a mapping from a user virtual address.
+ * Verifies that the whole range to be within a mapping object.
+ * @param alloc The UMP allocation to find a matching mapping object of
+ * @param uaddr User mapping address to find the mapping object for
+ * @param size Length of the mapping
+ * @return NULL on error (no match found), pointer to mapping object if match found
+ */
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void* uaddr, size_t size);
+
+/**
+ * Register a new mapping of an allocation.
+ * Called by functions creating a new mapping of an allocation, typically OS specific handlers.
+ * @param alloc The allocation object which has been mapped
+ * @param map Info about the mapping
+ */
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map);
+
+/**
+ * Remove and free mapping object from an allocation.
+ * @param alloc The allocation object to remove the mapping info from
+ * @param target The mapping object to remove
+ */
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target);
+
+/**
+ * Helper to find a block in the blockArray which holds a given byte offset.
+ * @param alloc The allocation object to find the block in
+ * @param offset Offset (in bytes) from allocation start to find the block of
+ * @param[out] block_index Pointer to the index of the block matching
+ * @param[out] block_internal_offset Offset within the returned block of the searched offset
+ * @return 0 if a matching block was found, any other value for error
+ */
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const block_index, uint64_t * const block_internal_offset);
+
+/**
+ * Cache maintenance helper.
+ * Performs the requested cache operation on the given handle.
+ * @param mem Allocation handle
+ * @param op Cache maintenance operation to perform
+ * @param address User mapping at which to do the operation
+ * @param size Length (in bytes) of the range to do the operation on
+ */
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size);
+
+/**
+ * Import module session early init.
+ * Calls session_begin on all installed import modules.
+ * @param session The core session object to initialized the import handler for
+ * */
+void umpp_import_handlers_init(umpp_session * session);
+
+/**
+ * Import module session cleanup.
+ * Calls session_end on all import modules bound to the session.
+ * @param session The core session object to initialized the import handler for
+ */
+void umpp_import_handlers_term(umpp_session * session);
+
+#endif /* _UMP_KERNEL_CORE_H_ */
+
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c b/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
new file mode 100755
index 0000000..c5b0d74
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
@@ -0,0 +1,162 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <common/ump_kernel_priv.h>
+
+#define MALI_PAD_INT(x) (((x) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1))
+
+/**
+ * Allocate a descriptor table capable of holding 'count' mappings
+ * @param count Number of mappings in the table
+ * @return Pointer to a new table, NULL on error
+ */
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count);
+
+/**
+ * Free a descriptor table
+ * @param table The table to free
+ */
+static void descriptor_table_free(umpp_descriptor_table * table);
+
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries)
+{
+	umpp_descriptor_mapping * map = kzalloc(sizeof(umpp_descriptor_mapping), GFP_KERNEL);
+
+	init_entries = MALI_PAD_INT(init_entries);
+	max_entries = MALI_PAD_INT(max_entries);
+
+	if (NULL != map)
+	{
+		map->table = descriptor_table_alloc(init_entries);
+		if (NULL != map->table)
+		{
+			init_rwsem( &map->lock);
+			set_bit(0, map->table->usage);
+			map->max_nr_mappings_allowed = max_entries;
+			map->current_nr_mappings = init_entries;
+			return map;
+
+			descriptor_table_free(map->table);
+		}
+		kfree(map);
+	}
+	return NULL;
+}
+
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map)
+{
+	UMP_ASSERT(NULL != map);
+	descriptor_table_free(map->table);
+	kfree(map);
+}
+
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target)
+{
+ 	int descriptor = 0;
+	UMP_ASSERT(NULL != map);
+	down_write( &map->lock);
+	descriptor = find_first_zero_bit(map->table->usage, map->current_nr_mappings);
+	if (descriptor == map->current_nr_mappings)
+	{
+		/* no free descriptor, try to expand the table */
+		umpp_descriptor_table * new_table;
+		umpp_descriptor_table * old_table = map->table;
+		int nr_mappings_new = map->current_nr_mappings + BITS_PER_LONG;
+
+		if (map->current_nr_mappings >= map->max_nr_mappings_allowed)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+		new_table = descriptor_table_alloc(nr_mappings_new);
+		if (NULL == new_table)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+ 		memcpy(new_table->usage, old_table->usage, (sizeof(unsigned long)*map->current_nr_mappings) / BITS_PER_LONG);
+ 		memcpy(new_table->mappings, old_table->mappings, map->current_nr_mappings * sizeof(void*));
+
+ 		map->table = new_table;
+		map->current_nr_mappings = nr_mappings_new;
+		descriptor_table_free(old_table);
+	}
+
+	/* we have found a valid descriptor, set the value and usage bit */
+	set_bit(descriptor, map->table->usage);
+	map->table->mappings[descriptor] = target;
+
+unlock_and_exit:
+	up_write(&map->lock);
+	return descriptor;
+}
+
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target)
+{
+	int result = -EINVAL;
+ 	UMP_ASSERT(map);
+	UMP_ASSERT(target);
+ 	down_read(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		*target = map->table->mappings[descriptor];
+		result = 0;
+	}
+	/* keep target untouched if the descriptor was not found */
+	up_read(&map->lock);
+	return result;
+}
+
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor)
+{
+	UMP_ASSERT(map);
+ 	down_write(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		map->table->mappings[descriptor] = NULL;
+		clear_bit(descriptor, map->table->usage);
+	}
+	up_write(&map->lock);
+}
+
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count)
+{
+	umpp_descriptor_table * table;
+
+	table = kzalloc(sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG) + (sizeof(void*) * count), __GFP_HARDWALL | GFP_KERNEL );
+
+	if (NULL != table)
+	{
+		table->usage = (unsigned long*)((u8*)table + sizeof(umpp_descriptor_table));
+		table->mappings = (void**)((u8*)table + sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG));
+	}
+
+	return table;
+}
+
+static void descriptor_table_free(umpp_descriptor_table * table)
+{
+ 	UMP_ASSERT(table);
+	kfree(table);
+}
+
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h b/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
new file mode 100755
index 0000000..d06c145
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_kernel_descriptor_mapping.h
+ */
+
+#ifndef _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+#define _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+/**
+ * The actual descriptor mapping table, never directly accessed by clients
+ */
+typedef struct umpp_descriptor_table
+{
+	/* keep as a unsigned long to rely on the OS's bitops support */
+	unsigned long * usage; /**< Pointer to bitpattern indicating if a descriptor is valid/used(1) or not(0) */
+	void** mappings; /**< Array of the pointers the descriptors map to */
+} umpp_descriptor_table;
+
+/**
+ * The descriptor mapping object
+ * Provides a separate namespace where we can map an integer to a pointer
+ */
+typedef struct umpp_descriptor_mapping
+{
+	struct rw_semaphore lock; /**< Lock protecting access to the mapping object */
+	unsigned int max_nr_mappings_allowed; /**< Max number of mappings to support in this namespace */
+	unsigned int current_nr_mappings; /**< Current number of possible mappings */
+	umpp_descriptor_table * table; /**< Pointer to the current mapping table */
+} umpp_descriptor_mapping;
+
+/**
+ * Create a descriptor mapping object.
+ * Create a descriptor mapping capable of holding init_entries growable to max_entries.
+ * ID 0 is reserved so the number of available entries will be max - 1.
+ * @param init_entries Number of entries to preallocate memory for
+ * @param max_entries Number of entries to max support
+ * @return Pointer to a descriptor mapping object, NULL on failure
+ */
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries);
+
+/**
+ * Destroy a descriptor mapping object
+ * @param[in] map The map to free
+ */
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map);
+
+/**
+ * Allocate a new mapping entry (descriptor ID)
+ * Allocates a new entry in the map.
+ * @param[in] map The map to allocate a new entry in
+ * @param[in] target The value to map to
+ * @return The descriptor allocated, ID 0 on failure.
+ */
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target);
+
+/**
+ * Get the value mapped to by a descriptor ID
+ * @param[in] map The map to lookup the descriptor id in
+ * @param[in] descriptor The descriptor ID to lookup
+ * @param[out] target Pointer to a pointer which will receive the stored value
+ *
+ * @return 0 on success lookup, -EINVAL on lookup failure.
+ */
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target);
+
+/**
+ * Free the descriptor ID
+ * For the descriptor to be reused it has to be freed
+ * @param[in] map The map to free the descriptor from
+ * @param descriptor The descriptor ID to free
+ */
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor);
+
+#endif /* _UMP_KERNEL_DESCRIPTOR_MAPPING_H_ */
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h b/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
new file mode 100755
index 0000000..38b6f1b
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_PRIV_H_
+#define _UMP_KERNEL_PRIV_H_
+
+#ifdef __KERNEL__
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <asm/cacheflush.h>
+#endif
+
+
+#define UMP_EXPECTED_IDS 64
+#define UMP_MAX_IDS 32768
+
+#ifdef __KERNEL__
+#define UMP_ASSERT(expr) \
+		if (!(expr)) { \
+			printk(KERN_ERR "UMP: Assertion failed! %s,%s,%s,line=%d\n",\
+					#expr,__FILE__,__func__,__LINE__); \
+					BUG(); \
+		}
+
+static inline void ump_sync_to_memory(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/*TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_TO_DEVICE);
+	mb(); /* for outer_sync (if needed) */
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+
+static inline void ump_sync_to_cpu(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/* TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_FROM_DEVICE);
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+#endif /* __KERNEL__*/
+#endif /* _UMP_KERNEL_PRIV_H_ */
+
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/imports/ion/Makefile b/midgard/r11p0/kernel/drivers/base/ump/src/imports/ion/Makefile
new file mode 100755
index 0000000..ef74b27
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/imports/ion/Makefile
@@ -0,0 +1,53 @@
+#
+# (C) COPYRIGHT 2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+
+ifneq ($(KERNELRELEASE),)
+# Inside the kernel build system
+
+EXTRA_CFLAGS += -I$(KBUILD_EXTMOD) -I$(KBUILD_EXTMOD)/../../../../..
+KBUILD_EXTRA_SYMBOLS += "$(KBUILD_EXTMOD)/../../Module.symvers"
+
+SRC += ump_kernel_import_ion.c
+
+MODULE:=ump_ion_import.ko
+
+obj-m := $(MODULE:.ko=.o)
+$(MODULE:.ko=-y) := $(SRC:.c=.o)
+$(MODULE:.ko=-objs) := $(SRC:.c=.o)
+
+else
+# Outside the kernel build system
+#
+#
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR)
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+endif
+
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/imports/ion/sconscript b/midgard/r11p0/kernel/drivers/base/ump/src/imports/ion/sconscript
new file mode 100755
index 0000000..cff24c8
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/imports/ion/sconscript
@@ -0,0 +1,49 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ion = env.Clone()
+
+if env_ion['ump_ion'] != '1':
+	Return()
+
+# Source files required for UMP.
+ion_src = [Glob('#kernel/drivers/base/ump/src/imports/ion/*.c')]
+
+# Note: cleaning via the Linux kernel build system does not yet work
+if env_ion.GetOption('clean') :
+	makeAction=Action("cd ${SOURCE.dir} && make clean", '$MAKECOMSTR')
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make PLATFORM=${platform} && cp ump_ion_import.ko $STATIC_LIB_PATH/ump_ion_import.ko", '$MAKECOMSTR')
+# The target is ump_import_ion.ko, built from the source in ion_src, via the action makeAction
+# ump_import_ion.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+# kernel build system, after which it can be installed to the directory specified if
+# "libs_install" is set; this is done by LibTarget.
+cmd = env_ion.Command('$STATIC_LIB_PATH/ump_ion_import.ko', ion_src, [makeAction])
+
+# Until we fathom out how the invoke the Linux build system to clean, we can use Clean
+# to remove generated files.
+
+patterns = ['*.mod.c', '*.o', '*.ko', '*.a', '.*.cmd', 'modules.order', '.tmp_versions', 'Module.symvers']
+
+for p in patterns:
+	Clean(cmd, Glob('#kernel/drivers/base/ump/src/imports/ion/%s' % p))
+
+env_ion.Depends('$STATIC_LIB_PATH/ump_ion_import.ko', '$STATIC_LIB_PATH/ump.ko')
+env_ion.KernelObjTarget('ump', cmd)
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c b/midgard/r11p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
new file mode 100755
index 0000000..12b2e32
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/dma-mapping.h>
+#include "ion.h"
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+struct ion_wrapping_info
+{
+	struct ion_client *   ion_client;
+	struct ion_handle *   ion_handle;
+	int                   num_phys_blocks;
+	struct scatterlist *  sglist;
+};
+
+static struct ion_device * ion_device_get(void)
+{
+	/* < Customer to provide implementation >
+	 * Return a pointer to the global ion_device on the system
+	 */
+	return NULL;
+}
+
+static int import_ion_client_create(void** const custom_session_data)
+{
+	struct ion_client ** ion_client;
+
+	ion_client = (struct ion_client**)custom_session_data;
+
+	*ion_client = ion_client_create(ion_device_get(), "ump");
+
+	return PTR_RET(*ion_client);
+}
+
+
+static void import_ion_client_destroy(void* custom_session_data)
+{
+	struct ion_client * ion_client;
+
+	ion_client = (struct ion_client*)custom_session_data;
+	BUG_ON(!ion_client);
+
+	ion_client_destroy(ion_client);
+}
+
+
+static void import_ion_final_release_callback(const ump_dd_handle handle, void * info)
+{
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!info);
+
+	(void)handle;
+	ion_info = (struct ion_wrapping_info*)info;
+
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+	kfree(ion_info);
+	module_put(THIS_MODULE);
+}
+
+static ump_dd_handle import_ion_import(void * custom_session_data, void * pfd, ump_alloc_flags flags)
+{
+	int fd;
+	ump_dd_handle ump_handle;
+	struct scatterlist * sg;
+	int num_dma_blocks;
+	ump_dd_physical_block_64 * phys_blocks;
+	unsigned long i;
+	struct sg_table * sgt;
+
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!custom_session_data);
+	BUG_ON(!pfd);
+
+	ion_info = kzalloc(GFP_KERNEL, sizeof(*ion_info));
+	if (NULL == ion_info)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	ion_info->ion_client = (struct ion_client*)custom_session_data;
+
+	if (get_user(fd, (int*)pfd))
+	{
+		goto out;
+	}
+
+	ion_info->ion_handle = ion_import_dma_buf(ion_info->ion_client, fd);
+
+	if (IS_ERR_OR_NULL(ion_info->ion_handle))
+	{
+		goto out;
+	}
+
+	sgt = ion_sg_table(ion_info->ion_client, ion_info->ion_handle);
+	if (IS_ERR_OR_NULL(sgt))
+	{
+		goto ion_dma_map_failed;
+	}
+
+	ion_info->sglist = sgt->sgl;
+
+	sg = ion_info->sglist;
+	while (sg)
+	{
+		ion_info->num_phys_blocks++;
+		sg = sg_next(sg);
+	}
+
+	num_dma_blocks = dma_map_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	if (0 == num_dma_blocks)
+	{
+		goto linux_dma_map_failed;
+	}
+
+	phys_blocks = vmalloc(num_dma_blocks * sizeof(*phys_blocks));
+	if (NULL == phys_blocks)
+	{
+		goto vmalloc_failed;
+	}
+
+	for_each_sg(ion_info->sglist, sg, num_dma_blocks, i)
+	{
+		phys_blocks[i].addr = sg_phys(sg);
+		phys_blocks[i].size = sg_dma_len(sg);
+	}
+
+	ump_handle = ump_dd_create_from_phys_blocks_64(phys_blocks, num_dma_blocks, flags, NULL, import_ion_final_release_callback, ion_info);
+
+	vfree(phys_blocks);
+
+	if (ump_handle != UMP_DD_INVALID_MEMORY_HANDLE)
+	{
+		/*
+		 * As we have a final release callback installed
+		 * we must keep the module locked until
+		 * the callback has been triggered
+		 * */
+		__module_get(THIS_MODULE);
+		return ump_handle;
+	}
+
+	/* failed*/
+vmalloc_failed:
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+linux_dma_map_failed:
+ion_dma_map_failed:
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+out:
+	kfree(ion_info);
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+struct ump_import_handler import_handler_ion =
+{
+	.linux_module =  THIS_MODULE,
+	.session_begin = import_ion_client_create,
+	.session_end =   import_ion_client_destroy,
+	.import =        import_ion_import
+};
+
+static int __init import_ion_initialize_module(void)
+{
+	/* register with UMP */
+	return ump_import_module_register(UMP_EXTERNAL_MEM_TYPE_ION, &import_handler_ion);
+}
+
+static void __exit import_ion_cleanup_module(void)
+{
+	/* unregister import handler */
+	ump_import_module_unregister(UMP_EXTERNAL_MEM_TYPE_ION);
+}
+
+/* Setup init and exit functions for this module */
+module_init(import_ion_initialize_module);
+module_exit(import_ion_cleanup_module);
+
+/* And some module information */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/imports/sconscript b/midgard/r11p0/kernel/drivers/base/ump/src/imports/sconscript
new file mode 100755
index 0000000..8f50683
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/imports/sconscript
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os, sys
+Import('env')
+
+import_modules = [ os.path.join( path, 'sconscript' ) for path in sorted(os.listdir( os.getcwd() )) ]
+
+for m in import_modules:
+	if os.path.exists(m):
+		SConscript( m, variant_dir=os.path.join( env['BUILD_DIR_PATH'], os.path.dirname(m) ), duplicate=0 )
+
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c b/midgard/r11p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
new file mode 100755
index 0000000..d6c3c53
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
@@ -0,0 +1,831 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump-ioctl.h>
+#include <linux/ump.h>
+
+#include <asm/uaccess.h>	         /* copy_*_user */
+#include <linux/compat.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/device.h>            /* class registration support */
+
+#include <common/ump_kernel_core.h>
+
+#include "ump_kernel_linux_mem.h"
+#include <ump_arch.h>
+
+
+struct ump_linux_device
+{
+	struct cdev cdev;
+	struct class * ump_class;
+};
+
+/* Name of the UMP device driver */
+static char ump_dev_name[] = "ump"; /* should be const, but the functions we call requires non-cost */
+
+/* Module parameter to control log level */
+int ump_debug_level = 2;
+module_param(ump_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(ump_debug_level, "Higher number, more dmesg output");
+
+/* By default the module uses any available major, but it's possible to set it at load time to a specific number */
+int ump_major = 0;
+module_param(ump_major, int, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_major, "Device major number");
+
+#define UMP_REV_STRING "1.0"
+
+char * ump_revision = UMP_REV_STRING;
+module_param(ump_revision, charp, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_revision, "Revision info");
+
+static int umpp_linux_open(struct inode *inode, struct file *filp);
+static int umpp_linux_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+/* This variable defines the file operations this UMP device driver offers */
+static struct file_operations ump_fops =
+{
+	.owner   = THIS_MODULE,
+	.open    = umpp_linux_open,
+	.release = umpp_linux_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = umpp_linux_ioctl,
+#else
+	.ioctl   = umpp_linux_ioctl,
+#endif
+	.compat_ioctl = umpp_linux_ioctl,
+	.mmap = umpp_linux_mmap
+};
+
+/* import module handling */
+DEFINE_MUTEX(import_list_lock);
+struct ump_import_handler *  import_handlers[UMPP_EXTERNAL_MEM_COUNT];
+
+/* The global variable containing the global device data */
+static struct ump_linux_device ump_linux_device;
+
+#define DBG_MSG(level, ...) do { \
+if ((level) <=  ump_debug_level)\
+{\
+printk(KERN_DEBUG "UMP<" #level ">:\n" __VA_ARGS__);\
+} \
+} while (0)
+
+#define MSG_ERR(...) do{ \
+printk(KERN_ERR "UMP: ERR: %s\n           %s()%4d\n", __FILE__, __func__  , __LINE__) ; \
+printk(KERN_ERR __VA_ARGS__); \
+printk(KERN_ERR "\n"); \
+} while(0)
+
+#define MSG(...) do{ \
+printk(KERN_INFO "UMP: " __VA_ARGS__);\
+} while (0)
+
+/*
+ * This function is called by Linux to initialize this module.
+ * All we do is initialize the UMP device driver.
+ */
+static int __init umpp_linux_initialize_module(void)
+{
+	ump_result err;
+
+	err = umpp_core_constructor();
+	if (UMP_OK != err)
+	{
+		MSG_ERR("UMP device driver init failed\n");
+		return -ENOTTY;
+	}
+
+	MSG("UMP device driver %s loaded\n", UMP_REV_STRING);
+	return 0;
+}
+
+
+
+/*
+ * This function is called by Linux to unload/terminate/exit/cleanup this module.
+ * All we do is terminate the UMP device driver.
+ */
+static void __exit umpp_linux_cleanup_module(void)
+{
+	DBG_MSG(2, "Unloading UMP device driver\n");
+	umpp_core_destructor();
+	DBG_MSG(2, "Module unloaded\n");
+}
+
+
+
+/*
+ * Initialize the UMP device driver.
+ */
+ump_result umpp_device_initialize(void)
+{
+	int err;
+	dev_t dev = 0;
+
+	if (0 == ump_major)
+	{
+		/* auto select a major */
+		err = alloc_chrdev_region(&dev, 0, 1, ump_dev_name);
+		ump_major = MAJOR(dev);
+	}
+	else
+	{
+		/* use load time defined major number */
+		dev = MKDEV(ump_major, 0);
+		err = register_chrdev_region(dev, 1, ump_dev_name);
+	}
+
+	if (0 == err)
+	{
+		memset(&ump_linux_device, 0, sizeof(ump_linux_device));
+
+		/* initialize our char dev data */
+		cdev_init(&ump_linux_device.cdev, &ump_fops);
+		ump_linux_device.cdev.owner = THIS_MODULE;
+		ump_linux_device.cdev.ops = &ump_fops;
+
+		/* register char dev with the kernel */
+		err = cdev_add(&ump_linux_device.cdev, dev, 1/*count*/);
+		if (0 == err)
+		{
+
+			ump_linux_device.ump_class = class_create(THIS_MODULE, ump_dev_name);
+			if (IS_ERR(ump_linux_device.ump_class))
+			{
+				err = PTR_ERR(ump_linux_device.ump_class);
+			}
+			else
+			{
+				struct device * mdev;
+				mdev = device_create(ump_linux_device.ump_class, NULL, dev, NULL, ump_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return UMP_OK;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(ump_linux_device.ump_class);
+			}
+			cdev_del(&ump_linux_device.cdev);
+		}
+
+		unregister_chrdev_region(dev, 1);
+	}
+
+	return UMP_ERROR;
+}
+
+
+
+/*
+ * Terminate the UMP device driver
+ */
+void umpp_device_terminate(void)
+{
+	dev_t dev = MKDEV(ump_major, 0);
+
+	device_destroy(ump_linux_device.ump_class, dev);
+	class_destroy(ump_linux_device.ump_class);
+
+	/* unregister char device */
+	cdev_del(&ump_linux_device.cdev);
+
+	/* free major */
+	unregister_chrdev_region(dev, 1);
+}
+
+
+static int umpp_linux_open(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = umpp_core_session_start();
+	if (NULL == session)
+	{
+		return -EFAULT;
+	}
+	
+	filp->private_data = session;
+
+	return 0;
+}
+
+static int umpp_linux_release(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = filp->private_data;
+
+	umpp_core_session_end(session);
+
+	filp->private_data = NULL;
+
+	return 0;
+}
+
+/**************************/
+/*ioctl specific functions*/
+/**************************/
+static int do_ump_dd_allocate(umpp_session * session, ump_k_allocate * params)
+{
+	ump_dd_handle new_allocation;
+	new_allocation = ump_dd_allocate_64(params->size, params->alloc_flags, NULL, NULL, NULL);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	printk(KERN_WARNING "UMP: Allocation FAILED\n");
+	return -ENOMEM;
+}
+
+static int do_ump_dd_retain(umpp_session * session, ump_k_retain * params)
+{
+	umpp_session_memory_usage * it;
+
+	mutex_lock(&session->session_lock);
+
+	/* try to find it on the session usage list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found to already be in use */
+			/* check for overflow */
+			while(1)
+			{
+				int refcnt = atomic_read(&it->process_usage_count);
+				if (refcnt + 1 > 0)
+				{
+					/* add a process local ref */
+					if(atomic_cmpxchg(&it->process_usage_count, refcnt, refcnt + 1) == refcnt)
+					{
+						mutex_unlock(&session->session_lock);
+						return 0;
+					}
+				}
+				else
+				{
+					/* maximum usage cap reached */
+					mutex_unlock(&session->session_lock);
+					return -EBUSY;
+				}
+			}
+		}
+	}
+	/* try to look it up globally */
+
+	it = kmalloc(sizeof(*it), GFP_KERNEL);
+
+	if (NULL != it)
+	{
+		it->mem = ump_dd_from_secure_id(params->secure_id);
+		if (UMP_DD_INVALID_MEMORY_HANDLE != it->mem)
+		{
+			/* found, add it to the session usage list */
+			it->id = params->secure_id;
+			atomic_set(&it->process_usage_count, 1);
+			list_add(&it->link, &session->memory_usage);
+		}
+		else
+		{
+			/* not found */
+			kfree(it);
+			it = NULL;
+		}
+	}
+
+	mutex_unlock(&session->session_lock);
+
+	return (NULL != it) ? 0 : -ENODEV;
+}
+
+
+static int do_ump_dd_release(umpp_session * session, ump_k_release * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only do a release if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			result = 0;
+
+			if (0 == atomic_sub_return(1, &it->process_usage_count))
+			{
+				/* last ref in this process remove from the usage list and remove the underlying ref */
+				list_del(&it->link);
+				ump_dd_release(it->mem);
+				kfree(it);
+			}
+
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_sizequery(umpp_session * session, ump_k_sizequery * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->size = ump_dd_size_get_64(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_allocation_flags_get(umpp_session * session, ump_k_allocation_flags * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->alloc_flags = ump_dd_allocation_flags_get(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_msync_now(umpp_session * session, ump_k_msync * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, do the cache op */
+#ifdef CONFIG_COMPAT
+			if (is_compat_task())
+			{
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, compat_ptr(params->mapped_ptr.compat_value), params->size);
+				result = 0;
+			}
+			else
+			{
+#endif
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, params->mapped_ptr.value, params->size);
+				result = 0;
+#ifdef CONFIG_COMPAT
+			}
+#endif
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+
+void umpp_import_handlers_init(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		if (import_handlers[i])
+		{
+			import_handlers[i]->session_begin(&session->import_handler_data[i]);
+			/* It is OK if session_begin returned an error.
+			 * We won't do any import calls if so */
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+void umpp_import_handlers_term(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		/* only call if session_begin succeeded */
+		if (session->import_handler_data[i] != NULL)
+		{
+			/* if session_beging succeeded the handler
+			 * should not have unregistered with us */
+			BUG_ON(!import_handlers[i]);
+			import_handlers[i]->session_end(session->import_handler_data[i]);
+			session->import_handler_data[i] = NULL;
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler)
+{
+	int res = -EEXIST;
+
+	/* validate input */
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+	BUG_ON(!handler);
+	BUG_ON(!handler->linux_module);
+	BUG_ON(!handler->session_begin);
+	BUG_ON(!handler->session_end);
+	BUG_ON(!handler->import);
+
+	mutex_lock(&import_list_lock);
+
+	if (!import_handlers[type])
+	{
+		import_handlers[type] = handler;
+		res = 0;
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return res;
+}
+
+void ump_import_module_unregister(enum ump_external_memory_type type)
+{
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+
+	mutex_lock(&import_list_lock);
+	/* an error to call this if ump_import_module_register didn't succeed */
+	BUG_ON(!import_handlers[type]);
+	import_handlers[type] = NULL;
+	mutex_unlock(&import_list_lock);
+}
+
+static struct ump_import_handler * import_handler_get(unsigned int type_id)
+{
+	enum ump_external_memory_type type;
+	struct ump_import_handler * handler;
+
+	/* validate and convert input */
+	/* handle bad data here, not just BUG_ON */
+	if (type_id == 0 || type_id >= UMPP_EXTERNAL_MEM_COUNT)
+		return NULL;
+
+	type = (enum ump_external_memory_type)type_id;
+
+	/* find the handler */
+	mutex_lock(&import_list_lock);
+
+	handler = import_handlers[type];
+
+	if (handler)
+	{
+		if (!try_module_get(handler->linux_module))
+		{
+			handler = NULL;
+		}
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return handler;
+}
+
+static void import_handler_put(struct ump_import_handler * handler)
+{
+	module_put(handler->linux_module);
+}
+
+static int do_ump_dd_import(umpp_session * session, ump_k_import * params)
+{
+	ump_dd_handle new_allocation = UMP_DD_INVALID_MEMORY_HANDLE;
+	struct ump_import_handler * handler;
+
+	handler = import_handler_get(params->type);
+
+	if (handler)
+	{
+		/* try late binding if not already bound */
+		if (!session->import_handler_data[params->type])
+		{
+			handler->session_begin(&session->import_handler_data[params->type]);
+		}
+
+		/* do we have a bound session? */
+		if (session->import_handler_data[params->type])
+		{
+			new_allocation = handler->import( session->import_handler_data[params->type],
+		                                      params->phandle.value,
+		                                      params->alloc_flags);
+		}
+
+		/* done with the handler */
+		import_handler_put(handler);
+	}
+
+	/* did the import succeed? */
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	return -ENOMEM;
+
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	int ret;
+	uint64_t msg[(UMP_CALL_MAX_SIZE+7)>>3]; /* alignment fixup */
+	uint32_t size = _IOC_SIZE(cmd);
+	struct umpp_session *session = filp->private_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+	(void)inode; /* unused arg */
+#endif
+
+	/*
+	 * extract the type and number bitfields, and don't decode
+	 * wrong cmds: return ENOTTY (inappropriate ioctl) before access_ok()
+	 */
+	if (_IOC_TYPE(cmd) != UMP_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if (_IOC_NR(cmd) > UMP_IOC_MAXNR)
+	{
+		return -ENOTTY;
+	}
+
+	switch(cmd)
+	{
+		case UMP_FUNC_ALLOCATE:
+			if (size != sizeof(ump_k_allocate))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocate(session, (ump_k_allocate *)&msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_SIZEQUERY:
+			if (size != sizeof(ump_k_sizequery))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_sizequery(session,(ump_k_sizequery*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_MSYNC:
+			if (size != sizeof(ump_k_msync))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_msync_now(session,(ump_k_msync*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_IMPORT:
+			if (size != sizeof(ump_k_import))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user*)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_import(session, (ump_k_import*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		/* used only by v1 API */
+		case UMP_FUNC_ALLOCATION_FLAGS_GET:
+			if (size != sizeof(ump_k_allocation_flags))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocation_flags_get(session,(ump_k_allocation_flags*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_RETAIN:
+			if (size != sizeof(ump_k_retain))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_retain(session,(ump_k_retain*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		case UMP_FUNC_RELEASE:
+			if (size != sizeof(ump_k_release))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_release(session,(ump_k_release*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		default:
+			/* not ours */
+			return -ENOTTY;
+	}
+	/*redundant below*/
+	return -ENOTTY;
+}
+
+
+/* Export UMP kernel space API functions */
+EXPORT_SYMBOL(ump_dd_allocate_64);
+EXPORT_SYMBOL(ump_dd_allocation_flags_get);
+EXPORT_SYMBOL(ump_dd_secure_id_get);
+EXPORT_SYMBOL(ump_dd_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get_64);
+EXPORT_SYMBOL(ump_dd_size_get_64);
+EXPORT_SYMBOL(ump_dd_retain);
+EXPORT_SYMBOL(ump_dd_release);
+EXPORT_SYMBOL(ump_dd_create_from_phys_blocks_64);
+#ifdef CONFIG_KDS
+EXPORT_SYMBOL(ump_dd_kds_resource_get);
+#endif
+
+/* import API */
+EXPORT_SYMBOL(ump_import_module_register);
+EXPORT_SYMBOL(ump_import_module_unregister);
+
+
+
+/* V1 API */
+EXPORT_SYMBOL(ump_dd_handle_create_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_block_count_get);
+EXPORT_SYMBOL(ump_dd_phys_block_get);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get);
+EXPORT_SYMBOL(ump_dd_size_get);
+EXPORT_SYMBOL(ump_dd_reference_add);
+EXPORT_SYMBOL(ump_dd_reference_release);
+EXPORT_SYMBOL(ump_dd_handle_create_from_phys_blocks);
+
+
+/* Setup init and exit functions for this module */
+module_init(umpp_linux_initialize_module);
+module_exit(umpp_linux_cleanup_module);
+
+/* And some module informatio */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(UMP_REV_STRING);
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c b/midgard/r11p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
new file mode 100755
index 0000000..38db91e
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
@@ -0,0 +1,250 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+#include <linux/version.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/mm.h> /* memory mananger definitions */
+#include <linux/pfn.h>
+#include <linux/highmem.h> /*kmap*/
+
+#include <linux/compat.h> /* is_compat_task */
+
+#include <common/ump_kernel_core.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+static void umpp_vm_close(struct vm_area_struct *vma)
+{
+	umpp_cpu_mapping * mapping;
+	umpp_session * session;
+	ump_dd_handle handle;
+
+	mapping = (umpp_cpu_mapping*)vma->vm_private_data;
+	UMP_ASSERT(mapping);
+	
+	session = mapping->session;
+	handle = mapping->handle;
+
+	umpp_dd_remove_cpu_mapping(mapping->handle, mapping); /* will free the mapping object */
+	ump_dd_release(handle);
+}
+
+
+static const struct vm_operations_struct umpp_vm_ops = {
+	.close = umpp_vm_close
+};
+
+int umpp_phys_commit(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	/* round up to a page boundary */
+	alloc->size = (alloc->size + PAGE_SIZE - 1) & ~((uint64_t)PAGE_SIZE-1) ;
+	/* calculate number of pages */
+	alloc->blocksCount = alloc->size >> PAGE_SHIFT;
+
+	if( (sizeof(ump_dd_physical_block_64) * alloc->blocksCount) > ((size_t)-1))
+	{
+		printk(KERN_WARNING "UMP: umpp_phys_commit - trying to allocate more than possible\n");
+		return -ENOMEM;
+	}
+
+	alloc->block_array = kmalloc(sizeof(ump_dd_physical_block_64) * alloc->blocksCount, __GFP_HARDWALL | GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (NULL == alloc->block_array)
+	{
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		void * mp;
+		struct page * page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | __GFP_NOWARN | __GFP_COLD);
+		if (NULL == page)
+		{
+			break;
+		}
+
+		alloc->block_array[i].addr = page_to_pfn(page) << PAGE_SHIFT;
+		alloc->block_array[i].size = PAGE_SIZE;
+
+		mp = kmap(page);
+		if (NULL == mp)
+		{
+			__free_page(page);
+			break;
+		}
+
+		memset(mp, 0x00, PAGE_SIZE); /* instead of __GFP_ZERO, so we can do cache maintenance */
+		ump_sync_to_memory(PFN_PHYS(page_to_pfn(page)), mp, PAGE_SIZE);
+		kunmap(page);
+	}
+
+	if (i == alloc->blocksCount)
+	{
+		return 0;
+	}
+	else
+	{
+		uint64_t j;
+		for (j = 0; j < i; j++)
+		{
+			struct page * page;
+			page = pfn_to_page(alloc->block_array[j].addr >> PAGE_SHIFT);
+			__free_page(page);
+		}
+		
+		kfree(alloc->block_array);
+
+		return -ENOMEM;
+	}
+}
+
+void umpp_phys_free(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		__free_page(pfn_to_page(alloc->block_array[i].addr >> PAGE_SHIFT));
+	}
+
+	kfree(alloc->block_array);
+}
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma)
+{
+	ump_secure_id id;
+	ump_dd_handle h;
+	size_t offset;
+	int err = -EINVAL;
+	size_t length = vma->vm_end - vma->vm_start;
+
+	umpp_cpu_mapping * map = NULL;
+	umpp_session *session = filp->private_data;
+
+	if ( 0 == length )
+	{
+		return -EINVAL;
+	}
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (NULL == map)
+	{
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* unpack our arg */
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	if (is_compat_task())
+	{
+#endif
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_32;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_32;
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	}
+	else
+	{
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_64;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_64;
+	}
+#endif
+
+	h = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE != h)
+	{
+		uint64_t i;
+		uint64_t block_idx;
+		uint64_t block_offset;
+		uint64_t paddr;
+		umpp_allocation * alloc;
+		uint64_t last_byte;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0))
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO | VM_MIXEDMAP | VM_DONTDUMP;
+#else
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO | VM_MIXEDMAP;
+#endif
+		vma->vm_ops = &umpp_vm_ops;
+		vma->vm_private_data = map;
+
+		alloc = (umpp_allocation*)h;
+
+		if( (alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+		{
+			/* cache disabled flag set, disable caching for cpu mappings */
+			vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		}
+
+		last_byte = length + (offset << PAGE_SHIFT) - 1;
+		if (last_byte >= alloc->size || last_byte < (offset << PAGE_SHIFT))
+		{
+			goto err_out;
+		}
+
+		if (umpp_dd_find_start_block(alloc, offset << PAGE_SHIFT, &block_idx, &block_offset))
+		{
+			goto err_out;
+		}
+
+		paddr = alloc->block_array[block_idx].addr + block_offset;
+
+		for (i = 0; i < (length >> PAGE_SHIFT); i++)
+		{
+			/* check if we've overrrun the current block, if so move to the next block */
+			if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+			{
+				block_idx++;
+				UMP_ASSERT(block_idx < alloc->blocksCount);
+				paddr = alloc->block_array[block_idx].addr;
+			}
+
+			err = vm_insert_mixed(vma, vma->vm_start + (i << PAGE_SHIFT), paddr >> PAGE_SHIFT);
+			paddr += PAGE_SIZE;
+		}
+
+		map->vaddr_start = (void*)vma->vm_start;
+		map->nr_pages = length >> PAGE_SHIFT;
+		map->page_off = offset;
+		map->handle = h;
+		map->session = session;
+
+		umpp_dd_add_cpu_mapping(h, map);
+
+		return 0;
+
+		err_out:
+
+		ump_dd_release(h);
+	}
+
+	kfree(map);
+
+out:
+
+	return err;
+}
+
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h b/midgard/r11p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
new file mode 100755
index 0000000..4fbf3b2
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_LINUX_MEM_H_
+#define _UMP_KERNEL_LINUX_MEM_H_
+
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma);
+
+#endif /* _UMP_KERNEL_LINUX_MEM_H_ */
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/sconscript b/midgard/r11p0/kernel/drivers/base/ump/src/sconscript
new file mode 100755
index 0000000..d706e1e
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/sconscript
@@ -0,0 +1,61 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+import re
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ump = env.Clone()
+
+# Source files required for UMP.
+ump_src = [Glob('#kernel/drivers/base/ump/src/linux/*.c'), Glob('#kernel/drivers/base/ump/src/common/*.c'), Glob('#kernel/drivers/base/ump/src/imports/*/*.c')]
+
+env_ump.Append( CPPPATH='#kernel/drivers/base/ump/src/' )
+
+if env_ump.GetOption('clean') :
+	env_ump.Execute(Action("make clean", '[clean] ump'))
+	cmd = env_ump.Command('$STATIC_LIB_PATH/ump.ko', ump_src, [])
+else:
+	if env['unit'] == '1':
+		makeAction=Action("cd ${SOURCE.dir}/.. && make MALI_UNIT_TEST=${unit} PLATFORM=${platform} %s && cp ump.ko $STATIC_LIB_PATH/ump.ko" % env.kernel_get_config_defines(), '$MAKECOMSTR')
+	else:
+		# The target is ump.ko, built from the source in ump_src, via the action makeAction
+		# ump.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+		# kernel build system, after which it can be installed to the directory specified if
+		# "libs_install" is set; this is done by LibTarget.
+		makeAction=Action("cd ${SOURCE.dir}/.. && make PLATFORM=${platform} %s && cp ump.ko $STATIC_LIB_PATH/ump.ko" % env.kernel_get_config_defines(), '$MAKECOMSTR')
+	cmd = env_ump.Command('$STATIC_LIB_PATH/ump.ko', ump_src, [makeAction])
+
+# Add a dependency on kds.ko only when the build is not Android
+# Android uses sync_pt instead of Midgard KDS to achieve KDS functionality
+# Only necessary when KDS is not built into the kernel.
+#
+if env['os'] != 'android':
+	linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+	search_term = '^[\ ]*CONFIG_KDS[\ ]*=[\ ]*y'
+	kds_in_kernel = 0
+	for line in open(linux_config_file, 'r'):
+		if re.search(search_term, line):
+			# KDS in kernel.
+			kds_in_kernel = 1
+	if not kds_in_kernel:
+		env.Depends('$STATIC_LIB_PATH/ump.ko', '$STATIC_LIB_PATH/kds.ko')
+
+env_ump.KernelObjTarget('ump', cmd)
+
+SConscript( 'imports/sconscript' )
+
diff --git a/midgard/r11p0/kernel/drivers/base/ump/src/ump_arch.h b/midgard/r11p0/kernel/drivers/base/ump/src/ump_arch.h
new file mode 100755
index 0000000..2303d56
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/src/ump_arch.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_ARCH_H_
+#define _UMP_ARCH_H_
+
+#include <common/ump_kernel_core.h>
+
+/**
+ * Device specific setup.
+ * Called by the UMP core code to to host OS/device specific setup.
+ * Typical use case is device node creation for talking to user space.
+ * @return UMP_OK on success, any other value on failure
+ */
+extern ump_result umpp_device_initialize(void);
+
+/**
+ * Device specific teardown.
+ * Undo any things done by ump_device_initialize.
+ */
+extern void umpp_device_terminate(void);
+
+extern int umpp_phys_commit(umpp_allocation * alloc);
+extern void umpp_phys_free(umpp_allocation * alloc);
+
+#endif /* _UMP_ARCH_H_ */
diff --git a/midgard/r11p0/kernel/drivers/base/ump/ump_ref_drv.h b/midgard/r11p0/kernel/drivers/base/ump/ump_ref_drv.h
new file mode 100755
index 0000000..9a265fe
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/base/ump/ump_ref_drv.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_ref_drv.h
+ *
+ * This file contains the link to user space part of the UMP API for usage by MALI 400 gralloc.
+ *
+ */
+
+#ifndef _UMP_REF_DRV_H_
+#define _UMP_REF_DRV_H_
+
+#include <ump/ump.h>
+
+
+#endif /* _UMP_REF_DRV_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/Kbuild b/midgard/r11p0/kernel/drivers/gpu/arm/Kbuild
new file mode 100755
index 0000000..19c7e9a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/Kbuild
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/Kconfig b/midgard/r11p0/kernel/drivers/gpu/arm/Kconfig
new file mode 100755
index 0000000..1f30eb5
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/Kconfig
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menu "ARM GPU Configuration"
+source "drivers/gpu/arm/midgard/Kconfig"
+endmenu
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/Kbuild b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/Kbuild
new file mode 100755
index 0000000..f7041a7
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/Kbuild
@@ -0,0 +1,235 @@
+#
+# (C) COPYRIGHT 2012,2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r11p0-00rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+ifeq ($(CONFIG_MALI_ERROR_INJECTION),y)
+MALI_ERROR_INJECT_ON = 1
+endif
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_ERROR_INJECT_ON ?= 0
+MALI_MOCK_TEST ?= 0
+MALI_COVERAGE ?= 0
+MALI_INSTRUMENTATION_LEVEL ?= 0
+# This workaround is for what seems to be a compiler bug we observed in
+# GCC 4.7 on AOSP 4.3.  The bug caused an intermittent failure compiling
+# the "_Pragma" syntax, where an error message is returned:
+#
+# "internal compiler error: unspellable token PRAGMA"
+#
+# This regression has thus far only been seen on the GCC 4.7 compiler bundled
+# with AOSP 4.3.0.  So this makefile, intended for in-tree kernel builds
+# which are not known to be used with AOSP, is hardcoded to disable the
+# workaround, i.e. set the define to 0.
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
+	-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
+	-DMALI_COVERAGE=$(MALI_COVERAGE) \
+	-DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \
+	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+	-DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+
+# Use our defines when compiling
+ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+	mali_kbase_device.c \
+	mali_kbase_cache_policy.c \
+	mali_kbase_mem.c \
+	mali_kbase_mmu.c \
+	mali_kbase_ipa.c \
+	mali_kbase_jd.c \
+	mali_kbase_jd_debugfs.c \
+	mali_kbase_jm.c \
+	mali_kbase_gpuprops.c \
+	mali_kbase_js.c \
+	mali_kbase_js_ctx_attr.c \
+	mali_kbase_event.c \
+	mali_kbase_context.c \
+	mali_kbase_pm.c \
+	mali_kbase_config.c \
+	mali_kbase_instr.c \
+	mali_kbase_vinstr.c \
+	mali_kbase_softjobs.c \
+	mali_kbase_10969_workaround.c \
+	mali_kbase_hw.c \
+	mali_kbase_utility.c \
+	mali_kbase_debug.c \
+	mali_kbase_trace_timeline.c \
+	mali_kbase_gpu_memory_debugfs.c \
+	mali_kbase_mem_linux.c \
+	mali_kbase_core_linux.c \
+	mali_kbase_sync.c \
+	mali_kbase_sync_user.c \
+	mali_kbase_replay.c \
+	mali_kbase_mem_profile_debugfs.c \
+	mali_kbase_mmu_mode_lpae.c \
+	mali_kbase_disjoint_events.c \
+	mali_kbase_gator_api.c \
+	mali_kbase_debug_mem_view.c \
+	mali_kbase_debug_job_fault.c \
+	mali_kbase_smc.c \
+	mali_kbase_mem_pool.c \
+	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_tlstream.c
+
+ifeq ($(MALI_UNIT_TEST),1)
+	SRC += mali_kbase_tlstream_test.c
+endif
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+	SRC += mali_kbase_regs_dump_debugfs.c
+endif
+
+# Job Scheduler Policy: Completely Fair Scheduler
+SRC += mali_kbase_js_policy_cfs.c
+
+ccflags-y += -I$(KBASE_PATH)
+
+ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y)
+	SRC += mali_kbase_platform_fake.c
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y)
+		SRC += platform/vexpress/mali_kbase_config_vexpress.c \
+		platform/vexpress/mali_kbase_cpu_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y)
+		SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/rtsm_ve
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_JUNO),y)
+		SRC += platform/juno/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/juno
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_JUNO_SOC),y)
+		SRC += platform/juno_soc/mali_kbase_config_juno_soc.c
+		ccflags-y += -I$(src)/platform/juno_soc
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_1XV7_A57),y)
+		SRC += platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress_1xv7_a57
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y)
+		SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \
+		platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress_6xvirtex7_10mhz
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_A7_KIPLING),y)
+		SRC += platform/a7_kipling/mali_kbase_config_a7_kipling.c \
+		platform/a7_kipling/mali_kbase_cpu_a7_kipling.c
+		ccflags-y += -I$(src)/platform/a7_kipling
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+	# remove begin and end quotes from the Kconfig string type
+	platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+	MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+	ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+	ifeq ($(CONFIG_MALI_MIDGARD),m)
+	include  $(src)/platform/$(platform_name)/Kbuild
+	else ifeq ($(CONFIG_MALI_MIDGARD),y)
+	obj-$(CONFIG_MALI_MIDGARD) += platform/
+	endif
+	endif
+endif # CONFIG_MALI_PLATFORM_FAKE=y
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+ifeq ($(CONFIG_MALI_MIDGARD),m)
+include  $(src)/platform/$(platform_name)/Kbuild
+else ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-$(CONFIG_MALI_MIDGARD) += platform/
+endif
+endif
+
+ifeq ($(CONFIG_MALI_PLATFORM_DEVICETREE),y)
+	ccflags-y += -I$(src)/platform/devicetree
+    include $(src)/platform/devicetree/Kbuild
+endif
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+mali_kbase-y := $(SRC:.c=.o)
+
+ifneq ($(wildcard $(src)/internal/Kbuild),)
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+# This include may set MALI_BACKEND_PATH and CONFIG_MALI_BACKEND_REAL
+include  $(src)/internal/Kbuild
+mali_kbase-y += $(INTERNAL:.c=.o)
+endif
+endif
+
+MALI_BACKEND_PATH ?= backend
+CONFIG_MALI_BACKEND ?= gpu
+CONFIG_MALI_BACKEND_REAL ?= $(CONFIG_MALI_BACKEND)
+
+ifeq ($(MALI_MOCK_TEST),1)
+ifeq ($(CONFIG_MALI_BACKEND_REAL),gpu)
+# Test functionality
+mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
+endif
+endif
+
+include  $(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)/Kbuild
+mali_kbase-y += $(BACKEND:.c=.o)
+
+ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+
+# Default to devicetree platform if neither a fake platform or a thirdparty
+# platform is configured.
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY)$(CONFIG_MALI_PLATFORM_FAKE),)
+CONFIG_MALI_PLATFORM_DEVICETREE := y
+endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/Kconfig b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/Kconfig
new file mode 100755
index 0000000..8a33841
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/Kconfig
@@ -0,0 +1,192 @@
+#
+# (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menuconfig MALI_MIDGARD
+	tristate "Mali Midgard series support"
+	select GPU_TRACEPOINTS if ANDROID
+	default n
+	help
+	  Enable this option to build support for a ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+	  You will need the Gator device driver already loaded before loading this driver when enabling
+	  Streamline debug support.
+	  This is a legacy interface required by older versions of Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ && !MALI_PLATFORM_DEVICETREE
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD && PM_DEVFREQ
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default n
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
+
+config MALI_DEBUG_SHADER_SPLIT_FS
+	bool "Allow mapping of shader cores via sysfs"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to provide a sysfs entry for runtime configuration of shader
+	  core affinity masks.
+
+config MALI_PLATFORM_FAKE
+	bool "Enable fake platform device support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  When you start to work with the Mali Midgard series device driver the platform-specific code of
+	  the Linux kernel for your platform may not be complete. In this situation the kernel device driver
+	  supports creating the platform device outside of the Linux platform-specific code.
+	  Enable this option if would like to use a platform device configuration from within the device driver.
+
+choice
+	prompt "Platform configuration"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default MALI_PLATFORM_DEVICETREE
+	help
+	  Select the SOC platform that contains a Mali Midgard GPU
+
+config MALI_PLATFORM_DEVICETREE
+	bool "Device Tree platform"
+	depends on OF
+	help
+	  Select this option to use Device Tree with the Mali driver.
+
+	  When using this option the Mali driver will get the details of the
+	  GPU hardware from the Device Tree. This means that the same driver
+	  binary can run on multiple platforms as long as all the GPU hardware
+	  details are described in the device tree.
+
+	  Device Tree is the recommended method for the Mali driver platform
+	  integration.
+
+config MALI_PLATFORM_VEXPRESS
+	depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+	bool "Versatile Express"
+config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ
+	depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+	bool "Versatile Express w/Virtex7 @ 40Mhz"
+config MALI_PLATFORM_GOLDFISH
+	depends on ARCH_GOLDFISH
+	bool "Android Goldfish virtual CPU"
+config MALI_PLATFORM_PBX
+	depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX
+	bool "Realview PBX-A9"
+config MALI_PLATFORM_THIRDPARTY
+	bool "Third Party Platform"
+endchoice
+
+config MALI_PLATFORM_THIRDPARTY_NAME
+	depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT
+	string "Third party platform name"
+	help
+	  Enter the name of a third party platform that is supported. The third part configuration
+	  file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name
+	  specified here.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_NO_MALI
+	bool "No Mali"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This can be used to test the driver in a simulated environment
+	  whereby the hardware is not physically present. If the hardware is physically
+	  present it will not be used. This can be used to test the majority of the
+	  driver without needing actual hardware or for software benchmarking.
+	  All calls to the simulated hardware will complete immediately as if the hardware
+	  completed the task.
+
+config MALI_ERROR_INJECT
+	bool "Error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+	default n
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_TRACE_TIMELINE
+	bool "Timeline tracing"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enables timeline tracing through the kernel tracepoint system.
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event.	This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/Makefile b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/Makefile
new file mode 100755
index 0000000..d4d5de4
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/Makefile
@@ -0,0 +1,46 @@
+#
+# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
+UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump
+KBASE_PATH_RELATIVE = $(CURDIR)
+KDS_PATH_RELATIVE = $(CURDIR)/../../../..
+EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers
+
+ifeq ($(MALI_UNIT_TEST), 1)
+	EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+
+ifneq ($(wildcard $(CURDIR)/internal/Makefile.in),)
+include $(CURDIR)/internal/Makefile.in
+endif
+
+ifeq ($(MALI_BUS_LOG), 1)
+#Add bus logger symbols
+EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
+endif
+
+# GPL driver supports KDS
+EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
new file mode 100755
index 0000000..2bef9c2
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
new file mode 100755
index 0000000..e38120a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -0,0 +1,63 @@
+#
+# (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+BACKEND += \
+	backend/gpu/mali_kbase_cache_policy_backend.c \
+	backend/gpu/mali_kbase_device_hw.c \
+	backend/gpu/mali_kbase_gpu.c \
+	backend/gpu/mali_kbase_gpuprops_backend.c \
+	backend/gpu/mali_kbase_debug_job_fault_backend.c \
+	backend/gpu/mali_kbase_irq_linux.c \
+	backend/gpu/mali_kbase_instr_backend.c \
+	backend/gpu/mali_kbase_jm_as.c \
+	backend/gpu/mali_kbase_jm_hw.c \
+	backend/gpu/mali_kbase_jm_rb.c \
+	backend/gpu/mali_kbase_js_affinity.c \
+	backend/gpu/mali_kbase_js_backend.c \
+	backend/gpu/mali_kbase_mmu_hw_direct.c \
+	backend/gpu/mali_kbase_pm_backend.c \
+	backend/gpu/mali_kbase_pm_driver.c \
+	backend/gpu/mali_kbase_pm_metrics.c \
+	backend/gpu/mali_kbase_pm_ca.c \
+	backend/gpu/mali_kbase_pm_ca_fixed.c \
+	backend/gpu/mali_kbase_pm_always_on.c \
+	backend/gpu/mali_kbase_pm_coarse_demand.c \
+	backend/gpu/mali_kbase_pm_demand.c \
+	backend/gpu/mali_kbase_pm_policy.c \
+	backend/gpu/mali_kbase_time.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+BACKEND += \
+	backend/gpu/mali_kbase_pm_ca_random.c \
+	backend/gpu/mali_kbase_pm_ca_demand.c \
+	backend/gpu/mali_kbase_pm_demand_always_powered.c \
+	backend/gpu/mali_kbase_pm_fast_start.c
+endif
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+BACKEND += backend/gpu/mali_kbase_devfreq.c
+endif
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+	# Dummy model
+	BACKEND += backend/gpu/mali_kbase_model_dummy.c
+	BACKEND += backend/gpu/mali_kbase_model_linux.c
+	# HW error simulation
+	BACKEND += backend/gpu/mali_kbase_model_error_generator.c
+endif
+
+ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
+	BACKEND += backend/gpu/mali_kbase_power_model_simple.c
+endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
new file mode 100755
index 0000000..c8ae87e
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100755
index 0000000..2f3c41a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100755
index 0000000..fe98691
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+  * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+  *			in the GPU.
+  * @kbdev:	Device pointer
+  * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+  */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
new file mode 100755
index 0000000..7851ea6
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_debug_job_fault.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+/*GPU_CONTROL_REG(r)*/
+static int gpu_control_reg_snapshot[] = {
+	GPU_ID,
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI
+};
+
+/* JOB_CONTROL_REG(r) */
+static int job_control_reg_snapshot[] = {
+	JOB_IRQ_MASK,
+	JOB_IRQ_STATUS
+};
+
+/* JOB_SLOT_REG(n,r) */
+static int job_slot_reg_snapshot[] = {
+	JS_HEAD_LO,
+	JS_HEAD_HI,
+	JS_TAIL_LO,
+	JS_TAIL_HI,
+	JS_AFFINITY_LO,
+	JS_AFFINITY_HI,
+	JS_CONFIG,
+	JS_STATUS,
+	JS_HEAD_NEXT_LO,
+	JS_HEAD_NEXT_HI,
+	JS_AFFINITY_NEXT_LO,
+	JS_AFFINITY_NEXT_HI,
+	JS_CONFIG_NEXT
+};
+
+/*MMU_REG(r)*/
+static int mmu_reg_snapshot[] = {
+	MMU_IRQ_MASK,
+	MMU_IRQ_STATUS
+};
+
+/* MMU_AS_REG(n,r) */
+static int as_reg_snapshot[] = {
+	AS_TRANSTAB_LO,
+	AS_TRANSTAB_HI,
+	AS_MEMATTR_LO,
+	AS_MEMATTR_HI,
+	AS_FAULTSTATUS,
+	AS_FAULTADDRESS_LO,
+	AS_FAULTADDRESS_HI,
+	AS_STATUS
+};
+
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range)
+{
+	int i, j;
+	int offset = 0;
+	int slot_number;
+	int as_number;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	slot_number = kctx->kbdev->gpu_props.num_job_slots;
+	as_number = kctx->kbdev->gpu_props.num_address_spaces;
+
+	/* get the GPU control registers*/
+	for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job control registers*/
+	for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				JOB_CONTROL_REG(job_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job Slot registers*/
+	for (j = 0; j < slot_number; j++)	{
+		for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+			JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	/* get the MMU registers*/
+	for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Address space registers*/
+	for (j = 0; j < as_number; j++) {
+		for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+					MMU_AS_REG(j, as_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	WARN_ON(offset >= (reg_range*2/4));
+
+	/* set the termination flag*/
+	kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
+	kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
+			offset);
+
+	return true;
+}
+
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
+{
+	int offset = 0;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
+		kctx->reg_dump[offset+1] =
+				kbase_reg_read(kctx->kbdev,
+						kctx->reg_dump[offset], NULL);
+		offset += 2;
+	}
+	return true;
+}
+
+
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
new file mode 100755
index 0000000..d25f84e
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -0,0 +1,399 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <backend/gpu/mali_kbase_power_model_simple.h>
+#endif
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#endif /* Linux >= 3.13 */
+
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
+static struct devfreq_simple_ondemand_data ondemand_data = {
+		.upthreshold = 90,
+		.downdifferential = 5,
+};
+
+#define DEV_FREQ_GOV_DATA		(&ondemand_data)
+
+static ssize_t upthreshold_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ondemand_data.upthreshold);
+}
+
+static ssize_t upthreshold_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	unsigned int t;
+	int ret;
+
+	ret = sscanf(buf, "%u", &t);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (t >= 100 || t == 0) {
+		dev_err(dev, "invalid input:%s\n", buf);
+		return -EINVAL;
+	}
+	ondemand_data.upthreshold = t;
+	return count;
+
+}
+static DEVICE_ATTR_RW(upthreshold);
+
+static ssize_t downdifferential_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ondemand_data.downdifferential);
+}
+
+static ssize_t downdifferential_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	unsigned int t;
+	int ret;
+
+	ret = sscanf(buf, "%u", &t);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (t >= 100 || t == 0) {
+		dev_err(dev, "invalid input:%s\n", buf);
+		return -EINVAL;
+	}
+	ondemand_data.downdifferential = t;
+	return count;
+
+}
+static DEVICE_ATTR_RW(downdifferential);
+#else
+#define DEV_FREQ_GOV_DATA		(NULL)
+#endif /* CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND */
+
+static ssize_t utilisation_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev->parent);
+	unsigned long total_time = 0, busy_time = 0;
+	unsigned long utilise;
+
+	kbase_pm_get_dvfs_utilisation(kbdev, &total_time, &busy_time);
+
+	if (total_time == 0) {
+		utilise = 0;
+	} else {
+		/* round up */
+		utilise = (busy_time * 100 + (total_time >> 1)) / total_time;
+	}
+	return sprintf(buf, "%ld\n", utilise);
+}
+static DEVICE_ATTR_RO(utilisation);
+
+static struct device_attribute *kbase_dev_attr[] = {
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
+	&dev_attr_downdifferential,
+	&dev_attr_upthreshold,
+#endif
+	&dev_attr_utilisation
+};
+
+static int kbase_dev_add_attr(struct device *dev)
+{
+	int i, ret;
+	for (i = 0; i <  ARRAY_SIZE(kbase_dev_attr); i++) {
+		if (!kbase_dev_attr[i])
+			continue;
+		ret = device_create_file(dev, kbase_dev_attr[i]);
+		if (ret)
+			return ret;
+	}
+	return ret;
+}
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+	if (IS_ERR_OR_NULL(opp)) {
+		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (kbdev->current_freq == freq) {
+		*target_freq = freq;
+		return 0;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq < freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to increase voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(kbdev->clock, freq);
+	if (err) {
+		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+				freq, *target_freq);
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq > freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	*target_freq = freq;
+	kbdev->current_voltage = voltage;
+	kbdev->current_freq = freq;
+
+	kbase_pm_reset_dvfs_utilisation(kbdev);
+
+	return err;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	*freq = kbdev->current_freq;
+
+	return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	stat->current_frequency = kbdev->current_freq;
+
+	kbase_pm_get_dvfs_utilisation(kbdev,
+			&stat->total_time, &stat->busy_time);
+
+	stat->private_data = NULL;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->devfreq_cooling)
+		memcpy(&kbdev->devfreq_cooling->last_status, stat,
+				sizeof(*stat));
+#endif
+
+	return 0;
+}
+
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+		struct devfreq_dev_profile *dp)
+{
+	int count;
+	int i = 0;
+	unsigned long freq = 0;
+	struct dev_pm_opp *opp;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	if (count < 0) {
+		rcu_read_unlock();
+		return count;
+	}
+	rcu_read_unlock();
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	for (i = 0; i < count; i++, freq++) {
+		opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
+				count, i);
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	kbase_devfreq_term_freq_table(kbdev);
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	int err;
+
+	if (!kbdev->clock)
+		return -ENODEV;
+
+	kbdev->current_freq = clk_get_rate(kbdev->clock);
+
+	dp = &kbdev->devfreq_profile;
+
+	dp->initial_freq = kbdev->current_freq;
+	dp->polling_ms = 100;
+	dp->target = kbase_devfreq_target;
+	dp->get_dev_status = kbase_devfreq_status;
+	dp->get_cur_freq = kbase_devfreq_cur_freq;
+	dp->exit = kbase_devfreq_exit;
+
+	if (kbase_devfreq_init_freq_table(kbdev, dp))
+		return -EFAULT;
+
+	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+				"simple_ondemand", DEV_FREQ_GOV_DATA);
+	if (IS_ERR(kbdev->devfreq)) {
+		kbase_devfreq_term_freq_table(kbdev);
+		return PTR_ERR(kbdev->devfreq);
+	}
+
+	kbase_dev_add_attr(&kbdev->devfreq->dev);
+	err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to register OPP notifier (%d)\n", err);
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	err = kbase_power_model_simple_init(kbdev);
+	if (err && err != -ENODEV && err != -EPROBE_DEFER) {
+		dev_err(kbdev->dev,
+			"Failed to initialize simple power model (%d)\n",
+			err);
+		goto cooling_failed;
+	}
+	if (err == -EPROBE_DEFER)
+		goto cooling_failed;
+	if (err != -ENODEV) {
+		kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+				kbdev->dev->of_node,
+				kbdev->devfreq,
+				&power_model_simple_ops);
+		if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+			err = PTR_ERR(kbdev->devfreq_cooling);
+			dev_err(kbdev->dev,
+				"Failed to register cooling device (%d)\n",
+				err);
+			goto cooling_failed;
+		}
+	} else {
+		err = 0;
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	if (devfreq_remove_device(kbdev->devfreq))
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	return err;
+}
+
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+	int err;
+
+	dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->devfreq_cooling)
+		devfreq_cooling_unregister(kbdev->devfreq_cooling);
+#endif
+
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+	err = devfreq_remove_device(kbdev->devfreq);
+	if (err)
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
new file mode 100755
index 0000000..c0bf8b1
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+int kbase_devfreq_init(struct kbase_device *kbdev);
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _BASE_DEVFREQ_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
new file mode 100755
index 0000000..83d5ec9
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ *
+ */
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+	writel(value, kbdev->reg + offset);
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_WRITE, offset,
+									value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx)
+{
+	u32 val;
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+	val = readl(kbdev->reg + offset);
+	dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_READ, offset, val);
+	return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
+
+/**
+ * kbase_report_gpu_fault - Report a GPU fault.
+ * @kbdev:    Kbase device pointer
+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
+ *            was also set
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using dev_warn().
+ */
+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+	u32 status;
+	u64 address;
+
+	status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+	address = (u64) kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
+	address |= kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+
+	dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+			status & 0xFF,
+			kbase_exception_name(kbdev, status),
+			address);
+	if (multiple)
+		dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+	if (val & GPU_FAULT)
+		kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+	if (val & RESET_COMPLETED)
+		kbase_pm_reset_done(kbdev);
+
+	if (val & PRFCNT_SAMPLE_COMPLETED)
+		kbase_instr_hwcnt_sample_done(kbdev);
+
+	if (val & CLEAN_CACHES_COMPLETED)
+		kbase_clean_caches_done(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+
+	/* kbase_pm_check_transitions must be called after the IRQ has been
+	 * cleared. This is because it might trigger further power transitions
+	 * and we don't want to miss the interrupt raised to notify us that
+	 * these further transitions have finished.
+	 */
+	if (val & POWER_CHANGED_ALL)
+		kbase_pm_power_changed(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
new file mode 100755
index 0000000..5b20445
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access device APIs
+ */
+
+#ifndef _KBASE_DEVICE_INTERNAL_H_
+#define _KBASE_DEVICE_INTERNAL_H_
+
+/**
+ * kbase_reg_write - write to GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @value:  Value to write
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_reg_read - read from GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ *
+ * Return: Value in desired register
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+
+/**
+ * kbase_gpu_interrupt - GPU interrupt handler
+ * @kbdev: Kbase device pointer
+ * @val:   The value of the GPU IRQ status register which triggered the call
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be
+ * handled.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+#endif /* _KBASE_DEVICE_INTERNAL_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
new file mode 100755
index 0000000..72a98d0
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbasep_platform_device_init(kbdev);
+	if (err)
+		return err;
+
+	/* Ensure we can access the GPU registers */
+	kbase_pm_register_access_enable(kbdev);
+
+	/* Find out GPU properties based on the GPU feature registers */
+	kbase_gpuprops_set(kbdev);
+
+	/* We're done accessing the GPU registers for now. */
+	kbase_pm_register_access_disable(kbdev);
+
+	err = kbase_hwaccess_pm_init(kbdev);
+	if (err)
+		goto fail_pm;
+
+	err = kbase_install_interrupts(kbdev);
+	if (err)
+		goto fail_interrupts;
+
+	return 0;
+
+fail_interrupts:
+	kbase_hwaccess_pm_term(kbdev);
+fail_pm:
+	kbasep_platform_device_term(kbdev);
+
+	return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+	kbase_release_interrupts(kbdev);
+	kbase_hwaccess_pm_term(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		return err;
+
+	err = kbase_backend_timer_init(kbdev);
+	if (err)
+		goto fail_timer;
+
+/* Currently disabled on the prototype */
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+		dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+		err = -EINVAL;
+		goto fail_interrupt_test;
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = kbase_job_slot_init(kbdev);
+	if (err)
+		goto fail_job_slot;
+
+	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+	return 0;
+
+fail_job_slot:
+/* Currently disabled on the prototype */
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+	kbase_backend_timer_term(kbdev);
+fail_timer:
+	kbase_hwaccess_pm_halt(kbdev);
+
+	return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+	kbase_job_slot_halt(kbdev);
+	kbase_job_slot_term(kbdev);
+	kbase_backend_timer_term(kbdev);
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100755
index 0000000..d410cd2
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,105 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query backend APIs
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	int i;
+
+	/* Fill regdump with the content of the relevant registers */
+	regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
+
+	regdump->l2_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_FEATURES), NULL);
+	regdump->suspend_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SUSPEND_SIZE), NULL);
+	regdump->tiler_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_FEATURES), NULL);
+	regdump->mem_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MEM_FEATURES), NULL);
+	regdump->mmu_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MMU_FEATURES), NULL);
+	regdump->as_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(AS_PRESENT), NULL);
+	regdump->js_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_PRESENT), NULL);
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		regdump->js_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		regdump->texture_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
+
+	regdump->thread_max_threads = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
+	regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
+									NULL);
+	regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
+	regdump->thread_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_FEATURES), NULL);
+
+	regdump->shader_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
+	regdump->shader_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
+
+	regdump->tiler_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
+	regdump->tiler_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
+
+	regdump->l2_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
+	regdump->l2_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
+}
+
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
+		/* Ensure we can access the GPU registers */
+		kbase_pm_register_access_enable(kbdev);
+
+		regdump->coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+		/* We're done accessing the GPU registers for now. */
+		kbase_pm_register_access_disable(kbdev);
+	} else {
+		/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
+		regdump->coherency_features =
+				COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+	}
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
new file mode 100755
index 0000000..4e70b34
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -0,0 +1,550 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * GPU backend instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+
+/**
+ * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
+ * hardware
+ *
+ * @kbdev: Kbase device
+ */
+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long pm_flags;
+	u32 irq_mask;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	/* Wait for any reset to complete */
+	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+				kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_RESETTING);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask | CLEAN_CACHES_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+	/* clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	struct kbasep_js_device_data *js_devdata;
+	u32 irq_mask;
+	int ret;
+	u64 shader_cores_needed;
+	u32 prfcnt_config;
+
+	KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx);
+
+	shader_cores_needed = kbase_pm_get_present_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	js_devdata = &kbdev->js_data;
+
+	/* alignment failure */
+	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
+		goto out_err;
+
+	/* Override core availability policy to ensure all cores are available
+	 */
+	kbase_pm_ca_instr_enable(kbdev);
+
+	/* Request the cores early on synchronously - we'll release them on any
+	 * errors (e.g. instrumentation already active) */
+	kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+		/* GPU is being reset */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		/* Instrumentation is already enabled */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		goto out_unrequest_cores;
+	}
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
+						PRFCNT_SAMPLE_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+	/* In use, this context is the owner */
+	kbdev->hwcnt.kctx = kctx;
+	/* Remember the dump address so we can reprogram it later */
+	kbdev->hwcnt.addr = setup->dump_buffer;
+	/* Remember all the settings for suspend/resume */
+	if (&kbdev->hwcnt.suspended_state != setup)
+		memcpy(&kbdev->hwcnt.suspended_state, setup,
+					sizeof(kbdev->hwcnt.suspended_state));
+
+	/* Request the clean */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+	kbdev->hwcnt.backend.triggered = 0;
+	/* Clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+	KBASE_DEBUG_ASSERT(ret);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Wait for cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	/* Configure */
+	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	{
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+	}
+#endif
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					setup->dump_buffer & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					setup->dump_buffer >> 32,        kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+					setup->jm_bm,                    kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+					setup->shader_bm,                kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+					setup->mmu_l2_bm,                kctx);
+	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
+	 * HW counter dump. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
+									kctx);
+	else
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+		/* GPU is being reset */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	err = 0;
+
+	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+	return err;
+ out_unrequest_cores:
+	kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+ out_err:
+	return err;
+}
+
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	while (1) {
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
+			/* Instrumentation is not enabled */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.kctx != kctx) {
+			/* Instrumentation has been setup for another context */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
+			break;
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+		/* Ongoing dump/setup - wait for its completion */
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Disable interrupt */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+	/* Disable the counters */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+
+	kbdev->hwcnt.kctx = NULL;
+	kbdev->hwcnt.addr = 0ULL;
+
+	kbase_pm_ca_instr_disable(kbdev);
+
+	kbase_pm_unrequest_cores(kbdev, true,
+		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	kbase_pm_release_l2_caches(kbdev);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+									kctx);
+
+	err = 0;
+
+ out:
+	return err;
+}
+
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.kctx != kctx) {
+		/* The instrumentation has been setup for another context */
+		goto unlock;
+	}
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
+		/* HW counters are disabled or another dump is ongoing, or we're
+		 * resetting */
+		goto unlock;
+	}
+
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Mark that we're dumping - the PF handler can signal that we faulted
+	 */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+	/* Reconfigure the dump address */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					kbdev->hwcnt.addr >> 32, NULL);
+
+	/* Start dumping */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
+					kbdev->hwcnt.addr, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+
+	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+
+	err = 0;
+
+ unlock:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
+
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success)
+{
+	unsigned long flags;
+	bool complete = false;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+		*success = true;
+		complete = true;
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		*success = false;
+		complete = true;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
+
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(data, struct kbase_device,
+						hwcnt.backend.cache_clean_work);
+
+	mutex_lock(&kbdev->cacheclean_lock);
+	kbasep_instr_hwcnt_cacheclean(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	/* Wait for our condition, and any reset to complete */
+	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING ||
+			kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_CLEANING) {
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+				(kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_RESETTING &&
+				kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING));
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_CLEANED);
+
+	/* All finished and idle */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
+		int ret;
+		/* Always clean and invalidate the cache after a successful dump
+		 */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+		ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+		KBASE_DEBUG_ASSERT(ret);
+	}
+	/* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a reset,
+	 * and the instrumentation state hasn't been restored yet -
+	 * kbasep_reset_timeout_worker() will do the rest of the work */
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		unsigned long flags;
+		unsigned long pm_flags;
+
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+		/* Disable interrupt */
+		spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+									NULL);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+		/* Wakeup... */
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+			/* Only wake if we weren't resetting */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+			wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+		}
+		/* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a
+		 * reset, and the instrumentation state hasn't been restored yet
+		 * - kbasep_reset_timeout_worker() will do the rest of the work
+		 */
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	}
+}
+
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long flags;
+	int err;
+
+	/* Wait for dump & cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+		/* GPU is being reset */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		err = -EINVAL;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	} else {
+		/* Dump done */
+		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+		err = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return err;
+}
+
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+		/* GPU is being reset */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+
+	/* Check it's the context previously set up and we're not already
+	 * dumping */
+	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
+							KBASE_INSTR_STATE_IDLE)
+		goto out;
+
+	/* Clear the counters */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_PRFCNT_CLEAR, kctx);
+
+	err = 0;
+
+out:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+int kbase_instr_backend_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
+	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
+	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
+						kbasep_cache_clean_worker);
+	kbdev->hwcnt.backend.triggered = 0;
+
+	kbdev->hwcnt.backend.cache_clean_wq =
+			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
+	if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+void kbase_instr_backend_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
new file mode 100755
index 0000000..23bd80a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
@@ -0,0 +1,62 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+	/* State where instrumentation is not active */
+	KBASE_INSTR_STATE_DISABLED = 0,
+	/* State machine is active and ready for a command. */
+	KBASE_INSTR_STATE_IDLE,
+	/* Hardware is currently dumping a frame. */
+	KBASE_INSTR_STATE_DUMPING,
+	/* We've requested a clean to occur on a workqueue */
+	KBASE_INSTR_STATE_REQUEST_CLEAN,
+	/* Hardware is currently cleaning and invalidating caches. */
+	KBASE_INSTR_STATE_CLEANING,
+	/* Cache clean completed, and either a) a dump is complete, or
+	 * b) instrumentation can now be setup. */
+	KBASE_INSTR_STATE_CLEANED,
+	/* kbasep_reset_timeout_worker() has started (but not compelted) a
+	 * reset. This generally indicates the current action should be aborted,
+	 * and kbasep_reset_timeout_worker() will handle the cleanup */
+	KBASE_INSTR_STATE_RESETTING,
+	/* An error has occured during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+	wait_queue_head_t wait;
+	int triggered;
+
+	enum kbase_instr_state state;
+	wait_queue_head_t cache_clean_wait;
+	struct workqueue_struct *cache_clean_wq;
+	struct work_struct  cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100755
index 0000000..e96aeae
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100755
index 0000000..8781561
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific IRQ APIs
+ */
+
+#ifndef _KBASE_IRQ_INTERNAL_H_
+#define _KBASE_IRQ_INTERNAL_H_
+
+int kbase_install_interrupts(struct kbase_device *kbdev);
+
+void kbase_release_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
+ *                          execution
+ * @kbdev: The kbase device
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev);
+
+#endif /* _KBASE_IRQ_INTERNAL_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
new file mode 100755
index 0000000..49c72f9
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -0,0 +1,471 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+
+#include <linux/interrupt.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+
+static void *kbase_tag(void *ptr, u32 tag)
+{
+	return (void *)(((uintptr_t) ptr) | tag);
+}
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+
+
+
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_job_done(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	atomic_inc(&kbdev->faults_pending);
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val) {
+		atomic_dec(&kbdev->faults_pending);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_mmu_interrupt(kbdev, val);
+
+	atomic_dec(&kbdev->faults_pending);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_gpu_interrupt(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+static irq_handler_t kbase_handler_table[] = {
+	[JOB_IRQ_TAG] = kbase_job_irq_handler,
+	[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+	[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
+
+
+#ifdef CONFIG_MALI_DEBUG
+#define  JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define  MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define  GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * kbase_set_custom_irq_handler - Set a custom IRQ handler
+ * @kbdev: Device for which the handler is to be registered
+ * @custom_handler: Handler to be registered
+ * @irq_type: Interrupt type
+ *
+ * Registers given interrupt handler for requested interrupt type
+ * In the case where irq handler is not specified, the default handler shall be
+ * registered
+ *
+ * Return: 0 case success, error code otherwise
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+					irq_handler_t custom_handler,
+					int irq_type)
+{
+	int result = 0;
+	irq_handler_t requested_irq_handler = NULL;
+
+	KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
+						(GPU_IRQ_HANDLER >= irq_type));
+
+	/* Release previous handler */
+	if (kbdev->irqs[irq_type].irq)
+		free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
+
+	requested_irq_handler = (NULL != custom_handler) ? custom_handler :
+						kbase_handler_table[irq_type];
+
+	if (0 != request_irq(kbdev->irqs[irq_type].irq,
+			requested_irq_handler,
+			kbdev->irqs[irq_type].flags | IRQF_SHARED,
+			dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+		result = -EINVAL;
+		dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+					kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+		dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/* test correct interrupt assigment and reception by cpu */
+struct kbasep_irq_test {
+	struct hrtimer timer;
+	wait_queue_head_t wait;
+	int triggered;
+	u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT    500
+
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+	struct kbasep_irq_test *test_data = container_of(timer,
+						struct kbasep_irq_test, timer);
+
+	test_data->timeout = 1;
+	test_data->triggered = 1;
+	wake_up(&test_data->wait);
+	return HRTIMER_NORESTART;
+}
+
+static int kbasep_common_test_interrupt(
+				struct kbase_device * const kbdev, u32 tag)
+{
+	int err = 0;
+	irq_handler_t test_handler;
+
+	u32 old_mask_val;
+	u16 mask_offset;
+	u16 rawstat_offset;
+
+	switch (tag) {
+	case JOB_IRQ_TAG:
+		test_handler = kbase_job_irq_test_handler;
+		rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+		mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+		break;
+	case MMU_IRQ_TAG:
+		test_handler = kbase_mmu_irq_test_handler;
+		rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+		mask_offset = MMU_REG(MMU_IRQ_MASK);
+		break;
+	case GPU_IRQ_TAG:
+		/* already tested by pm_driver - bail out */
+	default:
+		return 0;
+	}
+
+	/* store old mask */
+	old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+	/* mask interrupts */
+	kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+	if (kbdev->irqs[tag].irq) {
+		/* release original handler and install test handler */
+		if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
+			err = -EINVAL;
+		} else {
+			kbasep_irq_test_data.timeout = 0;
+			hrtimer_init(&kbasep_irq_test_data.timer,
+					CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+			kbasep_irq_test_data.timer.function =
+						kbasep_test_interrupt_timeout;
+
+			/* trigger interrupt */
+			kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
+			kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+
+			hrtimer_start(&kbasep_irq_test_data.timer,
+					HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
+					HRTIMER_MODE_REL);
+
+			wait_event(kbasep_irq_test_data.wait,
+					kbasep_irq_test_data.triggered != 0);
+
+			if (kbasep_irq_test_data.timeout != 0) {
+				dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+				err = -EINVAL;
+			} else {
+				dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+			}
+
+			hrtimer_cancel(&kbasep_irq_test_data.timer);
+			kbasep_irq_test_data.triggered = 0;
+
+			/* mask interrupts */
+			kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+			/* release test handler */
+			free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+		}
+
+		/* restore original interrupt */
+		if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
+				kbdev->irqs[tag].flags | IRQF_SHARED,
+				dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+			dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
+						kbdev->irqs[tag].irq, tag);
+			err = -EINVAL;
+		}
+	}
+	/* restore old mask */
+	kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+
+	return err;
+}
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev)
+{
+	int err;
+
+	init_waitqueue_head(&kbasep_irq_test_data.wait);
+	kbasep_irq_test_data.triggered = 0;
+
+	/* A suspend won't happen during startup/insmod */
+	kbase_pm_context_active(kbdev);
+
+	err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+
+ out:
+	kbase_pm_context_idle(kbdev);
+
+	return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	int err;
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
+				kbdev->irqs[i].flags | IRQF_SHARED,
+				dev_name(kbdev->dev),
+				kbase_tag(kbdev, i));
+		if (err) {
+			dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+							kbdev->irqs[i].irq, i);
+#ifdef CONFIG_SPARSE_IRQ
+			dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+			goto release;
+		}
+	}
+
+	return 0;
+
+ release:
+	while (i-- > 0)
+		free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+
+	return err;
+}
+
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+	}
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			synchronize_irq(kbdev->irqs[i].irq);
+	}
+}
+
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
new file mode 100755
index 0000000..f216788
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -0,0 +1,385 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register backend context / address space management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+/**
+ * assign_and_activate_kctx_addr_space - Assign an AS to a context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @current_as: Address Space to assign
+ *
+ * Assign an Address Space (AS) to a context, and add the context to the Policy.
+ *
+ * This includes
+ *   setting up the global runpool_irq structure and the context on the AS,
+ *   Activating the MMU on the AS,
+ *   Allowing jobs to be submitted on the AS.
+ *
+ * Context:
+ *   kbasep_js_kctx_info.jsctx_mutex held,
+ *   kbasep_js_device_data.runpool_mutex held,
+ *   AS transaction mutex held,
+ *   Runpool IRQ lock held
+ */
+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_as *current_as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_per_as_data *js_per_as_data;
+	int as_nr = current_as->number;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&current_as->transaction_mutex);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	/* Attribute handling */
+	kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+	/* Assign addr space */
+	kctx->as_nr = as_nr;
+
+	/* If the GPU is currently powered, activate this address space on the
+	 * MMU */
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_mmu_update(kctx);
+	/* If the GPU was not powered then the MMU will be reprogrammed on the
+	 * next pm_context_active() */
+
+	/* Allow it to run jobs */
+	kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+	/* Book-keeping */
+	js_per_as_data->kctx = kctx;
+	js_per_as_data->as_busy_refcount = 0;
+
+	kbase_js_runpool_inc_context_count(kbdev, kctx);
+}
+
+/**
+ * release_addr_space - Release an address space
+ * @kbdev: Kbase device
+ * @kctx_as_nr: Address space of context to release
+ * @kctx: Context being released
+ *
+ * Context: kbasep_js_device_data.runpool_mutex must be held
+ *
+ * Release an address space, making it available for being picked again.
+ */
+static void release_addr_space(struct kbase_device *kbdev, int kctx_as_nr,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u16 as_bit = (1u << kctx_as_nr);
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* The address space must not already be free */
+	KBASE_DEBUG_ASSERT(!(js_devdata->as_free & as_bit));
+
+	js_devdata->as_free |= as_bit;
+
+	kbase_js_runpool_dec_context_count(kbdev, kctx);
+}
+
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	int i;
+
+	if (kbdev->hwaccess.active_kctx == kctx) {
+		/* Context is already active */
+		return true;
+	}
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+				&kbdev->js_data.runpool_irq.per_as_data[i];
+
+		if (js_per_as_data->kctx == kctx) {
+			/* Context already has ASID - mark as active */
+			return true;
+		}
+	}
+
+	/* Context does not have address space assigned */
+	return false;
+}
+
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_per_as_data *js_per_as_data;
+	int as_nr = kctx->as_nr;
+
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Attempting to release context without ASID\n");
+		return;
+	}
+
+	lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr];
+	if (js_per_as_data->as_busy_refcount != 0) {
+		WARN(1, "Attempting to release active ASID\n");
+		return;
+	}
+
+	/* Release context from address space */
+	js_per_as_data->kctx = NULL;
+
+	kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
+	/* If the GPU is currently powered, de-activate this address space on
+	 * the MMU */
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_mmu_disable(kctx);
+	/* If the GPU was not powered then the MMU will be reprogrammed on the
+	 * next pm_context_active() */
+
+	release_addr_space(kbdev, as_nr, kctx);
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+}
+
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+}
+
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+								int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	js_devdata->as_free |= (1 << as_nr);
+}
+
+/**
+ * check_is_runpool_full - check whether the runpool is full for a specified
+ * context
+ * @kbdev: Kbase device
+ * @kctx:  Kbase context
+ *
+ * If kctx == NULL, then this makes the least restrictive check on the
+ * runpool. A specific context that is supplied immediately after could fail
+ * the check, even under the same conditions.
+ *
+ * Therefore, once a context is obtained you \b must re-check it with this
+ * function, since the return value could change to false.
+ *
+ * Context:
+ *   In all cases, the caller must hold kbasep_js_device_data.runpool_mutex.
+ *   When kctx != NULL the caller must hold the
+ *   kbasep_js_kctx_info.ctx.jsctx_mutex.
+ *   When kctx == NULL, then the caller need not hold any jsctx_mutex locks (but
+ *   it doesn't do any harm to do so).
+ *
+ * Return: true if the runpool is full
+ */
+static bool check_is_runpool_full(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	bool is_runpool_full;
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Regardless of whether a context is submitting or not, can't have more
+	 * than there are HW address spaces */
+	is_runpool_full = (bool) (js_devdata->nr_all_contexts_running >=
+						kbdev->nr_hw_address_spaces);
+
+	if (kctx != NULL && (kctx->jctx.sched_info.ctx.flags &
+					KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+		lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		/* Contexts that submit might use less of the address spaces
+		 * available, due to HW workarounds.  In which case, the runpool
+		 * is also full when the number of submitting contexts exceeds
+		 * the number of submittable address spaces.
+		 *
+		 * Both checks must be made: can have nr_user_address_spaces ==
+		 * nr_hw_address spaces, and at the same time can have
+		 * nr_user_contexts_running < nr_all_contexts_running. */
+		is_runpool_full |= (bool)
+					(js_devdata->nr_user_contexts_running >=
+						kbdev->nr_user_address_spaces);
+	}
+
+	return is_runpool_full;
+}
+
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	int i;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* First try to find a free address space */
+	if (check_is_runpool_full(kbdev, kctx))
+		i = -1;
+	else
+		i = ffs(js_devdata->as_free) - 1;
+
+	if (i >= 0 && i < kbdev->nr_hw_address_spaces) {
+		js_devdata->as_free &= ~(1 << i);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return i;
+	}
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* No address space currently free, see if we can release one */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+		struct kbasep_js_kctx_info *as_js_kctx_info;
+		struct kbase_context *as_kctx;
+
+		js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[i];
+		as_kctx = js_per_as_data->kctx;
+		as_js_kctx_info = &as_kctx->jctx.sched_info;
+
+		/* Don't release privileged or active contexts, or contexts with
+		 * jobs running */
+		if (as_kctx && !(as_kctx->jctx.sched_info.ctx.flags &
+						KBASE_CTX_FLAG_PRIVILEGED) &&
+			js_per_as_data->as_busy_refcount == 0) {
+			if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
+								as_kctx)) {
+				WARN(1, "Failed to retain active context\n");
+
+				spin_unlock_irqrestore(
+						&js_devdata->runpool_irq.lock,
+									flags);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				return KBASEP_AS_NR_INVALID;
+			}
+
+			kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
+
+			/* Drop and retake locks to take the jsctx_mutex on the
+			 * context we're about to release without violating lock
+			 * ordering
+			 */
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+									flags);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+
+			/* Release context from address space */
+			mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+
+			kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
+
+			if (!as_js_kctx_info->ctx.is_scheduled) {
+				kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
+								as_kctx,
+								true);
+
+				js_devdata->as_free &= ~(1 << i);
+
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+				return i;
+			}
+
+			/* Context was retained while locks were dropped,
+			 * continue looking for free AS */
+
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+		}
+	}
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_as *new_address_space = NULL;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if (kbdev->hwaccess.active_kctx == kctx ||
+	    kctx->as_nr != KBASEP_AS_NR_INVALID ||
+	    as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Invalid parameters to use_ctx()\n");
+		return false;
+	}
+
+	new_address_space = &kbdev->as[as_nr];
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&new_address_space->transaction_mutex);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0) {
+		/* We need to retain it to keep the corresponding address space
+		 */
+		kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	}
+
+	return true;
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
new file mode 100755
index 0000000..8ccc440
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -0,0 +1,119 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific definitions
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
+#define _KBASE_HWACCESS_GPU_DEFS_H_
+
+/* SLOT_RB_SIZE must be < 256 */
+#define SLOT_RB_SIZE 2
+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
+
+/**
+ * struct rb_entry - Ringbuffer entry
+ * @katom:	Atom associated with this entry
+ */
+struct rb_entry {
+	struct kbase_jd_atom *katom;
+};
+
+/**
+ * struct slot_rb - Slot ringbuffer
+ * @entries:		Ringbuffer entries
+ * @last_context:	The last context to submit a job on this slot
+ * @read_idx:		Current read index of buffer
+ * @write_idx:		Current write index of buffer
+ * @job_chain_flag:	Flag used to implement jobchain disambiguation
+ */
+struct slot_rb {
+	struct rb_entry entries[SLOT_RB_SIZE];
+
+	struct kbase_context *last_context;
+
+	u8 read_idx;
+	u8 write_idx;
+
+	u8 job_chain_flag;
+};
+
+/**
+ * struct kbase_backend_data - GPU backend specific data for HW access layer
+ * @slot_rb:			Slot ringbuffers
+ * @rmu_workaround_flag:	When PRLAM-8987 is present, this flag determines
+ *				whether slots 0/1 or slot 2 are currently being
+ *				pulled from
+ * @scheduling_timer:		The timer tick used for rescheduling jobs
+ * @timer_running:		Is the timer running? The runpool_mutex must be
+ *				held whilst modifying this.
+ * @suspend_timer:              Is the timer suspended? Set when a suspend
+ *                              occurs and cleared on resume. The runpool_mutex
+ *                              must be held whilst modifying this.
+ * @reset_gpu:			Set to a KBASE_RESET_xxx value (see comments)
+ * @reset_workq:		Work queue for performing the reset
+ * @reset_work:			Work item for performing the reset
+ * @reset_wait:			Wait event signalled when the reset is complete
+ * @reset_timer:		Timeout for soft-stops before the reset
+ *
+ * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
+ * accessing this structure
+ */
+struct kbase_backend_data {
+	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
+
+	bool rmu_workaround_flag;
+
+	struct hrtimer scheduling_timer;
+
+	bool timer_running;
+	bool suspend_timer;
+
+	atomic_t reset_gpu;
+
+/* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_NOT_PENDING     0
+/* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_PREPARED        1
+/* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_COMMITTED       2
+/* The GPU reset process is currently occuring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+#define KBASE_RESET_GPU_HAPPENING       3
+
+	struct workqueue_struct *reset_workq;
+	struct work_struct reset_work;
+	wait_queue_head_t reset_wait;
+	struct hrtimer reset_timer;
+};
+
+/**
+ * struct kbase_jd_atom_backend - GPU backend specific katom data
+ */
+struct kbase_jd_atom_backend {
+};
+
+/**
+ * struct kbase_context_backend - GPU backend specific context data
+ */
+struct kbase_context_backend {
+};
+
+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
new file mode 100755
index 0000000..ddaae34
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -0,0 +1,1623 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel job manager APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+#define beenthere(kctx, f, a...) \
+			dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if KBASE_GPU_RESET_EN
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
+static void kbasep_reset_timeout_worker(struct work_struct *data);
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
+#endif /* KBASE_GPU_RESET_EN */
+
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
+						struct kbase_context *kctx)
+{
+	return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
+}
+
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js)
+{
+	struct kbase_context *kctx;
+	u32 cfg;
+	u64 jc_head = katom->jc;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	/* Command register must be available */
+	KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+	/* Affinity is not violating */
+	kbase_js_debug_log_current_affinities(kbdev);
+	KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
+							katom->affinity));
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
+						jc_head & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
+						jc_head >> 32, kctx);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+					katom->affinity & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+					katom->affinity >> 32, kctx);
+
+	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
+	 * start */
+	cfg = kctx->as_nr;
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
+
+#ifndef CONFIG_MALI_COH_GPU
+	cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+	cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+#endif
+
+	cfg |= JS_CONFIG_START_MMU;
+	cfg |= JS_CONFIG_THREAD_PRI(8);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) &&
+		(katom->atom_flags & KBASE_KATOM_FLAG_SECURE))
+		cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+		if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+			cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								true;
+		} else {
+			katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								false;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
+				katom->flush_id, kctx);
+
+	/* Write an approximate start timestamp.
+	 * It's approximate because there might be a job in the HEAD register.
+	 * In such cases, we'll try to make a better approximation in the IRQ
+	 * handler (up to the KBASE_JS_IRQ_THROTTLE_TIME_US). */
+	katom->start_timestamp = ktime_get();
+
+	/* GO ! */
+	dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx",
+				katom, kctx, js, jc_head, katom->affinity);
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
+							(u32) katom->affinity);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(
+				GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
+				kctx, kbase_jd_atom_id(kctx, katom));
+#endif
+	kbase_tlstream_tl_attrib_atom_config(katom, jc_head,
+			katom->affinity, cfg);
+	kbase_tlstream_tl_ret_ctx_lpu(
+		kctx,
+		&kbdev->gpu_props.props.raw_props.js_features[
+			katom->slot_nr]);
+	kbase_tlstream_tl_ret_atom_as(katom, &kbdev->as[kctx->as_nr]);
+	kbase_tlstream_tl_ret_atom_lpu(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[js],
+			"ctx_nr,atom_nr");
+#ifdef CONFIG_GPU_TRACEPOINTS
+	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
+		/* If this is the only job on the slot, trace it as starting */
+		char js_string[16];
+
+		trace_gpu_sched_switch(
+				kbasep_make_job_slot_string(js, js_string),
+				ktime_to_ns(katom->start_timestamp),
+				(u32)katom->kctx->id, 0, katom->work_id);
+		kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
+	}
+#endif
+	kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+						JS_COMMAND_START, katom->kctx);
+}
+
+/**
+ * kbasep_job_slot_update_head_start_timestamp - Update timestamp
+ * @kbdev: kbase device
+ * @js: job slot
+ * @end_timestamp: timestamp
+ *
+ * Update the start_timestamp of the job currently in the HEAD, based on the
+ * fact that we got an IRQ for the previous set of completed jobs.
+ *
+ * The estimate also takes into account the %KBASE_JS_IRQ_THROTTLE_TIME_US and
+ * the time the job was submitted, to work out the best estimate (which might
+ * still result in an over-estimate to the calculated time spent)
+ */
+static void kbasep_job_slot_update_head_start_timestamp(
+						struct kbase_device *kbdev,
+						int js,
+						ktime_t end_timestamp)
+{
+	if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) {
+		struct kbase_jd_atom *katom;
+		ktime_t new_timestamp;
+		ktime_t timestamp_diff;
+		/* The atom in the HEAD */
+		katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		KBASE_DEBUG_ASSERT(katom != NULL);
+
+		/* Account for any IRQ Throttle time - makes an overestimate of
+		 * the time spent by the job */
+		new_timestamp = ktime_sub_ns(end_timestamp,
+					KBASE_JS_IRQ_THROTTLE_TIME_US * 1000);
+		timestamp_diff = ktime_sub(new_timestamp,
+							katom->start_timestamp);
+		if (ktime_to_ns(timestamp_diff) >= 0) {
+			/* Only update the timestamp if it's a better estimate
+			 * than what's currently stored. This is because our
+			 * estimate that accounts for the throttle time may be
+			 * too much of an overestimate */
+			katom->start_timestamp = new_timestamp;
+		}
+	}
+}
+
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+	!defined(MALI_MIPE_ENABLED)
+/**
+ * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint
+ * @kbdev: kbase device
+ * @i: job slot
+ *
+ * Get kbase atom by calling kbase_gpu_inspect for given job slot.
+ * Then use obtained katom and name of slot associated with the given
+ * job slot number in tracepoint call to the instrumentation module
+ * informing that given atom is no longer executed on given lpu (job slot).
+ */
+static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, i, 0);
+
+	kbase_tlstream_tl_nret_atom_lpu(katom,
+		&kbdev->gpu_props.props.raw_props.js_features[i]);
+}
+#endif
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done)
+{
+	unsigned long flags;
+	int i;
+	u32 count = 0;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
+
+	memset(&kbdev->slot_submit_count_irq[0], 0,
+					sizeof(kbdev->slot_submit_count_irq));
+
+	/* write irq throttle register, this will prevent irqs from occurring
+	 * until the given number of gpu clock cycles have passed */
+	{
+		int irq_throttle_cycles =
+				atomic_read(&kbdev->irq_throttle_cycles);
+
+		kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE),
+						irq_throttle_cycles, NULL);
+	}
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	while (done) {
+		u32 failed = done >> 16;
+
+		/* treat failed slots as finished slots */
+		u32 finished = (done & 0xFFFF) | failed;
+
+		/* Note: This is inherently unfair, as we always check
+		 * for lower numbered interrupts before the higher
+		 * numbered ones.*/
+		i = ffs(finished) - 1;
+		KBASE_DEBUG_ASSERT(i >= 0);
+
+		do {
+			int nr_done;
+			u32 active;
+			u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */
+			u64 job_tail = 0;
+
+			if (failed & (1u << i)) {
+				/* read out the job slot status code if the job
+				 * slot reported failure */
+				completion_code = kbase_reg_read(kbdev,
+					JOB_SLOT_REG(i, JS_STATUS), NULL);
+
+				switch (completion_code) {
+				case BASE_JD_EVENT_STOPPED:
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+					kbase_trace_mali_job_slots_event(
+						GATOR_MAKE_EVENT(
+						GATOR_JOB_SLOT_SOFT_STOPPED, i),
+								NULL, 0);
+#endif
+
+					kbase_tlstream_aux_job_softstop(i);
+
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+	!defined(MALI_MIPE_ENABLED)
+					kbasep_trace_tl_nret_atom_lpu(
+						kbdev, i);
+#endif
+
+					/* Soft-stopped job - read the value of
+					 * JS<n>_TAIL so that the job chain can
+					 * be resumed */
+					job_tail = (u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_LO),
+									NULL) |
+						((u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_HI),
+								NULL) << 32);
+					break;
+				case BASE_JD_EVENT_NOT_STARTED:
+					/* PRLAM-10673 can cause a TERMINATED
+					 * job to come back as NOT_STARTED, but
+					 * the error interrupt helps us detect
+					 * it */
+					completion_code =
+						BASE_JD_EVENT_TERMINATED;
+					/* fall through */
+				default:
+					dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+							i, completion_code,
+							kbase_exception_name
+							(kbdev,
+							completion_code));
+				}
+
+				kbase_gpu_irq_evict(kbdev, i);
+			}
+
+			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+					done & ((1 << i) | (1 << (i + 16))),
+					NULL);
+			active = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_JS_STATE),
+					NULL);
+
+			if (((active >> i) & 1) == 0 &&
+					(((done >> (i + 16)) & 1) == 0)) {
+				/* There is a potential race we must work
+				 * around:
+				 *
+				 *  1. A job slot has a job in both current and
+				 *     next registers
+				 *  2. The job in current completes
+				 *     successfully, the IRQ handler reads
+				 *     RAWSTAT and calls this function with the
+				 *     relevant bit set in "done"
+				 *  3. The job in the next registers becomes the
+				 *     current job on the GPU
+				 *  4. Sometime before the JOB_IRQ_CLEAR line
+				 *     above the job on the GPU _fails_
+				 *  5. The IRQ_CLEAR clears the done bit but not
+				 *     the failed bit. This atomically sets
+				 *     JOB_IRQ_JS_STATE. However since both jobs
+				 *     have now completed the relevant bits for
+				 *     the slot are set to 0.
+				 *
+				 * If we now did nothing then we'd incorrectly
+				 * assume that _both_ jobs had completed
+				 * successfully (since we haven't yet observed
+				 * the fail bit being set in RAWSTAT).
+				 *
+				 * So at this point if there are no active jobs
+				 * left we check to see if RAWSTAT has a failure
+				 * bit set for the job slot. If it does we know
+				 * that there has been a new failure that we
+				 * didn't previously know about, so we make sure
+				 * that we record this in active (but we wait
+				 * for the next loop to deal with it).
+				 *
+				 * If we were handling a job failure (i.e. done
+				 * has the relevant high bit set) then we know
+				 * that the value read back from
+				 * JOB_IRQ_JS_STATE is the correct number of
+				 * remaining jobs because the failed job will
+				 * have prevented any futher jobs from starting
+				 * execution.
+				 */
+				u32 rawstat = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+				if ((rawstat >> (i + 16)) & 1) {
+					/* There is a failed job that we've
+					 * missed - add it back to active */
+					active |= (1u << i);
+				}
+			}
+
+			dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+							completion_code);
+
+			nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+			nr_done -= (active >> i) & 1;
+			nr_done -= (active >> (i + 16)) & 1;
+
+			if (nr_done <= 0) {
+				dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+									i);
+
+				goto spurious;
+			}
+
+			count += nr_done;
+
+			while (nr_done) {
+				if (nr_done == 1) {
+					kbase_gpu_complete_hw(kbdev, i,
+								completion_code,
+								job_tail,
+								&end_timestamp);
+					kbase_jm_try_kick_all(kbdev);
+				} else {
+					/* More than one job has completed.
+					 * Since this is not the last job being
+					 * reported this time it must have
+					 * passed. This is because the hardware
+					 * will not allow further jobs in a job
+					 * slot to complete until the failed job
+					 * is cleared from the IRQ status.
+					 */
+					kbase_gpu_complete_hw(kbdev, i,
+							BASE_JD_EVENT_DONE,
+							0,
+							&end_timestamp);
+				}
+				nr_done--;
+			}
+ spurious:
+			done = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
+				/* Workaround for missing interrupt caused by
+				 * PRLAM-10883 */
+				if (((active >> i) & 1) && (0 ==
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(i,
+							JS_STATUS), NULL))) {
+					/* Force job slot to be processed again
+					 */
+					done |= (1u << i);
+				}
+			}
+
+			failed = done >> 16;
+			finished = (done & 0xFFFF) | failed;
+			if (done)
+				end_timestamp = ktime_get();
+		} while (finished & (1 << i));
+
+		kbasep_job_slot_update_head_start_timestamp(kbdev, i,
+								end_timestamp);
+	}
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+#if KBASE_GPU_RESET_EN
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_COMMITTED) {
+		/* If we're trying to reset the GPU then we might be able to do
+		 * it early (without waiting for a timeout) because some jobs
+		 * have completed
+		 */
+		kbasep_try_reset_gpu_early(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
+}
+KBASE_EXPORT_TEST_API(kbase_job_done);
+
+static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	bool soft_stops_allowed = true;
+
+	if (kbase_jd_katom_is_secure(katom)) {
+		soft_stops_allowed = false;
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+		if ((katom->core_req & BASE_JD_REQ_T) != 0)
+			soft_stops_allowed = false;
+	}
+	return soft_stops_allowed;
+}
+
+static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
+								u16 core_reqs)
+{
+	bool hard_stops_allowed = true;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
+		if ((core_reqs & BASE_JD_REQ_T) != 0)
+			hard_stops_allowed = false;
+	}
+	return hard_stops_allowed;
+}
+
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					u16 core_reqs,
+					struct kbase_jd_atom *target_katom)
+{
+	struct kbase_context *kctx = target_katom->kctx;
+#if KBASE_TRACE_ENABLE
+	u32 status_reg_before;
+	u64 job_in_head_before;
+	u32 status_reg_after;
+
+	KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+
+	/* Check the head pointer */
+	job_in_head_before = ((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
+			| (((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_HI), NULL))
+									<< 32);
+	status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+#endif
+
+	if (action == JS_COMMAND_SOFT_STOP) {
+		bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
+								target_katom);
+
+		if (!soft_stop_allowed) {
+#ifdef CONFIG_MALI_DEBUG
+			dev_dbg(kbdev->dev,
+					"Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+
+		/* We are about to issue a soft stop, so mark the atom as having
+		 * been soft stopped */
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+
+		/* Mark the point where we issue the soft-stop command */
+		kbase_tlstream_aux_issue_job_softstop(target_katom);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+			int i;
+
+			for (i = 0;
+			     i < kbase_backend_nr_atoms_submitted(kbdev, js);
+			     i++) {
+				struct kbase_jd_atom *katom;
+
+				katom = kbase_gpu_inspect(kbdev, js, i);
+
+				KBASE_DEBUG_ASSERT(katom);
+
+				/* For HW_ISSUE_8316, only 'bad' jobs attacking
+				 * the system can cause this issue: normally,
+				 * all memory should be allocated in multiples
+				 * of 4 pages, and growable memory should be
+				 * changed size in multiples of 4 pages.
+				 *
+				 * Whilst such 'bad' jobs can be cleared by a
+				 * GPU reset, the locking up of a uTLB entry
+				 * caused by the bad job could also stall other
+				 * ASs, meaning that other ASs' jobs don't
+				 * complete in the 'grace' period before the
+				 * reset. We don't want to lose other ASs' jobs
+				 * when they would normally complete fine, so we
+				 * must 'poke' the MMU regularly to help other
+				 * ASs complete */
+				kbase_as_poking_timer_retain_atom(
+						kbdev, katom->kctx, katom);
+			}
+		}
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_SOFT_STOP_1 :
+				JS_COMMAND_SOFT_STOP_0;
+		}
+	} else if (action == JS_COMMAND_HARD_STOP) {
+		bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
+								core_reqs);
+
+		if (!hard_stop_allowed) {
+			/* Jobs can be hard-stopped for the following reasons:
+			 *  * CFS decides the job has been running too long (and
+			 *    soft-stop has not occurred). In this case the GPU
+			 *    will be reset by CFS if the job remains on the
+			 *    GPU.
+			 *
+			 *  * The context is destroyed, kbase_jd_zap_context
+			 *    will attempt to hard-stop the job. However it also
+			 *    has a watchdog which will cause the GPU to be
+			 *    reset if the job remains on the GPU.
+			 *
+			 *  * An (unhandled) MMU fault occurred. As long as
+			 *    BASE_HW_ISSUE_8245 is defined then the GPU will be
+			 *    reset.
+			 *
+			 * All three cases result in the GPU being reset if the
+			 * hard-stop fails, so it is safe to just return and
+			 * ignore the hard-stop request.
+			 */
+			dev_warn(kbdev->dev,
+					"Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+			return;
+		}
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_HARD_STOP_1 :
+				JS_COMMAND_HARD_STOP_0;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
+
+#if KBASE_TRACE_ENABLE
+	status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+	if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
+		struct kbase_jd_atom *head;
+		struct kbase_context *head_kctx;
+
+		head = kbase_gpu_inspect(kbdev, js, 0);
+		head_kctx = head->kctx;
+
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx,
+						head, job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+						0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	} else {
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+#endif
+}
+
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	/* Cancel any remaining running jobs for this kctx  */
+	mutex_lock(&kctx->jctx.lock);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* Invalidate all jobs in context, to prevent re-submitting */
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		if (!work_pending(&kctx->jctx.atoms[i].work))
+			kctx->jctx.atoms[i].event_code =
+						BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_hardstop(kctx, i, NULL);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	int js = target_katom->slot_nr;
+	int priority = target_katom->sched_priority;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		if (katom->kctx != kctx)
+			continue;
+
+		if (katom->sched_priority > priority)
+			kbase_job_slot_softstop(kbdev, js, katom);
+	}
+}
+
+struct zap_reset_data {
+	/* The stages are:
+	 * 1. The timer has never been called
+	 * 2. The zap has timed out, all slots are soft-stopped - the GPU reset
+	 *    will happen. The GPU has been reset when
+	 *    kbdev->hwaccess.backend.reset_waitq is signalled
+	 *
+	 * (-1 - The timer has been cancelled)
+	 */
+	int stage;
+	struct kbase_device *kbdev;
+	struct hrtimer timer;
+	spinlock_t lock; /* protects updates to stage member */
+};
+
+static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer)
+{
+	struct zap_reset_data *reset_data = container_of(timer,
+						struct zap_reset_data, timer);
+	struct kbase_device *kbdev = reset_data->kbdev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&reset_data->lock, flags);
+
+	if (reset_data->stage == -1)
+		goto out;
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_prepare_to_reset_gpu(kbdev)) {
+		dev_err(kbdev->dev, "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+								ZAP_TIMEOUT);
+		kbase_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	reset_data->stage = 2;
+
+ out:
+	spin_unlock_irqrestore(&reset_data->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct zap_reset_data reset_data;
+	unsigned long flags;
+
+	hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	reset_data.timer.function = zap_timeout_callback;
+
+	spin_lock_init(&reset_data.lock);
+
+	reset_data.kbdev = kbdev;
+	reset_data.stage = 1;
+
+	hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for all jobs to finish, and for the context to be not-scheduled
+	 * (due to kbase_job_zap_context(), we also guarentee it's not in the JS
+	 * policy queue either */
+	wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
+	wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			kctx->jctx.sched_info.ctx.is_scheduled == false);
+
+	spin_lock_irqsave(&reset_data.lock, flags);
+	if (reset_data.stage == 1) {
+		/* The timer hasn't run yet - so cancel it */
+		reset_data.stage = -1;
+	}
+	spin_unlock_irqrestore(&reset_data.lock, flags);
+
+	hrtimer_cancel(&reset_data.timer);
+
+	if (reset_data.stage == 2) {
+		/* The reset has already started.
+		 * Wait for the reset to complete
+		 */
+		wait_event(kbdev->hwaccess.backend.reset_wait,
+				atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+						== KBASE_RESET_GPU_NOT_PENDING);
+	}
+	destroy_hrtimer_on_stack(&reset_data.timer);
+
+	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+	/* Ensure that the signallers of the waitqs have finished */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
+{
+	u32 flush_id = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.backend.gpu_powered)
+			flush_id = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return flush_id;
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (NULL == kbdev->hwaccess.backend.reset_workq)
+		return -EINVAL;
+
+	KBASE_DEBUG_ASSERT(0 ==
+		object_is_on_stack(&kbdev->hwaccess.backend.reset_work));
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+#endif
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
+ * @kbdev: kbase device pointer
+ * @kctx:  context to check against
+ * @js:	   slot to check
+ * @target_katom: An atom to check, or NULL if all atoms from @kctx on
+ *                slot @js should be checked
+ *
+ * This checks are based upon parameters that would normally be passed to
+ * kbase_job_slot_hardstop().
+ *
+ * In the event of @target_katom being NULL, this will check the last jobs that
+ * are likely to be running on the slot to see if a) they belong to kctx, and
+ * so would be stopped, and b) whether they have AFBC
+ *
+ * In that case, It's guaranteed that a job currently executing on the HW with
+ * AFBC will be detected. However, this is a conservative check because it also
+ * detects jobs that have just completed too.
+ *
+ * Return: true when hard-stop _might_ stop an afbc atom, else false.
+ */
+static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
+		struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom)
+{
+	bool ret = false;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	/* When we have an atom the decision can be made straight away. */
+	if (target_katom)
+		return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC);
+
+	/* Otherwise, we must chweck the hardware to see if it has atoms from
+	 * this context with AFBC. */
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		/* Ignore atoms from other contexts, they won't be stopped when
+		 * we use this for checking if we should hard-stop them */
+		if (katom->kctx != kctx)
+			continue;
+
+		/* An atom on this slot and this context: check for AFBC */
+		if (katom->core_req & BASE_JD_REQ_FS_AFBC) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
+}
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * @sw_flags:      Flags to pass in about the soft-stop
+ *
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+			struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+	KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+	kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
+			JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool stopped;
+#if KBASE_GPU_RESET_EN
+	/* We make the check for AFBC before evicting/stopping atoms.  Note
+	 * that no other thread can modify the slots whilst we have the
+	 * runpool_irq lock. */
+	int needs_workaround_for_afbc =
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
+			&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
+					 target_katom);
+#endif
+
+	stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
+							target_katom,
+							JS_COMMAND_HARD_STOP);
+#if KBASE_GPU_RESET_EN
+	if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+			needs_workaround_for_afbc)) {
+		/* MIDBASE-2916 if a fragment job with AFBC encoding is
+		 * hardstopped, ensure to do a soft reset also in order to
+		 * clear the GPU status.
+		 * Workaround for HW issue 8401 has an issue,so after
+		 * hard-stopping just reset the GPU. This will ensure that the
+		 * jobs leave the GPU.*/
+		if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+			dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue");
+			kbase_reset_gpu_locked(kbdev);
+		}
+	}
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		u16 core_reqs, struct kbase_jd_atom *target_katom)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	/* For hard-stop, don't enter if hard-stop not allowed */
+	if (hw_action == JS_COMMAND_HARD_STOP &&
+			!kbasep_hard_stop_allowed(kbdev, core_reqs))
+		return;
+
+	/* For soft-stop, don't enter if soft-stop not allowed, or isn't
+	 * causing disjoint */
+	if (hw_action == JS_COMMAND_SOFT_STOP &&
+			!(kbasep_soft_stop_allowed(kbdev, target_katom) &&
+			  (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+		return;
+
+	/* Nothing to do if already logged disjoint state on this atom */
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+		return;
+
+	target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+	kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom)
+{
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+		target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+		kbase_disjoint_state_down(kbdev);
+	}
+}
+
+
+#if KBASE_GPU_RESET_EN
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+	int i;
+
+	dev_err(kbdev->dev, "Register state:");
+	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+	dev_err(kbdev->dev, "  JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x JOB_IRQ_THROTTLE=0x%08x",
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), NULL));
+	for (i = 0; i < 3; i++) {
+		dev_err(kbdev->dev, "  JS%d_STATUS=0x%08x      JS%d_HEAD_LO=0x%08x",
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
+					NULL),
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
+					NULL));
+	}
+	dev_err(kbdev->dev, "  MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+	dev_err(kbdev->dev, "  GPU_IRQ_MASK=0x%08x    JOB_IRQ_MASK=0x%08x     MMU_IRQ_MASK=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+	dev_err(kbdev->dev, "  PWR_OVERRIDE0=0x%08x   PWR_OVERRIDE1=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+	dev_err(kbdev->dev, "  SHADER_CONFIG=0x%08x   L2_MMU_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+}
+
+static void kbasep_save_hwcnt_setup(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				struct kbase_uk_hwcnt_setup *hwcnt_setup)
+{
+	hwcnt_setup->dump_buffer =
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kctx) &
+								0xffffffff;
+	hwcnt_setup->dump_buffer |= (u64)
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), kctx) <<
+									32;
+	hwcnt_setup->jm_bm =
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), kctx);
+	hwcnt_setup->shader_bm =
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), kctx);
+	hwcnt_setup->tiler_bm =
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), kctx);
+	hwcnt_setup->mmu_l2_bm =
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), kctx);
+}
+
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+	unsigned long flags, mmu_flags;
+	struct kbase_device *kbdev;
+	int i;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_uk_hwcnt_setup hwcnt_setup = { {0} };
+	enum kbase_instr_state bckp_state;
+	bool try_schedule = false;
+	bool restore_hwc = false;
+
+	KBASE_DEBUG_ASSERT(data);
+
+	kbdev = container_of(data, struct kbase_device,
+						hwaccess.backend.reset_work);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+	/* Make sure the timer has completed - this cannot be done from
+	 * interrupt context, so this cannot be done within
+	 * kbasep_try_reset_gpu_early. */
+	hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* This would re-activate the GPU. Since it's already idle,
+		 * there's no need to reset it */
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+		kbase_disjoint_state_down(kbdev);
+		wake_up(&kbdev->hwaccess.backend.reset_wait);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, mmu_flags);
+	/* We're about to flush out the IRQs and their bottom half's */
+	kbdev->irq_reset_flush = true;
+
+	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
+	 * spinlock; this also clears any outstanding interrupts */
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	kbase_pm_disable_interrupts(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, mmu_flags);
+
+	/* Ensure that any IRQ handlers have finished
+	 * Must be done without any locks IRQ handlers will take */
+	kbase_synchronize_irqs(kbdev);
+
+	/* Flush out any in-flight work items */
+	kbase_flush_mmu_wqs(kbdev);
+
+	/* The flush has completed so reset the active indicator */
+	kbdev->irq_reset_flush = false;
+
+	mutex_lock(&kbdev->pm.lock);
+	/* We hold the pm lock, so there ought to be a current policy */
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+	/* All slot have been soft-stopped and we've waited
+	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
+	 * assume that anything that is still left on the GPU is stuck there and
+	 * we'll kill it when we reset the GPU */
+
+	dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+								RESET_TIMEOUT);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+		/* the same interrupt handler preempted itself */
+		/* GPU is being reset */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+	/* Save the HW counters setup */
+	if (kbdev->hwcnt.kctx != NULL) {
+		struct kbase_context *kctx = kbdev->hwcnt.kctx;
+
+		if (kctx->jctx.sched_info.ctx.is_scheduled) {
+			kbasep_save_hwcnt_setup(kbdev, kctx, &hwcnt_setup);
+
+			restore_hwc = true;
+		}
+	}
+
+	/* Output the state of some interesting registers to help in the
+	 * debugging of GPU resets */
+	kbase_debug_dump_registers(kbdev);
+
+	bckp_state = kbdev->hwcnt.backend.state;
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_RESETTING;
+	kbdev->hwcnt.backend.triggered = 0;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Reset the GPU */
+	kbase_pm_init_hw(kbdev, 0);
+
+	/* Complete any jobs that were still on the GPU */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	kbase_backend_reset(kbdev, &end_timestamp);
+	kbase_pm_metrics_update(kbdev, NULL);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Reprogram the GPU's MMU */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		mutex_lock(&as->transaction_mutex);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		if (js_devdata->runpool_irq.per_as_data[i].kctx)
+			kbase_mmu_update(
+				js_devdata->runpool_irq.per_as_data[i].kctx);
+		else
+			kbase_mmu_disable_as(kbdev, i);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&as->transaction_mutex);
+	}
+
+	kbase_pm_enable_interrupts(kbdev);
+
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+
+	kbase_disjoint_state_down(kbdev);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	dev_err(kbdev->dev, "Reset complete");
+
+	if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
+		try_schedule = true;
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	/* Restore the HW counters setup */
+	if (restore_hwc) {
+		struct kbase_context *kctx = kbdev->hwcnt.kctx;
+		u32 prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+#endif
+
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+				prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+				hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+				hwcnt_setup.dump_buffer >> 32,        kctx);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+				hwcnt_setup.jm_bm,                    kctx);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+				hwcnt_setup.shader_bm,                kctx);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+				hwcnt_setup.mmu_l2_bm,                kctx);
+
+		/* Due to PRLAM-8186 we need to disable the Tiler before we
+		 * enable the HW counter dump. */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+						0, kctx);
+		else
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+						hwcnt_setup.tiler_bm, kctx);
+
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+				prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL,
+				kctx);
+
+		/* If HW has PRLAM-8186 we can now re-enable the tiler HW
+		 * counters dump */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+						hwcnt_setup.tiler_bm, kctx);
+	}
+	kbdev->hwcnt.backend.state = bckp_state;
+	switch (kbdev->hwcnt.backend.state) {
+	/* Cases for waking kbasep_cache_clean_worker worker */
+	case KBASE_INSTR_STATE_CLEANED:
+		/* Cache-clean IRQ occurred, but we reset:
+		 * Wakeup incase the waiter saw RESETTING */
+	case KBASE_INSTR_STATE_REQUEST_CLEAN:
+		/* After a clean was requested, but before the regs were
+		 * written:
+		 * Wakeup incase the waiter saw RESETTING */
+		wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+		break;
+	case KBASE_INSTR_STATE_CLEANING:
+		/* Either:
+		 * 1) We've not got the Cache-clean IRQ yet: it was lost, or:
+		 * 2) We got it whilst resetting: it was voluntarily lost
+		 *
+		 * So, move to the next state and wakeup: */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+		wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+		break;
+
+	/* Cases for waking anyone else */
+	case KBASE_INSTR_STATE_DUMPING:
+		/* If dumping, abort the dump, because we may've lost the IRQ */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+		break;
+	case KBASE_INSTR_STATE_DISABLED:
+	case KBASE_INSTR_STATE_IDLE:
+	case KBASE_INSTR_STATE_FAULT:
+		/* Every other reason: wakeup in that state */
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+		break;
+
+	/* Unhandled cases */
+	case KBASE_INSTR_STATE_RESETTING:
+	default:
+		BUG();
+		break;
+	}
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Resume the vinstr core */
+	kbase_vinstr_hwc_resume(kbdev->vinstr_ctx);
+
+	/* Note: counter dumping may now resume */
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Find out what cores are required now */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* Synchronously request and wait for those cores, because if
+	 * instrumentation is enabled it would need them immediately. */
+	kbase_pm_check_transitions_sync(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Try submitting some jobs to restart processing */
+	if (try_schedule) {
+		KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
+									0);
+		kbase_js_sched_all(kbdev);
+	}
+
+	kbase_pm_context_idle(kbdev);
+	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+						hwaccess.backend.reset_timer);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Reset still pending? */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+						KBASE_RESET_GPU_COMMITTED)
+		queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+	int i;
+	int pending_jobs = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Count the number of jobs */
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+	if (pending_jobs > 0) {
+		/* There are still jobs on the GPU - wait */
+		return;
+	}
+
+	/* To prevent getting incorrect registers when dumping failed job,
+	 * skip early reset.
+	 */
+	if (kbdev->job_fault_debug != false)
+		return;
+
+	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+	 * been called), and that no other thread beat this thread to starting
+	 * the reset */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+						KBASE_RESET_GPU_COMMITTED) {
+		/* Reset has already occurred */
+		return;
+	}
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	kbasep_try_reset_gpu_early_locked(kbdev);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ *   The function returns a boolean which should be interpreted as follows:
+ *   true - Prepared for reset, kbase_reset_gpu should be called.
+ *   false - Another thread is performing a reset, kbase_reset_gpu should
+ *   not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_PREPARED) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return false;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_softstop(kbdev, i, NULL);
+
+	return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	bool ret;
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early_locked(kbdev);
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100755
index 0000000..eb068d4
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,155 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpoool_irq::lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev:		Device pointer
+ * @s:			Job slot
+ * @completion_code:	Completion code of job reported by GPU
+ * @job_tail:		Job tail address reported by GPU
+ * @end_timestamp:	Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+					u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static inline char *kbasep_make_job_slot_string(int js, char *js_string)
+{
+	sprintf(js_string, "job_slot_%i", js);
+	return js_string;
+}
+#endif
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpoool_irq::lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ *						   on the specified atom
+ * @kbdev:		Device pointer
+ * @js:			Job slot to stop on
+ * @action:		The action to perform, either JSn_COMMAND_HARD_STOP or
+ *			JSn_COMMAND_SOFT_STOP
+ * @core_reqs:		Core requirements of atom to stop
+ * @target_katom:	Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					u16 core_reqs,
+					struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ *					 slot belonging to a given context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @katom:	Specific atom to stop. May be NULL
+ * @js:		Job slot to hard stop
+ * @action:	The action to perform, either JSn_COMMAND_HARD_STOP or
+ *		JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
new file mode 100755
index 0000000..af6cddc
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -0,0 +1,1506 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_10969_workaround.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Return whether the specified ringbuffer is empty. HW access lock must be
+ * held */
+#define SLOT_RB_EMPTY(rb)   (rb->write_idx == rb->read_idx)
+/* Return number of atoms currently in the specified ringbuffer. HW access lock
+ * must be held */
+#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer
+ * @kbdev: Device pointer
+ * @katom: Atom to enqueue
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr];
+
+	WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
+	rb->write_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+/**
+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once
+ * it has been completed
+ * @kbdev:         Device pointer
+ * @js:            Job slot to remove atom from
+ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in
+ *                 which case current time will be used.
+ *
+ * Context: Caller must hold the HW access lock
+ *
+ * Return: Atom removed from ringbuffer
+ */
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
+						int js,
+						ktime_t *end_timestamp)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+	struct kbase_jd_atom *katom;
+
+	if (SLOT_RB_EMPTY(rb)) {
+		WARN(1, "GPU ringbuffer unexpectedly empty\n");
+		return NULL;
+	}
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
+
+	kbase_gpu_release_atom(kbdev, katom, end_timestamp);
+
+	rb->read_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
+
+	kbase_js_debug_log_current_affinities(kbdev);
+
+	return katom;
+}
+
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
+		return NULL; /* idx out of range */
+
+	return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js)
+{
+	return kbase_gpu_inspect(kbdev, js, 0);
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	if (SLOT_RB_EMPTY(rb))
+		return NULL;
+
+	return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom;
+}
+
+/**
+ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently
+ * on the GPU
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ *
+ * Return: true if there are atoms on the GPU for slot js,
+ *         false otherwise
+ */
+static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (!katom)
+			return false;
+		if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED ||
+				katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
+ * currently on the GPU
+ * @kbdev:  Device pointer
+ *
+ * Return: true if there are any atoms on the GPU, false otherwise
+ */
+static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
+{
+	int js;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+			if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+				return true;
+		}
+	}
+	return false;
+}
+
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED))
+			nr++;
+	}
+
+	return nr;
+}
+
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		if (kbase_gpu_inspect(kbdev, js, i))
+			nr++;
+	}
+
+	return nr;
+}
+
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
+				enum kbase_atom_gpu_rb_state min_rb_state)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state >= min_rb_state))
+			nr++;
+	}
+
+	return nr;
+}
+
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* The GPU is being reset - so prevent submission */
+		return 0;
+	}
+
+	return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
+}
+
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom);
+
+static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
+						int js,
+						struct kbase_jd_atom *katom)
+{
+	/* The most recently checked affinity. Having this at this scope allows
+	 * us to guarantee that we've checked the affinity in this function
+	 * call.
+	 */
+	u64 recently_chosen_affinity = 0;
+	bool chosen_affinity = false;
+	bool retry;
+
+	do {
+		retry = false;
+
+		/* NOTE: The following uses a number of FALLTHROUGHs to optimize
+		 * the calls to this function. Ending of the function is
+		 * indicated by BREAK OUT */
+		switch (katom->coreref_state) {
+			/* State when job is first attempted to be run */
+		case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+			KBASE_DEBUG_ASSERT(katom->affinity == 0);
+
+			/* Compute affinity */
+			if (false == kbase_js_choose_affinity(
+					&recently_chosen_affinity, kbdev, katom,
+									js)) {
+				/* No cores are currently available */
+				/* *** BREAK OUT: No state transition *** */
+				break;
+			}
+
+			chosen_affinity = true;
+
+			/* Request the cores */
+			kbase_pm_request_cores(kbdev,
+					katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+			katom->affinity = recently_chosen_affinity;
+
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+			{
+				enum kbase_pm_cores_ready cores_ready;
+
+				KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+				cores_ready = kbase_pm_register_inuse_cores(
+						kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						katom->affinity);
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Stay in this state and return, to
+					 * retry at this state later */
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: No state transition
+					 * *** */
+					break;
+				}
+				/* Proceed to next state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+			}
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+			/* Optimize out choosing the affinity twice in the same
+			 * function call */
+			if (chosen_affinity == false) {
+				/* See if the affinity changed since a previous
+				 * call. */
+				if (false == kbase_js_choose_affinity(
+						&recently_chosen_affinity,
+							kbdev, katom, js)) {
+					/* No cores are currently available */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REQUEST_ON_RECHECK_FAILED,
+						katom->kctx, katom,
+						katom->jc, js,
+						(u32) recently_chosen_affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+				chosen_affinity = true;
+			}
+
+			/* Now see if this requires a different set of cores */
+			if (recently_chosen_affinity != katom->affinity) {
+				enum kbase_pm_cores_ready cores_ready;
+
+				kbase_pm_request_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+				/* Register new cores whilst we still hold the
+				 * old ones, to minimize power transitions */
+				cores_ready =
+					kbase_pm_register_inuse_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+
+				/* Fixup the state that was reduced by
+				 * deref_cores: */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				katom->affinity = recently_chosen_affinity;
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				/* Now might be waiting for powerup again, with
+				 * a new affinity */
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Return to previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_ON_RECHECK_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+			}
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+		case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+			KBASE_DEBUG_ASSERT(katom->affinity ==
+						recently_chosen_affinity);
+
+			/* Note: this is where the caller must've taken the
+			 * runpool_irq.lock */
+
+			/* Check for affinity violations - if there are any,
+			 * then we just ask the caller to requeue and try again
+			 * later */
+			if (kbase_js_affinity_would_violate(kbdev, js,
+					katom->affinity) != false) {
+				/* Return to previous state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				/* *** BREAK OUT: Transition to lower state ***
+				 */
+				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_AFFINITY_WOULD_VIOLATE,
+					katom->kctx, katom, katom->jc, js,
+					(u32) katom->affinity);
+				break;
+			}
+
+			/* No affinity violations would result, so the cores are
+			 * ready */
+			katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
+			/* *** BREAK OUT: Cores Ready *** */
+			break;
+
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false,
+					"Unhandled kbase_atom_coreref_state %d",
+							katom->coreref_state);
+			break;
+		}
+	} while (retry != false);
+
+	return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
+}
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	switch (katom->coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(katom->affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							katom->coreref_state);
+		break;
+	}
+
+	katom->affinity = 0;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+}
+
+static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							coreref_state);
+		break;
+	}
+}
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp)
+{
+	switch (katom->gpu_rb_state) {
+	case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+		/* Should be impossible */
+		WARN(1, "Attempting to release atom not in ringbuffer\n");
+		break;
+
+	case KBASE_ATOM_GPU_RB_SUBMITTED:
+		/* Inform power management at start/finish of atom so it can
+		 * update its GPU utilisation metrics. Mark atom as not
+		 * submitted beforehand. */
+		katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		kbase_pm_metrics_update(kbdev, end_timestamp);
+
+		if (katom->core_req & BASE_JD_REQ_PERMON)
+			kbase_pm_release_gpu_cycle_counter(kbdev);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_READY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+		kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
+							katom->affinity);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+		break;
+
+	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+		break;
+	}
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	kbase_gpu_release_atom(kbdev, katom, NULL);
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+}
+
+static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	bool slot_busy[3];
+
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+	slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+
+	if ((js == 2 && !(slot_busy[0] || slot_busy[1])) ||
+		(js != 2 && !slot_busy[2]))
+		return true;
+
+	/* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */
+	if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1) ||
+			backend->rmu_workaround_flag))
+		return false;
+
+	/* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */
+	if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) ||
+			!backend->rmu_workaround_flag))
+		return false;
+
+	backend->rmu_workaround_flag = !backend->rmu_workaround_flag;
+
+	return true;
+}
+
+static bool kbase_gpu_in_secure_mode(struct kbase_device *kbdev)
+{
+	return kbdev->secure_mode;
+}
+
+static int kbase_gpu_secure_mode_enable(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	WARN_ONCE(!kbdev->secure_ops,
+			"Cannot enable secure mode: secure callbacks not specified.\n");
+
+	if (kbdev->secure_ops) {
+		/* Switch GPU to secure mode */
+		err = kbdev->secure_ops->secure_mode_enable(kbdev);
+
+		if (err)
+			dev_warn(kbdev->dev, "Failed to enable secure mode: %d\n", err);
+		else
+			kbdev->secure_mode = true;
+	}
+
+	return err;
+}
+
+static int kbase_gpu_secure_mode_disable(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	WARN_ONCE(!kbdev->secure_ops,
+			"Cannot disable secure mode: secure callbacks not specified.\n");
+
+	if (kbdev->secure_ops) {
+		/* Switch GPU to non-secure mode */
+		err = kbdev->secure_ops->secure_mode_disable(kbdev);
+
+		if (err)
+			dev_warn(kbdev->dev, "Failed to disable secure mode: %d\n", err);
+		else
+			kbdev->secure_mode = false;
+	}
+
+	return err;
+}
+
+void kbase_gpu_slot_update(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_jd_atom *katom[2];
+		int idx;
+
+		katom[0] = kbase_gpu_inspect(kbdev, js, 0);
+		katom[1] = kbase_gpu_inspect(kbdev, js, 1);
+		WARN_ON(katom[1] && !katom[0]);
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			bool cores_ready;
+
+			if (!katom[idx])
+				continue;
+
+			switch (katom[idx]->gpu_rb_state) {
+			case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+				/* Should be impossible */
+				WARN(1, "Attempting to update atom not in ringbuffer\n");
+				break;
+
+			case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+				if (katom[idx]->atom_flags &
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED)
+					break;
+
+				katom[idx]->gpu_rb_state =
+				KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+			case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+
+				if (katom[idx]->will_fail_event_code) {
+					kbase_gpu_mark_atom_for_return(kbdev,
+							katom[idx]);
+					/* Set EVENT_DONE so this atom will be
+					   completed, not unpulled. */
+					katom[idx]->event_code =
+						BASE_JD_EVENT_DONE;
+					/* Only return if head atom or previous
+					 * atom already removed - as atoms must
+					 * be returned in order. */
+					if (idx == 0 ||	katom[0]->gpu_rb_state ==
+							KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+						kbase_gpu_dequeue_atom(kbdev, js, NULL);
+						kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+					}
+					break;
+				}
+
+
+				cores_ready =
+					kbasep_js_job_check_ref_cores(kbdev, js,
+								katom[idx]);
+
+				if (katom[idx]->event_code ==
+						BASE_JD_EVENT_PM_EVENT) {
+					katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+					break;
+				}
+
+				if (!cores_ready)
+					break;
+
+				kbase_js_affinity_retain_slot_cores(kbdev, js,
+							katom[idx]->affinity);
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+				if (!kbase_gpu_rmu_workaround(kbdev, js))
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE:
+				/* Only submit if head atom or previous atom
+				 * already submitted */
+				if (idx == 1 &&
+					(katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED &&
+					katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+					break;
+
+				if (kbase_gpu_in_secure_mode(kbdev) != kbase_jd_katom_is_secure(katom[idx])) {
+					int err = 0;
+
+					/* Not in correct mode, take action */
+					if (kbase_gpu_atoms_submitted_any(kbdev)) {
+						/*
+						 * We are not in the correct
+						 * GPU mode for this job, and
+						 * we can't switch now because
+						 * there are jobs already
+						 * running.
+						 */
+						break;
+					}
+
+					/* No jobs running, so we can switch GPU mode right now */
+					if (kbase_jd_katom_is_secure(katom[idx])) {
+						err = kbase_gpu_secure_mode_enable(kbdev);
+					} else {
+						err = kbase_gpu_secure_mode_disable(kbdev);
+					}
+
+					if (err) {
+						/* Failed to switch secure mode, fail atom */
+						katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+						kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+						/* Only return if head atom or previous atom
+						 * already removed - as atoms must be returned
+						 * in order */
+						if (idx == 0 || katom[0]->gpu_rb_state ==
+								KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+							kbase_gpu_dequeue_atom(kbdev, js, NULL);
+							kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+						}
+						break;
+					}
+				}
+
+				/* Secure mode sanity checks */
+				KBASE_DEBUG_ASSERT_MSG(
+					kbase_jd_katom_is_secure(katom[idx]) == kbase_gpu_in_secure_mode(kbdev),
+					"Secure mode of atom (%d) doesn't match secure mode of GPU (%d)",
+					kbase_jd_katom_is_secure(katom[idx]), kbase_gpu_in_secure_mode(kbdev));
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_READY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_READY:
+				/* Only submit if head atom or previous atom
+				 * already submitted */
+				if (idx == 1 &&
+					(katom[0]->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED &&
+					katom[0]->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+					break;
+
+				/* Check if this job needs the cycle counter
+				 * enabled before submission */
+				if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+					kbase_pm_request_gpu_cycle_counter_l2_is_on(
+									kbdev);
+
+				kbase_job_hw_submit(kbdev, katom[idx], js);
+				katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_SUBMITTED;
+
+				/* Inform power management at start/finish of
+				 * atom so it can update its GPU utilisation
+				 * metrics. */
+				kbase_pm_metrics_update(kbdev,
+						&katom[idx]->start_timestamp);
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_SUBMITTED:
+				/* Atom submitted to HW, nothing else to do */
+				break;
+
+			case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+				/* Only return if head atom or previous atom
+				 * already removed - as atoms must be returned
+				 * in order */
+				if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					kbase_gpu_dequeue_atom(kbdev, js, NULL);
+					kbase_jm_return_atom_to_js(kbdev,
+								katom[idx]);
+				}
+				break;
+			}
+		}
+	}
+
+	/* Warn if PRLAM-8987 affinity restrictions are violated */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1)) &&
+			kbase_gpu_atoms_submitted(kbdev, 2));
+}
+
+
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	kbase_gpu_enqueue_atom(kbdev, katom);
+	kbase_gpu_slot_update(kbdev);
+}
+
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_atom *next_katom;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	katom = kbase_gpu_inspect(kbdev, js, 0);
+	next_katom = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (next_katom && katom->kctx == next_katom->kctx &&
+		next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+		(kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+		kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL)
+									!= 0)) {
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+				JS_COMMAND_NOP, NULL);
+		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		return true;
+	}
+
+	return false;
+}
+
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) &&
+			completion_code != BASE_JD_EVENT_DONE &&
+			!(completion_code & BASE_JD_SW_EVENT)) {
+		katom->need_cache_flush_cores_retained = katom->affinity;
+		kbase_pm_request_cores(kbdev, false, katom->affinity);
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
+		if (kbdev->gpu_props.num_core_groups > 1 &&
+			!(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[0].core_mask
+									) &&
+			(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[1].core_mask
+									)) {
+			dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
+			katom->need_cache_flush_cores_retained =
+								katom->affinity;
+			kbase_pm_request_cores(kbdev, false,
+							katom->affinity);
+		}
+	}
+
+	katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+
+	kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED) {
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		/*
+		 * Dequeue next atom from ringbuffers on same slot if required.
+		 * This atom will already have been removed from the NEXT
+		 * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
+		 * the atoms on this slot are returned in the correct order.
+		 */
+		if (next_katom && katom->kctx == next_katom->kctx) {
+			kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+			kbase_jm_return_atom_to_js(kbdev, next_katom);
+		}
+	} else if (completion_code != BASE_JD_EVENT_DONE) {
+		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+		int i;
+
+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
+		KBASE_TRACE_DUMP(kbdev);
+#endif
+		kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+		/*
+		 * Remove all atoms on the same context from ringbuffers. This
+		 * will not remove atoms that are already on the GPU, as these
+		 * are guaranteed not to have fail dependencies on the failed
+		 * atom.
+		 */
+		for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+			struct kbase_jd_atom *katom_idx0 =
+						kbase_gpu_inspect(kbdev, i, 0);
+			struct kbase_jd_atom *katom_idx1 =
+						kbase_gpu_inspect(kbdev, i, 1);
+
+			if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
+				katom_idx0->gpu_rb_state !=
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Dequeue katom_idx0 from ringbuffer */
+				kbase_gpu_dequeue_atom(kbdev, i, end_timestamp);
+
+				if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					katom_idx0->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+					/* Dequeue katom_idx1 from ringbuffer */
+					kbase_gpu_dequeue_atom(kbdev, i,
+							end_timestamp);
+
+					katom_idx1->event_code =
+							BASE_JD_EVENT_STOPPED;
+					kbase_jm_return_atom_to_js(kbdev,
+								katom_idx1);
+				}
+				katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+
+			} else if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					katom_idx1->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Can not dequeue this atom yet - will be
+				 * dequeued when atom at idx0 completes */
+				katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_gpu_mark_atom_for_return(kbdev,
+								katom_idx1);
+			}
+		}
+	}
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
+					js, completion_code);
+
+	if (job_tail != 0 && job_tail != katom->jc) {
+		bool was_updated = (job_tail != katom->jc);
+
+		/* Some of the job has been executed, so we update the job chain
+		 * address to where we should resume from */
+		katom->jc = job_tail;
+		if (was_updated)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+						katom, job_tail, js);
+	}
+
+	/* Only update the event code for jobs that weren't cancelled */
+	if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+		katom->event_code = (base_jd_event_code)completion_code;
+
+	kbase_device_trace_register_access(kctx, REG_WRITE,
+						JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+						1 << js);
+
+	/* Complete the job, and start new ones
+	 *
+	 * Also defer remaining work onto the workqueue:
+	 * - Re-queue Soft-stopped jobs
+	 * - For any other jobs, queue the job back into the dependency system
+	 * - Schedule out the parent context if necessary, and schedule a new
+	 *   one in.
+	 */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	{
+		/* The atom in the HEAD */
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		if (next_katom && next_katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+								js_string),
+						ktime_to_ns(*end_timestamp),
+						(u32)next_katom->kctx->id, 0,
+						next_katom->work_id);
+			kbdev->hwaccess.backend.slot_rb[js].last_context =
+							next_katom->kctx;
+		} else {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+								js_string),
+						ktime_to_ns(ktime_get()), 0, 0,
+						0);
+			kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
+		}
+	}
+#endif
+
+	if (completion_code == BASE_JD_EVENT_STOPPED)
+		kbase_jm_return_atom_to_js(kbdev, katom);
+	else
+		kbase_jm_complete(kbdev, katom, end_timestamp);
+
+	/* Job completion may have unblocked other atoms. Try to update all job
+	 * slots */
+	kbase_gpu_slot_update(kbdev);
+}
+
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int idx;
+
+		for (idx = 0; idx < 2; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+									js, 0);
+
+			if (katom) {
+				kbase_gpu_release_atom(kbdev, katom, NULL);
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+				kbase_jm_complete(kbdev, katom, end_timestamp);
+			}
+		}
+	}
+}
+
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
+	kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
+							katom->core_req, katom);
+	kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+}
+
+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom,
+						u32 action,
+						bool disjoint)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+	kbase_gpu_mark_atom_for_return(kbdev, katom);
+	kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+	if (disjoint)
+		kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
+									katom);
+}
+
+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
+{
+	if (katom->x_post_dep) {
+		struct kbase_jd_atom *dep_atom = katom->x_post_dep;
+
+		if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB &&
+			dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_RETURN_TO_JS)
+			return dep_atom->slot_nr;
+	}
+	return -1;
+}
+
+static void kbase_job_evicted(struct kbase_jd_atom *katom)
+{
+	kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom,
+			katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
+}
+
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	struct kbase_jd_atom *katom_idx0;
+	struct kbase_jd_atom *katom_idx1;
+
+	bool katom_idx0_valid, katom_idx1_valid;
+
+	bool ret = false;
+
+	int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
+	katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (katom) {
+		katom_idx0_valid = (katom_idx0 == katom);
+		/* If idx0 is to be removed and idx1 is on the same context,
+		 * then idx1 must also be removed otherwise the atoms might be
+		 * returned out of order */
+		if (katom_idx1)
+			katom_idx1_valid = (katom_idx1 == katom) ||
+						(katom_idx0_valid &&
+							(katom_idx0->kctx ==
+							katom_idx1->kctx));
+		else
+			katom_idx1_valid = false;
+	} else {
+		katom_idx0_valid = (katom_idx0 &&
+					(!kctx || katom_idx0->kctx == kctx));
+		katom_idx1_valid = (katom_idx1 &&
+					(!kctx || katom_idx1->kctx == kctx));
+	}
+
+	if (katom_idx0_valid)
+		stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
+	if (katom_idx1_valid)
+		stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1);
+
+	if (katom_idx0_valid) {
+		if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Simple case - just dequeue and return */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			if (katom_idx1_valid) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				katom_idx1->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx1);
+				kbasep_js_clear_submit_allowed(js_devdata,
+							katom_idx1->kctx);
+			}
+
+			katom_idx0->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+			kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+			kbasep_js_clear_submit_allowed(js_devdata,
+							katom_idx0->kctx);
+		} else {
+			/* katom_idx0 is on GPU */
+			if (katom_idx1 && katom_idx1->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* katom_idx0 and katom_idx1 are on GPU */
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+					/* idx0 has already completed - stop
+					 * idx1 if needed*/
+					if (katom_idx1_valid) {
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				} else {
+					/* idx1 is in NEXT registers - attempt
+					 * to remove */
+					kbase_reg_write(kbdev,
+							JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+					if (kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_HI), NULL)
+									!= 0) {
+						/* idx1 removed successfully,
+						 * will be handled in IRQ */
+						kbase_job_evicted(katom_idx1);
+						kbase_gpu_remove_atom(kbdev,
+								katom_idx1,
+								action, true);
+						stop_x_dep_idx1 =
+					should_stop_x_dep_slot(katom_idx1);
+
+						/* stop idx0 if still on GPU */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx0,
+								action);
+						ret = true;
+					} else if (katom_idx1_valid) {
+						/* idx0 has already completed,
+						 * stop idx1 if needed */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				}
+			} else if (katom_idx1_valid) {
+				/* idx1 not on GPU but must be dequeued*/
+
+				/* idx1 will be handled in IRQ */
+				kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+				/* stop idx0 */
+				/* This will be repeated for anything removed
+				 * from the next registers, since their normal
+				 * flow was also interrupted, and this function
+				 * might not enter disjoint state e.g. if we
+				 * don't actually do a hard stop on the head
+				 * atom */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			} else {
+				/* no atom in idx1 */
+				/* just stop idx0 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			}
+		}
+	} else if (katom_idx1_valid) {
+		if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Mark for return */
+			/* idx1 will be returned once idx0 completes */
+			kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+		} else {
+			/* idx1 is on GPU */
+			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+				/* idx0 has already completed - stop idx1 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx1,
+									action);
+				ret = true;
+			} else {
+				/* idx1 is in NEXT registers - attempt to
+				 * remove */
+				kbase_reg_write(kbdev, JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_LO), NULL) != 0 ||
+				    kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_HI), NULL) != 0) {
+					/* idx1 removed successfully, will be
+					 * handled in IRQ once idx0 completes */
+					kbase_job_evicted(katom_idx1);
+					kbase_gpu_remove_atom(kbdev, katom_idx1,
+									action,
+									false);
+				} else {
+					/* idx0 has already completed - stop
+					 * idx1 */
+					kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+					ret = true;
+				}
+			}
+		}
+	}
+
+
+	if (stop_x_dep_idx0 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0,
+								NULL, action);
+
+	if (stop_x_dep_idx1 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1,
+								NULL, action);
+
+	return ret;
+}
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	/* Limit the number of loops to avoid a hang if the interrupt is missed
+	 */
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	mutex_lock(&kbdev->cacheclean_lock);
+
+	/* use GPU_COMMAND completion solution */
+	/* clean & invalidate the caches */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+	/* wait for cache flush to complete before continuing */
+	while (--max_loops &&
+		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+						CLEAN_CACHES_COMPLETED) == 0)
+		;
+
+	/* clear the CLEAN_CACHES_COMPLETED irq */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
+							CLEAN_CACHES_COMPLETED);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+						CLEAN_CACHES_COMPLETED, NULL);
+	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING,
+	    "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+	mutex_unlock(&kbdev->cacheclean_lock);
+
+	kbase_pm_unrequest_cores(kbdev, false,
+					katom->need_cache_flush_cores_retained);
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	/*
+	 * If cache flush required due to HW workaround then perform the flush
+	 * now
+	 */
+	if (katom->need_cache_flush_cores_retained) {
+		kbase_gpu_cacheclean(kbdev, katom);
+		katom->need_cache_flush_cores_retained = 0;
+	}
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969)            &&
+	    (katom->core_req & BASE_JD_REQ_FS)                        &&
+	    katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT       &&
+	    (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+	    !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+		dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+		if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after was soft-stopped
+			 * Due to an HW issue we try to execute the job again.
+			 */
+			dev_dbg(kbdev->dev,
+				"Clamping has been executed, try to rerun the job\n"
+			);
+			katom->event_code = BASE_JD_EVENT_STOPPED;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+		}
+	}
+
+	/* Clear the coreref_state now - while check_deref_cores() may not have
+	 * been called yet, the caller will have taken a copy of this field. If
+	 * this is not done, then if the atom is re-scheduled (following a soft
+	 * stop) then the core reference would not be retaken. */
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->affinity = 0;
+}
+
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity,
+			coreref_state);
+
+	if (!kbdev->pm.active_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		mutex_lock(&kbdev->pm.lock);
+		kbase_pm_update_active(kbdev);
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+void kbase_gpu_dump_slots(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	unsigned long flags;
+	int js;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+									js,
+									idx);
+
+			if (katom)
+				dev_info(kbdev->dev,
+				"  js%d idx%d : katom=%p gpu_rb_state=%d\n",
+				js, idx, katom, katom->gpu_rb_state);
+			else
+				dev_info(kbdev->dev, "  js%d idx%d : empty\n",
+								js, idx);
+		}
+	}
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
+
+
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
new file mode 100755
index 0000000..102d94b
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -0,0 +1,87 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_H_
+#define _KBASE_HWACCESS_GPU_H_
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/**
+ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot
+ *
+ * @kbdev:         Device pointer
+ * @js:            Job slot to evict from
+ *
+ * Evict the atom in the NEXT slot for the specified job slot. This function is
+ * called from the job complete IRQ handler when the previous job has failed.
+ *
+ * Return: true if job evicted from NEXT registers, false otherwise
+ */
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_gpu_complete_hw - Complete an atom on job slot js
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot that has completed
+ * @completion_code: Event code from job that has completed
+ * @job_tail:        The tail address from the hardware if the job has partially
+ *                   completed
+ * @end_timestamp:   Time of completion
+ */
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
+ *
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ * @idx:    Index into ringbuffer. 0 is the job currently running on
+ *          the slot, 1 is the job waiting, all other values are invalid.
+ * Return:  The atom at that position in the ringbuffer
+ *          or NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx);
+
+/**
+ * kbase_gpu_slot_update - Update state based on slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ *
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ */
+void kbase_gpu_slot_update(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
new file mode 100755
index 0000000..6a49669
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
@@ -0,0 +1,299 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+									int js)
+{
+	/*
+	 * Here are the reasons for using job slot 2:
+	 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+	 * - In absence of the above, then:
+	 *  - Atoms with BASE_JD_REQ_COHERENT_GROUP
+	 *  - But, only when there aren't contexts with
+	 *  KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+	 *  all cores on slot 1 could be blocked by those using a coherent group
+	 *  on slot 2
+	 *  - And, only when you actually have 2 or more coregroups - if you
+	 *  only have 1 coregroup, then having jobs for slot 2 implies they'd
+	 *  also be for slot 1, meaning you'll get interference from them. Jobs
+	 *  able to run on slot 2 could also block jobs that can only run on
+	 *  slot 1 (tiler jobs)
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+
+	if (js != 2)
+		return true;
+
+	/* Only deal with js==2 now: */
+	if (kbdev->gpu_props.num_core_groups > 1) {
+		/* Only use slot 2 in the 2+ coregroup case */
+		if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+					KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
+								false) {
+			/* ...But only when we *don't* have atoms that run on
+			 * all cores */
+
+			/* No specific check for BASE_JD_REQ_COHERENT_GROUP
+			 * atoms - the policy will sort that out */
+			return true;
+		}
+	}
+
+	/* Above checks failed mean we shouldn't use slot 2 */
+	return false;
+}
+
+/*
+ * As long as it has been decided to have a deeper modification of
+ * what job scheduler, power manager and affinity manager will
+ * implement, this function is just an intermediate step that
+ * assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ *   and high cores in a core split will be evently distributed.
+ * - Odd combinations of core requirements have been filtered out
+ *   and do not get to this function (e.g. CS+T+NSS is not
+ *   supported here).
+ * - This function is frequently called and can be optimized,
+ *   (see notes in loops), but as the functionallity will likely
+ *   be modified, optimization has not been addressed.
+*/
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js)
+{
+	base_jd_core_req core_req = katom->core_req;
+	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+	u64 core_availability_mask;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+	/*
+	 * If no cores are currently available (core availability policy is
+	 * transitioning) then fail.
+	 */
+	if (0 == core_availability_mask) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+		*affinity = 0;
+		return false;
+	}
+
+	KBASE_DEBUG_ASSERT(js >= 0);
+
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+								BASE_JD_REQ_T) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+		/* Tiler only job, bit 0 needed to enable tiler but no shader
+		 * cores required */
+		*affinity = 1;
+		return true;
+	}
+
+	if (1 == kbdev->gpu_props.num_cores) {
+		/* trivial case only one core, nothing to do */
+		*affinity = core_availability_mask &
+				kbdev->pm.debug_core_mask[js];
+	} else {
+		if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+			if (js == 0 || num_core_groups == 1) {
+				/* js[0] and single-core-group systems just get
+				 * the first core group */
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[0].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+			} else {
+				/* js[1], js[2] use core groups 0, 1 for
+				 * dual-core-group systems */
+				u32 core_group_idx = ((u32) js) - 1;
+
+				KBASE_DEBUG_ASSERT(core_group_idx <
+							num_core_groups);
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+
+				/* If the job is specifically targeting core
+				 * group 1 and the core availability policy is
+				 * keeping that core group off, then fail */
+				if (*affinity == 0 && core_group_idx == 1 &&
+						kbdev->pm.backend.cg1_disabled
+								== true)
+					katom->event_code =
+							BASE_JD_EVENT_PM_EVENT;
+			}
+		} else {
+			/* All cores are available when no core split is
+			 * required */
+			*affinity = core_availability_mask &
+					kbdev->pm.debug_core_mask[js];
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	/*
+	 * If no cores are currently available in the desired core group(s)
+	 * (core availability policy is transitioning) then fail.
+	 */
+	if (*affinity == 0)
+		return false;
+
+	/* Enable core 0 if tiler required */
+	if (core_req & BASE_JD_REQ_T)
+		*affinity = *affinity | 1;
+
+	return true;
+}
+
+static inline bool kbase_js_affinity_is_violating(
+						struct kbase_device *kbdev,
+								u64 *affinities)
+{
+	/* This implementation checks whether the two slots involved in Generic
+	 * thread creation have intersecting affinity. This is due to micro-
+	 * architectural issues where a job in slot A targetting cores used by
+	 * slot B could prevent the job in slot B from making progress until the
+	 * job in slot A has completed.
+	 */
+	u64 affinity_set_left;
+	u64 affinity_set_right;
+	u64 intersection;
+
+	KBASE_DEBUG_ASSERT(affinities != NULL);
+
+	affinity_set_left = affinities[1];
+
+	affinity_set_right = affinities[2];
+
+	/* A violation occurs when any bit in the left_set is also in the
+	 * right_set */
+	intersection = affinity_set_left & affinity_set_right;
+
+	return (bool) (intersection != (u64) 0u);
+}
+
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
+			sizeof(js_devdata->runpool_irq.slot_affinities));
+
+	new_affinities[js] |= affinity;
+
+	return kbase_js_affinity_is_violating(kbdev, new_affinities);
+}
+
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
+								== false);
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		cnt =
+		++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (cnt == 1)
+			js_devdata->runpool_irq.slot_affinities[js] |= bit;
+
+		cores &= ~bit;
+	}
+}
+
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		KBASE_DEBUG_ASSERT(
+		js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
+
+		cnt =
+		--(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (0 == cnt)
+			js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
+
+		cores &= ~bit;
+	}
+}
+
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int slot_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	for (slot_nr = 0; slot_nr < 3; ++slot_nr)
+		KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
+							NULL, 0u, slot_nr,
+			(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
new file mode 100755
index 0000000..3026e6a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
@@ -0,0 +1,138 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Affinity Manager internal APIs.
+ */
+
+#ifndef _KBASE_JS_AFFINITY_H_
+#define _KBASE_JS_AFFINITY_H_
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+/* Import the external affinity mask variables */
+extern u64 mali_js0_affinity_mask;
+extern u64 mali_js1_affinity_mask;
+extern u64 mali_js2_affinity_mask;
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+
+/**
+ * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
+ * submit a job to a particular job slot in the current status
+ *
+ * @kbdev: The kbase device structure of the device
+ * @js:    Job slot number to check for allowance
+ *
+ * Will check if submitting to the given job slot is allowed in the current
+ * status.  For example using job slot 2 while in soft-stoppable state and only
+ * having 1 coregroup is not allowed by the policy. This function should be
+ * called prior to submitting a job to a slot to make sure policy rules are not
+ * violated.
+ *
+ * The following locking conditions are made on the caller
+ * - it must hold kbasep_js_device_data.runpool_irq.lock
+ */
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+									int js);
+
+/**
+ * kbase_js_choose_affinity - Compute affinity for a given job.
+ *
+ * @affinity: Affinity bitmap computed
+ * @kbdev:    The kbase device structure of the device
+ * @katom:    Job chain of which affinity is going to be found
+ * @js:       Slot the job chain is being submitted
+ *
+ * Currently assumes an all-on/all-off power management policy.
+ * Also assumes there is at least one core with tiler available.
+ *
+ * Returns true if a valid affinity was chosen, false if
+ * no cores were available.
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					int js);
+
+/**
+ * kbase_js_affinity_would_violate - Determine whether a proposed affinity on
+ * job slot @js would cause a violation of affinity restrictions.
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot to test
+ * @affinity: The affinity mask to test
+ *
+ * The following locks must be held by the caller
+ * - kbasep_js_device_data.runpool_irq.lock
+ *
+ * Return: true if the affinity would violate the restrictions
+ */
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
+ *                                       a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot retaining the cores
+ * @affinity: The cores to retain
+ *
+ * The following locks must be held by the caller
+ * - kbasep_js_device_data.runpool_irq.lock
+ */
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
+ *                                        by a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       Job slot
+ * @affinity: Bit mask of core to be released
+ *
+ * Cores must be released as soon as a job is dequeued from a slot's 'submit
+ * slots', and before another job is submitted to those slots. Otherwise, the
+ * refcount could exceed the maximum number submittable to a slot,
+ * %BASE_JM_SUBMIT_SLOTS.
+ *
+ * The following locks must be held by the caller
+ * - kbasep_js_device_data.runpool_irq.lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_debug_log_current_affinities - log the current affinities
+ *
+ * @kbdev:  Kbase device structure
+ *
+ * Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else				/*  KBASE_TRACE_ENABLE  */
+static inline void
+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+}
+#endif				/*  KBASE_TRACE_ENABLE  */
+
+#endif				/* _KBASE_JS_AFFINITY_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100755
index 0000000..1e9a7e4
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,337 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is
+ * enabled for a particular level is down to the integrator. However this is
+ * being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	s8 nr_running_ctxs;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* Timer must stop if we are suspending */
+	if (backend->suspend_timer)
+		return false;
+
+	/* nr_contexts_pullable is updated with the runpool_mutex. However, the
+	 * locking in the caller gives us a barrier that ensures
+	 * nr_contexts_pullable is up-to-date for reading */
+	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (kbdev->js_data.softstop_always) {
+		/* Debug support for allowing soft-stop on a single context */
+		return true;
+	}
+#endif				/* CONFIG_MALI_DEBUG */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+		/* Timeouts would have to be 4x longer (due to micro-
+		 * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there's:
+		 * - 2 or more CL contexts
+		 * - 1 or more GLES contexts
+		 *
+		 * NOTE: We will treat a context that has both Compute and Non-
+		 * Compute jobs will be treated as an OpenCL context (hence, we
+		 * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+		 */
+		{
+			s8 nr_compute_ctxs =
+				kbasep_js_ctx_attr_count_on_runpool(kbdev,
+						KBASEP_JS_CTX_ATTR_COMPUTE);
+			s8 nr_noncompute_ctxs = nr_running_ctxs -
+							nr_compute_ctxs;
+
+			return (bool) (nr_compute_ctxs >= 2 ||
+							nr_noncompute_ctxs > 0);
+		}
+	} else {
+		/* Run the timer callback whenever you have at least 1 context
+		 */
+		return (bool) (nr_running_ctxs > 0);
+	}
+}
+
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_backend_data *backend;
+	int s;
+	bool reset_needed = false;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	backend = container_of(timer, struct kbase_backend_data,
+							scheduling_timer);
+	kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
+	js_devdata = &kbdev->js_data;
+
+	/* Loop through the slots */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+		struct kbase_jd_atom *atom = NULL;
+
+		if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
+			atom = kbase_gpu_inspect(kbdev, s, 0);
+			KBASE_DEBUG_ASSERT(atom != NULL);
+		}
+
+		if (atom != NULL) {
+			/* The current version of the model doesn't support
+			 * Soft-Stop */
+			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+				u32 ticks = atom->sched_info.cfs.ticks++;
+
+#if !CINSTR_DUMPING_ENABLED
+				u32 soft_stop_ticks, hard_stop_ticks,
+								gpu_reset_ticks;
+				if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks_cl;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_cl;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_cl;
+				} else {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_ss;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_ss;
+				}
+
+				/* Job is Soft-Stoppable */
+				if (ticks == soft_stop_ticks) {
+					int disjoint_threshold =
+		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+					u32 softstop_flags = 0u;
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks ticks.
+					 * Soft stop the slot so we can run
+					 * other jobs.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+					/* nr_user_contexts_running is updated
+					 * with the runpool_mutex, but we can't
+					 * take that here.
+					 *
+					 * However, if it's about to be
+					 * increased then the new context can't
+					 * run any jobs until they take the
+					 * runpool_irq lock, so it's OK to
+					 * observe the older value.
+					 *
+					 * Similarly, if it's about to be
+					 * decreased, the last job from another
+					 * context has already finished, so it's
+					 * not too bad that we observe the older
+					 * value and register a disjoint event
+					 * when we try soft-stopping */
+					if (js_devdata->nr_user_contexts_running
+							>= disjoint_threshold)
+						softstop_flags |=
+						JS_COMMAND_SW_CAUSES_DISJOINT;
+
+					kbase_job_slot_softstop_swflags(kbdev,
+						s, atom, softstop_flags);
+#endif
+				} else if (ticks == hard_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_ss ticks.
+					 * It should have been soft-stopped by
+					 * now. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks == gpu_reset_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_ss ticks.
+					 * It should have left the GPU by now.
+					 * Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#else				/* !CINSTR_DUMPING_ENABLED */
+				/* NOTE: During CINSTR_DUMPING_ENABLED, we use
+				 * the alternate timeouts, which makes the hard-
+				 * stop and GPU reset timeout much longer. We
+				 * also ensure that we don't soft-stop at all.
+				 */
+				if (ticks == js_devdata->soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks. We do
+					 * not soft-stop during
+					 * CINSTR_DUMPING_ENABLED, however.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+				} else if (ticks ==
+					js_devdata->hard_stop_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_dumping
+					 * ticks. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks ==
+					js_devdata->gpu_reset_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_dumping
+					 * ticks. It should have left the GPU by
+					 * now. Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#endif				/* !CINSTR_DUMPING_ENABLED */
+			}
+		}
+	}
+#if KBASE_GPU_RESET_EN
+	if (reset_needed) {
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* the timer is re-issued if there is contexts in the run-pool */
+
+	if (backend->timer_running)
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+			HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	unsigned long flags;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	if (!timer_callback_should_run(kbdev)) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+		backend->timer_running = false;
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		/* From now on, return value of timer_callback_should_run() will
+		 * also cause the timer to not requeue itself. Its return value
+		 * cannot change, because it depends on variables updated with
+		 * the runpool_mutex held, which the caller of this must also
+		 * hold */
+		hrtimer_cancel(&backend->scheduling_timer);
+	}
+
+	if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+		backend->timer_running = true;
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+							HRTIMER_MODE_REL);
+
+		KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
+									0u);
+	}
+}
+
+int kbase_backend_timer_init(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	backend->scheduling_timer.function = timer_callback;
+
+	backend->timer_running = false;
+
+	return 0;
+}
+
+void kbase_backend_timer_term(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_cancel(&backend->scheduling_timer);
+}
+
+void kbase_backend_timer_suspend(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = true;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timer_resume(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = false;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
new file mode 100755
index 0000000..3f53779
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#ifndef _KBASE_JS_BACKEND_H_
+#define _KBASE_JS_BACKEND_H_
+
+/**
+ * kbase_backend_timer_init() - Initialise the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_term() - Terminate the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver termination
+ */
+void kbase_backend_timer_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
+ *                               timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on suspend, after the active count has reached
+ * zero. This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ *                              scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that is is not guaranteed to
+ * re-start the timer, only evalute whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100755
index 0000000..c6c7b89
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,365 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_mmu_hw_direct.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+		u32 num_pages)
+{
+	u64 region;
+
+	/* can't lock a zero sized range */
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	region = pfn << PAGE_SHIFT;
+	/*
+	 * fls returns (given the ASSERT above):
+	 * 1 .. 32
+	 *
+	 * 10 + fls(num_pages)
+	 * results in the range (11 .. 42)
+	 */
+
+	/* gracefully handle num_pages being zero */
+	if (0 == num_pages) {
+		region |= 11;
+	} else {
+		u8 region_width;
+
+		region_width = 10 + fls(num_pages);
+		if (num_pages != (1ul << (region_width - 11))) {
+			/* not pow2, so must go up to the next pow2 */
+			region_width += 1;
+		}
+		KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+		KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+		region |= region_width;
+	}
+
+	return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+		unsigned int as_nr, struct kbase_context *kctx)
+{
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending. Do not log remaining register accesses. */
+	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
+
+	if (max_loops == 0) {
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		return -1;
+	}
+
+	/* If waiting in loop was performed, log last read value. */
+	if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
+		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	return 0;
+}
+
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
+		struct kbase_context *kctx)
+{
+	int status;
+
+	/* write AS_COMMAND when MMU is ready to accept another command */
+	status = wait_ready(kbdev, as_nr, kctx);
+	if (status == 0)
+		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
+									kctx);
+
+	return status;
+}
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+	const int num_as = 16;
+	const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+	const int pf_shift = 0;
+	const unsigned long as_bit_mask = (1UL << num_as) - 1;
+	unsigned long flags;
+	u32 new_mask;
+	u32 tmp;
+
+	/* bus faults */
+	u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+	/* page faults (note: Ignore ASes with both pf and bf) */
+	u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	/* remember current mask */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	/* mask interrupts for now */
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+	while (bf_bits | pf_bits) {
+		struct kbase_as *as;
+		int as_no;
+		struct kbase_context *kctx;
+
+		/*
+		 * the while logic ensures we have a bit set, no need to check
+		 * for not-found here
+		 */
+		as_no = ffs(bf_bits | pf_bits) - 1;
+		as = &kbdev->as[as_no];
+
+		/*
+		 * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+		 * Bus/Page faults _should_ only occur whilst jobs are running,
+		 * and a job causing the Bus/Page fault shouldn't complete until
+		 * the MMU is updated
+		 */
+		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+
+		/* find faulting address */
+		as->fault_addr = kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_HI),
+						kctx);
+		as->fault_addr <<= 32;
+		as->fault_addr |= kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_LO),
+						kctx);
+
+		/* record the fault status */
+		as->fault_status = kbase_reg_read(kbdev,
+						  MMU_AS_REG(as_no,
+							AS_FAULTSTATUS),
+						  kctx);
+
+		/* find the fault type */
+		as->fault_type = (bf_bits & (1 << as_no)) ?
+				KBASE_MMU_FAULT_TYPE_BUS :
+				KBASE_MMU_FAULT_TYPE_PAGE;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		as->fault_extra_addr = kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
+				kctx);
+		as->fault_extra_addr <<= 32;
+		as->fault_extra_addr |= kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
+				kctx);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+		if (kbase_as_has_bus_fault(as)) {
+			/* Mark bus fault as handled.
+			 * Note that a bus fault is processed first in case
+			 * where both a bus fault and page fault occur.
+			 */
+			bf_bits &= ~(1UL << as_no);
+
+			/* remove the queued BF (and PF) from the mask */
+			new_mask &= ~(MMU_BUS_ERROR(as_no) |
+					MMU_PAGE_FAULT(as_no));
+		} else {
+			/* Mark page fault as handled */
+			pf_bits &= ~(1UL << as_no);
+
+			/* remove the queued PF from the mask */
+			new_mask &= ~MMU_PAGE_FAULT(as_no);
+		}
+
+		/* Process the interrupt for this address space */
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+		kbase_mmu_interrupt_process(kbdev, kctx, as);
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock,
+				flags);
+	}
+
+	/* reenable interrupts */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	new_mask |= tmp;
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx)
+{
+	struct kbase_mmu_setup *current_setup = &as->current_setup;
+	u32 transcfg = 0;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+
+	/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
+	/* Clear PTW_MEMATTR bits */
+	transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+	/* Enable correct PTW_MEMATTR bits */
+	transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
+		/* Clear PTW_SH bits */
+		transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+		/* Enable correct PTW_SH bits */
+		transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
+	}
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+			transcfg, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+			(current_setup->transcfg >> 32) & 0xFFFFFFFFUL, kctx);
+
+#else /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+			current_setup->transtab & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+			(current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+			current_setup->memattr & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+			(current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_tlstream_tl_attrib_as_config(as,
+			current_setup->transtab,
+			current_setup->memattr,
+			transcfg);
+
+	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+}
+
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+		unsigned int handling_irq)
+{
+	int ret;
+
+	if (op == AS_COMMAND_UNLOCK) {
+		/* Unlock doesn't require a lock first */
+		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+	} else {
+		u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+		/* Lock the region that needs to be updated */
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+				lock_addr & 0xFFFFFFFFUL, kctx);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+				(lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
+		write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+
+		/* Run the MMU operation */
+		write_cmd(kbdev, as->number, op, kctx);
+
+		/* Wait for the flush to complete */
+		ret = wait_ready(kbdev, as->number, kctx);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+			/* Issue an UNLOCK command to ensure that valid page
+			   tables are re-read by the GPU after an update.
+			   Note that, the FLUSH command should perform all the
+			   actions necessary, however the bus logs show that if
+			   multiple page faults occur within an 8 page region
+			   the MMU does not always re-read the updated page
+			   table entries for later faults or is only partially
+			   read, it subsequently raises the page fault IRQ for
+			   the same addresses, the unlock ensures that the MMU
+			   cache is flushed, so updates can be re-read.  As the
+			   region is now unlocked we need to issue 2 UNLOCK
+			   commands in order to flush the MMU/uTLB,
+			   see PRLAM-8812.
+			 */
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+		}
+	}
+
+	return ret;
+}
+
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 pf_bf_mask;
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	/* Clear the page (and bus fault IRQ as well in case one occurred) */
+	pf_bf_mask = MMU_PAGE_FAULT(as->number);
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		pf_bf_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 irq_mask;
+
+	/* Enable the page fault IRQ (and bus fault IRQ as well in case one
+	 * occurred) */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
+			MMU_PAGE_FAULT(as->number);
+
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		irq_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
new file mode 100755
index 0000000..c02253c
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Interface file for the direct implementation for MMU hardware access
+ *
+ * Direct MMU hardware interface
+ *
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
+#define _MALI_KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * kbase_mmu_interrupt - Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the &kbase_device.
+ *
+ * @kbdev:          kbase context to clear the fault from.
+ * @irq_stat:       Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+#endif	/* _MALI_KBASE_MMU_HW_DIRECT_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
new file mode 100755
index 0000000..0614348
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static void always_on_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void always_on_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+	"always_on",			/* name */
+	always_on_init,			/* init */
+	always_on_term,			/* term */
+	always_on_get_core_mask,	/* get_core_mask */
+	always_on_get_core_active,	/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_ALWAYS_ON,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
new file mode 100755
index 0000000..f9d244b
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -0,0 +1,77 @@
+
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * DOC:
+ * The "Always on" power management policy has the following
+ * characteristics:
+ *
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *    All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *    All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *    The Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed. The GPU itself is also kept powered, even though it is not
+ *    needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * Note:
+ *
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data
+ * @dummy: unused dummy variable
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_always_on {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
new file mode 100755
index 0000000..5805efe
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -0,0 +1,387 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * GPU backend implementation of base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_on_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = true;
+}
+
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_off_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = false;
+}
+
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+	struct kbase_pm_callback_conf *callbacks;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_init(&kbdev->pm.lock);
+
+	kbdev->pm.backend.gpu_powered = false;
+	kbdev->pm.suspending = false;
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->pm.backend.driver_ready_for_irqs = false;
+#endif /* CONFIG_MALI_DEBUG */
+	kbdev->pm.backend.gpu_in_desired_state = true;
+	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+	if (callbacks) {
+		kbdev->pm.backend.callback_power_on =
+					callbacks->power_on_callback;
+		kbdev->pm.backend.callback_power_off =
+					callbacks->power_off_callback;
+		kbdev->pm.backend.callback_power_suspend =
+					callbacks->power_suspend_callback;
+		kbdev->pm.backend.callback_power_resume =
+					callbacks->power_resume_callback;
+		kbdev->pm.callback_power_runtime_init =
+					callbacks->power_runtime_init_callback;
+		kbdev->pm.callback_power_runtime_term =
+					callbacks->power_runtime_term_callback;
+		kbdev->pm.backend.callback_power_runtime_on =
+					callbacks->power_runtime_on_callback;
+		kbdev->pm.backend.callback_power_runtime_off =
+					callbacks->power_runtime_off_callback;
+		kbdev->pm.backend.callback_power_runtime_idle =
+					callbacks->power_runtime_idle_callback;
+	} else {
+		kbdev->pm.backend.callback_power_on = NULL;
+		kbdev->pm.backend.callback_power_off = NULL;
+		kbdev->pm.backend.callback_power_suspend = NULL;
+		kbdev->pm.backend.callback_power_resume = NULL;
+		kbdev->pm.callback_power_runtime_init = NULL;
+		kbdev->pm.callback_power_runtime_term = NULL;
+		kbdev->pm.backend.callback_power_runtime_on = NULL;
+		kbdev->pm.backend.callback_power_runtime_off = NULL;
+		kbdev->pm.backend.callback_power_runtime_idle = NULL;
+	}
+
+	/* Initialise the metrics subsystem */
+	ret = kbasep_pm_metrics_init(kbdev);
+	if (ret)
+		return ret;
+
+	init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
+	kbdev->pm.backend.l2_powered = 0;
+
+	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
+	kbdev->pm.backend.reset_done = false;
+
+	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+	kbdev->pm.active_count = 0;
+
+	spin_lock_init(&kbdev->pm.power_change_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+	if (kbase_pm_ca_init(kbdev) != 0)
+		goto workq_fail;
+
+	if (kbase_pm_policy_init(kbdev) != 0)
+		goto pm_policy_fail;
+
+	return 0;
+
+pm_policy_fail:
+	kbase_pm_ca_term(kbdev);
+workq_fail:
+	kbasep_pm_metrics_term(kbdev);
+	return -EINVAL;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Turn clocks and interrupts on - no-op if we haven't done a previous
+	 * kbase_pm_clock_off() */
+	kbase_pm_clock_on(kbdev, is_resume);
+
+	/* Update core status as required by the policy */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+	kbase_pm_update_cores_state(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+	/* NOTE: We don't wait to reach the desired state, since running atoms
+	 * will wait for that state to be reached anyway */
+}
+
+bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+	bool cores_are_available;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Force all cores off */
+	kbdev->pm.backend.desired_shader_state = 0;
+
+	/* Force all cores to be unavailable, in the situation where
+	 * transitions are in progress for some cores but not others,
+	 * and kbase_pm_check_transitions_nolock can not immediately
+	 * power off the cores */
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_available_bitmap = 0;
+
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	/* NOTE: We won't wait to reach the core's desired state, even if we're
+	 * powering off the GPU itself too. It's safe to cut the power whilst
+	 * they're transitioning to off, because the cores should be idle and
+	 * all cache flushes should already have occurred */
+
+	/* Consume any change-state events */
+	kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	/* Disable interrupts and turn the clock off */
+	return kbase_pm_clock_off(kbdev, is_suspend);
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, don't enable IRQs as we are not ready to receive
+	 * them. */
+	ret = kbase_pm_init_hw(kbdev, flags);
+	if (ret) {
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		return ret;
+	}
+
+	kbasep_pm_read_present_cores(kbdev);
+
+	kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
+			kbdev->pm.debug_core_mask[1] =
+			kbdev->pm.debug_core_mask[2] =
+			kbdev->gpu_props.props.raw_props.shader_present;
+
+	/* Pretend the GPU is active to prevent a power policy turning the GPU
+	 * cores off */
+	kbdev->pm.active_count = 1;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+	/* Ensure cycle counter is off */
+	kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+	/* We are ready to receive IRQ's now as power policy is set up, so
+	 * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+	kbdev->pm.backend.driver_ready_for_irqs = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+#endif
+	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the GPU and any cores needed by the policy */
+	kbase_pm_do_poweron(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	kbase_pm_context_idle(kbdev);
+
+	return 0;
+}
+
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	if (!kbase_pm_do_poweroff(kbdev, false)) {
+		/* Page/bus faults are pending, must drop pm.lock to process.
+		 * Interrupts are disabled so no more faults should be
+		 * generated at this point */
+		mutex_unlock(&kbdev->pm.lock);
+		kbase_flush_mmu_wqs(kbdev);
+		mutex_lock(&kbdev->pm.lock);
+		WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
+	}
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
+
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
+
+	/* Free any resources the policy allocated */
+	kbase_pm_policy_term(kbdev);
+	kbase_pm_ca_term(kbdev);
+
+	/* Shut down the metrics subsystem */
+	kbasep_pm_metrics_term(kbdev);
+}
+
+void kbase_pm_power_changed(struct kbase_device *kbdev)
+{
+	bool cores_are_available;
+	unsigned long flags;
+
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
+
+	if (cores_are_available) {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+		kbase_gpu_slot_update(kbdev);
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+	}
+}
+
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2)
+{
+	kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
+	kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
+	kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
+	kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
+			new_core_mask_js2;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	/* Force power off the GPU and all cores (regardless of policy), only
+	 * after the PM active count reaches zero (otherwise, we risk turning it
+	 * off prematurely) */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	if (!kbase_pm_do_poweroff(kbdev, true)) {
+		/* Page/bus faults are pending, must drop pm.lock to process.
+		 * Interrupts are disabled so no more faults should be
+		 * generated at this point */
+		mutex_unlock(&kbdev->pm.lock);
+		kbase_flush_mmu_wqs(kbdev);
+		mutex_lock(&kbdev->pm.lock);
+		WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
+	}
+
+	kbase_backend_timer_suspend(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbdev->pm.suspending = false;
+	kbase_pm_do_poweron(kbdev, true);
+
+	kbase_backend_timer_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100755
index 0000000..4eada33
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,183 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+	&kbase_pm_ca_fixed_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_ca_demand_policy_ops,
+	&kbase_pm_ca_random_policy_ops,
+#endif
+};
+
+/**
+ * POLICY_COUNT - The number of policies available in the system.
+ *
+ * This is derived from the number of functions listed in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.ca_current_policy = policy_list[0];
+
+	kbdev->pm.backend.ca_current_policy->init(kbdev);
+
+	return 0;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_current_policy->term(kbdev);
+}
+
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
+
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.ca_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
+
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *new_policy)
+{
+	const struct kbase_pm_ca_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
+								new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	old_policy = kbdev->pm.backend.ca_current_policy;
+	kbdev->pm.backend.ca_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.ca_current_policy = new_policy;
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+					kbdev->shader_ready_bitmap,
+					kbdev->shader_transitioning_bitmap);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
+
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	/* All cores must be enabled when instrumentation is in use */
+	if (kbdev->pm.backend.instr_enabled)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	if (kbdev->pm.backend.ca_current_policy == NULL)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
+						kbdev->pm.debug_core_mask_all;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
+
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+							u64 cores_transitioning)
+{
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	if (kbdev->pm.backend.ca_current_policy != NULL)
+		kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+							cores_ready,
+							cores_transitioning);
+}
+
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.instr_enabled = true;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.instr_enabled = false;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
new file mode 100755
index 0000000..ee9e751
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+/**
+ * kbase_pm_ca_init - Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the core availability framework was successfully initialized,
+ *         -errno otherwise
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_term - Terminate core availability framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the currently available shader cores.
+ * Calls into the core availability policy
+ *
+ * Return: The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_update_core_status - Update core status
+ *
+ * @kbdev:               The kbase device structure for the device (must be
+ *                       a valid pointer)
+ * @cores_ready:         The bit mask of cores ready for job submission
+ * @cores_transitioning: The bit mask of cores that are transitioning power
+ *                       state
+ *
+ * Update core availability policy with current core power status
+ *
+ * Calls into the core availability policy
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+/**
+ * kbase_pm_ca_instr_enable - Enable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This overrides the output of the core availability policy, ensuring that all
+ * cores are available
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_instr_disable - Disable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This disables any previously enabled override, and resumes normal policy
+ * functionality
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_CA_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
new file mode 100644
index 0000000..19f1f73
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
@@ -0,0 +1,246 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation.
+ *
+ * This policy periodically selects a new core mask at demand. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/printk.h>
+
+#ifndef MALI_CA_TIMER
+static struct workqueue_struct *mali_ca_wq = NULL;
+#endif
+static int num_shader_cores_to_be_enabled = 0;
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+#ifndef MALI_CA_TIMER
+	struct kbase_device *kbdev = (struct kbase_device *)priv;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+#endif
+
+    num_shader_cores_to_be_enabled = cores -1;
+    printk("pp num=%d\n", num_shader_cores_to_be_enabled);
+    if (num_shader_cores_to_be_enabled < 1)
+        num_shader_cores_to_be_enabled = 1;
+
+#ifndef MALI_CA_TIMER
+    queue_work(mali_ca_wq, &data->wq_work);
+#endif
+
+    return 0;
+}
+
+#ifdef MALI_CA_TIMER
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void demand_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+#if 0
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+#endif
+    data->cores_desired = num_shader_cores_to_be_enabled;
+
+	if (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+            dev_info(kbdev->dev, "error, ca demand policy : new core mask=%llX\n",
+				data->cores_desired);
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "ca demand policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(demand_timer_callback);
+#else
+static void do_ca_scaling(struct work_struct *work)
+{
+    //unsigned long flags;
+	struct kbase_device *kbdev = = container_of(work,
+            struct kbasep_pm_ca_policy_demand, wq_work);
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+    data->cores_desired =  num_shader_cores_to_be_enabled;
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+
+}
+#endif
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+#ifdef MALI_CA_TIMER
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = demand_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+#else
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+	mali_ca_wq = alloc_workqueue("gpu_ca_wq", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+	mali_ca_wq = create_workqueue("gpu_ca_wq");
+#endif
+	INIT_WORK(&data->wq_work, do_ca_scaling);
+    if (mali_ca_wq == NULL) printk("Unable to create gpu scaling workqueue\n");
+#endif
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+#ifdef MALI_CA_TIMER
+	del_timer(&data->core_change_timer);
+#else
+    if (mali_ca_wq) {
+        flush_workqueue(mali_ca_wq);
+        destroy_workqueue(mali_ca_wq);
+        mali_ca_wq = NULL;
+    }
+#endif
+}
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.demand.cores_enabled;
+}
+
+static void demand_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the demand core availability
+ * policy.
+ *
+ * This is the static structure that defines the demand core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_demand_policy_ops);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
new file mode 100644
index 0000000..a3dfe2a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_DEMAND_H
+#define MALI_KBASE_PM_CA_DEMAND_H
+#include <linux/workqueue.h>
+/**
+ * struct kbasep_pm_ca_policy_demand - Private structure for demand ca policy
+ *
+ * This contains data that is private to the demand core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+#define MALI_CA_TIMER 1
+struct kbasep_pm_ca_policy_demand {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+#ifdef MALI_CA_TIMER
+	struct timer_list core_change_timer;
+#else
+	struct work_struct wq_work;
+#endif
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops;
+extern int mali_perf_set_num_pp_cores(int cores);
+
+#endif /* MALI_KBASE_PM_CA_DEMAND_H */
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
new file mode 100755
index 0000000..1db6586
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static void fixed_init(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev,
+					u64 cores_ready,
+					u64 cores_transitioning)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(cores_ready);
+	CSTD_UNUSED(cores_transitioning);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the fixed power policy.
+ *
+ * This is the static structure that defines the fixed power policy's callback
+ * and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+	"fixed",			/* name */
+	fixed_init,			/* init */
+	fixed_term,			/* term */
+	fixed_get_core_mask,		/* get_core_mask */
+	fixed_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_FIXED,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
new file mode 100755
index 0000000..a763155
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Dummy member - no state is needed
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+	int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
new file mode 100644
index 0000000..0d1b220
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation.
+ *
+ * This policy periodically selects a new core mask at random. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void random_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(random_timer_callback);
+
+static void random_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = random_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+}
+
+static void random_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	del_timer(&data->core_change_timer);
+}
+
+static u64 random_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.random.cores_enabled;
+}
+
+static void random_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the random core availability
+ * policy.
+ *
+ * This is the static structure that defines the random core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops = {
+	"random",			/* name */
+	random_init,			/* init */
+	random_term,			/* term */
+	random_get_core_mask,		/* get_core_mask */
+	random_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_RANDOM,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_random_policy_ops);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
new file mode 100644
index 0000000..a8c9b15
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_RANDOM_H
+#define MALI_KBASE_PM_CA_RANDOM_H
+
+/**
+ * struct kbasep_pm_ca_policy_random - Private structure for random ca policy
+ *
+ * This contains data that is private to the random core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+struct kbasep_pm_ca_policy_random {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+	struct timer_list core_change_timer;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_RANDOM_H */
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100755
index 0000000..4873911
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count == 0)
+		return 0;
+
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap))
+		return false;
+
+	return true;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	"coarse_demand",			/* name */
+	coarse_demand_init,			/* init */
+	coarse_demand_term,			/* term */
+	coarse_demand_get_core_mask,		/* get_core_mask */
+	coarse_demand_get_core_active,		/* get_core_active */
+	0u,					/* flags */
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100755
index 0000000..749d305
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * DOC:
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
+ *                                         policy
+ *
+ * This contains data that is private to the coarse demand power policy.
+ *
+ * @dummy: Dummy member - no state needed
+ */
+struct kbasep_pm_policy_coarse_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100755
index 0000000..3eaf1a7
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,486 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#include "mali_kbase_pm_ca_demand.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/**
+ * enum kbase_pm_core_type - The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to
+ * - kbase_pm_get_present_cores()
+ * - kbase_pm_get_active_cores()
+ * - kbase_pm_get_trans_cores()
+ * - kbase_pm_get_ready_cores().
+ *
+ * They specify which type of core should be acted on.  These values are set in
+ * a manner that allows core_type_to_reg() function to be simpler and more
+ * efficient.
+ *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ */
+enum kbase_pm_core_type {
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO
+};
+
+/**
+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power
+ *                                 management framework.
+ *
+ *  @time_period_start: time at which busy/idle measurements started
+ *  @time_busy: number of ns the GPU was busy executing jobs since the
+ *          @time_period_start timestamp.
+ *  @time_idle: number of ns since time_period_start the GPU was not executing
+ *          jobs since the @time_period_start timestamp.
+ *  @prev_busy: busy time in ns of previous time period.
+ *           Updated when metrics are reset.
+ *  @prev_idle: idle time in ns of previous time period
+ *           Updated when metrics are reset.
+ *  @gpu_active: true when the GPU is executing jobs. false when
+ *           not. Updated when the job scheduler informs us a job in submitted
+ *           or removed from a GPU slot.
+ *  @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
+ *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
+ *           GL jobs never run on slot 2 this slot is not recorded.
+ *  @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ *  @timer: timer to regularly make DVFS decisions based on the power
+ *           management metrics.
+ *  @timer_active: boolean indicating @timer is running
+ *  @platform_data: pointer to data controlled by platform specific code
+ *  @kbdev: pointer to kbase device for which metrics are collected
+ *
+ */
+struct kbasep_pm_metrics_data {
+	ktime_t time_period_start;
+	u32 time_busy;
+	u32 time_idle;
+	u32 prev_busy;
+	u32 prev_idle;
+	bool gpu_active;
+	u32 busy_cl[2];
+	u32 busy_gl;
+	u32 active_cl_ctx[2];
+	u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
+	spinlock_t lock;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	struct hrtimer timer;
+	bool timer_active;
+#endif
+
+	void *platform_data;
+	struct kbase_device *kbdev;
+};
+
+union kbase_pm_policy_data {
+	struct kbasep_pm_policy_always_on always_on;
+	struct kbasep_pm_policy_coarse_demand coarse_demand;
+	struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+	struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+union kbase_pm_ca_policy_data {
+	struct kbasep_pm_ca_policy_fixed fixed;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_ca_policy_random random;
+	struct kbasep_pm_ca_policy_demand demand;
+#endif
+};
+
+/**
+ * struct kbase_pm_backend_data - Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ *
+ * @ca_current_policy: The policy that is currently actively controlling core
+ *                     availability.
+ * @pm_current_policy: The policy that is currently actively controlling the
+ *                     power state.
+ * @ca_policy_data:    Private data for current CA policy
+ * @pm_policy_data:    Private data for current PM policy
+ * @ca_in_transition:  Flag indicating when core availability policy is
+ *                     transitioning cores. The core availability policy must
+ *                     set this when a change in core availability is occurring.
+ *                     power_change_lock must be held when accessing this.
+ * @reset_done:        Flag when a reset is complete
+ * @reset_done_wait:   Wait queue to wait for changes to @reset_done
+ * @l2_powered_wait:   Wait queue for whether the l2 cache has been powered as
+ *                     requested
+ * @l2_powered:        State indicating whether all the l2 caches are powered.
+ *                     Non-zero indicates they're *all* powered
+ *                     Zero indicates that some (or all) are not powered
+ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
+ *                              users
+ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
+ * @desired_shader_state: A bit mask identifying the shader cores that the
+ *                        power policy would like to be on. The current state
+ *                        of the cores may be different, but there should be
+ *                        transitions in progress that will eventually achieve
+ *                        this state (assuming that the policy doesn't change
+ *                        its mind in the mean time).
+ * @powering_on_shader_state: A bit mask indicating which shader cores are
+ *                            currently in a power-on transition
+ * @desired_tiler_state: A bit mask identifying the tiler cores that the power
+ *                       policy would like to be on. See @desired_shader_state
+ * @powering_on_tiler_state: A bit mask indicating which tiler core are
+ *                           currently in a power-on transition
+ * @powering_on_l2_state: A bit mask indicating which l2-caches are currently
+ *                        in a power-on transition
+ * @gpu_in_desired_state: This flag is set if the GPU is powered as requested
+ *                        by the desired_xxx_state variables
+ * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
+ * @gpu_powered:       Set to true when the GPU is powered and register
+ *                     accesses are possible, false otherwise
+ * @instr_enabled:     Set to true when instrumentation is enabled,
+ *                     false otherwise
+ * @cg1_disabled:      Set if the policy wants to keep the second core group
+ *                     powered off
+ * @driver_ready_for_irqs: Debug state indicating whether sufficient
+ *                         initialization of the driver has occurred to handle
+ *                         IRQs
+ * @gpu_powered_lock:  Spinlock that must be held when writing @gpu_powered or
+ *                     accessing @driver_ready_for_irqs
+ * @metrics:           Structure to hold metrics for the GPU
+ * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
+ *                        powered off
+ * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
+ *                        are powered off
+ * @gpu_poweroff_timer: Timer for powering off GPU
+ * @gpu_poweroff_wq:   Workqueue to power off GPU on when timer fires
+ * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
+ * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
+ *                           timer callback
+ * @poweroff_timer_needed: true if the poweroff timer is currently required,
+ *                         false otherwise
+ * @poweroff_timer_running: true if the poweroff timer is currently running,
+ *                          false otherwise
+ *                          power_change_lock should be held when accessing,
+ *                          unless there is no way the timer can be running (eg
+ *                          hrtimer_cancel() was called immediately before)
+ * @callback_power_on: Callback when the GPU needs to be turned on. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_off: Callback when the GPU may be turned off. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to
+ *                          be turned off. See &struct kbase_pm_callback_conf
+ * @callback_power_resume: Callback when a resume occurs and the GPU needs to
+ *                          be turned on. See &struct kbase_pm_callback_conf
+ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See
+ *                             &struct kbase_pm_callback_conf
+ * @callback_power_runtime_off: Callback when the GPU may be turned off. See
+ *                              &struct kbase_pm_callback_conf
+ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
+ *                              &struct kbase_pm_callback_conf
+ *
+ * Note:
+ * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
+ * policy is being changed with kbase_pm_ca_set_policy() or
+ * kbase_pm_set_policy(). The change is protected under
+ * kbase_device.pm.power_change_lock. Direct access to this
+ * from IRQ context must therefore check for NULL. If NULL, then
+ * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
+ * functions that would have been done under IRQ.
+ */
+struct kbase_pm_backend_data {
+	const struct kbase_pm_ca_policy *ca_current_policy;
+	const struct kbase_pm_policy *pm_current_policy;
+	union kbase_pm_ca_policy_data ca_policy_data;
+	union kbase_pm_policy_data pm_policy_data;
+	bool ca_in_transition;
+	bool reset_done;
+	wait_queue_head_t reset_done_wait;
+	wait_queue_head_t l2_powered_wait;
+	int l2_powered;
+	int gpu_cycle_counter_requests;
+	spinlock_t gpu_cycle_counter_requests_lock;
+
+	u64 desired_shader_state;
+	u64 powering_on_shader_state;
+	u64 desired_tiler_state;
+	u64 powering_on_tiler_state;
+	u64 powering_on_l2_state;
+
+	bool gpu_in_desired_state;
+	wait_queue_head_t gpu_in_desired_state_wait;
+
+	bool gpu_powered;
+
+	bool instr_enabled;
+
+	bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+	bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+	spinlock_t gpu_powered_lock;
+
+
+	struct kbasep_pm_metrics_data metrics;
+
+	int gpu_poweroff_pending;
+	int shader_poweroff_pending_time;
+
+	struct hrtimer gpu_poweroff_timer;
+	struct workqueue_struct *gpu_poweroff_wq;
+	struct work_struct gpu_poweroff_work;
+
+	u64 shader_poweroff_pending;
+
+	bool poweroff_timer_needed;
+	bool poweroff_timer_running;
+
+	int (*callback_power_on)(struct kbase_device *kbdev);
+	void (*callback_power_off)(struct kbase_device *kbdev);
+	void (*callback_power_suspend)(struct kbase_device *kbdev);
+	void (*callback_power_resume)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
+};
+
+
+/* List of policy IDs */
+enum kbase_pm_policy_id {
+	KBASE_PM_POLICY_ID_DEMAND = 1,
+	KBASE_PM_POLICY_ID_ALWAYS_ON,
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
+	KBASE_PM_POLICY_ID_FAST_START
+#endif
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+/**
+ * struct kbase_pm_policy - Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core mask
+ * @get_core_active:    Function called to get the current overall GPU power
+ *                      state
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.pm_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core mask
+	 *
+	 * The returned mask should meet or exceed (kbdev->shader_needed_bitmap
+	 * | kbdev->shader_inuse_bitmap).
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: The mask of shader cores to be powered
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current overall GPU power state
+	 *
+	 * This function should consider the state of kbdev->pm.active_count. If
+	 * this count is greater than 0 then there is at least one active
+	 * context on the device and the GPU should be powered. If it is equal
+	 * to 0 then there are no active contexts and the GPU could be powered
+	 * off if desired.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: true if the GPU should be powered, false otherwise
+	 */
+	bool (*get_core_active)(struct kbase_device *kbdev);
+
+	kbase_pm_policy_flags flags;
+	enum kbase_pm_policy_id id;
+};
+
+
+enum kbase_pm_ca_policy_id {
+	KBASE_PM_CA_POLICY_ID_FIXED = 1,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_CA_POLICY_ID_RANDOM,
+	KBASE_PM_CA_POLICY_ID_DEMAND,
+#endif
+};
+
+typedef u32 kbase_pm_ca_policy_flags;
+
+/**
+ * struct kbase_pm_ca_policy - Core availability policy structure.
+ *
+ * Each core availability policy exposes a (static) instance of this structure
+ * which contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core
+ *                      availability mask
+ * @update_core_status: Function called to update the current core status
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_ca_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.ca_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core availability mask
+	 *
+	 * When a change in core availability is occurring, the policy must set
+	 * kbdev->pm.ca_in_transition to true. This is to indicate that
+	 * reporting changes in power state cannot be optimized out, even if
+	 * kbdev->pm.desired_shader_state remains unchanged. This must be done
+	 * by any functions internal to the Core Availability Policy that change
+	 * the return value of kbase_pm_ca_policy::get_core_mask.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *              valid pointer)
+	 *
+	 * Return: The current core availability mask
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to update the current core status
+	 *
+	 * If none of the cores in core group 0 are ready or transitioning, then
+	 * the policy must ensure that the next call to get_core_mask does not
+	 * return 0 for all cores in core group 0. It is an error to disable
+	 * core group 0 through the core availability policy.
+	 *
+	 * When a change in core availability has finished, the policy must set
+	 * kbdev->pm.ca_in_transition to false. This is to indicate that
+	 * changes in power state can once again be optimized out when
+	 * kbdev->pm.desired_shader_state is unchanged.
+	 *
+	 * @kbdev:               The kbase device structure for the device
+	 *                       (must be a valid pointer)
+	 * @cores_ready:         The mask of cores currently powered and
+	 *                       ready to run jobs
+	 * @cores_transitioning: The mask of cores currently transitioning
+	 *                       power state
+	 */
+	void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+	kbase_pm_ca_policy_flags flags;
+
+	/**
+	 * Field indicating an ID for this policy. This is not necessarily the
+	 * same as its index in the list returned by kbase_pm_list_policies().
+	 * It is used purely for debugging.
+	 */
+	enum kbase_pm_ca_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
new file mode 100755
index 0000000..9dac230
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
+
+	if (0 == kbdev->pm.active_count)
+		return 0;
+
+	return desired;
+}
+
+static bool demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap))
+		return false;
+
+	return true;
+}
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_get_core_active,		/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
new file mode 100755
index 0000000..c0c84b6
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#ifndef MALI_KBASE_PM_DEMAND_H
+#define MALI_KBASE_PM_DEMAND_H
+
+/**
+ * DOC: Demand power management policy
+ *
+ * The demand power management policy has the following characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - The Shader Cores are not powered up
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - Only those Shader Cores are powered up
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * Note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_demand - Private structure for policy instance data
+ *
+ * @dummy: No state is needed, a dummy variable
+ *
+ * This contains data that is private to the demand power policy.
+ */
+struct kbasep_pm_policy_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_DEMAND_H */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
new file mode 100755
index 0000000..d0fa253
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -0,0 +1,1546 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel Power Management hardware control
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_smc.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+
+#if MALI_MOCK_TEST
+#define MOCKABLE(function) function##_original
+#else
+#define MOCKABLE(function) function
+#endif				/* MALI_MOCK_TEST */
+
+/* Special value to indicate that the JM_CONFIG reg isn't currently used. */
+#define KBASE_JM_CONFIG_UNUSED (1<<31)
+
+static u64 kbase_pm_get_state(
+		struct kbase_device *kbdev,
+		enum kbase_pm_core_type core_type,
+		enum kbasep_pm_action action);
+
+/**
+ * core_type_to_reg - Decode a core type and action to a register.
+ *
+ * Given a core type (defined by kbase_pm_core_type) and an action (defined
+ * by kbasep_pm_action) this function will return the register offset that
+ * will perform the action on the core type. The register returned is the _LO
+ * register and an offset must be applied to use the _HI register.
+ *
+ * @core_type: The type of core
+ * @action:    The type of action
+ *
+ * Return: The register offset of the _LO register that performs an action of
+ * type @action on a core of type @core_type.
+ */
+static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
+						enum kbasep_pm_action action)
+{
+	return (u32)core_type + (u32)action;
+}
+
+#ifdef CONFIG_ARM64
+static void mali_cci_flush_l2(struct kbase_device *kbdev)
+{
+	const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED;
+	u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+	u32 raw;
+
+	/*
+	 * Note that we don't take the cache flush mutex here since
+	 * we expect to be the last user of the L2, all other L2 users
+	 * would have dropped their references, to initiate L2 power
+	 * down, L2 power down being the only valid place for this
+	 * to be called from.
+	 */
+
+	kbase_reg_write(kbdev,
+			GPU_CONTROL_REG(GPU_COMMAND),
+			GPU_COMMAND_CLEAN_INV_CACHES,
+			NULL);
+
+	raw = kbase_reg_read(kbdev,
+		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+		NULL);
+
+	/* Wait for cache flush to complete before continuing, exit on
+	 * gpu resets or loop expiry. */
+	while (((raw & mask) == 0) && --loops) {
+		raw = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+					NULL);
+	}
+}
+#endif
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo = cores & 0xFFFFFFFF;
+	u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	if (cores) {
+		if (action == ACTION_PWRON)
+			kbase_trace_mali_pm_power_on(core_type, cores);
+		else if (action == ACTION_PWROFF)
+			kbase_trace_mali_pm_power_off(core_type, cores);
+	}
+#endif
+
+	if (cores) {
+		u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
+
+		if (action == ACTION_PWRON)
+			state |= cores;
+		else if (action == ACTION_PWROFF)
+			state &= ~cores;
+		kbase_tlstream_aux_pm_state(core_type, state);
+	}
+
+	/* Tracing */
+	if (cores) {
+		if (action == ACTION_PWRON)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u,
+									lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL,
+									0u, lo);
+				break;
+			default:
+				break;
+			}
+		else if (action == ACTION_PWROFF)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL,
+									0u, lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
+									0u, lo);
+				/* disable snoops before L2 is turned off */
+				kbase_pm_cache_snoop_disable(kbdev);
+				break;
+			default:
+				break;
+			}
+	}
+
+	if (lo != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+	if (hi != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_invoke);
+
+/**
+ * kbase_pm_get_state - Get information about a core set
+ *
+ * This function gets information (chosen by @action) about a set of cores of
+ * a type given by @core_type. It is a static function used by
+ * kbase_pm_get_present_cores(), kbase_pm_get_active_cores(),
+ * kbase_pm_get_trans_cores() and kbase_pm_get_ready_cores().
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the should be queried
+ * @action:    The property of the cores to query
+ *
+ * Return: A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo, hi;
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+
+	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+	return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev)
+{
+	kbdev->shader_inuse_bitmap = 0;
+	kbdev->shader_needed_bitmap = 0;
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_users_count = 0;
+	kbdev->l2_available_bitmap = 0;
+	kbdev->tiler_needed_cnt = 0;
+	kbdev->tiler_inuse_cnt = 0;
+
+	memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_read_present_cores);
+
+/**
+ * kbase_pm_get_present_cores - Get the cores that are present
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (type) {
+	case KBASE_PM_CORE_L2:
+		return kbdev->gpu_props.props.raw_props.l2_present;
+	case KBASE_PM_CORE_SHADER:
+		return kbdev->gpu_props.props.raw_props.shader_present;
+	case KBASE_PM_CORE_TILER:
+		return kbdev->gpu_props.props.raw_props.tiler_present;
+	}
+	KBASE_DEBUG_ASSERT(0);
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * kbase_pm_get_active_cores - Get the cores that are "active"
+ *                             (busy processing work)
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are active
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between
+ *                            power states
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are transitioning
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * kbase_pm_get_ready_cores - Get the cores that are powered on
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are ready (powered on)
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	u64 result;
+
+	result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+	switch (type) {
+	case KBASE_PM_CORE_SHADER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_TILER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_L2:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * kbase_pm_transition_core_type - Perform power transitions for a particular
+ *                                 core type.
+ *
+ * This function will perform any available power transitions to make the actual
+ * hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that call cannot be made
+ * until the transition has finished. Cores which are not present in the
+ * hardware are ignored if they are specified in the desired_state bitmask,
+ * however the return value will always be 0 in this case.
+ *
+ * @kbdev:             The kbase device
+ * @type:              The core type to perform transitions for
+ * @desired_state:     A bit mask of the desired state of the cores
+ * @in_use:            A bit mask of the cores that are currently running
+ *                     jobs. These cores have to be kept powered up because
+ *                     there are jobs running (or about to run) on them.
+ * @available:         Receives a bit mask of the cores that the job
+ *                     scheduler can use to submit jobs to. May be NULL if
+ *                     this is not needed.
+ * @powering_on:       Bit mask to update with cores that are
+ *                    transitioning to a power-on state.
+ *
+ * Return: true if the desired state has been reached, false otherwise
+ */
+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type,
+						u64 desired_state,
+						u64 in_use,
+						u64 * const available,
+						u64 *powering_on)
+{
+	u64 present;
+	u64 ready;
+	u64 trans;
+	u64 powerup;
+	u64 powerdown;
+	u64 powering_on_trans;
+	u64 desired_state_in_use;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	/* Get current state */
+	present = kbase_pm_get_present_cores(kbdev, type);
+	trans = kbase_pm_get_trans_cores(kbdev, type);
+	ready = kbase_pm_get_ready_cores(kbdev, type);
+	/* mask off ready from trans in case transitions finished between the
+	 * register reads */
+	trans &= ~ready;
+
+	powering_on_trans = trans & *powering_on;
+	*powering_on = powering_on_trans;
+
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	/* Update desired state to include the in-use cores. These have to be
+	 * kept powered up because there are jobs running or about to run on
+	 * these cores
+	 */
+	desired_state_in_use = desired_state | in_use;
+
+	/* Update state of whether l2 caches are powered */
+	if (type == KBASE_PM_CORE_L2) {
+		if ((ready == present) && (desired_state_in_use == ready) &&
+								(trans == 0)) {
+			/* All are ready, none will be turned off, and none are
+			 * transitioning */
+			kbdev->pm.backend.l2_powered = 1;
+			/*
+			 * Ensure snoops are enabled after L2 is powered up,
+			 * note that kbase keeps track of the snoop state, so
+			 * safe to repeatedly call.
+			 */
+			kbase_pm_cache_snoop_enable(kbdev);
+			if (kbdev->l2_users_count > 0) {
+				/* Notify any registered l2 cache users
+				 * (optimized out when no users waiting) */
+				wake_up(&kbdev->pm.backend.l2_powered_wait);
+			}
+		} else
+			kbdev->pm.backend.l2_powered = 0;
+	}
+
+	if (desired_state_in_use == ready && (trans == 0))
+		return true;
+
+	/* Restrict the cores to those that are actually present */
+	powerup = desired_state_in_use & present;
+	powerdown = (~desired_state_in_use) & present;
+
+	/* Restrict to cores that are not already in the desired state */
+	powerup &= ~ready;
+	powerdown &= ready;
+
+	/* Don't transition any cores that are already transitioning, except for
+	 * Mali cores that support the following case:
+	 *
+	 * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
+	 * a core that is currently transitioning to power off, then this is
+	 * remembered and the shader core is automatically powered up again once
+	 * the original transition completes. Once the automatic power on is
+	 * complete any job scheduled on the shader core should start.
+	 */
+	powerdown &= ~trans;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
+		if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
+			trans = powering_on_trans; /* for exception cases, only
+						    * mask off cores in power on
+						    * transitions */
+
+	powerup &= ~trans;
+
+	/* Perform transitions if any */
+	kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
+	//kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
+
+	/* Recalculate cores transitioning on, and re-evaluate our state */
+	powering_on_trans |= powerup;
+	*powering_on = powering_on_trans;
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	return false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
+
+/**
+ * get_desired_cache_status - Determine which caches should be on for a
+ *                            particular core state
+ *
+ * This function takes a bit mask of the present caches and the cores (or
+ * caches) that are attached to the caches that will be powered. It then
+ * computes which caches should be turned on to allow the cores requested to be
+ * powered up.
+ *
+ * @present:       The bit mask of present caches
+ * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
+ *                 be powered
+ *
+ * Return: A bit mask of the caches that should be turned on
+ */
+static u64 get_desired_cache_status(u64 present, u64 cores_powered)
+{
+	u64 desired = 0;
+
+	while (present) {
+		/* Find out which is the highest set bit */
+		u64 bit = fls64(present) - 1;
+		u64 bit_mask = 1ull << bit;
+		/* Create a mask which has all bits from 'bit' upwards set */
+
+		u64 mask = ~(bit_mask - 1);
+
+		/* If there are any cores powered at this bit or above (that
+		 * haven't previously been processed) then we need this core on
+		 */
+		if (cores_powered & mask)
+			desired |= bit_mask;
+
+		/* Remove bits from cores_powered and present */
+		cores_powered &= ~mask;
+		present &= ~bit_mask;
+	}
+
+	return desired;
+}
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status);
+
+bool
+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+	bool cores_are_available = false;
+	bool in_desired_state = true;
+	u64 desired_l2_state;
+	u64 cores_powered;
+	u64 tiler_available_bitmap;
+	u64 shader_available_bitmap;
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+	u64 l2_available_bitmap;
+	u64 prev_l2_available_bitmap;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+	if (kbdev->pm.backend.gpu_powered == false) {
+		spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+		if (kbdev->pm.backend.desired_shader_state == 0 &&
+				kbdev->pm.backend.desired_tiler_state == 0)
+			return true;
+		return false;
+	}
+
+	/* Trace that a change-state is being requested, and that it took
+	 * (effectively) no time to start it. This is useful for counting how
+	 * many state changes occurred, in a way that's backwards-compatible
+	 * with processing the trace data */
+	kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+	kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+
+	/* If any cores are already powered then, we must keep the caches on */
+	cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+
+	cores_powered |= kbdev->pm.backend.desired_shader_state;
+
+	/* If there are l2 cache users registered, keep all l2s powered even if
+	 * all other cores are off. */
+	if (kbdev->l2_users_count > 0)
+		cores_powered |= kbdev->gpu_props.props.raw_props.l2_present;
+
+	desired_l2_state = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered);
+
+	/* If any l2 cache is on, then enable l2 #0, for use by job manager */
+	if (0 != desired_l2_state) {
+		desired_l2_state |= 1;
+		/* Also enable tiler if l2 cache is powered */
+		kbdev->pm.backend.desired_tiler_state =
+			kbdev->gpu_props.props.raw_props.tiler_present;
+	} else {
+		kbdev->pm.backend.desired_tiler_state = 0;
+	}
+
+	prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+	in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_L2, desired_l2_state, 0,
+				&l2_available_bitmap,
+				&kbdev->pm.backend.powering_on_l2_state);
+
+	if (kbdev->l2_available_bitmap != l2_available_bitmap)
+		KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
+
+	kbdev->l2_available_bitmap = l2_available_bitmap;
+
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_TILER,
+				kbdev->pm.backend.desired_tiler_state,
+				0, &tiler_available_bitmap,
+				&kbdev->pm.backend.powering_on_tiler_state);
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_SHADER,
+				kbdev->pm.backend.desired_shader_state,
+				kbdev->shader_inuse_bitmap,
+				&shader_available_bitmap,
+				&kbdev->pm.backend.powering_on_shader_state);
+
+		if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u,
+						(u32) shader_available_bitmap);
+			KBASE_TIMELINE_POWER_SHADER(kbdev,
+						shader_available_bitmap);
+		}
+
+		kbdev->shader_available_bitmap = shader_available_bitmap;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u,
+						(u32) tiler_available_bitmap);
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+		}
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+	} else if ((l2_available_bitmap &
+			kbdev->gpu_props.props.raw_props.tiler_present) !=
+			kbdev->gpu_props.props.raw_props.tiler_present) {
+		tiler_available_bitmap = 0;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+	}
+
+	/* State updated for slow-path waiters */
+	kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+
+	shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* Determine whether the cores are now available (even if the set of
+	 * available cores is empty). Note that they can be available even if
+	 * we've not finished transitioning to the desired state */
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state) {
+		cores_are_available = true;
+
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+				(u32)(kbdev->shader_available_bitmap &
+				kbdev->pm.backend.desired_shader_state));
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+				(u32)(kbdev->tiler_available_bitmap &
+				kbdev->pm.backend.desired_tiler_state));
+
+		/* Log timelining information about handling events that power
+		 * up cores, to match up either with immediate submission either
+		 * because cores already available, or from PM IRQ */
+		if (!in_desired_state)
+			kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+
+	if (in_desired_state) {
+		KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_L2));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_TILER));
+#endif
+
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_L2,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_L2));
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_SHADER,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_SHADER));
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_TILER,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_TILER));
+
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+				kbdev->pm.backend.gpu_in_desired_state,
+				(u32)kbdev->pm.backend.desired_shader_state);
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+				(u32)kbdev->pm.backend.desired_tiler_state);
+
+		/* Log timelining information for synchronous waiters */
+		kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+		/* Wake slow-path waiters. Job scheduler does not use this. */
+		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
+	}
+
+	spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+
+	/* kbase_pm_ca_update_core_status can cause one-level recursion into
+	 * this function, so it must only be called once all changes to kbdev
+	 * have been committed, and after the gpu_powered_lock has been
+	 * dropped. */
+	if (kbdev->shader_ready_bitmap != shader_ready_bitmap ||
+	    kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) {
+		kbdev->shader_ready_bitmap = shader_ready_bitmap;
+		kbdev->shader_transitioning_bitmap =
+						shader_transitioning_bitmap;
+
+		kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap,
+						shader_transitioning_bitmap);
+	}
+
+	/* The core availability policy is not allowed to keep core group 0
+	 * turned off (unless it was changing the l2 power state) */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
+		(prev_l2_available_bitmap == desired_l2_state) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+		BUG();
+
+	/* The core availability policy is allowed to keep core group 1 off,
+	 * but all jobs specifically targeting CG1 must fail */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask))
+		kbdev->pm.backend.cg1_disabled = true;
+	else
+		kbdev->pm.backend.cg1_disabled = false;
+
+	return cores_are_available;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock);
+
+/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has
+ * aborted due to a fatal signal. If the time spent waiting has exceeded this
+ * threshold then there is most likely a hardware issue. */
+#define PM_TIMEOUT (5*HZ) /* 5s */
+
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long timeout;
+	bool cores_are_available;
+	int ret;
+
+	/* Force the transition to be checked and reported - the cores may be
+	 * 'available' (for job submission) but not fully powered up. */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	timeout = jiffies + PM_TIMEOUT;
+
+	/* Wait for cores */
+	ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+			kbdev->pm.backend.gpu_in_desired_state);
+
+	if (ret < 0 && time_after(jiffies, timeout)) {
+		dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+		dev_err(kbdev->dev, "Desired state :\n");
+		dev_err(kbdev->dev, "\tShader=%016llx\n",
+				kbdev->pm.backend.desired_shader_state);
+		dev_err(kbdev->dev, "\tTiler =%016llx\n",
+				kbdev->pm.backend.desired_tiler_state);
+		dev_err(kbdev->dev, "Current state :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_LO),
+					NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_LO), NULL));
+		dev_err(kbdev->dev, "Cores transitioning :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_LO), NULL));
+#if KBASE_GPU_RESET_EN
+		dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+		if (kbase_prepare_to_reset_gpu(kbdev))
+			kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+	} else {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
+
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Clear all interrupts,
+	 * and unmask them all.
+	 */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
+									NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
+
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Mask all interrupts,
+	 * and clear them all.
+	 */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
+
+/*
+ * pmu layout:
+ * 0x0000: PMU TAG (RO) (0xCAFECAFE)
+ * 0x0004: PMU VERSION ID (RO) (0x00000000)
+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE)
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
+{
+	bool reset_required = is_resume;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_powered) {
+		/* Already turned on */
+		if (kbdev->poweroff_pending)
+			kbase_pm_enable_interrupts(kbdev);
+		kbdev->poweroff_pending = false;
+		KBASE_DEBUG_ASSERT(!is_resume);
+		return;
+	}
+
+	kbdev->poweroff_pending = false;
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u);
+
+	if (is_resume && kbdev->pm.backend.callback_power_resume) {
+		kbdev->pm.backend.callback_power_resume(kbdev);
+		return;
+	} else if (kbdev->pm.backend.callback_power_on) {
+		kbdev->pm.backend.callback_power_on(kbdev);
+		/* If your platform properly keeps the GPU state you may use the
+		 * return value of the callback_power_on function to
+		 * conditionally reset the GPU on power up. Currently we are
+		 * conservative and always reset the GPU. */
+		reset_required = true;
+	}
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	kbdev->pm.backend.gpu_powered = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (reset_required) {
+		/* GPU state was lost, reset GPU to ensure it is in a
+		 * consistent state */
+		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
+	}
+
+	/* Reprogram the GPU's MMU */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		mutex_lock(&as->transaction_mutex);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		if (js_devdata->runpool_irq.per_as_data[i].kctx)
+			kbase_mmu_update(
+				js_devdata->runpool_irq.per_as_data[i].kctx);
+		else
+			kbase_mmu_disable_as(kbdev, i);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&as->transaction_mutex);
+	}
+
+	/* Lastly, enable the interrupts */
+	kbase_pm_enable_interrupts(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
+
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* ASSERT that the cores should now be unavailable. No lock needed. */
+	KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+
+	kbdev->poweroff_pending = true;
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* Already turned off */
+		if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+			kbdev->pm.backend.callback_power_suspend(kbdev);
+		return true;
+	}
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u);
+
+	/* Disable interrupts. This also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure that any IRQ handlers have finished */
+	kbase_synchronize_irqs(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (atomic_read(&kbdev->faults_pending)) {
+		/* Page/bus faults are still being processed. The GPU can not
+		 * be powered off until they have completed */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return false;
+	}
+
+	kbase_pm_cache_snoop_disable(kbdev);
+
+	/* The GPU power may be turned off from this point */
+	kbdev->pm.backend.gpu_powered = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+		kbdev->pm.backend.callback_power_suspend(kbdev);
+	else if (kbdev->pm.backend.callback_power_off)
+		kbdev->pm.backend.callback_power_off(kbdev);
+	return true;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_off);
+
+struct kbasep_reset_timeout_data {
+	struct hrtimer timer;
+	bool timed_out;
+	struct kbase_device *kbdev;
+};
+
+void kbase_pm_reset_done(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	kbdev->pm.backend.reset_done = true;
+	wake_up(&kbdev->pm.backend.reset_done_wait);
+}
+
+/**
+ * kbase_pm_wait_for_reset - Wait for a reset to happen
+ *
+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
+ *
+ * @kbdev: Kbase device
+ */
+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	wait_event(kbdev->pm.backend.reset_done_wait,
+						(kbdev->pm.backend.reset_done));
+	kbdev->pm.backend.reset_done = false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done);
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+	struct kbasep_reset_timeout_data *rtdata =
+		container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+	rtdata->timed_out = 1;
+
+	/* Set the wait queue to wake up kbase_pm_init_hw even though the reset
+	 * hasn't completed */
+	kbase_pm_reset_done(rtdata->kbdev);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+	u32 jm_values[4];
+	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >>
+		GPU_ID_VERSION_MAJOR_SHIFT;
+
+	kbdev->hw_quirks_sc = 0;
+
+	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443.
+	 * and needed due to MIDGLES-3539. See PRLAM-11035 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+		kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
+
+	/* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+		kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	/* Enable alternative hardware counter selection if configured. */
+	if (!GPU_ID_IS_NEW_FORMAT(prod_id))
+		kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+#endif
+
+	/* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+		kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
+
+	if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
+		if (prod_id < 0x760 || prod_id == 0x6956) /* T60x, T62x, T72x */
+			kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
+		else if (prod_id >= 0x760 && prod_id <= 0x880) /* T76x, T8xx */
+			kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
+	}
+
+	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(TILER_CONFIG), NULL);
+
+	/* Set tiler clock gate override if required */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+		kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+
+	/* Limit the GPU bus bandwidth if the platform needs this. */
+	kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(L2_MMU_CONFIG), NULL);
+
+	/* Limit read ID width for AXI */
+	kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS);
+	kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT;
+
+	/* Limit write ID width for AXI */
+	kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+	kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Allow memory configuration disparity to be ignored, we
+		 * optimize the use of shared memory and thus we expect
+		 * some disparity in the memory configuration */
+		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
+	}
+
+	/* Only for T86x/T88x-based products after r2p0 */
+	if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) {
+		/* The JM_CONFIG register is specified as follows in the
+		 T86x/T88x Engineering Specification Supplement:
+		 The values are read from device tree in order.
+		*/
+#define TIMESTAMP_OVERRIDE  1
+#define CLOCK_GATE_OVERRIDE (1<<1)
+#define JOB_THROTTLE_ENABLE (1<<2)
+#define JOB_THROTTLE_LIMIT_SHIFT 3
+
+		/* 6 bits in the register */
+		const u32 jm_max_limit = 0x3F;
+
+		if (of_property_read_u32_array(np,
+					"jm_config",
+					&jm_values[0],
+					ARRAY_SIZE(jm_values))) {
+			/* Entry not in device tree, use defaults  */
+			jm_values[0] = 0;
+			jm_values[1] = 0;
+			jm_values[2] = 0;
+			jm_values[3] = jm_max_limit; /* Max value */
+		}
+
+		/* Limit throttle limit to 6 bits*/
+		if (jm_values[3] > jm_max_limit) {
+			dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63).");
+			jm_values[3] = jm_max_limit;
+		}
+
+		/* Aggregate to one integer. */
+		kbdev->hw_quirks_jm = (jm_values[0] ? TIMESTAMP_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[1] ? CLOCK_GATE_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[2] ? JOB_THROTTLE_ENABLE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[3] <<
+				JOB_THROTTLE_LIMIT_SHIFT);
+	} else {
+		kbdev->hw_quirks_jm = KBASE_JM_CONFIG_UNUSED;
+	}
+
+
+}
+
+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
+{
+	if (kbdev->hw_quirks_sc)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
+				kbdev->hw_quirks_sc, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
+			kbdev->hw_quirks_tiler, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
+			kbdev->hw_quirks_mmu, NULL);
+
+
+	if (kbdev->hw_quirks_jm != KBASE_JM_CONFIG_UNUSED)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
+				kbdev->hw_quirks_jm, NULL);
+
+}
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE) &&
+		!kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_enable_smc != 0)
+			kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0);
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n");
+		kbdev->cci_snoop_enabled = true;
+	}
+}
+
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE) &&
+		kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_disable_smc != 0) {
+			mali_cci_flush_l2(kbdev);
+			kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0);
+		}
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n");
+		kbdev->cci_snoop_enabled = false;
+	}
+}
+
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+{
+	unsigned long irq_flags;
+	struct kbasep_reset_timeout_data rtdata;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Ensure the clock is on before attempting to access the hardware */
+	if (!kbdev->pm.backend.gpu_powered) {
+		if (kbdev->pm.backend.callback_power_on)
+			kbdev->pm.backend.callback_power_on(kbdev);
+
+		spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+		kbdev->pm.backend.gpu_powered = true;
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+	}
+
+	/* Ensure interrupts are off to begin with, this also clears any
+	 * outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure cache snoops are disabled before reset. */
+	kbase_pm_cache_snoop_disable(kbdev);
+	/* Prepare for the soft-reset */
+	kbdev->pm.backend.reset_done = false;
+
+	/* The cores should be made unavailable due to the reset */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags);
+	if (kbdev->shader_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u, (u32)0u);
+	if (kbdev->tiler_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u, (u32)0u);
+	kbdev->shader_available_bitmap = 0u;
+	kbdev->tiler_available_bitmap = 0u;
+	kbdev->l2_available_bitmap = 0u;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags);
+
+	/* Soft reset the GPU */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+	kbase_tlstream_jd_gpu_soft_reset(kbdev);
+#endif
+#if 0
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_SOFT_RESET, NULL);
+#else
+  //dev_err(kbdev->dev, "%s,  %d \n", __FILE__, __LINE__);
+  core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+  l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+  tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+
+  //printk("core_ready=%ld, l2_ready=%ld, tiler_ready=%ld\n", core_ready, l2_ready, tiler_ready);
+  if (core_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, core_ready, ACTION_PWROFF);
+
+  if (l2_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_ready, ACTION_PWROFF);
+
+  if (tiler_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_ready, ACTION_PWROFF);
+
+  /* do external reset */
+
+//JOHNT
+#if 0
+    // Level reset mail
+    Wr(P_RESET0_MASK, ~(0x1<<20));
+    Wr(P_RESET0_LEVEL, ~(0x1<<20));
+
+    // Level reset mail
+    Wr(P_RESET2_MASK, ~(0x1<<14));
+    Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    Wr(P_RESET2_LEVEL, 0xffffffff);
+    Wr(P_RESET0_LEVEL, 0xffffffff);
+#else
+    if (reg_base_hiubus) {
+        value = Rd(RESET0_MASK);
+        value = value & (~(0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_MASK, value);
+
+        value = Rd(RESET0_LEVEL);
+        value = value & (~(0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_LEVEL, value);
+    ///////////////
+        value = Rd(RESET2_MASK);
+        value = value & (~(0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_MASK, value);
+
+        value = Rd(RESET2_LEVEL);
+        value = value & (~(0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_LEVEL, value);
+
+        value = Rd(RESET0_LEVEL);
+        value = value | ((0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_LEVEL, value);
+
+        value = Rd(RESET2_LEVEL);
+        value = value | ((0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_LEVEL, value);
+    } else {
+		dev_err(kbdev->dev, "reg_base_hiubus is null !!!\n");
+    }
+#endif
+  //writel(..../*e.g. PWR_RESET, EXTERNAL_PWR_REGISTER*/);
+
+  /* any other action necessary, like a simple delay */
+  kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+  kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+#endif
+
+	/* Unmask the reset complete interrupt only */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED,
+									NULL);
+
+	/* Initialize a structure for tracking the status of the reset */
+	rtdata.kbdev = kbdev;
+	rtdata.timed_out = 0;
+
+	/* Create a timer to use as a timeout on the reset */
+	hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rtdata.timer.function = kbasep_reset_timeout;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		goto out;
+	}
+
+	/* No interrupt has been received - check if the RAWSTAT register says
+	 * the reset has completed */
+	if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+							RESET_COMPLETED) {
+		/* The interrupt is set in the RAWSTAT; this suggests that the
+		 * interrupts are not getting to the CPU */
+		dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+		/* If interrupts aren't working we can't continue. */
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return -EINVAL;
+	}
+
+	/* The GPU doesn't seem to be responding to the reset so try a hard
+	 * reset */
+	dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+								RESET_TIMEOUT);
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET, NULL);
+
+	/* Restart the timer to wait for the hard reset to complete */
+	rtdata.timed_out = 0;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		goto out;
+	}
+
+	destroy_hrtimer_on_stack(&rtdata.timer);
+
+	dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+								RESET_TIMEOUT);
+
+	/* The GPU still hasn't reset, give up */
+	return -EINVAL;
+
+out:
+
+	if (flags & PM_HW_ISSUES_DETECT)
+		kbase_pm_hw_issues_detect(kbdev);
+
+	kbase_pm_hw_issues_apply(kbdev);
+
+	kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		u32 gpu_status = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS), NULL);
+
+		kbdev->secure_mode = (gpu_status &
+				GPU_STATUS_PROTECTED_MODE_ACTIVE) != 0;
+	}
+
+	/* If cycle counter was in use re-enable it, enable_irqs will only be
+	 * false when called from kbase_pm_powerup */
+	if (kbdev->pm.backend.gpu_cycle_counter_requests &&
+						(flags & PM_ENABLE_IRQS)) {
+		/* enable interrupts as the L2 may have to be powered on */
+		kbase_pm_enable_interrupts(kbdev);
+		kbase_pm_request_l2_caches(kbdev);
+
+		/* Re-enable the counters if we need to */
+		spin_lock_irqsave(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+		if (kbdev->pm.backend.gpu_cycle_counter_requests)
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+		spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+		kbase_pm_release_l2_caches(kbdev);
+		kbase_pm_disable_interrupts(kbdev);
+	}
+
+	if (flags & PM_ENABLE_IRQS)
+		kbase_pm_enable_interrupts(kbdev);
+
+	return 0;
+}
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ *
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * kbase_pm_request_gpu_cycle_counter() or
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only
+ *
+ * When this function is called the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to (
+ * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() )
+ *
+ * @kbdev:     The kbase device structure of the device
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+	--kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	kbase_pm_release_l2_caches(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100755
index 0000000..477bea7
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,566 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * enum kbasep_pm_action - Actions that can be performed on a core.
+ *
+ * This enumeration is private to the file. Its values are set to allow
+ * core_type_to_reg() function, which decodes this enumeration, to be simpler
+ * and more efficient.
+ *
+ * @ACTION_PRESENT: The cores that are present
+ * @ACTION_READY: The cores that are ready
+ * @ACTION_PWRON: Power on the cores specified
+ * @ACTION_PWROFF: Power off the cores specified
+ * @ACTION_PWRTRANS: The cores that are transitioning
+ * @ACTION_PWRACTIVE: The cores that are active
+ */
+enum kbasep_pm_action {
+	ACTION_PRESENT = 0,
+	ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO),
+	ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO),
+	ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO),
+	ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO),
+	ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO)
+};
+
+
+
+/**
+ * kbase_pm_dev_idle - The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_dev_activate - The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_get_present_cores - Get details of the cores that are present in
+ *                              the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid
+ *         pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_active_cores - Get details of the cores that are currently
+ *                             active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_trans_cores - Get details of the cores that are currently
+ *                            transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_ready_cores - Get details of the cores that are currently
+ *                            powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are powered and ready for jobs (they may
+ * or may not be currently executing jobs).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_clock_on - Turn the clock for the device on, and enable device
+ *                     interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if clock on due to resume after suspend, false otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
+ *                      device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU
+ * off. It should be modified during integration to perform the necessary
+ * actions to turn the clock off (if this is possible in the integration).
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if clock off due to suspend, false otherwise
+ *
+ * Return: true  if clock was turned off, or
+ *         false if clock can not be turned off due to pending page/bus fault
+ *               workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
+
+/**
+ * kbase_pm_enable_interrupts - Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts - Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * kbase_pm_reset_done - The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_pm_check_transitions_nolock - Check if there are any power transitions
+ *                                     to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state has no
+ * cores. That is: of the no cores desired, none were *un*available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation)
+ *
+ * The caller must hold kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return:      non-zero when all desired cores are available. That is,
+ *              it's worthwhile for the caller to submit a job.
+ *              false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_check_transitions_sync - Synchronous and locking variant of
+ *                                   kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device.pm.power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ *                                      where the caller must hold
+ *                                      kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ *                               the Power Policy, and begin any power
+ *                               transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware acheive the desired shader
+ * core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
+ *                                     the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+
+void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action);
+
+/**
+ * kbasep_pm_read_present_cores - Read the bitmasks of present cores.
+ *
+ * This information is cached to avoid having to perform register reads whenever
+ * the information is required.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init()) after calling this function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to
+ *                         update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of @buffer_updated differs from a previous call.
+ *
+ * @kbdev:          The kbase device structure for the device (must be a
+ *                  valid pointer)
+ * @buffer_updated: True if the buffer has been updated on this VSync,
+ *                  false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change
+ *                            the clock speed of the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating.
+ */
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
+ *                                      needed
+ *
+ * If the caller is the first caller then the GPU cycle counters will be enabled
+ * along with the l2 cache
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
+ *                                               needed (l2 cache already on)
+ *
+ * This is a version of the above function
+ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
+ * l2 cache is known to be on and assured to be on until the subsequent call of
+ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does
+ * not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
+ *                                      longer in use
+ *
+ * If the caller is the
+ * last caller then the GPU cycle counters will be disabled. A request must have
+ * been made before a call to this.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * Access to registers should be done using kbase_os_reg_read()/write() at this
+ * stage, not kbase_reg_read()/write().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ *                              collection is active.
+ *
+ * Note that this returns if the power management metrics collection was
+ * active at the time of calling, it is possible that after the call the metrics
+ * collection enable may have changed state.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if power on due to resume after suspend,
+ *             false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been
+ *                        requested.
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if power off due to suspend,
+ *              false otherwise
+ * Return:
+ *         true      if power was turned off, else
+ *         false     if power can not be turned off due to pending page/bus
+ *                   fault workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+
+#ifdef CONFIG_PM_DEVFREQ
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total, unsigned long *busy);
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @kbdev:         The kbase device structure for the device (must be a
+ *                 valid pointer)
+ * @utilisation:   The current calculated utilisation by the metrics system.
+ * @util_gl_share: The current calculated gl share of utilisation.
+ * @util_cl_share: The current calculated cl share of utilisation per core
+ *                 group.
+ * Return:         Returns 0 on failure and non zero on success.
+ */
+
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2]);
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_metrics_update - Inform the metrics system that an atom is either
+ *                           about to be run or has just completed.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @now:   Pointer to the timestamp of the change, or NULL to use current time
+ *
+ * Caller must hold runpool_irq.lock
+ */
+void kbase_pm_metrics_update(struct kbase_device *kbdev,
+				ktime_t *now);
+
+/**
+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called after L2 power up.
+ */
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called before L2 power off.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
new file mode 100755
index 0000000..ae63256
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -0,0 +1,400 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION          70
+#define KBASE_PM_VSYNC_MAX_UTILISATION          90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION       10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION       40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
+ * under 11s. Exceeding this will cause overflow */
+#define KBASE_PM_TIME_SHIFT			8
+
+/* Maximum time between sampling of utilization data, without resetting the
+ * counters. */
+#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbasep_pm_metrics_data *metrics;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	metrics = container_of(timer, struct kbasep_pm_metrics_data, timer);
+	kbase_pm_get_dvfs_action(metrics->kbdev);
+
+	spin_lock_irqsave(&metrics->lock, flags);
+
+	if (metrics->timer_active)
+		hrtimer_start(timer,
+			HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&metrics->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+int kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.metrics.kbdev = kbdev;
+
+	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.prev_busy = 0;
+	kbdev->pm.backend.metrics.prev_idle = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+
+	spin_lock_init(&kbdev->pm.backend.metrics.lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbdev->pm.backend.metrics.timer_active = true;
+	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
+
+	hrtimer_start(&kbdev->pm.backend.metrics.timer,
+			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
+
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbdev->pm.backend.metrics.timer_active = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	ktime_t diff;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+	if (ktime_to_ns(diff) < 0)
+		return;
+
+	if (kbdev->pm.backend.metrics.gpu_active) {
+		u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+
+		kbdev->pm.backend.metrics.time_busy += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[0])
+			kbdev->pm.backend.metrics.busy_cl[0] += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[1])
+			kbdev->pm.backend.metrics.busy_cl[1] += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[0])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+	} else {
+		kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff)
+							>> KBASE_PM_TIME_SHIFT);
+	}
+
+	kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+#if defined(CONFIG_PM_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function.
+ */
+static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	/* Store previous value */
+	kbdev->pm.backend.metrics.prev_idle =
+					kbdev->pm.backend.metrics.time_idle;
+	kbdev->pm.backend.metrics.prev_busy =
+					kbdev->pm.backend.metrics.time_busy;
+
+	/* Reset current values */
+	kbdev->pm.backend.metrics.time_period_start = now;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+}
+
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get());
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total_out, unsigned long *busy_out)
+{
+	ktime_t now = ktime_get();
+	unsigned long flags, busy, total;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	busy = kbdev->pm.backend.metrics.time_busy;
+	total = busy + kbdev->pm.backend.metrics.time_idle;
+
+	/* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
+	 * 100ms) */
+	if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+		kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+	} else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+		total += kbdev->pm.backend.metrics.prev_idle +
+				kbdev->pm.backend.metrics.prev_busy;
+		busy += kbdev->pm.backend.metrics.prev_busy;
+	}
+
+	*total_out = total;
+	*busy_out = busy;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev,
+					int *util_gl_share,
+					int util_cl_share[2],
+					ktime_t now)
+{
+	int utilisation;
+	int busy;
+
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	if (kbdev->pm.backend.metrics.time_idle +
+				kbdev->pm.backend.metrics.time_busy == 0) {
+		/* No data - so we return NOP */
+		utilisation = -1;
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+		goto out;
+	}
+
+	utilisation = (100 * kbdev->pm.backend.metrics.time_busy) /
+			(kbdev->pm.backend.metrics.time_idle +
+			 kbdev->pm.backend.metrics.time_busy);
+
+	busy = kbdev->pm.backend.metrics.busy_gl +
+		kbdev->pm.backend.metrics.busy_cl[0] +
+		kbdev->pm.backend.metrics.busy_cl[1];
+
+	if (busy != 0) {
+		if (util_gl_share)
+			*util_gl_share =
+				(100 * kbdev->pm.backend.metrics.busy_gl) /
+									busy;
+		if (util_cl_share) {
+			util_cl_share[0] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[0]) /
+									busy;
+			util_cl_share[1] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[1]) /
+									busy;
+		}
+	} else {
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+	}
+
+out:
+	return utilisation;
+}
+
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	int utilisation, util_gl_share;
+	int util_cl_share[2];
+	ktime_t now;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	now = ktime_get();
+
+	utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share,
+			util_cl_share, now);
+
+	if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 ||
+							util_cl_share[1] < 0) {
+		utilisation = 0;
+		util_gl_share = 0;
+		util_cl_share[0] = 0;
+		util_cl_share[1] = 0;
+		goto out;
+	}
+
+out:
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
+								util_cl_share);
+#endif				/*CONFIG_MALI_MIDGARD_DVFS */
+
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+	bool isactive;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	isactive = kbdev->pm.backend.metrics.timer_active;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	return isactive;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/**
+ * kbase_pm_metrics_active_calc - Update PM active counts based on currently
+ *                                running atoms
+ * @kbdev: Device pointer
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ */
+static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		/* Head atom may have just completed, so if it isn't running
+		 * then try the next atom */
+		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
+			katom = kbase_gpu_inspect(kbdev, js, 1);
+
+		if (katom && katom->gpu_rb_state ==
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+			if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+				int device_nr = (katom->core_req &
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+						? katom->device_nr : 0;
+				WARN_ON(device_nr >= 2);
+				kbdev->pm.backend.metrics.active_cl_ctx[
+						device_nr] = 1;
+			} else {
+				/* Slot 2 should not be running non-compute
+				 * atoms */
+				WARN_ON(js >= 2);
+				kbdev->pm.backend.metrics.active_gl_ctx[js] = 1;
+			}
+			kbdev->pm.backend.metrics.gpu_active = true;
+		}
+	}
+}
+
+/* called when job is submitted to or removed from a GPU slot */
+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
+{
+	unsigned long flags;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	if (!timestamp) {
+		now = ktime_get();
+		timestamp = &now;
+	}
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
+
+	kbase_pm_metrics_active_calc(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100755
index 0000000..90648c7
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,984 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#else				/* CONFIG_MALI_NO_MALI */
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/* The number of policies available in the system.
+ * This is derived from the number of functions listed in policy_get_functions.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+	 * expect to hit it nor tend to hit it very much anyway. We can detect
+	 * whether we need more instrumentation by a difference between
+	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
+
+	/* Must be the last */
+	KBASE_PM_FUNC_ID_COUNT
+};
+
+
+/* State changes during request/unrequest/release-ing cores */
+enum {
+	KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
+	KBASE_PM_CHANGE_STATE_TILER  = (1u << 1),
+
+	/* These two must be last */
+	KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
+						KBASE_PM_CHANGE_STATE_SHADER),
+	KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
+};
+typedef u32 kbase_pm_change_state;
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+/* Timeline Trace code lookups for each function */
+static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
+					[KBASE_PM_CHANGE_STATE_COUNT] = {
+	/* kbase_pm_request_cores */
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
+
+	/* kbase_pm_release_cores */
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
+};
+
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id,
+		kbase_pm_change_state state)
+{
+	int trace_code;
+
+	KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
+	KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
+									state);
+
+	trace_code = kbase_pm_change_state_trace_code[func_id][state];
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
+}
+
+#else /* CONFIG_MALI_TRACE_TIMELINE */
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id, kbase_pm_change_state state)
+{
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+/**
+ * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
+ *                               requested shader cores
+ * @kbdev: Device pointer
+ */
+static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
+{
+	u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	kbdev->pm.backend.desired_shader_state &=
+			~kbdev->pm.backend.shader_poweroff_pending;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+
+	if (prev_shader_state != kbdev->pm.backend.desired_shader_state
+			|| kbdev->pm.backend.ca_in_transition) {
+		bool cores_are_available;
+
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+		/* Don't need 'cores_are_available',
+		 * because we don't return anything */
+		CSTD_UNUSED(cores_are_available);
+	}
+}
+
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(timer, struct kbase_device,
+						pm.backend.gpu_poweroff_timer);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* It is safe for this call to do nothing if the work item is already
+	 * queued. The worker function will read the must up-to-date state of
+	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
+	 *
+	 * If a state change occurs while the worker function is processing,
+	 * this call will succeed as a work item can be requeued once it has
+	 * started processing.
+	 */
+	if (kbdev->pm.backend.gpu_poweroff_pending)
+		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+					&kbdev->pm.backend.gpu_poweroff_work);
+
+	if (kbdev->pm.backend.shader_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending_time--;
+
+		KBASE_DEBUG_ASSERT(
+				kbdev->pm.backend.shader_poweroff_pending_time
+									>= 0);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending_time)
+			kbasep_pm_do_poweroff_cores(kbdev);
+	}
+
+	if (kbdev->pm.backend.poweroff_timer_needed) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+		return HRTIMER_RESTART;
+	}
+
+	kbdev->pm.backend.poweroff_timer_running = false;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	bool do_poweroff = false;
+
+	kbdev = container_of(data, struct kbase_device,
+						pm.backend.gpu_poweroff_work);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	kbdev->pm.backend.gpu_poweroff_pending--;
+
+	if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Only power off the GPU if a request is still pending */
+	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+		do_poweroff = true;
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	if (do_poweroff) {
+		kbdev->pm.backend.poweroff_timer_needed = false;
+		hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+		kbdev->pm.backend.poweroff_timer_running = false;
+
+		/* Power off the GPU */
+		if (!kbase_pm_do_poweroff(kbdev, false)) {
+			/* GPU can not be powered off at present */
+			kbdev->pm.backend.poweroff_timer_needed = true;
+			kbdev->pm.backend.poweroff_timer_running = true;
+			hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer,
+					kbdev->pm.gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("kbase_pm_do_poweroff",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!wq)
+		return -ENOMEM;
+
+	kbdev->pm.backend.gpu_poweroff_wq = wq;
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
+			kbasep_pm_do_gpu_poweroff_wq);
+	hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	kbdev->pm.backend.gpu_poweroff_timer.function =
+			kbasep_pm_do_gpu_poweroff_callback;
+	kbdev->pm.backend.pm_current_policy = policy_list[0];
+	kbdev->pm.backend.pm_current_policy->init(kbdev);
+	kbdev->pm.gpu_poweroff_time =
+			HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+	kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+	kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
+
+	return 0;
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.pm_current_policy->term(kbdev);
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
+}
+
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	kbdev->pm.backend.poweroff_timer_needed = false;
+	hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.poweroff_timer_running = false;
+
+	/* If wq is already running but is held off by pm.lock, make sure it has
+	 * no effect */
+	kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.shader_poweroff_pending_time = 0;
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+	bool active;
+
+	lockdep_assert_held(&pm->lock);
+
+	/* pm_current_policy will never be NULL while pm.lock is held */
+	KBASE_DEBUG_ASSERT(backend->pm_current_policy);
+
+	spin_lock_irqsave(&pm->power_change_lock, flags);
+
+	active = backend->pm_current_policy->get_core_active(kbdev);
+
+	if (active) {
+		if (backend->gpu_poweroff_pending) {
+			/* Cancel any pending power off request */
+			backend->gpu_poweroff_pending = 0;
+
+			/* If a request was pending then the GPU was still
+			 * powered, so no need to continue */
+			if (!kbdev->poweroff_pending) {
+				spin_unlock_irqrestore(&pm->power_change_lock,
+						flags);
+				return;
+			}
+		}
+
+		if (!backend->poweroff_timer_running && !backend->gpu_powered &&
+				(pm->poweroff_gpu_ticks ||
+				pm->poweroff_shader_ticks)) {
+			backend->poweroff_timer_needed = true;
+			backend->poweroff_timer_running = true;
+			hrtimer_start(&backend->gpu_poweroff_timer,
+					pm->gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+
+		spin_unlock_irqrestore(&pm->power_change_lock, flags);
+
+		/* Power on the GPU and any cores requested by the policy */
+		kbase_pm_do_poweron(kbdev, false);
+	} else {
+		/* It is an error for the power policy to power off the GPU
+		 * when there are contexts active */
+		KBASE_DEBUG_ASSERT(pm->active_count == 0);
+
+		if (backend->shader_poweroff_pending) {
+			backend->shader_poweroff_pending = 0;
+			backend->shader_poweroff_pending_time = 0;
+		}
+
+		/* Request power off */
+		if (pm->backend.gpu_powered) {
+			if (pm->poweroff_gpu_ticks) {
+				backend->gpu_poweroff_pending =
+						pm->poweroff_gpu_ticks;
+				backend->poweroff_timer_needed = true;
+				if (!backend->poweroff_timer_running) {
+					/* Start timer if not running (eg if
+					 * power policy has been changed from
+					 * always_on to something else). This
+					 * will ensure the GPU is actually
+					 * powered off */
+					backend->poweroff_timer_running
+							= true;
+					hrtimer_start(
+						&backend->gpu_poweroff_timer,
+						pm->gpu_poweroff_time,
+						HRTIMER_MODE_REL);
+				}
+				spin_unlock_irqrestore(&pm->power_change_lock,
+						flags);
+			} else {
+				spin_unlock_irqrestore(&pm->power_change_lock,
+						flags);
+
+				/* Power off the GPU immediately */
+				if (!kbase_pm_do_poweroff(kbdev, false)) {
+					/* GPU can not be powered off at present
+					 */
+					spin_lock_irqsave(
+							&pm->power_change_lock,
+							flags);
+					backend->poweroff_timer_needed = true;
+					if (!backend->poweroff_timer_running) {
+						backend->poweroff_timer_running
+								= true;
+						hrtimer_start(
+						&backend->gpu_poweroff_timer,
+							pm->gpu_poweroff_time,
+							HRTIMER_MODE_REL);
+					}
+					spin_unlock_irqrestore(
+							&pm->power_change_lock,
+							flags);
+				}
+			}
+		} else {
+			spin_unlock_irqrestore(&pm->power_change_lock, flags);
+		}
+	}
+}
+
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+	u64 desired_bitmap;
+	bool cores_are_available;
+	bool do_poweroff = false;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	if (kbdev->pm.backend.pm_current_policy == NULL)
+		return;
+
+	desired_bitmap =
+		kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
+	desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+
+	/* Enable core 0 if tiler required, regardless of core availability */
+	if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+		desired_bitmap |= 1;
+
+	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
+							(u32)desired_bitmap);
+	/* Are any cores being powered on? */
+	if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
+	    kbdev->pm.backend.ca_in_transition) {
+		/* Check if we are powering off any cores before updating shader
+		 * state */
+		if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) {
+			/* Start timer to power off cores */
+			kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+
+			if (kbdev->pm.poweroff_shader_ticks)
+				kbdev->pm.backend.shader_poweroff_pending_time =
+						kbdev->pm.poweroff_shader_ticks;
+			else
+				do_poweroff = true;
+		}
+
+		kbdev->pm.backend.desired_shader_state = desired_bitmap;
+
+		/* If any cores are being powered on, transition immediately */
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) {
+		/* Start timer to power off cores */
+		kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+		if (kbdev->pm.poweroff_shader_ticks)
+			kbdev->pm.backend.shader_poweroff_pending_time =
+					kbdev->pm.poweroff_shader_ticks;
+		else
+			kbasep_pm_do_poweroff_cores(kbdev);
+	} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
+				kbdev->pm.backend.poweroff_timer_needed) {
+		/* If power policy is keeping cores on despite there being no
+		 * active contexts then disable poweroff timer as it isn't
+		 * required.
+		 * Only reset poweroff_timer_needed if we're not in the middle
+		 * of the power off callback */
+		kbdev->pm.backend.poweroff_timer_needed = false;
+	}
+
+	/* Ensure timer does not power off wanted cores and make sure to power
+	 * off unwanted cores */
+	if (kbdev->pm.backend.shader_poweroff_pending != 0) {
+		kbdev->pm.backend.shader_poweroff_pending &=
+				~(kbdev->pm.backend.desired_shader_state &
+								desired_bitmap);
+		if (kbdev->pm.backend.shader_poweroff_pending == 0)
+			kbdev->pm.backend.shader_poweroff_pending_time = 0;
+	}
+
+	/* Shader poweroff is deferred to the end of the function, to eliminate
+	 * issues caused by the core availability policy recursing into this
+	 * function */
+	if (do_poweroff)
+		kbasep_pm_do_poweroff_cores(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+}
+
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
+
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
+
+int set_policy_by_name(struct kbase_device *kbdev, const char *name)
+{
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, name)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		printk("power_policy: policy not found\n");
+		return -EINVAL;
+	}
+	trace_printk("policy name=%s\n", name);
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(set_policy_by_name);
+
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_policy *new_policy)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	const struct kbase_pm_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	old_policy = kbdev->pm.backend.pm_current_policy;
+	kbdev->pm.backend.pm_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
+								old_policy->id);
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
+								new_policy->id);
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.pm_current_policy = new_policy;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_active(kbdev);
+	kbase_pm_update_cores_state(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
+
+/* Check whether a state change has finished, and trace it as completed */
+static void
+kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
+{
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state)
+		kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	unsigned long flags;
+	u64 cores;
+
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	cores = shader_cores;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+
+		/* It should be almost impossible for this to overflow. It would
+		 * require 2^32 atoms to request a particular core, which would
+		 * require 2^24 contexts to submit. This would require an amount
+		 * of memory that is impossible on a 32-bit system and extremely
+		 * unlikely on a 64-bit system. */
+		int cnt = ++kbdev->shader_needed_cnt[bitnum];
+
+		if (1 == cnt) {
+			kbdev->shader_needed_bitmap |= bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt = ++kbdev->tiler_needed_cnt;
+
+		if (1 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+							change_gpu_state);
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
+
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	unsigned long flags;
+
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_needed_bitmap &= ~bit;
+
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		cnt = --kbdev->tiler_needed_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* Trace that any state change effectively completes immediately
+		 * - no-one will wait on the state change */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
+
+enum kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	unsigned long flags;
+	u64 prev_shader_needed;	/* Just for tracing */
+	u64 prev_shader_inuse;	/* Just for tracing */
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	prev_shader_needed = kbdev->shader_needed_bitmap;
+	prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+	/* If desired_shader_state does not contain the requested cores, then
+	 * power management is not attempting to powering those cores (most
+	 * likely due to core availability policy) and a new job affinity must
+	 * be chosen */
+	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+							shader_cores) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+		return KBASE_NEW_AFFINITY;
+	}
+
+	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+	    (tiler_required && !kbdev->tiler_available_bitmap)) {
+		/* Trace ongoing core transition */
+		kbase_timeline_pm_l2_transition_start(kbdev);
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+		return KBASE_CORES_NOT_READY;
+	}
+
+	/* If we started to trace a state change, then trace it has being
+	 * finished by now, at the very latest */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+	/* Trace core transition done */
+	kbase_timeline_pm_l2_transition_done(kbdev);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt)
+			kbdev->shader_needed_bitmap &= ~bit;
+
+		/* shader_inuse_cnt should not overflow because there can only
+		 * be a very limited number of jobs on the h/w at one time */
+
+		kbdev->shader_inuse_cnt[bitnum]++;
+		kbdev->shader_inuse_bitmap |= bit;
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		--kbdev->tiler_needed_cnt;
+
+		kbdev->tiler_inuse_cnt++;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+	}
+
+	if (prev_shader_needed != kbdev->shader_needed_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	unsigned long flags;
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_inuse_bitmap &= ~bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+		cnt = --kbdev->tiler_inuse_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+							change_gpu_state);
+
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
+
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+					bool tiler_required,
+					u64 shader_cores)
+{
+	kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+
+	kbase_pm_check_transitions_sync(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
+
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 prior_l2_users_count;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	prior_l2_users_count = kbdev->l2_users_count++;
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
+
+	/* if the GPU is reset while the l2 is on, l2 will be off but
+	 * prior_l2_users_count will be > 0. l2_available_bitmap will have been
+	 * set to 0 though by kbase_pm_init_hw */
+	if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
+		kbase_pm_check_transitions_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+	wait_event(kbdev->pm.backend.l2_powered_wait,
+					kbdev->pm.backend.l2_powered == 1);
+
+	/* Trace that any state change completed immediately */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
+
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	kbdev->l2_users_count++;
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
+
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
+
+	--kbdev->l2_users_count;
+
+	if (!kbdev->l2_users_count) {
+		kbase_pm_check_transitions_nolock(kbdev);
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
new file mode 100755
index 0000000..611a90e
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -0,0 +1,227 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ *         initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+	KBASE_CORES_NOT_READY = 0,
+	KBASE_NEW_AFFINITY = 1,
+	KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ *                          for jobs to be submitted
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete/the cores might not be available
+ * until a Power Management IRQ.
+ *
+ * Return: 0 if the cores were successfully requested, or -errno otherwise.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ *                            jobs to be submitted.
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job is
+ * cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and if so update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ *
+ *         %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
+ *
+ *         %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+						struct kbase_device *kbdev,
+						bool tiler_required,
+						u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_register_inuse_cores() )
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
+ * may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups, power up, wait and prevent
+ * the power manager from powering down the l2 caches.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2
+ *
+ * It is the callers responsibility to ensure that the l2 is already powered up
+ * and to eventually call kbase_pm_release_l2_caches()
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power manager
+ * to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c
new file mode 100644
index 0000000..cd4f0a2
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c
@@ -0,0 +1,160 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+#include <linux/of.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <backend/gpu/mali_kbase_power_model_simple.h>
+
+/*
+ * This model is primarily designed for the Juno platform. It may not be
+ * suitable for other platforms.
+ */
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static u32 dynamic_coefficient;
+static u32 static_coefficient;
+static s32 ts[4];
+static struct thermal_zone_device *gpu_tz;
+
+static unsigned long model_static_power(unsigned long voltage)
+{
+	unsigned long temperature, temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	if (gpu_tz) {
+		int ret;
+
+		ret = gpu_tz->ops->get_temp(gpu_tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	} else {
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(ts[3] * temp_cubed)
+			+ (ts[2] * temp_squared)
+			+ (ts[1] * temp)
+			+ ts[0];
+
+	return (((static_coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+}
+
+static unsigned long model_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+
+	return (dynamic_coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+
+struct devfreq_cooling_ops power_model_simple_ops = {
+	.get_static_power = model_static_power,
+	.get_dynamic_power = model_dynamic_power,
+};
+
+int kbase_power_model_simple_init(struct kbase_device *kbdev)
+{
+	struct device_node *power_model_node;
+	const char *tz_name;
+	u32 static_power, dynamic_power;
+	u32 voltage, voltage_squared, voltage_cubed, frequency;
+
+	power_model_node = of_get_child_by_name(kbdev->dev->of_node,
+			"power_model");
+	if (!power_model_node) {
+		dev_err(kbdev->dev, "could not find power_model node\n");
+		return -ENODEV;
+	}
+	if (!of_device_is_compatible(power_model_node,
+			"arm,mali-simple-power-model")) {
+		dev_err(kbdev->dev, "power_model incompatible with simple power model\n");
+		return -ENODEV;
+	}
+
+	if (of_property_read_string(power_model_node, "thermal-zone",
+			&tz_name)) {
+		dev_err(kbdev->dev, "ts in power_model not available\n");
+		return -EINVAL;
+	}
+
+	gpu_tz = thermal_zone_get_zone_by_name(tz_name);
+	if (IS_ERR(gpu_tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(gpu_tz));
+		gpu_tz = NULL;
+
+		return -EPROBE_DEFER;
+	}
+
+	if (of_property_read_u32(power_model_node, "static-power",
+			&static_power)) {
+		dev_err(kbdev->dev, "static-power in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "dynamic-power",
+			&dynamic_power)) {
+		dev_err(kbdev->dev, "dynamic-power in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "voltage",
+			&voltage)) {
+		dev_err(kbdev->dev, "voltage in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "frequency",
+			&frequency)) {
+		dev_err(kbdev->dev, "frequency in power_model not available\n");
+		return -EINVAL;
+	}
+	voltage_squared = (voltage * voltage) / 1000;
+	voltage_cubed = voltage * voltage * voltage;
+	static_coefficient = (static_power << 20) / (voltage_cubed >> 10);
+	dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared)
+			* 1000) / frequency;
+
+	if (of_property_read_u32_array(power_model_node, "ts", ts, 4)) {
+		dev_err(kbdev->dev, "ts in power_model not available\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h
new file mode 100644
index 0000000..d20de1e
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_POWER_MODEL_SIMPLE_H_
+#define _BASE_POWER_MODEL_SIMPLE_H_
+
+/**
+ * kbase_power_model_simple_init - Initialise the simple power model
+ * @kbdev: Device pointer
+ *
+ * The simple power model estimates power based on current voltage, temperature,
+ * and coefficients read from device tree. It does not take utilization into
+ * account.
+ *
+ * The power model requires coefficients from the power_model node in device
+ * tree. The absence of this node will prevent the model from functioning, but
+ * should not prevent the rest of the driver from running.
+ *
+ * Return: 0 on success
+ *         -ENOSYS if the power_model node is not present in device tree
+ *         -EPROBE_DEFER if the thermal zone specified in device tree is not
+ *         currently available
+ *         Any other negative value on failure
+ */
+int kbase_power_model_simple_init(struct kbase_device *kbdev);
+
+extern struct devfreq_cooling_ops power_model_simple_ops;
+
+#endif /* _BASE_POWER_MODEL_SIMPLE_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100755
index 0000000..4bcde85
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush -  Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367
+ *
+ * Note : If GPU resets occur then the counters are reset to zero, the delay may
+ * not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+	u32 base_count = 0;
+
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+
+	kbase_pm_context_active(kctx->kbdev);
+	kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+	while (true) {
+		u32 new_count;
+
+		new_count = kbase_reg_read(kctx->kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		/* First time around, just store the count. */
+		if (base_count == 0) {
+			base_count = new_count;
+			continue;
+		}
+
+		/* No need to handle wrapping, unsigned maths works for this. */
+		if ((new_count - base_count) > 1000)
+			break;
+	}
+
+	kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+	kbase_pm_context_idle(kctx->kbdev);
+}
+#endif				/* CONFIG_MALI_NO_MALI */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
new file mode 100755
index 0000000..35088ab
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
new file mode 100755
index 0000000..35ff2f1
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -0,0 +1,126 @@
+#
+# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/drivers/gpu/arm/midgard/ 
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE                += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           += ../../kernel/drivers/gpu/arm/midgard/docs
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
new file mode 100755
index 0000000..7ae05c2
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR;
+	size="12,8";
+	compound=true;
+
+	node [ shape = box ];
+
+	subgraph cluster_policy_queues {
+		low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+		queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+		rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+		label = "Policy's Queue(s)";
+	}
+
+	call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+	{
+		rank=same;
+		ordering=out;
+		call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+		call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+		call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+	}
+
+	subgraph cluster_runpool {
+
+		as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+		as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+		as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+		as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+		label = "Policy's Run Pool";
+	}
+
+	{
+		rank=same;
+		call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+		sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+	}
+
+	{
+		rank=same;
+		ordering=out;
+		sstop [ shape=ellipse label="SS-Timer expires" ]
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		irq [ label="IRQ" shape=ellipse ];
+
+		job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+	}
+
+	hstop [ shape=ellipse label="HS-Timer expires" ]
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+	low_queue:qr -> call_dequeue:w;
+	rt_queue:qr -> call_dequeue:w;
+
+	call_dequeue -> as1 [lhead=cluster_runpool];
+
+	as1->call_jdequeue         [ltail=cluster_runpool];
+	call_jdequeue->jobslots:0;
+	call_jdequeue->sstop_dotfixup [ arrowhead=none];
+	sstop_dotfixup->sstop      [label="Spawn SS-Timer"];
+	sstop->jobslots            [label="SoftStop"];
+	sstop->hstop               [label="Spawn HS-Timer"];
+	hstop->jobslots:ne            [label="HardStop"];
+
+
+	as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+	call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+	call_ctxfinish->call_ctxdone [constraint=false];
+
+	call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+	{
+	jobslots->irq   [constraint=false];
+
+	irq->job_finish [constraint=false];
+	}
+
+	irq->as2  [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
new file mode 100755
index 0000000..159b993
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR
+	size="6,6"
+	compound=true;
+
+	node [ shape = box ];
+
+	call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+	policy_queue [ label="Policy's Queue" ];
+
+	{
+		rank=same;
+		runpool [ label="Policy's Run Pool" ];
+
+		ctx_finish [ label="ctx finished" ];
+	}
+
+	{
+		rank=same;
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		job_finish [ label="Job finished" ];
+	}
+
+
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> policy_queue;
+
+	policy_queue->runpool [label="dequeue ctx" weight=0.1];
+	runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+	runpool->ctx_finish [ style=dotted ];
+
+	runpool->jobslots  [label="dequeue job" weight=0.1];
+	jobslots->runpool  [label="requeue job" weight=0.1];
+
+	jobslots->job_finish [ style=dotted ];
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
new file mode 100755
index 0000000..29c78c2
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -0,0 +1,189 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t83x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t82x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tMIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
new file mode 100755
index 0000000..e111b07
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -0,0 +1,887 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_ISSUES_H_
+#define _BASE_HWCONFIG_ISSUES_H_
+
+enum base_hw_issue {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8879,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_generic[] = {
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t72x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t76x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t60x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t62x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t86x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t83x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t82x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
new file mode 100755
index 0000000..c59e8b2
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,1743 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _BASE_KERNEL_H_
+#define _BASE_KERNEL_H_
+
+#ifndef __user
+#define __user
+#endif
+
+/* Support UK6 IOCTLS */
+#define BASE_LEGACY_UK6_SUPPORT 1
+
+/* Support UK7 IOCTLS */
+/* NB: To support UK6 we also need to support UK7 */
+#define BASE_LEGACY_UK7_SUPPORT 1
+
+/* Support UK8 IOCTLS */
+#define BASE_LEGACY_UK8_SUPPORT 1
+
+/* Support UK9 IOCTLS */
+#define BASE_LEGACY_UK9_SUPPORT 1
+
+typedef struct base_mem_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_mem_handle;
+
+#include "mali_base_mem_priv.h"
+#include "mali_kbase_profiling_gator_api.h"
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT              256
+
+#define BASEP_JD_SEM_PER_WORD_LOG2      5
+#define BASEP_JD_SEM_PER_WORD           (1 << BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_WORD_NR(x)         ((x) >> BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_MASK_IN_WORD(x)    (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1)))
+#define BASEP_JD_SEM_ARRAY_SIZE         BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT)
+
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET             ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET           ((unsigned char)0)
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined CDBG_ASSERT
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined KBASE_DEBUG_ASSERT
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#error assert macro not defined!
+#endif
+
+#if defined PAGE_MASK
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union kbase_pointer {
+	void __user *value;	  /**< client should store their pointers here */
+	u32 compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	u64 sizer;	  /**< Force 64-bit storage for all clients regardless */
+} kbase_pointer;
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (eg @c MEM_PROT_CPU_WR | @c MEM_HINT_CPU_RD),
+ * which defines a @a write-only region on the CPU side, which is
+ * heavily read by the CPU...
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see ::BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * See ::base_mem_alloc_flags.
+ *
+ */
+enum {
+/* IN */
+	BASE_MEM_PROT_CPU_RD = (1U << 0),      /**< Read access CPU side */
+	BASE_MEM_PROT_CPU_WR = (1U << 1),      /**< Write access CPU side */
+	BASE_MEM_PROT_GPU_RD = (1U << 2),      /**< Read access GPU side */
+	BASE_MEM_PROT_GPU_WR = (1U << 3),      /**< Write access GPU side */
+	BASE_MEM_PROT_GPU_EX = (1U << 4),      /**< Execute allowed on the GPU
+						    side */
+
+	/* BASE_MEM_HINT flags have been removed, but their values are reserved
+	 * for backwards compatibility with older user-space drivers. The values
+	 * can be re-used once support for r5p0 user-space drivers is removed,
+	 * presumably in r7p0.
+	 *
+	 * RESERVED: (1U << 5)
+	 * RESERVED: (1U << 6)
+	 * RESERVED: (1U << 7)
+	 * RESERVED: (1U << 8)
+	 */
+
+	BASE_MEM_GROW_ON_GPF = (1U << 9),      /**< Grow backing store on GPU
+						    Page Fault */
+
+	BASE_MEM_COHERENT_SYSTEM = (1U << 10), /**< Page coherence Outer
+						    shareable, if available */
+	BASE_MEM_COHERENT_LOCAL = (1U << 11),  /**< Page coherence Inner
+						    shareable */
+	BASE_MEM_CACHED_CPU = (1U << 12),      /**< Should be cached on the
+						    CPU */
+
+/* IN/OUT */
+	BASE_MEM_SAME_VA = (1U << 13), /**< Must have same VA on both the GPU
+					    and the CPU */
+/* OUT */
+	BASE_MEM_NEED_MMAP = (1U << 14), /**< Must call mmap to aquire a GPU
+					     address for the alloc */
+/* IN */
+	BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence
+					     Outer shareable, required. */
+	BASE_MEM_SECURE = (1U << 16),          /**< Secure memory */
+	BASE_MEM_DONT_NEED = (1U << 17),       /**< Not needed physical
+						    memory */
+
+};
+
+/**
+ * @brief Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the ::base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 18
+
+/**
+  * A mask for all output bits, excluding IN/OUT bits.
+  */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/**
+  * A mask for all input bits, including IN/OUT bits.
+  */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/**
+ * A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+	 BASE_MEM_COHERENT_LOCAL)
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMP: UMP import. Handle type is ump_secure_id.
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	BASE_MEM_IMPORT_TYPE_UMP = 1,
+	BASE_MEM_IMPORT_TYPE_UMM = 2,
+	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+} base_mem_import_type;
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:	kbase_pointer to imported user buffer
+ * @length:	length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+	kbase_pointer ptr;
+	u64 length;
+};
+
+/**
+ * @brief Invalid memory handle.
+ *
+ * Return value from functions returning @ref base_mem_handle on error.
+ *
+ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} })
+
+/**
+ * @brief Special write-alloc memory handle.
+ *
+ * A special handle is used to represent a region where a special page is mapped
+ * with a write-alloc cache setup, typically used when the write result of the
+ * GPU isn't needed, but the GPU must write anyway.
+ *
+ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} })
+
+#define BASEP_MEM_INVALID_HANDLE               (0ull  << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE               (1ull  << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
+/* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
+						BASE_MEM_COOKIE_BASE)
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB  0xfffff000UL
+
+
+/* Bit mask of cookies used for for memory allocation setup */
+#define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
+
+
+/**
+ * @brief Result codes of changing the size of the backing store allocated to a tmem region
+ */
+typedef enum base_backing_threshold_status {
+	BASE_BACKING_THRESHOLD_OK = 0,			    /**< Resize successful */
+	BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1,	    /**< Not a growable tmem object */
+	BASE_BACKING_THRESHOLD_ERROR_OOM = -2,		    /**< Increase failed due to an out-of-memory condition */
+	BASE_BACKING_THRESHOLD_ERROR_MAPPED = -3,	    /**< Resize attempted on buffer while it was mapped, which is not permitted */
+	BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
+} base_backing_threshold_status;
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief a basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+	struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent imported memory object.
+ * Simple opague handle to imported memory, can't be used
+ * with anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+	struct {
+		int fd;
+	} basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
+ */
+typedef struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completly unused
+ * by the Base driver. It can be used to store things such as callback
+ * function pointer, data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+	u64 blob[2];	 /**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ *         Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *         specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+	base_mem_handle handle;
+	u64 offset;
+	u64 length;
+};
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extent:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ */
+struct base_jit_alloc_info {
+	u64 gpu_alloc_addr;
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	u8 id;
+};
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
+ * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without
+ * changing the structure size).
+ * When the flag is set for a particular dependency to signal that it is an ordering only dependency then
+ * errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID  (0)       /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA     (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER    (1U << 1) /**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly.  By not specifying the
+ * correct settings can/will cause an early job termination.  Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u16 base_jd_core_req;
+
+/* Requirements that come from the HW */
+#define BASE_JD_REQ_DEP 0	    /**< No requirement, dependency only */
+#define BASE_JD_REQ_FS  (1U << 0)   /**< Requires fragment shaders */
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS  (1U << 1)
+#define BASE_JD_REQ_T   (1U << 2)   /**< Requires tiling */
+#define BASE_JD_REQ_CF  (1U << 3)   /**< Requires cache flushes */
+#define BASE_JD_REQ_V   (1U << 4)   /**< Requires value writeback */
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC  (1U << 13)
+
+/**
+ * SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE (1U << 5)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP  (1U << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON               (1U << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced no syncsets can be bundled with the atom
+ * but should instead be part of a NULL jobs inserted into the dependency tree.
+ * The first pre_dep object must be configured for the external resouces to use,
+ * the second pre_dep object can be used to create other dependencies.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES   (1U << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB        (1U << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME      (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER          (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/**
+ * SW Only requirement : Replay job.
+ *
+ * If the preceeding job fails, the replay job will cause the jobs specified in
+ * the list of base_jd_replay_payload pointed to by the jc pointer to be
+ * replayed.
+ *
+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
+ * job is failed, as well as any following dependencies.
+ *
+ * The replayed jobs will require a number of atom IDs. If there are not enough
+ * free atom IDs then the replay job will fail.
+ *
+ * If the preceeding job does not fail, then the replay job is returned as
+ * completed.
+ *
+ * The replayed jobs will never be returned to userspace. The preceeding failed
+ * job will be returned to userspace as failed; the status of this job should
+ * be ignored. Completion should be determined by the status of the replay soft
+ * job.
+ *
+ * In order for the jobs to be replayed, the job headers will have to be
+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type
+ * field indicates a Vertex Shader Job then it will be changed to Null Job.
+ *
+ * The replayed jobs have the following assumptions :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - Pre-dependencies are created based on job order.
+ * - Atom numbers are automatically assigned.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ */
+#define BASE_JD_REQ_SOFT_REPLAY                 (BASE_JD_REQ_SOFT_JOB | 0x4)
+/**
+ * SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ *   other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ *   possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET              (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET            (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY             (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/**
+ * SW only requirement: Just In Time allocation
+ *
+ * This job requests a JIT allocation based on the request in the
+ * @base_jit_alloc_info structure which is passed via the jc element of
+ * the atom.
+ *
+ * It should be noted that the id entry in @base_jit_alloc_info must not
+ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job to free the JIT allocation is still made.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC              (BASE_JD_REQ_SOFT_JOB | 0x9)
+/**
+ * SW only requirement: Just In Time free
+ *
+ * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC
+ * to be freed. The ID of the JIT allocation is passed via the jc element of
+ * the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE               (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/**
+ * SW only requirement: Map external resource
+ *
+ * This job requests external resource(s) are mapped once the dependencies
+ * of the job have been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP            (BASE_JD_REQ_SOFT_JOB | 0xb)
+/**
+ * SW only requirement: Unmap external resource
+ *
+ * This job requests external resource(s) are unmapped once the dependencies
+ * of the job has been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP          (BASE_JD_REQ_SOFT_JOB | 0xc)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE    (1U << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP (1U << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE   (1U << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER (1U << 14)
+
+/**
+ * These requirement bits are currently unused in base_jd_core_req (currently a u16)
+ */
+
+#define BASEP_JD_REQ_RESERVED (1U << 15)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED |\
+				BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\
+				BASE_JD_REQ_EXTERNAL_RESOURCES |\
+				BASEP_JD_REQ_EVENT_NEVER |\
+				BASE_JD_REQ_EVENT_COALESCE))
+
+/**
+ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
+ * handles retaining cores for power management and affinity management.
+ *
+ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
+ * where lots of atoms could be submitted before powerup, and each has an
+ * affinity chosen that causes other atoms to have an affinity
+ * violation. Whilst the affinity was not causing violations at the time it
+ * was chosen, it could cause violations thereafter. For example, 1000 jobs
+ * could have had their affinity chosen during the powerup time, so any of
+ * those 1000 jobs could cause an affinity violation later on.
+ *
+ * The attack would otherwise occur because other atoms/contexts have to wait for:
+ * -# the currently running atoms (which are causing the violation) to
+ * finish
+ * -# and, the atoms that had their affinity chosen during powerup to
+ * finish. These are run preferrentially because they don't cause a
+ * violation, but instead continue to cause the violation in others.
+ * -# or, the attacker is scheduled out (which might not happen for just 2
+ * contexts)
+ *
+ * By re-choosing the affinity (which is designed to avoid violations at the
+ * time it's chosen), we break condition (2) of the wait, which minimizes the
+ * problem to just waiting for current jobs to finish (which can be bounded if
+ * the Job Scheduling Policy has a timer).
+ */
+enum kbase_atom_coreref_state {
+	/** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
+	KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
+	/** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
+	KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
+	/** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
+	KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
+	/** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
+	KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
+	/** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
+	KBASE_ATOM_COREREF_STATE_READY
+};
+
+/*
+ * Base Atom priority
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling between atoms of the same type within
+ * a base context, and only after the atoms have had dependencies resolved.
+ * Fragment atoms does not affect non-frament atoms with lower priorities, and
+ * the other way around. For example, a low priority atom that has had its
+ * dependencies resolved might run before a higher priority atom that has not
+ * had its dependencies resolved.
+ *
+ * The scheduling between base contexts/processes and between atoms from
+ * different base contexts/processes is unaffected by atom priority.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
+ *   resolved, and atom 'X' has a higher priority than atom 'Y'
+ * - If atom 'Y' is currently running on the HW, then it is interrupted to
+ *   allow atom 'X' to run soon after
+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ *   the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * - Any two atoms that have the same priority could run in any order with
+ *   respect to each other. That is, there is no ordering constraint between
+ *   atoms of the same priority.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM  ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_HIGH    ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW     ((base_jd_prio)2)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+enum kbase_jd_atom_state {
+	/** Atom is not used */
+	KBASE_JD_ATOM_STATE_UNUSED,
+	/** Atom is queued in JD */
+	KBASE_JD_ATOM_STATE_QUEUED,
+	/** Atom has been given to JS (is runnable/running) */
+	KBASE_JD_ATOM_STATE_IN_JS,
+	/** Atom has been completed, but not yet handed back to job dispatcher
+	 *  for dependency resolution */
+	KBASE_JD_ATOM_STATE_HW_COMPLETED,
+	/** Atom has been completed, but not yet handed back to userspace */
+	KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+	base_atom_id  atom_id;               /**< An atom number */
+	base_jd_dep_type dependency_type;    /**< Dependency type */
+};
+
+typedef struct base_jd_atom_v2 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	kbase_pointer extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	base_jd_core_req core_req;	    /**< core requirements */
+	struct base_dependency pre_dep[2];  /**< pre-dependencies, one need to use SETTER function to assign this field,
+	this is done in order to reduce possibility of improper assigment of a dependency field */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;                  /**< Atom priority. Refer to @ref base_jd_prio for more details */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[5];
+} base_jd_atom_v2;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+struct base_jd_atom_v2_uk6 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	kbase_pointer extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	base_jd_core_req core_req;	    /**< core requirements */
+	base_atom_id pre_dep[2]; /**< pre-dependencies */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;		    /**< priority - smaller is higher priority */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[7];
+};
+#endif
+
+typedef enum base_external_resource_access {
+	BASE_EXT_RES_ACCESS_SHARED,
+	BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+	u64 ext_resource;
+} base_external_resource;
+
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+	u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	u64 address;
+	u64 size;
+};
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] dep          The kbase jd atom dependency to be initialized.
+ * @param     id           The atom_id to be assigned.
+ * @param     dep_type     The dep_type to be assigned.
+ *
+ */
+static inline void base_jd_atom_dep_set(struct base_dependency *dep,
+		base_atom_id id, base_jd_dep_type dep_type)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	/*
+	 * make sure we don't set not allowed combinations
+	 * of atom_id/dependency_type.
+	 */
+	LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+			(id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
+
+	dep->atom_id = id;
+	dep->dependency_type = dep_type;
+}
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] dep          The kbase jd atom dependency to be written.
+ * @param[in]     from         The dependency to make a copy from.
+ *
+ */
+static inline void base_jd_atom_dep_copy(struct base_dependency *dep,
+		const struct base_dependency *from)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type);
+}
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ */
+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+	LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init or @a base_fence_import.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ */
+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up an external resource object to reference
+ * a memory allocation and the type of access requested.
+ *
+ * @param[in] res     The resource object to initialize
+ * @param     handle  The handle to the imported memory object, must be
+ *                    obtained by calling @ref base_mem_as_import_handle().
+ * @param     access  The type of access requested
+ */
+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
+{
+	u64 address;
+
+	address = handle.basep.handle;
+
+	LOCAL_ASSERT(res != NULL);
+	LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+	LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+	res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+	BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
+	BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
+	BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */
+	BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
+	BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
+	BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
+	BASE_JD_SW_EVENT_RESERVED = (3u << 11),	/**< Reserved event type */
+	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)	    /**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0  - subtype
+ * @li 12:11 - type
+ * @li 13    - SW success (only valid if the SW bit is set)
+ * @li 14    - SW event (HW event if not set)
+ * @li 15    - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+typedef enum base_jd_event_code {
+	/* HW defined exceptions */
+
+	/** Start of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+	/* non-fatal exceptions */
+	BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
+	BASE_JD_EVENT_DONE = 0x01,
+	BASE_JD_EVENT_STOPPED = 0x03,	  /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
+	BASE_JD_EVENT_TERMINATED = 0x04,  /**< This is actually a fault status code - the job was hard stopped */
+	BASE_JD_EVENT_ACTIVE = 0x08,	  /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
+
+	/** End of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+
+	/** Start of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+	/* job exceptions */
+	BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+	BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+	BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+	BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+	BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+	BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+	BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+	BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+	BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+	BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+	BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+	BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+	BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+	BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+	BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+	BASE_JD_EVENT_STATE_FAULT = 0x5A,
+	BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+	BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+	/* GPU exceptions */
+	BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+	BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+	/* MMU exceptions */
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+	BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+	BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+	/* SW defined exceptions */
+	BASE_JD_EVENT_MEM_GROWTH_FAILED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_TIMED_OUT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+	BASE_JD_EVENT_JOB_CANCELLED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+	BASE_JD_EVENT_JOB_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+	BASE_JD_EVENT_PM_EVENT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+	BASE_JD_EVENT_FORCE_REPLAY	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
+
+	BASE_JD_EVENT_BAG_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+	/** End of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+	BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
+	BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+	/** End of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of Kernel-only status codes. Such codes are never returned to user-space */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+	BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+	/** End of Kernel-only status codes. */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. The can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (ie all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+	base_jd_event_code event_code;  /**< event code */
+	base_atom_id atom_number;       /**< the atom number that has completed */
+	struct base_jd_udata udata;     /**< user data */
+} base_jd_event_v2;
+
+/**
+ * Padding required to ensure that the @ref struct base_dump_cpu_gpu_counters structure fills
+ * a full cache line.
+ */
+
+#define BASE_CPU_GPU_CACHE_LINE_PADDING (36)
+
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field of @ref base_jd_atom.
+ *
+ * This structure must be padded to ensure that it will occupy whole cache lines. This is to avoid
+ * cases where access to pages containing the structure is shared between cached and un-cached
+ * memory regions, which would cause memory corruption.  Here we set the structure size to be 64 bytes
+ * which is the cache line for ARM A15 processors.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+	u64 system_time;
+	u64 cycle_counter;
+	u64 sec;
+	u32 usec;
+	u8 padding[BASE_CPU_GPU_CACHE_LINE_PADDING];
+} base_dump_cpu_gpu_counters;
+
+
+
+/** @} end group base_user_api_job_dispatch */
+
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistant guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ *  - Part of a critical inner-loop
+ *  - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU  registers themselves.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * _mali_base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to _mali_base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties
+ *
+ * The Platform Config File sets up gpu properties that are specific to a
+ * certain platform. Properties that are 'Implementation Defined' in the
+ * Midgard Architecture spec are placed here.
+ *
+ * @note Reference configurations are provided for Midgard Implementations, such as
+ * the Mali-T600 family. The customer need not repeat this information, and can select one of
+ * these reference configurations. For example, VA_BITS, PA_BITS and the
+ * maximum number of samples per pixel might vary between Midgard Implementations, but
+ * \b not for platforms using the Mali-T604. This information is placed in
+ * the reference configuration files.
+ *
+ * The System Integrator creates the following structure:
+ * - platform_XYZ
+ * - platform_XYZ/plat
+ * - platform_XYZ/plat/plat_config.h
+ *
+ * They then edit plat_config.h, using the example plat_config.h files as a
+ * guide.
+ *
+ * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will
+ * receive a helpful \#error message if they do not do this correctly. This
+ * selects the Reference Configuration for the Midgard Implementation. The rationale
+ * behind this decision (against asking the customer to write \#include
+ * <gpus/mali_t600.h> in their plat_config.h) is as follows:
+ * - This mechanism 'looks' like a regular config file (such as Linux's
+ * .config)
+ * - It is difficult to get wrong in a way that will produce strange build
+ * errors:
+ *  - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and
+ *  so they won't accidentally pick another file with 'mali_t600' in its name
+ *  - When the build doesn't work, the System Integrator may think the DDK is
+ *  doesn't work, and attempt to fix it themselves:
+ *   - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the
+ *   error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells
+ *   you.
+ *   - For a \#include mechanism, checks must still be made elsewhere, which the
+ *   System Integrator may try working around by setting \#defines (such as
+ *   VA_BITS) themselves in their plat_config.h. In the  worst case, they may
+ *   set the prevention-mechanism \#define of
+ *   "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN".
+ *   - In this case, they would believe they are on the right track, because
+ *   the build progresses with their fix, but with errors elsewhere.
+ *
+ * However, there is nothing to prevent the customer using \#include to organize
+ * their own configurations files hierarchically.
+ *
+ * The mechanism for the header file processing is as follows:
+ *
+ * @dot
+   digraph plat_config_mechanism {
+	   rankdir=BT
+	   size="6,6"
+
+       "mali_base.h";
+	   "gpu/mali_gpu.h";
+
+	   node [ shape=box ];
+	   {
+	       rank = same; ordering = out;
+
+		   "gpu/mali_gpu_props.h";
+		   "base/midg_gpus/mali_t600.h";
+		   "base/midg_gpus/other_midg_gpu.h";
+	   }
+	   { rank = same; "plat/plat_config.h"; }
+	   {
+	       rank = same;
+		   "gpu/mali_gpu.h" [ shape=box ];
+		   gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ];
+		   select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ;
+	   }
+	   node [ shape=box ];
+	   { rank = same; "plat/plat_config.h"; }
+	   { rank = same; "mali_base.h"; }
+
+	   "mali_base.h" -> "gpu/mali_gpu.h" -> "gpu/mali_gpu_props.h";
+	   "mali_base.h" -> "plat/plat_config.h" ;
+	   "mali_base.h" -> select_gpu ;
+
+	   "plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ;
+	   gpu_chooser -> select_gpu [style="dotted,bold"] ;
+
+	   select_gpu -> "base/midg_gpus/mali_t600.h" ;
+	   select_gpu -> "base/midg_gpus/other_midg_gpu.h" ;
+   }
+   @enddot
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided the entire processed @ref base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algoirthm can be determined either by a u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * requried for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+struct mali_base_gpu_core_props {
+	/**
+	 * Product specific value.
+	 */
+	u32 product_id;
+
+	/**
+	 * Status of the GPU release.
+     * No defined values, but starts at 0 and increases by one for each release
+     * status (alpha, beta, EAC, etc.).
+     * 4 bit values (0-15).
+	 */
+	u16 version_status;
+
+	/**
+	 * Minor release number of the GPU. "P" part of an "RnPn" release number.
+     * 8 bit values (0-255).
+	 */
+	u16 minor_revision;
+
+	/**
+	 * Major release number of the GPU. "R" part of an "RnPn" release number.
+     * 4 bit values (0-15).
+	 */
+	u16 major_revision;
+
+	u16 padding;
+
+	/**
+	 * @usecase GPU clock speed is not specified in the Midgard Architecture, but is
+	 * <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+	 */
+	u32 gpu_speed_mhz;
+
+	/**
+	 * @usecase GPU clock max/min speed is required for computing best/worst case
+	 * in tasks as job scheduling ant irq_throttling. (It is not specified in the
+	 *  Midgard Architecture).
+	 */
+	u32 gpu_freq_khz_max;
+	u32 gpu_freq_khz_min;
+
+	/**
+	 * Size of the shader program counter, in bits.
+	 */
+	u32 log2_program_counter_size;
+
+	/**
+	 * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+	 * bitpattern where a set bit indicates that the format is supported.
+	 *
+	 * Before using a texture format, it is recommended that the corresponding
+	 * bit be checked.
+	 */
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	/**
+	 * Theoretical maximum memory available to the GPU. It is unlikely that a
+	 * client will be able to allocate all of this memory for their own
+	 * purposes, but this at least provides an upper bound on the memory
+	 * available to the GPU.
+	 *
+	 * This is required for OpenCL's clGetDeviceInfo() call when
+	 * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+	 * client will not be expecting to allocate anywhere near this value.
+	 */
+	u64 gpu_available_memory_size;
+};
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+	u8 log2_line_size;
+	u8 log2_cache_size;
+	u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+	u32 bin_size_bytes;	/* Max is 4*2^15 */
+	u32 max_active_levels;	/* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+	u32 max_threads;            /* Max. number of threads per core */
+	u32 max_workgroup_size;     /* Max. number of threads per workgroup */
+	u32 max_barrier_size;       /* Max. number of threads that can synchronize on a simple barrier */
+	u16 max_registers;          /* Total size [1..65535] of the register file available per core. */
+	u8  max_task_queue;         /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+	u8  max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+	u8  impl_tech;              /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+	u8  padding[7];
+};
+
+/**
+ * @brief descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+	u64 core_mask;	       /**< Core restriction mask required for the group */
+	u16 num_cores;	       /**< Number of cores in the group */
+	u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+	u32 num_groups;
+
+	/**
+	 * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+	 *
+	 * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+	 * whether the core groups are coherent or not. Hence this member is needed
+	 * to calculate how much memory is required for dumping.
+	 *
+	 * @note Do not use it to work out how many valid elements are in the
+	 * group[] member. Use num_groups instead.
+	 */
+	u32 num_core_groups;
+
+	/**
+	 * Coherency features of the memory, accessed by @ref gpu_mem_features
+	 * methods
+	 */
+	u32 coherency;
+
+	u32 padding;
+
+	/**
+	 * Descriptors of coherent groups
+	 */
+	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+	u64 shader_present;
+	u64 tiler_present;
+	u64 l2_present;
+	u64 unused_1; /* keep for backward compatibility */
+
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 mem_features;
+	u32 mmu_features;
+
+	u32 as_present;
+
+	u32 js_present;
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 tiler_features;
+	u32 texture_features[3];
+
+	u32 gpu_id;
+
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+
+	/*
+	 * Note: This is the _selected_ coherency mode rather than the
+	 * available modes as exposed in the coherency_features register.
+	 */
+	u32 coherency_mode;
+};
+
+/**
+ * Return structure for _mali_base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this datastructure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct mali_base_gpu_props {
+	struct mali_base_gpu_core_props core_props;
+	struct mali_base_gpu_l2_cache_props l2_props;
+	u64 unused_1; /* keep for backwards compatibility */
+	struct mali_base_gpu_tiler_props tiler_props;
+	struct mali_base_gpu_thread_props thread_props;
+
+	/** This member is large, likely to be 128 bytes */
+	struct gpu_raw_gpu_props raw_props;
+
+	/** This must be last member of the structure */
+	struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * \enum base_context_create_flags
+ *
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as @ref basep_context_private_flags, and so must
+ * not collide with them.
+ */
+enum base_context_create_flags {
+	/** No flags set */
+	BASE_CONTEXT_CREATE_FLAG_NONE = 0,
+
+	/** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */
+	BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0),
+
+	/** Base context is a 'System Monitor' context for Hardware counters.
+	 *
+	 * One important side effect of this is that job submission is disabled. */
+	BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1)
+};
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init()
+ */
+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
+	(((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
+	  ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
+ */
+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
+	((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
+
+/**
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+enum basep_context_private_flags {
+	/** Private flag tracking whether job descriptor dumping is disabled */
+	BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED = (1 << 31)
+};
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C Pre-processor macros are exposed here to do with Platform
+ * Config.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revison.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/**
+ * @brief The payload for a replay job. This must be in GPU memory.
+ */
+typedef struct base_jd_replay_payload {
+	/**
+	 * Pointer to the first entry in the base_jd_replay_jc list.  These
+	 * will be replayed in @b reverse order (so that extra ones can be added
+	 * to the head in future soft jobs without affecting this soft job)
+	 */
+	u64 tiler_jc_list;
+
+	/**
+	 * Pointer to the fragment job chain.
+	 */
+	u64 fragment_jc;
+
+	/**
+	 * Pointer to the tiler heap free FBD field to be modified.
+	 */
+	u64 tiler_heap_free;
+
+	/**
+	 * Hierarchy mask for the replayed fragment jobs. May be zero.
+	 */
+	u16 fragment_hierarchy_mask;
+
+	/**
+	 * Hierarchy mask for the replayed tiler jobs. May be zero.
+	 */
+	u16 tiler_hierarchy_mask;
+
+	/**
+	 * Default weight to be used for hierarchy levels not in the original
+	 * mask.
+	 */
+	u32 hierarchy_default_weight;
+
+	/**
+	 * Core requirements for the tiler job chain
+	 */
+	base_jd_core_req tiler_core_req;
+
+	/**
+	 * Core requirements for the fragment job chain
+	 */
+	base_jd_core_req fragment_core_req;
+
+	u8 padding[4];
+} base_jd_replay_payload;
+
+/**
+ * @brief An entry in the linked list of job chains to be replayed. This must
+ *        be in GPU memory.
+ */
+typedef struct base_jd_replay_jc {
+	/**
+	 * Pointer to next entry in the list. A setting of NULL indicates the
+	 * end of the list.
+	 */
+	u64 next;
+
+	/**
+	 * Pointer to the job chain.
+	 */
+	u64 jc;
+
+} base_jd_replay_jc;
+
+/* Maximum number of jobs allowed in a fragment chain in the payload of a
+ * replay job */
+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
+
+/** @} end group base_api */
+
+typedef struct base_profiling_controls {
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+} base_profiling_controls;
+
+#endif				/* _BASE_KERNEL_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel_sync.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
new file mode 100755
index 0000000..a24791f
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base cross-proccess sync API.
+ */
+
+#ifndef _BASE_KERNEL_SYNC_H_
+#define _BASE_KERNEL_SYNC_H_
+
+#include <linux/ioctl.h>
+
+#define STREAM_IOC_MAGIC '~'
+
+/* Fence insert.
+ *
+ * Inserts a fence on the stream operated on.
+ * Fence can be waited via a base fence wait soft-job
+ * or triggered via a base fence trigger soft-job.
+ *
+ * Fences must be cleaned up with close when no longer needed.
+ *
+ * No input/output arguments.
+ * Returns
+ * >=0 fd
+ * <0  error code
+ */
+#define STREAM_IOC_FENCE_INSERT _IO(STREAM_IOC_MAGIC, 0)
+
+#endif				/* _BASE_KERNEL_SYNC_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100755
index 0000000..4a98a72
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC	(1U << 0)
+#define BASE_SYNCSET_OP_CSYNC	(1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	base_mem_handle mem_handle;
+	u64 user_addr;
+	u64 size;
+	u8 type;
+	u8 padding[7];
+};
+
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
new file mode 100755
index 0000000..be454a2
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif	/*_BASE_VENDOR_SPEC_FUNC_H_*/
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100755
index 0000000..3764061
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,569 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <asm/page.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_uku.h>
+#include <mali_kbase_linux.h>
+
+#include "mali_kbase_pm.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_trace_timeline.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_debug_job_fault.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_vinstr.h"
+#include "mali_kbase_ipa.h"
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+/**
+ * @page page_base_kernel_main Kernel-side Base (KBase) APIs
+ *
+ * The Kernel-side Base (KBase) APIs are divided up as follows:
+ * - @subpage page_kbase_js_policy
+ */
+
+/**
+ * @defgroup base_kbase_api Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: configuration attributes member of kbdev needs to have
+* been setup before calling kbase_device_init
+*/
+
+/*
+* API to acquire device list semaphore and return pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control);
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+void kbase_destroy_context(struct kbase_context *kctx);
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data,
+		int uk6_atom);
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data);
+#endif
+
+/**
+ * kbase_jd_done_worker - Handle a job completion
+ * @data: a &struct work_struct
+ *
+ * This function requeues the job from the runpool (if it was soft-stopped or
+ * removed from NEXT registers).
+ *
+ * Removes it from the system if it finished/failed/was cancelled.
+ *
+ * Resolves dependencies to add dependent jobs to the context, potentially
+ * starting them if necessary (which may add more references to the context)
+ *
+ * Releases the reference to the context from the no-longer-running job.
+ *
+ * Handles retrying submission outside of IRQ context if it failed from within
+ * IRQ context.
+ */
+void kbase_jd_done_worker(struct work_struct *data);
+
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+		kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+bool jd_submit_atom(struct kbase_context *kctx,
+			 const struct base_jd_atom_v2 *user_atom,
+			 struct kbase_jd_atom *katom);
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom);
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ *                                               and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The js_data.runpool_irq.lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		u16 core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+enum hrtimer_restart kbasep_soft_event_timeout_worker(struct hrtimer *timer);
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
+int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status);
+int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status);
+
+/* api used internally for register access. Contains validation and tracing */
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size);
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
+
+/* api to be ported per OS, only need to do the raw register access */
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value);
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset);
+
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev     The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code  exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+		u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or has already been suspended
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+	return kbdev->pm.suspending;
+}
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int result;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+	result = katom - &kctx->jctx.atoms[0];
+	KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
+	return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id:   ID of atom to retrieve
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+		struct kbase_context *kctx, int id)
+{
+	return &kctx->jctx.atoms[id];
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just increment
+ * the counter during disjoint events we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down
+ * should be called
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if !defined(UINT64_MAX)
+	#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#endif
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)     \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+	kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+	kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+	trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(subcode);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_DEBUG
+/**
+ * kbase_set_driver_inactive - Force driver to go inactive
+ * @kbdev:    Device pointer
+ * @inactive: true if driver should go inactive, false otherwise
+ *
+ * Forcing the driver inactive will cause all future IOCTLs to wait until the
+ * driver is made active again. This is intended solely for the use of tests
+ * which require that no jobs are running while the test executes.
+ */
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifndef RESET0_LEVEL
+extern u32  override_value_aml;
+extern void *reg_base_hiubus;
+#define RESET0_MASK    0x01
+#define RESET1_MASK    0x02
+#define RESET2_MASK    0x03
+
+#define RESET0_LEVEL    0x10
+#define RESET1_LEVEL    0x11
+#define RESET2_LEVEL    0x12
+#define Rd(r)                           readl((reg_base_hiubus) + ((r)<<2))
+#define Wr(r, v)                        writel((v), ((reg_base_hiubus) + ((r)<<2)))
+#define Mali_WrReg(unit, core, regnum, value)   kbase_reg_write(kbdev, (regnum), (value), NULL)
+#define Mali_RdReg(unit, core, regnum)          kbase_reg_read(kbdev, (regnum), NULL)
+#define stimulus_print   printk
+#define stimulus_display printk
+#define Mali_pwr_on(x)   Mali_pwr_on_with_kdev(kbdev, (x))
+#define Mali_pwr_off(x)  Mali_pwr_off_with_kdev(kbdev, (x))
+#endif
+
+
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100755
index 0000000..933aa28
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,209 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* This function is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the
+ * restart index is out of bounds and the rerun causes a tile range fault. If this happens
+ * we try to clamp the restart index to a correct value and rerun the job.
+ */
+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+	struct device *dev = katom->kctx->kbdev->dev;
+	u32   clamped = 0;
+	struct kbase_va_region *region;
+	phys_addr_t *page_array;
+	u64 page_index;
+	u32 offset = katom->jc & (~PAGE_MASK);
+	u32 *page_1 = NULL;
+	u32 *page_2 = NULL;
+	u32   job_header[JOB_HEADER_SIZE_IN_WORDS];
+	void *dst = job_header;
+	u32 minX, minY, maxX, maxY;
+	u32 restartX, restartY;
+	struct page *p;
+	u32 copy_size;
+
+	dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+	if (!(katom->core_req & BASE_JD_REQ_FS))
+		return 0;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+			katom->jc);
+	if (!region || (region->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(region);
+	if (!page_array)
+		goto out_unlock;
+
+	page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+	p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+	/* we need the first 10 words of the fragment shader job descriptor.
+	 * We need to check that the offset + 10 words is less that the page
+	 * size otherwise we need to load the next page.
+	 * page_size_overflow will be equal to 0 in case the whole descriptor
+	 * is within the page > 0 otherwise.
+	 */
+	copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+	page_1 = kmap_atomic(p);
+
+	/* page_1 is a u32 pointer, offset is expressed in bytes */
+	page_1 += offset>>2;
+
+	kbase_sync_single_for_cpu(katom->kctx->kbdev,
+			kbase_dma_addr(p) + offset,
+			copy_size, DMA_BIDIRECTIONAL);
+
+	memcpy(dst, page_1, copy_size);
+
+	/* The data needed overflows page the dimension,
+	 * need to map the subsequent page */
+	if (copy_size < JOB_HEADER_SIZE) {
+		p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+		page_2 = kmap_atomic(p);
+
+		kbase_sync_single_for_cpu(katom->kctx->kbdev,
+				kbase_dma_addr(p),
+				JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+		memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+	}
+
+	/* We managed to correctly map one or two pages (in case of overflow) */
+	/* Get Bounding Box data and restart index from fault address low word */
+	minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+	restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+	dev_warn(dev, "Before Clamping:\n"
+			"Jobstatus: %08x\n"
+			"restartIdx: %08x\n"
+			"Fault_addr_low: %08x\n"
+			"minCoordsX: %08x minCoordsY: %08x\n"
+			"maxCoordsX: %08x maxCoordsY: %08x\n",
+			job_header[JOB_DESC_STATUS_WORD],
+			job_header[JOB_DESC_RESTART_INDEX_WORD],
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+			minX, minY,
+			maxX, maxY);
+
+	/* Set the restart index to the one which generated the fault*/
+	job_header[JOB_DESC_RESTART_INDEX_WORD] =
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+	if (restartX < minX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to minimum. %08x clamped to %08x\n",
+			restartX, minX);
+		clamped =  1;
+	}
+	if (restartY < minY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to minimum. %08x clamped to %08x\n",
+			restartY, minY);
+		clamped =  1;
+	}
+	if (restartX > maxX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to maximum. %08x clamped to %08x\n",
+			restartX, maxX);
+		clamped =  1;
+	}
+	if (restartY > maxY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to maximum. %08x clamped to %08x\n",
+			restartY, maxY);
+		clamped =  1;
+	}
+
+	if (clamped) {
+		/* Reset the fault address low word
+		 * and set the job status to STOPPED */
+		job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+		job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+		dev_warn(dev, "After Clamping:\n"
+				"Jobstatus: %08x\n"
+				"restartIdx: %08x\n"
+				"Fault_addr_low: %08x\n"
+				"minCoordsX: %08x minCoordsY: %08x\n"
+				"maxCoordsX: %08x maxCoordsY: %08x\n",
+				job_header[JOB_DESC_STATUS_WORD],
+				job_header[JOB_DESC_RESTART_INDEX_WORD],
+				job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+				minX, minY,
+				maxX, maxY);
+
+		/* Flush CPU cache to update memory for future GPU reads*/
+		memcpy(page_1, dst, copy_size);
+		p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+		kbase_sync_single_for_device(katom->kctx->kbdev,
+				kbase_dma_addr(p) + offset,
+				copy_size, DMA_TO_DEVICE);
+
+		if (copy_size < JOB_HEADER_SIZE) {
+			memcpy(page_2, dst + copy_size,
+					JOB_HEADER_SIZE - copy_size);
+			p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+
+			kbase_sync_single_for_device(katom->kctx->kbdev,
+					kbase_dma_addr(p),
+					JOB_HEADER_SIZE - copy_size,
+					DMA_TO_DEVICE);
+		}
+	}
+	if (copy_size < JOB_HEADER_SIZE)
+		kunmap_atomic(page_2);
+
+	kunmap_atomic(page_1);
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+	return clamped;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100755
index 0000000..099a298
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100755
index 0000000..c67b3e9
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+	u32 cache_flags = 0;
+
+	CSTD_UNUSED(nr_pages);
+
+	if (flags & BASE_MEM_CACHED_CPU)
+		cache_flags |= KBASE_REG_CPU_CACHED;
+
+	return cache_flags;
+}
+
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		return;
+#endif /* CONFIG_MALI_COH_KERN */
+	dma_sync_single_for_device(kbdev->dev, handle, size, dir);
+}
+
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		return;
+#endif /* CONFIG_MALI_COH_KERN */
+	dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
new file mode 100755
index 0000000..0c18bdb
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * kbase_cache_enabled - Choose the cache policy for a specific region
+ * @flags:    flags describing attributes of the region
+ * @nr_pages: total number of pages (backed or not) for the region
+ *
+ * Tells whether the CPU and GPU caches should be enabled or not for a specific
+ * region.
+ * This function can be modified to customize the cache policy depending on the
+ * flags and size of the region.
+ *
+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED
+ *         depending on the cache policy
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
new file mode 100755
index 0000000..fb615ae
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
@@ -0,0 +1,51 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+int kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_init_func)
+		return platform_funcs_p->platform_init_func(kbdev);
+
+	return 0;
+}
+
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_term_func)
+		platform_funcs_p->platform_term_func(kbdev);
+}
+
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed)
+{
+	KBASE_DEBUG_ASSERT(NULL != clock_speed);
+
+	*clock_speed = 100;
+	return 0;
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
new file mode 100755
index 0000000..356d52b
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
@@ -0,0 +1,345 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <asm/page.h>
+
+#include <mali_malisw.h>
+#include <mali_kbase_backend_config.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+#include <linux/rbtree.h>
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/**
+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ *
+ * Specifies the functions pointers for platform specific initialization and
+ * termination. By default no functions are required. No additional platform
+ * specific control is necessary.
+ */
+struct kbase_platform_funcs_conf {
+	/**
+	 * platform_init_func - platform specific init function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Returns 0 on success, negative error code otherwise.
+	 *
+	 * Function pointer for platform specific initialization or NULL if no
+	 * initialization function is required. At the point this the GPU is
+	 * not active and its power and clocks are in unknown (platform specific
+	 * state) as kbase doesn't yet have control of power and clocks.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly initialized) in here.
+	 */
+	int (*platform_init_func)(struct kbase_device *kbdev);
+	/**
+	 * platform_term_func - platform specific termination function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Function pointer for platform specific termination or NULL if no
+	 * termination function is required. At the point this the GPU will be
+	 * idle but still powered and clocked.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly terminated) in here.
+	 */
+	void (*platform_term_func)(struct kbase_device *kbdev);
+};
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+	/** Callback for when the GPU is idle and the power to it can be switched off.
+	 *
+	 * The system integrator can decide whether to either do nothing, just switch off
+	 * the clocks to the GPU, or to completely power down the GPU.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the GPU is about to become active and power must be supplied.
+	 *
+	 * This function must not return until the GPU is powered and clocked sufficiently for register access to
+	 * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
+	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 *
+	 * The return value of the first call to this function is ignored.
+	 *
+	 * @return 1 if the GPU state may have been lost, 0 otherwise.
+	 */
+	int (*power_on_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is requesting a suspend and GPU power
+	 * must be switched off.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a preceding call to power_off_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_off_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is resuming from a suspend and GPU
+	 * power must be switched on.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a following call to power_on_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_on_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_resume_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management initialization.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * will become active from calls made to the OS from within this function.
+	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else int error code.
+	 */
+	 int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management termination.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * should no longer be called by the OS on completion of this function.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-off power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_suspend callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
+	 */
+	void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-on power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_resume callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+
+	/*
+	 * Optional callback for checking if GPU can be suspended when idle
+	 *
+	 * This callback will be called by the runtime power management core
+	 * when the reference count goes to 0 to provide notification that the
+	 * GPU now seems idle.
+	 *
+	 * If this callback finds that the GPU can't be powered off, or handles
+	 * suspend by powering off directly or queueing up a power off, a
+	 * non-zero value must be returned to prevent the runtime PM core from
+	 * also triggering a suspend.
+	 *
+	 * Returning 0 will cause the runtime PM core to conduct a regular
+	 * autosuspend.
+	 *
+	 * This callback is optional and if not provided regular autosuspend
+	 * will be triggered.
+	 *
+	 * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+	 * this feature.
+	 *
+	 * Return 0 if GPU can be suspended, positive value if it can not be
+	 * suspeneded by runtime PM, else OS error code
+	 */
+	int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
+};
+
+/**
+ * kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC
+ * @clock_speed - see  kbase_cpu_clk_speed_func for details on the parameters
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * Default implementation of CPU_SPEED_FUNC. This function sets clock_speed
+ * to 100, so will be an underestimate for any real system.
+ */
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed);
+
+/**
+ * kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current CPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ *
+ * This is mainly used to implement OpenCL's clGetDeviceInfo().
+ */
+typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed);
+
+/**
+ * kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current GPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ * When an error is returned the caller assumes maximum GPU speed stored in
+ * gpu_freq_khz_max.
+ *
+ * If the system timer is not available then this function is required
+ * for the OpenCL queue profiling to return correct timing information.
+ *
+ */
+typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed);
+
+#ifdef CONFIG_OF
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+	u64 start;
+	u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ */
+struct kbase_io_resources {
+	u32                      job_irq_number;
+	u32                      mmu_irq_number;
+	u32                      gpu_irq_number;
+	struct kbase_io_memory_region io_memory_region;
+};
+
+struct kbase_platform_config {
+	const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init: - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes.  The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ *         Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that are outside of the Power Management
+ * callbacks.
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_platform_early_init - Early initialisation of the platform code
+ *
+ * This function will be called when the module is loaded to perform any
+ * early initialisation required by the platform code. Such as reading
+ * platform specific device tree entries for the GPU.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_early_init(void);
+
+#ifndef CONFIG_OF
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+/**
+ * kbase_platform_fake_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_fake_register()
+ */
+void kbase_platform_fake_unregister(void);
+#endif
+#endif
+
+	  /** @} *//* end group kbase_config */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_CONFIG_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100755
index 0000000..ee7c96d
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,260 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+ * Irq throttle. It is the minimum desired time in between two
+ * consecutive gpu interrupts (given in 'us'). The irq throttle
+ * gpu register will be configured after this, taking into
+ * account the configured max frequency.
+ *
+ * Attached value: number in micro seconds
+ */
+#define DEFAULT_IRQ_THROTTLE_TIME_US 20
+
+/**
+ *  Default Job Scheduler initial runtime of a context for the CFS Policy,
+ *  in time-slices.
+ *
+ * This value is relative to that of the least-run context, and defines
+ * where in the CFS queue a new context is added. A value of 1 means 'after
+ * the least-run context has used its timeslice'. Therefore, when all
+ * contexts consistently use the same amount of time, a value of 1 models a
+ * FIFO. A value of 0 would model a LIFO.
+ *
+ * The value is represented in "numbers of time slices". Multiply this
+ * value by that defined in @ref DEFAULT_JS_CTX_TIMESLICE_NS to get
+ * the time value for this in nanoseconds.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES 1
+
+/**
+ * Default Job Scheduler minimum runtime value of a context for CFS, in
+ * time_slices relative to that of the least-run context.
+ *
+ * This is a measure of how much preferrential treatment is given to a
+ * context that is not run very often.
+ *
+ * Specficially, this value defines how many timeslices such a context is
+ * (initially) allowed to use at once. Such contexts (e.g. 'interactive'
+ * processes) will appear near the front of the CFS queue, and can initially
+ * use more time than contexts that run continuously (e.g. 'batch'
+ * processes).
+ *
+ * This limit \b prevents a "stored-up timeslices" DoS attack, where a ctx
+ * not run for a long time attacks the system by using a very large initial
+ * number of timeslices when it finally does run.
+ *
+ * @note A value of zero allows not-run-often contexts to get scheduled in
+ * quickly, but to only use a single timeslice when they get scheduled in.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES 2
+
+/**
+* Boolean indicating whether the driver is configured to be secure at
+* a potential loss of performance.
+*
+* This currently affects only r0p0-15dev0 HW and earlier.
+*
+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+* performance:
+*
+* - When this is set to true, the driver remains fully secure,
+* but potentially loses performance compared with setting this to
+* false.
+* - When set to false, the driver is open to certain security
+* attacks.
+*
+* From r0p0-00rel0 and onwards, there is no security loss by setting
+* this to false, and no performance loss by setting it to
+* true.
+*/
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 */
+	KBASE_AID_32 = 0x0,
+
+	/**
+	 * Restrict GPU to a half of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_16 = 0x3,
+
+	/**
+	 * Restrict GPU to a quarter of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_8  = 0x2,
+
+	/**
+	 * Restrict GPU to an eighth of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_4  = 0x1
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
+ * defines which UMP device this GPU should be mapped to.
+ */
+#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
+
+/*
+ * Default period for DVFS sampling
+ */
+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+
+/*
+ * Power Management poweroff tick granuality. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/*
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/*
+ * Power Manager number of ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
+
+/*
+ * Default scheduling tick granuality
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
+
+/*
+ * Default timeout for software event jobs, after which these jobs will be
+ * cancelled.
+ */
+#define DEFAULT_JS_SOFT_EVENT_TIMEOUT ((u32)3000) /* 3s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
+ */
+#define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */
+
+/*
+ * Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
+/*
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
new file mode 100755
index 0000000..d53f728
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -0,0 +1,310 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_instr.h>
+#include <mali_kbase_mem_linux.h>
+
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context.
+ *
+ * Return: new kbase context
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+{
+	struct kbase_context *kctx;
+	int err;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* zero-inited as lot of code assume it's zero'ed out on create */
+	kctx = vzalloc(sizeof(*kctx));
+
+	if (!kctx)
+		goto out;
+
+	/* creating a context is considered a disjoint event */
+	kbase_disjoint_event(kbdev);
+
+	kctx->kbdev = kbdev;
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+	kctx->is_compat = is_compat;
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kctx->timeline.owner_tgid = task_tgid_nr(current);
+#endif
+	atomic_set(&kctx->setup_complete, 0);
+	atomic_set(&kctx->setup_in_progress, 0);
+	kctx->infinite_cache_active = 0;
+	spin_lock_init(&kctx->mm_update_lock);
+	kctx->process_mm = NULL;
+	atomic_set(&kctx->nonmapped_pages, 0);
+	kctx->slots_pullable = 0;
+
+	err = kbase_mem_pool_init(&kctx->mem_pool,
+			kbdev->mem_pool_max_size_default,
+			kctx->kbdev, &kbdev->mem_pool);
+	if (err)
+		goto free_kctx;
+
+	err = kbase_mem_evictable_init(kctx);
+	if (err)
+		goto free_pool;
+
+	atomic_set(&kctx->used_pages, 0);
+
+	err = kbase_jd_init(kctx);
+	if (err)
+		goto deinit_evictable;
+
+	err = kbasep_js_kctx_init(kctx);
+	if (err)
+		goto free_jd;	/* safe to call kbasep_js_kctx_term  in this case */
+
+	err = kbase_event_init(kctx);
+	if (err)
+		goto free_jd;
+
+	mutex_init(&kctx->reg_lock);
+
+	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+	spin_lock_init(&kctx->waiting_soft_jobs_lock);
+#ifdef CONFIG_KDS
+	INIT_LIST_HEAD(&kctx->waiting_kds_resource);
+#endif
+
+	err = kbase_mmu_init(kctx);
+	if (err)
+		goto free_event;
+
+	kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+	if (!kctx->pgd)
+		goto free_mmu;
+
+	kctx->aliasing_sink_page = kbase_mem_pool_alloc(&kctx->mem_pool);
+	if (!kctx->aliasing_sink_page)
+		goto no_sink_page;
+
+	kctx->tgid = current->tgid;
+	kctx->pid = current->pid;
+	init_waitqueue_head(&kctx->event_queue);
+
+	kctx->cookies = KBASE_COOKIE_MASK;
+
+	/* Make sure page 0 is not used... */
+	err = kbase_region_tracker_init(kctx);
+	if (err)
+		goto no_region_tracker;
+
+	err = kbase_sticky_resource_init(kctx);
+	if (err)
+		goto no_sticky;
+
+	err = kbase_jit_init(kctx);
+	if (err)
+		goto no_jit;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_set(&kctx->jctx.work_id, 0);
+#endif
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
+#endif
+
+	kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+	mutex_init(&kctx->vinstr_cli_lock);
+
+	hrtimer_init(&kctx->soft_event_timeout, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL);
+	kctx->soft_event_timeout.function = &kbasep_soft_event_timeout_worker;
+
+	return kctx;
+
+no_jit:
+	kbase_gpu_vm_lock(kctx);
+	kbase_sticky_resource_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+no_sticky:
+	kbase_region_tracker_term(kctx);
+no_region_tracker:
+	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
+no_sink_page:
+	/* VM lock needed for the call to kbase_mmu_free_pgd */
+	kbase_gpu_vm_lock(kctx);
+	kbase_mmu_free_pgd(kctx);
+	kbase_gpu_vm_unlock(kctx);
+free_mmu:
+	kbase_mmu_term(kctx);
+free_event:
+	kbase_event_cleanup(kctx);
+free_jd:
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+	kbase_jd_exit(kctx);
+deinit_evictable:
+	kbase_mem_evictable_deinit(kctx);
+free_pool:
+	kbase_mem_pool_term(&kctx->mem_pool);
+free_kctx:
+	vfree(kctx);
+out:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context);
+
+static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+{
+	dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+}
+
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Calls kbase_destroy_os_context() to free OS specific structures.
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	int pages;
+	unsigned long pending_regions_to_clean;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+	/* Ensure the core is powered up for the destroy process */
+	/* A suspend won't happen here, because we're in a syscall from a userspace
+	 * thread. */
+	kbase_pm_context_active(kbdev);
+
+	kbase_jd_zap_context(kctx);
+	kbase_event_cleanup(kctx);
+
+	/*
+	 * JIT must be terminated before the code below as it must be called
+	 * without the region lock being held.
+	 * The code above ensures no new JIT allocations can be made by
+	 * by the time we get to this point of context tear down.
+	 */
+	kbase_jit_term(kctx);
+
+	kbase_gpu_vm_lock(kctx);
+
+	kbase_sticky_resource_term(kctx);
+
+	/* MMU is disabled as part of scheduling out the context */
+	kbase_mmu_free_pgd(kctx);
+
+	/* drop the aliasing sink page now that it can't be mapped anymore */
+	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
+
+	/* free pending region setups */
+	pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+	while (pending_regions_to_clean) {
+		unsigned int cookie = __ffs(pending_regions_to_clean);
+
+		BUG_ON(!kctx->pending_regions[cookie]);
+
+		kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+
+		kctx->pending_regions[cookie] = NULL;
+		pending_regions_to_clean &= ~(1UL << cookie);
+	}
+
+	kbase_region_tracker_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+
+	kbase_jd_exit(kctx);
+
+	kbase_pm_context_idle(kbdev);
+
+	kbase_mmu_term(kctx);
+
+	pages = atomic_read(&kctx->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_evictable_deinit(kctx);
+	kbase_mem_pool_term(&kctx->mem_pool);
+	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+	vfree(kctx);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context);
+
+/**
+ * kbase_context_set_create_flags - Set creation flags on a context
+ * @kctx: Kbase context
+ * @flags: Flags to set
+ *
+ * Return: 0 on success
+ */
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
+{
+	int err = 0;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long irq_flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Validate flags */
+	if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+
+	/* Translate the flags */
+	if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+		js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED);
+
+	/* Latch the initial attributes into the Job Scheduler */
+	kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
+
+	spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock,
+			irq_flags);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ out:
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
new file mode 100755
index 0000000..429fea2
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -0,0 +1,4367 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_uku.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_instr.h>
+#include <mali_kbase_gator.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <mali_kbase_mem_linux.h>
+#ifdef CONFIG_MALI_DEVFREQ
+#include <backend/gpu/mali_kbase_devfreq.h>
+#endif /* CONFIG_MALI_DEVFREQ */
+#ifdef CONFIG_MALI_NO_MALI
+#include "mali_kbase_model_linux.h"
+#endif /* CONFIG_MALI_NO_MALI */
+#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_debugfs.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_regs_dump_debugfs.h"
+#endif /* !MALI_CUSTOMER_RELEASE */
+#include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#include <linux/anon_inodes.h>
+#include <linux/syscalls.h>
+#endif /* CONFIG_KDS */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/compat.h>	/* is_compat_task */
+#include <linux/mman.h>
+#include <linux/version.h>
+#include <linux/security.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+#include <mali_kbase_hw.h>
+#include <platform/mali_kbase_platform_common.h>
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+#include <platform/mali_kbase_platform_fake.h>
+#endif /*CONFIG_MALI_PLATFORM_FAKE */
+#ifdef CONFIG_SYNC
+#include <mali_kbase_sync.h>
+#endif /* CONFIG_SYNC */
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_PM_DEVFREQ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+
+#include <mali_kbase_config.h>
+
+#ifdef CONFIG_MACH_MANTA
+#include <plat/devs.h>
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif
+
+#include <mali_kbase_tlstream.h>
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+#if MALI_UNIT_TEST
+static struct kbase_exported_test_data shared_kernel_test_data;
+EXPORT_SYMBOL(shared_kernel_test_data);
+#endif /* MALI_UNIT_TEST */
+
+#define KBASE_DRV_NAME "mali"
+
+static const char kbase_drv_name[] = KBASE_DRV_NAME;
+
+static int kbase_dev_nr;
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+extern int mali_pm_statue;
+#endif
+
+static DEFINE_MUTEX(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+
+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
+static inline void __compile_time_asserts(void)
+{
+	CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE);
+}
+
+#ifdef CONFIG_KDS
+
+struct kbasep_kds_resource_set_file_data {
+	struct kds_resource_set *lock;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file);
+
+static const struct file_operations kds_resource_fops = {
+	.release = kds_resource_release
+};
+
+struct kbase_kds_resource_list_data {
+	struct kds_resource **kds_resources;
+	unsigned long *kds_access_bitmap;
+	int num_elems;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file)
+{
+	struct kbasep_kds_resource_set_file_data *data;
+
+	data = (struct kbasep_kds_resource_set_file_data *)file->private_data;
+	if (NULL != data) {
+		if (NULL != data->lock)
+			kds_resource_set_release(&data->lock);
+
+		kfree(data);
+	}
+	return 0;
+}
+
+static int kbasep_kds_allocate_resource_list_data(struct kbase_context *kctx, struct base_external_resource *ext_res, int num_elems, struct kbase_kds_resource_list_data *resources_list)
+{
+	struct base_external_resource *res = ext_res;
+	int res_id;
+
+	/* assume we have to wait for all */
+
+	KBASE_DEBUG_ASSERT(0 != num_elems);
+	resources_list->kds_resources = kmalloc_array(num_elems,
+			sizeof(struct kds_resource *), GFP_KERNEL);
+
+	if (NULL == resources_list->kds_resources)
+		return -ENOMEM;
+
+	KBASE_DEBUG_ASSERT(0 != num_elems);
+	resources_list->kds_access_bitmap = kzalloc(
+			sizeof(unsigned long) *
+			((num_elems + BITS_PER_LONG - 1) / BITS_PER_LONG),
+			GFP_KERNEL);
+
+	if (NULL == resources_list->kds_access_bitmap) {
+		kfree(resources_list->kds_access_bitmap);
+		return -ENOMEM;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+	for (res_id = 0; res_id < num_elems; res_id++, res++) {
+		int exclusive;
+		struct kbase_va_region *reg;
+		struct kds_resource *kds_res = NULL;
+
+		exclusive = res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+		/* did we find a matching region object? */
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+			break;
+
+		/* no need to check reg->alloc as only regions with an alloc has
+		 * a size, and kbase_region_tracker_find_region_enclosing_address
+		 * only returns regions with size > 0 */
+		switch (reg->gpu_alloc->type) {
+#if defined(CONFIG_UMP) && defined(CONFIG_KDS)
+		case KBASE_MEM_TYPE_IMPORTED_UMP:
+			kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle);
+			break;
+#endif /* defined(CONFIG_UMP) && defined(CONFIG_KDS) */
+		default:
+			break;
+		}
+
+		/* no kds resource for the region ? */
+		if (!kds_res)
+			break;
+
+		resources_list->kds_resources[res_id] = kds_res;
+
+		if (exclusive)
+			set_bit(res_id, resources_list->kds_access_bitmap);
+	}
+	kbase_gpu_vm_unlock(kctx);
+
+	/* did the loop run to completion? */
+	if (res_id == num_elems)
+		return 0;
+
+	/* Clean up as the resource list is not valid. */
+	kfree(resources_list->kds_resources);
+	kfree(resources_list->kds_access_bitmap);
+
+	return -EINVAL;
+}
+
+static bool kbasep_validate_kbase_pointer(
+		struct kbase_context *kctx, union kbase_pointer *p)
+{
+	if (kctx->is_compat) {
+		if (p->compat_value == 0)
+			return false;
+	} else {
+		if (NULL == p->value)
+			return false;
+	}
+	return true;
+}
+
+static int kbase_external_buffer_lock(struct kbase_context *kctx,
+		struct kbase_uk_ext_buff_kds_data *args, u32 args_size)
+{
+	struct base_external_resource *ext_res_copy;
+	size_t ext_resource_size;
+	int ret = -EINVAL;
+	int fd = -EBADF;
+	struct base_external_resource __user *ext_res_user;
+	int __user *file_desc_usr;
+	struct kbasep_kds_resource_set_file_data *fdata;
+	struct kbase_kds_resource_list_data resource_list_data;
+
+	if (args_size != sizeof(struct kbase_uk_ext_buff_kds_data))
+		return -EINVAL;
+
+	/* Check user space has provided valid data */
+	if (!kbasep_validate_kbase_pointer(kctx, &args->external_resource) ||
+			!kbasep_validate_kbase_pointer(kctx, &args->file_descriptor) ||
+			(0 == args->num_res) ||
+			(args->num_res > KBASE_MAXIMUM_EXT_RESOURCES))
+		return -EINVAL;
+
+	ext_resource_size = sizeof(struct base_external_resource) * args->num_res;
+
+	KBASE_DEBUG_ASSERT(0 != ext_resource_size);
+	ext_res_copy = kmalloc(ext_resource_size, GFP_KERNEL);
+
+	if (!ext_res_copy)
+		return -EINVAL;
+#ifdef CONFIG_COMPAT
+	if (kctx->is_compat) {
+		ext_res_user = compat_ptr(args->external_resource.compat_value);
+		file_desc_usr = compat_ptr(args->file_descriptor.compat_value);
+	} else {
+#endif /* CONFIG_COMPAT */
+		ext_res_user = args->external_resource.value;
+		file_desc_usr = args->file_descriptor.value;
+#ifdef CONFIG_COMPAT
+	}
+#endif /* CONFIG_COMPAT */
+
+	/* Copy the external resources to lock from user space */
+	if (copy_from_user(ext_res_copy, ext_res_user, ext_resource_size))
+		goto out;
+
+	/* Allocate data to be stored in the file */
+	fdata = kmalloc(sizeof(*fdata), GFP_KERNEL);
+
+	if (!fdata) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* Parse given elements and create resource and access lists */
+	ret = kbasep_kds_allocate_resource_list_data(kctx,
+			ext_res_copy, args->num_res, &resource_list_data);
+	if (!ret) {
+		long err;
+
+		fdata->lock = NULL;
+
+		fd = anon_inode_getfd("kds_ext", &kds_resource_fops, fdata, 0);
+
+		err = copy_to_user(file_desc_usr, &fd, sizeof(fd));
+
+		/* If the file descriptor was valid and we successfully copied
+		 * it to user space, then we can try and lock the requested
+		 * kds resources.
+		 */
+		if ((fd >= 0) && (0 == err)) {
+			struct kds_resource_set *lock;
+
+			lock = kds_waitall(args->num_res,
+					resource_list_data.kds_access_bitmap,
+					resource_list_data.kds_resources,
+					KDS_WAIT_BLOCKING);
+
+			if (!lock) {
+				ret = -EINVAL;
+			} else if (IS_ERR(lock)) {
+				ret = PTR_ERR(lock);
+			} else {
+				ret = 0;
+				fdata->lock = lock;
+			}
+		} else {
+			ret = -EINVAL;
+		}
+
+		kfree(resource_list_data.kds_resources);
+		kfree(resource_list_data.kds_access_bitmap);
+	}
+
+	if (ret) {
+		/* If the file was opened successfully then close it which will
+		 * clean up the file data, otherwise we clean up the file data
+		 * ourself.
+		 */
+		if (fd >= 0)
+			sys_close(fd);
+		else
+			kfree(fdata);
+	}
+out:
+	kfree(ext_res_copy);
+
+	return ret;
+}
+#endif /* CONFIG_KDS */
+
+static void kbase_create_timeline_objects(struct kbase_context *kctx)
+{
+	struct kbase_device             *kbdev = kctx->kbdev;
+	unsigned int                    lpu_id;
+	unsigned int                    as_nr;
+	struct kbasep_kctx_list_element *element;
+
+	/* Create LPU objects. */
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		u32 *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		kbase_tlstream_tl_summary_new_lpu(lpu, lpu_id, *lpu);
+	}
+
+	/* Create Address Space objects. */
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		kbase_tlstream_tl_summary_new_as(&kbdev->as[as_nr], as_nr);
+
+	/* Create GPU object and make it retain all LPUs and address spaces. */
+	kbase_tlstream_tl_summary_new_gpu(
+			kbdev,
+			kbdev->gpu_props.props.raw_props.gpu_id,
+			kbdev->gpu_props.num_cores);
+
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		void *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, kbdev);
+	}
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		kbase_tlstream_tl_summary_lifelink_as_gpu(
+				&kbdev->as[as_nr],
+				kbdev);
+
+	/* Create object for each known context. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry(element, &kbdev->kctx_list, link) {
+		kbase_tlstream_tl_summary_new_ctx(
+				element->kctx,
+				(u32)(element->kctx->id),
+				(u32)(element->kctx->tgid));
+	}
+	/* Before releasing the lock, reset body stream buffers.
+	 * This will prevent context creation message to be directed to both
+	 * summary and body stream. */
+	kbase_tlstream_reset_body_streams();
+	mutex_unlock(&kbdev->kctx_list_lock);
+	/* Static object are placed into summary packet that needs to be
+	 * transmitted first. Flush all streams to make it available to
+	 * user space. */
+	kbase_tlstream_flush_streams();
+}
+
+static void kbase_api_handshake(struct uku_version_check_args *version)
+{
+	switch (version->major) {
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	case 6:
+		/* We are backwards compatible with version 6,
+		 * so pretend to be the old version */
+		version->major = 6;
+		version->minor = 1;
+		break;
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+#ifdef BASE_LEGACY_UK7_SUPPORT
+	case 7:
+		/* We are backwards compatible with version 7,
+		 * so pretend to be the old version */
+		version->major = 7;
+		version->minor = 1;
+		break;
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	case 8:
+		/* We are backwards compatible with version 8,
+		 * so pretend to be the old version */
+		version->major = 8;
+		version->minor = 4;
+		break;
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+#ifdef BASE_LEGACY_UK9_SUPPORT
+	case 9:
+		/* We are backwards compatible with version 9,
+		 * so pretend to be the old version */
+		version->major = 9;
+		version->minor = 0;
+		break;
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+	case BASE_UK_VERSION_MAJOR:
+		/* set minor to be the lowest common */
+		version->minor = min_t(int, BASE_UK_VERSION_MINOR,
+				(int)version->minor);
+		break;
+	default:
+		/* We return our actual version regardless if it
+		 * matches the version returned by userspace -
+		 * userspace can bail if it can't handle this
+		 * version */
+		version->major = BASE_UK_VERSION_MAJOR;
+		version->minor = BASE_UK_VERSION_MINOR;
+		break;
+	}
+}
+
+/**
+ * enum mali_error - Mali error codes shared with userspace
+ *
+ * This is subset of those common Mali errors that can be returned to userspace.
+ * Values of matching user and kernel space enumerators MUST be the same.
+ * MALI_ERROR_NONE is guaranteed to be 0.
+ */
+enum mali_error {
+	MALI_ERROR_NONE = 0,
+	MALI_ERROR_OUT_OF_GPU_MEMORY,
+	MALI_ERROR_OUT_OF_MEMORY,
+	MALI_ERROR_FUNCTION_FAILED,
+};
+
+enum {
+	inited_mem = (1u << 0),
+	inited_js = (1u << 1),
+	inited_pm_runtime_init = (1u << 2),
+#ifdef CONFIG_MALI_DEVFREQ
+	inited_devfreq = (1u << 3),
+#endif /* CONFIG_MALI_DEVFREQ */
+	inited_tlstream = (1u << 4),
+	inited_backend_early = (1u << 5),
+	inited_backend_late = (1u << 6),
+	inited_device = (1u << 7),
+	inited_vinstr = (1u << 8),
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	inited_ipa = (1u << 9),
+#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+	inited_job_fault = (1u << 10),
+	inited_misc_register = (1u << 11),
+	inited_get_device = (1u << 12),
+	inited_sysfs_group = (1u << 13),
+	inited_dev_list = (1u << 14),
+	inited_debugfs = (1u << 15),
+	inited_gpu_device = (1u << 16),
+	inited_registers_map = (1u << 17),
+	inited_power_control = (1u << 19),
+	inited_buslogger = (1u << 20)
+};
+
+
+#ifdef CONFIG_MALI_DEBUG
+#define INACTIVE_WAIT_MS (5000)
+
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive)
+{
+	kbdev->driver_inactive = inactive;
+	wake_up(&kbdev->driver_inactive_wait);
+
+	/* Wait for any running IOCTLs to complete */
+	if (inactive)
+		msleep(INACTIVE_WAIT_MS);
+}
+KBASE_EXPORT_TEST_API(kbase_set_driver_inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 args_size)
+{
+	struct kbase_device *kbdev;
+	union uk_header *ukh = args;
+	u32 id;
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(ukh != NULL);
+
+	kbdev = kctx->kbdev;
+	id = ukh->id;
+	ukh->ret = MALI_ERROR_NONE; /* Be optimistic */
+
+#ifdef CONFIG_MALI_DEBUG
+	wait_event(kbdev->driver_inactive_wait,
+			kbdev->driver_inactive == false);
+#endif /* CONFIG_MALI_DEBUG */
+
+	if (UKP_FUNC_ID_CHECK_VERSION == id) {
+		struct uku_version_check_args *version_check;
+
+		if (args_size != sizeof(struct uku_version_check_args)) {
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			return 0;
+		}
+		version_check = (struct uku_version_check_args *)args;
+		kbase_api_handshake(version_check);
+		/* save the proposed version number for later use */
+		kctx->api_version = KBASE_API_VERSION(version_check->major,
+				version_check->minor);
+		ukh->ret = MALI_ERROR_NONE;
+		return 0;
+	}
+
+	/* block calls until version handshake */
+	if (kctx->api_version == 0)
+		return -EINVAL;
+
+	if (!atomic_read(&kctx->setup_complete)) {
+		struct kbase_uk_set_flags *kbase_set_flags;
+
+		/* setup pending, try to signal that we'll do the setup,
+		 * if setup was already in progress, err this call
+		 */
+		if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0)
+			return -EINVAL;
+
+		/* if unexpected call, will stay stuck in setup mode
+		 * (is it the only call we accept?)
+		 */
+		if (id != KBASE_FUNC_SET_FLAGS)
+			return -EINVAL;
+
+		kbase_set_flags = (struct kbase_uk_set_flags *)args;
+
+		/* if not matching the expected call, stay in setup mode */
+		if (sizeof(*kbase_set_flags) != args_size)
+			goto bad_size;
+
+		/* if bad flags, will stay stuck in setup mode */
+		if (kbase_context_set_create_flags(kctx,
+				kbase_set_flags->create_flags) != 0)
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+		atomic_set(&kctx->setup_complete, 1);
+		return 0;
+	}
+
+	/* setup complete, perform normal operation */
+	switch (id) {
+	case KBASE_FUNC_MEM_JIT_INIT:
+		{
+			struct kbase_uk_mem_jit_init *jit_init = args;
+
+			if (sizeof(*jit_init) != args_size)
+				goto bad_size;
+
+			if (kbase_region_tracker_init_jit(kctx,
+					jit_init->va_pages))
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_MEM_ALLOC:
+		{
+			struct kbase_uk_mem_alloc *mem = args;
+			struct kbase_va_region *reg;
+
+			if (sizeof(*mem) != args_size)
+				goto bad_size;
+
+#if defined(CONFIG_64BIT)
+			if (!kctx->is_compat) {
+				/* force SAME_VA if a 64-bit client */
+				mem->flags |= BASE_MEM_SAME_VA;
+			}
+#endif
+
+			reg = kbase_mem_alloc(kctx, mem->va_pages,
+					mem->commit_pages, mem->extent,
+					&mem->flags, &mem->gpu_va,
+					&mem->va_alignment);
+			if (!reg)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_MEM_IMPORT: {
+			struct kbase_uk_mem_import *mem_import = args;
+			void __user *phandle;
+
+			if (sizeof(*mem_import) != args_size)
+				goto bad_size;
+#ifdef CONFIG_COMPAT
+			if (kctx->is_compat)
+				phandle = compat_ptr(mem_import->phandle.compat_value);
+			else
+#endif
+				phandle = mem_import->phandle.value;
+
+			if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_import(kctx, mem_import->type, phandle,
+						&mem_import->gpu_va,
+						&mem_import->va_pages,
+						&mem_import->flags)) {
+				mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID;
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			}
+			break;
+	}
+	case KBASE_FUNC_MEM_ALIAS: {
+			struct kbase_uk_mem_alias *alias = args;
+			struct base_mem_aliasing_info __user *user_ai;
+			struct base_mem_aliasing_info *ai;
+
+			if (sizeof(*alias) != args_size)
+				goto bad_size;
+
+			if (alias->nents > 2048) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+			if (!alias->nents) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+#ifdef CONFIG_COMPAT
+			if (kctx->is_compat)
+				user_ai = compat_ptr(alias->ai.compat_value);
+			else
+#endif
+				user_ai = alias->ai.value;
+
+			ai = vmalloc(sizeof(*ai) * alias->nents);
+
+			if (!ai) {
+				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+				break;
+			}
+
+			if (copy_from_user(ai, user_ai,
+					   sizeof(*ai) * alias->nents)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				goto copy_failed;
+			}
+
+			alias->gpu_va = kbase_mem_alias(kctx, &alias->flags,
+							alias->stride,
+							alias->nents, ai,
+							&alias->va_pages);
+			if (!alias->gpu_va) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				goto no_alias;
+			}
+no_alias:
+copy_failed:
+			vfree(ai);
+			break;
+		}
+	case KBASE_FUNC_MEM_COMMIT:
+		{
+			struct kbase_uk_mem_commit *commit = args;
+
+			if (sizeof(*commit) != args_size)
+				goto bad_size;
+
+			if (commit->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_COMMIT: commit->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_commit(kctx, commit->gpu_addr,
+					commit->pages,
+					(base_backing_threshold_status *)
+					&commit->result_subcode) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+
+	case KBASE_FUNC_MEM_QUERY:
+		{
+			struct kbase_uk_mem_query *query = args;
+
+			if (sizeof(*query) != args_size)
+				goto bad_size;
+
+			if (query->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+			if (query->query != KBASE_MEM_QUERY_COMMIT_SIZE &&
+			    query->query != KBASE_MEM_QUERY_VA_SIZE &&
+				query->query != KBASE_MEM_QUERY_FLAGS) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->query = %lld unknown", (unsigned long long)query->query);
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_query(kctx, query->gpu_addr,
+					query->query, &query->value) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			break;
+		}
+		break;
+
+	case KBASE_FUNC_MEM_FLAGS_CHANGE:
+		{
+			struct kbase_uk_mem_flags_change *fc = args;
+
+			if (sizeof(*fc) != args_size)
+				goto bad_size;
+
+			if ((fc->gpu_va & ~PAGE_MASK) && (fc->gpu_va >= PAGE_SIZE)) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FLAGS_CHANGE: mem->gpu_va: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_flags_change(kctx, fc->gpu_va,
+					fc->flags, fc->mask) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+	case KBASE_FUNC_MEM_FREE:
+		{
+			struct kbase_uk_mem_free *mem = args;
+
+			if (sizeof(*mem) != args_size)
+				goto bad_size;
+
+			if ((mem->gpu_addr & ~PAGE_MASK) && (mem->gpu_addr >= PAGE_SIZE)) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FREE: mem->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_free(kctx, mem->gpu_addr) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+
+	case KBASE_FUNC_JOB_SUBMIT:
+		{
+			struct kbase_uk_job_submit *job = args;
+
+			if (sizeof(*job) != args_size)
+				goto bad_size;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+			if (kbase_jd_submit(kctx, job, 0) != 0)
+#else
+			if (kbase_jd_submit(kctx, job) != 0)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	case KBASE_FUNC_JOB_SUBMIT_UK6:
+		{
+			struct kbase_uk_job_submit *job = args;
+
+			if (sizeof(*job) != args_size)
+				goto bad_size;
+
+			if (kbase_jd_submit(kctx, job, 1) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+#endif
+
+	case KBASE_FUNC_SYNC:
+		{
+			struct kbase_uk_sync_now *sn = args;
+
+			if (sizeof(*sn) != args_size)
+				goto bad_size;
+
+			if (sn->sset.basep_sset.mem_handle.basep.handle & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+#ifndef CONFIG_MALI_COH_USER
+			if (kbase_sync_now(kctx, &sn->sset) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif
+			break;
+		}
+
+	case KBASE_FUNC_DISJOINT_QUERY:
+		{
+			struct kbase_uk_disjoint_query *dquery = args;
+
+			if (sizeof(*dquery) != args_size)
+				goto bad_size;
+
+			/* Get the disjointness counter value. */
+			dquery->counter = kbase_disjoint_event_get(kctx->kbdev);
+			break;
+		}
+
+	case KBASE_FUNC_POST_TERM:
+		{
+			kbase_event_close(kctx);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_SETUP:
+		{
+			struct kbase_uk_hwcnt_setup *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_legacy_hwc_setup(kbdev->vinstr_ctx,
+					&kctx->vinstr_cli, setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_DUMP:
+		{
+			/* args ignored */
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwc_dump(kctx->vinstr_cli,
+					BASE_HWCNT_READER_EVENT_MANUAL) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_CLEAR:
+		{
+			/* args ignored */
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwc_clear(kctx->vinstr_cli) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_READER_SETUP:
+		{
+			struct kbase_uk_hwcnt_reader_setup *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwcnt_reader_setup(kbdev->vinstr_ctx,
+					setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_GPU_PROPS_REG_DUMP:
+		{
+			struct kbase_uk_gpuprops *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			if (kbase_gpuprops_uk_get_props(kctx, setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_FIND_CPU_OFFSET:
+		{
+			struct kbase_uk_find_cpu_offset *find = args;
+
+			if (sizeof(*find) != args_size)
+				goto bad_size;
+
+			if (find->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_FIND_CPU_OFFSET: find->gpu_addr: passed parameter is invalid");
+				goto out_bad;
+			}
+
+			if (find->size > SIZE_MAX || find->cpu_addr > ULONG_MAX) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else {
+				int err;
+
+				err = kbasep_find_enclosing_cpu_mapping_offset(
+						kctx,
+						find->gpu_addr,
+						(uintptr_t) find->cpu_addr,
+						(size_t) find->size,
+						&find->offset);
+
+				if (err)
+					ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			}
+			break;
+		}
+	case KBASE_FUNC_GET_VERSION:
+		{
+			struct kbase_uk_get_ddk_version *get_version = (struct kbase_uk_get_ddk_version *)args;
+
+			if (sizeof(*get_version) != args_size)
+				goto bad_size;
+
+			/* version buffer size check is made in compile time assert */
+			memcpy(get_version->version_buffer, KERNEL_SIDE_DDK_VERSION_STRING, sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+			get_version->version_string_size = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+			break;
+		}
+
+	case KBASE_FUNC_STREAM_CREATE:
+		{
+#ifdef CONFIG_SYNC
+			struct kbase_uk_stream_create *screate = (struct kbase_uk_stream_create *)args;
+
+			if (sizeof(*screate) != args_size)
+				goto bad_size;
+
+			if (strnlen(screate->name, sizeof(screate->name)) >= sizeof(screate->name)) {
+				/* not NULL terminated */
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_stream_create(screate->name, &screate->fd) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+#else /* CONFIG_SYNC */
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif /* CONFIG_SYNC */
+			break;
+		}
+	case KBASE_FUNC_FENCE_VALIDATE:
+		{
+#ifdef CONFIG_SYNC
+			struct kbase_uk_fence_validate *fence_validate = (struct kbase_uk_fence_validate *)args;
+
+			if (sizeof(*fence_validate) != args_size)
+				goto bad_size;
+
+			if (kbase_fence_validate(fence_validate->fd) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+#endif /* CONFIG_SYNC */
+			break;
+		}
+
+	case KBASE_FUNC_EXT_BUFFER_LOCK:
+		{
+#ifdef CONFIG_KDS
+			ret = kbase_external_buffer_lock(kctx,
+				(struct kbase_uk_ext_buff_kds_data *)args,
+				args_size);
+			switch (ret) {
+			case 0:
+				ukh->ret = MALI_ERROR_NONE;
+				break;
+			case -ENOMEM:
+				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+				break;
+			default:
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			}
+#endif /* CONFIG_KDS */
+			break;
+		}
+
+	case KBASE_FUNC_SET_TEST_DATA:
+		{
+#if MALI_UNIT_TEST
+			struct kbase_uk_set_test_data *set_data = args;
+
+			shared_kernel_test_data = set_data->test_data;
+			shared_kernel_test_data.kctx.value = (void __user *)kctx;
+			shared_kernel_test_data.mm.value = (void __user *)current->mm;
+			ukh->ret = MALI_ERROR_NONE;
+#endif /* MALI_UNIT_TEST */
+			break;
+		}
+
+	case KBASE_FUNC_INJECT_ERROR:
+		{
+#ifdef CONFIG_MALI_ERROR_INJECT
+			unsigned long flags;
+			struct kbase_error_params params = ((struct kbase_uk_error_params *)args)->params;
+
+			/*mutex lock */
+			spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+			if (job_atom_inject_error(&params) != 0)
+				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+			/*mutex unlock */
+#endif /* CONFIG_MALI_ERROR_INJECT */
+			break;
+		}
+
+	case KBASE_FUNC_MODEL_CONTROL:
+		{
+#ifdef CONFIG_MALI_NO_MALI
+			unsigned long flags;
+			struct kbase_model_control_params params =
+					((struct kbase_uk_model_control_params *)args)->params;
+
+			/*mutex lock */
+			spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+			if (gpu_model_control(kbdev->model, &params) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+			/*mutex unlock */
+#endif /* CONFIG_MALI_NO_MALI */
+			break;
+		}
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	case KBASE_FUNC_KEEP_GPU_POWERED:
+		{
+			dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_KEEP_GPU_POWERED: function is deprecated and disabled\n");
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+	case KBASE_FUNC_GET_PROFILING_CONTROLS:
+		{
+			struct kbase_uk_profiling_controls *controls =
+					(struct kbase_uk_profiling_controls *)args;
+			u32 i;
+
+			if (sizeof(*controls) != args_size)
+				goto bad_size;
+
+			for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+				controls->profiling_controls[i] = kbase_get_profiling_control(kbdev, i);
+
+			break;
+		}
+
+	/* used only for testing purposes; these controls are to be set by gator through gator API */
+	case KBASE_FUNC_SET_PROFILING_CONTROLS:
+		{
+			struct kbase_uk_profiling_controls *controls =
+					(struct kbase_uk_profiling_controls *)args;
+			u32 i;
+
+			if (sizeof(*controls) != args_size)
+				goto bad_size;
+
+			for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+				_mali_profiling_control(i, controls->profiling_controls[i]);
+
+			break;
+		}
+
+	case KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD:
+		{
+			struct kbase_uk_debugfs_mem_profile_add *add_data =
+					(struct kbase_uk_debugfs_mem_profile_add *)args;
+			char *buf;
+			char __user *user_buf;
+
+			if (sizeof(*add_data) != args_size)
+				goto bad_size;
+
+			if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+				dev_err(kbdev->dev, "buffer too big\n");
+				goto out_bad;
+			}
+
+#ifdef CONFIG_COMPAT
+			if (kctx->is_compat)
+				user_buf = compat_ptr(add_data->buf.compat_value);
+			else
+#endif
+				user_buf = add_data->buf.value;
+
+			buf = kmalloc(add_data->len, GFP_KERNEL);
+			if (!buf)
+				goto out_bad;
+
+			if (0 != copy_from_user(buf, user_buf, add_data->len)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				kfree(buf);
+				goto out_bad;
+			}
+
+			if (kbasep_mem_profile_debugfs_insert(kctx, buf,
+							add_data->len)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				kfree(buf);
+				goto out_bad;
+			}
+
+			break;
+		}
+
+#ifdef CONFIG_MALI_NO_MALI
+	case KBASE_FUNC_SET_PRFCNT_VALUES:
+		{
+
+			struct kbase_uk_prfcnt_values *params =
+			  ((struct kbase_uk_prfcnt_values *)args);
+			gpu_model_set_dummy_prfcnt_sample(params->data,
+					params->size);
+
+			break;
+		}
+#endif /* CONFIG_MALI_NO_MALI */
+
+	case KBASE_FUNC_TLSTREAM_ACQUIRE:
+		{
+			struct kbase_uk_tlstream_acquire *tlstream_acquire =
+				args;
+
+			if (sizeof(*tlstream_acquire) != args_size)
+				goto bad_size;
+
+			if (0 != kbase_tlstream_acquire(
+						kctx,
+						&tlstream_acquire->fd)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else if (0 <= tlstream_acquire->fd) {
+				/* Summary stream was cleared during acquire.
+				 * Create static timeline objects that will be
+				 * read by client. */
+				kbase_create_timeline_objects(kctx);
+			}
+			break;
+		}
+	case KBASE_FUNC_TLSTREAM_FLUSH:
+		{
+			struct kbase_uk_tlstream_flush *tlstream_flush =
+				args;
+
+			if (sizeof(*tlstream_flush) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_flush_streams();
+			break;
+		}
+#if MALI_UNIT_TEST
+	case KBASE_FUNC_TLSTREAM_TEST:
+		{
+			struct kbase_uk_tlstream_test *tlstream_test = args;
+
+			if (sizeof(*tlstream_test) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_test(
+					tlstream_test->tpw_count,
+					tlstream_test->msg_delay,
+					tlstream_test->msg_count,
+					tlstream_test->aux_msg);
+			break;
+		}
+	case KBASE_FUNC_TLSTREAM_STATS:
+		{
+			struct kbase_uk_tlstream_stats *tlstream_stats = args;
+
+			if (sizeof(*tlstream_stats) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_stats(
+					&tlstream_stats->bytes_collected,
+					&tlstream_stats->bytes_generated);
+			break;
+		}
+#endif /* MALI_UNIT_TEST */
+
+	case KBASE_FUNC_GET_CONTEXT_ID:
+		{
+			struct kbase_uk_context_id *info = args;
+
+			info->id = kctx->id;
+			break;
+		}
+
+	case KBASE_FUNC_SOFT_EVENT_UPDATE:
+		{
+			struct kbase_uk_soft_event_update *update = args;
+
+			if (sizeof(*update) != args_size)
+				goto bad_size;
+
+			if (((update->new_status != BASE_JD_SOFT_EVENT_SET) &&
+			    (update->new_status != BASE_JD_SOFT_EVENT_RESET)) ||
+			    (update->flags != 0))
+				goto out_bad;
+
+			if (kbasep_write_soft_event_status(
+						kctx, update->evt,
+						update->new_status) != 0) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (update->new_status == BASE_JD_SOFT_EVENT_SET)
+				kbasep_complete_triggered_soft_events(
+						kctx, update->evt);
+
+			break;
+		}
+
+	default:
+		dev_err(kbdev->dev, "unknown ioctl %u\n", id);
+		goto out_bad;
+	}
+
+	return ret;
+
+ bad_size:
+	dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id);
+ out_bad:
+	return -EINVAL;
+}
+
+static struct kbase_device *to_kbase_device(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
+
+static int assign_irqs(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int i;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* 3 IRQ resources */
+	for (i = 0; i < 3; i++) {
+		struct resource *irq_res;
+		int irqtag;
+
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res) {
+			dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+			return -ENOENT;
+		}
+
+#ifdef CONFIG_OF
+		if (!strcmp(irq_res->name, "JOB")) {
+			irqtag = JOB_IRQ_TAG;
+		} else if (!strcmp(irq_res->name, "MMU")) {
+			irqtag = MMU_IRQ_TAG;
+		} else if (!strcmp(irq_res->name, "GPU")) {
+			irqtag = GPU_IRQ_TAG;
+		} else {
+			dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+				irq_res->name);
+			return -EINVAL;
+		}
+#else
+		irqtag = i;
+#endif /* CONFIG_OF */
+		kbdev->irqs[irqtag].irq = irq_res->start;
+		kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK;
+	}
+
+	return 0;
+}
+
+/*
+ * API to acquire device list mutex and
+ * return pointer to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void)
+{
+	mutex_lock(&kbase_dev_list_lock);
+	return &kbase_dev_list;
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_get);
+
+/* API to release the device list mutex */
+void kbase_dev_list_put(const struct list_head *dev_list)
+{
+	mutex_unlock(&kbase_dev_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_put);
+
+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */
+struct kbase_device *kbase_find_device(int minor)
+{
+	struct kbase_device *kbdev = NULL;
+	struct list_head *entry;
+	const struct list_head *dev_list = kbase_dev_list_get();
+
+	list_for_each(entry, dev_list) {
+		struct kbase_device *tmp;
+
+		tmp = list_entry(entry, struct kbase_device, entry);
+		if (tmp->mdev.minor == minor || minor == -1) {
+			kbdev = tmp;
+			get_device(kbdev->dev);
+			break;
+		}
+	}
+	kbase_dev_list_put(dev_list);
+
+	return kbdev;
+}
+EXPORT_SYMBOL(kbase_find_device);
+
+void kbase_release_device(struct kbase_device *kbdev)
+{
+	put_device(kbdev->dev);
+}
+EXPORT_SYMBOL(kbase_release_device);
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+	struct kbase_device *kbdev = NULL;
+	struct kbase_context *kctx;
+	int ret = 0;
+#ifdef CONFIG_DEBUG_FS
+	char kctx_name[64];
+#endif
+
+	kbdev = kbase_find_device(iminor(inode));
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kctx = kbase_create_context(kbdev, is_compat_task());
+	if (!kctx) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	init_waitqueue_head(&kctx->event_queue);
+	filp->private_data = kctx;
+	kctx->filp = filp;
+
+	kctx->infinite_cache_active = kbdev->infinite_cache_active_default;
+
+#ifdef CONFIG_DEBUG_FS
+	snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+
+	kctx->kctx_dentry = debugfs_create_dir(kctx_name,
+			kbdev->debugfs_ctx_directory);
+
+	if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+#ifdef CONFIG_MALI_COH_USER
+	 /* if cache is completely coherent at hardware level, then remove the
+	  * infinite cache control support from debugfs.
+	  */
+#else
+	debugfs_create_bool("infinite_cache", 0644, kctx->kctx_dentry,
+			&kctx->infinite_cache_active);
+#endif /* CONFIG_MALI_COH_USER */
+
+	mutex_init(&kctx->mem_profile_lock);
+
+	kbasep_jd_debugfs_ctx_add(kctx);
+	kbase_debug_mem_view_init(filp);
+
+	kbase_debug_job_fault_context_init(kctx);
+
+	kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool);
+
+	kbase_jit_debugfs_add(kctx);
+#endif /* CONFIG_DEBUGFS */
+
+	dev_dbg(kbdev->dev, "created base context\n");
+
+	{
+		struct kbasep_kctx_list_element *element;
+
+		element = kzalloc(sizeof(*element), GFP_KERNEL);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			element->kctx = kctx;
+			list_add(&element->link, &kbdev->kctx_list);
+			kbase_tlstream_tl_new_ctx(
+					element->kctx,
+					(u32)(element->kctx->id),
+					(u32)(element->kctx->tgid));
+			mutex_unlock(&kbdev->kctx_list_lock);
+		} else {
+			/* we don't treat this as a fail - just warn about it */
+			dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n");
+		}
+	}
+	return 0;
+
+ out:
+	kbase_release_device(kbdev);
+	return ret;
+}
+
+static int kbase_release(struct inode *inode, struct file *filp)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_kctx_list_element *element, *tmp;
+	bool found_element = false;
+
+	kbase_tlstream_tl_del_ctx(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	debugfs_remove_recursive(kctx->kctx_dentry);
+	kbasep_mem_profile_debugfs_remove(kctx);
+	kbase_debug_job_fault_context_term(kctx);
+#endif
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found_element = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+	if (!found_element)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	filp->private_data = NULL;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	/* If this client was performing hwcnt dumping and did not explicitly
+	 * detach itself, remove it from the vinstr core now */
+	if (kctx->vinstr_cli) {
+		struct kbase_uk_hwcnt_setup setup;
+
+		setup.dump_buffer = 0llu;
+		kbase_vinstr_legacy_hwc_setup(
+				kbdev->vinstr_ctx, &kctx->vinstr_cli, &setup);
+	}
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	kbase_destroy_context(kctx);
+
+	dev_dbg(kbdev->dev, "deleted base context\n");
+	kbase_release_device(kbdev);
+	return 0;
+}
+
+#define CALL_MAX_SIZE 536
+
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	u64 msg[(CALL_MAX_SIZE + 7) >> 3] = { 0xdeadbeefdeadbeefull };	/* alignment fixup */
+	u32 size = _IOC_SIZE(cmd);
+	struct kbase_context *kctx = filp->private_data;
+
+	if (size > CALL_MAX_SIZE)
+		return -ENOTTY;
+
+	if (0 != copy_from_user(&msg, (void __user *)arg, size)) {
+		dev_err(kctx->kbdev->dev, "failed to copy ioctl argument into kernel space\n");
+		return -EFAULT;
+	}
+
+	if (kbase_dispatch(kctx, &msg, size) != 0)
+		return -EFAULT;
+
+	if (0 != copy_to_user((void __user *)arg, &msg, size)) {
+		dev_err(kctx->kbdev->dev, "failed to copy results of UK call back to user space\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct base_jd_event_v2 uevent;
+	int out_count = 0;
+
+	if (count < sizeof(uevent))
+		return -ENOBUFS;
+
+	do {
+		while (kbase_event_dequeue(kctx, &uevent)) {
+			if (out_count > 0)
+				goto out;
+
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			if (wait_event_interruptible(kctx->event_queue,
+					kbase_event_pending(kctx)) != 0)
+				return -ERESTARTSYS;
+		}
+		if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+			if (out_count == 0)
+				return -EPIPE;
+			goto out;
+		}
+
+		if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0)
+			return -EFAULT;
+
+		buf += sizeof(uevent);
+		out_count++;
+		count -= sizeof(uevent);
+	} while (count >= sizeof(uevent));
+
+ out:
+	return out_count * sizeof(uevent);
+}
+
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+	struct kbase_context *kctx = filp->private_data;
+
+	poll_wait(filp, &kctx->event_queue, wait);
+	if (kbase_event_pending(kctx))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup);
+
+static int kbase_check_flags(int flags)
+{
+	/* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
+	 * closes the file descriptor in a child process.
+	 */
+	if (0 == (flags & O_CLOEXEC))
+		return -EINVAL;
+
+	return 0;
+}
+
+#ifdef CONFIG_64BIT
+/* The following function is taken from the kernel and just
+ * renamed. As it's not exported to modules we must copy-paste it here.
+ */
+
+static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
+		*info)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+
+	/* Adjust search length to account for worst case alignment overhead */
+	length = info->length + info->align_mask;
+	if (length < info->length)
+		return -ENOMEM;
+
+	/*
+	 * Adjust search limits by the desired length.
+	 * See implementation comment at top of unmapped_area().
+	 */
+	gap_end = info->high_limit;
+	if (gap_end < length)
+		return -ENOMEM;
+	high_limit = gap_end - length;
+
+	if (info->low_limit > high_limit)
+		return -ENOMEM;
+	low_limit = info->low_limit + length;
+
+	/* Check highest gap, which does not precede any rbtree node */
+	gap_start = mm->highest_vm_end;
+	if (gap_start <= high_limit)
+		goto found_highest;
+
+	/* Check if rbtree root looks promising */
+	if (RB_EMPTY_ROOT(&mm->mm_rb))
+		return -ENOMEM;
+	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+	if (vma->rb_subtree_gap < length)
+		return -ENOMEM;
+
+	while (true) {
+		/* Visit right subtree if it looks promising */
+		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+			struct vm_area_struct *right =
+				rb_entry(vma->vm_rb.rb_right,
+					 struct vm_area_struct, vm_rb);
+			if (right->rb_subtree_gap >= length) {
+				vma = right;
+				continue;
+			}
+		}
+
+check_current:
+		/* Check if current node has a suitable gap */
+		gap_end = vma->vm_start;
+		if (gap_end < low_limit)
+			return -ENOMEM;
+		if (gap_start <= high_limit && gap_end - gap_start >= length)
+			goto found;
+
+		/* Visit left subtree if it looks promising */
+		if (vma->vm_rb.rb_left) {
+			struct vm_area_struct *left =
+				rb_entry(vma->vm_rb.rb_left,
+					 struct vm_area_struct, vm_rb);
+			if (left->rb_subtree_gap >= length) {
+				vma = left;
+				continue;
+			}
+		}
+
+		/* Go back up the rbtree to find next candidate node */
+		while (true) {
+			struct rb_node *prev = &vma->vm_rb;
+			if (!rb_parent(prev))
+				return -ENOMEM;
+			vma = rb_entry(rb_parent(prev),
+				       struct vm_area_struct, vm_rb);
+			if (prev == vma->vm_rb.rb_right) {
+				gap_start = vma->vm_prev ?
+					vma->vm_prev->vm_end : 0;
+				goto check_current;
+			}
+		}
+	}
+
+found:
+	/* We found a suitable gap. Clip it with the original high_limit. */
+	if (gap_end > info->high_limit)
+		gap_end = info->high_limit;
+
+found_highest:
+	/* Compute highest gap address at the desired alignment */
+	gap_end -= info->length;
+	gap_end -= (gap_end - info->align_offset) & info->align_mask;
+
+	VM_BUG_ON(gap_end < info->low_limit);
+	VM_BUG_ON(gap_end < gap_start);
+	return gap_end;
+}
+
+
+static unsigned long kbase_get_unmapped_area(struct file *filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	/* based on get_unmapped_area, but simplified slightly due to that some
+	 * values are known in advance */
+	struct kbase_context *kctx = filp->private_data;
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+
+	/* err on fixed address */
+	if ((flags & MAP_FIXED) || addr)
+		return -EINVAL;
+
+	/* too big? */
+	if (len > TASK_SIZE - SZ_2M)
+		return -ENOMEM;
+
+	if (kctx->is_compat)
+		return current->mm->get_unmapped_area(filp, addr, len, pgoff,
+				flags);
+
+	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) {
+		info.high_limit = kctx->same_va_end << PAGE_SHIFT;
+		info.align_mask = 0;
+		info.align_offset = 0;
+	} else {
+		info.high_limit = min_t(unsigned long, mm->mmap_base,
+					(kctx->same_va_end << PAGE_SHIFT));
+		if (len >= SZ_2M) {
+			info.align_offset = SZ_2M;
+			info.align_mask = SZ_2M - 1;
+		} else {
+			info.align_mask = 0;
+			info.align_offset = 0;
+		}
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = SZ_2M;
+	return kbase_unmapped_area_topdown(&info);
+}
+#endif
+
+static const struct file_operations kbase_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_open,
+	.release = kbase_release,
+	.read = kbase_read,
+	.poll = kbase_poll,
+	.unlocked_ioctl = kbase_ioctl,
+	.compat_ioctl = kbase_ioctl,
+	.mmap = kbase_mmap,
+	.check_flags = kbase_check_flags,
+#ifdef CONFIG_64BIT
+	.get_unmapped_area = kbase_get_unmapped_area,
+#endif
+};
+
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value)
+{
+	writel(value, kbdev->reg + offset);
+}
+
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset)
+{
+	return readl(kbdev->reg + offset);
+}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+
+/** Show callback for the @c power_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *current_policy;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_get_policy(kbdev);
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c power_policy sysfs file.
+ *
+ * This function is called when the @c power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls @ref kbase_pm_set_policy to change the
+ * policy.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "power_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/** The sysfs file @c power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+
+/** Show callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c core_availability_policy
+ * sysfs file. This is a list of the available policies with the currently
+ * active one surrounded by square brackets.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *current_policy;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_ca_get_policy(kbdev);
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called when the @c core_availability_policy sysfs file is
+ * written to. It matches the requested policy against the available policies
+ * and if a matching policy is found calls @ref kbase_pm_set_policy to change
+ * the policy.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *new_policy = NULL;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "core_availability_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_ca_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/** The sysfs file @c core_availability_policy
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy);
+
+/** Show callback for the @c core_mask sysfs file.
+ *
+ * This function is called to get the contents of the @c core_mask sysfs
+ * file.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS0) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[0]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS1) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[1]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS2) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[2]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Available core mask : 0x%llX\n",
+			kbdev->gpu_props.props.raw_props.shader_present);
+
+	return ret;
+}
+
+/** Store callback for the @c core_mask sysfs file.
+ *
+ * This function is called when the @c core_mask sysfs file is written to.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	u64 new_core_mask[3];
+	int items;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llx %llx %llx",
+			&new_core_mask[0], &new_core_mask[1],
+			&new_core_mask[2]);
+
+	if (items == 1)
+		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+
+	if (items == 1 || items == 3) {
+		u64 shader_present =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		u64 group0_core_mask =
+				kbdev->gpu_props.props.coherency_info.group[0].
+				core_mask;
+
+		if ((new_core_mask[0] & shader_present) != new_core_mask[0] ||
+				!(new_core_mask[0] & group0_core_mask) ||
+			(new_core_mask[1] & shader_present) !=
+						new_core_mask[1] ||
+				!(new_core_mask[1] & group0_core_mask) ||
+			(new_core_mask[2] & shader_present) !=
+						new_core_mask[2] ||
+				!(new_core_mask[2] & group0_core_mask)) {
+			dev_err(dev, "power_policy: invalid core specification\n");
+			return -EINVAL;
+		}
+
+		if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
+				kbdev->pm.debug_core_mask[1] !=
+						new_core_mask[1] ||
+				kbdev->pm.debug_core_mask[2] !=
+						new_core_mask[2]) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+			kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+					new_core_mask[1], new_core_mask[2]);
+
+			spin_unlock_irqrestore(&kbdev->pm.power_change_lock,
+					flags);
+		}
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n"
+		"Use format <core_mask>\n"
+		"or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+	return -EINVAL;
+}
+
+/** The sysfs file @c core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+
+/**
+ * set_soft_event_timeout() - Store callback for the soft_event_timeout sysfs
+ * file.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This allows setting the timeout for software event jobs. Waiting jobs will
+ * be cancelled after this period expires. This is expressed in milliseconds.
+ *
+ * Return: count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_soft_event_timeout(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int soft_event_timeout_ms;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if ((kstrtoint(buf, 0, &soft_event_timeout_ms) != 0) ||
+	    (soft_event_timeout_ms <= 0))
+		return -EINVAL;
+
+	atomic_set(&kbdev->js_data.soft_event_timeout_ms,
+		   soft_event_timeout_ms);
+
+	return count;
+}
+
+/**
+ * show_soft_event_timeout() - Show callback for the soft_event_timeout sysfs
+ * file.
+ *
+ * This will return the timeout for the software event jobs.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_soft_event_timeout(struct device *dev,
+				       struct device_attribute *attr,
+				       char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n",
+			 atomic_read(&kbdev->js_data.soft_event_timeout_ms));
+}
+
+static DEVICE_ATTR(soft_event_timeout, S_IRUGO | S_IWUSR,
+		   show_soft_event_timeout, set_soft_event_timeout);
+
+/** Store callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the @c js_timeouts sysfs
+ * file. This file contains five values separated by whitespace. The values
+ * are basically the same as JS_SOFT_STOP_TICKS, JS_HARD_STOP_TICKS_SS,
+ * JS_HARD_STOP_TICKS_DUMPING, JS_RESET_TICKS_SS, JS_RESET_TICKS_DUMPING
+ * configuration values (in that order), with the difference that the js_timeout
+ * values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfile file allows the current values in
+ * use by the job scheduler to get override. Note that a value needs to
+ * be other than 0 for it to override the current job scheduler value.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	long js_soft_stop_ms;
+	long js_soft_stop_ms_cl;
+	long js_hard_stop_ms_ss;
+	long js_hard_stop_ms_cl;
+	long js_hard_stop_ms_dumping;
+	long js_reset_ms_ss;
+	long js_reset_ms_cl;
+	long js_reset_ms_dumping;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld",
+			&js_soft_stop_ms, &js_soft_stop_ms_cl,
+			&js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+			&js_hard_stop_ms_dumping, &js_reset_ms_ss,
+			&js_reset_ms_cl, &js_reset_ms_dumping);
+
+	if (items == 8) {
+		u64 ticks;
+
+		if (js_soft_stop_ms >= 0) {
+			ticks = js_soft_stop_ms * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_soft_stop_ticks = ticks;
+		} else {
+			kbdev->js_soft_stop_ticks = -1;
+		}
+
+		if (js_soft_stop_ms_cl >= 0) {
+			ticks = js_soft_stop_ms_cl * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_soft_stop_ticks_cl = ticks;
+		} else {
+			kbdev->js_soft_stop_ticks_cl = -1;
+		}
+
+		if (js_hard_stop_ms_ss >= 0) {
+			ticks = js_hard_stop_ms_ss * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_hard_stop_ticks_ss = ticks;
+		} else {
+			kbdev->js_hard_stop_ticks_ss = -1;
+		}
+
+		if (js_hard_stop_ms_cl >= 0) {
+			ticks = js_hard_stop_ms_cl * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_hard_stop_ticks_cl = ticks;
+		} else {
+			kbdev->js_hard_stop_ticks_cl = -1;
+		}
+
+		if (js_hard_stop_ms_dumping >= 0) {
+			ticks = js_hard_stop_ms_dumping * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_hard_stop_ticks_dumping = ticks;
+		} else {
+			kbdev->js_hard_stop_ticks_dumping = -1;
+		}
+
+		if (js_reset_ms_ss >= 0) {
+			ticks = js_reset_ms_ss * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_reset_ticks_ss = ticks;
+		} else {
+			kbdev->js_reset_ticks_ss = -1;
+		}
+
+		if (js_reset_ms_cl >= 0) {
+			ticks = js_reset_ms_cl * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_reset_ticks_cl = ticks;
+		} else {
+			kbdev->js_reset_ticks_cl = -1;
+		}
+
+		if (js_reset_ms_dumping >= 0) {
+			ticks = js_reset_ms_dumping * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_reset_ticks_dumping = ticks;
+		} else {
+			kbdev->js_reset_ticks_dumping = -1;
+		}
+
+		kbdev->js_timeouts_updated = true;
+
+		dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS with %lu ticks (%lu ms)\n",
+				(unsigned long)kbdev->js_soft_stop_ticks,
+				js_soft_stop_ms);
+		dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS_CL with %lu ticks (%lu ms)\n",
+				(unsigned long)kbdev->js_soft_stop_ticks_cl,
+				js_soft_stop_ms_cl);
+		dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_SS with %lu ticks (%lu ms)\n",
+				(unsigned long)kbdev->js_hard_stop_ticks_ss,
+				js_hard_stop_ms_ss);
+		dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_CL with %lu ticks (%lu ms)\n",
+				(unsigned long)kbdev->js_hard_stop_ticks_cl,
+				js_hard_stop_ms_cl);
+		dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_DUMPING with %lu ticks (%lu ms)\n",
+				(unsigned long)
+					kbdev->js_hard_stop_ticks_dumping,
+				js_hard_stop_ms_dumping);
+		dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_SS with %lu ticks (%lu ms)\n",
+				(unsigned long)kbdev->js_reset_ticks_ss,
+				js_reset_ms_ss);
+		dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_CL with %lu ticks (%lu ms)\n",
+				(unsigned long)kbdev->js_reset_ticks_cl,
+				js_reset_ms_cl);
+		dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_DUMPING with %lu ticks (%lu ms)\n",
+				(unsigned long)kbdev->js_reset_ticks_dumping,
+				js_reset_ms_dumping);
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n"
+			"Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n"
+			"Write 0 for no change, -1 to restore default timeout\n");
+	return -EINVAL;
+}
+
+/** Show callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the @c js_timeouts sysfs
+ * file. It returns the last set values written to the js_timeouts sysfs file.
+ * If the file didn't get written yet, the values will be current setting in
+ * use.
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+	u64 ms;
+	unsigned long js_soft_stop_ms;
+	unsigned long js_soft_stop_ms_cl;
+	unsigned long js_hard_stop_ms_ss;
+	unsigned long js_hard_stop_ms_cl;
+	unsigned long js_hard_stop_ms_dumping;
+	unsigned long js_reset_ms_ss;
+	unsigned long js_reset_ms_cl;
+	unsigned long js_reset_ms_dumping;
+	unsigned long ticks;
+	u32 scheduling_period_ns;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	/* If no contexts have been scheduled since js_timeouts was last written
+	 * to, the new timeouts might not have been latched yet. So check if an
+	 * update is pending and use the new values if necessary. */
+	if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0)
+		scheduling_period_ns = kbdev->js_scheduling_period_ns;
+	else
+		scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0)
+		ticks = kbdev->js_soft_stop_ticks;
+	else
+		ticks = kbdev->js_data.soft_stop_ticks;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_soft_stop_ms = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0)
+		ticks = kbdev->js_soft_stop_ticks_cl;
+	else
+		ticks = kbdev->js_data.soft_stop_ticks_cl;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_soft_stop_ms_cl = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0)
+		ticks = kbdev->js_hard_stop_ticks_ss;
+	else
+		ticks = kbdev->js_data.hard_stop_ticks_ss;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_hard_stop_ms_ss = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0)
+		ticks = kbdev->js_hard_stop_ticks_cl;
+	else
+		ticks = kbdev->js_data.hard_stop_ticks_cl;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_hard_stop_ms_cl = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0)
+		ticks = kbdev->js_hard_stop_ticks_dumping;
+	else
+		ticks = kbdev->js_data.hard_stop_ticks_dumping;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_hard_stop_ms_dumping = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0)
+		ticks = kbdev->js_reset_ticks_ss;
+	else
+		ticks = kbdev->js_data.gpu_reset_ticks_ss;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_reset_ms_ss = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0)
+		ticks = kbdev->js_reset_ticks_cl;
+	else
+		ticks = kbdev->js_data.gpu_reset_ticks_cl;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_reset_ms_cl = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0)
+		ticks = kbdev->js_reset_ticks_dumping;
+	else
+		ticks = kbdev->js_data.gpu_reset_ticks_dumping;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_reset_ms_dumping = (unsigned long)ms;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+			js_soft_stop_ms, js_soft_stop_ms_cl,
+			js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+			js_hard_stop_ms_dumping, js_reset_ms_ss,
+			js_reset_ms_cl, js_reset_ms_dumping);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** The sysfs file @c js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * JS_STOP_STOP_TICKS_SS
+ * JS_STOP_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
+
+/**
+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ *                            file
+ * @dev:   The device the sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the js_scheduling_period sysfs file is written
+ * to. It checks the data written, and if valid updates the js_scheduling_period
+ * value
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	unsigned int js_scheduling_period;
+	u32 new_scheduling_period_ns;
+	u32 old_period;
+	u64 ticks;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtouint(buf, 0, &js_scheduling_period);
+	if (ret || !js_scheduling_period) {
+		dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
+				"Use format <js_scheduling_period_ms>\n");
+		return -EINVAL;
+	}
+
+	new_scheduling_period_ns = js_scheduling_period * 1000000;
+
+	/* Update scheduling timeouts */
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+
+	/* If no contexts have been scheduled since js_timeouts was last written
+	 * to, the new timeouts might not have been latched yet. So check if an
+	 * update is pending and use the new values if necessary. */
+
+	/* Use previous 'new' scheduling period as a base if present. */
+	if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns)
+		old_period = kbdev->js_scheduling_period_ns;
+	else
+		old_period = kbdev->js_data.scheduling_period_ns;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0)
+		ticks = (u64)kbdev->js_soft_stop_ticks * old_period;
+	else
+		ticks = (u64)kbdev->js_data.soft_stop_ticks *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_soft_stop_ticks = ticks ? ticks : 1;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0)
+		ticks = (u64)kbdev->js_soft_stop_ticks_cl * old_period;
+	else
+		ticks = (u64)kbdev->js_data.soft_stop_ticks_cl *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_soft_stop_ticks_cl = ticks ? ticks : 1;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0)
+		ticks = (u64)kbdev->js_hard_stop_ticks_ss * old_period;
+	else
+		ticks = (u64)kbdev->js_data.hard_stop_ticks_ss *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_hard_stop_ticks_ss = ticks ? ticks : 1;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0)
+		ticks = (u64)kbdev->js_hard_stop_ticks_cl * old_period;
+	else
+		ticks = (u64)kbdev->js_data.hard_stop_ticks_cl *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_hard_stop_ticks_cl = ticks ? ticks : 1;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0)
+		ticks = (u64)kbdev->js_hard_stop_ticks_dumping * old_period;
+	else
+		ticks = (u64)kbdev->js_data.hard_stop_ticks_dumping *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_hard_stop_ticks_dumping = ticks ? ticks : 1;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0)
+		ticks = (u64)kbdev->js_reset_ticks_ss * old_period;
+	else
+		ticks = (u64)kbdev->js_data.gpu_reset_ticks_ss *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_reset_ticks_ss = ticks ? ticks : 1;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0)
+		ticks = (u64)kbdev->js_reset_ticks_cl * old_period;
+	else
+		ticks = (u64)kbdev->js_data.gpu_reset_ticks_cl *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_reset_ticks_cl = ticks ? ticks : 1;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0)
+		ticks = (u64)kbdev->js_reset_ticks_dumping * old_period;
+	else
+		ticks = (u64)kbdev->js_data.gpu_reset_ticks_dumping *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_reset_ticks_dumping = ticks ? ticks : 1;
+
+	kbdev->js_scheduling_period_ns = new_scheduling_period_ns;
+	kbdev->js_timeouts_updated = true;
+
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+
+	dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
+			js_scheduling_period);
+
+	return count;
+}
+
+/**
+ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs
+ *                             entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the JS scheduling
+ * period.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	u32 period;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0)
+		period = kbdev->js_scheduling_period_ns;
+	else
+		period = kbdev->js_data.scheduling_period_ns;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+			period / 1000000);
+
+	return ret;
+}
+
+static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
+		show_js_scheduling_period, set_js_scheduling_period);
+
+#if !MALI_CUSTOMER_RELEASE
+/** Store callback for the @c force_replay sysfs file.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (!strncmp("limit=", buf, MIN(6, count))) {
+		int force_replay_limit;
+		int items = sscanf(buf, "limit=%u", &force_replay_limit);
+
+		if (items == 1) {
+			kbdev->force_replay_random = false;
+			kbdev->force_replay_limit = force_replay_limit;
+			kbdev->force_replay_count = 0;
+
+			return count;
+		}
+	} else if (!strncmp("random_limit", buf, MIN(12, count))) {
+		kbdev->force_replay_random = true;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("norandom_limit", buf, MIN(14, count))) {
+		kbdev->force_replay_random = false;
+		kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("core_req=", buf, MIN(9, count))) {
+		unsigned int core_req;
+		int items = sscanf(buf, "core_req=%x", &core_req);
+
+		if (items == 1) {
+			kbdev->force_replay_core_req = (base_jd_core_req)core_req;
+
+			return count;
+		}
+	}
+	dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n");
+	return -EINVAL;
+}
+
+/** Show callback for the @c force_replay sysfs file.
+ *
+ * This function is called to get the contents of the @c force_replay sysfs
+ * file. It returns the last set value written to the force_replay sysfs file.
+ * If the file didn't get written yet, the values will be 0.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_force_replay(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->force_replay_random)
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=0\nrandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_core_req);
+	else
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=%u\nnorandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_limit,
+				kbdev->force_replay_core_req);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** The sysfs file @c force_replay.
+ *
+ */
+static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay,
+		set_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG
+static ssize_t set_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int softstop_always;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &softstop_always);
+	if (ret || ((softstop_always != 0) && (softstop_always != 1))) {
+		dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n"
+				"Use format <soft_stop_always>\n");
+		return -EINVAL;
+	}
+
+	kbdev->js_data.softstop_always = (bool) softstop_always;
+	dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n",
+			(kbdev->js_data.softstop_always) ?
+			"Enabled" : "Disabled");
+	return count;
+}
+
+static ssize_t show_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * By default, soft-stops are disabled when only a single context is present. The ability to
+ * enable soft-stop when only a single context is present can be used for debug and unit-testing purposes.
+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.)
+ */
+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG
+typedef void (kbasep_debug_command_func) (struct kbase_device *);
+
+enum kbasep_debug_command_code {
+	KBASEP_DEBUG_COMMAND_DUMPTRACE,
+
+	/* This must be the last enum */
+	KBASEP_DEBUG_COMMAND_COUNT
+};
+
+struct kbasep_debug_command {
+	char *str;
+	kbasep_debug_command_func *func;
+};
+
+/** Debug commands supported by the driver */
+static const struct kbasep_debug_command debug_commands[] = {
+	{
+	 .str = "dumptrace",
+	 .func = &kbasep_trace_dump,
+	 }
+};
+
+/** Show callback for the @c debug_command sysfs file.
+ *
+ * This function is called to get the contents of the @c debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c debug_command sysfs file.
+ *
+ * This function is called when the @c debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @ref debug_commands to issue the command.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) {
+		if (sysfs_streq(debug_commands[i].str, buf)) {
+			debug_commands[i].func(kbdev);
+			return count;
+		}
+	}
+
+	/* Debug Command not found */
+	dev_err(dev, "debug_command: command not known\n");
+	return -EINVAL;
+}
+
+/** The sysfs file @c debug_command.
+ *
+ * This is used to issue general debug commands to the device driver.
+ * Reading it will produce a list of debug commands, separated by newlines.
+ * Writing to it with one of those commands will issue said command.
+ */
+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
+#endif /* CONFIG_MALI_DEBUG */
+
+/**
+ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get a description of the present Mali
+ * GPU via the gpuinfo sysfs entry.  This includes the GPU family, the
+ * number of cores, the hardware version and the raw product id.  For
+ * example:
+ *
+ *    Mali-T60x MP4 r0p0 0x6956
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t kbase_show_gpuinfo(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	static const struct gpu_product_id_name {
+		unsigned id;
+		char *name;
+	} gpu_product_id_names[] = {
+		{ .id = GPU_ID_PI_T60X, .name = "Mali-T60x" },
+		{ .id = GPU_ID_PI_T62X, .name = "Mali-T62x" },
+		{ .id = GPU_ID_PI_T72X, .name = "Mali-T72x" },
+		{ .id = GPU_ID_PI_T76X, .name = "Mali-T76x" },
+		{ .id = GPU_ID_PI_T82X, .name = "Mali-T82x" },
+		{ .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
+		{ .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
+		{ .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+		{ .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-TMIx" },
+	};
+	const char *product_name = "(Unknown Mali GPU)";
+	struct kbase_device *kbdev;
+	u32 gpu_id;
+	unsigned product_id, product_id_mask;
+	unsigned i;
+	bool is_new_format;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	is_new_format = GPU_ID_IS_NEW_FORMAT(product_id);
+	product_id_mask =
+		(is_new_format ?
+			GPU_ID2_PRODUCT_MODEL :
+			GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
+		const struct gpu_product_id_name *p = &gpu_product_id_names[i];
+
+		if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) &&
+		    (p->id & product_id_mask) ==
+		    (product_id & product_id_mask)) {
+			product_name = p->name;
+			break;
+		}
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%s MP%d r%dp%d 0x%04X\n",
+		product_name, kbdev->gpu_props.num_cores,
+		(gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT,
+		(gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT,
+		product_id);
+}
+static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
+
+/**
+ * set_dvfs_period - Store callback for the dvfs_period sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the dvfs_period sysfs file is written to. It
+ * checks the data written, and if valid updates the DVFS period variable,
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_dvfs_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int dvfs_period;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &dvfs_period);
+	if (ret || dvfs_period <= 0) {
+		dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n"
+				"Use format <dvfs_period_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.dvfs_period = dvfs_period;
+	dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period);
+
+	return count;
+}
+
+/**
+ * show_dvfs_period - Show callback for the dvfs_period sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_dvfs_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period);
+
+	return ret;
+}
+
+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
+		set_dvfs_period);
+
+/**
+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the pm_poweroff sysfs file is written to.
+ *
+ * This file contains three values separated by whitespace. The values
+ * are gpu_poweroff_time (the period of the poweroff timer, in ns),
+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle
+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer
+ * ticks before the GPU is powered off), in that order.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	s64 gpu_poweroff_time;
+	int poweroff_shader_ticks, poweroff_gpu_ticks;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time,
+			&poweroff_shader_ticks,
+			&poweroff_gpu_ticks);
+	if (items != 3) {
+		dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n"
+				"Use format <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+	kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
+	kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+
+	return count;
+}
+
+/**
+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
+			ktime_to_ns(kbdev->pm.gpu_poweroff_time),
+			kbdev->pm.poweroff_shader_ticks,
+			kbdev->pm.poweroff_gpu_ticks);
+
+	return ret;
+}
+
+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
+		set_pm_poweroff);
+
+/**
+ * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the reset_timeout sysfs file is written to. It
+ * checks the data written, and if valid updates the reset timeout.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_reset_timeout(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int reset_timeout;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &reset_timeout);
+	if (ret || reset_timeout <= 0) {
+		dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n"
+				"Use format <reset_timeout_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->reset_timeout_ms = reset_timeout;
+	dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout);
+
+	return count;
+}
+
+/**
+ * show_reset_timeout - Show callback for the reset_timeout sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current reset timeout.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_reset_timeout(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms);
+
+	return ret;
+}
+
+static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout,
+		set_reset_timeout);
+
+
+
+static ssize_t show_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_size);
+	if (err)
+		return err;
+
+	kbase_mem_pool_trim(&kbdev->mem_pool, new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size,
+		set_mem_pool_size);
+
+static ssize_t show_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_max_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_max_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_max_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
+		set_mem_pool_max_size);
+
+
+static int kbasep_secure_mode_enable(struct kbase_device *kbdev)
+{
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+		GPU_COMMAND_SET_PROTECTED_MODE, NULL);
+	return 0;
+}
+
+static int kbasep_secure_mode_disable(struct kbase_device *kbdev)
+{
+	if (!kbase_prepare_to_reset_gpu_locked(kbdev))
+		return -EBUSY;
+
+	kbase_reset_gpu_locked(kbdev);
+
+	return 0;
+}
+
+static struct kbase_secure_ops kbasep_secure_ops = {
+	.secure_mode_enable = kbasep_secure_mode_enable,
+	.secure_mode_disable = kbasep_secure_mode_disable,
+};
+
+static void kbasep_secure_mode_init(struct kbase_device *kbdev)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		/* Use native secure ops */
+		kbdev->secure_ops = &kbasep_secure_ops;
+		kbdev->secure_mode_support = true;
+	}
+#ifdef SECURE_CALLBACKS
+	else {
+		kbdev->secure_ops = SECURE_CALLBACKS;
+		kbdev->secure_mode_support = false;
+
+		if (kbdev->secure_ops) {
+			int err;
+
+			/* Make sure secure mode is disabled on startup */
+			err = kbdev->secure_ops->secure_mode_disable(kbdev);
+
+			/* secure_mode_disable() returns -EINVAL if not
+			 * supported
+			 */
+			kbdev->secure_mode_support = (err != -EINVAL);
+		}
+	}
+#endif
+}
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	int err = -ENOMEM;
+
+	if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) {
+		dev_err(kbdev->dev, "Register window unavailable\n");
+		err = -EIO;
+		goto out_region;
+	}
+
+	kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+	if (!kbdev->reg) {
+		dev_err(kbdev->dev, "Can't remap register window\n");
+		err = -EINVAL;
+		goto out_ioremap;
+	}
+
+	return 0;
+
+ out_ioremap:
+	release_mem_region(kbdev->reg_start, kbdev->reg_size);
+ out_region:
+	return err;
+}
+
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+	if (kbdev->reg) {
+		iounmap(kbdev->reg);
+		release_mem_region(kbdev->reg_start, kbdev->reg_size);
+		kbdev->reg = NULL;
+		kbdev->reg_start = 0;
+		kbdev->reg_size = 0;
+	}
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+static int registers_map(struct kbase_device * const kbdev)
+{
+		/* the first memory resource is the physical address of the GPU
+		 * registers */
+		struct platform_device *pdev = to_platform_device(kbdev->dev);
+		struct resource *reg_res;
+		int err;
+
+		reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!reg_res) {
+			dev_err(kbdev->dev, "Invalid register resource\n");
+			return -ENOENT;
+		}
+
+		kbdev->reg_start = reg_res->start;
+		kbdev->reg_size = resource_size(reg_res);
+
+		err = kbase_common_reg_map(kbdev);
+		if (err) {
+			dev_err(kbdev->dev, "Failed to map registers\n");
+			return err;
+		}
+
+	return 0;
+}
+
+static void registers_unmap(struct kbase_device *kbdev)
+{
+	kbase_common_reg_unmap(kbdev);
+}
+
+static int power_control_init(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int err = 0;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
+	if (IS_ERR_OR_NULL(kbdev->regulator)) {
+		err = PTR_ERR(kbdev->regulator);
+		kbdev->regulator = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get regulator\n");
+			return err;
+		}
+		dev_info(kbdev->dev,
+			"Continuing without Mali regulator control\n");
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+	kbdev->clock = clk_get(kbdev->dev, "clk_mali");
+	if (IS_ERR_OR_NULL(kbdev->clock)) {
+		err = PTR_ERR(kbdev->clock);
+		kbdev->clock = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get clock\n");
+			goto fail;
+		}
+		dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare_enable(kbdev->clock);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Failed to prepare and enable clock (%d)\n",
+				err);
+			goto fail;
+		}
+	}
+
+#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	err = dev_pm_opp_of_add_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	err = of_init_opp_table(kbdev->dev);
+#else
+	err = 0;
+#endif /* LINUX_VERSION_CODE */
+	if (err)
+		dev_dbg(kbdev->dev, "OPP table not found\n");
+#endif /* CONFIG_OF && CONFIG_PM_OPP */
+
+	return 0;
+
+fail:
+
+if (kbdev->clock != NULL) {
+	clk_put(kbdev->clock);
+	kbdev->clock = NULL;
+}
+
+#ifdef CONFIG_REGULATOR
+	if (NULL != kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif
+
+	return err;
+}
+
+static void power_control_term(struct kbase_device *kbdev)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	dev_pm_opp_of_remove_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+	of_free_opp_table(kbdev->dev);
+#endif
+
+	if (kbdev->clock) {
+		clk_disable_unprepare(kbdev->clock);
+		clk_put(kbdev->clock);
+		kbdev->clock = NULL;
+	}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	if (kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#if KBASE_GPU_RESET_EN
+#include <mali_kbase_hwaccess_jm.h>
+
+static void trigger_quirks_reload(struct kbase_device *kbdev)
+{
+	kbase_pm_context_active(kbdev);
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	kbase_pm_context_idle(kbdev);
+}
+
+#define MAKE_QUIRK_ACCESSORS(type) \
+static int type##_quirks_set(void *data, u64 val) \
+{ \
+	struct kbase_device *kbdev; \
+	kbdev = (struct kbase_device *)data; \
+	kbdev->hw_quirks_##type = (u32)val; \
+	trigger_quirks_reload(kbdev); \
+	return 0;\
+} \
+\
+static int type##_quirks_get(void *data, u64 *val) \
+{ \
+	struct kbase_device *kbdev;\
+	kbdev = (struct kbase_device *)data;\
+	*val = kbdev->hw_quirks_##type;\
+	return 0;\
+} \
+DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
+		type##_quirks_set, "%llu\n")
+
+MAKE_QUIRK_ACCESSORS(sc);
+MAKE_QUIRK_ACCESSORS(tiler);
+MAKE_QUIRK_ACCESSORS(mmu);
+
+#endif /* KBASE_GPU_RESET_EN */
+
+static int kbasep_secure_mode_seq_show(struct seq_file *m, void *p)
+{
+	struct kbase_device *kbdev = m->private;
+
+	if (!kbdev->secure_mode_support)
+		seq_puts(m, "unsupported\n");
+	else
+		seq_printf(m, "%s\n", kbdev->secure_mode ? "Y" : "N");
+
+	return 0;
+}
+
+static int kbasep_secure_mode_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_secure_mode_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_secure_mode_debugfs_fops = {
+	.open = kbasep_secure_mode_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	struct dentry *debugfs_ctx_defaults_directory;
+	int err;
+
+	kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
+			NULL);
+	if (!kbdev->mali_debugfs_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
+			kbdev->mali_debugfs_directory);
+	if (!kbdev->debugfs_ctx_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
+			kbdev->debugfs_ctx_directory);
+	if (!debugfs_ctx_defaults_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+#if !MALI_CUSTOMER_RELEASE
+	kbasep_regs_dump_debugfs_add(kbdev);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+	kbase_debug_job_fault_debugfs_init(kbdev);
+	kbasep_gpu_memory_debugfs_init(kbdev);
+#if KBASE_GPU_RESET_EN
+	debugfs_create_file("quirks_sc", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_sc_quirks);
+	debugfs_create_file("quirks_tiler", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_tiler_quirks);
+	debugfs_create_file("quirks_mmu", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_mmu_quirks);
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_COH_USER
+	debugfs_create_bool("infinite_cache", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->infinite_cache_active_default);
+#endif /* CONFIG_MALI_COH_USER */
+
+	debugfs_create_size_t("mem_pool_max_size", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->mem_pool_max_size_default);
+
+#if KBASE_TRACE_ENABLE
+	kbasep_trace_debugfs_init(kbdev);
+#endif /* KBASE_TRACE_ENABLE */
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kbasep_trace_timeline_debugfs_init(kbdev);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+	debugfs_create_file("secure_mode", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_secure_mode_debugfs_fops);
+
+	return 0;
+
+out:
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+	return err;
+}
+
+static void kbase_device_debugfs_term(struct kbase_device *kbdev)
+{
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+}
+
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	return 0;
+}
+
+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
+#endif /* CONFIG_DEBUG_FS */
+
+static void kbase_device_coherency_init(struct kbase_device *kbdev, u32 gpu_id)
+{
+#ifdef CONFIG_OF
+	u32 supported_coherency_bitmap =
+		kbdev->gpu_props.props.raw_props.coherency_mode;
+	const void *coherency_override_dts;
+	u32 override_coherency;
+#endif /* CONFIG_OF */
+
+	kbdev->system_coherency = COHERENCY_NONE;
+
+	/* device tree may override the coherency */
+#ifdef CONFIG_OF
+	coherency_override_dts = of_get_property(kbdev->dev->of_node,
+						"system-coherency",
+						NULL);
+	if (coherency_override_dts) {
+
+		override_coherency = be32_to_cpup(coherency_override_dts);
+
+		if ((override_coherency <= COHERENCY_NONE) &&
+			(supported_coherency_bitmap &
+			 COHERENCY_FEATURE_BIT(override_coherency))) {
+
+			kbdev->system_coherency = override_coherency;
+
+			dev_info(kbdev->dev,
+				"Using coherency mode %u set from dtb",
+				override_coherency);
+		} else
+			dev_warn(kbdev->dev,
+				"Ignoring unsupported coherency mode %u set from dtb",
+				override_coherency);
+	}
+
+#endif /* CONFIG_OF */
+
+	kbdev->gpu_props.props.raw_props.coherency_mode =
+		kbdev->system_coherency;
+}
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+
+/* Callback used by the kbase bus logger client, to initiate a GPU reset
+ * when the bus log is restarted.  GPU reset is used as reference point
+ * in HW bus log analyses.
+ */
+static void kbase_logging_started_cb(void *data)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)data;
+
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	dev_info(kbdev->dev, "KBASE - Bus logger restarted\n");
+}
+#endif
+
+
+static struct attribute *kbase_attrs[] = {
+#ifdef CONFIG_MALI_DEBUG
+	&dev_attr_debug_command.attr,
+	&dev_attr_js_softstop_always.attr,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&dev_attr_force_replay.attr,
+#endif
+	&dev_attr_js_timeouts.attr,
+	&dev_attr_soft_event_timeout.attr,
+	&dev_attr_gpuinfo.attr,
+	&dev_attr_dvfs_period.attr,
+	&dev_attr_pm_poweroff.attr,
+	&dev_attr_reset_timeout.attr,
+	&dev_attr_js_scheduling_period.attr,
+	&dev_attr_power_policy.attr,
+	&dev_attr_core_availability_policy.attr,
+	&dev_attr_core_mask.attr,
+	&dev_attr_mem_pool_size.attr,
+	&dev_attr_mem_pool_max_size.attr,
+	NULL
+};
+
+static const struct attribute_group kbase_attr_group = {
+	.attrs = kbase_attrs,
+};
+
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	const struct list_head *dev_list;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	if (kbdev->inited_subsys & inited_buslogger) {
+		bl_core_client_unregister(kbdev->buslogger);
+		kbdev->inited_subsys &= ~inited_buslogger;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_sysfs_group) {
+		sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+		kbdev->inited_subsys &= ~inited_sysfs_group;
+	}
+
+	if (kbdev->inited_subsys & inited_dev_list) {
+		dev_list = kbase_dev_list_get();
+		list_del(&kbdev->entry);
+		kbase_dev_list_put(dev_list);
+		kbdev->inited_subsys &= ~inited_dev_list;
+	}
+
+	if (kbdev->inited_subsys & inited_misc_register) {
+		misc_deregister(&kbdev->mdev);
+		kbdev->inited_subsys &= ~inited_misc_register;
+	}
+
+	if (kbdev->inited_subsys & inited_get_device) {
+		put_device(kbdev->dev);
+		kbdev->inited_subsys &= ~inited_get_device;
+	}
+
+	if (kbdev->inited_subsys & inited_debugfs) {
+		kbase_device_debugfs_term(kbdev);
+		kbdev->inited_subsys &= ~inited_debugfs;
+	}
+
+	if (kbdev->inited_subsys & inited_job_fault) {
+		kbase_debug_job_fault_dev_term(kbdev);
+		kbdev->inited_subsys &= ~inited_job_fault;
+	}
+
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	if (kbdev->inited_subsys & inited_ipa) {
+		kbase_ipa_term(kbdev->ipa_ctx);
+		kbdev->inited_subsys &= ~inited_ipa;
+	}
+#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+
+	if (kbdev->inited_subsys & inited_vinstr) {
+		kbase_vinstr_term(kbdev->vinstr_ctx);
+		kbdev->inited_subsys &= ~inited_vinstr;
+	}
+
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_term(kbdev);
+		kbdev->inited_subsys &= ~inited_devfreq;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_backend_late) {
+		kbase_backend_late_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_late;
+	}
+
+	if (kbdev->inited_subsys & inited_tlstream) {
+		kbase_tlstream_term();
+		kbdev->inited_subsys &= ~inited_tlstream;
+	}
+
+	/* Bring job and mem sys to a halt before we continue termination */
+
+	if (kbdev->inited_subsys & inited_js)
+		kbasep_js_devdata_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_mem)
+		kbase_mem_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_js) {
+		kbasep_js_devdata_term(kbdev);
+		kbdev->inited_subsys &= ~inited_js;
+	}
+
+	if (kbdev->inited_subsys & inited_mem) {
+		kbase_mem_term(kbdev);
+		kbdev->inited_subsys &= ~inited_mem;
+	}
+
+	if (kbdev->inited_subsys & inited_pm_runtime_init) {
+		kbdev->pm.callback_power_runtime_term(kbdev);
+		kbdev->inited_subsys &= ~inited_pm_runtime_init;
+	}
+
+	if (kbdev->inited_subsys & inited_device) {
+		kbase_device_term(kbdev);
+		kbdev->inited_subsys &= ~inited_device;
+	}
+
+	if (kbdev->inited_subsys & inited_backend_early) {
+		kbase_backend_early_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_early;
+	}
+
+
+	if (kbdev->inited_subsys & inited_power_control) {
+		power_control_term(kbdev);
+		kbdev->inited_subsys &= ~inited_power_control;
+	}
+
+	if (kbdev->inited_subsys & inited_registers_map) {
+		registers_unmap(kbdev);
+		kbdev->inited_subsys &= ~inited_registers_map;
+	}
+
+#ifdef CONFIG_MALI_NO_MALI
+	if (kbdev->inited_subsys & inited_gpu_device) {
+		gpu_device_destroy(kbdev);
+		kbdev->inited_subsys &= ~inited_gpu_device;
+	}
+#endif /* CONFIG_MALI_NO_MALI */
+
+	if (kbdev->inited_subsys != 0)
+		dev_err(kbdev->dev, "Missing sub system termination\n");
+
+	kbase_device_free(kbdev);
+
+	return 0;
+}
+
+static int kbase_platform_device_probe(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev;
+	struct mali_base_gpu_core_props *core_props;
+	u32 gpu_id;
+	const struct list_head *dev_list;
+	int err = 0;
+
+#ifdef CONFIG_OF
+	err = kbase_platform_early_init();
+	if (err) {
+		dev_err(&pdev->dev, "Early platform initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+#endif
+
+	kbdev = kbase_device_alloc();
+	if (!kbdev) {
+		dev_err(&pdev->dev, "Allocate device failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+
+	kbdev->dev = &pdev->dev;
+	dev_set_drvdata(kbdev->dev, kbdev);
+
+#ifdef CONFIG_MALI_NO_MALI
+	err = gpu_device_create(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Dummy model initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_gpu_device;
+#endif /* CONFIG_MALI_NO_MALI */
+
+	err = assign_irqs(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "IRQ search failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	err = registers_map(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Register map failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_registers_map;
+
+	err = power_control_init(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Power control initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_power_control;
+
+
+	err = kbase_backend_early_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Early backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_early;
+
+	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+			kbase_dev_nr);
+
+	kbase_disjoint_init(kbdev);
+
+	/* obtain min/max configured gpu frequencies */
+	core_props = &(kbdev->gpu_props.props.core_props);
+	core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+	core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+
+	kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US;
+
+	err = kbase_device_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Device initialization failed (%d)\n", err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_device;
+
+	if (kbdev->pm.callback_power_runtime_init) {
+		err = kbdev->pm.callback_power_runtime_init(kbdev);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Runtime PM initialization failed\n");
+			kbase_platform_device_remove(pdev);
+			return err;
+		}
+		kbdev->inited_subsys |= inited_pm_runtime_init;
+	}
+
+	err = kbase_mem_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Memory subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_mem;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+	gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	kbase_device_coherency_init(kbdev, gpu_id);
+
+	kbasep_secure_mode_init(kbdev);
+
+	err = kbasep_js_devdata_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job JS devdata initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_js;
+
+	err = kbase_tlstream_init();
+	if (err) {
+		dev_err(kbdev->dev, "Timeline stream initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_tlstream;
+
+	err = kbase_backend_late_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Late backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_late;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	err = kbase_devfreq_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Fevfreq initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_devfreq;
+#endif /* CONFIG_MALI_DEVFREQ */
+
+	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+	if (!kbdev->vinstr_ctx) {
+		dev_err(kbdev->dev,
+			"Virtual instrumentation initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+	kbdev->inited_subsys |= inited_vinstr;
+
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	kbdev->ipa_ctx = kbase_ipa_init(kbdev);
+	if (!kbdev->ipa_ctx) {
+		dev_err(kbdev->dev, "IPA initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+
+	kbdev->inited_subsys |= inited_ipa;
+#endif  /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+
+	err = kbase_debug_job_fault_dev_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job fault debug initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_job_fault;
+
+	err = kbase_device_debugfs_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "DebugFS initialization failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_debugfs;
+
+	/* initialize the kctx list */
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
+
+	kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+	kbdev->mdev.name = kbdev->devname;
+	kbdev->mdev.fops = &kbase_fops;
+	kbdev->mdev.parent = get_device(kbdev->dev);
+	kbdev->inited_subsys |= inited_get_device;
+
+	err = misc_register(&kbdev->mdev);
+	if (err) {
+		dev_err(kbdev->dev, "Misc device registration failed for %s\n",
+			kbdev->devname);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_misc_register;
+
+	dev_list = kbase_dev_list_get();
+	list_add(&kbdev->entry, &kbase_dev_list);
+	kbase_dev_list_put(dev_list);
+	kbdev->inited_subsys |= inited_dev_list;
+
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+	if (err) {
+		dev_err(&pdev->dev, "SysFS group creation failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_sysfs_group;
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	err = bl_core_client_register(kbdev->devname,
+						kbase_logging_started_cb,
+						kbdev, &kbdev->buslogger,
+						THIS_MODULE, NULL);
+	if (err == 0) {
+		kbdev->inited_subsys |= inited_buslogger;
+		bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
+	} else {
+		dev_warn(kbdev->dev, "Bus log client registration failed\n");
+		err = 0;
+	}
+#endif
+
+	dev_info(kbdev->dev,
+			"Probed as %s\n", dev_name(kbdev->mdev.this_device));
+
+	kbase_dev_nr++;
+
+	return err;
+}
+
+/** Suspend callback from the OS.
+ *
+ * This is called by Linux when the device should suspend.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 1;
+#endif
+
+	kbase_pm_suspend(kbdev);
+	return 0;
+}
+
+/** Resume callback from the OS.
+ *
+ * This is called by Linux when the device should resume from suspension.
+ *
+ * @param dev  The device to resume
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_resume(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kbase_pm_resume(kbdev);
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 0;
+#endif
+	return 0;
+}
+
+/** Runtime suspend callback from the OS.
+ *
+ * This is called by Linux when the device should prepare for a condition in which it will
+ * not be able to communicate with the CPU(s) and RAM due to power management.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 1;
+#endif
+
+	if (kbdev->pm.backend.callback_power_runtime_off) {
+		kbdev->pm.backend.callback_power_runtime_off(kbdev);
+		dev_dbg(dev, "runtime suspend\n");
+	}
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/** Runtime resume callback from the OS.
+ *
+ * This is called by Linux when the device should go into a fully active state.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_resume(struct device *dev)
+{
+	int ret = 0;
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->pm.backend.callback_power_runtime_on) {
+		ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
+		dev_dbg(dev, "runtime resume\n");
+	}
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 0;
+#endif
+
+	return ret;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_device_runtime_idle - Runtime idle callback from the OS.
+ * @dev: The device to suspend
+ *
+ * This is called by Linux when the device appears to be inactive and it might
+ * be placed into a low power state.
+ *
+ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend,
+ * otherwise a standard Linux error code
+ */
+static int kbase_device_runtime_idle(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* Use platform specific implementation if it exists. */
+	if (kbdev->pm.backend.callback_power_runtime_idle)
+		return kbdev->pm.backend.callback_power_runtime_idle(kbdev);
+
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+#ifndef CONFIG_MALI_DEVFREQ
+static int mali_os_freeze(struct device *device)
+{
+	mali_dev_freeze();
+	return kbase_device_suspend(device);
+}
+
+static int mali_os_restore(struct device *device)
+{
+	mali_dev_restore();
+	return kbase_device_resume(device);
+}
+#endif
+
+
+/** The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+	.suspend = kbase_device_suspend,
+	.resume = kbase_device_resume,
+#ifndef CONFIG_MALI_DEVFREQ
+	.freeze = mali_os_freeze,
+	.thaw = kbase_device_resume,
+	.restore = mali_os_restore,
+#endif
+#ifdef KBASE_PM_RUNTIME
+	.runtime_suspend = kbase_device_runtime_suspend,
+	.runtime_resume = kbase_device_runtime_resume,
+	.runtime_idle = kbase_device_runtime_idle,
+#endif /* KBASE_PM_RUNTIME */
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+	{ .compatible = "arm,malit6xx" },
+	{ .compatible = "arm,mali-midgard" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
+
+static struct platform_driver kbase_platform_driver = {
+	.probe = kbase_platform_device_probe,
+	.remove = kbase_platform_device_remove,
+	.driver = {
+		   .name = kbase_drv_name,
+		   .owner = THIS_MODULE,
+		   .pm = &kbase_pm_ops,
+		   .of_match_table = of_match_ptr(kbase_dt_ids),
+	},
+};
+
+/*
+ * The driver will not provide a shortcut to create the Mali platform device
+ * anymore when using Device Tree.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else
+
+static int __init kbase_driver_init(void)
+{
+	int ret;
+
+	ret = kbase_platform_early_init();
+	if (ret)
+		return ret;
+
+#ifndef CONFIG_MACH_MANTA
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	ret = kbase_platform_fake_register();
+	if (ret)
+		return ret;
+#endif
+#endif
+	ret = platform_driver_register(&kbase_platform_driver);
+#ifndef CONFIG_MACH_MANTA
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	if (ret)
+		kbase_platform_fake_unregister();
+#endif
+#endif
+	return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+	platform_driver_unregister(&kbase_platform_driver);
+#ifndef CONFIG_MACH_MANTA
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	kbase_platform_fake_unregister();
+#endif
+#endif
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
+		__stringify(BASE_UK_VERSION_MAJOR) "." \
+		__stringify(BASE_UK_VERSION_MINOR) ")");
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
+#define CREATE_TRACE_POINTS
+#endif
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Create the trace points (otherwise we just get code to call a tracepoint) */
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counter);
+
+void kbase_trace_mali_pm_status(u32 event, u64 value)
+{
+	trace_mali_pm_status(event, value);
+}
+
+void kbase_trace_mali_pm_power_off(u32 event, u64 value)
+{
+	trace_mali_pm_power_off(event, value);
+}
+
+void kbase_trace_mali_pm_power_on(u32 event, u64 value)
+{
+	trace_mali_pm_power_on(event, value);
+}
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id)
+{
+	trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id);
+}
+
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value)
+{
+	trace_mali_page_fault_insert_pages(event, value);
+}
+
+void kbase_trace_mali_mmu_as_in_use(int event)
+{
+	trace_mali_mmu_as_in_use(event);
+}
+
+void kbase_trace_mali_mmu_as_released(int event)
+{
+	trace_mali_mmu_as_released(event);
+}
+
+void kbase_trace_mali_total_alloc_pages_change(long long int event)
+{
+	trace_mali_total_alloc_pages_change(event);
+}
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_MALI_SYSTEM_TRACE
+#include "mali_linux_kbase_trace.h"
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100755
index 0000000..fb57ac2
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+	NULL,
+	NULL
+};
+
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+	kbasep_debug_assert_registered_cb.func = func;
+	kbasep_debug_assert_registered_cb.param = param;
+}
+
+void kbasep_debug_assert_call_hook(void)
+{
+	if (kbasep_debug_assert_registered_cb.func != NULL)
+		kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
new file mode 100755
index 0000000..5fff289
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If different from 0, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+struct kbasep_debug_assert_cb {
+	kbase_debug_assert_hook *func;
+	void *param;
+};
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+		"In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION __func__
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated to the @see KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+		do { \
+			pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+			pr_err(__VA_ARGS__);\
+			pr_err("\n");\
+		} while (false)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @see KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+	KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+	/**
+	 * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+	 * @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+	 *
+	 * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+	 *
+	 * @param expr Boolean expression
+	 * @param ...  Message to display when @a expr is false, as a format string followed by format arguments.
+	 */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+		do { \
+			if (!(expr)) { \
+				KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+				KBASE_CALL_ASSERT_HOOK();\
+				BUG();\
+			} \
+		} while (false)
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif				/* CONFIG_MALI_DEBUG */
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif				/* _KBASE_DEBUG_H */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
new file mode 100755
index 0000000..f3e426f
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
@@ -0,0 +1,470 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_debug_job_fault.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+static bool kbase_is_job_fault_event_pending(struct list_head *event_list)
+{
+	bool ret;
+
+	ret = (!list_empty(event_list));
+
+	return ret;
+}
+
+static bool kbase_ctx_has_no_event_pending(
+		struct kbase_context *kctx, struct list_head *event_list)
+{
+	struct base_job_fault_event *event;
+
+	if (list_empty(event_list))
+		return true;
+	list_for_each_entry(event, event_list, head) {
+		if (event->katom->kctx == kctx)
+			return false;
+	}
+	return false;
+}
+
+/* wait until the fault happen and copy the event */
+static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
+		struct list_head *event_list,
+		struct base_job_fault_event *event)
+{
+	struct base_job_fault_event *event_in;
+
+	if (list_empty(event_list)) {
+		if (wait_event_interruptible(kbdev->job_fault_wq,
+				kbase_is_job_fault_event_pending(event_list)))
+			return -ERESTARTSYS;
+	}
+
+	event_in = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+
+	event->event_code = event_in->event_code;
+	event->katom = event_in->katom;
+	return 0;
+
+}
+
+/* remove the event from the queue */
+static struct base_job_fault_event *kbase_job_fault_event_dequeue(
+		struct kbase_device *kbdev, struct list_head *event_list)
+{
+	struct base_job_fault_event *event;
+
+	event = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	list_del(event_list->next);
+
+	return event;
+
+}
+
+/* Remove all the following atoms after the failed atom in the same context
+ * Call the postponed bottom half of job done.
+ * Then, this context could be rescheduled.
+ */
+static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
+{
+	struct list_head *event_list = &kctx->job_fault_resume_event_list;
+
+	while (!list_empty(event_list)) {
+		struct base_job_fault_event *event;
+
+		event = kbase_job_fault_event_dequeue(kctx->kbdev,
+				&kctx->job_fault_resume_event_list);
+		kbase_jd_done_worker(&event->katom->work);
+	}
+
+}
+
+/* Remove all the failed atoms that belong to different contexts
+ * Resume all the contexts that were suspend due to failed job
+ */
+static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+
+	while (!list_empty(event_list)) {
+
+		kbase_job_fault_event_dequeue(kbdev, event_list);
+		wake_up(&kbdev->job_fault_resume_wq);
+	}
+}
+
+static void kbase_job_fault_resume_worker(struct work_struct *data)
+{
+	struct base_job_fault_event *event = container_of(data,
+			struct base_job_fault_event, job_fault_work);
+	struct kbase_context *kctx;
+	struct kbase_jd_atom *katom;
+
+	katom = event->katom;
+	kctx = katom->kctx;
+
+	dev_info(kctx->kbdev->dev, "Job dumping wait\n");
+
+	/* When it was waked up, it need to check if queue is empty or the
+	 * failed atom belongs to different context. If yes, wake up. Both
+	 * of them mean the failed job has been dumped. Please note, it
+	 * should never happen that the job_fault_event_list has the two
+	 * atoms belong to the same context.
+	 */
+	wait_event(kctx->kbdev->job_fault_resume_wq,
+			kbase_ctx_has_no_event_pending(kctx,
+					&kctx->kbdev->job_fault_event_list));
+
+	atomic_set(&kctx->job_fault_count, 0);
+	kbase_jd_done_worker(&katom->work);
+
+	/* In case the following atoms were scheduled during failed job dump
+	 * the job_done_worker was held. We need to rerun it after the dump
+	 * was finished
+	 */
+	kbase_job_fault_resume_event_cleanup(kctx);
+
+	dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
+}
+
+static struct base_job_fault_event *kbase_job_fault_event_queue(
+		struct list_head *event_list,
+		struct kbase_jd_atom *atom,
+		u32 completion_code)
+{
+	struct base_job_fault_event *event;
+
+	event = &atom->fault_event;
+
+	event->katom = atom;
+	event->event_code = completion_code;
+
+	list_add_tail(&event->head, event_list);
+
+	return event;
+
+}
+
+static void kbase_job_fault_event_post(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, u32 completion_code)
+{
+	struct base_job_fault_event *event;
+
+	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
+				katom, completion_code);
+
+	wake_up_interruptible(&kbdev->job_fault_wq);
+
+	INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
+	queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
+
+	dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
+			katom->kctx->tgid, katom->kctx->id);
+
+}
+
+/*
+ * This function will process the job fault
+ * Get the register copy
+ * Send the failed job dump event
+ * Create a Wait queue to wait until the job dump finish
+ */
+
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Check if dumping is in the process
+	 * only one atom of each context can be dumped at the same time
+	 * If the atom belongs to different context, it can be dumped
+	 */
+	if (atomic_read(&kctx->job_fault_count) > 0) {
+		kbase_job_fault_event_queue(
+				&kctx->job_fault_resume_event_list,
+				katom, completion_code);
+		dev_info(kctx->kbdev->dev, "queue:%d\n",
+				kbase_jd_atom_id(kctx, katom));
+		return true;
+	}
+
+	if (kctx->kbdev->job_fault_debug == true) {
+
+		if (completion_code != BASE_JD_EVENT_DONE) {
+
+			if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
+				dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
+				return false;
+			}
+
+			kbase_job_fault_event_post(kctx->kbdev, katom,
+					completion_code);
+			atomic_inc(&kctx->job_fault_count);
+			dev_info(kctx->kbdev->dev, "post:%d\n",
+					kbase_jd_atom_id(kctx, katom));
+			return true;
+
+		}
+	}
+	return false;
+
+}
+
+static int debug_job_fault_show(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+	struct kbase_context *kctx = event->katom->kctx;
+	int i;
+
+	dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
+			kctx->tgid, kctx->id, event->reg_offset);
+
+	if (kctx->reg_dump == NULL) {
+		dev_warn(kbdev->dev, "reg dump is NULL");
+		return -1;
+	}
+
+	if (kctx->reg_dump[event->reg_offset] ==
+			REGISTER_DUMP_TERMINATION_FLAG) {
+		/* Return the error here to stop the read. And the
+		 * following next() will not be called. The stop can
+		 * get the real event resource and release it
+		 */
+		return -1;
+	}
+
+	if (event->reg_offset == 0)
+		seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
+
+	for (i = 0; i < 50; i++) {
+		if (kctx->reg_dump[event->reg_offset] ==
+				REGISTER_DUMP_TERMINATION_FLAG) {
+			break;
+		}
+		seq_printf(m, "%08x: %08x\n",
+				kctx->reg_dump[event->reg_offset],
+				kctx->reg_dump[1+event->reg_offset]);
+		event->reg_offset += 2;
+
+	}
+
+
+	return 0;
+}
+static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+
+	dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
+			event->reg_offset, (int)*pos);
+
+	return event;
+}
+
+static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event;
+
+	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);
+
+	/* The condition is trick here. It needs make sure the
+	 * fault hasn't happened and the dumping hasn't been started,
+	 * or the dumping has finished
+	 */
+	if (*pos == 0) {
+		event = kmalloc(sizeof(*event), GFP_KERNEL);
+		event->reg_offset = 0;
+		if (kbase_job_fault_event_wait(kbdev,
+				&kbdev->job_fault_event_list, event)) {
+			kfree(event);
+			return NULL;
+		}
+
+		/* The cache flush workaround is called in bottom half of
+		 * job done but we delayed it. Now we should clean cache
+		 * earlier. Then the GPU memory dump should be correct.
+		 */
+		if (event->katom->need_cache_flush_cores_retained) {
+			kbase_gpu_cacheclean(kbdev, event->katom);
+			event->katom->need_cache_flush_cores_retained = 0;
+		}
+
+	} else
+		return NULL;
+
+	return event;
+}
+
+static void debug_job_fault_stop(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+
+	/* here we wake up the kbase_jd_done_worker after stop, it needs
+	 * get the memory dump before the register dump in debug daemon,
+	 * otherwise, the memory dump may be incorrect.
+	 */
+
+	if (v != NULL) {
+		kfree(v);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 1");
+
+	} else {
+		if (!list_empty(&kbdev->job_fault_event_list)) {
+			kbase_job_fault_event_dequeue(kbdev,
+				&kbdev->job_fault_event_list);
+			wake_up(&kbdev->job_fault_resume_wq);
+		}
+		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
+	}
+
+}
+
+static const struct seq_operations ops = {
+	.start = debug_job_fault_start,
+	.next = debug_job_fault_next,
+	.stop = debug_job_fault_stop,
+	.show = debug_job_fault_show,
+};
+
+static int debug_job_fault_open(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_open(file, &ops);
+
+	((struct seq_file *)file->private_data)->private = kbdev;
+	dev_info(kbdev->dev, "debug job fault seq open");
+
+	kbdev->job_fault_debug = true;
+
+	return 0;
+
+}
+
+static int debug_job_fault_release(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_release(in, file);
+
+	kbdev->job_fault_debug = false;
+
+	/* Clean the unprocessed job fault. After that, all the suspended
+	 * contexts could be rescheduled.
+	 */
+	kbase_job_fault_event_cleanup(kbdev);
+
+	dev_info(kbdev->dev, "debug job fault seq close");
+
+	return 0;
+}
+
+static const struct file_operations kbasep_debug_job_fault_fops = {
+	.open = debug_job_fault_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = debug_job_fault_release,
+};
+
+/*
+ *  Initialize debugfs entry for job fault dump
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("job_fault", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_debug_job_fault_fops);
+}
+
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+
+	INIT_LIST_HEAD(&kbdev->job_fault_event_list);
+
+	init_waitqueue_head(&(kbdev->job_fault_wq));
+	init_waitqueue_head(&(kbdev->job_fault_resume_wq));
+
+	kbdev->job_fault_resume_workq = alloc_workqueue(
+			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
+	if (!kbdev->job_fault_resume_workq)
+		return -ENOMEM;
+
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+/*
+ * Release the relevant resource per device
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->job_fault_resume_workq);
+}
+
+
+/*
+ *  Initialize the relevant data structure per context
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
+{
+
+	/* We need allocate double size register range
+	 * Because this memory will keep the register address and value
+	 */
+	kctx->reg_dump = vmalloc(0x4000 * 2);
+	if (kctx->reg_dump == NULL)
+		return;
+
+	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
+		vfree(kctx->reg_dump);
+		kctx->reg_dump = NULL;
+	}
+	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
+	atomic_set(&kctx->job_fault_count, 0);
+
+}
+
+/*
+ *  release the relevant resource per context
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
+{
+	vfree(kctx->reg_dump);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
new file mode 100755
index 0000000..0930f90
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_JOB_FAULT_H
+#define _KBASE_DEBUG_JOB_FAULT_H
+
+#include <mali_kbase.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
+
+/**
+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue
+ *		per device and initialize the required lists.
+ * @kbdev:	Device pointer
+ *
+ * Return: Zero on success or a negative error code.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_dev_term - Clean up resources created in
+ *		kbase_debug_job_fault_dev_init.
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_context_init - Initialize the relevant
+ *		data structure per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_context_term - Release the relevant
+ *		resource per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_process - Process the failed job.
+ *      It will send a event and wake up the job fault waiting queue
+ *      Then create a work queue to wait for job dump finish
+ *      This function should be called in the interrupt handler and before
+ *      jd_done that make sure the jd_done_worker will be delayed until the
+ *      job dump finish
+ * @katom: The failed atom pointer
+ * @completion_code: the job status
+ * @return true if dump is going on
+ */
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code);
+
+
+/**
+ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
+ *      address during the job fault process, the relevant registers will
+ *      be saved when a job fault happen
+ * @kctx: KBase context pointer
+ * @reg_range: Maximum register address space
+ * @return true if initializing successfully
+ */
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range);
+
+/**
+ * kbase_job_fault_get_reg_snapshot - Read the interested registers for
+ *      failed job dump
+ * @kctx: KBase context pointer
+ * @return true if getting registers successfully
+ */
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
+
+#endif  /*_KBASE_DEBUG_JOB_FAULT_H*/
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
new file mode 100755
index 0000000..42d1d83
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -0,0 +1,251 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+struct debug_mem_mapping {
+	struct list_head node;
+
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long flags;
+
+	u64 start_pfn;
+	size_t nr_pages;
+};
+
+struct debug_mem_data {
+	struct list_head mapping_list;
+	struct kbase_context *kctx;
+};
+
+struct debug_mem_seq_off {
+	struct list_head *lh;
+	size_t offset;
+};
+
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data;
+	struct debug_mem_mapping *map;
+	loff_t pos = *_pos;
+
+	list_for_each_entry(map, &mem_data->mapping_list, node) {
+		if (pos >= map->nr_pages) {
+			pos -= map->nr_pages;
+		} else {
+			data = kmalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				return NULL;
+			data->lh = &map->node;
+			data->offset = pos;
+			return data;
+		}
+	}
+
+	/* Beyond the end */
+	return NULL;
+}
+
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+	kfree(v);
+}
+
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	if (data->offset < map->nr_pages - 1) {
+		data->offset++;
+		++*pos;
+		return data;
+	}
+
+	if (list_is_last(data->lh, &mem_data->mapping_list))
+		return NULL;
+
+	data->lh = data->lh->next;
+	data->offset = 0;
+	++*pos;
+
+	return data;
+}
+
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+	int i, j;
+	struct page *page;
+	uint32_t *mapping;
+	pgprot_t prot = PAGE_KERNEL;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	kbase_gpu_vm_lock(mem_data->kctx);
+
+	if (data->offset >= map->alloc->nents) {
+		seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+				data->offset) << PAGE_SHIFT);
+		goto out;
+	}
+
+	if (!(map->flags & KBASE_REG_CPU_CACHED))
+		prot = pgprot_writecombine(prot);
+
+	page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset]));
+	mapping = vmap(&page, 1, VM_MAP, prot);
+
+	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+		seq_printf(m, "%016llx:", i + ((map->start_pfn +
+				data->offset) << PAGE_SHIFT));
+
+		for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+			seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+		seq_putc(m, '\n');
+	}
+
+	vunmap(mapping);
+
+	seq_putc(m, '\n');
+
+out:
+	kbase_gpu_vm_unlock(mem_data->kctx);
+	return 0;
+}
+
+static const struct seq_operations ops = {
+	.start = debug_mem_start,
+	.next = debug_mem_next,
+	.stop = debug_mem_stop,
+	.show = debug_mem_show,
+};
+
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+	struct file *kctx_file = i->i_private;
+	struct kbase_context *kctx = kctx_file->private_data;
+	struct rb_node *p;
+	struct debug_mem_data *mem_data;
+	int ret;
+
+	ret = seq_open(file, &ops);
+
+	if (ret)
+		return ret;
+
+	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	mem_data->kctx = kctx;
+
+	INIT_LIST_HEAD(&mem_data->mapping_list);
+
+	get_file(kctx_file);
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (p = rb_first(&kctx->reg_rbtree); p; p = rb_next(p)) {
+		struct kbase_va_region *reg;
+		struct debug_mem_mapping *mapping;
+
+		reg = rb_entry(p, struct kbase_va_region, rblink);
+
+		if (reg->gpu_alloc == NULL)
+			/* Empty region - ignore */
+			continue;
+
+		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+
+		mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		mapping->start_pfn = reg->start_pfn;
+		mapping->nr_pages = reg->nr_pages;
+		mapping->flags = reg->flags;
+		list_add_tail(&mapping->node, &mem_data->mapping_list);
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	((struct seq_file *)file->private_data)->private = mem_data;
+
+	return 0;
+}
+
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+	struct file *kctx_file = inode->i_private;
+	struct seq_file *sfile = file->private_data;
+	struct debug_mem_data *mem_data = sfile->private;
+	struct debug_mem_mapping *mapping;
+
+	seq_release(inode, file);
+
+	while (!list_empty(&mem_data->mapping_list)) {
+		mapping = list_first_entry(&mem_data->mapping_list,
+				struct debug_mem_mapping, node);
+		kbase_mem_phy_alloc_put(mapping->alloc);
+		list_del(&mapping->node);
+		kfree(mapping);
+	}
+
+	kfree(mem_data);
+
+	fput(kctx_file);
+
+	return 0;
+}
+
+static const struct file_operations kbase_debug_mem_view_fops = {
+	.open = debug_mem_open,
+	.release = debug_mem_release,
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+/**
+ * kbase_debug_mem_view_init - Initialise the mem_view sysfs file
+ * @kctx_file: The /dev/mali0 file instance for the context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct file *kctx_file)
+{
+	struct kbase_context *kctx = kctx_file->private_data;
+
+	debugfs_create_file("mem_view", S_IRUGO, kctx->kctx_dentry, kctx_file,
+			&kbase_debug_mem_view_fops);
+}
+
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
new file mode 100755
index 0000000..20ab51a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -0,0 +1,25 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+void kbase_debug_mem_view_init(struct file *kctx_file);
+
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100755
index 0000000..c4af0c3
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,1404 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Defintions (types, defines, etcs) common to Kbase. They are placed here to
+ * allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_mmu_mode.h>
+#include <mali_kbase_instr.h>
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#include <linux/bus_logger.h>
+#endif
+
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#endif				/* CONFIG_SYNC */
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif				/* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_DEVFREQ */
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#if defined(CONFIG_PM_RUNTIME) || \
+	(defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+#define KBASE_PM_RUNTIME 1
+#endif
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif				/* CONFIG_MALI_DEBUG */
+#endif				/* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT             1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT           500
+
+/**
+ * Prevent soft-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * Therefore, soft stop may still be disabled due to HW issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+
+/**
+ * Prevent hard-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS        3
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS              16
+
+/* mmu */
+#define MIDGARD_MMU_VA_BITS 48
+
+#if MIDGARD_MMU_VA_BITS > 39
+#define MIDGARD_MMU_TOPLEVEL    0
+#else
+#define MIDGARD_MMU_TOPLEVEL    1
+#endif
+
+#define MIDGARD_MMU_BOTTOMLEVEL 3
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID     (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8	/* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_hwaccess_defs.h"
+
+#define KBASEP_FORCE_REPLAY_DISABLED 0
+
+/* Maximum force replay limit when randomization is enabled */
+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+
+/** Atom has been previously soft-stoppped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has been previously retried to execute */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom has fail dependency on same-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_PREV (1<<6)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has fail dependency on cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom has been submitted to JSCTX ringbuffers */
+#define KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in secure mode */
+#define KBASE_KATOM_FLAG_SECURE (1<<11)
+/* Atom has been stored in linked list */
+#define KBASE_KATOM_FLAG_JSCTX_IN_LL (1<<12)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS  (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ *  entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+		(JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+#ifdef CONFIG_DEBUG_FS
+struct base_job_fault_event {
+
+	u32 event_code;
+	struct kbase_jd_atom *katom;
+	struct work_struct job_fault_work;
+	struct list_head head;
+	int reg_offset;
+};
+
+#endif
+
+struct kbase_jd_atom_dependency {
+	struct kbase_jd_atom *atom;
+	u8 dep_type;
+};
+
+/**
+ * @brief The function retrieves a read-only reference to the atom field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return readonly reference to dependent ATOM.
+ */
+static inline const struct kbase_jd_atom *const kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return (const struct kbase_jd_atom * const)(dep->atom);
+}
+
+/**
+ * @brief The function retrieves a read-only reference to the dependency type field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return A dependency type value.
+ */
+static inline const u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return dep->dep_type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency.
+ * @param[in] a      The ATOM to be set as a dependency.
+ * @param     type   The ATOM dependency type to be set.
+ *
+ */
+static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
+		struct kbase_jd_atom *a, u8 type)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = a;
+	dep->dep_type = type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency to be cleared.
+ *
+ */
+static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = NULL;
+	dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
+
+enum kbase_atom_gpu_rb_state {
+	/* Atom is not currently present in slot ringbuffer */
+	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
+	/* Atom is in slot ringbuffer but is blocked on a previous atom */
+	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+	/* Atom is in slot ringbuffer but is waiting for cores to become
+	 * available */
+	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
+	/* Atom is in slot ringbuffer but is blocked on affinity */
+	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
+	/* Atom is in slot ringbuffer but is waiting for secure mode switch */
+	KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE,
+	/* Atom is in slot ringbuffer and ready to run */
+	KBASE_ATOM_GPU_RB_READY,
+	/* Atom is in slot ringbuffer and has been submitted to the GPU */
+	KBASE_ATOM_GPU_RB_SUBMITTED,
+	/* Atom must be returned to JS as soon as it reaches the head of the
+	 * ringbuffer due to a previous failure */
+	KBASE_ATOM_GPU_RB_RETURN_TO_JS
+};
+
+struct kbase_ext_res {
+	u64 gpu_address;
+	struct kbase_mem_phy_alloc *alloc;
+};
+
+struct kbase_jd_atom {
+	struct work_struct work;
+	ktime_t start_timestamp;
+	u64 time_spent_us; /**< Total time spent on the GPU in microseconds */
+
+	struct base_jd_udata udata;
+	struct kbase_context *kctx;
+
+	struct list_head dep_head[2];
+	struct list_head dep_item[2];
+	const struct kbase_jd_atom_dependency dep[2];
+
+	u16 nr_extres;
+	struct kbase_ext_res *extres;
+
+	u32 device_nr;
+	u64 affinity;
+	u64 jc;
+	enum kbase_atom_coreref_state coreref_state;
+#ifdef CONFIG_KDS
+	struct list_head node;
+	struct kds_resource_set *kds_rset;
+	bool kds_dep_satisfied;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_SYNC
+	struct sync_fence *fence;
+	struct sync_fence_waiter sync_waiter;
+#endif				/* CONFIG_SYNC */
+
+	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
+	enum base_jd_event_code event_code;
+	base_jd_core_req core_req;	    /**< core requirements */
+	/** Job Slot to retry submitting to if submission from IRQ handler failed
+	 *
+	 * NOTE: see if this can be unified into the another member e.g. the event */
+	int retry_submit_on_slot;
+
+	union kbasep_js_policy_job_info sched_info;
+	/* JS atom priority with respect to other atoms on its kctx. */
+	int sched_priority;
+
+	int poking;		/* BASE_HW_ISSUE_8316 */
+
+	wait_queue_head_t completed;
+	enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	int work_id;
+#endif
+	/* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
+	int slot_nr;
+
+	u32 atom_flags;
+
+	/* Number of times this atom has been retried. Used by replay soft job.
+	 */
+	int retry_count;
+
+	enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+	u64 need_cache_flush_cores_retained;
+
+	atomic_t blocked;
+
+	/* Pointer to atom that this atom has cross-slot dependency on */
+	struct kbase_jd_atom *x_pre_dep;
+	/* Pointer to atom that has cross-slot dependency on this atom */
+	struct kbase_jd_atom *x_post_dep;
+
+	/* The GPU's flush count recorded at the time of submission, used for
+	 * the cache flush optimisation */
+	u32 flush_id;
+
+	struct kbase_jd_atom_backend backend;
+#ifdef CONFIG_DEBUG_FS
+	struct base_job_fault_event fault_event;
+#endif
+
+	struct list_head queue;
+
+	struct kbase_va_region *jit_addr_reg;
+
+	/* If non-zero, this indicates that the atom will fail with the set
+	 * event_code when the atom is processed. */
+	enum base_jd_event_code will_fail_event_code;
+};
+
+static inline bool kbase_jd_katom_is_secure(const struct kbase_jd_atom *katom)
+{
+	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_SECURE);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+struct kbase_jd_context {
+	struct mutex lock;
+	struct kbasep_js_kctx_info sched_info;
+	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+	/** Tracks all job-dispatch jobs.  This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	u32 job_nr;
+
+	/** Waitq that reflects whether there are no jobs (including SW-only
+	 * dependency jobs). This is set when no jobs are present on the ctx,
+	 * and clear when there are jobs.
+	 *
+	 * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
+	 * the Job Scheduler is unaware of jobs that are blocked on dependencies,
+	 * and SW-only dependency jobs.
+	 *
+	 * This waitq can be waited upon to find out when the context jobs are all
+	 * done/cancelled (including those that might've been blocked on
+	 * dependencies) - and so, whether it can be terminated. However, it should
+	 * only be terminated once it is neither present in the policy-queue (see
+	 * kbasep_js_policy_try_evict_ctx() ) nor the run-pool (see
+	 * kbasep_js_kctx_info::ctx::is_scheduled).
+	 *
+	 * Since the waitq is only set under kbase_jd_context::lock,
+	 * the waiter should also briefly obtain and drop kbase_jd_context::lock to
+	 * guarentee that the setter has completed its work on the kbase_context
+	 *
+	 * This must be updated atomically with:
+	 * - kbase_jd_context::job_nr */
+	wait_queue_head_t zero_jobs_wait;
+
+	/** Job Done workqueue. */
+	struct workqueue_struct *job_done_wq;
+
+	spinlock_t tb_lock;
+	u32 *tb;
+	size_t tb_wrap_offset;
+
+#ifdef CONFIG_KDS
+	struct kds_callback kds_cb;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_t work_id;
+#endif
+};
+
+struct kbase_device_info {
+	u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+enum {
+	KBASE_AS_POKE_STATE_IN_FLIGHT     = 1<<0,
+	KBASE_AS_POKE_STATE_KILLING_POKE  = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+	u64	transtab;
+	u64	memattr;
+	u64	transcfg;
+};
+
+/**
+ * Important: Our code makes assumptions that a struct kbase_as structure is always at
+ * kbase_device->as[number]. This is used to recover the containing
+ * struct kbase_device from a struct kbase_as structure.
+ *
+ * Therefore, struct kbase_as structures must not be allocated anywhere else.
+ */
+struct kbase_as {
+	int number;
+
+	struct workqueue_struct *pf_wq;
+	struct work_struct work_pagefault;
+	struct work_struct work_busfault;
+	enum kbase_mmu_fault_type fault_type;
+	u32 fault_status;
+	u64 fault_addr;
+	u64 fault_extra_addr;
+	struct mutex transaction_mutex;
+
+	struct kbase_mmu_setup current_setup;
+
+	/* BASE_HW_ISSUE_8316  */
+	struct workqueue_struct *poke_wq;
+	struct work_struct poke_work;
+	/** Protected by kbasep_js_device_data::runpool_irq::lock */
+	int poke_refcount;
+	/** Protected by kbasep_js_device_data::runpool_irq::lock */
+	kbase_as_poke_state poke_state;
+	struct hrtimer poke_timer;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+struct kbasep_mem_device {
+	atomic_t used_pages;   /* Tracks usage of OS shared memory. Updated
+				   when OS memory is allocated/freed. */
+
+};
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+	/* Comma on its own, to extend the list */
+	,
+	/* Must be the last in the enum */
+	KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT  (((u8)1) << 1)
+
+struct kbase_trace {
+	struct timespec timestamp;
+	u32 thread_id;
+	u32 cpu;
+	void *ctx;
+	bool katom;
+	int atom_number;
+	u64 atom_udata[2];
+	u64 gpu_addr;
+	unsigned long info_val;
+	u8 code;
+	u8 jobslot;
+	u8 refcount;
+	u8 flags;
+};
+
+/** Event IDs for the power management framework.
+ *
+ * Any of these events might be missed, so they should not be relied upon to
+ * find the precise state of the GPU at a particular time in the
+ * trace. Overall, we should get a high percentage of these events for
+ * statisical purposes, and so a few missing should not be a problem */
+enum kbase_timeline_pm_event {
+	/* helper for tests */
+	KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** Event reserved for backwards compatibility with 'init' events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** The power state of the device has changed.
+	 *
+	 * Specifically, the device has reached a desired or available state.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
+
+	/** The GPU is becoming active.
+	 *
+	 * This event is sent when the first context is about to use the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
+
+	/** The GPU is becoming idle.
+	 *
+	 * This event is sent when the last context has finished using the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
+
+	/** Event reserved for backwards compatibility with 'policy_change'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_4,
+
+	/** Event reserved for backwards compatibility with 'system_suspend'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_5,
+
+	/** Event reserved for backwards compatibility with 'system_resume'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_6,
+
+	/** The job scheduler is requesting to power up/down cores.
+	 *
+	 * This event is sent when:
+	 * - powered down cores are needed to complete a job
+	 * - powered up cores are not needed anymore
+	 */
+	KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+
+	KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+};
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+struct kbase_trace_kctx_timeline {
+	atomic_t jd_atoms_in_flight;
+	u32 owner_tgid;
+};
+
+struct kbase_trace_kbdev_timeline {
+	/* Note: strictly speaking, not needed, because it's in sync with
+	 * kbase_device::jm_slots[]::submitted_nr
+	 *
+	 * But it's kept as an example of how to add global timeline tracking
+	 * information
+	 *
+	 * The caller must hold kbasep_js_device_data::runpool_irq::lock when
+	 * accessing this */
+	u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
+
+	/* Last UID for each PM event */
+	atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
+	/* Counter for generating PM event UIDs */
+	atomic_t pm_event_uid_counter;
+	/*
+	 * L2 transition state - true indicates that the transition is ongoing
+	 * Expected to be protected by pm.power_change_lock */
+	bool l2_transitioning;
+};
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+
+struct kbasep_kctx_list_element {
+	struct list_head link;
+	struct kbase_context *kctx;
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_device_data {
+	/**
+	 * The lock protecting Power Management structures accessed outside of
+	 * IRQ.
+	 *
+	 * This lock must also be held whenever the GPU is being powered on or
+	 * off.
+	 */
+	struct mutex lock;
+
+	/** The reference count of active contexts on this device. */
+	int active_count;
+	/** Flag indicating suspending/suspended */
+	bool suspending;
+	/* Wait queue set when active_count == 0 */
+	wait_queue_head_t zero_active_count_wait;
+
+	/**
+	 * Bit masks identifying the available shader cores that are specified
+	 * via sysfs. One mask per job slot.
+	 */
+	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
+	u64 debug_core_mask_all;
+
+	/**
+	 * Lock protecting the power state of the device.
+	 *
+	 * This lock must be held when accessing the shader_available_bitmap,
+	 * tiler_available_bitmap, l2_available_bitmap, shader_inuse_bitmap and
+	 * tiler_inuse_bitmap fields of kbase_device, and the ca_in_transition
+	 * and shader_poweroff_pending fields of kbase_pm_device_data. It is
+	 * also held when the hardware power registers are being written to, to
+	 * ensure that two threads do not conflict over the power transitions
+	 * that the hardware should make.
+	 */
+	spinlock_t power_change_lock;
+
+	/**
+	 * Callback for initializing the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 *
+	 * @return 0 on success, else error code
+	 */
+	 int (*callback_power_runtime_init)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback for terminating the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 */
+	void (*callback_power_runtime_term)(struct kbase_device *kbdev);
+
+	/* Time in milliseconds between each dvfs sample */
+	u32 dvfs_period;
+
+	/* Period of GPU poweroff timer */
+	ktime_t gpu_poweroff_time;
+
+	/* Number of ticks of GPU poweroff timer before shader is powered off */
+	int poweroff_shader_ticks;
+
+	/* Number of ticks of GPU poweroff timer before GPU is powered off */
+	int poweroff_gpu_ticks;
+
+	struct kbase_pm_backend_data backend;
+};
+
+/**
+ * struct kbase_secure_ops - Platform specific functions for GPU secure mode
+ * operations
+ * @secure_mode_enable:  Callback to enable secure mode on the GPU
+ * @secure_mode_disable: Callback to disable secure mode on the GPU
+ */
+struct kbase_secure_ops {
+	/**
+	 * secure_mode_enable() - Enable secure mode on the GPU
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*secure_mode_enable)(struct kbase_device *kbdev);
+
+	/**
+	 * secure_mode_disable() - Disable secure mode on the GPU
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*secure_mode_disable)(struct kbase_device *kbdev);
+};
+
+
+/**
+ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
+ * @kbdev:     Kbase device where memory is used
+ * @cur_size:  Number of free pages currently in the pool (may exceed @max_size
+ *             in some corner cases)
+ * @max_size:  Maximum number of free pages in the pool
+ * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size
+ *             and @page_list
+ * @page_list: List of free pages in the pool
+ * @reclaim:   Shrinker for kernel reclaim of free pages
+ * @next_pool: Pointer to next pool where pages can be allocated when this pool
+ *             is empty. Pages will spill over to the next pool when this pool
+ *             is full. Can be NULL if there is no next pool.
+ */
+struct kbase_mem_pool {
+	struct kbase_device *kbdev;
+	size_t              cur_size;
+	size_t              max_size;
+	spinlock_t          pool_lock;
+	struct list_head    page_list;
+	struct shrinker     reclaim;
+
+	struct kbase_mem_pool *next_pool;
+};
+
+
+#define DEVNAME_SIZE	16
+
+struct kbase_device {
+	s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];
+
+	u32 hw_quirks_sc;
+	u32 hw_quirks_tiler;
+	u32 hw_quirks_mmu;
+	u32 hw_quirks_jm;
+
+	struct list_head entry;
+	struct device *dev;
+	struct miscdevice mdev;
+	u64 reg_start;
+	size_t reg_size;
+	void __iomem *reg;
+	struct {
+		int irq;
+		int flags;
+	} irqs[3];
+	struct clk *clock;
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulator;
+#endif
+	char devname[DEVNAME_SIZE];
+
+#ifdef CONFIG_MALI_NO_MALI
+	void *model;
+	struct kmem_cache *irq_slab;
+	struct workqueue_struct *irq_workq;
+	atomic_t serving_job_irq;
+	atomic_t serving_gpu_irq;
+	atomic_t serving_mmu_irq;
+	spinlock_t reg_op_lock;
+#endif				/* CONFIG_MALI_NO_MALI */
+
+	struct kbase_pm_device_data pm;
+	struct kbasep_js_device_data js_data;
+	struct kbase_mem_pool mem_pool;
+	struct kbasep_mem_device memdev;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	struct kbase_as as[BASE_MAX_NR_AS];
+
+	spinlock_t mmu_mask_change;
+
+	struct kbase_gpu_props gpu_props;
+
+	/** List of SW workarounds for HW issues */
+	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+	/** List of features available */
+	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+
+	/* Bitmaps of cores that are currently in use (running jobs).
+	 * These should be kept up to date by the job scheduler.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 *
+	 * kbase_pm_check_transitions_nolock() should be called when bits are
+	 * cleared to update the power management system and allow transitions to
+	 * occur. */
+	u64 shader_inuse_bitmap;
+
+	/* Refcount for cores in use */
+	u32 shader_inuse_cnt[64];
+
+	/* Bitmaps of cores the JS needs for jobs ready to run */
+	u64 shader_needed_bitmap;
+
+	/* Refcount for cores needed */
+	u32 shader_needed_cnt[64];
+
+	u32 tiler_inuse_cnt;
+
+	u32 tiler_needed_cnt;
+
+	/* struct for keeping track of the disjoint information
+	 *
+	 * The state  is > 0 if the GPU is in a disjoint state. Otherwise 0
+	 * The count is the number of disjoint events that have occurred on the GPU
+	 */
+	struct {
+		atomic_t count;
+		atomic_t state;
+	} disjoint_event;
+
+	/* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
+	u32 l2_users_count;
+
+	/* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be
+	 * submitted to these cores. These are updated by the power management code. The job scheduler should avoid
+	 * submitting new jobs to any cores that are not marked as available.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 */
+	u64 shader_available_bitmap;
+	u64 tiler_available_bitmap;
+	u64 l2_available_bitmap;
+
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+
+	s8 nr_hw_address_spaces;			  /**< Number of address spaces in the GPU (constant after driver initialisation) */
+	s8 nr_user_address_spaces;			  /**< Number of address spaces available to user contexts */
+
+	/* Structure used for instrumentation and HW counters dumping */
+	struct kbase_hwcnt {
+		/* The lock should be used when accessing any of the following members */
+		spinlock_t lock;
+
+		struct kbase_context *kctx;
+		u64 addr;
+
+		struct kbase_context *suspended_kctx;
+		struct kbase_uk_hwcnt_setup suspended_state;
+
+		struct kbase_instr_backend backend;
+	} hwcnt;
+
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	/*value to be written to the irq_throttle register each time an irq is served */
+	atomic_t irq_throttle_cycles;
+
+#if KBASE_TRACE_ENABLE
+	spinlock_t              trace_lock;
+	u16                     trace_first_out;
+	u16                     trace_next_in;
+	struct kbase_trace            *trace_rbuf;
+#endif
+
+	/* This is used to override the current job scheduler values for
+	 * JS_SCHEDULING_PERIOD_NS
+	 * JS_SOFT_STOP_TICKS
+	 * JS_SOFT_STOP_TICKS_CL
+	 * JS_HARD_STOP_TICKS_SS
+	 * JS_HARD_STOP_TICKS_CL
+	 * JS_HARD_STOP_TICKS_DUMPING
+	 * JS_RESET_TICKS_SS
+	 * JS_RESET_TICKS_CL
+	 * JS_RESET_TICKS_DUMPING.
+	 *
+	 * These values are set via the js_timeouts sysfs file.
+	 */
+	u32 js_scheduling_period_ns;
+	int js_soft_stop_ticks;
+	int js_soft_stop_ticks_cl;
+	int js_hard_stop_ticks_ss;
+	int js_hard_stop_ticks_cl;
+	int js_hard_stop_ticks_dumping;
+	int js_reset_ticks_ss;
+	int js_reset_ticks_cl;
+	int js_reset_ticks_dumping;
+	bool js_timeouts_updated;
+
+	u32 reset_timeout_ms;
+
+	struct mutex cacheclean_lock;
+
+	/* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
+	void *platform_context;
+
+	/* List of kbase_contexts created */
+	struct list_head        kctx_list;
+	struct mutex            kctx_list_lock;
+
+#ifdef CONFIG_PM_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freq;
+	unsigned long current_voltage;
+#ifdef CONFIG_DEVFREQ_THERMAL
+	struct devfreq_cooling_device *devfreq_cooling;
+#endif
+#endif
+
+	struct kbase_ipa_context *ipa_ctx;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kbdev_timeline timeline;
+#endif
+
+	/*
+	 * Control for enabling job dump on failure, set when control debugfs
+	 * is opened.
+	 */
+	bool job_fault_debug;
+
+#ifdef CONFIG_DEBUG_FS
+	/* directory for debugfs entries */
+	struct dentry *mali_debugfs_directory;
+	/* Root directory for per context entry */
+	struct dentry *debugfs_ctx_directory;
+
+	/* failed job dump, used for separate debug process */
+	wait_queue_head_t job_fault_wq;
+	wait_queue_head_t job_fault_resume_wq;
+	struct workqueue_struct *job_fault_resume_workq;
+	struct list_head job_fault_event_list;
+	struct kbase_context *kctx_fault;
+
+#if !MALI_CUSTOMER_RELEASE
+	/* Per-device data for register dumping interface */
+	struct {
+		u16 reg_offset; /* Offset of a GPU_CONTROL register to be
+				   dumped upon request */
+	} regs_dump_debugfs_data;
+#endif /* !MALI_CUSTOMER_RELEASE */
+#endif /* CONFIG_DEBUG_FS */
+
+	/* fbdump profiling controls set by gator */
+	u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
+
+
+#if MALI_CUSTOMER_RELEASE == 0
+	/* Number of jobs that are run before a job is forced to fail and
+	 * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
+	 * failures. */
+	int force_replay_limit;
+	/* Count of jobs between forced failures. Incremented on each job. A
+	 * job is forced to fail once this is greater than or equal to
+	 * force_replay_limit. */
+	int force_replay_count;
+	/* Core requirement for jobs to be failed and replayed. May be zero. */
+	base_jd_core_req force_replay_core_req;
+	/* true if force_replay_limit should be randomized. The random
+	 * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+	 */
+	bool force_replay_random;
+#endif
+
+	/* Total number of created contexts */
+	atomic_t ctx_num;
+
+	struct kbase_hwaccess_data hwaccess;
+
+	/* Count of page/bus faults waiting for workqueues to process */
+	atomic_t faults_pending;
+
+	/* true if GPU is powered off or power off operation is in progress */
+	bool poweroff_pending;
+
+
+	/* defaults for new context created for this device */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active_default;
+#else
+	u32 infinite_cache_active_default;
+#endif
+	size_t mem_pool_max_size_default;
+
+	/* system coherency mode  */
+	u32 system_coherency;
+	/* Flag to track when cci snoops have been enabled on the interface */
+	bool cci_snoop_enabled;
+
+	/* SMC function IDs to call into Trusted firmware to enable/disable
+	 * cache snooping. Value of 0 indicates that they are not used
+	 */
+	u32 snoop_enable_smc;
+	u32 snoop_disable_smc;
+
+	/* Secure operations */
+	struct kbase_secure_ops *secure_ops;
+
+	/*
+	 * true when GPU is put into secure mode
+	 */
+	bool secure_mode;
+
+	/*
+	 * true if secure mode is supported
+	 */
+	bool secure_mode_support;
+
+
+#ifdef CONFIG_MALI_DEBUG
+	wait_queue_head_t driver_inactive_wait;
+	bool driver_inactive;
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	/*
+	 * Bus logger integration.
+	 */
+	struct bus_logger_client *buslogger;
+#endif
+	/* Boolean indicating if an IRQ flush during reset is in progress. */
+	bool irq_reset_flush;
+
+	/* list of inited sub systems. Used during terminate/error recovery */
+	u32 inited_subsys;
+};
+
+/* JSCTX ringbuffer size will always be a power of 2. The idx shift must be:
+   - >=2 (buffer size -> 4)
+   - <= 9 (buffer size 2^(9-1)=256) (technically, 10 works for the ringbuffer
+				but this is unnecessary as max atoms is 256)
+ */
+#define JSCTX_RB_IDX_SHIFT (8U)
+#if ((JSCTX_RB_IDX_SHIFT < 2) || ((3 * JSCTX_RB_IDX_SHIFT) >= 32))
+#error "Invalid ring buffer size for 32bit atomic."
+#endif
+#define JSCTX_RB_SIZE (1U << (JSCTX_RB_IDX_SHIFT - 1U)) /* 1 bit for overflow */
+#define JSCTX_RB_SIZE_STORE (1U << JSCTX_RB_IDX_SHIFT)
+#define JSCTX_RB_MASK (JSCTX_RB_SIZE - 1U)
+#define JSCTX_RB_MASK_STORE (JSCTX_RB_SIZE_STORE - 1U)
+
+#define JSCTX_WR_OFFSET         (0U)
+#define JSCTX_RN_OFFSET         (JSCTX_WR_OFFSET   + JSCTX_RB_IDX_SHIFT)
+#define JSCTX_RD_OFFSET         (JSCTX_RN_OFFSET + JSCTX_RB_IDX_SHIFT)
+
+/**
+ * struct jsctx_rb_entry - Ringbuffer entry in &struct jsctx_queue.
+ * @atom_id: Atom ID
+ */
+struct jsctx_rb_entry {
+	u16 atom_id;
+};
+
+/**
+ * struct jsctx_queue - JS context atom queue, containing both ring buffer and linked list.
+ * @entries:     Array of size %JSCTX_RB_SIZE which holds the &struct
+ *               kbase_jd_atom pointers which make up the contents of the ring
+ *               buffer.
+ * @indicies:    An atomic variable containing indicies for the ring buffer.
+ *               Indicies are of size JSCTX_RB_IDX_SHIFT.
+ *               The following are contained:
+ *                - WR_IDX - Write index. Index of the NEXT slot to be written.
+ *                - RN_IDX - Running index. Index of the tail of the list.
+ *                           This is the atom that has been running the longest.
+ *                - RD_IDX - Read index. Index of the next atom to be pulled.
+ * @queue_head:  Head item of the linked list queue.
+ *
+ * Locking:
+ * The linked list assumes jctx.lock is held.
+ * The ringbuffer serves as an intermediary between irq context and non-irq
+ * context, without the need for the two to share any lock. irq context can
+ * pull (and unpull) and only requires the runpool_irq.lock. While non-irq
+ * context can add and remove and only requires holding only jctx.lock.
+ * Error handling affecting both, or the whole ringbuffer in general, must
+ * hold both locks or otherwise ensure (f.ex deschedule/kill) only that thread
+ * is accessing the buffer.
+ * This means that RD_IDX is updated by irq-context (pull and unpull) and must
+ * hold runpool_irq.lock. While WR_IDX (add) and RN_IDX (remove) is updated by
+ * non-irq context and must hold jctx.lock.
+ * Note that pull (or sister function peek) must also access WR_IDX to ensure
+ * there is free space in the buffer, this is ok as WR_IDX is only increased.
+ * A similar situation is apparent with unpull and RN_IDX, but only one atom
+ * (already pulled) can cause either remove or unpull, so this will never
+ * conflict.
+ *
+ * &struct jsctx_queue is a queue of &struct kbase_jd_atom,
+ * part ringbuffer and part linked list.
+ */
+struct jsctx_queue {
+	struct jsctx_rb_entry entries[JSCTX_RB_SIZE];
+
+	atomic_t indicies;
+
+	struct list_head queue_head;
+};
+
+
+
+
+
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
+					 (((minor) & 0xFFF) << 8) | \
+					 ((0 & 0xFF) << 0))
+
+struct kbase_context {
+	struct file *filp;
+	struct kbase_device *kbdev;
+	int id; /* System wide unique id */
+	unsigned long api_version;
+	phys_addr_t pgd;
+	struct list_head event_list;
+	struct list_head event_coalesce_list;
+	struct mutex event_mutex;
+	atomic_t event_closed;
+	struct workqueue_struct *event_workq;
+	atomic_t event_count;
+	int event_coalesce_count;
+
+	bool is_compat;
+
+	atomic_t                setup_complete;
+	atomic_t                setup_in_progress;
+
+	u64 *mmu_teardown_pages;
+
+	struct page *aliasing_sink_page;
+
+	struct mutex            mmu_lock;
+	struct mutex            reg_lock; /* To be converted to a rwlock? */
+	struct rb_root          reg_rbtree; /* Red-Black tree of GPU regions (live regions) */
+
+	unsigned long    cookies;
+	struct kbase_va_region *pending_regions[BITS_PER_LONG];
+
+	wait_queue_head_t event_queue;
+	pid_t tgid;
+	pid_t pid;
+
+	struct kbase_jd_context jctx;
+	atomic_t used_pages;
+	atomic_t         nonmapped_pages;
+
+	struct kbase_mem_pool mem_pool;
+
+	struct shrinker         reclaim;
+	struct list_head        evict_list;
+	struct mutex            evict_lock;
+
+	struct list_head waiting_soft_jobs;
+	spinlock_t waiting_soft_jobs_lock;
+#ifdef CONFIG_KDS
+	struct list_head waiting_kds_resource;
+#endif
+	/** This is effectively part of the Run Pool, because it only has a valid
+	 * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
+	 *
+	 * The kbasep_js_device_data::runpool_irq::lock must be held whilst accessing
+	 * this.
+	 *
+	 * If the context relating to this as_nr is required, you must use
+	 * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
+	 * whilst you're using it. Alternatively, just hold the kbasep_js_device_data::runpool_irq::lock
+	 * to ensure the context doesn't disappear (but this has restrictions on what other locks
+	 * you can take whilst doing this) */
+	int as_nr;
+
+	/* NOTE:
+	 *
+	 * Flags are in jctx.sched_info.ctx.flags
+	 * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
+	 *
+	 * All other flags must be added there */
+	spinlock_t         mm_update_lock;
+	struct mm_struct *process_mm;
+	/* End of the SAME_VA zone */
+	u64 same_va_end;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kctx_timeline timeline;
+#endif
+#ifdef CONFIG_DEBUG_FS
+	/* Content of mem_profile file */
+	char *mem_profile_data;
+	/* Size of @c mem_profile_data */
+	size_t mem_profile_size;
+	/* Mutex guarding memory profile state */
+	struct mutex mem_profile_lock;
+	/* Memory profile file created */
+	bool mem_profile_initialized;
+	struct dentry *kctx_dentry;
+
+	/* for job fault debug */
+	unsigned int *reg_dump;
+	atomic_t job_fault_count;
+	/* This list will keep the following atoms during the dump
+	 * in the same context
+	 */
+	struct list_head job_fault_resume_event_list;
+
+#endif /* CONFIG_DEBUG_FS */
+
+	struct jsctx_queue jsctx_queue
+		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+
+	/* Number of atoms currently pulled from this context */
+	atomic_t atoms_pulled;
+	/* Number of atoms currently pulled from this context, per slot */
+	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+	/* true if last kick() caused atoms to be pulled from this context */
+	bool pulled;
+	/* true if infinite cache is to be enabled for new allocations. Existing
+	 * allocations will not change. bool stored as a u32 per Linux API */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active;
+#else
+	u32 infinite_cache_active;
+#endif
+	/* Bitmask of slots that can be pulled from */
+	u32 slots_pullable;
+
+	/* true if address space assignment is pending */
+	bool as_pending;
+
+	/* Backend specific data */
+	struct kbase_context_backend backend;
+
+	/* Work structure used for deferred ASID assignment */
+	struct work_struct work;
+
+	/* Only one userspace vinstr client per kbase context */
+	struct kbase_vinstr_client *vinstr_cli;
+	struct mutex vinstr_cli_lock;
+
+	/* Must hold queue_mutex when accessing */
+	bool ctx_active;
+
+	/* List of completed jobs waiting for events to be posted */
+	struct list_head completed_jobs;
+	/* Number of work items currently pending on job_done_wq */
+	atomic_t work_count;
+
+	/* true if context is counted in kbdev->js_data.nr_contexts_runnable */
+	bool ctx_runnable_ref;
+
+	/* Waiting soft-jobs will fail when this timer expires */
+	struct hrtimer soft_event_timeout;
+
+	/* JIT allocation management */
+	struct kbase_va_region *jit_alloc[255];
+	struct list_head jit_active_head;
+	struct list_head jit_pool_head;
+	struct list_head jit_destroy_head;
+	struct mutex jit_lock;
+	struct work_struct jit_work;
+
+	/* External sticky resource management */
+	struct list_head ext_res_meta_head;
+};
+
+/**
+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
+ *                                 to a @kbase_context.
+ * @ext_res_node:                  List head for adding the metadata to a
+ *                                 @kbase_context.
+ * @alloc:                         The physical memory allocation structure
+ *                                 which is mapped.
+ * @gpu_addr:                      The GPU virtual address the resource is
+ *                                 mapped to.
+ * @refcount:                      Refcount to keep track of the number of
+ *                                 active mappings.
+ *
+ * External resources can be mapped into multiple contexts as well as the same
+ * context multiple times.
+ * As kbase_va_region itself isn't refcounted we can't attach our extra
+ * information to it as it could be removed under our feet leaving external
+ * resources pinned.
+ * This metadata structure binds a single external resource to a single
+ * context, ensuring that per context refcount is tracked separately so it can
+ * be overridden when needed and abuses by the application (freeing the resource
+ * multiple times) don't effect the refcount of the physical allocation.
+ */
+struct kbase_ctx_ext_res_meta {
+	struct list_head ext_res_node;
+	struct kbase_mem_phy_alloc *alloc;
+	u64 gpu_addr;
+	u64 refcount;
+};
+
+enum kbase_reg_access_type {
+	REG_READ,
+	REG_WRITE
+};
+
+enum kbase_share_attr_bits {
+	/* (1ULL << 8) bit is reserved */
+	SHARE_BOTH_BITS = (2ULL << 8),	/* inner and outer shareable coherency */
+	SHARE_INNER_BITS = (3ULL << 8)	/* inner shareable coherency */
+};
+
+/**
+ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
+ * @kbdev: kbase device
+ *
+ * Return: true if the device access are coherent, false if not.
+ */
+static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
+			(kbdev->system_coherency == COHERENCY_ACE))
+		return true;
+
+	return false;
+}
+
+/* Conversion helpers for setting up high resolution timers */
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime((x)*1000000U))
+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
+
+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
+#define KBASE_CLEAN_CACHE_MAX_LOOPS     100000
+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
+#define KBASE_AS_INACTIVE_MAX_LOOPS     100000
+
+/* Maximum number of times a job can be replayed */
+#define BASEP_JD_REPLAY_LIMIT 15
+
+/* JobDescriptorHeader - taken from the architecture specifications, the layout
+ * is currently identical for all GPU archs. */
+struct job_descriptor_header {
+	u32 exception_status;
+	u32 first_incomplete_task;
+	u64 fault_pointer;
+	u8 job_descriptor_size : 1;
+	u8 job_type : 7;
+	u8 job_barrier : 1;
+	u8 _reserved_01 : 1;
+	u8 _reserved_1 : 1;
+	u8 _reserved_02 : 1;
+	u8 _reserved_03 : 1;
+	u8 _reserved_2 : 1;
+	u8 _reserved_04 : 1;
+	u8 _reserved_05 : 1;
+	u16 job_index;
+	u16 job_dependency_index_1;
+	u16 job_dependency_index_2;
+	union {
+		u64 _64;
+		u32 _32;
+	} next_job;
+};
+
+#endif				/* _KBASE_DEFS_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
new file mode 100755
index 0000000..c55779c
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -0,0 +1,696 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_profiling_gator_api.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports tracing feature provided in the base module.
+ * Please keep it in sync with the value of base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if KBASE_TRACE_ENABLE
+static const char *kbasep_trace_code_string[] = {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+static int kbasep_trace_init(struct kbase_device *kbdev);
+static void kbasep_trace_term(struct kbase_device *kbdev);
+static void kbasep_trace_hook_wrapper(void *param);
+
+struct kbase_device *kbase_device_alloc(void)
+{
+	return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+	const char format[] = "mali_mmu%d";
+	char name[sizeof(format)];
+	const char poke_format[] = "mali_mmu%d_poker";
+	char poke_name[sizeof(poke_format)];
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		snprintf(poke_name, sizeof(poke_name), poke_format, i);
+
+	snprintf(name, sizeof(name), format, i);
+
+	kbdev->as[i].number = i;
+	kbdev->as[i].fault_addr = 0ULL;
+
+	kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+	if (!kbdev->as[i].pf_wq)
+		return -EINVAL;
+
+	mutex_init(&kbdev->as[i].transaction_mutex);
+	INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+	INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+		struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
+		struct work_struct *poke_work = &kbdev->as[i].poke_work;
+
+		kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+		if (!kbdev->as[i].poke_wq) {
+			destroy_workqueue(kbdev->as[i].pf_wq);
+			return -EINVAL;
+		}
+		KBASE_DEBUG_ASSERT(!object_is_on_stack(poke_work));
+		INIT_WORK(poke_work, kbasep_as_do_poke);
+
+		hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+		poke_timer->function = kbasep_as_poke_timer_callback;
+
+		kbdev->as[i].poke_refcount = 0;
+		kbdev->as[i].poke_state = 0u;
+	}
+
+	return 0;
+}
+
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+	destroy_workqueue(kbdev->as[i].pf_wq);
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		destroy_workqueue(kbdev->as[i].poke_wq);
+}
+
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+	int i, err;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		err = kbase_device_as_init(kbdev, i);
+		if (err)
+			goto free_workqs;
+	}
+
+	return 0;
+
+free_workqs:
+	for (; i > 0; i--)
+		kbase_device_as_term(kbdev, i);
+
+	return err;
+}
+
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+		kbase_device_as_term(kbdev, i);
+}
+
+int kbase_device_init(struct kbase_device * const kbdev)
+{
+	int i, err;
+#ifdef CONFIG_ARM64
+	struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
+
+	spin_lock_init(&kbdev->mmu_mask_change);
+#ifdef CONFIG_ARM64
+	kbdev->cci_snoop_enabled = false;
+	np = kbdev->dev->of_node;
+	if (np != NULL) {
+		if (of_property_read_u32(np, "snoop_enable_smc",
+					&kbdev->snoop_enable_smc))
+			kbdev->snoop_enable_smc = 0;
+		if (of_property_read_u32(np, "snoop_disable_smc",
+					&kbdev->snoop_disable_smc))
+			kbdev->snoop_disable_smc = 0;
+		/* Either both or none of the calls should be provided. */
+		if (!((kbdev->snoop_disable_smc == 0
+			&& kbdev->snoop_enable_smc == 0)
+			|| (kbdev->snoop_disable_smc != 0
+			&& kbdev->snoop_enable_smc != 0))) {
+			WARN_ON(1);
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+#endif /* CONFIG_ARM64 */
+	/* Get the list of workarounds for issues on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	err = kbase_hw_set_issues_mask(kbdev);
+	if (err)
+		goto fail;
+
+	/* Set the list of features available on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	kbase_hw_set_features_mask(kbdev);
+
+	kbase_gpuprops_set_features(kbdev);
+
+	/* On Linux 4.0+, dma coherency is determined from device tree */
+#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+	set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif
+
+	/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+	 * device structure was created by device-tree
+	 */
+	if (!kbdev->dev->dma_mask)
+		kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+	err = dma_set_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	err = dma_set_coherent_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+	err = kbase_device_all_as_init(kbdev);
+	if (err)
+		goto as_init_failed;
+
+	spin_lock_init(&kbdev->hwcnt.lock);
+
+	err = kbasep_trace_init(kbdev);
+	if (err)
+		goto term_as;
+
+	mutex_init(&kbdev->cacheclean_lock);
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		kbdev->timeline.slot_atoms_submitted[i] = 0;
+
+	for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
+		atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+	/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
+	for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
+		kbdev->kbase_profiling_controls[i] = 0;
+
+	kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+	atomic_set(&kbdev->ctx_num, 0);
+
+	err = kbase_instr_backend_init(kbdev);
+	if (err)
+		goto term_trace;
+
+	kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+#else
+	kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+#ifdef CONFIG_MALI_DEBUG
+	init_waitqueue_head(&kbdev->driver_inactive_wait);
+#endif /* CONFIG_MALI_DEBUG */
+
+	return 0;
+term_trace:
+	kbasep_trace_term(kbdev);
+term_as:
+	kbase_device_all_as_term(kbdev);
+as_init_failed:
+dma_set_mask_failed:
+fail:
+	return err;
+}
+
+void kbase_device_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+#if KBASE_TRACE_ENABLE
+	kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+	kbase_instr_backend_term(kbdev);
+
+	kbasep_trace_term(kbdev);
+
+	kbase_device_all_as_term(kbdev);
+}
+
+void kbase_device_free(struct kbase_device *kbdev)
+{
+	kfree(kbdev);
+}
+
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(tb);
+
+	/* Interface uses 16-bit value to track last accessed entry. Each entry
+	 * is composed of two 32-bit words.
+	 * This limits the size that can be handled without an overflow. */
+	if (0xFFFF * (2 * sizeof(u32)) < size)
+		return -EINVAL;
+
+	/* set up the header */
+	/* magic number in the first 4 bytes */
+	tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
+	/* Store (write offset = 0, wrap counter = 0, transaction active = no)
+	 * write offset 0 means never written.
+	 * Offsets 1 to (wrap_offset - 1) used to store values when trace started
+	 */
+	tb[1] = 0;
+
+	/* install trace buffer */
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb_wrap_offset = size / 8;
+	kctx->jctx.tb = tb;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+
+	return 0;
+}
+
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb = NULL;
+	kctx->jctx.tb_wrap_offset = 0;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	if (kctx->jctx.tb) {
+		u16 wrap_count;
+		u16 write_offset;
+		u32 *tb = kctx->jctx.tb;
+		u32 header_word;
+
+		header_word = tb[1];
+		KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
+
+		wrap_count = (header_word >> 1) & 0x7FFF;
+		write_offset = (header_word >> 16) & 0xFFFF;
+
+		/* mark as transaction in progress */
+		tb[1] |= 0x1;
+		mb();
+
+		/* calculate new offset */
+		write_offset++;
+		if (write_offset == kctx->jctx.tb_wrap_offset) {
+			/* wrap */
+			write_offset = 1;
+			wrap_count++;
+			wrap_count &= 0x7FFF;	/* 15bit wrap counter */
+		}
+
+		/* store the trace entry at the selected offset */
+		tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
+		tb[write_offset * 2 + 1] = reg_value;
+		mb();
+
+		/* new header word */
+		header_word = (write_offset << 16) | (wrap_count << 1) | 0x0;	/* transaction complete */
+		tb[1] = header_word;
+	}
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	struct kbase_trace *rbuf;
+
+	rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+	if (!rbuf)
+		return -EINVAL;
+
+	kbdev->trace_rbuf = rbuf;
+	spin_lock_init(&kbdev->trace_lock);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	kfree(kbdev->trace_rbuf);
+}
+
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+	s32 written = 0;
+
+	/* Initial part of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
+
+	if (trace_msg->katom)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
+
+	/* NOTE: Could add function callbacks to handle different message types */
+	/* Jobslot present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Refcount present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Rest of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
+}
+
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	dev_dbg(kbdev->dev, "%s", buffer);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+	unsigned long irqflags;
+	struct kbase_trace *trace_msg;
+
+	spin_lock_irqsave(&kbdev->trace_lock, irqflags);
+
+	trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+	/* Fill the message */
+	trace_msg->thread_id = task_pid_nr(current);
+	trace_msg->cpu = task_cpu(current);
+
+	getnstimeofday(&trace_msg->timestamp);
+
+	trace_msg->code = code;
+	trace_msg->ctx = ctx;
+
+	if (NULL == katom) {
+		trace_msg->katom = false;
+	} else {
+		trace_msg->katom = true;
+		trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+		trace_msg->atom_udata[0] = katom->udata.blob[0];
+		trace_msg->atom_udata[1] = katom->udata.blob[1];
+	}
+
+	trace_msg->gpu_addr = gpu_addr;
+	trace_msg->jobslot = jobslot;
+	trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
+	trace_msg->info_val = info_val;
+	trace_msg->flags = flags;
+
+	/* Update the ringbuffer indices */
+	kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+	if (kbdev->trace_next_in == kbdev->trace_first_out)
+		kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+	/* Done */
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	kbdev->trace_first_out = kbdev->trace_next_in;
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 start;
+	u32 end;
+
+	dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	start = kbdev->trace_first_out;
+	end = kbdev->trace_next_in;
+
+	while (start != end) {
+		struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
+
+		kbasep_trace_dump_msg(kbdev, trace_msg);
+
+		start = (start + 1) & KBASE_TRACE_MASK;
+	}
+	dev_dbg(kbdev->dev, "TRACE_END");
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	KBASE_TRACE_CLEAR(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)param;
+
+	kbasep_trace_dump(kbdev);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct trace_seq_state {
+	struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+	u32 start;
+	u32 end;
+};
+
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	if (*pos > KBASE_TRACE_SIZE)
+		return NULL;
+	i = state->start + *pos;
+	if ((state->end >= state->start && i >= state->end) ||
+			i >= state->end + KBASE_TRACE_SIZE)
+		return NULL;
+
+	i &= KBASE_TRACE_MASK;
+
+	return &state->trace_buf[i];
+}
+
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	(*pos)++;
+
+	i = (state->start + *pos) & KBASE_TRACE_MASK;
+	if (i == state->end)
+		return NULL;
+
+	return &state->trace_buf[i];
+}
+
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace *trace_msg = data;
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	seq_printf(s, "%s\n", buffer);
+	return 0;
+}
+
+static const struct seq_operations kbasep_trace_seq_ops = {
+	.start = kbasep_trace_seq_start,
+	.next = kbasep_trace_seq_next,
+	.stop = kbasep_trace_seq_stop,
+	.show = kbasep_trace_seq_show,
+};
+
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct kbase_device *kbdev = inode->i_private;
+	unsigned long flags;
+
+	struct trace_seq_state *state;
+
+	state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
+	if (!state)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	state->start = kbdev->trace_first_out;
+	state->end = kbdev->trace_next_in;
+	memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	return 0;
+}
+
+static const struct file_operations kbasep_trace_debugfs_fops = {
+	.open = kbasep_trace_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_trace", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_trace_debugfs_fops);
+}
+
+#else
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_DEBUG_FS */
+
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	CSTD_UNUSED(param);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
+{
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		kbdev->kbase_profiling_controls[control] = value;
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+}
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control)
+{
+	u32 ret_value = 0;
+
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		ret_value = kbdev->kbase_profiling_controls[control];
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+
+	return ret_value;
+}
+
+/*
+ * Called by gator to control the production of
+ * profiling information at runtime
+ * */
+
+void _mali_profiling_control(u32 action, u32 value)
+{
+	struct kbase_device *kbdev = NULL;
+
+	/* find the first i.e. call with -1 */
+	kbdev = kbase_find_device(-1);
+
+	if (NULL != kbdev)
+		kbase_set_profiling_control(kbdev, action, value);
+}
+KBASE_EXPORT_SYMBOL(_mali_profiling_control);
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
new file mode 100755
index 0000000..f70bccc
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_set(&kbdev->disjoint_event.count, 0);
+	atomic_set(&kbdev->disjoint_event.state, 0);
+}
+
+/* increment the disjoint event count */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* increment the state and the event counter */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.state);
+
+	kbase_disjoint_event(kbdev);
+}
+
+/* decrement the state */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+	kbase_disjoint_event(kbdev);
+
+	atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* increments the count only if the state is > 0 */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	if (atomic_read(&kbdev->disjoint_event.state))
+		kbase_disjoint_event(kbdev);
+}
+
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
new file mode 100755
index 0000000..bf8c304
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -0,0 +1,259 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+#include <mali_kbase_tlstream.h>
+
+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct base_jd_udata data;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+	data = katom->udata;
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
+
+	kbase_tlstream_tl_nret_atom_ctx(katom, kctx);
+	kbase_tlstream_tl_del_atom(katom);
+
+	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+
+	wake_up(&katom->completed);
+
+	return data;
+}
+
+int kbase_event_pending(struct kbase_context *ctx)
+{
+	KBASE_DEBUG_ASSERT(ctx);
+
+	return (atomic_read(&ctx->event_count) != 0) ||
+			(atomic_read(&ctx->event_closed) != 0);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+	struct kbase_jd_atom *atom;
+
+	KBASE_DEBUG_ASSERT(ctx);
+
+	mutex_lock(&ctx->event_mutex);
+
+	if (list_empty(&ctx->event_list)) {
+		if (!atomic_read(&ctx->event_closed)) {
+			mutex_unlock(&ctx->event_mutex);
+			return -1;
+		}
+
+		/* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+		mutex_unlock(&ctx->event_mutex);
+		uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+		memset(&uevent->udata, 0, sizeof(uevent->udata));
+		dev_dbg(ctx->kbdev->dev,
+				"event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+				BASE_JD_EVENT_DRV_TERMINATED);
+		return 0;
+	}
+
+	/* normal event processing */
+	atomic_dec(&ctx->event_count);
+	atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
+	list_del(ctx->event_list.next);
+
+	mutex_unlock(&ctx->event_mutex);
+
+	dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+	uevent->event_code = atom->event_code;
+	uevent->atom_number = (atom - ctx->jctx.atoms);
+
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(atom);
+
+	mutex_lock(&ctx->jctx.lock);
+	uevent->udata = kbase_event_process(ctx, atom);
+	mutex_unlock(&ctx->jctx.lock);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue);
+
+/**
+ * kbase_event_process_noreport_worker - Worker for processing atoms that do not
+ *                                       return an event but do have external
+ *                                       resources
+ * @data:  Work structure
+ */
+static void kbase_event_process_noreport_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(katom);
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_event_process(kctx, katom);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+/**
+ * kbase_event_process_noreport - Process atoms that do not return an event
+ * @kctx:  Context pointer
+ * @katom: Atom to be processed
+ *
+ * Atoms that do not have external resources will be processed immediately.
+ * Atoms that do have external resources will be processed on a workqueue, in
+ * order to avoid locking issues.
+ */
+static void kbase_event_process_noreport(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		INIT_WORK(&katom->work, kbase_event_process_noreport_worker);
+		queue_work(kctx->event_workq, &katom->work);
+	} else {
+		kbase_event_process(kctx, katom);
+	}
+}
+
+/**
+ * kbase_event_coalesce - Move pending events to the main event list
+ * @kctx:  Context pointer
+ *
+ * kctx->event_list and kctx->event_coalesce_count must be protected
+ * by a lock unless this is the last thread using them
+ * (and we're about to terminate the lock).
+ *
+ * Return: The number of pending events moved to the main event list
+ */
+static int kbase_event_coalesce(struct kbase_context *kctx)
+{
+	const int event_count = kctx->event_coalesce_count;
+
+	/* Join the list of pending events onto the tail of the main list
+	   and reset it */
+	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
+	kctx->event_coalesce_count = 0;
+
+	/* Return the number of events moved */
+	return event_count;
+}
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+	if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
+		if (atom->event_code == BASE_JD_EVENT_DONE) {
+			/* Don't report the event */
+			kbase_event_process_noreport(ctx, atom);
+			return;
+		}
+	}
+
+	if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
+		/* Don't report the event */
+		kbase_event_process_noreport(ctx, atom);
+		return;
+	}
+
+	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
+		/* Don't report the event until other event(s) have completed */
+		mutex_lock(&ctx->event_mutex);
+		list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
+		++ctx->event_coalesce_count;
+		mutex_unlock(&ctx->event_mutex);
+	} else {
+		/* Report the event and any pending events now */
+		int event_count = 1;
+
+		mutex_lock(&ctx->event_mutex);
+		event_count += kbase_event_coalesce(ctx);
+		list_add_tail(&atom->dep_item[0], &ctx->event_list);
+		atomic_add(event_count, &ctx->event_count);
+		mutex_unlock(&ctx->event_mutex);
+
+		kbase_event_wakeup(ctx);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_event_post);
+
+void kbase_event_close(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->event_mutex);
+	atomic_set(&kctx->event_closed, true);
+	mutex_unlock(&kctx->event_mutex);
+	kbase_event_wakeup(kctx);
+}
+
+int kbase_event_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	INIT_LIST_HEAD(&kctx->event_list);
+	INIT_LIST_HEAD(&kctx->event_coalesce_list);
+	mutex_init(&kctx->event_mutex);
+	atomic_set(&kctx->event_count, 0);
+	kctx->event_coalesce_count = 0;
+	atomic_set(&kctx->event_closed, false);
+	kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+	if (NULL == kctx->event_workq)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init);
+
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+	int event_count;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+	flush_workqueue(kctx->event_workq);
+	destroy_workqueue(kctx->event_workq);
+
+	/* We use kbase_event_dequeue to remove the remaining events as that
+	 * deals with all the cleanup needed for the atoms.
+	 *
+	 * Note: use of kctx->event_list without a lock is safe because this must be the last
+	 * thread using it (because we're about to terminate the lock)
+	 */
+	event_count = kbase_event_coalesce(kctx);
+	atomic_add(event_count, &kctx->event_count);
+
+	while (!list_empty(&kctx->event_list)) {
+		struct base_jd_event_v2 event;
+
+		kbase_event_dequeue(kctx, &event);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
new file mode 100755
index 0000000..ce65b55
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* NB taken from gator  */
+/*
+ * List of possible actions to be controlled by DS-5 Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping
+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
+ * they are unknown inside gator.
+ */
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP  2
+#define GATOR_JOB_SLOT_SOFT_STOPPED  3
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 event, u64 value);
+void kbase_trace_mali_pm_power_off(u32 event, u64 value);
+void kbase_trace_mali_pm_power_on(u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
+void kbase_trace_mali_mmu_as_in_use(int event);
+void kbase_trace_mali_mmu_as_released(int event);
+void kbase_trace_mali_total_alloc_pages_change(long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif  /* _KBASE_GATOR_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
new file mode 100755
index 0000000..0615641
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -0,0 +1,322 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_gator_api.h"
+#include "mali_kbase_gator_hwcnt_names.h"
+#include "mali_kbase_instr.h"
+
+#define MALI_MAX_CORES_PER_GROUP		4
+#define MALI_MAX_NUM_BLOCKS_PER_GROUP	8
+#define MALI_COUNTERS_PER_BLOCK			64
+#define MALI_BYTES_PER_COUNTER			4
+
+struct kbase_gator_hwcnt_handles {
+	struct kbase_device  *kbdev;
+	struct kbase_context *kctx;
+	u64 hwcnt_gpu_va;
+	void *hwcnt_cpu_va;
+	struct kbase_vmap_struct hwcnt_map;
+};
+
+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
+{
+	uint32_t gpu_id;
+	const char * const *hardware_counters;
+	struct kbase_device *kbdev;
+
+	if (!total_counters)
+		return NULL;
+
+	/* Get the first device - it doesn't matter in this case */
+	kbdev = kbase_find_device(-1);
+	if (!kbdev)
+		return NULL;
+
+	gpu_id = kbdev->gpu_props.props.core_props.product_id;
+
+	switch (gpu_id) {
+	/* If we are using a Mali-T60x device */
+	case GPU_ID_PI_T60X:
+		hardware_counters = hardware_counters_mali_t60x;
+		*total_counters = ARRAY_SIZE(hardware_counters_mali_t60x);
+		break;
+	/* If we are using a Mali-T62x device */
+	case GPU_ID_PI_T62X:
+		hardware_counters = hardware_counters_mali_t62x;
+		*total_counters = ARRAY_SIZE(hardware_counters_mali_t62x);
+		break;
+	/* If we are using a Mali-T72x device */
+	case GPU_ID_PI_T72X:
+		hardware_counters = hardware_counters_mali_t72x;
+		*total_counters = ARRAY_SIZE(hardware_counters_mali_t72x);
+		break;
+	/* If we are using a Mali-T76x device */
+	case GPU_ID_PI_T76X:
+		hardware_counters = hardware_counters_mali_t76x;
+		*total_counters = ARRAY_SIZE(hardware_counters_mali_t76x);
+		break;
+	/* If we are using a Mali-T82x device */
+	case GPU_ID_PI_T82X:
+		hardware_counters = hardware_counters_mali_t82x;
+		*total_counters = ARRAY_SIZE(hardware_counters_mali_t82x);
+		break;
+	/* If we are using a Mali-T83x device */
+	case GPU_ID_PI_T83X:
+		hardware_counters = hardware_counters_mali_t83x;
+		*total_counters = ARRAY_SIZE(hardware_counters_mali_t83x);
+		break;
+	/* If we are using a Mali-T86x device */
+	case GPU_ID_PI_T86X:
+		hardware_counters = hardware_counters_mali_t86x;
+		*total_counters = ARRAY_SIZE(hardware_counters_mali_t86x);
+		break;
+	/* If we are using a Mali-T88x device */
+	case GPU_ID_PI_TFRX:
+		hardware_counters = hardware_counters_mali_t88x;
+		*total_counters = ARRAY_SIZE(hardware_counters_mali_t88x);
+		 break;
+	default:
+		hardware_counters = NULL;
+		*total_counters = 0;
+		dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", gpu_id);
+		break;
+	}
+
+	/* Release the kbdev reference. */
+	kbase_release_device(kbdev);
+
+	/* If we return a string array take a reference on the module (or fail). */
+	if (hardware_counters && !try_module_get(THIS_MODULE))
+		return NULL;
+
+	return hardware_counters;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
+
+void kbase_gator_hwcnt_term_names(void)
+{
+	/* Release the module reference. */
+	module_put(THIS_MODULE);
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
+
+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+	struct kbase_uk_hwcnt_setup setup;
+	int err;
+	uint32_t dump_size = 0, i = 0;
+	struct kbase_va_region *reg;
+	u64 flags;
+	u64 nr_pages;
+	u16 va_alignment = 0;
+
+	if (!in_out_info)
+		return NULL;
+
+	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
+	if (!hand)
+		return NULL;
+
+	/* Get the first device */
+	hand->kbdev = kbase_find_device(-1);
+	if (!hand->kbdev)
+		goto free_hand;
+
+	/* Create a kbase_context */
+	hand->kctx = kbase_create_context(hand->kbdev, true);
+	if (!hand->kctx)
+		goto release_device;
+
+	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
+	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
+	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
+
+	/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
+	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
+		uint32_t cg, j;
+		uint64_t core_mask;
+
+		/* There are 8 hardware counters blocks per core group */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			in_out_info->nr_core_groups, GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto destroy_context;
+
+		dump_size = in_out_info->nr_core_groups *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			MALI_COUNTERS_PER_BLOCK *
+			MALI_BYTES_PER_COUNTER;
+
+		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
+			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
+
+			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
+				if (core_mask & (1u << j))
+					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+				else
+					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			}
+
+			in_out_info->hwc_layout[i++] = TILER_BLOCK;
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+
+			if (0 == cg)
+				in_out_info->hwc_layout[i++] = JM_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+		}
+	/* If we are using any other device */
+	} else {
+		uint32_t nr_l2, nr_sc_bits, j;
+		uint64_t core_mask;
+
+		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
+
+		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+		nr_sc_bits = fls64(core_mask);
+
+		/* The job manager and tiler sets of counters
+		 * are always present */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto destroy_context;
+
+		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+
+		in_out_info->hwc_layout[i++] = JM_BLOCK;
+		in_out_info->hwc_layout[i++] = TILER_BLOCK;
+
+		for (j = 0; j < nr_l2; j++)
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+		while (core_mask != 0ull) {
+			if ((core_mask & 1ull) != 0ull)
+				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			core_mask >>= 1;
+		}
+	}
+
+	in_out_info->nr_hwc_blocks = i;
+
+	in_out_info->size = dump_size;
+
+	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR;
+	nr_pages = PFN_UP(dump_size);
+	reg = kbase_mem_alloc(hand->kctx, nr_pages, nr_pages, 0,
+			&flags, &hand->hwcnt_gpu_va, &va_alignment);
+	if (!reg)
+		goto free_layout;
+
+	hand->hwcnt_cpu_va = kbase_vmap(hand->kctx, hand->hwcnt_gpu_va,
+			dump_size, &hand->hwcnt_map);
+
+	if (!hand->hwcnt_cpu_va)
+		goto free_buffer;
+
+	in_out_info->kernel_dump_buffer = hand->hwcnt_cpu_va;
+	memset(in_out_info->kernel_dump_buffer, 0, nr_pages * PAGE_SIZE);
+
+	/*setup.dump_buffer = (uintptr_t)in_out_info->kernel_dump_buffer;*/
+	setup.dump_buffer = hand->hwcnt_gpu_va;
+	setup.jm_bm = in_out_info->bitmask[0];
+	setup.tiler_bm = in_out_info->bitmask[1];
+	setup.shader_bm = in_out_info->bitmask[2];
+	setup.mmu_l2_bm = in_out_info->bitmask[3];
+
+	err = kbase_instr_hwcnt_enable(hand->kctx, &setup);
+	if (err)
+		goto free_unmap;
+
+	kbase_instr_hwcnt_clear(hand->kctx);
+
+	return hand;
+
+free_unmap:
+	kbase_vunmap(hand->kctx, &hand->hwcnt_map);
+
+free_buffer:
+	kbase_mem_free(hand->kctx, hand->hwcnt_gpu_va);
+
+free_layout:
+	kfree(in_out_info->hwc_layout);
+
+destroy_context:
+	kbase_destroy_context(hand->kctx);
+
+release_device:
+	kbase_release_device(hand->kbdev);
+
+free_hand:
+	kfree(hand);
+
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
+
+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (in_out_info)
+		kfree(in_out_info->hwc_layout);
+
+	if (opaque_handles) {
+		kbase_instr_hwcnt_disable(opaque_handles->kctx);
+		kbase_vunmap(opaque_handles->kctx, &opaque_handles->hwcnt_map);
+		kbase_mem_free(opaque_handles->kctx, opaque_handles->hwcnt_gpu_va);
+		kbase_destroy_context(opaque_handles->kctx);
+		kbase_release_device(opaque_handles->kbdev);
+		kfree(opaque_handles);
+	}
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
+
+uint32_t kbase_gator_instr_hwcnt_dump_complete(
+		struct kbase_gator_hwcnt_handles *opaque_handles,
+		uint32_t * const success)
+{
+	bool ret_res, success_res;
+
+	if (opaque_handles && success) {
+		ret_res = kbase_instr_hwcnt_dump_complete(opaque_handles->kctx,
+				&success_res);
+		*success = (uint32_t)success_res;
+		return (uint32_t)(ret_res != 0);
+	}
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
+
+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (opaque_handles)
+		return (kbase_instr_hwcnt_request_dump(
+				opaque_handles->kctx) == 0);
+
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
new file mode 100755
index 0000000..ef9ac0f
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
@@ -0,0 +1,219 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to fetch hardware counters.
+ */
+
+/* This define is used by the gator kernel module compile to select which DDK
+ * API calling convention to use. If not defined (legacy DDK) gator assumes
+ * version 1. The version to DDK release mapping is:
+ *     Version 1 API: DDK versions r1px, r2px
+ *     Version 2 API: DDK versions r3px, r4px
+ *     Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a
+ * counter dump. If this returns a non-zero value the request has been queued,
+ * otherwise the driver has been unable to do so (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the  previously
+ * requested dump has been succesful. If this returns non-zero the counter dump
+ * has resolved, but the value of *success must also be tested as the dump
+ * may have not been successful. If it returns zero the counter dump was
+ * abandoned due to the device being busy (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ *        kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ *    In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ *        hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ *        hwcnt_name # pointer to name list
+ *
+ *        u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ *        # Iterate over each 64-counter block in this GPU configuration
+ *        for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ *            hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ *            # Skip reserved type blocks - they contain no counters at all
+ *            if( type == RESERVED_BLOCK ) {
+ *                continue;
+ *            }
+ *
+ *            size_t name_offset = type * 64;
+ *            size_t data_offset = i * 64;
+ *
+ *            # Iterate over the names of the counters in this block type
+ *            for( j = 0; j < 64; j++) {
+ *                const char * name = hwcnt_name[name_offset+j];
+ *
+ *                # Skip empty name strings - there is no counter here
+ *                if( name[0] == '\0' ) {
+ *                    continue;
+ *                }
+ *
+ *                u32 data = hwcnt_data[data_offset+j];
+ *
+ *                printk( "COUNTER: %s DATA: %u\n", name, data );
+ *            }
+ *        }
+ *
+ *
+ *     Note that in most implementations you typically want to either SUM or
+ *     AVERAGE multiple instances of the same counter if, for example, you have
+ *     multiple shader cores or multiple L2 caches. The most sensible view for
+ *     analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ *     counters.
+ *
+ * 7] Goto 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling
+ *    kbase_gator_hwcnt_term_names(). This function must only be called if
+ *    init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+enum hwc_type {
+	JM_BLOCK = 0,
+	TILER_BLOCK,
+	SHADER_BLOCK,
+	MMU_L2_BLOCK,
+	RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+	/* Passed from Gator to kbase */
+
+	/* the bitmask of enabled hardware counters for each counter block */
+	uint16_t bitmask[4];
+
+	/* Passed from kbase to Gator */
+
+	/* ptr to counter dump memory */
+	void *kernel_dump_buffer;
+
+	/* size of counter dump memory */
+	uint32_t size;
+
+	/* the ID of the Mali device */
+	uint32_t gpu_id;
+
+	/* the number of shader cores in the GPU */
+	uint32_t nr_cores;
+
+	/* the number of core groups */
+	uint32_t nr_core_groups;
+
+	/* the memory layout of the performance counters */
+	enum hwc_type *hwc_layout;
+
+	/* the total number of hardware couter blocks */
+	uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info   A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ *                      specific information that will be returned to Gator. On entry Gator must have populated the
+ *                      'bitmask' field with the counters it wishes to enable for each class of counter block.
+ *                      Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ *                      enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ *                      the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ *                      kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU.
+ *
+ * @return              Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info       A pointer to a structure containing the enabled counters passed from Gator and all the
+ *                          Mali specific information that will be returned to Gator.
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ * @param[out] success      Non-zero on success, zero on failure.
+ *
+ * @return                  Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ *                          completed dump may not have dumped succesfully, so the caller must test for both
+ *                          a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ *
+ * @return                  Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_counters The total number of counters short names in the Mali devices' list.
+ *
+ * @return                    Pointer to an array of strings of length *total_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
new file mode 100755
index 0000000..eb76f01
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -0,0 +1,2159 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_H_
+#define _KBASE_GATOR_HWCNT_NAMES_H_
+
+/*
+ * "Short names" for hardware counters used by Streamline. Counters names are
+ * stored in accordance with their memory layout in the binary counter block
+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
+ * of 64 counters, and each GPU implements the same set of "masters" although
+ * the counters each master exposes within its block of 64 may vary.
+ *
+ * Counters which are an empty string are simply "holes" in the counter memory
+ * where no counter exists.
+ */
+
+static const char * const hardware_counters_mali_t60x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MESSAGES_SENT",
+	"T60x_MESSAGES_RECEIVED",
+	"T60x_GPU_ACTIVE",
+	"T60x_IRQ_ACTIVE",
+	"T60x_JS0_JOBS",
+	"T60x_JS0_TASKS",
+	"T60x_JS0_ACTIVE",
+	"",
+	"T60x_JS0_WAIT_READ",
+	"T60x_JS0_WAIT_ISSUE",
+	"T60x_JS0_WAIT_DEPEND",
+	"T60x_JS0_WAIT_FINISH",
+	"T60x_JS1_JOBS",
+	"T60x_JS1_TASKS",
+	"T60x_JS1_ACTIVE",
+	"",
+	"T60x_JS1_WAIT_READ",
+	"T60x_JS1_WAIT_ISSUE",
+	"T60x_JS1_WAIT_DEPEND",
+	"T60x_JS1_WAIT_FINISH",
+	"T60x_JS2_JOBS",
+	"T60x_JS2_TASKS",
+	"T60x_JS2_ACTIVE",
+	"",
+	"T60x_JS2_WAIT_READ",
+	"T60x_JS2_WAIT_ISSUE",
+	"T60x_JS2_WAIT_DEPEND",
+	"T60x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T60x_TI_JOBS_PROCESSED",
+	"T60x_TI_TRIANGLES",
+	"T60x_TI_QUADS",
+	"T60x_TI_POLYGONS",
+	"T60x_TI_POINTS",
+	"T60x_TI_LINES",
+	"T60x_TI_VCACHE_HIT",
+	"T60x_TI_VCACHE_MISS",
+	"T60x_TI_FRONT_FACING",
+	"T60x_TI_BACK_FACING",
+	"T60x_TI_PRIM_VISIBLE",
+	"T60x_TI_PRIM_CULLED",
+	"T60x_TI_PRIM_CLIPPED",
+	"T60x_TI_LEVEL0",
+	"T60x_TI_LEVEL1",
+	"T60x_TI_LEVEL2",
+	"T60x_TI_LEVEL3",
+	"T60x_TI_LEVEL4",
+	"T60x_TI_LEVEL5",
+	"T60x_TI_LEVEL6",
+	"T60x_TI_LEVEL7",
+	"T60x_TI_COMMAND_1",
+	"T60x_TI_COMMAND_2",
+	"T60x_TI_COMMAND_3",
+	"T60x_TI_COMMAND_4",
+	"T60x_TI_COMMAND_4_7",
+	"T60x_TI_COMMAND_8_15",
+	"T60x_TI_COMMAND_16_63",
+	"T60x_TI_COMMAND_64",
+	"T60x_TI_COMPRESS_IN",
+	"T60x_TI_COMPRESS_OUT",
+	"T60x_TI_COMPRESS_FLUSH",
+	"T60x_TI_TIMESTAMPS",
+	"T60x_TI_PCACHE_HIT",
+	"T60x_TI_PCACHE_MISS",
+	"T60x_TI_PCACHE_LINE",
+	"T60x_TI_PCACHE_STALL",
+	"T60x_TI_WRBUF_HIT",
+	"T60x_TI_WRBUF_MISS",
+	"T60x_TI_WRBUF_LINE",
+	"T60x_TI_WRBUF_PARTIAL",
+	"T60x_TI_WRBUF_STALL",
+	"T60x_TI_ACTIVE",
+	"T60x_TI_LOADING_DESC",
+	"T60x_TI_INDEX_WAIT",
+	"T60x_TI_INDEX_RANGE_WAIT",
+	"T60x_TI_VERTEX_WAIT",
+	"T60x_TI_PCACHE_WAIT",
+	"T60x_TI_WRBUF_WAIT",
+	"T60x_TI_BUS_READ",
+	"T60x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_TI_UTLB_STALL",
+	"T60x_TI_UTLB_REPLAY_MISS",
+	"T60x_TI_UTLB_REPLAY_FULL",
+	"T60x_TI_UTLB_NEW_MISS",
+	"T60x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T60x_FRAG_ACTIVE",
+	"T60x_FRAG_PRIMITIVES",
+	"T60x_FRAG_PRIMITIVES_DROPPED",
+	"T60x_FRAG_CYCLES_DESC",
+	"T60x_FRAG_CYCLES_PLR",
+	"T60x_FRAG_CYCLES_VERT",
+	"T60x_FRAG_CYCLES_TRISETUP",
+	"T60x_FRAG_CYCLES_RAST",
+	"T60x_FRAG_THREADS",
+	"T60x_FRAG_DUMMY_THREADS",
+	"T60x_FRAG_QUADS_RAST",
+	"T60x_FRAG_QUADS_EZS_TEST",
+	"T60x_FRAG_QUADS_EZS_KILLED",
+	"T60x_FRAG_THREADS_LZS_TEST",
+	"T60x_FRAG_THREADS_LZS_KILLED",
+	"T60x_FRAG_CYCLES_NO_TILE",
+	"T60x_FRAG_NUM_TILES",
+	"T60x_FRAG_TRANS_ELIM",
+	"T60x_COMPUTE_ACTIVE",
+	"T60x_COMPUTE_TASKS",
+	"T60x_COMPUTE_THREADS",
+	"T60x_COMPUTE_CYCLES_DESC",
+	"T60x_TRIPIPE_ACTIVE",
+	"T60x_ARITH_WORDS",
+	"T60x_ARITH_CYCLES_REG",
+	"T60x_ARITH_CYCLES_L0",
+	"T60x_ARITH_FRAG_DEPEND",
+	"T60x_LS_WORDS",
+	"T60x_LS_ISSUES",
+	"T60x_LS_RESTARTS",
+	"T60x_LS_REISSUES_MISS",
+	"T60x_LS_REISSUES_VD",
+	"T60x_LS_REISSUE_ATTRIB_MISS",
+	"T60x_LS_NO_WB",
+	"T60x_TEX_WORDS",
+	"T60x_TEX_BUBBLES",
+	"T60x_TEX_WORDS_L0",
+	"T60x_TEX_WORDS_DESC",
+	"T60x_TEX_ISSUES",
+	"T60x_TEX_RECIRC_FMISS",
+	"T60x_TEX_RECIRC_DESC",
+	"T60x_TEX_RECIRC_MULTI",
+	"T60x_TEX_RECIRC_PMISS",
+	"T60x_TEX_RECIRC_CONF",
+	"T60x_LSC_READ_HITS",
+	"T60x_LSC_READ_MISSES",
+	"T60x_LSC_WRITE_HITS",
+	"T60x_LSC_WRITE_MISSES",
+	"T60x_LSC_ATOMIC_HITS",
+	"T60x_LSC_ATOMIC_MISSES",
+	"T60x_LSC_LINE_FETCHES",
+	"T60x_LSC_DIRTY_LINE",
+	"T60x_LSC_SNOOPS",
+	"T60x_AXI_TLB_STALL",
+	"T60x_AXI_TLB_MISS",
+	"T60x_AXI_TLB_TRANSACTION",
+	"T60x_LS_TLB_MISS",
+	"T60x_LS_TLB_HIT",
+	"T60x_AXI_BEATS_READ",
+	"T60x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MMU_HIT",
+	"T60x_MMU_NEW_MISS",
+	"T60x_MMU_REPLAY_FULL",
+	"T60x_MMU_REPLAY_MISS",
+	"T60x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_UTLB_HIT",
+	"T60x_UTLB_NEW_MISS",
+	"T60x_UTLB_REPLAY_FULL",
+	"T60x_UTLB_REPLAY_MISS",
+	"T60x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_L2_EXT_WRITE_BEATS",
+	"T60x_L2_EXT_READ_BEATS",
+	"T60x_L2_ANY_LOOKUP",
+	"T60x_L2_READ_LOOKUP",
+	"T60x_L2_SREAD_LOOKUP",
+	"T60x_L2_READ_REPLAY",
+	"T60x_L2_READ_SNOOP",
+	"T60x_L2_READ_HIT",
+	"T60x_L2_CLEAN_MISS",
+	"T60x_L2_WRITE_LOOKUP",
+	"T60x_L2_SWRITE_LOOKUP",
+	"T60x_L2_WRITE_REPLAY",
+	"T60x_L2_WRITE_SNOOP",
+	"T60x_L2_WRITE_HIT",
+	"T60x_L2_EXT_READ_FULL",
+	"T60x_L2_EXT_READ_HALF",
+	"T60x_L2_EXT_WRITE_FULL",
+	"T60x_L2_EXT_WRITE_HALF",
+	"T60x_L2_EXT_READ",
+	"T60x_L2_EXT_READ_LINE",
+	"T60x_L2_EXT_WRITE",
+	"T60x_L2_EXT_WRITE_LINE",
+	"T60x_L2_EXT_WRITE_SMALL",
+	"T60x_L2_EXT_BARRIER",
+	"T60x_L2_EXT_AR_STALL",
+	"T60x_L2_EXT_R_BUF_FULL",
+	"T60x_L2_EXT_RD_BUF_FULL",
+	"T60x_L2_EXT_R_RAW",
+	"T60x_L2_EXT_W_STALL",
+	"T60x_L2_EXT_W_BUF_FULL",
+	"T60x_L2_EXT_R_W_HAZARD",
+	"T60x_L2_TAG_HAZARD",
+	"T60x_L2_SNOOP_FULL",
+	"T60x_L2_REPLAY_FULL"
+};
+static const char * const hardware_counters_mali_t62x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MESSAGES_SENT",
+	"T62x_MESSAGES_RECEIVED",
+	"T62x_GPU_ACTIVE",
+	"T62x_IRQ_ACTIVE",
+	"T62x_JS0_JOBS",
+	"T62x_JS0_TASKS",
+	"T62x_JS0_ACTIVE",
+	"",
+	"T62x_JS0_WAIT_READ",
+	"T62x_JS0_WAIT_ISSUE",
+	"T62x_JS0_WAIT_DEPEND",
+	"T62x_JS0_WAIT_FINISH",
+	"T62x_JS1_JOBS",
+	"T62x_JS1_TASKS",
+	"T62x_JS1_ACTIVE",
+	"",
+	"T62x_JS1_WAIT_READ",
+	"T62x_JS1_WAIT_ISSUE",
+	"T62x_JS1_WAIT_DEPEND",
+	"T62x_JS1_WAIT_FINISH",
+	"T62x_JS2_JOBS",
+	"T62x_JS2_TASKS",
+	"T62x_JS2_ACTIVE",
+	"",
+	"T62x_JS2_WAIT_READ",
+	"T62x_JS2_WAIT_ISSUE",
+	"T62x_JS2_WAIT_DEPEND",
+	"T62x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T62x_TI_JOBS_PROCESSED",
+	"T62x_TI_TRIANGLES",
+	"T62x_TI_QUADS",
+	"T62x_TI_POLYGONS",
+	"T62x_TI_POINTS",
+	"T62x_TI_LINES",
+	"T62x_TI_VCACHE_HIT",
+	"T62x_TI_VCACHE_MISS",
+	"T62x_TI_FRONT_FACING",
+	"T62x_TI_BACK_FACING",
+	"T62x_TI_PRIM_VISIBLE",
+	"T62x_TI_PRIM_CULLED",
+	"T62x_TI_PRIM_CLIPPED",
+	"T62x_TI_LEVEL0",
+	"T62x_TI_LEVEL1",
+	"T62x_TI_LEVEL2",
+	"T62x_TI_LEVEL3",
+	"T62x_TI_LEVEL4",
+	"T62x_TI_LEVEL5",
+	"T62x_TI_LEVEL6",
+	"T62x_TI_LEVEL7",
+	"T62x_TI_COMMAND_1",
+	"T62x_TI_COMMAND_2",
+	"T62x_TI_COMMAND_3",
+	"T62x_TI_COMMAND_4",
+	"T62x_TI_COMMAND_5_7",
+	"T62x_TI_COMMAND_8_15",
+	"T62x_TI_COMMAND_16_63",
+	"T62x_TI_COMMAND_64",
+	"T62x_TI_COMPRESS_IN",
+	"T62x_TI_COMPRESS_OUT",
+	"T62x_TI_COMPRESS_FLUSH",
+	"T62x_TI_TIMESTAMPS",
+	"T62x_TI_PCACHE_HIT",
+	"T62x_TI_PCACHE_MISS",
+	"T62x_TI_PCACHE_LINE",
+	"T62x_TI_PCACHE_STALL",
+	"T62x_TI_WRBUF_HIT",
+	"T62x_TI_WRBUF_MISS",
+	"T62x_TI_WRBUF_LINE",
+	"T62x_TI_WRBUF_PARTIAL",
+	"T62x_TI_WRBUF_STALL",
+	"T62x_TI_ACTIVE",
+	"T62x_TI_LOADING_DESC",
+	"T62x_TI_INDEX_WAIT",
+	"T62x_TI_INDEX_RANGE_WAIT",
+	"T62x_TI_VERTEX_WAIT",
+	"T62x_TI_PCACHE_WAIT",
+	"T62x_TI_WRBUF_WAIT",
+	"T62x_TI_BUS_READ",
+	"T62x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_TI_UTLB_STALL",
+	"T62x_TI_UTLB_REPLAY_MISS",
+	"T62x_TI_UTLB_REPLAY_FULL",
+	"T62x_TI_UTLB_NEW_MISS",
+	"T62x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"T62x_SHADER_CORE_ACTIVE",
+	"T62x_FRAG_ACTIVE",
+	"T62x_FRAG_PRIMITIVES",
+	"T62x_FRAG_PRIMITIVES_DROPPED",
+	"T62x_FRAG_CYCLES_DESC",
+	"T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T62x_FRAG_CYCLES_VERT",
+	"T62x_FRAG_CYCLES_TRISETUP",
+	"T62x_FRAG_CYCLES_EZS_ACTIVE",
+	"T62x_FRAG_THREADS",
+	"T62x_FRAG_DUMMY_THREADS",
+	"T62x_FRAG_QUADS_RAST",
+	"T62x_FRAG_QUADS_EZS_TEST",
+	"T62x_FRAG_QUADS_EZS_KILLED",
+	"T62x_FRAG_THREADS_LZS_TEST",
+	"T62x_FRAG_THREADS_LZS_KILLED",
+	"T62x_FRAG_CYCLES_NO_TILE",
+	"T62x_FRAG_NUM_TILES",
+	"T62x_FRAG_TRANS_ELIM",
+	"T62x_COMPUTE_ACTIVE",
+	"T62x_COMPUTE_TASKS",
+	"T62x_COMPUTE_THREADS",
+	"T62x_COMPUTE_CYCLES_DESC",
+	"T62x_TRIPIPE_ACTIVE",
+	"T62x_ARITH_WORDS",
+	"T62x_ARITH_CYCLES_REG",
+	"T62x_ARITH_CYCLES_L0",
+	"T62x_ARITH_FRAG_DEPEND",
+	"T62x_LS_WORDS",
+	"T62x_LS_ISSUES",
+	"T62x_LS_RESTARTS",
+	"T62x_LS_REISSUES_MISS",
+	"T62x_LS_REISSUES_VD",
+	"T62x_LS_REISSUE_ATTRIB_MISS",
+	"T62x_LS_NO_WB",
+	"T62x_TEX_WORDS",
+	"T62x_TEX_BUBBLES",
+	"T62x_TEX_WORDS_L0",
+	"T62x_TEX_WORDS_DESC",
+	"T62x_TEX_ISSUES",
+	"T62x_TEX_RECIRC_FMISS",
+	"T62x_TEX_RECIRC_DESC",
+	"T62x_TEX_RECIRC_MULTI",
+	"T62x_TEX_RECIRC_PMISS",
+	"T62x_TEX_RECIRC_CONF",
+	"T62x_LSC_READ_HITS",
+	"T62x_LSC_READ_MISSES",
+	"T62x_LSC_WRITE_HITS",
+	"T62x_LSC_WRITE_MISSES",
+	"T62x_LSC_ATOMIC_HITS",
+	"T62x_LSC_ATOMIC_MISSES",
+	"T62x_LSC_LINE_FETCHES",
+	"T62x_LSC_DIRTY_LINE",
+	"T62x_LSC_SNOOPS",
+	"T62x_AXI_TLB_STALL",
+	"T62x_AXI_TLB_MISS",
+	"T62x_AXI_TLB_TRANSACTION",
+	"T62x_LS_TLB_MISS",
+	"T62x_LS_TLB_HIT",
+	"T62x_AXI_BEATS_READ",
+	"T62x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MMU_HIT",
+	"T62x_MMU_NEW_MISS",
+	"T62x_MMU_REPLAY_FULL",
+	"T62x_MMU_REPLAY_MISS",
+	"T62x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_UTLB_HIT",
+	"T62x_UTLB_NEW_MISS",
+	"T62x_UTLB_REPLAY_FULL",
+	"T62x_UTLB_REPLAY_MISS",
+	"T62x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_L2_EXT_WRITE_BEATS",
+	"T62x_L2_EXT_READ_BEATS",
+	"T62x_L2_ANY_LOOKUP",
+	"T62x_L2_READ_LOOKUP",
+	"T62x_L2_SREAD_LOOKUP",
+	"T62x_L2_READ_REPLAY",
+	"T62x_L2_READ_SNOOP",
+	"T62x_L2_READ_HIT",
+	"T62x_L2_CLEAN_MISS",
+	"T62x_L2_WRITE_LOOKUP",
+	"T62x_L2_SWRITE_LOOKUP",
+	"T62x_L2_WRITE_REPLAY",
+	"T62x_L2_WRITE_SNOOP",
+	"T62x_L2_WRITE_HIT",
+	"T62x_L2_EXT_READ_FULL",
+	"T62x_L2_EXT_READ_HALF",
+	"T62x_L2_EXT_WRITE_FULL",
+	"T62x_L2_EXT_WRITE_HALF",
+	"T62x_L2_EXT_READ",
+	"T62x_L2_EXT_READ_LINE",
+	"T62x_L2_EXT_WRITE",
+	"T62x_L2_EXT_WRITE_LINE",
+	"T62x_L2_EXT_WRITE_SMALL",
+	"T62x_L2_EXT_BARRIER",
+	"T62x_L2_EXT_AR_STALL",
+	"T62x_L2_EXT_R_BUF_FULL",
+	"T62x_L2_EXT_RD_BUF_FULL",
+	"T62x_L2_EXT_R_RAW",
+	"T62x_L2_EXT_W_STALL",
+	"T62x_L2_EXT_W_BUF_FULL",
+	"T62x_L2_EXT_R_W_HAZARD",
+	"T62x_L2_TAG_HAZARD",
+	"T62x_L2_SNOOP_FULL",
+	"T62x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t72x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T72x_GPU_ACTIVE",
+	"T72x_IRQ_ACTIVE",
+	"T72x_JS0_JOBS",
+	"T72x_JS0_TASKS",
+	"T72x_JS0_ACTIVE",
+	"T72x_JS1_JOBS",
+	"T72x_JS1_TASKS",
+	"T72x_JS1_ACTIVE",
+	"T72x_JS2_JOBS",
+	"T72x_JS2_TASKS",
+	"T72x_JS2_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T72x_TI_JOBS_PROCESSED",
+	"T72x_TI_TRIANGLES",
+	"T72x_TI_QUADS",
+	"T72x_TI_POLYGONS",
+	"T72x_TI_POINTS",
+	"T72x_TI_LINES",
+	"T72x_TI_FRONT_FACING",
+	"T72x_TI_BACK_FACING",
+	"T72x_TI_PRIM_VISIBLE",
+	"T72x_TI_PRIM_CULLED",
+	"T72x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T72x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T72x_FRAG_ACTIVE",
+	"T72x_FRAG_PRIMITIVES",
+	"T72x_FRAG_PRIMITIVES_DROPPED",
+	"T72x_FRAG_THREADS",
+	"T72x_FRAG_DUMMY_THREADS",
+	"T72x_FRAG_QUADS_RAST",
+	"T72x_FRAG_QUADS_EZS_TEST",
+	"T72x_FRAG_QUADS_EZS_KILLED",
+	"T72x_FRAG_THREADS_LZS_TEST",
+	"T72x_FRAG_THREADS_LZS_KILLED",
+	"T72x_FRAG_CYCLES_NO_TILE",
+	"T72x_FRAG_NUM_TILES",
+	"T72x_FRAG_TRANS_ELIM",
+	"T72x_COMPUTE_ACTIVE",
+	"T72x_COMPUTE_TASKS",
+	"T72x_COMPUTE_THREADS",
+	"T72x_TRIPIPE_ACTIVE",
+	"T72x_ARITH_WORDS",
+	"T72x_ARITH_CYCLES_REG",
+	"T72x_LS_WORDS",
+	"T72x_LS_ISSUES",
+	"T72x_LS_RESTARTS",
+	"T72x_LS_REISSUES_MISS",
+	"T72x_TEX_WORDS",
+	"T72x_TEX_BUBBLES",
+	"T72x_TEX_ISSUES",
+	"T72x_LSC_READ_HITS",
+	"T72x_LSC_READ_MISSES",
+	"T72x_LSC_WRITE_HITS",
+	"T72x_LSC_WRITE_MISSES",
+	"T72x_LSC_ATOMIC_HITS",
+	"T72x_LSC_ATOMIC_MISSES",
+	"T72x_LSC_LINE_FETCHES",
+	"T72x_LSC_DIRTY_LINE",
+	"T72x_LSC_SNOOPS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T72x_L2_EXT_WRITE_BEAT",
+	"T72x_L2_EXT_READ_BEAT",
+	"T72x_L2_READ_SNOOP",
+	"T72x_L2_READ_HIT",
+	"T72x_L2_WRITE_SNOOP",
+	"T72x_L2_WRITE_HIT",
+	"T72x_L2_EXT_WRITE_SMALL",
+	"T72x_L2_EXT_BARRIER",
+	"T72x_L2_EXT_AR_STALL",
+	"T72x_L2_EXT_W_STALL",
+	"T72x_L2_SNOOP_FULL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	""
+};
+
+static const char * const hardware_counters_mali_t76x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MESSAGES_SENT",
+	"T76x_MESSAGES_RECEIVED",
+	"T76x_GPU_ACTIVE",
+	"T76x_IRQ_ACTIVE",
+	"T76x_JS0_JOBS",
+	"T76x_JS0_TASKS",
+	"T76x_JS0_ACTIVE",
+	"",
+	"T76x_JS0_WAIT_READ",
+	"T76x_JS0_WAIT_ISSUE",
+	"T76x_JS0_WAIT_DEPEND",
+	"T76x_JS0_WAIT_FINISH",
+	"T76x_JS1_JOBS",
+	"T76x_JS1_TASKS",
+	"T76x_JS1_ACTIVE",
+	"",
+	"T76x_JS1_WAIT_READ",
+	"T76x_JS1_WAIT_ISSUE",
+	"T76x_JS1_WAIT_DEPEND",
+	"T76x_JS1_WAIT_FINISH",
+	"T76x_JS2_JOBS",
+	"T76x_JS2_TASKS",
+	"T76x_JS2_ACTIVE",
+	"",
+	"T76x_JS2_WAIT_READ",
+	"T76x_JS2_WAIT_ISSUE",
+	"T76x_JS2_WAIT_DEPEND",
+	"T76x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T76x_TI_JOBS_PROCESSED",
+	"T76x_TI_TRIANGLES",
+	"T76x_TI_QUADS",
+	"T76x_TI_POLYGONS",
+	"T76x_TI_POINTS",
+	"T76x_TI_LINES",
+	"T76x_TI_VCACHE_HIT",
+	"T76x_TI_VCACHE_MISS",
+	"T76x_TI_FRONT_FACING",
+	"T76x_TI_BACK_FACING",
+	"T76x_TI_PRIM_VISIBLE",
+	"T76x_TI_PRIM_CULLED",
+	"T76x_TI_PRIM_CLIPPED",
+	"T76x_TI_LEVEL0",
+	"T76x_TI_LEVEL1",
+	"T76x_TI_LEVEL2",
+	"T76x_TI_LEVEL3",
+	"T76x_TI_LEVEL4",
+	"T76x_TI_LEVEL5",
+	"T76x_TI_LEVEL6",
+	"T76x_TI_LEVEL7",
+	"T76x_TI_COMMAND_1",
+	"T76x_TI_COMMAND_2",
+	"T76x_TI_COMMAND_3",
+	"T76x_TI_COMMAND_4",
+	"T76x_TI_COMMAND_5_7",
+	"T76x_TI_COMMAND_8_15",
+	"T76x_TI_COMMAND_16_63",
+	"T76x_TI_COMMAND_64",
+	"T76x_TI_COMPRESS_IN",
+	"T76x_TI_COMPRESS_OUT",
+	"T76x_TI_COMPRESS_FLUSH",
+	"T76x_TI_TIMESTAMPS",
+	"T76x_TI_PCACHE_HIT",
+	"T76x_TI_PCACHE_MISS",
+	"T76x_TI_PCACHE_LINE",
+	"T76x_TI_PCACHE_STALL",
+	"T76x_TI_WRBUF_HIT",
+	"T76x_TI_WRBUF_MISS",
+	"T76x_TI_WRBUF_LINE",
+	"T76x_TI_WRBUF_PARTIAL",
+	"T76x_TI_WRBUF_STALL",
+	"T76x_TI_ACTIVE",
+	"T76x_TI_LOADING_DESC",
+	"T76x_TI_INDEX_WAIT",
+	"T76x_TI_INDEX_RANGE_WAIT",
+	"T76x_TI_VERTEX_WAIT",
+	"T76x_TI_PCACHE_WAIT",
+	"T76x_TI_WRBUF_WAIT",
+	"T76x_TI_BUS_READ",
+	"T76x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_TI_UTLB_HIT",
+	"T76x_TI_UTLB_NEW_MISS",
+	"T76x_TI_UTLB_REPLAY_FULL",
+	"T76x_TI_UTLB_REPLAY_MISS",
+	"T76x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T76x_FRAG_ACTIVE",
+	"T76x_FRAG_PRIMITIVES",
+	"T76x_FRAG_PRIMITIVES_DROPPED",
+	"T76x_FRAG_CYCLES_DESC",
+	"T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T76x_FRAG_CYCLES_VERT",
+	"T76x_FRAG_CYCLES_TRISETUP",
+	"T76x_FRAG_CYCLES_EZS_ACTIVE",
+	"T76x_FRAG_THREADS",
+	"T76x_FRAG_DUMMY_THREADS",
+	"T76x_FRAG_QUADS_RAST",
+	"T76x_FRAG_QUADS_EZS_TEST",
+	"T76x_FRAG_QUADS_EZS_KILLED",
+	"T76x_FRAG_THREADS_LZS_TEST",
+	"T76x_FRAG_THREADS_LZS_KILLED",
+	"T76x_FRAG_CYCLES_NO_TILE",
+	"T76x_FRAG_NUM_TILES",
+	"T76x_FRAG_TRANS_ELIM",
+	"T76x_COMPUTE_ACTIVE",
+	"T76x_COMPUTE_TASKS",
+	"T76x_COMPUTE_THREADS",
+	"T76x_COMPUTE_CYCLES_DESC",
+	"T76x_TRIPIPE_ACTIVE",
+	"T76x_ARITH_WORDS",
+	"T76x_ARITH_CYCLES_REG",
+	"T76x_ARITH_CYCLES_L0",
+	"T76x_ARITH_FRAG_DEPEND",
+	"T76x_LS_WORDS",
+	"T76x_LS_ISSUES",
+	"T76x_LS_REISSUE_ATTR",
+	"T76x_LS_REISSUES_VARY",
+	"T76x_LS_VARY_RV_MISS",
+	"T76x_LS_VARY_RV_HIT",
+	"T76x_LS_NO_UNPARK",
+	"T76x_TEX_WORDS",
+	"T76x_TEX_BUBBLES",
+	"T76x_TEX_WORDS_L0",
+	"T76x_TEX_WORDS_DESC",
+	"T76x_TEX_ISSUES",
+	"T76x_TEX_RECIRC_FMISS",
+	"T76x_TEX_RECIRC_DESC",
+	"T76x_TEX_RECIRC_MULTI",
+	"T76x_TEX_RECIRC_PMISS",
+	"T76x_TEX_RECIRC_CONF",
+	"T76x_LSC_READ_HITS",
+	"T76x_LSC_READ_OP",
+	"T76x_LSC_WRITE_HITS",
+	"T76x_LSC_WRITE_OP",
+	"T76x_LSC_ATOMIC_HITS",
+	"T76x_LSC_ATOMIC_OP",
+	"T76x_LSC_LINE_FETCHES",
+	"T76x_LSC_DIRTY_LINE",
+	"T76x_LSC_SNOOPS",
+	"T76x_AXI_TLB_STALL",
+	"T76x_AXI_TLB_MISS",
+	"T76x_AXI_TLB_TRANSACTION",
+	"T76x_LS_TLB_MISS",
+	"T76x_LS_TLB_HIT",
+	"T76x_AXI_BEATS_READ",
+	"T76x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MMU_HIT",
+	"T76x_MMU_NEW_MISS",
+	"T76x_MMU_REPLAY_FULL",
+	"T76x_MMU_REPLAY_MISS",
+	"T76x_MMU_TABLE_WALK",
+	"T76x_MMU_REQUESTS",
+	"",
+	"",
+	"T76x_UTLB_HIT",
+	"T76x_UTLB_NEW_MISS",
+	"T76x_UTLB_REPLAY_FULL",
+	"T76x_UTLB_REPLAY_MISS",
+	"T76x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_L2_EXT_WRITE_BEATS",
+	"T76x_L2_EXT_READ_BEATS",
+	"T76x_L2_ANY_LOOKUP",
+	"T76x_L2_READ_LOOKUP",
+	"T76x_L2_SREAD_LOOKUP",
+	"T76x_L2_READ_REPLAY",
+	"T76x_L2_READ_SNOOP",
+	"T76x_L2_READ_HIT",
+	"T76x_L2_CLEAN_MISS",
+	"T76x_L2_WRITE_LOOKUP",
+	"T76x_L2_SWRITE_LOOKUP",
+	"T76x_L2_WRITE_REPLAY",
+	"T76x_L2_WRITE_SNOOP",
+	"T76x_L2_WRITE_HIT",
+	"T76x_L2_EXT_READ_FULL",
+	"",
+	"T76x_L2_EXT_WRITE_FULL",
+	"T76x_L2_EXT_R_W_HAZARD",
+	"T76x_L2_EXT_READ",
+	"T76x_L2_EXT_READ_LINE",
+	"T76x_L2_EXT_WRITE",
+	"T76x_L2_EXT_WRITE_LINE",
+	"T76x_L2_EXT_WRITE_SMALL",
+	"T76x_L2_EXT_BARRIER",
+	"T76x_L2_EXT_AR_STALL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_EXT_RD_BUF_FULL",
+	"T76x_L2_EXT_R_RAW",
+	"T76x_L2_EXT_W_STALL",
+	"T76x_L2_EXT_W_BUF_FULL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_TAG_HAZARD",
+	"T76x_L2_SNOOP_FULL",
+	"T76x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t82x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MESSAGES_SENT",
+	"T82x_MESSAGES_RECEIVED",
+	"T82x_GPU_ACTIVE",
+	"T82x_IRQ_ACTIVE",
+	"T82x_JS0_JOBS",
+	"T82x_JS0_TASKS",
+	"T82x_JS0_ACTIVE",
+	"",
+	"T82x_JS0_WAIT_READ",
+	"T82x_JS0_WAIT_ISSUE",
+	"T82x_JS0_WAIT_DEPEND",
+	"T82x_JS0_WAIT_FINISH",
+	"T82x_JS1_JOBS",
+	"T82x_JS1_TASKS",
+	"T82x_JS1_ACTIVE",
+	"",
+	"T82x_JS1_WAIT_READ",
+	"T82x_JS1_WAIT_ISSUE",
+	"T82x_JS1_WAIT_DEPEND",
+	"T82x_JS1_WAIT_FINISH",
+	"T82x_JS2_JOBS",
+	"T82x_JS2_TASKS",
+	"T82x_JS2_ACTIVE",
+	"",
+	"T82x_JS2_WAIT_READ",
+	"T82x_JS2_WAIT_ISSUE",
+	"T82x_JS2_WAIT_DEPEND",
+	"T82x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T82x_TI_JOBS_PROCESSED",
+	"T82x_TI_TRIANGLES",
+	"T82x_TI_QUADS",
+	"T82x_TI_POLYGONS",
+	"T82x_TI_POINTS",
+	"T82x_TI_LINES",
+	"T82x_TI_FRONT_FACING",
+	"T82x_TI_BACK_FACING",
+	"T82x_TI_PRIM_VISIBLE",
+	"T82x_TI_PRIM_CULLED",
+	"T82x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T82x_FRAG_ACTIVE",
+	"T82x_FRAG_PRIMITIVES",
+	"T82x_FRAG_PRIMITIVES_DROPPED",
+	"T82x_FRAG_CYCLES_DESC",
+	"T82x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T82x_FRAG_CYCLES_VERT",
+	"T82x_FRAG_CYCLES_TRISETUP",
+	"T82x_FRAG_CYCLES_EZS_ACTIVE",
+	"T82x_FRAG_THREADS",
+	"T82x_FRAG_DUMMY_THREADS",
+	"T82x_FRAG_QUADS_RAST",
+	"T82x_FRAG_QUADS_EZS_TEST",
+	"T82x_FRAG_QUADS_EZS_KILLED",
+	"T82x_FRAG_THREADS_LZS_TEST",
+	"T82x_FRAG_THREADS_LZS_KILLED",
+	"T82x_FRAG_CYCLES_NO_TILE",
+	"T82x_FRAG_NUM_TILES",
+	"T82x_FRAG_TRANS_ELIM",
+	"T82x_COMPUTE_ACTIVE",
+	"T82x_COMPUTE_TASKS",
+	"T82x_COMPUTE_THREADS",
+	"T82x_COMPUTE_CYCLES_DESC",
+	"T82x_TRIPIPE_ACTIVE",
+	"T82x_ARITH_WORDS",
+	"T82x_ARITH_CYCLES_REG",
+	"T82x_ARITH_CYCLES_L0",
+	"T82x_ARITH_FRAG_DEPEND",
+	"T82x_LS_WORDS",
+	"T82x_LS_ISSUES",
+	"T82x_LS_REISSUE_ATTR",
+	"T82x_LS_REISSUES_VARY",
+	"T82x_LS_VARY_RV_MISS",
+	"T82x_LS_VARY_RV_HIT",
+	"T82x_LS_NO_UNPARK",
+	"T82x_TEX_WORDS",
+	"T82x_TEX_BUBBLES",
+	"T82x_TEX_WORDS_L0",
+	"T82x_TEX_WORDS_DESC",
+	"T82x_TEX_ISSUES",
+	"T82x_TEX_RECIRC_FMISS",
+	"T82x_TEX_RECIRC_DESC",
+	"T82x_TEX_RECIRC_MULTI",
+	"T82x_TEX_RECIRC_PMISS",
+	"T82x_TEX_RECIRC_CONF",
+	"T82x_LSC_READ_HITS",
+	"T82x_LSC_READ_OP",
+	"T82x_LSC_WRITE_HITS",
+	"T82x_LSC_WRITE_OP",
+	"T82x_LSC_ATOMIC_HITS",
+	"T82x_LSC_ATOMIC_OP",
+	"T82x_LSC_LINE_FETCHES",
+	"T82x_LSC_DIRTY_LINE",
+	"T82x_LSC_SNOOPS",
+	"T82x_AXI_TLB_STALL",
+	"T82x_AXI_TLB_MISS",
+	"T82x_AXI_TLB_TRANSACTION",
+	"T82x_LS_TLB_MISS",
+	"T82x_LS_TLB_HIT",
+	"T82x_AXI_BEATS_READ",
+	"T82x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MMU_HIT",
+	"T82x_MMU_NEW_MISS",
+	"T82x_MMU_REPLAY_FULL",
+	"T82x_MMU_REPLAY_MISS",
+	"T82x_MMU_TABLE_WALK",
+	"T82x_MMU_REQUESTS",
+	"",
+	"",
+	"T82x_UTLB_HIT",
+	"T82x_UTLB_NEW_MISS",
+	"T82x_UTLB_REPLAY_FULL",
+	"T82x_UTLB_REPLAY_MISS",
+	"T82x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_L2_EXT_WRITE_BEATS",
+	"T82x_L2_EXT_READ_BEATS",
+	"T82x_L2_ANY_LOOKUP",
+	"T82x_L2_READ_LOOKUP",
+	"T82x_L2_SREAD_LOOKUP",
+	"T82x_L2_READ_REPLAY",
+	"T82x_L2_READ_SNOOP",
+	"T82x_L2_READ_HIT",
+	"T82x_L2_CLEAN_MISS",
+	"T82x_L2_WRITE_LOOKUP",
+	"T82x_L2_SWRITE_LOOKUP",
+	"T82x_L2_WRITE_REPLAY",
+	"T82x_L2_WRITE_SNOOP",
+	"T82x_L2_WRITE_HIT",
+	"T82x_L2_EXT_READ_FULL",
+	"",
+	"T82x_L2_EXT_WRITE_FULL",
+	"T82x_L2_EXT_R_W_HAZARD",
+	"T82x_L2_EXT_READ",
+	"T82x_L2_EXT_READ_LINE",
+	"T82x_L2_EXT_WRITE",
+	"T82x_L2_EXT_WRITE_LINE",
+	"T82x_L2_EXT_WRITE_SMALL",
+	"T82x_L2_EXT_BARRIER",
+	"T82x_L2_EXT_AR_STALL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_EXT_RD_BUF_FULL",
+	"T82x_L2_EXT_R_RAW",
+	"T82x_L2_EXT_W_STALL",
+	"T82x_L2_EXT_W_BUF_FULL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_TAG_HAZARD",
+	"T82x_L2_SNOOP_FULL",
+	"T82x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t83x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MESSAGES_SENT",
+	"T83x_MESSAGES_RECEIVED",
+	"T83x_GPU_ACTIVE",
+	"T83x_IRQ_ACTIVE",
+	"T83x_JS0_JOBS",
+	"T83x_JS0_TASKS",
+	"T83x_JS0_ACTIVE",
+	"",
+	"T83x_JS0_WAIT_READ",
+	"T83x_JS0_WAIT_ISSUE",
+	"T83x_JS0_WAIT_DEPEND",
+	"T83x_JS0_WAIT_FINISH",
+	"T83x_JS1_JOBS",
+	"T83x_JS1_TASKS",
+	"T83x_JS1_ACTIVE",
+	"",
+	"T83x_JS1_WAIT_READ",
+	"T83x_JS1_WAIT_ISSUE",
+	"T83x_JS1_WAIT_DEPEND",
+	"T83x_JS1_WAIT_FINISH",
+	"T83x_JS2_JOBS",
+	"T83x_JS2_TASKS",
+	"T83x_JS2_ACTIVE",
+	"",
+	"T83x_JS2_WAIT_READ",
+	"T83x_JS2_WAIT_ISSUE",
+	"T83x_JS2_WAIT_DEPEND",
+	"T83x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T83x_TI_JOBS_PROCESSED",
+	"T83x_TI_TRIANGLES",
+	"T83x_TI_QUADS",
+	"T83x_TI_POLYGONS",
+	"T83x_TI_POINTS",
+	"T83x_TI_LINES",
+	"T83x_TI_FRONT_FACING",
+	"T83x_TI_BACK_FACING",
+	"T83x_TI_PRIM_VISIBLE",
+	"T83x_TI_PRIM_CULLED",
+	"T83x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T83x_FRAG_ACTIVE",
+	"T83x_FRAG_PRIMITIVES",
+	"T83x_FRAG_PRIMITIVES_DROPPED",
+	"T83x_FRAG_CYCLES_DESC",
+	"T83x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T83x_FRAG_CYCLES_VERT",
+	"T83x_FRAG_CYCLES_TRISETUP",
+	"T83x_FRAG_CYCLES_EZS_ACTIVE",
+	"T83x_FRAG_THREADS",
+	"T83x_FRAG_DUMMY_THREADS",
+	"T83x_FRAG_QUADS_RAST",
+	"T83x_FRAG_QUADS_EZS_TEST",
+	"T83x_FRAG_QUADS_EZS_KILLED",
+	"T83x_FRAG_THREADS_LZS_TEST",
+	"T83x_FRAG_THREADS_LZS_KILLED",
+	"T83x_FRAG_CYCLES_NO_TILE",
+	"T83x_FRAG_NUM_TILES",
+	"T83x_FRAG_TRANS_ELIM",
+	"T83x_COMPUTE_ACTIVE",
+	"T83x_COMPUTE_TASKS",
+	"T83x_COMPUTE_THREADS",
+	"T83x_COMPUTE_CYCLES_DESC",
+	"T83x_TRIPIPE_ACTIVE",
+	"T83x_ARITH_WORDS",
+	"T83x_ARITH_CYCLES_REG",
+	"T83x_ARITH_CYCLES_L0",
+	"T83x_ARITH_FRAG_DEPEND",
+	"T83x_LS_WORDS",
+	"T83x_LS_ISSUES",
+	"T83x_LS_REISSUE_ATTR",
+	"T83x_LS_REISSUES_VARY",
+	"T83x_LS_VARY_RV_MISS",
+	"T83x_LS_VARY_RV_HIT",
+	"T83x_LS_NO_UNPARK",
+	"T83x_TEX_WORDS",
+	"T83x_TEX_BUBBLES",
+	"T83x_TEX_WORDS_L0",
+	"T83x_TEX_WORDS_DESC",
+	"T83x_TEX_ISSUES",
+	"T83x_TEX_RECIRC_FMISS",
+	"T83x_TEX_RECIRC_DESC",
+	"T83x_TEX_RECIRC_MULTI",
+	"T83x_TEX_RECIRC_PMISS",
+	"T83x_TEX_RECIRC_CONF",
+	"T83x_LSC_READ_HITS",
+	"T83x_LSC_READ_OP",
+	"T83x_LSC_WRITE_HITS",
+	"T83x_LSC_WRITE_OP",
+	"T83x_LSC_ATOMIC_HITS",
+	"T83x_LSC_ATOMIC_OP",
+	"T83x_LSC_LINE_FETCHES",
+	"T83x_LSC_DIRTY_LINE",
+	"T83x_LSC_SNOOPS",
+	"T83x_AXI_TLB_STALL",
+	"T83x_AXI_TLB_MISS",
+	"T83x_AXI_TLB_TRANSACTION",
+	"T83x_LS_TLB_MISS",
+	"T83x_LS_TLB_HIT",
+	"T83x_AXI_BEATS_READ",
+	"T83x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MMU_HIT",
+	"T83x_MMU_NEW_MISS",
+	"T83x_MMU_REPLAY_FULL",
+	"T83x_MMU_REPLAY_MISS",
+	"T83x_MMU_TABLE_WALK",
+	"T83x_MMU_REQUESTS",
+	"",
+	"",
+	"T83x_UTLB_HIT",
+	"T83x_UTLB_NEW_MISS",
+	"T83x_UTLB_REPLAY_FULL",
+	"T83x_UTLB_REPLAY_MISS",
+	"T83x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_L2_EXT_WRITE_BEATS",
+	"T83x_L2_EXT_READ_BEATS",
+	"T83x_L2_ANY_LOOKUP",
+	"T83x_L2_READ_LOOKUP",
+	"T83x_L2_SREAD_LOOKUP",
+	"T83x_L2_READ_REPLAY",
+	"T83x_L2_READ_SNOOP",
+	"T83x_L2_READ_HIT",
+	"T83x_L2_CLEAN_MISS",
+	"T83x_L2_WRITE_LOOKUP",
+	"T83x_L2_SWRITE_LOOKUP",
+	"T83x_L2_WRITE_REPLAY",
+	"T83x_L2_WRITE_SNOOP",
+	"T83x_L2_WRITE_HIT",
+	"T83x_L2_EXT_READ_FULL",
+	"",
+	"T83x_L2_EXT_WRITE_FULL",
+	"T83x_L2_EXT_R_W_HAZARD",
+	"T83x_L2_EXT_READ",
+	"T83x_L2_EXT_READ_LINE",
+	"T83x_L2_EXT_WRITE",
+	"T83x_L2_EXT_WRITE_LINE",
+	"T83x_L2_EXT_WRITE_SMALL",
+	"T83x_L2_EXT_BARRIER",
+	"T83x_L2_EXT_AR_STALL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_EXT_RD_BUF_FULL",
+	"T83x_L2_EXT_R_RAW",
+	"T83x_L2_EXT_W_STALL",
+	"T83x_L2_EXT_W_BUF_FULL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_TAG_HAZARD",
+	"T83x_L2_SNOOP_FULL",
+	"T83x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t86x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MESSAGES_SENT",
+	"T86x_MESSAGES_RECEIVED",
+	"T86x_GPU_ACTIVE",
+	"T86x_IRQ_ACTIVE",
+	"T86x_JS0_JOBS",
+	"T86x_JS0_TASKS",
+	"T86x_JS0_ACTIVE",
+	"",
+	"T86x_JS0_WAIT_READ",
+	"T86x_JS0_WAIT_ISSUE",
+	"T86x_JS0_WAIT_DEPEND",
+	"T86x_JS0_WAIT_FINISH",
+	"T86x_JS1_JOBS",
+	"T86x_JS1_TASKS",
+	"T86x_JS1_ACTIVE",
+	"",
+	"T86x_JS1_WAIT_READ",
+	"T86x_JS1_WAIT_ISSUE",
+	"T86x_JS1_WAIT_DEPEND",
+	"T86x_JS1_WAIT_FINISH",
+	"T86x_JS2_JOBS",
+	"T86x_JS2_TASKS",
+	"T86x_JS2_ACTIVE",
+	"",
+	"T86x_JS2_WAIT_READ",
+	"T86x_JS2_WAIT_ISSUE",
+	"T86x_JS2_WAIT_DEPEND",
+	"T86x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T86x_TI_JOBS_PROCESSED",
+	"T86x_TI_TRIANGLES",
+	"T86x_TI_QUADS",
+	"T86x_TI_POLYGONS",
+	"T86x_TI_POINTS",
+	"T86x_TI_LINES",
+	"T86x_TI_VCACHE_HIT",
+	"T86x_TI_VCACHE_MISS",
+	"T86x_TI_FRONT_FACING",
+	"T86x_TI_BACK_FACING",
+	"T86x_TI_PRIM_VISIBLE",
+	"T86x_TI_PRIM_CULLED",
+	"T86x_TI_PRIM_CLIPPED",
+	"T86x_TI_LEVEL0",
+	"T86x_TI_LEVEL1",
+	"T86x_TI_LEVEL2",
+	"T86x_TI_LEVEL3",
+	"T86x_TI_LEVEL4",
+	"T86x_TI_LEVEL5",
+	"T86x_TI_LEVEL6",
+	"T86x_TI_LEVEL7",
+	"T86x_TI_COMMAND_1",
+	"T86x_TI_COMMAND_2",
+	"T86x_TI_COMMAND_3",
+	"T86x_TI_COMMAND_4",
+	"T86x_TI_COMMAND_5_7",
+	"T86x_TI_COMMAND_8_15",
+	"T86x_TI_COMMAND_16_63",
+	"T86x_TI_COMMAND_64",
+	"T86x_TI_COMPRESS_IN",
+	"T86x_TI_COMPRESS_OUT",
+	"T86x_TI_COMPRESS_FLUSH",
+	"T86x_TI_TIMESTAMPS",
+	"T86x_TI_PCACHE_HIT",
+	"T86x_TI_PCACHE_MISS",
+	"T86x_TI_PCACHE_LINE",
+	"T86x_TI_PCACHE_STALL",
+	"T86x_TI_WRBUF_HIT",
+	"T86x_TI_WRBUF_MISS",
+	"T86x_TI_WRBUF_LINE",
+	"T86x_TI_WRBUF_PARTIAL",
+	"T86x_TI_WRBUF_STALL",
+	"T86x_TI_ACTIVE",
+	"T86x_TI_LOADING_DESC",
+	"T86x_TI_INDEX_WAIT",
+	"T86x_TI_INDEX_RANGE_WAIT",
+	"T86x_TI_VERTEX_WAIT",
+	"T86x_TI_PCACHE_WAIT",
+	"T86x_TI_WRBUF_WAIT",
+	"T86x_TI_BUS_READ",
+	"T86x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_TI_UTLB_HIT",
+	"T86x_TI_UTLB_NEW_MISS",
+	"T86x_TI_UTLB_REPLAY_FULL",
+	"T86x_TI_UTLB_REPLAY_MISS",
+	"T86x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T86x_FRAG_ACTIVE",
+	"T86x_FRAG_PRIMITIVES",
+	"T86x_FRAG_PRIMITIVES_DROPPED",
+	"T86x_FRAG_CYCLES_DESC",
+	"T86x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T86x_FRAG_CYCLES_VERT",
+	"T86x_FRAG_CYCLES_TRISETUP",
+	"T86x_FRAG_CYCLES_EZS_ACTIVE",
+	"T86x_FRAG_THREADS",
+	"T86x_FRAG_DUMMY_THREADS",
+	"T86x_FRAG_QUADS_RAST",
+	"T86x_FRAG_QUADS_EZS_TEST",
+	"T86x_FRAG_QUADS_EZS_KILLED",
+	"T86x_FRAG_THREADS_LZS_TEST",
+	"T86x_FRAG_THREADS_LZS_KILLED",
+	"T86x_FRAG_CYCLES_NO_TILE",
+	"T86x_FRAG_NUM_TILES",
+	"T86x_FRAG_TRANS_ELIM",
+	"T86x_COMPUTE_ACTIVE",
+	"T86x_COMPUTE_TASKS",
+	"T86x_COMPUTE_THREADS",
+	"T86x_COMPUTE_CYCLES_DESC",
+	"T86x_TRIPIPE_ACTIVE",
+	"T86x_ARITH_WORDS",
+	"T86x_ARITH_CYCLES_REG",
+	"T86x_ARITH_CYCLES_L0",
+	"T86x_ARITH_FRAG_DEPEND",
+	"T86x_LS_WORDS",
+	"T86x_LS_ISSUES",
+	"T86x_LS_REISSUE_ATTR",
+	"T86x_LS_REISSUES_VARY",
+	"T86x_LS_VARY_RV_MISS",
+	"T86x_LS_VARY_RV_HIT",
+	"T86x_LS_NO_UNPARK",
+	"T86x_TEX_WORDS",
+	"T86x_TEX_BUBBLES",
+	"T86x_TEX_WORDS_L0",
+	"T86x_TEX_WORDS_DESC",
+	"T86x_TEX_ISSUES",
+	"T86x_TEX_RECIRC_FMISS",
+	"T86x_TEX_RECIRC_DESC",
+	"T86x_TEX_RECIRC_MULTI",
+	"T86x_TEX_RECIRC_PMISS",
+	"T86x_TEX_RECIRC_CONF",
+	"T86x_LSC_READ_HITS",
+	"T86x_LSC_READ_OP",
+	"T86x_LSC_WRITE_HITS",
+	"T86x_LSC_WRITE_OP",
+	"T86x_LSC_ATOMIC_HITS",
+	"T86x_LSC_ATOMIC_OP",
+	"T86x_LSC_LINE_FETCHES",
+	"T86x_LSC_DIRTY_LINE",
+	"T86x_LSC_SNOOPS",
+	"T86x_AXI_TLB_STALL",
+	"T86x_AXI_TLB_MISS",
+	"T86x_AXI_TLB_TRANSACTION",
+	"T86x_LS_TLB_MISS",
+	"T86x_LS_TLB_HIT",
+	"T86x_AXI_BEATS_READ",
+	"T86x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MMU_HIT",
+	"T86x_MMU_NEW_MISS",
+	"T86x_MMU_REPLAY_FULL",
+	"T86x_MMU_REPLAY_MISS",
+	"T86x_MMU_TABLE_WALK",
+	"T86x_MMU_REQUESTS",
+	"",
+	"",
+	"T86x_UTLB_HIT",
+	"T86x_UTLB_NEW_MISS",
+	"T86x_UTLB_REPLAY_FULL",
+	"T86x_UTLB_REPLAY_MISS",
+	"T86x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_L2_EXT_WRITE_BEATS",
+	"T86x_L2_EXT_READ_BEATS",
+	"T86x_L2_ANY_LOOKUP",
+	"T86x_L2_READ_LOOKUP",
+	"T86x_L2_SREAD_LOOKUP",
+	"T86x_L2_READ_REPLAY",
+	"T86x_L2_READ_SNOOP",
+	"T86x_L2_READ_HIT",
+	"T86x_L2_CLEAN_MISS",
+	"T86x_L2_WRITE_LOOKUP",
+	"T86x_L2_SWRITE_LOOKUP",
+	"T86x_L2_WRITE_REPLAY",
+	"T86x_L2_WRITE_SNOOP",
+	"T86x_L2_WRITE_HIT",
+	"T86x_L2_EXT_READ_FULL",
+	"",
+	"T86x_L2_EXT_WRITE_FULL",
+	"T86x_L2_EXT_R_W_HAZARD",
+	"T86x_L2_EXT_READ",
+	"T86x_L2_EXT_READ_LINE",
+	"T86x_L2_EXT_WRITE",
+	"T86x_L2_EXT_WRITE_LINE",
+	"T86x_L2_EXT_WRITE_SMALL",
+	"T86x_L2_EXT_BARRIER",
+	"T86x_L2_EXT_AR_STALL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_EXT_RD_BUF_FULL",
+	"T86x_L2_EXT_R_RAW",
+	"T86x_L2_EXT_W_STALL",
+	"T86x_L2_EXT_W_BUF_FULL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_TAG_HAZARD",
+	"T86x_L2_SNOOP_FULL",
+	"T86x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t88x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MESSAGES_SENT",
+	"T88x_MESSAGES_RECEIVED",
+	"T88x_GPU_ACTIVE",
+	"T88x_IRQ_ACTIVE",
+	"T88x_JS0_JOBS",
+	"T88x_JS0_TASKS",
+	"T88x_JS0_ACTIVE",
+	"",
+	"T88x_JS0_WAIT_READ",
+	"T88x_JS0_WAIT_ISSUE",
+	"T88x_JS0_WAIT_DEPEND",
+	"T88x_JS0_WAIT_FINISH",
+	"T88x_JS1_JOBS",
+	"T88x_JS1_TASKS",
+	"T88x_JS1_ACTIVE",
+	"",
+	"T88x_JS1_WAIT_READ",
+	"T88x_JS1_WAIT_ISSUE",
+	"T88x_JS1_WAIT_DEPEND",
+	"T88x_JS1_WAIT_FINISH",
+	"T88x_JS2_JOBS",
+	"T88x_JS2_TASKS",
+	"T88x_JS2_ACTIVE",
+	"",
+	"T88x_JS2_WAIT_READ",
+	"T88x_JS2_WAIT_ISSUE",
+	"T88x_JS2_WAIT_DEPEND",
+	"T88x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T88x_TI_JOBS_PROCESSED",
+	"T88x_TI_TRIANGLES",
+	"T88x_TI_QUADS",
+	"T88x_TI_POLYGONS",
+	"T88x_TI_POINTS",
+	"T88x_TI_LINES",
+	"T88x_TI_VCACHE_HIT",
+	"T88x_TI_VCACHE_MISS",
+	"T88x_TI_FRONT_FACING",
+	"T88x_TI_BACK_FACING",
+	"T88x_TI_PRIM_VISIBLE",
+	"T88x_TI_PRIM_CULLED",
+	"T88x_TI_PRIM_CLIPPED",
+	"T88x_TI_LEVEL0",
+	"T88x_TI_LEVEL1",
+	"T88x_TI_LEVEL2",
+	"T88x_TI_LEVEL3",
+	"T88x_TI_LEVEL4",
+	"T88x_TI_LEVEL5",
+	"T88x_TI_LEVEL6",
+	"T88x_TI_LEVEL7",
+	"T88x_TI_COMMAND_1",
+	"T88x_TI_COMMAND_2",
+	"T88x_TI_COMMAND_3",
+	"T88x_TI_COMMAND_4",
+	"T88x_TI_COMMAND_5_7",
+	"T88x_TI_COMMAND_8_15",
+	"T88x_TI_COMMAND_16_63",
+	"T88x_TI_COMMAND_64",
+	"T88x_TI_COMPRESS_IN",
+	"T88x_TI_COMPRESS_OUT",
+	"T88x_TI_COMPRESS_FLUSH",
+	"T88x_TI_TIMESTAMPS",
+	"T88x_TI_PCACHE_HIT",
+	"T88x_TI_PCACHE_MISS",
+	"T88x_TI_PCACHE_LINE",
+	"T88x_TI_PCACHE_STALL",
+	"T88x_TI_WRBUF_HIT",
+	"T88x_TI_WRBUF_MISS",
+	"T88x_TI_WRBUF_LINE",
+	"T88x_TI_WRBUF_PARTIAL",
+	"T88x_TI_WRBUF_STALL",
+	"T88x_TI_ACTIVE",
+	"T88x_TI_LOADING_DESC",
+	"T88x_TI_INDEX_WAIT",
+	"T88x_TI_INDEX_RANGE_WAIT",
+	"T88x_TI_VERTEX_WAIT",
+	"T88x_TI_PCACHE_WAIT",
+	"T88x_TI_WRBUF_WAIT",
+	"T88x_TI_BUS_READ",
+	"T88x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_TI_UTLB_HIT",
+	"T88x_TI_UTLB_NEW_MISS",
+	"T88x_TI_UTLB_REPLAY_FULL",
+	"T88x_TI_UTLB_REPLAY_MISS",
+	"T88x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T88x_FRAG_ACTIVE",
+	"T88x_FRAG_PRIMITIVES",
+	"T88x_FRAG_PRIMITIVES_DROPPED",
+	"T88x_FRAG_CYCLES_DESC",
+	"T88x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T88x_FRAG_CYCLES_VERT",
+	"T88x_FRAG_CYCLES_TRISETUP",
+	"T88x_FRAG_CYCLES_EZS_ACTIVE",
+	"T88x_FRAG_THREADS",
+	"T88x_FRAG_DUMMY_THREADS",
+	"T88x_FRAG_QUADS_RAST",
+	"T88x_FRAG_QUADS_EZS_TEST",
+	"T88x_FRAG_QUADS_EZS_KILLED",
+	"T88x_FRAG_THREADS_LZS_TEST",
+	"T88x_FRAG_THREADS_LZS_KILLED",
+	"T88x_FRAG_CYCLES_NO_TILE",
+	"T88x_FRAG_NUM_TILES",
+	"T88x_FRAG_TRANS_ELIM",
+	"T88x_COMPUTE_ACTIVE",
+	"T88x_COMPUTE_TASKS",
+	"T88x_COMPUTE_THREADS",
+	"T88x_COMPUTE_CYCLES_DESC",
+	"T88x_TRIPIPE_ACTIVE",
+	"T88x_ARITH_WORDS",
+	"T88x_ARITH_CYCLES_REG",
+	"T88x_ARITH_CYCLES_L0",
+	"T88x_ARITH_FRAG_DEPEND",
+	"T88x_LS_WORDS",
+	"T88x_LS_ISSUES",
+	"T88x_LS_REISSUE_ATTR",
+	"T88x_LS_REISSUES_VARY",
+	"T88x_LS_VARY_RV_MISS",
+	"T88x_LS_VARY_RV_HIT",
+	"T88x_LS_NO_UNPARK",
+	"T88x_TEX_WORDS",
+	"T88x_TEX_BUBBLES",
+	"T88x_TEX_WORDS_L0",
+	"T88x_TEX_WORDS_DESC",
+	"T88x_TEX_ISSUES",
+	"T88x_TEX_RECIRC_FMISS",
+	"T88x_TEX_RECIRC_DESC",
+	"T88x_TEX_RECIRC_MULTI",
+	"T88x_TEX_RECIRC_PMISS",
+	"T88x_TEX_RECIRC_CONF",
+	"T88x_LSC_READ_HITS",
+	"T88x_LSC_READ_OP",
+	"T88x_LSC_WRITE_HITS",
+	"T88x_LSC_WRITE_OP",
+	"T88x_LSC_ATOMIC_HITS",
+	"T88x_LSC_ATOMIC_OP",
+	"T88x_LSC_LINE_FETCHES",
+	"T88x_LSC_DIRTY_LINE",
+	"T88x_LSC_SNOOPS",
+	"T88x_AXI_TLB_STALL",
+	"T88x_AXI_TLB_MISS",
+	"T88x_AXI_TLB_TRANSACTION",
+	"T88x_LS_TLB_MISS",
+	"T88x_LS_TLB_HIT",
+	"T88x_AXI_BEATS_READ",
+	"T88x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MMU_HIT",
+	"T88x_MMU_NEW_MISS",
+	"T88x_MMU_REPLAY_FULL",
+	"T88x_MMU_REPLAY_MISS",
+	"T88x_MMU_TABLE_WALK",
+	"T88x_MMU_REQUESTS",
+	"",
+	"",
+	"T88x_UTLB_HIT",
+	"T88x_UTLB_NEW_MISS",
+	"T88x_UTLB_REPLAY_FULL",
+	"T88x_UTLB_REPLAY_MISS",
+	"T88x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_L2_EXT_WRITE_BEATS",
+	"T88x_L2_EXT_READ_BEATS",
+	"T88x_L2_ANY_LOOKUP",
+	"T88x_L2_READ_LOOKUP",
+	"T88x_L2_SREAD_LOOKUP",
+	"T88x_L2_READ_REPLAY",
+	"T88x_L2_READ_SNOOP",
+	"T88x_L2_READ_HIT",
+	"T88x_L2_CLEAN_MISS",
+	"T88x_L2_WRITE_LOOKUP",
+	"T88x_L2_SWRITE_LOOKUP",
+	"T88x_L2_WRITE_REPLAY",
+	"T88x_L2_WRITE_SNOOP",
+	"T88x_L2_WRITE_HIT",
+	"T88x_L2_EXT_READ_FULL",
+	"",
+	"T88x_L2_EXT_WRITE_FULL",
+	"T88x_L2_EXT_R_W_HAZARD",
+	"T88x_L2_EXT_READ",
+	"T88x_L2_EXT_READ_LINE",
+	"T88x_L2_EXT_WRITE",
+	"T88x_L2_EXT_WRITE_LINE",
+	"T88x_L2_EXT_WRITE_SMALL",
+	"T88x_L2_EXT_BARRIER",
+	"T88x_L2_EXT_AR_STALL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_EXT_RD_BUF_FULL",
+	"T88x_L2_EXT_R_RAW",
+	"T88x_L2_EXT_W_STALL",
+	"T88x_L2_EXT_W_BUF_FULL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_TAG_HAZARD",
+	"T88x_L2_SNOOP_FULL",
+	"T88x_L2_REPLAY_FULL"
+};
+
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
new file mode 100644
index 0000000..a962ecb
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#ifndef _KBASE_GPU_ID_H_
+#define _KBASE_GPU_ID_H_
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT       0
+#define GPU_ID_VERSION_MINOR_SHIFT        4
+#define GPU_ID_VERSION_MAJOR_SHIFT        12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
+#define GPU_ID_VERSION_STATUS             (0xF  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xF  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X                    0x6956
+#define GPU_ID_PI_T62X                    0x0620
+#define GPU_ID_PI_T76X                    0x0750
+#define GPU_ID_PI_T72X                    0x0720
+#define GPU_ID_PI_TFRX                    0x0880
+#define GPU_ID_PI_T86X                    0x0860
+#define GPU_ID_PI_T82X                    0x0820
+#define GPU_ID_PI_T83X                    0x0830
+
+/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
+#define GPU_ID_PI_NEW_FORMAT_START        0x1000
+#define GPU_ID_IS_NEW_FORMAT(product_id)  ((product_id) != GPU_ID_PI_T60X && \
+						(product_id) >= \
+						GPU_ID_PI_NEW_FORMAT_START)
+
+#define GPU_ID2_VERSION_STATUS_SHIFT      0
+#define GPU_ID2_VERSION_MINOR_SHIFT       4
+#define GPU_ID2_VERSION_MAJOR_SHIFT       12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT       16
+#define GPU_ID2_ARCH_REV_SHIFT            20
+#define GPU_ID2_ARCH_MINOR_SHIFT          24
+#define GPU_ID2_ARCH_MAJOR_SHIFT          28
+#define GPU_ID2_VERSION_STATUS            (0xF << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFF << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xF << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xF << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xF << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xF << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+   a product ignoring its version. */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+   revision (major, minor, status) of a product */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+		(((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
+	version_major, version_minor, version_status) \
+		(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+			product_major) | \
+		 GPU_ID2_VERSION_MAKE(version_major, version_minor,     \
+			version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+   a particular GPU model by its arch_major and product_major. */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+   for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+   model. */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+		(((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		    GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0                   0x1
+#define GPU_ID_S_EAC                      0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major,
+   minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+		(((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		((major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		((minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		((status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+#endif /* _KBASE_GPU_ID_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
new file mode 100755
index 0000000..82f4c36
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase_gpu_memory_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+/** Show callback for the @c gpu_memory debugfs file.
+ *
+ * This function is called to get the contents of the @c gpu_memory debugfs
+ * file. This is a report of current gpu memory usage.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if successfully prints data in debugfs entry file
+ *         -1 if it encountered an error
+ */
+
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+
+	kbdev_list = kbase_dev_list_get();
+	list_for_each(entry, kbdev_list) {
+		struct kbase_device *kbdev = NULL;
+		struct kbasep_kctx_list_element *element;
+
+		kbdev = list_entry(entry, struct kbase_device, entry);
+		/* output the total memory usage and cap for this device */
+		seq_printf(sfile, "%-16s  %10u\n",
+				kbdev->devname,
+				atomic_read(&(kbdev->memdev.used_pages)));
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_for_each_entry(element, &kbdev->kctx_list, link) {
+			/* output the memory usage and cap for each kctx
+			* opened on this device */
+			seq_printf(sfile, "  %s-0x%p %10u\n",
+				"kctx",
+				element->kctx,
+				atomic_read(&(element->kctx->used_pages)));
+		}
+		mutex_unlock(&kbdev->kctx_list_lock);
+	}
+	kbase_dev_list_put(kbdev_list);
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for gpu_memory
+ */
+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_gpu_memory_seq_show , NULL);
+}
+
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+	.open = kbasep_gpu_memory_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ *  Initialize debugfs entry for gpu_memory
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("gpu_memory", S_IRUGO,
+			kbdev->mali_debugfs_directory, NULL,
+			&kbasep_gpu_memory_debugfs_fops);
+	return;
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	return;
+}
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
new file mode 100755
index 0000000..3cf30a4
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_H
+#define _KBASE_GPU_MEMORY_H
+
+#include <mali_kbase.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+#endif  /*_KBASE_GPU_MEMORY_H*/
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
new file mode 100755
index 0000000..7f77dba
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -0,0 +1,314 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include <linux/clk.h>
+
+/**
+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
+ * @value:  The value from which to extract bits.
+ * @offset: The first bit to extract (0 being the LSB).
+ * @size:   The number of bits to extract.
+ *
+ * Context: @offset + @size <= 32.
+ *
+ * Return: Bits [@offset, @offset + @size) from @value.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+	(((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
+
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props)
+{
+	kbase_gpu_clk_speed_func get_gpu_speed_mhz;
+	u32 gpu_speed_mhz;
+	int rc = 1;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kbase_props);
+
+	/* Current GPU speed is requested from the system integrator via the GPU_SPEED_FUNC function.
+	 * If that function fails, or the function is not provided by the system integrator, we report the maximum
+	 * GPU speed as specified by GPU_FREQ_KHZ_MAX.
+	 */
+	get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC;
+	if (get_gpu_speed_mhz != NULL) {
+		rc = get_gpu_speed_mhz(&gpu_speed_mhz);
+#ifdef CONFIG_MALI_DEBUG
+		/* Issue a warning message when the reported GPU speed falls outside the min/max range */
+		if (rc == 0) {
+			u32 gpu_speed_khz = gpu_speed_mhz * 1000;
+
+			if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min ||
+					gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
+				dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n",
+						(unsigned long)gpu_speed_khz,
+						(unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min,
+						(unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
+		}
+#endif				/* CONFIG_MALI_DEBUG */
+	}
+	if (kctx->kbdev->clock) {
+		gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000;
+		rc = 0;
+	}
+	if (rc != 0)
+		gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
+
+	kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz;
+
+	memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
+
+	/* Before API 8.2 they expect L3 cache info here, which was always 0 */
+	if (kctx->api_version < KBASE_API_VERSION(8, 2))
+		kbase_props->props.raw_props.suspend_size = 0;
+
+	return 0;
+}
+
+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+	struct mali_base_gpu_coherent_group *current_group;
+	u64 group_present;
+	u64 group_mask;
+	u64 first_set, first_set_prev;
+	u32 num_groups = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != props);
+
+	props->coherency_info.coherency = props->raw_props.mem_features;
+	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+	if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+		/* Group is l2 coherent */
+		group_present = props->raw_props.l2_present;
+	} else {
+		/* Group is l1 coherent */
+		group_present = props->raw_props.shader_present;
+	}
+
+	/*
+	 * The coherent group mask can be computed from the l2 present
+	 * register.
+	 *
+	 * For the coherent group n:
+	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
+	 * where first_set is group_present with only its nth set-bit kept
+	 * (i.e. the position from where a new group starts).
+	 *
+	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
+	 * The first mask is:
+	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
+	 *               = (0x0..010     - 1) & ~(0x0..01      - 1)
+	 *               =  0x0..00f
+	 * The second mask is:
+	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
+	 *               = (0x0..100     - 1) & ~(0x0..010     - 1)
+	 *               =  0x0..0f0
+	 * And so on until all the bits from group_present have been cleared
+	 * (i.e. there is no group left).
+	 */
+
+	current_group = props->coherency_info.group;
+	first_set = group_present & ~(group_present - 1);
+
+	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
+		group_present -= first_set;	/* Clear the current group bit */
+		first_set_prev = first_set;
+
+		first_set = group_present & ~(group_present - 1);
+		group_mask = (first_set - 1) & ~(first_set_prev - 1);
+
+		/* Populate the coherent_group structure for each group */
+		current_group->core_mask = group_mask & props->raw_props.shader_present;
+		current_group->num_cores = hweight64(current_group->core_mask);
+
+		num_groups++;
+		current_group++;
+	}
+
+	if (group_present != 0)
+		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+	props->coherency_info.num_groups = num_groups;
+}
+
+/**
+ * kbase_gpuprops_get_props - Get the GPU configuration
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values from the GPU configuration
+ * registers. Only the raw properties are filled in this function
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	struct kbase_gpuprops_regdump regdump;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get(kbdev, &regdump);
+
+	gpu_props->raw_props.gpu_id = regdump.gpu_id;
+	gpu_props->raw_props.tiler_features = regdump.tiler_features;
+	gpu_props->raw_props.mem_features = regdump.mem_features;
+	gpu_props->raw_props.mmu_features = regdump.mmu_features;
+	gpu_props->raw_props.l2_features = regdump.l2_features;
+	gpu_props->raw_props.suspend_size = regdump.suspend_size;
+
+	gpu_props->raw_props.as_present = regdump.as_present;
+	gpu_props->raw_props.js_present = regdump.js_present;
+	gpu_props->raw_props.shader_present = ((u64) regdump.shader_present_hi << 32) + regdump.shader_present_lo;
+	gpu_props->raw_props.tiler_present = ((u64) regdump.tiler_present_hi << 32) + regdump.tiler_present_lo;
+	gpu_props->raw_props.l2_present = ((u64) regdump.l2_present_hi << 32) + regdump.l2_present_lo;
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		gpu_props->raw_props.js_features[i] = regdump.js_features[i];
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
+
+	gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
+	gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
+	gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
+	gpu_props->raw_props.thread_features = regdump.thread_features;
+}
+
+/**
+ * kbase_gpuprops_calculate_props - Calculate the derived properties
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev:     The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values derived from the GPU
+ * configuration registers
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	int i;
+
+	/* Populate the base_gpu_props structure */
+	gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+	gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+	gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+	gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+
+	/* Field with number of l2 slices is added to MEM_FEATURES register
+	 * since t76x. Below code assumes that for older GPU reserved bits will
+	 * be read as zero. */
+	gpu_props->l2_props.num_l2_slices =
+		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+
+	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+	if (gpu_props->raw_props.thread_max_threads == 0)
+		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+	else
+		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+	else
+		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+	if (gpu_props->raw_props.thread_max_barrier_size == 0)
+		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+	else
+		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+	gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+
+	/* If values are not specified, then use defaults */
+	if (gpu_props->thread_props.max_registers == 0) {
+		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+	}
+	/* Initialize the coherent_group structure for each group */
+	kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *gpu_props;
+	struct gpu_raw_gpu_props *raw;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	gpu_props = &kbdev->gpu_props;
+	raw = &gpu_props->props.raw_props;
+
+	/* Initialize the base_gpu_props structure from the hardware */
+	kbase_gpuprops_get_props(&gpu_props->props, kbdev);
+
+	/* Populate the derived properties */
+	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
+
+	/* Populate kbase-only fields */
+	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
+	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
+
+	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
+
+	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
+	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
+
+	gpu_props->num_cores = hweight64(raw->shader_present);
+	gpu_props->num_core_groups = hweight64(raw->l2_present);
+	gpu_props->num_address_spaces = hweight32(raw->as_present);
+	gpu_props->num_job_slots = hweight32(raw->js_present);
+}
+
+void kbase_gpuprops_set_features(struct kbase_device *kbdev)
+{
+	base_gpu_props *gpu_props;
+	struct kbase_gpuprops_regdump regdump;
+
+	gpu_props = &kbdev->gpu_props.props;
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get_features(kbdev, &regdump);
+
+	/*
+	 * Copy the raw value from the register, later this will get turned
+	 * into the selected coherency mode.
+	 */
+	gpu_props->raw_props.coherency_mode = regdump.coherency_features;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
new file mode 100755
index 0000000..f3c95cc
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev		The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_set_features - Set up Kbase GPU properties
+ * @kbdev:   Device pointer
+ *
+ * This function sets up GPU properties that are dependent on the hardware
+ * features bitmask. This function must be preceeded by a call to
+ * kbase_hw_set_features_mask().
+ */
+void kbase_gpuprops_set_features(struct kbase_device *kbdev);
+
+/**
+ * @brief Provide GPU properties to userside through UKU call.
+ *
+ * Fill the struct kbase_uk_gpuprops with values from GPU configuration registers.
+ *
+ * @param kctx		The struct kbase_context structure
+ * @param kbase_props	A copy of the struct kbase_uk_gpuprops structure from userspace
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props);
+
+#endif				/* _KBASE_GPUPROPS_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
new file mode 100755
index 0000000..781375a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops_types.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_TYPES_H_
+#define _KBASE_GPUPROPS_TYPES_H_
+
+#include "mali_base_kernel.h"
+
+#define KBASE_GPU_SPEED_MHZ    123
+#define KBASE_GPU_PC_SIZE_LOG2 24U
+
+struct kbase_gpuprops_regdump {
+	u32 gpu_id;
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 tiler_features;
+	u32 mem_features;
+	u32 mmu_features;
+	u32 as_present;
+	u32 js_present;
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 shader_present_lo;
+	u32 shader_present_hi;
+	u32 tiler_present_lo;
+	u32 tiler_present_hi;
+	u32 l2_present_lo;
+	u32 l2_present_hi;
+	u32 coherency_features;
+};
+
+struct kbase_gpu_cache_props {
+	u8 associativity;
+	u8 external_bus_width;
+};
+
+struct kbase_gpu_mem_props {
+	u8 core_group;
+};
+
+struct kbase_gpu_mmu_props {
+	u8 va_bits;
+	u8 pa_bits;
+};
+
+struct kbase_gpu_props {
+	/* kernel-only properties */
+	u8 num_cores;
+	u8 num_core_groups;
+	u8 num_address_spaces;
+	u8 num_job_slots;
+
+	struct kbase_gpu_cache_props l2_props;
+
+	struct kbase_gpu_mem_props mem;
+	struct kbase_gpu_mmu_props mmu;
+
+	/**
+	 * Implementation specific irq throttle value (us), should be adjusted during integration.
+	 */
+	int irq_throttle_time_us;
+
+	/* Properties shared with userspace */
+	base_gpu_props props;
+};
+
+#endif				/* _KBASE_GPUPROPS_TYPES_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
new file mode 100755
index 0000000..de2461f
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -0,0 +1,263 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Run-time work-arounds helpers
+ */
+
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_midg_regmap.h>
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+
+void kbase_hw_set_features_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_feature *features;
+	u32 gpu_id;
+	u32 product_id;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			features = base_hw_features_tMIx;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	} else {
+		switch (product_id) {
+		case GPU_ID_PI_TFRX:
+			/* FALLTHROUGH */
+		case GPU_ID_PI_T86X:
+			features = base_hw_features_tFxx;
+			break;
+		case GPU_ID_PI_T83X:
+			features = base_hw_features_t83x;
+			break;
+		case GPU_ID_PI_T82X:
+			features = base_hw_features_t82x;
+			break;
+		case GPU_ID_PI_T76X:
+			features = base_hw_features_t76x;
+			break;
+		case GPU_ID_PI_T72X:
+			features = base_hw_features_t72x;
+			break;
+		case GPU_ID_PI_T62X:
+			features = base_hw_features_t62x;
+			break;
+		case GPU_ID_PI_T60X:
+			features = base_hw_features_t60x;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	}
+
+	for (; *features != BASE_HW_FEATURE_END; features++)
+		set_bit(*features, &kbdev->hw_features_mask[0]);
+}
+
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues;
+	u32 gpu_id;
+	u32 product_id;
+	u32 impl_tech;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
+
+	if (impl_tech != IMPLEMENTATION_MODEL) {
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id) {
+			case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 1):
+				issues = base_hw_issues_tMIx_r0p0_05dev0;
+				break;
+			case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 2):
+				issues = base_hw_issues_tMIx_r0p0;
+				break;
+			default:
+				if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+							GPU_ID2_PRODUCT_TMIX) {
+					issues = base_hw_issues_tMIx_r0p0;
+				} else {
+					dev_err(kbdev->dev,
+						"Unknown GPU ID %x", gpu_id);
+					return -EINVAL;
+				}
+			}
+		} else {
+			switch (gpu_id) {
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+				issues = base_hw_issues_t60x_r0p0_15dev0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+				issues = base_hw_issues_t60x_r0p0_eac;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+				issues = base_hw_issues_t60x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+				issues = base_hw_issues_t62x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+				issues = base_hw_issues_t62x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+				issues = base_hw_issues_t62x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+				issues = base_hw_issues_t76x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+				issues = base_hw_issues_t76x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+				issues = base_hw_issues_t76x_r0p1_50rel0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+				issues = base_hw_issues_t76x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+				issues = base_hw_issues_t76x_r0p3;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+				issues = base_hw_issues_t76x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+				issues = base_hw_issues_t72x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+				issues = base_hw_issues_t72x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+				issues = base_hw_issues_t72x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+				issues = base_hw_issues_tFRx_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+				issues = base_hw_issues_tFRx_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8):
+				issues = base_hw_issues_tFRx_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
+				issues = base_hw_issues_tFRx_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+				issues = base_hw_issues_t86x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8):
+				issues = base_hw_issues_t86x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
+				issues = base_hw_issues_t86x_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
+				issues = base_hw_issues_t83x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8):
+				issues = base_hw_issues_t83x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
+				issues = base_hw_issues_t82x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
+				issues = base_hw_issues_t82x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8):
+				issues = base_hw_issues_t82x_r1p0;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		}
+	} else {
+		/* Software model */
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+			case GPU_ID2_PRODUCT_TMIX:
+				issues = base_hw_issues_model_tMIx;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		} else {
+			switch (product_id) {
+			case GPU_ID_PI_T60X:
+				issues = base_hw_issues_model_t60x;
+				break;
+			case GPU_ID_PI_T62X:
+				issues = base_hw_issues_model_t62x;
+				break;
+			case GPU_ID_PI_T72X:
+				issues = base_hw_issues_model_t72x;
+				break;
+			case GPU_ID_PI_T76X:
+				issues = base_hw_issues_model_t76x;
+				break;
+			case GPU_ID_PI_TFRX:
+				issues = base_hw_issues_model_tFRx;
+				break;
+			case GPU_ID_PI_T86X:
+				issues = base_hw_issues_model_t86x;
+				break;
+			case GPU_ID_PI_T83X:
+				issues = base_hw_issues_model_t83x;
+				break;
+			case GPU_ID_PI_T82X:
+				issues = base_hw_issues_model_t82x;
+				break;
+			default:
+				dev_err(kbdev->dev, "Unknown GPU ID %x",
+					gpu_id);
+				return -EINVAL;
+			}
+		}
+	}
+
+	dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT);
+
+	for (; *issues != BASE_HW_ISSUE_END; issues++)
+		set_bit(*issues, &kbdev->hw_issues_mask[0]);
+
+	return 0;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
new file mode 100755
index 0000000..fce7d29
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Run-time work-arounds helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+	test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+	test_bit(feature, &(kbdev)->hw_features_mask[0])
+
+/**
+ * @brief Set the HW issues mask depending on the GPU ID
+ */
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif				/* _KBASE_HW_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
new file mode 100755
index 0000000..b09be99
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access backend common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_BACKEND_H_
+#define _KBASE_HWACCESS_BACKEND_H_
+
+/**
+ * kbase_backend_early_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_early_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
new file mode 100755
index 0000000..261453e
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_gpu_defs.h
+ * HW access common definitions
+ */
+
+#ifndef _KBASE_HWACCESS_DEFS_H_
+#define _KBASE_HWACCESS_DEFS_H_
+
+#include <mali_kbase_jm_defs.h>
+
+/* The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
+ * accessing this structure */
+struct kbase_hwaccess_data {
+	struct kbase_context *active_kctx;
+
+	struct kbase_backend_data backend;
+};
+
+#endif /* _KBASE_HWACCESS_DEFS_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
new file mode 100755
index 0000000..cf8a813
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ *				  GPU
+ * @kbdev:	Device pointer
+ * @regdump:	Pointer to struct kbase_gpuprops_regdump structure
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+/**
+ * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU
+ * @kbdev:   Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads GPU properties that are dependent on the hardware
+ * features bitmask
+ */
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
new file mode 100755
index 0000000..5de2b75
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include <mali_kbase_instr_defs.h>
+
+/**
+ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * @kbdev:	Kbase device
+ * @kctx:	Kbase context
+ * @setup:	HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx:	Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true,
+ * of call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ *				       completed.
+ * @kctx:	Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ *				     completed
+ * @kctx:	Kbase context
+ * @success:	Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx:	Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_init() - Terminate the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
new file mode 100755
index 0000000..2efa293
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -0,0 +1,337 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_JM_H_
+#define _KBASE_HWACCESS_JM_H_
+
+/**
+ * kbase_backend_run_atom() - Run an atom on the GPU
+ * @kbdev:	Device pointer
+ * @atom:	Atom to run
+ *
+ * Caller must hold the HW access lock
+ */
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_find_free_address_space() - Find a free address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * If no address spaces are currently free, then this function can evict an
+ * idle context from the runpool, freeing up the address space it was using.
+ *
+ * The address space is marked as in use. The caller must either assign a
+ * context using kbase_gpu_use_ctx(), or release it using
+ * kbase_gpu_release_free_address_space()
+ *
+ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none
+ *	   available
+ */
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_free_address_space() - Release an address space.
+ * @kbdev:	Device pointer
+ * @as_nr:	Address space to release
+ *
+ * The address space must have been returned by
+ * kbase_gpu_find_free_address_space().
+ */
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+						int as_nr);
+
+/**
+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the
+ *			     provided address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @as_nr:	Free address space to use
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * Return: true if successful, false if ASID not assigned. If kctx->as_pending
+ *         is true then ASID assignment will complete at some point in the
+ *         future and will re-start scheduling, otherwise no ASIDs are available
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr);
+
+/**
+ * kbase_backend_use_ctx_sched() - Activate a context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * The context must already be scheduled and assigned to an address space. If
+ * the context is not scheduled, then kbase_gpu_use_ctx() should be used
+ * instead.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if context is now active, false otherwise (ie if context does
+ *	   not have an address space assigned)
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+					struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
+ *                                 de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold as->transaction_mutex and runpool_irq.lock
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+				struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will
+ *                                   de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold as->transaction_mutex
+ *
+ * This function must perform any operations that could not be performed in IRQ
+ * context by kbase_backend_release_ctx_irq().
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_complete_wq() - Perform backend-specific actions required on
+ *				 completing an atom.
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the atom to complete
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ *
+ * Return: true if atom has completed, false if atom should be re-submitted
+ */
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions
+ *                                        required on completing an atom, after
+ *                                        any scheduling has taken place.
+ * @kbdev:         Device pointer
+ * @core_req:      Core requirements of atom
+ * @affinity:      Affinity of atom
+ * @coreref_state: Coreref state of atom
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ */
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state);
+
+/**
+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
+ *			   and remove any others from the ringbuffers.
+ * @kbdev:		Device pointer
+ * @end_timestamp:	Timestamp of reset
+ */
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
+
+/**
+ * kbase_backend_inspect_head() - Return the atom currently at the head of slot
+ *				  @js
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
+ *                              @js
+ * @kbdev: Device pointer
+ * @js:    Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
+ *				      slot.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot
+ */
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
+ *					that are currently on the GPU.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot @js that are currently on the GPU.
+ */
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
+ *				       has changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg starting/stopping
+ * scheduling timers).
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_slot_free() - Return the number of jobs that can be currently
+ *			       submitted to slot @js.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of jobs that can be submitted.
+ */
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
+ *                                        running from a context
+ * @kctx: Context pointer
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting
+ * context from the hardware.
+ */
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+
+/**
+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and
+ *                               to be descheduled.
+ * @kctx: Context pointer
+ *
+ * This should be called following kbase_js_zap_context(), to ensure the context
+ * can be safely destroyed.
+ */
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
+
+/**
+ * kbase_backend_get_current_flush_id - Return the current flush ID
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: the current flush ID to be recorded for each job chain
+ */
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it returns
+ * true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_locked - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+#endif
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom);
+
+#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
new file mode 100755
index 0000000..bae7c0d
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_pm.h
+ * HW access power manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_PM_H_
+#define _KBASE_HWACCESS_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+#include <mali_kbase_pm_defs.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/* Functions common to all HW access backends */
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return 0 if the power management framework was successfully
+ *         initialized.
+ */
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_hwaccess_pm_powerup - Power up the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
+ *
+ * Power up GPU after all modules have been initialized and interrupt handlers
+ * installed.
+ *
+ * Return: 0 if powerup was successful.
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ *
+ * Should ensure that no new interrupts are generated, but allow any currently
+ * running interrupt handlers to complete successfully. The GPU is forced off by
+ * the time this function returns, regardless of whether or not the active power
+ * policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to suspend the GPU
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to resume the GPU from a suspend
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for activating the GPU. Called when the first
+ * context goes active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for idling the GPU. Called when the last
+ * context goes idle.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev         The kbase device structure for the device (must be a
+ *                      valid pointer)
+ * @param new_core_mask_js0 The core mask to use for job slot 0
+ * @param new_core_mask_js0 The core mask to use for job slot 1
+ * @param new_core_mask_js0 The core mask to use for job slot 2
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+int set_policy_by_name(struct kbase_device *kbdev, const char *name);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+					const struct kbase_pm_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
+
+#endif /* _KBASE_HWACCESS_PM_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
new file mode 100755
index 0000000..89d26ea
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
new file mode 100755
index 0000000..cf7bf1b
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,66 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_HWCNT_READER_H_
+#define _KBASE_HWCNT_READER_H_
+
+/* The ids of ioctl commands. */
+#define KBASE_HWCNT_READER 0xBE
+#define KBASE_HWCNT_READER_GET_HWVER       _IOR(KBASE_HWCNT_READER, 0x00, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32)
+#define KBASE_HWCNT_READER_DUMP            _IOW(KBASE_HWCNT_READER, 0x10, u32)
+#define KBASE_HWCNT_READER_CLEAR           _IOW(KBASE_HWCNT_READER, 0x11, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER      _IOR(KBASE_HWCNT_READER, 0x20,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_PUT_BUFFER      _IOW(KBASE_HWCNT_READER, 0x21,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_SET_INTERVAL    _IOW(KBASE_HWCNT_READER, 0x30, u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT    _IOW(KBASE_HWCNT_READER, 0x40, u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT   _IOW(KBASE_HWCNT_READER, 0x41, u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32)
+
+/**
+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
+ * @timestamp:  time when sample was collected
+ * @event_id:   id of an event that triggered sample collection
+ * @buffer_idx: position in sampling area where sample buffer was stored
+ */
+struct kbase_hwcnt_reader_metadata {
+	u64 timestamp;
+	u32 event_id;
+	u32 buffer_idx;
+};
+
+/**
+ * enum base_hwcnt_reader_event - hwcnt dumping events
+ * @BASE_HWCNT_READER_EVENT_MANUAL:   manual request for dump
+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
+ * @BASE_HWCNT_READER_EVENT_PREJOB:   prejob dump request
+ * @BASE_HWCNT_READER_EVENT_POSTJOB:  postjob dump request
+ * @BASE_HWCNT_READER_EVENT_COUNT:    number of supported events
+ */
+enum base_hwcnt_reader_event {
+	BASE_HWCNT_READER_EVENT_MANUAL,
+	BASE_HWCNT_READER_EVENT_PERIODIC,
+	BASE_HWCNT_READER_EVENT_PREJOB,
+	BASE_HWCNT_READER_EVENT_POSTJOB,
+
+	BASE_HWCNT_READER_EVENT_COUNT
+};
+
+#endif /* _KBASE_HWCNT_READER_H_ */
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.c
new file mode 100755
index 0000000..fda317b
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.c
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+
+void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(!kbdev->hwcnt.suspended_kctx);
+
+	kctx = kbdev->hwcnt.kctx;
+	kbdev->hwcnt.suspended_kctx = kctx;
+
+	/* Relevant state was saved into hwcnt.suspended_state when enabling the
+	 * counters */
+
+	if (kctx) {
+		KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.flags &
+						KBASE_CTX_FLAG_PRIVILEGED);
+		kbase_instr_hwcnt_disable(kctx);
+	}
+}
+
+void kbase_instr_hwcnt_resume(struct kbase_device *kbdev)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	kctx = kbdev->hwcnt.suspended_kctx;
+	kbdev->hwcnt.suspended_kctx = NULL;
+
+	if (kctx) {
+		int err;
+
+		err = kbase_instr_hwcnt_enable_internal(kbdev, kctx,
+						&kbdev->hwcnt.suspended_state);
+		WARN(err, "Failed to restore instrumented hardware counters on resume\n");
+	}
+}
+
+int kbase_instr_hwcnt_enable(struct kbase_context *kctx,
+		struct kbase_uk_hwcnt_setup *setup)
+{
+	struct kbase_device *kbdev;
+	int err;
+
+	kbdev = kctx->kbdev;
+
+	/* Mark the context as active so the GPU is kept turned on */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread. */
+	kbase_pm_context_active(kbdev);
+
+	/* Schedule the context in */
+	kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, setup);
+	if (err) {
+		/* Release the context. This had its own Power Manager Active
+		 * reference */
+		kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+		/* Also release our Power Manager Active reference */
+		kbase_pm_context_idle(kbdev);
+	}
+
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_enable);
+
+int kbase_instr_hwcnt_disable(struct kbase_context *kctx)
+{
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	err = kbase_instr_hwcnt_disable_internal(kctx);
+	if (err)
+		goto out;
+
+	/* Release the context. This had its own Power Manager Active reference
+	 */
+	kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+	/* Also release our Power Manager Active reference */
+	kbase_pm_context_idle(kbdev);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+									kctx);
+out:
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_disable);
+
+int kbase_instr_hwcnt_dump(struct kbase_context *kctx)
+{
+	int err;
+
+	err = kbase_instr_hwcnt_request_dump(kctx);
+	if (err)
+		return err;
+
+	err = kbase_instr_hwcnt_wait_for_dump(kctx);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump);
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.h
new file mode 100755
index 0000000..ac3355e
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.h
@@ -0,0 +1,75 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Instrumentation API definitions
+ */
+
+#ifndef _KBASE_INSTR_H_
+#define _KBASE_INSTR_H_
+
+#include <mali_kbase_hwaccess_instr.h>
+
+/**
+ * kbase_instr_hwcnt_enable() - Enable HW counters collection
+ * @kctx:	Kbase context
+ * @setup:	&struct kbase_uk_hwcnt_setup containing configuration
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable(struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable() - Disable HW counters collection
+ * @kctx:	Kbase context
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump() - Trigger dump of HW counters and wait for
+ *                            completion
+ * @kctx:	Kbase context
+ *
+ * Context: might sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_suspend() - GPU is suspending, stop HW counter collection
+ * @kbdev:	Kbase device
+ *
+ * It's assumed that there's only one privileged context.
+ *
+ * Safe to do this without lock when doing an OS suspend, because it only
+ * changes in response to user-space IOCTLs
+ */
+void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_resume() - GPU is resuming, resume HW counter collection
+ * @kbdev:	Kbase device
+ */
+void kbase_instr_hwcnt_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c
new file mode 100755
index 0000000..c579d0a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c
@@ -0,0 +1,431 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/of.h>
+#include <linux/sysfs.h>
+
+#include <mali_kbase.h>
+
+#define NR_IPA_GROUPS 8
+
+struct kbase_ipa_context;
+
+/**
+ * struct ipa_group - represents a single IPA group
+ * @name:               name of the IPA group
+ * @capacitance:        capacitance constant for IPA group
+ * @calc_power:         function to calculate power for IPA group
+ */
+struct ipa_group {
+	const char *name;
+	u32 capacitance;
+	u32 (*calc_power)(struct kbase_ipa_context *,
+			struct ipa_group *);
+};
+
+#include <mali_kbase_ipa_tables.h>
+
+/**
+ * struct kbase_ipa_context - IPA context per device
+ * @kbdev:              pointer to kbase device
+ * @groups:             array of IPA groups for this context
+ * @vinstr_cli:         vinstr client handle
+ * @vinstr_buffer:      buffer to dump hardware counters onto
+ * @ipa_lock:           protects the entire IPA context
+ */
+struct kbase_ipa_context {
+	struct kbase_device *kbdev;
+	struct ipa_group groups[NR_IPA_GROUPS];
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct mutex ipa_lock;
+};
+
+static ssize_t show_ipa_group(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbase_ipa_context *ctx = kbdev->ipa_ctx;
+	ssize_t count = -EINVAL;
+	size_t i;
+
+	mutex_lock(&ctx->ipa_lock);
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		if (!strcmp(ctx->groups[i].name, attr->attr.name)) {
+			count = snprintf(buf, PAGE_SIZE, "%lu\n",
+				(unsigned long)ctx->groups[i].capacitance);
+			break;
+		}
+	}
+	mutex_unlock(&ctx->ipa_lock);
+	return count;
+}
+
+static ssize_t set_ipa_group(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbase_ipa_context *ctx = kbdev->ipa_ctx;
+	unsigned long capacitance;
+	size_t i;
+	int err;
+
+	err = kstrtoul(buf, 0, &capacitance);
+	if (err < 0)
+		return err;
+	if (capacitance > U32_MAX)
+		return -ERANGE;
+
+	mutex_lock(&ctx->ipa_lock);
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		if (!strcmp(ctx->groups[i].name, attr->attr.name)) {
+			ctx->groups[i].capacitance = capacitance;
+			mutex_unlock(&ctx->ipa_lock);
+			return count;
+		}
+	}
+	mutex_unlock(&ctx->ipa_lock);
+	return -EINVAL;
+}
+
+static DEVICE_ATTR(group0, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group1, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group2, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group3, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group4, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group5, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group6, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group7, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+
+static struct attribute *kbase_ipa_attrs[] = {
+	&dev_attr_group0.attr,
+	&dev_attr_group1.attr,
+	&dev_attr_group2.attr,
+	&dev_attr_group3.attr,
+	&dev_attr_group4.attr,
+	&dev_attr_group5.attr,
+	&dev_attr_group6.attr,
+	&dev_attr_group7.attr,
+	NULL,
+};
+
+static struct attribute_group kbase_ipa_attr_group = {
+	.name = "ipa",
+	.attrs = kbase_ipa_attrs,
+};
+
+static void init_ipa_groups(struct kbase_ipa_context *ctx)
+{
+	memcpy(ctx->groups, ipa_groups_def, sizeof(ctx->groups));
+}
+
+#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	struct device_node *np, *child;
+	struct ipa_group *group;
+	size_t nr_groups;
+	size_t i;
+	int err;
+
+	np = of_get_child_by_name(kbdev->dev->of_node, "ipa-groups");
+	if (!np)
+		return 0;
+
+	nr_groups = 0;
+	for_each_available_child_of_node(np, child)
+		nr_groups++;
+	if (!nr_groups || nr_groups > ARRAY_SIZE(ctx->groups)) {
+		dev_err(kbdev->dev, "invalid number of IPA groups: %zu", nr_groups);
+		err = -EINVAL;
+		goto err0;
+	}
+
+	for_each_available_child_of_node(np, child) {
+		const char *name;
+		u32 capacitance;
+
+		name = of_get_property(child, "label", NULL);
+		if (!name) {
+			dev_err(kbdev->dev, "label missing for IPA group");
+			err = -EINVAL;
+			goto err0;
+		}
+		err = of_property_read_u32(child, "capacitance",
+				&capacitance);
+		if (err < 0) {
+			dev_err(kbdev->dev, "capacitance missing for IPA group");
+			goto err0;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+			group = &ctx->groups[i];
+			if (!strcmp(group->name, name)) {
+				group->capacitance = capacitance;
+				break;
+			}
+		}
+	}
+
+	of_node_put(np);
+	return 0;
+err0:
+	of_node_put(np);
+	return err;
+}
+#else
+static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx)
+{
+	return 0;
+}
+#endif
+
+static int reset_ipa_groups(struct kbase_ipa_context *ctx)
+{
+	init_ipa_groups(ctx);
+	return update_ipa_groups_from_dt(ctx);
+}
+
+static inline u32 read_hwcnt(struct kbase_ipa_context *ctx,
+	u32 offset)
+{
+	u8 *p = ctx->vinstr_buffer;
+
+	return *(u32 *)&p[offset];
+}
+
+static inline u32 add_saturate(u32 a, u32 b)
+{
+	if (U32_MAX - a < b)
+		return U32_MAX;
+	return a + b;
+}
+
+/*
+ * Calculate power estimation based on hardware counter `c'
+ * across all shader cores.
+ */
+static u32 calc_power_sc_single(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	u64 core_mask;
+	u32 base = 0, r = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			u64 n = read_hwcnt(ctx, base + c);
+			u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+			u32 s = group->capacitance;
+
+			r = add_saturate(r, div_u64(n * s, d));
+		}
+		base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+		core_mask >>= 1;
+	}
+	return r;
+}
+
+/*
+ * Calculate power estimation based on hardware counter `c1'
+ * and `c2' across all shader cores.
+ */
+static u32 calc_power_sc_double(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c1, u32 c2)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	u64 core_mask;
+	u32 base = 0, r = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			u64 n = read_hwcnt(ctx, base + c1);
+			u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+			u32 s = group->capacitance;
+
+			r = add_saturate(r, div_u64(n * s, d));
+			n = read_hwcnt(ctx, base + c2);
+			r = add_saturate(r, div_u64(n * s, d));
+		}
+		base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+		core_mask >>= 1;
+	}
+	return r;
+}
+
+static u32 calc_power_single(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c)
+{
+	u64 n = read_hwcnt(ctx, c);
+	u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+	u32 s = group->capacitance;
+
+	return div_u64(n * s, d);
+}
+
+static u32 calc_power_group0(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_single(ctx, group, L2_ANY_LOOKUP);
+}
+
+static u32 calc_power_group1(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_single(ctx, group, TILER_ACTIVE);
+}
+
+static u32 calc_power_group2(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, FRAG_ACTIVE);
+}
+
+static u32 calc_power_group3(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_double(ctx, group, VARY_SLOT_32,
+			VARY_SLOT_16);
+}
+
+static u32 calc_power_group4(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, TEX_COORD_ISSUE);
+}
+
+static u32 calc_power_group5(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, EXEC_INSTR_COUNT);
+}
+
+static u32 calc_power_group6(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_double(ctx, group, BEATS_RD_LSC,
+			BEATS_WR_LSC);
+}
+
+static u32 calc_power_group7(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, EXEC_CORE_ACTIVE);
+}
+
+static int attach_vinstr(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	size_t dump_size;
+
+	dump_size = kbase_vinstr_dump_size(kbdev);
+	ctx->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!ctx->vinstr_buffer) {
+		dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
+		return -1;
+	}
+
+	setup.jm_bm = ~0u;
+	setup.shader_bm = ~0u;
+	setup.tiler_bm = ~0u;
+	setup.mmu_l2_bm = ~0u;
+	ctx->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx,
+			&setup, ctx->vinstr_buffer);
+	if (!ctx->vinstr_cli) {
+		dev_err(kbdev->dev, "Failed to register IPA with vinstr core");
+		kfree(ctx->vinstr_buffer);
+		ctx->vinstr_buffer = NULL;
+		return -1;
+	}
+	return 0;
+}
+
+static void detach_vinstr(struct kbase_ipa_context *ctx)
+{
+	if (ctx->vinstr_cli)
+		kbase_vinstr_detach_client(ctx->vinstr_cli);
+	ctx->vinstr_cli = NULL;
+	kfree(ctx->vinstr_buffer);
+	ctx->vinstr_buffer = NULL;
+}
+
+struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_context *ctx;
+	int err;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return NULL;
+
+	mutex_init(&ctx->ipa_lock);
+	ctx->kbdev = kbdev;
+
+	err = reset_ipa_groups(ctx);
+	if (err < 0)
+		goto err0;
+
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_ipa_attr_group);
+	if (err < 0)
+		goto err0;
+
+	return ctx;
+err0:
+	kfree(ctx);
+	return NULL;
+}
+
+void kbase_ipa_term(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+
+	detach_vinstr(ctx);
+	sysfs_remove_group(&kbdev->dev->kobj, &kbase_ipa_attr_group);
+	kfree(ctx);
+}
+
+u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err)
+{
+	struct ipa_group *group;
+	u32 power = 0;
+	size_t i;
+
+	mutex_lock(&ctx->ipa_lock);
+	if (!ctx->vinstr_cli) {
+		*err = attach_vinstr(ctx);
+		if (*err < 0)
+			goto err0;
+	}
+	*err = kbase_vinstr_hwc_dump(ctx->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL);
+	if (*err)
+		goto err0;
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		group = &ctx->groups[i];
+		power = add_saturate(power, group->calc_power(ctx, group));
+	}
+err0:
+	mutex_unlock(&ctx->ipa_lock);
+	return power;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_dynamic_power);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h
new file mode 100755
index 0000000..e2234d1
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h
@@ -0,0 +1,41 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+struct kbase_ipa_context;
+
+/**
+ * kbase_ipa_init - initialize the kbase ipa core
+ * @kbdev:      kbase device
+ *
+ * Return:      pointer to the IPA context or NULL on failure
+ */
+struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_term - terminate the kbase ipa core
+ * @ctx:        pointer to the IPA context
+ */
+void kbase_ipa_term(struct kbase_ipa_context *ctx);
+
+/**
+ * kbase_ipa_dynamic_power - calculate power
+ * @ctx:        pointer to the IPA context
+ * @err:        0 on success, negative on failure
+ *
+ * Return:      returns power consumption as mw @ 1GHz @ 1V
+ */
+u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h
new file mode 100644
index 0000000..101abfe
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h
@@ -0,0 +1,104 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#define NR_BYTES_PER_CNT  4
+#define NR_CNT_PER_BLOCK 64
+
+#define JM_BASE    (0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define TILER_BASE (1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define MMU_BASE   (2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define SC0_BASE   (3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+
+#define GPU_ACTIVE       (JM_BASE    + NR_BYTES_PER_CNT *  6)
+#define TILER_ACTIVE     (TILER_BASE + NR_BYTES_PER_CNT * 45)
+#define L2_ANY_LOOKUP    (MMU_BASE   + NR_BYTES_PER_CNT * 25)
+#define FRAG_ACTIVE      (SC0_BASE   + NR_BYTES_PER_CNT *  4)
+#define EXEC_CORE_ACTIVE (SC0_BASE   + NR_BYTES_PER_CNT * 26)
+#define EXEC_INSTR_COUNT (SC0_BASE   + NR_BYTES_PER_CNT * 28)
+#define TEX_COORD_ISSUE  (SC0_BASE   + NR_BYTES_PER_CNT * 40)
+#define VARY_SLOT_32     (SC0_BASE   + NR_BYTES_PER_CNT * 50)
+#define VARY_SLOT_16     (SC0_BASE   + NR_BYTES_PER_CNT * 51)
+#define BEATS_RD_LSC     (SC0_BASE   + NR_BYTES_PER_CNT * 56)
+#define BEATS_WR_LSC     (SC0_BASE   + NR_BYTES_PER_CNT * 61)
+
+static u32 calc_power_group0(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group1(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group2(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group3(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group4(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group5(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group6(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group7(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+
+static struct ipa_group ipa_groups_def[] = {
+	/* L2 */
+	{
+		.name = "group0",
+		.capacitance = 687,
+		.calc_power = calc_power_group0,
+	},
+	/* TILER */
+	{
+		.name = "group1",
+		.capacitance = 0,
+		.calc_power = calc_power_group1,
+	},
+	/* FRAG */
+	{
+		.name = "group2",
+		.capacitance = 23,
+		.calc_power = calc_power_group2,
+	},
+	/* VARY */
+	{
+		.name = "group3",
+		.capacitance = 108,
+		.calc_power = calc_power_group3,
+	},
+	/* TEX */
+	{
+		.name = "group4",
+		.capacitance = 128,
+		.calc_power = calc_power_group4,
+	},
+	/* EXEC INSTR */
+	{
+		.name = "group5",
+		.capacitance = 249,
+		.calc_power = calc_power_group5,
+	},
+	/* LSC */
+	{
+		.name = "group6",
+		.capacitance = 0,
+		.calc_power = calc_power_group6,
+	},
+	/* EXEC OVERHEAD */
+	{
+		.name = "group7",
+		.capacitance = 29,
+		.calc_power = calc_power_group7,
+	},
+};
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100755
index 0000000..c091ffe
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1600 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <mali_kbase.h>
+#include <mali_kbase_uku.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/ratelimit.h>
+#include <linux/pagemap.h>
+
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_tlstream.h>
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/* Return whether katom will run on the GPU or not. Currently only soft jobs and
+ * dependency-only atoms do not run on the GPU */
+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) ||  \
+			((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==    \
+							BASE_JD_REQ_DEP)))
+/*
+ * This is the kernel side of the API. Only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ *   completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p)
+{
+#ifdef CONFIG_COMPAT
+	if (kctx->is_compat)
+		return compat_ptr(p->compat_value);
+#endif
+	return p->value;
+}
+
+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
+ */
+static int jd_run_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+	if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+		/* Dependency only atom */
+		katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		return 0;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		/* Soft-job */
+		if (katom->will_fail_event_code) {
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			return 0;
+		}
+		if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+			if (!kbase_replay_process(katom))
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		} else if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		}
+		return 0;
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+	/* Queue an action about whether we should try scheduling a context */
+	return kbasep_js_add_job(kctx, katom);
+}
+
+#ifdef CONFIG_KDS
+
+/* Add the katom to the kds waiting list.
+ * Atoms must be added to the waiting list after a successful call to kds_async_waitall.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	list_add_tail(&katom->node, &kctx->waiting_kds_resource);
+}
+
+/* Remove the katom from the kds waiting list.
+ * Atoms must be removed from the waiting list before a call to kds_resource_set_release_sync.
+ * The supplied katom must first have been added to the list with a call to kbase_jd_kds_waiters_add.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_remove(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	list_del(&katom->node);
+}
+
+static void kds_dep_clear(void *callback_parameter, void *callback_extra_parameter)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+	struct kbase_device *kbdev;
+
+	katom = (struct kbase_jd_atom *)callback_parameter;
+	KBASE_DEBUG_ASSERT(katom);
+	ctx = &katom->kctx->jctx;
+	kbdev = katom->kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	mutex_lock(&ctx->lock);
+
+	/* KDS resource has already been satisfied (e.g. due to zapping) */
+	if (katom->kds_dep_satisfied)
+		goto out;
+
+	/* This atom's KDS dependency has now been met */
+	katom->kds_dep_satisfied = true;
+
+	/* Check whether the atom's other dependencies were already met. If
+	 * katom is a GPU atom then the job scheduler may be able to represent
+	 * the dependencies, hence we may attempt to submit it before they are
+	 * met. Other atoms must have had both dependencies resolved */
+	if (IS_GPU_ATOM(katom) ||
+			(!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+			!kbase_jd_katom_dep_atom(&katom->dep[1]))) {
+		/* katom dep complete, attempt to run it */
+		bool resched = false;
+
+		resched = jd_run_atom(katom);
+
+		if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+			/* The atom has already finished */
+			resched |= jd_done_nolock(katom, NULL);
+		}
+
+		if (resched)
+			kbase_js_sched_all(kbdev);
+	}
+ out:
+	mutex_unlock(&ctx->lock);
+}
+
+static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 *  there is a race between job completion and cancellation */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+#endif				/* CONFIG_KDS */
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_KDS
+	if (katom->kds_rset) {
+		struct kbase_jd_context *jctx = &katom->kctx->jctx;
+
+		/*
+		 * As the atom is no longer waiting, remove it from
+		 * the waiting list.
+		 */
+
+		mutex_lock(&jctx->lock);
+		kbase_jd_kds_waiters_remove(katom);
+		mutex_unlock(&jctx->lock);
+
+		/* Release the kds resource or cancel if zapping */
+		kds_resource_set_release_sync(&katom->kds_rset);
+	}
+#endif				/* CONFIG_KDS */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_KDS
+	/* Prevent the KDS resource from triggering the atom in case of zapping */
+	if (katom->kds_rset)
+		katom->kds_dep_satisfied = true;
+#endif				/* CONFIG_KDS */
+
+	kbase_gpu_vm_lock(katom->kctx);
+	/* only roll back if extres is non-NULL */
+	if (katom->extres) {
+		u32 res_no;
+
+		res_no = katom->nr_extres;
+		while (res_no-- > 0) {
+			struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+			struct kbase_va_region *reg;
+
+			reg = kbase_region_tracker_find_region_base_address(
+					katom->kctx,
+					katom->extres[res_no].gpu_address);
+			kbase_unmap_external_resource(katom->kctx, reg, alloc);
+		}
+		kfree(katom->extres);
+		katom->extres = NULL;
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+}
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ */
+
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+	int err_ret_val = -EINVAL;
+	u32 res_no;
+#ifdef CONFIG_KDS
+	u32 kds_res_count = 0;
+	struct kds_resource **kds_resources = NULL;
+	unsigned long *kds_access_bitmap = NULL;
+#endif				/* CONFIG_KDS */
+	struct base_external_resource *input_extres;
+
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+	/* no resources encoded, early out */
+	if (!katom->nr_extres)
+		return -EINVAL;
+
+	katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+	if (NULL == katom->extres) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	/* copy user buffer to the end of our real buffer.
+	 * Make sure the struct sizes haven't changed in a way
+	 * we don't support */
+	BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+	input_extres = (struct base_external_resource *)
+			(((unsigned char *)katom->extres) +
+			(sizeof(*katom->extres) - sizeof(*input_extres)) *
+			katom->nr_extres);
+
+	if (copy_from_user(input_extres,
+			get_compat_pointer(katom->kctx, &user_atom->extres_list),
+			sizeof(*input_extres) * katom->nr_extres) != 0) {
+		err_ret_val = -EINVAL;
+		goto early_err_out;
+	}
+#ifdef CONFIG_KDS
+	/* assume we have to wait for all */
+	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+	kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL);
+
+	if (NULL == kds_resources) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+	kds_access_bitmap = kzalloc(sizeof(unsigned long) * ((katom->nr_extres + BITS_PER_LONG - 1) / BITS_PER_LONG), GFP_KERNEL);
+
+	if (NULL == kds_access_bitmap) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+#endif				/* CONFIG_KDS */
+
+	/* Take the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* need to keep the GPU VM locked while we set up UMM buffers */
+	kbase_gpu_vm_lock(katom->kctx);
+	for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+		struct base_external_resource *res;
+		struct kbase_va_region *reg;
+		struct kbase_mem_phy_alloc *alloc;
+		bool exclusive;
+
+		res = &input_extres[res_no];
+		exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
+				? true : false;
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx,
+				res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+		/* did we find a matching region object? */
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE)) {
+			/* roll back */
+			goto failed_loop;
+		}
+
+		if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
+				(reg->flags & KBASE_REG_SECURE)) {
+			katom->atom_flags |= KBASE_KATOM_FLAG_SECURE;
+		}
+
+		alloc = kbase_map_external_resource(katom->kctx, reg,
+				current->mm
+#ifdef CONFIG_KDS
+				, &kds_res_count, kds_resources,
+				kds_access_bitmap, exclusive
+#endif
+				);
+		if (!alloc) {
+			err_ret_val = -EINVAL;
+			goto failed_loop;
+		}
+
+		/* finish with updating out array with the data we found */
+		/* NOTE: It is important that this is the last thing we do (or
+		 * at least not before the first write) as we overwrite elements
+		 * as we loop and could be overwriting ourself, so no writes
+		 * until the last read for an element.
+		 * */
+		katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+		katom->extres[res_no].alloc = alloc;
+	}
+	/* successfully parsed the extres array */
+	/* drop the vm lock before we call into kds */
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+#ifdef CONFIG_KDS
+	if (kds_res_count) {
+		int wait_failed;
+
+		/* We have resources to wait for with kds */
+		katom->kds_dep_satisfied = false;
+
+		wait_failed = kds_async_waitall(&katom->kds_rset,
+				&katom->kctx->jctx.kds_cb, katom, NULL,
+				kds_res_count, kds_access_bitmap,
+				kds_resources);
+
+		if (wait_failed)
+			goto failed_kds_setup;
+		else
+			kbase_jd_kds_waiters_add(katom);
+	} else {
+		/* Nothing to wait for, so kds dep met */
+		katom->kds_dep_satisfied = true;
+	}
+	kfree(kds_resources);
+	kfree(kds_access_bitmap);
+#endif				/* CONFIG_KDS */
+
+	/* all done OK */
+	return 0;
+
+/* error handling section */
+
+#ifdef CONFIG_KDS
+ failed_kds_setup:
+	/* Lock the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* lock before we unmap */
+	kbase_gpu_vm_lock(katom->kctx);
+#endif				/* CONFIG_KDS */
+
+ failed_loop:
+	/* undo the loop work */
+	while (res_no-- > 0) {
+		struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+
+		kbase_unmap_external_resource(katom->kctx, NULL, alloc);
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+ early_err_out:
+	kfree(katom->extres);
+	katom->extres = NULL;
+#ifdef CONFIG_KDS
+	kfree(kds_resources);
+	kfree(kds_access_bitmap);
+#endif				/* CONFIG_KDS */
+	return err_ret_val;
+}
+
+static inline void jd_resolve_dep(struct list_head *out_list,
+					struct kbase_jd_atom *katom,
+					u8 d)
+{
+	u8 other_d = !d;
+
+	while (!list_empty(&katom->dep_head[d])) {
+		struct kbase_jd_atom *dep_atom;
+		u8 dep_type;
+
+		dep_atom = list_entry(katom->dep_head[d].next,
+				struct kbase_jd_atom, dep_item[d]);
+
+		list_del(katom->dep_head[d].next);
+
+		dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+		kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+		if (katom->event_code != BASE_JD_EVENT_DONE &&
+			(dep_type != BASE_JD_DEP_TYPE_ORDER)) {
+#ifdef CONFIG_KDS
+			if (!dep_atom->kds_dep_satisfied) {
+				/* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and
+				 * do nothing. If the callback doesn't happen then kbase_jd_post_external_resources will clean up
+				 */
+				dep_atom->kds_dep_satisfied = true;
+			}
+#endif
+
+			dep_atom->event_code = katom->event_code;
+			KBASE_DEBUG_ASSERT(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED);
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY)
+					!= BASE_JD_REQ_SOFT_REPLAY) {
+				dep_atom->will_fail_event_code =
+					dep_atom->event_code;
+			} else {
+				dep_atom->status =
+					KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+		}
+		if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) {
+#ifdef CONFIG_KDS
+			if (dep_atom->kds_dep_satisfied)
+#endif
+				list_add_tail(&dep_atom->dep_item[0], out_list);
+		}
+	}
+}
+
+KBASE_EXPORT_TEST_API(jd_resolve_dep);
+
+#if MALI_CUSTOMER_RELEASE == 0
+static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	kbdev->force_replay_count++;
+
+	if (kbdev->force_replay_count >= kbdev->force_replay_limit) {
+		kbdev->force_replay_count = 0;
+		katom->event_code = BASE_JD_EVENT_FORCE_REPLAY;
+
+		if (kbdev->force_replay_random)
+			kbdev->force_replay_limit =
+			   (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1;
+
+		dev_info(kbdev->dev, "force_replay : promoting to error\n");
+	}
+}
+
+/** Test to see if atom should be forced to fail.
+ *
+ * This function will check if an atom has a replay job as a dependent. If so
+ * then it will be considered for forced failure. */
+static void jd_check_force_failure(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int i;
+
+	if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) ||
+	    (katom->core_req & BASEP_JD_REQ_EVENT_NEVER))
+		return;
+
+	for (i = 1; i < BASE_JD_ATOM_COUNT; i++) {
+		if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom ||
+		    kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
+			struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
+
+			if ((dep_atom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==
+						     BASE_JD_REQ_SOFT_REPLAY &&
+			    (dep_atom->core_req & kbdev->force_replay_core_req)
+					     == kbdev->force_replay_core_req) {
+				jd_force_failure(kbdev, katom);
+				return;
+			}
+		}
+	}
+}
+#endif
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head completed_jobs;
+	struct list_head runnable_jobs;
+	bool need_to_try_schedule_context = false;
+	int i;
+
+	INIT_LIST_HEAD(&completed_jobs);
+	INIT_LIST_HEAD(&runnable_jobs);
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+#if MALI_CUSTOMER_RELEASE == 0
+	jd_check_force_failure(katom);
+#endif
+
+	/* This is needed in case an atom is failed due to being invalid, this
+	 * can happen *before* the jobs that the atom depends on have completed */
+	for (i = 0; i < 2; i++) {
+		if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
+			list_del(&katom->dep_item[i]);
+			kbase_jd_katom_dep_clear(&katom->dep[i]);
+		}
+	}
+
+	/* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with
+	 * BASE_JD_EVENT_TILE_RANGE_FAULT.
+	 *
+	 * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the
+	 * error code to BASE_JD_EVENT_DONE
+	 */
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) &&
+		  katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) {
+		if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) {
+			/* Promote the failure to job done */
+			katom->event_code = BASE_JD_EVENT_DONE;
+			katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED);
+		}
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+	list_add_tail(&katom->dep_item[0], &completed_jobs);
+
+	while (!list_empty(&completed_jobs)) {
+		katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, dep_item[0]);
+		list_del(completed_jobs.prev);
+
+		KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+		for (i = 0; i < 2; i++)
+			jd_resolve_dep(&runnable_jobs, katom, i);
+
+		if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+			kbase_jd_post_external_resources(katom);
+
+		while (!list_empty(&runnable_jobs)) {
+			struct kbase_jd_atom *node;
+
+			node = list_entry(runnable_jobs.next,
+					struct kbase_jd_atom, dep_item[0]);
+
+			list_del(runnable_jobs.next);
+
+			KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+			if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
+					!kctx->jctx.sched_info.ctx.is_dying) {
+				need_to_try_schedule_context |= jd_run_atom(node);
+			} else {
+				node->event_code = katom->event_code;
+
+				if ((node->core_req & BASEP_JD_REQ_ATOM_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+					if (kbase_replay_process(node))
+						/* Don't complete this atom */
+						continue;
+				} else if (node->core_req &
+							BASE_JD_REQ_SOFT_JOB) {
+					/* If this is a fence wait then remove it from the list of sync waiters. */
+					if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
+						list_del(&node->dep_item[0]);
+
+					kbase_finish_soft_job(node);
+				}
+				node->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+
+			if (node->status == KBASE_JD_ATOM_STATE_COMPLETED)
+				list_add_tail(&node->dep_item[0], &completed_jobs);
+		}
+
+		/* Completing an atom might have freed up space
+		 * in the ringbuffer, but only on that slot. */
+		jsctx_ll_flush_to_rb(kctx,
+				katom->sched_priority,
+				katom->slot_nr);
+
+		/* Register a completed job as a disjoint event when the GPU
+		 * is in a disjoint state (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kctx->kbdev);
+		if (completed_jobs_ctx)
+			list_add_tail(&katom->dep_item[0], completed_jobs_ctx);
+		else
+			kbase_event_post(kctx, katom);
+
+		/* Decrement and check the TOTAL number of jobs. This includes
+		 * those not tracked by the scheduler: 'not ready to run' and
+		 * 'dependency-only' jobs. */
+		if (--kctx->jctx.job_nr == 0)
+			wake_up(&kctx->jctx.zero_jobs_wait);	/* All events are safely queued now, and we can signal any waiter
+								 * that we've got no more jobs (so we can be safely terminated) */
+	}
+
+	return need_to_try_schedule_context;
+}
+
+KBASE_EXPORT_TEST_API(jd_done_nolock);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+enum {
+	CORE_REQ_DEP_ONLY,
+	CORE_REQ_SOFT,
+	CORE_REQ_COMPUTE,
+	CORE_REQ_FRAGMENT,
+	CORE_REQ_VERTEX,
+	CORE_REQ_TILER,
+	CORE_REQ_FRAGMENT_VERTEX,
+	CORE_REQ_FRAGMENT_VERTEX_TILER,
+	CORE_REQ_FRAGMENT_TILER,
+	CORE_REQ_VERTEX_TILER,
+	CORE_REQ_UNKNOWN
+};
+static const char * const core_req_strings[] = {
+	"Dependency Only Job",
+	"Soft Job",
+	"Compute Shader Job",
+	"Fragment Shader Job",
+	"Vertex/Geometry Shader Job",
+	"Tiler Job",
+	"Fragment Shader + Vertex/Geometry Shader Job",
+	"Fragment Shader + Vertex/Geometry Shader Job + Tiler Job",
+	"Fragment Shader + Tiler Job",
+	"Vertex/Geometry Shader Job + Tiler Job",
+	"Unknown Job"
+};
+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
+{
+	if (core_req & BASE_JD_REQ_SOFT_JOB)
+		return core_req_strings[CORE_REQ_SOFT];
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		return core_req_strings[CORE_REQ_COMPUTE];
+	switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) {
+	case BASE_JD_REQ_DEP:
+		return core_req_strings[CORE_REQ_DEP_ONLY];
+	case BASE_JD_REQ_FS:
+		return core_req_strings[CORE_REQ_FRAGMENT];
+	case BASE_JD_REQ_CS:
+		return core_req_strings[CORE_REQ_VERTEX];
+	case BASE_JD_REQ_T:
+		return core_req_strings[CORE_REQ_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_TILER];
+	case (BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_VERTEX_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER];
+	}
+	return core_req_strings[CORE_REQ_UNKNOWN];
+}
+#endif
+
+bool jd_submit_atom(struct kbase_context *kctx,
+			 const struct base_jd_atom_v2 *user_atom,
+			 struct kbase_jd_atom *katom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	base_jd_core_req core_req;
+	int queued = 0;
+	int i;
+	int sched_prio;
+	bool ret;
+	bool will_fail = false;
+
+	/* Update the TOTAL number of jobs. This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	jctx->job_nr++;
+
+	core_req = user_atom->core_req;
+
+	katom->start_timestamp.tv64 = 0;
+	katom->time_spent_us = 0;
+	katom->udata = user_atom->udata;
+	katom->kctx = kctx;
+	katom->nr_extres = user_atom->nr_extres;
+	katom->extres = NULL;
+	katom->device_nr = user_atom->device_nr;
+	katom->affinity = 0;
+	katom->jc = user_atom->jc;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->core_req = core_req;
+	katom->atom_flags = 0;
+	katom->retry_count = 0;
+	katom->need_cache_flush_cores_retained = 0;
+	katom->x_pre_dep = NULL;
+	katom->x_post_dep = NULL;
+	katom->will_fail_event_code = 0;
+#ifdef CONFIG_KDS
+	/* Start by assuming that the KDS dependencies are satisfied,
+	 * kbase_jd_pre_external_resources will correct this if there are dependencies */
+	katom->kds_dep_satisfied = true;
+	katom->kds_rset = NULL;
+#endif				/* CONFIG_KDS */
+
+	/* Don't do anything if there is a mess up with dependencies.
+	   This is done in a separate cycle to check both the dependencies at ones, otherwise
+	   it will be extra complexity to deal with 1st dependency ( just added to the list )
+	   if only the 2nd one has invalid config.
+	 */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+		if (dep_atom_number) {
+			if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+				katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+
+				/* Wrong dependency setup. Atom will be sent
+				 * back to user space. Do not record any
+				 * dependencies. */
+				kbase_tlstream_tl_new_atom(
+						katom,
+						kbase_jd_atom_id(kctx, katom));
+				kbase_tlstream_tl_ret_atom_ctx(
+						katom, kctx);
+
+				ret = jd_done_nolock(katom, NULL);
+				goto out;
+			}
+		}
+	}
+
+	/* Add dependencies */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type;
+		struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number];
+
+		dep_atom_type = user_atom->pre_dep[i].dependency_type;
+		kbase_jd_katom_dep_clear(&katom->dep[i]);
+
+		if (!dep_atom_number)
+			continue;
+
+		if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED ||
+				dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+
+			if (dep_atom->event_code == BASE_JD_EVENT_DONE)
+				continue;
+			/* don't stop this atom if it has an order dependency
+			 * only to the failed one, try to submit it through
+			 * the normal path
+			 */
+			if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom->event_code > BASE_JD_EVENT_ACTIVE) {
+				continue;
+			}
+
+			/* Atom has completed, propagate the error code if any */
+			katom->event_code = dep_atom->event_code;
+			katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+			/* This atom is going through soft replay or
+			 * will be sent back to user space. Do not record any
+			 * dependencies. */
+			kbase_tlstream_tl_new_atom(
+					katom,
+					kbase_jd_atom_id(kctx, katom));
+			kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
+
+			if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+					 == BASE_JD_REQ_SOFT_REPLAY) {
+				if (kbase_replay_process(katom)) {
+					ret = false;
+					goto out;
+				}
+			}
+			will_fail = true;
+
+		} else {
+			/* Atom is in progress, add this atom to the list */
+			list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
+			kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type);
+			queued = 1;
+		}
+	}
+
+	if (will_fail) {
+		if (!queued) {
+			ret = jd_done_nolock(katom, NULL);
+
+			goto out;
+		} else {
+			katom->will_fail_event_code = katom->event_code;
+			ret = false;
+
+			goto out;
+		}
+	} else {
+		/* These must occur after the above loop to ensure that an atom
+		 * that depends on a previous atom with the same number behaves
+		 * as expected */
+		katom->event_code = BASE_JD_EVENT_DONE;
+		katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	}
+
+	/* Create a new atom recording all dependencies it was set up with. */
+	kbase_tlstream_tl_new_atom(
+			katom,
+			kbase_jd_atom_id(kctx, katom));
+	kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
+	for (i = 0; i < 2; i++)
+		if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
+					&katom->dep[i]))
+			kbase_tlstream_tl_dep_atom_atom(
+					(void *)kbase_jd_katom_dep_atom(
+						&katom->dep[i]),
+					(void *)katom);
+
+	/* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
+	if (!katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with an invalid device_nr */
+	if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
+	    (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid device_nr %d",
+				katom->device_nr);
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* For invalid priority, be most lenient and choose the default */
+	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+	katom->sched_priority = sched_prio;
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		/* handle what we need to do to access the external resources */
+		if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
+			/* setup failed (no access, bad resource, unknown resource types, etc.) */
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+	/* Validate the atom. Function will return error if the atom is
+	 * malformed.
+	 *
+	 * Soft-jobs never enter the job scheduler but have their own initialize method.
+	 *
+	 * If either fail then we immediately complete the atom with an error.
+	 */
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
+		if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	} else {
+		/* Soft-job */
+		if (kbase_prepare_soft_job(katom) != 0) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	katom->work_id = atomic_inc_return(&jctx->work_id);
+	trace_gpu_job_enqueue((u32)kctx->id, katom->work_id,
+			kbasep_map_core_reqs_to_string(katom->core_req));
+#endif
+
+	if (queued && !IS_GPU_ATOM(katom)) {
+		ret = false;
+		goto out;
+	}
+#ifdef CONFIG_KDS
+	if (!katom->kds_dep_satisfied) {
+		/* Queue atom due to KDS dependency */
+		ret = false;
+		goto out;
+	}
+#endif				/* CONFIG_KDS */
+
+	if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+		if (kbase_replay_process(katom))
+			ret = false;
+		else
+			ret = jd_done_nolock(katom, NULL);
+
+		goto out;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+
+		ret = false;
+	} else if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		ret = kbasep_js_add_job(kctx, katom);
+		/* If job was cancelled then resolve immediately */
+		if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED)
+			ret = jd_done_nolock(katom, NULL);
+	} else {
+		/* This is a pure dependency. Resolve it immediately */
+		ret = jd_done_nolock(katom, NULL);
+	}
+
+ out:
+	return ret;
+}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data,
+		int uk6_atom)
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int err = 0;
+	int i;
+	bool need_to_try_schedule_context = false;
+	struct kbase_device *kbdev;
+	void __user *user_addr;
+	u32 latest_flush;
+
+	/*
+	 * kbase_jd_submit isn't expected to fail and so all errors with the jobs
+	 * are reported by immediately falling them (through event system)
+	 */
+	kbdev = kctx->kbdev;
+
+	beenthere(kctx, "%s", "Enter");
+
+	if ((kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != 0) {
+		dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+		return -EINVAL;
+	}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	if ((uk6_atom && submit_data->stride !=
+			sizeof(struct base_jd_atom_v2_uk6)) ||
+			submit_data->stride != sizeof(base_jd_atom_v2)) {
+#else
+	if (submit_data->stride != sizeof(base_jd_atom_v2)) {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+		dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+		return -EINVAL;
+	}
+
+	user_addr = get_compat_pointer(kctx, &submit_data->addr);
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight));
+
+	/* All atoms submitted in this call have the same flush ID */
+	latest_flush = kbase_backend_get_current_flush_id(kbdev);
+
+	for (i = 0; i < submit_data->nr_atoms; i++) {
+		struct base_jd_atom_v2 user_atom;
+		struct kbase_jd_atom *katom;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+		if (uk6_atom) {
+			struct base_jd_atom_v2_uk6 user_atom_v6;
+			base_jd_dep_type dep_types[2] = {BASE_JD_DEP_TYPE_DATA, BASE_JD_DEP_TYPE_DATA};
+
+			if (copy_from_user(&user_atom_v6, user_addr,
+					sizeof(user_atom_v6))) {
+				err = -EINVAL;
+				KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx,
+					atomic_sub_return(
+					submit_data->nr_atoms - i,
+					&kctx->timeline.jd_atoms_in_flight));
+				break;
+			}
+			/* Convert from UK6 atom format to UK7 format */
+			user_atom.jc = user_atom_v6.jc;
+			user_atom.udata = user_atom_v6.udata;
+			user_atom.extres_list = user_atom_v6.extres_list;
+			user_atom.nr_extres = user_atom_v6.nr_extres;
+			user_atom.core_req = user_atom_v6.core_req;
+
+			/* atom number 0 is used for no dependency atoms */
+			if (!user_atom_v6.pre_dep[0])
+				dep_types[0] = BASE_JD_DEP_TYPE_INVALID;
+
+			base_jd_atom_dep_set(&user_atom.pre_dep[0],
+					user_atom_v6.pre_dep[0],
+					dep_types[0]);
+
+			/* atom number 0 is used for no dependency atoms */
+			if (!user_atom_v6.pre_dep[1])
+				dep_types[1] = BASE_JD_DEP_TYPE_INVALID;
+
+			base_jd_atom_dep_set(&user_atom.pre_dep[1],
+					user_atom_v6.pre_dep[1],
+					dep_types[1]);
+
+			user_atom.atom_number = user_atom_v6.atom_number;
+			user_atom.prio = user_atom_v6.prio;
+			user_atom.device_nr = user_atom_v6.device_nr;
+		} else {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+		if (copy_from_user(&user_atom, user_addr, sizeof(user_atom)) != 0) {
+			err = -EINVAL;
+			KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(submit_data->nr_atoms - i, &kctx->timeline.jd_atoms_in_flight));
+			break;
+		}
+#ifdef BASE_LEGACY_UK6_SUPPORT
+		}
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+		user_addr = (void __user *)((uintptr_t) user_addr + submit_data->stride);
+
+		mutex_lock(&jctx->lock);
+#ifndef compiletime_assert
+#define compiletime_assert_defined
+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \
+while (false)
+#endif
+		compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) ==
+					BASE_JD_ATOM_COUNT,
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+		compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) ==
+					sizeof(user_atom.atom_number),
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+#ifdef compiletime_assert_defined
+#undef compiletime_assert
+#undef compiletime_assert_defined
+#endif
+		katom = &jctx->atoms[user_atom.atom_number];
+
+		/* Record the flush ID for the cache flush optimisation */
+		katom->flush_id = latest_flush;
+
+		while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
+			/* Atom number is already in use, wait for the atom to
+			 * complete
+			 */
+			mutex_unlock(&jctx->lock);
+
+			/* This thread will wait for the atom to complete. Due
+			 * to thread scheduling we are not sure that the other
+			 * thread that owns the atom will also schedule the
+			 * context, so we force the scheduler to be active and
+			 * hence eventually schedule this context at some point
+			 * later.
+			 */
+			kbase_js_sched_all(kbdev);
+
+			if (wait_event_killable(katom->completed,
+					katom->status ==
+					KBASE_JD_ATOM_STATE_UNUSED) != 0) {
+				/* We're being killed so the result code
+				 * doesn't really matter
+				 */
+				return 0;
+			}
+			mutex_lock(&jctx->lock);
+		}
+
+		need_to_try_schedule_context |=
+				       jd_submit_atom(kctx, &user_atom, katom);
+
+		/* Register a completed job as a disjoint event when the GPU is in a disjoint state
+		 * (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kbdev);
+
+		mutex_unlock(&jctx->lock);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kbdev);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_submit);
+
+void kbase_jd_done_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	union kbasep_js_policy *js_policy;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	u64 cache_jc = katom->jc;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool schedule = false;
+	bool context_idle;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+	kbdev = kctx->kbdev;
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	/*
+	 * Begin transaction on JD context and JS context
+	 */
+	mutex_lock(&jctx->lock);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* This worker only gets called on contexts that are scheduled *in*. This is
+	 * because it only happens in response to an IRQ from a job that was
+	 * running.
+	 */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
+
+	if (katom->event_code == BASE_JD_EVENT_STOPPED) {
+		/* Atom has been promoted to stopped */
+		unsigned long flags;
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		kbase_js_unpull(kctx, katom);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&jctx->lock);
+
+		return;
+	}
+
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		dev_err(kbdev->dev,
+			"t6xx: GPU fault 0x%02lx from job slot %d\n",
+					(unsigned long)katom->event_code,
+								katom->slot_nr);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	/* Retain state before the katom disappears */
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+
+	if (!kbasep_js_has_atom_finished(&katom_retained_state)) {
+		mutex_lock(&js_devdata->runpool_mutex);
+		kbasep_js_clear_job_retry_submit(katom);
+		/* An atom that has been hard-stopped might have previously
+		 * been soft-stopped and has just finished before the hard-stop
+		 * occurred. For this reason, clear the hard-stopped flag */
+		katom->atom_flags &= ~(KBASE_KATOM_FLAG_BEEN_HARD_STOPPED);
+		mutex_unlock(&js_devdata->runpool_mutex);
+	}
+
+	if (kbasep_js_has_atom_finished(&katom_retained_state))
+		schedule = true;
+
+	context_idle = kbase_js_complete_atom_wq(kctx, katom);
+
+	KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
+
+	kbasep_js_remove_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+	schedule |= jd_done_nolock(katom, &kctx->completed_jobs);
+
+	/* katom may have been freed now, do not use! */
+
+	if (context_idle) {
+		unsigned long flags;
+
+		mutex_lock(&js_devdata->queue_mutex);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		/* If kbase_sched() has scheduled this context back in then
+		 * ctx_active will have been set after we marked it as inactive,
+		 * and another pm reference will have been taken, so drop our
+		 * reference. But do not call kbase_jm_idle_ctx(), as the
+		 * context is active and fast-starting is allowed.
+		 *
+		 * If an atom has been fast-started then kctx->atoms_pulled will
+		 * be non-zero but ctx_active will still be false (as the
+		 * previous pm reference has been inherited). Do NOT drop our
+		 * reference, as it has been re-used, and leave the context as
+		 * active.
+		 *
+		 * If no new atoms have been started then ctx_active will still
+		 * be false and atoms_pulled will be zero, so drop the reference
+		 * and call kbase_jm_idle_ctx().
+		 *
+		 * As the checks are done under both the queue_mutex and
+		 * runpool_irq.lock is should be impossible for this to race
+		 * with the scheduler code.
+		 */
+		if (kctx->ctx_active || !atomic_read(&kctx->atoms_pulled)) {
+			/* Calling kbase_jm_idle_ctx() here will ensure that
+			 * atoms are not fast-started when we drop the
+			 * runpool_irq.lock. This is not performed if ctx_active
+			 * is set as in that case another pm reference has been
+			 * taken and a fast-start would be valid.
+			 */
+			if (!kctx->ctx_active)
+				kbase_jm_idle_ctx(kbdev, kctx);
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+					flags);
+
+			kbase_pm_context_idle(kbdev);
+		} else {
+			kctx->ctx_active = true;
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+					flags);
+		}
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+
+	/*
+	 * Transaction complete
+	 */
+	mutex_unlock(&jctx->lock);
+
+	/* Job is now no longer running, so can now safely release the context
+	 * reference, and handle any actions that were logged against the atom's retained state */
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+
+	if (schedule)
+		kbase_js_sched_all(kbdev);
+
+	if (!atomic_dec_return(&kctx->work_count)) {
+		/* If worker now idle then post all events that jd_done_nolock()
+		 * has queued */
+		mutex_lock(&jctx->lock);
+		while (!list_empty(&kctx->completed_jobs)) {
+			struct kbase_jd_atom *atom = list_entry(
+					kctx->completed_jobs.next,
+					struct kbase_jd_atom, dep_item[0]);
+			list_del(kctx->completed_jobs.next);
+
+			kbase_event_post(kctx, atom);
+		}
+		mutex_unlock(&jctx->lock);
+	}
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+}
+
+/**
+ * jd_cancel_worker - Work queue job cancel function.
+ * @data: a &struct work_struct
+ *
+ * Only called as part of 'Zapping' a context (which occurs on termination).
+ * Operates serially with the kbase_jd_done_worker() on the work queue.
+ *
+ * This can only be called on contexts that aren't scheduled.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be
+ * running (by virtue of only being called on contexts that aren't
+ * scheduled).
+ */
+static void jd_cancel_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool need_to_try_schedule_context;
+	bool attr_state_changed;
+	struct kbase_device *kbdev;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	kbdev = kctx->kbdev;
+	jctx = &kctx->jctx;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+	/* This only gets called on contexts that are scheduled out. Hence, we must
+	 * make sure we don't de-ref the number of running jobs (there aren't
+	 * any), nor must we try to schedule out the context (it's already
+	 * scheduled out).
+	 */
+	KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+	/* Scheduler: Remove the job from the system */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&jctx->lock);
+
+	need_to_try_schedule_context = jd_done_nolock(katom, NULL);
+	/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
+	 * schedule the context. There's also no need for the jsctx_mutex to have been taken
+	 * around this too. */
+	KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
+
+	/* katom may have been freed now, do not use! */
+	mutex_unlock(&jctx->lock);
+
+	if (attr_state_changed)
+		kbase_js_sched_all(kbdev);
+}
+
+/**
+ * kbase_jd_done - Complete a job that has been removed from the Hardware
+ * @katom: atom which has been completed
+ * @slot_nr: slot the atom was on
+ * @end_timestamp: completion time
+ * @done_code: completion code
+ *
+ * This must be used whenever a job has been removed from the Hardware, e.g.:
+ * An IRQ indicates that the job finished (for both error and 'done' codes), or
+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop.
+ *
+ * Some work is carried out immediately, and the rest is deferred onto a
+ * workqueue
+ *
+ * Context:
+ *   This can be called safely from atomic context.
+ *   The caller must hold kbasep_js_device_data.runpool_irq.lock
+ */
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
+		ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
+{
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(kctx);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
+		katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0);
+
+	kbase_job_check_leave_disjoint(kbdev, katom);
+
+	katom->slot_nr = slot_nr;
+
+	atomic_inc(&kctx->work_count);
+
+#ifdef CONFIG_DEBUG_FS
+	/* a failed job happened and is waiting for dumping*/
+	if (!katom->will_fail_event_code &&
+			kbase_debug_job_fault_process(katom, katom->event_code))
+		return;
+#endif
+
+	WARN_ON(work_pending(&katom->work));
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, kbase_jd_done_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_done);
+
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+	/* This should only be done from a context that is not scheduled */
+	KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+	WARN_ON(work_pending(&katom->work));
+
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, jd_cancel_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+
+void kbase_jd_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_jd_atom *katom;
+	struct list_head *entry, *tmp;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u);
+
+	kbase_js_zap_context(kctx);
+
+	mutex_lock(&kctx->jctx.lock);
+
+	/*
+	 * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are
+	 * queued outside the job scheduler.
+	 */
+
+	hrtimer_cancel(&kctx->soft_event_timeout);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		katom = list_entry(entry, struct kbase_jd_atom, dep_item[0]);
+		kbase_cancel_soft_job(katom);
+	}
+
+
+#ifdef CONFIG_KDS
+
+	/* For each job waiting on a kds resource, cancel the wait and force the job to
+	 * complete early, this is done so that we don't leave jobs outstanding waiting
+	 * on kds resources which may never be released when contexts are zapped, resulting
+	 * in a hang.
+	 *
+	 * Note that we can safely iterate over the list as the struct kbase_jd_context lock is held,
+	 * this prevents items being removed when calling job_done_nolock in kbase_cancel_kds_wait_job.
+	 */
+
+	list_for_each(entry, &kctx->waiting_kds_resource) {
+		katom = list_entry(entry, struct kbase_jd_atom, node);
+
+		kbase_cancel_kds_wait_job(katom);
+	}
+#endif
+
+	mutex_unlock(&kctx->jctx.lock);
+
+	kbase_jm_wait_for_zero_jobs(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_zap_context);
+
+int kbase_jd_init(struct kbase_context *kctx)
+{
+	int i;
+	int mali_err = 0;
+#ifdef CONFIG_KDS
+	int err;
+#endif				/* CONFIG_KDS */
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", 0, 1);
+	if (NULL == kctx->jctx.job_done_wq) {
+		mali_err = -ENOMEM;
+		goto out1;
+	}
+
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		init_waitqueue_head(&kctx->jctx.atoms[i].completed);
+
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]);
+
+		/* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
+		kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
+		kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+	}
+
+	mutex_init(&kctx->jctx.lock);
+
+	init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
+
+	spin_lock_init(&kctx->jctx.tb_lock);
+
+#ifdef CONFIG_KDS
+	err = kds_callback_init(&kctx->jctx.kds_cb, 0, kds_dep_clear);
+	if (0 != err) {
+		mali_err = -EINVAL;
+		goto out2;
+	}
+#endif				/* CONFIG_KDS */
+
+	kctx->jctx.job_nr = 0;
+	INIT_LIST_HEAD(&kctx->completed_jobs);
+	atomic_set(&kctx->work_count, 0);
+
+	return 0;
+
+#ifdef CONFIG_KDS
+ out2:
+	destroy_workqueue(kctx->jctx.job_done_wq);
+#endif				/* CONFIG_KDS */
+ out1:
+	return mali_err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_init);
+
+void kbase_jd_exit(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+#ifdef CONFIG_KDS
+	kds_callback_term(&kctx->jctx.kds_cb);
+#endif				/* CONFIG_KDS */
+	/* Work queue is emptied by this */
+	destroy_workqueue(kctx->jctx.job_done_wq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_exit);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
new file mode 100755
index 0000000..0cf75f5
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/seq_file.h>
+
+#include <mali_kbase.h>
+
+#include <mali_kbase_jd_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/**
+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file.
+ * @sfile: The debugfs entry
+ * @data:  Data associated with the entry
+ *
+ * This function is called to get the contents of the JD atoms debugfs file.
+ * This is a report of all atoms managed by kbase_jd_context.atoms
+ *
+ * Return: 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+	struct kbase_jd_atom *atoms;
+	unsigned long irq_flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Print version */
+	seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION);
+
+	/* Print U/K API version */
+	seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR,
+			BASE_UK_VERSION_MINOR);
+
+	/* Print table heading */
+	seq_puts(sfile, "atom id,core reqs,status,coreref status,predeps,start time,time on gpu\n");
+
+	atoms = kctx->jctx.atoms;
+	/* General atom states */
+	mutex_lock(&kctx->jctx.lock);
+	/* JS-related states */
+	spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+	for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
+		struct kbase_jd_atom *atom = &atoms[i];
+		s64 start_timestamp = 0;
+
+		if (atom->status == KBASE_JD_ATOM_STATE_UNUSED)
+			continue;
+
+		/* start_timestamp is cleared as soon as the atom leaves UNUSED state
+		 * and set before a job is submitted to the h/w, a non-zero value means
+		 * it is valid */
+		if (ktime_to_ns(atom->start_timestamp))
+			start_timestamp = ktime_to_ns(
+					ktime_sub(ktime_get(), atom->start_timestamp));
+
+		seq_printf(sfile,
+				"%i,%u,%u,%u,%u %u,%lli,%llu\n",
+				i, atom->core_req, atom->status, atom->coreref_state,
+				(unsigned)(atom->dep[0].atom ?
+						atom->dep[0].atom - atoms : 0),
+				(unsigned)(atom->dep[1].atom ?
+						atom->dep[1].atom - atoms : 0),
+				(signed long long)start_timestamp,
+				(unsigned long long)(atom->time_spent_us ?
+					atom->time_spent_us * 1000 : start_timestamp)
+				);
+	}
+	spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+	mutex_unlock(&kctx->jctx.lock);
+
+	return 0;
+}
+
+
+/**
+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * Return: file descriptor
+ */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private);
+}
+
+static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+	.open = kbasep_jd_debugfs_atoms_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Expose all atoms */
+	debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx,
+			&kbasep_jd_debugfs_atoms_fops);
+
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
new file mode 100755
index 0000000..bc1878f
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_jd_debugfs.h
+ * Header file for job dispatcher-related entries in debugfs
+ */
+
+#ifndef _KBASE_JD_DEBUGFS_H
+#define _KBASE_JD_DEBUGFS_H
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+
+#define MALI_JD_DEBUGFS_VERSION 1
+
+/**
+ * kbasep_jd_debugfs_ctx_add() - Add debugfs entries for JD system
+ *
+ * @kctx Pointer to kbase_context
+ */
+void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx);
+
+#endif  /*_KBASE_JD_DEBUGFS_H*/
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
new file mode 100755
index 0000000..6342532
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_hwaccess_jm.h"
+#include "mali_kbase_jm.h"
+
+/**
+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot
+ *			 @js on the active context.
+ * @kbdev:		Device pointer
+ * @js:			Job slot to run on
+ * @nr_jobs_to_submit:	Number of jobs to attempt to submit
+ *
+ * Return: true if slot can still be submitted on, false if slot is now full.
+ */
+static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
+				int nr_jobs_to_submit)
+{
+	struct kbase_context *kctx;
+	int i;
+
+	kctx = kbdev->hwaccess.active_kctx;
+
+	if (!kctx)
+		return true;
+
+	for (i = 0; i < nr_jobs_to_submit; i++) {
+		struct kbase_jd_atom *katom = kbase_js_pull(kctx, js);
+
+		if (!katom)
+			return true; /* Context has no jobs on this slot */
+
+		kbase_backend_run_atom(kbdev, katom);
+	}
+
+	return false; /* Slot ringbuffer should now be full */
+}
+
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	u32 ret_mask = 0;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	while (js_mask) {
+		int js = ffs(js_mask) - 1;
+		int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
+
+		if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
+			ret_mask |= (1 << js);
+
+		js_mask &= ~(1 << js);
+	}
+
+	return ret_mask;
+}
+
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick(kbdev, js_mask);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_try_kick_all(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick_all(kbdev);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (kbdev->hwaccess.active_kctx == kctx)
+		kbdev->hwaccess.active_kctx = NULL;
+}
+
+void kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (katom->event_code != BASE_JD_EVENT_STOPPED &&
+			katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
+		kbase_js_complete_atom(katom, NULL);
+	} else {
+		kbase_js_unpull(katom->kctx, katom);
+	}
+}
+
+void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
+			ktime_t *end_timestamp)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	kbase_js_complete_atom(katom, end_timestamp);
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
new file mode 100755
index 0000000..27aca3a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -0,0 +1,106 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Job manager common APIs
+ */
+
+#ifndef _KBASE_JM_H_
+#define _KBASE_JM_H_
+
+/**
+ * kbase_jm_kick() - Indicate that there are jobs ready to run.
+ * @kbdev:	Device pointer
+ * @js_mask:	Mask of the job slots that can be pulled from.
+ *
+ * Caller must hold the runpool_irq lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job
+ *			 slots.
+ * @kbdev:	Device pointer
+ *
+ * Caller must hold the runpool_irq lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
+{
+	return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+/**
+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick
+ * @kbdev:   Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from
+ * Context: Caller must hold runpool_irq lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
+ * @kbdev:  Device pointer
+ * Context: Caller must hold runpool_irq lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick_all() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick_all(struct kbase_device *kbdev);
+
+/**
+ * kbase_jm_idle_ctx() - Mark a context as idle.
+ * @kbdev:	Device pointer
+ * @kctx:	Context to mark as idle
+ *
+ * No more atoms will be pulled from this context until it is marked as active
+ * by kbase_js_use_ctx().
+ *
+ * The context should have no atoms currently pulled from it
+ * (kctx->atoms_pulled == 0).
+ *
+ * Caller must hold the runpool_irq lock
+ */
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
+ *				  been soft-stopped or will fail due to a
+ *				  dependency
+ * @kbdev:	Device pointer
+ * @katom:	Atom that has been stopped or will be failed
+ */
+void kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_jm_complete() - Complete an atom
+ * @kbdev:		Device pointer
+ * @katom:		Atom that has completed
+ * @end_timestamp:	Timestamp of atom completion
+ */
+void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
+			ktime_t *end_timestamp);
+
+#endif /* _KBASE_JM_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
new file mode 100755
index 0000000..83228c0
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -0,0 +1,3235 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_hw.h>
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_jm.h"
+#include "mali_kbase_hwaccess_jm.h"
+
+/*
+ * Private types
+ */
+
+/* Bitpattern indicating the result of releasing a context */
+enum {
+	/* The context was descheduled - caller should try scheduling in a new
+	 * one to keep the runpool full */
+	KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+	/* Ctx attributes were changed - caller should try scheduling all
+	 * contexts */
+	KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
+};
+
+typedef u32 kbasep_js_release_result;
+
+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
+	KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
+	KBASE_JS_ATOM_SCHED_PRIO_LOW  /* BASE_JD_PRIO_LOW */
+};
+
+const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
+	BASE_JD_PRIO_HIGH,   /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+	BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+	BASE_JD_PRIO_LOW     /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+};
+
+
+/*
+ * Private function prototypes
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state);
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_policy_ctx_job_cb callback);
+
+/* Helper for trace subcodes */
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return refcnt;
+}
+
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+/*
+ * Private types
+ */
+enum {
+	JS_DEVDATA_INIT_NONE = 0,
+	JS_DEVDATA_INIT_CONSTANTS = (1 << 0),
+	JS_DEVDATA_INIT_POLICY = (1 << 1),
+	JS_DEVDATA_INIT_ALL = ((1 << 2) - 1)
+};
+
+enum {
+	JS_KCTX_INIT_NONE = 0,
+	JS_KCTX_INIT_CONSTANTS = (1 << 0),
+	JS_KCTX_INIT_POLICY = (1 << 1),
+	JS_KCTX_INIT_ALL = ((1 << 2) - 1)
+};
+
+/*
+ * Private functions
+ */
+
+/**
+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
+ * @features: JSn_FEATURE register value
+ *
+ * Given a JSn_FEATURE register value returns the core requirements that match
+ *
+ * Return: Core requirement bit mask
+ */
+static base_jd_core_req core_reqs_from_jsn_features(u16 features)
+{
+	base_jd_core_req core_req = 0u;
+
+	if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+		core_req |= BASE_JD_REQ_V;
+
+	if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+		core_req |= BASE_JD_REQ_CF;
+
+	if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+		core_req |= BASE_JD_REQ_CS;
+
+	if ((features & JS_FEATURE_TILER_JOB) != 0)
+		core_req |= BASE_JD_REQ_T;
+
+	if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+		core_req |= BASE_JD_REQ_FS;
+
+	return core_req;
+}
+
+static void kbase_js_sync_timers(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+	kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
+
+/* Hold the kbasep_js_device_data::runpool_irq::lock for this */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_per_as_data *js_per_as_data;
+	bool result = false;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		int new_refcnt;
+
+		KBASE_DEBUG_ASSERT(as_nr >= 0);
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		KBASE_DEBUG_ASSERT(js_per_as_data->kctx != NULL);
+
+		new_refcnt = ++(js_per_as_data->as_busy_refcount);
+
+		KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
+				NULL, 0u, new_refcnt);
+		result = true;
+	}
+
+	return result;
+}
+
+/* Helper macros to access and modify jsctx_queue.indicies */
+#define JSCTX_GET(offset, var, mask) \
+	((var >> offset) & mask)
+
+/* This wraps around to correct integer size automatically. */
+#define JSCTX_SET(var, offset, value, mask) \
+	(var = ((var & ~(mask << offset)) /*Clear old bits */ \
+	| (((value) & mask) << offset))) /* Set (after masking) new bits */
+
+#define JSCTX_GET_WR_IDX(var) \
+	JSCTX_GET(JSCTX_WR_OFFSET, var, JSCTX_RB_MASK_STORE)
+#define JSCTX_GET_RN_IDX(var) \
+	JSCTX_GET(JSCTX_RN_OFFSET, var, JSCTX_RB_MASK_STORE)
+#define JSCTX_GET_RD_IDX(var) \
+	JSCTX_GET(JSCTX_RD_OFFSET, var, JSCTX_RB_MASK_STORE)
+
+#define JSCTX_GET_IDX_DIFF(lower, upper) \
+	((upper >= lower) ? (upper - lower) : (upper+JSCTX_RB_SIZE_STORE-lower))
+
+#define JSCTX_SET_WR_IDX(var, value) \
+	JSCTX_SET(var, JSCTX_WR_OFFSET, value, JSCTX_RB_MASK_STORE)
+#define JSCTX_SET_RN_IDX(var, value) \
+	JSCTX_SET(var, JSCTX_RN_OFFSET, value, JSCTX_RB_MASK_STORE)
+#define JSCTX_SET_RD_IDX(var, value) \
+	JSCTX_SET(var, JSCTX_RD_OFFSET, value, JSCTX_RB_MASK_STORE)
+
+/**
+ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority to check.
+ *
+ * Return true if there are no atoms to pull. There may be running atoms in the
+ * ring buffer even if there are no atoms to pull. It is also possible for the
+ * ring buffer to be full (with running atoms) when this functions returns
+ * true.
+ *
+ * Return: true if there are no atoms to pull, false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	unsigned int var = atomic_read(&rb->indicies);
+
+	return JSCTX_GET_RD_IDX(var) == JSCTX_GET_WR_IDX(var);
+}
+
+/**
+ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no
+ * pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if the ring buffers for all priorities have no pullable atoms,
+ *	   false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+  * jsctx_rb_is_full(): - Check if the given ringbuffer is full.
+  * @queue: Pointer to the queue containing the ringbuffer.
+  *
+  * No locks explicitly required, result will always be consistent.
+  * But depending on usage, the caller should consider jctx.lock,
+  * for the result to remain correct.
+  *
+  * Return: true if the ringbuffer is full, false otherwise.
+  */
+static inline bool
+jsctx_rb_is_full(struct jsctx_queue *queue)
+{
+	unsigned int var = atomic_read(&queue->indicies);
+	u16 rn_idx = JSCTX_GET_RN_IDX(var);
+	u16 wr_idx = JSCTX_GET_WR_IDX(var);
+
+	return JSCTX_GET_IDX_DIFF(rn_idx, wr_idx) >= JSCTX_RB_SIZE;
+}
+
+
+/**
+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue.
+ * @kctx:     Pointer to kbase context with the queue.
+ * @js:       Job slot id to iterate.
+ * @prio:     Priority id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over a ring buffer and invoke @callback for each entry in buffer, and
+ * remove the entry from the buffer.
+ *
+ * If entries are added to the ring buffer while this is running those entries
+ * may, or may not be covered. To ensure that all entries in the buffer have
+ * been enumerated when this function returns jsctx->lock must be held when
+ * calling this function.
+ *
+ * The HW access lock, js_data.runpool_irq.lock, must always be held when
+ * calling this function.
+ */
+static void
+jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
+		kbasep_js_policy_ctx_job_cb callback)
+{
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+	struct kbase_jd_atom *katom;
+
+	struct list_head *pos, *q;
+
+	unsigned int var = atomic_read(&queue->indicies);
+	u16 running_idx = JSCTX_GET_RN_IDX(var);
+	u16 read_idx = JSCTX_GET_RD_IDX(var);
+	u16 wr_idx = JSCTX_GET_WR_IDX(var);
+	u16 i;
+	const u16 count = JSCTX_GET_IDX_DIFF(running_idx, wr_idx);
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	/* There must be no jobs currently in HW access */
+	WARN_ON(read_idx != JSCTX_GET_RN_IDX(var));
+
+	/* Invoke callback on all kbase_jd_atoms in the ring buffer, and
+	 * removes them from the buffer */
+	for (i = 0; i < count; i++) {
+		int id = queue->entries[read_idx & JSCTX_RB_MASK].atom_id;
+
+		katom = kbase_jd_atom_from_id(kctx, id);
+		read_idx++;
+		callback(kctx->kbdev, katom);
+	}
+	atomic_set(&queue->indicies, 0);
+
+	list_for_each_safe(pos, q, &queue->queue_head) {
+		struct kbase_jd_atom *entry;
+
+		entry = list_entry(pos, struct kbase_jd_atom, queue);
+		list_del(pos);
+		callback(kctx->kbdev, entry);
+	}
+}
+
+/**
+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue
+ * @kctx:     Pointer to kbase context with queue.
+ * @js:       Job slot id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over all the different priorities, and for each call
+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
+ * for each entry, and remove the entry from the queue.
+ */
+static inline void
+jsctx_queue_foreach(struct kbase_context *kctx, int js,
+		kbasep_js_policy_ctx_job_cb callback)
+{
+	int prio;
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+		jsctx_queue_foreach_prio(kctx, js, prio, callback);
+}
+
+/**
+ * jsctx_rb_peek_prio(): - Check buffer and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a pointer to
+ * the next atom, unless the ring buffer is empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	int id;
+	unsigned int var = atomic_read(&rb->indicies);
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	if (JSCTX_GET_RD_IDX(var) == JSCTX_GET_WR_IDX(var))
+		return NULL;
+
+	id = rb->entries[JSCTX_GET_RD_IDX(var) & JSCTX_RB_MASK].atom_id;
+	return kbase_jd_atom_from_id(kctx, id);
+}
+
+/**
+ * jsctx_rb_peek(): - Check all priority buffers and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Check the ring buffers for all priorities, starting from
+ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
+ * pointer to the next atom, unless all the priority's ring buffers are empty.
+ *
+ * Caller must hold the runpool_irq.lock.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		struct kbase_jd_atom *katom;
+
+		katom = jsctx_rb_peek_prio(kctx, js, prio);
+		if (katom)
+			return katom;
+	}
+
+	return NULL;
+}
+
+/**
+ * jsctx_rb_peek_last(): - Check a ring buffer and get the last atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a
+ * pointer to the last atom, unless all the priority's ring buffers are empty.
+ *
+ * The last atom is the atom that was added using jsctx_rb_add() most recently.
+ *
+ * Return: Pointer to last atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_last(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	unsigned int var = atomic_read(&rb->indicies);
+	int id;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	if (!list_empty(&rb->queue_head)) {
+		return list_entry(rb->queue_head.prev,
+				struct kbase_jd_atom, queue);
+	}
+
+	if (JSCTX_GET_RN_IDX(var) == JSCTX_GET_WR_IDX(var))
+		return NULL;
+
+	id = rb->entries[(JSCTX_GET_WR_IDX(var) - 1) & JSCTX_RB_MASK].atom_id;
+	return kbase_jd_atom_from_id(kctx, id);
+}
+
+/**
+ * jsctx_rb_pull(): - Mark atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to pull.
+ *
+ * Mark an atom previously obtained from jsctx_rb_peek() as running.
+ *
+ * @katom must currently be at the head of the ring buffer.
+ */
+static inline void
+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	unsigned int oldvar, var;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	/* Atoms must be pulled in the correct order. */
+	WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
+
+	do {
+		u16 rd_idx;
+
+		oldvar = atomic_read(&rb->indicies);
+		var = oldvar;
+		rd_idx = JSCTX_GET_RD_IDX(var);
+
+		JSCTX_SET_RD_IDX(var, rd_idx+1);
+	} while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar);
+}
+
+/**
+ * jsctx_rb_unpull(): - Undo marking of atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to unpull.
+ *
+ * Undo jsctx_rb_pull() and put @katom back in the queue.
+ *
+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
+ * pulled.
+ */
+static inline void
+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	unsigned int oldvar, var;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	do {
+		u16 rd_idx;
+
+		oldvar = atomic_read(&rb->indicies);
+		var = oldvar;
+
+
+		rd_idx = JSCTX_GET_RD_IDX(var)-1;
+
+		/* Atoms must be unpulled in correct order. */
+		WARN_ON(rb->entries[rd_idx & JSCTX_RB_MASK].atom_id !=
+				kbase_jd_atom_id(kctx, katom));
+
+		JSCTX_SET_RD_IDX(var, rd_idx);
+	} while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar);
+}
+
+/**
+ * jsctx_rb_add(): - Add atom to ring buffer
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to add.
+ *
+ * Add @katom to the ring buffer determined by the atom's priority and job slot
+ * number.
+ *
+ * If the ring buffer is full -EBUSY will be returned.
+ *
+ * Return: On success 0 is returned, on failure a negative error code.
+ */
+static int
+jsctx_rb_add_atom(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	unsigned int oldvar, var;
+	u16 wr_idx, running_idx, count;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	oldvar = atomic_read(&rb->indicies);
+	var = oldvar;
+
+	running_idx = JSCTX_GET_RN_IDX(var);
+	wr_idx = JSCTX_GET_WR_IDX(var);
+	count = JSCTX_GET_IDX_DIFF(running_idx, wr_idx);
+
+	/* Check if the ring buffer is full */
+	if (count >= JSCTX_RB_SIZE)
+		return -EBUSY;
+
+	rb->entries[wr_idx & JSCTX_RB_MASK].atom_id =
+		kbase_jd_atom_id(kctx, katom);
+
+	wr_idx++;
+	JSCTX_SET_WR_IDX(var, wr_idx);
+
+	while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar) {
+		oldvar = atomic_read(&rb->indicies);
+		var = oldvar;
+		wr_idx = JSCTX_GET_WR_IDX(var)+1;
+
+		JSCTX_SET_WR_IDX(var, wr_idx);
+	}
+	return 0;
+}
+
+/**
+ * jsctx_rb_remove(): - Remove atom from ring buffer
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to remove.
+ *
+ * Remove @katom from the ring buffer.
+ *
+ * @katom must have been pulled from the buffer earlier by jsctx_rb_pull(), and
+ * atoms must be removed in the same order they were pulled from the ring
+ * buffer.
+ */
+static inline void
+jsctx_rb_remove(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	unsigned int oldvar, var;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+
+	do {
+		unsigned int rn_idx;
+
+		oldvar = atomic_read(&rb->indicies);
+		var = oldvar;
+
+		rn_idx = JSCTX_GET_RN_IDX(var);
+
+		JSCTX_SET_RN_IDX(var, rn_idx+1);
+	} while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar);
+}
+
+
+static void
+jsctx_ll_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	list_add_tail(&katom->queue, &queue->queue_head);
+}
+
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
+					int js,
+					bool is_scheduled);
+static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+
+void
+jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js)
+{
+	unsigned long flags;
+	struct list_head *pos, *q;
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+	bool flushed_any = false;
+	struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+	bool enqueue_required = false;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+
+	/* Early out for common case */
+	if (list_empty(&queue->queue_head) || jsctx_rb_is_full(queue))
+		return;
+
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+
+	spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, flags);
+	/* If slot will transition from unpullable to pullable then add to
+	 * pullable list */
+	if (jsctx_rb_none_to_pull(kctx, js))
+		enqueue_required = true;
+	else
+		enqueue_required = false;
+
+	list_for_each_safe(pos, q, &queue->queue_head) {
+		struct kbase_jd_atom *katom;
+
+		katom = list_entry(pos, struct kbase_jd_atom, queue);
+
+		KBASE_DEBUG_ASSERT(katom);
+
+		if (jsctx_rb_add_atom(kctx, katom))
+			break;
+
+		katom->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_LL;
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED;
+		flushed_any = true;
+
+		list_del(pos);
+	}
+
+
+	if (flushed_any) {
+		bool timer_sync = false;
+
+		if (enqueue_required) {
+			if (kbase_js_ctx_pullable(kctx, js, false))
+				timer_sync = kbase_js_ctx_list_add_pullable(
+						kctx->kbdev, kctx, js);
+			else
+				timer_sync = kbase_js_ctx_list_add_unpullable(
+						kctx->kbdev, kctx, js);
+			/* If this context is active and the atom is the first
+			 * on its slot, kick the job manager to attempt to
+			 * fast-start the atom */
+			if (kctx == kctx->kbdev->hwaccess.active_kctx)
+				kbase_jm_try_kick(kctx->kbdev, 1 << js);
+
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+					flags);
+
+			if (timer_sync)
+				kbase_backend_ctx_count_changed(kctx->kbdev);
+
+		} else {
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+					flags);
+		}
+	} else {
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	}
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+}
+
+/*
+ * Functions private to KBase ('Protected' functions)
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev)
+{
+	struct kbasep_js_device_data *jsdd;
+	int err;
+	int i;
+	u16 as_present;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	jsdd = &kbdev->js_data;
+
+	KBASE_DEBUG_ASSERT(jsdd->init_status == JS_DEVDATA_INIT_NONE);
+
+	/* These two must be recalculated if nr_hw_address_spaces changes
+	 * (e.g. for HW workarounds) */
+	as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
+	kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+		bool use_workaround;
+
+		use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
+		if (use_workaround) {
+			dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
+			kbdev->nr_user_address_spaces = 1;
+		}
+	}
+#ifdef CONFIG_MALI_DEBUG
+	/* Soft-stop will be disabled on a single context by default unless
+	 * softstop_always is set */
+	jsdd->softstop_always = false;
+#endif				/* CONFIG_MALI_DEBUG */
+	jsdd->nr_all_contexts_running = 0;
+	jsdd->nr_user_contexts_running = 0;
+	jsdd->nr_contexts_pullable = 0;
+	atomic_set(&jsdd->nr_contexts_runnable, 0);
+	/* All ASs initially free */
+	jsdd->as_free = as_present;
+	/* No ctx allowed to submit */
+	jsdd->runpool_irq.submit_allowed = 0u;
+	memset(jsdd->runpool_irq.ctx_attr_ref_count, 0,
+			sizeof(jsdd->runpool_irq.ctx_attr_ref_count));
+	memset(jsdd->runpool_irq.slot_affinities, 0,
+			sizeof(jsdd->runpool_irq.slot_affinities));
+	memset(jsdd->runpool_irq.slot_affinity_refcount, 0,
+			sizeof(jsdd->runpool_irq.slot_affinity_refcount));
+	INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list);
+
+	/* Config attributes */
+	jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+	jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+	jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+	else
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS;
+	jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+	jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408;
+	else
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+	jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+	jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
+	jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
+	jsdd->cfs_ctx_runtime_init_slices =
+		DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES;
+	jsdd->cfs_ctx_runtime_min_slices =
+		DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES;
+	atomic_set(&jsdd->soft_event_timeout_ms, DEFAULT_JS_SOFT_EVENT_TIMEOUT);
+
+	dev_dbg(kbdev->dev, "JS Config Attribs: ");
+	dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
+			jsdd->scheduling_period_ns);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u",
+			jsdd->soft_stop_ticks);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u",
+			jsdd->soft_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u",
+			jsdd->hard_stop_ticks_ss);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u",
+			jsdd->hard_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u",
+			jsdd->hard_stop_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u",
+			jsdd->gpu_reset_ticks_ss);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u",
+			jsdd->gpu_reset_ticks_cl);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u",
+			jsdd->gpu_reset_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u",
+			jsdd->ctx_timeslice_ns);
+	dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_init_slices:%u",
+			jsdd->cfs_ctx_runtime_init_slices);
+	dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u",
+			jsdd->cfs_ctx_runtime_min_slices);
+	dev_dbg(kbdev->dev, "\tsoft_event_timeout:%i",
+		atomic_read(&jsdd->soft_event_timeout_ms));
+
+	if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
+			jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
+			jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping &&
+			jsdd->hard_stop_ticks_dumping <
+			jsdd->gpu_reset_ticks_dumping)) {
+		dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n");
+		return -EINVAL;
+	}
+
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Policy Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.",
+			jsdd->soft_stop_ticks,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Policy Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.",
+			jsdd->hard_stop_ticks_ss,
+			jsdd->hard_stop_ticks_dumping,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Note: The JS policy's tick timer (if coded) will still be run, but do nothing.");
+#endif
+
+	/* setup the number of irq throttle cycles base on given time */
+	{
+		int time_us = kbdev->gpu_props.irq_throttle_time_us;
+		int cycles = kbasep_js_convert_us_to_gpu_ticks_max_freq(kbdev,
+				time_us);
+
+		atomic_set(&kbdev->irq_throttle_cycles, cycles);
+	}
+
+	/* Clear the AS data, including setting NULL pointers */
+	memset(&jsdd->runpool_irq.per_as_data[0], 0,
+			sizeof(jsdd->runpool_irq.per_as_data));
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i)
+		jsdd->js_reqs[i] = core_reqs_from_jsn_features(
+			kbdev->gpu_props.props.raw_props.js_features[i]);
+
+	jsdd->init_status |= JS_DEVDATA_INIT_CONSTANTS;
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+
+	mutex_init(&jsdd->runpool_mutex);
+	mutex_init(&jsdd->queue_mutex);
+	spin_lock_init(&jsdd->runpool_irq.lock);
+	sema_init(&jsdd->schedule_sem, 1);
+
+	err = kbasep_js_policy_init(kbdev);
+	if (!err)
+		jsdd->init_status |= JS_DEVDATA_INIT_POLICY;
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
+		INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]);
+		INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]);
+	}
+
+	/* On error, do no cleanup; this will be handled by the caller(s), since
+	 * we've designed this resource to be safe to terminate on init-fail */
+	if (jsdd->init_status != JS_DEVDATA_INIT_ALL)
+		return -EINVAL;
+
+	return 0;
+}
+
+void kbasep_js_devdata_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbasep_js_devdata_term(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	if ((js_devdata->init_status & JS_DEVDATA_INIT_CONSTANTS)) {
+		s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+		/* The caller must de-register all contexts before calling this
+		 */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
+		KBASE_DEBUG_ASSERT(memcmp(
+				js_devdata->runpool_irq.ctx_attr_ref_count,
+				zero_ctx_attr_ref_count,
+				sizeof(zero_ctx_attr_ref_count)) == 0);
+		CSTD_UNUSED(zero_ctx_attr_ref_count);
+	}
+	if ((js_devdata->init_status & JS_DEVDATA_INIT_POLICY))
+		kbasep_js_policy_term(&js_devdata->policy);
+
+	js_devdata->init_status = JS_DEVDATA_INIT_NONE;
+}
+
+int kbasep_js_kctx_init(struct kbase_context * const kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int err;
+	int i, j;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	KBASE_DEBUG_ASSERT(js_kctx_info->init_status == JS_KCTX_INIT_NONE);
+
+	js_kctx_info->ctx.nr_jobs = 0;
+	js_kctx_info->ctx.is_scheduled = false;
+	js_kctx_info->ctx.is_dying = false;
+	memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
+			sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
+
+	/* Initially, the context is disabled from submission until the create
+	 * flags are set */
+	js_kctx_info->ctx.flags = KBASE_CTX_FLAG_SUBMIT_DISABLED;
+
+	js_kctx_info->init_status |= JS_KCTX_INIT_CONSTANTS;
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+	mutex_init(&js_kctx_info->ctx.jsctx_mutex);
+
+	init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
+
+	err = kbasep_js_policy_init_ctx(kbdev, kctx);
+	if (!err)
+		js_kctx_info->init_status |= JS_KCTX_INIT_POLICY;
+
+	/* On error, do no cleanup; this will be handled by the caller(s), since
+	 * we've designed this resource to be safe to terminate on init-fail */
+	if (js_kctx_info->init_status != JS_KCTX_INIT_ALL)
+		return -EINVAL;
+
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
+			INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].queue_head);
+			atomic_set(&kctx->jsctx_queue[i][j].indicies, 0);
+		}
+	}
+
+	return 0;
+}
+
+void kbasep_js_kctx_term(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	union kbasep_js_policy *js_policy;
+	int js;
+	bool update_ctx_count = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if ((js_kctx_info->init_status & JS_KCTX_INIT_CONSTANTS)) {
+		/* The caller must de-register all jobs before calling this */
+		KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+		KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
+	}
+
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kctx->ctx_runnable_ref) {
+		WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
+		atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		update_ctx_count = true;
+		kctx->ctx_runnable_ref = false;
+	}
+
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if ((js_kctx_info->init_status & JS_KCTX_INIT_POLICY))
+		kbasep_js_policy_term_ctx(js_policy, kctx);
+
+	js_kctx_info->init_status = JS_KCTX_INIT_NONE;
+
+	if (update_ctx_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		kbase_backend_ctx_count_changed(kbdev);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable - Add context to the tail of the per-slot
+ *                                  pullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the tail.
+ *
+ * This function should be used when queueing a context for the first time, or
+ * re-queueing a context that has been pulled from.
+ *
+ * Caller must hold kbasep_jd_device_data.queue_mutex
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->js_data.queue_mutex);
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = true;
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
+ *                                       per-slot pullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the head.
+ *
+ * This function should be used when a context has been scheduled, but no jobs
+ * can currently be pulled from it.
+ *
+ * Caller must hold kbasep_jd_device_data.queue_mutex
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->js_data.queue_mutex);
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = true;
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_unpullable - Add context to the tail of the per-slot
+ *                                    unpullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on the per-slot pullable queue. It will be
+ * removed from the pullable queue before being added to the unpullable queue.
+ *
+ * This function should be used when a context has been pulled from, and there
+ * are no jobs remaining on the specified slot.
+ *
+ * Caller must hold kbasep_jd_device_data.queue_mutex
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->js_data.queue_mutex);
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+				&kbdev->js_data.ctx_list_unpullable[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_remove - Remove context from the per-slot pullable or
+ *                            unpullable context queues
+ * @kbdev:  Device pointer
+ * @kctx:   Context to remove from queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on one of the queues.
+ *
+ * This function should be used when a context has no jobs on the GPU, and no
+ * jobs remaining for the specified slot.
+ *
+ * Caller must hold kbasep_jd_device_data.queue_mutex
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->js_data.queue_mutex);
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
+ *                              queue.
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Caller must hold kbasep_jd_device_data::queue_mutex
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head(
+						struct kbase_device *kbdev,
+						int js)
+{
+	struct kbase_context *kctx;
+
+	lockdep_assert_held(&kbdev->js_data.queue_mutex);
+
+	if (list_empty(&kbdev->js_data.ctx_list_pullable[js]))
+		return NULL;
+
+	kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next,
+					struct kbase_context,
+					jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the
+ *                         specified slot
+ * @kctx:          Context pointer
+ * @js:            Job slot to use
+ * @is_scheduled:  true if the context is currently scheduled
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return:         true if context can be pulled from on specified slot
+ *                 false otherwise
+ */
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
+					bool is_scheduled)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_jd_atom *katom;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	js_devdata = &kctx->kbdev->js_data;
+
+	if (is_scheduled) {
+		if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+			return false;
+	}
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return false; /* No pullable atoms */
+	if (atomic_read(&katom->blocked))
+		return false; /* next atom blocked */
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return false;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return false;
+	}
+
+	return true;
+}
+
+static bool kbase_js_dep_validate(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool ret = true;
+	bool has_dep = false, has_x_dep = false;
+	int js = kbase_js_get_slot(kbdev, katom);
+	int prio = katom->sched_priority;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+		if (dep_atom) {
+			int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+			int dep_prio = dep_atom->sched_priority;
+
+			/* Dependent atom must already have been submitted */
+			if (!(dep_atom->atom_flags &
+					(KBASE_KATOM_FLAG_JSCTX_IN_LL |
+					KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED))){
+				ret = false;
+				break;
+			}
+
+			/* Dependencies with different priorities can't
+			  be represented in the ringbuffer */
+			if (prio != dep_prio) {
+				ret = false;
+				break;
+			}
+
+			if (js == dep_js) {
+				/* Only one same-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_dep) {
+					ret = false;
+					break;
+				}
+				has_dep = true;
+			} else {
+				/* Only one cross-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_x_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * cross-slot dependency */
+				if (dep_atom->x_post_dep) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already be in the
+				 * HW access ringbuffer */
+				if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already have
+				 * completed */
+				if (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_IN_JS) {
+					ret = false;
+					break;
+				}
+				/* Cross-slot dependencies must not violate
+				 * PRLAM-8987 affinity restrictions */
+				if (kbase_hw_has_issue(kbdev,
+							BASE_HW_ISSUE_8987) &&
+						(js == 2 || dep_js == 2)) {
+					ret = false;
+					break;
+				}
+				has_x_dep = true;
+			}
+
+			if (kbase_jd_katom_dep_type(&katom->dep[i]) ==
+						BASE_JD_DEP_TYPE_DATA &&
+					js == dep_js) {
+				struct kbase_jd_atom *last_atom =
+						jsctx_rb_peek_last(kctx, js,
+								prio);
+
+				/* Last atom on slot must be pre-dep for this
+				 * atom */
+				if (last_atom != dep_atom) {
+					ret = false;
+					break;
+				}
+			}
+
+			/* Dependency can be represented in ringbuffers */
+		}
+	}
+
+	/* If dependencies can be represented by ringbuffer then clear them from
+	 * atom structure */
+	if (ret) {
+		for (i = 0; i < 2; i++) {
+			struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+			if (dep_atom) {
+				int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+
+				if ((js != dep_js) &&
+					(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_COMPLETED)
+					&& (dep_atom->status !=
+					KBASE_JD_ATOM_STATE_HW_COMPLETED)
+					&& (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED)) {
+
+					katom->atom_flags |=
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+					katom->x_pre_dep = dep_atom;
+					dep_atom->x_post_dep = katom;
+					if (kbase_jd_katom_dep_type(
+							&katom->dep[i]) ==
+							BASE_JD_DEP_TYPE_DATA)
+						katom->atom_flags |=
+						KBASE_KATOM_FLAG_FAIL_BLOCKER;
+				}
+				if ((kbase_jd_katom_dep_type(&katom->dep[i])
+						== BASE_JD_DEP_TYPE_DATA) &&
+								(js == dep_js))
+					katom->atom_flags |=
+						KBASE_KATOM_FLAG_FAIL_PREV;
+
+				list_del(&katom->dep_item[i]);
+				kbase_jd_katom_dep_clear(&katom->dep[i]);
+			}
+		}
+	}
+
+	return ret;
+}
+
+bool kbasep_js_add_job(struct kbase_context *kctx,
+		struct kbase_jd_atom *atom)
+{
+	unsigned long flags;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	union kbasep_js_policy *js_policy;
+
+	bool enqueue_required = false;
+	bool timer_sync = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/*
+	 * Begin Runpool transaction
+	 */
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
+	++(js_kctx_info->ctx.nr_jobs);
+
+	/* Setup any scheduling information */
+	kbasep_js_clear_job_retry_submit(atom);
+
+	/* Lock for state available during IRQ */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	if (!kbase_js_dep_validate(kctx, atom)) {
+		/* Dependencies could not be represented */
+		--(js_kctx_info->ctx.nr_jobs);
+
+		/* Setting atom status back to queued as it still has unresolved
+		 * dependencies */
+		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		goto out_unlock;
+	}
+
+	KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
+
+	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
+				kbasep_js_trace_get_refcnt_nolock(kbdev, kctx));
+
+	/* Context Attribute Refcounting */
+	kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom);
+
+	if (enqueue_required) {
+		if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false))
+			timer_sync = kbase_js_ctx_list_add_pullable(kbdev, kctx,
+								atom->slot_nr);
+		else
+			timer_sync = kbase_js_ctx_list_add_unpullable(kbdev,
+					kctx, atom->slot_nr);
+	}
+	/* If this context is active and the atom is the first on its slot,
+	 * kick the job manager to attempt to fast-start the atom */
+	if (enqueue_required && kctx == kbdev->hwaccess.active_kctx)
+		kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+	/* End runpool transaction */
+
+	if (!js_kctx_info->ctx.is_scheduled) {
+		if (js_kctx_info->ctx.is_dying) {
+			/* A job got added while/after kbase_job_zap_context()
+			 * was called on a non-scheduled context (e.g. KDS
+			 * dependency resolved). Kill that job by killing the
+			 * context. */
+			kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx,
+					false);
+		} else if (js_kctx_info->ctx.nr_jobs == 1) {
+			/* Handle Refcount going from 0 to 1: schedule the
+			 * context on the Policy Queue */
+			KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+			dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+
+			/* Policy Queue was updated - caller must try to
+			 * schedule the head context */
+			WARN_ON(!enqueue_required);
+		}
+	}
+out_unlock:
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return enqueue_required;
+}
+
+void kbasep_js_remove_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	union kbasep_js_policy *js_policy;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc,
+			kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* De-refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
+	--(js_kctx_info->ctx.nr_jobs);
+}
+
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	unsigned long flags;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	struct kbasep_js_device_data *js_devdata;
+	bool attr_state_changed;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+	kbasep_js_remove_job(kbdev, kctx, katom);
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* The atom has 'finished' (will not be re-run), so no need to call
+	 * kbasep_js_has_atom_finished().
+	 *
+	 * This is because it returns false for soft-stopped atoms, but we
+	 * want to override that, because we're cancelling an atom regardless of
+	 * whether it was soft-stopped or not */
+	attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
+			&katom_retained_state);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return attr_state_changed;
+}
+
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	bool result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	/* KBASE_TRACE_ADD_REFCOUNT( kbdev, JS_RETAIN_CTX, kctx, NULL, 0,
+	   kbasep_js_trace_get_refcnt(kbdev, kctx)); */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return result;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+		int as_nr)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx = NULL;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	found_kctx = js_per_as_data->kctx;
+
+	if (found_kctx != NULL)
+		++(js_per_as_data->as_busy_refcount);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return found_kctx;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+		struct kbase_device *kbdev, int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx = NULL;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	found_kctx = js_per_as_data->kctx;
+
+	if (found_kctx != NULL)
+		++(js_per_as_data->as_busy_refcount);
+
+	return found_kctx;
+}
+
+/**
+ * kbasep_js_release_result - Try running more jobs after releasing a context
+ *                            and/or atom
+ *
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed
+ *
+ * This collates a set of actions that must happen whilst
+ * kbasep_js_device_data.runpool_irq.lock is held.
+ *
+ * This includes running more jobs when:
+ * - The previously released kctx caused a ctx attribute change,
+ * - The released atom caused a ctx attribute change,
+ * - Slots were previously blocked due to affinity restrictions,
+ * - Submission during IRQ handling failed.
+ *
+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were
+ *         changed. The caller should try scheduling all contexts
+ */
+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state,
+		bool runpool_ctx_attr_change)
+{
+	struct kbasep_js_device_data *js_devdata;
+	kbasep_js_release_result result = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom_retained_state != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	if (js_devdata->nr_user_contexts_running != 0) {
+		bool retry_submit = false;
+		int retry_jobslot = 0;
+
+		if (katom_retained_state)
+			retry_submit = kbasep_js_get_atom_retry_submit_slot(
+					katom_retained_state, &retry_jobslot);
+
+		if (runpool_ctx_attr_change || retry_submit) {
+			/* A change in runpool ctx attributes might mean we can
+			 * run more jobs than before  */
+			result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+
+			KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB,
+						kctx, NULL, 0u, retry_jobslot);
+		}
+	}
+	return result;
+}
+
+/*
+ * Internal function to release the reference on a ctx and an atom's "retained
+ * state", only taking the runpool and as transaction mutexes
+ *
+ * This also starts more jobs running in the case of an ctx-attribute state
+ * change
+ *
+ * This does none of the followup actions for scheduling:
+ * - It does not schedule in a new context
+ * - It does not requeue or handle dying contexts
+ *
+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead
+ *
+ * Requires:
+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
+ * - Context has a non-zero refcount
+ * - Caller holds js_kctx_info->ctx.jsctx_mutex
+ * - Caller holds js_devdata->runpool_mutex
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	union kbasep_js_policy *js_policy;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	kbasep_js_release_result release_result = 0u;
+	bool runpool_ctx_attr_change = false;
+	int kctx_as_nr;
+	struct kbase_as *current_as;
+	int new_ref_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
+
+	/* kctx->as_nr and js_per_as_data are only read from here. The caller's
+	 * js_ctx_mutex provides a barrier that ensures they are up-to-date.
+	 *
+	 * They will not change whilst we're reading them, because the refcount
+	 * is non-zero (and we ASSERT on that last fact).
+	 */
+	kctx_as_nr = kctx->as_nr;
+	KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID);
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[kctx_as_nr];
+	KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+	/*
+	 * Transaction begins on AS and runpool_irq
+	 *
+	 * Assert about out calling contract
+	 */
+	current_as = &kbdev->as[kctx_as_nr];
+	mutex_lock(&kbdev->pm.lock);
+	mutex_lock(&current_as->transaction_mutex);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
+	KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+	/* Update refcount */
+	new_ref_count = --(js_per_as_data->as_busy_refcount);
+
+	/* Release the atom if it finished (i.e. wasn't soft-stopped) */
+	if (kbasep_js_has_atom_finished(katom_retained_state))
+		runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(
+				kbdev, kctx, katom_retained_state);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u,
+			new_ref_count);
+
+	if (new_ref_count == 1 && kctx->jctx.sched_info.ctx.flags &
+			KBASE_CTX_FLAG_PRIVILEGED &&
+			!kbase_pm_is_suspending(kbdev)) {
+		/* Context is kept scheduled into an address space even when
+		 * there are no jobs, in this case we have to handle the
+		 * situation where all jobs have been evicted from the GPU and
+		 * submission is disabled.
+		 *
+		 * At this point we re-enable submission to allow further jobs
+		 * to be executed
+		 */
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+	}
+
+	/* Make a set of checks to see if the context should be scheduled out */
+	if (new_ref_count == 0 &&
+		(!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
+							kbdev->pm.suspending)) {
+		/* Last reference, and we've been told to remove this context
+		 * from the Run Pool */
+		dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because as_busy_refcount=%d, jobs=%d, allowed=%d",
+				kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
+				kbasep_js_is_submit_allowed(js_devdata, kctx));
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_mmu_as_released(kctx->as_nr);
+#endif
+		kbase_tlstream_tl_nret_as_ctx(&kbdev->as[kctx->as_nr], kctx);
+
+		kbase_backend_release_ctx_irq(kbdev, kctx);
+
+		if (kbdev->hwaccess.active_kctx == kctx)
+			kbdev->hwaccess.active_kctx = NULL;
+
+		/* Ctx Attribute handling
+		 *
+		 * Releasing atoms attributes must either happen before this, or
+		 * after 'is_scheduled' is changed, otherwise we double-decount
+		 * the attributes */
+		runpool_ctx_attr_change |=
+			kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
+
+		/* Releasing the context and katom retained state can allow
+		 * more jobs to run */
+		release_result |=
+			kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev,
+						kctx, katom_retained_state,
+						runpool_ctx_attr_change);
+
+		/*
+		 * Transaction ends on AS and runpool_irq:
+		 *
+		 * By this point, the AS-related data is now clear and ready
+		 * for re-use.
+		 *
+		 * Since releases only occur once for each previous successful
+		 * retain, and no more retains are allowed on this context, no
+		 * other thread will be operating in this
+		 * code whilst we are
+		 */
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+		kbase_backend_release_ctx_noirq(kbdev, kctx);
+
+		mutex_unlock(&current_as->transaction_mutex);
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* Note: Don't reuse kctx_as_nr now */
+
+		/* Synchronize with any policy timers */
+		kbase_backend_ctx_count_changed(kbdev);
+
+		/* update book-keeping info */
+		js_kctx_info->ctx.is_scheduled = false;
+		/* Signal any waiter that the context is not scheduled, so is
+		 * safe for termination - once the jsctx_mutex is also dropped,
+		 * and jobs have finished. */
+		wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+		/* Queue an action to occur after we've dropped the lock */
+		release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED;
+	} else {
+		kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
+				katom_retained_state, runpool_ctx_attr_change);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&current_as->transaction_mutex);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return release_result;
+}
+
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	/* Setup a dummy katom_retained_state */
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+							&katom_retained_state);
+}
+
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx, bool has_pm_ref)
+{
+	struct kbasep_js_device_data *js_devdata;
+	union kbasep_js_policy *js_policy;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_policy = &kbdev->js_data.policy;
+	js_devdata = &kbdev->js_data;
+
+	/* This is called if and only if you've you've detached the context from
+	 * the Runpool or the Policy Queue, and not added it back to the Runpool
+	 */
+	KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+	if (js_kctx_info->ctx.is_dying) {
+		/* Dying: don't requeue, but kill all jobs on the context. This
+		 * happens asynchronously */
+		dev_dbg(kbdev->dev,
+			"JS: ** Killing Context %p on RunPool Remove **", kctx);
+		kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
+	}
+}
+
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	base_jd_event_code event_code;
+	kbasep_js_release_result release_result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	event_code = katom_retained_state->event_code;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL)
+		kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+			&katom_retained_state);
+}
+
+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into
+ * kbase_js_sched_all() */
+static void kbasep_js_runpool_release_ctx_no_schedule(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+	struct kbasep_js_atom_retained_state katom_retained_state_struct;
+	struct kbasep_js_atom_retained_state *katom_retained_state =
+		&katom_retained_state_struct;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	kbasep_js_atom_retained_state_init_invalid(katom_retained_state);
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* NOTE: could return release_result if the caller would like to know
+	 * whether it should schedule a new context, but currently no callers do
+	 */
+}
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user specified data
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used, if a timeout
+ * is negative then the default is set.
+ */
+static void kbase_js_set_timeouts(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_data = &kbdev->js_data;
+
+	if (kbdev->js_scheduling_period_ns < 0)
+		js_data->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+	else if (kbdev->js_scheduling_period_ns > 0)
+		js_data->scheduling_period_ns = kbdev->js_scheduling_period_ns;
+
+	if (kbdev->js_soft_stop_ticks < 0)
+		js_data->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+	else if (kbdev->js_soft_stop_ticks > 0)
+		js_data->soft_stop_ticks = kbdev->js_soft_stop_ticks;
+
+	if (kbdev->js_soft_stop_ticks_cl < 0)
+		js_data->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+	else if (kbdev->js_soft_stop_ticks_cl > 0)
+		js_data->soft_stop_ticks_cl = kbdev->js_soft_stop_ticks_cl;
+
+	if (kbdev->js_hard_stop_ticks_ss < 0) {
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+			js_data->hard_stop_ticks_ss =
+					DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+		else
+			js_data->hard_stop_ticks_ss =
+					DEFAULT_JS_HARD_STOP_TICKS_SS;
+	} else if (kbdev->js_hard_stop_ticks_ss > 0) {
+		js_data->hard_stop_ticks_ss = kbdev->js_hard_stop_ticks_ss;
+	}
+
+	if (kbdev->js_hard_stop_ticks_cl < 0)
+		js_data->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+	else if (kbdev->js_hard_stop_ticks_cl > 0)
+		js_data->hard_stop_ticks_cl = kbdev->js_hard_stop_ticks_cl;
+
+	if (kbdev->js_hard_stop_ticks_dumping < 0)
+		js_data->hard_stop_ticks_dumping =
+				DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+	else if (kbdev->js_hard_stop_ticks_dumping > 0)
+		js_data->hard_stop_ticks_dumping =
+				kbdev->js_hard_stop_ticks_dumping;
+
+	if (kbdev->js_reset_ticks_ss < 0) {
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+			js_data->gpu_reset_ticks_ss =
+					DEFAULT_JS_RESET_TICKS_SS_8408;
+		else
+			js_data->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+	} else if (kbdev->js_reset_ticks_ss > 0) {
+		js_data->gpu_reset_ticks_ss = kbdev->js_reset_ticks_ss;
+	}
+
+	if (kbdev->js_reset_ticks_cl < 0)
+		js_data->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+	else if (kbdev->js_reset_ticks_cl > 0)
+		js_data->gpu_reset_ticks_cl = kbdev->js_reset_ticks_cl;
+
+	if (kbdev->js_reset_ticks_dumping < 0)
+		js_data->gpu_reset_ticks_dumping =
+				DEFAULT_JS_RESET_TICKS_DUMPING;
+	else if (kbdev->js_reset_ticks_dumping > 0)
+		js_data->gpu_reset_ticks_dumping =
+				kbdev->js_reset_ticks_dumping;
+}
+
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	union kbasep_js_policy *js_policy;
+	struct kbase_as *new_address_space = NULL;
+	unsigned long flags;
+	bool kctx_suspended = false;
+	int as_nr;
+
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Pick available address space for this context */
+	as_nr = kbase_backend_find_free_address_space(kbdev, kctx);
+
+	if (as_nr == KBASEP_AS_NR_INVALID)
+		return false; /* No address spaces currently available */
+
+	new_address_space = &kbdev->as[as_nr];
+
+	/*
+	 * Atomic transaction on the Context and Run Pool begins
+	 */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Check to see if context is dying due to kbase_job_zap_context() */
+	if (js_kctx_info->ctx.is_dying) {
+		/* Roll back the transaction so far and return */
+		kbase_backend_release_free_address_space(kbdev, as_nr);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL,
+				0u,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	if (js_devdata->nr_user_contexts_running == 0 &&
+			kbdev->js_timeouts_updated) {
+		/* Only when there are no other contexts submitting jobs:
+		 * Latch in run-time job scheduler timeouts that were set
+		 * through js_timeouts sysfs file */
+		kbase_js_set_timeouts(kbdev);
+
+		kbdev->js_timeouts_updated = false;
+	}
+
+	js_kctx_info->ctx.is_scheduled = true;
+
+	mutex_lock(&new_address_space->transaction_mutex);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* Assign context to previously chosen address space */
+	if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&new_address_space->transaction_mutex);
+		/* If address space is not pending, then kbase_backend_use_ctx()
+		 * failed. Roll back the transaction so far and return */
+		if (!kctx->as_pending) {
+			js_kctx_info->ctx.is_scheduled = false;
+
+			kbase_backend_release_free_address_space(kbdev, as_nr);
+		}
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		return false;
+	}
+
+	kbdev->hwaccess.active_kctx = kctx;
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
+#endif
+	kbase_tlstream_tl_ret_as_ctx(&kbdev->as[kctx->as_nr], kctx);
+
+	/* Cause any future waiter-on-termination to wait until the context is
+	 * descheduled */
+	wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+	/* Re-check for suspending: a suspend could've occurred, and all the
+	 * contexts could've been removed from the runpool before we took this
+	 * lock. In this case, we don't want to allow this context to run jobs,
+	 * we just want it out immediately.
+	 *
+	 * The DMB required to read the suspend flag was issued recently as part
+	 * of the runpool_irq locking. If a suspend occurs *after* that lock was
+	 * taken (i.e. this condition doesn't execute), then the
+	 * kbasep_js_suspend() code will cleanup this context instead (by virtue
+	 * of it being called strictly after the suspend flag is set, and will
+	 * wait for this lock to drop) */
+	if (kbase_pm_is_suspending(kbdev)) {
+		/* Cause it to leave at some later point */
+		bool retained;
+
+		retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+		KBASE_DEBUG_ASSERT(retained);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+		kctx_suspended = true;
+	}
+
+	/* Transaction complete */
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	mutex_unlock(&new_address_space->transaction_mutex);
+
+	/* Synchronize with any policy timers */
+	kbase_backend_ctx_count_changed(kbdev);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	/* Note: after this point, the context could potentially get scheduled
+	 * out immediately */
+
+	if (kctx_suspended) {
+		/* Finishing forcing out the context due to a suspend. Use a
+		 * variant of kbasep_js_runpool_release_ctx() that doesn't
+		 * schedule a new context, to prevent a risk of recursion back
+		 * into this function */
+		kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx);
+		return false;
+	}
+	return true;
+}
+
+static bool kbase_js_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	if (kctx->as_pending) {
+		/* Context waiting for AS to be assigned */
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		return false;
+	}
+	if (kbase_backend_use_ctx_sched(kbdev, kctx)) {
+		/* Context already has ASID - mark as active */
+		kbdev->hwaccess.active_kctx = kctx;
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		return true; /* Context already scheduled */
+	}
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return kbasep_js_schedule_ctx(kbdev, kctx);
+}
+
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	bool is_scheduled;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* This must never be attempted whilst suspending - i.e. it should only
+	 * happen in response to a syscall from a user-space thread */
+	BUG_ON(kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Mark the context as privileged */
+	js_kctx_info->ctx.flags |= KBASE_CTX_FLAG_PRIVILEGED;
+
+	is_scheduled = js_kctx_info->ctx.is_scheduled;
+	if (!is_scheduled) {
+		/* Add the context to the pullable list */
+		if (kbase_js_ctx_list_add_pullable(kbdev, kctx, 0))
+			kbase_js_sync_timers(kbdev);
+
+		/* Fast-starting requires the jsctx_mutex to be dropped,
+		 * because it works on multiple ctxs */
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		/* Try to schedule the context in */
+		kbase_js_sched_all(kbdev);
+
+		/* Wait for the context to be scheduled in */
+		wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			kctx->jctx.sched_info.ctx.is_scheduled);
+	} else {
+		/* Already scheduled in - We need to retain it to keep the
+		 * corresponding address space */
+		kbasep_js_runpool_retain_ctx(kbdev, kctx);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+}
+KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx);
+
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool pending;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* We don't need to use the address space anymore */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED);
+	pending = kctx->as_pending;
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Release the context - it will be scheduled out if there is no
+	 * pending job */
+	if (!pending)
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	kbase_js_sched_all(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx);
+
+void kbasep_js_suspend(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+	u16 retained = 0u;
+	int nr_privileged_ctx = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* Prevent all contexts from submitting */
+	js_devdata->runpool_irq.submit_allowed = 0;
+
+	/* Retain each of the contexts, so we can cause it to leave even if it
+	 * had no refcount to begin with */
+	for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+			&js_devdata->runpool_irq.per_as_data[i];
+		struct kbase_context *kctx = js_per_as_data->kctx;
+
+		retained = retained << 1;
+
+		if (kctx) {
+			++(js_per_as_data->as_busy_refcount);
+			retained |= 1u;
+			/* We can only cope with up to 1 privileged context -
+			 * the instrumented context. It'll be suspended by
+			 * disabling instrumentation */
+			if (kctx->jctx.sched_info.ctx.flags &
+					KBASE_CTX_FLAG_PRIVILEGED)
+				KBASE_DEBUG_ASSERT(++nr_privileged_ctx == 1);
+		}
+	}
+	CSTD_UNUSED(nr_privileged_ctx);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	/* De-ref the previous retain to ensure each context gets pulled out
+	 * sometime later. */
+	for (i = 0;
+		 i < BASE_MAX_NR_AS;
+		 ++i, retained = retained >> 1) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+			&js_devdata->runpool_irq.per_as_data[i];
+		struct kbase_context *kctx = js_per_as_data->kctx;
+
+		if (retained & 1u)
+			kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	/* Caller must wait for all Power Manager active references to be
+	 * dropped */
+}
+
+void kbasep_js_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int js;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_context *kctx, *n;
+
+		list_for_each_entry_safe(kctx, n,
+				&kbdev->js_data.ctx_list_unpullable[js],
+				jctx.sched_info.ctx.ctx_list_entry[js]) {
+			struct kbasep_js_kctx_info *js_kctx_info;
+			unsigned long flags;
+			bool timer_sync = false;
+
+			js_kctx_info = &kctx->jctx.sched_info;
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+			if (!js_kctx_info->ctx.is_scheduled &&
+				kbase_js_ctx_pullable(kctx, js, false))
+				timer_sync = kbase_js_ctx_list_add_pullable(
+							kbdev, kctx, js);
+
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+									flags);
+			if (timer_sync)
+				kbase_backend_ctx_count_changed(kbdev);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		}
+	}
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	/* Restart atom processing */
+	kbase_js_sched_all(kbdev);
+
+	/* JS Resume complete */
+}
+
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if ((katom->core_req & BASE_JD_REQ_FS) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE |
+								BASE_JD_REQ_T)))
+		return false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) &&
+	    (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T)))
+		return false;
+
+	return true;
+}
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_FS)
+		return 0;
+
+	if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+		if (katom->device_nr == 1 &&
+				kbdev->gpu_props.num_core_groups == 2)
+			return 2;
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+			return 2;
+	}
+
+	return 1;
+}
+
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom)
+{
+	bool enqueue_required;
+
+	katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	/* If slot will transition from unpullable to pullable then add to
+	 * pullable list */
+	if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
+		enqueue_required = true;
+	} else {
+		enqueue_required = false;
+	}
+	/* Check if there are lower priority jobs to soft stop */
+	kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+	/* Add atom to ring buffer. */
+	if (jsctx_rb_add_atom(kctx, katom)) {
+		jsctx_ll_add(kctx, katom);
+		enqueue_required = false;
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_LL;
+	} else {
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED;
+	}
+	return enqueue_required;
+}
+
+/**
+ * kbase_js_evict_deps - Evict dependencies of a failed atom.
+ * @kctx:       Context pointer
+ * @katom:      Pointer to the atom that has failed.
+ * @js:         The job slot the katom was run on.
+ * @prio:       Priority of the katom.
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propogated to all dependent atoms.
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom, int js, int prio)
+{
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+	struct kbase_jd_atom *next_katom = jsctx_rb_peek_prio(kctx, js, prio);
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	if (next_katom &&
+			(next_katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV)) {
+		KBASE_DEBUG_ASSERT(next_katom->status !=
+				KBASE_JD_ATOM_STATE_HW_COMPLETED);
+
+		next_katom->will_fail_event_code = katom->event_code;
+
+	}
+
+	/* Has cross slot depenency. */
+	if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_LL |
+					KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED))) {
+		/* Remove dependency.*/
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+		/* Fail if it had a data dependency. */
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
+			x_dep->will_fail_event_code = katom->event_code;
+		}
+	}
+}
+
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbasep_js_device_data *js_devdata;
+	int pulled;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	js_devdata = &kctx->kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+		return NULL;
+	if (kbase_pm_is_suspending(kctx->kbdev))
+		return NULL;
+
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return NULL;
+
+	if (atomic_read(&katom->blocked))
+		return NULL;
+
+	/* Due to ordering restrictions when unpulling atoms on failure, we do
+	 * not allow multiple runs of fail-dep atoms from the same context to be
+	 * present on the same slot */
+	if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) &&
+				atomic_read(&kctx->atoms_pulled_slot[js])) {
+		struct kbase_jd_atom *prev_atom =
+				kbase_backend_inspect_tail(kctx->kbdev, js);
+
+		if (prev_atom && prev_atom->kctx != kctx)
+			return NULL;
+	}
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return NULL;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return NULL;
+	}
+
+	kctx->pulled = true;
+	pulled = atomic_inc_return(&kctx->atoms_pulled);
+	if (pulled == 1 && !kctx->slots_pullable) {
+		WARN_ON(kctx->ctx_runnable_ref);
+		kctx->ctx_runnable_ref = true;
+		atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable);
+	}
+	atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
+	jsctx_rb_pull(kctx, katom);
+
+	kbasep_js_runpool_retain_ctx_nolock(kctx->kbdev, kctx);
+	katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+
+	katom->sched_info.cfs.ticks = 0;
+
+	return katom;
+}
+
+
+static void js_return_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+									work);
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	struct kbasep_js_atom_retained_state retained_state;
+	int js = katom->slot_nr;
+	bool timer_sync = false;
+	bool context_idle = false;
+	unsigned long flags;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	kbase_tlstream_aux_job_softstop_ex(katom);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	kbasep_js_atom_retained_state_copy(&retained_state, katom);
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	atomic_dec(&kctx->atoms_pulled);
+	atomic_dec(&kctx->atoms_pulled_slot[js]);
+
+	atomic_dec(&katom->blocked);
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
+			jsctx_rb_none_to_pull(kctx, js))
+		timer_sync |= kbase_js_ctx_list_remove(kbdev, kctx, js);
+
+	if (!atomic_read(&kctx->atoms_pulled)) {
+		if (!kctx->slots_pullable) {
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+
+		if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
+				!js_kctx_info->ctx.is_dying) {
+			int num_slots = kbdev->gpu_props.num_job_slots;
+			int slot;
+
+			if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+				kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+			for (slot = 0; slot < num_slots; slot++) {
+				if (kbase_js_ctx_pullable(kctx, slot, true))
+					timer_sync |=
+						kbase_js_ctx_list_add_pullable(
+							kbdev, kctx, slot);
+			}
+		}
+
+		kbase_jm_idle_ctx(kbdev, kctx);
+
+		context_idle = true;
+	}
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	if (context_idle) {
+		WARN_ON(!kctx->ctx_active);
+		kctx->ctx_active = false;
+		kbase_pm_context_idle(kbdev);
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+							&retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+}
+
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	jsctx_rb_unpull(kctx, katom);
+
+	WARN_ON(work_pending(&katom->work));
+
+	/* Block re-submission until workqueue has run */
+	atomic_inc(&katom->blocked);
+
+	kbase_job_check_leave_disjoint(kctx->kbdev, katom);
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, js_return_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+						struct kbase_jd_atom *katom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	bool timer_sync = false;
+	int atom_slot;
+	bool context_idle = false;
+
+	kbdev = kctx->kbdev;
+	atom_slot = katom->slot_nr;
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) {
+		jsctx_rb_remove(kctx, katom);
+
+		context_idle = !atomic_dec_return(&kctx->atoms_pulled);
+		atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
+
+		if (!atomic_read(&kctx->atoms_pulled) &&
+				!kctx->slots_pullable) {
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	WARN_ON(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_LL);
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
+			jsctx_rb_none_to_pull(kctx, atom_slot))
+		timer_sync |= kbase_js_ctx_list_remove(kctx->kbdev, kctx,
+				atom_slot);
+
+	/*
+	 * If submission is disabled on this context (most likely due to an
+	 * atom failure) and there are now no atoms left in the system then
+	 * re-enable submission so that context can be scheduled again.
+	 */
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
+					!atomic_read(&kctx->atoms_pulled) &&
+					!js_kctx_info->ctx.is_dying) {
+		int js;
+
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |= kbase_js_ctx_list_add_pullable(
+							kbdev, kctx, js);
+		}
+	} else if (katom->x_post_dep &&
+			kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+		int js;
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |= kbase_js_ctx_list_add_pullable(
+							kbdev, kctx, js);
+		}
+	}
+
+	/* Mark context as inactive. The pm reference will be dropped later in
+	 * jd_done_worker().
+	 */
+	if (context_idle)
+		kctx->ctx_active = false;
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return context_idle;
+}
+
+void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp)
+{
+	u64 microseconds_spent = 0;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx = katom->kctx;
+	union kbasep_js_policy *js_policy;
+	struct kbasep_js_device_data *js_devdata;
+
+	kbdev = kctx->kbdev;
+
+	js_policy = &kbdev->js_data.policy;
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	if (katom->will_fail_event_code)
+		katom->event_code = katom->will_fail_event_code;
+
+	katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
+
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_js_evict_deps(kctx, katom, katom->slot_nr,
+				katom->sched_priority);
+	}
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
+				katom->slot_nr), NULL, 0);
+#endif
+
+	kbase_tlstream_tl_nret_atom_lpu(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[
+				katom->slot_nr]);
+	kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]);
+	kbase_tlstream_tl_nret_ctx_lpu(
+			kctx,
+			&kbdev->gpu_props.props.raw_props.js_features[
+				katom->slot_nr]);
+
+	/* Calculate the job's time used */
+	if (end_timestamp != NULL) {
+		/* Only calculating it for jobs that really run on the HW (e.g.
+		 * removed from next jobs never actually ran, so really did take
+		 * zero time) */
+		ktime_t tick_diff = ktime_sub(*end_timestamp,
+							katom->start_timestamp);
+
+		microseconds_spent = ktime_to_ns(tick_diff);
+
+		do_div(microseconds_spent, 1000);
+
+		/* Round up time spent to the minimum timer resolution */
+		if (microseconds_spent < KBASEP_JS_TICK_RESOLUTION_US)
+			microseconds_spent = KBASEP_JS_TICK_RESOLUTION_US;
+	}
+
+	/* Log the result of the job (completion status, and time spent). */
+	kbasep_js_policy_log_job_result(js_policy, katom, microseconds_spent);
+
+	kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
+
+	/* Unblock cross dependency if present */
+	if (katom->x_post_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
+			!(katom->x_post_dep->atom_flags &
+						KBASE_KATOM_FLAG_FAIL_BLOCKER)))
+		katom->x_post_dep->atom_flags &=
+					~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+}
+
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+{
+	struct kbasep_js_device_data *js_devdata;
+	union kbasep_js_policy *js_policy;
+	bool timer_sync = false;
+
+	js_devdata = &kbdev->js_data;
+	js_policy = &js_devdata->policy;
+
+	down(&js_devdata->schedule_sem);
+	mutex_lock(&js_devdata->queue_mutex);
+
+	while (js_mask) {
+		int js;
+
+		js = ffs(js_mask) - 1;
+
+		while (1) {
+			struct kbase_context *kctx;
+			unsigned long flags;
+			bool context_idle = false;
+
+			kctx = kbase_js_ctx_list_pop_head(kbdev, js);
+
+			if (!kctx) {
+				js_mask &= ~(1 << js);
+				break; /* No contexts on pullable list */
+			}
+
+			if (!kctx->ctx_active) {
+				context_idle = true;
+
+				if (kbase_pm_context_active_handle_suspend(
+									kbdev,
+				      KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+					/* Suspend pending - return context to
+					 * queue and stop scheduling */
+					mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					if (kbase_js_ctx_list_add_pullable_head(
+						kctx->kbdev, kctx, js))
+						kbase_js_sync_timers(kbdev);
+					mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					mutex_unlock(&js_devdata->queue_mutex);
+					up(&js_devdata->schedule_sem);
+					return;
+				}
+				kctx->ctx_active = true;
+			}
+
+			if (!kbase_js_use_ctx(kbdev, kctx)) {
+				mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				/* Context can not be used at this time */
+				spin_lock_irqsave(&js_devdata->runpool_irq.lock,
+									flags);
+				if (kctx->as_pending ||
+					kbase_js_ctx_pullable(kctx, js, false)
+					|| (kctx->jctx.sched_info.ctx.flags &
+						KBASE_CTX_FLAG_PRIVILEGED))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head(
+							kctx->kbdev, kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable(
+							kctx->kbdev, kctx, js);
+				spin_unlock_irqrestore(
+					&js_devdata->runpool_irq.lock, flags);
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				if (context_idle) {
+					WARN_ON(!kctx->ctx_active);
+					kctx->ctx_active = false;
+					kbase_pm_context_idle(kbdev);
+				}
+
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+				break;
+			}
+			mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+			spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+			kctx->pulled = false;
+
+			if (!kbase_jm_kick(kbdev, 1 << js))
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+
+			if (!kctx->pulled) {
+				/* Failed to pull jobs - push to head of list */
+				if (kbase_js_ctx_pullable(kctx, js, true))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head(
+								kctx->kbdev,
+								kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable(
+								kctx->kbdev,
+								kctx, js);
+
+				if (context_idle) {
+					kbase_jm_idle_ctx(kbdev, kctx);
+					spin_unlock_irqrestore(
+						&js_devdata->runpool_irq.lock,
+									flags);
+					WARN_ON(!kctx->ctx_active);
+					kctx->ctx_active = false;
+					kbase_pm_context_idle(kbdev);
+				} else {
+					spin_unlock_irqrestore(
+						&js_devdata->runpool_irq.lock,
+									flags);
+				}
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+				js_mask &= ~(1 << js);
+				break; /* Could not run atoms on this slot */
+			}
+
+			/* Push to back of list */
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |= kbase_js_ctx_list_add_pullable(
+							kctx->kbdev, kctx, js);
+			else
+				timer_sync |= kbase_js_ctx_list_add_unpullable(
+							kctx->kbdev, kctx, js);
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+									flags);
+			mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		}
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+	up(&js_devdata->schedule_sem);
+}
+
+void kbase_js_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	int js;
+
+	/*
+	 * Critical assumption: No more submission is possible outside of the
+	 * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs)
+	 * whilst the struct kbase_context is terminating.
+	 */
+
+	/* First, atomically do the following:
+	 * - mark the context as dying
+	 * - try to evict it from the policy queue */
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	js_kctx_info->ctx.is_dying = true;
+
+	dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+
+	/*
+	 * At this point we know:
+	 * - If eviction succeeded, it was in the policy queue, but now no
+	 *   longer is
+	 *  - We must cancel the jobs here. No Power Manager active reference to
+	 *    release.
+	 *  - This happens asynchronously - kbase_jd_zap_context() will wait for
+	 *    those jobs to be killed.
+	 * - If eviction failed, then it wasn't in the policy queue. It is one
+	 *   of the following:
+	 *  - a. it didn't have any jobs, and so is not in the Policy Queue or
+	 *       the Run Pool (not scheduled)
+	 *   - Hence, no more work required to cancel jobs. No Power Manager
+	 *     active reference to release.
+	 *  - b. it was in the middle of a scheduling transaction (and thus must
+	 *       have at least 1 job). This can happen from a syscall or a
+	 *       kernel thread. We still hold the jsctx_mutex, and so the thread
+	 *       must be waiting inside kbasep_js_try_schedule_head_ctx(),
+	 *       before checking whether the runpool is full. That thread will
+	 *       continue after we drop the mutex, and will notice the context
+	 *       is dying. It will rollback the transaction, killing all jobs at
+	 *       the same time. kbase_jd_zap_context() will wait for those jobs
+	 *       to be killed.
+	 *   - Hence, no more work required to cancel jobs, or to release the
+	 *     Power Manager active reference.
+	 *  - c. it is scheduled, and may or may not be running jobs
+	 * - We must cause it to leave the runpool by stopping it from
+	 * submitting any more jobs. When it finally does leave,
+	 * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs
+	 * (because it is dying), release the Power Manager active reference,
+	 * and will not requeue the context in the policy queue.
+	 * kbase_jd_zap_context() will wait for those jobs to be killed.
+	 *  - Hence, work required just to make it leave the runpool. Cancelling
+	 *    jobs and releasing the Power manager active reference will be
+	 *    handled when it leaves the runpool.
+	 */
+	if (!js_kctx_info->ctx.is_scheduled) {
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (!list_empty(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+				list_del_init(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+		}
+
+		/* The following events require us to kill off remaining jobs
+		 * and update PM book-keeping:
+		 * - we evicted it correctly (it must have jobs to be in the
+		 *   Policy Queue)
+		 *
+		 * These events need no action, but take this path anyway:
+		 * - Case a: it didn't have any jobs, and was never in the Queue
+		 * - Case b: scheduling transaction will be partially rolled-
+		 *           back (this already cancels the jobs)
+		 */
+
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u,
+						js_kctx_info->ctx.is_scheduled);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+
+		/* Only cancel jobs when we evicted from the policy
+		 * queue. No Power Manager active reference was held.
+		 *
+		 * Having is_dying set ensures that this kills, and
+		 * doesn't requeue */
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+	} else {
+		unsigned long flags;
+		bool was_retained;
+
+		/* Case c: didn't evict, but it is scheduled - it's in the Run
+		 * Pool */
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u,
+						js_kctx_info->ctx.is_scheduled);
+		dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+
+		/* Disable the ctx from submitting any more jobs */
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		/* Retain and (later) release the context whilst it is is now
+		 * disallowed from submitting jobs - ensures that someone
+		 * somewhere will be removing the context later on */
+		was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+		/* Since it's scheduled and we have the jsctx_mutex, it must be
+		 * retained successfully */
+		KBASE_DEBUG_ASSERT(was_retained);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+
+		/* Cancel any remaining running jobs for this kctx - if any.
+		 * Submit is disallowed which takes effect immediately, so no
+		 * more new jobs will appear after we do this. */
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+			kbase_job_slot_hardstop(kctx, js, NULL);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+									kctx);
+
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u);
+
+	/* After this, you must wait on both the
+	 * kbase_jd_context::zero_jobs_wait and the
+	 * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs
+	 * to be destroyed, and the context to be de-scheduled (if it was on the
+	 * runpool).
+	 *
+	 * kbase_jd_zap_context() will do this. */
+}
+
+static inline int trace_get_refcnt(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+
+/**
+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context
+ * @kctx:     Pointer to context.
+ * @callback: Pointer to function to call for each job.
+ *
+ * Call a function on all jobs belonging to a non-queued, non-running
+ * context, and detach the jobs from the context as it goes.
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * Atoms will be removed from the queue, so this must only be called when
+ * cancelling jobs (which occurs as part of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_policy_ctx_job_cb callback)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	unsigned long flags;
+	u32 js;
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL,
+					0u, trace_get_refcnt(kbdev, kctx));
+
+	/* Invoke callback on jobs on each slot in turn */
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		jsctx_queue_foreach(kctx, js, callback);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
new file mode 100755
index 0000000..bdb820a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -0,0 +1,1048 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler APIs.
+ */
+
+#ifndef _KBASE_JS_H_
+#define _KBASE_JS_H_
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_js_policy.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_debug.h"
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js Job Scheduler Internal APIs
+ * @{
+ *
+ * These APIs are Internal to KBase and are available for use by the
+ * @ref kbase_js_policy "Job Scheduler Policy APIs"
+ */
+
+/**
+ * @brief Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * @brief Halt the Job Scheduler.
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero intitialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the struct kbase_context must be zero intitialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the context is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guarenteed not to update the Policy Queue. And so, the caller is
+ * guarenteed to not need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is false.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return true indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return false indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
+ *   the atom
+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ *   remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions.
+ *
+ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it has not been removed with kbasep_js_policy_dequeue_job()
+ *  - or, it has not been removed with kbasep_js_policy_dequeue_job_irq()
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Completely remove a job chain from the Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it is not being killed with kbasep_jd_cancel()
+ *  - or, it has not been removed with kbasep_js_policy_dequeue_job()
+ *  - or, it has not been removed with kbasep_js_policy_dequeue_job_irq()
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * @return true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs
+ * @return false otherwise
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_jd_atom *katom);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that is stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpoool_irq::lock, because
+ * it will be used internally. If the runpool_irq::lock is already held, then
+ * the caller should use kbasep_js_runpool_lookup_ctx_nolock() instead.
+ *
+ * @return a valid struct kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
+
+/**
+ * kbasep_js_runpool_lookup_ctx_nolock - Lookup a context in the Run Pool based
+ *         upon its current address space and ensure that is stays scheduled in.
+ * @kbdev: Device pointer
+ * @as_nr: Address space to lookup
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * Note: This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must the kbasep_js_device_data::runpoool_irq::lock.
+ *
+ * Return: a valid struct kbase_context on success, which has been refcounted as
+ *         being busy.
+ *         NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+		struct kbase_device *kbdev, int as_nr);
+
+/**
+ * @brief Handling the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.  \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero and the context \em might be scheduled out
+ * (depending on whether the Scheudling Policy has deemed it so, or if it has run
+ * out of jobs).
+ *
+ * If the context does get scheduled out, then The following actions will be
+ * taken as part of deschduling a context:
+ * - For the context being descheduled:
+ *  - If the context is in the processing of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ *  - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ *  - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ *  - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * - Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * - Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * - Attempt scheduling in a new context (if one is available), and if necessary,
+ * running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function \b cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a \a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * - Releasing the atom's context attributes
+ * - Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * @brief Variant of kbase_js_runpool_release_ctx() that assumes that
+ * kbasep_js_device_data::runpool_mutex and
+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
+ * attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * @brief Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the function will wait
+ * for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space reserved) until
+ * kbasep_js_release_privileged_ctx is called).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Release a privileged context, allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Mangement active refcount to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduling Policy's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time whenever the Job
+ * Scheduling Policy implements Job Timeouts (such as those done by CFS).
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * @brief Resume the Job Scheduler after a Power Management Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * - Schedules contexts back into the runpool
+ * - Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * @brief Submit an atom to the job scheduler.
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to submit
+ *
+ * @return Whether the context requires to be enqueued. */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom);
+
+/**
+  * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
+  * @kctx:  Context Pointer
+  * @prio:  Priority (specifies the queue together with js).
+  * @js:    Job slot (specifies the queue together with prio).
+  *
+  * Pushes all possible atoms from the linked list to the ringbuffer.
+  * Number of atoms are limited to free space in the ringbuffer and
+  * number of available atoms in the linked list.
+  *
+  */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
+/**
+ * @brief Pull an atom from a context in the job scheduler for execution.
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context to pull from
+ * @param[in] js    Job slot to pull from
+ * @return          Pointer to an atom, or NULL if there are no atoms for this
+ *                  slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * @brief Return an atom to the job scheduler ringbuffer.
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to unpull
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom from jd_done_worker(), removing it from the job
+ * scheduler ringbuffer.
+ *
+ * If the atom failed then all dependee atoms marked for failure propagation
+ * will also fail.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] katom Pointer to the atom to complete
+ * @return true if the context is now idle (no jobs pulled)
+ *         false otherwise
+ */
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom.
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] katom         Pointer to the atom to complete
+ * @param[in] end_timestamp The time that the atom completed (may be NULL)
+ */
+void kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp);
+
+/**
+ * @brief Submit atoms from all available contexts.
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ *
+ * @param[in] kbdev    Device pointer
+ * @param[in] js_mask  Mask of job slots to submit to
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_jd_zap_context - Attempt to deschedule a context that is being
+ *                        destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove a context from any internal job scheduler queues
+ * and perform any other actions to ensure a context will not be submitted
+ * from.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * @brief Validate an atom
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * @param[in] kbdev  Device pointer
+ * @param[in] katom  Atom to validate
+ * @return           true if atom is valid
+ *                   false otherwise
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * @brief Check that a context is allowed to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * As with any bool, never test the return value with true.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 test_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+	test_bit = (u16) (1u << kctx->as_nr);
+
+	return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+}
+
+/**
+ * @brief Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 set_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+	set_bit = (u16) (1u << kctx->as_nr);
+
+	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * @brief Prevent a context from submitting more jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 clear_bit;
+	u16 clear_mask;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+	clear_bit = (u16) (1u << kctx->as_nr);
+	clear_mask = ~clear_bit;
+
+	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
+
+/**
+ * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom
+ */
+static inline void kbasep_js_clear_job_retry_submit(struct kbase_jd_atom *atom)
+{
+	atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Mark a slot as requiring resubmission by carrying that information on a
+ * completing atom.
+ *
+ * @note This can ASSERT in debug builds if the submit slot has been set to
+ * something other than the current value for @a js. This is because you might
+ * be unintentionally stopping more jobs being submitted on the old submit
+ * slot, and that might cause a scheduling-hang.
+ *
+ * @note If you can guarantee that the atoms for the original slot will be
+ * submitted on some other slot, then call kbasep_js_clear_job_retry_submit()
+ * first to silence the ASSERT.
+ */
+static inline void kbasep_js_set_job_retry_submit_slot(struct kbase_jd_atom *atom, int js)
+{
+	KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS);
+	KBASE_DEBUG_ASSERT((atom->retry_submit_on_slot ==
+					KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID)
+				|| (atom->retry_submit_on_slot == js));
+
+	atom->retry_submit_on_slot = js;
+}
+
+/**
+ * Create an initial 'invalid' atom retained state, that requires no
+ * atom-related work to be done on releasing with
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
+{
+	retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+	retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+	retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Copy atom state that can be made available after jd_done_nolock() is called
+ * on that atom.
+ */
+static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
+{
+	retained_state->event_code = katom->event_code;
+	retained_state->core_req = katom->core_req;
+	retained_state->retry_submit_on_slot = katom->retry_submit_on_slot;
+	retained_state->sched_priority = katom->sched_priority;
+	retained_state->device_nr = katom->device_nr;
+}
+
+/**
+ * @brief Determine whether an atom has finished (given its retained state),
+ * and so should be given back to userspace/removed from the system.
+ *
+ * Reasons for an atom not finishing include:
+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ *
+ * @param[in] katom_retained_state the retained state of the atom to check
+ * @return    false if the atom has not finished
+ * @return    !=false if the atom has finished
+ */
+static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
+}
+
+/**
+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
+ * code should just ignore it.
+ *
+ * @param[in] katom_retained_state the atom's retained state to check
+ * @return    false if the retained state is invalid, and can be ignored
+ * @return    !=false if the retained state is valid
+ */
+static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_atom_retained_state *katom_retained_state, int *res)
+{
+	int js = katom_retained_state->retry_submit_on_slot;
+
+	*res = js;
+	return (bool) (js >= 0);
+}
+
+#if KBASE_DEBUG_DISABLE_ASSERTS == 0
+/**
+ * Debug Check the refcount of a context. Only use within ASSERTs
+ *
+ * Obtains kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return negative value if the context is not scheduled in
+ * @return current refcount of the context if it is scheduled in. The refcount
+ * is not guarenteed to be kept constant.
+ */
+static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int result = -1;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID)
+		result = js_devdata->runpool_irq.per_as_data[as_nr].as_busy_refcount;
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return result;
+}
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS == 0 */
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guarenteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpoool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guarenteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	found_kctx = js_per_as_data->kctx;
+	KBASE_DEBUG_ASSERT(found_kctx == NULL || js_per_as_data->as_busy_refcount > 0);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return found_kctx;
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: when you need the number of cycles to guarantee you won't wait for
+ * longer than 'us' time (you might have a shorter wait).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_min_freq(struct kbase_device *kbdev, u32 us)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return us * (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need the number of cycles to guarantee you'll wait at least
+ * 'us' amount of time (but you might wait longer).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_max_freq(struct kbase_device *kbdev, u32 us)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return us * (u32) (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the worst-case wait that 'ticks' cycles will
+ * take (you guarantee that you won't wait any longer than this, but it may
+ * be shorter).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_min_freq(struct kbase_device *kbdev, u32 ticks)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return ticks / gpu_freq * 1000;
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the best-case wait for 'tick' cycles (you
+ * guarantee to be waiting for at least this long, but it may be longer).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_max_freq(struct kbase_device *kbdev, u32 ticks)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return ticks / gpu_freq * 1000;
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+	++(js_devdata->nr_all_contexts_running);
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+		/* Track contexts that can submit jobs */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+									S8_MAX);
+		++(js_devdata->nr_user_contexts_running);
+	}
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	--(js_devdata->nr_all_contexts_running);
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+		/* Track contexts that can submit jobs */
+		--(js_devdata->nr_user_contexts_running);
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+	}
+}
+
+
+/**
+ * @brief Submit atoms from all available contexts to all job slots.
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ *
+ * @param[in] kbdev    Device pointer
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+	kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
+ *                                        to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treates priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ *         0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ *         KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+	if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+		return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+	return kbasep_js_atom_priority_to_relative[atom_prio];
+}
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+	unsigned int prio_idx;
+
+	KBASE_DEBUG_ASSERT(0 <= sched_prio
+			&& sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+	prio_idx = (unsigned int)sched_prio;
+
+	return kbasep_js_relative_priority_to_atom[prio_idx];
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100755
index 0000000..e6e611b
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,303 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+
+/*
+ * Private functions follow
+ */
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false);
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
+		++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) {
+			/* First refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false);
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+		--(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+			/* Last de-refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+	++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		/* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+	}
+
+	return runpool_state_changed;
+}
+
+/*
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+	if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+		/* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+	}
+
+	/* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */
+	--(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != false) {
+		/* This context never submits, so don't track any scheduling attributes */
+		return;
+	}
+
+	/* Transfer attributes held in the context flags for contexts that have submit enabled */
+
+	/* ... More attributes can be added here ... */
+
+	/* The context should not have been scheduled yet, so ASSERT if this caused
+	 * runpool state changes (note that other threads *can't* affect the value
+	 * of runpool_state_changed, due to how it's calculated) */
+	KBASE_DEBUG_ASSERT(runpool_state_changed == false);
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed;
+	int i;
+
+	/* Retain any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled in, so update the runpool with the new attributes */
+			runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+
+			/* We don't need to know about state changed, because retaining a
+			 * context occurs on scheduling it, and that itself will also try
+			 * to run new atoms */
+			CSTD_UNUSED(runpool_state_changed);
+		}
+	}
+}
+
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+	int i;
+
+	/* Release any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled out, so update the runpool on the removed attributes */
+			runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom);
+	core_req = katom->core_req;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	/* We don't need to know about state changed, because retaining an
+	 * atom occurs on adding it, and that itself will also try to run
+	 * new atoms */
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom_retained_state);
+	core_req = katom_retained_state->core_req;
+
+	/* No-op for invalid atoms */
+	if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false)
+		return false;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	return runpool_state_changed;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
new file mode 100755
index 0000000..ce91833
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -0,0 +1,158 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_ctx_attr.h
+ * Job Scheduler Context Attribute APIs
+ */
+
+#ifndef _KBASE_JS_CTX_ATTR_H_
+#define _KBASE_JS_CTX_ATTR_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+
+/**
+ * Set the initial attributes of a context (when context create flags are set)
+ *
+ * Requires:
+ * - Hold the jsctx_mutex
+ */
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of a context
+ *
+ * This occurs on scheduling in the context on the runpool (but after
+ * is_scheduled is set)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ */
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Release all attributes of a context
+ *
+ * This occurs on scheduling out the context from the runpool (but before
+ * is_scheduled is cleared)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of an atom
+ *
+ * This occurs on adding an atom to a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ */
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * Release all attributes of an atom, given its retained state.
+ *
+ * This occurs after (permanently) removing an atom from a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * This is a no-op when \a katom_retained_state is invalid.
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+
+	return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	/* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+	return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+	return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100755
index 0000000..75d4b98
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,506 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+/* Types used by the policies must go here */
+enum {
+	/** Context will not submit any jobs */
+	KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0),
+
+	/** Set if the context uses an address space and should be kept scheduled in */
+	KBASE_CTX_FLAG_PRIVILEGED = (1u << 1)
+
+	    /* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */
+};
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+	base_jd_core_req core_req;
+	kbase_context_flags ctx_req;
+	u32 device_nr;
+};
+
+#include "mali_kbase_js_policy_cfs.h"
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy {
+	struct kbasep_js_policy_cfs cfs;
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_ctx_info {
+	struct kbasep_js_policy_cfs_ctx cfs;
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_job_info {
+	struct kbasep_js_policy_cfs_job cfs;
+};
+
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_policy_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
+
+/**
+ * @brief the IRQ_THROTTLE time in microseconds
+ *
+ * This will be converted via the GPU's clock frequency into a cycle-count.
+ *
+ * @note we can make an estimate of the GPU's frequency by periodically
+ * sampling its CYCLE_COUNT register
+ */
+#define KBASE_JS_IRQ_THROTTLE_TIME_US 20
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affect scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accomodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ */
+enum kbasep_js_ctx_attr {
+	/** Attribute indicating a context that contains Compute jobs. That is,
+	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE,
+
+	/** Attribute indicating a context that contains Non-Compute jobs. That is,
+	 * the context has some jobs that are \b not of type @ref
+	 * BASE_JD_REQ_ONLY_COMPUTE. The context usually has
+	 * BASE_CONTEXT_HINT_COMPUTE \b clear, but this depends on the HW
+	 * workarounds in use in the Job Scheduling Policy.
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+	/** Attribute indicating that a context contains compute-job atoms that
+	 * aren't restricted to a coherent group, and can run on all cores.
+	 *
+	 * Specifically, this is when the atom's \a core_req satisfy:
+	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
+	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+	 *
+	 * Such atoms could be blocked from running if one of the coherent groups
+	 * is being used by another job slot, so tracking this context attribute
+	 * allows us to prevent such situations.
+	 *
+	 * @note This doesn't take into account the 1-coregroup case, where all
+	 * compute atoms would effectively be able to run on 'all cores', but
+	 * contexts will still not always get marked with this attribute. Instead,
+	 * it is the caller's responsibility to take into account the number of
+	 * coregroups when interpreting this attribute.
+	 *
+	 * @note Whilst Tiler atoms are normally combined with
+	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+	 * enough to handle anyway.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+	/** Must be the last in the enum */
+	KBASEP_JS_CTX_ATTR_COUNT
+};
+
+enum {
+	/** Bit indicating that new atom should be started because this atom completed */
+	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
+	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
+
+/**
+ * Data used by the scheduler that is unique for each Address Space.
+ *
+ * This is used in IRQ context and kbasep_js_device_data::runpoool_irq::lock
+ * must be held whilst accessing this data (inculding reads and atomic
+ * decisions based on the read).
+ */
+struct kbasep_js_per_as_data {
+	/**
+	 * Ref count of whether this AS is busy, and must not be scheduled out
+	 *
+	 * When jobs are running this is always positive. However, it can still be
+	 * positive when no jobs are running. If all you need is a heuristic to
+	 * tell you whether jobs might be running, this should be sufficient.
+	 */
+	int as_busy_refcount;
+
+	/** Pointer to the current context on this address space, or NULL for no context */
+	struct kbase_context *kctx;
+};
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not rollover (which would be undefined behavior), and so under
+ * the Total License model, it is free to make optimizations based on that (i.e.
+ * to remove masking).
+ */
+struct kbasep_js_device_data {
+	/** Sub-structure to collect together Job Scheduling data used in IRQ context */
+	struct runpool_irq {
+		/**
+		 * Lock for accessing Job Scheduling data used in IRQ context
+		 *
+		 * This lock must be held whenever this data is accessed (read, or
+		 * write). Even for read-only access, memory barriers would be needed.
+		 * In any case, it is likely that decisions based on only reading must
+		 * also be atomic with respect to data held here and elsewhere in the
+		 * Job Scheduler.
+		 *
+		 * This lock must also be held for accessing:
+		 * - kbase_context::as_nr
+		 * - kbase_device::jm_slots
+		 * - Parts of the kbasep_js_policy, dependent on the policy (refer to
+		 * the policy in question for more information)
+		 * - Parts of kbasep_js_policy_ctx_info, dependent on the policy (refer to
+		 * the policy in question for more information)
+		 */
+		spinlock_t lock;
+
+		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
+		 * When bit 'N' is set in this, it indicates whether the context bound to address space
+		 * 'N' (per_as_data[N].kctx) is allowed to submit jobs.
+		 *
+		 * It is placed here because it's much more memory efficient than having a u8 in
+		 * struct kbasep_js_per_as_data to store this flag  */
+		u16 submit_allowed;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of contexts
+		 * that can fit into the runpool. This is currently BASE_MAX_NR_AS
+		 *
+		 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+		 * the refcount. Hence, it's not worthwhile reducing this to
+		 * bit-manipulation on u32s to save space (where in contrast, 4 bit
+		 * sub-fields would be easy to do and would save space).
+		 *
+		 * Whilst this must not become negative, the sign bit is used for:
+		 * - error detection in debug builds
+		 * - Optimization: it is undefined for a signed int to overflow, and so
+		 * the compiler can optimize for that never happening (thus, no masking
+		 * is required on updating the variable) */
+		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/** Data that is unique for each AS */
+		struct kbasep_js_per_as_data per_as_data[BASE_MAX_NR_AS];
+
+		/*
+		 * Affinity management and tracking
+		 */
+		/** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
+		 * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
+		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
+		/** Refcount for each core owned by each slot. Used to generate the
+		 * slot_affinities array of bitvectors
+		 *
+		 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+		 * because it is refcounted only when a job is definitely about to be
+		 * submitted to a slot, and is de-refcounted immediately after a job
+		 * finishes */
+		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
+	} runpool_irq;
+
+	/**
+	 * Run Pool mutex, for managing contexts within the runpool.
+	 * Unless otherwise specified, you must hold this lock whilst accessing any
+	 * members that follow
+	 *
+	 * In addition, this is used to access:
+	 * - the kbasep_js_kctx_info::runpool substructure
+	 */
+	struct mutex runpool_mutex;
+
+	/**
+	 * Queue Lock, used to access the Policy's queue of contexts independently
+	 * of the Run Pool.
+	 *
+	 * Of course, you don't need the Run Pool lock to access this.
+	 */
+	struct mutex queue_mutex;
+
+	/**
+	 * Scheduling semaphore. This must be held when calling
+	 * kbase_jm_kick()
+	 */
+	struct semaphore schedule_sem;
+
+	/**
+	 * List of contexts that can currently be pulled from
+	 */
+	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
+	/**
+	 * List of contexts that can not currently be pulled from, but have
+	 * jobs currently running.
+	 */
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
+
+	u16 as_free;				/**< Bitpattern of free Address Spaces */
+
+	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+	s8 nr_user_contexts_running;
+	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+	s8 nr_all_contexts_running;
+
+	/**
+	 * Policy-specific information.
+	 *
+	 * Refer to the structure defined by the current policy to determine which
+	 * locks must be held when accessing this.
+	 */
+	union kbasep_js_policy policy;
+
+	/** Core Requirements to match up with base_js_atom's core_req memeber
+	 * @note This is a write-once member, and so no locking is required to read */
+	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+	u32 scheduling_period_ns;    /*< Value for JS_SCHEDULING_PERIOD_NS */
+	u32 soft_stop_ticks;	     /*< Value for JS_SOFT_STOP_TICKS */
+	u32 soft_stop_ticks_cl;	     /*< Value for JS_SOFT_STOP_TICKS_CL */
+	u32 hard_stop_ticks_ss;	     /*< Value for JS_HARD_STOP_TICKS_SS */
+	u32 hard_stop_ticks_cl;	     /*< Value for JS_HARD_STOP_TICKS_CL */
+	u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */
+	u32 gpu_reset_ticks_ss;	     /*< Value for JS_RESET_TICKS_SS */
+	u32 gpu_reset_ticks_cl;	     /*< Value for JS_RESET_TICKS_CL */
+	u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
+	u32 ctx_timeslice_ns;		 /**< Value for JS_CTX_TIMESLICE_NS */
+	u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */
+	u32 cfs_ctx_runtime_min_slices;	 /**< Value for  DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */
+
+	/**< Value for JS_SOFT_EVENT_TIMEOUT */
+	atomic_t soft_event_timeout_ms;
+
+	/** List of suspended soft jobs */
+	struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Support soft-stop on a single context */
+	bool softstop_always;
+#endif				/* CONFIG_MALI_DEBUG */
+
+	/** The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths).
+	 * @note This is a write-once member, and so no locking is required to read */
+	int init_status;
+
+	/* Number of contexts that can currently be pulled from */
+	u32 nr_contexts_pullable;
+
+	/* Number of contexts that can either be pulled from or are currently
+	 * running */
+	atomic_t nr_contexts_runnable;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+	/**
+	 * Runpool substructure. This must only be accessed whilst the Run Pool
+	 * mutex ( kbasep_js_device_data::runpool_mutex ) is held.
+	 *
+	 * In addition, the kbasep_js_device_data::runpool_irq::lock may need to be
+	 * held for certain sub-members.
+	 *
+	 * @note some of the members could be moved into struct kbasep_js_device_data for
+	 * improved d-cache/tlb efficiency.
+	 */
+	struct {
+		union kbasep_js_policy_ctx_info policy_ctx;	/**< Policy-specific context */
+	} runpool;
+
+	/**
+	 * Job Scheduler Context information sub-structure. These members are
+	 * accessed regardless of whether the context is:
+	 * - In the Policy's Run Pool
+	 * - In the Policy's Queue
+	 * - Not queued nor in the Run Pool.
+	 *
+	 * You must obtain the jsctx_mutex before accessing any other members of
+	 * this substructure.
+	 *
+	 * You may not access any of these members from IRQ context.
+	 */
+	struct kbase_jsctx {
+		struct mutex jsctx_mutex;		    /**< Job Scheduler Context lock */
+
+		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+		 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
+		 * for such jobs*/
+		u32 nr_jobs;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of atoms on
+		 * the context. **/
+		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		kbase_context_flags flags;
+		/* NOTE: Unify the following flags into kbase_context_flags */
+		/**
+		 * Is the context scheduled on the Run Pool?
+		 *
+		 * This is only ever updated whilst the jsctx_mutex is held.
+		 */
+		bool is_scheduled;
+		/**
+		 * Wait queue to wait for is_scheduled state changes.
+		 * */
+		wait_queue_head_t is_scheduled_wait;
+
+		bool is_dying;			/**< Is the context in the process of being evicted? */
+
+		/** Link implementing JS queues. Context can be present on one
+		 * list per job slot
+		 */
+		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
+	} ctx;
+
+	/* The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths) */
+	int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+	/** Event code - to determine whether the atom has finished */
+	enum base_jd_event_code event_code;
+	/** core requirements */
+	base_jd_core_req core_req;
+	/* priority */
+	int sched_priority;
+	/** Job Slot to retry submitting to if submission from IRQ handler failed */
+	int retry_submit_on_slot;
+	/* Core group atom was executed on */
+	u32 device_nr;
+
+};
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbase_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
+ *
+ * @see kbase_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_MED,
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,
+	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
new file mode 100755
index 0000000..debd011
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
@@ -0,0 +1,763 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_policy.h
+ * Job Scheduler Policy APIs.
+ */
+
+#ifndef _KBASE_JS_POLICY_H_
+#define _KBASE_JS_POLICY_H_
+
+/**
+ * @page page_kbase_js_policy Job Scheduling Policies
+ * The Job Scheduling system is described in the following:
+ * - @subpage page_kbase_js_policy_overview
+ * - @subpage page_kbase_js_policy_operation
+ *
+ * The API details are as follows:
+ * - @ref kbase_jm
+ * - @ref kbase_js
+ * - @ref kbase_js_policy
+ */
+
+/**
+ * @page page_kbase_js_policy_overview Overview of the Policy System
+ *
+ * The Job Scheduler Policy manages:
+ * - The assigning of KBase Contexts to GPU Address Spaces (\em ASs)
+ * - The choosing of Job Chains (\em Jobs) from a KBase context, to run on the
+ * GPU's Job Slots (\em JSs).
+ * - The amount of \em time a context is assigned to (<em>scheduled on</em>) an
+ * Address Space
+ * - The amount of \em time a Job spends running on the GPU
+ *
+ * The Policy implements this management via 2 components:
+ * - A Policy Queue, which manages a set of contexts that are ready to run,
+ * but not currently running.
+ * - A Policy Run Pool, which manages the currently running contexts (one per Address
+ * Space) and the jobs to run on the Job Slots.
+ *
+ * Each Graphics Process in the system has at least one KBase Context. Therefore,
+ * the Policy Queue can be seen as a queue of Processes waiting to run Jobs on
+ * the GPU.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_overview.dot "Diagram showing a very simplified overview of the Policy System. IRQ handling, soft/hard-stopping, contexts re-entering the system and Policy details are omitted"
+ *
+ * The main operations on the queue are:
+ * - Enqueuing a Context to it
+ * - Dequeuing a Context from it, to run it.
+ * - Note: requeuing a context is much the same as enqueuing a context, but
+ * occurs when a context is scheduled out of the system to allow other contexts
+ * to run.
+ *
+ * These operations have much the same meaning for the Run Pool - Jobs are
+ * dequeued to run on a Jobslot, and requeued when they are scheduled out of
+ * the GPU.
+ *
+ * @note This is an over-simplification of the Policy APIs - there are more
+ * operations than 'Enqueue'/'Dequeue', and a Dequeue from the Policy Queue
+ * takes at least two function calls: one to Dequeue from the Queue, one to add
+ * to the Run Pool.
+ *
+ * As indicated on the diagram, Jobs permanently leave the scheduling system
+ * when they are completed, otherwise they get dequeued/requeued until this
+ * happens. Similarly, Contexts leave the scheduling system when their jobs
+ * have all completed. However, Contexts may later return to the scheduling
+ * system (not shown on the diagram) if more Bags of Jobs are submitted to
+ * them.
+ */
+
+/**
+ * @page page_kbase_js_policy_operation Policy Operation
+ *
+ * We describe the actions that the Job Scheduler Core takes on the Policy in
+ * the following cases:
+ * - The IRQ Path
+ * - The Job Submission Path
+ * - The High Priority Job Submission Path
+ *
+ * This shows how the Policy APIs will be used by the Job Scheduler core.
+ *
+ * The following diagram shows an example Policy that contains a Low Priority
+ * queue, and a Real-time (High Priority) Queue. The RT queue is examined
+ * before the LowP one on dequeuing from the head. The Low Priority Queue is
+ * ordered by time, and the RT queue is ordered by time weighted by
+ * RT-priority. In addition, it shows that the Job Scheduler Core will start a
+ * Soft-Stop Timer (SS-Timer) when it dequeue's and submits a job. The
+ * Soft-Stop time is set by a global configuration value, and must be a value
+ * appropriate for the policy. For example, this could include "don't run a
+ * soft-stop timer" for a First-Come-First-Served (FCFS) policy.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_operation_diagram.dot "Diagram showing the objects managed by an Example Policy, and the operations made upon these objects by the Job Scheduler Core."
+ *
+ * @section sec_kbase_js_policy_operation_prio Dealing with Priority
+ *
+ * Priority applies separately to a context as a whole, and to the jobs within
+ * a context. The jobs specify a priority in the base_jd_atom::prio member, but
+ * it is independent of the context priority. That is, it only affects
+ * scheduling of atoms within a context. Refer to @ref base_jd_prio for more
+ * details. The meaning of the context's priority value is up to the policy
+ * itself, and could be a logarithmic scale instead of a linear scale (e.g. the
+ * policy could implement an increase/decrease in priority by 1 results in an
+ * increase/decrease in \em proportion of time spent scheduled in by 25%, an
+ * effective change in timeslice by 11%).
+ *
+ * It is up to the policy whether a boost in priority boosts the priority of
+ * the entire context (e.g. to such an extent where it may pre-empt other
+ * running contexts). If it chooses to do this, the Policy must make sure that
+ * only jobs from high-priority contexts are run, and that the context is
+ * scheduled out once only jobs from low priority contexts remain. This ensures
+ * that the low priority contexts do not gain from the priority boost, yet they
+ * still get scheduled correctly with respect to other low priority contexts.
+ *
+ *
+ * @section sec_kbase_js_policy_operation_irq IRQ Path
+ *
+ * The following happens on the IRQ path from the Job Scheduler Core:
+ * - Note the slot that completed (for later)
+ * - Log the time spent by the job (and implicitly, the time spent by the
+ * context)
+ *  - call kbasep_js_policy_log_job_result() <em>in the context of the irq
+ * handler.</em>
+ *  - This must happen regardless of whether the job completed successfully or
+ * not (otherwise the context gets away with DoS'ing the system with faulty jobs)
+ * - What was the result of the job?
+ *  - If Completed: job is just removed from the system
+ *  - If Hard-stop or failure: job is removed from the system
+ *  - If Soft-stop: queue the book-keeping work onto a work-queue: have a
+ * work-queue call kbasep_js_policy_enqueue_job()
+ * - Check the timeslice used by the owning context
+ *  - call kbasep_js_policy_should_remove_ctx() <em>in the context of the irq
+ * handler.</em>
+ *  - If this returns true, clear the "allowed" flag.
+ * - Check the ctx's flags for "allowed", "has jobs to run" and "is running
+ * jobs"
+ * - And so, should the context stay scheduled in?
+ *  - If No, push onto a work-queue the work of scheduling out the old context,
+ * and getting a new one. That is:
+ *   - kbasep_js_policy_runpool_remove_ctx() on old_ctx
+ *   - kbasep_js_policy_enqueue_ctx() on old_ctx
+ *   - kbasep_js_policy_dequeue_head_ctx() to get new_ctx
+ *   - kbasep_js_policy_runpool_add_ctx() on new_ctx
+ *   - (all of this work is deferred on a work-queue to keep the IRQ handler quick)
+ * - If there is space in the completed job slots' HEAD/NEXT registers, run the next job:
+ *  - kbasep_js_policy_dequeue_job() <em>in the context of the irq
+ * handler</em> with core_req set to that of the completing slot
+ *  - if this returned true, submit the job to the completed slot.
+ *  - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * false, or the job slot has a job queued on both the HEAD and NEXT registers.
+ *  - If kbasep_js_policy_dequeue_job() returned false, submit some work to
+ * the work-queue to retry from outside of IRQ context (calling
+ * kbasep_js_policy_dequeue_job() from a work-queue).
+ *
+ * Since the IRQ handler submits new jobs \em and re-checks the IRQ_RAWSTAT,
+ * this sequence could loop a large number of times: this could happen if
+ * the jobs submitted completed on the GPU very quickly (in a few cycles), such
+ * as GPU NULL jobs. Then, the HEAD/NEXT registers will always be free to take
+ * more jobs, causing us to loop until we run out of jobs.
+ *
+ * To mitigate this, we must limit the number of jobs submitted per slot during
+ * the IRQ handler - for example, no more than 2 jobs per slot per IRQ should
+ * be sufficient (to fill up the HEAD + NEXT registers in normal cases). For
+ * Mali-T600 with 3 job slots, this means that up to 6 jobs could be submitted per
+ * slot. Note that IRQ Throttling can make this situation commonplace: 6 jobs
+ * could complete but the IRQ for each of them is delayed by the throttling. By
+ * the time you get the IRQ, all 6 jobs could've completed, meaning you can
+ * submit jobs to fill all 6 HEAD+NEXT registers again.
+ *
+ * @note As much work is deferred as possible, which includes the scheduling
+ * out of a context and scheduling in a new context. However, we can still make
+ * starting a single high-priorty context quick despite this:
+ * - On Mali-T600 family, there is one more AS than JSs.
+ * - This means we can very quickly schedule out one AS, no matter what the
+ * situation (because there will always be one AS that's not currently running
+ * on the job slot - it can only have a job in the NEXT register).
+ *  - Even with this scheduling out, fair-share can still be guaranteed e.g. by
+ * a timeline-based Completely Fair Scheduler.
+ * - When our high-priority context comes in, we can do this quick-scheduling
+ * out immediately, and then schedule in the high-priority context without having to block.
+ * - This all assumes that the context to schedule out is of lower
+ * priority. Otherwise, we will have to block waiting for some other low
+ * priority context to finish its jobs. Note that it's likely (but not
+ * impossible) that the high-priority context \b is running jobs, by virtue of
+ * it being high priority.
+ * - Therefore, we can give a high liklihood that on Mali-T600 at least one
+ * high-priority context can be started very quickly. For the general case, we
+ * can guarantee starting (no. ASs) - (no. JSs) high priority contexts
+ * quickly. In any case, there is a high likelihood that we're able to start
+ * more than one high priority context quickly.
+ *
+ * In terms of the functions used in the IRQ handler directly, these are the
+ * perfomance considerations:
+ * - kbase_js_policy_log_job_result():
+ *  - This is just adding to a 64-bit value (possibly even a 32-bit value if we
+ * only store the time the job's recently spent - see below on 'priority weighting')
+ *  - For priority weighting, a divide operation ('div') could happen, but
+ * this can happen in a deferred context (outside of IRQ) when scheduling out
+ * the ctx; as per our Engineering Specification, the contexts of different
+ * priority still stay scheduled in for the same timeslice, but higher priority
+ * ones scheduled back in more often.
+ *  - That is, the weighted and unweighted times must be stored separately, and
+ * the weighted time is only updated \em outside of IRQ context.
+ *  - Of course, this divide is more likely to be a 'multiply by inverse of the
+ * weight', assuming that the weight (priority) doesn't change.
+ * - kbasep_js_policy_should_remove_ctx():
+ *  - This is usually just a comparison of the stored time value against some
+ * maximum value.
+ *
+ * @note all deferred work can be wrapped up into one call - we usually need to
+ * indicate that a job/bag is done outside of IRQ context anyway.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit Submission path
+ *
+ * Start with a Context with no jobs present, and assume equal priority of all
+ * contexts in the system. The following work all happens outside of IRQ
+ * Context :
+ * - As soon as job is made 'ready to 'run', then is must be registerd with the Job
+ * Scheduler Policy:
+ *  - 'Ready to run' means they've satisified their dependencies in the
+ * Kernel-side Job Dispatch system.
+ *  - Call kbasep_js_policy_enqueue_job()
+ *  - This indicates that the job should be scheduled (it is ready to run).
+ * - As soon as a ctx changes from having 0 jobs 'ready to run' to >0 jobs
+ * 'ready to run', we enqueue the context on the policy queue:
+ *  - Call kbasep_js_policy_enqueue_ctx()
+ *  - This indicates that the \em ctx should be scheduled (it is ready to run)
+ *
+ * Next, we need to handle adding a context to the Run Pool - if it's sensible
+ * to do so. This can happen due to two reasons:
+ * -# A context is enqueued as above, and there are ASs free for it to run on
+ * (e.g. it is the first context to be run, in which case it can be added to
+ * the Run Pool immediately after enqueuing on the Policy Queue)
+ * -# A previous IRQ caused another ctx to be scheduled out, requiring that the
+ * context at the head of the queue be scheduled in. Such steps would happen in
+ * a work queue (work deferred from the IRQ context).
+ *
+ * In both cases, we'd handle it as follows:
+ * - Get the context at the Head of the Policy Queue:
+ *  - Call kbasep_js_policy_dequeue_head_ctx()
+ * - Assign the Context an Address Space (Assert that there will be one free,
+ * given the above two reasons)
+ * - Add this context to the Run Pool:
+ *  - Call kbasep_js_policy_runpool_add_ctx()
+ * - Now see if a job should be run:
+ *  - Mostly, this will be done in the IRQ handler at the completion of a
+ * previous job.
+ *  - However, there are two cases where this cannot be done: a) The first job
+ * enqueued to the system (there is no previous IRQ to act upon) b) When jobs
+ * are submitted at a low enough rate to not fill up all Job Slots (or, not to
+ * fill both the 'HEAD' and 'NEXT' registers in the job-slots)
+ *  - Hence, on each ctx <b>and job</b> submission we should try to see if we
+ * can run a job:
+ *  - For each job slot that has free space (in NEXT or HEAD+NEXT registers):
+ *   - Call kbasep_js_policy_dequeue_job() with core_req set to that of the
+ * slot
+ *   - if we got one, submit it to the job slot.
+ *   - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * false, or the job slot has a job queued on both the HEAD and NEXT registers.
+ *
+ * The above case shows that we should attempt to run jobs in cases where a) a ctx
+ * has been added to the Run Pool, and b) new jobs have been added to a context
+ * in the Run Pool:
+ * - In the latter case, the context is in the runpool because it's got a job
+ * ready to run, or is already running a job
+ * - We could just wait until the IRQ handler fires, but for certain types of
+ * jobs this can take comparatively a long time to complete, e.g. GLES FS jobs
+ * generally take much longer to run that GLES CS jobs, which are vertex shader
+ * jobs.
+ * - Therefore, when a new job appears in the ctx, we must check the job-slots
+ * to see if they're free, and run the jobs as before.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit_hipri Submission path for High Priority Contexts
+ *
+ * For High Priority Contexts on Mali-T600, we can make sure that at least 1 of
+ * them can be scheduled in immediately to start high prioriy jobs. In general,
+ * (no. ASs) - (no JSs) high priority contexts may be started immediately. The
+ * following describes how this happens:
+ *
+ * Similar to the previous section, consider what happens with a high-priority
+ * context (a context with a priority higher than that of any in the Run Pool)
+ * that starts out with no jobs:
+ * - A job becomes ready to run on the context, and so we enqueue the context
+ * on the Policy's Queue.
+ * - However, we'd like to schedule in this context immediately, instead of
+ * waiting for one of the Run Pool contexts' timeslice to expire
+ * - The policy's Enqueue function must detect this (because it is the policy
+ * that embodies the concept of priority), and take appropriate action
+ *  - That is, kbasep_js_policy_enqueue_ctx() should check the Policy's Run
+ * Pool to see if a lower priority context should be scheduled out, and then
+ * schedule in the High Priority context.
+ *  - For Mali-T600, we can always pick a context to schedule out immediately
+ * (because there are more ASs than JSs), and so scheduling out a victim context
+ * and scheduling in the high priority context can happen immediately.
+ *   - If a policy implements fair-sharing, then this can still ensure the
+ * victim later on gets a fair share of the GPU.
+ *   - As a note, consider whether the victim can be of equal/higher priority
+ * than the incoming context:
+ *   - Usually, higher priority contexts will be the ones currently running
+ * jobs, and so the context with the lowest priority is usually not running
+ * jobs.
+ *   - This makes it likely that the victim context is low priority, but
+ * it's not impossible for it to be a high priority one:
+ *    - Suppose 3 high priority contexts are submitting only FS jobs, and one low
+ * priority context submitting CS jobs. Then, the context not running jobs will
+ * be one of the hi priority contexts (because only 2 FS jobs can be
+ * queued/running on the GPU HW for Mali-T600).
+ *   - The problem can be mitigated by extra action, but it's questionable
+ * whether we need to: we already have a high likelihood that there's at least
+ * one high priority context - that should be good enough.
+ *   - And so, this method makes sure that at least one high priority context
+ * can be started very quickly, but more than one high priority contexts could be
+ * delayed (up to one timeslice).
+ *   - To improve this, use a GPU with a higher number of Address Spaces vs Job
+ * Slots.
+ * - At this point, let's assume this high priority context has been scheduled
+ * in immediately. The next step is to ensure it can start some jobs quickly.
+ *  - It must do this by Soft-Stopping jobs on any of the Job Slots that it can
+ * submit to.
+ *  - The rest of the logic for starting the jobs is taken care of by the IRQ
+ * handler. All the policy needs to do is ensure that
+ * kbasep_js_policy_dequeue_job() will return the jobs from the high priority
+ * context.
+ *
+ * @note in SS state, we currently only use 2 job-slots (even for T608, but
+ * this might change in future). In this case, it's always possible to schedule
+ * out 2 ASs quickly (their jobs won't be in the HEAD registers). At the same
+ * time, this maximizes usage of the job-slots (only 2 are in use), because you
+ * can guarantee starting of the jobs from the High Priority contexts immediately too.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_notes Notes
+ *
+ * - In this design, a separate 'init' is needed from dequeue/requeue, so that
+ * information can be retained between the dequeue/requeue calls. For example,
+ * the total time spent for a context/job could be logged between
+ * dequeue/requeuing, to implement Fair Sharing. In this case, 'init' just
+ * initializes that information to some known state.
+ *
+ *
+ *
+ */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js_policy Job Scheduler Policy APIs
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy structure
+ */
+union kbasep_js_policy;
+
+/**
+ * @brief Initialize the Job Scheduler Policy
+ */
+int kbasep_js_policy_init(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler Policy
+ */
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy);
+
+/**
+ * @addtogroup kbase_js_policy_ctx Job Scheduler Policy, Context Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Ctx Info structure
+ *
+ * This structure is embedded in the struct kbase_context structure. It is used to:
+ * - track information needed for the policy to schedule the context (e.g. time
+ * used, OS priority etc.)
+ * - link together kbase_contexts into a queue, so that a struct kbase_context can be
+ * obtained as the container of the policy ctx info. This allows the API to
+ * return what "the next context" should be.
+ * - obtain other information already stored in the struct kbase_context for
+ * scheduling purposes (e.g process ID to get the priority of the originating
+ * process)
+ */
+union kbasep_js_policy_ctx_info;
+
+/**
+ * @brief Initialize a ctx for use with the Job Scheduler Policy
+ *
+ * This effectively initializes the union kbasep_js_policy_ctx_info structure within
+ * the struct kbase_context (itself located within the kctx->jctx.sched_info structure).
+ */
+int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Terminate resources associated with using a ctx in the Job Scheduler
+ * Policy.
+ */
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Enqueue a context onto the Job Scheduler Policy Queue
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out a low
+ * priority context from the Run Pool to allow the high priority context to be
+ * scheduled in.
+ *
+ * If the context has the privileged flag set, it will always be kept at the
+ * head of the queue.
+ *
+ * The caller will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ */
+void kbasep_js_policy_enqueue_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Dequeue a context from the Head of the Job Scheduler Policy Queue
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return true if a context was available, and *kctx_ptr points to
+ * the kctx dequeued.
+ * @return false if no contexts were available.
+ */
+bool kbasep_js_policy_dequeue_head_ctx(union kbasep_js_policy *js_policy, struct kbase_context ** const kctx_ptr);
+
+/**
+ * @brief Evict a context from the Job Scheduler Policy Queue
+ *
+ * This is only called as part of destroying a kbase_context.
+ *
+ * There are many reasons why this might fail during the lifetime of a
+ * context. For example, the context is in the process of being scheduled. In
+ * that case a thread doing the scheduling might have a pointer to it, but the
+ * context is neither in the Policy Queue, nor is it in the Run
+ * Pool. Crucially, neither the Policy Queue, Run Pool, or the Context itself
+ * are locked.
+ *
+ * Hence to find out where in the system the context is, it is important to do
+ * more than just check the kbasep_js_kctx_info::ctx::is_scheduled member.
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return true if the context was evicted from the Policy Queue
+ * @return false if the context was not found in the Policy Queue
+ */
+bool kbasep_js_policy_try_evict_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Call a function on all jobs belonging to a non-queued, non-running
+ * context, optionally detaching the jobs from the context as it goes.
+ *
+ * At the time of the call, the context is guarenteed to be not-currently
+ * scheduled on the Run Pool (is_scheduled == false), and not present in
+ * the Policy Queue. This is because one of the following functions was used
+ * recently on the context:
+ * - kbasep_js_policy_evict_ctx()
+ * - kbasep_js_policy_runpool_remove_ctx()
+ *
+ * In both cases, no subsequent call was made on the context to any of:
+ * - kbasep_js_policy_runpool_add_ctx()
+ * - kbasep_js_policy_enqueue_ctx()
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * \a detach_jobs must only be set when cancelling jobs (which occurs as part
+ * of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+void kbasep_js_policy_foreach_ctx_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx,
+	kbasep_js_policy_ctx_job_cb callback, bool detach_jobs);
+
+/**
+ * @brief Add a context to the Job Scheduler Policy's Run Pool
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out low
+ * priority jobs that are currently running on the GPU.
+ *
+ * The number of contexts present in the Run Pool will never be more than the
+ * number of Address Spaces.
+ *
+ * The following guarentees are made about the state of the system when this
+ * is called:
+ * - kctx->as_nr member is valid
+ * - the context has its submit_allowed flag set
+ * - kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is valid
+ * - The refcount of the context is guarenteed to be zero.
+ * - kbasep_js_kctx_info::ctx::is_scheduled will be true.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_add_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Remove a context from the Job Scheduler Policy's Run Pool
+ *
+ * The kctx->as_nr member is valid and the context has its submit_allowed flag
+ * set when this is called. The state of
+ * kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is also
+ * valid. The refcount of the context is guarenteed to be zero.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Indicate whether a context should be removed from the Run Pool
+ * (should be scheduled out).
+ *
+ * The kbasep_js_device_data::runpool_irq::lock will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ */
+bool kbasep_js_policy_should_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Synchronize with any timers acting upon the runpool
+ *
+ * The policy should check whether any timers it owns should be running. If
+ * they should not, the policy must cancel such timers and ensure they are not
+ * re-run by the time this function finishes.
+ *
+ * In particular, the timers must not be running when there are no more contexts
+ * on the runpool, because the GPU could be powered off soon after this call.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ */
+void kbasep_js_policy_runpool_timers_sync(union kbasep_js_policy *js_policy);
+
+
+/**
+ * @brief Indicate whether a new context has an higher priority than the current context.
+ *
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held for \a new_ctx
+ *
+ * This function must not sleep, because an IRQ spinlock might be held whilst
+ * this is called.
+ *
+ * @note There is nothing to stop the priority of \a current_ctx changing
+ * during or immediately after this function is called (because its jsctx_mutex
+ * cannot be held). Therefore, this function should only be seen as a heuristic
+ * guide as to whether \a new_ctx is higher priority than \a current_ctx
+ */
+bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx);
+
+	  /** @} *//* end group kbase_js_policy_ctx */
+
+/**
+ * @addtogroup kbase_js_policy_job Job Scheduler Policy, Job Chain Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Job Info structure
+ *
+ * This structure is embedded in the struct kbase_jd_atom structure. It is used to:
+ * - track information needed for the policy to schedule the job (e.g. time
+ * used, etc.)
+ * - link together jobs into a queue/buffer, so that a struct kbase_jd_atom can be
+ * obtained as the container of the policy job info. This allows the API to
+ * return what "the next job" should be.
+ */
+union kbasep_js_policy_job_info;
+
+/**
+ * @brief Initialize a job for use with the Job Scheduler Policy
+ *
+ * This function initializes the union kbasep_js_policy_job_info structure within the
+ * kbase_jd_atom. It will only initialize/allocate resources that are specific
+ * to the job.
+ *
+ * That is, this function makes \b no attempt to:
+ * - initialize any context/policy-wide information
+ * - enqueue the job on the policy.
+ *
+ * At some later point, the following functions must be called on the job, in this order:
+ * - kbasep_js_policy_register_job() to register the job and initialize policy/context wide data.
+ * - kbasep_js_policy_enqueue_job() to enqueue the job
+ *
+ * A job must only ever be initialized on the Policy once, and must be
+ * terminated on the Policy before the job is freed.
+ *
+ * The caller will not be holding any locks, and so this function will not
+ * modify any information in \a kctx or \a js_policy.
+ *
+ * @return 0 if initialization was correct.
+ */
+int kbasep_js_policy_init_job(const union kbasep_js_policy *js_policy, const struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Register context/policy-wide information for a job on the Job Scheduler Policy.
+ *
+ * Registers the job with the policy. This is used to track the job before it
+ * has been enqueued/requeued by kbasep_js_policy_enqueue_job(). Specifically,
+ * it is used to update information under a lock that could not be updated at
+ * kbasep_js_policy_init_job() time (such as context/policy-wide data).
+ *
+ * @note This function will not fail, and hence does not allocate any
+ * resources. Any failures that could occur on registration will be caught
+ * during kbasep_js_policy_init_job() instead.
+ *
+ * A job must only ever be registerd on the Policy once, and must be
+ * deregistered on the Policy on completion (whether or not that completion was
+ * success/failure).
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_register_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief De-register context/policy-wide information for a on the Job Scheduler Policy.
+ *
+ * This must be used before terminating the resources associated with using a
+ * job in the Job Scheduler Policy. This function does not itself terminate any
+ * resources, at most it just updates information in the policy and context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_deregister_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Dequeue a Job for a job slot from the Job Scheduler Policy Run Pool
+ *
+ * The job returned by the policy will match at least one of the bits in the
+ * job slot's core requirements (but it may match more than one, or all @ref
+ * base_jd_core_req bits supported by the job slot).
+ *
+ * In addition, the requirements of the job returned will be a subset of those
+ * requested - the job returned will not have requirements that \a job_slot_idx
+ * cannot satisfy.
+ *
+ * The caller will submit the job to the GPU as soon as the GPU's NEXT register
+ * for the corresponding slot is empty. Of course, the GPU will then only run
+ * this new job when the currently executing job (in the jobslot's HEAD
+ * register) has completed.
+ *
+ * @return true if a job was available, and *kctx_ptr points to
+ * the kctx dequeued.
+ * @return false if no jobs were available among all ctxs in the Run Pool.
+ *
+ * @note base_jd_core_req is currently a u8 - beware of type conversion.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_lock::irq will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex. will be held
+ */
+bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, struct kbase_jd_atom ** const katom_ptr);
+
+/**
+ * @brief Requeue a Job back into the Job Scheduler Policy Run Pool
+ *
+ * This will be used to enqueue a job after its creation and also to requeue
+ * a job into the Run Pool that was previously dequeued (running). It notifies
+ * the policy that the job should be run again at some point later.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock (a spinlock) will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_enqueue_job(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Log the result of a job: the time spent on a job/context, and whether
+ * the job failed or not.
+ *
+ * Since a struct kbase_jd_atom contains a pointer to the struct kbase_context owning it,
+ * then this can also be used to log time on either/both the job and the
+ * containing context.
+ *
+ * The completion state of the job can be found by examining \a katom->event.event_code
+ *
+ * If the Job failed and the policy is implementing fair-sharing, then the
+ * policy must penalize the failing job/context:
+ * - At the very least, it should penalize the time taken by the amount of
+ * time spent processing the IRQ in SW. This because a job in the NEXT slot
+ * waiting to run will be delayed until the failing job has had the IRQ
+ * cleared.
+ * - \b Optionally, the policy could apply other penalties. For example, based
+ * on a threshold of a number of failing jobs, after which a large penalty is
+ * applied.
+ *
+ * The kbasep_js_device_data::runpool_mutex will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock will be held.
+ *
+ * @param js_policy     job scheduler policy
+ * @param katom         job dispatch atom
+ * @param time_spent_us the time spent by the job, in microseconds (10^-6 seconds).
+ */
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us);
+
+	  /** @} *//* end group kbase_js_policy_job */
+
+	  /** @} *//* end group kbase_js_policy */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_POLICY_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
new file mode 100755
index 0000000..6924607
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
@@ -0,0 +1,297 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler: Completely Fair Policy Implementation
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_js_policy_cfs.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
+#include <linux/sched/rt.h>
+#endif
+
+/**
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is enabled for a particular level is down to the integrator.
+ * However this is being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/* Fixed point constants used for runtime weight calculations */
+#define WEIGHT_FIXEDPOINT_SHIFT 10
+#define WEIGHT_TABLE_SIZE       40
+#define WEIGHT_0_NICE           (WEIGHT_TABLE_SIZE/2)
+#define WEIGHT_0_VAL            (1 << WEIGHT_FIXEDPOINT_SHIFT)
+
+#define PROCESS_PRIORITY_MIN (-20)
+#define PROCESS_PRIORITY_MAX  (19)
+
+/* Defines for easy asserts 'is scheduled'/'is queued'/'is neither queued norscheduled' */
+#define KBASEP_JS_CHECKFLAG_QUEUED       (1u << 0) /**< Check the queued state */
+#define KBASEP_JS_CHECKFLAG_SCHEDULED    (1u << 1) /**< Check the scheduled state */
+#define KBASEP_JS_CHECKFLAG_IS_QUEUED    (1u << 2) /**< Expect queued state to be set */
+#define KBASEP_JS_CHECKFLAG_IS_SCHEDULED (1u << 3) /**< Expect scheduled state to be set */
+
+enum {
+	KBASEP_JS_CHECK_NOTQUEUED = KBASEP_JS_CHECKFLAG_QUEUED,
+	KBASEP_JS_CHECK_NOTSCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED,
+	KBASEP_JS_CHECK_QUEUED = KBASEP_JS_CHECKFLAG_QUEUED | KBASEP_JS_CHECKFLAG_IS_QUEUED,
+	KBASEP_JS_CHECK_SCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED | KBASEP_JS_CHECKFLAG_IS_SCHEDULED
+};
+
+typedef u32 kbasep_js_check;
+
+/*
+ * Private Functions
+ */
+
+/* Table autogenerated using util built from: base/tools/gen_cfs_weight_of_prio/ */
+
+/* weight = 1.25 */
+static const int weight_of_priority[] = {
+	/*  -20 */ 11, 14, 18, 23,
+	/*  -16 */ 29, 36, 45, 56,
+	/*  -12 */ 70, 88, 110, 137,
+	/*   -8 */ 171, 214, 268, 335,
+	/*   -4 */ 419, 524, 655, 819,
+	/*    0 */ 1024, 1280, 1600, 2000,
+	/*    4 */ 2500, 3125, 3906, 4883,
+	/*    8 */ 6104, 7630, 9538, 11923,
+	/*   12 */ 14904, 18630, 23288, 29110,
+	/*   16 */ 36388, 45485, 56856, 71070
+};
+
+/*
+ * Note: There is nothing to stop the priority of the ctx containing
+ * ctx_info changing during or immediately after this function is called
+ * (because its jsctx_mutex cannot be held during IRQ). Therefore, this
+ * function should only be seen as a heuristic guide as to the priority weight
+ * of the context.
+ */
+static u64 priority_weight(struct kbasep_js_policy_cfs_ctx *ctx_info, u64 time_us)
+{
+	u64 time_delta_us;
+	int priority;
+
+	priority = ctx_info->process_priority;
+
+	/* Adjust runtime_us using priority weight if required */
+	if (priority != 0 && time_us != 0) {
+		int clamped_priority;
+
+		/* Clamp values to min..max weights */
+		if (priority > PROCESS_PRIORITY_MAX)
+			clamped_priority = PROCESS_PRIORITY_MAX;
+		else if (priority < PROCESS_PRIORITY_MIN)
+			clamped_priority = PROCESS_PRIORITY_MIN;
+		else
+			clamped_priority = priority;
+
+		/* Fixed point multiplication */
+		time_delta_us = (time_us * weight_of_priority[WEIGHT_0_NICE + clamped_priority]);
+		/* Remove fraction */
+		time_delta_us = time_delta_us >> WEIGHT_FIXEDPOINT_SHIFT;
+		/* Make sure the time always increases */
+		if (0 == time_delta_us)
+			time_delta_us++;
+	} else {
+		time_delta_us = time_us;
+	}
+
+	return time_delta_us;
+}
+
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_policy_trace_get_refcnt_nolock(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+
+static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	refcnt = kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return refcnt;
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+
+/*
+ * Non-private functions
+ */
+
+int kbasep_js_policy_init(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_policy_cfs *policy_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+	policy_info = &js_devdata->policy.cfs;
+
+	atomic64_set(&policy_info->least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+	atomic64_set(&policy_info->rt_least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+
+	policy_info->head_runtime_us = 0;
+
+	return 0;
+}
+
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy)
+{
+	CSTD_UNUSED(js_policy);
+}
+
+int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_policy_cfs_ctx *ctx_info;
+	struct kbasep_js_policy_cfs *policy_info;
+	int policy;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	policy_info = &kbdev->js_data.policy.cfs;
+	ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_INIT_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+	policy = current->policy;
+	if (policy == SCHED_FIFO || policy == SCHED_RR) {
+		ctx_info->process_rt_policy = true;
+		ctx_info->process_priority = (((MAX_RT_PRIO - 1) - current->rt_priority) / 5) - 20;
+	} else {
+		ctx_info->process_rt_policy = false;
+		ctx_info->process_priority = (current->static_prio - MAX_RT_PRIO) - 20;
+	}
+
+	/* Initial runtime (relative to least-run context runtime)
+	 *
+	 * This uses the Policy Queue's most up-to-date head_runtime_us by using the
+	 * queue mutex to issue memory barriers - also ensure future updates to
+	 * head_runtime_us occur strictly after this context is initialized */
+	mutex_lock(&js_devdata->queue_mutex);
+
+	/* No need to hold the the runpool_irq.lock here, because we're initializing
+	 * the value, and the context is definitely not being updated in the
+	 * runpool at this point. The queue_mutex ensures the memory barrier. */
+	ctx_info->runtime_us = policy_info->head_runtime_us + priority_weight(ctx_info, (u64) js_devdata->cfs_ctx_runtime_init_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u));
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return 0;
+}
+
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx)
+{
+	struct kbasep_js_policy_cfs_ctx *ctx_info;
+	struct kbasep_js_policy_cfs *policy_info;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(js_policy != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	policy_info = &js_policy->cfs;
+	ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_TERM_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+	/* No work to do */
+}
+
+/*
+ * Job Chain Management
+ */
+
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us)
+{
+	struct kbasep_js_policy_cfs_ctx *ctx_info;
+	struct kbase_context *parent_ctx;
+
+	KBASE_DEBUG_ASSERT(js_policy != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	CSTD_UNUSED(js_policy);
+
+	parent_ctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(parent_ctx != NULL);
+
+	ctx_info = &parent_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	ctx_info->runtime_us += priority_weight(ctx_info, time_spent_us);
+
+	katom->time_spent_us += time_spent_us;
+}
+
+bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx)
+{
+	struct kbasep_js_policy_cfs_ctx *current_ctx_info;
+	struct kbasep_js_policy_cfs_ctx *new_ctx_info;
+
+	KBASE_DEBUG_ASSERT(current_ctx != NULL);
+	KBASE_DEBUG_ASSERT(new_ctx != NULL);
+	CSTD_UNUSED(js_policy);
+
+	current_ctx_info = &current_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+	new_ctx_info = &new_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	if (!current_ctx_info->process_rt_policy && new_ctx_info->process_rt_policy)
+		return true;
+
+	if (current_ctx_info->process_rt_policy ==
+			new_ctx_info->process_rt_policy)
+		return true;
+
+	return false;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
new file mode 100755
index 0000000..b457d82
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Completely Fair Job Scheduler Policy structure definitions
+ */
+
+#ifndef _KBASE_JS_POLICY_CFS_H_
+#define _KBASE_JS_POLICY_CFS_H_
+
+#define KBASEP_JS_RUNTIME_EMPTY ((u64)-1)
+
+/**
+ * struct kbasep_js_policy_cfs - Data for the CFS policy
+ * @head_runtime_us:  Number of microseconds the least-run context has been
+ *                    running for. The kbasep_js_device_data.queue_mutex must
+ *                    be held whilst updating this.
+ *                    Reads are possible without this mutex, but an older value
+ *                    might be read if no memory barries are issued beforehand.
+ * @least_runtime_us: Number of microseconds the least-run context in the
+ *                    context queue has been running for.
+ *                    -1 if context queue is empty.
+ * @rt_least_runtime_us: Number of microseconds the least-run context in the
+ *                       realtime (priority) context queue has been running for.
+ *                       -1 if realtime context queue is empty
+ */
+struct kbasep_js_policy_cfs {
+	u64 head_runtime_us;
+	atomic64_t least_runtime_us;
+	atomic64_t rt_least_runtime_us;
+};
+
+/**
+ * struct kbasep_js_policy_cfs_ctx - a single linked list of all contexts
+ * @runtime_us:        microseconds this context has been running for
+ * @process_rt_policy: set if calling process policy scheme is a realtime
+ *                     scheduler and will use the priority queue. Non-mutable
+ *                     after ctx init
+ * @process_priority:  calling process NICE priority, in the range -20..19
+ *
+ * &kbasep_js_device_data.runpool_irq.lock must be held when updating
+ * @runtime_us. Initializing will occur on context init and context enqueue
+ * (which can only occur in one thread at a time), but multi-thread access only
+ * occurs while the context is in the runpool.
+ *
+ * Reads are possible without the spinlock, but an older value might be read if
+ * no memory barries are issued beforehand.
+ */
+struct kbasep_js_policy_cfs_ctx {
+	u64 runtime_us;
+	bool process_rt_policy;
+	int process_priority;
+};
+
+/**
+ * struct kbasep_js_policy_cfs_job - per job information for CFS
+ * @ticks: number of ticks that this job has been executing for
+ *
+ * &kbasep_js_device_data.runpool_irq.lock must be held when accessing @ticks.
+ */
+struct kbasep_js_policy_cfs_job {
+	u32 ticks;
+};
+
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100755
index 0000000..6d1e61f
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port. */
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+
+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+	#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+#else
+	#define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100755
index 0000000..385d56a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,2503 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/compat.h>
+
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_tlstream.h>
+
+/**
+ * @brief Check the zone compatibility of two regions.
+ */
+static int kbase_region_tracker_match_zone(struct kbase_va_region *reg1,
+		struct kbase_va_region *reg2)
+{
+	return ((reg1->flags & KBASE_REG_ZONE_MASK) ==
+		(reg2->flags & KBASE_REG_ZONE_MASK));
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_match_zone);
+
+/* This function inserts a region into the tree. */
+static void kbase_region_tracker_insert(struct kbase_context *kctx, struct kbase_va_region *new_reg)
+{
+	u64 start_pfn = new_reg->start_pfn;
+	struct rb_node **link = &(kctx->reg_rbtree.rb_node);
+	struct rb_node *parent = NULL;
+
+	/* Find the right place in the tree using tree search */
+	while (*link) {
+		struct kbase_va_region *old_reg;
+
+		parent = *link;
+		old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+		/* RBTree requires no duplicate entries. */
+		KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+		if (old_reg->start_pfn > start_pfn)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Put the new node there, and rebalance tree */
+	rb_link_node(&(new_reg->rblink), parent, link);
+	rb_insert_color(&(new_reg->rblink), &(kctx->reg_rbtree));
+}
+
+/* Find allocated region enclosing free range. */
+static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
+		struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	u64 end_pfn = start_pfn + nr_pages;
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (start_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (end_pfn > tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (gpu_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (gpu_pfn >= tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->start_pfn > gpu_pfn)
+			rbnode = rbnode->rb_left;
+		else if (reg->start_pfn < gpu_pfn)
+			rbnode = rbnode->rb_right;
+		else
+			return reg;
+
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	/* Note that this search is a linear search, as we do not have a target
+	   address in mind, so does not benefit from the rbtree search */
+	rbnode = rb_first(&(kctx->reg_rbtree));
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if ((reg->nr_pages >= nr_pages) &&
+				(reg->flags & KBASE_REG_FREE) &&
+				kbase_region_tracker_match_zone(reg, reg_reqs)) {
+			/* Check alignment */
+			u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1);
+
+			if ((start_pfn >= reg->start_pfn) &&
+					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1)))
+				return reg;
+		}
+		rbnode = rb_next(rbnode);
+	}
+
+	return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions.  It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ */
+static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	struct rb_node *rbprev;
+	struct kbase_va_region *prev = NULL;
+	struct rb_node *rbnext;
+	struct kbase_va_region *next = NULL;
+
+	int merged_front = 0;
+	int merged_back = 0;
+	int err = 0;
+
+	/* Try to merge with the previous block first */
+	rbprev = rb_prev(&(reg->rblink));
+	if (rbprev) {
+		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+		if ((prev->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(prev, reg)) {
+			/* We're compatible with the previous VMA, merge with it */
+			prev->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+			reg = prev;
+			merged_front = 1;
+		}
+	}
+
+	/* Try to merge with the next block second */
+	/* Note we do the lookup here as the tree may have been rebalanced. */
+	rbnext = rb_next(&(reg->rblink));
+	if (rbnext) {
+		/* We're compatible with the next VMA, merge with it */
+		next = rb_entry(rbnext, struct kbase_va_region, rblink);
+		if ((next->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(next, reg)) {
+			next->start_pfn = reg->start_pfn;
+			next->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+			merged_back = 1;
+			if (merged_front) {
+				/* We already merged with prev, free it */
+				kbase_free_alloced_region(reg);
+			}
+		}
+	}
+
+	/* If we failed to merge then we need to add a new block */
+	if (!(merged_front || merged_back)) {
+		/*
+		 * We didn't merge anything. Add a new free
+		 * placeholder and remove the original one.
+		 */
+		struct kbase_va_region *free_reg;
+
+		free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+		if (!free_reg) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		rb_replace_node(&(reg->rblink), &(free_reg->rblink), &(kctx->reg_rbtree));
+	}
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * @brief Insert a VA region to the list, replacing the current at_reg.
+ */
+static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+	int err = 0;
+
+	/* Must be a free region */
+	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+	/* start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+	/* at least nr_pages from start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	/* Regions are a whole use, so swap and delete old one. */
+	if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+		rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), &(kctx->reg_rbtree));
+		kbase_free_alloced_region(at_reg);
+	}
+	/* New region replaces the start of the old one, so insert before. */
+	else if (at_reg->start_pfn == start_pfn) {
+		at_reg->start_pfn += nr_pages;
+		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region replaces the end of the old one, so insert after. */
+	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region splits the old one, so insert and create new */
+	else {
+		struct kbase_va_region *new_front_reg;
+
+		new_front_reg = kbase_alloc_free_region(kctx,
+				at_reg->start_pfn,
+				start_pfn - at_reg->start_pfn,
+				at_reg->flags & KBASE_REG_ZONE_MASK);
+
+		if (new_front_reg) {
+			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+			at_reg->start_pfn = start_pfn + nr_pages;
+
+			kbase_region_tracker_insert(kctx, new_front_reg);
+			kbase_region_tracker_insert(kctx, new_reg);
+		} else {
+			err = -ENOMEM;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * @brief Add a VA region to the list.
+ */
+int kbase_add_va_region(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 addr,
+		size_t nr_pages, size_t align)
+{
+	struct kbase_va_region *tmp;
+	u64 gpu_pfn = addr >> PAGE_SHIFT;
+	int err = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!align)
+		align = 1;
+
+	/* must be a power of 2 */
+	KBASE_DEBUG_ASSERT((align & (align - 1)) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+	/* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
+	if (gpu_pfn) {
+		struct device *dev = kctx->kbdev->dev;
+
+		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+		tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
+		if (!tmp) {
+			dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		if ((!kbase_region_tracker_match_zone(tmp, reg)) ||
+				(!(tmp->flags & KBASE_REG_FREE))) {
+			dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
+			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
+			dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
+		if (err) {
+			dev_warn(dev, "Failed to insert va region");
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		goto exit;
+	}
+
+	/* Path 2: Map any free address which meets the requirements.  */
+	{
+		u64 start_pfn;
+
+		/*
+		 * Depending on the zone the allocation request is for
+		 * we might need to retry it.
+		 */
+		do {
+			tmp = kbase_region_tracker_find_region_meeting_reqs(
+					kctx, reg, nr_pages, align);
+			if (tmp) {
+				start_pfn = (tmp->start_pfn + align - 1) &
+						~(align - 1);
+				err = kbase_insert_va_region_nolock(kctx, reg,
+						tmp, start_pfn, nr_pages);
+				break;
+			}
+
+			/*
+			 * If the allocation is not from the same zone as JIT
+			 * then don't retry, we're out of VA and there is
+			 * nothing which can be done about it.
+			 */
+			if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+					KBASE_REG_ZONE_CUSTOM_VA)
+				break;
+		} while (kbase_jit_evict(kctx));
+
+		if (!tmp)
+			err = -ENOMEM;
+	}
+
+ exit:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+		struct kbase_va_region *same_va_reg,
+		struct kbase_va_region *exec_reg,
+		struct kbase_va_region *custom_va_reg)
+{
+	kctx->reg_rbtree = RB_ROOT;
+	kbase_region_tracker_insert(kctx, same_va_reg);
+
+	/* exec and custom_va_reg doesn't always exist */
+	if (exec_reg && custom_va_reg) {
+		kbase_region_tracker_insert(kctx, exec_reg);
+		kbase_region_tracker_insert(kctx, custom_va_reg);
+	}
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	do {
+		rbnode = rb_first(&(kctx->reg_rbtree));
+		if (rbnode) {
+			rb_erase(rbnode, &(kctx->reg_rbtree));
+			reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+			kbase_free_alloced_region(reg);
+		}
+	} while (rbnode);
+}
+
+/**
+ * Initialize the region tracker data structure.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+	struct kbase_va_region *same_va_reg;
+	struct kbase_va_region *exec_reg = NULL;
+	struct kbase_va_region *custom_va_reg = NULL;
+	size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+	u64 same_va_pages;
+	int err;
+
+	/* Take the lock as kbase_free_alloced_region requires it */
+	kbase_gpu_vm_lock(kctx);
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+#ifdef CONFIG_64BIT
+	if (kctx->is_compat)
+		same_va_bits = 32;
+	else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+#endif
+
+	if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) {
+		err = -EINVAL;
+		goto fail_unlock;
+	}
+
+	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+	/* all have SAME_VA */
+	same_va_reg = kbase_alloc_free_region(kctx, 1,
+			same_va_pages,
+			KBASE_REG_ZONE_SAME_VA);
+
+	if (!same_va_reg) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+#ifdef CONFIG_64BIT
+	/* 32-bit clients have exec and custom VA zones */
+	if (kctx->is_compat) {
+#endif
+		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+			err = -EINVAL;
+			goto fail_free_same_va;
+		}
+		/* If the current size of TMEM is out of range of the
+		 * virtual address space addressable by the MMU then
+		 * we should shrink it to fit
+		 */
+		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+		exec_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_EXEC_BASE,
+				KBASE_REG_ZONE_EXEC_SIZE,
+				KBASE_REG_ZONE_EXEC);
+
+		if (!exec_reg) {
+			err = -ENOMEM;
+			goto fail_free_same_va;
+		}
+
+		custom_va_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_CUSTOM_VA_BASE,
+				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+		if (!custom_va_reg) {
+			err = -ENOMEM;
+			goto fail_free_exec;
+		}
+#ifdef CONFIG_64BIT
+	}
+#endif
+
+	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+
+	kctx->same_va_end = same_va_pages + 1;
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_free_exec:
+	kbase_free_alloced_region(exec_reg);
+fail_free_same_va:
+	kbase_free_alloced_region(same_va_reg);
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
+{
+#ifdef CONFIG_64BIT
+	struct kbase_va_region *same_va;
+	struct kbase_va_region *custom_va_reg;
+	u64 same_va_bits;
+	u64 total_va_size;
+	int err;
+
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+	if (kctx->is_compat)
+		return 0;
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+
+	total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/*
+	 * Modify the same VA free region after creation. Be careful to ensure
+	 * that allocations haven't been made as they could cause an overlap
+	 * to happen with existing same VA allocations and the custom VA zone.
+	 */
+	same_va = kbase_region_tracker_find_region_base_address(kctx,
+			PAGE_SIZE);
+	if (!same_va) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* The region flag or region size has changed since creation so bail. */
+	if ((!(same_va->flags & KBASE_REG_FREE)) ||
+			(same_va->nr_pages != total_va_size)) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* It's safe to adjust the same VA zone now */
+	same_va->nr_pages -= jit_va_pages;
+	kctx->same_va_end -= jit_va_pages;
+
+	/*
+	 * Create a custom VA zone at the end of the VA for allocations which
+	 * JIT can use so it doesn't have to allocate VA from the kernel.
+	 */
+	custom_va_reg = kbase_alloc_free_region(kctx,
+				kctx->same_va_end,
+				jit_va_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+	if (!custom_va_reg) {
+		/*
+		 * The context will be destroyed if we fail here so no point
+		 * reverting the change we made to same_va.
+		 */
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	kbase_region_tracker_insert(kctx, custom_va_reg);
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+#else
+	return 0;
+#endif
+}
+
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+	kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX;
+
+	/* Initialize memory usage */
+	atomic_set(&memdev->used_pages, 0);
+
+	return kbase_mem_pool_init(&kbdev->mem_pool,
+			KBASE_MEM_POOL_MAX_SIZE_KBDEV, kbdev, NULL);
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int pages;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	pages = atomic_read(&memdev->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_pool_term(&kbdev->mem_pool);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_term);
+
+
+
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+{
+	struct kbase_va_region *new_reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* zone argument should only contain zone related region flags */
+	KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
+
+	new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+	if (!new_reg)
+		return NULL;
+
+	new_reg->cpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->gpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->kctx = kctx;
+	new_reg->flags = zone | KBASE_REG_FREE;
+
+	new_reg->flags |= KBASE_REG_GROWABLE;
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions.
+ *
+ */
+void kbase_free_alloced_region(struct kbase_va_region *reg)
+{
+	if (!(reg->flags & KBASE_REG_FREE)) {
+		/*
+		 * The physical allocation should have been removed from the
+		 * eviction list before this function is called. However, in the
+		 * case of abnormal process termination or the app leaking the
+		 * memory kbase_mem_free_region is not called so it can still be
+		 * on the list at termination time of the region tracker.
+		 */
+		if (!list_empty(&reg->gpu_alloc->evict_node)) {
+			/*
+			 * Unlink the physical allocation before unmaking it
+			 * evictable so that the allocation isn't grown back to
+			 * its last backed size as we're going to unmap it
+			 * anyway.
+			 */
+			reg->cpu_alloc->reg = NULL;
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				reg->gpu_alloc->reg = NULL;
+
+			/*
+			 * If a region has been made evictable then we must
+			 * unmake it before trying to free it.
+			 * If the memory hasn't been reclaimed it will be
+			 * unmapped and freed below, if it has been reclaimed
+			 * then the operations below are no-ops.
+			 */
+			if (reg->flags & KBASE_REG_DONT_NEED) {
+				KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+						   KBASE_MEM_TYPE_NATIVE);
+				kbase_mem_evictable_unmake(reg->gpu_alloc);
+			}
+		}
+
+		/*
+		 * Remove the region from the sticky resource metadata
+		 * list should it be there.
+		 */
+		kbase_sticky_resource_release(reg->kctx, NULL,
+				reg->start_pfn << PAGE_SHIFT, true);
+
+		kbase_mem_phy_alloc_put(reg->cpu_alloc);
+		kbase_mem_phy_alloc_put(reg->gpu_alloc);
+		/* To detect use-after-free in debug builds */
+		KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
+	}
+	kfree(reg);
+}
+
+KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
+
+void kbase_mmu_update(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the runpool_irq lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex);
+
+	kctx->kbdev->mmu_mode->update(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_update);
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the runpool_irq lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	kbdev->mmu_mode->disable_as(kbdev, as_nr);
+}
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+{
+	int err;
+	size_t i = 0;
+	unsigned long attr;
+	unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+
+	if ((kctx->kbdev->system_coherency == COHERENCY_ACE) &&
+		(reg->flags & KBASE_REG_SHARE_BOTH))
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA);
+	else
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
+	if (err)
+		return err;
+
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+		struct kbase_mem_phy_alloc *alloc;
+
+		alloc = reg->gpu_alloc;
+		stride = alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
+		for (i = 0; i < alloc->imported.alias.nents; i++) {
+			if (alloc->imported.alias.aliased[i].alloc) {
+				err = kbase_mmu_insert_pages(kctx,
+						reg->start_pfn + (i * stride),
+						alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
+						alloc->imported.alias.aliased[i].length,
+						reg->flags);
+				if (err)
+					goto bad_insert;
+
+				kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc);
+			} else {
+				err = kbase_mmu_insert_single_page(kctx,
+					reg->start_pfn + i * stride,
+					page_to_phys(kctx->aliasing_sink_page),
+					alloc->imported.alias.aliased[i].length,
+					(reg->flags & mask) | attr);
+
+				if (err)
+					goto bad_insert;
+			}
+		}
+	} else {
+		err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+				kbase_get_gpu_phy_pages(reg),
+				kbase_reg_current_backed_size(reg),
+				reg->flags);
+		if (err)
+			goto bad_insert;
+		kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
+	}
+
+	return err;
+
+bad_insert:
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+
+		stride = reg->gpu_alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		while (i--)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
+				kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length);
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+			}
+	}
+
+	kbase_remove_va_region(kctx, reg);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
+
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	if (reg->start_pfn == 0)
+		return 0;
+
+	if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		size_t i;
+
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+	} else {
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
+	}
+
+	if (err)
+		return err;
+
+	err = kbase_remove_va_region(kctx, reg);
+	return err;
+}
+
+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping_of_region(const struct kbase_va_region *reg, unsigned long uaddr, size_t size)
+{
+	struct kbase_cpu_mapping *map;
+	struct list_head *pos;
+
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+
+	if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
+		return NULL;
+
+	list_for_each(pos, &reg->cpu_alloc->mappings) {
+		map = list_entry(pos, struct kbase_cpu_mapping, mappings_list);
+		if (map->vm_start <= uaddr && map->vm_end >= uaddr + size)
+			return map;
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_of_region);
+
+int kbasep_find_enclosing_cpu_mapping_offset(
+	struct kbase_context *kctx, u64 gpu_addr,
+	unsigned long uaddr, size_t size, u64 * offset)
+{
+	struct kbase_cpu_mapping *map = NULL;
+	const struct kbase_va_region *reg;
+	int err = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+	if (reg && !(reg->flags & KBASE_REG_FREE)) {
+		map = kbasep_find_enclosing_cpu_mapping_of_region(reg, uaddr,
+				size);
+		if (map) {
+			*offset = (uaddr - PTR_TO_U64(map->vm_start)) +
+						 (map->page_off << PAGE_SHIFT);
+			err = 0;
+		}
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
+
+void kbase_sync_single(struct kbase_context *kctx,
+		phys_addr_t cpu_pa, phys_addr_t gpu_pa,
+		off_t offset, size_t size, enum kbase_sync_type sync_fn)
+{
+	struct page *cpu_page;
+
+	cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
+
+	if (likely(cpu_pa == gpu_pa)) {
+		dma_addr_t dma_addr;
+
+		BUG_ON(!cpu_page);
+		BUG_ON(offset + size > PAGE_SIZE);
+
+		dma_addr = kbase_dma_addr(cpu_page) + offset;
+		if (sync_fn == KBASE_SYNC_TO_CPU)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+		else if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+	} else {
+		void *src = NULL;
+		void *dst = NULL;
+		struct page *gpu_page;
+
+		if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
+			return;
+
+		gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+
+		if (sync_fn == KBASE_SYNC_TO_DEVICE) {
+			src = ((unsigned char *)kmap(cpu_page)) + offset;
+			dst = ((unsigned char *)kmap(gpu_page)) + offset;
+		} else if (sync_fn == KBASE_SYNC_TO_CPU) {
+			dma_sync_single_for_cpu(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+			src = ((unsigned char *)kmap(gpu_page)) + offset;
+			dst = ((unsigned char *)kmap(cpu_page)) + offset;
+		}
+		memcpy(dst, src, size);
+		kunmap(gpu_page);
+		kunmap(cpu_page);
+		if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+	}
+}
+
+static int kbase_do_syncset(struct kbase_context *kctx,
+		struct base_syncset *set, enum kbase_sync_type sync_fn)
+{
+	int err = 0;
+	struct basep_syncset *sset = &set->basep_sset;
+	struct kbase_va_region *reg;
+	struct kbase_cpu_mapping *map;
+	unsigned long start;
+	size_t size;
+	phys_addr_t *cpu_pa;
+	phys_addr_t *gpu_pa;
+	u64 page_off, page_count;
+	u64 i;
+	off_t offset;
+
+	kbase_os_mem_map_lock(kctx);
+	kbase_gpu_vm_lock(kctx);
+
+	/* find the region where the virtual address is contained */
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			sset->mem_handle.basep.handle);
+	if (!reg) {
+		dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX",
+				sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED))
+		goto out_unlock;
+
+	start = (uintptr_t)sset->user_addr;
+	size = (size_t)sset->size;
+
+	map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size);
+	if (!map) {
+		dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
+				start, sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	offset = start & (PAGE_SIZE - 1);
+	page_off = map->page_off + ((start - map->vm_start) >> PAGE_SHIFT);
+	page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	cpu_pa = kbase_get_cpu_phy_pages(reg);
+	gpu_pa = kbase_get_gpu_phy_pages(reg);
+
+	/* Sync first page */
+	if (cpu_pa[page_off]) {
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+
+		kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
+				offset, sz, sync_fn);
+	}
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		/* we grow upwards, so bail on first non-present page */
+		if (!cpu_pa[page_off + i])
+			break;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + i],
+				gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1 && cpu_pa[page_off + page_count - 1]) {
+		size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
+				gpu_pa[page_off + page_count - 1], 0, sz,
+				sync_fn);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_os_mem_map_unlock(kctx);
+	return err;
+}
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset)
+{
+	int err = -EINVAL;
+	struct basep_syncset *sset;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != syncset);
+
+	sset = &syncset->basep_sset;
+
+	switch (sset->type) {
+	case BASE_SYNCSET_OP_MSYNC:
+		err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_DEVICE);
+		break;
+
+	case BASE_SYNCSET_OP_CSYNC:
+		err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_CPU);
+		break;
+
+	default:
+		dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
+		break;
+	}
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_sync_now);
+
+/* vm lock must be held */
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Unlink the physical allocation before unmaking it evictable so
+	 * that the allocation isn't grown back to its last backed size
+	 * as we're going to unmap it anyway.
+	 */
+	reg->cpu_alloc->reg = NULL;
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		reg->gpu_alloc->reg = NULL;
+
+	/*
+	 * If a region has been made evictable then we must unmake it
+	 * before trying to free it.
+	 * If the memory hasn't been reclaimed it will be unmapped and freed
+	 * below, if it has been reclaimed then the operations below are no-ops.
+	 */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+				   KBASE_MEM_TYPE_NATIVE);
+		kbase_mem_evictable_unmake(reg->gpu_alloc);
+	}
+
+	err = kbase_gpu_munmap(kctx, reg);
+	if (err) {
+		dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+		goto out;
+	}
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) {
+		/* Wait for GPU to flush write buffer before freeing physical pages */
+		kbase_wait_write_flush(kctx);
+	}
+#endif
+	/* This will also free the physical pages */
+	kbase_free_alloced_region(reg);
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free_region);
+
+/**
+ * @brief Free the region from the GPU and unregister it.
+ *
+ * This function implements the free operation on a memory segment.
+ * It will loudly fail if called with outstanding mappings.
+ */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if (0 == gpu_addr) {
+		dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+		return -EINVAL;
+	}
+	kbase_gpu_vm_lock(kctx);
+
+	if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
+	    gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
+		int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
+
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* ask to unlink the cookie as we'll free it */
+
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		kbase_free_alloced_region(reg);
+	} else {
+		/* A real GPU va */
+		/* Validate the region */
+		reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+		if (!reg || (reg->flags & KBASE_REG_FREE)) {
+			dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+			/* SAME_VA must be freed through munmap */
+			dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		err = kbase_mem_free_region(kctx, reg);
+	}
+
+ out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free);
+
+void kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
+
+	reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
+	/* all memory is now growable */
+	reg->flags |= KBASE_REG_GROWABLE;
+
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		reg->flags |= KBASE_REG_PF_GROW;
+
+	if (flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (0 == (flags & BASE_MEM_PROT_GPU_EX))
+		reg->flags |= KBASE_REG_GPU_NX;
+
+	if (flags & BASE_MEM_COHERENT_SYSTEM ||
+			flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED)
+		reg->flags |= KBASE_REG_SHARE_BOTH;
+	else if (flags & BASE_MEM_COHERENT_LOCAL)
+		reg->flags |= KBASE_REG_SHARE_IN;
+
+	/* Set up default MEMATTR usage */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
+		(reg->flags & KBASE_REG_SHARE_BOTH)) {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
+	} else {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_update_region_flags);
+
+int kbase_alloc_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_requested)
+{
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(alloc);
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = kbase_atomic_add_pages(
+			nr_pages_requested, &alloc->imported.kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer */
+	kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested);
+
+	if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool,
+			nr_pages_requested, alloc->pages + alloc->nents) != 0)
+		goto no_alloc;
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)alloc->imported.kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return 0;
+
+no_alloc:
+	kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+	return -ENOMEM;
+}
+
+int kbase_free_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	phys_addr_t *start_free;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	/* early out if nothing to do */
+	if (0 == nr_pages_to_free)
+		return 0;
+
+	start_free = alloc->pages + alloc->nents - nr_pages_to_free;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	kbase_mem_pool_free_pages(&kctx->mem_pool,
+				  nr_pages_to_free,
+				  start_free,
+				  syncback,
+				  reclaimed);
+
+	alloc->nents -= nr_pages_to_free;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		kbase_process_page_usage_dec(kctx, nr_pages_to_free);
+		new_page_count = kbase_atomic_sub_pages(nr_pages_to_free,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(nr_pages_to_free,
+				       &kctx->kbdev->memdev.used_pages);
+
+		kbase_tlstream_aux_pagesalloc(
+				(u32)kctx->id,
+				(u64)new_page_count);
+	}
+
+	return 0;
+}
+
+void kbase_mem_kref_free(struct kref *kref)
+{
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
+
+	switch (alloc->type) {
+	case KBASE_MEM_TYPE_NATIVE: {
+		WARN_ON(!alloc->imported.kctx);
+		/*
+		 * The physical allocation must have been removed from the
+		 * eviction list before trying to free it.
+		 */
+		WARN_ON(!list_empty(&alloc->evict_node));
+		kbase_free_phy_pages_helper(alloc, alloc->nents);
+		break;
+	}
+	case KBASE_MEM_TYPE_ALIAS: {
+		/* just call put on the underlying phy allocs */
+		size_t i;
+		struct kbase_aliased *aliased;
+
+		aliased = alloc->imported.alias.aliased;
+		if (aliased) {
+			for (i = 0; i < alloc->imported.alias.nents; i++)
+				if (aliased[i].alloc)
+					kbase_mem_phy_alloc_put(aliased[i].alloc);
+			vfree(aliased);
+		}
+		break;
+	}
+	case KBASE_MEM_TYPE_RAW:
+		/* raw pages, external cleanup */
+		break;
+ #ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ump_dd_release(alloc->imported.ump_handle);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		dma_buf_detach(alloc->imported.umm.dma_buf,
+			       alloc->imported.umm.dma_attachment);
+		dma_buf_put(alloc->imported.umm.dma_buf);
+		break;
+#endif
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		kfree(alloc->imported.user_buf.pages);
+		break;
+	case KBASE_MEM_TYPE_TB:{
+		void *tb;
+
+		tb = alloc->imported.kctx->jctx.tb;
+		kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
+		vfree(tb);
+		break;
+	}
+	default:
+		WARN(1, "Unexecpted free of type %d\n", alloc->type);
+		break;
+	}
+
+	/* Free based on allocation type */
+	if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+		vfree(alloc);
+	else
+		kfree(alloc);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(vsize > 0);
+
+	/* validate user provided arguments */
+	if (size > vsize || vsize > reg->nr_pages)
+		goto out_term;
+
+	/* Prevent vsize*sizeof from wrapping around.
+	 * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail.
+	 */
+	if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
+		goto out_term;
+
+	KBASE_DEBUG_ASSERT(0 != vsize);
+
+	if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
+		goto out_term;
+
+	reg->cpu_alloc->reg = reg;
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
+			goto out_rollback;
+		reg->gpu_alloc->reg = reg;
+	}
+
+	return 0;
+
+out_rollback:
+	kbase_free_phy_pages_helper(reg->cpu_alloc, size);
+out_term:
+	return -1;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+
+bool kbase_check_alloc_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Either the GPU or CPU must be reading from the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
+		return false;
+
+	/* Either the GPU or CPU must be writing to the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for allocating. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	return true;
+}
+
+bool kbase_check_import_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Imported memory cannot be GPU executable */
+	if (flags & BASE_MEM_PROT_GPU_EX)
+		return false;
+
+	/* Imported memory cannot grow on page fault */
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for importing. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* Secure memory cannot be read by the CPU */
+	if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD))
+		return false;
+
+	return true;
+}
+
+/**
+ * @brief Acquire the per-context region list lock
+ */
+void kbase_gpu_vm_lock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_lock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
+
+/**
+ * @brief Release the per-context region list lock
+ */
+void kbase_gpu_vm_unlock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_unlock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
+
+struct kbase_jit_debugfs_data {
+	int (*func)(struct kbase_jit_debugfs_data *);
+	struct mutex lock;
+	struct kbase_context *kctx;
+	u64 active_value;
+	u64 pool_value;
+	u64 destroy_value;
+	char buffer[50];
+};
+
+static int kbase_jit_debugfs_common_open(struct inode *inode,
+		struct file *file, int (*func)(struct kbase_jit_debugfs_data *))
+{
+	struct kbase_jit_debugfs_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->func = func;
+	mutex_init(&data->lock);
+	data->kctx = (struct kbase_context *) inode->i_private;
+
+	file->private_data = data;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t kbase_jit_debugfs_common_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_jit_debugfs_data *data;
+	size_t size;
+	int ret;
+
+	data = (struct kbase_jit_debugfs_data *) file->private_data;
+	mutex_lock(&data->lock);
+
+	if (*ppos) {
+		size = strnlen(data->buffer, sizeof(data->buffer));
+	} else {
+		if (!data->func) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		if (data->func(data)) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		size = scnprintf(data->buffer, sizeof(data->buffer),
+				"%llu,%llu,%llu", data->active_value,
+				data->pool_value, data->destroy_value);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size);
+
+out_unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+static int kbase_jit_debugfs_common_release(struct inode *inode,
+		struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+	return kbase_jit_debugfs_common_open(inode, file, __func); \
+} \
+static const struct file_operations __fops = { \
+	.owner = THIS_MODULE, \
+	.open = __fops ## _open, \
+	.release = kbase_jit_debugfs_common_release, \
+	.read = kbase_jit_debugfs_common_read, \
+	.write = NULL, \
+	.llseek = generic_file_llseek, \
+}
+
+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct list_head *tmp;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each(tmp, &kctx->jit_active_head) {
+		data->active_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_pool_head) {
+		data->pool_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_destroy_head) {
+		data->destroy_value++;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
+		kbase_jit_debugfs_count_get);
+
+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->nr_pages;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops,
+		kbase_jit_debugfs_vm_get);
+
+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->gpu_alloc->nents;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
+		kbase_jit_debugfs_phys_get);
+
+void kbase_jit_debugfs_add(struct kbase_context *kctx)
+{
+	/* Debugfs entry for getting the number of JIT allocations. */
+	debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_count_fops);
+
+	/*
+	 * Debugfs entry for getting the total number of virtual pages
+	 * used by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_vm_fops);
+
+	/*
+	 * Debugfs entry for getting the number of physical pages used
+	 * by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_phys_fops);
+}
+
+/**
+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations
+ * @work: Work item
+ *
+ * This function does the work of freeing JIT allocations whose physical
+ * backing has been released.
+ */
+static void kbase_jit_destroy_worker(struct work_struct *work)
+{
+	struct kbase_context *kctx;
+	struct kbase_va_region *reg;
+
+	kctx = container_of(work, struct kbase_context, jit_work);
+	do {
+		mutex_lock(&kctx->jit_lock);
+		if (list_empty(&kctx->jit_destroy_head))
+			reg = NULL;
+		else
+			reg = list_first_entry(&kctx->jit_destroy_head,
+				struct kbase_va_region, jit_node);
+
+		if (reg) {
+			list_del(&reg->jit_node);
+			mutex_unlock(&kctx->jit_lock);
+
+			kbase_gpu_vm_lock(kctx);
+			kbase_mem_free_region(kctx, reg);
+			kbase_gpu_vm_unlock(kctx);
+		} else
+			mutex_unlock(&kctx->jit_lock);
+	} while (reg);
+}
+
+int kbase_jit_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->jit_active_head);
+	INIT_LIST_HEAD(&kctx->jit_pool_head);
+	INIT_LIST_HEAD(&kctx->jit_destroy_head);
+	mutex_init(&kctx->jit_lock);
+	INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
+
+	return 0;
+}
+
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info)
+{
+	struct kbase_va_region *reg = NULL;
+	struct kbase_va_region *walker;
+	struct kbase_va_region *temp;
+	size_t current_diff = SIZE_MAX;
+
+	int ret;
+
+	mutex_lock(&kctx->jit_lock);
+	/*
+	 * Scan the pool for an existing allocation which meets our
+	 * requirements and remove it.
+	 */
+	list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) {
+
+		if (walker->nr_pages >= info->va_pages) {
+			size_t min_size, max_size, diff;
+
+			/*
+			 * The JIT allocations VA requirements have been
+			 * meet, it's suitable but other allocations
+			 * might be a better fit.
+			 */
+			min_size = min_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			max_size = max_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			diff = max_size - min_size;
+
+			if (current_diff > diff) {
+				current_diff = diff;
+				reg = walker;
+			}
+
+			/* The allocation is an exact match, stop looking */
+			if (current_diff == 0)
+				break;
+		}
+	}
+
+	if (reg) {
+		/*
+		 * Remove the found region from the pool and add it to the
+		 * active list.
+		 */
+		list_del_init(&reg->jit_node);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+
+		/* Release the jit lock before modifying the allocation */
+		mutex_unlock(&kctx->jit_lock);
+
+		kbase_gpu_vm_lock(kctx);
+
+		/* Make the physical backing no longer reclaimable */
+		if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+			goto update_failed;
+
+		/* Grow the backing if required */
+		if (reg->gpu_alloc->nents < info->commit_pages) {
+			size_t delta;
+			size_t old_size = reg->gpu_alloc->nents;
+
+			/* Allocate some more pages */
+			delta = info->commit_pages - reg->gpu_alloc->nents;
+			if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta)
+					!= 0)
+				goto update_failed;
+
+			if (reg->cpu_alloc != reg->gpu_alloc) {
+				if (kbase_alloc_phy_pages_helper(
+						reg->cpu_alloc, delta) != 0) {
+					kbase_free_phy_pages_helper(
+							reg->gpu_alloc, delta);
+					goto update_failed;
+				}
+			}
+
+			ret = kbase_mem_grow_gpu_mapping(kctx, reg,
+					info->commit_pages, old_size);
+			/*
+			 * The grow failed so put the allocation back in the
+			 * pool and return failure.
+			 */
+			if (ret)
+				goto update_failed;
+		}
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* No suitable JIT allocation was found so create a new one */
+		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+				BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF;
+		u64 gpu_addr;
+		u16 alignment;
+
+		mutex_unlock(&kctx->jit_lock);
+
+		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
+				info->extent, &flags, &gpu_addr, &alignment);
+		if (!reg)
+			goto out_unlocked;
+
+		mutex_lock(&kctx->jit_lock);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+		mutex_unlock(&kctx->jit_lock);
+	}
+
+	return reg;
+
+update_failed:
+	/*
+	 * An update to an allocation from the pool failed, chances
+	 * are slim a new allocation would fair any better so return
+	 * the allocation to the pool and return the function with failure.
+	 */
+	kbase_gpu_vm_unlock(kctx);
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_lock);
+out_unlocked:
+	return NULL;
+}
+
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	/* The physical backing of memory in the pool is always reclaimable */
+	down_read(&kctx->process_mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+	kbase_mem_evictable_make(reg->gpu_alloc);
+	kbase_gpu_vm_unlock(kctx);
+	up_read(&kctx->process_mm->mmap_sem);
+
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_lock);
+}
+
+void kbase_jit_backing_lost(struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = reg->kctx;
+
+	/*
+	 * JIT allocations will always be on a list, if the region
+	 * is not on a list then it's not a JIT allocation.
+	 */
+	if (list_empty(&reg->jit_node))
+		return;
+
+	/*
+	 * Freeing the allocation requires locks we might not be able
+	 * to take now, so move the allocation to the free list and kick
+	 * the worker which will do the freeing.
+	 */
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_destroy_head);
+	mutex_unlock(&kctx->jit_lock);
+
+	schedule_work(&kctx->jit_work);
+}
+
+bool kbase_jit_evict(struct kbase_context *kctx)
+{
+	struct kbase_va_region *reg = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Free the oldest allocation from the pool */
+	mutex_lock(&kctx->jit_lock);
+	if (!list_empty(&kctx->jit_pool_head)) {
+		reg = list_entry(kctx->jit_pool_head.prev,
+				struct kbase_va_region, jit_node);
+		list_del(&reg->jit_node);
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	if (reg)
+		kbase_mem_free_region(kctx, reg);
+
+	return (reg != NULL);
+}
+
+void kbase_jit_term(struct kbase_context *kctx)
+{
+	struct kbase_va_region *walker;
+
+	/* Free all allocations for this context */
+
+	/*
+	 * Flush the freeing of allocations whose backing has been freed
+	 * (i.e. everything in jit_destroy_head).
+	 */
+	cancel_work_sync(&kctx->jit_work);
+
+	kbase_gpu_vm_lock(kctx);
+	/* Free all allocations from the pool */
+	while (!list_empty(&kctx->jit_pool_head)) {
+		walker = list_first_entry(&kctx->jit_pool_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		kbase_mem_free_region(kctx, walker);
+	}
+
+	/* Free all allocations from active list */
+	while (!list_empty(&kctx->jit_active_head)) {
+		walker = list_first_entry(&kctx->jit_active_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		kbase_mem_free_region(kctx, walker);
+	}
+	kbase_gpu_vm_unlock(kctx);
+}
+
+static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	long pinned_pages;
+	struct kbase_mem_phy_alloc *alloc;
+	struct page **pages;
+	phys_addr_t *pa;
+	long i;
+	int err = -ENOMEM;
+	unsigned long address;
+	struct task_struct *owner;
+	struct device *dev;
+	unsigned long offset;
+	unsigned long local_size;
+
+	alloc = reg->gpu_alloc;
+	pa = kbase_get_gpu_phy_pages(reg);
+	address = alloc->imported.user_buf.address;
+	owner = alloc->imported.user_buf.owner;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+
+	pages = alloc->imported.user_buf.pages;
+
+	pinned_pages = get_user_pages(owner, owner->mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+
+	if (pinned_pages <= 0)
+		return pinned_pages;
+
+	if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+		for (i = 0; i < pinned_pages; i++)
+			put_page(pages[i]);
+		return -ENOMEM;
+	}
+
+	dev = kctx->kbdev->dev;
+	offset = address & ~PAGE_MASK;
+	local_size = alloc->imported.user_buf.size;
+
+	for (i = 0; i < pinned_pages; i++) {
+		dma_addr_t dma_addr;
+		unsigned long min;
+
+		min = MIN(PAGE_SIZE - offset, local_size);
+		dma_addr = dma_map_page(dev, pages[i],
+				offset, min,
+				DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, dma_addr))
+			goto unwind;
+
+		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
+		pa[i] = page_to_phys(pages[i]);
+
+		local_size -= min;
+		offset = 0;
+	}
+
+	alloc->nents = pinned_pages;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
+			kbase_reg_current_backed_size(reg),
+			reg->flags);
+	if (err == 0)
+		return 0;
+
+	alloc->nents = 0;
+	/* fall down */
+unwind:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				alloc->imported.user_buf.dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		put_page(pages[i]);
+		pages[i] = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable)
+{
+	long i;
+	struct page **pages;
+	unsigned long size = alloc->imported.user_buf.size;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	pages = alloc->imported.user_buf.pages;
+	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
+		unsigned long local_size;
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
+		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+				DMA_BIDIRECTIONAL);
+		if (writeable)
+			set_page_dirty_lock(pages[i]);
+		put_page(pages[i]);
+		pages[i] = NULL;
+
+		size -= local_size;
+	}
+	alloc->nents = 0;
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kbase_jd_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	struct sg_table *sgt;
+	struct scatterlist *s;
+	int i;
+	phys_addr_t *pa;
+	int err;
+	size_t count = 0;
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = reg->gpu_alloc;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+	KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
+	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
+			DMA_BIDIRECTIONAL);
+
+	if (IS_ERR_OR_NULL(sgt))
+		return -EINVAL;
+
+	/* save for later */
+	alloc->imported.umm.sgt = sgt;
+
+	pa = kbase_get_gpu_phy_pages(reg);
+	KBASE_DEBUG_ASSERT(pa);
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		int j;
+		size_t pages = PFN_UP(sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+		"sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+		sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+		(unsigned long long) sg_dma_address(s));
+
+		for (j = 0; (j < pages) && (count < reg->nr_pages); j++,
+				count++)
+			*pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
+		WARN_ONCE(j < pages,
+		"sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+		alloc->imported.umm.dma_buf->size);
+	}
+
+	if (WARN_ONCE(count < reg->nr_pages,
+			"sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+			alloc->imported.umm.dma_buf->size)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Update nents as we now have pages to map */
+	alloc->nents = count;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+			kbase_get_gpu_phy_pages(reg),
+			kbase_reg_current_backed_size(reg),
+			reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+
+out:
+	if (err) {
+		dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+				alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+		alloc->imported.umm.sgt = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_umm_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(alloc);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+	    alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+	alloc->nents = 0;
+}
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+
+#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) \
+		|| defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS)
+static void add_kds_resource(struct kds_resource *kds_res,
+		struct kds_resource **kds_resources, u32 *kds_res_count,
+		unsigned long *kds_access_bitmap, bool exclusive)
+{
+	u32 i;
+
+	for (i = 0; i < *kds_res_count; i++) {
+		/* Duplicate resource, ignore */
+		if (kds_resources[i] == kds_res)
+			return;
+	}
+
+	kds_resources[*kds_res_count] = kds_res;
+	if (exclusive)
+		set_bit(*kds_res_count, kds_access_bitmap);
+	(*kds_res_count)++;
+}
+#endif
+
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+		, u32 *kds_res_count, struct kds_resource **kds_resources,
+		unsigned long *kds_access_bitmap, bool exclusive
+#endif
+		)
+{
+	int err;
+
+	/* decide what needs to happen for this resource */
+	switch (reg->gpu_alloc->type) {
+	case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
+		if (reg->gpu_alloc->imported.user_buf.owner->mm != locked_mm)
+			goto exit;
+
+		reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
+			err = kbase_jd_user_buf_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+	}
+	break;
+	case BASE_MEM_IMPORT_TYPE_UMP: {
+#if defined(CONFIG_KDS) && defined(CONFIG_UMP)
+		if (kds_res_count) {
+			struct kds_resource *kds_res;
+
+			kds_res = ump_dd_kds_resource_get(
+					reg->gpu_alloc->imported.ump_handle);
+			if (kds_res)
+				add_kds_resource(kds_res, kds_resources,
+						kds_res_count,
+						kds_access_bitmap, exclusive);
+		}
+#endif				/*defined(CONFIG_KDS) && defined(CONFIG_UMP) */
+		break;
+	}
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case BASE_MEM_IMPORT_TYPE_UMM: {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+		if (kds_res_count) {
+			struct kds_resource *kds_res;
+
+			kds_res = get_dma_buf_kds_resource(
+					reg->gpu_alloc->imported.umm.dma_buf);
+			if (kds_res)
+				add_kds_resource(kds_res, kds_resources,
+						kds_res_count,
+						kds_access_bitmap, exclusive);
+		}
+#endif
+		reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+			err = kbase_jd_umm_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+		break;
+	}
+#endif
+	default:
+		goto exit;
+	}
+
+	return kbase_mem_phy_alloc_get(reg->gpu_alloc);
+exit:
+	return NULL;
+}
+
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+	switch (alloc->type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		alloc->imported.umm.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.umm.current_mapping_usage_count) {
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			kbase_jd_umm_unmap(kctx, alloc);
+		}
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		alloc->imported.user_buf.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
+			bool writeable = true;
+
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+				writeable = false;
+
+			kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+		}
+	}
+	break;
+	default:
+	break;
+	}
+	kbase_mem_phy_alloc_put(alloc);
+}
+
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *meta = NULL;
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Walk the per context externel resource metadata list for the
+	 * metadata which matches the region which is being acquired.
+	 */
+	list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
+		if (walker->gpu_addr == gpu_addr) {
+			meta = walker;
+			break;
+		}
+	}
+
+	/* No metadata exists so create one. */
+	if (!meta) {
+		struct kbase_va_region *reg;
+
+		/* Find the region */
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				kctx, gpu_addr);
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+			goto failed;
+
+		/* Allocate the metadata object */
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			goto failed;
+
+		/*
+		 * Fill in the metadata object and acquire a reference
+		 * for the physical resource.
+		 */
+		meta->alloc = kbase_map_external_resource(kctx, reg, NULL
+#ifdef CONFIG_KDS
+				, NULL, NULL,
+				NULL, false
+#endif
+				);
+
+		if (!meta->alloc)
+			goto fail_map;
+
+		meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+		meta->refcount = 1;
+
+		list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
+	} else {
+		if (meta->refcount == UINT_MAX)
+			goto failed;
+
+		meta->refcount++;
+	}
+
+	return meta;
+
+fail_map:
+	kfree(meta);
+failed:
+	return NULL;
+}
+
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr, bool force)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Search of the metadata if one isn't provided. */
+	if (!meta) {
+		/*
+		 * Walk the per context externel resource metadata list for the
+		 * metadata which matches the region which is being released.
+		 */
+		list_for_each_entry(walker, &kctx->ext_res_meta_head,
+				ext_res_node) {
+			if (walker->gpu_addr == gpu_addr) {
+				meta = walker;
+				break;
+			}
+		}
+	}
+
+	/* No metadata so just return. */
+	if (!meta)
+		return false;
+
+	meta->refcount--;
+	if ((meta->refcount == 0) || force) {
+		/*
+		 * Last reference to the metadata, drop the physical memory
+		 * reference and free the metadata.
+		 */
+		struct kbase_va_region *reg;
+
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				kctx,
+				meta->gpu_addr);
+
+		kbase_unmap_external_resource(kctx, reg, meta->alloc);
+		list_del(&meta->ext_res_node);
+		kfree(meta);
+	}
+
+	return true;
+}
+
+int kbase_sticky_resource_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->ext_res_meta_head);
+
+	return 0;
+}
+
+void kbase_sticky_resource_term(struct kbase_context *kctx)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Free any sticky resources which haven't been unmapped.
+	 *
+	 * Note:
+	 * We don't care about refcounts at this point as no future
+	 * references to the meta data will be made.
+	 * Region termination would find these if we didn't free them
+	 * here, but it's more efficient if we do the clean up here.
+	 */
+	while (!list_empty(&kctx->ext_res_meta_head)) {
+		walker = list_first_entry(&kctx->ext_res_meta_head,
+				struct kbase_ctx_ext_res_meta, ext_res_node);
+
+		kbase_sticky_resource_release(kctx, walker, 0, true);
+	}
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
new file mode 100755
index 0000000..8f7629a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -0,0 +1,1011 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/kref.h>
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include "mali_kbase_gator.h"
+#endif
+/* Required for kbase_mem_evictable_unmake */
+#include "mali_kbase_mem_linux.h"
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)	/* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
+The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and
+page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table
+updates and generates duplicate page faults as the page table information used by the MMU is not valid.   */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3)	/* round to 8 pages */
+
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0)	/* round to 1 page */
+
+/* This must always be a power of 2 */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/**
+ * A CPU mapping
+ */
+struct kbase_cpu_mapping {
+	struct   list_head mappings_list;
+	struct   kbase_mem_phy_alloc *alloc;
+	struct   kbase_context *kctx;
+	struct   kbase_va_region *region;
+	pgoff_t  page_off;
+	int      count;
+	unsigned long vm_start;
+	unsigned long vm_end;
+};
+
+enum kbase_memory_type {
+	KBASE_MEM_TYPE_NATIVE,
+	KBASE_MEM_TYPE_IMPORTED_UMP,
+	KBASE_MEM_TYPE_IMPORTED_UMM,
+	KBASE_MEM_TYPE_IMPORTED_USER_BUF,
+	KBASE_MEM_TYPE_ALIAS,
+	KBASE_MEM_TYPE_TB,
+	KBASE_MEM_TYPE_RAW
+};
+
+/* internal structure, mirroring base_mem_aliasing_info,
+ * but with alloc instead of a gpu va (handle) */
+struct kbase_aliased {
+	struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
+	u64 offset; /* in pages */
+	u64 length; /* in pages */
+};
+
+/**
+ * @brief Physical pages tracking object properties
+  */
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED  (1ul << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE            (1ul << 1)
+
+/* physical pages tracking object.
+ * Set up to track N pages.
+ * N not stored here, the creator holds that info.
+ * This object only tracks how many elements are actually valid (present).
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not
+ * shared with another region or client. CPU mappings are OK to exist when changing, as
+ * long as the tracked mappings objects are updated as part of the change.
+ */
+struct kbase_mem_phy_alloc {
+	struct kref           kref; /* number of users of this alloc */
+	atomic_t              gpu_mappings;
+	size_t                nents; /* 0..N */
+	phys_addr_t           *pages; /* N elements, only 0..nents are valid */
+
+	/* kbase_cpu_mappings */
+	struct list_head      mappings;
+
+	/* Node used to store this allocation on the eviction list */
+	struct list_head      evict_node;
+	/* Physical backing size when the pages where evicted */
+	size_t                evicted;
+	/*
+	 * Back reference to the region structure which created this
+	 * allocation, or NULL if it has been freed.
+	 */
+	struct kbase_va_region *reg;
+
+	/* type of buffer */
+	enum kbase_memory_type type;
+
+	unsigned long properties;
+
+	/* member in union valid based on @a type */
+	union {
+#ifdef CONFIG_UMP
+		ump_dd_handle ump_handle;
+#endif /* CONFIG_UMP */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		struct {
+			struct dma_buf *dma_buf;
+			struct dma_buf_attachment *dma_attachment;
+			unsigned int current_mapping_usage_count;
+			struct sg_table *sgt;
+		} umm;
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+		struct {
+			u64 stride;
+			size_t nents;
+			struct kbase_aliased *aliased;
+		} alias;
+		/* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
+		struct kbase_context *kctx;
+		struct {
+			unsigned long address;
+			unsigned long size;
+			unsigned long nr_pages;
+			struct page **pages;
+			unsigned int current_mapping_usage_count;
+			struct task_struct *owner;
+			dma_addr_t *dma_addrs;
+		} user_buf;
+	} imported;
+};
+
+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		atomic_inc(&alloc->gpu_mappings);
+}
+
+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+			pr_err("Mismatched %s:\n", __func__);
+			dump_stack();
+		}
+}
+
+void kbase_mem_kref_free(struct kref *kref);
+
+int kbase_mem_init(struct kbase_device *kbdev);
+void kbase_mem_halt(struct kbase_device *kbdev);
+void kbase_mem_term(struct kbase_device *kbdev);
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_get(&alloc->kref);
+	return alloc;
+}
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_put(&alloc->kref, kbase_mem_kref_free);
+	return NULL;
+}
+
+/**
+ * A GPU memory region, and attributes for CPU mappings.
+ */
+struct kbase_va_region {
+	struct rb_node rblink;
+	struct list_head link;
+
+	struct kbase_context *kctx;	/* Backlink to base context */
+
+	u64 start_pfn;		/* The PFN in GPU space */
+	size_t nr_pages;
+
+/* Free region */
+#define KBASE_REG_FREE              (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR            (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR            (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX            (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED        (1ul << 4)
+/* Is GPU cached? */
+#define KBASE_REG_GPU_CACHED        (1ul << 5)
+
+#define KBASE_REG_GROWABLE          (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW           (1ul << 7)
+
+/* VA managed by us */
+#define KBASE_REG_CUSTOM_VA         (1ul << 8)
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN          (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH        (1ul << 10)
+
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_MASK         (3ul << 11)
+#define KBASE_REG_ZONE(x)           (((x) & 3) << 11)
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD            (1ul<<13)
+/* CPU read access */
+#define KBASE_REG_CPU_RD            (1ul<<14)
+
+/* Aligned for GPU EX in SAME_VA */
+#define KBASE_REG_ALIGNED           (1ul<<15)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK      (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x)  (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x)  (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_SECURE            (1ul << 19)
+
+#define KBASE_REG_DONT_NEED         (1ul << 20)
+
+#define KBASE_REG_ZONE_SAME_VA      KBASE_REG_ZONE(0)
+
+/* only used with 32-bit clients */
+/*
+ * On a 32bit platform, custom VA should be wired from (4GB + shader region)
+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink
+ * it down, if required by the GPU, during initialization.
+ */
+
+/*
+ * Dedicated 16MB region for shader code:
+ * VA range 0x101000000-0x102000000
+ */
+#define KBASE_REG_ZONE_EXEC         KBASE_REG_ZONE(1)
+#define KBASE_REG_ZONE_EXEC_BASE    (0x101000000ULL >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_SIZE    ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
+
+#define KBASE_REG_ZONE_CUSTOM_VA         KBASE_REG_ZONE(2)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE    (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE    (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
+
+	unsigned long flags;
+
+	size_t extent; /* nr of pages alloc'd on PF */
+
+	struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */
+	struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */
+
+	/* non-NULL if this memory object is a kds_resource */
+	struct kds_resource *kds_res;
+
+	/* List head used to store the region in the JIT allocation pool */
+	struct list_head jit_node;
+};
+
+/* Common functions */
+static inline phys_addr_t *kbase_get_cpu_phy_pages(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->pages;
+}
+
+static inline phys_addr_t *kbase_get_gpu_phy_pages(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->gpu_alloc->pages;
+}
+
+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	/* if no alloc object the backed size naturally is 0 */
+	if (!reg->cpu_alloc)
+		return 0;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->nents;
+}
+
+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
+
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
+{
+	struct kbase_mem_phy_alloc *alloc;
+	size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+	size_t per_page_size = sizeof(*alloc->pages);
+
+	/* Imported pages may have page private data already in use */
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		alloc_size += nr_pages *
+				sizeof(*alloc->imported.user_buf.dma_addrs);
+		per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
+	}
+
+	/*
+	 * Prevent nr_pages*per_page_size + sizeof(*alloc) from
+	 * wrapping around.
+	 */
+	if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+			/ per_page_size))
+		return ERR_PTR(-ENOMEM);
+
+	/* Allocate based on the size to reduce internal fragmentation of vmem */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc = vzalloc(alloc_size);
+	else
+		alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+	if (!alloc)
+		return ERR_PTR(-ENOMEM);
+
+	/* Store allocation method */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+	kref_init(&alloc->kref);
+	atomic_set(&alloc->gpu_mappings, 0);
+	alloc->nents = 0;
+	alloc->pages = (void *)(alloc + 1);
+	INIT_LIST_HEAD(&alloc->mappings);
+	alloc->type = type;
+
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
+		alloc->imported.user_buf.dma_addrs =
+				(void *) (alloc->pages + nr_pages);
+
+	return alloc;
+}
+
+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
+		struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+	reg->cpu_alloc = kbase_alloc_create(reg->nr_pages,
+			KBASE_MEM_TYPE_NATIVE);
+	if (IS_ERR(reg->cpu_alloc))
+		return PTR_ERR(reg->cpu_alloc);
+	else if (!reg->cpu_alloc)
+		return -ENOMEM;
+	reg->cpu_alloc->imported.kctx = kctx;
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+	if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) {
+		reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
+				KBASE_MEM_TYPE_NATIVE);
+		reg->gpu_alloc->imported.kctx = kctx;
+		INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+	} else {
+		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	}
+
+	INIT_LIST_HEAD(&reg->jit_node);
+	reg->flags &= ~KBASE_REG_FREE;
+	return 0;
+}
+
+static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_add_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_sub_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+/*
+ * Max size for kbdev memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * Max size for kctx memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KCTX  (SZ_64M >> PAGE_SHIFT)
+
+/**
+ * kbase_mem_pool_init - Create a memory pool for a kbase device
+ * @pool:      Memory pool to initialize
+ * @max_size:  Maximum number of free pages the pool can hold
+ * @kbdev:     Kbase device where memory is used
+ * @next_pool: Pointer to the next pool or NULL.
+ *
+ * Allocations from @pool are in whole pages. Each @pool has a free list where
+ * pages can be quickly allocated from. The free list is initially empty and
+ * filled whenever pages are freed back to the pool. The number of free pages
+ * in the pool will in general not exceed @max_size, but the pool may in
+ * certain corner cases grow above @max_size.
+ *
+ * If @next_pool is not NULL, we will allocate from @next_pool before going to
+ * the kernel allocator. Similarily pages can spill over to @next_pool when
+ * @pool is full. Pages are zeroed before they spill over to another pool, to
+ * prevent leaking information between applications.
+ *
+ * A shrinker is registered so that Linux mm can reclaim pages from the pool as
+ * needed.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool);
+
+/**
+ * kbase_mem_pool_term - Destroy a memory pool
+ * @pool:  Memory pool to destroy
+ *
+ * Pages in the pool will spill over to @next_pool (if available) or freed to
+ * the kernel.
+ */
+void kbase_mem_pool_term(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_alloc - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * Allocations from the pool are made as follows:
+ * 1. If there are free pages in the pool, allocate a page from @pool.
+ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page
+ *    from @next_pool.
+ * 3. Finally, allocate a page from the kernel.
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ */
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_free - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @page:  Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * Pages are freed to the pool as follows:
+ * 1. If @pool is not full, add @page to @pool.
+ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to
+ *    @next_pool.
+ * 3. Finally, free @page to the kernel.
+ */
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page,
+		bool dirty);
+
+/**
+ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool
+ * @pool:     Memory pool to allocate from
+ * @nr_pages: Number of pages to allocate
+ * @pages:    Pointer to array where the physical address of the allocated
+ *            pages will be stored.
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages);
+
+/**
+ * kbase_mem_pool_free_pages - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages, bool dirty, bool reclaimed);
+
+/**
+ * kbase_mem_pool_size - Get number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Note: the size of the pool may in certain corner cases exceed @max_size!
+ *
+ * Return: Number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool)
+{
+	return ACCESS_ONCE(pool->cur_size);
+}
+
+/**
+ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Return: Maximum number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool)
+{
+	return pool->max_size;
+}
+
+
+/**
+ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool
+ * @pool:     Memory pool to inspect
+ * @max_size: Maximum number of free pages the pool can hold
+ *
+ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit.
+ * For details see kbase_mem_pool_shrink().
+ */
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
+
+/**
+ * kbase_mem_pool_trim - Grow or shrink the pool to a new size
+ * @pool:     Memory pool to trim
+ * @new_size: New number of pages in the pool
+ *
+ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but
+ * not above @max_size.
+ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel.
+ *
+ * Return: The new size of the pool
+ */
+size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
+
+
+int kbase_region_tracker_init(struct kbase_context *kctx);
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages);
+void kbase_region_tracker_term(struct kbase_context *kctx);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * @brief Check that a pointer is actually a valid region.
+ *
+ * Must be called with context lock held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr);
+
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone);
+void kbase_free_alloced_region(struct kbase_va_region *reg);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+bool kbase_check_alloc_flags(unsigned long flags);
+bool kbase_check_import_flags(unsigned long flags);
+void kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags);
+
+void kbase_gpu_vm_lock(struct kbase_context *kctx);
+void kbase_gpu_vm_unlock(struct kbase_context *kctx);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
+
+int kbase_mmu_init(struct kbase_context *kctx);
+void kbase_mmu_term(struct kbase_context *kctx);
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
+void kbase_mmu_free_pgd(struct kbase_context *kctx);
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					phys_addr_t phys, size_t nr,
+					unsigned long flags);
+
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr);
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags);
+
+/**
+ * @brief Register region and map it on the GPU.
+ *
+ * Call kbase_add_va_region() and map the region on the GPU.
+ */
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+/**
+ * @brief Remove the region from the GPU and unregister it.
+ *
+ * Must be called with context lock held.
+ */
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_as::transaction_mutex on kctx's address space
+ * - It must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_mmu_update(struct kbase_context *kctx);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_as::transaction_mutex on kctx's address space
+ * - It must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_mmu_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable_as() - set the MMU in unmapped mode for an address space.
+ *
+ * @kbdev:	Kbase device
+ * @as_nr:	Number of the address space for which the MMU
+ *		should be set in unmapped mode.
+ *
+ * The caller must hold kbdev->as[as_nr].transaction_mutex.
+ */
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr);
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** Dump the MMU tables to a buffer
+ *
+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the
+ * buffer is too small then the return value will be NULL.
+ *
+ * The GPU vm lock must be held when calling this function.
+ *
+ * The buffer returned should be freed with @ref vfree when it is no longer required.
+ *
+ * @param[in]   kctx        The kbase context to dump
+ * @param[in]   nr_pages    The number of pages to allocate for the buffer.
+ *
+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too
+ * small)
+ */
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages);
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset);
+void kbase_sync_single(struct kbase_context *kctx, phys_addr_t cpu_pa,
+		phys_addr_t gpu_pa, off_t offset, size_t size,
+		enum kbase_sync_type sync_fn);
+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+
+/* OS specific functions */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr);
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg);
+void kbase_os_mem_map_lock(struct kbase_context *kctx);
+void kbase_os_mem_map_unlock(struct kbase_context *kctx);
+
+/**
+ * @brief Update the memory allocation counters for the current process
+ *
+ * OS specific call to updates the current memory allocation counters for the current process with
+ * the supplied delta.
+ *
+ * @param[in] kctx  The kbase context
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
+
+/**
+ * @brief Add to the memory allocation counters for the current process
+ *
+ * OS specific call to add to the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, pages);
+}
+
+/**
+ * @brief Subtract from the memory allocation counters for the current process
+ *
+ * OS specific call to subtract from the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, 0 - pages);
+}
+
+/**
+ * @brief Find the offset of the CPU mapping of a memory allocation containing
+ *        a given address range
+ *
+ * Searches for a CPU mapping of any part of the region starting at @p gpu_addr
+ * that fully encloses the CPU virtual address range specified by @p uaddr and
+ * @p size. Returns a failure indication if only part of the address range lies
+ * within a CPU mapping, or the address range lies within a CPU mapping of a
+ * different region.
+ *
+ * @param[in,out] kctx      The kernel base context used for the allocation.
+ * @param[in]     gpu_addr  GPU address of the start of the allocated region
+ *                          within which to search.
+ * @param[in]     uaddr     Start of the CPU virtual address range.
+ * @param[in]     size      Size of the CPU virtual address range (in bytes).
+ * @param[out]    offset    The offset from the start of the allocation to the
+ *                          specified CPU virtual address.
+ *
+ * @return 0 if offset was obtained successfully. Error code
+ *         otherwise.
+ */
+int kbasep_find_enclosing_cpu_mapping_offset(struct kbase_context *kctx,
+							u64 gpu_addr,
+							unsigned long uaddr,
+							size_t size,
+							u64 *offset);
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer);
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+* @brief Allocates physical pages.
+*
+* Allocates \a nr_pages_requested and updates the alloc object.
+*
+* @param[in] alloc allocation object to add pages to
+* @param[in] nr_pages_requested number of physical pages to allocate
+*
+* @return 0 if all pages have been successfully allocated. Error code otherwise
+*/
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested);
+
+/**
+* @brief Free physical pages.
+*
+* Frees \a nr_pages and updates the alloc object.
+*
+* @param[in] alloc allocation object to free pages from
+* @param[in] nr_pages_to_free number of physical pages to free
+*/
+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
+
+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+{
+	SetPagePrivate(p);
+	if (sizeof(dma_addr_t) > sizeof(p->private)) {
+		/* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
+		 * private field stays the same. So we have to be clever and
+		 * use the fact that we only store DMA addresses of whole pages,
+		 * so the low bits should be zero */
+		KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
+		set_page_private(p, dma_addr >> PAGE_SHIFT);
+	} else {
+		set_page_private(p, dma_addr);
+	}
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+	if (sizeof(dma_addr_t) > sizeof(p->private))
+		return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
+
+	return (dma_addr_t)page_private(p);
+}
+
+static inline void kbase_clear_dma_addr(struct page *p)
+{
+	ClearPagePrivate(p);
+}
+
+/**
+* @brief Process a bus or page fault.
+*
+* This function will process a fault on a specific address space
+*
+* @param[in] kbdev   The @ref kbase_device the fault happened on
+* @param[in] kctx    The @ref kbase_context for the faulting address space if
+*                    one was found.
+* @param[in] as      The address space that has the fault
+*/
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_as *as);
+
+/**
+ * @brief Process a page fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void page_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Process a bus fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void bus_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Flush MMU workqueues.
+ *
+ * This function will cause any outstanding page or bus faults to be processed.
+ * It should be called prior to powering off the GPU.
+ *
+ * @param[in] kbdev   Device pointer
+ */
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev);
+
+/**
+ * kbase_sync_single_for_device - update physical memory and give GPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_jit_debugfs_add - Add per context debugfs entry for JIT.
+ * @kctx: kbase context
+ */
+void kbase_jit_debugfs_add(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_init - Initialize the JIT memory pool management
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_jit_init(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_allocate - Allocate JIT memory
+ * @kctx: kbase context
+ * @info: JIT allocation information
+ *
+ * Return: JIT allocation on success or NULL on failure.
+ */
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info);
+
+/**
+ * kbase_jit_free - Free a JIT allocation
+ * @kctx: kbase context
+ * @reg: JIT allocation
+ *
+ * Frees a JIT allocation and places it into the free pool for later reuse.
+ */
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing
+ * @reg: JIT allocation
+ */
+void kbase_jit_backing_lost(struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_evict - Evict a JIT allocation from the pool
+ * @kctx: kbase context
+ *
+ * Evict the least recently used JIT allocation from the pool. This can be
+ * required if normal VA allocations are failing due to VA exhaustion.
+ *
+ * Return: True if a JIT allocation was freed, false otherwise.
+ */
+bool kbase_jit_evict(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_term - Terminate the JIT memory pool management
+ * @kctx: kbase context
+ */
+void kbase_jit_term(struct kbase_context *kctx);
+
+/**
+ * kbase_map_external_resource - Map an external resource to the GPU.
+ * @kctx:              kbase context.
+ * @reg:               The region to map.
+ * @locked_mm:         The mm_struct which has been locked for this operation.
+ * @kds_res_count:     The number of KDS resources.
+ * @kds_resources:     Array of KDS resources.
+ * @kds_access_bitmap: Access bitmap for KDS.
+ * @exclusive:         If the KDS resource requires exclusive access.
+ *
+ * Return: The physical allocation which backs the region on success or NULL
+ * on failure.
+ */
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+		, u32 *kds_res_count, struct kds_resource **kds_resources,
+		unsigned long *kds_access_bitmap, bool exclusive
+#endif
+		);
+
+/**
+ * kbase_unmap_external_resource - Unmap an external resource from the GPU.
+ * @kctx:  kbase context.
+ * @reg:   The region to unmap or NULL if it has already been released.
+ * @alloc: The physical allocation being unmapped.
+ */
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_sticky_resource_init - Initialize sticky resource management.
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_sticky_resource_init(struct kbase_context *kctx);
+
+/**
+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @gpu_addr: The GPU address of the external resource.
+ *
+ * Return: The metadata object which represents the binding between the
+ * external resource and the kbase context on success or NULL on failure.
+ */
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_release - Release a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @meta:     Binding metadata.
+ * @gpu_addr: GPU address of the external resource.
+ * @force:    If the release is being forced.
+ *
+ * If meta is NULL then gpu_addr will be used to scan the metadata list and
+ * find the matching metadata (if any), otherwise the provided meta will be
+ * used and gpu_addr will be ignored.
+ *
+ * If force is true then the refcount in the metadata is ignored and the
+ * resource will be forced freed.
+ *
+ * Return: True if the release found the metadata and the reference was dropped.
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr, bool force);
+
+/**
+ * kbase_sticky_resource_term - Terminate sticky resource management.
+ * @kctx: kbase context
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx);
+#endif				/* _KBASE_MEM_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
new file mode 100755
index 0000000..e2c4c3d
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -0,0 +1,2690 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.c
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	#include <linux/dma-mapping.h>
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	#include <linux/dma-attrs.h>
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)  */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/shrinker.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_time.h>
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+static const struct vm_operations_struct kbase_vm_ops;
+
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ *
+ * Note: Caller must be holding the processes mmap_sem lock.
+ */
+static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment)
+{
+	int zone;
+	int gpu_pc_bits;
+	int cpu_va_bits;
+	struct kbase_va_region *reg;
+	struct device *dev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_alignment);
+
+	dev = kctx->kbdev->dev;
+	*va_alignment = 0; /* no alignment by default */
+	*gpu_va = 0; /* return 0 on failure */
+
+	gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	cpu_va_bits = BITS_PER_LONG;
+
+	if (0 == va_pages) {
+		dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!");
+		goto bad_size;
+	}
+
+	if (va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+#if defined(CONFIG_64BIT)
+	if (kctx->is_compat)
+		cpu_va_bits = 32;
+#endif
+
+	if (!kbase_check_alloc_flags(*flags)) {
+		dev_warn(dev,
+				"kbase_mem_alloc called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	/* Limit GPU executable allocs to GPU PC size */
+	if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+	    (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT)))
+		goto bad_ex_size;
+
+	/* find out which VA zone to use */
+	if (*flags & BASE_MEM_SAME_VA)
+		zone = KBASE_REG_ZONE_SAME_VA;
+	else if (*flags & BASE_MEM_PROT_GPU_EX)
+		zone = KBASE_REG_ZONE_EXEC;
+	else
+		zone = KBASE_REG_ZONE_CUSTOM_VA;
+
+	reg = kbase_alloc_free_region(kctx, 0, va_pages, zone);
+	if (!reg) {
+		dev_err(dev, "Failed to allocate free region");
+		goto no_region;
+	}
+
+	kbase_update_region_flags(kctx, reg, *flags);
+
+	if (kbase_reg_prepare_native(reg, kctx) != 0) {
+		dev_err(dev, "Failed to prepare region");
+		goto prepare_failed;
+	}
+
+	if (*flags & BASE_MEM_GROW_ON_GPF)
+		reg->extent = extent;
+	else
+		reg->extent = 0;
+
+	if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) {
+		dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)",
+				(unsigned long long)commit_pages,
+				(unsigned long long)va_pages);
+		goto no_mem;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & BASE_MEM_SAME_VA) {
+		unsigned long prot = PROT_NONE;
+		unsigned long va_size = va_pages << PAGE_SHIFT;
+		unsigned long va_map = va_size;
+		unsigned long cookie, cookie_nr;
+		unsigned long cpu_addr;
+
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(dev, "No cookies available for allocation!");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_cookie;
+		}
+		/* return a cookie */
+		cookie_nr = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << cookie_nr);
+		BUG_ON(kctx->pending_regions[cookie_nr]);
+		kctx->pending_regions[cookie_nr] = reg;
+
+		kbase_gpu_vm_unlock(kctx);
+
+		/* relocate to correct base */
+		cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		cookie <<= PAGE_SHIFT;
+
+		/* See if we must align memory due to GPU PC bits vs CPU VA */
+		if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+		    (cpu_va_bits > gpu_pc_bits)) {
+			*va_alignment = gpu_pc_bits;
+			reg->flags |= KBASE_REG_ALIGNED;
+		}
+
+		/*
+		 * Pre-10.1 UKU userland calls mmap for us so return the
+		 * unaligned address and skip the map.
+		 */
+		if (kctx->api_version < KBASE_API_VERSION(10, 1)) {
+			*gpu_va = (u64) cookie;
+			return reg;
+		}
+
+		/*
+		 * GPUCORE-2190:
+		 *
+		 * We still need to return alignment for old userspace.
+		 */
+		if (*va_alignment)
+			va_map += 3 * (1UL << *va_alignment);
+
+		if (*flags & BASE_MEM_PROT_CPU_RD)
+			prot |= PROT_READ;
+		if (*flags & BASE_MEM_PROT_CPU_WR)
+			prot |= PROT_WRITE;
+
+		cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot,
+				MAP_SHARED, cookie);
+
+		if (IS_ERR_VALUE(cpu_addr)) {
+			kctx->pending_regions[cookie_nr] = NULL;
+			kctx->cookies |= (1UL << cookie_nr);
+			goto no_mmap;
+		}
+
+		/*
+		 * If we had to allocate extra VA space to force the
+		 * alignment release it.
+		 */
+		if (*va_alignment) {
+			unsigned long alignment = 1UL << *va_alignment;
+			unsigned long align_mask = alignment - 1;
+			unsigned long addr;
+			unsigned long addr_end;
+			unsigned long aligned_addr;
+			unsigned long aligned_addr_end;
+
+			addr = cpu_addr;
+			addr_end = addr + va_map;
+
+			aligned_addr = (addr + align_mask) &
+					~((u64) align_mask);
+			aligned_addr_end = aligned_addr + va_size;
+
+			if ((aligned_addr_end & BASE_MEM_MASK_4GB) == 0) {
+				/*
+				 * Can't end at 4GB boundary on some GPUs as
+				 * it will halt the shader.
+				 */
+				aligned_addr += 2 * alignment;
+				aligned_addr_end += 2 * alignment;
+			} else if ((aligned_addr & BASE_MEM_MASK_4GB) == 0) {
+				/*
+				 * Can't start at 4GB boundary on some GPUs as
+				 * it will halt the shader.
+				 */
+				aligned_addr += alignment;
+				aligned_addr_end += alignment;
+			}
+
+			/* anything to chop off at the start? */
+			if (addr != aligned_addr)
+				vm_munmap(addr, aligned_addr - addr);
+
+			/* anything at the end? */
+			if (addr_end != aligned_addr_end)
+				vm_munmap(aligned_addr_end,
+						addr_end - aligned_addr_end);
+
+			*gpu_va = (u64) aligned_addr;
+		} else
+			*gpu_va = (u64) cpu_addr;
+	} else /* we control the VA */ {
+		if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
+			dev_warn(dev, "Failed to map memory on GPU");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+
+		kbase_gpu_vm_unlock(kctx);
+	}
+
+	return reg;
+
+no_mmap:
+no_cookie:
+no_mem:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+prepare_failed:
+	kfree(reg);
+no_region:
+bad_ex_size:
+bad_flags:
+bad_size:
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_alloc);
+
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(out);
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	switch (query) {
+	case KBASE_MEM_QUERY_COMMIT_SIZE:
+		if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) {
+			*out = kbase_reg_current_backed_size(reg);
+		} else {
+			size_t i;
+			struct kbase_aliased *aliased;
+			*out = 0;
+			aliased = reg->cpu_alloc->imported.alias.aliased;
+			for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++)
+				*out += aliased[i].length;
+		}
+		break;
+	case KBASE_MEM_QUERY_VA_SIZE:
+		*out = reg->nr_pages;
+		break;
+	case KBASE_MEM_QUERY_FLAGS:
+	{
+		*out = 0;
+		if (KBASE_REG_CPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_WR;
+		if (KBASE_REG_CPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_RD;
+		if (KBASE_REG_CPU_CACHED & reg->flags)
+			*out |= BASE_MEM_CACHED_CPU;
+		if (KBASE_REG_GPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_WR;
+		if (KBASE_REG_GPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_RD;
+		if (!(KBASE_REG_GPU_NX & reg->flags))
+			*out |= BASE_MEM_PROT_GPU_EX;
+		if (KBASE_REG_SHARE_BOTH & reg->flags)
+			*out |= BASE_MEM_COHERENT_SYSTEM;
+		if (KBASE_REG_SHARE_IN & reg->flags)
+			*out |= BASE_MEM_COHERENT_LOCAL;
+		break;
+	}
+	default:
+		*out = 0;
+		goto out_unlock;
+	}
+
+	ret = 0;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the
+ * Ephemeral memory eviction list.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages which can be freed.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long pages = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+
+	mutex_lock(&kctx->evict_lock);
+
+	list_for_each_entry(alloc, &kctx->evict_list, evict_node)
+		pages += alloc->nents;
+
+	mutex_unlock(&kctx->evict_lock);
+	return pages;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction
+ * list for pages and try to reclaim them.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages freed (can be less then requested) or -1 if the
+ * shrinker failed to free pages in its pool.
+ *
+ * Note:
+ * This function accesses region structures without taking the region lock,
+ * this is required as the OOM killer can call the shrinker after the region
+ * lock has already been held.
+ * This is safe as we can guarantee that a region on the eviction list will
+ * not be freed (kbase_mem_free_region removes the allocation from the list
+ * before destroying it), or modified by other parts of the driver.
+ * The eviction list itself is guarded by the eviction lock and the MMU updates
+ * are protected by their own lock.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	struct kbase_mem_phy_alloc *tmp;
+	unsigned long freed = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+	mutex_lock(&kctx->evict_lock);
+
+	list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
+		int err;
+
+		err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
+				0, alloc->nents);
+		if (err != 0) {
+			/*
+			 * Failed to remove GPU mapping, tell the shrinker
+			 * to stop trying to shrink our slab even though we
+			 * have pages in it.
+			 */
+			freed = -1;
+			goto out_unlock;
+		}
+
+		/*
+		 * Update alloc->evicted before freeing the backing so the
+		 * helper can determine that it needs to bypass the accounting
+		 * and memory pool.
+		 */
+		alloc->evicted = alloc->nents;
+
+		kbase_free_phy_pages_helper(alloc, alloc->evicted);
+		freed += alloc->evicted;
+		list_del_init(&alloc->evict_node);
+
+		/*
+		 * Inform the JIT allocator this region has lost backing
+		 * as it might need to free the allocation.
+		 */
+		kbase_jit_backing_lost(alloc->reg);
+
+		/* Enough pages have been freed so stop now */
+		if (freed > sc->nr_to_scan)
+			break;
+	}
+out_unlock:
+	mutex_unlock(&kctx->evict_lock);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_evictable_reclaim_count_objects(s, sc);
+
+	return kbase_mem_evictable_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_evictable_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->evict_list);
+	mutex_init(&kctx->evict_lock);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink;
+#else
+	kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
+	kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+#endif
+	kctx->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	kctx->reclaim.batch = 0;
+#endif
+	register_shrinker(&kctx->reclaim);
+	return 0;
+}
+
+void kbase_mem_evictable_deinit(struct kbase_context *kctx)
+{
+	unregister_shrinker(&kctx->reclaim);
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	int __maybe_unused new_page_count;
+	int i;
+
+	for (i = 0; i < alloc->nents; i++) {
+		struct page *p = phys_to_page(alloc->pages[i]);
+
+		zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+	}
+
+	kbase_process_page_usage_dec(kctx, alloc->nents);
+	new_page_count = kbase_atomic_sub_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+#endif
+}
+
+/**
+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable.
+ * @alloc: The physical allocation
+ */
+static
+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	int __maybe_unused new_page_count;
+	int i;
+
+	new_page_count = kbase_atomic_add_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters so that the allocation is accounted for
+	 * against the process and thus is visible to the OOM killer,
+	 * then remove it from the reclaimable accounting. */
+	kbase_process_page_usage_inc(kctx, alloc->nents);
+
+	for (i = 0; i < alloc->nents; i++) {
+		struct page *p = phys_to_page(alloc->pages[i]);
+
+		zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE);
+	}
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+#endif
+}
+
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* This alloction can't already be on a list. */
+	WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
+	/*
+	 * Try to shrink the CPU mappings as required, if we fail then
+	 * fail the process of making this allocation evictable.
+	 */
+	err = kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
+			0, gpu_alloc->nents);
+	if (err)
+		return -EINVAL;
+
+	/*
+	 * Add the allocation to the eviction list, after this point the shrink
+	 * can reclaim it.
+	 */
+	mutex_lock(&kctx->evict_lock);
+	list_add(&gpu_alloc->evict_node, &kctx->evict_list);
+	mutex_unlock(&kctx->evict_lock);
+	kbase_mem_evictable_mark_reclaim(gpu_alloc);
+
+	gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED;
+	return 0;
+}
+
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * First remove the allocation from the eviction list as it's no
+	 * longer eligible for eviction.
+	 */
+	mutex_lock(&kctx->evict_lock);
+	list_del_init(&gpu_alloc->evict_node);
+	mutex_unlock(&kctx->evict_lock);
+
+	if (gpu_alloc->evicted == 0) {
+		/*
+		 * The backing is still present, update the VM stats as it's
+		 * in use again.
+		 */
+		kbase_mem_evictable_unmark_reclaim(gpu_alloc);
+	} else {
+		/* If the region is still alive ... */
+		if (gpu_alloc->reg) {
+			/* ... allocate replacement backing ... */
+			err = kbase_alloc_phy_pages_helper(gpu_alloc,
+					gpu_alloc->evicted);
+
+			/*
+			 * ... and grow the mapping back to its
+			 * pre-eviction size.
+			 */
+			if (!err)
+				err = kbase_mem_grow_gpu_mapping(kctx,
+						gpu_alloc->reg,
+						gpu_alloc->evicted, 0);
+
+			gpu_alloc->evicted = 0;
+		}
+	}
+
+	/* If the region is still alive remove the DONT_NEED attribute. */
+	if (gpu_alloc->reg)
+		gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED;
+
+	return (err == 0);
+}
+
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+	unsigned int real_flags = 0;
+	unsigned int prev_flags = 0;
+	bool prev_needed, new_needed;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (!gpu_addr)
+		return -EINVAL;
+
+	/* nuke other bits */
+	flags &= mask;
+
+	/* check for only supported flags */
+	if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* mask covers bits we don't support? */
+	if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* convert flags */
+	if (BASE_MEM_COHERENT_SYSTEM & flags)
+		real_flags |= KBASE_REG_SHARE_BOTH;
+	else if (BASE_MEM_COHERENT_LOCAL & flags)
+		real_flags |= KBASE_REG_SHARE_IN;
+
+	/* now we can lock down the context, and find the region */
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	/* Is the region being transitioning between not needed and needed? */
+	prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
+	new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
+	if (prev_needed != new_needed) {
+		/* Aliased allocations can't be made ephemeral */
+		if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1)
+			goto out_unlock;
+
+		if (new_needed) {
+			/* Only native allocations can be marked not needed */
+			if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			ret = kbase_mem_evictable_make(reg->gpu_alloc);
+			if (ret)
+				goto out_unlock;
+		} else {
+			kbase_mem_evictable_unmake(reg->gpu_alloc);
+		}
+	}
+
+	/* limit to imported memory */
+	if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) &&
+	     (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
+		goto out_unlock;
+
+	/* no change? */
+	if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/* save for roll back */
+	prev_flags = reg->flags;
+	reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+	reg->flags |= real_flags;
+
+	/* Currently supporting only imported memory */
+	switch (reg->gpu_alloc->type) {
+#ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_cpu_phy_pages(reg), reg->gpu_alloc->nents, reg->flags);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		/* Future use will use the new flags, existing mapping will NOT be updated
+		 * as memory should not be in use by the GPU when updating the flags.
+		 */
+		ret = 0;
+		WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+		break;
+#endif
+	default:
+		break;
+	}
+
+	/* roll back on error, i.e. not UMP */
+	if (ret)
+		reg->flags = prev_flags;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	up_write(&current->mm->mmap_sem);
+out:
+	return ret;
+}
+
+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
+
+#ifdef CONFIG_UMP
+static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	ump_dd_handle umph;
+	u64 block_count;
+	const ump_dd_physical_block_64 *block_array;
+	u64 i, j;
+	int page = 0;
+	ump_alloc_flags ump_flags;
+	ump_alloc_flags cpu_flags;
+	ump_alloc_flags gpu_flags;
+
+	if (*flags & BASE_MEM_SECURE)
+		goto bad_flags;
+
+	umph = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE == umph)
+		goto bad_id;
+
+	ump_flags = ump_dd_allocation_flags_get(umph);
+	cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK;
+	gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) &
+			UMP_DEVICE_MASK;
+
+	*va_pages = ump_dd_size_get_64(umph);
+	*va_pages >>= PAGE_SHIFT;
+
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	if (*flags & BASE_MEM_SAME_VA)
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	else
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+
+	if (!reg)
+		goto no_region;
+
+	/* we've got pages to map now, and support SAME_VA */
+	*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	reg->gpu_alloc->imported.ump_handle = umph;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMP is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMP cannot be grown */
+
+	/* Override import flags based on UMP flags */
+	*flags &= ~(BASE_MEM_CACHED_CPU);
+	*flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR);
+	*flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR);
+
+	if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) {
+		reg->flags |= KBASE_REG_CPU_CACHED;
+		*flags |= BASE_MEM_CACHED_CPU;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_WR) {
+		reg->flags |= KBASE_REG_CPU_WR;
+		*flags |= BASE_MEM_PROT_CPU_WR;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_RD) {
+		reg->flags |= KBASE_REG_CPU_RD;
+		*flags |= BASE_MEM_PROT_CPU_RD;
+	}
+
+	if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR))
+		reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (gpu_flags & UMP_PROT_DEVICE_WR) {
+		reg->flags |= KBASE_REG_GPU_WR;
+		*flags |= BASE_MEM_PROT_GPU_WR;
+	}
+
+	if (gpu_flags & UMP_PROT_DEVICE_RD) {
+		reg->flags |= KBASE_REG_GPU_RD;
+		*flags |= BASE_MEM_PROT_GPU_RD;
+	}
+
+	/* ump phys block query */
+	ump_dd_phys_blocks_get_64(umph, &block_count, &block_array);
+
+	for (i = 0; i < block_count; i++) {
+		for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) {
+			reg->gpu_alloc->pages[page] = block_array[i].addr + (j << PAGE_SHIFT);
+			page++;
+		}
+	}
+	reg->gpu_alloc->nents = *va_pages;
+	reg->extent = 0;
+
+	return reg;
+
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	ump_dd_release(umph);
+bad_id:
+bad_flags:
+	return NULL;
+}
+#endif				/* CONFIG_UMP */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, int fd, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	struct dma_buf *dma_buf;
+	struct dma_buf_attachment *dma_attachment;
+
+	dma_buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(dma_buf))
+		goto no_buf;
+
+	dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+	if (!dma_attachment)
+		goto no_attachment;
+
+	*va_pages = PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT;
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* ignore SAME_VA */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat) {
+		/* 64-bit tasks must MMAP anyway, but not expose this address to clients */
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	} else {
+#else
+	if (1) {
+#endif
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	/* No pages to map yet */
+	reg->gpu_alloc->nents = 0;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMM is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMM cannot be grown */
+	reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (*flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (*flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (*flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (*flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (*flags & BASE_MEM_SECURE)
+		reg->flags |= KBASE_REG_SECURE;
+
+	/* no read or write permission given on import, only on run do we give the right permissions */
+
+	reg->gpu_alloc->type = BASE_MEM_IMPORT_TYPE_UMM;
+	reg->gpu_alloc->imported.umm.sgt = NULL;
+	reg->gpu_alloc->imported.umm.dma_buf = dma_buf;
+	reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
+	reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
+	reg->extent = 0;
+
+	return reg;
+
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	dma_buf_detach(dma_buf, dma_attachment);
+no_attachment:
+	dma_buf_put(dma_buf);
+no_buf:
+	return NULL;
+}
+#endif  /* CONFIG_DMA_SHARED_BUFFER */
+
+
+static struct kbase_va_region *kbase_mem_from_user_buffer(
+		struct kbase_context *kctx, unsigned long address,
+		unsigned long size, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	long faulted_pages;
+	int zone = KBASE_REG_ZONE_CUSTOM_VA;
+
+	*va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) -
+		PFN_DOWN(address);
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (UINT64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* SAME_VA generally not supported with imported memory (no known use cases) */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat) {
+		/* 64-bit tasks must MMAP anyway, but not expose this address to
+		 * clients */
+		*flags |= BASE_MEM_NEED_MMAP;
+		zone = KBASE_REG_ZONE_SAME_VA;
+	}
+#endif
+	reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone);
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages,
+			KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */
+
+	if (*flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (*flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (*flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (*flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	down_read(&current->mm->mmap_sem);
+
+	/* A sanity check that get_user_pages will work on the memory */
+	/* (so the initial import fails on weird memory regions rather than */
+	/* the job failing when we try to handle the external resources). */
+	/* It doesn't take a reference to the pages (because the page list is NULL). */
+	/* We can't really store the page list because that would involve */
+	/* keeping the pages pinned - instead we pin/unpin around the job */
+	/* (as part of the external resources handling code) */
+	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (faulted_pages != *va_pages)
+		goto fault_mismatch;
+
+	reg->gpu_alloc->imported.user_buf.size = size;
+	reg->gpu_alloc->imported.user_buf.address = address;
+	reg->gpu_alloc->imported.user_buf.nr_pages = faulted_pages;
+	reg->gpu_alloc->imported.user_buf.pages = kmalloc_array(faulted_pages,
+			sizeof(struct page *), GFP_KERNEL);
+	reg->gpu_alloc->imported.user_buf.owner = current;
+
+	if (!reg->gpu_alloc->imported.user_buf.pages)
+		goto no_page_array;
+
+	reg->gpu_alloc->nents = 0;
+	reg->extent = 0;
+
+	return reg;
+
+no_page_array:
+fault_mismatch:
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	return NULL;
+
+}
+
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+		    u64 nents, struct base_mem_aliasing_info *ai,
+		    u64 *num_pages)
+{
+	struct kbase_va_region *reg;
+	u64 gpu_va;
+	size_t i;
+	bool coherent;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(ai);
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	/* mask to only allowed flags */
+	*flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+		   BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
+		   BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+
+	if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_alias called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+	coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 ||
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0;
+
+	if (!stride)
+		goto bad_stride;
+
+	if (!nents)
+		goto bad_nents;
+
+	if ((nents * stride) > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* calculate the number of pages this alias will cover */
+	*num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat) {
+		/* 64-bit tasks must MMAP anyway, but not expose this address to
+		 * clients */
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_SAME_VA);
+	} else {
+#else
+	if (1) {
+#endif
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	/* zero-sized page array, as we don't need one/can support one */
+	reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	kbase_update_region_flags(kctx, reg, *flags);
+
+	reg->gpu_alloc->imported.alias.nents = nents;
+	reg->gpu_alloc->imported.alias.stride = stride;
+	reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents);
+	if (!reg->gpu_alloc->imported.alias.aliased)
+		goto no_aliased_array;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* validate and add src handles */
+	for (i = 0; i < nents; i++) {
+		if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+			if (ai[i].handle.basep.handle !=
+			    BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE)
+				goto bad_handle; /* unsupported magic handle */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+		} else {
+			struct kbase_va_region *aliasing_reg;
+			struct kbase_mem_phy_alloc *alloc;
+
+			aliasing_reg = kbase_region_tracker_find_region_base_address(
+				kctx,
+				(ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT);
+
+			/* validate found region */
+			if (!aliasing_reg)
+				goto bad_handle; /* Not found */
+			if (aliasing_reg->flags & KBASE_REG_FREE)
+				goto bad_handle; /* Free region */
+			if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+				goto bad_handle; /* Ephemeral region */
+			if (!aliasing_reg->gpu_alloc)
+				goto bad_handle; /* No alloc */
+			if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+				goto bad_handle; /* Not a native alloc */
+			if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0))
+				goto bad_handle;
+				/* Non-coherent memory cannot alias
+				   coherent memory, and vice versa.*/
+
+			/* check size against stride */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+
+			alloc = aliasing_reg->gpu_alloc;
+
+			/* check against the alloc's size */
+			if (ai[i].offset > alloc->nents)
+				goto bad_handle; /* beyond end */
+			if (ai[i].offset + ai[i].length > alloc->nents)
+				goto bad_handle; /* beyond end */
+
+			reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+			reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset;
+		}
+	}
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat) {
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
+			goto no_cookie;
+		}
+		/* return a cookie */
+		gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << gpu_va);
+		BUG_ON(kctx->pending_regions[gpu_va]);
+		kctx->pending_regions[gpu_va] = reg;
+
+		/* relocate to correct base */
+		gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		gpu_va <<= PAGE_SHIFT;
+	} else /* we control the VA */ {
+#else
+	if (1) {
+#endif
+		if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+			dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags &= ~KBASE_REG_GROWABLE;
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return gpu_va;
+
+#ifdef CONFIG_64BIT
+no_cookie:
+#endif
+no_mmap:
+bad_handle:
+	kbase_gpu_vm_unlock(kctx);
+no_aliased_array:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_reg:
+bad_size:
+bad_nents:
+bad_stride:
+bad_flags:
+	return 0;
+}
+
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u64 *gpu_va, u64 *va_pages,
+		u64 *flags)
+{
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_pages);
+	KBASE_DEBUG_ASSERT(flags);
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat)
+		*flags |= BASE_MEM_SAME_VA;
+#endif
+
+	if (!kbase_check_import_flags(*flags)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	switch (type) {
+#ifdef CONFIG_UMP
+	case BASE_MEM_IMPORT_TYPE_UMP: {
+		ump_secure_id id;
+
+		if (get_user(id, (ump_secure_id __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_ump(kctx, id, va_pages, flags);
+	}
+	break;
+#endif /* CONFIG_UMP */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case BASE_MEM_IMPORT_TYPE_UMM: {
+		int fd;
+
+		if (get_user(fd, (int __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_umm(kctx, fd, va_pages, flags);
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
+		struct base_mem_import_user_buffer user_buffer;
+		void __user *uptr;
+
+		if (copy_from_user(&user_buffer, phandle,
+				sizeof(user_buffer))) {
+			reg = NULL;
+		} else {
+#ifdef CONFIG_COMPAT
+			if (kctx->is_compat)
+				uptr = compat_ptr(user_buffer.ptr.compat_value);
+			else
+#endif
+				uptr = user_buffer.ptr.value;
+
+			reg = kbase_mem_from_user_buffer(kctx,
+					(unsigned long)uptr, user_buffer.length,
+					va_pages, flags);
+		}
+		break;
+	}
+	default: {
+		reg = NULL;
+		break;
+	}
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies)
+			goto no_cookie;
+		/* return a cookie */
+		*gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << *gpu_va);
+		BUG_ON(kctx->pending_regions[*gpu_va]);
+		kctx->pending_regions[*gpu_va] = reg;
+
+		/* relocate to correct base */
+		*gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		*gpu_va <<= PAGE_SHIFT;
+
+	} else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES)  {
+		/* we control the VA, mmap now to the GPU */
+		if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	} else {
+		/* we control the VA, but nothing to mmap yet */
+		if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	/* clear out private flags */
+	*flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+
+no_gpu_va:
+no_cookie:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+no_reg:
+bad_flags:
+	*gpu_va = 0;
+	*va_pages = 0;
+	*flags = 0;
+	return -ENOMEM;
+}
+
+
+static int zap_range_nolock(struct mm_struct *mm,
+		const struct vm_operations_struct *vm_ops,
+		unsigned long start, unsigned long end)
+{
+	struct vm_area_struct *vma;
+	int err = -EINVAL; /* in case end < start */
+
+	while (start < end) {
+		unsigned long local_start;
+		unsigned long local_end;
+
+		vma = find_vma_intersection(mm, start, end);
+		if (!vma)
+			break;
+
+		/* is it ours? */
+		if (vma->vm_ops != vm_ops)
+			goto try_next;
+
+		local_start = vma->vm_start;
+
+		if (start > local_start)
+			local_start = start;
+
+		local_end = vma->vm_end;
+
+		if (end < local_end)
+			local_end = end;
+
+		err = zap_vma_ptes(vma, local_start, local_end - local_start);
+		if (unlikely(err))
+			break;
+
+try_next:
+		/* go to next vma, if any */
+		start = vma->vm_end;
+	}
+
+	return err;
+}
+
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	phys_addr_t *phy_pages;
+	u64 delta = new_pages - old_pages;
+	int ret = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Map the new pages into the GPU */
+	phy_pages = kbase_get_gpu_phy_pages(reg);
+	ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
+			phy_pages + old_pages, delta, reg->flags);
+
+	return ret;
+}
+
+static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	struct kbase_mem_phy_alloc *cpu_alloc = reg->cpu_alloc;
+	struct kbase_cpu_mapping *mapping;
+	int err;
+
+	lockdep_assert_held(&kctx->process_mm->mmap_sem);
+
+	list_for_each_entry(mapping, &cpu_alloc->mappings, mappings_list) {
+		unsigned long mapping_size;
+
+		mapping_size = (mapping->vm_end - mapping->vm_start)
+				>> PAGE_SHIFT;
+
+		/* is this mapping affected ?*/
+		if ((mapping->page_off + mapping_size) > new_pages) {
+			unsigned long first_bad = 0;
+
+			if (new_pages > mapping->page_off)
+				first_bad = new_pages - mapping->page_off;
+
+			err = zap_range_nolock(current->mm,
+					&kbase_vm_ops,
+					mapping->vm_start +
+					(first_bad << PAGE_SHIFT),
+					mapping->vm_end);
+
+			WARN(err,
+			     "Failed to zap VA range (0x%lx - 0x%lx);\n",
+			     mapping->vm_start +
+			     (first_bad << PAGE_SHIFT),
+			     mapping->vm_end
+			     );
+
+			/* The zap failed, give up and exit */
+			if (err)
+				goto failed;
+		}
+	}
+
+	return 0;
+
+failed:
+	return err;
+}
+
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 delta = old_pages - new_pages;
+	int ret = 0;
+
+	ret = kbase_mmu_teardown_pages(kctx,
+			reg->start_pfn + new_pages, delta);
+	if (ret)
+		return ret;
+
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) {
+		/*
+		* Wait for GPU to flush write buffer before freeing
+		* physical pages.
+		 */
+		kbase_wait_write_flush(kctx);
+	}
+#endif
+
+	return ret;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason)
+{
+	u64 old_pages;
+	u64 delta;
+	int res = -EINVAL;
+	struct kbase_va_region *reg;
+	bool read_locked = false;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(failure_reason);
+	KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE)) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+		goto out_unlock;
+	}
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (0 == (reg->flags & KBASE_REG_GROWABLE)) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (new_pages > reg->nr_pages) {
+		/* Would overflow the VA region */
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+		goto out_unlock;
+	}
+
+	/* can't be mapped more than once on the GPU */
+	if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+	/* can't grow regions which are ephemeral */
+	if (reg->flags & BASE_MEM_DONT_NEED) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (new_pages == reg->gpu_alloc->nents) {
+		/* no change */
+		res = 0;
+		goto out_unlock;
+	}
+
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (new_pages > old_pages) {
+		delta = new_pages - old_pages;
+
+		/*
+		 * No update to the mm so downgrade the writer lock to a read
+		 * lock so other readers aren't blocked after this point.
+		 */
+		downgrade_write(&current->mm->mmap_sem);
+		read_locked = true;
+
+		/* Allocate some more pages */
+		if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+		if (reg->cpu_alloc != reg->gpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					reg->gpu_alloc, delta) != 0) {
+				*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+				kbase_free_phy_pages_helper(reg->cpu_alloc,
+						delta);
+				goto out_unlock;
+			}
+		}
+
+		/* No update required for CPU mappings, that's done on fault. */
+
+		/* Update GPU mapping. */
+		res = kbase_mem_grow_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* On error free the new pages */
+		if (res) {
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+	} else {
+		delta = old_pages - new_pages;
+
+		/* Update all CPU mapping(s) */
+		res = kbase_mem_shrink_cpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+
+		/* Update the GPU mapping */
+		res = kbase_mem_shrink_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+
+		kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+		if (reg->cpu_alloc != reg->gpu_alloc)
+			kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	if (read_locked)
+		up_read(&current->mm->mmap_sem);
+	else
+		up_write(&current->mm->mmap_sem);
+
+	return res;
+}
+
+static void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* non-atomic as we're under Linux' mm lock */
+	map->count++;
+}
+
+static void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* non-atomic as we're under Linux' mm lock */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	kbase_gpu_vm_lock(map->kctx);
+
+	if (map->region) {
+		KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
+				KBASE_REG_ZONE_SAME_VA);
+		/* Avoid freeing memory on the process death which results in
+		 * GPU Page Fault. Memory will be freed in kbase_destroy_context
+		 */
+		if (!(current->flags & PF_EXITING))
+			kbase_mem_free_region(map->kctx, map->region);
+	}
+
+	list_del(&map->mappings_list);
+
+	kbase_gpu_vm_unlock(map->kctx);
+
+	kbase_mem_phy_alloc_put(map->alloc);
+	kfree(map);
+}
+
+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
+
+
+static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+	pgoff_t rel_pgoff;
+	size_t i;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	/* we don't use vmf->pgoff as it's affected by our mmap with
+	 * offset being a GPU VA or a cookie */
+	rel_pgoff = ((unsigned long)vmf->virtual_address - map->vm_start)
+			>> PAGE_SHIFT;
+
+	kbase_gpu_vm_lock(map->kctx);
+	if (map->page_off + rel_pgoff >= map->alloc->nents)
+		goto locked_bad_fault;
+
+	/* Fault on access to DONT_NEED regions */
+	if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED))
+		goto locked_bad_fault;
+
+	/* insert all valid pages from the fault location */
+	for (i = rel_pgoff;
+	     i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT,
+	     map->alloc->nents - map->page_off); i++) {
+		int ret = vm_insert_pfn(vma, map->vm_start + (i << PAGE_SHIFT),
+		    PFN_DOWN(map->alloc->pages[map->page_off + i]));
+		if (ret < 0 && ret != -EBUSY)
+			goto locked_bad_fault;
+	}
+
+	kbase_gpu_vm_unlock(map->kctx);
+	/* we resolved it, nothing for VM to do */
+	return VM_FAULT_NOPAGE;
+
+locked_bad_fault:
+	kbase_gpu_vm_unlock(map->kctx);
+	return VM_FAULT_SIGBUS;
+}
+
+static const struct vm_operations_struct kbase_vm_ops = {
+	.open  = kbase_cpu_vm_open,
+	.close = kbase_cpu_vm_close,
+	.fault = kbase_cpu_vm_fault
+};
+
+static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close)
+{
+	struct kbase_cpu_mapping *map;
+	u64 start_off = vma->vm_pgoff - reg->start_pfn;
+	phys_addr_t *page_array;
+	int err = 0;
+	int i;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+	if (!map) {
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+	 * VM_DONTEXPAND - disable mremap on this region
+	 * VM_IO - disables paging
+	 * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+	 * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+	 *               This is needed to support using the dedicated and
+	 *               the OS based memory backends together.
+	 */
+	/*
+	 * This will need updating to propagate coherency flags
+	 * See MIDBASE-1057
+	 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_ops;
+	vma->vm_private_data = map;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+	    (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+		/* We can't map vmalloc'd memory uncached.
+		 * Other memory will have been returned from
+		 * kbase_mem_pool which would be
+		 * suitable for mapping uncached.
+		 */
+		BUG_ON(kaddr);
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	if (!kaddr) {
+		unsigned long addr = vma->vm_start + aligned_offset;
+
+		vma->vm_flags |= VM_PFNMAP;
+		for (i = 0; i < nr_pages; i++) {
+			unsigned long pfn = PFN_DOWN(page_array[i + start_off]);
+
+			err = vm_insert_pfn(vma, addr, pfn);
+			if (WARN_ON(err))
+				break;
+
+			addr += PAGE_SIZE;
+		}
+	} else {
+		WARN_ON(aligned_offset);
+		/* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+		vma->vm_flags |= VM_MIXEDMAP;
+		/* vmalloc remaping is easy... */
+		err = remap_vmalloc_range(vma, kaddr, 0);
+		WARN_ON(err);
+	}
+
+	if (err) {
+		kfree(map);
+		goto out;
+	}
+
+	map->page_off = start_off;
+	map->region = free_on_close ? reg : NULL;
+	map->kctx = reg->kctx;
+	map->vm_start = vma->vm_start + aligned_offset;
+	if (aligned_offset) {
+		KBASE_DEBUG_ASSERT(!start_off);
+		map->vm_end = map->vm_start + (reg->nr_pages << PAGE_SHIFT);
+	} else {
+		map->vm_end = vma->vm_end;
+	}
+	map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->count = 1; /* start with one ref */
+
+	if (reg->flags & KBASE_REG_CPU_CACHED)
+		map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+	return err;
+}
+
+static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
+{
+	struct kbase_va_region *new_reg;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+	u32 *tb;
+	int owns_tb = 1;
+
+	dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	if (!kctx->jctx.tb) {
+		KBASE_DEBUG_ASSERT(0 != size);
+		tb = vmalloc_user(size);
+
+		if (NULL == tb) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = kbase_device_trace_buffer_install(kctx, tb, size);
+		if (err) {
+			vfree(tb);
+			goto out;
+		}
+	} else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	*kaddr = kctx->jctx.tb;
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_region;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->cpu_alloc->imported.kctx = kctx;
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+
+	/* alloc now owns the tb */
+	owns_tb = 0;
+
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_va_region;
+	}
+
+	*reg = new_reg;
+
+	/* map read only, noexec */
+	vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+	/* the rest of the flags is added by the cpu_mmap handler */
+
+	dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
+	return 0;
+
+out_no_va_region:
+out_no_alloc:
+	kbase_free_alloced_region(new_reg);
+out_no_region:
+	if (owns_tb) {
+		kbase_device_trace_buffer_uninstall(kctx);
+		vfree(tb);
+	}
+out:
+	return err;
+}
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
+{
+	struct kbase_va_region *new_reg;
+	void *kaddr;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	kaddr = kbase_mmu_dump(kctx, nr_pages);
+
+	if (!kaddr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_va_region;
+	}
+
+	*kmap_addr = kaddr;
+	*reg = new_reg;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+	return 0;
+
+out_no_alloc:
+out_va_region:
+	kbase_free_alloced_region(new_reg);
+out:
+	return err;
+}
+
+
+void kbase_os_mem_map_lock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	down_read(&mm->mmap_sem);
+}
+
+void kbase_os_mem_map_unlock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	up_read(&mm->mmap_sem);
+}
+
+#if defined(CONFIG_DMA_SHARED_BUFFER) && defined(CONFIG_MALI_TRACE_TIMELINE)
+/* This section is required only for instrumentation. */
+
+static void kbase_dma_buf_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* Non-atomic as we're under Linux's mm lock. */
+	map->count++;
+}
+
+static void kbase_dma_buf_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* Non-atomic as we're under Linux's mm lock. */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+
+	kbase_gpu_vm_lock(map->kctx);
+	list_del(&map->mappings_list);
+	kbase_gpu_vm_unlock(map->kctx);
+	kfree(map);
+}
+
+static const struct vm_operations_struct kbase_dma_mmap_ops = {
+	.open  = kbase_dma_buf_vm_open,
+	.close = kbase_dma_buf_vm_close,
+};
+#endif /* CONFIG_DMA_SHARED_BUFFER && CONFIG_MALI_TRACE_TIMELINE */
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx = file->private_data;
+	struct kbase_va_region *reg;
+	void *kaddr = NULL;
+	size_t nr_pages;
+	int err = 0;
+	int free_on_close = 0;
+	struct device *dev = kctx->kbdev->dev;
+	size_t aligned_offset = 0;
+
+	dev_dbg(dev, "kbase_mmap\n");
+	nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+	/* strip away corresponding VM_MAY% flags to the VM_% flags requested */
+	vma->vm_flags &= ~((vma->vm_flags & (VM_READ | VM_WRITE)) << 4);
+
+	if (0 == nr_pages) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
+		/* The non-mapped tracking helper page */
+		err = kbase_tracking_page_setup(kctx, vma);
+		goto out_unlock;
+	}
+
+	/* if not the MTP, verify that the MTP has been mapped */
+	rcu_read_lock();
+	/* catches both when the special page isn't present or
+	 * when we've forked */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		err = -EINVAL;
+		rcu_read_unlock();
+		goto out_unlock;
+	}
+	rcu_read_unlock();
+
+	switch (vma->vm_pgoff) {
+	case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
+	case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE):
+		/* Illegal handle for direct map */
+		err = -EINVAL;
+		goto out_unlock;
+	case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE):
+		err = kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
+		/* free the region on munmap */
+		free_on_close = 1;
+		goto map;
+	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+		/* MMU dump */
+		err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		goto map;
+	case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+	     PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+		/* SAME_VA stuff, fetch the right region */
+		int gpu_pc_bits;
+		int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+
+		gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		if (reg->flags & KBASE_REG_ALIGNED) {
+			/* nr_pages must be able to hold alignment pages
+			 * plus actual pages */
+			unsigned long align = 1ULL << gpu_pc_bits;
+			unsigned long extra_pages = 3 * PFN_DOWN(align);
+			unsigned long aligned_addr;
+			unsigned long aligned_addr_end;
+			unsigned long nr_bytes = reg->nr_pages << PAGE_SHIFT;
+
+			if (kctx->api_version < KBASE_API_VERSION(8, 5))
+				/* Maintain compatibility with old userspace */
+				extra_pages = PFN_DOWN(align);
+
+			if (nr_pages != reg->nr_pages + extra_pages) {
+				/* incorrect mmap size */
+				/* leave the cookie for a potential
+				 * later mapping, or to be reclaimed
+				 * later when the context is freed */
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
+			aligned_addr = ALIGN(vma->vm_start, align);
+			aligned_addr_end = aligned_addr + nr_bytes;
+
+			if (kctx->api_version >= KBASE_API_VERSION(8, 5)) {
+				if ((aligned_addr_end & BASE_MEM_MASK_4GB) == 0) {
+					/* Can't end at 4GB boundary */
+					aligned_addr += 2 * align;
+				} else if ((aligned_addr & BASE_MEM_MASK_4GB) == 0) {
+					/* Can't start at 4GB boundary */
+					aligned_addr += align;
+				}
+			}
+
+			aligned_offset = aligned_addr - vma->vm_start;
+		} else if (reg->nr_pages != nr_pages) {
+			/* incorrect mmap size */
+			/* leave the cookie for a potential later
+			 * mapping, or to be reclaimed later when the
+			 * context is freed */
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		if ((vma->vm_flags & VM_READ &&
+					!(reg->flags & KBASE_REG_CPU_RD)) ||
+				(vma->vm_flags & VM_WRITE &&
+				 !(reg->flags & KBASE_REG_CPU_WR))) {
+			/* VM flags inconsistent with region flags */
+			err = -EPERM;
+			dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+			goto out_unlock;
+		}
+
+		/* adjust down nr_pages to what we have physically */
+		nr_pages = kbase_reg_current_backed_size(reg);
+
+		if (kbase_gpu_mmap(kctx, reg,
+					vma->vm_start + aligned_offset,
+					reg->nr_pages, 1) != 0) {
+			dev_err(dev, "%s:%d\n", __FILE__, __LINE__);
+			/* Unable to map in GPU space. */
+			WARN_ON(1);
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		/* no need for the cookie anymore */
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		/*
+		 * Overwrite the offset with the
+		 * region start_pfn, so we effectively
+		 * map from offset 0 in the region.
+		 */
+		vma->vm_pgoff = reg->start_pfn;
+
+		/* free the region on munmap */
+		free_on_close = 1;
+		goto map;
+	}
+	default: {
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx, (u64)vma->vm_pgoff << PAGE_SHIFT);
+
+		if (reg && !(reg->flags & KBASE_REG_FREE)) {
+			/* will this mapping overflow the size of the region? */
+			if (nr_pages > (reg->nr_pages - (vma->vm_pgoff - reg->start_pfn)))
+				goto overflow;
+
+			if ((vma->vm_flags & VM_READ &&
+			     !(reg->flags & KBASE_REG_CPU_RD)) ||
+			    (vma->vm_flags & VM_WRITE &&
+			     !(reg->flags & KBASE_REG_CPU_WR))) {
+				/* VM flags inconsistent with region flags */
+				err = -EPERM;
+				dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+				goto out_unlock;
+			}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+			if (reg->cpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM)
+				goto dma_map;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+			/* limit what we map to the amount currently backed */
+			if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+				if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
+					nr_pages = 0;
+				else
+					nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn);
+			}
+
+			goto map;
+		}
+
+overflow:
+		err = -ENOMEM;
+		goto out_unlock;
+	} /* default */
+	} /* switch */
+map:
+	err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
+		/* MMU dump - userspace should now have a reference on
+		 * the pages, so we can now free the kernel mapping */
+		vfree(kaddr);
+	}
+	goto out_unlock;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+dma_map:
+	err = dma_buf_mmap(reg->cpu_alloc->imported.umm.dma_buf, vma, vma->vm_pgoff - reg->start_pfn);
+#if defined(CONFIG_MALI_TRACE_TIMELINE)
+	/* This section is required only for instrumentation. */
+	/* Add created mapping to imported region mapping list.
+	 * It is important to make it visible to dumping infrastructure.
+	 * Add mapping only if vm_ops structure is not used by memory owner. */
+	WARN_ON(vma->vm_ops);
+	WARN_ON(vma->vm_private_data);
+	if (!err && !vma->vm_ops && !vma->vm_private_data) {
+		struct kbase_cpu_mapping *map = kzalloc(
+			sizeof(*map),
+			GFP_KERNEL);
+
+		if (map) {
+			map->kctx     = reg->kctx;
+			map->region   = NULL;
+			map->page_off = vma->vm_pgoff;
+			map->vm_start = vma->vm_start;
+			map->vm_end   = vma->vm_end;
+			map->count    = 1; /* start with one ref */
+
+			vma->vm_ops          = &kbase_dma_mmap_ops;
+			vma->vm_private_data = map;
+
+			list_add(
+				&map->mappings_list,
+				&reg->cpu_alloc->mappings);
+		}
+	}
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+out:
+	if (err)
+		dev_err(dev, "mmap failed %d\n", err);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmap);
+
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map)
+{
+	struct kbase_va_region *reg;
+	unsigned long page_index;
+	unsigned int offset = gpu_addr & ~PAGE_MASK;
+	size_t page_count = PFN_UP(offset + size);
+	phys_addr_t *page_array;
+	struct page **pages;
+	void *cpu_addr = NULL;
+	pgprot_t prot;
+	size_t i;
+	bool sync_needed;
+
+	if (!size || !map)
+		return NULL;
+
+	/* check if page_count calculation will wrap */
+	if (size > ((size_t)-1 / PAGE_SIZE))
+		return NULL;
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn;
+
+	/* check if page_index + page_count will wrap */
+	if (-1UL - page_count < page_index)
+		goto out_unlock;
+
+	if (page_index + page_count > kbase_reg_current_backed_size(reg))
+		goto out_unlock;
+
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+	if (!page_array)
+		goto out_unlock;
+
+	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out_unlock;
+
+	for (i = 0; i < page_count; i++)
+		pages[i] = pfn_to_page(PFN_DOWN(page_array[page_index + i]));
+
+	prot = PAGE_KERNEL;
+	if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
+		/* Map uncached */
+		prot = pgprot_writecombine(prot);
+	}
+
+	cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+
+	kfree(pages);
+
+	if (!cpu_addr)
+		goto out_unlock;
+
+	map->gpu_addr = gpu_addr;
+	map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
+	map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+	map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
+	map->addr = (void *)((uintptr_t)cpu_addr + offset);
+	map->size = size;
+	map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0;
+	sync_needed = map->is_cached;
+
+#ifdef CONFIG_MALI_COH_KERN
+	/* kernel can use coherent memory if supported */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+		sync_needed = false;
+#endif
+
+	if (sync_needed) {
+		/* Sync first page */
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+		phys_addr_t cpu_pa = map->cpu_pages[0];
+		phys_addr_t gpu_pa = map->gpu_pages[0];
+
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+				KBASE_SYNC_TO_CPU);
+
+		/* Sync middle pages (if any) */
+		for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+			cpu_pa = map->cpu_pages[i];
+			gpu_pa = map->gpu_pages[i];
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+					KBASE_SYNC_TO_CPU);
+		}
+
+		/* Sync last page (if any) */
+		if (page_count > 1) {
+			cpu_pa = map->cpu_pages[page_count - 1];
+			gpu_pa = map->gpu_pages[page_count - 1];
+			sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+					KBASE_SYNC_TO_CPU);
+		}
+	}
+	kbase_gpu_vm_unlock(kctx);
+
+	return map->addr;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_vmap);
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+	void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+	bool sync_needed = map->is_cached;
+	vunmap(addr);
+#ifdef CONFIG_MALI_COH_KERN
+	/* kernel can use coherent memory if supported */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+		sync_needed = false;
+#endif
+	if (sync_needed) {
+		off_t offset = (uintptr_t)map->addr & ~PAGE_MASK;
+		size_t size = map->size;
+		size_t page_count = PFN_UP(offset + size);
+		size_t i;
+
+		/* Sync first page */
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+		phys_addr_t cpu_pa = map->cpu_pages[0];
+		phys_addr_t gpu_pa = map->gpu_pages[0];
+
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+				KBASE_SYNC_TO_DEVICE);
+
+		/* Sync middle pages (if any) */
+		for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+			cpu_pa = map->cpu_pages[i];
+			gpu_pa = map->gpu_pages[i];
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+					KBASE_SYNC_TO_DEVICE);
+		}
+
+		/* Sync last page (if any) */
+		if (page_count > 1) {
+			cpu_pa = map->cpu_pages[page_count - 1];
+			gpu_pa = map->gpu_pages[page_count - 1];
+			sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+					KBASE_SYNC_TO_DEVICE);
+		}
+	}
+	map->gpu_addr = 0;
+	map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+	map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+	map->cpu_pages = NULL;
+	map->gpu_pages = NULL;
+	map->addr = NULL;
+	map->size = 0;
+	map->is_cached = false;
+}
+KBASE_EXPORT_TEST_API(kbase_vunmap);
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
+{
+	struct mm_struct *mm;
+
+	rcu_read_lock();
+	mm = rcu_dereference(kctx->process_mm);
+	if (mm) {
+		atomic_add(pages, &kctx->nonmapped_pages);
+#ifdef SPLIT_RSS_COUNTING
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+#else
+		spin_lock(&mm->page_table_lock);
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+		spin_unlock(&mm->page_table_lock);
+#endif
+	}
+	rcu_read_unlock();
+}
+
+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
+{
+	int pages;
+	struct mm_struct *mm;
+
+	spin_lock(&kctx->mm_update_lock);
+	mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
+	if (!mm) {
+		spin_unlock(&kctx->mm_update_lock);
+		return;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, NULL);
+	spin_unlock(&kctx->mm_update_lock);
+	synchronize_rcu();
+
+	pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+#ifdef SPLIT_RSS_COUNTING
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+#else
+	spin_lock(&mm->page_table_lock);
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+	spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx;
+
+	kctx = vma->vm_private_data;
+	kbasep_os_process_page_usage_drain(kctx);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+	.close = kbase_special_vm_close,
+};
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
+{
+	/* check that this is the only tracking page */
+	spin_lock(&kctx->mm_update_lock);
+	if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
+		spin_unlock(&kctx->mm_update_lock);
+		return -EFAULT;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, current->mm);
+
+	spin_unlock(&kctx->mm_update_lock);
+
+	/* no real access */
+	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_special_ops;
+	vma->vm_private_data = kctx;
+
+	return 0;
+}
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
+{
+	int i;
+	int res;
+	void *va;
+	dma_addr_t  dma_pa;
+	struct kbase_va_region *reg;
+	phys_addr_t *page_array;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))&&(LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
+	u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+		    BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(0 != size);
+	KBASE_DEBUG_ASSERT(0 != pages);
+
+	if (size == 0)
+		goto err;
+
+	/* All the alloc calls return zeroed memory */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+							DMA_ATTR_WRITE_COMBINE);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, &attrs);
+#else
+	va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
+#endif
+	if (!va)
+		goto err;
+
+	/* Store the state so we can free it later. */
+	handle->cpu_va = va;
+	handle->dma_pa = dma_pa;
+	handle->size   = size;
+
+
+	reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA);
+	if (!reg)
+		goto no_reg;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	kbase_update_region_flags(kctx, reg, flags);
+
+	reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(reg->cpu_alloc))
+		goto no_alloc;
+
+	reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	for (i = 0; i < pages; i++)
+		page_array[i] = dma_pa + (i << PAGE_SHIFT);
+
+	reg->cpu_alloc->nents = pages;
+
+	kbase_gpu_vm_lock(kctx);
+	res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
+	kbase_gpu_vm_unlock(kctx);
+	if (res)
+		goto no_mmap;
+
+	return va;
+
+no_mmap:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc:
+	kfree(reg);
+no_reg:
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, DMA_ATTR_WRITE_COMBINE);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
+#endif
+err:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_va_alloc);
+
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
+{
+	struct kbase_va_region *reg;
+	int err;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))&&(LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
+	KBASE_DEBUG_ASSERT(reg);
+	err = kbase_gpu_munmap(kctx, reg);
+	kbase_gpu_vm_unlock(kctx);
+	KBASE_DEBUG_ASSERT(!err);
+
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+			handle->cpu_va, handle->dma_pa,
+			DMA_ATTR_WRITE_COMBINE);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+			handle->cpu_va, handle->dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, handle->size,
+				handle->cpu_va, handle->dma_pa);
+#endif
+}
+KBASE_EXPORT_SYMBOL(kbase_va_free);
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
new file mode 100755
index 0000000..6c0fb56
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -0,0 +1,141 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.h
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_MEM_LINUX_H_
+#define _KBASE_MEM_LINUX_H_
+
+/** A HWC dump mapping */
+struct kbase_hwc_dma_mapping {
+	void       *cpu_va;
+	dma_addr_t  dma_pa;
+	size_t      size;
+};
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment);
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages);
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u64 *gpu_va, u64 *va_pages,
+		u64 *flags);
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason);
+int kbase_mmap(struct file *file, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction
+ * mechanism.
+ * @kctx: The kbase context to initialize.
+ *
+ * Return: Zero on success or -errno on failure.
+ */
+int kbase_mem_evictable_init(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to de-initialize.
+ */
+void kbase_mem_evictable_deinit(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the grow
+ * @old_pages: The number of pages before the grow
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Expand the GPU mapping to encompass the new psychical pages which have
+ * been added to the allocation.
+ *
+ * Note: Caller must be holding the region lock.
+ */
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction
+ * @gpu_alloc: The physical allocation to make evictable
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Take the provided region and make all the physical pages within it
+ * reclaimable by the kernel, updating the per-process VM stats as well.
+ * Remove any CPU mappings (as these can't be removed in the shrinker callback
+ * as mmap_sem might already be taken) but leave the GPU mapping intact as
+ * and until the shrinker reclaims the allocation.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc);
+
+/**
+ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for
+ * eviction.
+ * @alloc: The physical allocation to remove eviction eligibility from.
+ *
+ * Return: True if the allocation had its backing restored and false if
+ * it hasn't.
+ *
+ * Make the physical pages in the region no longer reclaimable and update the
+ * per-process stats, if the shrinker has already evicted the memory then
+ * re-allocate it if the region is still alive.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
+
+struct kbase_vmap_struct {
+	u64 gpu_addr;
+	struct kbase_mem_phy_alloc *cpu_alloc;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+	phys_addr_t *cpu_pages;
+	phys_addr_t *gpu_pages;
+	void *addr;
+	size_t size;
+	bool is_cached;
+};
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map);
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
+
+/** @brief Allocate memory from kernel space and map it onto the GPU
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param size   The size of the allocation in bytes
+ * @param handle An opaque structure used to contain the state needed to free the memory
+ * @return the VA for kernel space and GPU MMU
+ */
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
+
+/** @brief Free/unmap memory allocated by kbase_va_alloc
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param handle An opaque structure returned by the kbase_va_alloc function.
+ */
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
+
+#endif				/* _KBASE_MEM_LINUX_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
new file mode 100755
index 0000000..441180b
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_MEM_LOWLEVEL_H
+#define _KBASE_MEM_LOWLEVEL_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/dma-mapping.h>
+
+/**
+ * @brief Flags for kbase_phy_allocator_pages_alloc
+ */
+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0)	/** Default allocation flag */
+#define KBASE_PHY_PAGES_FLAG_CLEAR   (1 << 0)	/** Clear the pages after allocation */
+#define KBASE_PHY_PAGES_FLAG_POISON  (1 << 1)	/** Fill the memory with a poison value */
+
+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON)
+
+#define KBASE_PHY_PAGES_POISON_VALUE  0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */
+
+enum kbase_sync_type {
+	KBASE_SYNC_TO_CPU,
+	KBASE_SYNC_TO_DEVICE
+};
+
+#endif				/* _KBASE_LOWLEVEL_H */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
new file mode 100755
index 0000000..c0f47be
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -0,0 +1,599 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/shrinker.h>
+#include <linux/atomic.h>
+#include <linux/version.h>
+
+/* This function is only provided for backwards compatibility with kernels
+ * which use the old carveout allocator.
+ *
+ * The forward declaration is to keep sparse happy.
+ */
+int __init kbase_carveout_mem_reserve(
+		phys_addr_t size);
+int __init kbase_carveout_mem_reserve(phys_addr_t size)
+{
+	return 0;
+}
+
+#define pool_dbg(pool, format, ...) \
+	dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format,	\
+		(pool->next_pool) ? "kctx" : "kbdev",	\
+		kbase_mem_pool_size(pool),	\
+		kbase_mem_pool_max_size(pool),	\
+		##__VA_ARGS__)
+
+#define NOT_DIRTY false
+#define NOT_RECLAIMED false
+
+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
+{
+	spin_lock(&pool->pool_lock);
+}
+
+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
+{
+	spin_unlock(&pool->pool_lock);
+}
+
+static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
+{
+	ssize_t max_size = kbase_mem_pool_max_size(pool);
+	ssize_t cur_size = kbase_mem_pool_size(pool);
+
+	return max(max_size - cur_size, (ssize_t)0);
+}
+
+static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool);
+}
+
+static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) == 0;
+}
+
+static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_add(&p->lru, &pool->page_list);
+	pool->cur_size++;
+
+	zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "added page\n");
+}
+
+static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_locked(pool, p);
+	kbase_mem_pool_unlock(pool);
+}
+
+static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_for_each_entry(p, page_list, lru) {
+		zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+	}
+
+	list_splice(page_list, &pool->page_list);
+	pool->cur_size += nr_pages;
+
+	pool_dbg(pool, "added %zu pages\n", nr_pages);
+}
+
+static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_list_locked(pool, page_list, nr_pages);
+	kbase_mem_pool_unlock(pool);
+}
+
+static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (kbase_mem_pool_is_empty(pool))
+		return NULL;
+
+	p = list_first_entry(&pool->page_list, struct page, lru);
+	list_del_init(&p->lru);
+	pool->cur_size--;
+
+	zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "removed page\n");
+
+	return p;
+}
+
+static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	kbase_mem_pool_lock(pool);
+	p = kbase_mem_pool_remove_locked(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return p;
+}
+
+static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+
+	dma_sync_single_for_device(dev, kbase_dma_addr(p),
+			PAGE_SIZE, DMA_BIDIRECTIONAL);
+}
+
+static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	clear_highpage(p);
+	kbase_mem_pool_sync_page(pool, p);
+}
+
+static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool,
+		struct page *p)
+{
+	/* Zero page before spilling */
+	kbase_mem_pool_zero_page(next_pool, p);
+
+	kbase_mem_pool_add(next_pool, p);
+}
+
+static struct page *kbase_mem_pool_alloc_page(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+	gfp_t gfp;
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \
+	LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+	/* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+	gfp = GFP_USER | __GFP_ZERO;
+#else
+	gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+	if (current->flags & PF_KTHREAD) {
+		/* Don't trigger OOM killer from kernel threads, e.g. when
+		 * growing memory on GPU page fault */
+		gfp |= __GFP_NORETRY;
+	}
+
+	p = alloc_page(gfp);
+	if (!p)
+		return NULL;
+
+	dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, dma_addr)) {
+		__free_page(p);
+		return NULL;
+	}
+
+	WARN_ON(dma_addr != page_to_phys(p));
+
+	kbase_set_dma_addr(p, dma_addr);
+
+	pool_dbg(pool, "alloced page from kernel\n");
+
+	return p;
+}
+
+static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr = kbase_dma_addr(p);
+
+	dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	kbase_clear_dma_addr(p);
+	__free_page(p);
+
+	pool_dbg(pool, "freed page to kernel\n");
+}
+
+static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	struct page *p;
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	return i;
+}
+
+static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	size_t nr_freed;
+
+	kbase_mem_pool_lock(pool);
+	nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	kbase_mem_pool_unlock(pool);
+
+	return nr_freed;
+}
+
+static size_t kbase_mem_pool_grow(struct kbase_mem_pool *pool,
+		size_t nr_to_grow)
+{
+	struct page *p;
+	size_t i;
+
+	for (i = 0; i < nr_to_grow && !kbase_mem_pool_is_full(pool); i++) {
+		p = kbase_mem_pool_alloc_page(pool);
+		kbase_mem_pool_add(pool, p);
+	}
+
+	return i;
+}
+
+size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
+{
+	size_t cur_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	if (new_size < cur_size)
+		kbase_mem_pool_shrink(pool, cur_size - new_size);
+	else if (new_size > cur_size)
+		kbase_mem_pool_grow(pool, new_size - cur_size);
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	return cur_size;
+}
+
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size)
+{
+	size_t cur_size;
+	size_t nr_to_shrink;
+
+	kbase_mem_pool_lock(pool);
+
+	pool->max_size = max_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+	if (max_size < cur_size) {
+		nr_to_shrink = cur_size - max_size;
+		kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	}
+
+	kbase_mem_pool_unlock(pool);
+}
+
+
+static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+	pool_dbg(pool, "reclaim count: %zu\n", kbase_mem_pool_size(pool));
+	return kbase_mem_pool_size(pool);
+}
+
+static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+	unsigned long freed;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+
+	pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan);
+
+	freed = kbase_mem_pool_shrink(pool, sc->nr_to_scan);
+
+	pool_dbg(pool, "reclaim freed %ld pages\n", freed);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_pool_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_pool_reclaim_count_objects(s, sc);
+
+	return kbase_mem_pool_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool)
+{
+	pool->cur_size = 0;
+	pool->max_size = max_size;
+	pool->kbdev = kbdev;
+	pool->next_pool = next_pool;
+
+	spin_lock_init(&pool->pool_lock);
+	INIT_LIST_HEAD(&pool->page_list);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink;
+#else
+	pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects;
+	pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects;
+#endif
+	pool->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	pool->reclaim.batch = 0;
+#endif
+	register_shrinker(&pool->reclaim);
+
+	pool_dbg(pool, "initialized\n");
+
+	return 0;
+}
+
+void kbase_mem_pool_term(struct kbase_mem_pool *pool)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_spill = 0;
+	LIST_HEAD(spill_list);
+	int i;
+
+	pool_dbg(pool, "terminate()\n");
+
+	unregister_shrinker(&pool->reclaim);
+
+	kbase_mem_pool_lock(pool);
+	pool->max_size = 0;
+
+	if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool (may overspill) */
+		nr_to_spill = kbase_mem_pool_capacity(next_pool);
+		nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill);
+
+		/* Zero pages first without holding the next_pool lock */
+		for (i = 0; i < nr_to_spill; i++) {
+			p = kbase_mem_pool_remove_locked(pool);
+			kbase_mem_pool_zero_page(pool, p);
+			list_add(&p->lru, &spill_list);
+		}
+	}
+
+	while (!kbase_mem_pool_is_empty(pool)) {
+		/* Free remaining pages to kernel */
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	kbase_mem_pool_unlock(pool);
+
+	if (next_pool && nr_to_spill) {
+		/* Add new page list to next_pool */
+		kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill);
+
+		pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill);
+	}
+
+	pool_dbg(pool, "terminated\n");
+}
+
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	pool_dbg(pool, "alloc()\n");
+
+	p = kbase_mem_pool_remove(pool);
+
+	if (!p && pool->next_pool) {
+		/* Allocate via next pool */
+		return kbase_mem_pool_alloc(pool->next_pool);
+	}
+
+	if (!p) {
+		/* Get page from kernel */
+		p = kbase_mem_pool_alloc_page(pool);
+	}
+
+	return p;
+}
+
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+
+	pool_dbg(pool, "free()\n");
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add(pool, p);
+	} else if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool */
+		kbase_mem_pool_spill(next_pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages)
+{
+	struct page *p;
+	size_t nr_from_pool;
+	size_t i;
+	int err = -ENOMEM;
+
+	pool_dbg(pool, "alloc_pages(%zu):\n", nr_pages);
+
+	/* Get pages from this pool */
+	kbase_mem_pool_lock(pool);
+	nr_from_pool = min(nr_pages, kbase_mem_pool_size(pool));
+	for (i = 0; i < nr_from_pool; i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		pages[i] = page_to_phys(p);
+	}
+	kbase_mem_pool_unlock(pool);
+
+	if (i != nr_pages && pool->next_pool) {
+		/* Allocate via next pool */
+		err = kbase_mem_pool_alloc_pages(pool->next_pool,
+				nr_pages - i, pages + i);
+
+		if (err)
+			goto err_rollback;
+
+		i += nr_pages - i;
+	}
+
+	/* Get any remaining pages from kernel */
+	for (; i < nr_pages; i++) {
+		p = kbase_mem_pool_alloc_page(pool);
+		if (!p)
+			goto err_rollback;
+		pages[i] = page_to_phys(p);
+	}
+
+	pool_dbg(pool, "alloc_pages(%zu) done\n", nr_pages);
+
+	return 0;
+
+err_rollback:
+	kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED);
+	return err;
+}
+
+static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
+		size_t nr_pages, phys_addr_t *pages, bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first without holding the pool lock */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!pages[i]))
+			continue;
+
+		p = phys_to_page(pages[i]);
+
+		if (zero)
+			kbase_mem_pool_zero_page(pool, p);
+		else if (sync)
+			kbase_mem_pool_sync_page(pool, p);
+
+		list_add(&p->lru, &new_page_list);
+		nr_to_pool++;
+		pages[i] = 0;
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages, bool dirty, bool reclaimed)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	pool_dbg(pool, "free_pages(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
+
+		i += nr_to_pool;
+
+		if (i != nr_pages && next_pool) {
+			/* Spill to next pool (may overspill) */
+			nr_to_pool = kbase_mem_pool_capacity(next_pool);
+			nr_to_pool = min(nr_pages - i, nr_to_pool);
+
+			kbase_mem_pool_add_array(next_pool, nr_to_pool,
+					pages + i, true, dirty);
+			i += nr_to_pool;
+		}
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!pages[i]))
+			continue;
+
+		p = phys_to_page(pages[i]);
+		if (reclaimed)
+			zone_page_state_add(-1, page_zone(p),
+					NR_SLAB_RECLAIMABLE);
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = 0;
+	}
+
+	pool_dbg(pool, "free_pages(%zu) done\n", nr_pages);
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
new file mode 100755
index 0000000..493665b
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <mali_kbase_mem_pool_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_trim(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops,
+		kbase_mem_pool_debugfs_size_get,
+		kbase_mem_pool_debugfs_size_set,
+		"%llu\n");
+
+static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_max_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_set_max_size(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops,
+		kbase_mem_pool_debugfs_max_size_get,
+		kbase_mem_pool_debugfs_max_size_set,
+		"%llu\n");
+
+void kbase_mem_pool_debugfs_add(struct dentry *parent,
+		struct kbase_mem_pool *pool)
+{
+	debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_size_fops);
+
+	debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_max_size_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
new file mode 100755
index 0000000..458f3f0
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_MEM_POOL_DEBUGFS_H
+#define _KBASE_MEM_POOL_DEBUGFS_H
+
+#include <mali_kbase.h>
+
+/**
+ * kbase_mem_pool_debugfs_add - add debugfs knobs for @pool
+ * @parent: Parent debugfs dentry
+ * @pool:   Memory pool to control
+ *
+ * Adds two debugfs files under @parent:
+ * - mem_pool_size: get/set the current size of @pool
+ * - mem_pool_max_size: get/set the max size of @pool
+ */
+void kbase_mem_pool_debugfs_add(struct dentry *parent,
+		struct kbase_mem_pool *pool);
+
+#endif  /*_KBASE_MEM_POOL_DEBUGFS_H*/
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
new file mode 100755
index 0000000..0b19d05
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,118 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase_gpu_memory_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Show callback for the @c mem_profile debugfs file.
+ *
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise
+ */
+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+
+	seq_putc(sfile, '\n');
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for mem_profile
+ */
+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_mem_profile_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+	.open = kbasep_mem_profile_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+				kctx->mem_profile_initialized);
+
+	if (!kctx->mem_profile_initialized) {
+		if (!debugfs_create_file("mem_profile", S_IRUGO,
+					kctx->kctx_dentry, kctx,
+					&kbasep_mem_profile_debugfs_fops)) {
+			err = -EAGAIN;
+		} else {
+			kctx->mem_profile_initialized = true;
+		}
+	}
+
+	if (kctx->mem_profile_initialized) {
+		kfree(kctx->mem_profile_data);
+		kctx->mem_profile_data = data;
+		kctx->mem_profile_size = size;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d",
+				err, kctx->mem_profile_initialized);
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return err;
+}
+
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+				kctx->mem_profile_initialized);
+
+	kfree(kctx->mem_profile_data);
+	kctx->mem_profile_data = NULL;
+	kctx->mem_profile_size = 0;
+
+	mutex_unlock(&kctx->mem_profile_lock);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	kfree(data);
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
new file mode 100755
index 0000000..9555197
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
@@ -0,0 +1,60 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs.h
+ * Header file for mem profiles entries in debugfs
+ *
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
+#define _KBASE_MEM_PROFILE_DEBUGFS_H
+
+#include <mali_kbase.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Remove entry from Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
+
+/**
+ * @brief Insert @p data to the debugfs file so it can be read by userspace
+ *
+ * The function takes ownership of @p data and frees it later when new data
+ * is inserted.
+ *
+ * If the debugfs entry corresponding to the @p kctx doesn't exist,
+ * an attempt will be made to create it.
+ *
+ * @param kctx The context whose debugfs file @p data should be inserted to
+ * @param data A NULL-terminated string to be inserted to the debugfs file,
+ *             without the trailing new line character
+ * @param size The length of the @p data string
+ * @return 0 if @p data inserted correctly
+ *         -EAGAIN in case of error
+ * @post @ref mem_profile_initialized will be set to @c true
+ *       the first time this function succeeds.
+ */
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size);
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
new file mode 100755
index 0000000..82f0702
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs_buf_size.h
+ * Header file for the size of the buffer to accumulate the histogram report text in
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * The size of the buffer to accumulate the histogram report text in
+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56))
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
new file mode 100755
index 0000000..bf45d39
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -0,0 +1,1813 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mmu.c
+ * Base kernel MMU management.
+ */
+
+/* #define DEBUG    1 */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_debug.h>
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+/**
+ * kbase_mmu_sync_pgd - sync page directory to memory
+ * @kbdev:	Device pointer.
+ * @handle:	Address of DMA region.
+ * @size:       Size of the region to sync.
+ *
+ * This should be called after each page directory update.
+ */
+
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
+		dma_addr_t handle, size_t size)
+{
+	/* If page table is not coherent then ensure the gpu can read
+	 * the pages from memory
+	 */
+	if (kbdev->system_coherency != COHERENCY_ACE)
+		dma_sync_single_for_device(kbdev->dev, handle, size,
+				DMA_TO_DEVICE);
+}
+
+/*
+ * Definitions:
+ * - PGD: Page Directory.
+ * - PTE: Page Table Entry. A 64bit value pointing to the next
+ *        level of translation
+ * - ATE: Address Transation Entry. A 64bit value pointing to
+ *        a 4kB physical page.
+ */
+
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str);
+
+
+static size_t make_multiple(size_t minimum, size_t multiple)
+{
+	size_t remainder = minimum % multiple;
+
+	if (remainder == 0)
+		return minimum;
+
+	return minimum + multiple - remainder;
+}
+
+void page_fault_worker(struct work_struct *data)
+{
+	u64 fault_pfn;
+	u32 fault_status;
+	size_t new_pages;
+	size_t fault_rel_pfn;
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_va_region *region;
+	int err;
+	bool grown = false;
+
+	faulting_as = container_of(data, struct kbase_as, work_pagefault);
+	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+
+	fault_status = faulting_as->fault_status;
+	switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
+		/* need to check against the region to handle this one */
+		break;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Tranlation table bus fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
+		/* nothing to do, but we don't expect this fault currently */
+		dev_warn(kbdev->dev, "Access flag unexpectedly set");
+		goto fault_done;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
+
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Address size fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Memory attributes fault");
+		goto fault_done;
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	default:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Unknown fault code");
+		goto fault_done;
+	}
+
+	/* so we have a translation fault, let's see if it is for growable
+	 * memory */
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx,
+			faulting_as->fault_addr);
+	if (!region || region->flags & KBASE_REG_FREE) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if ((region->flags & GROWABLE_FLAGS_REQUIRED)
+			!= GROWABLE_FLAGS_REQUIRED) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not growable");
+		goto fault_done;
+	}
+
+	if ((region->flags & KBASE_REG_DONT_NEED)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Don't need memory can't be grown");
+		goto fault_done;
+	}
+
+	/* find the size we need to grow it by */
+	/* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address
+	 * validating the fault_adress to be within a size_t from the start_pfn */
+	fault_rel_pfn = fault_pfn - region->start_pfn;
+
+	if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
+		dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+				faulting_as->fault_addr, region->start_pfn,
+				region->start_pfn +
+				kbase_reg_current_backed_size(region));
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* [1] in case another page fault occurred while we were
+		 * handling the (duplicate) page fault we need to ensure we
+		 * don't loose the other page fault as result of us clearing
+		 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
+		 * an UNLOCK command that will retry any stalled memory
+		 * transaction (which should cause the other page fault to be
+		 * raised again).
+		 */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+
+		goto fault_done;
+	}
+
+	new_pages = make_multiple(fault_rel_pfn -
+			kbase_reg_current_backed_size(region) + 1,
+			region->extent);
+
+	/* cap to max vsize */
+	if (new_pages + kbase_reg_current_backed_size(region) >
+			region->nr_pages)
+		new_pages = region->nr_pages -
+				kbase_reg_current_backed_size(region);
+
+	if (0 == new_pages) {
+		/* Duplicate of a fault we've already handled, nothing to do */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* See comment [1] about UNLOCK usage */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+		goto fault_done;
+	}
+
+	if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) {
+		if (region->gpu_alloc != region->cpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					region->cpu_alloc, new_pages) == 0) {
+				grown = true;
+			} else {
+				kbase_free_phy_pages_helper(region->gpu_alloc,
+						new_pages);
+			}
+		} else {
+			grown = true;
+		}
+	}
+
+
+	if (grown) {
+		u32 op;
+
+		/* alloc success */
+		KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
+
+		/* set up the new pages */
+		err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_gpu_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags);
+		if (err) {
+			kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
+			if (region->gpu_alloc != region->cpu_alloc)
+				kbase_free_phy_pages_helper(region->cpu_alloc,
+						new_pages);
+			kbase_gpu_vm_unlock(kctx);
+			/* The locked VA region will be unlocked and the cache invalidated in here */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page table update failure");
+			goto fault_done;
+		}
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
+#endif
+		kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages);
+
+		/* AS transaction begin */
+		mutex_lock(&faulting_as->transaction_mutex);
+
+		/* flush L2 and unlock the VA (resumes the MMU) */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+			op = AS_COMMAND_FLUSH;
+		else
+			op = AS_COMMAND_FLUSH_PT;
+
+		/* clear MMU interrupt - this needs to be done after updating
+		 * the page tables but before issuing a FLUSH command. The
+		 * FLUSH cmd has a side effect that it restarts stalled memory
+		 * transactions in other address spaces which may cause
+		 * another fault to occur. If we didn't clear the interrupt at
+		 * this stage a new IRQ might not be raised when the GPU finds
+		 * a MMU IRQ is already pending.
+		 */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
+					  faulting_as->fault_addr >> PAGE_SHIFT,
+					  new_pages,
+					  op, 1);
+
+		mutex_unlock(&faulting_as->transaction_mutex);
+		/* AS transaction end */
+
+		/* reenable this in the mask */
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* failed to extend, handle as a normal PF */
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Page allocation failure");
+	}
+
+fault_done:
+	/*
+	 * By this point, the fault was handled in some way,
+	 * so release the ctx refcount
+	 */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
+{
+	u64 *page;
+	int i;
+	struct page *p;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
+	kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	p = kbase_mem_pool_alloc(&kctx->mem_pool);
+	if (!p)
+		goto sub_pages;
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+
+	page = kmap(p);
+	if (NULL == page)
+		goto alloc_free;
+
+	kbase_process_page_usage_inc(kctx, 1);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
+		kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
+
+	kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+	kunmap(p);
+	return page_to_phys(p);
+
+alloc_free:
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+sub_pages:
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
+
+/* Given PGD PFN for level N, return PGD PFN for level N+1 */
+static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	p = pfn_to_page(PFN_DOWN(pgd));
+	page = kmap(p);
+	if (NULL == page) {
+		dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
+		return 0;
+	}
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+
+	if (!target_pgd) {
+		target_pgd = kbase_mmu_alloc_pgd(kctx);
+		if (!target_pgd) {
+			dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+			kunmap(p);
+			return 0;
+		}
+
+		kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+		/* Rely on the caller to update the address space flags. */
+	}
+
+	kunmap(p);
+	return target_pgd;
+}
+
+static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd = kctx->pgd;
+	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
+		pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l);
+		/* Handle failure condition */
+		if (!pgd) {
+			dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n");
+			return 0;
+		}
+	}
+
+	return pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+
+	KBASE_DEBUG_ASSERT(pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail */
+	KBASE_DEBUG_ASSERT(NULL != page);
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+	/* As we are recovering from what has already been set up, we should have a target_pgd */
+	KBASE_DEBUG_ASSERT(0 != target_pgd);
+	kunmap_atomic(page);
+	return target_pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd = kctx->pgd;
+
+	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
+		pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
+		/* Should never fail */
+		KBASE_DEBUG_ASSERT(0 != pgd);
+	}
+
+	return pgd;
+}
+
+static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vpfn,
+					      size_t nr)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn);
+		KBASE_DEBUG_ASSERT(0 != pgd);
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+
+		pgd_page = kmap_atomic(p);
+		KBASE_DEBUG_ASSERT(NULL != pgd_page);
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+		kunmap_atomic(pgd_page);
+	}
+}
+
+/*
+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					phys_addr_t phys, size_t nr,
+					unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_single_page only partially completes we need to be
+	 * able to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+			kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+					phys, flags);
+		}
+
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table.
+		 * If further pages need inserting and fail we need to undo what
+		 * has already taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+	mutex_unlock(&kctx->mmu_lock);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_pages only partially completes we need to be able
+	 * to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+			kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+					phys[i], flags);
+		}
+
+		phys += count;
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table. If further pages
+		 * need inserting and fail we need to undo what has already
+		 * taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+
+/**
+ * This function is responsible for validating the MMU PTs
+ * triggering reguired flushes.
+ *
+ * * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+static void kbase_mmu_flush(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+	struct kbase_device *kbdev;
+	bool ctx_is_in_runpool;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	kbdev = kctx->kbdev;
+
+	/* We must flush if we're currently running jobs. At the very least, we need to retain the
+	 * context to ensure it doesn't schedule out whilst we're trying to flush it */
+	ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
+
+	if (ctx_is_in_runpool) {
+		KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+		/* Second level check is to try to only do this when jobs are running. The refcount is
+		 * a heuristic for this. */
+		if (kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr].as_busy_refcount >= 2) {
+			if (!kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+				int ret;
+				u32 op;
+
+				/* AS transaction begin */
+				mutex_lock(&kbdev->as[
+						kctx->as_nr].transaction_mutex);
+
+				if (kbase_hw_has_issue(kbdev,
+						BASE_HW_ISSUE_6367))
+					op = AS_COMMAND_FLUSH;
+				else
+					op = AS_COMMAND_FLUSH_MEM;
+
+				ret = kbase_mmu_hw_do_operation(kbdev,
+							&kbdev->as[kctx->as_nr],
+							kctx, vpfn, nr,
+							op, 0);
+#if KBASE_GPU_RESET_EN
+				if (ret) {
+					/* Flush failed to complete, assume the
+					 * GPU has hung and perform a reset to
+					 * recover */
+					dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
+					if (kbase_prepare_to_reset_gpu(kbdev))
+						kbase_reset_gpu(kbdev);
+				}
+#endif /* KBASE_GPU_RESET_EN */
+
+				mutex_unlock(&kbdev->as[
+						kctx->as_nr].transaction_mutex);
+				/* AS transaction end */
+
+				kbase_pm_context_idle(kbdev);
+			}
+		}
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+}
+
+/*
+ * We actually only discard the ATE, and not the page table
+ * pages. There is a potential DoS here, as we'll leak memory by
+ * having PTEs that are potentially unused.  Will require physical
+ * page accounting, so MMU pages are part of the process allocation.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	struct kbase_device *kbdev;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
+
+	if (0 == nr) {
+		/* early out if nothing to do */
+		return 0;
+	}
+
+	mutex_lock(&kctx->mmu_lock);
+
+	kbdev = kctx->kbdev;
+	mmu_mode = kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
+			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush(kctx, vpfn, requested_nr);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+/**
+ * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
+ * This call is being triggered as a response to the changes of the mem attributes
+ *
+ * @pre : The caller is responsible for validating the memory attributes
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mutex_lock(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags on GPU PFN 0x%llx from phys %p, %zu pages",
+			vpfn, phys, nr);
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		size_t count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
+			dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i],
+					flags);
+
+		phys += count;
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(pfn_to_page(PFN_DOWN(pgd)));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush(kctx, vpfn, requested_nr);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+/* This is a debug feature only */
+static void mmu_check_unused(struct kbase_context *kctx, phys_addr_t pgd)
+{
+	u64 *page;
+	int i;
+
+	page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != page);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		if (kctx->kbdev->mmu_mode->ate_is_valid(page[i]))
+			beenthere(kctx, "live pte %016lx", (unsigned long)page[i]);
+	}
+	kunmap_atomic(page);
+}
+
+static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != pgd_page);
+	/* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */
+	memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+	kunmap_atomic(pgd_page);
+	pgd_page = pgd_page_buffer;
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+		if (target_pgd) {
+			if (level < (MIDGARD_MMU_BOTTOMLEVEL - 1)) {
+				mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
+			} else {
+				/*
+				 * So target_pte is a level-3 page.
+				 * As a leaf, it is safe to free it.
+				 * Unless we have live pages attached to it!
+				 */
+				mmu_check_unused(kctx, target_pgd);
+			}
+
+			beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1);
+			if (zap) {
+				struct page *p = phys_to_page(target_pgd);
+
+				kbase_mem_pool_free(&kctx->mem_pool, p, true);
+				kbase_process_page_usage_dec(kctx, 1);
+				kbase_atomic_sub_pages(1, &kctx->used_pages);
+				kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+			}
+		}
+	}
+}
+
+int kbase_mmu_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
+
+	mutex_init(&kctx->mmu_lock);
+
+	/* Preallocate MMU depth of four pages for mmu_teardown_level to use */
+	kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+
+	if (NULL == kctx->mmu_teardown_pages)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void kbase_mmu_term(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	kfree(kctx->mmu_teardown_pages);
+	kctx->mmu_teardown_pages = NULL;
+}
+
+void kbase_mmu_free_pgd(struct kbase_context *kctx)
+{
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	mutex_lock(&kctx->mmu_lock);
+	mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages);
+	mutex_unlock(&kctx->mmu_lock);
+
+	beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
+	kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true);
+	kbase_process_page_usage_dec(kctx, 1);
+	new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
+
+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
+	size_t dump_size;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+	if (!pgd_page) {
+		dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
+		return 0;
+	}
+
+	if (*size_left >= size) {
+		/* A modified physical address that contains the page table level */
+		u64 m_pgd = pgd | level;
+
+		/* Put the modified physical address in the output buffer */
+		memcpy(*buffer, &m_pgd, sizeof(m_pgd));
+		*buffer += sizeof(m_pgd);
+
+		/* Followed by the page table itself */
+		memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
+		*buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
+
+		*size_left -= size;
+	}
+
+	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
+		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+			if (mmu_mode->pte_is_valid(pgd_page[i])) {
+				target_pgd = mmu_mode->pte_to_phy_addr(
+						pgd_page[i]);
+
+				dump_size = kbasep_mmu_dump_level(kctx,
+						target_pgd, level + 1,
+						buffer, size_left);
+				if (!dump_size) {
+					kunmap(pfn_to_page(PFN_DOWN(pgd)));
+					return 0;
+				}
+				size += dump_size;
+			}
+		}
+	}
+
+	kunmap(pfn_to_page(PFN_DOWN(pgd)));
+
+	return size;
+}
+
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
+{
+	void *kaddr;
+	size_t size_left;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (0 == nr_pages) {
+		/* can't dump in a 0 sized buffer, early out */
+		return NULL;
+	}
+
+	mutex_lock(&kctx->mmu_lock);
+
+	size_left = nr_pages * PAGE_SIZE;
+
+	KBASE_DEBUG_ASSERT(0 != size_left);
+	kaddr = vmalloc_user(size_left);
+
+	if (kaddr) {
+		u64 end_marker = 0xFFULL;
+		char *buffer;
+		char *mmu_dump_buffer;
+		u64 config[3];
+		size_t size;
+
+		buffer = (char *)kaddr;
+		mmu_dump_buffer = buffer;
+
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
+			struct kbase_mmu_setup as_setup;
+
+			kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup);
+			config[0] = as_setup.transtab;
+			config[1] = as_setup.memattr;
+			config[2] = as_setup.transcfg;
+			memcpy(buffer, &config, sizeof(config));
+			mmu_dump_buffer += sizeof(config);
+			size_left -= sizeof(config);
+		}
+
+
+
+		size = kbasep_mmu_dump_level(kctx,
+				kctx->pgd,
+				MIDGARD_MMU_TOPLEVEL,
+				&mmu_dump_buffer,
+				&size_left);
+
+		if (!size)
+			goto fail_free;
+
+		/* Add on the size for the end marker */
+		size += sizeof(u64);
+		/* Add on the size for the config */
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4))
+			size += sizeof(config);
+
+
+		if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) {
+			/* The buffer isn't big enough - free the memory and return failure */
+			goto fail_free;
+		}
+
+		/* Add the end marker */
+		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return kaddr;
+
+fail_free:
+	vfree(kaddr);
+	mutex_unlock(&kctx->mmu_lock);
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_dump);
+
+void bus_fault_worker(struct work_struct *data)
+{
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif /* KBASE_GPU_RESET_EN */
+
+	faulting_as = container_of(data, struct kbase_as, work_busfault);
+
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
+	if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+
+		/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+		/* AS transaction begin */
+		mutex_lock(&kbdev->as[as_no].transaction_mutex);
+
+		/* Set the MMU into unmapped mode */
+		kbase_mmu_disable_as(kbdev, as_no);
+
+		mutex_unlock(&kbdev->as[as_no].transaction_mutex);
+		/* AS transaction end */
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+
+		kbase_pm_context_idle(kbdev);
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
+{
+	const char *e;
+
+	switch (exception_code) {
+		/* Non-Fault Status code */
+	case 0x00:
+		e = "NOT_STARTED/IDLE/OK";
+		break;
+	case 0x01:
+		e = "DONE";
+		break;
+	case 0x02:
+		e = "INTERRUPTED";
+		break;
+	case 0x03:
+		e = "STOPPED";
+		break;
+	case 0x04:
+		e = "TERMINATED";
+		break;
+	case 0x08:
+		e = "ACTIVE";
+		break;
+		/* Job exceptions */
+	case 0x40:
+		e = "JOB_CONFIG_FAULT";
+		break;
+	case 0x41:
+		e = "JOB_POWER_FAULT";
+		break;
+	case 0x42:
+		e = "JOB_READ_FAULT";
+		break;
+	case 0x43:
+		e = "JOB_WRITE_FAULT";
+		break;
+	case 0x44:
+		e = "JOB_AFFINITY_FAULT";
+		break;
+	case 0x48:
+		e = "JOB_BUS_FAULT";
+		break;
+	case 0x50:
+		e = "INSTR_INVALID_PC";
+		break;
+	case 0x51:
+		e = "INSTR_INVALID_ENC";
+		break;
+	case 0x52:
+		e = "INSTR_TYPE_MISMATCH";
+		break;
+	case 0x53:
+		e = "INSTR_OPERAND_FAULT";
+		break;
+	case 0x54:
+		e = "INSTR_TLS_FAULT";
+		break;
+	case 0x55:
+		e = "INSTR_BARRIER_FAULT";
+		break;
+	case 0x56:
+		e = "INSTR_ALIGN_FAULT";
+		break;
+	case 0x58:
+		e = "DATA_INVALID_FAULT";
+		break;
+	case 0x59:
+		e = "TILE_RANGE_FAULT";
+		break;
+	case 0x5A:
+		e = "ADDR_RANGE_FAULT";
+		break;
+	case 0x60:
+		e = "OUT_OF_MEMORY";
+		break;
+		/* GPU exceptions */
+	case 0x80:
+		e = "DELAYED_BUS_FAULT";
+		break;
+	case 0x88:
+		e = "SHAREABILITY_FAULT";
+		break;
+		/* MMU exceptions */
+	case 0xC0:
+	case 0xC1:
+	case 0xC2:
+	case 0xC3:
+	case 0xC4:
+	case 0xC5:
+	case 0xC6:
+	case 0xC7:
+		e = "TRANSLATION_FAULT";
+		break;
+	case 0xC8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xC9:
+	case 0xCA:
+	case 0xCB:
+	case 0xCC:
+	case 0xCD:
+	case 0xCE:
+	case 0xCF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		e = "PERMISSION_FAULT";
+		break;
+	case 0xD0:
+	case 0xD1:
+	case 0xD2:
+	case 0xD3:
+	case 0xD4:
+	case 0xD5:
+	case 0xD6:
+	case 0xD7:
+		e = "TRANSTAB_BUS_FAULT";
+		break;
+	case 0xD8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xD9:
+	case 0xDA:
+	case 0xDB:
+	case 0xDC:
+	case 0xDD:
+	case 0xDE:
+	case 0xDF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		e = "ACCESS_FLAG";
+		break;
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xE0:
+	case 0xE1:
+	case 0xE2:
+	case 0xE3:
+	case 0xE4:
+	case 0xE5:
+	case 0xE6:
+	case 0xE7:
+		e = "ADDRESS_SIZE_FAULT";
+		break;
+	case 0xE8:
+	case 0xE9:
+	case 0xEA:
+	case 0xEB:
+	case 0xEC:
+	case 0xED:
+	case 0xEE:
+	case 0xEF:
+		e = "MEMORY_ATTRIBUTES_FAULT";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		break;
+	default:
+		e = "UNKNOWN";
+		break;
+	};
+
+	return e;
+}
+
+static const char *access_type_name(struct kbase_device *kbdev,
+		u32 fault_status)
+{
+	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		return "ATOMIC";
+#else
+		return "UNKNOWN";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		return "READ";
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		return "WRITE";
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		return "EXECUTE";
+	default:
+		WARN_ON(1);
+		return NULL;
+	}
+}
+
+/**
+ * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on.
+ */
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str)
+{
+	unsigned long flags;
+	int exception_type;
+	int access_type;
+	int source_id;
+	int as_no;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif
+
+	as_no = as->number;
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	/* ASSERT that the context won't leave the runpool */
+	KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0);
+
+	/* decode the fault status */
+	exception_type = as->fault_status & 0xFF;
+	access_type = (as->fault_status >> 8) & 0x3;
+	source_id = (as->fault_status >> 16);
+
+	/* terminal fault, print info about the fault */
+	dev_err(kbdev->dev,
+		"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+		"Reason: %s\n"
+		"raw fault status 0x%X\n"
+		"decoded fault status: %s\n"
+		"exception type 0x%X: %s\n"
+		"access type 0x%X: %s\n"
+		"source id 0x%X\n"
+		"pid: %d\n",
+		as_no, as->fault_addr,
+		reason_str,
+		as->fault_status,
+		(as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+		exception_type, kbase_exception_name(kbdev, exception_type),
+		access_type, access_type_name(kbdev, as->fault_status),
+		source_id,
+		kctx->pid);
+
+	/* hardware counters dump fault handling */
+	if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
+			(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_DUMPING)) {
+		unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+
+		if ((as->fault_addr >= kbdev->hwcnt.addr) &&
+				(as->fault_addr < (kbdev->hwcnt.addr +
+						(num_core_groups * 2048))))
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+	}
+
+	/* Stop the kctx from submitting more jobs and cause it to be scheduled
+	 * out/rescheduled - this will occur on releasing the context's refcount */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	kbasep_js_clear_submit_allowed(js_devdata, kctx);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	/* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
+	 * context can appear in the job slots from this point on */
+	kbase_backend_jm_kill_jobs_from_kctx(kctx);
+	/* AS transaction begin */
+	mutex_lock(&as->transaction_mutex);
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+	kbase_mmu_disable_as(kbdev, as_no);
+
+	mutex_unlock(&as->transaction_mutex);
+	/* AS transaction end */
+	/* Clear down the fault */
+	kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+	kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+}
+
+void kbasep_as_do_poke(struct work_struct *work)
+{
+	struct kbase_as *as;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(work);
+	as = container_of(work, struct kbase_as, poke_work);
+	kbdev = container_of(as, struct kbase_device, as[as->number]);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	/* GPU power will already be active by virtue of the caller holding a JS
+	 * reference on the address space, and will not release it until this worker
+	 * has finished */
+
+	/* Further to the comment above, we know that while this function is running
+	 * the AS will not be released as before the atom is released this workqueue
+	 * is flushed (in kbase_as_poking_timer_release_atom)
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
+
+	/* AS transaction begin */
+	mutex_lock(&as->transaction_mutex);
+	/* Force a uTLB invalidate */
+	kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
+				  AS_COMMAND_UNLOCK, 0);
+	mutex_unlock(&as->transaction_mutex);
+	/* AS transaction end */
+
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+	if (as->poke_refcount &&
+		!(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
+		/* Only queue up the timer if we need it, and we're not trying to kill it */
+		hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
+	}
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+}
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_as *as;
+	int queue_work_ret;
+
+	KBASE_DEBUG_ASSERT(NULL != timer);
+	as = container_of(timer, struct kbase_as, poke_timer);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+	KBASE_DEBUG_ASSERT(queue_work_ret);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * Retain the poking timer on an atom's context (if the atom hasn't already
+ * done so), and start the timer (if it's not already started).
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that's running on the GPU.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock
+ *
+ * This can be called safely from atomic context
+ */
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (katom->poking)
+		return;
+
+	katom->poking = 1;
+
+	/* It's safe to work on the as/as_nr without an explicit reference,
+	 * because the caller holds the runpool_irq lock, and the atom itself
+	 * was also running and had already taken a reference  */
+	as = &kbdev->as[kctx->as_nr];
+
+	if (++(as->poke_refcount) == 1) {
+		/* First refcount for poke needed: check if not already in flight */
+		if (!as->poke_state) {
+			/* need to start poking */
+			as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
+			queue_work(as->poke_wq, &as->poke_work);
+		}
+	}
+}
+
+/**
+ * If an atom holds a poking timer, release it and wait for it to finish
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that still has a JS reference on the context
+ *
+ * This must \b not be called from atomic context, since it can sleep.
+ */
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	if (!katom->poking)
+		return;
+
+	as = &kbdev->as[kctx->as_nr];
+
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+	KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	if (--(as->poke_refcount) == 0) {
+		as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+		hrtimer_cancel(&as->poke_timer);
+		flush_workqueue(as->poke_wq);
+
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+
+		/* Re-check whether it's still needed */
+		if (as->poke_refcount) {
+			int queue_work_ret;
+			/* Poking still needed:
+			 * - Another retain will not be starting the timer or queueing work,
+			 * because it's still marked as in-flight
+			 * - The hrtimer has finished, and has not started a new timer or
+			 * queued work because it's been marked as killing
+			 *
+			 * So whatever happens now, just queue the work again */
+			as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
+			queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+			KBASE_DEBUG_ASSERT(queue_work_ret);
+		} else {
+			/* It isn't - so mark it as not in flight, and not killing */
+			as->poke_state = 0u;
+
+			/* The poke associated with the atom has now finished. If this is
+			 * also the last atom on the context, then we can guarentee no more
+			 * pokes (and thus no more poking register accesses) will occur on
+			 * the context until new atoms are run */
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+	katom->poking = 0;
+}
+
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (!kctx) {
+		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n",
+				 kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
+				 as->number, as->fault_addr);
+
+		/* Since no ctx was found, the MMU must be disabled. */
+		WARN_ON(as->current_setup.transtab);
+
+		if (kbase_as_has_bus_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		} else if (kbase_as_has_page_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+		}
+
+#if KBASE_GPU_RESET_EN
+		if (kbase_as_has_bus_fault(as) &&
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+			bool reset_status;
+			/*
+			 * Reset the GPU, like in bus_fault_worker, in case an
+			 * earlier error hasn't been properly cleared by this
+			 * point.
+			 */
+			dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+			reset_status = kbase_prepare_to_reset_gpu_locked(kbdev);
+			if (reset_status)
+				kbase_reset_gpu_locked(kbdev);
+		}
+#endif /* KBASE_GPU_RESET_EN */
+
+		return;
+	}
+
+	if (kbase_as_has_bus_fault(as)) {
+		/*
+		 * hw counters dumping in progress, signal the
+		 * other thread that it failed
+		 */
+		if ((kbdev->hwcnt.kctx == kctx) &&
+		    (kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_DUMPING))
+			kbdev->hwcnt.backend.state =
+						KBASE_INSTR_STATE_FAULT;
+
+		/*
+		 * Stop the kctx from submitting more jobs and cause it
+		 * to be scheduled out/rescheduled when all references
+		 * to it are released
+		 */
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		dev_warn(kbdev->dev,
+				"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+				as->number, as->fault_addr,
+				as->fault_extra_addr);
+#else
+		dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+				as->number, as->fault_addr);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+		/*
+		 * We need to switch to UNMAPPED mode - but we do this in a
+		 * worker so that we can sleep
+		 */
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault));
+		WARN_ON(work_pending(&as->work_busfault));
+		queue_work(as->pf_wq, &as->work_busfault);
+		atomic_inc(&kbdev->faults_pending);
+	} else {
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault));
+		WARN_ON(work_pending(&as->work_pagefault));
+		queue_work(as->pf_wq, &as->work_pagefault);
+		atomic_inc(&kbdev->faults_pending);
+	}
+}
+
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		flush_workqueue(as->pf_wq);
+	}
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
new file mode 100755
index 0000000..986e959
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Interface file for accessing MMU hardware functionality
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_page MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_intro_sec Introduction
+ * This module provides an abstraction for accessing the functionality provided
+ * by the midgard MMU and thus allows all MMU HW access to be contained within
+ * one common place and allows for different backends (implementations) to
+ * be provided.
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_H_
+#define _MALI_KBASE_MMU_HW_H_
+
+/* Forward declarations */
+struct kbase_device;
+struct kbase_as;
+struct kbase_context;
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw  MMU access APIs
+ * @{
+ */
+
+/** @brief MMU fault type descriptor.
+ */
+enum kbase_mmu_fault_type {
+	KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
+	KBASE_MMU_FAULT_TYPE_PAGE,
+	KBASE_MMU_FAULT_TYPE_BUS,
+	KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED,
+	KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED
+};
+
+/** @brief Configure an address space for use.
+ *
+ * Configure the MMU using the address space details setup in the
+ * @ref kbase_context structure.
+ *
+ * @param[in]  kbdev          kbase device to configure.
+ * @param[in]  as             address space to configure.
+ * @param[in]  kctx           kbase context to configure.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev,
+		struct kbase_as *as, struct kbase_context *kctx);
+
+/** @brief Issue an operation to the MMU.
+ *
+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
+ * is associated with the provided @ref kbase_context over the specified range
+ *
+ * @param[in]  kbdev         kbase device to issue the MMU operation on.
+ * @param[in]  as            address space to issue the MMU operation on.
+ * @param[in]  kctx          kbase context to issue the MMU operation on.
+ * @param[in]  vpfn          MMU Virtual Page Frame Number to start the
+ *                           operation on.
+ * @param[in]  nr            Number of pages to work on.
+ * @param[in]  type          Operation type (written to ASn_COMMAND).
+ * @param[in]  handling_irq  Is this operation being called during the handling
+ *                           of an interrupt?
+ *
+ * @return Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type,
+		unsigned int handling_irq);
+
+/** @brief Clear a fault that has been previously reported by the MMU.
+ *
+ * Clear a bus error or page fault that has been reported by the MMU.
+ *
+ * @param[in]  kbdev         kbase device to  clear the fault from.
+ * @param[in]  as            address space to  clear the fault from.
+ * @param[in]  kctx          kbase context to clear the fault from or NULL.
+ * @param[in]  type          The type of fault that needs to be cleared.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @brief Enable fault that has been previously reported by the MMU.
+ *
+ * After a page fault or bus error has been reported by the MMU these
+ * will be disabled. After these are handled this function needs to be
+ * called to enable the page fault or bus error fault again.
+ *
+ * @param[in]  kbdev         kbase device to again enable the fault from.
+ * @param[in]  as            address space to again enable the fault from.
+ * @param[in]  kctx          kbase context to again enable the fault from.
+ * @param[in]  type          The type of fault that needs to be enabled again.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @} *//* end group mali_kbase_mmu_hw */
+/** @} *//* end group base_kbase_api */
+
+#endif	/* _MALI_KBASE_MMU_HW_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
new file mode 100755
index 0000000..2449c60
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _MALI_KBASE_MMU_MODE_
+#define _MALI_KBASE_MMU_MODE_
+
+#include <linux/types.h>
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_device;
+struct kbase_as;
+struct kbase_mmu_setup;
+
+struct kbase_mmu_mode {
+	void (*update)(struct kbase_context *kctx);
+	void (*get_as_setup)(struct kbase_context *kctx,
+			struct kbase_mmu_setup * const setup);
+	void (*disable_as)(struct kbase_device *kbdev, int as_nr);
+	phys_addr_t (*pte_to_phy_addr)(u64 entry);
+	int (*ate_is_valid)(u64 ate);
+	int (*pte_is_valid)(u64 pte);
+	void (*entry_set_ate)(u64 *entry, phys_addr_t phy, unsigned long flags);
+	void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+	void (*entry_invalidate)(u64 *entry);
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
+
+#endif /* _MALI_KBASE_MMU_MODE_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
new file mode 100755
index 0000000..683cabb
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -0,0 +1,206 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase_mmu_mode.h"
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+#define ENTRY_IS_ATE        1ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_RD_BIT (1ULL << 6)
+#define ENTRY_WR_BIT (1ULL << 7)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
+		ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+	*pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler keeping cached copies of
+	 * memory, we have to explicitly say that we have updated
+	 * memory.
+	 *
+	 * Note: We could manually move the data ourselves into R0 and
+	 * R1 by specifying register variables that are explicitly
+	 * given registers assignments, the down side of this is that
+	 * we have to assume cpu endianness.  To avoid this we can use
+	 * the ldrd to read the data from memory into R0 and R1 which
+	 * will respect the cpu endianness, we then use strd to make
+	 * the 64 bit assignment to the page table entry.
+	 */
+	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+			"strd r0, r1, [%[pte]]\n\t"
+			: "=m" (*pte)
+			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+			: "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register. */
+	setup->memattr =
+		(AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
+		(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    <<
+		(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8))    |
+		(AS_MEMATTR_LPAE_WRITE_ALLOC           <<
+		(AS_MEMATTR_INDEX_WRITE_ALLOC * 8))           |
+		(AS_MEMATTR_LPAE_OUTER_IMPL_DEF        <<
+		(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8))        |
+		(AS_MEMATTR_LPAE_OUTER_WA              <<
+		(AS_MEMATTR_INDEX_OUTER_WA * 8))              |
+		0; /* The other indices are unused for now */
+
+	setup->transtab = ((u64)kctx->pgd &
+		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
+		AS_TRANSTAB_LPAE_ADRMODE_TABLE |
+		AS_TRANSTAB_LPAE_READ_INNER;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#else
+	setup->transcfg = 0;
+#endif
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#endif
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* write perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
+	/* read perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) |
+		get_mmu_flags(flags) |
+		ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const lpae_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
+{
+	return &lpae_mode;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
new file mode 100755
index 0000000..5bbd6d4
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
@@ -0,0 +1,126 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+#ifdef CONFIG_MACH_MANTA
+#include <plat/devs.h>
+#endif
+
+/*
+ * This file is included only for type definitions and functions belonging to
+ * specific platform folders. Do not add dependencies with symbols that are
+ * defined somewhere else.
+ */
+#include <mali_kbase_config.h>
+
+#define PLATFORM_CONFIG_RESOURCE_COUNT 4
+#define PLATFORM_CONFIG_IRQ_RES_COUNT  3
+
+static struct platform_device *mali_device;
+
+#ifndef CONFIG_OF
+/**
+ * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources
+ *
+ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function
+ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT.
+ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
+ *
+ * @param[in]  io_resource      Input IO resource data
+ * @param[out] linux_resources  Pointer to output array of Linux resource structures
+ */
+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
+{
+	if (!io_resources || !linux_resources) {
+		pr_err("%s: couldn't find proper resources\n", __func__);
+		return;
+	}
+
+	memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource));
+
+	linux_resources[0].start = io_resources->io_memory_region.start;
+	linux_resources[0].end   = io_resources->io_memory_region.end;
+	linux_resources[0].flags = IORESOURCE_MEM;
+	linux_resources[1].start = io_resources->job_irq_number;
+	linux_resources[1].end   = io_resources->job_irq_number;
+	linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[2].start = io_resources->mmu_irq_number;
+	linux_resources[2].end   = io_resources->mmu_irq_number;
+	linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[3].start = io_resources->gpu_irq_number;
+	linux_resources[3].end   = io_resources->gpu_irq_number;
+	linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+}
+#endif /* CONFIG_OF */
+
+int kbase_platform_fake_register(void)
+{
+	struct kbase_platform_config *config;
+#ifndef CONFIG_OF
+	struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
+#endif
+	int err;
+
+	config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
+	if (config == NULL) {
+		pr_err("%s: couldn't get platform config\n", __func__);
+		return -ENODEV;
+	}
+
+	mali_device = platform_device_alloc("mali", 0);
+	if (mali_device == NULL)
+		return -ENOMEM;
+
+#ifndef CONFIG_OF
+	kbasep_config_parse_io_resources(config->io_resources, resources);
+	err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
+	if (err) {
+		platform_device_put(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+#endif /* CONFIG_OF */
+
+	err = platform_device_add(mali_device);
+	if (err) {
+		platform_device_unregister(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kbase_platform_fake_register);
+
+void kbase_platform_fake_unregister(void)
+{
+	if (mali_device)
+		platform_device_unregister(mali_device);
+}
+EXPORT_SYMBOL(kbase_platform_fake_unregister);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
new file mode 100755
index 0000000..261441f
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.c
+ * Base kernel power management APIs
+ */
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_instr.h>
+
+#include <mali_kbase_pm.h>
+
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
+{
+	return kbase_hwaccess_pm_powerup(kbdev, flags);
+}
+
+void kbase_pm_halt(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
+void kbase_pm_context_active(struct kbase_device *kbdev)
+{
+	(void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+}
+
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerup. Sometimes the event might be missed due to reading the count
+	 * outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+	if (kbase_pm_is_suspending(kbdev)) {
+		switch (suspend_handler) {
+		case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+			if (kbdev->pm.active_count != 0)
+				break;
+			/* FALLTHROUGH */
+		case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			if (old_count == 0)
+				kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+			return 1;
+
+		case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
+			/* FALLTHROUGH */
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false, "unreachable");
+			break;
+		}
+	}
+	c = ++kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	if (c == 1)
+		/* First context active: Power on the GPU and any cores requested by
+		 * the policy */
+		kbase_hwaccess_pm_gpu_active(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_active);
+
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerdown. Sometimes the event might be missed due to reading the
+	 * count outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	c = --kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
+
+	KBASE_DEBUG_ASSERT(c >= 0);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	if (c == 0) {
+		/* Last context has gone idle */
+		kbase_hwaccess_pm_gpu_idle(kbdev);
+
+		/* Wake up anyone waiting for this to become 0 (e.g. suspend). The
+		 * waiters must synchronize with us by locking the pm.lock after
+		 * waiting */
+		wake_up(&kbdev->pm.zero_active_count_wait);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
+
+void kbase_pm_suspend(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	mutex_lock(&kbdev->pm.lock);
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+	kbdev->pm.suspending = true;
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* From now on, the active count will drop towards zero. Sometimes, it'll
+	 * go up briefly before going down again. However, once it reaches zero it
+	 * will stay there - guaranteeing that we've idled all pm references */
+
+	/* Suspend job scheduler and associated components, so that it releases all
+	 * the PM active count references */
+	kbasep_js_suspend(kbdev);
+
+	/* Suspend any counter collection that might be happening */
+	kbase_instr_hwcnt_suspend(kbdev);
+
+	/* Wait for the active count to reach zero. This is not the same as
+	 * waiting for a power down, since not all policies power down when this
+	 * reaches zero. */
+	wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0);
+
+	/* NOTE: We synchronize with anything that was just finishing a
+	 * kbase_pm_context_idle() call by locking the pm.lock below */
+
+	kbase_hwaccess_pm_suspend(kbdev);
+}
+
+void kbase_pm_resume(struct kbase_device *kbdev)
+{
+	/* MUST happen before any pm_context_active calls occur */
+	kbase_hwaccess_pm_resume(kbdev);
+
+	/* Initial active call, to power on the GPU/cores if needed */
+	kbase_pm_context_active(kbdev);
+
+	/* Re-enable instrumentation, if it was previously disabled */
+	kbase_instr_hwcnt_resume(kbdev);
+
+	/* Resume any blocked atoms (which may cause contexts to be scheduled in
+	 * and dependent atoms to run) */
+	kbase_resume_suspended_soft_jobs(kbdev);
+
+	/* Resume the Job Scheduler and associated components, and start running
+	 * atoms */
+	kbasep_js_resume(kbdev);
+
+	/* Matching idle call, to power off the GPU/cores if we didn't actually
+	 * need it and the policy doesn't want it on */
+	kbase_pm_context_idle(kbdev);
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
new file mode 100755
index 0000000..37fa247
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -0,0 +1,171 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include "mali_kbase_hwaccess_pm.h"
+
+#define PM_ENABLE_IRQS       0x01
+#define PM_HW_ISSUES_DETECT  0x02
+
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return 0 if the power management framework was successfully initialized.
+ */
+int kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @param flags     Flags to pass on to kbase_pm_init_hw
+ *
+ * @return 0 if powerup was successful.
+ */
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ * Should ensure that no new interrupts are generated,
+ * but allow any currently running interrupt handlers to complete successfully.
+ * The GPU is forced off by the time this function returns, regardless of
+ * whether or not the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init)
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It informs the active power policy that the
+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occuring/has occurred whilst this is
+ * in use. Use kbase_pm_contect_active_unless_suspending() instead.
+ *
+ * @note a Suspend is only visible to Kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+	/** A suspend is not expected/not possible - this is the same as
+	 * kbase_pm_context_active() */
+	KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+	/** If we're suspending, fail and don't increase the active count */
+	KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+	/** If we're suspending, succeed and allow the active count to increase iff
+	 * it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+	 *
+	 * This should only be used when there is a bounded time on the activation
+	 * (e.g. guarantee it's going to be idled very soon after) */
+	KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * We returns a status code indicating whether we're allowed to keep the GPU
+ * active during the suspend, depending on the handler code. If the status code
+ * indicates a failure, the caller must abort whatever operation it was
+ * attempting, and potentially queue it up for after the OS has resumed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero     Indicates success
+ * @return non-zero Indicates failure due to the system being suspending/suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power
+ * policy so the calling code should ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from Kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note the mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU e.g. via atom submission.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_vsync_callback - vsync callback
+ *
+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise
+ * @data: Pointer to the kbase device as returned by kbase_find_device()
+ *
+ * Callback function used to notify the power management code that a vsync has
+ * occurred on the display.
+ */
+void kbase_pm_vsync_callback(int buffer_updated, void *data);
+
+#endif				/* _KBASE_PM_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
new file mode 100755
index 0000000..7fb674e
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_profiling_gator_api.h
+ * Model interface
+ */
+
+#ifndef _KBASE_PROFILING_GATOR_API_H_
+#define _KBASE_PROFILING_GATOR_API_H_
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control
+ * the frame buffer dumping and s/w counter reporting.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define FBDUMP_CONTROL_MAX (5)
+#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE
+
+void _mali_profiling_control(u32 action, u32 value);
+
+#endif				/* _KBASE_PROFILING_GATOR_API */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
new file mode 100755
index 0000000..6ac49df
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -0,0 +1,1135 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_replay.c
+ * Replay soft job handlers
+ */
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mem_linux.h>
+
+#define JOB_NOT_STARTED 0
+#define JOB_TYPE_NULL      (1)
+#define JOB_TYPE_VERTEX    (5)
+#define JOB_TYPE_TILER     (7)
+#define JOB_TYPE_FUSED     (8)
+#define JOB_TYPE_FRAGMENT  (9)
+
+#define JOB_HEADER_32_FBD_OFFSET (31*4)
+#define JOB_HEADER_64_FBD_OFFSET (44*4)
+
+#define FBD_POINTER_MASK (~0x3f)
+
+#define SFBD_TILER_OFFSET (48*4)
+
+#define MFBD_TILER_OFFSET       (14*4)
+
+#define FBD_HIERARCHY_WEIGHTS 8
+#define FBD_HIERARCHY_MASK_MASK 0x1fff
+
+#define FBD_TYPE 1
+
+#define HIERARCHY_WEIGHTS 13
+
+#define JOB_HEADER_ID_MAX                 0xffff
+
+#define JOB_SOURCE_ID(status)		(((status) >> 16) & 0xFFFF)
+#define JOB_POLYGON_LIST		(0x03)
+
+struct fragment_job {
+	struct job_descriptor_header header;
+
+	u32 x[2];
+	union {
+		u64 _64;
+		u32 _32;
+	} fragment_fbd;
+};
+
+static void dump_job_head(struct kbase_context *kctx, char *head_str,
+		struct job_descriptor_header *job)
+{
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
+	dev_dbg(kctx->kbdev->dev,
+			"addr                  = %p\n"
+			"exception_status      = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n"
+			"first_incomplete_task = %x\n"
+			"fault_pointer         = %llx\n"
+			"job_descriptor_size   = %x\n"
+			"job_type              = %x\n"
+			"job_barrier           = %x\n"
+			"_reserved_01          = %x\n"
+			"_reserved_02          = %x\n"
+			"_reserved_03          = %x\n"
+			"_reserved_04/05       = %x,%x\n"
+			"job_index             = %x\n"
+			"dependencies          = %x,%x\n",
+			job, job->exception_status,
+			JOB_SOURCE_ID(job->exception_status),
+			(job->exception_status >> 8) & 0x3,
+			job->exception_status  & 0xFF,
+			job->first_incomplete_task,
+			job->fault_pointer, job->job_descriptor_size,
+			job->job_type, job->job_barrier, job->_reserved_01,
+			job->_reserved_02, job->_reserved_03,
+			job->_reserved_04, job->_reserved_05,
+			job->job_index,
+			job->job_dependency_index_1,
+			job->job_dependency_index_2);
+
+	if (job->job_descriptor_size)
+		dev_dbg(kctx->kbdev->dev, "next               = %llx\n",
+				job->next_job._64);
+	else
+		dev_dbg(kctx->kbdev->dev, "next               = %x\n",
+				job->next_job._32);
+#endif
+}
+
+static int kbasep_replay_reset_sfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct {
+		u32 padding_1[1];
+		u32 flags;
+		u64 padding_2[2];
+		u64 heap_free_address;
+		u32 padding[8];
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev,
+		"FBD tiler:\n"
+		"flags = %x\n"
+		"heap_free_address = %llx\n",
+		fbd_tiler->flags, fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = fbd_tiler->flags &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n",
+						i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n",
+			fbd_tiler->heap_free_address, fbd_tiler->flags);
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_replay_reset_mfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct kbase_vmap_struct map;
+	struct {
+		u32 padding_0;
+		u32 flags;
+		u64 padding_1[2];
+		u64 heap_free_address;
+		u64 padding_2;
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev,
+			       "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "FBD tiler:\n"
+			"flags = %x\n"
+			"heap_free_address = %llx\n",
+			fbd_tiler->flags,
+			fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = (fbd_tiler->flags) &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev,
+				" Writing hierarchy level %02d (%08x) to %d\n",
+							     i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of an FBD pointed to by a tiler job
+ *
+ * This performs two functions :
+ * - Set the hierarchy mask
+ * - Reset the tiler free heap address
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] job_header        Address of job header to reset.
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] job_64            true if this job is using 64-bit
+ *                              descriptors
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx,
+		u64 job_header,	u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight,	bool job_64)
+{
+	struct kbase_vmap_struct map;
+	u64 fbd_address;
+
+	if (job_64) {
+		u64 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_64_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	} else {
+		u32 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_32_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	if (fbd_address & FBD_TYPE) {
+		return kbasep_replay_reset_mfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	} else {
+		return kbasep_replay_reset_sfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	}
+}
+
+/**
+ * @brief Reset the status of a job
+ *
+ * This performs the following functions :
+ *
+ * - Reset the Job Status field of each job to NOT_STARTED.
+ * - Set the Job Type field of any Vertex Jobs to Null Job.
+ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of
+ *   the tiler_heap_free parameter, and set the hierarchy level mask to the
+ *   hier_mask parameter.
+ * - Offset HW dependencies by the hw_job_id_offset parameter
+ * - Set the Perform Job Barrier flag if this job is the first in the chain
+ * - Read the address of the next job header
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in,out] job_header    Address of job header to reset. Set to address
+ *                              of next job header on exit.
+ * @param[in] prev_jc           Previous job chain to link to, if this job is
+ *                              the last in the chain.
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] first_in_chain    true if this job is the first in the chain
+ * @param[in] fragment_chain    true if this job is in the fragment chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_job(struct kbase_context *kctx,
+		u64 *job_header, u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool first_in_chain, bool fragment_chain)
+{
+	struct fragment_job *frag_job;
+	struct job_descriptor_header *job;
+	u64 new_job_header;
+	struct kbase_vmap_struct map;
+
+	frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map);
+	if (!frag_job) {
+		dev_err(kctx->kbdev->dev,
+				 "kbasep_replay_parse_jc: failed to map jc\n");
+		return -EINVAL;
+	}
+	job = &frag_job->header;
+
+	dump_job_head(kctx, "Job header:", job);
+
+	if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) {
+		dev_err(kctx->kbdev->dev, "Job already not started\n");
+		goto out_unmap;
+	}
+	job->exception_status = JOB_NOT_STARTED;
+
+	if (job->job_type == JOB_TYPE_VERTEX)
+		job->job_type = JOB_TYPE_NULL;
+
+	if (job->job_type == JOB_TYPE_FUSED) {
+		dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
+		goto out_unmap;
+	}
+
+	if (first_in_chain)
+		job->job_barrier = 1;
+
+	if ((job->job_dependency_index_1 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_dependency_index_2 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+		dev_err(kctx->kbdev->dev,
+			     "Job indicies/dependencies out of valid range\n");
+		goto out_unmap;
+	}
+
+	if (job->job_dependency_index_1)
+		job->job_dependency_index_1 += hw_job_id_offset;
+	if (job->job_dependency_index_2)
+		job->job_dependency_index_2 += hw_job_id_offset;
+
+	job->job_index += hw_job_id_offset;
+
+	if (job->job_descriptor_size) {
+		new_job_header = job->next_job._64;
+		if (!job->next_job._64)
+			job->next_job._64 = prev_jc;
+	} else {
+		new_job_header = job->next_job._32;
+		if (!job->next_job._32)
+			job->next_job._32 = prev_jc;
+	}
+	dump_job_head(kctx, "Updated to:", job);
+
+	if (job->job_type == JOB_TYPE_TILER) {
+		bool job_64 = job->job_descriptor_size != 0;
+
+		if (kbasep_replay_reset_tiler_job(kctx, *job_header,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, job_64) != 0)
+			goto out_unmap;
+
+	} else if (job->job_type == JOB_TYPE_FRAGMENT) {
+		u64 fbd_address;
+
+		if (job->job_descriptor_size)
+			fbd_address = frag_job->fragment_fbd._64;
+		else
+			fbd_address = (u64)frag_job->fragment_fbd._32;
+
+		if (fbd_address & FBD_TYPE) {
+			if (kbasep_replay_reset_mfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		} else {
+			if (kbasep_replay_reset_sfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		}
+	}
+
+	kbase_vunmap(kctx, &map);
+
+	*job_header = new_job_header;
+
+	return 0;
+
+out_unmap:
+	kbase_vunmap(kctx, &map);
+	return -EINVAL;
+}
+
+/**
+ * @brief Find the highest job ID in a job chain
+ *
+ * @param[in] kctx        Context pointer
+ * @param[in] jc          Job chain start address
+ * @param[out] hw_job_id  Highest job ID in chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
+		u64 jc,	u16 *hw_job_id)
+{
+	while (jc) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		dev_dbg(kctx->kbdev->dev,
+			"kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc);
+
+		job = kbase_vmap(kctx, jc, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev, "failed to map jc\n");
+
+			return -EINVAL;
+		}
+
+		if (job->job_index > *hw_job_id)
+			*hw_job_id = job->job_index;
+
+		if (job->job_descriptor_size)
+			jc = job->next_job._64;
+		else
+			jc = job->next_job._32;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a number of jobs
+ *
+ * This function walks the provided job chain, and calls
+ * kbasep_replay_reset_job for each job. It also links the job chain to the
+ * provided previous job chain.
+ *
+ * The function will fail if any of the jobs passed already have status of
+ * NOT_STARTED.
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] jc                Job chain to be processed
+ * @param[in] prev_jc           Job chain to be added to. May be NULL
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] fragment_chain    true if this chain is the fragment chain
+ *
+ * @return 0 on success, error code otherwise
+ */
+static int kbasep_replay_parse_jc(struct kbase_context *kctx,
+		u64 jc,	u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool fragment_chain)
+{
+	bool first_in_chain = true;
+	int nr_jobs = 0;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n",
+			jc, hw_job_id_offset);
+
+	while (jc) {
+		dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc);
+
+		if (kbasep_replay_reset_job(kctx, &jc, prev_jc,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, hw_job_id_offset,
+				first_in_chain, fragment_chain) != 0)
+			return -EINVAL;
+
+		first_in_chain = false;
+
+		nr_jobs++;
+		if (fragment_chain &&
+		    nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) {
+			dev_err(kctx->kbdev->dev,
+				"Exceeded maximum number of jobs in fragment chain\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a replay job, and set up dependencies
+ *
+ * This performs the actions to allow the replay job to be re-run following
+ * completion of the passed dependency.
+ *
+ * @param[in] katom     The atom to be reset
+ * @param[in] dep_atom  The dependency to be attached to the atom
+ */
+static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom,
+		struct kbase_jd_atom *dep_atom)
+{
+	katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+	list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]);
+}
+
+/**
+ * @brief Allocate an unused katom
+ *
+ * This will search the provided context for an unused katom, and will mark it
+ * as KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * If no atoms are available then the function will fail.
+ *
+ * @param[in] kctx      Context pointer
+ * @return An atom ID, or -1 on failure
+ */
+static int kbasep_allocate_katom(struct kbase_context *kctx)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int i;
+
+	for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) {
+		if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) {
+			jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED;
+			dev_dbg(kctx->kbdev->dev,
+				  "kbasep_allocate_katom: Allocated atom %d\n",
+									    i);
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * @brief Release a katom
+ *
+ * This will mark the provided atom as available, and remove any dependencies.
+ *
+ * For use on error path.
+ *
+ * @param[in] kctx      Context pointer
+ * @param[in] atom_id   ID of atom to release
+ */
+static void kbasep_release_katom(struct kbase_context *kctx, int atom_id)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n",
+			atom_id);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[0]))
+		list_del(jctx->atoms[atom_id].dep_head[0].next);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[1]))
+		list_del(jctx->atoms[atom_id].dep_head[1].next);
+
+	jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED;
+}
+
+static void kbasep_replay_create_atom(struct kbase_context *kctx,
+				      struct base_jd_atom_v2 *atom,
+				      int atom_nr,
+				      base_jd_prio prio)
+{
+	atom->nr_extres = 0;
+	atom->extres_list.value = NULL;
+	atom->device_nr = 0;
+	atom->prio = prio;
+	atom->atom_number = atom_nr;
+
+	base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID);
+
+	atom->udata.blob[0] = 0;
+	atom->udata.blob[1] = 0;
+}
+
+/**
+ * @brief Create two atoms for the purpose of replaying jobs
+ *
+ * Two atoms are allocated and created. The jc pointer is not set at this
+ * stage. The second atom has a dependency on the first. The remaining fields
+ * are set up as follows :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ *
+ * @param[out] t_atom      Atom to use for tiler jobs
+ * @param[out] f_atom      Atom to use for fragment jobs
+ * @param[in]  prio        Priority of new atom (inherited from replay soft
+ *                         job)
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_create_atoms(struct kbase_context *kctx,
+		struct base_jd_atom_v2 *t_atom,
+		struct base_jd_atom_v2 *f_atom,
+		base_jd_prio prio)
+{
+	int t_atom_nr, f_atom_nr;
+
+	t_atom_nr = kbasep_allocate_katom(kctx);
+	if (t_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		return -EINVAL;
+	}
+
+	f_atom_nr = kbasep_allocate_katom(kctx);
+	if (f_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		kbasep_release_katom(kctx, t_atom_nr);
+		return -EINVAL;
+	}
+
+	kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
+	kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
+
+	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA);
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_DEBUG
+static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload)
+{
+	u64 next;
+
+	dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n");
+	next = payload->tiler_jc_list;
+
+	while (next) {
+		struct kbase_vmap_struct map;
+		base_jd_replay_jc *jc_struct;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map);
+
+		if (!jc_struct)
+			return;
+
+		dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n",
+				jc_struct, jc_struct->jc, jc_struct->next);
+
+		next = jc_struct->next;
+
+		kbase_vunmap(kctx, &map);
+	}
+}
+#endif
+
+/**
+ * @brief Parse a base_jd_replay_payload provided by userspace
+ *
+ * This will read the payload from userspace, and parse the job chains.
+ *
+ * @param[in] kctx         Context pointer
+ * @param[in] replay_atom  Replay soft job atom
+ * @param[in] t_atom       Atom to use for tiler jobs
+ * @param[in] f_atom       Atom to use for fragment jobs
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_parse_payload(struct kbase_context *kctx,
+					      struct kbase_jd_atom *replay_atom,
+					      struct base_jd_atom_v2 *t_atom,
+					      struct base_jd_atom_v2 *f_atom)
+{
+	base_jd_replay_payload *payload;
+	u64 next;
+	u64 prev_jc = 0;
+	u16 hw_job_id_offset = 0;
+	int ret = -EINVAL;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n",
+			replay_atom->jc, sizeof(payload));
+
+	payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map);
+
+	if (!payload) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload);
+	dev_dbg(kctx->kbdev->dev, "Payload structure:\n"
+				  "tiler_jc_list            = %llx\n"
+				  "fragment_jc              = %llx\n"
+				  "tiler_heap_free          = %llx\n"
+				  "fragment_hierarchy_mask  = %x\n"
+				  "tiler_hierarchy_mask     = %x\n"
+				  "hierarchy_default_weight = %x\n"
+				  "tiler_core_req           = %x\n"
+				  "fragment_core_req        = %x\n",
+							payload->tiler_jc_list,
+							  payload->fragment_jc,
+						      payload->tiler_heap_free,
+					      payload->fragment_hierarchy_mask,
+						 payload->tiler_hierarchy_mask,
+					     payload->hierarchy_default_weight,
+						       payload->tiler_core_req,
+						   payload->fragment_core_req);
+	payload_dump(kctx, payload);
+#endif
+
+	t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER;
+	f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER;
+
+	/* Sanity check core requirements*/
+	if ((t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE &
+			       ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_T ||
+	    (f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE &
+			      ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_FS ||
+	     t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES ||
+	     f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		dev_err(kctx->kbdev->dev, "Invalid core requirements\n");
+		goto out;
+	}
+
+	/* Process tiler job chains */
+	next = payload->tiler_jc_list;
+	if (!next) {
+		dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n");
+		goto out;
+	}
+
+	while (next) {
+		base_jd_replay_jc *jc_struct;
+		struct kbase_vmap_struct jc_map;
+		u64 jc;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map);
+
+		if (!jc_struct) {
+			dev_err(kctx->kbdev->dev, "Failed to map jc struct\n");
+			goto out;
+		}
+
+		jc = jc_struct->jc;
+		next = jc_struct->next;
+		if (next)
+			jc_struct->jc = 0;
+
+		kbase_vunmap(kctx, &jc_map);
+
+		if (jc) {
+			u16 max_hw_job_id = 0;
+
+			if (kbasep_replay_find_hw_job_id(kctx, jc,
+					&max_hw_job_id) != 0)
+				goto out;
+
+			if (kbasep_replay_parse_jc(kctx, jc, prev_jc,
+					payload->tiler_heap_free,
+					payload->tiler_hierarchy_mask,
+					payload->hierarchy_default_weight,
+					hw_job_id_offset, false) != 0) {
+				goto out;
+			}
+
+			hw_job_id_offset += max_hw_job_id;
+
+			prev_jc = jc;
+		}
+	}
+	t_atom->jc = prev_jc;
+
+	/* Process fragment job chain */
+	f_atom->jc = payload->fragment_jc;
+	if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0,
+			payload->tiler_heap_free,
+			payload->fragment_hierarchy_mask,
+			payload->hierarchy_default_weight, 0,
+			true) != 0) {
+		goto out;
+	}
+
+	if (!t_atom->jc || !f_atom->jc) {
+		dev_err(kctx->kbdev->dev, "Invalid payload\n");
+		goto out;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n",
+			t_atom->jc, f_atom->jc);
+	ret = 0;
+
+out:
+	kbase_vunmap(kctx, &map);
+
+	return ret;
+}
+
+static void kbase_replay_process_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+	struct kbase_jd_context *jctx;
+	bool need_to_try_schedule_context = false;
+
+	struct base_jd_atom_v2 t_atom, f_atom;
+	struct kbase_jd_atom *t_katom, *f_katom;
+	base_jd_prio atom_prio;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+
+	mutex_lock(&jctx->lock);
+
+	atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority);
+
+	if (kbasep_replay_create_atoms(
+			kctx, &t_atom, &f_atom, atom_prio) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	t_katom = &jctx->atoms[t_atom.atom_number];
+	f_katom = &jctx->atoms[f_atom.atom_number];
+
+	if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) {
+		kbasep_release_katom(kctx, t_atom.atom_number);
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	kbasep_replay_reset_softjob(katom, f_katom);
+
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom);
+	if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom);
+	if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+out:
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_disjoint_state_down(kctx->kbdev);
+
+		need_to_try_schedule_context |= jd_done_nolock(katom, NULL);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kctx->kbdev);
+
+	mutex_unlock(&jctx->lock);
+}
+
+/**
+ * @brief Check job replay fault
+ *
+ * This will read the job payload, checks fault type and source, then decides
+ * whether replay is required.
+ *
+ * @param[in] katom       The atom to be processed
+ * @return  true (success) if replay required or false on failure.
+ */
+static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	base_jd_replay_payload *payload;
+	u64 job_header;
+	u64 job_loop_detect;
+	struct job_descriptor_header *job;
+	struct kbase_vmap_struct job_map;
+	struct kbase_vmap_struct map;
+	bool err = false;
+
+	/* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or
+	 * if force_replay is enabled.
+	 */
+	if (BASE_JD_EVENT_TERMINATED == katom->event_code) {
+		return false;
+	} else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) {
+		return true;
+	} else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) {
+		katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT;
+		return true;
+	} else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) {
+		/* No replay for faults of type other than
+		 * BASE_JD_EVENT_DATA_INVALID_FAULT.
+		 */
+		return false;
+	}
+
+	/* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc
+	 * to find out whether the source of exception is POLYGON_LIST. Replay
+	 * is required if the source of fault is POLYGON_LIST.
+	 */
+	payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n");
+		return false;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload);
+	dev_dbg(dev, "\nPayload structure:\n"
+		     "fragment_jc              = 0x%llx\n"
+		     "fragment_hierarchy_mask  = 0x%x\n"
+		     "fragment_core_req        = 0x%x\n",
+		     payload->fragment_jc,
+		     payload->fragment_hierarchy_mask,
+		     payload->fragment_core_req);
+#endif
+	/* Process fragment job chain */
+	job_header      = (u64) payload->fragment_jc;
+	job_loop_detect = job_header;
+	while (job_header) {
+		job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map);
+		if (!job) {
+			dev_err(dev, "failed to map jc\n");
+			/* unmap payload*/
+			kbase_vunmap(kctx, &map);
+			return false;
+		}
+
+
+		dump_job_head(kctx, "\njob_head structure:\n", job);
+
+		/* Replay only when the polygon list reader caused the
+		 * DATA_INVALID_FAULT */
+		if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
+		   (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) {
+			err = true;
+			kbase_vunmap(kctx, &job_map);
+			break;
+		}
+
+		/* Move on to next fragment job in the list */
+		if (job->job_descriptor_size)
+			job_header = job->next_job._64;
+		else
+			job_header = job->next_job._32;
+
+		kbase_vunmap(kctx, &job_map);
+
+		/* Job chain loop detected */
+		if (job_header == job_loop_detect)
+			break;
+	}
+
+	/* unmap payload*/
+	kbase_vunmap(kctx, &map);
+
+	return err;
+}
+
+
+/**
+ * @brief Process a replay job
+ *
+ * Called from kbase_process_soft_job.
+ *
+ * On exit, if the job has completed, katom->event_code will have been updated.
+ * If the job has not completed, and is replaying jobs, then the atom status
+ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * @param[in] katom  The atom to be processed
+ * @return           false if the atom has completed
+ *                   true if the atom is replaying jobs
+ */
+bool kbase_replay_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	/* Don't replay this atom if these issues are not present in the
+	 * hardware */
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11020) &&
+			!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11024)) {
+		dev_dbg(kbdev->dev, "Hardware does not need replay workaround");
+
+		/* Signal failure to userspace */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+
+		return false;
+	}
+
+	if (katom->event_code == BASE_JD_EVENT_DONE) {
+		dev_dbg(kbdev->dev, "Previous job succeeded - not replaying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	if (jctx->sched_info.ctx.is_dying) {
+		dev_dbg(kbdev->dev, "Not replaying; context is dying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	/* Check job exception type and source before replaying. */
+	if (!kbase_replay_fault_check(katom)) {
+		dev_dbg(kbdev->dev,
+			"Replay cancelled on event %x\n", katom->event_code);
+		/* katom->event_code is already set to the failure code of the
+		 * previous job.
+		 */
+		return false;
+	}
+
+	dev_warn(kbdev->dev, "Replaying jobs retry=%d\n",
+			katom->retry_count);
+
+	katom->retry_count++;
+
+	if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) {
+		dev_err(kbdev->dev, "Replay exceeded limit - failing jobs\n");
+
+		kbase_disjoint_state_down(kbdev);
+
+		/* katom->event_code is already set to the failure code of the
+		   previous job */
+		return false;
+	}
+
+	/* only enter the disjoint state once for the whole time while the replay is ongoing */
+	if (katom->retry_count == 1)
+		kbase_disjoint_state_up(kbdev);
+
+	INIT_WORK(&katom->work, kbase_replay_process_worker);
+	queue_work(kctx->event_workq, &katom->work);
+
+	return true;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
new file mode 100755
index 0000000..43175c8
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
@@ -0,0 +1,74 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+#include <mali_kbase_smc.h>
+
+#include <linux/compiler.h>
+
+static noinline u64 invoke_smc_fid(u64 function_id,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	register u64 x0 asm("x0") = function_id;
+	register u64 x1 asm("x1") = arg0;
+	register u64 x2 asm("x2") = arg1;
+	register u64 x3 asm("x3") = arg2;
+
+	asm volatile(
+			__asmeq("%0", "x0")
+			__asmeq("%1", "x1")
+			__asmeq("%2", "x2")
+			__asmeq("%3", "x3")
+			"smc    #0\n"
+			: "+r" (x0)
+			: "r" (x1), "r" (x2), "r" (x3));
+
+	return x0;
+}
+
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2)
+{
+	/* Is fast call (bit 31 set) */
+	KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL);
+	/* bits 16-23 must be zero for fast calls */
+	KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0);
+
+	return invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	/* Only the six bits allowed should be used. */
+	KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0);
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return kbase_invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+#endif /* CONFIG_ARM64 */
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
new file mode 100755
index 0000000..9bff3d2
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_SMC_H_
+#define _KBASE_SMC_H_
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+
+#define SMC_FAST_CALL (1 << 31)
+#define SMC_64 (1 << 30)
+
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */
+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET)
+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET)
+
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @fid: The SMC function to call, see SMC Calling convention.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC.
+  */
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2);
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @oen: Owning Entity number (SIP, STD etc).
+  * @function_number: The function number within the OEN.
+  * @smc64: use SMC64 calling convention instead of SMC32.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC call.
+  */
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2);
+
+#endif /* CONFIG_ARM64 */
+
+#endif /* _KBASE_SMC_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
new file mode 100755
index 0000000..f1dd011
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -0,0 +1,1178 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+#include <linux/dma-mapping.h>
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#include <linux/syscalls.h>
+#include "mali_kbase_sync.h"
+#endif
+#include <mali_base_kernel.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_mem_linux.h>
+#include <linux/version.h>
+#include <linux/ktime.h>
+#include <linux/pfn.h>
+#include <linux/sched.h>
+
+/* Mask to check cache alignment of data structures */
+#define KBASE_CACHE_ALIGNMENT_MASK		((1<<L1_CACHE_SHIFT)-1)
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static struct page *kbasep_translate_gpu_addr_to_kernel_page(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 pfn;
+	struct kbase_va_region *reg;
+	phys_addr_t addr = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	pfn = gpu_addr >> PAGE_SHIFT;
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto err_vm_unlock;
+	addr = reg->cpu_alloc->pages[pfn - reg->start_pfn];
+	kbase_gpu_vm_unlock(kctx);
+
+	if (!addr)
+		goto err;
+
+	return pfn_to_page(PFN_DOWN(addr));
+
+err_vm_unlock:
+	kbase_gpu_vm_unlock(kctx);
+err:
+	return NULL;
+}
+
+int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status)
+{
+	struct page *pg = kbasep_translate_gpu_addr_to_kernel_page(
+			kctx, evt);
+	unsigned char *mapped_pg;
+	u32 offset = evt & ~PAGE_MASK;
+
+	KBASE_DEBUG_ASSERT(NULL != status);
+
+	if (!pg)
+		return -1;
+
+	mapped_pg = (unsigned char *)kmap_atomic(pg);
+	KBASE_DEBUG_ASSERT(NULL != mapped_pg); /* kmap_atomic() must not fail */
+	*status = *(mapped_pg + offset);
+	kunmap_atomic(mapped_pg);
+
+	return 0;
+}
+
+int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status)
+{
+	struct page *pg = kbasep_translate_gpu_addr_to_kernel_page(
+			kctx, evt);
+	unsigned char *mapped_pg;
+	u32 offset = evt & ~PAGE_MASK;
+
+	KBASE_DEBUG_ASSERT((new_status == BASE_JD_SOFT_EVENT_SET) ||
+			   (new_status == BASE_JD_SOFT_EVENT_RESET));
+
+	if (!pg)
+		return -1;
+
+	mapped_pg = (unsigned char *)kmap_atomic(pg);
+	KBASE_DEBUG_ASSERT(NULL != mapped_pg); /* kmap_atomic() must not fail */
+	*(mapped_pg + offset) = new_status;
+	kunmap_atomic(mapped_pg);
+
+	return 0;
+}
+
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+	struct kbase_va_region *reg;
+	phys_addr_t addr = 0;
+	u64 pfn;
+	u32 offset;
+	char *page;
+	struct timespec ts;
+	struct base_dump_cpu_gpu_counters data;
+	u64 system_time;
+	u64 cycle_counter;
+	u64 jc = katom->jc;
+	struct kbase_context *kctx = katom->kctx;
+	int pm_active_err;
+
+	memset(&data, 0, sizeof(data));
+
+	/* Take the PM active reference as late as possible - otherwise, it could
+	 * delay suspend until we process the atom (which may be at the end of a
+	 * long chain of dependencies */
+	pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+	if (pm_active_err) {
+		struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+		/* We're suspended - queue this on the list of suspended jobs
+		 * Use dep_item[1], because dep_item[0] is in use for 'waiting_soft_jobs' */
+		mutex_lock(&js_devdata->runpool_mutex);
+		list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		/* Also adding this to the list of waiting soft job */
+		kbasep_add_waiting_soft_job(katom);
+
+		return pm_active_err;
+	}
+
+	kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
+									&ts);
+
+	kbase_pm_context_idle(kctx->kbdev);
+
+	data.sec = ts.tv_sec;
+	data.usec = ts.tv_nsec / 1000;
+	data.system_time = system_time;
+	data.cycle_counter = cycle_counter;
+
+	pfn = jc >> PAGE_SHIFT;
+	offset = jc & ~PAGE_MASK;
+
+	/* Assume this atom will be cancelled until we know otherwise */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (offset > 0x1000 - sizeof(data)) {
+		/* Wouldn't fit in the page */
+		return 0;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, jc);
+	if (reg &&
+	    (reg->flags & KBASE_REG_GPU_WR) &&
+	    reg->cpu_alloc && reg->cpu_alloc->pages)
+		addr = reg->cpu_alloc->pages[pfn - reg->start_pfn];
+
+	kbase_gpu_vm_unlock(kctx);
+	if (!addr)
+		return 0;
+
+	page = kmap(pfn_to_page(PFN_DOWN(addr)));
+	if (!page)
+		return 0;
+
+	kbase_sync_single_for_cpu(katom->kctx->kbdev,
+			kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) +
+			offset, sizeof(data),
+			DMA_BIDIRECTIONAL);
+
+	memcpy(page + offset, &data, sizeof(data));
+
+	kbase_sync_single_for_device(katom->kctx->kbdev,
+			kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) +
+			offset, sizeof(data),
+			DMA_BIDIRECTIONAL);
+
+	kunmap(pfn_to_page(PFN_DOWN(addr)));
+
+	/* Atom was fine - mark it as done */
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	return 0;
+}
+
+#ifdef CONFIG_SYNC
+
+/* Complete an atom that has returned '1' from kbase_process_soft_job (i.e. has waited)
+ *
+ * @param katom     The atom to complete
+ */
+static void complete_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	list_del(&katom->dep_item[0]);
+	kbase_finish_soft_job(katom);
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(kctx->kbdev);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+static enum base_jd_event_code kbase_fence_trigger(struct kbase_jd_atom *katom, int result)
+{
+	struct sync_pt *pt;
+	struct sync_timeline *timeline;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	if (!list_is_singular(&katom->fence->pt_list_head)) {
+#else
+	if (katom->fence->num_fences != 1) {
+#endif
+		/* Not exactly one item in the list - so it didn't (directly) come from us */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	pt = list_first_entry(&katom->fence->pt_list_head, struct sync_pt, pt_list);
+#else
+	pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
+#endif
+	timeline = sync_pt_parent(pt);
+
+	if (!kbase_sync_timeline_is_ours(timeline)) {
+		/* Fence has a sync_pt which isn't ours! */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	kbase_sync_signal_pt(pt, result);
+
+	sync_timeline_signal(timeline);
+
+	return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+static void kbase_fence_wait_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+
+	complete_soft_job(katom);
+}
+
+static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter)
+{
+	struct kbase_jd_atom *katom = container_of(waiter, struct kbase_jd_atom, sync_waiter);
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != katom);
+
+	kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	/* Propagate the fence status to the atom.
+	 * If negative then cancel this atom and its dependencies.
+	 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	if (fence->status < 0)
+#else
+	if (atomic_read(&fence->status) < 0)
+#endif
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue
+	 *
+	 * The issue is that we may signal the timeline while holding kctx->jctx.lock and
+	 * the callbacks are run synchronously from sync_timeline_signal. So we simply defer the work.
+	 */
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, kbase_fence_wait_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+static int kbase_fence_wait(struct kbase_jd_atom *katom)
+{
+	int ret;
+
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	KBASE_DEBUG_ASSERT(NULL != katom->kctx);
+
+	sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+	ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+	if (ret == 1) {
+		/* Already signalled */
+		return 0;
+	}
+
+	if (ret < 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+		INIT_WORK(&katom->work, kbase_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+	kbasep_add_waiting_soft_job(katom);
+
+	return 1;
+}
+
+static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+		/* The wait wasn't cancelled - leave the cleanup for kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+#endif /* CONFIG_SYNC */
+
+static void kbasep_soft_event_complete_job(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt)
+{
+	int cancel_timer = 1;
+	struct list_head *entry, *tmp;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, dep_item[0]);
+
+		if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==
+		    BASE_JD_REQ_SOFT_EVENT_WAIT) {
+			if (katom->jc == evt) {
+				list_del(&katom->dep_item[0]);
+
+				katom->event_code = BASE_JD_EVENT_DONE;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				/* There are still other waiting jobs, we cannot
+				 * cancel the timer yet */
+				cancel_timer = 0;
+			}
+		}
+	}
+
+	if (cancel_timer)
+		hrtimer_try_to_cancel(&kctx->soft_event_timeout);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+enum hrtimer_restart kbasep_soft_event_timeout_worker(struct hrtimer *timer)
+{
+	struct kbase_context *kctx = container_of(timer, struct kbase_context,
+			soft_event_timeout);
+	u32 timeout_ms = (u32)atomic_read(
+			&kctx->kbdev->js_data.soft_event_timeout_ms);
+	ktime_t cur_time = ktime_get();
+	enum hrtimer_restart restarting = HRTIMER_NORESTART;
+	unsigned long lflags;
+	struct list_head *entry, *tmp;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, dep_item[0]);
+
+		if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==
+		    BASE_JD_REQ_SOFT_EVENT_WAIT) {
+			s64 elapsed_time =
+				ktime_to_ms(ktime_sub(cur_time,
+						      katom->start_timestamp));
+			if (elapsed_time > (s64)timeout_ms) {
+				/* Take it out of the list to ensure that it
+				 * will be cancelled in all cases */
+				list_del(&katom->dep_item[0]);
+
+				katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				restarting = HRTIMER_RESTART;
+			}
+		}
+	}
+
+	if (restarting)
+		hrtimer_add_expires(timer, HR_TIMER_DELAY_MSEC(timeout_ms));
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+
+	return restarting;
+}
+
+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	ktime_t remaining;
+	unsigned char status;
+
+	/* The status of this soft-job is stored in jc */
+	if (kbasep_read_soft_event_status(kctx, katom->jc, &status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return 0;
+	}
+
+	if (status == BASE_JD_SOFT_EVENT_SET)
+		return 0; /* Event already set, nothing to do */
+
+	/* Record the start time of this atom so we could cancel it at
+	 * the right time */
+	katom->start_timestamp = ktime_get();
+
+	/* Add the atom to the waiting list before the timer is
+	 * (re)started to make sure that it gets processed */
+	kbasep_add_waiting_soft_job(katom);
+
+	/* Schedule cancellation of this atom after a period if it is
+	 * not active */
+	remaining = hrtimer_get_remaining(&kctx->soft_event_timeout);
+	if (remaining.tv64 <= 0) {
+		int timeout_ms = atomic_read(
+				&kctx->kbdev->js_data.soft_event_timeout_ms);
+		hrtimer_start(&kctx->soft_event_timeout,
+			      HR_TIMER_DELAY_MSEC((u64)timeout_ms),
+			      HRTIMER_MODE_REL);
+	}
+
+	return 1;
+}
+
+static void kbasep_soft_event_update(struct kbase_jd_atom *katom,
+				     unsigned char new_status)
+{
+	/* Complete jobs waiting on the same event */
+	struct kbase_context *kctx = katom->kctx;
+
+	if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, katom->jc);
+}
+
+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
+{
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+struct kbase_debug_copy_buffer {
+	u64 size;
+	struct page **pages;
+	int nr_pages;
+	u64 offset;
+};
+
+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+
+	if (!buffers)
+		return;
+
+	for (i = 0; i < nr; i++) {
+		int p;
+
+		if (!buffers[i].pages)
+			break;
+		for (p = 0; p < buffers[i].nr_pages; p++) {
+			struct page *pg = buffers[i].pages[p];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(buffers[i].pages);
+	}
+	kfree(buffers);
+
+	katom->jc = 0;
+}
+
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers;
+	struct base_jd_debug_copy_buffer *user_buffers = NULL;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+	int ret = 0;
+	void __user *user_structs = (void __user *)(uintptr_t)katom->jc;
+
+	if (!user_structs)
+		return -EINVAL;
+
+	buffers = kmalloc_array(nr, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers) {
+		ret = -ENOMEM;
+		katom->jc = 0;
+		goto out_cleanup;
+	}
+	katom->jc = (u64)(uintptr_t)buffers;
+
+	user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL);
+
+	if (!user_buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+
+	if (copy_from_user(user_buffers, user_structs,
+			sizeof(*user_buffers)*nr)) {
+		ret = -EINVAL;
+		goto out_cleanup;
+	}
+
+	down_read(&current->mm->mmap_sem);
+	for (i = 0; i < nr; i++) {
+		u64 addr = user_buffers[i].address;
+		u64 page_addr = addr & PAGE_MASK;
+		u64 end_page_addr = addr + user_buffers[i].size - 1;
+		u64 last_page_addr = end_page_addr & PAGE_MASK;
+		int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1;
+		int pinned_pages;
+
+		if (!user_buffers[i].address) {
+			memset(&buffers[i], 0,
+					sizeof(struct kbase_debug_copy_buffer));
+			continue;
+		}
+
+		buffers[i].nr_pages = nr_pages;
+		buffers[i].offset = addr & ~PAGE_MASK;
+		buffers[i].size = user_buffers[i].size;
+
+		buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *),
+				GFP_KERNEL);
+		if (!buffers[i].pages) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		}
+
+		pinned_pages = get_user_pages(current, current->mm, page_addr,
+					nr_pages,
+					1, /* Write */
+					0, /* No force */
+					buffers[i].pages,
+					NULL);
+		if (pinned_pages < 0) {
+			ret = pinned_pages;
+			goto out_unlock;
+		}
+		if (pinned_pages != nr_pages) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+	}
+	up_read(&current->mm->mmap_sem);
+
+	kfree(user_buffers);
+
+	return ret;
+
+out_unlock:
+	up_read(&current->mm->mmap_sem);
+
+out_cleanup:
+	kfree(buffers);
+	kfree(user_buffers);
+
+	/* Frees allocated memory for kbase_debug_copy_job struct, including
+	 * members, and sets jc to 0 */
+	kbase_debug_copy_finish(katom);
+
+	return ret;
+}
+
+static int kbase_debug_copy(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+
+	for (i = 0; i < katom->nr_extres; i++) {
+		u64 offset = buffers[i].offset;
+		u64 buffer_space = buffers[i].size;
+		int p;
+
+		for (p = 0; p < buffers[i].nr_pages; p++) {
+			struct page *pg = buffers[i].pages[p];
+			void *kpage = kmap(pg);
+			u64 page_space = PAGE_SIZE-offset;
+			u64 space;
+
+			if (page_space <= buffer_space)
+				space = page_space;
+			else
+				space = buffer_space;
+
+			/* Temporary - GPUCORE-1843 covers the implementation
+			 * of the actual copying. */
+			memset(kpage+offset, 0x4B, space);
+
+			if (!PageReserved(pg))
+				SetPageDirty(pg);
+
+			kunmap(pg);
+			offset = 0;
+			buffer_space -= space;
+		}
+	}
+
+	return 0;
+}
+
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	struct base_jit_alloc_info *info;
+	struct kbase_context *kctx = katom->kctx;
+	int ret;
+
+	/* Fail the job if there is no info structure */
+	if (!data) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(info, data, sizeof(*info)) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* If the ID is zero or is in use then fail the job */
+	if ((info->id == 0) || (kctx->jit_alloc[info->id])) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Set the jit_alloc to a non-zero value so we know the ID is in use */
+	kctx->jit_alloc[info->id] = (struct kbase_va_region *) -1;
+
+	/* Sanity check that the PA fits within the VA */
+	if (info->va_pages < info->commit_pages) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Ensure the GPU address is correctly aligned */
+	if ((info->gpu_alloc_addr & 0x7) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Replace the user pointer with our kernel allocated info structure */
+	katom->jc = (u64)(uintptr_t) info;
+
+	/*
+	 * Note:
+	 * The provided info->gpu_alloc_addr isn't validated here as
+	 * userland can cache allocations which means that even
+	 * though the region is valid it doesn't represent the
+	 * same thing it used to.
+	 *
+	 * Complete validation of va_pages, commit_pages and extent
+	 * isn't done here as it will be done during the call to
+	 * kbase_mem_alloc.
+	 */
+	return 0;
+
+free_info:
+	kfree(info);
+fail:
+	katom->jc = 0;
+	return ret;
+}
+
+static void kbase_jit_allocate_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct base_jit_alloc_info *info;
+	struct kbase_va_region *reg;
+	struct kbase_vmap_struct mapping;
+	u64 *ptr;
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+
+	/* Create a JIT allocation */
+	reg = kbase_jit_allocate(kctx, info);
+	if (!reg) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return;
+	}
+
+	/*
+	 * Write the address of the JIT allocation to the user provided
+	 * GPU allocation.
+	 */
+	ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+			&mapping);
+	if (!ptr) {
+		/*
+		 * Leave the allocation "live" as the JIT free jit will be
+		 * submitted anyway.
+		 */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	*ptr = reg->start_pfn << PAGE_SHIFT;
+	kbase_vunmap(kctx, &mapping);
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	/*
+	 * Bind it to the user provided ID. Do this last so we can check for
+	 * the JIT free racing this JIT alloc job.
+	 */
+	kctx->jit_alloc[info->id] = reg;
+}
+
+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom)
+{
+	struct base_jit_alloc_info *info;
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(info);
+}
+
+static void kbase_jit_free_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u8 id = (u8) katom->jc;
+
+	/*
+	 * If the ID is zero or it is not in use yet then fail the job.
+	 */
+	if ((id == 0) || (kctx->jit_alloc[id] == NULL)) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	/*
+	 * If the ID is valid but the allocation request failed still succeed
+	 * this soft job but don't try and free the allocation.
+	 */
+	if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1)
+		kbase_jit_free(kctx, kctx->jit_alloc[id]);
+
+	kctx->jit_alloc[id] = NULL;
+}
+
+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
+{
+	__user struct base_external_resource_list *user_ext_res;
+	struct base_external_resource_list *ext_res;
+	u64 count = 0;
+	size_t copy_size;
+	int ret;
+
+	user_ext_res = (__user struct base_external_resource_list *)
+			(uintptr_t) katom->jc;
+
+	/* Fail the job if there is no info structure */
+	if (!user_ext_res) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Is the number of external resources in range? */
+	if (!count || count > BASE_EXT_RES_COUNT_MAX) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	copy_size = sizeof(*ext_res);
+	copy_size += sizeof(struct base_external_resource) * (count - 1);
+	ext_res = kzalloc(copy_size, GFP_KERNEL);
+	if (!ext_res) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/*
+	 * Overwrite the count with the first value incase it was changed
+	 * after the fact.
+	 */
+	ext_res->count = count;
+
+	/*
+	 * Replace the user pointer with our kernel allocated
+	 * ext_res structure.
+	 */
+	katom->jc = (u64)(uintptr_t) ext_res;
+
+	return 0;
+
+free_info:
+	kfree(ext_res);
+fail:
+	return ret;
+}
+
+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
+{
+	struct base_external_resource_list *ext_res;
+	int i;
+	bool failed = false;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	if (!ext_res)
+		goto failed_jc;
+
+	kbase_gpu_vm_lock(katom->kctx);
+
+	for (i = 0; i < ext_res->count; i++) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		if (map) {
+			if (!kbase_sticky_resource_acquire(katom->kctx,
+					gpu_addr))
+				goto failed_loop;
+		} else
+			if (!kbase_sticky_resource_release(katom->kctx, NULL,
+					gpu_addr, false))
+				failed = true;
+	}
+
+	/*
+	 * In the case of unmap we continue unmapping other resources in the
+	 * case of failure but will always report failure if _any_ unmap
+	 * request fails.
+	 */
+	if (failed)
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	else
+		katom->event_code = BASE_JD_EVENT_DONE;
+
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	return;
+
+failed_loop:
+	while (--i > 0) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+
+		kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr,
+				false);
+	}
+
+	katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	kbase_gpu_vm_unlock(katom->kctx);
+
+failed_jc:
+	return;
+}
+
+static void kbase_ext_res_finish(struct kbase_jd_atom *katom)
+{
+	struct base_external_resource_list *ext_res;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(ext_res);
+}
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		return kbase_dump_cpu_gpu_time(katom);
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		KBASE_DEBUG_ASSERT(katom->fence != NULL);
+		katom->event_code = kbase_fence_trigger(katom, katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+		/* Release the reference as we don't need it any more */
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		return kbase_fence_wait(katom);
+#endif				/* CONFIG_SYNC */
+	case BASE_JD_REQ_SOFT_REPLAY:
+		return kbase_replay_process(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		return kbasep_soft_event_wait(katom);
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+		kbasep_soft_event_update(katom, BASE_JD_SOFT_EVENT_SET);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		kbasep_soft_event_update(katom, BASE_JD_SOFT_EVENT_RESET);
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		return kbase_debug_copy(katom);
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_process(katom, true);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_process(katom, false);
+		break;
+	}
+
+	/* Atom is complete */
+	return 0;
+}
+
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		kbase_fence_cancel_wait(katom);
+		break;
+#endif
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		kbasep_soft_event_cancel_job(katom);
+		break;
+	default:
+		/* This soft-job doesn't support cancellation! */
+		KBASE_DEBUG_ASSERT(0);
+	}
+}
+
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		{
+			if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK))
+				return -EINVAL;
+		}
+		break;
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		{
+			struct base_fence fence;
+			int fd;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			fd = kbase_stream_create_fence(fence.basep.stream_fd);
+			if (fd < 0)
+				return -EINVAL;
+
+			katom->fence = sync_fence_fdget(fd);
+
+			if (katom->fence == NULL) {
+				/* The only way the fence can be NULL is if userspace closed it for us.
+				 * So we don't need to clear it up */
+				return -EINVAL;
+			}
+			fence.basep.fd = fd;
+			if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+				katom->fence = NULL;
+				sys_close(fd);
+				return -EINVAL;
+			}
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		{
+			struct base_fence fence;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			/* Get a reference to the fence object */
+			katom->fence = sync_fence_fdget(fence.basep.fd);
+			if (katom->fence == NULL)
+				return -EINVAL;
+		}
+		break;
+#endif				/* CONFIG_SYNC */
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_prepare(katom);
+	case BASE_JD_REQ_SOFT_REPLAY:
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		if (katom->jc == 0)
+			return -EINVAL;
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		return kbase_debug_copy_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		return kbase_ext_res_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		return kbase_ext_res_prepare(katom);
+	default:
+		/* Unsupported soft-job */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void kbase_finish_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		/* Nothing to do */
+		break;
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		/* If fence has not yet been signalled, do it now */
+		if (katom->fence) {
+			kbase_fence_trigger(katom, katom->event_code ==
+					BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+			sync_fence_put(katom->fence);
+			katom->fence = NULL;
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		/* Release the reference to the fence object */
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+		break;
+#endif				/* CONFIG_SYNC */
+
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		kbase_debug_copy_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_finish(katom);
+		break;
+	}
+}
+
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
+{
+	LIST_HEAD(local_suspended_soft_jobs);
+	struct kbase_jd_atom *tmp_iter;
+	struct kbase_jd_atom *katom_iter;
+	struct kbasep_js_device_data *js_devdata;
+	bool resched = false;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	js_devdata = &kbdev->js_data;
+
+	/* Move out the entire list */
+	mutex_lock(&js_devdata->runpool_mutex);
+	list_splice_init(&js_devdata->suspended_soft_jobs_list,
+			&local_suspended_soft_jobs);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/*
+	 * Each atom must be detached from the list and ran separately -
+	 * it could be re-added to the old list, but this is unlikely
+	 */
+	list_for_each_entry_safe(katom_iter, tmp_iter,
+			&local_suspended_soft_jobs, dep_item[1]) {
+		struct kbase_context *kctx = katom_iter->kctx;
+
+		mutex_lock(&kctx->jctx.lock);
+
+		/* Remove from the global list */
+		list_del(&katom_iter->dep_item[1]);
+		/* Remove from the context's list of waiting soft jobs */
+		list_del(&katom_iter->dep_item[0]);
+
+		if (kbase_process_soft_job(katom_iter) == 0) {
+			kbase_finish_soft_job(katom_iter);
+			resched |= jd_done_nolock(katom_iter, NULL);
+		} else {
+			KBASE_DEBUG_ASSERT((katom_iter->core_req &
+					BASEP_JD_REQ_ATOM_TYPE)
+					!= BASE_JD_REQ_SOFT_REPLAY);
+		}
+
+		mutex_unlock(&kctx->jctx.lock);
+	}
+
+	if (resched)
+		kbase_js_sched_all(kbdev);
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c
new file mode 100755
index 0000000..c5fb981
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c
@@ -0,0 +1,182 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_SYNC
+
+#include <linux/seq_file.h>
+#include "sync.h"
+#include <mali_kbase.h>
+#include <mali_kbase_sync.h>
+
+struct mali_sync_timeline {
+	struct sync_timeline timeline;
+	atomic_t counter;
+	atomic_t signalled;
+};
+
+struct mali_sync_pt {
+	struct sync_pt pt;
+	int order;
+	int result;
+};
+
+static struct mali_sync_timeline *to_mali_sync_timeline(struct sync_timeline *timeline)
+{
+	return container_of(timeline, struct mali_sync_timeline, timeline);
+}
+
+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, pt);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_pt *new_mpt;
+	struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), sizeof(struct mali_sync_pt));
+
+	if (!new_pt)
+		return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+	new_mpt->order = mpt->order;
+	new_mpt->result = mpt->result;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+	int result = mpt->result;
+
+	int diff = atomic_read(&mtl->signalled) - mpt->order;
+
+	if (diff >= 0)
+		return (result < 0) ? result : 1;
+
+	return 0;
+}
+
+static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
+	struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
+
+	int diff = ma->order - mb->order;
+
+	if (diff == 0)
+		return 0;
+
+	return (diff < 0) ? -1 : 1;
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str,
+			       int size)
+{
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
+
+	snprintf(str, size, "%d", atomic_read(&mtl->signalled));
+}
+
+static void pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+
+	snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
+}
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name = "Mali",
+	.dup = timeline_dup,
+	.has_signaled = timeline_has_signaled,
+	.compare = timeline_compare,
+	.timeline_value_str = timeline_value_str,
+	.pt_value_str       = pt_value_str,
+};
+
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
+{
+	return timeline->ops == &mali_timeline_ops;
+}
+
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name)
+{
+	struct sync_timeline *tl;
+	struct mali_sync_timeline *mtl;
+
+	tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline), name);
+	if (!tl)
+		return NULL;
+
+	/* Set the counter in our private struct */
+	mtl = to_mali_sync_timeline(tl);
+	atomic_set(&mtl->counter, 0);
+	atomic_set(&mtl->signalled, 0);
+
+	return tl;
+}
+
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+	struct sync_pt *pt = sync_pt_create(parent, sizeof(struct mali_sync_pt));
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+	struct mali_sync_pt *mpt;
+
+	if (!pt)
+		return NULL;
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->order = atomic_inc_return(&mtl->counter);
+	mpt->result = 0;
+
+	return pt;
+}
+
+void kbase_sync_signal_pt(struct sync_pt *pt, int result)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+	int signalled;
+	int diff;
+
+	mpt->result = result;
+
+	do {
+		signalled = atomic_read(&mtl->signalled);
+
+		diff = signalled - mpt->order;
+
+		if (diff > 0) {
+			/* The timeline is already at or ahead of this point.
+			 * This should not happen unless userspace has been
+			 * signalling fences out of order, so warn but don't
+			 * violate the sync_pt API.
+			 * The warning is only in debug builds to prevent
+			 * a malicious user being able to spam dmesg.
+			 */
+#ifdef CONFIG_MALI_DEBUG
+			pr_err("Fences were triggered in a different order to allocation!");
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+	} while (atomic_cmpxchg(&mtl->signalled, signalled, mpt->order) != signalled);
+}
+
+#endif				/* CONFIG_SYNC */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
new file mode 100755
index 0000000..6d8e34d
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -0,0 +1,91 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync.h
+ *
+ */
+
+#ifndef MALI_KBASE_SYNC_H
+#define MALI_KBASE_SYNC_H
+
+#include "sync.h"
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+/* For backwards compatiblility with kernels before 3.17. After 3.17
+ * sync_pt_parent is included in the kernel. */
+static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
+{
+	return pt->parent;
+}
+#endif
+
+/*
+ * Create a stream object.
+ * Built on top of timeline object.
+ * Exposed as a file descriptor.
+ * Life-time controlled via the file descriptor:
+ * - dup to add a ref
+ * - close to remove a ref
+ */
+int kbase_stream_create(const char *name, int *const out_fd);
+
+/*
+ * Create a fence in a stream object
+ */
+int kbase_stream_create_fence(int tl_fd);
+
+/*
+ * Validate a fd to be a valid fence
+ * No reference is taken.
+ *
+ * This function is only usable to catch unintentional user errors early,
+ * it does not stop malicious code changing the fd after this function returns.
+ */
+int kbase_fence_validate(int fd);
+
+/* Returns true if the specified timeline is allocated by Mali */
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline);
+
+/* Allocates a timeline for Mali
+ *
+ * One timeline should be allocated per API context.
+ */
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name);
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by kbase_sync_timeline_alloc
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ */
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent);
+
+/* Signals a particular sync point
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ *
+ * If they are signalled in the wrong order then a message will be printed in debug
+ * builds and otherwise attempts to signal order sync_pts will be ignored.
+ *
+ * result can be negative to indicate error, any other value is interpreted as success.
+ */
+void kbase_sync_signal_pt(struct sync_pt *pt, int result);
+
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
new file mode 100755
index 0000000..ddd0847
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync_user.c
+ *
+ */
+
+#ifdef CONFIG_SYNC
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <mali_kbase_sync.h>
+#include <mali_base_kernel_sync.h>
+
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+	struct sync_timeline *tl;
+
+	tl = (struct sync_timeline *)file->private_data;
+	BUG_ON(!tl);
+	sync_timeline_destroy(tl);
+	return 0;
+}
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE,
+	.release = kbase_stream_close,
+};
+
+int kbase_stream_create(const char *name, int *const out_fd)
+{
+	struct sync_timeline *tl;
+
+	BUG_ON(!out_fd);
+
+	tl = kbase_sync_timeline_alloc(name);
+	if (!tl)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY | O_CLOEXEC);
+
+	if (*out_fd < 0) {
+		sync_timeline_destroy(tl);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int kbase_stream_create_fence(int tl_fd)
+{
+	struct sync_timeline *tl;
+	struct sync_pt *pt;
+	struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+	struct files_struct *files;
+	struct fdtable *fdt;
+#endif
+	int fd;
+	struct file *tl_file;
+
+	tl_file = fget(tl_fd);
+	if (tl_file == NULL)
+		return -EBADF;
+
+	if (tl_file->f_op != &stream_fops) {
+		fd = -EBADF;
+		goto out;
+	}
+
+	tl = tl_file->private_data;
+
+	pt = kbase_sync_pt_alloc(tl);
+	if (!pt) {
+		fd = -EFAULT;
+		goto out;
+	}
+
+	fence = sync_fence_create("mali_fence", pt);
+	if (!fence) {
+		sync_pt_free(pt);
+		fd = -EFAULT;
+		goto out;
+	}
+
+	/* from here the fence owns the sync_pt */
+
+	/* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+#else
+	fd = get_unused_fd();
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+
+	files = current->files;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	__set_close_on_exec(fd, fdt);
+#else
+	FD_SET(fd, fdt->close_on_exec);
+#endif
+	spin_unlock(&files->file_lock);
+#endif  /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+	/* bind fence to the new fd */
+	sync_fence_install(fence, fd);
+
+ out:
+	fput(tl_file);
+
+	return fd;
+}
+
+int kbase_fence_validate(int fd)
+{
+	struct sync_fence *fence;
+
+	fence = sync_fence_fdget(fd);
+	if (!fence)
+		return -EINVAL;
+
+	sync_fence_put(fence);
+	return 0;
+}
+
+#endif				/* CONFIG_SYNC */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
new file mode 100755
index 0000000..e41efb8
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -0,0 +1,2180 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+/*****************************************************************************/
+
+/* The version of swtrace protocol used in timeline stream. */
+#define SWTRACE_VERSION    3
+
+/* The maximum expected length of string in tracepoint descriptor. */
+#define STRLEN_MAX         64 /* bytes */
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC       1000000000ull /* ns */
+
+/* The period of autoflush checker execution in milliseconds. */
+#define AUTOFLUSH_INTERVAL 1000 /* ms */
+
+/* The maximum size of a single packet used by timeline. */
+#define PACKET_SIZE        4096 /* bytes */
+
+/* The number of packets used by one timeline stream. */
+#define PACKET_COUNT       16
+
+/* The number of bytes reserved for packet header.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_HEADER_SIZE 8 /* bytes */
+
+/* The number of bytes reserved for packet sequence number.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_NUMBER_SIZE 4 /* bytes */
+
+/* Packet header - first word.
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_STREAMID_POS  0
+#define PACKET_STREAMID_LEN  8
+#define PACKET_RSVD1_POS     (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
+#define PACKET_RSVD1_LEN     8
+#define PACKET_TYPE_POS      (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
+#define PACKET_TYPE_LEN      3
+#define PACKET_CLASS_POS     (PACKET_TYPE_POS + PACKET_TYPE_LEN)
+#define PACKET_CLASS_LEN     7
+#define PACKET_FAMILY_POS    (PACKET_CLASS_POS + PACKET_CLASS_LEN)
+#define PACKET_FAMILY_LEN    6
+
+/* Packet header - second word
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_LENGTH_POS    0
+#define PACKET_LENGTH_LEN    24
+#define PACKET_SEQBIT_POS    (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
+#define PACKET_SEQBIT_LEN    1
+#define PACKET_RSVD2_POS     (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
+#define PACKET_RSVD2_LEN     7
+
+/* Types of streams generated by timeline.
+ * Order is significant! Header streams must precede respective body streams. */
+enum tl_stream_type {
+	TL_STREAM_TYPE_OBJ_HEADER,
+	TL_STREAM_TYPE_OBJ_SUMMARY,
+	TL_STREAM_TYPE_OBJ,
+	TL_STREAM_TYPE_AUX_HEADER,
+	TL_STREAM_TYPE_AUX,
+
+	TL_STREAM_TYPE_COUNT
+};
+
+/* Timeline packet family ids.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_family {
+	TL_PACKET_FAMILY_CTRL = 0, /* control packets */
+	TL_PACKET_FAMILY_TL   = 1, /* timeline packets */
+
+	TL_PACKET_FAMILY_COUNT
+};
+
+/* Packet classes used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_class {
+	TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
+	TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
+};
+
+/* Packet types used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_type {
+	TL_PACKET_TYPE_HEADER  = 0, /* stream's header/directory */
+	TL_PACKET_TYPE_BODY    = 1, /* stream's body */
+	TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
+};
+
+/* Message ids of trace events that are recorded in the timeline stream. */
+enum tl_msg_id_obj {
+	/* Timeline object events. */
+	KBASE_TL_NEW_CTX,
+	KBASE_TL_NEW_GPU,
+	KBASE_TL_NEW_LPU,
+	KBASE_TL_NEW_ATOM,
+	KBASE_TL_NEW_AS,
+	KBASE_TL_DEL_CTX,
+	KBASE_TL_DEL_ATOM,
+	KBASE_TL_LIFELINK_LPU_GPU,
+	KBASE_TL_LIFELINK_AS_GPU,
+	KBASE_TL_RET_CTX_LPU,
+	KBASE_TL_RET_ATOM_CTX,
+	KBASE_TL_RET_ATOM_LPU,
+	KBASE_TL_NRET_CTX_LPU,
+	KBASE_TL_NRET_ATOM_CTX,
+	KBASE_TL_NRET_ATOM_LPU,
+	KBASE_TL_RET_AS_CTX,
+	KBASE_TL_NRET_AS_CTX,
+	KBASE_TL_RET_ATOM_AS,
+	KBASE_TL_NRET_ATOM_AS,
+	KBASE_TL_DEP_ATOM_ATOM,
+	KBASE_TL_ATTRIB_ATOM_CONFIG,
+	KBASE_TL_ATTRIB_AS_CONFIG,
+
+	/* Job dump specific events. */
+	KBASE_JD_GPU_SOFT_RESET
+};
+
+/* Message ids of trace events that are recorded in the auxiliary stream. */
+enum tl_msg_id_aux {
+	KBASE_AUX_PM_STATE,
+	KBASE_AUX_ISSUE_JOB_SOFTSTOP,
+	KBASE_AUX_JOB_SOFTSTOP,
+	KBASE_AUX_JOB_SOFTSTOP_EX,
+	KBASE_AUX_PAGEFAULT,
+	KBASE_AUX_PAGESALLOC
+};
+
+/*****************************************************************************/
+
+/**
+ * struct tl_stream - timeline stream structure
+ * @lock: message order lock
+ * @buffer: array of buffers
+ * @wbi: write buffer index
+ * @rbi: read buffer index
+ * @numbered: if non-zero stream's packets are sequentially numbered
+ * @autoflush_counter: counter tracking stream's autoflush state
+ *
+ * This structure holds information needed to construct proper packets in the
+ * timeline stream. Each message in sequence must bear timestamp that is greater
+ * to one in previous message in the same stream. For this reason lock is held
+ * throughout the process of message creation. Each stream contains set of
+ * buffers. Each buffer will hold one MIPE packet. In case there is no free
+ * space required to store incoming message the oldest buffer is discarded.
+ * Each packet in timeline body stream has sequence number embedded (this value
+ * must increment monotonically and is used by packets receiver to discover
+ * buffer overflows.
+ * Autoflush counter is set to negative number when there is no data pending
+ * for flush and it is set to zero on every update of the buffer. Autoflush
+ * timer will increment the counter by one on every expiry. In case there will
+ * be no activity on the buffer during two consecutive timer expiries, stream
+ * buffer will be flushed.
+ */
+struct tl_stream {
+	spinlock_t lock;
+
+	struct {
+		atomic_t size;              /* number of bytes in buffer */
+		char     data[PACKET_SIZE]; /* buffer's data */
+	} buffer[PACKET_COUNT];
+
+	atomic_t wbi;
+	atomic_t rbi;
+
+	int      numbered;
+	atomic_t autoflush_counter;
+};
+
+/**
+ * struct tp_desc - tracepoint message descriptor structure
+ * @id:        tracepoint ID identifying message in stream
+ * @id_str:    human readable version of tracepoint ID
+ * @name:      tracepoint description
+ * @arg_types: tracepoint's arguments types declaration
+ * @arg_names: comma separated list of tracepoint's arguments names
+ */
+struct tp_desc {
+	u32        id;
+	const char *id_str;
+	const char *name;
+	const char *arg_types;
+	const char *arg_names;
+};
+
+/*****************************************************************************/
+
+/* Configuration of timeline streams generated by kernel.
+ * Kernel emit only streams containing either timeline object events or
+ * auxiliary events. All streams have stream id value of 1 (as opposed to user
+ * space streams that have value of 0). */
+static const struct {
+	enum tl_packet_family pkt_family;
+	enum tl_packet_class  pkt_class;
+	enum tl_packet_type   pkt_type;
+	unsigned int          stream_id;
+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY,    1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY,    1}
+};
+
+/* The timeline streams generated by kernel. */
+static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT];
+
+/* Autoflush timer. */
+static struct timer_list autoflush_timer;
+
+/* If non-zero autoflush timer is active. */
+static atomic_t autoflush_timer_active;
+
+/* Reader lock. Only one reader is allowed to have access to the timeline
+ * streams at any given time. */
+static DEFINE_MUTEX(tl_reader_lock);
+
+/* Timeline stream event queue. */
+static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
+
+/* The timeline stream file operations functions. */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos);
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait);
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations kbasep_tlstream_fops = {
+	.release = kbasep_tlstream_release,
+	.read    = kbasep_tlstream_read,
+	.poll    = kbasep_tlstream_poll,
+};
+
+/* Descriptors of timeline messages transmitted in object events stream. */
+static const struct tp_desc tp_desc_obj[] = {
+	{
+		KBASE_TL_NEW_CTX,
+		__stringify(KBASE_TL_NEW_CTX),
+		"object ctx is created",
+		"@pII",
+		"ctx,ctx_nr,tgid"
+	},
+	{
+		KBASE_TL_NEW_GPU,
+		__stringify(KBASE_TL_NEW_GPU),
+		"object gpu is created",
+		"@pII",
+		"gpu,gpu_id,core_count"
+	},
+	{
+		KBASE_TL_NEW_LPU,
+		__stringify(KBASE_TL_NEW_LPU),
+		"object lpu is created",
+		"@pII",
+		"lpu,lpu_nr,lpu_fn"
+	},
+	{
+		KBASE_TL_NEW_ATOM,
+		__stringify(KBASE_TL_NEW_ATOM),
+		"object atom is created",
+		"@pI",
+		"atom,atom_nr"
+	},
+	{
+		KBASE_TL_NEW_AS,
+		__stringify(KBASE_TL_NEW_AS),
+		"address space object is created",
+		"@pI",
+		"address_space,as_nr"
+	},
+	{
+		KBASE_TL_DEL_CTX,
+		__stringify(KBASE_TL_DEL_CTX),
+		"context is destroyed",
+		"@p",
+		"ctx"
+	},
+	{
+		KBASE_TL_DEL_ATOM,
+		__stringify(KBASE_TL_DEL_ATOM),
+		"atom is destroyed",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_LIFELINK_LPU_GPU,
+		__stringify(KBASE_TL_LIFELINK_LPU_GPU),
+		"lpu is deleted with gpu",
+		"@pp",
+		"lpu,gpu"
+	},
+	{
+		KBASE_TL_LIFELINK_AS_GPU,
+		__stringify(KBASE_TL_LIFELINK_AS_GPU),
+		"address space is deleted with gpu",
+		"@pp",
+		"address_space,gpu"
+	},
+	{
+		KBASE_TL_RET_CTX_LPU,
+		__stringify(KBASE_TL_RET_CTX_LPU),
+		"context is retained by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_RET_ATOM_CTX,
+		__stringify(KBASE_TL_RET_ATOM_CTX),
+		"atom is retained by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_LPU,
+		__stringify(KBASE_TL_RET_ATOM_LPU),
+		"atom is retained by lpu",
+		"@pps",
+		"atom,lpu,attrib_match_list"
+	},
+	{
+		KBASE_TL_NRET_CTX_LPU,
+		__stringify(KBASE_TL_NRET_CTX_LPU),
+		"context is released by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_NRET_ATOM_CTX,
+		__stringify(KBASE_TL_NRET_ATOM_CTX),
+		"atom is released by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_NRET_ATOM_LPU,
+		__stringify(KBASE_TL_NRET_ATOM_LPU),
+		"atom is released by lpu",
+		"@pp",
+		"atom,lpu"
+	},
+	{
+		KBASE_TL_RET_AS_CTX,
+		__stringify(KBASE_TL_RET_AS_CTX),
+		"address space is retained by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_NRET_AS_CTX,
+		__stringify(KBASE_TL_NRET_AS_CTX),
+		"address space is released by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_AS,
+		__stringify(KBASE_TL_RET_ATOM_AS),
+		"atom is retained by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_NRET_ATOM_AS,
+		__stringify(KBASE_TL_NRET_ATOM_AS),
+		"atom is released by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_DEP_ATOM_ATOM,
+		__stringify(KBASE_TL_DEP_ATOM_ATOM),
+		"atom2 depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_ATOM_CONFIG),
+		"atom job slot attributes",
+		"@pLLI",
+		"atom,descriptor,affinity,config"
+	},
+	{
+		KBASE_TL_ATTRIB_AS_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_AS_CONFIG),
+		"address space attributes",
+		"@pLLL",
+		"address_space,transtab,memattr,transcfg"
+	},
+	{
+		KBASE_JD_GPU_SOFT_RESET,
+		__stringify(KBASE_JD_GPU_SOFT_RESET),
+		"gpu soft reset",
+		"@p",
+		"gpu"
+	},
+};
+
+/* Descriptors of timeline messages transmitted in auxiliary events stream. */
+static const struct tp_desc tp_desc_aux[] = {
+	{
+		KBASE_AUX_PM_STATE,
+		__stringify(KBASE_AUX_PM_STATE),
+		"PM state",
+		"@IL",
+		"core_type,core_state_bitset"
+	},
+	{
+		KBASE_AUX_ISSUE_JOB_SOFTSTOP,
+		__stringify(KBASE_AUX_ISSUE_JOB_SOFTSTOP),
+		"Issuing job soft stop",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_AUX_JOB_SOFTSTOP,
+		__stringify(KBASE_AUX_JOB_SOFTSTOP),
+		"Job soft stop",
+		"@I",
+		"tag_id"
+	},
+	{
+		KBASE_AUX_JOB_SOFTSTOP_EX,
+		__stringify(KBASE_AUX_JOB_SOFTSTOP_EX),
+		"Job soft stop, more details",
+		"@pI",
+		"atom,job_type"
+	},
+	{
+		KBASE_AUX_PAGEFAULT,
+		__stringify(KBASE_AUX_PAGEFAULT),
+		"Page fault",
+		"@IL",
+		"ctx_nr,page_cnt_change"
+	},
+	{
+		KBASE_AUX_PAGESALLOC,
+		__stringify(KBASE_AUX_PAGESALLOC),
+		"Total alloc pages change",
+		"@IL",
+		"ctx_nr,page_cnt"
+	}
+};
+
+#if MALI_UNIT_TEST
+/* Number of bytes read by user. */
+static atomic_t tlstream_bytes_collected = {0};
+
+/* Number of bytes generated by tracepoint messages. */
+static atomic_t tlstream_bytes_generated = {0};
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+/* Indicator of whether the timeline stream file descriptor is used. */
+atomic_t kbase_tlstream_enabled = {0};
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ * Return: timestamp value
+ */
+static u64 kbasep_tlstream_get_timestamp(void)
+{
+	struct timespec ts;
+	u64             timestamp;
+
+	getrawmonotonic(&ts);
+	timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+	return timestamp;
+}
+
+/**
+ * kbasep_tlstream_write_bytes - write data to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ * @bytes:  pointer to buffer holding data
+ * @len:    length of data to be written
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_bytes(
+		char       *buffer,
+		size_t     pos,
+		const void *bytes,
+		size_t     len)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(bytes);
+
+	memcpy(&buffer[pos], bytes, len);
+
+	return pos + len;
+}
+
+/**
+ * kbasep_tlstream_write_string - write string to message buffer
+ * @buffer:         buffer where data will be written
+ * @pos:            position in the buffer where to place data
+ * @string:         pointer to buffer holding the source string
+ * @max_write_size: number of bytes that can be stored in buffer
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_string(
+		char       *buffer,
+		size_t     pos,
+		const char *string,
+		size_t     max_write_size)
+{
+	u32 string_len;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(string);
+	/* Timeline string consists of at least string length and nul
+	 * terminator. */
+	KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
+	max_write_size -= sizeof(string_len);
+
+	string_len = strlcpy(
+			&buffer[pos + sizeof(string_len)],
+			string,
+			max_write_size);
+	string_len += sizeof(char);
+
+	/* Make sure that the source string fit into the buffer. */
+	KBASE_DEBUG_ASSERT(string_len <= max_write_size);
+
+	/* Update string length. */
+	memcpy(&buffer[pos], &string_len, sizeof(string_len));
+
+	return pos + sizeof(string_len) + string_len;
+}
+
+/**
+ * kbasep_tlstream_write_timestamp - write timestamp to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos)
+{
+	u64 timestamp = kbasep_tlstream_get_timestamp();
+
+	return kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&timestamp, sizeof(timestamp));
+}
+
+/**
+ * kbasep_tlstream_put_bits - put bits in a word
+ * @word:   pointer to the words being modified
+ * @value:  value that shall be written to given position
+ * @bitpos: position where value shall be written (in bits)
+ * @bitlen: length of value (in bits)
+ */
+static void kbasep_tlstream_put_bits(
+		u32          *word,
+		u32          value,
+		unsigned int bitpos,
+		unsigned int bitlen)
+{
+	const u32 mask = ((1 << bitlen) - 1) << bitpos;
+
+	KBASE_DEBUG_ASSERT(word);
+	KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen));
+	KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32);
+
+	*word &= ~mask;
+	*word |= ((value << bitpos) & mask);
+}
+
+/**
+ * kbasep_tlstream_packet_header_setup - setup the packet header
+ * @buffer:     pointer to the buffer
+ * @pkt_family: packet's family
+ * @pkt_type:   packet's type
+ * @pkt_class:  packet's class
+ * @stream_id:  stream id
+ * @numbered:   non-zero if this stream is numbered
+ *
+ * Function sets up immutable part of packet header in the given buffer.
+ */
+static void kbasep_tlstream_packet_header_setup(
+		char                  *buffer,
+		enum tl_packet_family pkt_family,
+		enum tl_packet_class  pkt_class,
+		enum tl_packet_type   pkt_type,
+		unsigned int          stream_id,
+		int                   numbered)
+{
+	u32 word0 = 0;
+	u32 word1 = 0;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL);
+	KBASE_DEBUG_ASSERT(
+			(pkt_type == TL_PACKET_TYPE_HEADER)  ||
+			(pkt_type == TL_PACKET_TYPE_SUMMARY) ||
+			(pkt_type == TL_PACKET_TYPE_BODY));
+	KBASE_DEBUG_ASSERT(
+			(pkt_class == TL_PACKET_CLASS_OBJ) ||
+			(pkt_class == TL_PACKET_CLASS_AUX));
+
+	kbasep_tlstream_put_bits(
+			&word0, pkt_family,
+			PACKET_FAMILY_POS, PACKET_FAMILY_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_class,
+			PACKET_CLASS_POS, PACKET_CLASS_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_type,
+			PACKET_TYPE_POS, PACKET_TYPE_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, stream_id,
+			PACKET_STREAMID_POS, PACKET_STREAMID_LEN);
+
+	if (numbered)
+		kbasep_tlstream_put_bits(
+				&word1, 1,
+				PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN);
+
+	memcpy(&buffer[0],             &word0, sizeof(word0));
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_header_update - update the packet header
+ * @buffer:    pointer to the buffer
+ * @data_size: amount of data carried in this packet
+ *
+ * Function updates mutable part of packet header in the given buffer.
+ * Note that value of data_size must not including size of the header.
+ */
+static void kbasep_tlstream_packet_header_update(
+		char   *buffer,
+		size_t data_size)
+{
+	u32 word0;
+	u32 word1;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	CSTD_UNUSED(word0);
+
+	memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1));
+
+	kbasep_tlstream_put_bits(
+			&word1, data_size,
+			PACKET_LENGTH_POS, PACKET_LENGTH_LEN);
+
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_number_update - update the packet number
+ * @buffer:  pointer to the buffer
+ * @counter: value of packet counter for this packet's stream
+ *
+ * Function updates packet number embedded within the packet placed in the
+ * given buffer.
+ */
+static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+
+	memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter));
+}
+
+/**
+ * kbasep_timeline_stream_reset - reset stream
+ * @stream:  pointer to the stream structure
+ *
+ * Function discards all pending messages and resets packet counters.
+ */
+static void kbasep_timeline_stream_reset(struct tl_stream *stream)
+{
+	unsigned int i;
+
+	for (i = 0; i < PACKET_COUNT; i++) {
+		if (stream->numbered)
+			atomic_set(
+					&stream->buffer[i].size,
+					PACKET_HEADER_SIZE +
+					PACKET_NUMBER_SIZE);
+		else
+			atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE);
+	}
+
+	atomic_set(&stream->wbi, 0);
+	atomic_set(&stream->rbi, 0);
+}
+
+/**
+ * kbasep_timeline_stream_init - initialize timeline stream
+ * @stream:      pointer to the stream structure
+ * @stream_type: stream type
+ */
+static void kbasep_timeline_stream_init(
+		struct tl_stream    *stream,
+		enum tl_stream_type stream_type)
+{
+	unsigned int i;
+
+	KBASE_DEBUG_ASSERT(stream);
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	spin_lock_init(&stream->lock);
+
+	/* All packets carrying tracepoints shall be numbered. */
+	if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type)
+		stream->numbered = 1;
+	else
+		stream->numbered = 0;
+
+	for (i = 0; i < PACKET_COUNT; i++)
+		kbasep_tlstream_packet_header_setup(
+				stream->buffer[i].data,
+				tl_stream_cfg[stream_type].pkt_family,
+				tl_stream_cfg[stream_type].pkt_class,
+				tl_stream_cfg[stream_type].pkt_type,
+				tl_stream_cfg[stream_type].stream_id,
+				stream->numbered);
+
+	kbasep_timeline_stream_reset(tl_stream[stream_type]);
+}
+
+/**
+ * kbasep_timeline_stream_term - terminate timeline stream
+ * @stream: pointer to the stream structure
+ */
+static void kbasep_timeline_stream_term(struct tl_stream *stream)
+{
+	KBASE_DEBUG_ASSERT(stream);
+}
+
+/**
+ * kbasep_tlstream_msgbuf_submit - submit packet to the user space
+ * @stream:     pointer to the stream structure
+ * @wb_idx_raw: write buffer index
+ * @wb_size:    length of data stored in current buffer
+ *
+ * Function updates currently written buffer with packet header. Then write
+ * index is incremented and buffer is handled to user space. Parameters
+ * of new buffer are returned using provided arguments.
+ *
+ * Return: length of data in new buffer
+ *
+ * Warning:  User must update the stream structure with returned value.
+ */
+static size_t kbasep_tlstream_msgbuf_submit(
+		struct tl_stream *stream,
+		unsigned int      wb_idx_raw,
+		unsigned int      wb_size)
+{
+	unsigned int rb_idx_raw = atomic_read(&stream->rbi);
+	unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
+
+	/* Set stream as flushed. */
+	atomic_set(&stream->autoflush_counter, -1);
+
+	kbasep_tlstream_packet_header_update(
+			stream->buffer[wb_idx].data,
+			wb_size - PACKET_HEADER_SIZE);
+
+	if (stream->numbered)
+		kbasep_tlstream_packet_number_update(
+				stream->buffer[wb_idx].data,
+				wb_idx_raw);
+
+	/* Increasing write buffer index will expose this packet to the reader.
+	 * As stream->lock is not taken on reader side we must make sure memory
+	 * is updated correctly before this will happen. */
+	smp_wmb();
+	wb_idx_raw++;
+	atomic_set(&stream->wbi, wb_idx_raw);
+
+	/* Inform user that packets are ready for reading. */
+	wake_up_interruptible(&tl_event_queue);
+
+	/* Detect and mark overflow in this stream. */
+	if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) {
+		/* Reader side depends on this increment to correctly handle
+		 * overflows. The value shall be updated only if it was not
+		 * modified by the reader. The data holding buffer will not be
+		 * updated before stream->lock is released, however size of the
+		 * buffer will. Make sure this increment is globally visible
+		 * before information about selected write buffer size. */
+		atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1);
+	}
+
+	wb_size = PACKET_HEADER_SIZE;
+	if (stream->numbered)
+		wb_size += PACKET_NUMBER_SIZE;
+
+	return wb_size;
+}
+
+/**
+ * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer
+ * @stream_type: type of the stream that shall be locked
+ * @msg_size:    message size
+ * @flags:       pointer to store flags passed back on stream release
+ *
+ * Function will lock the stream and reserve the number of bytes requested
+ * in msg_size for the user.
+ *
+ * Return: pointer to the buffer where message can be stored
+ *
+ * Warning: Stream must be released with kbasep_tlstream_msgbuf_release().
+ *          Only atomic operations are allowed while stream is locked
+ *          (i.e. do not use any operation that may sleep).
+ */
+static char *kbasep_tlstream_msgbuf_acquire(
+		enum tl_stream_type stream_type,
+		size_t              msg_size,
+		unsigned long       *flags) __acquires(&stream->lock)
+{
+	struct tl_stream *stream;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(
+			PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
+			msg_size);
+
+	stream = tl_stream[stream_type];
+
+	spin_lock_irqsave(&stream->lock, *flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	/* Select next buffer if data will not fit into current one. */
+	if (PACKET_SIZE < wb_size + msg_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx  = (wb_idx_raw + 1) % PACKET_COUNT;
+	}
+
+	/* Reserve space in selected buffer. */
+	atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size);
+
+#if MALI_UNIT_TEST
+	atomic_add(msg_size, &tlstream_bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+	return &stream->buffer[wb_idx].data[wb_size];
+}
+
+/**
+ * kbasep_tlstream_msgbuf_release - unlock selected stream
+ * @stream_type:  type of the stream that shall be locked
+ * @flags:        value obtained during stream acquire
+ *
+ * Function releases stream that has been previously locked with a call to
+ * kbasep_tlstream_msgbuf_acquire().
+ */
+static void kbasep_tlstream_msgbuf_release(
+		enum tl_stream_type stream_type,
+		unsigned long       flags) __releases(&stream->lock)
+{
+	struct tl_stream *stream;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	stream = tl_stream[stream_type];
+
+	/* Mark stream as containing unflushed data. */
+	atomic_set(&stream->autoflush_counter, 0);
+
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_flush_stream - flush stream
+ * @stype:  type of stream to be flushed
+ *
+ * Flush pending data in timeline stream.
+ */
+static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
+{
+	struct tl_stream *stream = tl_stream[stype];
+	unsigned long    flags;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+	size_t           min_size = PACKET_HEADER_SIZE;
+
+	if (stream->numbered)
+		min_size += PACKET_NUMBER_SIZE;
+
+	spin_lock_irqsave(&stream->lock, flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	if (wb_size > min_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+		atomic_set(&stream->buffer[wb_idx].size, wb_size);
+	}
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/**
+ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
+ * @data:  unused
+ *
+ * Timer is executed periodically to check if any of the stream contains
+ * buffer ready to be submitted to user space.
+ */
+static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
+{
+	enum tl_stream_type stype;
+	int                 rcode;
+
+	CSTD_UNUSED(data);
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
+		struct tl_stream *stream = tl_stream[stype];
+		unsigned long    flags;
+		unsigned int     wb_idx_raw;
+		unsigned int     wb_idx;
+		size_t           wb_size;
+		size_t           min_size = PACKET_HEADER_SIZE;
+
+		int af_cnt = atomic_read(&stream->autoflush_counter);
+
+		/* Check if stream contain unflushed data. */
+		if (0 > af_cnt)
+			continue;
+
+		/* Check if stream should be flushed now. */
+		if (af_cnt != atomic_cmpxchg(
+					&stream->autoflush_counter,
+					af_cnt,
+					af_cnt + 1))
+			continue;
+		if (!af_cnt)
+			continue;
+
+		/* Autoflush this stream. */
+		if (stream->numbered)
+			min_size += PACKET_NUMBER_SIZE;
+
+		spin_lock_irqsave(&stream->lock, flags);
+
+		wb_idx_raw = atomic_read(&stream->wbi);
+		wb_idx     = wb_idx_raw % PACKET_COUNT;
+		wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+		if (wb_size > min_size) {
+			wb_size = kbasep_tlstream_msgbuf_submit(
+					stream, wb_idx_raw, wb_size);
+			wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+			atomic_set(&stream->buffer[wb_idx].size,
+					wb_size);
+		}
+		spin_unlock_irqrestore(&stream->lock, flags);
+	}
+
+	if (atomic_read(&autoflush_timer_active))
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+	CSTD_UNUSED(rcode);
+}
+
+/**
+ * kbasep_tlstream_packet_pending - check timeline streams for pending packets
+ * @stype:      pointer to variable where stream type will be placed
+ * @rb_idx_raw: pointer to variable where read buffer index will be placed
+ *
+ * Function checks all streams for pending packets. It will stop as soon as
+ * packet ready to be submitted to user space is detected. Variables under
+ * pointers, passed as the parameters to this function will be updated with
+ * values pointing to right stream and buffer.
+ *
+ * Return: non-zero if any of timeline streams has at last one packet ready
+ */
+static int kbasep_tlstream_packet_pending(
+		enum tl_stream_type *stype,
+		unsigned int        *rb_idx_raw)
+{
+	int pending = 0;
+
+	KBASE_DEBUG_ASSERT(stype);
+	KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+	for (
+			*stype = 0;
+			(*stype < TL_STREAM_TYPE_COUNT) && !pending;
+			(*stype)++) {
+		if (NULL != tl_stream[*stype]) {
+			*rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi);
+			/* Read buffer index may be updated by writer in case of
+			 * overflow. Read and write buffer indexes must be
+			 * loaded in correct order. */
+			smp_rmb();
+			if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw)
+				pending = 1;
+		}
+	}
+	(*stype)--;
+
+	return pending;
+}
+
+/**
+ * kbasep_tlstream_read - copy data from streams to buffer provided by user
+ * @filp:   pointer to file structure (unused)
+ * @buffer: pointer to the buffer provided by user
+ * @size:   maximum amount of data that can be stored in the buffer
+ * @f_pos:  pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer
+ */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos)
+{
+	ssize_t copy_len = 0;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(f_pos);
+	CSTD_UNUSED(filp);
+
+	if ((0 > *f_pos) || (PACKET_SIZE > size))
+		return -EINVAL;
+
+	mutex_lock(&tl_reader_lock);
+
+	while (copy_len < size) {
+		enum tl_stream_type stype;
+		unsigned int        rb_idx_raw;
+		unsigned int        rb_idx;
+		size_t              rb_size;
+
+		/* If we don't have any data yet, wait for packet to be
+		 * submitted. If we already read some packets and there is no
+		 * packet pending return back to user. */
+		if (0 < copy_len) {
+			if (!kbasep_tlstream_packet_pending(
+						&stype,
+						&rb_idx_raw))
+				break;
+		} else {
+			if (wait_event_interruptible(
+						tl_event_queue,
+						kbasep_tlstream_packet_pending(
+							&stype,
+							&rb_idx_raw))) {
+				copy_len = -ERESTARTSYS;
+				break;
+			}
+		}
+
+		/* Check if this packet fits into the user buffer.
+		 * If so copy its content. */
+		rb_idx = rb_idx_raw % PACKET_COUNT;
+		rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size);
+		if (rb_size > size - copy_len)
+			break;
+		if (copy_to_user(
+					&buffer[copy_len],
+					tl_stream[stype]->buffer[rb_idx].data,
+					rb_size)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* If the rbi still points to the packet we just processed
+		 * then there was no overflow so we add the copied size to
+		 * copy_len and move rbi on to the next packet
+		 */
+		smp_rmb();
+		if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
+			copy_len += rb_size;
+			atomic_inc(&tl_stream[stype]->rbi);
+
+#if MALI_UNIT_TEST
+			atomic_add(rb_size, &tlstream_bytes_collected);
+#endif /* MALI_UNIT_TEST */
+		}
+	}
+
+	mutex_unlock(&tl_reader_lock);
+
+	return copy_len;
+}
+
+/**
+ * kbasep_tlstream_poll - poll timeline stream for packets
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait)
+{
+	enum tl_stream_type stream_type;
+	unsigned int        rb_idx;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	poll_wait(filp, &tl_event_queue, wait);
+	if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_release - release timeline stream descriptor
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ *
+ * Return always return zero
+ */
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
+{
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+	CSTD_UNUSED(inode);
+	CSTD_UNUSED(filp);
+
+	/* Stop autoflush timer before releasing access to streams. */
+	atomic_set(&autoflush_timer_active, 0);
+	del_timer_sync(&autoflush_timer);
+
+	atomic_set(&kbase_tlstream_enabled, 0);
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_timeline_header - prepare timeline header stream packet
+ * @stream_type: type of the stream that will carry header data
+ * @tp_desc:     pointer to array with tracepoint descriptors
+ * @tp_count:    number of descriptors in the given array
+ *
+ * Functions fills in information about tracepoints stored in body stream
+ * associated with this header stream.
+ */
+static void kbasep_tlstream_timeline_header(
+		enum tl_stream_type  stream_type,
+		const struct tp_desc *tp_desc,
+		u32                  tp_count)
+{
+	const u8      tv = SWTRACE_VERSION; /* protocol version */
+	const u8      ps = sizeof(void *); /* pointer size */
+	size_t        msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
+	char          *buffer;
+	size_t        pos = 0;
+	unsigned long flags;
+	unsigned int  i;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(tp_desc);
+
+	/* Calculate the size of the timeline message. */
+	for (i = 0; i < tp_count; i++) {
+		msg_size += sizeof(tp_desc[i].id);
+		msg_size +=
+			strnlen(tp_desc[i].id_str,    STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].name,      STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_types, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_names, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+	}
+
+	KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size);
+
+	buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tp_count, sizeof(tp_count));
+
+	for (i = 0; i < tp_count; i++) {
+		pos = kbasep_tlstream_write_bytes(
+				buffer, pos,
+				&tp_desc[i].id, sizeof(tp_desc[i].id));
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].id_str, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].name, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_types, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_names, msg_size - pos);
+	}
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(stream_type, flags);
+
+	/* We don't expect any more data to be read in this stream.
+	 * As header stream must be read before its associated body stream,
+	 * make this packet visible to the user straightaway. */
+	kbasep_tlstream_flush_stream(stream_type);
+}
+
+/*****************************************************************************/
+
+int kbase_tlstream_init(void)
+{
+	enum tl_stream_type i;
+
+	/* Prepare stream structures. */
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL);
+		if (!tl_stream[i])
+			break;
+		kbasep_timeline_stream_init(tl_stream[i], i);
+	}
+	if (TL_STREAM_TYPE_COUNT > i) {
+		for (; i > 0; i--) {
+			kbasep_timeline_stream_term(tl_stream[i - 1]);
+			kfree(tl_stream[i - 1]);
+		}
+		return -ENOMEM;
+	}
+
+	/* Initialize autoflush timer. */
+	atomic_set(&autoflush_timer_active, 0);
+	setup_timer(&autoflush_timer,
+			kbasep_tlstream_autoflush_timer_callback,
+			0);
+
+	return 0;
+}
+
+void kbase_tlstream_term(void)
+{
+	enum tl_stream_type i;
+
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		kbasep_timeline_stream_term(tl_stream[i]);
+		kfree(tl_stream[i]);
+	}
+}
+
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd)
+{
+	if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, 1)) {
+		int rcode;
+
+		*fd = anon_inode_getfd(
+				"[mali_tlstream]",
+				&kbasep_tlstream_fops,
+				kctx,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd) {
+			atomic_set(&kbase_tlstream_enabled, 0);
+			return *fd;
+		}
+
+		/* Reset and initialize header streams. */
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_HEADER]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_AUX_HEADER]);
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_OBJ_HEADER,
+				tp_desc_obj,
+				ARRAY_SIZE(tp_desc_obj));
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_AUX_HEADER,
+				tp_desc_aux,
+				ARRAY_SIZE(tp_desc_aux));
+
+		/* Start autoflush timer. */
+		atomic_set(&autoflush_timer_active, 1);
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+		CSTD_UNUSED(rcode);
+
+	} else {
+		*fd = -EBUSY;
+	}
+
+	return 0;
+}
+
+void kbase_tlstream_flush_streams(void)
+{
+	enum tl_stream_type stype;
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
+		kbasep_tlstream_flush_stream(stype);
+}
+
+void kbase_tlstream_reset_body_streams(void)
+{
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_OBJ]);
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_AUX]);
+}
+
+#if MALI_UNIT_TEST
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
+{
+	KBASE_DEBUG_ASSERT(bytes_collected);
+	KBASE_DEBUG_ASSERT(bytes_generated);
+	*bytes_collected = atomic_read(&tlstream_bytes_collected);
+	*bytes_generated = atomic_read(&tlstream_bytes_generated);
+}
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
+{
+	const u32     msg_id = KBASE_TL_NEW_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) +
+		sizeof(core_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &id, sizeof(id));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_count, sizeof(core_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
+{
+	const u32     msg_id = KBASE_TL_NEW_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) +
+		sizeof(fn);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &fn, sizeof(fn));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_LPU_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_AS_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_ctx(void *context)
+{
+	const u32     msg_id = KBASE_TL_DEL_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_atom(void *atom)
+{
+	const u32     msg_id = KBASE_TL_DEL_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_RET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_LPU;
+	const size_t  msg_s0 = sizeof(u32) + sizeof(char) +
+			strnlen(attrib_match_list, STRLEN_MAX);
+	const size_t  msg_size =
+			sizeof(msg_id) + sizeof(u64) +
+			sizeof(atom) + sizeof(lpu) + msg_s0;
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_string(
+			buffer, pos, attrib_match_list, msg_s0);
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_DEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_RET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_NRET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+		sizeof(jd) + sizeof(affinity) + sizeof(config);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &jd, sizeof(jd));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &affinity, sizeof(affinity));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &config, sizeof(config));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) +
+		sizeof(transtab) + sizeof(memattr) + sizeof(transcfg);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transtab, sizeof(transtab));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &memattr, sizeof(memattr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transcfg, sizeof(transcfg));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
+{
+	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+{
+	const u32     msg_id = KBASE_AUX_PM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
+		sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_type, sizeof(core_type));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_issue_job_softstop(void *katom)
+{
+	const u32     msg_id = KBASE_AUX_ISSUE_JOB_SOFTSTOP;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(katom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_job_softstop(u32 js_id)
+{
+	const u32     msg_id = KBASE_AUX_JOB_SOFTSTOP;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(js_id);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &js_id, sizeof(js_id));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+/**
+ * __kbase_tlstream_aux_job_softstop_ex_record - record the trace point
+ * @katom: the atom that has been soft-stopped
+ * @job_type: the job type
+ */
+static void __kbase_tlstream_aux_job_softstop_ex_record(
+		void *katom, u32 job_type)
+{
+	const u32     msg_id = KBASE_AUX_JOB_SOFTSTOP_EX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(katom) + sizeof(job_type);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &job_type, sizeof(job_type));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u64 jd = katom->jc;
+
+	while (jd != 0) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		job = kbase_vmap(kctx, jd, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev,
+				"__kbase_tlstream_aux_job_softstop_ex: failed to map job descriptor 0x%llx for atom 0x%p\n",
+				jd, (void *)katom);
+			break;
+		}
+		if (job->exception_status != BASE_JD_EVENT_STOPPED) {
+			kbase_vunmap(kctx, &map);
+			break;
+		}
+
+		__kbase_tlstream_aux_job_softstop_ex_record(
+				katom, job->job_type);
+
+		jd = job->job_descriptor_size ?
+			job->next_job._64 : job->next_job._32;
+		kbase_vunmap(kctx, &map);
+	}
+}
+
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
+{
+	const u32     msg_id = KBASE_AUX_PAGEFAULT;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count_change);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&page_count_change, sizeof(page_count_change));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
+{
+	const u32     msg_id = KBASE_AUX_PAGESALLOC;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &page_count, sizeof(page_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
new file mode 100755
index 0000000..6c5c596
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -0,0 +1,485 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_KBASE_TLSTREAM_H)
+#define _KBASE_TLSTREAM_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_init - initialize timeline infrastructure in kernel
+ * Return: zero on success, negative number on error
+ */
+int kbase_tlstream_init(void);
+
+/**
+ * kbase_tlstream_term - terminate timeline infrastructure in kernel
+ *
+ * Timeline need have to been previously enabled with kbase_tlstream_init().
+ */
+void kbase_tlstream_term(void);
+
+/**
+ * kbase_tlstream_acquire - acquire timeline stream file descriptor
+ * @kctx: kernel common context
+ * @fd:   timeline stream file descriptor
+ *
+ * This descriptor is meant to be used by userspace timeline to gain access to
+ * kernel timeline stream. This stream is later broadcasted by user space to the
+ * timeline client.
+ * Only one entity can own the descriptor at any given time. Descriptor shall be
+ * closed if unused. If descriptor cannot be obtained (i.e. when it is already
+ * being used) argument fd will contain negative value.
+ *
+ * Return: zero on success (this does not necessarily mean that stream
+ *         descriptor could be returned), negative number on error
+ */
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd);
+
+/**
+ * kbase_tlstream_flush_streams - flush timeline streams.
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_tlstream_flush_streams(void);
+
+/**
+ * kbase_tlstream_reset_body_streams - reset timeline body streams.
+ *
+ * Function will discard pending data in all timeline body streams.
+ */
+void kbase_tlstream_reset_body_streams(void);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_tlstream_test - start timeline stream data generator
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay in milliseconds between trace points written by one
+ *             writer
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This test starts a requested number of asynchronous writers in both IRQ and
+ * thread context. Each writer will generate required number of test
+ * tracepoints (tracepoints with embedded information about writer that
+ * should be verified by user space reader). Tracepoints will be emitted in
+ * all timeline body streams. If aux_msg is non-zero writer will also
+ * generate not testable tracepoints (tracepoints without information about
+ * writer). These tracepoints are used to check correctness of remaining
+ * timeline message generating functions. Writer will wait requested time
+ * between generating another set of messages. This call blocks until all
+ * writers finish.
+ */
+void kbase_tlstream_test(
+		unsigned int tpw_count,
+		unsigned int msg_delay,
+		unsigned int msg_count,
+		int          aux_msg);
+
+/**
+ * kbase_tlstream_stats - read timeline stream statistics
+ * @bytes_collected: will hold number of bytes read by the user
+ * @bytes_generated: will hold number of bytes generated by trace points
+ */
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+void __kbase_tlstream_tl_del_ctx(void *context);
+void __kbase_tlstream_tl_del_atom(void *atom);
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list);
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config);
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg);
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+void __kbase_tlstream_aux_issue_job_softstop(void *katom);
+void __kbase_tlstream_aux_job_softstop(u32 js_id);
+void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom);
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
+
+extern atomic_t kbase_tlstream_enabled;
+
+#define __TRACE_IF_ENABLED(trace_name, ...)                         \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled)                                        \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_tl_summary_new_ctx - create context object in timeline
+ *                                     summary
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_ctx(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid)
+
+/**
+ * kbase_tlstream_tl_summary_new_gpu - create GPU object in timeline summary
+ * @gpu:        name of the GPU object
+ * @id:         id value of this GPU
+ * @core_count: number of cores this GPU hosts
+ *
+ * Function emits a timeline message informing about GPU creation. GPU is
+ * created with two attributes: id and core count.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_gpu(gpu, id, core_count) \
+	__TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count)
+
+/**
+ * kbase_tlstream_tl_summary_new_lpu - create LPU object in timeline summary
+ * @lpu: name of the Logical Processing Unit object
+ * @nr:  sequential number assigned to this LPU
+ * @fn:  property describing this LPU's functional abilities
+ *
+ * Function emits a timeline message informing about LPU creation. LPU is
+ * created with two attributes: number linking this LPU with GPU's job slot
+ * and function bearing information about this LPU abilities.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_lpu(lpu, nr, fn) \
+	__TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn)
+
+/**
+ * kbase_tlstream_tl_summary_lifelink_lpu_gpu - lifelink LPU object to GPU
+ * @lpu: name of the Logical Processing Unit object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that LPU object shall be deleted
+ * along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu)
+
+/**
+ * kbase_tlstream_tl_summary_new_as - create address space object in timeline summary
+ * @as: name of the address space object
+ * @nr: sequential number assigned to this address space
+ *
+ * Function emits a timeline message informing about address space creation.
+ * Address space is created with one attribute: number identifying this
+ * address space.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_as(as, nr) \
+	__TRACE_IF_ENABLED(tl_summary_new_as, as, nr)
+
+/**
+ * kbase_tlstream_tl_summary_lifelink_as_gpu - lifelink address space object to GPU
+ * @as:  name of the address space object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that address space object
+ * shall be deleted along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_lifelink_as_gpu(as, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu)
+
+/**
+ * kbase_tlstream_tl_new_ctx - create context object in timeline
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ */
+#define kbase_tlstream_tl_new_ctx(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid)
+
+/**
+ * kbase_tlstream_tl_new_atom - create atom object in timeline
+ * @atom: name of the atom object
+ * @nr:   sequential number assigned to this atom
+ *
+ * Function emits a timeline message informing about atom creation. Atom is
+ * created with atom number (its attribute) that links it with actual work
+ * bucket id understood by hardware.
+ */
+#define kbase_tlstream_tl_new_atom(atom, nr) \
+	__TRACE_IF_ENABLED(tl_new_atom, atom, nr)
+
+/**
+ * kbase_tlstream_tl_del_ctx - destroy context object in timeline
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that context object ceased to
+ * exist.
+ */
+#define kbase_tlstream_tl_del_ctx(context) \
+	__TRACE_IF_ENABLED(tl_del_ctx, context)
+
+/**
+ * kbase_tlstream_tl_del_atom - destroy atom object in timeline
+ * @atom: name of the atom object
+ *
+ * Function emits a timeline message informing that atom object ceased to
+ * exist.
+ */
+#define kbase_tlstream_tl_del_atom(atom) \
+	__TRACE_IF_ENABLED(tl_del_atom, atom)
+
+/**
+ * kbase_tlstream_tl_ret_ctx_lpu - retain context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_ctx_lpu(context, lpu) \
+	__TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu)
+
+/**
+ * kbase_tlstream_tl_ret_atom_ctx - retain atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by context and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_atom_ctx(atom, context) \
+	__TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context)
+
+/**
+ * kbase_tlstream_tl_ret_atom_lpu - retain atom by LPU
+ * @atom:              name of the atom object
+ * @lpu:               name of the Logical Processing Unit object
+ * @attrib_match_list: list containing match operator attributes
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_atom_lpu(atom, lpu, attrib_match_list) \
+	__TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list)
+
+/**
+ * kbase_tlstream_tl_nret_ctx_lpu - release context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being released
+ * by LPU object.
+ */
+#define kbase_tlstream_tl_nret_ctx_lpu(context, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu)
+
+/**
+ * kbase_tlstream_tl_nret_atom_ctx - release atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by context.
+ */
+#define kbase_tlstream_tl_nret_atom_ctx(atom, context) \
+	__TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context)
+
+/**
+ * kbase_tlstream_tl_nret_atom_lpu - release atom by LPU
+ * @atom: name of the atom object
+ * @lpu:  name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by LPU.
+ */
+#define kbase_tlstream_tl_nret_atom_lpu(atom, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu)
+
+/**
+ * kbase_tlstream_tl_ret_as_ctx - lifelink address space object to context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being held by the context object.
+ */
+#define kbase_tlstream_tl_ret_as_ctx(as, ctx) \
+	__TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx)
+
+/**
+ * kbase_tlstream_tl_nret_as_ctx - release address space by context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being released by atom.
+ */
+#define kbase_tlstream_tl_nret_as_ctx(as, ctx) \
+	__TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx)
+
+/**
+ * kbase_tlstream_tl_ret_atom_as - retain atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by address space and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_atom_as(atom, as) \
+	__TRACE_IF_ENABLED(tl_ret_atom_as, atom, as)
+
+/**
+ * kbase_tlstream_tl_nret_atom_as - release atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by address space.
+ */
+#define kbase_tlstream_tl_nret_atom_as(atom, as) \
+	__TRACE_IF_ENABLED(tl_nret_atom_as, atom, as)
+
+/**
+ * kbase_tlstream_tl_dep_atom_atom - parent atom depends on child atom
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depends on child atom
+ *
+ * Function emits a timeline message informing that parent atom waits for
+ * child atom object to be completed before start its execution.
+ */
+#define kbase_tlstream_tl_dep_atom_atom(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2)
+
+/**
+ * kbase_tlstream_tl_attrib_atom_config - atom job slot attributes
+ * @atom:     name of the atom object
+ * @jd:       job descriptor address
+ * @affinity: job affinity
+ * @config:   job config
+ *
+ * Function emits a timeline message containing atom attributes.
+ */
+#define kbase_tlstream_tl_attrib_atom_config(atom, jd, affinity, config) \
+	__TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config)
+
+/**
+ * kbase_tlstream_tl_attrib_as_config - address space attributes
+ * @as:       assigned address space
+ * @transtab: configuration of the TRANSTAB register
+ * @memattr:  configuration of the MEMATTR register
+ * @transcfg: configuration of the TRANSCFG register (or zero if not present)
+ *
+ * Function emits a timeline message containing address space attributes.
+ */
+#define kbase_tlstream_tl_attrib_as_config(as, transtab, memattr, transcfg) \
+	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
+
+/**
+ * kbase_tlstream_jd_gpu_soft_reset - The GPU is being soft reset
+ * @gpu:        name of the GPU object
+ *
+ * This imperative tracepoint is specific to job dumping.
+ * Function emits a timeline message indicating GPU soft reset.
+ */
+#define kbase_tlstream_jd_gpu_soft_reset(gpu) \
+	__TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu)
+
+/**
+ * kbase_tlstream_aux_pm_state - timeline message: power management state
+ * @core_type: core type (shader, tiler, l2 cache, l3 cache)
+ * @state:     64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
+ */
+#define kbase_tlstream_aux_pm_state(core_type, state) \
+	__TRACE_IF_ENABLED(aux_pm_state, core_type, state)
+
+/**
+ * kbase_tlstream_aux_issue_job_softstop - a soft-stop command is being issued
+ * @katom: the atom that is being soft-stopped
+ */
+#define kbase_tlstream_aux_issue_job_softstop(katom) \
+	__TRACE_IF_ENABLED(aux_issue_job_softstop, katom)
+
+/**
+ * kbase_tlstream_aux_job_softstop - soft job stop occurred
+ * @js_id: job slot id
+ */
+#define kbase_tlstream_aux_job_softstop(js_id) \
+	__TRACE_IF_ENABLED(aux_job_softstop, js_id)
+
+/**
+ * kbase_tlstream_aux_job_softstop_ex - extra info about soft-stopped atom
+ * @katom: the atom that has been soft-stopped
+ *
+ * This trace point adds more details about the soft-stopped atom. These details
+ * can't be safety collected inside the interrupt handler so we're doing it
+ * inside a worker.
+ *
+ * Note: this is not the same information that is recorded in the trace point,
+ * refer to __kbase_tlstream_aux_job_softstop_ex() for more details.
+ */
+#define kbase_tlstream_aux_job_softstop_ex(katom) \
+	__TRACE_IF_ENABLED(aux_job_softstop_ex, katom)
+
+/**
+ * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event
+ *                                resulting in new pages being mapped
+ * @ctx_nr:            kernel context number
+ * @page_count_change: number of pages to be added
+ */
+#define kbase_tlstream_aux_pagefault(ctx_nr, page_count_change) \
+	__TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change)
+
+/**
+ * kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated
+ *                                 pages is changed
+ * @ctx_nr:     kernel context number
+ * @page_count: number of pages used by the context
+ */
+#define kbase_tlstream_aux_pagesalloc(ctx_nr, page_count) \
+	__TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count)
+
+#endif /* _KBASE_TLSTREAM_H */
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100755
index 0000000..e2e0544
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,264 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code idenitifers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE.
+ *
+ * e.g.:
+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X
+ * typedef enum
+ * {
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X )
+ * #include "mali_kbase_trace_defs.h"
+ * #undef  KBASE_TRACE_CODE_MAKE_CODE
+ * } kbase_trace_code;
+ *
+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE
+ *
+ *
+ * The use of the macro here is:
+ * - KBASE_TRACE_CODE_MAKE_CODE( X )
+ *
+ * Which produces:
+ * - For an enum, KBASE_TRACE_CODE_X
+ * - For a string, "X"
+ *
+ *
+ * For example:
+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to:
+ *  - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum
+ *  - "JM_JOB_COMPLETE" for the string
+ * - To use it to trace an event, do:
+ *  - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val );
+ */
+
+#if 0 /* Dummy section to avoid breaking formatting */
+int dummy_array[] = {
+#endif
+
+/*
+ * Core events
+ */
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ),
+	/* info_val == bits cleared */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR),
+	/* GPU addr==dump address */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES),
+/*
+ * Job Slot management events
+ */
+	/* info_val==irq rawstat at start */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ),
+	/* info_val==jobs processed */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END),
+/* In the following:
+ *
+ * - ctx is set if a corresponding job found (NULL otherwise, e.g. some soft-stop cases)
+ * - uatom==kernel-side mapped uatom address (for correlation with user-side)
+ */
+	/* info_val==exit code; gpu_addr==chain gpuaddr */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+	/* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT),
+	/* gpu_addr is as follows:
+	 * - If JS_STATUS active after soft-stop, val==gpu addr written to
+	 *   JS_HEAD on submit
+	 * - otherwise gpu_addr==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1),
+	/* gpu_addr==JS_TAIL read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD),
+/* gpu_addr is as follows:
+ * - If JS_STATUS active before soft-stop, val==JS_HEAD
+ * - otherwise gpu_addr==0
+ */
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE),
+	/* info_val == nr jobs submitted */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP),
+	/* gpu_addr==JS_HEAD_NEXT last written */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER),
+/*
+ * Job dispatch events
+ */
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==0, info_val==0, uatom==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER),
+/*
+ * Scheduler Core events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB),
+	/* gpu_addr==last value written/would be written to JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
+	/* kctx is the one being evicted, info_val == kctx to put in  */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE),
+	/* info_val == the ctx attribute now on ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX),
+	/* info_val == the ctx attribute now on runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL),
+	/* info_val == the ctx attribute now off ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
+	/* info_val == the ctx attribute now off runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+/*
+ * Scheduler Policy events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX),
+	/* info_val == whether it was evicted */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ),
+	/* gpu_addr==JS_HEAD to write if the job were run */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END),
+/*
+ * Power Management Events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
+	/* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF),
+	/* info_val == policy number, or -1 for "Already changing" */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+/* Unused code just to make it easier to not have a comma at the end.
+ * All other codes MUST come before this */
+	KBASE_TRACE_CODE_MAKE_CODE(DUMMY)
+
+#if 0 /* Dummy section to avoid breaking formatting */
+};
+#endif
+
+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
new file mode 100755
index 0000000..a606ae8
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
@@ -0,0 +1,232 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define CREATE_TRACE_POINTS
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+#include "mali_timeline.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active);
+
+struct kbase_trace_timeline_desc {
+	char *enum_str;
+	char *desc;
+	char *format;
+	char *format_desc;
+};
+
+static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc }
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table)
+
+static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos)
+{
+	if (*pos >= KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	(*pos)++;
+
+	if (*pos == KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace_timeline_desc *trace_desc = data;
+
+	seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc);
+	return 0;
+}
+
+
+static const struct seq_operations kbasep_trace_timeline_seq_ops = {
+	.start = kbasep_trace_timeline_seq_start,
+	.next = kbasep_trace_timeline_seq_next,
+	.stop = kbasep_trace_timeline_seq_stop,
+	.show = kbasep_trace_timeline_seq_show,
+};
+
+static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &kbasep_trace_timeline_seq_ops);
+}
+
+static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
+	.open = kbasep_trace_timeline_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_timeline_defs",
+			S_IRUGO, kbdev->mali_debugfs_directory, NULL,
+			&kbasep_trace_timeline_debugfs_fops);
+}
+
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
+	} else {
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1);
+		KBASE_TIMELINE_JOB_START(kctx, js, atom_number);
+	}
+	++kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
+	} else {
+		/* Job finished in JS_HEAD */
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0);
+		KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number);
+
+		/* see if we need to trace the job in JS_NEXT moving to JS_HEAD */
+		if (kbase_backend_nr_atoms_submitted(kbdev, js)) {
+			struct kbase_jd_atom *next_katom;
+			struct kbase_context *next_kctx;
+
+			/* Peek the next atom - note that the atom in JS_HEAD will already
+			 * have been dequeued */
+			next_katom = kbase_backend_inspect_head(kbdev, js);
+			WARN_ON(!next_katom);
+			next_kctx = next_katom->kctx;
+			KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0);
+			KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1);
+			KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom));
+		}
+	}
+
+	--kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+	int uid = 0;
+	int old_uid;
+
+	/* If a producer already exists for the event, try to use their UID (multiple-producers) */
+	uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]);
+	old_uid = uid;
+
+	/* Get a new non-zero UID if we don't have one yet */
+	while (!uid)
+		uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter);
+
+	/* Try to use this UID */
+	if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid))
+		/* If it changed, raced with another producer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid);
+}
+
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != 0) {
+		if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+			/* If it changed, raced with another consumer: we've lost this UID */
+			uid = 0;
+
+		KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+	}
+}
+
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+		/* If it changed, raced with another consumer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+}
+
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+	/* Simply log the start of the transition */
+	kbdev->timeline.l2_transitioning = true;
+	KBASE_TIMELINE_POWERING_L2(kbdev);
+}
+
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+	/* Simply log the end of the transition */
+	if (kbdev->timeline.l2_transitioning) {
+		kbdev->timeline.l2_transitioning = false;
+		KBASE_TIMELINE_POWERED_L2(kbdev);
+	}
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
new file mode 100755
index 0000000..d92caf0
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
@@ -0,0 +1,358 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_KBASE_TRACE_TIMELINE_H)
+#define _KBASE_TRACE_TIMELINE_H
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+
+enum kbase_trace_timeline_code {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+/** Initialize Timeline DebugFS entries */
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
+
+/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
+ * functions.
+ * Output is timestamped by either sched_clock() (default), local_clock(), or
+ * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */
+#include "mali_timeline.h"
+
+/* Trace number of atoms in flight for kctx (atoms either not completed, or in
+   process of being returned to user */
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec,   \
+				(int)kctx->timeline.owner_tgid,              \
+				count);                                      \
+	} while (0)
+
+/* Trace atom_id being Ready to Run */
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id)                             \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec,              \
+				CTX_FLOW_ATOM_READY,                         \
+				(int)kctx->timeline.owner_tgid,              \
+				atom_id);                                    \
+	} while (0)
+
+/* Trace number of atoms submitted to job slot js
+ *
+ * NOTE: This uses a different tracepoint to the head/next/soft-stop actions,
+ * so that those actions can be filtered out separately from this
+ *
+ * This is because this is more useful, as we can use it to calculate general
+ * utilization easily and accurately */
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_ACTIVE,                      \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+
+/* Trace atoms present in JS_NEXT */
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_NEXT,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace atoms present in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_HEAD,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace that a soft stop/evict from next is being attempted on a slot */
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_STOPPING,                    \
+				(kctx) ? (int)kctx->timeline.owner_tgid : 0, \
+				js, count);                                  \
+	} while (0)
+
+
+
+/* Trace state of overall GPU power */
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active)                              \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_ACTIVE, active);            \
+	} while (0)
+
+/* Trace state of tiler power */
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap)                            \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_TILER_ACTIVE,               \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace number of shaders currently powered */
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap)                           \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_SHADER_ACTIVE,              \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 power */
+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap)                               \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_L2_ACTIVE,                  \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 cache*/
+#define KBASE_TIMELINE_POWERING_L2(kbdev)                                    \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_POWERING,               \
+				1);                                          \
+	} while (0)
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)                                     \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_ACTIVE,                 \
+				1);                                          \
+	} while (0)
+
+/* Trace kbase_pm_send_event message send */
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id)         \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_SEND_EVENT,                       \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+/* Trace kbase_pm_worker message receive */
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id)       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_HANDLE_EVENT,                     \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+
+/* Trace atom_id starting in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number)          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_START_GPU_JOB_CHAIN_SW_APPROX,            \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _consumerof_atom_number);                \
+	} while (0)
+
+/* Trace atom_id stopping on JS_HEAD */
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_STOP_GPU_JOB_CHAIN_SW_APPROX,             \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _producerof_atom_number_completed);      \
+	} while (0)
+
+
+/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
+ * certin caller */
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec,     \
+				trace_code, 1);                              \
+	} while (0)
+
+/* Trace number of contexts active */
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec,    \
+				count);                                      \
+	} while (0)
+
+
+/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
+
+/**
+ * Trace that an atom is starting on a job slot
+ *
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js);
+
+/**
+ * Trace that an atom has done on a job slot
+ *
+ * 'Done' in this sense can occur either because:
+ * - the atom in JS_HEAD finished
+ * - the atom in JS_NEXT was evicted
+ *
+ * Whether the atom finished or was evicted is passed in @a done_code
+ *
+ * It is assumed that the atom has already been removed from the submit slot,
+ * with either:
+ * - kbasep_jm_dequeue_submit_slot()
+ * - kbasep_jm_dequeue_tail_submit_slot()
+ *
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code);
+
+
+/** Trace a pm event starting */
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
+		enum kbase_timeline_pm_event event_sent);
+
+/** Trace a pm event finishing */
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Check whether a pm event was present, and if so trace finishing it */
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Trace L2 power-up start */
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
+
+/** Trace L2 power-up done */
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
+
+#else
+
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)  CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
+
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
+
+
+static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+}
+
+static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+}
+
+static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+}
+
+static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_MALI_TRACE_TIMELINE */
+
+#endif				/* _KBASE_TRACE_TIMELINE_H */
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
new file mode 100755
index 0000000..156a95a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
@@ -0,0 +1,140 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * Conventions on Event Names:
+ *
+ * - The prefix determines something about how the timeline should be
+ *   displayed, and is split up into various parts, separated by underscores:
+ *  - 'SW' and 'HW' as the first part will be used to determine whether a
+ *     timeline is to do with Software or Hardware - effectively, separate
+ *     'channels' for Software and Hardware
+ *  - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
+ *    signify related pairs of events - these are optional.
+ *  - 'FLOW' indicates a generic event, which can use dependencies
+ * - This gives events such as:
+ *  - 'SW_ENTER_FOO'
+ *  - 'SW_LEAVE_FOO'
+ *  - 'SW_FLOW_BAR_1'
+ *  - 'SW_FLOW_BAR_2'
+ *  - 'HW_START_BAZ'
+ *  - 'HW_STOP_BAZ'
+ * - And an unadorned HW event:
+ *  - 'HW_BAZ_FROZBOZ'
+ */
+
+/*
+ * Conventions on parameter names:
+ * - anything with 'instance' in the name will have a separate timeline based
+ *   on that instances.
+ * - underscored-prefixed parameters will by hidden by default on timelines
+ *
+ * Hence:
+ * - Different job slots have their own 'instance', based on the instance value
+ * - Per-context info (e.g. atoms on a context) have their own 'instance'
+ *   (i.e. each context should be on a different timeline)
+ *
+ * Note that globally-shared resources can be tagged with a tgid, but we don't
+ * want an instance per context:
+ * - There's no point having separate Job Slot timelines for each context, that
+ *   would be confusing - there's only really 3 job slots!
+ * - There's no point having separate Shader-powered timelines for each
+ *   context, that would be confusing - all shader cores (whether it be 4, 8,
+ *   etc) are shared in the system.
+ */
+
+	/*
+	 * CTX events
+	 */
+	/* Separate timelines for each context 'instance'*/
+	KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT,     "CTX: Atoms in flight",            "%d,%d",    "_instance_tgid,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY,            "CTX: Atoms Ready to Run",         "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
+
+	/*
+	 * SW Events
+	 */
+	/* Separate timelines for each slot 'instance' */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE,         "SW: GPU slot active",             "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT,           "SW: GPU atom in NEXT",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD,           "SW: GPU atom in HEAD",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING,       "SW: Try Soft-Stop on GPU slot",   "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
+	/* Shader and overall power is shared - can't have separate instances of
+	 * it, just tagging with the context */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE,        "SW: GPU power active",            "%d,%d",    "_tgid,_value_is_power_active"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE,  "SW: GPU tiler powered",           "%d,%d",    "_tgid,_value_number_of_tilers"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered",         "%d,%d",    "_tgid,_value_number_of_shaders"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE,     "SW: GPU L2 powered",              "%d,%d",    "_tgid,_value_number_of_l2"),
+
+	/* SW Power event messaging. _event_type is one from the kbase_pm_event enum  */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT,          "SW: PM Send Event",               "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT,        "SW: PM Handle Event",             "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
+	/* SW L2 power events */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING,  "SW: GPU L2 powering",             "%d,%d", "_tgid,_writerof_l2_transitioning"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE,	  "SW: GPU L2 powering done",        "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
+
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE,          "SW: Context Active",              "%d,%d",    "_tgid,_value_active"),
+
+	/*
+	 * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END,   "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END,   "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END,   "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
+
+	/*
+	 * Significant Indirect callers of kbase_pm_check_transitions_nolock()
+	 */
+	/* kbase_pm_request_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	/* kbase_pm_release_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END,   "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	/*
+	 * END: SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+
+	/*
+	 * HW Events
+	 */
+	KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT,
+"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"),
+	KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain start (SW approximated)", "%d,%d,%d",
+"_tgid,job_slot,_consumerof_atom_number_ready"),
+	KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain stop (SW approximated)",  "%d,%d,%d",
+"_tgid,job_slot,_producerof_atom_number_completed")
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h
new file mode 100755
index 0000000..93ddb5a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h
@@ -0,0 +1,525 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UKU_H_
+#define _KBASE_UKU_H_
+
+#include "mali_uk.h"
+#include "mali_base_kernel.h"
+
+/* This file needs to support being included from kernel and userside (which use different defines) */
+#if defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON
+#define SUPPORT_MALI_ERROR_INJECT
+#endif /* defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON */
+#if defined(CONFIG_MALI_NO_MALI)
+#define SUPPORT_MALI_NO_MALI
+#elif defined(MALI_NO_MALI)
+#if MALI_NO_MALI
+#define SUPPORT_MALI_NO_MALI
+#endif
+#endif
+
+#if defined(SUPPORT_MALI_NO_MALI) || defined(SUPPORT_MALI_ERROR_INJECT)
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+
+#include "mali_kbase_gpuprops_types.h"
+
+/*
+ * 10.1:
+ * - Do mmap in kernel for SAME_VA memory allocations rather then
+ *   calling back into the kernel as a 2nd stage of the allocation request.
+ *
+ * 10.2:
+ * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA
+ *   region for use with JIT (ignored on 32-bit platforms)
+ */
+#define BASE_UK_VERSION_MAJOR 10
+#define BASE_UK_VERSION_MINOR 2
+
+struct kbase_uk_mem_alloc {
+	union uk_header header;
+	/* IN */
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	/* IN/OUT */
+	u64 flags;
+	/* OUT */
+	u64 gpu_va;
+	u16 va_alignment;
+	u8  padding[6];
+};
+
+struct kbase_uk_mem_free {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	/* OUT */
+};
+
+struct kbase_uk_mem_alias {
+	union uk_header header;
+	/* IN/OUT */
+	u64 flags;
+	/* IN */
+	u64 stride;
+	u64 nents;
+	union kbase_pointer ai;
+	/* OUT */
+	u64         gpu_va;
+	u64         va_pages;
+};
+
+struct kbase_uk_mem_import {
+	union uk_header header;
+	/* IN */
+	union kbase_pointer phandle;
+	u32 type;
+	u32 padding;
+	/* IN/OUT */
+	u64         flags;
+	/* OUT */
+	u64 gpu_va;
+	u64         va_pages;
+};
+
+struct kbase_uk_mem_flags_change {
+	union uk_header header;
+	/* IN */
+	u64 gpu_va;
+	u64 flags;
+	u64 mask;
+};
+
+struct kbase_uk_job_submit {
+	union uk_header header;
+	/* IN */
+	union kbase_pointer addr;
+	u32 nr_atoms;
+	u32 stride;		/* bytes between atoms, i.e. sizeof(base_jd_atom_v2) */
+	/* OUT */
+};
+
+struct kbase_uk_post_term {
+	union uk_header header;
+};
+
+struct kbase_uk_sync_now {
+	union uk_header header;
+
+	/* IN */
+	struct base_syncset sset;
+
+	/* OUT */
+};
+
+struct kbase_uk_hwcnt_setup {
+	union uk_header header;
+
+	/* IN */
+	u64 dump_buffer;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 unused_1; /* keep for backwards compatibility */
+	u32 mmu_l2_bm;
+	u32 padding;
+	/* OUT */
+};
+
+/**
+ * struct kbase_uk_hwcnt_reader_setup - User/Kernel space data exchange structure
+ * @header:       UK structure header
+ * @buffer_count: requested number of dumping buffers
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ * @fd:           dumping notification file descriptor
+ *
+ * This structure sets up HWC dumper/reader for this context.
+ * Multiple instances can be created for single context.
+ */
+struct kbase_uk_hwcnt_reader_setup {
+	union uk_header header;
+
+	/* IN */
+	u32 buffer_count;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 mmu_l2_bm;
+
+	/* OUT */
+	s32 fd;
+};
+
+struct kbase_uk_hwcnt_dump {
+	union uk_header header;
+};
+
+struct kbase_uk_hwcnt_clear {
+	union uk_header header;
+};
+
+struct kbase_uk_fence_validate {
+	union uk_header header;
+	/* IN */
+	s32 fd;
+	u32 padding;
+	/* OUT */
+};
+
+struct kbase_uk_stream_create {
+	union uk_header header;
+	/* IN */
+	char name[32];
+	/* OUT */
+	s32 fd;
+	u32 padding;
+};
+
+struct kbase_uk_gpuprops {
+	union uk_header header;
+
+	/* IN */
+	struct mali_base_gpu_props props;
+	/* OUT */
+};
+
+struct kbase_uk_mem_query {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+#define KBASE_MEM_QUERY_COMMIT_SIZE  1
+#define KBASE_MEM_QUERY_VA_SIZE      2
+#define KBASE_MEM_QUERY_FLAGS        3
+	u64         query;
+	/* OUT */
+	u64         value;
+};
+
+struct kbase_uk_mem_commit {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	u64         pages;
+	/* OUT */
+	u32 result_subcode;
+	u32 padding;
+};
+
+struct kbase_uk_find_cpu_offset {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	u64 cpu_addr;
+	u64 size;
+	/* OUT */
+	u64 offset;
+};
+
+#define KBASE_GET_VERSION_BUFFER_SIZE 64
+struct kbase_uk_get_ddk_version {
+	union uk_header header;
+	/* OUT */
+	char version_buffer[KBASE_GET_VERSION_BUFFER_SIZE];
+	u32 version_string_size;
+	u32 padding;
+};
+
+struct kbase_uk_disjoint_query {
+	union uk_header header;
+	/* OUT */
+	u32 counter;
+	u32 padding;
+};
+
+struct kbase_uk_set_flags {
+	union uk_header header;
+	/* IN */
+	u32 create_flags;
+	u32 padding;
+};
+
+#if MALI_UNIT_TEST
+#define TEST_ADDR_COUNT 4
+#define KBASE_TEST_BUFFER_SIZE 128
+struct kbase_exported_test_data {
+	u64 test_addr[TEST_ADDR_COUNT];		/**< memory address */
+	u32 test_addr_pages[TEST_ADDR_COUNT];		/**<  memory size in pages */
+	union kbase_pointer kctx;				/**<  base context created by process */
+	union kbase_pointer mm;				/**< pointer to process address space */
+	u8 buffer1[KBASE_TEST_BUFFER_SIZE];   /**<  unit test defined parameter */
+	u8 buffer2[KBASE_TEST_BUFFER_SIZE];   /**<  unit test defined parameter */
+};
+
+struct kbase_uk_set_test_data {
+	union uk_header header;
+	/* IN */
+	struct kbase_exported_test_data test_data;
+};
+
+#endif				/* MALI_UNIT_TEST */
+
+#ifdef SUPPORT_MALI_ERROR_INJECT
+struct kbase_uk_error_params {
+	union uk_header header;
+	/* IN */
+	struct kbase_error_params params;
+};
+#endif				/* SUPPORT_MALI_ERROR_INJECT */
+
+#ifdef SUPPORT_MALI_NO_MALI
+struct kbase_uk_model_control_params {
+	union uk_header header;
+	/* IN */
+	struct kbase_model_control_params params;
+};
+#endif				/* SUPPORT_MALI_NO_MALI */
+
+#define KBASE_MAXIMUM_EXT_RESOURCES       255
+
+struct kbase_uk_ext_buff_kds_data {
+	union uk_header header;
+	union kbase_pointer external_resource;
+	union kbase_pointer file_descriptor;
+	u32 num_res;		/* limited to KBASE_MAXIMUM_EXT_RESOURCES */
+	u32 padding;
+};
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+struct kbase_uk_keep_gpu_powered {
+	union uk_header header;
+	u32       enabled;
+	u32       padding;
+};
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+struct kbase_uk_profiling_controls {
+	union uk_header header;
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+};
+
+struct kbase_uk_debugfs_mem_profile_add {
+	union uk_header header;
+	u32 len;
+	union kbase_pointer buf;
+};
+
+struct kbase_uk_context_id {
+	union uk_header header;
+	/* OUT */
+	int id;
+};
+
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+	!defined(MALI_MIPE_ENABLED)
+/**
+ * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @fd:     timeline stream file descriptor
+ *
+ * This structure is used used when performing a call to acquire kernel side
+ * timeline stream file descriptor.
+ */
+struct kbase_uk_tlstream_acquire {
+	union uk_header header;
+	/* IN */
+	/* OUT */
+	s32  fd;
+};
+
+/**
+ * struct kbase_uk_tlstream_flush - User/Kernel space data exchange structure
+ * @header: UK structure header
+ *
+ * This structure is used when performing a call to flush kernel side
+ * timeline streams.
+ */
+struct kbase_uk_tlstream_flush {
+	union uk_header header;
+	/* IN */
+	/* OUT */
+};
+
+#if MALI_UNIT_TEST
+/**
+ * struct kbase_uk_tlstream_test - User/Kernel space data exchange structure
+ * @header:    UK structure header
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay between tracepoints from one writer in milliseconds
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This structure is used when performing a call to start timeline stream test
+ * embedded in kernel.
+ */
+struct kbase_uk_tlstream_test {
+	union uk_header header;
+	/* IN */
+	u32 tpw_count;
+	u32 msg_delay;
+	u32 msg_count;
+	u32 aux_msg;
+	/* OUT */
+};
+
+/**
+ * struct kbase_uk_tlstream_stats - User/Kernel space data exchange structure
+ * @header:          UK structure header
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ *
+ * This structure is used when performing a call to obtain timeline stream
+ * statistics.
+ */
+struct kbase_uk_tlstream_stats {
+	union uk_header header; /**< UK structure header. */
+	/* IN */
+	/* OUT */
+	u32 bytes_collected;
+	u32 bytes_generated;
+};
+#endif /* MALI_UNIT_TEST */
+#endif /* MALI_MIPE_ENABLED */
+
+/**
+ * struct struct kbase_uk_prfcnt_value for the KBASE_FUNC_SET_PRFCNT_VALUES ioctl
+ * @header:          UK structure header
+ * @data:            Counter samples for the dummy model
+ * @size:............Size of the counter sample data
+ */
+struct kbase_uk_prfcnt_values {
+	union uk_header header;
+	/* IN */
+	u32 *data;
+	u32 size;
+};
+
+/**
+ * struct kbase_uk_soft_event_update - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @evt:        the GPU address containing the event
+ * @new_status: the new event status, must be either BASE_JD_SOFT_EVENT_SET or
+ *              BASE_JD_SOFT_EVENT_RESET
+ * @flags:      reserved for future uses, must be set to 0
+ *
+ * This structure is used to update the status of a software event. If the
+ * event's status is set to BASE_JD_SOFT_EVENT_SET, any job currently waiting
+ * on this event will complete.
+ */
+struct kbase_uk_soft_event_update {
+	union uk_header header;
+	/* IN */
+	u64 evt;
+	u32 new_status;
+	u32 flags;
+};
+
+/**
+ * struct kbase_uk_mem_jit_init - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @va_pages:   Number of virtual pages required for JIT
+ *
+ * This structure is used when requesting initialization of JIT.
+ */
+struct kbase_uk_mem_jit_init {
+	union uk_header header;
+	/* IN */
+	u64 va_pages;
+};
+
+enum kbase_uk_function_id {
+	KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0),
+	KBASE_FUNC_MEM_IMPORT = (UK_FUNC_ID + 1),
+	KBASE_FUNC_MEM_COMMIT = (UK_FUNC_ID + 2),
+	KBASE_FUNC_MEM_QUERY = (UK_FUNC_ID + 3),
+	KBASE_FUNC_MEM_FREE = (UK_FUNC_ID + 4),
+	KBASE_FUNC_MEM_FLAGS_CHANGE = (UK_FUNC_ID + 5),
+	KBASE_FUNC_MEM_ALIAS = (UK_FUNC_ID + 6),
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	KBASE_FUNC_JOB_SUBMIT_UK6 = (UK_FUNC_ID + 7),
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+	KBASE_FUNC_SYNC  = (UK_FUNC_ID + 8),
+
+	KBASE_FUNC_POST_TERM = (UK_FUNC_ID + 9),
+
+	KBASE_FUNC_HWCNT_SETUP = (UK_FUNC_ID + 10),
+	KBASE_FUNC_HWCNT_DUMP = (UK_FUNC_ID + 11),
+	KBASE_FUNC_HWCNT_CLEAR = (UK_FUNC_ID + 12),
+
+	KBASE_FUNC_GPU_PROPS_REG_DUMP = (UK_FUNC_ID + 14),
+
+	KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15),
+
+	KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16),
+	KBASE_FUNC_EXT_BUFFER_LOCK = (UK_FUNC_ID + 17),
+	KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18),
+
+	KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19),
+	KBASE_FUNC_INJECT_ERROR = (UK_FUNC_ID + 20),
+	KBASE_FUNC_MODEL_CONTROL = (UK_FUNC_ID + 21),
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	KBASE_FUNC_KEEP_GPU_POWERED = (UK_FUNC_ID + 22),
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+	KBASE_FUNC_FENCE_VALIDATE = (UK_FUNC_ID + 23),
+	KBASE_FUNC_STREAM_CREATE = (UK_FUNC_ID + 24),
+	KBASE_FUNC_GET_PROFILING_CONTROLS = (UK_FUNC_ID + 25),
+	KBASE_FUNC_SET_PROFILING_CONTROLS = (UK_FUNC_ID + 26),
+					    /* to be used only for testing
+					    * purposes, otherwise these controls
+					    * are set through gator API */
+
+	KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD = (UK_FUNC_ID + 27),
+	KBASE_FUNC_JOB_SUBMIT = (UK_FUNC_ID + 28),
+	KBASE_FUNC_DISJOINT_QUERY = (UK_FUNC_ID + 29),
+
+	KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31),
+
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+	!defined(MALI_MIPE_ENABLED)
+	KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32),
+#if MALI_UNIT_TEST
+	KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33),
+	KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34),
+#endif /* MALI_UNIT_TEST */
+	KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35),
+#endif /* MALI_MIPE_ENABLED */
+
+	KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36),
+
+#ifdef SUPPORT_MALI_NO_MALI
+	KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37),
+#endif
+
+	KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38),
+
+	KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39),
+
+	KBASE_FUNC_MAX
+};
+
+#endif				/* _KBASE_UKU_H_ */
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
new file mode 100755
index 0000000..be474ff
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
+{
+	struct list_head *pos = base->next;
+
+	while (pos != base) {
+		if (pos == entry)
+			return true;
+
+		pos = pos->next;
+	}
+	return false;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100755
index 0000000..fd7252d
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+/** Test whether the given list entry is a member of the given list.
+ *
+ * @param base      The head of the list to be tested
+ * @param entry     The list entry to be tested
+ *
+ * @return          true if entry is a member of base
+ *                  false otherwise
+ */
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
+
+#endif				/* _KBASE_UTILITY_H */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
new file mode 100755
index 0000000..371122f
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -0,0 +1,1820 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/hrtimer.h>
+#include <linux/jiffies.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwcnt_reader.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* Hwcnt reader API version */
+#define HWCNT_READER_API        1
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC            1000000000ull /* ns */
+
+/* The time resolution of dumping service. */
+#define DUMPING_RESOLUTION      500000ull /* ns */
+
+/* The maximal supported number of dumping buffers. */
+#define MAX_BUFFER_COUNT        32
+
+/* Size and number of hw counters blocks. */
+#define NR_CNT_BLOCKS_PER_GROUP 8
+#define NR_CNT_PER_BLOCK        64
+#define NR_BYTES_PER_CNT        4
+#define NR_BYTES_PER_HDR        16
+#define PRFCNT_EN_MASK_OFFSET   0x8
+
+/*****************************************************************************/
+
+enum {
+	SHADER_HWCNT_BM,
+	TILER_HWCNT_BM,
+	MMU_L2_HWCNT_BM,
+	JM_HWCNT_BM
+};
+
+/**
+ * struct kbase_vinstr_context - vinstr context per device
+ * @lock:              protects the entire vinstr context
+ * @kbdev:             pointer to kbase device
+ * @kctx:              pointer to kbase context
+ * @vmap:              vinstr vmap for mapping hwcnt dump buffer
+ * @gpu_va:            GPU hwcnt dump buffer address
+ * @cpu_va:            the CPU side mapping of the hwcnt dump buffer
+ * @dump_size:         size of the dump buffer in bytes
+ * @bitmap:            current set of counters monitored, not always in sync
+ *                     with hardware
+ * @reprogram:         when true, reprogram hwcnt block with the new set of
+ *                     counters
+ * @suspended:         when true, the context has been suspended
+ * @nclients:          number of attached clients, pending or otherwise
+ * @waiting_clients:   head of list of clients being periodically sampled
+ * @idle_clients:      head of list of clients being idle
+ * @suspended_clients: head of list of clients being suspended
+ * @thread:            periodic sampling thread
+ * @waitq:             notification queue of sampling thread
+ * @request_pending:   request for action for sampling thread
+ */
+struct kbase_vinstr_context {
+	struct mutex             lock;
+	struct kbase_device      *kbdev;
+	struct kbase_context     *kctx;
+
+	struct kbase_vmap_struct vmap;
+	u64                      gpu_va;
+	void                     *cpu_va;
+	size_t                   dump_size;
+	u32                      bitmap[4];
+	bool                     reprogram;
+	bool                     suspended;
+
+	u32                      nclients;
+	struct list_head         waiting_clients;
+	struct list_head         idle_clients;
+	struct list_head         suspended_clients;
+
+	struct task_struct       *thread;
+	wait_queue_head_t        waitq;
+	atomic_t                 request_pending;
+};
+
+/**
+ * struct kbase_vinstr_client - a vinstr client attached to a vinstr context
+ * @vinstr_ctx:    vinstr context client is attached to
+ * @list:          node used to attach this client to list in vinstr context
+ * @buffer_count:  number of buffers this client is using
+ * @event_mask:    events this client reacts to
+ * @dump_size:     size of one dump buffer in bytes
+ * @bitmap:        bitmap request for JM, TILER, SHADER and MMU counters
+ * @legacy_buffer: userspace hwcnt dump buffer (legacy interface)
+ * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface)
+ * @accum_buffer:  temporary accumulation buffer for preserving counters
+ * @dump_time:     next time this clients shall request hwcnt dump
+ * @dump_interval: interval between periodic hwcnt dumps
+ * @dump_buffers:  kernel hwcnt dump buffers allocated by this client
+ * @dump_buffers_meta: metadata of dump buffers
+ * @meta_idx:      index of metadata being accessed by userspace
+ * @read_idx:      index of buffer read by userspace
+ * @write_idx:     index of buffer being written by dumping service
+ * @waitq:         client's notification queue
+ * @pending:       when true, client has attached but hwcnt not yet updated
+ */
+struct kbase_vinstr_client {
+	struct kbase_vinstr_context        *vinstr_ctx;
+	struct list_head                   list;
+	unsigned int                       buffer_count;
+	u32                                event_mask;
+	size_t                             dump_size;
+	u32                                bitmap[4];
+	void __user                        *legacy_buffer;
+	void                               *kernel_buffer;
+	void                               *accum_buffer;
+	u64                                dump_time;
+	u32                                dump_interval;
+	char                               *dump_buffers;
+	struct kbase_hwcnt_reader_metadata *dump_buffers_meta;
+	atomic_t                           meta_idx;
+	atomic_t                           read_idx;
+	atomic_t                           write_idx;
+	wait_queue_head_t                  waitq;
+	bool                               pending;
+};
+
+/**
+ * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer
+ * @hrtimer:    high resolution timer
+ * @vinstr_ctx: vinstr context
+ */
+struct kbasep_vinstr_wake_up_timer {
+	struct hrtimer              hrtimer;
+	struct kbase_vinstr_context *vinstr_ctx;
+};
+
+/*****************************************************************************/
+
+static int kbasep_vinstr_service_task(void *data);
+
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+		struct file *filp,
+		poll_table  *wait);
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+		struct file   *filp,
+		unsigned int  cmd,
+		unsigned long arg);
+static int kbasep_vinstr_hwcnt_reader_mmap(
+		struct file           *filp,
+		struct vm_area_struct *vma);
+static int kbasep_vinstr_hwcnt_reader_release(
+		struct inode *inode,
+		struct file  *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations vinstr_client_fops = {
+	.poll           = kbasep_vinstr_hwcnt_reader_poll,
+	.unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,
+	.compat_ioctl   = kbasep_vinstr_hwcnt_reader_ioctl,
+	.mmap           = kbasep_vinstr_hwcnt_reader_mmap,
+	.release        = kbasep_vinstr_hwcnt_reader_release,
+};
+
+/*****************************************************************************/
+
+static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_uk_hwcnt_setup setup;
+
+	setup.dump_buffer = vinstr_ctx->gpu_va;
+	setup.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
+	setup.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
+	setup.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
+	setup.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
+
+	return kbase_instr_hwcnt_enable(vinstr_ctx->kctx, &setup);
+}
+
+static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	kbase_instr_hwcnt_disable(vinstr_ctx->kctx);
+}
+
+static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	disable_hwcnt(vinstr_ctx);
+	return enable_hwcnt(vinstr_ctx);
+}
+
+static void hwcnt_bitmap_set(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     = src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  = src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM];
+}
+
+static void hwcnt_bitmap_union(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     |= src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  |= src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM];
+}
+
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev)
+{
+	size_t dump_size;
+
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+		u32 nr_cg;
+
+		nr_cg = kbdev->gpu_props.num_core_groups;
+		dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	} else
+#endif /* CONFIG_MALI_NO_MALI */
+	{
+		/* assume v5 for now */
+		base_gpu_props *props = &kbdev->gpu_props.props;
+		u32 nr_l2 = props->l2_props.num_l2_slices;
+		u64 core_mask = props->coherency_info.group[0].core_mask;
+		u32 nr_blocks = fls64(core_mask);
+
+		/* JM and tiler counter blocks are always present */
+		dump_size = (2 + nr_l2 + nr_blocks) *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	}
+	return dump_size;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size);
+
+static size_t kbasep_vinstr_dump_size_ctx(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev);
+}
+
+static int kbasep_vinstr_map_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_va_region *reg;
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	u64 flags, nr_pages;
+	u16 va_align = 0;
+
+	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+	vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	nr_pages = PFN_UP(vinstr_ctx->dump_size);
+
+	reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+			&vinstr_ctx->gpu_va, &va_align);
+	if (!reg)
+		return -ENOMEM;
+
+	vinstr_ctx->cpu_va = kbase_vmap(
+			kctx,
+			vinstr_ctx->gpu_va,
+			vinstr_ctx->dump_size,
+			&vinstr_ctx->vmap);
+	if (!vinstr_ctx->cpu_va) {
+		kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void kbasep_vinstr_unmap_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+
+	kbase_vunmap(kctx, &vinstr_ctx->vmap);
+	kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+}
+
+/**
+ * kbasep_vinstr_create_kctx - create kernel context for vinstr
+ * @vinstr_ctx: vinstr context
+ * Return: zero on success
+ */
+static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	int err;
+
+	vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true);
+	if (!vinstr_ctx->kctx)
+		return -ENOMEM;
+
+	/* Map the master kernel dump buffer.  The HW dumps the counters
+	 * into this memory region. */
+	err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx);
+	if (err) {
+		kbase_destroy_context(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return err;
+	}
+
+	/* Add kernel context to list of contexts associated with device. */
+	element = kzalloc(sizeof(*element), GFP_KERNEL);
+	if (element) {
+		element->kctx = vinstr_ctx->kctx;
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_add(&element->link, &kbdev->kctx_list);
+
+		/* Inform timeline client about new context.
+		 * Do this while holding the lock to avoid tracepoint
+		 * being created in both body and summary stream. */
+		kbase_tlstream_tl_new_ctx(
+				vinstr_ctx->kctx,
+				(u32)(vinstr_ctx->kctx->id),
+				(u32)(vinstr_ctx->kctx->tgid));
+
+		mutex_unlock(&kbdev->kctx_list_lock);
+	} else {
+		/* Don't treat this as a fail - just warn about it. */
+		dev_warn(kbdev->dev,
+				"couldn't add kctx to kctx_list\n");
+	}
+
+	err = enable_hwcnt(vinstr_ctx);
+	if (err) {
+		kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+		kbase_destroy_context(vinstr_ctx->kctx);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			list_del(&element->link);
+			kfree(element);
+			mutex_unlock(&kbdev->kctx_list_lock);
+		}
+		kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return err;
+	}
+
+	vinstr_ctx->thread = kthread_run(
+			kbasep_vinstr_service_task,
+			vinstr_ctx,
+			"mali_vinstr_service");
+	if (!vinstr_ctx->thread) {
+		disable_hwcnt(vinstr_ctx);
+		kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+		kbase_destroy_context(vinstr_ctx->kctx);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			list_del(&element->link);
+			kfree(element);
+			mutex_unlock(&kbdev->kctx_list_lock);
+		}
+		kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device             *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	struct kbasep_kctx_list_element *tmp;
+	bool                            found = false;
+
+	/* Release hw counters dumping resources. */
+	vinstr_ctx->thread = NULL;
+	disable_hwcnt(vinstr_ctx);
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+	kbase_destroy_context(vinstr_ctx->kctx);
+
+	/* Remove kernel context from the device's contexts list. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == vinstr_ctx->kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	if (!found)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	/* Inform timeline client about context destruction. */
+	kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
+
+	vinstr_ctx->kctx = NULL;
+}
+
+/**
+ * kbasep_vinstr_attach_client - Attach a client to the vinstr core
+ * @vinstr_ctx:    vinstr context
+ * @buffer_count:  requested number of dump buffers
+ * @bitmap:        bitmaps describing which counters should be enabled
+ * @argp:          pointer where notification descriptor shall be stored
+ * @kernel_buffer: pointer to kernel side buffer
+ *
+ * Return: vinstr opaque client handle or NULL on failure
+ */
+static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
+		struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count,
+		u32 bitmap[4], void *argp, void *kernel_buffer)
+{
+	struct task_struct         *thread = NULL;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(buffer_count >= 0);
+	KBASE_DEBUG_ASSERT(buffer_count <= MAX_BUFFER_COUNT);
+	KBASE_DEBUG_ASSERT(!(buffer_count & (buffer_count - 1)));
+
+	cli = kzalloc(sizeof(*cli), GFP_KERNEL);
+	if (!cli)
+		return NULL;
+
+	cli->vinstr_ctx   = vinstr_ctx;
+	cli->buffer_count = buffer_count;
+	cli->event_mask   =
+		(1 << BASE_HWCNT_READER_EVENT_MANUAL) |
+		(1 << BASE_HWCNT_READER_EVENT_PERIODIC);
+	cli->pending      = true;
+
+	hwcnt_bitmap_set(cli->bitmap, bitmap);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap);
+	vinstr_ctx->reprogram = true;
+
+	/* If this is the first client, create the vinstr kbase
+	 * context. This context is permanently resident until the
+	 * last client exits. */
+	if (!vinstr_ctx->nclients) {
+		hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap);
+		if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0)
+			goto error;
+
+		vinstr_ctx->reprogram = false;
+		cli->pending = false;
+	}
+
+	/* The GPU resets the counter block every time there is a request
+	 * to dump it. We need a per client kernel buffer for accumulating
+	 * the counters. */
+	cli->dump_size    = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
+	if (!cli->accum_buffer)
+		goto error;
+
+	/* Prepare buffers. */
+	if (cli->buffer_count) {
+		int *fd = (int *)argp;
+		size_t tmp;
+
+		/* Allocate area for buffers metadata storage. */
+		tmp = sizeof(struct kbase_hwcnt_reader_metadata) *
+			cli->buffer_count;
+		cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL);
+		if (!cli->dump_buffers_meta)
+			goto error;
+
+		/* Allocate required number of dumping buffers. */
+		cli->dump_buffers = (char *)__get_free_pages(
+				GFP_KERNEL,
+				get_order(cli->dump_size * cli->buffer_count));
+		if (!cli->dump_buffers)
+			goto error;
+
+		/* Create descriptor for user-kernel data exchange. */
+		*fd = anon_inode_getfd(
+				"[mali_vinstr_desc]",
+				&vinstr_client_fops,
+				cli,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd)
+			goto error;
+	} else if (kernel_buffer) {
+		cli->kernel_buffer = kernel_buffer;
+	} else {
+		cli->legacy_buffer = (void __user *)argp;
+	}
+
+	atomic_set(&cli->read_idx, 0);
+	atomic_set(&cli->meta_idx, 0);
+	atomic_set(&cli->write_idx, 0);
+	init_waitqueue_head(&cli->waitq);
+
+	vinstr_ctx->nclients++;
+	list_add(&cli->list, &vinstr_ctx->idle_clients);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return cli;
+
+error:
+	kfree(cli->dump_buffers_meta);
+	if (cli->dump_buffers)
+		free_pages(
+				(unsigned long)cli->dump_buffers,
+				get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	if (!vinstr_ctx->nclients && vinstr_ctx->kctx) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+	kfree(cli);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+
+	return NULL;
+}
+
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	struct kbase_vinstr_client  *iter, *tmp;
+	struct task_struct          *thread = NULL;
+	u32 zerobitmap[4] = { 0 };
+	int cli_found = 0;
+
+	KBASE_DEBUG_ASSERT(cli);
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) {
+		if (iter == cli) {
+			vinstr_ctx->reprogram = true;
+			cli_found = 1;
+			list_del(&iter->list);
+			break;
+		}
+	}
+	if (!cli_found) {
+		list_for_each_entry_safe(
+				iter, tmp, &vinstr_ctx->waiting_clients, list) {
+			if (iter == cli) {
+				vinstr_ctx->reprogram = true;
+				cli_found = 1;
+				list_del(&iter->list);
+				break;
+			}
+		}
+	}
+	KBASE_DEBUG_ASSERT(cli_found);
+
+	kfree(cli->dump_buffers_meta);
+	free_pages(
+			(unsigned long)cli->dump_buffers,
+			get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	kfree(cli);
+
+	vinstr_ctx->nclients--;
+	if (!vinstr_ctx->nclients) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+
+	/* Rebuild context bitmap now that the client has detached */
+	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client);
+
+/* Accumulate counters in the dump buffer */
+static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
+{
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+	u32 *d = dst;
+	u32 *s = src;
+	size_t i, j;
+
+	for (i = 0; i < dump_size; i += block_size) {
+		/* skip over the header block */
+		d += NR_BYTES_PER_HDR / sizeof(u32);
+		s += NR_BYTES_PER_HDR / sizeof(u32);
+		for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
+			/* saturate result if addition would result in wraparound */
+			if (U32_MAX - *d < *s)
+				*d = U32_MAX;
+			else
+				*d += *s;
+			d++;
+			s++;
+		}
+	}
+}
+
+/* This is the Midgard v4 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v4(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups;
+	size_t i, group_size, group;
+	enum {
+		SC0_BASE    = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC1_BASE    = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC2_BASE    = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC3_BASE    = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		TILER_BASE  = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		JM_BASE     = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
+	};
+
+	group_size = NR_CNT_BLOCKS_PER_GROUP *
+			NR_CNT_PER_BLOCK *
+			NR_BYTES_PER_CNT;
+	for (i = 0; i < nr_cg; i++) {
+		group = i * group_size;
+		/* copy shader core headers */
+		memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE],
+		      NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy tiler header */
+		memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy mmu header */
+		memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy job manager header */
+		memcpy(&dst[group + JM_BASE], &src[group + JM_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* patch the shader core enable mask */
+		mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+
+		/* patch the tiler core enable mask */
+		mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[TILER_HWCNT_BM];
+
+		/* patch the mmu core enable mask */
+		mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+
+		/* patch the job manager enable mask */
+		mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[JM_HWCNT_BM];
+	}
+}
+
+/* This is the Midgard v5 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v5(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev;
+	u32 i, nr_l2;
+	u64 core_mask;
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+
+	/* copy and patch job manager header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[JM_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch tiler header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[TILER_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch MMU/L2C headers */
+	nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+	for (i = 0; i < nr_l2; i++) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+		dst += block_size;
+		src += block_size;
+	}
+
+	/* copy and patch shader core headers */
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (0ull != core_mask) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		if (0ull != (core_mask & 1ull)) {
+			/* if block is not reserved update header */
+			mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+			*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		}
+		dst += block_size;
+		src += block_size;
+
+		core_mask >>= 1;
+	}
+}
+
+/**
+ * accum_clients - accumulate dumped hw counters for all known clients
+ * @vinstr_ctx: vinstr context
+ */
+static void accum_clients(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *iter;
+	int v4 = 0;
+
+#ifndef CONFIG_MALI_NO_MALI
+	v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4);
+#endif
+
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ *
+ * Return: timestamp value
+ */
+static u64 kbasep_vinstr_get_timestamp(void)
+{
+	struct timespec ts;
+
+	getrawmonotonic(&ts);
+	return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+}
+
+/**
+ * kbasep_vinstr_add_dump_request - register client's dumping request
+ * @cli:             requesting client
+ * @waiting_clients: list of pending dumping requests
+ */
+static void kbasep_vinstr_add_dump_request(
+		struct kbase_vinstr_client *cli,
+		struct list_head *waiting_clients)
+{
+	struct kbase_vinstr_client *tmp;
+
+	if (list_empty(waiting_clients)) {
+		list_add(&cli->list, waiting_clients);
+		return;
+	}
+	list_for_each_entry(tmp, waiting_clients, list) {
+		if (tmp->dump_time > cli->dump_time) {
+			list_add_tail(&cli->list, &tmp->list);
+			return;
+		}
+	}
+	list_add_tail(&cli->list, waiting_clients);
+}
+
+/**
+ * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level
+ *                                        dump and accumulate them for known
+ *                                        clients
+ * @vinstr_ctx: vinstr context
+ * @timestamp: pointer where collection timestamp will be recorded
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_collect_and_accumulate(
+		struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp)
+{
+	int rcode;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* The dummy model needs the CPU mapping. */
+	gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va);
+#endif
+
+	/* Request HW counters dump.
+	 * Disable preemption to make dump timestamp more accurate. */
+	preempt_disable();
+	*timestamp = kbasep_vinstr_get_timestamp();
+	rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx);
+	preempt_enable();
+
+	if (!rcode)
+		rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
+	WARN_ON(rcode);
+
+	/* Accumulate values of collected counters. */
+	if (!rcode)
+		accum_clients(vinstr_ctx);
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel
+ *                                  buffer
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_fill_dump_buffer(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	unsigned int write_idx = atomic_read(&cli->write_idx);
+	unsigned int read_idx  = atomic_read(&cli->read_idx);
+
+	struct kbase_hwcnt_reader_metadata *meta;
+	void                               *buffer;
+
+	/* Check if there is a place to copy HWC block into. */
+	if (write_idx - read_idx == cli->buffer_count)
+		return -1;
+	write_idx %= cli->buffer_count;
+
+	/* Fill in dump buffer and its metadata. */
+	buffer = &cli->dump_buffers[write_idx * cli->dump_size];
+	meta   = &cli->dump_buffers_meta[write_idx];
+	meta->timestamp  = timestamp;
+	meta->event_id   = event_id;
+	meta->buffer_idx = write_idx;
+	memcpy(buffer, cli->accum_buffer, cli->dump_size);
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer
+ *                                         allocated in userspace
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of legacy ioctl interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_legacy(
+		struct kbase_vinstr_client *cli)
+{
+	void __user  *buffer = cli->legacy_buffer;
+	int          rcode;
+
+	/* Copy data to user buffer. */
+	rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size);
+	if (rcode)
+		pr_warn("error while copying buffer to user\n");
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer
+ *                                         allocated in kernel space
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of the kernel client interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_kernel(
+		struct kbase_vinstr_client *cli)
+{
+	memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_reprogram(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	if (vinstr_ctx->reprogram) {
+		struct kbase_vinstr_client *iter;
+
+		if (!reprogram_hwcnt(vinstr_ctx)) {
+			vinstr_ctx->reprogram = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->idle_clients,
+					list)
+				iter->pending = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->waiting_clients,
+					list)
+				iter->pending = false;
+		}
+	}
+}
+
+/**
+ * kbasep_vinstr_update_client - copy accumulated counters to user readable
+ *                               buffer and notify the user
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_update_client(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	int rcode = 0;
+
+	/* Copy collected counters to user readable buffer. */
+	if (cli->buffer_count)
+		rcode = kbasep_vinstr_fill_dump_buffer(
+				cli, timestamp, event_id);
+	else if (cli->kernel_buffer)
+		rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli);
+	else
+		rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
+
+	if (rcode)
+		goto exit;
+
+
+	/* Notify client. Make sure all changes to memory are visible. */
+	wmb();
+	atomic_inc(&cli->write_idx);
+	wake_up_interruptible(&cli->waitq);
+
+	/* Prepare for next request. */
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+exit:
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function
+ *
+ * @hrtimer: high resolution timer
+ *
+ * Return: High resolution timer restart enum.
+ */
+static enum hrtimer_restart kbasep_vinstr_wake_up_callback(
+		struct hrtimer *hrtimer)
+{
+	struct kbasep_vinstr_wake_up_timer *timer =
+		container_of(
+			hrtimer,
+			struct kbasep_vinstr_wake_up_timer,
+			hrtimer);
+
+	KBASE_DEBUG_ASSERT(timer);
+
+	atomic_set(&timer->vinstr_ctx->request_pending, 1);
+	wake_up_all(&timer->vinstr_ctx->waitq);
+
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * kbasep_vinstr_service_task - HWC dumping service thread
+ *
+ * @data: Pointer to vinstr context structure.
+ *
+ * Return: Always returns zero.
+ */
+static int kbasep_vinstr_service_task(void *data)
+{
+	struct kbase_vinstr_context        *vinstr_ctx = data;
+	struct kbasep_vinstr_wake_up_timer timer;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	hrtimer_init(&timer.hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	timer.hrtimer.function = kbasep_vinstr_wake_up_callback;
+	timer.vinstr_ctx       = vinstr_ctx;
+
+	while (!kthread_should_stop()) {
+		struct kbase_vinstr_client *cli = NULL;
+		struct kbase_vinstr_client *tmp;
+
+		u64              timestamp = kbasep_vinstr_get_timestamp();
+		u64              dump_time = 0;
+		struct list_head expired_requests;
+
+		/* Hold lock while performing operations on lists of clients. */
+		mutex_lock(&vinstr_ctx->lock);
+
+		/* Closing thread must not interact with client requests. */
+		if (current == vinstr_ctx->thread) {
+			atomic_set(&vinstr_ctx->request_pending, 0);
+
+			if (!list_empty(&vinstr_ctx->waiting_clients)) {
+				cli = list_first_entry(
+						&vinstr_ctx->waiting_clients,
+						struct kbase_vinstr_client,
+						list);
+				dump_time = cli->dump_time;
+			}
+		}
+
+		if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) {
+			mutex_unlock(&vinstr_ctx->lock);
+
+			/* Sleep until next dumping event or service request. */
+			if (cli) {
+				u64 diff = dump_time - timestamp;
+
+				hrtimer_start(
+						&timer.hrtimer,
+						ns_to_ktime(diff),
+						HRTIMER_MODE_REL);
+			}
+			wait_event(
+					vinstr_ctx->waitq,
+					atomic_read(
+						&vinstr_ctx->request_pending) ||
+					kthread_should_stop());
+			hrtimer_cancel(&timer.hrtimer);
+			continue;
+		}
+
+		kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &timestamp);
+
+		INIT_LIST_HEAD(&expired_requests);
+
+		/* Find all expired requests. */
+		list_for_each_entry_safe(
+				cli,
+				tmp,
+				&vinstr_ctx->waiting_clients,
+				list) {
+			s64 tdiff =
+				(s64)(timestamp + DUMPING_RESOLUTION) -
+				(s64)cli->dump_time;
+			if (tdiff >= 0ll) {
+				list_del(&cli->list);
+				list_add(&cli->list, &expired_requests);
+			} else {
+				break;
+			}
+		}
+
+		/* Fill data for each request found. */
+		list_for_each_entry_safe(cli, tmp, &expired_requests, list) {
+			/* Ensure that legacy buffer will not be used from
+			 * this kthread context. */
+			BUG_ON(0 == cli->buffer_count);
+			/* Expect only periodically sampled clients. */
+			BUG_ON(0 == cli->dump_interval);
+
+			kbasep_vinstr_update_client(
+					cli,
+					timestamp,
+					BASE_HWCNT_READER_EVENT_PERIODIC);
+
+			/* Set new dumping time. Drop missed probing times. */
+			do {
+				cli->dump_time += cli->dump_interval;
+			} while (cli->dump_time < timestamp);
+
+			list_del(&cli->list);
+			kbasep_vinstr_add_dump_request(
+					cli,
+					&vinstr_ctx->waiting_clients);
+		}
+
+		/* Reprogram counters set if required. */
+		kbasep_vinstr_reprogram(vinstr_ctx);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	}
+
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers
+ * @cli: pointer to vinstr client structure
+ *
+ * Return: non-zero if client has at least one dumping buffer filled that was
+ *         not notified to user yet
+ */
+static int kbasep_vinstr_hwcnt_reader_buffer_ready(
+		struct kbase_vinstr_client *cli)
+{
+	KBASE_DEBUG_ASSERT(cli);
+	return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int meta_idx = atomic_read(&cli->meta_idx);
+	unsigned int idx = meta_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx];
+
+	/* Metadata sanity check. */
+	KBASE_DEBUG_ASSERT(idx == meta->buffer_idx);
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if there is any buffer available. */
+	if (atomic_read(&cli->write_idx) == meta_idx)
+		return -EAGAIN;
+
+	/* Check if previously taken buffer was put back. */
+	if (atomic_read(&cli->read_idx) != meta_idx)
+		return -EBUSY;
+
+	/* Copy next available buffer's metadata to user. */
+	if (copy_to_user(buffer, meta, size))
+		return -EFAULT;
+
+	atomic_inc(&cli->meta_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int read_idx = atomic_read(&cli->read_idx);
+	unsigned int idx = read_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata meta;
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if any buffer was taken. */
+	if (atomic_read(&cli->meta_idx) == read_idx)
+		return -EPERM;
+
+	/* Check if correct buffer is put back. */
+	if (copy_from_user(&meta, buffer, size))
+		return -EFAULT;
+	if (idx != meta.buffer_idx)
+		return -EINVAL;
+
+	atomic_inc(&cli->read_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @interval: periodic dumping interval (disable periodic dumping if zero)
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+		struct kbase_vinstr_client *cli, u32 interval)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->suspended) {
+		mutex_unlock(&vinstr_ctx->lock);
+		return -EBUSY;
+	}
+
+	list_del(&cli->list);
+
+	cli->dump_interval = interval;
+
+	/* If interval is non-zero, enable periodic dumping for this client. */
+	if (cli->dump_interval) {
+		if (DUMPING_RESOLUTION > cli->dump_interval)
+			cli->dump_interval = DUMPING_RESOLUTION;
+		cli->dump_time =
+			kbasep_vinstr_get_timestamp() + cli->dump_interval;
+
+		kbasep_vinstr_add_dump_request(
+				cli, &vinstr_ctx->waiting_clients);
+
+		atomic_set(&vinstr_ctx->request_pending, 1);
+		wake_up_all(&vinstr_ctx->waitq);
+	} else {
+		list_add(&cli->list, &vinstr_ctx->idle_clients);
+	}
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id
+ * @event_id: id of event
+ * Return: event_mask or zero if event is not supported or maskable
+ */
+static u32 kbasep_vinstr_hwcnt_reader_event_mask(
+		enum base_hwcnt_reader_event event_id)
+{
+	u32 event_mask = 0;
+
+	switch (event_id) {
+	case BASE_HWCNT_READER_EVENT_PREJOB:
+	case BASE_HWCNT_READER_EVENT_POSTJOB:
+		/* These event are maskable. */
+		event_mask = (1 << event_id);
+		break;
+
+	case BASE_HWCNT_READER_EVENT_MANUAL:
+	case BASE_HWCNT_READER_EVENT_PERIODIC:
+		/* These event are non-maskable. */
+	default:
+		/* These event are not supported. */
+		break;
+	}
+
+	return event_mask;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to enable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask |= event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to disable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask &= ~event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command
+ * @cli:   pointer to vinstr client structure
+ * @hwver: pointer to user buffer where hw version will be stored
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+		struct kbase_vinstr_client *cli, u32 __user *hwver)
+{
+#ifndef CONFIG_MALI_NO_MALI
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+#endif
+
+	u32                         ver = 5;
+
+#ifndef CONFIG_MALI_NO_MALI
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4))
+		ver = 4;
+#endif
+
+	return put_user(ver, hwver);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl
+ * @filp:   pointer to file structure
+ * @cmd:    user command
+ * @arg:    command's argument
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	long                       rcode = 0;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd)))
+		return -EINVAL;
+
+	switch (cmd) {
+	case KBASE_HWCNT_READER_GET_API_VERSION:
+		rcode = put_user(HWCNT_READER_API, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_HWVER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+				cli, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER_SIZE:
+		KBASE_DEBUG_ASSERT(cli->vinstr_ctx);
+		rcode = put_user(
+				(u32)cli->vinstr_ctx->dump_size,
+				(u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_DUMP:
+		rcode = kbase_vinstr_hwc_dump(
+				cli, BASE_HWCNT_READER_EVENT_MANUAL);
+		break;
+	case KBASE_HWCNT_READER_CLEAR:
+		rcode = kbase_vinstr_hwc_clear(cli);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_PUT_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_SET_INTERVAL:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+				cli, (u32)arg);
+		break;
+	case KBASE_HWCNT_READER_ENABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	case KBASE_HWCNT_READER_DISABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	default:
+		rcode = -EINVAL;
+		break;
+	}
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp,
+		poll_table *wait)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	poll_wait(filp, &cli->waitq, wait);
+	if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap
+ * @filp: pointer to file structure
+ * @vma:  pointer to vma structure
+ * Return: zero on success
+ */
+static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
+		struct vm_area_struct *vma)
+{
+	struct kbase_vinstr_client *cli;
+	size_t                     size;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(vma);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	size = cli->buffer_count * cli->dump_size;
+	if (vma->vm_end - vma->vm_start > size)
+		return -ENOMEM;
+
+	return remap_pfn_range(
+			vma,
+			vma->vm_start,
+			__pa((unsigned long)cli->dump_buffers) >> PAGE_SHIFT,
+			size,
+			vma->vm_page_prot);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ * Return always return zero
+ */
+static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
+		struct file *filp)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	kbase_vinstr_detach_client(cli);
+	return 0;
+}
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL);
+	if (!vinstr_ctx)
+		return NULL;
+
+	INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
+	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
+	mutex_init(&vinstr_ctx->lock);
+	vinstr_ctx->kbdev = kbdev;
+	vinstr_ctx->thread = NULL;
+
+	atomic_set(&vinstr_ctx->request_pending, 0);
+	init_waitqueue_head(&vinstr_ctx->waitq);
+
+	return vinstr_ctx;
+}
+
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *cli;
+
+	/* Stop service thread first. */
+	if (vinstr_ctx->thread)
+		kthread_stop(vinstr_ctx->thread);
+
+	while (1) {
+		struct list_head *list = &vinstr_ctx->idle_clients;
+
+		if (list_empty(list)) {
+			list = &vinstr_ctx->waiting_clients;
+			if (list_empty(list))
+				break;
+		}
+
+		cli = list_first_entry(list, struct kbase_vinstr_client, list);
+		list_del(&cli->list);
+		kfree(cli->accum_buffer);
+		kfree(cli);
+		vinstr_ctx->nclients--;
+	}
+	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients);
+	if (vinstr_ctx->kctx)
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	kfree(vinstr_ctx);
+}
+
+int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup)
+{
+	struct kbase_vinstr_client  *cli;
+	u32                         bitmap[4];
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(setup->buffer_count);
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	cli = kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			setup->buffer_count,
+			bitmap,
+			&setup->fd,
+			NULL);
+
+	if (!cli)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup)
+{
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (setup->dump_buffer) {
+		u32 bitmap[4];
+
+		bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+		bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+		bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+		bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+		if (*cli)
+			return -EBUSY;
+
+		*cli = kbasep_vinstr_attach_client(
+				vinstr_ctx,
+				0,
+				bitmap,
+				(void *)(long)setup->dump_buffer,
+				NULL);
+
+		if (!(*cli))
+			return -ENOMEM;
+	} else {
+		if (!*cli)
+			return -EINVAL;
+
+		kbase_vinstr_detach_client(*cli);
+		*cli = NULL;
+	}
+
+	return 0;
+}
+
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer)
+{
+	u32 bitmap[4];
+
+	if (!vinstr_ctx || !setup || !kernel_buffer)
+		return NULL;
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	return kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			0,
+			bitmap,
+			NULL,
+			kernel_buffer);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
+
+int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	int                         rcode = 0;
+	struct kbase_vinstr_context *vinstr_ctx;
+	u64                         timestamp;
+	u32                         event_mask;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT);
+	event_mask = 1 << event_id;
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->suspended) {
+		rcode = -EBUSY;
+		goto exit;
+	}
+
+	if (event_mask & cli->event_mask) {
+		rcode = kbasep_vinstr_collect_and_accumulate(
+				vinstr_ctx,
+				&timestamp);
+		if (rcode)
+			goto exit;
+
+		rcode = kbasep_vinstr_update_client(cli, timestamp, event_id);
+		if (rcode)
+			goto exit;
+
+		kbasep_vinstr_reprogram(vinstr_ctx);
+	}
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump);
+
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	int                         rcode;
+	u64                         unused;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->suspended) {
+		rcode = -EBUSY;
+		goto exit;
+	}
+
+	rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
+	if (rcode)
+		goto exit;
+	rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx);
+	if (rcode)
+		goto exit;
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+	kbasep_vinstr_reprogram(vinstr_ctx);
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+
+void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	u64 unused;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+	if (!vinstr_ctx->nclients || vinstr_ctx->suspended) {
+		mutex_unlock(&vinstr_ctx->lock);
+		return;
+	}
+
+	kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
+	vinstr_ctx->suspended = true;
+	vinstr_ctx->suspended_clients = vinstr_ctx->waiting_clients;
+	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
+	mutex_unlock(&vinstr_ctx->lock);
+}
+
+void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx)
+{
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+	if (!vinstr_ctx->nclients || !vinstr_ctx->suspended) {
+		mutex_unlock(&vinstr_ctx->lock);
+		return;
+	}
+
+	vinstr_ctx->suspended = false;
+	vinstr_ctx->waiting_clients = vinstr_ctx->suspended_clients;
+	vinstr_ctx->reprogram = true;
+	kbasep_vinstr_reprogram(vinstr_ctx);
+	atomic_set(&vinstr_ctx->request_pending, 1);
+	wake_up_all(&vinstr_ctx->waitq);
+	mutex_unlock(&vinstr_ctx->lock);
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
new file mode 100755
index 0000000..d32462a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -0,0 +1,134 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwcnt_reader.h>
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context;
+struct kbase_vinstr_client;
+
+/*****************************************************************************/
+
+/**
+ * kbase_vinstr_init() - initialize the vinstr core
+ * @kbdev: kbase device
+ *
+ * Return: pointer to the vinstr context on success or NULL on failure
+ */
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_term() - terminate the vinstr core
+ * @vinstr_ctx: vinstr context
+ */
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader
+ * @vinstr_ctx: vinstr context
+ * @setup:      reader's configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwcnt_reader_setup(
+		struct kbase_vinstr_context        *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup);
+
+/**
+ * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping
+ * @vinstr_ctx: vinstr context
+ * @cli:        pointer where to store pointer to new vinstr client structure
+ * @setup:      hwc configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
+ *                                   client
+ * @vinstr_ctx:    vinstr context
+ * @setup:         reader's configuration
+ * @kernel_buffer: pointer to dump buffer
+ *
+ * setup->buffer_count and setup->fd are not used for kernel side clients.
+ *
+ * Return: pointer to client structure, or NULL on failure
+ */
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer);
+
+/**
+ * kbase_vinstr_hwc_dump - issue counter dump for vinstr client
+ * @cli:      pointer to vinstr client
+ * @event_id: id of event that triggered hwcnt dump
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_dump(
+		struct kbase_vinstr_client   *cli,
+		enum base_hwcnt_reader_event event_id);
+
+/**
+ * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for
+ *                          a given kbase context
+ * @cli: pointer to vinstr client
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_hwc_suspend - suspends hardware counter collection for
+ *                            a given kbase context
+ * @vinstr_ctx: vinstr context
+ */
+void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_hwc_resume - resumes hardware counter collection for
+ *                            a given kbase context
+ * @vinstr_ctx: vinstr context
+ */
+void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_dump_size - Return required size of dump buffer
+ * @kbdev: device pointer
+ *
+ * Return : buffer size in bytes
+ */
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_detach_client - Detach a client from the vinstr core
+ * @cli: Pointer to vinstr client
+ */
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
+
+#endif /* _KBASE_VINSTR_H_ */
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100755
index 0000000..5d6b402
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_KBASE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mali_slot_template,
+	TP_PROTO(int jobslot, unsigned int info_val),
+	TP_ARGS(jobslot, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, jobslot)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->jobslot = jobslot;
+		__entry->info_val = info_val;
+	),
+	TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val)
+);
+
+#define DEFINE_MALI_SLOT_EVENT(name) \
+DEFINE_EVENT(mali_slot_template, mali_##name, \
+	TP_PROTO(int jobslot, unsigned int info_val), \
+	TP_ARGS(jobslot, info_val))
+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT);
+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE);
+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT);
+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT);
+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+#undef DEFINE_MALI_SLOT_EVENT
+
+DECLARE_EVENT_CLASS(mali_refcount_template,
+	TP_PROTO(int refcount, unsigned int info_val),
+	TP_ARGS(refcount, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, refcount)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->refcount = refcount;
+		__entry->info_val = info_val;
+	),
+	TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val)
+);
+
+#define DEFINE_MALI_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(mali_refcount_template, mali_##name, \
+	TP_PROTO(int refcount, unsigned int info_val), \
+	TP_ARGS(refcount, info_val))
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK);
+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE);
+#undef DEFINE_MALI_REFCOUNT_EVENT
+
+DECLARE_EVENT_CLASS(mali_add_template,
+	TP_PROTO(int gpu_addr, unsigned int info_val),
+	TP_ARGS(gpu_addr, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, gpu_addr)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->gpu_addr = gpu_addr;
+		__entry->info_val = info_val;
+	),
+	TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val)
+);
+
+#define DEFINE_MALI_ADD_EVENT(name) \
+DEFINE_EVENT(mali_add_template, mali_##name, \
+	TP_PROTO(int gpu_addr, unsigned int info_val), \
+	TP_ARGS(gpu_addr, info_val))
+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY);
+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL);
+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT);
+DEFINE_MALI_ADD_EVENT(JM_IRQ);
+DEFINE_MALI_ADD_EVENT(JM_IRQ_END);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP);
+DEFINE_MALI_ADD_EVENT(PM_PWRON);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM);
+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+#undef DEFINE_MALI_ADD_EVENT
+
+#endif /* _TRACE_MALI_KBASE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100755
index 0000000..93fc5ea
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,207 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#include <linux/tracepoint.h>
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - called from mali_kbase_core_linux.c
+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event,
+	TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid,
+			unsigned char job_id),
+	TP_ARGS(event_id, tgid, pid, job_id),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned int, tgid)
+		__field(unsigned int, pid)
+		__field(unsigned char, job_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->tgid = tgid;
+		__entry->pid = pid;
+		__entry->job_id = job_id;
+	),
+	TP_printk("event=%u tgid=%u pid=%u job_id=%u",
+		__entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+);
+
+/**
+ * mali_pm_status - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF)
+ */
+TRACE_EVENT(mali_pm_status,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power up
+ */
+TRACE_EVENT(mali_pm_power_on,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power down
+ */
+TRACE_EVENT(mali_pm_power_off,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_page_fault_insert_pages - Called by page_fault_worker()
+ * it reports an MMU page fault resulting in new pages being mapped.
+ * @event_id: MMU address space number.
+ * @value: number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages,
+	TP_PROTO(int event_id, unsigned long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+		__field(unsigned long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space()
+ * it reports that a certain MMU address space is in use now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_in_use,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal()
+ * it reports that a certain MMU address space has been released now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_released,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
+ *                                 and by kbase_atomic_sub_pages()
+ * it reports that the total number of allocated pages is changed.
+ * @event_id: number of pages to be added or subtracted (according to the sign).
+ */
+TRACE_EVENT(mali_total_alloc_pages_change,
+	TP_PROTO(long long int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(long long int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%lld", __entry->event_id)
+);
+
+/**
+ * mali_sw_counter - not currently used
+ * @event_id: counter id
+ */
+TRACE_EVENT(mali_sw_counter,
+	TP_PROTO(unsigned int event_id, signed long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+		__field(long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %d = %lld", __entry->event_id, __entry->value)
+);
+
+#endif				/*  _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
new file mode 100755
index 0000000..9945293
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Kernel-wide include for common macros and types.
+ */
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define U8_MAX          ((u8)~0U)
+#define S8_MAX          ((s8)(U8_MAX>>1))
+#define S8_MIN          ((s8)(-S8_MAX - 1))
+#define U16_MAX         ((u16)~0U)
+#define S16_MAX         ((s16)(U16_MAX>>1))
+#define S16_MIN         ((s16)(-S16_MAX - 1))
+#define U32_MAX         ((u32)~0U)
+#define S32_MAX         ((s32)(U32_MAX>>1))
+#define S32_MIN         ((s32)(-S32_MAX - 1))
+#define U64_MAX         ((u64)~0ULL)
+#define S64_MAX         ((s64)(U64_MAX>>1))
+#define S64_MIN         ((s64)(-S64_MAX - 1))
+#endif /* LINUX_VERSION_CODE */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#define SIZE_MAX        (~(size_t)0)
+#endif /* LINUX_VERSION_CODE */
+
+/**
+ * MIN - Return the lesser of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * Refer to MAX macro for more details
+ */
+#define MIN(x, y)	((x) < (y) ? (x) : (y))
+
+/**
+ * MAX -  Return the greater of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as MIN it is guaranteed to return
+ * the one that MIN didn't return. This is significant for types where not
+ * all values are comparable e.g. NaNs in floating-point types. But if you want
+ * to retrieve the min and max of two values, consider using a conditional swap
+ * instead.
+ */
+#define MAX(x, y)	((x) < (y) ? (y) : (x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * much support API backwards compatibility.
+ */
+#define CSTD_UNUSED(x)	((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile time macros turn a function-like macro in to a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP(...)	((void)#__VA_ARGS__)
+
+/**
+ * Function-like macro for converting a pointer in to a u64 for storing into
+ * an external data structure. This is commonly used when pairing a 32-bit
+ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
+ * is complex and a straight cast does not work reliably as pointers are
+ * often considered as signed.
+ */
+#define PTR_TO_U64(x)	((uint64_t)((uintptr_t)(x)))
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1(x)	#x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be used
+ * if the macro is defined in a way which may have no value; use the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2(x)	CSTD_STR1(x)
+
+/**
+ * Specify an assertion value which is evaluated at compile time. Recommended
+ * usage is specification of a @c static @c INLINE function containing all of
+ * the assertions thus:
+ *
+ * @code
+ * static INLINE [module]_compile_time_assertions( void )
+ * {
+ *     COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
+ * }
+ * @endcode
+ *
+ * @note Use @c static not @c STATIC. We never want to turn off this @c static
+ * specification for testing purposes.
+ */
+#define CSTD_COMPILE_TIME_ASSERT(expr) \
+	do { switch (0) { case 0: case (expr):; } } while (false)
+
+#endif /* _MALISW_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
new file mode 100644
index 0000000..a509cbd
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDG_COHERENCY_H_
+#define _MIDG_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE      1
+#define COHERENCY_NONE     31
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+
+#endif /* _MIDG_COHERENCY_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100755
index 0000000..6b1d67d
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,571 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Begin Register Offsets
+ */
+
+#define GPU_CONTROL_BASE        0x0000
+#define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
+#define GPU_ID                  0x000	/* (RO) GPU and revision identifier */
+#define L2_FEATURES             0x004	/* (RO) Level 2 cache features */
+#define SUSPEND_SIZE            0x008   /* (RO) Fixed-function suspend buffer
+						size */
+#define TILER_FEATURES          0x00C	/* (RO) Tiler Features */
+#define MEM_FEATURES            0x010	/* (RO) Memory system features */
+#define MMU_FEATURES            0x014	/* (RO) MMU features */
+#define AS_PRESENT              0x018	/* (RO) Address space slots present */
+#define JS_PRESENT              0x01C	/* (RO) Job slots present */
+#define GPU_IRQ_RAWSTAT         0x020	/* (RW) */
+#define GPU_IRQ_CLEAR           0x024	/* (WO) */
+#define GPU_IRQ_MASK            0x028	/* (RW) */
+#define GPU_IRQ_STATUS          0x02C	/* (RO) */
+
+/* IRQ flags */
+#define GPU_FAULT               (1 << 0)	/* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS     (1 << 7)	/* More than one GPU Fault occurred. */
+#define RESET_COMPLETED         (1 << 8)	/* Set when a reset has completed. Intended to use with SOFT_RESET
+						   commands which may take time. */
+#define POWER_CHANGED_SINGLE    (1 << 9)	/* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL       (1 << 10)	/* Set when all cores have finished powering up or down
+						   and the power manager is idle. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16)	/* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED  (1 << 17)	/* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+			| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#define GPU_COMMAND             0x030	/* (WO) */
+#define GPU_STATUS              0x034	/* (RO) */
+#define LATEST_FLUSH            0x038	/* (RO) */
+
+#define GROUPS_L2_COHERENT      (1 << 0)	/* Cores groups are l2 coherent */
+
+#define GPU_FAULTSTATUS         0x03C	/* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO     0x040	/* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI     0x044	/* (RO) GPU exception fault address, high word */
+
+#define PWR_KEY                 0x050	/* (WO) Power manager key register */
+#define PWR_OVERRIDE0           0x054	/* (RW) Power manager override settings */
+#define PWR_OVERRIDE1           0x058	/* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO          0x060	/* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI          0x064	/* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG           0x068	/* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN            0x06C	/* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN        0x070	/* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN         0x074	/* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN        0x07C	/* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO          0x090	/* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI          0x094	/* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO            0x098	/* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI            0x09C	/* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS		0x0A0	/* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4	/* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8	/* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES         0x0AC	/* (RO) Thread features */
+
+#define TEXTURE_FEATURES_0      0x0B0	/* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1      0x0B4	/* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2      0x0B8	/* (RO) Support flags for indexed texture formats 64..95 */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define JS0_FEATURES            0x0C0	/* (RO) Features of job slot 0 */
+#define JS1_FEATURES            0x0C4	/* (RO) Features of job slot 1 */
+#define JS2_FEATURES            0x0C8	/* (RO) Features of job slot 2 */
+#define JS3_FEATURES            0x0CC	/* (RO) Features of job slot 3 */
+#define JS4_FEATURES            0x0D0	/* (RO) Features of job slot 4 */
+#define JS5_FEATURES            0x0D4	/* (RO) Features of job slot 5 */
+#define JS6_FEATURES            0x0D8	/* (RO) Features of job slot 6 */
+#define JS7_FEATURES            0x0DC	/* (RO) Features of job slot 7 */
+#define JS8_FEATURES            0x0E0	/* (RO) Features of job slot 8 */
+#define JS9_FEATURES            0x0E4	/* (RO) Features of job slot 9 */
+#define JS10_FEATURES           0x0E8	/* (RO) Features of job slot 10 */
+#define JS11_FEATURES           0x0EC	/* (RO) Features of job slot 11 */
+#define JS12_FEATURES           0x0F0	/* (RO) Features of job slot 12 */
+#define JS13_FEATURES           0x0F4	/* (RO) Features of job slot 13 */
+#define JS14_FEATURES           0x0F8	/* (RO) Features of job slot 14 */
+#define JS15_FEATURES           0x0FC	/* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n)      GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define SHADER_PRESENT_LO       0x100	/* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI       0x104	/* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO        0x110	/* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI        0x114	/* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO           0x120	/* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI           0x124	/* (RO) Level 2 cache present bitmap, high word */
+
+
+#define SHADER_READY_LO         0x140	/* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI         0x144	/* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO          0x150	/* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI          0x154	/* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO             0x160	/* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI             0x164	/* (RO) Level 2 cache ready bitmap, high word */
+
+
+#define SHADER_PWRON_LO         0x180	/* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI         0x184	/* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO          0x190	/* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI          0x194	/* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO             0x1A0	/* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI             0x1A4	/* (WO) Level 2 cache power on bitmap, high word */
+
+#define SHADER_PWROFF_LO        0x1C0	/* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI        0x1C4	/* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO         0x1D0	/* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI         0x1D4	/* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO            0x1E0	/* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI            0x1E4	/* (WO) Level 2 cache power off bitmap, high word */
+
+#define SHADER_PWRTRANS_LO      0x200	/* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI      0x204	/* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO       0x210	/* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI       0x214	/* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO          0x220	/* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI          0x224	/* (RO) Level 2 cache power transition bitmap, high word */
+
+#define SHADER_PWRACTIVE_LO     0x240	/* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI     0x244	/* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO      0x250	/* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI      0x254	/* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO         0x260	/* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI         0x264	/* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES      0x300	/* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304	/* (RW) Coherency enable */
+
+#define JM_CONFIG               0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
+#define SHADER_CONFIG           0xF04	/* (RW) Shader core configuration settings (Implementation specific register) */
+#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define L2_MMU_CONFIG           0xF0C	/* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+#define JOB_CONTROL_BASE        0x1000
+
+#define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT         0x000	/* Raw interrupt status register */
+#define JOB_IRQ_CLEAR           0x004	/* Interrupt clear register */
+#define JOB_IRQ_MASK            0x008	/* Interrupt mask register */
+#define JOB_IRQ_STATUS          0x00C	/* Interrupt status register */
+#define JOB_IRQ_JS_STATE        0x010	/* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE        0x014	/* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
+
+#define JOB_SLOT0               0x800	/* Configuration registers for job slot 0 */
+#define JOB_SLOT1               0x880	/* Configuration registers for job slot 1 */
+#define JOB_SLOT2               0x900	/* Configuration registers for job slot 2 */
+#define JOB_SLOT3               0x980	/* Configuration registers for job slot 3 */
+#define JOB_SLOT4               0xA00	/* Configuration registers for job slot 4 */
+#define JOB_SLOT5               0xA80	/* Configuration registers for job slot 5 */
+#define JOB_SLOT6               0xB00	/* Configuration registers for job slot 6 */
+#define JOB_SLOT7               0xB80	/* Configuration registers for job slot 7 */
+#define JOB_SLOT8               0xC00	/* Configuration registers for job slot 8 */
+#define JOB_SLOT9               0xC80	/* Configuration registers for job slot 9 */
+#define JOB_SLOT10              0xD00	/* Configuration registers for job slot 10 */
+#define JOB_SLOT11              0xD80	/* Configuration registers for job slot 11 */
+#define JOB_SLOT12              0xE00	/* Configuration registers for job slot 12 */
+#define JOB_SLOT13              0xE80	/* Configuration registers for job slot 13 */
+#define JOB_SLOT14              0xF00	/* Configuration registers for job slot 14 */
+#define JOB_SLOT15              0xF80	/* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r)      (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO             0x00	/* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI             0x04	/* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO             0x08	/* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI             0x0C	/* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
+
+#define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
+#define JS_STATUS              0x24	/* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO        0x40	/* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI        0x44	/* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
+
+#define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
+
+#define MEMORY_MANAGEMENT_BASE  0x2000
+#define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT         0x000	/* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR           0x004	/* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK            0x008	/* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS          0x00C	/* (RO) Interrupt status register */
+
+#define MMU_AS0                 0x400	/* Configuration registers for address space 0 */
+#define MMU_AS1                 0x440	/* Configuration registers for address space 1 */
+#define MMU_AS2                 0x480	/* Configuration registers for address space 2 */
+#define MMU_AS3                 0x4C0	/* Configuration registers for address space 3 */
+#define MMU_AS4                 0x500	/* Configuration registers for address space 4 */
+#define MMU_AS5                 0x540	/* Configuration registers for address space 5 */
+#define MMU_AS6                 0x580	/* Configuration registers for address space 6 */
+#define MMU_AS7                 0x5C0	/* Configuration registers for address space 7 */
+#define MMU_AS8                 0x600	/* Configuration registers for address space 8 */
+#define MMU_AS9                 0x640	/* Configuration registers for address space 9 */
+#define MMU_AS10                0x680	/* Configuration registers for address space 10 */
+#define MMU_AS11                0x6C0	/* Configuration registers for address space 11 */
+#define MMU_AS12                0x700	/* Configuration registers for address space 12 */
+#define MMU_AS13                0x740	/* Configuration registers for address space 13 */
+#define MMU_AS14                0x780	/* Configuration registers for address space 14 */
+#define MMU_AS15                0x7C0	/* Configuration registers for address space 15 */
+
+#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO         0x00	/* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI         0x04	/* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO          0x08	/* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI          0x0C	/* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO         0x10	/* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI         0x14	/* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND             0x18	/* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS         0x1C	/* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO     0x20	/* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
+#define AS_STATUS              0x28	/* (RO) Status flags for address space n */
+
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
+
+/* End Register Offsets */
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+   MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS   16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n)      (1UL << (n))
+#define MMU_BUS_ERROR(n)       (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK   0xfffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED  (0u << 0)
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY  (1u << 1)
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE     (3u << 0)
+#define AS_TRANSTAB_LPAE_READ_INNER        (1u << 2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER       (1u << 4)
+
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK      0x00000003
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                    (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT       (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT        (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT      (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG             (0x3<<3)
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT      (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK                  (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC                (0x0<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX                    (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ                  (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE                 (0x3<<8)
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
+
+/*
+ * Begin Command Values
+ */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP         0x00	/* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START       0x01	/* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP   0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK        0x07    /* Mask of bits currently in use by the HW */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP         0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK        0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK      0x03	/* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH       0x04	/* Flush all L2 caches then issue a flush region command to all MMUs
+					   (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT    0x04	/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM   0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
+					   flush all L2 caches then issue a flush region command to all MMUs */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION        (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN            (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU                    (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG               (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
+ */
+
+/* Group of values representing the job status insead a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE   0x00
+#define JS_STATUS_INTERRUPTED         (JS_STATUS_NO_EXCEPTION_BASE + 0x02)	/* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED             (JS_STATUS_NO_EXCEPTION_BASE + 0x03)	/* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED          (JS_STATUS_NO_EXCEPTION_BASE + 0x04)	/* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE          0x40
+#define JS_STATUS_CONFIG_FAULT        (JS_STATUS_FAULT_BASE)	/* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT         (JS_STATUS_FAULT_BASE + 0x01)	/* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT          (JS_STATUS_FAULT_BASE + 0x02)	/* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT         (JS_STATUS_FAULT_BASE + 0x03)	/* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT      (JS_STATUS_FAULT_BASE + 0x04)	/* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT           (JS_STATUS_FAULT_BASE + 0x08)	/* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE  0x50
+#define JS_STATUS_INSTR_INVALID_PC        (JS_STATUS_INSTRUCTION_FAULT_BASE)	/* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01)	/* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02)	/* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03)	/* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT         (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04)	/* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05)	/* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06)	/* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT      (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08)	/* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT        (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09)	/* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A)	/* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE   0x60
+#define JS_STATUS_OUT_OF_MEMORY       (JS_STATUS_MEMORY_FAULT_BASE)	/* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN             0x7F	/* 0x7F means UNKNOWN */
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP                0x00	/* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET         0x01	/* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET         0x02	/* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR       0x03	/* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE      0x04	/* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START  0x05	/* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP   0x06	/* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES       0x07	/* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES   0x08	/* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09	/* Places the GPU in protected mode */
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE           (1 << 2)	/* Set if the performance counters are active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE   (1 << 7)	/* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT      0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT        4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+
+#define PRFCNT_CONFIG_MODE_OFF    0	/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_MANUAL 1	/* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
+#define PRFCNT_CONFIG_MODE_TILE   2	/* The performance counters are enabled, and are written out each time a tile finishes rendering. */
+
+/* AS<n>_MEMATTR values: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
+
+/* Use GPU implementation-defined  caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC           0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
+
+/* Symbol for default MEMATTR to use */
+
+/* Default is - HW implementation defined caching */
+#define AS_MEMATTR_INDEX_DEFAULT               0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE           3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL    1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC           2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF        3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA              4
+
+/* JS<n>_FEATURES register */
+
+#define JS_FEATURE_NULL_JOB              (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB         (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB       (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB           (1u << 4)
+#define JS_FEATURE_VERTEX_JOB            (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB          (1u << 6)
+#define JS_FEATURE_TILER_JOB             (1u << 7)
+#define JS_FEATURE_FUSED_JOB             (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB          (1u << 9)
+
+/* End JS<n>_FEATURES register */
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT        (24)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS              (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT       (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER      (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF         (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT       (26)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES             (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT      (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER     (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF        (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED  0
+#define IMPLEMENTATION_SILICON      1
+#define IMPLEMENTATION_FPGA         2
+#define IMPLEMENTATION_MODEL        3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT     256
+#define THREAD_MWS_DEFAULT    256
+#define THREAD_MBS_DEFAULT    256
+#define THREAD_MR_DEFAULT     1024
+#define THREAD_MTQ_DEFAULT    4
+#define THREAD_MTGS_DEFAULT   10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+
+#define SC_ALT_COUNTERS             (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL  (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD   (1ul << 6)
+#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
+#define SC_LS_PAUSEBUFFER_DISABLE   (1ul << 16)
+#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
+#define SC_ENABLE_TEXGRD_FLAGS      (1ul << 25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+
+#define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
+
+/* End TILER_CONFIG register */
+
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
new file mode 100755
index 0000000..c356372
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
@@ -0,0 +1,397 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali_timeline
+
+#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _MALI_TIMELINE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mali_timeline_atoms_in_flight,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int tgid,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		tgid,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, tgid)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->tgid = tgid;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT,
+				(int)__entry->ts_sec,
+				(int)__entry->ts_nsec,
+				__entry->tgid,
+				__entry->count)
+);
+
+
+TRACE_EVENT(mali_timeline_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->atom_id,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_action,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int active),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		active),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, active)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->active = active;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->active)
+
+);
+
+TRACE_EVENT(mali_timeline_l2_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int state),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		state),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, state)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->state = state;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->state)
+
+);
+TRACE_EVENT(mali_timeline_pm_event,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int pm_event_type,
+		unsigned int pm_event_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		pm_event_type,
+		pm_event_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, pm_event_type)
+			__field(unsigned int, pm_event_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->pm_event_type = pm_event_type;
+		__entry->pm_event_id = pm_event_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->pm_event_type, __entry->pm_event_id)
+
+);
+
+TRACE_EVENT(mali_timeline_slot_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_pm_checktrans,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int trans_code,
+		int trans_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		trans_code,
+		trans_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, trans_code)
+			__field(int, trans_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->trans_code = trans_code;
+		__entry->trans_id = trans_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->trans_id)
+
+);
+
+TRACE_EVENT(mali_timeline_context_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->count)
+);
+
+
+#endif /* _MALI_TIMELINE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_uk.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
new file mode 100755
index 0000000..841d03f
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,141 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif				/* __cplusplus */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
+ * identifier to select a UKK client to the uku_open() function.
+ *
+ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
+ * enumeration and the uku_open() implemenation for the various OS ports need to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+	/**
+	 * Value used to identify the Base driver UK client.
+	 */
+	UK_CLIENT_MALI_T600_BASE,
+
+	/** The number of uk clients supported. This must be the last member of the enum */
+	UK_CLIENT_COUNT
+};
+
+/**
+ * Each function callable through the UK interface has a unique number.
+ * Functions provided by UK clients start from number UK_FUNC_ID.
+ * Numbers below UK_FUNC_ID are used for internal UK functions.
+ */
+enum uk_func {
+	UKP_FUNC_ID_CHECK_VERSION,   /**< UKK Core internal function */
+	/**
+	 * Each UK client numbers the functions they provide starting from
+	 * number UK_FUNC_ID. This number is then eventually assigned to the
+	 * id field of the union uk_header structure when preparing to make a
+	 * UK call. See your UK client for a list of their function numbers.
+	 */
+	UK_FUNC_ID = 512
+};
+
+/**
+ * Arguments for a UK call are stored in a structure. This structure consists
+ * of a fixed size header and a payload. The header carries a 32-bit number
+ * identifying the UK function to be called (see uk_func). When the UKK client
+ * receives this header and executed the requested UK function, it will use
+ * the same header to store the result of the function in the form of a
+ * int return code. The size of this structure is such that the
+ * first member of the payload following the header can be accessed efficiently
+ * on a 32 and 64-bit kernel and the structure has the same size regardless
+ * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
+ * accordingly in the OS specific mali_uk_os.h header file.
+ */
+union uk_header {
+	/**
+	 * 32-bit number identifying the UK function to be called.
+	 * Also see uk_func.
+	 */
+	u32 id;
+	/**
+	 * The int return code returned by the called UK function.
+	 * See the specification of the particular UK function you are
+	 * calling for the meaning of the error codes returned. All
+	 * UK functions return 0 on success.
+	 */
+	u32 ret;
+	/*
+	 * Used to ensure 64-bit alignment of this union. Do not remove.
+	 * This field is used for padding and does not need to be initialized.
+	 */
+	u64 sizer;
+};
+
+/**
+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
+ */
+struct uku_version_check_args {
+	union uk_header header;
+		  /**< UK call header */
+	u16 major;
+	   /**< This field carries the user-side major version on input and the kernel-side major version on output */
+	u16 minor;
+	   /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
+	u8 padding[4];
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif				/* __cplusplus */
+#endif				/* _UK_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild
new file mode 100755
index 0000000..558657b
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+	platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+	obj-y += $(platform_name)/
+endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100755
index 0000000..8fb4e91
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name is the name set in MALI_PLATFORM_THIRDPARTY_NAME
+#
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
new file mode 100755
index 0000000..de8849e
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+USE_GPPLL?=0
+ifdef CONFIG_AM_VIDEO
+    USE_GPPLL:=1
+endif
+
+ccflags-y += -DAMLOGIC_GPU_USE_GPPLL=$(USE_GPPLL)
+
+ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-y += platform/devicetree/mali_clock.c
+obj-y += platform/devicetree/mpgpu.c
+obj-y += platform/devicetree/meson_main2.c
+obj-y += platform/devicetree/platform_gx.c
+obj-y += platform/devicetree/scaling.c
+obj-y += mali_kbase_runtime_pm.c
+obj-y += mali_kbase_config_devicetree.c
+else ifeq ($(CONFIG_MALI_MIDGARD),m)
+SRC += platform/devicetree/mali_clock.c
+SRC += platform/devicetree/mpgpu.c
+SRC += platform/devicetree/meson_main2.c
+SRC += platform/devicetree/platform_gx.c
+SRC += platform/devicetree/scaling.c
+SRC += platform/devicetree/mali_kbase_runtime_pm.c
+SRC += platform/devicetree/mali_kbase_config_devicetree.c
+endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
new file mode 100644
index 0000000..35249a5
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
@@ -0,0 +1,653 @@
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+#ifndef AML_CLK_LOCK_ERROR
+#define AML_CLK_LOCK_ERROR 1
+#endif
+
+static unsigned gpu_dbg_level = 0;
+module_param(gpu_dbg_level, uint, 0644);
+MODULE_PARM_DESC(gpu_dbg_level, "gpu debug level");
+
+#define gpu_dbg(level, fmt, arg...)			\
+	do {			\
+		if (gpu_dbg_level >= (level))		\
+		printk("gpu_debug"fmt , ## arg); 	\
+	} while (0)
+
+#define GPU_CLK_DBG(fmt, arg...)
+
+//disable print
+#define _dev_info(...)
+
+//static DEFINE_SPINLOCK(lock);
+static mali_plat_info_t* pmali_plat = NULL;
+//static u32 mali_extr_backup = 0;
+//static u32 mali_extr_sample_backup = 0;
+struct timeval start;
+struct timeval end;
+int mali_pm_statue = 0;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali_0_parent = dvfs_tbl->clkp_handle;
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+#ifdef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+#endif
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_set_parent(clk_mali_0, clk_mali_0_parent);
+
+	clk_prepare_enable(clk_mali_0);
+
+	clk_set_parent(clk_mali, clk_mali_0);
+
+#ifdef AML_CLK_LOCK_ERROR
+	clk_set_parent(clk_mali_1, clk_mali_0_parent);
+	clk_prepare_enable(clk_mali_1);
+#endif
+
+	GPU_CLK_DBG("%s:enable(%d), %s:enable(%d)\n",
+	  clk_mali_0->name,  clk_mali_0->enable_count,
+	  clk_mali_0_parent->name, clk_mali_0_parent->enable_count);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+	struct clk *clk_mali_x   = NULL;
+	struct clk *clk_mali_x_parent = NULL;
+	struct clk *clk_mali_x_old = NULL;
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+	clk_mali_x_old  = clk_get_parent(clk_mali);
+
+	if (!clk_mali_x_old) {
+		printk("gpu: could not get clk_mali_x_old or clk_mali_x_old\n");
+		return 0;
+	}
+	if (clk_mali_x_old == clk_mali_0) {
+		clk_mali_x = clk_mali_1;
+	} else if (clk_mali_x_old == clk_mali_1) {
+		clk_mali_x = clk_mali_0;
+	} else {
+		printk("gpu: unmatched clk_mali_x_old\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG("idx=%d, clk_freq=%d\n", idx, dvfs_tbl->clk_freq);
+	clk_mali_x_parent = dvfs_tbl->clkp_handle;
+	if (!clk_mali_x_parent) {
+		printk("gpu: could not get clk_mali_x_parent\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x_parent, dvfs_tbl->clkp_freq);
+	GPU_CLK_DBG();
+	ret = clk_set_parent(clk_mali_x, clk_mali_x_parent);
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x, dvfs_tbl->clk_freq);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	ret = clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG("new %s:enable(%d)\n", clk_mali_x->name,  clk_mali_x->enable_count);
+	do_gettimeofday(&start);
+	udelay(1);// delay 10ns
+	do_gettimeofday(&end);
+	ret = clk_set_parent(clk_mali, clk_mali_x);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	GPU_CLK_DBG("old %s:enable(%d)\n", clk_mali_x_old->name,  clk_mali_x_old->enable_count);
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "ao io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	mpdata->dvfs_table_size = 0;
+
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+	dev_notice(&pdev->dev, "dvfs table is %d\n", mpdata->dvfs_table_size);
+	dev_notice(&pdev->dev, "dvfs table addr %p, ele size=%zd\n",
+			mpdata->dvfs_table,
+			sizeof(mpdata->dvfs_table[0]));
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "clk_gpu");
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_gpu_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_gpu_1");
+	if (IS_ERR(mpdata->clk_mali) || IS_ERR(mpdata->clk_mali_0) || IS_ERR(mpdata->clk_mali_1)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+#else
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	//mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+
+	GPU_CLK_DBG();
+	do_gettimeofday(&start);
+	ret = clk_set_rate(clk_mali, dvfs_tbl->clk_freq);
+	do_gettimeofday(&end);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	GPU_CLK_DBG();
+	clk_disable_unprepare(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+#if 0
+#ifdef MESON_CPU_VERSION_OPS
+		if (is_meson_gxbbm_cpu()) {
+			if (dvfs_tbl->clk_freq >= GXBBM_MAX_GPU_FREQ)
+				continue;
+		}
+#endif
+#endif
+#if  0
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+#endif
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+	_dev_info(&pdev->dev, "clock dvfs table size is %d\n", mpdata->dvfs_table_size);
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "gpu_mux");
+#if 0
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_mali_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_mali_1");
+#endif
+	if (IS_ERR(mpdata->clk_mali)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
new file mode 100644
index 0000000..9b8b392
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
@@ -0,0 +1,37 @@
+#ifndef __MALI_CLOCK_H__
+#define __MALI_CLOCK_H__
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <linux/clk.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 29))
+#include <linux/amlogic/iomap.h>
+#endif
+
+#ifndef HHI_MALI_CLK_CNTL
+#define HHI_MALI_CLK_CNTL   0x6C
+#define mplt_read(r)        readl((pmali_plat->reg_base_hiubus) + ((r)<<2))
+#define mplt_write(r, v)    writel((v), ((pmali_plat->reg_base_hiubus) + ((r)<<2)))
+#define mplt_setbits(r, m)  mplt_write((r), (mplt_read(r) | (m)));
+#define mplt_clrbits(r, m)  mplt_write((r), (mplt_read(r) & (~(m))));
+#endif
+
+//extern int mali_clock_init(struct platform_device *dev);
+int mali_clock_init_clk_tree(struct platform_device *pdev);
+
+typedef int (*critical_t)(size_t param);
+int mali_clock_critical(critical_t critical, size_t param);
+
+int mali_clock_init(mali_plat_info_t*);
+int mali_clock_set(unsigned int index);
+void disable_clock(void);
+void enable_clock(void);
+u32 get_mali_freq(u32 idx);
+void set_str_src(u32 data);
+int mali_dt_info(struct platform_device *pdev,
+        struct mali_plat_info_t *mpdata);
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
new file mode 100755
index 0000000..1267b6f
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
@@ -0,0 +1,109 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/version.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static unsigned long t83x_static_power(unsigned long voltage)
+{
+#if 0
+	struct thermal_zone_device *tz;
+	unsigned long temperature, temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10);
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	tz = thermal_zone_get_zone_by_name("gpu");
+	if (IS_ERR(tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(tz));
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	} else {
+		int ret;
+
+		ret = tz->ops->get_temp(tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(2 * temp_cubed)
+			- (80 * temp_squared)
+			+ (4700 * temp)
+			+ 32000;
+
+	return (((coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+#else
+	return 0;
+#endif
+}
+
+static unsigned long t83x_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+	const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */
+
+	return (coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+struct devfreq_cooling_ops t83x_model_ops = {
+#else
+struct devfreq_cooling_power t83x_model_ops = {
+#endif
+	.get_static_power = t83x_static_power,
+	.get_dynamic_power = t83x_dynamic_power,
+};
+
+#endif
+
+#include <mali_kbase_config.h>
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &dummy_platform_config;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
new file mode 100755
index 0000000..adc052c
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (750000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (100000)
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (NULL)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+extern struct kbase_platform_funcs_conf dt_funcs_conf;
+#define PLATFORM_FUNCS (&dt_funcs_conf)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#ifdef CONFIG_DEVFREQ_THERMAL
+#define POWER_MODEL_CALLBACKS (&t83x_model_ops)
+extern struct devfreq_cooling_ops t83x_model_ops;
+#else
+#define POWER_MODEL_CALLBACKS (NULL)
+#endif
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
new file mode 100755
index 0000000..3b3f095
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
@@ -0,0 +1,244 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+
+void *reg_base_hiubus = NULL;
+u32  override_value_aml = 0;
+static int first = 1;
+
+static void  Mali_pwr_on_with_kdev ( struct kbase_device *kbdev, uint32_t  mask)
+{
+    uint32_t    part1_done;
+    part1_done = 0;
+    Mali_WrReg(0x0, 0x0, 0x0000024, 0xffffffff); // clear interrupts
+
+    if ((mask & 0x1) != 0 ) {
+        Mali_WrReg(0x0, 0x0, 0x00000190, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x00000194, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x000001a0, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x000001a4, 0xffffffff);    // Power on all cores
+    }
+
+    if ( (mask >> 1) != 0 ) {
+        Mali_WrReg(0x0, 0x0, 0x00000180, mask >> 1);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x00000184, 0x0);    // Power on all cores
+    }
+
+    if ( mask != 0 ) {
+        udelay(10);
+        part1_done = Mali_RdReg(0x0, 0x0, 0x0000020);
+        //printk("%s, %d\n", __func__, __LINE__);
+        while((part1_done ==0)) { part1_done = Mali_RdReg(0x0, 0x0, 0x00000020); udelay(10); }
+        //printk("Mali_pwr_on:gpu_irq : %x\n", part1_done);
+        Mali_WrReg(0x0, 0x0, 0x0000024, 0xffffffff); // clear interrupts
+    }
+}
+
+static void gpu_power_main( struct kbase_device *kbdev) {
+
+    Mali_pwr_on ( 0x7);
+
+    printk("%s, %d\n", __func__, __LINE__);
+}
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	int ret;
+	struct platform_device *pdev = to_platform_device(kbdev->dev);
+	struct resource *reg_res;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+    //printk("20151013, %s, %d\n", __FILE__, __LINE__);
+    if (first == 0) goto ret;
+
+    reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+    if (!reg_res) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) 
+        reg_base_hiubus = ioremap(reg_res->start, resource_size(reg_res));
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+
+//JOHNT
+    // Level reset mail
+
+    // Level reset mail
+    //Wr(P_RESET2_MASK, ~(0x1<<14));
+    //Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    //Wr(P_RESET2_LEVEL, 0xffffffff);
+    //Wr(P_RESET0_LEVEL, 0xffffffff);
+
+    value = Rd(RESET0_MASK);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_MASK, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+///////////////
+    value = Rd(RESET2_MASK);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_MASK, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value | ((0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value | ((0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    udelay(10); // OR POLL for reset done
+
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+
+    gpu_power_main(kbdev);
+    //printk("set PWR_ORRIDE, reg=%p, reg_start=%llx, reg_size=%zx, reg_mapped=%p\n",
+    //        kbdev->reg, kbdev->reg_start, kbdev->reg_size, reg_base_hiubus);
+	dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+			(void *)kbdev->dev->pm_domain);
+
+    first = 0;
+    //printk("%s, %d\n", __FILE__, __LINE__);
+ret:
+	ret = pm_runtime_get_sync(kbdev->dev);
+    udelay(100);
+#if 1
+
+    core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+    l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+    tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+    //printk("core_ready=%llx, l2_ready=%llx, tiler_ready=%llx\n", core_ready, l2_ready, tiler_ready);
+#endif
+	dev_dbg(kbdev->dev, "pm_runtime_get returned %d\n", ret);
+
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+    //printk("%s, %d\n", __FILE__, __LINE__);
+#if 0
+    iounmap(reg_base_hiubus);
+    reg_base_hiubus = NULL;
+#endif
+	u64 core_ready  = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+	u64 l2_ready    = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+	u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+
+	dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+	if (core_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, core_ready, ACTION_PWROFF);
+
+	if (l2_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_ready, ACTION_PWROFF);
+
+	if (tiler_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_ready, ACTION_PWROFF);
+
+	pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+	pm_runtime_enable(kbdev->dev);
+
+	return 0;
+}
+
+void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+	pm_runtime_disable(kbdev->dev);
+}
+
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+	return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+	int ret = pm_callback_runtime_on(kbdev);
+
+	WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+	pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback = pm_callback_suspend,
+	.power_resume_callback = pm_callback_resume,
+#ifdef KBASE_PM_RUNTIME
+	.power_runtime_init_callback = kbase_device_runtime_init,
+	.power_runtime_term_callback = kbase_device_runtime_disable,
+	.power_runtime_on_callback = pm_callback_runtime_on,
+	.power_runtime_off_callback = pm_callback_runtime_off,
+#else				/* KBASE_PM_RUNTIME */
+	.power_runtime_init_callback = NULL,
+	.power_runtime_term_callback = NULL,
+	.power_runtime_on_callback = NULL,
+	.power_runtime_off_callback = NULL,
+#endif				/* KBASE_PM_RUNTIME */
+};
+
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
new file mode 100644
index 0000000..41185d0
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
@@ -0,0 +1,15 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/kernel.h>
+#ifndef MALI_PLATFORM_H_
+#define MALI_PLATFORM_H_
+
+extern u32 mali_gp_reset_fail;
+extern u32 mali_core_timeout;
+
+#endif /* MALI_PLATFORM_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
new file mode 100644
index 0000000..b541090
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.h
+ * Example core scaling policy.
+ */
+
+#ifndef __ARM_CORE_SCALING_H__
+#define __ARM_CORE_SCALING_H__
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+
+enum mali_scale_mode_t {
+	MALI_PP_SCALING = 0,
+	MALI_PP_FS_SCALING,
+	MALI_SCALING_DISABLE,
+	MALI_TURBO_MODE,
+	MALI_SCALING_MODE_MAX
+};
+
+typedef struct mali_dvfs_threshold_table {
+	uint32_t    freq_index;
+	uint32_t    voltage;
+	uint32_t    keep_count;
+	uint32_t    downthreshold;
+	uint32_t    upthreshold;
+    uint32_t    clk_freq;
+    const char  *clk_parent;
+    struct clk  *clkp_handle;
+    uint32_t    clkp_freq;
+} mali_dvfs_threshold_table;
+
+/**
+ *  restrictions on frequency and number of pp.
+ */
+typedef struct mali_scale_info_t {
+	u32 minpp;
+	u32 maxpp;
+	u32 minclk;
+	u32 maxclk;
+} mali_scale_info_t;
+
+/**
+ * Platform spesific data for meson chips.
+ */
+typedef struct mali_plat_info_t {
+	u32 cfg_pp; /* number of pp. */
+	u32 cfg_min_pp;
+	u32 turbo_clock; /* reserved clock src. */
+	u32 def_clock; /* gpu clock used most of time.*/
+	u32 cfg_clock; /* max clock could be used.*/
+	u32 cfg_clock_bkup; /* same as cfg_clock, for backup. */
+	u32 cfg_min_clock;
+
+	u32  sc_mpp; /* number of pp used most of time.*/
+	u32  bst_gpu; /* threshold for boosting gpu. */
+	u32  bst_pp; /* threshold for boosting PP. */
+
+	u32 *clk;
+	u32 *clk_sample;
+	u32 clk_len;
+	u32 have_switch; /* have clock gate switch or not. */
+
+	mali_dvfs_threshold_table *dvfs_table;
+	u32 dvfs_table_size;
+
+	mali_scale_info_t scale_info;
+	u32 maxclk_sysfs;
+	u32 maxpp_sysfs;
+
+	/* set upper limit of pp or frequency, for THERMAL thermal or band width saving.*/
+	u32 limit_on;
+
+	/* for boost up gpu by user. */
+	void (*plat_preheat)(void);
+
+    struct platform_device *pdev;
+    void __iomem *reg_base_hiubus;
+    void __iomem *reg_base_aobus;
+	struct work_struct wq_work;
+    struct clk *clk_mali;
+    struct clk *clk_mali_0;
+    struct clk *clk_mali_1;
+} mali_plat_info_t;
+mali_plat_info_t* get_mali_plat_data(void);
+
+/**
+ * Initialize core scaling policy.
+ *
+ * @note The core scaling policy will assume that all PP cores are on initially.
+ *
+ * @param num_pp_cores Total number of PP cores.
+ */
+int mali_core_scaling_init(mali_plat_info_t*);
+
+/**
+ * Terminate core scaling policy.
+ */
+void mali_core_scaling_term(void);
+
+/**
+ * cancel and flush scaling job queue.
+ */
+void flush_scaling_job(void);
+
+/* get current state(pp, clk). */
+void get_mali_rt_clkpp(u32* clk, u32* pp);
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush);
+void revise_mali_rt(void);
+/* get max gpu clk level of this chip*/
+int get_gpu_max_clk_level(void);
+
+/* get or set the scale mode. */
+u32 get_mali_schel_mode(void);
+void set_mali_schel_mode(u32 mode);
+
+/* for frequency reporter in DS-5 streamline. */
+u32 get_current_frequency(void);
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+#endif /* __ARM_CORE_SCALING_H__ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
new file mode 100644
index 0000000..7687f92
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2010, 2012-2014 Amlogic Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ */
+
+/**
+ * @file mali_platform.c
+ * Platform specific Mali driver functions for:
+ * meson8m2 and the newer chip
+ */
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#include <asm/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/moduleparam.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_defs.h>
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+extern void mali_post_init(void);
+struct kbase_device;
+//static int gpu_dvfs_probe(struct platform_device *pdev)
+int platform_dt_init_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	int err = -1;
+
+	err = mali_meson_init_start(pdev);
+    mali_meson_init_finish(pdev);
+    mpgpu_class_init();
+    mali_post_init();
+    return err;
+}
+
+//static int gpu_dvfs_remove(struct platform_device *pdev)
+void platform_dt_term_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	printk("%s, %d\n", __FILE__, __LINE__);
+
+    mpgpu_class_exit();
+	mali_meson_uninit(pdev);
+
+}
+
+inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2])
+{
+    mali_gpu_utilization_callback(utilisation*255/100);
+    return 1;
+}
+
+struct kbase_platform_funcs_conf dt_funcs_conf = {
+    .platform_init_func = platform_dt_init_func,
+    .platform_term_func = platform_dt_term_func,
+};
+#if 0
+static const struct of_device_id gpu_dvfs_ids[] = {
+	{ .compatible = "meson, gpu-dvfs-1.00.a" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, gpu_dvfs_ids);
+
+static struct platform_driver gpu_dvfs_driver = {
+	.driver = {
+		.name = "meson-gpu-dvfs",
+		.owner = THIS_MODULE,
+		.of_match_table = gpu_dvfs_ids,
+	},
+	.probe = gpu_dvfs_probe,
+	.remove = gpu_dvfs_remove,
+};
+module_platform_driver(gpu_dvfs_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Amlogic SH, MM");
+MODULE_DESCRIPTION("Driver for the Meson GPU dvfs");
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
new file mode 100644
index 0000000..ca79752
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
@@ -0,0 +1,33 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#ifndef MESON_MAIN_H_
+#define MESON_MAIN_H_
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+u32 set_max_mali_freq(u32 idx);
+u32 get_max_mali_freq(void);
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev);
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev);
+int mali_meson_uninit(struct platform_device* ptr_plt_dev);
+int mpgpu_class_init(void);
+void mpgpu_class_exit(void);
+void mali_gpu_utilization_callback(int utilization_pp);
+
+#endif /* MESON_MAIN_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
new file mode 100644
index 0000000..0cc5035
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
@@ -0,0 +1,359 @@
+/*******************************************************************
+ *
+ *  Copyright C 2013 by Amlogic, Inc. All Rights Reserved.
+ *
+ *  Description:
+ *
+ *  Author: Amlogic Software
+ *  Created: 2010/4/1   19:46
+ *
+ *******************************************************************/
+/* Standard Linux headers */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/io.h>
+#include <plat/io.h>
+#include <asm/io.h>
+#endif
+
+#include "meson_main2.h"
+
+static ssize_t domain_stat_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	unsigned int val;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+	return sprintf(buf, "%x\n", val>>4);
+	return 0;
+}
+
+#define PREHEAT_CMD "preheat"
+#define PLL2_CMD "mpl2"  /* mpl2 [11] or [0xxxxxxx] */
+#define SCMPP_CMD "scmpp"  /* scmpp [number of pp your want in most of time]. */
+#define BSTGPU_CMD "bstgpu"  /* bstgpu [0-256] */
+#define BSTPP_CMD "bstpp"  /* bstpp [0-256] */
+#define LIMIT_CMD "lmt"  /* lmt [0 or 1] */
+#define MAX_TOKEN 20
+#define FULL_UTILIZATION 256
+
+static ssize_t mpgpu_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	char *pstart, *cprt = NULL;
+	u32 val = 0;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	cprt = skip_spaces(buf);
+	pstart = strsep(&cprt," ");
+	if (strlen(pstart) < 1)
+		goto quit;
+
+	if (!strncmp(pstart, PREHEAT_CMD, MAX_TOKEN)) {
+		if (pmali_plat->plat_preheat) {
+			pmali_plat->plat_preheat();
+		}
+	} else if (!strncmp(pstart, PLL2_CMD, MAX_TOKEN)) {
+		int base = 10;
+		if ((strlen(cprt) > 2) && (cprt[0] == '0') &&
+				(cprt[1] == 'x' || cprt[1] == 'X'))
+			base = 16;
+		if (kstrtouint(cprt, base, &val) <0)
+			goto quit;
+		if (val < 11)
+			pmali_plat->cfg_clock = pmali_plat->cfg_clock_bkup;
+		else
+			pmali_plat->cfg_clock = pmali_plat->turbo_clock;
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		set_str_src(val);
+	} else if (!strncmp(pstart, SCMPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < pmali_plat->cfg_pp)) {
+			pmali_plat->sc_mpp = val;
+		}
+	} else if (!strncmp(pstart, BSTGPU_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_gpu = val;
+		}
+	} else if (!strncmp(pstart, BSTPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_pp = val;
+		}
+	} else if (!strncmp(pstart, LIMIT_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+
+		if (val < 2) {
+			pmali_plat->limit_on = val;
+			if (val == 0) {
+				pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+				pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+				revise_mali_rt();
+			}
+		}
+	}
+quit:
+	return count;
+}
+
+static ssize_t scale_mode_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_mali_schel_mode());
+}
+
+static ssize_t scale_mode_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	set_mali_schel_mode(val);
+
+	return count;
+}
+
+#if 0
+static ssize_t max_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxpp:%d, maxpp_sysfs:%d, total=%d\n",
+			pmali_plat->scale_info.maxpp, pmali_plat->maxpp_sysfs,
+			pmali_plat->cfg_pp);
+	return sprintf(buf, "%d\n", pmali_plat->cfg_pp);
+}
+
+static ssize_t max_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_pp) || (val < pinfo->minpp))
+		return -EINVAL;
+
+	pmali_plat->maxpp_sysfs = val;
+	pinfo->maxpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minpp);
+}
+
+static ssize_t min_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxpp) || (val < 1))
+		return -EINVAL;
+
+	pinfo->minpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+#endif
+
+static ssize_t max_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxclk:%d, maxclk_sys:%d, max gpu level=%d\n",
+			pmali_plat->scale_info.maxclk, pmali_plat->maxclk_sysfs, get_gpu_max_clk_level());
+	return sprintf(buf, "%d\n", get_gpu_max_clk_level());
+}
+
+static ssize_t max_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_clock) || (val < pinfo->minclk))
+		return -EINVAL;
+
+	pmali_plat->maxclk_sysfs = val;
+	pinfo->maxclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minclk);
+}
+
+static ssize_t min_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxclk))
+		return -EINVAL;
+
+	pinfo->minclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_current_frequency());
+}
+
+static ssize_t freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(val, pp, 1);
+
+	return count;
+}
+
+#if 0
+static ssize_t current_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+	return sprintf(buf, "%d\n", pp);
+}
+
+static ssize_t current_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+
+	get_mali_rt_clkpp(&clk, &pp);
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(clk, val, 1);
+
+	return count;
+}
+#endif
+
+static struct class_attribute mali_class_attrs[] = {
+	__ATTR(domain_stat,	0644, domain_stat_read, NULL),
+	__ATTR(mpgpucmd,	0644, NULL,		mpgpu_write),
+	__ATTR(scale_mode,	0644, scale_mode_read,  scale_mode_write),
+	__ATTR(min_freq,	0644, min_freq_read,  	min_freq_write),
+	__ATTR(max_freq,	0644, max_freq_read,	max_freq_write),
+#if 0
+	__ATTR(min_pp,		0644, min_pp_read,	min_pp_write),
+	__ATTR(max_pp,		0644, max_pp_read,	max_pp_write),
+#endif
+	__ATTR(cur_freq,	0644, freq_read,	freq_write),
+#if 0
+	__ATTR(cur_pp,		0644, current_pp_read,	current_pp_write),
+#endif
+};
+
+static struct class mpgpu_class = {
+	.name = "mpgpu",
+};
+
+int mpgpu_class_init(void)
+{
+	int ret = 0;
+	int i;
+	int attr_num =  ARRAY_SIZE(mali_class_attrs);
+
+	ret = class_register(&mpgpu_class);
+	if (ret) {
+		printk(KERN_ERR "%s: class_register failed\n", __func__);
+		return ret;
+	}
+	for (i = 0; i< attr_num; i++) {
+		ret = class_create_file(&mpgpu_class, &mali_class_attrs[i]);
+		if (ret) {
+			printk(KERN_ERR "%d ST: class item failed to register\n", i);
+		}
+	}
+	return ret;
+}
+
+void  mpgpu_class_exit(void)
+{
+	class_unregister(&mpgpu_class);
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
new file mode 100644
index 0000000..d16675e
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
@@ -0,0 +1,246 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include <asm/io.h>
+#ifdef CONFIG_GPU_THERMAL
+#include <linux/gpu_cooling.h>
+#include <linux/gpucore_cooling.h>
+#include <linux/amlogic/aml_thermal_hw.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+/*
+ *    For Meson 8 M2.
+ *
+ */
+static void mali_plat_preheat(void);
+static mali_plat_info_t mali_plat_data = {
+    .bst_gpu = 210, /* threshold for boosting gpu. */
+    .bst_pp = 160, /* threshold for boosting PP. */
+    .have_switch = 1,
+    .limit_on = 1,
+    .plat_preheat = mali_plat_preheat,
+};
+
+static void mali_plat_preheat(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    u32 pre_fs;
+    u32 clk, pp;
+
+    if (get_mali_schel_mode() != MALI_PP_FS_SCALING)
+        return;
+
+    get_mali_rt_clkpp(&clk, &pp);
+    pre_fs = mali_plat_data.def_clock + 1;
+    if (clk < pre_fs)
+        clk = pre_fs;
+    if (pp < mali_plat_data.sc_mpp)
+        pp = mali_plat_data.sc_mpp;
+    set_mali_rt_clkpp(clk, pp, 1);
+#endif
+}
+
+mali_plat_info_t* get_mali_plat_data(void) {
+    return &mali_plat_data;
+}
+
+int get_mali_freq_level(int freq)
+{
+    int i = 0, level = -1;
+    int mali_freq_num;
+
+    if (freq < 0)
+        return level;
+
+    mali_freq_num = mali_plat_data.dvfs_table_size - 1;
+    if (freq < mali_plat_data.clk_sample[0])
+        level = mali_freq_num-1;
+    else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1])
+        level = 0;
+    else {
+        for (i=0; i<mali_freq_num - 1 ;i++) {
+            if (freq >= mali_plat_data.clk_sample[i] && freq < mali_plat_data.clk_sample[i + 1]) {
+                level = i;
+                level = mali_freq_num-level - 1;
+                break;
+            }
+        }
+    }
+    return level;
+}
+
+unsigned int get_mali_max_level(void)
+{
+    return mali_plat_data.dvfs_table_size - 1;
+}
+
+int get_gpu_max_clk_level(void)
+{
+    return mali_plat_data.cfg_clock;
+}
+
+#ifdef CONFIG_GPU_THERMAL
+static void set_limit_mali_freq(u32 idx)
+{
+    if (mali_plat_data.limit_on == 0)
+        return;
+    if (idx > mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk)
+        return;
+    if (idx > mali_plat_data.maxclk_sysfs) {
+        printk("idx > max freq\n");
+        return;
+    }
+    mali_plat_data.scale_info.maxclk= idx;
+    revise_mali_rt();
+}
+
+static u32 get_limit_mali_freq(void)
+{
+    return mali_plat_data.scale_info.maxclk;
+}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 get_mali_utilization(void)
+{
+#ifndef MESON_DRV_BRING
+    return 55;
+#else
+    return (_mali_ukk_utilization_pp() * 100) / 256;
+#endif
+}
+#endif
+#endif
+
+#ifdef CONFIG_GPU_THERMAL
+static u32 set_limit_pp_num(u32 num)
+{
+    u32 ret = -1;
+    if (mali_plat_data.limit_on == 0)
+        goto quit;
+    if (num > mali_plat_data.cfg_pp ||
+            num < mali_plat_data.scale_info.minpp)
+        goto quit;
+
+    if (num > mali_plat_data.maxpp_sysfs) {
+        printk("pp > sysfs set pp\n");
+        goto quit;
+    }
+
+    mali_plat_data.scale_info.maxpp = num;
+    revise_mali_rt();
+    ret = 0;
+quit:
+    return ret;
+}
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 mali_get_online_pp(void)
+{
+    unsigned int val;
+    mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+    val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+    if (val == 0x07)    /* No pp is working */
+        return 0;
+
+#ifndef MESON_DRV_BRING
+    return 2;
+#else
+    return mali_executor_get_num_cores_enabled();
+#endif
+}
+#endif
+#endif
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+    //dev_set_drvdata(&ptr_plt_dev->dev, &mali_plat_data);
+    mali_dt_info(ptr_plt_dev, &mali_plat_data);
+    mali_clock_init_clk_tree(ptr_plt_dev);
+    return 0;
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+    if (mali_core_scaling_init(&mali_plat_data) < 0)
+        return -1;
+    return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+    mali_core_scaling_term();
+    return 0;
+}
+
+void mali_post_init(void)
+{
+#ifdef CONFIG_GPU_THERMAL
+    int err;
+    struct gpufreq_cooling_device *gcdev = NULL;
+    struct gpucore_cooling_device *gccdev = NULL;
+
+    gcdev = gpufreq_cooling_alloc();
+    register_gpu_freq_info(get_current_frequency);
+    if (IS_ERR(gcdev))
+        printk("malloc gpu cooling buffer error!!\n");
+    else if (!gcdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gcdev->get_gpu_freq_level = get_mali_freq_level;
+        gcdev->get_gpu_max_level = get_mali_max_level;
+        gcdev->set_gpu_freq_idx = set_limit_mali_freq;
+        gcdev->get_gpu_current_max_level = get_limit_mali_freq;
+#ifdef CONFIG_DEVFREQ_THERMAL
+        gcdev->get_gpu_freq = get_mali_freq;
+        gcdev->get_gpu_loading = get_mali_utilization;
+        gcdev->get_online_pp = mali_get_online_pp;
+#endif
+        err = gpufreq_cooling_register(gcdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gcdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu cooling register okay with err=%d\n",err);
+    }
+
+    gccdev = gpucore_cooling_alloc();
+    if (IS_ERR(gccdev))
+        printk("malloc gpu core cooling buffer error!!\n");
+    else if (!gccdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gccdev->max_gpu_core_num=mali_plat_data.cfg_pp;
+        gccdev->set_max_pp_num=set_limit_pp_num;
+        err = (int)gpucore_cooling_register(gccdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gccdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu core cooling register okay with err=%d\n",err);
+    }
+#endif
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
new file mode 100644
index 0000000..aa8a77a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.c
+ * Example core scaling policy.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+
+#if AMLOGIC_GPU_USE_GPPLL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16)
+#include <linux/amlogic/amports/gp_pll.h>
+#else
+#include <linux/amlogic/media/clk/gp_pll.h>
+#endif
+#endif
+
+#define LOG_MALI_SCALING 1
+#include "meson_main2.h"
+#include "mali_clock.h"
+
+static int currentStep;
+#ifndef CONFIG_MALI_DVFS
+static int num_cores_enabled;
+static int lastStep;
+static struct work_struct wq_work;
+static mali_plat_info_t* pmali_plat = NULL;
+#endif
+static int  scaling_mode = MALI_PP_FS_SCALING;
+extern int  mali_pm_statue;
+//static int  scaling_mode = MALI_SCALING_DISABLE;
+//static int  scaling_mode = MALI_PP_SCALING;
+
+#if AMLOGIC_GPU_USE_GPPLL
+static struct gp_pll_user_handle_s *gp_pll_user_gpu;
+static int is_gp_pll_get;
+static int is_gp_pll_put;
+#endif
+static unsigned scaling_dbg_level = 0;
+module_param(scaling_dbg_level, uint, 0644);
+MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level");
+
+#define scalingdbg(level, fmt, arg...)				   \
+	do {											   \
+		if (scaling_dbg_level >= (level))			   \
+		printk(fmt , ## arg);						   \
+	} while (0)
+
+#ifndef CONFIG_MALI_DVFS
+static inline void mali_clk_exected(void)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	uint32_t execStep = currentStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[currentStep];
+#endif
+
+	//if (pdvfs[currentStep].freq_index == pdvfs[lastStep].freq_index) return;
+	if ((pdvfs[execStep].freq_index == pdvfs[lastStep].freq_index) ||
+		(pdvfs[execStep].clk_freq == pdvfs[lastStep].clk_freq)){
+		return;
+	}
+
+#if AMLOGIC_GPU_USE_GPPLL
+	if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+		gp_pll_request(gp_pll_user_gpu);
+		if (!is_gp_pll_get) {
+			//printk("not get pll\n");
+			execStep = currentStep - 1;
+		}
+	} else {
+		//not get the gp pll, do need put
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+		gp_pll_release(gp_pll_user_gpu);
+	}
+#endif
+	//mali_dev_pause();
+	mali_clock_set(pdvfs[execStep].freq_index);
+	//mali_dev_resume();
+	lastStep = execStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	if (is_gp_pll_put) {
+		//printk("release gp0 pll\n");
+		gp_pll_release(gp_pll_user_gpu);
+		gp_pll_request(gp_pll_user_gpu);
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+	}
+#endif
+
+}
+#if AMLOGIC_GPU_USE_GPPLL
+static int gp_pll_user_cb_gpu(struct gp_pll_user_handle_s *user,
+		int event)
+{
+	if (event == GP_PLL_USER_EVENT_GRANT) {
+		//printk("granted\n");
+		is_gp_pll_get = 1;
+		is_gp_pll_put = 0;
+		schedule_work(&wq_work);
+	} else if (event == GP_PLL_USER_EVENT_YIELD) {
+		//printk("ask for yield\n");
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 1;
+		schedule_work(&wq_work);
+	}
+
+	return 0;
+}
+#endif
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+    cores = cores;
+    return 0;
+}
+
+static void do_scaling(struct work_struct *work)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	int err = mali_perf_set_num_pp_cores(num_cores_enabled);
+    if (err < 0) scalingdbg(1, "set pp failed");
+
+	scalingdbg(1, "set pp cores to %d\n", num_cores_enabled);
+	scalingdbg(1, "pdvfs[%d].freq_index=%d, pdvfs[%d].freq_index=%d\n",
+			currentStep, pdvfs[currentStep].freq_index,
+			lastStep, pdvfs[lastStep].freq_index);
+	mali_clk_exected();
+#ifdef CONFIG_MALI400_PROFILING
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+			MALI_PROFILING_EVENT_CHANNEL_GPU |
+			MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+			get_current_frequency(),
+			0,	0,	0,	0);
+#endif
+}
+#endif
+
+u32 revise_set_clk(u32 val, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+
+	pinfo = &pmali_plat->scale_info;
+
+	if (val < pinfo->minclk)
+		val = pinfo->minclk;
+	else if (val >  pinfo->maxclk)
+		val =  pinfo->maxclk;
+
+	if (val != currentStep) {
+		currentStep = val;
+		if (flush)
+			schedule_work(&wq_work);
+		else
+			ret = 1;
+	}
+#endif
+	return ret;
+}
+
+void get_mali_rt_clkpp(u32* clk, u32* pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	*clk = currentStep;
+	*pp = num_cores_enabled;
+#endif
+}
+
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+	u32 flush_work = 0;
+
+	pinfo = &pmali_plat->scale_info;
+	if (clk < pinfo->minclk)
+		clk = pinfo->minclk;
+	else if (clk >  pinfo->maxclk)
+		clk =  pinfo->maxclk;
+
+	if (clk != currentStep) {
+		currentStep = clk;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+	if (pp < pinfo->minpp)
+		pp = pinfo->minpp;
+	else if (pp > pinfo->maxpp)
+		pp = pinfo->maxpp;
+
+	if (pp != num_cores_enabled) {
+		num_cores_enabled = pp;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+
+	if (flush_work)
+		schedule_work(&wq_work);
+#endif
+	return ret;
+}
+
+void revise_mali_rt(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	set_mali_rt_clkpp(currentStep, num_cores_enabled, 1);
+#endif
+}
+
+void flush_scaling_job(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	cancel_work_sync(&wq_work);
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 enable_one_core(void)
+{
+	scalingdbg(2, "meson:	 one more pp, curent has %d pp cores\n",  num_cores_enabled + 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled + 1, 0);
+}
+
+static u32 disable_one_core(void)
+{
+	scalingdbg(2, "meson: disable one pp, current has %d pp cores\n",  num_cores_enabled - 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled - 1, 0);
+}
+
+static u32 enable_max_num_cores(void)
+{
+	return set_mali_rt_clkpp(currentStep, pmali_plat->scale_info.maxpp, 0);
+}
+
+static u32 enable_pp_cores(u32 val)
+{
+	scalingdbg(2, "meson: enable %d pp cores\n", val);
+	return set_mali_rt_clkpp(currentStep, val, 0);
+}
+#endif
+
+int mali_core_scaling_init(mali_plat_info_t *mali_plat)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_plat == NULL) {
+		scalingdbg(2, " Mali platform data is NULL!!!\n");
+		return -1;
+	}
+
+	pmali_plat = mali_plat;
+    printk("mali_plat=%p\n", mali_plat);
+	num_cores_enabled = pmali_plat->sc_mpp;
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_gpu = gp_pll_user_register("gpu", 1,
+		gp_pll_user_cb_gpu);
+	//not get the gp pll, do need put
+	is_gp_pll_get = 0;
+	is_gp_pll_put = 0;
+	if (gp_pll_user_gpu == NULL) printk("register gp pll user for gpu failed\n");
+#endif
+
+	currentStep = pmali_plat->def_clock;
+	lastStep = currentStep;
+	INIT_WORK(&wq_work, do_scaling);
+#endif
+	return 0;
+	/* NOTE: Mali is not fully initialized at this point. */
+}
+
+void mali_core_scaling_term(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	flush_scheduled_work();
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_unregister(gp_pll_user_gpu);
+#endif
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 mali_threshold [] = {
+	40, /* 40% */
+	50, /* 50% */
+	90, /* 90% */
+};
+#endif
+
+void mali_pp_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+
+	if (mali_threshold[2] < utilization_pp)
+		ret = enable_max_num_cores();
+	else if (mali_threshold[1]< utilization_pp)
+		ret = enable_one_core();
+	else if (0 < utilization_pp)
+		ret = disable_one_core();
+	if (ret == 1)
+		schedule_work(&wq_work);
+#endif
+}
+
+#if LOG_MALI_SCALING
+void trace_utilization(int utilization_gpu, u32 current_idx, u32 next,
+		u32 current_pp, u32 next_pp)
+{
+	char direction;
+	if (next > current_idx)
+		direction = '>';
+	else if ((current_idx > pmali_plat->scale_info.minpp) && (next < current_idx))
+		direction = '<';
+	else
+		direction = '~';
+
+	scalingdbg(2, "[SCALING]%c (%3d-->%3d)@%3d{%3d - %3d}. pp:(%d-->%d)\n",
+			direction,
+			get_mali_freq(current_idx),
+			get_mali_freq(next),
+			utilization_gpu,
+			pmali_plat->dvfs_table[current_idx].downthreshold,
+			pmali_plat->dvfs_table[current_idx].upthreshold,
+			current_pp, next_pp);
+}
+#endif
+
+#ifndef CONFIG_MALI_DVFS
+static int mali_stay_count = 0;
+static void mali_decide_next_status(int utilization_pp, int* next_fs_idx,
+		int* pp_change_flag)
+{
+	u32 mali_up_limit, decided_fs_idx;
+	u32 ld_left, ld_right;
+	u32 ld_up, ld_down;
+	u32 change_mode;
+
+	*pp_change_flag = 0;
+	change_mode = 0;
+
+	scalingdbg(5, "line(%d), scaling_mode=%d, MALI_TURBO_MODE=%d, turbo=%d, maxclk=%d\n",
+			__LINE__,  scaling_mode, MALI_TURBO_MODE,
+		    pmali_plat->turbo_clock, pmali_plat->scale_info.maxclk);
+
+	mali_up_limit = (scaling_mode ==  MALI_TURBO_MODE) ?
+		pmali_plat->turbo_clock : pmali_plat->scale_info.maxclk;
+	decided_fs_idx = currentStep;
+
+	ld_up = pmali_plat->dvfs_table[currentStep].upthreshold;
+	ld_down = pmali_plat->dvfs_table[currentStep].downthreshold;
+
+	scalingdbg(2, "utilization=%d,  ld_up=%d\n ", utilization_pp,  ld_up);
+	if (utilization_pp >= ld_up) { /* go up */
+
+		scalingdbg(2, "currentStep=%d,  mali_up_limit=%d\n ", currentStep, mali_up_limit);
+		if (currentStep < mali_up_limit) {
+			change_mode = 1;
+			if ((currentStep < pmali_plat->def_clock) && (utilization_pp > pmali_plat->bst_gpu))
+				decided_fs_idx = pmali_plat->def_clock;
+			else
+				decided_fs_idx++;
+		}
+		if ((utilization_pp >= ld_up) &&
+				(num_cores_enabled < pmali_plat->scale_info.maxpp)) {
+			if ((num_cores_enabled < pmali_plat->sc_mpp) && (utilization_pp >= pmali_plat->bst_pp)) {
+				*pp_change_flag = 1;
+				change_mode = 1;
+			} else if (change_mode == 0) {
+				*pp_change_flag = 2;
+				change_mode = 1;
+			}
+		}
+#if LOG_MALI_SCALING
+		scalingdbg(2, "[nexting..] [LD:%d]-> FS[CRNT:%d LMT:%d NEXT:%d] PP[NUM:%d LMT:%d MD:%d][F:%d]\n",
+				utilization_pp, currentStep, mali_up_limit, decided_fs_idx,
+				num_cores_enabled, pmali_plat->scale_info.maxpp, *pp_change_flag, change_mode);
+#endif
+	} else if (utilization_pp <= ld_down) { /* go down */
+		if (mali_stay_count > 0) {
+			*next_fs_idx = decided_fs_idx;
+			mali_stay_count--;
+			return;
+		}
+
+		if (num_cores_enabled > pmali_plat->sc_mpp) {
+			change_mode = 1;
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		} else if (currentStep > pmali_plat->scale_info.minclk) {
+			change_mode = 1;
+		} else if (num_cores_enabled > 1) { /* decrease PPS */
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				scalingdbg(2, "ld_left=%d, ld_right=%d\n", ld_left, ld_right);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		}
+
+		if (change_mode == 1) {
+			decided_fs_idx--;
+		} else if (change_mode == 2) { /* decrease PPS */
+			*pp_change_flag = -1;
+		}
+	}
+
+	if (decided_fs_idx < 0 ) {
+		printk("gpu debug, next index below 0\n");
+		decided_fs_idx = 0;
+	}
+	if (decided_fs_idx > pmali_plat->scale_info.maxclk) {
+		decided_fs_idx = pmali_plat->scale_info.maxclk;
+		printk("gpu debug, next index above max-1, set to %d\n", decided_fs_idx);
+	}
+
+	if (change_mode)
+		mali_stay_count = pmali_plat->dvfs_table[decided_fs_idx].keep_count;
+
+	*next_fs_idx = decided_fs_idx;
+}
+#endif
+
+void mali_pp_fs_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+	int pp_change_flag = 0;
+	u32 next_idx = 0;
+
+#if LOG_MALI_SCALING
+	u32 last_pp = num_cores_enabled;
+#endif
+	mali_decide_next_status(utilization_pp, &next_idx, &pp_change_flag);
+
+	if (pp_change_flag == 1)
+		ret = enable_pp_cores(pmali_plat->sc_mpp);
+	else if (pp_change_flag == 2)
+		ret = enable_one_core();
+	else if (pp_change_flag == -1) {
+		ret = disable_one_core();
+	}
+
+#if LOG_MALI_SCALING
+	if (pp_change_flag || (next_idx != currentStep))
+		trace_utilization(utilization_pp, currentStep, next_idx, last_pp, num_cores_enabled);
+#endif
+
+	if (next_idx != currentStep) {
+		ret = 1;
+		currentStep = next_idx;
+	}
+
+	if (ret == 1)
+		schedule_work(&wq_work);
+#ifdef CONFIG_MALI400_PROFILING
+	else
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				MALI_PROFILING_EVENT_CHANNEL_GPU |
+				MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				get_current_frequency(),
+				0,	0,	0,	0);
+#endif
+#endif
+}
+
+u32 get_mali_schel_mode(void)
+{
+	return scaling_mode;
+}
+
+void set_mali_schel_mode(u32 mode)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mode >= MALI_SCALING_MODE_MAX)
+		return;
+	scaling_mode = mode;
+
+	//disable thermal in turbo mode
+	if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->limit_on = 0;
+	} else {
+		pmali_plat->limit_on = 1;
+	}
+	/* set default performance range. */
+	pmali_plat->scale_info.minclk = pmali_plat->cfg_min_clock;
+	pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+	pmali_plat->scale_info.minpp = pmali_plat->cfg_min_pp;
+	pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+
+	/* set current status and tune max freq */
+	if (scaling_mode == MALI_PP_FS_SCALING) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_pp_cores(pmali_plat->sc_mpp);
+	} else if (scaling_mode == MALI_SCALING_DISABLE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_max_num_cores();
+	} else if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->turbo_clock;
+		enable_max_num_cores();
+	}
+	currentStep = pmali_plat->scale_info.maxclk;
+	schedule_work(&wq_work);
+#endif
+}
+
+u32 get_current_frequency(void)
+{
+	return get_mali_freq(currentStep);
+}
+
+void mali_gpu_utilization_callback(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_pm_statue)
+		return;
+
+	switch (scaling_mode) {
+		case MALI_PP_FS_SCALING:
+			mali_pp_fs_scaling_update(utilization_pp);
+			break;
+		case MALI_PP_SCALING:
+			mali_pp_scaling_update(utilization_pp);
+			break;
+		default:
+			break;
+	}
+#endif
+}
+static u32 clk_cntl_save = 0;
+void mali_dev_freeze(void)
+{
+	clk_cntl_save = mplt_read(HHI_MALI_CLK_CNTL);
+}
+
+void mali_dev_restore(void)
+{
+
+	mplt_write(HHI_MALI_CLK_CNTL, clk_cntl_save);
+	if (pmali_plat && pmali_plat->pdev) {
+		mali_clock_init_clk_tree(pmali_plat->pdev);
+	} else {
+		printk("error: init clock failed, pmali_plat=%p, pmali_plat->pdev=%p\n",
+				pmali_plat, pmali_plat == NULL ? NULL: pmali_plat->pdev);
+	}
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
new file mode 100755
index 0000000..5b6ef37
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_juno_soc.o
+
+
+obj-m += juno_mali_opp.o
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
new file mode 100755
index 0000000..ccfd8cc
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/scpi_protocol.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp_add opp_add
+#endif /* Linux >= 3.13 */
+
+
+static int init_juno_opps_from_scpi(struct device *dev)
+{
+	struct scpi_opp *sopp;
+	int i;
+
+	/* Hard coded for Juno. 2 is GPU domain */
+	sopp = scpi_dvfs_get_opps(2);
+	if (IS_ERR_OR_NULL(sopp))
+		return PTR_ERR(sopp);
+
+	for (i = 0; i < sopp->count; i++) {
+		struct scpi_opp_entry *e = &sopp->opp[i];
+
+		dev_info(dev, "Mali OPP from SCPI: %u Hz @ %u mV\n",
+				e->freq_hz, e->volt_mv);
+
+		dev_pm_opp_add(dev, e->freq_hz, e->volt_mv * 1000);
+	}
+
+	return 0;
+}
+
+static int juno_setup_opps(void)
+{
+	struct device_node *np;
+	struct platform_device *pdev;
+	int err;
+
+	np = of_find_node_by_name(NULL, "gpu");
+	if (!np) {
+		pr_err("Failed to find DT entry for Mali\n");
+		return -EFAULT;
+	}
+
+	pdev = of_find_device_by_node(np);
+	if (!pdev) {
+		pr_err("Failed to find device for Mali\n");
+		of_node_put(np);
+		return -EFAULT;
+	}
+
+	err = init_juno_opps_from_scpi(&pdev->dev);
+
+	of_node_put(np);
+
+	return err;
+}
+
+module_init(juno_setup_opps);
+MODULE_LICENSE("GPL");
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
new file mode 100755
index 0000000..3baf3d9
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
@@ -0,0 +1,128 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <linux/thermal.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_smc.h>
+
+/* Versatile Express (VE) Juno Development Platform */
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 65,
+	.mmu_irq_number = 66,
+	.gpu_irq_number = 64,
+	.io_memory_region = {
+			     .start = 0x2D000000,
+			     .end = 0x2D000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+/*
+ * Juno Secure Mode integration
+ */
+
+/* SMC Function Numbers */
+#define JUNO_SMC_SECURE_ENABLE_FUNC  0xff06
+#define JUNO_SMC_SECURE_DISABLE_FUNC 0xff07
+
+static int juno_secure_mode_enable(struct kbase_device *kbdev)
+{
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+
+	if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) &&
+			kbdev->reg_start == 0x2d000000) {
+		/* T62X in SoC detected */
+		u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+			JUNO_SMC_SECURE_ENABLE_FUNC, false,
+			0, 0, 0);
+		return ret;
+	}
+
+	return -EINVAL; /* Not supported */
+}
+
+static int juno_secure_mode_disable(struct kbase_device *kbdev)
+{
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+
+	if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) &&
+			kbdev->reg_start == 0x2d000000) {
+		/* T62X in SoC detected */
+		u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+			JUNO_SMC_SECURE_DISABLE_FUNC, false,
+			0, 0, 0);
+		return ret;
+	}
+
+	return -EINVAL; /* Not supported */
+}
+
+struct kbase_secure_ops juno_secure_ops = {
+	.secure_mode_enable = juno_secure_mode_enable,
+	.secure_mode_disable = juno_secure_mode_disable,
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
new file mode 100755
index 0000000..5fc6d9e
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 600000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 600000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (&juno_secure_ops)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
+#ifdef CONFIG_DEVFREQ_THERMAL
+extern struct devfreq_cooling_ops juno_model_ops;
+#endif
+extern struct kbase_secure_ops juno_secure_ops;
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
new file mode 100755
index 0000000..7cb3be7
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @brief Entry point to transfer control to a platform for early initialization
+ *
+ * This function is called early on in the initialization during execution of
+ * @ref kbase_driver_init.
+ *
+ * @return Zero to indicate success non-zero for failure.
+ */
+int kbase_platform_early_init(void);
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
new file mode 100755
index 0000000..01f9dfc
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+/**
+ * kbase_platform_fake_register - Entry point for fake platform registration
+ *
+ * This function is called early on in the initialization during execution of
+ * kbase_driver_init.
+ *
+ * Return: 0 to indicate success, non-zero for failure.
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Entry point for fake platform unregistration
+ *
+ * This function is called in the termination during execution of
+ * kbase_driver_exit.
+ */
+void kbase_platform_fake_unregister(void);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
new file mode 100755
index 0000000..084a156
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
new file mode 100755
index 0000000..eb957d3
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -0,0 +1,82 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq()
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq()
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..687b1a8
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -0,0 +1,85 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+#include "mali_kbase_config_platform.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+	.start = 0xFC010000,
+	.end = 0xFC010000 + (4096 * 4) - 1
+	}
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
new file mode 100755
index 0000000..4665f98
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,279 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START	(0x10000000)
+#define SYS_CFGDATA_OFFSET		(0x000000A0)
+#define SYS_CFGCTRL_OFFSET		(0x000000A4)
+#define SYS_CFGSTAT_OFFSET		(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		(1 << 31)
+#define READ_REG_BIT_VALUE			(0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			(0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		(1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			(1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE	(0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE		(2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		(1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			(1 <<  1)
+
+#define FEED_REG_BIT_MASK			(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+/* the following three values used for reading
+ * HBI value of the LogicTile daughterboard */
+#define VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 (0x10000000)
+#define VE_SYS_PROC_ID1_OFFSET (0x00000088)
+#define VE_LOGIC_TILE_HBI_MASK (0x00000FFF)
+
+#define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos))
+
+/**
+ * Values used for determining the GPU frequency based on the LogicTile type
+ * Used by the function kbase_get_platform_logic_tile_type
+ */
+#define VE_VIRTEX6_GPU_FREQ_MIN 5000
+#define VE_VIRTEX6_GPU_FREQ_MAX 5000
+#define VE_VIRTEX7_GPU_FREQ_MIN 40000
+#define VE_VIRTEX7_GPU_FREQ_MAX 40000
+#define VE_DEFAULT_GPU_FREQ_MIN 5000
+#define VE_DEFAULT_GPU_FREQ_MAX 5000
+
+
+#define CPU_CLOCK_SPEED_UNDEFINED (0)
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed -Retrieves the CPU clock speed
+ * @cpu_clock - the value of CPU clock speed in MHz
+ *
+ * Returns 0 on success, error code otherwise.
+ *
+ * The implementation is platform specific.
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	int err = 0;
+	u32 reg_val = 0;
+	u32 osc2_value = 0;
+	u32 pa_divide = 0;
+	u32 pb_divide = 0;
+	u32 pc_divide = 0;
+	void __iomem *syscfg_reg = NULL;
+	void __iomem *scc_reg = NULL;
+
+	if (CPU_CLOCK_SPEED_UNDEFINED != cpu_clock_speed) {
+		*cpu_clock = cpu_clock_speed;
+		return 0;
+	}
+
+	/* Init the value in case something goes wrong */
+	*cpu_clock = 0;
+
+	/* Map CPU register into virtual memory */
+	syscfg_reg = ioremap(MOTHERBOARD_SYS_CFG_START, 0x1000);
+	if (syscfg_reg == NULL) {
+		err = -EIO;
+		goto syscfg_reg_map_failed;
+	}
+
+	scc_reg = ioremap(CORETILE_EXPRESS_A9X4_SCC_START, 0x1000);
+	if (scc_reg == NULL) {
+		err = -EIO;
+		goto scc_reg_map_failed;
+	}
+
+	raw_spin_lock(&syscfg_lock);
+
+	/* Read SYS regs - OSC2 */
+	reg_val = readl(syscfg_reg + SYS_CFGCTRL_OFFSET);
+
+	/* Check if there is any other undergoing request */
+	if (reg_val & SYS_CFGCTRL_START_BIT_VALUE) {
+		err = -EBUSY;
+		goto ongoing_request;
+	}
+	/* Reset the CGFGSTAT reg */
+	writel(0, (syscfg_reg + SYS_CFGSTAT_OFFSET));
+
+	writel(SYS_CFGCTRL_START_BIT_VALUE | READ_REG_BIT_VALUE |
+			DCC_DEFAULT_BIT_VALUE |
+			SYS_CFG_OSC_FUNC_BIT_VALUE |
+			SITE_DEFAULT_BIT_VALUE |
+			BOARD_STACK_POS_DEFAULT_BIT_VALUE |
+			DEVICE_DEFAULT_BIT_VALUE,
+			(syscfg_reg + SYS_CFGCTRL_OFFSET));
+	/* Wait for the transaction to complete */
+	while (!(readl(syscfg_reg + SYS_CFGSTAT_OFFSET) &
+			SYS_CFG_COMPLETE_BIT_VALUE))
+		;
+	/* Read SYS_CFGSTAT Register to get the status of submitted
+	 * transaction */
+	reg_val = readl(syscfg_reg + SYS_CFGSTAT_OFFSET);
+
+	if (reg_val & SYS_CFG_ERROR_BIT_VALUE) {
+		/* Error while setting register */
+		err = -EIO;
+		goto set_reg_error;
+	}
+
+	osc2_value = readl(syscfg_reg + SYS_CFGDATA_OFFSET);
+	/* Read the SCC CFGRW0 register */
+	reg_val = readl(scc_reg);
+
+	/*
+	 * Select the appropriate feed:
+	 * CFGRW0[0] - CLKOB
+	 * CFGRW0[1] - CLKOC
+	 * CFGRW0[2] - FACLK (CLK)B FROM AXICLK PLL)
+	 */
+	/* Calculate the  FCLK */
+	if (IS_SINGLE_BIT_SET(reg_val, 0)) {
+		/* CFGRW0[0] - CLKOB */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[10:7] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PB_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 1)) {
+		/* CFGRW0[1] - CLKOC */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[14:11] */
+		pc_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PC_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PC_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 2)) {
+		/* CFGRW0[2] - FACLK */
+		/* CFGRW0[18:15] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PA_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[22:19] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PB_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else {
+		err = -EIO;
+	}
+
+set_reg_error:
+ongoing_request:
+	raw_spin_unlock(&syscfg_lock);
+	*cpu_clock /= HZ_IN_MHZ;
+
+	if (!err)
+		cpu_clock_speed = *cpu_clock;
+
+	iounmap(scc_reg);
+
+scc_reg_map_failed:
+	iounmap(syscfg_reg);
+
+syscfg_reg_map_failed:
+
+	return err;
+}
+
+/**
+ * kbase_get_platform_logic_tile_type -  determines which LogicTile type
+ * is used by Versatile Express
+ *
+ * When platform_config build parameter is specified as vexpress, i.e.,
+ * platform_config=vexpress, GPU frequency may vary dependent on the
+ * particular platform. The GPU frequency depends on the LogicTile type.
+ *
+ * This function determines which LogicTile type is used by the platform by
+ * reading the HBI value of the daughterboard which holds the LogicTile:
+ *
+ * 0x217 HBI0217 Virtex-6
+ * 0x192 HBI0192 Virtex-5
+ * 0x247 HBI0247 Virtex-7
+ *
+ * Return: HBI value of the logic tile daughterboard, zero if not accessible
+ */
+static u32 kbase_get_platform_logic_tile_type(void)
+{
+	void __iomem *syscfg_reg = NULL;
+	u32 sys_procid1 = 0;
+
+	syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 + VE_SYS_PROC_ID1_OFFSET, 4);
+	if (NULL != syscfg_reg) {
+		sys_procid1 = readl(syscfg_reg);
+		iounmap(syscfg_reg);
+	}
+
+	return sys_procid1 & VE_LOGIC_TILE_HBI_MASK;
+}
+
+u32 kbase_get_platform_min_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MIN;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MIN;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MIN;
+	}
+}
+
+u32 kbase_get_platform_max_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MAX;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MAX;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MAX;
+	}
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
new file mode 100755
index 0000000..da86569
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+/**
+ * Get the minimum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_min_freq(void);
+
+/**
+ * Get the maximum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_max_freq(void);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
new file mode 100755
index 0000000..d9bfabc
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
@@ -0,0 +1,16 @@
+#
+# (C) COPYRIGHT 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_vexpress.o
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
new file mode 100755
index 0000000..2b91d72
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 5000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 5000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..3ff0930
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -0,0 +1,79 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+			     .start = 0x2f010000,
+			     .end = 0x2f010000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
new file mode 100755
index 0000000..0cb41ce
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
new file mode 100755
index 0000000..d269c25
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -0,0 +1,82 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 10000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 10000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..76ffe4a
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 75,
+	.mmu_irq_number = 76,
+	.gpu_irq_number = 77,
+	.io_memory_region = {
+			     .start = 0x2F000000,
+			     .end = 0x2F000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
new file mode 100755
index 0000000..816dff4
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ					    (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START		(0x10000000)
+#define SYS_CFGDATA_OFFSET				(0x000000A0)
+#define SYS_CFGCTRL_OFFSET				(0x000000A4)
+#define SYS_CFGSTAT_OFFSET				(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		  (1 << 31)
+#define READ_REG_BIT_VALUE				  (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			  (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		  (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			  (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE	      (2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		  (1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			  (1 <<  1)
+
+#define FEED_REG_BIT_MASK				(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+#define IS_SINGLE_BIT_SET(val, pos)		(val&(1<<pos))
+
+#define CPU_CLOCK_SPEED_UNDEFINED 0
+
+#define CPU_CLOCK_SPEED_6XV7 50
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed
+ * @brief  Retrieves the CPU clock speed.
+ *         The implementation is platform specific.
+ * @param[out]    cpu_clock - the value of CPU clock speed in MHz
+ * @return        0 on success, 1 otherwise
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	/* TODO: MIDBASE-2873 - Provide runtime detection of CPU clock freq for 6XV7 board */
+	*cpu_clock = CPU_CLOCK_SPEED_6XV7;
+
+	return 0;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
new file mode 100755
index 0000000..23647cc
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
new file mode 100755
index 0000000..5fa9b39
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_ukk_os.h
+ * Types and definitions that are common for Linux OSs for the kernel side of the
+ * User-Kernel interface.
+ */
+
+#ifndef _UKK_OS_H_ /* Linux version */
+#define _UKK_OS_H_
+
+#include <linux/fs.h>
+
+/**
+ * @addtogroup uk_api User-Kernel Interface API
+ * @{
+ */
+
+/**
+ * @addtogroup uk_api_kernel UKK (Kernel side)
+ * @{
+ */
+
+/**
+ * Internal OS specific data structure associated with each UKK session. Part
+ * of a ukk_session object.
+ */
+typedef struct ukkp_session {
+	int dummy;     /**< No internal OS specific data at this time */
+} ukkp_session;
+
+/** @} end group uk_api_kernel */
+
+/** @} end group uk_api */
+
+#endif /* _UKK_OS_H__ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/midgard/sconscript b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/sconscript
new file mode 100755
index 0000000..643b1f3
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/midgard/sconscript
@@ -0,0 +1,132 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+import re
+import sys
+Import('env')
+
+
+if Glob('tests/sconscript'):
+	SConscript( 'tests/sconscript' )
+
+mock_test = 0
+
+# Fake platform is a transient solution for GPL drivers running in kernel that does not provide configuration via platform data.
+# For such kernels fake_platform_device should be set to 1. For kernels providing platform data fake_platform_device should be set to 0.
+if env['platform_config']=='devicetree':
+	fake_platform_device = 0
+else:
+	fake_platform_device = 1
+
+# Source files required for kbase.
+kbase_src = [Glob('#kernel/drivers/gpu/arm/midgard/*.c'),
+             Glob('#kernel/drivers/gpu/arm/midgard/*.c'),
+             Glob('#kernel/drivers/gpu/arm/midgard/backend/*/*.c'),
+             Glob('#kernel/drivers/gpu/arm/midgard/backend/*/*.h'),
+             Glob('#kernel/drivers/gpu/arm/midgard/platform/%s/*.c' % (env['platform_config'])),
+             Glob('#kernel/drivers/gpu/arm/midgard/*.h'),
+             Glob('#kernel/drivers/gpu/arm/midgard/*.h'),
+             Glob('#kernel/drivers/gpu/arm/midgard/Makefile',
+             Glob('#kernel/drivers/gpu/arm/midgard/K*'))
+             ]
+
+kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*.c'),
+              Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*/*.c')]
+
+if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1':
+	kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')]
+	mock_test = 1
+
+# we need platform config for GPL version using fake platform
+if fake_platform_device==1:
+	# Check if we are compiling for PBX
+	linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+	search_term = '^[\ ]*CONFIG_MACH_REALVIEW_PBX[\ ]*=[\ ]*y'
+	REALVIEW_PBX = 0
+	for line in open(linux_config_file, 'r'):
+		if re.search(search_term, line):
+			REALVIEW_PBX = 1
+			break
+	if REALVIEW_PBX == 1 and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'):
+		sys.stderr.write("WARNING: Building for a PBX kernel but with platform_config=vexpress*\n")
+	# if the file platform config file is in the tpip directory then use that, otherwise use the default config directory
+	if Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])):
+		kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config']))
+	else:
+		kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/*%s.c' % (env['platform_config']))
+	
+# Note: cleaning via the Linux kernel build system does not yet work
+if env.GetOption('clean') :
+	env.Execute(Action("make clean", '[clean] kbase'))
+	cmd = env.Command(['$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/mali_platform_fake.ko'], kbase_src, [])
+else:
+	if env['os'] == 'android':
+		env['android'] = 1
+	else:
+		env['android'] = 0
+
+	if env['unit'] == '1':
+		env['kernel_test'] = 1
+	else:
+		env['kernel_test'] = 0
+
+	#Extract environment options, note the trailing spaces are important
+	env_options = \
+	"PLATFORM=${platform} " +\
+	"MALI_ERROR_INJECT_ON=${error_inject} " +\
+	"MALI_ANDROID=${android} " +\
+	"MALI_KERNEL_TEST_API=${kernel_test} " +\
+	"MALI_UNIT_TEST=${unit} " +\
+	"MALI_RELEASE_NAME=\"${mali_release_name}\" "+\
+	"MALI_MOCK_TEST=%s " % mock_test +\
+	"MALI_CUSTOMER_RELEASE=${release} " +\
+	"MALI_INSTRUMENTATION_LEVEL=${instr} " +\
+	"MALI_COVERAGE=${coverage} " +\
+	"MALI_BUS_LOG=${buslog} "
+
+	make_action_start = "cd ${SOURCE.dir} && make -j%d " % GetOption('num_jobs')
+	make_action_end = "%s && cp mali_kbase.ko $STATIC_LIB_PATH/mali_kbase.ko" % env.kernel_get_config_defines(fake_platform_device)
+	make_action = make_action_start + env_options + make_action_end
+
+	makeAction=Action(make_action, '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src, [makeAction])
+
+# Add a dependency on kds.ko.
+# Only necessary when KDS is not built into the kernel.
+#
+if env['os'] != 'android':
+	linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+	search_term = '^[\ ]*CONFIG_KDS[\ ]*=[\ ]*y'
+	kds_in_kernel = 0
+	for line in open(linux_config_file, 'r'):
+		if re.search(search_term, line):
+	        # KDS in kernel.
+			kds_in_kernel = 1
+	if not kds_in_kernel:
+		env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/kds.ko')
+
+# need Module.symvers from ump.ko build
+if int(env['ump']) == 1:
+	env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/ump.ko')
+
+if Glob('internal/sconsfrag'):
+	execfile('internal/sconsfrag')
+	get_internal(env)
+
+env.KernelObjTarget('kbase', cmd)
+
+env.AppendUnique(BASE=['cutils_linked_list'])
diff --git a/midgard/r11p0/kernel/drivers/gpu/arm/sconscript b/midgard/r11p0/kernel/drivers/gpu/arm/sconscript
new file mode 100755
index 0000000..135113c
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/arm/sconscript
@@ -0,0 +1,22 @@
+#
+# (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import glob
+
+
+SConscript('midgard/sconscript')
+
+if glob.glob('kmim/sconscript'):
+	SConscript('kmim/sconscript')
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/Kbuild b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/Kbuild
new file mode 100755
index 0000000..f10d58c
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/Kbuild
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+pl111_drm-y +=	pl111_drm_device.o \
+		pl111_drm_connector.o \
+		pl111_drm_crtc.o \
+		pl111_drm_cursor.o \
+		pl111_drm_dma_buf.o \
+		pl111_drm_encoder.o \
+		pl111_drm_fb.o \
+		pl111_drm_gem.o \
+		pl111_drm_pl111.o \
+		pl111_drm_platform.o \
+		pl111_drm_suspend.o \
+		pl111_drm_vma.o
+
+obj-$(CONFIG_DRM_PL111) += pl111_drm.o
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/Kconfig b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/Kconfig
new file mode 100755
index 0000000..60b465c
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/Kconfig
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+config DRM_PL111
+	tristate "DRM Support for PL111 CLCD Controller"
+	depends on DRM
+	select DRM_KMS_HELPER
+	select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+	help
+	  Choose this option for DRM support for the PL111 CLCD controller.
+	  If M is selected the module will be called pl111_drm.
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/Makefile b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/Makefile
new file mode 100755
index 0000000..2869f58
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: pl111_drm
+
+pl111_drm:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_USES_KDS=y CONFIG_DRM_PL111=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h
new file mode 100755
index 0000000..d3e0086
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h
@@ -0,0 +1,95 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+/**
+ * pl111_clcd_ext.h
+ * Extended CLCD register definitions
+ */
+
+#ifndef PL111_CLCD_EXT_H_
+#define PL111_CLCD_EXT_H_
+
+/* 
+ * PL111 cursor register definitions not defined in the kernel's clcd header.
+ * 
+ * TODO MIDEGL-1718: move to include/linux/amba/clcd.h
+ */
+
+#define CLCD_CRSR_IMAGE				0x00000800
+#define CLCD_CRSR_IMAGE_MAX_WORDS		256
+#define CLCD_CRSR_IMAGE_WORDS_PER_LINE		4
+#define CLCD_CRSR_IMAGE_PIXELS_PER_WORD	16
+
+#define CLCD_CRSR_LBBP_COLOR_MASK	0x00000003
+#define CLCD_CRSR_LBBP_BACKGROUND	0x0
+#define CLCD_CRSR_LBBP_FOREGROUND	0x1
+#define CLCD_CRSR_LBBP_TRANSPARENT	0x2
+#define CLCD_CRSR_LBBP_INVERSE		0x3
+
+
+#define CLCD_CRSR_CTRL			0x00000c00
+#define CLCD_CRSR_CONFIG		0x00000c04
+#define CLCD_CRSR_PALETTE_0		0x00000c08
+#define CLCD_CRSR_PALETTE_1		0x00000c0c
+#define CLCD_CRSR_XY			0x00000c10
+#define CLCD_CRSR_CLIP			0x00000c14
+#define CLCD_CRSR_IMSC			0x00000c20
+#define CLCD_CRSR_ICR			0x00000c24
+#define CLCD_CRSR_RIS			0x00000c28
+#define CLCD_MIS				0x00000c2c
+
+#define CRSR_CTRL_CRSR_ON		(1 << 0)
+#define CRSR_CTRL_CRSR_MAX		3
+#define CRSR_CTRL_CRSR_NUM_SHIFT	4
+#define CRSR_CTRL_CRSR_NUM_MASK		\
+	(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT)
+#define CRSR_CTRL_CURSOR_0		0
+#define CRSR_CTRL_CURSOR_1		1
+#define CRSR_CTRL_CURSOR_2		2
+#define CRSR_CTRL_CURSOR_3		3
+
+#define CRSR_CONFIG_CRSR_SIZE		(1 << 0)
+#define CRSR_CONFIG_CRSR_FRAME_SYNC	(1 << 1)
+
+#define CRSR_PALETTE_RED_SHIFT		0
+#define CRSR_PALETTE_GREEN_SHIFT	8
+#define CRSR_PALETTE_BLUE_SHIFT		16
+
+#define CRSR_PALETTE_RED_MASK		0x000000ff
+#define CRSR_PALETTE_GREEN_MASK		0x0000ff00
+#define CRSR_PALETTE_BLUE_MASK		0x00ff0000
+#define CRSR_PALETTE_MASK		(~0xff000000)
+
+#define CRSR_XY_MASK			0x000003ff
+#define CRSR_XY_X_SHIFT			0
+#define CRSR_XY_Y_SHIFT			16
+
+#define CRSR_XY_X_MASK			CRSR_XY_MASK
+#define CRSR_XY_Y_MASK			(CRSR_XY_MASK << CRSR_XY_Y_SHIFT)
+
+#define CRSR_CLIP_MASK			0x3f
+#define CRSR_CLIP_X_SHIFT		0
+#define CRSR_CLIP_Y_SHIFT		8
+
+#define CRSR_CLIP_X_MASK		CRSR_CLIP_MASK
+#define CRSR_CLIP_Y_MASK		(CRSR_CLIP_MASK << CRSR_CLIP_Y_SHIFT)
+
+#define CRSR_IMSC_CRSR_IM		(1<<0)
+#define CRSR_ICR_CRSR_IC		(1<<0)
+#define CRSR_RIS_CRSR_RIS		(1<<0)
+#define CRSR_MIS_CRSR_MIS		(1<<0)
+
+#endif /* PL111_CLCD_EXT_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h
new file mode 100755
index 0000000..64d87b6
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h
@@ -0,0 +1,270 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _PL111_DRM_H_
+#define _PL111_DRM_H_
+
+#define DRIVER_AUTHOR    "ARM Ltd."
+#define DRIVER_NAME      "pl111_drm"
+#define DRIVER_DESC      "DRM module for PL111"
+#define DRIVER_LICENCE   "GPL"
+#define DRIVER_ALIAS     "platform:pl111_drm"
+#define DRIVER_DATE      "20101111"
+#define DRIVER_VERSION   "0.2"
+#define DRIVER_MAJOR      2
+#define DRIVER_MINOR      1
+#define DRIVER_PATCHLEVEL 1
+
+/*
+ * Number of flips allowed in flight at any one time. Any more flips requested
+ * beyond this value will cause the caller to block until earlier flips have
+ * completed.
+ *
+ * For performance reasons, this must be greater than the number of buffers
+ * used in the rendering pipeline. Note that the rendering pipeline can contain
+ * different types of buffer, e.g.:
+ * - 2 final framebuffers
+ * - >2 geometry buffers for GPU use-cases
+ * - >2 vertex buffers for GPU use-cases
+ *
+ * For example, a system using 5 geometry buffers could have 5 flips in flight,
+ * and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 5 or greater.
+ *
+ * Whilst there may be more intermediate buffers (such as vertex/geometry) than
+ * final framebuffers, KDS is used to ensure that GPU rendering waits for the
+ * next off-screen buffer, so it doesn't overwrite an on-screen buffer and
+ * produce tearing.
+ */
+
+/*
+ * Here, we choose a conservative value. A lower value is most likely
+ * suitable for GPU use-cases.
+ */
+#define NR_FLIPS_IN_FLIGHT_THRESHOLD 16
+
+#define CLCD_IRQ_NEXTBASE_UPDATE (1u<<2)
+
+struct pl111_drm_flip_resource;
+
+struct pl111_gem_bo_dma {
+	dma_addr_t fb_dev_addr;
+	void *fb_cpu_addr;
+};
+
+struct pl111_gem_bo_shm {
+	struct page **pages;
+	dma_addr_t *dma_addrs;
+};
+
+struct pl111_gem_bo {
+	struct drm_gem_object gem_object;
+	u32 type;
+	union {
+		struct pl111_gem_bo_dma dma;
+		struct pl111_gem_bo_shm shm;
+	} backing_data;
+	struct sg_table *sgt;
+};
+
+extern struct pl111_drm_dev_private priv;
+
+struct pl111_drm_framebuffer {
+	struct drm_framebuffer fb;
+	struct pl111_gem_bo *bo;
+};
+
+struct pl111_drm_flip_resource {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* This is the kds set associated to the dma_buf we want to flip */
+	struct kds_resource_set *kds_res_set;
+#endif
+	struct drm_framebuffer *fb;
+	struct drm_crtc *crtc;
+	struct list_head link;
+	bool page_flip;
+	struct drm_pending_vblank_event *event;
+};
+
+struct pl111_drm_crtc {
+	struct drm_crtc crtc;
+	int crtc_index;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* This protects "old_kds_res_set" and "displaying_fb" */
+	spinlock_t current_displaying_lock;
+	/*
+	 * When a buffer is displayed its associated kds resource
+	 * will be obtained and stored here. Every time a buffer
+	 * flip is completed this old kds set is released and assigned
+	 * the kds set of the new buffer.
+	 */
+	struct kds_resource_set *old_kds_res_set;
+	/*
+	 * Stores which frame buffer is currently being displayed by
+	 * this CRTC or NULL if nothing is being displayed. It is used
+	 * to tell whether we need to obtain a set of kds resources for
+	 * exported buffer objects.
+	 */
+	struct drm_framebuffer *displaying_fb;
+#endif
+	struct drm_display_mode *new_mode;
+	struct drm_display_mode *current_mode;
+	int last_bpp;
+
+	/*
+	 * This spinlock protects "update_queue", "current_update_res"
+	 * and calls to do_flip_to_res() which updates the CLCD base
+	 * registers.
+	 */
+	spinlock_t base_update_lock;
+	/*
+	 * The resource that caused a base address update. Only one can be
+	 * pending, hence it's != NULL if there's a pending update
+	 */
+	struct pl111_drm_flip_resource *current_update_res;
+	/* Queue of things waiting to update the base address */
+	struct list_head update_queue;
+
+	void (*show_framebuffer_cb)(struct pl111_drm_flip_resource *flip_res,
+				struct drm_framebuffer *fb);
+};
+
+struct pl111_drm_connector {
+	struct drm_connector connector;
+};
+
+struct pl111_drm_encoder {
+	struct drm_encoder encoder;
+};
+
+struct pl111_drm_dev_private {
+	struct pl111_drm_crtc *pl111_crtc;
+
+	struct amba_device *amba_dev;
+	unsigned long mmio_start;
+	__u32 mmio_len;
+	void *regs;
+	struct clk *clk;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct kds_callback kds_cb;
+	struct kds_callback kds_obtain_current_cb;
+#endif
+	/*
+	 * Number of flips that were started in show_framebuffer_on_crtc(),
+	 * but haven't completed yet - because we do deferred flipping
+	 */
+	atomic_t nr_flips_in_flight;
+	wait_queue_head_t wait_for_flips;
+
+	/*
+	 * Used to prevent race between pl111_dma_buf_release and
+	 * drm_gem_prime_handle_to_fd
+	 */
+	struct mutex export_dma_buf_lock;
+
+	uint32_t number_crtcs;
+
+	/* Cache for flip resources used to avoid kmalloc on each page flip */
+	struct kmem_cache *page_flip_slab;
+};
+
+enum pl111_cursor_size {
+	CURSOR_32X32,
+	CURSOR_64X64
+};
+
+enum pl111_cursor_sync {
+	CURSOR_SYNC_NONE,
+	CURSOR_SYNC_VSYNC
+};
+
+
+/**
+ * Buffer allocation function which is more flexible than dumb_create(),
+ * it allows passing driver specific flags to control the kind of buffer
+ * to be allocated.
+ */
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file_priv);
+ 
+/****** TODO MIDEGL-1718: this should be moved to uapi/include/drm/pl111_drm.h ********/
+
+/* 
+ * Parameters for different buffer objects:
+ * bit [0]: backing storage
+ *	(0 -> SHM)
+ *	(1 -> DMA)
+ * bit [2:1]: kind of mapping
+ *	(0x0 -> uncached)
+ *	(0x1 -> write combine)
+ *	(0x2 -> cached)
+ */
+#define PL111_BOT_MASK		(0x7)
+#define PL111_BOT_SHM		(0x0 << 0)
+#define PL111_BOT_DMA		(0x1 << 0)
+#define PL111_BOT_UNCACHED	(0x0 << 1)
+#define PL111_BOT_WC		(0x1 << 1)
+#define PL111_BOT_CACHED	(0x2 << 1)
+
+/**
+ * User-desired buffer creation information structure.
+ *
+ * @size: user-desired memory allocation size.
+ *      - this size value would be page-aligned internally.
+ * @flags: user request for setting memory type or cache attributes as a bit op
+ *	- PL111_BOT_DMA / PL111_BOT_SHM
+ *	- PL111_BOT_UNCACHED / PL111_BOT_WC / PL111_BOT_CACHED
+ * @handle: returned a handle to created gem object.
+ *      - this handle will be set by gem module of kernel side.
+ */
+struct drm_pl111_gem_create {
+	uint32_t height;
+	uint32_t width;
+	uint32_t bpp;
+	uint32_t flags;
+	/* handle, pitch, size will be returned */
+	uint32_t handle;
+	uint32_t pitch;
+	uint64_t size;
+};
+
+#define DRM_PL111_GEM_CREATE		0x00
+
+#define DRM_IOCTL_PL111_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + \
+			DRM_PL111_GEM_CREATE, struct drm_pl111_gem_create)
+/****************************************************************************/
+
+#define PL111_FB_FROM_FRAMEBUFFER(drm_fb) \
+	(container_of(drm_fb, struct pl111_drm_framebuffer, fb))
+
+#define PL111_BO_FROM_FRAMEBUFFER(drm_fb) \
+	(container_of(drm_fb, struct pl111_drm_framebuffer, fb)->bo)
+
+#define PL111_BO_FROM_GEM(gem_obj) \
+	container_of(gem_obj, struct pl111_gem_bo, gem_object)
+
+#define to_pl111_crtc(x) container_of(x, struct pl111_drm_crtc, crtc)
+
+#define PL111_ENCODER_FROM_ENCODER(x) \
+	container_of(x, struct pl111_drm_encoder, encoder)
+
+#define PL111_CONNECTOR_FROM_CONNECTOR(x) \
+	container_of(x, struct pl111_drm_connector, connector)
+
+#include "pl111_drm_funcs.h"
+
+#endif /* _PL111_DRM_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c
new file mode 100755
index 0000000..c7c3a22
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c
@@ -0,0 +1,170 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_connector.c
+ * Implementation of the connector functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+
+static struct {
+	int w, h, type;
+} pl111_drm_modes[] = {
+	{ 640, 480,  DRM_MODE_TYPE_PREFERRED},
+	{ 800, 600,  0},
+	{1024, 768,  0},
+	{  -1,  -1, -1}
+};
+
+void pl111_connector_destroy(struct drm_connector *connector)
+{
+	struct pl111_drm_connector *pl111_connector =
+				PL111_CONNECTOR_FROM_CONNECTOR(connector);
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	kfree(pl111_connector);
+}
+
+enum drm_connector_status pl111_connector_detect(struct drm_connector
+							*connector, bool force)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return connector_status_connected;
+}
+
+void pl111_connector_dpms(struct drm_connector *connector, int mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+}
+
+struct drm_encoder *
+pl111_connector_helper_best_encoder(struct drm_connector *connector)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	if (connector->encoder != NULL) {
+		return connector->encoder; /* Return attached encoder */
+	} else {
+		/*
+		 * If there is no attached encoder we choose the best candidate
+		 * from the list.
+		 * For PL111 there is only one encoder so we return the first
+		 * one we find.
+		 * Other h/w would require a suitable criterion below.
+		 */
+		struct drm_encoder *encoder = NULL;
+		struct drm_device *dev = connector->dev;
+
+		list_for_each_entry(encoder, &dev->mode_config.encoder_list,
+					head) {
+			if (1) { /* criterion ? */
+				break;
+			}
+		}
+		return encoder; /* return best candidate encoder */
+	}
+}
+
+int pl111_connector_helper_get_modes(struct drm_connector *connector)
+{
+	int i = 0;
+	int count = 0;
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	while (pl111_drm_modes[i].w != -1) {
+		struct drm_display_mode *mode =
+				drm_mode_find_dmt(connector->dev,
+						pl111_drm_modes[i].w,
+						pl111_drm_modes[i].h,
+						60
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+						, false
+#endif
+						);
+
+		if (mode != NULL) {
+			mode->type |= pl111_drm_modes[i].type;
+			drm_mode_probed_add(connector, mode);
+			count++;
+		}
+
+		i++;
+	}
+
+	DRM_DEBUG_KMS("found %d modes\n", count);
+
+	return count;
+}
+
+int pl111_connector_helper_mode_valid(struct drm_connector *connector,
+					struct drm_display_mode *mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return MODE_OK;
+}
+
+const struct drm_connector_funcs connector_funcs = {
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = pl111_connector_destroy,
+	.detect = pl111_connector_detect,
+	.dpms = pl111_connector_dpms,
+};
+
+const struct drm_connector_helper_funcs connector_helper_funcs = {
+	.get_modes = pl111_connector_helper_get_modes,
+	.mode_valid = pl111_connector_helper_mode_valid,
+	.best_encoder = pl111_connector_helper_best_encoder,
+};
+
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev)
+{
+	struct pl111_drm_connector *pl111_connector;
+
+	pl111_connector = kzalloc(sizeof(struct pl111_drm_connector),
+					GFP_KERNEL);
+
+	if (pl111_connector == NULL) {
+		pr_err("Failed to allocated pl111_drm_connector\n");
+		return NULL;
+	}
+
+	drm_connector_init(dev, &pl111_connector->connector, &connector_funcs,
+				DRM_MODE_CONNECTOR_DVII);
+
+	drm_connector_helper_add(&pl111_connector->connector,
+					&connector_helper_funcs);
+
+	drm_sysfs_connector_add(&pl111_connector->connector);
+
+	return pl111_connector;
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c
new file mode 100755
index 0000000..ede07ff
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c
@@ -0,0 +1,449 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_crtc.c
+ * Implementation of the CRTC functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+static int pl111_crtc_num;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc)
+{
+	struct drm_device *dev = pl111_crtc->crtc.dev;
+	struct pl111_drm_flip_resource *old_flip_res;
+	struct pl111_gem_bo *bo;
+	unsigned long irq_flags;
+	int flips_in_flight;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	unsigned long flags;
+#endif
+
+	spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+
+	/*
+	 * Cache the flip resource that caused the IRQ since it will be
+	 * dispatched later. Early return if the IRQ isn't associated to
+	 * a base register update.
+	 * 
+	 * TODO MIDBASE-2790: disable IRQs when a flip is not pending.
+	 */
+	old_flip_res = pl111_crtc->current_update_res;
+	if (!old_flip_res) {
+		spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+		return;
+	}
+	pl111_crtc->current_update_res = NULL;
+
+	/* Prepare the next flip (if any) of the queue as soon as possible. */
+	if (!list_empty(&pl111_crtc->update_queue)) {
+		struct pl111_drm_flip_resource *flip_res;
+		/* Remove the head of the list */
+		flip_res = list_first_entry(&pl111_crtc->update_queue,
+			struct pl111_drm_flip_resource, link);
+		list_del(&flip_res->link);
+		do_flip_to_res(flip_res);
+		/*
+		 * current_update_res will be set, so guarentees that
+		 * another flip_res coming in gets queued instead of
+		 * handled immediately
+		 */
+	}
+	spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+	/* Finalize properly the flip that caused the IRQ */
+	DRM_DEBUG_KMS("DRM Finalizing old_flip_res=%p\n", old_flip_res);
+
+	bo = PL111_BO_FROM_FRAMEBUFFER(old_flip_res->fb);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+	release_kds_resource_and_display(old_flip_res);
+	spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+	/* Release DMA buffer on this flip */
+
+	if (bo->gem_object.export_dma_buf != NULL)
+		dma_buf_put(bo->gem_object.export_dma_buf);
+
+	drm_handle_vblank(dev, pl111_crtc->crtc_index);
+
+	/* Wake up any processes waiting for page flip event */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (old_flip_res->event) {
+		spin_lock_bh(&dev->event_lock);
+		drm_send_vblank_event(dev, pl111_crtc->crtc_index,
+					old_flip_res->event);
+		spin_unlock_bh(&dev->event_lock);
+	}
+#else
+	if (old_flip_res->event) {
+		struct drm_pending_vblank_event *e = old_flip_res->event;
+		struct timeval now;
+		unsigned int seq;
+
+		DRM_DEBUG_KMS("%s: wake up page flip event (%p)\n", __func__,
+				old_flip_res->event);
+
+		spin_lock_bh(&dev->event_lock);
+		seq = drm_vblank_count_and_time(dev, pl111_crtc->crtc_index,
+							&now);
+		e->pipe = pl111_crtc->crtc_index;
+		e->event.sequence = seq;
+		e->event.tv_sec = now.tv_sec;
+		e->event.tv_usec = now.tv_usec;
+
+		list_add_tail(&e->base.link,
+			&e->base.file_priv->event_list);
+
+		wake_up_interruptible(&e->base.file_priv->event_wait);
+		spin_unlock_bh(&dev->event_lock);
+	}
+#endif
+
+	drm_vblank_put(dev, pl111_crtc->crtc_index);
+
+	/*
+	 * workqueue.c:process_one_work():
+	 * "It is permissible to free the struct work_struct from
+	 *  inside the function that is called from it"
+	 */
+	kmem_cache_free(priv.page_flip_slab, old_flip_res);
+
+	flips_in_flight = atomic_dec_return(&priv.nr_flips_in_flight);
+	if (flips_in_flight == 0 ||
+			flips_in_flight == (NR_FLIPS_IN_FLIGHT_THRESHOLD - 1))
+		wake_up(&priv.wait_for_flips);
+
+	DRM_DEBUG_KMS("DRM release flip_res=%p\n", old_flip_res);
+}
+
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2)
+{
+	struct pl111_drm_flip_resource *flip_res = cb1;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+	pl111_crtc->show_framebuffer_cb(cb1, cb2);
+}
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+				struct drm_framebuffer *fb, bool page_flip,
+				struct drm_pending_vblank_event *event)
+{
+	struct pl111_gem_bo *bo;
+	struct pl111_drm_flip_resource *flip_res;
+	int flips_in_flight;
+	int old_flips_in_flight;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+	crtc->fb = fb;
+#else
+	crtc->primary->fb = fb;
+#endif
+
+	bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	if (bo == NULL) {
+		DRM_DEBUG_KMS("Failed to get pl111_gem_bo object\n");
+		return -EINVAL;
+	}
+
+	/* If this is a full modeset, wait for all outstanding flips to complete
+	 * before continuing. This avoids unnecessary complication from being
+	 * able to queue up multiple modesets and queues of mixed modesets and
+	 * page flips.
+	 *
+	 * Modesets should be uncommon and will not be performant anyway, so
+	 * making them synchronous should have negligible performance impact.
+	 */
+	if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+				atomic_read(&priv.nr_flips_in_flight) == 0);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * There can be more 'early display' flips in flight than there are
+	 * buffers, and there is (currently) no explicit bound on the number of
+	 * flips. Hence, we need a new allocation for each one.
+	 *
+	 * Note: this could be optimized down if we knew a bound on the flips,
+	 * since an application can only have so many buffers in flight to be
+	 * useful/not hog all the memory
+	 */
+	flip_res = kmem_cache_alloc(priv.page_flip_slab, GFP_KERNEL);
+	if (flip_res == NULL) {
+		pr_err("kmem_cache_alloc failed to alloc - flip ignored\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * increment flips in flight, whilst blocking when we reach
+	 * NR_FLIPS_IN_FLIGHT_THRESHOLD
+	 */
+	do {
+		/*
+		 * Note: use of assign-and-then-compare in the condition to set
+		 * flips_in_flight
+		 */
+		int ret = wait_event_killable(priv.wait_for_flips,
+				(flips_in_flight =
+					atomic_read(&priv.nr_flips_in_flight))
+				< NR_FLIPS_IN_FLIGHT_THRESHOLD);
+		if (ret != 0) {
+			kmem_cache_free(priv.page_flip_slab, flip_res);
+			return ret;
+		}
+
+		old_flips_in_flight = atomic_cmpxchg(&priv.nr_flips_in_flight,
+					flips_in_flight, flips_in_flight + 1);
+	} while (old_flips_in_flight != flips_in_flight);
+
+	flip_res->fb = fb;
+	flip_res->crtc = crtc;
+	flip_res->page_flip = page_flip;
+	flip_res->event = event;
+	INIT_LIST_HEAD(&flip_res->link);
+	DRM_DEBUG_KMS("DRM alloc flip_res=%p\n", flip_res);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	if (bo->gem_object.export_dma_buf != NULL) {
+		struct dma_buf *buf = bo->gem_object.export_dma_buf;
+		unsigned long shared[1] = { 0 };
+		struct kds_resource *resource_list[1] = {
+				get_dma_buf_kds_resource(buf) };
+		int err;
+
+		get_dma_buf(buf);
+		DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+
+		/* Wait for the KDS resource associated with this buffer */
+		err = kds_async_waitall(&flip_res->kds_res_set,
+					&priv.kds_cb, flip_res, fb, 1, shared,
+					resource_list);
+		BUG_ON(err);
+	} else {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+		DRM_DEBUG_KMS("No dma_buf for this flip\n");
+
+		/* No dma-buf attached so just call the callback directly */
+		flip_res->kds_res_set = NULL;
+		pl111_crtc->show_framebuffer_cb(flip_res, fb);
+	}
+#else
+	if (bo->gem_object.export_dma_buf != NULL) {
+		struct dma_buf *buf = bo->gem_object.export_dma_buf;
+
+		get_dma_buf(buf);
+		DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+	} else {
+		DRM_DEBUG_KMS("No dma_buf for this flip\n");
+	}
+
+	/* No dma-buf attached to this so just call the callback directly */
+	{
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		pl111_crtc->show_framebuffer_cb(flip_res, fb);
+	}
+#endif
+
+	/* For the same reasons as the wait at the start of this function,
+	 * wait for the modeset to complete before continuing.
+	 */
+	if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+				flips_in_flight == 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int pl111_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+			struct drm_pending_vblank_event *event)
+#else
+			struct drm_pending_vblank_event *event,
+			uint32_t flags)
+#endif
+{
+	DRM_DEBUG_KMS("%s: crtc=%p, fb=%p, event=%p\n",
+			__func__, crtc, fb, event);
+	return show_framebuffer_on_crtc(crtc, fb, true, event);
+}
+
+int pl111_crtc_helper_mode_set(struct drm_crtc *crtc,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode,
+				int x, int y, struct drm_framebuffer *old_fb)
+{
+	int ret;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+	struct drm_display_mode *duplicated_mode;
+
+	DRM_DEBUG_KMS("DRM crtc_helper_mode_set, x=%d y=%d bpp=%d\n",
+			adjusted_mode->hdisplay, adjusted_mode->vdisplay,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+			crtc->fb->bits_per_pixel);
+#else
+			crtc->primary->fb->bits_per_pixel);
+#endif
+
+	duplicated_mode = drm_mode_duplicate(crtc->dev, adjusted_mode);
+	if (!duplicated_mode)
+		return -ENOMEM;
+
+	pl111_crtc->new_mode = duplicated_mode;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+	ret = show_framebuffer_on_crtc(crtc, crtc->fb, false, NULL);
+#else
+	ret = show_framebuffer_on_crtc(crtc, crtc->primary->fb, false, NULL);
+#endif
+	if (ret != 0) {
+		pl111_crtc->new_mode = pl111_crtc->current_mode;
+		drm_mode_destroy(crtc->dev, duplicated_mode);
+	}
+
+	return ret;
+}
+
+void pl111_crtc_helper_prepare(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+void pl111_crtc_helper_commit(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+bool pl111_crtc_helper_mode_fixup(struct drm_crtc *crtc,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0))
+				const struct drm_display_mode *mode,
+#else
+				struct drm_display_mode *mode,
+#endif
+				struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+#ifdef CONFIG_ARCH_VEXPRESS
+	/*
+	 * 1024x768 with more than 16 bits per pixel may not work 
+	 * correctly on Versatile Express due to bandwidth issues
+	 */
+	if (mode->hdisplay == 1024 && mode->vdisplay == 768 &&
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+			crtc->fb->bits_per_pixel > 16) {
+#else
+			crtc->primary->fb->bits_per_pixel > 16) {
+#endif
+		DRM_INFO("*WARNING* 1024x768 at > 16 bpp may suffer corruption\n");
+	}
+#endif
+
+	return true;
+}
+
+void pl111_crtc_helper_disable(struct drm_crtc *crtc)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+	/* don't disable crtc until no flips in flight as irq will be disabled */
+	ret = wait_event_killable(priv.wait_for_flips, atomic_read(&priv.nr_flips_in_flight) == 0);
+	if(ret) {
+		pr_err("pl111_crtc_helper_disable failed\n");
+		return;
+	}
+	clcd_disable(crtc);
+}
+
+void pl111_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+	drm_crtc_cleanup(crtc);
+	kfree(pl111_crtc);
+}
+
+const struct drm_crtc_funcs crtc_funcs = {
+	.cursor_set = pl111_crtc_cursor_set,
+	.cursor_move = pl111_crtc_cursor_move,
+	.set_config = drm_crtc_helper_set_config,
+	.page_flip = pl111_crtc_page_flip,
+	.destroy = pl111_crtc_destroy
+};
+
+const struct drm_crtc_helper_funcs crtc_helper_funcs = {
+	.mode_set = pl111_crtc_helper_mode_set,
+	.prepare = pl111_crtc_helper_prepare,
+	.commit = pl111_crtc_helper_commit,
+	.mode_fixup = pl111_crtc_helper_mode_fixup,
+	.disable = pl111_crtc_helper_disable,
+};
+
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev)
+{
+	struct pl111_drm_crtc *pl111_crtc;
+
+	pl111_crtc = kzalloc(sizeof(struct pl111_drm_crtc), GFP_KERNEL);
+	if (pl111_crtc == NULL) {
+		pr_err("Failed to allocated pl111_drm_crtc\n");
+		return NULL;
+	}
+
+	drm_crtc_init(dev, &pl111_crtc->crtc, &crtc_funcs);
+	drm_crtc_helper_add(&pl111_crtc->crtc, &crtc_helper_funcs);
+
+	pl111_crtc->crtc_index = pl111_crtc_num;
+	pl111_crtc_num++;
+	pl111_crtc->crtc.enabled = 0;
+	pl111_crtc->last_bpp = 0;
+	pl111_crtc->current_update_res = NULL;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	pl111_crtc->displaying_fb = NULL;
+	pl111_crtc->old_kds_res_set = NULL;
+	spin_lock_init(&pl111_crtc->current_displaying_lock);
+#endif
+	pl111_crtc->show_framebuffer_cb = show_framebuffer_on_crtc_cb_internal;
+	INIT_LIST_HEAD(&pl111_crtc->update_queue);
+	spin_lock_init(&pl111_crtc->base_update_lock);
+
+	return pl111_crtc;
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c
new file mode 100755
index 0000000..4bf20fe
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c
@@ -0,0 +1,331 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_cursor.c
+ * Implementation of cursor functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_clcd_ext.h"
+#include "pl111_drm.h"
+
+#define PL111_MAX_CURSOR_WIDTH (64)
+#define PL111_MAX_CURSOR_HEIGHT (64)
+
+#define ARGB_2_LBBP_BINARY_THRESHOLD	(1 << 7)
+#define ARGB_ALPHA_SHIFT		24
+#define ARGB_ALPHA_MASK			(0xff << ARGB_ALPHA_SHIFT)
+#define ARGB_RED_SHIFT			16
+#define ARGB_RED_MASK			(0xff << ARGB_RED_SHIFT)
+#define ARGB_GREEN_SHIFT		8
+#define ARGB_GREEN_MASK			(0xff << ARGB_GREEN_SHIFT)
+#define ARGB_BLUE_SHIFT			0
+#define ARGB_BLUE_MASK			(0xff << ARGB_BLUE_SHIFT)
+
+
+void pl111_set_cursor_size(enum pl111_cursor_size size)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+	if (size == CURSOR_64X64)
+		reg_data |= CRSR_CONFIG_CRSR_SIZE;
+	else
+		reg_data &= ~CRSR_CONFIG_CRSR_SIZE;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor_sync(enum pl111_cursor_sync sync)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+	if (sync == CURSOR_SYNC_VSYNC)
+		reg_data |= CRSR_CONFIG_CRSR_FRAME_SYNC;
+	else
+		reg_data &= ~CRSR_CONFIG_CRSR_FRAME_SYNC;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor(u32 cursor)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+	reg_data &= ~(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT);
+	reg_data |= (cursor & CRSR_CTRL_CRSR_MAX) << CRSR_CTRL_CRSR_NUM_SHIFT;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_enable(bool enable)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+	if (enable)
+		reg_data |= CRSR_CTRL_CRSR_ON;
+	else
+		reg_data &= ~CRSR_CTRL_CRSR_ON;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_position(u32 x, u32 y)
+{
+	u32 reg_data = (x & CRSR_XY_MASK) |
+			((y & CRSR_XY_MASK) << CRSR_XY_Y_SHIFT);
+
+	writel(reg_data, priv.regs + CLCD_CRSR_XY);
+}
+
+void pl111_set_cursor_clipping(u32 x, u32 y)
+{
+	u32 reg_data;
+
+	/* 
+	 * Do not allow setting clipping values larger than
+	 * the cursor size since the cursor is already fully hidden
+	 * when x,y = PL111_MAX_CURSOR_WIDTH.
+	 */
+	if (x > PL111_MAX_CURSOR_WIDTH)
+		x = PL111_MAX_CURSOR_WIDTH;
+	if (y > PL111_MAX_CURSOR_WIDTH)
+		y = PL111_MAX_CURSOR_WIDTH;
+
+	reg_data = (x & CRSR_CLIP_MASK) |
+			((y & CRSR_CLIP_MASK) << CRSR_CLIP_Y_SHIFT);
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CLIP);
+}
+
+void pl111_set_cursor_palette(u32 color0, u32 color1)
+{
+	writel(color0 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_0);
+	writel(color1 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_1);
+}
+
+void pl111_cursor_enable(void)
+{
+	pl111_set_cursor_sync(CURSOR_SYNC_VSYNC);
+	pl111_set_cursor_size(CURSOR_64X64);
+	pl111_set_cursor_palette(0x0, 0x00ffffff);
+	pl111_set_cursor_enable(true);
+}
+
+void pl111_cursor_disable(void)
+{
+	pl111_set_cursor_enable(false);
+}
+
+/* shift required to locate pixel into the correct position in
+ * a cursor LBBP word, indexed by x mod 16.
+ */
+static const unsigned char
+x_mod_16_to_value_shift[CLCD_CRSR_IMAGE_PIXELS_PER_WORD] = {
+	6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24
+};
+
+/* Pack the pixel value into its correct position in the buffer as specified
+ * for LBBP */
+static inline void
+set_lbbp_pixel(uint32_t *buffer, unsigned int x, unsigned int y,
+				uint32_t value)
+{
+	u32 *cursor_ram = priv.regs + CLCD_CRSR_IMAGE;
+	uint32_t shift;
+	uint32_t data;
+
+	shift = x_mod_16_to_value_shift[x % CLCD_CRSR_IMAGE_PIXELS_PER_WORD];
+
+	/* Get the word containing this pixel */
+	cursor_ram = cursor_ram + (x >> CLCD_CRSR_IMAGE_WORDS_PER_LINE) + (y << 2);
+
+	/* Update pixel in cursor RAM */
+	data = readl(cursor_ram);
+	data &= ~(CLCD_CRSR_LBBP_COLOR_MASK << shift);
+	data |= value << shift;
+	writel(data, cursor_ram);
+}
+
+static u32 pl111_argb_to_lbbp(u32 argb_pix)
+{
+	u32 lbbp_pix = CLCD_CRSR_LBBP_TRANSPARENT;
+	u32 alpha = (argb_pix & ARGB_ALPHA_MASK) >> ARGB_ALPHA_SHIFT;
+	u32 red = (argb_pix & ARGB_RED_MASK) >> ARGB_RED_SHIFT;
+	u32 green = (argb_pix & ARGB_GREEN_MASK) >> ARGB_GREEN_SHIFT;
+	u32 blue =  (argb_pix & ARGB_BLUE_MASK) >> ARGB_BLUE_SHIFT;
+
+	/*
+	 * Converting from 8 pixel transparency to binary transparency
+	 * it's the best we can achieve.
+	 */
+	if (alpha & ARGB_2_LBBP_BINARY_THRESHOLD) {
+		u32 gray, max, min;
+
+		/*
+		 * Convert to gray using the lightness method:
+		 * gray = [max(R,G,B) + min(R,G,B)]/2
+		 */
+		min = min(red, green);
+		min = min(min, blue);
+		max = max(red, green);
+		max = max(max, blue);
+		gray = (min + max) >> 1; /* divide by 2 */
+		/* Apply binary threshold to the gray value calculated */
+		if (gray & ARGB_2_LBBP_BINARY_THRESHOLD)
+			lbbp_pix = CLCD_CRSR_LBBP_FOREGROUND;
+		else
+			lbbp_pix = CLCD_CRSR_LBBP_BACKGROUND;
+	}
+
+	return lbbp_pix;
+}
+
+/*
+ * The PL111 hardware cursor supports only LBBP which is a 2bpp format but
+ * the cursor format from userspace is ARGB8888 so we need to convert
+ *  to LBBP here.
+ */
+static void pl111_set_cursor_image(u32 *data)
+{
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+	/* Add 1 on width to insert trailing NULL */
+	char string_cursor[PL111_MAX_CURSOR_WIDTH + 1];
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+	unsigned int x;
+	unsigned int y;
+
+	for (y = 0; y < PL111_MAX_CURSOR_HEIGHT; y++) {
+		for (x = 0; x < PL111_MAX_CURSOR_WIDTH; x++) {
+			u32 value = pl111_argb_to_lbbp(*data);
+
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+			if (value == CLCD_CRSR_LBBP_TRANSPARENT)
+				string_cursor[x] = 'T';
+			else if (value == CLCD_CRSR_LBBP_FOREGROUND)
+				string_cursor[x] = 'F';
+			else if (value == CLCD_CRSR_LBBP_INVERSE)
+				string_cursor[x] = 'I';
+			else
+				string_cursor[x] = 'B';
+
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+			set_lbbp_pixel(data, x, y, value);
+			++data;
+		}
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+		string_cursor[PL111_MAX_CURSOR_WIDTH] = '\0';
+		DRM_INFO("%s\n", string_cursor);
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+	}
+}
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+			   struct drm_file *file_priv,
+			   uint32_t handle,
+			   uint32_t width,
+			   uint32_t height)
+{
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+
+	DRM_DEBUG_KMS("handle = %u, width = %u, height = %u\n",
+		handle, width, height);
+
+	if (!handle) {
+		pl111_cursor_disable();
+		return 0;
+	}
+
+	if ((width != PL111_MAX_CURSOR_WIDTH) ||
+	    (height != PL111_MAX_CURSOR_HEIGHT))
+		return -EINVAL;
+
+	obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
+	if (!obj) {
+		DRM_ERROR("Cannot find cursor object for handle = %d\n",
+			  handle);
+		return -ENOENT;
+	}
+
+	/*
+	 * We expect a PL111_MAX_CURSOR_WIDTH x PL111_MAX_CURSOR_HEIGHT
+	 * ARGB888 buffer object in the input.
+	 *
+	 */
+	if (obj->size < (PL111_MAX_CURSOR_WIDTH * PL111_MAX_CURSOR_HEIGHT * 4)) {
+		DRM_ERROR("Cannot set cursor with an obj size = %d\n",
+			  obj->size);
+		drm_gem_object_unreference_unlocked(obj);
+		return -EINVAL;
+	}
+
+	bo = PL111_BO_FROM_GEM(obj);
+	if (!(bo->type & PL111_BOT_DMA)) {
+		DRM_ERROR("Tried to set cursor with non DMA backed obj = %p\n",
+			  obj);
+		drm_gem_object_unreference_unlocked(obj);
+		return -EINVAL;
+	}
+
+	pl111_set_cursor_image(bo->backing_data.dma.fb_cpu_addr);
+
+	/*
+	 * Since we copy the contents of the buffer to the HW cursor internal
+	 * memory this GEM object is not needed anymore.
+	 */
+	drm_gem_object_unreference_unlocked(obj);
+
+	pl111_cursor_enable();
+
+	return 0;
+}
+
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+			    int x, int y)
+{
+	int x_clip = 0;
+	int y_clip = 0;
+
+	DRM_DEBUG("x %d y %d\n", x, y);
+
+	/*
+	 * The cursor image is clipped automatically at the screen limits when
+	 * it extends beyond the screen image to the right or bottom but
+	 * we must clip it using pl111 HW features for negative values.
+	 */
+	if (x < 0) {
+		x_clip = -x;
+		x = 0;
+	}
+	if (y < 0) {
+		y_clip = -y;
+		y = 0;
+	}
+
+	pl111_set_cursor_clipping(x_clip, y_clip);
+	pl111_set_cursor_position(x, y);
+
+	return 0;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c
new file mode 100755
index 0000000..6619c07
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c
@@ -0,0 +1,338 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_device.c
+ * Implementation of the Linux device driver entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+struct pl111_drm_dev_private priv;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void initial_kds_obtained(void *cb1, void *cb2)
+{
+	wait_queue_head_t *wait = (wait_queue_head_t *) cb1;
+	bool *cb_has_called = (bool *) cb2;
+
+	*cb_has_called = true;
+	wake_up(wait);
+}
+
+/* Must be called from within current_displaying_lock spinlock */
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	pl111_crtc->displaying_fb = flip_res->fb;
+
+	/* Release the previous buffer */
+	if (pl111_crtc->old_kds_res_set != NULL) {
+		/*
+		 * Can flip to the same buffer, but must not release the current
+		 * resource set
+		 */
+		BUG_ON(pl111_crtc->old_kds_res_set == flip_res->kds_res_set);
+		kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+	}
+	/* Record the current buffer, to release on the next buffer flip */
+	pl111_crtc->old_kds_res_set = flip_res->kds_res_set;
+}
+#endif
+
+void pl111_drm_preclose(struct drm_device *dev, struct drm_file *file_priv)
+{
+	DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+void pl111_drm_lastclose(struct drm_device *dev)
+{
+	DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+/*
+ * pl111 does not have a proper HW counter for vblank IRQs so enable_vblank
+ * and disable_vblank are just no op callbacks.
+ */
+static int pl111_enable_vblank(struct drm_device *dev, int crtc)
+{
+	DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+	return 0;
+}
+
+static void pl111_disable_vblank(struct drm_device *dev, int crtc)
+{
+	DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+}
+
+struct drm_mode_config_funcs mode_config_funcs = {
+	.fb_create = pl111_fb_create,
+};
+
+static int pl111_modeset_init(struct drm_device *dev)
+{
+	struct drm_mode_config *mode_config;
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	struct pl111_drm_connector *pl111_connector;
+	struct pl111_drm_encoder *pl111_encoder;
+	int ret = 0;
+
+	if (priv == NULL)
+		return -EINVAL;
+
+	drm_mode_config_init(dev);
+	mode_config = &dev->mode_config;
+	mode_config->funcs = &mode_config_funcs;
+	mode_config->min_width = 1;
+	mode_config->max_width = 1024;
+	mode_config->min_height = 1;
+	mode_config->max_height = 768;
+
+	priv->pl111_crtc = pl111_crtc_create(dev);
+	if (priv->pl111_crtc == NULL) {
+		pr_err("Failed to create pl111_drm_crtc\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	priv->number_crtcs = 1;
+
+	pl111_connector = pl111_connector_create(dev);
+	if (pl111_connector == NULL) {
+		pr_err("Failed to create pl111_drm_connector\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	pl111_encoder = pl111_encoder_create(dev, 1);
+	if (pl111_encoder == NULL) {
+		pr_err("Failed to create pl111_drm_encoder\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	ret = drm_mode_connector_attach_encoder(&pl111_connector->connector,
+						&pl111_encoder->encoder);
+	if (ret != 0) {
+		DRM_ERROR("Failed to attach encoder\n");
+		goto out_config;
+	}
+
+	pl111_connector->connector.encoder = &pl111_encoder->encoder;
+
+	pl111_encoder->encoder.crtc = &priv->pl111_crtc->crtc;
+
+	goto finish;
+
+out_config:
+	drm_mode_config_cleanup(dev);
+finish:
+	DRM_DEBUG("%s returned %d\n", __func__, ret);
+	return ret;
+}
+
+static void pl111_modeset_fini(struct drm_device *dev)
+{
+	drm_mode_config_cleanup(dev);
+}
+
+static int pl111_drm_load(struct drm_device *dev, unsigned long chipset)
+{
+	int ret = 0;
+
+	pr_info("DRM %s\n", __func__);
+
+	mutex_init(&priv.export_dma_buf_lock);
+	atomic_set(&priv.nr_flips_in_flight, 0);
+	init_waitqueue_head(&priv.wait_for_flips);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	ret = kds_callback_init(&priv.kds_cb, 1, show_framebuffer_on_crtc_cb);
+	if (ret != 0) {
+		pr_err("Failed to initialise KDS callback\n");
+		goto finish;
+	}
+
+	ret = kds_callback_init(&priv.kds_obtain_current_cb, 1,
+				initial_kds_obtained);
+	if (ret != 0) {
+		pr_err("Failed to init KDS obtain callback\n");
+		kds_callback_term(&priv.kds_cb);
+		goto finish;
+	}
+#endif
+
+	/* Create a cache for page flips */
+	priv.page_flip_slab = kmem_cache_create("page flip slab",
+			sizeof(struct pl111_drm_flip_resource), 0, 0, NULL);
+	if (priv.page_flip_slab == NULL) {
+		DRM_ERROR("Failed to create slab\n");
+		ret = -ENOMEM;
+		goto out_kds_callbacks;
+	}
+
+	dev->dev_private = &priv;
+
+	ret = pl111_modeset_init(dev);
+	if (ret != 0) {
+		pr_err("Failed to init modeset\n");
+		goto out_slab;
+	}
+
+	ret = pl111_device_init(dev);
+	if (ret != 0) {
+		DRM_ERROR("Failed to init MMIO and IRQ\n");
+		goto out_modeset;
+	}
+
+	ret = drm_vblank_init(dev, 1);
+	if (ret != 0) {
+		DRM_ERROR("Failed to init vblank\n");
+		goto out_vblank;
+	}
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(3, 13, 0))
+	platform_set_drvdata(dev->platformdev, dev);
+#endif
+
+	goto finish;
+
+out_vblank:
+	pl111_device_fini(dev);
+out_modeset:
+	pl111_modeset_fini(dev);
+out_slab:
+	kmem_cache_destroy(priv.page_flip_slab);
+out_kds_callbacks:
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	kds_callback_term(&priv.kds_obtain_current_cb);
+	kds_callback_term(&priv.kds_cb);
+#endif
+finish:
+	DRM_DEBUG_KMS("pl111_drm_load returned %d\n", ret);
+	return ret;
+}
+
+static int pl111_drm_unload(struct drm_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+
+	kmem_cache_destroy(priv.page_flip_slab);
+
+	drm_vblank_cleanup(dev);
+	pl111_modeset_fini(dev);
+	pl111_device_fini(dev);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	kds_callback_term(&priv.kds_obtain_current_cb);
+	kds_callback_term(&priv.kds_cb);
+#endif
+	return 0;
+}
+
+static struct vm_operations_struct pl111_gem_vm_ops = {
+	.fault = pl111_gem_fault,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+#else
+	.open = pl111_gem_vm_open,
+	.close = pl111_gem_vm_close,
+#endif
+};
+
+static const struct file_operations drm_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = pl111_gem_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+};
+
+static struct drm_ioctl_desc pl111_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(PL111_GEM_CREATE, pl111_drm_gem_create_ioctl,
+		DRM_CONTROL_ALLOW | DRM_UNLOCKED),
+};
+
+static struct drm_driver driver = {
+	.driver_features =
+		DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME,
+	.load = pl111_drm_load,
+	.unload = pl111_drm_unload,
+	.context_dtor = NULL,
+	.preclose = pl111_drm_preclose,
+	.lastclose = pl111_drm_lastclose,
+	.suspend = pl111_drm_suspend,
+	.resume = pl111_drm_resume,
+	.get_vblank_counter = drm_vblank_count,
+	.enable_vblank = pl111_enable_vblank,
+	.disable_vblank = pl111_disable_vblank,
+	.ioctls = pl111_ioctls,
+	.fops = &drm_fops,
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+	.dumb_create = pl111_dumb_create,
+	.dumb_destroy = pl111_dumb_destroy,
+	.dumb_map_offset = pl111_dumb_map_offset,
+	.gem_free_object = pl111_gem_free_object,
+	.gem_vm_ops = &pl111_gem_vm_ops,
+	.prime_handle_to_fd = &pl111_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_export = &pl111_gem_prime_export,
+	.gem_prime_import = &pl111_gem_prime_import,
+};
+
+int pl111_drm_init(struct platform_device *dev)
+{
+	int ret;
+	pr_info("DRM %s\n", __func__);
+	pr_info("PL111 DRM initialize, driver name: %s, version %d.%d\n",
+		DRIVER_NAME, DRIVER_MAJOR, DRIVER_MINOR);
+	driver.num_ioctls = ARRAY_SIZE(pl111_ioctls);
+	ret = 0;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 15, 0))
+	driver.kdriver.platform_device = dev;
+#endif
+	return drm_platform_init(&driver, dev);
+
+}
+
+void pl111_drm_exit(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	drm_platform_exit(&driver, dev);
+#else
+	drm_put_dev(platform_get_drvdata(dev));
+#endif
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
new file mode 100755
index 0000000..1131f46
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
@@ -0,0 +1,625 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_dma_buf.c
+ * Implementation of the dma_buf functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void obtain_kds_if_currently_displayed(struct drm_device *dev,
+						struct pl111_gem_bo *bo,
+						struct dma_buf *dma_buf)
+{
+	unsigned long shared[1] = { 0 };
+	struct kds_resource *resource_list[1];
+	struct kds_resource_set *kds_res_set;
+	struct drm_crtc *crtc;
+	bool cb_has_called = false;
+	unsigned long flags;
+	int err;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	DRM_DEBUG_KMS("Obtaining initial KDS res for bo:%p dma_buf:%p\n",
+			bo, dma_buf);
+
+	resource_list[0] = get_dma_buf_kds_resource(dma_buf);
+	get_dma_buf(dma_buf);
+
+	/*
+	 * Can't use kds_waitall(), because kbase will be let through due to
+	 * locked ignore'
+	 */
+	err = kds_async_waitall(&kds_res_set,
+				&priv.kds_obtain_current_cb, &wake,
+				&cb_has_called, 1, shared, resource_list);
+	BUG_ON(err);
+	wait_event(wake, cb_has_called == true);
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb) {
+			struct pl111_drm_framebuffer *pl111_fb;
+			struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+			pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+
+			if (pl111_fb->bo == bo) {
+				DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+				DRM_DEBUG_KMS(" is being displayed, keeping\n");
+				/* There shouldn't be a previous buffer to release */
+				BUG_ON(pl111_crtc->old_kds_res_set);
+
+				if (kds_res_set == NULL) {
+					err = kds_async_waitall(&kds_res_set,
+							&priv.kds_obtain_current_cb,
+							&wake, &cb_has_called,
+							1, shared, resource_list);
+					BUG_ON(err);
+					wait_event(wake, cb_has_called == true);
+				}
+
+				/* Current buffer will need releasing on next flip */
+				pl111_crtc->old_kds_res_set = kds_res_set;
+
+				/*
+				* Clear kds_res_set, so a new kds_res_set is allocated
+				* for additional CRTCs
+				*/
+				kds_res_set = NULL;
+			}
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+	}
+
+	/* kds_res_set will be NULL here if any CRTCs are displaying fb */
+	if (kds_res_set != NULL) {
+		DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+		DRM_DEBUG_KMS(" not being displayed, discarding\n");
+		/* They're not being displayed, release them */
+		kds_resource_set_release(&kds_res_set);
+	}
+
+	dma_buf_put(dma_buf);
+}
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0))
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+			struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = buffer->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+	mutex_lock(&dev->struct_mutex);
+	ret = drm_gem_mmap_obj(obj, obj->size, vma);
+	mutex_unlock(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#else
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+			struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = buffer->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+
+	DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+	mutex_lock(&dev->struct_mutex);
+
+	/* Check for valid size. */
+	if (obj->size < vma->vm_end - vma->vm_start)
+		return -EINVAL;
+
+	BUG_ON(!dev->driver->gem_vm_ops);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+#endif
+
+	vma->vm_ops = dev->driver->gem_vm_ops;
+	vma->vm_private_data = obj;
+
+	/* Take a ref for this mapping of the object, so that the fault
+	* handler can dereference the mmap offset's pointer to the object.
+	* This reference is cleaned up by the corresponding vm_close
+	* (which should happen whether the vma was created by this call, or
+	* by a vm_open due to mremap or partial unmap or whatever).
+	*/
+	drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+	pl111_drm_vm_open_locked(dev, vma);
+#else
+	drm_vm_open_locked(dev, vma);
+#endif
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#endif /* KERNEL_VERSION */
+
+static void pl111_dma_buf_release(struct dma_buf *buf)
+{
+	/*
+	 * Need to release the dma_buf's reference on the gem object it was
+	 * exported from, and also clear the gem object's export_dma_buf
+	 * pointer to this dma_buf as it no longer exists
+	 */
+	struct drm_gem_object *obj = (struct drm_gem_object *)buf->priv;
+	struct pl111_gem_bo *bo;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct drm_crtc *crtc;
+	unsigned long flags;
+#endif
+	bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("Releasing dma_buf %p, drm_gem_obj=%p\n", buf, obj);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	list_for_each_entry(crtc, &bo->gem_object.dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb) {
+			struct pl111_drm_framebuffer *pl111_fb;
+			struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+			pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+			if (pl111_fb->bo == bo) {
+				kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+				pl111_crtc->old_kds_res_set = NULL;
+			}
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+	}
+#endif
+	mutex_lock(&priv.export_dma_buf_lock);
+
+	obj->export_dma_buf = NULL;
+	drm_gem_object_unreference_unlocked(obj);
+
+	mutex_unlock(&priv.export_dma_buf_lock);
+}
+
+static int pl111_dma_buf_attach(struct dma_buf *buf, struct device *dev,
+				struct dma_buf_attachment *attach)
+{
+	DRM_DEBUG_KMS("Attaching dma_buf %p to device %p attach=%p\n", buf,
+			dev, attach);
+
+	attach->priv = dev;
+
+	return 0;
+}
+
+static void pl111_dma_buf_detach(struct dma_buf *buf,
+				struct dma_buf_attachment *attach)
+{
+	DRM_DEBUG_KMS("Detaching dma_buf %p attach=%p\n", attach->dmabuf,
+			attach);
+}
+
+/* Heavily from exynos_drm_dmabuf.c */
+static struct sg_table *pl111_dma_buf_map_dma_buf(struct dma_buf_attachment
+						*attach,
+						enum dma_data_direction
+						direction)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int size, n_pages, nents;
+	struct scatterlist *s, *sg;
+	struct sg_table *sgt;
+	int ret, i;
+
+	DRM_DEBUG_KMS("Mapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+		      attach, bo);
+
+	/*
+	 * Nothing to do, if we are trying to map a dmabuf that has been imported.
+	 * Just return the existing sgt.
+	 */
+	if (obj->import_attach) {
+		BUG_ON(!bo->sgt);
+		return bo->sgt;
+	}
+
+	size = obj->size;
+	n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	if (bo->type & PL111_BOT_DMA) {
+		sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+		if (!sgt) {
+			DRM_ERROR("Failed to allocate sg_table\n");
+			return ERR_PTR(-ENOMEM);
+		}
+
+		ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+		if (ret < 0) {
+			DRM_ERROR("Failed to allocate page table\n");
+			return ERR_PTR(-ENOMEM);
+		}
+		sg_dma_len(sgt->sgl) = size;
+		/* We use DMA coherent mappings for PL111_BOT_DMA so we must
+		 * use the virtual address returned at buffer allocation */
+		sg_set_buf(sgt->sgl, bo->backing_data.dma.fb_cpu_addr, size);
+		sg_dma_address(sgt->sgl) = bo->backing_data.dma.fb_dev_addr;
+	} else { /* PL111_BOT_SHM */
+		struct page **pages;
+		int pg = 0;
+
+		mutex_lock(&dev->struct_mutex);
+		pages = get_pages(obj);
+		if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev, "could not get pages: %ld\n",
+				PTR_ERR(pages));
+			return ERR_CAST(pages);
+		}
+		sgt = drm_prime_pages_to_sg(pages, n_pages);
+		if (sgt == NULL)
+			return ERR_PTR(-ENOMEM);
+
+		pl111_gem_sync_to_dma(bo);
+
+		/*
+		 * At this point the pages have been dma-mapped by either
+		 * get_pages() for non cached maps or pl111_gem_sync_to_dma()
+		 * for cached. So the physical addresses can be assigned
+		 * to the sg entries.
+		 * drm_prime_pages_to_sg() may have combined contiguous pages
+		 * into chunks so we assign the physical address of the first
+		 * page of a chunk to the chunk and check that the physical
+		 * addresses of the rest of the pages in that chunk are also
+		 * contiguous.
+		 */
+		sg = sgt->sgl;
+		nents = sgt->nents;
+
+		for_each_sg(sg, s, nents, i) {
+			int j, n_pages_in_chunk = sg_dma_len(s) >> PAGE_SHIFT;
+
+			sg_dma_address(s) = bo->backing_data.shm.dma_addrs[pg];
+
+			for (j = pg+1; j < pg+n_pages_in_chunk; j++) {
+				BUG_ON(bo->backing_data.shm.dma_addrs[j] !=
+						bo->backing_data.shm.dma_addrs[j-1]+PAGE_SIZE);
+			}
+
+			pg += n_pages_in_chunk;
+		}
+
+		mutex_unlock(&dev->struct_mutex);
+	}
+	bo->sgt = sgt;
+	return sgt;
+}
+
+static void pl111_dma_buf_unmap_dma_buf(struct dma_buf_attachment *attach,
+					struct sg_table *sgt,
+					enum dma_data_direction direction)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("Unmapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+			attach, bo);
+
+	sg_free_table(sgt);
+	kfree(sgt);
+	bo->sgt = NULL;
+}
+
+/*
+ * There isn't any operation here that can sleep or fail so this callback can
+ * be used for both kmap and kmap_atomic implementations.
+ */
+static void *pl111_dma_buf_kmap(struct dma_buf *dma_buf, unsigned long pageno)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	void *vaddr = NULL;
+
+	/* Make sure we cannot access outside the memory range */
+	if (((pageno + 1) << PAGE_SHIFT) > bo->gem_object.size)
+		return NULL;
+
+	if (bo->type & PL111_BOT_DMA) {
+		vaddr = (bo->backing_data.dma.fb_cpu_addr +
+						(pageno << PAGE_SHIFT));
+	} else {
+		vaddr = page_address(bo->backing_data.shm.pages[pageno]);
+	}
+
+	return vaddr;
+}
+
+/*
+ * Find a scatterlist that starts in "start" and has "len"
+ * or return a NULL dma_handle.
+ */
+static dma_addr_t pl111_find_matching_sg(struct sg_table *sgt, size_t start,
+					size_t len)
+{
+	struct scatterlist *sg;
+	unsigned int count;
+	size_t size = 0;
+	dma_addr_t dma_handle = 0;
+
+	/* Find a scatterlist that starts in "start" and has "len"
+	* or return error */
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		if ((size == start) && (len == sg_dma_len(sg))) {
+			dma_handle = sg_dma_address(sg);
+			break;
+		}
+		size += sg_dma_len(sg);
+	}
+	return dma_handle;
+}
+
+static int pl111_dma_buf_begin_cpu(struct dma_buf *dma_buf,
+					size_t start, size_t len,
+					enum dma_data_direction dir)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	struct sg_table *sgt = bo->sgt;
+	dma_addr_t dma_handle;
+
+	if ((start + len) > bo->gem_object.size)
+		return -EINVAL;
+
+	if (!(bo->type & PL111_BOT_SHM)) {
+		struct device *dev = bo->gem_object.dev->dev;
+
+		dma_handle = pl111_find_matching_sg(sgt, start, len);
+		if (!dma_handle)
+			return -EINVAL;
+
+		dma_sync_single_range_for_cpu(dev, dma_handle, 0, len, dir);
+	}
+	/* PL111_BOT_DMA uses coherents mappings, no need to sync */
+	return 0;
+}
+
+static void pl111_dma_buf_end_cpu(struct dma_buf *dma_buf,
+					size_t start, size_t len,
+					enum dma_data_direction dir)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	struct sg_table *sgt = bo->sgt;
+	dma_addr_t dma_handle;
+
+	if ((start + len) > bo->gem_object.size)
+		return;
+
+	if (!(bo->type & PL111_BOT_DMA)) {
+		struct device *dev = bo->gem_object.dev->dev;
+
+		dma_handle = pl111_find_matching_sg(sgt, start, len);
+		if (!dma_handle)
+			return;
+
+		dma_sync_single_range_for_device(dev, dma_handle, 0, len, dir);
+	}
+	/* PL111_BOT_DMA uses coherents mappings, no need to sync */
+}
+
+static struct dma_buf_ops pl111_dma_buf_ops = {
+	.release = &pl111_dma_buf_release,
+	.attach = &pl111_dma_buf_attach,
+	.detach = &pl111_dma_buf_detach,
+	.map_dma_buf = &pl111_dma_buf_map_dma_buf,
+	.unmap_dma_buf = &pl111_dma_buf_unmap_dma_buf,
+	.kmap_atomic = &pl111_dma_buf_kmap,
+	.kmap = &pl111_dma_buf_kmap,
+	.begin_cpu_access = &pl111_dma_buf_begin_cpu,
+	.end_cpu_access = &pl111_dma_buf_end_cpu,
+	.mmap = &pl111_dma_buf_mmap,
+};
+
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+				       struct dma_buf *dma_buf)
+{
+	struct dma_buf_attachment *attachment;
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+	struct scatterlist *sgl;
+	struct sg_table *sgt;
+	dma_addr_t cont_phys;
+	int ret = 0;
+	int i;
+
+	DRM_DEBUG_KMS("DRM %s on dev=%p dma_buf=%p\n", __func__, dev, dma_buf);
+
+	/* is this one of own objects? */
+	if (dma_buf->ops == &pl111_dma_buf_ops) {
+		obj = dma_buf->priv;
+		/* is it from our device? */
+		if (obj->dev == dev) {
+			/*
+			* Importing dmabuf exported from our own gem increases
+			* refcount on gem itself instead of f_count of dmabuf.
+			*/
+			drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+			/* before v3.10.0 we assume the caller has taken a ref on the dma_buf
+			 * we don't want it for self-imported buffers so drop it here */
+			dma_buf_put(dma_buf);
+#endif
+
+			return obj;
+		}
+	}
+
+	attachment = dma_buf_attach(dma_buf, dev->dev);
+	if (IS_ERR(attachment))
+		return ERR_CAST(attachment);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+	/* from 3.10.0 we assume the caller has not taken a ref so we take one here */
+	get_dma_buf(dma_buf);
+#endif
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto err_buf_detach;
+	}
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo) {
+		DRM_ERROR("%s: failed to allocate buffer object.\n", __func__);
+		ret = -ENOMEM;
+		goto err_unmap_attach;
+	}
+
+	/* Find out whether the buffer is contiguous or not */
+	sgl = sgt->sgl;
+	cont_phys = sg_phys(sgl);
+	bo->type |= PL111_BOT_DMA;
+	for_each_sg(sgt->sgl, sgl, sgt->nents, i) {
+		dma_addr_t real_phys = sg_phys(sgl);
+		if (real_phys != cont_phys) {
+			bo->type &= ~PL111_BOT_DMA;
+			break;
+		}
+		cont_phys += (PAGE_SIZE - sgl->offset);
+	}
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	ret = drm_gem_private_object_init(dev, &bo->gem_object,
+					  dma_buf->size);
+	if (ret != 0) {
+		DRM_ERROR("DRM could not import DMA GEM obj\n");
+		goto err_free_buffer;
+	}
+#else
+	drm_gem_private_object_init(dev, &bo->gem_object, dma_buf->size);
+#endif
+
+	if (bo->type & PL111_BOT_DMA) {
+		bo->backing_data.dma.fb_cpu_addr = sg_virt(sgt->sgl);
+		bo->backing_data.dma.fb_dev_addr = sg_phys(sgt->sgl);
+		DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, contiguous import\n", __func__, bo);
+	} else { /* PL111_BOT_SHM */
+		DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, non contiguous import\n", __func__, bo);
+	}
+
+	bo->gem_object.import_attach = attachment;
+	bo->sgt = sgt;
+
+	return &bo->gem_object;
+
+err_free_buffer:
+	kfree(bo);
+	bo = NULL;
+err_unmap_attach:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+err_buf_detach:
+	dma_buf_detach(dma_buf, attachment);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+	/* from 3.10.0 we will have taken a ref so drop it here */
+	dma_buf_put(dma_buf);
+#endif
+	return ERR_PTR(ret);
+}
+
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+				      struct drm_gem_object *obj, int flags)
+{
+	struct dma_buf *new_buf;
+	struct pl111_gem_bo *bo;
+	size_t size;
+
+	DRM_DEBUG("DRM %s on dev=%p drm_gem_obj=%p\n", __func__, dev, obj);
+	size = obj->size;
+
+	new_buf = dma_buf_export(obj /*priv */ , &pl111_dma_buf_ops, size,
+					flags | O_RDWR);
+	bo = PL111_BO_FROM_GEM(new_buf->priv);
+
+	/*
+	 * bo->gem_object.export_dma_buf not setup until after gem_prime_export
+	 * finishes
+	 */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Ensure that we hold the kds resource if it's the currently
+	 * displayed buffer.
+	 */
+	obtain_kds_if_currently_displayed(dev, bo, new_buf);
+#endif
+
+	DRM_DEBUG("Created dma_buf %p\n", new_buf);
+
+	return new_buf;
+}
+
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+				uint32_t handle, uint32_t flags, int *prime_fd)
+{
+	int result;
+	/*
+	 * This will re-use any existing exports, and calls
+	 * driver->gem_prime_export to do the first export when needed
+	 */
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p, handle=0x%.8x\n", __func__,
+			file_priv, handle);
+
+	mutex_lock(&priv.export_dma_buf_lock);
+	result = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags,
+						prime_fd);
+	mutex_unlock(&priv.export_dma_buf_lock);
+
+	return result;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c
new file mode 100755
index 0000000..78c91c0
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_encoder.c
+ * Implementation of the encoder functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+bool pl111_encoder_helper_mode_fixup(struct drm_encoder *encoder,
+					struct drm_display_mode *mode,
+					struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+	return true;
+}
+
+void pl111_encoder_helper_prepare(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_commit(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_mode_set(struct drm_encoder *encoder,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_disable(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct pl111_drm_encoder *pl111_encoder =
+					PL111_ENCODER_FROM_ENCODER(encoder);
+
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+
+	drm_encoder_cleanup(encoder);
+	kfree(pl111_encoder);
+}
+
+const struct drm_encoder_funcs encoder_funcs = {
+	.destroy = pl111_encoder_destroy,
+};
+
+const struct drm_encoder_helper_funcs encoder_helper_funcs = {
+	.mode_fixup = pl111_encoder_helper_mode_fixup,
+	.prepare = pl111_encoder_helper_prepare,
+	.commit = pl111_encoder_helper_commit,
+	.mode_set = pl111_encoder_helper_mode_set,
+	.disable = pl111_encoder_helper_disable,
+};
+
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+						int possible_crtcs)
+{
+	struct pl111_drm_encoder *pl111_encoder;
+
+	pl111_encoder = kzalloc(sizeof(struct pl111_drm_encoder), GFP_KERNEL);
+	if (pl111_encoder == NULL) {
+		pr_err("Failed to allocated pl111_drm_encoder\n");
+		return NULL;
+	}
+
+	drm_encoder_init(dev, &pl111_encoder->encoder, &encoder_funcs,
+				DRM_MODE_ENCODER_DAC);
+
+	drm_encoder_helper_add(&pl111_encoder->encoder, &encoder_helper_funcs);
+
+	pl111_encoder->encoder.possible_crtcs = possible_crtcs;
+
+	return pl111_encoder;
+}
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c
new file mode 100755
index 0000000..f575c9e
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c
@@ -0,0 +1,202 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_fb.c
+ * Implementation of the framebuffer functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_crtc.h>
+#include "pl111_drm.h"
+
+static void pl111_fb_destroy(struct drm_framebuffer *framebuffer)
+{
+	struct pl111_drm_framebuffer *pl111_fb;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct drm_crtc *crtc;
+	unsigned long flags;
+#endif
+	DRM_DEBUG_KMS("Destroying framebuffer 0x%p...\n", framebuffer);
+
+	pl111_fb = PL111_FB_FROM_FRAMEBUFFER(framebuffer);
+
+	/*
+	 * Because flips are deferred, wait for all previous flips to complete
+	 */
+	wait_event(priv.wait_for_flips,
+			atomic_read(&priv.nr_flips_in_flight) == 0);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Release KDS resources if it's currently being displayed. Only occurs
+	 * when the last framebuffer is destroyed.
+	 */
+	list_for_each_entry(crtc, &framebuffer->dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb == framebuffer) {
+			/* Release the current buffers */
+			if (pl111_crtc->old_kds_res_set != NULL) {
+				DRM_DEBUG_KMS("Releasing KDS resources for ");
+				DRM_DEBUG_KMS("displayed 0x%p\n", framebuffer);
+				kds_resource_set_release(
+					&pl111_crtc->old_kds_res_set);
+			}
+			pl111_crtc->old_kds_res_set = NULL;
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock,
+								flags);
+	}
+#endif
+	drm_framebuffer_cleanup(framebuffer);
+
+	if ((pl111_fb->bo != NULL) && (&pl111_fb->bo->gem_object != NULL))
+		drm_gem_object_unreference_unlocked(&pl111_fb->bo->gem_object);
+
+	kfree(pl111_fb);
+
+	DRM_DEBUG_KMS("Destroyed framebuffer 0x%p\n", framebuffer);
+}
+
+static int pl111_fb_create_handle(struct drm_framebuffer *fb,
+				struct drm_file *file_priv,
+				unsigned int *handle)
+{
+	struct pl111_gem_bo *bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	DRM_DEBUG_KMS("DRM %s on fb=%p\n", __func__, fb);
+
+	if (bo == NULL)
+		return -EINVAL;
+
+	return drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+}
+
+const struct drm_framebuffer_funcs fb_funcs = {
+	.destroy = pl111_fb_destroy,
+	.create_handle = pl111_fb_create_handle,
+};
+
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct pl111_drm_framebuffer *pl111_fb = NULL;
+	struct drm_framebuffer *fb = NULL;
+	struct drm_gem_object *gem_obj;
+	struct pl111_gem_bo *bo;
+	int err = 0;
+	size_t min_size;
+	int bpp;
+	int depth;
+
+	pr_info("DRM %s\n", __func__);
+	gem_obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
+	if (gem_obj == NULL) {
+		DRM_ERROR("Could not get gem obj from handle to create fb\n");
+		err = -ENOENT;
+		goto error;
+	}
+
+	bo = PL111_BO_FROM_GEM(gem_obj);
+	drm_fb_get_bpp_depth(mode_cmd->pixel_format, &depth, &bpp);
+
+	if (mode_cmd->pitches[0] < mode_cmd->width * (bpp >> 3)) {
+		DRM_ERROR("bad pitch %u for plane 0\n", mode_cmd->pitches[0]);
+		err = -EINVAL;
+		goto error;
+	}
+
+	min_size = (mode_cmd->height - 1) * mode_cmd->pitches[0]
+				+ mode_cmd->width * (bpp >> 3);
+
+	if (bo->gem_object.size < min_size) {
+		DRM_ERROR("gem obj size < min size\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* We can't scan out SHM so we can't create an fb for it */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		DRM_ERROR("Can't create FB for non-scanout buffer\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	switch ((char)(mode_cmd->pixel_format & 0xFF)) {
+	case 'Y':
+	case 'U':
+	case 'V':
+	case 'N':
+	case 'T':
+		DRM_ERROR("YUV formats not supported\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	pl111_fb = kzalloc(sizeof(struct pl111_drm_framebuffer), GFP_KERNEL);
+	if (pl111_fb == NULL) {
+		DRM_ERROR("Could not allocate pl111_drm_framebuffer\n");
+		err = -ENOMEM;
+		goto error;
+	}
+	fb = &pl111_fb->fb;
+
+	err = drm_framebuffer_init(dev, fb, &fb_funcs);
+	if (err) {
+		DRM_ERROR("drm_framebuffer_init failed\n");
+		kfree(fb);
+		fb = NULL;
+		goto error;
+	}
+
+	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+	/* The only framebuffer formats supported by pl111
+	 * are 16 bpp or 32 bpp with 24 bit depth.
+	 * See clcd_enable()
+	 */
+	if (!((fb->bits_per_pixel == 16) ||
+		  (fb->bits_per_pixel == 32 && fb->depth == 24))) {
+		DRM_DEBUG_KMS("unsupported pixel format bpp=%d, depth=%d\n", fb->bits_per_pixel, fb->depth);
+		drm_framebuffer_cleanup(fb);
+		kfree(fb);
+		fb = NULL;
+		err = -EINVAL;
+		goto error;
+	}
+
+	pl111_fb->bo = bo;
+
+	DRM_DEBUG_KMS("Created fb 0x%p with gem_obj 0x%p physaddr=0x%.8x\n",
+			fb, gem_obj, bo->backing_data.dma.fb_dev_addr);
+
+	return fb;
+
+error:
+	if (gem_obj != NULL)
+		drm_gem_object_unreference_unlocked(gem_obj);
+
+	return ERR_PTR(err);
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h
new file mode 100755
index 0000000..494baa0
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_funcs.h
+ * Function prototypes for PL111 DRM
+ */
+
+#ifndef PL111_DRM_FUNCS_H_
+#define PL111_DRM_FUNCS_H_
+
+/* Platform Initialisation */
+int pl111_drm_init(struct platform_device *dev);
+void pl111_drm_exit(struct platform_device *dev);
+
+/* KDS Callbacks */
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2);
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res);
+
+/* CRTC Functions */
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev);
+struct pl111_drm_crtc *pl111_crtc_dummy_create(struct drm_device *dev);
+void pl111_crtc_destroy(struct drm_crtc *crtc);
+
+bool pl111_crtc_is_fb_currently_displayed(struct drm_device *dev,
+					struct drm_framebuffer *fb);
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+			struct drm_framebuffer *fb, bool page_flip,
+			struct drm_pending_vblank_event *event);
+
+/* Common IRQ handler */
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc);
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+			   struct drm_file *file_priv,
+			   uint32_t handle,
+			   uint32_t width,
+			   uint32_t height);
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+			    int x, int y);
+
+/* Connector Functions */
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev);
+void pl111_connector_destroy(struct drm_connector *connector);
+struct pl111_drm_connector *pl111_connector_dummy_create(struct drm_device
+								*dev);
+
+/* Encoder Functions */
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+						int possible_crtcs);
+struct pl111_drm_encoder *pl111_encoder_dummy_create(struct drm_device *dev,
+							int possible_crtcs);
+void pl111_encoder_destroy(struct drm_encoder *encoder);
+
+/* Frame Buffer Functions */
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd);
+
+/* VMA Functions */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma);
+struct page **get_pages(struct drm_gem_object *obj);
+void put_pages(struct drm_gem_object *obj, struct page **pages);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+                struct vm_area_struct *vma);
+void pl111_gem_vm_open(struct vm_area_struct *vma);
+void pl111_gem_vm_close(struct vm_area_struct *vma);
+#endif
+
+/* Suspend Functions */
+int pl111_drm_resume(struct drm_device *dev);
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state);
+
+/* GEM Functions */
+int pl111_dumb_create(struct drm_file *file_priv,
+			struct drm_device *dev,
+			struct drm_mode_create_dumb *args);
+int pl111_dumb_destroy(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle);
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle,
+			uint64_t *offset);
+void pl111_gem_free_object(struct drm_gem_object *obj);
+
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+			struct vm_area_struct *vma, size_t size);
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff);
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo);
+
+/* DMA BUF Functions */
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+				       struct dma_buf *dma_buf);
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+			uint32_t handle, uint32_t flags, int *prime_fd);
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+				struct drm_gem_object *obj, int flags);
+
+/* Pl111 Functions */
+void show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource
+					*flip_res, struct drm_framebuffer *fb);
+int clcd_disable(struct drm_crtc *crtc);
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res);
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id);
+int pl111_amba_remove(struct amba_device *dev);
+
+int pl111_device_init(struct drm_device *dev);
+void pl111_device_fini(struct drm_device *dev);
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+					struct clcd_regs *timing);
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+					struct drm_display_mode *mode);
+#endif /* PL111_DRM_FUNCS_H_ */
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c
new file mode 100755
index 0000000..13fb256
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c
@@ -0,0 +1,476 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_gem.c
+ * Implementation of the GEM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <asm/cacheflush.h>
+#include <asm/outercache.h>
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#include <linux/dma-attrs.h>
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0))
+#include <drm/drm_vma_manager.h>
+#endif
+
+void pl111_gem_free_object(struct drm_gem_object *obj)
+{
+	struct pl111_gem_bo *bo;
+	struct drm_device *dev = obj->dev;
+	DRM_DEBUG_KMS("DRM %s on drm_gem_object=%p\n", __func__, obj);
+
+	bo = PL111_BO_FROM_GEM(obj);
+
+	if (obj->import_attach)
+		drm_prime_gem_destroy(obj, bo->sgt);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	if (obj->map_list.map != NULL)
+		drm_gem_free_mmap_offset(obj);
+#else
+	drm_gem_free_mmap_offset(obj);
+#endif
+	/*
+	 * Only free the backing memory if the object has not been imported.
+	 * If it has been imported, the exporter is in charge to free that
+	 * once dmabuf's refcount becomes 0.
+	 */
+	if (obj->import_attach)
+		goto imported_out;
+
+	if (bo->type & PL111_BOT_DMA) {
+		dma_free_writecombine(dev->dev, obj->size,
+				bo->backing_data.dma.fb_cpu_addr,
+				bo->backing_data.dma.fb_dev_addr);
+	} else if (bo->backing_data.shm.pages != NULL) {
+		put_pages(obj, bo->backing_data.shm.pages);
+	}
+
+imported_out:
+	drm_gem_object_release(obj);
+
+	kfree(bo);
+
+	DRM_DEBUG_KMS("Destroyed dumb_bo handle 0x%p\n", bo);
+}
+
+static int pl111_gem_object_create(struct drm_device *dev, u64 size,
+				   u32 flags, struct drm_file *file_priv,
+				   u32 *handle)
+{
+	int ret = 0;
+	struct pl111_gem_bo *bo = NULL;
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (bo == NULL) {
+		ret = -ENOMEM;
+		goto finish;
+	}
+
+	bo->type = flags;
+
+#ifndef ARCH_HAS_SG_CHAIN
+	/*
+	 * If the ARCH can't chain we can't have non-contiguous allocs larger
+	 * than a single sg can hold.
+	 * In this case we fall back to using contiguous memory
+	 */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		long unsigned int n_pages =
+				PAGE_ALIGN(size) >> PAGE_SHIFT;
+		if (n_pages > SG_MAX_SINGLE_ALLOC) {
+			bo->type |= PL111_BOT_DMA;
+			/*
+			 * Non-contiguous allocation request changed to
+			 * contigous
+			 */
+			DRM_INFO("non-contig alloc to contig %lu > %lu pages.",
+					n_pages, SG_MAX_SINGLE_ALLOC);
+		}
+	}
+#endif
+	if (bo->type & PL111_BOT_DMA) {
+		/* scanout compatible - use physically contiguous buffer */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_attrs(dev->dev, size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL,
+					&attrs);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_attrs failed\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+#else
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_writecombine(dev->dev, size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_writecombine failed\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+		ret = drm_gem_private_object_init(dev, &bo->gem_object,
+							size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not initialise GEM object\n");
+			goto free_dma;
+		}
+#else
+		drm_gem_private_object_init(dev, &bo->gem_object, size);
+#endif
+
+	} else { /* PL111_BOT_SHM */
+		/* not scanout compatible - use SHM backed object */
+		ret = drm_gem_object_init(dev, &bo->gem_object, size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not init SHM backed GEM obj\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+		DRM_DEBUG_KMS("Num bytes: %d\n", bo->gem_object.size);
+	}
+
+	DRM_DEBUG("s=%llu, flags=0x%x, %s 0x%.8lx, type=%d\n",
+		size, flags,
+		(bo->type & PL111_BOT_DMA) ? "physaddr" : "shared page array",
+		(bo->type & PL111_BOT_DMA) ?
+			(unsigned long)bo->backing_data.dma.fb_dev_addr:
+			(unsigned long)bo->backing_data.shm.pages,
+			bo->type);
+
+	ret = drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+	if (ret != 0) {
+		DRM_ERROR("DRM failed to create GEM handle\n");
+		goto obj_release;
+	}
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&bo->gem_object);
+	
+	return 0;
+
+obj_release:
+	drm_gem_object_release(&bo->gem_object);
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+free_dma:
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	if (bo->type & PL111_BOT_DMA) {
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(dev->dev, size,
+			bo->backing_data.dma.fb_cpu_addr,
+			bo->backing_data.dma.fb_dev_addr,
+			&attrs);
+		}
+#else
+	if (bo->type & PL111_BOT_DMA)
+		dma_free_writecombine(dev->dev, size,
+			bo->backing_data.dma.fb_cpu_addr,
+			bo->backing_data.dma.fb_dev_addr);
+#endif
+free_bo:
+	kfree(bo);
+finish:
+	return ret;
+}
+
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv)
+{
+	struct drm_pl111_gem_create *args = data;
+	uint32_t bytes_pp;
+
+	/* Round bpp up, to allow for case where bpp<8 */
+	bytes_pp = args->bpp >> 3;
+	if (args->bpp & ((1 << 3) - 1))
+		bytes_pp++;
+
+	if (args->flags & ~PL111_BOT_MASK) {
+		DRM_ERROR("wrong flags: 0x%x\n", args->flags);
+		return -EINVAL;
+	}
+
+	args->pitch = ALIGN(args->width * bytes_pp, 64);
+	args->size = PAGE_ALIGN(args->pitch * args->height);
+
+	DRM_DEBUG_KMS("gem_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+			args->width, args->height, args->pitch, args->bpp,
+			bytes_pp, args->size, args->flags);
+
+	return pl111_gem_object_create(dev, args->size, args->flags, file_priv,
+					&args->handle);
+}
+
+int pl111_dumb_create(struct drm_file *file_priv,
+		struct drm_device *dev, struct drm_mode_create_dumb *args)
+{
+	uint32_t bytes_pp;
+
+	/* Round bpp up, to allow for case where bpp<8 */
+	bytes_pp = args->bpp >> 3;
+	if (args->bpp & ((1 << 3) - 1))
+		bytes_pp++;
+
+	if (args->flags) {
+		DRM_ERROR("flags must be zero: 0x%x\n", args->flags);
+		return -EINVAL;
+	}
+
+	args->pitch = ALIGN(args->width * bytes_pp, 64);
+	args->size = PAGE_ALIGN(args->pitch * args->height);
+
+	DRM_DEBUG_KMS("dumb_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+			args->width, args->height, args->pitch, args->bpp,
+			bytes_pp, args->size, args->flags);
+
+	return pl111_gem_object_create(dev, args->size,
+				       PL111_BOT_DMA | PL111_BOT_UNCACHED,
+				       file_priv, &args->handle);
+}
+
+int pl111_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
+		uint32_t handle)
+{
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+			file_priv, handle);
+	return drm_gem_handle_delete(file_priv, handle);
+}
+
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle,
+			uint64_t *offset)
+{
+	struct drm_gem_object *obj;
+	int ret = 0;
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+			file_priv, handle);
+
+	/* GEM does all our handle to object mapping */
+	obj = drm_gem_object_lookup(dev, file_priv, handle);
+	if (obj == NULL) {
+		ret = -ENOENT;
+		goto fail;
+	}
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	if (obj->map_list.map == NULL) {
+		ret = drm_gem_create_mmap_offset(obj);
+		if (ret != 0) {
+			drm_gem_object_unreference_unlocked(obj);
+			goto fail;
+		}
+	}
+#else
+	ret = drm_gem_create_mmap_offset(obj);
+	if (ret != 0) {
+		drm_gem_object_unreference_unlocked(obj);
+		goto fail;
+	}
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	*offset = (uint64_t) obj->map_list.hash.key << PAGE_SHIFT;
+#else
+	*offset = drm_vma_node_offset_addr(&obj->vma_node);
+	DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
+#endif
+
+	drm_gem_object_unreference_unlocked(obj);
+fail:
+	return ret;
+}
+
+/* sync the buffer for DMA access */
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo)
+{
+	struct drm_device *dev = bo->gem_object.dev;
+
+	if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED)) {
+		int i, npages = bo->gem_object.size >> PAGE_SHIFT;
+		struct page **pages = bo->backing_data.shm.pages;
+		bool dirty = false;
+
+		for (i = 0; i < npages; i++) {
+			if (!bo->backing_data.shm.dma_addrs[i]) {
+				DRM_DEBUG("%s: dma map page=%d bo=%p\n", __func__, i, bo);
+				 bo->backing_data.shm.dma_addrs[i] =
+					dma_map_page(dev->dev, pages[i], 0,
+					PAGE_SIZE, DMA_BIDIRECTIONAL);
+				dirty = true;
+			}
+		}
+
+		if (dirty) {
+			DRM_DEBUG("%s: zap ptes (and flush cache) bo=%p\n", __func__, bo);
+			/*
+			 * TODO MIDEGL-1813
+			 * 
+			 * Use flush_cache_page() and outer_flush_range() to
+			 * flush only the user space mappings of the dirty pages
+			 */
+			flush_cache_all();
+			outer_flush_all();
+			unmap_mapping_range(bo->gem_object.filp->f_mapping, 0,
+					bo->gem_object.size, 1);
+		}
+	}
+}
+
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff)
+{
+	struct drm_device *dev = bo->gem_object.dev;
+
+	/*
+	 * TODO MIDEGL-1808
+	 * 
+	 * The following check was meant to detect if the CPU is trying to access
+	 * a buffer that is currently mapped for DMA accesses, which is illegal
+	 * as described by the DMA-API.
+	 * 
+	 * However, some of our tests are trying to do that, which triggers the message
+	 * below and avoids dma-unmapping the pages not to annoy the DMA device but that
+	 * leads to the test failing because of caches not being properly flushed.
+	 */
+
+	/*
+	if (bo->sgt) {
+		DRM_ERROR("%s: the CPU is trying to access a dma-mapped buffer\n",  __func__);
+		return;
+	}
+	*/
+
+	if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED) &&
+	     bo->backing_data.shm.dma_addrs[pgoff]) {
+		DRM_DEBUG("%s: unmap bo=%p (s=%d), paddr=%08x\n",
+			  __func__, bo,  bo->gem_object.size,
+			  bo->backing_data.shm.dma_addrs[pgoff]);
+		dma_unmap_page(dev->dev, bo->backing_data.shm.dma_addrs[pgoff],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		bo->backing_data.shm.dma_addrs[pgoff] = 0;
+	}
+}
+
+/* Based on omapdrm driver */
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+		 struct vm_area_struct *vma, size_t size)
+{
+	DRM_DEBUG("DRM %s on drm_gem_object=%p, pl111_gem_bo=%p type=%08x\n",
+			__func__, obj, bo, bo->type);
+
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_flags |= VM_MIXEDMAP;
+
+	if (bo->type & PL111_BOT_WC) {
+		vma->vm_page_prot =
+			pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	} else if (bo->type & PL111_BOT_CACHED) {
+		/*
+		 * Objects that do not have a filp (DMA backed) can't be
+		 * mapped as cached now. Write-combine should be enough.
+		 */
+		if (WARN_ON(!obj->filp))
+			return -EINVAL;
+
+		/*
+		 * As explained in Documentation/dma-buf-sharing.txt
+		 * we need this trick so that we can manually zap ptes
+		 * in order to fake coherency.
+		 */
+		fput(vma->vm_file);
+		get_file(obj->filp);
+		vma->vm_file = obj->filp;
+
+		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	} else { /* PL111_BOT_UNCACHED */
+		vma->vm_page_prot =
+			pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+	}
+	return 0;
+}
+
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma)
+{
+	int ret;
+	struct drm_file *priv = file_priv->private_data;
+	struct drm_device *dev = priv->minor->dev;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_hash_item *hash;
+	struct drm_local_map *map = NULL;
+#else
+	struct drm_vma_offset_node *node;
+#endif
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+
+	DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash);
+	map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+	obj = map->handle;
+#else
+	node = drm_vma_offset_exact_lookup(dev->vma_offset_manager,
+			vma->vm_pgoff,
+			vma_pages(vma));
+	obj = container_of(node, struct drm_gem_object, vma_node);
+#endif
+	bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("DRM %s on pl111_gem_bo %p bo->type 0x%08x\n", __func__, bo, bo->type);
+
+	/* for an imported buffer we let the exporter handle the mmap */
+	if (obj->import_attach)
+		return dma_buf_mmap(obj->import_attach->dmabuf, vma, 0);
+
+	ret = drm_gem_mmap(file_priv, vma);
+	if (ret < 0) {
+		DRM_ERROR("failed to mmap\n");
+		return ret;
+	}
+
+	/* Our page fault handler uses the page offset calculated from the vma,
+	 * so we need to remove the gem cookie offset specified in the call.
+	 */
+	vma->vm_pgoff = 0;
+
+	return pl111_bo_mmap(obj, bo, vma, vma->vm_end - vma->vm_start);
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c
new file mode 100755
index 0000000..1d613d0
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c
@@ -0,0 +1,417 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_pl111.c
+ * PL111 specific functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+/* This can't be called from IRQ context, due to clk_get() and board->enable */
+static int clcd_enable(struct drm_framebuffer *fb)
+{
+	__u32 cntl;
+	struct clcd_board *board;
+
+	pr_info("DRM %s\n", __func__);
+
+	clk_prepare_enable(priv.clk);
+
+	/* Enable and Power Up */
+	cntl = CNTL_LCDEN | CNTL_LCDTFT | CNTL_LCDPWR | CNTL_LCDVCOMP(1);
+	DRM_DEBUG_KMS("fb->bits_per_pixel = %d\n", fb->bits_per_pixel);
+	if (fb->bits_per_pixel == 16)
+		cntl |= CNTL_LCDBPP16_565;
+	else if (fb->bits_per_pixel == 32 && fb->depth == 24)
+		cntl |= CNTL_LCDBPP24;
+	else
+		BUG_ON(1);
+
+	cntl |= CNTL_BGR;
+
+	writel(cntl, priv.regs + CLCD_PL111_CNTL);
+
+	if (priv.amba_dev->dev.platform_data) {
+		board = priv.amba_dev->dev.platform_data;
+
+		if (board->enable)
+			board->enable(NULL);
+	}
+
+	/* Enable Interrupts */
+	writel(CLCD_IRQ_NEXTBASE_UPDATE, priv.regs + CLCD_PL111_IENB);
+
+	return 0;
+}
+
+int clcd_disable(struct drm_crtc *crtc)
+{
+	struct clcd_board *board;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	unsigned long flags;
+#endif
+
+	pr_info("DRM %s\n", __func__);
+
+	/* Disable Interrupts */
+	writel(0x00000000, priv.regs + CLCD_PL111_IENB);
+
+	if (priv.amba_dev->dev.platform_data) {
+		board = priv.amba_dev->dev.platform_data;
+
+		if (board->disable)
+			board->disable(NULL);
+	}
+
+	/* Disable and Power Down */
+	writel(0, priv.regs + CLCD_PL111_CNTL);
+
+	/* Disable clock */
+	clk_disable_unprepare(priv.clk);
+
+	pl111_crtc->last_bpp = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+	/* Release the previous buffers */
+	if (pl111_crtc->old_kds_res_set != NULL)
+		kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+
+	pl111_crtc->old_kds_res_set = NULL;
+	spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+	return 0;
+}
+
+/*
+ * To avoid a possible race where "pl111_crtc->current_update_res" has
+ * been updated (non NULL) but the corresponding scanout buffer has not been
+ * written to the base registers we must always call this function holding
+ * the "base_update_lock" spinlock with IRQs disabled (spin_lock_irqsave()).
+ */
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	struct drm_framebuffer *fb;
+	struct pl111_gem_bo *bo;
+	size_t min_size;
+	fb = flip_res->fb;
+	bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+
+
+
+	min_size = (fb->height - 1) * fb->pitches[0]
+	         + fb->width * (fb->bits_per_pixel >> 3);
+
+	BUG_ON(bo->gem_object.size < min_size);
+
+	/* Don't even attempt PL111_BOT_SHM, it's not contiguous */
+	BUG_ON(bo->type != PL111_BOT_DMA);
+
+	/*
+	 * Note the buffer for releasing after IRQ, and don't allow any more
+	 * updates until then.
+	 *
+	 * This clcd controller latches the new address on next vsync. Address
+	 * latching is indicated by CLCD_IRQ_NEXTBASE_UPDATE, and so we must
+	 * wait for that before releasing the previous buffer's kds
+	 * resources. Otherwise, we'll allow writers to write to the old buffer
+	 * whilst it is still being displayed
+	 */
+	pl111_crtc->current_update_res = flip_res;
+
+	DRM_DEBUG_KMS("Displaying fb 0x%p, dumb_bo 0x%p, physaddr %.8x\n",
+			fb, bo, bo->backing_data.dma.fb_dev_addr);
+	
+	if (drm_vblank_get(pl111_crtc->crtc.dev, pl111_crtc->crtc_index) < 0)
+		DRM_ERROR("Could not get vblank reference for crtc %d\n",
+				pl111_crtc->crtc_index);
+
+	/* Set the scanout buffer */
+	writel(bo->backing_data.dma.fb_dev_addr, priv.regs + CLCD_UBAS);
+	writel(bo->backing_data.dma.fb_dev_addr +
+		((fb->height - 1) * fb->pitches[0]), priv.regs + CLCD_LBAS);
+}
+
+void
+show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource *flip_res,
+					struct drm_framebuffer *fb)
+{
+	unsigned long irq_flags;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+	spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+	if (list_empty(&pl111_crtc->update_queue) &&
+			!pl111_crtc->current_update_res) {
+		do_flip_to_res(flip_res);
+	} else {
+		/*
+		 * Enqueue the update to occur on a future IRQ
+		 * This only happens on triple-or-greater buffering
+		 */
+		DRM_DEBUG_KMS("Deferring 3+ buffered flip to fb %p to IRQ\n",
+				fb);
+		list_add_tail(&flip_res->link, &pl111_crtc->update_queue);
+	}
+	spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+	if (!flip_res->page_flip && (pl111_crtc->last_bpp == 0 ||
+			pl111_crtc->last_bpp != fb->bits_per_pixel ||
+			!drm_mode_equal(pl111_crtc->new_mode,
+					pl111_crtc->current_mode))) {
+		struct clcd_regs timing;
+
+		pl111_convert_drm_mode_to_timing(pl111_crtc->new_mode, &timing);
+
+		DRM_DEBUG_KMS("Set timing: %08X:%08X:%08X:%08X clk=%ldHz\n",
+				timing.tim0, timing.tim1, timing.tim2,
+				timing.tim3, timing.pixclock);
+
+		/* This is the actual mode setting part */
+		clk_set_rate(priv.clk, timing.pixclock);
+
+		writel(timing.tim0, priv.regs + CLCD_TIM0);
+		writel(timing.tim1, priv.regs + CLCD_TIM1);
+		writel(timing.tim2, priv.regs + CLCD_TIM2);
+		writel(timing.tim3, priv.regs + CLCD_TIM3);
+
+		clcd_enable(fb);
+		pl111_crtc->last_bpp = fb->bits_per_pixel;
+	}
+
+	if (!flip_res->page_flip) {
+		drm_mode_destroy(flip_res->crtc->dev, pl111_crtc->current_mode);
+		pl111_crtc->current_mode = pl111_crtc->new_mode;
+		pl111_crtc->new_mode = NULL;
+	}
+
+	BUG_ON(!pl111_crtc->current_mode);
+
+	/*
+	 * If IRQs weren't enabled before, they are now. This will eventually
+	 * cause flip_res to be released via pl111_common_irq, which updates
+	 * every time the Base Address is latched (i.e. every frame, regardless
+	 * of whether we update the base address or not)
+	 */
+}
+
+irqreturn_t pl111_irq(int irq, void *data)
+{
+	u32 irq_stat;
+	struct pl111_drm_crtc *pl111_crtc = priv.pl111_crtc;
+
+	irq_stat = readl(priv.regs + CLCD_PL111_MIS);
+
+	if (!irq_stat)
+		return IRQ_NONE;
+
+	if (irq_stat & CLCD_IRQ_NEXTBASE_UPDATE)
+		pl111_common_irq(pl111_crtc);
+
+	/* Clear the interrupt once done */
+	writel(irq_stat, priv.regs + CLCD_PL111_ICR);
+
+	return IRQ_HANDLED;
+}
+
+int pl111_device_init(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	int ret;
+
+	if (priv == NULL) {
+		pr_err("%s no private data\n", __func__);
+		return -EINVAL;
+	}
+
+	if (priv->amba_dev == NULL) {
+		pr_err("%s no amba device found\n", __func__);
+		return -EINVAL;
+	}
+
+	/* set up MMIO for register access */
+	priv->mmio_start = priv->amba_dev->res.start;
+	priv->mmio_len = resource_size(&priv->amba_dev->res);
+
+	DRM_DEBUG_KMS("mmio_start=%lu, mmio_len=%u\n", priv->mmio_start,
+			priv->mmio_len);
+
+	priv->regs = ioremap(priv->mmio_start, priv->mmio_len);
+	if (priv->regs == NULL) {
+		pr_err("%s failed mmio\n", __func__);
+		return -EINVAL;
+	}
+
+	/* turn off interrupts */
+	writel(0, priv->regs + CLCD_PL111_IENB);
+
+	ret = request_irq(priv->amba_dev->irq[0], pl111_irq, 0,
+				"pl111_irq_handler", NULL);
+	if (ret != 0) {
+		pr_err("%s failed %d\n", __func__, ret);
+		goto out_mmio;
+	}
+
+	goto finish;
+
+out_mmio:
+	iounmap(priv->regs);
+finish:
+	DRM_DEBUG_KMS("pl111_device_init returned %d\n", ret);
+	return ret;
+}
+
+void pl111_device_fini(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	u32 cntl;
+
+	if (priv == NULL || priv->regs == NULL)
+		return;
+
+	free_irq(priv->amba_dev->irq[0], NULL);
+
+	cntl = readl(priv->regs + CLCD_PL111_CNTL);
+
+	cntl &= ~CNTL_LCDEN;
+	writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+	cntl &= ~CNTL_LCDPWR;
+	writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+	iounmap(priv->regs);
+}
+
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id)
+{
+	struct clcd_board *board = dev->dev.platform_data;
+	int ret;
+	pr_info("DRM %s\n", __func__);
+
+	if (!board)
+		dev_warn(&dev->dev, "board data not available\n");
+
+	ret = amba_request_regions(dev, NULL);
+	if (ret != 0) {
+		DRM_ERROR("CLCD: unable to reserve regs region\n");
+		goto out;
+	}
+
+	priv.amba_dev = dev;
+
+	priv.clk = clk_get(&priv.amba_dev->dev, NULL);
+	if (IS_ERR(priv.clk)) {
+		DRM_ERROR("CLCD: unable to get clk.\n");
+		ret = PTR_ERR(priv.clk);
+		goto clk_err;
+	}
+
+	return 0;
+
+clk_err:
+	amba_release_regions(dev);
+out:
+	return ret;
+}
+
+int pl111_amba_remove(struct amba_device *dev)
+{
+	DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+	clk_put(priv.clk);
+
+	amba_release_regions(dev);
+
+	priv.amba_dev = NULL;
+
+	return 0;
+}
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+					struct clcd_regs *timing)
+{
+	unsigned int ppl, hsw, hfp, hbp;
+	unsigned int lpp, vsw, vfp, vbp;
+	unsigned int cpl;
+
+	memset(timing, 0, sizeof(struct clcd_regs));
+
+	ppl = (mode->hdisplay / 16) - 1;
+	hsw = mode->hsync_end - mode->hsync_start - 1;
+	hfp = mode->hsync_start - mode->hdisplay - 1;
+	hbp = mode->htotal - mode->hsync_end - 1;
+
+	lpp = mode->vdisplay - 1;
+	vsw = mode->vsync_end - mode->vsync_start - 1;
+	vfp = mode->vsync_start - mode->vdisplay;
+	vbp = mode->vtotal - mode->vsync_end;
+
+	cpl = mode->hdisplay - 1;
+
+	timing->tim0 = (ppl << 2) | (hsw << 8) | (hfp << 16) | (hbp << 24);
+	timing->tim1 = lpp | (vsw << 10) | (vfp << 16) | (vbp << 24);
+	timing->tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC | TIM2_BCD | (cpl << 16);
+	timing->tim3 = 0;
+
+	timing->pixclock = mode->clock * 1000;
+}
+
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+					struct drm_display_mode *mode)
+{
+	unsigned int ppl, hsw, hfp, hbp;
+	unsigned int lpp, vsw, vfp, vbp;
+
+	ppl = (timing->tim0 >> 2) & 0x3f;
+	hsw = (timing->tim0 >> 8) & 0xff;
+	hfp = (timing->tim0 >> 16) & 0xff;
+	hbp = (timing->tim0 >> 24) & 0xff;
+
+	lpp = timing->tim1 & 0x3ff;
+	vsw = (timing->tim1 >> 10) & 0x3f;
+	vfp = (timing->tim1 >> 16) & 0xff;
+	vbp = (timing->tim1 >> 24) & 0xff;
+
+	mode->hdisplay    = (ppl + 1) * 16;
+	mode->hsync_start = ((ppl + 1) * 16) + hfp + 1;
+	mode->hsync_end   = ((ppl + 1) * 16) + hfp + hsw + 2;
+	mode->htotal      = ((ppl + 1) * 16) + hfp + hsw + hbp + 3;
+	mode->hskew       = 0;
+
+	mode->vdisplay    = lpp + 1;
+	mode->vsync_start = lpp + vfp + 1;
+	mode->vsync_end   = lpp + vfp + vsw + 2;
+	mode->vtotal      = lpp + vfp + vsw + vbp + 2;
+
+	mode->flags = 0;
+
+	mode->width_mm = 0;
+	mode->height_mm = 0;
+
+	mode->clock = timing->pixclock / 1000;
+	mode->hsync = timing->pixclock / mode->htotal;
+	mode->vrefresh = mode->hsync / mode->vtotal;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c
new file mode 100755
index 0000000..9d5ec0c
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c
@@ -0,0 +1,151 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_platform.c
+ * Implementation of the Linux platform device entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+static int pl111_platform_drm_suspend(struct platform_device *dev,
+					pm_message_t state)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+static int pl111_platform_drm_resume(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+int pl111_platform_drm_probe(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return pl111_drm_init(dev);
+}
+
+static int pl111_platform_drm_remove(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	pl111_drm_exit(dev);
+
+	return 0;
+}
+
+static struct amba_id pl111_id_table[] = {
+	{
+	.id = 0x00041110,
+	.mask = 0x000ffffe,
+	},
+	{0, 0},
+};
+
+static struct amba_driver pl111_amba_driver = {
+	.drv = {
+		.name = "clcd-pl11x",
+		},
+	.probe = pl111_amba_probe,
+	.remove = pl111_amba_remove,
+	.id_table = pl111_id_table,
+};
+
+static struct platform_driver platform_drm_driver = {
+	.probe = pl111_platform_drm_probe,
+	.remove = pl111_platform_drm_remove,
+	.suspend = pl111_platform_drm_suspend,
+	.resume = pl111_platform_drm_resume,
+	.driver = {
+			.owner = THIS_MODULE,
+			.name = DRIVER_NAME,
+		},
+};
+
+static const struct platform_device_info pl111_drm_pdevinfo = {
+	.name = DRIVER_NAME,
+	.id = -1,
+	.dma_mask = ~0UL
+};
+
+static struct platform_device *pl111_drm_device;
+
+static int __init pl111_platform_drm_init(void)
+{
+	int ret;
+
+	pr_info("DRM %s\n", __func__);
+
+	pl111_drm_device = platform_device_register_full(&pl111_drm_pdevinfo);
+	if (pl111_drm_device == NULL) {
+		pr_err("DRM platform_device_register_full() failed\n");
+		return -ENOMEM;
+	}
+
+	ret = amba_driver_register(&pl111_amba_driver);
+	if (ret != 0) {
+		pr_err("DRM amba_driver_register() failed %d\n", ret);
+		goto err_amba_reg;
+	}
+
+	ret = platform_driver_register(&platform_drm_driver);
+	if (ret != 0) {
+		pr_err("DRM platform_driver_register() failed %d\n", ret);
+		goto err_pdrv_reg;
+	}
+
+	return 0;
+
+err_pdrv_reg:
+	amba_driver_unregister(&pl111_amba_driver);
+err_amba_reg:
+	platform_device_unregister(pl111_drm_device);
+
+	return ret;
+}
+
+static void __exit pl111_platform_drm_exit(void)
+{
+	pr_info("DRM %s\n", __func__);
+
+	platform_device_unregister(pl111_drm_device);
+	amba_driver_unregister(&pl111_amba_driver);
+	platform_driver_unregister(&platform_drm_driver);
+}
+
+#ifdef MODULE
+module_init(pl111_platform_drm_init);
+#else
+late_initcall(pl111_platform_drm_init);
+#endif
+module_exit(pl111_platform_drm_exit);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_LICENSE(DRIVER_LICENCE);
+MODULE_ALIAS(DRIVER_ALIAS);
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c
new file mode 100755
index 0000000..d033566
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_suspend.c
+ * Implementation of the suspend/resume functions for PL111 DRM
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+int pl111_drm_resume(struct drm_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c
new file mode 100755
index 0000000..ff602ef
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c
@@ -0,0 +1,308 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_vma.c
+ * Implementation of the VM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+/* BEGIN drivers/staging/omapdrm/omap_gem_helpers.c */
+/**
+ * drm_gem_put_pages - helper to free backing pages for a GEM object
+ * @obj: obj in question
+ * @pages: pages to free
+ */
+static void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
+				bool dirty, bool accessed)
+{
+	int i, npages;
+	struct pl111_gem_bo *bo;
+	npages = obj->size >> PAGE_SHIFT;
+	bo = PL111_BO_FROM_GEM(obj);
+	for (i = 0; i < npages; i++) {
+		if (dirty)
+			set_page_dirty(pages[i]);
+		if (accessed)
+			mark_page_accessed(pages[i]);
+		/* Undo the reference we took when populating the table */
+		page_cache_release(pages[i]);
+	}
+	drm_free_large(pages);
+}
+
+void put_pages(struct drm_gem_object *obj, struct page **pages)
+{
+	int i, npages;
+	struct pl111_gem_bo *bo;
+	npages = obj->size >> PAGE_SHIFT;
+	bo = PL111_BO_FROM_GEM(obj);
+	_drm_gem_put_pages(obj, pages, true, true);
+	if (bo->backing_data.shm.dma_addrs) {
+		for (i = 0; i < npages; i++) {
+			/* Filter pages unmapped because of CPU accesses */
+			if (!bo->backing_data.shm.dma_addrs[i])
+				continue;
+			if (!dma_mapping_error(obj->dev->dev,
+					bo->backing_data.shm.dma_addrs[i])) {
+				dma_unmap_page(obj->dev->dev,
+					bo->backing_data.shm.dma_addrs[i],
+					PAGE_SIZE,
+					DMA_BIDIRECTIONAL);
+			}
+		}
+		kfree(bo->backing_data.shm.dma_addrs);
+		bo->backing_data.shm.dma_addrs = NULL;
+	}
+}
+
+/**
+ * drm_gem_get_pages - helper to allocate backing pages for a GEM object
+ * @obj: obj in question
+ * @gfpmask: gfp mask of requested pages
+ */
+static struct page **_drm_gem_get_pages(struct drm_gem_object *obj,
+					gfp_t gfpmask)
+{
+	struct inode *inode;
+	struct address_space *mapping;
+	struct page *p, **pages;
+	int i, npages;
+
+	/* This is the shared memory object that backs the GEM resource */
+	inode = obj->filp->f_path.dentry->d_inode;
+	mapping = inode->i_mapping;
+
+	npages = obj->size >> PAGE_SHIFT;
+
+	pages = drm_malloc_ab(npages, sizeof(struct page *));
+	if (pages == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	gfpmask |= mapping_gfp_mask(mapping);
+
+	for (i = 0; i < npages; i++) {
+		p = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
+		if (IS_ERR(p))
+			goto fail;
+		pages[i] = p;
+
+		/*
+		 * There is a hypothetical issue w/ drivers that require
+		 * buffer memory in the low 4GB.. if the pages are un-
+		 * pinned, and swapped out, they can end up swapped back
+		 * in above 4GB.  If pages are already in memory, then
+		 * shmem_read_mapping_page_gfp will ignore the gfpmask,
+		 * even if the already in-memory page disobeys the mask.
+		 *
+		 * It is only a theoretical issue today, because none of
+		 * the devices with this limitation can be populated with
+		 * enough memory to trigger the issue.  But this BUG_ON()
+		 * is here as a reminder in case the problem with
+		 * shmem_read_mapping_page_gfp() isn't solved by the time
+		 * it does become a real issue.
+		 *
+		 * See this thread: http://lkml.org/lkml/2011/7/11/238
+		 */
+		BUG_ON((gfpmask & __GFP_DMA32) &&
+			(page_to_pfn(p) >= 0x00100000UL));
+	}
+
+	return pages;
+
+fail:
+	while (i--)
+		page_cache_release(pages[i]);
+
+	drm_free_large(pages);
+	return ERR_PTR(PTR_ERR(p));
+}
+
+struct page **get_pages(struct drm_gem_object *obj)
+{
+	struct pl111_gem_bo *bo;
+	bo = PL111_BO_FROM_GEM(obj);
+
+	if (bo->backing_data.shm.pages == NULL) {
+		struct page **p;
+		int npages = obj->size >> PAGE_SHIFT;
+		int i;
+
+		p = _drm_gem_get_pages(obj, GFP_KERNEL);
+		if (IS_ERR(p))
+			return ERR_PTR(-ENOMEM);
+
+		bo->backing_data.shm.pages = p;
+
+		if (bo->backing_data.shm.dma_addrs == NULL) {
+			bo->backing_data.shm.dma_addrs =
+				kzalloc(npages * sizeof(dma_addr_t),
+					GFP_KERNEL);
+			if (bo->backing_data.shm.dma_addrs == NULL)
+				goto error_out;
+		}
+
+		if (!(bo->type & PL111_BOT_CACHED)) {
+			for (i = 0; i < npages; ++i) {
+				bo->backing_data.shm.dma_addrs[i] =
+					dma_map_page(obj->dev->dev, p[i], 0, PAGE_SIZE,
+						DMA_BIDIRECTIONAL);
+				if (dma_mapping_error(obj->dev->dev,
+						bo->backing_data.shm.dma_addrs[i]))
+					goto error_out;
+			}
+		}
+	}
+
+	return bo->backing_data.shm.pages;
+
+error_out:
+	put_pages(obj, bo->backing_data.shm.pages);
+	bo->backing_data.shm.pages = NULL;
+	return ERR_PTR(-ENOMEM);
+}
+
+/* END drivers/staging/omapdrm/omap_gem_helpers.c */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page **pages;
+	unsigned long pfn;
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	mutex_lock(&dev->struct_mutex);
+
+	/*
+	 * Our mmap calls setup a valid vma->vm_pgoff
+	 * so we can use vmf->pgoff
+	 */
+
+	if (bo->type & PL111_BOT_DMA) {
+		pfn = (bo->backing_data.dma.fb_dev_addr >> PAGE_SHIFT) +
+				vmf->pgoff;
+	} else { /* PL111_BOT_SHM */
+		pages = get_pages(obj);
+		if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev,
+				"could not get pages: %ld\n", PTR_ERR(pages));
+			ret = PTR_ERR(pages);
+			goto error;
+		}
+		pfn = page_to_pfn(pages[vmf->pgoff]);
+		pl111_gem_sync_to_cpu(bo, vmf->pgoff);
+	}
+
+	DRM_DEBUG("bo=%p physaddr=0x%.8x for offset 0x%x\n",
+				bo, PFN_PHYS(pfn), PFN_PHYS(vmf->pgoff));
+
+	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
+
+error:
+	mutex_unlock(&dev->struct_mutex);
+
+	switch (ret) {
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+/*
+ * The core drm_vm_ functions in kernel 3.4 are not ready
+ * to handle dma_buf cases where vma->vm_file->private_data
+ * cannot be accessed to get the device.
+ * 
+ * We use these functions from 3.5 instead where the device
+ * pointer is passed explicitly.
+ *
+ * However they aren't exported from the kernel until 3.10
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+                struct vm_area_struct *vma)
+{
+	struct drm_vma_entry *vma_entry;
+
+	DRM_DEBUG("0x%08lx,0x%08lx\n",
+		vma->vm_start, vma->vm_end - vma->vm_start);
+	atomic_inc(&dev->vma_count);
+
+	vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL);
+	if (vma_entry) {
+		vma_entry->vma = vma;
+		vma_entry->pid = current->pid;
+		list_add(&vma_entry->head, &dev->vmalist);
+	}
+}
+
+void pl111_drm_vm_close_locked(struct drm_device *dev,
+                struct vm_area_struct *vma)
+{
+	struct drm_vma_entry *pt, *temp;
+
+	DRM_DEBUG("0x%08lx,0x%08lx\n",
+		vma->vm_start, vma->vm_end - vma->vm_start);
+	atomic_dec(&dev->vma_count);
+
+	list_for_each_entry_safe(pt, temp, &dev->vmalist, head) {
+		if (pt->vma == vma) {
+			list_del(&pt->head);
+			kfree(pt);
+			break;
+		}
+	}
+}
+
+void pl111_gem_vm_open(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+
+	drm_gem_object_reference(obj);
+
+	mutex_lock(&obj->dev->struct_mutex);
+	pl111_drm_vm_open_locked(obj->dev, vma);
+	mutex_unlock(&obj->dev->struct_mutex);
+}
+
+void pl111_gem_vm_close(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct drm_device *dev = obj->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	pl111_drm_vm_close_locked(obj->dev, vma);
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&dev->struct_mutex);
+}
+#endif
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/pl111/sconscript b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/sconscript
new file mode 100755
index 0000000..5c47de7
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/pl111/sconscript
@@ -0,0 +1,52 @@
+#
+# (C) COPYRIGHT 2010-2013, 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import shutil
+Import('env')
+
+# Generate a build environment for the integration tests, taking a copy of the top-level build environment 
+# (env) as a start.
+drm_env = env.Clone()
+
+# Xorg uses C++ style comments and 'inline' keyword
+if '-std=c89' in drm_env['CFLAGS']:
+	drm_env['CFLAGS'].remove('-std=c89')
+
+# X11 generates a lot of warnings
+if '-Werror' in drm_env['CCFLAGS']:
+	drm_env['CCFLAGS'].remove('-Werror')
+
+#remove the 'lib'prefix
+drm_env['LIBPREFIX'] = ''
+
+src = Glob('*.c')
+
+if drm_env.GetOption('clean') :
+	drm_env.Execute(Action("make clean", 'clean [pl111]'))
+	cmd = drm_env.Command('$STATIC_LIB_PATH/mali_drm.ko', src, [])
+else:
+	# The target is mali_drm.ko, built from the source in pl111_drm/pl111, via the action makeAction
+	# mali_drm.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+	# kernel build system, after which it can be installed to the directory specified if
+	# "libs_install" is set; this is done by LibTarget.
+	makeAction=Action("cd ${SOURCE.dir} && make MALI_DEBUG=${debug} && cp pl111_drm.ko $STATIC_LIB_PATH/mali_drm.ko", '$MAKECOMSTR')
+	cmd = drm_env.Command('$STATIC_LIB_PATH/mali_drm.ko', src, [makeAction])
+
+# need Module.symvers from drm.ko
+#drm_env.Depends('$STATIC_LIB_PATH/pl111_drm.ko', '$STATIC_LIB_PATH/drm.ko')
+
+drm_env.KernelObjTarget('x11', cmd)
+
diff --git a/midgard/r11p0/kernel/drivers/gpu/drm/sconscript b/midgard/r11p0/kernel/drivers/gpu/drm/sconscript
new file mode 100755
index 0000000..a90fa89
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/drm/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+if 'x11' in env['winsys'] or 'gbm' in env['winsys']:
+	# pl111_drm isn't released so only try to build it if it's there
+	if Glob('pl111/sconscript') and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'):
+		SConscript('pl111/sconscript')
+
+#SConscript('dummy_drm/sconscript')
diff --git a/midgard/r11p0/kernel/drivers/gpu/sconscript b/midgard/r11p0/kernel/drivers/gpu/sconscript
new file mode 100755
index 0000000..729dbda
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/gpu/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+SConscript('arm/sconscript')
+
+if Glob('drm/sconscript'):
+	SConscript('drm/sconscript')
diff --git a/midgard/r11p0/kernel/drivers/sconscript b/midgard/r11p0/kernel/drivers/sconscript
new file mode 100755
index 0000000..dd6a637
--- /dev/null
+++ b/midgard/r11p0/kernel/drivers/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+if Glob('base/sconscript'):
+	SConscript('base/sconscript')
+
+if Glob('video/sconscript'):
+	SConscript('video/sconscript')
+
+SConscript('gpu/sconscript')
diff --git a/midgard/r11p0/kernel/include/linux/dma-buf-test-exporter.h b/midgard/r11p0/kernel/include/linux/dma-buf-test-exporter.h
new file mode 100755
index 0000000..98984ba
--- /dev/null
+++ b/midgard/r11p0/kernel/include/linux/dma-buf-test-exporter.h
@@ -0,0 +1,78 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_
+#define _LINUX_DMA_BUF_TEST_EXPORTER_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+#define DMA_BUF_TE_VER_MAJOR 1
+#define DMA_BUF_TE_VER_MINOR 0
+#define DMA_BUF_TE_ENQ 0x642d7465
+#define DMA_BUF_TE_ACK 0x68692100
+
+struct dma_buf_te_ioctl_version
+{
+	int op;    /**< Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */
+	int major; /**< Major version */
+	int minor; /**< Minor version */
+};
+
+struct dma_buf_te_ioctl_alloc
+{
+	__u64 size; /* size of buffer to allocate, in pages */
+};
+
+struct dma_buf_te_ioctl_status
+{
+	/* in */
+	int fd; /* the dma_buf to query, only dma_buf objects exported by this driver is supported */
+	/* out */
+	int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */
+	int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */
+	int cpu_mappings;    /* number of cpu mappings (active 'mmap's) */
+};
+
+struct dma_buf_te_ioctl_set_failing
+{
+	/* in */
+	int fd; /* the dma_buf to set failure mode for, only dma_buf objects exported by this driver is supported */
+
+	/* zero = no fail injection, non-zero = inject failure */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+};
+
+struct dma_buf_te_ioctl_fill
+{
+	int fd;
+	unsigned int value;
+};
+
+#define DMA_BUF_TE_IOCTL_BASE 'E'
+/* Below all returning 0 if successful or -errcode except DMA_BUF_TE_ALLOC which will return fd or -errcode */
+#define DMA_BUF_TE_VERSION         _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version)
+#define DMA_BUF_TE_ALLOC           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_QUERY           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status)
+#define DMA_BUF_TE_SET_FAILING     _IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing)
+#define DMA_BUF_TE_ALLOC_CONT      _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_FILL            _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill)
+
+#endif /* _LINUX_DMA_BUF_TEST_EXPORTER_H_ */
diff --git a/midgard/r11p0/kernel/include/linux/kds.h b/midgard/r11p0/kernel/include/linux/kds.h
new file mode 100755
index 0000000..1346eda
--- /dev/null
+++ b/midgard/r11p0/kernel/include/linux/kds.h
@@ -0,0 +1,173 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KDS_H_
+#define _KDS_H_
+
+#include <linux/list.h>
+#include <linux/workqueue.h>
+
+#define KDS_WAIT_BLOCKING (ULONG_MAX)
+
+struct kds_resource_set;
+
+typedef void (*kds_callback_fn) (void *callback_parameter, void *callback_extra_parameter);
+
+struct kds_callback
+{
+	kds_callback_fn  user_cb; /* real cb */
+	int direct;               /* do direct or queued call? */
+	struct workqueue_struct *wq;
+};
+
+struct kds_link
+{
+	struct kds_resource_set *parent;
+	struct list_head         link;
+	unsigned long            state;
+};
+
+struct kds_resource
+{
+	struct kds_link waiters;
+};
+
+/* callback API */
+
+/* Initialize a callback object.
+ *
+ * Typically created per context or per hw resource.
+ *
+ * Callbacks can be performed directly if no nested locking can
+ * happen in the client.
+ *
+ * Nested locking can occur when a lock is held during the kds_async_waitall or
+ * kds_resource_set_release call. If the callback needs to take the same lock
+ * nested locking will happen.
+ *
+ * If nested locking could happen non-direct callbacks can be requested.
+ * Callbacks will then be called asynchronous to the triggering call.
+ */
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb);
+
+/* Terminate the use of a callback object.
+ *
+ * If the callback object was set up as non-direct
+ * any pending callbacks will be flushed first.
+ * Note that to avoid a deadlock the lock callbacks needs
+ * can't be held when a callback object is terminated.
+ */
+void kds_callback_term(struct kds_callback *cb);
+
+
+/* resource object API */
+
+/* initialize a resource handle for a shared resource */
+void kds_resource_init(struct kds_resource * const resource);
+
+/*
+ * Will return 0 on success.
+ * If the resource is being used or waited -EBUSY is returned.
+ * The caller should NOT try to terminate a resource that could still have clients.
+ * After the function returns the resource is no longer known by kds.
+ */
+int kds_resource_term(struct kds_resource *resource);
+
+/* Asynchronous wait for a set of resources.
+ * Callback will be called when all resources are available.
+ * If all the resources was available the callback will be called before kds_async_waitall returns.
+ * So one must not hold any locks the callback code-flow can take when calling kds_async_waitall.
+ * Caller considered to own/use the resources until \a kds_rset_release is called.
+ * exclusive_access_bitmap is a bitmap where a high bit means exclusive access while a low bit means shared access.
+ * Use the Linux __set_bit API, where the index of the buffer to control is used as the bit index.
+ *
+ * Standard Linux error return value.
+ */
+int kds_async_waitall(
+		struct kds_resource_set ** const pprset,
+		struct kds_callback      *cb,
+		void                     *callback_parameter,
+		void                     *callback_extra_parameter,
+		int                       number_resources,
+		unsigned long            *exclusive_access_bitmap,
+		struct kds_resource     **resource_list);
+
+/* Synchronous wait for a set of resources.
+ * Function will return when one of these have happened:
+ * - all resources have been obtained
+ * - timeout lapsed while waiting
+ * - a signal was received while waiting
+ *
+ * To wait without a timeout, specify KDS_WAIT_BLOCKING for \a jifies_timeout, otherwise
+ * the timeout in jiffies. A zero timeout attempts to obtain all resources and returns
+ * immediately with a timeout if all resources could not be obtained.
+ *
+ * Caller considered to own/use the resources when the function returns.
+ * Caller must release the resources using \a kds_rset_release.
+ *
+ * Calling this function while holding already locked resources or other locking primitives is dangerous.
+ * One must if this is needed decide on a lock order of the resources and/or the other locking primitives
+ * and always take the resources/locking primitives in the specific order.
+ *
+ * Use the ERR_PTR framework to decode the return value.
+ * NULL = time out
+ * If IS_ERR then PTR_ERR gives:
+ *  ERESTARTSYS = signal received, retry call after signal
+ *  all other values = internal error, lock failed
+ * Other values  = successful wait, now the owner, must call kds_resource_set_release
+ */
+struct kds_resource_set *kds_waitall(
+		int                   number_resources,
+		unsigned long        *exclusive_access_bitmap,
+		struct kds_resource **resource_list,
+		unsigned long         jifies_timeout);
+
+/* Release resources after use.
+ * Caller must handle that other async callbacks will trigger,
+ * so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your poiner to handle a race
+ * between a cancelation and a completion.
+ *
+ * If the caller can't guarantee that a race can't occur then
+ * the passed in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ */
+void kds_resource_set_release(struct kds_resource_set **pprset);
+
+/* Release resources after use and wait callbacks to complete.
+ * Caller must handle that other async callbacks will trigger,
+ * so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your poiner to handle a race
+ * between a cancelation and a completion.
+ *
+ * If the caller can't guarantee that a race can't occur then
+ * the passed in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ *
+ * This should be used to cancel waits which are pending on a kds
+ * resource.
+ *
+ * It is a bug to call this from atomic contexts and from within
+ * a kds callback that now owns the kds_rseource.
+ */
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset);
+#endif /* _KDS_H_ */
diff --git a/midgard/r11p0/kernel/include/linux/ump-common.h b/midgard/r11p0/kernel/include/linux/ump-common.h
new file mode 100755
index 0000000..0015bda
--- /dev/null
+++ b/midgard/r11p0/kernel/include/linux/ump-common.h
@@ -0,0 +1,252 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump-common.h
+ *
+ * This file contains some common enum values used both in both the user and kernel space side of UMP.
+ */
+
+#ifndef _UMP_COMMON_H_
+#define _UMP_COMMON_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#define UMP_UINT32_MAX (4294967295U)
+#define UMP_UINT64_MAX (18446744073709551615ULL)
+
+#ifdef __GNUC__
+	#define CHECK_RESULT        __attribute__((__warn_unused_result__))
+	#define INLINE	__inline__
+#else
+	#define CHECK_RESULT
+	#define INLINE __inline
+#endif
+
+#ifndef STATIC
+	#define STATIC static
+#endif
+
+/**
+ * Values to identify major and minor version of UMP
+ */
+#define UMP_VERSION_MAJOR 2
+#define UMP_VERSION_MINOR 0
+
+/**
+ * Typedef for a secure ID, a system wide identifier for UMP memory buffers.
+ */
+typedef int32_t ump_secure_id;
+
+/**
+ * Value to indicate an invalid secure Id.
+ */
+#define UMP_INVALID_SECURE_ID  ((ump_secure_id)0)
+
+/**
+ * UMP error codes.
+ */
+typedef enum
+{
+	UMP_OK    = 0, /**< indicates success */
+	UMP_ERROR = 1  /**< indicates failure */
+} ump_result;
+
+/**
+ * Allocation flag bits.
+ *
+ * ump_allocate accepts zero or more flags to specify the type of memory to allocate and how to expose it to devices.
+ *
+ * For each supported device there are 4 flags to control access permissions and give usage characteristic hints to optimize the allocation/mapping.
+ * They are;
+ * @li @a UMP_PROT_<device>_RD   read permission
+ * @li @a UMP_PROT_<device>_WR   write permission
+ * @li @a UMP_HINT_<device>_RD   read often
+ * @li @a UMP_HINT_<device>_WR   written often
+ *
+ * 5 devices are currently supported, with a device being the CPU itself.
+ * The other 4 devices will be mapped to real devices per SoC design.
+ * They are just named W,X,Y,Z by UMP as it has no knowledge of their real names/identifiers.
+ * As an example device W could be a camera device while device Z could be an ARM GPU device, leaving X and Y unused.
+ *
+ * 2 additional flags control the allocation;
+ * @li @a UMP_CONSTRAINT_PHYSICALLY_LINEAR   the allocation must be physical linear. Typical for devices without an MMU and no IOMMU to help it.
+ * @li @a UMP_PROT_SHAREABLE                 the allocation can be shared with other processes on the system. Without this flag the returned allocation won't be resolvable in other processes.
+ *
+ * All UMP allocation are growable unless they're @a UMP_PROT_SHAREABLE.
+ * The hint bits should be used to indicate the access pattern so the driver can select the most optimal memory type and cache settings based on the what the system supports.
+ */
+typedef enum
+{
+	/* Generic helpers */
+	UMP_PROT_DEVICE_RD = (1u << 0),
+	UMP_PROT_DEVICE_WR = (1u << 1),
+	UMP_HINT_DEVICE_RD = (1u << 2),
+	UMP_HINT_DEVICE_WR = (1u << 3),
+	UMP_DEVICE_MASK = 0xF,
+	UMP_DEVICE_CPU_SHIFT = 0,
+	UMP_DEVICE_W_SHIFT = 4,
+	UMP_DEVICE_X_SHIFT = 8,
+	UMP_DEVICE_Y_SHIFT = 12,
+	UMP_DEVICE_Z_SHIFT = 16,
+
+	/* CPU protection and hints. */
+	UMP_PROT_CPU_RD = (1u <<  0),
+	UMP_PROT_CPU_WR = (1u <<  1),
+	UMP_HINT_CPU_RD = (1u <<  2),
+	UMP_HINT_CPU_WR = (1u <<  3),
+
+	/* device W */
+	UMP_PROT_W_RD = (1u <<  4),
+	UMP_PROT_W_WR = (1u <<  5),
+	UMP_HINT_W_RD = (1u <<  6),
+	UMP_HINT_W_WR = (1u <<  7),
+
+	/* device X */
+	UMP_PROT_X_RD = (1u <<  8),
+	UMP_PROT_X_WR = (1u <<  9),
+	UMP_HINT_X_RD = (1u << 10),
+	UMP_HINT_X_WR = (1u << 11),
+	
+	/* device Y */
+	UMP_PROT_Y_RD = (1u << 12),
+	UMP_PROT_Y_WR = (1u << 13),
+	UMP_HINT_Y_RD = (1u << 14),
+	UMP_HINT_Y_WR = (1u << 15),
+
+	/* device Z */
+	UMP_PROT_Z_RD = (1u << 16),
+	UMP_PROT_Z_WR = (1u << 17),
+	UMP_HINT_Z_RD = (1u << 18),
+	UMP_HINT_Z_WR = (1u << 19),
+
+	/* 20-26 reserved for future use */
+	UMPP_ALLOCBITS_UNUSED = (0x7Fu << 20),
+	/** Allocations marked as @ UMP_CONSTRAINT_UNCACHED won't be mapped as cached by the cpu  */
+	UMP_CONSTRAINT_UNCACHED = (1u << 27),
+	/** Require 32-bit physically addressable memory */
+	UMP_CONSTRAINT_32BIT_ADDRESSABLE = (1u << 28),
+	/** For devices without an MMU and with no IOMMU assistance. */
+	UMP_CONSTRAINT_PHYSICALLY_LINEAR = (1u << 29),
+	/** Shareable must be set to allow the allocation to be used by other processes, the default is non-shared */
+	UMP_PROT_SHAREABLE = (1u << 30)
+	/* (1u << 31) should not be used to ensure compiler portability */
+} ump_allocation_bits;
+
+/**
+ * ump_allocation_bits combination argument type.
+ *
+ * Type used to pass zero or more bits from the @ref ump_allocation_bits enum
+ */
+typedef uint32_t ump_alloc_flags;
+
+
+/**
+ *  Default allocation flags for UMP v1 compatible allocations.
+ */
+#define UMP_V1_API_DEFAULT_ALLOCATION_FLAGS		UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | \
+												UMP_PROT_W_RD | UMP_PROT_W_WR |	\
+												UMP_PROT_X_RD | UMP_PROT_X_WR |	\
+												UMP_PROT_Y_RD | UMP_PROT_Y_WR |	\
+												UMP_PROT_Z_RD | UMP_PROT_Z_WR |	\
+												UMP_PROT_SHAREABLE |	\
+												UMP_CONSTRAINT_32BIT_ADDRESSABLE
+
+/**
+ * CPU cache sync operations.
+ *
+ * Cache synchronization operations to pass to @ref ump_cpu_msync_now
+ */
+enum
+{
+	/**
+	 * Cleans any dirty cache lines to main memory, but the data will still be available in the cache.
+	 * After a clean the contents of memory is considered to be "owned" by the device.
+	 * */
+	UMP_MSYNC_CLEAN = 1,
+
+	/** Cleans any dirty cache lines to main memory and Ejects all lines from the cache.
+	 * After an clean&invalidate the contents of memory is considered to be "owned" by the CPU.
+	 * Any subsequent access will fetch data from main memory.
+	 *
+	 * @note Due to CPUs doing speculative prefetching a UMP_MSYNC_CLEAN_AND_INVALIDATE must be done before and after interacting with hardware.
+	 * */
+	UMP_MSYNC_CLEAN_AND_INVALIDATE
+
+};
+
+typedef uint32_t ump_cpu_msync_op;
+
+/**
+ * Memory import types supported.
+ * If new import types are added they will appear here.
+ * They must be added before UMPP_EXTERNAL_MEM_COUNT and
+ * must be assigned an explicit sequantial number.
+ *
+ * @li UMP_EXTERNAL_MEM_TYPE_ION - Import an ION allocation
+ *                                 Takes a int* (pointer to a file descriptor)
+ *                                 Another ION reference is taken which is released on the final ump_release
+ */
+enum ump_external_memory_type
+{
+	UMPP_EXTERNAL_MEM_TYPE_UNUSED = 0, /* reserve type 0 */
+	UMP_EXTERNAL_MEM_TYPE_ION = 1,
+	UMPP_EXTERNAL_MEM_COUNT
+};
+
+/** @name UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Allocation constraints.
+ *
+ * Allocation flags to pass @ref ump_ref_drv_allocate
+ *
+ * UMP v1 API only.
+ */
+typedef enum
+{
+	/** the allocation is mapped as noncached. */
+	UMP_REF_DRV_CONSTRAINT_NONE = 0,
+	/** not supported. */
+	UMP_REF_DRV_CONSTRAINT_PHYSICALLY_LINEAR = 1,
+	/** the allocation is mapped as cached by the cpu. */
+	UMP_REF_DRV_CONSTRAINT_USE_CACHE = 4
+} ump_alloc_constraints;
+
+/* @} */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _UMP_COMMON_H_ */
diff --git a/midgard/r11p0/kernel/include/linux/ump-import.h b/midgard/r11p0/kernel/include/linux/ump-import.h
new file mode 100755
index 0000000..89ce727
--- /dev/null
+++ b/midgard/r11p0/kernel/include/linux/ump-import.h
@@ -0,0 +1,99 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IMPORT_H_
+#define _UMP_IMPORT_H_
+
+#include <linux/ump.h>
+#include <linux/module.h>
+
+/**
+ * UMP import module info.
+ * Contains information about the Linux module providing the import module,
+ * used to block unloading of the Linux module while imported memory exists.
+ * Lists the functions implementing the UMP import functions.
+ */
+struct ump_import_handler
+{
+	/**
+	 * Linux module of the import handler
+	 */
+	struct module * linux_module;
+
+	/**
+	 * UMP session usage begin.
+	 *
+	 * Called when a UMP session first is bound to the handler.
+	 * Typically used to set up any import module specific per-session data.
+	 * The function returns a pointer to this data in the output pointer custom_session_data
+	 * which will be passed to \a session_end and \a import.
+	 *
+	 * Note: custom_session_data must be set to non-NULL if successful.
+	 * If no pointer is needed set it a magic value to validate instead.
+	 *
+	 * @param[out] custom_session_data Pointer to a generic pointer where any data can be stored
+	 * @return 0 on success, error code if the session could not be initiated.
+	 */
+	int (*session_begin)(void ** custom_session_data);
+
+	/**
+	 * UMP session usage end.
+	 *
+	 * Called when a UMP session is no longer using the handler.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 */
+	void (*session_end)(void * custom_session_data);
+
+	/**
+	 * Import request.
+	 *
+	 * Called when a client has asked to import a resource of the type the import module was installed for.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * The requested flags must be verified to be valid to apply to the imported memory.
+	 * If not valid return UMP_DD_INVALID_MEMORY_HANDLE.
+	 * If the flags are found to be valid call \a ump_dd_create_from_phys_blocks_64 to create a handle.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 * @param[in] phandle Pointer to the handle to import
+	 * @param     flags The requested UMPv2 flags to assign to the imported handle
+	 * @return UMP_DD_INVALID_MEMORY_HANDLE if the import failed, a valid ump handle on success
+	 */
+	ump_dd_handle (*import)(void * custom_session_data, void * phandle, ump_alloc_flags flags);
+};
+
+/**
+ * Import module registration.
+ * Registers a ump_import_handler structure for a memory type.
+ * @param     type    Type of the memory to register a handler for
+ * @param[in] handler Handler strcture to install
+ * @return 0 on success, a Linux error code on failure
+ */
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler);
+
+/**
+ * Import module deregistration.
+ * Uninstalls the handler for the given memory type.
+ * @param type Type of the memory to unregister the handler for
+ */
+void ump_import_module_unregister(enum ump_external_memory_type type);
+
+#endif /* _UMP_IMPORT_H_ */
diff --git a/midgard/r11p0/kernel/include/linux/ump-ioctl.h b/midgard/r11p0/kernel/include/linux/ump-ioctl.h
new file mode 100755
index 0000000..451403e
--- /dev/null
+++ b/midgard/r11p0/kernel/include/linux/ump-ioctl.h
@@ -0,0 +1,152 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IOCTL_H_
+#define _UMP_IOCTL_H
+
+#include <linux/ump-common.h>
+
+/*
+ * The order and size of the members of these have been chosen so the structures look the same in 32-bit and 64-bit builds.
+ * If any changes are done build the ump_struct_size_checker test for 32-bit and 64-bit targets. Both must compile successfully to commit.
+ */
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union ump_pointer
+{
+	void * value; /**< client should store their pointers here */
+	uint32_t compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	uint64_t sizer; /**< Force 64-bit storage for all clients regardless */
+} ump_pointer;
+
+/**
+ * UMP allocation request.
+ * Used when performing ump_allocate
+ */
+typedef struct ump_k_allocate
+{
+	uint64_t size; /**< [in] Size in bytes to allocate */
+	ump_secure_id secure_id; /**< [out] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [in] Flags to use when allocating */
+} ump_k_allocate;
+
+/**
+ * UMP size query request.
+ * Used when performing ump_size_get
+ */
+typedef struct ump_k_sizequery
+{
+	uint64_t size; /**< [out] Size of allocation */
+	ump_secure_id secure_id; /**< [in] ID of allocation to query the size of */
+	uint32_t padding; /* don't remove */
+} ump_k_sizequery;
+
+/**
+ * UMP cache synchronization request.
+ * Used when performing ump_cpu_msync_now
+ */
+typedef struct ump_k_msync
+{
+	ump_pointer mapped_ptr; /**< [in] CPU VA to perform cache operation on */
+	ump_secure_id secure_id; /**< [in] ID of allocation to perform cache operation on */
+	ump_cpu_msync_op cache_operation; /**< [in] Cache operation to perform */
+	uint64_t size; /**< [in] Size in bytes of the range to synchronize */
+} ump_k_msync;
+
+/**
+ * UMP memory retain request.
+ * Used when performing ump_retain
+ */
+typedef struct ump_k_retain
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to retain a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_retain;
+
+/**
+ * UMP memory release request.
+ * Used when performing ump_release
+ */
+typedef struct ump_k_release
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to release a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_release;
+
+typedef struct ump_k_import
+{
+	ump_pointer phandle;                /**< [in]  Pointer to handle to import */
+	uint32_t type;                           /**< [in]  Type of handle to import */
+	ump_alloc_flags alloc_flags;        /**< [in]  Flags to assign to the imported memory */
+	ump_secure_id secure_id;            /**< [out] UMP ID representing the imported memory */
+	uint32_t padding;                        /* don't remove */
+} ump_k_import;
+
+/**
+ * UMP allocation flags request.
+ * Used when performing umpp_get_allocation_flags
+ *
+ * used only by v1 API
+ */
+typedef struct ump_k_allocation_flags
+{
+	ump_secure_id secure_id; /**< [in] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [out] Flags to use when allocating */
+} ump_k_allocation_flags;
+
+#define UMP_CALL_MAX_SIZE 512
+/*
+ * Ioctl definitions
+ */
+
+/* Use '~' as magic number */
+
+#define UMP_IOC_MAGIC  '~'
+
+#define UMP_FUNC_ALLOCATE _IOWR(UMP_IOC_MAGIC,  1, ump_k_allocate)
+#define UMP_FUNC_SIZEQUERY _IOWR(UMP_IOC_MAGIC,  2, ump_k_sizequery)
+#define UMP_FUNC_MSYNC _IOWR(UMP_IOC_MAGIC,  3, ump_k_msync)
+#define UMP_FUNC_RETAIN _IOW(UMP_IOC_MAGIC,  4, ump_k_retain)
+#define UMP_FUNC_RELEASE _IOW(UMP_IOC_MAGIC,  5, ump_k_release)
+#define UMP_FUNC_ALLOCATION_FLAGS_GET _IOWR(UMP_IOC_MAGIC,  6, ump_k_allocation_flags)
+#define UMP_FUNC_IMPORT _IOWR(UMP_IOC_MAGIC, 7, ump_k_import)
+
+/*max ioctl sequential number*/
+#define UMP_IOC_MAXNR 7
+
+/* 15 bits for the UMP ID (allowing 32768 IDs) */
+#define UMP_LINUX_ID_BITS 15
+#define UMP_LINUX_ID_MASK ((1ULL << UMP_LINUX_ID_BITS) - 1ULL)
+
+/* 64-bit (really 52 bits) encoding: 15 bits for the ID, 37 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_64 37
+#define UMP_LINUX_OFFSET_MASK_64 ((1ULL << UMP_LINUX_OFFSET_BITS_64)-1)
+/* 32-bit encoding: 15 bits for the ID, 17 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_32 17
+#define UMP_LINUX_OFFSET_MASK_32 ((1ULL << UMP_LINUX_OFFSET_BITS_32)-1)
+
+#if __SIZEOF_LONG__ == 8
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_64
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_64
+#else
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_32
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_32
+#endif
+
+#endif /* _UMP_IOCTL_H_ */
diff --git a/midgard/r11p0/kernel/include/linux/ump.h b/midgard/r11p0/kernel/include/linux/ump.h
new file mode 100755
index 0000000..16f9c39
--- /dev/null
+++ b/midgard/r11p0/kernel/include/linux/ump.h
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ *
+ * This file contains the kernel space part of the UMP API.
+ *
+ */
+
+#ifndef _UMP_KERNEL_INTERFACE_H_
+#define _UMP_KERNEL_INTERFACE_H_
+
+/**
+ * @addtogroup ump_api
+ * @{
+ */
+
+/** @defgroup ump_kernel_space_api UMP Kernel Space API
+ * @{ */
+
+/**
+ * External representation of a UMP handle in kernel space.
+ */
+typedef void * ump_dd_handle;
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+
+#include <linux/ump-common.h>
+
+#define UMP_KERNEL_API_EXPORT
+
+#if defined(__KERNEL__)
+#include <linux/ump-import.h>
+#else
+#include <ump/src/library/common/ump_user.h>
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_INVALID_MEMORY_HANDLE ((ump_dd_handle)0)
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block_64
+{
+	uint64_t addr; /**< The physical address of the block */
+	uint64_t size; /**< The length of the block, in bytes, typically page aligned */
+} ump_dd_physical_block_64;
+
+/**
+ * Security filter hook.
+ *
+ * Each allocation can have a security filter attached to it.@n
+ * The hook receives
+ * @li the secure ID
+ * @li a handle to the allocation
+ * @li  the callback_data argument provided to @ref ump_dd_allocate_64 or @ref ump_dd_create_from_phys_blocks_64
+ *
+ * The hook must return @a true to indicate that access to the handle is allowed or @n
+ * @a false to state that no access is permitted.@n
+ * This hook is guaranteed to be called in the context of the requesting process/address space.
+ *
+ * The arguments provided to the hook are;
+ * @li the secure ID
+ * @li handle to the allocation
+ * @li the callback_data set when registering the hook
+ *
+ * Return value;
+ * @li @a true to permit access
+ * @li @a false to deny access
+ */
+typedef bool (*ump_dd_security_filter)(ump_secure_id, ump_dd_handle, void *);
+
+/**
+ * Final release notify hook.
+ *
+ * Allocations can have a hook attached to them which is called when the last reference to the allocation is released.
+ * No reference count manipulation is allowed on the provided handle, just property querying (ID get, size get, phys block get).
+ * This is similar to finalizers in OO languages.
+ *
+ * The arguments provided to the hook are;
+ * * handle to the allocation
+ * * the callback_data set when registering the hook
+ */
+typedef void (*ump_dd_final_release_callback)(const ump_dd_handle, void *);
+
+/**
+ * Allocate a buffer.
+ * The lifetime of the allocation is controlled by a reference count.
+ * The reference count of the returned buffer is set to 1.
+ * The memory will be freed once the reference count reaches 0.
+ * Use @ref ump_dd_retain and @ref ump_dd_release to control the reference count.
+ * @param size Number of bytes to allocate. Will be padded up to a multiple of the page size.
+ * @param flags Bit-wise OR of zero or more of the allocation flag bits.
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a
+ * secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed,
+ * just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the new allocation, or @a UMP_DD_INVALID_MEMORY_HANDLE on allocation failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/**
+ * Allocation bits getter.
+ * Retrieves the allocation flags used when instantiating the given handle.
+ * Just a copy of the flag given to @ref ump_dd_allocate_64 and @ref ump_dd_create_from_phys_blocks_64
+ * @param mem The handle retrieve the bits for
+ * @return The allocation bits used to instantiate the allocation
+ */
+UMP_KERNEL_API_EXPORT ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem);
+
+
+/**
+ * Retrieves the secure ID for the specified UMP memory.
+ *
+ * This identifier is unique across the entire system, and uniquely identifies
+ * the specified UMP memory allocation. This identifier can later be used through the
+ * @ref ump_dd_from_secure_id or
+ * @ref ump_from_secure_id
+ * functions in order to access this UMP memory, for instance from another process (if shared of course).
+ * Unless the allocation was marked as shared the returned ID will only be resolvable in the same process as did the allocation.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE will result in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_secure_id_get
+ *
+ * @see ump_dd_from_secure_id
+ * @see ump_from_secure_id
+ * @see ump_secure_id_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the secure ID for the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem);
+
+#ifdef CONFIG_KDS
+/**
+ * Retrieve the KDS resource for the specified UMP memory.
+ *
+ * The KDS resource should be used to synchronize access to the UMP allocation.
+ * See the KDS API for how to do that.
+ *
+ * @param mem Handle to the UMP memory to query.
+ * @return Pointer to the KDS resource controlling access to the UMP memory.
+ */
+UMP_KERNEL_API_EXPORT struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem);
+#endif
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_release when there is no longer any
+ * use for the retrieved handle.
+ *
+ * If called on an non-shared allocation and this is a different process @a UMP_DD_INVALID_MEMORY_HANDLE will be returned.
+ *
+ * Calling on an @a UMP_INVALID_SECURE_ID will return @a UMP_DD_INVALID_MEMORY_HANDLE
+ *
+ * @note There is a user space equivalent function called @ref ump_from_secure_id
+ *
+ * @see ump_dd_release
+ * @see ump_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get function.
+ *
+ * @return @a UMP_DD_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ * This function will return a pointer to an array of @ref ump_dd_physical_block_64 in @a pArray and the number of array elements in @a pCount
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @param mem Handle to UMP memory.
+ * @param[out] pCount Pointer to where to store the number of items in the returned array
+ * @param[out] pArray Pointer to where to store a pointer to the physical blocks array
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray);
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT uint64_t ump_dd_size_get_64(const ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory allocation.
+ *
+ * The function @ref ump_dd_release must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note You are not required to call @ref ump_dd_retain
+ * for UMP handles returned from
+ * @ref ump_dd_from_secure_id,
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_retain
+ *
+ * @see ump_retain
+ *
+ * @param mem Handle to UMP memory.
+ * @return 0 indicates success, any other value indicates failure.
+ */
+UMP_KERNEL_API_EXPORT int ump_dd_retain(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function must be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * One can only call ump_release when matched with a successful ump_dd_retain, ump_dd_allocate_64 or ump_dd_from_secure_id
+ * If called on an @a UMP_DD_INVALID_MEMORY_HANDLE the function will early out.
+ *
+ * @note There is a user space equivalent function called @ref ump_release
+ *
+ * @see ump_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_release(ump_dd_handle mem);
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ * Once wrapped the memory acts just like a normal allocation coming from @ref ump_dd_allocate_64.
+ * The only exception is that the freed physical memory is not put into the pool of free memory, but instead considered returned to the caller once @a final_release_func returns.
+ * The blocks array will be copied, so no need to hold on to it after this function returns.
+ * @param[in] blocks Array of @ref ump_dd_physical_block_64
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ * @param flags Allocation flags to mark the handle with
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed, just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/** @name UMP v1 API
+ * Functions provided to support compatibility with UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_HANDLE_INVALID UMP_DD_INVALID_MEMORY_HANDLE
+
+/**
+ * UMP error codes for kernel space.
+ */
+typedef enum
+{
+	UMP_DD_SUCCESS, /**< indicates success */
+	UMP_DD_INVALID /**< indicates failure */
+} ump_dd_status_code;
+
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block
+{
+	unsigned long addr; /**< The physical address of the block */
+	unsigned long size; /**< The length of the block, typically page aligned */
+} ump_dd_physical_block;
+
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_reference_release "ump_dd_reference_release" when there is no longer any
+ * use for the retrieved handle.
+ *
+ * @note There is a user space equivalent function called @ref ump_handle_create_from_secure_id "ump_handle_create_from_secure_id"
+ *
+ * @see ump_dd_reference_release
+ * @see ump_handle_create_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get "ump_secure_id_get " function.
+ *
+ * @return UMP_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id);
+
+
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ *
+ * @param[in] blocks Array of @ref ump_dd_physical_block
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ *
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the number of physical blocks used by the specified UMP memory.
+ *
+ * This function retrieves the number of @ref ump_dd_physical_block "ump_dd_physical_block" structs needed
+ * to describe the physical memory layout of the given UMP memory. This can later be used when calling
+ * the functions @ref ump_dd_phys_blocks_get "ump_dd_phys_blocks_get" and
+ * @ref ump_dd_phys_block_get "ump_dd_phys_block_get".
+ *
+ * @see ump_dd_phys_blocks_get
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return The number of ump_dd_physical_block structs required to describe the physical memory layout of the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will fail if the num_blocks parameter is either to large or to small.
+ *
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param blocks An array of @ref ump_dd_physical_block "ump_dd_physical_block" structs that will receive the physical description.
+ * @param num_blocks The number of blocks to return in the blocks array. Use the function
+ *                   @ref ump_dd_phys_block_count_get "ump_dd_phys_block_count_get" first to determine the number of blocks required.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the physical memory block information for specified block for the specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will return UMP_DD_INVALID if the specified index is out of range.
+ *
+ * @see ump_dd_phys_blocks_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param index Which physical info block to retrieve.
+ * @param block Pointer to a @ref ump_dd_physical_block "ump_dd_physical_block" struct which will receive the requested information.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block);
+
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get "ump_size_get"
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory.
+ *
+ * This function adds an extra reference to the specified UMP memory. This function should
+ * be used every time a UMP memory handle is duplicated, that is, assigned to another ump_dd_handle
+ * variable. The function @ref ump_dd_reference_release "ump_dd_reference_release" must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * @note You are not required to call @ref ump_dd_reference_add "ump_dd_reference_add"
+ * for UMP handles returned from
+ * @ref ump_dd_handle_create_from_secure_id "ump_dd_handle_create_from_secure_id",
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_add "ump_reference_add"
+ *
+ * @see ump_reference_add
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function should be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_release "ump_reference_release"
+ *
+ * @see ump_reference_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle mem);
+
+/* @} */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/** @} */ /* end group ump_kernel_space_api */
+
+/** @} */ /* end group ump_api */
+
+#endif /* _UMP_KERNEL_INTERFACE_H_ */
diff --git a/midgard/r11p0/kernel/license.txt b/midgard/r11p0/kernel/license.txt
new file mode 100755
index 0000000..77c14bd
--- /dev/null
+++ b/midgard/r11p0/kernel/license.txt
@@ -0,0 +1,198 @@
+GPLV2 LICENCE AGREEMENT FOR MALI GPUS LINUX KERNEL DEVICE DRIVERS SOURCE CODE
+
+THE USE OF THE SOFTWARE ACCOMPANYING THIS DOCUMENT IS EXPRESSLY SUBJECT TO THE TERMS OF THE GNU GENERAL PUBLIC LICENSE VERSION 2 AS PUBLISHED BY THE FREE SOFTWARE FOUNDATION AND SET OUT BELOW FOR REFERENCE (?GPL LICENCE?). ARM IS ONLY WILLING TO DISTRIBUTE THE SOFTWARE TO YOU ON CONDITION THAT YOU ACCEPT ALL OF THE TERMS IN THE GPL LICENCE PRIOR TO MODIFYING OR DISTRIBUTING THE SOFTWARE.
+
+
+
+Further for the period of three (3) years, ARM hereby offers to make available the source code of any part of the software program that is supplied as object code or in executable form.
+
+
+
+GPL Licence
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+Version 2, June 1991
+
+
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+
+
+
+Preamble
+
+
+
+The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too.
+
+
+
+When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things.
+
+
+
+To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it.
+
+
+
+For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
+
+
+
+We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
+
+
+
+Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations.
+
+
+
+Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all.
+
+
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+
+
+0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you".
+
+
+
+Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program).
+
+Whether that is true depends on what the Program does.
+
+
+
+1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty;
+
+and give any other recipients of the Program a copy of this License along with the Program.
+
+
+
+You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee.
+
+
+
+2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
+
+
+
+a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change.
+
+
+
+b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License.
+
+
+
+c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.)
+
+
+
+These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.
+
+
+
+Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program.
+
+
+
+In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.
+
+
+
+3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following:
+
+
+
+a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.)
+
+
+
+The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.
+
+
+
+If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code.
+
+
+
+4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.
+
+
+
+5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the
+
+Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it.
+
+
+
+6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein.
+
+You are not responsible for enforcing compliance by third parties to this License.
+
+
+
+7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program.
+
+
+
+If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances.
+
+
+
+It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.
+
+
+
+This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
+
+
+
+8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.
+
+
+
+9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+
+
+
+Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation.
+
+
+
+10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.
+
+
+
+NO WARRANTY
+
+
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+/end
+
diff --git a/midgard/r11p0/kernel/patches/integrate_kds_with_dma_buf.patch b/midgard/r11p0/kernel/patches/integrate_kds_with_dma_buf.patch
new file mode 100755
index 0000000..37458cf
--- /dev/null
+++ b/midgard/r11p0/kernel/patches/integrate_kds_with_dma_buf.patch
@@ -0,0 +1,188 @@
+diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
+index 9aa618a..30e07ea 100644
+--- a/drivers/base/Kconfig
++++ b/drivers/base/Kconfig
+@@ -192,4 +192,12 @@ config DMA_SHARED_BUFFER
+ 	  APIs extension; the file's descriptor can then be passed on to other
+ 	  driver.
+ 
++config DMA_SHARED_BUFFER_USES_KDS
++	bool "Synchronize access to dma_buf with KDS"
++	default n
++	select KDS
++	depends on DMA_SHARED_BUFFER
++	help
++	  This option adds a KDS resource within every dma_buf allocation.
++
+ endmenu
+diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
+index 20258e1..e4083fd 100644
+--- a/drivers/base/dma-buf.c
++++ b/drivers/base/dma-buf.c
+@@ -27,6 +27,8 @@
+ #include <linux/dma-buf.h>
+ #include <linux/anon_inodes.h>
+ #include <linux/export.h>
++#include <linux/poll.h>
++#include <linux/sched.h>
+ 
+ static inline int is_dma_buf_file(struct file *);
+ 
+@@ -40,6 +42,8 @@ static int dma_buf_release(struct inode *inode, struct file *file)
+ 	dmabuf = file->private_data;
+ 
+ 	dmabuf->ops->release(dmabuf);
++	kds_callback_term(&dmabuf->kds_cb);
++	kds_resource_term(&dmabuf->kds);
+ 	kfree(dmabuf);
+ 	return 0;
+ }
+@@ -61,9 +65,90 @@ static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
+ 	return dmabuf->ops->mmap(dmabuf, vma);
+ }
+ 
++
++static void dma_buf_kds_cb_fn (void *param1, void *param2)
++{
++	struct kds_resource_set **rset_ptr = param1;
++	struct kds_resource_set *rset = *rset_ptr;
++	wait_queue_head_t *wait_queue = param2;
++
++	kfree(rset_ptr);
++	kds_resource_set_release(&rset);
++	wake_up(wait_queue);
++}
++
++static int dma_buf_kds_check(struct kds_resource *kds,
++                             long unsigned int exclusive, int *poll_ret)
++{
++	/* Synchronous wait with 0 timeout - poll availability */
++	struct kds_resource_set *rset = kds_waitall(1,&exclusive,&kds,0);
++
++	if (IS_ERR(rset))
++		return POLLERR;
++
++	if (rset){
++		kds_resource_set_release(&rset);
++		*poll_ret = POLLIN | POLLRDNORM;
++		if (exclusive)
++			*poll_ret |=  POLLOUT | POLLWRNORM;
++		return 1;
++	}else{
++		return 0;
++	}
++}
++
++static unsigned int dma_buf_poll(struct file *file,
++                                 struct poll_table_struct *wait)
++{
++	struct dma_buf *dmabuf;
++	struct kds_resource *kds;
++	unsigned int ret = 0;
++
++	if (!is_dma_buf_file(file))
++		return POLLERR;
++
++	dmabuf = file->private_data;
++	kds    = &dmabuf->kds;
++
++	if (poll_does_not_wait(wait)){
++		/* Check for exclusive access (superset of shared) first */
++		if(!dma_buf_kds_check(kds, 1ul, &ret))
++			dma_buf_kds_check(kds, 0ul, &ret);
++	}else{
++		int events = poll_requested_events(wait);
++		unsigned long exclusive;
++		wait_queue_head_t *wq;
++		struct kds_resource_set **rset_ptr = kmalloc(sizeof(*rset_ptr), GFP_KERNEL);
++
++		if (!rset_ptr)
++			return POLL_ERR;
++
++		if (events & POLLOUT){
++			wq = &dmabuf->wq_exclusive;
++			exclusive = 1;
++		}else{
++			wq = &dmabuf->wq_shared;
++			exclusive = 0;
++		}
++		poll_wait(file, wq, wait);
++		ret = kds_async_waitall(rset_ptr, KDS_FLAG_LOCKED_WAIT, &dmabuf->kds_cb,
++		                        rset_ptr, wq, 1, &exclusive, &kds);
++
++		if (IS_ERR_VALUE(ret)){
++			ret = POLL_ERR;
++			kfree(rset_ptr);
++		}else{
++			/* Can't allow access until callback */
++			ret = 0;
++		}
++	}
++	return ret;
++}
++
+ static const struct file_operations dma_buf_fops = {
+ 	.release	= dma_buf_release,
+ 	.mmap		= dma_buf_mmap_internal,
++	.poll		= dma_buf_poll,
+ };
+ 
+ /*
+@@ -120,6 +205,11 @@ struct dma_buf *dma_buf_export(void *priv, const struct dma_buf_ops *ops,
+ 	mutex_init(&dmabuf->lock);
+ 	INIT_LIST_HEAD(&dmabuf->attachments);
+ 
++	init_waitqueue_head(&dmabuf->wq_exclusive);
++	init_waitqueue_head(&dmabuf->wq_shared);
++	kds_resource_init(&dmabuf->kds);
++	kds_callback_init(&dmabuf->kds_cb, 1, dma_buf_kds_cb_fn);
++
+ 	return dmabuf;
+ }
+ EXPORT_SYMBOL_GPL(dma_buf_export);
+diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
+index eb48f38..6e824d2 100644
+--- a/include/linux/dma-buf.h
++++ b/include/linux/dma-buf.h
+@@ -30,6 +30,8 @@
+ #include <linux/list.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/fs.h>
++#include <linux/kds.h>
++#include <linux/wait.h>
+ 
+ struct device;
+ struct dma_buf;
+@@ -121,6 +123,10 @@ struct dma_buf {
+ 	const struct dma_buf_ops *ops;
+ 	/* mutex to serialize list manipulation and attach/detach */
+ 	struct mutex lock;
++	struct kds_resource kds;
++	wait_queue_head_t wq_exclusive;
++	wait_queue_head_t wq_shared;
++	struct kds_callback kds_cb;
+ 	void *priv;
+ };
+ 
+@@ -156,6 +162,21 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
+ 	get_file(dmabuf->file);
+ }
+ 
++/**
++ * get_dma_buf_kds_resource - get a KDS resource for this dma-buf
++ * @dmabuf:	[in]	pointer to dma_buf
++ *
++ * Returns a KDS resource that represents the dma-buf. This should be used by
++ * drivers to synchronize access to the buffer. Note that the caller should
++ * ensure that a reference to the dma-buf exists from the call to
++ * kds_async_wait until kds_resource_set_release is called.
++ */
++static inline struct kds_resource *
++	get_dma_buf_kds_resource(struct dma_buf *dmabuf)
++{
++	return &dmabuf->kds;
++}
++
+ #ifdef CONFIG_DMA_SHARED_BUFFER
+ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+ 							struct device *dev);
diff --git a/midgard/r11p0/kernel/sconscript b/midgard/r11p0/kernel/sconscript
new file mode 100755
index 0000000..e6be64c
--- /dev/null
+++ b/midgard/r11p0/kernel/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+if env['backend'] == 'kernel' and int(env['kernel_modules']) == 1 and env['gpu'] != 'none':
+	SConscript('drivers/sconscript')
+
+	if Glob('tests/sconscript'):
+		SConscript('tests/sconscript')
diff --git a/midgard/r12p0/android/patches/K/android-k.sh b/midgard/r12p0/android/patches/K/android-k.sh
new file mode 100755
index 0000000..4e44d5e
--- /dev/null
+++ b/midgard/r12p0/android/patches/K/android-k.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+
+#
+# Copyright (C) 2014-2015 ARM Limited. All rights reserved.
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Android K script for LLVM 3.5 and above support
+#
+# This script applies two minor patches and checks-out libcxx and libcxxabi libraries
+# adding C++11 support in Android K required for LLVM3.5 and above.
+
+
+if [ -z "$ANDROID_BUILD_TOP" ]; then
+    echo "[ANDROID-PATCH] This script must be executed in an Android build environment"
+    echo "                ANDROID_BUILD_TOP is undefined"
+    echo "                please do \"source build/envseup.sh; lunch\"."
+    exit 1
+fi
+
+ANDROID_REPOSITORY_URL="https://android.googlesource.com/"
+
+REFERENCE_ARGUMENT=""
+
+# Proccess arguments. Valid arguments are:
+#        -u ARG      Override the Android repository url
+#        -r ARG      Provide a path to a reference git repository
+while getopts "u:r:" opt; do
+  case $opt in
+    u)
+      ANDROID_REPOSITORY_URL=$OPTARG
+      ;;
+
+    r)
+      REFERENCE_ARGUMENT="--reference "$OPTARG
+      ;;
+
+    \?)
+      echo "Invalid option: -$OPTARG" >&2
+      exit 1
+
+      ;;
+    :)
+      echo "Option -$OPTARG requires an argument." >&2
+      exit 1
+      ;;
+
+  esac
+done
+
+MIDGARD_DDK_WD=$(pwd)
+
+cd $ANDROID_BUILD_TOP/external
+
+#Checkout libcxx from Android Master required by LLVM due to c++11
+git clone $ANDROID_REPOSITORY_URL/platform/external/libcxx $REFERENCE_ARGUMENT
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] git clone of libcxx failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+cd libcxx
+git checkout 27ae7cb782821a4f2d3813522ee411cd978bcd85 -q
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] checkout of libcxx@27ae7cb782821a4f2d3813522ee411cd978bcd85 revision failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/libcxx.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Patching libcxx failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Checkout libcxxabi from Android Master required by LLVM due to c++11
+cd $ANDROID_BUILD_TOP/external
+git clone $ANDROID_REPOSITORY_URL/platform/external/libcxxabi $REFERENCE_ARGUMENT
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] git clone of libcxxabi failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+cd libcxxabi
+git checkout 285d67f35f6044cf733091e36248405ca967c62c -q
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] checkout of libcxxabi@285d67f35f6044cf733091e36248405ca967c62c revision failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+
+#Apply patch to bioncs
+cd $ANDROID_BUILD_TOP/bionic/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/bionics.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Patching bionics failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Add locale support to bioncs
+cd $ANDROID_BUILD_TOP/bionic/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/locale.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Adding locale support failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Add LLVM backend ARM fix to support -O0 compilation
+cd $ANDROID_BUILD_TOP/external/llvm/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/llvm.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Fixing ARM LLVM backend failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+echo "[ANDROID-PATCH] Patching completed successfully"
+cd $MIDGARD_DDK_WD
diff --git a/midgard/r12p0/android/patches/K/bionics.diff b/midgard/r12p0/android/patches/K/bionics.diff
new file mode 100755
index 0000000..b86cedf
--- /dev/null
+++ b/midgard/r12p0/android/patches/K/bionics.diff
@@ -0,0 +1,202 @@
+diff --git a/libc/Android.mk b/libc/Android.mk
+index 9610c14..d3dbe81 100644
+--- a/libc/Android.mk
++++ b/libc/Android.mk
+@@ -214,6 +215,7 @@ libc_bionic_src_:= \
+     bionic/libc_init_common.cpp \
+     bionic/libc_logging.cpp \
+     bionic/libgen.cpp \
++    bionic/locale.cpp \
+     bionic/mmap.cpp \
+     bionic/pthread_attr.cpp \
+     bionic/pthread_detach.cpp \
+@@ -232,7 +232,6 @@ libc_bionic_src_files := \
+     bionic/scandir.cpp \
+     bionic/sched_getaffinity.cpp \
+     bionic/__set_errno.cpp \
+-    bionic/setlocale.cpp \
+     bionic/signalfd.cpp \
+     bionic/sigwait.cpp \
+     bionic/statvfs.cpp \
+diff --git a/libc/bionic/wchar.cpp b/libc/bionic/wchar.cpp
+index 50a3875..cc830b7 100644
+--- a/libc/bionic/wchar.cpp
++++ b/libc/bionic/wchar.cpp
+@@ -96,6 +96,7 @@ int swscanf(const wchar_t* /*s*/, const wchar_t* /*format*/, ... ) {
+
+ int iswalnum(wint_t wc) { return isalnum(wc); }
+ int iswalpha(wint_t wc) { return isalpha(wc); }
++int iswblank(wint_t wc) { return isblank(wc); }
+ int iswcntrl(wint_t wc) { return iscntrl(wc); }
+ int iswdigit(wint_t wc) { return isdigit(wc); }
+ int iswgraph(wint_t wc) { return isgraph(wc); }
+diff --git a/libc/include/assert.h b/libc/include/assert.h
+index 62470f5..361a5ff 100644
+--- a/libc/include/assert.h
++++ b/libc/include/assert.h
+@@ -60,6 +60,6 @@
+ #endif
+
+ __BEGIN_DECLS
+-__dead void __assert(const char *, int, const char *);
+-__dead void __assert2(const char *, int, const char *, const char *);
++__dead void __assert(const char *, int, const char *) __noreturn;
++__dead void __assert2(const char *, int, const char *, const char *) __noreturn;
+ __END_DECLS
+diff --git a/libc/include/ctype.h b/libc/include/ctype.h
+index 5557e31..a66df12 100644
+--- a/libc/include/ctype.h
++++ b/libc/include/ctype.h
+@@ -45,11 +45,14 @@
+ #define	_CTYPE_U	0x01
+ #define	_CTYPE_L	0x02
+ #define	_CTYPE_N	0x04
++#define	_CTYPE_D	0x04
+ #define	_CTYPE_S	0x08
+ #define	_CTYPE_P	0x10
+ #define	_CTYPE_C	0x20
+ #define	_CTYPE_X	0x40
+ #define	_CTYPE_B	0x80
++#define	_CTYPE_R	(_CTYPE_P|_CTYPE_U|_CTYPE_L|_CTYPE_D|_CTYPE_B)
++#define	_CTYPE_A	(_CTYPE_L|_CTYPE_U)
+
+ __BEGIN_DECLS
+
+diff --git a/libc/include/locale.h b/libc/include/locale.h
+index 65b5c7d..6989851 100644
+--- a/libc/include/locale.h
++++ b/libc/include/locale.h
+@@ -29,6 +29,7 @@
+ #define _LOCALE_H_
+
+ #include <sys/cdefs.h>
++#include <xlocale.h>
+
+ __BEGIN_DECLS
+
+@@ -43,17 +44,65 @@ enum {
+     LC_PAPER     = 7,
+     LC_NAME      = 8,
+     LC_ADDRESS   = 9,
+-
+     LC_TELEPHONE      = 10,
+     LC_MEASUREMENT    = 11,
+     LC_IDENTIFICATION = 12
+ };
+
+-extern char *setlocale(int category, const char *locale);
++#define LC_CTYPE_MASK          (1 << LC_CTYPE)
++#define LC_NUMERIC_MASK        (1 << LC_NUMERIC)
++#define LC_TIME_MASK           (1 << LC_TIME)
++#define LC_COLLATE_MASK        (1 << LC_COLLATE)
++#define LC_MONETARY_MASK       (1 << LC_MONETARY)
++#define LC_MESSAGES_MASK       (1 << LC_MESSAGES)
++#define LC_PAPER_MASK          (1 << LC_PAPER)
++#define LC_NAME_MASK           (1 << LC_NAME)
++#define LC_ADDRESS_MASK        (1 << LC_ADDRESS)
++#define LC_TELEPHONE_MASK      (1 << LC_TELEPHONE)
++#define LC_MEASUREMENT_MASK    (1 << LC_MEASUREMENT)
++#define LC_IDENTIFICATION_MASK (1 << LC_IDENTIFICATION)
++
++#define LC_ALL_MASK (LC_CTYPE_MASK | LC_NUMERIC_MASK | LC_TIME_MASK | LC_COLLATE_MASK | \
++                     LC_MONETARY_MASK | LC_MESSAGES_MASK | LC_PAPER_MASK | LC_NAME_MASK | \
++                     LC_ADDRESS_MASK | LC_TELEPHONE_MASK | LC_MEASUREMENT_MASK | \
++                     LC_IDENTIFICATION_MASK)
++
++struct lconv {
++    char* decimal_point;
++    char* thousands_sep;
++    char* grouping;
++    char* int_curr_symbol;
++    char* currency_symbol;
++    char* mon_decimal_point;
++    char* mon_thousands_sep;
++    char* mon_grouping;
++    char* positive_sign;
++    char* negative_sign;
++    char  int_frac_digits;
++    char  frac_digits;
++    char  p_cs_precedes;
++    char  p_sep_by_space;
++    char  n_cs_precedes;
++    char  n_sep_by_space;
++    char  p_sign_posn;
++    char  n_sign_posn;
++    char  int_p_cs_precedes;
++    char  int_p_sep_by_space;
++    char  int_n_cs_precedes;
++    char  int_n_sep_by_space;
++    char  int_p_sign_posn;
++    char  int_n_sign_posn;
++};
++
++struct lconv* localeconv(void);
++
++locale_t duplocale(locale_t);
++void freelocale(locale_t);
++locale_t newlocale(int, const char*, locale_t);
++char* setlocale(int, const char*);
++locale_t uselocale(locale_t);
+
+-/* Make libstdc++-v3 happy.  */
+-struct lconv { };
+-struct lconv *localeconv(void);
++#define LC_GLOBAL_LOCALE ((locale_t) -1L)
+
+ __END_DECLS
+
+diff --git a/libc/include/stdio.h b/libc/include/stdio.h
+index 23fc944..28685c7 100644
+--- a/libc/include/stdio.h
++++ b/libc/include/stdio.h
+@@ -469,7 +469,10 @@ int vsprintf(char *dest, const char *format, __va_list ap)
+ }
+
+ #if defined(__clang__)
+-#define snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
++  #if !defined(snprintf)
++    #define __wrap_snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
++    #define snprintf(...) __wrap_snprintf(__VA_ARGS__)
++  #endif
+ #else
+ __BIONIC_FORTIFY_INLINE
+ __printflike(3, 4)
+@@ -481,7 +484,10 @@ int snprintf(char *dest, size_t size, const char *format, ...)
+ #endif
+
+ #if defined(__clang__)
+-#define sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
++  #if !defined(sprintf)
++    #define __wrap_sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
++    #define sprintf(...) __wrap_sprintf(__VA_ARGS__)
++  #endif
+ #else
+ __BIONIC_FORTIFY_INLINE
+ __printflike(2, 3)
+diff --git a/libc/include/stdlib.h b/libc/include/stdlib.h
+index 9fa84c1..188da69 100644
+--- a/libc/include/stdlib.h
++++ b/libc/include/stdlib.h
+@@ -68,6 +68,10 @@ static __inline__ float strtof(const char *nptr, char **endptr)
+     return (float)strtod(nptr, endptr);
+ }
+
++static __inline__ long double strtold(const char* s, char** end_ptr) {
++  return (long double)strtod(s, end_ptr);
++}
++
+ extern int atoi(const char *) __purefunc;
+ extern long atol(const char *) __purefunc;
+ extern long long atoll(const char *) __purefunc;
+diff --git a/libc/include/wchar.h b/libc/include/wchar.h
+index 76ac02c..c413806 100644
+--- a/libc/include/wchar.h
++++ b/libc/include/wchar.h
+@@ -77,6 +77,7 @@ extern int               fwprintf(FILE *, const wchar_t *, ...);
+ extern int               fwscanf(FILE *, const wchar_t *, ...);
+ extern int               iswalnum(wint_t);
+ extern int               iswalpha(wint_t);
++extern int               iswblank(wint_t);
+ extern int               iswcntrl(wint_t);
+ extern int               iswdigit(wint_t);
+ extern int               iswgraph(wint_t);
diff --git a/midgard/r12p0/android/patches/K/libcxx.diff b/midgard/r12p0/android/patches/K/libcxx.diff
new file mode 100755
index 0000000..a10d75c
--- /dev/null
+++ b/midgard/r12p0/android/patches/K/libcxx.diff
@@ -0,0 +1,13 @@
+diff --git a/include/cstdlib b/include/cstdlib
+index 152b891..e45a8fd 100644
+--- a/include/cstdlib
++++ b/include/cstdlib
+@@ -126,7 +126,7 @@ using ::realloc;
+ using ::abort;
+ using ::atexit;
+ using ::exit;
+-using ::_Exit;
++
+ using ::getenv;
+ using ::system;
+ using ::bsearch;
diff --git a/midgard/r12p0/android/patches/K/license.txt b/midgard/r12p0/android/patches/K/license.txt
new file mode 100755
index 0000000..d645695
--- /dev/null
+++ b/midgard/r12p0/android/patches/K/license.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/midgard/r12p0/android/patches/K/llvm.diff b/midgard/r12p0/android/patches/K/llvm.diff
new file mode 100755
index 0000000..d41d279
--- /dev/null
+++ b/midgard/r12p0/android/patches/K/llvm.diff
@@ -0,0 +1,14 @@
+diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
+index 8cdb853..868dd47 100644
+--- a/lib/Target/ARM/ARMInstrInfo.cpp
++++ b/lib/Target/ARM/ARMInstrInfo.cpp
+@@ -130,6 +130,10 @@ namespace {
+         MIB.addImm(0);
+       AddDefaultPred(MIB);
+
++      // Fix the GOT address by adding pc.
++      BuildMI(FirstMBB, MBBI, DL, TII.get(ARM::tPICADD), GlobalBaseReg)
++          .addReg(GlobalBaseReg).addImm(ARMPCLabelIndex);
++
+       return true;
+     }
diff --git a/midgard/r12p0/android/patches/K/locale.diff b/midgard/r12p0/android/patches/K/locale.diff
new file mode 100755
index 0000000..9c4c140
--- /dev/null
+++ b/midgard/r12p0/android/patches/K/locale.diff
@@ -0,0 +1,204 @@
+diff --git a/libc/bionic/locale.cpp b/libc/bionic/locale.cpp
+new file mode 100644
+index 0000000..4fade84
+--- /dev/null
++++ b/libc/bionic/locale.cpp
+@@ -0,0 +1,156 @@
++/*
++ * Copyright (C) 2008 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++#include <locale.h>
++#include <pthread.h>
++#include <stdlib.h>
++
++// We currently support a single locale, the "C" locale (also known as "POSIX").
++
++struct __locale_t {
++  // Because we only support one locale, these are just tokens with no data.
++};
++
++static pthread_once_t gLocaleOnce = PTHREAD_ONCE_INIT;
++static lconv gLocale;
++
++static pthread_once_t gUselocaleKeyOnce = PTHREAD_ONCE_INIT;
++static pthread_key_t gUselocaleKey;
++
++static void __locale_init() {
++  gLocale.decimal_point = const_cast<char*>(".");
++
++  char* not_available = const_cast<char*>("");
++  gLocale.thousands_sep = not_available;
++  gLocale.grouping = not_available;
++  gLocale.int_curr_symbol = not_available;
++  gLocale.currency_symbol = not_available;
++  gLocale.mon_decimal_point = not_available;
++  gLocale.mon_thousands_sep = not_available;
++  gLocale.mon_grouping = not_available;
++  gLocale.positive_sign = not_available;
++  gLocale.negative_sign = not_available;
++
++  gLocale.int_frac_digits = CHAR_MAX;
++  gLocale.frac_digits = CHAR_MAX;
++  gLocale.p_cs_precedes = CHAR_MAX;
++  gLocale.p_sep_by_space = CHAR_MAX;
++  gLocale.n_cs_precedes = CHAR_MAX;
++  gLocale.n_sep_by_space = CHAR_MAX;
++  gLocale.p_sign_posn = CHAR_MAX;
++  gLocale.n_sign_posn = CHAR_MAX;
++  gLocale.int_p_cs_precedes = CHAR_MAX;
++  gLocale.int_p_sep_by_space = CHAR_MAX;
++  gLocale.int_n_cs_precedes = CHAR_MAX;
++  gLocale.int_n_sep_by_space = CHAR_MAX;
++  gLocale.int_p_sign_posn = CHAR_MAX;
++  gLocale.int_n_sign_posn = CHAR_MAX;
++}
++
++static void __uselocale_key_init() {
++  pthread_key_create(&gUselocaleKey, NULL);
++}
++
++static bool __is_supported_locale(const char* locale) {
++  return (strcmp(locale, "") == 0 || strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0);
++}
++
++static locale_t __new_locale() {
++  return reinterpret_cast<locale_t>(malloc(sizeof(__locale_t)));
++}
++
++lconv* localeconv() {
++  pthread_once(&gLocaleOnce, __locale_init);
++  return &gLocale;
++}
++
++locale_t duplocale(locale_t l) {
++  locale_t clone = __new_locale();
++  if (clone != NULL && l != LC_GLOBAL_LOCALE) {
++    *clone = *l;
++  }
++  return clone;
++}
++
++void freelocale(locale_t l) {
++  free(l);
++}
++
++locale_t newlocale(int category_mask, const char* locale_name, locale_t /*base*/) {
++  // Is 'category_mask' valid?
++  if ((category_mask & ~LC_ALL_MASK) != 0) {
++    errno = EINVAL;
++    return NULL;
++  }
++
++  if (!__is_supported_locale(locale_name)) {
++    errno = ENOENT;
++    return NULL;
++  }
++
++  return __new_locale();
++}
++
++char* setlocale(int category, char const* locale_name) {
++  // Is 'category' valid?
++  if (category < LC_CTYPE || category > LC_IDENTIFICATION) {
++    errno = EINVAL;
++    return NULL;
++  }
++
++  // Caller just wants to query the current locale?
++  if (locale_name == NULL) {
++    return const_cast<char*>("C");
++  }
++
++  // Caller wants one of the mandatory POSIX locales?
++  if (__is_supported_locale(locale_name)) {
++    return const_cast<char*>("C");
++  }
++
++  // We don't support any other locales.
++  errno = ENOENT;
++  return NULL;
++}
++
++locale_t uselocale(locale_t new_locale) {
++  pthread_once(&gUselocaleKeyOnce, __uselocale_key_init);
++
++  locale_t old_locale = static_cast<locale_t>(pthread_getspecific(gUselocaleKey));
++
++  // If this is the first call to uselocale(3) on this thread, we return LC_GLOBAL_LOCALE.
++  if (old_locale == NULL) {
++    old_locale = LC_GLOBAL_LOCALE;
++  }
++
++  if (new_locale != NULL) {
++    pthread_setspecific(gUselocaleKey, new_locale);
++  }
++
++  return old_locale;
++}
+diff --git a/libc/include/xlocale.h b/libc/include/xlocale.h
+new file mode 100644
+index 0000000..f7eb8f4
+--- /dev/null
++++ b/libc/include/xlocale.h
+@@ -0,0 +1,36 @@
++/*
++ * Copyright (C) 2014 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++#ifndef _XLOCALE_H_
++#define _XLOCALE_H_
++
++/* If we just use void* here, GCC exposes that in error messages. */
++struct __locale_t;
++typedef struct __locale_t* locale_t;
++
++#endif /* _XLOCALE_H_ */
diff --git a/midgard/r12p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt b/midgard/r12p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
new file mode 100755
index 0000000..46b704b
--- /dev/null
+++ b/midgard/r12p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
@@ -0,0 +1,92 @@
+#
+# (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+* ARM Mali Midgard devices
+
+
+Required properties:
+
+- compatible : Should be mali<chip>, replacing digits with x from the back,
+until malit<Major>xx, ending with arm,mali-midgard, the latter not optional.
+- reg : Physical base address of the device and length of the register area.
+- interrupts : Contains the three IRQ lines required by T-6xx devices
+- interrupt-names : Contains the names of IRQ resources in the order they were
+provided in the interrupts property. Must contain: "JOB, "MMU", "GPU".
+
+Optional:
+
+- clocks : Phandle to clock for the Mali T-6xx device.
+- clock-names : Shall be "clk_mali".
+- mali-supply : Phandle to regulator for the Mali device. Refer to
+Documentation/devicetree/bindings/regulator/regulator.txt for details.
+- operating-points : Refer to Documentation/devicetree/bindings/power/opp.txt
+for details.
+- snoop_enable_smc : SMC function ID to enable CCI snooping on the GPU port(s).
+- snoop_disable_smc : SMC function ID to disable CCI snooping on the GPU port(s).
+- jm_config : For T860/T880. Sets job manager configuration. An array containing:
+	- 1 to override the TIMESTAMP value, 0 otherwise.
+	- 1 to override clock gate, forcing them to be always on, 0 otherwise.
+	- 1 to enable job throttle, limiting the number of cores that can be started
+	  simultaneously, 0 otherwise.
+	- Value between 0 and 63 (including). If job throttle is enabled, this is one
+	  less than the number of cores that can be started simultaneously.
+- power_model : Sets power model parameters. Note that this model was designed for the Juno
+	        platform, and may not be suitable for other platforms. A structure containing :
+	- compatible: Should be arm,mali-simple-power-model
+	- voltage: Voltage at reference point. Specified in mV.
+	- frequency: Frequency at reference point. Specified in MHz.
+	- dynamic-power: Dynamic power at reference frequency and voltage. Specified in mW.
+	- static-power: Static power at reference frequency. Specified in mW.
+	- ts: An array containing coefficients for the temperature scaling factor.
+	  Used as : tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0], where T = temperature
+	- thermal-zone: A string identifying the thermal zone used for the GPU
+- system-coherency : Sets the coherency protocol to be used for coherent
+		     accesses made from the GPU.
+		     If not set then no coherency is used.
+	- 0  : ACE-Lite
+	- 1  : ACE
+	- 31 : No coherency
+
+Example for a Mali-T602:
+
+gpu@0xfc010000 {
+	compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
+	reg = <0xfc010000 0x4000>;
+	interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+	interrupt-names = "JOB", "MMU", "GPU";
+
+	clocks = <&pclk_mali>;
+	clock-names = "clk_mali";
+	mali-supply = <&vdd_mali>;
+	operating-points = <
+		/* KHz   uV */
+		533000 1250000,
+		450000 1150000,
+		400000 1125000,
+		350000 1075000,
+		266000 1025000,
+		160000  925000,
+		100000  912500,
+	>;
+	power_model {
+		compatible = "arm,mali-simple-power-model";
+		voltage = <800>;
+		frequency = <500>;
+		static-power = <500>;
+		dynamic-power = <1500>;
+		ts = <20000 2000 (-20) 2>;
+		thermal-zone = "gpu";
+	};
+};
diff --git a/midgard/r12p0/kernel/Documentation/dma-buf-test-exporter.txt b/midgard/r12p0/kernel/Documentation/dma-buf-test-exporter.txt
new file mode 100755
index 0000000..4208010
--- /dev/null
+++ b/midgard/r12p0/kernel/Documentation/dma-buf-test-exporter.txt
@@ -0,0 +1,40 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+=====================
+dma-buf-test-exporter
+=====================
+
+Overview
+--------
+
+The dma-buf-test-exporter is a simple exporter of dma_buf objects.
+It has a private API to allocate and manipulate the buffers which are represented as dma_buf fds.
+The private API allows:
+* simple allocation of physically non-contiguous buffers
+* simple allocation of physically contiguous buffers
+* query kernel side API usage stats (number of attachments, number of mappings, mmaps)
+* failure mode configuration (fail attach, mapping, mmap)
+* kernel side memset of buffers
+
+The buffers support all of the dma_buf API, including mmap.
+
+It supports being compiled as a module both in-tree and out-of-tree.
+
+See include/linux/dma-buf-test-exporter.h for the ioctl interface.
+See Documentation/dma-buf-sharing.txt for details on dma_buf.
+
+
diff --git a/midgard/r12p0/kernel/Documentation/kds.txt b/midgard/r12p0/kernel/Documentation/kds.txt
new file mode 100755
index 0000000..639288c
--- /dev/null
+++ b/midgard/r12p0/kernel/Documentation/kds.txt
@@ -0,0 +1,268 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+==============================
+kds - Kernel Dependency System
+==============================
+
+Introduction
+------------
+kds provides a mechanism for clients to atomically lock down multiple abstract resources.
+This can be done either synchronously or asynchronously.
+Abstract resources is used to allow a set of clients to use kds to control access to any
+resource, an example is structured memory buffers.
+
+kds supports that buffer is locked for exclusive access and sharing of buffers.
+
+kds can be built as either a integrated feature of the kernel or as a module.
+It supports being compiled as a module both in-tree and out-of-tree.
+
+
+Concepts
+--------
+A core concept in kds is abstract resources.
+A kds resource is just an abstraction for some client object, kds doesn't care what it is.
+Typically EGL will consider UMP buffers as being a resource, thus each UMP buffer has
+a kds resource for synchronization to the buffer.
+
+kds allows a client to create and destroy the abstract resource objects.
+A new resource object is made available asap (it is just a simple malloc with some initializations),
+while destroy it requires some external synchronization.
+
+The other core concept in kds is consumer of resources.
+kds is requested to allow a client to consume a set of resources and the client will be notified when it can consume the resources.
+
+Exclusive access allows only one client to consume a resource.
+Shared access permits multiple consumers to acceess a resource concurrently.
+
+
+APIs
+----
+kds provides simple resource allocate and destroy functions.
+Clients use this to instantiate and control the lifetime of the resources kds manages.
+
+kds provides two ways to wait for resources:
+- Asynchronous wait: the client specifies a function pointer to be called when wait is over
+- Synchronous wait: Function blocks until access is gained.
+
+The synchronous API has a timeout for the wait.
+The call can early out if a signal is delivered.
+
+After a client is done consuming the resource kds must be notified to release the resources and let some other client take ownership.
+This is done via resource set release call.
+
+A Windows comparison:
+kds implements WaitForMultipleObjectsEx(..., bWaitAll = TRUE, ...) but also has an asynchronous version in addition.
+kds resources can be seen as being the same as NT object manager resources.
+
+Internals
+---------
+kds guarantees atomicity when a set of resources is operated on.
+This is implemented via a global resource lock which is taken by kds when it updates resource objects.
+
+Internally a resource in kds is a linked list head with some flags.
+
+When a consumer requests access to a set of resources it is queued on each of the resources.
+The link from the consumer to the resources can be triggered. Once all links are triggered
+the registered callback is called or the blocking function returns.
+A link is considered triggered if it is the first on the list of consumers of a resource,
+or if all the links ahead of it is marked as shared and itself is of the type shared.
+
+When the client is done consuming the consumer object is removed from the linked lists of
+the resources and a potential new consumer becomes the head of the resources.
+As we add and remove consumers atomically across all resources we can guarantee that
+we never introduces a A->B + B->A type of loops/deadlocks.
+
+
+kbase/base implementation
+-------------------------
+A HW job needs access to a set of shared resources.
+EGL tracks this and encodes the set along with the atom in the ringbuffer.
+EGL allocates a (k)base dep object to represent the dependency to the set of resources and encodes that along with the list of resources.
+This dep object is use to create a dependency from a job chain(atom) to the resources it needs to run.
+When kbase decodes the atom in the ringbuffer it finds the set of resources and calls kds to request all the needed resources.
+As EGL needs to know when the kds request is delivered a new base event object is needed: atom enqueued. This event is only delivered for atoms which uses kds.
+The callback kbase registers trigger the dependency object described which would trigger the existing JD system to release the job chain.
+When the atom is done kds resource set release is call to release the resources.
+
+EGL will typically use exclusive access to the render target, while all buffers used as input can be marked as shared.
+
+
+Buffer publish/vsync
+--------------------
+EGL will use a separate ioctl or DRM flip to request the flip.
+If the LCD driver is integrated with kds EGL can do these operations early.
+The LCD driver must then implement the ioctl or DRM flip to be asynchronous with kds async call.
+The LCD driver binds a kds resource to each virtual buffer (2 buffers in case of double-buffering).
+EGL will make a dependency to the target kds resource in the kbase atom.
+After EGL receives a atom enqueued event it can ask the LCD driver to pan to the target kds resource.
+When the atom is completed it'll release the resource and the LCD driver will get its callback.
+In the callback it'll load the target buffer into the DMA unit of the LCD hardware.
+The LCD driver will be the consumer of both buffers for a short period.
+The LCD driver will call kds resource set release on the previous on-screen buffer when the next vsync/dma read end is handled.
+
+===============================================
+Kernel driver kds client design considerations
+=============================================== 
+
+Number of resources
+--------------------
+
+The kds api allows a client to wait for ownership of a number of resources, where by the client does not take on ownership of any of the resources in the resource set 
+until all of the resources in the set are released.  Consideration must be made with respect to performance, as waiting on large number of resources will incur
+a greater overhead and may increase system latency.  It may be worth considering how independent each of the resources are, for example if the same set of resources 
+are waited upon by each of the clients, then it may be possible to aggregate these into one resource that each client waits upon.
+
+Clients with shared access
+---------------------------
+
+The kds api allows a number of clients to gain shared access to a resource simultaneously, consideration must be made with respect to performance, large numbers of clients
+wanting shared access can incur a performance penalty and may increase system latency, specifically when the clients are granted access. Having an excessively high 
+number of clients with shared access should be avoided, consideration should be made to the call back configuration being used. See Callbacks and Scenario 1 below.
+
+Callbacks
+----------
+
+Careful consideration must be made as to which callback type is most appropriate for kds clients, direct callbacks are called immediately from the context in which the
+ownership of the resource is passed to the next waiter in the list.  Where as when using deferred callbacks the callback is deferred and called from outside the context
+that is relinquishing ownership, while this reduces the latency in the releasing clients context it does incur a cost as there is more latency between a resource 
+becoming free and the new client owning the resource callback being executed.  
+
+Obviously direct callbacks have a performance advantage, as the call back is immediate and does not have to wait for the kernel to context switch to schedule in the 
+execution of the callback.  
+
+However as the callback is immediate and within the context that is granting ownership it is important that the callback perform the MINIMUM amount of work necessary, 
+long call backs could cause poor system latency.  Special care and attention must be taken if the direct callbacks can be called from IRQ handlers, such as when 
+kds_resource_set_release is called from an IRQ handler, in this case you have to avoid any calls that may sleep.  
+
+Deferred contexts have the advantage that the call backs are deferred until they are scheduled by the kernel, therefore they are allowed to call functions that may sleep
+and if scheduled from IRQ context not incur as much system latency as would be seen with direct callbacks from within the IRQ.
+
+Once the clients callback has been called, the client is considered to be owning the resource.  Within the callback the client may only need to perform a small amount of work
+before the client need to give up owner ship.  The kds_resource_release function may be called from with in the call back, but consideration must be made when using direct 
+callbacks, with both respect to execution time and stack usage. Consider the example in Scenario 2 with direct callbacks:
+
+Scenario 1 - Shared client access - direct callbacks:
+
+Resources: X
+Clients: A(S), B(S), C(S), D(S), E(E)
+where: (S) = shared user, (E) = exclusive
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+}
+
+Where <client> is either A,B,C,D
+                                        Queue   |Owner 
+1.  E Requests X exclusive                      |
+2.  E Owns     X exclusive                      |E
+3.  A Requests X shared                        A|E
+4.  B Requests X shared                       BA|E
+5.  C Requests X shared                      CBA|E
+6.  D Requests X shared                     DCBA|E
+7.  E Releases X                                |DCBA
+8.  A Owns X shared                             |DCBA
+9.  B Owns X shared                             |DCBA
+10. C Owns X shared                             |DCBA
+11. D Owns X shared                             |DCBA
+
+At point 7 it is important to note that when E releases X; A,B,C and D become the new shared owners of X and the call back for each of the client(A,B,C,D) triggered, so consideration
+must be made as to whether a direct or deferred callback is suitable, using direct callbacks would result in the call graph.
+
+Call graph when E releases X:
+    kds_resource_set_release( .. )
+        +->client_A_cb( .. )
+        +->client_B_cb( .. )
+        +->client_C_cb( .. )
+        +->client_D_cb( .. )
+
+
+Scenario 2 - Immediate resource release - direct callbacks:
+
+Resource: X
+Clients: A, B, C, D
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+    kds_resource_set_release( .. );
+}
+
+Where <client> is either A,B,C,D
+
+1. A Owns     X exclusive
+2. B Requests X exclusive	(direct callback)
+3. C Requests X exclusive	(direct callback)
+4. D Requests X exclusive	(direct callback)
+5. A Releases X
+
+Call graph when A releases X:
+    kds_resource_set_release( .. )
+        +->client_B_cb( .. )
+            +->kds_resource_set_release( .. )
+                +->client_C_cb( .. )
+                    +->kds_resource_set_release( .. )
+                        +->client_D_cb( .. )
+
+As can be seen when a client releases the resource, with direct call backs it is possible to create nested calls
+
+IRQ Considerations
+-------------------
+
+Usage of kds_resource_release in IRQ handlers should be carefully considered.
+
+Things to keep in mind:
+
+1.) Are you using direct or deferred callbacks?
+2.) How many resources are you releasing?
+3.) How many shared waiters are pending on the resource?
+
+Releasing ownership and wait cancellation
+------------------------------------------
+
+Client Wait Cancellation
+-------------------------
+
+It may be necessary in certain circumstances for the client to cancel the wait for kds resources for error handling, process termination etc.  Cancellation is 
+performed using kds_resource_set_release or kds_resource_set_release_sync using the rset that was received from kds_async_waitall, kds_resource_set_release_sync 
+being used for waits which are using deferred callbacks.
+
+It is possible that while the request to cancel the wait is being issued by the client, the client is granted access to the resources.  Normally after the client
+has taken ownership and finishes with that resource, it will release ownership to signal other waiters which are pending, this causes a race with the cancellation.
+To prevent KDS trying to remove a wait twice from the internal list and accessing memory that is potentially freed, it is very important that all releasers use the
+same rset pointer.  Here is a simplified example of bad usage that must be avoided in any client implementation:
+
+Senario 3 - Bad release from multiple contexts:
+
+	This scenaro is highlighting bad usage of the kds API
+
+	kds_resource_set * rset;
+	kds_resource_set * rset_copy;
+
+	kds_async_waitall( &rset, ... ... ... );
+
+	/* Don't do this */
+	rset_copy = rset;
+
+Context A:
+	kds_resource_set_release( &rset )
+
+Context B:
+	kds_resource_set_release( &rset_copy )
+
diff --git a/midgard/r12p0/kernel/drivers/base/dma_buf_lock/sconscript b/midgard/r12p0/kernel/drivers/base/dma_buf_lock/sconscript
new file mode 100755
index 0000000..8212693
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/dma_buf_lock/sconscript
@@ -0,0 +1,27 @@
+#
+# (C) COPYRIGHT 2012, 2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+search_term = '^[\ ]*CONFIG_DMA_SHARED_BUFFER_USES_KDS[\ ]*=[\ ]*y'
+for line in open(linux_config_file, 'r'):
+	if re.search(search_term, line):
+		SConscript( 'src/sconscript' )
+		if env['tests'] and Glob('tests/sconscript'):
+			SConscript( 'tests/sconscript' )
+		break
diff --git a/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/Kbuild b/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
new file mode 100755
index 0000000..68dad07
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-m := dma_buf_lock.o
+endif
diff --git a/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/Makefile b/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/Makefile
new file mode 100755
index 0000000..cf76132
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: dma_buf_lock
+
+dma_buf_lock:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include"
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c b/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
new file mode 100755
index 0000000..9613ffc
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/dma-buf.h>
+#include <linux/kds.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+
+#include "dma_buf_lock.h"
+
+/* Maximum number of buffers that a single handle can address */
+#define DMA_BUF_LOCK_BUF_MAX 32
+
+#define DMA_BUF_LOCK_DEBUG 1
+
+static dev_t dma_buf_lock_dev;
+static struct cdev dma_buf_lock_cdev;
+static struct class *dma_buf_lock_class;
+static char dma_buf_lock_dev_name[] = "dma_buf_lock";
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static struct file_operations dma_buf_lock_fops =
+{
+	.owner   = THIS_MODULE,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = dma_buf_lock_ioctl,
+#else
+	.ioctl   = dma_buf_lock_ioctl,
+#endif
+	.compat_ioctl   = dma_buf_lock_ioctl,
+};
+
+typedef struct dma_buf_lock_resource
+{
+	int *list_of_dma_buf_fds;               /* List of buffers copied from userspace */
+	atomic_t locked;                        /* Status of lock */
+	struct dma_buf **dma_bufs;
+	struct kds_resource **kds_resources;    /* List of KDS resources associated with buffers */
+	struct kds_resource_set *resource_set;
+	unsigned long exclusive;                /* Exclusive access bitmap */
+	wait_queue_head_t wait;
+	struct kds_callback cb;
+	struct kref refcount;
+	struct list_head link;
+	int count;
+} dma_buf_lock_resource;
+
+static LIST_HEAD(dma_buf_lock_resource_list);
+static DEFINE_MUTEX(dma_buf_lock_mutex);
+
+static inline int is_dma_buf_lock_file(struct file *);
+static void dma_buf_lock_dounlock(struct kref *ref);
+
+static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
+{
+	dma_buf_lock_resource *resource;
+
+	if (!is_dma_buf_lock_file(file))
+		return -EINVAL;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_release\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return 0;
+}
+
+static void dma_buf_lock_kds_callback(void *param1, void *param2)
+{
+	dma_buf_lock_resource *resource = param1;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_kds_callback\n");
+#endif
+	atomic_set(&resource->locked, 1);
+
+	wake_up(&resource->wait);
+}
+
+static unsigned int dma_buf_lock_handle_poll(struct file *file,
+                                             struct poll_table_struct *wait)
+{
+	dma_buf_lock_resource *resource;
+	unsigned int ret = 0;
+
+	if (!is_dma_buf_lock_file(file))
+		return POLLERR;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll\n");
+#endif
+	if (1 == atomic_read(&resource->locked))
+	{
+		/* Resources have been locked */
+		ret = POLLIN | POLLRDNORM;
+		if (resource->exclusive)
+		{
+			ret |=  POLLOUT | POLLWRNORM;
+		}
+	}
+	else
+	{
+		if (!poll_does_not_wait(wait)) 
+		{
+			poll_wait(file, &resource->wait, wait);
+		}
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll : return %i\n", ret);
+#endif
+	return ret;
+}
+
+static const struct file_operations dma_buf_lock_handle_fops = {
+	.release	= dma_buf_lock_handle_release,
+	.poll		= dma_buf_lock_handle_poll,
+};
+
+/*
+ * is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock
+ */
+static inline int is_dma_buf_lock_file(struct file *file)
+{
+	return file->f_op == &dma_buf_lock_handle_fops;
+}
+
+
+
+/*
+ * Start requested lock.
+ *
+ * Allocates required memory, copies dma_buf_fd list from userspace,
+ * acquires related KDS resources, and starts the lock.
+ */
+static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
+{
+	dma_buf_lock_resource *resource;
+	int size;
+	int fd;
+	int i;
+	int ret;
+
+	if (NULL == request->list_of_dma_buf_fds)
+	{
+		return -EINVAL;
+	}
+	if (request->count <= 0)
+	{
+		return -EINVAL;
+	}
+	if (request->count > DMA_BUF_LOCK_BUF_MAX)
+	{
+		return -EINVAL;
+	}
+	if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE &&
+	    request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
+	{
+		return -EINVAL;
+	}
+
+	resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
+	if (NULL == resource)
+	{
+		return -ENOMEM;
+	}
+
+	atomic_set(&resource->locked, 0);
+	kref_init(&resource->refcount);
+	INIT_LIST_HEAD(&resource->link);
+	resource->count = request->count;
+
+	/* Allocate space to store dma_buf_fds received from user space */
+	size = request->count * sizeof(int);
+	resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->list_of_dma_buf_fds)
+	{
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Allocate space to store dma_buf pointers associated with dma_buf_fds */
+	size = sizeof(struct dma_buf *) * request->count;
+	resource->dma_bufs = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->dma_bufs)
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+	/* Allocate space to store kds_resources associated with dma_buf_fds */
+	size = sizeof(struct kds_resource *) * request->count;
+	resource->kds_resources = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->kds_resources)
+	{
+		kfree(resource->dma_bufs);
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Copy requested list of dma_buf_fds from user space */
+	size = request->count * sizeof(int);
+	if (0 != copy_from_user(resource->list_of_dma_buf_fds, (void __user *)request->list_of_dma_buf_fds, size))
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource->dma_bufs);
+		kfree(resource->kds_resources);
+		kfree(resource);
+		return -ENOMEM;
+	}
+#if DMA_BUF_LOCK_DEBUG
+	for (i = 0; i < request->count; i++)
+	{
+		printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
+	}
+#endif
+
+	/* Add resource to global list */
+	mutex_lock(&dma_buf_lock_mutex);
+
+	list_add(&resource->link, &dma_buf_lock_resource_list);
+
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	for (i = 0; i < request->count; i++)
+	{
+		/* Convert fd into dma_buf structure */
+		resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
+
+		if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i])))
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+
+		/*Get kds_resource associated with dma_buf */
+		resource->kds_resources[i] = get_dma_buf_kds_resource(resource->dma_bufs[i]);
+
+		if (NULL == resource->kds_resources[i])
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+#if DMA_BUF_LOCK_DEBUG
+		printk("dma_buf_lock_dolock : dma_buf_fd %i dma_buf %X kds_resource %X\n", resource->list_of_dma_buf_fds[i],
+		       (unsigned int)resource->dma_bufs[i], (unsigned int)resource->kds_resources[i]);
+#endif	
+	}
+
+	kds_callback_init(&resource->cb, 1, dma_buf_lock_kds_callback);
+	init_waitqueue_head(&resource->wait);
+
+	kref_get(&resource->refcount);
+
+	/* Create file descriptor associated with lock request */
+	fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops, 
+	                      (void *)resource, 0);
+	if (fd < 0)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+		return fd;
+	}
+
+	resource->exclusive = request->exclusive;
+
+	/* Start locking process */
+	ret = kds_async_waitall(&resource->resource_set,
+	                        &resource->cb, resource, NULL,
+	                        request->count,  &resource->exclusive,
+	                        resource->kds_resources);
+
+	if (IS_ERR_VALUE(ret))
+	{
+		put_unused_fd(fd);
+
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+
+		return ret;
+	}
+
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_dolock : complete\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return fd;
+}
+
+static void dma_buf_lock_dounlock(struct kref *ref)
+{
+	int i;
+	dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
+
+	atomic_set(&resource->locked, 0);
+
+	kds_callback_term(&resource->cb);
+
+	kds_resource_set_release(&resource->resource_set);
+
+	list_del(&resource->link);
+
+	for (i = 0; i < resource->count; i++)
+	{
+		dma_buf_put(resource->dma_bufs[i]);
+	}
+
+	kfree(resource->kds_resources);
+	kfree(resource->dma_bufs);
+	kfree(resource->list_of_dma_buf_fds);
+	kfree(resource);
+}
+
+static int __init dma_buf_lock_init(void)
+{
+	int err;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init\n");
+#endif
+	err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
+
+	if (0 == err)
+	{
+		cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops);
+
+		err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1);
+
+		if (0 == err)
+		{
+			dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
+			if (IS_ERR(dma_buf_lock_class))
+			{
+				err = PTR_ERR(dma_buf_lock_class);
+			}
+			else
+			{
+				struct device *mdev;
+				mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev, NULL, dma_buf_lock_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return 0;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(dma_buf_lock_class);
+			}
+			cdev_del(&dma_buf_lock_cdev);
+		}
+
+		unregister_chrdev_region(dma_buf_lock_dev, 1);
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init failed\n");
+#endif
+	return err;
+}
+
+static void __exit dma_buf_lock_exit(void)
+{
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_exit\n");
+#endif
+
+	/* Unlock all outstanding references */
+	while (1)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		if (list_empty(&dma_buf_lock_resource_list))
+		{
+			mutex_unlock(&dma_buf_lock_mutex);
+			break;
+		}
+		else
+		{
+			dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next, 
+			                                             dma_buf_lock_resource, link);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+		}
+	}
+
+	device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
+
+	class_destroy(dma_buf_lock_class);
+
+	cdev_del(&dma_buf_lock_cdev);
+
+	unregister_chrdev_region(dma_buf_lock_dev, 1);
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	dma_buf_lock_k_request request;
+	int size = _IOC_SIZE(cmd);
+
+	if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
+	{
+		return -ENOTTY;
+	}
+
+	switch (cmd)
+	{
+		case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
+			if (size != sizeof(dma_buf_lock_k_request))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&request, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+#if DMA_BUF_LOCK_DEBUG
+			printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
+#endif
+			return dma_buf_lock_dolock(&request);
+	}
+
+	return -ENOTTY;
+}
+
+module_init(dma_buf_lock_init);
+module_exit(dma_buf_lock_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h b/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
new file mode 100755
index 0000000..e1b9348
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _DMA_BUF_LOCK_H
+#define _DMA_BUF_LOCK_H
+
+typedef enum dma_buf_lock_exclusive
+{
+	DMA_BUF_LOCK_NONEXCLUSIVE = 0,
+	DMA_BUF_LOCK_EXCLUSIVE = -1
+} dma_buf_lock_exclusive;
+
+typedef struct dma_buf_lock_k_request
+{
+	int count;
+	int *list_of_dma_buf_fds;
+	int timeout;
+	dma_buf_lock_exclusive exclusive;
+} dma_buf_lock_k_request;
+
+#define DMA_BUF_LOCK_IOC_MAGIC '~'
+
+#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC       _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
+
+#define DMA_BUF_LOCK_IOC_MINNR 11
+#define DMA_BUF_LOCK_IOC_MAXNR 11
+
+#endif /* _DMA_BUF_LOCK_H */
diff --git a/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/sconscript b/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/sconscript
new file mode 100755
index 0000000..b8724f1
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/dma_buf_lock/src/sconscript
@@ -0,0 +1,33 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+src = [Glob('#kernel/drivers/base/dma_buf_lock/src/*.c'), Glob('#kernel/drivers/base/dma_buf_lock/src/*.h'), Glob('#kernel/drivers/base/dma_buf_lock/src/K*')]
+
+if env.GetOption('clean') :
+	# Clean module
+	env.Execute(Action("make clean", '[CLEAN] dma_buf_lock'))
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
+else:
+	# Build module
+	makeAction=Action("cd ${SOURCE.dir} && make dma_buf_lock && cp dma_buf_lock.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [makeAction])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
diff --git a/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild b/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
new file mode 100755
index 0000000..56b9f86
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma-buf-test-exporter.o
+endif
diff --git a/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig b/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
new file mode 100755
index 0000000..974f0c2
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+config DMA_SHARED_BUFFER_TEST_EXPORTER
+	tristate "Test exporter for the dma-buf framework"
+	depends on DMA_SHARED_BUFFER
+	help
+	  This option enables the test exporter usable to help test importerts.
diff --git a/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/Makefile b/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
new file mode 100755
index 0000000..06e3d5c
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
new file mode 100755
index 0000000..0188481
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
@@ -0,0 +1,670 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/dma-buf-test-exporter.h>
+#include <linux/dma-buf.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/atomic.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#include <linux/dma-attrs.h>
+#include <linux/dma-mapping.h>
+#endif
+
+struct dma_buf_te_alloc {
+	/* the real alloc */
+	int nr_pages;
+	struct page **pages;
+
+	/* the debug usage tracking */
+	int nr_attached_devices;
+	int nr_device_mappings;
+	int nr_cpu_mappings;
+
+	/* failure simulation */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+
+	bool contiguous;
+	dma_addr_t contig_dma_addr;
+	void *contig_cpu_addr;
+};
+
+static struct miscdevice te_device;
+
+static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc	*alloc;
+	alloc = buf->priv;
+
+	if (alloc->fail_attach)
+		return -EFAULT;
+
+	/* dma_buf is externally locked during call */
+	alloc->nr_attached_devices++;
+	return 0;
+}
+
+static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* dma_buf is externally locked during call */
+
+	alloc->nr_attached_devices--;
+}
+
+static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction)
+{
+	struct sg_table *sg;
+	struct scatterlist *iter;
+	struct dma_buf_te_alloc	*alloc;
+	int i;
+	int ret;
+
+	alloc = attachment->dmabuf->priv;
+
+	if (alloc->fail_map)
+		return ERR_PTR(-ENOMEM);
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* if the ARCH can't chain we can't have allocs larger than a single sg can hold */
+	if (alloc->nr_pages > SG_MAX_SINGLE_ALLOC)
+		return ERR_PTR(-EINVAL);
+#endif
+
+	sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (!sg)
+		return ERR_PTR(-ENOMEM);
+
+	/* from here we access the allocation object, so lock the dmabuf pointing to it */
+	mutex_lock(&attachment->dmabuf->lock);
+
+	if (alloc->contiguous)
+		ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+	else
+		ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL);
+	if (ret) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		kfree(sg);
+		return ERR_PTR(ret);
+	}
+
+	if (alloc->contiguous) {
+		sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE;
+		sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0);
+		sg_dma_address(sg->sgl) = alloc->contig_dma_addr;
+	} else {
+		for_each_sg(sg->sgl, iter, alloc->nr_pages, i)
+			sg_set_page(iter, alloc->pages[i], PAGE_SIZE, 0);
+	}
+
+	if (!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		sg_free_table(sg);
+		kfree(sg);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	alloc->nr_device_mappings++;
+	mutex_unlock(&attachment->dmabuf->lock);
+	return sg;
+}
+
+static void dma_buf_te_unmap(struct dma_buf_attachment *attachment,
+							 struct sg_table *sg, enum dma_data_direction direction)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = attachment->dmabuf->priv;
+
+	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction);
+	sg_free_table(sg);
+	kfree(sg);
+
+	mutex_lock(&attachment->dmabuf->lock);
+	alloc->nr_device_mappings--;
+	mutex_unlock(&attachment->dmabuf->lock);
+}
+
+static void dma_buf_te_release(struct dma_buf *buf)
+{
+	int i;
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* no need for locking */
+
+	if (alloc->contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++)
+			__free_page(alloc->pages[i]);
+	}
+	kfree(alloc->pages);
+	kfree(alloc);
+}
+
+
+static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings++;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	BUG_ON(alloc->nr_cpu_mappings <= 0);
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings--;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static int dma_buf_te_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dmabuf;
+	struct page *pageptr;
+
+	dmabuf = vma->vm_private_data;
+	alloc = dmabuf->priv;
+
+	if (vmf->pgoff > alloc->nr_pages)
+		return VM_FAULT_SIGBUS;
+
+	pageptr = alloc->pages[vmf->pgoff];
+
+	BUG_ON(!pageptr);
+
+	get_page(pageptr);
+	vmf->page = pageptr;
+
+	return 0;
+}
+
+struct vm_operations_struct dma_buf_te_vm_ops = {
+	.open = dma_buf_te_mmap_open,
+	.close = dma_buf_te_mmap_close,
+	.fault = dma_buf_te_mmap_fault
+};
+
+static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = dmabuf->priv;
+
+	if (alloc->fail_mmap)
+		return -ENOMEM;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTEXPAND;
+#endif
+	vma->vm_ops = &dma_buf_te_vm_ops;
+	vma->vm_private_data = dmabuf;
+
+	/*  we fault in the pages on access */
+
+	/* call open to do the ref-counting */
+	dma_buf_te_vm_ops.open(vma);
+
+	return 0;
+}
+
+static void *dma_buf_te_kmap_atomic(struct dma_buf *buf, unsigned long page_num)
+{
+	/* IGNORE */
+	return NULL;
+}
+
+static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long page_num)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return NULL;
+
+	return kmap(alloc->pages[page_num]);
+}
+static void dma_buf_te_kunmap(struct dma_buf *buf,
+		unsigned long page_num, void *addr)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return;
+
+	kunmap(alloc->pages[page_num]);
+	return;
+}
+
+static struct dma_buf_ops dma_buf_te_ops = {
+	/* real handlers */
+	.attach = dma_buf_te_attach,
+	.detach = dma_buf_te_detach,
+	.map_dma_buf = dma_buf_te_map,
+	.unmap_dma_buf = dma_buf_te_unmap,
+	.release = dma_buf_te_release,
+	.mmap = dma_buf_te_mmap,
+	.kmap = dma_buf_te_kmap,
+	.kunmap = dma_buf_te_kunmap,
+
+	/* nop handlers for mandatory functions we ignore */
+	.kmap_atomic = dma_buf_te_kmap_atomic
+};
+
+static int do_dma_buf_te_ioctl_version(struct dma_buf_te_ioctl_version __user *buf)
+{
+	struct dma_buf_te_ioctl_version v;
+
+	if (copy_from_user(&v, buf, sizeof(v)))
+		return -EFAULT;
+
+	if (v.op != DMA_BUF_TE_ENQ)
+		return -EFAULT;
+
+	v.op = DMA_BUF_TE_ACK;
+	v.major = DMA_BUF_TE_VER_MAJOR;
+	v.minor = DMA_BUF_TE_VER_MINOR;
+
+	if (copy_to_user(buf, &v, sizeof(v)))
+		return -EFAULT;
+	else
+		return 0;
+}
+
+static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, bool contiguous)
+{
+	struct dma_buf_te_ioctl_alloc alloc_req;
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dma_buf;
+	int i = 0;
+	int fd;
+
+	if (copy_from_user(&alloc_req, buf, sizeof(alloc_req))) {
+		dev_err(te_device.this_device, "%s: couldn't get user data", __func__);
+		goto no_input;
+	}
+
+	if (!alloc_req.size) {
+		dev_err(te_device.this_device, "%s: no size specified", __func__);
+		goto invalid_size;
+	}
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* Whilst it is possible to allocate larger buffer, we won't be able to
+	 * map it during actual usage (mmap() still succeeds). We fail here so
+	 * userspace code can deal with it early than having driver failure
+	 * later on. */
+	if (alloc_req.size > SG_MAX_SINGLE_ALLOC) {
+		dev_err(te_device.this_device, "%s: buffer size of %llu pages exceeded the mapping limit of %lu pages",
+				__func__, alloc_req.size, SG_MAX_SINGLE_ALLOC);
+		goto invalid_size;
+	}
+#endif
+
+	alloc = kzalloc(sizeof(struct dma_buf_te_alloc), GFP_KERNEL);
+	if (NULL == alloc) {
+		dev_err(te_device.this_device, "%s: couldn't alloc object", __func__);
+		goto no_alloc_object;
+	}
+
+	alloc->nr_pages = alloc_req.size;
+	alloc->contiguous = contiguous;
+
+	alloc->pages = kzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL);
+	if (!alloc->pages) {
+		dev_err(te_device.this_device,
+				"%s: couldn't alloc %d page structures", __func__,
+				alloc->nr_pages);
+		goto free_alloc_object;
+	}
+
+	if (contiguous) {
+		dma_addr_t dma_aux;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO, &attrs);
+#else
+		alloc->contig_cpu_addr = dma_alloc_writecombine(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO);
+#endif
+		if (!alloc->contig_cpu_addr) {
+			dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %d pages", __func__, alloc->nr_pages);
+			goto free_page_struct;
+		}
+		dma_aux = alloc->contig_dma_addr;
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = pfn_to_page(PFN_DOWN(dma_aux));
+			dma_aux += PAGE_SIZE;
+		}
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+			if (NULL == alloc->pages[i]) {
+				dev_err(te_device.this_device, "%s: couldn't alloc page", __func__);
+				goto no_page;
+			}
+		}
+	}
+
+	/* alloc ready, let's export it */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0))
+	{
+		struct dma_buf_export_info export_info = {
+			.exp_name = "dma_buf_te",
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0))
+			.owner = THIS_MODULE,
+#endif
+			.ops = &dma_buf_te_ops,
+			.size = alloc->nr_pages << PAGE_SHIFT,
+			.flags = O_CLOEXEC | O_RDWR,
+			.priv = alloc,
+		};
+
+		dma_buf = dma_buf_export(&export_info);
+	}
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0))
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR, NULL);
+#else
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR);
+#endif
+
+	if (IS_ERR_OR_NULL(dma_buf)) {
+		dev_err(te_device.this_device, "%s: couldn't export dma_buf", __func__);
+		goto no_export;
+	}
+
+	/* get fd for buf */
+	fd = dma_buf_fd(dma_buf, O_CLOEXEC);
+
+	if (fd < 0) {
+		dev_err(te_device.this_device, "%s: couldn't get fd from dma_buf", __func__);
+		goto no_fd;
+	}
+
+	return fd;
+
+no_fd:
+	dma_buf_put(dma_buf);
+no_export:
+	/* i still valid */
+no_page:
+	if (contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		while (i-- > 0)
+			__free_page(alloc->pages[i]);
+	}
+free_page_struct:
+	kfree(alloc->pages);
+free_alloc_object:
+	kfree(alloc);
+no_alloc_object:
+invalid_size:
+no_input:
+	return -EFAULT;
+}
+
+static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg)
+{
+	struct dma_buf_te_ioctl_status status;
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&status, arg, sizeof(status)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(status.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, get the current status */
+	alloc = dmabuf->priv;
+
+	/* lock while reading status to take a snapshot */
+	mutex_lock(&dmabuf->lock);
+	status.attached_devices = alloc->nr_attached_devices;
+	status.device_mappings = alloc->nr_device_mappings;
+	status.cpu_mappings = alloc->nr_cpu_mappings;
+	mutex_unlock(&dmabuf->lock);
+
+	if (copy_to_user(arg, &status, sizeof(status)))
+		goto err_have_dmabuf;
+
+	/* All OK */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing __user *arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_set_failing f;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, set the fail modes */
+	alloc = dmabuf->priv;
+	/* lock to set the fail modes atomically */
+	mutex_lock(&dmabuf->lock);
+	alloc->fail_attach = f.fail_attach;
+	alloc->fail_map    = f.fail_map;
+	alloc->fail_mmap   = f.fail_mmap;
+	mutex_unlock(&dmabuf->lock);
+
+	/* success */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
+{
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	unsigned int count;
+	unsigned int offset = 0;
+	int ret = 0;
+	int i;
+
+	attachment = dma_buf_attach(dma_buf, te_device.this_device);
+	if (IS_ERR_OR_NULL(attachment))
+		return -EBUSY;
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto no_import;
+	}
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		ret = dma_buf_begin_cpu_access(dma_buf, offset, sg_dma_len(sg), DMA_BIDIRECTIONAL);
+		if (ret)
+			goto no_cpu_access;
+		for (i = 0; i < sg_dma_len(sg); i = i + PAGE_SIZE) {
+			void *addr;
+
+			addr = dma_buf_kmap(dma_buf, i >> PAGE_SHIFT);
+			if (!addr) {
+				/* dma_buf_kmap is unimplemented in exynos and returns NULL */
+				dma_buf_end_cpu_access(dma_buf, offset, sg_dma_len(sg), DMA_BIDIRECTIONAL);
+				ret = -EPERM;
+				goto no_cpu_access;
+			}
+			memset(addr, value, PAGE_SIZE);
+			dma_buf_kunmap(dma_buf, i >> PAGE_SHIFT, addr);
+		}
+		dma_buf_end_cpu_access(dma_buf, offset, sg_dma_len(sg), DMA_BIDIRECTIONAL);
+		offset += sg_dma_len(sg);
+	}
+
+no_cpu_access:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+no_import:
+	dma_buf_detach(dma_buf, attachment);
+	return ret;
+}
+
+static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
+{
+
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_fill f;
+	int ret;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	ret = dma_te_buf_fill(dmabuf, f.value);
+	dma_buf_put(dmabuf);
+
+	return ret;
+}
+
+static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case DMA_BUF_TE_VERSION:
+		return do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg);
+	case DMA_BUF_TE_ALLOC:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false);
+	case DMA_BUF_TE_ALLOC_CONT:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true);
+	case DMA_BUF_TE_QUERY:
+		return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg);
+	case DMA_BUF_TE_SET_FAILING:
+		return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg);
+	case DMA_BUF_TE_FILL:
+		return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations dma_buf_te_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = dma_buf_te_ioctl,
+	.compat_ioctl = dma_buf_te_ioctl,
+};
+
+static int __init dma_buf_te_init(void)
+{
+	int res;
+	te_device.minor = MISC_DYNAMIC_MINOR;
+	te_device.name = "dma_buf_te";
+	te_device.fops = &dma_buf_te_fops;
+
+	res = misc_register(&te_device);
+	if (res) {
+		printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n");
+		return res;
+	}
+	te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	dev_info(te_device.this_device, "dma_buf_te ready\n");
+	return 0;
+
+}
+
+static void __exit dma_buf_te_exit(void)
+{
+	misc_deregister(&te_device);
+}
+
+module_init(dma_buf_te_init);
+module_exit(dma_buf_te_exit);
+MODULE_LICENSE("GPL");
+
diff --git a/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/sconscript b/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
new file mode 100755
index 0000000..09fe7f3
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
@@ -0,0 +1,33 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+Import('env')
+
+src = [Glob('#kernel/drivers/base/dma_buf_test_exporter/*.c'), Glob('#kernel/include/linux/*.h'), Glob('#kernel/drivers/base/dma_buf_test_exporter/K*')]
+
+env.Append( CPPPATH = '#kernel/include' )
+
+if env.GetOption('clean') :
+	env.Execute(Action("make clean", '[CLEAN] dma-buf-test-exporter'))
+	cmd = env.Command('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', src, [])
+	env.KernelObjTarget('dma-buf-test-exporter', cmd)
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make && ( ( [ -f dma-buf-test-exporter.ko ] && cp dma-buf-test-exporter.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/dma-buf-test-exporter.ko)", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', src, [makeAction])
+	env.KernelObjTarget('dma-buf-test-exporter', cmd)
+
+	
diff --git a/midgard/r12p0/kernel/drivers/base/kds/Kbuild b/midgard/r12p0/kernel/drivers/base/kds/Kbuild
new file mode 100755
index 0000000..a88acd8
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/kds/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_KDS) += kds.o
+obj-$(CONFIG_KDS_TEST) += kds_test.o
diff --git a/midgard/r12p0/kernel/drivers/base/kds/Kconfig b/midgard/r12p0/kernel/drivers/base/kds/Kconfig
new file mode 100755
index 0000000..5f96165
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/kds/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config KDS
+	tristate "Kernel dependency system"
+	help
+	  This option enables the generic kernel dependency system
diff --git a/midgard/r12p0/kernel/drivers/base/kds/Makefile b/midgard/r12p0/kernel/drivers/base/kds/Makefile
new file mode 100755
index 0000000..364d151
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/kds/Makefile
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+CONFIG_KDS_TEST ?= n
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: kds
+
+kds:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS=m
+
+kds_test:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS_TEST=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r12p0/kernel/drivers/base/kds/kds.c b/midgard/r12p0/kernel/drivers/base/kds/kds.c
new file mode 100755
index 0000000..62612d4
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/kds/kds.c
@@ -0,0 +1,559 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/kds.h>
+#include <linux/kref.h>
+
+#include <asm/atomic.h>
+
+#define KDS_LINK_TRIGGERED (1u << 0)
+#define KDS_LINK_EXCLUSIVE (1u << 1)
+
+#define KDS_INVALID (void *)-2
+#define KDS_RESOURCE (void *)-1
+
+struct kds_resource_set
+{
+	unsigned long         num_resources;
+	unsigned long         pending;
+	struct kds_callback  *cb;
+	void                 *callback_parameter;
+	void                 *callback_extra_parameter;
+	struct list_head      callback_link;
+	struct work_struct    callback_work;
+	atomic_t              cb_queued;
+	/* This resource set will be freed when there are no pending
+	 * callbacks */
+	struct kref           refcount;
+
+	/* This is only initted when kds_waitall() is called. */
+	wait_queue_head_t     wake;
+
+	struct kds_link       resources[0];
+
+};
+
+static DEFINE_SPINLOCK(kds_lock);
+
+static void __resource_set_release(struct kref *ref)
+{
+	struct kds_resource_set *rset = container_of(ref,
+			struct kds_resource_set, refcount);
+
+	kfree(rset);
+}
+
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb)
+{
+	int ret = 0;
+
+	cb->direct = direct;
+	cb->user_cb = user_cb;
+
+	if (!direct)
+	{
+		cb->wq = alloc_workqueue("kds", WQ_UNBOUND | WQ_HIGHPRI, WQ_UNBOUND_MAX_ACTIVE);
+		if (!cb->wq)
+			ret = -ENOMEM;
+	}
+	else
+	{
+		cb->wq = NULL;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(kds_callback_init);
+
+void kds_callback_term(struct kds_callback *cb)
+{
+	if (!cb->direct)
+	{
+		BUG_ON(!cb->wq);
+		destroy_workqueue(cb->wq);
+	}
+	else
+	{
+		BUG_ON(cb->wq);
+	}
+}
+
+EXPORT_SYMBOL(kds_callback_term);
+
+static void kds_do_user_callback(struct kds_resource_set *rset)
+{
+	rset->cb->user_cb(rset->callback_parameter, rset->callback_extra_parameter);
+}
+
+static void kds_queued_callback(struct work_struct *work)
+{
+	struct kds_resource_set *rset;
+	rset = container_of(work, struct kds_resource_set, callback_work);
+
+	atomic_dec(&rset->cb_queued);
+
+	kds_do_user_callback(rset);
+}
+
+static void kds_callback_perform(struct kds_resource_set *rset)
+{
+	if (rset->cb->direct)
+		kds_do_user_callback(rset);
+	else
+	{
+		int result;
+
+		atomic_inc(&rset->cb_queued);
+
+		result = queue_work(rset->cb->wq, &rset->callback_work);
+		/* if we got a 0 return it means we've triggered the same rset twice! */
+		WARN_ON(!result);
+	}
+}
+
+void kds_resource_init(struct kds_resource * const res)
+{
+	BUG_ON(!res);
+	INIT_LIST_HEAD(&res->waiters.link);
+	res->waiters.parent = KDS_RESOURCE;
+}
+EXPORT_SYMBOL(kds_resource_init);
+
+int kds_resource_term(struct kds_resource *res)
+{
+	unsigned long lflags;
+	BUG_ON(!res);
+	spin_lock_irqsave(&kds_lock, lflags);
+	if (!list_empty(&res->waiters.link))
+	{
+		spin_unlock_irqrestore(&kds_lock, lflags);
+		printk(KERN_ERR "ERROR: KDS resource is still in use\n");
+		return -EBUSY;
+	}
+	res->waiters.parent = KDS_INVALID;
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	return 0;
+}
+EXPORT_SYMBOL(kds_resource_term);
+
+int kds_async_waitall(
+		struct kds_resource_set ** const pprset,
+		struct kds_callback      *cb,
+		void                     *callback_parameter,
+		void                     *callback_extra_parameter,
+		int                       number_resources,
+		unsigned long            *exclusive_access_bitmap,
+		struct kds_resource     **resource_list)
+{
+	struct kds_resource_set *rset = NULL;
+	unsigned long lflags;
+	int i;
+	int triggered;
+	int err = -EFAULT;
+
+	BUG_ON(!pprset);
+	BUG_ON(!resource_list);
+	BUG_ON(!cb);
+
+	WARN_ONCE(number_resources > 10, "Waiting on a high numbers of resources may increase latency, see documentation.");
+
+	rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+	if (!rset)
+	{
+		return -ENOMEM;
+	}
+
+	rset->num_resources = number_resources;
+	rset->pending = number_resources;
+	rset->cb = cb;
+	rset->callback_parameter = callback_parameter;
+	rset->callback_extra_parameter = callback_extra_parameter;
+	INIT_LIST_HEAD(&rset->callback_link);
+	INIT_WORK(&rset->callback_work, kds_queued_callback);
+	atomic_set(&rset->cb_queued, 0);
+	kref_init(&rset->refcount);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		INIT_LIST_HEAD(&rset->resources[i].link);
+		rset->resources[i].parent = rset;
+	}
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		unsigned long link_state = 0;
+
+		if (test_bit(i, exclusive_access_bitmap))
+		{
+			link_state |= KDS_LINK_EXCLUSIVE;
+		}
+
+		/* no-one else waiting? */
+		if (list_empty(&resource_list[i]->waiters.link))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		/* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+		else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+				(((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		rset->resources[i].state = link_state;
+
+		/* avoid double wait (hang) */
+		if (!list_empty(&resource_list[i]->waiters.link))
+		{
+			/* adding same rset again? */
+			if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+			{
+				goto roll_back;
+			}
+		}
+		list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+	}
+
+	triggered = (rset->pending == 0);
+
+	/* set the pointer before the callback is called so it sees it */
+	*pprset = rset;
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	if (triggered)
+	{
+		/* all resources obtained, trigger callback */
+		kds_callback_perform(rset);
+	}
+
+	return 0;
+
+roll_back:
+	/* roll back */
+	while (i-- > 0)
+	{
+		list_del(&rset->resources[i].link);
+	}
+	err = -EINVAL;
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	kfree(rset);
+	return err;
+}
+EXPORT_SYMBOL(kds_async_waitall);
+
+static void wake_up_sync_call(void *callback_parameter, void *callback_extra_parameter)
+{
+	wait_queue_head_t *wait = (wait_queue_head_t *)callback_parameter;
+	wake_up(wait);
+}
+
+static struct kds_callback sync_cb =
+{
+	wake_up_sync_call,
+	1,
+	NULL,
+};
+
+struct kds_resource_set *kds_waitall(
+		int                   number_resources,
+		unsigned long        *exclusive_access_bitmap,
+		struct kds_resource **resource_list,
+		unsigned long         jiffies_timeout)
+{
+	struct kds_resource_set *rset;
+	unsigned long lflags;
+	int i;
+	int triggered = 0;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+	if (!rset)
+		return rset;
+
+	rset->num_resources = number_resources;
+	rset->pending = number_resources;
+	init_waitqueue_head(&rset->wake);
+	INIT_LIST_HEAD(&rset->callback_link);
+	INIT_WORK(&rset->callback_work, kds_queued_callback);
+	atomic_set(&rset->cb_queued, 0);
+	kref_init(&rset->refcount);
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		unsigned long link_state = 0;
+
+		if (test_bit(i, exclusive_access_bitmap))
+		{
+			link_state |= KDS_LINK_EXCLUSIVE;
+		}
+
+		if (list_empty(&resource_list[i]->waiters.link))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		/* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+		else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+				(((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+
+		INIT_LIST_HEAD(&rset->resources[i].link);
+		rset->resources[i].parent = rset;
+		rset->resources[i].state = link_state;
+
+		/* avoid double wait (hang) */
+		if (!list_empty(&resource_list[i]->waiters.link))
+		{
+			/* adding same rset again? */
+			if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+			{
+				goto roll_back;
+			}
+		}
+
+		list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+	}
+
+	if (rset->pending == 0)
+		triggered = 1;
+	else
+	{
+		rset->cb = &sync_cb;
+		rset->callback_parameter = &rset->wake;
+		rset->callback_extra_parameter = NULL;
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	if (!triggered)
+	{
+		long wait_res = 0;
+		long timeout = (jiffies_timeout == KDS_WAIT_BLOCKING) ?
+				MAX_SCHEDULE_TIMEOUT : jiffies_timeout;
+
+		if (timeout)
+		{
+			wait_res = wait_event_interruptible_timeout(rset->wake,
+					rset->pending == 0, timeout);
+		}
+
+		if ((wait_res == -ERESTARTSYS) || (wait_res == 0))
+		{
+			/* use \a kds_resource_set_release to roll back */
+			kds_resource_set_release(&rset);
+			return ERR_PTR(wait_res);
+		}
+	}
+	return rset;
+
+roll_back:
+	/* roll back */
+	while (i-- > 0)
+	{
+		list_del(&rset->resources[i].link);
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	kfree(rset);
+	return ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL(kds_waitall);
+
+static void trigger_new_rset_owner(struct kds_resource_set *rset,
+		struct list_head *triggered)
+{
+	if (0 == --rset->pending) {
+		/* new owner now triggered, track for callback later */
+		kref_get(&rset->refcount);
+		list_add(&rset->callback_link, triggered);
+	}
+}
+
+static void __kds_resource_set_release_common(struct kds_resource_set *rset)
+{
+	struct list_head triggered = LIST_HEAD_INIT(triggered);
+	struct kds_resource_set *it;
+	unsigned long lflags;
+	int i;
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < rset->num_resources; i++)
+	{
+		struct kds_resource *resource;
+		struct kds_link *it = NULL;
+
+		/* fetch the previous entry on the linked list */
+		it = list_entry(rset->resources[i].link.prev, struct kds_link, link);
+		/* unlink ourself */
+		list_del(&rset->resources[i].link);
+
+		/* any waiters? */
+		if (list_empty(&it->link))
+			continue;
+
+		/* were we the head of the list? (head if prev is a resource) */
+		if (it->parent != KDS_RESOURCE)
+		{
+			if ((it->state & KDS_LINK_TRIGGERED) && !(it->state & KDS_LINK_EXCLUSIVE))
+			{
+				/*
+				 * previous was triggered and not exclusive, so we
+				 * trigger non-exclusive until end-of-list or first
+				 * exclusive
+				 */
+
+				struct kds_link *it_waiting = it;
+
+				list_for_each_entry(it, &it_waiting->link, link)
+				{
+					/* exclusive found, stop triggering */
+					if (it->state & KDS_LINK_EXCLUSIVE)
+						break;
+
+					it->state |= KDS_LINK_TRIGGERED;
+					/* a parent to update? */
+					if (it->parent != KDS_RESOURCE)
+						trigger_new_rset_owner(
+								it->parent,
+								&triggered);
+				}
+			}
+			continue;
+		}
+
+		/* we were the head, find the kds_resource */
+		resource = container_of(it, struct kds_resource, waiters);
+
+		/* we know there is someone waiting from the any-waiters test above */
+
+		/* find the head of the waiting list */
+		it = list_first_entry(&resource->waiters.link, struct kds_link, link);
+
+		/* new exclusive owner? */
+		if (it->state & KDS_LINK_EXCLUSIVE)
+		{
+			/* link now triggered */
+			it->state |= KDS_LINK_TRIGGERED;
+			/* a parent to update? */
+			trigger_new_rset_owner(it->parent, &triggered);
+		}
+		/* exclusive releasing ? */
+		else if (rset->resources[i].state & KDS_LINK_EXCLUSIVE)
+		{
+			/* trigger non-exclusive until end-of-list or first exclusive */
+			list_for_each_entry(it, &resource->waiters.link, link)
+			{
+				/* exclusive found, stop triggering */
+				if (it->state & KDS_LINK_EXCLUSIVE)
+					break;
+
+				it->state |= KDS_LINK_TRIGGERED;
+				/* a parent to update? */
+				trigger_new_rset_owner(it->parent, &triggered);
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	while (!list_empty(&triggered))
+	{
+		it = list_first_entry(&triggered, struct kds_resource_set, callback_link);
+		list_del(&it->callback_link);
+		kds_callback_perform(it);
+
+		/* Free the resource set if no callbacks pending */
+		kref_put(&it->refcount, &__resource_set_release);
+	}
+}
+
+void kds_resource_set_release(struct kds_resource_set **pprset)
+{
+	struct kds_resource_set *rset;
+	int queued;
+
+	rset = cmpxchg(pprset, *pprset, NULL);
+
+	if (!rset)
+	{
+		/* caught a race between a cancelation
+		 * and a completion, nothing to do */
+		return;
+	}
+
+	__kds_resource_set_release_common(rset);
+
+	/*
+	 * Caller is responsible for guaranteeing that callback work is not
+	 * pending (i.e. its running or completed) prior to calling release.
+	 */
+	queued = atomic_read(&rset->cb_queued);
+	BUG_ON(queued);
+
+	kref_put(&rset->refcount, &__resource_set_release);
+}
+EXPORT_SYMBOL(kds_resource_set_release);
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset)
+{
+	struct kds_resource_set *rset;
+
+	rset = cmpxchg(pprset, *pprset, NULL);
+	if (!rset)
+	{
+		/* caught a race between a cancelation
+		 * and a completion, nothing to do */
+		return;
+	}
+
+	__kds_resource_set_release_common(rset);
+
+	/*
+	 * In the case of a kds async wait cancellation ensure the deferred
+	 * call back does not get scheduled if a trigger fired at the same time
+	 * to release the wait.
+	 */
+	cancel_work_sync(&rset->callback_work);
+
+	kref_put(&rset->refcount, &__resource_set_release);
+}
+EXPORT_SYMBOL(kds_resource_set_release_sync);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/midgard/r12p0/kernel/drivers/base/kds/sconscript b/midgard/r12p0/kernel/drivers/base/kds/sconscript
new file mode 100755
index 0000000..91a79fd
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/kds/sconscript
@@ -0,0 +1,63 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+import re
+Import('env')
+
+#Android uses sync_pt to accomplish KDS functionality.
+#Midgard KDS is not used by Android
+if env['os'] == 'android':
+	Return()
+
+# If KDS is built into the kernel already we skip building the module ourselves
+linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+search_term = '^[\ ]*CONFIG_KDS[\ ]*=[\ ]*y'
+build_kds = 1
+for line in open(linux_config_file, 'r'):
+	if re.search(search_term, line):
+		# KDS in kernel. Do not build KDS kernel module but
+		# still allow for building kds_test module.
+		build_kds = 0
+
+src = [Glob('#kernel/drivers/base/kds/*.c'), Glob('#kernel/include/linux/*.h'), Glob('#kernel/drivers/base/kds/K*')]
+
+env.Append( CPPPATH = '#kernel/include' )
+
+if Glob('tests/sconscript'):
+	SConscript( 'tests/sconscript' )
+
+if env.GetOption('clean') :
+	# Clean KDS module
+	if build_kds or (int(env['unit']) == 1):
+		env.Execute(Action("make clean", '[CLEAN] kds'))
+		cmd = env.Command('$STATIC_LIB_PATH/kds.ko', src, [])
+		env.KernelObjTarget('kds', cmd)
+else:
+	# Build KDS module
+	if build_kds:
+		makeAction=Action("cd ${SOURCE.dir} && make kds && cp kds.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+		cmd = env.Command('$STATIC_LIB_PATH/kds.ko', src, [makeAction])
+		env.KernelObjTarget('kds', cmd)
+
+	# Build KDS test module
+	if int(env['unit']) == 1:
+		makeActionTest=Action("cd ${SOURCE.dir} && make kds_test && cp kds_test.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+		cmdTest = env.Command('$STATIC_LIB_PATH/kds_test.ko', src, [makeActionTest])
+		env.KernelObjTarget('kds', cmdTest)
+		if build_kds:
+			env.Depends(cmdTest, cmd)
+
diff --git a/midgard/r12p0/kernel/drivers/base/sconscript b/midgard/r12p0/kernel/drivers/base/sconscript
new file mode 100755
index 0000000..b57cc7f
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/sconscript
@@ -0,0 +1,33 @@
+#
+# (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import( 'env' )
+
+if Glob('bus_logger/sconscript'):
+	if env['buslog'] == '1':
+		SConscript('bus_logger/sconscript')
+
+if Glob('dma_buf_lock/sconscript'):
+	SConscript('dma_buf_lock/sconscript')
+
+if Glob('kds/sconscript'):
+	SConscript('kds/sconscript')
+
+if Glob('ump/sconscript'):
+	SConscript('ump/sconscript')
+
+if Glob('dma_buf_test_exporter/sconscript'):
+	SConscript('dma_buf_test_exporter/sconscript')
+
diff --git a/midgard/r12p0/kernel/drivers/base/ump/Kbuild b/midgard/r12p0/kernel/drivers/base/ump/Kbuild
new file mode 100755
index 0000000..2bbdba2
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += src/
+
diff --git a/midgard/r12p0/kernel/drivers/base/ump/Kconfig b/midgard/r12p0/kernel/drivers/base/ump/Kconfig
new file mode 100755
index 0000000..f7451e6
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/Kconfig
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config UMP
+	tristate "Enable Unified Memory Provider (UMP) support"
+	default n
+    help
+	  Enable this option to build support for the ARM UMP module.
+	  UMP can be used by the Mali T6xx module to improve performance
+	  by reducing the copying of data by sharing memory.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate one module, called ump.
diff --git a/midgard/r12p0/kernel/drivers/base/ump/docs/Doxyfile b/midgard/r12p0/kernel/drivers/base/ump/docs/Doxyfile
new file mode 100755
index 0000000..fbec8eb
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/docs/Doxyfile
@@ -0,0 +1,125 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/include/linux/ump-common.h ../../kernel/include/linux/ump.h
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           += ../../kernel/drivers/base/ump
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           +=
diff --git a/midgard/r12p0/kernel/drivers/base/ump/docs/sconscript b/midgard/r12p0/kernel/drivers/base/ump/docs/sconscript
new file mode 100755
index 0000000..521278d
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/docs/sconscript
@@ -0,0 +1,31 @@
+#
+# (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+doxygen_sources = [
+	'Doxyfile',
+	Glob('*.png'),
+	Glob('../*.h'),
+	Glob('../src/*.h') ]
+
+if env['doc'] == '1':
+        doxygen_target = env.Command('doxygen/html/index.html', doxygen_sources,
+                                     ['cd ${SOURCE.dir} && doxygen'])
+        env.Clean(doxygen_target, './doxygen')
+
+        Alias('doxygen', doxygen_target)
+
diff --git a/midgard/r12p0/kernel/drivers/base/ump/example_kernel_api.c b/midgard/r12p0/kernel/drivers/base/ump/example_kernel_api.c
new file mode 100755
index 0000000..858dd8b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/example_kernel_api.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Example routine to display information about an UMP allocation
+ * The routine takes an secure_id which can come from a different kernel module
+ * or from a client application (i.e. an ioctl).
+ * It creates a ump handle from the secure id (which validates the secure id)
+ * and if successful dumps the physical memory information.
+ * It follows the API and pins the memory while "using" the physical memory.
+ * Finally it calls the release function to indicate it's finished with the handle.
+ *
+ * If the function can't look up the handle it fails with return value -1.
+ * If the testy succeeds then it return 0.
+ * */
+
+static int display_ump_memory_information(ump_secure_id secure_id)
+{
+	const ump_dd_physical_block_64 * ump_blocks = NULL;
+	ump_dd_handle ump_mem;
+	uint64_t nr_blocks;
+	int i;
+	ump_alloc_flags flags;
+
+	/* get a handle from the secure id */
+	ump_mem = ump_dd_from_secure_id(secure_id);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE == ump_mem)
+	{
+		/* invalid ID received */
+		return -1;
+	}
+
+	/* at this point we know we've added a reference to the ump allocation, so we must release it with ump_dd_release */
+
+	ump_dd_phys_blocks_get_64(ump_mem, &nr_blocks, &ump_blocks);
+	flags = ump_dd_allocation_flags_get(ump_mem);
+
+	printf("UMP allocation with secure ID %u consists of %zd physical block(s):\n", secure_id, nr_blocks);
+
+	for(i=0; i<nr_blocks; ++i)
+	{
+		printf("\tBlock %d: 0x%08zX size 0x%08zX\n", i, ump_blocks[i].addr, ump_blocks[i].size);
+	}
+
+	printf("and was allocated using the following bitflag combo:  0x%lX\n", flags);
+
+	ump_dd_release(ump_mem);
+
+	return 0;
+}
+
diff --git a/midgard/r12p0/kernel/drivers/base/ump/example_user_api.c b/midgard/r12p0/kernel/drivers/base/ump/example_user_api.c
new file mode 100755
index 0000000..d143a64
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/example_user_api.c
@@ -0,0 +1,153 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <ump/ump.h>
+#include <memory.h>
+#include <stdio.h>
+
+/*
+ * Example routine to exercise the user space UMP api.
+ * This routine initializes the UMP api and allocates some CPU+device X memory.
+ * No usage hints are given, so the driver will use the default cacheability policy.
+ * With the allocation it creates a duplicate handle and plays with the reference count.
+ * Then it simulates interacting with a device and contains pseudo code for the device.
+ *
+ * If any error is detected correct cleanup will be performed and -1 will be returned.
+ * If successful then 0 will be returned.
+ */
+
+static int test_ump_user_api(void)
+{
+	/* This is the size we try to allocate*/
+	const size_t alloc_size = 4096;
+
+	ump_handle h = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_copy = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+	void * mapping = NULL;
+
+	ump_result ump_api_res;
+	int result = -1;
+
+	ump_secure_id id;
+
+	size_t size_returned;
+
+	ump_api_res = ump_open();
+	if (UMP_OK != ump_api_res)
+	{
+		/* failed to open an ump session */
+		/* early out */
+		return -1;
+	}
+
+	h = ump_allocate_64(alloc_size, UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | UMP_PROT_X_RD | UMP_PROT_X_WR);
+	/* the refcount is now 1 */
+	if (UMP_INVALID_MEMORY_HANDLE == h)
+	{
+		/* allocation failure */
+		goto cleanup;
+	}
+
+	/* this is how we could share this allocation with another process */
+
+	/* in process A: */
+	id = ump_secure_id_get(h);
+	/* still ref count 1 */
+	/* send the id to process B */
+
+	/* in process B: */
+	/* receive the id from A */
+	h_clone = ump_from_secure_id(id);
+	/* the ref count of the allocation is now 2 (one from each handle to it) */
+	/* do something ... */
+	/* release our clone */
+	ump_release(h_clone); /* safe to call even if ump_from_secure_id failed */
+	h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+
+	/* a simple save-for-future-use logic inside the driver would just copy the handle (but add a ref manually too!) */
+	/*
+	 * void assign_memory_to_job(h)
+	 * {
+	  */
+	h_copy = h;
+	ump_retain(h_copy); /* manual retain needed as we just assigned the handle, now 2 */
+	/*
+	 * }
+	 *
+	 * void job_completed(void)
+	 * {
+	 */
+	 ump_release(h_copy); /* normal handle release as if we got via an ump_allocate */
+	 h_copy = UMP_INVALID_MEMORY_HANDLE;
+	 /*
+	 * }
+	 */
+	
+	/* we're now back at ref count 1, and only h is a valid handle */
+	/* enough handle duplication show-off, let's play with the contents instead */
+
+	mapping = ump_map(h, 0, alloc_size);
+	if (NULL == mapping)
+	{
+		/* mapping failure, either out of address space or some other error */
+		goto cleanup;
+	}
+
+	memset(mapping, 0, alloc_size);
+
+	/* let's pretend we're going to start some hw device on this buffer and read the result afterwards */
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN, mapping, alloc_size);
+ 	/*
+		device cache invalidate
+
+		memory barrier
+
+		start device
+
+		memory barrier
+
+		wait for device
+
+		memory barrier
+
+		device cache clean
+
+		memory barrier
+	*/
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN_AND_INVALIDATE, mapping, alloc_size);
+
+	/* we could now peek at the result produced by the hw device, which is now accessible via our mapping */
+
+	/* unmap the buffer when we're done with it */
+	ump_unmap(h, mapping, alloc_size);
+
+	result = 0;
+
+cleanup:
+	ump_release(h);
+	h = UMP_INVALID_MEMORY_HANDLE;
+
+	ump_close();
+
+	return result;
+}
+
diff --git a/midgard/r12p0/kernel/drivers/base/ump/sconscript b/midgard/r12p0/kernel/drivers/base/ump/sconscript
new file mode 100755
index 0000000..b4aa8b6
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+if Glob('src/sconscript') and int(env['ump']) == 1:
+	SConscript( 'src/sconscript' )
+	SConscript( 'docs/sconscript' )
+
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/Kbuild b/midgard/r12p0/kernel/drivers/base/ump/src/Kbuild
new file mode 100755
index 0000000..de6d307
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/Kbuild
@@ -0,0 +1,50 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Paths required for build
+UMP_PATH = $(src)/../..
+UMP_DEVICEDRV_PATH = $(src)/.
+
+# Set up defaults if not defined by the user
+MALI_UNIT_TEST ?= 0
+
+SRC :=\
+	common/ump_kernel_core.c \
+	common/ump_kernel_descriptor_mapping.c \
+	linux/ump_kernel_linux.c \
+	linux/ump_kernel_linux_mem.c
+
+UNIT_TEST_DEFINES=
+ifeq ($(MALI_UNIT_TEST), 1)
+	MALI_DEBUG ?= 1
+
+	UNIT_TEST_DEFINES = -DMALI_UNIT_TEST=1 \
+	                    -DMALI_DEBUG=$(MALI_DEBUG)
+endif
+
+# Use our defines when compiling
+ccflags-y += -I$(UMP_PATH) -I$(UMP_DEVICEDRV_PATH) $(UNIT_TEST_DEFINES)
+
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_UMP) += ump.o
+ifeq ($(CONFIG_ION),y)
+ccflags-y += -I$(srctree)/drivers/staging/android/ion -I$(srctree)/include/linux
+obj-$(CONFIG_UMP) += imports/ion/ump_kernel_import_ion.o
+endif
+
+# Tell the Linux build system to enable building of our .c files
+ump-y := $(SRC:.c=.o)
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/Makefile b/midgard/r12p0/kernel/drivers/base/ump/src/Makefile
new file mode 100755
index 0000000..45428ad
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/Makefile
@@ -0,0 +1,81 @@
+#
+# (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+ifneq ($(KBUILD_EXTMOD),)
+include $(KBUILD_EXTMOD)/Makefile.common
+else
+include ./Makefile.common
+endif
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+RELATIVE_ROOT=../../../../..
+ROOT = $(CURDIR)/$(RELATIVE_ROOT)
+
+EXTRA_CFLAGS=-I$(CURDIR)/../../../../include
+
+ifeq ($(MALI_UNIT_TEST),1)
+	EXTRA_CFLAGS += -DMALI_UNIT_TEST=$(MALI_UNIT_TEST)
+endif
+
+# Get any user defined KDIR-<names> or maybe even a hardcoded KDIR
+-include KDIR_CONFIGURATION
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+CONFIG ?= $(ARCH)
+
+# default cpu to select
+CPU ?= $(shell uname -m)
+
+# look up KDIR based om CPU selection
+KDIR ?= $(KDIR-$(CPU))
+
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(CPU))
+endif
+
+# Validate selected config
+ifneq ($(shell [ -d arch-$(CONFIG) ] && [ -f arch-$(CONFIG)/config.h ] && echo "OK"), OK)
+$(warning Current directory is $(shell pwd))
+$(error No configuration found for config $(CONFIG). Check that arch-$(CONFIG)/config.h exists)
+else
+# Link arch to the selected arch-config directory
+$(shell [ -L arch ] && rm arch)
+$(shell ln -sf arch-$(CONFIG) arch)
+$(shell touch arch/config.h)
+endif
+
+EXTRA_SYMBOLS=
+
+ifeq ($(MALI_UNIT_TEST),1)
+	KBASE_PATH=$(ROOT)/kernel/drivers/gpu/arm/midgard
+	EXTRA_SYMBOLS+=$(KBASE_PATH)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+KDS_PATH=$(ROOT)/kernel/drivers/base/kds
+EXTRA_SYMBOLS+=$(KDS_PATH)/Module.symvers
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="$(EXTRA_CFLAGS) $(SCONS_CFLAGS)" CONFIG_UMP=m KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+kernelrelease:
+	$(MAKE) -C $(KDIR) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" kernelrelease
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/Makefile.common b/midgard/r12p0/kernel/drivers/base/ump/src/Makefile.common
new file mode 100755
index 0000000..f29a4c1
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/Makefile.common
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2008-2010, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+SRC = $(UMP_FILE_PREFIX)/common/ump_kernel_core.c \
+      $(UMP_FILE_PREFIX)/common/ump_kernel_descriptor_mapping.c
+
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/arch-arm/config.h b/midgard/r12p0/kernel/drivers/base/ump/src/arch-arm/config.h
new file mode 100755
index 0000000..152d98f
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/arch-arm/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/arch-arm64/config.h b/midgard/r12p0/kernel/drivers/base/ump/src/arch-arm64/config.h
new file mode 100755
index 0000000..cfb14ca
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/arch-arm64/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c b/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c
new file mode 100755
index 0000000..07aa077
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c
@@ -0,0 +1,756 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* module headers */
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+/* local headers */
+#include <common/ump_kernel_core.h>
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+#define UMP_FLAGS_RANGE ((UMP_PROT_SHAREABLE<<1) - 1u)
+
+static umpp_device device;
+
+ump_result umpp_core_constructor(void)
+{
+	mutex_init(&device.secure_id_map_lock);
+	device.secure_id_map = umpp_descriptor_mapping_create(UMP_EXPECTED_IDS, UMP_MAX_IDS);
+	if (NULL != device.secure_id_map)
+	{
+		if (UMP_OK == umpp_device_initialize())
+		{
+			return UMP_OK;
+		}
+		umpp_descriptor_mapping_destroy(device.secure_id_map);
+	}
+	mutex_destroy(&device.secure_id_map_lock);
+
+	return UMP_ERROR;
+}
+
+void umpp_core_destructor(void)
+{
+	umpp_device_terminate();
+	umpp_descriptor_mapping_destroy(device.secure_id_map);
+	mutex_destroy(&device.secure_id_map_lock);
+}
+
+umpp_session *umpp_core_session_start(void)
+{
+	umpp_session * session;
+
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
+	if (NULL != session)
+	{
+		mutex_init(&session->session_lock);
+
+		INIT_LIST_HEAD(&session->memory_usage);
+
+		/* try to create import client session, not a failure if they fail to initialize */
+		umpp_import_handlers_init(session);
+	}
+
+	return session;
+}
+
+void umpp_core_session_end(umpp_session *session)
+{
+	umpp_session_memory_usage * usage, *_usage;
+	UMP_ASSERT(session);
+
+	list_for_each_entry_safe(usage, _usage, &session->memory_usage, link)
+	{
+		printk(KERN_WARNING "UMP: Memory usage cleanup, releasing secure ID %d\n", ump_dd_secure_id_get(usage->mem));
+		ump_dd_release(usage->mem);
+		kfree(usage);
+
+	}
+
+	/* we should now not hold any imported memory objects,
+	 * detatch all import handlers */
+	umpp_import_handlers_term(session);
+
+	mutex_destroy(&session->session_lock);
+	kfree(session);
+}
+
+ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	umpp_allocation * alloc;
+	int i;
+
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD ) ) == 0 ||
+	    ( flags & (UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read and one write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL | __GFP_HARDWALL);
+
+	if (NULL == alloc)
+		goto out1;
+
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+	alloc->size = size;
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+#ifdef CONFIG_KDS
+	kds_resource_init(&alloc->kds_res);
+#endif
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	if (0 != umpp_phys_commit(alloc))
+	{
+		goto out2;
+	}
+
+	/* all set up, allocate an ID for it */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	return alloc;
+
+out3:
+	umpp_phys_free(alloc);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+uint64_t ump_dd_size_get_64(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->size;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_size_get(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->size <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->size;
+}
+
+ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->id;
+}
+
+#ifdef CONFIG_KDS
+struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return &alloc->kds_res;
+}
+#endif
+
+ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	return alloc->flags;
+}
+
+ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id)
+{
+	umpp_allocation * alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+
+	mutex_lock(&device.secure_id_map_lock);
+
+	if (0 == umpp_descriptor_mapping_lookup(device.secure_id_map, secure_id, (void**)&alloc))
+	{
+		if (NULL != alloc->filter_func)
+		{
+			if (!alloc->filter_func(secure_id, alloc, alloc->callback_data))
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /* the filter denied access */
+			}
+		}
+
+		/* check permission to access it */
+		if ((UMP_DD_INVALID_MEMORY_HANDLE != alloc) && !(alloc->flags & UMP_PROT_SHAREABLE))
+		{
+			if (alloc->owner != get_current()->pid)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /*no rights for the current process*/
+			}
+		}
+
+		if (UMP_DD_INVALID_MEMORY_HANDLE != alloc)
+		{
+			if( ump_dd_retain(alloc) != UMP_DD_SUCCESS)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+			}
+		}
+	}
+	mutex_unlock(&device.secure_id_map_lock);
+
+	return alloc;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id)
+{
+	return ump_dd_from_secure_id(secure_id);
+}
+
+int ump_dd_retain(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* check for overflow */
+	while(1)
+	{
+		int refcnt = atomic_read(&alloc->refcount);
+		if (refcnt + 1 > 0)
+		{
+			if(atomic_cmpxchg(&alloc->refcount, refcnt, refcnt + 1) == refcnt)
+			{
+				return 0;
+			}
+		}
+		else
+		{
+			return -EBUSY;
+		}
+	}
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_add(ump_dd_handle mem)
+{
+	ump_dd_retain(mem);
+}
+
+
+void ump_dd_release(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+	uint32_t new_cnt;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* secure the id for lookup while releasing */
+	mutex_lock(&device.secure_id_map_lock);
+
+	/* do the actual release */
+	new_cnt = atomic_sub_return(1, &alloc->refcount);
+	if (0 == new_cnt)
+	{
+		/* remove from the table as this was the last ref */
+		umpp_descriptor_mapping_remove(device.secure_id_map, alloc->id);
+	}
+
+	/* release the lock as early as possible */
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if (0 != new_cnt)
+	{
+		/* exit if still have refs */
+		return;
+	}
+
+	UMP_ASSERT(list_empty(&alloc->map_list));
+
+#ifdef CONFIG_KDS
+	if (kds_resource_term(&alloc->kds_res))
+	{
+		printk(KERN_ERR "ump_dd_release: kds_resource_term failed,"
+				"unable to release UMP allocation\n");
+		return;
+	}
+#endif
+	/* cleanup */
+	if (NULL != alloc->final_release_func)
+	{
+		alloc->final_release_func(alloc, alloc->callback_data);
+	}
+
+	if (0 == (alloc->management_flags & UMP_MGMT_EXTERNAL))
+	{
+		umpp_phys_free(alloc);
+	}
+	else
+	{
+		kfree(alloc->block_array);
+	}
+
+	mutex_destroy(&alloc->map_list_lock);
+
+	kfree(alloc);
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_release(ump_dd_handle mem)
+{
+	ump_dd_release(mem);
+}
+
+void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(pCount);
+	UMP_ASSERT(pArray);
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+	*pCount = alloc->blocksCount;
+	*pArray = alloc->block_array;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks)
+{
+	const umpp_allocation * alloc;
+	unsigned long i;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	if((uint64_t)num_blocks != alloc->blocksCount)
+	{
+		return UMP_DD_INVALID;
+	}
+
+	for( i = 0; i < num_blocks; i++)
+	{
+		UMP_ASSERT(alloc->block_array[i].addr <= UMP_UINT32_MAX);
+		UMP_ASSERT(alloc->block_array[i].size <= UMP_UINT32_MAX);
+
+		blocks[i].addr = (unsigned long)alloc->block_array[i].addr;
+		blocks[i].size = (unsigned long)alloc->block_array[i].size;
+	}
+
+	return UMP_DD_SUCCESS;
+}
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(block);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	UMP_ASSERT(alloc->block_array[index].addr <= UMP_UINT32_MAX);
+	UMP_ASSERT(alloc->block_array[index].size <= UMP_UINT32_MAX);
+
+	block->addr = (unsigned long)alloc->block_array[index].addr;
+	block->size = (unsigned long)alloc->block_array[index].size;
+
+	return UMP_DD_SUCCESS;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->blocksCount <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->blocksCount;
+}
+
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void *uaddr, size_t size)
+{
+	umpp_cpu_mapping *map;
+
+	void *target_first = uaddr;
+	void *target_last = (void*)((uintptr_t)uaddr - 1 + size);
+
+	if (target_last < target_first) /* wrapped */
+	{
+		return NULL;
+	}
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if ( map->vaddr_start <= target_first &&
+		   (void*)((uintptr_t)map->vaddr_start + (map->nr_pages << PAGE_SHIFT) - 1) >= target_last)
+		{
+			goto out;
+		}
+	}
+	map = NULL;
+out:
+	mutex_unlock(&alloc->map_list_lock);
+
+	return map;
+}
+
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map)
+{
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(map);
+	mutex_lock(&alloc->map_list_lock);
+	list_add(&map->link, &alloc->map_list);
+	mutex_unlock(&alloc->map_list_lock);
+}
+
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target)
+{
+	umpp_cpu_mapping * map;
+
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(target);
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if (map == target)
+		{
+			list_del(&target->link);
+			kfree(target);
+			mutex_unlock(&alloc->map_list_lock);
+			return;
+		}
+	}
+
+	/* not found, error */
+	UMP_ASSERT(0);
+}
+
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const  block_index, uint64_t * const block_internal_offset)
+{
+	uint64_t i;
+
+	for (i = 0 ; i < alloc->blocksCount; i++)
+	{
+		if (offset < alloc->block_array[i].size)
+		{
+			/* found the block_array element containing this offset */
+			*block_index = i;
+			*block_internal_offset = offset;
+			return 0;
+		}
+		offset -= alloc->block_array[i].size;
+	}
+
+	return -ENXIO;
+}
+
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size)
+{
+	umpp_allocation * alloc;
+	void *vaddr;
+	umpp_cpu_mapping * mapping;
+	uint64_t virt_page_off; /* offset of given address from beginning of the virtual mapping */
+	uint64_t phys_page_off; /* offset of the virtual mapping from the beginning of the physical buffer */
+	uint64_t page_count; /* number of pages to sync */
+	uint64_t i;
+	uint64_t block_idx;
+	uint64_t block_offset;
+	uint64_t paddr;
+
+	UMP_ASSERT((UMP_MSYNC_CLEAN == op) || (UMP_MSYNC_CLEAN_AND_INVALIDATE == op));
+
+	alloc = (umpp_allocation*)mem;
+	vaddr = (void*)(uintptr_t)address;
+
+	if((alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+	{
+		/* mapping is not cached */
+		return;
+	}
+
+	mapping = umpp_dd_find_enclosing_mapping(alloc, vaddr, size);
+	if (NULL == mapping)
+	{
+		printk(KERN_WARNING "UMP: Illegal cache sync address %lx\n", (uintptr_t)vaddr);
+		return; /* invalid pointer or size causes out-of-bounds */
+	}
+
+	/* we already know that address + size don't wrap around as umpp_dd_find_enclosing_mapping didn't fail */
+	page_count = ((((((uintptr_t)address + size - 1) & PAGE_MASK) - ((uintptr_t)address & PAGE_MASK))) >> PAGE_SHIFT) + 1;
+	virt_page_off = (vaddr - mapping->vaddr_start) >> PAGE_SHIFT;
+	phys_page_off = mapping->page_off;
+
+	if (umpp_dd_find_start_block(alloc, (virt_page_off + phys_page_off) << PAGE_SHIFT, &block_idx, &block_offset))
+	{
+		/* should not fail as a valid mapping was found, so the phys mem must exists */
+		printk(KERN_WARNING "UMP: Unable to find physical start block with offset %llx\n", virt_page_off + phys_page_off);
+		UMP_ASSERT(0);
+		return;
+	}
+
+	paddr = alloc->block_array[block_idx].addr + block_offset + (((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1));
+
+	for (i = 0; i < page_count; i++)
+	{
+		size_t offset = ((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1);
+		size_t sz = min((size_t)PAGE_SIZE - offset, size);
+
+		/* check if we've overrrun the current block, if so move to the next block */
+		if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+		{
+			block_idx++;
+			UMP_ASSERT(block_idx < alloc->blocksCount);
+			paddr = alloc->block_array[block_idx].addr;
+		}
+
+		if (UMP_MSYNC_CLEAN == op)
+		{
+			ump_sync_to_memory(paddr, vaddr, sz);
+		}
+		else /* (UMP_MSYNC_CLEAN_AND_INVALIDATE == op) already validated on entry */
+		{
+			ump_sync_to_cpu(paddr, vaddr, sz);
+		}
+
+		/* advance to next page  */
+		vaddr = (void*)((uintptr_t)vaddr + sz);
+		size -= sz;
+		paddr += sz;
+	}
+}
+
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	uint64_t size = 0;
+	uint64_t i;
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	for (i = 0; i < num_blocks; i++)
+	{
+		size += blocks[i].size;
+	}
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD
+	    | UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read or write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc),__GFP_HARDWALL | GFP_KERNEL);
+
+	if (NULL == alloc)
+	{
+		goto out1;
+	}
+
+	alloc->block_array = kzalloc(sizeof(ump_dd_physical_block_64) * num_blocks,__GFP_HARDWALL | GFP_KERNEL);
+	if (NULL == alloc->block_array)
+	{
+		goto out2;
+	}
+
+	memcpy(alloc->block_array, blocks, sizeof(ump_dd_physical_block_64) * num_blocks);
+
+#ifdef CONFIG_KDS
+	kds_resource_init(&alloc->kds_res);
+#endif
+	alloc->size = size;
+	alloc->blocksCount = num_blocks;
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+	/* all set up, allocate an ID */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	alloc->management_flags |= UMP_MGMT_EXTERNAL;
+
+	return alloc;
+
+out3:
+	kfree(alloc->block_array);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+
+/*
+ * UMP v1 API
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks)
+{
+	ump_dd_handle mem;
+	ump_dd_physical_block_64 *block_64_array;
+	ump_alloc_flags flags = UMP_V1_API_DEFAULT_ALLOCATION_FLAGS;
+	unsigned long i;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	block_64_array = kzalloc(num_blocks * sizeof(*block_64_array), __GFP_HARDWALL | GFP_KERNEL);
+
+	if(block_64_array == NULL)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/* copy physical blocks */
+	for( i = 0; i < num_blocks; i++)
+	{
+		block_64_array[i].addr = blocks[i].addr;
+		block_64_array[i].size = blocks[i].size;
+	}
+
+	mem = ump_dd_create_from_phys_blocks_64(block_64_array, num_blocks, flags, NULL, NULL, NULL);
+
+	kfree(block_64_array);
+
+	return mem;
+
+}
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h b/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h
new file mode 100755
index 0000000..8cb424d
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h
@@ -0,0 +1,228 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_CORE_H_
+#define _UMP_KERNEL_CORE_H_
+
+
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <asm/atomic.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/cred.h>
+#include <asm/mmu_context.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+#include <linux/ump-common.h>
+#include <ump/src/common/ump_kernel_descriptor_mapping.h>
+
+/* forward decl */
+struct umpp_session;
+
+/**
+ * UMP handle metadata.
+ * Tracks various data about a handle not of any use to user space
+ */
+typedef enum
+{
+	UMP_MGMT_EXTERNAL = (1ul << 0) /**< Handle created via the ump_dd_create_from_phys_blocks interface */
+	/* (1ul << 31) not to be used */
+} umpp_management_flags;
+
+/**
+ * Structure tracking the single global UMP device.
+ * Holds global data like the ID map
+ */
+typedef struct umpp_device
+{
+	struct mutex secure_id_map_lock; /**< Lock protecting access to the map */
+	umpp_descriptor_mapping * secure_id_map; /**< Map of all known secure IDs on the system */
+} umpp_device;
+
+/**
+ * Structure tracking all memory allocations of a UMP allocation.
+ * Tracks info about an mapping so we can verify cache maintenace
+ * operations and help in the unmap cleanup.
+ */
+typedef struct umpp_cpu_mapping
+{
+	struct list_head        link; /**< link to list of mappings for an allocation */
+	void                  *vaddr_start; /**< CPU VA start of the mapping */
+	size_t                nr_pages; /**< Size (in pages) of the mapping */
+	uint64_t              page_off; /**< Offset (in pages) from start of the allocation where the mapping starts */
+	ump_dd_handle         handle; /**< Which handle this mapping is linked to */
+	struct umpp_session * session; /**< Which session created the mapping */
+} umpp_cpu_mapping;
+
+/**
+ * Structure tracking UMP allocation.
+ * Represent a memory allocation with its ID.
+ * Tracks all needed meta-data about an allocation.
+ * */
+typedef struct umpp_allocation
+{
+	ump_secure_id id; /**< Secure ID of the allocation */
+	atomic_t refcount; /**< Usage count */
+
+	ump_alloc_flags flags; /**< Flags for all supported devices */
+	uint32_t management_flags; /**< Managment flags tracking */
+
+	pid_t owner; /**< The process ID owning the memory if not sharable */
+
+	ump_dd_security_filter filter_func; /**< Hook to verify use, called during retains from new clients */
+	ump_dd_final_release_callback final_release_func; /**< Hook called when the last reference is removed */
+	void* callback_data; /**< Additional data given to release hook */
+
+	uint64_t size; /**< Size (in bytes) of the allocation */
+	uint64_t blocksCount; /**< Number of physsical blocks the allocation is built up of */
+	ump_dd_physical_block_64 * block_array; /**< Array, one entry per block, describing block start and length */
+
+	struct mutex     map_list_lock; /**< Lock protecting the map_list */
+	struct list_head map_list; /**< Tracks all CPU VA mappings of this allocation */
+
+#ifdef CONFIG_KDS
+	struct kds_resource kds_res; /**< The KDS resource controlling access to this allocation */
+#endif
+
+	void * backendData; /**< Physical memory backend meta-data */
+} umpp_allocation;
+
+/**
+ * Structure tracking use of UMP memory by a session.
+ * Tracks the use of an allocation by a session so session termination can clean up any outstanding references.
+ * Also protects agains non-matched release calls from user space.
+ */
+typedef struct umpp_session_memory_usage
+{
+	ump_secure_id id; /**< ID being used. For quick look-up */
+	ump_dd_handle mem; /**< Handle being used. */
+
+	/**
+	 * Track how many times has the process retained this handle in the kernel.
+	 * This should usually just be 1(allocated or resolved) or 2(mapped),
+	 * but could be more if someone is playing with the low-level API
+	 * */
+	atomic_t process_usage_count;
+
+	struct list_head link; /**< link to other usage trackers for a session */
+} umpp_session_memory_usage;
+
+/**
+ * Structure representing a session/client.
+ * Tracks the UMP allocations being used by this client.
+ */
+typedef struct umpp_session
+{
+	struct mutex session_lock; /**< Lock for memory usage manipulation */
+	struct list_head memory_usage; /**< list of memory currently being used by the this session */
+	void*  import_handler_data[UMPP_EXTERNAL_MEM_COUNT]; /**< Import modules per-session data pointer */
+} umpp_session;
+
+/**
+ * UMP core setup.
+ * Called by any OS specific startup function to initialize the common part.
+ * @return UMP_OK if core initialized correctly, any other value for errors
+ */
+ump_result umpp_core_constructor(void);
+
+/**
+ * UMP core teardown.
+ * Called by any OS specific unload function to clean up the common part.
+ */
+void umpp_core_destructor(void);
+
+/**
+ * UMP session start.
+ * Called by any OS specific session handler when a new session is detected
+ * @return Non-NULL if a matching core session could be set up. NULL on failure
+ */
+umpp_session *umpp_core_session_start(void);
+
+/**
+ * UMP session end.
+ * Called by any OS specific session handler when a session is ended/terminated.
+ * @param session The core session object returned by ump_core_session_start
+ */
+void umpp_core_session_end(umpp_session *session);
+
+/**
+ * Find a mapping object (if any) for this allocation.
+ * Called by any function needing to identify a mapping from a user virtual address.
+ * Verifies that the whole range to be within a mapping object.
+ * @param alloc The UMP allocation to find a matching mapping object of
+ * @param uaddr User mapping address to find the mapping object for
+ * @param size Length of the mapping
+ * @return NULL on error (no match found), pointer to mapping object if match found
+ */
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void* uaddr, size_t size);
+
+/**
+ * Register a new mapping of an allocation.
+ * Called by functions creating a new mapping of an allocation, typically OS specific handlers.
+ * @param alloc The allocation object which has been mapped
+ * @param map Info about the mapping
+ */
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map);
+
+/**
+ * Remove and free mapping object from an allocation.
+ * @param alloc The allocation object to remove the mapping info from
+ * @param target The mapping object to remove
+ */
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target);
+
+/**
+ * Helper to find a block in the blockArray which holds a given byte offset.
+ * @param alloc The allocation object to find the block in
+ * @param offset Offset (in bytes) from allocation start to find the block of
+ * @param[out] block_index Pointer to the index of the block matching
+ * @param[out] block_internal_offset Offset within the returned block of the searched offset
+ * @return 0 if a matching block was found, any other value for error
+ */
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const block_index, uint64_t * const block_internal_offset);
+
+/**
+ * Cache maintenance helper.
+ * Performs the requested cache operation on the given handle.
+ * @param mem Allocation handle
+ * @param op Cache maintenance operation to perform
+ * @param address User mapping at which to do the operation
+ * @param size Length (in bytes) of the range to do the operation on
+ */
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size);
+
+/**
+ * Import module session early init.
+ * Calls session_begin on all installed import modules.
+ * @param session The core session object to initialized the import handler for
+ * */
+void umpp_import_handlers_init(umpp_session * session);
+
+/**
+ * Import module session cleanup.
+ * Calls session_end on all import modules bound to the session.
+ * @param session The core session object to initialized the import handler for
+ */
+void umpp_import_handlers_term(umpp_session * session);
+
+#endif /* _UMP_KERNEL_CORE_H_ */
+
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c b/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
new file mode 100755
index 0000000..c5b0d74
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
@@ -0,0 +1,162 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <common/ump_kernel_priv.h>
+
+#define MALI_PAD_INT(x) (((x) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1))
+
+/**
+ * Allocate a descriptor table capable of holding 'count' mappings
+ * @param count Number of mappings in the table
+ * @return Pointer to a new table, NULL on error
+ */
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count);
+
+/**
+ * Free a descriptor table
+ * @param table The table to free
+ */
+static void descriptor_table_free(umpp_descriptor_table * table);
+
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries)
+{
+	umpp_descriptor_mapping * map = kzalloc(sizeof(umpp_descriptor_mapping), GFP_KERNEL);
+
+	init_entries = MALI_PAD_INT(init_entries);
+	max_entries = MALI_PAD_INT(max_entries);
+
+	if (NULL != map)
+	{
+		map->table = descriptor_table_alloc(init_entries);
+		if (NULL != map->table)
+		{
+			init_rwsem( &map->lock);
+			set_bit(0, map->table->usage);
+			map->max_nr_mappings_allowed = max_entries;
+			map->current_nr_mappings = init_entries;
+			return map;
+
+			descriptor_table_free(map->table);
+		}
+		kfree(map);
+	}
+	return NULL;
+}
+
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map)
+{
+	UMP_ASSERT(NULL != map);
+	descriptor_table_free(map->table);
+	kfree(map);
+}
+
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target)
+{
+ 	int descriptor = 0;
+	UMP_ASSERT(NULL != map);
+	down_write( &map->lock);
+	descriptor = find_first_zero_bit(map->table->usage, map->current_nr_mappings);
+	if (descriptor == map->current_nr_mappings)
+	{
+		/* no free descriptor, try to expand the table */
+		umpp_descriptor_table * new_table;
+		umpp_descriptor_table * old_table = map->table;
+		int nr_mappings_new = map->current_nr_mappings + BITS_PER_LONG;
+
+		if (map->current_nr_mappings >= map->max_nr_mappings_allowed)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+		new_table = descriptor_table_alloc(nr_mappings_new);
+		if (NULL == new_table)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+ 		memcpy(new_table->usage, old_table->usage, (sizeof(unsigned long)*map->current_nr_mappings) / BITS_PER_LONG);
+ 		memcpy(new_table->mappings, old_table->mappings, map->current_nr_mappings * sizeof(void*));
+
+ 		map->table = new_table;
+		map->current_nr_mappings = nr_mappings_new;
+		descriptor_table_free(old_table);
+	}
+
+	/* we have found a valid descriptor, set the value and usage bit */
+	set_bit(descriptor, map->table->usage);
+	map->table->mappings[descriptor] = target;
+
+unlock_and_exit:
+	up_write(&map->lock);
+	return descriptor;
+}
+
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target)
+{
+	int result = -EINVAL;
+ 	UMP_ASSERT(map);
+	UMP_ASSERT(target);
+ 	down_read(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		*target = map->table->mappings[descriptor];
+		result = 0;
+	}
+	/* keep target untouched if the descriptor was not found */
+	up_read(&map->lock);
+	return result;
+}
+
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor)
+{
+	UMP_ASSERT(map);
+ 	down_write(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		map->table->mappings[descriptor] = NULL;
+		clear_bit(descriptor, map->table->usage);
+	}
+	up_write(&map->lock);
+}
+
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count)
+{
+	umpp_descriptor_table * table;
+
+	table = kzalloc(sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG) + (sizeof(void*) * count), __GFP_HARDWALL | GFP_KERNEL );
+
+	if (NULL != table)
+	{
+		table->usage = (unsigned long*)((u8*)table + sizeof(umpp_descriptor_table));
+		table->mappings = (void**)((u8*)table + sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG));
+	}
+
+	return table;
+}
+
+static void descriptor_table_free(umpp_descriptor_table * table)
+{
+ 	UMP_ASSERT(table);
+	kfree(table);
+}
+
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h b/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
new file mode 100755
index 0000000..d06c145
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_kernel_descriptor_mapping.h
+ */
+
+#ifndef _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+#define _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+/**
+ * The actual descriptor mapping table, never directly accessed by clients
+ */
+typedef struct umpp_descriptor_table
+{
+	/* keep as a unsigned long to rely on the OS's bitops support */
+	unsigned long * usage; /**< Pointer to bitpattern indicating if a descriptor is valid/used(1) or not(0) */
+	void** mappings; /**< Array of the pointers the descriptors map to */
+} umpp_descriptor_table;
+
+/**
+ * The descriptor mapping object
+ * Provides a separate namespace where we can map an integer to a pointer
+ */
+typedef struct umpp_descriptor_mapping
+{
+	struct rw_semaphore lock; /**< Lock protecting access to the mapping object */
+	unsigned int max_nr_mappings_allowed; /**< Max number of mappings to support in this namespace */
+	unsigned int current_nr_mappings; /**< Current number of possible mappings */
+	umpp_descriptor_table * table; /**< Pointer to the current mapping table */
+} umpp_descriptor_mapping;
+
+/**
+ * Create a descriptor mapping object.
+ * Create a descriptor mapping capable of holding init_entries growable to max_entries.
+ * ID 0 is reserved so the number of available entries will be max - 1.
+ * @param init_entries Number of entries to preallocate memory for
+ * @param max_entries Number of entries to max support
+ * @return Pointer to a descriptor mapping object, NULL on failure
+ */
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries);
+
+/**
+ * Destroy a descriptor mapping object
+ * @param[in] map The map to free
+ */
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map);
+
+/**
+ * Allocate a new mapping entry (descriptor ID)
+ * Allocates a new entry in the map.
+ * @param[in] map The map to allocate a new entry in
+ * @param[in] target The value to map to
+ * @return The descriptor allocated, ID 0 on failure.
+ */
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target);
+
+/**
+ * Get the value mapped to by a descriptor ID
+ * @param[in] map The map to lookup the descriptor id in
+ * @param[in] descriptor The descriptor ID to lookup
+ * @param[out] target Pointer to a pointer which will receive the stored value
+ *
+ * @return 0 on success lookup, -EINVAL on lookup failure.
+ */
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target);
+
+/**
+ * Free the descriptor ID
+ * For the descriptor to be reused it has to be freed
+ * @param[in] map The map to free the descriptor from
+ * @param descriptor The descriptor ID to free
+ */
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor);
+
+#endif /* _UMP_KERNEL_DESCRIPTOR_MAPPING_H_ */
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h b/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
new file mode 100755
index 0000000..38b6f1b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_PRIV_H_
+#define _UMP_KERNEL_PRIV_H_
+
+#ifdef __KERNEL__
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <asm/cacheflush.h>
+#endif
+
+
+#define UMP_EXPECTED_IDS 64
+#define UMP_MAX_IDS 32768
+
+#ifdef __KERNEL__
+#define UMP_ASSERT(expr) \
+		if (!(expr)) { \
+			printk(KERN_ERR "UMP: Assertion failed! %s,%s,%s,line=%d\n",\
+					#expr,__FILE__,__func__,__LINE__); \
+					BUG(); \
+		}
+
+static inline void ump_sync_to_memory(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/*TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_TO_DEVICE);
+	mb(); /* for outer_sync (if needed) */
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+
+static inline void ump_sync_to_cpu(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/* TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_FROM_DEVICE);
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+#endif /* __KERNEL__*/
+#endif /* _UMP_KERNEL_PRIV_H_ */
+
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/imports/ion/Makefile b/midgard/r12p0/kernel/drivers/base/ump/src/imports/ion/Makefile
new file mode 100755
index 0000000..ef74b27
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/imports/ion/Makefile
@@ -0,0 +1,53 @@
+#
+# (C) COPYRIGHT 2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+
+ifneq ($(KERNELRELEASE),)
+# Inside the kernel build system
+
+EXTRA_CFLAGS += -I$(KBUILD_EXTMOD) -I$(KBUILD_EXTMOD)/../../../../..
+KBUILD_EXTRA_SYMBOLS += "$(KBUILD_EXTMOD)/../../Module.symvers"
+
+SRC += ump_kernel_import_ion.c
+
+MODULE:=ump_ion_import.ko
+
+obj-m := $(MODULE:.ko=.o)
+$(MODULE:.ko=-y) := $(SRC:.c=.o)
+$(MODULE:.ko=-objs) := $(SRC:.c=.o)
+
+else
+# Outside the kernel build system
+#
+#
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR)
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+endif
+
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/imports/ion/sconscript b/midgard/r12p0/kernel/drivers/base/ump/src/imports/ion/sconscript
new file mode 100755
index 0000000..cff24c8
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/imports/ion/sconscript
@@ -0,0 +1,49 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ion = env.Clone()
+
+if env_ion['ump_ion'] != '1':
+	Return()
+
+# Source files required for UMP.
+ion_src = [Glob('#kernel/drivers/base/ump/src/imports/ion/*.c')]
+
+# Note: cleaning via the Linux kernel build system does not yet work
+if env_ion.GetOption('clean') :
+	makeAction=Action("cd ${SOURCE.dir} && make clean", '$MAKECOMSTR')
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make PLATFORM=${platform} && cp ump_ion_import.ko $STATIC_LIB_PATH/ump_ion_import.ko", '$MAKECOMSTR')
+# The target is ump_import_ion.ko, built from the source in ion_src, via the action makeAction
+# ump_import_ion.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+# kernel build system, after which it can be installed to the directory specified if
+# "libs_install" is set; this is done by LibTarget.
+cmd = env_ion.Command('$STATIC_LIB_PATH/ump_ion_import.ko', ion_src, [makeAction])
+
+# Until we fathom out how the invoke the Linux build system to clean, we can use Clean
+# to remove generated files.
+
+patterns = ['*.mod.c', '*.o', '*.ko', '*.a', '.*.cmd', 'modules.order', '.tmp_versions', 'Module.symvers']
+
+for p in patterns:
+	Clean(cmd, Glob('#kernel/drivers/base/ump/src/imports/ion/%s' % p))
+
+env_ion.Depends('$STATIC_LIB_PATH/ump_ion_import.ko', '$STATIC_LIB_PATH/ump.ko')
+env_ion.KernelObjTarget('ump', cmd)
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c b/midgard/r12p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
new file mode 100755
index 0000000..12b2e32
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/dma-mapping.h>
+#include "ion.h"
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+struct ion_wrapping_info
+{
+	struct ion_client *   ion_client;
+	struct ion_handle *   ion_handle;
+	int                   num_phys_blocks;
+	struct scatterlist *  sglist;
+};
+
+static struct ion_device * ion_device_get(void)
+{
+	/* < Customer to provide implementation >
+	 * Return a pointer to the global ion_device on the system
+	 */
+	return NULL;
+}
+
+static int import_ion_client_create(void** const custom_session_data)
+{
+	struct ion_client ** ion_client;
+
+	ion_client = (struct ion_client**)custom_session_data;
+
+	*ion_client = ion_client_create(ion_device_get(), "ump");
+
+	return PTR_RET(*ion_client);
+}
+
+
+static void import_ion_client_destroy(void* custom_session_data)
+{
+	struct ion_client * ion_client;
+
+	ion_client = (struct ion_client*)custom_session_data;
+	BUG_ON(!ion_client);
+
+	ion_client_destroy(ion_client);
+}
+
+
+static void import_ion_final_release_callback(const ump_dd_handle handle, void * info)
+{
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!info);
+
+	(void)handle;
+	ion_info = (struct ion_wrapping_info*)info;
+
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+	kfree(ion_info);
+	module_put(THIS_MODULE);
+}
+
+static ump_dd_handle import_ion_import(void * custom_session_data, void * pfd, ump_alloc_flags flags)
+{
+	int fd;
+	ump_dd_handle ump_handle;
+	struct scatterlist * sg;
+	int num_dma_blocks;
+	ump_dd_physical_block_64 * phys_blocks;
+	unsigned long i;
+	struct sg_table * sgt;
+
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!custom_session_data);
+	BUG_ON(!pfd);
+
+	ion_info = kzalloc(GFP_KERNEL, sizeof(*ion_info));
+	if (NULL == ion_info)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	ion_info->ion_client = (struct ion_client*)custom_session_data;
+
+	if (get_user(fd, (int*)pfd))
+	{
+		goto out;
+	}
+
+	ion_info->ion_handle = ion_import_dma_buf(ion_info->ion_client, fd);
+
+	if (IS_ERR_OR_NULL(ion_info->ion_handle))
+	{
+		goto out;
+	}
+
+	sgt = ion_sg_table(ion_info->ion_client, ion_info->ion_handle);
+	if (IS_ERR_OR_NULL(sgt))
+	{
+		goto ion_dma_map_failed;
+	}
+
+	ion_info->sglist = sgt->sgl;
+
+	sg = ion_info->sglist;
+	while (sg)
+	{
+		ion_info->num_phys_blocks++;
+		sg = sg_next(sg);
+	}
+
+	num_dma_blocks = dma_map_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	if (0 == num_dma_blocks)
+	{
+		goto linux_dma_map_failed;
+	}
+
+	phys_blocks = vmalloc(num_dma_blocks * sizeof(*phys_blocks));
+	if (NULL == phys_blocks)
+	{
+		goto vmalloc_failed;
+	}
+
+	for_each_sg(ion_info->sglist, sg, num_dma_blocks, i)
+	{
+		phys_blocks[i].addr = sg_phys(sg);
+		phys_blocks[i].size = sg_dma_len(sg);
+	}
+
+	ump_handle = ump_dd_create_from_phys_blocks_64(phys_blocks, num_dma_blocks, flags, NULL, import_ion_final_release_callback, ion_info);
+
+	vfree(phys_blocks);
+
+	if (ump_handle != UMP_DD_INVALID_MEMORY_HANDLE)
+	{
+		/*
+		 * As we have a final release callback installed
+		 * we must keep the module locked until
+		 * the callback has been triggered
+		 * */
+		__module_get(THIS_MODULE);
+		return ump_handle;
+	}
+
+	/* failed*/
+vmalloc_failed:
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+linux_dma_map_failed:
+ion_dma_map_failed:
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+out:
+	kfree(ion_info);
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+struct ump_import_handler import_handler_ion =
+{
+	.linux_module =  THIS_MODULE,
+	.session_begin = import_ion_client_create,
+	.session_end =   import_ion_client_destroy,
+	.import =        import_ion_import
+};
+
+static int __init import_ion_initialize_module(void)
+{
+	/* register with UMP */
+	return ump_import_module_register(UMP_EXTERNAL_MEM_TYPE_ION, &import_handler_ion);
+}
+
+static void __exit import_ion_cleanup_module(void)
+{
+	/* unregister import handler */
+	ump_import_module_unregister(UMP_EXTERNAL_MEM_TYPE_ION);
+}
+
+/* Setup init and exit functions for this module */
+module_init(import_ion_initialize_module);
+module_exit(import_ion_cleanup_module);
+
+/* And some module information */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/imports/sconscript b/midgard/r12p0/kernel/drivers/base/ump/src/imports/sconscript
new file mode 100755
index 0000000..8f50683
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/imports/sconscript
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os, sys
+Import('env')
+
+import_modules = [ os.path.join( path, 'sconscript' ) for path in sorted(os.listdir( os.getcwd() )) ]
+
+for m in import_modules:
+	if os.path.exists(m):
+		SConscript( m, variant_dir=os.path.join( env['BUILD_DIR_PATH'], os.path.dirname(m) ), duplicate=0 )
+
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c b/midgard/r12p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
new file mode 100755
index 0000000..d6c3c53
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
@@ -0,0 +1,831 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump-ioctl.h>
+#include <linux/ump.h>
+
+#include <asm/uaccess.h>	         /* copy_*_user */
+#include <linux/compat.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/device.h>            /* class registration support */
+
+#include <common/ump_kernel_core.h>
+
+#include "ump_kernel_linux_mem.h"
+#include <ump_arch.h>
+
+
+struct ump_linux_device
+{
+	struct cdev cdev;
+	struct class * ump_class;
+};
+
+/* Name of the UMP device driver */
+static char ump_dev_name[] = "ump"; /* should be const, but the functions we call requires non-cost */
+
+/* Module parameter to control log level */
+int ump_debug_level = 2;
+module_param(ump_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(ump_debug_level, "Higher number, more dmesg output");
+
+/* By default the module uses any available major, but it's possible to set it at load time to a specific number */
+int ump_major = 0;
+module_param(ump_major, int, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_major, "Device major number");
+
+#define UMP_REV_STRING "1.0"
+
+char * ump_revision = UMP_REV_STRING;
+module_param(ump_revision, charp, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_revision, "Revision info");
+
+static int umpp_linux_open(struct inode *inode, struct file *filp);
+static int umpp_linux_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+/* This variable defines the file operations this UMP device driver offers */
+static struct file_operations ump_fops =
+{
+	.owner   = THIS_MODULE,
+	.open    = umpp_linux_open,
+	.release = umpp_linux_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = umpp_linux_ioctl,
+#else
+	.ioctl   = umpp_linux_ioctl,
+#endif
+	.compat_ioctl = umpp_linux_ioctl,
+	.mmap = umpp_linux_mmap
+};
+
+/* import module handling */
+DEFINE_MUTEX(import_list_lock);
+struct ump_import_handler *  import_handlers[UMPP_EXTERNAL_MEM_COUNT];
+
+/* The global variable containing the global device data */
+static struct ump_linux_device ump_linux_device;
+
+#define DBG_MSG(level, ...) do { \
+if ((level) <=  ump_debug_level)\
+{\
+printk(KERN_DEBUG "UMP<" #level ">:\n" __VA_ARGS__);\
+} \
+} while (0)
+
+#define MSG_ERR(...) do{ \
+printk(KERN_ERR "UMP: ERR: %s\n           %s()%4d\n", __FILE__, __func__  , __LINE__) ; \
+printk(KERN_ERR __VA_ARGS__); \
+printk(KERN_ERR "\n"); \
+} while(0)
+
+#define MSG(...) do{ \
+printk(KERN_INFO "UMP: " __VA_ARGS__);\
+} while (0)
+
+/*
+ * This function is called by Linux to initialize this module.
+ * All we do is initialize the UMP device driver.
+ */
+static int __init umpp_linux_initialize_module(void)
+{
+	ump_result err;
+
+	err = umpp_core_constructor();
+	if (UMP_OK != err)
+	{
+		MSG_ERR("UMP device driver init failed\n");
+		return -ENOTTY;
+	}
+
+	MSG("UMP device driver %s loaded\n", UMP_REV_STRING);
+	return 0;
+}
+
+
+
+/*
+ * This function is called by Linux to unload/terminate/exit/cleanup this module.
+ * All we do is terminate the UMP device driver.
+ */
+static void __exit umpp_linux_cleanup_module(void)
+{
+	DBG_MSG(2, "Unloading UMP device driver\n");
+	umpp_core_destructor();
+	DBG_MSG(2, "Module unloaded\n");
+}
+
+
+
+/*
+ * Initialize the UMP device driver.
+ */
+ump_result umpp_device_initialize(void)
+{
+	int err;
+	dev_t dev = 0;
+
+	if (0 == ump_major)
+	{
+		/* auto select a major */
+		err = alloc_chrdev_region(&dev, 0, 1, ump_dev_name);
+		ump_major = MAJOR(dev);
+	}
+	else
+	{
+		/* use load time defined major number */
+		dev = MKDEV(ump_major, 0);
+		err = register_chrdev_region(dev, 1, ump_dev_name);
+	}
+
+	if (0 == err)
+	{
+		memset(&ump_linux_device, 0, sizeof(ump_linux_device));
+
+		/* initialize our char dev data */
+		cdev_init(&ump_linux_device.cdev, &ump_fops);
+		ump_linux_device.cdev.owner = THIS_MODULE;
+		ump_linux_device.cdev.ops = &ump_fops;
+
+		/* register char dev with the kernel */
+		err = cdev_add(&ump_linux_device.cdev, dev, 1/*count*/);
+		if (0 == err)
+		{
+
+			ump_linux_device.ump_class = class_create(THIS_MODULE, ump_dev_name);
+			if (IS_ERR(ump_linux_device.ump_class))
+			{
+				err = PTR_ERR(ump_linux_device.ump_class);
+			}
+			else
+			{
+				struct device * mdev;
+				mdev = device_create(ump_linux_device.ump_class, NULL, dev, NULL, ump_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return UMP_OK;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(ump_linux_device.ump_class);
+			}
+			cdev_del(&ump_linux_device.cdev);
+		}
+
+		unregister_chrdev_region(dev, 1);
+	}
+
+	return UMP_ERROR;
+}
+
+
+
+/*
+ * Terminate the UMP device driver
+ */
+void umpp_device_terminate(void)
+{
+	dev_t dev = MKDEV(ump_major, 0);
+
+	device_destroy(ump_linux_device.ump_class, dev);
+	class_destroy(ump_linux_device.ump_class);
+
+	/* unregister char device */
+	cdev_del(&ump_linux_device.cdev);
+
+	/* free major */
+	unregister_chrdev_region(dev, 1);
+}
+
+
+static int umpp_linux_open(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = umpp_core_session_start();
+	if (NULL == session)
+	{
+		return -EFAULT;
+	}
+	
+	filp->private_data = session;
+
+	return 0;
+}
+
+static int umpp_linux_release(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = filp->private_data;
+
+	umpp_core_session_end(session);
+
+	filp->private_data = NULL;
+
+	return 0;
+}
+
+/**************************/
+/*ioctl specific functions*/
+/**************************/
+static int do_ump_dd_allocate(umpp_session * session, ump_k_allocate * params)
+{
+	ump_dd_handle new_allocation;
+	new_allocation = ump_dd_allocate_64(params->size, params->alloc_flags, NULL, NULL, NULL);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	printk(KERN_WARNING "UMP: Allocation FAILED\n");
+	return -ENOMEM;
+}
+
+static int do_ump_dd_retain(umpp_session * session, ump_k_retain * params)
+{
+	umpp_session_memory_usage * it;
+
+	mutex_lock(&session->session_lock);
+
+	/* try to find it on the session usage list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found to already be in use */
+			/* check for overflow */
+			while(1)
+			{
+				int refcnt = atomic_read(&it->process_usage_count);
+				if (refcnt + 1 > 0)
+				{
+					/* add a process local ref */
+					if(atomic_cmpxchg(&it->process_usage_count, refcnt, refcnt + 1) == refcnt)
+					{
+						mutex_unlock(&session->session_lock);
+						return 0;
+					}
+				}
+				else
+				{
+					/* maximum usage cap reached */
+					mutex_unlock(&session->session_lock);
+					return -EBUSY;
+				}
+			}
+		}
+	}
+	/* try to look it up globally */
+
+	it = kmalloc(sizeof(*it), GFP_KERNEL);
+
+	if (NULL != it)
+	{
+		it->mem = ump_dd_from_secure_id(params->secure_id);
+		if (UMP_DD_INVALID_MEMORY_HANDLE != it->mem)
+		{
+			/* found, add it to the session usage list */
+			it->id = params->secure_id;
+			atomic_set(&it->process_usage_count, 1);
+			list_add(&it->link, &session->memory_usage);
+		}
+		else
+		{
+			/* not found */
+			kfree(it);
+			it = NULL;
+		}
+	}
+
+	mutex_unlock(&session->session_lock);
+
+	return (NULL != it) ? 0 : -ENODEV;
+}
+
+
+static int do_ump_dd_release(umpp_session * session, ump_k_release * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only do a release if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			result = 0;
+
+			if (0 == atomic_sub_return(1, &it->process_usage_count))
+			{
+				/* last ref in this process remove from the usage list and remove the underlying ref */
+				list_del(&it->link);
+				ump_dd_release(it->mem);
+				kfree(it);
+			}
+
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_sizequery(umpp_session * session, ump_k_sizequery * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->size = ump_dd_size_get_64(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_allocation_flags_get(umpp_session * session, ump_k_allocation_flags * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->alloc_flags = ump_dd_allocation_flags_get(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_msync_now(umpp_session * session, ump_k_msync * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, do the cache op */
+#ifdef CONFIG_COMPAT
+			if (is_compat_task())
+			{
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, compat_ptr(params->mapped_ptr.compat_value), params->size);
+				result = 0;
+			}
+			else
+			{
+#endif
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, params->mapped_ptr.value, params->size);
+				result = 0;
+#ifdef CONFIG_COMPAT
+			}
+#endif
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+
+void umpp_import_handlers_init(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		if (import_handlers[i])
+		{
+			import_handlers[i]->session_begin(&session->import_handler_data[i]);
+			/* It is OK if session_begin returned an error.
+			 * We won't do any import calls if so */
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+void umpp_import_handlers_term(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		/* only call if session_begin succeeded */
+		if (session->import_handler_data[i] != NULL)
+		{
+			/* if session_beging succeeded the handler
+			 * should not have unregistered with us */
+			BUG_ON(!import_handlers[i]);
+			import_handlers[i]->session_end(session->import_handler_data[i]);
+			session->import_handler_data[i] = NULL;
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler)
+{
+	int res = -EEXIST;
+
+	/* validate input */
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+	BUG_ON(!handler);
+	BUG_ON(!handler->linux_module);
+	BUG_ON(!handler->session_begin);
+	BUG_ON(!handler->session_end);
+	BUG_ON(!handler->import);
+
+	mutex_lock(&import_list_lock);
+
+	if (!import_handlers[type])
+	{
+		import_handlers[type] = handler;
+		res = 0;
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return res;
+}
+
+void ump_import_module_unregister(enum ump_external_memory_type type)
+{
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+
+	mutex_lock(&import_list_lock);
+	/* an error to call this if ump_import_module_register didn't succeed */
+	BUG_ON(!import_handlers[type]);
+	import_handlers[type] = NULL;
+	mutex_unlock(&import_list_lock);
+}
+
+static struct ump_import_handler * import_handler_get(unsigned int type_id)
+{
+	enum ump_external_memory_type type;
+	struct ump_import_handler * handler;
+
+	/* validate and convert input */
+	/* handle bad data here, not just BUG_ON */
+	if (type_id == 0 || type_id >= UMPP_EXTERNAL_MEM_COUNT)
+		return NULL;
+
+	type = (enum ump_external_memory_type)type_id;
+
+	/* find the handler */
+	mutex_lock(&import_list_lock);
+
+	handler = import_handlers[type];
+
+	if (handler)
+	{
+		if (!try_module_get(handler->linux_module))
+		{
+			handler = NULL;
+		}
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return handler;
+}
+
+static void import_handler_put(struct ump_import_handler * handler)
+{
+	module_put(handler->linux_module);
+}
+
+static int do_ump_dd_import(umpp_session * session, ump_k_import * params)
+{
+	ump_dd_handle new_allocation = UMP_DD_INVALID_MEMORY_HANDLE;
+	struct ump_import_handler * handler;
+
+	handler = import_handler_get(params->type);
+
+	if (handler)
+	{
+		/* try late binding if not already bound */
+		if (!session->import_handler_data[params->type])
+		{
+			handler->session_begin(&session->import_handler_data[params->type]);
+		}
+
+		/* do we have a bound session? */
+		if (session->import_handler_data[params->type])
+		{
+			new_allocation = handler->import( session->import_handler_data[params->type],
+		                                      params->phandle.value,
+		                                      params->alloc_flags);
+		}
+
+		/* done with the handler */
+		import_handler_put(handler);
+	}
+
+	/* did the import succeed? */
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	return -ENOMEM;
+
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	int ret;
+	uint64_t msg[(UMP_CALL_MAX_SIZE+7)>>3]; /* alignment fixup */
+	uint32_t size = _IOC_SIZE(cmd);
+	struct umpp_session *session = filp->private_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+	(void)inode; /* unused arg */
+#endif
+
+	/*
+	 * extract the type and number bitfields, and don't decode
+	 * wrong cmds: return ENOTTY (inappropriate ioctl) before access_ok()
+	 */
+	if (_IOC_TYPE(cmd) != UMP_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if (_IOC_NR(cmd) > UMP_IOC_MAXNR)
+	{
+		return -ENOTTY;
+	}
+
+	switch(cmd)
+	{
+		case UMP_FUNC_ALLOCATE:
+			if (size != sizeof(ump_k_allocate))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocate(session, (ump_k_allocate *)&msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_SIZEQUERY:
+			if (size != sizeof(ump_k_sizequery))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_sizequery(session,(ump_k_sizequery*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_MSYNC:
+			if (size != sizeof(ump_k_msync))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_msync_now(session,(ump_k_msync*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_IMPORT:
+			if (size != sizeof(ump_k_import))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user*)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_import(session, (ump_k_import*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		/* used only by v1 API */
+		case UMP_FUNC_ALLOCATION_FLAGS_GET:
+			if (size != sizeof(ump_k_allocation_flags))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocation_flags_get(session,(ump_k_allocation_flags*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_RETAIN:
+			if (size != sizeof(ump_k_retain))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_retain(session,(ump_k_retain*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		case UMP_FUNC_RELEASE:
+			if (size != sizeof(ump_k_release))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_release(session,(ump_k_release*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		default:
+			/* not ours */
+			return -ENOTTY;
+	}
+	/*redundant below*/
+	return -ENOTTY;
+}
+
+
+/* Export UMP kernel space API functions */
+EXPORT_SYMBOL(ump_dd_allocate_64);
+EXPORT_SYMBOL(ump_dd_allocation_flags_get);
+EXPORT_SYMBOL(ump_dd_secure_id_get);
+EXPORT_SYMBOL(ump_dd_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get_64);
+EXPORT_SYMBOL(ump_dd_size_get_64);
+EXPORT_SYMBOL(ump_dd_retain);
+EXPORT_SYMBOL(ump_dd_release);
+EXPORT_SYMBOL(ump_dd_create_from_phys_blocks_64);
+#ifdef CONFIG_KDS
+EXPORT_SYMBOL(ump_dd_kds_resource_get);
+#endif
+
+/* import API */
+EXPORT_SYMBOL(ump_import_module_register);
+EXPORT_SYMBOL(ump_import_module_unregister);
+
+
+
+/* V1 API */
+EXPORT_SYMBOL(ump_dd_handle_create_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_block_count_get);
+EXPORT_SYMBOL(ump_dd_phys_block_get);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get);
+EXPORT_SYMBOL(ump_dd_size_get);
+EXPORT_SYMBOL(ump_dd_reference_add);
+EXPORT_SYMBOL(ump_dd_reference_release);
+EXPORT_SYMBOL(ump_dd_handle_create_from_phys_blocks);
+
+
+/* Setup init and exit functions for this module */
+module_init(umpp_linux_initialize_module);
+module_exit(umpp_linux_cleanup_module);
+
+/* And some module informatio */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(UMP_REV_STRING);
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c b/midgard/r12p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
new file mode 100755
index 0000000..38db91e
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
@@ -0,0 +1,250 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+#include <linux/version.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/mm.h> /* memory mananger definitions */
+#include <linux/pfn.h>
+#include <linux/highmem.h> /*kmap*/
+
+#include <linux/compat.h> /* is_compat_task */
+
+#include <common/ump_kernel_core.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+static void umpp_vm_close(struct vm_area_struct *vma)
+{
+	umpp_cpu_mapping * mapping;
+	umpp_session * session;
+	ump_dd_handle handle;
+
+	mapping = (umpp_cpu_mapping*)vma->vm_private_data;
+	UMP_ASSERT(mapping);
+	
+	session = mapping->session;
+	handle = mapping->handle;
+
+	umpp_dd_remove_cpu_mapping(mapping->handle, mapping); /* will free the mapping object */
+	ump_dd_release(handle);
+}
+
+
+static const struct vm_operations_struct umpp_vm_ops = {
+	.close = umpp_vm_close
+};
+
+int umpp_phys_commit(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	/* round up to a page boundary */
+	alloc->size = (alloc->size + PAGE_SIZE - 1) & ~((uint64_t)PAGE_SIZE-1) ;
+	/* calculate number of pages */
+	alloc->blocksCount = alloc->size >> PAGE_SHIFT;
+
+	if( (sizeof(ump_dd_physical_block_64) * alloc->blocksCount) > ((size_t)-1))
+	{
+		printk(KERN_WARNING "UMP: umpp_phys_commit - trying to allocate more than possible\n");
+		return -ENOMEM;
+	}
+
+	alloc->block_array = kmalloc(sizeof(ump_dd_physical_block_64) * alloc->blocksCount, __GFP_HARDWALL | GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (NULL == alloc->block_array)
+	{
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		void * mp;
+		struct page * page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | __GFP_NOWARN | __GFP_COLD);
+		if (NULL == page)
+		{
+			break;
+		}
+
+		alloc->block_array[i].addr = page_to_pfn(page) << PAGE_SHIFT;
+		alloc->block_array[i].size = PAGE_SIZE;
+
+		mp = kmap(page);
+		if (NULL == mp)
+		{
+			__free_page(page);
+			break;
+		}
+
+		memset(mp, 0x00, PAGE_SIZE); /* instead of __GFP_ZERO, so we can do cache maintenance */
+		ump_sync_to_memory(PFN_PHYS(page_to_pfn(page)), mp, PAGE_SIZE);
+		kunmap(page);
+	}
+
+	if (i == alloc->blocksCount)
+	{
+		return 0;
+	}
+	else
+	{
+		uint64_t j;
+		for (j = 0; j < i; j++)
+		{
+			struct page * page;
+			page = pfn_to_page(alloc->block_array[j].addr >> PAGE_SHIFT);
+			__free_page(page);
+		}
+		
+		kfree(alloc->block_array);
+
+		return -ENOMEM;
+	}
+}
+
+void umpp_phys_free(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		__free_page(pfn_to_page(alloc->block_array[i].addr >> PAGE_SHIFT));
+	}
+
+	kfree(alloc->block_array);
+}
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma)
+{
+	ump_secure_id id;
+	ump_dd_handle h;
+	size_t offset;
+	int err = -EINVAL;
+	size_t length = vma->vm_end - vma->vm_start;
+
+	umpp_cpu_mapping * map = NULL;
+	umpp_session *session = filp->private_data;
+
+	if ( 0 == length )
+	{
+		return -EINVAL;
+	}
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (NULL == map)
+	{
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* unpack our arg */
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	if (is_compat_task())
+	{
+#endif
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_32;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_32;
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	}
+	else
+	{
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_64;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_64;
+	}
+#endif
+
+	h = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE != h)
+	{
+		uint64_t i;
+		uint64_t block_idx;
+		uint64_t block_offset;
+		uint64_t paddr;
+		umpp_allocation * alloc;
+		uint64_t last_byte;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0))
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO | VM_MIXEDMAP | VM_DONTDUMP;
+#else
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO | VM_MIXEDMAP;
+#endif
+		vma->vm_ops = &umpp_vm_ops;
+		vma->vm_private_data = map;
+
+		alloc = (umpp_allocation*)h;
+
+		if( (alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+		{
+			/* cache disabled flag set, disable caching for cpu mappings */
+			vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		}
+
+		last_byte = length + (offset << PAGE_SHIFT) - 1;
+		if (last_byte >= alloc->size || last_byte < (offset << PAGE_SHIFT))
+		{
+			goto err_out;
+		}
+
+		if (umpp_dd_find_start_block(alloc, offset << PAGE_SHIFT, &block_idx, &block_offset))
+		{
+			goto err_out;
+		}
+
+		paddr = alloc->block_array[block_idx].addr + block_offset;
+
+		for (i = 0; i < (length >> PAGE_SHIFT); i++)
+		{
+			/* check if we've overrrun the current block, if so move to the next block */
+			if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+			{
+				block_idx++;
+				UMP_ASSERT(block_idx < alloc->blocksCount);
+				paddr = alloc->block_array[block_idx].addr;
+			}
+
+			err = vm_insert_mixed(vma, vma->vm_start + (i << PAGE_SHIFT), paddr >> PAGE_SHIFT);
+			paddr += PAGE_SIZE;
+		}
+
+		map->vaddr_start = (void*)vma->vm_start;
+		map->nr_pages = length >> PAGE_SHIFT;
+		map->page_off = offset;
+		map->handle = h;
+		map->session = session;
+
+		umpp_dd_add_cpu_mapping(h, map);
+
+		return 0;
+
+		err_out:
+
+		ump_dd_release(h);
+	}
+
+	kfree(map);
+
+out:
+
+	return err;
+}
+
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h b/midgard/r12p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
new file mode 100755
index 0000000..4fbf3b2
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_LINUX_MEM_H_
+#define _UMP_KERNEL_LINUX_MEM_H_
+
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma);
+
+#endif /* _UMP_KERNEL_LINUX_MEM_H_ */
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/sconscript b/midgard/r12p0/kernel/drivers/base/ump/src/sconscript
new file mode 100755
index 0000000..d706e1e
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/sconscript
@@ -0,0 +1,61 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+import re
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ump = env.Clone()
+
+# Source files required for UMP.
+ump_src = [Glob('#kernel/drivers/base/ump/src/linux/*.c'), Glob('#kernel/drivers/base/ump/src/common/*.c'), Glob('#kernel/drivers/base/ump/src/imports/*/*.c')]
+
+env_ump.Append( CPPPATH='#kernel/drivers/base/ump/src/' )
+
+if env_ump.GetOption('clean') :
+	env_ump.Execute(Action("make clean", '[clean] ump'))
+	cmd = env_ump.Command('$STATIC_LIB_PATH/ump.ko', ump_src, [])
+else:
+	if env['unit'] == '1':
+		makeAction=Action("cd ${SOURCE.dir}/.. && make MALI_UNIT_TEST=${unit} PLATFORM=${platform} %s && cp ump.ko $STATIC_LIB_PATH/ump.ko" % env.kernel_get_config_defines(), '$MAKECOMSTR')
+	else:
+		# The target is ump.ko, built from the source in ump_src, via the action makeAction
+		# ump.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+		# kernel build system, after which it can be installed to the directory specified if
+		# "libs_install" is set; this is done by LibTarget.
+		makeAction=Action("cd ${SOURCE.dir}/.. && make PLATFORM=${platform} %s && cp ump.ko $STATIC_LIB_PATH/ump.ko" % env.kernel_get_config_defines(), '$MAKECOMSTR')
+	cmd = env_ump.Command('$STATIC_LIB_PATH/ump.ko', ump_src, [makeAction])
+
+# Add a dependency on kds.ko only when the build is not Android
+# Android uses sync_pt instead of Midgard KDS to achieve KDS functionality
+# Only necessary when KDS is not built into the kernel.
+#
+if env['os'] != 'android':
+	linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+	search_term = '^[\ ]*CONFIG_KDS[\ ]*=[\ ]*y'
+	kds_in_kernel = 0
+	for line in open(linux_config_file, 'r'):
+		if re.search(search_term, line):
+			# KDS in kernel.
+			kds_in_kernel = 1
+	if not kds_in_kernel:
+		env.Depends('$STATIC_LIB_PATH/ump.ko', '$STATIC_LIB_PATH/kds.ko')
+
+env_ump.KernelObjTarget('ump', cmd)
+
+SConscript( 'imports/sconscript' )
+
diff --git a/midgard/r12p0/kernel/drivers/base/ump/src/ump_arch.h b/midgard/r12p0/kernel/drivers/base/ump/src/ump_arch.h
new file mode 100755
index 0000000..2303d56
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/src/ump_arch.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_ARCH_H_
+#define _UMP_ARCH_H_
+
+#include <common/ump_kernel_core.h>
+
+/**
+ * Device specific setup.
+ * Called by the UMP core code to to host OS/device specific setup.
+ * Typical use case is device node creation for talking to user space.
+ * @return UMP_OK on success, any other value on failure
+ */
+extern ump_result umpp_device_initialize(void);
+
+/**
+ * Device specific teardown.
+ * Undo any things done by ump_device_initialize.
+ */
+extern void umpp_device_terminate(void);
+
+extern int umpp_phys_commit(umpp_allocation * alloc);
+extern void umpp_phys_free(umpp_allocation * alloc);
+
+#endif /* _UMP_ARCH_H_ */
diff --git a/midgard/r12p0/kernel/drivers/base/ump/ump_ref_drv.h b/midgard/r12p0/kernel/drivers/base/ump/ump_ref_drv.h
new file mode 100755
index 0000000..9a265fe
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/base/ump/ump_ref_drv.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_ref_drv.h
+ *
+ * This file contains the link to user space part of the UMP API for usage by MALI 400 gralloc.
+ *
+ */
+
+#ifndef _UMP_REF_DRV_H_
+#define _UMP_REF_DRV_H_
+
+#include <ump/ump.h>
+
+
+#endif /* _UMP_REF_DRV_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/Kbuild b/midgard/r12p0/kernel/drivers/gpu/arm/Kbuild
new file mode 100755
index 0000000..19c7e9a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/Kbuild
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/Kconfig b/midgard/r12p0/kernel/drivers/gpu/arm/Kconfig
new file mode 100755
index 0000000..1f30eb5
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/Kconfig
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menu "ARM GPU Configuration"
+source "drivers/gpu/arm/midgard/Kconfig"
+endmenu
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/Kbuild b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/Kbuild
new file mode 100755
index 0000000..f741742
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/Kbuild
@@ -0,0 +1,238 @@
+#
+# (C) COPYRIGHT 2012,2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r12p0-04rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+ifeq ($(CONFIG_MALI_ERROR_INJECTION),y)
+MALI_ERROR_INJECT_ON = 1
+endif
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_ERROR_INJECT_ON ?= 0
+MALI_MOCK_TEST ?= 0
+MALI_COVERAGE ?= 0
+MALI_INSTRUMENTATION_LEVEL ?= 0
+# This workaround is for what seems to be a compiler bug we observed in
+# GCC 4.7 on AOSP 4.3.  The bug caused an intermittent failure compiling
+# the "_Pragma" syntax, where an error message is returned:
+#
+# "internal compiler error: unspellable token PRAGMA"
+#
+# This regression has thus far only been seen on the GCC 4.7 compiler bundled
+# with AOSP 4.3.0.  So this makefile, intended for in-tree kernel builds
+# which are not known to be used with AOSP, is hardcoded to disable the
+# workaround, i.e. set the define to 0.
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
+	-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
+	-DMALI_COVERAGE=$(MALI_COVERAGE) \
+	-DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \
+	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+	-DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+
+# Use our defines when compiling
+ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+	mali_kbase_device.c \
+	mali_kbase_cache_policy.c \
+	mali_kbase_mem.c \
+	mali_kbase_mmu.c \
+	mali_kbase_ipa.c \
+	mali_kbase_jd.c \
+	mali_kbase_jd_debugfs.c \
+	mali_kbase_jm.c \
+	mali_kbase_gpuprops.c \
+	mali_kbase_js.c \
+	mali_kbase_js_ctx_attr.c \
+	mali_kbase_event.c \
+	mali_kbase_context.c \
+	mali_kbase_pm.c \
+	mali_kbase_config.c \
+	mali_kbase_instr.c \
+	mali_kbase_vinstr.c \
+	mali_kbase_softjobs.c \
+	mali_kbase_10969_workaround.c \
+	mali_kbase_hw.c \
+	mali_kbase_utility.c \
+	mali_kbase_debug.c \
+	mali_kbase_trace_timeline.c \
+	mali_kbase_gpu_memory_debugfs.c \
+	mali_kbase_mem_linux.c \
+	mali_kbase_core_linux.c \
+	mali_kbase_sync.c \
+	mali_kbase_sync_user.c \
+	mali_kbase_replay.c \
+	mali_kbase_mem_profile_debugfs.c \
+	mali_kbase_mmu_mode_lpae.c \
+	mali_kbase_disjoint_events.c \
+	mali_kbase_gator_api.c \
+	mali_kbase_debug_mem_view.c \
+	mali_kbase_debug_job_fault.c \
+	mali_kbase_smc.c \
+	mali_kbase_mem_pool.c \
+	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_tlstream.c \
+	mali_kbase_strings.c
+
+ifeq ($(MALI_UNIT_TEST),1)
+	SRC += mali_kbase_tlstream_test.c
+endif
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+	SRC += mali_kbase_regs_dump_debugfs.c
+endif
+
+
+# Job Scheduler Policy: Completely Fair Scheduler
+SRC += mali_kbase_js_policy_cfs.c
+
+ccflags-y += -I$(KBASE_PATH)
+
+ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y)
+	SRC += mali_kbase_platform_fake.c
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y)
+		SRC += platform/vexpress/mali_kbase_config_vexpress.c \
+		platform/vexpress/mali_kbase_cpu_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y)
+		SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/rtsm_ve
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_JUNO),y)
+		SRC += platform/juno/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/juno
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_JUNO_SOC),y)
+		SRC += platform/juno_soc/mali_kbase_config_juno_soc.c
+		ccflags-y += -I$(src)/platform/juno_soc
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_1XV7_A57),y)
+		SRC += platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress_1xv7_a57
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y)
+		SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \
+		platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress_6xvirtex7_10mhz
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_A7_KIPLING),y)
+		SRC += platform/a7_kipling/mali_kbase_config_a7_kipling.c \
+		platform/a7_kipling/mali_kbase_cpu_a7_kipling.c
+		ccflags-y += -I$(src)/platform/a7_kipling
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+	# remove begin and end quotes from the Kconfig string type
+	platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+	MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+	ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+	ifeq ($(CONFIG_MALI_MIDGARD),m)
+	include  $(src)/platform/$(platform_name)/Kbuild
+	else ifeq ($(CONFIG_MALI_MIDGARD),y)
+	obj-$(CONFIG_MALI_MIDGARD) += platform/
+	endif
+	endif
+endif # CONFIG_MALI_PLATFORM_FAKE=y
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+ifeq ($(CONFIG_MALI_MIDGARD),m)
+include  $(src)/platform/$(platform_name)/Kbuild
+else ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-$(CONFIG_MALI_MIDGARD) += platform/
+endif
+endif
+
+ifeq ($(CONFIG_MALI_PLATFORM_DEVICETREE),y)
+	ccflags-y += -I$(src)/platform/devicetree
+    include $(src)/platform/devicetree/Kbuild
+endif
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+mali_kbase-y := $(SRC:.c=.o)
+
+mali_kbase-$(CONFIG_MALI_DMA_FENCE) += mali_kbase_dma_fence.o
+
+ifneq ($(wildcard $(src)/internal/Kbuild),)
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+include  $(src)/internal/Kbuild
+mali_kbase-y += $(INTERNAL:.c=.o)
+endif
+endif
+
+MALI_BACKEND_PATH ?= backend
+CONFIG_MALI_BACKEND ?= gpu
+CONFIG_MALI_BACKEND_REAL ?= $(CONFIG_MALI_BACKEND)
+
+ifeq ($(MALI_MOCK_TEST),1)
+ifeq ($(CONFIG_MALI_BACKEND_REAL),gpu)
+# Test functionality
+mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
+endif
+endif
+
+include  $(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)/Kbuild
+mali_kbase-y += $(BACKEND:.c=.o)
+
+ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+
+# Default to devicetree platform if neither a fake platform or a thirdparty
+# platform is configured.
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY)$(CONFIG_MALI_PLATFORM_FAKE),)
+CONFIG_MALI_PLATFORM_DEVICETREE := y
+endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/Kconfig b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/Kconfig
new file mode 100755
index 0000000..367f0c7
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/Kconfig
@@ -0,0 +1,201 @@
+#
+# (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menuconfig MALI_MIDGARD
+	tristate "Mali Midgard series support"
+	select GPU_TRACEPOINTS if ANDROID
+	default n
+	help
+	  Enable this option to build support for a ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+	  You will need the Gator device driver already loaded before loading this driver when enabling
+	  Streamline debug support.
+	  This is a legacy interface required by older versions of Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ && !MALI_PLATFORM_DEVICETREE
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD && PM_DEVFREQ
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali (EXPERIMENTAL)"
+	depends on MALI_MIDGARD && !KDS
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if KDS is not present and
+	  the Linux Kernel has built in support for DMA_BUF fences.
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default n
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
+
+config MALI_DEBUG_SHADER_SPLIT_FS
+	bool "Allow mapping of shader cores via sysfs"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to provide a sysfs entry for runtime configuration of shader
+	  core affinity masks.
+
+config MALI_PLATFORM_FAKE
+	bool "Enable fake platform device support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  When you start to work with the Mali Midgard series device driver the platform-specific code of
+	  the Linux kernel for your platform may not be complete. In this situation the kernel device driver
+	  supports creating the platform device outside of the Linux platform-specific code.
+	  Enable this option if would like to use a platform device configuration from within the device driver.
+
+choice
+	prompt "Platform configuration"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default MALI_PLATFORM_DEVICETREE
+	help
+	  Select the SOC platform that contains a Mali Midgard GPU
+
+config MALI_PLATFORM_DEVICETREE
+	bool "Device Tree platform"
+	depends on OF
+	help
+	  Select this option to use Device Tree with the Mali driver.
+
+	  When using this option the Mali driver will get the details of the
+	  GPU hardware from the Device Tree. This means that the same driver
+	  binary can run on multiple platforms as long as all the GPU hardware
+	  details are described in the device tree.
+
+	  Device Tree is the recommended method for the Mali driver platform
+	  integration.
+
+config MALI_PLATFORM_VEXPRESS
+	depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+	bool "Versatile Express"
+config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ
+	depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+	bool "Versatile Express w/Virtex7 @ 40Mhz"
+config MALI_PLATFORM_GOLDFISH
+	depends on ARCH_GOLDFISH
+	bool "Android Goldfish virtual CPU"
+config MALI_PLATFORM_PBX
+	depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX
+	bool "Realview PBX-A9"
+config MALI_PLATFORM_THIRDPARTY
+	bool "Third Party Platform"
+endchoice
+
+config MALI_PLATFORM_THIRDPARTY_NAME
+	depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT
+	string "Third party platform name"
+	help
+	  Enter the name of a third party platform that is supported. The third part configuration
+	  file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name
+	  specified here.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_NO_MALI
+	bool "No Mali"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This can be used to test the driver in a simulated environment
+	  whereby the hardware is not physically present. If the hardware is physically
+	  present it will not be used. This can be used to test the majority of the
+	  driver without needing actual hardware or for software benchmarking.
+	  All calls to the simulated hardware will complete immediately as if the hardware
+	  completed the task.
+
+config MALI_ERROR_INJECT
+	bool "Error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+	default n
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_TRACE_TIMELINE
+	bool "Timeline tracing"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enables timeline tracing through the kernel tracepoint system.
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event.	This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/Makefile b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/Makefile
new file mode 100755
index 0000000..e1625e6
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/Makefile
@@ -0,0 +1,42 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
+UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump
+KBASE_PATH_RELATIVE = $(CURDIR)
+KDS_PATH_RELATIVE = $(CURDIR)/../../../..
+EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers
+
+ifeq ($(MALI_UNIT_TEST), 1)
+	EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+
+ifeq ($(MALI_BUS_LOG), 1)
+#Add bus logger symbols
+EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
+endif
+
+# GPL driver supports KDS
+EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
new file mode 100755
index 0000000..2bef9c2
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
new file mode 100755
index 0000000..e38120a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -0,0 +1,63 @@
+#
+# (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+BACKEND += \
+	backend/gpu/mali_kbase_cache_policy_backend.c \
+	backend/gpu/mali_kbase_device_hw.c \
+	backend/gpu/mali_kbase_gpu.c \
+	backend/gpu/mali_kbase_gpuprops_backend.c \
+	backend/gpu/mali_kbase_debug_job_fault_backend.c \
+	backend/gpu/mali_kbase_irq_linux.c \
+	backend/gpu/mali_kbase_instr_backend.c \
+	backend/gpu/mali_kbase_jm_as.c \
+	backend/gpu/mali_kbase_jm_hw.c \
+	backend/gpu/mali_kbase_jm_rb.c \
+	backend/gpu/mali_kbase_js_affinity.c \
+	backend/gpu/mali_kbase_js_backend.c \
+	backend/gpu/mali_kbase_mmu_hw_direct.c \
+	backend/gpu/mali_kbase_pm_backend.c \
+	backend/gpu/mali_kbase_pm_driver.c \
+	backend/gpu/mali_kbase_pm_metrics.c \
+	backend/gpu/mali_kbase_pm_ca.c \
+	backend/gpu/mali_kbase_pm_ca_fixed.c \
+	backend/gpu/mali_kbase_pm_always_on.c \
+	backend/gpu/mali_kbase_pm_coarse_demand.c \
+	backend/gpu/mali_kbase_pm_demand.c \
+	backend/gpu/mali_kbase_pm_policy.c \
+	backend/gpu/mali_kbase_time.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+BACKEND += \
+	backend/gpu/mali_kbase_pm_ca_random.c \
+	backend/gpu/mali_kbase_pm_ca_demand.c \
+	backend/gpu/mali_kbase_pm_demand_always_powered.c \
+	backend/gpu/mali_kbase_pm_fast_start.c
+endif
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+BACKEND += backend/gpu/mali_kbase_devfreq.c
+endif
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+	# Dummy model
+	BACKEND += backend/gpu/mali_kbase_model_dummy.c
+	BACKEND += backend/gpu/mali_kbase_model_linux.c
+	# HW error simulation
+	BACKEND += backend/gpu/mali_kbase_model_error_generator.c
+endif
+
+ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
+	BACKEND += backend/gpu/mali_kbase_power_model_simple.c
+endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
new file mode 100755
index 0000000..c8ae87e
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100755
index 0000000..2f3c41a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100755
index 0000000..fe98691
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+  * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+  *			in the GPU.
+  * @kbdev:	Device pointer
+  * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+  */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
new file mode 100755
index 0000000..7851ea6
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_debug_job_fault.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+/*GPU_CONTROL_REG(r)*/
+static int gpu_control_reg_snapshot[] = {
+	GPU_ID,
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI
+};
+
+/* JOB_CONTROL_REG(r) */
+static int job_control_reg_snapshot[] = {
+	JOB_IRQ_MASK,
+	JOB_IRQ_STATUS
+};
+
+/* JOB_SLOT_REG(n,r) */
+static int job_slot_reg_snapshot[] = {
+	JS_HEAD_LO,
+	JS_HEAD_HI,
+	JS_TAIL_LO,
+	JS_TAIL_HI,
+	JS_AFFINITY_LO,
+	JS_AFFINITY_HI,
+	JS_CONFIG,
+	JS_STATUS,
+	JS_HEAD_NEXT_LO,
+	JS_HEAD_NEXT_HI,
+	JS_AFFINITY_NEXT_LO,
+	JS_AFFINITY_NEXT_HI,
+	JS_CONFIG_NEXT
+};
+
+/*MMU_REG(r)*/
+static int mmu_reg_snapshot[] = {
+	MMU_IRQ_MASK,
+	MMU_IRQ_STATUS
+};
+
+/* MMU_AS_REG(n,r) */
+static int as_reg_snapshot[] = {
+	AS_TRANSTAB_LO,
+	AS_TRANSTAB_HI,
+	AS_MEMATTR_LO,
+	AS_MEMATTR_HI,
+	AS_FAULTSTATUS,
+	AS_FAULTADDRESS_LO,
+	AS_FAULTADDRESS_HI,
+	AS_STATUS
+};
+
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range)
+{
+	int i, j;
+	int offset = 0;
+	int slot_number;
+	int as_number;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	slot_number = kctx->kbdev->gpu_props.num_job_slots;
+	as_number = kctx->kbdev->gpu_props.num_address_spaces;
+
+	/* get the GPU control registers*/
+	for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job control registers*/
+	for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				JOB_CONTROL_REG(job_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job Slot registers*/
+	for (j = 0; j < slot_number; j++)	{
+		for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+			JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	/* get the MMU registers*/
+	for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Address space registers*/
+	for (j = 0; j < as_number; j++) {
+		for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+					MMU_AS_REG(j, as_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	WARN_ON(offset >= (reg_range*2/4));
+
+	/* set the termination flag*/
+	kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
+	kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
+			offset);
+
+	return true;
+}
+
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
+{
+	int offset = 0;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
+		kctx->reg_dump[offset+1] =
+				kbase_reg_read(kctx->kbdev,
+						kctx->reg_dump[offset], NULL);
+		offset += 2;
+	}
+	return true;
+}
+
+
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
new file mode 100755
index 0000000..d25f84e
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -0,0 +1,399 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <backend/gpu/mali_kbase_power_model_simple.h>
+#endif
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#endif /* Linux >= 3.13 */
+
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
+static struct devfreq_simple_ondemand_data ondemand_data = {
+		.upthreshold = 90,
+		.downdifferential = 5,
+};
+
+#define DEV_FREQ_GOV_DATA		(&ondemand_data)
+
+static ssize_t upthreshold_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ondemand_data.upthreshold);
+}
+
+static ssize_t upthreshold_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	unsigned int t;
+	int ret;
+
+	ret = sscanf(buf, "%u", &t);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (t >= 100 || t == 0) {
+		dev_err(dev, "invalid input:%s\n", buf);
+		return -EINVAL;
+	}
+	ondemand_data.upthreshold = t;
+	return count;
+
+}
+static DEVICE_ATTR_RW(upthreshold);
+
+static ssize_t downdifferential_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ondemand_data.downdifferential);
+}
+
+static ssize_t downdifferential_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	unsigned int t;
+	int ret;
+
+	ret = sscanf(buf, "%u", &t);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (t >= 100 || t == 0) {
+		dev_err(dev, "invalid input:%s\n", buf);
+		return -EINVAL;
+	}
+	ondemand_data.downdifferential = t;
+	return count;
+
+}
+static DEVICE_ATTR_RW(downdifferential);
+#else
+#define DEV_FREQ_GOV_DATA		(NULL)
+#endif /* CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND */
+
+static ssize_t utilisation_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev->parent);
+	unsigned long total_time = 0, busy_time = 0;
+	unsigned long utilise;
+
+	kbase_pm_get_dvfs_utilisation(kbdev, &total_time, &busy_time);
+
+	if (total_time == 0) {
+		utilise = 0;
+	} else {
+		/* round up */
+		utilise = (busy_time * 100 + (total_time >> 1)) / total_time;
+	}
+	return sprintf(buf, "%ld\n", utilise);
+}
+static DEVICE_ATTR_RO(utilisation);
+
+static struct device_attribute *kbase_dev_attr[] = {
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
+	&dev_attr_downdifferential,
+	&dev_attr_upthreshold,
+#endif
+	&dev_attr_utilisation
+};
+
+static int kbase_dev_add_attr(struct device *dev)
+{
+	int i, ret;
+	for (i = 0; i <  ARRAY_SIZE(kbase_dev_attr); i++) {
+		if (!kbase_dev_attr[i])
+			continue;
+		ret = device_create_file(dev, kbase_dev_attr[i]);
+		if (ret)
+			return ret;
+	}
+	return ret;
+}
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+	if (IS_ERR_OR_NULL(opp)) {
+		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (kbdev->current_freq == freq) {
+		*target_freq = freq;
+		return 0;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq < freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to increase voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(kbdev->clock, freq);
+	if (err) {
+		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+				freq, *target_freq);
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq > freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	*target_freq = freq;
+	kbdev->current_voltage = voltage;
+	kbdev->current_freq = freq;
+
+	kbase_pm_reset_dvfs_utilisation(kbdev);
+
+	return err;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	*freq = kbdev->current_freq;
+
+	return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	stat->current_frequency = kbdev->current_freq;
+
+	kbase_pm_get_dvfs_utilisation(kbdev,
+			&stat->total_time, &stat->busy_time);
+
+	stat->private_data = NULL;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->devfreq_cooling)
+		memcpy(&kbdev->devfreq_cooling->last_status, stat,
+				sizeof(*stat));
+#endif
+
+	return 0;
+}
+
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+		struct devfreq_dev_profile *dp)
+{
+	int count;
+	int i = 0;
+	unsigned long freq = 0;
+	struct dev_pm_opp *opp;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	if (count < 0) {
+		rcu_read_unlock();
+		return count;
+	}
+	rcu_read_unlock();
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	for (i = 0; i < count; i++, freq++) {
+		opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
+				count, i);
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	kbase_devfreq_term_freq_table(kbdev);
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	int err;
+
+	if (!kbdev->clock)
+		return -ENODEV;
+
+	kbdev->current_freq = clk_get_rate(kbdev->clock);
+
+	dp = &kbdev->devfreq_profile;
+
+	dp->initial_freq = kbdev->current_freq;
+	dp->polling_ms = 100;
+	dp->target = kbase_devfreq_target;
+	dp->get_dev_status = kbase_devfreq_status;
+	dp->get_cur_freq = kbase_devfreq_cur_freq;
+	dp->exit = kbase_devfreq_exit;
+
+	if (kbase_devfreq_init_freq_table(kbdev, dp))
+		return -EFAULT;
+
+	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+				"simple_ondemand", DEV_FREQ_GOV_DATA);
+	if (IS_ERR(kbdev->devfreq)) {
+		kbase_devfreq_term_freq_table(kbdev);
+		return PTR_ERR(kbdev->devfreq);
+	}
+
+	kbase_dev_add_attr(&kbdev->devfreq->dev);
+	err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to register OPP notifier (%d)\n", err);
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	err = kbase_power_model_simple_init(kbdev);
+	if (err && err != -ENODEV && err != -EPROBE_DEFER) {
+		dev_err(kbdev->dev,
+			"Failed to initialize simple power model (%d)\n",
+			err);
+		goto cooling_failed;
+	}
+	if (err == -EPROBE_DEFER)
+		goto cooling_failed;
+	if (err != -ENODEV) {
+		kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+				kbdev->dev->of_node,
+				kbdev->devfreq,
+				&power_model_simple_ops);
+		if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+			err = PTR_ERR(kbdev->devfreq_cooling);
+			dev_err(kbdev->dev,
+				"Failed to register cooling device (%d)\n",
+				err);
+			goto cooling_failed;
+		}
+	} else {
+		err = 0;
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	if (devfreq_remove_device(kbdev->devfreq))
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	return err;
+}
+
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+	int err;
+
+	dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->devfreq_cooling)
+		devfreq_cooling_unregister(kbdev->devfreq_cooling);
+#endif
+
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+	err = devfreq_remove_device(kbdev->devfreq);
+	if (err)
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
new file mode 100755
index 0000000..c0bf8b1
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+int kbase_devfreq_init(struct kbase_device *kbdev);
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _BASE_DEVFREQ_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
new file mode 100755
index 0000000..b9238a3
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -0,0 +1,120 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ *
+ */
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+
+	writel(value, kbdev->reg + offset);
+
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_WRITE, offset,
+									value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx)
+{
+	u32 val;
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	val = readl(kbdev->reg + offset);
+
+	dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_READ, offset, val);
+	return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
+
+/**
+ * kbase_report_gpu_fault - Report a GPU fault.
+ * @kbdev:    Kbase device pointer
+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
+ *            was also set
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using dev_warn().
+ */
+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+	u32 status;
+	u64 address;
+
+	status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+	address = (u64) kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
+	address |= kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+
+	dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+			status & 0xFF,
+			kbase_exception_name(kbdev, status),
+			address);
+	if (multiple)
+		dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+	if (val & GPU_FAULT)
+		kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+	if (val & RESET_COMPLETED)
+		kbase_pm_reset_done(kbdev);
+
+	if (val & PRFCNT_SAMPLE_COMPLETED)
+		kbase_instr_hwcnt_sample_done(kbdev);
+
+	if (val & CLEAN_CACHES_COMPLETED)
+		kbase_clean_caches_done(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+
+	/* kbase_pm_check_transitions must be called after the IRQ has been
+	 * cleared. This is because it might trigger further power transitions
+	 * and we don't want to miss the interrupt raised to notify us that
+	 * these further transitions have finished.
+	 */
+	if (val & POWER_CHANGED_ALL)
+		kbase_pm_power_changed(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
new file mode 100755
index 0000000..5b20445
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access device APIs
+ */
+
+#ifndef _KBASE_DEVICE_INTERNAL_H_
+#define _KBASE_DEVICE_INTERNAL_H_
+
+/**
+ * kbase_reg_write - write to GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @value:  Value to write
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_reg_read - read from GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ *
+ * Return: Value in desired register
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+
+/**
+ * kbase_gpu_interrupt - GPU interrupt handler
+ * @kbdev: Kbase device pointer
+ * @val:   The value of the GPU IRQ status register which triggered the call
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be
+ * handled.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+#endif /* _KBASE_DEVICE_INTERNAL_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
new file mode 100755
index 0000000..033caf9
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbasep_platform_device_init(kbdev);
+	if (err)
+		return err;
+
+	/* Ensure we can access the GPU registers */
+	kbase_pm_register_access_enable(kbdev);
+
+	/* Find out GPU properties based on the GPU feature registers */
+	kbase_gpuprops_set(kbdev);
+
+	/* We're done accessing the GPU registers for now. */
+	kbase_pm_register_access_disable(kbdev);
+
+	err = kbase_hwaccess_pm_init(kbdev);
+	if (err)
+		goto fail_pm;
+
+	err = kbase_install_interrupts(kbdev);
+	if (err)
+		goto fail_interrupts;
+
+	return 0;
+
+fail_interrupts:
+	kbase_hwaccess_pm_term(kbdev);
+fail_pm:
+	kbasep_platform_device_term(kbdev);
+
+	return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+	kbase_release_interrupts(kbdev);
+	kbase_hwaccess_pm_term(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		return err;
+
+	err = kbase_backend_timer_init(kbdev);
+	if (err)
+		goto fail_timer;
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+		dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+		err = -EINVAL;
+		goto fail_interrupt_test;
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = kbase_job_slot_init(kbdev);
+	if (err)
+		goto fail_job_slot;
+
+	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+	return 0;
+
+fail_job_slot:
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	kbase_backend_timer_term(kbdev);
+fail_timer:
+	kbase_hwaccess_pm_halt(kbdev);
+
+	return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+	kbase_job_slot_halt(kbdev);
+	kbase_job_slot_term(kbdev);
+	kbase_backend_timer_term(kbdev);
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100755
index 0000000..d410cd2
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,105 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query backend APIs
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	int i;
+
+	/* Fill regdump with the content of the relevant registers */
+	regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
+
+	regdump->l2_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_FEATURES), NULL);
+	regdump->suspend_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SUSPEND_SIZE), NULL);
+	regdump->tiler_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_FEATURES), NULL);
+	regdump->mem_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MEM_FEATURES), NULL);
+	regdump->mmu_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MMU_FEATURES), NULL);
+	regdump->as_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(AS_PRESENT), NULL);
+	regdump->js_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_PRESENT), NULL);
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		regdump->js_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		regdump->texture_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
+
+	regdump->thread_max_threads = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
+	regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
+									NULL);
+	regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
+	regdump->thread_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_FEATURES), NULL);
+
+	regdump->shader_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
+	regdump->shader_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
+
+	regdump->tiler_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
+	regdump->tiler_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
+
+	regdump->l2_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
+	regdump->l2_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
+}
+
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
+		/* Ensure we can access the GPU registers */
+		kbase_pm_register_access_enable(kbdev);
+
+		regdump->coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+		/* We're done accessing the GPU registers for now. */
+		kbase_pm_register_access_disable(kbdev);
+	} else {
+		/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
+		regdump->coherency_features =
+				COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+	}
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
new file mode 100755
index 0000000..4e70b34
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -0,0 +1,550 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * GPU backend instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+
+/**
+ * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
+ * hardware
+ *
+ * @kbdev: Kbase device
+ */
+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long pm_flags;
+	u32 irq_mask;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	/* Wait for any reset to complete */
+	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+				kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_RESETTING);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask | CLEAN_CACHES_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+	/* clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	struct kbasep_js_device_data *js_devdata;
+	u32 irq_mask;
+	int ret;
+	u64 shader_cores_needed;
+	u32 prfcnt_config;
+
+	KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx);
+
+	shader_cores_needed = kbase_pm_get_present_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	js_devdata = &kbdev->js_data;
+
+	/* alignment failure */
+	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
+		goto out_err;
+
+	/* Override core availability policy to ensure all cores are available
+	 */
+	kbase_pm_ca_instr_enable(kbdev);
+
+	/* Request the cores early on synchronously - we'll release them on any
+	 * errors (e.g. instrumentation already active) */
+	kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+		/* GPU is being reset */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		/* Instrumentation is already enabled */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		goto out_unrequest_cores;
+	}
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
+						PRFCNT_SAMPLE_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+	/* In use, this context is the owner */
+	kbdev->hwcnt.kctx = kctx;
+	/* Remember the dump address so we can reprogram it later */
+	kbdev->hwcnt.addr = setup->dump_buffer;
+	/* Remember all the settings for suspend/resume */
+	if (&kbdev->hwcnt.suspended_state != setup)
+		memcpy(&kbdev->hwcnt.suspended_state, setup,
+					sizeof(kbdev->hwcnt.suspended_state));
+
+	/* Request the clean */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+	kbdev->hwcnt.backend.triggered = 0;
+	/* Clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+	KBASE_DEBUG_ASSERT(ret);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Wait for cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	/* Configure */
+	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	{
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+	}
+#endif
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					setup->dump_buffer & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					setup->dump_buffer >> 32,        kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+					setup->jm_bm,                    kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+					setup->shader_bm,                kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+					setup->mmu_l2_bm,                kctx);
+	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
+	 * HW counter dump. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
+									kctx);
+	else
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+		/* GPU is being reset */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	err = 0;
+
+	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+	return err;
+ out_unrequest_cores:
+	kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+ out_err:
+	return err;
+}
+
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	while (1) {
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
+			/* Instrumentation is not enabled */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.kctx != kctx) {
+			/* Instrumentation has been setup for another context */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
+			break;
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+		/* Ongoing dump/setup - wait for its completion */
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Disable interrupt */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+	/* Disable the counters */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+
+	kbdev->hwcnt.kctx = NULL;
+	kbdev->hwcnt.addr = 0ULL;
+
+	kbase_pm_ca_instr_disable(kbdev);
+
+	kbase_pm_unrequest_cores(kbdev, true,
+		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	kbase_pm_release_l2_caches(kbdev);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+									kctx);
+
+	err = 0;
+
+ out:
+	return err;
+}
+
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.kctx != kctx) {
+		/* The instrumentation has been setup for another context */
+		goto unlock;
+	}
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
+		/* HW counters are disabled or another dump is ongoing, or we're
+		 * resetting */
+		goto unlock;
+	}
+
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Mark that we're dumping - the PF handler can signal that we faulted
+	 */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+	/* Reconfigure the dump address */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					kbdev->hwcnt.addr >> 32, NULL);
+
+	/* Start dumping */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
+					kbdev->hwcnt.addr, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+
+	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+
+	err = 0;
+
+ unlock:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
+
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success)
+{
+	unsigned long flags;
+	bool complete = false;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+		*success = true;
+		complete = true;
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		*success = false;
+		complete = true;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
+
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(data, struct kbase_device,
+						hwcnt.backend.cache_clean_work);
+
+	mutex_lock(&kbdev->cacheclean_lock);
+	kbasep_instr_hwcnt_cacheclean(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	/* Wait for our condition, and any reset to complete */
+	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING ||
+			kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_CLEANING) {
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+				(kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_RESETTING &&
+				kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING));
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_CLEANED);
+
+	/* All finished and idle */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
+		int ret;
+		/* Always clean and invalidate the cache after a successful dump
+		 */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+		ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+		KBASE_DEBUG_ASSERT(ret);
+	}
+	/* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a reset,
+	 * and the instrumentation state hasn't been restored yet -
+	 * kbasep_reset_timeout_worker() will do the rest of the work */
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		unsigned long flags;
+		unsigned long pm_flags;
+
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+		/* Disable interrupt */
+		spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+									NULL);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+		/* Wakeup... */
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+			/* Only wake if we weren't resetting */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+			wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+		}
+		/* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a
+		 * reset, and the instrumentation state hasn't been restored yet
+		 * - kbasep_reset_timeout_worker() will do the rest of the work
+		 */
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	}
+}
+
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long flags;
+	int err;
+
+	/* Wait for dump & cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+		/* GPU is being reset */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		err = -EINVAL;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	} else {
+		/* Dump done */
+		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+		err = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return err;
+}
+
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+		/* GPU is being reset */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+
+	/* Check it's the context previously set up and we're not already
+	 * dumping */
+	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
+							KBASE_INSTR_STATE_IDLE)
+		goto out;
+
+	/* Clear the counters */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_PRFCNT_CLEAR, kctx);
+
+	err = 0;
+
+out:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+int kbase_instr_backend_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
+	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
+	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
+						kbasep_cache_clean_worker);
+	kbdev->hwcnt.backend.triggered = 0;
+
+	kbdev->hwcnt.backend.cache_clean_wq =
+			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
+	if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+void kbase_instr_backend_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
new file mode 100755
index 0000000..23bd80a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
@@ -0,0 +1,62 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+	/* State where instrumentation is not active */
+	KBASE_INSTR_STATE_DISABLED = 0,
+	/* State machine is active and ready for a command. */
+	KBASE_INSTR_STATE_IDLE,
+	/* Hardware is currently dumping a frame. */
+	KBASE_INSTR_STATE_DUMPING,
+	/* We've requested a clean to occur on a workqueue */
+	KBASE_INSTR_STATE_REQUEST_CLEAN,
+	/* Hardware is currently cleaning and invalidating caches. */
+	KBASE_INSTR_STATE_CLEANING,
+	/* Cache clean completed, and either a) a dump is complete, or
+	 * b) instrumentation can now be setup. */
+	KBASE_INSTR_STATE_CLEANED,
+	/* kbasep_reset_timeout_worker() has started (but not compelted) a
+	 * reset. This generally indicates the current action should be aborted,
+	 * and kbasep_reset_timeout_worker() will handle the cleanup */
+	KBASE_INSTR_STATE_RESETTING,
+	/* An error has occured during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+	wait_queue_head_t wait;
+	int triggered;
+
+	enum kbase_instr_state state;
+	wait_queue_head_t cache_clean_wait;
+	struct workqueue_struct *cache_clean_wq;
+	struct work_struct  cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100755
index 0000000..e96aeae
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100755
index 0000000..8781561
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific IRQ APIs
+ */
+
+#ifndef _KBASE_IRQ_INTERNAL_H_
+#define _KBASE_IRQ_INTERNAL_H_
+
+int kbase_install_interrupts(struct kbase_device *kbdev);
+
+void kbase_release_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
+ *                          execution
+ * @kbdev: The kbase device
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev);
+
+#endif /* _KBASE_IRQ_INTERNAL_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
new file mode 100755
index 0000000..b891b12
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -0,0 +1,467 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+
+#include <linux/interrupt.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static void *kbase_tag(void *ptr, u32 tag)
+{
+	return (void *)(((uintptr_t) ptr) | tag);
+}
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_job_done(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	atomic_inc(&kbdev->faults_pending);
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val) {
+		atomic_dec(&kbdev->faults_pending);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_mmu_interrupt(kbdev, val);
+
+	atomic_dec(&kbdev->faults_pending);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_gpu_interrupt(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+static irq_handler_t kbase_handler_table[] = {
+	[JOB_IRQ_TAG] = kbase_job_irq_handler,
+	[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+	[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
+
+#ifdef CONFIG_MALI_DEBUG
+#define  JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define  MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define  GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * kbase_set_custom_irq_handler - Set a custom IRQ handler
+ * @kbdev: Device for which the handler is to be registered
+ * @custom_handler: Handler to be registered
+ * @irq_type: Interrupt type
+ *
+ * Registers given interrupt handler for requested interrupt type
+ * In the case where irq handler is not specified, the default handler shall be
+ * registered
+ *
+ * Return: 0 case success, error code otherwise
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+					irq_handler_t custom_handler,
+					int irq_type)
+{
+	int result = 0;
+	irq_handler_t requested_irq_handler = NULL;
+
+	KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
+						(GPU_IRQ_HANDLER >= irq_type));
+
+	/* Release previous handler */
+	if (kbdev->irqs[irq_type].irq)
+		free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
+
+	requested_irq_handler = (NULL != custom_handler) ? custom_handler :
+						kbase_handler_table[irq_type];
+
+	if (0 != request_irq(kbdev->irqs[irq_type].irq,
+			requested_irq_handler,
+			kbdev->irqs[irq_type].flags | IRQF_SHARED,
+			dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+		result = -EINVAL;
+		dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+					kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+		dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/* test correct interrupt assigment and reception by cpu */
+struct kbasep_irq_test {
+	struct hrtimer timer;
+	wait_queue_head_t wait;
+	int triggered;
+	u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT    500
+
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+	struct kbasep_irq_test *test_data = container_of(timer,
+						struct kbasep_irq_test, timer);
+
+	test_data->timeout = 1;
+	test_data->triggered = 1;
+	wake_up(&test_data->wait);
+	return HRTIMER_NORESTART;
+}
+
+static int kbasep_common_test_interrupt(
+				struct kbase_device * const kbdev, u32 tag)
+{
+	int err = 0;
+	irq_handler_t test_handler;
+
+	u32 old_mask_val;
+	u16 mask_offset;
+	u16 rawstat_offset;
+
+	switch (tag) {
+	case JOB_IRQ_TAG:
+		test_handler = kbase_job_irq_test_handler;
+		rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+		mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+		break;
+	case MMU_IRQ_TAG:
+		test_handler = kbase_mmu_irq_test_handler;
+		rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+		mask_offset = MMU_REG(MMU_IRQ_MASK);
+		break;
+	case GPU_IRQ_TAG:
+		/* already tested by pm_driver - bail out */
+	default:
+		return 0;
+	}
+
+	/* store old mask */
+	old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+	/* mask interrupts */
+	kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+	if (kbdev->irqs[tag].irq) {
+		/* release original handler and install test handler */
+		if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
+			err = -EINVAL;
+		} else {
+			kbasep_irq_test_data.timeout = 0;
+			hrtimer_init(&kbasep_irq_test_data.timer,
+					CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+			kbasep_irq_test_data.timer.function =
+						kbasep_test_interrupt_timeout;
+
+			/* trigger interrupt */
+			kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
+			kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+
+			hrtimer_start(&kbasep_irq_test_data.timer,
+					HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
+					HRTIMER_MODE_REL);
+
+			wait_event(kbasep_irq_test_data.wait,
+					kbasep_irq_test_data.triggered != 0);
+
+			if (kbasep_irq_test_data.timeout != 0) {
+				dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+				err = -EINVAL;
+			} else {
+				dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+			}
+
+			hrtimer_cancel(&kbasep_irq_test_data.timer);
+			kbasep_irq_test_data.triggered = 0;
+
+			/* mask interrupts */
+			kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+			/* release test handler */
+			free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+		}
+
+		/* restore original interrupt */
+		if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
+				kbdev->irqs[tag].flags | IRQF_SHARED,
+				dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+			dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
+						kbdev->irqs[tag].irq, tag);
+			err = -EINVAL;
+		}
+	}
+	/* restore old mask */
+	kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+
+	return err;
+}
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev)
+{
+	int err;
+
+	init_waitqueue_head(&kbasep_irq_test_data.wait);
+	kbasep_irq_test_data.triggered = 0;
+
+	/* A suspend won't happen during startup/insmod */
+	kbase_pm_context_active(kbdev);
+
+	err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+
+ out:
+	kbase_pm_context_idle(kbdev);
+
+	return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	int err;
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
+				kbdev->irqs[i].flags | IRQF_SHARED,
+				dev_name(kbdev->dev),
+				kbase_tag(kbdev, i));
+		if (err) {
+			dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+							kbdev->irqs[i].irq, i);
+#ifdef CONFIG_SPARSE_IRQ
+			dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+			goto release;
+		}
+	}
+
+	return 0;
+
+ release:
+	while (i-- > 0)
+		free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+
+	return err;
+}
+
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+	}
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			synchronize_irq(kbdev->irqs[i].irq);
+	}
+}
+
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
new file mode 100755
index 0000000..f216788
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -0,0 +1,385 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register backend context / address space management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+/**
+ * assign_and_activate_kctx_addr_space - Assign an AS to a context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @current_as: Address Space to assign
+ *
+ * Assign an Address Space (AS) to a context, and add the context to the Policy.
+ *
+ * This includes
+ *   setting up the global runpool_irq structure and the context on the AS,
+ *   Activating the MMU on the AS,
+ *   Allowing jobs to be submitted on the AS.
+ *
+ * Context:
+ *   kbasep_js_kctx_info.jsctx_mutex held,
+ *   kbasep_js_device_data.runpool_mutex held,
+ *   AS transaction mutex held,
+ *   Runpool IRQ lock held
+ */
+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_as *current_as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_per_as_data *js_per_as_data;
+	int as_nr = current_as->number;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&current_as->transaction_mutex);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	/* Attribute handling */
+	kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+	/* Assign addr space */
+	kctx->as_nr = as_nr;
+
+	/* If the GPU is currently powered, activate this address space on the
+	 * MMU */
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_mmu_update(kctx);
+	/* If the GPU was not powered then the MMU will be reprogrammed on the
+	 * next pm_context_active() */
+
+	/* Allow it to run jobs */
+	kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+	/* Book-keeping */
+	js_per_as_data->kctx = kctx;
+	js_per_as_data->as_busy_refcount = 0;
+
+	kbase_js_runpool_inc_context_count(kbdev, kctx);
+}
+
+/**
+ * release_addr_space - Release an address space
+ * @kbdev: Kbase device
+ * @kctx_as_nr: Address space of context to release
+ * @kctx: Context being released
+ *
+ * Context: kbasep_js_device_data.runpool_mutex must be held
+ *
+ * Release an address space, making it available for being picked again.
+ */
+static void release_addr_space(struct kbase_device *kbdev, int kctx_as_nr,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u16 as_bit = (1u << kctx_as_nr);
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* The address space must not already be free */
+	KBASE_DEBUG_ASSERT(!(js_devdata->as_free & as_bit));
+
+	js_devdata->as_free |= as_bit;
+
+	kbase_js_runpool_dec_context_count(kbdev, kctx);
+}
+
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	int i;
+
+	if (kbdev->hwaccess.active_kctx == kctx) {
+		/* Context is already active */
+		return true;
+	}
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+				&kbdev->js_data.runpool_irq.per_as_data[i];
+
+		if (js_per_as_data->kctx == kctx) {
+			/* Context already has ASID - mark as active */
+			return true;
+		}
+	}
+
+	/* Context does not have address space assigned */
+	return false;
+}
+
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_per_as_data *js_per_as_data;
+	int as_nr = kctx->as_nr;
+
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Attempting to release context without ASID\n");
+		return;
+	}
+
+	lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr];
+	if (js_per_as_data->as_busy_refcount != 0) {
+		WARN(1, "Attempting to release active ASID\n");
+		return;
+	}
+
+	/* Release context from address space */
+	js_per_as_data->kctx = NULL;
+
+	kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
+	/* If the GPU is currently powered, de-activate this address space on
+	 * the MMU */
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_mmu_disable(kctx);
+	/* If the GPU was not powered then the MMU will be reprogrammed on the
+	 * next pm_context_active() */
+
+	release_addr_space(kbdev, as_nr, kctx);
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+}
+
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+}
+
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+								int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	js_devdata->as_free |= (1 << as_nr);
+}
+
+/**
+ * check_is_runpool_full - check whether the runpool is full for a specified
+ * context
+ * @kbdev: Kbase device
+ * @kctx:  Kbase context
+ *
+ * If kctx == NULL, then this makes the least restrictive check on the
+ * runpool. A specific context that is supplied immediately after could fail
+ * the check, even under the same conditions.
+ *
+ * Therefore, once a context is obtained you \b must re-check it with this
+ * function, since the return value could change to false.
+ *
+ * Context:
+ *   In all cases, the caller must hold kbasep_js_device_data.runpool_mutex.
+ *   When kctx != NULL the caller must hold the
+ *   kbasep_js_kctx_info.ctx.jsctx_mutex.
+ *   When kctx == NULL, then the caller need not hold any jsctx_mutex locks (but
+ *   it doesn't do any harm to do so).
+ *
+ * Return: true if the runpool is full
+ */
+static bool check_is_runpool_full(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	bool is_runpool_full;
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Regardless of whether a context is submitting or not, can't have more
+	 * than there are HW address spaces */
+	is_runpool_full = (bool) (js_devdata->nr_all_contexts_running >=
+						kbdev->nr_hw_address_spaces);
+
+	if (kctx != NULL && (kctx->jctx.sched_info.ctx.flags &
+					KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+		lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		/* Contexts that submit might use less of the address spaces
+		 * available, due to HW workarounds.  In which case, the runpool
+		 * is also full when the number of submitting contexts exceeds
+		 * the number of submittable address spaces.
+		 *
+		 * Both checks must be made: can have nr_user_address_spaces ==
+		 * nr_hw_address spaces, and at the same time can have
+		 * nr_user_contexts_running < nr_all_contexts_running. */
+		is_runpool_full |= (bool)
+					(js_devdata->nr_user_contexts_running >=
+						kbdev->nr_user_address_spaces);
+	}
+
+	return is_runpool_full;
+}
+
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	int i;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* First try to find a free address space */
+	if (check_is_runpool_full(kbdev, kctx))
+		i = -1;
+	else
+		i = ffs(js_devdata->as_free) - 1;
+
+	if (i >= 0 && i < kbdev->nr_hw_address_spaces) {
+		js_devdata->as_free &= ~(1 << i);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return i;
+	}
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* No address space currently free, see if we can release one */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+		struct kbasep_js_kctx_info *as_js_kctx_info;
+		struct kbase_context *as_kctx;
+
+		js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[i];
+		as_kctx = js_per_as_data->kctx;
+		as_js_kctx_info = &as_kctx->jctx.sched_info;
+
+		/* Don't release privileged or active contexts, or contexts with
+		 * jobs running */
+		if (as_kctx && !(as_kctx->jctx.sched_info.ctx.flags &
+						KBASE_CTX_FLAG_PRIVILEGED) &&
+			js_per_as_data->as_busy_refcount == 0) {
+			if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
+								as_kctx)) {
+				WARN(1, "Failed to retain active context\n");
+
+				spin_unlock_irqrestore(
+						&js_devdata->runpool_irq.lock,
+									flags);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				return KBASEP_AS_NR_INVALID;
+			}
+
+			kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
+
+			/* Drop and retake locks to take the jsctx_mutex on the
+			 * context we're about to release without violating lock
+			 * ordering
+			 */
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+									flags);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+
+			/* Release context from address space */
+			mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+
+			kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
+
+			if (!as_js_kctx_info->ctx.is_scheduled) {
+				kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
+								as_kctx,
+								true);
+
+				js_devdata->as_free &= ~(1 << i);
+
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+				return i;
+			}
+
+			/* Context was retained while locks were dropped,
+			 * continue looking for free AS */
+
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+		}
+	}
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_as *new_address_space = NULL;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if (kbdev->hwaccess.active_kctx == kctx ||
+	    kctx->as_nr != KBASEP_AS_NR_INVALID ||
+	    as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Invalid parameters to use_ctx()\n");
+		return false;
+	}
+
+	new_address_space = &kbdev->as[as_nr];
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&new_address_space->transaction_mutex);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0) {
+		/* We need to retain it to keep the corresponding address space
+		 */
+		kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	}
+
+	return true;
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
new file mode 100755
index 0000000..8ccc440
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -0,0 +1,119 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific definitions
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
+#define _KBASE_HWACCESS_GPU_DEFS_H_
+
+/* SLOT_RB_SIZE must be < 256 */
+#define SLOT_RB_SIZE 2
+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
+
+/**
+ * struct rb_entry - Ringbuffer entry
+ * @katom:	Atom associated with this entry
+ */
+struct rb_entry {
+	struct kbase_jd_atom *katom;
+};
+
+/**
+ * struct slot_rb - Slot ringbuffer
+ * @entries:		Ringbuffer entries
+ * @last_context:	The last context to submit a job on this slot
+ * @read_idx:		Current read index of buffer
+ * @write_idx:		Current write index of buffer
+ * @job_chain_flag:	Flag used to implement jobchain disambiguation
+ */
+struct slot_rb {
+	struct rb_entry entries[SLOT_RB_SIZE];
+
+	struct kbase_context *last_context;
+
+	u8 read_idx;
+	u8 write_idx;
+
+	u8 job_chain_flag;
+};
+
+/**
+ * struct kbase_backend_data - GPU backend specific data for HW access layer
+ * @slot_rb:			Slot ringbuffers
+ * @rmu_workaround_flag:	When PRLAM-8987 is present, this flag determines
+ *				whether slots 0/1 or slot 2 are currently being
+ *				pulled from
+ * @scheduling_timer:		The timer tick used for rescheduling jobs
+ * @timer_running:		Is the timer running? The runpool_mutex must be
+ *				held whilst modifying this.
+ * @suspend_timer:              Is the timer suspended? Set when a suspend
+ *                              occurs and cleared on resume. The runpool_mutex
+ *                              must be held whilst modifying this.
+ * @reset_gpu:			Set to a KBASE_RESET_xxx value (see comments)
+ * @reset_workq:		Work queue for performing the reset
+ * @reset_work:			Work item for performing the reset
+ * @reset_wait:			Wait event signalled when the reset is complete
+ * @reset_timer:		Timeout for soft-stops before the reset
+ *
+ * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
+ * accessing this structure
+ */
+struct kbase_backend_data {
+	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
+
+	bool rmu_workaround_flag;
+
+	struct hrtimer scheduling_timer;
+
+	bool timer_running;
+	bool suspend_timer;
+
+	atomic_t reset_gpu;
+
+/* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_NOT_PENDING     0
+/* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_PREPARED        1
+/* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_COMMITTED       2
+/* The GPU reset process is currently occuring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+#define KBASE_RESET_GPU_HAPPENING       3
+
+	struct workqueue_struct *reset_workq;
+	struct work_struct reset_work;
+	wait_queue_head_t reset_wait;
+	struct hrtimer reset_timer;
+};
+
+/**
+ * struct kbase_jd_atom_backend - GPU backend specific katom data
+ */
+struct kbase_jd_atom_backend {
+};
+
+/**
+ * struct kbase_context_backend - GPU backend specific context data
+ */
+struct kbase_context_backend {
+};
+
+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
new file mode 100755
index 0000000..6577175
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -0,0 +1,1617 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel job manager APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+#define beenthere(kctx, f, a...) \
+			dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if KBASE_GPU_RESET_EN
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
+static void kbasep_reset_timeout_worker(struct work_struct *data);
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
+#endif /* KBASE_GPU_RESET_EN */
+
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
+						struct kbase_context *kctx)
+{
+	return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
+}
+
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js)
+{
+	struct kbase_context *kctx;
+	u32 cfg;
+	u64 jc_head = katom->jc;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	/* Command register must be available */
+	KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+	/* Affinity is not violating */
+	kbase_js_debug_log_current_affinities(kbdev);
+	KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
+							katom->affinity));
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
+						jc_head & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
+						jc_head >> 32, kctx);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+					katom->affinity & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+					katom->affinity >> 32, kctx);
+
+	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
+	 * start */
+	cfg = kctx->as_nr;
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
+
+#ifndef CONFIG_MALI_COH_GPU
+	cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+	cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+#endif
+
+	cfg |= JS_CONFIG_START_MMU;
+	cfg |= JS_CONFIG_THREAD_PRI(8);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) &&
+		(katom->atom_flags & KBASE_KATOM_FLAG_SECURE))
+		cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+		if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+			cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								true;
+		} else {
+			katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								false;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
+				katom->flush_id, kctx);
+
+	/* Write an approximate start timestamp.
+	 * It's approximate because there might be a job in the HEAD register.
+	 * In such cases, we'll try to make a better approximation in the IRQ
+	 * handler (up to the KBASE_JS_IRQ_THROTTLE_TIME_US). */
+	katom->start_timestamp = ktime_get();
+
+	/* GO ! */
+	dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx",
+				katom, kctx, js, jc_head, katom->affinity);
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
+							(u32) katom->affinity);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(
+				GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
+				kctx, kbase_jd_atom_id(kctx, katom));
+#endif
+	kbase_tlstream_tl_attrib_atom_config(katom, jc_head,
+			katom->affinity, cfg);
+	kbase_tlstream_tl_ret_ctx_lpu(
+		kctx,
+		&kbdev->gpu_props.props.raw_props.js_features[
+			katom->slot_nr]);
+	kbase_tlstream_tl_ret_atom_as(katom, &kbdev->as[kctx->as_nr]);
+	kbase_tlstream_tl_ret_atom_lpu(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[js],
+			"ctx_nr,atom_nr");
+#ifdef CONFIG_GPU_TRACEPOINTS
+	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
+		/* If this is the only job on the slot, trace it as starting */
+		char js_string[16];
+
+		trace_gpu_sched_switch(
+				kbasep_make_job_slot_string(js, js_string),
+				ktime_to_ns(katom->start_timestamp),
+				(u32)katom->kctx->id, 0, katom->work_id);
+		kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
+	}
+#endif
+	kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+						JS_COMMAND_START, katom->kctx);
+}
+
+/**
+ * kbasep_job_slot_update_head_start_timestamp - Update timestamp
+ * @kbdev: kbase device
+ * @js: job slot
+ * @end_timestamp: timestamp
+ *
+ * Update the start_timestamp of the job currently in the HEAD, based on the
+ * fact that we got an IRQ for the previous set of completed jobs.
+ *
+ * The estimate also takes into account the %KBASE_JS_IRQ_THROTTLE_TIME_US and
+ * the time the job was submitted, to work out the best estimate (which might
+ * still result in an over-estimate to the calculated time spent)
+ */
+static void kbasep_job_slot_update_head_start_timestamp(
+						struct kbase_device *kbdev,
+						int js,
+						ktime_t end_timestamp)
+{
+	if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) {
+		struct kbase_jd_atom *katom;
+		ktime_t new_timestamp;
+		ktime_t timestamp_diff;
+		/* The atom in the HEAD */
+		katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		KBASE_DEBUG_ASSERT(katom != NULL);
+
+		/* Account for any IRQ Throttle time - makes an overestimate of
+		 * the time spent by the job */
+		new_timestamp = ktime_sub_ns(end_timestamp,
+					KBASE_JS_IRQ_THROTTLE_TIME_US * 1000);
+		timestamp_diff = ktime_sub(new_timestamp,
+							katom->start_timestamp);
+		if (ktime_to_ns(timestamp_diff) >= 0) {
+			/* Only update the timestamp if it's a better estimate
+			 * than what's currently stored. This is because our
+			 * estimate that accounts for the throttle time may be
+			 * too much of an overestimate */
+			katom->start_timestamp = new_timestamp;
+		}
+	}
+}
+
+/**
+ * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint
+ * @kbdev: kbase device
+ * @i: job slot
+ *
+ * Get kbase atom by calling kbase_gpu_inspect for given job slot.
+ * Then use obtained katom and name of slot associated with the given
+ * job slot number in tracepoint call to the instrumentation module
+ * informing that given atom is no longer executed on given lpu (job slot).
+ */
+static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, i, 0);
+
+	kbase_tlstream_tl_nret_atom_lpu(katom,
+		&kbdev->gpu_props.props.raw_props.js_features[i]);
+}
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done)
+{
+	unsigned long flags;
+	int i;
+	u32 count = 0;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
+
+	memset(&kbdev->slot_submit_count_irq[0], 0,
+					sizeof(kbdev->slot_submit_count_irq));
+
+	/* write irq throttle register, this will prevent irqs from occurring
+	 * until the given number of gpu clock cycles have passed */
+	{
+		int irq_throttle_cycles =
+				atomic_read(&kbdev->irq_throttle_cycles);
+
+		kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE),
+						irq_throttle_cycles, NULL);
+	}
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	while (done) {
+		u32 failed = done >> 16;
+
+		/* treat failed slots as finished slots */
+		u32 finished = (done & 0xFFFF) | failed;
+
+		/* Note: This is inherently unfair, as we always check
+		 * for lower numbered interrupts before the higher
+		 * numbered ones.*/
+		i = ffs(finished) - 1;
+		KBASE_DEBUG_ASSERT(i >= 0);
+
+		do {
+			int nr_done;
+			u32 active;
+			u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */
+			u64 job_tail = 0;
+
+			if (failed & (1u << i)) {
+				/* read out the job slot status code if the job
+				 * slot reported failure */
+				completion_code = kbase_reg_read(kbdev,
+					JOB_SLOT_REG(i, JS_STATUS), NULL);
+
+				switch (completion_code) {
+				case BASE_JD_EVENT_STOPPED:
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+					kbase_trace_mali_job_slots_event(
+						GATOR_MAKE_EVENT(
+						GATOR_JOB_SLOT_SOFT_STOPPED, i),
+								NULL, 0);
+#endif
+
+					kbase_tlstream_aux_job_softstop(i);
+
+					kbasep_trace_tl_nret_atom_lpu(
+						kbdev, i);
+
+					/* Soft-stopped job - read the value of
+					 * JS<n>_TAIL so that the job chain can
+					 * be resumed */
+					job_tail = (u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_LO),
+									NULL) |
+						((u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_HI),
+								NULL) << 32);
+					break;
+				case BASE_JD_EVENT_NOT_STARTED:
+					/* PRLAM-10673 can cause a TERMINATED
+					 * job to come back as NOT_STARTED, but
+					 * the error interrupt helps us detect
+					 * it */
+					completion_code =
+						BASE_JD_EVENT_TERMINATED;
+					/* fall through */
+				default:
+					dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+							i, completion_code,
+							kbase_exception_name
+							(kbdev,
+							completion_code));
+				}
+
+				kbase_gpu_irq_evict(kbdev, i);
+			}
+
+			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+					done & ((1 << i) | (1 << (i + 16))),
+					NULL);
+			active = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_JS_STATE),
+					NULL);
+
+			if (((active >> i) & 1) == 0 &&
+					(((done >> (i + 16)) & 1) == 0)) {
+				/* There is a potential race we must work
+				 * around:
+				 *
+				 *  1. A job slot has a job in both current and
+				 *     next registers
+				 *  2. The job in current completes
+				 *     successfully, the IRQ handler reads
+				 *     RAWSTAT and calls this function with the
+				 *     relevant bit set in "done"
+				 *  3. The job in the next registers becomes the
+				 *     current job on the GPU
+				 *  4. Sometime before the JOB_IRQ_CLEAR line
+				 *     above the job on the GPU _fails_
+				 *  5. The IRQ_CLEAR clears the done bit but not
+				 *     the failed bit. This atomically sets
+				 *     JOB_IRQ_JS_STATE. However since both jobs
+				 *     have now completed the relevant bits for
+				 *     the slot are set to 0.
+				 *
+				 * If we now did nothing then we'd incorrectly
+				 * assume that _both_ jobs had completed
+				 * successfully (since we haven't yet observed
+				 * the fail bit being set in RAWSTAT).
+				 *
+				 * So at this point if there are no active jobs
+				 * left we check to see if RAWSTAT has a failure
+				 * bit set for the job slot. If it does we know
+				 * that there has been a new failure that we
+				 * didn't previously know about, so we make sure
+				 * that we record this in active (but we wait
+				 * for the next loop to deal with it).
+				 *
+				 * If we were handling a job failure (i.e. done
+				 * has the relevant high bit set) then we know
+				 * that the value read back from
+				 * JOB_IRQ_JS_STATE is the correct number of
+				 * remaining jobs because the failed job will
+				 * have prevented any futher jobs from starting
+				 * execution.
+				 */
+				u32 rawstat = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+				if ((rawstat >> (i + 16)) & 1) {
+					/* There is a failed job that we've
+					 * missed - add it back to active */
+					active |= (1u << i);
+				}
+			}
+
+			dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+							completion_code);
+
+			nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+			nr_done -= (active >> i) & 1;
+			nr_done -= (active >> (i + 16)) & 1;
+
+			if (nr_done <= 0) {
+				dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+									i);
+
+				goto spurious;
+			}
+
+			count += nr_done;
+
+			while (nr_done) {
+				if (nr_done == 1) {
+					kbase_gpu_complete_hw(kbdev, i,
+								completion_code,
+								job_tail,
+								&end_timestamp);
+					kbase_jm_try_kick_all(kbdev);
+				} else {
+					/* More than one job has completed.
+					 * Since this is not the last job being
+					 * reported this time it must have
+					 * passed. This is because the hardware
+					 * will not allow further jobs in a job
+					 * slot to complete until the failed job
+					 * is cleared from the IRQ status.
+					 */
+					kbase_gpu_complete_hw(kbdev, i,
+							BASE_JD_EVENT_DONE,
+							0,
+							&end_timestamp);
+				}
+				nr_done--;
+			}
+ spurious:
+			done = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
+				/* Workaround for missing interrupt caused by
+				 * PRLAM-10883 */
+				if (((active >> i) & 1) && (0 ==
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(i,
+							JS_STATUS), NULL))) {
+					/* Force job slot to be processed again
+					 */
+					done |= (1u << i);
+				}
+			}
+
+			failed = done >> 16;
+			finished = (done & 0xFFFF) | failed;
+			if (done)
+				end_timestamp = ktime_get();
+		} while (finished & (1 << i));
+
+		kbasep_job_slot_update_head_start_timestamp(kbdev, i,
+								end_timestamp);
+	}
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+#if KBASE_GPU_RESET_EN
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_COMMITTED) {
+		/* If we're trying to reset the GPU then we might be able to do
+		 * it early (without waiting for a timeout) because some jobs
+		 * have completed
+		 */
+		kbasep_try_reset_gpu_early(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
+}
+KBASE_EXPORT_TEST_API(kbase_job_done);
+
+static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	bool soft_stops_allowed = true;
+
+	if (kbase_jd_katom_is_secure(katom)) {
+		soft_stops_allowed = false;
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+		if ((katom->core_req & BASE_JD_REQ_T) != 0)
+			soft_stops_allowed = false;
+	}
+	return soft_stops_allowed;
+}
+
+static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
+								u16 core_reqs)
+{
+	bool hard_stops_allowed = true;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
+		if ((core_reqs & BASE_JD_REQ_T) != 0)
+			hard_stops_allowed = false;
+	}
+	return hard_stops_allowed;
+}
+
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					u16 core_reqs,
+					struct kbase_jd_atom *target_katom)
+{
+	struct kbase_context *kctx = target_katom->kctx;
+#if KBASE_TRACE_ENABLE
+	u32 status_reg_before;
+	u64 job_in_head_before;
+	u32 status_reg_after;
+
+	KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+
+	/* Check the head pointer */
+	job_in_head_before = ((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
+			| (((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_HI), NULL))
+									<< 32);
+	status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+#endif
+
+	if (action == JS_COMMAND_SOFT_STOP) {
+		bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
+								target_katom);
+
+		if (!soft_stop_allowed) {
+#ifdef CONFIG_MALI_DEBUG
+			dev_dbg(kbdev->dev,
+					"Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+
+		/* We are about to issue a soft stop, so mark the atom as having
+		 * been soft stopped */
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+
+		/* Mark the point where we issue the soft-stop command */
+		kbase_tlstream_aux_issue_job_softstop(target_katom);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+			int i;
+
+			for (i = 0;
+			     i < kbase_backend_nr_atoms_submitted(kbdev, js);
+			     i++) {
+				struct kbase_jd_atom *katom;
+
+				katom = kbase_gpu_inspect(kbdev, js, i);
+
+				KBASE_DEBUG_ASSERT(katom);
+
+				/* For HW_ISSUE_8316, only 'bad' jobs attacking
+				 * the system can cause this issue: normally,
+				 * all memory should be allocated in multiples
+				 * of 4 pages, and growable memory should be
+				 * changed size in multiples of 4 pages.
+				 *
+				 * Whilst such 'bad' jobs can be cleared by a
+				 * GPU reset, the locking up of a uTLB entry
+				 * caused by the bad job could also stall other
+				 * ASs, meaning that other ASs' jobs don't
+				 * complete in the 'grace' period before the
+				 * reset. We don't want to lose other ASs' jobs
+				 * when they would normally complete fine, so we
+				 * must 'poke' the MMU regularly to help other
+				 * ASs complete */
+				kbase_as_poking_timer_retain_atom(
+						kbdev, katom->kctx, katom);
+			}
+		}
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_SOFT_STOP_1 :
+				JS_COMMAND_SOFT_STOP_0;
+		}
+	} else if (action == JS_COMMAND_HARD_STOP) {
+		bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
+								core_reqs);
+
+		if (!hard_stop_allowed) {
+			/* Jobs can be hard-stopped for the following reasons:
+			 *  * CFS decides the job has been running too long (and
+			 *    soft-stop has not occurred). In this case the GPU
+			 *    will be reset by CFS if the job remains on the
+			 *    GPU.
+			 *
+			 *  * The context is destroyed, kbase_jd_zap_context
+			 *    will attempt to hard-stop the job. However it also
+			 *    has a watchdog which will cause the GPU to be
+			 *    reset if the job remains on the GPU.
+			 *
+			 *  * An (unhandled) MMU fault occurred. As long as
+			 *    BASE_HW_ISSUE_8245 is defined then the GPU will be
+			 *    reset.
+			 *
+			 * All three cases result in the GPU being reset if the
+			 * hard-stop fails, so it is safe to just return and
+			 * ignore the hard-stop request.
+			 */
+			dev_warn(kbdev->dev,
+					"Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+			return;
+		}
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_HARD_STOP_1 :
+				JS_COMMAND_HARD_STOP_0;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
+
+#if KBASE_TRACE_ENABLE
+	status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+	if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
+		struct kbase_jd_atom *head;
+		struct kbase_context *head_kctx;
+
+		head = kbase_gpu_inspect(kbdev, js, 0);
+		head_kctx = head->kctx;
+
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx,
+						head, job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+						0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	} else {
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+#endif
+}
+
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	/* Cancel any remaining running jobs for this kctx  */
+	mutex_lock(&kctx->jctx.lock);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* Invalidate all jobs in context, to prevent re-submitting */
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		if (!work_pending(&kctx->jctx.atoms[i].work))
+			kctx->jctx.atoms[i].event_code =
+						BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_hardstop(kctx, i, NULL);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	int js = target_katom->slot_nr;
+	int priority = target_katom->sched_priority;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		if (katom->kctx != kctx)
+			continue;
+
+		if (katom->sched_priority > priority)
+			kbase_job_slot_softstop(kbdev, js, katom);
+	}
+}
+
+struct zap_reset_data {
+	/* The stages are:
+	 * 1. The timer has never been called
+	 * 2. The zap has timed out, all slots are soft-stopped - the GPU reset
+	 *    will happen. The GPU has been reset when
+	 *    kbdev->hwaccess.backend.reset_waitq is signalled
+	 *
+	 * (-1 - The timer has been cancelled)
+	 */
+	int stage;
+	struct kbase_device *kbdev;
+	struct hrtimer timer;
+	spinlock_t lock; /* protects updates to stage member */
+};
+
+static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer)
+{
+	struct zap_reset_data *reset_data = container_of(timer,
+						struct zap_reset_data, timer);
+	struct kbase_device *kbdev = reset_data->kbdev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&reset_data->lock, flags);
+
+	if (reset_data->stage == -1)
+		goto out;
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_prepare_to_reset_gpu(kbdev)) {
+		dev_err(kbdev->dev, "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+								ZAP_TIMEOUT);
+		kbase_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	reset_data->stage = 2;
+
+ out:
+	spin_unlock_irqrestore(&reset_data->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct zap_reset_data reset_data;
+	unsigned long flags;
+
+	hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	reset_data.timer.function = zap_timeout_callback;
+
+	spin_lock_init(&reset_data.lock);
+
+	reset_data.kbdev = kbdev;
+	reset_data.stage = 1;
+
+	hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for all jobs to finish, and for the context to be not-scheduled
+	 * (due to kbase_job_zap_context(), we also guarentee it's not in the JS
+	 * policy queue either */
+	wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
+	wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			kctx->jctx.sched_info.ctx.is_scheduled == false);
+
+	spin_lock_irqsave(&reset_data.lock, flags);
+	if (reset_data.stage == 1) {
+		/* The timer hasn't run yet - so cancel it */
+		reset_data.stage = -1;
+	}
+	spin_unlock_irqrestore(&reset_data.lock, flags);
+
+	hrtimer_cancel(&reset_data.timer);
+
+	if (reset_data.stage == 2) {
+		/* The reset has already started.
+		 * Wait for the reset to complete
+		 */
+		wait_event(kbdev->hwaccess.backend.reset_wait,
+				atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+						== KBASE_RESET_GPU_NOT_PENDING);
+	}
+	destroy_hrtimer_on_stack(&reset_data.timer);
+
+	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+	/* Ensure that the signallers of the waitqs have finished */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
+{
+	u32 flush_id = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.backend.gpu_powered)
+			flush_id = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return flush_id;
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (NULL == kbdev->hwaccess.backend.reset_workq)
+		return -EINVAL;
+
+	KBASE_DEBUG_ASSERT(0 ==
+		object_is_on_stack(&kbdev->hwaccess.backend.reset_work));
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+#endif
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
+ * @kbdev: kbase device pointer
+ * @kctx:  context to check against
+ * @js:	   slot to check
+ * @target_katom: An atom to check, or NULL if all atoms from @kctx on
+ *                slot @js should be checked
+ *
+ * This checks are based upon parameters that would normally be passed to
+ * kbase_job_slot_hardstop().
+ *
+ * In the event of @target_katom being NULL, this will check the last jobs that
+ * are likely to be running on the slot to see if a) they belong to kctx, and
+ * so would be stopped, and b) whether they have AFBC
+ *
+ * In that case, It's guaranteed that a job currently executing on the HW with
+ * AFBC will be detected. However, this is a conservative check because it also
+ * detects jobs that have just completed too.
+ *
+ * Return: true when hard-stop _might_ stop an afbc atom, else false.
+ */
+static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
+		struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom)
+{
+	bool ret = false;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	/* When we have an atom the decision can be made straight away. */
+	if (target_katom)
+		return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC);
+
+	/* Otherwise, we must chweck the hardware to see if it has atoms from
+	 * this context with AFBC. */
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		/* Ignore atoms from other contexts, they won't be stopped when
+		 * we use this for checking if we should hard-stop them */
+		if (katom->kctx != kctx)
+			continue;
+
+		/* An atom on this slot and this context: check for AFBC */
+		if (katom->core_req & BASE_JD_REQ_FS_AFBC) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
+}
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * @sw_flags:      Flags to pass in about the soft-stop
+ *
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+			struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+	KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+	kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
+			JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool stopped;
+#if KBASE_GPU_RESET_EN
+	/* We make the check for AFBC before evicting/stopping atoms.  Note
+	 * that no other thread can modify the slots whilst we have the
+	 * runpool_irq lock. */
+	int needs_workaround_for_afbc =
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
+			&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
+					 target_katom);
+#endif
+
+	stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
+							target_katom,
+							JS_COMMAND_HARD_STOP);
+#if KBASE_GPU_RESET_EN
+	if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+			needs_workaround_for_afbc)) {
+		/* MIDBASE-2916 if a fragment job with AFBC encoding is
+		 * hardstopped, ensure to do a soft reset also in order to
+		 * clear the GPU status.
+		 * Workaround for HW issue 8401 has an issue,so after
+		 * hard-stopping just reset the GPU. This will ensure that the
+		 * jobs leave the GPU.*/
+		if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+			dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue");
+			kbase_reset_gpu_locked(kbdev);
+		}
+	}
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		u16 core_reqs, struct kbase_jd_atom *target_katom)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	/* For hard-stop, don't enter if hard-stop not allowed */
+	if (hw_action == JS_COMMAND_HARD_STOP &&
+			!kbasep_hard_stop_allowed(kbdev, core_reqs))
+		return;
+
+	/* For soft-stop, don't enter if soft-stop not allowed, or isn't
+	 * causing disjoint */
+	if (hw_action == JS_COMMAND_SOFT_STOP &&
+			!(kbasep_soft_stop_allowed(kbdev, target_katom) &&
+			  (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+		return;
+
+	/* Nothing to do if already logged disjoint state on this atom */
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+		return;
+
+	target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+	kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom)
+{
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+		target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+		kbase_disjoint_state_down(kbdev);
+	}
+}
+
+
+#if KBASE_GPU_RESET_EN
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+	int i;
+
+	dev_err(kbdev->dev, "Register state:");
+	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+	dev_err(kbdev->dev, "  JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x JOB_IRQ_THROTTLE=0x%08x",
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), NULL));
+	for (i = 0; i < 3; i++) {
+		dev_err(kbdev->dev, "  JS%d_STATUS=0x%08x      JS%d_HEAD_LO=0x%08x",
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
+					NULL),
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
+					NULL));
+	}
+	dev_err(kbdev->dev, "  MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+	dev_err(kbdev->dev, "  GPU_IRQ_MASK=0x%08x    JOB_IRQ_MASK=0x%08x     MMU_IRQ_MASK=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+	dev_err(kbdev->dev, "  PWR_OVERRIDE0=0x%08x   PWR_OVERRIDE1=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+	dev_err(kbdev->dev, "  SHADER_CONFIG=0x%08x   L2_MMU_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+}
+
+static void kbasep_save_hwcnt_setup(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				struct kbase_uk_hwcnt_setup *hwcnt_setup)
+{
+	hwcnt_setup->dump_buffer =
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kctx) &
+								0xffffffff;
+	hwcnt_setup->dump_buffer |= (u64)
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), kctx) <<
+									32;
+	hwcnt_setup->jm_bm =
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), kctx);
+	hwcnt_setup->shader_bm =
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), kctx);
+	hwcnt_setup->tiler_bm =
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), kctx);
+	hwcnt_setup->mmu_l2_bm =
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), kctx);
+}
+
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+	unsigned long flags, mmu_flags;
+	struct kbase_device *kbdev;
+	int i;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_uk_hwcnt_setup hwcnt_setup = { {0} };
+	enum kbase_instr_state bckp_state;
+	bool try_schedule = false;
+	bool restore_hwc = false;
+
+	KBASE_DEBUG_ASSERT(data);
+
+	kbdev = container_of(data, struct kbase_device,
+						hwaccess.backend.reset_work);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+	/* Make sure the timer has completed - this cannot be done from
+	 * interrupt context, so this cannot be done within
+	 * kbasep_try_reset_gpu_early. */
+	hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* This would re-activate the GPU. Since it's already idle,
+		 * there's no need to reset it */
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+		kbase_disjoint_state_down(kbdev);
+		wake_up(&kbdev->hwaccess.backend.reset_wait);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, mmu_flags);
+	/* We're about to flush out the IRQs and their bottom half's */
+	kbdev->irq_reset_flush = true;
+
+	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
+	 * spinlock; this also clears any outstanding interrupts */
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	kbase_pm_disable_interrupts(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, mmu_flags);
+
+	/* Ensure that any IRQ handlers have finished
+	 * Must be done without any locks IRQ handlers will take */
+	kbase_synchronize_irqs(kbdev);
+
+	/* Flush out any in-flight work items */
+	kbase_flush_mmu_wqs(kbdev);
+
+	/* The flush has completed so reset the active indicator */
+	kbdev->irq_reset_flush = false;
+
+	mutex_lock(&kbdev->pm.lock);
+	/* We hold the pm lock, so there ought to be a current policy */
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+	/* All slot have been soft-stopped and we've waited
+	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
+	 * assume that anything that is still left on the GPU is stuck there and
+	 * we'll kill it when we reset the GPU */
+
+	dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+								RESET_TIMEOUT);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+		/* the same interrupt handler preempted itself */
+		/* GPU is being reset */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+	/* Save the HW counters setup */
+	if (kbdev->hwcnt.kctx != NULL) {
+		struct kbase_context *kctx = kbdev->hwcnt.kctx;
+
+		if (kctx->jctx.sched_info.ctx.is_scheduled) {
+			kbasep_save_hwcnt_setup(kbdev, kctx, &hwcnt_setup);
+
+			restore_hwc = true;
+		}
+	}
+
+	/* Output the state of some interesting registers to help in the
+	 * debugging of GPU resets */
+	kbase_debug_dump_registers(kbdev);
+
+	bckp_state = kbdev->hwcnt.backend.state;
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_RESETTING;
+	kbdev->hwcnt.backend.triggered = 0;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Reset the GPU */
+	kbase_pm_init_hw(kbdev, 0);
+
+	/* Complete any jobs that were still on the GPU */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	kbase_backend_reset(kbdev, &end_timestamp);
+	kbase_pm_metrics_update(kbdev, NULL);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Reprogram the GPU's MMU */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		mutex_lock(&as->transaction_mutex);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		if (js_devdata->runpool_irq.per_as_data[i].kctx)
+			kbase_mmu_update(
+				js_devdata->runpool_irq.per_as_data[i].kctx);
+		else
+			kbase_mmu_disable_as(kbdev, i);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&as->transaction_mutex);
+	}
+
+	kbase_pm_enable_interrupts(kbdev);
+
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+
+	kbase_disjoint_state_down(kbdev);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	dev_err(kbdev->dev, "Reset complete");
+
+	if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
+		try_schedule = true;
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	/* Restore the HW counters setup */
+	if (restore_hwc) {
+		struct kbase_context *kctx = kbdev->hwcnt.kctx;
+		u32 prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+#endif
+
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+				prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+				hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+				hwcnt_setup.dump_buffer >> 32,        kctx);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+				hwcnt_setup.jm_bm,                    kctx);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+				hwcnt_setup.shader_bm,                kctx);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+				hwcnt_setup.mmu_l2_bm,                kctx);
+
+		/* Due to PRLAM-8186 we need to disable the Tiler before we
+		 * enable the HW counter dump. */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+						0, kctx);
+		else
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+						hwcnt_setup.tiler_bm, kctx);
+
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+				prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL,
+				kctx);
+
+		/* If HW has PRLAM-8186 we can now re-enable the tiler HW
+		 * counters dump */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+						hwcnt_setup.tiler_bm, kctx);
+	}
+	kbdev->hwcnt.backend.state = bckp_state;
+	switch (kbdev->hwcnt.backend.state) {
+	/* Cases for waking kbasep_cache_clean_worker worker */
+	case KBASE_INSTR_STATE_CLEANED:
+		/* Cache-clean IRQ occurred, but we reset:
+		 * Wakeup incase the waiter saw RESETTING */
+	case KBASE_INSTR_STATE_REQUEST_CLEAN:
+		/* After a clean was requested, but before the regs were
+		 * written:
+		 * Wakeup incase the waiter saw RESETTING */
+		wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+		break;
+	case KBASE_INSTR_STATE_CLEANING:
+		/* Either:
+		 * 1) We've not got the Cache-clean IRQ yet: it was lost, or:
+		 * 2) We got it whilst resetting: it was voluntarily lost
+		 *
+		 * So, move to the next state and wakeup: */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+		wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+		break;
+
+	/* Cases for waking anyone else */
+	case KBASE_INSTR_STATE_DUMPING:
+		/* If dumping, abort the dump, because we may've lost the IRQ */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+		break;
+	case KBASE_INSTR_STATE_DISABLED:
+	case KBASE_INSTR_STATE_IDLE:
+	case KBASE_INSTR_STATE_FAULT:
+		/* Every other reason: wakeup in that state */
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+		break;
+
+	/* Unhandled cases */
+	case KBASE_INSTR_STATE_RESETTING:
+	default:
+		BUG();
+		break;
+	}
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Resume the vinstr core */
+	kbase_vinstr_hwc_resume(kbdev->vinstr_ctx);
+
+	/* Note: counter dumping may now resume */
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Find out what cores are required now */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* Synchronously request and wait for those cores, because if
+	 * instrumentation is enabled it would need them immediately. */
+	kbase_pm_check_transitions_sync(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Try submitting some jobs to restart processing */
+	if (try_schedule) {
+		KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
+									0);
+		kbase_js_sched_all(kbdev);
+	}
+
+	kbase_pm_context_idle(kbdev);
+	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+						hwaccess.backend.reset_timer);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Reset still pending? */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+						KBASE_RESET_GPU_COMMITTED)
+		queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+	int i;
+	int pending_jobs = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Count the number of jobs */
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+	if (pending_jobs > 0) {
+		/* There are still jobs on the GPU - wait */
+		return;
+	}
+
+	/* To prevent getting incorrect registers when dumping failed job,
+	 * skip early reset.
+	 */
+	if (kbdev->job_fault_debug != false)
+		return;
+
+	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+	 * been called), and that no other thread beat this thread to starting
+	 * the reset */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+						KBASE_RESET_GPU_COMMITTED) {
+		/* Reset has already occurred */
+		return;
+	}
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	kbasep_try_reset_gpu_early_locked(kbdev);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ *   The function returns a boolean which should be interpreted as follows:
+ *   true - Prepared for reset, kbase_reset_gpu should be called.
+ *   false - Another thread is performing a reset, kbase_reset_gpu should
+ *   not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_PREPARED) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return false;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_softstop(kbdev, i, NULL);
+
+	return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	bool ret;
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early_locked(kbdev);
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100755
index 0000000..eb068d4
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,155 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpoool_irq::lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev:		Device pointer
+ * @s:			Job slot
+ * @completion_code:	Completion code of job reported by GPU
+ * @job_tail:		Job tail address reported by GPU
+ * @end_timestamp:	Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+					u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static inline char *kbasep_make_job_slot_string(int js, char *js_string)
+{
+	sprintf(js_string, "job_slot_%i", js);
+	return js_string;
+}
+#endif
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpoool_irq::lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ *						   on the specified atom
+ * @kbdev:		Device pointer
+ * @js:			Job slot to stop on
+ * @action:		The action to perform, either JSn_COMMAND_HARD_STOP or
+ *			JSn_COMMAND_SOFT_STOP
+ * @core_reqs:		Core requirements of atom to stop
+ * @target_katom:	Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					u16 core_reqs,
+					struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ *					 slot belonging to a given context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @katom:	Specific atom to stop. May be NULL
+ * @js:		Job slot to hard stop
+ * @action:	The action to perform, either JSn_COMMAND_HARD_STOP or
+ *		JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
new file mode 100755
index 0000000..af6cddc
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -0,0 +1,1506 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_10969_workaround.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Return whether the specified ringbuffer is empty. HW access lock must be
+ * held */
+#define SLOT_RB_EMPTY(rb)   (rb->write_idx == rb->read_idx)
+/* Return number of atoms currently in the specified ringbuffer. HW access lock
+ * must be held */
+#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer
+ * @kbdev: Device pointer
+ * @katom: Atom to enqueue
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr];
+
+	WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
+	rb->write_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+/**
+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once
+ * it has been completed
+ * @kbdev:         Device pointer
+ * @js:            Job slot to remove atom from
+ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in
+ *                 which case current time will be used.
+ *
+ * Context: Caller must hold the HW access lock
+ *
+ * Return: Atom removed from ringbuffer
+ */
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
+						int js,
+						ktime_t *end_timestamp)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+	struct kbase_jd_atom *katom;
+
+	if (SLOT_RB_EMPTY(rb)) {
+		WARN(1, "GPU ringbuffer unexpectedly empty\n");
+		return NULL;
+	}
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
+
+	kbase_gpu_release_atom(kbdev, katom, end_timestamp);
+
+	rb->read_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
+
+	kbase_js_debug_log_current_affinities(kbdev);
+
+	return katom;
+}
+
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
+		return NULL; /* idx out of range */
+
+	return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js)
+{
+	return kbase_gpu_inspect(kbdev, js, 0);
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	if (SLOT_RB_EMPTY(rb))
+		return NULL;
+
+	return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom;
+}
+
+/**
+ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently
+ * on the GPU
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ *
+ * Return: true if there are atoms on the GPU for slot js,
+ *         false otherwise
+ */
+static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (!katom)
+			return false;
+		if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED ||
+				katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
+ * currently on the GPU
+ * @kbdev:  Device pointer
+ *
+ * Return: true if there are any atoms on the GPU, false otherwise
+ */
+static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
+{
+	int js;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+			if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+				return true;
+		}
+	}
+	return false;
+}
+
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED))
+			nr++;
+	}
+
+	return nr;
+}
+
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		if (kbase_gpu_inspect(kbdev, js, i))
+			nr++;
+	}
+
+	return nr;
+}
+
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
+				enum kbase_atom_gpu_rb_state min_rb_state)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state >= min_rb_state))
+			nr++;
+	}
+
+	return nr;
+}
+
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* The GPU is being reset - so prevent submission */
+		return 0;
+	}
+
+	return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
+}
+
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom);
+
+static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
+						int js,
+						struct kbase_jd_atom *katom)
+{
+	/* The most recently checked affinity. Having this at this scope allows
+	 * us to guarantee that we've checked the affinity in this function
+	 * call.
+	 */
+	u64 recently_chosen_affinity = 0;
+	bool chosen_affinity = false;
+	bool retry;
+
+	do {
+		retry = false;
+
+		/* NOTE: The following uses a number of FALLTHROUGHs to optimize
+		 * the calls to this function. Ending of the function is
+		 * indicated by BREAK OUT */
+		switch (katom->coreref_state) {
+			/* State when job is first attempted to be run */
+		case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+			KBASE_DEBUG_ASSERT(katom->affinity == 0);
+
+			/* Compute affinity */
+			if (false == kbase_js_choose_affinity(
+					&recently_chosen_affinity, kbdev, katom,
+									js)) {
+				/* No cores are currently available */
+				/* *** BREAK OUT: No state transition *** */
+				break;
+			}
+
+			chosen_affinity = true;
+
+			/* Request the cores */
+			kbase_pm_request_cores(kbdev,
+					katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+			katom->affinity = recently_chosen_affinity;
+
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+			{
+				enum kbase_pm_cores_ready cores_ready;
+
+				KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+				cores_ready = kbase_pm_register_inuse_cores(
+						kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						katom->affinity);
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Stay in this state and return, to
+					 * retry at this state later */
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: No state transition
+					 * *** */
+					break;
+				}
+				/* Proceed to next state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+			}
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+			/* Optimize out choosing the affinity twice in the same
+			 * function call */
+			if (chosen_affinity == false) {
+				/* See if the affinity changed since a previous
+				 * call. */
+				if (false == kbase_js_choose_affinity(
+						&recently_chosen_affinity,
+							kbdev, katom, js)) {
+					/* No cores are currently available */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REQUEST_ON_RECHECK_FAILED,
+						katom->kctx, katom,
+						katom->jc, js,
+						(u32) recently_chosen_affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+				chosen_affinity = true;
+			}
+
+			/* Now see if this requires a different set of cores */
+			if (recently_chosen_affinity != katom->affinity) {
+				enum kbase_pm_cores_ready cores_ready;
+
+				kbase_pm_request_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+				/* Register new cores whilst we still hold the
+				 * old ones, to minimize power transitions */
+				cores_ready =
+					kbase_pm_register_inuse_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+
+				/* Fixup the state that was reduced by
+				 * deref_cores: */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				katom->affinity = recently_chosen_affinity;
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				/* Now might be waiting for powerup again, with
+				 * a new affinity */
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Return to previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_ON_RECHECK_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+			}
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+		case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+			KBASE_DEBUG_ASSERT(katom->affinity ==
+						recently_chosen_affinity);
+
+			/* Note: this is where the caller must've taken the
+			 * runpool_irq.lock */
+
+			/* Check for affinity violations - if there are any,
+			 * then we just ask the caller to requeue and try again
+			 * later */
+			if (kbase_js_affinity_would_violate(kbdev, js,
+					katom->affinity) != false) {
+				/* Return to previous state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				/* *** BREAK OUT: Transition to lower state ***
+				 */
+				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_AFFINITY_WOULD_VIOLATE,
+					katom->kctx, katom, katom->jc, js,
+					(u32) katom->affinity);
+				break;
+			}
+
+			/* No affinity violations would result, so the cores are
+			 * ready */
+			katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
+			/* *** BREAK OUT: Cores Ready *** */
+			break;
+
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false,
+					"Unhandled kbase_atom_coreref_state %d",
+							katom->coreref_state);
+			break;
+		}
+	} while (retry != false);
+
+	return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
+}
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	switch (katom->coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(katom->affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							katom->coreref_state);
+		break;
+	}
+
+	katom->affinity = 0;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+}
+
+static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							coreref_state);
+		break;
+	}
+}
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp)
+{
+	switch (katom->gpu_rb_state) {
+	case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+		/* Should be impossible */
+		WARN(1, "Attempting to release atom not in ringbuffer\n");
+		break;
+
+	case KBASE_ATOM_GPU_RB_SUBMITTED:
+		/* Inform power management at start/finish of atom so it can
+		 * update its GPU utilisation metrics. Mark atom as not
+		 * submitted beforehand. */
+		katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		kbase_pm_metrics_update(kbdev, end_timestamp);
+
+		if (katom->core_req & BASE_JD_REQ_PERMON)
+			kbase_pm_release_gpu_cycle_counter(kbdev);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_READY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+		kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
+							katom->affinity);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+		break;
+
+	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+		break;
+	}
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	kbase_gpu_release_atom(kbdev, katom, NULL);
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+}
+
+static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	bool slot_busy[3];
+
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+	slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+
+	if ((js == 2 && !(slot_busy[0] || slot_busy[1])) ||
+		(js != 2 && !slot_busy[2]))
+		return true;
+
+	/* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */
+	if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1) ||
+			backend->rmu_workaround_flag))
+		return false;
+
+	/* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */
+	if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) ||
+			!backend->rmu_workaround_flag))
+		return false;
+
+	backend->rmu_workaround_flag = !backend->rmu_workaround_flag;
+
+	return true;
+}
+
+static bool kbase_gpu_in_secure_mode(struct kbase_device *kbdev)
+{
+	return kbdev->secure_mode;
+}
+
+static int kbase_gpu_secure_mode_enable(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	WARN_ONCE(!kbdev->secure_ops,
+			"Cannot enable secure mode: secure callbacks not specified.\n");
+
+	if (kbdev->secure_ops) {
+		/* Switch GPU to secure mode */
+		err = kbdev->secure_ops->secure_mode_enable(kbdev);
+
+		if (err)
+			dev_warn(kbdev->dev, "Failed to enable secure mode: %d\n", err);
+		else
+			kbdev->secure_mode = true;
+	}
+
+	return err;
+}
+
+static int kbase_gpu_secure_mode_disable(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	WARN_ONCE(!kbdev->secure_ops,
+			"Cannot disable secure mode: secure callbacks not specified.\n");
+
+	if (kbdev->secure_ops) {
+		/* Switch GPU to non-secure mode */
+		err = kbdev->secure_ops->secure_mode_disable(kbdev);
+
+		if (err)
+			dev_warn(kbdev->dev, "Failed to disable secure mode: %d\n", err);
+		else
+			kbdev->secure_mode = false;
+	}
+
+	return err;
+}
+
+void kbase_gpu_slot_update(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_jd_atom *katom[2];
+		int idx;
+
+		katom[0] = kbase_gpu_inspect(kbdev, js, 0);
+		katom[1] = kbase_gpu_inspect(kbdev, js, 1);
+		WARN_ON(katom[1] && !katom[0]);
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			bool cores_ready;
+
+			if (!katom[idx])
+				continue;
+
+			switch (katom[idx]->gpu_rb_state) {
+			case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+				/* Should be impossible */
+				WARN(1, "Attempting to update atom not in ringbuffer\n");
+				break;
+
+			case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+				if (katom[idx]->atom_flags &
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED)
+					break;
+
+				katom[idx]->gpu_rb_state =
+				KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+			case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+
+				if (katom[idx]->will_fail_event_code) {
+					kbase_gpu_mark_atom_for_return(kbdev,
+							katom[idx]);
+					/* Set EVENT_DONE so this atom will be
+					   completed, not unpulled. */
+					katom[idx]->event_code =
+						BASE_JD_EVENT_DONE;
+					/* Only return if head atom or previous
+					 * atom already removed - as atoms must
+					 * be returned in order. */
+					if (idx == 0 ||	katom[0]->gpu_rb_state ==
+							KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+						kbase_gpu_dequeue_atom(kbdev, js, NULL);
+						kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+					}
+					break;
+				}
+
+
+				cores_ready =
+					kbasep_js_job_check_ref_cores(kbdev, js,
+								katom[idx]);
+
+				if (katom[idx]->event_code ==
+						BASE_JD_EVENT_PM_EVENT) {
+					katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+					break;
+				}
+
+				if (!cores_ready)
+					break;
+
+				kbase_js_affinity_retain_slot_cores(kbdev, js,
+							katom[idx]->affinity);
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+				if (!kbase_gpu_rmu_workaround(kbdev, js))
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE:
+				/* Only submit if head atom or previous atom
+				 * already submitted */
+				if (idx == 1 &&
+					(katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED &&
+					katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+					break;
+
+				if (kbase_gpu_in_secure_mode(kbdev) != kbase_jd_katom_is_secure(katom[idx])) {
+					int err = 0;
+
+					/* Not in correct mode, take action */
+					if (kbase_gpu_atoms_submitted_any(kbdev)) {
+						/*
+						 * We are not in the correct
+						 * GPU mode for this job, and
+						 * we can't switch now because
+						 * there are jobs already
+						 * running.
+						 */
+						break;
+					}
+
+					/* No jobs running, so we can switch GPU mode right now */
+					if (kbase_jd_katom_is_secure(katom[idx])) {
+						err = kbase_gpu_secure_mode_enable(kbdev);
+					} else {
+						err = kbase_gpu_secure_mode_disable(kbdev);
+					}
+
+					if (err) {
+						/* Failed to switch secure mode, fail atom */
+						katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+						kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+						/* Only return if head atom or previous atom
+						 * already removed - as atoms must be returned
+						 * in order */
+						if (idx == 0 || katom[0]->gpu_rb_state ==
+								KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+							kbase_gpu_dequeue_atom(kbdev, js, NULL);
+							kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+						}
+						break;
+					}
+				}
+
+				/* Secure mode sanity checks */
+				KBASE_DEBUG_ASSERT_MSG(
+					kbase_jd_katom_is_secure(katom[idx]) == kbase_gpu_in_secure_mode(kbdev),
+					"Secure mode of atom (%d) doesn't match secure mode of GPU (%d)",
+					kbase_jd_katom_is_secure(katom[idx]), kbase_gpu_in_secure_mode(kbdev));
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_READY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_READY:
+				/* Only submit if head atom or previous atom
+				 * already submitted */
+				if (idx == 1 &&
+					(katom[0]->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED &&
+					katom[0]->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+					break;
+
+				/* Check if this job needs the cycle counter
+				 * enabled before submission */
+				if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+					kbase_pm_request_gpu_cycle_counter_l2_is_on(
+									kbdev);
+
+				kbase_job_hw_submit(kbdev, katom[idx], js);
+				katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_SUBMITTED;
+
+				/* Inform power management at start/finish of
+				 * atom so it can update its GPU utilisation
+				 * metrics. */
+				kbase_pm_metrics_update(kbdev,
+						&katom[idx]->start_timestamp);
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_SUBMITTED:
+				/* Atom submitted to HW, nothing else to do */
+				break;
+
+			case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+				/* Only return if head atom or previous atom
+				 * already removed - as atoms must be returned
+				 * in order */
+				if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					kbase_gpu_dequeue_atom(kbdev, js, NULL);
+					kbase_jm_return_atom_to_js(kbdev,
+								katom[idx]);
+				}
+				break;
+			}
+		}
+	}
+
+	/* Warn if PRLAM-8987 affinity restrictions are violated */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1)) &&
+			kbase_gpu_atoms_submitted(kbdev, 2));
+}
+
+
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	kbase_gpu_enqueue_atom(kbdev, katom);
+	kbase_gpu_slot_update(kbdev);
+}
+
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_atom *next_katom;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	katom = kbase_gpu_inspect(kbdev, js, 0);
+	next_katom = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (next_katom && katom->kctx == next_katom->kctx &&
+		next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+		(kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+		kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL)
+									!= 0)) {
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+				JS_COMMAND_NOP, NULL);
+		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		return true;
+	}
+
+	return false;
+}
+
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) &&
+			completion_code != BASE_JD_EVENT_DONE &&
+			!(completion_code & BASE_JD_SW_EVENT)) {
+		katom->need_cache_flush_cores_retained = katom->affinity;
+		kbase_pm_request_cores(kbdev, false, katom->affinity);
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
+		if (kbdev->gpu_props.num_core_groups > 1 &&
+			!(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[0].core_mask
+									) &&
+			(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[1].core_mask
+									)) {
+			dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
+			katom->need_cache_flush_cores_retained =
+								katom->affinity;
+			kbase_pm_request_cores(kbdev, false,
+							katom->affinity);
+		}
+	}
+
+	katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+
+	kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED) {
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		/*
+		 * Dequeue next atom from ringbuffers on same slot if required.
+		 * This atom will already have been removed from the NEXT
+		 * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
+		 * the atoms on this slot are returned in the correct order.
+		 */
+		if (next_katom && katom->kctx == next_katom->kctx) {
+			kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+			kbase_jm_return_atom_to_js(kbdev, next_katom);
+		}
+	} else if (completion_code != BASE_JD_EVENT_DONE) {
+		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+		int i;
+
+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
+		KBASE_TRACE_DUMP(kbdev);
+#endif
+		kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+		/*
+		 * Remove all atoms on the same context from ringbuffers. This
+		 * will not remove atoms that are already on the GPU, as these
+		 * are guaranteed not to have fail dependencies on the failed
+		 * atom.
+		 */
+		for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+			struct kbase_jd_atom *katom_idx0 =
+						kbase_gpu_inspect(kbdev, i, 0);
+			struct kbase_jd_atom *katom_idx1 =
+						kbase_gpu_inspect(kbdev, i, 1);
+
+			if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
+				katom_idx0->gpu_rb_state !=
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Dequeue katom_idx0 from ringbuffer */
+				kbase_gpu_dequeue_atom(kbdev, i, end_timestamp);
+
+				if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					katom_idx0->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+					/* Dequeue katom_idx1 from ringbuffer */
+					kbase_gpu_dequeue_atom(kbdev, i,
+							end_timestamp);
+
+					katom_idx1->event_code =
+							BASE_JD_EVENT_STOPPED;
+					kbase_jm_return_atom_to_js(kbdev,
+								katom_idx1);
+				}
+				katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+
+			} else if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					katom_idx1->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Can not dequeue this atom yet - will be
+				 * dequeued when atom at idx0 completes */
+				katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_gpu_mark_atom_for_return(kbdev,
+								katom_idx1);
+			}
+		}
+	}
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
+					js, completion_code);
+
+	if (job_tail != 0 && job_tail != katom->jc) {
+		bool was_updated = (job_tail != katom->jc);
+
+		/* Some of the job has been executed, so we update the job chain
+		 * address to where we should resume from */
+		katom->jc = job_tail;
+		if (was_updated)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+						katom, job_tail, js);
+	}
+
+	/* Only update the event code for jobs that weren't cancelled */
+	if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+		katom->event_code = (base_jd_event_code)completion_code;
+
+	kbase_device_trace_register_access(kctx, REG_WRITE,
+						JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+						1 << js);
+
+	/* Complete the job, and start new ones
+	 *
+	 * Also defer remaining work onto the workqueue:
+	 * - Re-queue Soft-stopped jobs
+	 * - For any other jobs, queue the job back into the dependency system
+	 * - Schedule out the parent context if necessary, and schedule a new
+	 *   one in.
+	 */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	{
+		/* The atom in the HEAD */
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		if (next_katom && next_katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+								js_string),
+						ktime_to_ns(*end_timestamp),
+						(u32)next_katom->kctx->id, 0,
+						next_katom->work_id);
+			kbdev->hwaccess.backend.slot_rb[js].last_context =
+							next_katom->kctx;
+		} else {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+								js_string),
+						ktime_to_ns(ktime_get()), 0, 0,
+						0);
+			kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
+		}
+	}
+#endif
+
+	if (completion_code == BASE_JD_EVENT_STOPPED)
+		kbase_jm_return_atom_to_js(kbdev, katom);
+	else
+		kbase_jm_complete(kbdev, katom, end_timestamp);
+
+	/* Job completion may have unblocked other atoms. Try to update all job
+	 * slots */
+	kbase_gpu_slot_update(kbdev);
+}
+
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int idx;
+
+		for (idx = 0; idx < 2; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+									js, 0);
+
+			if (katom) {
+				kbase_gpu_release_atom(kbdev, katom, NULL);
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+				kbase_jm_complete(kbdev, katom, end_timestamp);
+			}
+		}
+	}
+}
+
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
+	kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
+							katom->core_req, katom);
+	kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+}
+
+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom,
+						u32 action,
+						bool disjoint)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+	kbase_gpu_mark_atom_for_return(kbdev, katom);
+	kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+	if (disjoint)
+		kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
+									katom);
+}
+
+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
+{
+	if (katom->x_post_dep) {
+		struct kbase_jd_atom *dep_atom = katom->x_post_dep;
+
+		if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB &&
+			dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_RETURN_TO_JS)
+			return dep_atom->slot_nr;
+	}
+	return -1;
+}
+
+static void kbase_job_evicted(struct kbase_jd_atom *katom)
+{
+	kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom,
+			katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
+}
+
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	struct kbase_jd_atom *katom_idx0;
+	struct kbase_jd_atom *katom_idx1;
+
+	bool katom_idx0_valid, katom_idx1_valid;
+
+	bool ret = false;
+
+	int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
+	katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (katom) {
+		katom_idx0_valid = (katom_idx0 == katom);
+		/* If idx0 is to be removed and idx1 is on the same context,
+		 * then idx1 must also be removed otherwise the atoms might be
+		 * returned out of order */
+		if (katom_idx1)
+			katom_idx1_valid = (katom_idx1 == katom) ||
+						(katom_idx0_valid &&
+							(katom_idx0->kctx ==
+							katom_idx1->kctx));
+		else
+			katom_idx1_valid = false;
+	} else {
+		katom_idx0_valid = (katom_idx0 &&
+					(!kctx || katom_idx0->kctx == kctx));
+		katom_idx1_valid = (katom_idx1 &&
+					(!kctx || katom_idx1->kctx == kctx));
+	}
+
+	if (katom_idx0_valid)
+		stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
+	if (katom_idx1_valid)
+		stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1);
+
+	if (katom_idx0_valid) {
+		if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Simple case - just dequeue and return */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			if (katom_idx1_valid) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				katom_idx1->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx1);
+				kbasep_js_clear_submit_allowed(js_devdata,
+							katom_idx1->kctx);
+			}
+
+			katom_idx0->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+			kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+			kbasep_js_clear_submit_allowed(js_devdata,
+							katom_idx0->kctx);
+		} else {
+			/* katom_idx0 is on GPU */
+			if (katom_idx1 && katom_idx1->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* katom_idx0 and katom_idx1 are on GPU */
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+					/* idx0 has already completed - stop
+					 * idx1 if needed*/
+					if (katom_idx1_valid) {
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				} else {
+					/* idx1 is in NEXT registers - attempt
+					 * to remove */
+					kbase_reg_write(kbdev,
+							JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+					if (kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_HI), NULL)
+									!= 0) {
+						/* idx1 removed successfully,
+						 * will be handled in IRQ */
+						kbase_job_evicted(katom_idx1);
+						kbase_gpu_remove_atom(kbdev,
+								katom_idx1,
+								action, true);
+						stop_x_dep_idx1 =
+					should_stop_x_dep_slot(katom_idx1);
+
+						/* stop idx0 if still on GPU */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx0,
+								action);
+						ret = true;
+					} else if (katom_idx1_valid) {
+						/* idx0 has already completed,
+						 * stop idx1 if needed */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				}
+			} else if (katom_idx1_valid) {
+				/* idx1 not on GPU but must be dequeued*/
+
+				/* idx1 will be handled in IRQ */
+				kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+				/* stop idx0 */
+				/* This will be repeated for anything removed
+				 * from the next registers, since their normal
+				 * flow was also interrupted, and this function
+				 * might not enter disjoint state e.g. if we
+				 * don't actually do a hard stop on the head
+				 * atom */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			} else {
+				/* no atom in idx1 */
+				/* just stop idx0 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			}
+		}
+	} else if (katom_idx1_valid) {
+		if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Mark for return */
+			/* idx1 will be returned once idx0 completes */
+			kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+		} else {
+			/* idx1 is on GPU */
+			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+				/* idx0 has already completed - stop idx1 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx1,
+									action);
+				ret = true;
+			} else {
+				/* idx1 is in NEXT registers - attempt to
+				 * remove */
+				kbase_reg_write(kbdev, JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_LO), NULL) != 0 ||
+				    kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_HI), NULL) != 0) {
+					/* idx1 removed successfully, will be
+					 * handled in IRQ once idx0 completes */
+					kbase_job_evicted(katom_idx1);
+					kbase_gpu_remove_atom(kbdev, katom_idx1,
+									action,
+									false);
+				} else {
+					/* idx0 has already completed - stop
+					 * idx1 */
+					kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+					ret = true;
+				}
+			}
+		}
+	}
+
+
+	if (stop_x_dep_idx0 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0,
+								NULL, action);
+
+	if (stop_x_dep_idx1 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1,
+								NULL, action);
+
+	return ret;
+}
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	/* Limit the number of loops to avoid a hang if the interrupt is missed
+	 */
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	mutex_lock(&kbdev->cacheclean_lock);
+
+	/* use GPU_COMMAND completion solution */
+	/* clean & invalidate the caches */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+	/* wait for cache flush to complete before continuing */
+	while (--max_loops &&
+		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+						CLEAN_CACHES_COMPLETED) == 0)
+		;
+
+	/* clear the CLEAN_CACHES_COMPLETED irq */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
+							CLEAN_CACHES_COMPLETED);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+						CLEAN_CACHES_COMPLETED, NULL);
+	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING,
+	    "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+	mutex_unlock(&kbdev->cacheclean_lock);
+
+	kbase_pm_unrequest_cores(kbdev, false,
+					katom->need_cache_flush_cores_retained);
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	/*
+	 * If cache flush required due to HW workaround then perform the flush
+	 * now
+	 */
+	if (katom->need_cache_flush_cores_retained) {
+		kbase_gpu_cacheclean(kbdev, katom);
+		katom->need_cache_flush_cores_retained = 0;
+	}
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969)            &&
+	    (katom->core_req & BASE_JD_REQ_FS)                        &&
+	    katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT       &&
+	    (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+	    !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+		dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+		if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after was soft-stopped
+			 * Due to an HW issue we try to execute the job again.
+			 */
+			dev_dbg(kbdev->dev,
+				"Clamping has been executed, try to rerun the job\n"
+			);
+			katom->event_code = BASE_JD_EVENT_STOPPED;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+		}
+	}
+
+	/* Clear the coreref_state now - while check_deref_cores() may not have
+	 * been called yet, the caller will have taken a copy of this field. If
+	 * this is not done, then if the atom is re-scheduled (following a soft
+	 * stop) then the core reference would not be retaken. */
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->affinity = 0;
+}
+
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity,
+			coreref_state);
+
+	if (!kbdev->pm.active_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		mutex_lock(&kbdev->pm.lock);
+		kbase_pm_update_active(kbdev);
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+void kbase_gpu_dump_slots(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	unsigned long flags;
+	int js;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+									js,
+									idx);
+
+			if (katom)
+				dev_info(kbdev->dev,
+				"  js%d idx%d : katom=%p gpu_rb_state=%d\n",
+				js, idx, katom, katom->gpu_rb_state);
+			else
+				dev_info(kbdev->dev, "  js%d idx%d : empty\n",
+								js, idx);
+		}
+	}
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
+
+
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
new file mode 100755
index 0000000..102d94b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -0,0 +1,87 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_H_
+#define _KBASE_HWACCESS_GPU_H_
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/**
+ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot
+ *
+ * @kbdev:         Device pointer
+ * @js:            Job slot to evict from
+ *
+ * Evict the atom in the NEXT slot for the specified job slot. This function is
+ * called from the job complete IRQ handler when the previous job has failed.
+ *
+ * Return: true if job evicted from NEXT registers, false otherwise
+ */
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_gpu_complete_hw - Complete an atom on job slot js
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot that has completed
+ * @completion_code: Event code from job that has completed
+ * @job_tail:        The tail address from the hardware if the job has partially
+ *                   completed
+ * @end_timestamp:   Time of completion
+ */
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
+ *
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ * @idx:    Index into ringbuffer. 0 is the job currently running on
+ *          the slot, 1 is the job waiting, all other values are invalid.
+ * Return:  The atom at that position in the ringbuffer
+ *          or NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx);
+
+/**
+ * kbase_gpu_slot_update - Update state based on slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ *
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ */
+void kbase_gpu_slot_update(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
new file mode 100755
index 0000000..5459281
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
@@ -0,0 +1,299 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+									int js)
+{
+	/*
+	 * Here are the reasons for using job slot 2:
+	 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+	 * - In absence of the above, then:
+	 *  - Atoms with BASE_JD_REQ_COHERENT_GROUP
+	 *  - But, only when there aren't contexts with
+	 *  KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+	 *  all cores on slot 1 could be blocked by those using a coherent group
+	 *  on slot 2
+	 *  - And, only when you actually have 2 or more coregroups - if you
+	 *  only have 1 coregroup, then having jobs for slot 2 implies they'd
+	 *  also be for slot 1, meaning you'll get interference from them. Jobs
+	 *  able to run on slot 2 could also block jobs that can only run on
+	 *  slot 1 (tiler jobs)
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+
+	if (js != 2)
+		return true;
+
+	/* Only deal with js==2 now: */
+	if (kbdev->gpu_props.num_core_groups > 1) {
+		/* Only use slot 2 in the 2+ coregroup case */
+		if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+					KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
+								false) {
+			/* ...But only when we *don't* have atoms that run on
+			 * all cores */
+
+			/* No specific check for BASE_JD_REQ_COHERENT_GROUP
+			 * atoms - the policy will sort that out */
+			return true;
+		}
+	}
+
+	/* Above checks failed mean we shouldn't use slot 2 */
+	return false;
+}
+
+/*
+ * As long as it has been decided to have a deeper modification of
+ * what job scheduler, power manager and affinity manager will
+ * implement, this function is just an intermediate step that
+ * assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ *   and high cores in a core split will be evently distributed.
+ * - Odd combinations of core requirements have been filtered out
+ *   and do not get to this function (e.g. CS+T+NSS is not
+ *   supported here).
+ * - This function is frequently called and can be optimized,
+ *   (see notes in loops), but as the functionallity will likely
+ *   be modified, optimization has not been addressed.
+*/
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js)
+{
+	base_jd_core_req core_req = katom->core_req;
+	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+	u64 core_availability_mask;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+	/*
+	 * If no cores are currently available (core availability policy is
+	 * transitioning) then fail.
+	 */
+	if (0 == core_availability_mask) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+		*affinity = 0;
+		return false;
+	}
+
+	KBASE_DEBUG_ASSERT(js >= 0);
+
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+								BASE_JD_REQ_T) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+		/* Tiler only job, bit 0 needed to enable tiler but no shader
+		 * cores required */
+		*affinity = 1;
+		return true;
+	}
+
+	if (1 == kbdev->gpu_props.num_cores) {
+		/* trivial case only one core, nothing to do */
+		*affinity = core_availability_mask &
+				kbdev->pm.debug_core_mask[js];
+	} else {
+		if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+			if (js == 0 || num_core_groups == 1) {
+				/* js[0] and single-core-group systems just get
+				 * the first core group */
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[0].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+			} else {
+				/* js[1], js[2] use core groups 0, 1 for
+				 * dual-core-group systems */
+				u32 core_group_idx = ((u32) js) - 1;
+
+				KBASE_DEBUG_ASSERT(core_group_idx <
+							num_core_groups);
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+
+				/* If the job is specifically targeting core
+				 * group 1 and the core availability policy is
+				 * keeping that core group off, then fail */
+				if (*affinity == 0 && core_group_idx == 1 &&
+						kbdev->pm.backend.cg1_disabled
+								== true)
+					katom->event_code =
+							BASE_JD_EVENT_PM_EVENT;
+			}
+		} else {
+			/* All cores are available when no core split is
+			 * required */
+			*affinity = core_availability_mask &
+					kbdev->pm.debug_core_mask[js];
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	/*
+	 * If no cores are currently available in the desired core group(s)
+	 * (core availability policy is transitioning) then fail.
+	 */
+	if (*affinity == 0)
+		return false;
+
+	/* Enable core 0 if tiler required */
+	if (core_req & BASE_JD_REQ_T)
+		*affinity = *affinity | 1;
+
+	return true;
+}
+
+static inline bool kbase_js_affinity_is_violating(
+						struct kbase_device *kbdev,
+								u64 *affinities)
+{
+	/* This implementation checks whether the two slots involved in Generic
+	 * thread creation have intersecting affinity. This is due to micro-
+	 * architectural issues where a job in slot A targetting cores used by
+	 * slot B could prevent the job in slot B from making progress until the
+	 * job in slot A has completed.
+	 */
+	u64 affinity_set_left;
+	u64 affinity_set_right;
+	u64 intersection;
+
+	KBASE_DEBUG_ASSERT(affinities != NULL);
+
+	affinity_set_left = affinities[1];
+
+	affinity_set_right = affinities[2];
+
+	/* A violation occurs when any bit in the left_set is also in the
+	 * right_set */
+	intersection = affinity_set_left & affinity_set_right;
+
+	return (bool) (intersection != (u64) 0u);
+}
+
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
+			sizeof(js_devdata->runpool_irq.slot_affinities));
+
+	new_affinities[js] |= affinity;
+
+	return kbase_js_affinity_is_violating(kbdev, new_affinities);
+}
+
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
+								== false);
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		cnt =
+		++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (cnt == 1)
+			js_devdata->runpool_irq.slot_affinities[js] |= bit;
+
+		cores &= ~bit;
+	}
+}
+
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		KBASE_DEBUG_ASSERT(
+		js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
+
+		cnt =
+		--(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (0 == cnt)
+			js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
+
+		cores &= ~bit;
+	}
+}
+
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int slot_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	for (slot_nr = 0; slot_nr < 3; ++slot_nr)
+		KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
+							NULL, 0u, slot_nr,
+			(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
new file mode 100755
index 0000000..3026e6a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
@@ -0,0 +1,138 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Affinity Manager internal APIs.
+ */
+
+#ifndef _KBASE_JS_AFFINITY_H_
+#define _KBASE_JS_AFFINITY_H_
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+/* Import the external affinity mask variables */
+extern u64 mali_js0_affinity_mask;
+extern u64 mali_js1_affinity_mask;
+extern u64 mali_js2_affinity_mask;
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+
+/**
+ * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
+ * submit a job to a particular job slot in the current status
+ *
+ * @kbdev: The kbase device structure of the device
+ * @js:    Job slot number to check for allowance
+ *
+ * Will check if submitting to the given job slot is allowed in the current
+ * status.  For example using job slot 2 while in soft-stoppable state and only
+ * having 1 coregroup is not allowed by the policy. This function should be
+ * called prior to submitting a job to a slot to make sure policy rules are not
+ * violated.
+ *
+ * The following locking conditions are made on the caller
+ * - it must hold kbasep_js_device_data.runpool_irq.lock
+ */
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+									int js);
+
+/**
+ * kbase_js_choose_affinity - Compute affinity for a given job.
+ *
+ * @affinity: Affinity bitmap computed
+ * @kbdev:    The kbase device structure of the device
+ * @katom:    Job chain of which affinity is going to be found
+ * @js:       Slot the job chain is being submitted
+ *
+ * Currently assumes an all-on/all-off power management policy.
+ * Also assumes there is at least one core with tiler available.
+ *
+ * Returns true if a valid affinity was chosen, false if
+ * no cores were available.
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					int js);
+
+/**
+ * kbase_js_affinity_would_violate - Determine whether a proposed affinity on
+ * job slot @js would cause a violation of affinity restrictions.
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot to test
+ * @affinity: The affinity mask to test
+ *
+ * The following locks must be held by the caller
+ * - kbasep_js_device_data.runpool_irq.lock
+ *
+ * Return: true if the affinity would violate the restrictions
+ */
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
+ *                                       a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot retaining the cores
+ * @affinity: The cores to retain
+ *
+ * The following locks must be held by the caller
+ * - kbasep_js_device_data.runpool_irq.lock
+ */
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
+ *                                        by a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       Job slot
+ * @affinity: Bit mask of core to be released
+ *
+ * Cores must be released as soon as a job is dequeued from a slot's 'submit
+ * slots', and before another job is submitted to those slots. Otherwise, the
+ * refcount could exceed the maximum number submittable to a slot,
+ * %BASE_JM_SUBMIT_SLOTS.
+ *
+ * The following locks must be held by the caller
+ * - kbasep_js_device_data.runpool_irq.lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_debug_log_current_affinities - log the current affinities
+ *
+ * @kbdev:  Kbase device structure
+ *
+ * Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else				/*  KBASE_TRACE_ENABLE  */
+static inline void
+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+}
+#endif				/*  KBASE_TRACE_ENABLE  */
+
+#endif				/* _KBASE_JS_AFFINITY_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100755
index 0000000..1e9a7e4
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,337 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is
+ * enabled for a particular level is down to the integrator. However this is
+ * being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	s8 nr_running_ctxs;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* Timer must stop if we are suspending */
+	if (backend->suspend_timer)
+		return false;
+
+	/* nr_contexts_pullable is updated with the runpool_mutex. However, the
+	 * locking in the caller gives us a barrier that ensures
+	 * nr_contexts_pullable is up-to-date for reading */
+	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (kbdev->js_data.softstop_always) {
+		/* Debug support for allowing soft-stop on a single context */
+		return true;
+	}
+#endif				/* CONFIG_MALI_DEBUG */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+		/* Timeouts would have to be 4x longer (due to micro-
+		 * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there's:
+		 * - 2 or more CL contexts
+		 * - 1 or more GLES contexts
+		 *
+		 * NOTE: We will treat a context that has both Compute and Non-
+		 * Compute jobs will be treated as an OpenCL context (hence, we
+		 * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+		 */
+		{
+			s8 nr_compute_ctxs =
+				kbasep_js_ctx_attr_count_on_runpool(kbdev,
+						KBASEP_JS_CTX_ATTR_COMPUTE);
+			s8 nr_noncompute_ctxs = nr_running_ctxs -
+							nr_compute_ctxs;
+
+			return (bool) (nr_compute_ctxs >= 2 ||
+							nr_noncompute_ctxs > 0);
+		}
+	} else {
+		/* Run the timer callback whenever you have at least 1 context
+		 */
+		return (bool) (nr_running_ctxs > 0);
+	}
+}
+
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_backend_data *backend;
+	int s;
+	bool reset_needed = false;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	backend = container_of(timer, struct kbase_backend_data,
+							scheduling_timer);
+	kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
+	js_devdata = &kbdev->js_data;
+
+	/* Loop through the slots */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+		struct kbase_jd_atom *atom = NULL;
+
+		if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
+			atom = kbase_gpu_inspect(kbdev, s, 0);
+			KBASE_DEBUG_ASSERT(atom != NULL);
+		}
+
+		if (atom != NULL) {
+			/* The current version of the model doesn't support
+			 * Soft-Stop */
+			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+				u32 ticks = atom->sched_info.cfs.ticks++;
+
+#if !CINSTR_DUMPING_ENABLED
+				u32 soft_stop_ticks, hard_stop_ticks,
+								gpu_reset_ticks;
+				if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks_cl;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_cl;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_cl;
+				} else {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_ss;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_ss;
+				}
+
+				/* Job is Soft-Stoppable */
+				if (ticks == soft_stop_ticks) {
+					int disjoint_threshold =
+		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+					u32 softstop_flags = 0u;
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks ticks.
+					 * Soft stop the slot so we can run
+					 * other jobs.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+					/* nr_user_contexts_running is updated
+					 * with the runpool_mutex, but we can't
+					 * take that here.
+					 *
+					 * However, if it's about to be
+					 * increased then the new context can't
+					 * run any jobs until they take the
+					 * runpool_irq lock, so it's OK to
+					 * observe the older value.
+					 *
+					 * Similarly, if it's about to be
+					 * decreased, the last job from another
+					 * context has already finished, so it's
+					 * not too bad that we observe the older
+					 * value and register a disjoint event
+					 * when we try soft-stopping */
+					if (js_devdata->nr_user_contexts_running
+							>= disjoint_threshold)
+						softstop_flags |=
+						JS_COMMAND_SW_CAUSES_DISJOINT;
+
+					kbase_job_slot_softstop_swflags(kbdev,
+						s, atom, softstop_flags);
+#endif
+				} else if (ticks == hard_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_ss ticks.
+					 * It should have been soft-stopped by
+					 * now. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks == gpu_reset_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_ss ticks.
+					 * It should have left the GPU by now.
+					 * Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#else				/* !CINSTR_DUMPING_ENABLED */
+				/* NOTE: During CINSTR_DUMPING_ENABLED, we use
+				 * the alternate timeouts, which makes the hard-
+				 * stop and GPU reset timeout much longer. We
+				 * also ensure that we don't soft-stop at all.
+				 */
+				if (ticks == js_devdata->soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks. We do
+					 * not soft-stop during
+					 * CINSTR_DUMPING_ENABLED, however.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+				} else if (ticks ==
+					js_devdata->hard_stop_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_dumping
+					 * ticks. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks ==
+					js_devdata->gpu_reset_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_dumping
+					 * ticks. It should have left the GPU by
+					 * now. Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#endif				/* !CINSTR_DUMPING_ENABLED */
+			}
+		}
+	}
+#if KBASE_GPU_RESET_EN
+	if (reset_needed) {
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* the timer is re-issued if there is contexts in the run-pool */
+
+	if (backend->timer_running)
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+			HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	unsigned long flags;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	if (!timer_callback_should_run(kbdev)) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+		backend->timer_running = false;
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		/* From now on, return value of timer_callback_should_run() will
+		 * also cause the timer to not requeue itself. Its return value
+		 * cannot change, because it depends on variables updated with
+		 * the runpool_mutex held, which the caller of this must also
+		 * hold */
+		hrtimer_cancel(&backend->scheduling_timer);
+	}
+
+	if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+		backend->timer_running = true;
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+							HRTIMER_MODE_REL);
+
+		KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
+									0u);
+	}
+}
+
+int kbase_backend_timer_init(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	backend->scheduling_timer.function = timer_callback;
+
+	backend->timer_running = false;
+
+	return 0;
+}
+
+void kbase_backend_timer_term(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_cancel(&backend->scheduling_timer);
+}
+
+void kbase_backend_timer_suspend(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = true;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timer_resume(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = false;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
new file mode 100755
index 0000000..3f53779
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#ifndef _KBASE_JS_BACKEND_H_
+#define _KBASE_JS_BACKEND_H_
+
+/**
+ * kbase_backend_timer_init() - Initialise the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_term() - Terminate the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver termination
+ */
+void kbase_backend_timer_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
+ *                               timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on suspend, after the active count has reached
+ * zero. This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ *                              scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that is is not guaranteed to
+ * re-start the timer, only evalute whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100755
index 0000000..c6c7b89
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,365 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_mmu_hw_direct.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+		u32 num_pages)
+{
+	u64 region;
+
+	/* can't lock a zero sized range */
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	region = pfn << PAGE_SHIFT;
+	/*
+	 * fls returns (given the ASSERT above):
+	 * 1 .. 32
+	 *
+	 * 10 + fls(num_pages)
+	 * results in the range (11 .. 42)
+	 */
+
+	/* gracefully handle num_pages being zero */
+	if (0 == num_pages) {
+		region |= 11;
+	} else {
+		u8 region_width;
+
+		region_width = 10 + fls(num_pages);
+		if (num_pages != (1ul << (region_width - 11))) {
+			/* not pow2, so must go up to the next pow2 */
+			region_width += 1;
+		}
+		KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+		KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+		region |= region_width;
+	}
+
+	return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+		unsigned int as_nr, struct kbase_context *kctx)
+{
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending. Do not log remaining register accesses. */
+	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
+
+	if (max_loops == 0) {
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		return -1;
+	}
+
+	/* If waiting in loop was performed, log last read value. */
+	if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
+		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	return 0;
+}
+
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
+		struct kbase_context *kctx)
+{
+	int status;
+
+	/* write AS_COMMAND when MMU is ready to accept another command */
+	status = wait_ready(kbdev, as_nr, kctx);
+	if (status == 0)
+		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
+									kctx);
+
+	return status;
+}
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+	const int num_as = 16;
+	const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+	const int pf_shift = 0;
+	const unsigned long as_bit_mask = (1UL << num_as) - 1;
+	unsigned long flags;
+	u32 new_mask;
+	u32 tmp;
+
+	/* bus faults */
+	u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+	/* page faults (note: Ignore ASes with both pf and bf) */
+	u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	/* remember current mask */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	/* mask interrupts for now */
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+	while (bf_bits | pf_bits) {
+		struct kbase_as *as;
+		int as_no;
+		struct kbase_context *kctx;
+
+		/*
+		 * the while logic ensures we have a bit set, no need to check
+		 * for not-found here
+		 */
+		as_no = ffs(bf_bits | pf_bits) - 1;
+		as = &kbdev->as[as_no];
+
+		/*
+		 * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+		 * Bus/Page faults _should_ only occur whilst jobs are running,
+		 * and a job causing the Bus/Page fault shouldn't complete until
+		 * the MMU is updated
+		 */
+		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+
+		/* find faulting address */
+		as->fault_addr = kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_HI),
+						kctx);
+		as->fault_addr <<= 32;
+		as->fault_addr |= kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_LO),
+						kctx);
+
+		/* record the fault status */
+		as->fault_status = kbase_reg_read(kbdev,
+						  MMU_AS_REG(as_no,
+							AS_FAULTSTATUS),
+						  kctx);
+
+		/* find the fault type */
+		as->fault_type = (bf_bits & (1 << as_no)) ?
+				KBASE_MMU_FAULT_TYPE_BUS :
+				KBASE_MMU_FAULT_TYPE_PAGE;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		as->fault_extra_addr = kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
+				kctx);
+		as->fault_extra_addr <<= 32;
+		as->fault_extra_addr |= kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
+				kctx);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+		if (kbase_as_has_bus_fault(as)) {
+			/* Mark bus fault as handled.
+			 * Note that a bus fault is processed first in case
+			 * where both a bus fault and page fault occur.
+			 */
+			bf_bits &= ~(1UL << as_no);
+
+			/* remove the queued BF (and PF) from the mask */
+			new_mask &= ~(MMU_BUS_ERROR(as_no) |
+					MMU_PAGE_FAULT(as_no));
+		} else {
+			/* Mark page fault as handled */
+			pf_bits &= ~(1UL << as_no);
+
+			/* remove the queued PF from the mask */
+			new_mask &= ~MMU_PAGE_FAULT(as_no);
+		}
+
+		/* Process the interrupt for this address space */
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+		kbase_mmu_interrupt_process(kbdev, kctx, as);
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock,
+				flags);
+	}
+
+	/* reenable interrupts */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	new_mask |= tmp;
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx)
+{
+	struct kbase_mmu_setup *current_setup = &as->current_setup;
+	u32 transcfg = 0;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+
+	/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
+	/* Clear PTW_MEMATTR bits */
+	transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+	/* Enable correct PTW_MEMATTR bits */
+	transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
+		/* Clear PTW_SH bits */
+		transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+		/* Enable correct PTW_SH bits */
+		transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
+	}
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+			transcfg, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+			(current_setup->transcfg >> 32) & 0xFFFFFFFFUL, kctx);
+
+#else /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+			current_setup->transtab & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+			(current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+			current_setup->memattr & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+			(current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_tlstream_tl_attrib_as_config(as,
+			current_setup->transtab,
+			current_setup->memattr,
+			transcfg);
+
+	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+}
+
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+		unsigned int handling_irq)
+{
+	int ret;
+
+	if (op == AS_COMMAND_UNLOCK) {
+		/* Unlock doesn't require a lock first */
+		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+	} else {
+		u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+		/* Lock the region that needs to be updated */
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+				lock_addr & 0xFFFFFFFFUL, kctx);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+				(lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
+		write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+
+		/* Run the MMU operation */
+		write_cmd(kbdev, as->number, op, kctx);
+
+		/* Wait for the flush to complete */
+		ret = wait_ready(kbdev, as->number, kctx);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+			/* Issue an UNLOCK command to ensure that valid page
+			   tables are re-read by the GPU after an update.
+			   Note that, the FLUSH command should perform all the
+			   actions necessary, however the bus logs show that if
+			   multiple page faults occur within an 8 page region
+			   the MMU does not always re-read the updated page
+			   table entries for later faults or is only partially
+			   read, it subsequently raises the page fault IRQ for
+			   the same addresses, the unlock ensures that the MMU
+			   cache is flushed, so updates can be re-read.  As the
+			   region is now unlocked we need to issue 2 UNLOCK
+			   commands in order to flush the MMU/uTLB,
+			   see PRLAM-8812.
+			 */
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+		}
+	}
+
+	return ret;
+}
+
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 pf_bf_mask;
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	/* Clear the page (and bus fault IRQ as well in case one occurred) */
+	pf_bf_mask = MMU_PAGE_FAULT(as->number);
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		pf_bf_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 irq_mask;
+
+	/* Enable the page fault IRQ (and bus fault IRQ as well in case one
+	 * occurred) */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
+			MMU_PAGE_FAULT(as->number);
+
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		irq_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
new file mode 100755
index 0000000..c02253c
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Interface file for the direct implementation for MMU hardware access
+ *
+ * Direct MMU hardware interface
+ *
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
+#define _MALI_KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * kbase_mmu_interrupt - Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the &kbase_device.
+ *
+ * @kbdev:          kbase context to clear the fault from.
+ * @irq_stat:       Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+#endif	/* _MALI_KBASE_MMU_HW_DIRECT_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
new file mode 100755
index 0000000..0614348
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static void always_on_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void always_on_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+	"always_on",			/* name */
+	always_on_init,			/* init */
+	always_on_term,			/* term */
+	always_on_get_core_mask,	/* get_core_mask */
+	always_on_get_core_active,	/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_ALWAYS_ON,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
new file mode 100755
index 0000000..f9d244b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -0,0 +1,77 @@
+
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * DOC:
+ * The "Always on" power management policy has the following
+ * characteristics:
+ *
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *    All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *    All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *    The Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed. The GPU itself is also kept powered, even though it is not
+ *    needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * Note:
+ *
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data
+ * @dummy: unused dummy variable
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_always_on {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
new file mode 100755
index 0000000..5805efe
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -0,0 +1,387 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * GPU backend implementation of base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_on_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = true;
+}
+
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_off_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = false;
+}
+
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+	struct kbase_pm_callback_conf *callbacks;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_init(&kbdev->pm.lock);
+
+	kbdev->pm.backend.gpu_powered = false;
+	kbdev->pm.suspending = false;
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->pm.backend.driver_ready_for_irqs = false;
+#endif /* CONFIG_MALI_DEBUG */
+	kbdev->pm.backend.gpu_in_desired_state = true;
+	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+	if (callbacks) {
+		kbdev->pm.backend.callback_power_on =
+					callbacks->power_on_callback;
+		kbdev->pm.backend.callback_power_off =
+					callbacks->power_off_callback;
+		kbdev->pm.backend.callback_power_suspend =
+					callbacks->power_suspend_callback;
+		kbdev->pm.backend.callback_power_resume =
+					callbacks->power_resume_callback;
+		kbdev->pm.callback_power_runtime_init =
+					callbacks->power_runtime_init_callback;
+		kbdev->pm.callback_power_runtime_term =
+					callbacks->power_runtime_term_callback;
+		kbdev->pm.backend.callback_power_runtime_on =
+					callbacks->power_runtime_on_callback;
+		kbdev->pm.backend.callback_power_runtime_off =
+					callbacks->power_runtime_off_callback;
+		kbdev->pm.backend.callback_power_runtime_idle =
+					callbacks->power_runtime_idle_callback;
+	} else {
+		kbdev->pm.backend.callback_power_on = NULL;
+		kbdev->pm.backend.callback_power_off = NULL;
+		kbdev->pm.backend.callback_power_suspend = NULL;
+		kbdev->pm.backend.callback_power_resume = NULL;
+		kbdev->pm.callback_power_runtime_init = NULL;
+		kbdev->pm.callback_power_runtime_term = NULL;
+		kbdev->pm.backend.callback_power_runtime_on = NULL;
+		kbdev->pm.backend.callback_power_runtime_off = NULL;
+		kbdev->pm.backend.callback_power_runtime_idle = NULL;
+	}
+
+	/* Initialise the metrics subsystem */
+	ret = kbasep_pm_metrics_init(kbdev);
+	if (ret)
+		return ret;
+
+	init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
+	kbdev->pm.backend.l2_powered = 0;
+
+	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
+	kbdev->pm.backend.reset_done = false;
+
+	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+	kbdev->pm.active_count = 0;
+
+	spin_lock_init(&kbdev->pm.power_change_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+	if (kbase_pm_ca_init(kbdev) != 0)
+		goto workq_fail;
+
+	if (kbase_pm_policy_init(kbdev) != 0)
+		goto pm_policy_fail;
+
+	return 0;
+
+pm_policy_fail:
+	kbase_pm_ca_term(kbdev);
+workq_fail:
+	kbasep_pm_metrics_term(kbdev);
+	return -EINVAL;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Turn clocks and interrupts on - no-op if we haven't done a previous
+	 * kbase_pm_clock_off() */
+	kbase_pm_clock_on(kbdev, is_resume);
+
+	/* Update core status as required by the policy */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+	kbase_pm_update_cores_state(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+	/* NOTE: We don't wait to reach the desired state, since running atoms
+	 * will wait for that state to be reached anyway */
+}
+
+bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+	bool cores_are_available;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Force all cores off */
+	kbdev->pm.backend.desired_shader_state = 0;
+
+	/* Force all cores to be unavailable, in the situation where
+	 * transitions are in progress for some cores but not others,
+	 * and kbase_pm_check_transitions_nolock can not immediately
+	 * power off the cores */
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_available_bitmap = 0;
+
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	/* NOTE: We won't wait to reach the core's desired state, even if we're
+	 * powering off the GPU itself too. It's safe to cut the power whilst
+	 * they're transitioning to off, because the cores should be idle and
+	 * all cache flushes should already have occurred */
+
+	/* Consume any change-state events */
+	kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	/* Disable interrupts and turn the clock off */
+	return kbase_pm_clock_off(kbdev, is_suspend);
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, don't enable IRQs as we are not ready to receive
+	 * them. */
+	ret = kbase_pm_init_hw(kbdev, flags);
+	if (ret) {
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		return ret;
+	}
+
+	kbasep_pm_read_present_cores(kbdev);
+
+	kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
+			kbdev->pm.debug_core_mask[1] =
+			kbdev->pm.debug_core_mask[2] =
+			kbdev->gpu_props.props.raw_props.shader_present;
+
+	/* Pretend the GPU is active to prevent a power policy turning the GPU
+	 * cores off */
+	kbdev->pm.active_count = 1;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+	/* Ensure cycle counter is off */
+	kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+	/* We are ready to receive IRQ's now as power policy is set up, so
+	 * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+	kbdev->pm.backend.driver_ready_for_irqs = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+#endif
+	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the GPU and any cores needed by the policy */
+	kbase_pm_do_poweron(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	kbase_pm_context_idle(kbdev);
+
+	return 0;
+}
+
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	if (!kbase_pm_do_poweroff(kbdev, false)) {
+		/* Page/bus faults are pending, must drop pm.lock to process.
+		 * Interrupts are disabled so no more faults should be
+		 * generated at this point */
+		mutex_unlock(&kbdev->pm.lock);
+		kbase_flush_mmu_wqs(kbdev);
+		mutex_lock(&kbdev->pm.lock);
+		WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
+	}
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
+
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
+
+	/* Free any resources the policy allocated */
+	kbase_pm_policy_term(kbdev);
+	kbase_pm_ca_term(kbdev);
+
+	/* Shut down the metrics subsystem */
+	kbasep_pm_metrics_term(kbdev);
+}
+
+void kbase_pm_power_changed(struct kbase_device *kbdev)
+{
+	bool cores_are_available;
+	unsigned long flags;
+
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
+
+	if (cores_are_available) {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+		kbase_gpu_slot_update(kbdev);
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+	}
+}
+
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2)
+{
+	kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
+	kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
+	kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
+	kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
+			new_core_mask_js2;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	/* Force power off the GPU and all cores (regardless of policy), only
+	 * after the PM active count reaches zero (otherwise, we risk turning it
+	 * off prematurely) */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	if (!kbase_pm_do_poweroff(kbdev, true)) {
+		/* Page/bus faults are pending, must drop pm.lock to process.
+		 * Interrupts are disabled so no more faults should be
+		 * generated at this point */
+		mutex_unlock(&kbdev->pm.lock);
+		kbase_flush_mmu_wqs(kbdev);
+		mutex_lock(&kbdev->pm.lock);
+		WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
+	}
+
+	kbase_backend_timer_suspend(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbdev->pm.suspending = false;
+	kbase_pm_do_poweron(kbdev, true);
+
+	kbase_backend_timer_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100755
index 0000000..4eada33
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,183 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+	&kbase_pm_ca_fixed_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_ca_demand_policy_ops,
+	&kbase_pm_ca_random_policy_ops,
+#endif
+};
+
+/**
+ * POLICY_COUNT - The number of policies available in the system.
+ *
+ * This is derived from the number of functions listed in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.ca_current_policy = policy_list[0];
+
+	kbdev->pm.backend.ca_current_policy->init(kbdev);
+
+	return 0;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_current_policy->term(kbdev);
+}
+
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
+
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.ca_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
+
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *new_policy)
+{
+	const struct kbase_pm_ca_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
+								new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	old_policy = kbdev->pm.backend.ca_current_policy;
+	kbdev->pm.backend.ca_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.ca_current_policy = new_policy;
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+					kbdev->shader_ready_bitmap,
+					kbdev->shader_transitioning_bitmap);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
+
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	/* All cores must be enabled when instrumentation is in use */
+	if (kbdev->pm.backend.instr_enabled)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	if (kbdev->pm.backend.ca_current_policy == NULL)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
+						kbdev->pm.debug_core_mask_all;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
+
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+							u64 cores_transitioning)
+{
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	if (kbdev->pm.backend.ca_current_policy != NULL)
+		kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+							cores_ready,
+							cores_transitioning);
+}
+
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.instr_enabled = true;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.instr_enabled = false;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
new file mode 100755
index 0000000..ee9e751
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+/**
+ * kbase_pm_ca_init - Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the core availability framework was successfully initialized,
+ *         -errno otherwise
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_term - Terminate core availability framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the currently available shader cores.
+ * Calls into the core availability policy
+ *
+ * Return: The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_update_core_status - Update core status
+ *
+ * @kbdev:               The kbase device structure for the device (must be
+ *                       a valid pointer)
+ * @cores_ready:         The bit mask of cores ready for job submission
+ * @cores_transitioning: The bit mask of cores that are transitioning power
+ *                       state
+ *
+ * Update core availability policy with current core power status
+ *
+ * Calls into the core availability policy
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+/**
+ * kbase_pm_ca_instr_enable - Enable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This overrides the output of the core availability policy, ensuring that all
+ * cores are available
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_instr_disable - Disable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This disables any previously enabled override, and resumes normal policy
+ * functionality
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_CA_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
new file mode 100644
index 0000000..19f1f73
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
@@ -0,0 +1,246 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation.
+ *
+ * This policy periodically selects a new core mask at demand. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/printk.h>
+
+#ifndef MALI_CA_TIMER
+static struct workqueue_struct *mali_ca_wq = NULL;
+#endif
+static int num_shader_cores_to_be_enabled = 0;
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+#ifndef MALI_CA_TIMER
+	struct kbase_device *kbdev = (struct kbase_device *)priv;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+#endif
+
+    num_shader_cores_to_be_enabled = cores -1;
+    printk("pp num=%d\n", num_shader_cores_to_be_enabled);
+    if (num_shader_cores_to_be_enabled < 1)
+        num_shader_cores_to_be_enabled = 1;
+
+#ifndef MALI_CA_TIMER
+    queue_work(mali_ca_wq, &data->wq_work);
+#endif
+
+    return 0;
+}
+
+#ifdef MALI_CA_TIMER
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void demand_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+#if 0
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+#endif
+    data->cores_desired = num_shader_cores_to_be_enabled;
+
+	if (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+            dev_info(kbdev->dev, "error, ca demand policy : new core mask=%llX\n",
+				data->cores_desired);
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "ca demand policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(demand_timer_callback);
+#else
+static void do_ca_scaling(struct work_struct *work)
+{
+    //unsigned long flags;
+	struct kbase_device *kbdev = = container_of(work,
+            struct kbasep_pm_ca_policy_demand, wq_work);
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+    data->cores_desired =  num_shader_cores_to_be_enabled;
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+
+}
+#endif
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+#ifdef MALI_CA_TIMER
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = demand_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+#else
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+	mali_ca_wq = alloc_workqueue("gpu_ca_wq", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+	mali_ca_wq = create_workqueue("gpu_ca_wq");
+#endif
+	INIT_WORK(&data->wq_work, do_ca_scaling);
+    if (mali_ca_wq == NULL) printk("Unable to create gpu scaling workqueue\n");
+#endif
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+#ifdef MALI_CA_TIMER
+	del_timer(&data->core_change_timer);
+#else
+    if (mali_ca_wq) {
+        flush_workqueue(mali_ca_wq);
+        destroy_workqueue(mali_ca_wq);
+        mali_ca_wq = NULL;
+    }
+#endif
+}
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.demand.cores_enabled;
+}
+
+static void demand_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the demand core availability
+ * policy.
+ *
+ * This is the static structure that defines the demand core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_demand_policy_ops);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
new file mode 100644
index 0000000..a3dfe2a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_DEMAND_H
+#define MALI_KBASE_PM_CA_DEMAND_H
+#include <linux/workqueue.h>
+/**
+ * struct kbasep_pm_ca_policy_demand - Private structure for demand ca policy
+ *
+ * This contains data that is private to the demand core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+#define MALI_CA_TIMER 1
+struct kbasep_pm_ca_policy_demand {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+#ifdef MALI_CA_TIMER
+	struct timer_list core_change_timer;
+#else
+	struct work_struct wq_work;
+#endif
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops;
+extern int mali_perf_set_num_pp_cores(int cores);
+
+#endif /* MALI_KBASE_PM_CA_DEMAND_H */
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
new file mode 100755
index 0000000..1db6586
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static void fixed_init(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev,
+					u64 cores_ready,
+					u64 cores_transitioning)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(cores_ready);
+	CSTD_UNUSED(cores_transitioning);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the fixed power policy.
+ *
+ * This is the static structure that defines the fixed power policy's callback
+ * and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+	"fixed",			/* name */
+	fixed_init,			/* init */
+	fixed_term,			/* term */
+	fixed_get_core_mask,		/* get_core_mask */
+	fixed_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_FIXED,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
new file mode 100755
index 0000000..a763155
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Dummy member - no state is needed
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+	int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
new file mode 100644
index 0000000..0d1b220
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation.
+ *
+ * This policy periodically selects a new core mask at random. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void random_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(random_timer_callback);
+
+static void random_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = random_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+}
+
+static void random_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	del_timer(&data->core_change_timer);
+}
+
+static u64 random_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.random.cores_enabled;
+}
+
+static void random_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the random core availability
+ * policy.
+ *
+ * This is the static structure that defines the random core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops = {
+	"random",			/* name */
+	random_init,			/* init */
+	random_term,			/* term */
+	random_get_core_mask,		/* get_core_mask */
+	random_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_RANDOM,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_random_policy_ops);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
new file mode 100644
index 0000000..a8c9b15
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_RANDOM_H
+#define MALI_KBASE_PM_CA_RANDOM_H
+
+/**
+ * struct kbasep_pm_ca_policy_random - Private structure for random ca policy
+ *
+ * This contains data that is private to the random core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+struct kbasep_pm_ca_policy_random {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+	struct timer_list core_change_timer;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_RANDOM_H */
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100755
index 0000000..4873911
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count == 0)
+		return 0;
+
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap))
+		return false;
+
+	return true;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	"coarse_demand",			/* name */
+	coarse_demand_init,			/* init */
+	coarse_demand_term,			/* term */
+	coarse_demand_get_core_mask,		/* get_core_mask */
+	coarse_demand_get_core_active,		/* get_core_active */
+	0u,					/* flags */
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100755
index 0000000..749d305
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * DOC:
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
+ *                                         policy
+ *
+ * This contains data that is private to the coarse demand power policy.
+ *
+ * @dummy: Dummy member - no state needed
+ */
+struct kbasep_pm_policy_coarse_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100755
index 0000000..3eaf1a7
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,486 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#include "mali_kbase_pm_ca_demand.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/**
+ * enum kbase_pm_core_type - The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to
+ * - kbase_pm_get_present_cores()
+ * - kbase_pm_get_active_cores()
+ * - kbase_pm_get_trans_cores()
+ * - kbase_pm_get_ready_cores().
+ *
+ * They specify which type of core should be acted on.  These values are set in
+ * a manner that allows core_type_to_reg() function to be simpler and more
+ * efficient.
+ *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ */
+enum kbase_pm_core_type {
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO
+};
+
+/**
+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power
+ *                                 management framework.
+ *
+ *  @time_period_start: time at which busy/idle measurements started
+ *  @time_busy: number of ns the GPU was busy executing jobs since the
+ *          @time_period_start timestamp.
+ *  @time_idle: number of ns since time_period_start the GPU was not executing
+ *          jobs since the @time_period_start timestamp.
+ *  @prev_busy: busy time in ns of previous time period.
+ *           Updated when metrics are reset.
+ *  @prev_idle: idle time in ns of previous time period
+ *           Updated when metrics are reset.
+ *  @gpu_active: true when the GPU is executing jobs. false when
+ *           not. Updated when the job scheduler informs us a job in submitted
+ *           or removed from a GPU slot.
+ *  @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
+ *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
+ *           GL jobs never run on slot 2 this slot is not recorded.
+ *  @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ *  @timer: timer to regularly make DVFS decisions based on the power
+ *           management metrics.
+ *  @timer_active: boolean indicating @timer is running
+ *  @platform_data: pointer to data controlled by platform specific code
+ *  @kbdev: pointer to kbase device for which metrics are collected
+ *
+ */
+struct kbasep_pm_metrics_data {
+	ktime_t time_period_start;
+	u32 time_busy;
+	u32 time_idle;
+	u32 prev_busy;
+	u32 prev_idle;
+	bool gpu_active;
+	u32 busy_cl[2];
+	u32 busy_gl;
+	u32 active_cl_ctx[2];
+	u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
+	spinlock_t lock;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	struct hrtimer timer;
+	bool timer_active;
+#endif
+
+	void *platform_data;
+	struct kbase_device *kbdev;
+};
+
+union kbase_pm_policy_data {
+	struct kbasep_pm_policy_always_on always_on;
+	struct kbasep_pm_policy_coarse_demand coarse_demand;
+	struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+	struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+union kbase_pm_ca_policy_data {
+	struct kbasep_pm_ca_policy_fixed fixed;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_ca_policy_random random;
+	struct kbasep_pm_ca_policy_demand demand;
+#endif
+};
+
+/**
+ * struct kbase_pm_backend_data - Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ *
+ * @ca_current_policy: The policy that is currently actively controlling core
+ *                     availability.
+ * @pm_current_policy: The policy that is currently actively controlling the
+ *                     power state.
+ * @ca_policy_data:    Private data for current CA policy
+ * @pm_policy_data:    Private data for current PM policy
+ * @ca_in_transition:  Flag indicating when core availability policy is
+ *                     transitioning cores. The core availability policy must
+ *                     set this when a change in core availability is occurring.
+ *                     power_change_lock must be held when accessing this.
+ * @reset_done:        Flag when a reset is complete
+ * @reset_done_wait:   Wait queue to wait for changes to @reset_done
+ * @l2_powered_wait:   Wait queue for whether the l2 cache has been powered as
+ *                     requested
+ * @l2_powered:        State indicating whether all the l2 caches are powered.
+ *                     Non-zero indicates they're *all* powered
+ *                     Zero indicates that some (or all) are not powered
+ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
+ *                              users
+ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
+ * @desired_shader_state: A bit mask identifying the shader cores that the
+ *                        power policy would like to be on. The current state
+ *                        of the cores may be different, but there should be
+ *                        transitions in progress that will eventually achieve
+ *                        this state (assuming that the policy doesn't change
+ *                        its mind in the mean time).
+ * @powering_on_shader_state: A bit mask indicating which shader cores are
+ *                            currently in a power-on transition
+ * @desired_tiler_state: A bit mask identifying the tiler cores that the power
+ *                       policy would like to be on. See @desired_shader_state
+ * @powering_on_tiler_state: A bit mask indicating which tiler core are
+ *                           currently in a power-on transition
+ * @powering_on_l2_state: A bit mask indicating which l2-caches are currently
+ *                        in a power-on transition
+ * @gpu_in_desired_state: This flag is set if the GPU is powered as requested
+ *                        by the desired_xxx_state variables
+ * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
+ * @gpu_powered:       Set to true when the GPU is powered and register
+ *                     accesses are possible, false otherwise
+ * @instr_enabled:     Set to true when instrumentation is enabled,
+ *                     false otherwise
+ * @cg1_disabled:      Set if the policy wants to keep the second core group
+ *                     powered off
+ * @driver_ready_for_irqs: Debug state indicating whether sufficient
+ *                         initialization of the driver has occurred to handle
+ *                         IRQs
+ * @gpu_powered_lock:  Spinlock that must be held when writing @gpu_powered or
+ *                     accessing @driver_ready_for_irqs
+ * @metrics:           Structure to hold metrics for the GPU
+ * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
+ *                        powered off
+ * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
+ *                        are powered off
+ * @gpu_poweroff_timer: Timer for powering off GPU
+ * @gpu_poweroff_wq:   Workqueue to power off GPU on when timer fires
+ * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
+ * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
+ *                           timer callback
+ * @poweroff_timer_needed: true if the poweroff timer is currently required,
+ *                         false otherwise
+ * @poweroff_timer_running: true if the poweroff timer is currently running,
+ *                          false otherwise
+ *                          power_change_lock should be held when accessing,
+ *                          unless there is no way the timer can be running (eg
+ *                          hrtimer_cancel() was called immediately before)
+ * @callback_power_on: Callback when the GPU needs to be turned on. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_off: Callback when the GPU may be turned off. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to
+ *                          be turned off. See &struct kbase_pm_callback_conf
+ * @callback_power_resume: Callback when a resume occurs and the GPU needs to
+ *                          be turned on. See &struct kbase_pm_callback_conf
+ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See
+ *                             &struct kbase_pm_callback_conf
+ * @callback_power_runtime_off: Callback when the GPU may be turned off. See
+ *                              &struct kbase_pm_callback_conf
+ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
+ *                              &struct kbase_pm_callback_conf
+ *
+ * Note:
+ * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
+ * policy is being changed with kbase_pm_ca_set_policy() or
+ * kbase_pm_set_policy(). The change is protected under
+ * kbase_device.pm.power_change_lock. Direct access to this
+ * from IRQ context must therefore check for NULL. If NULL, then
+ * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
+ * functions that would have been done under IRQ.
+ */
+struct kbase_pm_backend_data {
+	const struct kbase_pm_ca_policy *ca_current_policy;
+	const struct kbase_pm_policy *pm_current_policy;
+	union kbase_pm_ca_policy_data ca_policy_data;
+	union kbase_pm_policy_data pm_policy_data;
+	bool ca_in_transition;
+	bool reset_done;
+	wait_queue_head_t reset_done_wait;
+	wait_queue_head_t l2_powered_wait;
+	int l2_powered;
+	int gpu_cycle_counter_requests;
+	spinlock_t gpu_cycle_counter_requests_lock;
+
+	u64 desired_shader_state;
+	u64 powering_on_shader_state;
+	u64 desired_tiler_state;
+	u64 powering_on_tiler_state;
+	u64 powering_on_l2_state;
+
+	bool gpu_in_desired_state;
+	wait_queue_head_t gpu_in_desired_state_wait;
+
+	bool gpu_powered;
+
+	bool instr_enabled;
+
+	bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+	bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+	spinlock_t gpu_powered_lock;
+
+
+	struct kbasep_pm_metrics_data metrics;
+
+	int gpu_poweroff_pending;
+	int shader_poweroff_pending_time;
+
+	struct hrtimer gpu_poweroff_timer;
+	struct workqueue_struct *gpu_poweroff_wq;
+	struct work_struct gpu_poweroff_work;
+
+	u64 shader_poweroff_pending;
+
+	bool poweroff_timer_needed;
+	bool poweroff_timer_running;
+
+	int (*callback_power_on)(struct kbase_device *kbdev);
+	void (*callback_power_off)(struct kbase_device *kbdev);
+	void (*callback_power_suspend)(struct kbase_device *kbdev);
+	void (*callback_power_resume)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
+};
+
+
+/* List of policy IDs */
+enum kbase_pm_policy_id {
+	KBASE_PM_POLICY_ID_DEMAND = 1,
+	KBASE_PM_POLICY_ID_ALWAYS_ON,
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
+	KBASE_PM_POLICY_ID_FAST_START
+#endif
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+/**
+ * struct kbase_pm_policy - Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core mask
+ * @get_core_active:    Function called to get the current overall GPU power
+ *                      state
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.pm_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core mask
+	 *
+	 * The returned mask should meet or exceed (kbdev->shader_needed_bitmap
+	 * | kbdev->shader_inuse_bitmap).
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: The mask of shader cores to be powered
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current overall GPU power state
+	 *
+	 * This function should consider the state of kbdev->pm.active_count. If
+	 * this count is greater than 0 then there is at least one active
+	 * context on the device and the GPU should be powered. If it is equal
+	 * to 0 then there are no active contexts and the GPU could be powered
+	 * off if desired.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: true if the GPU should be powered, false otherwise
+	 */
+	bool (*get_core_active)(struct kbase_device *kbdev);
+
+	kbase_pm_policy_flags flags;
+	enum kbase_pm_policy_id id;
+};
+
+
+enum kbase_pm_ca_policy_id {
+	KBASE_PM_CA_POLICY_ID_FIXED = 1,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_CA_POLICY_ID_RANDOM,
+	KBASE_PM_CA_POLICY_ID_DEMAND,
+#endif
+};
+
+typedef u32 kbase_pm_ca_policy_flags;
+
+/**
+ * struct kbase_pm_ca_policy - Core availability policy structure.
+ *
+ * Each core availability policy exposes a (static) instance of this structure
+ * which contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core
+ *                      availability mask
+ * @update_core_status: Function called to update the current core status
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_ca_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.ca_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core availability mask
+	 *
+	 * When a change in core availability is occurring, the policy must set
+	 * kbdev->pm.ca_in_transition to true. This is to indicate that
+	 * reporting changes in power state cannot be optimized out, even if
+	 * kbdev->pm.desired_shader_state remains unchanged. This must be done
+	 * by any functions internal to the Core Availability Policy that change
+	 * the return value of kbase_pm_ca_policy::get_core_mask.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *              valid pointer)
+	 *
+	 * Return: The current core availability mask
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to update the current core status
+	 *
+	 * If none of the cores in core group 0 are ready or transitioning, then
+	 * the policy must ensure that the next call to get_core_mask does not
+	 * return 0 for all cores in core group 0. It is an error to disable
+	 * core group 0 through the core availability policy.
+	 *
+	 * When a change in core availability has finished, the policy must set
+	 * kbdev->pm.ca_in_transition to false. This is to indicate that
+	 * changes in power state can once again be optimized out when
+	 * kbdev->pm.desired_shader_state is unchanged.
+	 *
+	 * @kbdev:               The kbase device structure for the device
+	 *                       (must be a valid pointer)
+	 * @cores_ready:         The mask of cores currently powered and
+	 *                       ready to run jobs
+	 * @cores_transitioning: The mask of cores currently transitioning
+	 *                       power state
+	 */
+	void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+	kbase_pm_ca_policy_flags flags;
+
+	/**
+	 * Field indicating an ID for this policy. This is not necessarily the
+	 * same as its index in the list returned by kbase_pm_list_policies().
+	 * It is used purely for debugging.
+	 */
+	enum kbase_pm_ca_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
new file mode 100755
index 0000000..9dac230
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
+
+	if (0 == kbdev->pm.active_count)
+		return 0;
+
+	return desired;
+}
+
+static bool demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap))
+		return false;
+
+	return true;
+}
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_get_core_active,		/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
new file mode 100755
index 0000000..c0c84b6
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#ifndef MALI_KBASE_PM_DEMAND_H
+#define MALI_KBASE_PM_DEMAND_H
+
+/**
+ * DOC: Demand power management policy
+ *
+ * The demand power management policy has the following characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - The Shader Cores are not powered up
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - Only those Shader Cores are powered up
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * Note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_demand - Private structure for policy instance data
+ *
+ * @dummy: No state is needed, a dummy variable
+ *
+ * This contains data that is private to the demand power policy.
+ */
+struct kbasep_pm_policy_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_DEMAND_H */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
new file mode 100755
index 0000000..d0fa253
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -0,0 +1,1546 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel Power Management hardware control
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_smc.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+
+#if MALI_MOCK_TEST
+#define MOCKABLE(function) function##_original
+#else
+#define MOCKABLE(function) function
+#endif				/* MALI_MOCK_TEST */
+
+/* Special value to indicate that the JM_CONFIG reg isn't currently used. */
+#define KBASE_JM_CONFIG_UNUSED (1<<31)
+
+static u64 kbase_pm_get_state(
+		struct kbase_device *kbdev,
+		enum kbase_pm_core_type core_type,
+		enum kbasep_pm_action action);
+
+/**
+ * core_type_to_reg - Decode a core type and action to a register.
+ *
+ * Given a core type (defined by kbase_pm_core_type) and an action (defined
+ * by kbasep_pm_action) this function will return the register offset that
+ * will perform the action on the core type. The register returned is the _LO
+ * register and an offset must be applied to use the _HI register.
+ *
+ * @core_type: The type of core
+ * @action:    The type of action
+ *
+ * Return: The register offset of the _LO register that performs an action of
+ * type @action on a core of type @core_type.
+ */
+static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
+						enum kbasep_pm_action action)
+{
+	return (u32)core_type + (u32)action;
+}
+
+#ifdef CONFIG_ARM64
+static void mali_cci_flush_l2(struct kbase_device *kbdev)
+{
+	const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED;
+	u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+	u32 raw;
+
+	/*
+	 * Note that we don't take the cache flush mutex here since
+	 * we expect to be the last user of the L2, all other L2 users
+	 * would have dropped their references, to initiate L2 power
+	 * down, L2 power down being the only valid place for this
+	 * to be called from.
+	 */
+
+	kbase_reg_write(kbdev,
+			GPU_CONTROL_REG(GPU_COMMAND),
+			GPU_COMMAND_CLEAN_INV_CACHES,
+			NULL);
+
+	raw = kbase_reg_read(kbdev,
+		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+		NULL);
+
+	/* Wait for cache flush to complete before continuing, exit on
+	 * gpu resets or loop expiry. */
+	while (((raw & mask) == 0) && --loops) {
+		raw = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+					NULL);
+	}
+}
+#endif
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo = cores & 0xFFFFFFFF;
+	u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	if (cores) {
+		if (action == ACTION_PWRON)
+			kbase_trace_mali_pm_power_on(core_type, cores);
+		else if (action == ACTION_PWROFF)
+			kbase_trace_mali_pm_power_off(core_type, cores);
+	}
+#endif
+
+	if (cores) {
+		u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
+
+		if (action == ACTION_PWRON)
+			state |= cores;
+		else if (action == ACTION_PWROFF)
+			state &= ~cores;
+		kbase_tlstream_aux_pm_state(core_type, state);
+	}
+
+	/* Tracing */
+	if (cores) {
+		if (action == ACTION_PWRON)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u,
+									lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL,
+									0u, lo);
+				break;
+			default:
+				break;
+			}
+		else if (action == ACTION_PWROFF)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL,
+									0u, lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
+									0u, lo);
+				/* disable snoops before L2 is turned off */
+				kbase_pm_cache_snoop_disable(kbdev);
+				break;
+			default:
+				break;
+			}
+	}
+
+	if (lo != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+	if (hi != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_invoke);
+
+/**
+ * kbase_pm_get_state - Get information about a core set
+ *
+ * This function gets information (chosen by @action) about a set of cores of
+ * a type given by @core_type. It is a static function used by
+ * kbase_pm_get_present_cores(), kbase_pm_get_active_cores(),
+ * kbase_pm_get_trans_cores() and kbase_pm_get_ready_cores().
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the should be queried
+ * @action:    The property of the cores to query
+ *
+ * Return: A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo, hi;
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+
+	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+	return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev)
+{
+	kbdev->shader_inuse_bitmap = 0;
+	kbdev->shader_needed_bitmap = 0;
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_users_count = 0;
+	kbdev->l2_available_bitmap = 0;
+	kbdev->tiler_needed_cnt = 0;
+	kbdev->tiler_inuse_cnt = 0;
+
+	memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_read_present_cores);
+
+/**
+ * kbase_pm_get_present_cores - Get the cores that are present
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (type) {
+	case KBASE_PM_CORE_L2:
+		return kbdev->gpu_props.props.raw_props.l2_present;
+	case KBASE_PM_CORE_SHADER:
+		return kbdev->gpu_props.props.raw_props.shader_present;
+	case KBASE_PM_CORE_TILER:
+		return kbdev->gpu_props.props.raw_props.tiler_present;
+	}
+	KBASE_DEBUG_ASSERT(0);
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * kbase_pm_get_active_cores - Get the cores that are "active"
+ *                             (busy processing work)
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are active
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between
+ *                            power states
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are transitioning
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * kbase_pm_get_ready_cores - Get the cores that are powered on
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are ready (powered on)
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	u64 result;
+
+	result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+	switch (type) {
+	case KBASE_PM_CORE_SHADER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_TILER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_L2:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * kbase_pm_transition_core_type - Perform power transitions for a particular
+ *                                 core type.
+ *
+ * This function will perform any available power transitions to make the actual
+ * hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that call cannot be made
+ * until the transition has finished. Cores which are not present in the
+ * hardware are ignored if they are specified in the desired_state bitmask,
+ * however the return value will always be 0 in this case.
+ *
+ * @kbdev:             The kbase device
+ * @type:              The core type to perform transitions for
+ * @desired_state:     A bit mask of the desired state of the cores
+ * @in_use:            A bit mask of the cores that are currently running
+ *                     jobs. These cores have to be kept powered up because
+ *                     there are jobs running (or about to run) on them.
+ * @available:         Receives a bit mask of the cores that the job
+ *                     scheduler can use to submit jobs to. May be NULL if
+ *                     this is not needed.
+ * @powering_on:       Bit mask to update with cores that are
+ *                    transitioning to a power-on state.
+ *
+ * Return: true if the desired state has been reached, false otherwise
+ */
+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type,
+						u64 desired_state,
+						u64 in_use,
+						u64 * const available,
+						u64 *powering_on)
+{
+	u64 present;
+	u64 ready;
+	u64 trans;
+	u64 powerup;
+	u64 powerdown;
+	u64 powering_on_trans;
+	u64 desired_state_in_use;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	/* Get current state */
+	present = kbase_pm_get_present_cores(kbdev, type);
+	trans = kbase_pm_get_trans_cores(kbdev, type);
+	ready = kbase_pm_get_ready_cores(kbdev, type);
+	/* mask off ready from trans in case transitions finished between the
+	 * register reads */
+	trans &= ~ready;
+
+	powering_on_trans = trans & *powering_on;
+	*powering_on = powering_on_trans;
+
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	/* Update desired state to include the in-use cores. These have to be
+	 * kept powered up because there are jobs running or about to run on
+	 * these cores
+	 */
+	desired_state_in_use = desired_state | in_use;
+
+	/* Update state of whether l2 caches are powered */
+	if (type == KBASE_PM_CORE_L2) {
+		if ((ready == present) && (desired_state_in_use == ready) &&
+								(trans == 0)) {
+			/* All are ready, none will be turned off, and none are
+			 * transitioning */
+			kbdev->pm.backend.l2_powered = 1;
+			/*
+			 * Ensure snoops are enabled after L2 is powered up,
+			 * note that kbase keeps track of the snoop state, so
+			 * safe to repeatedly call.
+			 */
+			kbase_pm_cache_snoop_enable(kbdev);
+			if (kbdev->l2_users_count > 0) {
+				/* Notify any registered l2 cache users
+				 * (optimized out when no users waiting) */
+				wake_up(&kbdev->pm.backend.l2_powered_wait);
+			}
+		} else
+			kbdev->pm.backend.l2_powered = 0;
+	}
+
+	if (desired_state_in_use == ready && (trans == 0))
+		return true;
+
+	/* Restrict the cores to those that are actually present */
+	powerup = desired_state_in_use & present;
+	powerdown = (~desired_state_in_use) & present;
+
+	/* Restrict to cores that are not already in the desired state */
+	powerup &= ~ready;
+	powerdown &= ready;
+
+	/* Don't transition any cores that are already transitioning, except for
+	 * Mali cores that support the following case:
+	 *
+	 * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
+	 * a core that is currently transitioning to power off, then this is
+	 * remembered and the shader core is automatically powered up again once
+	 * the original transition completes. Once the automatic power on is
+	 * complete any job scheduled on the shader core should start.
+	 */
+	powerdown &= ~trans;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
+		if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
+			trans = powering_on_trans; /* for exception cases, only
+						    * mask off cores in power on
+						    * transitions */
+
+	powerup &= ~trans;
+
+	/* Perform transitions if any */
+	kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
+	//kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
+
+	/* Recalculate cores transitioning on, and re-evaluate our state */
+	powering_on_trans |= powerup;
+	*powering_on = powering_on_trans;
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	return false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
+
+/**
+ * get_desired_cache_status - Determine which caches should be on for a
+ *                            particular core state
+ *
+ * This function takes a bit mask of the present caches and the cores (or
+ * caches) that are attached to the caches that will be powered. It then
+ * computes which caches should be turned on to allow the cores requested to be
+ * powered up.
+ *
+ * @present:       The bit mask of present caches
+ * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
+ *                 be powered
+ *
+ * Return: A bit mask of the caches that should be turned on
+ */
+static u64 get_desired_cache_status(u64 present, u64 cores_powered)
+{
+	u64 desired = 0;
+
+	while (present) {
+		/* Find out which is the highest set bit */
+		u64 bit = fls64(present) - 1;
+		u64 bit_mask = 1ull << bit;
+		/* Create a mask which has all bits from 'bit' upwards set */
+
+		u64 mask = ~(bit_mask - 1);
+
+		/* If there are any cores powered at this bit or above (that
+		 * haven't previously been processed) then we need this core on
+		 */
+		if (cores_powered & mask)
+			desired |= bit_mask;
+
+		/* Remove bits from cores_powered and present */
+		cores_powered &= ~mask;
+		present &= ~bit_mask;
+	}
+
+	return desired;
+}
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status);
+
+bool
+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+	bool cores_are_available = false;
+	bool in_desired_state = true;
+	u64 desired_l2_state;
+	u64 cores_powered;
+	u64 tiler_available_bitmap;
+	u64 shader_available_bitmap;
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+	u64 l2_available_bitmap;
+	u64 prev_l2_available_bitmap;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+	if (kbdev->pm.backend.gpu_powered == false) {
+		spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+		if (kbdev->pm.backend.desired_shader_state == 0 &&
+				kbdev->pm.backend.desired_tiler_state == 0)
+			return true;
+		return false;
+	}
+
+	/* Trace that a change-state is being requested, and that it took
+	 * (effectively) no time to start it. This is useful for counting how
+	 * many state changes occurred, in a way that's backwards-compatible
+	 * with processing the trace data */
+	kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+	kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+
+	/* If any cores are already powered then, we must keep the caches on */
+	cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+
+	cores_powered |= kbdev->pm.backend.desired_shader_state;
+
+	/* If there are l2 cache users registered, keep all l2s powered even if
+	 * all other cores are off. */
+	if (kbdev->l2_users_count > 0)
+		cores_powered |= kbdev->gpu_props.props.raw_props.l2_present;
+
+	desired_l2_state = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered);
+
+	/* If any l2 cache is on, then enable l2 #0, for use by job manager */
+	if (0 != desired_l2_state) {
+		desired_l2_state |= 1;
+		/* Also enable tiler if l2 cache is powered */
+		kbdev->pm.backend.desired_tiler_state =
+			kbdev->gpu_props.props.raw_props.tiler_present;
+	} else {
+		kbdev->pm.backend.desired_tiler_state = 0;
+	}
+
+	prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+	in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_L2, desired_l2_state, 0,
+				&l2_available_bitmap,
+				&kbdev->pm.backend.powering_on_l2_state);
+
+	if (kbdev->l2_available_bitmap != l2_available_bitmap)
+		KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
+
+	kbdev->l2_available_bitmap = l2_available_bitmap;
+
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_TILER,
+				kbdev->pm.backend.desired_tiler_state,
+				0, &tiler_available_bitmap,
+				&kbdev->pm.backend.powering_on_tiler_state);
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_SHADER,
+				kbdev->pm.backend.desired_shader_state,
+				kbdev->shader_inuse_bitmap,
+				&shader_available_bitmap,
+				&kbdev->pm.backend.powering_on_shader_state);
+
+		if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u,
+						(u32) shader_available_bitmap);
+			KBASE_TIMELINE_POWER_SHADER(kbdev,
+						shader_available_bitmap);
+		}
+
+		kbdev->shader_available_bitmap = shader_available_bitmap;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u,
+						(u32) tiler_available_bitmap);
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+		}
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+	} else if ((l2_available_bitmap &
+			kbdev->gpu_props.props.raw_props.tiler_present) !=
+			kbdev->gpu_props.props.raw_props.tiler_present) {
+		tiler_available_bitmap = 0;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+	}
+
+	/* State updated for slow-path waiters */
+	kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+
+	shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* Determine whether the cores are now available (even if the set of
+	 * available cores is empty). Note that they can be available even if
+	 * we've not finished transitioning to the desired state */
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state) {
+		cores_are_available = true;
+
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+				(u32)(kbdev->shader_available_bitmap &
+				kbdev->pm.backend.desired_shader_state));
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+				(u32)(kbdev->tiler_available_bitmap &
+				kbdev->pm.backend.desired_tiler_state));
+
+		/* Log timelining information about handling events that power
+		 * up cores, to match up either with immediate submission either
+		 * because cores already available, or from PM IRQ */
+		if (!in_desired_state)
+			kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+
+	if (in_desired_state) {
+		KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_L2));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_TILER));
+#endif
+
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_L2,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_L2));
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_SHADER,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_SHADER));
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_TILER,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_TILER));
+
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+				kbdev->pm.backend.gpu_in_desired_state,
+				(u32)kbdev->pm.backend.desired_shader_state);
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+				(u32)kbdev->pm.backend.desired_tiler_state);
+
+		/* Log timelining information for synchronous waiters */
+		kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+		/* Wake slow-path waiters. Job scheduler does not use this. */
+		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
+	}
+
+	spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+
+	/* kbase_pm_ca_update_core_status can cause one-level recursion into
+	 * this function, so it must only be called once all changes to kbdev
+	 * have been committed, and after the gpu_powered_lock has been
+	 * dropped. */
+	if (kbdev->shader_ready_bitmap != shader_ready_bitmap ||
+	    kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) {
+		kbdev->shader_ready_bitmap = shader_ready_bitmap;
+		kbdev->shader_transitioning_bitmap =
+						shader_transitioning_bitmap;
+
+		kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap,
+						shader_transitioning_bitmap);
+	}
+
+	/* The core availability policy is not allowed to keep core group 0
+	 * turned off (unless it was changing the l2 power state) */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
+		(prev_l2_available_bitmap == desired_l2_state) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+		BUG();
+
+	/* The core availability policy is allowed to keep core group 1 off,
+	 * but all jobs specifically targeting CG1 must fail */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask))
+		kbdev->pm.backend.cg1_disabled = true;
+	else
+		kbdev->pm.backend.cg1_disabled = false;
+
+	return cores_are_available;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock);
+
+/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has
+ * aborted due to a fatal signal. If the time spent waiting has exceeded this
+ * threshold then there is most likely a hardware issue. */
+#define PM_TIMEOUT (5*HZ) /* 5s */
+
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long timeout;
+	bool cores_are_available;
+	int ret;
+
+	/* Force the transition to be checked and reported - the cores may be
+	 * 'available' (for job submission) but not fully powered up. */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	timeout = jiffies + PM_TIMEOUT;
+
+	/* Wait for cores */
+	ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+			kbdev->pm.backend.gpu_in_desired_state);
+
+	if (ret < 0 && time_after(jiffies, timeout)) {
+		dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+		dev_err(kbdev->dev, "Desired state :\n");
+		dev_err(kbdev->dev, "\tShader=%016llx\n",
+				kbdev->pm.backend.desired_shader_state);
+		dev_err(kbdev->dev, "\tTiler =%016llx\n",
+				kbdev->pm.backend.desired_tiler_state);
+		dev_err(kbdev->dev, "Current state :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_LO),
+					NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_LO), NULL));
+		dev_err(kbdev->dev, "Cores transitioning :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_LO), NULL));
+#if KBASE_GPU_RESET_EN
+		dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+		if (kbase_prepare_to_reset_gpu(kbdev))
+			kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+	} else {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
+
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Clear all interrupts,
+	 * and unmask them all.
+	 */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
+									NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
+
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Mask all interrupts,
+	 * and clear them all.
+	 */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
+
+/*
+ * pmu layout:
+ * 0x0000: PMU TAG (RO) (0xCAFECAFE)
+ * 0x0004: PMU VERSION ID (RO) (0x00000000)
+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE)
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
+{
+	bool reset_required = is_resume;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_powered) {
+		/* Already turned on */
+		if (kbdev->poweroff_pending)
+			kbase_pm_enable_interrupts(kbdev);
+		kbdev->poweroff_pending = false;
+		KBASE_DEBUG_ASSERT(!is_resume);
+		return;
+	}
+
+	kbdev->poweroff_pending = false;
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u);
+
+	if (is_resume && kbdev->pm.backend.callback_power_resume) {
+		kbdev->pm.backend.callback_power_resume(kbdev);
+		return;
+	} else if (kbdev->pm.backend.callback_power_on) {
+		kbdev->pm.backend.callback_power_on(kbdev);
+		/* If your platform properly keeps the GPU state you may use the
+		 * return value of the callback_power_on function to
+		 * conditionally reset the GPU on power up. Currently we are
+		 * conservative and always reset the GPU. */
+		reset_required = true;
+	}
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	kbdev->pm.backend.gpu_powered = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (reset_required) {
+		/* GPU state was lost, reset GPU to ensure it is in a
+		 * consistent state */
+		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
+	}
+
+	/* Reprogram the GPU's MMU */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		mutex_lock(&as->transaction_mutex);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		if (js_devdata->runpool_irq.per_as_data[i].kctx)
+			kbase_mmu_update(
+				js_devdata->runpool_irq.per_as_data[i].kctx);
+		else
+			kbase_mmu_disable_as(kbdev, i);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&as->transaction_mutex);
+	}
+
+	/* Lastly, enable the interrupts */
+	kbase_pm_enable_interrupts(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
+
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* ASSERT that the cores should now be unavailable. No lock needed. */
+	KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+
+	kbdev->poweroff_pending = true;
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* Already turned off */
+		if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+			kbdev->pm.backend.callback_power_suspend(kbdev);
+		return true;
+	}
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u);
+
+	/* Disable interrupts. This also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure that any IRQ handlers have finished */
+	kbase_synchronize_irqs(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (atomic_read(&kbdev->faults_pending)) {
+		/* Page/bus faults are still being processed. The GPU can not
+		 * be powered off until they have completed */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return false;
+	}
+
+	kbase_pm_cache_snoop_disable(kbdev);
+
+	/* The GPU power may be turned off from this point */
+	kbdev->pm.backend.gpu_powered = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+		kbdev->pm.backend.callback_power_suspend(kbdev);
+	else if (kbdev->pm.backend.callback_power_off)
+		kbdev->pm.backend.callback_power_off(kbdev);
+	return true;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_off);
+
+struct kbasep_reset_timeout_data {
+	struct hrtimer timer;
+	bool timed_out;
+	struct kbase_device *kbdev;
+};
+
+void kbase_pm_reset_done(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	kbdev->pm.backend.reset_done = true;
+	wake_up(&kbdev->pm.backend.reset_done_wait);
+}
+
+/**
+ * kbase_pm_wait_for_reset - Wait for a reset to happen
+ *
+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
+ *
+ * @kbdev: Kbase device
+ */
+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	wait_event(kbdev->pm.backend.reset_done_wait,
+						(kbdev->pm.backend.reset_done));
+	kbdev->pm.backend.reset_done = false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done);
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+	struct kbasep_reset_timeout_data *rtdata =
+		container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+	rtdata->timed_out = 1;
+
+	/* Set the wait queue to wake up kbase_pm_init_hw even though the reset
+	 * hasn't completed */
+	kbase_pm_reset_done(rtdata->kbdev);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+	u32 jm_values[4];
+	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >>
+		GPU_ID_VERSION_MAJOR_SHIFT;
+
+	kbdev->hw_quirks_sc = 0;
+
+	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443.
+	 * and needed due to MIDGLES-3539. See PRLAM-11035 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+		kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
+
+	/* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+		kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	/* Enable alternative hardware counter selection if configured. */
+	if (!GPU_ID_IS_NEW_FORMAT(prod_id))
+		kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+#endif
+
+	/* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+		kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
+
+	if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
+		if (prod_id < 0x760 || prod_id == 0x6956) /* T60x, T62x, T72x */
+			kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
+		else if (prod_id >= 0x760 && prod_id <= 0x880) /* T76x, T8xx */
+			kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
+	}
+
+	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(TILER_CONFIG), NULL);
+
+	/* Set tiler clock gate override if required */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+		kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+
+	/* Limit the GPU bus bandwidth if the platform needs this. */
+	kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(L2_MMU_CONFIG), NULL);
+
+	/* Limit read ID width for AXI */
+	kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS);
+	kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT;
+
+	/* Limit write ID width for AXI */
+	kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+	kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Allow memory configuration disparity to be ignored, we
+		 * optimize the use of shared memory and thus we expect
+		 * some disparity in the memory configuration */
+		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
+	}
+
+	/* Only for T86x/T88x-based products after r2p0 */
+	if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) {
+		/* The JM_CONFIG register is specified as follows in the
+		 T86x/T88x Engineering Specification Supplement:
+		 The values are read from device tree in order.
+		*/
+#define TIMESTAMP_OVERRIDE  1
+#define CLOCK_GATE_OVERRIDE (1<<1)
+#define JOB_THROTTLE_ENABLE (1<<2)
+#define JOB_THROTTLE_LIMIT_SHIFT 3
+
+		/* 6 bits in the register */
+		const u32 jm_max_limit = 0x3F;
+
+		if (of_property_read_u32_array(np,
+					"jm_config",
+					&jm_values[0],
+					ARRAY_SIZE(jm_values))) {
+			/* Entry not in device tree, use defaults  */
+			jm_values[0] = 0;
+			jm_values[1] = 0;
+			jm_values[2] = 0;
+			jm_values[3] = jm_max_limit; /* Max value */
+		}
+
+		/* Limit throttle limit to 6 bits*/
+		if (jm_values[3] > jm_max_limit) {
+			dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63).");
+			jm_values[3] = jm_max_limit;
+		}
+
+		/* Aggregate to one integer. */
+		kbdev->hw_quirks_jm = (jm_values[0] ? TIMESTAMP_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[1] ? CLOCK_GATE_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[2] ? JOB_THROTTLE_ENABLE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[3] <<
+				JOB_THROTTLE_LIMIT_SHIFT);
+	} else {
+		kbdev->hw_quirks_jm = KBASE_JM_CONFIG_UNUSED;
+	}
+
+
+}
+
+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
+{
+	if (kbdev->hw_quirks_sc)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
+				kbdev->hw_quirks_sc, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
+			kbdev->hw_quirks_tiler, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
+			kbdev->hw_quirks_mmu, NULL);
+
+
+	if (kbdev->hw_quirks_jm != KBASE_JM_CONFIG_UNUSED)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
+				kbdev->hw_quirks_jm, NULL);
+
+}
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE) &&
+		!kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_enable_smc != 0)
+			kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0);
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n");
+		kbdev->cci_snoop_enabled = true;
+	}
+}
+
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE) &&
+		kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_disable_smc != 0) {
+			mali_cci_flush_l2(kbdev);
+			kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0);
+		}
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n");
+		kbdev->cci_snoop_enabled = false;
+	}
+}
+
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+{
+	unsigned long irq_flags;
+	struct kbasep_reset_timeout_data rtdata;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Ensure the clock is on before attempting to access the hardware */
+	if (!kbdev->pm.backend.gpu_powered) {
+		if (kbdev->pm.backend.callback_power_on)
+			kbdev->pm.backend.callback_power_on(kbdev);
+
+		spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+		kbdev->pm.backend.gpu_powered = true;
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+	}
+
+	/* Ensure interrupts are off to begin with, this also clears any
+	 * outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure cache snoops are disabled before reset. */
+	kbase_pm_cache_snoop_disable(kbdev);
+	/* Prepare for the soft-reset */
+	kbdev->pm.backend.reset_done = false;
+
+	/* The cores should be made unavailable due to the reset */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags);
+	if (kbdev->shader_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u, (u32)0u);
+	if (kbdev->tiler_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u, (u32)0u);
+	kbdev->shader_available_bitmap = 0u;
+	kbdev->tiler_available_bitmap = 0u;
+	kbdev->l2_available_bitmap = 0u;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags);
+
+	/* Soft reset the GPU */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+	kbase_tlstream_jd_gpu_soft_reset(kbdev);
+#endif
+#if 0
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_SOFT_RESET, NULL);
+#else
+  //dev_err(kbdev->dev, "%s,  %d \n", __FILE__, __LINE__);
+  core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+  l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+  tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+
+  //printk("core_ready=%ld, l2_ready=%ld, tiler_ready=%ld\n", core_ready, l2_ready, tiler_ready);
+  if (core_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, core_ready, ACTION_PWROFF);
+
+  if (l2_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_ready, ACTION_PWROFF);
+
+  if (tiler_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_ready, ACTION_PWROFF);
+
+  /* do external reset */
+
+//JOHNT
+#if 0
+    // Level reset mail
+    Wr(P_RESET0_MASK, ~(0x1<<20));
+    Wr(P_RESET0_LEVEL, ~(0x1<<20));
+
+    // Level reset mail
+    Wr(P_RESET2_MASK, ~(0x1<<14));
+    Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    Wr(P_RESET2_LEVEL, 0xffffffff);
+    Wr(P_RESET0_LEVEL, 0xffffffff);
+#else
+    if (reg_base_hiubus) {
+        value = Rd(RESET0_MASK);
+        value = value & (~(0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_MASK, value);
+
+        value = Rd(RESET0_LEVEL);
+        value = value & (~(0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_LEVEL, value);
+    ///////////////
+        value = Rd(RESET2_MASK);
+        value = value & (~(0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_MASK, value);
+
+        value = Rd(RESET2_LEVEL);
+        value = value & (~(0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_LEVEL, value);
+
+        value = Rd(RESET0_LEVEL);
+        value = value | ((0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_LEVEL, value);
+
+        value = Rd(RESET2_LEVEL);
+        value = value | ((0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_LEVEL, value);
+    } else {
+		dev_err(kbdev->dev, "reg_base_hiubus is null !!!\n");
+    }
+#endif
+  //writel(..../*e.g. PWR_RESET, EXTERNAL_PWR_REGISTER*/);
+
+  /* any other action necessary, like a simple delay */
+  kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+  kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+#endif
+
+	/* Unmask the reset complete interrupt only */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED,
+									NULL);
+
+	/* Initialize a structure for tracking the status of the reset */
+	rtdata.kbdev = kbdev;
+	rtdata.timed_out = 0;
+
+	/* Create a timer to use as a timeout on the reset */
+	hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rtdata.timer.function = kbasep_reset_timeout;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		goto out;
+	}
+
+	/* No interrupt has been received - check if the RAWSTAT register says
+	 * the reset has completed */
+	if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+							RESET_COMPLETED) {
+		/* The interrupt is set in the RAWSTAT; this suggests that the
+		 * interrupts are not getting to the CPU */
+		dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+		/* If interrupts aren't working we can't continue. */
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return -EINVAL;
+	}
+
+	/* The GPU doesn't seem to be responding to the reset so try a hard
+	 * reset */
+	dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+								RESET_TIMEOUT);
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET, NULL);
+
+	/* Restart the timer to wait for the hard reset to complete */
+	rtdata.timed_out = 0;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		goto out;
+	}
+
+	destroy_hrtimer_on_stack(&rtdata.timer);
+
+	dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+								RESET_TIMEOUT);
+
+	/* The GPU still hasn't reset, give up */
+	return -EINVAL;
+
+out:
+
+	if (flags & PM_HW_ISSUES_DETECT)
+		kbase_pm_hw_issues_detect(kbdev);
+
+	kbase_pm_hw_issues_apply(kbdev);
+
+	kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		u32 gpu_status = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS), NULL);
+
+		kbdev->secure_mode = (gpu_status &
+				GPU_STATUS_PROTECTED_MODE_ACTIVE) != 0;
+	}
+
+	/* If cycle counter was in use re-enable it, enable_irqs will only be
+	 * false when called from kbase_pm_powerup */
+	if (kbdev->pm.backend.gpu_cycle_counter_requests &&
+						(flags & PM_ENABLE_IRQS)) {
+		/* enable interrupts as the L2 may have to be powered on */
+		kbase_pm_enable_interrupts(kbdev);
+		kbase_pm_request_l2_caches(kbdev);
+
+		/* Re-enable the counters if we need to */
+		spin_lock_irqsave(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+		if (kbdev->pm.backend.gpu_cycle_counter_requests)
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+		spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+		kbase_pm_release_l2_caches(kbdev);
+		kbase_pm_disable_interrupts(kbdev);
+	}
+
+	if (flags & PM_ENABLE_IRQS)
+		kbase_pm_enable_interrupts(kbdev);
+
+	return 0;
+}
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ *
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * kbase_pm_request_gpu_cycle_counter() or
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only
+ *
+ * When this function is called the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to (
+ * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() )
+ *
+ * @kbdev:     The kbase device structure of the device
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+	--kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	kbase_pm_release_l2_caches(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100755
index 0000000..477bea7
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,566 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * enum kbasep_pm_action - Actions that can be performed on a core.
+ *
+ * This enumeration is private to the file. Its values are set to allow
+ * core_type_to_reg() function, which decodes this enumeration, to be simpler
+ * and more efficient.
+ *
+ * @ACTION_PRESENT: The cores that are present
+ * @ACTION_READY: The cores that are ready
+ * @ACTION_PWRON: Power on the cores specified
+ * @ACTION_PWROFF: Power off the cores specified
+ * @ACTION_PWRTRANS: The cores that are transitioning
+ * @ACTION_PWRACTIVE: The cores that are active
+ */
+enum kbasep_pm_action {
+	ACTION_PRESENT = 0,
+	ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO),
+	ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO),
+	ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO),
+	ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO),
+	ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO)
+};
+
+
+
+/**
+ * kbase_pm_dev_idle - The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_dev_activate - The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_get_present_cores - Get details of the cores that are present in
+ *                              the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid
+ *         pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_active_cores - Get details of the cores that are currently
+ *                             active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_trans_cores - Get details of the cores that are currently
+ *                            transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_ready_cores - Get details of the cores that are currently
+ *                            powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are powered and ready for jobs (they may
+ * or may not be currently executing jobs).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_clock_on - Turn the clock for the device on, and enable device
+ *                     interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if clock on due to resume after suspend, false otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
+ *                      device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU
+ * off. It should be modified during integration to perform the necessary
+ * actions to turn the clock off (if this is possible in the integration).
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if clock off due to suspend, false otherwise
+ *
+ * Return: true  if clock was turned off, or
+ *         false if clock can not be turned off due to pending page/bus fault
+ *               workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
+
+/**
+ * kbase_pm_enable_interrupts - Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts - Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * kbase_pm_reset_done - The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_pm_check_transitions_nolock - Check if there are any power transitions
+ *                                     to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state has no
+ * cores. That is: of the no cores desired, none were *un*available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation)
+ *
+ * The caller must hold kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return:      non-zero when all desired cores are available. That is,
+ *              it's worthwhile for the caller to submit a job.
+ *              false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_check_transitions_sync - Synchronous and locking variant of
+ *                                   kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device.pm.power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ *                                      where the caller must hold
+ *                                      kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ *                               the Power Policy, and begin any power
+ *                               transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware acheive the desired shader
+ * core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
+ *                                     the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+
+void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action);
+
+/**
+ * kbasep_pm_read_present_cores - Read the bitmasks of present cores.
+ *
+ * This information is cached to avoid having to perform register reads whenever
+ * the information is required.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init()) after calling this function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to
+ *                         update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of @buffer_updated differs from a previous call.
+ *
+ * @kbdev:          The kbase device structure for the device (must be a
+ *                  valid pointer)
+ * @buffer_updated: True if the buffer has been updated on this VSync,
+ *                  false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change
+ *                            the clock speed of the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating.
+ */
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
+ *                                      needed
+ *
+ * If the caller is the first caller then the GPU cycle counters will be enabled
+ * along with the l2 cache
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
+ *                                               needed (l2 cache already on)
+ *
+ * This is a version of the above function
+ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
+ * l2 cache is known to be on and assured to be on until the subsequent call of
+ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does
+ * not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
+ *                                      longer in use
+ *
+ * If the caller is the
+ * last caller then the GPU cycle counters will be disabled. A request must have
+ * been made before a call to this.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * Access to registers should be done using kbase_os_reg_read()/write() at this
+ * stage, not kbase_reg_read()/write().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ *                              collection is active.
+ *
+ * Note that this returns if the power management metrics collection was
+ * active at the time of calling, it is possible that after the call the metrics
+ * collection enable may have changed state.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if power on due to resume after suspend,
+ *             false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been
+ *                        requested.
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if power off due to suspend,
+ *              false otherwise
+ * Return:
+ *         true      if power was turned off, else
+ *         false     if power can not be turned off due to pending page/bus
+ *                   fault workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+
+#ifdef CONFIG_PM_DEVFREQ
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total, unsigned long *busy);
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @kbdev:         The kbase device structure for the device (must be a
+ *                 valid pointer)
+ * @utilisation:   The current calculated utilisation by the metrics system.
+ * @util_gl_share: The current calculated gl share of utilisation.
+ * @util_cl_share: The current calculated cl share of utilisation per core
+ *                 group.
+ * Return:         Returns 0 on failure and non zero on success.
+ */
+
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2]);
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_metrics_update - Inform the metrics system that an atom is either
+ *                           about to be run or has just completed.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @now:   Pointer to the timestamp of the change, or NULL to use current time
+ *
+ * Caller must hold runpool_irq.lock
+ */
+void kbase_pm_metrics_update(struct kbase_device *kbdev,
+				ktime_t *now);
+
+/**
+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called after L2 power up.
+ */
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called before L2 power off.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
new file mode 100755
index 0000000..ae63256
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -0,0 +1,400 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION          70
+#define KBASE_PM_VSYNC_MAX_UTILISATION          90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION       10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION       40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
+ * under 11s. Exceeding this will cause overflow */
+#define KBASE_PM_TIME_SHIFT			8
+
+/* Maximum time between sampling of utilization data, without resetting the
+ * counters. */
+#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbasep_pm_metrics_data *metrics;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	metrics = container_of(timer, struct kbasep_pm_metrics_data, timer);
+	kbase_pm_get_dvfs_action(metrics->kbdev);
+
+	spin_lock_irqsave(&metrics->lock, flags);
+
+	if (metrics->timer_active)
+		hrtimer_start(timer,
+			HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&metrics->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+int kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.metrics.kbdev = kbdev;
+
+	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.prev_busy = 0;
+	kbdev->pm.backend.metrics.prev_idle = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+
+	spin_lock_init(&kbdev->pm.backend.metrics.lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbdev->pm.backend.metrics.timer_active = true;
+	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
+
+	hrtimer_start(&kbdev->pm.backend.metrics.timer,
+			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
+
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbdev->pm.backend.metrics.timer_active = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	ktime_t diff;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+	if (ktime_to_ns(diff) < 0)
+		return;
+
+	if (kbdev->pm.backend.metrics.gpu_active) {
+		u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+
+		kbdev->pm.backend.metrics.time_busy += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[0])
+			kbdev->pm.backend.metrics.busy_cl[0] += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[1])
+			kbdev->pm.backend.metrics.busy_cl[1] += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[0])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+	} else {
+		kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff)
+							>> KBASE_PM_TIME_SHIFT);
+	}
+
+	kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+#if defined(CONFIG_PM_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function.
+ */
+static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	/* Store previous value */
+	kbdev->pm.backend.metrics.prev_idle =
+					kbdev->pm.backend.metrics.time_idle;
+	kbdev->pm.backend.metrics.prev_busy =
+					kbdev->pm.backend.metrics.time_busy;
+
+	/* Reset current values */
+	kbdev->pm.backend.metrics.time_period_start = now;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+}
+
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get());
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total_out, unsigned long *busy_out)
+{
+	ktime_t now = ktime_get();
+	unsigned long flags, busy, total;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	busy = kbdev->pm.backend.metrics.time_busy;
+	total = busy + kbdev->pm.backend.metrics.time_idle;
+
+	/* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
+	 * 100ms) */
+	if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+		kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+	} else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+		total += kbdev->pm.backend.metrics.prev_idle +
+				kbdev->pm.backend.metrics.prev_busy;
+		busy += kbdev->pm.backend.metrics.prev_busy;
+	}
+
+	*total_out = total;
+	*busy_out = busy;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev,
+					int *util_gl_share,
+					int util_cl_share[2],
+					ktime_t now)
+{
+	int utilisation;
+	int busy;
+
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	if (kbdev->pm.backend.metrics.time_idle +
+				kbdev->pm.backend.metrics.time_busy == 0) {
+		/* No data - so we return NOP */
+		utilisation = -1;
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+		goto out;
+	}
+
+	utilisation = (100 * kbdev->pm.backend.metrics.time_busy) /
+			(kbdev->pm.backend.metrics.time_idle +
+			 kbdev->pm.backend.metrics.time_busy);
+
+	busy = kbdev->pm.backend.metrics.busy_gl +
+		kbdev->pm.backend.metrics.busy_cl[0] +
+		kbdev->pm.backend.metrics.busy_cl[1];
+
+	if (busy != 0) {
+		if (util_gl_share)
+			*util_gl_share =
+				(100 * kbdev->pm.backend.metrics.busy_gl) /
+									busy;
+		if (util_cl_share) {
+			util_cl_share[0] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[0]) /
+									busy;
+			util_cl_share[1] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[1]) /
+									busy;
+		}
+	} else {
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+	}
+
+out:
+	return utilisation;
+}
+
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	int utilisation, util_gl_share;
+	int util_cl_share[2];
+	ktime_t now;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	now = ktime_get();
+
+	utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share,
+			util_cl_share, now);
+
+	if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 ||
+							util_cl_share[1] < 0) {
+		utilisation = 0;
+		util_gl_share = 0;
+		util_cl_share[0] = 0;
+		util_cl_share[1] = 0;
+		goto out;
+	}
+
+out:
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
+								util_cl_share);
+#endif				/*CONFIG_MALI_MIDGARD_DVFS */
+
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+	bool isactive;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	isactive = kbdev->pm.backend.metrics.timer_active;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	return isactive;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/**
+ * kbase_pm_metrics_active_calc - Update PM active counts based on currently
+ *                                running atoms
+ * @kbdev: Device pointer
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ */
+static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		/* Head atom may have just completed, so if it isn't running
+		 * then try the next atom */
+		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
+			katom = kbase_gpu_inspect(kbdev, js, 1);
+
+		if (katom && katom->gpu_rb_state ==
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+			if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+				int device_nr = (katom->core_req &
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+						? katom->device_nr : 0;
+				WARN_ON(device_nr >= 2);
+				kbdev->pm.backend.metrics.active_cl_ctx[
+						device_nr] = 1;
+			} else {
+				/* Slot 2 should not be running non-compute
+				 * atoms */
+				WARN_ON(js >= 2);
+				kbdev->pm.backend.metrics.active_gl_ctx[js] = 1;
+			}
+			kbdev->pm.backend.metrics.gpu_active = true;
+		}
+	}
+}
+
+/* called when job is submitted to or removed from a GPU slot */
+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
+{
+	unsigned long flags;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	if (!timestamp) {
+		now = ktime_get();
+		timestamp = &now;
+	}
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
+
+	kbase_pm_metrics_active_calc(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100755
index 0000000..f83deee
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,984 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#else				/* CONFIG_MALI_NO_MALI */
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/* The number of policies available in the system.
+ * This is derived from the number of functions listed in policy_get_functions.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+	 * expect to hit it nor tend to hit it very much anyway. We can detect
+	 * whether we need more instrumentation by a difference between
+	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
+
+	/* Must be the last */
+	KBASE_PM_FUNC_ID_COUNT
+};
+
+
+/* State changes during request/unrequest/release-ing cores */
+enum {
+	KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
+	KBASE_PM_CHANGE_STATE_TILER  = (1u << 1),
+
+	/* These two must be last */
+	KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
+						KBASE_PM_CHANGE_STATE_SHADER),
+	KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
+};
+typedef u32 kbase_pm_change_state;
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+/* Timeline Trace code lookups for each function */
+static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
+					[KBASE_PM_CHANGE_STATE_COUNT] = {
+	/* kbase_pm_request_cores */
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
+
+	/* kbase_pm_release_cores */
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
+};
+
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id,
+		kbase_pm_change_state state)
+{
+	int trace_code;
+
+	KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
+	KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
+									state);
+
+	trace_code = kbase_pm_change_state_trace_code[func_id][state];
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
+}
+
+#else /* CONFIG_MALI_TRACE_TIMELINE */
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id, kbase_pm_change_state state)
+{
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+/**
+ * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
+ *                               requested shader cores
+ * @kbdev: Device pointer
+ */
+static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
+{
+	u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	kbdev->pm.backend.desired_shader_state &=
+			~kbdev->pm.backend.shader_poweroff_pending;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+
+	if (prev_shader_state != kbdev->pm.backend.desired_shader_state
+			|| kbdev->pm.backend.ca_in_transition) {
+		bool cores_are_available;
+
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+		/* Don't need 'cores_are_available',
+		 * because we don't return anything */
+		CSTD_UNUSED(cores_are_available);
+	}
+}
+
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(timer, struct kbase_device,
+						pm.backend.gpu_poweroff_timer);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* It is safe for this call to do nothing if the work item is already
+	 * queued. The worker function will read the must up-to-date state of
+	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
+	 *
+	 * If a state change occurs while the worker function is processing,
+	 * this call will succeed as a work item can be requeued once it has
+	 * started processing.
+	 */
+	if (kbdev->pm.backend.gpu_poweroff_pending)
+		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+					&kbdev->pm.backend.gpu_poweroff_work);
+
+	if (kbdev->pm.backend.shader_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending_time--;
+
+		KBASE_DEBUG_ASSERT(
+				kbdev->pm.backend.shader_poweroff_pending_time
+									>= 0);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending_time)
+			kbasep_pm_do_poweroff_cores(kbdev);
+	}
+
+	if (kbdev->pm.backend.poweroff_timer_needed) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+		return HRTIMER_RESTART;
+	}
+
+	kbdev->pm.backend.poweroff_timer_running = false;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	bool do_poweroff = false;
+
+	kbdev = container_of(data, struct kbase_device,
+						pm.backend.gpu_poweroff_work);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	kbdev->pm.backend.gpu_poweroff_pending--;
+
+	if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Only power off the GPU if a request is still pending */
+	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+		do_poweroff = true;
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	if (do_poweroff) {
+		kbdev->pm.backend.poweroff_timer_needed = false;
+		hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+		kbdev->pm.backend.poweroff_timer_running = false;
+
+		/* Power off the GPU */
+		if (!kbase_pm_do_poweroff(kbdev, false)) {
+			/* GPU can not be powered off at present */
+			kbdev->pm.backend.poweroff_timer_needed = true;
+			kbdev->pm.backend.poweroff_timer_running = true;
+			hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer,
+					kbdev->pm.gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("kbase_pm_do_poweroff",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!wq)
+		return -ENOMEM;
+
+	kbdev->pm.backend.gpu_poweroff_wq = wq;
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
+			kbasep_pm_do_gpu_poweroff_wq);
+	hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	kbdev->pm.backend.gpu_poweroff_timer.function =
+			kbasep_pm_do_gpu_poweroff_callback;
+	kbdev->pm.backend.pm_current_policy = policy_list[0];
+	kbdev->pm.backend.pm_current_policy->init(kbdev);
+	kbdev->pm.gpu_poweroff_time =
+			HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+	kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+	kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
+
+	return 0;
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.pm_current_policy->term(kbdev);
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
+}
+
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	kbdev->pm.backend.poweroff_timer_needed = false;
+	hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.poweroff_timer_running = false;
+
+	/* If wq is already running but is held off by pm.lock, make sure it has
+	 * no effect */
+	kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.shader_poweroff_pending_time = 0;
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+	bool active;
+
+	lockdep_assert_held(&pm->lock);
+
+	/* pm_current_policy will never be NULL while pm.lock is held */
+	KBASE_DEBUG_ASSERT(backend->pm_current_policy);
+
+	spin_lock_irqsave(&pm->power_change_lock, flags);
+
+	active = backend->pm_current_policy->get_core_active(kbdev);
+
+	if (active) {
+		if (backend->gpu_poweroff_pending) {
+			/* Cancel any pending power off request */
+			backend->gpu_poweroff_pending = 0;
+
+			/* If a request was pending then the GPU was still
+			 * powered, so no need to continue */
+			if (!kbdev->poweroff_pending) {
+				spin_unlock_irqrestore(&pm->power_change_lock,
+						flags);
+				return;
+			}
+		}
+
+		if (!backend->poweroff_timer_running && !backend->gpu_powered &&
+				(pm->poweroff_gpu_ticks ||
+				pm->poweroff_shader_ticks)) {
+			backend->poweroff_timer_needed = true;
+			backend->poweroff_timer_running = true;
+			hrtimer_start(&backend->gpu_poweroff_timer,
+					pm->gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+
+		spin_unlock_irqrestore(&pm->power_change_lock, flags);
+
+		/* Power on the GPU and any cores requested by the policy */
+		kbase_pm_do_poweron(kbdev, false);
+	} else {
+		/* It is an error for the power policy to power off the GPU
+		 * when there are contexts active */
+		KBASE_DEBUG_ASSERT(pm->active_count == 0);
+
+		if (backend->shader_poweroff_pending) {
+			backend->shader_poweroff_pending = 0;
+			backend->shader_poweroff_pending_time = 0;
+		}
+
+		/* Request power off */
+		if (pm->backend.gpu_powered) {
+			if (pm->poweroff_gpu_ticks) {
+				backend->gpu_poweroff_pending =
+						pm->poweroff_gpu_ticks;
+				backend->poweroff_timer_needed = true;
+				if (!backend->poweroff_timer_running) {
+					/* Start timer if not running (eg if
+					 * power policy has been changed from
+					 * always_on to something else). This
+					 * will ensure the GPU is actually
+					 * powered off */
+					backend->poweroff_timer_running
+							= true;
+					hrtimer_start(
+						&backend->gpu_poweroff_timer,
+						pm->gpu_poweroff_time,
+						HRTIMER_MODE_REL);
+				}
+				spin_unlock_irqrestore(&pm->power_change_lock,
+						flags);
+			} else {
+				spin_unlock_irqrestore(&pm->power_change_lock,
+						flags);
+
+				/* Power off the GPU immediately */
+				if (!kbase_pm_do_poweroff(kbdev, false)) {
+					/* GPU can not be powered off at present
+					 */
+					spin_lock_irqsave(
+							&pm->power_change_lock,
+							flags);
+					backend->poweroff_timer_needed = true;
+					if (!backend->poweroff_timer_running) {
+						backend->poweroff_timer_running
+								= true;
+						hrtimer_start(
+						&backend->gpu_poweroff_timer,
+							pm->gpu_poweroff_time,
+							HRTIMER_MODE_REL);
+					}
+					spin_unlock_irqrestore(
+							&pm->power_change_lock,
+							flags);
+				}
+			}
+		} else {
+			spin_unlock_irqrestore(&pm->power_change_lock, flags);
+		}
+	}
+}
+
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+	u64 desired_bitmap;
+	bool cores_are_available;
+	bool do_poweroff = false;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	if (kbdev->pm.backend.pm_current_policy == NULL)
+		return;
+
+	desired_bitmap =
+		kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
+	desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+
+	/* Enable core 0 if tiler required, regardless of core availability */
+	if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+		desired_bitmap |= 1;
+
+	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
+							(u32)desired_bitmap);
+	/* Are any cores being powered on? */
+	if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
+	    kbdev->pm.backend.ca_in_transition) {
+		/* Check if we are powering off any cores before updating shader
+		 * state */
+		if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) {
+			/* Start timer to power off cores */
+			kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+
+			if (kbdev->pm.poweroff_shader_ticks)
+				kbdev->pm.backend.shader_poweroff_pending_time =
+						kbdev->pm.poweroff_shader_ticks;
+			else
+				do_poweroff = true;
+		}
+
+		kbdev->pm.backend.desired_shader_state = desired_bitmap;
+
+		/* If any cores are being powered on, transition immediately */
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) {
+		/* Start timer to power off cores */
+		kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+		if (kbdev->pm.poweroff_shader_ticks)
+			kbdev->pm.backend.shader_poweroff_pending_time =
+					kbdev->pm.poweroff_shader_ticks;
+		else
+			kbasep_pm_do_poweroff_cores(kbdev);
+	} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
+				kbdev->pm.backend.poweroff_timer_needed) {
+		/* If power policy is keeping cores on despite there being no
+		 * active contexts then disable poweroff timer as it isn't
+		 * required.
+		 * Only reset poweroff_timer_needed if we're not in the middle
+		 * of the power off callback */
+		kbdev->pm.backend.poweroff_timer_needed = false;
+	}
+
+	/* Ensure timer does not power off wanted cores and make sure to power
+	 * off unwanted cores */
+	if (kbdev->pm.backend.shader_poweroff_pending != 0) {
+		kbdev->pm.backend.shader_poweroff_pending &=
+				~(kbdev->pm.backend.desired_shader_state &
+								desired_bitmap);
+		if (kbdev->pm.backend.shader_poweroff_pending == 0)
+			kbdev->pm.backend.shader_poweroff_pending_time = 0;
+	}
+
+	/* Shader poweroff is deferred to the end of the function, to eliminate
+	 * issues caused by the core availability policy recursing into this
+	 * function */
+	if (do_poweroff)
+		kbasep_pm_do_poweroff_cores(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+}
+
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
+
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
+
+int set_policy_by_name(struct kbase_device *kbdev, const char *name)
+{
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, name)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		printk("power_policy: policy not found\n");
+		return -EINVAL;
+	}
+	trace_printk("policy name=%s\n", name);
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(set_policy_by_name);
+
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_policy *new_policy)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	const struct kbase_pm_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	old_policy = kbdev->pm.backend.pm_current_policy;
+	kbdev->pm.backend.pm_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
+								old_policy->id);
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
+								new_policy->id);
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.pm_current_policy = new_policy;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_active(kbdev);
+	kbase_pm_update_cores_state(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
+
+/* Check whether a state change has finished, and trace it as completed */
+static void
+kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
+{
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state)
+		kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	unsigned long flags;
+	u64 cores;
+
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	cores = shader_cores;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+
+		/* It should be almost impossible for this to overflow. It would
+		 * require 2^32 atoms to request a particular core, which would
+		 * require 2^24 contexts to submit. This would require an amount
+		 * of memory that is impossible on a 32-bit system and extremely
+		 * unlikely on a 64-bit system. */
+		int cnt = ++kbdev->shader_needed_cnt[bitnum];
+
+		if (1 == cnt) {
+			kbdev->shader_needed_bitmap |= bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt = ++kbdev->tiler_needed_cnt;
+
+		if (1 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+							change_gpu_state);
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
+
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	unsigned long flags;
+
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_needed_bitmap &= ~bit;
+
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		cnt = --kbdev->tiler_needed_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* Trace that any state change effectively completes immediately
+		 * - no-one will wait on the state change */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
+
+enum kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	unsigned long flags;
+	u64 prev_shader_needed;	/* Just for tracing */
+	u64 prev_shader_inuse;	/* Just for tracing */
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	prev_shader_needed = kbdev->shader_needed_bitmap;
+	prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+	/* If desired_shader_state does not contain the requested cores, then
+	 * power management is not attempting to powering those cores (most
+	 * likely due to core availability policy) and a new job affinity must
+	 * be chosen */
+	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+							shader_cores) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+		return KBASE_NEW_AFFINITY;
+	}
+
+	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+	    (tiler_required && !kbdev->tiler_available_bitmap)) {
+		/* Trace ongoing core transition */
+		kbase_timeline_pm_l2_transition_start(kbdev);
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+		return KBASE_CORES_NOT_READY;
+	}
+
+	/* If we started to trace a state change, then trace it has being
+	 * finished by now, at the very latest */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+	/* Trace core transition done */
+	kbase_timeline_pm_l2_transition_done(kbdev);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt)
+			kbdev->shader_needed_bitmap &= ~bit;
+
+		/* shader_inuse_cnt should not overflow because there can only
+		 * be a very limited number of jobs on the h/w at one time */
+
+		kbdev->shader_inuse_cnt[bitnum]++;
+		kbdev->shader_inuse_bitmap |= bit;
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		--kbdev->tiler_needed_cnt;
+
+		kbdev->tiler_inuse_cnt++;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+	}
+
+	if (prev_shader_needed != kbdev->shader_needed_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	unsigned long flags;
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_inuse_bitmap &= ~bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+		cnt = --kbdev->tiler_inuse_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+							change_gpu_state);
+
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
+
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+					bool tiler_required,
+					u64 shader_cores)
+{
+	kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+
+	kbase_pm_check_transitions_sync(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
+
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 prior_l2_users_count;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	prior_l2_users_count = kbdev->l2_users_count++;
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
+
+	/* if the GPU is reset while the l2 is on, l2 will be off but
+	 * prior_l2_users_count will be > 0. l2_available_bitmap will have been
+	 * set to 0 though by kbase_pm_init_hw */
+	if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
+		kbase_pm_check_transitions_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+	wait_event(kbdev->pm.backend.l2_powered_wait,
+					kbdev->pm.backend.l2_powered == 1);
+
+	/* Trace that any state change completed immediately */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
+
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	kbdev->l2_users_count++;
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
+
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
+
+	--kbdev->l2_users_count;
+
+	if (!kbdev->l2_users_count) {
+		kbase_pm_check_transitions_nolock(kbdev);
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
new file mode 100755
index 0000000..611a90e
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -0,0 +1,227 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ *         initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+	KBASE_CORES_NOT_READY = 0,
+	KBASE_NEW_AFFINITY = 1,
+	KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ *                          for jobs to be submitted
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete/the cores might not be available
+ * until a Power Management IRQ.
+ *
+ * Return: 0 if the cores were successfully requested, or -errno otherwise.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ *                            jobs to be submitted.
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job is
+ * cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and if so update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ *
+ *         %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
+ *
+ *         %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+						struct kbase_device *kbdev,
+						bool tiler_required,
+						u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_register_inuse_cores() )
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
+ * may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups, power up, wait and prevent
+ * the power manager from powering down the l2 caches.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2
+ *
+ * It is the callers responsibility to ensure that the l2 is already powered up
+ * and to eventually call kbase_pm_release_l2_caches()
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power manager
+ * to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c
new file mode 100644
index 0000000..cd4f0a2
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c
@@ -0,0 +1,160 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+#include <linux/of.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <backend/gpu/mali_kbase_power_model_simple.h>
+
+/*
+ * This model is primarily designed for the Juno platform. It may not be
+ * suitable for other platforms.
+ */
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static u32 dynamic_coefficient;
+static u32 static_coefficient;
+static s32 ts[4];
+static struct thermal_zone_device *gpu_tz;
+
+static unsigned long model_static_power(unsigned long voltage)
+{
+	unsigned long temperature, temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	if (gpu_tz) {
+		int ret;
+
+		ret = gpu_tz->ops->get_temp(gpu_tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	} else {
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(ts[3] * temp_cubed)
+			+ (ts[2] * temp_squared)
+			+ (ts[1] * temp)
+			+ ts[0];
+
+	return (((static_coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+}
+
+static unsigned long model_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+
+	return (dynamic_coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+
+struct devfreq_cooling_ops power_model_simple_ops = {
+	.get_static_power = model_static_power,
+	.get_dynamic_power = model_dynamic_power,
+};
+
+int kbase_power_model_simple_init(struct kbase_device *kbdev)
+{
+	struct device_node *power_model_node;
+	const char *tz_name;
+	u32 static_power, dynamic_power;
+	u32 voltage, voltage_squared, voltage_cubed, frequency;
+
+	power_model_node = of_get_child_by_name(kbdev->dev->of_node,
+			"power_model");
+	if (!power_model_node) {
+		dev_err(kbdev->dev, "could not find power_model node\n");
+		return -ENODEV;
+	}
+	if (!of_device_is_compatible(power_model_node,
+			"arm,mali-simple-power-model")) {
+		dev_err(kbdev->dev, "power_model incompatible with simple power model\n");
+		return -ENODEV;
+	}
+
+	if (of_property_read_string(power_model_node, "thermal-zone",
+			&tz_name)) {
+		dev_err(kbdev->dev, "ts in power_model not available\n");
+		return -EINVAL;
+	}
+
+	gpu_tz = thermal_zone_get_zone_by_name(tz_name);
+	if (IS_ERR(gpu_tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(gpu_tz));
+		gpu_tz = NULL;
+
+		return -EPROBE_DEFER;
+	}
+
+	if (of_property_read_u32(power_model_node, "static-power",
+			&static_power)) {
+		dev_err(kbdev->dev, "static-power in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "dynamic-power",
+			&dynamic_power)) {
+		dev_err(kbdev->dev, "dynamic-power in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "voltage",
+			&voltage)) {
+		dev_err(kbdev->dev, "voltage in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "frequency",
+			&frequency)) {
+		dev_err(kbdev->dev, "frequency in power_model not available\n");
+		return -EINVAL;
+	}
+	voltage_squared = (voltage * voltage) / 1000;
+	voltage_cubed = voltage * voltage * voltage;
+	static_coefficient = (static_power << 20) / (voltage_cubed >> 10);
+	dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared)
+			* 1000) / frequency;
+
+	if (of_property_read_u32_array(power_model_node, "ts", ts, 4)) {
+		dev_err(kbdev->dev, "ts in power_model not available\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h
new file mode 100644
index 0000000..d20de1e
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_POWER_MODEL_SIMPLE_H_
+#define _BASE_POWER_MODEL_SIMPLE_H_
+
+/**
+ * kbase_power_model_simple_init - Initialise the simple power model
+ * @kbdev: Device pointer
+ *
+ * The simple power model estimates power based on current voltage, temperature,
+ * and coefficients read from device tree. It does not take utilization into
+ * account.
+ *
+ * The power model requires coefficients from the power_model node in device
+ * tree. The absence of this node will prevent the model from functioning, but
+ * should not prevent the rest of the driver from running.
+ *
+ * Return: 0 on success
+ *         -ENOSYS if the power_model node is not present in device tree
+ *         -EPROBE_DEFER if the thermal zone specified in device tree is not
+ *         currently available
+ *         Any other negative value on failure
+ */
+int kbase_power_model_simple_init(struct kbase_device *kbdev);
+
+extern struct devfreq_cooling_ops power_model_simple_ops;
+
+#endif /* _BASE_POWER_MODEL_SIMPLE_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100755
index 0000000..4bcde85
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush -  Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367
+ *
+ * Note : If GPU resets occur then the counters are reset to zero, the delay may
+ * not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+	u32 base_count = 0;
+
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+
+	kbase_pm_context_active(kctx->kbdev);
+	kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+	while (true) {
+		u32 new_count;
+
+		new_count = kbase_reg_read(kctx->kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		/* First time around, just store the count. */
+		if (base_count == 0) {
+			base_count = new_count;
+			continue;
+		}
+
+		/* No need to handle wrapping, unsigned maths works for this. */
+		if ((new_count - base_count) > 1000)
+			break;
+	}
+
+	kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+	kbase_pm_context_idle(kctx->kbdev);
+}
+#endif				/* CONFIG_MALI_NO_MALI */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
new file mode 100755
index 0000000..35088ab
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
new file mode 100755
index 0000000..35ff2f1
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -0,0 +1,126 @@
+#
+# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/drivers/gpu/arm/midgard/ 
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE                += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           += ../../kernel/drivers/gpu/arm/midgard/docs
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
new file mode 100755
index 0000000..7ae05c2
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR;
+	size="12,8";
+	compound=true;
+
+	node [ shape = box ];
+
+	subgraph cluster_policy_queues {
+		low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+		queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+		rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+		label = "Policy's Queue(s)";
+	}
+
+	call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+	{
+		rank=same;
+		ordering=out;
+		call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+		call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+		call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+	}
+
+	subgraph cluster_runpool {
+
+		as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+		as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+		as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+		as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+		label = "Policy's Run Pool";
+	}
+
+	{
+		rank=same;
+		call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+		sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+	}
+
+	{
+		rank=same;
+		ordering=out;
+		sstop [ shape=ellipse label="SS-Timer expires" ]
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		irq [ label="IRQ" shape=ellipse ];
+
+		job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+	}
+
+	hstop [ shape=ellipse label="HS-Timer expires" ]
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+	low_queue:qr -> call_dequeue:w;
+	rt_queue:qr -> call_dequeue:w;
+
+	call_dequeue -> as1 [lhead=cluster_runpool];
+
+	as1->call_jdequeue         [ltail=cluster_runpool];
+	call_jdequeue->jobslots:0;
+	call_jdequeue->sstop_dotfixup [ arrowhead=none];
+	sstop_dotfixup->sstop      [label="Spawn SS-Timer"];
+	sstop->jobslots            [label="SoftStop"];
+	sstop->hstop               [label="Spawn HS-Timer"];
+	hstop->jobslots:ne            [label="HardStop"];
+
+
+	as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+	call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+	call_ctxfinish->call_ctxdone [constraint=false];
+
+	call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+	{
+	jobslots->irq   [constraint=false];
+
+	irq->job_finish [constraint=false];
+	}
+
+	irq->as2  [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
new file mode 100755
index 0000000..159b993
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR
+	size="6,6"
+	compound=true;
+
+	node [ shape = box ];
+
+	call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+	policy_queue [ label="Policy's Queue" ];
+
+	{
+		rank=same;
+		runpool [ label="Policy's Run Pool" ];
+
+		ctx_finish [ label="ctx finished" ];
+	}
+
+	{
+		rank=same;
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		job_finish [ label="Job finished" ];
+	}
+
+
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> policy_queue;
+
+	policy_queue->runpool [label="dequeue ctx" weight=0.1];
+	runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+	runpool->ctx_finish [ style=dotted ];
+
+	runpool->jobslots  [label="dequeue job" weight=0.1];
+	jobslots->runpool  [label="requeue job" weight=0.1];
+
+	jobslots->job_finish [ style=dotted ];
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
new file mode 100755
index 0000000..a6e08da
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -0,0 +1,191 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t83x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t82x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tMIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
new file mode 100755
index 0000000..55e5e5a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -0,0 +1,945 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_ISSUES_H_
+#define _BASE_HWCONFIG_ISSUES_H_
+
+enum base_hw_issue {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8879,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_generic[] = {
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t72x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t76x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t60x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t62x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t86x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t83x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t82x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+
+
+
+
+#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
new file mode 100755
index 0000000..873cd92
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,1751 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _BASE_KERNEL_H_
+#define _BASE_KERNEL_H_
+
+#ifndef __user
+#define __user
+#endif
+
+/* Support UK6 IOCTLS */
+#define BASE_LEGACY_UK6_SUPPORT 1
+
+/* Support UK7 IOCTLS */
+/* NB: To support UK6 we also need to support UK7 */
+#define BASE_LEGACY_UK7_SUPPORT 1
+
+/* Support UK8 IOCTLS */
+#define BASE_LEGACY_UK8_SUPPORT 1
+
+/* Support UK9 IOCTLS */
+#define BASE_LEGACY_UK9_SUPPORT 1
+
+typedef struct base_mem_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_mem_handle;
+
+#include "mali_base_mem_priv.h"
+#include "mali_kbase_profiling_gator_api.h"
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT              256
+
+#define BASEP_JD_SEM_PER_WORD_LOG2      5
+#define BASEP_JD_SEM_PER_WORD           (1 << BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_WORD_NR(x)         ((x) >> BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_MASK_IN_WORD(x)    (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1)))
+#define BASEP_JD_SEM_ARRAY_SIZE         BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT)
+
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET             ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET           ((unsigned char)0)
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined CDBG_ASSERT
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined KBASE_DEBUG_ASSERT
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#error assert macro not defined!
+#endif
+
+#if defined PAGE_MASK
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union kbase_pointer {
+	void __user *value;	  /**< client should store their pointers here */
+	u32 compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	u64 sizer;	  /**< Force 64-bit storage for all clients regardless */
+} kbase_pointer;
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (eg @c MEM_PROT_CPU_WR | @c MEM_HINT_CPU_RD),
+ * which defines a @a write-only region on the CPU side, which is
+ * heavily read by the CPU...
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see ::BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * See ::base_mem_alloc_flags.
+ *
+ */
+enum {
+/* IN */
+	BASE_MEM_PROT_CPU_RD = (1U << 0),      /**< Read access CPU side */
+	BASE_MEM_PROT_CPU_WR = (1U << 1),      /**< Write access CPU side */
+	BASE_MEM_PROT_GPU_RD = (1U << 2),      /**< Read access GPU side */
+	BASE_MEM_PROT_GPU_WR = (1U << 3),      /**< Write access GPU side */
+	BASE_MEM_PROT_GPU_EX = (1U << 4),      /**< Execute allowed on the GPU
+						    side */
+
+	/* BASE_MEM_HINT flags have been removed, but their values are reserved
+	 * for backwards compatibility with older user-space drivers. The values
+	 * can be re-used once support for r5p0 user-space drivers is removed,
+	 * presumably in r7p0.
+	 *
+	 * RESERVED: (1U << 5)
+	 * RESERVED: (1U << 6)
+	 * RESERVED: (1U << 7)
+	 * RESERVED: (1U << 8)
+	 */
+
+	BASE_MEM_GROW_ON_GPF = (1U << 9),      /**< Grow backing store on GPU
+						    Page Fault */
+
+	BASE_MEM_COHERENT_SYSTEM = (1U << 10), /**< Page coherence Outer
+						    shareable, if available */
+	BASE_MEM_COHERENT_LOCAL = (1U << 11),  /**< Page coherence Inner
+						    shareable */
+	BASE_MEM_CACHED_CPU = (1U << 12),      /**< Should be cached on the
+						    CPU */
+
+/* IN/OUT */
+	BASE_MEM_SAME_VA = (1U << 13), /**< Must have same VA on both the GPU
+					    and the CPU */
+/* OUT */
+	BASE_MEM_NEED_MMAP = (1U << 14), /**< Must call mmap to aquire a GPU
+					     address for the alloc */
+/* IN */
+	BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence
+					     Outer shareable, required. */
+	BASE_MEM_SECURE = (1U << 16),          /**< Secure memory */
+	BASE_MEM_DONT_NEED = (1U << 17),       /**< Not needed physical
+						    memory */
+	BASE_MEM_IMPORT_SHARED = (1U << 18),   /**< Must use shared CPU/GPU zone
+						    (SAME_VA zone) but doesn't
+						    require the addresses to
+						    be the same */
+};
+
+/**
+ * @brief Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the ::base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 19
+
+/**
+  * A mask for all output bits, excluding IN/OUT bits.
+  */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/**
+  * A mask for all input bits, including IN/OUT bits.
+  */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/**
+ * A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+	 BASE_MEM_COHERENT_LOCAL)
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMP: UMP import. Handle type is ump_secure_id.
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	BASE_MEM_IMPORT_TYPE_UMP = 1,
+	BASE_MEM_IMPORT_TYPE_UMM = 2,
+	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+} base_mem_import_type;
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:	kbase_pointer to imported user buffer
+ * @length:	length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+	kbase_pointer ptr;
+	u64 length;
+};
+
+/**
+ * @brief Invalid memory handle.
+ *
+ * Return value from functions returning @ref base_mem_handle on error.
+ *
+ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} })
+
+/**
+ * @brief Special write-alloc memory handle.
+ *
+ * A special handle is used to represent a region where a special page is mapped
+ * with a write-alloc cache setup, typically used when the write result of the
+ * GPU isn't needed, but the GPU must write anyway.
+ *
+ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} })
+
+#define BASEP_MEM_INVALID_HANDLE               (0ull  << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE               (1ull  << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
+/* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
+						BASE_MEM_COOKIE_BASE)
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB  0xfffff000UL
+
+
+/* Bit mask of cookies used for for memory allocation setup */
+#define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
+
+
+/**
+ * @brief Result codes of changing the size of the backing store allocated to a tmem region
+ */
+typedef enum base_backing_threshold_status {
+	BASE_BACKING_THRESHOLD_OK = 0,			    /**< Resize successful */
+	BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1,	    /**< Not a growable tmem object */
+	BASE_BACKING_THRESHOLD_ERROR_OOM = -2,		    /**< Increase failed due to an out-of-memory condition */
+	BASE_BACKING_THRESHOLD_ERROR_MAPPED = -3,	    /**< Resize attempted on buffer while it was mapped, which is not permitted */
+	BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
+} base_backing_threshold_status;
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief a basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+	struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent imported memory object.
+ * Simple opague handle to imported memory, can't be used
+ * with anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+	struct {
+		int fd;
+	} basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
+ */
+typedef struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completly unused
+ * by the Base driver. It can be used to store things such as callback
+ * function pointer, data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+	u64 blob[2];	 /**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ *         Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *         specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+	base_mem_handle handle;
+	u64 offset;
+	u64 length;
+};
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extent:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ */
+struct base_jit_alloc_info {
+	u64 gpu_alloc_addr;
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	u8 id;
+};
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
+ * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without
+ * changing the structure size).
+ * When the flag is set for a particular dependency to signal that it is an ordering only dependency then
+ * errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID  (0)       /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA     (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER    (1U << 1) /**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly.  By not specifying the
+ * correct settings can/will cause an early job termination.  Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u16 base_jd_core_req;
+
+/* Requirements that come from the HW */
+#define BASE_JD_REQ_DEP 0	    /**< No requirement, dependency only */
+#define BASE_JD_REQ_FS  (1U << 0)   /**< Requires fragment shaders */
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS  (1U << 1)
+#define BASE_JD_REQ_T   (1U << 2)   /**< Requires tiling */
+#define BASE_JD_REQ_CF  (1U << 3)   /**< Requires cache flushes */
+#define BASE_JD_REQ_V   (1U << 4)   /**< Requires value writeback */
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC  (1U << 13)
+
+/**
+ * SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE (1U << 5)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP  (1U << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON               (1U << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced no syncsets can be bundled with the atom
+ * but should instead be part of a NULL jobs inserted into the dependency tree.
+ * The first pre_dep object must be configured for the external resouces to use,
+ * the second pre_dep object can be used to create other dependencies.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES   (1U << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB        (1U << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME      (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER          (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/**
+ * SW Only requirement : Replay job.
+ *
+ * If the preceeding job fails, the replay job will cause the jobs specified in
+ * the list of base_jd_replay_payload pointed to by the jc pointer to be
+ * replayed.
+ *
+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
+ * job is failed, as well as any following dependencies.
+ *
+ * The replayed jobs will require a number of atom IDs. If there are not enough
+ * free atom IDs then the replay job will fail.
+ *
+ * If the preceeding job does not fail, then the replay job is returned as
+ * completed.
+ *
+ * The replayed jobs will never be returned to userspace. The preceeding failed
+ * job will be returned to userspace as failed; the status of this job should
+ * be ignored. Completion should be determined by the status of the replay soft
+ * job.
+ *
+ * In order for the jobs to be replayed, the job headers will have to be
+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type
+ * field indicates a Vertex Shader Job then it will be changed to Null Job.
+ *
+ * The replayed jobs have the following assumptions :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - Pre-dependencies are created based on job order.
+ * - Atom numbers are automatically assigned.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ */
+#define BASE_JD_REQ_SOFT_REPLAY                 (BASE_JD_REQ_SOFT_JOB | 0x4)
+/**
+ * SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ *   other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ *   possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET              (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET            (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY             (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/**
+ * SW only requirement: Just In Time allocation
+ *
+ * This job requests a JIT allocation based on the request in the
+ * @base_jit_alloc_info structure which is passed via the jc element of
+ * the atom.
+ *
+ * It should be noted that the id entry in @base_jit_alloc_info must not
+ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job to free the JIT allocation is still made.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC              (BASE_JD_REQ_SOFT_JOB | 0x9)
+/**
+ * SW only requirement: Just In Time free
+ *
+ * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC
+ * to be freed. The ID of the JIT allocation is passed via the jc element of
+ * the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE               (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/**
+ * SW only requirement: Map external resource
+ *
+ * This job requests external resource(s) are mapped once the dependencies
+ * of the job have been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP            (BASE_JD_REQ_SOFT_JOB | 0xb)
+/**
+ * SW only requirement: Unmap external resource
+ *
+ * This job requests external resource(s) are unmapped once the dependencies
+ * of the job has been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP          (BASE_JD_REQ_SOFT_JOB | 0xc)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE    (1U << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP (1U << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE   (1U << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER (1U << 14)
+
+/**
+ * These requirement bits are currently unused in base_jd_core_req (currently a u16)
+ */
+
+#define BASEP_JD_REQ_RESERVED (1U << 15)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED |\
+				BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\
+				BASE_JD_REQ_EXTERNAL_RESOURCES |\
+				BASEP_JD_REQ_EVENT_NEVER |\
+				BASE_JD_REQ_EVENT_COALESCE))
+
+/**
+ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
+ * handles retaining cores for power management and affinity management.
+ *
+ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
+ * where lots of atoms could be submitted before powerup, and each has an
+ * affinity chosen that causes other atoms to have an affinity
+ * violation. Whilst the affinity was not causing violations at the time it
+ * was chosen, it could cause violations thereafter. For example, 1000 jobs
+ * could have had their affinity chosen during the powerup time, so any of
+ * those 1000 jobs could cause an affinity violation later on.
+ *
+ * The attack would otherwise occur because other atoms/contexts have to wait for:
+ * -# the currently running atoms (which are causing the violation) to
+ * finish
+ * -# and, the atoms that had their affinity chosen during powerup to
+ * finish. These are run preferrentially because they don't cause a
+ * violation, but instead continue to cause the violation in others.
+ * -# or, the attacker is scheduled out (which might not happen for just 2
+ * contexts)
+ *
+ * By re-choosing the affinity (which is designed to avoid violations at the
+ * time it's chosen), we break condition (2) of the wait, which minimizes the
+ * problem to just waiting for current jobs to finish (which can be bounded if
+ * the Job Scheduling Policy has a timer).
+ */
+enum kbase_atom_coreref_state {
+	/** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
+	KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
+	/** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
+	KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
+	/** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
+	KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
+	/** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
+	KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
+	/** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
+	KBASE_ATOM_COREREF_STATE_READY
+};
+
+/*
+ * Base Atom priority
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling between atoms of the same type within
+ * a base context, and only after the atoms have had dependencies resolved.
+ * Fragment atoms does not affect non-frament atoms with lower priorities, and
+ * the other way around. For example, a low priority atom that has had its
+ * dependencies resolved might run before a higher priority atom that has not
+ * had its dependencies resolved.
+ *
+ * The scheduling between base contexts/processes and between atoms from
+ * different base contexts/processes is unaffected by atom priority.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
+ *   resolved, and atom 'X' has a higher priority than atom 'Y'
+ * - If atom 'Y' is currently running on the HW, then it is interrupted to
+ *   allow atom 'X' to run soon after
+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ *   the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * - Any two atoms that have the same priority could run in any order with
+ *   respect to each other. That is, there is no ordering constraint between
+ *   atoms of the same priority.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM  ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_HIGH    ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW     ((base_jd_prio)2)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+enum kbase_jd_atom_state {
+	/** Atom is not used */
+	KBASE_JD_ATOM_STATE_UNUSED,
+	/** Atom is queued in JD */
+	KBASE_JD_ATOM_STATE_QUEUED,
+	/** Atom has been given to JS (is runnable/running) */
+	KBASE_JD_ATOM_STATE_IN_JS,
+	/** Atom has been completed, but not yet handed back to job dispatcher
+	 *  for dependency resolution */
+	KBASE_JD_ATOM_STATE_HW_COMPLETED,
+	/** Atom has been completed, but not yet handed back to userspace */
+	KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+	base_atom_id  atom_id;               /**< An atom number */
+	base_jd_dep_type dependency_type;    /**< Dependency type */
+};
+
+typedef struct base_jd_atom_v2 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	kbase_pointer extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	base_jd_core_req core_req;	    /**< core requirements */
+	struct base_dependency pre_dep[2];  /**< pre-dependencies, one need to use SETTER function to assign this field,
+	this is done in order to reduce possibility of improper assigment of a dependency field */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;                  /**< Atom priority. Refer to @ref base_jd_prio for more details */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[5];
+} base_jd_atom_v2;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+struct base_jd_atom_v2_uk6 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	kbase_pointer extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	base_jd_core_req core_req;	    /**< core requirements */
+	base_atom_id pre_dep[2]; /**< pre-dependencies */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;		    /**< priority - smaller is higher priority */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[7];
+};
+#endif
+
+typedef enum base_external_resource_access {
+	BASE_EXT_RES_ACCESS_SHARED,
+	BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+	u64 ext_resource;
+} base_external_resource;
+
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+	u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	u64 address;
+	u64 size;
+	struct base_external_resource extres;
+};
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] dep          The kbase jd atom dependency to be initialized.
+ * @param     id           The atom_id to be assigned.
+ * @param     dep_type     The dep_type to be assigned.
+ *
+ */
+static inline void base_jd_atom_dep_set(struct base_dependency *dep,
+		base_atom_id id, base_jd_dep_type dep_type)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	/*
+	 * make sure we don't set not allowed combinations
+	 * of atom_id/dependency_type.
+	 */
+	LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+			(id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
+
+	dep->atom_id = id;
+	dep->dependency_type = dep_type;
+}
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] dep          The kbase jd atom dependency to be written.
+ * @param[in]     from         The dependency to make a copy from.
+ *
+ */
+static inline void base_jd_atom_dep_copy(struct base_dependency *dep,
+		const struct base_dependency *from)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type);
+}
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ */
+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+	LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init or @a base_fence_import.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ */
+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up an external resource object to reference
+ * a memory allocation and the type of access requested.
+ *
+ * @param[in] res     The resource object to initialize
+ * @param     handle  The handle to the imported memory object, must be
+ *                    obtained by calling @ref base_mem_as_import_handle().
+ * @param     access  The type of access requested
+ */
+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
+{
+	u64 address;
+
+	address = handle.basep.handle;
+
+	LOCAL_ASSERT(res != NULL);
+	LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+	LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+	res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+	BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
+	BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
+	BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */
+	BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
+	BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
+	BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
+	BASE_JD_SW_EVENT_RESERVED = (3u << 11),	/**< Reserved event type */
+	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)	    /**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0  - subtype
+ * @li 12:11 - type
+ * @li 13    - SW success (only valid if the SW bit is set)
+ * @li 14    - SW event (HW event if not set)
+ * @li 15    - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+typedef enum base_jd_event_code {
+	/* HW defined exceptions */
+
+	/** Start of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+	/* non-fatal exceptions */
+	BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
+	BASE_JD_EVENT_DONE = 0x01,
+	BASE_JD_EVENT_STOPPED = 0x03,	  /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
+	BASE_JD_EVENT_TERMINATED = 0x04,  /**< This is actually a fault status code - the job was hard stopped */
+	BASE_JD_EVENT_ACTIVE = 0x08,	  /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
+
+	/** End of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+
+	/** Start of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+	/* job exceptions */
+	BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+	BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+	BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+	BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+	BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+	BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+	BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+	BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+	BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+	BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+	BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+	BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+	BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+	BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+	BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+	BASE_JD_EVENT_STATE_FAULT = 0x5A,
+	BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+	BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+	/* GPU exceptions */
+	BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+	BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+	/* MMU exceptions */
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+	BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+	BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+	/* SW defined exceptions */
+	BASE_JD_EVENT_MEM_GROWTH_FAILED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_TIMED_OUT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+	BASE_JD_EVENT_JOB_CANCELLED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+	BASE_JD_EVENT_JOB_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+	BASE_JD_EVENT_PM_EVENT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+	BASE_JD_EVENT_FORCE_REPLAY	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
+
+	BASE_JD_EVENT_BAG_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+	/** End of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+	BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
+	BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+	/** End of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of Kernel-only status codes. Such codes are never returned to user-space */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+	BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+	/** End of Kernel-only status codes. */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. The can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (ie all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+	base_jd_event_code event_code;  /**< event code */
+	base_atom_id atom_number;       /**< the atom number that has completed */
+	struct base_jd_udata udata;     /**< user data */
+} base_jd_event_v2;
+
+/**
+ * Padding required to ensure that the @ref struct base_dump_cpu_gpu_counters structure fills
+ * a full cache line.
+ */
+
+#define BASE_CPU_GPU_CACHE_LINE_PADDING (36)
+
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field of @ref base_jd_atom.
+ *
+ * This structure must be padded to ensure that it will occupy whole cache lines. This is to avoid
+ * cases where access to pages containing the structure is shared between cached and un-cached
+ * memory regions, which would cause memory corruption.  Here we set the structure size to be 64 bytes
+ * which is the cache line for ARM A15 processors.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+	u64 system_time;
+	u64 cycle_counter;
+	u64 sec;
+	u32 usec;
+	u8 padding[BASE_CPU_GPU_CACHE_LINE_PADDING];
+} base_dump_cpu_gpu_counters;
+
+
+
+/** @} end group base_user_api_job_dispatch */
+
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistant guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ *  - Part of a critical inner-loop
+ *  - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU  registers themselves.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * _mali_base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to _mali_base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties
+ *
+ * The Platform Config File sets up gpu properties that are specific to a
+ * certain platform. Properties that are 'Implementation Defined' in the
+ * Midgard Architecture spec are placed here.
+ *
+ * @note Reference configurations are provided for Midgard Implementations, such as
+ * the Mali-T600 family. The customer need not repeat this information, and can select one of
+ * these reference configurations. For example, VA_BITS, PA_BITS and the
+ * maximum number of samples per pixel might vary between Midgard Implementations, but
+ * \b not for platforms using the Mali-T604. This information is placed in
+ * the reference configuration files.
+ *
+ * The System Integrator creates the following structure:
+ * - platform_XYZ
+ * - platform_XYZ/plat
+ * - platform_XYZ/plat/plat_config.h
+ *
+ * They then edit plat_config.h, using the example plat_config.h files as a
+ * guide.
+ *
+ * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will
+ * receive a helpful \#error message if they do not do this correctly. This
+ * selects the Reference Configuration for the Midgard Implementation. The rationale
+ * behind this decision (against asking the customer to write \#include
+ * <gpus/mali_t600.h> in their plat_config.h) is as follows:
+ * - This mechanism 'looks' like a regular config file (such as Linux's
+ * .config)
+ * - It is difficult to get wrong in a way that will produce strange build
+ * errors:
+ *  - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and
+ *  so they won't accidentally pick another file with 'mali_t600' in its name
+ *  - When the build doesn't work, the System Integrator may think the DDK is
+ *  doesn't work, and attempt to fix it themselves:
+ *   - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the
+ *   error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells
+ *   you.
+ *   - For a \#include mechanism, checks must still be made elsewhere, which the
+ *   System Integrator may try working around by setting \#defines (such as
+ *   VA_BITS) themselves in their plat_config.h. In the  worst case, they may
+ *   set the prevention-mechanism \#define of
+ *   "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN".
+ *   - In this case, they would believe they are on the right track, because
+ *   the build progresses with their fix, but with errors elsewhere.
+ *
+ * However, there is nothing to prevent the customer using \#include to organize
+ * their own configurations files hierarchically.
+ *
+ * The mechanism for the header file processing is as follows:
+ *
+ * @dot
+   digraph plat_config_mechanism {
+	   rankdir=BT
+	   size="6,6"
+
+       "mali_base.h";
+	   "gpu/mali_gpu.h";
+
+	   node [ shape=box ];
+	   {
+	       rank = same; ordering = out;
+
+		   "gpu/mali_gpu_props.h";
+		   "base/midg_gpus/mali_t600.h";
+		   "base/midg_gpus/other_midg_gpu.h";
+	   }
+	   { rank = same; "plat/plat_config.h"; }
+	   {
+	       rank = same;
+		   "gpu/mali_gpu.h" [ shape=box ];
+		   gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ];
+		   select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ;
+	   }
+	   node [ shape=box ];
+	   { rank = same; "plat/plat_config.h"; }
+	   { rank = same; "mali_base.h"; }
+
+	   "mali_base.h" -> "gpu/mali_gpu.h" -> "gpu/mali_gpu_props.h";
+	   "mali_base.h" -> "plat/plat_config.h" ;
+	   "mali_base.h" -> select_gpu ;
+
+	   "plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ;
+	   gpu_chooser -> select_gpu [style="dotted,bold"] ;
+
+	   select_gpu -> "base/midg_gpus/mali_t600.h" ;
+	   select_gpu -> "base/midg_gpus/other_midg_gpu.h" ;
+   }
+   @enddot
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided the entire processed @ref base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algoirthm can be determined either by a u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * requried for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+struct mali_base_gpu_core_props {
+	/**
+	 * Product specific value.
+	 */
+	u32 product_id;
+
+	/**
+	 * Status of the GPU release.
+     * No defined values, but starts at 0 and increases by one for each release
+     * status (alpha, beta, EAC, etc.).
+     * 4 bit values (0-15).
+	 */
+	u16 version_status;
+
+	/**
+	 * Minor release number of the GPU. "P" part of an "RnPn" release number.
+     * 8 bit values (0-255).
+	 */
+	u16 minor_revision;
+
+	/**
+	 * Major release number of the GPU. "R" part of an "RnPn" release number.
+     * 4 bit values (0-15).
+	 */
+	u16 major_revision;
+
+	u16 padding;
+
+	/**
+	 * @usecase GPU clock speed is not specified in the Midgard Architecture, but is
+	 * <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+	 */
+	u32 gpu_speed_mhz;
+
+	/**
+	 * @usecase GPU clock max/min speed is required for computing best/worst case
+	 * in tasks as job scheduling ant irq_throttling. (It is not specified in the
+	 *  Midgard Architecture).
+	 */
+	u32 gpu_freq_khz_max;
+	u32 gpu_freq_khz_min;
+
+	/**
+	 * Size of the shader program counter, in bits.
+	 */
+	u32 log2_program_counter_size;
+
+	/**
+	 * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+	 * bitpattern where a set bit indicates that the format is supported.
+	 *
+	 * Before using a texture format, it is recommended that the corresponding
+	 * bit be checked.
+	 */
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	/**
+	 * Theoretical maximum memory available to the GPU. It is unlikely that a
+	 * client will be able to allocate all of this memory for their own
+	 * purposes, but this at least provides an upper bound on the memory
+	 * available to the GPU.
+	 *
+	 * This is required for OpenCL's clGetDeviceInfo() call when
+	 * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+	 * client will not be expecting to allocate anywhere near this value.
+	 */
+	u64 gpu_available_memory_size;
+};
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+	u8 log2_line_size;
+	u8 log2_cache_size;
+	u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+	u32 bin_size_bytes;	/* Max is 4*2^15 */
+	u32 max_active_levels;	/* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+	u32 max_threads;            /* Max. number of threads per core */
+	u32 max_workgroup_size;     /* Max. number of threads per workgroup */
+	u32 max_barrier_size;       /* Max. number of threads that can synchronize on a simple barrier */
+	u16 max_registers;          /* Total size [1..65535] of the register file available per core. */
+	u8  max_task_queue;         /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+	u8  max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+	u8  impl_tech;              /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+	u8  padding[7];
+};
+
+/**
+ * @brief descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+	u64 core_mask;	       /**< Core restriction mask required for the group */
+	u16 num_cores;	       /**< Number of cores in the group */
+	u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+	u32 num_groups;
+
+	/**
+	 * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+	 *
+	 * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+	 * whether the core groups are coherent or not. Hence this member is needed
+	 * to calculate how much memory is required for dumping.
+	 *
+	 * @note Do not use it to work out how many valid elements are in the
+	 * group[] member. Use num_groups instead.
+	 */
+	u32 num_core_groups;
+
+	/**
+	 * Coherency features of the memory, accessed by @ref gpu_mem_features
+	 * methods
+	 */
+	u32 coherency;
+
+	u32 padding;
+
+	/**
+	 * Descriptors of coherent groups
+	 */
+	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+	u64 shader_present;
+	u64 tiler_present;
+	u64 l2_present;
+	u64 unused_1; /* keep for backward compatibility */
+
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 mem_features;
+	u32 mmu_features;
+
+	u32 as_present;
+
+	u32 js_present;
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 tiler_features;
+	u32 texture_features[3];
+
+	u32 gpu_id;
+
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+
+	/*
+	 * Note: This is the _selected_ coherency mode rather than the
+	 * available modes as exposed in the coherency_features register.
+	 */
+	u32 coherency_mode;
+};
+
+/**
+ * Return structure for _mali_base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this datastructure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct mali_base_gpu_props {
+	struct mali_base_gpu_core_props core_props;
+	struct mali_base_gpu_l2_cache_props l2_props;
+	u64 unused_1; /* keep for backwards compatibility */
+	struct mali_base_gpu_tiler_props tiler_props;
+	struct mali_base_gpu_thread_props thread_props;
+
+	/** This member is large, likely to be 128 bytes */
+	struct gpu_raw_gpu_props raw_props;
+
+	/** This must be last member of the structure */
+	struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * \enum base_context_create_flags
+ *
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as @ref basep_context_private_flags, and so must
+ * not collide with them.
+ */
+enum base_context_create_flags {
+	/** No flags set */
+	BASE_CONTEXT_CREATE_FLAG_NONE = 0,
+
+	/** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */
+	BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0),
+
+	/** Base context is a 'System Monitor' context for Hardware counters.
+	 *
+	 * One important side effect of this is that job submission is disabled. */
+	BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1)
+};
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init()
+ */
+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
+	(((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
+	  ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
+ */
+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
+	((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
+
+/**
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+enum basep_context_private_flags {
+	/** Private flag tracking whether job descriptor dumping is disabled */
+	BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED = (1 << 31)
+};
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C Pre-processor macros are exposed here to do with Platform
+ * Config.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revison.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/**
+ * @brief The payload for a replay job. This must be in GPU memory.
+ */
+typedef struct base_jd_replay_payload {
+	/**
+	 * Pointer to the first entry in the base_jd_replay_jc list.  These
+	 * will be replayed in @b reverse order (so that extra ones can be added
+	 * to the head in future soft jobs without affecting this soft job)
+	 */
+	u64 tiler_jc_list;
+
+	/**
+	 * Pointer to the fragment job chain.
+	 */
+	u64 fragment_jc;
+
+	/**
+	 * Pointer to the tiler heap free FBD field to be modified.
+	 */
+	u64 tiler_heap_free;
+
+	/**
+	 * Hierarchy mask for the replayed fragment jobs. May be zero.
+	 */
+	u16 fragment_hierarchy_mask;
+
+	/**
+	 * Hierarchy mask for the replayed tiler jobs. May be zero.
+	 */
+	u16 tiler_hierarchy_mask;
+
+	/**
+	 * Default weight to be used for hierarchy levels not in the original
+	 * mask.
+	 */
+	u32 hierarchy_default_weight;
+
+	/**
+	 * Core requirements for the tiler job chain
+	 */
+	base_jd_core_req tiler_core_req;
+
+	/**
+	 * Core requirements for the fragment job chain
+	 */
+	base_jd_core_req fragment_core_req;
+
+	u8 padding[4];
+} base_jd_replay_payload;
+
+/**
+ * @brief An entry in the linked list of job chains to be replayed. This must
+ *        be in GPU memory.
+ */
+typedef struct base_jd_replay_jc {
+	/**
+	 * Pointer to next entry in the list. A setting of NULL indicates the
+	 * end of the list.
+	 */
+	u64 next;
+
+	/**
+	 * Pointer to the job chain.
+	 */
+	u64 jc;
+
+} base_jd_replay_jc;
+
+/* Maximum number of jobs allowed in a fragment chain in the payload of a
+ * replay job */
+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
+
+/** @} end group base_api */
+
+typedef struct base_profiling_controls {
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+} base_profiling_controls;
+
+#endif				/* _BASE_KERNEL_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel_sync.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
new file mode 100755
index 0000000..a24791f
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base cross-proccess sync API.
+ */
+
+#ifndef _BASE_KERNEL_SYNC_H_
+#define _BASE_KERNEL_SYNC_H_
+
+#include <linux/ioctl.h>
+
+#define STREAM_IOC_MAGIC '~'
+
+/* Fence insert.
+ *
+ * Inserts a fence on the stream operated on.
+ * Fence can be waited via a base fence wait soft-job
+ * or triggered via a base fence trigger soft-job.
+ *
+ * Fences must be cleaned up with close when no longer needed.
+ *
+ * No input/output arguments.
+ * Returns
+ * >=0 fd
+ * <0  error code
+ */
+#define STREAM_IOC_FENCE_INSERT _IO(STREAM_IOC_MAGIC, 0)
+
+#endif				/* _BASE_KERNEL_SYNC_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100755
index 0000000..4a98a72
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC	(1U << 0)
+#define BASE_SYNCSET_OP_CSYNC	(1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	base_mem_handle mem_handle;
+	u64 user_addr;
+	u64 size;
+	u8 type;
+	u8 padding[7];
+};
+
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
new file mode 100755
index 0000000..be454a2
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif	/*_BASE_VENDOR_SPEC_FUNC_H_*/
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100755
index 0000000..df5e415
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,570 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <asm/page.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_uku.h>
+#include <mali_kbase_linux.h>
+
+#include "mali_kbase_strings.h"
+#include "mali_kbase_pm.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_trace_timeline.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_debug_job_fault.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_vinstr.h"
+#include "mali_kbase_ipa.h"
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+/**
+ * @page page_base_kernel_main Kernel-side Base (KBase) APIs
+ *
+ * The Kernel-side Base (KBase) APIs are divided up as follows:
+ * - @subpage page_kbase_js_policy
+ */
+
+/**
+ * @defgroup base_kbase_api Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: configuration attributes member of kbdev needs to have
+* been setup before calling kbase_device_init
+*/
+
+/*
+* API to acquire device list semaphore and return pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control);
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+void kbase_destroy_context(struct kbase_context *kctx);
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data,
+		int uk6_atom);
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data);
+#endif
+
+/**
+ * kbase_jd_done_worker - Handle a job completion
+ * @data: a &struct work_struct
+ *
+ * This function requeues the job from the runpool (if it was soft-stopped or
+ * removed from NEXT registers).
+ *
+ * Removes it from the system if it finished/failed/was cancelled.
+ *
+ * Resolves dependencies to add dependent jobs to the context, potentially
+ * starting them if necessary (which may add more references to the context)
+ *
+ * Releases the reference to the context from the no-longer-running job.
+ *
+ * Handles retrying submission outside of IRQ context if it failed from within
+ * IRQ context.
+ */
+void kbase_jd_done_worker(struct work_struct *data);
+
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+		kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+bool jd_submit_atom(struct kbase_context *kctx,
+			 const struct base_jd_atom_v2 *user_atom,
+			 struct kbase_jd_atom *katom);
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom);
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ *                                               and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The js_data.runpool_irq.lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		u16 core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+enum hrtimer_restart kbasep_soft_event_timeout_worker(struct hrtimer *timer);
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
+int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status);
+int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status);
+
+/* api used internally for register access. Contains validation and tracing */
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size);
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
+
+/* api to be ported per OS, only need to do the raw register access */
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value);
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset);
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev     The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code  exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+		u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or has already been suspended
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+	return kbdev->pm.suspending;
+}
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int result;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+	result = katom - &kctx->jctx.atoms[0];
+	KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
+	return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id:   ID of atom to retrieve
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+		struct kbase_context *kctx, int id)
+{
+	return &kctx->jctx.atoms[id];
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just increment
+ * the counter during disjoint events we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down
+ * should be called
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if !defined(UINT64_MAX)
+	#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#endif
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)     \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+	kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+	kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+	trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(subcode);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_DEBUG
+/**
+ * kbase_set_driver_inactive - Force driver to go inactive
+ * @kbdev:    Device pointer
+ * @inactive: true if driver should go inactive, false otherwise
+ *
+ * Forcing the driver inactive will cause all future IOCTLs to wait until the
+ * driver is made active again. This is intended solely for the use of tests
+ * which require that no jobs are running while the test executes.
+ */
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifndef RESET0_LEVEL
+extern u32  override_value_aml;
+extern void *reg_base_hiubus;
+#define RESET0_MASK    0x01
+#define RESET1_MASK    0x02
+#define RESET2_MASK    0x03
+
+#define RESET0_LEVEL    0x10
+#define RESET1_LEVEL    0x11
+#define RESET2_LEVEL    0x12
+#define Rd(r)                           readl((reg_base_hiubus) + ((r)<<2))
+#define Wr(r, v)                        writel((v), ((reg_base_hiubus) + ((r)<<2)))
+#define Mali_WrReg(unit, core, regnum, value)   kbase_reg_write(kbdev, (regnum), (value), NULL)
+#define Mali_RdReg(unit, core, regnum)          kbase_reg_read(kbdev, (regnum), NULL)
+#define stimulus_print   printk
+#define stimulus_display printk
+#define Mali_pwr_on(x)   Mali_pwr_on_with_kdev(kbdev, (x))
+#define Mali_pwr_off(x)  Mali_pwr_off_with_kdev(kbdev, (x))
+#endif
+
+
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100755
index 0000000..933aa28
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,209 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* This function is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the
+ * restart index is out of bounds and the rerun causes a tile range fault. If this happens
+ * we try to clamp the restart index to a correct value and rerun the job.
+ */
+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+	struct device *dev = katom->kctx->kbdev->dev;
+	u32   clamped = 0;
+	struct kbase_va_region *region;
+	phys_addr_t *page_array;
+	u64 page_index;
+	u32 offset = katom->jc & (~PAGE_MASK);
+	u32 *page_1 = NULL;
+	u32 *page_2 = NULL;
+	u32   job_header[JOB_HEADER_SIZE_IN_WORDS];
+	void *dst = job_header;
+	u32 minX, minY, maxX, maxY;
+	u32 restartX, restartY;
+	struct page *p;
+	u32 copy_size;
+
+	dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+	if (!(katom->core_req & BASE_JD_REQ_FS))
+		return 0;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+			katom->jc);
+	if (!region || (region->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(region);
+	if (!page_array)
+		goto out_unlock;
+
+	page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+	p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+	/* we need the first 10 words of the fragment shader job descriptor.
+	 * We need to check that the offset + 10 words is less that the page
+	 * size otherwise we need to load the next page.
+	 * page_size_overflow will be equal to 0 in case the whole descriptor
+	 * is within the page > 0 otherwise.
+	 */
+	copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+	page_1 = kmap_atomic(p);
+
+	/* page_1 is a u32 pointer, offset is expressed in bytes */
+	page_1 += offset>>2;
+
+	kbase_sync_single_for_cpu(katom->kctx->kbdev,
+			kbase_dma_addr(p) + offset,
+			copy_size, DMA_BIDIRECTIONAL);
+
+	memcpy(dst, page_1, copy_size);
+
+	/* The data needed overflows page the dimension,
+	 * need to map the subsequent page */
+	if (copy_size < JOB_HEADER_SIZE) {
+		p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+		page_2 = kmap_atomic(p);
+
+		kbase_sync_single_for_cpu(katom->kctx->kbdev,
+				kbase_dma_addr(p),
+				JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+		memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+	}
+
+	/* We managed to correctly map one or two pages (in case of overflow) */
+	/* Get Bounding Box data and restart index from fault address low word */
+	minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+	restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+	dev_warn(dev, "Before Clamping:\n"
+			"Jobstatus: %08x\n"
+			"restartIdx: %08x\n"
+			"Fault_addr_low: %08x\n"
+			"minCoordsX: %08x minCoordsY: %08x\n"
+			"maxCoordsX: %08x maxCoordsY: %08x\n",
+			job_header[JOB_DESC_STATUS_WORD],
+			job_header[JOB_DESC_RESTART_INDEX_WORD],
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+			minX, minY,
+			maxX, maxY);
+
+	/* Set the restart index to the one which generated the fault*/
+	job_header[JOB_DESC_RESTART_INDEX_WORD] =
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+	if (restartX < minX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to minimum. %08x clamped to %08x\n",
+			restartX, minX);
+		clamped =  1;
+	}
+	if (restartY < minY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to minimum. %08x clamped to %08x\n",
+			restartY, minY);
+		clamped =  1;
+	}
+	if (restartX > maxX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to maximum. %08x clamped to %08x\n",
+			restartX, maxX);
+		clamped =  1;
+	}
+	if (restartY > maxY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to maximum. %08x clamped to %08x\n",
+			restartY, maxY);
+		clamped =  1;
+	}
+
+	if (clamped) {
+		/* Reset the fault address low word
+		 * and set the job status to STOPPED */
+		job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+		job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+		dev_warn(dev, "After Clamping:\n"
+				"Jobstatus: %08x\n"
+				"restartIdx: %08x\n"
+				"Fault_addr_low: %08x\n"
+				"minCoordsX: %08x minCoordsY: %08x\n"
+				"maxCoordsX: %08x maxCoordsY: %08x\n",
+				job_header[JOB_DESC_STATUS_WORD],
+				job_header[JOB_DESC_RESTART_INDEX_WORD],
+				job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+				minX, minY,
+				maxX, maxY);
+
+		/* Flush CPU cache to update memory for future GPU reads*/
+		memcpy(page_1, dst, copy_size);
+		p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+		kbase_sync_single_for_device(katom->kctx->kbdev,
+				kbase_dma_addr(p) + offset,
+				copy_size, DMA_TO_DEVICE);
+
+		if (copy_size < JOB_HEADER_SIZE) {
+			memcpy(page_2, dst + copy_size,
+					JOB_HEADER_SIZE - copy_size);
+			p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+
+			kbase_sync_single_for_device(katom->kctx->kbdev,
+					kbase_dma_addr(p),
+					JOB_HEADER_SIZE - copy_size,
+					DMA_TO_DEVICE);
+		}
+	}
+	if (copy_size < JOB_HEADER_SIZE)
+		kunmap_atomic(page_2);
+
+	kunmap_atomic(page_1);
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+	return clamped;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100755
index 0000000..099a298
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100755
index 0000000..c67b3e9
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+	u32 cache_flags = 0;
+
+	CSTD_UNUSED(nr_pages);
+
+	if (flags & BASE_MEM_CACHED_CPU)
+		cache_flags |= KBASE_REG_CPU_CACHED;
+
+	return cache_flags;
+}
+
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		return;
+#endif /* CONFIG_MALI_COH_KERN */
+	dma_sync_single_for_device(kbdev->dev, handle, size, dir);
+}
+
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		return;
+#endif /* CONFIG_MALI_COH_KERN */
+	dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
new file mode 100755
index 0000000..0c18bdb
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * kbase_cache_enabled - Choose the cache policy for a specific region
+ * @flags:    flags describing attributes of the region
+ * @nr_pages: total number of pages (backed or not) for the region
+ *
+ * Tells whether the CPU and GPU caches should be enabled or not for a specific
+ * region.
+ * This function can be modified to customize the cache policy depending on the
+ * flags and size of the region.
+ *
+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED
+ *         depending on the cache policy
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
new file mode 100755
index 0000000..fb615ae
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
@@ -0,0 +1,51 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+int kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_init_func)
+		return platform_funcs_p->platform_init_func(kbdev);
+
+	return 0;
+}
+
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_term_func)
+		platform_funcs_p->platform_term_func(kbdev);
+}
+
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed)
+{
+	KBASE_DEBUG_ASSERT(NULL != clock_speed);
+
+	*clock_speed = 100;
+	return 0;
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
new file mode 100755
index 0000000..356d52b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
@@ -0,0 +1,345 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <asm/page.h>
+
+#include <mali_malisw.h>
+#include <mali_kbase_backend_config.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+#include <linux/rbtree.h>
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/**
+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ *
+ * Specifies the functions pointers for platform specific initialization and
+ * termination. By default no functions are required. No additional platform
+ * specific control is necessary.
+ */
+struct kbase_platform_funcs_conf {
+	/**
+	 * platform_init_func - platform specific init function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Returns 0 on success, negative error code otherwise.
+	 *
+	 * Function pointer for platform specific initialization or NULL if no
+	 * initialization function is required. At the point this the GPU is
+	 * not active and its power and clocks are in unknown (platform specific
+	 * state) as kbase doesn't yet have control of power and clocks.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly initialized) in here.
+	 */
+	int (*platform_init_func)(struct kbase_device *kbdev);
+	/**
+	 * platform_term_func - platform specific termination function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Function pointer for platform specific termination or NULL if no
+	 * termination function is required. At the point this the GPU will be
+	 * idle but still powered and clocked.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly terminated) in here.
+	 */
+	void (*platform_term_func)(struct kbase_device *kbdev);
+};
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+	/** Callback for when the GPU is idle and the power to it can be switched off.
+	 *
+	 * The system integrator can decide whether to either do nothing, just switch off
+	 * the clocks to the GPU, or to completely power down the GPU.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the GPU is about to become active and power must be supplied.
+	 *
+	 * This function must not return until the GPU is powered and clocked sufficiently for register access to
+	 * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
+	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 *
+	 * The return value of the first call to this function is ignored.
+	 *
+	 * @return 1 if the GPU state may have been lost, 0 otherwise.
+	 */
+	int (*power_on_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is requesting a suspend and GPU power
+	 * must be switched off.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a preceding call to power_off_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_off_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is resuming from a suspend and GPU
+	 * power must be switched on.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a following call to power_on_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_on_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_resume_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management initialization.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * will become active from calls made to the OS from within this function.
+	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else int error code.
+	 */
+	 int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management termination.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * should no longer be called by the OS on completion of this function.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-off power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_suspend callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
+	 */
+	void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-on power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_resume callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+
+	/*
+	 * Optional callback for checking if GPU can be suspended when idle
+	 *
+	 * This callback will be called by the runtime power management core
+	 * when the reference count goes to 0 to provide notification that the
+	 * GPU now seems idle.
+	 *
+	 * If this callback finds that the GPU can't be powered off, or handles
+	 * suspend by powering off directly or queueing up a power off, a
+	 * non-zero value must be returned to prevent the runtime PM core from
+	 * also triggering a suspend.
+	 *
+	 * Returning 0 will cause the runtime PM core to conduct a regular
+	 * autosuspend.
+	 *
+	 * This callback is optional and if not provided regular autosuspend
+	 * will be triggered.
+	 *
+	 * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+	 * this feature.
+	 *
+	 * Return 0 if GPU can be suspended, positive value if it can not be
+	 * suspeneded by runtime PM, else OS error code
+	 */
+	int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
+};
+
+/**
+ * kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC
+ * @clock_speed - see  kbase_cpu_clk_speed_func for details on the parameters
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * Default implementation of CPU_SPEED_FUNC. This function sets clock_speed
+ * to 100, so will be an underestimate for any real system.
+ */
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed);
+
+/**
+ * kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current CPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ *
+ * This is mainly used to implement OpenCL's clGetDeviceInfo().
+ */
+typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed);
+
+/**
+ * kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current GPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ * When an error is returned the caller assumes maximum GPU speed stored in
+ * gpu_freq_khz_max.
+ *
+ * If the system timer is not available then this function is required
+ * for the OpenCL queue profiling to return correct timing information.
+ *
+ */
+typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed);
+
+#ifdef CONFIG_OF
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+	u64 start;
+	u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ */
+struct kbase_io_resources {
+	u32                      job_irq_number;
+	u32                      mmu_irq_number;
+	u32                      gpu_irq_number;
+	struct kbase_io_memory_region io_memory_region;
+};
+
+struct kbase_platform_config {
+	const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init: - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes.  The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ *         Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that are outside of the Power Management
+ * callbacks.
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_platform_early_init - Early initialisation of the platform code
+ *
+ * This function will be called when the module is loaded to perform any
+ * early initialisation required by the platform code. Such as reading
+ * platform specific device tree entries for the GPU.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_early_init(void);
+
+#ifndef CONFIG_OF
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+/**
+ * kbase_platform_fake_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_fake_register()
+ */
+void kbase_platform_fake_unregister(void);
+#endif
+#endif
+
+	  /** @} *//* end group kbase_config */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_CONFIG_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100755
index 0000000..ee7c96d
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,260 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+ * Irq throttle. It is the minimum desired time in between two
+ * consecutive gpu interrupts (given in 'us'). The irq throttle
+ * gpu register will be configured after this, taking into
+ * account the configured max frequency.
+ *
+ * Attached value: number in micro seconds
+ */
+#define DEFAULT_IRQ_THROTTLE_TIME_US 20
+
+/**
+ *  Default Job Scheduler initial runtime of a context for the CFS Policy,
+ *  in time-slices.
+ *
+ * This value is relative to that of the least-run context, and defines
+ * where in the CFS queue a new context is added. A value of 1 means 'after
+ * the least-run context has used its timeslice'. Therefore, when all
+ * contexts consistently use the same amount of time, a value of 1 models a
+ * FIFO. A value of 0 would model a LIFO.
+ *
+ * The value is represented in "numbers of time slices". Multiply this
+ * value by that defined in @ref DEFAULT_JS_CTX_TIMESLICE_NS to get
+ * the time value for this in nanoseconds.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES 1
+
+/**
+ * Default Job Scheduler minimum runtime value of a context for CFS, in
+ * time_slices relative to that of the least-run context.
+ *
+ * This is a measure of how much preferrential treatment is given to a
+ * context that is not run very often.
+ *
+ * Specficially, this value defines how many timeslices such a context is
+ * (initially) allowed to use at once. Such contexts (e.g. 'interactive'
+ * processes) will appear near the front of the CFS queue, and can initially
+ * use more time than contexts that run continuously (e.g. 'batch'
+ * processes).
+ *
+ * This limit \b prevents a "stored-up timeslices" DoS attack, where a ctx
+ * not run for a long time attacks the system by using a very large initial
+ * number of timeslices when it finally does run.
+ *
+ * @note A value of zero allows not-run-often contexts to get scheduled in
+ * quickly, but to only use a single timeslice when they get scheduled in.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES 2
+
+/**
+* Boolean indicating whether the driver is configured to be secure at
+* a potential loss of performance.
+*
+* This currently affects only r0p0-15dev0 HW and earlier.
+*
+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+* performance:
+*
+* - When this is set to true, the driver remains fully secure,
+* but potentially loses performance compared with setting this to
+* false.
+* - When set to false, the driver is open to certain security
+* attacks.
+*
+* From r0p0-00rel0 and onwards, there is no security loss by setting
+* this to false, and no performance loss by setting it to
+* true.
+*/
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 */
+	KBASE_AID_32 = 0x0,
+
+	/**
+	 * Restrict GPU to a half of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_16 = 0x3,
+
+	/**
+	 * Restrict GPU to a quarter of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_8  = 0x2,
+
+	/**
+	 * Restrict GPU to an eighth of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_4  = 0x1
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
+ * defines which UMP device this GPU should be mapped to.
+ */
+#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
+
+/*
+ * Default period for DVFS sampling
+ */
+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+
+/*
+ * Power Management poweroff tick granuality. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/*
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/*
+ * Power Manager number of ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
+
+/*
+ * Default scheduling tick granuality
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
+
+/*
+ * Default timeout for software event jobs, after which these jobs will be
+ * cancelled.
+ */
+#define DEFAULT_JS_SOFT_EVENT_TIMEOUT ((u32)3000) /* 3s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
+ */
+#define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */
+
+/*
+ * Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
+/*
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
new file mode 100755
index 0000000..0548d2b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -0,0 +1,317 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_instr.h>
+#include <mali_kbase_mem_linux.h>
+
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context.
+ *
+ * Return: new kbase context
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+{
+	struct kbase_context *kctx;
+	int err;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* zero-inited as lot of code assume it's zero'ed out on create */
+	kctx = vzalloc(sizeof(*kctx));
+
+	if (!kctx)
+		goto out;
+
+	/* creating a context is considered a disjoint event */
+	kbase_disjoint_event(kbdev);
+
+	kctx->kbdev = kbdev;
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+	kctx->is_compat = is_compat;
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kctx->timeline.owner_tgid = task_tgid_nr(current);
+#endif
+	atomic_set(&kctx->setup_complete, 0);
+	atomic_set(&kctx->setup_in_progress, 0);
+	kctx->infinite_cache_active = 0;
+	spin_lock_init(&kctx->mm_update_lock);
+	kctx->process_mm = NULL;
+	atomic_set(&kctx->nonmapped_pages, 0);
+	kctx->slots_pullable = 0;
+	kctx->tgid = current->tgid;
+	kctx->pid = current->pid;
+
+	err = kbase_mem_pool_init(&kctx->mem_pool,
+			kbdev->mem_pool_max_size_default,
+			kctx->kbdev, &kbdev->mem_pool);
+	if (err)
+		goto free_kctx;
+
+	err = kbase_mem_evictable_init(kctx);
+	if (err)
+		goto free_pool;
+
+	atomic_set(&kctx->used_pages, 0);
+
+	err = kbase_jd_init(kctx);
+	if (err)
+		goto deinit_evictable;
+
+	err = kbasep_js_kctx_init(kctx);
+	if (err)
+		goto free_jd;	/* safe to call kbasep_js_kctx_term  in this case */
+
+	err = kbase_event_init(kctx);
+	if (err)
+		goto free_jd;
+
+	mutex_init(&kctx->reg_lock);
+
+	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+	spin_lock_init(&kctx->waiting_soft_jobs_lock);
+#ifdef CONFIG_KDS
+	INIT_LIST_HEAD(&kctx->waiting_kds_resource);
+#endif
+	err = kbase_dma_fence_init(kctx);
+	if (err)
+		goto free_event;
+
+	err = kbase_mmu_init(kctx);
+	if (err)
+		goto term_dma_fence;
+
+	kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+	if (!kctx->pgd)
+		goto free_mmu;
+
+	kctx->aliasing_sink_page = kbase_mem_pool_alloc(&kctx->mem_pool);
+	if (!kctx->aliasing_sink_page)
+		goto no_sink_page;
+
+	init_waitqueue_head(&kctx->event_queue);
+
+	kctx->cookies = KBASE_COOKIE_MASK;
+
+	/* Make sure page 0 is not used... */
+	err = kbase_region_tracker_init(kctx);
+	if (err)
+		goto no_region_tracker;
+
+	err = kbase_sticky_resource_init(kctx);
+	if (err)
+		goto no_sticky;
+
+	err = kbase_jit_init(kctx);
+	if (err)
+		goto no_jit;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_set(&kctx->jctx.work_id, 0);
+#endif
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
+#endif
+
+	kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+	mutex_init(&kctx->vinstr_cli_lock);
+
+	hrtimer_init(&kctx->soft_event_timeout, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL);
+	kctx->soft_event_timeout.function = &kbasep_soft_event_timeout_worker;
+
+	return kctx;
+
+no_jit:
+	kbase_gpu_vm_lock(kctx);
+	kbase_sticky_resource_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+no_sticky:
+	kbase_region_tracker_term(kctx);
+no_region_tracker:
+	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
+no_sink_page:
+	/* VM lock needed for the call to kbase_mmu_free_pgd */
+	kbase_gpu_vm_lock(kctx);
+	kbase_mmu_free_pgd(kctx);
+	kbase_gpu_vm_unlock(kctx);
+free_mmu:
+	kbase_mmu_term(kctx);
+term_dma_fence:
+	kbase_dma_fence_term(kctx);
+free_event:
+	kbase_event_cleanup(kctx);
+free_jd:
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+	kbase_jd_exit(kctx);
+deinit_evictable:
+	kbase_mem_evictable_deinit(kctx);
+free_pool:
+	kbase_mem_pool_term(&kctx->mem_pool);
+free_kctx:
+	vfree(kctx);
+out:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context);
+
+static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+{
+	dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+}
+
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Calls kbase_destroy_os_context() to free OS specific structures.
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	int pages;
+	unsigned long pending_regions_to_clean;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+	/* Ensure the core is powered up for the destroy process */
+	/* A suspend won't happen here, because we're in a syscall from a userspace
+	 * thread. */
+	kbase_pm_context_active(kbdev);
+
+	kbase_jd_zap_context(kctx);
+	kbase_event_cleanup(kctx);
+
+	/*
+	 * JIT must be terminated before the code below as it must be called
+	 * without the region lock being held.
+	 * The code above ensures no new JIT allocations can be made by
+	 * by the time we get to this point of context tear down.
+	 */
+	kbase_jit_term(kctx);
+
+	kbase_gpu_vm_lock(kctx);
+
+	kbase_sticky_resource_term(kctx);
+
+	/* MMU is disabled as part of scheduling out the context */
+	kbase_mmu_free_pgd(kctx);
+
+	/* drop the aliasing sink page now that it can't be mapped anymore */
+	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
+
+	/* free pending region setups */
+	pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+	while (pending_regions_to_clean) {
+		unsigned int cookie = __ffs(pending_regions_to_clean);
+
+		BUG_ON(!kctx->pending_regions[cookie]);
+
+		kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+
+		kctx->pending_regions[cookie] = NULL;
+		pending_regions_to_clean &= ~(1UL << cookie);
+	}
+
+	kbase_region_tracker_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+
+	kbase_jd_exit(kctx);
+
+	kbase_pm_context_idle(kbdev);
+
+	kbase_dma_fence_term(kctx);
+
+	kbase_mmu_term(kctx);
+
+	pages = atomic_read(&kctx->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_evictable_deinit(kctx);
+	kbase_mem_pool_term(&kctx->mem_pool);
+	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+	vfree(kctx);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context);
+
+/**
+ * kbase_context_set_create_flags - Set creation flags on a context
+ * @kctx: Kbase context
+ * @flags: Flags to set
+ *
+ * Return: 0 on success
+ */
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
+{
+	int err = 0;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long irq_flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Validate flags */
+	if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+
+	/* Translate the flags */
+	if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+		js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED);
+
+	/* Latch the initial attributes into the Job Scheduler */
+	kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
+
+	spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock,
+			irq_flags);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ out:
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
new file mode 100755
index 0000000..9d9b1a5
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -0,0 +1,4323 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_uku.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_instr.h>
+#include <mali_kbase_gator.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <mali_kbase_mem_linux.h>
+#ifdef CONFIG_MALI_DEVFREQ
+#include <backend/gpu/mali_kbase_devfreq.h>
+#endif /* CONFIG_MALI_DEVFREQ */
+#ifdef CONFIG_MALI_NO_MALI
+#include "mali_kbase_model_linux.h"
+#endif /* CONFIG_MALI_NO_MALI */
+#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_debugfs.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_regs_dump_debugfs.h"
+#endif /* !MALI_CUSTOMER_RELEASE */
+#include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#include <linux/anon_inodes.h>
+#include <linux/syscalls.h>
+#endif /* CONFIG_KDS */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/compat.h>	/* is_compat_task */
+#include <linux/mman.h>
+#include <linux/version.h>
+#include <linux/security.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+#include <mali_kbase_hw.h>
+#include <platform/mali_kbase_platform_common.h>
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+#include <platform/mali_kbase_platform_fake.h>
+#endif /*CONFIG_MALI_PLATFORM_FAKE */
+#ifdef CONFIG_SYNC
+#include <mali_kbase_sync.h>
+#endif /* CONFIG_SYNC */
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_PM_DEVFREQ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+
+#include <mali_kbase_config.h>
+
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif
+
+#include <mali_kbase_tlstream.h>
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+#if MALI_UNIT_TEST
+static struct kbase_exported_test_data shared_kernel_test_data;
+EXPORT_SYMBOL(shared_kernel_test_data);
+#endif /* MALI_UNIT_TEST */
+
+static int kbase_dev_nr;
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+extern int mali_pm_statue;
+#endif
+
+static DEFINE_MUTEX(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+
+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
+static inline void __compile_time_asserts(void)
+{
+	CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE);
+}
+
+#ifdef CONFIG_KDS
+
+struct kbasep_kds_resource_set_file_data {
+	struct kds_resource_set *lock;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file);
+
+static const struct file_operations kds_resource_fops = {
+	.release = kds_resource_release
+};
+
+struct kbase_kds_resource_list_data {
+	struct kds_resource **kds_resources;
+	unsigned long *kds_access_bitmap;
+	int num_elems;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file)
+{
+	struct kbasep_kds_resource_set_file_data *data;
+
+	data = (struct kbasep_kds_resource_set_file_data *)file->private_data;
+	if (NULL != data) {
+		if (NULL != data->lock)
+			kds_resource_set_release(&data->lock);
+
+		kfree(data);
+	}
+	return 0;
+}
+
+static int kbasep_kds_allocate_resource_list_data(struct kbase_context *kctx, struct base_external_resource *ext_res, int num_elems, struct kbase_kds_resource_list_data *resources_list)
+{
+	struct base_external_resource *res = ext_res;
+	int res_id;
+
+	/* assume we have to wait for all */
+
+	KBASE_DEBUG_ASSERT(0 != num_elems);
+	resources_list->kds_resources = kmalloc_array(num_elems,
+			sizeof(struct kds_resource *), GFP_KERNEL);
+
+	if (NULL == resources_list->kds_resources)
+		return -ENOMEM;
+
+	KBASE_DEBUG_ASSERT(0 != num_elems);
+	resources_list->kds_access_bitmap = kzalloc(
+			sizeof(unsigned long) *
+			((num_elems + BITS_PER_LONG - 1) / BITS_PER_LONG),
+			GFP_KERNEL);
+
+	if (NULL == resources_list->kds_access_bitmap) {
+		kfree(resources_list->kds_access_bitmap);
+		return -ENOMEM;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+	for (res_id = 0; res_id < num_elems; res_id++, res++) {
+		int exclusive;
+		struct kbase_va_region *reg;
+		struct kds_resource *kds_res = NULL;
+
+		exclusive = res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+		/* did we find a matching region object? */
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+			break;
+
+		/* no need to check reg->alloc as only regions with an alloc has
+		 * a size, and kbase_region_tracker_find_region_enclosing_address
+		 * only returns regions with size > 0 */
+		switch (reg->gpu_alloc->type) {
+#if defined(CONFIG_UMP) && defined(CONFIG_KDS)
+		case KBASE_MEM_TYPE_IMPORTED_UMP:
+			kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle);
+			break;
+#endif /* defined(CONFIG_UMP) && defined(CONFIG_KDS) */
+		default:
+			break;
+		}
+
+		/* no kds resource for the region ? */
+		if (!kds_res)
+			break;
+
+		resources_list->kds_resources[res_id] = kds_res;
+
+		if (exclusive)
+			set_bit(res_id, resources_list->kds_access_bitmap);
+	}
+	kbase_gpu_vm_unlock(kctx);
+
+	/* did the loop run to completion? */
+	if (res_id == num_elems)
+		return 0;
+
+	/* Clean up as the resource list is not valid. */
+	kfree(resources_list->kds_resources);
+	kfree(resources_list->kds_access_bitmap);
+
+	return -EINVAL;
+}
+
+static bool kbasep_validate_kbase_pointer(
+		struct kbase_context *kctx, union kbase_pointer *p)
+{
+	if (kctx->is_compat) {
+		if (p->compat_value == 0)
+			return false;
+	} else {
+		if (NULL == p->value)
+			return false;
+	}
+	return true;
+}
+
+static int kbase_external_buffer_lock(struct kbase_context *kctx,
+		struct kbase_uk_ext_buff_kds_data *args, u32 args_size)
+{
+	struct base_external_resource *ext_res_copy;
+	size_t ext_resource_size;
+	int ret = -EINVAL;
+	int fd = -EBADF;
+	struct base_external_resource __user *ext_res_user;
+	int __user *file_desc_usr;
+	struct kbasep_kds_resource_set_file_data *fdata;
+	struct kbase_kds_resource_list_data resource_list_data;
+
+	if (args_size != sizeof(struct kbase_uk_ext_buff_kds_data))
+		return -EINVAL;
+
+	/* Check user space has provided valid data */
+	if (!kbasep_validate_kbase_pointer(kctx, &args->external_resource) ||
+			!kbasep_validate_kbase_pointer(kctx, &args->file_descriptor) ||
+			(0 == args->num_res) ||
+			(args->num_res > KBASE_MAXIMUM_EXT_RESOURCES))
+		return -EINVAL;
+
+	ext_resource_size = sizeof(struct base_external_resource) * args->num_res;
+
+	KBASE_DEBUG_ASSERT(0 != ext_resource_size);
+	ext_res_copy = kmalloc(ext_resource_size, GFP_KERNEL);
+
+	if (!ext_res_copy)
+		return -EINVAL;
+#ifdef CONFIG_COMPAT
+	if (kctx->is_compat) {
+		ext_res_user = compat_ptr(args->external_resource.compat_value);
+		file_desc_usr = compat_ptr(args->file_descriptor.compat_value);
+	} else {
+#endif /* CONFIG_COMPAT */
+		ext_res_user = args->external_resource.value;
+		file_desc_usr = args->file_descriptor.value;
+#ifdef CONFIG_COMPAT
+	}
+#endif /* CONFIG_COMPAT */
+
+	/* Copy the external resources to lock from user space */
+	if (copy_from_user(ext_res_copy, ext_res_user, ext_resource_size))
+		goto out;
+
+	/* Allocate data to be stored in the file */
+	fdata = kmalloc(sizeof(*fdata), GFP_KERNEL);
+
+	if (!fdata) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* Parse given elements and create resource and access lists */
+	ret = kbasep_kds_allocate_resource_list_data(kctx,
+			ext_res_copy, args->num_res, &resource_list_data);
+	if (!ret) {
+		long err;
+
+		fdata->lock = NULL;
+
+		fd = anon_inode_getfd("kds_ext", &kds_resource_fops, fdata, 0);
+
+		err = copy_to_user(file_desc_usr, &fd, sizeof(fd));
+
+		/* If the file descriptor was valid and we successfully copied
+		 * it to user space, then we can try and lock the requested
+		 * kds resources.
+		 */
+		if ((fd >= 0) && (0 == err)) {
+			struct kds_resource_set *lock;
+
+			lock = kds_waitall(args->num_res,
+					resource_list_data.kds_access_bitmap,
+					resource_list_data.kds_resources,
+					KDS_WAIT_BLOCKING);
+
+			if (!lock) {
+				ret = -EINVAL;
+			} else if (IS_ERR(lock)) {
+				ret = PTR_ERR(lock);
+			} else {
+				ret = 0;
+				fdata->lock = lock;
+			}
+		} else {
+			ret = -EINVAL;
+		}
+
+		kfree(resource_list_data.kds_resources);
+		kfree(resource_list_data.kds_access_bitmap);
+	}
+
+	if (ret) {
+		/* If the file was opened successfully then close it which will
+		 * clean up the file data, otherwise we clean up the file data
+		 * ourself.
+		 */
+		if (fd >= 0)
+			sys_close(fd);
+		else
+			kfree(fdata);
+	}
+out:
+	kfree(ext_res_copy);
+
+	return ret;
+}
+#endif /* CONFIG_KDS */
+
+static void kbase_create_timeline_objects(struct kbase_context *kctx)
+{
+	struct kbase_device             *kbdev = kctx->kbdev;
+	unsigned int                    lpu_id;
+	unsigned int                    as_nr;
+	struct kbasep_kctx_list_element *element;
+
+	/* Create LPU objects. */
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		u32 *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		kbase_tlstream_tl_summary_new_lpu(lpu, lpu_id, *lpu);
+	}
+
+	/* Create Address Space objects. */
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		kbase_tlstream_tl_summary_new_as(&kbdev->as[as_nr], as_nr);
+
+	/* Create GPU object and make it retain all LPUs and address spaces. */
+	kbase_tlstream_tl_summary_new_gpu(
+			kbdev,
+			kbdev->gpu_props.props.raw_props.gpu_id,
+			kbdev->gpu_props.num_cores);
+
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		void *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, kbdev);
+	}
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		kbase_tlstream_tl_summary_lifelink_as_gpu(
+				&kbdev->as[as_nr],
+				kbdev);
+
+	/* Create object for each known context. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry(element, &kbdev->kctx_list, link) {
+		kbase_tlstream_tl_summary_new_ctx(
+				element->kctx,
+				(u32)(element->kctx->id),
+				(u32)(element->kctx->tgid));
+	}
+	/* Before releasing the lock, reset body stream buffers.
+	 * This will prevent context creation message to be directed to both
+	 * summary and body stream. */
+	kbase_tlstream_reset_body_streams();
+	mutex_unlock(&kbdev->kctx_list_lock);
+	/* Static object are placed into summary packet that needs to be
+	 * transmitted first. Flush all streams to make it available to
+	 * user space. */
+	kbase_tlstream_flush_streams();
+}
+
+static void kbase_api_handshake(struct uku_version_check_args *version)
+{
+	switch (version->major) {
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	case 6:
+		/* We are backwards compatible with version 6,
+		 * so pretend to be the old version */
+		version->major = 6;
+		version->minor = 1;
+		break;
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+#ifdef BASE_LEGACY_UK7_SUPPORT
+	case 7:
+		/* We are backwards compatible with version 7,
+		 * so pretend to be the old version */
+		version->major = 7;
+		version->minor = 1;
+		break;
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	case 8:
+		/* We are backwards compatible with version 8,
+		 * so pretend to be the old version */
+		version->major = 8;
+		version->minor = 4;
+		break;
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+#ifdef BASE_LEGACY_UK9_SUPPORT
+	case 9:
+		/* We are backwards compatible with version 9,
+		 * so pretend to be the old version */
+		version->major = 9;
+		version->minor = 0;
+		break;
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+	case BASE_UK_VERSION_MAJOR:
+		/* set minor to be the lowest common */
+		version->minor = min_t(int, BASE_UK_VERSION_MINOR,
+				(int)version->minor);
+		break;
+	default:
+		/* We return our actual version regardless if it
+		 * matches the version returned by userspace -
+		 * userspace can bail if it can't handle this
+		 * version */
+		version->major = BASE_UK_VERSION_MAJOR;
+		version->minor = BASE_UK_VERSION_MINOR;
+		break;
+	}
+}
+
+/**
+ * enum mali_error - Mali error codes shared with userspace
+ *
+ * This is subset of those common Mali errors that can be returned to userspace.
+ * Values of matching user and kernel space enumerators MUST be the same.
+ * MALI_ERROR_NONE is guaranteed to be 0.
+ */
+enum mali_error {
+	MALI_ERROR_NONE = 0,
+	MALI_ERROR_OUT_OF_GPU_MEMORY,
+	MALI_ERROR_OUT_OF_MEMORY,
+	MALI_ERROR_FUNCTION_FAILED,
+};
+
+enum {
+	inited_mem = (1u << 0),
+	inited_js = (1u << 1),
+	inited_pm_runtime_init = (1u << 2),
+#ifdef CONFIG_MALI_DEVFREQ
+	inited_devfreq = (1u << 3),
+#endif /* CONFIG_MALI_DEVFREQ */
+	inited_tlstream = (1u << 4),
+	inited_backend_early = (1u << 5),
+	inited_backend_late = (1u << 6),
+	inited_device = (1u << 7),
+	inited_vinstr = (1u << 8),
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	inited_ipa = (1u << 9),
+#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+	inited_job_fault = (1u << 10),
+	inited_misc_register = (1u << 11),
+	inited_get_device = (1u << 12),
+	inited_sysfs_group = (1u << 13),
+	inited_dev_list = (1u << 14),
+	inited_debugfs = (1u << 15),
+	inited_gpu_device = (1u << 16),
+	inited_registers_map = (1u << 17),
+	inited_power_control = (1u << 19),
+	inited_buslogger = (1u << 20)
+};
+
+
+#ifdef CONFIG_MALI_DEBUG
+#define INACTIVE_WAIT_MS (5000)
+
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive)
+{
+	kbdev->driver_inactive = inactive;
+	wake_up(&kbdev->driver_inactive_wait);
+
+	/* Wait for any running IOCTLs to complete */
+	if (inactive)
+		msleep(INACTIVE_WAIT_MS);
+}
+KBASE_EXPORT_TEST_API(kbase_set_driver_inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 args_size)
+{
+	struct kbase_device *kbdev;
+	union uk_header *ukh = args;
+	u32 id;
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(ukh != NULL);
+
+	kbdev = kctx->kbdev;
+	id = ukh->id;
+	ukh->ret = MALI_ERROR_NONE; /* Be optimistic */
+
+#ifdef CONFIG_MALI_DEBUG
+	wait_event(kbdev->driver_inactive_wait,
+			kbdev->driver_inactive == false);
+#endif /* CONFIG_MALI_DEBUG */
+
+	if (UKP_FUNC_ID_CHECK_VERSION == id) {
+		struct uku_version_check_args *version_check;
+
+		if (args_size != sizeof(struct uku_version_check_args)) {
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			return 0;
+		}
+		version_check = (struct uku_version_check_args *)args;
+		kbase_api_handshake(version_check);
+		/* save the proposed version number for later use */
+		kctx->api_version = KBASE_API_VERSION(version_check->major,
+				version_check->minor);
+		ukh->ret = MALI_ERROR_NONE;
+		return 0;
+	}
+
+	/* block calls until version handshake */
+	if (kctx->api_version == 0)
+		return -EINVAL;
+
+	if (!atomic_read(&kctx->setup_complete)) {
+		struct kbase_uk_set_flags *kbase_set_flags;
+
+		/* setup pending, try to signal that we'll do the setup,
+		 * if setup was already in progress, err this call
+		 */
+		if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0)
+			return -EINVAL;
+
+		/* if unexpected call, will stay stuck in setup mode
+		 * (is it the only call we accept?)
+		 */
+		if (id != KBASE_FUNC_SET_FLAGS)
+			return -EINVAL;
+
+		kbase_set_flags = (struct kbase_uk_set_flags *)args;
+
+		/* if not matching the expected call, stay in setup mode */
+		if (sizeof(*kbase_set_flags) != args_size)
+			goto bad_size;
+
+		/* if bad flags, will stay stuck in setup mode */
+		if (kbase_context_set_create_flags(kctx,
+				kbase_set_flags->create_flags) != 0)
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+		atomic_set(&kctx->setup_complete, 1);
+		return 0;
+	}
+
+	/* setup complete, perform normal operation */
+	switch (id) {
+	case KBASE_FUNC_MEM_JIT_INIT:
+		{
+			struct kbase_uk_mem_jit_init *jit_init = args;
+
+			if (sizeof(*jit_init) != args_size)
+				goto bad_size;
+
+			if (kbase_region_tracker_init_jit(kctx,
+					jit_init->va_pages))
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_MEM_ALLOC:
+		{
+			struct kbase_uk_mem_alloc *mem = args;
+			struct kbase_va_region *reg;
+
+			if (sizeof(*mem) != args_size)
+				goto bad_size;
+
+#if defined(CONFIG_64BIT)
+			if (!kctx->is_compat) {
+				/* force SAME_VA if a 64-bit client */
+				mem->flags |= BASE_MEM_SAME_VA;
+			}
+#endif
+
+			reg = kbase_mem_alloc(kctx, mem->va_pages,
+					mem->commit_pages, mem->extent,
+					&mem->flags, &mem->gpu_va,
+					&mem->va_alignment);
+			if (!reg)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_MEM_IMPORT: {
+			struct kbase_uk_mem_import *mem_import = args;
+			void __user *phandle;
+
+			if (sizeof(*mem_import) != args_size)
+				goto bad_size;
+#ifdef CONFIG_COMPAT
+			if (kctx->is_compat)
+				phandle = compat_ptr(mem_import->phandle.compat_value);
+			else
+#endif
+				phandle = mem_import->phandle.value;
+
+			if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_import(kctx, mem_import->type, phandle,
+						&mem_import->gpu_va,
+						&mem_import->va_pages,
+						&mem_import->flags)) {
+				mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID;
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			}
+			break;
+	}
+	case KBASE_FUNC_MEM_ALIAS: {
+			struct kbase_uk_mem_alias *alias = args;
+			struct base_mem_aliasing_info __user *user_ai;
+			struct base_mem_aliasing_info *ai;
+
+			if (sizeof(*alias) != args_size)
+				goto bad_size;
+
+			if (alias->nents > 2048) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+			if (!alias->nents) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+#ifdef CONFIG_COMPAT
+			if (kctx->is_compat)
+				user_ai = compat_ptr(alias->ai.compat_value);
+			else
+#endif
+				user_ai = alias->ai.value;
+
+			ai = vmalloc(sizeof(*ai) * alias->nents);
+
+			if (!ai) {
+				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+				break;
+			}
+
+			if (copy_from_user(ai, user_ai,
+					   sizeof(*ai) * alias->nents)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				goto copy_failed;
+			}
+
+			alias->gpu_va = kbase_mem_alias(kctx, &alias->flags,
+							alias->stride,
+							alias->nents, ai,
+							&alias->va_pages);
+			if (!alias->gpu_va) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				goto no_alias;
+			}
+no_alias:
+copy_failed:
+			vfree(ai);
+			break;
+		}
+	case KBASE_FUNC_MEM_COMMIT:
+		{
+			struct kbase_uk_mem_commit *commit = args;
+
+			if (sizeof(*commit) != args_size)
+				goto bad_size;
+
+			if (commit->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_COMMIT: commit->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_commit(kctx, commit->gpu_addr,
+					commit->pages,
+					(base_backing_threshold_status *)
+					&commit->result_subcode) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+
+	case KBASE_FUNC_MEM_QUERY:
+		{
+			struct kbase_uk_mem_query *query = args;
+
+			if (sizeof(*query) != args_size)
+				goto bad_size;
+
+			if (query->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+			if (query->query != KBASE_MEM_QUERY_COMMIT_SIZE &&
+			    query->query != KBASE_MEM_QUERY_VA_SIZE &&
+				query->query != KBASE_MEM_QUERY_FLAGS) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->query = %lld unknown", (unsigned long long)query->query);
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_query(kctx, query->gpu_addr,
+					query->query, &query->value) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			break;
+		}
+		break;
+
+	case KBASE_FUNC_MEM_FLAGS_CHANGE:
+		{
+			struct kbase_uk_mem_flags_change *fc = args;
+
+			if (sizeof(*fc) != args_size)
+				goto bad_size;
+
+			if ((fc->gpu_va & ~PAGE_MASK) && (fc->gpu_va >= PAGE_SIZE)) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FLAGS_CHANGE: mem->gpu_va: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_flags_change(kctx, fc->gpu_va,
+					fc->flags, fc->mask) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+	case KBASE_FUNC_MEM_FREE:
+		{
+			struct kbase_uk_mem_free *mem = args;
+
+			if (sizeof(*mem) != args_size)
+				goto bad_size;
+
+			if ((mem->gpu_addr & ~PAGE_MASK) && (mem->gpu_addr >= PAGE_SIZE)) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FREE: mem->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_free(kctx, mem->gpu_addr) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+
+	case KBASE_FUNC_JOB_SUBMIT:
+		{
+			struct kbase_uk_job_submit *job = args;
+
+			if (sizeof(*job) != args_size)
+				goto bad_size;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+			if (kbase_jd_submit(kctx, job, 0) != 0)
+#else
+			if (kbase_jd_submit(kctx, job) != 0)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	case KBASE_FUNC_JOB_SUBMIT_UK6:
+		{
+			struct kbase_uk_job_submit *job = args;
+
+			if (sizeof(*job) != args_size)
+				goto bad_size;
+
+			if (kbase_jd_submit(kctx, job, 1) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+#endif
+
+	case KBASE_FUNC_SYNC:
+		{
+			struct kbase_uk_sync_now *sn = args;
+
+			if (sizeof(*sn) != args_size)
+				goto bad_size;
+
+			if (sn->sset.basep_sset.mem_handle.basep.handle & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+#ifndef CONFIG_MALI_COH_USER
+			if (kbase_sync_now(kctx, &sn->sset) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif
+			break;
+		}
+
+	case KBASE_FUNC_DISJOINT_QUERY:
+		{
+			struct kbase_uk_disjoint_query *dquery = args;
+
+			if (sizeof(*dquery) != args_size)
+				goto bad_size;
+
+			/* Get the disjointness counter value. */
+			dquery->counter = kbase_disjoint_event_get(kctx->kbdev);
+			break;
+		}
+
+	case KBASE_FUNC_POST_TERM:
+		{
+			kbase_event_close(kctx);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_SETUP:
+		{
+			struct kbase_uk_hwcnt_setup *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_legacy_hwc_setup(kbdev->vinstr_ctx,
+					&kctx->vinstr_cli, setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_DUMP:
+		{
+			/* args ignored */
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwc_dump(kctx->vinstr_cli,
+					BASE_HWCNT_READER_EVENT_MANUAL) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_CLEAR:
+		{
+			/* args ignored */
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwc_clear(kctx->vinstr_cli) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_READER_SETUP:
+		{
+			struct kbase_uk_hwcnt_reader_setup *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwcnt_reader_setup(kbdev->vinstr_ctx,
+					setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_GPU_PROPS_REG_DUMP:
+		{
+			struct kbase_uk_gpuprops *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			if (kbase_gpuprops_uk_get_props(kctx, setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_FIND_CPU_OFFSET:
+		{
+			struct kbase_uk_find_cpu_offset *find = args;
+
+			if (sizeof(*find) != args_size)
+				goto bad_size;
+
+			if (find->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_FIND_CPU_OFFSET: find->gpu_addr: passed parameter is invalid");
+				goto out_bad;
+			}
+
+			if (find->size > SIZE_MAX || find->cpu_addr > ULONG_MAX) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else {
+				int err;
+
+				err = kbasep_find_enclosing_cpu_mapping_offset(
+						kctx,
+						find->gpu_addr,
+						(uintptr_t) find->cpu_addr,
+						(size_t) find->size,
+						&find->offset);
+
+				if (err)
+					ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			}
+			break;
+		}
+	case KBASE_FUNC_GET_VERSION:
+		{
+			struct kbase_uk_get_ddk_version *get_version = (struct kbase_uk_get_ddk_version *)args;
+
+			if (sizeof(*get_version) != args_size)
+				goto bad_size;
+
+			/* version buffer size check is made in compile time assert */
+			memcpy(get_version->version_buffer, KERNEL_SIDE_DDK_VERSION_STRING, sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+			get_version->version_string_size = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+			break;
+		}
+
+	case KBASE_FUNC_STREAM_CREATE:
+		{
+#ifdef CONFIG_SYNC
+			struct kbase_uk_stream_create *screate = (struct kbase_uk_stream_create *)args;
+
+			if (sizeof(*screate) != args_size)
+				goto bad_size;
+
+			if (strnlen(screate->name, sizeof(screate->name)) >= sizeof(screate->name)) {
+				/* not NULL terminated */
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_stream_create(screate->name, &screate->fd) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+#else /* CONFIG_SYNC */
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif /* CONFIG_SYNC */
+			break;
+		}
+	case KBASE_FUNC_FENCE_VALIDATE:
+		{
+#ifdef CONFIG_SYNC
+			struct kbase_uk_fence_validate *fence_validate = (struct kbase_uk_fence_validate *)args;
+
+			if (sizeof(*fence_validate) != args_size)
+				goto bad_size;
+
+			if (kbase_fence_validate(fence_validate->fd) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+#endif /* CONFIG_SYNC */
+			break;
+		}
+
+	case KBASE_FUNC_EXT_BUFFER_LOCK:
+		{
+#ifdef CONFIG_KDS
+			ret = kbase_external_buffer_lock(kctx,
+				(struct kbase_uk_ext_buff_kds_data *)args,
+				args_size);
+			switch (ret) {
+			case 0:
+				ukh->ret = MALI_ERROR_NONE;
+				break;
+			case -ENOMEM:
+				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+				break;
+			default:
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			}
+#endif /* CONFIG_KDS */
+			break;
+		}
+
+	case KBASE_FUNC_SET_TEST_DATA:
+		{
+#if MALI_UNIT_TEST
+			struct kbase_uk_set_test_data *set_data = args;
+
+			shared_kernel_test_data = set_data->test_data;
+			shared_kernel_test_data.kctx.value = (void __user *)kctx;
+			shared_kernel_test_data.mm.value = (void __user *)current->mm;
+			ukh->ret = MALI_ERROR_NONE;
+#endif /* MALI_UNIT_TEST */
+			break;
+		}
+
+	case KBASE_FUNC_INJECT_ERROR:
+		{
+#ifdef CONFIG_MALI_ERROR_INJECT
+			unsigned long flags;
+			struct kbase_error_params params = ((struct kbase_uk_error_params *)args)->params;
+
+			/*mutex lock */
+			spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+			if (job_atom_inject_error(&params) != 0)
+				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+			/*mutex unlock */
+#endif /* CONFIG_MALI_ERROR_INJECT */
+			break;
+		}
+
+	case KBASE_FUNC_MODEL_CONTROL:
+		{
+#ifdef CONFIG_MALI_NO_MALI
+			unsigned long flags;
+			struct kbase_model_control_params params =
+					((struct kbase_uk_model_control_params *)args)->params;
+
+			/*mutex lock */
+			spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+			if (gpu_model_control(kbdev->model, &params) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+			/*mutex unlock */
+#endif /* CONFIG_MALI_NO_MALI */
+			break;
+		}
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	case KBASE_FUNC_KEEP_GPU_POWERED:
+		{
+			dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_KEEP_GPU_POWERED: function is deprecated and disabled\n");
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+	case KBASE_FUNC_GET_PROFILING_CONTROLS:
+		{
+			struct kbase_uk_profiling_controls *controls =
+					(struct kbase_uk_profiling_controls *)args;
+			u32 i;
+
+			if (sizeof(*controls) != args_size)
+				goto bad_size;
+
+			for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+				controls->profiling_controls[i] = kbase_get_profiling_control(kbdev, i);
+
+			break;
+		}
+
+	/* used only for testing purposes; these controls are to be set by gator through gator API */
+	case KBASE_FUNC_SET_PROFILING_CONTROLS:
+		{
+			struct kbase_uk_profiling_controls *controls =
+					(struct kbase_uk_profiling_controls *)args;
+			u32 i;
+
+			if (sizeof(*controls) != args_size)
+				goto bad_size;
+
+			for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+				_mali_profiling_control(i, controls->profiling_controls[i]);
+
+			break;
+		}
+
+	case KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD:
+		{
+			struct kbase_uk_debugfs_mem_profile_add *add_data =
+					(struct kbase_uk_debugfs_mem_profile_add *)args;
+			char *buf;
+			char __user *user_buf;
+
+			if (sizeof(*add_data) != args_size)
+				goto bad_size;
+
+			if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+				dev_err(kbdev->dev, "buffer too big\n");
+				goto out_bad;
+			}
+
+#ifdef CONFIG_COMPAT
+			if (kctx->is_compat)
+				user_buf = compat_ptr(add_data->buf.compat_value);
+			else
+#endif
+				user_buf = add_data->buf.value;
+
+			buf = kmalloc(add_data->len, GFP_KERNEL);
+			if (!buf)
+				goto out_bad;
+
+			if (0 != copy_from_user(buf, user_buf, add_data->len)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				kfree(buf);
+				goto out_bad;
+			}
+
+			if (kbasep_mem_profile_debugfs_insert(kctx, buf,
+							add_data->len)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				kfree(buf);
+				goto out_bad;
+			}
+
+			break;
+		}
+
+#ifdef CONFIG_MALI_NO_MALI
+	case KBASE_FUNC_SET_PRFCNT_VALUES:
+		{
+
+			struct kbase_uk_prfcnt_values *params =
+			  ((struct kbase_uk_prfcnt_values *)args);
+			gpu_model_set_dummy_prfcnt_sample(params->data,
+					params->size);
+
+			break;
+		}
+#endif /* CONFIG_MALI_NO_MALI */
+
+	case KBASE_FUNC_TLSTREAM_ACQUIRE:
+		{
+			struct kbase_uk_tlstream_acquire *tlstream_acquire =
+				args;
+
+			if (sizeof(*tlstream_acquire) != args_size)
+				goto bad_size;
+
+			if (0 != kbase_tlstream_acquire(
+						kctx,
+						&tlstream_acquire->fd)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else if (0 <= tlstream_acquire->fd) {
+				/* Summary stream was cleared during acquire.
+				 * Create static timeline objects that will be
+				 * read by client. */
+				kbase_create_timeline_objects(kctx);
+			}
+			break;
+		}
+	case KBASE_FUNC_TLSTREAM_FLUSH:
+		{
+			struct kbase_uk_tlstream_flush *tlstream_flush =
+				args;
+
+			if (sizeof(*tlstream_flush) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_flush_streams();
+			break;
+		}
+#if MALI_UNIT_TEST
+	case KBASE_FUNC_TLSTREAM_TEST:
+		{
+			struct kbase_uk_tlstream_test *tlstream_test = args;
+
+			if (sizeof(*tlstream_test) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_test(
+					tlstream_test->tpw_count,
+					tlstream_test->msg_delay,
+					tlstream_test->msg_count,
+					tlstream_test->aux_msg);
+			break;
+		}
+	case KBASE_FUNC_TLSTREAM_STATS:
+		{
+			struct kbase_uk_tlstream_stats *tlstream_stats = args;
+
+			if (sizeof(*tlstream_stats) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_stats(
+					&tlstream_stats->bytes_collected,
+					&tlstream_stats->bytes_generated);
+			break;
+		}
+#endif /* MALI_UNIT_TEST */
+
+	case KBASE_FUNC_GET_CONTEXT_ID:
+		{
+			struct kbase_uk_context_id *info = args;
+
+			info->id = kctx->id;
+			break;
+		}
+
+	case KBASE_FUNC_SOFT_EVENT_UPDATE:
+		{
+			struct kbase_uk_soft_event_update *update = args;
+
+			if (sizeof(*update) != args_size)
+				goto bad_size;
+
+			if (((update->new_status != BASE_JD_SOFT_EVENT_SET) &&
+			    (update->new_status != BASE_JD_SOFT_EVENT_RESET)) ||
+			    (update->flags != 0))
+				goto out_bad;
+
+			if (kbasep_write_soft_event_status(
+						kctx, update->evt,
+						update->new_status) != 0) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (update->new_status == BASE_JD_SOFT_EVENT_SET)
+				kbasep_complete_triggered_soft_events(
+						kctx, update->evt);
+
+			break;
+		}
+
+	default:
+		dev_err(kbdev->dev, "unknown ioctl %u\n", id);
+		goto out_bad;
+	}
+
+	return ret;
+
+ bad_size:
+	dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id);
+ out_bad:
+	return -EINVAL;
+}
+
+static struct kbase_device *to_kbase_device(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
+
+static int assign_irqs(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int i;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* 3 IRQ resources */
+	for (i = 0; i < 3; i++) {
+		struct resource *irq_res;
+		int irqtag;
+
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res) {
+			dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+			return -ENOENT;
+		}
+
+#ifdef CONFIG_OF
+		if (!strcmp(irq_res->name, "JOB")) {
+			irqtag = JOB_IRQ_TAG;
+		} else if (!strcmp(irq_res->name, "MMU")) {
+			irqtag = MMU_IRQ_TAG;
+		} else if (!strcmp(irq_res->name, "GPU")) {
+			irqtag = GPU_IRQ_TAG;
+		} else {
+			dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+				irq_res->name);
+			return -EINVAL;
+		}
+#else
+		irqtag = i;
+#endif /* CONFIG_OF */
+		kbdev->irqs[irqtag].irq = irq_res->start;
+		kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK;
+	}
+
+	return 0;
+}
+
+/*
+ * API to acquire device list mutex and
+ * return pointer to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void)
+{
+	mutex_lock(&kbase_dev_list_lock);
+	return &kbase_dev_list;
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_get);
+
+/* API to release the device list mutex */
+void kbase_dev_list_put(const struct list_head *dev_list)
+{
+	mutex_unlock(&kbase_dev_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_put);
+
+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */
+struct kbase_device *kbase_find_device(int minor)
+{
+	struct kbase_device *kbdev = NULL;
+	struct list_head *entry;
+	const struct list_head *dev_list = kbase_dev_list_get();
+
+	list_for_each(entry, dev_list) {
+		struct kbase_device *tmp;
+
+		tmp = list_entry(entry, struct kbase_device, entry);
+		if (tmp->mdev.minor == minor || minor == -1) {
+			kbdev = tmp;
+			get_device(kbdev->dev);
+			break;
+		}
+	}
+	kbase_dev_list_put(dev_list);
+
+	return kbdev;
+}
+EXPORT_SYMBOL(kbase_find_device);
+
+void kbase_release_device(struct kbase_device *kbdev)
+{
+	put_device(kbdev->dev);
+}
+EXPORT_SYMBOL(kbase_release_device);
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+	struct kbase_device *kbdev = NULL;
+	struct kbase_context *kctx;
+	int ret = 0;
+#ifdef CONFIG_DEBUG_FS
+	char kctx_name[64];
+#endif
+
+	kbdev = kbase_find_device(iminor(inode));
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kctx = kbase_create_context(kbdev, is_compat_task());
+	if (!kctx) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	init_waitqueue_head(&kctx->event_queue);
+	filp->private_data = kctx;
+	kctx->filp = filp;
+
+	kctx->infinite_cache_active = kbdev->infinite_cache_active_default;
+
+#ifdef CONFIG_DEBUG_FS
+	snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+
+	kctx->kctx_dentry = debugfs_create_dir(kctx_name,
+			kbdev->debugfs_ctx_directory);
+
+	if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+#ifdef CONFIG_MALI_COH_USER
+	 /* if cache is completely coherent at hardware level, then remove the
+	  * infinite cache control support from debugfs.
+	  */
+#else
+	debugfs_create_bool("infinite_cache", 0644, kctx->kctx_dentry,
+			&kctx->infinite_cache_active);
+#endif /* CONFIG_MALI_COH_USER */
+
+	mutex_init(&kctx->mem_profile_lock);
+
+	kbasep_jd_debugfs_ctx_add(kctx);
+	kbase_debug_mem_view_init(filp);
+
+	kbase_debug_job_fault_context_init(kctx);
+
+	kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool);
+
+	kbase_jit_debugfs_add(kctx);
+#endif /* CONFIG_DEBUGFS */
+
+	dev_dbg(kbdev->dev, "created base context\n");
+
+	{
+		struct kbasep_kctx_list_element *element;
+
+		element = kzalloc(sizeof(*element), GFP_KERNEL);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			element->kctx = kctx;
+			list_add(&element->link, &kbdev->kctx_list);
+			kbase_tlstream_tl_new_ctx(
+					element->kctx,
+					(u32)(element->kctx->id),
+					(u32)(element->kctx->tgid));
+			mutex_unlock(&kbdev->kctx_list_lock);
+		} else {
+			/* we don't treat this as a fail - just warn about it */
+			dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n");
+		}
+	}
+	return 0;
+
+ out:
+	kbase_release_device(kbdev);
+	return ret;
+}
+
+static int kbase_release(struct inode *inode, struct file *filp)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_kctx_list_element *element, *tmp;
+	bool found_element = false;
+
+	kbase_tlstream_tl_del_ctx(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	debugfs_remove_recursive(kctx->kctx_dentry);
+	kbasep_mem_profile_debugfs_remove(kctx);
+	kbase_debug_job_fault_context_term(kctx);
+#endif
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found_element = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+	if (!found_element)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	filp->private_data = NULL;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	/* If this client was performing hwcnt dumping and did not explicitly
+	 * detach itself, remove it from the vinstr core now */
+	if (kctx->vinstr_cli) {
+		struct kbase_uk_hwcnt_setup setup;
+
+		setup.dump_buffer = 0llu;
+		kbase_vinstr_legacy_hwc_setup(
+				kbdev->vinstr_ctx, &kctx->vinstr_cli, &setup);
+	}
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	kbase_destroy_context(kctx);
+
+	dev_dbg(kbdev->dev, "deleted base context\n");
+	kbase_release_device(kbdev);
+	return 0;
+}
+
+#define CALL_MAX_SIZE 536
+
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	u64 msg[(CALL_MAX_SIZE + 7) >> 3] = { 0xdeadbeefdeadbeefull };	/* alignment fixup */
+	u32 size = _IOC_SIZE(cmd);
+	struct kbase_context *kctx = filp->private_data;
+
+	if (size > CALL_MAX_SIZE)
+		return -ENOTTY;
+
+	if (0 != copy_from_user(&msg, (void __user *)arg, size)) {
+		dev_err(kctx->kbdev->dev, "failed to copy ioctl argument into kernel space\n");
+		return -EFAULT;
+	}
+
+	if (kbase_dispatch(kctx, &msg, size) != 0)
+		return -EFAULT;
+
+	if (0 != copy_to_user((void __user *)arg, &msg, size)) {
+		dev_err(kctx->kbdev->dev, "failed to copy results of UK call back to user space\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct base_jd_event_v2 uevent;
+	int out_count = 0;
+
+	if (count < sizeof(uevent))
+		return -ENOBUFS;
+
+	do {
+		while (kbase_event_dequeue(kctx, &uevent)) {
+			if (out_count > 0)
+				goto out;
+
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			if (wait_event_interruptible(kctx->event_queue,
+					kbase_event_pending(kctx)) != 0)
+				return -ERESTARTSYS;
+		}
+		if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+			if (out_count == 0)
+				return -EPIPE;
+			goto out;
+		}
+
+		if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0)
+			return -EFAULT;
+
+		buf += sizeof(uevent);
+		out_count++;
+		count -= sizeof(uevent);
+	} while (count >= sizeof(uevent));
+
+ out:
+	return out_count * sizeof(uevent);
+}
+
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+	struct kbase_context *kctx = filp->private_data;
+
+	poll_wait(filp, &kctx->event_queue, wait);
+	if (kbase_event_pending(kctx))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup);
+
+static int kbase_check_flags(int flags)
+{
+	/* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
+	 * closes the file descriptor in a child process.
+	 */
+	if (0 == (flags & O_CLOEXEC))
+		return -EINVAL;
+
+	return 0;
+}
+
+#ifdef CONFIG_64BIT
+/* The following function is taken from the kernel and just
+ * renamed. As it's not exported to modules we must copy-paste it here.
+ */
+
+static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
+		*info)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+
+	/* Adjust search length to account for worst case alignment overhead */
+	length = info->length + info->align_mask;
+	if (length < info->length)
+		return -ENOMEM;
+
+	/*
+	 * Adjust search limits by the desired length.
+	 * See implementation comment at top of unmapped_area().
+	 */
+	gap_end = info->high_limit;
+	if (gap_end < length)
+		return -ENOMEM;
+	high_limit = gap_end - length;
+
+	if (info->low_limit > high_limit)
+		return -ENOMEM;
+	low_limit = info->low_limit + length;
+
+	/* Check highest gap, which does not precede any rbtree node */
+	gap_start = mm->highest_vm_end;
+	if (gap_start <= high_limit)
+		goto found_highest;
+
+	/* Check if rbtree root looks promising */
+	if (RB_EMPTY_ROOT(&mm->mm_rb))
+		return -ENOMEM;
+	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+	if (vma->rb_subtree_gap < length)
+		return -ENOMEM;
+
+	while (true) {
+		/* Visit right subtree if it looks promising */
+		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+			struct vm_area_struct *right =
+				rb_entry(vma->vm_rb.rb_right,
+					 struct vm_area_struct, vm_rb);
+			if (right->rb_subtree_gap >= length) {
+				vma = right;
+				continue;
+			}
+		}
+
+check_current:
+		/* Check if current node has a suitable gap */
+		gap_end = vma->vm_start;
+		if (gap_end < low_limit)
+			return -ENOMEM;
+		if (gap_start <= high_limit && gap_end - gap_start >= length)
+			goto found;
+
+		/* Visit left subtree if it looks promising */
+		if (vma->vm_rb.rb_left) {
+			struct vm_area_struct *left =
+				rb_entry(vma->vm_rb.rb_left,
+					 struct vm_area_struct, vm_rb);
+			if (left->rb_subtree_gap >= length) {
+				vma = left;
+				continue;
+			}
+		}
+
+		/* Go back up the rbtree to find next candidate node */
+		while (true) {
+			struct rb_node *prev = &vma->vm_rb;
+			if (!rb_parent(prev))
+				return -ENOMEM;
+			vma = rb_entry(rb_parent(prev),
+				       struct vm_area_struct, vm_rb);
+			if (prev == vma->vm_rb.rb_right) {
+				gap_start = vma->vm_prev ?
+					vma->vm_prev->vm_end : 0;
+				goto check_current;
+			}
+		}
+	}
+
+found:
+	/* We found a suitable gap. Clip it with the original high_limit. */
+	if (gap_end > info->high_limit)
+		gap_end = info->high_limit;
+
+found_highest:
+	/* Compute highest gap address at the desired alignment */
+	gap_end -= info->length;
+	gap_end -= (gap_end - info->align_offset) & info->align_mask;
+
+	VM_BUG_ON(gap_end < info->low_limit);
+	VM_BUG_ON(gap_end < gap_start);
+	return gap_end;
+}
+
+
+static unsigned long kbase_get_unmapped_area(struct file *filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	/* based on get_unmapped_area, but simplified slightly due to that some
+	 * values are known in advance */
+	struct kbase_context *kctx = filp->private_data;
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+
+	/* err on fixed address */
+	if ((flags & MAP_FIXED) || addr)
+		return -EINVAL;
+
+	/* too big? */
+	if (len > TASK_SIZE - SZ_2M)
+		return -ENOMEM;
+
+	if (kctx->is_compat)
+		return current->mm->get_unmapped_area(filp, addr, len, pgoff,
+				flags);
+
+	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) {
+		info.high_limit = kctx->same_va_end << PAGE_SHIFT;
+		info.align_mask = 0;
+		info.align_offset = 0;
+	} else {
+		info.high_limit = min_t(unsigned long, mm->mmap_base,
+					(kctx->same_va_end << PAGE_SHIFT));
+		if (len >= SZ_2M) {
+			info.align_offset = SZ_2M;
+			info.align_mask = SZ_2M - 1;
+		} else {
+			info.align_mask = 0;
+			info.align_offset = 0;
+		}
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = SZ_2M;
+	return kbase_unmapped_area_topdown(&info);
+}
+#endif
+
+static const struct file_operations kbase_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_open,
+	.release = kbase_release,
+	.read = kbase_read,
+	.poll = kbase_poll,
+	.unlocked_ioctl = kbase_ioctl,
+	.compat_ioctl = kbase_ioctl,
+	.mmap = kbase_mmap,
+	.check_flags = kbase_check_flags,
+#ifdef CONFIG_64BIT
+	.get_unmapped_area = kbase_get_unmapped_area,
+#endif
+};
+
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value)
+{
+	writel(value, kbdev->reg + offset);
+}
+
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset)
+{
+	return readl(kbdev->reg + offset);
+}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+/** Show callback for the @c power_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *current_policy;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_get_policy(kbdev);
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c power_policy sysfs file.
+ *
+ * This function is called when the @c power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls @ref kbase_pm_set_policy to change the
+ * policy.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "power_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/** The sysfs file @c power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+
+/** Show callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c core_availability_policy
+ * sysfs file. This is a list of the available policies with the currently
+ * active one surrounded by square brackets.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *current_policy;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_ca_get_policy(kbdev);
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called when the @c core_availability_policy sysfs file is
+ * written to. It matches the requested policy against the available policies
+ * and if a matching policy is found calls @ref kbase_pm_set_policy to change
+ * the policy.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *new_policy = NULL;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "core_availability_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_ca_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/** The sysfs file @c core_availability_policy
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy);
+
+/** Show callback for the @c core_mask sysfs file.
+ *
+ * This function is called to get the contents of the @c core_mask sysfs
+ * file.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS0) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[0]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS1) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[1]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS2) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[2]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Available core mask : 0x%llX\n",
+			kbdev->gpu_props.props.raw_props.shader_present);
+
+	return ret;
+}
+
+/** Store callback for the @c core_mask sysfs file.
+ *
+ * This function is called when the @c core_mask sysfs file is written to.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	u64 new_core_mask[3];
+	int items;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llx %llx %llx",
+			&new_core_mask[0], &new_core_mask[1],
+			&new_core_mask[2]);
+
+	if (items == 1)
+		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+
+	if (items == 1 || items == 3) {
+		u64 shader_present =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		u64 group0_core_mask =
+				kbdev->gpu_props.props.coherency_info.group[0].
+				core_mask;
+
+		if ((new_core_mask[0] & shader_present) != new_core_mask[0] ||
+				!(new_core_mask[0] & group0_core_mask) ||
+			(new_core_mask[1] & shader_present) !=
+						new_core_mask[1] ||
+				!(new_core_mask[1] & group0_core_mask) ||
+			(new_core_mask[2] & shader_present) !=
+						new_core_mask[2] ||
+				!(new_core_mask[2] & group0_core_mask)) {
+			dev_err(dev, "power_policy: invalid core specification\n");
+			return -EINVAL;
+		}
+
+		if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
+				kbdev->pm.debug_core_mask[1] !=
+						new_core_mask[1] ||
+				kbdev->pm.debug_core_mask[2] !=
+						new_core_mask[2]) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+			kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+					new_core_mask[1], new_core_mask[2]);
+
+			spin_unlock_irqrestore(&kbdev->pm.power_change_lock,
+					flags);
+		}
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n"
+		"Use format <core_mask>\n"
+		"or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+	return -EINVAL;
+}
+
+/** The sysfs file @c core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+
+/**
+ * set_soft_event_timeout() - Store callback for the soft_event_timeout sysfs
+ * file.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This allows setting the timeout for software event jobs. Waiting jobs will
+ * be cancelled after this period expires. This is expressed in milliseconds.
+ *
+ * Return: count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_soft_event_timeout(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int soft_event_timeout_ms;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if ((kstrtoint(buf, 0, &soft_event_timeout_ms) != 0) ||
+	    (soft_event_timeout_ms <= 0))
+		return -EINVAL;
+
+	atomic_set(&kbdev->js_data.soft_event_timeout_ms,
+		   soft_event_timeout_ms);
+
+	return count;
+}
+
+/**
+ * show_soft_event_timeout() - Show callback for the soft_event_timeout sysfs
+ * file.
+ *
+ * This will return the timeout for the software event jobs.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_soft_event_timeout(struct device *dev,
+				       struct device_attribute *attr,
+				       char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n",
+			 atomic_read(&kbdev->js_data.soft_event_timeout_ms));
+}
+
+static DEVICE_ATTR(soft_event_timeout, S_IRUGO | S_IWUSR,
+		   show_soft_event_timeout, set_soft_event_timeout);
+
+/** Store callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the @c js_timeouts sysfs
+ * file. This file contains five values separated by whitespace. The values
+ * are basically the same as JS_SOFT_STOP_TICKS, JS_HARD_STOP_TICKS_SS,
+ * JS_HARD_STOP_TICKS_DUMPING, JS_RESET_TICKS_SS, JS_RESET_TICKS_DUMPING
+ * configuration values (in that order), with the difference that the js_timeout
+ * values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfile file allows the current values in
+ * use by the job scheduler to get override. Note that a value needs to
+ * be other than 0 for it to override the current job scheduler value.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	long js_soft_stop_ms;
+	long js_soft_stop_ms_cl;
+	long js_hard_stop_ms_ss;
+	long js_hard_stop_ms_cl;
+	long js_hard_stop_ms_dumping;
+	long js_reset_ms_ss;
+	long js_reset_ms_cl;
+	long js_reset_ms_dumping;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld",
+			&js_soft_stop_ms, &js_soft_stop_ms_cl,
+			&js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+			&js_hard_stop_ms_dumping, &js_reset_ms_ss,
+			&js_reset_ms_cl, &js_reset_ms_dumping);
+
+	if (items == 8) {
+		u64 ticks;
+
+		if (js_soft_stop_ms >= 0) {
+			ticks = js_soft_stop_ms * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_soft_stop_ticks = ticks;
+		} else {
+			kbdev->js_soft_stop_ticks = -1;
+		}
+
+		if (js_soft_stop_ms_cl >= 0) {
+			ticks = js_soft_stop_ms_cl * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_soft_stop_ticks_cl = ticks;
+		} else {
+			kbdev->js_soft_stop_ticks_cl = -1;
+		}
+
+		if (js_hard_stop_ms_ss >= 0) {
+			ticks = js_hard_stop_ms_ss * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_hard_stop_ticks_ss = ticks;
+		} else {
+			kbdev->js_hard_stop_ticks_ss = -1;
+		}
+
+		if (js_hard_stop_ms_cl >= 0) {
+			ticks = js_hard_stop_ms_cl * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_hard_stop_ticks_cl = ticks;
+		} else {
+			kbdev->js_hard_stop_ticks_cl = -1;
+		}
+
+		if (js_hard_stop_ms_dumping >= 0) {
+			ticks = js_hard_stop_ms_dumping * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_hard_stop_ticks_dumping = ticks;
+		} else {
+			kbdev->js_hard_stop_ticks_dumping = -1;
+		}
+
+		if (js_reset_ms_ss >= 0) {
+			ticks = js_reset_ms_ss * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_reset_ticks_ss = ticks;
+		} else {
+			kbdev->js_reset_ticks_ss = -1;
+		}
+
+		if (js_reset_ms_cl >= 0) {
+			ticks = js_reset_ms_cl * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_reset_ticks_cl = ticks;
+		} else {
+			kbdev->js_reset_ticks_cl = -1;
+		}
+
+		if (js_reset_ms_dumping >= 0) {
+			ticks = js_reset_ms_dumping * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_reset_ticks_dumping = ticks;
+		} else {
+			kbdev->js_reset_ticks_dumping = -1;
+		}
+
+		kbdev->js_timeouts_updated = true;
+
+		dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS with %lu ticks (%lu ms)\n",
+				(unsigned long)kbdev->js_soft_stop_ticks,
+				js_soft_stop_ms);
+		dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS_CL with %lu ticks (%lu ms)\n",
+				(unsigned long)kbdev->js_soft_stop_ticks_cl,
+				js_soft_stop_ms_cl);
+		dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_SS with %lu ticks (%lu ms)\n",
+				(unsigned long)kbdev->js_hard_stop_ticks_ss,
+				js_hard_stop_ms_ss);
+		dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_CL with %lu ticks (%lu ms)\n",
+				(unsigned long)kbdev->js_hard_stop_ticks_cl,
+				js_hard_stop_ms_cl);
+		dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_DUMPING with %lu ticks (%lu ms)\n",
+				(unsigned long)
+					kbdev->js_hard_stop_ticks_dumping,
+				js_hard_stop_ms_dumping);
+		dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_SS with %lu ticks (%lu ms)\n",
+				(unsigned long)kbdev->js_reset_ticks_ss,
+				js_reset_ms_ss);
+		dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_CL with %lu ticks (%lu ms)\n",
+				(unsigned long)kbdev->js_reset_ticks_cl,
+				js_reset_ms_cl);
+		dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_DUMPING with %lu ticks (%lu ms)\n",
+				(unsigned long)kbdev->js_reset_ticks_dumping,
+				js_reset_ms_dumping);
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n"
+			"Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n"
+			"Write 0 for no change, -1 to restore default timeout\n");
+	return -EINVAL;
+}
+
+/** Show callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the @c js_timeouts sysfs
+ * file. It returns the last set values written to the js_timeouts sysfs file.
+ * If the file didn't get written yet, the values will be current setting in
+ * use.
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+	u64 ms;
+	unsigned long js_soft_stop_ms;
+	unsigned long js_soft_stop_ms_cl;
+	unsigned long js_hard_stop_ms_ss;
+	unsigned long js_hard_stop_ms_cl;
+	unsigned long js_hard_stop_ms_dumping;
+	unsigned long js_reset_ms_ss;
+	unsigned long js_reset_ms_cl;
+	unsigned long js_reset_ms_dumping;
+	unsigned long ticks;
+	u32 scheduling_period_ns;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	/* If no contexts have been scheduled since js_timeouts was last written
+	 * to, the new timeouts might not have been latched yet. So check if an
+	 * update is pending and use the new values if necessary. */
+	if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0)
+		scheduling_period_ns = kbdev->js_scheduling_period_ns;
+	else
+		scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0)
+		ticks = kbdev->js_soft_stop_ticks;
+	else
+		ticks = kbdev->js_data.soft_stop_ticks;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_soft_stop_ms = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0)
+		ticks = kbdev->js_soft_stop_ticks_cl;
+	else
+		ticks = kbdev->js_data.soft_stop_ticks_cl;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_soft_stop_ms_cl = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0)
+		ticks = kbdev->js_hard_stop_ticks_ss;
+	else
+		ticks = kbdev->js_data.hard_stop_ticks_ss;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_hard_stop_ms_ss = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0)
+		ticks = kbdev->js_hard_stop_ticks_cl;
+	else
+		ticks = kbdev->js_data.hard_stop_ticks_cl;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_hard_stop_ms_cl = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0)
+		ticks = kbdev->js_hard_stop_ticks_dumping;
+	else
+		ticks = kbdev->js_data.hard_stop_ticks_dumping;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_hard_stop_ms_dumping = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0)
+		ticks = kbdev->js_reset_ticks_ss;
+	else
+		ticks = kbdev->js_data.gpu_reset_ticks_ss;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_reset_ms_ss = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0)
+		ticks = kbdev->js_reset_ticks_cl;
+	else
+		ticks = kbdev->js_data.gpu_reset_ticks_cl;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_reset_ms_cl = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0)
+		ticks = kbdev->js_reset_ticks_dumping;
+	else
+		ticks = kbdev->js_data.gpu_reset_ticks_dumping;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_reset_ms_dumping = (unsigned long)ms;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+			js_soft_stop_ms, js_soft_stop_ms_cl,
+			js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+			js_hard_stop_ms_dumping, js_reset_ms_ss,
+			js_reset_ms_cl, js_reset_ms_dumping);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** The sysfs file @c js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * JS_STOP_STOP_TICKS_SS
+ * JS_STOP_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
+
+/**
+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ *                            file
+ * @dev:   The device the sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the js_scheduling_period sysfs file is written
+ * to. It checks the data written, and if valid updates the js_scheduling_period
+ * value
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	unsigned int js_scheduling_period;
+	u32 new_scheduling_period_ns;
+	u32 old_period;
+	u64 ticks;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtouint(buf, 0, &js_scheduling_period);
+	if (ret || !js_scheduling_period) {
+		dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
+				"Use format <js_scheduling_period_ms>\n");
+		return -EINVAL;
+	}
+
+	new_scheduling_period_ns = js_scheduling_period * 1000000;
+
+	/* Update scheduling timeouts */
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+
+	/* If no contexts have been scheduled since js_timeouts was last written
+	 * to, the new timeouts might not have been latched yet. So check if an
+	 * update is pending and use the new values if necessary. */
+
+	/* Use previous 'new' scheduling period as a base if present. */
+	if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns)
+		old_period = kbdev->js_scheduling_period_ns;
+	else
+		old_period = kbdev->js_data.scheduling_period_ns;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0)
+		ticks = (u64)kbdev->js_soft_stop_ticks * old_period;
+	else
+		ticks = (u64)kbdev->js_data.soft_stop_ticks *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_soft_stop_ticks = ticks ? ticks : 1;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0)
+		ticks = (u64)kbdev->js_soft_stop_ticks_cl * old_period;
+	else
+		ticks = (u64)kbdev->js_data.soft_stop_ticks_cl *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_soft_stop_ticks_cl = ticks ? ticks : 1;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0)
+		ticks = (u64)kbdev->js_hard_stop_ticks_ss * old_period;
+	else
+		ticks = (u64)kbdev->js_data.hard_stop_ticks_ss *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_hard_stop_ticks_ss = ticks ? ticks : 1;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0)
+		ticks = (u64)kbdev->js_hard_stop_ticks_cl * old_period;
+	else
+		ticks = (u64)kbdev->js_data.hard_stop_ticks_cl *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_hard_stop_ticks_cl = ticks ? ticks : 1;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0)
+		ticks = (u64)kbdev->js_hard_stop_ticks_dumping * old_period;
+	else
+		ticks = (u64)kbdev->js_data.hard_stop_ticks_dumping *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_hard_stop_ticks_dumping = ticks ? ticks : 1;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0)
+		ticks = (u64)kbdev->js_reset_ticks_ss * old_period;
+	else
+		ticks = (u64)kbdev->js_data.gpu_reset_ticks_ss *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_reset_ticks_ss = ticks ? ticks : 1;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0)
+		ticks = (u64)kbdev->js_reset_ticks_cl * old_period;
+	else
+		ticks = (u64)kbdev->js_data.gpu_reset_ticks_cl *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_reset_ticks_cl = ticks ? ticks : 1;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0)
+		ticks = (u64)kbdev->js_reset_ticks_dumping * old_period;
+	else
+		ticks = (u64)kbdev->js_data.gpu_reset_ticks_dumping *
+				kbdev->js_data.scheduling_period_ns;
+	do_div(ticks, new_scheduling_period_ns);
+	kbdev->js_reset_ticks_dumping = ticks ? ticks : 1;
+
+	kbdev->js_scheduling_period_ns = new_scheduling_period_ns;
+	kbdev->js_timeouts_updated = true;
+
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+
+	dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
+			js_scheduling_period);
+
+	return count;
+}
+
+/**
+ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs
+ *                             entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the JS scheduling
+ * period.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	u32 period;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0)
+		period = kbdev->js_scheduling_period_ns;
+	else
+		period = kbdev->js_data.scheduling_period_ns;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+			period / 1000000);
+
+	return ret;
+}
+
+static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
+		show_js_scheduling_period, set_js_scheduling_period);
+
+#if !MALI_CUSTOMER_RELEASE
+/** Store callback for the @c force_replay sysfs file.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (!strncmp("limit=", buf, MIN(6, count))) {
+		int force_replay_limit;
+		int items = sscanf(buf, "limit=%u", &force_replay_limit);
+
+		if (items == 1) {
+			kbdev->force_replay_random = false;
+			kbdev->force_replay_limit = force_replay_limit;
+			kbdev->force_replay_count = 0;
+
+			return count;
+		}
+	} else if (!strncmp("random_limit", buf, MIN(12, count))) {
+		kbdev->force_replay_random = true;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("norandom_limit", buf, MIN(14, count))) {
+		kbdev->force_replay_random = false;
+		kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("core_req=", buf, MIN(9, count))) {
+		unsigned int core_req;
+		int items = sscanf(buf, "core_req=%x", &core_req);
+
+		if (items == 1) {
+			kbdev->force_replay_core_req = (base_jd_core_req)core_req;
+
+			return count;
+		}
+	}
+	dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n");
+	return -EINVAL;
+}
+
+/** Show callback for the @c force_replay sysfs file.
+ *
+ * This function is called to get the contents of the @c force_replay sysfs
+ * file. It returns the last set value written to the force_replay sysfs file.
+ * If the file didn't get written yet, the values will be 0.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_force_replay(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->force_replay_random)
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=0\nrandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_core_req);
+	else
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=%u\nnorandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_limit,
+				kbdev->force_replay_core_req);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** The sysfs file @c force_replay.
+ *
+ */
+static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay,
+		set_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG
+static ssize_t set_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int softstop_always;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &softstop_always);
+	if (ret || ((softstop_always != 0) && (softstop_always != 1))) {
+		dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n"
+				"Use format <soft_stop_always>\n");
+		return -EINVAL;
+	}
+
+	kbdev->js_data.softstop_always = (bool) softstop_always;
+	dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n",
+			(kbdev->js_data.softstop_always) ?
+			"Enabled" : "Disabled");
+	return count;
+}
+
+static ssize_t show_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * By default, soft-stops are disabled when only a single context is present. The ability to
+ * enable soft-stop when only a single context is present can be used for debug and unit-testing purposes.
+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.)
+ */
+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG
+typedef void (kbasep_debug_command_func) (struct kbase_device *);
+
+enum kbasep_debug_command_code {
+	KBASEP_DEBUG_COMMAND_DUMPTRACE,
+
+	/* This must be the last enum */
+	KBASEP_DEBUG_COMMAND_COUNT
+};
+
+struct kbasep_debug_command {
+	char *str;
+	kbasep_debug_command_func *func;
+};
+
+/** Debug commands supported by the driver */
+static const struct kbasep_debug_command debug_commands[] = {
+	{
+	 .str = "dumptrace",
+	 .func = &kbasep_trace_dump,
+	 }
+};
+
+/** Show callback for the @c debug_command sysfs file.
+ *
+ * This function is called to get the contents of the @c debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c debug_command sysfs file.
+ *
+ * This function is called when the @c debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @ref debug_commands to issue the command.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) {
+		if (sysfs_streq(debug_commands[i].str, buf)) {
+			debug_commands[i].func(kbdev);
+			return count;
+		}
+	}
+
+	/* Debug Command not found */
+	dev_err(dev, "debug_command: command not known\n");
+	return -EINVAL;
+}
+
+/** The sysfs file @c debug_command.
+ *
+ * This is used to issue general debug commands to the device driver.
+ * Reading it will produce a list of debug commands, separated by newlines.
+ * Writing to it with one of those commands will issue said command.
+ */
+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
+#endif /* CONFIG_MALI_DEBUG */
+
+/**
+ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get a description of the present Mali
+ * GPU via the gpuinfo sysfs entry.  This includes the GPU family, the
+ * number of cores, the hardware version and the raw product id.  For
+ * example:
+ *
+ *    Mali-T60x MP4 r0p0 0x6956
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t kbase_show_gpuinfo(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	static const struct gpu_product_id_name {
+		unsigned id;
+		char *name;
+	} gpu_product_id_names[] = {
+		{ .id = GPU_ID_PI_T60X, .name = "Mali-T60x" },
+		{ .id = GPU_ID_PI_T62X, .name = "Mali-T62x" },
+		{ .id = GPU_ID_PI_T72X, .name = "Mali-T72x" },
+		{ .id = GPU_ID_PI_T76X, .name = "Mali-T76x" },
+		{ .id = GPU_ID_PI_T82X, .name = "Mali-T82x" },
+		{ .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
+		{ .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
+		{ .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+		{ .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G71" },
+	};
+	const char *product_name = "(Unknown Mali GPU)";
+	struct kbase_device *kbdev;
+	u32 gpu_id;
+	unsigned product_id, product_id_mask;
+	unsigned i;
+	bool is_new_format;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	is_new_format = GPU_ID_IS_NEW_FORMAT(product_id);
+	product_id_mask =
+		(is_new_format ?
+			GPU_ID2_PRODUCT_MODEL :
+			GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
+		const struct gpu_product_id_name *p = &gpu_product_id_names[i];
+
+		if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) &&
+		    (p->id & product_id_mask) ==
+		    (product_id & product_id_mask)) {
+			product_name = p->name;
+			break;
+		}
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%s MP%d r%dp%d 0x%04X\n",
+		product_name, kbdev->gpu_props.num_cores,
+		(gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT,
+		(gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT,
+		product_id);
+}
+static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
+
+/**
+ * set_dvfs_period - Store callback for the dvfs_period sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the dvfs_period sysfs file is written to. It
+ * checks the data written, and if valid updates the DVFS period variable,
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_dvfs_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int dvfs_period;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &dvfs_period);
+	if (ret || dvfs_period <= 0) {
+		dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n"
+				"Use format <dvfs_period_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.dvfs_period = dvfs_period;
+	dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period);
+
+	return count;
+}
+
+/**
+ * show_dvfs_period - Show callback for the dvfs_period sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_dvfs_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period);
+
+	return ret;
+}
+
+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
+		set_dvfs_period);
+
+/**
+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the pm_poweroff sysfs file is written to.
+ *
+ * This file contains three values separated by whitespace. The values
+ * are gpu_poweroff_time (the period of the poweroff timer, in ns),
+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle
+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer
+ * ticks before the GPU is powered off), in that order.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	s64 gpu_poweroff_time;
+	int poweroff_shader_ticks, poweroff_gpu_ticks;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time,
+			&poweroff_shader_ticks,
+			&poweroff_gpu_ticks);
+	if (items != 3) {
+		dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n"
+				"Use format <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+	kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
+	kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+
+	return count;
+}
+
+/**
+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
+			ktime_to_ns(kbdev->pm.gpu_poweroff_time),
+			kbdev->pm.poweroff_shader_ticks,
+			kbdev->pm.poweroff_gpu_ticks);
+
+	return ret;
+}
+
+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
+		set_pm_poweroff);
+
+/**
+ * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the reset_timeout sysfs file is written to. It
+ * checks the data written, and if valid updates the reset timeout.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_reset_timeout(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int reset_timeout;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &reset_timeout);
+	if (ret || reset_timeout <= 0) {
+		dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n"
+				"Use format <reset_timeout_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->reset_timeout_ms = reset_timeout;
+	dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout);
+
+	return count;
+}
+
+/**
+ * show_reset_timeout - Show callback for the reset_timeout sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current reset timeout.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_reset_timeout(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms);
+
+	return ret;
+}
+
+static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout,
+		set_reset_timeout);
+
+
+
+static ssize_t show_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_size);
+	if (err)
+		return err;
+
+	kbase_mem_pool_trim(&kbdev->mem_pool, new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size,
+		set_mem_pool_size);
+
+static ssize_t show_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_max_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_max_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_max_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
+		set_mem_pool_max_size);
+
+
+static int kbasep_secure_mode_enable(struct kbase_device *kbdev)
+{
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+		GPU_COMMAND_SET_PROTECTED_MODE, NULL);
+	return 0;
+}
+
+static int kbasep_secure_mode_disable(struct kbase_device *kbdev)
+{
+	if (!kbase_prepare_to_reset_gpu_locked(kbdev))
+		return -EBUSY;
+
+	kbase_reset_gpu_locked(kbdev);
+
+	return 0;
+}
+
+static struct kbase_secure_ops kbasep_secure_ops = {
+	.secure_mode_enable = kbasep_secure_mode_enable,
+	.secure_mode_disable = kbasep_secure_mode_disable,
+};
+
+static void kbasep_secure_mode_init(struct kbase_device *kbdev)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		/* Use native secure ops */
+		kbdev->secure_ops = &kbasep_secure_ops;
+		kbdev->secure_mode_support = true;
+	}
+#ifdef SECURE_CALLBACKS
+	else {
+		kbdev->secure_ops = SECURE_CALLBACKS;
+		kbdev->secure_mode_support = false;
+
+		if (kbdev->secure_ops) {
+			int err;
+
+			/* Make sure secure mode is disabled on startup */
+			err = kbdev->secure_ops->secure_mode_disable(kbdev);
+
+			/* secure_mode_disable() returns -EINVAL if not
+			 * supported
+			 */
+			kbdev->secure_mode_support = (err != -EINVAL);
+		}
+	}
+#endif
+}
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	int err = -ENOMEM;
+
+	if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) {
+		dev_err(kbdev->dev, "Register window unavailable\n");
+		err = -EIO;
+		goto out_region;
+	}
+
+	kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+	if (!kbdev->reg) {
+		dev_err(kbdev->dev, "Can't remap register window\n");
+		err = -EINVAL;
+		goto out_ioremap;
+	}
+
+	return 0;
+
+ out_ioremap:
+	release_mem_region(kbdev->reg_start, kbdev->reg_size);
+ out_region:
+	return err;
+}
+
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+	if (kbdev->reg) {
+		iounmap(kbdev->reg);
+		release_mem_region(kbdev->reg_start, kbdev->reg_size);
+		kbdev->reg = NULL;
+		kbdev->reg_start = 0;
+		kbdev->reg_size = 0;
+	}
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+static int registers_map(struct kbase_device * const kbdev)
+{
+
+		/* the first memory resource is the physical address of the GPU
+		 * registers */
+		struct platform_device *pdev = to_platform_device(kbdev->dev);
+		struct resource *reg_res;
+		int err;
+
+		reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!reg_res) {
+			dev_err(kbdev->dev, "Invalid register resource\n");
+			return -ENOENT;
+		}
+
+		kbdev->reg_start = reg_res->start;
+		kbdev->reg_size = resource_size(reg_res);
+
+		err = kbase_common_reg_map(kbdev);
+		if (err) {
+			dev_err(kbdev->dev, "Failed to map registers\n");
+			return err;
+		}
+
+	return 0;
+}
+
+static void registers_unmap(struct kbase_device *kbdev)
+{
+	kbase_common_reg_unmap(kbdev);
+}
+
+static int power_control_init(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int err = 0;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
+	if (IS_ERR_OR_NULL(kbdev->regulator)) {
+		err = PTR_ERR(kbdev->regulator);
+		kbdev->regulator = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get regulator\n");
+			return err;
+		}
+		dev_info(kbdev->dev,
+			"Continuing without Mali regulator control\n");
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+	kbdev->clock = clk_get(kbdev->dev, "clk_mali");
+	if (IS_ERR_OR_NULL(kbdev->clock)) {
+		err = PTR_ERR(kbdev->clock);
+		kbdev->clock = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get clock\n");
+			goto fail;
+		}
+		dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare_enable(kbdev->clock);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Failed to prepare and enable clock (%d)\n",
+				err);
+			goto fail;
+		}
+	}
+
+#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	err = dev_pm_opp_of_add_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	err = of_init_opp_table(kbdev->dev);
+#else
+	err = 0;
+#endif /* LINUX_VERSION_CODE */
+	if (err)
+		dev_dbg(kbdev->dev, "OPP table not found\n");
+#endif /* CONFIG_OF && CONFIG_PM_OPP */
+
+	return 0;
+
+fail:
+
+if (kbdev->clock != NULL) {
+	clk_put(kbdev->clock);
+	kbdev->clock = NULL;
+}
+
+#ifdef CONFIG_REGULATOR
+	if (NULL != kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif
+
+	return err;
+}
+
+static void power_control_term(struct kbase_device *kbdev)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	dev_pm_opp_of_remove_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+	of_free_opp_table(kbdev->dev);
+#endif
+
+	if (kbdev->clock) {
+		clk_disable_unprepare(kbdev->clock);
+		clk_put(kbdev->clock);
+		kbdev->clock = NULL;
+	}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	if (kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#if KBASE_GPU_RESET_EN
+#include <mali_kbase_hwaccess_jm.h>
+
+static void trigger_quirks_reload(struct kbase_device *kbdev)
+{
+	kbase_pm_context_active(kbdev);
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	kbase_pm_context_idle(kbdev);
+}
+
+#define MAKE_QUIRK_ACCESSORS(type) \
+static int type##_quirks_set(void *data, u64 val) \
+{ \
+	struct kbase_device *kbdev; \
+	kbdev = (struct kbase_device *)data; \
+	kbdev->hw_quirks_##type = (u32)val; \
+	trigger_quirks_reload(kbdev); \
+	return 0;\
+} \
+\
+static int type##_quirks_get(void *data, u64 *val) \
+{ \
+	struct kbase_device *kbdev;\
+	kbdev = (struct kbase_device *)data;\
+	*val = kbdev->hw_quirks_##type;\
+	return 0;\
+} \
+DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
+		type##_quirks_set, "%llu\n")
+
+MAKE_QUIRK_ACCESSORS(sc);
+MAKE_QUIRK_ACCESSORS(tiler);
+MAKE_QUIRK_ACCESSORS(mmu);
+
+#endif /* KBASE_GPU_RESET_EN */
+
+static int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	struct dentry *debugfs_ctx_defaults_directory;
+	int err;
+
+	kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
+			NULL);
+	if (!kbdev->mali_debugfs_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
+			kbdev->mali_debugfs_directory);
+	if (!kbdev->debugfs_ctx_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
+			kbdev->debugfs_ctx_directory);
+	if (!debugfs_ctx_defaults_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+#if !MALI_CUSTOMER_RELEASE
+	kbasep_regs_dump_debugfs_add(kbdev);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+	kbase_debug_job_fault_debugfs_init(kbdev);
+	kbasep_gpu_memory_debugfs_init(kbdev);
+#if KBASE_GPU_RESET_EN
+	debugfs_create_file("quirks_sc", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_sc_quirks);
+	debugfs_create_file("quirks_tiler", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_tiler_quirks);
+	debugfs_create_file("quirks_mmu", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_mmu_quirks);
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_COH_USER
+	debugfs_create_bool("infinite_cache", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->infinite_cache_active_default);
+#endif /* CONFIG_MALI_COH_USER */
+
+	debugfs_create_size_t("mem_pool_max_size", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->mem_pool_max_size_default);
+
+#if KBASE_TRACE_ENABLE
+	kbasep_trace_debugfs_init(kbdev);
+#endif /* KBASE_TRACE_ENABLE */
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kbasep_trace_timeline_debugfs_init(kbdev);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+	return 0;
+
+out:
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+	return err;
+}
+
+static void kbase_device_debugfs_term(struct kbase_device *kbdev)
+{
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+}
+
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	return 0;
+}
+
+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
+#endif /* CONFIG_DEBUG_FS */
+
+static void kbase_device_coherency_init(struct kbase_device *kbdev, u32 gpu_id)
+{
+#ifdef CONFIG_OF
+	u32 supported_coherency_bitmap =
+		kbdev->gpu_props.props.raw_props.coherency_mode;
+	const void *coherency_override_dts;
+	u32 override_coherency;
+#endif /* CONFIG_OF */
+
+	kbdev->system_coherency = COHERENCY_NONE;
+
+	/* device tree may override the coherency */
+#ifdef CONFIG_OF
+	coherency_override_dts = of_get_property(kbdev->dev->of_node,
+						"system-coherency",
+						NULL);
+	if (coherency_override_dts) {
+
+		override_coherency = be32_to_cpup(coherency_override_dts);
+
+		if ((override_coherency <= COHERENCY_NONE) &&
+			(supported_coherency_bitmap &
+			 COHERENCY_FEATURE_BIT(override_coherency))) {
+
+			kbdev->system_coherency = override_coherency;
+
+			dev_info(kbdev->dev,
+				"Using coherency mode %u set from dtb",
+				override_coherency);
+		} else
+			dev_warn(kbdev->dev,
+				"Ignoring unsupported coherency mode %u set from dtb",
+				override_coherency);
+	}
+
+#endif /* CONFIG_OF */
+
+	kbdev->gpu_props.props.raw_props.coherency_mode =
+		kbdev->system_coherency;
+}
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+
+/* Callback used by the kbase bus logger client, to initiate a GPU reset
+ * when the bus log is restarted.  GPU reset is used as reference point
+ * in HW bus log analyses.
+ */
+static void kbase_logging_started_cb(void *data)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)data;
+
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	dev_info(kbdev->dev, "KBASE - Bus logger restarted\n");
+}
+#endif
+
+static struct attribute *kbase_attrs[] = {
+#ifdef CONFIG_MALI_DEBUG
+	&dev_attr_debug_command.attr,
+	&dev_attr_js_softstop_always.attr,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&dev_attr_force_replay.attr,
+#endif
+	&dev_attr_js_timeouts.attr,
+	&dev_attr_soft_event_timeout.attr,
+	&dev_attr_gpuinfo.attr,
+	&dev_attr_dvfs_period.attr,
+	&dev_attr_pm_poweroff.attr,
+	&dev_attr_reset_timeout.attr,
+	&dev_attr_js_scheduling_period.attr,
+	&dev_attr_power_policy.attr,
+	&dev_attr_core_availability_policy.attr,
+	&dev_attr_core_mask.attr,
+	&dev_attr_mem_pool_size.attr,
+	&dev_attr_mem_pool_max_size.attr,
+	NULL
+};
+
+static const struct attribute_group kbase_attr_group = {
+	.attrs = kbase_attrs,
+};
+
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	const struct list_head *dev_list;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	if (kbdev->inited_subsys & inited_buslogger) {
+		bl_core_client_unregister(kbdev->buslogger);
+		kbdev->inited_subsys &= ~inited_buslogger;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_sysfs_group) {
+		sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+		kbdev->inited_subsys &= ~inited_sysfs_group;
+	}
+
+	if (kbdev->inited_subsys & inited_dev_list) {
+		dev_list = kbase_dev_list_get();
+		list_del(&kbdev->entry);
+		kbase_dev_list_put(dev_list);
+		kbdev->inited_subsys &= ~inited_dev_list;
+	}
+
+	if (kbdev->inited_subsys & inited_misc_register) {
+		misc_deregister(&kbdev->mdev);
+		kbdev->inited_subsys &= ~inited_misc_register;
+	}
+
+	if (kbdev->inited_subsys & inited_get_device) {
+		put_device(kbdev->dev);
+		kbdev->inited_subsys &= ~inited_get_device;
+	}
+
+	if (kbdev->inited_subsys & inited_debugfs) {
+		kbase_device_debugfs_term(kbdev);
+		kbdev->inited_subsys &= ~inited_debugfs;
+	}
+
+	if (kbdev->inited_subsys & inited_job_fault) {
+		kbase_debug_job_fault_dev_term(kbdev);
+		kbdev->inited_subsys &= ~inited_job_fault;
+	}
+
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	if (kbdev->inited_subsys & inited_ipa) {
+		kbase_ipa_term(kbdev->ipa_ctx);
+		kbdev->inited_subsys &= ~inited_ipa;
+	}
+#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+
+	if (kbdev->inited_subsys & inited_vinstr) {
+		kbase_vinstr_term(kbdev->vinstr_ctx);
+		kbdev->inited_subsys &= ~inited_vinstr;
+	}
+
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_term(kbdev);
+		kbdev->inited_subsys &= ~inited_devfreq;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_backend_late) {
+		kbase_backend_late_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_late;
+	}
+
+	if (kbdev->inited_subsys & inited_tlstream) {
+		kbase_tlstream_term();
+		kbdev->inited_subsys &= ~inited_tlstream;
+	}
+
+	/* Bring job and mem sys to a halt before we continue termination */
+
+	if (kbdev->inited_subsys & inited_js)
+		kbasep_js_devdata_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_mem)
+		kbase_mem_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_js) {
+		kbasep_js_devdata_term(kbdev);
+		kbdev->inited_subsys &= ~inited_js;
+	}
+
+	if (kbdev->inited_subsys & inited_mem) {
+		kbase_mem_term(kbdev);
+		kbdev->inited_subsys &= ~inited_mem;
+	}
+
+	if (kbdev->inited_subsys & inited_pm_runtime_init) {
+		kbdev->pm.callback_power_runtime_term(kbdev);
+		kbdev->inited_subsys &= ~inited_pm_runtime_init;
+	}
+
+	if (kbdev->inited_subsys & inited_device) {
+		kbase_device_term(kbdev);
+		kbdev->inited_subsys &= ~inited_device;
+	}
+
+	if (kbdev->inited_subsys & inited_backend_early) {
+		kbase_backend_early_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_early;
+	}
+
+	if (kbdev->inited_subsys & inited_power_control) {
+		power_control_term(kbdev);
+		kbdev->inited_subsys &= ~inited_power_control;
+	}
+
+	if (kbdev->inited_subsys & inited_registers_map) {
+		registers_unmap(kbdev);
+		kbdev->inited_subsys &= ~inited_registers_map;
+	}
+
+#ifdef CONFIG_MALI_NO_MALI
+	if (kbdev->inited_subsys & inited_gpu_device) {
+		gpu_device_destroy(kbdev);
+		kbdev->inited_subsys &= ~inited_gpu_device;
+	}
+#endif /* CONFIG_MALI_NO_MALI */
+
+	if (kbdev->inited_subsys != 0)
+		dev_err(kbdev->dev, "Missing sub system termination\n");
+
+	kbase_device_free(kbdev);
+
+	return 0;
+}
+
+static int kbase_platform_device_probe(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev;
+	struct mali_base_gpu_core_props *core_props;
+	u32 gpu_id;
+	const struct list_head *dev_list;
+	int err = 0;
+
+#ifdef CONFIG_OF
+	err = kbase_platform_early_init();
+	if (err) {
+		dev_err(&pdev->dev, "Early platform initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+#endif
+
+	kbdev = kbase_device_alloc();
+	if (!kbdev) {
+		dev_err(&pdev->dev, "Allocate device failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+
+	kbdev->dev = &pdev->dev;
+	dev_set_drvdata(kbdev->dev, kbdev);
+
+#ifdef CONFIG_MALI_NO_MALI
+	err = gpu_device_create(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Dummy model initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_gpu_device;
+#endif /* CONFIG_MALI_NO_MALI */
+
+	err = assign_irqs(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "IRQ search failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	err = registers_map(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Register map failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_registers_map;
+
+	err = power_control_init(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Power control initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_power_control;
+
+	err = kbase_backend_early_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Early backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_early;
+
+	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+			kbase_dev_nr);
+
+	kbase_disjoint_init(kbdev);
+
+	/* obtain min/max configured gpu frequencies */
+	core_props = &(kbdev->gpu_props.props.core_props);
+	core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+	core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+
+	kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US;
+
+	err = kbase_device_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Device initialization failed (%d)\n", err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_device;
+
+	if (kbdev->pm.callback_power_runtime_init) {
+		err = kbdev->pm.callback_power_runtime_init(kbdev);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Runtime PM initialization failed\n");
+			kbase_platform_device_remove(pdev);
+			return err;
+		}
+		kbdev->inited_subsys |= inited_pm_runtime_init;
+	}
+
+	err = kbase_mem_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Memory subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_mem;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+	gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	kbase_device_coherency_init(kbdev, gpu_id);
+
+	kbasep_secure_mode_init(kbdev);
+
+	err = kbasep_js_devdata_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job JS devdata initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_js;
+
+	err = kbase_tlstream_init();
+	if (err) {
+		dev_err(kbdev->dev, "Timeline stream initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_tlstream;
+
+	err = kbase_backend_late_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Late backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_late;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	err = kbase_devfreq_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Fevfreq initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_devfreq;
+#endif /* CONFIG_MALI_DEVFREQ */
+
+	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+	if (!kbdev->vinstr_ctx) {
+		dev_err(kbdev->dev,
+			"Virtual instrumentation initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+	kbdev->inited_subsys |= inited_vinstr;
+
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	kbdev->ipa_ctx = kbase_ipa_init(kbdev);
+	if (!kbdev->ipa_ctx) {
+		dev_err(kbdev->dev, "IPA initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+
+	kbdev->inited_subsys |= inited_ipa;
+#endif  /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+
+	err = kbase_debug_job_fault_dev_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job fault debug initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_job_fault;
+
+	err = kbase_device_debugfs_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "DebugFS initialization failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_debugfs;
+
+	/* initialize the kctx list */
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
+
+	kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+	kbdev->mdev.name = kbdev->devname;
+	kbdev->mdev.fops = &kbase_fops;
+	kbdev->mdev.parent = get_device(kbdev->dev);
+	kbdev->inited_subsys |= inited_get_device;
+
+	err = misc_register(&kbdev->mdev);
+	if (err) {
+		dev_err(kbdev->dev, "Misc device registration failed for %s\n",
+			kbdev->devname);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_misc_register;
+
+	dev_list = kbase_dev_list_get();
+	list_add(&kbdev->entry, &kbase_dev_list);
+	kbase_dev_list_put(dev_list);
+	kbdev->inited_subsys |= inited_dev_list;
+
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+	if (err) {
+		dev_err(&pdev->dev, "SysFS group creation failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_sysfs_group;
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	err = bl_core_client_register(kbdev->devname,
+						kbase_logging_started_cb,
+						kbdev, &kbdev->buslogger,
+						THIS_MODULE, NULL);
+	if (err == 0) {
+		kbdev->inited_subsys |= inited_buslogger;
+		bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
+	} else {
+		dev_warn(kbdev->dev, "Bus log client registration failed\n");
+		err = 0;
+	}
+#endif
+
+	dev_info(kbdev->dev,
+			"Probed as %s\n", dev_name(kbdev->mdev.this_device));
+
+	kbase_dev_nr++;
+
+	return err;
+}
+
+/** Suspend callback from the OS.
+ *
+ * This is called by Linux when the device should suspend.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 1;
+#endif
+
+	kbase_pm_suspend(kbdev);
+	return 0;
+}
+
+/** Resume callback from the OS.
+ *
+ * This is called by Linux when the device should resume from suspension.
+ *
+ * @param dev  The device to resume
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_resume(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kbase_pm_resume(kbdev);
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 0;
+#endif
+	return 0;
+}
+
+/** Runtime suspend callback from the OS.
+ *
+ * This is called by Linux when the device should prepare for a condition in which it will
+ * not be able to communicate with the CPU(s) and RAM due to power management.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 1;
+#endif
+
+	if (kbdev->pm.backend.callback_power_runtime_off) {
+		kbdev->pm.backend.callback_power_runtime_off(kbdev);
+		dev_dbg(dev, "runtime suspend\n");
+	}
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/** Runtime resume callback from the OS.
+ *
+ * This is called by Linux when the device should go into a fully active state.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_resume(struct device *dev)
+{
+	int ret = 0;
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->pm.backend.callback_power_runtime_on) {
+		ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
+		dev_dbg(dev, "runtime resume\n");
+	}
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 0;
+#endif
+
+	return ret;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_device_runtime_idle - Runtime idle callback from the OS.
+ * @dev: The device to suspend
+ *
+ * This is called by Linux when the device appears to be inactive and it might
+ * be placed into a low power state.
+ *
+ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend,
+ * otherwise a standard Linux error code
+ */
+static int kbase_device_runtime_idle(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* Use platform specific implementation if it exists. */
+	if (kbdev->pm.backend.callback_power_runtime_idle)
+		return kbdev->pm.backend.callback_power_runtime_idle(kbdev);
+
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+#ifndef CONFIG_MALI_DEVFREQ
+static int mali_os_freeze(struct device *device)
+{
+	mali_dev_freeze();
+	return kbase_device_suspend(device);
+}
+
+static int mali_os_restore(struct device *device)
+{
+	mali_dev_restore();
+	return kbase_device_resume(device);
+}
+#endif
+
+
+/** The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+	.suspend = kbase_device_suspend,
+	.resume = kbase_device_resume,
+#ifndef CONFIG_MALI_DEVFREQ
+	.freeze = mali_os_freeze,
+	.thaw = kbase_device_resume,
+	.restore = mali_os_restore,
+#endif
+#ifdef KBASE_PM_RUNTIME
+	.runtime_suspend = kbase_device_runtime_suspend,
+	.runtime_resume = kbase_device_runtime_resume,
+	.runtime_idle = kbase_device_runtime_idle,
+#endif /* KBASE_PM_RUNTIME */
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+	{ .compatible = "arm,malit6xx" },
+	{ .compatible = "arm,mali-midgard" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
+
+static struct platform_driver kbase_platform_driver = {
+	.probe = kbase_platform_device_probe,
+	.remove = kbase_platform_device_remove,
+	.driver = {
+		   .name = kbase_drv_name,
+		   .owner = THIS_MODULE,
+		   .pm = &kbase_pm_ops,
+		   .of_match_table = of_match_ptr(kbase_dt_ids),
+	},
+};
+
+/*
+ * The driver will not provide a shortcut to create the Mali platform device
+ * anymore when using Device Tree.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else
+
+static int __init kbase_driver_init(void)
+{
+	int ret;
+
+	ret = kbase_platform_early_init();
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	ret = kbase_platform_fake_register();
+	if (ret)
+		return ret;
+#endif
+	ret = platform_driver_register(&kbase_platform_driver);
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	if (ret)
+		kbase_platform_fake_unregister();
+#endif
+	return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+	platform_driver_unregister(&kbase_platform_driver);
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	kbase_platform_fake_unregister();
+#endif
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
+		__stringify(BASE_UK_VERSION_MAJOR) "." \
+		__stringify(BASE_UK_VERSION_MINOR) ")");
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
+#define CREATE_TRACE_POINTS
+#endif
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Create the trace points (otherwise we just get code to call a tracepoint) */
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counter);
+
+void kbase_trace_mali_pm_status(u32 event, u64 value)
+{
+	trace_mali_pm_status(event, value);
+}
+
+void kbase_trace_mali_pm_power_off(u32 event, u64 value)
+{
+	trace_mali_pm_power_off(event, value);
+}
+
+void kbase_trace_mali_pm_power_on(u32 event, u64 value)
+{
+	trace_mali_pm_power_on(event, value);
+}
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id)
+{
+	trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id);
+}
+
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value)
+{
+	trace_mali_page_fault_insert_pages(event, value);
+}
+
+void kbase_trace_mali_mmu_as_in_use(int event)
+{
+	trace_mali_mmu_as_in_use(event);
+}
+
+void kbase_trace_mali_mmu_as_released(int event)
+{
+	trace_mali_mmu_as_released(event);
+}
+
+void kbase_trace_mali_total_alloc_pages_change(long long int event)
+{
+	trace_mali_total_alloc_pages_change(event);
+}
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_MALI_SYSTEM_TRACE
+#include "mali_linux_kbase_trace.h"
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100755
index 0000000..fb57ac2
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+	NULL,
+	NULL
+};
+
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+	kbasep_debug_assert_registered_cb.func = func;
+	kbasep_debug_assert_registered_cb.param = param;
+}
+
+void kbasep_debug_assert_call_hook(void)
+{
+	if (kbasep_debug_assert_registered_cb.func != NULL)
+		kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
new file mode 100755
index 0000000..5fff289
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If different from 0, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+struct kbasep_debug_assert_cb {
+	kbase_debug_assert_hook *func;
+	void *param;
+};
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+		"In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION __func__
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated to the @see KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+		do { \
+			pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+			pr_err(__VA_ARGS__);\
+			pr_err("\n");\
+		} while (false)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @see KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+	KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+	/**
+	 * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+	 * @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+	 *
+	 * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+	 *
+	 * @param expr Boolean expression
+	 * @param ...  Message to display when @a expr is false, as a format string followed by format arguments.
+	 */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+		do { \
+			if (!(expr)) { \
+				KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+				KBASE_CALL_ASSERT_HOOK();\
+				BUG();\
+			} \
+		} while (false)
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif				/* CONFIG_MALI_DEBUG */
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif				/* _KBASE_DEBUG_H */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
new file mode 100755
index 0000000..bcd2569
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
@@ -0,0 +1,502 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/spinlock.h>
+#include "mali_kbase_debug_job_fault.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+	bool             ret;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	ret = !list_empty(event_list);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return ret;
+}
+
+static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
+	struct base_job_fault_event *event;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		return true;
+	}
+	list_for_each_entry(event, event_list, head) {
+		if (event->katom->kctx == kctx) {
+			spin_unlock_irqrestore(&kbdev->job_fault_event_lock,
+					flags);
+			return false;
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+	return true;
+}
+
+/* wait until the fault happen and copy the event */
+static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
+		struct base_job_fault_event *event)
+{
+	struct list_head            *event_list = &kbdev->job_fault_event_list;
+	struct base_job_fault_event *event_in;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		if (wait_event_interruptible(kbdev->job_fault_wq,
+				 kbase_is_job_fault_event_pending(kbdev)))
+			return -ERESTARTSYS;
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+
+	event_in = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	event->event_code = event_in->event_code;
+	event->katom = event_in->katom;
+
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return 0;
+
+}
+
+/* remove the event from the queue */
+static struct base_job_fault_event *kbase_job_fault_event_dequeue(
+		struct kbase_device *kbdev, struct list_head *event_list)
+{
+	struct base_job_fault_event *event;
+
+	event = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	list_del(event_list->next);
+
+	return event;
+
+}
+
+/* Remove all the following atoms after the failed atom in the same context
+ * Call the postponed bottom half of job done.
+ * Then, this context could be rescheduled.
+ */
+static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
+{
+	struct list_head *event_list = &kctx->job_fault_resume_event_list;
+
+	while (!list_empty(event_list)) {
+		struct base_job_fault_event *event;
+
+		event = kbase_job_fault_event_dequeue(kctx->kbdev,
+				&kctx->job_fault_resume_event_list);
+		kbase_jd_done_worker(&event->katom->work);
+	}
+
+}
+
+/* Remove all the failed atoms that belong to different contexts
+ * Resume all the contexts that were suspend due to failed job
+ */
+static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	while (!list_empty(event_list)) {
+		kbase_job_fault_event_dequeue(kbdev, event_list);
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		wake_up(&kbdev->job_fault_resume_wq);
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+}
+
+static void kbase_job_fault_resume_worker(struct work_struct *data)
+{
+	struct base_job_fault_event *event = container_of(data,
+			struct base_job_fault_event, job_fault_work);
+	struct kbase_context *kctx;
+	struct kbase_jd_atom *katom;
+
+	katom = event->katom;
+	kctx = katom->kctx;
+
+	dev_info(kctx->kbdev->dev, "Job dumping wait\n");
+
+	/* When it was waked up, it need to check if queue is empty or the
+	 * failed atom belongs to different context. If yes, wake up. Both
+	 * of them mean the failed job has been dumped. Please note, it
+	 * should never happen that the job_fault_event_list has the two
+	 * atoms belong to the same context.
+	 */
+	wait_event(kctx->kbdev->job_fault_resume_wq,
+			 kbase_ctx_has_no_event_pending(kctx));
+
+	atomic_set(&kctx->job_fault_count, 0);
+	kbase_jd_done_worker(&katom->work);
+
+	/* In case the following atoms were scheduled during failed job dump
+	 * the job_done_worker was held. We need to rerun it after the dump
+	 * was finished
+	 */
+	kbase_job_fault_resume_event_cleanup(kctx);
+
+	dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
+}
+
+static struct base_job_fault_event *kbase_job_fault_event_queue(
+		struct list_head *event_list,
+		struct kbase_jd_atom *atom,
+		u32 completion_code)
+{
+	struct base_job_fault_event *event;
+
+	event = &atom->fault_event;
+
+	event->katom = atom;
+	event->event_code = completion_code;
+
+	list_add_tail(&event->head, event_list);
+
+	return event;
+
+}
+
+static void kbase_job_fault_event_post(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, u32 completion_code)
+{
+	struct base_job_fault_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
+				katom, completion_code);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	wake_up_interruptible(&kbdev->job_fault_wq);
+
+	INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
+	queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
+
+	dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
+			katom->kctx->tgid, katom->kctx->id);
+
+}
+
+/*
+ * This function will process the job fault
+ * Get the register copy
+ * Send the failed job dump event
+ * Create a Wait queue to wait until the job dump finish
+ */
+
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Check if dumping is in the process
+	 * only one atom of each context can be dumped at the same time
+	 * If the atom belongs to different context, it can be dumped
+	 */
+	if (atomic_read(&kctx->job_fault_count) > 0) {
+		kbase_job_fault_event_queue(
+				&kctx->job_fault_resume_event_list,
+				katom, completion_code);
+		dev_info(kctx->kbdev->dev, "queue:%d\n",
+				kbase_jd_atom_id(kctx, katom));
+		return true;
+	}
+
+	if (kctx->kbdev->job_fault_debug == true) {
+
+		if (completion_code != BASE_JD_EVENT_DONE) {
+
+			if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
+				dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
+				return false;
+			}
+
+			kbase_job_fault_event_post(kctx->kbdev, katom,
+					completion_code);
+			atomic_inc(&kctx->job_fault_count);
+			dev_info(kctx->kbdev->dev, "post:%d\n",
+					kbase_jd_atom_id(kctx, katom));
+			return true;
+
+		}
+	}
+	return false;
+
+}
+
+static int debug_job_fault_show(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+	struct kbase_context *kctx = event->katom->kctx;
+	int i;
+
+	dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
+			kctx->tgid, kctx->id, event->reg_offset);
+
+	if (kctx->reg_dump == NULL) {
+		dev_warn(kbdev->dev, "reg dump is NULL");
+		return -1;
+	}
+
+	if (kctx->reg_dump[event->reg_offset] ==
+			REGISTER_DUMP_TERMINATION_FLAG) {
+		/* Return the error here to stop the read. And the
+		 * following next() will not be called. The stop can
+		 * get the real event resource and release it
+		 */
+		return -1;
+	}
+
+	if (event->reg_offset == 0)
+		seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
+
+	for (i = 0; i < 50; i++) {
+		if (kctx->reg_dump[event->reg_offset] ==
+				REGISTER_DUMP_TERMINATION_FLAG) {
+			break;
+		}
+		seq_printf(m, "%08x: %08x\n",
+				kctx->reg_dump[event->reg_offset],
+				kctx->reg_dump[1+event->reg_offset]);
+		event->reg_offset += 2;
+
+	}
+
+
+	return 0;
+}
+static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+
+	dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
+			event->reg_offset, (int)*pos);
+
+	return event;
+}
+
+static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event;
+
+	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);
+
+	/* The condition is trick here. It needs make sure the
+	 * fault hasn't happened and the dumping hasn't been started,
+	 * or the dumping has finished
+	 */
+	if (*pos == 0) {
+		event = kmalloc(sizeof(*event), GFP_KERNEL);
+		if (!event)
+			return NULL;
+		event->reg_offset = 0;
+		if (kbase_job_fault_event_wait(kbdev, event)) {
+			kfree(event);
+			return NULL;
+		}
+
+		/* The cache flush workaround is called in bottom half of
+		 * job done but we delayed it. Now we should clean cache
+		 * earlier. Then the GPU memory dump should be correct.
+		 */
+		if (event->katom->need_cache_flush_cores_retained) {
+			kbase_gpu_cacheclean(kbdev, event->katom);
+			event->katom->need_cache_flush_cores_retained = 0;
+		}
+
+	} else
+		return NULL;
+
+	return event;
+}
+
+static void debug_job_fault_stop(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+
+	/* here we wake up the kbase_jd_done_worker after stop, it needs
+	 * get the memory dump before the register dump in debug daemon,
+	 * otherwise, the memory dump may be incorrect.
+	 */
+
+	if (v != NULL) {
+		kfree(v);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 1");
+
+	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+		if (!list_empty(&kbdev->job_fault_event_list)) {
+			kbase_job_fault_event_dequeue(kbdev,
+				&kbdev->job_fault_event_list);
+			wake_up(&kbdev->job_fault_resume_wq);
+		}
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
+	}
+
+}
+
+static const struct seq_operations ops = {
+	.start = debug_job_fault_start,
+	.next = debug_job_fault_next,
+	.stop = debug_job_fault_stop,
+	.show = debug_job_fault_show,
+};
+
+static int debug_job_fault_open(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_open(file, &ops);
+
+	((struct seq_file *)file->private_data)->private = kbdev;
+	dev_info(kbdev->dev, "debug job fault seq open");
+
+	kbdev->job_fault_debug = true;
+
+	return 0;
+
+}
+
+static int debug_job_fault_release(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_release(in, file);
+
+	kbdev->job_fault_debug = false;
+
+	/* Clean the unprocessed job fault. After that, all the suspended
+	 * contexts could be rescheduled.
+	 */
+	kbase_job_fault_event_cleanup(kbdev);
+
+	dev_info(kbdev->dev, "debug job fault seq close");
+
+	return 0;
+}
+
+static const struct file_operations kbasep_debug_job_fault_fops = {
+	.open = debug_job_fault_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = debug_job_fault_release,
+};
+
+/*
+ *  Initialize debugfs entry for job fault dump
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("job_fault", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_debug_job_fault_fops);
+}
+
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+
+	INIT_LIST_HEAD(&kbdev->job_fault_event_list);
+
+	init_waitqueue_head(&(kbdev->job_fault_wq));
+	init_waitqueue_head(&(kbdev->job_fault_resume_wq));
+	spin_lock_init(&kbdev->job_fault_event_lock);
+
+	kbdev->job_fault_resume_workq = alloc_workqueue(
+			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
+	if (!kbdev->job_fault_resume_workq)
+		return -ENOMEM;
+
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+/*
+ * Release the relevant resource per device
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->job_fault_resume_workq);
+}
+
+
+/*
+ *  Initialize the relevant data structure per context
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
+{
+
+	/* We need allocate double size register range
+	 * Because this memory will keep the register address and value
+	 */
+	kctx->reg_dump = vmalloc(0x4000 * 2);
+	if (kctx->reg_dump == NULL)
+		return;
+
+	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
+		vfree(kctx->reg_dump);
+		kctx->reg_dump = NULL;
+	}
+	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
+	atomic_set(&kctx->job_fault_count, 0);
+
+}
+
+/*
+ *  release the relevant resource per context
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
+{
+	vfree(kctx->reg_dump);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
new file mode 100755
index 0000000..0930f90
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_JOB_FAULT_H
+#define _KBASE_DEBUG_JOB_FAULT_H
+
+#include <mali_kbase.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
+
+/**
+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue
+ *		per device and initialize the required lists.
+ * @kbdev:	Device pointer
+ *
+ * Return: Zero on success or a negative error code.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_dev_term - Clean up resources created in
+ *		kbase_debug_job_fault_dev_init.
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_context_init - Initialize the relevant
+ *		data structure per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_context_term - Release the relevant
+ *		resource per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_process - Process the failed job.
+ *      It will send a event and wake up the job fault waiting queue
+ *      Then create a work queue to wait for job dump finish
+ *      This function should be called in the interrupt handler and before
+ *      jd_done that make sure the jd_done_worker will be delayed until the
+ *      job dump finish
+ * @katom: The failed atom pointer
+ * @completion_code: the job status
+ * @return true if dump is going on
+ */
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code);
+
+
+/**
+ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
+ *      address during the job fault process, the relevant registers will
+ *      be saved when a job fault happen
+ * @kctx: KBase context pointer
+ * @reg_range: Maximum register address space
+ * @return true if initializing successfully
+ */
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range);
+
+/**
+ * kbase_job_fault_get_reg_snapshot - Read the interested registers for
+ *      failed job dump
+ * @kctx: KBase context pointer
+ * @return true if getting registers successfully
+ */
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
+
+#endif  /*_KBASE_DEBUG_JOB_FAULT_H*/
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
new file mode 100755
index 0000000..28d93e3
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -0,0 +1,278 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+struct debug_mem_mapping {
+	struct list_head node;
+
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long flags;
+
+	u64 start_pfn;
+	size_t nr_pages;
+};
+
+struct debug_mem_data {
+	struct list_head mapping_list;
+	struct kbase_context *kctx;
+};
+
+struct debug_mem_seq_off {
+	struct list_head *lh;
+	size_t offset;
+};
+
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data;
+	struct debug_mem_mapping *map;
+	loff_t pos = *_pos;
+
+	list_for_each_entry(map, &mem_data->mapping_list, node) {
+		if (pos >= map->nr_pages) {
+			pos -= map->nr_pages;
+		} else {
+			data = kmalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				return NULL;
+			data->lh = &map->node;
+			data->offset = pos;
+			return data;
+		}
+	}
+
+	/* Beyond the end */
+	return NULL;
+}
+
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+	kfree(v);
+}
+
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	if (data->offset < map->nr_pages - 1) {
+		data->offset++;
+		++*pos;
+		return data;
+	}
+
+	if (list_is_last(data->lh, &mem_data->mapping_list))
+		return NULL;
+
+	data->lh = data->lh->next;
+	data->offset = 0;
+	++*pos;
+
+	return data;
+}
+
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+	int i, j;
+	struct page *page;
+	uint32_t *mapping;
+	pgprot_t prot = PAGE_KERNEL;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	kbase_gpu_vm_lock(mem_data->kctx);
+
+	if (data->offset >= map->alloc->nents) {
+		seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+				data->offset) << PAGE_SHIFT);
+		goto out;
+	}
+
+	if (!(map->flags & KBASE_REG_CPU_CACHED))
+		prot = pgprot_writecombine(prot);
+
+	page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset]));
+	mapping = vmap(&page, 1, VM_MAP, prot);
+	if (!mapping)
+		goto out;
+
+	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+		seq_printf(m, "%016llx:", i + ((map->start_pfn +
+				data->offset) << PAGE_SHIFT));
+
+		for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+			seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+		seq_putc(m, '\n');
+	}
+
+	vunmap(mapping);
+
+	seq_putc(m, '\n');
+
+out:
+	kbase_gpu_vm_unlock(mem_data->kctx);
+	return 0;
+}
+
+static const struct seq_operations ops = {
+	.start = debug_mem_start,
+	.next = debug_mem_next,
+	.stop = debug_mem_stop,
+	.show = debug_mem_show,
+};
+
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+	struct file *kctx_file = i->i_private;
+	struct kbase_context *kctx = kctx_file->private_data;
+	struct rb_node *p;
+	struct debug_mem_data *mem_data;
+	int ret;
+
+	ret = seq_open(file, &ops);
+	if (ret)
+		return ret;
+
+	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	if (!mem_data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mem_data->kctx = kctx;
+
+	INIT_LIST_HEAD(&mem_data->mapping_list);
+
+	get_file(kctx_file);
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (p = rb_first(&kctx->reg_rbtree); p; p = rb_next(p)) {
+		struct kbase_va_region *reg;
+		struct debug_mem_mapping *mapping;
+
+		reg = rb_entry(p, struct kbase_va_region, rblink);
+
+		if (reg->gpu_alloc == NULL)
+			/* Empty region - ignore */
+			continue;
+
+		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping) {
+			ret = -ENOMEM;
+			kbase_gpu_vm_unlock(kctx);
+			goto out;
+		}
+
+		mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		mapping->start_pfn = reg->start_pfn;
+		mapping->nr_pages = reg->nr_pages;
+		mapping->flags = reg->flags;
+		list_add_tail(&mapping->node, &mem_data->mapping_list);
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	((struct seq_file *)file->private_data)->private = mem_data;
+
+	return 0;
+
+out:
+	if (mem_data) {
+		while (!list_empty(&mem_data->mapping_list)) {
+			struct debug_mem_mapping *mapping;
+
+			mapping = list_first_entry(&mem_data->mapping_list,
+					struct debug_mem_mapping, node);
+			kbase_mem_phy_alloc_put(mapping->alloc);
+			list_del(&mapping->node);
+			kfree(mapping);
+		}
+		fput(kctx_file);
+	}
+	seq_release(i, file);
+	return ret;
+}
+
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+	struct file *kctx_file = inode->i_private;
+	struct seq_file *sfile = file->private_data;
+	struct debug_mem_data *mem_data = sfile->private;
+	struct debug_mem_mapping *mapping;
+
+	seq_release(inode, file);
+
+	while (!list_empty(&mem_data->mapping_list)) {
+		mapping = list_first_entry(&mem_data->mapping_list,
+				struct debug_mem_mapping, node);
+		kbase_mem_phy_alloc_put(mapping->alloc);
+		list_del(&mapping->node);
+		kfree(mapping);
+	}
+
+	kfree(mem_data);
+
+	fput(kctx_file);
+
+	return 0;
+}
+
+static const struct file_operations kbase_debug_mem_view_fops = {
+	.open = debug_mem_open,
+	.release = debug_mem_release,
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+/**
+ * kbase_debug_mem_view_init - Initialise the mem_view sysfs file
+ * @kctx_file: The /dev/mali0 file instance for the context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct file *kctx_file)
+{
+	struct kbase_context *kctx = kctx_file->private_data;
+
+	debugfs_create_file("mem_view", S_IRUGO, kctx->kctx_dentry, kctx_file,
+			&kbase_debug_mem_view_fops);
+}
+
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
new file mode 100755
index 0000000..20ab51a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -0,0 +1,25 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+void kbase_debug_mem_view_init(struct file *kctx_file);
+
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100755
index 0000000..9bb5ea2
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,1465 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Defintions (types, defines, etcs) common to Kbase. They are placed here to
+ * allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_mmu_mode.h>
+#include <mali_kbase_instr.h>
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#include <linux/bus_logger.h>
+#endif
+
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#endif				/* CONFIG_SYNC */
+
+#include "mali_kbase_dma_fence.h"
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif				/* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_DEVFREQ */
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#if defined(CONFIG_PM_RUNTIME) || \
+	(defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+#define KBASE_PM_RUNTIME 1
+#endif
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif				/* CONFIG_MALI_DEBUG */
+#endif				/* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT             1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT           500
+
+/**
+ * Prevent soft-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * Therefore, soft stop may still be disabled due to HW issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+
+/**
+ * Prevent hard-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS        3
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS              16
+
+/* mmu */
+#define MIDGARD_MMU_VA_BITS 48
+
+#if MIDGARD_MMU_VA_BITS > 39
+#define MIDGARD_MMU_TOPLEVEL    0
+#else
+#define MIDGARD_MMU_TOPLEVEL    1
+#endif
+
+#define MIDGARD_MMU_BOTTOMLEVEL 3
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID     (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8	/* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_hwaccess_defs.h"
+
+#define KBASEP_FORCE_REPLAY_DISABLED 0
+
+/* Maximum force replay limit when randomization is enabled */
+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+
+/** Atom has been previously soft-stoppped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has been previously retried to execute */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom has fail dependency on same-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_PREV (1<<6)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has fail dependency on cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom has been submitted to JSCTX ringbuffers */
+#define KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in secure mode */
+#define KBASE_KATOM_FLAG_SECURE (1<<11)
+/* Atom has been stored in linked list */
+#define KBASE_KATOM_FLAG_JSCTX_IN_LL (1<<12)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS  (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ *  entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+		(JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+#ifdef CONFIG_DEBUG_FS
+struct base_job_fault_event {
+
+	u32 event_code;
+	struct kbase_jd_atom *katom;
+	struct work_struct job_fault_work;
+	struct list_head head;
+	int reg_offset;
+};
+
+#endif
+
+struct kbase_jd_atom_dependency {
+	struct kbase_jd_atom *atom;
+	u8 dep_type;
+};
+
+/**
+ * @brief The function retrieves a read-only reference to the atom field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return readonly reference to dependent ATOM.
+ */
+static inline const struct kbase_jd_atom *const kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return (const struct kbase_jd_atom * const)(dep->atom);
+}
+
+/**
+ * @brief The function retrieves a read-only reference to the dependency type field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return A dependency type value.
+ */
+static inline const u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return dep->dep_type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency.
+ * @param[in] a      The ATOM to be set as a dependency.
+ * @param     type   The ATOM dependency type to be set.
+ *
+ */
+static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
+		struct kbase_jd_atom *a, u8 type)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = a;
+	dep->dep_type = type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency to be cleared.
+ *
+ */
+static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = NULL;
+	dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
+
+enum kbase_atom_gpu_rb_state {
+	/* Atom is not currently present in slot ringbuffer */
+	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
+	/* Atom is in slot ringbuffer but is blocked on a previous atom */
+	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+	/* Atom is in slot ringbuffer but is waiting for cores to become
+	 * available */
+	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
+	/* Atom is in slot ringbuffer but is blocked on affinity */
+	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
+	/* Atom is in slot ringbuffer but is waiting for secure mode switch */
+	KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE,
+	/* Atom is in slot ringbuffer and ready to run */
+	KBASE_ATOM_GPU_RB_READY,
+	/* Atom is in slot ringbuffer and has been submitted to the GPU */
+	KBASE_ATOM_GPU_RB_SUBMITTED,
+	/* Atom must be returned to JS as soon as it reaches the head of the
+	 * ringbuffer due to a previous failure */
+	KBASE_ATOM_GPU_RB_RETURN_TO_JS
+};
+
+struct kbase_ext_res {
+	u64 gpu_address;
+	struct kbase_mem_phy_alloc *alloc;
+};
+
+struct kbase_jd_atom {
+	struct work_struct work;
+	ktime_t start_timestamp;
+	u64 time_spent_us; /**< Total time spent on the GPU in microseconds */
+
+	struct base_jd_udata udata;
+	struct kbase_context *kctx;
+
+	struct list_head dep_head[2];
+	struct list_head dep_item[2];
+	const struct kbase_jd_atom_dependency dep[2];
+
+	u16 nr_extres;
+	struct kbase_ext_res *extres;
+
+	u32 device_nr;
+	u64 affinity;
+	u64 jc;
+	enum kbase_atom_coreref_state coreref_state;
+#ifdef CONFIG_KDS
+	struct list_head node;
+	struct kds_resource_set *kds_rset;
+	bool kds_dep_satisfied;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_SYNC
+	struct sync_fence *fence;
+	struct sync_fence_waiter sync_waiter;
+#endif				/* CONFIG_SYNC */
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		/* This points to the dma-buf fence for this atom. If this is
+		 * NULL then there is no fence for this atom and the other
+		 * fields related to dma_fence may have invalid data.
+		 *
+		 * The context and seqno fields contain the details for this
+		 * fence.
+		 *
+		 * This fence is signaled when the katom is completed,
+		 * regardless of the event_code of the katom (signal also on
+		 * failure).
+		 */
+		struct fence *fence;
+		/* The dma-buf fence context number for this atom. A unique
+		 * context number is allocated to each katom in the context on
+		 * context creation.
+		 */
+		unsigned int context;
+		/* The dma-buf fence sequence number for this atom. This is
+		 * increased every time this katom uses dma-buf fence.
+		 */
+		atomic_t seqno;
+		/* This contains a list of all callbacks set up to wait on
+		 * other fences.  This atom must be held back from JS until all
+		 * these callbacks have been called and dep_count have reached
+		 * 0. The initial value of dep_count must be equal to the
+		 * number of callbacks on this list.
+		 *
+		 * This list is protected by jctx.lock. Callbacks are added to
+		 * this list when the atom is built and the wait are set up.
+		 * All the callbacks then stay on the list until all callbacks
+		 * have been called and the atom is queued, or cancelled, and
+		 * then all callbacks are taken off the list and freed.
+		 */
+		struct list_head callbacks;
+		/* Atomic counter of number of outstandind dma-buf fence
+		 * dependencies for this atom. When dep_count reaches 0 the
+		 * atom may be queued.
+		 *
+		 * The special value "-1" may only be set after the count
+		 * reaches 0, while holding jctx.lock. This indicates that the
+		 * atom has been handled, either queued in JS or cancelled.
+		 *
+		 * If anyone but the dma-fence worker sets this to -1 they must
+		 * ensure that any potentially queued worker must have
+		 * completed before allowing the atom to be marked as unused.
+		 * This can be done by flushing the fence work queue:
+		 * kctx->dma_fence.wq.
+		 */
+		atomic_t dep_count;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
+	enum base_jd_event_code event_code;
+	base_jd_core_req core_req;	    /**< core requirements */
+	/** Job Slot to retry submitting to if submission from IRQ handler failed
+	 *
+	 * NOTE: see if this can be unified into the another member e.g. the event */
+	int retry_submit_on_slot;
+
+	union kbasep_js_policy_job_info sched_info;
+	/* JS atom priority with respect to other atoms on its kctx. */
+	int sched_priority;
+
+	int poking;		/* BASE_HW_ISSUE_8316 */
+
+	wait_queue_head_t completed;
+	enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	int work_id;
+#endif
+	/* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
+	int slot_nr;
+
+	u32 atom_flags;
+
+	/* Number of times this atom has been retried. Used by replay soft job.
+	 */
+	int retry_count;
+
+	enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+	u64 need_cache_flush_cores_retained;
+
+	atomic_t blocked;
+
+	/* Pointer to atom that this atom has cross-slot dependency on */
+	struct kbase_jd_atom *x_pre_dep;
+	/* Pointer to atom that has cross-slot dependency on this atom */
+	struct kbase_jd_atom *x_post_dep;
+
+	/* The GPU's flush count recorded at the time of submission, used for
+	 * the cache flush optimisation */
+	u32 flush_id;
+
+	struct kbase_jd_atom_backend backend;
+#ifdef CONFIG_DEBUG_FS
+	struct base_job_fault_event fault_event;
+#endif
+
+	struct list_head queue;
+
+	struct kbase_va_region *jit_addr_reg;
+
+	/* If non-zero, this indicates that the atom will fail with the set
+	 * event_code when the atom is processed. */
+	enum base_jd_event_code will_fail_event_code;
+};
+
+static inline bool kbase_jd_katom_is_secure(const struct kbase_jd_atom *katom)
+{
+	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_SECURE);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+struct kbase_jd_context {
+	struct mutex lock;
+	struct kbasep_js_kctx_info sched_info;
+	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+	/** Tracks all job-dispatch jobs.  This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	u32 job_nr;
+
+	/** Waitq that reflects whether there are no jobs (including SW-only
+	 * dependency jobs). This is set when no jobs are present on the ctx,
+	 * and clear when there are jobs.
+	 *
+	 * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
+	 * the Job Scheduler is unaware of jobs that are blocked on dependencies,
+	 * and SW-only dependency jobs.
+	 *
+	 * This waitq can be waited upon to find out when the context jobs are all
+	 * done/cancelled (including those that might've been blocked on
+	 * dependencies) - and so, whether it can be terminated. However, it should
+	 * only be terminated once it is neither present in the policy-queue (see
+	 * kbasep_js_policy_try_evict_ctx() ) nor the run-pool (see
+	 * kbasep_js_kctx_info::ctx::is_scheduled).
+	 *
+	 * Since the waitq is only set under kbase_jd_context::lock,
+	 * the waiter should also briefly obtain and drop kbase_jd_context::lock to
+	 * guarentee that the setter has completed its work on the kbase_context
+	 *
+	 * This must be updated atomically with:
+	 * - kbase_jd_context::job_nr */
+	wait_queue_head_t zero_jobs_wait;
+
+	/** Job Done workqueue. */
+	struct workqueue_struct *job_done_wq;
+
+	spinlock_t tb_lock;
+	u32 *tb;
+	size_t tb_wrap_offset;
+
+#ifdef CONFIG_KDS
+	struct kds_callback kds_cb;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_t work_id;
+#endif
+};
+
+struct kbase_device_info {
+	u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+enum {
+	KBASE_AS_POKE_STATE_IN_FLIGHT     = 1<<0,
+	KBASE_AS_POKE_STATE_KILLING_POKE  = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+	u64	transtab;
+	u64	memattr;
+	u64	transcfg;
+};
+
+/**
+ * Important: Our code makes assumptions that a struct kbase_as structure is always at
+ * kbase_device->as[number]. This is used to recover the containing
+ * struct kbase_device from a struct kbase_as structure.
+ *
+ * Therefore, struct kbase_as structures must not be allocated anywhere else.
+ */
+struct kbase_as {
+	int number;
+
+	struct workqueue_struct *pf_wq;
+	struct work_struct work_pagefault;
+	struct work_struct work_busfault;
+	enum kbase_mmu_fault_type fault_type;
+	u32 fault_status;
+	u64 fault_addr;
+	u64 fault_extra_addr;
+	struct mutex transaction_mutex;
+
+	struct kbase_mmu_setup current_setup;
+
+	/* BASE_HW_ISSUE_8316  */
+	struct workqueue_struct *poke_wq;
+	struct work_struct poke_work;
+	/** Protected by kbasep_js_device_data::runpool_irq::lock */
+	int poke_refcount;
+	/** Protected by kbasep_js_device_data::runpool_irq::lock */
+	kbase_as_poke_state poke_state;
+	struct hrtimer poke_timer;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+struct kbasep_mem_device {
+	atomic_t used_pages;   /* Tracks usage of OS shared memory. Updated
+				   when OS memory is allocated/freed. */
+
+};
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+	/* Comma on its own, to extend the list */
+	,
+	/* Must be the last in the enum */
+	KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT  (((u8)1) << 1)
+
+struct kbase_trace {
+	struct timespec timestamp;
+	u32 thread_id;
+	u32 cpu;
+	void *ctx;
+	bool katom;
+	int atom_number;
+	u64 atom_udata[2];
+	u64 gpu_addr;
+	unsigned long info_val;
+	u8 code;
+	u8 jobslot;
+	u8 refcount;
+	u8 flags;
+};
+
+/** Event IDs for the power management framework.
+ *
+ * Any of these events might be missed, so they should not be relied upon to
+ * find the precise state of the GPU at a particular time in the
+ * trace. Overall, we should get a high percentage of these events for
+ * statisical purposes, and so a few missing should not be a problem */
+enum kbase_timeline_pm_event {
+	/* helper for tests */
+	KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** Event reserved for backwards compatibility with 'init' events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** The power state of the device has changed.
+	 *
+	 * Specifically, the device has reached a desired or available state.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
+
+	/** The GPU is becoming active.
+	 *
+	 * This event is sent when the first context is about to use the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
+
+	/** The GPU is becoming idle.
+	 *
+	 * This event is sent when the last context has finished using the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
+
+	/** Event reserved for backwards compatibility with 'policy_change'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_4,
+
+	/** Event reserved for backwards compatibility with 'system_suspend'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_5,
+
+	/** Event reserved for backwards compatibility with 'system_resume'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_6,
+
+	/** The job scheduler is requesting to power up/down cores.
+	 *
+	 * This event is sent when:
+	 * - powered down cores are needed to complete a job
+	 * - powered up cores are not needed anymore
+	 */
+	KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+
+	KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+};
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+struct kbase_trace_kctx_timeline {
+	atomic_t jd_atoms_in_flight;
+	u32 owner_tgid;
+};
+
+struct kbase_trace_kbdev_timeline {
+	/* Note: strictly speaking, not needed, because it's in sync with
+	 * kbase_device::jm_slots[]::submitted_nr
+	 *
+	 * But it's kept as an example of how to add global timeline tracking
+	 * information
+	 *
+	 * The caller must hold kbasep_js_device_data::runpool_irq::lock when
+	 * accessing this */
+	u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
+
+	/* Last UID for each PM event */
+	atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
+	/* Counter for generating PM event UIDs */
+	atomic_t pm_event_uid_counter;
+	/*
+	 * L2 transition state - true indicates that the transition is ongoing
+	 * Expected to be protected by pm.power_change_lock */
+	bool l2_transitioning;
+};
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+
+struct kbasep_kctx_list_element {
+	struct list_head link;
+	struct kbase_context *kctx;
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_device_data {
+	/**
+	 * The lock protecting Power Management structures accessed outside of
+	 * IRQ.
+	 *
+	 * This lock must also be held whenever the GPU is being powered on or
+	 * off.
+	 */
+	struct mutex lock;
+
+	/** The reference count of active contexts on this device. */
+	int active_count;
+	/** Flag indicating suspending/suspended */
+	bool suspending;
+	/* Wait queue set when active_count == 0 */
+	wait_queue_head_t zero_active_count_wait;
+
+	/**
+	 * Bit masks identifying the available shader cores that are specified
+	 * via sysfs. One mask per job slot.
+	 */
+	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
+	u64 debug_core_mask_all;
+
+	/**
+	 * Lock protecting the power state of the device.
+	 *
+	 * This lock must be held when accessing the shader_available_bitmap,
+	 * tiler_available_bitmap, l2_available_bitmap, shader_inuse_bitmap and
+	 * tiler_inuse_bitmap fields of kbase_device, and the ca_in_transition
+	 * and shader_poweroff_pending fields of kbase_pm_device_data. It is
+	 * also held when the hardware power registers are being written to, to
+	 * ensure that two threads do not conflict over the power transitions
+	 * that the hardware should make.
+	 */
+	spinlock_t power_change_lock;
+
+	/**
+	 * Callback for initializing the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 *
+	 * @return 0 on success, else error code
+	 */
+	 int (*callback_power_runtime_init)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback for terminating the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 */
+	void (*callback_power_runtime_term)(struct kbase_device *kbdev);
+
+	/* Time in milliseconds between each dvfs sample */
+	u32 dvfs_period;
+
+	/* Period of GPU poweroff timer */
+	ktime_t gpu_poweroff_time;
+
+	/* Number of ticks of GPU poweroff timer before shader is powered off */
+	int poweroff_shader_ticks;
+
+	/* Number of ticks of GPU poweroff timer before GPU is powered off */
+	int poweroff_gpu_ticks;
+
+	struct kbase_pm_backend_data backend;
+};
+
+/**
+ * struct kbase_secure_ops - Platform specific functions for GPU secure mode
+ * operations
+ * @secure_mode_enable:  Callback to enable secure mode on the GPU
+ * @secure_mode_disable: Callback to disable secure mode on the GPU
+ */
+struct kbase_secure_ops {
+	/**
+	 * secure_mode_enable() - Enable secure mode on the GPU
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*secure_mode_enable)(struct kbase_device *kbdev);
+
+	/**
+	 * secure_mode_disable() - Disable secure mode on the GPU
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*secure_mode_disable)(struct kbase_device *kbdev);
+};
+
+
+/**
+ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
+ * @kbdev:     Kbase device where memory is used
+ * @cur_size:  Number of free pages currently in the pool (may exceed @max_size
+ *             in some corner cases)
+ * @max_size:  Maximum number of free pages in the pool
+ * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size
+ *             and @page_list
+ * @page_list: List of free pages in the pool
+ * @reclaim:   Shrinker for kernel reclaim of free pages
+ * @next_pool: Pointer to next pool where pages can be allocated when this pool
+ *             is empty. Pages will spill over to the next pool when this pool
+ *             is full. Can be NULL if there is no next pool.
+ */
+struct kbase_mem_pool {
+	struct kbase_device *kbdev;
+	size_t              cur_size;
+	size_t              max_size;
+	spinlock_t          pool_lock;
+	struct list_head    page_list;
+	struct shrinker     reclaim;
+
+	struct kbase_mem_pool *next_pool;
+};
+
+
+#define DEVNAME_SIZE	16
+
+struct kbase_device {
+	s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];
+
+	u32 hw_quirks_sc;
+	u32 hw_quirks_tiler;
+	u32 hw_quirks_mmu;
+	u32 hw_quirks_jm;
+
+	struct list_head entry;
+	struct device *dev;
+	struct miscdevice mdev;
+	u64 reg_start;
+	size_t reg_size;
+	void __iomem *reg;
+
+	struct {
+		int irq;
+		int flags;
+	} irqs[3];
+
+	struct clk *clock;
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulator;
+#endif
+	char devname[DEVNAME_SIZE];
+
+#ifdef CONFIG_MALI_NO_MALI
+	void *model;
+	struct kmem_cache *irq_slab;
+	struct workqueue_struct *irq_workq;
+	atomic_t serving_job_irq;
+	atomic_t serving_gpu_irq;
+	atomic_t serving_mmu_irq;
+	spinlock_t reg_op_lock;
+#endif	/* CONFIG_MALI_NO_MALI */
+
+	struct kbase_pm_device_data pm;
+	struct kbasep_js_device_data js_data;
+	struct kbase_mem_pool mem_pool;
+	struct kbasep_mem_device memdev;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	struct kbase_as as[BASE_MAX_NR_AS];
+
+	spinlock_t mmu_mask_change;
+
+	struct kbase_gpu_props gpu_props;
+
+	/** List of SW workarounds for HW issues */
+	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+	/** List of features available */
+	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+
+	/* Bitmaps of cores that are currently in use (running jobs).
+	 * These should be kept up to date by the job scheduler.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 *
+	 * kbase_pm_check_transitions_nolock() should be called when bits are
+	 * cleared to update the power management system and allow transitions to
+	 * occur. */
+	u64 shader_inuse_bitmap;
+
+	/* Refcount for cores in use */
+	u32 shader_inuse_cnt[64];
+
+	/* Bitmaps of cores the JS needs for jobs ready to run */
+	u64 shader_needed_bitmap;
+
+	/* Refcount for cores needed */
+	u32 shader_needed_cnt[64];
+
+	u32 tiler_inuse_cnt;
+
+	u32 tiler_needed_cnt;
+
+	/* struct for keeping track of the disjoint information
+	 *
+	 * The state  is > 0 if the GPU is in a disjoint state. Otherwise 0
+	 * The count is the number of disjoint events that have occurred on the GPU
+	 */
+	struct {
+		atomic_t count;
+		atomic_t state;
+	} disjoint_event;
+
+	/* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
+	u32 l2_users_count;
+
+	/* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be
+	 * submitted to these cores. These are updated by the power management code. The job scheduler should avoid
+	 * submitting new jobs to any cores that are not marked as available.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 */
+	u64 shader_available_bitmap;
+	u64 tiler_available_bitmap;
+	u64 l2_available_bitmap;
+
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+
+	s8 nr_hw_address_spaces;			  /**< Number of address spaces in the GPU (constant after driver initialisation) */
+	s8 nr_user_address_spaces;			  /**< Number of address spaces available to user contexts */
+
+	/* Structure used for instrumentation and HW counters dumping */
+	struct kbase_hwcnt {
+		/* The lock should be used when accessing any of the following members */
+		spinlock_t lock;
+
+		struct kbase_context *kctx;
+		u64 addr;
+
+		struct kbase_context *suspended_kctx;
+		struct kbase_uk_hwcnt_setup suspended_state;
+
+		struct kbase_instr_backend backend;
+	} hwcnt;
+
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	/*value to be written to the irq_throttle register each time an irq is served */
+	atomic_t irq_throttle_cycles;
+
+#if KBASE_TRACE_ENABLE
+	spinlock_t              trace_lock;
+	u16                     trace_first_out;
+	u16                     trace_next_in;
+	struct kbase_trace            *trace_rbuf;
+#endif
+
+	/* This is used to override the current job scheduler values for
+	 * JS_SCHEDULING_PERIOD_NS
+	 * JS_SOFT_STOP_TICKS
+	 * JS_SOFT_STOP_TICKS_CL
+	 * JS_HARD_STOP_TICKS_SS
+	 * JS_HARD_STOP_TICKS_CL
+	 * JS_HARD_STOP_TICKS_DUMPING
+	 * JS_RESET_TICKS_SS
+	 * JS_RESET_TICKS_CL
+	 * JS_RESET_TICKS_DUMPING.
+	 *
+	 * These values are set via the js_timeouts sysfs file.
+	 */
+	u32 js_scheduling_period_ns;
+	int js_soft_stop_ticks;
+	int js_soft_stop_ticks_cl;
+	int js_hard_stop_ticks_ss;
+	int js_hard_stop_ticks_cl;
+	int js_hard_stop_ticks_dumping;
+	int js_reset_ticks_ss;
+	int js_reset_ticks_cl;
+	int js_reset_ticks_dumping;
+	bool js_timeouts_updated;
+
+	u32 reset_timeout_ms;
+
+	struct mutex cacheclean_lock;
+
+	/* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
+	void *platform_context;
+
+	/* List of kbase_contexts created */
+	struct list_head        kctx_list;
+	struct mutex            kctx_list_lock;
+
+#ifdef CONFIG_PM_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freq;
+	unsigned long current_voltage;
+#ifdef CONFIG_DEVFREQ_THERMAL
+	struct devfreq_cooling_device *devfreq_cooling;
+#endif
+#endif
+
+	struct kbase_ipa_context *ipa_ctx;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kbdev_timeline timeline;
+#endif
+
+	/*
+	 * Control for enabling job dump on failure, set when control debugfs
+	 * is opened.
+	 */
+	bool job_fault_debug;
+
+#ifdef CONFIG_DEBUG_FS
+	/* directory for debugfs entries */
+	struct dentry *mali_debugfs_directory;
+	/* Root directory for per context entry */
+	struct dentry *debugfs_ctx_directory;
+
+	/* failed job dump, used for separate debug process */
+	wait_queue_head_t job_fault_wq;
+	wait_queue_head_t job_fault_resume_wq;
+	struct workqueue_struct *job_fault_resume_workq;
+	struct list_head job_fault_event_list;
+	spinlock_t job_fault_event_lock;
+	struct kbase_context *kctx_fault;
+
+#if !MALI_CUSTOMER_RELEASE
+	/* Per-device data for register dumping interface */
+	struct {
+		u16 reg_offset; /* Offset of a GPU_CONTROL register to be
+				   dumped upon request */
+	} regs_dump_debugfs_data;
+#endif /* !MALI_CUSTOMER_RELEASE */
+#endif /* CONFIG_DEBUG_FS */
+
+	/* fbdump profiling controls set by gator */
+	u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
+
+
+#if MALI_CUSTOMER_RELEASE == 0
+	/* Number of jobs that are run before a job is forced to fail and
+	 * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
+	 * failures. */
+	int force_replay_limit;
+	/* Count of jobs between forced failures. Incremented on each job. A
+	 * job is forced to fail once this is greater than or equal to
+	 * force_replay_limit. */
+	int force_replay_count;
+	/* Core requirement for jobs to be failed and replayed. May be zero. */
+	base_jd_core_req force_replay_core_req;
+	/* true if force_replay_limit should be randomized. The random
+	 * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+	 */
+	bool force_replay_random;
+#endif
+
+	/* Total number of created contexts */
+	atomic_t ctx_num;
+
+	struct kbase_hwaccess_data hwaccess;
+
+	/* Count of page/bus faults waiting for workqueues to process */
+	atomic_t faults_pending;
+
+	/* true if GPU is powered off or power off operation is in progress */
+	bool poweroff_pending;
+
+
+	/* defaults for new context created for this device */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active_default;
+#else
+	u32 infinite_cache_active_default;
+#endif
+	size_t mem_pool_max_size_default;
+
+	/* system coherency mode  */
+	u32 system_coherency;
+	/* Flag to track when cci snoops have been enabled on the interface */
+	bool cci_snoop_enabled;
+
+	/* SMC function IDs to call into Trusted firmware to enable/disable
+	 * cache snooping. Value of 0 indicates that they are not used
+	 */
+	u32 snoop_enable_smc;
+	u32 snoop_disable_smc;
+
+	/* Secure operations */
+	struct kbase_secure_ops *secure_ops;
+
+	/*
+	 * true when GPU is put into secure mode
+	 */
+	bool secure_mode;
+
+	/*
+	 * true if secure mode is supported
+	 */
+	bool secure_mode_support;
+
+
+#ifdef CONFIG_MALI_DEBUG
+	wait_queue_head_t driver_inactive_wait;
+	bool driver_inactive;
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	/*
+	 * Bus logger integration.
+	 */
+	struct bus_logger_client *buslogger;
+#endif
+	/* Boolean indicating if an IRQ flush during reset is in progress. */
+	bool irq_reset_flush;
+
+	/* list of inited sub systems. Used during terminate/error recovery */
+	u32 inited_subsys;
+};
+
+/* JSCTX ringbuffer size will always be a power of 2. The idx shift must be:
+   - >=2 (buffer size -> 4)
+   - <= 9 (buffer size 2^(9-1)=256) (technically, 10 works for the ringbuffer
+				but this is unnecessary as max atoms is 256)
+ */
+#define JSCTX_RB_IDX_SHIFT (8U)
+#if ((JSCTX_RB_IDX_SHIFT < 2) || ((3 * JSCTX_RB_IDX_SHIFT) >= 32))
+#error "Invalid ring buffer size for 32bit atomic."
+#endif
+#define JSCTX_RB_SIZE (1U << (JSCTX_RB_IDX_SHIFT - 1U)) /* 1 bit for overflow */
+#define JSCTX_RB_SIZE_STORE (1U << JSCTX_RB_IDX_SHIFT)
+#define JSCTX_RB_MASK (JSCTX_RB_SIZE - 1U)
+#define JSCTX_RB_MASK_STORE (JSCTX_RB_SIZE_STORE - 1U)
+
+#define JSCTX_WR_OFFSET         (0U)
+#define JSCTX_RN_OFFSET         (JSCTX_WR_OFFSET   + JSCTX_RB_IDX_SHIFT)
+#define JSCTX_RD_OFFSET         (JSCTX_RN_OFFSET + JSCTX_RB_IDX_SHIFT)
+
+/**
+ * struct jsctx_rb_entry - Ringbuffer entry in &struct jsctx_queue.
+ * @atom_id: Atom ID
+ */
+struct jsctx_rb_entry {
+	u16 atom_id;
+};
+
+/**
+ * struct jsctx_queue - JS context atom queue, containing both ring buffer and linked list.
+ * @entries:     Array of size %JSCTX_RB_SIZE which holds the &struct
+ *               kbase_jd_atom pointers which make up the contents of the ring
+ *               buffer.
+ * @indicies:    An atomic variable containing indicies for the ring buffer.
+ *               Indicies are of size JSCTX_RB_IDX_SHIFT.
+ *               The following are contained:
+ *                - WR_IDX - Write index. Index of the NEXT slot to be written.
+ *                - RN_IDX - Running index. Index of the tail of the list.
+ *                           This is the atom that has been running the longest.
+ *                - RD_IDX - Read index. Index of the next atom to be pulled.
+ * @queue_head:  Head item of the linked list queue.
+ *
+ * Locking:
+ * The linked list assumes jctx.lock is held.
+ * The ringbuffer serves as an intermediary between irq context and non-irq
+ * context, without the need for the two to share any lock. irq context can
+ * pull (and unpull) and only requires the runpool_irq.lock. While non-irq
+ * context can add and remove and only requires holding only jctx.lock.
+ * Error handling affecting both, or the whole ringbuffer in general, must
+ * hold both locks or otherwise ensure (f.ex deschedule/kill) only that thread
+ * is accessing the buffer.
+ * This means that RD_IDX is updated by irq-context (pull and unpull) and must
+ * hold runpool_irq.lock. While WR_IDX (add) and RN_IDX (remove) is updated by
+ * non-irq context and must hold jctx.lock.
+ * Note that pull (or sister function peek) must also access WR_IDX to ensure
+ * there is free space in the buffer, this is ok as WR_IDX is only increased.
+ * A similar situation is apparent with unpull and RN_IDX, but only one atom
+ * (already pulled) can cause either remove or unpull, so this will never
+ * conflict.
+ *
+ * &struct jsctx_queue is a queue of &struct kbase_jd_atom,
+ * part ringbuffer and part linked list.
+ */
+struct jsctx_queue {
+	struct jsctx_rb_entry entries[JSCTX_RB_SIZE];
+
+	atomic_t indicies;
+
+	struct list_head queue_head;
+};
+
+
+
+
+
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
+					 (((minor) & 0xFFF) << 8) | \
+					 ((0 & 0xFF) << 0))
+
+struct kbase_context {
+	struct file *filp;
+	struct kbase_device *kbdev;
+	int id; /* System wide unique id */
+	unsigned long api_version;
+	phys_addr_t pgd;
+	struct list_head event_list;
+	struct list_head event_coalesce_list;
+	struct mutex event_mutex;
+	atomic_t event_closed;
+	struct workqueue_struct *event_workq;
+	atomic_t event_count;
+	int event_coalesce_count;
+
+	bool is_compat;
+
+	atomic_t                setup_complete;
+	atomic_t                setup_in_progress;
+
+	u64 *mmu_teardown_pages;
+
+	struct page *aliasing_sink_page;
+
+	struct mutex            mmu_lock;
+	struct mutex            reg_lock; /* To be converted to a rwlock? */
+	struct rb_root          reg_rbtree; /* Red-Black tree of GPU regions (live regions) */
+
+	unsigned long    cookies;
+	struct kbase_va_region *pending_regions[BITS_PER_LONG];
+
+	wait_queue_head_t event_queue;
+	pid_t tgid;
+	pid_t pid;
+
+	struct kbase_jd_context jctx;
+	atomic_t used_pages;
+	atomic_t         nonmapped_pages;
+
+	struct kbase_mem_pool mem_pool;
+
+	struct shrinker         reclaim;
+	struct list_head        evict_list;
+	struct mutex            evict_lock;
+
+	struct list_head waiting_soft_jobs;
+	spinlock_t waiting_soft_jobs_lock;
+#ifdef CONFIG_KDS
+	struct list_head waiting_kds_resource;
+#endif
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		struct list_head waiting_resource;
+		struct workqueue_struct *wq;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+	/** This is effectively part of the Run Pool, because it only has a valid
+	 * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
+	 *
+	 * The kbasep_js_device_data::runpool_irq::lock must be held whilst accessing
+	 * this.
+	 *
+	 * If the context relating to this as_nr is required, you must use
+	 * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
+	 * whilst you're using it. Alternatively, just hold the kbasep_js_device_data::runpool_irq::lock
+	 * to ensure the context doesn't disappear (but this has restrictions on what other locks
+	 * you can take whilst doing this) */
+	int as_nr;
+
+	/* NOTE:
+	 *
+	 * Flags are in jctx.sched_info.ctx.flags
+	 * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
+	 *
+	 * All other flags must be added there */
+	spinlock_t         mm_update_lock;
+	struct mm_struct *process_mm;
+	/* End of the SAME_VA zone */
+	u64 same_va_end;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kctx_timeline timeline;
+#endif
+#ifdef CONFIG_DEBUG_FS
+	/* Content of mem_profile file */
+	char *mem_profile_data;
+	/* Size of @c mem_profile_data */
+	size_t mem_profile_size;
+	/* Mutex guarding memory profile state */
+	struct mutex mem_profile_lock;
+	/* Memory profile file created */
+	bool mem_profile_initialized;
+	struct dentry *kctx_dentry;
+
+	/* for job fault debug */
+	unsigned int *reg_dump;
+	atomic_t job_fault_count;
+	/* This list will keep the following atoms during the dump
+	 * in the same context
+	 */
+	struct list_head job_fault_resume_event_list;
+
+#endif /* CONFIG_DEBUG_FS */
+
+	struct jsctx_queue jsctx_queue
+		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+
+	/* Number of atoms currently pulled from this context */
+	atomic_t atoms_pulled;
+	/* Number of atoms currently pulled from this context, per slot */
+	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+	/* true if last kick() caused atoms to be pulled from this context */
+	bool pulled;
+	/* true if infinite cache is to be enabled for new allocations. Existing
+	 * allocations will not change. bool stored as a u32 per Linux API */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active;
+#else
+	u32 infinite_cache_active;
+#endif
+	/* Bitmask of slots that can be pulled from */
+	u32 slots_pullable;
+
+	/* true if address space assignment is pending */
+	bool as_pending;
+
+	/* Backend specific data */
+	struct kbase_context_backend backend;
+
+	/* Work structure used for deferred ASID assignment */
+	struct work_struct work;
+
+	/* Only one userspace vinstr client per kbase context */
+	struct kbase_vinstr_client *vinstr_cli;
+	struct mutex vinstr_cli_lock;
+
+	/* Must hold queue_mutex when accessing */
+	bool ctx_active;
+
+	/* List of completed jobs waiting for events to be posted */
+	struct list_head completed_jobs;
+	/* Number of work items currently pending on job_done_wq */
+	atomic_t work_count;
+
+	/* true if context is counted in kbdev->js_data.nr_contexts_runnable */
+	bool ctx_runnable_ref;
+
+	/* Waiting soft-jobs will fail when this timer expires */
+	struct hrtimer soft_event_timeout;
+
+	/* JIT allocation management */
+	struct kbase_va_region *jit_alloc[255];
+	struct list_head jit_active_head;
+	struct list_head jit_pool_head;
+	struct list_head jit_destroy_head;
+	struct mutex jit_lock;
+	struct work_struct jit_work;
+
+	/* External sticky resource management */
+	struct list_head ext_res_meta_head;
+};
+
+/**
+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
+ *                                 to a @kbase_context.
+ * @ext_res_node:                  List head for adding the metadata to a
+ *                                 @kbase_context.
+ * @alloc:                         The physical memory allocation structure
+ *                                 which is mapped.
+ * @gpu_addr:                      The GPU virtual address the resource is
+ *                                 mapped to.
+ *
+ * External resources can be mapped into multiple contexts as well as the same
+ * context multiple times.
+ * As kbase_va_region itself isn't refcounted we can't attach our extra
+ * information to it as it could be removed under our feet leaving external
+ * resources pinned.
+ * This metadata structure binds a single external resource to a single
+ * context, ensuring that per context mapping is tracked separately so it can
+ * be overridden when needed and abuses by the application (freeing the resource
+ * multiple times) don't effect the refcount of the physical allocation.
+ */
+struct kbase_ctx_ext_res_meta {
+	struct list_head ext_res_node;
+	struct kbase_mem_phy_alloc *alloc;
+	u64 gpu_addr;
+};
+
+enum kbase_reg_access_type {
+	REG_READ,
+	REG_WRITE
+};
+
+enum kbase_share_attr_bits {
+	/* (1ULL << 8) bit is reserved */
+	SHARE_BOTH_BITS = (2ULL << 8),	/* inner and outer shareable coherency */
+	SHARE_INNER_BITS = (3ULL << 8)	/* inner shareable coherency */
+};
+
+/**
+ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
+ * @kbdev: kbase device
+ *
+ * Return: true if the device access are coherent, false if not.
+ */
+static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
+			(kbdev->system_coherency == COHERENCY_ACE))
+		return true;
+
+	return false;
+}
+
+/* Conversion helpers for setting up high resolution timers */
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime((x)*1000000U))
+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
+
+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
+#define KBASE_CLEAN_CACHE_MAX_LOOPS     100000
+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
+#define KBASE_AS_INACTIVE_MAX_LOOPS     100000
+
+/* Maximum number of times a job can be replayed */
+#define BASEP_JD_REPLAY_LIMIT 15
+
+/* JobDescriptorHeader - taken from the architecture specifications, the layout
+ * is currently identical for all GPU archs. */
+struct job_descriptor_header {
+	u32 exception_status;
+	u32 first_incomplete_task;
+	u64 fault_pointer;
+	u8 job_descriptor_size : 1;
+	u8 job_type : 7;
+	u8 job_barrier : 1;
+	u8 _reserved_01 : 1;
+	u8 _reserved_1 : 1;
+	u8 _reserved_02 : 1;
+	u8 _reserved_03 : 1;
+	u8 _reserved_2 : 1;
+	u8 _reserved_04 : 1;
+	u8 _reserved_05 : 1;
+	u16 job_index;
+	u16 job_dependency_index_1;
+	u16 job_dependency_index_2;
+	union {
+		u64 _64;
+		u32 _32;
+	} next_job;
+};
+
+#endif				/* _KBASE_DEFS_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
new file mode 100755
index 0000000..c55779c
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -0,0 +1,696 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_profiling_gator_api.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports tracing feature provided in the base module.
+ * Please keep it in sync with the value of base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if KBASE_TRACE_ENABLE
+static const char *kbasep_trace_code_string[] = {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+static int kbasep_trace_init(struct kbase_device *kbdev);
+static void kbasep_trace_term(struct kbase_device *kbdev);
+static void kbasep_trace_hook_wrapper(void *param);
+
+struct kbase_device *kbase_device_alloc(void)
+{
+	return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+	const char format[] = "mali_mmu%d";
+	char name[sizeof(format)];
+	const char poke_format[] = "mali_mmu%d_poker";
+	char poke_name[sizeof(poke_format)];
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		snprintf(poke_name, sizeof(poke_name), poke_format, i);
+
+	snprintf(name, sizeof(name), format, i);
+
+	kbdev->as[i].number = i;
+	kbdev->as[i].fault_addr = 0ULL;
+
+	kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+	if (!kbdev->as[i].pf_wq)
+		return -EINVAL;
+
+	mutex_init(&kbdev->as[i].transaction_mutex);
+	INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+	INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+		struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
+		struct work_struct *poke_work = &kbdev->as[i].poke_work;
+
+		kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+		if (!kbdev->as[i].poke_wq) {
+			destroy_workqueue(kbdev->as[i].pf_wq);
+			return -EINVAL;
+		}
+		KBASE_DEBUG_ASSERT(!object_is_on_stack(poke_work));
+		INIT_WORK(poke_work, kbasep_as_do_poke);
+
+		hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+		poke_timer->function = kbasep_as_poke_timer_callback;
+
+		kbdev->as[i].poke_refcount = 0;
+		kbdev->as[i].poke_state = 0u;
+	}
+
+	return 0;
+}
+
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+	destroy_workqueue(kbdev->as[i].pf_wq);
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		destroy_workqueue(kbdev->as[i].poke_wq);
+}
+
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+	int i, err;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		err = kbase_device_as_init(kbdev, i);
+		if (err)
+			goto free_workqs;
+	}
+
+	return 0;
+
+free_workqs:
+	for (; i > 0; i--)
+		kbase_device_as_term(kbdev, i);
+
+	return err;
+}
+
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+		kbase_device_as_term(kbdev, i);
+}
+
+int kbase_device_init(struct kbase_device * const kbdev)
+{
+	int i, err;
+#ifdef CONFIG_ARM64
+	struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
+
+	spin_lock_init(&kbdev->mmu_mask_change);
+#ifdef CONFIG_ARM64
+	kbdev->cci_snoop_enabled = false;
+	np = kbdev->dev->of_node;
+	if (np != NULL) {
+		if (of_property_read_u32(np, "snoop_enable_smc",
+					&kbdev->snoop_enable_smc))
+			kbdev->snoop_enable_smc = 0;
+		if (of_property_read_u32(np, "snoop_disable_smc",
+					&kbdev->snoop_disable_smc))
+			kbdev->snoop_disable_smc = 0;
+		/* Either both or none of the calls should be provided. */
+		if (!((kbdev->snoop_disable_smc == 0
+			&& kbdev->snoop_enable_smc == 0)
+			|| (kbdev->snoop_disable_smc != 0
+			&& kbdev->snoop_enable_smc != 0))) {
+			WARN_ON(1);
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+#endif /* CONFIG_ARM64 */
+	/* Get the list of workarounds for issues on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	err = kbase_hw_set_issues_mask(kbdev);
+	if (err)
+		goto fail;
+
+	/* Set the list of features available on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	kbase_hw_set_features_mask(kbdev);
+
+	kbase_gpuprops_set_features(kbdev);
+
+	/* On Linux 4.0+, dma coherency is determined from device tree */
+#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+	set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif
+
+	/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+	 * device structure was created by device-tree
+	 */
+	if (!kbdev->dev->dma_mask)
+		kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+	err = dma_set_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	err = dma_set_coherent_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+	err = kbase_device_all_as_init(kbdev);
+	if (err)
+		goto as_init_failed;
+
+	spin_lock_init(&kbdev->hwcnt.lock);
+
+	err = kbasep_trace_init(kbdev);
+	if (err)
+		goto term_as;
+
+	mutex_init(&kbdev->cacheclean_lock);
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		kbdev->timeline.slot_atoms_submitted[i] = 0;
+
+	for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
+		atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+	/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
+	for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
+		kbdev->kbase_profiling_controls[i] = 0;
+
+	kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+	atomic_set(&kbdev->ctx_num, 0);
+
+	err = kbase_instr_backend_init(kbdev);
+	if (err)
+		goto term_trace;
+
+	kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+#else
+	kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+#ifdef CONFIG_MALI_DEBUG
+	init_waitqueue_head(&kbdev->driver_inactive_wait);
+#endif /* CONFIG_MALI_DEBUG */
+
+	return 0;
+term_trace:
+	kbasep_trace_term(kbdev);
+term_as:
+	kbase_device_all_as_term(kbdev);
+as_init_failed:
+dma_set_mask_failed:
+fail:
+	return err;
+}
+
+void kbase_device_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+#if KBASE_TRACE_ENABLE
+	kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+	kbase_instr_backend_term(kbdev);
+
+	kbasep_trace_term(kbdev);
+
+	kbase_device_all_as_term(kbdev);
+}
+
+void kbase_device_free(struct kbase_device *kbdev)
+{
+	kfree(kbdev);
+}
+
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(tb);
+
+	/* Interface uses 16-bit value to track last accessed entry. Each entry
+	 * is composed of two 32-bit words.
+	 * This limits the size that can be handled without an overflow. */
+	if (0xFFFF * (2 * sizeof(u32)) < size)
+		return -EINVAL;
+
+	/* set up the header */
+	/* magic number in the first 4 bytes */
+	tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
+	/* Store (write offset = 0, wrap counter = 0, transaction active = no)
+	 * write offset 0 means never written.
+	 * Offsets 1 to (wrap_offset - 1) used to store values when trace started
+	 */
+	tb[1] = 0;
+
+	/* install trace buffer */
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb_wrap_offset = size / 8;
+	kctx->jctx.tb = tb;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+
+	return 0;
+}
+
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb = NULL;
+	kctx->jctx.tb_wrap_offset = 0;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	if (kctx->jctx.tb) {
+		u16 wrap_count;
+		u16 write_offset;
+		u32 *tb = kctx->jctx.tb;
+		u32 header_word;
+
+		header_word = tb[1];
+		KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
+
+		wrap_count = (header_word >> 1) & 0x7FFF;
+		write_offset = (header_word >> 16) & 0xFFFF;
+
+		/* mark as transaction in progress */
+		tb[1] |= 0x1;
+		mb();
+
+		/* calculate new offset */
+		write_offset++;
+		if (write_offset == kctx->jctx.tb_wrap_offset) {
+			/* wrap */
+			write_offset = 1;
+			wrap_count++;
+			wrap_count &= 0x7FFF;	/* 15bit wrap counter */
+		}
+
+		/* store the trace entry at the selected offset */
+		tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
+		tb[write_offset * 2 + 1] = reg_value;
+		mb();
+
+		/* new header word */
+		header_word = (write_offset << 16) | (wrap_count << 1) | 0x0;	/* transaction complete */
+		tb[1] = header_word;
+	}
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	struct kbase_trace *rbuf;
+
+	rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+	if (!rbuf)
+		return -EINVAL;
+
+	kbdev->trace_rbuf = rbuf;
+	spin_lock_init(&kbdev->trace_lock);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	kfree(kbdev->trace_rbuf);
+}
+
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+	s32 written = 0;
+
+	/* Initial part of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
+
+	if (trace_msg->katom)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
+
+	/* NOTE: Could add function callbacks to handle different message types */
+	/* Jobslot present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Refcount present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Rest of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
+}
+
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	dev_dbg(kbdev->dev, "%s", buffer);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+	unsigned long irqflags;
+	struct kbase_trace *trace_msg;
+
+	spin_lock_irqsave(&kbdev->trace_lock, irqflags);
+
+	trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+	/* Fill the message */
+	trace_msg->thread_id = task_pid_nr(current);
+	trace_msg->cpu = task_cpu(current);
+
+	getnstimeofday(&trace_msg->timestamp);
+
+	trace_msg->code = code;
+	trace_msg->ctx = ctx;
+
+	if (NULL == katom) {
+		trace_msg->katom = false;
+	} else {
+		trace_msg->katom = true;
+		trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+		trace_msg->atom_udata[0] = katom->udata.blob[0];
+		trace_msg->atom_udata[1] = katom->udata.blob[1];
+	}
+
+	trace_msg->gpu_addr = gpu_addr;
+	trace_msg->jobslot = jobslot;
+	trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
+	trace_msg->info_val = info_val;
+	trace_msg->flags = flags;
+
+	/* Update the ringbuffer indices */
+	kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+	if (kbdev->trace_next_in == kbdev->trace_first_out)
+		kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+	/* Done */
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	kbdev->trace_first_out = kbdev->trace_next_in;
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 start;
+	u32 end;
+
+	dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	start = kbdev->trace_first_out;
+	end = kbdev->trace_next_in;
+
+	while (start != end) {
+		struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
+
+		kbasep_trace_dump_msg(kbdev, trace_msg);
+
+		start = (start + 1) & KBASE_TRACE_MASK;
+	}
+	dev_dbg(kbdev->dev, "TRACE_END");
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	KBASE_TRACE_CLEAR(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)param;
+
+	kbasep_trace_dump(kbdev);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct trace_seq_state {
+	struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+	u32 start;
+	u32 end;
+};
+
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	if (*pos > KBASE_TRACE_SIZE)
+		return NULL;
+	i = state->start + *pos;
+	if ((state->end >= state->start && i >= state->end) ||
+			i >= state->end + KBASE_TRACE_SIZE)
+		return NULL;
+
+	i &= KBASE_TRACE_MASK;
+
+	return &state->trace_buf[i];
+}
+
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	(*pos)++;
+
+	i = (state->start + *pos) & KBASE_TRACE_MASK;
+	if (i == state->end)
+		return NULL;
+
+	return &state->trace_buf[i];
+}
+
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace *trace_msg = data;
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	seq_printf(s, "%s\n", buffer);
+	return 0;
+}
+
+static const struct seq_operations kbasep_trace_seq_ops = {
+	.start = kbasep_trace_seq_start,
+	.next = kbasep_trace_seq_next,
+	.stop = kbasep_trace_seq_stop,
+	.show = kbasep_trace_seq_show,
+};
+
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct kbase_device *kbdev = inode->i_private;
+	unsigned long flags;
+
+	struct trace_seq_state *state;
+
+	state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
+	if (!state)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	state->start = kbdev->trace_first_out;
+	state->end = kbdev->trace_next_in;
+	memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	return 0;
+}
+
+static const struct file_operations kbasep_trace_debugfs_fops = {
+	.open = kbasep_trace_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_trace", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_trace_debugfs_fops);
+}
+
+#else
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_DEBUG_FS */
+
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	CSTD_UNUSED(param);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
+{
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		kbdev->kbase_profiling_controls[control] = value;
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+}
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control)
+{
+	u32 ret_value = 0;
+
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		ret_value = kbdev->kbase_profiling_controls[control];
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+
+	return ret_value;
+}
+
+/*
+ * Called by gator to control the production of
+ * profiling information at runtime
+ * */
+
+void _mali_profiling_control(u32 action, u32 value)
+{
+	struct kbase_device *kbdev = NULL;
+
+	/* find the first i.e. call with -1 */
+	kbdev = kbase_find_device(-1);
+
+	if (NULL != kbdev)
+		kbase_set_profiling_control(kbdev, action, value);
+}
+KBASE_EXPORT_SYMBOL(_mali_profiling_control);
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
new file mode 100755
index 0000000..f70bccc
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_set(&kbdev->disjoint_event.count, 0);
+	atomic_set(&kbdev->disjoint_event.state, 0);
+}
+
+/* increment the disjoint event count */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* increment the state and the event counter */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.state);
+
+	kbase_disjoint_event(kbdev);
+}
+
+/* decrement the state */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+	kbase_disjoint_event(kbdev);
+
+	atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* increments the count only if the state is > 0 */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	if (atomic_read(&kbdev->disjoint_event.state))
+		kbase_disjoint_event(kbdev);
+}
+
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
new file mode 100644
index 0000000..29f3324
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
@@ -0,0 +1,524 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as
+ * it will be set there.
+ */
+#include "mali_kbase_dma_fence.h"
+
+#include <linux/atomic.h>
+#include <linux/fence.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/reservation.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/ww_mutex.h>
+
+#include <mali_kbase.h>
+
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(kbase_dma_fence_lock);
+
+
+static void
+kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource);
+}
+
+void
+kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom)
+{
+	list_del(&katom->queue);
+}
+
+static const char *
+kbase_dma_fence_get_driver_name(struct fence *fence)
+{
+	return kbase_drv_name;
+}
+
+static const char *
+kbase_dma_fence_get_timeline_name(struct fence *fence)
+{
+	return kbase_timeline_name;
+}
+
+static bool
+kbase_dma_fence_enable_signaling(struct fence *fence)
+{
+	/* If in the future we need to add code here remember to
+	 * to get a reference to the fence and release it when signaling
+	 * as stated in fence.h
+	 */
+	return true;
+}
+
+static void
+kbase_dma_fence_fence_value_str(struct fence *fence, char *str, int size)
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+static const struct fence_ops kbase_dma_fence_ops = {
+	.get_driver_name = kbase_dma_fence_get_driver_name,
+	.get_timeline_name = kbase_dma_fence_get_timeline_name,
+	.enable_signaling = kbase_dma_fence_enable_signaling,
+	/* Use the default wait */
+	.wait = fence_default_wait,
+	.fence_value_str = kbase_dma_fence_fence_value_str,
+};
+
+static struct fence *
+kbase_dma_fence_new(unsigned int context, unsigned int seqno)
+{
+	struct fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	fence_init(fence,
+		   &kbase_dma_fence_ops,
+		   &kbase_dma_fence_lock,
+		   context,
+		   seqno);
+
+	return fence;
+}
+
+static int
+kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
+				  struct ww_acquire_ctx *ctx)
+{
+	struct reservation_object *content_res = NULL;
+	unsigned int content_res_idx = 0;
+	unsigned int r;
+	int err;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < info->dma_fence_resv_count; r++) {
+		if (info->resv_objs[r] == content_res) {
+			content_res = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	content_res_idx = r;
+
+	/* Unlock the locked one ones */
+	for (r--; r >= 0; r--)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+
+	if (content_res)
+		ww_mutex_unlock(&content_res->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+		content_res = info->resv_objs[content_res_idx];
+		ww_mutex_lock_slow(&content_res->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+static void
+kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
+				    struct ww_acquire_ctx *ctx)
+{
+	unsigned int r;
+
+	for (r = 0; r < info->dma_fence_resv_count; r++)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+	ww_acquire_fini(ctx);
+}
+
+/**
+ * kbase_dma_fence_free_callbacks - Free dma-fence callbacks on a katom
+ * @katom: Pointer to katom
+ *
+ * This function will free all fence callbacks on the katom's list of
+ * callbacks. Callbacks that have not yet been called, because their fence
+ * hasn't yet signaled, will first be removed from the fence.
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom)
+{
+	struct kbase_dma_fence_cb *cb, *tmp;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Clean up and free callbacks. */
+	list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) {
+		bool ret;
+
+		/* Cancel callbacks that hasn't been called yet. */
+		ret = fence_remove_callback(cb->fence, &cb->fence_cb);
+		if (ret) {
+			/* Fence had not signaled, clean up after
+			 * canceling.
+			 */
+			atomic_dec(&katom->dma_fence.dep_count);
+		}
+
+		fence_put(cb->fence);
+		list_del(&cb->node);
+		kfree(cb);
+	}
+}
+
+/**
+ * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom
+ * @katom:	Katom to cancel
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Cancel callbacks and clean up. */
+	kbase_dma_fence_free_callbacks(katom);
+
+	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == 0);
+
+	/* Mark the atom as handled in case all fences signaled just before
+	 * canceling the callbacks and the worker was queued.
+	 */
+	atomic_set(&katom->dma_fence.dep_count, -1);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 * there is a race between job completion and cancellation.
+	 */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+
+/**
+ * kbase_dma_fence_work() - Worker thread called when a fence is signaled
+ * @pwork:	work_struct containing a pointer to a katom
+ *
+ * This function will clean and mark all dependencies as satisfied
+ */
+static void
+kbase_dma_fence_work(struct work_struct *pwork)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = container_of(pwork, struct kbase_jd_atom, work);
+	ctx = &katom->kctx->jctx;
+
+	mutex_lock(&ctx->lock);
+	if (atomic_read(&katom->dma_fence.dep_count) != 0)
+		goto out;
+
+	atomic_set(&katom->dma_fence.dep_count, -1);
+
+	/* Remove atom from list of dma-fence waiting atoms. */
+	kbase_dma_fence_waiters_remove(katom);
+	/* Cleanup callbacks. */
+	kbase_dma_fence_free_callbacks(katom);
+	/* Queue atom on GPU. */
+	kbase_jd_dep_clear_locked(katom);
+
+out:
+	mutex_unlock(&ctx->lock);
+}
+
+static int
+kbase_dma_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct fence *fence,
+			     fence_func_t callback)
+{
+	int err = 0;
+	struct kbase_dma_fence_cb *kbase_fence_cb;
+
+	kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL);
+	if (!kbase_fence_cb)
+		return -ENOMEM;
+
+	fence_get(fence);
+
+	kbase_fence_cb->fence = fence;
+	kbase_fence_cb->katom = katom;
+	INIT_LIST_HEAD(&kbase_fence_cb->node);
+
+	err = fence_add_callback(fence, &kbase_fence_cb->fence_cb, callback);
+	if (err == -ENOENT) {
+		/* Fence signaled, clear the error and return */
+		err = 0;
+		kbase_fence_cb->fence = NULL;
+		fence_put(fence);
+		kfree(kbase_fence_cb);
+	} else if (err) {
+		/* Do nothing, just return the error */
+		fence_put(fence);
+		kfree(kbase_fence_cb);
+	} else {
+		atomic_inc(&katom->dma_fence.dep_count);
+		/* Add callback to katom's list of callbacks */
+		list_add(&katom->dma_fence.callbacks, &kbase_fence_cb->node);
+	}
+
+	return err;
+}
+
+static void
+kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
+{
+	struct kbase_dma_fence_cb *kcb = container_of(cb,
+				struct kbase_dma_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* If the atom is zapped dep_count will be forced to a negative number
+	 * preventing this callback from ever scheduling work. Which in turn
+	 * would reschedule the atom.
+	 */
+	if (atomic_dec_and_test(&katom->dma_fence.dep_count)) {
+		bool ret;
+
+		INIT_WORK(&katom->work, kbase_dma_fence_work);
+		ret = queue_work(kctx->dma_fence.wq, &katom->work);
+		/* Warn if work was already queued, that should not happen. */
+		WARN_ON(!ret);
+	}
+}
+
+static int
+kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
+					 struct reservation_object *resv,
+					 bool exclusive)
+{
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = reservation_object_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = kbase_dma_fence_add_callback(katom,
+						   excl_fence,
+						   kbase_dma_fence_cb);
+		if (err)
+			goto error;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = kbase_dma_fence_add_callback(katom,
+							   shared_fences[i],
+							   kbase_dma_fence_cb);
+			if (err)
+				goto error;
+		}
+	}
+	kfree(shared_fences);
+
+	return err;
+
+error:
+	/* Cancel and clean up all callbacks that was set up before the error.
+	 */
+	kbase_dma_fence_free_callbacks(katom);
+	kfree(shared_fences);
+
+	return err;
+}
+
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive)
+{
+	unsigned int i;
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		/* Duplicate resource, ignore */
+		if (info->resv_objs[i] == resv)
+			return;
+	}
+
+	info->resv_objs[info->dma_fence_resv_count] = resv;
+	if (exclusive)
+		set_bit(info->dma_fence_resv_count,
+			info->dma_fence_excl_bitmap);
+	(info->dma_fence_resv_count)++;
+}
+
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info)
+{
+	int err, i;
+	struct fence *fence;
+	struct ww_acquire_ctx ww_ctx;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	atomic_set(&katom->dma_fence.dep_count, 1);
+	fence = kbase_dma_fence_new(katom->dma_fence.context,
+				    atomic_inc_return(&katom->dma_fence.seqno));
+	if (!fence) {
+		err = -ENOMEM;
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d creating fence.\n", err);
+		return err;
+	}
+
+	katom->dma_fence.fence = fence;
+
+	err = kbase_dma_fence_lock_reservations(info, &ww_ctx);
+	if (err) {
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d locking reservations.\n", err);
+		return err;
+	}
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		struct reservation_object *obj = info->resv_objs[i];
+
+		if (!test_bit(i, info->dma_fence_excl_bitmap)) {
+			err = reservation_object_reserve_shared(obj);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d reserving space for shared fence.\n", err);
+				goto end;
+			}
+
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, false);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_shared_fence(obj, katom->dma_fence.fence);
+		} else {
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, true);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_excl_fence(obj, katom->dma_fence.fence);
+		}
+	}
+
+end:
+	kbase_dma_fence_unlock_reservations(info, &ww_ctx);
+
+	if (!err) {
+		/* Test if the callbacks are already triggered */
+		if (atomic_dec_and_test(&katom->dma_fence.dep_count)) {
+			atomic_set(&katom->dma_fence.dep_count, -1);
+			kbase_dma_fence_free_callbacks(katom);
+		} else {
+			/* Add katom to the list of dma-buf fence waiting atoms
+			 * only if it is still waiting.
+			 */
+			kbase_dma_fence_waiters_add(katom);
+		}
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
+{
+	struct kbase_jd_atom *katom, *katom_tmp;
+
+	list_for_each_entry_safe(katom, katom_tmp,
+				 &kctx->dma_fence.waiting_resource, queue) {
+		kbase_dma_fence_waiters_remove(katom);
+		kbase_dma_fence_cancel_atom(katom);
+	}
+}
+
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
+{
+	/* Cancel callbacks and clean up. */
+	kbase_dma_fence_free_callbacks(katom);
+}
+
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
+{
+	if (!katom->dma_fence.fence)
+		return;
+
+	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == -1);
+
+	/* Signal the atom's fence. */
+	fence_signal(katom->dma_fence.fence);
+	fence_put(katom->dma_fence.fence);
+	katom->dma_fence.fence = NULL;
+
+	kbase_dma_fence_free_callbacks(katom);
+}
+
+void kbase_dma_fence_term(struct kbase_context *kctx)
+{
+	destroy_workqueue(kctx->dma_fence.wq);
+	kctx->dma_fence.wq = NULL;
+}
+
+int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);
+
+	kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d",
+					     WQ_UNBOUND, 1, kctx->pid);
+	if (!kctx->dma_fence.wq)
+		return -ENOMEM;
+
+	return 0;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
new file mode 100644
index 0000000..3b0a69b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
@@ -0,0 +1,150 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DMA_FENCE_H_
+#define _KBASE_DMA_FENCE_H_
+
+#ifdef CONFIG_MALI_DMA_FENCE
+
+#include <linux/fence.h>
+#include <linux/list.h>
+#include <linux/reservation.h>
+
+
+/* Forward declaration from mali_kbase_defs.h */
+struct kbase_jd_atom;
+struct kbase_context;
+
+/**
+ * struct kbase_dma_fence_cb - Mali dma-fence callback data struct
+ * @fence_cb: Callback function
+ * @katom:    Pointer to katom that is waiting on this callback
+ * @fence:    Pointer to the fence object on which this callback is waiting
+ * @node:     List head for linking this callback to the katom
+ */
+struct kbase_dma_fence_cb {
+	struct fence_cb fence_cb;
+	struct kbase_jd_atom *katom;
+	struct fence *fence;
+	struct list_head node;
+};
+
+/**
+ * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
+ * @resv_objs:             Array of reservation objects to attach the
+ *                         new fence to.
+ * @dma_fence_resv_count:  Number of reservation objects in the array.
+ * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive.
+ *
+ * This is used by some functions to pass around a collection of data about
+ * reservation objects.
+ */
+struct kbase_dma_fence_resv_info {
+	struct reservation_object **resv_objs;
+	unsigned int dma_fence_resv_count;
+	unsigned long *dma_fence_excl_bitmap;
+};
+
+/**
+ * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
+ * @resv:      Reservation object to add to the array.
+ * @info:      Pointer to struct with current reservation info
+ * @exclusive: Boolean indicating if exclusive access is needed
+ *
+ * The function adds a new reservation_object to an existing array of
+ * reservation_objects. At the same time keeps track of which objects require
+ * exclusive access in dma_fence_excl_bitmap.
+ */
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive);
+
+/**
+ * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
+ * @katom: Katom with the external dependency.
+ * @info:  Pointer to struct with current reservation info
+ *
+ * Return: An error code or 0 if succeeds
+ */
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info);
+
+/**
+ * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx
+ * @kctx: Pointer to kbase context
+ *
+ * This function will cancel and clean up all katoms on @kctx that is waiting
+ * on dma-buf fences.
+ *
+ * Locking: jctx.lock needs to be held when calling this function.
+ */
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
+ * @katom: Pointer to katom whose callbacks are to be canceled
+ *
+ * This function cancels all dma-buf fence callbacks on @katom, but does not
+ * cancel the katom itself.
+ *
+ * The caller is responsible for ensuring that jd_done_nolock is called on
+ * @katom.
+ *
+ * Locking: jctx.lock must be held when calling this function.
+ */
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
+ * @katom: Pointer to katom to signal and clean up
+ *
+ * This function will signal the @katom's fence, if it has one, and clean up
+ * the callback data from the katom's wait on earlier fences.
+ *
+ * Locking: jctx.lock must be held while calling this function.
+ */
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_term() - Terminate Mali dma-fence context
+ * @kctx: kbase context to terminate
+ */
+void kbase_dma_fence_term(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_init() - Initialize Mali dma-fence context
+ * @kctx: kbase context to initialize
+ */
+int kbase_dma_fence_init(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_waiters_remove()- Remove katom from dma-fence wait list
+ * @katom: Pointer to katom to remove from list
+ */
+void kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom);
+
+#else /* CONFIG_MALI_DMA_FENCE */
+/* Dummy functions for when dma-buf fence isn't enabled. */
+
+static inline int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	return 0;
+}
+
+static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
new file mode 100755
index 0000000..bf8c304
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -0,0 +1,259 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+#include <mali_kbase_tlstream.h>
+
+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct base_jd_udata data;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+	data = katom->udata;
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
+
+	kbase_tlstream_tl_nret_atom_ctx(katom, kctx);
+	kbase_tlstream_tl_del_atom(katom);
+
+	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+
+	wake_up(&katom->completed);
+
+	return data;
+}
+
+int kbase_event_pending(struct kbase_context *ctx)
+{
+	KBASE_DEBUG_ASSERT(ctx);
+
+	return (atomic_read(&ctx->event_count) != 0) ||
+			(atomic_read(&ctx->event_closed) != 0);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+	struct kbase_jd_atom *atom;
+
+	KBASE_DEBUG_ASSERT(ctx);
+
+	mutex_lock(&ctx->event_mutex);
+
+	if (list_empty(&ctx->event_list)) {
+		if (!atomic_read(&ctx->event_closed)) {
+			mutex_unlock(&ctx->event_mutex);
+			return -1;
+		}
+
+		/* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+		mutex_unlock(&ctx->event_mutex);
+		uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+		memset(&uevent->udata, 0, sizeof(uevent->udata));
+		dev_dbg(ctx->kbdev->dev,
+				"event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+				BASE_JD_EVENT_DRV_TERMINATED);
+		return 0;
+	}
+
+	/* normal event processing */
+	atomic_dec(&ctx->event_count);
+	atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
+	list_del(ctx->event_list.next);
+
+	mutex_unlock(&ctx->event_mutex);
+
+	dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+	uevent->event_code = atom->event_code;
+	uevent->atom_number = (atom - ctx->jctx.atoms);
+
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(atom);
+
+	mutex_lock(&ctx->jctx.lock);
+	uevent->udata = kbase_event_process(ctx, atom);
+	mutex_unlock(&ctx->jctx.lock);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue);
+
+/**
+ * kbase_event_process_noreport_worker - Worker for processing atoms that do not
+ *                                       return an event but do have external
+ *                                       resources
+ * @data:  Work structure
+ */
+static void kbase_event_process_noreport_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(katom);
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_event_process(kctx, katom);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+/**
+ * kbase_event_process_noreport - Process atoms that do not return an event
+ * @kctx:  Context pointer
+ * @katom: Atom to be processed
+ *
+ * Atoms that do not have external resources will be processed immediately.
+ * Atoms that do have external resources will be processed on a workqueue, in
+ * order to avoid locking issues.
+ */
+static void kbase_event_process_noreport(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		INIT_WORK(&katom->work, kbase_event_process_noreport_worker);
+		queue_work(kctx->event_workq, &katom->work);
+	} else {
+		kbase_event_process(kctx, katom);
+	}
+}
+
+/**
+ * kbase_event_coalesce - Move pending events to the main event list
+ * @kctx:  Context pointer
+ *
+ * kctx->event_list and kctx->event_coalesce_count must be protected
+ * by a lock unless this is the last thread using them
+ * (and we're about to terminate the lock).
+ *
+ * Return: The number of pending events moved to the main event list
+ */
+static int kbase_event_coalesce(struct kbase_context *kctx)
+{
+	const int event_count = kctx->event_coalesce_count;
+
+	/* Join the list of pending events onto the tail of the main list
+	   and reset it */
+	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
+	kctx->event_coalesce_count = 0;
+
+	/* Return the number of events moved */
+	return event_count;
+}
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+	if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
+		if (atom->event_code == BASE_JD_EVENT_DONE) {
+			/* Don't report the event */
+			kbase_event_process_noreport(ctx, atom);
+			return;
+		}
+	}
+
+	if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
+		/* Don't report the event */
+		kbase_event_process_noreport(ctx, atom);
+		return;
+	}
+
+	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
+		/* Don't report the event until other event(s) have completed */
+		mutex_lock(&ctx->event_mutex);
+		list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
+		++ctx->event_coalesce_count;
+		mutex_unlock(&ctx->event_mutex);
+	} else {
+		/* Report the event and any pending events now */
+		int event_count = 1;
+
+		mutex_lock(&ctx->event_mutex);
+		event_count += kbase_event_coalesce(ctx);
+		list_add_tail(&atom->dep_item[0], &ctx->event_list);
+		atomic_add(event_count, &ctx->event_count);
+		mutex_unlock(&ctx->event_mutex);
+
+		kbase_event_wakeup(ctx);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_event_post);
+
+void kbase_event_close(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->event_mutex);
+	atomic_set(&kctx->event_closed, true);
+	mutex_unlock(&kctx->event_mutex);
+	kbase_event_wakeup(kctx);
+}
+
+int kbase_event_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	INIT_LIST_HEAD(&kctx->event_list);
+	INIT_LIST_HEAD(&kctx->event_coalesce_list);
+	mutex_init(&kctx->event_mutex);
+	atomic_set(&kctx->event_count, 0);
+	kctx->event_coalesce_count = 0;
+	atomic_set(&kctx->event_closed, false);
+	kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+	if (NULL == kctx->event_workq)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init);
+
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+	int event_count;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+	flush_workqueue(kctx->event_workq);
+	destroy_workqueue(kctx->event_workq);
+
+	/* We use kbase_event_dequeue to remove the remaining events as that
+	 * deals with all the cleanup needed for the atoms.
+	 *
+	 * Note: use of kctx->event_list without a lock is safe because this must be the last
+	 * thread using it (because we're about to terminate the lock)
+	 */
+	event_count = kbase_event_coalesce(kctx);
+	atomic_add(event_count, &kctx->event_count);
+
+	while (!list_empty(&kctx->event_list)) {
+		struct base_jd_event_v2 event;
+
+		kbase_event_dequeue(kctx, &event);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
new file mode 100755
index 0000000..ce65b55
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* NB taken from gator  */
+/*
+ * List of possible actions to be controlled by DS-5 Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping
+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
+ * they are unknown inside gator.
+ */
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP  2
+#define GATOR_JOB_SLOT_SOFT_STOPPED  3
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 event, u64 value);
+void kbase_trace_mali_pm_power_off(u32 event, u64 value);
+void kbase_trace_mali_pm_power_on(u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
+void kbase_trace_mali_mmu_as_in_use(int event);
+void kbase_trace_mali_mmu_as_released(int event);
+void kbase_trace_mali_total_alloc_pages_change(long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif  /* _KBASE_GATOR_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
new file mode 100755
index 0000000..b9077ea
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -0,0 +1,330 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_gator_api.h"
+#include "mali_kbase_gator_hwcnt_names.h"
+#include "mali_kbase_instr.h"
+
+#define MALI_MAX_CORES_PER_GROUP		4
+#define MALI_MAX_NUM_BLOCKS_PER_GROUP	8
+#define MALI_COUNTERS_PER_BLOCK			64
+#define MALI_BYTES_PER_COUNTER			4
+
+struct kbase_gator_hwcnt_handles {
+	struct kbase_device *kbdev;
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct work_struct dump_work;
+	int dump_complete;
+	spinlock_t dump_lock;
+};
+
+static void dump_worker(struct work_struct *work);
+
+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
+{
+	const char * const *hardware_counters;
+	struct kbase_device *kbdev;
+	uint32_t gpu_id;
+	uint32_t product_id;
+	uint32_t count;
+
+	if (!total_counters)
+		return NULL;
+
+	/* Get the first device - it doesn't matter in this case */
+	kbdev = kbase_find_device(-1);
+	if (!kbdev)
+		return NULL;
+
+	gpu_id = kbdev->gpu_props.props.core_props.product_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			hardware_counters = hardware_counters_mali_tMIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tMIx);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n",
+				gpu_id);
+			break;
+		}
+	} else {
+		switch (gpu_id) {
+			/* If we are using a Mali-T60x device */
+		case GPU_ID_PI_T60X:
+			hardware_counters = hardware_counters_mali_t60x;
+			count = ARRAY_SIZE(hardware_counters_mali_t60x);
+			break;
+			/* If we are using a Mali-T62x device */
+		case GPU_ID_PI_T62X:
+			hardware_counters = hardware_counters_mali_t62x;
+			count = ARRAY_SIZE(hardware_counters_mali_t62x);
+			break;
+			/* If we are using a Mali-T72x device */
+		case GPU_ID_PI_T72X:
+			hardware_counters = hardware_counters_mali_t72x;
+			count = ARRAY_SIZE(hardware_counters_mali_t72x);
+			break;
+			/* If we are using a Mali-T76x device */
+		case GPU_ID_PI_T76X:
+			hardware_counters = hardware_counters_mali_t76x;
+			count = ARRAY_SIZE(hardware_counters_mali_t76x);
+			break;
+			/* If we are using a Mali-T82x device */
+		case GPU_ID_PI_T82X:
+			hardware_counters = hardware_counters_mali_t82x;
+			count = ARRAY_SIZE(hardware_counters_mali_t82x);
+			break;
+			/* If we are using a Mali-T83x device */
+		case GPU_ID_PI_T83X:
+			hardware_counters = hardware_counters_mali_t83x;
+			count = ARRAY_SIZE(hardware_counters_mali_t83x);
+			break;
+			/* If we are using a Mali-T86x device */
+		case GPU_ID_PI_T86X:
+			hardware_counters = hardware_counters_mali_t86x;
+			count = ARRAY_SIZE(hardware_counters_mali_t86x);
+			break;
+			/* If we are using a Mali-T88x device */
+		case GPU_ID_PI_TFRX:
+			hardware_counters = hardware_counters_mali_t88x;
+			count = ARRAY_SIZE(hardware_counters_mali_t88x);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n",
+				gpu_id);
+			break;
+		}
+	}
+
+	/* Release the kbdev reference. */
+	kbase_release_device(kbdev);
+
+	*total_counters = count;
+
+	/* If we return a string array take a reference on the module (or fail). */
+	if (hardware_counters && !try_module_get(THIS_MODULE))
+		return NULL;
+
+	return hardware_counters;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
+
+void kbase_gator_hwcnt_term_names(void)
+{
+	/* Release the module reference. */
+	module_put(THIS_MODULE);
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
+
+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	uint32_t dump_size = 0, i = 0;
+
+	if (!in_out_info)
+		return NULL;
+
+	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
+	if (!hand)
+		return NULL;
+
+	INIT_WORK(&hand->dump_work, dump_worker);
+	spin_lock_init(&hand->dump_lock);
+
+	/* Get the first device */
+	hand->kbdev = kbase_find_device(-1);
+	if (!hand->kbdev)
+		goto free_hand;
+
+	dump_size = kbase_vinstr_dump_size(hand->kbdev);
+	hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!hand->vinstr_buffer)
+		goto release_device;
+	in_out_info->kernel_dump_buffer = hand->vinstr_buffer;
+
+	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
+	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
+	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
+
+	/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
+	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
+		uint32_t cg, j;
+		uint64_t core_mask;
+
+		/* There are 8 hardware counters blocks per core group */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			in_out_info->nr_core_groups, GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = in_out_info->nr_core_groups *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			MALI_COUNTERS_PER_BLOCK *
+			MALI_BYTES_PER_COUNTER;
+
+		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
+			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
+
+			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
+				if (core_mask & (1u << j))
+					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+				else
+					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			}
+
+			in_out_info->hwc_layout[i++] = TILER_BLOCK;
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+
+			if (0 == cg)
+				in_out_info->hwc_layout[i++] = JM_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+		}
+	/* If we are using any other device */
+	} else {
+		uint32_t nr_l2, nr_sc_bits, j;
+		uint64_t core_mask;
+
+		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
+
+		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+		nr_sc_bits = fls64(core_mask);
+
+		/* The job manager and tiler sets of counters
+		 * are always present */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+
+		in_out_info->hwc_layout[i++] = JM_BLOCK;
+		in_out_info->hwc_layout[i++] = TILER_BLOCK;
+
+		for (j = 0; j < nr_l2; j++)
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+		while (core_mask != 0ull) {
+			if ((core_mask & 1ull) != 0ull)
+				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			core_mask >>= 1;
+		}
+	}
+
+	in_out_info->nr_hwc_blocks = i;
+	in_out_info->size = dump_size;
+
+	setup.jm_bm = in_out_info->bitmask[0];
+	setup.tiler_bm = in_out_info->bitmask[1];
+	setup.shader_bm = in_out_info->bitmask[2];
+	setup.mmu_l2_bm = in_out_info->bitmask[3];
+	hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx,
+			&setup, hand->vinstr_buffer);
+	if (!hand->vinstr_cli) {
+		dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core");
+		goto free_layout;
+	}
+
+	return hand;
+
+free_layout:
+	kfree(in_out_info->hwc_layout);
+
+free_vinstr_buffer:
+	kfree(hand->vinstr_buffer);
+
+release_device:
+	kbase_release_device(hand->kbdev);
+
+free_hand:
+	kfree(hand);
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
+
+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (in_out_info)
+		kfree(in_out_info->hwc_layout);
+
+	if (opaque_handles) {
+		cancel_work_sync(&opaque_handles->dump_work);
+		kbase_vinstr_detach_client(opaque_handles->vinstr_cli);
+		kfree(opaque_handles->vinstr_buffer);
+		kbase_release_device(opaque_handles->kbdev);
+		kfree(opaque_handles);
+	}
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
+
+static void dump_worker(struct work_struct *work)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+
+	hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work);
+	if (!kbase_vinstr_hwc_dump(hand->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL)) {
+		spin_lock_bh(&hand->dump_lock);
+		hand->dump_complete = 1;
+		spin_unlock_bh(&hand->dump_lock);
+	} else {
+		schedule_work(&hand->dump_work);
+	}
+}
+
+uint32_t kbase_gator_instr_hwcnt_dump_complete(
+		struct kbase_gator_hwcnt_handles *opaque_handles,
+		uint32_t * const success)
+{
+
+	if (opaque_handles && success) {
+		*success = opaque_handles->dump_complete;
+		opaque_handles->dump_complete = 0;
+		return *success;
+	}
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
+
+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (opaque_handles)
+		schedule_work(&opaque_handles->dump_work);
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
new file mode 100755
index 0000000..ef9ac0f
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
@@ -0,0 +1,219 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to fetch hardware counters.
+ */
+
+/* This define is used by the gator kernel module compile to select which DDK
+ * API calling convention to use. If not defined (legacy DDK) gator assumes
+ * version 1. The version to DDK release mapping is:
+ *     Version 1 API: DDK versions r1px, r2px
+ *     Version 2 API: DDK versions r3px, r4px
+ *     Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a
+ * counter dump. If this returns a non-zero value the request has been queued,
+ * otherwise the driver has been unable to do so (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the  previously
+ * requested dump has been succesful. If this returns non-zero the counter dump
+ * has resolved, but the value of *success must also be tested as the dump
+ * may have not been successful. If it returns zero the counter dump was
+ * abandoned due to the device being busy (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ *        kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ *    In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ *        hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ *        hwcnt_name # pointer to name list
+ *
+ *        u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ *        # Iterate over each 64-counter block in this GPU configuration
+ *        for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ *            hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ *            # Skip reserved type blocks - they contain no counters at all
+ *            if( type == RESERVED_BLOCK ) {
+ *                continue;
+ *            }
+ *
+ *            size_t name_offset = type * 64;
+ *            size_t data_offset = i * 64;
+ *
+ *            # Iterate over the names of the counters in this block type
+ *            for( j = 0; j < 64; j++) {
+ *                const char * name = hwcnt_name[name_offset+j];
+ *
+ *                # Skip empty name strings - there is no counter here
+ *                if( name[0] == '\0' ) {
+ *                    continue;
+ *                }
+ *
+ *                u32 data = hwcnt_data[data_offset+j];
+ *
+ *                printk( "COUNTER: %s DATA: %u\n", name, data );
+ *            }
+ *        }
+ *
+ *
+ *     Note that in most implementations you typically want to either SUM or
+ *     AVERAGE multiple instances of the same counter if, for example, you have
+ *     multiple shader cores or multiple L2 caches. The most sensible view for
+ *     analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ *     counters.
+ *
+ * 7] Goto 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling
+ *    kbase_gator_hwcnt_term_names(). This function must only be called if
+ *    init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+enum hwc_type {
+	JM_BLOCK = 0,
+	TILER_BLOCK,
+	SHADER_BLOCK,
+	MMU_L2_BLOCK,
+	RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+	/* Passed from Gator to kbase */
+
+	/* the bitmask of enabled hardware counters for each counter block */
+	uint16_t bitmask[4];
+
+	/* Passed from kbase to Gator */
+
+	/* ptr to counter dump memory */
+	void *kernel_dump_buffer;
+
+	/* size of counter dump memory */
+	uint32_t size;
+
+	/* the ID of the Mali device */
+	uint32_t gpu_id;
+
+	/* the number of shader cores in the GPU */
+	uint32_t nr_cores;
+
+	/* the number of core groups */
+	uint32_t nr_core_groups;
+
+	/* the memory layout of the performance counters */
+	enum hwc_type *hwc_layout;
+
+	/* the total number of hardware couter blocks */
+	uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info   A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ *                      specific information that will be returned to Gator. On entry Gator must have populated the
+ *                      'bitmask' field with the counters it wishes to enable for each class of counter block.
+ *                      Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ *                      enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ *                      the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ *                      kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU.
+ *
+ * @return              Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info       A pointer to a structure containing the enabled counters passed from Gator and all the
+ *                          Mali specific information that will be returned to Gator.
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ * @param[out] success      Non-zero on success, zero on failure.
+ *
+ * @return                  Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ *                          completed dump may not have dumped succesfully, so the caller must test for both
+ *                          a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ *
+ * @return                  Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_counters The total number of counters short names in the Mali devices' list.
+ *
+ * @return                    Pointer to an array of strings of length *total_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
new file mode 100755
index 0000000..c247dd6
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -0,0 +1,2163 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_H_
+#define _KBASE_GATOR_HWCNT_NAMES_H_
+
+/*
+ * "Short names" for hardware counters used by Streamline. Counters names are
+ * stored in accordance with their memory layout in the binary counter block
+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
+ * of 64 counters, and each GPU implements the same set of "masters" although
+ * the counters each master exposes within its block of 64 may vary.
+ *
+ * Counters which are an empty string are simply "holes" in the counter memory
+ * where no counter exists.
+ */
+
+static const char * const hardware_counters_mali_t60x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MESSAGES_SENT",
+	"T60x_MESSAGES_RECEIVED",
+	"T60x_GPU_ACTIVE",
+	"T60x_IRQ_ACTIVE",
+	"T60x_JS0_JOBS",
+	"T60x_JS0_TASKS",
+	"T60x_JS0_ACTIVE",
+	"",
+	"T60x_JS0_WAIT_READ",
+	"T60x_JS0_WAIT_ISSUE",
+	"T60x_JS0_WAIT_DEPEND",
+	"T60x_JS0_WAIT_FINISH",
+	"T60x_JS1_JOBS",
+	"T60x_JS1_TASKS",
+	"T60x_JS1_ACTIVE",
+	"",
+	"T60x_JS1_WAIT_READ",
+	"T60x_JS1_WAIT_ISSUE",
+	"T60x_JS1_WAIT_DEPEND",
+	"T60x_JS1_WAIT_FINISH",
+	"T60x_JS2_JOBS",
+	"T60x_JS2_TASKS",
+	"T60x_JS2_ACTIVE",
+	"",
+	"T60x_JS2_WAIT_READ",
+	"T60x_JS2_WAIT_ISSUE",
+	"T60x_JS2_WAIT_DEPEND",
+	"T60x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T60x_TI_JOBS_PROCESSED",
+	"T60x_TI_TRIANGLES",
+	"T60x_TI_QUADS",
+	"T60x_TI_POLYGONS",
+	"T60x_TI_POINTS",
+	"T60x_TI_LINES",
+	"T60x_TI_VCACHE_HIT",
+	"T60x_TI_VCACHE_MISS",
+	"T60x_TI_FRONT_FACING",
+	"T60x_TI_BACK_FACING",
+	"T60x_TI_PRIM_VISIBLE",
+	"T60x_TI_PRIM_CULLED",
+	"T60x_TI_PRIM_CLIPPED",
+	"T60x_TI_LEVEL0",
+	"T60x_TI_LEVEL1",
+	"T60x_TI_LEVEL2",
+	"T60x_TI_LEVEL3",
+	"T60x_TI_LEVEL4",
+	"T60x_TI_LEVEL5",
+	"T60x_TI_LEVEL6",
+	"T60x_TI_LEVEL7",
+	"T60x_TI_COMMAND_1",
+	"T60x_TI_COMMAND_2",
+	"T60x_TI_COMMAND_3",
+	"T60x_TI_COMMAND_4",
+	"T60x_TI_COMMAND_4_7",
+	"T60x_TI_COMMAND_8_15",
+	"T60x_TI_COMMAND_16_63",
+	"T60x_TI_COMMAND_64",
+	"T60x_TI_COMPRESS_IN",
+	"T60x_TI_COMPRESS_OUT",
+	"T60x_TI_COMPRESS_FLUSH",
+	"T60x_TI_TIMESTAMPS",
+	"T60x_TI_PCACHE_HIT",
+	"T60x_TI_PCACHE_MISS",
+	"T60x_TI_PCACHE_LINE",
+	"T60x_TI_PCACHE_STALL",
+	"T60x_TI_WRBUF_HIT",
+	"T60x_TI_WRBUF_MISS",
+	"T60x_TI_WRBUF_LINE",
+	"T60x_TI_WRBUF_PARTIAL",
+	"T60x_TI_WRBUF_STALL",
+	"T60x_TI_ACTIVE",
+	"T60x_TI_LOADING_DESC",
+	"T60x_TI_INDEX_WAIT",
+	"T60x_TI_INDEX_RANGE_WAIT",
+	"T60x_TI_VERTEX_WAIT",
+	"T60x_TI_PCACHE_WAIT",
+	"T60x_TI_WRBUF_WAIT",
+	"T60x_TI_BUS_READ",
+	"T60x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_TI_UTLB_STALL",
+	"T60x_TI_UTLB_REPLAY_MISS",
+	"T60x_TI_UTLB_REPLAY_FULL",
+	"T60x_TI_UTLB_NEW_MISS",
+	"T60x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T60x_FRAG_ACTIVE",
+	"T60x_FRAG_PRIMITIVES",
+	"T60x_FRAG_PRIMITIVES_DROPPED",
+	"T60x_FRAG_CYCLES_DESC",
+	"T60x_FRAG_CYCLES_PLR",
+	"T60x_FRAG_CYCLES_VERT",
+	"T60x_FRAG_CYCLES_TRISETUP",
+	"T60x_FRAG_CYCLES_RAST",
+	"T60x_FRAG_THREADS",
+	"T60x_FRAG_DUMMY_THREADS",
+	"T60x_FRAG_QUADS_RAST",
+	"T60x_FRAG_QUADS_EZS_TEST",
+	"T60x_FRAG_QUADS_EZS_KILLED",
+	"T60x_FRAG_THREADS_LZS_TEST",
+	"T60x_FRAG_THREADS_LZS_KILLED",
+	"T60x_FRAG_CYCLES_NO_TILE",
+	"T60x_FRAG_NUM_TILES",
+	"T60x_FRAG_TRANS_ELIM",
+	"T60x_COMPUTE_ACTIVE",
+	"T60x_COMPUTE_TASKS",
+	"T60x_COMPUTE_THREADS",
+	"T60x_COMPUTE_CYCLES_DESC",
+	"T60x_TRIPIPE_ACTIVE",
+	"T60x_ARITH_WORDS",
+	"T60x_ARITH_CYCLES_REG",
+	"T60x_ARITH_CYCLES_L0",
+	"T60x_ARITH_FRAG_DEPEND",
+	"T60x_LS_WORDS",
+	"T60x_LS_ISSUES",
+	"T60x_LS_RESTARTS",
+	"T60x_LS_REISSUES_MISS",
+	"T60x_LS_REISSUES_VD",
+	"T60x_LS_REISSUE_ATTRIB_MISS",
+	"T60x_LS_NO_WB",
+	"T60x_TEX_WORDS",
+	"T60x_TEX_BUBBLES",
+	"T60x_TEX_WORDS_L0",
+	"T60x_TEX_WORDS_DESC",
+	"T60x_TEX_ISSUES",
+	"T60x_TEX_RECIRC_FMISS",
+	"T60x_TEX_RECIRC_DESC",
+	"T60x_TEX_RECIRC_MULTI",
+	"T60x_TEX_RECIRC_PMISS",
+	"T60x_TEX_RECIRC_CONF",
+	"T60x_LSC_READ_HITS",
+	"T60x_LSC_READ_MISSES",
+	"T60x_LSC_WRITE_HITS",
+	"T60x_LSC_WRITE_MISSES",
+	"T60x_LSC_ATOMIC_HITS",
+	"T60x_LSC_ATOMIC_MISSES",
+	"T60x_LSC_LINE_FETCHES",
+	"T60x_LSC_DIRTY_LINE",
+	"T60x_LSC_SNOOPS",
+	"T60x_AXI_TLB_STALL",
+	"T60x_AXI_TLB_MISS",
+	"T60x_AXI_TLB_TRANSACTION",
+	"T60x_LS_TLB_MISS",
+	"T60x_LS_TLB_HIT",
+	"T60x_AXI_BEATS_READ",
+	"T60x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MMU_HIT",
+	"T60x_MMU_NEW_MISS",
+	"T60x_MMU_REPLAY_FULL",
+	"T60x_MMU_REPLAY_MISS",
+	"T60x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_UTLB_HIT",
+	"T60x_UTLB_NEW_MISS",
+	"T60x_UTLB_REPLAY_FULL",
+	"T60x_UTLB_REPLAY_MISS",
+	"T60x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_L2_EXT_WRITE_BEATS",
+	"T60x_L2_EXT_READ_BEATS",
+	"T60x_L2_ANY_LOOKUP",
+	"T60x_L2_READ_LOOKUP",
+	"T60x_L2_SREAD_LOOKUP",
+	"T60x_L2_READ_REPLAY",
+	"T60x_L2_READ_SNOOP",
+	"T60x_L2_READ_HIT",
+	"T60x_L2_CLEAN_MISS",
+	"T60x_L2_WRITE_LOOKUP",
+	"T60x_L2_SWRITE_LOOKUP",
+	"T60x_L2_WRITE_REPLAY",
+	"T60x_L2_WRITE_SNOOP",
+	"T60x_L2_WRITE_HIT",
+	"T60x_L2_EXT_READ_FULL",
+	"T60x_L2_EXT_READ_HALF",
+	"T60x_L2_EXT_WRITE_FULL",
+	"T60x_L2_EXT_WRITE_HALF",
+	"T60x_L2_EXT_READ",
+	"T60x_L2_EXT_READ_LINE",
+	"T60x_L2_EXT_WRITE",
+	"T60x_L2_EXT_WRITE_LINE",
+	"T60x_L2_EXT_WRITE_SMALL",
+	"T60x_L2_EXT_BARRIER",
+	"T60x_L2_EXT_AR_STALL",
+	"T60x_L2_EXT_R_BUF_FULL",
+	"T60x_L2_EXT_RD_BUF_FULL",
+	"T60x_L2_EXT_R_RAW",
+	"T60x_L2_EXT_W_STALL",
+	"T60x_L2_EXT_W_BUF_FULL",
+	"T60x_L2_EXT_R_W_HAZARD",
+	"T60x_L2_TAG_HAZARD",
+	"T60x_L2_SNOOP_FULL",
+	"T60x_L2_REPLAY_FULL"
+};
+static const char * const hardware_counters_mali_t62x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MESSAGES_SENT",
+	"T62x_MESSAGES_RECEIVED",
+	"T62x_GPU_ACTIVE",
+	"T62x_IRQ_ACTIVE",
+	"T62x_JS0_JOBS",
+	"T62x_JS0_TASKS",
+	"T62x_JS0_ACTIVE",
+	"",
+	"T62x_JS0_WAIT_READ",
+	"T62x_JS0_WAIT_ISSUE",
+	"T62x_JS0_WAIT_DEPEND",
+	"T62x_JS0_WAIT_FINISH",
+	"T62x_JS1_JOBS",
+	"T62x_JS1_TASKS",
+	"T62x_JS1_ACTIVE",
+	"",
+	"T62x_JS1_WAIT_READ",
+	"T62x_JS1_WAIT_ISSUE",
+	"T62x_JS1_WAIT_DEPEND",
+	"T62x_JS1_WAIT_FINISH",
+	"T62x_JS2_JOBS",
+	"T62x_JS2_TASKS",
+	"T62x_JS2_ACTIVE",
+	"",
+	"T62x_JS2_WAIT_READ",
+	"T62x_JS2_WAIT_ISSUE",
+	"T62x_JS2_WAIT_DEPEND",
+	"T62x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T62x_TI_JOBS_PROCESSED",
+	"T62x_TI_TRIANGLES",
+	"T62x_TI_QUADS",
+	"T62x_TI_POLYGONS",
+	"T62x_TI_POINTS",
+	"T62x_TI_LINES",
+	"T62x_TI_VCACHE_HIT",
+	"T62x_TI_VCACHE_MISS",
+	"T62x_TI_FRONT_FACING",
+	"T62x_TI_BACK_FACING",
+	"T62x_TI_PRIM_VISIBLE",
+	"T62x_TI_PRIM_CULLED",
+	"T62x_TI_PRIM_CLIPPED",
+	"T62x_TI_LEVEL0",
+	"T62x_TI_LEVEL1",
+	"T62x_TI_LEVEL2",
+	"T62x_TI_LEVEL3",
+	"T62x_TI_LEVEL4",
+	"T62x_TI_LEVEL5",
+	"T62x_TI_LEVEL6",
+	"T62x_TI_LEVEL7",
+	"T62x_TI_COMMAND_1",
+	"T62x_TI_COMMAND_2",
+	"T62x_TI_COMMAND_3",
+	"T62x_TI_COMMAND_4",
+	"T62x_TI_COMMAND_5_7",
+	"T62x_TI_COMMAND_8_15",
+	"T62x_TI_COMMAND_16_63",
+	"T62x_TI_COMMAND_64",
+	"T62x_TI_COMPRESS_IN",
+	"T62x_TI_COMPRESS_OUT",
+	"T62x_TI_COMPRESS_FLUSH",
+	"T62x_TI_TIMESTAMPS",
+	"T62x_TI_PCACHE_HIT",
+	"T62x_TI_PCACHE_MISS",
+	"T62x_TI_PCACHE_LINE",
+	"T62x_TI_PCACHE_STALL",
+	"T62x_TI_WRBUF_HIT",
+	"T62x_TI_WRBUF_MISS",
+	"T62x_TI_WRBUF_LINE",
+	"T62x_TI_WRBUF_PARTIAL",
+	"T62x_TI_WRBUF_STALL",
+	"T62x_TI_ACTIVE",
+	"T62x_TI_LOADING_DESC",
+	"T62x_TI_INDEX_WAIT",
+	"T62x_TI_INDEX_RANGE_WAIT",
+	"T62x_TI_VERTEX_WAIT",
+	"T62x_TI_PCACHE_WAIT",
+	"T62x_TI_WRBUF_WAIT",
+	"T62x_TI_BUS_READ",
+	"T62x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_TI_UTLB_STALL",
+	"T62x_TI_UTLB_REPLAY_MISS",
+	"T62x_TI_UTLB_REPLAY_FULL",
+	"T62x_TI_UTLB_NEW_MISS",
+	"T62x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"T62x_SHADER_CORE_ACTIVE",
+	"T62x_FRAG_ACTIVE",
+	"T62x_FRAG_PRIMITIVES",
+	"T62x_FRAG_PRIMITIVES_DROPPED",
+	"T62x_FRAG_CYCLES_DESC",
+	"T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T62x_FRAG_CYCLES_VERT",
+	"T62x_FRAG_CYCLES_TRISETUP",
+	"T62x_FRAG_CYCLES_EZS_ACTIVE",
+	"T62x_FRAG_THREADS",
+	"T62x_FRAG_DUMMY_THREADS",
+	"T62x_FRAG_QUADS_RAST",
+	"T62x_FRAG_QUADS_EZS_TEST",
+	"T62x_FRAG_QUADS_EZS_KILLED",
+	"T62x_FRAG_THREADS_LZS_TEST",
+	"T62x_FRAG_THREADS_LZS_KILLED",
+	"T62x_FRAG_CYCLES_NO_TILE",
+	"T62x_FRAG_NUM_TILES",
+	"T62x_FRAG_TRANS_ELIM",
+	"T62x_COMPUTE_ACTIVE",
+	"T62x_COMPUTE_TASKS",
+	"T62x_COMPUTE_THREADS",
+	"T62x_COMPUTE_CYCLES_DESC",
+	"T62x_TRIPIPE_ACTIVE",
+	"T62x_ARITH_WORDS",
+	"T62x_ARITH_CYCLES_REG",
+	"T62x_ARITH_CYCLES_L0",
+	"T62x_ARITH_FRAG_DEPEND",
+	"T62x_LS_WORDS",
+	"T62x_LS_ISSUES",
+	"T62x_LS_RESTARTS",
+	"T62x_LS_REISSUES_MISS",
+	"T62x_LS_REISSUES_VD",
+	"T62x_LS_REISSUE_ATTRIB_MISS",
+	"T62x_LS_NO_WB",
+	"T62x_TEX_WORDS",
+	"T62x_TEX_BUBBLES",
+	"T62x_TEX_WORDS_L0",
+	"T62x_TEX_WORDS_DESC",
+	"T62x_TEX_ISSUES",
+	"T62x_TEX_RECIRC_FMISS",
+	"T62x_TEX_RECIRC_DESC",
+	"T62x_TEX_RECIRC_MULTI",
+	"T62x_TEX_RECIRC_PMISS",
+	"T62x_TEX_RECIRC_CONF",
+	"T62x_LSC_READ_HITS",
+	"T62x_LSC_READ_MISSES",
+	"T62x_LSC_WRITE_HITS",
+	"T62x_LSC_WRITE_MISSES",
+	"T62x_LSC_ATOMIC_HITS",
+	"T62x_LSC_ATOMIC_MISSES",
+	"T62x_LSC_LINE_FETCHES",
+	"T62x_LSC_DIRTY_LINE",
+	"T62x_LSC_SNOOPS",
+	"T62x_AXI_TLB_STALL",
+	"T62x_AXI_TLB_MISS",
+	"T62x_AXI_TLB_TRANSACTION",
+	"T62x_LS_TLB_MISS",
+	"T62x_LS_TLB_HIT",
+	"T62x_AXI_BEATS_READ",
+	"T62x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MMU_HIT",
+	"T62x_MMU_NEW_MISS",
+	"T62x_MMU_REPLAY_FULL",
+	"T62x_MMU_REPLAY_MISS",
+	"T62x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_UTLB_HIT",
+	"T62x_UTLB_NEW_MISS",
+	"T62x_UTLB_REPLAY_FULL",
+	"T62x_UTLB_REPLAY_MISS",
+	"T62x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_L2_EXT_WRITE_BEATS",
+	"T62x_L2_EXT_READ_BEATS",
+	"T62x_L2_ANY_LOOKUP",
+	"T62x_L2_READ_LOOKUP",
+	"T62x_L2_SREAD_LOOKUP",
+	"T62x_L2_READ_REPLAY",
+	"T62x_L2_READ_SNOOP",
+	"T62x_L2_READ_HIT",
+	"T62x_L2_CLEAN_MISS",
+	"T62x_L2_WRITE_LOOKUP",
+	"T62x_L2_SWRITE_LOOKUP",
+	"T62x_L2_WRITE_REPLAY",
+	"T62x_L2_WRITE_SNOOP",
+	"T62x_L2_WRITE_HIT",
+	"T62x_L2_EXT_READ_FULL",
+	"T62x_L2_EXT_READ_HALF",
+	"T62x_L2_EXT_WRITE_FULL",
+	"T62x_L2_EXT_WRITE_HALF",
+	"T62x_L2_EXT_READ",
+	"T62x_L2_EXT_READ_LINE",
+	"T62x_L2_EXT_WRITE",
+	"T62x_L2_EXT_WRITE_LINE",
+	"T62x_L2_EXT_WRITE_SMALL",
+	"T62x_L2_EXT_BARRIER",
+	"T62x_L2_EXT_AR_STALL",
+	"T62x_L2_EXT_R_BUF_FULL",
+	"T62x_L2_EXT_RD_BUF_FULL",
+	"T62x_L2_EXT_R_RAW",
+	"T62x_L2_EXT_W_STALL",
+	"T62x_L2_EXT_W_BUF_FULL",
+	"T62x_L2_EXT_R_W_HAZARD",
+	"T62x_L2_TAG_HAZARD",
+	"T62x_L2_SNOOP_FULL",
+	"T62x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t72x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T72x_GPU_ACTIVE",
+	"T72x_IRQ_ACTIVE",
+	"T72x_JS0_JOBS",
+	"T72x_JS0_TASKS",
+	"T72x_JS0_ACTIVE",
+	"T72x_JS1_JOBS",
+	"T72x_JS1_TASKS",
+	"T72x_JS1_ACTIVE",
+	"T72x_JS2_JOBS",
+	"T72x_JS2_TASKS",
+	"T72x_JS2_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T72x_TI_JOBS_PROCESSED",
+	"T72x_TI_TRIANGLES",
+	"T72x_TI_QUADS",
+	"T72x_TI_POLYGONS",
+	"T72x_TI_POINTS",
+	"T72x_TI_LINES",
+	"T72x_TI_FRONT_FACING",
+	"T72x_TI_BACK_FACING",
+	"T72x_TI_PRIM_VISIBLE",
+	"T72x_TI_PRIM_CULLED",
+	"T72x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T72x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T72x_FRAG_ACTIVE",
+	"T72x_FRAG_PRIMITIVES",
+	"T72x_FRAG_PRIMITIVES_DROPPED",
+	"T72x_FRAG_THREADS",
+	"T72x_FRAG_DUMMY_THREADS",
+	"T72x_FRAG_QUADS_RAST",
+	"T72x_FRAG_QUADS_EZS_TEST",
+	"T72x_FRAG_QUADS_EZS_KILLED",
+	"T72x_FRAG_THREADS_LZS_TEST",
+	"T72x_FRAG_THREADS_LZS_KILLED",
+	"T72x_FRAG_CYCLES_NO_TILE",
+	"T72x_FRAG_NUM_TILES",
+	"T72x_FRAG_TRANS_ELIM",
+	"T72x_COMPUTE_ACTIVE",
+	"T72x_COMPUTE_TASKS",
+	"T72x_COMPUTE_THREADS",
+	"T72x_TRIPIPE_ACTIVE",
+	"T72x_ARITH_WORDS",
+	"T72x_ARITH_CYCLES_REG",
+	"T72x_LS_WORDS",
+	"T72x_LS_ISSUES",
+	"T72x_LS_RESTARTS",
+	"T72x_LS_REISSUES_MISS",
+	"T72x_TEX_WORDS",
+	"T72x_TEX_BUBBLES",
+	"T72x_TEX_ISSUES",
+	"T72x_LSC_READ_HITS",
+	"T72x_LSC_READ_MISSES",
+	"T72x_LSC_WRITE_HITS",
+	"T72x_LSC_WRITE_MISSES",
+	"T72x_LSC_ATOMIC_HITS",
+	"T72x_LSC_ATOMIC_MISSES",
+	"T72x_LSC_LINE_FETCHES",
+	"T72x_LSC_DIRTY_LINE",
+	"T72x_LSC_SNOOPS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T72x_L2_EXT_WRITE_BEAT",
+	"T72x_L2_EXT_READ_BEAT",
+	"T72x_L2_READ_SNOOP",
+	"T72x_L2_READ_HIT",
+	"T72x_L2_WRITE_SNOOP",
+	"T72x_L2_WRITE_HIT",
+	"T72x_L2_EXT_WRITE_SMALL",
+	"T72x_L2_EXT_BARRIER",
+	"T72x_L2_EXT_AR_STALL",
+	"T72x_L2_EXT_W_STALL",
+	"T72x_L2_SNOOP_FULL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	""
+};
+
+static const char * const hardware_counters_mali_t76x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MESSAGES_SENT",
+	"T76x_MESSAGES_RECEIVED",
+	"T76x_GPU_ACTIVE",
+	"T76x_IRQ_ACTIVE",
+	"T76x_JS0_JOBS",
+	"T76x_JS0_TASKS",
+	"T76x_JS0_ACTIVE",
+	"",
+	"T76x_JS0_WAIT_READ",
+	"T76x_JS0_WAIT_ISSUE",
+	"T76x_JS0_WAIT_DEPEND",
+	"T76x_JS0_WAIT_FINISH",
+	"T76x_JS1_JOBS",
+	"T76x_JS1_TASKS",
+	"T76x_JS1_ACTIVE",
+	"",
+	"T76x_JS1_WAIT_READ",
+	"T76x_JS1_WAIT_ISSUE",
+	"T76x_JS1_WAIT_DEPEND",
+	"T76x_JS1_WAIT_FINISH",
+	"T76x_JS2_JOBS",
+	"T76x_JS2_TASKS",
+	"T76x_JS2_ACTIVE",
+	"",
+	"T76x_JS2_WAIT_READ",
+	"T76x_JS2_WAIT_ISSUE",
+	"T76x_JS2_WAIT_DEPEND",
+	"T76x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T76x_TI_JOBS_PROCESSED",
+	"T76x_TI_TRIANGLES",
+	"T76x_TI_QUADS",
+	"T76x_TI_POLYGONS",
+	"T76x_TI_POINTS",
+	"T76x_TI_LINES",
+	"T76x_TI_VCACHE_HIT",
+	"T76x_TI_VCACHE_MISS",
+	"T76x_TI_FRONT_FACING",
+	"T76x_TI_BACK_FACING",
+	"T76x_TI_PRIM_VISIBLE",
+	"T76x_TI_PRIM_CULLED",
+	"T76x_TI_PRIM_CLIPPED",
+	"T76x_TI_LEVEL0",
+	"T76x_TI_LEVEL1",
+	"T76x_TI_LEVEL2",
+	"T76x_TI_LEVEL3",
+	"T76x_TI_LEVEL4",
+	"T76x_TI_LEVEL5",
+	"T76x_TI_LEVEL6",
+	"T76x_TI_LEVEL7",
+	"T76x_TI_COMMAND_1",
+	"T76x_TI_COMMAND_2",
+	"T76x_TI_COMMAND_3",
+	"T76x_TI_COMMAND_4",
+	"T76x_TI_COMMAND_5_7",
+	"T76x_TI_COMMAND_8_15",
+	"T76x_TI_COMMAND_16_63",
+	"T76x_TI_COMMAND_64",
+	"T76x_TI_COMPRESS_IN",
+	"T76x_TI_COMPRESS_OUT",
+	"T76x_TI_COMPRESS_FLUSH",
+	"T76x_TI_TIMESTAMPS",
+	"T76x_TI_PCACHE_HIT",
+	"T76x_TI_PCACHE_MISS",
+	"T76x_TI_PCACHE_LINE",
+	"T76x_TI_PCACHE_STALL",
+	"T76x_TI_WRBUF_HIT",
+	"T76x_TI_WRBUF_MISS",
+	"T76x_TI_WRBUF_LINE",
+	"T76x_TI_WRBUF_PARTIAL",
+	"T76x_TI_WRBUF_STALL",
+	"T76x_TI_ACTIVE",
+	"T76x_TI_LOADING_DESC",
+	"T76x_TI_INDEX_WAIT",
+	"T76x_TI_INDEX_RANGE_WAIT",
+	"T76x_TI_VERTEX_WAIT",
+	"T76x_TI_PCACHE_WAIT",
+	"T76x_TI_WRBUF_WAIT",
+	"T76x_TI_BUS_READ",
+	"T76x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_TI_UTLB_HIT",
+	"T76x_TI_UTLB_NEW_MISS",
+	"T76x_TI_UTLB_REPLAY_FULL",
+	"T76x_TI_UTLB_REPLAY_MISS",
+	"T76x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T76x_FRAG_ACTIVE",
+	"T76x_FRAG_PRIMITIVES",
+	"T76x_FRAG_PRIMITIVES_DROPPED",
+	"T76x_FRAG_CYCLES_DESC",
+	"T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T76x_FRAG_CYCLES_VERT",
+	"T76x_FRAG_CYCLES_TRISETUP",
+	"T76x_FRAG_CYCLES_EZS_ACTIVE",
+	"T76x_FRAG_THREADS",
+	"T76x_FRAG_DUMMY_THREADS",
+	"T76x_FRAG_QUADS_RAST",
+	"T76x_FRAG_QUADS_EZS_TEST",
+	"T76x_FRAG_QUADS_EZS_KILLED",
+	"T76x_FRAG_THREADS_LZS_TEST",
+	"T76x_FRAG_THREADS_LZS_KILLED",
+	"T76x_FRAG_CYCLES_NO_TILE",
+	"T76x_FRAG_NUM_TILES",
+	"T76x_FRAG_TRANS_ELIM",
+	"T76x_COMPUTE_ACTIVE",
+	"T76x_COMPUTE_TASKS",
+	"T76x_COMPUTE_THREADS",
+	"T76x_COMPUTE_CYCLES_DESC",
+	"T76x_TRIPIPE_ACTIVE",
+	"T76x_ARITH_WORDS",
+	"T76x_ARITH_CYCLES_REG",
+	"T76x_ARITH_CYCLES_L0",
+	"T76x_ARITH_FRAG_DEPEND",
+	"T76x_LS_WORDS",
+	"T76x_LS_ISSUES",
+	"T76x_LS_REISSUE_ATTR",
+	"T76x_LS_REISSUES_VARY",
+	"T76x_LS_VARY_RV_MISS",
+	"T76x_LS_VARY_RV_HIT",
+	"T76x_LS_NO_UNPARK",
+	"T76x_TEX_WORDS",
+	"T76x_TEX_BUBBLES",
+	"T76x_TEX_WORDS_L0",
+	"T76x_TEX_WORDS_DESC",
+	"T76x_TEX_ISSUES",
+	"T76x_TEX_RECIRC_FMISS",
+	"T76x_TEX_RECIRC_DESC",
+	"T76x_TEX_RECIRC_MULTI",
+	"T76x_TEX_RECIRC_PMISS",
+	"T76x_TEX_RECIRC_CONF",
+	"T76x_LSC_READ_HITS",
+	"T76x_LSC_READ_OP",
+	"T76x_LSC_WRITE_HITS",
+	"T76x_LSC_WRITE_OP",
+	"T76x_LSC_ATOMIC_HITS",
+	"T76x_LSC_ATOMIC_OP",
+	"T76x_LSC_LINE_FETCHES",
+	"T76x_LSC_DIRTY_LINE",
+	"T76x_LSC_SNOOPS",
+	"T76x_AXI_TLB_STALL",
+	"T76x_AXI_TLB_MISS",
+	"T76x_AXI_TLB_TRANSACTION",
+	"T76x_LS_TLB_MISS",
+	"T76x_LS_TLB_HIT",
+	"T76x_AXI_BEATS_READ",
+	"T76x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MMU_HIT",
+	"T76x_MMU_NEW_MISS",
+	"T76x_MMU_REPLAY_FULL",
+	"T76x_MMU_REPLAY_MISS",
+	"T76x_MMU_TABLE_WALK",
+	"T76x_MMU_REQUESTS",
+	"",
+	"",
+	"T76x_UTLB_HIT",
+	"T76x_UTLB_NEW_MISS",
+	"T76x_UTLB_REPLAY_FULL",
+	"T76x_UTLB_REPLAY_MISS",
+	"T76x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_L2_EXT_WRITE_BEATS",
+	"T76x_L2_EXT_READ_BEATS",
+	"T76x_L2_ANY_LOOKUP",
+	"T76x_L2_READ_LOOKUP",
+	"T76x_L2_SREAD_LOOKUP",
+	"T76x_L2_READ_REPLAY",
+	"T76x_L2_READ_SNOOP",
+	"T76x_L2_READ_HIT",
+	"T76x_L2_CLEAN_MISS",
+	"T76x_L2_WRITE_LOOKUP",
+	"T76x_L2_SWRITE_LOOKUP",
+	"T76x_L2_WRITE_REPLAY",
+	"T76x_L2_WRITE_SNOOP",
+	"T76x_L2_WRITE_HIT",
+	"T76x_L2_EXT_READ_FULL",
+	"",
+	"T76x_L2_EXT_WRITE_FULL",
+	"T76x_L2_EXT_R_W_HAZARD",
+	"T76x_L2_EXT_READ",
+	"T76x_L2_EXT_READ_LINE",
+	"T76x_L2_EXT_WRITE",
+	"T76x_L2_EXT_WRITE_LINE",
+	"T76x_L2_EXT_WRITE_SMALL",
+	"T76x_L2_EXT_BARRIER",
+	"T76x_L2_EXT_AR_STALL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_EXT_RD_BUF_FULL",
+	"T76x_L2_EXT_R_RAW",
+	"T76x_L2_EXT_W_STALL",
+	"T76x_L2_EXT_W_BUF_FULL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_TAG_HAZARD",
+	"T76x_L2_SNOOP_FULL",
+	"T76x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t82x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MESSAGES_SENT",
+	"T82x_MESSAGES_RECEIVED",
+	"T82x_GPU_ACTIVE",
+	"T82x_IRQ_ACTIVE",
+	"T82x_JS0_JOBS",
+	"T82x_JS0_TASKS",
+	"T82x_JS0_ACTIVE",
+	"",
+	"T82x_JS0_WAIT_READ",
+	"T82x_JS0_WAIT_ISSUE",
+	"T82x_JS0_WAIT_DEPEND",
+	"T82x_JS0_WAIT_FINISH",
+	"T82x_JS1_JOBS",
+	"T82x_JS1_TASKS",
+	"T82x_JS1_ACTIVE",
+	"",
+	"T82x_JS1_WAIT_READ",
+	"T82x_JS1_WAIT_ISSUE",
+	"T82x_JS1_WAIT_DEPEND",
+	"T82x_JS1_WAIT_FINISH",
+	"T82x_JS2_JOBS",
+	"T82x_JS2_TASKS",
+	"T82x_JS2_ACTIVE",
+	"",
+	"T82x_JS2_WAIT_READ",
+	"T82x_JS2_WAIT_ISSUE",
+	"T82x_JS2_WAIT_DEPEND",
+	"T82x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T82x_TI_JOBS_PROCESSED",
+	"T82x_TI_TRIANGLES",
+	"T82x_TI_QUADS",
+	"T82x_TI_POLYGONS",
+	"T82x_TI_POINTS",
+	"T82x_TI_LINES",
+	"T82x_TI_FRONT_FACING",
+	"T82x_TI_BACK_FACING",
+	"T82x_TI_PRIM_VISIBLE",
+	"T82x_TI_PRIM_CULLED",
+	"T82x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T82x_FRAG_ACTIVE",
+	"T82x_FRAG_PRIMITIVES",
+	"T82x_FRAG_PRIMITIVES_DROPPED",
+	"T82x_FRAG_CYCLES_DESC",
+	"T82x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T82x_FRAG_CYCLES_VERT",
+	"T82x_FRAG_CYCLES_TRISETUP",
+	"T82x_FRAG_CYCLES_EZS_ACTIVE",
+	"T82x_FRAG_THREADS",
+	"T82x_FRAG_DUMMY_THREADS",
+	"T82x_FRAG_QUADS_RAST",
+	"T82x_FRAG_QUADS_EZS_TEST",
+	"T82x_FRAG_QUADS_EZS_KILLED",
+	"T82x_FRAG_THREADS_LZS_TEST",
+	"T82x_FRAG_THREADS_LZS_KILLED",
+	"T82x_FRAG_CYCLES_NO_TILE",
+	"T82x_FRAG_NUM_TILES",
+	"T82x_FRAG_TRANS_ELIM",
+	"T82x_COMPUTE_ACTIVE",
+	"T82x_COMPUTE_TASKS",
+	"T82x_COMPUTE_THREADS",
+	"T82x_COMPUTE_CYCLES_DESC",
+	"T82x_TRIPIPE_ACTIVE",
+	"T82x_ARITH_WORDS",
+	"T82x_ARITH_CYCLES_REG",
+	"T82x_ARITH_CYCLES_L0",
+	"T82x_ARITH_FRAG_DEPEND",
+	"T82x_LS_WORDS",
+	"T82x_LS_ISSUES",
+	"T82x_LS_REISSUE_ATTR",
+	"T82x_LS_REISSUES_VARY",
+	"T82x_LS_VARY_RV_MISS",
+	"T82x_LS_VARY_RV_HIT",
+	"T82x_LS_NO_UNPARK",
+	"T82x_TEX_WORDS",
+	"T82x_TEX_BUBBLES",
+	"T82x_TEX_WORDS_L0",
+	"T82x_TEX_WORDS_DESC",
+	"T82x_TEX_ISSUES",
+	"T82x_TEX_RECIRC_FMISS",
+	"T82x_TEX_RECIRC_DESC",
+	"T82x_TEX_RECIRC_MULTI",
+	"T82x_TEX_RECIRC_PMISS",
+	"T82x_TEX_RECIRC_CONF",
+	"T82x_LSC_READ_HITS",
+	"T82x_LSC_READ_OP",
+	"T82x_LSC_WRITE_HITS",
+	"T82x_LSC_WRITE_OP",
+	"T82x_LSC_ATOMIC_HITS",
+	"T82x_LSC_ATOMIC_OP",
+	"T82x_LSC_LINE_FETCHES",
+	"T82x_LSC_DIRTY_LINE",
+	"T82x_LSC_SNOOPS",
+	"T82x_AXI_TLB_STALL",
+	"T82x_AXI_TLB_MISS",
+	"T82x_AXI_TLB_TRANSACTION",
+	"T82x_LS_TLB_MISS",
+	"T82x_LS_TLB_HIT",
+	"T82x_AXI_BEATS_READ",
+	"T82x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MMU_HIT",
+	"T82x_MMU_NEW_MISS",
+	"T82x_MMU_REPLAY_FULL",
+	"T82x_MMU_REPLAY_MISS",
+	"T82x_MMU_TABLE_WALK",
+	"T82x_MMU_REQUESTS",
+	"",
+	"",
+	"T82x_UTLB_HIT",
+	"T82x_UTLB_NEW_MISS",
+	"T82x_UTLB_REPLAY_FULL",
+	"T82x_UTLB_REPLAY_MISS",
+	"T82x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_L2_EXT_WRITE_BEATS",
+	"T82x_L2_EXT_READ_BEATS",
+	"T82x_L2_ANY_LOOKUP",
+	"T82x_L2_READ_LOOKUP",
+	"T82x_L2_SREAD_LOOKUP",
+	"T82x_L2_READ_REPLAY",
+	"T82x_L2_READ_SNOOP",
+	"T82x_L2_READ_HIT",
+	"T82x_L2_CLEAN_MISS",
+	"T82x_L2_WRITE_LOOKUP",
+	"T82x_L2_SWRITE_LOOKUP",
+	"T82x_L2_WRITE_REPLAY",
+	"T82x_L2_WRITE_SNOOP",
+	"T82x_L2_WRITE_HIT",
+	"T82x_L2_EXT_READ_FULL",
+	"",
+	"T82x_L2_EXT_WRITE_FULL",
+	"T82x_L2_EXT_R_W_HAZARD",
+	"T82x_L2_EXT_READ",
+	"T82x_L2_EXT_READ_LINE",
+	"T82x_L2_EXT_WRITE",
+	"T82x_L2_EXT_WRITE_LINE",
+	"T82x_L2_EXT_WRITE_SMALL",
+	"T82x_L2_EXT_BARRIER",
+	"T82x_L2_EXT_AR_STALL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_EXT_RD_BUF_FULL",
+	"T82x_L2_EXT_R_RAW",
+	"T82x_L2_EXT_W_STALL",
+	"T82x_L2_EXT_W_BUF_FULL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_TAG_HAZARD",
+	"T82x_L2_SNOOP_FULL",
+	"T82x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t83x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MESSAGES_SENT",
+	"T83x_MESSAGES_RECEIVED",
+	"T83x_GPU_ACTIVE",
+	"T83x_IRQ_ACTIVE",
+	"T83x_JS0_JOBS",
+	"T83x_JS0_TASKS",
+	"T83x_JS0_ACTIVE",
+	"",
+	"T83x_JS0_WAIT_READ",
+	"T83x_JS0_WAIT_ISSUE",
+	"T83x_JS0_WAIT_DEPEND",
+	"T83x_JS0_WAIT_FINISH",
+	"T83x_JS1_JOBS",
+	"T83x_JS1_TASKS",
+	"T83x_JS1_ACTIVE",
+	"",
+	"T83x_JS1_WAIT_READ",
+	"T83x_JS1_WAIT_ISSUE",
+	"T83x_JS1_WAIT_DEPEND",
+	"T83x_JS1_WAIT_FINISH",
+	"T83x_JS2_JOBS",
+	"T83x_JS2_TASKS",
+	"T83x_JS2_ACTIVE",
+	"",
+	"T83x_JS2_WAIT_READ",
+	"T83x_JS2_WAIT_ISSUE",
+	"T83x_JS2_WAIT_DEPEND",
+	"T83x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T83x_TI_JOBS_PROCESSED",
+	"T83x_TI_TRIANGLES",
+	"T83x_TI_QUADS",
+	"T83x_TI_POLYGONS",
+	"T83x_TI_POINTS",
+	"T83x_TI_LINES",
+	"T83x_TI_FRONT_FACING",
+	"T83x_TI_BACK_FACING",
+	"T83x_TI_PRIM_VISIBLE",
+	"T83x_TI_PRIM_CULLED",
+	"T83x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T83x_FRAG_ACTIVE",
+	"T83x_FRAG_PRIMITIVES",
+	"T83x_FRAG_PRIMITIVES_DROPPED",
+	"T83x_FRAG_CYCLES_DESC",
+	"T83x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T83x_FRAG_CYCLES_VERT",
+	"T83x_FRAG_CYCLES_TRISETUP",
+	"T83x_FRAG_CYCLES_EZS_ACTIVE",
+	"T83x_FRAG_THREADS",
+	"T83x_FRAG_DUMMY_THREADS",
+	"T83x_FRAG_QUADS_RAST",
+	"T83x_FRAG_QUADS_EZS_TEST",
+	"T83x_FRAG_QUADS_EZS_KILLED",
+	"T83x_FRAG_THREADS_LZS_TEST",
+	"T83x_FRAG_THREADS_LZS_KILLED",
+	"T83x_FRAG_CYCLES_NO_TILE",
+	"T83x_FRAG_NUM_TILES",
+	"T83x_FRAG_TRANS_ELIM",
+	"T83x_COMPUTE_ACTIVE",
+	"T83x_COMPUTE_TASKS",
+	"T83x_COMPUTE_THREADS",
+	"T83x_COMPUTE_CYCLES_DESC",
+	"T83x_TRIPIPE_ACTIVE",
+	"T83x_ARITH_WORDS",
+	"T83x_ARITH_CYCLES_REG",
+	"T83x_ARITH_CYCLES_L0",
+	"T83x_ARITH_FRAG_DEPEND",
+	"T83x_LS_WORDS",
+	"T83x_LS_ISSUES",
+	"T83x_LS_REISSUE_ATTR",
+	"T83x_LS_REISSUES_VARY",
+	"T83x_LS_VARY_RV_MISS",
+	"T83x_LS_VARY_RV_HIT",
+	"T83x_LS_NO_UNPARK",
+	"T83x_TEX_WORDS",
+	"T83x_TEX_BUBBLES",
+	"T83x_TEX_WORDS_L0",
+	"T83x_TEX_WORDS_DESC",
+	"T83x_TEX_ISSUES",
+	"T83x_TEX_RECIRC_FMISS",
+	"T83x_TEX_RECIRC_DESC",
+	"T83x_TEX_RECIRC_MULTI",
+	"T83x_TEX_RECIRC_PMISS",
+	"T83x_TEX_RECIRC_CONF",
+	"T83x_LSC_READ_HITS",
+	"T83x_LSC_READ_OP",
+	"T83x_LSC_WRITE_HITS",
+	"T83x_LSC_WRITE_OP",
+	"T83x_LSC_ATOMIC_HITS",
+	"T83x_LSC_ATOMIC_OP",
+	"T83x_LSC_LINE_FETCHES",
+	"T83x_LSC_DIRTY_LINE",
+	"T83x_LSC_SNOOPS",
+	"T83x_AXI_TLB_STALL",
+	"T83x_AXI_TLB_MISS",
+	"T83x_AXI_TLB_TRANSACTION",
+	"T83x_LS_TLB_MISS",
+	"T83x_LS_TLB_HIT",
+	"T83x_AXI_BEATS_READ",
+	"T83x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MMU_HIT",
+	"T83x_MMU_NEW_MISS",
+	"T83x_MMU_REPLAY_FULL",
+	"T83x_MMU_REPLAY_MISS",
+	"T83x_MMU_TABLE_WALK",
+	"T83x_MMU_REQUESTS",
+	"",
+	"",
+	"T83x_UTLB_HIT",
+	"T83x_UTLB_NEW_MISS",
+	"T83x_UTLB_REPLAY_FULL",
+	"T83x_UTLB_REPLAY_MISS",
+	"T83x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_L2_EXT_WRITE_BEATS",
+	"T83x_L2_EXT_READ_BEATS",
+	"T83x_L2_ANY_LOOKUP",
+	"T83x_L2_READ_LOOKUP",
+	"T83x_L2_SREAD_LOOKUP",
+	"T83x_L2_READ_REPLAY",
+	"T83x_L2_READ_SNOOP",
+	"T83x_L2_READ_HIT",
+	"T83x_L2_CLEAN_MISS",
+	"T83x_L2_WRITE_LOOKUP",
+	"T83x_L2_SWRITE_LOOKUP",
+	"T83x_L2_WRITE_REPLAY",
+	"T83x_L2_WRITE_SNOOP",
+	"T83x_L2_WRITE_HIT",
+	"T83x_L2_EXT_READ_FULL",
+	"",
+	"T83x_L2_EXT_WRITE_FULL",
+	"T83x_L2_EXT_R_W_HAZARD",
+	"T83x_L2_EXT_READ",
+	"T83x_L2_EXT_READ_LINE",
+	"T83x_L2_EXT_WRITE",
+	"T83x_L2_EXT_WRITE_LINE",
+	"T83x_L2_EXT_WRITE_SMALL",
+	"T83x_L2_EXT_BARRIER",
+	"T83x_L2_EXT_AR_STALL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_EXT_RD_BUF_FULL",
+	"T83x_L2_EXT_R_RAW",
+	"T83x_L2_EXT_W_STALL",
+	"T83x_L2_EXT_W_BUF_FULL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_TAG_HAZARD",
+	"T83x_L2_SNOOP_FULL",
+	"T83x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t86x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MESSAGES_SENT",
+	"T86x_MESSAGES_RECEIVED",
+	"T86x_GPU_ACTIVE",
+	"T86x_IRQ_ACTIVE",
+	"T86x_JS0_JOBS",
+	"T86x_JS0_TASKS",
+	"T86x_JS0_ACTIVE",
+	"",
+	"T86x_JS0_WAIT_READ",
+	"T86x_JS0_WAIT_ISSUE",
+	"T86x_JS0_WAIT_DEPEND",
+	"T86x_JS0_WAIT_FINISH",
+	"T86x_JS1_JOBS",
+	"T86x_JS1_TASKS",
+	"T86x_JS1_ACTIVE",
+	"",
+	"T86x_JS1_WAIT_READ",
+	"T86x_JS1_WAIT_ISSUE",
+	"T86x_JS1_WAIT_DEPEND",
+	"T86x_JS1_WAIT_FINISH",
+	"T86x_JS2_JOBS",
+	"T86x_JS2_TASKS",
+	"T86x_JS2_ACTIVE",
+	"",
+	"T86x_JS2_WAIT_READ",
+	"T86x_JS2_WAIT_ISSUE",
+	"T86x_JS2_WAIT_DEPEND",
+	"T86x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T86x_TI_JOBS_PROCESSED",
+	"T86x_TI_TRIANGLES",
+	"T86x_TI_QUADS",
+	"T86x_TI_POLYGONS",
+	"T86x_TI_POINTS",
+	"T86x_TI_LINES",
+	"T86x_TI_VCACHE_HIT",
+	"T86x_TI_VCACHE_MISS",
+	"T86x_TI_FRONT_FACING",
+	"T86x_TI_BACK_FACING",
+	"T86x_TI_PRIM_VISIBLE",
+	"T86x_TI_PRIM_CULLED",
+	"T86x_TI_PRIM_CLIPPED",
+	"T86x_TI_LEVEL0",
+	"T86x_TI_LEVEL1",
+	"T86x_TI_LEVEL2",
+	"T86x_TI_LEVEL3",
+	"T86x_TI_LEVEL4",
+	"T86x_TI_LEVEL5",
+	"T86x_TI_LEVEL6",
+	"T86x_TI_LEVEL7",
+	"T86x_TI_COMMAND_1",
+	"T86x_TI_COMMAND_2",
+	"T86x_TI_COMMAND_3",
+	"T86x_TI_COMMAND_4",
+	"T86x_TI_COMMAND_5_7",
+	"T86x_TI_COMMAND_8_15",
+	"T86x_TI_COMMAND_16_63",
+	"T86x_TI_COMMAND_64",
+	"T86x_TI_COMPRESS_IN",
+	"T86x_TI_COMPRESS_OUT",
+	"T86x_TI_COMPRESS_FLUSH",
+	"T86x_TI_TIMESTAMPS",
+	"T86x_TI_PCACHE_HIT",
+	"T86x_TI_PCACHE_MISS",
+	"T86x_TI_PCACHE_LINE",
+	"T86x_TI_PCACHE_STALL",
+	"T86x_TI_WRBUF_HIT",
+	"T86x_TI_WRBUF_MISS",
+	"T86x_TI_WRBUF_LINE",
+	"T86x_TI_WRBUF_PARTIAL",
+	"T86x_TI_WRBUF_STALL",
+	"T86x_TI_ACTIVE",
+	"T86x_TI_LOADING_DESC",
+	"T86x_TI_INDEX_WAIT",
+	"T86x_TI_INDEX_RANGE_WAIT",
+	"T86x_TI_VERTEX_WAIT",
+	"T86x_TI_PCACHE_WAIT",
+	"T86x_TI_WRBUF_WAIT",
+	"T86x_TI_BUS_READ",
+	"T86x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_TI_UTLB_HIT",
+	"T86x_TI_UTLB_NEW_MISS",
+	"T86x_TI_UTLB_REPLAY_FULL",
+	"T86x_TI_UTLB_REPLAY_MISS",
+	"T86x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T86x_FRAG_ACTIVE",
+	"T86x_FRAG_PRIMITIVES",
+	"T86x_FRAG_PRIMITIVES_DROPPED",
+	"T86x_FRAG_CYCLES_DESC",
+	"T86x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T86x_FRAG_CYCLES_VERT",
+	"T86x_FRAG_CYCLES_TRISETUP",
+	"T86x_FRAG_CYCLES_EZS_ACTIVE",
+	"T86x_FRAG_THREADS",
+	"T86x_FRAG_DUMMY_THREADS",
+	"T86x_FRAG_QUADS_RAST",
+	"T86x_FRAG_QUADS_EZS_TEST",
+	"T86x_FRAG_QUADS_EZS_KILLED",
+	"T86x_FRAG_THREADS_LZS_TEST",
+	"T86x_FRAG_THREADS_LZS_KILLED",
+	"T86x_FRAG_CYCLES_NO_TILE",
+	"T86x_FRAG_NUM_TILES",
+	"T86x_FRAG_TRANS_ELIM",
+	"T86x_COMPUTE_ACTIVE",
+	"T86x_COMPUTE_TASKS",
+	"T86x_COMPUTE_THREADS",
+	"T86x_COMPUTE_CYCLES_DESC",
+	"T86x_TRIPIPE_ACTIVE",
+	"T86x_ARITH_WORDS",
+	"T86x_ARITH_CYCLES_REG",
+	"T86x_ARITH_CYCLES_L0",
+	"T86x_ARITH_FRAG_DEPEND",
+	"T86x_LS_WORDS",
+	"T86x_LS_ISSUES",
+	"T86x_LS_REISSUE_ATTR",
+	"T86x_LS_REISSUES_VARY",
+	"T86x_LS_VARY_RV_MISS",
+	"T86x_LS_VARY_RV_HIT",
+	"T86x_LS_NO_UNPARK",
+	"T86x_TEX_WORDS",
+	"T86x_TEX_BUBBLES",
+	"T86x_TEX_WORDS_L0",
+	"T86x_TEX_WORDS_DESC",
+	"T86x_TEX_ISSUES",
+	"T86x_TEX_RECIRC_FMISS",
+	"T86x_TEX_RECIRC_DESC",
+	"T86x_TEX_RECIRC_MULTI",
+	"T86x_TEX_RECIRC_PMISS",
+	"T86x_TEX_RECIRC_CONF",
+	"T86x_LSC_READ_HITS",
+	"T86x_LSC_READ_OP",
+	"T86x_LSC_WRITE_HITS",
+	"T86x_LSC_WRITE_OP",
+	"T86x_LSC_ATOMIC_HITS",
+	"T86x_LSC_ATOMIC_OP",
+	"T86x_LSC_LINE_FETCHES",
+	"T86x_LSC_DIRTY_LINE",
+	"T86x_LSC_SNOOPS",
+	"T86x_AXI_TLB_STALL",
+	"T86x_AXI_TLB_MISS",
+	"T86x_AXI_TLB_TRANSACTION",
+	"T86x_LS_TLB_MISS",
+	"T86x_LS_TLB_HIT",
+	"T86x_AXI_BEATS_READ",
+	"T86x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MMU_HIT",
+	"T86x_MMU_NEW_MISS",
+	"T86x_MMU_REPLAY_FULL",
+	"T86x_MMU_REPLAY_MISS",
+	"T86x_MMU_TABLE_WALK",
+	"T86x_MMU_REQUESTS",
+	"",
+	"",
+	"T86x_UTLB_HIT",
+	"T86x_UTLB_NEW_MISS",
+	"T86x_UTLB_REPLAY_FULL",
+	"T86x_UTLB_REPLAY_MISS",
+	"T86x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_L2_EXT_WRITE_BEATS",
+	"T86x_L2_EXT_READ_BEATS",
+	"T86x_L2_ANY_LOOKUP",
+	"T86x_L2_READ_LOOKUP",
+	"T86x_L2_SREAD_LOOKUP",
+	"T86x_L2_READ_REPLAY",
+	"T86x_L2_READ_SNOOP",
+	"T86x_L2_READ_HIT",
+	"T86x_L2_CLEAN_MISS",
+	"T86x_L2_WRITE_LOOKUP",
+	"T86x_L2_SWRITE_LOOKUP",
+	"T86x_L2_WRITE_REPLAY",
+	"T86x_L2_WRITE_SNOOP",
+	"T86x_L2_WRITE_HIT",
+	"T86x_L2_EXT_READ_FULL",
+	"",
+	"T86x_L2_EXT_WRITE_FULL",
+	"T86x_L2_EXT_R_W_HAZARD",
+	"T86x_L2_EXT_READ",
+	"T86x_L2_EXT_READ_LINE",
+	"T86x_L2_EXT_WRITE",
+	"T86x_L2_EXT_WRITE_LINE",
+	"T86x_L2_EXT_WRITE_SMALL",
+	"T86x_L2_EXT_BARRIER",
+	"T86x_L2_EXT_AR_STALL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_EXT_RD_BUF_FULL",
+	"T86x_L2_EXT_R_RAW",
+	"T86x_L2_EXT_W_STALL",
+	"T86x_L2_EXT_W_BUF_FULL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_TAG_HAZARD",
+	"T86x_L2_SNOOP_FULL",
+	"T86x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t88x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MESSAGES_SENT",
+	"T88x_MESSAGES_RECEIVED",
+	"T88x_GPU_ACTIVE",
+	"T88x_IRQ_ACTIVE",
+	"T88x_JS0_JOBS",
+	"T88x_JS0_TASKS",
+	"T88x_JS0_ACTIVE",
+	"",
+	"T88x_JS0_WAIT_READ",
+	"T88x_JS0_WAIT_ISSUE",
+	"T88x_JS0_WAIT_DEPEND",
+	"T88x_JS0_WAIT_FINISH",
+	"T88x_JS1_JOBS",
+	"T88x_JS1_TASKS",
+	"T88x_JS1_ACTIVE",
+	"",
+	"T88x_JS1_WAIT_READ",
+	"T88x_JS1_WAIT_ISSUE",
+	"T88x_JS1_WAIT_DEPEND",
+	"T88x_JS1_WAIT_FINISH",
+	"T88x_JS2_JOBS",
+	"T88x_JS2_TASKS",
+	"T88x_JS2_ACTIVE",
+	"",
+	"T88x_JS2_WAIT_READ",
+	"T88x_JS2_WAIT_ISSUE",
+	"T88x_JS2_WAIT_DEPEND",
+	"T88x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T88x_TI_JOBS_PROCESSED",
+	"T88x_TI_TRIANGLES",
+	"T88x_TI_QUADS",
+	"T88x_TI_POLYGONS",
+	"T88x_TI_POINTS",
+	"T88x_TI_LINES",
+	"T88x_TI_VCACHE_HIT",
+	"T88x_TI_VCACHE_MISS",
+	"T88x_TI_FRONT_FACING",
+	"T88x_TI_BACK_FACING",
+	"T88x_TI_PRIM_VISIBLE",
+	"T88x_TI_PRIM_CULLED",
+	"T88x_TI_PRIM_CLIPPED",
+	"T88x_TI_LEVEL0",
+	"T88x_TI_LEVEL1",
+	"T88x_TI_LEVEL2",
+	"T88x_TI_LEVEL3",
+	"T88x_TI_LEVEL4",
+	"T88x_TI_LEVEL5",
+	"T88x_TI_LEVEL6",
+	"T88x_TI_LEVEL7",
+	"T88x_TI_COMMAND_1",
+	"T88x_TI_COMMAND_2",
+	"T88x_TI_COMMAND_3",
+	"T88x_TI_COMMAND_4",
+	"T88x_TI_COMMAND_5_7",
+	"T88x_TI_COMMAND_8_15",
+	"T88x_TI_COMMAND_16_63",
+	"T88x_TI_COMMAND_64",
+	"T88x_TI_COMPRESS_IN",
+	"T88x_TI_COMPRESS_OUT",
+	"T88x_TI_COMPRESS_FLUSH",
+	"T88x_TI_TIMESTAMPS",
+	"T88x_TI_PCACHE_HIT",
+	"T88x_TI_PCACHE_MISS",
+	"T88x_TI_PCACHE_LINE",
+	"T88x_TI_PCACHE_STALL",
+	"T88x_TI_WRBUF_HIT",
+	"T88x_TI_WRBUF_MISS",
+	"T88x_TI_WRBUF_LINE",
+	"T88x_TI_WRBUF_PARTIAL",
+	"T88x_TI_WRBUF_STALL",
+	"T88x_TI_ACTIVE",
+	"T88x_TI_LOADING_DESC",
+	"T88x_TI_INDEX_WAIT",
+	"T88x_TI_INDEX_RANGE_WAIT",
+	"T88x_TI_VERTEX_WAIT",
+	"T88x_TI_PCACHE_WAIT",
+	"T88x_TI_WRBUF_WAIT",
+	"T88x_TI_BUS_READ",
+	"T88x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_TI_UTLB_HIT",
+	"T88x_TI_UTLB_NEW_MISS",
+	"T88x_TI_UTLB_REPLAY_FULL",
+	"T88x_TI_UTLB_REPLAY_MISS",
+	"T88x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T88x_FRAG_ACTIVE",
+	"T88x_FRAG_PRIMITIVES",
+	"T88x_FRAG_PRIMITIVES_DROPPED",
+	"T88x_FRAG_CYCLES_DESC",
+	"T88x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T88x_FRAG_CYCLES_VERT",
+	"T88x_FRAG_CYCLES_TRISETUP",
+	"T88x_FRAG_CYCLES_EZS_ACTIVE",
+	"T88x_FRAG_THREADS",
+	"T88x_FRAG_DUMMY_THREADS",
+	"T88x_FRAG_QUADS_RAST",
+	"T88x_FRAG_QUADS_EZS_TEST",
+	"T88x_FRAG_QUADS_EZS_KILLED",
+	"T88x_FRAG_THREADS_LZS_TEST",
+	"T88x_FRAG_THREADS_LZS_KILLED",
+	"T88x_FRAG_CYCLES_NO_TILE",
+	"T88x_FRAG_NUM_TILES",
+	"T88x_FRAG_TRANS_ELIM",
+	"T88x_COMPUTE_ACTIVE",
+	"T88x_COMPUTE_TASKS",
+	"T88x_COMPUTE_THREADS",
+	"T88x_COMPUTE_CYCLES_DESC",
+	"T88x_TRIPIPE_ACTIVE",
+	"T88x_ARITH_WORDS",
+	"T88x_ARITH_CYCLES_REG",
+	"T88x_ARITH_CYCLES_L0",
+	"T88x_ARITH_FRAG_DEPEND",
+	"T88x_LS_WORDS",
+	"T88x_LS_ISSUES",
+	"T88x_LS_REISSUE_ATTR",
+	"T88x_LS_REISSUES_VARY",
+	"T88x_LS_VARY_RV_MISS",
+	"T88x_LS_VARY_RV_HIT",
+	"T88x_LS_NO_UNPARK",
+	"T88x_TEX_WORDS",
+	"T88x_TEX_BUBBLES",
+	"T88x_TEX_WORDS_L0",
+	"T88x_TEX_WORDS_DESC",
+	"T88x_TEX_ISSUES",
+	"T88x_TEX_RECIRC_FMISS",
+	"T88x_TEX_RECIRC_DESC",
+	"T88x_TEX_RECIRC_MULTI",
+	"T88x_TEX_RECIRC_PMISS",
+	"T88x_TEX_RECIRC_CONF",
+	"T88x_LSC_READ_HITS",
+	"T88x_LSC_READ_OP",
+	"T88x_LSC_WRITE_HITS",
+	"T88x_LSC_WRITE_OP",
+	"T88x_LSC_ATOMIC_HITS",
+	"T88x_LSC_ATOMIC_OP",
+	"T88x_LSC_LINE_FETCHES",
+	"T88x_LSC_DIRTY_LINE",
+	"T88x_LSC_SNOOPS",
+	"T88x_AXI_TLB_STALL",
+	"T88x_AXI_TLB_MISS",
+	"T88x_AXI_TLB_TRANSACTION",
+	"T88x_LS_TLB_MISS",
+	"T88x_LS_TLB_HIT",
+	"T88x_AXI_BEATS_READ",
+	"T88x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MMU_HIT",
+	"T88x_MMU_NEW_MISS",
+	"T88x_MMU_REPLAY_FULL",
+	"T88x_MMU_REPLAY_MISS",
+	"T88x_MMU_TABLE_WALK",
+	"T88x_MMU_REQUESTS",
+	"",
+	"",
+	"T88x_UTLB_HIT",
+	"T88x_UTLB_NEW_MISS",
+	"T88x_UTLB_REPLAY_FULL",
+	"T88x_UTLB_REPLAY_MISS",
+	"T88x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_L2_EXT_WRITE_BEATS",
+	"T88x_L2_EXT_READ_BEATS",
+	"T88x_L2_ANY_LOOKUP",
+	"T88x_L2_READ_LOOKUP",
+	"T88x_L2_SREAD_LOOKUP",
+	"T88x_L2_READ_REPLAY",
+	"T88x_L2_READ_SNOOP",
+	"T88x_L2_READ_HIT",
+	"T88x_L2_CLEAN_MISS",
+	"T88x_L2_WRITE_LOOKUP",
+	"T88x_L2_SWRITE_LOOKUP",
+	"T88x_L2_WRITE_REPLAY",
+	"T88x_L2_WRITE_SNOOP",
+	"T88x_L2_WRITE_HIT",
+	"T88x_L2_EXT_READ_FULL",
+	"",
+	"T88x_L2_EXT_WRITE_FULL",
+	"T88x_L2_EXT_R_W_HAZARD",
+	"T88x_L2_EXT_READ",
+	"T88x_L2_EXT_READ_LINE",
+	"T88x_L2_EXT_WRITE",
+	"T88x_L2_EXT_WRITE_LINE",
+	"T88x_L2_EXT_WRITE_SMALL",
+	"T88x_L2_EXT_BARRIER",
+	"T88x_L2_EXT_AR_STALL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_EXT_RD_BUF_FULL",
+	"T88x_L2_EXT_R_RAW",
+	"T88x_L2_EXT_W_STALL",
+	"T88x_L2_EXT_W_BUF_FULL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_TAG_HAZARD",
+	"T88x_L2_SNOOP_FULL",
+	"T88x_L2_REPLAY_FULL"
+};
+
+#include "mali_kbase_gator_hwcnt_names_tmix.h"
+
+
+
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
new file mode 100644
index 0000000..682f98e
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+
+static const char * const hardware_counters_mali_tMIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MESSAGES_SENT",
+	"TMIx_MESSAGES_RECEIVED",
+	"TMIx_GPU_ACTIVE",
+	"TMIx_IRQ_ACTIVE",
+	"TMIx_JS0_JOBS",
+	"TMIx_JS0_TASKS",
+	"TMIx_JS0_ACTIVE",
+	"",
+	"TMIx_JS0_WAIT_READ",
+	"TMIx_JS0_WAIT_ISSUE",
+	"TMIx_JS0_WAIT_DEPEND",
+	"TMIx_JS0_WAIT_FINISH",
+	"TMIx_JS1_JOBS",
+	"TMIx_JS1_TASKS",
+	"TMIx_JS1_ACTIVE",
+	"",
+	"TMIx_JS1_WAIT_READ",
+	"TMIx_JS1_WAIT_ISSUE",
+	"TMIx_JS1_WAIT_DEPEND",
+	"TMIx_JS1_WAIT_FINISH",
+	"TMIx_JS2_JOBS",
+	"TMIx_JS2_TASKS",
+	"TMIx_JS2_ACTIVE",
+	"",
+	"TMIx_JS2_WAIT_READ",
+	"TMIx_JS2_WAIT_ISSUE",
+	"TMIx_JS2_WAIT_DEPEND",
+	"TMIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_TILER_ACTIVE",
+	"TMIx_JOBS_PROCESSED",
+	"TMIx_TRIANGLES",
+	"TMIx_LINES",
+	"TMIx_POINTS",
+	"TMIx_FRONT_FACING",
+	"TMIx_BACK_FACING",
+	"TMIx_PRIM_VISIBLE",
+	"TMIx_PRIM_CULLED",
+	"TMIx_PRIM_CLIPPED",
+	"TMIx_PRIM_SAT_CULLED",
+	"",
+	"",
+	"TMIx_BUS_READ",
+	"",
+	"TMIx_BUS_WRITE",
+	"TMIx_LOADING_DESC",
+	"TMIx_IDVS_POS_SHAD_REQ",
+	"TMIx_IDVS_POS_SHAD_WAIT",
+	"TMIx_IDVS_POS_SHAD_STALL",
+	"TMIx_IDVS_POS_FIFO_FULL",
+	"TMIx_PREFETCH_STALL",
+	"TMIx_VCACHE_HIT",
+	"TMIx_VCACHE_MISS",
+	"TMIx_VCACHE_LINE_WAIT",
+	"TMIx_VFETCH_POS_READ_WAIT",
+	"TMIx_VFETCH_VERTEX_WAIT",
+	"TMIx_VFETCH_STALL",
+	"TMIx_PRIMASSY_STALL",
+	"TMIx_BBOX_GEN_STALL",
+	"TMIx_IDVS_VBU_HIT",
+	"TMIx_IDVS_VBU_MISS",
+	"TMIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TMIx_IDVS_VAR_SHAD_REQ",
+	"TMIx_IDVS_VAR_SHAD_STALL",
+	"TMIx_BINNER_STALL",
+	"TMIx_ITER_STALL",
+	"TMIx_COMPRESS_MISS",
+	"TMIx_COMPRESS_STALL",
+	"TMIx_PCACHE_HIT",
+	"TMIx_PCACHE_MISS",
+	"TMIx_PCACHE_MISS_STALL",
+	"TMIx_PCACHE_EVICT_STALL",
+	"TMIx_PMGR_PTR_WR_STALL",
+	"TMIx_PMGR_PTR_RD_STALL",
+	"TMIx_PMGR_CMD_WR_STALL",
+	"TMIx_WRBUF_ACTIVE",
+	"TMIx_WRBUF_HIT",
+	"TMIx_WRBUF_MISS",
+	"TMIx_WRBUF_NO_FREE_LINE_STALL",
+	"TMIx_WRBUF_NO_AXI_ID_STALL",
+	"TMIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TMIx_UTLB_TRANS",
+	"TMIx_UTLB_TRANS_HIT",
+	"TMIx_UTLB_TRANS_STALL",
+	"TMIx_UTLB_TRANS_MISS_DELAY",
+	"TMIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_FRAG_ACTIVE",
+	"TMIx_FRAG_PRIMITIVES",
+	"TMIx_FRAG_PRIM_RAST",
+	"TMIx_FRAG_FPK_ACTIVE",
+	"TMIx_FRAG_STARVING",
+	"TMIx_FRAG_WARPS",
+	"TMIx_FRAG_PARTIAL_WARPS",
+	"TMIx_FRAG_QUADS_RAST",
+	"TMIx_FRAG_QUADS_EZS_TEST",
+	"TMIx_FRAG_QUADS_EZS_UPDATE",
+	"TMIx_FRAG_QUADS_EZS_KILL",
+	"TMIx_FRAG_LZS_TEST",
+	"TMIx_FRAG_LZS_KILL",
+	"",
+	"TMIx_FRAG_PTILES",
+	"TMIx_FRAG_TRANS_ELIM",
+	"TMIx_QUAD_FPK_KILLER",
+	"",
+	"TMIx_COMPUTE_ACTIVE",
+	"TMIx_COMPUTE_TASKS",
+	"TMIx_COMPUTE_WARPS",
+	"TMIx_COMPUTE_STARVING",
+	"TMIx_EXEC_CORE_ACTIVE",
+	"TMIx_EXEC_ACTIVE",
+	"TMIx_EXEC_INSTR_COUNT",
+	"TMIx_EXEC_INSTR_DIVERGED",
+	"TMIx_EXEC_INSTR_STARVING",
+	"TMIx_ARITH_INSTR_SINGLE_FMA",
+	"TMIx_ARITH_INSTR_DOUBLE",
+	"TMIx_ARITH_INSTR_MSG",
+	"TMIx_ARITH_INSTR_MSG_ONLY",
+	"TMIx_TEX_INSTR",
+	"TMIx_TEX_INSTR_MIPMAP",
+	"TMIx_TEX_INSTR_COMPRESSED",
+	"TMIx_TEX_INSTR_3D",
+	"TMIx_TEX_INSTR_TRILINEAR",
+	"TMIx_TEX_COORD_ISSUE",
+	"TMIx_TEX_COORD_STALL",
+	"TMIx_TEX_STARVE_CACHE",
+	"TMIx_TEX_STARVE_FILTER",
+	"TMIx_LS_MEM_READ_FULL",
+	"TMIx_LS_MEM_READ_SHORT",
+	"TMIx_LS_MEM_WRITE_FULL",
+	"TMIx_LS_MEM_WRITE_SHORT",
+	"TMIx_LS_MEM_ATOMIC",
+	"TMIx_VARY_INSTR",
+	"",
+	"",
+	"TMIx_ATTR_INSTR",
+	"TMIx_ARITH_INSTR_FP_MUL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TMIx_L2_RD_MSG_IN",
+	"TMIx_L2_RD_MSG_IN_STALL",
+	"TMIx_L2_WR_MSG_IN",
+	"TMIx_L2_WR_MSG_IN_STALL",
+	"TMIx_L2_SNP_MSG_IN",
+	"TMIx_L2_SNP_MSG_IN_STALL",
+	"TMIx_L2_RD_MSG_OUT",
+	"TMIx_L2_RD_MSG_OUT_STALL",
+	"TMIx_L2_WR_MSG_OUT",
+	"TMIx_L2_ANY_LOOKUP",
+	"TMIx_L2_READ_LOOKUP",
+	"TMIx_L2_WRITE_LOOKUP",
+	"TMIx_L2_EXT_SNOOP_LOOKUP",
+	"TMIx_L2_EXT_READ",
+	"TMIx_L2_EXT_READ_NOSNP",
+	"TMIx_L2_EXT_READ_UNIQUE",
+	"TMIx_L2_EXT_READ_BEATS",
+	"TMIx_L2_EXT_AR_STALL",
+	"TMIx_L2_EXT_AR_CNT_Q1",
+	"TMIx_L2_EXT_AR_CNT_Q2",
+	"TMIx_L2_EXT_AR_CNT_Q3",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TMIx_L2_EXT_WRITE",
+	"TMIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TMIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TMIx_L2_EXT_WRITE_SNP_FULL",
+	"TMIx_L2_EXT_WRITE_SNP_PTL",
+	"TMIx_L2_EXT_WRITE_BEATS",
+	"TMIx_L2_EXT_W_STALL",
+	"TMIx_L2_EXT_AW_CNT_Q1",
+	"TMIx_L2_EXT_AW_CNT_Q2",
+	"TMIx_L2_EXT_AW_CNT_Q3",
+	"TMIx_L2_EXT_SNOOP",
+	"TMIx_L2_EXT_SNOOP_STALL",
+	"TMIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TMIx_L2_EXT_SNOOP_RESP_DATA",
+	"TMIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
new file mode 100644
index 0000000..a962ecb
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#ifndef _KBASE_GPU_ID_H_
+#define _KBASE_GPU_ID_H_
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT       0
+#define GPU_ID_VERSION_MINOR_SHIFT        4
+#define GPU_ID_VERSION_MAJOR_SHIFT        12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
+#define GPU_ID_VERSION_STATUS             (0xF  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xF  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X                    0x6956
+#define GPU_ID_PI_T62X                    0x0620
+#define GPU_ID_PI_T76X                    0x0750
+#define GPU_ID_PI_T72X                    0x0720
+#define GPU_ID_PI_TFRX                    0x0880
+#define GPU_ID_PI_T86X                    0x0860
+#define GPU_ID_PI_T82X                    0x0820
+#define GPU_ID_PI_T83X                    0x0830
+
+/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
+#define GPU_ID_PI_NEW_FORMAT_START        0x1000
+#define GPU_ID_IS_NEW_FORMAT(product_id)  ((product_id) != GPU_ID_PI_T60X && \
+						(product_id) >= \
+						GPU_ID_PI_NEW_FORMAT_START)
+
+#define GPU_ID2_VERSION_STATUS_SHIFT      0
+#define GPU_ID2_VERSION_MINOR_SHIFT       4
+#define GPU_ID2_VERSION_MAJOR_SHIFT       12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT       16
+#define GPU_ID2_ARCH_REV_SHIFT            20
+#define GPU_ID2_ARCH_MINOR_SHIFT          24
+#define GPU_ID2_ARCH_MAJOR_SHIFT          28
+#define GPU_ID2_VERSION_STATUS            (0xF << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFF << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xF << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xF << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xF << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xF << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+   a product ignoring its version. */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+   revision (major, minor, status) of a product */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+		(((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
+	version_major, version_minor, version_status) \
+		(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+			product_major) | \
+		 GPU_ID2_VERSION_MAKE(version_major, version_minor,     \
+			version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+   a particular GPU model by its arch_major and product_major. */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+   for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+   model. */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+		(((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		    GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0                   0x1
+#define GPU_ID_S_EAC                      0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major,
+   minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+		(((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		((major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		((minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		((status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+#endif /* _KBASE_GPU_ID_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
new file mode 100755
index 0000000..82f4c36
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase_gpu_memory_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+/** Show callback for the @c gpu_memory debugfs file.
+ *
+ * This function is called to get the contents of the @c gpu_memory debugfs
+ * file. This is a report of current gpu memory usage.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if successfully prints data in debugfs entry file
+ *         -1 if it encountered an error
+ */
+
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+
+	kbdev_list = kbase_dev_list_get();
+	list_for_each(entry, kbdev_list) {
+		struct kbase_device *kbdev = NULL;
+		struct kbasep_kctx_list_element *element;
+
+		kbdev = list_entry(entry, struct kbase_device, entry);
+		/* output the total memory usage and cap for this device */
+		seq_printf(sfile, "%-16s  %10u\n",
+				kbdev->devname,
+				atomic_read(&(kbdev->memdev.used_pages)));
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_for_each_entry(element, &kbdev->kctx_list, link) {
+			/* output the memory usage and cap for each kctx
+			* opened on this device */
+			seq_printf(sfile, "  %s-0x%p %10u\n",
+				"kctx",
+				element->kctx,
+				atomic_read(&(element->kctx->used_pages)));
+		}
+		mutex_unlock(&kbdev->kctx_list_lock);
+	}
+	kbase_dev_list_put(kbdev_list);
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for gpu_memory
+ */
+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_gpu_memory_seq_show , NULL);
+}
+
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+	.open = kbasep_gpu_memory_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ *  Initialize debugfs entry for gpu_memory
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("gpu_memory", S_IRUGO,
+			kbdev->mali_debugfs_directory, NULL,
+			&kbasep_gpu_memory_debugfs_fops);
+	return;
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	return;
+}
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
new file mode 100755
index 0000000..3cf30a4
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_H
+#define _KBASE_GPU_MEMORY_H
+
+#include <mali_kbase.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+#endif  /*_KBASE_GPU_MEMORY_H*/
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
new file mode 100755
index 0000000..7f77dba
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -0,0 +1,314 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include <linux/clk.h>
+
+/**
+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
+ * @value:  The value from which to extract bits.
+ * @offset: The first bit to extract (0 being the LSB).
+ * @size:   The number of bits to extract.
+ *
+ * Context: @offset + @size <= 32.
+ *
+ * Return: Bits [@offset, @offset + @size) from @value.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+	(((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
+
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props)
+{
+	kbase_gpu_clk_speed_func get_gpu_speed_mhz;
+	u32 gpu_speed_mhz;
+	int rc = 1;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kbase_props);
+
+	/* Current GPU speed is requested from the system integrator via the GPU_SPEED_FUNC function.
+	 * If that function fails, or the function is not provided by the system integrator, we report the maximum
+	 * GPU speed as specified by GPU_FREQ_KHZ_MAX.
+	 */
+	get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC;
+	if (get_gpu_speed_mhz != NULL) {
+		rc = get_gpu_speed_mhz(&gpu_speed_mhz);
+#ifdef CONFIG_MALI_DEBUG
+		/* Issue a warning message when the reported GPU speed falls outside the min/max range */
+		if (rc == 0) {
+			u32 gpu_speed_khz = gpu_speed_mhz * 1000;
+
+			if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min ||
+					gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
+				dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n",
+						(unsigned long)gpu_speed_khz,
+						(unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min,
+						(unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
+		}
+#endif				/* CONFIG_MALI_DEBUG */
+	}
+	if (kctx->kbdev->clock) {
+		gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000;
+		rc = 0;
+	}
+	if (rc != 0)
+		gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
+
+	kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz;
+
+	memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
+
+	/* Before API 8.2 they expect L3 cache info here, which was always 0 */
+	if (kctx->api_version < KBASE_API_VERSION(8, 2))
+		kbase_props->props.raw_props.suspend_size = 0;
+
+	return 0;
+}
+
+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+	struct mali_base_gpu_coherent_group *current_group;
+	u64 group_present;
+	u64 group_mask;
+	u64 first_set, first_set_prev;
+	u32 num_groups = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != props);
+
+	props->coherency_info.coherency = props->raw_props.mem_features;
+	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+	if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+		/* Group is l2 coherent */
+		group_present = props->raw_props.l2_present;
+	} else {
+		/* Group is l1 coherent */
+		group_present = props->raw_props.shader_present;
+	}
+
+	/*
+	 * The coherent group mask can be computed from the l2 present
+	 * register.
+	 *
+	 * For the coherent group n:
+	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
+	 * where first_set is group_present with only its nth set-bit kept
+	 * (i.e. the position from where a new group starts).
+	 *
+	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
+	 * The first mask is:
+	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
+	 *               = (0x0..010     - 1) & ~(0x0..01      - 1)
+	 *               =  0x0..00f
+	 * The second mask is:
+	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
+	 *               = (0x0..100     - 1) & ~(0x0..010     - 1)
+	 *               =  0x0..0f0
+	 * And so on until all the bits from group_present have been cleared
+	 * (i.e. there is no group left).
+	 */
+
+	current_group = props->coherency_info.group;
+	first_set = group_present & ~(group_present - 1);
+
+	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
+		group_present -= first_set;	/* Clear the current group bit */
+		first_set_prev = first_set;
+
+		first_set = group_present & ~(group_present - 1);
+		group_mask = (first_set - 1) & ~(first_set_prev - 1);
+
+		/* Populate the coherent_group structure for each group */
+		current_group->core_mask = group_mask & props->raw_props.shader_present;
+		current_group->num_cores = hweight64(current_group->core_mask);
+
+		num_groups++;
+		current_group++;
+	}
+
+	if (group_present != 0)
+		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+	props->coherency_info.num_groups = num_groups;
+}
+
+/**
+ * kbase_gpuprops_get_props - Get the GPU configuration
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values from the GPU configuration
+ * registers. Only the raw properties are filled in this function
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	struct kbase_gpuprops_regdump regdump;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get(kbdev, &regdump);
+
+	gpu_props->raw_props.gpu_id = regdump.gpu_id;
+	gpu_props->raw_props.tiler_features = regdump.tiler_features;
+	gpu_props->raw_props.mem_features = regdump.mem_features;
+	gpu_props->raw_props.mmu_features = regdump.mmu_features;
+	gpu_props->raw_props.l2_features = regdump.l2_features;
+	gpu_props->raw_props.suspend_size = regdump.suspend_size;
+
+	gpu_props->raw_props.as_present = regdump.as_present;
+	gpu_props->raw_props.js_present = regdump.js_present;
+	gpu_props->raw_props.shader_present = ((u64) regdump.shader_present_hi << 32) + regdump.shader_present_lo;
+	gpu_props->raw_props.tiler_present = ((u64) regdump.tiler_present_hi << 32) + regdump.tiler_present_lo;
+	gpu_props->raw_props.l2_present = ((u64) regdump.l2_present_hi << 32) + regdump.l2_present_lo;
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		gpu_props->raw_props.js_features[i] = regdump.js_features[i];
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
+
+	gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
+	gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
+	gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
+	gpu_props->raw_props.thread_features = regdump.thread_features;
+}
+
+/**
+ * kbase_gpuprops_calculate_props - Calculate the derived properties
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev:     The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values derived from the GPU
+ * configuration registers
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	int i;
+
+	/* Populate the base_gpu_props structure */
+	gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+	gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+	gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+	gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+
+	/* Field with number of l2 slices is added to MEM_FEATURES register
+	 * since t76x. Below code assumes that for older GPU reserved bits will
+	 * be read as zero. */
+	gpu_props->l2_props.num_l2_slices =
+		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+
+	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+	if (gpu_props->raw_props.thread_max_threads == 0)
+		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+	else
+		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+	else
+		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+	if (gpu_props->raw_props.thread_max_barrier_size == 0)
+		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+	else
+		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+	gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+
+	/* If values are not specified, then use defaults */
+	if (gpu_props->thread_props.max_registers == 0) {
+		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+	}
+	/* Initialize the coherent_group structure for each group */
+	kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *gpu_props;
+	struct gpu_raw_gpu_props *raw;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	gpu_props = &kbdev->gpu_props;
+	raw = &gpu_props->props.raw_props;
+
+	/* Initialize the base_gpu_props structure from the hardware */
+	kbase_gpuprops_get_props(&gpu_props->props, kbdev);
+
+	/* Populate the derived properties */
+	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
+
+	/* Populate kbase-only fields */
+	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
+	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
+
+	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
+
+	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
+	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
+
+	gpu_props->num_cores = hweight64(raw->shader_present);
+	gpu_props->num_core_groups = hweight64(raw->l2_present);
+	gpu_props->num_address_spaces = hweight32(raw->as_present);
+	gpu_props->num_job_slots = hweight32(raw->js_present);
+}
+
+void kbase_gpuprops_set_features(struct kbase_device *kbdev)
+{
+	base_gpu_props *gpu_props;
+	struct kbase_gpuprops_regdump regdump;
+
+	gpu_props = &kbdev->gpu_props.props;
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get_features(kbdev, &regdump);
+
+	/*
+	 * Copy the raw value from the register, later this will get turned
+	 * into the selected coherency mode.
+	 */
+	gpu_props->raw_props.coherency_mode = regdump.coherency_features;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
new file mode 100755
index 0000000..f3c95cc
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev		The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_set_features - Set up Kbase GPU properties
+ * @kbdev:   Device pointer
+ *
+ * This function sets up GPU properties that are dependent on the hardware
+ * features bitmask. This function must be preceeded by a call to
+ * kbase_hw_set_features_mask().
+ */
+void kbase_gpuprops_set_features(struct kbase_device *kbdev);
+
+/**
+ * @brief Provide GPU properties to userside through UKU call.
+ *
+ * Fill the struct kbase_uk_gpuprops with values from GPU configuration registers.
+ *
+ * @param kctx		The struct kbase_context structure
+ * @param kbase_props	A copy of the struct kbase_uk_gpuprops structure from userspace
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props);
+
+#endif				/* _KBASE_GPUPROPS_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
new file mode 100755
index 0000000..f42e91b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops_types.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_TYPES_H_
+#define _KBASE_GPUPROPS_TYPES_H_
+
+#include "mali_base_kernel.h"
+
+#define KBASE_GPU_SPEED_MHZ    123
+#define KBASE_GPU_PC_SIZE_LOG2 24U
+
+struct kbase_gpuprops_regdump {
+	u32 gpu_id;
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 tiler_features;
+	u32 mem_features;
+	u32 mmu_features;
+	u32 as_present;
+	u32 js_present;
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 shader_present_lo;
+	u32 shader_present_hi;
+	u32 tiler_present_lo;
+	u32 tiler_present_hi;
+	u32 l2_present_lo;
+	u32 l2_present_hi;
+	u32 coherency_features;
+};
+
+struct kbase_gpu_cache_props {
+	u8 associativity;
+	u8 external_bus_width;
+};
+
+struct kbase_gpu_mem_props {
+	u8 core_group;
+};
+
+struct kbase_gpu_mmu_props {
+	u8 va_bits;
+	u8 pa_bits;
+};
+
+struct kbase_gpu_props {
+	/* kernel-only properties */
+	u8 num_cores;
+	u8 num_core_groups;
+	u8 num_address_spaces;
+	u8 num_job_slots;
+
+	struct kbase_gpu_cache_props l2_props;
+
+	struct kbase_gpu_mem_props mem;
+	struct kbase_gpu_mmu_props mmu;
+
+	/**
+	 * Implementation specific irq throttle value (us), should be adjusted during integration.
+	 */
+	int irq_throttle_time_us;
+
+	/* Properties shared with userspace */
+	base_gpu_props props;
+};
+
+#endif				/* _KBASE_GPUPROPS_TYPES_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
new file mode 100755
index 0000000..de2461f
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -0,0 +1,263 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Run-time work-arounds helpers
+ */
+
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_midg_regmap.h>
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+
+void kbase_hw_set_features_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_feature *features;
+	u32 gpu_id;
+	u32 product_id;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			features = base_hw_features_tMIx;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	} else {
+		switch (product_id) {
+		case GPU_ID_PI_TFRX:
+			/* FALLTHROUGH */
+		case GPU_ID_PI_T86X:
+			features = base_hw_features_tFxx;
+			break;
+		case GPU_ID_PI_T83X:
+			features = base_hw_features_t83x;
+			break;
+		case GPU_ID_PI_T82X:
+			features = base_hw_features_t82x;
+			break;
+		case GPU_ID_PI_T76X:
+			features = base_hw_features_t76x;
+			break;
+		case GPU_ID_PI_T72X:
+			features = base_hw_features_t72x;
+			break;
+		case GPU_ID_PI_T62X:
+			features = base_hw_features_t62x;
+			break;
+		case GPU_ID_PI_T60X:
+			features = base_hw_features_t60x;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	}
+
+	for (; *features != BASE_HW_FEATURE_END; features++)
+		set_bit(*features, &kbdev->hw_features_mask[0]);
+}
+
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues;
+	u32 gpu_id;
+	u32 product_id;
+	u32 impl_tech;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
+
+	if (impl_tech != IMPLEMENTATION_MODEL) {
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id) {
+			case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 1):
+				issues = base_hw_issues_tMIx_r0p0_05dev0;
+				break;
+			case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 2):
+				issues = base_hw_issues_tMIx_r0p0;
+				break;
+			default:
+				if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+							GPU_ID2_PRODUCT_TMIX) {
+					issues = base_hw_issues_tMIx_r0p0;
+				} else {
+					dev_err(kbdev->dev,
+						"Unknown GPU ID %x", gpu_id);
+					return -EINVAL;
+				}
+			}
+		} else {
+			switch (gpu_id) {
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+				issues = base_hw_issues_t60x_r0p0_15dev0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+				issues = base_hw_issues_t60x_r0p0_eac;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+				issues = base_hw_issues_t60x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+				issues = base_hw_issues_t62x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+				issues = base_hw_issues_t62x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+				issues = base_hw_issues_t62x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+				issues = base_hw_issues_t76x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+				issues = base_hw_issues_t76x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+				issues = base_hw_issues_t76x_r0p1_50rel0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+				issues = base_hw_issues_t76x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+				issues = base_hw_issues_t76x_r0p3;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+				issues = base_hw_issues_t76x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+				issues = base_hw_issues_t72x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+				issues = base_hw_issues_t72x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+				issues = base_hw_issues_t72x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+				issues = base_hw_issues_tFRx_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+				issues = base_hw_issues_tFRx_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8):
+				issues = base_hw_issues_tFRx_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
+				issues = base_hw_issues_tFRx_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+				issues = base_hw_issues_t86x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8):
+				issues = base_hw_issues_t86x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
+				issues = base_hw_issues_t86x_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
+				issues = base_hw_issues_t83x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8):
+				issues = base_hw_issues_t83x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
+				issues = base_hw_issues_t82x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
+				issues = base_hw_issues_t82x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8):
+				issues = base_hw_issues_t82x_r1p0;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		}
+	} else {
+		/* Software model */
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+			case GPU_ID2_PRODUCT_TMIX:
+				issues = base_hw_issues_model_tMIx;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		} else {
+			switch (product_id) {
+			case GPU_ID_PI_T60X:
+				issues = base_hw_issues_model_t60x;
+				break;
+			case GPU_ID_PI_T62X:
+				issues = base_hw_issues_model_t62x;
+				break;
+			case GPU_ID_PI_T72X:
+				issues = base_hw_issues_model_t72x;
+				break;
+			case GPU_ID_PI_T76X:
+				issues = base_hw_issues_model_t76x;
+				break;
+			case GPU_ID_PI_TFRX:
+				issues = base_hw_issues_model_tFRx;
+				break;
+			case GPU_ID_PI_T86X:
+				issues = base_hw_issues_model_t86x;
+				break;
+			case GPU_ID_PI_T83X:
+				issues = base_hw_issues_model_t83x;
+				break;
+			case GPU_ID_PI_T82X:
+				issues = base_hw_issues_model_t82x;
+				break;
+			default:
+				dev_err(kbdev->dev, "Unknown GPU ID %x",
+					gpu_id);
+				return -EINVAL;
+			}
+		}
+	}
+
+	dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT);
+
+	for (; *issues != BASE_HW_ISSUE_END; issues++)
+		set_bit(*issues, &kbdev->hw_issues_mask[0]);
+
+	return 0;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
new file mode 100755
index 0000000..fce7d29
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Run-time work-arounds helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+	test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+	test_bit(feature, &(kbdev)->hw_features_mask[0])
+
+/**
+ * @brief Set the HW issues mask depending on the GPU ID
+ */
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif				/* _KBASE_HW_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
new file mode 100755
index 0000000..b09be99
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access backend common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_BACKEND_H_
+#define _KBASE_HWACCESS_BACKEND_H_
+
+/**
+ * kbase_backend_early_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_early_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
new file mode 100755
index 0000000..261453e
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_gpu_defs.h
+ * HW access common definitions
+ */
+
+#ifndef _KBASE_HWACCESS_DEFS_H_
+#define _KBASE_HWACCESS_DEFS_H_
+
+#include <mali_kbase_jm_defs.h>
+
+/* The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
+ * accessing this structure */
+struct kbase_hwaccess_data {
+	struct kbase_context *active_kctx;
+
+	struct kbase_backend_data backend;
+};
+
+#endif /* _KBASE_HWACCESS_DEFS_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
new file mode 100755
index 0000000..cf8a813
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ *				  GPU
+ * @kbdev:	Device pointer
+ * @regdump:	Pointer to struct kbase_gpuprops_regdump structure
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+/**
+ * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU
+ * @kbdev:   Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads GPU properties that are dependent on the hardware
+ * features bitmask
+ */
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
new file mode 100755
index 0000000..5de2b75
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include <mali_kbase_instr_defs.h>
+
+/**
+ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * @kbdev:	Kbase device
+ * @kctx:	Kbase context
+ * @setup:	HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx:	Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true,
+ * of call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ *				       completed.
+ * @kctx:	Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ *				     completed
+ * @kctx:	Kbase context
+ * @success:	Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx:	Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_init() - Terminate the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
new file mode 100755
index 0000000..2efa293
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -0,0 +1,337 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_JM_H_
+#define _KBASE_HWACCESS_JM_H_
+
+/**
+ * kbase_backend_run_atom() - Run an atom on the GPU
+ * @kbdev:	Device pointer
+ * @atom:	Atom to run
+ *
+ * Caller must hold the HW access lock
+ */
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_find_free_address_space() - Find a free address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * If no address spaces are currently free, then this function can evict an
+ * idle context from the runpool, freeing up the address space it was using.
+ *
+ * The address space is marked as in use. The caller must either assign a
+ * context using kbase_gpu_use_ctx(), or release it using
+ * kbase_gpu_release_free_address_space()
+ *
+ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none
+ *	   available
+ */
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_free_address_space() - Release an address space.
+ * @kbdev:	Device pointer
+ * @as_nr:	Address space to release
+ *
+ * The address space must have been returned by
+ * kbase_gpu_find_free_address_space().
+ */
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+						int as_nr);
+
+/**
+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the
+ *			     provided address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @as_nr:	Free address space to use
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * Return: true if successful, false if ASID not assigned. If kctx->as_pending
+ *         is true then ASID assignment will complete at some point in the
+ *         future and will re-start scheduling, otherwise no ASIDs are available
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr);
+
+/**
+ * kbase_backend_use_ctx_sched() - Activate a context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * The context must already be scheduled and assigned to an address space. If
+ * the context is not scheduled, then kbase_gpu_use_ctx() should be used
+ * instead.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if context is now active, false otherwise (ie if context does
+ *	   not have an address space assigned)
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+					struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
+ *                                 de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold as->transaction_mutex and runpool_irq.lock
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+				struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will
+ *                                   de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold as->transaction_mutex
+ *
+ * This function must perform any operations that could not be performed in IRQ
+ * context by kbase_backend_release_ctx_irq().
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_complete_wq() - Perform backend-specific actions required on
+ *				 completing an atom.
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the atom to complete
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ *
+ * Return: true if atom has completed, false if atom should be re-submitted
+ */
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions
+ *                                        required on completing an atom, after
+ *                                        any scheduling has taken place.
+ * @kbdev:         Device pointer
+ * @core_req:      Core requirements of atom
+ * @affinity:      Affinity of atom
+ * @coreref_state: Coreref state of atom
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ */
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state);
+
+/**
+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
+ *			   and remove any others from the ringbuffers.
+ * @kbdev:		Device pointer
+ * @end_timestamp:	Timestamp of reset
+ */
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
+
+/**
+ * kbase_backend_inspect_head() - Return the atom currently at the head of slot
+ *				  @js
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
+ *                              @js
+ * @kbdev: Device pointer
+ * @js:    Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
+ *				      slot.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot
+ */
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
+ *					that are currently on the GPU.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot @js that are currently on the GPU.
+ */
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
+ *				       has changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg starting/stopping
+ * scheduling timers).
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_slot_free() - Return the number of jobs that can be currently
+ *			       submitted to slot @js.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of jobs that can be submitted.
+ */
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
+ *                                        running from a context
+ * @kctx: Context pointer
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting
+ * context from the hardware.
+ */
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+
+/**
+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and
+ *                               to be descheduled.
+ * @kctx: Context pointer
+ *
+ * This should be called following kbase_js_zap_context(), to ensure the context
+ * can be safely destroyed.
+ */
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
+
+/**
+ * kbase_backend_get_current_flush_id - Return the current flush ID
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: the current flush ID to be recorded for each job chain
+ */
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it returns
+ * true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_locked - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+#endif
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom);
+
+#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
new file mode 100755
index 0000000..bae7c0d
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_pm.h
+ * HW access power manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_PM_H_
+#define _KBASE_HWACCESS_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+#include <mali_kbase_pm_defs.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/* Functions common to all HW access backends */
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return 0 if the power management framework was successfully
+ *         initialized.
+ */
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_hwaccess_pm_powerup - Power up the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
+ *
+ * Power up GPU after all modules have been initialized and interrupt handlers
+ * installed.
+ *
+ * Return: 0 if powerup was successful.
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ *
+ * Should ensure that no new interrupts are generated, but allow any currently
+ * running interrupt handlers to complete successfully. The GPU is forced off by
+ * the time this function returns, regardless of whether or not the active power
+ * policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to suspend the GPU
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to resume the GPU from a suspend
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for activating the GPU. Called when the first
+ * context goes active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for idling the GPU. Called when the last
+ * context goes idle.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev         The kbase device structure for the device (must be a
+ *                      valid pointer)
+ * @param new_core_mask_js0 The core mask to use for job slot 0
+ * @param new_core_mask_js0 The core mask to use for job slot 1
+ * @param new_core_mask_js0 The core mask to use for job slot 2
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+int set_policy_by_name(struct kbase_device *kbdev, const char *name);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+					const struct kbase_pm_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
+
+#endif /* _KBASE_HWACCESS_PM_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
new file mode 100755
index 0000000..89d26ea
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
new file mode 100755
index 0000000..cf7bf1b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,66 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_HWCNT_READER_H_
+#define _KBASE_HWCNT_READER_H_
+
+/* The ids of ioctl commands. */
+#define KBASE_HWCNT_READER 0xBE
+#define KBASE_HWCNT_READER_GET_HWVER       _IOR(KBASE_HWCNT_READER, 0x00, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32)
+#define KBASE_HWCNT_READER_DUMP            _IOW(KBASE_HWCNT_READER, 0x10, u32)
+#define KBASE_HWCNT_READER_CLEAR           _IOW(KBASE_HWCNT_READER, 0x11, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER      _IOR(KBASE_HWCNT_READER, 0x20,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_PUT_BUFFER      _IOW(KBASE_HWCNT_READER, 0x21,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_SET_INTERVAL    _IOW(KBASE_HWCNT_READER, 0x30, u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT    _IOW(KBASE_HWCNT_READER, 0x40, u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT   _IOW(KBASE_HWCNT_READER, 0x41, u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32)
+
+/**
+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
+ * @timestamp:  time when sample was collected
+ * @event_id:   id of an event that triggered sample collection
+ * @buffer_idx: position in sampling area where sample buffer was stored
+ */
+struct kbase_hwcnt_reader_metadata {
+	u64 timestamp;
+	u32 event_id;
+	u32 buffer_idx;
+};
+
+/**
+ * enum base_hwcnt_reader_event - hwcnt dumping events
+ * @BASE_HWCNT_READER_EVENT_MANUAL:   manual request for dump
+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
+ * @BASE_HWCNT_READER_EVENT_PREJOB:   prejob dump request
+ * @BASE_HWCNT_READER_EVENT_POSTJOB:  postjob dump request
+ * @BASE_HWCNT_READER_EVENT_COUNT:    number of supported events
+ */
+enum base_hwcnt_reader_event {
+	BASE_HWCNT_READER_EVENT_MANUAL,
+	BASE_HWCNT_READER_EVENT_PERIODIC,
+	BASE_HWCNT_READER_EVENT_PREJOB,
+	BASE_HWCNT_READER_EVENT_POSTJOB,
+
+	BASE_HWCNT_READER_EVENT_COUNT
+};
+
+#endif /* _KBASE_HWCNT_READER_H_ */
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.c
new file mode 100755
index 0000000..fda317b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.c
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+
+void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(!kbdev->hwcnt.suspended_kctx);
+
+	kctx = kbdev->hwcnt.kctx;
+	kbdev->hwcnt.suspended_kctx = kctx;
+
+	/* Relevant state was saved into hwcnt.suspended_state when enabling the
+	 * counters */
+
+	if (kctx) {
+		KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.flags &
+						KBASE_CTX_FLAG_PRIVILEGED);
+		kbase_instr_hwcnt_disable(kctx);
+	}
+}
+
+void kbase_instr_hwcnt_resume(struct kbase_device *kbdev)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	kctx = kbdev->hwcnt.suspended_kctx;
+	kbdev->hwcnt.suspended_kctx = NULL;
+
+	if (kctx) {
+		int err;
+
+		err = kbase_instr_hwcnt_enable_internal(kbdev, kctx,
+						&kbdev->hwcnt.suspended_state);
+		WARN(err, "Failed to restore instrumented hardware counters on resume\n");
+	}
+}
+
+int kbase_instr_hwcnt_enable(struct kbase_context *kctx,
+		struct kbase_uk_hwcnt_setup *setup)
+{
+	struct kbase_device *kbdev;
+	int err;
+
+	kbdev = kctx->kbdev;
+
+	/* Mark the context as active so the GPU is kept turned on */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread. */
+	kbase_pm_context_active(kbdev);
+
+	/* Schedule the context in */
+	kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, setup);
+	if (err) {
+		/* Release the context. This had its own Power Manager Active
+		 * reference */
+		kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+		/* Also release our Power Manager Active reference */
+		kbase_pm_context_idle(kbdev);
+	}
+
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_enable);
+
+int kbase_instr_hwcnt_disable(struct kbase_context *kctx)
+{
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	err = kbase_instr_hwcnt_disable_internal(kctx);
+	if (err)
+		goto out;
+
+	/* Release the context. This had its own Power Manager Active reference
+	 */
+	kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+	/* Also release our Power Manager Active reference */
+	kbase_pm_context_idle(kbdev);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+									kctx);
+out:
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_disable);
+
+int kbase_instr_hwcnt_dump(struct kbase_context *kctx)
+{
+	int err;
+
+	err = kbase_instr_hwcnt_request_dump(kctx);
+	if (err)
+		return err;
+
+	err = kbase_instr_hwcnt_wait_for_dump(kctx);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump);
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.h
new file mode 100755
index 0000000..ac3355e
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.h
@@ -0,0 +1,75 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Instrumentation API definitions
+ */
+
+#ifndef _KBASE_INSTR_H_
+#define _KBASE_INSTR_H_
+
+#include <mali_kbase_hwaccess_instr.h>
+
+/**
+ * kbase_instr_hwcnt_enable() - Enable HW counters collection
+ * @kctx:	Kbase context
+ * @setup:	&struct kbase_uk_hwcnt_setup containing configuration
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable(struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable() - Disable HW counters collection
+ * @kctx:	Kbase context
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump() - Trigger dump of HW counters and wait for
+ *                            completion
+ * @kctx:	Kbase context
+ *
+ * Context: might sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_suspend() - GPU is suspending, stop HW counter collection
+ * @kbdev:	Kbase device
+ *
+ * It's assumed that there's only one privileged context.
+ *
+ * Safe to do this without lock when doing an OS suspend, because it only
+ * changes in response to user-space IOCTLs
+ */
+void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_resume() - GPU is resuming, resume HW counter collection
+ * @kbdev:	Kbase device
+ */
+void kbase_instr_hwcnt_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c
new file mode 100755
index 0000000..c579d0a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c
@@ -0,0 +1,431 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/of.h>
+#include <linux/sysfs.h>
+
+#include <mali_kbase.h>
+
+#define NR_IPA_GROUPS 8
+
+struct kbase_ipa_context;
+
+/**
+ * struct ipa_group - represents a single IPA group
+ * @name:               name of the IPA group
+ * @capacitance:        capacitance constant for IPA group
+ * @calc_power:         function to calculate power for IPA group
+ */
+struct ipa_group {
+	const char *name;
+	u32 capacitance;
+	u32 (*calc_power)(struct kbase_ipa_context *,
+			struct ipa_group *);
+};
+
+#include <mali_kbase_ipa_tables.h>
+
+/**
+ * struct kbase_ipa_context - IPA context per device
+ * @kbdev:              pointer to kbase device
+ * @groups:             array of IPA groups for this context
+ * @vinstr_cli:         vinstr client handle
+ * @vinstr_buffer:      buffer to dump hardware counters onto
+ * @ipa_lock:           protects the entire IPA context
+ */
+struct kbase_ipa_context {
+	struct kbase_device *kbdev;
+	struct ipa_group groups[NR_IPA_GROUPS];
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct mutex ipa_lock;
+};
+
+static ssize_t show_ipa_group(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbase_ipa_context *ctx = kbdev->ipa_ctx;
+	ssize_t count = -EINVAL;
+	size_t i;
+
+	mutex_lock(&ctx->ipa_lock);
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		if (!strcmp(ctx->groups[i].name, attr->attr.name)) {
+			count = snprintf(buf, PAGE_SIZE, "%lu\n",
+				(unsigned long)ctx->groups[i].capacitance);
+			break;
+		}
+	}
+	mutex_unlock(&ctx->ipa_lock);
+	return count;
+}
+
+static ssize_t set_ipa_group(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbase_ipa_context *ctx = kbdev->ipa_ctx;
+	unsigned long capacitance;
+	size_t i;
+	int err;
+
+	err = kstrtoul(buf, 0, &capacitance);
+	if (err < 0)
+		return err;
+	if (capacitance > U32_MAX)
+		return -ERANGE;
+
+	mutex_lock(&ctx->ipa_lock);
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		if (!strcmp(ctx->groups[i].name, attr->attr.name)) {
+			ctx->groups[i].capacitance = capacitance;
+			mutex_unlock(&ctx->ipa_lock);
+			return count;
+		}
+	}
+	mutex_unlock(&ctx->ipa_lock);
+	return -EINVAL;
+}
+
+static DEVICE_ATTR(group0, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group1, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group2, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group3, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group4, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group5, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group6, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group7, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+
+static struct attribute *kbase_ipa_attrs[] = {
+	&dev_attr_group0.attr,
+	&dev_attr_group1.attr,
+	&dev_attr_group2.attr,
+	&dev_attr_group3.attr,
+	&dev_attr_group4.attr,
+	&dev_attr_group5.attr,
+	&dev_attr_group6.attr,
+	&dev_attr_group7.attr,
+	NULL,
+};
+
+static struct attribute_group kbase_ipa_attr_group = {
+	.name = "ipa",
+	.attrs = kbase_ipa_attrs,
+};
+
+static void init_ipa_groups(struct kbase_ipa_context *ctx)
+{
+	memcpy(ctx->groups, ipa_groups_def, sizeof(ctx->groups));
+}
+
+#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	struct device_node *np, *child;
+	struct ipa_group *group;
+	size_t nr_groups;
+	size_t i;
+	int err;
+
+	np = of_get_child_by_name(kbdev->dev->of_node, "ipa-groups");
+	if (!np)
+		return 0;
+
+	nr_groups = 0;
+	for_each_available_child_of_node(np, child)
+		nr_groups++;
+	if (!nr_groups || nr_groups > ARRAY_SIZE(ctx->groups)) {
+		dev_err(kbdev->dev, "invalid number of IPA groups: %zu", nr_groups);
+		err = -EINVAL;
+		goto err0;
+	}
+
+	for_each_available_child_of_node(np, child) {
+		const char *name;
+		u32 capacitance;
+
+		name = of_get_property(child, "label", NULL);
+		if (!name) {
+			dev_err(kbdev->dev, "label missing for IPA group");
+			err = -EINVAL;
+			goto err0;
+		}
+		err = of_property_read_u32(child, "capacitance",
+				&capacitance);
+		if (err < 0) {
+			dev_err(kbdev->dev, "capacitance missing for IPA group");
+			goto err0;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+			group = &ctx->groups[i];
+			if (!strcmp(group->name, name)) {
+				group->capacitance = capacitance;
+				break;
+			}
+		}
+	}
+
+	of_node_put(np);
+	return 0;
+err0:
+	of_node_put(np);
+	return err;
+}
+#else
+static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx)
+{
+	return 0;
+}
+#endif
+
+static int reset_ipa_groups(struct kbase_ipa_context *ctx)
+{
+	init_ipa_groups(ctx);
+	return update_ipa_groups_from_dt(ctx);
+}
+
+static inline u32 read_hwcnt(struct kbase_ipa_context *ctx,
+	u32 offset)
+{
+	u8 *p = ctx->vinstr_buffer;
+
+	return *(u32 *)&p[offset];
+}
+
+static inline u32 add_saturate(u32 a, u32 b)
+{
+	if (U32_MAX - a < b)
+		return U32_MAX;
+	return a + b;
+}
+
+/*
+ * Calculate power estimation based on hardware counter `c'
+ * across all shader cores.
+ */
+static u32 calc_power_sc_single(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	u64 core_mask;
+	u32 base = 0, r = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			u64 n = read_hwcnt(ctx, base + c);
+			u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+			u32 s = group->capacitance;
+
+			r = add_saturate(r, div_u64(n * s, d));
+		}
+		base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+		core_mask >>= 1;
+	}
+	return r;
+}
+
+/*
+ * Calculate power estimation based on hardware counter `c1'
+ * and `c2' across all shader cores.
+ */
+static u32 calc_power_sc_double(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c1, u32 c2)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	u64 core_mask;
+	u32 base = 0, r = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			u64 n = read_hwcnt(ctx, base + c1);
+			u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+			u32 s = group->capacitance;
+
+			r = add_saturate(r, div_u64(n * s, d));
+			n = read_hwcnt(ctx, base + c2);
+			r = add_saturate(r, div_u64(n * s, d));
+		}
+		base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+		core_mask >>= 1;
+	}
+	return r;
+}
+
+static u32 calc_power_single(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c)
+{
+	u64 n = read_hwcnt(ctx, c);
+	u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+	u32 s = group->capacitance;
+
+	return div_u64(n * s, d);
+}
+
+static u32 calc_power_group0(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_single(ctx, group, L2_ANY_LOOKUP);
+}
+
+static u32 calc_power_group1(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_single(ctx, group, TILER_ACTIVE);
+}
+
+static u32 calc_power_group2(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, FRAG_ACTIVE);
+}
+
+static u32 calc_power_group3(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_double(ctx, group, VARY_SLOT_32,
+			VARY_SLOT_16);
+}
+
+static u32 calc_power_group4(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, TEX_COORD_ISSUE);
+}
+
+static u32 calc_power_group5(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, EXEC_INSTR_COUNT);
+}
+
+static u32 calc_power_group6(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_double(ctx, group, BEATS_RD_LSC,
+			BEATS_WR_LSC);
+}
+
+static u32 calc_power_group7(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, EXEC_CORE_ACTIVE);
+}
+
+static int attach_vinstr(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	size_t dump_size;
+
+	dump_size = kbase_vinstr_dump_size(kbdev);
+	ctx->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!ctx->vinstr_buffer) {
+		dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
+		return -1;
+	}
+
+	setup.jm_bm = ~0u;
+	setup.shader_bm = ~0u;
+	setup.tiler_bm = ~0u;
+	setup.mmu_l2_bm = ~0u;
+	ctx->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx,
+			&setup, ctx->vinstr_buffer);
+	if (!ctx->vinstr_cli) {
+		dev_err(kbdev->dev, "Failed to register IPA with vinstr core");
+		kfree(ctx->vinstr_buffer);
+		ctx->vinstr_buffer = NULL;
+		return -1;
+	}
+	return 0;
+}
+
+static void detach_vinstr(struct kbase_ipa_context *ctx)
+{
+	if (ctx->vinstr_cli)
+		kbase_vinstr_detach_client(ctx->vinstr_cli);
+	ctx->vinstr_cli = NULL;
+	kfree(ctx->vinstr_buffer);
+	ctx->vinstr_buffer = NULL;
+}
+
+struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_context *ctx;
+	int err;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return NULL;
+
+	mutex_init(&ctx->ipa_lock);
+	ctx->kbdev = kbdev;
+
+	err = reset_ipa_groups(ctx);
+	if (err < 0)
+		goto err0;
+
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_ipa_attr_group);
+	if (err < 0)
+		goto err0;
+
+	return ctx;
+err0:
+	kfree(ctx);
+	return NULL;
+}
+
+void kbase_ipa_term(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+
+	detach_vinstr(ctx);
+	sysfs_remove_group(&kbdev->dev->kobj, &kbase_ipa_attr_group);
+	kfree(ctx);
+}
+
+u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err)
+{
+	struct ipa_group *group;
+	u32 power = 0;
+	size_t i;
+
+	mutex_lock(&ctx->ipa_lock);
+	if (!ctx->vinstr_cli) {
+		*err = attach_vinstr(ctx);
+		if (*err < 0)
+			goto err0;
+	}
+	*err = kbase_vinstr_hwc_dump(ctx->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL);
+	if (*err)
+		goto err0;
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		group = &ctx->groups[i];
+		power = add_saturate(power, group->calc_power(ctx, group));
+	}
+err0:
+	mutex_unlock(&ctx->ipa_lock);
+	return power;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_dynamic_power);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h
new file mode 100755
index 0000000..e2234d1
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h
@@ -0,0 +1,41 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+struct kbase_ipa_context;
+
+/**
+ * kbase_ipa_init - initialize the kbase ipa core
+ * @kbdev:      kbase device
+ *
+ * Return:      pointer to the IPA context or NULL on failure
+ */
+struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_term - terminate the kbase ipa core
+ * @ctx:        pointer to the IPA context
+ */
+void kbase_ipa_term(struct kbase_ipa_context *ctx);
+
+/**
+ * kbase_ipa_dynamic_power - calculate power
+ * @ctx:        pointer to the IPA context
+ * @err:        0 on success, negative on failure
+ *
+ * Return:      returns power consumption as mw @ 1GHz @ 1V
+ */
+u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h
new file mode 100644
index 0000000..101abfe
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h
@@ -0,0 +1,104 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#define NR_BYTES_PER_CNT  4
+#define NR_CNT_PER_BLOCK 64
+
+#define JM_BASE    (0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define TILER_BASE (1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define MMU_BASE   (2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define SC0_BASE   (3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+
+#define GPU_ACTIVE       (JM_BASE    + NR_BYTES_PER_CNT *  6)
+#define TILER_ACTIVE     (TILER_BASE + NR_BYTES_PER_CNT * 45)
+#define L2_ANY_LOOKUP    (MMU_BASE   + NR_BYTES_PER_CNT * 25)
+#define FRAG_ACTIVE      (SC0_BASE   + NR_BYTES_PER_CNT *  4)
+#define EXEC_CORE_ACTIVE (SC0_BASE   + NR_BYTES_PER_CNT * 26)
+#define EXEC_INSTR_COUNT (SC0_BASE   + NR_BYTES_PER_CNT * 28)
+#define TEX_COORD_ISSUE  (SC0_BASE   + NR_BYTES_PER_CNT * 40)
+#define VARY_SLOT_32     (SC0_BASE   + NR_BYTES_PER_CNT * 50)
+#define VARY_SLOT_16     (SC0_BASE   + NR_BYTES_PER_CNT * 51)
+#define BEATS_RD_LSC     (SC0_BASE   + NR_BYTES_PER_CNT * 56)
+#define BEATS_WR_LSC     (SC0_BASE   + NR_BYTES_PER_CNT * 61)
+
+static u32 calc_power_group0(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group1(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group2(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group3(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group4(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group5(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group6(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group7(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+
+static struct ipa_group ipa_groups_def[] = {
+	/* L2 */
+	{
+		.name = "group0",
+		.capacitance = 687,
+		.calc_power = calc_power_group0,
+	},
+	/* TILER */
+	{
+		.name = "group1",
+		.capacitance = 0,
+		.calc_power = calc_power_group1,
+	},
+	/* FRAG */
+	{
+		.name = "group2",
+		.capacitance = 23,
+		.calc_power = calc_power_group2,
+	},
+	/* VARY */
+	{
+		.name = "group3",
+		.capacitance = 108,
+		.calc_power = calc_power_group3,
+	},
+	/* TEX */
+	{
+		.name = "group4",
+		.capacitance = 128,
+		.calc_power = calc_power_group4,
+	},
+	/* EXEC INSTR */
+	{
+		.name = "group5",
+		.capacitance = 249,
+		.calc_power = calc_power_group5,
+	},
+	/* LSC */
+	{
+		.name = "group6",
+		.capacitance = 0,
+		.calc_power = calc_power_group6,
+	},
+	/* EXEC OVERHEAD */
+	{
+		.name = "group7",
+		.capacitance = 29,
+		.calc_power = calc_power_group7,
+	},
+};
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100755
index 0000000..0ffd8be
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1781 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <mali_kbase.h>
+#include <mali_kbase_uku.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/ratelimit.h>
+#include <linux/pagemap.h>
+
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_tlstream.h>
+
+#include "mali_kbase_dma_fence.h"
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/* Return whether katom will run on the GPU or not. Currently only soft jobs and
+ * dependency-only atoms do not run on the GPU */
+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) ||  \
+			((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==    \
+							BASE_JD_REQ_DEP)))
+/*
+ * This is the kernel side of the API. Only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ *   completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p)
+{
+#ifdef CONFIG_COMPAT
+	if (kctx->is_compat)
+		return compat_ptr(p->compat_value);
+#endif
+	return p->value;
+}
+
+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
+ */
+static int jd_run_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+	if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+		/* Dependency only atom */
+		katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		return 0;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		/* Soft-job */
+		if (katom->will_fail_event_code) {
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			return 0;
+		}
+		if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+			if (!kbase_replay_process(katom))
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		} else if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		}
+		return 0;
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+	/* Queue an action about whether we should try scheduling a context */
+	return kbasep_js_add_job(kctx, katom);
+}
+
+#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE)
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kbdev = katom->kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Check whether the atom's other dependencies were already met. If
+	 * katom is a GPU atom then the job scheduler may be able to represent
+	 * the dependencies, hence we may attempt to submit it before they are
+	 * met. Other atoms must have had both dependencies resolved.
+	 */
+	if (IS_GPU_ATOM(katom) ||
+			(!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+			!kbase_jd_katom_dep_atom(&katom->dep[1]))) {
+		/* katom dep complete, attempt to run it */
+		bool resched = false;
+
+		resched = jd_run_atom(katom);
+
+		if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+			/* The atom has already finished */
+			resched |= jd_done_nolock(katom, NULL);
+		}
+
+		if (resched)
+			kbase_js_sched_all(kbdev);
+	}
+}
+#endif
+
+#ifdef CONFIG_KDS
+
+/* Add the katom to the kds waiting list.
+ * Atoms must be added to the waiting list after a successful call to kds_async_waitall.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	list_add_tail(&katom->node, &kctx->waiting_kds_resource);
+}
+
+/* Remove the katom from the kds waiting list.
+ * Atoms must be removed from the waiting list before a call to kds_resource_set_release_sync.
+ * The supplied katom must first have been added to the list with a call to kbase_jd_kds_waiters_add.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_remove(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	list_del(&katom->node);
+}
+
+static void kds_dep_clear(void *callback_parameter, void *callback_extra_parameter)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = (struct kbase_jd_atom *)callback_parameter;
+	KBASE_DEBUG_ASSERT(katom);
+
+	ctx = &katom->kctx->jctx;
+
+	/* If KDS resource has already been satisfied (e.g. due to zapping)
+	 * do nothing.
+	 */
+	mutex_lock(&ctx->lock);
+	if (!katom->kds_dep_satisfied) {
+		katom->kds_dep_satisfied = true;
+		kbase_jd_dep_clear_locked(katom);
+	}
+	mutex_unlock(&ctx->lock);
+}
+
+static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 *  there is a race between job completion and cancellation */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+#endif				/* CONFIG_KDS */
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_KDS
+	if (katom->kds_rset) {
+		struct kbase_jd_context *jctx = &katom->kctx->jctx;
+
+		/*
+		 * As the atom is no longer waiting, remove it from
+		 * the waiting list.
+		 */
+
+		mutex_lock(&jctx->lock);
+		kbase_jd_kds_waiters_remove(katom);
+		mutex_unlock(&jctx->lock);
+
+		/* Release the kds resource or cancel if zapping */
+		kds_resource_set_release_sync(&katom->kds_rset);
+	}
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 * Any successfully completed atom would have had all it's callbacks
+	 * completed before the atom was run, so only flush for failed atoms.
+	 */
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		flush_workqueue(katom->kctx->dma_fence.wq);
+#endif /* CONFIG_MALI_DMA_FENCE */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_KDS
+	/* Prevent the KDS resource from triggering the atom in case of zapping */
+	if (katom->kds_rset)
+		katom->kds_dep_satisfied = true;
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_signal(katom);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	kbase_gpu_vm_lock(katom->kctx);
+	/* only roll back if extres is non-NULL */
+	if (katom->extres) {
+		u32 res_no;
+
+		res_no = katom->nr_extres;
+		while (res_no-- > 0) {
+			struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+			struct kbase_va_region *reg;
+
+			reg = kbase_region_tracker_find_region_base_address(
+					katom->kctx,
+					katom->extres[res_no].gpu_address);
+			kbase_unmap_external_resource(katom->kctx, reg, alloc);
+		}
+		kfree(katom->extres);
+		katom->extres = NULL;
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+}
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ */
+
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+	int err_ret_val = -EINVAL;
+	u32 res_no;
+#ifdef CONFIG_KDS
+	u32 kds_res_count = 0;
+	struct kds_resource **kds_resources = NULL;
+	unsigned long *kds_access_bitmap = NULL;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct kbase_dma_fence_resv_info info = {
+		.dma_fence_resv_count = 0,
+	};
+#endif
+	struct base_external_resource *input_extres;
+
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+	/* no resources encoded, early out */
+	if (!katom->nr_extres)
+		return -EINVAL;
+
+	katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+	if (NULL == katom->extres) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	/* copy user buffer to the end of our real buffer.
+	 * Make sure the struct sizes haven't changed in a way
+	 * we don't support */
+	BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+	input_extres = (struct base_external_resource *)
+			(((unsigned char *)katom->extres) +
+			(sizeof(*katom->extres) - sizeof(*input_extres)) *
+			katom->nr_extres);
+
+	if (copy_from_user(input_extres,
+			get_compat_pointer(katom->kctx, &user_atom->extres_list),
+			sizeof(*input_extres) * katom->nr_extres) != 0) {
+		err_ret_val = -EINVAL;
+		goto early_err_out;
+	}
+#ifdef CONFIG_KDS
+	/* assume we have to wait for all */
+	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+	kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL);
+
+	if (!kds_resources) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+	kds_access_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres),
+				    sizeof(unsigned long),
+				    GFP_KERNEL);
+	if (!kds_access_bitmap) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	info.resv_objs = kmalloc_array(katom->nr_extres,
+				       sizeof(struct reservation_object *),
+				       GFP_KERNEL);
+	if (!info.resv_objs) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	info.dma_fence_excl_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres),
+					     sizeof(unsigned long),
+					     GFP_KERNEL);
+	if (!info.dma_fence_excl_bitmap) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Take the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* need to keep the GPU VM locked while we set up UMM buffers */
+	kbase_gpu_vm_lock(katom->kctx);
+	for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+		struct base_external_resource *res;
+		struct kbase_va_region *reg;
+		struct kbase_mem_phy_alloc *alloc;
+		bool exclusive;
+
+		res = &input_extres[res_no];
+		exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
+				? true : false;
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx,
+				res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+		/* did we find a matching region object? */
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE)) {
+			/* roll back */
+			goto failed_loop;
+		}
+
+		if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
+				(reg->flags & KBASE_REG_SECURE)) {
+			katom->atom_flags |= KBASE_KATOM_FLAG_SECURE;
+		}
+
+		alloc = kbase_map_external_resource(katom->kctx, reg,
+				current->mm
+#ifdef CONFIG_KDS
+				, &kds_res_count, kds_resources,
+				kds_access_bitmap, exclusive
+#endif
+				);
+		if (!alloc) {
+			err_ret_val = -EINVAL;
+			goto failed_loop;
+		}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+			struct reservation_object *resv;
+
+			resv = reg->gpu_alloc->imported.umm.dma_buf->resv;
+			if (resv)
+				kbase_dma_fence_add_reservation(resv, &info,
+								exclusive);
+		}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+		/* finish with updating out array with the data we found */
+		/* NOTE: It is important that this is the last thing we do (or
+		 * at least not before the first write) as we overwrite elements
+		 * as we loop and could be overwriting ourself, so no writes
+		 * until the last read for an element.
+		 * */
+		katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+		katom->extres[res_no].alloc = alloc;
+	}
+	/* successfully parsed the extres array */
+	/* drop the vm lock before we call into kds */
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+#ifdef CONFIG_KDS
+	if (kds_res_count) {
+		int wait_failed;
+
+		/* We have resources to wait for with kds */
+		katom->kds_dep_satisfied = false;
+
+		wait_failed = kds_async_waitall(&katom->kds_rset,
+				&katom->kctx->jctx.kds_cb, katom, NULL,
+				kds_res_count, kds_access_bitmap,
+				kds_resources);
+
+		if (wait_failed)
+			goto failed_kds_setup;
+		else
+			kbase_jd_kds_waiters_add(katom);
+	} else {
+		/* Nothing to wait for, so kds dep met */
+		katom->kds_dep_satisfied = true;
+	}
+	kfree(kds_resources);
+	kfree(kds_access_bitmap);
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (info.dma_fence_resv_count) {
+		int ret;
+
+		ret = kbase_dma_fence_wait(katom, &info);
+		if (ret < 0)
+			goto failed_dma_fence_setup;
+	}
+
+	kfree(info.resv_objs);
+	kfree(info.dma_fence_excl_bitmap);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* all done OK */
+	return 0;
+
+/* error handling section */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+failed_dma_fence_setup:
+#ifdef CONFIG_KDS
+	/* If we are here, dma_fence setup failed but KDS didn't.
+	 * Revert KDS setup if any.
+	 */
+	if (kds_res_count) {
+		mutex_unlock(&katom->kctx->jctx.lock);
+		kds_resource_set_release_sync(&katom->kds_rset);
+		mutex_lock(&katom->kctx->jctx.lock);
+
+		kbase_jd_kds_waiters_remove(katom);
+		katom->kds_dep_satisfied = true;
+	}
+#endif /* CONFIG_KDS */
+#endif /* CONFIG_MALI_DMA_FENCE */
+#ifdef CONFIG_KDS
+failed_kds_setup:
+#endif
+#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE)
+	/* Lock the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* lock before we unmap */
+	kbase_gpu_vm_lock(katom->kctx);
+#endif
+
+ failed_loop:
+	/* undo the loop work */
+	while (res_no-- > 0) {
+		struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+
+		kbase_unmap_external_resource(katom->kctx, NULL, alloc);
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+ early_err_out:
+	kfree(katom->extres);
+	katom->extres = NULL;
+#ifdef CONFIG_KDS
+	kfree(kds_resources);
+	kfree(kds_access_bitmap);
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	kfree(info.resv_objs);
+	kfree(info.dma_fence_excl_bitmap);
+#endif
+	return err_ret_val;
+}
+
+static inline void jd_resolve_dep(struct list_head *out_list,
+					struct kbase_jd_atom *katom,
+					u8 d)
+{
+	u8 other_d = !d;
+
+	while (!list_empty(&katom->dep_head[d])) {
+		struct kbase_jd_atom *dep_atom;
+		u8 dep_type;
+
+		dep_atom = list_entry(katom->dep_head[d].next,
+				struct kbase_jd_atom, dep_item[d]);
+
+		list_del(katom->dep_head[d].next);
+
+		dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+		kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+		if (katom->event_code != BASE_JD_EVENT_DONE &&
+			(dep_type != BASE_JD_DEP_TYPE_ORDER)) {
+#ifdef CONFIG_KDS
+			if (!dep_atom->kds_dep_satisfied) {
+				/* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and
+				 * do nothing. If the callback doesn't happen then kbase_jd_post_external_resources will clean up
+				 */
+				dep_atom->kds_dep_satisfied = true;
+			}
+#endif
+
+#ifdef CONFIG_MALI_DMA_FENCE
+			kbase_dma_fence_cancel_callbacks(dep_atom);
+#endif
+
+			dep_atom->event_code = katom->event_code;
+			KBASE_DEBUG_ASSERT(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED);
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY)
+					!= BASE_JD_REQ_SOFT_REPLAY) {
+				dep_atom->will_fail_event_code =
+					dep_atom->event_code;
+			} else {
+				dep_atom->status =
+					KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+		}
+		if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) {
+			bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+			int dep_count;
+
+			dep_count = atomic_read(&dep_atom->dma_fence.dep_count);
+			if (likely(dep_count == -1)) {
+				dep_satisfied = true;
+			} else if (dep_count == 0) {
+				/*
+				 * All fences for this atom has signaled, but
+				 * the worker that will queue the atom has not
+				 * yet run.
+				 *
+				 * Mark the atom as handled by setting
+				 * dep_count to -1 so that the worker doesn't
+				 * queue the atom again.
+				 */
+				atomic_set(&dep_atom->dma_fence.dep_count, -1);
+				/*
+				 * Remove the atom from the list of dma-fence
+				 * waiting atoms.
+				 */
+				kbase_dma_fence_waiters_remove(dep_atom);
+				dep_satisfied = true;
+			} else {
+				dep_satisfied = false;
+			}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+#ifdef CONFIG_KDS
+			dep_satisfied = dep_satisfied && dep_atom->kds_dep_satisfied;
+#endif
+
+			if (dep_satisfied)
+				list_add_tail(&dep_atom->dep_item[0], out_list);
+		}
+	}
+}
+
+KBASE_EXPORT_TEST_API(jd_resolve_dep);
+
+#if MALI_CUSTOMER_RELEASE == 0
+static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	kbdev->force_replay_count++;
+
+	if (kbdev->force_replay_count >= kbdev->force_replay_limit) {
+		kbdev->force_replay_count = 0;
+		katom->event_code = BASE_JD_EVENT_FORCE_REPLAY;
+
+		if (kbdev->force_replay_random)
+			kbdev->force_replay_limit =
+			   (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1;
+
+		dev_info(kbdev->dev, "force_replay : promoting to error\n");
+	}
+}
+
+/** Test to see if atom should be forced to fail.
+ *
+ * This function will check if an atom has a replay job as a dependent. If so
+ * then it will be considered for forced failure. */
+static void jd_check_force_failure(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int i;
+
+	if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) ||
+	    (katom->core_req & BASEP_JD_REQ_EVENT_NEVER))
+		return;
+
+	for (i = 1; i < BASE_JD_ATOM_COUNT; i++) {
+		if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom ||
+		    kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
+			struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
+
+			if ((dep_atom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==
+						     BASE_JD_REQ_SOFT_REPLAY &&
+			    (dep_atom->core_req & kbdev->force_replay_core_req)
+					     == kbdev->force_replay_core_req) {
+				jd_force_failure(kbdev, katom);
+				return;
+			}
+		}
+	}
+}
+#endif
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head completed_jobs;
+	struct list_head runnable_jobs;
+	bool need_to_try_schedule_context = false;
+	int i;
+
+	INIT_LIST_HEAD(&completed_jobs);
+	INIT_LIST_HEAD(&runnable_jobs);
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+#if MALI_CUSTOMER_RELEASE == 0
+	jd_check_force_failure(katom);
+#endif
+
+	/* This is needed in case an atom is failed due to being invalid, this
+	 * can happen *before* the jobs that the atom depends on have completed */
+	for (i = 0; i < 2; i++) {
+		if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
+			list_del(&katom->dep_item[i]);
+			kbase_jd_katom_dep_clear(&katom->dep[i]);
+		}
+	}
+
+	/* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with
+	 * BASE_JD_EVENT_TILE_RANGE_FAULT.
+	 *
+	 * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the
+	 * error code to BASE_JD_EVENT_DONE
+	 */
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) &&
+		  katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) {
+		if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) {
+			/* Promote the failure to job done */
+			katom->event_code = BASE_JD_EVENT_DONE;
+			katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED);
+		}
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+	list_add_tail(&katom->dep_item[0], &completed_jobs);
+
+	while (!list_empty(&completed_jobs)) {
+		katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, dep_item[0]);
+		list_del(completed_jobs.prev);
+
+		KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+		for (i = 0; i < 2; i++)
+			jd_resolve_dep(&runnable_jobs, katom, i);
+
+		if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+			kbase_jd_post_external_resources(katom);
+
+		while (!list_empty(&runnable_jobs)) {
+			struct kbase_jd_atom *node;
+
+			node = list_entry(runnable_jobs.next,
+					struct kbase_jd_atom, dep_item[0]);
+
+			list_del(runnable_jobs.next);
+
+			KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+			if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
+					!kctx->jctx.sched_info.ctx.is_dying) {
+				need_to_try_schedule_context |= jd_run_atom(node);
+			} else {
+				node->event_code = katom->event_code;
+
+				if ((node->core_req & BASEP_JD_REQ_ATOM_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+					if (kbase_replay_process(node))
+						/* Don't complete this atom */
+						continue;
+				} else if (node->core_req &
+							BASE_JD_REQ_SOFT_JOB) {
+					/* If this is a fence wait then remove it from the list of sync waiters. */
+					if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
+						list_del(&node->dep_item[0]);
+
+					kbase_finish_soft_job(node);
+				}
+				node->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+
+			if (node->status == KBASE_JD_ATOM_STATE_COMPLETED)
+				list_add_tail(&node->dep_item[0], &completed_jobs);
+		}
+
+		/* Completing an atom might have freed up space
+		 * in the ringbuffer, but only on that slot. */
+		jsctx_ll_flush_to_rb(kctx,
+				katom->sched_priority,
+				katom->slot_nr);
+
+		/* Register a completed job as a disjoint event when the GPU
+		 * is in a disjoint state (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kctx->kbdev);
+		if (completed_jobs_ctx)
+			list_add_tail(&katom->dep_item[0], completed_jobs_ctx);
+		else
+			kbase_event_post(kctx, katom);
+
+		/* Decrement and check the TOTAL number of jobs. This includes
+		 * those not tracked by the scheduler: 'not ready to run' and
+		 * 'dependency-only' jobs. */
+		if (--kctx->jctx.job_nr == 0)
+			wake_up(&kctx->jctx.zero_jobs_wait);	/* All events are safely queued now, and we can signal any waiter
+								 * that we've got no more jobs (so we can be safely terminated) */
+	}
+
+	return need_to_try_schedule_context;
+}
+
+KBASE_EXPORT_TEST_API(jd_done_nolock);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+enum {
+	CORE_REQ_DEP_ONLY,
+	CORE_REQ_SOFT,
+	CORE_REQ_COMPUTE,
+	CORE_REQ_FRAGMENT,
+	CORE_REQ_VERTEX,
+	CORE_REQ_TILER,
+	CORE_REQ_FRAGMENT_VERTEX,
+	CORE_REQ_FRAGMENT_VERTEX_TILER,
+	CORE_REQ_FRAGMENT_TILER,
+	CORE_REQ_VERTEX_TILER,
+	CORE_REQ_UNKNOWN
+};
+static const char * const core_req_strings[] = {
+	"Dependency Only Job",
+	"Soft Job",
+	"Compute Shader Job",
+	"Fragment Shader Job",
+	"Vertex/Geometry Shader Job",
+	"Tiler Job",
+	"Fragment Shader + Vertex/Geometry Shader Job",
+	"Fragment Shader + Vertex/Geometry Shader Job + Tiler Job",
+	"Fragment Shader + Tiler Job",
+	"Vertex/Geometry Shader Job + Tiler Job",
+	"Unknown Job"
+};
+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
+{
+	if (core_req & BASE_JD_REQ_SOFT_JOB)
+		return core_req_strings[CORE_REQ_SOFT];
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		return core_req_strings[CORE_REQ_COMPUTE];
+	switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) {
+	case BASE_JD_REQ_DEP:
+		return core_req_strings[CORE_REQ_DEP_ONLY];
+	case BASE_JD_REQ_FS:
+		return core_req_strings[CORE_REQ_FRAGMENT];
+	case BASE_JD_REQ_CS:
+		return core_req_strings[CORE_REQ_VERTEX];
+	case BASE_JD_REQ_T:
+		return core_req_strings[CORE_REQ_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_TILER];
+	case (BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_VERTEX_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER];
+	}
+	return core_req_strings[CORE_REQ_UNKNOWN];
+}
+#endif
+
+bool jd_submit_atom(struct kbase_context *kctx,
+			 const struct base_jd_atom_v2 *user_atom,
+			 struct kbase_jd_atom *katom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	base_jd_core_req core_req;
+	int queued = 0;
+	int i;
+	int sched_prio;
+	bool ret;
+	bool will_fail = false;
+
+	/* Update the TOTAL number of jobs. This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	jctx->job_nr++;
+
+	core_req = user_atom->core_req;
+
+	katom->start_timestamp.tv64 = 0;
+	katom->time_spent_us = 0;
+	katom->udata = user_atom->udata;
+	katom->kctx = kctx;
+	katom->nr_extres = user_atom->nr_extres;
+	katom->extres = NULL;
+	katom->device_nr = user_atom->device_nr;
+	katom->affinity = 0;
+	katom->jc = user_atom->jc;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->core_req = core_req;
+	katom->atom_flags = 0;
+	katom->retry_count = 0;
+	katom->need_cache_flush_cores_retained = 0;
+	katom->x_pre_dep = NULL;
+	katom->x_post_dep = NULL;
+	katom->will_fail_event_code = 0;
+#ifdef CONFIG_KDS
+	/* Start by assuming that the KDS dependencies are satisfied,
+	 * kbase_jd_pre_external_resources will correct this if there are dependencies */
+	katom->kds_dep_satisfied = true;
+	katom->kds_rset = NULL;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	atomic_set(&katom->dma_fence.dep_count, -1);
+#endif
+
+	/* Don't do anything if there is a mess up with dependencies.
+	   This is done in a separate cycle to check both the dependencies at ones, otherwise
+	   it will be extra complexity to deal with 1st dependency ( just added to the list )
+	   if only the 2nd one has invalid config.
+	 */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+		if (dep_atom_number) {
+			if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+				katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+
+				/* Wrong dependency setup. Atom will be sent
+				 * back to user space. Do not record any
+				 * dependencies. */
+				kbase_tlstream_tl_new_atom(
+						katom,
+						kbase_jd_atom_id(kctx, katom));
+				kbase_tlstream_tl_ret_atom_ctx(
+						katom, kctx);
+
+				ret = jd_done_nolock(katom, NULL);
+				goto out;
+			}
+		}
+	}
+
+	/* Add dependencies */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type;
+		struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number];
+
+		dep_atom_type = user_atom->pre_dep[i].dependency_type;
+		kbase_jd_katom_dep_clear(&katom->dep[i]);
+
+		if (!dep_atom_number)
+			continue;
+
+		if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED ||
+				dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+
+			if (dep_atom->event_code == BASE_JD_EVENT_DONE)
+				continue;
+			/* don't stop this atom if it has an order dependency
+			 * only to the failed one, try to submit it through
+			 * the normal path
+			 */
+			if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom->event_code > BASE_JD_EVENT_ACTIVE) {
+				continue;
+			}
+
+			/* Atom has completed, propagate the error code if any */
+			katom->event_code = dep_atom->event_code;
+			katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+			/* This atom is going through soft replay or
+			 * will be sent back to user space. Do not record any
+			 * dependencies. */
+			kbase_tlstream_tl_new_atom(
+					katom,
+					kbase_jd_atom_id(kctx, katom));
+			kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
+
+			if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+					 == BASE_JD_REQ_SOFT_REPLAY) {
+				if (kbase_replay_process(katom)) {
+					ret = false;
+					goto out;
+				}
+			}
+			will_fail = true;
+
+		} else {
+			/* Atom is in progress, add this atom to the list */
+			list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
+			kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type);
+			queued = 1;
+		}
+	}
+
+	if (will_fail) {
+		if (!queued) {
+			ret = jd_done_nolock(katom, NULL);
+
+			goto out;
+		} else {
+			katom->will_fail_event_code = katom->event_code;
+			ret = false;
+
+			goto out;
+		}
+	} else {
+		/* These must occur after the above loop to ensure that an atom
+		 * that depends on a previous atom with the same number behaves
+		 * as expected */
+		katom->event_code = BASE_JD_EVENT_DONE;
+		katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	}
+
+	/* Create a new atom recording all dependencies it was set up with. */
+	kbase_tlstream_tl_new_atom(
+			katom,
+			kbase_jd_atom_id(kctx, katom));
+	kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
+	for (i = 0; i < 2; i++)
+		if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
+					&katom->dep[i])) {
+			kbase_tlstream_tl_dep_atom_atom(
+					(void *)kbase_jd_katom_dep_atom(
+						&katom->dep[i]),
+					(void *)katom);
+		} else if (BASE_JD_DEP_TYPE_INVALID !=
+				user_atom->pre_dep[i].dependency_type) {
+			/* Resolved dependency. */
+			int dep_atom_number =
+				user_atom->pre_dep[i].atom_id;
+			struct kbase_jd_atom *dep_atom =
+				&jctx->atoms[dep_atom_number];
+
+			kbase_tlstream_tl_rdep_atom_atom(
+					(void *)dep_atom,
+					(void *)katom);
+		}
+
+	/* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
+	if (!katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with an invalid device_nr */
+	if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
+	    (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid device_nr %d",
+				katom->device_nr);
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with invalid core requirements */
+	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
+			(katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid core requirements");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* For invalid priority, be most lenient and choose the default */
+	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+	katom->sched_priority = sched_prio;
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		/* handle what we need to do to access the external resources */
+		if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
+			/* setup failed (no access, bad resource, unknown resource types, etc.) */
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+	/* Validate the atom. Function will return error if the atom is
+	 * malformed.
+	 *
+	 * Soft-jobs never enter the job scheduler but have their own initialize method.
+	 *
+	 * If either fail then we immediately complete the atom with an error.
+	 */
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
+		if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	} else {
+		/* Soft-job */
+		if (kbase_prepare_soft_job(katom) != 0) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	katom->work_id = atomic_inc_return(&jctx->work_id);
+	trace_gpu_job_enqueue((u32)kctx->id, katom->work_id,
+			kbasep_map_core_reqs_to_string(katom->core_req));
+#endif
+
+	if (queued && !IS_GPU_ATOM(katom)) {
+		ret = false;
+		goto out;
+	}
+#ifdef CONFIG_KDS
+	if (!katom->kds_dep_satisfied) {
+		/* Queue atom due to KDS dependency */
+		ret = false;
+		goto out;
+	}
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (atomic_read(&katom->dma_fence.dep_count) != -1) {
+		ret = false;
+		goto out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+		if (kbase_replay_process(katom))
+			ret = false;
+		else
+			ret = jd_done_nolock(katom, NULL);
+
+		goto out;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+
+		ret = false;
+	} else if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		ret = kbasep_js_add_job(kctx, katom);
+		/* If job was cancelled then resolve immediately */
+		if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED)
+			ret = jd_done_nolock(katom, NULL);
+	} else {
+		/* This is a pure dependency. Resolve it immediately */
+		ret = jd_done_nolock(katom, NULL);
+	}
+
+ out:
+	return ret;
+}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data,
+		int uk6_atom)
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int err = 0;
+	int i;
+	bool need_to_try_schedule_context = false;
+	struct kbase_device *kbdev;
+	void __user *user_addr;
+	u32 latest_flush;
+
+	/*
+	 * kbase_jd_submit isn't expected to fail and so all errors with the jobs
+	 * are reported by immediately falling them (through event system)
+	 */
+	kbdev = kctx->kbdev;
+
+	beenthere(kctx, "%s", "Enter");
+
+	if ((kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != 0) {
+		dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+		return -EINVAL;
+	}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	if ((uk6_atom && submit_data->stride !=
+			sizeof(struct base_jd_atom_v2_uk6)) ||
+			submit_data->stride != sizeof(base_jd_atom_v2)) {
+#else
+	if (submit_data->stride != sizeof(base_jd_atom_v2)) {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+		dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+		return -EINVAL;
+	}
+
+	user_addr = get_compat_pointer(kctx, &submit_data->addr);
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight));
+
+	/* All atoms submitted in this call have the same flush ID */
+	latest_flush = kbase_backend_get_current_flush_id(kbdev);
+
+	for (i = 0; i < submit_data->nr_atoms; i++) {
+		struct base_jd_atom_v2 user_atom;
+		struct kbase_jd_atom *katom;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+		if (uk6_atom) {
+			struct base_jd_atom_v2_uk6 user_atom_v6;
+			base_jd_dep_type dep_types[2] = {BASE_JD_DEP_TYPE_DATA, BASE_JD_DEP_TYPE_DATA};
+
+			if (copy_from_user(&user_atom_v6, user_addr,
+					sizeof(user_atom_v6))) {
+				err = -EINVAL;
+				KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx,
+					atomic_sub_return(
+					submit_data->nr_atoms - i,
+					&kctx->timeline.jd_atoms_in_flight));
+				break;
+			}
+			/* Convert from UK6 atom format to UK7 format */
+			user_atom.jc = user_atom_v6.jc;
+			user_atom.udata = user_atom_v6.udata;
+			user_atom.extres_list = user_atom_v6.extres_list;
+			user_atom.nr_extres = user_atom_v6.nr_extres;
+			user_atom.core_req = user_atom_v6.core_req;
+
+			/* atom number 0 is used for no dependency atoms */
+			if (!user_atom_v6.pre_dep[0])
+				dep_types[0] = BASE_JD_DEP_TYPE_INVALID;
+
+			base_jd_atom_dep_set(&user_atom.pre_dep[0],
+					user_atom_v6.pre_dep[0],
+					dep_types[0]);
+
+			/* atom number 0 is used for no dependency atoms */
+			if (!user_atom_v6.pre_dep[1])
+				dep_types[1] = BASE_JD_DEP_TYPE_INVALID;
+
+			base_jd_atom_dep_set(&user_atom.pre_dep[1],
+					user_atom_v6.pre_dep[1],
+					dep_types[1]);
+
+			user_atom.atom_number = user_atom_v6.atom_number;
+			user_atom.prio = user_atom_v6.prio;
+			user_atom.device_nr = user_atom_v6.device_nr;
+		} else {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+		if (copy_from_user(&user_atom, user_addr, sizeof(user_atom)) != 0) {
+			err = -EINVAL;
+			KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(submit_data->nr_atoms - i, &kctx->timeline.jd_atoms_in_flight));
+			break;
+		}
+#ifdef BASE_LEGACY_UK6_SUPPORT
+		}
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+		user_addr = (void __user *)((uintptr_t) user_addr + submit_data->stride);
+
+		mutex_lock(&jctx->lock);
+#ifndef compiletime_assert
+#define compiletime_assert_defined
+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \
+while (false)
+#endif
+		compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) ==
+					BASE_JD_ATOM_COUNT,
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+		compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) ==
+					sizeof(user_atom.atom_number),
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+#ifdef compiletime_assert_defined
+#undef compiletime_assert
+#undef compiletime_assert_defined
+#endif
+		katom = &jctx->atoms[user_atom.atom_number];
+
+		/* Record the flush ID for the cache flush optimisation */
+		katom->flush_id = latest_flush;
+
+		while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
+			/* Atom number is already in use, wait for the atom to
+			 * complete
+			 */
+			mutex_unlock(&jctx->lock);
+
+			/* This thread will wait for the atom to complete. Due
+			 * to thread scheduling we are not sure that the other
+			 * thread that owns the atom will also schedule the
+			 * context, so we force the scheduler to be active and
+			 * hence eventually schedule this context at some point
+			 * later.
+			 */
+			kbase_js_sched_all(kbdev);
+
+			if (wait_event_killable(katom->completed,
+					katom->status ==
+					KBASE_JD_ATOM_STATE_UNUSED) != 0) {
+				/* We're being killed so the result code
+				 * doesn't really matter
+				 */
+				return 0;
+			}
+			mutex_lock(&jctx->lock);
+		}
+
+		need_to_try_schedule_context |=
+				       jd_submit_atom(kctx, &user_atom, katom);
+
+		/* Register a completed job as a disjoint event when the GPU is in a disjoint state
+		 * (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kbdev);
+
+		mutex_unlock(&jctx->lock);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kbdev);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_submit);
+
+void kbase_jd_done_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	union kbasep_js_policy *js_policy;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	u64 cache_jc = katom->jc;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool schedule = false;
+	bool context_idle;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+	kbdev = kctx->kbdev;
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	/*
+	 * Begin transaction on JD context and JS context
+	 */
+	mutex_lock(&jctx->lock);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* This worker only gets called on contexts that are scheduled *in*. This is
+	 * because it only happens in response to an IRQ from a job that was
+	 * running.
+	 */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
+
+	if (katom->event_code == BASE_JD_EVENT_STOPPED) {
+		/* Atom has been promoted to stopped */
+		unsigned long flags;
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		kbase_js_unpull(kctx, katom);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&jctx->lock);
+
+		return;
+	}
+
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		dev_err(kbdev->dev,
+			"t6xx: GPU fault 0x%02lx from job slot %d\n",
+					(unsigned long)katom->event_code,
+								katom->slot_nr);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	/* Retain state before the katom disappears */
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+
+	if (!kbasep_js_has_atom_finished(&katom_retained_state)) {
+		mutex_lock(&js_devdata->runpool_mutex);
+		kbasep_js_clear_job_retry_submit(katom);
+		/* An atom that has been hard-stopped might have previously
+		 * been soft-stopped and has just finished before the hard-stop
+		 * occurred. For this reason, clear the hard-stopped flag */
+		katom->atom_flags &= ~(KBASE_KATOM_FLAG_BEEN_HARD_STOPPED);
+		mutex_unlock(&js_devdata->runpool_mutex);
+	}
+
+	if (kbasep_js_has_atom_finished(&katom_retained_state))
+		schedule = true;
+
+	context_idle = kbase_js_complete_atom_wq(kctx, katom);
+
+	KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
+
+	kbasep_js_remove_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+	schedule |= jd_done_nolock(katom, &kctx->completed_jobs);
+
+	/* katom may have been freed now, do not use! */
+
+	if (context_idle) {
+		unsigned long flags;
+
+		mutex_lock(&js_devdata->queue_mutex);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		/* If kbase_sched() has scheduled this context back in then
+		 * ctx_active will have been set after we marked it as inactive,
+		 * and another pm reference will have been taken, so drop our
+		 * reference. But do not call kbase_jm_idle_ctx(), as the
+		 * context is active and fast-starting is allowed.
+		 *
+		 * If an atom has been fast-started then kctx->atoms_pulled will
+		 * be non-zero but ctx_active will still be false (as the
+		 * previous pm reference has been inherited). Do NOT drop our
+		 * reference, as it has been re-used, and leave the context as
+		 * active.
+		 *
+		 * If no new atoms have been started then ctx_active will still
+		 * be false and atoms_pulled will be zero, so drop the reference
+		 * and call kbase_jm_idle_ctx().
+		 *
+		 * As the checks are done under both the queue_mutex and
+		 * runpool_irq.lock is should be impossible for this to race
+		 * with the scheduler code.
+		 */
+		if (kctx->ctx_active || !atomic_read(&kctx->atoms_pulled)) {
+			/* Calling kbase_jm_idle_ctx() here will ensure that
+			 * atoms are not fast-started when we drop the
+			 * runpool_irq.lock. This is not performed if ctx_active
+			 * is set as in that case another pm reference has been
+			 * taken and a fast-start would be valid.
+			 */
+			if (!kctx->ctx_active)
+				kbase_jm_idle_ctx(kbdev, kctx);
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+					flags);
+
+			kbase_pm_context_idle(kbdev);
+		} else {
+			kctx->ctx_active = true;
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+					flags);
+		}
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+
+	/*
+	 * Transaction complete
+	 */
+	mutex_unlock(&jctx->lock);
+
+	/* Job is now no longer running, so can now safely release the context
+	 * reference, and handle any actions that were logged against the atom's retained state */
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+
+	if (schedule)
+		kbase_js_sched_all(kbdev);
+
+	if (!atomic_dec_return(&kctx->work_count)) {
+		/* If worker now idle then post all events that jd_done_nolock()
+		 * has queued */
+		mutex_lock(&jctx->lock);
+		while (!list_empty(&kctx->completed_jobs)) {
+			struct kbase_jd_atom *atom = list_entry(
+					kctx->completed_jobs.next,
+					struct kbase_jd_atom, dep_item[0]);
+			list_del(kctx->completed_jobs.next);
+
+			kbase_event_post(kctx, atom);
+		}
+		mutex_unlock(&jctx->lock);
+	}
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+}
+
+/**
+ * jd_cancel_worker - Work queue job cancel function.
+ * @data: a &struct work_struct
+ *
+ * Only called as part of 'Zapping' a context (which occurs on termination).
+ * Operates serially with the kbase_jd_done_worker() on the work queue.
+ *
+ * This can only be called on contexts that aren't scheduled.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be
+ * running (by virtue of only being called on contexts that aren't
+ * scheduled).
+ */
+static void jd_cancel_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool need_to_try_schedule_context;
+	bool attr_state_changed;
+	struct kbase_device *kbdev;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	kbdev = kctx->kbdev;
+	jctx = &kctx->jctx;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+	/* This only gets called on contexts that are scheduled out. Hence, we must
+	 * make sure we don't de-ref the number of running jobs (there aren't
+	 * any), nor must we try to schedule out the context (it's already
+	 * scheduled out).
+	 */
+	KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+	/* Scheduler: Remove the job from the system */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&jctx->lock);
+
+	need_to_try_schedule_context = jd_done_nolock(katom, NULL);
+	/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
+	 * schedule the context. There's also no need for the jsctx_mutex to have been taken
+	 * around this too. */
+	KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
+
+	/* katom may have been freed now, do not use! */
+	mutex_unlock(&jctx->lock);
+
+	if (attr_state_changed)
+		kbase_js_sched_all(kbdev);
+}
+
+/**
+ * kbase_jd_done - Complete a job that has been removed from the Hardware
+ * @katom: atom which has been completed
+ * @slot_nr: slot the atom was on
+ * @end_timestamp: completion time
+ * @done_code: completion code
+ *
+ * This must be used whenever a job has been removed from the Hardware, e.g.:
+ * An IRQ indicates that the job finished (for both error and 'done' codes), or
+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop.
+ *
+ * Some work is carried out immediately, and the rest is deferred onto a
+ * workqueue
+ *
+ * Context:
+ *   This can be called safely from atomic context.
+ *   The caller must hold kbasep_js_device_data.runpool_irq.lock
+ */
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
+		ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
+{
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(kctx);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
+		katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0);
+
+	kbase_job_check_leave_disjoint(kbdev, katom);
+
+	katom->slot_nr = slot_nr;
+
+	atomic_inc(&kctx->work_count);
+
+#ifdef CONFIG_DEBUG_FS
+	/* a failed job happened and is waiting for dumping*/
+	if (!katom->will_fail_event_code &&
+			kbase_debug_job_fault_process(katom, katom->event_code))
+		return;
+#endif
+
+	WARN_ON(work_pending(&katom->work));
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, kbase_jd_done_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_done);
+
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+	/* This should only be done from a context that is not scheduled */
+	KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+	WARN_ON(work_pending(&katom->work));
+
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, jd_cancel_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+
+void kbase_jd_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_jd_atom *katom;
+	struct list_head *entry, *tmp;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u);
+
+	kbase_js_zap_context(kctx);
+
+	mutex_lock(&kctx->jctx.lock);
+
+	/*
+	 * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are
+	 * queued outside the job scheduler.
+	 */
+
+	hrtimer_cancel(&kctx->soft_event_timeout);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		katom = list_entry(entry, struct kbase_jd_atom, dep_item[0]);
+		kbase_cancel_soft_job(katom);
+	}
+
+
+#ifdef CONFIG_KDS
+
+	/* For each job waiting on a kds resource, cancel the wait and force the job to
+	 * complete early, this is done so that we don't leave jobs outstanding waiting
+	 * on kds resources which may never be released when contexts are zapped, resulting
+	 * in a hang.
+	 *
+	 * Note that we can safely iterate over the list as the struct kbase_jd_context lock is held,
+	 * this prevents items being removed when calling job_done_nolock in kbase_cancel_kds_wait_job.
+	 */
+
+	list_for_each(entry, &kctx->waiting_kds_resource) {
+		katom = list_entry(entry, struct kbase_jd_atom, node);
+
+		kbase_cancel_kds_wait_job(katom);
+	}
+#endif
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_cancel_all_atoms(kctx);
+#endif
+
+	mutex_unlock(&kctx->jctx.lock);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 */
+	flush_workqueue(kctx->dma_fence.wq);
+#endif
+
+	kbase_jm_wait_for_zero_jobs(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_zap_context);
+
+int kbase_jd_init(struct kbase_context *kctx)
+{
+	int i;
+	int mali_err = 0;
+#ifdef CONFIG_KDS
+	int err;
+#endif				/* CONFIG_KDS */
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kctx->jctx.job_done_wq = alloc_workqueue("mali_jd",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (NULL == kctx->jctx.job_done_wq) {
+		mali_err = -ENOMEM;
+		goto out1;
+	}
+
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		init_waitqueue_head(&kctx->jctx.atoms[i].completed);
+
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]);
+
+		/* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
+		kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
+		kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		kctx->jctx.atoms[i].dma_fence.context = fence_context_alloc(1);
+		atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks);
+#endif
+	}
+
+	mutex_init(&kctx->jctx.lock);
+
+	init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
+
+	spin_lock_init(&kctx->jctx.tb_lock);
+
+#ifdef CONFIG_KDS
+	err = kds_callback_init(&kctx->jctx.kds_cb, 0, kds_dep_clear);
+	if (0 != err) {
+		mali_err = -EINVAL;
+		goto out2;
+	}
+#endif				/* CONFIG_KDS */
+
+	kctx->jctx.job_nr = 0;
+	INIT_LIST_HEAD(&kctx->completed_jobs);
+	atomic_set(&kctx->work_count, 0);
+
+	return 0;
+
+#ifdef CONFIG_KDS
+ out2:
+	destroy_workqueue(kctx->jctx.job_done_wq);
+#endif				/* CONFIG_KDS */
+ out1:
+	return mali_err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_init);
+
+void kbase_jd_exit(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+#ifdef CONFIG_KDS
+	kds_callback_term(&kctx->jctx.kds_cb);
+#endif				/* CONFIG_KDS */
+	/* Work queue is emptied by this */
+	destroy_workqueue(kctx->jctx.job_done_wq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_exit);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
new file mode 100755
index 0000000..0cf75f5
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/seq_file.h>
+
+#include <mali_kbase.h>
+
+#include <mali_kbase_jd_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/**
+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file.
+ * @sfile: The debugfs entry
+ * @data:  Data associated with the entry
+ *
+ * This function is called to get the contents of the JD atoms debugfs file.
+ * This is a report of all atoms managed by kbase_jd_context.atoms
+ *
+ * Return: 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+	struct kbase_jd_atom *atoms;
+	unsigned long irq_flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Print version */
+	seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION);
+
+	/* Print U/K API version */
+	seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR,
+			BASE_UK_VERSION_MINOR);
+
+	/* Print table heading */
+	seq_puts(sfile, "atom id,core reqs,status,coreref status,predeps,start time,time on gpu\n");
+
+	atoms = kctx->jctx.atoms;
+	/* General atom states */
+	mutex_lock(&kctx->jctx.lock);
+	/* JS-related states */
+	spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+	for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
+		struct kbase_jd_atom *atom = &atoms[i];
+		s64 start_timestamp = 0;
+
+		if (atom->status == KBASE_JD_ATOM_STATE_UNUSED)
+			continue;
+
+		/* start_timestamp is cleared as soon as the atom leaves UNUSED state
+		 * and set before a job is submitted to the h/w, a non-zero value means
+		 * it is valid */
+		if (ktime_to_ns(atom->start_timestamp))
+			start_timestamp = ktime_to_ns(
+					ktime_sub(ktime_get(), atom->start_timestamp));
+
+		seq_printf(sfile,
+				"%i,%u,%u,%u,%u %u,%lli,%llu\n",
+				i, atom->core_req, atom->status, atom->coreref_state,
+				(unsigned)(atom->dep[0].atom ?
+						atom->dep[0].atom - atoms : 0),
+				(unsigned)(atom->dep[1].atom ?
+						atom->dep[1].atom - atoms : 0),
+				(signed long long)start_timestamp,
+				(unsigned long long)(atom->time_spent_us ?
+					atom->time_spent_us * 1000 : start_timestamp)
+				);
+	}
+	spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+	mutex_unlock(&kctx->jctx.lock);
+
+	return 0;
+}
+
+
+/**
+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * Return: file descriptor
+ */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private);
+}
+
+static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+	.open = kbasep_jd_debugfs_atoms_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Expose all atoms */
+	debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx,
+			&kbasep_jd_debugfs_atoms_fops);
+
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
new file mode 100755
index 0000000..bc1878f
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_jd_debugfs.h
+ * Header file for job dispatcher-related entries in debugfs
+ */
+
+#ifndef _KBASE_JD_DEBUGFS_H
+#define _KBASE_JD_DEBUGFS_H
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+
+#define MALI_JD_DEBUGFS_VERSION 1
+
+/**
+ * kbasep_jd_debugfs_ctx_add() - Add debugfs entries for JD system
+ *
+ * @kctx Pointer to kbase_context
+ */
+void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx);
+
+#endif  /*_KBASE_JD_DEBUGFS_H*/
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
new file mode 100755
index 0000000..6342532
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_hwaccess_jm.h"
+#include "mali_kbase_jm.h"
+
+/**
+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot
+ *			 @js on the active context.
+ * @kbdev:		Device pointer
+ * @js:			Job slot to run on
+ * @nr_jobs_to_submit:	Number of jobs to attempt to submit
+ *
+ * Return: true if slot can still be submitted on, false if slot is now full.
+ */
+static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
+				int nr_jobs_to_submit)
+{
+	struct kbase_context *kctx;
+	int i;
+
+	kctx = kbdev->hwaccess.active_kctx;
+
+	if (!kctx)
+		return true;
+
+	for (i = 0; i < nr_jobs_to_submit; i++) {
+		struct kbase_jd_atom *katom = kbase_js_pull(kctx, js);
+
+		if (!katom)
+			return true; /* Context has no jobs on this slot */
+
+		kbase_backend_run_atom(kbdev, katom);
+	}
+
+	return false; /* Slot ringbuffer should now be full */
+}
+
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	u32 ret_mask = 0;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	while (js_mask) {
+		int js = ffs(js_mask) - 1;
+		int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
+
+		if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
+			ret_mask |= (1 << js);
+
+		js_mask &= ~(1 << js);
+	}
+
+	return ret_mask;
+}
+
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick(kbdev, js_mask);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_try_kick_all(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick_all(kbdev);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (kbdev->hwaccess.active_kctx == kctx)
+		kbdev->hwaccess.active_kctx = NULL;
+}
+
+void kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (katom->event_code != BASE_JD_EVENT_STOPPED &&
+			katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
+		kbase_js_complete_atom(katom, NULL);
+	} else {
+		kbase_js_unpull(katom->kctx, katom);
+	}
+}
+
+void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
+			ktime_t *end_timestamp)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	kbase_js_complete_atom(katom, end_timestamp);
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
new file mode 100755
index 0000000..27aca3a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -0,0 +1,106 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Job manager common APIs
+ */
+
+#ifndef _KBASE_JM_H_
+#define _KBASE_JM_H_
+
+/**
+ * kbase_jm_kick() - Indicate that there are jobs ready to run.
+ * @kbdev:	Device pointer
+ * @js_mask:	Mask of the job slots that can be pulled from.
+ *
+ * Caller must hold the runpool_irq lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job
+ *			 slots.
+ * @kbdev:	Device pointer
+ *
+ * Caller must hold the runpool_irq lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
+{
+	return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+/**
+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick
+ * @kbdev:   Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from
+ * Context: Caller must hold runpool_irq lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
+ * @kbdev:  Device pointer
+ * Context: Caller must hold runpool_irq lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick_all() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick_all(struct kbase_device *kbdev);
+
+/**
+ * kbase_jm_idle_ctx() - Mark a context as idle.
+ * @kbdev:	Device pointer
+ * @kctx:	Context to mark as idle
+ *
+ * No more atoms will be pulled from this context until it is marked as active
+ * by kbase_js_use_ctx().
+ *
+ * The context should have no atoms currently pulled from it
+ * (kctx->atoms_pulled == 0).
+ *
+ * Caller must hold the runpool_irq lock
+ */
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
+ *				  been soft-stopped or will fail due to a
+ *				  dependency
+ * @kbdev:	Device pointer
+ * @katom:	Atom that has been stopped or will be failed
+ */
+void kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_jm_complete() - Complete an atom
+ * @kbdev:		Device pointer
+ * @katom:		Atom that has completed
+ * @end_timestamp:	Timestamp of atom completion
+ */
+void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
+			ktime_t *end_timestamp);
+
+#endif /* _KBASE_JM_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
new file mode 100755
index 0000000..c1a8e9c
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -0,0 +1,3237 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_hw.h>
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_jm.h"
+#include "mali_kbase_hwaccess_jm.h"
+
+/*
+ * Private types
+ */
+
+/* Bitpattern indicating the result of releasing a context */
+enum {
+	/* The context was descheduled - caller should try scheduling in a new
+	 * one to keep the runpool full */
+	KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+	/* Ctx attributes were changed - caller should try scheduling all
+	 * contexts */
+	KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
+};
+
+typedef u32 kbasep_js_release_result;
+
+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
+	KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
+	KBASE_JS_ATOM_SCHED_PRIO_LOW  /* BASE_JD_PRIO_LOW */
+};
+
+const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
+	BASE_JD_PRIO_HIGH,   /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+	BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+	BASE_JD_PRIO_LOW     /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+};
+
+
+/*
+ * Private function prototypes
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state);
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_policy_ctx_job_cb callback);
+
+/* Helper for trace subcodes */
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return refcnt;
+}
+
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+/*
+ * Private types
+ */
+enum {
+	JS_DEVDATA_INIT_NONE = 0,
+	JS_DEVDATA_INIT_CONSTANTS = (1 << 0),
+	JS_DEVDATA_INIT_POLICY = (1 << 1),
+	JS_DEVDATA_INIT_ALL = ((1 << 2) - 1)
+};
+
+enum {
+	JS_KCTX_INIT_NONE = 0,
+	JS_KCTX_INIT_CONSTANTS = (1 << 0),
+	JS_KCTX_INIT_POLICY = (1 << 1),
+	JS_KCTX_INIT_ALL = ((1 << 2) - 1)
+};
+
+/*
+ * Private functions
+ */
+
+/**
+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
+ * @features: JSn_FEATURE register value
+ *
+ * Given a JSn_FEATURE register value returns the core requirements that match
+ *
+ * Return: Core requirement bit mask
+ */
+static base_jd_core_req core_reqs_from_jsn_features(u16 features)
+{
+	base_jd_core_req core_req = 0u;
+
+	if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+		core_req |= BASE_JD_REQ_V;
+
+	if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+		core_req |= BASE_JD_REQ_CF;
+
+	if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+		core_req |= BASE_JD_REQ_CS;
+
+	if ((features & JS_FEATURE_TILER_JOB) != 0)
+		core_req |= BASE_JD_REQ_T;
+
+	if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+		core_req |= BASE_JD_REQ_FS;
+
+	return core_req;
+}
+
+static void kbase_js_sync_timers(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+	kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
+
+/* Hold the kbasep_js_device_data::runpool_irq::lock for this */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_per_as_data *js_per_as_data;
+	bool result = false;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		int new_refcnt;
+
+		KBASE_DEBUG_ASSERT(as_nr >= 0);
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		KBASE_DEBUG_ASSERT(js_per_as_data->kctx != NULL);
+
+		new_refcnt = ++(js_per_as_data->as_busy_refcount);
+
+		KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
+				NULL, 0u, new_refcnt);
+		result = true;
+	}
+
+	return result;
+}
+
+/* Helper macros to access and modify jsctx_queue.indicies */
+#define JSCTX_GET(offset, var, mask) \
+	((var >> offset) & mask)
+
+/* This wraps around to correct integer size automatically. */
+#define JSCTX_SET(var, offset, value, mask) \
+	(var = ((var & ~(mask << offset)) /*Clear old bits */ \
+	| (((value) & mask) << offset))) /* Set (after masking) new bits */
+
+#define JSCTX_GET_WR_IDX(var) \
+	JSCTX_GET(JSCTX_WR_OFFSET, var, JSCTX_RB_MASK_STORE)
+#define JSCTX_GET_RN_IDX(var) \
+	JSCTX_GET(JSCTX_RN_OFFSET, var, JSCTX_RB_MASK_STORE)
+#define JSCTX_GET_RD_IDX(var) \
+	JSCTX_GET(JSCTX_RD_OFFSET, var, JSCTX_RB_MASK_STORE)
+
+#define JSCTX_GET_IDX_DIFF(lower, upper) \
+	((upper >= lower) ? (upper - lower) : (upper+JSCTX_RB_SIZE_STORE-lower))
+
+#define JSCTX_SET_WR_IDX(var, value) \
+	JSCTX_SET(var, JSCTX_WR_OFFSET, value, JSCTX_RB_MASK_STORE)
+#define JSCTX_SET_RN_IDX(var, value) \
+	JSCTX_SET(var, JSCTX_RN_OFFSET, value, JSCTX_RB_MASK_STORE)
+#define JSCTX_SET_RD_IDX(var, value) \
+	JSCTX_SET(var, JSCTX_RD_OFFSET, value, JSCTX_RB_MASK_STORE)
+
+/**
+ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority to check.
+ *
+ * Return true if there are no atoms to pull. There may be running atoms in the
+ * ring buffer even if there are no atoms to pull. It is also possible for the
+ * ring buffer to be full (with running atoms) when this functions returns
+ * true.
+ *
+ * Return: true if there are no atoms to pull, false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	unsigned int var = atomic_read(&rb->indicies);
+
+	return JSCTX_GET_RD_IDX(var) == JSCTX_GET_WR_IDX(var);
+}
+
+/**
+ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no
+ * pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if the ring buffers for all priorities have no pullable atoms,
+ *	   false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+  * jsctx_rb_is_full(): - Check if the given ringbuffer is full.
+  * @queue: Pointer to the queue containing the ringbuffer.
+  *
+  * No locks explicitly required, result will always be consistent.
+  * But depending on usage, the caller should consider jctx.lock,
+  * for the result to remain correct.
+  *
+  * Return: true if the ringbuffer is full, false otherwise.
+  */
+static inline bool
+jsctx_rb_is_full(struct jsctx_queue *queue)
+{
+	unsigned int var = atomic_read(&queue->indicies);
+	u16 rn_idx = JSCTX_GET_RN_IDX(var);
+	u16 wr_idx = JSCTX_GET_WR_IDX(var);
+
+	return JSCTX_GET_IDX_DIFF(rn_idx, wr_idx) >= JSCTX_RB_SIZE;
+}
+
+
+/**
+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue.
+ * @kctx:     Pointer to kbase context with the queue.
+ * @js:       Job slot id to iterate.
+ * @prio:     Priority id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over a ring buffer and invoke @callback for each entry in buffer, and
+ * remove the entry from the buffer.
+ *
+ * If entries are added to the ring buffer while this is running those entries
+ * may, or may not be covered. To ensure that all entries in the buffer have
+ * been enumerated when this function returns jsctx->lock must be held when
+ * calling this function.
+ *
+ * The HW access lock, js_data.runpool_irq.lock, must always be held when
+ * calling this function.
+ */
+static void
+jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
+		kbasep_js_policy_ctx_job_cb callback)
+{
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+	struct kbase_jd_atom *katom;
+
+	struct list_head *pos, *q;
+
+	unsigned int var = atomic_read(&queue->indicies);
+	u16 running_idx = JSCTX_GET_RN_IDX(var);
+	u16 read_idx = JSCTX_GET_RD_IDX(var);
+	u16 wr_idx = JSCTX_GET_WR_IDX(var);
+	u16 i;
+	const u16 count = JSCTX_GET_IDX_DIFF(running_idx, wr_idx);
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	/* There must be no jobs currently in HW access */
+	WARN_ON(read_idx != JSCTX_GET_RN_IDX(var));
+
+	/* Invoke callback on all kbase_jd_atoms in the ring buffer, and
+	 * removes them from the buffer */
+	for (i = 0; i < count; i++) {
+		int id = queue->entries[read_idx & JSCTX_RB_MASK].atom_id;
+
+		katom = kbase_jd_atom_from_id(kctx, id);
+		read_idx++;
+		callback(kctx->kbdev, katom);
+	}
+	atomic_set(&queue->indicies, 0);
+
+	list_for_each_safe(pos, q, &queue->queue_head) {
+		struct kbase_jd_atom *entry;
+
+		entry = list_entry(pos, struct kbase_jd_atom, queue);
+		list_del(pos);
+		callback(kctx->kbdev, entry);
+	}
+}
+
+/**
+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue
+ * @kctx:     Pointer to kbase context with queue.
+ * @js:       Job slot id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over all the different priorities, and for each call
+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
+ * for each entry, and remove the entry from the queue.
+ */
+static inline void
+jsctx_queue_foreach(struct kbase_context *kctx, int js,
+		kbasep_js_policy_ctx_job_cb callback)
+{
+	int prio;
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+		jsctx_queue_foreach_prio(kctx, js, prio, callback);
+}
+
+/**
+ * jsctx_rb_peek_prio(): - Check buffer and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a pointer to
+ * the next atom, unless the ring buffer is empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	int id;
+	unsigned int var = atomic_read(&rb->indicies);
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	if (JSCTX_GET_RD_IDX(var) == JSCTX_GET_WR_IDX(var))
+		return NULL;
+
+	id = rb->entries[JSCTX_GET_RD_IDX(var) & JSCTX_RB_MASK].atom_id;
+	return kbase_jd_atom_from_id(kctx, id);
+}
+
+/**
+ * jsctx_rb_peek(): - Check all priority buffers and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Check the ring buffers for all priorities, starting from
+ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
+ * pointer to the next atom, unless all the priority's ring buffers are empty.
+ *
+ * Caller must hold the runpool_irq.lock.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		struct kbase_jd_atom *katom;
+
+		katom = jsctx_rb_peek_prio(kctx, js, prio);
+		if (katom)
+			return katom;
+	}
+
+	return NULL;
+}
+
+/**
+ * jsctx_rb_peek_last(): - Check a ring buffer and get the last atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a
+ * pointer to the last atom, unless all the priority's ring buffers are empty.
+ *
+ * The last atom is the atom that was added using jsctx_rb_add() most recently.
+ *
+ * Return: Pointer to last atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_last(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	unsigned int var = atomic_read(&rb->indicies);
+	int id;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	if (!list_empty(&rb->queue_head)) {
+		return list_entry(rb->queue_head.prev,
+				struct kbase_jd_atom, queue);
+	}
+
+	if (JSCTX_GET_RN_IDX(var) == JSCTX_GET_WR_IDX(var))
+		return NULL;
+
+	id = rb->entries[(JSCTX_GET_WR_IDX(var) - 1) & JSCTX_RB_MASK].atom_id;
+	return kbase_jd_atom_from_id(kctx, id);
+}
+
+/**
+ * jsctx_rb_pull(): - Mark atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to pull.
+ *
+ * Mark an atom previously obtained from jsctx_rb_peek() as running.
+ *
+ * @katom must currently be at the head of the ring buffer.
+ */
+static inline void
+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	unsigned int oldvar, var;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	/* Atoms must be pulled in the correct order. */
+	WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
+
+	do {
+		u16 rd_idx;
+
+		oldvar = atomic_read(&rb->indicies);
+		var = oldvar;
+		rd_idx = JSCTX_GET_RD_IDX(var);
+
+		JSCTX_SET_RD_IDX(var, rd_idx+1);
+	} while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar);
+}
+
+/**
+ * jsctx_rb_unpull(): - Undo marking of atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to unpull.
+ *
+ * Undo jsctx_rb_pull() and put @katom back in the queue.
+ *
+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
+ * pulled.
+ */
+static inline void
+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	unsigned int oldvar, var;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	do {
+		u16 rd_idx;
+
+		oldvar = atomic_read(&rb->indicies);
+		var = oldvar;
+
+
+		rd_idx = JSCTX_GET_RD_IDX(var)-1;
+
+		/* Atoms must be unpulled in correct order. */
+		WARN_ON(rb->entries[rd_idx & JSCTX_RB_MASK].atom_id !=
+				kbase_jd_atom_id(kctx, katom));
+
+		JSCTX_SET_RD_IDX(var, rd_idx);
+	} while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar);
+}
+
+/**
+ * jsctx_rb_add(): - Add atom to ring buffer
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to add.
+ *
+ * Add @katom to the ring buffer determined by the atom's priority and job slot
+ * number.
+ *
+ * If the ring buffer is full -EBUSY will be returned.
+ *
+ * Return: On success 0 is returned, on failure a negative error code.
+ */
+static int
+jsctx_rb_add_atom(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	unsigned int oldvar, var;
+	u16 wr_idx, running_idx, count;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	oldvar = atomic_read(&rb->indicies);
+	var = oldvar;
+
+	running_idx = JSCTX_GET_RN_IDX(var);
+	wr_idx = JSCTX_GET_WR_IDX(var);
+	count = JSCTX_GET_IDX_DIFF(running_idx, wr_idx);
+
+	/* Check if the ring buffer is full */
+	if (count >= JSCTX_RB_SIZE)
+		return -EBUSY;
+
+	rb->entries[wr_idx & JSCTX_RB_MASK].atom_id =
+		kbase_jd_atom_id(kctx, katom);
+
+	wr_idx++;
+	JSCTX_SET_WR_IDX(var, wr_idx);
+
+	while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar) {
+		oldvar = atomic_read(&rb->indicies);
+		var = oldvar;
+		wr_idx = JSCTX_GET_WR_IDX(var)+1;
+
+		JSCTX_SET_WR_IDX(var, wr_idx);
+	}
+	return 0;
+}
+
+/**
+ * jsctx_rb_remove(): - Remove atom from ring buffer
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to remove.
+ *
+ * Remove @katom from the ring buffer.
+ *
+ * @katom must have been pulled from the buffer earlier by jsctx_rb_pull(), and
+ * atoms must be removed in the same order they were pulled from the ring
+ * buffer.
+ */
+static inline void
+jsctx_rb_remove(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	unsigned int oldvar, var;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+
+	do {
+		unsigned int rn_idx;
+
+		oldvar = atomic_read(&rb->indicies);
+		var = oldvar;
+
+		rn_idx = JSCTX_GET_RN_IDX(var);
+
+		JSCTX_SET_RN_IDX(var, rn_idx+1);
+	} while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar);
+}
+
+
+static void
+jsctx_ll_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	list_add_tail(&katom->queue, &queue->queue_head);
+}
+
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
+					int js,
+					bool is_scheduled);
+static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+
+void
+jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js)
+{
+	unsigned long flags;
+	struct list_head *pos, *q;
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+	bool flushed_any = false;
+	struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+	bool enqueue_required = false;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+
+	/* Early out for common case */
+	if (list_empty(&queue->queue_head) || jsctx_rb_is_full(queue))
+		return;
+
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+
+	spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, flags);
+	/* If slot will transition from unpullable to pullable then add to
+	 * pullable list */
+	if (jsctx_rb_none_to_pull(kctx, js))
+		enqueue_required = true;
+	else
+		enqueue_required = false;
+
+	list_for_each_safe(pos, q, &queue->queue_head) {
+		struct kbase_jd_atom *katom;
+
+		katom = list_entry(pos, struct kbase_jd_atom, queue);
+
+		KBASE_DEBUG_ASSERT(katom);
+
+		if (jsctx_rb_add_atom(kctx, katom))
+			break;
+
+		katom->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_LL;
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED;
+		flushed_any = true;
+
+		list_del(pos);
+	}
+
+
+	if (flushed_any) {
+		bool timer_sync = false;
+
+		if (enqueue_required) {
+			if (kbase_js_ctx_pullable(kctx, js, false))
+				timer_sync = kbase_js_ctx_list_add_pullable(
+						kctx->kbdev, kctx, js);
+			else
+				timer_sync = kbase_js_ctx_list_add_unpullable(
+						kctx->kbdev, kctx, js);
+			/* If this context is active and the atom is the first
+			 * on its slot, kick the job manager to attempt to
+			 * fast-start the atom */
+			if (kctx == kctx->kbdev->hwaccess.active_kctx)
+				kbase_jm_try_kick(kctx->kbdev, 1 << js);
+
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+					flags);
+
+			if (timer_sync)
+				kbase_backend_ctx_count_changed(kctx->kbdev);
+
+		} else {
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+					flags);
+		}
+	} else {
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	}
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+}
+
+/*
+ * Functions private to KBase ('Protected' functions)
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev)
+{
+	struct kbasep_js_device_data *jsdd;
+	int err;
+	int i;
+	u16 as_present;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	jsdd = &kbdev->js_data;
+
+	KBASE_DEBUG_ASSERT(jsdd->init_status == JS_DEVDATA_INIT_NONE);
+
+	/* These two must be recalculated if nr_hw_address_spaces changes
+	 * (e.g. for HW workarounds) */
+	as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
+	kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+		bool use_workaround;
+
+		use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
+		if (use_workaround) {
+			dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
+			kbdev->nr_user_address_spaces = 1;
+		}
+	}
+#ifdef CONFIG_MALI_DEBUG
+	/* Soft-stop will be disabled on a single context by default unless
+	 * softstop_always is set */
+	jsdd->softstop_always = false;
+#endif				/* CONFIG_MALI_DEBUG */
+	jsdd->nr_all_contexts_running = 0;
+	jsdd->nr_user_contexts_running = 0;
+	jsdd->nr_contexts_pullable = 0;
+	atomic_set(&jsdd->nr_contexts_runnable, 0);
+	/* All ASs initially free */
+	jsdd->as_free = as_present;
+	/* No ctx allowed to submit */
+	jsdd->runpool_irq.submit_allowed = 0u;
+	memset(jsdd->runpool_irq.ctx_attr_ref_count, 0,
+			sizeof(jsdd->runpool_irq.ctx_attr_ref_count));
+	memset(jsdd->runpool_irq.slot_affinities, 0,
+			sizeof(jsdd->runpool_irq.slot_affinities));
+	memset(jsdd->runpool_irq.slot_affinity_refcount, 0,
+			sizeof(jsdd->runpool_irq.slot_affinity_refcount));
+	INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list);
+
+	/* Config attributes */
+	jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+	jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+	jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+	else
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS;
+	jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+	jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408;
+	else
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+	jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+	jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
+	jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
+	jsdd->cfs_ctx_runtime_init_slices =
+		DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES;
+	jsdd->cfs_ctx_runtime_min_slices =
+		DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES;
+	atomic_set(&jsdd->soft_event_timeout_ms, DEFAULT_JS_SOFT_EVENT_TIMEOUT);
+
+	dev_dbg(kbdev->dev, "JS Config Attribs: ");
+	dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
+			jsdd->scheduling_period_ns);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u",
+			jsdd->soft_stop_ticks);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u",
+			jsdd->soft_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u",
+			jsdd->hard_stop_ticks_ss);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u",
+			jsdd->hard_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u",
+			jsdd->hard_stop_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u",
+			jsdd->gpu_reset_ticks_ss);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u",
+			jsdd->gpu_reset_ticks_cl);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u",
+			jsdd->gpu_reset_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u",
+			jsdd->ctx_timeslice_ns);
+	dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_init_slices:%u",
+			jsdd->cfs_ctx_runtime_init_slices);
+	dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u",
+			jsdd->cfs_ctx_runtime_min_slices);
+	dev_dbg(kbdev->dev, "\tsoft_event_timeout:%i",
+		atomic_read(&jsdd->soft_event_timeout_ms));
+
+	if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
+			jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
+			jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping &&
+			jsdd->hard_stop_ticks_dumping <
+			jsdd->gpu_reset_ticks_dumping)) {
+		dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n");
+		return -EINVAL;
+	}
+
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Policy Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.",
+			jsdd->soft_stop_ticks,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Policy Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.",
+			jsdd->hard_stop_ticks_ss,
+			jsdd->hard_stop_ticks_dumping,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Note: The JS policy's tick timer (if coded) will still be run, but do nothing.");
+#endif
+
+	/* setup the number of irq throttle cycles base on given time */
+	{
+		int time_us = kbdev->gpu_props.irq_throttle_time_us;
+		int cycles = kbasep_js_convert_us_to_gpu_ticks_max_freq(kbdev,
+				time_us);
+
+		atomic_set(&kbdev->irq_throttle_cycles, cycles);
+	}
+
+	/* Clear the AS data, including setting NULL pointers */
+	memset(&jsdd->runpool_irq.per_as_data[0], 0,
+			sizeof(jsdd->runpool_irq.per_as_data));
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i)
+		jsdd->js_reqs[i] = core_reqs_from_jsn_features(
+			kbdev->gpu_props.props.raw_props.js_features[i]);
+
+	jsdd->init_status |= JS_DEVDATA_INIT_CONSTANTS;
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+
+	mutex_init(&jsdd->runpool_mutex);
+	mutex_init(&jsdd->queue_mutex);
+	spin_lock_init(&jsdd->runpool_irq.lock);
+	sema_init(&jsdd->schedule_sem, 1);
+
+	err = kbasep_js_policy_init(kbdev);
+	if (!err)
+		jsdd->init_status |= JS_DEVDATA_INIT_POLICY;
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
+		INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]);
+		INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]);
+	}
+
+	/* On error, do no cleanup; this will be handled by the caller(s), since
+	 * we've designed this resource to be safe to terminate on init-fail */
+	if (jsdd->init_status != JS_DEVDATA_INIT_ALL)
+		return -EINVAL;
+
+	return 0;
+}
+
+void kbasep_js_devdata_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbasep_js_devdata_term(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	if ((js_devdata->init_status & JS_DEVDATA_INIT_CONSTANTS)) {
+		s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+		/* The caller must de-register all contexts before calling this
+		 */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
+		KBASE_DEBUG_ASSERT(memcmp(
+				js_devdata->runpool_irq.ctx_attr_ref_count,
+				zero_ctx_attr_ref_count,
+				sizeof(zero_ctx_attr_ref_count)) == 0);
+		CSTD_UNUSED(zero_ctx_attr_ref_count);
+	}
+	if ((js_devdata->init_status & JS_DEVDATA_INIT_POLICY))
+		kbasep_js_policy_term(&js_devdata->policy);
+
+	js_devdata->init_status = JS_DEVDATA_INIT_NONE;
+}
+
+int kbasep_js_kctx_init(struct kbase_context * const kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int err;
+	int i, j;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	KBASE_DEBUG_ASSERT(js_kctx_info->init_status == JS_KCTX_INIT_NONE);
+
+	js_kctx_info->ctx.nr_jobs = 0;
+	js_kctx_info->ctx.is_scheduled = false;
+	js_kctx_info->ctx.is_dying = false;
+	memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
+			sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
+
+	/* Initially, the context is disabled from submission until the create
+	 * flags are set */
+	js_kctx_info->ctx.flags = KBASE_CTX_FLAG_SUBMIT_DISABLED;
+
+	js_kctx_info->init_status |= JS_KCTX_INIT_CONSTANTS;
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+	mutex_init(&js_kctx_info->ctx.jsctx_mutex);
+
+	init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
+
+	err = kbasep_js_policy_init_ctx(kbdev, kctx);
+	if (!err)
+		js_kctx_info->init_status |= JS_KCTX_INIT_POLICY;
+
+	/* On error, do no cleanup; this will be handled by the caller(s), since
+	 * we've designed this resource to be safe to terminate on init-fail */
+	if (js_kctx_info->init_status != JS_KCTX_INIT_ALL)
+		return -EINVAL;
+
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
+			INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].queue_head);
+			atomic_set(&kctx->jsctx_queue[i][j].indicies, 0);
+		}
+	}
+
+	return 0;
+}
+
+void kbasep_js_kctx_term(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	union kbasep_js_policy *js_policy;
+	int js;
+	bool update_ctx_count = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if ((js_kctx_info->init_status & JS_KCTX_INIT_CONSTANTS)) {
+		/* The caller must de-register all jobs before calling this */
+		KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+		KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
+	}
+
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kctx->ctx_runnable_ref) {
+		WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
+		atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		update_ctx_count = true;
+		kctx->ctx_runnable_ref = false;
+	}
+
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if ((js_kctx_info->init_status & JS_KCTX_INIT_POLICY))
+		kbasep_js_policy_term_ctx(js_policy, kctx);
+
+	js_kctx_info->init_status = JS_KCTX_INIT_NONE;
+
+	if (update_ctx_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		kbase_backend_ctx_count_changed(kbdev);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable - Add context to the tail of the per-slot
+ *                                  pullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the tail.
+ *
+ * This function should be used when queueing a context for the first time, or
+ * re-queueing a context that has been pulled from.
+ *
+ * Caller must hold kbasep_jd_device_data.queue_mutex
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->js_data.queue_mutex);
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = true;
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
+ *                                       per-slot pullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the head.
+ *
+ * This function should be used when a context has been scheduled, but no jobs
+ * can currently be pulled from it.
+ *
+ * Caller must hold kbasep_jd_device_data.queue_mutex
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->js_data.queue_mutex);
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = true;
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_unpullable - Add context to the tail of the per-slot
+ *                                    unpullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on the per-slot pullable queue. It will be
+ * removed from the pullable queue before being added to the unpullable queue.
+ *
+ * This function should be used when a context has been pulled from, and there
+ * are no jobs remaining on the specified slot.
+ *
+ * Caller must hold kbasep_jd_device_data.queue_mutex
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->js_data.queue_mutex);
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+				&kbdev->js_data.ctx_list_unpullable[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_remove - Remove context from the per-slot pullable or
+ *                            unpullable context queues
+ * @kbdev:  Device pointer
+ * @kctx:   Context to remove from queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on one of the queues.
+ *
+ * This function should be used when a context has no jobs on the GPU, and no
+ * jobs remaining for the specified slot.
+ *
+ * Caller must hold kbasep_jd_device_data.queue_mutex
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->js_data.queue_mutex);
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
+ *                              queue.
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Caller must hold kbasep_jd_device_data::queue_mutex
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head(
+						struct kbase_device *kbdev,
+						int js)
+{
+	struct kbase_context *kctx;
+
+	lockdep_assert_held(&kbdev->js_data.queue_mutex);
+
+	if (list_empty(&kbdev->js_data.ctx_list_pullable[js]))
+		return NULL;
+
+	kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next,
+					struct kbase_context,
+					jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the
+ *                         specified slot
+ * @kctx:          Context pointer
+ * @js:            Job slot to use
+ * @is_scheduled:  true if the context is currently scheduled
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return:         true if context can be pulled from on specified slot
+ *                 false otherwise
+ */
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
+					bool is_scheduled)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_jd_atom *katom;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	js_devdata = &kctx->kbdev->js_data;
+
+	if (is_scheduled) {
+		if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+			return false;
+	}
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return false; /* No pullable atoms */
+	if (atomic_read(&katom->blocked))
+		return false; /* next atom blocked */
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return false;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return false;
+	}
+
+	return true;
+}
+
+static bool kbase_js_dep_validate(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool ret = true;
+	bool has_dep = false, has_x_dep = false;
+	int js = kbase_js_get_slot(kbdev, katom);
+	int prio = katom->sched_priority;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+		if (dep_atom) {
+			int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+			int dep_prio = dep_atom->sched_priority;
+
+			/* Dependent atom must already have been submitted */
+			if (!(dep_atom->atom_flags &
+					(KBASE_KATOM_FLAG_JSCTX_IN_LL |
+					KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED))){
+				ret = false;
+				break;
+			}
+
+			/* Dependencies with different priorities can't
+			  be represented in the ringbuffer */
+			if (prio != dep_prio) {
+				ret = false;
+				break;
+			}
+
+			if (js == dep_js) {
+				/* Only one same-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_dep) {
+					ret = false;
+					break;
+				}
+				has_dep = true;
+			} else {
+				/* Only one cross-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_x_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * cross-slot dependency */
+				if (dep_atom->x_post_dep) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already be in the
+				 * HW access ringbuffer */
+				if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already have
+				 * completed */
+				if (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_IN_JS) {
+					ret = false;
+					break;
+				}
+				/* Cross-slot dependencies must not violate
+				 * PRLAM-8987 affinity restrictions */
+				if (kbase_hw_has_issue(kbdev,
+							BASE_HW_ISSUE_8987) &&
+						(js == 2 || dep_js == 2)) {
+					ret = false;
+					break;
+				}
+				has_x_dep = true;
+			}
+
+			if (kbase_jd_katom_dep_type(&katom->dep[i]) ==
+						BASE_JD_DEP_TYPE_DATA &&
+					js == dep_js) {
+				struct kbase_jd_atom *last_atom =
+						jsctx_rb_peek_last(kctx, js,
+								prio);
+
+				/* Last atom on slot must be pre-dep for this
+				 * atom */
+				if (last_atom != dep_atom) {
+					ret = false;
+					break;
+				}
+			}
+
+			/* Dependency can be represented in ringbuffers */
+		}
+	}
+
+	/* If dependencies can be represented by ringbuffer then clear them from
+	 * atom structure */
+	if (ret) {
+		for (i = 0; i < 2; i++) {
+			struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+			if (dep_atom) {
+				int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+
+				if ((js != dep_js) &&
+					(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_COMPLETED)
+					&& (dep_atom->status !=
+					KBASE_JD_ATOM_STATE_HW_COMPLETED)
+					&& (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED)) {
+
+					katom->atom_flags |=
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+					katom->x_pre_dep = dep_atom;
+					dep_atom->x_post_dep = katom;
+					if (kbase_jd_katom_dep_type(
+							&katom->dep[i]) ==
+							BASE_JD_DEP_TYPE_DATA)
+						katom->atom_flags |=
+						KBASE_KATOM_FLAG_FAIL_BLOCKER;
+				}
+				if ((kbase_jd_katom_dep_type(&katom->dep[i])
+						== BASE_JD_DEP_TYPE_DATA) &&
+								(js == dep_js))
+					katom->atom_flags |=
+						KBASE_KATOM_FLAG_FAIL_PREV;
+
+				list_del(&katom->dep_item[i]);
+				kbase_jd_katom_dep_clear(&katom->dep[i]);
+			}
+		}
+	}
+
+	return ret;
+}
+
+bool kbasep_js_add_job(struct kbase_context *kctx,
+		struct kbase_jd_atom *atom)
+{
+	unsigned long flags;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	union kbasep_js_policy *js_policy;
+
+	bool enqueue_required = false;
+	bool timer_sync = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/*
+	 * Begin Runpool transaction
+	 */
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
+	++(js_kctx_info->ctx.nr_jobs);
+
+	/* Setup any scheduling information */
+	kbasep_js_clear_job_retry_submit(atom);
+
+	/* Lock for state available during IRQ */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	if (!kbase_js_dep_validate(kctx, atom)) {
+		/* Dependencies could not be represented */
+		--(js_kctx_info->ctx.nr_jobs);
+
+		/* Setting atom status back to queued as it still has unresolved
+		 * dependencies */
+		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		goto out_unlock;
+	}
+
+	KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
+
+	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
+				kbasep_js_trace_get_refcnt_nolock(kbdev, kctx));
+
+	/* Context Attribute Refcounting */
+	kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom);
+
+	if (enqueue_required) {
+		if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false))
+			timer_sync = kbase_js_ctx_list_add_pullable(kbdev, kctx,
+								atom->slot_nr);
+		else
+			timer_sync = kbase_js_ctx_list_add_unpullable(kbdev,
+					kctx, atom->slot_nr);
+	}
+	/* If this context is active and the atom is the first on its slot,
+	 * kick the job manager to attempt to fast-start the atom */
+	if (enqueue_required && kctx == kbdev->hwaccess.active_kctx)
+		kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+	/* End runpool transaction */
+
+	if (!js_kctx_info->ctx.is_scheduled) {
+		if (js_kctx_info->ctx.is_dying) {
+			/* A job got added while/after kbase_job_zap_context()
+			 * was called on a non-scheduled context (e.g. KDS
+			 * dependency resolved). Kill that job by killing the
+			 * context. */
+			kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx,
+					false);
+		} else if (js_kctx_info->ctx.nr_jobs == 1) {
+			/* Handle Refcount going from 0 to 1: schedule the
+			 * context on the Policy Queue */
+			KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+			dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+
+			/* Policy Queue was updated - caller must try to
+			 * schedule the head context */
+			WARN_ON(!enqueue_required);
+		}
+	}
+out_unlock:
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return enqueue_required;
+}
+
+void kbasep_js_remove_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	union kbasep_js_policy *js_policy;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc,
+			kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* De-refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
+	--(js_kctx_info->ctx.nr_jobs);
+}
+
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	unsigned long flags;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	struct kbasep_js_device_data *js_devdata;
+	bool attr_state_changed;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+	kbasep_js_remove_job(kbdev, kctx, katom);
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* The atom has 'finished' (will not be re-run), so no need to call
+	 * kbasep_js_has_atom_finished().
+	 *
+	 * This is because it returns false for soft-stopped atoms, but we
+	 * want to override that, because we're cancelling an atom regardless of
+	 * whether it was soft-stopped or not */
+	attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
+			&katom_retained_state);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return attr_state_changed;
+}
+
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	bool result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	/* KBASE_TRACE_ADD_REFCOUNT( kbdev, JS_RETAIN_CTX, kctx, NULL, 0,
+	   kbasep_js_trace_get_refcnt(kbdev, kctx)); */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return result;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+		int as_nr)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx = NULL;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	found_kctx = js_per_as_data->kctx;
+
+	if (found_kctx != NULL)
+		++(js_per_as_data->as_busy_refcount);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return found_kctx;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+		struct kbase_device *kbdev, int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx = NULL;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	found_kctx = js_per_as_data->kctx;
+
+	if (found_kctx != NULL)
+		++(js_per_as_data->as_busy_refcount);
+
+	return found_kctx;
+}
+
+/**
+ * kbasep_js_release_result - Try running more jobs after releasing a context
+ *                            and/or atom
+ *
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed
+ *
+ * This collates a set of actions that must happen whilst
+ * kbasep_js_device_data.runpool_irq.lock is held.
+ *
+ * This includes running more jobs when:
+ * - The previously released kctx caused a ctx attribute change,
+ * - The released atom caused a ctx attribute change,
+ * - Slots were previously blocked due to affinity restrictions,
+ * - Submission during IRQ handling failed.
+ *
+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were
+ *         changed. The caller should try scheduling all contexts
+ */
+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state,
+		bool runpool_ctx_attr_change)
+{
+	struct kbasep_js_device_data *js_devdata;
+	kbasep_js_release_result result = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom_retained_state != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	if (js_devdata->nr_user_contexts_running != 0) {
+		bool retry_submit = false;
+		int retry_jobslot = 0;
+
+		if (katom_retained_state)
+			retry_submit = kbasep_js_get_atom_retry_submit_slot(
+					katom_retained_state, &retry_jobslot);
+
+		if (runpool_ctx_attr_change || retry_submit) {
+			/* A change in runpool ctx attributes might mean we can
+			 * run more jobs than before  */
+			result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+
+			KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB,
+						kctx, NULL, 0u, retry_jobslot);
+		}
+	}
+	return result;
+}
+
+/*
+ * Internal function to release the reference on a ctx and an atom's "retained
+ * state", only taking the runpool and as transaction mutexes
+ *
+ * This also starts more jobs running in the case of an ctx-attribute state
+ * change
+ *
+ * This does none of the followup actions for scheduling:
+ * - It does not schedule in a new context
+ * - It does not requeue or handle dying contexts
+ *
+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead
+ *
+ * Requires:
+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
+ * - Context has a non-zero refcount
+ * - Caller holds js_kctx_info->ctx.jsctx_mutex
+ * - Caller holds js_devdata->runpool_mutex
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	union kbasep_js_policy *js_policy;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	kbasep_js_release_result release_result = 0u;
+	bool runpool_ctx_attr_change = false;
+	int kctx_as_nr;
+	struct kbase_as *current_as;
+	int new_ref_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
+
+	/* kctx->as_nr and js_per_as_data are only read from here. The caller's
+	 * js_ctx_mutex provides a barrier that ensures they are up-to-date.
+	 *
+	 * They will not change whilst we're reading them, because the refcount
+	 * is non-zero (and we ASSERT on that last fact).
+	 */
+	kctx_as_nr = kctx->as_nr;
+	KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID);
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[kctx_as_nr];
+	KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+	/*
+	 * Transaction begins on AS and runpool_irq
+	 *
+	 * Assert about out calling contract
+	 */
+	current_as = &kbdev->as[kctx_as_nr];
+	mutex_lock(&kbdev->pm.lock);
+	mutex_lock(&current_as->transaction_mutex);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
+	KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+	/* Update refcount */
+	new_ref_count = --(js_per_as_data->as_busy_refcount);
+
+	/* Release the atom if it finished (i.e. wasn't soft-stopped) */
+	if (kbasep_js_has_atom_finished(katom_retained_state))
+		runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(
+				kbdev, kctx, katom_retained_state);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u,
+			new_ref_count);
+
+	if (new_ref_count == 1 && kctx->jctx.sched_info.ctx.flags &
+			KBASE_CTX_FLAG_PRIVILEGED &&
+			!kbase_pm_is_suspending(kbdev)) {
+		/* Context is kept scheduled into an address space even when
+		 * there are no jobs, in this case we have to handle the
+		 * situation where all jobs have been evicted from the GPU and
+		 * submission is disabled.
+		 *
+		 * At this point we re-enable submission to allow further jobs
+		 * to be executed
+		 */
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+	}
+
+	/* Make a set of checks to see if the context should be scheduled out */
+	if (new_ref_count == 0 &&
+		(!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
+							kbdev->pm.suspending)) {
+		/* Last reference, and we've been told to remove this context
+		 * from the Run Pool */
+		dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because as_busy_refcount=%d, jobs=%d, allowed=%d",
+				kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
+				kbasep_js_is_submit_allowed(js_devdata, kctx));
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_mmu_as_released(kctx->as_nr);
+#endif
+		kbase_tlstream_tl_nret_as_ctx(&kbdev->as[kctx->as_nr], kctx);
+
+		kbase_backend_release_ctx_irq(kbdev, kctx);
+
+		if (kbdev->hwaccess.active_kctx == kctx)
+			kbdev->hwaccess.active_kctx = NULL;
+
+		/* Ctx Attribute handling
+		 *
+		 * Releasing atoms attributes must either happen before this, or
+		 * after 'is_scheduled' is changed, otherwise we double-decount
+		 * the attributes */
+		runpool_ctx_attr_change |=
+			kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
+
+		/* Releasing the context and katom retained state can allow
+		 * more jobs to run */
+		release_result |=
+			kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev,
+						kctx, katom_retained_state,
+						runpool_ctx_attr_change);
+
+		/*
+		 * Transaction ends on AS and runpool_irq:
+		 *
+		 * By this point, the AS-related data is now clear and ready
+		 * for re-use.
+		 *
+		 * Since releases only occur once for each previous successful
+		 * retain, and no more retains are allowed on this context, no
+		 * other thread will be operating in this
+		 * code whilst we are
+		 */
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+		kbase_backend_release_ctx_noirq(kbdev, kctx);
+
+		mutex_unlock(&current_as->transaction_mutex);
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* Note: Don't reuse kctx_as_nr now */
+
+		/* Synchronize with any policy timers */
+		kbase_backend_ctx_count_changed(kbdev);
+
+		/* update book-keeping info */
+		js_kctx_info->ctx.is_scheduled = false;
+		/* Signal any waiter that the context is not scheduled, so is
+		 * safe for termination - once the jsctx_mutex is also dropped,
+		 * and jobs have finished. */
+		wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+		/* Queue an action to occur after we've dropped the lock */
+		release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED;
+	} else {
+		kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
+				katom_retained_state, runpool_ctx_attr_change);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&current_as->transaction_mutex);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return release_result;
+}
+
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	/* Setup a dummy katom_retained_state */
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+							&katom_retained_state);
+}
+
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx, bool has_pm_ref)
+{
+	struct kbasep_js_device_data *js_devdata;
+	union kbasep_js_policy *js_policy;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_policy = &kbdev->js_data.policy;
+	js_devdata = &kbdev->js_data;
+
+	/* This is called if and only if you've you've detached the context from
+	 * the Runpool or the Policy Queue, and not added it back to the Runpool
+	 */
+	KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+	if (js_kctx_info->ctx.is_dying) {
+		/* Dying: don't requeue, but kill all jobs on the context. This
+		 * happens asynchronously */
+		dev_dbg(kbdev->dev,
+			"JS: ** Killing Context %p on RunPool Remove **", kctx);
+		kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
+	}
+}
+
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	base_jd_event_code event_code;
+	kbasep_js_release_result release_result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	event_code = katom_retained_state->event_code;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL)
+		kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+			&katom_retained_state);
+}
+
+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into
+ * kbase_js_sched_all() */
+static void kbasep_js_runpool_release_ctx_no_schedule(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+	struct kbasep_js_atom_retained_state katom_retained_state_struct;
+	struct kbasep_js_atom_retained_state *katom_retained_state =
+		&katom_retained_state_struct;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	kbasep_js_atom_retained_state_init_invalid(katom_retained_state);
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* NOTE: could return release_result if the caller would like to know
+	 * whether it should schedule a new context, but currently no callers do
+	 */
+}
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user specified data
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used, if a timeout
+ * is negative then the default is set.
+ */
+static void kbase_js_set_timeouts(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_data = &kbdev->js_data;
+
+	if (kbdev->js_scheduling_period_ns < 0)
+		js_data->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+	else if (kbdev->js_scheduling_period_ns > 0)
+		js_data->scheduling_period_ns = kbdev->js_scheduling_period_ns;
+
+	if (kbdev->js_soft_stop_ticks < 0)
+		js_data->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+	else if (kbdev->js_soft_stop_ticks > 0)
+		js_data->soft_stop_ticks = kbdev->js_soft_stop_ticks;
+
+	if (kbdev->js_soft_stop_ticks_cl < 0)
+		js_data->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+	else if (kbdev->js_soft_stop_ticks_cl > 0)
+		js_data->soft_stop_ticks_cl = kbdev->js_soft_stop_ticks_cl;
+
+	if (kbdev->js_hard_stop_ticks_ss < 0) {
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+			js_data->hard_stop_ticks_ss =
+					DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+		else
+			js_data->hard_stop_ticks_ss =
+					DEFAULT_JS_HARD_STOP_TICKS_SS;
+	} else if (kbdev->js_hard_stop_ticks_ss > 0) {
+		js_data->hard_stop_ticks_ss = kbdev->js_hard_stop_ticks_ss;
+	}
+
+	if (kbdev->js_hard_stop_ticks_cl < 0)
+		js_data->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+	else if (kbdev->js_hard_stop_ticks_cl > 0)
+		js_data->hard_stop_ticks_cl = kbdev->js_hard_stop_ticks_cl;
+
+	if (kbdev->js_hard_stop_ticks_dumping < 0)
+		js_data->hard_stop_ticks_dumping =
+				DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+	else if (kbdev->js_hard_stop_ticks_dumping > 0)
+		js_data->hard_stop_ticks_dumping =
+				kbdev->js_hard_stop_ticks_dumping;
+
+	if (kbdev->js_reset_ticks_ss < 0) {
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+			js_data->gpu_reset_ticks_ss =
+					DEFAULT_JS_RESET_TICKS_SS_8408;
+		else
+			js_data->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+	} else if (kbdev->js_reset_ticks_ss > 0) {
+		js_data->gpu_reset_ticks_ss = kbdev->js_reset_ticks_ss;
+	}
+
+	if (kbdev->js_reset_ticks_cl < 0)
+		js_data->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+	else if (kbdev->js_reset_ticks_cl > 0)
+		js_data->gpu_reset_ticks_cl = kbdev->js_reset_ticks_cl;
+
+	if (kbdev->js_reset_ticks_dumping < 0)
+		js_data->gpu_reset_ticks_dumping =
+				DEFAULT_JS_RESET_TICKS_DUMPING;
+	else if (kbdev->js_reset_ticks_dumping > 0)
+		js_data->gpu_reset_ticks_dumping =
+				kbdev->js_reset_ticks_dumping;
+}
+
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	union kbasep_js_policy *js_policy;
+	struct kbase_as *new_address_space = NULL;
+	unsigned long flags;
+	bool kctx_suspended = false;
+	int as_nr;
+
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Pick available address space for this context */
+	as_nr = kbase_backend_find_free_address_space(kbdev, kctx);
+
+	if (as_nr == KBASEP_AS_NR_INVALID)
+		return false; /* No address spaces currently available */
+
+	new_address_space = &kbdev->as[as_nr];
+
+	/*
+	 * Atomic transaction on the Context and Run Pool begins
+	 */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Check to see if context is dying due to kbase_job_zap_context() */
+	if (js_kctx_info->ctx.is_dying) {
+		/* Roll back the transaction so far and return */
+		kbase_backend_release_free_address_space(kbdev, as_nr);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL,
+				0u,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	if (js_devdata->nr_user_contexts_running == 0 &&
+			kbdev->js_timeouts_updated) {
+		/* Only when there are no other contexts submitting jobs:
+		 * Latch in run-time job scheduler timeouts that were set
+		 * through js_timeouts sysfs file */
+		kbase_js_set_timeouts(kbdev);
+
+		kbdev->js_timeouts_updated = false;
+	}
+
+	js_kctx_info->ctx.is_scheduled = true;
+
+	mutex_lock(&new_address_space->transaction_mutex);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* Assign context to previously chosen address space */
+	if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&new_address_space->transaction_mutex);
+		/* If address space is not pending, then kbase_backend_use_ctx()
+		 * failed. Roll back the transaction so far and return */
+		if (!kctx->as_pending) {
+			js_kctx_info->ctx.is_scheduled = false;
+
+			kbase_backend_release_free_address_space(kbdev, as_nr);
+		}
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		return false;
+	}
+
+	kbdev->hwaccess.active_kctx = kctx;
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
+#endif
+	kbase_tlstream_tl_ret_as_ctx(&kbdev->as[kctx->as_nr], kctx);
+
+	/* Cause any future waiter-on-termination to wait until the context is
+	 * descheduled */
+	wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+	/* Re-check for suspending: a suspend could've occurred, and all the
+	 * contexts could've been removed from the runpool before we took this
+	 * lock. In this case, we don't want to allow this context to run jobs,
+	 * we just want it out immediately.
+	 *
+	 * The DMB required to read the suspend flag was issued recently as part
+	 * of the runpool_irq locking. If a suspend occurs *after* that lock was
+	 * taken (i.e. this condition doesn't execute), then the
+	 * kbasep_js_suspend() code will cleanup this context instead (by virtue
+	 * of it being called strictly after the suspend flag is set, and will
+	 * wait for this lock to drop) */
+	if (kbase_pm_is_suspending(kbdev)) {
+		/* Cause it to leave at some later point */
+		bool retained;
+
+		retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+		KBASE_DEBUG_ASSERT(retained);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+		kctx_suspended = true;
+	}
+
+	/* Transaction complete */
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	mutex_unlock(&new_address_space->transaction_mutex);
+
+	/* Synchronize with any policy timers */
+	kbase_backend_ctx_count_changed(kbdev);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	/* Note: after this point, the context could potentially get scheduled
+	 * out immediately */
+
+	if (kctx_suspended) {
+		/* Finishing forcing out the context due to a suspend. Use a
+		 * variant of kbasep_js_runpool_release_ctx() that doesn't
+		 * schedule a new context, to prevent a risk of recursion back
+		 * into this function */
+		kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx);
+		return false;
+	}
+	return true;
+}
+
+static bool kbase_js_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	if (kctx->as_pending) {
+		/* Context waiting for AS to be assigned */
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		return false;
+	}
+	if (kbase_backend_use_ctx_sched(kbdev, kctx)) {
+		/* Context already has ASID - mark as active */
+		kbdev->hwaccess.active_kctx = kctx;
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		return true; /* Context already scheduled */
+	}
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return kbasep_js_schedule_ctx(kbdev, kctx);
+}
+
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	bool is_scheduled;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* This must never be attempted whilst suspending - i.e. it should only
+	 * happen in response to a syscall from a user-space thread */
+	BUG_ON(kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Mark the context as privileged */
+	js_kctx_info->ctx.flags |= KBASE_CTX_FLAG_PRIVILEGED;
+
+	is_scheduled = js_kctx_info->ctx.is_scheduled;
+	if (!is_scheduled) {
+		/* Add the context to the pullable list */
+		if (kbase_js_ctx_list_add_pullable(kbdev, kctx, 0))
+			kbase_js_sync_timers(kbdev);
+
+		/* Fast-starting requires the jsctx_mutex to be dropped,
+		 * because it works on multiple ctxs */
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		/* Try to schedule the context in */
+		kbase_js_sched_all(kbdev);
+
+		/* Wait for the context to be scheduled in */
+		wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			kctx->jctx.sched_info.ctx.is_scheduled);
+	} else {
+		/* Already scheduled in - We need to retain it to keep the
+		 * corresponding address space */
+		kbasep_js_runpool_retain_ctx(kbdev, kctx);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+}
+KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx);
+
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool pending;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* We don't need to use the address space anymore */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED);
+	pending = kctx->as_pending;
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Release the context - it will be scheduled out if there is no
+	 * pending job */
+	if (!pending)
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	kbase_js_sched_all(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx);
+
+void kbasep_js_suspend(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+	u16 retained = 0u;
+	int nr_privileged_ctx = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* Prevent all contexts from submitting */
+	js_devdata->runpool_irq.submit_allowed = 0;
+
+	/* Retain each of the contexts, so we can cause it to leave even if it
+	 * had no refcount to begin with */
+	for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+			&js_devdata->runpool_irq.per_as_data[i];
+		struct kbase_context *kctx = js_per_as_data->kctx;
+
+		retained = retained << 1;
+
+		if (kctx) {
+			++(js_per_as_data->as_busy_refcount);
+			retained |= 1u;
+			/* We can only cope with up to 1 privileged context -
+			 * the instrumented context. It'll be suspended by
+			 * disabling instrumentation */
+			if (kctx->jctx.sched_info.ctx.flags &
+					KBASE_CTX_FLAG_PRIVILEGED)
+				KBASE_DEBUG_ASSERT(++nr_privileged_ctx == 1);
+		}
+	}
+	CSTD_UNUSED(nr_privileged_ctx);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	/* De-ref the previous retain to ensure each context gets pulled out
+	 * sometime later. */
+	for (i = 0;
+		 i < BASE_MAX_NR_AS;
+		 ++i, retained = retained >> 1) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+			&js_devdata->runpool_irq.per_as_data[i];
+		struct kbase_context *kctx = js_per_as_data->kctx;
+
+		if (retained & 1u)
+			kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	/* Caller must wait for all Power Manager active references to be
+	 * dropped */
+}
+
+void kbasep_js_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int js;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_context *kctx, *n;
+
+		list_for_each_entry_safe(kctx, n,
+				&kbdev->js_data.ctx_list_unpullable[js],
+				jctx.sched_info.ctx.ctx_list_entry[js]) {
+			struct kbasep_js_kctx_info *js_kctx_info;
+			unsigned long flags;
+			bool timer_sync = false;
+
+			js_kctx_info = &kctx->jctx.sched_info;
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+			if (!js_kctx_info->ctx.is_scheduled &&
+				kbase_js_ctx_pullable(kctx, js, false))
+				timer_sync = kbase_js_ctx_list_add_pullable(
+							kbdev, kctx, js);
+
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+									flags);
+			if (timer_sync)
+				kbase_backend_ctx_count_changed(kbdev);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		}
+	}
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	/* Restart atom processing */
+	kbase_js_sched_all(kbdev);
+
+	/* JS Resume complete */
+}
+
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if ((katom->core_req & BASE_JD_REQ_FS) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE |
+								BASE_JD_REQ_T)))
+		return false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) &&
+	    (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T)))
+		return false;
+
+	return true;
+}
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_FS)
+		return 0;
+
+	if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+		if (katom->device_nr == 1 &&
+				kbdev->gpu_props.num_core_groups == 2)
+			return 2;
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+			return 2;
+	}
+
+	return 1;
+}
+
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom)
+{
+	bool enqueue_required;
+
+	katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	/* If slot will transition from unpullable to pullable then add to
+	 * pullable list */
+	if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
+		enqueue_required = true;
+	} else {
+		enqueue_required = false;
+	}
+	/* Check if there are lower priority jobs to soft stop */
+	kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+	/* Add atom to ring buffer. */
+	if (jsctx_rb_add_atom(kctx, katom)) {
+		jsctx_ll_add(kctx, katom);
+		enqueue_required = false;
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_LL;
+	} else {
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED;
+	}
+	return enqueue_required;
+}
+
+/**
+ * kbase_js_evict_deps - Evict dependencies of a failed atom.
+ * @kctx:       Context pointer
+ * @katom:      Pointer to the atom that has failed.
+ * @js:         The job slot the katom was run on.
+ * @prio:       Priority of the katom.
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propogated to all dependent atoms.
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom, int js, int prio)
+{
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+	struct kbase_jd_atom *next_katom = jsctx_rb_peek_prio(kctx, js, prio);
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	if (next_katom &&
+			(next_katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV)) {
+		KBASE_DEBUG_ASSERT(next_katom->status !=
+				KBASE_JD_ATOM_STATE_HW_COMPLETED);
+
+		next_katom->will_fail_event_code = katom->event_code;
+
+	}
+
+	/* Has cross slot depenency. */
+	if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_LL |
+					KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED))) {
+		/* Remove dependency.*/
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+		/* Fail if it had a data dependency. */
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
+			x_dep->will_fail_event_code = katom->event_code;
+		}
+	}
+}
+
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbasep_js_device_data *js_devdata;
+	int pulled;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	js_devdata = &kctx->kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+		return NULL;
+	if (kbase_pm_is_suspending(kctx->kbdev))
+		return NULL;
+
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return NULL;
+
+	if (atomic_read(&katom->blocked))
+		return NULL;
+
+	/* Due to ordering restrictions when unpulling atoms on failure, we do
+	 * not allow multiple runs of fail-dep atoms from the same context to be
+	 * present on the same slot */
+	if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) &&
+				atomic_read(&kctx->atoms_pulled_slot[js])) {
+		struct kbase_jd_atom *prev_atom =
+				kbase_backend_inspect_tail(kctx->kbdev, js);
+
+		if (prev_atom && prev_atom->kctx != kctx)
+			return NULL;
+	}
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return NULL;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return NULL;
+	}
+
+	kctx->pulled = true;
+	pulled = atomic_inc_return(&kctx->atoms_pulled);
+	if (pulled == 1 && !kctx->slots_pullable) {
+		WARN_ON(kctx->ctx_runnable_ref);
+		kctx->ctx_runnable_ref = true;
+		atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable);
+	}
+	atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
+	jsctx_rb_pull(kctx, katom);
+
+	kbasep_js_runpool_retain_ctx_nolock(kctx->kbdev, kctx);
+	katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+
+	katom->sched_info.cfs.ticks = 0;
+
+	return katom;
+}
+
+
+static void js_return_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+									work);
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	struct kbasep_js_atom_retained_state retained_state;
+	int js = katom->slot_nr;
+	bool timer_sync = false;
+	bool context_idle = false;
+	unsigned long flags;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	kbase_tlstream_aux_job_softstop_ex(katom);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	kbasep_js_atom_retained_state_copy(&retained_state, katom);
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	atomic_dec(&kctx->atoms_pulled);
+	atomic_dec(&kctx->atoms_pulled_slot[js]);
+
+	atomic_dec(&katom->blocked);
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
+			jsctx_rb_none_to_pull(kctx, js))
+		timer_sync |= kbase_js_ctx_list_remove(kbdev, kctx, js);
+
+	if (!atomic_read(&kctx->atoms_pulled)) {
+		if (!kctx->slots_pullable) {
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
+				!js_kctx_info->ctx.is_dying) {
+			int num_slots = kbdev->gpu_props.num_job_slots;
+			int slot;
+
+			if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+				kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+			for (slot = 0; slot < num_slots; slot++) {
+				if (kbase_js_ctx_pullable(kctx, slot, true))
+					timer_sync |=
+						kbase_js_ctx_list_add_pullable(
+							kbdev, kctx, slot);
+			}
+		}
+
+		kbase_jm_idle_ctx(kbdev, kctx);
+
+		context_idle = true;
+	}
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	if (context_idle) {
+		WARN_ON(!kctx->ctx_active);
+		kctx->ctx_active = false;
+		kbase_pm_context_idle(kbdev);
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+							&retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+}
+
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	jsctx_rb_unpull(kctx, katom);
+
+	WARN_ON(work_pending(&katom->work));
+
+	/* Block re-submission until workqueue has run */
+	atomic_inc(&katom->blocked);
+
+	kbase_job_check_leave_disjoint(kctx->kbdev, katom);
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, js_return_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+						struct kbase_jd_atom *katom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	bool timer_sync = false;
+	int atom_slot;
+	bool context_idle = false;
+
+	kbdev = kctx->kbdev;
+	atom_slot = katom->slot_nr;
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) {
+		jsctx_rb_remove(kctx, katom);
+
+		context_idle = !atomic_dec_return(&kctx->atoms_pulled);
+		atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
+
+		if (!atomic_read(&kctx->atoms_pulled) &&
+				!kctx->slots_pullable) {
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+	}
+	WARN_ON(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_LL);
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
+			jsctx_rb_none_to_pull(kctx, atom_slot))
+		timer_sync |= kbase_js_ctx_list_remove(kctx->kbdev, kctx,
+				atom_slot);
+
+	/*
+	 * If submission is disabled on this context (most likely due to an
+	 * atom failure) and there are now no atoms left in the system then
+	 * re-enable submission so that context can be scheduled again.
+	 */
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
+					!atomic_read(&kctx->atoms_pulled) &&
+					!js_kctx_info->ctx.is_dying) {
+		int js;
+
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |= kbase_js_ctx_list_add_pullable(
+							kbdev, kctx, js);
+		}
+	} else if (katom->x_post_dep &&
+			kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+		int js;
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |= kbase_js_ctx_list_add_pullable(
+							kbdev, kctx, js);
+		}
+	}
+
+	/* Mark context as inactive. The pm reference will be dropped later in
+	 * jd_done_worker().
+	 */
+	if (context_idle)
+		kctx->ctx_active = false;
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return context_idle;
+}
+
+void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp)
+{
+	u64 microseconds_spent = 0;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx = katom->kctx;
+	union kbasep_js_policy *js_policy;
+	struct kbasep_js_device_data *js_devdata;
+
+	kbdev = kctx->kbdev;
+
+	js_policy = &kbdev->js_data.policy;
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	if (katom->will_fail_event_code)
+		katom->event_code = katom->will_fail_event_code;
+
+	katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
+
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_js_evict_deps(kctx, katom, katom->slot_nr,
+				katom->sched_priority);
+	}
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
+				katom->slot_nr), NULL, 0);
+#endif
+
+	kbase_tlstream_tl_nret_atom_lpu(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[
+				katom->slot_nr]);
+	kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]);
+	kbase_tlstream_tl_nret_ctx_lpu(
+			kctx,
+			&kbdev->gpu_props.props.raw_props.js_features[
+				katom->slot_nr]);
+
+	/* Calculate the job's time used */
+	if (end_timestamp != NULL) {
+		/* Only calculating it for jobs that really run on the HW (e.g.
+		 * removed from next jobs never actually ran, so really did take
+		 * zero time) */
+		ktime_t tick_diff = ktime_sub(*end_timestamp,
+							katom->start_timestamp);
+
+		microseconds_spent = ktime_to_ns(tick_diff);
+
+		do_div(microseconds_spent, 1000);
+
+		/* Round up time spent to the minimum timer resolution */
+		if (microseconds_spent < KBASEP_JS_TICK_RESOLUTION_US)
+			microseconds_spent = KBASEP_JS_TICK_RESOLUTION_US;
+	}
+
+	/* Log the result of the job (completion status, and time spent). */
+	kbasep_js_policy_log_job_result(js_policy, katom, microseconds_spent);
+
+	kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
+
+	/* Unblock cross dependency if present */
+	if (katom->x_post_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
+			!(katom->x_post_dep->atom_flags &
+						KBASE_KATOM_FLAG_FAIL_BLOCKER)))
+		katom->x_post_dep->atom_flags &=
+					~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+}
+
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+{
+	struct kbasep_js_device_data *js_devdata;
+	union kbasep_js_policy *js_policy;
+	bool timer_sync = false;
+
+	js_devdata = &kbdev->js_data;
+	js_policy = &js_devdata->policy;
+
+	down(&js_devdata->schedule_sem);
+	mutex_lock(&js_devdata->queue_mutex);
+
+	while (js_mask) {
+		int js;
+
+		js = ffs(js_mask) - 1;
+
+		while (1) {
+			struct kbase_context *kctx;
+			unsigned long flags;
+			bool context_idle = false;
+
+			kctx = kbase_js_ctx_list_pop_head(kbdev, js);
+
+			if (!kctx) {
+				js_mask &= ~(1 << js);
+				break; /* No contexts on pullable list */
+			}
+
+			if (!kctx->ctx_active) {
+				context_idle = true;
+
+				if (kbase_pm_context_active_handle_suspend(
+									kbdev,
+				      KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+					/* Suspend pending - return context to
+					 * queue and stop scheduling */
+					mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					if (kbase_js_ctx_list_add_pullable_head(
+						kctx->kbdev, kctx, js))
+						kbase_js_sync_timers(kbdev);
+					mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					mutex_unlock(&js_devdata->queue_mutex);
+					up(&js_devdata->schedule_sem);
+					return;
+				}
+				kctx->ctx_active = true;
+			}
+
+			if (!kbase_js_use_ctx(kbdev, kctx)) {
+				mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				/* Context can not be used at this time */
+				spin_lock_irqsave(&js_devdata->runpool_irq.lock,
+									flags);
+				if (kctx->as_pending ||
+					kbase_js_ctx_pullable(kctx, js, false)
+					|| (kctx->jctx.sched_info.ctx.flags &
+						KBASE_CTX_FLAG_PRIVILEGED))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head(
+							kctx->kbdev, kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable(
+							kctx->kbdev, kctx, js);
+				spin_unlock_irqrestore(
+					&js_devdata->runpool_irq.lock, flags);
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				if (context_idle) {
+					WARN_ON(!kctx->ctx_active);
+					kctx->ctx_active = false;
+					kbase_pm_context_idle(kbdev);
+				}
+
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+				break;
+			}
+			mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+			spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+			kctx->pulled = false;
+
+			if (!kbase_jm_kick(kbdev, 1 << js))
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+
+			if (!kctx->pulled) {
+				/* Failed to pull jobs - push to head of list */
+				if (kbase_js_ctx_pullable(kctx, js, true))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head(
+								kctx->kbdev,
+								kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable(
+								kctx->kbdev,
+								kctx, js);
+
+				if (context_idle) {
+					kbase_jm_idle_ctx(kbdev, kctx);
+					spin_unlock_irqrestore(
+						&js_devdata->runpool_irq.lock,
+									flags);
+					WARN_ON(!kctx->ctx_active);
+					kctx->ctx_active = false;
+					kbase_pm_context_idle(kbdev);
+				} else {
+					spin_unlock_irqrestore(
+						&js_devdata->runpool_irq.lock,
+									flags);
+				}
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+				js_mask &= ~(1 << js);
+				break; /* Could not run atoms on this slot */
+			}
+
+			/* Push to back of list */
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |= kbase_js_ctx_list_add_pullable(
+							kctx->kbdev, kctx, js);
+			else
+				timer_sync |= kbase_js_ctx_list_add_unpullable(
+							kctx->kbdev, kctx, js);
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+									flags);
+			mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		}
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+	up(&js_devdata->schedule_sem);
+}
+
+void kbase_js_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	int js;
+
+	/*
+	 * Critical assumption: No more submission is possible outside of the
+	 * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs)
+	 * whilst the struct kbase_context is terminating.
+	 */
+
+	/* First, atomically do the following:
+	 * - mark the context as dying
+	 * - try to evict it from the policy queue */
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	js_kctx_info->ctx.is_dying = true;
+
+	dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+
+	/*
+	 * At this point we know:
+	 * - If eviction succeeded, it was in the policy queue, but now no
+	 *   longer is
+	 *  - We must cancel the jobs here. No Power Manager active reference to
+	 *    release.
+	 *  - This happens asynchronously - kbase_jd_zap_context() will wait for
+	 *    those jobs to be killed.
+	 * - If eviction failed, then it wasn't in the policy queue. It is one
+	 *   of the following:
+	 *  - a. it didn't have any jobs, and so is not in the Policy Queue or
+	 *       the Run Pool (not scheduled)
+	 *   - Hence, no more work required to cancel jobs. No Power Manager
+	 *     active reference to release.
+	 *  - b. it was in the middle of a scheduling transaction (and thus must
+	 *       have at least 1 job). This can happen from a syscall or a
+	 *       kernel thread. We still hold the jsctx_mutex, and so the thread
+	 *       must be waiting inside kbasep_js_try_schedule_head_ctx(),
+	 *       before checking whether the runpool is full. That thread will
+	 *       continue after we drop the mutex, and will notice the context
+	 *       is dying. It will rollback the transaction, killing all jobs at
+	 *       the same time. kbase_jd_zap_context() will wait for those jobs
+	 *       to be killed.
+	 *   - Hence, no more work required to cancel jobs, or to release the
+	 *     Power Manager active reference.
+	 *  - c. it is scheduled, and may or may not be running jobs
+	 * - We must cause it to leave the runpool by stopping it from
+	 * submitting any more jobs. When it finally does leave,
+	 * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs
+	 * (because it is dying), release the Power Manager active reference,
+	 * and will not requeue the context in the policy queue.
+	 * kbase_jd_zap_context() will wait for those jobs to be killed.
+	 *  - Hence, work required just to make it leave the runpool. Cancelling
+	 *    jobs and releasing the Power manager active reference will be
+	 *    handled when it leaves the runpool.
+	 */
+	if (!js_kctx_info->ctx.is_scheduled) {
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (!list_empty(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+				list_del_init(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+		}
+
+		/* The following events require us to kill off remaining jobs
+		 * and update PM book-keeping:
+		 * - we evicted it correctly (it must have jobs to be in the
+		 *   Policy Queue)
+		 *
+		 * These events need no action, but take this path anyway:
+		 * - Case a: it didn't have any jobs, and was never in the Queue
+		 * - Case b: scheduling transaction will be partially rolled-
+		 *           back (this already cancels the jobs)
+		 */
+
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u,
+						js_kctx_info->ctx.is_scheduled);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+
+		/* Only cancel jobs when we evicted from the policy
+		 * queue. No Power Manager active reference was held.
+		 *
+		 * Having is_dying set ensures that this kills, and
+		 * doesn't requeue */
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+	} else {
+		unsigned long flags;
+		bool was_retained;
+
+		/* Case c: didn't evict, but it is scheduled - it's in the Run
+		 * Pool */
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u,
+						js_kctx_info->ctx.is_scheduled);
+		dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+
+		/* Disable the ctx from submitting any more jobs */
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		/* Retain and (later) release the context whilst it is is now
+		 * disallowed from submitting jobs - ensures that someone
+		 * somewhere will be removing the context later on */
+		was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+		/* Since it's scheduled and we have the jsctx_mutex, it must be
+		 * retained successfully */
+		KBASE_DEBUG_ASSERT(was_retained);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+
+		/* Cancel any remaining running jobs for this kctx - if any.
+		 * Submit is disallowed which takes effect immediately, so no
+		 * more new jobs will appear after we do this. */
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+			kbase_job_slot_hardstop(kctx, js, NULL);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+									kctx);
+
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u);
+
+	/* After this, you must wait on both the
+	 * kbase_jd_context::zero_jobs_wait and the
+	 * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs
+	 * to be destroyed, and the context to be de-scheduled (if it was on the
+	 * runpool).
+	 *
+	 * kbase_jd_zap_context() will do this. */
+}
+
+static inline int trace_get_refcnt(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+
+/**
+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context
+ * @kctx:     Pointer to context.
+ * @callback: Pointer to function to call for each job.
+ *
+ * Call a function on all jobs belonging to a non-queued, non-running
+ * context, and detach the jobs from the context as it goes.
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * Atoms will be removed from the queue, so this must only be called when
+ * cancelling jobs (which occurs as part of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_policy_ctx_job_cb callback)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	unsigned long flags;
+	u32 js;
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL,
+					0u, trace_get_refcnt(kbdev, kctx));
+
+	/* Invoke callback on jobs on each slot in turn */
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		jsctx_queue_foreach(kctx, js, callback);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
new file mode 100755
index 0000000..bdb820a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -0,0 +1,1048 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler APIs.
+ */
+
+#ifndef _KBASE_JS_H_
+#define _KBASE_JS_H_
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_js_policy.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_debug.h"
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js Job Scheduler Internal APIs
+ * @{
+ *
+ * These APIs are Internal to KBase and are available for use by the
+ * @ref kbase_js_policy "Job Scheduler Policy APIs"
+ */
+
+/**
+ * @brief Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * @brief Halt the Job Scheduler.
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero intitialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the struct kbase_context must be zero intitialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the context is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guarenteed not to update the Policy Queue. And so, the caller is
+ * guarenteed to not need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is false.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return true indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return false indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
+ *   the atom
+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ *   remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions.
+ *
+ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it has not been removed with kbasep_js_policy_dequeue_job()
+ *  - or, it has not been removed with kbasep_js_policy_dequeue_job_irq()
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Completely remove a job chain from the Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it is not being killed with kbasep_jd_cancel()
+ *  - or, it has not been removed with kbasep_js_policy_dequeue_job()
+ *  - or, it has not been removed with kbasep_js_policy_dequeue_job_irq()
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * @return true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs
+ * @return false otherwise
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_jd_atom *katom);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that is stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpoool_irq::lock, because
+ * it will be used internally. If the runpool_irq::lock is already held, then
+ * the caller should use kbasep_js_runpool_lookup_ctx_nolock() instead.
+ *
+ * @return a valid struct kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
+
+/**
+ * kbasep_js_runpool_lookup_ctx_nolock - Lookup a context in the Run Pool based
+ *         upon its current address space and ensure that is stays scheduled in.
+ * @kbdev: Device pointer
+ * @as_nr: Address space to lookup
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * Note: This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must the kbasep_js_device_data::runpoool_irq::lock.
+ *
+ * Return: a valid struct kbase_context on success, which has been refcounted as
+ *         being busy.
+ *         NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+		struct kbase_device *kbdev, int as_nr);
+
+/**
+ * @brief Handling the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.  \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero and the context \em might be scheduled out
+ * (depending on whether the Scheudling Policy has deemed it so, or if it has run
+ * out of jobs).
+ *
+ * If the context does get scheduled out, then The following actions will be
+ * taken as part of deschduling a context:
+ * - For the context being descheduled:
+ *  - If the context is in the processing of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ *  - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ *  - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ *  - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * - Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * - Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * - Attempt scheduling in a new context (if one is available), and if necessary,
+ * running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function \b cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a \a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * - Releasing the atom's context attributes
+ * - Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * @brief Variant of kbase_js_runpool_release_ctx() that assumes that
+ * kbasep_js_device_data::runpool_mutex and
+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
+ * attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * @brief Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the function will wait
+ * for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space reserved) until
+ * kbasep_js_release_privileged_ctx is called).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Release a privileged context, allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Mangement active refcount to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduling Policy's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time whenever the Job
+ * Scheduling Policy implements Job Timeouts (such as those done by CFS).
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * @brief Resume the Job Scheduler after a Power Management Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * - Schedules contexts back into the runpool
+ * - Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * @brief Submit an atom to the job scheduler.
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to submit
+ *
+ * @return Whether the context requires to be enqueued. */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom);
+
+/**
+  * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
+  * @kctx:  Context Pointer
+  * @prio:  Priority (specifies the queue together with js).
+  * @js:    Job slot (specifies the queue together with prio).
+  *
+  * Pushes all possible atoms from the linked list to the ringbuffer.
+  * Number of atoms are limited to free space in the ringbuffer and
+  * number of available atoms in the linked list.
+  *
+  */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
+/**
+ * @brief Pull an atom from a context in the job scheduler for execution.
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context to pull from
+ * @param[in] js    Job slot to pull from
+ * @return          Pointer to an atom, or NULL if there are no atoms for this
+ *                  slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * @brief Return an atom to the job scheduler ringbuffer.
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to unpull
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom from jd_done_worker(), removing it from the job
+ * scheduler ringbuffer.
+ *
+ * If the atom failed then all dependee atoms marked for failure propagation
+ * will also fail.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] katom Pointer to the atom to complete
+ * @return true if the context is now idle (no jobs pulled)
+ *         false otherwise
+ */
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom.
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] katom         Pointer to the atom to complete
+ * @param[in] end_timestamp The time that the atom completed (may be NULL)
+ */
+void kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp);
+
+/**
+ * @brief Submit atoms from all available contexts.
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ *
+ * @param[in] kbdev    Device pointer
+ * @param[in] js_mask  Mask of job slots to submit to
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_jd_zap_context - Attempt to deschedule a context that is being
+ *                        destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove a context from any internal job scheduler queues
+ * and perform any other actions to ensure a context will not be submitted
+ * from.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * @brief Validate an atom
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * @param[in] kbdev  Device pointer
+ * @param[in] katom  Atom to validate
+ * @return           true if atom is valid
+ *                   false otherwise
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * @brief Check that a context is allowed to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * As with any bool, never test the return value with true.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 test_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+	test_bit = (u16) (1u << kctx->as_nr);
+
+	return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+}
+
+/**
+ * @brief Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 set_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+	set_bit = (u16) (1u << kctx->as_nr);
+
+	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * @brief Prevent a context from submitting more jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 clear_bit;
+	u16 clear_mask;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+	clear_bit = (u16) (1u << kctx->as_nr);
+	clear_mask = ~clear_bit;
+
+	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
+
+/**
+ * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom
+ */
+static inline void kbasep_js_clear_job_retry_submit(struct kbase_jd_atom *atom)
+{
+	atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Mark a slot as requiring resubmission by carrying that information on a
+ * completing atom.
+ *
+ * @note This can ASSERT in debug builds if the submit slot has been set to
+ * something other than the current value for @a js. This is because you might
+ * be unintentionally stopping more jobs being submitted on the old submit
+ * slot, and that might cause a scheduling-hang.
+ *
+ * @note If you can guarantee that the atoms for the original slot will be
+ * submitted on some other slot, then call kbasep_js_clear_job_retry_submit()
+ * first to silence the ASSERT.
+ */
+static inline void kbasep_js_set_job_retry_submit_slot(struct kbase_jd_atom *atom, int js)
+{
+	KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS);
+	KBASE_DEBUG_ASSERT((atom->retry_submit_on_slot ==
+					KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID)
+				|| (atom->retry_submit_on_slot == js));
+
+	atom->retry_submit_on_slot = js;
+}
+
+/**
+ * Create an initial 'invalid' atom retained state, that requires no
+ * atom-related work to be done on releasing with
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
+{
+	retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+	retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+	retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Copy atom state that can be made available after jd_done_nolock() is called
+ * on that atom.
+ */
+static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
+{
+	retained_state->event_code = katom->event_code;
+	retained_state->core_req = katom->core_req;
+	retained_state->retry_submit_on_slot = katom->retry_submit_on_slot;
+	retained_state->sched_priority = katom->sched_priority;
+	retained_state->device_nr = katom->device_nr;
+}
+
+/**
+ * @brief Determine whether an atom has finished (given its retained state),
+ * and so should be given back to userspace/removed from the system.
+ *
+ * Reasons for an atom not finishing include:
+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ *
+ * @param[in] katom_retained_state the retained state of the atom to check
+ * @return    false if the atom has not finished
+ * @return    !=false if the atom has finished
+ */
+static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
+}
+
+/**
+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
+ * code should just ignore it.
+ *
+ * @param[in] katom_retained_state the atom's retained state to check
+ * @return    false if the retained state is invalid, and can be ignored
+ * @return    !=false if the retained state is valid
+ */
+static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_atom_retained_state *katom_retained_state, int *res)
+{
+	int js = katom_retained_state->retry_submit_on_slot;
+
+	*res = js;
+	return (bool) (js >= 0);
+}
+
+#if KBASE_DEBUG_DISABLE_ASSERTS == 0
+/**
+ * Debug Check the refcount of a context. Only use within ASSERTs
+ *
+ * Obtains kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return negative value if the context is not scheduled in
+ * @return current refcount of the context if it is scheduled in. The refcount
+ * is not guarenteed to be kept constant.
+ */
+static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int result = -1;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID)
+		result = js_devdata->runpool_irq.per_as_data[as_nr].as_busy_refcount;
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return result;
+}
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS == 0 */
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guarenteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpoool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guarenteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	found_kctx = js_per_as_data->kctx;
+	KBASE_DEBUG_ASSERT(found_kctx == NULL || js_per_as_data->as_busy_refcount > 0);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return found_kctx;
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: when you need the number of cycles to guarantee you won't wait for
+ * longer than 'us' time (you might have a shorter wait).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_min_freq(struct kbase_device *kbdev, u32 us)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return us * (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need the number of cycles to guarantee you'll wait at least
+ * 'us' amount of time (but you might wait longer).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_max_freq(struct kbase_device *kbdev, u32 us)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return us * (u32) (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the worst-case wait that 'ticks' cycles will
+ * take (you guarantee that you won't wait any longer than this, but it may
+ * be shorter).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_min_freq(struct kbase_device *kbdev, u32 ticks)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return ticks / gpu_freq * 1000;
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the best-case wait for 'tick' cycles (you
+ * guarantee to be waiting for at least this long, but it may be longer).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_max_freq(struct kbase_device *kbdev, u32 ticks)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return ticks / gpu_freq * 1000;
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+	++(js_devdata->nr_all_contexts_running);
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+		/* Track contexts that can submit jobs */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+									S8_MAX);
+		++(js_devdata->nr_user_contexts_running);
+	}
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	--(js_devdata->nr_all_contexts_running);
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+		/* Track contexts that can submit jobs */
+		--(js_devdata->nr_user_contexts_running);
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+	}
+}
+
+
+/**
+ * @brief Submit atoms from all available contexts to all job slots.
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ *
+ * @param[in] kbdev    Device pointer
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+	kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
+ *                                        to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treates priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ *         0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ *         KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+	if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+		return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+	return kbasep_js_atom_priority_to_relative[atom_prio];
+}
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+	unsigned int prio_idx;
+
+	KBASE_DEBUG_ASSERT(0 <= sched_prio
+			&& sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+	prio_idx = (unsigned int)sched_prio;
+
+	return kbasep_js_relative_priority_to_atom[prio_idx];
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100755
index 0000000..e6e611b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,303 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+
+/*
+ * Private functions follow
+ */
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false);
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
+		++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) {
+			/* First refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false);
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+		--(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+			/* Last de-refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+	++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		/* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+	}
+
+	return runpool_state_changed;
+}
+
+/*
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+	if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+		/* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+	}
+
+	/* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */
+	--(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != false) {
+		/* This context never submits, so don't track any scheduling attributes */
+		return;
+	}
+
+	/* Transfer attributes held in the context flags for contexts that have submit enabled */
+
+	/* ... More attributes can be added here ... */
+
+	/* The context should not have been scheduled yet, so ASSERT if this caused
+	 * runpool state changes (note that other threads *can't* affect the value
+	 * of runpool_state_changed, due to how it's calculated) */
+	KBASE_DEBUG_ASSERT(runpool_state_changed == false);
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed;
+	int i;
+
+	/* Retain any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled in, so update the runpool with the new attributes */
+			runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+
+			/* We don't need to know about state changed, because retaining a
+			 * context occurs on scheduling it, and that itself will also try
+			 * to run new atoms */
+			CSTD_UNUSED(runpool_state_changed);
+		}
+	}
+}
+
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+	int i;
+
+	/* Release any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled out, so update the runpool on the removed attributes */
+			runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom);
+	core_req = katom->core_req;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	/* We don't need to know about state changed, because retaining an
+	 * atom occurs on adding it, and that itself will also try to run
+	 * new atoms */
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom_retained_state);
+	core_req = katom_retained_state->core_req;
+
+	/* No-op for invalid atoms */
+	if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false)
+		return false;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	return runpool_state_changed;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
new file mode 100755
index 0000000..ce91833
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -0,0 +1,158 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_ctx_attr.h
+ * Job Scheduler Context Attribute APIs
+ */
+
+#ifndef _KBASE_JS_CTX_ATTR_H_
+#define _KBASE_JS_CTX_ATTR_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+
+/**
+ * Set the initial attributes of a context (when context create flags are set)
+ *
+ * Requires:
+ * - Hold the jsctx_mutex
+ */
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of a context
+ *
+ * This occurs on scheduling in the context on the runpool (but after
+ * is_scheduled is set)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ */
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Release all attributes of a context
+ *
+ * This occurs on scheduling out the context from the runpool (but before
+ * is_scheduled is cleared)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of an atom
+ *
+ * This occurs on adding an atom to a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ */
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * Release all attributes of an atom, given its retained state.
+ *
+ * This occurs after (permanently) removing an atom from a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * This is a no-op when \a katom_retained_state is invalid.
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+
+	return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	/* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+	return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+	return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100755
index 0000000..75d4b98
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,506 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+/* Types used by the policies must go here */
+enum {
+	/** Context will not submit any jobs */
+	KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0),
+
+	/** Set if the context uses an address space and should be kept scheduled in */
+	KBASE_CTX_FLAG_PRIVILEGED = (1u << 1)
+
+	    /* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */
+};
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+	base_jd_core_req core_req;
+	kbase_context_flags ctx_req;
+	u32 device_nr;
+};
+
+#include "mali_kbase_js_policy_cfs.h"
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy {
+	struct kbasep_js_policy_cfs cfs;
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_ctx_info {
+	struct kbasep_js_policy_cfs_ctx cfs;
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_job_info {
+	struct kbasep_js_policy_cfs_job cfs;
+};
+
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_policy_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
+
+/**
+ * @brief the IRQ_THROTTLE time in microseconds
+ *
+ * This will be converted via the GPU's clock frequency into a cycle-count.
+ *
+ * @note we can make an estimate of the GPU's frequency by periodically
+ * sampling its CYCLE_COUNT register
+ */
+#define KBASE_JS_IRQ_THROTTLE_TIME_US 20
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affect scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accomodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ */
+enum kbasep_js_ctx_attr {
+	/** Attribute indicating a context that contains Compute jobs. That is,
+	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE,
+
+	/** Attribute indicating a context that contains Non-Compute jobs. That is,
+	 * the context has some jobs that are \b not of type @ref
+	 * BASE_JD_REQ_ONLY_COMPUTE. The context usually has
+	 * BASE_CONTEXT_HINT_COMPUTE \b clear, but this depends on the HW
+	 * workarounds in use in the Job Scheduling Policy.
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+	/** Attribute indicating that a context contains compute-job atoms that
+	 * aren't restricted to a coherent group, and can run on all cores.
+	 *
+	 * Specifically, this is when the atom's \a core_req satisfy:
+	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
+	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+	 *
+	 * Such atoms could be blocked from running if one of the coherent groups
+	 * is being used by another job slot, so tracking this context attribute
+	 * allows us to prevent such situations.
+	 *
+	 * @note This doesn't take into account the 1-coregroup case, where all
+	 * compute atoms would effectively be able to run on 'all cores', but
+	 * contexts will still not always get marked with this attribute. Instead,
+	 * it is the caller's responsibility to take into account the number of
+	 * coregroups when interpreting this attribute.
+	 *
+	 * @note Whilst Tiler atoms are normally combined with
+	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+	 * enough to handle anyway.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+	/** Must be the last in the enum */
+	KBASEP_JS_CTX_ATTR_COUNT
+};
+
+enum {
+	/** Bit indicating that new atom should be started because this atom completed */
+	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
+	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
+
+/**
+ * Data used by the scheduler that is unique for each Address Space.
+ *
+ * This is used in IRQ context and kbasep_js_device_data::runpoool_irq::lock
+ * must be held whilst accessing this data (inculding reads and atomic
+ * decisions based on the read).
+ */
+struct kbasep_js_per_as_data {
+	/**
+	 * Ref count of whether this AS is busy, and must not be scheduled out
+	 *
+	 * When jobs are running this is always positive. However, it can still be
+	 * positive when no jobs are running. If all you need is a heuristic to
+	 * tell you whether jobs might be running, this should be sufficient.
+	 */
+	int as_busy_refcount;
+
+	/** Pointer to the current context on this address space, or NULL for no context */
+	struct kbase_context *kctx;
+};
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not rollover (which would be undefined behavior), and so under
+ * the Total License model, it is free to make optimizations based on that (i.e.
+ * to remove masking).
+ */
+struct kbasep_js_device_data {
+	/** Sub-structure to collect together Job Scheduling data used in IRQ context */
+	struct runpool_irq {
+		/**
+		 * Lock for accessing Job Scheduling data used in IRQ context
+		 *
+		 * This lock must be held whenever this data is accessed (read, or
+		 * write). Even for read-only access, memory barriers would be needed.
+		 * In any case, it is likely that decisions based on only reading must
+		 * also be atomic with respect to data held here and elsewhere in the
+		 * Job Scheduler.
+		 *
+		 * This lock must also be held for accessing:
+		 * - kbase_context::as_nr
+		 * - kbase_device::jm_slots
+		 * - Parts of the kbasep_js_policy, dependent on the policy (refer to
+		 * the policy in question for more information)
+		 * - Parts of kbasep_js_policy_ctx_info, dependent on the policy (refer to
+		 * the policy in question for more information)
+		 */
+		spinlock_t lock;
+
+		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
+		 * When bit 'N' is set in this, it indicates whether the context bound to address space
+		 * 'N' (per_as_data[N].kctx) is allowed to submit jobs.
+		 *
+		 * It is placed here because it's much more memory efficient than having a u8 in
+		 * struct kbasep_js_per_as_data to store this flag  */
+		u16 submit_allowed;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of contexts
+		 * that can fit into the runpool. This is currently BASE_MAX_NR_AS
+		 *
+		 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+		 * the refcount. Hence, it's not worthwhile reducing this to
+		 * bit-manipulation on u32s to save space (where in contrast, 4 bit
+		 * sub-fields would be easy to do and would save space).
+		 *
+		 * Whilst this must not become negative, the sign bit is used for:
+		 * - error detection in debug builds
+		 * - Optimization: it is undefined for a signed int to overflow, and so
+		 * the compiler can optimize for that never happening (thus, no masking
+		 * is required on updating the variable) */
+		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/** Data that is unique for each AS */
+		struct kbasep_js_per_as_data per_as_data[BASE_MAX_NR_AS];
+
+		/*
+		 * Affinity management and tracking
+		 */
+		/** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
+		 * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
+		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
+		/** Refcount for each core owned by each slot. Used to generate the
+		 * slot_affinities array of bitvectors
+		 *
+		 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+		 * because it is refcounted only when a job is definitely about to be
+		 * submitted to a slot, and is de-refcounted immediately after a job
+		 * finishes */
+		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
+	} runpool_irq;
+
+	/**
+	 * Run Pool mutex, for managing contexts within the runpool.
+	 * Unless otherwise specified, you must hold this lock whilst accessing any
+	 * members that follow
+	 *
+	 * In addition, this is used to access:
+	 * - the kbasep_js_kctx_info::runpool substructure
+	 */
+	struct mutex runpool_mutex;
+
+	/**
+	 * Queue Lock, used to access the Policy's queue of contexts independently
+	 * of the Run Pool.
+	 *
+	 * Of course, you don't need the Run Pool lock to access this.
+	 */
+	struct mutex queue_mutex;
+
+	/**
+	 * Scheduling semaphore. This must be held when calling
+	 * kbase_jm_kick()
+	 */
+	struct semaphore schedule_sem;
+
+	/**
+	 * List of contexts that can currently be pulled from
+	 */
+	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
+	/**
+	 * List of contexts that can not currently be pulled from, but have
+	 * jobs currently running.
+	 */
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
+
+	u16 as_free;				/**< Bitpattern of free Address Spaces */
+
+	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+	s8 nr_user_contexts_running;
+	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+	s8 nr_all_contexts_running;
+
+	/**
+	 * Policy-specific information.
+	 *
+	 * Refer to the structure defined by the current policy to determine which
+	 * locks must be held when accessing this.
+	 */
+	union kbasep_js_policy policy;
+
+	/** Core Requirements to match up with base_js_atom's core_req memeber
+	 * @note This is a write-once member, and so no locking is required to read */
+	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+	u32 scheduling_period_ns;    /*< Value for JS_SCHEDULING_PERIOD_NS */
+	u32 soft_stop_ticks;	     /*< Value for JS_SOFT_STOP_TICKS */
+	u32 soft_stop_ticks_cl;	     /*< Value for JS_SOFT_STOP_TICKS_CL */
+	u32 hard_stop_ticks_ss;	     /*< Value for JS_HARD_STOP_TICKS_SS */
+	u32 hard_stop_ticks_cl;	     /*< Value for JS_HARD_STOP_TICKS_CL */
+	u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */
+	u32 gpu_reset_ticks_ss;	     /*< Value for JS_RESET_TICKS_SS */
+	u32 gpu_reset_ticks_cl;	     /*< Value for JS_RESET_TICKS_CL */
+	u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
+	u32 ctx_timeslice_ns;		 /**< Value for JS_CTX_TIMESLICE_NS */
+	u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */
+	u32 cfs_ctx_runtime_min_slices;	 /**< Value for  DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */
+
+	/**< Value for JS_SOFT_EVENT_TIMEOUT */
+	atomic_t soft_event_timeout_ms;
+
+	/** List of suspended soft jobs */
+	struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Support soft-stop on a single context */
+	bool softstop_always;
+#endif				/* CONFIG_MALI_DEBUG */
+
+	/** The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths).
+	 * @note This is a write-once member, and so no locking is required to read */
+	int init_status;
+
+	/* Number of contexts that can currently be pulled from */
+	u32 nr_contexts_pullable;
+
+	/* Number of contexts that can either be pulled from or are currently
+	 * running */
+	atomic_t nr_contexts_runnable;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+	/**
+	 * Runpool substructure. This must only be accessed whilst the Run Pool
+	 * mutex ( kbasep_js_device_data::runpool_mutex ) is held.
+	 *
+	 * In addition, the kbasep_js_device_data::runpool_irq::lock may need to be
+	 * held for certain sub-members.
+	 *
+	 * @note some of the members could be moved into struct kbasep_js_device_data for
+	 * improved d-cache/tlb efficiency.
+	 */
+	struct {
+		union kbasep_js_policy_ctx_info policy_ctx;	/**< Policy-specific context */
+	} runpool;
+
+	/**
+	 * Job Scheduler Context information sub-structure. These members are
+	 * accessed regardless of whether the context is:
+	 * - In the Policy's Run Pool
+	 * - In the Policy's Queue
+	 * - Not queued nor in the Run Pool.
+	 *
+	 * You must obtain the jsctx_mutex before accessing any other members of
+	 * this substructure.
+	 *
+	 * You may not access any of these members from IRQ context.
+	 */
+	struct kbase_jsctx {
+		struct mutex jsctx_mutex;		    /**< Job Scheduler Context lock */
+
+		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+		 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
+		 * for such jobs*/
+		u32 nr_jobs;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of atoms on
+		 * the context. **/
+		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		kbase_context_flags flags;
+		/* NOTE: Unify the following flags into kbase_context_flags */
+		/**
+		 * Is the context scheduled on the Run Pool?
+		 *
+		 * This is only ever updated whilst the jsctx_mutex is held.
+		 */
+		bool is_scheduled;
+		/**
+		 * Wait queue to wait for is_scheduled state changes.
+		 * */
+		wait_queue_head_t is_scheduled_wait;
+
+		bool is_dying;			/**< Is the context in the process of being evicted? */
+
+		/** Link implementing JS queues. Context can be present on one
+		 * list per job slot
+		 */
+		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
+	} ctx;
+
+	/* The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths) */
+	int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+	/** Event code - to determine whether the atom has finished */
+	enum base_jd_event_code event_code;
+	/** core requirements */
+	base_jd_core_req core_req;
+	/* priority */
+	int sched_priority;
+	/** Job Slot to retry submitting to if submission from IRQ handler failed */
+	int retry_submit_on_slot;
+	/* Core group atom was executed on */
+	u32 device_nr;
+
+};
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbase_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
+ *
+ * @see kbase_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_MED,
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,
+	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
new file mode 100755
index 0000000..debd011
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
@@ -0,0 +1,763 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_policy.h
+ * Job Scheduler Policy APIs.
+ */
+
+#ifndef _KBASE_JS_POLICY_H_
+#define _KBASE_JS_POLICY_H_
+
+/**
+ * @page page_kbase_js_policy Job Scheduling Policies
+ * The Job Scheduling system is described in the following:
+ * - @subpage page_kbase_js_policy_overview
+ * - @subpage page_kbase_js_policy_operation
+ *
+ * The API details are as follows:
+ * - @ref kbase_jm
+ * - @ref kbase_js
+ * - @ref kbase_js_policy
+ */
+
+/**
+ * @page page_kbase_js_policy_overview Overview of the Policy System
+ *
+ * The Job Scheduler Policy manages:
+ * - The assigning of KBase Contexts to GPU Address Spaces (\em ASs)
+ * - The choosing of Job Chains (\em Jobs) from a KBase context, to run on the
+ * GPU's Job Slots (\em JSs).
+ * - The amount of \em time a context is assigned to (<em>scheduled on</em>) an
+ * Address Space
+ * - The amount of \em time a Job spends running on the GPU
+ *
+ * The Policy implements this management via 2 components:
+ * - A Policy Queue, which manages a set of contexts that are ready to run,
+ * but not currently running.
+ * - A Policy Run Pool, which manages the currently running contexts (one per Address
+ * Space) and the jobs to run on the Job Slots.
+ *
+ * Each Graphics Process in the system has at least one KBase Context. Therefore,
+ * the Policy Queue can be seen as a queue of Processes waiting to run Jobs on
+ * the GPU.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_overview.dot "Diagram showing a very simplified overview of the Policy System. IRQ handling, soft/hard-stopping, contexts re-entering the system and Policy details are omitted"
+ *
+ * The main operations on the queue are:
+ * - Enqueuing a Context to it
+ * - Dequeuing a Context from it, to run it.
+ * - Note: requeuing a context is much the same as enqueuing a context, but
+ * occurs when a context is scheduled out of the system to allow other contexts
+ * to run.
+ *
+ * These operations have much the same meaning for the Run Pool - Jobs are
+ * dequeued to run on a Jobslot, and requeued when they are scheduled out of
+ * the GPU.
+ *
+ * @note This is an over-simplification of the Policy APIs - there are more
+ * operations than 'Enqueue'/'Dequeue', and a Dequeue from the Policy Queue
+ * takes at least two function calls: one to Dequeue from the Queue, one to add
+ * to the Run Pool.
+ *
+ * As indicated on the diagram, Jobs permanently leave the scheduling system
+ * when they are completed, otherwise they get dequeued/requeued until this
+ * happens. Similarly, Contexts leave the scheduling system when their jobs
+ * have all completed. However, Contexts may later return to the scheduling
+ * system (not shown on the diagram) if more Bags of Jobs are submitted to
+ * them.
+ */
+
+/**
+ * @page page_kbase_js_policy_operation Policy Operation
+ *
+ * We describe the actions that the Job Scheduler Core takes on the Policy in
+ * the following cases:
+ * - The IRQ Path
+ * - The Job Submission Path
+ * - The High Priority Job Submission Path
+ *
+ * This shows how the Policy APIs will be used by the Job Scheduler core.
+ *
+ * The following diagram shows an example Policy that contains a Low Priority
+ * queue, and a Real-time (High Priority) Queue. The RT queue is examined
+ * before the LowP one on dequeuing from the head. The Low Priority Queue is
+ * ordered by time, and the RT queue is ordered by time weighted by
+ * RT-priority. In addition, it shows that the Job Scheduler Core will start a
+ * Soft-Stop Timer (SS-Timer) when it dequeue's and submits a job. The
+ * Soft-Stop time is set by a global configuration value, and must be a value
+ * appropriate for the policy. For example, this could include "don't run a
+ * soft-stop timer" for a First-Come-First-Served (FCFS) policy.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_operation_diagram.dot "Diagram showing the objects managed by an Example Policy, and the operations made upon these objects by the Job Scheduler Core."
+ *
+ * @section sec_kbase_js_policy_operation_prio Dealing with Priority
+ *
+ * Priority applies separately to a context as a whole, and to the jobs within
+ * a context. The jobs specify a priority in the base_jd_atom::prio member, but
+ * it is independent of the context priority. That is, it only affects
+ * scheduling of atoms within a context. Refer to @ref base_jd_prio for more
+ * details. The meaning of the context's priority value is up to the policy
+ * itself, and could be a logarithmic scale instead of a linear scale (e.g. the
+ * policy could implement an increase/decrease in priority by 1 results in an
+ * increase/decrease in \em proportion of time spent scheduled in by 25%, an
+ * effective change in timeslice by 11%).
+ *
+ * It is up to the policy whether a boost in priority boosts the priority of
+ * the entire context (e.g. to such an extent where it may pre-empt other
+ * running contexts). If it chooses to do this, the Policy must make sure that
+ * only jobs from high-priority contexts are run, and that the context is
+ * scheduled out once only jobs from low priority contexts remain. This ensures
+ * that the low priority contexts do not gain from the priority boost, yet they
+ * still get scheduled correctly with respect to other low priority contexts.
+ *
+ *
+ * @section sec_kbase_js_policy_operation_irq IRQ Path
+ *
+ * The following happens on the IRQ path from the Job Scheduler Core:
+ * - Note the slot that completed (for later)
+ * - Log the time spent by the job (and implicitly, the time spent by the
+ * context)
+ *  - call kbasep_js_policy_log_job_result() <em>in the context of the irq
+ * handler.</em>
+ *  - This must happen regardless of whether the job completed successfully or
+ * not (otherwise the context gets away with DoS'ing the system with faulty jobs)
+ * - What was the result of the job?
+ *  - If Completed: job is just removed from the system
+ *  - If Hard-stop or failure: job is removed from the system
+ *  - If Soft-stop: queue the book-keeping work onto a work-queue: have a
+ * work-queue call kbasep_js_policy_enqueue_job()
+ * - Check the timeslice used by the owning context
+ *  - call kbasep_js_policy_should_remove_ctx() <em>in the context of the irq
+ * handler.</em>
+ *  - If this returns true, clear the "allowed" flag.
+ * - Check the ctx's flags for "allowed", "has jobs to run" and "is running
+ * jobs"
+ * - And so, should the context stay scheduled in?
+ *  - If No, push onto a work-queue the work of scheduling out the old context,
+ * and getting a new one. That is:
+ *   - kbasep_js_policy_runpool_remove_ctx() on old_ctx
+ *   - kbasep_js_policy_enqueue_ctx() on old_ctx
+ *   - kbasep_js_policy_dequeue_head_ctx() to get new_ctx
+ *   - kbasep_js_policy_runpool_add_ctx() on new_ctx
+ *   - (all of this work is deferred on a work-queue to keep the IRQ handler quick)
+ * - If there is space in the completed job slots' HEAD/NEXT registers, run the next job:
+ *  - kbasep_js_policy_dequeue_job() <em>in the context of the irq
+ * handler</em> with core_req set to that of the completing slot
+ *  - if this returned true, submit the job to the completed slot.
+ *  - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * false, or the job slot has a job queued on both the HEAD and NEXT registers.
+ *  - If kbasep_js_policy_dequeue_job() returned false, submit some work to
+ * the work-queue to retry from outside of IRQ context (calling
+ * kbasep_js_policy_dequeue_job() from a work-queue).
+ *
+ * Since the IRQ handler submits new jobs \em and re-checks the IRQ_RAWSTAT,
+ * this sequence could loop a large number of times: this could happen if
+ * the jobs submitted completed on the GPU very quickly (in a few cycles), such
+ * as GPU NULL jobs. Then, the HEAD/NEXT registers will always be free to take
+ * more jobs, causing us to loop until we run out of jobs.
+ *
+ * To mitigate this, we must limit the number of jobs submitted per slot during
+ * the IRQ handler - for example, no more than 2 jobs per slot per IRQ should
+ * be sufficient (to fill up the HEAD + NEXT registers in normal cases). For
+ * Mali-T600 with 3 job slots, this means that up to 6 jobs could be submitted per
+ * slot. Note that IRQ Throttling can make this situation commonplace: 6 jobs
+ * could complete but the IRQ for each of them is delayed by the throttling. By
+ * the time you get the IRQ, all 6 jobs could've completed, meaning you can
+ * submit jobs to fill all 6 HEAD+NEXT registers again.
+ *
+ * @note As much work is deferred as possible, which includes the scheduling
+ * out of a context and scheduling in a new context. However, we can still make
+ * starting a single high-priorty context quick despite this:
+ * - On Mali-T600 family, there is one more AS than JSs.
+ * - This means we can very quickly schedule out one AS, no matter what the
+ * situation (because there will always be one AS that's not currently running
+ * on the job slot - it can only have a job in the NEXT register).
+ *  - Even with this scheduling out, fair-share can still be guaranteed e.g. by
+ * a timeline-based Completely Fair Scheduler.
+ * - When our high-priority context comes in, we can do this quick-scheduling
+ * out immediately, and then schedule in the high-priority context without having to block.
+ * - This all assumes that the context to schedule out is of lower
+ * priority. Otherwise, we will have to block waiting for some other low
+ * priority context to finish its jobs. Note that it's likely (but not
+ * impossible) that the high-priority context \b is running jobs, by virtue of
+ * it being high priority.
+ * - Therefore, we can give a high liklihood that on Mali-T600 at least one
+ * high-priority context can be started very quickly. For the general case, we
+ * can guarantee starting (no. ASs) - (no. JSs) high priority contexts
+ * quickly. In any case, there is a high likelihood that we're able to start
+ * more than one high priority context quickly.
+ *
+ * In terms of the functions used in the IRQ handler directly, these are the
+ * perfomance considerations:
+ * - kbase_js_policy_log_job_result():
+ *  - This is just adding to a 64-bit value (possibly even a 32-bit value if we
+ * only store the time the job's recently spent - see below on 'priority weighting')
+ *  - For priority weighting, a divide operation ('div') could happen, but
+ * this can happen in a deferred context (outside of IRQ) when scheduling out
+ * the ctx; as per our Engineering Specification, the contexts of different
+ * priority still stay scheduled in for the same timeslice, but higher priority
+ * ones scheduled back in more often.
+ *  - That is, the weighted and unweighted times must be stored separately, and
+ * the weighted time is only updated \em outside of IRQ context.
+ *  - Of course, this divide is more likely to be a 'multiply by inverse of the
+ * weight', assuming that the weight (priority) doesn't change.
+ * - kbasep_js_policy_should_remove_ctx():
+ *  - This is usually just a comparison of the stored time value against some
+ * maximum value.
+ *
+ * @note all deferred work can be wrapped up into one call - we usually need to
+ * indicate that a job/bag is done outside of IRQ context anyway.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit Submission path
+ *
+ * Start with a Context with no jobs present, and assume equal priority of all
+ * contexts in the system. The following work all happens outside of IRQ
+ * Context :
+ * - As soon as job is made 'ready to 'run', then is must be registerd with the Job
+ * Scheduler Policy:
+ *  - 'Ready to run' means they've satisified their dependencies in the
+ * Kernel-side Job Dispatch system.
+ *  - Call kbasep_js_policy_enqueue_job()
+ *  - This indicates that the job should be scheduled (it is ready to run).
+ * - As soon as a ctx changes from having 0 jobs 'ready to run' to >0 jobs
+ * 'ready to run', we enqueue the context on the policy queue:
+ *  - Call kbasep_js_policy_enqueue_ctx()
+ *  - This indicates that the \em ctx should be scheduled (it is ready to run)
+ *
+ * Next, we need to handle adding a context to the Run Pool - if it's sensible
+ * to do so. This can happen due to two reasons:
+ * -# A context is enqueued as above, and there are ASs free for it to run on
+ * (e.g. it is the first context to be run, in which case it can be added to
+ * the Run Pool immediately after enqueuing on the Policy Queue)
+ * -# A previous IRQ caused another ctx to be scheduled out, requiring that the
+ * context at the head of the queue be scheduled in. Such steps would happen in
+ * a work queue (work deferred from the IRQ context).
+ *
+ * In both cases, we'd handle it as follows:
+ * - Get the context at the Head of the Policy Queue:
+ *  - Call kbasep_js_policy_dequeue_head_ctx()
+ * - Assign the Context an Address Space (Assert that there will be one free,
+ * given the above two reasons)
+ * - Add this context to the Run Pool:
+ *  - Call kbasep_js_policy_runpool_add_ctx()
+ * - Now see if a job should be run:
+ *  - Mostly, this will be done in the IRQ handler at the completion of a
+ * previous job.
+ *  - However, there are two cases where this cannot be done: a) The first job
+ * enqueued to the system (there is no previous IRQ to act upon) b) When jobs
+ * are submitted at a low enough rate to not fill up all Job Slots (or, not to
+ * fill both the 'HEAD' and 'NEXT' registers in the job-slots)
+ *  - Hence, on each ctx <b>and job</b> submission we should try to see if we
+ * can run a job:
+ *  - For each job slot that has free space (in NEXT or HEAD+NEXT registers):
+ *   - Call kbasep_js_policy_dequeue_job() with core_req set to that of the
+ * slot
+ *   - if we got one, submit it to the job slot.
+ *   - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * false, or the job slot has a job queued on both the HEAD and NEXT registers.
+ *
+ * The above case shows that we should attempt to run jobs in cases where a) a ctx
+ * has been added to the Run Pool, and b) new jobs have been added to a context
+ * in the Run Pool:
+ * - In the latter case, the context is in the runpool because it's got a job
+ * ready to run, or is already running a job
+ * - We could just wait until the IRQ handler fires, but for certain types of
+ * jobs this can take comparatively a long time to complete, e.g. GLES FS jobs
+ * generally take much longer to run that GLES CS jobs, which are vertex shader
+ * jobs.
+ * - Therefore, when a new job appears in the ctx, we must check the job-slots
+ * to see if they're free, and run the jobs as before.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit_hipri Submission path for High Priority Contexts
+ *
+ * For High Priority Contexts on Mali-T600, we can make sure that at least 1 of
+ * them can be scheduled in immediately to start high prioriy jobs. In general,
+ * (no. ASs) - (no JSs) high priority contexts may be started immediately. The
+ * following describes how this happens:
+ *
+ * Similar to the previous section, consider what happens with a high-priority
+ * context (a context with a priority higher than that of any in the Run Pool)
+ * that starts out with no jobs:
+ * - A job becomes ready to run on the context, and so we enqueue the context
+ * on the Policy's Queue.
+ * - However, we'd like to schedule in this context immediately, instead of
+ * waiting for one of the Run Pool contexts' timeslice to expire
+ * - The policy's Enqueue function must detect this (because it is the policy
+ * that embodies the concept of priority), and take appropriate action
+ *  - That is, kbasep_js_policy_enqueue_ctx() should check the Policy's Run
+ * Pool to see if a lower priority context should be scheduled out, and then
+ * schedule in the High Priority context.
+ *  - For Mali-T600, we can always pick a context to schedule out immediately
+ * (because there are more ASs than JSs), and so scheduling out a victim context
+ * and scheduling in the high priority context can happen immediately.
+ *   - If a policy implements fair-sharing, then this can still ensure the
+ * victim later on gets a fair share of the GPU.
+ *   - As a note, consider whether the victim can be of equal/higher priority
+ * than the incoming context:
+ *   - Usually, higher priority contexts will be the ones currently running
+ * jobs, and so the context with the lowest priority is usually not running
+ * jobs.
+ *   - This makes it likely that the victim context is low priority, but
+ * it's not impossible for it to be a high priority one:
+ *    - Suppose 3 high priority contexts are submitting only FS jobs, and one low
+ * priority context submitting CS jobs. Then, the context not running jobs will
+ * be one of the hi priority contexts (because only 2 FS jobs can be
+ * queued/running on the GPU HW for Mali-T600).
+ *   - The problem can be mitigated by extra action, but it's questionable
+ * whether we need to: we already have a high likelihood that there's at least
+ * one high priority context - that should be good enough.
+ *   - And so, this method makes sure that at least one high priority context
+ * can be started very quickly, but more than one high priority contexts could be
+ * delayed (up to one timeslice).
+ *   - To improve this, use a GPU with a higher number of Address Spaces vs Job
+ * Slots.
+ * - At this point, let's assume this high priority context has been scheduled
+ * in immediately. The next step is to ensure it can start some jobs quickly.
+ *  - It must do this by Soft-Stopping jobs on any of the Job Slots that it can
+ * submit to.
+ *  - The rest of the logic for starting the jobs is taken care of by the IRQ
+ * handler. All the policy needs to do is ensure that
+ * kbasep_js_policy_dequeue_job() will return the jobs from the high priority
+ * context.
+ *
+ * @note in SS state, we currently only use 2 job-slots (even for T608, but
+ * this might change in future). In this case, it's always possible to schedule
+ * out 2 ASs quickly (their jobs won't be in the HEAD registers). At the same
+ * time, this maximizes usage of the job-slots (only 2 are in use), because you
+ * can guarantee starting of the jobs from the High Priority contexts immediately too.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_notes Notes
+ *
+ * - In this design, a separate 'init' is needed from dequeue/requeue, so that
+ * information can be retained between the dequeue/requeue calls. For example,
+ * the total time spent for a context/job could be logged between
+ * dequeue/requeuing, to implement Fair Sharing. In this case, 'init' just
+ * initializes that information to some known state.
+ *
+ *
+ *
+ */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js_policy Job Scheduler Policy APIs
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy structure
+ */
+union kbasep_js_policy;
+
+/**
+ * @brief Initialize the Job Scheduler Policy
+ */
+int kbasep_js_policy_init(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler Policy
+ */
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy);
+
+/**
+ * @addtogroup kbase_js_policy_ctx Job Scheduler Policy, Context Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Ctx Info structure
+ *
+ * This structure is embedded in the struct kbase_context structure. It is used to:
+ * - track information needed for the policy to schedule the context (e.g. time
+ * used, OS priority etc.)
+ * - link together kbase_contexts into a queue, so that a struct kbase_context can be
+ * obtained as the container of the policy ctx info. This allows the API to
+ * return what "the next context" should be.
+ * - obtain other information already stored in the struct kbase_context for
+ * scheduling purposes (e.g process ID to get the priority of the originating
+ * process)
+ */
+union kbasep_js_policy_ctx_info;
+
+/**
+ * @brief Initialize a ctx for use with the Job Scheduler Policy
+ *
+ * This effectively initializes the union kbasep_js_policy_ctx_info structure within
+ * the struct kbase_context (itself located within the kctx->jctx.sched_info structure).
+ */
+int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Terminate resources associated with using a ctx in the Job Scheduler
+ * Policy.
+ */
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Enqueue a context onto the Job Scheduler Policy Queue
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out a low
+ * priority context from the Run Pool to allow the high priority context to be
+ * scheduled in.
+ *
+ * If the context has the privileged flag set, it will always be kept at the
+ * head of the queue.
+ *
+ * The caller will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ */
+void kbasep_js_policy_enqueue_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Dequeue a context from the Head of the Job Scheduler Policy Queue
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return true if a context was available, and *kctx_ptr points to
+ * the kctx dequeued.
+ * @return false if no contexts were available.
+ */
+bool kbasep_js_policy_dequeue_head_ctx(union kbasep_js_policy *js_policy, struct kbase_context ** const kctx_ptr);
+
+/**
+ * @brief Evict a context from the Job Scheduler Policy Queue
+ *
+ * This is only called as part of destroying a kbase_context.
+ *
+ * There are many reasons why this might fail during the lifetime of a
+ * context. For example, the context is in the process of being scheduled. In
+ * that case a thread doing the scheduling might have a pointer to it, but the
+ * context is neither in the Policy Queue, nor is it in the Run
+ * Pool. Crucially, neither the Policy Queue, Run Pool, or the Context itself
+ * are locked.
+ *
+ * Hence to find out where in the system the context is, it is important to do
+ * more than just check the kbasep_js_kctx_info::ctx::is_scheduled member.
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return true if the context was evicted from the Policy Queue
+ * @return false if the context was not found in the Policy Queue
+ */
+bool kbasep_js_policy_try_evict_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Call a function on all jobs belonging to a non-queued, non-running
+ * context, optionally detaching the jobs from the context as it goes.
+ *
+ * At the time of the call, the context is guarenteed to be not-currently
+ * scheduled on the Run Pool (is_scheduled == false), and not present in
+ * the Policy Queue. This is because one of the following functions was used
+ * recently on the context:
+ * - kbasep_js_policy_evict_ctx()
+ * - kbasep_js_policy_runpool_remove_ctx()
+ *
+ * In both cases, no subsequent call was made on the context to any of:
+ * - kbasep_js_policy_runpool_add_ctx()
+ * - kbasep_js_policy_enqueue_ctx()
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * \a detach_jobs must only be set when cancelling jobs (which occurs as part
+ * of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+void kbasep_js_policy_foreach_ctx_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx,
+	kbasep_js_policy_ctx_job_cb callback, bool detach_jobs);
+
+/**
+ * @brief Add a context to the Job Scheduler Policy's Run Pool
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out low
+ * priority jobs that are currently running on the GPU.
+ *
+ * The number of contexts present in the Run Pool will never be more than the
+ * number of Address Spaces.
+ *
+ * The following guarentees are made about the state of the system when this
+ * is called:
+ * - kctx->as_nr member is valid
+ * - the context has its submit_allowed flag set
+ * - kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is valid
+ * - The refcount of the context is guarenteed to be zero.
+ * - kbasep_js_kctx_info::ctx::is_scheduled will be true.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_add_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Remove a context from the Job Scheduler Policy's Run Pool
+ *
+ * The kctx->as_nr member is valid and the context has its submit_allowed flag
+ * set when this is called. The state of
+ * kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is also
+ * valid. The refcount of the context is guarenteed to be zero.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Indicate whether a context should be removed from the Run Pool
+ * (should be scheduled out).
+ *
+ * The kbasep_js_device_data::runpool_irq::lock will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ */
+bool kbasep_js_policy_should_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Synchronize with any timers acting upon the runpool
+ *
+ * The policy should check whether any timers it owns should be running. If
+ * they should not, the policy must cancel such timers and ensure they are not
+ * re-run by the time this function finishes.
+ *
+ * In particular, the timers must not be running when there are no more contexts
+ * on the runpool, because the GPU could be powered off soon after this call.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ */
+void kbasep_js_policy_runpool_timers_sync(union kbasep_js_policy *js_policy);
+
+
+/**
+ * @brief Indicate whether a new context has an higher priority than the current context.
+ *
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held for \a new_ctx
+ *
+ * This function must not sleep, because an IRQ spinlock might be held whilst
+ * this is called.
+ *
+ * @note There is nothing to stop the priority of \a current_ctx changing
+ * during or immediately after this function is called (because its jsctx_mutex
+ * cannot be held). Therefore, this function should only be seen as a heuristic
+ * guide as to whether \a new_ctx is higher priority than \a current_ctx
+ */
+bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx);
+
+	  /** @} *//* end group kbase_js_policy_ctx */
+
+/**
+ * @addtogroup kbase_js_policy_job Job Scheduler Policy, Job Chain Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Job Info structure
+ *
+ * This structure is embedded in the struct kbase_jd_atom structure. It is used to:
+ * - track information needed for the policy to schedule the job (e.g. time
+ * used, etc.)
+ * - link together jobs into a queue/buffer, so that a struct kbase_jd_atom can be
+ * obtained as the container of the policy job info. This allows the API to
+ * return what "the next job" should be.
+ */
+union kbasep_js_policy_job_info;
+
+/**
+ * @brief Initialize a job for use with the Job Scheduler Policy
+ *
+ * This function initializes the union kbasep_js_policy_job_info structure within the
+ * kbase_jd_atom. It will only initialize/allocate resources that are specific
+ * to the job.
+ *
+ * That is, this function makes \b no attempt to:
+ * - initialize any context/policy-wide information
+ * - enqueue the job on the policy.
+ *
+ * At some later point, the following functions must be called on the job, in this order:
+ * - kbasep_js_policy_register_job() to register the job and initialize policy/context wide data.
+ * - kbasep_js_policy_enqueue_job() to enqueue the job
+ *
+ * A job must only ever be initialized on the Policy once, and must be
+ * terminated on the Policy before the job is freed.
+ *
+ * The caller will not be holding any locks, and so this function will not
+ * modify any information in \a kctx or \a js_policy.
+ *
+ * @return 0 if initialization was correct.
+ */
+int kbasep_js_policy_init_job(const union kbasep_js_policy *js_policy, const struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Register context/policy-wide information for a job on the Job Scheduler Policy.
+ *
+ * Registers the job with the policy. This is used to track the job before it
+ * has been enqueued/requeued by kbasep_js_policy_enqueue_job(). Specifically,
+ * it is used to update information under a lock that could not be updated at
+ * kbasep_js_policy_init_job() time (such as context/policy-wide data).
+ *
+ * @note This function will not fail, and hence does not allocate any
+ * resources. Any failures that could occur on registration will be caught
+ * during kbasep_js_policy_init_job() instead.
+ *
+ * A job must only ever be registerd on the Policy once, and must be
+ * deregistered on the Policy on completion (whether or not that completion was
+ * success/failure).
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_register_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief De-register context/policy-wide information for a on the Job Scheduler Policy.
+ *
+ * This must be used before terminating the resources associated with using a
+ * job in the Job Scheduler Policy. This function does not itself terminate any
+ * resources, at most it just updates information in the policy and context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_deregister_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Dequeue a Job for a job slot from the Job Scheduler Policy Run Pool
+ *
+ * The job returned by the policy will match at least one of the bits in the
+ * job slot's core requirements (but it may match more than one, or all @ref
+ * base_jd_core_req bits supported by the job slot).
+ *
+ * In addition, the requirements of the job returned will be a subset of those
+ * requested - the job returned will not have requirements that \a job_slot_idx
+ * cannot satisfy.
+ *
+ * The caller will submit the job to the GPU as soon as the GPU's NEXT register
+ * for the corresponding slot is empty. Of course, the GPU will then only run
+ * this new job when the currently executing job (in the jobslot's HEAD
+ * register) has completed.
+ *
+ * @return true if a job was available, and *kctx_ptr points to
+ * the kctx dequeued.
+ * @return false if no jobs were available among all ctxs in the Run Pool.
+ *
+ * @note base_jd_core_req is currently a u8 - beware of type conversion.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_lock::irq will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex. will be held
+ */
+bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, struct kbase_jd_atom ** const katom_ptr);
+
+/**
+ * @brief Requeue a Job back into the Job Scheduler Policy Run Pool
+ *
+ * This will be used to enqueue a job after its creation and also to requeue
+ * a job into the Run Pool that was previously dequeued (running). It notifies
+ * the policy that the job should be run again at some point later.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock (a spinlock) will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_enqueue_job(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Log the result of a job: the time spent on a job/context, and whether
+ * the job failed or not.
+ *
+ * Since a struct kbase_jd_atom contains a pointer to the struct kbase_context owning it,
+ * then this can also be used to log time on either/both the job and the
+ * containing context.
+ *
+ * The completion state of the job can be found by examining \a katom->event.event_code
+ *
+ * If the Job failed and the policy is implementing fair-sharing, then the
+ * policy must penalize the failing job/context:
+ * - At the very least, it should penalize the time taken by the amount of
+ * time spent processing the IRQ in SW. This because a job in the NEXT slot
+ * waiting to run will be delayed until the failing job has had the IRQ
+ * cleared.
+ * - \b Optionally, the policy could apply other penalties. For example, based
+ * on a threshold of a number of failing jobs, after which a large penalty is
+ * applied.
+ *
+ * The kbasep_js_device_data::runpool_mutex will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock will be held.
+ *
+ * @param js_policy     job scheduler policy
+ * @param katom         job dispatch atom
+ * @param time_spent_us the time spent by the job, in microseconds (10^-6 seconds).
+ */
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us);
+
+	  /** @} *//* end group kbase_js_policy_job */
+
+	  /** @} *//* end group kbase_js_policy */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_POLICY_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
new file mode 100755
index 0000000..6924607
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
@@ -0,0 +1,297 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler: Completely Fair Policy Implementation
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_js_policy_cfs.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
+#include <linux/sched/rt.h>
+#endif
+
+/**
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is enabled for a particular level is down to the integrator.
+ * However this is being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/* Fixed point constants used for runtime weight calculations */
+#define WEIGHT_FIXEDPOINT_SHIFT 10
+#define WEIGHT_TABLE_SIZE       40
+#define WEIGHT_0_NICE           (WEIGHT_TABLE_SIZE/2)
+#define WEIGHT_0_VAL            (1 << WEIGHT_FIXEDPOINT_SHIFT)
+
+#define PROCESS_PRIORITY_MIN (-20)
+#define PROCESS_PRIORITY_MAX  (19)
+
+/* Defines for easy asserts 'is scheduled'/'is queued'/'is neither queued norscheduled' */
+#define KBASEP_JS_CHECKFLAG_QUEUED       (1u << 0) /**< Check the queued state */
+#define KBASEP_JS_CHECKFLAG_SCHEDULED    (1u << 1) /**< Check the scheduled state */
+#define KBASEP_JS_CHECKFLAG_IS_QUEUED    (1u << 2) /**< Expect queued state to be set */
+#define KBASEP_JS_CHECKFLAG_IS_SCHEDULED (1u << 3) /**< Expect scheduled state to be set */
+
+enum {
+	KBASEP_JS_CHECK_NOTQUEUED = KBASEP_JS_CHECKFLAG_QUEUED,
+	KBASEP_JS_CHECK_NOTSCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED,
+	KBASEP_JS_CHECK_QUEUED = KBASEP_JS_CHECKFLAG_QUEUED | KBASEP_JS_CHECKFLAG_IS_QUEUED,
+	KBASEP_JS_CHECK_SCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED | KBASEP_JS_CHECKFLAG_IS_SCHEDULED
+};
+
+typedef u32 kbasep_js_check;
+
+/*
+ * Private Functions
+ */
+
+/* Table autogenerated using util built from: base/tools/gen_cfs_weight_of_prio/ */
+
+/* weight = 1.25 */
+static const int weight_of_priority[] = {
+	/*  -20 */ 11, 14, 18, 23,
+	/*  -16 */ 29, 36, 45, 56,
+	/*  -12 */ 70, 88, 110, 137,
+	/*   -8 */ 171, 214, 268, 335,
+	/*   -4 */ 419, 524, 655, 819,
+	/*    0 */ 1024, 1280, 1600, 2000,
+	/*    4 */ 2500, 3125, 3906, 4883,
+	/*    8 */ 6104, 7630, 9538, 11923,
+	/*   12 */ 14904, 18630, 23288, 29110,
+	/*   16 */ 36388, 45485, 56856, 71070
+};
+
+/*
+ * Note: There is nothing to stop the priority of the ctx containing
+ * ctx_info changing during or immediately after this function is called
+ * (because its jsctx_mutex cannot be held during IRQ). Therefore, this
+ * function should only be seen as a heuristic guide as to the priority weight
+ * of the context.
+ */
+static u64 priority_weight(struct kbasep_js_policy_cfs_ctx *ctx_info, u64 time_us)
+{
+	u64 time_delta_us;
+	int priority;
+
+	priority = ctx_info->process_priority;
+
+	/* Adjust runtime_us using priority weight if required */
+	if (priority != 0 && time_us != 0) {
+		int clamped_priority;
+
+		/* Clamp values to min..max weights */
+		if (priority > PROCESS_PRIORITY_MAX)
+			clamped_priority = PROCESS_PRIORITY_MAX;
+		else if (priority < PROCESS_PRIORITY_MIN)
+			clamped_priority = PROCESS_PRIORITY_MIN;
+		else
+			clamped_priority = priority;
+
+		/* Fixed point multiplication */
+		time_delta_us = (time_us * weight_of_priority[WEIGHT_0_NICE + clamped_priority]);
+		/* Remove fraction */
+		time_delta_us = time_delta_us >> WEIGHT_FIXEDPOINT_SHIFT;
+		/* Make sure the time always increases */
+		if (0 == time_delta_us)
+			time_delta_us++;
+	} else {
+		time_delta_us = time_us;
+	}
+
+	return time_delta_us;
+}
+
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_policy_trace_get_refcnt_nolock(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+
+static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	refcnt = kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return refcnt;
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+
+/*
+ * Non-private functions
+ */
+
+int kbasep_js_policy_init(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_policy_cfs *policy_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+	policy_info = &js_devdata->policy.cfs;
+
+	atomic64_set(&policy_info->least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+	atomic64_set(&policy_info->rt_least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+
+	policy_info->head_runtime_us = 0;
+
+	return 0;
+}
+
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy)
+{
+	CSTD_UNUSED(js_policy);
+}
+
+int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_policy_cfs_ctx *ctx_info;
+	struct kbasep_js_policy_cfs *policy_info;
+	int policy;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	policy_info = &kbdev->js_data.policy.cfs;
+	ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_INIT_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+	policy = current->policy;
+	if (policy == SCHED_FIFO || policy == SCHED_RR) {
+		ctx_info->process_rt_policy = true;
+		ctx_info->process_priority = (((MAX_RT_PRIO - 1) - current->rt_priority) / 5) - 20;
+	} else {
+		ctx_info->process_rt_policy = false;
+		ctx_info->process_priority = (current->static_prio - MAX_RT_PRIO) - 20;
+	}
+
+	/* Initial runtime (relative to least-run context runtime)
+	 *
+	 * This uses the Policy Queue's most up-to-date head_runtime_us by using the
+	 * queue mutex to issue memory barriers - also ensure future updates to
+	 * head_runtime_us occur strictly after this context is initialized */
+	mutex_lock(&js_devdata->queue_mutex);
+
+	/* No need to hold the the runpool_irq.lock here, because we're initializing
+	 * the value, and the context is definitely not being updated in the
+	 * runpool at this point. The queue_mutex ensures the memory barrier. */
+	ctx_info->runtime_us = policy_info->head_runtime_us + priority_weight(ctx_info, (u64) js_devdata->cfs_ctx_runtime_init_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u));
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return 0;
+}
+
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx)
+{
+	struct kbasep_js_policy_cfs_ctx *ctx_info;
+	struct kbasep_js_policy_cfs *policy_info;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(js_policy != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	policy_info = &js_policy->cfs;
+	ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_TERM_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+	/* No work to do */
+}
+
+/*
+ * Job Chain Management
+ */
+
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us)
+{
+	struct kbasep_js_policy_cfs_ctx *ctx_info;
+	struct kbase_context *parent_ctx;
+
+	KBASE_DEBUG_ASSERT(js_policy != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	CSTD_UNUSED(js_policy);
+
+	parent_ctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(parent_ctx != NULL);
+
+	ctx_info = &parent_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	ctx_info->runtime_us += priority_weight(ctx_info, time_spent_us);
+
+	katom->time_spent_us += time_spent_us;
+}
+
+bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx)
+{
+	struct kbasep_js_policy_cfs_ctx *current_ctx_info;
+	struct kbasep_js_policy_cfs_ctx *new_ctx_info;
+
+	KBASE_DEBUG_ASSERT(current_ctx != NULL);
+	KBASE_DEBUG_ASSERT(new_ctx != NULL);
+	CSTD_UNUSED(js_policy);
+
+	current_ctx_info = &current_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+	new_ctx_info = &new_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	if (!current_ctx_info->process_rt_policy && new_ctx_info->process_rt_policy)
+		return true;
+
+	if (current_ctx_info->process_rt_policy ==
+			new_ctx_info->process_rt_policy)
+		return true;
+
+	return false;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
new file mode 100755
index 0000000..b457d82
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Completely Fair Job Scheduler Policy structure definitions
+ */
+
+#ifndef _KBASE_JS_POLICY_CFS_H_
+#define _KBASE_JS_POLICY_CFS_H_
+
+#define KBASEP_JS_RUNTIME_EMPTY ((u64)-1)
+
+/**
+ * struct kbasep_js_policy_cfs - Data for the CFS policy
+ * @head_runtime_us:  Number of microseconds the least-run context has been
+ *                    running for. The kbasep_js_device_data.queue_mutex must
+ *                    be held whilst updating this.
+ *                    Reads are possible without this mutex, but an older value
+ *                    might be read if no memory barries are issued beforehand.
+ * @least_runtime_us: Number of microseconds the least-run context in the
+ *                    context queue has been running for.
+ *                    -1 if context queue is empty.
+ * @rt_least_runtime_us: Number of microseconds the least-run context in the
+ *                       realtime (priority) context queue has been running for.
+ *                       -1 if realtime context queue is empty
+ */
+struct kbasep_js_policy_cfs {
+	u64 head_runtime_us;
+	atomic64_t least_runtime_us;
+	atomic64_t rt_least_runtime_us;
+};
+
+/**
+ * struct kbasep_js_policy_cfs_ctx - a single linked list of all contexts
+ * @runtime_us:        microseconds this context has been running for
+ * @process_rt_policy: set if calling process policy scheme is a realtime
+ *                     scheduler and will use the priority queue. Non-mutable
+ *                     after ctx init
+ * @process_priority:  calling process NICE priority, in the range -20..19
+ *
+ * &kbasep_js_device_data.runpool_irq.lock must be held when updating
+ * @runtime_us. Initializing will occur on context init and context enqueue
+ * (which can only occur in one thread at a time), but multi-thread access only
+ * occurs while the context is in the runpool.
+ *
+ * Reads are possible without the spinlock, but an older value might be read if
+ * no memory barries are issued beforehand.
+ */
+struct kbasep_js_policy_cfs_ctx {
+	u64 runtime_us;
+	bool process_rt_policy;
+	int process_priority;
+};
+
+/**
+ * struct kbasep_js_policy_cfs_job - per job information for CFS
+ * @ticks: number of ticks that this job has been executing for
+ *
+ * &kbasep_js_device_data.runpool_irq.lock must be held when accessing @ticks.
+ */
+struct kbasep_js_policy_cfs_job {
+	u32 ticks;
+};
+
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100755
index 0000000..6d1e61f
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port. */
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+
+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+	#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+#else
+	#define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100755
index 0000000..88dbdc0
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,2514 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/compat.h>
+
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_tlstream.h>
+
+/**
+ * @brief Check the zone compatibility of two regions.
+ */
+static int kbase_region_tracker_match_zone(struct kbase_va_region *reg1,
+		struct kbase_va_region *reg2)
+{
+	return ((reg1->flags & KBASE_REG_ZONE_MASK) ==
+		(reg2->flags & KBASE_REG_ZONE_MASK));
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_match_zone);
+
+/* This function inserts a region into the tree. */
+static void kbase_region_tracker_insert(struct kbase_context *kctx, struct kbase_va_region *new_reg)
+{
+	u64 start_pfn = new_reg->start_pfn;
+	struct rb_node **link = &(kctx->reg_rbtree.rb_node);
+	struct rb_node *parent = NULL;
+
+	/* Find the right place in the tree using tree search */
+	while (*link) {
+		struct kbase_va_region *old_reg;
+
+		parent = *link;
+		old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+		/* RBTree requires no duplicate entries. */
+		KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+		if (old_reg->start_pfn > start_pfn)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Put the new node there, and rebalance tree */
+	rb_link_node(&(new_reg->rblink), parent, link);
+	rb_insert_color(&(new_reg->rblink), &(kctx->reg_rbtree));
+}
+
+/* Find allocated region enclosing free range. */
+static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
+		struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	u64 end_pfn = start_pfn + nr_pages;
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (start_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (end_pfn > tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (gpu_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (gpu_pfn >= tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->start_pfn > gpu_pfn)
+			rbnode = rbnode->rb_left;
+		else if (reg->start_pfn < gpu_pfn)
+			rbnode = rbnode->rb_right;
+		else
+			return reg;
+
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	/* Note that this search is a linear search, as we do not have a target
+	   address in mind, so does not benefit from the rbtree search */
+	rbnode = rb_first(&(kctx->reg_rbtree));
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if ((reg->nr_pages >= nr_pages) &&
+				(reg->flags & KBASE_REG_FREE) &&
+				kbase_region_tracker_match_zone(reg, reg_reqs)) {
+			/* Check alignment */
+			u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1);
+
+			if ((start_pfn >= reg->start_pfn) &&
+					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1)))
+				return reg;
+		}
+		rbnode = rb_next(rbnode);
+	}
+
+	return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions.  It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ */
+static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	struct rb_node *rbprev;
+	struct kbase_va_region *prev = NULL;
+	struct rb_node *rbnext;
+	struct kbase_va_region *next = NULL;
+
+	int merged_front = 0;
+	int merged_back = 0;
+	int err = 0;
+
+	/* Try to merge with the previous block first */
+	rbprev = rb_prev(&(reg->rblink));
+	if (rbprev) {
+		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+		if ((prev->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(prev, reg)) {
+			/* We're compatible with the previous VMA, merge with it */
+			prev->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+			reg = prev;
+			merged_front = 1;
+		}
+	}
+
+	/* Try to merge with the next block second */
+	/* Note we do the lookup here as the tree may have been rebalanced. */
+	rbnext = rb_next(&(reg->rblink));
+	if (rbnext) {
+		/* We're compatible with the next VMA, merge with it */
+		next = rb_entry(rbnext, struct kbase_va_region, rblink);
+		if ((next->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(next, reg)) {
+			next->start_pfn = reg->start_pfn;
+			next->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+			merged_back = 1;
+			if (merged_front) {
+				/* We already merged with prev, free it */
+				kbase_free_alloced_region(reg);
+			}
+		}
+	}
+
+	/* If we failed to merge then we need to add a new block */
+	if (!(merged_front || merged_back)) {
+		/*
+		 * We didn't merge anything. Add a new free
+		 * placeholder and remove the original one.
+		 */
+		struct kbase_va_region *free_reg;
+
+		free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+		if (!free_reg) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		rb_replace_node(&(reg->rblink), &(free_reg->rblink), &(kctx->reg_rbtree));
+	}
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * @brief Insert a VA region to the list, replacing the current at_reg.
+ */
+static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+	int err = 0;
+
+	/* Must be a free region */
+	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+	/* start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+	/* at least nr_pages from start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	/* Regions are a whole use, so swap and delete old one. */
+	if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+		rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), &(kctx->reg_rbtree));
+		kbase_free_alloced_region(at_reg);
+	}
+	/* New region replaces the start of the old one, so insert before. */
+	else if (at_reg->start_pfn == start_pfn) {
+		at_reg->start_pfn += nr_pages;
+		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region replaces the end of the old one, so insert after. */
+	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region splits the old one, so insert and create new */
+	else {
+		struct kbase_va_region *new_front_reg;
+
+		new_front_reg = kbase_alloc_free_region(kctx,
+				at_reg->start_pfn,
+				start_pfn - at_reg->start_pfn,
+				at_reg->flags & KBASE_REG_ZONE_MASK);
+
+		if (new_front_reg) {
+			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+			at_reg->start_pfn = start_pfn + nr_pages;
+
+			kbase_region_tracker_insert(kctx, new_front_reg);
+			kbase_region_tracker_insert(kctx, new_reg);
+		} else {
+			err = -ENOMEM;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * @brief Add a VA region to the list.
+ */
+int kbase_add_va_region(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 addr,
+		size_t nr_pages, size_t align)
+{
+	struct kbase_va_region *tmp;
+	u64 gpu_pfn = addr >> PAGE_SHIFT;
+	int err = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!align)
+		align = 1;
+
+	/* must be a power of 2 */
+	KBASE_DEBUG_ASSERT((align & (align - 1)) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+	/* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
+	if (gpu_pfn) {
+		struct device *dev = kctx->kbdev->dev;
+
+		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+		tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
+		if (!tmp) {
+			dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		if ((!kbase_region_tracker_match_zone(tmp, reg)) ||
+				(!(tmp->flags & KBASE_REG_FREE))) {
+			dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
+			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
+			dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
+		if (err) {
+			dev_warn(dev, "Failed to insert va region");
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		goto exit;
+	}
+
+	/* Path 2: Map any free address which meets the requirements.  */
+	{
+		u64 start_pfn;
+
+		/*
+		 * Depending on the zone the allocation request is for
+		 * we might need to retry it.
+		 */
+		do {
+			tmp = kbase_region_tracker_find_region_meeting_reqs(
+					kctx, reg, nr_pages, align);
+			if (tmp) {
+				start_pfn = (tmp->start_pfn + align - 1) &
+						~(align - 1);
+				err = kbase_insert_va_region_nolock(kctx, reg,
+						tmp, start_pfn, nr_pages);
+				break;
+			}
+
+			/*
+			 * If the allocation is not from the same zone as JIT
+			 * then don't retry, we're out of VA and there is
+			 * nothing which can be done about it.
+			 */
+			if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+					KBASE_REG_ZONE_CUSTOM_VA)
+				break;
+		} while (kbase_jit_evict(kctx));
+
+		if (!tmp)
+			err = -ENOMEM;
+	}
+
+ exit:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+		struct kbase_va_region *same_va_reg,
+		struct kbase_va_region *exec_reg,
+		struct kbase_va_region *custom_va_reg)
+{
+	kctx->reg_rbtree = RB_ROOT;
+	kbase_region_tracker_insert(kctx, same_va_reg);
+
+	/* exec and custom_va_reg doesn't always exist */
+	if (exec_reg && custom_va_reg) {
+		kbase_region_tracker_insert(kctx, exec_reg);
+		kbase_region_tracker_insert(kctx, custom_va_reg);
+	}
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	do {
+		rbnode = rb_first(&(kctx->reg_rbtree));
+		if (rbnode) {
+			rb_erase(rbnode, &(kctx->reg_rbtree));
+			reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+			kbase_free_alloced_region(reg);
+		}
+	} while (rbnode);
+}
+
+/**
+ * Initialize the region tracker data structure.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+	struct kbase_va_region *same_va_reg;
+	struct kbase_va_region *exec_reg = NULL;
+	struct kbase_va_region *custom_va_reg = NULL;
+	size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+	u64 same_va_pages;
+	int err;
+
+	/* Take the lock as kbase_free_alloced_region requires it */
+	kbase_gpu_vm_lock(kctx);
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+#ifdef CONFIG_64BIT
+	if (kctx->is_compat)
+		same_va_bits = 32;
+	else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+#endif
+
+	if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) {
+		err = -EINVAL;
+		goto fail_unlock;
+	}
+
+	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+	/* all have SAME_VA */
+	same_va_reg = kbase_alloc_free_region(kctx, 1,
+			same_va_pages,
+			KBASE_REG_ZONE_SAME_VA);
+
+	if (!same_va_reg) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+#ifdef CONFIG_64BIT
+	/* 32-bit clients have exec and custom VA zones */
+	if (kctx->is_compat) {
+#endif
+		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+			err = -EINVAL;
+			goto fail_free_same_va;
+		}
+		/* If the current size of TMEM is out of range of the
+		 * virtual address space addressable by the MMU then
+		 * we should shrink it to fit
+		 */
+		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+		exec_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_EXEC_BASE,
+				KBASE_REG_ZONE_EXEC_SIZE,
+				KBASE_REG_ZONE_EXEC);
+
+		if (!exec_reg) {
+			err = -ENOMEM;
+			goto fail_free_same_va;
+		}
+
+		custom_va_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_CUSTOM_VA_BASE,
+				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+		if (!custom_va_reg) {
+			err = -ENOMEM;
+			goto fail_free_exec;
+		}
+#ifdef CONFIG_64BIT
+	}
+#endif
+
+	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+
+	kctx->same_va_end = same_va_pages + 1;
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_free_exec:
+	kbase_free_alloced_region(exec_reg);
+fail_free_same_va:
+	kbase_free_alloced_region(same_va_reg);
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
+{
+#ifdef CONFIG_64BIT
+	struct kbase_va_region *same_va;
+	struct kbase_va_region *custom_va_reg;
+	u64 same_va_bits;
+	u64 total_va_size;
+	int err;
+
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+	if (kctx->is_compat)
+		return 0;
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+
+	total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/*
+	 * Modify the same VA free region after creation. Be careful to ensure
+	 * that allocations haven't been made as they could cause an overlap
+	 * to happen with existing same VA allocations and the custom VA zone.
+	 */
+	same_va = kbase_region_tracker_find_region_base_address(kctx,
+			PAGE_SIZE);
+	if (!same_va) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* The region flag or region size has changed since creation so bail. */
+	if ((!(same_va->flags & KBASE_REG_FREE)) ||
+			(same_va->nr_pages != total_va_size)) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* It's safe to adjust the same VA zone now */
+	same_va->nr_pages -= jit_va_pages;
+	kctx->same_va_end -= jit_va_pages;
+
+	/*
+	 * Create a custom VA zone at the end of the VA for allocations which
+	 * JIT can use so it doesn't have to allocate VA from the kernel.
+	 */
+	custom_va_reg = kbase_alloc_free_region(kctx,
+				kctx->same_va_end,
+				jit_va_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+	if (!custom_va_reg) {
+		/*
+		 * The context will be destroyed if we fail here so no point
+		 * reverting the change we made to same_va.
+		 */
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	kbase_region_tracker_insert(kctx, custom_va_reg);
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+#else
+	return 0;
+#endif
+}
+
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+	kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX;
+
+	/* Initialize memory usage */
+	atomic_set(&memdev->used_pages, 0);
+
+	return kbase_mem_pool_init(&kbdev->mem_pool,
+			KBASE_MEM_POOL_MAX_SIZE_KBDEV, kbdev, NULL);
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int pages;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	pages = atomic_read(&memdev->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_pool_term(&kbdev->mem_pool);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_term);
+
+
+
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+{
+	struct kbase_va_region *new_reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* zone argument should only contain zone related region flags */
+	KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
+
+	new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+	if (!new_reg)
+		return NULL;
+
+	new_reg->cpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->gpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->kctx = kctx;
+	new_reg->flags = zone | KBASE_REG_FREE;
+
+	new_reg->flags |= KBASE_REG_GROWABLE;
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions.
+ *
+ */
+void kbase_free_alloced_region(struct kbase_va_region *reg)
+{
+	if (!(reg->flags & KBASE_REG_FREE)) {
+		/*
+		 * The physical allocation should have been removed from the
+		 * eviction list before this function is called. However, in the
+		 * case of abnormal process termination or the app leaking the
+		 * memory kbase_mem_free_region is not called so it can still be
+		 * on the list at termination time of the region tracker.
+		 */
+		if (!list_empty(&reg->gpu_alloc->evict_node)) {
+			/*
+			 * Unlink the physical allocation before unmaking it
+			 * evictable so that the allocation isn't grown back to
+			 * its last backed size as we're going to unmap it
+			 * anyway.
+			 */
+			reg->cpu_alloc->reg = NULL;
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				reg->gpu_alloc->reg = NULL;
+
+			/*
+			 * If a region has been made evictable then we must
+			 * unmake it before trying to free it.
+			 * If the memory hasn't been reclaimed it will be
+			 * unmapped and freed below, if it has been reclaimed
+			 * then the operations below are no-ops.
+			 */
+			if (reg->flags & KBASE_REG_DONT_NEED) {
+				KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+						   KBASE_MEM_TYPE_NATIVE);
+				kbase_mem_evictable_unmake(reg->gpu_alloc);
+			}
+		}
+
+		/*
+		 * Remove the region from the sticky resource metadata
+		 * list should it be there.
+		 */
+		kbase_sticky_resource_release(reg->kctx, NULL,
+				reg->start_pfn << PAGE_SHIFT);
+
+		kbase_mem_phy_alloc_put(reg->cpu_alloc);
+		kbase_mem_phy_alloc_put(reg->gpu_alloc);
+		/* To detect use-after-free in debug builds */
+		KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
+	}
+	kfree(reg);
+}
+
+KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
+
+void kbase_mmu_update(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the runpool_irq lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex);
+
+	kctx->kbdev->mmu_mode->update(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_update);
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the runpool_irq lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	kbdev->mmu_mode->disable_as(kbdev, as_nr);
+}
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+{
+	int err;
+	size_t i = 0;
+	unsigned long attr;
+	unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+
+	if ((kctx->kbdev->system_coherency == COHERENCY_ACE) &&
+		(reg->flags & KBASE_REG_SHARE_BOTH))
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA);
+	else
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
+	if (err)
+		return err;
+
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+		struct kbase_mem_phy_alloc *alloc;
+
+		alloc = reg->gpu_alloc;
+		stride = alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
+		for (i = 0; i < alloc->imported.alias.nents; i++) {
+			if (alloc->imported.alias.aliased[i].alloc) {
+				err = kbase_mmu_insert_pages(kctx,
+						reg->start_pfn + (i * stride),
+						alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
+						alloc->imported.alias.aliased[i].length,
+						reg->flags);
+				if (err)
+					goto bad_insert;
+
+				kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc);
+			} else {
+				err = kbase_mmu_insert_single_page(kctx,
+					reg->start_pfn + i * stride,
+					page_to_phys(kctx->aliasing_sink_page),
+					alloc->imported.alias.aliased[i].length,
+					(reg->flags & mask) | attr);
+
+				if (err)
+					goto bad_insert;
+			}
+		}
+	} else {
+		err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+				kbase_get_gpu_phy_pages(reg),
+				kbase_reg_current_backed_size(reg),
+				reg->flags);
+		if (err)
+			goto bad_insert;
+		kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
+	}
+
+	return err;
+
+bad_insert:
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+
+		stride = reg->gpu_alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		while (i--)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
+				kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length);
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+			}
+	}
+
+	kbase_remove_va_region(kctx, reg);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
+
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	if (reg->start_pfn == 0)
+		return 0;
+
+	if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		size_t i;
+
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+	} else {
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
+	}
+
+	if (err)
+		return err;
+
+	err = kbase_remove_va_region(kctx, reg);
+	return err;
+}
+
+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping_of_region(const struct kbase_va_region *reg, unsigned long uaddr, size_t size)
+{
+	struct kbase_cpu_mapping *map;
+	struct list_head *pos;
+
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+
+	if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
+		return NULL;
+
+	list_for_each(pos, &reg->cpu_alloc->mappings) {
+		map = list_entry(pos, struct kbase_cpu_mapping, mappings_list);
+		if (map->vm_start <= uaddr && map->vm_end >= uaddr + size)
+			return map;
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_of_region);
+
+int kbasep_find_enclosing_cpu_mapping_offset(
+	struct kbase_context *kctx, u64 gpu_addr,
+	unsigned long uaddr, size_t size, u64 * offset)
+{
+	struct kbase_cpu_mapping *map = NULL;
+	const struct kbase_va_region *reg;
+	int err = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+	if (reg && !(reg->flags & KBASE_REG_FREE)) {
+		map = kbasep_find_enclosing_cpu_mapping_of_region(reg, uaddr,
+				size);
+		if (map) {
+			*offset = (uaddr - PTR_TO_U64(map->vm_start)) +
+						 (map->page_off << PAGE_SHIFT);
+			err = 0;
+		}
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
+
+void kbase_sync_single(struct kbase_context *kctx,
+		phys_addr_t cpu_pa, phys_addr_t gpu_pa,
+		off_t offset, size_t size, enum kbase_sync_type sync_fn)
+{
+	struct page *cpu_page;
+
+	cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
+
+	if (likely(cpu_pa == gpu_pa)) {
+		dma_addr_t dma_addr;
+
+		BUG_ON(!cpu_page);
+		BUG_ON(offset + size > PAGE_SIZE);
+
+		dma_addr = kbase_dma_addr(cpu_page) + offset;
+		if (sync_fn == KBASE_SYNC_TO_CPU)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+		else if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+	} else {
+		void *src = NULL;
+		void *dst = NULL;
+		struct page *gpu_page;
+
+		if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
+			return;
+
+		gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+
+		if (sync_fn == KBASE_SYNC_TO_DEVICE) {
+			src = ((unsigned char *)kmap(cpu_page)) + offset;
+			dst = ((unsigned char *)kmap(gpu_page)) + offset;
+		} else if (sync_fn == KBASE_SYNC_TO_CPU) {
+			dma_sync_single_for_cpu(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+			src = ((unsigned char *)kmap(gpu_page)) + offset;
+			dst = ((unsigned char *)kmap(cpu_page)) + offset;
+		}
+		memcpy(dst, src, size);
+		kunmap(gpu_page);
+		kunmap(cpu_page);
+		if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+	}
+}
+
+static int kbase_do_syncset(struct kbase_context *kctx,
+		struct base_syncset *set, enum kbase_sync_type sync_fn)
+{
+	int err = 0;
+	struct basep_syncset *sset = &set->basep_sset;
+	struct kbase_va_region *reg;
+	struct kbase_cpu_mapping *map;
+	unsigned long start;
+	size_t size;
+	phys_addr_t *cpu_pa;
+	phys_addr_t *gpu_pa;
+	u64 page_off, page_count;
+	u64 i;
+	off_t offset;
+
+	kbase_os_mem_map_lock(kctx);
+	kbase_gpu_vm_lock(kctx);
+
+	/* find the region where the virtual address is contained */
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			sset->mem_handle.basep.handle);
+	if (!reg) {
+		dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX",
+				sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED))
+		goto out_unlock;
+
+	start = (uintptr_t)sset->user_addr;
+	size = (size_t)sset->size;
+
+	map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size);
+	if (!map) {
+		dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
+				start, sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	offset = start & (PAGE_SIZE - 1);
+	page_off = map->page_off + ((start - map->vm_start) >> PAGE_SHIFT);
+	page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	cpu_pa = kbase_get_cpu_phy_pages(reg);
+	gpu_pa = kbase_get_gpu_phy_pages(reg);
+
+	/* Sync first page */
+	if (cpu_pa[page_off]) {
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+
+		kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
+				offset, sz, sync_fn);
+	}
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		/* we grow upwards, so bail on first non-present page */
+		if (!cpu_pa[page_off + i])
+			break;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + i],
+				gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1 && cpu_pa[page_off + page_count - 1]) {
+		size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
+				gpu_pa[page_off + page_count - 1], 0, sz,
+				sync_fn);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_os_mem_map_unlock(kctx);
+	return err;
+}
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset)
+{
+	int err = -EINVAL;
+	struct basep_syncset *sset;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != syncset);
+
+	sset = &syncset->basep_sset;
+
+	switch (sset->type) {
+	case BASE_SYNCSET_OP_MSYNC:
+		err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_DEVICE);
+		break;
+
+	case BASE_SYNCSET_OP_CSYNC:
+		err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_CPU);
+		break;
+
+	default:
+		dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
+		break;
+	}
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_sync_now);
+
+/* vm lock must be held */
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Unlink the physical allocation before unmaking it evictable so
+	 * that the allocation isn't grown back to its last backed size
+	 * as we're going to unmap it anyway.
+	 */
+	reg->cpu_alloc->reg = NULL;
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		reg->gpu_alloc->reg = NULL;
+
+	/*
+	 * If a region has been made evictable then we must unmake it
+	 * before trying to free it.
+	 * If the memory hasn't been reclaimed it will be unmapped and freed
+	 * below, if it has been reclaimed then the operations below are no-ops.
+	 */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+				   KBASE_MEM_TYPE_NATIVE);
+		kbase_mem_evictable_unmake(reg->gpu_alloc);
+	}
+
+	err = kbase_gpu_munmap(kctx, reg);
+	if (err) {
+		dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+		goto out;
+	}
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) {
+		/* Wait for GPU to flush write buffer before freeing physical pages */
+		kbase_wait_write_flush(kctx);
+	}
+#endif
+	/* This will also free the physical pages */
+	kbase_free_alloced_region(reg);
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free_region);
+
+/**
+ * @brief Free the region from the GPU and unregister it.
+ *
+ * This function implements the free operation on a memory segment.
+ * It will loudly fail if called with outstanding mappings.
+ */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if (0 == gpu_addr) {
+		dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+		return -EINVAL;
+	}
+	kbase_gpu_vm_lock(kctx);
+
+	if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
+	    gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
+		int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
+
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* ask to unlink the cookie as we'll free it */
+
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		kbase_free_alloced_region(reg);
+	} else {
+		/* A real GPU va */
+		/* Validate the region */
+		reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+		if (!reg || (reg->flags & KBASE_REG_FREE)) {
+			dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+			/* SAME_VA must be freed through munmap */
+			dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		err = kbase_mem_free_region(kctx, reg);
+	}
+
+ out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free);
+
+void kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
+
+	reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
+	/* all memory is now growable */
+	reg->flags |= KBASE_REG_GROWABLE;
+
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		reg->flags |= KBASE_REG_PF_GROW;
+
+	if (flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (0 == (flags & BASE_MEM_PROT_GPU_EX))
+		reg->flags |= KBASE_REG_GPU_NX;
+
+	if (flags & BASE_MEM_COHERENT_SYSTEM ||
+			flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED)
+		reg->flags |= KBASE_REG_SHARE_BOTH;
+	else if (flags & BASE_MEM_COHERENT_LOCAL)
+		reg->flags |= KBASE_REG_SHARE_IN;
+
+	/* Set up default MEMATTR usage */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
+		(reg->flags & KBASE_REG_SHARE_BOTH)) {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
+	} else {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_update_region_flags);
+
+int kbase_alloc_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_requested)
+{
+	int new_page_count __maybe_unused;
+	size_t old_page_count = alloc->nents;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = kbase_atomic_add_pages(
+			nr_pages_requested, &alloc->imported.kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer */
+	kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested);
+
+	if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool,
+			nr_pages_requested, alloc->pages + old_page_count) != 0)
+		goto no_alloc;
+
+	/*
+	 * Request a zone cache update, this scans only the new pages an
+	 * appends their information to the zone cache. if the update
+	 * fails then clear the cache so we fall-back to doing things
+	 * page by page.
+	 */
+	if (kbase_zone_cache_update(alloc, old_page_count) != 0)
+		kbase_zone_cache_clear(alloc);
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)alloc->imported.kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return 0;
+
+no_alloc:
+	kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+	return -ENOMEM;
+}
+
+int kbase_free_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	phys_addr_t *start_free;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	/* early out if nothing to do */
+	if (0 == nr_pages_to_free)
+		return 0;
+
+	start_free = alloc->pages + alloc->nents - nr_pages_to_free;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/*
+	 * Clear the zone cache, we don't expect JIT allocations to be
+	 * shrunk in parts so there is no point trying to optimize for that
+	 * by scanning for the changes caused by freeing this memory and
+	 * updating the existing cache entries.
+	 */
+	kbase_zone_cache_clear(alloc);
+
+	kbase_mem_pool_free_pages(&kctx->mem_pool,
+				  nr_pages_to_free,
+				  start_free,
+				  syncback,
+				  reclaimed);
+
+	alloc->nents -= nr_pages_to_free;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		kbase_process_page_usage_dec(kctx, nr_pages_to_free);
+		new_page_count = kbase_atomic_sub_pages(nr_pages_to_free,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(nr_pages_to_free,
+				       &kctx->kbdev->memdev.used_pages);
+
+		kbase_tlstream_aux_pagesalloc(
+				(u32)kctx->id,
+				(u64)new_page_count);
+	}
+
+	return 0;
+}
+
+void kbase_mem_kref_free(struct kref *kref)
+{
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
+
+	switch (alloc->type) {
+	case KBASE_MEM_TYPE_NATIVE: {
+		WARN_ON(!alloc->imported.kctx);
+		/*
+		 * The physical allocation must have been removed from the
+		 * eviction list before trying to free it.
+		 */
+		WARN_ON(!list_empty(&alloc->evict_node));
+		kbase_free_phy_pages_helper(alloc, alloc->nents);
+		break;
+	}
+	case KBASE_MEM_TYPE_ALIAS: {
+		/* just call put on the underlying phy allocs */
+		size_t i;
+		struct kbase_aliased *aliased;
+
+		aliased = alloc->imported.alias.aliased;
+		if (aliased) {
+			for (i = 0; i < alloc->imported.alias.nents; i++)
+				if (aliased[i].alloc)
+					kbase_mem_phy_alloc_put(aliased[i].alloc);
+			vfree(aliased);
+		}
+		break;
+	}
+	case KBASE_MEM_TYPE_RAW:
+		/* raw pages, external cleanup */
+		break;
+ #ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ump_dd_release(alloc->imported.ump_handle);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		dma_buf_detach(alloc->imported.umm.dma_buf,
+			       alloc->imported.umm.dma_attachment);
+		dma_buf_put(alloc->imported.umm.dma_buf);
+		break;
+#endif
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		if (alloc->imported.user_buf.mm)
+			mmdrop(alloc->imported.user_buf.mm);
+		kfree(alloc->imported.user_buf.pages);
+		break;
+	case KBASE_MEM_TYPE_TB:{
+		void *tb;
+
+		tb = alloc->imported.kctx->jctx.tb;
+		kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
+		vfree(tb);
+		break;
+	}
+	default:
+		WARN(1, "Unexecpted free of type %d\n", alloc->type);
+		break;
+	}
+
+	/* Free based on allocation type */
+	if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+		vfree(alloc);
+	else
+		kfree(alloc);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(vsize > 0);
+
+	/* validate user provided arguments */
+	if (size > vsize || vsize > reg->nr_pages)
+		goto out_term;
+
+	/* Prevent vsize*sizeof from wrapping around.
+	 * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail.
+	 */
+	if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
+		goto out_term;
+
+	KBASE_DEBUG_ASSERT(0 != vsize);
+
+	if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
+		goto out_term;
+
+	reg->cpu_alloc->reg = reg;
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
+			goto out_rollback;
+		reg->gpu_alloc->reg = reg;
+	}
+
+	return 0;
+
+out_rollback:
+	kbase_free_phy_pages_helper(reg->cpu_alloc, size);
+out_term:
+	return -1;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+
+bool kbase_check_alloc_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Either the GPU or CPU must be reading from the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
+		return false;
+
+	/* Either the GPU or CPU must be writing to the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for allocating. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* BASE_MEM_IMPORT_SHARED is only valid for imported memory */
+	if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
+		return false;
+
+	return true;
+}
+
+bool kbase_check_import_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Imported memory cannot be GPU executable */
+	if (flags & BASE_MEM_PROT_GPU_EX)
+		return false;
+
+	/* Imported memory cannot grow on page fault */
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for importing. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* Secure memory cannot be read by the CPU */
+	if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD))
+		return false;
+
+	return true;
+}
+
+/**
+ * @brief Acquire the per-context region list lock
+ */
+void kbase_gpu_vm_lock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_lock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
+
+/**
+ * @brief Release the per-context region list lock
+ */
+void kbase_gpu_vm_unlock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_unlock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
+
+struct kbase_jit_debugfs_data {
+	int (*func)(struct kbase_jit_debugfs_data *);
+	struct mutex lock;
+	struct kbase_context *kctx;
+	u64 active_value;
+	u64 pool_value;
+	u64 destroy_value;
+	char buffer[50];
+};
+
+static int kbase_jit_debugfs_common_open(struct inode *inode,
+		struct file *file, int (*func)(struct kbase_jit_debugfs_data *))
+{
+	struct kbase_jit_debugfs_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->func = func;
+	mutex_init(&data->lock);
+	data->kctx = (struct kbase_context *) inode->i_private;
+
+	file->private_data = data;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t kbase_jit_debugfs_common_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_jit_debugfs_data *data;
+	size_t size;
+	int ret;
+
+	data = (struct kbase_jit_debugfs_data *) file->private_data;
+	mutex_lock(&data->lock);
+
+	if (*ppos) {
+		size = strnlen(data->buffer, sizeof(data->buffer));
+	} else {
+		if (!data->func) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		if (data->func(data)) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		size = scnprintf(data->buffer, sizeof(data->buffer),
+				"%llu,%llu,%llu", data->active_value,
+				data->pool_value, data->destroy_value);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size);
+
+out_unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+static int kbase_jit_debugfs_common_release(struct inode *inode,
+		struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+	return kbase_jit_debugfs_common_open(inode, file, __func); \
+} \
+static const struct file_operations __fops = { \
+	.owner = THIS_MODULE, \
+	.open = __fops ## _open, \
+	.release = kbase_jit_debugfs_common_release, \
+	.read = kbase_jit_debugfs_common_read, \
+	.write = NULL, \
+	.llseek = generic_file_llseek, \
+}
+
+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct list_head *tmp;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each(tmp, &kctx->jit_active_head) {
+		data->active_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_pool_head) {
+		data->pool_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_destroy_head) {
+		data->destroy_value++;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
+		kbase_jit_debugfs_count_get);
+
+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->nr_pages;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops,
+		kbase_jit_debugfs_vm_get);
+
+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->gpu_alloc->nents;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
+		kbase_jit_debugfs_phys_get);
+
+void kbase_jit_debugfs_add(struct kbase_context *kctx)
+{
+	/* Debugfs entry for getting the number of JIT allocations. */
+	debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_count_fops);
+
+	/*
+	 * Debugfs entry for getting the total number of virtual pages
+	 * used by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_vm_fops);
+
+	/*
+	 * Debugfs entry for getting the number of physical pages used
+	 * by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_phys_fops);
+}
+
+/**
+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations
+ * @work: Work item
+ *
+ * This function does the work of freeing JIT allocations whose physical
+ * backing has been released.
+ */
+static void kbase_jit_destroy_worker(struct work_struct *work)
+{
+	struct kbase_context *kctx;
+	struct kbase_va_region *reg;
+
+	kctx = container_of(work, struct kbase_context, jit_work);
+	do {
+		mutex_lock(&kctx->jit_lock);
+		if (list_empty(&kctx->jit_destroy_head))
+			reg = NULL;
+		else
+			reg = list_first_entry(&kctx->jit_destroy_head,
+				struct kbase_va_region, jit_node);
+
+		if (reg) {
+			list_del(&reg->jit_node);
+			mutex_unlock(&kctx->jit_lock);
+
+			kbase_gpu_vm_lock(kctx);
+			kbase_mem_free_region(kctx, reg);
+			kbase_gpu_vm_unlock(kctx);
+		} else
+			mutex_unlock(&kctx->jit_lock);
+	} while (reg);
+}
+
+int kbase_jit_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->jit_active_head);
+	INIT_LIST_HEAD(&kctx->jit_pool_head);
+	INIT_LIST_HEAD(&kctx->jit_destroy_head);
+	mutex_init(&kctx->jit_lock);
+	INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
+
+	return 0;
+}
+
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info)
+{
+	struct kbase_va_region *reg = NULL;
+	struct kbase_va_region *walker;
+	struct kbase_va_region *temp;
+	size_t current_diff = SIZE_MAX;
+
+	int ret;
+
+	mutex_lock(&kctx->jit_lock);
+	/*
+	 * Scan the pool for an existing allocation which meets our
+	 * requirements and remove it.
+	 */
+	list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) {
+
+		if (walker->nr_pages >= info->va_pages) {
+			size_t min_size, max_size, diff;
+
+			/*
+			 * The JIT allocations VA requirements have been
+			 * meet, it's suitable but other allocations
+			 * might be a better fit.
+			 */
+			min_size = min_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			max_size = max_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			diff = max_size - min_size;
+
+			if (current_diff > diff) {
+				current_diff = diff;
+				reg = walker;
+			}
+
+			/* The allocation is an exact match, stop looking */
+			if (current_diff == 0)
+				break;
+		}
+	}
+
+	if (reg) {
+		/*
+		 * Remove the found region from the pool and add it to the
+		 * active list.
+		 */
+		list_del_init(&reg->jit_node);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+
+		/* Release the jit lock before modifying the allocation */
+		mutex_unlock(&kctx->jit_lock);
+
+		kbase_gpu_vm_lock(kctx);
+
+		/* Make the physical backing no longer reclaimable */
+		if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+			goto update_failed;
+
+		/* Grow the backing if required */
+		if (reg->gpu_alloc->nents < info->commit_pages) {
+			size_t delta;
+			size_t old_size = reg->gpu_alloc->nents;
+
+			/* Allocate some more pages */
+			delta = info->commit_pages - reg->gpu_alloc->nents;
+			if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta)
+					!= 0)
+				goto update_failed;
+
+			if (reg->cpu_alloc != reg->gpu_alloc) {
+				if (kbase_alloc_phy_pages_helper(
+						reg->cpu_alloc, delta) != 0) {
+					kbase_free_phy_pages_helper(
+							reg->gpu_alloc, delta);
+					goto update_failed;
+				}
+			}
+
+			ret = kbase_mem_grow_gpu_mapping(kctx, reg,
+					info->commit_pages, old_size);
+			/*
+			 * The grow failed so put the allocation back in the
+			 * pool and return failure.
+			 */
+			if (ret)
+				goto update_failed;
+		}
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* No suitable JIT allocation was found so create a new one */
+		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+				BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+				BASE_MEM_COHERENT_LOCAL;
+		u64 gpu_addr;
+		u16 alignment;
+
+		mutex_unlock(&kctx->jit_lock);
+
+		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
+				info->extent, &flags, &gpu_addr, &alignment);
+		if (!reg)
+			goto out_unlocked;
+
+		mutex_lock(&kctx->jit_lock);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+		mutex_unlock(&kctx->jit_lock);
+	}
+
+	return reg;
+
+update_failed:
+	/*
+	 * An update to an allocation from the pool failed, chances
+	 * are slim a new allocation would fair any better so return
+	 * the allocation to the pool and return the function with failure.
+	 */
+	kbase_gpu_vm_unlock(kctx);
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_lock);
+out_unlocked:
+	return NULL;
+}
+
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	/* The physical backing of memory in the pool is always reclaimable */
+	down_read(&kctx->process_mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+	kbase_mem_evictable_make(reg->gpu_alloc);
+	kbase_gpu_vm_unlock(kctx);
+	up_read(&kctx->process_mm->mmap_sem);
+
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_lock);
+}
+
+void kbase_jit_backing_lost(struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = reg->kctx;
+
+	/*
+	 * JIT allocations will always be on a list, if the region
+	 * is not on a list then it's not a JIT allocation.
+	 */
+	if (list_empty(&reg->jit_node))
+		return;
+
+	/*
+	 * Freeing the allocation requires locks we might not be able
+	 * to take now, so move the allocation to the free list and kick
+	 * the worker which will do the freeing.
+	 */
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_destroy_head);
+	mutex_unlock(&kctx->jit_lock);
+
+	schedule_work(&kctx->jit_work);
+}
+
+bool kbase_jit_evict(struct kbase_context *kctx)
+{
+	struct kbase_va_region *reg = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Free the oldest allocation from the pool */
+	mutex_lock(&kctx->jit_lock);
+	if (!list_empty(&kctx->jit_pool_head)) {
+		reg = list_entry(kctx->jit_pool_head.prev,
+				struct kbase_va_region, jit_node);
+		list_del(&reg->jit_node);
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	if (reg)
+		kbase_mem_free_region(kctx, reg);
+
+	return (reg != NULL);
+}
+
+void kbase_jit_term(struct kbase_context *kctx)
+{
+	struct kbase_va_region *walker;
+
+	/* Free all allocations for this context */
+
+	/*
+	 * Flush the freeing of allocations whose backing has been freed
+	 * (i.e. everything in jit_destroy_head).
+	 */
+	cancel_work_sync(&kctx->jit_work);
+
+	kbase_gpu_vm_lock(kctx);
+	/* Free all allocations from the pool */
+	while (!list_empty(&kctx->jit_pool_head)) {
+		walker = list_first_entry(&kctx->jit_pool_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		kbase_mem_free_region(kctx, walker);
+	}
+
+	/* Free all allocations from active list */
+	while (!list_empty(&kctx->jit_active_head)) {
+		walker = list_first_entry(&kctx->jit_active_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		kbase_mem_free_region(kctx, walker);
+	}
+	kbase_gpu_vm_unlock(kctx);
+}
+
+static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	long pinned_pages;
+	struct kbase_mem_phy_alloc *alloc;
+	struct page **pages;
+	phys_addr_t *pa;
+	long i;
+	int err = -ENOMEM;
+	unsigned long address;
+	struct mm_struct *mm;
+	struct device *dev;
+	unsigned long offset;
+	unsigned long local_size;
+
+	alloc = reg->gpu_alloc;
+	pa = kbase_get_gpu_phy_pages(reg);
+	address = alloc->imported.user_buf.address;
+	mm = alloc->imported.user_buf.mm;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+
+	pages = alloc->imported.user_buf.pages;
+
+	pinned_pages = get_user_pages(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+
+	if (pinned_pages <= 0)
+		return pinned_pages;
+
+	if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+		for (i = 0; i < pinned_pages; i++)
+			put_page(pages[i]);
+		return -ENOMEM;
+	}
+
+	dev = kctx->kbdev->dev;
+	offset = address & ~PAGE_MASK;
+	local_size = alloc->imported.user_buf.size;
+
+	for (i = 0; i < pinned_pages; i++) {
+		dma_addr_t dma_addr;
+		unsigned long min;
+
+		min = MIN(PAGE_SIZE - offset, local_size);
+		dma_addr = dma_map_page(dev, pages[i],
+				offset, min,
+				DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, dma_addr))
+			goto unwind;
+
+		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
+		pa[i] = page_to_phys(pages[i]);
+
+		local_size -= min;
+		offset = 0;
+	}
+
+	alloc->nents = pinned_pages;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
+			kbase_reg_current_backed_size(reg),
+			reg->flags);
+	if (err == 0)
+		return 0;
+
+	alloc->nents = 0;
+	/* fall down */
+unwind:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				alloc->imported.user_buf.dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		put_page(pages[i]);
+		pages[i] = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable)
+{
+	long i;
+	struct page **pages;
+	unsigned long size = alloc->imported.user_buf.size;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	pages = alloc->imported.user_buf.pages;
+	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
+		unsigned long local_size;
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
+		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+				DMA_BIDIRECTIONAL);
+		if (writeable)
+			set_page_dirty_lock(pages[i]);
+		put_page(pages[i]);
+		pages[i] = NULL;
+
+		size -= local_size;
+	}
+	alloc->nents = 0;
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kbase_jd_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	struct sg_table *sgt;
+	struct scatterlist *s;
+	int i;
+	phys_addr_t *pa;
+	int err;
+	size_t count = 0;
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = reg->gpu_alloc;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+	KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
+	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
+			DMA_BIDIRECTIONAL);
+
+	if (IS_ERR_OR_NULL(sgt))
+		return -EINVAL;
+
+	/* save for later */
+	alloc->imported.umm.sgt = sgt;
+
+	pa = kbase_get_gpu_phy_pages(reg);
+	KBASE_DEBUG_ASSERT(pa);
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		int j;
+		size_t pages = PFN_UP(sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+		"sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+		sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+		(unsigned long long) sg_dma_address(s));
+
+		for (j = 0; (j < pages) && (count < reg->nr_pages); j++,
+				count++)
+			*pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
+		WARN_ONCE(j < pages,
+		"sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+		alloc->imported.umm.dma_buf->size);
+	}
+
+	if (WARN_ONCE(count < reg->nr_pages,
+			"sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+			alloc->imported.umm.dma_buf->size)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Update nents as we now have pages to map */
+	alloc->nents = count;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+			kbase_get_gpu_phy_pages(reg),
+			kbase_reg_current_backed_size(reg),
+			reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+
+out:
+	if (err) {
+		dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+				alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+		alloc->imported.umm.sgt = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_umm_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(alloc);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+	    alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+	alloc->nents = 0;
+}
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+
+#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) \
+		|| defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS)
+static void add_kds_resource(struct kds_resource *kds_res,
+		struct kds_resource **kds_resources, u32 *kds_res_count,
+		unsigned long *kds_access_bitmap, bool exclusive)
+{
+	u32 i;
+
+	for (i = 0; i < *kds_res_count; i++) {
+		/* Duplicate resource, ignore */
+		if (kds_resources[i] == kds_res)
+			return;
+	}
+
+	kds_resources[*kds_res_count] = kds_res;
+	if (exclusive)
+		set_bit(*kds_res_count, kds_access_bitmap);
+	(*kds_res_count)++;
+}
+#endif
+
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+		, u32 *kds_res_count, struct kds_resource **kds_resources,
+		unsigned long *kds_access_bitmap, bool exclusive
+#endif
+		)
+{
+	int err;
+
+	/* decide what needs to happen for this resource */
+	switch (reg->gpu_alloc->type) {
+	case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
+		if (reg->gpu_alloc->imported.user_buf.mm != locked_mm)
+			goto exit;
+
+		reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
+			err = kbase_jd_user_buf_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+	}
+	break;
+	case BASE_MEM_IMPORT_TYPE_UMP: {
+#if defined(CONFIG_KDS) && defined(CONFIG_UMP)
+		if (kds_res_count) {
+			struct kds_resource *kds_res;
+
+			kds_res = ump_dd_kds_resource_get(
+					reg->gpu_alloc->imported.ump_handle);
+			if (kds_res)
+				add_kds_resource(kds_res, kds_resources,
+						kds_res_count,
+						kds_access_bitmap, exclusive);
+		}
+#endif				/*defined(CONFIG_KDS) && defined(CONFIG_UMP) */
+		break;
+	}
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case BASE_MEM_IMPORT_TYPE_UMM: {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+		if (kds_res_count) {
+			struct kds_resource *kds_res;
+
+			kds_res = get_dma_buf_kds_resource(
+					reg->gpu_alloc->imported.umm.dma_buf);
+			if (kds_res)
+				add_kds_resource(kds_res, kds_resources,
+						kds_res_count,
+						kds_access_bitmap, exclusive);
+		}
+#endif
+		reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+			err = kbase_jd_umm_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+		break;
+	}
+#endif
+	default:
+		goto exit;
+	}
+
+	return kbase_mem_phy_alloc_get(reg->gpu_alloc);
+exit:
+	return NULL;
+}
+
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+	switch (alloc->type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		alloc->imported.umm.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.umm.current_mapping_usage_count) {
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			kbase_jd_umm_unmap(kctx, alloc);
+		}
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		alloc->imported.user_buf.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
+			bool writeable = true;
+
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+				writeable = false;
+
+			kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+		}
+	}
+	break;
+	default:
+	break;
+	}
+	kbase_mem_phy_alloc_put(alloc);
+}
+
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *meta = NULL;
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Walk the per context external resource metadata list for the
+	 * metadata which matches the region which is being acquired.
+	 */
+	list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
+		if (walker->gpu_addr == gpu_addr) {
+			meta = walker;
+			break;
+		}
+	}
+
+	/* No metadata exists so create one. */
+	if (!meta) {
+		struct kbase_va_region *reg;
+
+		/* Find the region */
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				kctx, gpu_addr);
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+			goto failed;
+
+		/* Allocate the metadata object */
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			goto failed;
+
+		/*
+		 * Fill in the metadata object and acquire a reference
+		 * for the physical resource.
+		 */
+		meta->alloc = kbase_map_external_resource(kctx, reg, NULL
+#ifdef CONFIG_KDS
+				, NULL, NULL,
+				NULL, false
+#endif
+				);
+
+		if (!meta->alloc)
+			goto fail_map;
+
+		meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+
+		list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
+	}
+
+	return meta;
+
+fail_map:
+	kfree(meta);
+failed:
+	return NULL;
+}
+
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+	struct kbase_va_region *reg;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Search of the metadata if one isn't provided. */
+	if (!meta) {
+		/*
+		 * Walk the per context external resource metadata list for the
+		 * metadata which matches the region which is being released.
+		 */
+		list_for_each_entry(walker, &kctx->ext_res_meta_head,
+				ext_res_node) {
+			if (walker->gpu_addr == gpu_addr) {
+				meta = walker;
+				break;
+			}
+		}
+	}
+
+	/* No metadata so just return. */
+	if (!meta)
+		return false;
+
+	/* Drop the physical memory reference and free the metadata. */
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx,
+			meta->gpu_addr);
+
+	kbase_unmap_external_resource(kctx, reg, meta->alloc);
+	list_del(&meta->ext_res_node);
+	kfree(meta);
+
+	return true;
+}
+
+int kbase_sticky_resource_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->ext_res_meta_head);
+
+	return 0;
+}
+
+void kbase_sticky_resource_term(struct kbase_context *kctx)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Free any sticky resources which haven't been unmapped.
+	 *
+	 * Note:
+	 * We don't care about refcounts at this point as no future
+	 * references to the meta data will be made.
+	 * Region termination would find these if we didn't free them
+	 * here, but it's more efficient if we do the clean up here.
+	 */
+	while (!list_empty(&kctx->ext_res_meta_head)) {
+		walker = list_first_entry(&kctx->ext_res_meta_head,
+				struct kbase_ctx_ext_res_meta, ext_res_node);
+
+		kbase_sticky_resource_release(kctx, walker, 0);
+	}
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
new file mode 100755
index 0000000..dea3aba
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -0,0 +1,1044 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/kref.h>
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include "mali_kbase_gator.h"
+#endif
+/* Required for kbase_mem_evictable_unmake */
+#include "mali_kbase_mem_linux.h"
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)	/* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
+The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and
+page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table
+updates and generates duplicate page faults as the page table information used by the MMU is not valid.   */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3)	/* round to 8 pages */
+
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0)	/* round to 1 page */
+
+/* This must always be a power of 2 */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/**
+ * A CPU mapping
+ */
+struct kbase_cpu_mapping {
+	struct   list_head mappings_list;
+	struct   kbase_mem_phy_alloc *alloc;
+	struct   kbase_context *kctx;
+	struct   kbase_va_region *region;
+	pgoff_t  page_off;
+	int      count;
+	unsigned long vm_start;
+	unsigned long vm_end;
+};
+
+enum kbase_memory_type {
+	KBASE_MEM_TYPE_NATIVE,
+	KBASE_MEM_TYPE_IMPORTED_UMP,
+	KBASE_MEM_TYPE_IMPORTED_UMM,
+	KBASE_MEM_TYPE_IMPORTED_USER_BUF,
+	KBASE_MEM_TYPE_ALIAS,
+	KBASE_MEM_TYPE_TB,
+	KBASE_MEM_TYPE_RAW
+};
+
+/* internal structure, mirroring base_mem_aliasing_info,
+ * but with alloc instead of a gpu va (handle) */
+struct kbase_aliased {
+	struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
+	u64 offset; /* in pages */
+	u64 length; /* in pages */
+};
+
+/**
+ * @brief Physical pages tracking object properties
+  */
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED  (1ul << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE            (1ul << 1)
+
+/* physical pages tracking object.
+ * Set up to track N pages.
+ * N not stored here, the creator holds that info.
+ * This object only tracks how many elements are actually valid (present).
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not
+ * shared with another region or client. CPU mappings are OK to exist when changing, as
+ * long as the tracked mappings objects are updated as part of the change.
+ */
+struct kbase_mem_phy_alloc {
+	struct kref           kref; /* number of users of this alloc */
+	atomic_t              gpu_mappings;
+	size_t                nents; /* 0..N */
+	phys_addr_t           *pages; /* N elements, only 0..nents are valid */
+
+	/* kbase_cpu_mappings */
+	struct list_head      mappings;
+
+	/* Node used to store this allocation on the eviction list */
+	struct list_head      evict_node;
+	/* Physical backing size when the pages where evicted */
+	size_t                evicted;
+	/*
+	 * Back reference to the region structure which created this
+	 * allocation, or NULL if it has been freed.
+	 */
+	struct kbase_va_region *reg;
+
+	/* type of buffer */
+	enum kbase_memory_type type;
+
+	unsigned long properties;
+
+	struct list_head       zone_cache;
+
+	/* member in union valid based on @a type */
+	union {
+#ifdef CONFIG_UMP
+		ump_dd_handle ump_handle;
+#endif /* CONFIG_UMP */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		struct {
+			struct dma_buf *dma_buf;
+			struct dma_buf_attachment *dma_attachment;
+			unsigned int current_mapping_usage_count;
+			struct sg_table *sgt;
+		} umm;
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+		struct {
+			u64 stride;
+			size_t nents;
+			struct kbase_aliased *aliased;
+		} alias;
+		/* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
+		struct kbase_context *kctx;
+		struct {
+			unsigned long address;
+			unsigned long size;
+			unsigned long nr_pages;
+			struct page **pages;
+			unsigned int current_mapping_usage_count;
+			struct mm_struct *mm;
+			dma_addr_t *dma_addrs;
+		} user_buf;
+	} imported;
+};
+
+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		atomic_inc(&alloc->gpu_mappings);
+}
+
+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+			pr_err("Mismatched %s:\n", __func__);
+			dump_stack();
+		}
+}
+
+void kbase_mem_kref_free(struct kref *kref);
+
+int kbase_mem_init(struct kbase_device *kbdev);
+void kbase_mem_halt(struct kbase_device *kbdev);
+void kbase_mem_term(struct kbase_device *kbdev);
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_get(&alloc->kref);
+	return alloc;
+}
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_put(&alloc->kref, kbase_mem_kref_free);
+	return NULL;
+}
+
+/**
+ * A GPU memory region, and attributes for CPU mappings.
+ */
+struct kbase_va_region {
+	struct rb_node rblink;
+	struct list_head link;
+
+	struct kbase_context *kctx;	/* Backlink to base context */
+
+	u64 start_pfn;		/* The PFN in GPU space */
+	size_t nr_pages;
+
+/* Free region */
+#define KBASE_REG_FREE              (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR            (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR            (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX            (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED        (1ul << 4)
+/* Is GPU cached? */
+#define KBASE_REG_GPU_CACHED        (1ul << 5)
+
+#define KBASE_REG_GROWABLE          (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW           (1ul << 7)
+
+/* VA managed by us */
+#define KBASE_REG_CUSTOM_VA         (1ul << 8)
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN          (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH        (1ul << 10)
+
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_MASK         (3ul << 11)
+#define KBASE_REG_ZONE(x)           (((x) & 3) << 11)
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD            (1ul<<13)
+/* CPU read access */
+#define KBASE_REG_CPU_RD            (1ul<<14)
+
+/* Aligned for GPU EX in SAME_VA */
+#define KBASE_REG_ALIGNED           (1ul<<15)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK      (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x)  (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x)  (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_SECURE            (1ul << 19)
+
+#define KBASE_REG_DONT_NEED         (1ul << 20)
+
+#define KBASE_REG_ZONE_SAME_VA      KBASE_REG_ZONE(0)
+
+/* only used with 32-bit clients */
+/*
+ * On a 32bit platform, custom VA should be wired from (4GB + shader region)
+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink
+ * it down, if required by the GPU, during initialization.
+ */
+
+/*
+ * Dedicated 16MB region for shader code:
+ * VA range 0x101000000-0x102000000
+ */
+#define KBASE_REG_ZONE_EXEC         KBASE_REG_ZONE(1)
+#define KBASE_REG_ZONE_EXEC_BASE    (0x101000000ULL >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_SIZE    ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
+
+#define KBASE_REG_ZONE_CUSTOM_VA         KBASE_REG_ZONE(2)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE    (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE    (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
+
+	unsigned long flags;
+
+	size_t extent; /* nr of pages alloc'd on PF */
+
+	struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */
+	struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */
+
+	/* non-NULL if this memory object is a kds_resource */
+	struct kds_resource *kds_res;
+
+	/* List head used to store the region in the JIT allocation pool */
+	struct list_head jit_node;
+};
+
+/* Common functions */
+static inline phys_addr_t *kbase_get_cpu_phy_pages(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->pages;
+}
+
+static inline phys_addr_t *kbase_get_gpu_phy_pages(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->gpu_alloc->pages;
+}
+
+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	/* if no alloc object the backed size naturally is 0 */
+	if (!reg->cpu_alloc)
+		return 0;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->nents;
+}
+
+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
+
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
+{
+	struct kbase_mem_phy_alloc *alloc;
+	size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+	size_t per_page_size = sizeof(*alloc->pages);
+
+	/* Imported pages may have page private data already in use */
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		alloc_size += nr_pages *
+				sizeof(*alloc->imported.user_buf.dma_addrs);
+		per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
+	}
+
+	/*
+	 * Prevent nr_pages*per_page_size + sizeof(*alloc) from
+	 * wrapping around.
+	 */
+	if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+			/ per_page_size))
+		return ERR_PTR(-ENOMEM);
+
+	/* Allocate based on the size to reduce internal fragmentation of vmem */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc = vzalloc(alloc_size);
+	else
+		alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+	if (!alloc)
+		return ERR_PTR(-ENOMEM);
+
+	/* Store allocation method */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+	kref_init(&alloc->kref);
+	atomic_set(&alloc->gpu_mappings, 0);
+	alloc->nents = 0;
+	alloc->pages = (void *)(alloc + 1);
+	INIT_LIST_HEAD(&alloc->mappings);
+	alloc->type = type;
+	INIT_LIST_HEAD(&alloc->zone_cache);
+
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
+		alloc->imported.user_buf.dma_addrs =
+				(void *) (alloc->pages + nr_pages);
+
+	return alloc;
+}
+
+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
+		struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+	reg->cpu_alloc = kbase_alloc_create(reg->nr_pages,
+			KBASE_MEM_TYPE_NATIVE);
+	if (IS_ERR(reg->cpu_alloc))
+		return PTR_ERR(reg->cpu_alloc);
+	else if (!reg->cpu_alloc)
+		return -ENOMEM;
+	reg->cpu_alloc->imported.kctx = kctx;
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+	if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) {
+		reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
+				KBASE_MEM_TYPE_NATIVE);
+		reg->gpu_alloc->imported.kctx = kctx;
+		INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+	} else {
+		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	}
+
+	INIT_LIST_HEAD(&reg->jit_node);
+	reg->flags &= ~KBASE_REG_FREE;
+	return 0;
+}
+
+static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_add_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_sub_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+/*
+ * Max size for kbdev memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * Max size for kctx memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KCTX  (SZ_64M >> PAGE_SHIFT)
+
+/**
+ * kbase_mem_pool_init - Create a memory pool for a kbase device
+ * @pool:      Memory pool to initialize
+ * @max_size:  Maximum number of free pages the pool can hold
+ * @kbdev:     Kbase device where memory is used
+ * @next_pool: Pointer to the next pool or NULL.
+ *
+ * Allocations from @pool are in whole pages. Each @pool has a free list where
+ * pages can be quickly allocated from. The free list is initially empty and
+ * filled whenever pages are freed back to the pool. The number of free pages
+ * in the pool will in general not exceed @max_size, but the pool may in
+ * certain corner cases grow above @max_size.
+ *
+ * If @next_pool is not NULL, we will allocate from @next_pool before going to
+ * the kernel allocator. Similarily pages can spill over to @next_pool when
+ * @pool is full. Pages are zeroed before they spill over to another pool, to
+ * prevent leaking information between applications.
+ *
+ * A shrinker is registered so that Linux mm can reclaim pages from the pool as
+ * needed.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool);
+
+/**
+ * kbase_mem_pool_term - Destroy a memory pool
+ * @pool:  Memory pool to destroy
+ *
+ * Pages in the pool will spill over to @next_pool (if available) or freed to
+ * the kernel.
+ */
+void kbase_mem_pool_term(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_alloc - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * Allocations from the pool are made as follows:
+ * 1. If there are free pages in the pool, allocate a page from @pool.
+ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page
+ *    from @next_pool.
+ * 3. Finally, allocate a page from the kernel.
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ */
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_free - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @page:  Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * Pages are freed to the pool as follows:
+ * 1. If @pool is not full, add @page to @pool.
+ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to
+ *    @next_pool.
+ * 3. Finally, free @page to the kernel.
+ */
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page,
+		bool dirty);
+
+/**
+ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool
+ * @pool:     Memory pool to allocate from
+ * @nr_pages: Number of pages to allocate
+ * @pages:    Pointer to array where the physical address of the allocated
+ *            pages will be stored.
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages);
+
+/**
+ * kbase_mem_pool_free_pages - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages, bool dirty, bool reclaimed);
+
+/**
+ * kbase_mem_pool_size - Get number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Note: the size of the pool may in certain corner cases exceed @max_size!
+ *
+ * Return: Number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool)
+{
+	return ACCESS_ONCE(pool->cur_size);
+}
+
+/**
+ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Return: Maximum number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool)
+{
+	return pool->max_size;
+}
+
+
+/**
+ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool
+ * @pool:     Memory pool to inspect
+ * @max_size: Maximum number of free pages the pool can hold
+ *
+ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit.
+ * For details see kbase_mem_pool_shrink().
+ */
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
+
+/**
+ * kbase_mem_pool_trim - Grow or shrink the pool to a new size
+ * @pool:     Memory pool to trim
+ * @new_size: New number of pages in the pool
+ *
+ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but
+ * not above @max_size.
+ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel.
+ *
+ * Return: The new size of the pool
+ */
+size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
+
+
+int kbase_region_tracker_init(struct kbase_context *kctx);
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages);
+void kbase_region_tracker_term(struct kbase_context *kctx);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * @brief Check that a pointer is actually a valid region.
+ *
+ * Must be called with context lock held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr);
+
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone);
+void kbase_free_alloced_region(struct kbase_va_region *reg);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+bool kbase_check_alloc_flags(unsigned long flags);
+bool kbase_check_import_flags(unsigned long flags);
+void kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags);
+
+void kbase_gpu_vm_lock(struct kbase_context *kctx);
+void kbase_gpu_vm_unlock(struct kbase_context *kctx);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
+
+int kbase_mmu_init(struct kbase_context *kctx);
+void kbase_mmu_term(struct kbase_context *kctx);
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
+void kbase_mmu_free_pgd(struct kbase_context *kctx);
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					phys_addr_t phys, size_t nr,
+					unsigned long flags);
+
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr);
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags);
+
+/**
+ * @brief Register region and map it on the GPU.
+ *
+ * Call kbase_add_va_region() and map the region on the GPU.
+ */
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+/**
+ * @brief Remove the region from the GPU and unregister it.
+ *
+ * Must be called with context lock held.
+ */
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_as::transaction_mutex on kctx's address space
+ * - It must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_mmu_update(struct kbase_context *kctx);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_as::transaction_mutex on kctx's address space
+ * - It must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_mmu_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable_as() - set the MMU in unmapped mode for an address space.
+ *
+ * @kbdev:	Kbase device
+ * @as_nr:	Number of the address space for which the MMU
+ *		should be set in unmapped mode.
+ *
+ * The caller must hold kbdev->as[as_nr].transaction_mutex.
+ */
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr);
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** Dump the MMU tables to a buffer
+ *
+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the
+ * buffer is too small then the return value will be NULL.
+ *
+ * The GPU vm lock must be held when calling this function.
+ *
+ * The buffer returned should be freed with @ref vfree when it is no longer required.
+ *
+ * @param[in]   kctx        The kbase context to dump
+ * @param[in]   nr_pages    The number of pages to allocate for the buffer.
+ *
+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too
+ * small)
+ */
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages);
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset);
+void kbase_sync_single(struct kbase_context *kctx, phys_addr_t cpu_pa,
+		phys_addr_t gpu_pa, off_t offset, size_t size,
+		enum kbase_sync_type sync_fn);
+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+
+/* OS specific functions */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr);
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg);
+void kbase_os_mem_map_lock(struct kbase_context *kctx);
+void kbase_os_mem_map_unlock(struct kbase_context *kctx);
+
+/**
+ * @brief Update the memory allocation counters for the current process
+ *
+ * OS specific call to updates the current memory allocation counters for the current process with
+ * the supplied delta.
+ *
+ * @param[in] kctx  The kbase context
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
+
+/**
+ * @brief Add to the memory allocation counters for the current process
+ *
+ * OS specific call to add to the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, pages);
+}
+
+/**
+ * @brief Subtract from the memory allocation counters for the current process
+ *
+ * OS specific call to subtract from the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, 0 - pages);
+}
+
+/**
+ * @brief Find the offset of the CPU mapping of a memory allocation containing
+ *        a given address range
+ *
+ * Searches for a CPU mapping of any part of the region starting at @p gpu_addr
+ * that fully encloses the CPU virtual address range specified by @p uaddr and
+ * @p size. Returns a failure indication if only part of the address range lies
+ * within a CPU mapping, or the address range lies within a CPU mapping of a
+ * different region.
+ *
+ * @param[in,out] kctx      The kernel base context used for the allocation.
+ * @param[in]     gpu_addr  GPU address of the start of the allocated region
+ *                          within which to search.
+ * @param[in]     uaddr     Start of the CPU virtual address range.
+ * @param[in]     size      Size of the CPU virtual address range (in bytes).
+ * @param[out]    offset    The offset from the start of the allocation to the
+ *                          specified CPU virtual address.
+ *
+ * @return 0 if offset was obtained successfully. Error code
+ *         otherwise.
+ */
+int kbasep_find_enclosing_cpu_mapping_offset(struct kbase_context *kctx,
+							u64 gpu_addr,
+							unsigned long uaddr,
+							size_t size,
+							u64 *offset);
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer);
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+* @brief Allocates physical pages.
+*
+* Allocates \a nr_pages_requested and updates the alloc object.
+*
+* @param[in] alloc allocation object to add pages to
+* @param[in] nr_pages_requested number of physical pages to allocate
+*
+* @return 0 if all pages have been successfully allocated. Error code otherwise
+*/
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested);
+
+/**
+* @brief Free physical pages.
+*
+* Frees \a nr_pages and updates the alloc object.
+*
+* @param[in] alloc allocation object to free pages from
+* @param[in] nr_pages_to_free number of physical pages to free
+*/
+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
+
+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+{
+	SetPagePrivate(p);
+	if (sizeof(dma_addr_t) > sizeof(p->private)) {
+		/* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
+		 * private field stays the same. So we have to be clever and
+		 * use the fact that we only store DMA addresses of whole pages,
+		 * so the low bits should be zero */
+		KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
+		set_page_private(p, dma_addr >> PAGE_SHIFT);
+	} else {
+		set_page_private(p, dma_addr);
+	}
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+	if (sizeof(dma_addr_t) > sizeof(p->private))
+		return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
+
+	return (dma_addr_t)page_private(p);
+}
+
+static inline void kbase_clear_dma_addr(struct page *p)
+{
+	ClearPagePrivate(p);
+}
+
+/**
+* @brief Process a bus or page fault.
+*
+* This function will process a fault on a specific address space
+*
+* @param[in] kbdev   The @ref kbase_device the fault happened on
+* @param[in] kctx    The @ref kbase_context for the faulting address space if
+*                    one was found.
+* @param[in] as      The address space that has the fault
+*/
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_as *as);
+
+/**
+ * @brief Process a page fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void page_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Process a bus fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void bus_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Flush MMU workqueues.
+ *
+ * This function will cause any outstanding page or bus faults to be processed.
+ * It should be called prior to powering off the GPU.
+ *
+ * @param[in] kbdev   Device pointer
+ */
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev);
+
+/**
+ * kbase_sync_single_for_device - update physical memory and give GPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_jit_debugfs_add - Add per context debugfs entry for JIT.
+ * @kctx: kbase context
+ */
+void kbase_jit_debugfs_add(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_init - Initialize the JIT memory pool management
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_jit_init(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_allocate - Allocate JIT memory
+ * @kctx: kbase context
+ * @info: JIT allocation information
+ *
+ * Return: JIT allocation on success or NULL on failure.
+ */
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info);
+
+/**
+ * kbase_jit_free - Free a JIT allocation
+ * @kctx: kbase context
+ * @reg: JIT allocation
+ *
+ * Frees a JIT allocation and places it into the free pool for later reuse.
+ */
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing
+ * @reg: JIT allocation
+ */
+void kbase_jit_backing_lost(struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_evict - Evict a JIT allocation from the pool
+ * @kctx: kbase context
+ *
+ * Evict the least recently used JIT allocation from the pool. This can be
+ * required if normal VA allocations are failing due to VA exhaustion.
+ *
+ * Return: True if a JIT allocation was freed, false otherwise.
+ */
+bool kbase_jit_evict(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_term - Terminate the JIT memory pool management
+ * @kctx: kbase context
+ */
+void kbase_jit_term(struct kbase_context *kctx);
+
+/**
+ * kbase_map_external_resource - Map an external resource to the GPU.
+ * @kctx:              kbase context.
+ * @reg:               The region to map.
+ * @locked_mm:         The mm_struct which has been locked for this operation.
+ * @kds_res_count:     The number of KDS resources.
+ * @kds_resources:     Array of KDS resources.
+ * @kds_access_bitmap: Access bitmap for KDS.
+ * @exclusive:         If the KDS resource requires exclusive access.
+ *
+ * Return: The physical allocation which backs the region on success or NULL
+ * on failure.
+ */
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+		, u32 *kds_res_count, struct kds_resource **kds_resources,
+		unsigned long *kds_access_bitmap, bool exclusive
+#endif
+		);
+
+/**
+ * kbase_unmap_external_resource - Unmap an external resource from the GPU.
+ * @kctx:  kbase context.
+ * @reg:   The region to unmap or NULL if it has already been released.
+ * @alloc: The physical allocation being unmapped.
+ */
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_sticky_resource_init - Initialize sticky resource management.
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_sticky_resource_init(struct kbase_context *kctx);
+
+/**
+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @gpu_addr: The GPU address of the external resource.
+ *
+ * Return: The metadata object which represents the binding between the
+ * external resource and the kbase context on success or NULL on failure.
+ */
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_release - Release a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @meta:     Binding metadata.
+ * @gpu_addr: GPU address of the external resource.
+ *
+ * If meta is NULL then gpu_addr will be used to scan the metadata list and
+ * find the matching metadata (if any), otherwise the provided meta will be
+ * used and gpu_addr will be ignored.
+ *
+ * Return: True if the release found the metadata and the reference was dropped.
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_term - Terminate sticky resource management.
+ * @kctx: kbase context
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx);
+
+/**
+ * kbase_zone_cache_update - Update the memory zone cache after new pages have
+ * been added.
+ * @alloc:        The physical memory allocation to build the cache for.
+ * @start_offset: Offset to where the new pages start.
+ *
+ * Updates an existing memory zone cache, updating the counters for the
+ * various zones.
+ * If the memory allocation doesn't already have a zone cache assume that
+ * one isn't created and thus don't do anything.
+ *
+ * Return: Zero cache was updated, negative error code on error.
+ */
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset);
+
+/**
+ * kbase_zone_cache_build - Build the memory zone cache.
+ * @alloc:        The physical memory allocation to build the cache for.
+ *
+ * Create a new zone cache for the provided physical memory allocation if
+ * one doesn't already exist, if one does exist then just return.
+ *
+ * Return: Zero if the zone cache was created, negative error code on error.
+ */
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_zone_cache_clear - Clear the memory zone cache.
+ * @alloc:        The physical memory allocation to clear the cache on.
+ */
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc);
+
+#endif				/* _KBASE_MEM_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
new file mode 100755
index 0000000..ed4704f
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -0,0 +1,2828 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.c
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	#include <linux/dma-mapping.h>
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	#include <linux/dma-attrs.h>
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)  */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/shrinker.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_tlstream.h>
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+static const struct vm_operations_struct kbase_vm_ops;
+
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ *
+ * Note: Caller must be holding the processes mmap_sem lock.
+ */
+static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment)
+{
+	int zone;
+	int gpu_pc_bits;
+	int cpu_va_bits;
+	struct kbase_va_region *reg;
+	struct device *dev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_alignment);
+
+	dev = kctx->kbdev->dev;
+	*va_alignment = 0; /* no alignment by default */
+	*gpu_va = 0; /* return 0 on failure */
+
+	gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	cpu_va_bits = BITS_PER_LONG;
+
+	if (0 == va_pages) {
+		dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!");
+		goto bad_size;
+	}
+
+	if (va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+#if defined(CONFIG_64BIT)
+	if (kctx->is_compat)
+		cpu_va_bits = 32;
+#endif
+
+	if (!kbase_check_alloc_flags(*flags)) {
+		dev_warn(dev,
+				"kbase_mem_alloc called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	/* Limit GPU executable allocs to GPU PC size */
+	if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+	    (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT)))
+		goto bad_ex_size;
+
+	/* find out which VA zone to use */
+	if (*flags & BASE_MEM_SAME_VA)
+		zone = KBASE_REG_ZONE_SAME_VA;
+	else if (*flags & BASE_MEM_PROT_GPU_EX)
+		zone = KBASE_REG_ZONE_EXEC;
+	else
+		zone = KBASE_REG_ZONE_CUSTOM_VA;
+
+	reg = kbase_alloc_free_region(kctx, 0, va_pages, zone);
+	if (!reg) {
+		dev_err(dev, "Failed to allocate free region");
+		goto no_region;
+	}
+
+	kbase_update_region_flags(kctx, reg, *flags);
+
+	if (kbase_reg_prepare_native(reg, kctx) != 0) {
+		dev_err(dev, "Failed to prepare region");
+		goto prepare_failed;
+	}
+
+	if (*flags & BASE_MEM_GROW_ON_GPF)
+		reg->extent = extent;
+	else
+		reg->extent = 0;
+
+	if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) {
+		dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)",
+				(unsigned long long)commit_pages,
+				(unsigned long long)va_pages);
+		goto no_mem;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & BASE_MEM_SAME_VA) {
+		unsigned long prot = PROT_NONE;
+		unsigned long va_size = va_pages << PAGE_SHIFT;
+		unsigned long va_map = va_size;
+		unsigned long cookie, cookie_nr;
+		unsigned long cpu_addr;
+
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(dev, "No cookies available for allocation!");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_cookie;
+		}
+		/* return a cookie */
+		cookie_nr = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << cookie_nr);
+		BUG_ON(kctx->pending_regions[cookie_nr]);
+		kctx->pending_regions[cookie_nr] = reg;
+
+		kbase_gpu_vm_unlock(kctx);
+
+		/* relocate to correct base */
+		cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		cookie <<= PAGE_SHIFT;
+
+		/* See if we must align memory due to GPU PC bits vs CPU VA */
+		if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+		    (cpu_va_bits > gpu_pc_bits)) {
+			*va_alignment = gpu_pc_bits;
+			reg->flags |= KBASE_REG_ALIGNED;
+		}
+
+		/*
+		 * Pre-10.1 UKU userland calls mmap for us so return the
+		 * unaligned address and skip the map.
+		 */
+		if (kctx->api_version < KBASE_API_VERSION(10, 1)) {
+			*gpu_va = (u64) cookie;
+			return reg;
+		}
+
+		/*
+		 * GPUCORE-2190:
+		 *
+		 * We still need to return alignment for old userspace.
+		 */
+		if (*va_alignment)
+			va_map += 3 * (1UL << *va_alignment);
+
+		if (*flags & BASE_MEM_PROT_CPU_RD)
+			prot |= PROT_READ;
+		if (*flags & BASE_MEM_PROT_CPU_WR)
+			prot |= PROT_WRITE;
+
+		cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot,
+				MAP_SHARED, cookie);
+
+		if (IS_ERR_VALUE(cpu_addr)) {
+			kctx->pending_regions[cookie_nr] = NULL;
+			kctx->cookies |= (1UL << cookie_nr);
+			goto no_mmap;
+		}
+
+		/*
+		 * If we had to allocate extra VA space to force the
+		 * alignment release it.
+		 */
+		if (*va_alignment) {
+			unsigned long alignment = 1UL << *va_alignment;
+			unsigned long align_mask = alignment - 1;
+			unsigned long addr;
+			unsigned long addr_end;
+			unsigned long aligned_addr;
+			unsigned long aligned_addr_end;
+
+			addr = cpu_addr;
+			addr_end = addr + va_map;
+
+			aligned_addr = (addr + align_mask) &
+					~((u64) align_mask);
+			aligned_addr_end = aligned_addr + va_size;
+
+			if ((aligned_addr_end & BASE_MEM_MASK_4GB) == 0) {
+				/*
+				 * Can't end at 4GB boundary on some GPUs as
+				 * it will halt the shader.
+				 */
+				aligned_addr += 2 * alignment;
+				aligned_addr_end += 2 * alignment;
+			} else if ((aligned_addr & BASE_MEM_MASK_4GB) == 0) {
+				/*
+				 * Can't start at 4GB boundary on some GPUs as
+				 * it will halt the shader.
+				 */
+				aligned_addr += alignment;
+				aligned_addr_end += alignment;
+			}
+
+			/* anything to chop off at the start? */
+			if (addr != aligned_addr)
+				vm_munmap(addr, aligned_addr - addr);
+
+			/* anything at the end? */
+			if (addr_end != aligned_addr_end)
+				vm_munmap(aligned_addr_end,
+						addr_end - aligned_addr_end);
+
+			*gpu_va = (u64) aligned_addr;
+		} else
+			*gpu_va = (u64) cpu_addr;
+	} else /* we control the VA */ {
+		if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
+			dev_warn(dev, "Failed to map memory on GPU");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+
+		kbase_gpu_vm_unlock(kctx);
+	}
+
+	return reg;
+
+no_mmap:
+no_cookie:
+no_mem:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+prepare_failed:
+	kfree(reg);
+no_region:
+bad_ex_size:
+bad_flags:
+bad_size:
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_alloc);
+
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(out);
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	switch (query) {
+	case KBASE_MEM_QUERY_COMMIT_SIZE:
+		if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) {
+			*out = kbase_reg_current_backed_size(reg);
+		} else {
+			size_t i;
+			struct kbase_aliased *aliased;
+			*out = 0;
+			aliased = reg->cpu_alloc->imported.alias.aliased;
+			for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++)
+				*out += aliased[i].length;
+		}
+		break;
+	case KBASE_MEM_QUERY_VA_SIZE:
+		*out = reg->nr_pages;
+		break;
+	case KBASE_MEM_QUERY_FLAGS:
+	{
+		*out = 0;
+		if (KBASE_REG_CPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_WR;
+		if (KBASE_REG_CPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_RD;
+		if (KBASE_REG_CPU_CACHED & reg->flags)
+			*out |= BASE_MEM_CACHED_CPU;
+		if (KBASE_REG_GPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_WR;
+		if (KBASE_REG_GPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_RD;
+		if (!(KBASE_REG_GPU_NX & reg->flags))
+			*out |= BASE_MEM_PROT_GPU_EX;
+		if (KBASE_REG_SHARE_BOTH & reg->flags)
+			*out |= BASE_MEM_COHERENT_SYSTEM;
+		if (KBASE_REG_SHARE_IN & reg->flags)
+			*out |= BASE_MEM_COHERENT_LOCAL;
+		break;
+	}
+	default:
+		*out = 0;
+		goto out_unlock;
+	}
+
+	ret = 0;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the
+ * Ephemeral memory eviction list.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages which can be freed.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long pages = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+
+	mutex_lock(&kctx->evict_lock);
+
+	list_for_each_entry(alloc, &kctx->evict_list, evict_node)
+		pages += alloc->nents;
+
+	mutex_unlock(&kctx->evict_lock);
+	return pages;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction
+ * list for pages and try to reclaim them.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages freed (can be less then requested) or -1 if the
+ * shrinker failed to free pages in its pool.
+ *
+ * Note:
+ * This function accesses region structures without taking the region lock,
+ * this is required as the OOM killer can call the shrinker after the region
+ * lock has already been held.
+ * This is safe as we can guarantee that a region on the eviction list will
+ * not be freed (kbase_mem_free_region removes the allocation from the list
+ * before destroying it), or modified by other parts of the driver.
+ * The eviction list itself is guarded by the eviction lock and the MMU updates
+ * are protected by their own lock.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	struct kbase_mem_phy_alloc *tmp;
+	unsigned long freed = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+	mutex_lock(&kctx->evict_lock);
+
+	list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
+		int err;
+
+		err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
+				0, alloc->nents);
+		if (err != 0) {
+			/*
+			 * Failed to remove GPU mapping, tell the shrinker
+			 * to stop trying to shrink our slab even though we
+			 * have pages in it.
+			 */
+			freed = -1;
+			goto out_unlock;
+		}
+
+		/*
+		 * Update alloc->evicted before freeing the backing so the
+		 * helper can determine that it needs to bypass the accounting
+		 * and memory pool.
+		 */
+		alloc->evicted = alloc->nents;
+
+		kbase_free_phy_pages_helper(alloc, alloc->evicted);
+		freed += alloc->evicted;
+		list_del_init(&alloc->evict_node);
+
+		/*
+		 * Inform the JIT allocator this region has lost backing
+		 * as it might need to free the allocation.
+		 */
+		kbase_jit_backing_lost(alloc->reg);
+
+		/* Enough pages have been freed so stop now */
+		if (freed > sc->nr_to_scan)
+			break;
+	}
+out_unlock:
+	mutex_unlock(&kctx->evict_lock);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_evictable_reclaim_count_objects(s, sc);
+
+	return kbase_mem_evictable_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_evictable_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->evict_list);
+	mutex_init(&kctx->evict_lock);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink;
+#else
+	kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
+	kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+#endif
+	kctx->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	kctx->reclaim.batch = 0;
+#endif
+	register_shrinker(&kctx->reclaim);
+	return 0;
+}
+
+void kbase_mem_evictable_deinit(struct kbase_context *kctx)
+{
+	unregister_shrinker(&kctx->reclaim);
+}
+
+struct kbase_mem_zone_cache_entry {
+	/* List head used to link the cache entry to the memory allocation. */
+	struct list_head zone_node;
+	/* The zone the cacheline is for. */
+	struct zone *zone;
+	/* The number of pages in the allocation which belong to this zone. */
+	u64 count;
+};
+
+static bool kbase_zone_cache_builder(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	struct kbase_mem_zone_cache_entry *cache = NULL;
+	size_t i;
+	int ret = 0;
+
+	for (i = start_offset; i < alloc->nents; i++) {
+		struct page *p = phys_to_page(alloc->pages[i]);
+		struct zone *zone = page_zone(p);
+		bool create = true;
+
+		if (cache && (cache->zone == zone)) {
+			/*
+			 * Fast path check as most of the time adjacent
+			 * pages come from the same zone.
+			 */
+			create = false;
+		} else {
+			/*
+			 * Slow path check, walk all the cache entries to see
+			 * if we already know about this zone.
+			 */
+			list_for_each_entry(cache, &alloc->zone_cache, zone_node) {
+				if (cache->zone == zone) {
+					create = false;
+					break;
+				}
+			}
+		}
+
+		/* This zone wasn't found in the cache, create an entry for it */
+		if (create) {
+			cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+			if (!cache) {
+				ret = -ENOMEM;
+				goto bail;
+			}
+			cache->zone = zone;
+			cache->count = 0;
+			list_add(&cache->zone_node, &alloc->zone_cache);
+		}
+
+		cache->count++;
+	}
+	return 0;
+
+bail:
+	return ret;
+}
+
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	/*
+	 * Bail if the zone cache is empty, only update the cache if it
+	 * existed in the first place.
+	 */
+	if (list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, start_offset);
+}
+
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc)
+{
+	/* Bail if the zone cache already exists */
+	if (!list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, 0);
+}
+
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_mem_zone_cache_entry *walker;
+
+	while(!list_empty(&alloc->zone_cache)){
+		walker = list_first_entry(&alloc->zone_cache,
+				struct kbase_mem_zone_cache_entry, zone_node);
+		list_del(&walker->zone_node);
+		kfree(walker);
+	}
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p = phys_to_page(alloc->pages[i]);
+			struct zone *zone = page_zone(p);
+
+			zone_page_state_add(1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	kbase_process_page_usage_dec(kctx, alloc->nents);
+	new_page_count = kbase_atomic_sub_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+/**
+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable.
+ * @alloc: The physical allocation
+ */
+static
+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	new_page_count = kbase_atomic_add_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters so that the allocation is accounted for
+	 * against the process and thus is visible to the OOM killer,
+	 * then remove it from the reclaimable accounting. */
+	kbase_process_page_usage_inc(kctx, alloc->nents);
+
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(-zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p = phys_to_page(alloc->pages[i]);
+			struct zone *zone = page_zone(p);
+
+			zone_page_state_add(-1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* This alloction can't already be on a list. */
+	WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
+	/*
+	 * Try to shrink the CPU mappings as required, if we fail then
+	 * fail the process of making this allocation evictable.
+	 */
+	err = kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
+			0, gpu_alloc->nents);
+	if (err)
+		return -EINVAL;
+
+	/*
+	 * Add the allocation to the eviction list, after this point the shrink
+	 * can reclaim it.
+	 */
+	mutex_lock(&kctx->evict_lock);
+	list_add(&gpu_alloc->evict_node, &kctx->evict_list);
+	mutex_unlock(&kctx->evict_lock);
+	kbase_mem_evictable_mark_reclaim(gpu_alloc);
+
+	gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED;
+	return 0;
+}
+
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * First remove the allocation from the eviction list as it's no
+	 * longer eligible for eviction.
+	 */
+	mutex_lock(&kctx->evict_lock);
+	list_del_init(&gpu_alloc->evict_node);
+	mutex_unlock(&kctx->evict_lock);
+
+	if (gpu_alloc->evicted == 0) {
+		/*
+		 * The backing is still present, update the VM stats as it's
+		 * in use again.
+		 */
+		kbase_mem_evictable_unmark_reclaim(gpu_alloc);
+	} else {
+		/* If the region is still alive ... */
+		if (gpu_alloc->reg) {
+			/* ... allocate replacement backing ... */
+			err = kbase_alloc_phy_pages_helper(gpu_alloc,
+					gpu_alloc->evicted);
+
+			/*
+			 * ... and grow the mapping back to its
+			 * pre-eviction size.
+			 */
+			if (!err)
+				err = kbase_mem_grow_gpu_mapping(kctx,
+						gpu_alloc->reg,
+						gpu_alloc->evicted, 0);
+
+			gpu_alloc->evicted = 0;
+		}
+	}
+
+	/* If the region is still alive remove the DONT_NEED attribute. */
+	if (gpu_alloc->reg)
+		gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED;
+
+	return (err == 0);
+}
+
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+	unsigned int real_flags = 0;
+	unsigned int prev_flags = 0;
+	bool prev_needed, new_needed;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (!gpu_addr)
+		return -EINVAL;
+
+	/* nuke other bits */
+	flags &= mask;
+
+	/* check for only supported flags */
+	if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* mask covers bits we don't support? */
+	if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* convert flags */
+	if (BASE_MEM_COHERENT_SYSTEM & flags)
+		real_flags |= KBASE_REG_SHARE_BOTH;
+	else if (BASE_MEM_COHERENT_LOCAL & flags)
+		real_flags |= KBASE_REG_SHARE_IN;
+
+	/* now we can lock down the context, and find the region */
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	/* Is the region being transitioning between not needed and needed? */
+	prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
+	new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
+	if (prev_needed != new_needed) {
+		/* Aliased allocations can't be made ephemeral */
+		if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1)
+			goto out_unlock;
+
+		if (new_needed) {
+			/* Only native allocations can be marked not needed */
+			if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			ret = kbase_mem_evictable_make(reg->gpu_alloc);
+			if (ret)
+				goto out_unlock;
+		} else {
+			kbase_mem_evictable_unmake(reg->gpu_alloc);
+		}
+	}
+
+	/* limit to imported memory */
+	if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) &&
+	     (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
+		goto out_unlock;
+
+	/* no change? */
+	if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/* save for roll back */
+	prev_flags = reg->flags;
+	reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+	reg->flags |= real_flags;
+
+	/* Currently supporting only imported memory */
+	switch (reg->gpu_alloc->type) {
+#ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_cpu_phy_pages(reg), reg->gpu_alloc->nents, reg->flags);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		/* Future use will use the new flags, existing mapping will NOT be updated
+		 * as memory should not be in use by the GPU when updating the flags.
+		 */
+		ret = 0;
+		WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+		break;
+#endif
+	default:
+		break;
+	}
+
+	/* roll back on error, i.e. not UMP */
+	if (ret)
+		reg->flags = prev_flags;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	up_write(&current->mm->mmap_sem);
+out:
+	return ret;
+}
+
+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
+
+#ifdef CONFIG_UMP
+static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	ump_dd_handle umph;
+	u64 block_count;
+	const ump_dd_physical_block_64 *block_array;
+	u64 i, j;
+	int page = 0;
+	ump_alloc_flags ump_flags;
+	ump_alloc_flags cpu_flags;
+	ump_alloc_flags gpu_flags;
+
+	if (*flags & BASE_MEM_SECURE)
+		goto bad_flags;
+
+	umph = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE == umph)
+		goto bad_id;
+
+	ump_flags = ump_dd_allocation_flags_get(umph);
+	cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK;
+	gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) &
+			UMP_DEVICE_MASK;
+
+	*va_pages = ump_dd_size_get_64(umph);
+	*va_pages >>= PAGE_SHIFT;
+
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	if (*flags & BASE_MEM_SAME_VA)
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	else
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+
+	if (!reg)
+		goto no_region;
+
+	/* we've got pages to map now, and support SAME_VA */
+	*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	reg->gpu_alloc->imported.ump_handle = umph;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMP is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMP cannot be grown */
+
+	/* Override import flags based on UMP flags */
+	*flags &= ~(BASE_MEM_CACHED_CPU);
+	*flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR);
+	*flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR);
+
+	if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) {
+		reg->flags |= KBASE_REG_CPU_CACHED;
+		*flags |= BASE_MEM_CACHED_CPU;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_WR) {
+		reg->flags |= KBASE_REG_CPU_WR;
+		*flags |= BASE_MEM_PROT_CPU_WR;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_RD) {
+		reg->flags |= KBASE_REG_CPU_RD;
+		*flags |= BASE_MEM_PROT_CPU_RD;
+	}
+
+	if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR))
+		reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (gpu_flags & UMP_PROT_DEVICE_WR) {
+		reg->flags |= KBASE_REG_GPU_WR;
+		*flags |= BASE_MEM_PROT_GPU_WR;
+	}
+
+	if (gpu_flags & UMP_PROT_DEVICE_RD) {
+		reg->flags |= KBASE_REG_GPU_RD;
+		*flags |= BASE_MEM_PROT_GPU_RD;
+	}
+
+	/* ump phys block query */
+	ump_dd_phys_blocks_get_64(umph, &block_count, &block_array);
+
+	for (i = 0; i < block_count; i++) {
+		for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) {
+			reg->gpu_alloc->pages[page] = block_array[i].addr + (j << PAGE_SHIFT);
+			page++;
+		}
+	}
+	reg->gpu_alloc->nents = *va_pages;
+	reg->extent = 0;
+
+	return reg;
+
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	ump_dd_release(umph);
+bad_id:
+bad_flags:
+	return NULL;
+}
+#endif				/* CONFIG_UMP */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, int fd, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	struct dma_buf *dma_buf;
+	struct dma_buf_attachment *dma_attachment;
+	bool shared_zone = false;
+
+	dma_buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(dma_buf))
+		goto no_buf;
+
+	dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+	if (!dma_attachment)
+		goto no_attachment;
+
+	*va_pages = PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT;
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* ignore SAME_VA */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	} else {
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	/* No pages to map yet */
+	reg->gpu_alloc->nents = 0;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMM is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMM cannot be grown */
+	reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (*flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (*flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (*flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (*flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (*flags & BASE_MEM_SECURE)
+		reg->flags |= KBASE_REG_SECURE;
+
+	/* no read or write permission given on import, only on run do we give the right permissions */
+
+	reg->gpu_alloc->type = BASE_MEM_IMPORT_TYPE_UMM;
+	reg->gpu_alloc->imported.umm.sgt = NULL;
+	reg->gpu_alloc->imported.umm.dma_buf = dma_buf;
+	reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
+	reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
+	reg->extent = 0;
+
+	return reg;
+
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	dma_buf_detach(dma_buf, dma_attachment);
+no_attachment:
+	dma_buf_put(dma_buf);
+no_buf:
+	return NULL;
+}
+#endif  /* CONFIG_DMA_SHARED_BUFFER */
+
+
+static struct kbase_va_region *kbase_mem_from_user_buffer(
+		struct kbase_context *kctx, unsigned long address,
+		unsigned long size, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	long faulted_pages;
+	int zone = KBASE_REG_ZONE_CUSTOM_VA;
+	bool shared_zone = false;
+
+	*va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) -
+		PFN_DOWN(address);
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (UINT64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* SAME_VA generally not supported with imported memory (no known use cases) */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		zone = KBASE_REG_ZONE_SAME_VA;
+	}
+
+	reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone);
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages,
+			KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */
+
+	if (*flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (*flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (*flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (*flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	down_read(&current->mm->mmap_sem);
+
+	/* A sanity check that get_user_pages will work on the memory */
+	/* (so the initial import fails on weird memory regions rather than */
+	/* the job failing when we try to handle the external resources). */
+	/* It doesn't take a reference to the pages (because the page list is NULL). */
+	/* We can't really store the page list because that would involve */
+	/* keeping the pages pinned - instead we pin/unpin around the job */
+	/* (as part of the external resources handling code) */
+	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (faulted_pages != *va_pages)
+		goto fault_mismatch;
+
+	reg->gpu_alloc->imported.user_buf.size = size;
+	reg->gpu_alloc->imported.user_buf.address = address;
+	reg->gpu_alloc->imported.user_buf.nr_pages = faulted_pages;
+	reg->gpu_alloc->imported.user_buf.pages = kmalloc_array(faulted_pages,
+			sizeof(struct page *), GFP_KERNEL);
+	reg->gpu_alloc->imported.user_buf.mm = current->mm;
+	atomic_inc(&current->mm->mm_count);
+
+	if (!reg->gpu_alloc->imported.user_buf.pages)
+		goto no_page_array;
+
+	reg->gpu_alloc->nents = 0;
+	reg->extent = 0;
+
+	return reg;
+
+no_page_array:
+fault_mismatch:
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	return NULL;
+
+}
+
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+		    u64 nents, struct base_mem_aliasing_info *ai,
+		    u64 *num_pages)
+{
+	struct kbase_va_region *reg;
+	u64 gpu_va;
+	size_t i;
+	bool coherent;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(ai);
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	/* mask to only allowed flags */
+	*flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+		   BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
+		   BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+
+	if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_alias called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+	coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 ||
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0;
+
+	if (!stride)
+		goto bad_stride;
+
+	if (!nents)
+		goto bad_nents;
+
+	if ((nents * stride) > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* calculate the number of pages this alias will cover */
+	*num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat) {
+		/* 64-bit tasks must MMAP anyway, but not expose this address to
+		 * clients */
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_SAME_VA);
+	} else {
+#else
+	if (1) {
+#endif
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	/* zero-sized page array, as we don't need one/can support one */
+	reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	kbase_update_region_flags(kctx, reg, *flags);
+
+	reg->gpu_alloc->imported.alias.nents = nents;
+	reg->gpu_alloc->imported.alias.stride = stride;
+	reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents);
+	if (!reg->gpu_alloc->imported.alias.aliased)
+		goto no_aliased_array;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* validate and add src handles */
+	for (i = 0; i < nents; i++) {
+		if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+			if (ai[i].handle.basep.handle !=
+			    BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE)
+				goto bad_handle; /* unsupported magic handle */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+		} else {
+			struct kbase_va_region *aliasing_reg;
+			struct kbase_mem_phy_alloc *alloc;
+
+			aliasing_reg = kbase_region_tracker_find_region_base_address(
+				kctx,
+				(ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT);
+
+			/* validate found region */
+			if (!aliasing_reg)
+				goto bad_handle; /* Not found */
+			if (aliasing_reg->flags & KBASE_REG_FREE)
+				goto bad_handle; /* Free region */
+			if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+				goto bad_handle; /* Ephemeral region */
+			if (!aliasing_reg->gpu_alloc)
+				goto bad_handle; /* No alloc */
+			if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+				goto bad_handle; /* Not a native alloc */
+			if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0))
+				goto bad_handle;
+				/* Non-coherent memory cannot alias
+				   coherent memory, and vice versa.*/
+
+			/* check size against stride */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+
+			alloc = aliasing_reg->gpu_alloc;
+
+			/* check against the alloc's size */
+			if (ai[i].offset > alloc->nents)
+				goto bad_handle; /* beyond end */
+			if (ai[i].offset + ai[i].length > alloc->nents)
+				goto bad_handle; /* beyond end */
+
+			reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+			reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset;
+		}
+	}
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat) {
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
+			goto no_cookie;
+		}
+		/* return a cookie */
+		gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << gpu_va);
+		BUG_ON(kctx->pending_regions[gpu_va]);
+		kctx->pending_regions[gpu_va] = reg;
+
+		/* relocate to correct base */
+		gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		gpu_va <<= PAGE_SHIFT;
+	} else /* we control the VA */ {
+#else
+	if (1) {
+#endif
+		if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+			dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags &= ~KBASE_REG_GROWABLE;
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return gpu_va;
+
+#ifdef CONFIG_64BIT
+no_cookie:
+#endif
+no_mmap:
+bad_handle:
+	kbase_gpu_vm_unlock(kctx);
+no_aliased_array:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_reg:
+bad_size:
+bad_nents:
+bad_stride:
+bad_flags:
+	return 0;
+}
+
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u64 *gpu_va, u64 *va_pages,
+		u64 *flags)
+{
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_pages);
+	KBASE_DEBUG_ASSERT(flags);
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat)
+		*flags |= BASE_MEM_SAME_VA;
+#endif
+
+	if (!kbase_check_import_flags(*flags)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	switch (type) {
+#ifdef CONFIG_UMP
+	case BASE_MEM_IMPORT_TYPE_UMP: {
+		ump_secure_id id;
+
+		if (get_user(id, (ump_secure_id __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_ump(kctx, id, va_pages, flags);
+	}
+	break;
+#endif /* CONFIG_UMP */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case BASE_MEM_IMPORT_TYPE_UMM: {
+		int fd;
+
+		if (get_user(fd, (int __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_umm(kctx, fd, va_pages, flags);
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
+		struct base_mem_import_user_buffer user_buffer;
+		void __user *uptr;
+
+		if (copy_from_user(&user_buffer, phandle,
+				sizeof(user_buffer))) {
+			reg = NULL;
+		} else {
+#ifdef CONFIG_COMPAT
+			if (kctx->is_compat)
+				uptr = compat_ptr(user_buffer.ptr.compat_value);
+			else
+#endif
+				uptr = user_buffer.ptr.value;
+
+			reg = kbase_mem_from_user_buffer(kctx,
+					(unsigned long)uptr, user_buffer.length,
+					va_pages, flags);
+		}
+		break;
+	}
+	default: {
+		reg = NULL;
+		break;
+	}
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies)
+			goto no_cookie;
+		/* return a cookie */
+		*gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << *gpu_va);
+		BUG_ON(kctx->pending_regions[*gpu_va]);
+		kctx->pending_regions[*gpu_va] = reg;
+
+		/* relocate to correct base */
+		*gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		*gpu_va <<= PAGE_SHIFT;
+
+	} else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES)  {
+		/* we control the VA, mmap now to the GPU */
+		if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	} else {
+		/* we control the VA, but nothing to mmap yet */
+		if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	/* clear out private flags */
+	*flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+
+no_gpu_va:
+no_cookie:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+no_reg:
+bad_flags:
+	*gpu_va = 0;
+	*va_pages = 0;
+	*flags = 0;
+	return -ENOMEM;
+}
+
+
+static int zap_range_nolock(struct mm_struct *mm,
+		const struct vm_operations_struct *vm_ops,
+		unsigned long start, unsigned long end)
+{
+	struct vm_area_struct *vma;
+	int err = -EINVAL; /* in case end < start */
+
+	while (start < end) {
+		unsigned long local_start;
+		unsigned long local_end;
+
+		vma = find_vma_intersection(mm, start, end);
+		if (!vma)
+			break;
+
+		/* is it ours? */
+		if (vma->vm_ops != vm_ops)
+			goto try_next;
+
+		local_start = vma->vm_start;
+
+		if (start > local_start)
+			local_start = start;
+
+		local_end = vma->vm_end;
+
+		if (end < local_end)
+			local_end = end;
+
+		err = zap_vma_ptes(vma, local_start, local_end - local_start);
+		if (unlikely(err))
+			break;
+
+try_next:
+		/* go to next vma, if any */
+		start = vma->vm_end;
+	}
+
+	return err;
+}
+
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	phys_addr_t *phy_pages;
+	u64 delta = new_pages - old_pages;
+	int ret = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Map the new pages into the GPU */
+	phy_pages = kbase_get_gpu_phy_pages(reg);
+	ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
+			phy_pages + old_pages, delta, reg->flags);
+
+	return ret;
+}
+
+static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	struct kbase_mem_phy_alloc *cpu_alloc = reg->cpu_alloc;
+	struct kbase_cpu_mapping *mapping;
+	int err;
+
+	lockdep_assert_held(&kctx->process_mm->mmap_sem);
+
+	list_for_each_entry(mapping, &cpu_alloc->mappings, mappings_list) {
+		unsigned long mapping_size;
+
+		mapping_size = (mapping->vm_end - mapping->vm_start)
+				>> PAGE_SHIFT;
+
+		/* is this mapping affected ?*/
+		if ((mapping->page_off + mapping_size) > new_pages) {
+			unsigned long first_bad = 0;
+
+			if (new_pages > mapping->page_off)
+				first_bad = new_pages - mapping->page_off;
+
+			err = zap_range_nolock(current->mm,
+					&kbase_vm_ops,
+					mapping->vm_start +
+					(first_bad << PAGE_SHIFT),
+					mapping->vm_end);
+
+			WARN(err,
+			     "Failed to zap VA range (0x%lx - 0x%lx);\n",
+			     mapping->vm_start +
+			     (first_bad << PAGE_SHIFT),
+			     mapping->vm_end
+			     );
+
+			/* The zap failed, give up and exit */
+			if (err)
+				goto failed;
+		}
+	}
+
+	return 0;
+
+failed:
+	return err;
+}
+
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 delta = old_pages - new_pages;
+	int ret = 0;
+
+	ret = kbase_mmu_teardown_pages(kctx,
+			reg->start_pfn + new_pages, delta);
+	if (ret)
+		return ret;
+
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) {
+		/*
+		* Wait for GPU to flush write buffer before freeing
+		* physical pages.
+		 */
+		kbase_wait_write_flush(kctx);
+	}
+#endif
+
+	return ret;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason)
+{
+	u64 old_pages;
+	u64 delta;
+	int res = -EINVAL;
+	struct kbase_va_region *reg;
+	bool read_locked = false;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(failure_reason);
+	KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE)) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+		goto out_unlock;
+	}
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (0 == (reg->flags & KBASE_REG_GROWABLE)) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (new_pages > reg->nr_pages) {
+		/* Would overflow the VA region */
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+		goto out_unlock;
+	}
+
+	/* can't be mapped more than once on the GPU */
+	if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+	/* can't grow regions which are ephemeral */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (new_pages == reg->gpu_alloc->nents) {
+		/* no change */
+		res = 0;
+		goto out_unlock;
+	}
+
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (new_pages > old_pages) {
+		delta = new_pages - old_pages;
+
+		/*
+		 * No update to the mm so downgrade the writer lock to a read
+		 * lock so other readers aren't blocked after this point.
+		 */
+		downgrade_write(&current->mm->mmap_sem);
+		read_locked = true;
+
+		/* Allocate some more pages */
+		if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+		if (reg->cpu_alloc != reg->gpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					reg->gpu_alloc, delta) != 0) {
+				*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+				kbase_free_phy_pages_helper(reg->cpu_alloc,
+						delta);
+				goto out_unlock;
+			}
+		}
+
+		/* No update required for CPU mappings, that's done on fault. */
+
+		/* Update GPU mapping. */
+		res = kbase_mem_grow_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* On error free the new pages */
+		if (res) {
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+	} else {
+		delta = old_pages - new_pages;
+
+		/* Update all CPU mapping(s) */
+		res = kbase_mem_shrink_cpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+
+		/* Update the GPU mapping */
+		res = kbase_mem_shrink_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+
+		kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+		if (reg->cpu_alloc != reg->gpu_alloc)
+			kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	if (read_locked)
+		up_read(&current->mm->mmap_sem);
+	else
+		up_write(&current->mm->mmap_sem);
+
+	return res;
+}
+
+static void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* non-atomic as we're under Linux' mm lock */
+	map->count++;
+}
+
+static void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* non-atomic as we're under Linux' mm lock */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	kbase_gpu_vm_lock(map->kctx);
+
+	if (map->region) {
+		KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
+				KBASE_REG_ZONE_SAME_VA);
+		/* Avoid freeing memory on the process death which results in
+		 * GPU Page Fault. Memory will be freed in kbase_destroy_context
+		 */
+		if (!(current->flags & PF_EXITING))
+			kbase_mem_free_region(map->kctx, map->region);
+	}
+
+	list_del(&map->mappings_list);
+
+	kbase_gpu_vm_unlock(map->kctx);
+
+	kbase_mem_phy_alloc_put(map->alloc);
+	kfree(map);
+}
+
+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
+
+
+static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+	pgoff_t rel_pgoff;
+	size_t i;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	/* we don't use vmf->pgoff as it's affected by our mmap with
+	 * offset being a GPU VA or a cookie */
+	rel_pgoff = ((unsigned long)vmf->virtual_address - map->vm_start)
+			>> PAGE_SHIFT;
+
+	kbase_gpu_vm_lock(map->kctx);
+	if (map->page_off + rel_pgoff >= map->alloc->nents)
+		goto locked_bad_fault;
+
+	/* Fault on access to DONT_NEED regions */
+	if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED))
+		goto locked_bad_fault;
+
+	/* insert all valid pages from the fault location */
+	for (i = rel_pgoff;
+	     i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT,
+	     map->alloc->nents - map->page_off); i++) {
+		int ret = vm_insert_pfn(vma, map->vm_start + (i << PAGE_SHIFT),
+		    PFN_DOWN(map->alloc->pages[map->page_off + i]));
+		if (ret < 0 && ret != -EBUSY)
+			goto locked_bad_fault;
+	}
+
+	kbase_gpu_vm_unlock(map->kctx);
+	/* we resolved it, nothing for VM to do */
+	return VM_FAULT_NOPAGE;
+
+locked_bad_fault:
+	kbase_gpu_vm_unlock(map->kctx);
+	return VM_FAULT_SIGBUS;
+}
+
+static const struct vm_operations_struct kbase_vm_ops = {
+	.open  = kbase_cpu_vm_open,
+	.close = kbase_cpu_vm_close,
+	.fault = kbase_cpu_vm_fault
+};
+
+static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close)
+{
+	struct kbase_cpu_mapping *map;
+	u64 start_off = vma->vm_pgoff - reg->start_pfn;
+	phys_addr_t *page_array;
+	int err = 0;
+	int i;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+	if (!map) {
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+	 * VM_DONTEXPAND - disable mremap on this region
+	 * VM_IO - disables paging
+	 * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+	 * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+	 *               This is needed to support using the dedicated and
+	 *               the OS based memory backends together.
+	 */
+	/*
+	 * This will need updating to propagate coherency flags
+	 * See MIDBASE-1057
+	 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_ops;
+	vma->vm_private_data = map;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+	    (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+		/* We can't map vmalloc'd memory uncached.
+		 * Other memory will have been returned from
+		 * kbase_mem_pool which would be
+		 * suitable for mapping uncached.
+		 */
+		BUG_ON(kaddr);
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	if (!kaddr) {
+		unsigned long addr = vma->vm_start + aligned_offset;
+
+		vma->vm_flags |= VM_PFNMAP;
+		for (i = 0; i < nr_pages; i++) {
+			unsigned long pfn = PFN_DOWN(page_array[i + start_off]);
+
+			err = vm_insert_pfn(vma, addr, pfn);
+			if (WARN_ON(err))
+				break;
+
+			addr += PAGE_SIZE;
+		}
+	} else {
+		WARN_ON(aligned_offset);
+		/* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+		vma->vm_flags |= VM_MIXEDMAP;
+		/* vmalloc remaping is easy... */
+		err = remap_vmalloc_range(vma, kaddr, 0);
+		WARN_ON(err);
+	}
+
+	if (err) {
+		kfree(map);
+		goto out;
+	}
+
+	map->page_off = start_off;
+	map->region = free_on_close ? reg : NULL;
+	map->kctx = reg->kctx;
+	map->vm_start = vma->vm_start + aligned_offset;
+	if (aligned_offset) {
+		KBASE_DEBUG_ASSERT(!start_off);
+		map->vm_end = map->vm_start + (reg->nr_pages << PAGE_SHIFT);
+	} else {
+		map->vm_end = vma->vm_end;
+	}
+	map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->count = 1; /* start with one ref */
+
+	if (reg->flags & KBASE_REG_CPU_CACHED)
+		map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+	return err;
+}
+
+static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
+{
+	struct kbase_va_region *new_reg;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+	u32 *tb;
+	int owns_tb = 1;
+
+	dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	if (!kctx->jctx.tb) {
+		KBASE_DEBUG_ASSERT(0 != size);
+		tb = vmalloc_user(size);
+
+		if (NULL == tb) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = kbase_device_trace_buffer_install(kctx, tb, size);
+		if (err) {
+			vfree(tb);
+			goto out;
+		}
+	} else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	*kaddr = kctx->jctx.tb;
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_region;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->cpu_alloc->imported.kctx = kctx;
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+
+	/* alloc now owns the tb */
+	owns_tb = 0;
+
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_va_region;
+	}
+
+	*reg = new_reg;
+
+	/* map read only, noexec */
+	vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+	/* the rest of the flags is added by the cpu_mmap handler */
+
+	dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
+	return 0;
+
+out_no_va_region:
+out_no_alloc:
+	kbase_free_alloced_region(new_reg);
+out_no_region:
+	if (owns_tb) {
+		kbase_device_trace_buffer_uninstall(kctx);
+		vfree(tb);
+	}
+out:
+	return err;
+}
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
+{
+	struct kbase_va_region *new_reg;
+	void *kaddr;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	kaddr = kbase_mmu_dump(kctx, nr_pages);
+
+	if (!kaddr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_va_region;
+	}
+
+	*kmap_addr = kaddr;
+	*reg = new_reg;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+	return 0;
+
+out_no_alloc:
+out_va_region:
+	kbase_free_alloced_region(new_reg);
+out:
+	return err;
+}
+
+
+void kbase_os_mem_map_lock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	down_read(&mm->mmap_sem);
+}
+
+void kbase_os_mem_map_unlock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	up_read(&mm->mmap_sem);
+}
+
+#if defined(CONFIG_DMA_SHARED_BUFFER) && defined(CONFIG_MALI_TRACE_TIMELINE)
+/* This section is required only for instrumentation. */
+
+static void kbase_dma_buf_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* Non-atomic as we're under Linux's mm lock. */
+	map->count++;
+}
+
+static void kbase_dma_buf_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* Non-atomic as we're under Linux's mm lock. */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+
+	kbase_gpu_vm_lock(map->kctx);
+	list_del(&map->mappings_list);
+	kbase_gpu_vm_unlock(map->kctx);
+	kfree(map);
+}
+
+static const struct vm_operations_struct kbase_dma_mmap_ops = {
+	.open  = kbase_dma_buf_vm_open,
+	.close = kbase_dma_buf_vm_close,
+};
+#endif /* CONFIG_DMA_SHARED_BUFFER && CONFIG_MALI_TRACE_TIMELINE */
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx = file->private_data;
+	struct kbase_va_region *reg;
+	void *kaddr = NULL;
+	size_t nr_pages;
+	int err = 0;
+	int free_on_close = 0;
+	struct device *dev = kctx->kbdev->dev;
+	size_t aligned_offset = 0;
+
+	dev_dbg(dev, "kbase_mmap\n");
+	nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+	/* strip away corresponding VM_MAY% flags to the VM_% flags requested */
+	vma->vm_flags &= ~((vma->vm_flags & (VM_READ | VM_WRITE)) << 4);
+
+	if (0 == nr_pages) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
+		/* The non-mapped tracking helper page */
+		err = kbase_tracking_page_setup(kctx, vma);
+		goto out_unlock;
+	}
+
+	/* if not the MTP, verify that the MTP has been mapped */
+	rcu_read_lock();
+	/* catches both when the special page isn't present or
+	 * when we've forked */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		err = -EINVAL;
+		rcu_read_unlock();
+		goto out_unlock;
+	}
+	rcu_read_unlock();
+
+	switch (vma->vm_pgoff) {
+	case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
+	case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE):
+		/* Illegal handle for direct map */
+		err = -EINVAL;
+		goto out_unlock;
+	case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE):
+		err = kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
+		/* free the region on munmap */
+		free_on_close = 1;
+		goto map;
+	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+		/* MMU dump */
+		err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		goto map;
+	case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+	     PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+		/* SAME_VA stuff, fetch the right region */
+		int gpu_pc_bits;
+		int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+
+		gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		if (reg->flags & KBASE_REG_ALIGNED) {
+			/* nr_pages must be able to hold alignment pages
+			 * plus actual pages */
+			unsigned long align = 1ULL << gpu_pc_bits;
+			unsigned long extra_pages = 3 * PFN_DOWN(align);
+			unsigned long aligned_addr;
+			unsigned long aligned_addr_end;
+			unsigned long nr_bytes = reg->nr_pages << PAGE_SHIFT;
+
+			if (kctx->api_version < KBASE_API_VERSION(8, 5))
+				/* Maintain compatibility with old userspace */
+				extra_pages = PFN_DOWN(align);
+
+			if (nr_pages != reg->nr_pages + extra_pages) {
+				/* incorrect mmap size */
+				/* leave the cookie for a potential
+				 * later mapping, or to be reclaimed
+				 * later when the context is freed */
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
+			aligned_addr = ALIGN(vma->vm_start, align);
+			aligned_addr_end = aligned_addr + nr_bytes;
+
+			if (kctx->api_version >= KBASE_API_VERSION(8, 5)) {
+				if ((aligned_addr_end & BASE_MEM_MASK_4GB) == 0) {
+					/* Can't end at 4GB boundary */
+					aligned_addr += 2 * align;
+				} else if ((aligned_addr & BASE_MEM_MASK_4GB) == 0) {
+					/* Can't start at 4GB boundary */
+					aligned_addr += align;
+				}
+			}
+
+			aligned_offset = aligned_addr - vma->vm_start;
+		} else if (reg->nr_pages != nr_pages) {
+			/* incorrect mmap size */
+			/* leave the cookie for a potential later
+			 * mapping, or to be reclaimed later when the
+			 * context is freed */
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		if ((vma->vm_flags & VM_READ &&
+					!(reg->flags & KBASE_REG_CPU_RD)) ||
+				(vma->vm_flags & VM_WRITE &&
+				 !(reg->flags & KBASE_REG_CPU_WR))) {
+			/* VM flags inconsistent with region flags */
+			err = -EPERM;
+			dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+			goto out_unlock;
+		}
+
+		/* adjust down nr_pages to what we have physically */
+		nr_pages = kbase_reg_current_backed_size(reg);
+
+		if (kbase_gpu_mmap(kctx, reg,
+					vma->vm_start + aligned_offset,
+					reg->nr_pages, 1) != 0) {
+			dev_err(dev, "%s:%d\n", __FILE__, __LINE__);
+			/* Unable to map in GPU space. */
+			WARN_ON(1);
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		/* no need for the cookie anymore */
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		/*
+		 * Overwrite the offset with the
+		 * region start_pfn, so we effectively
+		 * map from offset 0 in the region.
+		 */
+		vma->vm_pgoff = reg->start_pfn;
+
+		/* free the region on munmap */
+		free_on_close = 1;
+		goto map;
+	}
+	default: {
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx, (u64)vma->vm_pgoff << PAGE_SHIFT);
+
+		if (reg && !(reg->flags & KBASE_REG_FREE)) {
+			/* will this mapping overflow the size of the region? */
+			if (nr_pages > (reg->nr_pages - (vma->vm_pgoff - reg->start_pfn)))
+				goto overflow;
+
+			if ((vma->vm_flags & VM_READ &&
+			     !(reg->flags & KBASE_REG_CPU_RD)) ||
+			    (vma->vm_flags & VM_WRITE &&
+			     !(reg->flags & KBASE_REG_CPU_WR))) {
+				/* VM flags inconsistent with region flags */
+				err = -EPERM;
+				dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+				goto out_unlock;
+			}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+			if (reg->cpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM)
+				goto dma_map;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+			/* limit what we map to the amount currently backed */
+			if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+				if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
+					nr_pages = 0;
+				else
+					nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn);
+			}
+
+			goto map;
+		}
+
+overflow:
+		err = -ENOMEM;
+		goto out_unlock;
+	} /* default */
+	} /* switch */
+map:
+	err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
+		/* MMU dump - userspace should now have a reference on
+		 * the pages, so we can now free the kernel mapping */
+		vfree(kaddr);
+	}
+	goto out_unlock;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+dma_map:
+	err = dma_buf_mmap(reg->cpu_alloc->imported.umm.dma_buf, vma, vma->vm_pgoff - reg->start_pfn);
+#if defined(CONFIG_MALI_TRACE_TIMELINE)
+	/* This section is required only for instrumentation. */
+	/* Add created mapping to imported region mapping list.
+	 * It is important to make it visible to dumping infrastructure.
+	 * Add mapping only if vm_ops structure is not used by memory owner. */
+	WARN_ON(vma->vm_ops);
+	WARN_ON(vma->vm_private_data);
+	if (!err && !vma->vm_ops && !vma->vm_private_data) {
+		struct kbase_cpu_mapping *map = kzalloc(
+			sizeof(*map),
+			GFP_KERNEL);
+
+		if (map) {
+			map->kctx     = reg->kctx;
+			map->region   = NULL;
+			map->page_off = vma->vm_pgoff;
+			map->vm_start = vma->vm_start;
+			map->vm_end   = vma->vm_end;
+			map->count    = 1; /* start with one ref */
+
+			vma->vm_ops          = &kbase_dma_mmap_ops;
+			vma->vm_private_data = map;
+
+			list_add(
+				&map->mappings_list,
+				&reg->cpu_alloc->mappings);
+		}
+	}
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+out:
+	if (err)
+		dev_err(dev, "mmap failed %d\n", err);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmap);
+
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map)
+{
+	struct kbase_va_region *reg;
+	unsigned long page_index;
+	unsigned int offset = gpu_addr & ~PAGE_MASK;
+	size_t page_count = PFN_UP(offset + size);
+	phys_addr_t *page_array;
+	struct page **pages;
+	void *cpu_addr = NULL;
+	pgprot_t prot;
+	size_t i;
+	bool sync_needed;
+
+	if (!size || !map)
+		return NULL;
+
+	/* check if page_count calculation will wrap */
+	if (size > ((size_t)-1 / PAGE_SIZE))
+		return NULL;
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn;
+
+	/* check if page_index + page_count will wrap */
+	if (-1UL - page_count < page_index)
+		goto out_unlock;
+
+	if (page_index + page_count > kbase_reg_current_backed_size(reg))
+		goto out_unlock;
+
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+	if (!page_array)
+		goto out_unlock;
+
+	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out_unlock;
+
+	for (i = 0; i < page_count; i++)
+		pages[i] = pfn_to_page(PFN_DOWN(page_array[page_index + i]));
+
+	prot = PAGE_KERNEL;
+	if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
+		/* Map uncached */
+		prot = pgprot_writecombine(prot);
+	}
+
+	cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+
+	kfree(pages);
+
+	if (!cpu_addr)
+		goto out_unlock;
+
+	map->gpu_addr = gpu_addr;
+	map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
+	map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+	map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
+	map->addr = (void *)((uintptr_t)cpu_addr + offset);
+	map->size = size;
+	map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0;
+	sync_needed = map->is_cached;
+
+#ifdef CONFIG_MALI_COH_KERN
+	/* kernel can use coherent memory if supported */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+		sync_needed = false;
+#endif
+
+	if (sync_needed) {
+		/* Sync first page */
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+		phys_addr_t cpu_pa = map->cpu_pages[0];
+		phys_addr_t gpu_pa = map->gpu_pages[0];
+
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+				KBASE_SYNC_TO_CPU);
+
+		/* Sync middle pages (if any) */
+		for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+			cpu_pa = map->cpu_pages[i];
+			gpu_pa = map->gpu_pages[i];
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+					KBASE_SYNC_TO_CPU);
+		}
+
+		/* Sync last page (if any) */
+		if (page_count > 1) {
+			cpu_pa = map->cpu_pages[page_count - 1];
+			gpu_pa = map->gpu_pages[page_count - 1];
+			sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+					KBASE_SYNC_TO_CPU);
+		}
+	}
+	kbase_gpu_vm_unlock(kctx);
+
+	return map->addr;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_vmap);
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+	void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+	bool sync_needed = map->is_cached;
+	vunmap(addr);
+#ifdef CONFIG_MALI_COH_KERN
+	/* kernel can use coherent memory if supported */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+		sync_needed = false;
+#endif
+	if (sync_needed) {
+		off_t offset = (uintptr_t)map->addr & ~PAGE_MASK;
+		size_t size = map->size;
+		size_t page_count = PFN_UP(offset + size);
+		size_t i;
+
+		/* Sync first page */
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+		phys_addr_t cpu_pa = map->cpu_pages[0];
+		phys_addr_t gpu_pa = map->gpu_pages[0];
+
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+				KBASE_SYNC_TO_DEVICE);
+
+		/* Sync middle pages (if any) */
+		for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+			cpu_pa = map->cpu_pages[i];
+			gpu_pa = map->gpu_pages[i];
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+					KBASE_SYNC_TO_DEVICE);
+		}
+
+		/* Sync last page (if any) */
+		if (page_count > 1) {
+			cpu_pa = map->cpu_pages[page_count - 1];
+			gpu_pa = map->gpu_pages[page_count - 1];
+			sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+					KBASE_SYNC_TO_DEVICE);
+		}
+	}
+	map->gpu_addr = 0;
+	map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+	map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+	map->cpu_pages = NULL;
+	map->gpu_pages = NULL;
+	map->addr = NULL;
+	map->size = 0;
+	map->is_cached = false;
+}
+KBASE_EXPORT_TEST_API(kbase_vunmap);
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
+{
+	struct mm_struct *mm;
+
+	rcu_read_lock();
+	mm = rcu_dereference(kctx->process_mm);
+	if (mm) {
+		atomic_add(pages, &kctx->nonmapped_pages);
+#ifdef SPLIT_RSS_COUNTING
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+#else
+		spin_lock(&mm->page_table_lock);
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+		spin_unlock(&mm->page_table_lock);
+#endif
+	}
+	rcu_read_unlock();
+}
+
+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
+{
+	int pages;
+	struct mm_struct *mm;
+
+	spin_lock(&kctx->mm_update_lock);
+	mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
+	if (!mm) {
+		spin_unlock(&kctx->mm_update_lock);
+		return;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, NULL);
+	spin_unlock(&kctx->mm_update_lock);
+	synchronize_rcu();
+
+	pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+#ifdef SPLIT_RSS_COUNTING
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+#else
+	spin_lock(&mm->page_table_lock);
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+	spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx;
+
+	kctx = vma->vm_private_data;
+	kbasep_os_process_page_usage_drain(kctx);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+	.close = kbase_special_vm_close,
+};
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
+{
+	/* check that this is the only tracking page */
+	spin_lock(&kctx->mm_update_lock);
+	if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
+		spin_unlock(&kctx->mm_update_lock);
+		return -EFAULT;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, current->mm);
+
+	spin_unlock(&kctx->mm_update_lock);
+
+	/* no real access */
+	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_special_ops;
+	vma->vm_private_data = kctx;
+
+	return 0;
+}
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
+{
+	int i;
+	int res;
+	void *va;
+	dma_addr_t  dma_pa;
+	struct kbase_va_region *reg;
+	phys_addr_t *page_array;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))&&(LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
+	u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+		    BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(0 != size);
+	KBASE_DEBUG_ASSERT(0 != pages);
+
+	if (size == 0)
+		goto err;
+
+	/* All the alloc calls return zeroed memory */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+							DMA_ATTR_WRITE_COMBINE);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, &attrs);
+#else
+	va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
+#endif
+	if (!va)
+		goto err;
+
+	/* Store the state so we can free it later. */
+	handle->cpu_va = va;
+	handle->dma_pa = dma_pa;
+	handle->size   = size;
+
+
+	reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA);
+	if (!reg)
+		goto no_reg;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	kbase_update_region_flags(kctx, reg, flags);
+
+	reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(reg->cpu_alloc))
+		goto no_alloc;
+
+	reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	for (i = 0; i < pages; i++)
+		page_array[i] = dma_pa + (i << PAGE_SHIFT);
+
+	reg->cpu_alloc->nents = pages;
+
+	kbase_gpu_vm_lock(kctx);
+	res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
+	kbase_gpu_vm_unlock(kctx);
+	if (res)
+		goto no_mmap;
+
+	return va;
+
+no_mmap:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc:
+	kfree(reg);
+no_reg:
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, DMA_ATTR_WRITE_COMBINE);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
+#endif
+err:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_va_alloc);
+
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
+{
+	struct kbase_va_region *reg;
+	int err;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))&&(LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
+	KBASE_DEBUG_ASSERT(reg);
+	err = kbase_gpu_munmap(kctx, reg);
+	kbase_gpu_vm_unlock(kctx);
+	KBASE_DEBUG_ASSERT(!err);
+
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+			handle->cpu_va, handle->dma_pa,
+			DMA_ATTR_WRITE_COMBINE);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+			handle->cpu_va, handle->dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, handle->size,
+				handle->cpu_va, handle->dma_pa);
+#endif
+}
+KBASE_EXPORT_SYMBOL(kbase_va_free);
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
new file mode 100755
index 0000000..6c0fb56
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -0,0 +1,141 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.h
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_MEM_LINUX_H_
+#define _KBASE_MEM_LINUX_H_
+
+/** A HWC dump mapping */
+struct kbase_hwc_dma_mapping {
+	void       *cpu_va;
+	dma_addr_t  dma_pa;
+	size_t      size;
+};
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment);
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages);
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u64 *gpu_va, u64 *va_pages,
+		u64 *flags);
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason);
+int kbase_mmap(struct file *file, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction
+ * mechanism.
+ * @kctx: The kbase context to initialize.
+ *
+ * Return: Zero on success or -errno on failure.
+ */
+int kbase_mem_evictable_init(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to de-initialize.
+ */
+void kbase_mem_evictable_deinit(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the grow
+ * @old_pages: The number of pages before the grow
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Expand the GPU mapping to encompass the new psychical pages which have
+ * been added to the allocation.
+ *
+ * Note: Caller must be holding the region lock.
+ */
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction
+ * @gpu_alloc: The physical allocation to make evictable
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Take the provided region and make all the physical pages within it
+ * reclaimable by the kernel, updating the per-process VM stats as well.
+ * Remove any CPU mappings (as these can't be removed in the shrinker callback
+ * as mmap_sem might already be taken) but leave the GPU mapping intact as
+ * and until the shrinker reclaims the allocation.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc);
+
+/**
+ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for
+ * eviction.
+ * @alloc: The physical allocation to remove eviction eligibility from.
+ *
+ * Return: True if the allocation had its backing restored and false if
+ * it hasn't.
+ *
+ * Make the physical pages in the region no longer reclaimable and update the
+ * per-process stats, if the shrinker has already evicted the memory then
+ * re-allocate it if the region is still alive.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
+
+struct kbase_vmap_struct {
+	u64 gpu_addr;
+	struct kbase_mem_phy_alloc *cpu_alloc;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+	phys_addr_t *cpu_pages;
+	phys_addr_t *gpu_pages;
+	void *addr;
+	size_t size;
+	bool is_cached;
+};
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map);
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
+
+/** @brief Allocate memory from kernel space and map it onto the GPU
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param size   The size of the allocation in bytes
+ * @param handle An opaque structure used to contain the state needed to free the memory
+ * @return the VA for kernel space and GPU MMU
+ */
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
+
+/** @brief Free/unmap memory allocated by kbase_va_alloc
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param handle An opaque structure returned by the kbase_va_alloc function.
+ */
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
+
+#endif				/* _KBASE_MEM_LINUX_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
new file mode 100755
index 0000000..441180b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_MEM_LOWLEVEL_H
+#define _KBASE_MEM_LOWLEVEL_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/dma-mapping.h>
+
+/**
+ * @brief Flags for kbase_phy_allocator_pages_alloc
+ */
+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0)	/** Default allocation flag */
+#define KBASE_PHY_PAGES_FLAG_CLEAR   (1 << 0)	/** Clear the pages after allocation */
+#define KBASE_PHY_PAGES_FLAG_POISON  (1 << 1)	/** Fill the memory with a poison value */
+
+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON)
+
+#define KBASE_PHY_PAGES_POISON_VALUE  0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */
+
+enum kbase_sync_type {
+	KBASE_SYNC_TO_CPU,
+	KBASE_SYNC_TO_DEVICE
+};
+
+#endif				/* _KBASE_LOWLEVEL_H */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
new file mode 100755
index 0000000..9570618
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -0,0 +1,601 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/shrinker.h>
+#include <linux/atomic.h>
+#include <linux/version.h>
+
+/* This function is only provided for backwards compatibility with kernels
+ * which use the old carveout allocator.
+ *
+ * The forward declaration is to keep sparse happy.
+ */
+int __init kbase_carveout_mem_reserve(
+		phys_addr_t size);
+int __init kbase_carveout_mem_reserve(phys_addr_t size)
+{
+	return 0;
+}
+
+#define pool_dbg(pool, format, ...) \
+	dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format,	\
+		(pool->next_pool) ? "kctx" : "kbdev",	\
+		kbase_mem_pool_size(pool),	\
+		kbase_mem_pool_max_size(pool),	\
+		##__VA_ARGS__)
+
+#define NOT_DIRTY false
+#define NOT_RECLAIMED false
+
+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
+{
+	spin_lock(&pool->pool_lock);
+}
+
+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
+{
+	spin_unlock(&pool->pool_lock);
+}
+
+static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
+{
+	ssize_t max_size = kbase_mem_pool_max_size(pool);
+	ssize_t cur_size = kbase_mem_pool_size(pool);
+
+	return max(max_size - cur_size, (ssize_t)0);
+}
+
+static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool);
+}
+
+static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) == 0;
+}
+
+static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_add(&p->lru, &pool->page_list);
+	pool->cur_size++;
+
+	zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "added page\n");
+}
+
+static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_locked(pool, p);
+	kbase_mem_pool_unlock(pool);
+}
+
+static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_for_each_entry(p, page_list, lru) {
+		zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+	}
+
+	list_splice(page_list, &pool->page_list);
+	pool->cur_size += nr_pages;
+
+	pool_dbg(pool, "added %zu pages\n", nr_pages);
+}
+
+static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_list_locked(pool, page_list, nr_pages);
+	kbase_mem_pool_unlock(pool);
+}
+
+static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (kbase_mem_pool_is_empty(pool))
+		return NULL;
+
+	p = list_first_entry(&pool->page_list, struct page, lru);
+	list_del_init(&p->lru);
+	pool->cur_size--;
+
+	zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "removed page\n");
+
+	return p;
+}
+
+static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	kbase_mem_pool_lock(pool);
+	p = kbase_mem_pool_remove_locked(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return p;
+}
+
+static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+
+	dma_sync_single_for_device(dev, kbase_dma_addr(p),
+			PAGE_SIZE, DMA_BIDIRECTIONAL);
+}
+
+static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	clear_highpage(p);
+	kbase_mem_pool_sync_page(pool, p);
+}
+
+static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool,
+		struct page *p)
+{
+	/* Zero page before spilling */
+	kbase_mem_pool_zero_page(next_pool, p);
+
+	kbase_mem_pool_add(next_pool, p);
+}
+
+static struct page *kbase_mem_pool_alloc_page(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+	gfp_t gfp;
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \
+	LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+	/* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+	gfp = GFP_USER | __GFP_ZERO;
+#else
+	gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+	if (current->flags & PF_KTHREAD) {
+		/* Don't trigger OOM killer from kernel threads, e.g. when
+		 * growing memory on GPU page fault */
+		gfp |= __GFP_NORETRY;
+	}
+
+	p = alloc_page(gfp);
+	if (!p)
+		return NULL;
+
+	dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, dma_addr)) {
+		__free_page(p);
+		return NULL;
+	}
+
+	WARN_ON(dma_addr != page_to_phys(p));
+
+	kbase_set_dma_addr(p, dma_addr);
+
+	pool_dbg(pool, "alloced page from kernel\n");
+
+	return p;
+}
+
+static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr = kbase_dma_addr(p);
+
+	dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	kbase_clear_dma_addr(p);
+	__free_page(p);
+
+	pool_dbg(pool, "freed page to kernel\n");
+}
+
+static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	struct page *p;
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	return i;
+}
+
+static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	size_t nr_freed;
+
+	kbase_mem_pool_lock(pool);
+	nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	kbase_mem_pool_unlock(pool);
+
+	return nr_freed;
+}
+
+static size_t kbase_mem_pool_grow(struct kbase_mem_pool *pool,
+		size_t nr_to_grow)
+{
+	struct page *p;
+	size_t i;
+
+	for (i = 0; i < nr_to_grow && !kbase_mem_pool_is_full(pool); i++) {
+		p = kbase_mem_pool_alloc_page(pool);
+		if (!p)
+			break;
+		kbase_mem_pool_add(pool, p);
+	}
+
+	return i;
+}
+
+size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
+{
+	size_t cur_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	if (new_size < cur_size)
+		kbase_mem_pool_shrink(pool, cur_size - new_size);
+	else if (new_size > cur_size)
+		kbase_mem_pool_grow(pool, new_size - cur_size);
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	return cur_size;
+}
+
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size)
+{
+	size_t cur_size;
+	size_t nr_to_shrink;
+
+	kbase_mem_pool_lock(pool);
+
+	pool->max_size = max_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+	if (max_size < cur_size) {
+		nr_to_shrink = cur_size - max_size;
+		kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	}
+
+	kbase_mem_pool_unlock(pool);
+}
+
+
+static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+	pool_dbg(pool, "reclaim count: %zu\n", kbase_mem_pool_size(pool));
+	return kbase_mem_pool_size(pool);
+}
+
+static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+	unsigned long freed;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+
+	pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan);
+
+	freed = kbase_mem_pool_shrink(pool, sc->nr_to_scan);
+
+	pool_dbg(pool, "reclaim freed %ld pages\n", freed);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_pool_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_pool_reclaim_count_objects(s, sc);
+
+	return kbase_mem_pool_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool)
+{
+	pool->cur_size = 0;
+	pool->max_size = max_size;
+	pool->kbdev = kbdev;
+	pool->next_pool = next_pool;
+
+	spin_lock_init(&pool->pool_lock);
+	INIT_LIST_HEAD(&pool->page_list);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink;
+#else
+	pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects;
+	pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects;
+#endif
+	pool->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	pool->reclaim.batch = 0;
+#endif
+	register_shrinker(&pool->reclaim);
+
+	pool_dbg(pool, "initialized\n");
+
+	return 0;
+}
+
+void kbase_mem_pool_term(struct kbase_mem_pool *pool)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_spill = 0;
+	LIST_HEAD(spill_list);
+	int i;
+
+	pool_dbg(pool, "terminate()\n");
+
+	unregister_shrinker(&pool->reclaim);
+
+	kbase_mem_pool_lock(pool);
+	pool->max_size = 0;
+
+	if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool (may overspill) */
+		nr_to_spill = kbase_mem_pool_capacity(next_pool);
+		nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill);
+
+		/* Zero pages first without holding the next_pool lock */
+		for (i = 0; i < nr_to_spill; i++) {
+			p = kbase_mem_pool_remove_locked(pool);
+			kbase_mem_pool_zero_page(pool, p);
+			list_add(&p->lru, &spill_list);
+		}
+	}
+
+	while (!kbase_mem_pool_is_empty(pool)) {
+		/* Free remaining pages to kernel */
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	kbase_mem_pool_unlock(pool);
+
+	if (next_pool && nr_to_spill) {
+		/* Add new page list to next_pool */
+		kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill);
+
+		pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill);
+	}
+
+	pool_dbg(pool, "terminated\n");
+}
+
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	pool_dbg(pool, "alloc()\n");
+
+	p = kbase_mem_pool_remove(pool);
+
+	if (!p && pool->next_pool) {
+		/* Allocate via next pool */
+		return kbase_mem_pool_alloc(pool->next_pool);
+	}
+
+	if (!p) {
+		/* Get page from kernel */
+		p = kbase_mem_pool_alloc_page(pool);
+	}
+
+	return p;
+}
+
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+
+	pool_dbg(pool, "free()\n");
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add(pool, p);
+	} else if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool */
+		kbase_mem_pool_spill(next_pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages)
+{
+	struct page *p;
+	size_t nr_from_pool;
+	size_t i;
+	int err = -ENOMEM;
+
+	pool_dbg(pool, "alloc_pages(%zu):\n", nr_pages);
+
+	/* Get pages from this pool */
+	kbase_mem_pool_lock(pool);
+	nr_from_pool = min(nr_pages, kbase_mem_pool_size(pool));
+	for (i = 0; i < nr_from_pool; i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		pages[i] = page_to_phys(p);
+	}
+	kbase_mem_pool_unlock(pool);
+
+	if (i != nr_pages && pool->next_pool) {
+		/* Allocate via next pool */
+		err = kbase_mem_pool_alloc_pages(pool->next_pool,
+				nr_pages - i, pages + i);
+
+		if (err)
+			goto err_rollback;
+
+		i += nr_pages - i;
+	}
+
+	/* Get any remaining pages from kernel */
+	for (; i < nr_pages; i++) {
+		p = kbase_mem_pool_alloc_page(pool);
+		if (!p)
+			goto err_rollback;
+		pages[i] = page_to_phys(p);
+	}
+
+	pool_dbg(pool, "alloc_pages(%zu) done\n", nr_pages);
+
+	return 0;
+
+err_rollback:
+	kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED);
+	return err;
+}
+
+static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
+		size_t nr_pages, phys_addr_t *pages, bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first without holding the pool lock */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!pages[i]))
+			continue;
+
+		p = phys_to_page(pages[i]);
+
+		if (zero)
+			kbase_mem_pool_zero_page(pool, p);
+		else if (sync)
+			kbase_mem_pool_sync_page(pool, p);
+
+		list_add(&p->lru, &new_page_list);
+		nr_to_pool++;
+		pages[i] = 0;
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages, bool dirty, bool reclaimed)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	pool_dbg(pool, "free_pages(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
+
+		i += nr_to_pool;
+
+		if (i != nr_pages && next_pool) {
+			/* Spill to next pool (may overspill) */
+			nr_to_pool = kbase_mem_pool_capacity(next_pool);
+			nr_to_pool = min(nr_pages - i, nr_to_pool);
+
+			kbase_mem_pool_add_array(next_pool, nr_to_pool,
+					pages + i, true, dirty);
+			i += nr_to_pool;
+		}
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!pages[i]))
+			continue;
+
+		p = phys_to_page(pages[i]);
+		if (reclaimed)
+			zone_page_state_add(-1, page_zone(p),
+					NR_SLAB_RECLAIMABLE);
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = 0;
+	}
+
+	pool_dbg(pool, "free_pages(%zu) done\n", nr_pages);
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
new file mode 100755
index 0000000..493665b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <mali_kbase_mem_pool_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_trim(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops,
+		kbase_mem_pool_debugfs_size_get,
+		kbase_mem_pool_debugfs_size_set,
+		"%llu\n");
+
+static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_max_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_set_max_size(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops,
+		kbase_mem_pool_debugfs_max_size_get,
+		kbase_mem_pool_debugfs_max_size_set,
+		"%llu\n");
+
+void kbase_mem_pool_debugfs_add(struct dentry *parent,
+		struct kbase_mem_pool *pool)
+{
+	debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_size_fops);
+
+	debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_max_size_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
new file mode 100755
index 0000000..458f3f0
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_MEM_POOL_DEBUGFS_H
+#define _KBASE_MEM_POOL_DEBUGFS_H
+
+#include <mali_kbase.h>
+
+/**
+ * kbase_mem_pool_debugfs_add - add debugfs knobs for @pool
+ * @parent: Parent debugfs dentry
+ * @pool:   Memory pool to control
+ *
+ * Adds two debugfs files under @parent:
+ * - mem_pool_size: get/set the current size of @pool
+ * - mem_pool_max_size: get/set the max size of @pool
+ */
+void kbase_mem_pool_debugfs_add(struct dentry *parent,
+		struct kbase_mem_pool *pool);
+
+#endif  /*_KBASE_MEM_POOL_DEBUGFS_H*/
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
new file mode 100755
index 0000000..0b19d05
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,118 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase_gpu_memory_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Show callback for the @c mem_profile debugfs file.
+ *
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise
+ */
+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+
+	seq_putc(sfile, '\n');
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for mem_profile
+ */
+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_mem_profile_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+	.open = kbasep_mem_profile_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+				kctx->mem_profile_initialized);
+
+	if (!kctx->mem_profile_initialized) {
+		if (!debugfs_create_file("mem_profile", S_IRUGO,
+					kctx->kctx_dentry, kctx,
+					&kbasep_mem_profile_debugfs_fops)) {
+			err = -EAGAIN;
+		} else {
+			kctx->mem_profile_initialized = true;
+		}
+	}
+
+	if (kctx->mem_profile_initialized) {
+		kfree(kctx->mem_profile_data);
+		kctx->mem_profile_data = data;
+		kctx->mem_profile_size = size;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d",
+				err, kctx->mem_profile_initialized);
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return err;
+}
+
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+				kctx->mem_profile_initialized);
+
+	kfree(kctx->mem_profile_data);
+	kctx->mem_profile_data = NULL;
+	kctx->mem_profile_size = 0;
+
+	mutex_unlock(&kctx->mem_profile_lock);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	kfree(data);
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
new file mode 100755
index 0000000..9555197
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
@@ -0,0 +1,60 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs.h
+ * Header file for mem profiles entries in debugfs
+ *
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
+#define _KBASE_MEM_PROFILE_DEBUGFS_H
+
+#include <mali_kbase.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Remove entry from Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
+
+/**
+ * @brief Insert @p data to the debugfs file so it can be read by userspace
+ *
+ * The function takes ownership of @p data and frees it later when new data
+ * is inserted.
+ *
+ * If the debugfs entry corresponding to the @p kctx doesn't exist,
+ * an attempt will be made to create it.
+ *
+ * @param kctx The context whose debugfs file @p data should be inserted to
+ * @param data A NULL-terminated string to be inserted to the debugfs file,
+ *             without the trailing new line character
+ * @param size The length of the @p data string
+ * @return 0 if @p data inserted correctly
+ *         -EAGAIN in case of error
+ * @post @ref mem_profile_initialized will be set to @c true
+ *       the first time this function succeeds.
+ */
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size);
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
new file mode 100755
index 0000000..82f0702
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs_buf_size.h
+ * Header file for the size of the buffer to accumulate the histogram report text in
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * The size of the buffer to accumulate the histogram report text in
+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56))
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
new file mode 100755
index 0000000..bf45d39
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -0,0 +1,1813 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mmu.c
+ * Base kernel MMU management.
+ */
+
+/* #define DEBUG    1 */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_debug.h>
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+/**
+ * kbase_mmu_sync_pgd - sync page directory to memory
+ * @kbdev:	Device pointer.
+ * @handle:	Address of DMA region.
+ * @size:       Size of the region to sync.
+ *
+ * This should be called after each page directory update.
+ */
+
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
+		dma_addr_t handle, size_t size)
+{
+	/* If page table is not coherent then ensure the gpu can read
+	 * the pages from memory
+	 */
+	if (kbdev->system_coherency != COHERENCY_ACE)
+		dma_sync_single_for_device(kbdev->dev, handle, size,
+				DMA_TO_DEVICE);
+}
+
+/*
+ * Definitions:
+ * - PGD: Page Directory.
+ * - PTE: Page Table Entry. A 64bit value pointing to the next
+ *        level of translation
+ * - ATE: Address Transation Entry. A 64bit value pointing to
+ *        a 4kB physical page.
+ */
+
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str);
+
+
+static size_t make_multiple(size_t minimum, size_t multiple)
+{
+	size_t remainder = minimum % multiple;
+
+	if (remainder == 0)
+		return minimum;
+
+	return minimum + multiple - remainder;
+}
+
+void page_fault_worker(struct work_struct *data)
+{
+	u64 fault_pfn;
+	u32 fault_status;
+	size_t new_pages;
+	size_t fault_rel_pfn;
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_va_region *region;
+	int err;
+	bool grown = false;
+
+	faulting_as = container_of(data, struct kbase_as, work_pagefault);
+	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+
+	fault_status = faulting_as->fault_status;
+	switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
+		/* need to check against the region to handle this one */
+		break;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Tranlation table bus fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
+		/* nothing to do, but we don't expect this fault currently */
+		dev_warn(kbdev->dev, "Access flag unexpectedly set");
+		goto fault_done;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
+
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Address size fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Memory attributes fault");
+		goto fault_done;
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	default:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Unknown fault code");
+		goto fault_done;
+	}
+
+	/* so we have a translation fault, let's see if it is for growable
+	 * memory */
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx,
+			faulting_as->fault_addr);
+	if (!region || region->flags & KBASE_REG_FREE) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if ((region->flags & GROWABLE_FLAGS_REQUIRED)
+			!= GROWABLE_FLAGS_REQUIRED) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not growable");
+		goto fault_done;
+	}
+
+	if ((region->flags & KBASE_REG_DONT_NEED)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Don't need memory can't be grown");
+		goto fault_done;
+	}
+
+	/* find the size we need to grow it by */
+	/* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address
+	 * validating the fault_adress to be within a size_t from the start_pfn */
+	fault_rel_pfn = fault_pfn - region->start_pfn;
+
+	if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
+		dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+				faulting_as->fault_addr, region->start_pfn,
+				region->start_pfn +
+				kbase_reg_current_backed_size(region));
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* [1] in case another page fault occurred while we were
+		 * handling the (duplicate) page fault we need to ensure we
+		 * don't loose the other page fault as result of us clearing
+		 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
+		 * an UNLOCK command that will retry any stalled memory
+		 * transaction (which should cause the other page fault to be
+		 * raised again).
+		 */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+
+		goto fault_done;
+	}
+
+	new_pages = make_multiple(fault_rel_pfn -
+			kbase_reg_current_backed_size(region) + 1,
+			region->extent);
+
+	/* cap to max vsize */
+	if (new_pages + kbase_reg_current_backed_size(region) >
+			region->nr_pages)
+		new_pages = region->nr_pages -
+				kbase_reg_current_backed_size(region);
+
+	if (0 == new_pages) {
+		/* Duplicate of a fault we've already handled, nothing to do */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* See comment [1] about UNLOCK usage */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+		goto fault_done;
+	}
+
+	if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) {
+		if (region->gpu_alloc != region->cpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					region->cpu_alloc, new_pages) == 0) {
+				grown = true;
+			} else {
+				kbase_free_phy_pages_helper(region->gpu_alloc,
+						new_pages);
+			}
+		} else {
+			grown = true;
+		}
+	}
+
+
+	if (grown) {
+		u32 op;
+
+		/* alloc success */
+		KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
+
+		/* set up the new pages */
+		err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_gpu_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags);
+		if (err) {
+			kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
+			if (region->gpu_alloc != region->cpu_alloc)
+				kbase_free_phy_pages_helper(region->cpu_alloc,
+						new_pages);
+			kbase_gpu_vm_unlock(kctx);
+			/* The locked VA region will be unlocked and the cache invalidated in here */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page table update failure");
+			goto fault_done;
+		}
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
+#endif
+		kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages);
+
+		/* AS transaction begin */
+		mutex_lock(&faulting_as->transaction_mutex);
+
+		/* flush L2 and unlock the VA (resumes the MMU) */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+			op = AS_COMMAND_FLUSH;
+		else
+			op = AS_COMMAND_FLUSH_PT;
+
+		/* clear MMU interrupt - this needs to be done after updating
+		 * the page tables but before issuing a FLUSH command. The
+		 * FLUSH cmd has a side effect that it restarts stalled memory
+		 * transactions in other address spaces which may cause
+		 * another fault to occur. If we didn't clear the interrupt at
+		 * this stage a new IRQ might not be raised when the GPU finds
+		 * a MMU IRQ is already pending.
+		 */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
+					  faulting_as->fault_addr >> PAGE_SHIFT,
+					  new_pages,
+					  op, 1);
+
+		mutex_unlock(&faulting_as->transaction_mutex);
+		/* AS transaction end */
+
+		/* reenable this in the mask */
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* failed to extend, handle as a normal PF */
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Page allocation failure");
+	}
+
+fault_done:
+	/*
+	 * By this point, the fault was handled in some way,
+	 * so release the ctx refcount
+	 */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
+{
+	u64 *page;
+	int i;
+	struct page *p;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
+	kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	p = kbase_mem_pool_alloc(&kctx->mem_pool);
+	if (!p)
+		goto sub_pages;
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+
+	page = kmap(p);
+	if (NULL == page)
+		goto alloc_free;
+
+	kbase_process_page_usage_inc(kctx, 1);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
+		kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
+
+	kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+	kunmap(p);
+	return page_to_phys(p);
+
+alloc_free:
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+sub_pages:
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
+
+/* Given PGD PFN for level N, return PGD PFN for level N+1 */
+static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	p = pfn_to_page(PFN_DOWN(pgd));
+	page = kmap(p);
+	if (NULL == page) {
+		dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
+		return 0;
+	}
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+
+	if (!target_pgd) {
+		target_pgd = kbase_mmu_alloc_pgd(kctx);
+		if (!target_pgd) {
+			dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+			kunmap(p);
+			return 0;
+		}
+
+		kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+		/* Rely on the caller to update the address space flags. */
+	}
+
+	kunmap(p);
+	return target_pgd;
+}
+
+static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd = kctx->pgd;
+	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
+		pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l);
+		/* Handle failure condition */
+		if (!pgd) {
+			dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n");
+			return 0;
+		}
+	}
+
+	return pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+
+	KBASE_DEBUG_ASSERT(pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail */
+	KBASE_DEBUG_ASSERT(NULL != page);
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+	/* As we are recovering from what has already been set up, we should have a target_pgd */
+	KBASE_DEBUG_ASSERT(0 != target_pgd);
+	kunmap_atomic(page);
+	return target_pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd = kctx->pgd;
+
+	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
+		pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
+		/* Should never fail */
+		KBASE_DEBUG_ASSERT(0 != pgd);
+	}
+
+	return pgd;
+}
+
+static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vpfn,
+					      size_t nr)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn);
+		KBASE_DEBUG_ASSERT(0 != pgd);
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+
+		pgd_page = kmap_atomic(p);
+		KBASE_DEBUG_ASSERT(NULL != pgd_page);
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+		kunmap_atomic(pgd_page);
+	}
+}
+
+/*
+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					phys_addr_t phys, size_t nr,
+					unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_single_page only partially completes we need to be
+	 * able to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+			kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+					phys, flags);
+		}
+
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table.
+		 * If further pages need inserting and fail we need to undo what
+		 * has already taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+	mutex_unlock(&kctx->mmu_lock);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_pages only partially completes we need to be able
+	 * to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+			kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+					phys[i], flags);
+		}
+
+		phys += count;
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table. If further pages
+		 * need inserting and fail we need to undo what has already
+		 * taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+
+/**
+ * This function is responsible for validating the MMU PTs
+ * triggering reguired flushes.
+ *
+ * * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+static void kbase_mmu_flush(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+	struct kbase_device *kbdev;
+	bool ctx_is_in_runpool;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	kbdev = kctx->kbdev;
+
+	/* We must flush if we're currently running jobs. At the very least, we need to retain the
+	 * context to ensure it doesn't schedule out whilst we're trying to flush it */
+	ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
+
+	if (ctx_is_in_runpool) {
+		KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+		/* Second level check is to try to only do this when jobs are running. The refcount is
+		 * a heuristic for this. */
+		if (kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr].as_busy_refcount >= 2) {
+			if (!kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+				int ret;
+				u32 op;
+
+				/* AS transaction begin */
+				mutex_lock(&kbdev->as[
+						kctx->as_nr].transaction_mutex);
+
+				if (kbase_hw_has_issue(kbdev,
+						BASE_HW_ISSUE_6367))
+					op = AS_COMMAND_FLUSH;
+				else
+					op = AS_COMMAND_FLUSH_MEM;
+
+				ret = kbase_mmu_hw_do_operation(kbdev,
+							&kbdev->as[kctx->as_nr],
+							kctx, vpfn, nr,
+							op, 0);
+#if KBASE_GPU_RESET_EN
+				if (ret) {
+					/* Flush failed to complete, assume the
+					 * GPU has hung and perform a reset to
+					 * recover */
+					dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
+					if (kbase_prepare_to_reset_gpu(kbdev))
+						kbase_reset_gpu(kbdev);
+				}
+#endif /* KBASE_GPU_RESET_EN */
+
+				mutex_unlock(&kbdev->as[
+						kctx->as_nr].transaction_mutex);
+				/* AS transaction end */
+
+				kbase_pm_context_idle(kbdev);
+			}
+		}
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+}
+
+/*
+ * We actually only discard the ATE, and not the page table
+ * pages. There is a potential DoS here, as we'll leak memory by
+ * having PTEs that are potentially unused.  Will require physical
+ * page accounting, so MMU pages are part of the process allocation.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	struct kbase_device *kbdev;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
+
+	if (0 == nr) {
+		/* early out if nothing to do */
+		return 0;
+	}
+
+	mutex_lock(&kctx->mmu_lock);
+
+	kbdev = kctx->kbdev;
+	mmu_mode = kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
+			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush(kctx, vpfn, requested_nr);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+/**
+ * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
+ * This call is being triggered as a response to the changes of the mem attributes
+ *
+ * @pre : The caller is responsible for validating the memory attributes
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mutex_lock(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags on GPU PFN 0x%llx from phys %p, %zu pages",
+			vpfn, phys, nr);
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		size_t count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
+			dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i],
+					flags);
+
+		phys += count;
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(pfn_to_page(PFN_DOWN(pgd)));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush(kctx, vpfn, requested_nr);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+/* This is a debug feature only */
+static void mmu_check_unused(struct kbase_context *kctx, phys_addr_t pgd)
+{
+	u64 *page;
+	int i;
+
+	page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != page);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		if (kctx->kbdev->mmu_mode->ate_is_valid(page[i]))
+			beenthere(kctx, "live pte %016lx", (unsigned long)page[i]);
+	}
+	kunmap_atomic(page);
+}
+
+static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != pgd_page);
+	/* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */
+	memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+	kunmap_atomic(pgd_page);
+	pgd_page = pgd_page_buffer;
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+		if (target_pgd) {
+			if (level < (MIDGARD_MMU_BOTTOMLEVEL - 1)) {
+				mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
+			} else {
+				/*
+				 * So target_pte is a level-3 page.
+				 * As a leaf, it is safe to free it.
+				 * Unless we have live pages attached to it!
+				 */
+				mmu_check_unused(kctx, target_pgd);
+			}
+
+			beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1);
+			if (zap) {
+				struct page *p = phys_to_page(target_pgd);
+
+				kbase_mem_pool_free(&kctx->mem_pool, p, true);
+				kbase_process_page_usage_dec(kctx, 1);
+				kbase_atomic_sub_pages(1, &kctx->used_pages);
+				kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+			}
+		}
+	}
+}
+
+int kbase_mmu_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
+
+	mutex_init(&kctx->mmu_lock);
+
+	/* Preallocate MMU depth of four pages for mmu_teardown_level to use */
+	kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+
+	if (NULL == kctx->mmu_teardown_pages)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void kbase_mmu_term(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	kfree(kctx->mmu_teardown_pages);
+	kctx->mmu_teardown_pages = NULL;
+}
+
+void kbase_mmu_free_pgd(struct kbase_context *kctx)
+{
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	mutex_lock(&kctx->mmu_lock);
+	mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages);
+	mutex_unlock(&kctx->mmu_lock);
+
+	beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
+	kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true);
+	kbase_process_page_usage_dec(kctx, 1);
+	new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
+
+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
+	size_t dump_size;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+	if (!pgd_page) {
+		dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
+		return 0;
+	}
+
+	if (*size_left >= size) {
+		/* A modified physical address that contains the page table level */
+		u64 m_pgd = pgd | level;
+
+		/* Put the modified physical address in the output buffer */
+		memcpy(*buffer, &m_pgd, sizeof(m_pgd));
+		*buffer += sizeof(m_pgd);
+
+		/* Followed by the page table itself */
+		memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
+		*buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
+
+		*size_left -= size;
+	}
+
+	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
+		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+			if (mmu_mode->pte_is_valid(pgd_page[i])) {
+				target_pgd = mmu_mode->pte_to_phy_addr(
+						pgd_page[i]);
+
+				dump_size = kbasep_mmu_dump_level(kctx,
+						target_pgd, level + 1,
+						buffer, size_left);
+				if (!dump_size) {
+					kunmap(pfn_to_page(PFN_DOWN(pgd)));
+					return 0;
+				}
+				size += dump_size;
+			}
+		}
+	}
+
+	kunmap(pfn_to_page(PFN_DOWN(pgd)));
+
+	return size;
+}
+
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
+{
+	void *kaddr;
+	size_t size_left;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (0 == nr_pages) {
+		/* can't dump in a 0 sized buffer, early out */
+		return NULL;
+	}
+
+	mutex_lock(&kctx->mmu_lock);
+
+	size_left = nr_pages * PAGE_SIZE;
+
+	KBASE_DEBUG_ASSERT(0 != size_left);
+	kaddr = vmalloc_user(size_left);
+
+	if (kaddr) {
+		u64 end_marker = 0xFFULL;
+		char *buffer;
+		char *mmu_dump_buffer;
+		u64 config[3];
+		size_t size;
+
+		buffer = (char *)kaddr;
+		mmu_dump_buffer = buffer;
+
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
+			struct kbase_mmu_setup as_setup;
+
+			kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup);
+			config[0] = as_setup.transtab;
+			config[1] = as_setup.memattr;
+			config[2] = as_setup.transcfg;
+			memcpy(buffer, &config, sizeof(config));
+			mmu_dump_buffer += sizeof(config);
+			size_left -= sizeof(config);
+		}
+
+
+
+		size = kbasep_mmu_dump_level(kctx,
+				kctx->pgd,
+				MIDGARD_MMU_TOPLEVEL,
+				&mmu_dump_buffer,
+				&size_left);
+
+		if (!size)
+			goto fail_free;
+
+		/* Add on the size for the end marker */
+		size += sizeof(u64);
+		/* Add on the size for the config */
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4))
+			size += sizeof(config);
+
+
+		if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) {
+			/* The buffer isn't big enough - free the memory and return failure */
+			goto fail_free;
+		}
+
+		/* Add the end marker */
+		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return kaddr;
+
+fail_free:
+	vfree(kaddr);
+	mutex_unlock(&kctx->mmu_lock);
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_dump);
+
+void bus_fault_worker(struct work_struct *data)
+{
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif /* KBASE_GPU_RESET_EN */
+
+	faulting_as = container_of(data, struct kbase_as, work_busfault);
+
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
+	if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+
+		/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+		/* AS transaction begin */
+		mutex_lock(&kbdev->as[as_no].transaction_mutex);
+
+		/* Set the MMU into unmapped mode */
+		kbase_mmu_disable_as(kbdev, as_no);
+
+		mutex_unlock(&kbdev->as[as_no].transaction_mutex);
+		/* AS transaction end */
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+
+		kbase_pm_context_idle(kbdev);
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
+{
+	const char *e;
+
+	switch (exception_code) {
+		/* Non-Fault Status code */
+	case 0x00:
+		e = "NOT_STARTED/IDLE/OK";
+		break;
+	case 0x01:
+		e = "DONE";
+		break;
+	case 0x02:
+		e = "INTERRUPTED";
+		break;
+	case 0x03:
+		e = "STOPPED";
+		break;
+	case 0x04:
+		e = "TERMINATED";
+		break;
+	case 0x08:
+		e = "ACTIVE";
+		break;
+		/* Job exceptions */
+	case 0x40:
+		e = "JOB_CONFIG_FAULT";
+		break;
+	case 0x41:
+		e = "JOB_POWER_FAULT";
+		break;
+	case 0x42:
+		e = "JOB_READ_FAULT";
+		break;
+	case 0x43:
+		e = "JOB_WRITE_FAULT";
+		break;
+	case 0x44:
+		e = "JOB_AFFINITY_FAULT";
+		break;
+	case 0x48:
+		e = "JOB_BUS_FAULT";
+		break;
+	case 0x50:
+		e = "INSTR_INVALID_PC";
+		break;
+	case 0x51:
+		e = "INSTR_INVALID_ENC";
+		break;
+	case 0x52:
+		e = "INSTR_TYPE_MISMATCH";
+		break;
+	case 0x53:
+		e = "INSTR_OPERAND_FAULT";
+		break;
+	case 0x54:
+		e = "INSTR_TLS_FAULT";
+		break;
+	case 0x55:
+		e = "INSTR_BARRIER_FAULT";
+		break;
+	case 0x56:
+		e = "INSTR_ALIGN_FAULT";
+		break;
+	case 0x58:
+		e = "DATA_INVALID_FAULT";
+		break;
+	case 0x59:
+		e = "TILE_RANGE_FAULT";
+		break;
+	case 0x5A:
+		e = "ADDR_RANGE_FAULT";
+		break;
+	case 0x60:
+		e = "OUT_OF_MEMORY";
+		break;
+		/* GPU exceptions */
+	case 0x80:
+		e = "DELAYED_BUS_FAULT";
+		break;
+	case 0x88:
+		e = "SHAREABILITY_FAULT";
+		break;
+		/* MMU exceptions */
+	case 0xC0:
+	case 0xC1:
+	case 0xC2:
+	case 0xC3:
+	case 0xC4:
+	case 0xC5:
+	case 0xC6:
+	case 0xC7:
+		e = "TRANSLATION_FAULT";
+		break;
+	case 0xC8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xC9:
+	case 0xCA:
+	case 0xCB:
+	case 0xCC:
+	case 0xCD:
+	case 0xCE:
+	case 0xCF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		e = "PERMISSION_FAULT";
+		break;
+	case 0xD0:
+	case 0xD1:
+	case 0xD2:
+	case 0xD3:
+	case 0xD4:
+	case 0xD5:
+	case 0xD6:
+	case 0xD7:
+		e = "TRANSTAB_BUS_FAULT";
+		break;
+	case 0xD8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xD9:
+	case 0xDA:
+	case 0xDB:
+	case 0xDC:
+	case 0xDD:
+	case 0xDE:
+	case 0xDF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		e = "ACCESS_FLAG";
+		break;
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xE0:
+	case 0xE1:
+	case 0xE2:
+	case 0xE3:
+	case 0xE4:
+	case 0xE5:
+	case 0xE6:
+	case 0xE7:
+		e = "ADDRESS_SIZE_FAULT";
+		break;
+	case 0xE8:
+	case 0xE9:
+	case 0xEA:
+	case 0xEB:
+	case 0xEC:
+	case 0xED:
+	case 0xEE:
+	case 0xEF:
+		e = "MEMORY_ATTRIBUTES_FAULT";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		break;
+	default:
+		e = "UNKNOWN";
+		break;
+	};
+
+	return e;
+}
+
+static const char *access_type_name(struct kbase_device *kbdev,
+		u32 fault_status)
+{
+	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		return "ATOMIC";
+#else
+		return "UNKNOWN";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		return "READ";
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		return "WRITE";
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		return "EXECUTE";
+	default:
+		WARN_ON(1);
+		return NULL;
+	}
+}
+
+/**
+ * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on.
+ */
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str)
+{
+	unsigned long flags;
+	int exception_type;
+	int access_type;
+	int source_id;
+	int as_no;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif
+
+	as_no = as->number;
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	/* ASSERT that the context won't leave the runpool */
+	KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0);
+
+	/* decode the fault status */
+	exception_type = as->fault_status & 0xFF;
+	access_type = (as->fault_status >> 8) & 0x3;
+	source_id = (as->fault_status >> 16);
+
+	/* terminal fault, print info about the fault */
+	dev_err(kbdev->dev,
+		"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+		"Reason: %s\n"
+		"raw fault status 0x%X\n"
+		"decoded fault status: %s\n"
+		"exception type 0x%X: %s\n"
+		"access type 0x%X: %s\n"
+		"source id 0x%X\n"
+		"pid: %d\n",
+		as_no, as->fault_addr,
+		reason_str,
+		as->fault_status,
+		(as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+		exception_type, kbase_exception_name(kbdev, exception_type),
+		access_type, access_type_name(kbdev, as->fault_status),
+		source_id,
+		kctx->pid);
+
+	/* hardware counters dump fault handling */
+	if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
+			(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_DUMPING)) {
+		unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+
+		if ((as->fault_addr >= kbdev->hwcnt.addr) &&
+				(as->fault_addr < (kbdev->hwcnt.addr +
+						(num_core_groups * 2048))))
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+	}
+
+	/* Stop the kctx from submitting more jobs and cause it to be scheduled
+	 * out/rescheduled - this will occur on releasing the context's refcount */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	kbasep_js_clear_submit_allowed(js_devdata, kctx);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	/* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
+	 * context can appear in the job slots from this point on */
+	kbase_backend_jm_kill_jobs_from_kctx(kctx);
+	/* AS transaction begin */
+	mutex_lock(&as->transaction_mutex);
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+	kbase_mmu_disable_as(kbdev, as_no);
+
+	mutex_unlock(&as->transaction_mutex);
+	/* AS transaction end */
+	/* Clear down the fault */
+	kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+	kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+}
+
+void kbasep_as_do_poke(struct work_struct *work)
+{
+	struct kbase_as *as;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(work);
+	as = container_of(work, struct kbase_as, poke_work);
+	kbdev = container_of(as, struct kbase_device, as[as->number]);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	/* GPU power will already be active by virtue of the caller holding a JS
+	 * reference on the address space, and will not release it until this worker
+	 * has finished */
+
+	/* Further to the comment above, we know that while this function is running
+	 * the AS will not be released as before the atom is released this workqueue
+	 * is flushed (in kbase_as_poking_timer_release_atom)
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
+
+	/* AS transaction begin */
+	mutex_lock(&as->transaction_mutex);
+	/* Force a uTLB invalidate */
+	kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
+				  AS_COMMAND_UNLOCK, 0);
+	mutex_unlock(&as->transaction_mutex);
+	/* AS transaction end */
+
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+	if (as->poke_refcount &&
+		!(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
+		/* Only queue up the timer if we need it, and we're not trying to kill it */
+		hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
+	}
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+}
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_as *as;
+	int queue_work_ret;
+
+	KBASE_DEBUG_ASSERT(NULL != timer);
+	as = container_of(timer, struct kbase_as, poke_timer);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+	KBASE_DEBUG_ASSERT(queue_work_ret);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * Retain the poking timer on an atom's context (if the atom hasn't already
+ * done so), and start the timer (if it's not already started).
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that's running on the GPU.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock
+ *
+ * This can be called safely from atomic context
+ */
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (katom->poking)
+		return;
+
+	katom->poking = 1;
+
+	/* It's safe to work on the as/as_nr without an explicit reference,
+	 * because the caller holds the runpool_irq lock, and the atom itself
+	 * was also running and had already taken a reference  */
+	as = &kbdev->as[kctx->as_nr];
+
+	if (++(as->poke_refcount) == 1) {
+		/* First refcount for poke needed: check if not already in flight */
+		if (!as->poke_state) {
+			/* need to start poking */
+			as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
+			queue_work(as->poke_wq, &as->poke_work);
+		}
+	}
+}
+
+/**
+ * If an atom holds a poking timer, release it and wait for it to finish
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that still has a JS reference on the context
+ *
+ * This must \b not be called from atomic context, since it can sleep.
+ */
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	if (!katom->poking)
+		return;
+
+	as = &kbdev->as[kctx->as_nr];
+
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+	KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	if (--(as->poke_refcount) == 0) {
+		as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+		hrtimer_cancel(&as->poke_timer);
+		flush_workqueue(as->poke_wq);
+
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+
+		/* Re-check whether it's still needed */
+		if (as->poke_refcount) {
+			int queue_work_ret;
+			/* Poking still needed:
+			 * - Another retain will not be starting the timer or queueing work,
+			 * because it's still marked as in-flight
+			 * - The hrtimer has finished, and has not started a new timer or
+			 * queued work because it's been marked as killing
+			 *
+			 * So whatever happens now, just queue the work again */
+			as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
+			queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+			KBASE_DEBUG_ASSERT(queue_work_ret);
+		} else {
+			/* It isn't - so mark it as not in flight, and not killing */
+			as->poke_state = 0u;
+
+			/* The poke associated with the atom has now finished. If this is
+			 * also the last atom on the context, then we can guarentee no more
+			 * pokes (and thus no more poking register accesses) will occur on
+			 * the context until new atoms are run */
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+	katom->poking = 0;
+}
+
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (!kctx) {
+		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n",
+				 kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
+				 as->number, as->fault_addr);
+
+		/* Since no ctx was found, the MMU must be disabled. */
+		WARN_ON(as->current_setup.transtab);
+
+		if (kbase_as_has_bus_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		} else if (kbase_as_has_page_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+		}
+
+#if KBASE_GPU_RESET_EN
+		if (kbase_as_has_bus_fault(as) &&
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+			bool reset_status;
+			/*
+			 * Reset the GPU, like in bus_fault_worker, in case an
+			 * earlier error hasn't been properly cleared by this
+			 * point.
+			 */
+			dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+			reset_status = kbase_prepare_to_reset_gpu_locked(kbdev);
+			if (reset_status)
+				kbase_reset_gpu_locked(kbdev);
+		}
+#endif /* KBASE_GPU_RESET_EN */
+
+		return;
+	}
+
+	if (kbase_as_has_bus_fault(as)) {
+		/*
+		 * hw counters dumping in progress, signal the
+		 * other thread that it failed
+		 */
+		if ((kbdev->hwcnt.kctx == kctx) &&
+		    (kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_DUMPING))
+			kbdev->hwcnt.backend.state =
+						KBASE_INSTR_STATE_FAULT;
+
+		/*
+		 * Stop the kctx from submitting more jobs and cause it
+		 * to be scheduled out/rescheduled when all references
+		 * to it are released
+		 */
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		dev_warn(kbdev->dev,
+				"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+				as->number, as->fault_addr,
+				as->fault_extra_addr);
+#else
+		dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+				as->number, as->fault_addr);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+		/*
+		 * We need to switch to UNMAPPED mode - but we do this in a
+		 * worker so that we can sleep
+		 */
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault));
+		WARN_ON(work_pending(&as->work_busfault));
+		queue_work(as->pf_wq, &as->work_busfault);
+		atomic_inc(&kbdev->faults_pending);
+	} else {
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault));
+		WARN_ON(work_pending(&as->work_pagefault));
+		queue_work(as->pf_wq, &as->work_pagefault);
+		atomic_inc(&kbdev->faults_pending);
+	}
+}
+
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		flush_workqueue(as->pf_wq);
+	}
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
new file mode 100755
index 0000000..986e959
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Interface file for accessing MMU hardware functionality
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_page MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_intro_sec Introduction
+ * This module provides an abstraction for accessing the functionality provided
+ * by the midgard MMU and thus allows all MMU HW access to be contained within
+ * one common place and allows for different backends (implementations) to
+ * be provided.
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_H_
+#define _MALI_KBASE_MMU_HW_H_
+
+/* Forward declarations */
+struct kbase_device;
+struct kbase_as;
+struct kbase_context;
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw  MMU access APIs
+ * @{
+ */
+
+/** @brief MMU fault type descriptor.
+ */
+enum kbase_mmu_fault_type {
+	KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
+	KBASE_MMU_FAULT_TYPE_PAGE,
+	KBASE_MMU_FAULT_TYPE_BUS,
+	KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED,
+	KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED
+};
+
+/** @brief Configure an address space for use.
+ *
+ * Configure the MMU using the address space details setup in the
+ * @ref kbase_context structure.
+ *
+ * @param[in]  kbdev          kbase device to configure.
+ * @param[in]  as             address space to configure.
+ * @param[in]  kctx           kbase context to configure.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev,
+		struct kbase_as *as, struct kbase_context *kctx);
+
+/** @brief Issue an operation to the MMU.
+ *
+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
+ * is associated with the provided @ref kbase_context over the specified range
+ *
+ * @param[in]  kbdev         kbase device to issue the MMU operation on.
+ * @param[in]  as            address space to issue the MMU operation on.
+ * @param[in]  kctx          kbase context to issue the MMU operation on.
+ * @param[in]  vpfn          MMU Virtual Page Frame Number to start the
+ *                           operation on.
+ * @param[in]  nr            Number of pages to work on.
+ * @param[in]  type          Operation type (written to ASn_COMMAND).
+ * @param[in]  handling_irq  Is this operation being called during the handling
+ *                           of an interrupt?
+ *
+ * @return Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type,
+		unsigned int handling_irq);
+
+/** @brief Clear a fault that has been previously reported by the MMU.
+ *
+ * Clear a bus error or page fault that has been reported by the MMU.
+ *
+ * @param[in]  kbdev         kbase device to  clear the fault from.
+ * @param[in]  as            address space to  clear the fault from.
+ * @param[in]  kctx          kbase context to clear the fault from or NULL.
+ * @param[in]  type          The type of fault that needs to be cleared.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @brief Enable fault that has been previously reported by the MMU.
+ *
+ * After a page fault or bus error has been reported by the MMU these
+ * will be disabled. After these are handled this function needs to be
+ * called to enable the page fault or bus error fault again.
+ *
+ * @param[in]  kbdev         kbase device to again enable the fault from.
+ * @param[in]  as            address space to again enable the fault from.
+ * @param[in]  kctx          kbase context to again enable the fault from.
+ * @param[in]  type          The type of fault that needs to be enabled again.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @} *//* end group mali_kbase_mmu_hw */
+/** @} *//* end group base_kbase_api */
+
+#endif	/* _MALI_KBASE_MMU_HW_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
new file mode 100755
index 0000000..2449c60
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _MALI_KBASE_MMU_MODE_
+#define _MALI_KBASE_MMU_MODE_
+
+#include <linux/types.h>
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_device;
+struct kbase_as;
+struct kbase_mmu_setup;
+
+struct kbase_mmu_mode {
+	void (*update)(struct kbase_context *kctx);
+	void (*get_as_setup)(struct kbase_context *kctx,
+			struct kbase_mmu_setup * const setup);
+	void (*disable_as)(struct kbase_device *kbdev, int as_nr);
+	phys_addr_t (*pte_to_phy_addr)(u64 entry);
+	int (*ate_is_valid)(u64 ate);
+	int (*pte_is_valid)(u64 pte);
+	void (*entry_set_ate)(u64 *entry, phys_addr_t phy, unsigned long flags);
+	void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+	void (*entry_invalidate)(u64 *entry);
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
+
+#endif /* _MALI_KBASE_MMU_MODE_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
new file mode 100755
index 0000000..683cabb
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -0,0 +1,206 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase_mmu_mode.h"
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+#define ENTRY_IS_ATE        1ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_RD_BIT (1ULL << 6)
+#define ENTRY_WR_BIT (1ULL << 7)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
+		ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+	*pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler keeping cached copies of
+	 * memory, we have to explicitly say that we have updated
+	 * memory.
+	 *
+	 * Note: We could manually move the data ourselves into R0 and
+	 * R1 by specifying register variables that are explicitly
+	 * given registers assignments, the down side of this is that
+	 * we have to assume cpu endianness.  To avoid this we can use
+	 * the ldrd to read the data from memory into R0 and R1 which
+	 * will respect the cpu endianness, we then use strd to make
+	 * the 64 bit assignment to the page table entry.
+	 */
+	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+			"strd r0, r1, [%[pte]]\n\t"
+			: "=m" (*pte)
+			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+			: "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register. */
+	setup->memattr =
+		(AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
+		(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    <<
+		(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8))    |
+		(AS_MEMATTR_LPAE_WRITE_ALLOC           <<
+		(AS_MEMATTR_INDEX_WRITE_ALLOC * 8))           |
+		(AS_MEMATTR_LPAE_OUTER_IMPL_DEF        <<
+		(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8))        |
+		(AS_MEMATTR_LPAE_OUTER_WA              <<
+		(AS_MEMATTR_INDEX_OUTER_WA * 8))              |
+		0; /* The other indices are unused for now */
+
+	setup->transtab = ((u64)kctx->pgd &
+		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
+		AS_TRANSTAB_LPAE_ADRMODE_TABLE |
+		AS_TRANSTAB_LPAE_READ_INNER;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#else
+	setup->transcfg = 0;
+#endif
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#endif
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* write perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
+	/* read perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) |
+		get_mmu_flags(flags) |
+		ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const lpae_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
+{
+	return &lpae_mode;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
new file mode 100755
index 0000000..1a44957
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+
+/*
+ * This file is included only for type definitions and functions belonging to
+ * specific platform folders. Do not add dependencies with symbols that are
+ * defined somewhere else.
+ */
+#include <mali_kbase_config.h>
+
+#define PLATFORM_CONFIG_RESOURCE_COUNT 4
+#define PLATFORM_CONFIG_IRQ_RES_COUNT  3
+
+static struct platform_device *mali_device;
+
+#ifndef CONFIG_OF
+/**
+ * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources
+ *
+ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function
+ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT.
+ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
+ *
+ * @param[in]  io_resource      Input IO resource data
+ * @param[out] linux_resources  Pointer to output array of Linux resource structures
+ */
+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
+{
+	if (!io_resources || !linux_resources) {
+		pr_err("%s: couldn't find proper resources\n", __func__);
+		return;
+	}
+
+	memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource));
+
+	linux_resources[0].start = io_resources->io_memory_region.start;
+	linux_resources[0].end   = io_resources->io_memory_region.end;
+	linux_resources[0].flags = IORESOURCE_MEM;
+
+	linux_resources[1].start = io_resources->job_irq_number;
+	linux_resources[1].end   = io_resources->job_irq_number;
+	linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[2].start = io_resources->mmu_irq_number;
+	linux_resources[2].end   = io_resources->mmu_irq_number;
+	linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[3].start = io_resources->gpu_irq_number;
+	linux_resources[3].end   = io_resources->gpu_irq_number;
+	linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+}
+#endif /* CONFIG_OF */
+
+int kbase_platform_fake_register(void)
+{
+	struct kbase_platform_config *config;
+#ifndef CONFIG_OF
+	struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
+#endif
+	int err;
+
+	config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
+	if (config == NULL) {
+		pr_err("%s: couldn't get platform config\n", __func__);
+		return -ENODEV;
+	}
+
+	mali_device = platform_device_alloc("mali", 0);
+	if (mali_device == NULL)
+		return -ENOMEM;
+
+#ifndef CONFIG_OF
+	kbasep_config_parse_io_resources(config->io_resources, resources);
+	err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
+	if (err) {
+		platform_device_put(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+#endif /* CONFIG_OF */
+
+	err = platform_device_add(mali_device);
+	if (err) {
+		platform_device_unregister(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kbase_platform_fake_register);
+
+void kbase_platform_fake_unregister(void)
+{
+	if (mali_device)
+		platform_device_unregister(mali_device);
+}
+EXPORT_SYMBOL(kbase_platform_fake_unregister);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
new file mode 100755
index 0000000..261441f
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.c
+ * Base kernel power management APIs
+ */
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_instr.h>
+
+#include <mali_kbase_pm.h>
+
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
+{
+	return kbase_hwaccess_pm_powerup(kbdev, flags);
+}
+
+void kbase_pm_halt(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
+void kbase_pm_context_active(struct kbase_device *kbdev)
+{
+	(void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+}
+
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerup. Sometimes the event might be missed due to reading the count
+	 * outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+	if (kbase_pm_is_suspending(kbdev)) {
+		switch (suspend_handler) {
+		case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+			if (kbdev->pm.active_count != 0)
+				break;
+			/* FALLTHROUGH */
+		case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			if (old_count == 0)
+				kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+			return 1;
+
+		case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
+			/* FALLTHROUGH */
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false, "unreachable");
+			break;
+		}
+	}
+	c = ++kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	if (c == 1)
+		/* First context active: Power on the GPU and any cores requested by
+		 * the policy */
+		kbase_hwaccess_pm_gpu_active(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_active);
+
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerdown. Sometimes the event might be missed due to reading the
+	 * count outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	c = --kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
+
+	KBASE_DEBUG_ASSERT(c >= 0);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	if (c == 0) {
+		/* Last context has gone idle */
+		kbase_hwaccess_pm_gpu_idle(kbdev);
+
+		/* Wake up anyone waiting for this to become 0 (e.g. suspend). The
+		 * waiters must synchronize with us by locking the pm.lock after
+		 * waiting */
+		wake_up(&kbdev->pm.zero_active_count_wait);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
+
+void kbase_pm_suspend(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	mutex_lock(&kbdev->pm.lock);
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+	kbdev->pm.suspending = true;
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* From now on, the active count will drop towards zero. Sometimes, it'll
+	 * go up briefly before going down again. However, once it reaches zero it
+	 * will stay there - guaranteeing that we've idled all pm references */
+
+	/* Suspend job scheduler and associated components, so that it releases all
+	 * the PM active count references */
+	kbasep_js_suspend(kbdev);
+
+	/* Suspend any counter collection that might be happening */
+	kbase_instr_hwcnt_suspend(kbdev);
+
+	/* Wait for the active count to reach zero. This is not the same as
+	 * waiting for a power down, since not all policies power down when this
+	 * reaches zero. */
+	wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0);
+
+	/* NOTE: We synchronize with anything that was just finishing a
+	 * kbase_pm_context_idle() call by locking the pm.lock below */
+
+	kbase_hwaccess_pm_suspend(kbdev);
+}
+
+void kbase_pm_resume(struct kbase_device *kbdev)
+{
+	/* MUST happen before any pm_context_active calls occur */
+	kbase_hwaccess_pm_resume(kbdev);
+
+	/* Initial active call, to power on the GPU/cores if needed */
+	kbase_pm_context_active(kbdev);
+
+	/* Re-enable instrumentation, if it was previously disabled */
+	kbase_instr_hwcnt_resume(kbdev);
+
+	/* Resume any blocked atoms (which may cause contexts to be scheduled in
+	 * and dependent atoms to run) */
+	kbase_resume_suspended_soft_jobs(kbdev);
+
+	/* Resume the Job Scheduler and associated components, and start running
+	 * atoms */
+	kbasep_js_resume(kbdev);
+
+	/* Matching idle call, to power off the GPU/cores if we didn't actually
+	 * need it and the policy doesn't want it on */
+	kbase_pm_context_idle(kbdev);
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
new file mode 100755
index 0000000..37fa247
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -0,0 +1,171 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include "mali_kbase_hwaccess_pm.h"
+
+#define PM_ENABLE_IRQS       0x01
+#define PM_HW_ISSUES_DETECT  0x02
+
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return 0 if the power management framework was successfully initialized.
+ */
+int kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @param flags     Flags to pass on to kbase_pm_init_hw
+ *
+ * @return 0 if powerup was successful.
+ */
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ * Should ensure that no new interrupts are generated,
+ * but allow any currently running interrupt handlers to complete successfully.
+ * The GPU is forced off by the time this function returns, regardless of
+ * whether or not the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init)
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It informs the active power policy that the
+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occuring/has occurred whilst this is
+ * in use. Use kbase_pm_contect_active_unless_suspending() instead.
+ *
+ * @note a Suspend is only visible to Kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+	/** A suspend is not expected/not possible - this is the same as
+	 * kbase_pm_context_active() */
+	KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+	/** If we're suspending, fail and don't increase the active count */
+	KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+	/** If we're suspending, succeed and allow the active count to increase iff
+	 * it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+	 *
+	 * This should only be used when there is a bounded time on the activation
+	 * (e.g. guarantee it's going to be idled very soon after) */
+	KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * We returns a status code indicating whether we're allowed to keep the GPU
+ * active during the suspend, depending on the handler code. If the status code
+ * indicates a failure, the caller must abort whatever operation it was
+ * attempting, and potentially queue it up for after the OS has resumed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero     Indicates success
+ * @return non-zero Indicates failure due to the system being suspending/suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power
+ * policy so the calling code should ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from Kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note the mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU e.g. via atom submission.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_vsync_callback - vsync callback
+ *
+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise
+ * @data: Pointer to the kbase device as returned by kbase_find_device()
+ *
+ * Callback function used to notify the power management code that a vsync has
+ * occurred on the display.
+ */
+void kbase_pm_vsync_callback(int buffer_updated, void *data);
+
+#endif				/* _KBASE_PM_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
new file mode 100755
index 0000000..7fb674e
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_profiling_gator_api.h
+ * Model interface
+ */
+
+#ifndef _KBASE_PROFILING_GATOR_API_H_
+#define _KBASE_PROFILING_GATOR_API_H_
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control
+ * the frame buffer dumping and s/w counter reporting.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define FBDUMP_CONTROL_MAX (5)
+#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE
+
+void _mali_profiling_control(u32 action, u32 value);
+
+#endif				/* _KBASE_PROFILING_GATOR_API */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
new file mode 100755
index 0000000..170b939
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -0,0 +1,1155 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_replay.c
+ * Replay soft job handlers
+ */
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mem_linux.h>
+
+#define JOB_NOT_STARTED 0
+#define JOB_TYPE_NULL      (1)
+#define JOB_TYPE_VERTEX    (5)
+#define JOB_TYPE_TILER     (7)
+#define JOB_TYPE_FUSED     (8)
+#define JOB_TYPE_FRAGMENT  (9)
+
+#define JOB_HEADER_32_FBD_OFFSET (31*4)
+#define JOB_HEADER_64_FBD_OFFSET (44*4)
+
+#define FBD_POINTER_MASK (~0x3f)
+
+#define SFBD_TILER_OFFSET (48*4)
+
+#define MFBD_TILER_OFFSET       (14*4)
+
+#define FBD_HIERARCHY_WEIGHTS 8
+#define FBD_HIERARCHY_MASK_MASK 0x1fff
+
+#define FBD_TYPE 1
+
+#define HIERARCHY_WEIGHTS 13
+
+#define JOB_HEADER_ID_MAX                 0xffff
+
+#define JOB_SOURCE_ID(status)		(((status) >> 16) & 0xFFFF)
+#define JOB_POLYGON_LIST		(0x03)
+
+struct fragment_job {
+	struct job_descriptor_header header;
+
+	u32 x[2];
+	union {
+		u64 _64;
+		u32 _32;
+	} fragment_fbd;
+};
+
+static void dump_job_head(struct kbase_context *kctx, char *head_str,
+		struct job_descriptor_header *job)
+{
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
+	dev_dbg(kctx->kbdev->dev,
+			"addr                  = %p\n"
+			"exception_status      = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n"
+			"first_incomplete_task = %x\n"
+			"fault_pointer         = %llx\n"
+			"job_descriptor_size   = %x\n"
+			"job_type              = %x\n"
+			"job_barrier           = %x\n"
+			"_reserved_01          = %x\n"
+			"_reserved_02          = %x\n"
+			"_reserved_03          = %x\n"
+			"_reserved_04/05       = %x,%x\n"
+			"job_index             = %x\n"
+			"dependencies          = %x,%x\n",
+			job, job->exception_status,
+			JOB_SOURCE_ID(job->exception_status),
+			(job->exception_status >> 8) & 0x3,
+			job->exception_status  & 0xFF,
+			job->first_incomplete_task,
+			job->fault_pointer, job->job_descriptor_size,
+			job->job_type, job->job_barrier, job->_reserved_01,
+			job->_reserved_02, job->_reserved_03,
+			job->_reserved_04, job->_reserved_05,
+			job->job_index,
+			job->job_dependency_index_1,
+			job->job_dependency_index_2);
+
+	if (job->job_descriptor_size)
+		dev_dbg(kctx->kbdev->dev, "next               = %llx\n",
+				job->next_job._64);
+	else
+		dev_dbg(kctx->kbdev->dev, "next               = %x\n",
+				job->next_job._32);
+#endif
+}
+
+static int kbasep_replay_reset_sfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct {
+		u32 padding_1[1];
+		u32 flags;
+		u64 padding_2[2];
+		u64 heap_free_address;
+		u32 padding[8];
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev,
+		"FBD tiler:\n"
+		"flags = %x\n"
+		"heap_free_address = %llx\n",
+		fbd_tiler->flags, fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = fbd_tiler->flags &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n",
+						i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n",
+			fbd_tiler->heap_free_address, fbd_tiler->flags);
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_replay_reset_mfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct kbase_vmap_struct map;
+	struct {
+		u32 padding_0;
+		u32 flags;
+		u64 padding_1[2];
+		u64 heap_free_address;
+		u64 padding_2;
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev,
+			       "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "FBD tiler:\n"
+			"flags = %x\n"
+			"heap_free_address = %llx\n",
+			fbd_tiler->flags,
+			fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = (fbd_tiler->flags) &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev,
+				" Writing hierarchy level %02d (%08x) to %d\n",
+							     i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of an FBD pointed to by a tiler job
+ *
+ * This performs two functions :
+ * - Set the hierarchy mask
+ * - Reset the tiler free heap address
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] job_header        Address of job header to reset.
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] job_64            true if this job is using 64-bit
+ *                              descriptors
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx,
+		u64 job_header,	u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight,	bool job_64)
+{
+	struct kbase_vmap_struct map;
+	u64 fbd_address;
+
+	if (job_64) {
+		u64 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_64_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	} else {
+		u32 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_32_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	if (fbd_address & FBD_TYPE) {
+		return kbasep_replay_reset_mfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	} else {
+		return kbasep_replay_reset_sfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	}
+}
+
+/**
+ * @brief Reset the status of a job
+ *
+ * This performs the following functions :
+ *
+ * - Reset the Job Status field of each job to NOT_STARTED.
+ * - Set the Job Type field of any Vertex Jobs to Null Job.
+ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of
+ *   the tiler_heap_free parameter, and set the hierarchy level mask to the
+ *   hier_mask parameter.
+ * - Offset HW dependencies by the hw_job_id_offset parameter
+ * - Set the Perform Job Barrier flag if this job is the first in the chain
+ * - Read the address of the next job header
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in,out] job_header    Address of job header to reset. Set to address
+ *                              of next job header on exit.
+ * @param[in] prev_jc           Previous job chain to link to, if this job is
+ *                              the last in the chain.
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] first_in_chain    true if this job is the first in the chain
+ * @param[in] fragment_chain    true if this job is in the fragment chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_job(struct kbase_context *kctx,
+		u64 *job_header, u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool first_in_chain, bool fragment_chain)
+{
+	struct fragment_job *frag_job;
+	struct job_descriptor_header *job;
+	u64 new_job_header;
+	struct kbase_vmap_struct map;
+
+	frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map);
+	if (!frag_job) {
+		dev_err(kctx->kbdev->dev,
+				 "kbasep_replay_parse_jc: failed to map jc\n");
+		return -EINVAL;
+	}
+	job = &frag_job->header;
+
+	dump_job_head(kctx, "Job header:", job);
+
+	if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) {
+		dev_err(kctx->kbdev->dev, "Job already not started\n");
+		goto out_unmap;
+	}
+	job->exception_status = JOB_NOT_STARTED;
+
+	if (job->job_type == JOB_TYPE_VERTEX)
+		job->job_type = JOB_TYPE_NULL;
+
+	if (job->job_type == JOB_TYPE_FUSED) {
+		dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
+		goto out_unmap;
+	}
+
+	if (first_in_chain)
+		job->job_barrier = 1;
+
+	if ((job->job_dependency_index_1 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_dependency_index_2 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+		dev_err(kctx->kbdev->dev,
+			     "Job indicies/dependencies out of valid range\n");
+		goto out_unmap;
+	}
+
+	if (job->job_dependency_index_1)
+		job->job_dependency_index_1 += hw_job_id_offset;
+	if (job->job_dependency_index_2)
+		job->job_dependency_index_2 += hw_job_id_offset;
+
+	job->job_index += hw_job_id_offset;
+
+	if (job->job_descriptor_size) {
+		new_job_header = job->next_job._64;
+		if (!job->next_job._64)
+			job->next_job._64 = prev_jc;
+	} else {
+		new_job_header = job->next_job._32;
+		if (!job->next_job._32)
+			job->next_job._32 = prev_jc;
+	}
+	dump_job_head(kctx, "Updated to:", job);
+
+	if (job->job_type == JOB_TYPE_TILER) {
+		bool job_64 = job->job_descriptor_size != 0;
+
+		if (kbasep_replay_reset_tiler_job(kctx, *job_header,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, job_64) != 0)
+			goto out_unmap;
+
+	} else if (job->job_type == JOB_TYPE_FRAGMENT) {
+		u64 fbd_address;
+
+		if (job->job_descriptor_size)
+			fbd_address = frag_job->fragment_fbd._64;
+		else
+			fbd_address = (u64)frag_job->fragment_fbd._32;
+
+		if (fbd_address & FBD_TYPE) {
+			if (kbasep_replay_reset_mfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		} else {
+			if (kbasep_replay_reset_sfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		}
+	}
+
+	kbase_vunmap(kctx, &map);
+
+	*job_header = new_job_header;
+
+	return 0;
+
+out_unmap:
+	kbase_vunmap(kctx, &map);
+	return -EINVAL;
+}
+
+/**
+ * @brief Find the highest job ID in a job chain
+ *
+ * @param[in] kctx        Context pointer
+ * @param[in] jc          Job chain start address
+ * @param[out] hw_job_id  Highest job ID in chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
+		u64 jc,	u16 *hw_job_id)
+{
+	while (jc) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		dev_dbg(kctx->kbdev->dev,
+			"kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc);
+
+		job = kbase_vmap(kctx, jc, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev, "failed to map jc\n");
+
+			return -EINVAL;
+		}
+
+		if (job->job_index > *hw_job_id)
+			*hw_job_id = job->job_index;
+
+		if (job->job_descriptor_size)
+			jc = job->next_job._64;
+		else
+			jc = job->next_job._32;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a number of jobs
+ *
+ * This function walks the provided job chain, and calls
+ * kbasep_replay_reset_job for each job. It also links the job chain to the
+ * provided previous job chain.
+ *
+ * The function will fail if any of the jobs passed already have status of
+ * NOT_STARTED.
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] jc                Job chain to be processed
+ * @param[in] prev_jc           Job chain to be added to. May be NULL
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] fragment_chain    true if this chain is the fragment chain
+ *
+ * @return 0 on success, error code otherwise
+ */
+static int kbasep_replay_parse_jc(struct kbase_context *kctx,
+		u64 jc,	u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool fragment_chain)
+{
+	bool first_in_chain = true;
+	int nr_jobs = 0;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n",
+			jc, hw_job_id_offset);
+
+	while (jc) {
+		dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc);
+
+		if (kbasep_replay_reset_job(kctx, &jc, prev_jc,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, hw_job_id_offset,
+				first_in_chain, fragment_chain) != 0)
+			return -EINVAL;
+
+		first_in_chain = false;
+
+		nr_jobs++;
+		if (fragment_chain &&
+		    nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) {
+			dev_err(kctx->kbdev->dev,
+				"Exceeded maximum number of jobs in fragment chain\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a replay job, and set up dependencies
+ *
+ * This performs the actions to allow the replay job to be re-run following
+ * completion of the passed dependency.
+ *
+ * @param[in] katom     The atom to be reset
+ * @param[in] dep_atom  The dependency to be attached to the atom
+ */
+static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom,
+		struct kbase_jd_atom *dep_atom)
+{
+	katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+	list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]);
+}
+
+/**
+ * @brief Allocate an unused katom
+ *
+ * This will search the provided context for an unused katom, and will mark it
+ * as KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * If no atoms are available then the function will fail.
+ *
+ * @param[in] kctx      Context pointer
+ * @return An atom ID, or -1 on failure
+ */
+static int kbasep_allocate_katom(struct kbase_context *kctx)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int i;
+
+	for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) {
+		if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) {
+			jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED;
+			dev_dbg(kctx->kbdev->dev,
+				  "kbasep_allocate_katom: Allocated atom %d\n",
+									    i);
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * @brief Release a katom
+ *
+ * This will mark the provided atom as available, and remove any dependencies.
+ *
+ * For use on error path.
+ *
+ * @param[in] kctx      Context pointer
+ * @param[in] atom_id   ID of atom to release
+ */
+static void kbasep_release_katom(struct kbase_context *kctx, int atom_id)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n",
+			atom_id);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[0]))
+		list_del(jctx->atoms[atom_id].dep_head[0].next);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[1]))
+		list_del(jctx->atoms[atom_id].dep_head[1].next);
+
+	jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED;
+}
+
+static void kbasep_replay_create_atom(struct kbase_context *kctx,
+				      struct base_jd_atom_v2 *atom,
+				      int atom_nr,
+				      base_jd_prio prio)
+{
+	atom->nr_extres = 0;
+	atom->extres_list.value = NULL;
+	atom->device_nr = 0;
+	atom->prio = prio;
+	atom->atom_number = atom_nr;
+
+	base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID);
+
+	atom->udata.blob[0] = 0;
+	atom->udata.blob[1] = 0;
+}
+
+/**
+ * @brief Create two atoms for the purpose of replaying jobs
+ *
+ * Two atoms are allocated and created. The jc pointer is not set at this
+ * stage. The second atom has a dependency on the first. The remaining fields
+ * are set up as follows :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ *
+ * @param[out] t_atom      Atom to use for tiler jobs
+ * @param[out] f_atom      Atom to use for fragment jobs
+ * @param[in]  prio        Priority of new atom (inherited from replay soft
+ *                         job)
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_create_atoms(struct kbase_context *kctx,
+		struct base_jd_atom_v2 *t_atom,
+		struct base_jd_atom_v2 *f_atom,
+		base_jd_prio prio)
+{
+	int t_atom_nr, f_atom_nr;
+
+	t_atom_nr = kbasep_allocate_katom(kctx);
+	if (t_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		return -EINVAL;
+	}
+
+	f_atom_nr = kbasep_allocate_katom(kctx);
+	if (f_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		kbasep_release_katom(kctx, t_atom_nr);
+		return -EINVAL;
+	}
+
+	kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
+	kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
+
+	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA);
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_DEBUG
+static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload)
+{
+	u64 next;
+
+	dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n");
+	next = payload->tiler_jc_list;
+
+	while (next) {
+		struct kbase_vmap_struct map;
+		base_jd_replay_jc *jc_struct;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map);
+
+		if (!jc_struct)
+			return;
+
+		dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n",
+				jc_struct, jc_struct->jc, jc_struct->next);
+
+		next = jc_struct->next;
+
+		kbase_vunmap(kctx, &map);
+	}
+}
+#endif
+
+/**
+ * @brief Parse a base_jd_replay_payload provided by userspace
+ *
+ * This will read the payload from userspace, and parse the job chains.
+ *
+ * @param[in] kctx         Context pointer
+ * @param[in] replay_atom  Replay soft job atom
+ * @param[in] t_atom       Atom to use for tiler jobs
+ * @param[in] f_atom       Atom to use for fragment jobs
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_parse_payload(struct kbase_context *kctx,
+					      struct kbase_jd_atom *replay_atom,
+					      struct base_jd_atom_v2 *t_atom,
+					      struct base_jd_atom_v2 *f_atom)
+{
+	base_jd_replay_payload *payload;
+	u64 next;
+	u64 prev_jc = 0;
+	u16 hw_job_id_offset = 0;
+	int ret = -EINVAL;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n",
+			replay_atom->jc, sizeof(payload));
+
+	payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map);
+
+	if (!payload) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload);
+	dev_dbg(kctx->kbdev->dev, "Payload structure:\n"
+				  "tiler_jc_list            = %llx\n"
+				  "fragment_jc              = %llx\n"
+				  "tiler_heap_free          = %llx\n"
+				  "fragment_hierarchy_mask  = %x\n"
+				  "tiler_hierarchy_mask     = %x\n"
+				  "hierarchy_default_weight = %x\n"
+				  "tiler_core_req           = %x\n"
+				  "fragment_core_req        = %x\n",
+							payload->tiler_jc_list,
+							  payload->fragment_jc,
+						      payload->tiler_heap_free,
+					      payload->fragment_hierarchy_mask,
+						 payload->tiler_hierarchy_mask,
+					     payload->hierarchy_default_weight,
+						       payload->tiler_core_req,
+						   payload->fragment_core_req);
+	payload_dump(kctx, payload);
+#endif
+
+	t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER;
+	f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER;
+
+	/* Sanity check core requirements*/
+	if (unlikely((t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE &
+			       ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_T ||
+	    (f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE &
+			      ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC) != BASE_JD_REQ_FS ||
+	     t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES ||
+	     f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)) {
+
+		int t_atom_type = t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP;
+		int f_atom_type = f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC;
+		int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+		int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+
+		if (t_atom_type != BASE_JD_REQ_T) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. Was: 0x%x\n Expected: 0x%x",
+			    t_atom_type, BASE_JD_REQ_T);
+		}
+		if (f_atom_type != BASE_JD_REQ_FS) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. Was 0x%x Expected: 0x%x\n",
+			    f_atom_type, BASE_JD_REQ_FS);
+		}
+		if (t_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n");
+		}
+		if (f_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n");
+		}
+
+		goto out;
+	}
+
+	/* Process tiler job chains */
+	next = payload->tiler_jc_list;
+	if (!next) {
+		dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n");
+		goto out;
+	}
+
+	while (next) {
+		base_jd_replay_jc *jc_struct;
+		struct kbase_vmap_struct jc_map;
+		u64 jc;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map);
+
+		if (!jc_struct) {
+			dev_err(kctx->kbdev->dev, "Failed to map jc struct\n");
+			goto out;
+		}
+
+		jc = jc_struct->jc;
+		next = jc_struct->next;
+		if (next)
+			jc_struct->jc = 0;
+
+		kbase_vunmap(kctx, &jc_map);
+
+		if (jc) {
+			u16 max_hw_job_id = 0;
+
+			if (kbasep_replay_find_hw_job_id(kctx, jc,
+					&max_hw_job_id) != 0)
+				goto out;
+
+			if (kbasep_replay_parse_jc(kctx, jc, prev_jc,
+					payload->tiler_heap_free,
+					payload->tiler_hierarchy_mask,
+					payload->hierarchy_default_weight,
+					hw_job_id_offset, false) != 0) {
+				goto out;
+			}
+
+			hw_job_id_offset += max_hw_job_id;
+
+			prev_jc = jc;
+		}
+	}
+	t_atom->jc = prev_jc;
+
+	/* Process fragment job chain */
+	f_atom->jc = payload->fragment_jc;
+	if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0,
+			payload->tiler_heap_free,
+			payload->fragment_hierarchy_mask,
+			payload->hierarchy_default_weight, 0,
+			true) != 0) {
+		goto out;
+	}
+
+	if (!t_atom->jc || !f_atom->jc) {
+		dev_err(kctx->kbdev->dev, "Invalid payload\n");
+		goto out;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n",
+			t_atom->jc, f_atom->jc);
+	ret = 0;
+
+out:
+	kbase_vunmap(kctx, &map);
+
+	return ret;
+}
+
+static void kbase_replay_process_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+	struct kbase_jd_context *jctx;
+	bool need_to_try_schedule_context = false;
+
+	struct base_jd_atom_v2 t_atom, f_atom;
+	struct kbase_jd_atom *t_katom, *f_katom;
+	base_jd_prio atom_prio;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+
+	mutex_lock(&jctx->lock);
+
+	atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority);
+
+	if (kbasep_replay_create_atoms(
+			kctx, &t_atom, &f_atom, atom_prio) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	t_katom = &jctx->atoms[t_atom.atom_number];
+	f_katom = &jctx->atoms[f_atom.atom_number];
+
+	if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) {
+		kbasep_release_katom(kctx, t_atom.atom_number);
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	kbasep_replay_reset_softjob(katom, f_katom);
+
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom);
+	if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom);
+	if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+out:
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_disjoint_state_down(kctx->kbdev);
+
+		need_to_try_schedule_context |= jd_done_nolock(katom, NULL);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kctx->kbdev);
+
+	mutex_unlock(&jctx->lock);
+}
+
+/**
+ * @brief Check job replay fault
+ *
+ * This will read the job payload, checks fault type and source, then decides
+ * whether replay is required.
+ *
+ * @param[in] katom       The atom to be processed
+ * @return  true (success) if replay required or false on failure.
+ */
+static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	base_jd_replay_payload *payload;
+	u64 job_header;
+	u64 job_loop_detect;
+	struct job_descriptor_header *job;
+	struct kbase_vmap_struct job_map;
+	struct kbase_vmap_struct map;
+	bool err = false;
+
+	/* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or
+	 * if force_replay is enabled.
+	 */
+	if (BASE_JD_EVENT_TERMINATED == katom->event_code) {
+		return false;
+	} else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) {
+		return true;
+	} else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) {
+		katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT;
+		return true;
+	} else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) {
+		/* No replay for faults of type other than
+		 * BASE_JD_EVENT_DATA_INVALID_FAULT.
+		 */
+		return false;
+	}
+
+	/* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc
+	 * to find out whether the source of exception is POLYGON_LIST. Replay
+	 * is required if the source of fault is POLYGON_LIST.
+	 */
+	payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n");
+		return false;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload);
+	dev_dbg(dev, "\nPayload structure:\n"
+		     "fragment_jc              = 0x%llx\n"
+		     "fragment_hierarchy_mask  = 0x%x\n"
+		     "fragment_core_req        = 0x%x\n",
+		     payload->fragment_jc,
+		     payload->fragment_hierarchy_mask,
+		     payload->fragment_core_req);
+#endif
+	/* Process fragment job chain */
+	job_header      = (u64) payload->fragment_jc;
+	job_loop_detect = job_header;
+	while (job_header) {
+		job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map);
+		if (!job) {
+			dev_err(dev, "failed to map jc\n");
+			/* unmap payload*/
+			kbase_vunmap(kctx, &map);
+			return false;
+		}
+
+
+		dump_job_head(kctx, "\njob_head structure:\n", job);
+
+		/* Replay only when the polygon list reader caused the
+		 * DATA_INVALID_FAULT */
+		if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
+		   (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) {
+			err = true;
+			kbase_vunmap(kctx, &job_map);
+			break;
+		}
+
+		/* Move on to next fragment job in the list */
+		if (job->job_descriptor_size)
+			job_header = job->next_job._64;
+		else
+			job_header = job->next_job._32;
+
+		kbase_vunmap(kctx, &job_map);
+
+		/* Job chain loop detected */
+		if (job_header == job_loop_detect)
+			break;
+	}
+
+	/* unmap payload*/
+	kbase_vunmap(kctx, &map);
+
+	return err;
+}
+
+
+/**
+ * @brief Process a replay job
+ *
+ * Called from kbase_process_soft_job.
+ *
+ * On exit, if the job has completed, katom->event_code will have been updated.
+ * If the job has not completed, and is replaying jobs, then the atom status
+ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * @param[in] katom  The atom to be processed
+ * @return           false if the atom has completed
+ *                   true if the atom is replaying jobs
+ */
+bool kbase_replay_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	/* Don't replay this atom if these issues are not present in the
+	 * hardware */
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11020) &&
+			!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11024)) {
+		dev_dbg(kbdev->dev, "Hardware does not need replay workaround");
+
+		/* Signal failure to userspace */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+
+		return false;
+	}
+
+	if (katom->event_code == BASE_JD_EVENT_DONE) {
+		dev_dbg(kbdev->dev, "Previous job succeeded - not replaying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	if (jctx->sched_info.ctx.is_dying) {
+		dev_dbg(kbdev->dev, "Not replaying; context is dying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	/* Check job exception type and source before replaying. */
+	if (!kbase_replay_fault_check(katom)) {
+		dev_dbg(kbdev->dev,
+			"Replay cancelled on event %x\n", katom->event_code);
+		/* katom->event_code is already set to the failure code of the
+		 * previous job.
+		 */
+		return false;
+	}
+
+	dev_warn(kbdev->dev, "Replaying jobs retry=%d\n",
+			katom->retry_count);
+
+	katom->retry_count++;
+
+	if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) {
+		dev_err(kbdev->dev, "Replay exceeded limit - failing jobs\n");
+
+		kbase_disjoint_state_down(kbdev);
+
+		/* katom->event_code is already set to the failure code of the
+		   previous job */
+		return false;
+	}
+
+	/* only enter the disjoint state once for the whole time while the replay is ongoing */
+	if (katom->retry_count == 1)
+		kbase_disjoint_state_up(kbdev);
+
+	INIT_WORK(&katom->work, kbase_replay_process_worker);
+	queue_work(kctx->event_workq, &katom->work);
+
+	return true;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
new file mode 100755
index 0000000..43175c8
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
@@ -0,0 +1,74 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+#include <mali_kbase_smc.h>
+
+#include <linux/compiler.h>
+
+static noinline u64 invoke_smc_fid(u64 function_id,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	register u64 x0 asm("x0") = function_id;
+	register u64 x1 asm("x1") = arg0;
+	register u64 x2 asm("x2") = arg1;
+	register u64 x3 asm("x3") = arg2;
+
+	asm volatile(
+			__asmeq("%0", "x0")
+			__asmeq("%1", "x1")
+			__asmeq("%2", "x2")
+			__asmeq("%3", "x3")
+			"smc    #0\n"
+			: "+r" (x0)
+			: "r" (x1), "r" (x2), "r" (x3));
+
+	return x0;
+}
+
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2)
+{
+	/* Is fast call (bit 31 set) */
+	KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL);
+	/* bits 16-23 must be zero for fast calls */
+	KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0);
+
+	return invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	/* Only the six bits allowed should be used. */
+	KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0);
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return kbase_invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+#endif /* CONFIG_ARM64 */
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
new file mode 100755
index 0000000..9bff3d2
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_SMC_H_
+#define _KBASE_SMC_H_
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+
+#define SMC_FAST_CALL (1 << 31)
+#define SMC_64 (1 << 30)
+
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */
+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET)
+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET)
+
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @fid: The SMC function to call, see SMC Calling convention.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC.
+  */
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2);
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @oen: Owning Entity number (SIP, STD etc).
+  * @function_number: The function number within the OEN.
+  * @smc64: use SMC64 calling convention instead of SMC32.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC call.
+  */
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2);
+
+#endif /* CONFIG_ARM64 */
+
+#endif /* _KBASE_SMC_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
new file mode 100755
index 0000000..0419761
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -0,0 +1,1400 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#include <asm/cacheflush.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/dma-mapping.h>
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#include <linux/syscalls.h>
+#include "mali_kbase_sync.h"
+#endif
+#include <mali_base_kernel.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_mem_linux.h>
+#include <linux/version.h>
+#include <linux/ktime.h>
+#include <linux/pfn.h>
+#include <linux/sched.h>
+
+/* Mask to check cache alignment of data structures */
+#define KBASE_CACHE_ALIGNMENT_MASK		((1<<L1_CACHE_SHIFT)-1)
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static struct page *kbasep_translate_gpu_addr_to_kernel_page(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 pfn;
+	struct kbase_va_region *reg;
+	phys_addr_t addr = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	pfn = gpu_addr >> PAGE_SHIFT;
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto err_vm_unlock;
+	addr = reg->cpu_alloc->pages[pfn - reg->start_pfn];
+	kbase_gpu_vm_unlock(kctx);
+
+	if (!addr)
+		goto err;
+
+	return pfn_to_page(PFN_DOWN(addr));
+
+err_vm_unlock:
+	kbase_gpu_vm_unlock(kctx);
+err:
+	return NULL;
+}
+
+int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status)
+{
+	struct page *pg = kbasep_translate_gpu_addr_to_kernel_page(
+			kctx, evt);
+	unsigned char *mapped_pg;
+	u32 offset = evt & ~PAGE_MASK;
+
+	KBASE_DEBUG_ASSERT(NULL != status);
+
+	if (!pg)
+		return -1;
+
+	mapped_pg = (unsigned char *)kmap_atomic(pg);
+	KBASE_DEBUG_ASSERT(NULL != mapped_pg); /* kmap_atomic() must not fail */
+	*status = *(mapped_pg + offset);
+	kunmap_atomic(mapped_pg);
+
+	return 0;
+}
+
+int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status)
+{
+	struct page *pg = kbasep_translate_gpu_addr_to_kernel_page(
+			kctx, evt);
+	unsigned char *mapped_pg;
+	u32 offset = evt & ~PAGE_MASK;
+
+	KBASE_DEBUG_ASSERT((new_status == BASE_JD_SOFT_EVENT_SET) ||
+			   (new_status == BASE_JD_SOFT_EVENT_RESET));
+
+	if (!pg)
+		return -1;
+
+	mapped_pg = (unsigned char *)kmap_atomic(pg);
+	KBASE_DEBUG_ASSERT(NULL != mapped_pg); /* kmap_atomic() must not fail */
+	*(mapped_pg + offset) = new_status;
+	kunmap_atomic(mapped_pg);
+
+	return 0;
+}
+
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+	struct kbase_va_region *reg;
+	phys_addr_t addr = 0;
+	u64 pfn;
+	u32 offset;
+	char *page;
+	struct timespec ts;
+	struct base_dump_cpu_gpu_counters data;
+	u64 system_time;
+	u64 cycle_counter;
+	u64 jc = katom->jc;
+	struct kbase_context *kctx = katom->kctx;
+	int pm_active_err;
+
+	memset(&data, 0, sizeof(data));
+
+	/* Take the PM active reference as late as possible - otherwise, it could
+	 * delay suspend until we process the atom (which may be at the end of a
+	 * long chain of dependencies */
+	pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+	if (pm_active_err) {
+		struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+		/* We're suspended - queue this on the list of suspended jobs
+		 * Use dep_item[1], because dep_item[0] is in use for 'waiting_soft_jobs' */
+		mutex_lock(&js_devdata->runpool_mutex);
+		list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		/* Also adding this to the list of waiting soft job */
+		kbasep_add_waiting_soft_job(katom);
+
+		return pm_active_err;
+	}
+
+	kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
+									&ts);
+
+	kbase_pm_context_idle(kctx->kbdev);
+
+	data.sec = ts.tv_sec;
+	data.usec = ts.tv_nsec / 1000;
+	data.system_time = system_time;
+	data.cycle_counter = cycle_counter;
+
+	pfn = jc >> PAGE_SHIFT;
+	offset = jc & ~PAGE_MASK;
+
+	/* Assume this atom will be cancelled until we know otherwise */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (offset > 0x1000 - sizeof(data)) {
+		/* Wouldn't fit in the page */
+		return 0;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, jc);
+	if (reg &&
+	    (reg->flags & KBASE_REG_GPU_WR) &&
+	    reg->cpu_alloc && reg->cpu_alloc->pages)
+		addr = reg->cpu_alloc->pages[pfn - reg->start_pfn];
+
+	kbase_gpu_vm_unlock(kctx);
+	if (!addr)
+		return 0;
+
+	page = kmap(pfn_to_page(PFN_DOWN(addr)));
+	if (!page)
+		return 0;
+
+	kbase_sync_single_for_cpu(katom->kctx->kbdev,
+			kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) +
+			offset, sizeof(data),
+			DMA_BIDIRECTIONAL);
+
+	memcpy(page + offset, &data, sizeof(data));
+
+	kbase_sync_single_for_device(katom->kctx->kbdev,
+			kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) +
+			offset, sizeof(data),
+			DMA_BIDIRECTIONAL);
+
+	kunmap(pfn_to_page(PFN_DOWN(addr)));
+
+	/* Atom was fine - mark it as done */
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	return 0;
+}
+
+#ifdef CONFIG_SYNC
+
+/* Complete an atom that has returned '1' from kbase_process_soft_job (i.e. has waited)
+ *
+ * @param katom     The atom to complete
+ */
+static void complete_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	list_del(&katom->dep_item[0]);
+	kbase_finish_soft_job(katom);
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(kctx->kbdev);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+static enum base_jd_event_code kbase_fence_trigger(struct kbase_jd_atom *katom, int result)
+{
+	struct sync_pt *pt;
+	struct sync_timeline *timeline;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	if (!list_is_singular(&katom->fence->pt_list_head)) {
+#else
+	if (katom->fence->num_fences != 1) {
+#endif
+		/* Not exactly one item in the list - so it didn't (directly) come from us */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	pt = list_first_entry(&katom->fence->pt_list_head, struct sync_pt, pt_list);
+#else
+	pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
+#endif
+	timeline = sync_pt_parent(pt);
+
+	if (!kbase_sync_timeline_is_ours(timeline)) {
+		/* Fence has a sync_pt which isn't ours! */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	kbase_sync_signal_pt(pt, result);
+
+	sync_timeline_signal(timeline);
+
+	return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+static void kbase_fence_wait_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+
+	complete_soft_job(katom);
+}
+
+static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter)
+{
+	struct kbase_jd_atom *katom = container_of(waiter, struct kbase_jd_atom, sync_waiter);
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != katom);
+
+	kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	/* Propagate the fence status to the atom.
+	 * If negative then cancel this atom and its dependencies.
+	 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	if (fence->status < 0)
+#else
+	if (atomic_read(&fence->status) < 0)
+#endif
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue
+	 *
+	 * The issue is that we may signal the timeline while holding kctx->jctx.lock and
+	 * the callbacks are run synchronously from sync_timeline_signal. So we simply defer the work.
+	 */
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, kbase_fence_wait_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+static int kbase_fence_wait(struct kbase_jd_atom *katom)
+{
+	int ret;
+
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	KBASE_DEBUG_ASSERT(NULL != katom->kctx);
+
+	sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+	ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+	if (ret == 1) {
+		/* Already signalled */
+		return 0;
+	}
+
+	if (ret < 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+		INIT_WORK(&katom->work, kbase_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+	kbasep_add_waiting_soft_job(katom);
+
+	return 1;
+}
+
+static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+		/* The wait wasn't cancelled - leave the cleanup for kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+#endif /* CONFIG_SYNC */
+
+static void kbasep_soft_event_complete_job(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt)
+{
+	int cancel_timer = 1;
+	struct list_head *entry, *tmp;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, dep_item[0]);
+
+		if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==
+		    BASE_JD_REQ_SOFT_EVENT_WAIT) {
+			if (katom->jc == evt) {
+				list_del(&katom->dep_item[0]);
+
+				katom->event_code = BASE_JD_EVENT_DONE;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				/* There are still other waiting jobs, we cannot
+				 * cancel the timer yet */
+				cancel_timer = 0;
+			}
+		}
+	}
+
+	if (cancel_timer)
+		hrtimer_try_to_cancel(&kctx->soft_event_timeout);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+enum hrtimer_restart kbasep_soft_event_timeout_worker(struct hrtimer *timer)
+{
+	struct kbase_context *kctx = container_of(timer, struct kbase_context,
+			soft_event_timeout);
+	u32 timeout_ms = (u32)atomic_read(
+			&kctx->kbdev->js_data.soft_event_timeout_ms);
+	ktime_t cur_time = ktime_get();
+	enum hrtimer_restart restarting = HRTIMER_NORESTART;
+	unsigned long lflags;
+	struct list_head *entry, *tmp;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, dep_item[0]);
+
+		if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==
+		    BASE_JD_REQ_SOFT_EVENT_WAIT) {
+			s64 elapsed_time =
+				ktime_to_ms(ktime_sub(cur_time,
+						      katom->start_timestamp));
+			if (elapsed_time > (s64)timeout_ms) {
+				/* Take it out of the list to ensure that it
+				 * will be cancelled in all cases */
+				list_del(&katom->dep_item[0]);
+
+				katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				restarting = HRTIMER_RESTART;
+			}
+		}
+	}
+
+	if (restarting)
+		hrtimer_add_expires(timer, HR_TIMER_DELAY_MSEC(timeout_ms));
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+
+	return restarting;
+}
+
+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	ktime_t remaining;
+	unsigned char status;
+
+	/* The status of this soft-job is stored in jc */
+	if (kbasep_read_soft_event_status(kctx, katom->jc, &status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return 0;
+	}
+
+	if (status == BASE_JD_SOFT_EVENT_SET)
+		return 0; /* Event already set, nothing to do */
+
+	/* Record the start time of this atom so we could cancel it at
+	 * the right time */
+	katom->start_timestamp = ktime_get();
+
+	/* Add the atom to the waiting list before the timer is
+	 * (re)started to make sure that it gets processed */
+	kbasep_add_waiting_soft_job(katom);
+
+	/* Schedule cancellation of this atom after a period if it is
+	 * not active */
+	remaining = hrtimer_get_remaining(&kctx->soft_event_timeout);
+	if (remaining.tv64 <= 0) {
+		int timeout_ms = atomic_read(
+				&kctx->kbdev->js_data.soft_event_timeout_ms);
+		hrtimer_start(&kctx->soft_event_timeout,
+			      HR_TIMER_DELAY_MSEC((u64)timeout_ms),
+			      HRTIMER_MODE_REL);
+	}
+
+	return 1;
+}
+
+static void kbasep_soft_event_update(struct kbase_jd_atom *katom,
+				     unsigned char new_status)
+{
+	/* Complete jobs waiting on the same event */
+	struct kbase_context *kctx = katom->kctx;
+
+	if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, katom->jc);
+}
+
+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
+{
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+struct kbase_debug_copy_buffer {
+	size_t size;
+	struct page **pages;
+	int nr_pages;
+	size_t offset;
+	/*To find memory region*/
+	u64 gpu_addr;
+
+	struct page **extres_pages;
+	int nr_extres_pages;
+};
+
+static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer)
+{
+	struct page **pages = buffer->extres_pages;
+	int nr_pages = buffer->nr_extres_pages;
+
+	if (pages) {
+		int i;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *pg = pages[i];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(pages);
+	}
+}
+
+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+
+	if (!buffers)
+		return;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	for (i = 0; i < nr; i++) {
+		int p;
+		struct kbase_va_region *reg;
+
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, buffers[i].gpu_addr);
+
+		if (!buffers[i].pages)
+			break;
+		for (p = 0; p < buffers[i].nr_pages; p++) {
+			struct page *pg = buffers[i].pages[p];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(buffers[i].pages);
+		if (reg && reg->gpu_alloc) {
+			switch (reg->gpu_alloc->type) {
+			case BASE_MEM_IMPORT_TYPE_USER_BUFFER:
+			{
+				free_user_buffer(&buffers[i]);
+				break;
+			}
+			default:
+				/* Nothing to be done. */
+				break;
+			}
+			kbase_mem_phy_alloc_put(reg->gpu_alloc);
+		}
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+	kfree(buffers);
+
+	katom->jc = 0;
+}
+
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers;
+	struct base_jd_debug_copy_buffer *user_buffers = NULL;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+	int ret = 0;
+	void __user *user_structs = (void __user *)(uintptr_t)katom->jc;
+
+	if (!user_structs)
+		return -EINVAL;
+
+	buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers) {
+		ret = -ENOMEM;
+		katom->jc = 0;
+		goto out_cleanup;
+	}
+	katom->jc = (u64)(uintptr_t)buffers;
+
+	user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL);
+
+	if (!user_buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+
+	ret = copy_from_user(user_buffers, user_structs,
+			sizeof(*user_buffers)*nr);
+	if (ret)
+		goto out_cleanup;
+
+	for (i = 0; i < nr; i++) {
+		u64 addr = user_buffers[i].address;
+		u64 page_addr = addr & PAGE_MASK;
+		u64 end_page_addr = addr + user_buffers[i].size - 1;
+		u64 last_page_addr = end_page_addr & PAGE_MASK;
+		int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1;
+		int pinned_pages;
+		struct kbase_va_region *reg;
+		struct base_external_resource user_extres;
+
+		if (!addr)
+			continue;
+
+		buffers[i].nr_pages = nr_pages;
+		buffers[i].offset = addr & ~PAGE_MASK;
+		if (buffers[i].offset >= PAGE_SIZE) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+		buffers[i].size = user_buffers[i].size;
+
+		buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *),
+				GFP_KERNEL);
+		if (!buffers[i].pages) {
+			ret = -ENOMEM;
+			goto out_cleanup;
+		}
+
+		pinned_pages = get_user_pages_fast(page_addr,
+					nr_pages,
+					1, /* Write */
+					buffers[i].pages);
+		if (pinned_pages < 0) {
+			ret = pinned_pages;
+			goto out_cleanup;
+		}
+		if (pinned_pages != nr_pages) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		user_extres = user_buffers[i].extres;
+		if (user_extres.ext_resource == 0ULL) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		buffers[i].gpu_addr = user_extres.ext_resource &
+			~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		kbase_gpu_vm_lock(katom->kctx);
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, buffers[i].gpu_addr);
+
+		if (NULL == reg || NULL == reg->cpu_alloc ||
+				(reg->flags & KBASE_REG_FREE)) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+		buffers[i].nr_extres_pages = reg->nr_pages;
+		if (reg->nr_pages*PAGE_SIZE != buffers[i].size)
+			dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n");
+
+		switch (reg->gpu_alloc->type) {
+		case BASE_MEM_IMPORT_TYPE_USER_BUFFER:
+		{
+			struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+			unsigned long nr_pages =
+				alloc->imported.user_buf.nr_pages;
+
+			if (alloc->imported.user_buf.mm != current->mm) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			buffers[i].extres_pages = kcalloc(nr_pages,
+					sizeof(struct page *), GFP_KERNEL);
+			if (!buffers[i].extres_pages) {
+				ret = -ENOMEM;
+				goto out_unlock;
+			}
+
+			ret = get_user_pages_fast(
+					alloc->imported.user_buf.address,
+					nr_pages, 0,
+					buffers[i].extres_pages);
+			if (ret != nr_pages)
+				goto out_unlock;
+			ret = 0;
+			break;
+		}
+		case BASE_MEM_IMPORT_TYPE_UMP:
+		{
+			dev_warn(katom->kctx->kbdev->dev,
+					"UMP is not supported for debug_copy jobs\n");
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		default:
+			/* Nothing to be done. */
+			break;
+		}
+		kbase_gpu_vm_unlock(katom->kctx);
+	}
+	kfree(user_buffers);
+
+	return ret;
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+
+out_cleanup:
+	kfree(buffers);
+	kfree(user_buffers);
+
+	/* Frees allocated memory for kbase_debug_copy_job struct, including
+	 * members, and sets jc to 0 */
+	kbase_debug_copy_finish(katom);
+	return ret;
+}
+
+static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+		void *extres_page, struct page **pages, unsigned int nr_pages,
+		unsigned int *target_page_nr, size_t offset, size_t *to_copy)
+{
+	void *target_page = kmap(pages[*target_page_nr]);
+	size_t chunk = PAGE_SIZE-offset;
+
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	chunk = min(chunk, *to_copy);
+
+	memcpy(target_page + offset, extres_page, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+
+	*target_page_nr += 1;
+	if (*target_page_nr >= nr_pages)
+		return;
+
+	target_page = kmap(pages[*target_page_nr]);
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(target_page);
+
+	chunk = min(offset, *to_copy);
+	memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+}
+
+static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data)
+{
+	unsigned int i;
+	unsigned int target_page_nr = 0;
+	struct kbase_va_region *reg;
+	struct page **pages = buf_data->pages;
+	u64 offset = buf_data->offset;
+	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
+	size_t to_copy = min(extres_size, buf_data->size);
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(pages != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx, buf_data->gpu_addr);
+
+	if (!reg) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	switch (reg->gpu_alloc->type) {
+	case BASE_MEM_IMPORT_TYPE_USER_BUFFER:
+	{
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+			struct page *pg = buf_data->extres_pages[i];
+			void *extres_page = kmap(pg);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			kunmap(pg);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		break;
+	}
+	break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case BASE_MEM_IMPORT_TYPE_UMM: {
+		struct dma_buf *dma_buf = reg->gpu_alloc->imported.umm.dma_buf;
+
+		KBASE_DEBUG_ASSERT(dma_buf != NULL);
+
+		ret = dma_buf_begin_cpu_access(dma_buf, 0,
+				buf_data->nr_extres_pages*PAGE_SIZE,
+				DMA_FROM_DEVICE);
+		if (ret)
+			goto out_unlock;
+
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+
+			void *extres_page = dma_buf_kmap(dma_buf, i);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			dma_buf_kunmap(dma_buf, i, extres_page);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		dma_buf_end_cpu_access(dma_buf, 0,
+				buf_data->nr_extres_pages*PAGE_SIZE,
+				DMA_FROM_DEVICE);
+		break;
+	}
+#endif
+	default:
+		ret = -EINVAL;
+	}
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+
+}
+
+static int kbase_debug_copy(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+
+	for (i = 0; i < katom->nr_extres; i++) {
+		int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]);
+
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	struct base_jit_alloc_info *info;
+	int ret;
+
+	/* Fail the job if there is no info structure */
+	if (!data) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(info, data, sizeof(*info)) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* If the ID is zero then fail the job */
+	if (info->id == 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Sanity check that the PA fits within the VA */
+	if (info->va_pages < info->commit_pages) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Ensure the GPU address is correctly aligned */
+	if ((info->gpu_alloc_addr & 0x7) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Replace the user pointer with our kernel allocated info structure */
+	katom->jc = (u64)(uintptr_t) info;
+
+	/*
+	 * Note:
+	 * The provided info->gpu_alloc_addr isn't validated here as
+	 * userland can cache allocations which means that even
+	 * though the region is valid it doesn't represent the
+	 * same thing it used to.
+	 *
+	 * Complete validation of va_pages, commit_pages and extent
+	 * isn't done here as it will be done during the call to
+	 * kbase_mem_alloc.
+	 */
+	return 0;
+
+free_info:
+	kfree(info);
+fail:
+	katom->jc = 0;
+	return ret;
+}
+
+static void kbase_jit_allocate_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct base_jit_alloc_info *info;
+	struct kbase_va_region *reg;
+	struct kbase_vmap_struct mapping;
+	u64 *ptr;
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+
+	/* The JIT ID is still in use so fail the allocation */
+	if (kctx->jit_alloc[info->id]) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return;
+	}
+
+	/*
+	 * Mark the allocation so we know it's in use even if the
+	 * allocation itself fails.
+	 */
+	kctx->jit_alloc[info->id] = (struct kbase_va_region *) -1;
+
+	/* Create a JIT allocation */
+	reg = kbase_jit_allocate(kctx, info);
+	if (!reg) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return;
+	}
+
+	/*
+	 * Write the address of the JIT allocation to the user provided
+	 * GPU allocation.
+	 */
+	ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+			&mapping);
+	if (!ptr) {
+		/*
+		 * Leave the allocation "live" as the JIT free jit will be
+		 * submitted anyway.
+		 */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	*ptr = reg->start_pfn << PAGE_SHIFT;
+	kbase_vunmap(kctx, &mapping);
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	/*
+	 * Bind it to the user provided ID. Do this last so we can check for
+	 * the JIT free racing this JIT alloc job.
+	 */
+	kctx->jit_alloc[info->id] = reg;
+}
+
+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom)
+{
+	struct base_jit_alloc_info *info;
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(info);
+}
+
+static void kbase_jit_free_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u8 id = (u8) katom->jc;
+
+	/*
+	 * If the ID is zero or it is not in use yet then fail the job.
+	 */
+	if ((id == 0) || (kctx->jit_alloc[id] == NULL)) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	/*
+	 * If the ID is valid but the allocation request failed still succeed
+	 * this soft job but don't try and free the allocation.
+	 */
+	if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1)
+		kbase_jit_free(kctx, kctx->jit_alloc[id]);
+
+	kctx->jit_alloc[id] = NULL;
+}
+
+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
+{
+	__user struct base_external_resource_list *user_ext_res;
+	struct base_external_resource_list *ext_res;
+	u64 count = 0;
+	size_t copy_size;
+	int ret;
+
+	user_ext_res = (__user struct base_external_resource_list *)
+			(uintptr_t) katom->jc;
+
+	/* Fail the job if there is no info structure */
+	if (!user_ext_res) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Is the number of external resources in range? */
+	if (!count || count > BASE_EXT_RES_COUNT_MAX) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	copy_size = sizeof(*ext_res);
+	copy_size += sizeof(struct base_external_resource) * (count - 1);
+	ext_res = kzalloc(copy_size, GFP_KERNEL);
+	if (!ext_res) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/*
+	 * Overwrite the count with the first value incase it was changed
+	 * after the fact.
+	 */
+	ext_res->count = count;
+
+	/*
+	 * Replace the user pointer with our kernel allocated
+	 * ext_res structure.
+	 */
+	katom->jc = (u64)(uintptr_t) ext_res;
+
+	return 0;
+
+free_info:
+	kfree(ext_res);
+fail:
+	return ret;
+}
+
+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
+{
+	struct base_external_resource_list *ext_res;
+	int i;
+	bool failed = false;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	if (!ext_res)
+		goto failed_jc;
+
+	kbase_gpu_vm_lock(katom->kctx);
+
+	for (i = 0; i < ext_res->count; i++) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		if (map) {
+			if (!kbase_sticky_resource_acquire(katom->kctx,
+					gpu_addr))
+				goto failed_loop;
+		} else
+			if (!kbase_sticky_resource_release(katom->kctx, NULL,
+					gpu_addr))
+				failed = true;
+	}
+
+	/*
+	 * In the case of unmap we continue unmapping other resources in the
+	 * case of failure but will always report failure if _any_ unmap
+	 * request fails.
+	 */
+	if (failed)
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	else
+		katom->event_code = BASE_JD_EVENT_DONE;
+
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	return;
+
+failed_loop:
+	while (--i > 0) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+
+		kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr);
+	}
+
+	katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	kbase_gpu_vm_unlock(katom->kctx);
+
+failed_jc:
+	return;
+}
+
+static void kbase_ext_res_finish(struct kbase_jd_atom *katom)
+{
+	struct base_external_resource_list *ext_res;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(ext_res);
+}
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		return kbase_dump_cpu_gpu_time(katom);
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		KBASE_DEBUG_ASSERT(katom->fence != NULL);
+		katom->event_code = kbase_fence_trigger(katom, katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+		/* Release the reference as we don't need it any more */
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		return kbase_fence_wait(katom);
+#endif				/* CONFIG_SYNC */
+	case BASE_JD_REQ_SOFT_REPLAY:
+		return kbase_replay_process(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		return kbasep_soft_event_wait(katom);
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+		kbasep_soft_event_update(katom, BASE_JD_SOFT_EVENT_SET);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		kbasep_soft_event_update(katom, BASE_JD_SOFT_EVENT_RESET);
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+	{
+		int res = kbase_debug_copy(katom);
+
+		if (res)
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		break;
+	}
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_process(katom, true);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_process(katom, false);
+		break;
+	}
+
+	/* Atom is complete */
+	return 0;
+}
+
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		kbase_fence_cancel_wait(katom);
+		break;
+#endif
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		kbasep_soft_event_cancel_job(katom);
+		break;
+	default:
+		/* This soft-job doesn't support cancellation! */
+		KBASE_DEBUG_ASSERT(0);
+	}
+}
+
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		{
+			if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK))
+				return -EINVAL;
+		}
+		break;
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		{
+			struct base_fence fence;
+			int fd;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			fd = kbase_stream_create_fence(fence.basep.stream_fd);
+			if (fd < 0)
+				return -EINVAL;
+
+			katom->fence = sync_fence_fdget(fd);
+
+			if (katom->fence == NULL) {
+				/* The only way the fence can be NULL is if userspace closed it for us.
+				 * So we don't need to clear it up */
+				return -EINVAL;
+			}
+			fence.basep.fd = fd;
+			if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+				katom->fence = NULL;
+				sys_close(fd);
+				return -EINVAL;
+			}
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		{
+			struct base_fence fence;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			/* Get a reference to the fence object */
+			katom->fence = sync_fence_fdget(fence.basep.fd);
+			if (katom->fence == NULL)
+				return -EINVAL;
+		}
+		break;
+#endif				/* CONFIG_SYNC */
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_prepare(katom);
+	case BASE_JD_REQ_SOFT_REPLAY:
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		if (katom->jc == 0)
+			return -EINVAL;
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		return kbase_debug_copy_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		return kbase_ext_res_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		return kbase_ext_res_prepare(katom);
+	default:
+		/* Unsupported soft-job */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void kbase_finish_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		/* Nothing to do */
+		break;
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		/* If fence has not yet been signalled, do it now */
+		if (katom->fence) {
+			kbase_fence_trigger(katom, katom->event_code ==
+					BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+			sync_fence_put(katom->fence);
+			katom->fence = NULL;
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		/* Release the reference to the fence object */
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+		break;
+#endif				/* CONFIG_SYNC */
+
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		kbase_debug_copy_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_finish(katom);
+		break;
+	}
+}
+
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
+{
+	LIST_HEAD(local_suspended_soft_jobs);
+	struct kbase_jd_atom *tmp_iter;
+	struct kbase_jd_atom *katom_iter;
+	struct kbasep_js_device_data *js_devdata;
+	bool resched = false;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	js_devdata = &kbdev->js_data;
+
+	/* Move out the entire list */
+	mutex_lock(&js_devdata->runpool_mutex);
+	list_splice_init(&js_devdata->suspended_soft_jobs_list,
+			&local_suspended_soft_jobs);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/*
+	 * Each atom must be detached from the list and ran separately -
+	 * it could be re-added to the old list, but this is unlikely
+	 */
+	list_for_each_entry_safe(katom_iter, tmp_iter,
+			&local_suspended_soft_jobs, dep_item[1]) {
+		struct kbase_context *kctx = katom_iter->kctx;
+
+		mutex_lock(&kctx->jctx.lock);
+
+		/* Remove from the global list */
+		list_del(&katom_iter->dep_item[1]);
+		/* Remove from the context's list of waiting soft jobs */
+		list_del(&katom_iter->dep_item[0]);
+
+		if (kbase_process_soft_job(katom_iter) == 0) {
+			kbase_finish_soft_job(katom_iter);
+			resched |= jd_done_nolock(katom_iter, NULL);
+		} else {
+			KBASE_DEBUG_ASSERT((katom_iter->core_req &
+					BASEP_JD_REQ_ATOM_TYPE)
+					!= BASE_JD_REQ_SOFT_REPLAY);
+		}
+
+		mutex_unlock(&kctx->jctx.lock);
+	}
+
+	if (resched)
+		kbase_js_sched_all(kbdev);
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
new file mode 100644
index 0000000..c98762c
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
@@ -0,0 +1,23 @@
+ /*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include "mali_kbase_strings.h"
+
+#define KBASE_DRV_NAME "mali"
+#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline"
+
+const char kbase_drv_name[] = KBASE_DRV_NAME;
+const char kbase_timeline_name[] = KBASE_TIMELINE_NAME;
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
new file mode 100644
index 0000000..41b8fdb
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
@@ -0,0 +1,19 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+extern const char kbase_drv_name[];
+extern const char kbase_timeline_name[];
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c
new file mode 100755
index 0000000..c5fb981
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c
@@ -0,0 +1,182 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_SYNC
+
+#include <linux/seq_file.h>
+#include "sync.h"
+#include <mali_kbase.h>
+#include <mali_kbase_sync.h>
+
+struct mali_sync_timeline {
+	struct sync_timeline timeline;
+	atomic_t counter;
+	atomic_t signalled;
+};
+
+struct mali_sync_pt {
+	struct sync_pt pt;
+	int order;
+	int result;
+};
+
+static struct mali_sync_timeline *to_mali_sync_timeline(struct sync_timeline *timeline)
+{
+	return container_of(timeline, struct mali_sync_timeline, timeline);
+}
+
+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, pt);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_pt *new_mpt;
+	struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), sizeof(struct mali_sync_pt));
+
+	if (!new_pt)
+		return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+	new_mpt->order = mpt->order;
+	new_mpt->result = mpt->result;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+	int result = mpt->result;
+
+	int diff = atomic_read(&mtl->signalled) - mpt->order;
+
+	if (diff >= 0)
+		return (result < 0) ? result : 1;
+
+	return 0;
+}
+
+static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
+	struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
+
+	int diff = ma->order - mb->order;
+
+	if (diff == 0)
+		return 0;
+
+	return (diff < 0) ? -1 : 1;
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str,
+			       int size)
+{
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
+
+	snprintf(str, size, "%d", atomic_read(&mtl->signalled));
+}
+
+static void pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+
+	snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
+}
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name = "Mali",
+	.dup = timeline_dup,
+	.has_signaled = timeline_has_signaled,
+	.compare = timeline_compare,
+	.timeline_value_str = timeline_value_str,
+	.pt_value_str       = pt_value_str,
+};
+
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
+{
+	return timeline->ops == &mali_timeline_ops;
+}
+
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name)
+{
+	struct sync_timeline *tl;
+	struct mali_sync_timeline *mtl;
+
+	tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline), name);
+	if (!tl)
+		return NULL;
+
+	/* Set the counter in our private struct */
+	mtl = to_mali_sync_timeline(tl);
+	atomic_set(&mtl->counter, 0);
+	atomic_set(&mtl->signalled, 0);
+
+	return tl;
+}
+
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+	struct sync_pt *pt = sync_pt_create(parent, sizeof(struct mali_sync_pt));
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+	struct mali_sync_pt *mpt;
+
+	if (!pt)
+		return NULL;
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->order = atomic_inc_return(&mtl->counter);
+	mpt->result = 0;
+
+	return pt;
+}
+
+void kbase_sync_signal_pt(struct sync_pt *pt, int result)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+	int signalled;
+	int diff;
+
+	mpt->result = result;
+
+	do {
+		signalled = atomic_read(&mtl->signalled);
+
+		diff = signalled - mpt->order;
+
+		if (diff > 0) {
+			/* The timeline is already at or ahead of this point.
+			 * This should not happen unless userspace has been
+			 * signalling fences out of order, so warn but don't
+			 * violate the sync_pt API.
+			 * The warning is only in debug builds to prevent
+			 * a malicious user being able to spam dmesg.
+			 */
+#ifdef CONFIG_MALI_DEBUG
+			pr_err("Fences were triggered in a different order to allocation!");
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+	} while (atomic_cmpxchg(&mtl->signalled, signalled, mpt->order) != signalled);
+}
+
+#endif				/* CONFIG_SYNC */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
new file mode 100755
index 0000000..6d8e34d
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -0,0 +1,91 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync.h
+ *
+ */
+
+#ifndef MALI_KBASE_SYNC_H
+#define MALI_KBASE_SYNC_H
+
+#include "sync.h"
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+/* For backwards compatiblility with kernels before 3.17. After 3.17
+ * sync_pt_parent is included in the kernel. */
+static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
+{
+	return pt->parent;
+}
+#endif
+
+/*
+ * Create a stream object.
+ * Built on top of timeline object.
+ * Exposed as a file descriptor.
+ * Life-time controlled via the file descriptor:
+ * - dup to add a ref
+ * - close to remove a ref
+ */
+int kbase_stream_create(const char *name, int *const out_fd);
+
+/*
+ * Create a fence in a stream object
+ */
+int kbase_stream_create_fence(int tl_fd);
+
+/*
+ * Validate a fd to be a valid fence
+ * No reference is taken.
+ *
+ * This function is only usable to catch unintentional user errors early,
+ * it does not stop malicious code changing the fd after this function returns.
+ */
+int kbase_fence_validate(int fd);
+
+/* Returns true if the specified timeline is allocated by Mali */
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline);
+
+/* Allocates a timeline for Mali
+ *
+ * One timeline should be allocated per API context.
+ */
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name);
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by kbase_sync_timeline_alloc
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ */
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent);
+
+/* Signals a particular sync point
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ *
+ * If they are signalled in the wrong order then a message will be printed in debug
+ * builds and otherwise attempts to signal order sync_pts will be ignored.
+ *
+ * result can be negative to indicate error, any other value is interpreted as success.
+ */
+void kbase_sync_signal_pt(struct sync_pt *pt, int result);
+
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
new file mode 100755
index 0000000..ddd0847
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync_user.c
+ *
+ */
+
+#ifdef CONFIG_SYNC
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <mali_kbase_sync.h>
+#include <mali_base_kernel_sync.h>
+
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+	struct sync_timeline *tl;
+
+	tl = (struct sync_timeline *)file->private_data;
+	BUG_ON(!tl);
+	sync_timeline_destroy(tl);
+	return 0;
+}
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE,
+	.release = kbase_stream_close,
+};
+
+int kbase_stream_create(const char *name, int *const out_fd)
+{
+	struct sync_timeline *tl;
+
+	BUG_ON(!out_fd);
+
+	tl = kbase_sync_timeline_alloc(name);
+	if (!tl)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY | O_CLOEXEC);
+
+	if (*out_fd < 0) {
+		sync_timeline_destroy(tl);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int kbase_stream_create_fence(int tl_fd)
+{
+	struct sync_timeline *tl;
+	struct sync_pt *pt;
+	struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+	struct files_struct *files;
+	struct fdtable *fdt;
+#endif
+	int fd;
+	struct file *tl_file;
+
+	tl_file = fget(tl_fd);
+	if (tl_file == NULL)
+		return -EBADF;
+
+	if (tl_file->f_op != &stream_fops) {
+		fd = -EBADF;
+		goto out;
+	}
+
+	tl = tl_file->private_data;
+
+	pt = kbase_sync_pt_alloc(tl);
+	if (!pt) {
+		fd = -EFAULT;
+		goto out;
+	}
+
+	fence = sync_fence_create("mali_fence", pt);
+	if (!fence) {
+		sync_pt_free(pt);
+		fd = -EFAULT;
+		goto out;
+	}
+
+	/* from here the fence owns the sync_pt */
+
+	/* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+#else
+	fd = get_unused_fd();
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+
+	files = current->files;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	__set_close_on_exec(fd, fdt);
+#else
+	FD_SET(fd, fdt->close_on_exec);
+#endif
+	spin_unlock(&files->file_lock);
+#endif  /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+	/* bind fence to the new fd */
+	sync_fence_install(fence, fd);
+
+ out:
+	fput(tl_file);
+
+	return fd;
+}
+
+int kbase_fence_validate(int fd)
+{
+	struct sync_fence *fence;
+
+	fence = sync_fence_fdget(fd);
+	if (!fence)
+		return -EINVAL;
+
+	sync_fence_put(fence);
+	return 0;
+}
+
+#endif				/* CONFIG_SYNC */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
new file mode 100755
index 0000000..962b19c
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -0,0 +1,2246 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+/*****************************************************************************/
+
+/* The version of swtrace protocol used in timeline stream. */
+#define SWTRACE_VERSION    3
+
+/* The maximum expected length of string in tracepoint descriptor. */
+#define STRLEN_MAX         64 /* bytes */
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC       1000000000ull /* ns */
+
+/* The period of autoflush checker execution in milliseconds. */
+#define AUTOFLUSH_INTERVAL 1000 /* ms */
+
+/* The maximum size of a single packet used by timeline. */
+#define PACKET_SIZE        4096 /* bytes */
+
+/* The number of packets used by one timeline stream. */
+#define PACKET_COUNT       16
+
+/* The number of bytes reserved for packet header.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_HEADER_SIZE 8 /* bytes */
+
+/* The number of bytes reserved for packet sequence number.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_NUMBER_SIZE 4 /* bytes */
+
+/* Packet header - first word.
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_STREAMID_POS  0
+#define PACKET_STREAMID_LEN  8
+#define PACKET_RSVD1_POS     (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
+#define PACKET_RSVD1_LEN     8
+#define PACKET_TYPE_POS      (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
+#define PACKET_TYPE_LEN      3
+#define PACKET_CLASS_POS     (PACKET_TYPE_POS + PACKET_TYPE_LEN)
+#define PACKET_CLASS_LEN     7
+#define PACKET_FAMILY_POS    (PACKET_CLASS_POS + PACKET_CLASS_LEN)
+#define PACKET_FAMILY_LEN    6
+
+/* Packet header - second word
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_LENGTH_POS    0
+#define PACKET_LENGTH_LEN    24
+#define PACKET_SEQBIT_POS    (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
+#define PACKET_SEQBIT_LEN    1
+#define PACKET_RSVD2_POS     (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
+#define PACKET_RSVD2_LEN     7
+
+/* Types of streams generated by timeline.
+ * Order is significant! Header streams must precede respective body streams. */
+enum tl_stream_type {
+	TL_STREAM_TYPE_OBJ_HEADER,
+	TL_STREAM_TYPE_OBJ_SUMMARY,
+	TL_STREAM_TYPE_OBJ,
+	TL_STREAM_TYPE_AUX_HEADER,
+	TL_STREAM_TYPE_AUX,
+
+	TL_STREAM_TYPE_COUNT
+};
+
+/* Timeline packet family ids.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_family {
+	TL_PACKET_FAMILY_CTRL = 0, /* control packets */
+	TL_PACKET_FAMILY_TL   = 1, /* timeline packets */
+
+	TL_PACKET_FAMILY_COUNT
+};
+
+/* Packet classes used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_class {
+	TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
+	TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
+};
+
+/* Packet types used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_type {
+	TL_PACKET_TYPE_HEADER  = 0, /* stream's header/directory */
+	TL_PACKET_TYPE_BODY    = 1, /* stream's body */
+	TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
+};
+
+/* Message ids of trace events that are recorded in the timeline stream. */
+enum tl_msg_id_obj {
+	/* Timeline object events. */
+	KBASE_TL_NEW_CTX,
+	KBASE_TL_NEW_GPU,
+	KBASE_TL_NEW_LPU,
+	KBASE_TL_NEW_ATOM,
+	KBASE_TL_NEW_AS,
+	KBASE_TL_DEL_CTX,
+	KBASE_TL_DEL_ATOM,
+	KBASE_TL_LIFELINK_LPU_GPU,
+	KBASE_TL_LIFELINK_AS_GPU,
+	KBASE_TL_RET_CTX_LPU,
+	KBASE_TL_RET_ATOM_CTX,
+	KBASE_TL_RET_ATOM_LPU,
+	KBASE_TL_NRET_CTX_LPU,
+	KBASE_TL_NRET_ATOM_CTX,
+	KBASE_TL_NRET_ATOM_LPU,
+	KBASE_TL_RET_AS_CTX,
+	KBASE_TL_NRET_AS_CTX,
+	KBASE_TL_RET_ATOM_AS,
+	KBASE_TL_NRET_ATOM_AS,
+	KBASE_TL_DEP_ATOM_ATOM,
+	KBASE_TL_NDEP_ATOM_ATOM,
+	KBASE_TL_RDEP_ATOM_ATOM,
+	KBASE_TL_ATTRIB_ATOM_CONFIG,
+	KBASE_TL_ATTRIB_AS_CONFIG,
+
+	/* Job dump specific events. */
+	KBASE_JD_GPU_SOFT_RESET
+};
+
+/* Message ids of trace events that are recorded in the auxiliary stream. */
+enum tl_msg_id_aux {
+	KBASE_AUX_PM_STATE,
+	KBASE_AUX_ISSUE_JOB_SOFTSTOP,
+	KBASE_AUX_JOB_SOFTSTOP,
+	KBASE_AUX_JOB_SOFTSTOP_EX,
+	KBASE_AUX_PAGEFAULT,
+	KBASE_AUX_PAGESALLOC
+};
+
+/*****************************************************************************/
+
+/**
+ * struct tl_stream - timeline stream structure
+ * @lock: message order lock
+ * @buffer: array of buffers
+ * @wbi: write buffer index
+ * @rbi: read buffer index
+ * @numbered: if non-zero stream's packets are sequentially numbered
+ * @autoflush_counter: counter tracking stream's autoflush state
+ *
+ * This structure holds information needed to construct proper packets in the
+ * timeline stream. Each message in sequence must bear timestamp that is greater
+ * to one in previous message in the same stream. For this reason lock is held
+ * throughout the process of message creation. Each stream contains set of
+ * buffers. Each buffer will hold one MIPE packet. In case there is no free
+ * space required to store incoming message the oldest buffer is discarded.
+ * Each packet in timeline body stream has sequence number embedded (this value
+ * must increment monotonically and is used by packets receiver to discover
+ * buffer overflows.
+ * Autoflush counter is set to negative number when there is no data pending
+ * for flush and it is set to zero on every update of the buffer. Autoflush
+ * timer will increment the counter by one on every expiry. In case there will
+ * be no activity on the buffer during two consecutive timer expiries, stream
+ * buffer will be flushed.
+ */
+struct tl_stream {
+	spinlock_t lock;
+
+	struct {
+		atomic_t size;              /* number of bytes in buffer */
+		char     data[PACKET_SIZE]; /* buffer's data */
+	} buffer[PACKET_COUNT];
+
+	atomic_t wbi;
+	atomic_t rbi;
+
+	int      numbered;
+	atomic_t autoflush_counter;
+};
+
+/**
+ * struct tp_desc - tracepoint message descriptor structure
+ * @id:        tracepoint ID identifying message in stream
+ * @id_str:    human readable version of tracepoint ID
+ * @name:      tracepoint description
+ * @arg_types: tracepoint's arguments types declaration
+ * @arg_names: comma separated list of tracepoint's arguments names
+ */
+struct tp_desc {
+	u32        id;
+	const char *id_str;
+	const char *name;
+	const char *arg_types;
+	const char *arg_names;
+};
+
+/*****************************************************************************/
+
+/* Configuration of timeline streams generated by kernel.
+ * Kernel emit only streams containing either timeline object events or
+ * auxiliary events. All streams have stream id value of 1 (as opposed to user
+ * space streams that have value of 0). */
+static const struct {
+	enum tl_packet_family pkt_family;
+	enum tl_packet_class  pkt_class;
+	enum tl_packet_type   pkt_type;
+	unsigned int          stream_id;
+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY,    1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY,    1}
+};
+
+/* The timeline streams generated by kernel. */
+static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT];
+
+/* Autoflush timer. */
+static struct timer_list autoflush_timer;
+
+/* If non-zero autoflush timer is active. */
+static atomic_t autoflush_timer_active;
+
+/* Reader lock. Only one reader is allowed to have access to the timeline
+ * streams at any given time. */
+static DEFINE_MUTEX(tl_reader_lock);
+
+/* Timeline stream event queue. */
+static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
+
+/* The timeline stream file operations functions. */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos);
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait);
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations kbasep_tlstream_fops = {
+	.release = kbasep_tlstream_release,
+	.read    = kbasep_tlstream_read,
+	.poll    = kbasep_tlstream_poll,
+};
+
+/* Descriptors of timeline messages transmitted in object events stream. */
+static const struct tp_desc tp_desc_obj[] = {
+	{
+		KBASE_TL_NEW_CTX,
+		__stringify(KBASE_TL_NEW_CTX),
+		"object ctx is created",
+		"@pII",
+		"ctx,ctx_nr,tgid"
+	},
+	{
+		KBASE_TL_NEW_GPU,
+		__stringify(KBASE_TL_NEW_GPU),
+		"object gpu is created",
+		"@pII",
+		"gpu,gpu_id,core_count"
+	},
+	{
+		KBASE_TL_NEW_LPU,
+		__stringify(KBASE_TL_NEW_LPU),
+		"object lpu is created",
+		"@pII",
+		"lpu,lpu_nr,lpu_fn"
+	},
+	{
+		KBASE_TL_NEW_ATOM,
+		__stringify(KBASE_TL_NEW_ATOM),
+		"object atom is created",
+		"@pI",
+		"atom,atom_nr"
+	},
+	{
+		KBASE_TL_NEW_AS,
+		__stringify(KBASE_TL_NEW_AS),
+		"address space object is created",
+		"@pI",
+		"address_space,as_nr"
+	},
+	{
+		KBASE_TL_DEL_CTX,
+		__stringify(KBASE_TL_DEL_CTX),
+		"context is destroyed",
+		"@p",
+		"ctx"
+	},
+	{
+		KBASE_TL_DEL_ATOM,
+		__stringify(KBASE_TL_DEL_ATOM),
+		"atom is destroyed",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_LIFELINK_LPU_GPU,
+		__stringify(KBASE_TL_LIFELINK_LPU_GPU),
+		"lpu is deleted with gpu",
+		"@pp",
+		"lpu,gpu"
+	},
+	{
+		KBASE_TL_LIFELINK_AS_GPU,
+		__stringify(KBASE_TL_LIFELINK_AS_GPU),
+		"address space is deleted with gpu",
+		"@pp",
+		"address_space,gpu"
+	},
+	{
+		KBASE_TL_RET_CTX_LPU,
+		__stringify(KBASE_TL_RET_CTX_LPU),
+		"context is retained by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_RET_ATOM_CTX,
+		__stringify(KBASE_TL_RET_ATOM_CTX),
+		"atom is retained by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_LPU,
+		__stringify(KBASE_TL_RET_ATOM_LPU),
+		"atom is retained by lpu",
+		"@pps",
+		"atom,lpu,attrib_match_list"
+	},
+	{
+		KBASE_TL_NRET_CTX_LPU,
+		__stringify(KBASE_TL_NRET_CTX_LPU),
+		"context is released by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_NRET_ATOM_CTX,
+		__stringify(KBASE_TL_NRET_ATOM_CTX),
+		"atom is released by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_NRET_ATOM_LPU,
+		__stringify(KBASE_TL_NRET_ATOM_LPU),
+		"atom is released by lpu",
+		"@pp",
+		"atom,lpu"
+	},
+	{
+		KBASE_TL_RET_AS_CTX,
+		__stringify(KBASE_TL_RET_AS_CTX),
+		"address space is retained by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_NRET_AS_CTX,
+		__stringify(KBASE_TL_NRET_AS_CTX),
+		"address space is released by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_AS,
+		__stringify(KBASE_TL_RET_ATOM_AS),
+		"atom is retained by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_NRET_ATOM_AS,
+		__stringify(KBASE_TL_NRET_ATOM_AS),
+		"atom is released by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_DEP_ATOM_ATOM,
+		__stringify(KBASE_TL_DEP_ATOM_ATOM),
+		"atom2 depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_NDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_NDEP_ATOM_ATOM),
+		"atom2 no longer depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_RDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_RDEP_ATOM_ATOM),
+		"resolved dependecy of atom2 depending on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_ATOM_CONFIG),
+		"atom job slot attributes",
+		"@pLLI",
+		"atom,descriptor,affinity,config"
+	},
+	{
+		KBASE_TL_ATTRIB_AS_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_AS_CONFIG),
+		"address space attributes",
+		"@pLLL",
+		"address_space,transtab,memattr,transcfg"
+	},
+	{
+		KBASE_JD_GPU_SOFT_RESET,
+		__stringify(KBASE_JD_GPU_SOFT_RESET),
+		"gpu soft reset",
+		"@p",
+		"gpu"
+	},
+};
+
+/* Descriptors of timeline messages transmitted in auxiliary events stream. */
+static const struct tp_desc tp_desc_aux[] = {
+	{
+		KBASE_AUX_PM_STATE,
+		__stringify(KBASE_AUX_PM_STATE),
+		"PM state",
+		"@IL",
+		"core_type,core_state_bitset"
+	},
+	{
+		KBASE_AUX_ISSUE_JOB_SOFTSTOP,
+		__stringify(KBASE_AUX_ISSUE_JOB_SOFTSTOP),
+		"Issuing job soft stop",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_AUX_JOB_SOFTSTOP,
+		__stringify(KBASE_AUX_JOB_SOFTSTOP),
+		"Job soft stop",
+		"@I",
+		"tag_id"
+	},
+	{
+		KBASE_AUX_JOB_SOFTSTOP_EX,
+		__stringify(KBASE_AUX_JOB_SOFTSTOP_EX),
+		"Job soft stop, more details",
+		"@pI",
+		"atom,job_type"
+	},
+	{
+		KBASE_AUX_PAGEFAULT,
+		__stringify(KBASE_AUX_PAGEFAULT),
+		"Page fault",
+		"@IL",
+		"ctx_nr,page_cnt_change"
+	},
+	{
+		KBASE_AUX_PAGESALLOC,
+		__stringify(KBASE_AUX_PAGESALLOC),
+		"Total alloc pages change",
+		"@IL",
+		"ctx_nr,page_cnt"
+	}
+};
+
+#if MALI_UNIT_TEST
+/* Number of bytes read by user. */
+static atomic_t tlstream_bytes_collected = {0};
+
+/* Number of bytes generated by tracepoint messages. */
+static atomic_t tlstream_bytes_generated = {0};
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+/* Indicator of whether the timeline stream file descriptor is used. */
+atomic_t kbase_tlstream_enabled = {0};
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ * Return: timestamp value
+ */
+static u64 kbasep_tlstream_get_timestamp(void)
+{
+	struct timespec ts;
+	u64             timestamp;
+
+	getrawmonotonic(&ts);
+	timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+	return timestamp;
+}
+
+/**
+ * kbasep_tlstream_write_bytes - write data to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ * @bytes:  pointer to buffer holding data
+ * @len:    length of data to be written
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_bytes(
+		char       *buffer,
+		size_t     pos,
+		const void *bytes,
+		size_t     len)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(bytes);
+
+	memcpy(&buffer[pos], bytes, len);
+
+	return pos + len;
+}
+
+/**
+ * kbasep_tlstream_write_string - write string to message buffer
+ * @buffer:         buffer where data will be written
+ * @pos:            position in the buffer where to place data
+ * @string:         pointer to buffer holding the source string
+ * @max_write_size: number of bytes that can be stored in buffer
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_string(
+		char       *buffer,
+		size_t     pos,
+		const char *string,
+		size_t     max_write_size)
+{
+	u32 string_len;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(string);
+	/* Timeline string consists of at least string length and nul
+	 * terminator. */
+	KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
+	max_write_size -= sizeof(string_len);
+
+	string_len = strlcpy(
+			&buffer[pos + sizeof(string_len)],
+			string,
+			max_write_size);
+	string_len += sizeof(char);
+
+	/* Make sure that the source string fit into the buffer. */
+	KBASE_DEBUG_ASSERT(string_len <= max_write_size);
+
+	/* Update string length. */
+	memcpy(&buffer[pos], &string_len, sizeof(string_len));
+
+	return pos + sizeof(string_len) + string_len;
+}
+
+/**
+ * kbasep_tlstream_write_timestamp - write timestamp to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos)
+{
+	u64 timestamp = kbasep_tlstream_get_timestamp();
+
+	return kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&timestamp, sizeof(timestamp));
+}
+
+/**
+ * kbasep_tlstream_put_bits - put bits in a word
+ * @word:   pointer to the words being modified
+ * @value:  value that shall be written to given position
+ * @bitpos: position where value shall be written (in bits)
+ * @bitlen: length of value (in bits)
+ */
+static void kbasep_tlstream_put_bits(
+		u32          *word,
+		u32          value,
+		unsigned int bitpos,
+		unsigned int bitlen)
+{
+	const u32 mask = ((1 << bitlen) - 1) << bitpos;
+
+	KBASE_DEBUG_ASSERT(word);
+	KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen));
+	KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32);
+
+	*word &= ~mask;
+	*word |= ((value << bitpos) & mask);
+}
+
+/**
+ * kbasep_tlstream_packet_header_setup - setup the packet header
+ * @buffer:     pointer to the buffer
+ * @pkt_family: packet's family
+ * @pkt_type:   packet's type
+ * @pkt_class:  packet's class
+ * @stream_id:  stream id
+ * @numbered:   non-zero if this stream is numbered
+ *
+ * Function sets up immutable part of packet header in the given buffer.
+ */
+static void kbasep_tlstream_packet_header_setup(
+		char                  *buffer,
+		enum tl_packet_family pkt_family,
+		enum tl_packet_class  pkt_class,
+		enum tl_packet_type   pkt_type,
+		unsigned int          stream_id,
+		int                   numbered)
+{
+	u32 word0 = 0;
+	u32 word1 = 0;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL);
+	KBASE_DEBUG_ASSERT(
+			(pkt_type == TL_PACKET_TYPE_HEADER)  ||
+			(pkt_type == TL_PACKET_TYPE_SUMMARY) ||
+			(pkt_type == TL_PACKET_TYPE_BODY));
+	KBASE_DEBUG_ASSERT(
+			(pkt_class == TL_PACKET_CLASS_OBJ) ||
+			(pkt_class == TL_PACKET_CLASS_AUX));
+
+	kbasep_tlstream_put_bits(
+			&word0, pkt_family,
+			PACKET_FAMILY_POS, PACKET_FAMILY_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_class,
+			PACKET_CLASS_POS, PACKET_CLASS_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_type,
+			PACKET_TYPE_POS, PACKET_TYPE_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, stream_id,
+			PACKET_STREAMID_POS, PACKET_STREAMID_LEN);
+
+	if (numbered)
+		kbasep_tlstream_put_bits(
+				&word1, 1,
+				PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN);
+
+	memcpy(&buffer[0],             &word0, sizeof(word0));
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_header_update - update the packet header
+ * @buffer:    pointer to the buffer
+ * @data_size: amount of data carried in this packet
+ *
+ * Function updates mutable part of packet header in the given buffer.
+ * Note that value of data_size must not including size of the header.
+ */
+static void kbasep_tlstream_packet_header_update(
+		char   *buffer,
+		size_t data_size)
+{
+	u32 word0;
+	u32 word1;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	CSTD_UNUSED(word0);
+
+	memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1));
+
+	kbasep_tlstream_put_bits(
+			&word1, data_size,
+			PACKET_LENGTH_POS, PACKET_LENGTH_LEN);
+
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_number_update - update the packet number
+ * @buffer:  pointer to the buffer
+ * @counter: value of packet counter for this packet's stream
+ *
+ * Function updates packet number embedded within the packet placed in the
+ * given buffer.
+ */
+static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+
+	memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter));
+}
+
+/**
+ * kbasep_timeline_stream_reset - reset stream
+ * @stream:  pointer to the stream structure
+ *
+ * Function discards all pending messages and resets packet counters.
+ */
+static void kbasep_timeline_stream_reset(struct tl_stream *stream)
+{
+	unsigned int i;
+
+	for (i = 0; i < PACKET_COUNT; i++) {
+		if (stream->numbered)
+			atomic_set(
+					&stream->buffer[i].size,
+					PACKET_HEADER_SIZE +
+					PACKET_NUMBER_SIZE);
+		else
+			atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE);
+	}
+
+	atomic_set(&stream->wbi, 0);
+	atomic_set(&stream->rbi, 0);
+}
+
+/**
+ * kbasep_timeline_stream_init - initialize timeline stream
+ * @stream:      pointer to the stream structure
+ * @stream_type: stream type
+ */
+static void kbasep_timeline_stream_init(
+		struct tl_stream    *stream,
+		enum tl_stream_type stream_type)
+{
+	unsigned int i;
+
+	KBASE_DEBUG_ASSERT(stream);
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	spin_lock_init(&stream->lock);
+
+	/* All packets carrying tracepoints shall be numbered. */
+	if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type)
+		stream->numbered = 1;
+	else
+		stream->numbered = 0;
+
+	for (i = 0; i < PACKET_COUNT; i++)
+		kbasep_tlstream_packet_header_setup(
+				stream->buffer[i].data,
+				tl_stream_cfg[stream_type].pkt_family,
+				tl_stream_cfg[stream_type].pkt_class,
+				tl_stream_cfg[stream_type].pkt_type,
+				tl_stream_cfg[stream_type].stream_id,
+				stream->numbered);
+
+	kbasep_timeline_stream_reset(tl_stream[stream_type]);
+}
+
+/**
+ * kbasep_timeline_stream_term - terminate timeline stream
+ * @stream: pointer to the stream structure
+ */
+static void kbasep_timeline_stream_term(struct tl_stream *stream)
+{
+	KBASE_DEBUG_ASSERT(stream);
+}
+
+/**
+ * kbasep_tlstream_msgbuf_submit - submit packet to the user space
+ * @stream:     pointer to the stream structure
+ * @wb_idx_raw: write buffer index
+ * @wb_size:    length of data stored in current buffer
+ *
+ * Function updates currently written buffer with packet header. Then write
+ * index is incremented and buffer is handled to user space. Parameters
+ * of new buffer are returned using provided arguments.
+ *
+ * Return: length of data in new buffer
+ *
+ * Warning:  User must update the stream structure with returned value.
+ */
+static size_t kbasep_tlstream_msgbuf_submit(
+		struct tl_stream *stream,
+		unsigned int      wb_idx_raw,
+		unsigned int      wb_size)
+{
+	unsigned int rb_idx_raw = atomic_read(&stream->rbi);
+	unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
+
+	/* Set stream as flushed. */
+	atomic_set(&stream->autoflush_counter, -1);
+
+	kbasep_tlstream_packet_header_update(
+			stream->buffer[wb_idx].data,
+			wb_size - PACKET_HEADER_SIZE);
+
+	if (stream->numbered)
+		kbasep_tlstream_packet_number_update(
+				stream->buffer[wb_idx].data,
+				wb_idx_raw);
+
+	/* Increasing write buffer index will expose this packet to the reader.
+	 * As stream->lock is not taken on reader side we must make sure memory
+	 * is updated correctly before this will happen. */
+	smp_wmb();
+	wb_idx_raw++;
+	atomic_set(&stream->wbi, wb_idx_raw);
+
+	/* Inform user that packets are ready for reading. */
+	wake_up_interruptible(&tl_event_queue);
+
+	/* Detect and mark overflow in this stream. */
+	if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) {
+		/* Reader side depends on this increment to correctly handle
+		 * overflows. The value shall be updated only if it was not
+		 * modified by the reader. The data holding buffer will not be
+		 * updated before stream->lock is released, however size of the
+		 * buffer will. Make sure this increment is globally visible
+		 * before information about selected write buffer size. */
+		atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1);
+	}
+
+	wb_size = PACKET_HEADER_SIZE;
+	if (stream->numbered)
+		wb_size += PACKET_NUMBER_SIZE;
+
+	return wb_size;
+}
+
+/**
+ * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer
+ * @stream_type: type of the stream that shall be locked
+ * @msg_size:    message size
+ * @flags:       pointer to store flags passed back on stream release
+ *
+ * Function will lock the stream and reserve the number of bytes requested
+ * in msg_size for the user.
+ *
+ * Return: pointer to the buffer where message can be stored
+ *
+ * Warning: Stream must be released with kbasep_tlstream_msgbuf_release().
+ *          Only atomic operations are allowed while stream is locked
+ *          (i.e. do not use any operation that may sleep).
+ */
+static char *kbasep_tlstream_msgbuf_acquire(
+		enum tl_stream_type stream_type,
+		size_t              msg_size,
+		unsigned long       *flags) __acquires(&stream->lock)
+{
+	struct tl_stream *stream;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(
+			PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
+			msg_size);
+
+	stream = tl_stream[stream_type];
+
+	spin_lock_irqsave(&stream->lock, *flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	/* Select next buffer if data will not fit into current one. */
+	if (PACKET_SIZE < wb_size + msg_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx  = (wb_idx_raw + 1) % PACKET_COUNT;
+	}
+
+	/* Reserve space in selected buffer. */
+	atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size);
+
+#if MALI_UNIT_TEST
+	atomic_add(msg_size, &tlstream_bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+	return &stream->buffer[wb_idx].data[wb_size];
+}
+
+/**
+ * kbasep_tlstream_msgbuf_release - unlock selected stream
+ * @stream_type:  type of the stream that shall be locked
+ * @flags:        value obtained during stream acquire
+ *
+ * Function releases stream that has been previously locked with a call to
+ * kbasep_tlstream_msgbuf_acquire().
+ */
+static void kbasep_tlstream_msgbuf_release(
+		enum tl_stream_type stream_type,
+		unsigned long       flags) __releases(&stream->lock)
+{
+	struct tl_stream *stream;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	stream = tl_stream[stream_type];
+
+	/* Mark stream as containing unflushed data. */
+	atomic_set(&stream->autoflush_counter, 0);
+
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_flush_stream - flush stream
+ * @stype:  type of stream to be flushed
+ *
+ * Flush pending data in timeline stream.
+ */
+static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
+{
+	struct tl_stream *stream = tl_stream[stype];
+	unsigned long    flags;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+	size_t           min_size = PACKET_HEADER_SIZE;
+
+	if (stream->numbered)
+		min_size += PACKET_NUMBER_SIZE;
+
+	spin_lock_irqsave(&stream->lock, flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	if (wb_size > min_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+		atomic_set(&stream->buffer[wb_idx].size, wb_size);
+	}
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/**
+ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
+ * @data:  unused
+ *
+ * Timer is executed periodically to check if any of the stream contains
+ * buffer ready to be submitted to user space.
+ */
+static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
+{
+	enum tl_stream_type stype;
+	int                 rcode;
+
+	CSTD_UNUSED(data);
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
+		struct tl_stream *stream = tl_stream[stype];
+		unsigned long    flags;
+		unsigned int     wb_idx_raw;
+		unsigned int     wb_idx;
+		size_t           wb_size;
+		size_t           min_size = PACKET_HEADER_SIZE;
+
+		int af_cnt = atomic_read(&stream->autoflush_counter);
+
+		/* Check if stream contain unflushed data. */
+		if (0 > af_cnt)
+			continue;
+
+		/* Check if stream should be flushed now. */
+		if (af_cnt != atomic_cmpxchg(
+					&stream->autoflush_counter,
+					af_cnt,
+					af_cnt + 1))
+			continue;
+		if (!af_cnt)
+			continue;
+
+		/* Autoflush this stream. */
+		if (stream->numbered)
+			min_size += PACKET_NUMBER_SIZE;
+
+		spin_lock_irqsave(&stream->lock, flags);
+
+		wb_idx_raw = atomic_read(&stream->wbi);
+		wb_idx     = wb_idx_raw % PACKET_COUNT;
+		wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+		if (wb_size > min_size) {
+			wb_size = kbasep_tlstream_msgbuf_submit(
+					stream, wb_idx_raw, wb_size);
+			wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+			atomic_set(&stream->buffer[wb_idx].size,
+					wb_size);
+		}
+		spin_unlock_irqrestore(&stream->lock, flags);
+	}
+
+	if (atomic_read(&autoflush_timer_active))
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+	CSTD_UNUSED(rcode);
+}
+
+/**
+ * kbasep_tlstream_packet_pending - check timeline streams for pending packets
+ * @stype:      pointer to variable where stream type will be placed
+ * @rb_idx_raw: pointer to variable where read buffer index will be placed
+ *
+ * Function checks all streams for pending packets. It will stop as soon as
+ * packet ready to be submitted to user space is detected. Variables under
+ * pointers, passed as the parameters to this function will be updated with
+ * values pointing to right stream and buffer.
+ *
+ * Return: non-zero if any of timeline streams has at last one packet ready
+ */
+static int kbasep_tlstream_packet_pending(
+		enum tl_stream_type *stype,
+		unsigned int        *rb_idx_raw)
+{
+	int pending = 0;
+
+	KBASE_DEBUG_ASSERT(stype);
+	KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+	for (
+			*stype = 0;
+			(*stype < TL_STREAM_TYPE_COUNT) && !pending;
+			(*stype)++) {
+		if (NULL != tl_stream[*stype]) {
+			*rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi);
+			/* Read buffer index may be updated by writer in case of
+			 * overflow. Read and write buffer indexes must be
+			 * loaded in correct order. */
+			smp_rmb();
+			if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw)
+				pending = 1;
+		}
+	}
+	(*stype)--;
+
+	return pending;
+}
+
+/**
+ * kbasep_tlstream_read - copy data from streams to buffer provided by user
+ * @filp:   pointer to file structure (unused)
+ * @buffer: pointer to the buffer provided by user
+ * @size:   maximum amount of data that can be stored in the buffer
+ * @f_pos:  pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer
+ */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos)
+{
+	ssize_t copy_len = 0;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(f_pos);
+	CSTD_UNUSED(filp);
+
+	if ((0 > *f_pos) || (PACKET_SIZE > size))
+		return -EINVAL;
+
+	mutex_lock(&tl_reader_lock);
+
+	while (copy_len < size) {
+		enum tl_stream_type stype;
+		unsigned int        rb_idx_raw;
+		unsigned int        rb_idx;
+		size_t              rb_size;
+
+		/* If we don't have any data yet, wait for packet to be
+		 * submitted. If we already read some packets and there is no
+		 * packet pending return back to user. */
+		if (0 < copy_len) {
+			if (!kbasep_tlstream_packet_pending(
+						&stype,
+						&rb_idx_raw))
+				break;
+		} else {
+			if (wait_event_interruptible(
+						tl_event_queue,
+						kbasep_tlstream_packet_pending(
+							&stype,
+							&rb_idx_raw))) {
+				copy_len = -ERESTARTSYS;
+				break;
+			}
+		}
+
+		/* Check if this packet fits into the user buffer.
+		 * If so copy its content. */
+		rb_idx = rb_idx_raw % PACKET_COUNT;
+		rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size);
+		if (rb_size > size - copy_len)
+			break;
+		if (copy_to_user(
+					&buffer[copy_len],
+					tl_stream[stype]->buffer[rb_idx].data,
+					rb_size)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* If the rbi still points to the packet we just processed
+		 * then there was no overflow so we add the copied size to
+		 * copy_len and move rbi on to the next packet
+		 */
+		smp_rmb();
+		if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
+			copy_len += rb_size;
+			atomic_inc(&tl_stream[stype]->rbi);
+
+#if MALI_UNIT_TEST
+			atomic_add(rb_size, &tlstream_bytes_collected);
+#endif /* MALI_UNIT_TEST */
+		}
+	}
+
+	mutex_unlock(&tl_reader_lock);
+
+	return copy_len;
+}
+
+/**
+ * kbasep_tlstream_poll - poll timeline stream for packets
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait)
+{
+	enum tl_stream_type stream_type;
+	unsigned int        rb_idx;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	poll_wait(filp, &tl_event_queue, wait);
+	if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_release - release timeline stream descriptor
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ *
+ * Return always return zero
+ */
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
+{
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+	CSTD_UNUSED(inode);
+	CSTD_UNUSED(filp);
+
+	/* Stop autoflush timer before releasing access to streams. */
+	atomic_set(&autoflush_timer_active, 0);
+	del_timer_sync(&autoflush_timer);
+
+	atomic_set(&kbase_tlstream_enabled, 0);
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_timeline_header - prepare timeline header stream packet
+ * @stream_type: type of the stream that will carry header data
+ * @tp_desc:     pointer to array with tracepoint descriptors
+ * @tp_count:    number of descriptors in the given array
+ *
+ * Functions fills in information about tracepoints stored in body stream
+ * associated with this header stream.
+ */
+static void kbasep_tlstream_timeline_header(
+		enum tl_stream_type  stream_type,
+		const struct tp_desc *tp_desc,
+		u32                  tp_count)
+{
+	const u8      tv = SWTRACE_VERSION; /* protocol version */
+	const u8      ps = sizeof(void *); /* pointer size */
+	size_t        msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
+	char          *buffer;
+	size_t        pos = 0;
+	unsigned long flags;
+	unsigned int  i;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(tp_desc);
+
+	/* Calculate the size of the timeline message. */
+	for (i = 0; i < tp_count; i++) {
+		msg_size += sizeof(tp_desc[i].id);
+		msg_size +=
+			strnlen(tp_desc[i].id_str,    STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].name,      STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_types, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_names, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+	}
+
+	KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size);
+
+	buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tp_count, sizeof(tp_count));
+
+	for (i = 0; i < tp_count; i++) {
+		pos = kbasep_tlstream_write_bytes(
+				buffer, pos,
+				&tp_desc[i].id, sizeof(tp_desc[i].id));
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].id_str, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].name, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_types, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_names, msg_size - pos);
+	}
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(stream_type, flags);
+
+	/* We don't expect any more data to be read in this stream.
+	 * As header stream must be read before its associated body stream,
+	 * make this packet visible to the user straightaway. */
+	kbasep_tlstream_flush_stream(stream_type);
+}
+
+/*****************************************************************************/
+
+int kbase_tlstream_init(void)
+{
+	enum tl_stream_type i;
+
+	/* Prepare stream structures. */
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL);
+		if (!tl_stream[i])
+			break;
+		kbasep_timeline_stream_init(tl_stream[i], i);
+	}
+	if (TL_STREAM_TYPE_COUNT > i) {
+		for (; i > 0; i--) {
+			kbasep_timeline_stream_term(tl_stream[i - 1]);
+			kfree(tl_stream[i - 1]);
+		}
+		return -ENOMEM;
+	}
+
+	/* Initialize autoflush timer. */
+	atomic_set(&autoflush_timer_active, 0);
+	setup_timer(&autoflush_timer,
+			kbasep_tlstream_autoflush_timer_callback,
+			0);
+
+	return 0;
+}
+
+void kbase_tlstream_term(void)
+{
+	enum tl_stream_type i;
+
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		kbasep_timeline_stream_term(tl_stream[i]);
+		kfree(tl_stream[i]);
+	}
+}
+
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd)
+{
+	if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, 1)) {
+		int rcode;
+
+		*fd = anon_inode_getfd(
+				"[mali_tlstream]",
+				&kbasep_tlstream_fops,
+				kctx,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd) {
+			atomic_set(&kbase_tlstream_enabled, 0);
+			return *fd;
+		}
+
+		/* Reset and initialize header streams. */
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_HEADER]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_AUX_HEADER]);
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_OBJ_HEADER,
+				tp_desc_obj,
+				ARRAY_SIZE(tp_desc_obj));
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_AUX_HEADER,
+				tp_desc_aux,
+				ARRAY_SIZE(tp_desc_aux));
+
+		/* Start autoflush timer. */
+		atomic_set(&autoflush_timer_active, 1);
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+		CSTD_UNUSED(rcode);
+
+	} else {
+		*fd = -EBUSY;
+	}
+
+	return 0;
+}
+
+void kbase_tlstream_flush_streams(void)
+{
+	enum tl_stream_type stype;
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
+		kbasep_tlstream_flush_stream(stype);
+}
+
+void kbase_tlstream_reset_body_streams(void)
+{
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_OBJ]);
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_AUX]);
+}
+
+#if MALI_UNIT_TEST
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
+{
+	KBASE_DEBUG_ASSERT(bytes_collected);
+	KBASE_DEBUG_ASSERT(bytes_generated);
+	*bytes_collected = atomic_read(&tlstream_bytes_collected);
+	*bytes_generated = atomic_read(&tlstream_bytes_generated);
+}
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
+{
+	const u32     msg_id = KBASE_TL_NEW_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) +
+		sizeof(core_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &id, sizeof(id));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_count, sizeof(core_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
+{
+	const u32     msg_id = KBASE_TL_NEW_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) +
+		sizeof(fn);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &fn, sizeof(fn));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_LPU_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_AS_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_ctx(void *context)
+{
+	const u32     msg_id = KBASE_TL_DEL_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_atom(void *atom)
+{
+	const u32     msg_id = KBASE_TL_DEL_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_RET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_LPU;
+	const size_t  msg_s0 = sizeof(u32) + sizeof(char) +
+			strnlen(attrib_match_list, STRLEN_MAX);
+	const size_t  msg_size =
+			sizeof(msg_id) + sizeof(u64) +
+			sizeof(atom) + sizeof(lpu) + msg_s0;
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_string(
+			buffer, pos, attrib_match_list, msg_s0);
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_DEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_NDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_RDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_RET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_NRET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+		sizeof(jd) + sizeof(affinity) + sizeof(config);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &jd, sizeof(jd));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &affinity, sizeof(affinity));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &config, sizeof(config));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) +
+		sizeof(transtab) + sizeof(memattr) + sizeof(transcfg);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transtab, sizeof(transtab));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &memattr, sizeof(memattr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transcfg, sizeof(transcfg));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
+{
+	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+{
+	const u32     msg_id = KBASE_AUX_PM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
+		sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_type, sizeof(core_type));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_issue_job_softstop(void *katom)
+{
+	const u32     msg_id = KBASE_AUX_ISSUE_JOB_SOFTSTOP;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(katom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_job_softstop(u32 js_id)
+{
+	const u32     msg_id = KBASE_AUX_JOB_SOFTSTOP;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(js_id);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &js_id, sizeof(js_id));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+/**
+ * __kbase_tlstream_aux_job_softstop_ex_record - record the trace point
+ * @katom: the atom that has been soft-stopped
+ * @job_type: the job type
+ */
+static void __kbase_tlstream_aux_job_softstop_ex_record(
+		void *katom, u32 job_type)
+{
+	const u32     msg_id = KBASE_AUX_JOB_SOFTSTOP_EX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(katom) + sizeof(job_type);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &job_type, sizeof(job_type));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u64 jd = katom->jc;
+
+	while (jd != 0) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		job = kbase_vmap(kctx, jd, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev,
+				"__kbase_tlstream_aux_job_softstop_ex: failed to map job descriptor 0x%llx for atom 0x%p\n",
+				jd, (void *)katom);
+			break;
+		}
+		if (job->exception_status != BASE_JD_EVENT_STOPPED) {
+			kbase_vunmap(kctx, &map);
+			break;
+		}
+
+		__kbase_tlstream_aux_job_softstop_ex_record(
+				katom, job->job_type);
+
+		jd = job->job_descriptor_size ?
+			job->next_job._64 : job->next_job._32;
+		kbase_vunmap(kctx, &map);
+	}
+}
+
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
+{
+	const u32     msg_id = KBASE_AUX_PAGEFAULT;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count_change);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&page_count_change, sizeof(page_count_change));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
+{
+	const u32     msg_id = KBASE_AUX_PAGESALLOC;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &page_count, sizeof(page_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
new file mode 100755
index 0000000..22a0d96
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -0,0 +1,509 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_KBASE_TLSTREAM_H)
+#define _KBASE_TLSTREAM_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_init - initialize timeline infrastructure in kernel
+ * Return: zero on success, negative number on error
+ */
+int kbase_tlstream_init(void);
+
+/**
+ * kbase_tlstream_term - terminate timeline infrastructure in kernel
+ *
+ * Timeline need have to been previously enabled with kbase_tlstream_init().
+ */
+void kbase_tlstream_term(void);
+
+/**
+ * kbase_tlstream_acquire - acquire timeline stream file descriptor
+ * @kctx: kernel common context
+ * @fd:   timeline stream file descriptor
+ *
+ * This descriptor is meant to be used by userspace timeline to gain access to
+ * kernel timeline stream. This stream is later broadcasted by user space to the
+ * timeline client.
+ * Only one entity can own the descriptor at any given time. Descriptor shall be
+ * closed if unused. If descriptor cannot be obtained (i.e. when it is already
+ * being used) argument fd will contain negative value.
+ *
+ * Return: zero on success (this does not necessarily mean that stream
+ *         descriptor could be returned), negative number on error
+ */
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd);
+
+/**
+ * kbase_tlstream_flush_streams - flush timeline streams.
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_tlstream_flush_streams(void);
+
+/**
+ * kbase_tlstream_reset_body_streams - reset timeline body streams.
+ *
+ * Function will discard pending data in all timeline body streams.
+ */
+void kbase_tlstream_reset_body_streams(void);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_tlstream_test - start timeline stream data generator
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay in milliseconds between trace points written by one
+ *             writer
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This test starts a requested number of asynchronous writers in both IRQ and
+ * thread context. Each writer will generate required number of test
+ * tracepoints (tracepoints with embedded information about writer that
+ * should be verified by user space reader). Tracepoints will be emitted in
+ * all timeline body streams. If aux_msg is non-zero writer will also
+ * generate not testable tracepoints (tracepoints without information about
+ * writer). These tracepoints are used to check correctness of remaining
+ * timeline message generating functions. Writer will wait requested time
+ * between generating another set of messages. This call blocks until all
+ * writers finish.
+ */
+void kbase_tlstream_test(
+		unsigned int tpw_count,
+		unsigned int msg_delay,
+		unsigned int msg_count,
+		int          aux_msg);
+
+/**
+ * kbase_tlstream_stats - read timeline stream statistics
+ * @bytes_collected: will hold number of bytes read by the user
+ * @bytes_generated: will hold number of bytes generated by trace points
+ */
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+void __kbase_tlstream_tl_del_ctx(void *context);
+void __kbase_tlstream_tl_del_atom(void *atom);
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list);
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config);
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg);
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+void __kbase_tlstream_aux_issue_job_softstop(void *katom);
+void __kbase_tlstream_aux_job_softstop(u32 js_id);
+void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom);
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
+
+extern atomic_t kbase_tlstream_enabled;
+
+#define __TRACE_IF_ENABLED(trace_name, ...)                         \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled)                                        \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_tl_summary_new_ctx - create context object in timeline
+ *                                     summary
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_ctx(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid)
+
+/**
+ * kbase_tlstream_tl_summary_new_gpu - create GPU object in timeline summary
+ * @gpu:        name of the GPU object
+ * @id:         id value of this GPU
+ * @core_count: number of cores this GPU hosts
+ *
+ * Function emits a timeline message informing about GPU creation. GPU is
+ * created with two attributes: id and core count.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_gpu(gpu, id, core_count) \
+	__TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count)
+
+/**
+ * kbase_tlstream_tl_summary_new_lpu - create LPU object in timeline summary
+ * @lpu: name of the Logical Processing Unit object
+ * @nr:  sequential number assigned to this LPU
+ * @fn:  property describing this LPU's functional abilities
+ *
+ * Function emits a timeline message informing about LPU creation. LPU is
+ * created with two attributes: number linking this LPU with GPU's job slot
+ * and function bearing information about this LPU abilities.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_lpu(lpu, nr, fn) \
+	__TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn)
+
+/**
+ * kbase_tlstream_tl_summary_lifelink_lpu_gpu - lifelink LPU object to GPU
+ * @lpu: name of the Logical Processing Unit object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that LPU object shall be deleted
+ * along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu)
+
+/**
+ * kbase_tlstream_tl_summary_new_as - create address space object in timeline summary
+ * @as: name of the address space object
+ * @nr: sequential number assigned to this address space
+ *
+ * Function emits a timeline message informing about address space creation.
+ * Address space is created with one attribute: number identifying this
+ * address space.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_as(as, nr) \
+	__TRACE_IF_ENABLED(tl_summary_new_as, as, nr)
+
+/**
+ * kbase_tlstream_tl_summary_lifelink_as_gpu - lifelink address space object to GPU
+ * @as:  name of the address space object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that address space object
+ * shall be deleted along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_lifelink_as_gpu(as, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu)
+
+/**
+ * kbase_tlstream_tl_new_ctx - create context object in timeline
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ */
+#define kbase_tlstream_tl_new_ctx(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid)
+
+/**
+ * kbase_tlstream_tl_new_atom - create atom object in timeline
+ * @atom: name of the atom object
+ * @nr:   sequential number assigned to this atom
+ *
+ * Function emits a timeline message informing about atom creation. Atom is
+ * created with atom number (its attribute) that links it with actual work
+ * bucket id understood by hardware.
+ */
+#define kbase_tlstream_tl_new_atom(atom, nr) \
+	__TRACE_IF_ENABLED(tl_new_atom, atom, nr)
+
+/**
+ * kbase_tlstream_tl_del_ctx - destroy context object in timeline
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that context object ceased to
+ * exist.
+ */
+#define kbase_tlstream_tl_del_ctx(context) \
+	__TRACE_IF_ENABLED(tl_del_ctx, context)
+
+/**
+ * kbase_tlstream_tl_del_atom - destroy atom object in timeline
+ * @atom: name of the atom object
+ *
+ * Function emits a timeline message informing that atom object ceased to
+ * exist.
+ */
+#define kbase_tlstream_tl_del_atom(atom) \
+	__TRACE_IF_ENABLED(tl_del_atom, atom)
+
+/**
+ * kbase_tlstream_tl_ret_ctx_lpu - retain context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_ctx_lpu(context, lpu) \
+	__TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu)
+
+/**
+ * kbase_tlstream_tl_ret_atom_ctx - retain atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by context and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_atom_ctx(atom, context) \
+	__TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context)
+
+/**
+ * kbase_tlstream_tl_ret_atom_lpu - retain atom by LPU
+ * @atom:              name of the atom object
+ * @lpu:               name of the Logical Processing Unit object
+ * @attrib_match_list: list containing match operator attributes
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_atom_lpu(atom, lpu, attrib_match_list) \
+	__TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list)
+
+/**
+ * kbase_tlstream_tl_nret_ctx_lpu - release context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being released
+ * by LPU object.
+ */
+#define kbase_tlstream_tl_nret_ctx_lpu(context, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu)
+
+/**
+ * kbase_tlstream_tl_nret_atom_ctx - release atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by context.
+ */
+#define kbase_tlstream_tl_nret_atom_ctx(atom, context) \
+	__TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context)
+
+/**
+ * kbase_tlstream_tl_nret_atom_lpu - release atom by LPU
+ * @atom: name of the atom object
+ * @lpu:  name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by LPU.
+ */
+#define kbase_tlstream_tl_nret_atom_lpu(atom, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu)
+
+/**
+ * kbase_tlstream_tl_ret_as_ctx - lifelink address space object to context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being held by the context object.
+ */
+#define kbase_tlstream_tl_ret_as_ctx(as, ctx) \
+	__TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx)
+
+/**
+ * kbase_tlstream_tl_nret_as_ctx - release address space by context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being released by atom.
+ */
+#define kbase_tlstream_tl_nret_as_ctx(as, ctx) \
+	__TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx)
+
+/**
+ * kbase_tlstream_tl_ret_atom_as - retain atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by address space and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_atom_as(atom, as) \
+	__TRACE_IF_ENABLED(tl_ret_atom_as, atom, as)
+
+/**
+ * kbase_tlstream_tl_nret_atom_as - release atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by address space.
+ */
+#define kbase_tlstream_tl_nret_atom_as(atom, as) \
+	__TRACE_IF_ENABLED(tl_nret_atom_as, atom, as)
+
+/**
+ * kbase_tlstream_tl_dep_atom_atom - parent atom depends on child atom
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depends on child atom
+ *
+ * Function emits a timeline message informing that parent atom waits for
+ * child atom object to be completed before start its execution.
+ */
+#define kbase_tlstream_tl_dep_atom_atom(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2)
+
+/**
+ * kbase_tlstream_tl_ndep_atom_atom - dependency between atoms resolved
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define kbase_tlstream_tl_ndep_atom_atom(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2)
+
+/**
+ * kbase_tlstream_tl_rdep_atom_atom - information about already resolved dependency between atoms
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define kbase_tlstream_tl_rdep_atom_atom(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2)
+
+/**
+ * kbase_tlstream_tl_attrib_atom_config - atom job slot attributes
+ * @atom:     name of the atom object
+ * @jd:       job descriptor address
+ * @affinity: job affinity
+ * @config:   job config
+ *
+ * Function emits a timeline message containing atom attributes.
+ */
+#define kbase_tlstream_tl_attrib_atom_config(atom, jd, affinity, config) \
+	__TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config)
+
+/**
+ * kbase_tlstream_tl_attrib_as_config - address space attributes
+ * @as:       assigned address space
+ * @transtab: configuration of the TRANSTAB register
+ * @memattr:  configuration of the MEMATTR register
+ * @transcfg: configuration of the TRANSCFG register (or zero if not present)
+ *
+ * Function emits a timeline message containing address space attributes.
+ */
+#define kbase_tlstream_tl_attrib_as_config(as, transtab, memattr, transcfg) \
+	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
+
+/**
+ * kbase_tlstream_jd_gpu_soft_reset - The GPU is being soft reset
+ * @gpu:        name of the GPU object
+ *
+ * This imperative tracepoint is specific to job dumping.
+ * Function emits a timeline message indicating GPU soft reset.
+ */
+#define kbase_tlstream_jd_gpu_soft_reset(gpu) \
+	__TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu)
+
+/**
+ * kbase_tlstream_aux_pm_state - timeline message: power management state
+ * @core_type: core type (shader, tiler, l2 cache, l3 cache)
+ * @state:     64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
+ */
+#define kbase_tlstream_aux_pm_state(core_type, state) \
+	__TRACE_IF_ENABLED(aux_pm_state, core_type, state)
+
+/**
+ * kbase_tlstream_aux_issue_job_softstop - a soft-stop command is being issued
+ * @katom: the atom that is being soft-stopped
+ */
+#define kbase_tlstream_aux_issue_job_softstop(katom) \
+	__TRACE_IF_ENABLED(aux_issue_job_softstop, katom)
+
+/**
+ * kbase_tlstream_aux_job_softstop - soft job stop occurred
+ * @js_id: job slot id
+ */
+#define kbase_tlstream_aux_job_softstop(js_id) \
+	__TRACE_IF_ENABLED(aux_job_softstop, js_id)
+
+/**
+ * kbase_tlstream_aux_job_softstop_ex - extra info about soft-stopped atom
+ * @katom: the atom that has been soft-stopped
+ *
+ * This trace point adds more details about the soft-stopped atom. These details
+ * can't be safety collected inside the interrupt handler so we're doing it
+ * inside a worker.
+ *
+ * Note: this is not the same information that is recorded in the trace point,
+ * refer to __kbase_tlstream_aux_job_softstop_ex() for more details.
+ */
+#define kbase_tlstream_aux_job_softstop_ex(katom) \
+	__TRACE_IF_ENABLED(aux_job_softstop_ex, katom)
+
+/**
+ * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event
+ *                                resulting in new pages being mapped
+ * @ctx_nr:            kernel context number
+ * @page_count_change: number of pages to be added
+ */
+#define kbase_tlstream_aux_pagefault(ctx_nr, page_count_change) \
+	__TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change)
+
+/**
+ * kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated
+ *                                 pages is changed
+ * @ctx_nr:     kernel context number
+ * @page_count: number of pages used by the context
+ */
+#define kbase_tlstream_aux_pagesalloc(ctx_nr, page_count) \
+	__TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count)
+
+#endif /* _KBASE_TLSTREAM_H */
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100755
index 0000000..e2e0544
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,264 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code idenitifers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE.
+ *
+ * e.g.:
+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X
+ * typedef enum
+ * {
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X )
+ * #include "mali_kbase_trace_defs.h"
+ * #undef  KBASE_TRACE_CODE_MAKE_CODE
+ * } kbase_trace_code;
+ *
+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE
+ *
+ *
+ * The use of the macro here is:
+ * - KBASE_TRACE_CODE_MAKE_CODE( X )
+ *
+ * Which produces:
+ * - For an enum, KBASE_TRACE_CODE_X
+ * - For a string, "X"
+ *
+ *
+ * For example:
+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to:
+ *  - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum
+ *  - "JM_JOB_COMPLETE" for the string
+ * - To use it to trace an event, do:
+ *  - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val );
+ */
+
+#if 0 /* Dummy section to avoid breaking formatting */
+int dummy_array[] = {
+#endif
+
+/*
+ * Core events
+ */
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ),
+	/* info_val == bits cleared */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR),
+	/* GPU addr==dump address */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES),
+/*
+ * Job Slot management events
+ */
+	/* info_val==irq rawstat at start */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ),
+	/* info_val==jobs processed */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END),
+/* In the following:
+ *
+ * - ctx is set if a corresponding job found (NULL otherwise, e.g. some soft-stop cases)
+ * - uatom==kernel-side mapped uatom address (for correlation with user-side)
+ */
+	/* info_val==exit code; gpu_addr==chain gpuaddr */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+	/* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT),
+	/* gpu_addr is as follows:
+	 * - If JS_STATUS active after soft-stop, val==gpu addr written to
+	 *   JS_HEAD on submit
+	 * - otherwise gpu_addr==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1),
+	/* gpu_addr==JS_TAIL read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD),
+/* gpu_addr is as follows:
+ * - If JS_STATUS active before soft-stop, val==JS_HEAD
+ * - otherwise gpu_addr==0
+ */
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE),
+	/* info_val == nr jobs submitted */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP),
+	/* gpu_addr==JS_HEAD_NEXT last written */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER),
+/*
+ * Job dispatch events
+ */
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==0, info_val==0, uatom==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER),
+/*
+ * Scheduler Core events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB),
+	/* gpu_addr==last value written/would be written to JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
+	/* kctx is the one being evicted, info_val == kctx to put in  */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE),
+	/* info_val == the ctx attribute now on ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX),
+	/* info_val == the ctx attribute now on runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL),
+	/* info_val == the ctx attribute now off ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
+	/* info_val == the ctx attribute now off runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+/*
+ * Scheduler Policy events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX),
+	/* info_val == whether it was evicted */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ),
+	/* gpu_addr==JS_HEAD to write if the job were run */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END),
+/*
+ * Power Management Events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
+	/* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF),
+	/* info_val == policy number, or -1 for "Already changing" */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+/* Unused code just to make it easier to not have a comma at the end.
+ * All other codes MUST come before this */
+	KBASE_TRACE_CODE_MAKE_CODE(DUMMY)
+
+#if 0 /* Dummy section to avoid breaking formatting */
+};
+#endif
+
+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
new file mode 100755
index 0000000..a606ae8
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
@@ -0,0 +1,232 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define CREATE_TRACE_POINTS
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+#include "mali_timeline.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active);
+
+struct kbase_trace_timeline_desc {
+	char *enum_str;
+	char *desc;
+	char *format;
+	char *format_desc;
+};
+
+static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc }
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table)
+
+static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos)
+{
+	if (*pos >= KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	(*pos)++;
+
+	if (*pos == KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace_timeline_desc *trace_desc = data;
+
+	seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc);
+	return 0;
+}
+
+
+static const struct seq_operations kbasep_trace_timeline_seq_ops = {
+	.start = kbasep_trace_timeline_seq_start,
+	.next = kbasep_trace_timeline_seq_next,
+	.stop = kbasep_trace_timeline_seq_stop,
+	.show = kbasep_trace_timeline_seq_show,
+};
+
+static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &kbasep_trace_timeline_seq_ops);
+}
+
+static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
+	.open = kbasep_trace_timeline_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_timeline_defs",
+			S_IRUGO, kbdev->mali_debugfs_directory, NULL,
+			&kbasep_trace_timeline_debugfs_fops);
+}
+
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
+	} else {
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1);
+		KBASE_TIMELINE_JOB_START(kctx, js, atom_number);
+	}
+	++kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
+	} else {
+		/* Job finished in JS_HEAD */
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0);
+		KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number);
+
+		/* see if we need to trace the job in JS_NEXT moving to JS_HEAD */
+		if (kbase_backend_nr_atoms_submitted(kbdev, js)) {
+			struct kbase_jd_atom *next_katom;
+			struct kbase_context *next_kctx;
+
+			/* Peek the next atom - note that the atom in JS_HEAD will already
+			 * have been dequeued */
+			next_katom = kbase_backend_inspect_head(kbdev, js);
+			WARN_ON(!next_katom);
+			next_kctx = next_katom->kctx;
+			KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0);
+			KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1);
+			KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom));
+		}
+	}
+
+	--kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+	int uid = 0;
+	int old_uid;
+
+	/* If a producer already exists for the event, try to use their UID (multiple-producers) */
+	uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]);
+	old_uid = uid;
+
+	/* Get a new non-zero UID if we don't have one yet */
+	while (!uid)
+		uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter);
+
+	/* Try to use this UID */
+	if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid))
+		/* If it changed, raced with another producer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid);
+}
+
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != 0) {
+		if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+			/* If it changed, raced with another consumer: we've lost this UID */
+			uid = 0;
+
+		KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+	}
+}
+
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+		/* If it changed, raced with another consumer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+}
+
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+	/* Simply log the start of the transition */
+	kbdev->timeline.l2_transitioning = true;
+	KBASE_TIMELINE_POWERING_L2(kbdev);
+}
+
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+	/* Simply log the end of the transition */
+	if (kbdev->timeline.l2_transitioning) {
+		kbdev->timeline.l2_transitioning = false;
+		KBASE_TIMELINE_POWERED_L2(kbdev);
+	}
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
new file mode 100755
index 0000000..22a3649
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
@@ -0,0 +1,356 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_KBASE_TRACE_TIMELINE_H)
+#define _KBASE_TRACE_TIMELINE_H
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+
+enum kbase_trace_timeline_code {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+/** Initialize Timeline DebugFS entries */
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
+
+/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
+ * functions.
+ * Output is timestamped by either sched_clock() (default), local_clock(), or
+ * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */
+#include "mali_timeline.h"
+
+/* Trace number of atoms in flight for kctx (atoms either not completed, or in
+   process of being returned to user */
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec,   \
+				(int)kctx->timeline.owner_tgid,              \
+				count);                                      \
+	} while (0)
+
+/* Trace atom_id being Ready to Run */
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id)                             \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec,              \
+				CTX_FLOW_ATOM_READY,                         \
+				(int)kctx->timeline.owner_tgid,              \
+				atom_id);                                    \
+	} while (0)
+
+/* Trace number of atoms submitted to job slot js
+ *
+ * NOTE: This uses a different tracepoint to the head/next/soft-stop actions,
+ * so that those actions can be filtered out separately from this
+ *
+ * This is because this is more useful, as we can use it to calculate general
+ * utilization easily and accurately */
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_ACTIVE,                      \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+
+/* Trace atoms present in JS_NEXT */
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_NEXT,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace atoms present in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_HEAD,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace that a soft stop/evict from next is being attempted on a slot */
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_STOPPING,                    \
+				(kctx) ? (int)kctx->timeline.owner_tgid : 0, \
+				js, count);                                  \
+	} while (0)
+
+
+
+/* Trace state of overall GPU power */
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active)                              \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_ACTIVE, active);            \
+	} while (0)
+
+/* Trace state of tiler power */
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap)                            \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_TILER_ACTIVE,               \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace number of shaders currently powered */
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap)                           \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_SHADER_ACTIVE,              \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 power */
+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap)                               \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_L2_ACTIVE,                  \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 cache*/
+#define KBASE_TIMELINE_POWERING_L2(kbdev)                                    \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_POWERING,               \
+				1);                                          \
+	} while (0)
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)                                     \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_ACTIVE,                 \
+				1);                                          \
+	} while (0)
+
+/* Trace kbase_pm_send_event message send */
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id)         \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_SEND_EVENT,                       \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+/* Trace kbase_pm_worker message receive */
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id)       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_HANDLE_EVENT,                     \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+
+/* Trace atom_id starting in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number)          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_START_GPU_JOB_CHAIN_SW_APPROX,            \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _consumerof_atom_number);                \
+	} while (0)
+
+/* Trace atom_id stopping on JS_HEAD */
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_STOP_GPU_JOB_CHAIN_SW_APPROX,             \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _producerof_atom_number_completed);      \
+	} while (0)
+
+/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
+ * certin caller */
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec,     \
+				trace_code, 1);                              \
+	} while (0)
+
+/* Trace number of contexts active */
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec,    \
+				count);                                      \
+	} while (0)
+
+
+/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
+
+/**
+ * Trace that an atom is starting on a job slot
+ *
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js);
+
+/**
+ * Trace that an atom has done on a job slot
+ *
+ * 'Done' in this sense can occur either because:
+ * - the atom in JS_HEAD finished
+ * - the atom in JS_NEXT was evicted
+ *
+ * Whether the atom finished or was evicted is passed in @a done_code
+ *
+ * It is assumed that the atom has already been removed from the submit slot,
+ * with either:
+ * - kbasep_jm_dequeue_submit_slot()
+ * - kbasep_jm_dequeue_tail_submit_slot()
+ *
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code);
+
+
+/** Trace a pm event starting */
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
+		enum kbase_timeline_pm_event event_sent);
+
+/** Trace a pm event finishing */
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Check whether a pm event was present, and if so trace finishing it */
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Trace L2 power-up start */
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
+
+/** Trace L2 power-up done */
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
+
+#else
+
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)  CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
+
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
+
+static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+}
+
+static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+}
+
+static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+}
+
+static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_MALI_TRACE_TIMELINE */
+
+#endif				/* _KBASE_TRACE_TIMELINE_H */
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
new file mode 100755
index 0000000..156a95a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
@@ -0,0 +1,140 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * Conventions on Event Names:
+ *
+ * - The prefix determines something about how the timeline should be
+ *   displayed, and is split up into various parts, separated by underscores:
+ *  - 'SW' and 'HW' as the first part will be used to determine whether a
+ *     timeline is to do with Software or Hardware - effectively, separate
+ *     'channels' for Software and Hardware
+ *  - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
+ *    signify related pairs of events - these are optional.
+ *  - 'FLOW' indicates a generic event, which can use dependencies
+ * - This gives events such as:
+ *  - 'SW_ENTER_FOO'
+ *  - 'SW_LEAVE_FOO'
+ *  - 'SW_FLOW_BAR_1'
+ *  - 'SW_FLOW_BAR_2'
+ *  - 'HW_START_BAZ'
+ *  - 'HW_STOP_BAZ'
+ * - And an unadorned HW event:
+ *  - 'HW_BAZ_FROZBOZ'
+ */
+
+/*
+ * Conventions on parameter names:
+ * - anything with 'instance' in the name will have a separate timeline based
+ *   on that instances.
+ * - underscored-prefixed parameters will by hidden by default on timelines
+ *
+ * Hence:
+ * - Different job slots have their own 'instance', based on the instance value
+ * - Per-context info (e.g. atoms on a context) have their own 'instance'
+ *   (i.e. each context should be on a different timeline)
+ *
+ * Note that globally-shared resources can be tagged with a tgid, but we don't
+ * want an instance per context:
+ * - There's no point having separate Job Slot timelines for each context, that
+ *   would be confusing - there's only really 3 job slots!
+ * - There's no point having separate Shader-powered timelines for each
+ *   context, that would be confusing - all shader cores (whether it be 4, 8,
+ *   etc) are shared in the system.
+ */
+
+	/*
+	 * CTX events
+	 */
+	/* Separate timelines for each context 'instance'*/
+	KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT,     "CTX: Atoms in flight",            "%d,%d",    "_instance_tgid,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY,            "CTX: Atoms Ready to Run",         "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
+
+	/*
+	 * SW Events
+	 */
+	/* Separate timelines for each slot 'instance' */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE,         "SW: GPU slot active",             "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT,           "SW: GPU atom in NEXT",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD,           "SW: GPU atom in HEAD",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING,       "SW: Try Soft-Stop on GPU slot",   "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
+	/* Shader and overall power is shared - can't have separate instances of
+	 * it, just tagging with the context */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE,        "SW: GPU power active",            "%d,%d",    "_tgid,_value_is_power_active"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE,  "SW: GPU tiler powered",           "%d,%d",    "_tgid,_value_number_of_tilers"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered",         "%d,%d",    "_tgid,_value_number_of_shaders"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE,     "SW: GPU L2 powered",              "%d,%d",    "_tgid,_value_number_of_l2"),
+
+	/* SW Power event messaging. _event_type is one from the kbase_pm_event enum  */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT,          "SW: PM Send Event",               "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT,        "SW: PM Handle Event",             "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
+	/* SW L2 power events */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING,  "SW: GPU L2 powering",             "%d,%d", "_tgid,_writerof_l2_transitioning"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE,	  "SW: GPU L2 powering done",        "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
+
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE,          "SW: Context Active",              "%d,%d",    "_tgid,_value_active"),
+
+	/*
+	 * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END,   "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END,   "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END,   "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
+
+	/*
+	 * Significant Indirect callers of kbase_pm_check_transitions_nolock()
+	 */
+	/* kbase_pm_request_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	/* kbase_pm_release_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END,   "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	/*
+	 * END: SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+
+	/*
+	 * HW Events
+	 */
+	KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT,
+"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"),
+	KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain start (SW approximated)", "%d,%d,%d",
+"_tgid,job_slot,_consumerof_atom_number_ready"),
+	KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain stop (SW approximated)",  "%d,%d,%d",
+"_tgid,job_slot,_producerof_atom_number_completed")
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h
new file mode 100755
index 0000000..93ddb5a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h
@@ -0,0 +1,525 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UKU_H_
+#define _KBASE_UKU_H_
+
+#include "mali_uk.h"
+#include "mali_base_kernel.h"
+
+/* This file needs to support being included from kernel and userside (which use different defines) */
+#if defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON
+#define SUPPORT_MALI_ERROR_INJECT
+#endif /* defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON */
+#if defined(CONFIG_MALI_NO_MALI)
+#define SUPPORT_MALI_NO_MALI
+#elif defined(MALI_NO_MALI)
+#if MALI_NO_MALI
+#define SUPPORT_MALI_NO_MALI
+#endif
+#endif
+
+#if defined(SUPPORT_MALI_NO_MALI) || defined(SUPPORT_MALI_ERROR_INJECT)
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+
+#include "mali_kbase_gpuprops_types.h"
+
+/*
+ * 10.1:
+ * - Do mmap in kernel for SAME_VA memory allocations rather then
+ *   calling back into the kernel as a 2nd stage of the allocation request.
+ *
+ * 10.2:
+ * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA
+ *   region for use with JIT (ignored on 32-bit platforms)
+ */
+#define BASE_UK_VERSION_MAJOR 10
+#define BASE_UK_VERSION_MINOR 2
+
+struct kbase_uk_mem_alloc {
+	union uk_header header;
+	/* IN */
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	/* IN/OUT */
+	u64 flags;
+	/* OUT */
+	u64 gpu_va;
+	u16 va_alignment;
+	u8  padding[6];
+};
+
+struct kbase_uk_mem_free {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	/* OUT */
+};
+
+struct kbase_uk_mem_alias {
+	union uk_header header;
+	/* IN/OUT */
+	u64 flags;
+	/* IN */
+	u64 stride;
+	u64 nents;
+	union kbase_pointer ai;
+	/* OUT */
+	u64         gpu_va;
+	u64         va_pages;
+};
+
+struct kbase_uk_mem_import {
+	union uk_header header;
+	/* IN */
+	union kbase_pointer phandle;
+	u32 type;
+	u32 padding;
+	/* IN/OUT */
+	u64         flags;
+	/* OUT */
+	u64 gpu_va;
+	u64         va_pages;
+};
+
+struct kbase_uk_mem_flags_change {
+	union uk_header header;
+	/* IN */
+	u64 gpu_va;
+	u64 flags;
+	u64 mask;
+};
+
+struct kbase_uk_job_submit {
+	union uk_header header;
+	/* IN */
+	union kbase_pointer addr;
+	u32 nr_atoms;
+	u32 stride;		/* bytes between atoms, i.e. sizeof(base_jd_atom_v2) */
+	/* OUT */
+};
+
+struct kbase_uk_post_term {
+	union uk_header header;
+};
+
+struct kbase_uk_sync_now {
+	union uk_header header;
+
+	/* IN */
+	struct base_syncset sset;
+
+	/* OUT */
+};
+
+struct kbase_uk_hwcnt_setup {
+	union uk_header header;
+
+	/* IN */
+	u64 dump_buffer;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 unused_1; /* keep for backwards compatibility */
+	u32 mmu_l2_bm;
+	u32 padding;
+	/* OUT */
+};
+
+/**
+ * struct kbase_uk_hwcnt_reader_setup - User/Kernel space data exchange structure
+ * @header:       UK structure header
+ * @buffer_count: requested number of dumping buffers
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ * @fd:           dumping notification file descriptor
+ *
+ * This structure sets up HWC dumper/reader for this context.
+ * Multiple instances can be created for single context.
+ */
+struct kbase_uk_hwcnt_reader_setup {
+	union uk_header header;
+
+	/* IN */
+	u32 buffer_count;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 mmu_l2_bm;
+
+	/* OUT */
+	s32 fd;
+};
+
+struct kbase_uk_hwcnt_dump {
+	union uk_header header;
+};
+
+struct kbase_uk_hwcnt_clear {
+	union uk_header header;
+};
+
+struct kbase_uk_fence_validate {
+	union uk_header header;
+	/* IN */
+	s32 fd;
+	u32 padding;
+	/* OUT */
+};
+
+struct kbase_uk_stream_create {
+	union uk_header header;
+	/* IN */
+	char name[32];
+	/* OUT */
+	s32 fd;
+	u32 padding;
+};
+
+struct kbase_uk_gpuprops {
+	union uk_header header;
+
+	/* IN */
+	struct mali_base_gpu_props props;
+	/* OUT */
+};
+
+struct kbase_uk_mem_query {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+#define KBASE_MEM_QUERY_COMMIT_SIZE  1
+#define KBASE_MEM_QUERY_VA_SIZE      2
+#define KBASE_MEM_QUERY_FLAGS        3
+	u64         query;
+	/* OUT */
+	u64         value;
+};
+
+struct kbase_uk_mem_commit {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	u64         pages;
+	/* OUT */
+	u32 result_subcode;
+	u32 padding;
+};
+
+struct kbase_uk_find_cpu_offset {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	u64 cpu_addr;
+	u64 size;
+	/* OUT */
+	u64 offset;
+};
+
+#define KBASE_GET_VERSION_BUFFER_SIZE 64
+struct kbase_uk_get_ddk_version {
+	union uk_header header;
+	/* OUT */
+	char version_buffer[KBASE_GET_VERSION_BUFFER_SIZE];
+	u32 version_string_size;
+	u32 padding;
+};
+
+struct kbase_uk_disjoint_query {
+	union uk_header header;
+	/* OUT */
+	u32 counter;
+	u32 padding;
+};
+
+struct kbase_uk_set_flags {
+	union uk_header header;
+	/* IN */
+	u32 create_flags;
+	u32 padding;
+};
+
+#if MALI_UNIT_TEST
+#define TEST_ADDR_COUNT 4
+#define KBASE_TEST_BUFFER_SIZE 128
+struct kbase_exported_test_data {
+	u64 test_addr[TEST_ADDR_COUNT];		/**< memory address */
+	u32 test_addr_pages[TEST_ADDR_COUNT];		/**<  memory size in pages */
+	union kbase_pointer kctx;				/**<  base context created by process */
+	union kbase_pointer mm;				/**< pointer to process address space */
+	u8 buffer1[KBASE_TEST_BUFFER_SIZE];   /**<  unit test defined parameter */
+	u8 buffer2[KBASE_TEST_BUFFER_SIZE];   /**<  unit test defined parameter */
+};
+
+struct kbase_uk_set_test_data {
+	union uk_header header;
+	/* IN */
+	struct kbase_exported_test_data test_data;
+};
+
+#endif				/* MALI_UNIT_TEST */
+
+#ifdef SUPPORT_MALI_ERROR_INJECT
+struct kbase_uk_error_params {
+	union uk_header header;
+	/* IN */
+	struct kbase_error_params params;
+};
+#endif				/* SUPPORT_MALI_ERROR_INJECT */
+
+#ifdef SUPPORT_MALI_NO_MALI
+struct kbase_uk_model_control_params {
+	union uk_header header;
+	/* IN */
+	struct kbase_model_control_params params;
+};
+#endif				/* SUPPORT_MALI_NO_MALI */
+
+#define KBASE_MAXIMUM_EXT_RESOURCES       255
+
+struct kbase_uk_ext_buff_kds_data {
+	union uk_header header;
+	union kbase_pointer external_resource;
+	union kbase_pointer file_descriptor;
+	u32 num_res;		/* limited to KBASE_MAXIMUM_EXT_RESOURCES */
+	u32 padding;
+};
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+struct kbase_uk_keep_gpu_powered {
+	union uk_header header;
+	u32       enabled;
+	u32       padding;
+};
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+struct kbase_uk_profiling_controls {
+	union uk_header header;
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+};
+
+struct kbase_uk_debugfs_mem_profile_add {
+	union uk_header header;
+	u32 len;
+	union kbase_pointer buf;
+};
+
+struct kbase_uk_context_id {
+	union uk_header header;
+	/* OUT */
+	int id;
+};
+
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+	!defined(MALI_MIPE_ENABLED)
+/**
+ * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @fd:     timeline stream file descriptor
+ *
+ * This structure is used used when performing a call to acquire kernel side
+ * timeline stream file descriptor.
+ */
+struct kbase_uk_tlstream_acquire {
+	union uk_header header;
+	/* IN */
+	/* OUT */
+	s32  fd;
+};
+
+/**
+ * struct kbase_uk_tlstream_flush - User/Kernel space data exchange structure
+ * @header: UK structure header
+ *
+ * This structure is used when performing a call to flush kernel side
+ * timeline streams.
+ */
+struct kbase_uk_tlstream_flush {
+	union uk_header header;
+	/* IN */
+	/* OUT */
+};
+
+#if MALI_UNIT_TEST
+/**
+ * struct kbase_uk_tlstream_test - User/Kernel space data exchange structure
+ * @header:    UK structure header
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay between tracepoints from one writer in milliseconds
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This structure is used when performing a call to start timeline stream test
+ * embedded in kernel.
+ */
+struct kbase_uk_tlstream_test {
+	union uk_header header;
+	/* IN */
+	u32 tpw_count;
+	u32 msg_delay;
+	u32 msg_count;
+	u32 aux_msg;
+	/* OUT */
+};
+
+/**
+ * struct kbase_uk_tlstream_stats - User/Kernel space data exchange structure
+ * @header:          UK structure header
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ *
+ * This structure is used when performing a call to obtain timeline stream
+ * statistics.
+ */
+struct kbase_uk_tlstream_stats {
+	union uk_header header; /**< UK structure header. */
+	/* IN */
+	/* OUT */
+	u32 bytes_collected;
+	u32 bytes_generated;
+};
+#endif /* MALI_UNIT_TEST */
+#endif /* MALI_MIPE_ENABLED */
+
+/**
+ * struct struct kbase_uk_prfcnt_value for the KBASE_FUNC_SET_PRFCNT_VALUES ioctl
+ * @header:          UK structure header
+ * @data:            Counter samples for the dummy model
+ * @size:............Size of the counter sample data
+ */
+struct kbase_uk_prfcnt_values {
+	union uk_header header;
+	/* IN */
+	u32 *data;
+	u32 size;
+};
+
+/**
+ * struct kbase_uk_soft_event_update - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @evt:        the GPU address containing the event
+ * @new_status: the new event status, must be either BASE_JD_SOFT_EVENT_SET or
+ *              BASE_JD_SOFT_EVENT_RESET
+ * @flags:      reserved for future uses, must be set to 0
+ *
+ * This structure is used to update the status of a software event. If the
+ * event's status is set to BASE_JD_SOFT_EVENT_SET, any job currently waiting
+ * on this event will complete.
+ */
+struct kbase_uk_soft_event_update {
+	union uk_header header;
+	/* IN */
+	u64 evt;
+	u32 new_status;
+	u32 flags;
+};
+
+/**
+ * struct kbase_uk_mem_jit_init - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @va_pages:   Number of virtual pages required for JIT
+ *
+ * This structure is used when requesting initialization of JIT.
+ */
+struct kbase_uk_mem_jit_init {
+	union uk_header header;
+	/* IN */
+	u64 va_pages;
+};
+
+enum kbase_uk_function_id {
+	KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0),
+	KBASE_FUNC_MEM_IMPORT = (UK_FUNC_ID + 1),
+	KBASE_FUNC_MEM_COMMIT = (UK_FUNC_ID + 2),
+	KBASE_FUNC_MEM_QUERY = (UK_FUNC_ID + 3),
+	KBASE_FUNC_MEM_FREE = (UK_FUNC_ID + 4),
+	KBASE_FUNC_MEM_FLAGS_CHANGE = (UK_FUNC_ID + 5),
+	KBASE_FUNC_MEM_ALIAS = (UK_FUNC_ID + 6),
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	KBASE_FUNC_JOB_SUBMIT_UK6 = (UK_FUNC_ID + 7),
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+	KBASE_FUNC_SYNC  = (UK_FUNC_ID + 8),
+
+	KBASE_FUNC_POST_TERM = (UK_FUNC_ID + 9),
+
+	KBASE_FUNC_HWCNT_SETUP = (UK_FUNC_ID + 10),
+	KBASE_FUNC_HWCNT_DUMP = (UK_FUNC_ID + 11),
+	KBASE_FUNC_HWCNT_CLEAR = (UK_FUNC_ID + 12),
+
+	KBASE_FUNC_GPU_PROPS_REG_DUMP = (UK_FUNC_ID + 14),
+
+	KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15),
+
+	KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16),
+	KBASE_FUNC_EXT_BUFFER_LOCK = (UK_FUNC_ID + 17),
+	KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18),
+
+	KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19),
+	KBASE_FUNC_INJECT_ERROR = (UK_FUNC_ID + 20),
+	KBASE_FUNC_MODEL_CONTROL = (UK_FUNC_ID + 21),
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	KBASE_FUNC_KEEP_GPU_POWERED = (UK_FUNC_ID + 22),
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+	KBASE_FUNC_FENCE_VALIDATE = (UK_FUNC_ID + 23),
+	KBASE_FUNC_STREAM_CREATE = (UK_FUNC_ID + 24),
+	KBASE_FUNC_GET_PROFILING_CONTROLS = (UK_FUNC_ID + 25),
+	KBASE_FUNC_SET_PROFILING_CONTROLS = (UK_FUNC_ID + 26),
+					    /* to be used only for testing
+					    * purposes, otherwise these controls
+					    * are set through gator API */
+
+	KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD = (UK_FUNC_ID + 27),
+	KBASE_FUNC_JOB_SUBMIT = (UK_FUNC_ID + 28),
+	KBASE_FUNC_DISJOINT_QUERY = (UK_FUNC_ID + 29),
+
+	KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31),
+
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+	!defined(MALI_MIPE_ENABLED)
+	KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32),
+#if MALI_UNIT_TEST
+	KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33),
+	KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34),
+#endif /* MALI_UNIT_TEST */
+	KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35),
+#endif /* MALI_MIPE_ENABLED */
+
+	KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36),
+
+#ifdef SUPPORT_MALI_NO_MALI
+	KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37),
+#endif
+
+	KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38),
+
+	KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39),
+
+	KBASE_FUNC_MAX
+};
+
+#endif				/* _KBASE_UKU_H_ */
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
new file mode 100755
index 0000000..be474ff
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
+{
+	struct list_head *pos = base->next;
+
+	while (pos != base) {
+		if (pos == entry)
+			return true;
+
+		pos = pos->next;
+	}
+	return false;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100755
index 0000000..fd7252d
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+/** Test whether the given list entry is a member of the given list.
+ *
+ * @param base      The head of the list to be tested
+ * @param entry     The list entry to be tested
+ *
+ * @return          true if entry is a member of base
+ *                  false otherwise
+ */
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
+
+#endif				/* _KBASE_UTILITY_H */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
new file mode 100755
index 0000000..45de4e0
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -0,0 +1,1832 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/hrtimer.h>
+#include <linux/jiffies.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwcnt_reader.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* Hwcnt reader API version */
+#define HWCNT_READER_API        1
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC            1000000000ull /* ns */
+
+/* The time resolution of dumping service. */
+#define DUMPING_RESOLUTION      500000ull /* ns */
+
+/* The maximal supported number of dumping buffers. */
+#define MAX_BUFFER_COUNT        32
+
+/* Size and number of hw counters blocks. */
+#define NR_CNT_BLOCKS_PER_GROUP 8
+#define NR_CNT_PER_BLOCK        64
+#define NR_BYTES_PER_CNT        4
+#define NR_BYTES_PER_HDR        16
+#define PRFCNT_EN_MASK_OFFSET   0x8
+
+/*****************************************************************************/
+
+enum {
+	SHADER_HWCNT_BM,
+	TILER_HWCNT_BM,
+	MMU_L2_HWCNT_BM,
+	JM_HWCNT_BM
+};
+
+/**
+ * struct kbase_vinstr_context - vinstr context per device
+ * @lock:              protects the entire vinstr context
+ * @kbdev:             pointer to kbase device
+ * @kctx:              pointer to kbase context
+ * @vmap:              vinstr vmap for mapping hwcnt dump buffer
+ * @gpu_va:            GPU hwcnt dump buffer address
+ * @cpu_va:            the CPU side mapping of the hwcnt dump buffer
+ * @dump_size:         size of the dump buffer in bytes
+ * @bitmap:            current set of counters monitored, not always in sync
+ *                     with hardware
+ * @reprogram:         when true, reprogram hwcnt block with the new set of
+ *                     counters
+ * @suspended:         when true, the context has been suspended
+ * @nclients:          number of attached clients, pending or otherwise
+ * @waiting_clients:   head of list of clients being periodically sampled
+ * @idle_clients:      head of list of clients being idle
+ * @suspended_clients: head of list of clients being suspended
+ * @thread:            periodic sampling thread
+ * @waitq:             notification queue of sampling thread
+ * @request_pending:   request for action for sampling thread
+ */
+struct kbase_vinstr_context {
+	struct mutex             lock;
+	struct kbase_device      *kbdev;
+	struct kbase_context     *kctx;
+
+	struct kbase_vmap_struct vmap;
+	u64                      gpu_va;
+	void                     *cpu_va;
+	size_t                   dump_size;
+	u32                      bitmap[4];
+	bool                     reprogram;
+	bool                     suspended;
+
+	u32                      nclients;
+	struct list_head         waiting_clients;
+	struct list_head         idle_clients;
+	struct list_head         suspended_clients;
+
+	struct task_struct       *thread;
+	wait_queue_head_t        waitq;
+	atomic_t                 request_pending;
+};
+
+/**
+ * struct kbase_vinstr_client - a vinstr client attached to a vinstr context
+ * @vinstr_ctx:    vinstr context client is attached to
+ * @list:          node used to attach this client to list in vinstr context
+ * @buffer_count:  number of buffers this client is using
+ * @event_mask:    events this client reacts to
+ * @dump_size:     size of one dump buffer in bytes
+ * @bitmap:        bitmap request for JM, TILER, SHADER and MMU counters
+ * @legacy_buffer: userspace hwcnt dump buffer (legacy interface)
+ * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface)
+ * @accum_buffer:  temporary accumulation buffer for preserving counters
+ * @dump_time:     next time this clients shall request hwcnt dump
+ * @dump_interval: interval between periodic hwcnt dumps
+ * @dump_buffers:  kernel hwcnt dump buffers allocated by this client
+ * @dump_buffers_meta: metadata of dump buffers
+ * @meta_idx:      index of metadata being accessed by userspace
+ * @read_idx:      index of buffer read by userspace
+ * @write_idx:     index of buffer being written by dumping service
+ * @waitq:         client's notification queue
+ * @pending:       when true, client has attached but hwcnt not yet updated
+ */
+struct kbase_vinstr_client {
+	struct kbase_vinstr_context        *vinstr_ctx;
+	struct list_head                   list;
+	unsigned int                       buffer_count;
+	u32                                event_mask;
+	size_t                             dump_size;
+	u32                                bitmap[4];
+	void __user                        *legacy_buffer;
+	void                               *kernel_buffer;
+	void                               *accum_buffer;
+	u64                                dump_time;
+	u32                                dump_interval;
+	char                               *dump_buffers;
+	struct kbase_hwcnt_reader_metadata *dump_buffers_meta;
+	atomic_t                           meta_idx;
+	atomic_t                           read_idx;
+	atomic_t                           write_idx;
+	wait_queue_head_t                  waitq;
+	bool                               pending;
+};
+
+/**
+ * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer
+ * @hrtimer:    high resolution timer
+ * @vinstr_ctx: vinstr context
+ */
+struct kbasep_vinstr_wake_up_timer {
+	struct hrtimer              hrtimer;
+	struct kbase_vinstr_context *vinstr_ctx;
+};
+
+/*****************************************************************************/
+
+static int kbasep_vinstr_service_task(void *data);
+
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+		struct file *filp,
+		poll_table  *wait);
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+		struct file   *filp,
+		unsigned int  cmd,
+		unsigned long arg);
+static int kbasep_vinstr_hwcnt_reader_mmap(
+		struct file           *filp,
+		struct vm_area_struct *vma);
+static int kbasep_vinstr_hwcnt_reader_release(
+		struct inode *inode,
+		struct file  *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations vinstr_client_fops = {
+	.poll           = kbasep_vinstr_hwcnt_reader_poll,
+	.unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,
+	.compat_ioctl   = kbasep_vinstr_hwcnt_reader_ioctl,
+	.mmap           = kbasep_vinstr_hwcnt_reader_mmap,
+	.release        = kbasep_vinstr_hwcnt_reader_release,
+};
+
+/*****************************************************************************/
+
+static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_uk_hwcnt_setup setup;
+
+	setup.dump_buffer = vinstr_ctx->gpu_va;
+	setup.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
+	setup.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
+	setup.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
+	setup.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
+
+	return kbase_instr_hwcnt_enable(vinstr_ctx->kctx, &setup);
+}
+
+static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	kbase_instr_hwcnt_disable(vinstr_ctx->kctx);
+}
+
+static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	disable_hwcnt(vinstr_ctx);
+	return enable_hwcnt(vinstr_ctx);
+}
+
+static void hwcnt_bitmap_set(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     = src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  = src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM];
+}
+
+static void hwcnt_bitmap_union(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     |= src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  |= src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM];
+}
+
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev)
+{
+	size_t dump_size;
+
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+		u32 nr_cg;
+
+		nr_cg = kbdev->gpu_props.num_core_groups;
+		dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	} else
+#endif /* CONFIG_MALI_NO_MALI */
+	{
+		/* assume v5 for now */
+		base_gpu_props *props = &kbdev->gpu_props.props;
+		u32 nr_l2 = props->l2_props.num_l2_slices;
+		u64 core_mask = props->coherency_info.group[0].core_mask;
+		u32 nr_blocks = fls64(core_mask);
+
+		/* JM and tiler counter blocks are always present */
+		dump_size = (2 + nr_l2 + nr_blocks) *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	}
+	return dump_size;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size);
+
+static size_t kbasep_vinstr_dump_size_ctx(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev);
+}
+
+static int kbasep_vinstr_map_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_va_region *reg;
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	u64 flags, nr_pages;
+	u16 va_align = 0;
+
+	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+	vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	nr_pages = PFN_UP(vinstr_ctx->dump_size);
+
+	reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+			&vinstr_ctx->gpu_va, &va_align);
+	if (!reg)
+		return -ENOMEM;
+
+	vinstr_ctx->cpu_va = kbase_vmap(
+			kctx,
+			vinstr_ctx->gpu_va,
+			vinstr_ctx->dump_size,
+			&vinstr_ctx->vmap);
+	if (!vinstr_ctx->cpu_va) {
+		kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void kbasep_vinstr_unmap_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+
+	kbase_vunmap(kctx, &vinstr_ctx->vmap);
+	kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+}
+
+/**
+ * kbasep_vinstr_create_kctx - create kernel context for vinstr
+ * @vinstr_ctx: vinstr context
+ * Return: zero on success
+ */
+static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	int err;
+
+	vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true);
+	if (!vinstr_ctx->kctx)
+		return -ENOMEM;
+
+	/* Map the master kernel dump buffer.  The HW dumps the counters
+	 * into this memory region. */
+	err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx);
+	if (err) {
+		kbase_destroy_context(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return err;
+	}
+
+	/* Add kernel context to list of contexts associated with device. */
+	element = kzalloc(sizeof(*element), GFP_KERNEL);
+	if (element) {
+		element->kctx = vinstr_ctx->kctx;
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_add(&element->link, &kbdev->kctx_list);
+
+		/* Inform timeline client about new context.
+		 * Do this while holding the lock to avoid tracepoint
+		 * being created in both body and summary stream. */
+		kbase_tlstream_tl_new_ctx(
+				vinstr_ctx->kctx,
+				(u32)(vinstr_ctx->kctx->id),
+				(u32)(vinstr_ctx->kctx->tgid));
+
+		mutex_unlock(&kbdev->kctx_list_lock);
+	} else {
+		/* Don't treat this as a fail - just warn about it. */
+		dev_warn(kbdev->dev,
+				"couldn't add kctx to kctx_list\n");
+	}
+
+	err = enable_hwcnt(vinstr_ctx);
+	if (err) {
+		kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+		kbase_destroy_context(vinstr_ctx->kctx);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			list_del(&element->link);
+			kfree(element);
+			mutex_unlock(&kbdev->kctx_list_lock);
+		}
+		kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return err;
+	}
+
+	vinstr_ctx->thread = kthread_run(
+			kbasep_vinstr_service_task,
+			vinstr_ctx,
+			"mali_vinstr_service");
+	if (!vinstr_ctx->thread) {
+		disable_hwcnt(vinstr_ctx);
+		kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+		kbase_destroy_context(vinstr_ctx->kctx);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			list_del(&element->link);
+			kfree(element);
+			mutex_unlock(&kbdev->kctx_list_lock);
+		}
+		kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device             *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	struct kbasep_kctx_list_element *tmp;
+	bool                            found = false;
+
+	/* Release hw counters dumping resources. */
+	vinstr_ctx->thread = NULL;
+	disable_hwcnt(vinstr_ctx);
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+	kbase_destroy_context(vinstr_ctx->kctx);
+
+	/* Remove kernel context from the device's contexts list. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == vinstr_ctx->kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	if (!found)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	/* Inform timeline client about context destruction. */
+	kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
+
+	vinstr_ctx->kctx = NULL;
+}
+
+/**
+ * kbasep_vinstr_attach_client - Attach a client to the vinstr core
+ * @vinstr_ctx:    vinstr context
+ * @buffer_count:  requested number of dump buffers
+ * @bitmap:        bitmaps describing which counters should be enabled
+ * @argp:          pointer where notification descriptor shall be stored
+ * @kernel_buffer: pointer to kernel side buffer
+ *
+ * Return: vinstr opaque client handle or NULL on failure
+ */
+static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
+		struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count,
+		u32 bitmap[4], void *argp, void *kernel_buffer)
+{
+	struct task_struct         *thread = NULL;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	if (buffer_count > MAX_BUFFER_COUNT
+	    || (buffer_count & (buffer_count - 1)))
+		return NULL;
+
+	cli = kzalloc(sizeof(*cli), GFP_KERNEL);
+	if (!cli)
+		return NULL;
+
+	cli->vinstr_ctx   = vinstr_ctx;
+	cli->buffer_count = buffer_count;
+	cli->event_mask   =
+		(1 << BASE_HWCNT_READER_EVENT_MANUAL) |
+		(1 << BASE_HWCNT_READER_EVENT_PERIODIC);
+	cli->pending      = true;
+
+	hwcnt_bitmap_set(cli->bitmap, bitmap);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap);
+	vinstr_ctx->reprogram = true;
+
+	/* If this is the first client, create the vinstr kbase
+	 * context. This context is permanently resident until the
+	 * last client exits. */
+	if (!vinstr_ctx->nclients) {
+		hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap);
+		if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0)
+			goto error;
+
+		vinstr_ctx->reprogram = false;
+		cli->pending = false;
+	}
+
+	/* The GPU resets the counter block every time there is a request
+	 * to dump it. We need a per client kernel buffer for accumulating
+	 * the counters. */
+	cli->dump_size    = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
+	if (!cli->accum_buffer)
+		goto error;
+
+	/* Prepare buffers. */
+	if (cli->buffer_count) {
+		int *fd = (int *)argp;
+		size_t tmp;
+
+		/* Allocate area for buffers metadata storage. */
+		tmp = sizeof(struct kbase_hwcnt_reader_metadata) *
+			cli->buffer_count;
+		cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL);
+		if (!cli->dump_buffers_meta)
+			goto error;
+
+		/* Allocate required number of dumping buffers. */
+		cli->dump_buffers = (char *)__get_free_pages(
+				GFP_KERNEL | __GFP_ZERO,
+				get_order(cli->dump_size * cli->buffer_count));
+		if (!cli->dump_buffers)
+			goto error;
+
+		/* Create descriptor for user-kernel data exchange. */
+		*fd = anon_inode_getfd(
+				"[mali_vinstr_desc]",
+				&vinstr_client_fops,
+				cli,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd)
+			goto error;
+	} else if (kernel_buffer) {
+		cli->kernel_buffer = kernel_buffer;
+	} else {
+		cli->legacy_buffer = (void __user *)argp;
+	}
+
+	atomic_set(&cli->read_idx, 0);
+	atomic_set(&cli->meta_idx, 0);
+	atomic_set(&cli->write_idx, 0);
+	init_waitqueue_head(&cli->waitq);
+
+	vinstr_ctx->nclients++;
+	list_add(&cli->list, &vinstr_ctx->idle_clients);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return cli;
+
+error:
+	kfree(cli->dump_buffers_meta);
+	if (cli->dump_buffers)
+		free_pages(
+				(unsigned long)cli->dump_buffers,
+				get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	if (!vinstr_ctx->nclients && vinstr_ctx->kctx) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+	kfree(cli);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+
+	return NULL;
+}
+
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	struct kbase_vinstr_client  *iter, *tmp;
+	struct task_struct          *thread = NULL;
+	u32 zerobitmap[4] = { 0 };
+	int cli_found = 0;
+
+	KBASE_DEBUG_ASSERT(cli);
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) {
+		if (iter == cli) {
+			vinstr_ctx->reprogram = true;
+			cli_found = 1;
+			list_del(&iter->list);
+			break;
+		}
+	}
+	if (!cli_found) {
+		list_for_each_entry_safe(
+				iter, tmp, &vinstr_ctx->waiting_clients, list) {
+			if (iter == cli) {
+				vinstr_ctx->reprogram = true;
+				cli_found = 1;
+				list_del(&iter->list);
+				break;
+			}
+		}
+	}
+	KBASE_DEBUG_ASSERT(cli_found);
+
+	kfree(cli->dump_buffers_meta);
+	free_pages(
+			(unsigned long)cli->dump_buffers,
+			get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	kfree(cli);
+
+	vinstr_ctx->nclients--;
+	if (!vinstr_ctx->nclients) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+
+	/* Rebuild context bitmap now that the client has detached */
+	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client);
+
+/* Accumulate counters in the dump buffer */
+static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
+{
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+	u32 *d = dst;
+	u32 *s = src;
+	size_t i, j;
+
+	for (i = 0; i < dump_size; i += block_size) {
+		/* skip over the header block */
+		d += NR_BYTES_PER_HDR / sizeof(u32);
+		s += NR_BYTES_PER_HDR / sizeof(u32);
+		for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
+			/* saturate result if addition would result in wraparound */
+			if (U32_MAX - *d < *s)
+				*d = U32_MAX;
+			else
+				*d += *s;
+			d++;
+			s++;
+		}
+	}
+}
+
+/* This is the Midgard v4 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v4(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups;
+	size_t i, group_size, group;
+	enum {
+		SC0_BASE    = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC1_BASE    = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC2_BASE    = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC3_BASE    = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		TILER_BASE  = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		JM_BASE     = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
+	};
+
+	group_size = NR_CNT_BLOCKS_PER_GROUP *
+			NR_CNT_PER_BLOCK *
+			NR_BYTES_PER_CNT;
+	for (i = 0; i < nr_cg; i++) {
+		group = i * group_size;
+		/* copy shader core headers */
+		memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE],
+		      NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy tiler header */
+		memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy mmu header */
+		memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy job manager header */
+		memcpy(&dst[group + JM_BASE], &src[group + JM_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* patch the shader core enable mask */
+		mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+
+		/* patch the tiler core enable mask */
+		mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[TILER_HWCNT_BM];
+
+		/* patch the mmu core enable mask */
+		mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+
+		/* patch the job manager enable mask */
+		mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[JM_HWCNT_BM];
+	}
+}
+
+/* This is the Midgard v5 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v5(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev;
+	u32 i, nr_l2;
+	u64 core_mask;
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+
+	/* copy and patch job manager header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[JM_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch tiler header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[TILER_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch MMU/L2C headers */
+	nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+	for (i = 0; i < nr_l2; i++) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+		dst += block_size;
+		src += block_size;
+	}
+
+	/* copy and patch shader core headers */
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (0ull != core_mask) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		if (0ull != (core_mask & 1ull)) {
+			/* if block is not reserved update header */
+			mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+			*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		}
+		dst += block_size;
+		src += block_size;
+
+		core_mask >>= 1;
+	}
+}
+
+/**
+ * accum_clients - accumulate dumped hw counters for all known clients
+ * @vinstr_ctx: vinstr context
+ */
+static void accum_clients(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *iter;
+	int v4 = 0;
+
+#ifndef CONFIG_MALI_NO_MALI
+	v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4);
+#endif
+
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ *
+ * Return: timestamp value
+ */
+static u64 kbasep_vinstr_get_timestamp(void)
+{
+	struct timespec ts;
+
+	getrawmonotonic(&ts);
+	return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+}
+
+/**
+ * kbasep_vinstr_add_dump_request - register client's dumping request
+ * @cli:             requesting client
+ * @waiting_clients: list of pending dumping requests
+ */
+static void kbasep_vinstr_add_dump_request(
+		struct kbase_vinstr_client *cli,
+		struct list_head *waiting_clients)
+{
+	struct kbase_vinstr_client *tmp;
+
+	if (list_empty(waiting_clients)) {
+		list_add(&cli->list, waiting_clients);
+		return;
+	}
+	list_for_each_entry(tmp, waiting_clients, list) {
+		if (tmp->dump_time > cli->dump_time) {
+			list_add_tail(&cli->list, &tmp->list);
+			return;
+		}
+	}
+	list_add_tail(&cli->list, waiting_clients);
+}
+
+/**
+ * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level
+ *                                        dump and accumulate them for known
+ *                                        clients
+ * @vinstr_ctx: vinstr context
+ * @timestamp: pointer where collection timestamp will be recorded
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_collect_and_accumulate(
+		struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp)
+{
+	int rcode;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* The dummy model needs the CPU mapping. */
+	gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va);
+#endif
+
+	/* Request HW counters dump.
+	 * Disable preemption to make dump timestamp more accurate. */
+	preempt_disable();
+	*timestamp = kbasep_vinstr_get_timestamp();
+	rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx);
+	preempt_enable();
+
+	if (!rcode)
+		rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
+	WARN_ON(rcode);
+
+	/* Accumulate values of collected counters. */
+	if (!rcode)
+		accum_clients(vinstr_ctx);
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel
+ *                                  buffer
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_fill_dump_buffer(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	unsigned int write_idx = atomic_read(&cli->write_idx);
+	unsigned int read_idx  = atomic_read(&cli->read_idx);
+
+	struct kbase_hwcnt_reader_metadata *meta;
+	void                               *buffer;
+
+	/* Check if there is a place to copy HWC block into. */
+	if (write_idx - read_idx == cli->buffer_count)
+		return -1;
+	write_idx %= cli->buffer_count;
+
+	/* Fill in dump buffer and its metadata. */
+	buffer = &cli->dump_buffers[write_idx * cli->dump_size];
+	meta   = &cli->dump_buffers_meta[write_idx];
+	meta->timestamp  = timestamp;
+	meta->event_id   = event_id;
+	meta->buffer_idx = write_idx;
+	memcpy(buffer, cli->accum_buffer, cli->dump_size);
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer
+ *                                         allocated in userspace
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of legacy ioctl interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_legacy(
+		struct kbase_vinstr_client *cli)
+{
+	void __user  *buffer = cli->legacy_buffer;
+	int          rcode;
+
+	/* Copy data to user buffer. */
+	rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size);
+	if (rcode)
+		pr_warn("error while copying buffer to user\n");
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer
+ *                                         allocated in kernel space
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of the kernel client interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_kernel(
+		struct kbase_vinstr_client *cli)
+{
+	memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_reprogram(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	if (vinstr_ctx->reprogram) {
+		struct kbase_vinstr_client *iter;
+
+		if (!reprogram_hwcnt(vinstr_ctx)) {
+			vinstr_ctx->reprogram = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->idle_clients,
+					list)
+				iter->pending = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->waiting_clients,
+					list)
+				iter->pending = false;
+		}
+	}
+}
+
+/**
+ * kbasep_vinstr_update_client - copy accumulated counters to user readable
+ *                               buffer and notify the user
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_update_client(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	int rcode = 0;
+
+	/* Copy collected counters to user readable buffer. */
+	if (cli->buffer_count)
+		rcode = kbasep_vinstr_fill_dump_buffer(
+				cli, timestamp, event_id);
+	else if (cli->kernel_buffer)
+		rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli);
+	else
+		rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
+
+	if (rcode)
+		goto exit;
+
+
+	/* Notify client. Make sure all changes to memory are visible. */
+	wmb();
+	atomic_inc(&cli->write_idx);
+	wake_up_interruptible(&cli->waitq);
+
+	/* Prepare for next request. */
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+exit:
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function
+ *
+ * @hrtimer: high resolution timer
+ *
+ * Return: High resolution timer restart enum.
+ */
+static enum hrtimer_restart kbasep_vinstr_wake_up_callback(
+		struct hrtimer *hrtimer)
+{
+	struct kbasep_vinstr_wake_up_timer *timer =
+		container_of(
+			hrtimer,
+			struct kbasep_vinstr_wake_up_timer,
+			hrtimer);
+
+	KBASE_DEBUG_ASSERT(timer);
+
+	atomic_set(&timer->vinstr_ctx->request_pending, 1);
+	wake_up_all(&timer->vinstr_ctx->waitq);
+
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * kbasep_vinstr_service_task - HWC dumping service thread
+ *
+ * @data: Pointer to vinstr context structure.
+ *
+ * Return: Always returns zero.
+ */
+static int kbasep_vinstr_service_task(void *data)
+{
+	struct kbase_vinstr_context        *vinstr_ctx = data;
+	struct kbasep_vinstr_wake_up_timer timer;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	hrtimer_init(&timer.hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	timer.hrtimer.function = kbasep_vinstr_wake_up_callback;
+	timer.vinstr_ctx       = vinstr_ctx;
+
+	while (!kthread_should_stop()) {
+		struct kbase_vinstr_client *cli = NULL;
+		struct kbase_vinstr_client *tmp;
+
+		u64              timestamp = kbasep_vinstr_get_timestamp();
+		u64              dump_time = 0;
+		struct list_head expired_requests;
+
+		/* Hold lock while performing operations on lists of clients. */
+		mutex_lock(&vinstr_ctx->lock);
+
+		/* Closing thread must not interact with client requests. */
+		if (current == vinstr_ctx->thread) {
+			atomic_set(&vinstr_ctx->request_pending, 0);
+
+			if (!list_empty(&vinstr_ctx->waiting_clients)) {
+				cli = list_first_entry(
+						&vinstr_ctx->waiting_clients,
+						struct kbase_vinstr_client,
+						list);
+				dump_time = cli->dump_time;
+			}
+		}
+
+		if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) {
+			mutex_unlock(&vinstr_ctx->lock);
+
+			/* Sleep until next dumping event or service request. */
+			if (cli) {
+				u64 diff = dump_time - timestamp;
+
+				hrtimer_start(
+						&timer.hrtimer,
+						ns_to_ktime(diff),
+						HRTIMER_MODE_REL);
+			}
+			wait_event(
+					vinstr_ctx->waitq,
+					atomic_read(
+						&vinstr_ctx->request_pending) ||
+					kthread_should_stop());
+			hrtimer_cancel(&timer.hrtimer);
+			continue;
+		}
+
+		kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &timestamp);
+
+		INIT_LIST_HEAD(&expired_requests);
+
+		/* Find all expired requests. */
+		list_for_each_entry_safe(
+				cli,
+				tmp,
+				&vinstr_ctx->waiting_clients,
+				list) {
+			s64 tdiff =
+				(s64)(timestamp + DUMPING_RESOLUTION) -
+				(s64)cli->dump_time;
+			if (tdiff >= 0ll) {
+				list_del(&cli->list);
+				list_add(&cli->list, &expired_requests);
+			} else {
+				break;
+			}
+		}
+
+		/* Fill data for each request found. */
+		list_for_each_entry_safe(cli, tmp, &expired_requests, list) {
+			/* Ensure that legacy buffer will not be used from
+			 * this kthread context. */
+			BUG_ON(0 == cli->buffer_count);
+			/* Expect only periodically sampled clients. */
+			BUG_ON(0 == cli->dump_interval);
+
+			kbasep_vinstr_update_client(
+					cli,
+					timestamp,
+					BASE_HWCNT_READER_EVENT_PERIODIC);
+
+			/* Set new dumping time. Drop missed probing times. */
+			do {
+				cli->dump_time += cli->dump_interval;
+			} while (cli->dump_time < timestamp);
+
+			list_del(&cli->list);
+			kbasep_vinstr_add_dump_request(
+					cli,
+					&vinstr_ctx->waiting_clients);
+		}
+
+		/* Reprogram counters set if required. */
+		kbasep_vinstr_reprogram(vinstr_ctx);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	}
+
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers
+ * @cli: pointer to vinstr client structure
+ *
+ * Return: non-zero if client has at least one dumping buffer filled that was
+ *         not notified to user yet
+ */
+static int kbasep_vinstr_hwcnt_reader_buffer_ready(
+		struct kbase_vinstr_client *cli)
+{
+	KBASE_DEBUG_ASSERT(cli);
+	return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int meta_idx = atomic_read(&cli->meta_idx);
+	unsigned int idx = meta_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx];
+
+	/* Metadata sanity check. */
+	KBASE_DEBUG_ASSERT(idx == meta->buffer_idx);
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if there is any buffer available. */
+	if (atomic_read(&cli->write_idx) == meta_idx)
+		return -EAGAIN;
+
+	/* Check if previously taken buffer was put back. */
+	if (atomic_read(&cli->read_idx) != meta_idx)
+		return -EBUSY;
+
+	/* Copy next available buffer's metadata to user. */
+	if (copy_to_user(buffer, meta, size))
+		return -EFAULT;
+
+	atomic_inc(&cli->meta_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int read_idx = atomic_read(&cli->read_idx);
+	unsigned int idx = read_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata meta;
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if any buffer was taken. */
+	if (atomic_read(&cli->meta_idx) == read_idx)
+		return -EPERM;
+
+	/* Check if correct buffer is put back. */
+	if (copy_from_user(&meta, buffer, size))
+		return -EFAULT;
+	if (idx != meta.buffer_idx)
+		return -EINVAL;
+
+	atomic_inc(&cli->read_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @interval: periodic dumping interval (disable periodic dumping if zero)
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+		struct kbase_vinstr_client *cli, u32 interval)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->suspended) {
+		mutex_unlock(&vinstr_ctx->lock);
+		return -EBUSY;
+	}
+
+	list_del(&cli->list);
+
+	cli->dump_interval = interval;
+
+	/* If interval is non-zero, enable periodic dumping for this client. */
+	if (cli->dump_interval) {
+		if (DUMPING_RESOLUTION > cli->dump_interval)
+			cli->dump_interval = DUMPING_RESOLUTION;
+		cli->dump_time =
+			kbasep_vinstr_get_timestamp() + cli->dump_interval;
+
+		kbasep_vinstr_add_dump_request(
+				cli, &vinstr_ctx->waiting_clients);
+
+		atomic_set(&vinstr_ctx->request_pending, 1);
+		wake_up_all(&vinstr_ctx->waitq);
+	} else {
+		list_add(&cli->list, &vinstr_ctx->idle_clients);
+	}
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id
+ * @event_id: id of event
+ * Return: event_mask or zero if event is not supported or maskable
+ */
+static u32 kbasep_vinstr_hwcnt_reader_event_mask(
+		enum base_hwcnt_reader_event event_id)
+{
+	u32 event_mask = 0;
+
+	switch (event_id) {
+	case BASE_HWCNT_READER_EVENT_PREJOB:
+	case BASE_HWCNT_READER_EVENT_POSTJOB:
+		/* These event are maskable. */
+		event_mask = (1 << event_id);
+		break;
+
+	case BASE_HWCNT_READER_EVENT_MANUAL:
+	case BASE_HWCNT_READER_EVENT_PERIODIC:
+		/* These event are non-maskable. */
+	default:
+		/* These event are not supported. */
+		break;
+	}
+
+	return event_mask;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to enable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask |= event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to disable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask &= ~event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command
+ * @cli:   pointer to vinstr client structure
+ * @hwver: pointer to user buffer where hw version will be stored
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+		struct kbase_vinstr_client *cli, u32 __user *hwver)
+{
+#ifndef CONFIG_MALI_NO_MALI
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+#endif
+
+	u32                         ver = 5;
+
+#ifndef CONFIG_MALI_NO_MALI
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4))
+		ver = 4;
+#endif
+
+	return put_user(ver, hwver);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl
+ * @filp:   pointer to file structure
+ * @cmd:    user command
+ * @arg:    command's argument
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	long                       rcode = 0;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd)))
+		return -EINVAL;
+
+	switch (cmd) {
+	case KBASE_HWCNT_READER_GET_API_VERSION:
+		rcode = put_user(HWCNT_READER_API, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_HWVER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+				cli, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER_SIZE:
+		KBASE_DEBUG_ASSERT(cli->vinstr_ctx);
+		rcode = put_user(
+				(u32)cli->vinstr_ctx->dump_size,
+				(u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_DUMP:
+		rcode = kbase_vinstr_hwc_dump(
+				cli, BASE_HWCNT_READER_EVENT_MANUAL);
+		break;
+	case KBASE_HWCNT_READER_CLEAR:
+		rcode = kbase_vinstr_hwc_clear(cli);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_PUT_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_SET_INTERVAL:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+				cli, (u32)arg);
+		break;
+	case KBASE_HWCNT_READER_ENABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	case KBASE_HWCNT_READER_DISABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	default:
+		rcode = -EINVAL;
+		break;
+	}
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp,
+		poll_table *wait)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	poll_wait(filp, &cli->waitq, wait);
+	if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap
+ * @filp: pointer to file structure
+ * @vma:  pointer to vma structure
+ * Return: zero on success
+ */
+static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
+		struct vm_area_struct *vma)
+{
+	struct kbase_vinstr_client *cli;
+	unsigned long size, addr, pfn, offset;
+	unsigned long vm_size = vma->vm_end - vma->vm_start;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(vma);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	size = cli->buffer_count * cli->dump_size;
+
+	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
+		return -EINVAL;
+	if (vm_size > size)
+		return -EINVAL;
+
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	if ((vm_size + offset) > size)
+		return -EINVAL;
+
+	addr = __pa((unsigned long)cli->dump_buffers + offset);
+	pfn = addr >> PAGE_SHIFT;
+
+	return remap_pfn_range(
+			vma,
+			vma->vm_start,
+			pfn,
+			vm_size,
+			vma->vm_page_prot);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ * Return always return zero
+ */
+static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
+		struct file *filp)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	kbase_vinstr_detach_client(cli);
+	return 0;
+}
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL);
+	if (!vinstr_ctx)
+		return NULL;
+
+	INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
+	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
+	mutex_init(&vinstr_ctx->lock);
+	vinstr_ctx->kbdev = kbdev;
+	vinstr_ctx->thread = NULL;
+
+	atomic_set(&vinstr_ctx->request_pending, 0);
+	init_waitqueue_head(&vinstr_ctx->waitq);
+
+	return vinstr_ctx;
+}
+
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *cli;
+
+	/* Stop service thread first. */
+	if (vinstr_ctx->thread)
+		kthread_stop(vinstr_ctx->thread);
+
+	while (1) {
+		struct list_head *list = &vinstr_ctx->idle_clients;
+
+		if (list_empty(list)) {
+			list = &vinstr_ctx->waiting_clients;
+			if (list_empty(list))
+				break;
+		}
+
+		cli = list_first_entry(list, struct kbase_vinstr_client, list);
+		list_del(&cli->list);
+		kfree(cli->accum_buffer);
+		kfree(cli);
+		vinstr_ctx->nclients--;
+	}
+	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients);
+	if (vinstr_ctx->kctx)
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	kfree(vinstr_ctx);
+}
+
+int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup)
+{
+	struct kbase_vinstr_client  *cli;
+	u32                         bitmap[4];
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(setup->buffer_count);
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	cli = kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			setup->buffer_count,
+			bitmap,
+			&setup->fd,
+			NULL);
+
+	if (!cli)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup)
+{
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (setup->dump_buffer) {
+		u32 bitmap[4];
+
+		bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+		bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+		bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+		bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+		if (*cli)
+			return -EBUSY;
+
+		*cli = kbasep_vinstr_attach_client(
+				vinstr_ctx,
+				0,
+				bitmap,
+				(void *)(long)setup->dump_buffer,
+				NULL);
+
+		if (!(*cli))
+			return -ENOMEM;
+	} else {
+		if (!*cli)
+			return -EINVAL;
+
+		kbase_vinstr_detach_client(*cli);
+		*cli = NULL;
+	}
+
+	return 0;
+}
+
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer)
+{
+	u32 bitmap[4];
+
+	if (!vinstr_ctx || !setup || !kernel_buffer)
+		return NULL;
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	return kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			0,
+			bitmap,
+			NULL,
+			kernel_buffer);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
+
+int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	int                         rcode = 0;
+	struct kbase_vinstr_context *vinstr_ctx;
+	u64                         timestamp;
+	u32                         event_mask;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT);
+	event_mask = 1 << event_id;
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->suspended) {
+		rcode = -EBUSY;
+		goto exit;
+	}
+
+	if (event_mask & cli->event_mask) {
+		rcode = kbasep_vinstr_collect_and_accumulate(
+				vinstr_ctx,
+				&timestamp);
+		if (rcode)
+			goto exit;
+
+		rcode = kbasep_vinstr_update_client(cli, timestamp, event_id);
+		if (rcode)
+			goto exit;
+
+		kbasep_vinstr_reprogram(vinstr_ctx);
+	}
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump);
+
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	int                         rcode;
+	u64                         unused;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->suspended) {
+		rcode = -EBUSY;
+		goto exit;
+	}
+
+	rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
+	if (rcode)
+		goto exit;
+	rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx);
+	if (rcode)
+		goto exit;
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+	kbasep_vinstr_reprogram(vinstr_ctx);
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+
+void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	u64 unused;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+	if (!vinstr_ctx->nclients || vinstr_ctx->suspended) {
+		mutex_unlock(&vinstr_ctx->lock);
+		return;
+	}
+
+	kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
+	vinstr_ctx->suspended = true;
+	vinstr_ctx->suspended_clients = vinstr_ctx->waiting_clients;
+	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
+	mutex_unlock(&vinstr_ctx->lock);
+}
+
+void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx)
+{
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+	if (!vinstr_ctx->nclients || !vinstr_ctx->suspended) {
+		mutex_unlock(&vinstr_ctx->lock);
+		return;
+	}
+
+	vinstr_ctx->suspended = false;
+	vinstr_ctx->waiting_clients = vinstr_ctx->suspended_clients;
+	vinstr_ctx->reprogram = true;
+	kbasep_vinstr_reprogram(vinstr_ctx);
+	atomic_set(&vinstr_ctx->request_pending, 1);
+	wake_up_all(&vinstr_ctx->waitq);
+	mutex_unlock(&vinstr_ctx->lock);
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
new file mode 100755
index 0000000..d32462a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -0,0 +1,134 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwcnt_reader.h>
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context;
+struct kbase_vinstr_client;
+
+/*****************************************************************************/
+
+/**
+ * kbase_vinstr_init() - initialize the vinstr core
+ * @kbdev: kbase device
+ *
+ * Return: pointer to the vinstr context on success or NULL on failure
+ */
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_term() - terminate the vinstr core
+ * @vinstr_ctx: vinstr context
+ */
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader
+ * @vinstr_ctx: vinstr context
+ * @setup:      reader's configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwcnt_reader_setup(
+		struct kbase_vinstr_context        *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup);
+
+/**
+ * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping
+ * @vinstr_ctx: vinstr context
+ * @cli:        pointer where to store pointer to new vinstr client structure
+ * @setup:      hwc configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
+ *                                   client
+ * @vinstr_ctx:    vinstr context
+ * @setup:         reader's configuration
+ * @kernel_buffer: pointer to dump buffer
+ *
+ * setup->buffer_count and setup->fd are not used for kernel side clients.
+ *
+ * Return: pointer to client structure, or NULL on failure
+ */
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer);
+
+/**
+ * kbase_vinstr_hwc_dump - issue counter dump for vinstr client
+ * @cli:      pointer to vinstr client
+ * @event_id: id of event that triggered hwcnt dump
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_dump(
+		struct kbase_vinstr_client   *cli,
+		enum base_hwcnt_reader_event event_id);
+
+/**
+ * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for
+ *                          a given kbase context
+ * @cli: pointer to vinstr client
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_hwc_suspend - suspends hardware counter collection for
+ *                            a given kbase context
+ * @vinstr_ctx: vinstr context
+ */
+void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_hwc_resume - resumes hardware counter collection for
+ *                            a given kbase context
+ * @vinstr_ctx: vinstr context
+ */
+void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_dump_size - Return required size of dump buffer
+ * @kbdev: device pointer
+ *
+ * Return : buffer size in bytes
+ */
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_detach_client - Detach a client from the vinstr core
+ * @cli: Pointer to vinstr client
+ */
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
+
+#endif /* _KBASE_VINSTR_H_ */
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100755
index 0000000..5d6b402
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_KBASE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mali_slot_template,
+	TP_PROTO(int jobslot, unsigned int info_val),
+	TP_ARGS(jobslot, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, jobslot)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->jobslot = jobslot;
+		__entry->info_val = info_val;
+	),
+	TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val)
+);
+
+#define DEFINE_MALI_SLOT_EVENT(name) \
+DEFINE_EVENT(mali_slot_template, mali_##name, \
+	TP_PROTO(int jobslot, unsigned int info_val), \
+	TP_ARGS(jobslot, info_val))
+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT);
+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE);
+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT);
+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT);
+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+#undef DEFINE_MALI_SLOT_EVENT
+
+DECLARE_EVENT_CLASS(mali_refcount_template,
+	TP_PROTO(int refcount, unsigned int info_val),
+	TP_ARGS(refcount, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, refcount)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->refcount = refcount;
+		__entry->info_val = info_val;
+	),
+	TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val)
+);
+
+#define DEFINE_MALI_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(mali_refcount_template, mali_##name, \
+	TP_PROTO(int refcount, unsigned int info_val), \
+	TP_ARGS(refcount, info_val))
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK);
+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE);
+#undef DEFINE_MALI_REFCOUNT_EVENT
+
+DECLARE_EVENT_CLASS(mali_add_template,
+	TP_PROTO(int gpu_addr, unsigned int info_val),
+	TP_ARGS(gpu_addr, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, gpu_addr)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->gpu_addr = gpu_addr;
+		__entry->info_val = info_val;
+	),
+	TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val)
+);
+
+#define DEFINE_MALI_ADD_EVENT(name) \
+DEFINE_EVENT(mali_add_template, mali_##name, \
+	TP_PROTO(int gpu_addr, unsigned int info_val), \
+	TP_ARGS(gpu_addr, info_val))
+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY);
+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL);
+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT);
+DEFINE_MALI_ADD_EVENT(JM_IRQ);
+DEFINE_MALI_ADD_EVENT(JM_IRQ_END);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP);
+DEFINE_MALI_ADD_EVENT(PM_PWRON);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM);
+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+#undef DEFINE_MALI_ADD_EVENT
+
+#endif /* _TRACE_MALI_KBASE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100755
index 0000000..93fc5ea
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,207 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#include <linux/tracepoint.h>
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - called from mali_kbase_core_linux.c
+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event,
+	TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid,
+			unsigned char job_id),
+	TP_ARGS(event_id, tgid, pid, job_id),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned int, tgid)
+		__field(unsigned int, pid)
+		__field(unsigned char, job_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->tgid = tgid;
+		__entry->pid = pid;
+		__entry->job_id = job_id;
+	),
+	TP_printk("event=%u tgid=%u pid=%u job_id=%u",
+		__entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+);
+
+/**
+ * mali_pm_status - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF)
+ */
+TRACE_EVENT(mali_pm_status,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power up
+ */
+TRACE_EVENT(mali_pm_power_on,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power down
+ */
+TRACE_EVENT(mali_pm_power_off,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_page_fault_insert_pages - Called by page_fault_worker()
+ * it reports an MMU page fault resulting in new pages being mapped.
+ * @event_id: MMU address space number.
+ * @value: number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages,
+	TP_PROTO(int event_id, unsigned long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+		__field(unsigned long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space()
+ * it reports that a certain MMU address space is in use now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_in_use,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal()
+ * it reports that a certain MMU address space has been released now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_released,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
+ *                                 and by kbase_atomic_sub_pages()
+ * it reports that the total number of allocated pages is changed.
+ * @event_id: number of pages to be added or subtracted (according to the sign).
+ */
+TRACE_EVENT(mali_total_alloc_pages_change,
+	TP_PROTO(long long int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(long long int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%lld", __entry->event_id)
+);
+
+/**
+ * mali_sw_counter - not currently used
+ * @event_id: counter id
+ */
+TRACE_EVENT(mali_sw_counter,
+	TP_PROTO(unsigned int event_id, signed long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+		__field(long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %d = %lld", __entry->event_id, __entry->value)
+);
+
+#endif				/*  _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
new file mode 100755
index 0000000..9945293
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Kernel-wide include for common macros and types.
+ */
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define U8_MAX          ((u8)~0U)
+#define S8_MAX          ((s8)(U8_MAX>>1))
+#define S8_MIN          ((s8)(-S8_MAX - 1))
+#define U16_MAX         ((u16)~0U)
+#define S16_MAX         ((s16)(U16_MAX>>1))
+#define S16_MIN         ((s16)(-S16_MAX - 1))
+#define U32_MAX         ((u32)~0U)
+#define S32_MAX         ((s32)(U32_MAX>>1))
+#define S32_MIN         ((s32)(-S32_MAX - 1))
+#define U64_MAX         ((u64)~0ULL)
+#define S64_MAX         ((s64)(U64_MAX>>1))
+#define S64_MIN         ((s64)(-S64_MAX - 1))
+#endif /* LINUX_VERSION_CODE */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#define SIZE_MAX        (~(size_t)0)
+#endif /* LINUX_VERSION_CODE */
+
+/**
+ * MIN - Return the lesser of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * Refer to MAX macro for more details
+ */
+#define MIN(x, y)	((x) < (y) ? (x) : (y))
+
+/**
+ * MAX -  Return the greater of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as MIN it is guaranteed to return
+ * the one that MIN didn't return. This is significant for types where not
+ * all values are comparable e.g. NaNs in floating-point types. But if you want
+ * to retrieve the min and max of two values, consider using a conditional swap
+ * instead.
+ */
+#define MAX(x, y)	((x) < (y) ? (y) : (x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * much support API backwards compatibility.
+ */
+#define CSTD_UNUSED(x)	((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile time macros turn a function-like macro in to a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP(...)	((void)#__VA_ARGS__)
+
+/**
+ * Function-like macro for converting a pointer in to a u64 for storing into
+ * an external data structure. This is commonly used when pairing a 32-bit
+ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
+ * is complex and a straight cast does not work reliably as pointers are
+ * often considered as signed.
+ */
+#define PTR_TO_U64(x)	((uint64_t)((uintptr_t)(x)))
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1(x)	#x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be used
+ * if the macro is defined in a way which may have no value; use the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2(x)	CSTD_STR1(x)
+
+/**
+ * Specify an assertion value which is evaluated at compile time. Recommended
+ * usage is specification of a @c static @c INLINE function containing all of
+ * the assertions thus:
+ *
+ * @code
+ * static INLINE [module]_compile_time_assertions( void )
+ * {
+ *     COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
+ * }
+ * @endcode
+ *
+ * @note Use @c static not @c STATIC. We never want to turn off this @c static
+ * specification for testing purposes.
+ */
+#define CSTD_COMPILE_TIME_ASSERT(expr) \
+	do { switch (0) { case 0: case (expr):; } } while (false)
+
+#endif /* _MALISW_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
new file mode 100644
index 0000000..a509cbd
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDG_COHERENCY_H_
+#define _MIDG_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE      1
+#define COHERENCY_NONE     31
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+
+#endif /* _MIDG_COHERENCY_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100755
index 0000000..7498407
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,570 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Begin Register Offsets
+ */
+
+#define GPU_CONTROL_BASE        0x0000
+#define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
+#define GPU_ID                  0x000	/* (RO) GPU and revision identifier */
+#define L2_FEATURES             0x004	/* (RO) Level 2 cache features */
+#define SUSPEND_SIZE            0x008   /* (RO) Fixed-function suspend buffer
+						size */
+#define TILER_FEATURES          0x00C	/* (RO) Tiler Features */
+#define MEM_FEATURES            0x010	/* (RO) Memory system features */
+#define MMU_FEATURES            0x014	/* (RO) MMU features */
+#define AS_PRESENT              0x018	/* (RO) Address space slots present */
+#define JS_PRESENT              0x01C	/* (RO) Job slots present */
+#define GPU_IRQ_RAWSTAT         0x020	/* (RW) */
+#define GPU_IRQ_CLEAR           0x024	/* (WO) */
+#define GPU_IRQ_MASK            0x028	/* (RW) */
+#define GPU_IRQ_STATUS          0x02C	/* (RO) */
+
+/* IRQ flags */
+#define GPU_FAULT               (1 << 0)	/* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS     (1 << 7)	/* More than one GPU Fault occurred. */
+#define RESET_COMPLETED         (1 << 8)	/* Set when a reset has completed. Intended to use with SOFT_RESET
+						   commands which may take time. */
+#define POWER_CHANGED_SINGLE    (1 << 9)	/* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL       (1 << 10)	/* Set when all cores have finished powering up or down
+						   and the power manager is idle. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16)	/* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED  (1 << 17)	/* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+			| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#define GPU_COMMAND             0x030	/* (WO) */
+#define GPU_STATUS              0x034	/* (RO) */
+#define LATEST_FLUSH            0x038	/* (RO) */
+
+#define GROUPS_L2_COHERENT      (1 << 0)	/* Cores groups are l2 coherent */
+
+#define GPU_FAULTSTATUS         0x03C	/* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO     0x040	/* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI     0x044	/* (RO) GPU exception fault address, high word */
+
+#define PWR_KEY                 0x050	/* (WO) Power manager key register */
+#define PWR_OVERRIDE0           0x054	/* (RW) Power manager override settings */
+#define PWR_OVERRIDE1           0x058	/* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO          0x060	/* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI          0x064	/* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG           0x068	/* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN            0x06C	/* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN        0x070	/* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN         0x074	/* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN        0x07C	/* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO          0x090	/* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI          0x094	/* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO            0x098	/* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI            0x09C	/* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS		0x0A0	/* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4	/* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8	/* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES         0x0AC	/* (RO) Thread features */
+
+#define TEXTURE_FEATURES_0      0x0B0	/* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1      0x0B4	/* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2      0x0B8	/* (RO) Support flags for indexed texture formats 64..95 */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define JS0_FEATURES            0x0C0	/* (RO) Features of job slot 0 */
+#define JS1_FEATURES            0x0C4	/* (RO) Features of job slot 1 */
+#define JS2_FEATURES            0x0C8	/* (RO) Features of job slot 2 */
+#define JS3_FEATURES            0x0CC	/* (RO) Features of job slot 3 */
+#define JS4_FEATURES            0x0D0	/* (RO) Features of job slot 4 */
+#define JS5_FEATURES            0x0D4	/* (RO) Features of job slot 5 */
+#define JS6_FEATURES            0x0D8	/* (RO) Features of job slot 6 */
+#define JS7_FEATURES            0x0DC	/* (RO) Features of job slot 7 */
+#define JS8_FEATURES            0x0E0	/* (RO) Features of job slot 8 */
+#define JS9_FEATURES            0x0E4	/* (RO) Features of job slot 9 */
+#define JS10_FEATURES           0x0E8	/* (RO) Features of job slot 10 */
+#define JS11_FEATURES           0x0EC	/* (RO) Features of job slot 11 */
+#define JS12_FEATURES           0x0F0	/* (RO) Features of job slot 12 */
+#define JS13_FEATURES           0x0F4	/* (RO) Features of job slot 13 */
+#define JS14_FEATURES           0x0F8	/* (RO) Features of job slot 14 */
+#define JS15_FEATURES           0x0FC	/* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n)      GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define SHADER_PRESENT_LO       0x100	/* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI       0x104	/* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO        0x110	/* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI        0x114	/* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO           0x120	/* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI           0x124	/* (RO) Level 2 cache present bitmap, high word */
+
+
+#define SHADER_READY_LO         0x140	/* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI         0x144	/* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO          0x150	/* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI          0x154	/* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO             0x160	/* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI             0x164	/* (RO) Level 2 cache ready bitmap, high word */
+
+
+#define SHADER_PWRON_LO         0x180	/* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI         0x184	/* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO          0x190	/* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI          0x194	/* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO             0x1A0	/* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI             0x1A4	/* (WO) Level 2 cache power on bitmap, high word */
+
+#define SHADER_PWROFF_LO        0x1C0	/* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI        0x1C4	/* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO         0x1D0	/* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI         0x1D4	/* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO            0x1E0	/* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI            0x1E4	/* (WO) Level 2 cache power off bitmap, high word */
+
+#define SHADER_PWRTRANS_LO      0x200	/* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI      0x204	/* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO       0x210	/* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI       0x214	/* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO          0x220	/* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI          0x224	/* (RO) Level 2 cache power transition bitmap, high word */
+
+#define SHADER_PWRACTIVE_LO     0x240	/* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI     0x244	/* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO      0x250	/* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI      0x254	/* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO         0x260	/* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI         0x264	/* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES      0x300	/* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304	/* (RW) Coherency enable */
+
+#define JM_CONFIG               0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
+#define SHADER_CONFIG           0xF04	/* (RW) Shader core configuration settings (Implementation specific register) */
+#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define L2_MMU_CONFIG           0xF0C	/* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+#define JOB_CONTROL_BASE        0x1000
+
+#define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT         0x000	/* Raw interrupt status register */
+#define JOB_IRQ_CLEAR           0x004	/* Interrupt clear register */
+#define JOB_IRQ_MASK            0x008	/* Interrupt mask register */
+#define JOB_IRQ_STATUS          0x00C	/* Interrupt status register */
+#define JOB_IRQ_JS_STATE        0x010	/* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE        0x014	/* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
+
+#define JOB_SLOT0               0x800	/* Configuration registers for job slot 0 */
+#define JOB_SLOT1               0x880	/* Configuration registers for job slot 1 */
+#define JOB_SLOT2               0x900	/* Configuration registers for job slot 2 */
+#define JOB_SLOT3               0x980	/* Configuration registers for job slot 3 */
+#define JOB_SLOT4               0xA00	/* Configuration registers for job slot 4 */
+#define JOB_SLOT5               0xA80	/* Configuration registers for job slot 5 */
+#define JOB_SLOT6               0xB00	/* Configuration registers for job slot 6 */
+#define JOB_SLOT7               0xB80	/* Configuration registers for job slot 7 */
+#define JOB_SLOT8               0xC00	/* Configuration registers for job slot 8 */
+#define JOB_SLOT9               0xC80	/* Configuration registers for job slot 9 */
+#define JOB_SLOT10              0xD00	/* Configuration registers for job slot 10 */
+#define JOB_SLOT11              0xD80	/* Configuration registers for job slot 11 */
+#define JOB_SLOT12              0xE00	/* Configuration registers for job slot 12 */
+#define JOB_SLOT13              0xE80	/* Configuration registers for job slot 13 */
+#define JOB_SLOT14              0xF00	/* Configuration registers for job slot 14 */
+#define JOB_SLOT15              0xF80	/* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r)      (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO             0x00	/* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI             0x04	/* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO             0x08	/* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI             0x0C	/* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
+
+#define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
+#define JS_STATUS              0x24	/* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO        0x40	/* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI        0x44	/* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
+
+#define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
+
+#define MEMORY_MANAGEMENT_BASE  0x2000
+#define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT         0x000	/* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR           0x004	/* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK            0x008	/* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS          0x00C	/* (RO) Interrupt status register */
+
+#define MMU_AS0                 0x400	/* Configuration registers for address space 0 */
+#define MMU_AS1                 0x440	/* Configuration registers for address space 1 */
+#define MMU_AS2                 0x480	/* Configuration registers for address space 2 */
+#define MMU_AS3                 0x4C0	/* Configuration registers for address space 3 */
+#define MMU_AS4                 0x500	/* Configuration registers for address space 4 */
+#define MMU_AS5                 0x540	/* Configuration registers for address space 5 */
+#define MMU_AS6                 0x580	/* Configuration registers for address space 6 */
+#define MMU_AS7                 0x5C0	/* Configuration registers for address space 7 */
+#define MMU_AS8                 0x600	/* Configuration registers for address space 8 */
+#define MMU_AS9                 0x640	/* Configuration registers for address space 9 */
+#define MMU_AS10                0x680	/* Configuration registers for address space 10 */
+#define MMU_AS11                0x6C0	/* Configuration registers for address space 11 */
+#define MMU_AS12                0x700	/* Configuration registers for address space 12 */
+#define MMU_AS13                0x740	/* Configuration registers for address space 13 */
+#define MMU_AS14                0x780	/* Configuration registers for address space 14 */
+#define MMU_AS15                0x7C0	/* Configuration registers for address space 15 */
+
+#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO         0x00	/* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI         0x04	/* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO          0x08	/* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI          0x0C	/* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO         0x10	/* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI         0x14	/* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND             0x18	/* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS         0x1C	/* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO     0x20	/* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
+#define AS_STATUS              0x28	/* (RO) Status flags for address space n */
+
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
+
+/* End Register Offsets */
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+   MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS   16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n)      (1UL << (n))
+#define MMU_BUS_ERROR(n)       (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK   0xfffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED  (0u << 0)
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY  (1u << 1)
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE     (3u << 0)
+#define AS_TRANSTAB_LPAE_READ_INNER        (1u << 2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER       (1u << 4)
+
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK      0x00000003
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                    (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT       (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT        (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT      (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG             (0x3<<3)
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT      (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK                  (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC                (0x0<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX                    (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ                  (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE                 (0x3<<8)
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
+
+/*
+ * Begin Command Values
+ */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP         0x00	/* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START       0x01	/* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP   0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK        0x07    /* Mask of bits currently in use by the HW */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP         0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK        0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK      0x03	/* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH       0x04	/* Flush all L2 caches then issue a flush region command to all MMUs
+					   (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT    0x04	/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM   0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
+					   flush all L2 caches then issue a flush region command to all MMUs */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION        (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN            (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU                    (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG               (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
+ */
+
+/* Group of values representing the job status insead a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE   0x00
+#define JS_STATUS_INTERRUPTED         (JS_STATUS_NO_EXCEPTION_BASE + 0x02)	/* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED             (JS_STATUS_NO_EXCEPTION_BASE + 0x03)	/* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED          (JS_STATUS_NO_EXCEPTION_BASE + 0x04)	/* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE          0x40
+#define JS_STATUS_CONFIG_FAULT        (JS_STATUS_FAULT_BASE)	/* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT         (JS_STATUS_FAULT_BASE + 0x01)	/* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT          (JS_STATUS_FAULT_BASE + 0x02)	/* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT         (JS_STATUS_FAULT_BASE + 0x03)	/* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT      (JS_STATUS_FAULT_BASE + 0x04)	/* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT           (JS_STATUS_FAULT_BASE + 0x08)	/* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE  0x50
+#define JS_STATUS_INSTR_INVALID_PC        (JS_STATUS_INSTRUCTION_FAULT_BASE)	/* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01)	/* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02)	/* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03)	/* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT         (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04)	/* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05)	/* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06)	/* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT      (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08)	/* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT        (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09)	/* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A)	/* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE   0x60
+#define JS_STATUS_OUT_OF_MEMORY       (JS_STATUS_MEMORY_FAULT_BASE)	/* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN             0x7F	/* 0x7F means UNKNOWN */
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP                0x00	/* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET         0x01	/* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET         0x02	/* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR       0x03	/* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE      0x04	/* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START  0x05	/* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP   0x06	/* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES       0x07	/* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES   0x08	/* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09	/* Places the GPU in protected mode */
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE           (1 << 2)	/* Set if the performance counters are active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE   (1 << 7)	/* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT      0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT        4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+
+#define PRFCNT_CONFIG_MODE_OFF    0	/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_MANUAL 1	/* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
+#define PRFCNT_CONFIG_MODE_TILE   2	/* The performance counters are enabled, and are written out each time a tile finishes rendering. */
+
+/* AS<n>_MEMATTR values: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
+
+/* Use GPU implementation-defined  caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC           0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
+
+/* Symbol for default MEMATTR to use */
+
+/* Default is - HW implementation defined caching */
+#define AS_MEMATTR_INDEX_DEFAULT               0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE           3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL    1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC           2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF        3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA              4
+
+/* JS<n>_FEATURES register */
+
+#define JS_FEATURE_NULL_JOB              (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB         (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB       (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB           (1u << 4)
+#define JS_FEATURE_VERTEX_JOB            (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB          (1u << 6)
+#define JS_FEATURE_TILER_JOB             (1u << 7)
+#define JS_FEATURE_FUSED_JOB             (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB          (1u << 9)
+
+/* End JS<n>_FEATURES register */
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT        (24)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS              (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT       (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER      (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF         (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT       (26)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES             (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT      (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER     (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF        (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED  0
+#define IMPLEMENTATION_SILICON      1
+#define IMPLEMENTATION_FPGA         2
+#define IMPLEMENTATION_MODEL        3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT     256
+#define THREAD_MWS_DEFAULT    256
+#define THREAD_MBS_DEFAULT    256
+#define THREAD_MR_DEFAULT     1024
+#define THREAD_MTQ_DEFAULT    4
+#define THREAD_MTGS_DEFAULT   10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+
+#define SC_ALT_COUNTERS             (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL  (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD   (1ul << 6)
+#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
+#define SC_LS_PAUSEBUFFER_DISABLE   (1ul << 16)
+#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
+#define SC_ENABLE_TEXGRD_FLAGS      (1ul << 25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+
+#define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
+
+/* End TILER_CONFIG register */
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
new file mode 100755
index 0000000..bd5f661
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
@@ -0,0 +1,396 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali_timeline
+
+#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _MALI_TIMELINE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mali_timeline_atoms_in_flight,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int tgid,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		tgid,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, tgid)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->tgid = tgid;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT,
+				(int)__entry->ts_sec,
+				(int)__entry->ts_nsec,
+				__entry->tgid,
+				__entry->count)
+);
+
+
+TRACE_EVENT(mali_timeline_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->atom_id,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_action,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int active),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		active),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, active)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->active = active;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->active)
+
+);
+
+TRACE_EVENT(mali_timeline_l2_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int state),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		state),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, state)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->state = state;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->state)
+
+);
+TRACE_EVENT(mali_timeline_pm_event,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int pm_event_type,
+		unsigned int pm_event_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		pm_event_type,
+		pm_event_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, pm_event_type)
+			__field(unsigned int, pm_event_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->pm_event_type = pm_event_type;
+		__entry->pm_event_id = pm_event_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->pm_event_type, __entry->pm_event_id)
+
+);
+
+TRACE_EVENT(mali_timeline_slot_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_pm_checktrans,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int trans_code,
+		int trans_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		trans_code,
+		trans_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, trans_code)
+			__field(int, trans_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->trans_code = trans_code;
+		__entry->trans_id = trans_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->trans_id)
+
+);
+
+TRACE_EVENT(mali_timeline_context_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->count)
+);
+
+#endif /* _MALI_TIMELINE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_uk.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
new file mode 100755
index 0000000..841d03f
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,141 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif				/* __cplusplus */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
+ * identifier to select a UKK client to the uku_open() function.
+ *
+ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
+ * enumeration and the uku_open() implemenation for the various OS ports need to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+	/**
+	 * Value used to identify the Base driver UK client.
+	 */
+	UK_CLIENT_MALI_T600_BASE,
+
+	/** The number of uk clients supported. This must be the last member of the enum */
+	UK_CLIENT_COUNT
+};
+
+/**
+ * Each function callable through the UK interface has a unique number.
+ * Functions provided by UK clients start from number UK_FUNC_ID.
+ * Numbers below UK_FUNC_ID are used for internal UK functions.
+ */
+enum uk_func {
+	UKP_FUNC_ID_CHECK_VERSION,   /**< UKK Core internal function */
+	/**
+	 * Each UK client numbers the functions they provide starting from
+	 * number UK_FUNC_ID. This number is then eventually assigned to the
+	 * id field of the union uk_header structure when preparing to make a
+	 * UK call. See your UK client for a list of their function numbers.
+	 */
+	UK_FUNC_ID = 512
+};
+
+/**
+ * Arguments for a UK call are stored in a structure. This structure consists
+ * of a fixed size header and a payload. The header carries a 32-bit number
+ * identifying the UK function to be called (see uk_func). When the UKK client
+ * receives this header and executed the requested UK function, it will use
+ * the same header to store the result of the function in the form of a
+ * int return code. The size of this structure is such that the
+ * first member of the payload following the header can be accessed efficiently
+ * on a 32 and 64-bit kernel and the structure has the same size regardless
+ * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
+ * accordingly in the OS specific mali_uk_os.h header file.
+ */
+union uk_header {
+	/**
+	 * 32-bit number identifying the UK function to be called.
+	 * Also see uk_func.
+	 */
+	u32 id;
+	/**
+	 * The int return code returned by the called UK function.
+	 * See the specification of the particular UK function you are
+	 * calling for the meaning of the error codes returned. All
+	 * UK functions return 0 on success.
+	 */
+	u32 ret;
+	/*
+	 * Used to ensure 64-bit alignment of this union. Do not remove.
+	 * This field is used for padding and does not need to be initialized.
+	 */
+	u64 sizer;
+};
+
+/**
+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
+ */
+struct uku_version_check_args {
+	union uk_header header;
+		  /**< UK call header */
+	u16 major;
+	   /**< This field carries the user-side major version on input and the kernel-side major version on output */
+	u16 minor;
+	   /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
+	u8 padding[4];
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif				/* __cplusplus */
+#endif				/* _UK_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild
new file mode 100755
index 0000000..558657b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+	platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+	obj-y += $(platform_name)/
+endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100755
index 0000000..8fb4e91
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name is the name set in MALI_PLATFORM_THIRDPARTY_NAME
+#
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
new file mode 100755
index 0000000..de8849e
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+USE_GPPLL?=0
+ifdef CONFIG_AM_VIDEO
+    USE_GPPLL:=1
+endif
+
+ccflags-y += -DAMLOGIC_GPU_USE_GPPLL=$(USE_GPPLL)
+
+ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-y += platform/devicetree/mali_clock.c
+obj-y += platform/devicetree/mpgpu.c
+obj-y += platform/devicetree/meson_main2.c
+obj-y += platform/devicetree/platform_gx.c
+obj-y += platform/devicetree/scaling.c
+obj-y += mali_kbase_runtime_pm.c
+obj-y += mali_kbase_config_devicetree.c
+else ifeq ($(CONFIG_MALI_MIDGARD),m)
+SRC += platform/devicetree/mali_clock.c
+SRC += platform/devicetree/mpgpu.c
+SRC += platform/devicetree/meson_main2.c
+SRC += platform/devicetree/platform_gx.c
+SRC += platform/devicetree/scaling.c
+SRC += platform/devicetree/mali_kbase_runtime_pm.c
+SRC += platform/devicetree/mali_kbase_config_devicetree.c
+endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
new file mode 100644
index 0000000..35249a5
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
@@ -0,0 +1,653 @@
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+#ifndef AML_CLK_LOCK_ERROR
+#define AML_CLK_LOCK_ERROR 1
+#endif
+
+static unsigned gpu_dbg_level = 0;
+module_param(gpu_dbg_level, uint, 0644);
+MODULE_PARM_DESC(gpu_dbg_level, "gpu debug level");
+
+#define gpu_dbg(level, fmt, arg...)			\
+	do {			\
+		if (gpu_dbg_level >= (level))		\
+		printk("gpu_debug"fmt , ## arg); 	\
+	} while (0)
+
+#define GPU_CLK_DBG(fmt, arg...)
+
+//disable print
+#define _dev_info(...)
+
+//static DEFINE_SPINLOCK(lock);
+static mali_plat_info_t* pmali_plat = NULL;
+//static u32 mali_extr_backup = 0;
+//static u32 mali_extr_sample_backup = 0;
+struct timeval start;
+struct timeval end;
+int mali_pm_statue = 0;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali_0_parent = dvfs_tbl->clkp_handle;
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+#ifdef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+#endif
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_set_parent(clk_mali_0, clk_mali_0_parent);
+
+	clk_prepare_enable(clk_mali_0);
+
+	clk_set_parent(clk_mali, clk_mali_0);
+
+#ifdef AML_CLK_LOCK_ERROR
+	clk_set_parent(clk_mali_1, clk_mali_0_parent);
+	clk_prepare_enable(clk_mali_1);
+#endif
+
+	GPU_CLK_DBG("%s:enable(%d), %s:enable(%d)\n",
+	  clk_mali_0->name,  clk_mali_0->enable_count,
+	  clk_mali_0_parent->name, clk_mali_0_parent->enable_count);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+	struct clk *clk_mali_x   = NULL;
+	struct clk *clk_mali_x_parent = NULL;
+	struct clk *clk_mali_x_old = NULL;
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+	clk_mali_x_old  = clk_get_parent(clk_mali);
+
+	if (!clk_mali_x_old) {
+		printk("gpu: could not get clk_mali_x_old or clk_mali_x_old\n");
+		return 0;
+	}
+	if (clk_mali_x_old == clk_mali_0) {
+		clk_mali_x = clk_mali_1;
+	} else if (clk_mali_x_old == clk_mali_1) {
+		clk_mali_x = clk_mali_0;
+	} else {
+		printk("gpu: unmatched clk_mali_x_old\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG("idx=%d, clk_freq=%d\n", idx, dvfs_tbl->clk_freq);
+	clk_mali_x_parent = dvfs_tbl->clkp_handle;
+	if (!clk_mali_x_parent) {
+		printk("gpu: could not get clk_mali_x_parent\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x_parent, dvfs_tbl->clkp_freq);
+	GPU_CLK_DBG();
+	ret = clk_set_parent(clk_mali_x, clk_mali_x_parent);
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x, dvfs_tbl->clk_freq);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	ret = clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG("new %s:enable(%d)\n", clk_mali_x->name,  clk_mali_x->enable_count);
+	do_gettimeofday(&start);
+	udelay(1);// delay 10ns
+	do_gettimeofday(&end);
+	ret = clk_set_parent(clk_mali, clk_mali_x);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	GPU_CLK_DBG("old %s:enable(%d)\n", clk_mali_x_old->name,  clk_mali_x_old->enable_count);
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "ao io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	mpdata->dvfs_table_size = 0;
+
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+	dev_notice(&pdev->dev, "dvfs table is %d\n", mpdata->dvfs_table_size);
+	dev_notice(&pdev->dev, "dvfs table addr %p, ele size=%zd\n",
+			mpdata->dvfs_table,
+			sizeof(mpdata->dvfs_table[0]));
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "clk_gpu");
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_gpu_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_gpu_1");
+	if (IS_ERR(mpdata->clk_mali) || IS_ERR(mpdata->clk_mali_0) || IS_ERR(mpdata->clk_mali_1)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+#else
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	//mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+
+	GPU_CLK_DBG();
+	do_gettimeofday(&start);
+	ret = clk_set_rate(clk_mali, dvfs_tbl->clk_freq);
+	do_gettimeofday(&end);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	GPU_CLK_DBG();
+	clk_disable_unprepare(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+#if 0
+#ifdef MESON_CPU_VERSION_OPS
+		if (is_meson_gxbbm_cpu()) {
+			if (dvfs_tbl->clk_freq >= GXBBM_MAX_GPU_FREQ)
+				continue;
+		}
+#endif
+#endif
+#if  0
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+#endif
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+	_dev_info(&pdev->dev, "clock dvfs table size is %d\n", mpdata->dvfs_table_size);
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "gpu_mux");
+#if 0
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_mali_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_mali_1");
+#endif
+	if (IS_ERR(mpdata->clk_mali)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
new file mode 100644
index 0000000..9b8b392
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
@@ -0,0 +1,37 @@
+#ifndef __MALI_CLOCK_H__
+#define __MALI_CLOCK_H__
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <linux/clk.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 29))
+#include <linux/amlogic/iomap.h>
+#endif
+
+#ifndef HHI_MALI_CLK_CNTL
+#define HHI_MALI_CLK_CNTL   0x6C
+#define mplt_read(r)        readl((pmali_plat->reg_base_hiubus) + ((r)<<2))
+#define mplt_write(r, v)    writel((v), ((pmali_plat->reg_base_hiubus) + ((r)<<2)))
+#define mplt_setbits(r, m)  mplt_write((r), (mplt_read(r) | (m)));
+#define mplt_clrbits(r, m)  mplt_write((r), (mplt_read(r) & (~(m))));
+#endif
+
+//extern int mali_clock_init(struct platform_device *dev);
+int mali_clock_init_clk_tree(struct platform_device *pdev);
+
+typedef int (*critical_t)(size_t param);
+int mali_clock_critical(critical_t critical, size_t param);
+
+int mali_clock_init(mali_plat_info_t*);
+int mali_clock_set(unsigned int index);
+void disable_clock(void);
+void enable_clock(void);
+u32 get_mali_freq(u32 idx);
+void set_str_src(u32 data);
+int mali_dt_info(struct platform_device *pdev,
+        struct mali_plat_info_t *mpdata);
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
new file mode 100755
index 0000000..1267b6f
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
@@ -0,0 +1,109 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/version.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static unsigned long t83x_static_power(unsigned long voltage)
+{
+#if 0
+	struct thermal_zone_device *tz;
+	unsigned long temperature, temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10);
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	tz = thermal_zone_get_zone_by_name("gpu");
+	if (IS_ERR(tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(tz));
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	} else {
+		int ret;
+
+		ret = tz->ops->get_temp(tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(2 * temp_cubed)
+			- (80 * temp_squared)
+			+ (4700 * temp)
+			+ 32000;
+
+	return (((coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+#else
+	return 0;
+#endif
+}
+
+static unsigned long t83x_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+	const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */
+
+	return (coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+struct devfreq_cooling_ops t83x_model_ops = {
+#else
+struct devfreq_cooling_power t83x_model_ops = {
+#endif
+	.get_static_power = t83x_static_power,
+	.get_dynamic_power = t83x_dynamic_power,
+};
+
+#endif
+
+#include <mali_kbase_config.h>
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &dummy_platform_config;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
new file mode 100755
index 0000000..adc052c
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (750000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (100000)
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (NULL)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+extern struct kbase_platform_funcs_conf dt_funcs_conf;
+#define PLATFORM_FUNCS (&dt_funcs_conf)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#ifdef CONFIG_DEVFREQ_THERMAL
+#define POWER_MODEL_CALLBACKS (&t83x_model_ops)
+extern struct devfreq_cooling_ops t83x_model_ops;
+#else
+#define POWER_MODEL_CALLBACKS (NULL)
+#endif
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
new file mode 100755
index 0000000..3b3f095
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
@@ -0,0 +1,244 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+
+void *reg_base_hiubus = NULL;
+u32  override_value_aml = 0;
+static int first = 1;
+
+static void  Mali_pwr_on_with_kdev ( struct kbase_device *kbdev, uint32_t  mask)
+{
+    uint32_t    part1_done;
+    part1_done = 0;
+    Mali_WrReg(0x0, 0x0, 0x0000024, 0xffffffff); // clear interrupts
+
+    if ((mask & 0x1) != 0 ) {
+        Mali_WrReg(0x0, 0x0, 0x00000190, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x00000194, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x000001a0, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x000001a4, 0xffffffff);    // Power on all cores
+    }
+
+    if ( (mask >> 1) != 0 ) {
+        Mali_WrReg(0x0, 0x0, 0x00000180, mask >> 1);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x00000184, 0x0);    // Power on all cores
+    }
+
+    if ( mask != 0 ) {
+        udelay(10);
+        part1_done = Mali_RdReg(0x0, 0x0, 0x0000020);
+        //printk("%s, %d\n", __func__, __LINE__);
+        while((part1_done ==0)) { part1_done = Mali_RdReg(0x0, 0x0, 0x00000020); udelay(10); }
+        //printk("Mali_pwr_on:gpu_irq : %x\n", part1_done);
+        Mali_WrReg(0x0, 0x0, 0x0000024, 0xffffffff); // clear interrupts
+    }
+}
+
+static void gpu_power_main( struct kbase_device *kbdev) {
+
+    Mali_pwr_on ( 0x7);
+
+    printk("%s, %d\n", __func__, __LINE__);
+}
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	int ret;
+	struct platform_device *pdev = to_platform_device(kbdev->dev);
+	struct resource *reg_res;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+    //printk("20151013, %s, %d\n", __FILE__, __LINE__);
+    if (first == 0) goto ret;
+
+    reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+    if (!reg_res) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) 
+        reg_base_hiubus = ioremap(reg_res->start, resource_size(reg_res));
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+
+//JOHNT
+    // Level reset mail
+
+    // Level reset mail
+    //Wr(P_RESET2_MASK, ~(0x1<<14));
+    //Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    //Wr(P_RESET2_LEVEL, 0xffffffff);
+    //Wr(P_RESET0_LEVEL, 0xffffffff);
+
+    value = Rd(RESET0_MASK);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_MASK, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+///////////////
+    value = Rd(RESET2_MASK);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_MASK, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value | ((0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value | ((0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    udelay(10); // OR POLL for reset done
+
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+
+    gpu_power_main(kbdev);
+    //printk("set PWR_ORRIDE, reg=%p, reg_start=%llx, reg_size=%zx, reg_mapped=%p\n",
+    //        kbdev->reg, kbdev->reg_start, kbdev->reg_size, reg_base_hiubus);
+	dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+			(void *)kbdev->dev->pm_domain);
+
+    first = 0;
+    //printk("%s, %d\n", __FILE__, __LINE__);
+ret:
+	ret = pm_runtime_get_sync(kbdev->dev);
+    udelay(100);
+#if 1
+
+    core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+    l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+    tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+    //printk("core_ready=%llx, l2_ready=%llx, tiler_ready=%llx\n", core_ready, l2_ready, tiler_ready);
+#endif
+	dev_dbg(kbdev->dev, "pm_runtime_get returned %d\n", ret);
+
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+    //printk("%s, %d\n", __FILE__, __LINE__);
+#if 0
+    iounmap(reg_base_hiubus);
+    reg_base_hiubus = NULL;
+#endif
+	u64 core_ready  = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+	u64 l2_ready    = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+	u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+
+	dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+	if (core_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, core_ready, ACTION_PWROFF);
+
+	if (l2_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_ready, ACTION_PWROFF);
+
+	if (tiler_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_ready, ACTION_PWROFF);
+
+	pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+	pm_runtime_enable(kbdev->dev);
+
+	return 0;
+}
+
+void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+	pm_runtime_disable(kbdev->dev);
+}
+
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+	return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+	int ret = pm_callback_runtime_on(kbdev);
+
+	WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+	pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback = pm_callback_suspend,
+	.power_resume_callback = pm_callback_resume,
+#ifdef KBASE_PM_RUNTIME
+	.power_runtime_init_callback = kbase_device_runtime_init,
+	.power_runtime_term_callback = kbase_device_runtime_disable,
+	.power_runtime_on_callback = pm_callback_runtime_on,
+	.power_runtime_off_callback = pm_callback_runtime_off,
+#else				/* KBASE_PM_RUNTIME */
+	.power_runtime_init_callback = NULL,
+	.power_runtime_term_callback = NULL,
+	.power_runtime_on_callback = NULL,
+	.power_runtime_off_callback = NULL,
+#endif				/* KBASE_PM_RUNTIME */
+};
+
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
new file mode 100644
index 0000000..41185d0
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
@@ -0,0 +1,15 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/kernel.h>
+#ifndef MALI_PLATFORM_H_
+#define MALI_PLATFORM_H_
+
+extern u32 mali_gp_reset_fail;
+extern u32 mali_core_timeout;
+
+#endif /* MALI_PLATFORM_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
new file mode 100644
index 0000000..b541090
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.h
+ * Example core scaling policy.
+ */
+
+#ifndef __ARM_CORE_SCALING_H__
+#define __ARM_CORE_SCALING_H__
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+
+enum mali_scale_mode_t {
+	MALI_PP_SCALING = 0,
+	MALI_PP_FS_SCALING,
+	MALI_SCALING_DISABLE,
+	MALI_TURBO_MODE,
+	MALI_SCALING_MODE_MAX
+};
+
+typedef struct mali_dvfs_threshold_table {
+	uint32_t    freq_index;
+	uint32_t    voltage;
+	uint32_t    keep_count;
+	uint32_t    downthreshold;
+	uint32_t    upthreshold;
+    uint32_t    clk_freq;
+    const char  *clk_parent;
+    struct clk  *clkp_handle;
+    uint32_t    clkp_freq;
+} mali_dvfs_threshold_table;
+
+/**
+ *  restrictions on frequency and number of pp.
+ */
+typedef struct mali_scale_info_t {
+	u32 minpp;
+	u32 maxpp;
+	u32 minclk;
+	u32 maxclk;
+} mali_scale_info_t;
+
+/**
+ * Platform spesific data for meson chips.
+ */
+typedef struct mali_plat_info_t {
+	u32 cfg_pp; /* number of pp. */
+	u32 cfg_min_pp;
+	u32 turbo_clock; /* reserved clock src. */
+	u32 def_clock; /* gpu clock used most of time.*/
+	u32 cfg_clock; /* max clock could be used.*/
+	u32 cfg_clock_bkup; /* same as cfg_clock, for backup. */
+	u32 cfg_min_clock;
+
+	u32  sc_mpp; /* number of pp used most of time.*/
+	u32  bst_gpu; /* threshold for boosting gpu. */
+	u32  bst_pp; /* threshold for boosting PP. */
+
+	u32 *clk;
+	u32 *clk_sample;
+	u32 clk_len;
+	u32 have_switch; /* have clock gate switch or not. */
+
+	mali_dvfs_threshold_table *dvfs_table;
+	u32 dvfs_table_size;
+
+	mali_scale_info_t scale_info;
+	u32 maxclk_sysfs;
+	u32 maxpp_sysfs;
+
+	/* set upper limit of pp or frequency, for THERMAL thermal or band width saving.*/
+	u32 limit_on;
+
+	/* for boost up gpu by user. */
+	void (*plat_preheat)(void);
+
+    struct platform_device *pdev;
+    void __iomem *reg_base_hiubus;
+    void __iomem *reg_base_aobus;
+	struct work_struct wq_work;
+    struct clk *clk_mali;
+    struct clk *clk_mali_0;
+    struct clk *clk_mali_1;
+} mali_plat_info_t;
+mali_plat_info_t* get_mali_plat_data(void);
+
+/**
+ * Initialize core scaling policy.
+ *
+ * @note The core scaling policy will assume that all PP cores are on initially.
+ *
+ * @param num_pp_cores Total number of PP cores.
+ */
+int mali_core_scaling_init(mali_plat_info_t*);
+
+/**
+ * Terminate core scaling policy.
+ */
+void mali_core_scaling_term(void);
+
+/**
+ * cancel and flush scaling job queue.
+ */
+void flush_scaling_job(void);
+
+/* get current state(pp, clk). */
+void get_mali_rt_clkpp(u32* clk, u32* pp);
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush);
+void revise_mali_rt(void);
+/* get max gpu clk level of this chip*/
+int get_gpu_max_clk_level(void);
+
+/* get or set the scale mode. */
+u32 get_mali_schel_mode(void);
+void set_mali_schel_mode(u32 mode);
+
+/* for frequency reporter in DS-5 streamline. */
+u32 get_current_frequency(void);
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+#endif /* __ARM_CORE_SCALING_H__ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
new file mode 100644
index 0000000..7687f92
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2010, 2012-2014 Amlogic Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ */
+
+/**
+ * @file mali_platform.c
+ * Platform specific Mali driver functions for:
+ * meson8m2 and the newer chip
+ */
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#include <asm/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/moduleparam.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_defs.h>
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+extern void mali_post_init(void);
+struct kbase_device;
+//static int gpu_dvfs_probe(struct platform_device *pdev)
+int platform_dt_init_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	int err = -1;
+
+	err = mali_meson_init_start(pdev);
+    mali_meson_init_finish(pdev);
+    mpgpu_class_init();
+    mali_post_init();
+    return err;
+}
+
+//static int gpu_dvfs_remove(struct platform_device *pdev)
+void platform_dt_term_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	printk("%s, %d\n", __FILE__, __LINE__);
+
+    mpgpu_class_exit();
+	mali_meson_uninit(pdev);
+
+}
+
+inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2])
+{
+    mali_gpu_utilization_callback(utilisation*255/100);
+    return 1;
+}
+
+struct kbase_platform_funcs_conf dt_funcs_conf = {
+    .platform_init_func = platform_dt_init_func,
+    .platform_term_func = platform_dt_term_func,
+};
+#if 0
+static const struct of_device_id gpu_dvfs_ids[] = {
+	{ .compatible = "meson, gpu-dvfs-1.00.a" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, gpu_dvfs_ids);
+
+static struct platform_driver gpu_dvfs_driver = {
+	.driver = {
+		.name = "meson-gpu-dvfs",
+		.owner = THIS_MODULE,
+		.of_match_table = gpu_dvfs_ids,
+	},
+	.probe = gpu_dvfs_probe,
+	.remove = gpu_dvfs_remove,
+};
+module_platform_driver(gpu_dvfs_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Amlogic SH, MM");
+MODULE_DESCRIPTION("Driver for the Meson GPU dvfs");
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
new file mode 100644
index 0000000..ca79752
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
@@ -0,0 +1,33 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#ifndef MESON_MAIN_H_
+#define MESON_MAIN_H_
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+u32 set_max_mali_freq(u32 idx);
+u32 get_max_mali_freq(void);
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev);
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev);
+int mali_meson_uninit(struct platform_device* ptr_plt_dev);
+int mpgpu_class_init(void);
+void mpgpu_class_exit(void);
+void mali_gpu_utilization_callback(int utilization_pp);
+
+#endif /* MESON_MAIN_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
new file mode 100644
index 0000000..0cc5035
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
@@ -0,0 +1,359 @@
+/*******************************************************************
+ *
+ *  Copyright C 2013 by Amlogic, Inc. All Rights Reserved.
+ *
+ *  Description:
+ *
+ *  Author: Amlogic Software
+ *  Created: 2010/4/1   19:46
+ *
+ *******************************************************************/
+/* Standard Linux headers */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/io.h>
+#include <plat/io.h>
+#include <asm/io.h>
+#endif
+
+#include "meson_main2.h"
+
+static ssize_t domain_stat_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	unsigned int val;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+	return sprintf(buf, "%x\n", val>>4);
+	return 0;
+}
+
+#define PREHEAT_CMD "preheat"
+#define PLL2_CMD "mpl2"  /* mpl2 [11] or [0xxxxxxx] */
+#define SCMPP_CMD "scmpp"  /* scmpp [number of pp your want in most of time]. */
+#define BSTGPU_CMD "bstgpu"  /* bstgpu [0-256] */
+#define BSTPP_CMD "bstpp"  /* bstpp [0-256] */
+#define LIMIT_CMD "lmt"  /* lmt [0 or 1] */
+#define MAX_TOKEN 20
+#define FULL_UTILIZATION 256
+
+static ssize_t mpgpu_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	char *pstart, *cprt = NULL;
+	u32 val = 0;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	cprt = skip_spaces(buf);
+	pstart = strsep(&cprt," ");
+	if (strlen(pstart) < 1)
+		goto quit;
+
+	if (!strncmp(pstart, PREHEAT_CMD, MAX_TOKEN)) {
+		if (pmali_plat->plat_preheat) {
+			pmali_plat->plat_preheat();
+		}
+	} else if (!strncmp(pstart, PLL2_CMD, MAX_TOKEN)) {
+		int base = 10;
+		if ((strlen(cprt) > 2) && (cprt[0] == '0') &&
+				(cprt[1] == 'x' || cprt[1] == 'X'))
+			base = 16;
+		if (kstrtouint(cprt, base, &val) <0)
+			goto quit;
+		if (val < 11)
+			pmali_plat->cfg_clock = pmali_plat->cfg_clock_bkup;
+		else
+			pmali_plat->cfg_clock = pmali_plat->turbo_clock;
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		set_str_src(val);
+	} else if (!strncmp(pstart, SCMPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < pmali_plat->cfg_pp)) {
+			pmali_plat->sc_mpp = val;
+		}
+	} else if (!strncmp(pstart, BSTGPU_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_gpu = val;
+		}
+	} else if (!strncmp(pstart, BSTPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_pp = val;
+		}
+	} else if (!strncmp(pstart, LIMIT_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+
+		if (val < 2) {
+			pmali_plat->limit_on = val;
+			if (val == 0) {
+				pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+				pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+				revise_mali_rt();
+			}
+		}
+	}
+quit:
+	return count;
+}
+
+static ssize_t scale_mode_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_mali_schel_mode());
+}
+
+static ssize_t scale_mode_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	set_mali_schel_mode(val);
+
+	return count;
+}
+
+#if 0
+static ssize_t max_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxpp:%d, maxpp_sysfs:%d, total=%d\n",
+			pmali_plat->scale_info.maxpp, pmali_plat->maxpp_sysfs,
+			pmali_plat->cfg_pp);
+	return sprintf(buf, "%d\n", pmali_plat->cfg_pp);
+}
+
+static ssize_t max_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_pp) || (val < pinfo->minpp))
+		return -EINVAL;
+
+	pmali_plat->maxpp_sysfs = val;
+	pinfo->maxpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minpp);
+}
+
+static ssize_t min_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxpp) || (val < 1))
+		return -EINVAL;
+
+	pinfo->minpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+#endif
+
+static ssize_t max_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxclk:%d, maxclk_sys:%d, max gpu level=%d\n",
+			pmali_plat->scale_info.maxclk, pmali_plat->maxclk_sysfs, get_gpu_max_clk_level());
+	return sprintf(buf, "%d\n", get_gpu_max_clk_level());
+}
+
+static ssize_t max_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_clock) || (val < pinfo->minclk))
+		return -EINVAL;
+
+	pmali_plat->maxclk_sysfs = val;
+	pinfo->maxclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minclk);
+}
+
+static ssize_t min_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxclk))
+		return -EINVAL;
+
+	pinfo->minclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_current_frequency());
+}
+
+static ssize_t freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(val, pp, 1);
+
+	return count;
+}
+
+#if 0
+static ssize_t current_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+	return sprintf(buf, "%d\n", pp);
+}
+
+static ssize_t current_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+
+	get_mali_rt_clkpp(&clk, &pp);
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(clk, val, 1);
+
+	return count;
+}
+#endif
+
+static struct class_attribute mali_class_attrs[] = {
+	__ATTR(domain_stat,	0644, domain_stat_read, NULL),
+	__ATTR(mpgpucmd,	0644, NULL,		mpgpu_write),
+	__ATTR(scale_mode,	0644, scale_mode_read,  scale_mode_write),
+	__ATTR(min_freq,	0644, min_freq_read,  	min_freq_write),
+	__ATTR(max_freq,	0644, max_freq_read,	max_freq_write),
+#if 0
+	__ATTR(min_pp,		0644, min_pp_read,	min_pp_write),
+	__ATTR(max_pp,		0644, max_pp_read,	max_pp_write),
+#endif
+	__ATTR(cur_freq,	0644, freq_read,	freq_write),
+#if 0
+	__ATTR(cur_pp,		0644, current_pp_read,	current_pp_write),
+#endif
+};
+
+static struct class mpgpu_class = {
+	.name = "mpgpu",
+};
+
+int mpgpu_class_init(void)
+{
+	int ret = 0;
+	int i;
+	int attr_num =  ARRAY_SIZE(mali_class_attrs);
+
+	ret = class_register(&mpgpu_class);
+	if (ret) {
+		printk(KERN_ERR "%s: class_register failed\n", __func__);
+		return ret;
+	}
+	for (i = 0; i< attr_num; i++) {
+		ret = class_create_file(&mpgpu_class, &mali_class_attrs[i]);
+		if (ret) {
+			printk(KERN_ERR "%d ST: class item failed to register\n", i);
+		}
+	}
+	return ret;
+}
+
+void  mpgpu_class_exit(void)
+{
+	class_unregister(&mpgpu_class);
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
new file mode 100644
index 0000000..d16675e
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
@@ -0,0 +1,246 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include <asm/io.h>
+#ifdef CONFIG_GPU_THERMAL
+#include <linux/gpu_cooling.h>
+#include <linux/gpucore_cooling.h>
+#include <linux/amlogic/aml_thermal_hw.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+/*
+ *    For Meson 8 M2.
+ *
+ */
+static void mali_plat_preheat(void);
+static mali_plat_info_t mali_plat_data = {
+    .bst_gpu = 210, /* threshold for boosting gpu. */
+    .bst_pp = 160, /* threshold for boosting PP. */
+    .have_switch = 1,
+    .limit_on = 1,
+    .plat_preheat = mali_plat_preheat,
+};
+
+static void mali_plat_preheat(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    u32 pre_fs;
+    u32 clk, pp;
+
+    if (get_mali_schel_mode() != MALI_PP_FS_SCALING)
+        return;
+
+    get_mali_rt_clkpp(&clk, &pp);
+    pre_fs = mali_plat_data.def_clock + 1;
+    if (clk < pre_fs)
+        clk = pre_fs;
+    if (pp < mali_plat_data.sc_mpp)
+        pp = mali_plat_data.sc_mpp;
+    set_mali_rt_clkpp(clk, pp, 1);
+#endif
+}
+
+mali_plat_info_t* get_mali_plat_data(void) {
+    return &mali_plat_data;
+}
+
+int get_mali_freq_level(int freq)
+{
+    int i = 0, level = -1;
+    int mali_freq_num;
+
+    if (freq < 0)
+        return level;
+
+    mali_freq_num = mali_plat_data.dvfs_table_size - 1;
+    if (freq < mali_plat_data.clk_sample[0])
+        level = mali_freq_num-1;
+    else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1])
+        level = 0;
+    else {
+        for (i=0; i<mali_freq_num - 1 ;i++) {
+            if (freq >= mali_plat_data.clk_sample[i] && freq < mali_plat_data.clk_sample[i + 1]) {
+                level = i;
+                level = mali_freq_num-level - 1;
+                break;
+            }
+        }
+    }
+    return level;
+}
+
+unsigned int get_mali_max_level(void)
+{
+    return mali_plat_data.dvfs_table_size - 1;
+}
+
+int get_gpu_max_clk_level(void)
+{
+    return mali_plat_data.cfg_clock;
+}
+
+#ifdef CONFIG_GPU_THERMAL
+static void set_limit_mali_freq(u32 idx)
+{
+    if (mali_plat_data.limit_on == 0)
+        return;
+    if (idx > mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk)
+        return;
+    if (idx > mali_plat_data.maxclk_sysfs) {
+        printk("idx > max freq\n");
+        return;
+    }
+    mali_plat_data.scale_info.maxclk= idx;
+    revise_mali_rt();
+}
+
+static u32 get_limit_mali_freq(void)
+{
+    return mali_plat_data.scale_info.maxclk;
+}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 get_mali_utilization(void)
+{
+#ifndef MESON_DRV_BRING
+    return 55;
+#else
+    return (_mali_ukk_utilization_pp() * 100) / 256;
+#endif
+}
+#endif
+#endif
+
+#ifdef CONFIG_GPU_THERMAL
+static u32 set_limit_pp_num(u32 num)
+{
+    u32 ret = -1;
+    if (mali_plat_data.limit_on == 0)
+        goto quit;
+    if (num > mali_plat_data.cfg_pp ||
+            num < mali_plat_data.scale_info.minpp)
+        goto quit;
+
+    if (num > mali_plat_data.maxpp_sysfs) {
+        printk("pp > sysfs set pp\n");
+        goto quit;
+    }
+
+    mali_plat_data.scale_info.maxpp = num;
+    revise_mali_rt();
+    ret = 0;
+quit:
+    return ret;
+}
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 mali_get_online_pp(void)
+{
+    unsigned int val;
+    mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+    val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+    if (val == 0x07)    /* No pp is working */
+        return 0;
+
+#ifndef MESON_DRV_BRING
+    return 2;
+#else
+    return mali_executor_get_num_cores_enabled();
+#endif
+}
+#endif
+#endif
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+    //dev_set_drvdata(&ptr_plt_dev->dev, &mali_plat_data);
+    mali_dt_info(ptr_plt_dev, &mali_plat_data);
+    mali_clock_init_clk_tree(ptr_plt_dev);
+    return 0;
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+    if (mali_core_scaling_init(&mali_plat_data) < 0)
+        return -1;
+    return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+    mali_core_scaling_term();
+    return 0;
+}
+
+void mali_post_init(void)
+{
+#ifdef CONFIG_GPU_THERMAL
+    int err;
+    struct gpufreq_cooling_device *gcdev = NULL;
+    struct gpucore_cooling_device *gccdev = NULL;
+
+    gcdev = gpufreq_cooling_alloc();
+    register_gpu_freq_info(get_current_frequency);
+    if (IS_ERR(gcdev))
+        printk("malloc gpu cooling buffer error!!\n");
+    else if (!gcdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gcdev->get_gpu_freq_level = get_mali_freq_level;
+        gcdev->get_gpu_max_level = get_mali_max_level;
+        gcdev->set_gpu_freq_idx = set_limit_mali_freq;
+        gcdev->get_gpu_current_max_level = get_limit_mali_freq;
+#ifdef CONFIG_DEVFREQ_THERMAL
+        gcdev->get_gpu_freq = get_mali_freq;
+        gcdev->get_gpu_loading = get_mali_utilization;
+        gcdev->get_online_pp = mali_get_online_pp;
+#endif
+        err = gpufreq_cooling_register(gcdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gcdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu cooling register okay with err=%d\n",err);
+    }
+
+    gccdev = gpucore_cooling_alloc();
+    if (IS_ERR(gccdev))
+        printk("malloc gpu core cooling buffer error!!\n");
+    else if (!gccdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gccdev->max_gpu_core_num=mali_plat_data.cfg_pp;
+        gccdev->set_max_pp_num=set_limit_pp_num;
+        err = (int)gpucore_cooling_register(gccdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gccdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu core cooling register okay with err=%d\n",err);
+    }
+#endif
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
new file mode 100644
index 0000000..aa8a77a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.c
+ * Example core scaling policy.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+
+#if AMLOGIC_GPU_USE_GPPLL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16)
+#include <linux/amlogic/amports/gp_pll.h>
+#else
+#include <linux/amlogic/media/clk/gp_pll.h>
+#endif
+#endif
+
+#define LOG_MALI_SCALING 1
+#include "meson_main2.h"
+#include "mali_clock.h"
+
+static int currentStep;
+#ifndef CONFIG_MALI_DVFS
+static int num_cores_enabled;
+static int lastStep;
+static struct work_struct wq_work;
+static mali_plat_info_t* pmali_plat = NULL;
+#endif
+static int  scaling_mode = MALI_PP_FS_SCALING;
+extern int  mali_pm_statue;
+//static int  scaling_mode = MALI_SCALING_DISABLE;
+//static int  scaling_mode = MALI_PP_SCALING;
+
+#if AMLOGIC_GPU_USE_GPPLL
+static struct gp_pll_user_handle_s *gp_pll_user_gpu;
+static int is_gp_pll_get;
+static int is_gp_pll_put;
+#endif
+static unsigned scaling_dbg_level = 0;
+module_param(scaling_dbg_level, uint, 0644);
+MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level");
+
+#define scalingdbg(level, fmt, arg...)				   \
+	do {											   \
+		if (scaling_dbg_level >= (level))			   \
+		printk(fmt , ## arg);						   \
+	} while (0)
+
+#ifndef CONFIG_MALI_DVFS
+static inline void mali_clk_exected(void)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	uint32_t execStep = currentStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[currentStep];
+#endif
+
+	//if (pdvfs[currentStep].freq_index == pdvfs[lastStep].freq_index) return;
+	if ((pdvfs[execStep].freq_index == pdvfs[lastStep].freq_index) ||
+		(pdvfs[execStep].clk_freq == pdvfs[lastStep].clk_freq)){
+		return;
+	}
+
+#if AMLOGIC_GPU_USE_GPPLL
+	if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+		gp_pll_request(gp_pll_user_gpu);
+		if (!is_gp_pll_get) {
+			//printk("not get pll\n");
+			execStep = currentStep - 1;
+		}
+	} else {
+		//not get the gp pll, do need put
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+		gp_pll_release(gp_pll_user_gpu);
+	}
+#endif
+	//mali_dev_pause();
+	mali_clock_set(pdvfs[execStep].freq_index);
+	//mali_dev_resume();
+	lastStep = execStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	if (is_gp_pll_put) {
+		//printk("release gp0 pll\n");
+		gp_pll_release(gp_pll_user_gpu);
+		gp_pll_request(gp_pll_user_gpu);
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+	}
+#endif
+
+}
+#if AMLOGIC_GPU_USE_GPPLL
+static int gp_pll_user_cb_gpu(struct gp_pll_user_handle_s *user,
+		int event)
+{
+	if (event == GP_PLL_USER_EVENT_GRANT) {
+		//printk("granted\n");
+		is_gp_pll_get = 1;
+		is_gp_pll_put = 0;
+		schedule_work(&wq_work);
+	} else if (event == GP_PLL_USER_EVENT_YIELD) {
+		//printk("ask for yield\n");
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 1;
+		schedule_work(&wq_work);
+	}
+
+	return 0;
+}
+#endif
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+    cores = cores;
+    return 0;
+}
+
+static void do_scaling(struct work_struct *work)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	int err = mali_perf_set_num_pp_cores(num_cores_enabled);
+    if (err < 0) scalingdbg(1, "set pp failed");
+
+	scalingdbg(1, "set pp cores to %d\n", num_cores_enabled);
+	scalingdbg(1, "pdvfs[%d].freq_index=%d, pdvfs[%d].freq_index=%d\n",
+			currentStep, pdvfs[currentStep].freq_index,
+			lastStep, pdvfs[lastStep].freq_index);
+	mali_clk_exected();
+#ifdef CONFIG_MALI400_PROFILING
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+			MALI_PROFILING_EVENT_CHANNEL_GPU |
+			MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+			get_current_frequency(),
+			0,	0,	0,	0);
+#endif
+}
+#endif
+
+u32 revise_set_clk(u32 val, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+
+	pinfo = &pmali_plat->scale_info;
+
+	if (val < pinfo->minclk)
+		val = pinfo->minclk;
+	else if (val >  pinfo->maxclk)
+		val =  pinfo->maxclk;
+
+	if (val != currentStep) {
+		currentStep = val;
+		if (flush)
+			schedule_work(&wq_work);
+		else
+			ret = 1;
+	}
+#endif
+	return ret;
+}
+
+void get_mali_rt_clkpp(u32* clk, u32* pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	*clk = currentStep;
+	*pp = num_cores_enabled;
+#endif
+}
+
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+	u32 flush_work = 0;
+
+	pinfo = &pmali_plat->scale_info;
+	if (clk < pinfo->minclk)
+		clk = pinfo->minclk;
+	else if (clk >  pinfo->maxclk)
+		clk =  pinfo->maxclk;
+
+	if (clk != currentStep) {
+		currentStep = clk;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+	if (pp < pinfo->minpp)
+		pp = pinfo->minpp;
+	else if (pp > pinfo->maxpp)
+		pp = pinfo->maxpp;
+
+	if (pp != num_cores_enabled) {
+		num_cores_enabled = pp;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+
+	if (flush_work)
+		schedule_work(&wq_work);
+#endif
+	return ret;
+}
+
+void revise_mali_rt(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	set_mali_rt_clkpp(currentStep, num_cores_enabled, 1);
+#endif
+}
+
+void flush_scaling_job(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	cancel_work_sync(&wq_work);
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 enable_one_core(void)
+{
+	scalingdbg(2, "meson:	 one more pp, curent has %d pp cores\n",  num_cores_enabled + 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled + 1, 0);
+}
+
+static u32 disable_one_core(void)
+{
+	scalingdbg(2, "meson: disable one pp, current has %d pp cores\n",  num_cores_enabled - 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled - 1, 0);
+}
+
+static u32 enable_max_num_cores(void)
+{
+	return set_mali_rt_clkpp(currentStep, pmali_plat->scale_info.maxpp, 0);
+}
+
+static u32 enable_pp_cores(u32 val)
+{
+	scalingdbg(2, "meson: enable %d pp cores\n", val);
+	return set_mali_rt_clkpp(currentStep, val, 0);
+}
+#endif
+
+int mali_core_scaling_init(mali_plat_info_t *mali_plat)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_plat == NULL) {
+		scalingdbg(2, " Mali platform data is NULL!!!\n");
+		return -1;
+	}
+
+	pmali_plat = mali_plat;
+    printk("mali_plat=%p\n", mali_plat);
+	num_cores_enabled = pmali_plat->sc_mpp;
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_gpu = gp_pll_user_register("gpu", 1,
+		gp_pll_user_cb_gpu);
+	//not get the gp pll, do need put
+	is_gp_pll_get = 0;
+	is_gp_pll_put = 0;
+	if (gp_pll_user_gpu == NULL) printk("register gp pll user for gpu failed\n");
+#endif
+
+	currentStep = pmali_plat->def_clock;
+	lastStep = currentStep;
+	INIT_WORK(&wq_work, do_scaling);
+#endif
+	return 0;
+	/* NOTE: Mali is not fully initialized at this point. */
+}
+
+void mali_core_scaling_term(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	flush_scheduled_work();
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_unregister(gp_pll_user_gpu);
+#endif
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 mali_threshold [] = {
+	40, /* 40% */
+	50, /* 50% */
+	90, /* 90% */
+};
+#endif
+
+void mali_pp_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+
+	if (mali_threshold[2] < utilization_pp)
+		ret = enable_max_num_cores();
+	else if (mali_threshold[1]< utilization_pp)
+		ret = enable_one_core();
+	else if (0 < utilization_pp)
+		ret = disable_one_core();
+	if (ret == 1)
+		schedule_work(&wq_work);
+#endif
+}
+
+#if LOG_MALI_SCALING
+void trace_utilization(int utilization_gpu, u32 current_idx, u32 next,
+		u32 current_pp, u32 next_pp)
+{
+	char direction;
+	if (next > current_idx)
+		direction = '>';
+	else if ((current_idx > pmali_plat->scale_info.minpp) && (next < current_idx))
+		direction = '<';
+	else
+		direction = '~';
+
+	scalingdbg(2, "[SCALING]%c (%3d-->%3d)@%3d{%3d - %3d}. pp:(%d-->%d)\n",
+			direction,
+			get_mali_freq(current_idx),
+			get_mali_freq(next),
+			utilization_gpu,
+			pmali_plat->dvfs_table[current_idx].downthreshold,
+			pmali_plat->dvfs_table[current_idx].upthreshold,
+			current_pp, next_pp);
+}
+#endif
+
+#ifndef CONFIG_MALI_DVFS
+static int mali_stay_count = 0;
+static void mali_decide_next_status(int utilization_pp, int* next_fs_idx,
+		int* pp_change_flag)
+{
+	u32 mali_up_limit, decided_fs_idx;
+	u32 ld_left, ld_right;
+	u32 ld_up, ld_down;
+	u32 change_mode;
+
+	*pp_change_flag = 0;
+	change_mode = 0;
+
+	scalingdbg(5, "line(%d), scaling_mode=%d, MALI_TURBO_MODE=%d, turbo=%d, maxclk=%d\n",
+			__LINE__,  scaling_mode, MALI_TURBO_MODE,
+		    pmali_plat->turbo_clock, pmali_plat->scale_info.maxclk);
+
+	mali_up_limit = (scaling_mode ==  MALI_TURBO_MODE) ?
+		pmali_plat->turbo_clock : pmali_plat->scale_info.maxclk;
+	decided_fs_idx = currentStep;
+
+	ld_up = pmali_plat->dvfs_table[currentStep].upthreshold;
+	ld_down = pmali_plat->dvfs_table[currentStep].downthreshold;
+
+	scalingdbg(2, "utilization=%d,  ld_up=%d\n ", utilization_pp,  ld_up);
+	if (utilization_pp >= ld_up) { /* go up */
+
+		scalingdbg(2, "currentStep=%d,  mali_up_limit=%d\n ", currentStep, mali_up_limit);
+		if (currentStep < mali_up_limit) {
+			change_mode = 1;
+			if ((currentStep < pmali_plat->def_clock) && (utilization_pp > pmali_plat->bst_gpu))
+				decided_fs_idx = pmali_plat->def_clock;
+			else
+				decided_fs_idx++;
+		}
+		if ((utilization_pp >= ld_up) &&
+				(num_cores_enabled < pmali_plat->scale_info.maxpp)) {
+			if ((num_cores_enabled < pmali_plat->sc_mpp) && (utilization_pp >= pmali_plat->bst_pp)) {
+				*pp_change_flag = 1;
+				change_mode = 1;
+			} else if (change_mode == 0) {
+				*pp_change_flag = 2;
+				change_mode = 1;
+			}
+		}
+#if LOG_MALI_SCALING
+		scalingdbg(2, "[nexting..] [LD:%d]-> FS[CRNT:%d LMT:%d NEXT:%d] PP[NUM:%d LMT:%d MD:%d][F:%d]\n",
+				utilization_pp, currentStep, mali_up_limit, decided_fs_idx,
+				num_cores_enabled, pmali_plat->scale_info.maxpp, *pp_change_flag, change_mode);
+#endif
+	} else if (utilization_pp <= ld_down) { /* go down */
+		if (mali_stay_count > 0) {
+			*next_fs_idx = decided_fs_idx;
+			mali_stay_count--;
+			return;
+		}
+
+		if (num_cores_enabled > pmali_plat->sc_mpp) {
+			change_mode = 1;
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		} else if (currentStep > pmali_plat->scale_info.minclk) {
+			change_mode = 1;
+		} else if (num_cores_enabled > 1) { /* decrease PPS */
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				scalingdbg(2, "ld_left=%d, ld_right=%d\n", ld_left, ld_right);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		}
+
+		if (change_mode == 1) {
+			decided_fs_idx--;
+		} else if (change_mode == 2) { /* decrease PPS */
+			*pp_change_flag = -1;
+		}
+	}
+
+	if (decided_fs_idx < 0 ) {
+		printk("gpu debug, next index below 0\n");
+		decided_fs_idx = 0;
+	}
+	if (decided_fs_idx > pmali_plat->scale_info.maxclk) {
+		decided_fs_idx = pmali_plat->scale_info.maxclk;
+		printk("gpu debug, next index above max-1, set to %d\n", decided_fs_idx);
+	}
+
+	if (change_mode)
+		mali_stay_count = pmali_plat->dvfs_table[decided_fs_idx].keep_count;
+
+	*next_fs_idx = decided_fs_idx;
+}
+#endif
+
+void mali_pp_fs_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+	int pp_change_flag = 0;
+	u32 next_idx = 0;
+
+#if LOG_MALI_SCALING
+	u32 last_pp = num_cores_enabled;
+#endif
+	mali_decide_next_status(utilization_pp, &next_idx, &pp_change_flag);
+
+	if (pp_change_flag == 1)
+		ret = enable_pp_cores(pmali_plat->sc_mpp);
+	else if (pp_change_flag == 2)
+		ret = enable_one_core();
+	else if (pp_change_flag == -1) {
+		ret = disable_one_core();
+	}
+
+#if LOG_MALI_SCALING
+	if (pp_change_flag || (next_idx != currentStep))
+		trace_utilization(utilization_pp, currentStep, next_idx, last_pp, num_cores_enabled);
+#endif
+
+	if (next_idx != currentStep) {
+		ret = 1;
+		currentStep = next_idx;
+	}
+
+	if (ret == 1)
+		schedule_work(&wq_work);
+#ifdef CONFIG_MALI400_PROFILING
+	else
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				MALI_PROFILING_EVENT_CHANNEL_GPU |
+				MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				get_current_frequency(),
+				0,	0,	0,	0);
+#endif
+#endif
+}
+
+u32 get_mali_schel_mode(void)
+{
+	return scaling_mode;
+}
+
+void set_mali_schel_mode(u32 mode)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mode >= MALI_SCALING_MODE_MAX)
+		return;
+	scaling_mode = mode;
+
+	//disable thermal in turbo mode
+	if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->limit_on = 0;
+	} else {
+		pmali_plat->limit_on = 1;
+	}
+	/* set default performance range. */
+	pmali_plat->scale_info.minclk = pmali_plat->cfg_min_clock;
+	pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+	pmali_plat->scale_info.minpp = pmali_plat->cfg_min_pp;
+	pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+
+	/* set current status and tune max freq */
+	if (scaling_mode == MALI_PP_FS_SCALING) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_pp_cores(pmali_plat->sc_mpp);
+	} else if (scaling_mode == MALI_SCALING_DISABLE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_max_num_cores();
+	} else if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->turbo_clock;
+		enable_max_num_cores();
+	}
+	currentStep = pmali_plat->scale_info.maxclk;
+	schedule_work(&wq_work);
+#endif
+}
+
+u32 get_current_frequency(void)
+{
+	return get_mali_freq(currentStep);
+}
+
+void mali_gpu_utilization_callback(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_pm_statue)
+		return;
+
+	switch (scaling_mode) {
+		case MALI_PP_FS_SCALING:
+			mali_pp_fs_scaling_update(utilization_pp);
+			break;
+		case MALI_PP_SCALING:
+			mali_pp_scaling_update(utilization_pp);
+			break;
+		default:
+			break;
+	}
+#endif
+}
+static u32 clk_cntl_save = 0;
+void mali_dev_freeze(void)
+{
+	clk_cntl_save = mplt_read(HHI_MALI_CLK_CNTL);
+}
+
+void mali_dev_restore(void)
+{
+
+	mplt_write(HHI_MALI_CLK_CNTL, clk_cntl_save);
+	if (pmali_plat && pmali_plat->pdev) {
+		mali_clock_init_clk_tree(pmali_plat->pdev);
+	} else {
+		printk("error: init clock failed, pmali_plat=%p, pmali_plat->pdev=%p\n",
+				pmali_plat, pmali_plat == NULL ? NULL: pmali_plat->pdev);
+	}
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
new file mode 100755
index 0000000..5b6ef37
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_juno_soc.o
+
+
+obj-m += juno_mali_opp.o
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
new file mode 100755
index 0000000..ccfd8cc
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/scpi_protocol.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp_add opp_add
+#endif /* Linux >= 3.13 */
+
+
+static int init_juno_opps_from_scpi(struct device *dev)
+{
+	struct scpi_opp *sopp;
+	int i;
+
+	/* Hard coded for Juno. 2 is GPU domain */
+	sopp = scpi_dvfs_get_opps(2);
+	if (IS_ERR_OR_NULL(sopp))
+		return PTR_ERR(sopp);
+
+	for (i = 0; i < sopp->count; i++) {
+		struct scpi_opp_entry *e = &sopp->opp[i];
+
+		dev_info(dev, "Mali OPP from SCPI: %u Hz @ %u mV\n",
+				e->freq_hz, e->volt_mv);
+
+		dev_pm_opp_add(dev, e->freq_hz, e->volt_mv * 1000);
+	}
+
+	return 0;
+}
+
+static int juno_setup_opps(void)
+{
+	struct device_node *np;
+	struct platform_device *pdev;
+	int err;
+
+	np = of_find_node_by_name(NULL, "gpu");
+	if (!np) {
+		pr_err("Failed to find DT entry for Mali\n");
+		return -EFAULT;
+	}
+
+	pdev = of_find_device_by_node(np);
+	if (!pdev) {
+		pr_err("Failed to find device for Mali\n");
+		of_node_put(np);
+		return -EFAULT;
+	}
+
+	err = init_juno_opps_from_scpi(&pdev->dev);
+
+	of_node_put(np);
+
+	return err;
+}
+
+module_init(juno_setup_opps);
+MODULE_LICENSE("GPL");
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
new file mode 100755
index 0000000..3baf3d9
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
@@ -0,0 +1,128 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <linux/thermal.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_smc.h>
+
+/* Versatile Express (VE) Juno Development Platform */
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 65,
+	.mmu_irq_number = 66,
+	.gpu_irq_number = 64,
+	.io_memory_region = {
+			     .start = 0x2D000000,
+			     .end = 0x2D000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+/*
+ * Juno Secure Mode integration
+ */
+
+/* SMC Function Numbers */
+#define JUNO_SMC_SECURE_ENABLE_FUNC  0xff06
+#define JUNO_SMC_SECURE_DISABLE_FUNC 0xff07
+
+static int juno_secure_mode_enable(struct kbase_device *kbdev)
+{
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+
+	if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) &&
+			kbdev->reg_start == 0x2d000000) {
+		/* T62X in SoC detected */
+		u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+			JUNO_SMC_SECURE_ENABLE_FUNC, false,
+			0, 0, 0);
+		return ret;
+	}
+
+	return -EINVAL; /* Not supported */
+}
+
+static int juno_secure_mode_disable(struct kbase_device *kbdev)
+{
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+
+	if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) &&
+			kbdev->reg_start == 0x2d000000) {
+		/* T62X in SoC detected */
+		u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+			JUNO_SMC_SECURE_DISABLE_FUNC, false,
+			0, 0, 0);
+		return ret;
+	}
+
+	return -EINVAL; /* Not supported */
+}
+
+struct kbase_secure_ops juno_secure_ops = {
+	.secure_mode_enable = juno_secure_mode_enable,
+	.secure_mode_disable = juno_secure_mode_disable,
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
new file mode 100755
index 0000000..5fc6d9e
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 600000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 600000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (&juno_secure_ops)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
+#ifdef CONFIG_DEVFREQ_THERMAL
+extern struct devfreq_cooling_ops juno_model_ops;
+#endif
+extern struct kbase_secure_ops juno_secure_ops;
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
new file mode 100755
index 0000000..7cb3be7
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @brief Entry point to transfer control to a platform for early initialization
+ *
+ * This function is called early on in the initialization during execution of
+ * @ref kbase_driver_init.
+ *
+ * @return Zero to indicate success non-zero for failure.
+ */
+int kbase_platform_early_init(void);
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
new file mode 100755
index 0000000..01f9dfc
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+/**
+ * kbase_platform_fake_register - Entry point for fake platform registration
+ *
+ * This function is called early on in the initialization during execution of
+ * kbase_driver_init.
+ *
+ * Return: 0 to indicate success, non-zero for failure.
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Entry point for fake platform unregistration
+ *
+ * This function is called in the termination during execution of
+ * kbase_driver_exit.
+ */
+void kbase_platform_fake_unregister(void);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
new file mode 100755
index 0000000..084a156
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
new file mode 100755
index 0000000..eb957d3
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -0,0 +1,82 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq()
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq()
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..15ce2bc
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -0,0 +1,85 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+#include "mali_kbase_config_platform.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+	.start = 0xFC010000,
+	.end = 0xFC010000 + (4096 * 4) - 1
+	}
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
new file mode 100755
index 0000000..4665f98
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,279 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START	(0x10000000)
+#define SYS_CFGDATA_OFFSET		(0x000000A0)
+#define SYS_CFGCTRL_OFFSET		(0x000000A4)
+#define SYS_CFGSTAT_OFFSET		(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		(1 << 31)
+#define READ_REG_BIT_VALUE			(0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			(0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		(1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			(1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE	(0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE		(2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		(1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			(1 <<  1)
+
+#define FEED_REG_BIT_MASK			(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+/* the following three values used for reading
+ * HBI value of the LogicTile daughterboard */
+#define VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 (0x10000000)
+#define VE_SYS_PROC_ID1_OFFSET (0x00000088)
+#define VE_LOGIC_TILE_HBI_MASK (0x00000FFF)
+
+#define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos))
+
+/**
+ * Values used for determining the GPU frequency based on the LogicTile type
+ * Used by the function kbase_get_platform_logic_tile_type
+ */
+#define VE_VIRTEX6_GPU_FREQ_MIN 5000
+#define VE_VIRTEX6_GPU_FREQ_MAX 5000
+#define VE_VIRTEX7_GPU_FREQ_MIN 40000
+#define VE_VIRTEX7_GPU_FREQ_MAX 40000
+#define VE_DEFAULT_GPU_FREQ_MIN 5000
+#define VE_DEFAULT_GPU_FREQ_MAX 5000
+
+
+#define CPU_CLOCK_SPEED_UNDEFINED (0)
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed -Retrieves the CPU clock speed
+ * @cpu_clock - the value of CPU clock speed in MHz
+ *
+ * Returns 0 on success, error code otherwise.
+ *
+ * The implementation is platform specific.
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	int err = 0;
+	u32 reg_val = 0;
+	u32 osc2_value = 0;
+	u32 pa_divide = 0;
+	u32 pb_divide = 0;
+	u32 pc_divide = 0;
+	void __iomem *syscfg_reg = NULL;
+	void __iomem *scc_reg = NULL;
+
+	if (CPU_CLOCK_SPEED_UNDEFINED != cpu_clock_speed) {
+		*cpu_clock = cpu_clock_speed;
+		return 0;
+	}
+
+	/* Init the value in case something goes wrong */
+	*cpu_clock = 0;
+
+	/* Map CPU register into virtual memory */
+	syscfg_reg = ioremap(MOTHERBOARD_SYS_CFG_START, 0x1000);
+	if (syscfg_reg == NULL) {
+		err = -EIO;
+		goto syscfg_reg_map_failed;
+	}
+
+	scc_reg = ioremap(CORETILE_EXPRESS_A9X4_SCC_START, 0x1000);
+	if (scc_reg == NULL) {
+		err = -EIO;
+		goto scc_reg_map_failed;
+	}
+
+	raw_spin_lock(&syscfg_lock);
+
+	/* Read SYS regs - OSC2 */
+	reg_val = readl(syscfg_reg + SYS_CFGCTRL_OFFSET);
+
+	/* Check if there is any other undergoing request */
+	if (reg_val & SYS_CFGCTRL_START_BIT_VALUE) {
+		err = -EBUSY;
+		goto ongoing_request;
+	}
+	/* Reset the CGFGSTAT reg */
+	writel(0, (syscfg_reg + SYS_CFGSTAT_OFFSET));
+
+	writel(SYS_CFGCTRL_START_BIT_VALUE | READ_REG_BIT_VALUE |
+			DCC_DEFAULT_BIT_VALUE |
+			SYS_CFG_OSC_FUNC_BIT_VALUE |
+			SITE_DEFAULT_BIT_VALUE |
+			BOARD_STACK_POS_DEFAULT_BIT_VALUE |
+			DEVICE_DEFAULT_BIT_VALUE,
+			(syscfg_reg + SYS_CFGCTRL_OFFSET));
+	/* Wait for the transaction to complete */
+	while (!(readl(syscfg_reg + SYS_CFGSTAT_OFFSET) &
+			SYS_CFG_COMPLETE_BIT_VALUE))
+		;
+	/* Read SYS_CFGSTAT Register to get the status of submitted
+	 * transaction */
+	reg_val = readl(syscfg_reg + SYS_CFGSTAT_OFFSET);
+
+	if (reg_val & SYS_CFG_ERROR_BIT_VALUE) {
+		/* Error while setting register */
+		err = -EIO;
+		goto set_reg_error;
+	}
+
+	osc2_value = readl(syscfg_reg + SYS_CFGDATA_OFFSET);
+	/* Read the SCC CFGRW0 register */
+	reg_val = readl(scc_reg);
+
+	/*
+	 * Select the appropriate feed:
+	 * CFGRW0[0] - CLKOB
+	 * CFGRW0[1] - CLKOC
+	 * CFGRW0[2] - FACLK (CLK)B FROM AXICLK PLL)
+	 */
+	/* Calculate the  FCLK */
+	if (IS_SINGLE_BIT_SET(reg_val, 0)) {
+		/* CFGRW0[0] - CLKOB */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[10:7] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PB_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 1)) {
+		/* CFGRW0[1] - CLKOC */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[14:11] */
+		pc_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PC_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PC_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 2)) {
+		/* CFGRW0[2] - FACLK */
+		/* CFGRW0[18:15] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PA_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[22:19] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PB_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else {
+		err = -EIO;
+	}
+
+set_reg_error:
+ongoing_request:
+	raw_spin_unlock(&syscfg_lock);
+	*cpu_clock /= HZ_IN_MHZ;
+
+	if (!err)
+		cpu_clock_speed = *cpu_clock;
+
+	iounmap(scc_reg);
+
+scc_reg_map_failed:
+	iounmap(syscfg_reg);
+
+syscfg_reg_map_failed:
+
+	return err;
+}
+
+/**
+ * kbase_get_platform_logic_tile_type -  determines which LogicTile type
+ * is used by Versatile Express
+ *
+ * When platform_config build parameter is specified as vexpress, i.e.,
+ * platform_config=vexpress, GPU frequency may vary dependent on the
+ * particular platform. The GPU frequency depends on the LogicTile type.
+ *
+ * This function determines which LogicTile type is used by the platform by
+ * reading the HBI value of the daughterboard which holds the LogicTile:
+ *
+ * 0x217 HBI0217 Virtex-6
+ * 0x192 HBI0192 Virtex-5
+ * 0x247 HBI0247 Virtex-7
+ *
+ * Return: HBI value of the logic tile daughterboard, zero if not accessible
+ */
+static u32 kbase_get_platform_logic_tile_type(void)
+{
+	void __iomem *syscfg_reg = NULL;
+	u32 sys_procid1 = 0;
+
+	syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 + VE_SYS_PROC_ID1_OFFSET, 4);
+	if (NULL != syscfg_reg) {
+		sys_procid1 = readl(syscfg_reg);
+		iounmap(syscfg_reg);
+	}
+
+	return sys_procid1 & VE_LOGIC_TILE_HBI_MASK;
+}
+
+u32 kbase_get_platform_min_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MIN;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MIN;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MIN;
+	}
+}
+
+u32 kbase_get_platform_max_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MAX;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MAX;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MAX;
+	}
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
new file mode 100755
index 0000000..da86569
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+/**
+ * Get the minimum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_min_freq(void);
+
+/**
+ * Get the maximum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_max_freq(void);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
new file mode 100755
index 0000000..d9bfabc
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
@@ -0,0 +1,16 @@
+#
+# (C) COPYRIGHT 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_vexpress.o
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
new file mode 100755
index 0000000..2b91d72
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 5000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 5000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..3ff0930
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -0,0 +1,79 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+			     .start = 0x2f010000,
+			     .end = 0x2f010000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
new file mode 100755
index 0000000..0cb41ce
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
new file mode 100755
index 0000000..d269c25
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -0,0 +1,82 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 10000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 10000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..76ffe4a
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 75,
+	.mmu_irq_number = 76,
+	.gpu_irq_number = 77,
+	.io_memory_region = {
+			     .start = 0x2F000000,
+			     .end = 0x2F000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
new file mode 100755
index 0000000..816dff4
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ					    (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START		(0x10000000)
+#define SYS_CFGDATA_OFFSET				(0x000000A0)
+#define SYS_CFGCTRL_OFFSET				(0x000000A4)
+#define SYS_CFGSTAT_OFFSET				(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		  (1 << 31)
+#define READ_REG_BIT_VALUE				  (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			  (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		  (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			  (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE	      (2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		  (1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			  (1 <<  1)
+
+#define FEED_REG_BIT_MASK				(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+#define IS_SINGLE_BIT_SET(val, pos)		(val&(1<<pos))
+
+#define CPU_CLOCK_SPEED_UNDEFINED 0
+
+#define CPU_CLOCK_SPEED_6XV7 50
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed
+ * @brief  Retrieves the CPU clock speed.
+ *         The implementation is platform specific.
+ * @param[out]    cpu_clock - the value of CPU clock speed in MHz
+ * @return        0 on success, 1 otherwise
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	/* TODO: MIDBASE-2873 - Provide runtime detection of CPU clock freq for 6XV7 board */
+	*cpu_clock = CPU_CLOCK_SPEED_6XV7;
+
+	return 0;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
new file mode 100755
index 0000000..23647cc
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
new file mode 100755
index 0000000..5fa9b39
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_ukk_os.h
+ * Types and definitions that are common for Linux OSs for the kernel side of the
+ * User-Kernel interface.
+ */
+
+#ifndef _UKK_OS_H_ /* Linux version */
+#define _UKK_OS_H_
+
+#include <linux/fs.h>
+
+/**
+ * @addtogroup uk_api User-Kernel Interface API
+ * @{
+ */
+
+/**
+ * @addtogroup uk_api_kernel UKK (Kernel side)
+ * @{
+ */
+
+/**
+ * Internal OS specific data structure associated with each UKK session. Part
+ * of a ukk_session object.
+ */
+typedef struct ukkp_session {
+	int dummy;     /**< No internal OS specific data at this time */
+} ukkp_session;
+
+/** @} end group uk_api_kernel */
+
+/** @} end group uk_api */
+
+#endif /* _UKK_OS_H__ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/midgard/sconscript b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/sconscript
new file mode 100755
index 0000000..c36e92d
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/midgard/sconscript
@@ -0,0 +1,127 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+import re
+import sys
+Import('env')
+
+
+if Glob('tests/sconscript'):
+	SConscript( 'tests/sconscript' )
+
+mock_test = 0
+
+# Fake platform is a transient solution for GPL drivers running in kernel that does not provide configuration via platform data.
+# For such kernels fake_platform_device should be set to 1. For kernels providing platform data fake_platform_device should be set to 0.
+if env['platform_config']=='devicetree':
+	fake_platform_device = 0
+else:
+	fake_platform_device = 1
+
+# Source files required for kbase.
+kbase_src = [Glob('#kernel/drivers/gpu/arm/midgard/*.c'),
+             Glob('#kernel/drivers/gpu/arm/midgard/*.c'),
+             Glob('#kernel/drivers/gpu/arm/midgard/backend/*/*.c'),
+             Glob('#kernel/drivers/gpu/arm/midgard/backend/*/*.h'),
+             Glob('#kernel/drivers/gpu/arm/midgard/platform/%s/*.c' % (env['platform_config'])),
+             Glob('#kernel/drivers/gpu/arm/midgard/*.h'),
+             Glob('#kernel/drivers/gpu/arm/midgard/*.h'),
+             Glob('#kernel/drivers/gpu/arm/midgard/Makefile',
+             Glob('#kernel/drivers/gpu/arm/midgard/K*'))
+             ]
+
+kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*.c')]
+
+if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1':
+	kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')]
+	mock_test = 1
+
+# we need platform config for GPL version using fake platform
+if fake_platform_device==1:
+	# Check if we are compiling for PBX
+	linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+	search_term = '^[\ ]*CONFIG_MACH_REALVIEW_PBX[\ ]*=[\ ]*y'
+	REALVIEW_PBX = 0
+	for line in open(linux_config_file, 'r'):
+		if re.search(search_term, line):
+			REALVIEW_PBX = 1
+			break
+	if REALVIEW_PBX == 1 and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'):
+		sys.stderr.write("WARNING: Building for a PBX kernel but with platform_config=vexpress*\n")
+	# if the file platform config file is in the tpip directory then use that, otherwise use the default config directory
+	if Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])):
+		kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config']))
+	else:
+		kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/*%s.c' % (env['platform_config']))
+	
+# Note: cleaning via the Linux kernel build system does not yet work
+if env.GetOption('clean') :
+	env.Execute(Action("make clean", '[clean] kbase'))
+	cmd = env.Command(['$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/mali_platform_fake.ko'], kbase_src, [])
+else:
+	if env['os'] == 'android':
+		env['android'] = 1
+	else:
+		env['android'] = 0
+
+	if env['unit'] == '1':
+		env['kernel_test'] = 1
+	else:
+		env['kernel_test'] = 0
+
+	#Extract environment options, note the trailing spaces are important
+	env_options = \
+	"PLATFORM=${platform} " +\
+	"MALI_ERROR_INJECT_ON=${error_inject} " +\
+	"MALI_ANDROID=${android} " +\
+	"MALI_KERNEL_TEST_API=${kernel_test} " +\
+	"MALI_UNIT_TEST=${unit} " +\
+	"MALI_RELEASE_NAME=\"${mali_release_name}\" "+\
+	"MALI_MOCK_TEST=%s " % mock_test +\
+	"MALI_CUSTOMER_RELEASE=${release} " +\
+	"MALI_INSTRUMENTATION_LEVEL=${instr} " +\
+	"MALI_COVERAGE=${coverage} " +\
+	"MALI_BUS_LOG=${buslog} "
+
+	make_action_start = "cd ${SOURCE.dir} && make -j%d " % GetOption('num_jobs')
+	make_action_end = "%s && cp mali_kbase.ko $STATIC_LIB_PATH/mali_kbase.ko" % env.kernel_get_config_defines(fake_platform_device)
+	make_action = make_action_start + env_options + make_action_end
+
+	makeAction=Action(make_action, '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src, [makeAction])
+
+# Add a dependency on kds.ko.
+# Only necessary when KDS is not built into the kernel.
+#
+if env['os'] != 'android':
+	linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+	search_term = '^[\ ]*CONFIG_KDS[\ ]*=[\ ]*y'
+	kds_in_kernel = 0
+	for line in open(linux_config_file, 'r'):
+		if re.search(search_term, line):
+	        # KDS in kernel.
+			kds_in_kernel = 1
+	if not kds_in_kernel:
+		env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/kds.ko')
+
+# need Module.symvers from ump.ko build
+if int(env['ump']) == 1:
+	env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/ump.ko')
+
+env.KernelObjTarget('kbase', cmd)
+
+env.AppendUnique(BASE=['cutils_linked_list'])
diff --git a/midgard/r12p0/kernel/drivers/gpu/arm/sconscript b/midgard/r12p0/kernel/drivers/gpu/arm/sconscript
new file mode 100755
index 0000000..924511b
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/arm/sconscript
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import glob
+
+
+SConscript('midgard/sconscript')
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/Kbuild b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/Kbuild
new file mode 100755
index 0000000..f10d58c
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/Kbuild
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+pl111_drm-y +=	pl111_drm_device.o \
+		pl111_drm_connector.o \
+		pl111_drm_crtc.o \
+		pl111_drm_cursor.o \
+		pl111_drm_dma_buf.o \
+		pl111_drm_encoder.o \
+		pl111_drm_fb.o \
+		pl111_drm_gem.o \
+		pl111_drm_pl111.o \
+		pl111_drm_platform.o \
+		pl111_drm_suspend.o \
+		pl111_drm_vma.o
+
+obj-$(CONFIG_DRM_PL111) += pl111_drm.o
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/Kconfig b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/Kconfig
new file mode 100755
index 0000000..60b465c
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/Kconfig
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+config DRM_PL111
+	tristate "DRM Support for PL111 CLCD Controller"
+	depends on DRM
+	select DRM_KMS_HELPER
+	select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+	help
+	  Choose this option for DRM support for the PL111 CLCD controller.
+	  If M is selected the module will be called pl111_drm.
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/Makefile b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/Makefile
new file mode 100755
index 0000000..2869f58
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: pl111_drm
+
+pl111_drm:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_USES_KDS=y CONFIG_DRM_PL111=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h
new file mode 100755
index 0000000..d3e0086
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h
@@ -0,0 +1,95 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+/**
+ * pl111_clcd_ext.h
+ * Extended CLCD register definitions
+ */
+
+#ifndef PL111_CLCD_EXT_H_
+#define PL111_CLCD_EXT_H_
+
+/* 
+ * PL111 cursor register definitions not defined in the kernel's clcd header.
+ * 
+ * TODO MIDEGL-1718: move to include/linux/amba/clcd.h
+ */
+
+#define CLCD_CRSR_IMAGE				0x00000800
+#define CLCD_CRSR_IMAGE_MAX_WORDS		256
+#define CLCD_CRSR_IMAGE_WORDS_PER_LINE		4
+#define CLCD_CRSR_IMAGE_PIXELS_PER_WORD	16
+
+#define CLCD_CRSR_LBBP_COLOR_MASK	0x00000003
+#define CLCD_CRSR_LBBP_BACKGROUND	0x0
+#define CLCD_CRSR_LBBP_FOREGROUND	0x1
+#define CLCD_CRSR_LBBP_TRANSPARENT	0x2
+#define CLCD_CRSR_LBBP_INVERSE		0x3
+
+
+#define CLCD_CRSR_CTRL			0x00000c00
+#define CLCD_CRSR_CONFIG		0x00000c04
+#define CLCD_CRSR_PALETTE_0		0x00000c08
+#define CLCD_CRSR_PALETTE_1		0x00000c0c
+#define CLCD_CRSR_XY			0x00000c10
+#define CLCD_CRSR_CLIP			0x00000c14
+#define CLCD_CRSR_IMSC			0x00000c20
+#define CLCD_CRSR_ICR			0x00000c24
+#define CLCD_CRSR_RIS			0x00000c28
+#define CLCD_MIS				0x00000c2c
+
+#define CRSR_CTRL_CRSR_ON		(1 << 0)
+#define CRSR_CTRL_CRSR_MAX		3
+#define CRSR_CTRL_CRSR_NUM_SHIFT	4
+#define CRSR_CTRL_CRSR_NUM_MASK		\
+	(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT)
+#define CRSR_CTRL_CURSOR_0		0
+#define CRSR_CTRL_CURSOR_1		1
+#define CRSR_CTRL_CURSOR_2		2
+#define CRSR_CTRL_CURSOR_3		3
+
+#define CRSR_CONFIG_CRSR_SIZE		(1 << 0)
+#define CRSR_CONFIG_CRSR_FRAME_SYNC	(1 << 1)
+
+#define CRSR_PALETTE_RED_SHIFT		0
+#define CRSR_PALETTE_GREEN_SHIFT	8
+#define CRSR_PALETTE_BLUE_SHIFT		16
+
+#define CRSR_PALETTE_RED_MASK		0x000000ff
+#define CRSR_PALETTE_GREEN_MASK		0x0000ff00
+#define CRSR_PALETTE_BLUE_MASK		0x00ff0000
+#define CRSR_PALETTE_MASK		(~0xff000000)
+
+#define CRSR_XY_MASK			0x000003ff
+#define CRSR_XY_X_SHIFT			0
+#define CRSR_XY_Y_SHIFT			16
+
+#define CRSR_XY_X_MASK			CRSR_XY_MASK
+#define CRSR_XY_Y_MASK			(CRSR_XY_MASK << CRSR_XY_Y_SHIFT)
+
+#define CRSR_CLIP_MASK			0x3f
+#define CRSR_CLIP_X_SHIFT		0
+#define CRSR_CLIP_Y_SHIFT		8
+
+#define CRSR_CLIP_X_MASK		CRSR_CLIP_MASK
+#define CRSR_CLIP_Y_MASK		(CRSR_CLIP_MASK << CRSR_CLIP_Y_SHIFT)
+
+#define CRSR_IMSC_CRSR_IM		(1<<0)
+#define CRSR_ICR_CRSR_IC		(1<<0)
+#define CRSR_RIS_CRSR_RIS		(1<<0)
+#define CRSR_MIS_CRSR_MIS		(1<<0)
+
+#endif /* PL111_CLCD_EXT_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h
new file mode 100755
index 0000000..64d87b6
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h
@@ -0,0 +1,270 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _PL111_DRM_H_
+#define _PL111_DRM_H_
+
+#define DRIVER_AUTHOR    "ARM Ltd."
+#define DRIVER_NAME      "pl111_drm"
+#define DRIVER_DESC      "DRM module for PL111"
+#define DRIVER_LICENCE   "GPL"
+#define DRIVER_ALIAS     "platform:pl111_drm"
+#define DRIVER_DATE      "20101111"
+#define DRIVER_VERSION   "0.2"
+#define DRIVER_MAJOR      2
+#define DRIVER_MINOR      1
+#define DRIVER_PATCHLEVEL 1
+
+/*
+ * Number of flips allowed in flight at any one time. Any more flips requested
+ * beyond this value will cause the caller to block until earlier flips have
+ * completed.
+ *
+ * For performance reasons, this must be greater than the number of buffers
+ * used in the rendering pipeline. Note that the rendering pipeline can contain
+ * different types of buffer, e.g.:
+ * - 2 final framebuffers
+ * - >2 geometry buffers for GPU use-cases
+ * - >2 vertex buffers for GPU use-cases
+ *
+ * For example, a system using 5 geometry buffers could have 5 flips in flight,
+ * and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 5 or greater.
+ *
+ * Whilst there may be more intermediate buffers (such as vertex/geometry) than
+ * final framebuffers, KDS is used to ensure that GPU rendering waits for the
+ * next off-screen buffer, so it doesn't overwrite an on-screen buffer and
+ * produce tearing.
+ */
+
+/*
+ * Here, we choose a conservative value. A lower value is most likely
+ * suitable for GPU use-cases.
+ */
+#define NR_FLIPS_IN_FLIGHT_THRESHOLD 16
+
+#define CLCD_IRQ_NEXTBASE_UPDATE (1u<<2)
+
+struct pl111_drm_flip_resource;
+
+struct pl111_gem_bo_dma {
+	dma_addr_t fb_dev_addr;
+	void *fb_cpu_addr;
+};
+
+struct pl111_gem_bo_shm {
+	struct page **pages;
+	dma_addr_t *dma_addrs;
+};
+
+struct pl111_gem_bo {
+	struct drm_gem_object gem_object;
+	u32 type;
+	union {
+		struct pl111_gem_bo_dma dma;
+		struct pl111_gem_bo_shm shm;
+	} backing_data;
+	struct sg_table *sgt;
+};
+
+extern struct pl111_drm_dev_private priv;
+
+struct pl111_drm_framebuffer {
+	struct drm_framebuffer fb;
+	struct pl111_gem_bo *bo;
+};
+
+struct pl111_drm_flip_resource {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* This is the kds set associated to the dma_buf we want to flip */
+	struct kds_resource_set *kds_res_set;
+#endif
+	struct drm_framebuffer *fb;
+	struct drm_crtc *crtc;
+	struct list_head link;
+	bool page_flip;
+	struct drm_pending_vblank_event *event;
+};
+
+struct pl111_drm_crtc {
+	struct drm_crtc crtc;
+	int crtc_index;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* This protects "old_kds_res_set" and "displaying_fb" */
+	spinlock_t current_displaying_lock;
+	/*
+	 * When a buffer is displayed its associated kds resource
+	 * will be obtained and stored here. Every time a buffer
+	 * flip is completed this old kds set is released and assigned
+	 * the kds set of the new buffer.
+	 */
+	struct kds_resource_set *old_kds_res_set;
+	/*
+	 * Stores which frame buffer is currently being displayed by
+	 * this CRTC or NULL if nothing is being displayed. It is used
+	 * to tell whether we need to obtain a set of kds resources for
+	 * exported buffer objects.
+	 */
+	struct drm_framebuffer *displaying_fb;
+#endif
+	struct drm_display_mode *new_mode;
+	struct drm_display_mode *current_mode;
+	int last_bpp;
+
+	/*
+	 * This spinlock protects "update_queue", "current_update_res"
+	 * and calls to do_flip_to_res() which updates the CLCD base
+	 * registers.
+	 */
+	spinlock_t base_update_lock;
+	/*
+	 * The resource that caused a base address update. Only one can be
+	 * pending, hence it's != NULL if there's a pending update
+	 */
+	struct pl111_drm_flip_resource *current_update_res;
+	/* Queue of things waiting to update the base address */
+	struct list_head update_queue;
+
+	void (*show_framebuffer_cb)(struct pl111_drm_flip_resource *flip_res,
+				struct drm_framebuffer *fb);
+};
+
+struct pl111_drm_connector {
+	struct drm_connector connector;
+};
+
+struct pl111_drm_encoder {
+	struct drm_encoder encoder;
+};
+
+struct pl111_drm_dev_private {
+	struct pl111_drm_crtc *pl111_crtc;
+
+	struct amba_device *amba_dev;
+	unsigned long mmio_start;
+	__u32 mmio_len;
+	void *regs;
+	struct clk *clk;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct kds_callback kds_cb;
+	struct kds_callback kds_obtain_current_cb;
+#endif
+	/*
+	 * Number of flips that were started in show_framebuffer_on_crtc(),
+	 * but haven't completed yet - because we do deferred flipping
+	 */
+	atomic_t nr_flips_in_flight;
+	wait_queue_head_t wait_for_flips;
+
+	/*
+	 * Used to prevent race between pl111_dma_buf_release and
+	 * drm_gem_prime_handle_to_fd
+	 */
+	struct mutex export_dma_buf_lock;
+
+	uint32_t number_crtcs;
+
+	/* Cache for flip resources used to avoid kmalloc on each page flip */
+	struct kmem_cache *page_flip_slab;
+};
+
+enum pl111_cursor_size {
+	CURSOR_32X32,
+	CURSOR_64X64
+};
+
+enum pl111_cursor_sync {
+	CURSOR_SYNC_NONE,
+	CURSOR_SYNC_VSYNC
+};
+
+
+/**
+ * Buffer allocation function which is more flexible than dumb_create(),
+ * it allows passing driver specific flags to control the kind of buffer
+ * to be allocated.
+ */
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file_priv);
+ 
+/****** TODO MIDEGL-1718: this should be moved to uapi/include/drm/pl111_drm.h ********/
+
+/* 
+ * Parameters for different buffer objects:
+ * bit [0]: backing storage
+ *	(0 -> SHM)
+ *	(1 -> DMA)
+ * bit [2:1]: kind of mapping
+ *	(0x0 -> uncached)
+ *	(0x1 -> write combine)
+ *	(0x2 -> cached)
+ */
+#define PL111_BOT_MASK		(0x7)
+#define PL111_BOT_SHM		(0x0 << 0)
+#define PL111_BOT_DMA		(0x1 << 0)
+#define PL111_BOT_UNCACHED	(0x0 << 1)
+#define PL111_BOT_WC		(0x1 << 1)
+#define PL111_BOT_CACHED	(0x2 << 1)
+
+/**
+ * User-desired buffer creation information structure.
+ *
+ * @size: user-desired memory allocation size.
+ *      - this size value would be page-aligned internally.
+ * @flags: user request for setting memory type or cache attributes as a bit op
+ *	- PL111_BOT_DMA / PL111_BOT_SHM
+ *	- PL111_BOT_UNCACHED / PL111_BOT_WC / PL111_BOT_CACHED
+ * @handle: returned a handle to created gem object.
+ *      - this handle will be set by gem module of kernel side.
+ */
+struct drm_pl111_gem_create {
+	uint32_t height;
+	uint32_t width;
+	uint32_t bpp;
+	uint32_t flags;
+	/* handle, pitch, size will be returned */
+	uint32_t handle;
+	uint32_t pitch;
+	uint64_t size;
+};
+
+#define DRM_PL111_GEM_CREATE		0x00
+
+#define DRM_IOCTL_PL111_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + \
+			DRM_PL111_GEM_CREATE, struct drm_pl111_gem_create)
+/****************************************************************************/
+
+#define PL111_FB_FROM_FRAMEBUFFER(drm_fb) \
+	(container_of(drm_fb, struct pl111_drm_framebuffer, fb))
+
+#define PL111_BO_FROM_FRAMEBUFFER(drm_fb) \
+	(container_of(drm_fb, struct pl111_drm_framebuffer, fb)->bo)
+
+#define PL111_BO_FROM_GEM(gem_obj) \
+	container_of(gem_obj, struct pl111_gem_bo, gem_object)
+
+#define to_pl111_crtc(x) container_of(x, struct pl111_drm_crtc, crtc)
+
+#define PL111_ENCODER_FROM_ENCODER(x) \
+	container_of(x, struct pl111_drm_encoder, encoder)
+
+#define PL111_CONNECTOR_FROM_CONNECTOR(x) \
+	container_of(x, struct pl111_drm_connector, connector)
+
+#include "pl111_drm_funcs.h"
+
+#endif /* _PL111_DRM_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c
new file mode 100755
index 0000000..c7c3a22
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c
@@ -0,0 +1,170 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_connector.c
+ * Implementation of the connector functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+
+static struct {
+	int w, h, type;
+} pl111_drm_modes[] = {
+	{ 640, 480,  DRM_MODE_TYPE_PREFERRED},
+	{ 800, 600,  0},
+	{1024, 768,  0},
+	{  -1,  -1, -1}
+};
+
+void pl111_connector_destroy(struct drm_connector *connector)
+{
+	struct pl111_drm_connector *pl111_connector =
+				PL111_CONNECTOR_FROM_CONNECTOR(connector);
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	kfree(pl111_connector);
+}
+
+enum drm_connector_status pl111_connector_detect(struct drm_connector
+							*connector, bool force)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return connector_status_connected;
+}
+
+void pl111_connector_dpms(struct drm_connector *connector, int mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+}
+
+struct drm_encoder *
+pl111_connector_helper_best_encoder(struct drm_connector *connector)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	if (connector->encoder != NULL) {
+		return connector->encoder; /* Return attached encoder */
+	} else {
+		/*
+		 * If there is no attached encoder we choose the best candidate
+		 * from the list.
+		 * For PL111 there is only one encoder so we return the first
+		 * one we find.
+		 * Other h/w would require a suitable criterion below.
+		 */
+		struct drm_encoder *encoder = NULL;
+		struct drm_device *dev = connector->dev;
+
+		list_for_each_entry(encoder, &dev->mode_config.encoder_list,
+					head) {
+			if (1) { /* criterion ? */
+				break;
+			}
+		}
+		return encoder; /* return best candidate encoder */
+	}
+}
+
+int pl111_connector_helper_get_modes(struct drm_connector *connector)
+{
+	int i = 0;
+	int count = 0;
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	while (pl111_drm_modes[i].w != -1) {
+		struct drm_display_mode *mode =
+				drm_mode_find_dmt(connector->dev,
+						pl111_drm_modes[i].w,
+						pl111_drm_modes[i].h,
+						60
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+						, false
+#endif
+						);
+
+		if (mode != NULL) {
+			mode->type |= pl111_drm_modes[i].type;
+			drm_mode_probed_add(connector, mode);
+			count++;
+		}
+
+		i++;
+	}
+
+	DRM_DEBUG_KMS("found %d modes\n", count);
+
+	return count;
+}
+
+int pl111_connector_helper_mode_valid(struct drm_connector *connector,
+					struct drm_display_mode *mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return MODE_OK;
+}
+
+const struct drm_connector_funcs connector_funcs = {
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = pl111_connector_destroy,
+	.detect = pl111_connector_detect,
+	.dpms = pl111_connector_dpms,
+};
+
+const struct drm_connector_helper_funcs connector_helper_funcs = {
+	.get_modes = pl111_connector_helper_get_modes,
+	.mode_valid = pl111_connector_helper_mode_valid,
+	.best_encoder = pl111_connector_helper_best_encoder,
+};
+
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev)
+{
+	struct pl111_drm_connector *pl111_connector;
+
+	pl111_connector = kzalloc(sizeof(struct pl111_drm_connector),
+					GFP_KERNEL);
+
+	if (pl111_connector == NULL) {
+		pr_err("Failed to allocated pl111_drm_connector\n");
+		return NULL;
+	}
+
+	drm_connector_init(dev, &pl111_connector->connector, &connector_funcs,
+				DRM_MODE_CONNECTOR_DVII);
+
+	drm_connector_helper_add(&pl111_connector->connector,
+					&connector_helper_funcs);
+
+	drm_sysfs_connector_add(&pl111_connector->connector);
+
+	return pl111_connector;
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c
new file mode 100755
index 0000000..ede07ff
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c
@@ -0,0 +1,449 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_crtc.c
+ * Implementation of the CRTC functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+static int pl111_crtc_num;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc)
+{
+	struct drm_device *dev = pl111_crtc->crtc.dev;
+	struct pl111_drm_flip_resource *old_flip_res;
+	struct pl111_gem_bo *bo;
+	unsigned long irq_flags;
+	int flips_in_flight;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	unsigned long flags;
+#endif
+
+	spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+
+	/*
+	 * Cache the flip resource that caused the IRQ since it will be
+	 * dispatched later. Early return if the IRQ isn't associated to
+	 * a base register update.
+	 * 
+	 * TODO MIDBASE-2790: disable IRQs when a flip is not pending.
+	 */
+	old_flip_res = pl111_crtc->current_update_res;
+	if (!old_flip_res) {
+		spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+		return;
+	}
+	pl111_crtc->current_update_res = NULL;
+
+	/* Prepare the next flip (if any) of the queue as soon as possible. */
+	if (!list_empty(&pl111_crtc->update_queue)) {
+		struct pl111_drm_flip_resource *flip_res;
+		/* Remove the head of the list */
+		flip_res = list_first_entry(&pl111_crtc->update_queue,
+			struct pl111_drm_flip_resource, link);
+		list_del(&flip_res->link);
+		do_flip_to_res(flip_res);
+		/*
+		 * current_update_res will be set, so guarentees that
+		 * another flip_res coming in gets queued instead of
+		 * handled immediately
+		 */
+	}
+	spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+	/* Finalize properly the flip that caused the IRQ */
+	DRM_DEBUG_KMS("DRM Finalizing old_flip_res=%p\n", old_flip_res);
+
+	bo = PL111_BO_FROM_FRAMEBUFFER(old_flip_res->fb);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+	release_kds_resource_and_display(old_flip_res);
+	spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+	/* Release DMA buffer on this flip */
+
+	if (bo->gem_object.export_dma_buf != NULL)
+		dma_buf_put(bo->gem_object.export_dma_buf);
+
+	drm_handle_vblank(dev, pl111_crtc->crtc_index);
+
+	/* Wake up any processes waiting for page flip event */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (old_flip_res->event) {
+		spin_lock_bh(&dev->event_lock);
+		drm_send_vblank_event(dev, pl111_crtc->crtc_index,
+					old_flip_res->event);
+		spin_unlock_bh(&dev->event_lock);
+	}
+#else
+	if (old_flip_res->event) {
+		struct drm_pending_vblank_event *e = old_flip_res->event;
+		struct timeval now;
+		unsigned int seq;
+
+		DRM_DEBUG_KMS("%s: wake up page flip event (%p)\n", __func__,
+				old_flip_res->event);
+
+		spin_lock_bh(&dev->event_lock);
+		seq = drm_vblank_count_and_time(dev, pl111_crtc->crtc_index,
+							&now);
+		e->pipe = pl111_crtc->crtc_index;
+		e->event.sequence = seq;
+		e->event.tv_sec = now.tv_sec;
+		e->event.tv_usec = now.tv_usec;
+
+		list_add_tail(&e->base.link,
+			&e->base.file_priv->event_list);
+
+		wake_up_interruptible(&e->base.file_priv->event_wait);
+		spin_unlock_bh(&dev->event_lock);
+	}
+#endif
+
+	drm_vblank_put(dev, pl111_crtc->crtc_index);
+
+	/*
+	 * workqueue.c:process_one_work():
+	 * "It is permissible to free the struct work_struct from
+	 *  inside the function that is called from it"
+	 */
+	kmem_cache_free(priv.page_flip_slab, old_flip_res);
+
+	flips_in_flight = atomic_dec_return(&priv.nr_flips_in_flight);
+	if (flips_in_flight == 0 ||
+			flips_in_flight == (NR_FLIPS_IN_FLIGHT_THRESHOLD - 1))
+		wake_up(&priv.wait_for_flips);
+
+	DRM_DEBUG_KMS("DRM release flip_res=%p\n", old_flip_res);
+}
+
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2)
+{
+	struct pl111_drm_flip_resource *flip_res = cb1;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+	pl111_crtc->show_framebuffer_cb(cb1, cb2);
+}
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+				struct drm_framebuffer *fb, bool page_flip,
+				struct drm_pending_vblank_event *event)
+{
+	struct pl111_gem_bo *bo;
+	struct pl111_drm_flip_resource *flip_res;
+	int flips_in_flight;
+	int old_flips_in_flight;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+	crtc->fb = fb;
+#else
+	crtc->primary->fb = fb;
+#endif
+
+	bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	if (bo == NULL) {
+		DRM_DEBUG_KMS("Failed to get pl111_gem_bo object\n");
+		return -EINVAL;
+	}
+
+	/* If this is a full modeset, wait for all outstanding flips to complete
+	 * before continuing. This avoids unnecessary complication from being
+	 * able to queue up multiple modesets and queues of mixed modesets and
+	 * page flips.
+	 *
+	 * Modesets should be uncommon and will not be performant anyway, so
+	 * making them synchronous should have negligible performance impact.
+	 */
+	if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+				atomic_read(&priv.nr_flips_in_flight) == 0);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * There can be more 'early display' flips in flight than there are
+	 * buffers, and there is (currently) no explicit bound on the number of
+	 * flips. Hence, we need a new allocation for each one.
+	 *
+	 * Note: this could be optimized down if we knew a bound on the flips,
+	 * since an application can only have so many buffers in flight to be
+	 * useful/not hog all the memory
+	 */
+	flip_res = kmem_cache_alloc(priv.page_flip_slab, GFP_KERNEL);
+	if (flip_res == NULL) {
+		pr_err("kmem_cache_alloc failed to alloc - flip ignored\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * increment flips in flight, whilst blocking when we reach
+	 * NR_FLIPS_IN_FLIGHT_THRESHOLD
+	 */
+	do {
+		/*
+		 * Note: use of assign-and-then-compare in the condition to set
+		 * flips_in_flight
+		 */
+		int ret = wait_event_killable(priv.wait_for_flips,
+				(flips_in_flight =
+					atomic_read(&priv.nr_flips_in_flight))
+				< NR_FLIPS_IN_FLIGHT_THRESHOLD);
+		if (ret != 0) {
+			kmem_cache_free(priv.page_flip_slab, flip_res);
+			return ret;
+		}
+
+		old_flips_in_flight = atomic_cmpxchg(&priv.nr_flips_in_flight,
+					flips_in_flight, flips_in_flight + 1);
+	} while (old_flips_in_flight != flips_in_flight);
+
+	flip_res->fb = fb;
+	flip_res->crtc = crtc;
+	flip_res->page_flip = page_flip;
+	flip_res->event = event;
+	INIT_LIST_HEAD(&flip_res->link);
+	DRM_DEBUG_KMS("DRM alloc flip_res=%p\n", flip_res);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	if (bo->gem_object.export_dma_buf != NULL) {
+		struct dma_buf *buf = bo->gem_object.export_dma_buf;
+		unsigned long shared[1] = { 0 };
+		struct kds_resource *resource_list[1] = {
+				get_dma_buf_kds_resource(buf) };
+		int err;
+
+		get_dma_buf(buf);
+		DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+
+		/* Wait for the KDS resource associated with this buffer */
+		err = kds_async_waitall(&flip_res->kds_res_set,
+					&priv.kds_cb, flip_res, fb, 1, shared,
+					resource_list);
+		BUG_ON(err);
+	} else {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+		DRM_DEBUG_KMS("No dma_buf for this flip\n");
+
+		/* No dma-buf attached so just call the callback directly */
+		flip_res->kds_res_set = NULL;
+		pl111_crtc->show_framebuffer_cb(flip_res, fb);
+	}
+#else
+	if (bo->gem_object.export_dma_buf != NULL) {
+		struct dma_buf *buf = bo->gem_object.export_dma_buf;
+
+		get_dma_buf(buf);
+		DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+	} else {
+		DRM_DEBUG_KMS("No dma_buf for this flip\n");
+	}
+
+	/* No dma-buf attached to this so just call the callback directly */
+	{
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		pl111_crtc->show_framebuffer_cb(flip_res, fb);
+	}
+#endif
+
+	/* For the same reasons as the wait at the start of this function,
+	 * wait for the modeset to complete before continuing.
+	 */
+	if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+				flips_in_flight == 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int pl111_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+			struct drm_pending_vblank_event *event)
+#else
+			struct drm_pending_vblank_event *event,
+			uint32_t flags)
+#endif
+{
+	DRM_DEBUG_KMS("%s: crtc=%p, fb=%p, event=%p\n",
+			__func__, crtc, fb, event);
+	return show_framebuffer_on_crtc(crtc, fb, true, event);
+}
+
+int pl111_crtc_helper_mode_set(struct drm_crtc *crtc,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode,
+				int x, int y, struct drm_framebuffer *old_fb)
+{
+	int ret;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+	struct drm_display_mode *duplicated_mode;
+
+	DRM_DEBUG_KMS("DRM crtc_helper_mode_set, x=%d y=%d bpp=%d\n",
+			adjusted_mode->hdisplay, adjusted_mode->vdisplay,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+			crtc->fb->bits_per_pixel);
+#else
+			crtc->primary->fb->bits_per_pixel);
+#endif
+
+	duplicated_mode = drm_mode_duplicate(crtc->dev, adjusted_mode);
+	if (!duplicated_mode)
+		return -ENOMEM;
+
+	pl111_crtc->new_mode = duplicated_mode;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+	ret = show_framebuffer_on_crtc(crtc, crtc->fb, false, NULL);
+#else
+	ret = show_framebuffer_on_crtc(crtc, crtc->primary->fb, false, NULL);
+#endif
+	if (ret != 0) {
+		pl111_crtc->new_mode = pl111_crtc->current_mode;
+		drm_mode_destroy(crtc->dev, duplicated_mode);
+	}
+
+	return ret;
+}
+
+void pl111_crtc_helper_prepare(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+void pl111_crtc_helper_commit(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+bool pl111_crtc_helper_mode_fixup(struct drm_crtc *crtc,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0))
+				const struct drm_display_mode *mode,
+#else
+				struct drm_display_mode *mode,
+#endif
+				struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+#ifdef CONFIG_ARCH_VEXPRESS
+	/*
+	 * 1024x768 with more than 16 bits per pixel may not work 
+	 * correctly on Versatile Express due to bandwidth issues
+	 */
+	if (mode->hdisplay == 1024 && mode->vdisplay == 768 &&
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+			crtc->fb->bits_per_pixel > 16) {
+#else
+			crtc->primary->fb->bits_per_pixel > 16) {
+#endif
+		DRM_INFO("*WARNING* 1024x768 at > 16 bpp may suffer corruption\n");
+	}
+#endif
+
+	return true;
+}
+
+void pl111_crtc_helper_disable(struct drm_crtc *crtc)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+	/* don't disable crtc until no flips in flight as irq will be disabled */
+	ret = wait_event_killable(priv.wait_for_flips, atomic_read(&priv.nr_flips_in_flight) == 0);
+	if(ret) {
+		pr_err("pl111_crtc_helper_disable failed\n");
+		return;
+	}
+	clcd_disable(crtc);
+}
+
+void pl111_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+	drm_crtc_cleanup(crtc);
+	kfree(pl111_crtc);
+}
+
+const struct drm_crtc_funcs crtc_funcs = {
+	.cursor_set = pl111_crtc_cursor_set,
+	.cursor_move = pl111_crtc_cursor_move,
+	.set_config = drm_crtc_helper_set_config,
+	.page_flip = pl111_crtc_page_flip,
+	.destroy = pl111_crtc_destroy
+};
+
+const struct drm_crtc_helper_funcs crtc_helper_funcs = {
+	.mode_set = pl111_crtc_helper_mode_set,
+	.prepare = pl111_crtc_helper_prepare,
+	.commit = pl111_crtc_helper_commit,
+	.mode_fixup = pl111_crtc_helper_mode_fixup,
+	.disable = pl111_crtc_helper_disable,
+};
+
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev)
+{
+	struct pl111_drm_crtc *pl111_crtc;
+
+	pl111_crtc = kzalloc(sizeof(struct pl111_drm_crtc), GFP_KERNEL);
+	if (pl111_crtc == NULL) {
+		pr_err("Failed to allocated pl111_drm_crtc\n");
+		return NULL;
+	}
+
+	drm_crtc_init(dev, &pl111_crtc->crtc, &crtc_funcs);
+	drm_crtc_helper_add(&pl111_crtc->crtc, &crtc_helper_funcs);
+
+	pl111_crtc->crtc_index = pl111_crtc_num;
+	pl111_crtc_num++;
+	pl111_crtc->crtc.enabled = 0;
+	pl111_crtc->last_bpp = 0;
+	pl111_crtc->current_update_res = NULL;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	pl111_crtc->displaying_fb = NULL;
+	pl111_crtc->old_kds_res_set = NULL;
+	spin_lock_init(&pl111_crtc->current_displaying_lock);
+#endif
+	pl111_crtc->show_framebuffer_cb = show_framebuffer_on_crtc_cb_internal;
+	INIT_LIST_HEAD(&pl111_crtc->update_queue);
+	spin_lock_init(&pl111_crtc->base_update_lock);
+
+	return pl111_crtc;
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c
new file mode 100755
index 0000000..4bf20fe
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c
@@ -0,0 +1,331 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_cursor.c
+ * Implementation of cursor functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_clcd_ext.h"
+#include "pl111_drm.h"
+
+#define PL111_MAX_CURSOR_WIDTH (64)
+#define PL111_MAX_CURSOR_HEIGHT (64)
+
+#define ARGB_2_LBBP_BINARY_THRESHOLD	(1 << 7)
+#define ARGB_ALPHA_SHIFT		24
+#define ARGB_ALPHA_MASK			(0xff << ARGB_ALPHA_SHIFT)
+#define ARGB_RED_SHIFT			16
+#define ARGB_RED_MASK			(0xff << ARGB_RED_SHIFT)
+#define ARGB_GREEN_SHIFT		8
+#define ARGB_GREEN_MASK			(0xff << ARGB_GREEN_SHIFT)
+#define ARGB_BLUE_SHIFT			0
+#define ARGB_BLUE_MASK			(0xff << ARGB_BLUE_SHIFT)
+
+
+void pl111_set_cursor_size(enum pl111_cursor_size size)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+	if (size == CURSOR_64X64)
+		reg_data |= CRSR_CONFIG_CRSR_SIZE;
+	else
+		reg_data &= ~CRSR_CONFIG_CRSR_SIZE;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor_sync(enum pl111_cursor_sync sync)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+	if (sync == CURSOR_SYNC_VSYNC)
+		reg_data |= CRSR_CONFIG_CRSR_FRAME_SYNC;
+	else
+		reg_data &= ~CRSR_CONFIG_CRSR_FRAME_SYNC;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor(u32 cursor)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+	reg_data &= ~(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT);
+	reg_data |= (cursor & CRSR_CTRL_CRSR_MAX) << CRSR_CTRL_CRSR_NUM_SHIFT;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_enable(bool enable)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+	if (enable)
+		reg_data |= CRSR_CTRL_CRSR_ON;
+	else
+		reg_data &= ~CRSR_CTRL_CRSR_ON;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_position(u32 x, u32 y)
+{
+	u32 reg_data = (x & CRSR_XY_MASK) |
+			((y & CRSR_XY_MASK) << CRSR_XY_Y_SHIFT);
+
+	writel(reg_data, priv.regs + CLCD_CRSR_XY);
+}
+
+void pl111_set_cursor_clipping(u32 x, u32 y)
+{
+	u32 reg_data;
+
+	/* 
+	 * Do not allow setting clipping values larger than
+	 * the cursor size since the cursor is already fully hidden
+	 * when x,y = PL111_MAX_CURSOR_WIDTH.
+	 */
+	if (x > PL111_MAX_CURSOR_WIDTH)
+		x = PL111_MAX_CURSOR_WIDTH;
+	if (y > PL111_MAX_CURSOR_WIDTH)
+		y = PL111_MAX_CURSOR_WIDTH;
+
+	reg_data = (x & CRSR_CLIP_MASK) |
+			((y & CRSR_CLIP_MASK) << CRSR_CLIP_Y_SHIFT);
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CLIP);
+}
+
+void pl111_set_cursor_palette(u32 color0, u32 color1)
+{
+	writel(color0 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_0);
+	writel(color1 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_1);
+}
+
+void pl111_cursor_enable(void)
+{
+	pl111_set_cursor_sync(CURSOR_SYNC_VSYNC);
+	pl111_set_cursor_size(CURSOR_64X64);
+	pl111_set_cursor_palette(0x0, 0x00ffffff);
+	pl111_set_cursor_enable(true);
+}
+
+void pl111_cursor_disable(void)
+{
+	pl111_set_cursor_enable(false);
+}
+
+/* shift required to locate pixel into the correct position in
+ * a cursor LBBP word, indexed by x mod 16.
+ */
+static const unsigned char
+x_mod_16_to_value_shift[CLCD_CRSR_IMAGE_PIXELS_PER_WORD] = {
+	6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24
+};
+
+/* Pack the pixel value into its correct position in the buffer as specified
+ * for LBBP */
+static inline void
+set_lbbp_pixel(uint32_t *buffer, unsigned int x, unsigned int y,
+				uint32_t value)
+{
+	u32 *cursor_ram = priv.regs + CLCD_CRSR_IMAGE;
+	uint32_t shift;
+	uint32_t data;
+
+	shift = x_mod_16_to_value_shift[x % CLCD_CRSR_IMAGE_PIXELS_PER_WORD];
+
+	/* Get the word containing this pixel */
+	cursor_ram = cursor_ram + (x >> CLCD_CRSR_IMAGE_WORDS_PER_LINE) + (y << 2);
+
+	/* Update pixel in cursor RAM */
+	data = readl(cursor_ram);
+	data &= ~(CLCD_CRSR_LBBP_COLOR_MASK << shift);
+	data |= value << shift;
+	writel(data, cursor_ram);
+}
+
+static u32 pl111_argb_to_lbbp(u32 argb_pix)
+{
+	u32 lbbp_pix = CLCD_CRSR_LBBP_TRANSPARENT;
+	u32 alpha = (argb_pix & ARGB_ALPHA_MASK) >> ARGB_ALPHA_SHIFT;
+	u32 red = (argb_pix & ARGB_RED_MASK) >> ARGB_RED_SHIFT;
+	u32 green = (argb_pix & ARGB_GREEN_MASK) >> ARGB_GREEN_SHIFT;
+	u32 blue =  (argb_pix & ARGB_BLUE_MASK) >> ARGB_BLUE_SHIFT;
+
+	/*
+	 * Converting from 8 pixel transparency to binary transparency
+	 * it's the best we can achieve.
+	 */
+	if (alpha & ARGB_2_LBBP_BINARY_THRESHOLD) {
+		u32 gray, max, min;
+
+		/*
+		 * Convert to gray using the lightness method:
+		 * gray = [max(R,G,B) + min(R,G,B)]/2
+		 */
+		min = min(red, green);
+		min = min(min, blue);
+		max = max(red, green);
+		max = max(max, blue);
+		gray = (min + max) >> 1; /* divide by 2 */
+		/* Apply binary threshold to the gray value calculated */
+		if (gray & ARGB_2_LBBP_BINARY_THRESHOLD)
+			lbbp_pix = CLCD_CRSR_LBBP_FOREGROUND;
+		else
+			lbbp_pix = CLCD_CRSR_LBBP_BACKGROUND;
+	}
+
+	return lbbp_pix;
+}
+
+/*
+ * The PL111 hardware cursor supports only LBBP which is a 2bpp format but
+ * the cursor format from userspace is ARGB8888 so we need to convert
+ *  to LBBP here.
+ */
+static void pl111_set_cursor_image(u32 *data)
+{
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+	/* Add 1 on width to insert trailing NULL */
+	char string_cursor[PL111_MAX_CURSOR_WIDTH + 1];
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+	unsigned int x;
+	unsigned int y;
+
+	for (y = 0; y < PL111_MAX_CURSOR_HEIGHT; y++) {
+		for (x = 0; x < PL111_MAX_CURSOR_WIDTH; x++) {
+			u32 value = pl111_argb_to_lbbp(*data);
+
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+			if (value == CLCD_CRSR_LBBP_TRANSPARENT)
+				string_cursor[x] = 'T';
+			else if (value == CLCD_CRSR_LBBP_FOREGROUND)
+				string_cursor[x] = 'F';
+			else if (value == CLCD_CRSR_LBBP_INVERSE)
+				string_cursor[x] = 'I';
+			else
+				string_cursor[x] = 'B';
+
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+			set_lbbp_pixel(data, x, y, value);
+			++data;
+		}
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+		string_cursor[PL111_MAX_CURSOR_WIDTH] = '\0';
+		DRM_INFO("%s\n", string_cursor);
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+	}
+}
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+			   struct drm_file *file_priv,
+			   uint32_t handle,
+			   uint32_t width,
+			   uint32_t height)
+{
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+
+	DRM_DEBUG_KMS("handle = %u, width = %u, height = %u\n",
+		handle, width, height);
+
+	if (!handle) {
+		pl111_cursor_disable();
+		return 0;
+	}
+
+	if ((width != PL111_MAX_CURSOR_WIDTH) ||
+	    (height != PL111_MAX_CURSOR_HEIGHT))
+		return -EINVAL;
+
+	obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
+	if (!obj) {
+		DRM_ERROR("Cannot find cursor object for handle = %d\n",
+			  handle);
+		return -ENOENT;
+	}
+
+	/*
+	 * We expect a PL111_MAX_CURSOR_WIDTH x PL111_MAX_CURSOR_HEIGHT
+	 * ARGB888 buffer object in the input.
+	 *
+	 */
+	if (obj->size < (PL111_MAX_CURSOR_WIDTH * PL111_MAX_CURSOR_HEIGHT * 4)) {
+		DRM_ERROR("Cannot set cursor with an obj size = %d\n",
+			  obj->size);
+		drm_gem_object_unreference_unlocked(obj);
+		return -EINVAL;
+	}
+
+	bo = PL111_BO_FROM_GEM(obj);
+	if (!(bo->type & PL111_BOT_DMA)) {
+		DRM_ERROR("Tried to set cursor with non DMA backed obj = %p\n",
+			  obj);
+		drm_gem_object_unreference_unlocked(obj);
+		return -EINVAL;
+	}
+
+	pl111_set_cursor_image(bo->backing_data.dma.fb_cpu_addr);
+
+	/*
+	 * Since we copy the contents of the buffer to the HW cursor internal
+	 * memory this GEM object is not needed anymore.
+	 */
+	drm_gem_object_unreference_unlocked(obj);
+
+	pl111_cursor_enable();
+
+	return 0;
+}
+
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+			    int x, int y)
+{
+	int x_clip = 0;
+	int y_clip = 0;
+
+	DRM_DEBUG("x %d y %d\n", x, y);
+
+	/*
+	 * The cursor image is clipped automatically at the screen limits when
+	 * it extends beyond the screen image to the right or bottom but
+	 * we must clip it using pl111 HW features for negative values.
+	 */
+	if (x < 0) {
+		x_clip = -x;
+		x = 0;
+	}
+	if (y < 0) {
+		y_clip = -y;
+		y = 0;
+	}
+
+	pl111_set_cursor_clipping(x_clip, y_clip);
+	pl111_set_cursor_position(x, y);
+
+	return 0;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c
new file mode 100755
index 0000000..6619c07
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c
@@ -0,0 +1,338 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_device.c
+ * Implementation of the Linux device driver entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+struct pl111_drm_dev_private priv;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void initial_kds_obtained(void *cb1, void *cb2)
+{
+	wait_queue_head_t *wait = (wait_queue_head_t *) cb1;
+	bool *cb_has_called = (bool *) cb2;
+
+	*cb_has_called = true;
+	wake_up(wait);
+}
+
+/* Must be called from within current_displaying_lock spinlock */
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	pl111_crtc->displaying_fb = flip_res->fb;
+
+	/* Release the previous buffer */
+	if (pl111_crtc->old_kds_res_set != NULL) {
+		/*
+		 * Can flip to the same buffer, but must not release the current
+		 * resource set
+		 */
+		BUG_ON(pl111_crtc->old_kds_res_set == flip_res->kds_res_set);
+		kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+	}
+	/* Record the current buffer, to release on the next buffer flip */
+	pl111_crtc->old_kds_res_set = flip_res->kds_res_set;
+}
+#endif
+
+void pl111_drm_preclose(struct drm_device *dev, struct drm_file *file_priv)
+{
+	DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+void pl111_drm_lastclose(struct drm_device *dev)
+{
+	DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+/*
+ * pl111 does not have a proper HW counter for vblank IRQs so enable_vblank
+ * and disable_vblank are just no op callbacks.
+ */
+static int pl111_enable_vblank(struct drm_device *dev, int crtc)
+{
+	DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+	return 0;
+}
+
+static void pl111_disable_vblank(struct drm_device *dev, int crtc)
+{
+	DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+}
+
+struct drm_mode_config_funcs mode_config_funcs = {
+	.fb_create = pl111_fb_create,
+};
+
+static int pl111_modeset_init(struct drm_device *dev)
+{
+	struct drm_mode_config *mode_config;
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	struct pl111_drm_connector *pl111_connector;
+	struct pl111_drm_encoder *pl111_encoder;
+	int ret = 0;
+
+	if (priv == NULL)
+		return -EINVAL;
+
+	drm_mode_config_init(dev);
+	mode_config = &dev->mode_config;
+	mode_config->funcs = &mode_config_funcs;
+	mode_config->min_width = 1;
+	mode_config->max_width = 1024;
+	mode_config->min_height = 1;
+	mode_config->max_height = 768;
+
+	priv->pl111_crtc = pl111_crtc_create(dev);
+	if (priv->pl111_crtc == NULL) {
+		pr_err("Failed to create pl111_drm_crtc\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	priv->number_crtcs = 1;
+
+	pl111_connector = pl111_connector_create(dev);
+	if (pl111_connector == NULL) {
+		pr_err("Failed to create pl111_drm_connector\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	pl111_encoder = pl111_encoder_create(dev, 1);
+	if (pl111_encoder == NULL) {
+		pr_err("Failed to create pl111_drm_encoder\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	ret = drm_mode_connector_attach_encoder(&pl111_connector->connector,
+						&pl111_encoder->encoder);
+	if (ret != 0) {
+		DRM_ERROR("Failed to attach encoder\n");
+		goto out_config;
+	}
+
+	pl111_connector->connector.encoder = &pl111_encoder->encoder;
+
+	pl111_encoder->encoder.crtc = &priv->pl111_crtc->crtc;
+
+	goto finish;
+
+out_config:
+	drm_mode_config_cleanup(dev);
+finish:
+	DRM_DEBUG("%s returned %d\n", __func__, ret);
+	return ret;
+}
+
+static void pl111_modeset_fini(struct drm_device *dev)
+{
+	drm_mode_config_cleanup(dev);
+}
+
+static int pl111_drm_load(struct drm_device *dev, unsigned long chipset)
+{
+	int ret = 0;
+
+	pr_info("DRM %s\n", __func__);
+
+	mutex_init(&priv.export_dma_buf_lock);
+	atomic_set(&priv.nr_flips_in_flight, 0);
+	init_waitqueue_head(&priv.wait_for_flips);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	ret = kds_callback_init(&priv.kds_cb, 1, show_framebuffer_on_crtc_cb);
+	if (ret != 0) {
+		pr_err("Failed to initialise KDS callback\n");
+		goto finish;
+	}
+
+	ret = kds_callback_init(&priv.kds_obtain_current_cb, 1,
+				initial_kds_obtained);
+	if (ret != 0) {
+		pr_err("Failed to init KDS obtain callback\n");
+		kds_callback_term(&priv.kds_cb);
+		goto finish;
+	}
+#endif
+
+	/* Create a cache for page flips */
+	priv.page_flip_slab = kmem_cache_create("page flip slab",
+			sizeof(struct pl111_drm_flip_resource), 0, 0, NULL);
+	if (priv.page_flip_slab == NULL) {
+		DRM_ERROR("Failed to create slab\n");
+		ret = -ENOMEM;
+		goto out_kds_callbacks;
+	}
+
+	dev->dev_private = &priv;
+
+	ret = pl111_modeset_init(dev);
+	if (ret != 0) {
+		pr_err("Failed to init modeset\n");
+		goto out_slab;
+	}
+
+	ret = pl111_device_init(dev);
+	if (ret != 0) {
+		DRM_ERROR("Failed to init MMIO and IRQ\n");
+		goto out_modeset;
+	}
+
+	ret = drm_vblank_init(dev, 1);
+	if (ret != 0) {
+		DRM_ERROR("Failed to init vblank\n");
+		goto out_vblank;
+	}
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(3, 13, 0))
+	platform_set_drvdata(dev->platformdev, dev);
+#endif
+
+	goto finish;
+
+out_vblank:
+	pl111_device_fini(dev);
+out_modeset:
+	pl111_modeset_fini(dev);
+out_slab:
+	kmem_cache_destroy(priv.page_flip_slab);
+out_kds_callbacks:
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	kds_callback_term(&priv.kds_obtain_current_cb);
+	kds_callback_term(&priv.kds_cb);
+#endif
+finish:
+	DRM_DEBUG_KMS("pl111_drm_load returned %d\n", ret);
+	return ret;
+}
+
+static int pl111_drm_unload(struct drm_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+
+	kmem_cache_destroy(priv.page_flip_slab);
+
+	drm_vblank_cleanup(dev);
+	pl111_modeset_fini(dev);
+	pl111_device_fini(dev);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	kds_callback_term(&priv.kds_obtain_current_cb);
+	kds_callback_term(&priv.kds_cb);
+#endif
+	return 0;
+}
+
+static struct vm_operations_struct pl111_gem_vm_ops = {
+	.fault = pl111_gem_fault,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+#else
+	.open = pl111_gem_vm_open,
+	.close = pl111_gem_vm_close,
+#endif
+};
+
+static const struct file_operations drm_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = pl111_gem_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+};
+
+static struct drm_ioctl_desc pl111_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(PL111_GEM_CREATE, pl111_drm_gem_create_ioctl,
+		DRM_CONTROL_ALLOW | DRM_UNLOCKED),
+};
+
+static struct drm_driver driver = {
+	.driver_features =
+		DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME,
+	.load = pl111_drm_load,
+	.unload = pl111_drm_unload,
+	.context_dtor = NULL,
+	.preclose = pl111_drm_preclose,
+	.lastclose = pl111_drm_lastclose,
+	.suspend = pl111_drm_suspend,
+	.resume = pl111_drm_resume,
+	.get_vblank_counter = drm_vblank_count,
+	.enable_vblank = pl111_enable_vblank,
+	.disable_vblank = pl111_disable_vblank,
+	.ioctls = pl111_ioctls,
+	.fops = &drm_fops,
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+	.dumb_create = pl111_dumb_create,
+	.dumb_destroy = pl111_dumb_destroy,
+	.dumb_map_offset = pl111_dumb_map_offset,
+	.gem_free_object = pl111_gem_free_object,
+	.gem_vm_ops = &pl111_gem_vm_ops,
+	.prime_handle_to_fd = &pl111_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_export = &pl111_gem_prime_export,
+	.gem_prime_import = &pl111_gem_prime_import,
+};
+
+int pl111_drm_init(struct platform_device *dev)
+{
+	int ret;
+	pr_info("DRM %s\n", __func__);
+	pr_info("PL111 DRM initialize, driver name: %s, version %d.%d\n",
+		DRIVER_NAME, DRIVER_MAJOR, DRIVER_MINOR);
+	driver.num_ioctls = ARRAY_SIZE(pl111_ioctls);
+	ret = 0;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 15, 0))
+	driver.kdriver.platform_device = dev;
+#endif
+	return drm_platform_init(&driver, dev);
+
+}
+
+void pl111_drm_exit(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	drm_platform_exit(&driver, dev);
+#else
+	drm_put_dev(platform_get_drvdata(dev));
+#endif
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
new file mode 100755
index 0000000..1131f46
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
@@ -0,0 +1,625 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_dma_buf.c
+ * Implementation of the dma_buf functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void obtain_kds_if_currently_displayed(struct drm_device *dev,
+						struct pl111_gem_bo *bo,
+						struct dma_buf *dma_buf)
+{
+	unsigned long shared[1] = { 0 };
+	struct kds_resource *resource_list[1];
+	struct kds_resource_set *kds_res_set;
+	struct drm_crtc *crtc;
+	bool cb_has_called = false;
+	unsigned long flags;
+	int err;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	DRM_DEBUG_KMS("Obtaining initial KDS res for bo:%p dma_buf:%p\n",
+			bo, dma_buf);
+
+	resource_list[0] = get_dma_buf_kds_resource(dma_buf);
+	get_dma_buf(dma_buf);
+
+	/*
+	 * Can't use kds_waitall(), because kbase will be let through due to
+	 * locked ignore'
+	 */
+	err = kds_async_waitall(&kds_res_set,
+				&priv.kds_obtain_current_cb, &wake,
+				&cb_has_called, 1, shared, resource_list);
+	BUG_ON(err);
+	wait_event(wake, cb_has_called == true);
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb) {
+			struct pl111_drm_framebuffer *pl111_fb;
+			struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+			pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+
+			if (pl111_fb->bo == bo) {
+				DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+				DRM_DEBUG_KMS(" is being displayed, keeping\n");
+				/* There shouldn't be a previous buffer to release */
+				BUG_ON(pl111_crtc->old_kds_res_set);
+
+				if (kds_res_set == NULL) {
+					err = kds_async_waitall(&kds_res_set,
+							&priv.kds_obtain_current_cb,
+							&wake, &cb_has_called,
+							1, shared, resource_list);
+					BUG_ON(err);
+					wait_event(wake, cb_has_called == true);
+				}
+
+				/* Current buffer will need releasing on next flip */
+				pl111_crtc->old_kds_res_set = kds_res_set;
+
+				/*
+				* Clear kds_res_set, so a new kds_res_set is allocated
+				* for additional CRTCs
+				*/
+				kds_res_set = NULL;
+			}
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+	}
+
+	/* kds_res_set will be NULL here if any CRTCs are displaying fb */
+	if (kds_res_set != NULL) {
+		DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+		DRM_DEBUG_KMS(" not being displayed, discarding\n");
+		/* They're not being displayed, release them */
+		kds_resource_set_release(&kds_res_set);
+	}
+
+	dma_buf_put(dma_buf);
+}
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0))
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+			struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = buffer->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+	mutex_lock(&dev->struct_mutex);
+	ret = drm_gem_mmap_obj(obj, obj->size, vma);
+	mutex_unlock(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#else
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+			struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = buffer->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+
+	DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+	mutex_lock(&dev->struct_mutex);
+
+	/* Check for valid size. */
+	if (obj->size < vma->vm_end - vma->vm_start)
+		return -EINVAL;
+
+	BUG_ON(!dev->driver->gem_vm_ops);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+#endif
+
+	vma->vm_ops = dev->driver->gem_vm_ops;
+	vma->vm_private_data = obj;
+
+	/* Take a ref for this mapping of the object, so that the fault
+	* handler can dereference the mmap offset's pointer to the object.
+	* This reference is cleaned up by the corresponding vm_close
+	* (which should happen whether the vma was created by this call, or
+	* by a vm_open due to mremap or partial unmap or whatever).
+	*/
+	drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+	pl111_drm_vm_open_locked(dev, vma);
+#else
+	drm_vm_open_locked(dev, vma);
+#endif
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#endif /* KERNEL_VERSION */
+
+static void pl111_dma_buf_release(struct dma_buf *buf)
+{
+	/*
+	 * Need to release the dma_buf's reference on the gem object it was
+	 * exported from, and also clear the gem object's export_dma_buf
+	 * pointer to this dma_buf as it no longer exists
+	 */
+	struct drm_gem_object *obj = (struct drm_gem_object *)buf->priv;
+	struct pl111_gem_bo *bo;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct drm_crtc *crtc;
+	unsigned long flags;
+#endif
+	bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("Releasing dma_buf %p, drm_gem_obj=%p\n", buf, obj);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	list_for_each_entry(crtc, &bo->gem_object.dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb) {
+			struct pl111_drm_framebuffer *pl111_fb;
+			struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+			pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+			if (pl111_fb->bo == bo) {
+				kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+				pl111_crtc->old_kds_res_set = NULL;
+			}
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+	}
+#endif
+	mutex_lock(&priv.export_dma_buf_lock);
+
+	obj->export_dma_buf = NULL;
+	drm_gem_object_unreference_unlocked(obj);
+
+	mutex_unlock(&priv.export_dma_buf_lock);
+}
+
+static int pl111_dma_buf_attach(struct dma_buf *buf, struct device *dev,
+				struct dma_buf_attachment *attach)
+{
+	DRM_DEBUG_KMS("Attaching dma_buf %p to device %p attach=%p\n", buf,
+			dev, attach);
+
+	attach->priv = dev;
+
+	return 0;
+}
+
+static void pl111_dma_buf_detach(struct dma_buf *buf,
+				struct dma_buf_attachment *attach)
+{
+	DRM_DEBUG_KMS("Detaching dma_buf %p attach=%p\n", attach->dmabuf,
+			attach);
+}
+
+/* Heavily from exynos_drm_dmabuf.c */
+static struct sg_table *pl111_dma_buf_map_dma_buf(struct dma_buf_attachment
+						*attach,
+						enum dma_data_direction
+						direction)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int size, n_pages, nents;
+	struct scatterlist *s, *sg;
+	struct sg_table *sgt;
+	int ret, i;
+
+	DRM_DEBUG_KMS("Mapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+		      attach, bo);
+
+	/*
+	 * Nothing to do, if we are trying to map a dmabuf that has been imported.
+	 * Just return the existing sgt.
+	 */
+	if (obj->import_attach) {
+		BUG_ON(!bo->sgt);
+		return bo->sgt;
+	}
+
+	size = obj->size;
+	n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	if (bo->type & PL111_BOT_DMA) {
+		sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+		if (!sgt) {
+			DRM_ERROR("Failed to allocate sg_table\n");
+			return ERR_PTR(-ENOMEM);
+		}
+
+		ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+		if (ret < 0) {
+			DRM_ERROR("Failed to allocate page table\n");
+			return ERR_PTR(-ENOMEM);
+		}
+		sg_dma_len(sgt->sgl) = size;
+		/* We use DMA coherent mappings for PL111_BOT_DMA so we must
+		 * use the virtual address returned at buffer allocation */
+		sg_set_buf(sgt->sgl, bo->backing_data.dma.fb_cpu_addr, size);
+		sg_dma_address(sgt->sgl) = bo->backing_data.dma.fb_dev_addr;
+	} else { /* PL111_BOT_SHM */
+		struct page **pages;
+		int pg = 0;
+
+		mutex_lock(&dev->struct_mutex);
+		pages = get_pages(obj);
+		if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev, "could not get pages: %ld\n",
+				PTR_ERR(pages));
+			return ERR_CAST(pages);
+		}
+		sgt = drm_prime_pages_to_sg(pages, n_pages);
+		if (sgt == NULL)
+			return ERR_PTR(-ENOMEM);
+
+		pl111_gem_sync_to_dma(bo);
+
+		/*
+		 * At this point the pages have been dma-mapped by either
+		 * get_pages() for non cached maps or pl111_gem_sync_to_dma()
+		 * for cached. So the physical addresses can be assigned
+		 * to the sg entries.
+		 * drm_prime_pages_to_sg() may have combined contiguous pages
+		 * into chunks so we assign the physical address of the first
+		 * page of a chunk to the chunk and check that the physical
+		 * addresses of the rest of the pages in that chunk are also
+		 * contiguous.
+		 */
+		sg = sgt->sgl;
+		nents = sgt->nents;
+
+		for_each_sg(sg, s, nents, i) {
+			int j, n_pages_in_chunk = sg_dma_len(s) >> PAGE_SHIFT;
+
+			sg_dma_address(s) = bo->backing_data.shm.dma_addrs[pg];
+
+			for (j = pg+1; j < pg+n_pages_in_chunk; j++) {
+				BUG_ON(bo->backing_data.shm.dma_addrs[j] !=
+						bo->backing_data.shm.dma_addrs[j-1]+PAGE_SIZE);
+			}
+
+			pg += n_pages_in_chunk;
+		}
+
+		mutex_unlock(&dev->struct_mutex);
+	}
+	bo->sgt = sgt;
+	return sgt;
+}
+
+static void pl111_dma_buf_unmap_dma_buf(struct dma_buf_attachment *attach,
+					struct sg_table *sgt,
+					enum dma_data_direction direction)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("Unmapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+			attach, bo);
+
+	sg_free_table(sgt);
+	kfree(sgt);
+	bo->sgt = NULL;
+}
+
+/*
+ * There isn't any operation here that can sleep or fail so this callback can
+ * be used for both kmap and kmap_atomic implementations.
+ */
+static void *pl111_dma_buf_kmap(struct dma_buf *dma_buf, unsigned long pageno)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	void *vaddr = NULL;
+
+	/* Make sure we cannot access outside the memory range */
+	if (((pageno + 1) << PAGE_SHIFT) > bo->gem_object.size)
+		return NULL;
+
+	if (bo->type & PL111_BOT_DMA) {
+		vaddr = (bo->backing_data.dma.fb_cpu_addr +
+						(pageno << PAGE_SHIFT));
+	} else {
+		vaddr = page_address(bo->backing_data.shm.pages[pageno]);
+	}
+
+	return vaddr;
+}
+
+/*
+ * Find a scatterlist that starts in "start" and has "len"
+ * or return a NULL dma_handle.
+ */
+static dma_addr_t pl111_find_matching_sg(struct sg_table *sgt, size_t start,
+					size_t len)
+{
+	struct scatterlist *sg;
+	unsigned int count;
+	size_t size = 0;
+	dma_addr_t dma_handle = 0;
+
+	/* Find a scatterlist that starts in "start" and has "len"
+	* or return error */
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		if ((size == start) && (len == sg_dma_len(sg))) {
+			dma_handle = sg_dma_address(sg);
+			break;
+		}
+		size += sg_dma_len(sg);
+	}
+	return dma_handle;
+}
+
+static int pl111_dma_buf_begin_cpu(struct dma_buf *dma_buf,
+					size_t start, size_t len,
+					enum dma_data_direction dir)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	struct sg_table *sgt = bo->sgt;
+	dma_addr_t dma_handle;
+
+	if ((start + len) > bo->gem_object.size)
+		return -EINVAL;
+
+	if (!(bo->type & PL111_BOT_SHM)) {
+		struct device *dev = bo->gem_object.dev->dev;
+
+		dma_handle = pl111_find_matching_sg(sgt, start, len);
+		if (!dma_handle)
+			return -EINVAL;
+
+		dma_sync_single_range_for_cpu(dev, dma_handle, 0, len, dir);
+	}
+	/* PL111_BOT_DMA uses coherents mappings, no need to sync */
+	return 0;
+}
+
+static void pl111_dma_buf_end_cpu(struct dma_buf *dma_buf,
+					size_t start, size_t len,
+					enum dma_data_direction dir)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	struct sg_table *sgt = bo->sgt;
+	dma_addr_t dma_handle;
+
+	if ((start + len) > bo->gem_object.size)
+		return;
+
+	if (!(bo->type & PL111_BOT_DMA)) {
+		struct device *dev = bo->gem_object.dev->dev;
+
+		dma_handle = pl111_find_matching_sg(sgt, start, len);
+		if (!dma_handle)
+			return;
+
+		dma_sync_single_range_for_device(dev, dma_handle, 0, len, dir);
+	}
+	/* PL111_BOT_DMA uses coherents mappings, no need to sync */
+}
+
+static struct dma_buf_ops pl111_dma_buf_ops = {
+	.release = &pl111_dma_buf_release,
+	.attach = &pl111_dma_buf_attach,
+	.detach = &pl111_dma_buf_detach,
+	.map_dma_buf = &pl111_dma_buf_map_dma_buf,
+	.unmap_dma_buf = &pl111_dma_buf_unmap_dma_buf,
+	.kmap_atomic = &pl111_dma_buf_kmap,
+	.kmap = &pl111_dma_buf_kmap,
+	.begin_cpu_access = &pl111_dma_buf_begin_cpu,
+	.end_cpu_access = &pl111_dma_buf_end_cpu,
+	.mmap = &pl111_dma_buf_mmap,
+};
+
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+				       struct dma_buf *dma_buf)
+{
+	struct dma_buf_attachment *attachment;
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+	struct scatterlist *sgl;
+	struct sg_table *sgt;
+	dma_addr_t cont_phys;
+	int ret = 0;
+	int i;
+
+	DRM_DEBUG_KMS("DRM %s on dev=%p dma_buf=%p\n", __func__, dev, dma_buf);
+
+	/* is this one of own objects? */
+	if (dma_buf->ops == &pl111_dma_buf_ops) {
+		obj = dma_buf->priv;
+		/* is it from our device? */
+		if (obj->dev == dev) {
+			/*
+			* Importing dmabuf exported from our own gem increases
+			* refcount on gem itself instead of f_count of dmabuf.
+			*/
+			drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+			/* before v3.10.0 we assume the caller has taken a ref on the dma_buf
+			 * we don't want it for self-imported buffers so drop it here */
+			dma_buf_put(dma_buf);
+#endif
+
+			return obj;
+		}
+	}
+
+	attachment = dma_buf_attach(dma_buf, dev->dev);
+	if (IS_ERR(attachment))
+		return ERR_CAST(attachment);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+	/* from 3.10.0 we assume the caller has not taken a ref so we take one here */
+	get_dma_buf(dma_buf);
+#endif
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto err_buf_detach;
+	}
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo) {
+		DRM_ERROR("%s: failed to allocate buffer object.\n", __func__);
+		ret = -ENOMEM;
+		goto err_unmap_attach;
+	}
+
+	/* Find out whether the buffer is contiguous or not */
+	sgl = sgt->sgl;
+	cont_phys = sg_phys(sgl);
+	bo->type |= PL111_BOT_DMA;
+	for_each_sg(sgt->sgl, sgl, sgt->nents, i) {
+		dma_addr_t real_phys = sg_phys(sgl);
+		if (real_phys != cont_phys) {
+			bo->type &= ~PL111_BOT_DMA;
+			break;
+		}
+		cont_phys += (PAGE_SIZE - sgl->offset);
+	}
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	ret = drm_gem_private_object_init(dev, &bo->gem_object,
+					  dma_buf->size);
+	if (ret != 0) {
+		DRM_ERROR("DRM could not import DMA GEM obj\n");
+		goto err_free_buffer;
+	}
+#else
+	drm_gem_private_object_init(dev, &bo->gem_object, dma_buf->size);
+#endif
+
+	if (bo->type & PL111_BOT_DMA) {
+		bo->backing_data.dma.fb_cpu_addr = sg_virt(sgt->sgl);
+		bo->backing_data.dma.fb_dev_addr = sg_phys(sgt->sgl);
+		DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, contiguous import\n", __func__, bo);
+	} else { /* PL111_BOT_SHM */
+		DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, non contiguous import\n", __func__, bo);
+	}
+
+	bo->gem_object.import_attach = attachment;
+	bo->sgt = sgt;
+
+	return &bo->gem_object;
+
+err_free_buffer:
+	kfree(bo);
+	bo = NULL;
+err_unmap_attach:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+err_buf_detach:
+	dma_buf_detach(dma_buf, attachment);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+	/* from 3.10.0 we will have taken a ref so drop it here */
+	dma_buf_put(dma_buf);
+#endif
+	return ERR_PTR(ret);
+}
+
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+				      struct drm_gem_object *obj, int flags)
+{
+	struct dma_buf *new_buf;
+	struct pl111_gem_bo *bo;
+	size_t size;
+
+	DRM_DEBUG("DRM %s on dev=%p drm_gem_obj=%p\n", __func__, dev, obj);
+	size = obj->size;
+
+	new_buf = dma_buf_export(obj /*priv */ , &pl111_dma_buf_ops, size,
+					flags | O_RDWR);
+	bo = PL111_BO_FROM_GEM(new_buf->priv);
+
+	/*
+	 * bo->gem_object.export_dma_buf not setup until after gem_prime_export
+	 * finishes
+	 */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Ensure that we hold the kds resource if it's the currently
+	 * displayed buffer.
+	 */
+	obtain_kds_if_currently_displayed(dev, bo, new_buf);
+#endif
+
+	DRM_DEBUG("Created dma_buf %p\n", new_buf);
+
+	return new_buf;
+}
+
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+				uint32_t handle, uint32_t flags, int *prime_fd)
+{
+	int result;
+	/*
+	 * This will re-use any existing exports, and calls
+	 * driver->gem_prime_export to do the first export when needed
+	 */
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p, handle=0x%.8x\n", __func__,
+			file_priv, handle);
+
+	mutex_lock(&priv.export_dma_buf_lock);
+	result = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags,
+						prime_fd);
+	mutex_unlock(&priv.export_dma_buf_lock);
+
+	return result;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c
new file mode 100755
index 0000000..78c91c0
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_encoder.c
+ * Implementation of the encoder functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+bool pl111_encoder_helper_mode_fixup(struct drm_encoder *encoder,
+					struct drm_display_mode *mode,
+					struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+	return true;
+}
+
+void pl111_encoder_helper_prepare(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_commit(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_mode_set(struct drm_encoder *encoder,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_disable(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct pl111_drm_encoder *pl111_encoder =
+					PL111_ENCODER_FROM_ENCODER(encoder);
+
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+
+	drm_encoder_cleanup(encoder);
+	kfree(pl111_encoder);
+}
+
+const struct drm_encoder_funcs encoder_funcs = {
+	.destroy = pl111_encoder_destroy,
+};
+
+const struct drm_encoder_helper_funcs encoder_helper_funcs = {
+	.mode_fixup = pl111_encoder_helper_mode_fixup,
+	.prepare = pl111_encoder_helper_prepare,
+	.commit = pl111_encoder_helper_commit,
+	.mode_set = pl111_encoder_helper_mode_set,
+	.disable = pl111_encoder_helper_disable,
+};
+
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+						int possible_crtcs)
+{
+	struct pl111_drm_encoder *pl111_encoder;
+
+	pl111_encoder = kzalloc(sizeof(struct pl111_drm_encoder), GFP_KERNEL);
+	if (pl111_encoder == NULL) {
+		pr_err("Failed to allocated pl111_drm_encoder\n");
+		return NULL;
+	}
+
+	drm_encoder_init(dev, &pl111_encoder->encoder, &encoder_funcs,
+				DRM_MODE_ENCODER_DAC);
+
+	drm_encoder_helper_add(&pl111_encoder->encoder, &encoder_helper_funcs);
+
+	pl111_encoder->encoder.possible_crtcs = possible_crtcs;
+
+	return pl111_encoder;
+}
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c
new file mode 100755
index 0000000..f575c9e
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c
@@ -0,0 +1,202 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_fb.c
+ * Implementation of the framebuffer functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_crtc.h>
+#include "pl111_drm.h"
+
+static void pl111_fb_destroy(struct drm_framebuffer *framebuffer)
+{
+	struct pl111_drm_framebuffer *pl111_fb;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct drm_crtc *crtc;
+	unsigned long flags;
+#endif
+	DRM_DEBUG_KMS("Destroying framebuffer 0x%p...\n", framebuffer);
+
+	pl111_fb = PL111_FB_FROM_FRAMEBUFFER(framebuffer);
+
+	/*
+	 * Because flips are deferred, wait for all previous flips to complete
+	 */
+	wait_event(priv.wait_for_flips,
+			atomic_read(&priv.nr_flips_in_flight) == 0);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Release KDS resources if it's currently being displayed. Only occurs
+	 * when the last framebuffer is destroyed.
+	 */
+	list_for_each_entry(crtc, &framebuffer->dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb == framebuffer) {
+			/* Release the current buffers */
+			if (pl111_crtc->old_kds_res_set != NULL) {
+				DRM_DEBUG_KMS("Releasing KDS resources for ");
+				DRM_DEBUG_KMS("displayed 0x%p\n", framebuffer);
+				kds_resource_set_release(
+					&pl111_crtc->old_kds_res_set);
+			}
+			pl111_crtc->old_kds_res_set = NULL;
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock,
+								flags);
+	}
+#endif
+	drm_framebuffer_cleanup(framebuffer);
+
+	if ((pl111_fb->bo != NULL) && (&pl111_fb->bo->gem_object != NULL))
+		drm_gem_object_unreference_unlocked(&pl111_fb->bo->gem_object);
+
+	kfree(pl111_fb);
+
+	DRM_DEBUG_KMS("Destroyed framebuffer 0x%p\n", framebuffer);
+}
+
+static int pl111_fb_create_handle(struct drm_framebuffer *fb,
+				struct drm_file *file_priv,
+				unsigned int *handle)
+{
+	struct pl111_gem_bo *bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	DRM_DEBUG_KMS("DRM %s on fb=%p\n", __func__, fb);
+
+	if (bo == NULL)
+		return -EINVAL;
+
+	return drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+}
+
+const struct drm_framebuffer_funcs fb_funcs = {
+	.destroy = pl111_fb_destroy,
+	.create_handle = pl111_fb_create_handle,
+};
+
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct pl111_drm_framebuffer *pl111_fb = NULL;
+	struct drm_framebuffer *fb = NULL;
+	struct drm_gem_object *gem_obj;
+	struct pl111_gem_bo *bo;
+	int err = 0;
+	size_t min_size;
+	int bpp;
+	int depth;
+
+	pr_info("DRM %s\n", __func__);
+	gem_obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
+	if (gem_obj == NULL) {
+		DRM_ERROR("Could not get gem obj from handle to create fb\n");
+		err = -ENOENT;
+		goto error;
+	}
+
+	bo = PL111_BO_FROM_GEM(gem_obj);
+	drm_fb_get_bpp_depth(mode_cmd->pixel_format, &depth, &bpp);
+
+	if (mode_cmd->pitches[0] < mode_cmd->width * (bpp >> 3)) {
+		DRM_ERROR("bad pitch %u for plane 0\n", mode_cmd->pitches[0]);
+		err = -EINVAL;
+		goto error;
+	}
+
+	min_size = (mode_cmd->height - 1) * mode_cmd->pitches[0]
+				+ mode_cmd->width * (bpp >> 3);
+
+	if (bo->gem_object.size < min_size) {
+		DRM_ERROR("gem obj size < min size\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* We can't scan out SHM so we can't create an fb for it */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		DRM_ERROR("Can't create FB for non-scanout buffer\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	switch ((char)(mode_cmd->pixel_format & 0xFF)) {
+	case 'Y':
+	case 'U':
+	case 'V':
+	case 'N':
+	case 'T':
+		DRM_ERROR("YUV formats not supported\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	pl111_fb = kzalloc(sizeof(struct pl111_drm_framebuffer), GFP_KERNEL);
+	if (pl111_fb == NULL) {
+		DRM_ERROR("Could not allocate pl111_drm_framebuffer\n");
+		err = -ENOMEM;
+		goto error;
+	}
+	fb = &pl111_fb->fb;
+
+	err = drm_framebuffer_init(dev, fb, &fb_funcs);
+	if (err) {
+		DRM_ERROR("drm_framebuffer_init failed\n");
+		kfree(fb);
+		fb = NULL;
+		goto error;
+	}
+
+	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+	/* The only framebuffer formats supported by pl111
+	 * are 16 bpp or 32 bpp with 24 bit depth.
+	 * See clcd_enable()
+	 */
+	if (!((fb->bits_per_pixel == 16) ||
+		  (fb->bits_per_pixel == 32 && fb->depth == 24))) {
+		DRM_DEBUG_KMS("unsupported pixel format bpp=%d, depth=%d\n", fb->bits_per_pixel, fb->depth);
+		drm_framebuffer_cleanup(fb);
+		kfree(fb);
+		fb = NULL;
+		err = -EINVAL;
+		goto error;
+	}
+
+	pl111_fb->bo = bo;
+
+	DRM_DEBUG_KMS("Created fb 0x%p with gem_obj 0x%p physaddr=0x%.8x\n",
+			fb, gem_obj, bo->backing_data.dma.fb_dev_addr);
+
+	return fb;
+
+error:
+	if (gem_obj != NULL)
+		drm_gem_object_unreference_unlocked(gem_obj);
+
+	return ERR_PTR(err);
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h
new file mode 100755
index 0000000..494baa0
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_funcs.h
+ * Function prototypes for PL111 DRM
+ */
+
+#ifndef PL111_DRM_FUNCS_H_
+#define PL111_DRM_FUNCS_H_
+
+/* Platform Initialisation */
+int pl111_drm_init(struct platform_device *dev);
+void pl111_drm_exit(struct platform_device *dev);
+
+/* KDS Callbacks */
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2);
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res);
+
+/* CRTC Functions */
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev);
+struct pl111_drm_crtc *pl111_crtc_dummy_create(struct drm_device *dev);
+void pl111_crtc_destroy(struct drm_crtc *crtc);
+
+bool pl111_crtc_is_fb_currently_displayed(struct drm_device *dev,
+					struct drm_framebuffer *fb);
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+			struct drm_framebuffer *fb, bool page_flip,
+			struct drm_pending_vblank_event *event);
+
+/* Common IRQ handler */
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc);
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+			   struct drm_file *file_priv,
+			   uint32_t handle,
+			   uint32_t width,
+			   uint32_t height);
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+			    int x, int y);
+
+/* Connector Functions */
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev);
+void pl111_connector_destroy(struct drm_connector *connector);
+struct pl111_drm_connector *pl111_connector_dummy_create(struct drm_device
+								*dev);
+
+/* Encoder Functions */
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+						int possible_crtcs);
+struct pl111_drm_encoder *pl111_encoder_dummy_create(struct drm_device *dev,
+							int possible_crtcs);
+void pl111_encoder_destroy(struct drm_encoder *encoder);
+
+/* Frame Buffer Functions */
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd);
+
+/* VMA Functions */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma);
+struct page **get_pages(struct drm_gem_object *obj);
+void put_pages(struct drm_gem_object *obj, struct page **pages);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+                struct vm_area_struct *vma);
+void pl111_gem_vm_open(struct vm_area_struct *vma);
+void pl111_gem_vm_close(struct vm_area_struct *vma);
+#endif
+
+/* Suspend Functions */
+int pl111_drm_resume(struct drm_device *dev);
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state);
+
+/* GEM Functions */
+int pl111_dumb_create(struct drm_file *file_priv,
+			struct drm_device *dev,
+			struct drm_mode_create_dumb *args);
+int pl111_dumb_destroy(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle);
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle,
+			uint64_t *offset);
+void pl111_gem_free_object(struct drm_gem_object *obj);
+
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+			struct vm_area_struct *vma, size_t size);
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff);
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo);
+
+/* DMA BUF Functions */
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+				       struct dma_buf *dma_buf);
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+			uint32_t handle, uint32_t flags, int *prime_fd);
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+				struct drm_gem_object *obj, int flags);
+
+/* Pl111 Functions */
+void show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource
+					*flip_res, struct drm_framebuffer *fb);
+int clcd_disable(struct drm_crtc *crtc);
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res);
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id);
+int pl111_amba_remove(struct amba_device *dev);
+
+int pl111_device_init(struct drm_device *dev);
+void pl111_device_fini(struct drm_device *dev);
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+					struct clcd_regs *timing);
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+					struct drm_display_mode *mode);
+#endif /* PL111_DRM_FUNCS_H_ */
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c
new file mode 100755
index 0000000..13fb256
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c
@@ -0,0 +1,476 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_gem.c
+ * Implementation of the GEM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <asm/cacheflush.h>
+#include <asm/outercache.h>
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#include <linux/dma-attrs.h>
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0))
+#include <drm/drm_vma_manager.h>
+#endif
+
+void pl111_gem_free_object(struct drm_gem_object *obj)
+{
+	struct pl111_gem_bo *bo;
+	struct drm_device *dev = obj->dev;
+	DRM_DEBUG_KMS("DRM %s on drm_gem_object=%p\n", __func__, obj);
+
+	bo = PL111_BO_FROM_GEM(obj);
+
+	if (obj->import_attach)
+		drm_prime_gem_destroy(obj, bo->sgt);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	if (obj->map_list.map != NULL)
+		drm_gem_free_mmap_offset(obj);
+#else
+	drm_gem_free_mmap_offset(obj);
+#endif
+	/*
+	 * Only free the backing memory if the object has not been imported.
+	 * If it has been imported, the exporter is in charge to free that
+	 * once dmabuf's refcount becomes 0.
+	 */
+	if (obj->import_attach)
+		goto imported_out;
+
+	if (bo->type & PL111_BOT_DMA) {
+		dma_free_writecombine(dev->dev, obj->size,
+				bo->backing_data.dma.fb_cpu_addr,
+				bo->backing_data.dma.fb_dev_addr);
+	} else if (bo->backing_data.shm.pages != NULL) {
+		put_pages(obj, bo->backing_data.shm.pages);
+	}
+
+imported_out:
+	drm_gem_object_release(obj);
+
+	kfree(bo);
+
+	DRM_DEBUG_KMS("Destroyed dumb_bo handle 0x%p\n", bo);
+}
+
+static int pl111_gem_object_create(struct drm_device *dev, u64 size,
+				   u32 flags, struct drm_file *file_priv,
+				   u32 *handle)
+{
+	int ret = 0;
+	struct pl111_gem_bo *bo = NULL;
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (bo == NULL) {
+		ret = -ENOMEM;
+		goto finish;
+	}
+
+	bo->type = flags;
+
+#ifndef ARCH_HAS_SG_CHAIN
+	/*
+	 * If the ARCH can't chain we can't have non-contiguous allocs larger
+	 * than a single sg can hold.
+	 * In this case we fall back to using contiguous memory
+	 */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		long unsigned int n_pages =
+				PAGE_ALIGN(size) >> PAGE_SHIFT;
+		if (n_pages > SG_MAX_SINGLE_ALLOC) {
+			bo->type |= PL111_BOT_DMA;
+			/*
+			 * Non-contiguous allocation request changed to
+			 * contigous
+			 */
+			DRM_INFO("non-contig alloc to contig %lu > %lu pages.",
+					n_pages, SG_MAX_SINGLE_ALLOC);
+		}
+	}
+#endif
+	if (bo->type & PL111_BOT_DMA) {
+		/* scanout compatible - use physically contiguous buffer */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_attrs(dev->dev, size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL,
+					&attrs);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_attrs failed\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+#else
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_writecombine(dev->dev, size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_writecombine failed\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+		ret = drm_gem_private_object_init(dev, &bo->gem_object,
+							size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not initialise GEM object\n");
+			goto free_dma;
+		}
+#else
+		drm_gem_private_object_init(dev, &bo->gem_object, size);
+#endif
+
+	} else { /* PL111_BOT_SHM */
+		/* not scanout compatible - use SHM backed object */
+		ret = drm_gem_object_init(dev, &bo->gem_object, size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not init SHM backed GEM obj\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+		DRM_DEBUG_KMS("Num bytes: %d\n", bo->gem_object.size);
+	}
+
+	DRM_DEBUG("s=%llu, flags=0x%x, %s 0x%.8lx, type=%d\n",
+		size, flags,
+		(bo->type & PL111_BOT_DMA) ? "physaddr" : "shared page array",
+		(bo->type & PL111_BOT_DMA) ?
+			(unsigned long)bo->backing_data.dma.fb_dev_addr:
+			(unsigned long)bo->backing_data.shm.pages,
+			bo->type);
+
+	ret = drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+	if (ret != 0) {
+		DRM_ERROR("DRM failed to create GEM handle\n");
+		goto obj_release;
+	}
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&bo->gem_object);
+	
+	return 0;
+
+obj_release:
+	drm_gem_object_release(&bo->gem_object);
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+free_dma:
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	if (bo->type & PL111_BOT_DMA) {
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(dev->dev, size,
+			bo->backing_data.dma.fb_cpu_addr,
+			bo->backing_data.dma.fb_dev_addr,
+			&attrs);
+		}
+#else
+	if (bo->type & PL111_BOT_DMA)
+		dma_free_writecombine(dev->dev, size,
+			bo->backing_data.dma.fb_cpu_addr,
+			bo->backing_data.dma.fb_dev_addr);
+#endif
+free_bo:
+	kfree(bo);
+finish:
+	return ret;
+}
+
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv)
+{
+	struct drm_pl111_gem_create *args = data;
+	uint32_t bytes_pp;
+
+	/* Round bpp up, to allow for case where bpp<8 */
+	bytes_pp = args->bpp >> 3;
+	if (args->bpp & ((1 << 3) - 1))
+		bytes_pp++;
+
+	if (args->flags & ~PL111_BOT_MASK) {
+		DRM_ERROR("wrong flags: 0x%x\n", args->flags);
+		return -EINVAL;
+	}
+
+	args->pitch = ALIGN(args->width * bytes_pp, 64);
+	args->size = PAGE_ALIGN(args->pitch * args->height);
+
+	DRM_DEBUG_KMS("gem_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+			args->width, args->height, args->pitch, args->bpp,
+			bytes_pp, args->size, args->flags);
+
+	return pl111_gem_object_create(dev, args->size, args->flags, file_priv,
+					&args->handle);
+}
+
+int pl111_dumb_create(struct drm_file *file_priv,
+		struct drm_device *dev, struct drm_mode_create_dumb *args)
+{
+	uint32_t bytes_pp;
+
+	/* Round bpp up, to allow for case where bpp<8 */
+	bytes_pp = args->bpp >> 3;
+	if (args->bpp & ((1 << 3) - 1))
+		bytes_pp++;
+
+	if (args->flags) {
+		DRM_ERROR("flags must be zero: 0x%x\n", args->flags);
+		return -EINVAL;
+	}
+
+	args->pitch = ALIGN(args->width * bytes_pp, 64);
+	args->size = PAGE_ALIGN(args->pitch * args->height);
+
+	DRM_DEBUG_KMS("dumb_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+			args->width, args->height, args->pitch, args->bpp,
+			bytes_pp, args->size, args->flags);
+
+	return pl111_gem_object_create(dev, args->size,
+				       PL111_BOT_DMA | PL111_BOT_UNCACHED,
+				       file_priv, &args->handle);
+}
+
+int pl111_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
+		uint32_t handle)
+{
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+			file_priv, handle);
+	return drm_gem_handle_delete(file_priv, handle);
+}
+
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle,
+			uint64_t *offset)
+{
+	struct drm_gem_object *obj;
+	int ret = 0;
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+			file_priv, handle);
+
+	/* GEM does all our handle to object mapping */
+	obj = drm_gem_object_lookup(dev, file_priv, handle);
+	if (obj == NULL) {
+		ret = -ENOENT;
+		goto fail;
+	}
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	if (obj->map_list.map == NULL) {
+		ret = drm_gem_create_mmap_offset(obj);
+		if (ret != 0) {
+			drm_gem_object_unreference_unlocked(obj);
+			goto fail;
+		}
+	}
+#else
+	ret = drm_gem_create_mmap_offset(obj);
+	if (ret != 0) {
+		drm_gem_object_unreference_unlocked(obj);
+		goto fail;
+	}
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	*offset = (uint64_t) obj->map_list.hash.key << PAGE_SHIFT;
+#else
+	*offset = drm_vma_node_offset_addr(&obj->vma_node);
+	DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
+#endif
+
+	drm_gem_object_unreference_unlocked(obj);
+fail:
+	return ret;
+}
+
+/* sync the buffer for DMA access */
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo)
+{
+	struct drm_device *dev = bo->gem_object.dev;
+
+	if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED)) {
+		int i, npages = bo->gem_object.size >> PAGE_SHIFT;
+		struct page **pages = bo->backing_data.shm.pages;
+		bool dirty = false;
+
+		for (i = 0; i < npages; i++) {
+			if (!bo->backing_data.shm.dma_addrs[i]) {
+				DRM_DEBUG("%s: dma map page=%d bo=%p\n", __func__, i, bo);
+				 bo->backing_data.shm.dma_addrs[i] =
+					dma_map_page(dev->dev, pages[i], 0,
+					PAGE_SIZE, DMA_BIDIRECTIONAL);
+				dirty = true;
+			}
+		}
+
+		if (dirty) {
+			DRM_DEBUG("%s: zap ptes (and flush cache) bo=%p\n", __func__, bo);
+			/*
+			 * TODO MIDEGL-1813
+			 * 
+			 * Use flush_cache_page() and outer_flush_range() to
+			 * flush only the user space mappings of the dirty pages
+			 */
+			flush_cache_all();
+			outer_flush_all();
+			unmap_mapping_range(bo->gem_object.filp->f_mapping, 0,
+					bo->gem_object.size, 1);
+		}
+	}
+}
+
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff)
+{
+	struct drm_device *dev = bo->gem_object.dev;
+
+	/*
+	 * TODO MIDEGL-1808
+	 * 
+	 * The following check was meant to detect if the CPU is trying to access
+	 * a buffer that is currently mapped for DMA accesses, which is illegal
+	 * as described by the DMA-API.
+	 * 
+	 * However, some of our tests are trying to do that, which triggers the message
+	 * below and avoids dma-unmapping the pages not to annoy the DMA device but that
+	 * leads to the test failing because of caches not being properly flushed.
+	 */
+
+	/*
+	if (bo->sgt) {
+		DRM_ERROR("%s: the CPU is trying to access a dma-mapped buffer\n",  __func__);
+		return;
+	}
+	*/
+
+	if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED) &&
+	     bo->backing_data.shm.dma_addrs[pgoff]) {
+		DRM_DEBUG("%s: unmap bo=%p (s=%d), paddr=%08x\n",
+			  __func__, bo,  bo->gem_object.size,
+			  bo->backing_data.shm.dma_addrs[pgoff]);
+		dma_unmap_page(dev->dev, bo->backing_data.shm.dma_addrs[pgoff],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		bo->backing_data.shm.dma_addrs[pgoff] = 0;
+	}
+}
+
+/* Based on omapdrm driver */
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+		 struct vm_area_struct *vma, size_t size)
+{
+	DRM_DEBUG("DRM %s on drm_gem_object=%p, pl111_gem_bo=%p type=%08x\n",
+			__func__, obj, bo, bo->type);
+
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_flags |= VM_MIXEDMAP;
+
+	if (bo->type & PL111_BOT_WC) {
+		vma->vm_page_prot =
+			pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	} else if (bo->type & PL111_BOT_CACHED) {
+		/*
+		 * Objects that do not have a filp (DMA backed) can't be
+		 * mapped as cached now. Write-combine should be enough.
+		 */
+		if (WARN_ON(!obj->filp))
+			return -EINVAL;
+
+		/*
+		 * As explained in Documentation/dma-buf-sharing.txt
+		 * we need this trick so that we can manually zap ptes
+		 * in order to fake coherency.
+		 */
+		fput(vma->vm_file);
+		get_file(obj->filp);
+		vma->vm_file = obj->filp;
+
+		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	} else { /* PL111_BOT_UNCACHED */
+		vma->vm_page_prot =
+			pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+	}
+	return 0;
+}
+
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma)
+{
+	int ret;
+	struct drm_file *priv = file_priv->private_data;
+	struct drm_device *dev = priv->minor->dev;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_hash_item *hash;
+	struct drm_local_map *map = NULL;
+#else
+	struct drm_vma_offset_node *node;
+#endif
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+
+	DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash);
+	map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+	obj = map->handle;
+#else
+	node = drm_vma_offset_exact_lookup(dev->vma_offset_manager,
+			vma->vm_pgoff,
+			vma_pages(vma));
+	obj = container_of(node, struct drm_gem_object, vma_node);
+#endif
+	bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("DRM %s on pl111_gem_bo %p bo->type 0x%08x\n", __func__, bo, bo->type);
+
+	/* for an imported buffer we let the exporter handle the mmap */
+	if (obj->import_attach)
+		return dma_buf_mmap(obj->import_attach->dmabuf, vma, 0);
+
+	ret = drm_gem_mmap(file_priv, vma);
+	if (ret < 0) {
+		DRM_ERROR("failed to mmap\n");
+		return ret;
+	}
+
+	/* Our page fault handler uses the page offset calculated from the vma,
+	 * so we need to remove the gem cookie offset specified in the call.
+	 */
+	vma->vm_pgoff = 0;
+
+	return pl111_bo_mmap(obj, bo, vma, vma->vm_end - vma->vm_start);
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c
new file mode 100755
index 0000000..1d613d0
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c
@@ -0,0 +1,417 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_pl111.c
+ * PL111 specific functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+/* This can't be called from IRQ context, due to clk_get() and board->enable */
+static int clcd_enable(struct drm_framebuffer *fb)
+{
+	__u32 cntl;
+	struct clcd_board *board;
+
+	pr_info("DRM %s\n", __func__);
+
+	clk_prepare_enable(priv.clk);
+
+	/* Enable and Power Up */
+	cntl = CNTL_LCDEN | CNTL_LCDTFT | CNTL_LCDPWR | CNTL_LCDVCOMP(1);
+	DRM_DEBUG_KMS("fb->bits_per_pixel = %d\n", fb->bits_per_pixel);
+	if (fb->bits_per_pixel == 16)
+		cntl |= CNTL_LCDBPP16_565;
+	else if (fb->bits_per_pixel == 32 && fb->depth == 24)
+		cntl |= CNTL_LCDBPP24;
+	else
+		BUG_ON(1);
+
+	cntl |= CNTL_BGR;
+
+	writel(cntl, priv.regs + CLCD_PL111_CNTL);
+
+	if (priv.amba_dev->dev.platform_data) {
+		board = priv.amba_dev->dev.platform_data;
+
+		if (board->enable)
+			board->enable(NULL);
+	}
+
+	/* Enable Interrupts */
+	writel(CLCD_IRQ_NEXTBASE_UPDATE, priv.regs + CLCD_PL111_IENB);
+
+	return 0;
+}
+
+int clcd_disable(struct drm_crtc *crtc)
+{
+	struct clcd_board *board;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	unsigned long flags;
+#endif
+
+	pr_info("DRM %s\n", __func__);
+
+	/* Disable Interrupts */
+	writel(0x00000000, priv.regs + CLCD_PL111_IENB);
+
+	if (priv.amba_dev->dev.platform_data) {
+		board = priv.amba_dev->dev.platform_data;
+
+		if (board->disable)
+			board->disable(NULL);
+	}
+
+	/* Disable and Power Down */
+	writel(0, priv.regs + CLCD_PL111_CNTL);
+
+	/* Disable clock */
+	clk_disable_unprepare(priv.clk);
+
+	pl111_crtc->last_bpp = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+	/* Release the previous buffers */
+	if (pl111_crtc->old_kds_res_set != NULL)
+		kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+
+	pl111_crtc->old_kds_res_set = NULL;
+	spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+	return 0;
+}
+
+/*
+ * To avoid a possible race where "pl111_crtc->current_update_res" has
+ * been updated (non NULL) but the corresponding scanout buffer has not been
+ * written to the base registers we must always call this function holding
+ * the "base_update_lock" spinlock with IRQs disabled (spin_lock_irqsave()).
+ */
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	struct drm_framebuffer *fb;
+	struct pl111_gem_bo *bo;
+	size_t min_size;
+	fb = flip_res->fb;
+	bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+
+
+
+	min_size = (fb->height - 1) * fb->pitches[0]
+	         + fb->width * (fb->bits_per_pixel >> 3);
+
+	BUG_ON(bo->gem_object.size < min_size);
+
+	/* Don't even attempt PL111_BOT_SHM, it's not contiguous */
+	BUG_ON(bo->type != PL111_BOT_DMA);
+
+	/*
+	 * Note the buffer for releasing after IRQ, and don't allow any more
+	 * updates until then.
+	 *
+	 * This clcd controller latches the new address on next vsync. Address
+	 * latching is indicated by CLCD_IRQ_NEXTBASE_UPDATE, and so we must
+	 * wait for that before releasing the previous buffer's kds
+	 * resources. Otherwise, we'll allow writers to write to the old buffer
+	 * whilst it is still being displayed
+	 */
+	pl111_crtc->current_update_res = flip_res;
+
+	DRM_DEBUG_KMS("Displaying fb 0x%p, dumb_bo 0x%p, physaddr %.8x\n",
+			fb, bo, bo->backing_data.dma.fb_dev_addr);
+	
+	if (drm_vblank_get(pl111_crtc->crtc.dev, pl111_crtc->crtc_index) < 0)
+		DRM_ERROR("Could not get vblank reference for crtc %d\n",
+				pl111_crtc->crtc_index);
+
+	/* Set the scanout buffer */
+	writel(bo->backing_data.dma.fb_dev_addr, priv.regs + CLCD_UBAS);
+	writel(bo->backing_data.dma.fb_dev_addr +
+		((fb->height - 1) * fb->pitches[0]), priv.regs + CLCD_LBAS);
+}
+
+void
+show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource *flip_res,
+					struct drm_framebuffer *fb)
+{
+	unsigned long irq_flags;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+	spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+	if (list_empty(&pl111_crtc->update_queue) &&
+			!pl111_crtc->current_update_res) {
+		do_flip_to_res(flip_res);
+	} else {
+		/*
+		 * Enqueue the update to occur on a future IRQ
+		 * This only happens on triple-or-greater buffering
+		 */
+		DRM_DEBUG_KMS("Deferring 3+ buffered flip to fb %p to IRQ\n",
+				fb);
+		list_add_tail(&flip_res->link, &pl111_crtc->update_queue);
+	}
+	spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+	if (!flip_res->page_flip && (pl111_crtc->last_bpp == 0 ||
+			pl111_crtc->last_bpp != fb->bits_per_pixel ||
+			!drm_mode_equal(pl111_crtc->new_mode,
+					pl111_crtc->current_mode))) {
+		struct clcd_regs timing;
+
+		pl111_convert_drm_mode_to_timing(pl111_crtc->new_mode, &timing);
+
+		DRM_DEBUG_KMS("Set timing: %08X:%08X:%08X:%08X clk=%ldHz\n",
+				timing.tim0, timing.tim1, timing.tim2,
+				timing.tim3, timing.pixclock);
+
+		/* This is the actual mode setting part */
+		clk_set_rate(priv.clk, timing.pixclock);
+
+		writel(timing.tim0, priv.regs + CLCD_TIM0);
+		writel(timing.tim1, priv.regs + CLCD_TIM1);
+		writel(timing.tim2, priv.regs + CLCD_TIM2);
+		writel(timing.tim3, priv.regs + CLCD_TIM3);
+
+		clcd_enable(fb);
+		pl111_crtc->last_bpp = fb->bits_per_pixel;
+	}
+
+	if (!flip_res->page_flip) {
+		drm_mode_destroy(flip_res->crtc->dev, pl111_crtc->current_mode);
+		pl111_crtc->current_mode = pl111_crtc->new_mode;
+		pl111_crtc->new_mode = NULL;
+	}
+
+	BUG_ON(!pl111_crtc->current_mode);
+
+	/*
+	 * If IRQs weren't enabled before, they are now. This will eventually
+	 * cause flip_res to be released via pl111_common_irq, which updates
+	 * every time the Base Address is latched (i.e. every frame, regardless
+	 * of whether we update the base address or not)
+	 */
+}
+
+irqreturn_t pl111_irq(int irq, void *data)
+{
+	u32 irq_stat;
+	struct pl111_drm_crtc *pl111_crtc = priv.pl111_crtc;
+
+	irq_stat = readl(priv.regs + CLCD_PL111_MIS);
+
+	if (!irq_stat)
+		return IRQ_NONE;
+
+	if (irq_stat & CLCD_IRQ_NEXTBASE_UPDATE)
+		pl111_common_irq(pl111_crtc);
+
+	/* Clear the interrupt once done */
+	writel(irq_stat, priv.regs + CLCD_PL111_ICR);
+
+	return IRQ_HANDLED;
+}
+
+int pl111_device_init(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	int ret;
+
+	if (priv == NULL) {
+		pr_err("%s no private data\n", __func__);
+		return -EINVAL;
+	}
+
+	if (priv->amba_dev == NULL) {
+		pr_err("%s no amba device found\n", __func__);
+		return -EINVAL;
+	}
+
+	/* set up MMIO for register access */
+	priv->mmio_start = priv->amba_dev->res.start;
+	priv->mmio_len = resource_size(&priv->amba_dev->res);
+
+	DRM_DEBUG_KMS("mmio_start=%lu, mmio_len=%u\n", priv->mmio_start,
+			priv->mmio_len);
+
+	priv->regs = ioremap(priv->mmio_start, priv->mmio_len);
+	if (priv->regs == NULL) {
+		pr_err("%s failed mmio\n", __func__);
+		return -EINVAL;
+	}
+
+	/* turn off interrupts */
+	writel(0, priv->regs + CLCD_PL111_IENB);
+
+	ret = request_irq(priv->amba_dev->irq[0], pl111_irq, 0,
+				"pl111_irq_handler", NULL);
+	if (ret != 0) {
+		pr_err("%s failed %d\n", __func__, ret);
+		goto out_mmio;
+	}
+
+	goto finish;
+
+out_mmio:
+	iounmap(priv->regs);
+finish:
+	DRM_DEBUG_KMS("pl111_device_init returned %d\n", ret);
+	return ret;
+}
+
+void pl111_device_fini(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	u32 cntl;
+
+	if (priv == NULL || priv->regs == NULL)
+		return;
+
+	free_irq(priv->amba_dev->irq[0], NULL);
+
+	cntl = readl(priv->regs + CLCD_PL111_CNTL);
+
+	cntl &= ~CNTL_LCDEN;
+	writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+	cntl &= ~CNTL_LCDPWR;
+	writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+	iounmap(priv->regs);
+}
+
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id)
+{
+	struct clcd_board *board = dev->dev.platform_data;
+	int ret;
+	pr_info("DRM %s\n", __func__);
+
+	if (!board)
+		dev_warn(&dev->dev, "board data not available\n");
+
+	ret = amba_request_regions(dev, NULL);
+	if (ret != 0) {
+		DRM_ERROR("CLCD: unable to reserve regs region\n");
+		goto out;
+	}
+
+	priv.amba_dev = dev;
+
+	priv.clk = clk_get(&priv.amba_dev->dev, NULL);
+	if (IS_ERR(priv.clk)) {
+		DRM_ERROR("CLCD: unable to get clk.\n");
+		ret = PTR_ERR(priv.clk);
+		goto clk_err;
+	}
+
+	return 0;
+
+clk_err:
+	amba_release_regions(dev);
+out:
+	return ret;
+}
+
+int pl111_amba_remove(struct amba_device *dev)
+{
+	DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+	clk_put(priv.clk);
+
+	amba_release_regions(dev);
+
+	priv.amba_dev = NULL;
+
+	return 0;
+}
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+					struct clcd_regs *timing)
+{
+	unsigned int ppl, hsw, hfp, hbp;
+	unsigned int lpp, vsw, vfp, vbp;
+	unsigned int cpl;
+
+	memset(timing, 0, sizeof(struct clcd_regs));
+
+	ppl = (mode->hdisplay / 16) - 1;
+	hsw = mode->hsync_end - mode->hsync_start - 1;
+	hfp = mode->hsync_start - mode->hdisplay - 1;
+	hbp = mode->htotal - mode->hsync_end - 1;
+
+	lpp = mode->vdisplay - 1;
+	vsw = mode->vsync_end - mode->vsync_start - 1;
+	vfp = mode->vsync_start - mode->vdisplay;
+	vbp = mode->vtotal - mode->vsync_end;
+
+	cpl = mode->hdisplay - 1;
+
+	timing->tim0 = (ppl << 2) | (hsw << 8) | (hfp << 16) | (hbp << 24);
+	timing->tim1 = lpp | (vsw << 10) | (vfp << 16) | (vbp << 24);
+	timing->tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC | TIM2_BCD | (cpl << 16);
+	timing->tim3 = 0;
+
+	timing->pixclock = mode->clock * 1000;
+}
+
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+					struct drm_display_mode *mode)
+{
+	unsigned int ppl, hsw, hfp, hbp;
+	unsigned int lpp, vsw, vfp, vbp;
+
+	ppl = (timing->tim0 >> 2) & 0x3f;
+	hsw = (timing->tim0 >> 8) & 0xff;
+	hfp = (timing->tim0 >> 16) & 0xff;
+	hbp = (timing->tim0 >> 24) & 0xff;
+
+	lpp = timing->tim1 & 0x3ff;
+	vsw = (timing->tim1 >> 10) & 0x3f;
+	vfp = (timing->tim1 >> 16) & 0xff;
+	vbp = (timing->tim1 >> 24) & 0xff;
+
+	mode->hdisplay    = (ppl + 1) * 16;
+	mode->hsync_start = ((ppl + 1) * 16) + hfp + 1;
+	mode->hsync_end   = ((ppl + 1) * 16) + hfp + hsw + 2;
+	mode->htotal      = ((ppl + 1) * 16) + hfp + hsw + hbp + 3;
+	mode->hskew       = 0;
+
+	mode->vdisplay    = lpp + 1;
+	mode->vsync_start = lpp + vfp + 1;
+	mode->vsync_end   = lpp + vfp + vsw + 2;
+	mode->vtotal      = lpp + vfp + vsw + vbp + 2;
+
+	mode->flags = 0;
+
+	mode->width_mm = 0;
+	mode->height_mm = 0;
+
+	mode->clock = timing->pixclock / 1000;
+	mode->hsync = timing->pixclock / mode->htotal;
+	mode->vrefresh = mode->hsync / mode->vtotal;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c
new file mode 100755
index 0000000..9d5ec0c
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c
@@ -0,0 +1,151 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_platform.c
+ * Implementation of the Linux platform device entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+static int pl111_platform_drm_suspend(struct platform_device *dev,
+					pm_message_t state)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+static int pl111_platform_drm_resume(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+int pl111_platform_drm_probe(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return pl111_drm_init(dev);
+}
+
+static int pl111_platform_drm_remove(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	pl111_drm_exit(dev);
+
+	return 0;
+}
+
+static struct amba_id pl111_id_table[] = {
+	{
+	.id = 0x00041110,
+	.mask = 0x000ffffe,
+	},
+	{0, 0},
+};
+
+static struct amba_driver pl111_amba_driver = {
+	.drv = {
+		.name = "clcd-pl11x",
+		},
+	.probe = pl111_amba_probe,
+	.remove = pl111_amba_remove,
+	.id_table = pl111_id_table,
+};
+
+static struct platform_driver platform_drm_driver = {
+	.probe = pl111_platform_drm_probe,
+	.remove = pl111_platform_drm_remove,
+	.suspend = pl111_platform_drm_suspend,
+	.resume = pl111_platform_drm_resume,
+	.driver = {
+			.owner = THIS_MODULE,
+			.name = DRIVER_NAME,
+		},
+};
+
+static const struct platform_device_info pl111_drm_pdevinfo = {
+	.name = DRIVER_NAME,
+	.id = -1,
+	.dma_mask = ~0UL
+};
+
+static struct platform_device *pl111_drm_device;
+
+static int __init pl111_platform_drm_init(void)
+{
+	int ret;
+
+	pr_info("DRM %s\n", __func__);
+
+	pl111_drm_device = platform_device_register_full(&pl111_drm_pdevinfo);
+	if (pl111_drm_device == NULL) {
+		pr_err("DRM platform_device_register_full() failed\n");
+		return -ENOMEM;
+	}
+
+	ret = amba_driver_register(&pl111_amba_driver);
+	if (ret != 0) {
+		pr_err("DRM amba_driver_register() failed %d\n", ret);
+		goto err_amba_reg;
+	}
+
+	ret = platform_driver_register(&platform_drm_driver);
+	if (ret != 0) {
+		pr_err("DRM platform_driver_register() failed %d\n", ret);
+		goto err_pdrv_reg;
+	}
+
+	return 0;
+
+err_pdrv_reg:
+	amba_driver_unregister(&pl111_amba_driver);
+err_amba_reg:
+	platform_device_unregister(pl111_drm_device);
+
+	return ret;
+}
+
+static void __exit pl111_platform_drm_exit(void)
+{
+	pr_info("DRM %s\n", __func__);
+
+	platform_device_unregister(pl111_drm_device);
+	amba_driver_unregister(&pl111_amba_driver);
+	platform_driver_unregister(&platform_drm_driver);
+}
+
+#ifdef MODULE
+module_init(pl111_platform_drm_init);
+#else
+late_initcall(pl111_platform_drm_init);
+#endif
+module_exit(pl111_platform_drm_exit);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_LICENSE(DRIVER_LICENCE);
+MODULE_ALIAS(DRIVER_ALIAS);
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c
new file mode 100755
index 0000000..d033566
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_suspend.c
+ * Implementation of the suspend/resume functions for PL111 DRM
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+int pl111_drm_resume(struct drm_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c
new file mode 100755
index 0000000..ff602ef
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c
@@ -0,0 +1,308 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_vma.c
+ * Implementation of the VM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+/* BEGIN drivers/staging/omapdrm/omap_gem_helpers.c */
+/**
+ * drm_gem_put_pages - helper to free backing pages for a GEM object
+ * @obj: obj in question
+ * @pages: pages to free
+ */
+static void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
+				bool dirty, bool accessed)
+{
+	int i, npages;
+	struct pl111_gem_bo *bo;
+	npages = obj->size >> PAGE_SHIFT;
+	bo = PL111_BO_FROM_GEM(obj);
+	for (i = 0; i < npages; i++) {
+		if (dirty)
+			set_page_dirty(pages[i]);
+		if (accessed)
+			mark_page_accessed(pages[i]);
+		/* Undo the reference we took when populating the table */
+		page_cache_release(pages[i]);
+	}
+	drm_free_large(pages);
+}
+
+void put_pages(struct drm_gem_object *obj, struct page **pages)
+{
+	int i, npages;
+	struct pl111_gem_bo *bo;
+	npages = obj->size >> PAGE_SHIFT;
+	bo = PL111_BO_FROM_GEM(obj);
+	_drm_gem_put_pages(obj, pages, true, true);
+	if (bo->backing_data.shm.dma_addrs) {
+		for (i = 0; i < npages; i++) {
+			/* Filter pages unmapped because of CPU accesses */
+			if (!bo->backing_data.shm.dma_addrs[i])
+				continue;
+			if (!dma_mapping_error(obj->dev->dev,
+					bo->backing_data.shm.dma_addrs[i])) {
+				dma_unmap_page(obj->dev->dev,
+					bo->backing_data.shm.dma_addrs[i],
+					PAGE_SIZE,
+					DMA_BIDIRECTIONAL);
+			}
+		}
+		kfree(bo->backing_data.shm.dma_addrs);
+		bo->backing_data.shm.dma_addrs = NULL;
+	}
+}
+
+/**
+ * drm_gem_get_pages - helper to allocate backing pages for a GEM object
+ * @obj: obj in question
+ * @gfpmask: gfp mask of requested pages
+ */
+static struct page **_drm_gem_get_pages(struct drm_gem_object *obj,
+					gfp_t gfpmask)
+{
+	struct inode *inode;
+	struct address_space *mapping;
+	struct page *p, **pages;
+	int i, npages;
+
+	/* This is the shared memory object that backs the GEM resource */
+	inode = obj->filp->f_path.dentry->d_inode;
+	mapping = inode->i_mapping;
+
+	npages = obj->size >> PAGE_SHIFT;
+
+	pages = drm_malloc_ab(npages, sizeof(struct page *));
+	if (pages == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	gfpmask |= mapping_gfp_mask(mapping);
+
+	for (i = 0; i < npages; i++) {
+		p = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
+		if (IS_ERR(p))
+			goto fail;
+		pages[i] = p;
+
+		/*
+		 * There is a hypothetical issue w/ drivers that require
+		 * buffer memory in the low 4GB.. if the pages are un-
+		 * pinned, and swapped out, they can end up swapped back
+		 * in above 4GB.  If pages are already in memory, then
+		 * shmem_read_mapping_page_gfp will ignore the gfpmask,
+		 * even if the already in-memory page disobeys the mask.
+		 *
+		 * It is only a theoretical issue today, because none of
+		 * the devices with this limitation can be populated with
+		 * enough memory to trigger the issue.  But this BUG_ON()
+		 * is here as a reminder in case the problem with
+		 * shmem_read_mapping_page_gfp() isn't solved by the time
+		 * it does become a real issue.
+		 *
+		 * See this thread: http://lkml.org/lkml/2011/7/11/238
+		 */
+		BUG_ON((gfpmask & __GFP_DMA32) &&
+			(page_to_pfn(p) >= 0x00100000UL));
+	}
+
+	return pages;
+
+fail:
+	while (i--)
+		page_cache_release(pages[i]);
+
+	drm_free_large(pages);
+	return ERR_PTR(PTR_ERR(p));
+}
+
+struct page **get_pages(struct drm_gem_object *obj)
+{
+	struct pl111_gem_bo *bo;
+	bo = PL111_BO_FROM_GEM(obj);
+
+	if (bo->backing_data.shm.pages == NULL) {
+		struct page **p;
+		int npages = obj->size >> PAGE_SHIFT;
+		int i;
+
+		p = _drm_gem_get_pages(obj, GFP_KERNEL);
+		if (IS_ERR(p))
+			return ERR_PTR(-ENOMEM);
+
+		bo->backing_data.shm.pages = p;
+
+		if (bo->backing_data.shm.dma_addrs == NULL) {
+			bo->backing_data.shm.dma_addrs =
+				kzalloc(npages * sizeof(dma_addr_t),
+					GFP_KERNEL);
+			if (bo->backing_data.shm.dma_addrs == NULL)
+				goto error_out;
+		}
+
+		if (!(bo->type & PL111_BOT_CACHED)) {
+			for (i = 0; i < npages; ++i) {
+				bo->backing_data.shm.dma_addrs[i] =
+					dma_map_page(obj->dev->dev, p[i], 0, PAGE_SIZE,
+						DMA_BIDIRECTIONAL);
+				if (dma_mapping_error(obj->dev->dev,
+						bo->backing_data.shm.dma_addrs[i]))
+					goto error_out;
+			}
+		}
+	}
+
+	return bo->backing_data.shm.pages;
+
+error_out:
+	put_pages(obj, bo->backing_data.shm.pages);
+	bo->backing_data.shm.pages = NULL;
+	return ERR_PTR(-ENOMEM);
+}
+
+/* END drivers/staging/omapdrm/omap_gem_helpers.c */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page **pages;
+	unsigned long pfn;
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	mutex_lock(&dev->struct_mutex);
+
+	/*
+	 * Our mmap calls setup a valid vma->vm_pgoff
+	 * so we can use vmf->pgoff
+	 */
+
+	if (bo->type & PL111_BOT_DMA) {
+		pfn = (bo->backing_data.dma.fb_dev_addr >> PAGE_SHIFT) +
+				vmf->pgoff;
+	} else { /* PL111_BOT_SHM */
+		pages = get_pages(obj);
+		if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev,
+				"could not get pages: %ld\n", PTR_ERR(pages));
+			ret = PTR_ERR(pages);
+			goto error;
+		}
+		pfn = page_to_pfn(pages[vmf->pgoff]);
+		pl111_gem_sync_to_cpu(bo, vmf->pgoff);
+	}
+
+	DRM_DEBUG("bo=%p physaddr=0x%.8x for offset 0x%x\n",
+				bo, PFN_PHYS(pfn), PFN_PHYS(vmf->pgoff));
+
+	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
+
+error:
+	mutex_unlock(&dev->struct_mutex);
+
+	switch (ret) {
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+/*
+ * The core drm_vm_ functions in kernel 3.4 are not ready
+ * to handle dma_buf cases where vma->vm_file->private_data
+ * cannot be accessed to get the device.
+ * 
+ * We use these functions from 3.5 instead where the device
+ * pointer is passed explicitly.
+ *
+ * However they aren't exported from the kernel until 3.10
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+                struct vm_area_struct *vma)
+{
+	struct drm_vma_entry *vma_entry;
+
+	DRM_DEBUG("0x%08lx,0x%08lx\n",
+		vma->vm_start, vma->vm_end - vma->vm_start);
+	atomic_inc(&dev->vma_count);
+
+	vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL);
+	if (vma_entry) {
+		vma_entry->vma = vma;
+		vma_entry->pid = current->pid;
+		list_add(&vma_entry->head, &dev->vmalist);
+	}
+}
+
+void pl111_drm_vm_close_locked(struct drm_device *dev,
+                struct vm_area_struct *vma)
+{
+	struct drm_vma_entry *pt, *temp;
+
+	DRM_DEBUG("0x%08lx,0x%08lx\n",
+		vma->vm_start, vma->vm_end - vma->vm_start);
+	atomic_dec(&dev->vma_count);
+
+	list_for_each_entry_safe(pt, temp, &dev->vmalist, head) {
+		if (pt->vma == vma) {
+			list_del(&pt->head);
+			kfree(pt);
+			break;
+		}
+	}
+}
+
+void pl111_gem_vm_open(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+
+	drm_gem_object_reference(obj);
+
+	mutex_lock(&obj->dev->struct_mutex);
+	pl111_drm_vm_open_locked(obj->dev, vma);
+	mutex_unlock(&obj->dev->struct_mutex);
+}
+
+void pl111_gem_vm_close(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct drm_device *dev = obj->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	pl111_drm_vm_close_locked(obj->dev, vma);
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&dev->struct_mutex);
+}
+#endif
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/pl111/sconscript b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/sconscript
new file mode 100755
index 0000000..5c47de7
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/pl111/sconscript
@@ -0,0 +1,52 @@
+#
+# (C) COPYRIGHT 2010-2013, 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import shutil
+Import('env')
+
+# Generate a build environment for the integration tests, taking a copy of the top-level build environment 
+# (env) as a start.
+drm_env = env.Clone()
+
+# Xorg uses C++ style comments and 'inline' keyword
+if '-std=c89' in drm_env['CFLAGS']:
+	drm_env['CFLAGS'].remove('-std=c89')
+
+# X11 generates a lot of warnings
+if '-Werror' in drm_env['CCFLAGS']:
+	drm_env['CCFLAGS'].remove('-Werror')
+
+#remove the 'lib'prefix
+drm_env['LIBPREFIX'] = ''
+
+src = Glob('*.c')
+
+if drm_env.GetOption('clean') :
+	drm_env.Execute(Action("make clean", 'clean [pl111]'))
+	cmd = drm_env.Command('$STATIC_LIB_PATH/mali_drm.ko', src, [])
+else:
+	# The target is mali_drm.ko, built from the source in pl111_drm/pl111, via the action makeAction
+	# mali_drm.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+	# kernel build system, after which it can be installed to the directory specified if
+	# "libs_install" is set; this is done by LibTarget.
+	makeAction=Action("cd ${SOURCE.dir} && make MALI_DEBUG=${debug} && cp pl111_drm.ko $STATIC_LIB_PATH/mali_drm.ko", '$MAKECOMSTR')
+	cmd = drm_env.Command('$STATIC_LIB_PATH/mali_drm.ko', src, [makeAction])
+
+# need Module.symvers from drm.ko
+#drm_env.Depends('$STATIC_LIB_PATH/pl111_drm.ko', '$STATIC_LIB_PATH/drm.ko')
+
+drm_env.KernelObjTarget('x11', cmd)
+
diff --git a/midgard/r12p0/kernel/drivers/gpu/drm/sconscript b/midgard/r12p0/kernel/drivers/gpu/drm/sconscript
new file mode 100755
index 0000000..a90fa89
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/drm/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+if 'x11' in env['winsys'] or 'gbm' in env['winsys']:
+	# pl111_drm isn't released so only try to build it if it's there
+	if Glob('pl111/sconscript') and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'):
+		SConscript('pl111/sconscript')
+
+#SConscript('dummy_drm/sconscript')
diff --git a/midgard/r12p0/kernel/drivers/gpu/sconscript b/midgard/r12p0/kernel/drivers/gpu/sconscript
new file mode 100755
index 0000000..729dbda
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/gpu/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+SConscript('arm/sconscript')
+
+if Glob('drm/sconscript'):
+	SConscript('drm/sconscript')
diff --git a/midgard/r12p0/kernel/drivers/sconscript b/midgard/r12p0/kernel/drivers/sconscript
new file mode 100755
index 0000000..dd6a637
--- /dev/null
+++ b/midgard/r12p0/kernel/drivers/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+if Glob('base/sconscript'):
+	SConscript('base/sconscript')
+
+if Glob('video/sconscript'):
+	SConscript('video/sconscript')
+
+SConscript('gpu/sconscript')
diff --git a/midgard/r12p0/kernel/include/linux/dma-buf-test-exporter.h b/midgard/r12p0/kernel/include/linux/dma-buf-test-exporter.h
new file mode 100755
index 0000000..98984ba
--- /dev/null
+++ b/midgard/r12p0/kernel/include/linux/dma-buf-test-exporter.h
@@ -0,0 +1,78 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_
+#define _LINUX_DMA_BUF_TEST_EXPORTER_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+#define DMA_BUF_TE_VER_MAJOR 1
+#define DMA_BUF_TE_VER_MINOR 0
+#define DMA_BUF_TE_ENQ 0x642d7465
+#define DMA_BUF_TE_ACK 0x68692100
+
+struct dma_buf_te_ioctl_version
+{
+	int op;    /**< Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */
+	int major; /**< Major version */
+	int minor; /**< Minor version */
+};
+
+struct dma_buf_te_ioctl_alloc
+{
+	__u64 size; /* size of buffer to allocate, in pages */
+};
+
+struct dma_buf_te_ioctl_status
+{
+	/* in */
+	int fd; /* the dma_buf to query, only dma_buf objects exported by this driver is supported */
+	/* out */
+	int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */
+	int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */
+	int cpu_mappings;    /* number of cpu mappings (active 'mmap's) */
+};
+
+struct dma_buf_te_ioctl_set_failing
+{
+	/* in */
+	int fd; /* the dma_buf to set failure mode for, only dma_buf objects exported by this driver is supported */
+
+	/* zero = no fail injection, non-zero = inject failure */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+};
+
+struct dma_buf_te_ioctl_fill
+{
+	int fd;
+	unsigned int value;
+};
+
+#define DMA_BUF_TE_IOCTL_BASE 'E'
+/* Below all returning 0 if successful or -errcode except DMA_BUF_TE_ALLOC which will return fd or -errcode */
+#define DMA_BUF_TE_VERSION         _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version)
+#define DMA_BUF_TE_ALLOC           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_QUERY           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status)
+#define DMA_BUF_TE_SET_FAILING     _IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing)
+#define DMA_BUF_TE_ALLOC_CONT      _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_FILL            _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill)
+
+#endif /* _LINUX_DMA_BUF_TEST_EXPORTER_H_ */
diff --git a/midgard/r12p0/kernel/include/linux/kds.h b/midgard/r12p0/kernel/include/linux/kds.h
new file mode 100755
index 0000000..1346eda
--- /dev/null
+++ b/midgard/r12p0/kernel/include/linux/kds.h
@@ -0,0 +1,173 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KDS_H_
+#define _KDS_H_
+
+#include <linux/list.h>
+#include <linux/workqueue.h>
+
+#define KDS_WAIT_BLOCKING (ULONG_MAX)
+
+struct kds_resource_set;
+
+typedef void (*kds_callback_fn) (void *callback_parameter, void *callback_extra_parameter);
+
+struct kds_callback
+{
+	kds_callback_fn  user_cb; /* real cb */
+	int direct;               /* do direct or queued call? */
+	struct workqueue_struct *wq;
+};
+
+struct kds_link
+{
+	struct kds_resource_set *parent;
+	struct list_head         link;
+	unsigned long            state;
+};
+
+struct kds_resource
+{
+	struct kds_link waiters;
+};
+
+/* callback API */
+
+/* Initialize a callback object.
+ *
+ * Typically created per context or per hw resource.
+ *
+ * Callbacks can be performed directly if no nested locking can
+ * happen in the client.
+ *
+ * Nested locking can occur when a lock is held during the kds_async_waitall or
+ * kds_resource_set_release call. If the callback needs to take the same lock
+ * nested locking will happen.
+ *
+ * If nested locking could happen non-direct callbacks can be requested.
+ * Callbacks will then be called asynchronous to the triggering call.
+ */
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb);
+
+/* Terminate the use of a callback object.
+ *
+ * If the callback object was set up as non-direct
+ * any pending callbacks will be flushed first.
+ * Note that to avoid a deadlock the lock callbacks needs
+ * can't be held when a callback object is terminated.
+ */
+void kds_callback_term(struct kds_callback *cb);
+
+
+/* resource object API */
+
+/* initialize a resource handle for a shared resource */
+void kds_resource_init(struct kds_resource * const resource);
+
+/*
+ * Will return 0 on success.
+ * If the resource is being used or waited -EBUSY is returned.
+ * The caller should NOT try to terminate a resource that could still have clients.
+ * After the function returns the resource is no longer known by kds.
+ */
+int kds_resource_term(struct kds_resource *resource);
+
+/* Asynchronous wait for a set of resources.
+ * Callback will be called when all resources are available.
+ * If all the resources was available the callback will be called before kds_async_waitall returns.
+ * So one must not hold any locks the callback code-flow can take when calling kds_async_waitall.
+ * Caller considered to own/use the resources until \a kds_rset_release is called.
+ * exclusive_access_bitmap is a bitmap where a high bit means exclusive access while a low bit means shared access.
+ * Use the Linux __set_bit API, where the index of the buffer to control is used as the bit index.
+ *
+ * Standard Linux error return value.
+ */
+int kds_async_waitall(
+		struct kds_resource_set ** const pprset,
+		struct kds_callback      *cb,
+		void                     *callback_parameter,
+		void                     *callback_extra_parameter,
+		int                       number_resources,
+		unsigned long            *exclusive_access_bitmap,
+		struct kds_resource     **resource_list);
+
+/* Synchronous wait for a set of resources.
+ * Function will return when one of these have happened:
+ * - all resources have been obtained
+ * - timeout lapsed while waiting
+ * - a signal was received while waiting
+ *
+ * To wait without a timeout, specify KDS_WAIT_BLOCKING for \a jifies_timeout, otherwise
+ * the timeout in jiffies. A zero timeout attempts to obtain all resources and returns
+ * immediately with a timeout if all resources could not be obtained.
+ *
+ * Caller considered to own/use the resources when the function returns.
+ * Caller must release the resources using \a kds_rset_release.
+ *
+ * Calling this function while holding already locked resources or other locking primitives is dangerous.
+ * One must if this is needed decide on a lock order of the resources and/or the other locking primitives
+ * and always take the resources/locking primitives in the specific order.
+ *
+ * Use the ERR_PTR framework to decode the return value.
+ * NULL = time out
+ * If IS_ERR then PTR_ERR gives:
+ *  ERESTARTSYS = signal received, retry call after signal
+ *  all other values = internal error, lock failed
+ * Other values  = successful wait, now the owner, must call kds_resource_set_release
+ */
+struct kds_resource_set *kds_waitall(
+		int                   number_resources,
+		unsigned long        *exclusive_access_bitmap,
+		struct kds_resource **resource_list,
+		unsigned long         jifies_timeout);
+
+/* Release resources after use.
+ * Caller must handle that other async callbacks will trigger,
+ * so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your poiner to handle a race
+ * between a cancelation and a completion.
+ *
+ * If the caller can't guarantee that a race can't occur then
+ * the passed in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ */
+void kds_resource_set_release(struct kds_resource_set **pprset);
+
+/* Release resources after use and wait callbacks to complete.
+ * Caller must handle that other async callbacks will trigger,
+ * so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your poiner to handle a race
+ * between a cancelation and a completion.
+ *
+ * If the caller can't guarantee that a race can't occur then
+ * the passed in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ *
+ * This should be used to cancel waits which are pending on a kds
+ * resource.
+ *
+ * It is a bug to call this from atomic contexts and from within
+ * a kds callback that now owns the kds_rseource.
+ */
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset);
+#endif /* _KDS_H_ */
diff --git a/midgard/r12p0/kernel/include/linux/ump-common.h b/midgard/r12p0/kernel/include/linux/ump-common.h
new file mode 100755
index 0000000..0015bda
--- /dev/null
+++ b/midgard/r12p0/kernel/include/linux/ump-common.h
@@ -0,0 +1,252 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump-common.h
+ *
+ * This file contains some common enum values used both in both the user and kernel space side of UMP.
+ */
+
+#ifndef _UMP_COMMON_H_
+#define _UMP_COMMON_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#define UMP_UINT32_MAX (4294967295U)
+#define UMP_UINT64_MAX (18446744073709551615ULL)
+
+#ifdef __GNUC__
+	#define CHECK_RESULT        __attribute__((__warn_unused_result__))
+	#define INLINE	__inline__
+#else
+	#define CHECK_RESULT
+	#define INLINE __inline
+#endif
+
+#ifndef STATIC
+	#define STATIC static
+#endif
+
+/**
+ * Values to identify major and minor version of UMP
+ */
+#define UMP_VERSION_MAJOR 2
+#define UMP_VERSION_MINOR 0
+
+/**
+ * Typedef for a secure ID, a system wide identifier for UMP memory buffers.
+ */
+typedef int32_t ump_secure_id;
+
+/**
+ * Value to indicate an invalid secure Id.
+ */
+#define UMP_INVALID_SECURE_ID  ((ump_secure_id)0)
+
+/**
+ * UMP error codes.
+ */
+typedef enum
+{
+	UMP_OK    = 0, /**< indicates success */
+	UMP_ERROR = 1  /**< indicates failure */
+} ump_result;
+
+/**
+ * Allocation flag bits.
+ *
+ * ump_allocate accepts zero or more flags to specify the type of memory to allocate and how to expose it to devices.
+ *
+ * For each supported device there are 4 flags to control access permissions and give usage characteristic hints to optimize the allocation/mapping.
+ * They are;
+ * @li @a UMP_PROT_<device>_RD   read permission
+ * @li @a UMP_PROT_<device>_WR   write permission
+ * @li @a UMP_HINT_<device>_RD   read often
+ * @li @a UMP_HINT_<device>_WR   written often
+ *
+ * 5 devices are currently supported, with a device being the CPU itself.
+ * The other 4 devices will be mapped to real devices per SoC design.
+ * They are just named W,X,Y,Z by UMP as it has no knowledge of their real names/identifiers.
+ * As an example device W could be a camera device while device Z could be an ARM GPU device, leaving X and Y unused.
+ *
+ * 2 additional flags control the allocation;
+ * @li @a UMP_CONSTRAINT_PHYSICALLY_LINEAR   the allocation must be physical linear. Typical for devices without an MMU and no IOMMU to help it.
+ * @li @a UMP_PROT_SHAREABLE                 the allocation can be shared with other processes on the system. Without this flag the returned allocation won't be resolvable in other processes.
+ *
+ * All UMP allocation are growable unless they're @a UMP_PROT_SHAREABLE.
+ * The hint bits should be used to indicate the access pattern so the driver can select the most optimal memory type and cache settings based on the what the system supports.
+ */
+typedef enum
+{
+	/* Generic helpers */
+	UMP_PROT_DEVICE_RD = (1u << 0),
+	UMP_PROT_DEVICE_WR = (1u << 1),
+	UMP_HINT_DEVICE_RD = (1u << 2),
+	UMP_HINT_DEVICE_WR = (1u << 3),
+	UMP_DEVICE_MASK = 0xF,
+	UMP_DEVICE_CPU_SHIFT = 0,
+	UMP_DEVICE_W_SHIFT = 4,
+	UMP_DEVICE_X_SHIFT = 8,
+	UMP_DEVICE_Y_SHIFT = 12,
+	UMP_DEVICE_Z_SHIFT = 16,
+
+	/* CPU protection and hints. */
+	UMP_PROT_CPU_RD = (1u <<  0),
+	UMP_PROT_CPU_WR = (1u <<  1),
+	UMP_HINT_CPU_RD = (1u <<  2),
+	UMP_HINT_CPU_WR = (1u <<  3),
+
+	/* device W */
+	UMP_PROT_W_RD = (1u <<  4),
+	UMP_PROT_W_WR = (1u <<  5),
+	UMP_HINT_W_RD = (1u <<  6),
+	UMP_HINT_W_WR = (1u <<  7),
+
+	/* device X */
+	UMP_PROT_X_RD = (1u <<  8),
+	UMP_PROT_X_WR = (1u <<  9),
+	UMP_HINT_X_RD = (1u << 10),
+	UMP_HINT_X_WR = (1u << 11),
+	
+	/* device Y */
+	UMP_PROT_Y_RD = (1u << 12),
+	UMP_PROT_Y_WR = (1u << 13),
+	UMP_HINT_Y_RD = (1u << 14),
+	UMP_HINT_Y_WR = (1u << 15),
+
+	/* device Z */
+	UMP_PROT_Z_RD = (1u << 16),
+	UMP_PROT_Z_WR = (1u << 17),
+	UMP_HINT_Z_RD = (1u << 18),
+	UMP_HINT_Z_WR = (1u << 19),
+
+	/* 20-26 reserved for future use */
+	UMPP_ALLOCBITS_UNUSED = (0x7Fu << 20),
+	/** Allocations marked as @ UMP_CONSTRAINT_UNCACHED won't be mapped as cached by the cpu  */
+	UMP_CONSTRAINT_UNCACHED = (1u << 27),
+	/** Require 32-bit physically addressable memory */
+	UMP_CONSTRAINT_32BIT_ADDRESSABLE = (1u << 28),
+	/** For devices without an MMU and with no IOMMU assistance. */
+	UMP_CONSTRAINT_PHYSICALLY_LINEAR = (1u << 29),
+	/** Shareable must be set to allow the allocation to be used by other processes, the default is non-shared */
+	UMP_PROT_SHAREABLE = (1u << 30)
+	/* (1u << 31) should not be used to ensure compiler portability */
+} ump_allocation_bits;
+
+/**
+ * ump_allocation_bits combination argument type.
+ *
+ * Type used to pass zero or more bits from the @ref ump_allocation_bits enum
+ */
+typedef uint32_t ump_alloc_flags;
+
+
+/**
+ *  Default allocation flags for UMP v1 compatible allocations.
+ */
+#define UMP_V1_API_DEFAULT_ALLOCATION_FLAGS		UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | \
+												UMP_PROT_W_RD | UMP_PROT_W_WR |	\
+												UMP_PROT_X_RD | UMP_PROT_X_WR |	\
+												UMP_PROT_Y_RD | UMP_PROT_Y_WR |	\
+												UMP_PROT_Z_RD | UMP_PROT_Z_WR |	\
+												UMP_PROT_SHAREABLE |	\
+												UMP_CONSTRAINT_32BIT_ADDRESSABLE
+
+/**
+ * CPU cache sync operations.
+ *
+ * Cache synchronization operations to pass to @ref ump_cpu_msync_now
+ */
+enum
+{
+	/**
+	 * Cleans any dirty cache lines to main memory, but the data will still be available in the cache.
+	 * After a clean the contents of memory is considered to be "owned" by the device.
+	 * */
+	UMP_MSYNC_CLEAN = 1,
+
+	/** Cleans any dirty cache lines to main memory and Ejects all lines from the cache.
+	 * After an clean&invalidate the contents of memory is considered to be "owned" by the CPU.
+	 * Any subsequent access will fetch data from main memory.
+	 *
+	 * @note Due to CPUs doing speculative prefetching a UMP_MSYNC_CLEAN_AND_INVALIDATE must be done before and after interacting with hardware.
+	 * */
+	UMP_MSYNC_CLEAN_AND_INVALIDATE
+
+};
+
+typedef uint32_t ump_cpu_msync_op;
+
+/**
+ * Memory import types supported.
+ * If new import types are added they will appear here.
+ * They must be added before UMPP_EXTERNAL_MEM_COUNT and
+ * must be assigned an explicit sequantial number.
+ *
+ * @li UMP_EXTERNAL_MEM_TYPE_ION - Import an ION allocation
+ *                                 Takes a int* (pointer to a file descriptor)
+ *                                 Another ION reference is taken which is released on the final ump_release
+ */
+enum ump_external_memory_type
+{
+	UMPP_EXTERNAL_MEM_TYPE_UNUSED = 0, /* reserve type 0 */
+	UMP_EXTERNAL_MEM_TYPE_ION = 1,
+	UMPP_EXTERNAL_MEM_COUNT
+};
+
+/** @name UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Allocation constraints.
+ *
+ * Allocation flags to pass @ref ump_ref_drv_allocate
+ *
+ * UMP v1 API only.
+ */
+typedef enum
+{
+	/** the allocation is mapped as noncached. */
+	UMP_REF_DRV_CONSTRAINT_NONE = 0,
+	/** not supported. */
+	UMP_REF_DRV_CONSTRAINT_PHYSICALLY_LINEAR = 1,
+	/** the allocation is mapped as cached by the cpu. */
+	UMP_REF_DRV_CONSTRAINT_USE_CACHE = 4
+} ump_alloc_constraints;
+
+/* @} */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _UMP_COMMON_H_ */
diff --git a/midgard/r12p0/kernel/include/linux/ump-import.h b/midgard/r12p0/kernel/include/linux/ump-import.h
new file mode 100755
index 0000000..89ce727
--- /dev/null
+++ b/midgard/r12p0/kernel/include/linux/ump-import.h
@@ -0,0 +1,99 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IMPORT_H_
+#define _UMP_IMPORT_H_
+
+#include <linux/ump.h>
+#include <linux/module.h>
+
+/**
+ * UMP import module info.
+ * Contains information about the Linux module providing the import module,
+ * used to block unloading of the Linux module while imported memory exists.
+ * Lists the functions implementing the UMP import functions.
+ */
+struct ump_import_handler
+{
+	/**
+	 * Linux module of the import handler
+	 */
+	struct module * linux_module;
+
+	/**
+	 * UMP session usage begin.
+	 *
+	 * Called when a UMP session first is bound to the handler.
+	 * Typically used to set up any import module specific per-session data.
+	 * The function returns a pointer to this data in the output pointer custom_session_data
+	 * which will be passed to \a session_end and \a import.
+	 *
+	 * Note: custom_session_data must be set to non-NULL if successful.
+	 * If no pointer is needed set it a magic value to validate instead.
+	 *
+	 * @param[out] custom_session_data Pointer to a generic pointer where any data can be stored
+	 * @return 0 on success, error code if the session could not be initiated.
+	 */
+	int (*session_begin)(void ** custom_session_data);
+
+	/**
+	 * UMP session usage end.
+	 *
+	 * Called when a UMP session is no longer using the handler.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 */
+	void (*session_end)(void * custom_session_data);
+
+	/**
+	 * Import request.
+	 *
+	 * Called when a client has asked to import a resource of the type the import module was installed for.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * The requested flags must be verified to be valid to apply to the imported memory.
+	 * If not valid return UMP_DD_INVALID_MEMORY_HANDLE.
+	 * If the flags are found to be valid call \a ump_dd_create_from_phys_blocks_64 to create a handle.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 * @param[in] phandle Pointer to the handle to import
+	 * @param     flags The requested UMPv2 flags to assign to the imported handle
+	 * @return UMP_DD_INVALID_MEMORY_HANDLE if the import failed, a valid ump handle on success
+	 */
+	ump_dd_handle (*import)(void * custom_session_data, void * phandle, ump_alloc_flags flags);
+};
+
+/**
+ * Import module registration.
+ * Registers a ump_import_handler structure for a memory type.
+ * @param     type    Type of the memory to register a handler for
+ * @param[in] handler Handler strcture to install
+ * @return 0 on success, a Linux error code on failure
+ */
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler);
+
+/**
+ * Import module deregistration.
+ * Uninstalls the handler for the given memory type.
+ * @param type Type of the memory to unregister the handler for
+ */
+void ump_import_module_unregister(enum ump_external_memory_type type);
+
+#endif /* _UMP_IMPORT_H_ */
diff --git a/midgard/r12p0/kernel/include/linux/ump-ioctl.h b/midgard/r12p0/kernel/include/linux/ump-ioctl.h
new file mode 100755
index 0000000..451403e
--- /dev/null
+++ b/midgard/r12p0/kernel/include/linux/ump-ioctl.h
@@ -0,0 +1,152 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IOCTL_H_
+#define _UMP_IOCTL_H
+
+#include <linux/ump-common.h>
+
+/*
+ * The order and size of the members of these have been chosen so the structures look the same in 32-bit and 64-bit builds.
+ * If any changes are done build the ump_struct_size_checker test for 32-bit and 64-bit targets. Both must compile successfully to commit.
+ */
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union ump_pointer
+{
+	void * value; /**< client should store their pointers here */
+	uint32_t compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	uint64_t sizer; /**< Force 64-bit storage for all clients regardless */
+} ump_pointer;
+
+/**
+ * UMP allocation request.
+ * Used when performing ump_allocate
+ */
+typedef struct ump_k_allocate
+{
+	uint64_t size; /**< [in] Size in bytes to allocate */
+	ump_secure_id secure_id; /**< [out] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [in] Flags to use when allocating */
+} ump_k_allocate;
+
+/**
+ * UMP size query request.
+ * Used when performing ump_size_get
+ */
+typedef struct ump_k_sizequery
+{
+	uint64_t size; /**< [out] Size of allocation */
+	ump_secure_id secure_id; /**< [in] ID of allocation to query the size of */
+	uint32_t padding; /* don't remove */
+} ump_k_sizequery;
+
+/**
+ * UMP cache synchronization request.
+ * Used when performing ump_cpu_msync_now
+ */
+typedef struct ump_k_msync
+{
+	ump_pointer mapped_ptr; /**< [in] CPU VA to perform cache operation on */
+	ump_secure_id secure_id; /**< [in] ID of allocation to perform cache operation on */
+	ump_cpu_msync_op cache_operation; /**< [in] Cache operation to perform */
+	uint64_t size; /**< [in] Size in bytes of the range to synchronize */
+} ump_k_msync;
+
+/**
+ * UMP memory retain request.
+ * Used when performing ump_retain
+ */
+typedef struct ump_k_retain
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to retain a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_retain;
+
+/**
+ * UMP memory release request.
+ * Used when performing ump_release
+ */
+typedef struct ump_k_release
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to release a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_release;
+
+typedef struct ump_k_import
+{
+	ump_pointer phandle;                /**< [in]  Pointer to handle to import */
+	uint32_t type;                           /**< [in]  Type of handle to import */
+	ump_alloc_flags alloc_flags;        /**< [in]  Flags to assign to the imported memory */
+	ump_secure_id secure_id;            /**< [out] UMP ID representing the imported memory */
+	uint32_t padding;                        /* don't remove */
+} ump_k_import;
+
+/**
+ * UMP allocation flags request.
+ * Used when performing umpp_get_allocation_flags
+ *
+ * used only by v1 API
+ */
+typedef struct ump_k_allocation_flags
+{
+	ump_secure_id secure_id; /**< [in] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [out] Flags to use when allocating */
+} ump_k_allocation_flags;
+
+#define UMP_CALL_MAX_SIZE 512
+/*
+ * Ioctl definitions
+ */
+
+/* Use '~' as magic number */
+
+#define UMP_IOC_MAGIC  '~'
+
+#define UMP_FUNC_ALLOCATE _IOWR(UMP_IOC_MAGIC,  1, ump_k_allocate)
+#define UMP_FUNC_SIZEQUERY _IOWR(UMP_IOC_MAGIC,  2, ump_k_sizequery)
+#define UMP_FUNC_MSYNC _IOWR(UMP_IOC_MAGIC,  3, ump_k_msync)
+#define UMP_FUNC_RETAIN _IOW(UMP_IOC_MAGIC,  4, ump_k_retain)
+#define UMP_FUNC_RELEASE _IOW(UMP_IOC_MAGIC,  5, ump_k_release)
+#define UMP_FUNC_ALLOCATION_FLAGS_GET _IOWR(UMP_IOC_MAGIC,  6, ump_k_allocation_flags)
+#define UMP_FUNC_IMPORT _IOWR(UMP_IOC_MAGIC, 7, ump_k_import)
+
+/*max ioctl sequential number*/
+#define UMP_IOC_MAXNR 7
+
+/* 15 bits for the UMP ID (allowing 32768 IDs) */
+#define UMP_LINUX_ID_BITS 15
+#define UMP_LINUX_ID_MASK ((1ULL << UMP_LINUX_ID_BITS) - 1ULL)
+
+/* 64-bit (really 52 bits) encoding: 15 bits for the ID, 37 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_64 37
+#define UMP_LINUX_OFFSET_MASK_64 ((1ULL << UMP_LINUX_OFFSET_BITS_64)-1)
+/* 32-bit encoding: 15 bits for the ID, 17 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_32 17
+#define UMP_LINUX_OFFSET_MASK_32 ((1ULL << UMP_LINUX_OFFSET_BITS_32)-1)
+
+#if __SIZEOF_LONG__ == 8
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_64
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_64
+#else
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_32
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_32
+#endif
+
+#endif /* _UMP_IOCTL_H_ */
diff --git a/midgard/r12p0/kernel/include/linux/ump.h b/midgard/r12p0/kernel/include/linux/ump.h
new file mode 100755
index 0000000..16f9c39
--- /dev/null
+++ b/midgard/r12p0/kernel/include/linux/ump.h
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ *
+ * This file contains the kernel space part of the UMP API.
+ *
+ */
+
+#ifndef _UMP_KERNEL_INTERFACE_H_
+#define _UMP_KERNEL_INTERFACE_H_
+
+/**
+ * @addtogroup ump_api
+ * @{
+ */
+
+/** @defgroup ump_kernel_space_api UMP Kernel Space API
+ * @{ */
+
+/**
+ * External representation of a UMP handle in kernel space.
+ */
+typedef void * ump_dd_handle;
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+
+#include <linux/ump-common.h>
+
+#define UMP_KERNEL_API_EXPORT
+
+#if defined(__KERNEL__)
+#include <linux/ump-import.h>
+#else
+#include <ump/src/library/common/ump_user.h>
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_INVALID_MEMORY_HANDLE ((ump_dd_handle)0)
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block_64
+{
+	uint64_t addr; /**< The physical address of the block */
+	uint64_t size; /**< The length of the block, in bytes, typically page aligned */
+} ump_dd_physical_block_64;
+
+/**
+ * Security filter hook.
+ *
+ * Each allocation can have a security filter attached to it.@n
+ * The hook receives
+ * @li the secure ID
+ * @li a handle to the allocation
+ * @li  the callback_data argument provided to @ref ump_dd_allocate_64 or @ref ump_dd_create_from_phys_blocks_64
+ *
+ * The hook must return @a true to indicate that access to the handle is allowed or @n
+ * @a false to state that no access is permitted.@n
+ * This hook is guaranteed to be called in the context of the requesting process/address space.
+ *
+ * The arguments provided to the hook are;
+ * @li the secure ID
+ * @li handle to the allocation
+ * @li the callback_data set when registering the hook
+ *
+ * Return value;
+ * @li @a true to permit access
+ * @li @a false to deny access
+ */
+typedef bool (*ump_dd_security_filter)(ump_secure_id, ump_dd_handle, void *);
+
+/**
+ * Final release notify hook.
+ *
+ * Allocations can have a hook attached to them which is called when the last reference to the allocation is released.
+ * No reference count manipulation is allowed on the provided handle, just property querying (ID get, size get, phys block get).
+ * This is similar to finalizers in OO languages.
+ *
+ * The arguments provided to the hook are;
+ * * handle to the allocation
+ * * the callback_data set when registering the hook
+ */
+typedef void (*ump_dd_final_release_callback)(const ump_dd_handle, void *);
+
+/**
+ * Allocate a buffer.
+ * The lifetime of the allocation is controlled by a reference count.
+ * The reference count of the returned buffer is set to 1.
+ * The memory will be freed once the reference count reaches 0.
+ * Use @ref ump_dd_retain and @ref ump_dd_release to control the reference count.
+ * @param size Number of bytes to allocate. Will be padded up to a multiple of the page size.
+ * @param flags Bit-wise OR of zero or more of the allocation flag bits.
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a
+ * secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed,
+ * just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the new allocation, or @a UMP_DD_INVALID_MEMORY_HANDLE on allocation failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/**
+ * Allocation bits getter.
+ * Retrieves the allocation flags used when instantiating the given handle.
+ * Just a copy of the flag given to @ref ump_dd_allocate_64 and @ref ump_dd_create_from_phys_blocks_64
+ * @param mem The handle retrieve the bits for
+ * @return The allocation bits used to instantiate the allocation
+ */
+UMP_KERNEL_API_EXPORT ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem);
+
+
+/**
+ * Retrieves the secure ID for the specified UMP memory.
+ *
+ * This identifier is unique across the entire system, and uniquely identifies
+ * the specified UMP memory allocation. This identifier can later be used through the
+ * @ref ump_dd_from_secure_id or
+ * @ref ump_from_secure_id
+ * functions in order to access this UMP memory, for instance from another process (if shared of course).
+ * Unless the allocation was marked as shared the returned ID will only be resolvable in the same process as did the allocation.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE will result in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_secure_id_get
+ *
+ * @see ump_dd_from_secure_id
+ * @see ump_from_secure_id
+ * @see ump_secure_id_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the secure ID for the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem);
+
+#ifdef CONFIG_KDS
+/**
+ * Retrieve the KDS resource for the specified UMP memory.
+ *
+ * The KDS resource should be used to synchronize access to the UMP allocation.
+ * See the KDS API for how to do that.
+ *
+ * @param mem Handle to the UMP memory to query.
+ * @return Pointer to the KDS resource controlling access to the UMP memory.
+ */
+UMP_KERNEL_API_EXPORT struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem);
+#endif
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_release when there is no longer any
+ * use for the retrieved handle.
+ *
+ * If called on an non-shared allocation and this is a different process @a UMP_DD_INVALID_MEMORY_HANDLE will be returned.
+ *
+ * Calling on an @a UMP_INVALID_SECURE_ID will return @a UMP_DD_INVALID_MEMORY_HANDLE
+ *
+ * @note There is a user space equivalent function called @ref ump_from_secure_id
+ *
+ * @see ump_dd_release
+ * @see ump_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get function.
+ *
+ * @return @a UMP_DD_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ * This function will return a pointer to an array of @ref ump_dd_physical_block_64 in @a pArray and the number of array elements in @a pCount
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @param mem Handle to UMP memory.
+ * @param[out] pCount Pointer to where to store the number of items in the returned array
+ * @param[out] pArray Pointer to where to store a pointer to the physical blocks array
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray);
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT uint64_t ump_dd_size_get_64(const ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory allocation.
+ *
+ * The function @ref ump_dd_release must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note You are not required to call @ref ump_dd_retain
+ * for UMP handles returned from
+ * @ref ump_dd_from_secure_id,
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_retain
+ *
+ * @see ump_retain
+ *
+ * @param mem Handle to UMP memory.
+ * @return 0 indicates success, any other value indicates failure.
+ */
+UMP_KERNEL_API_EXPORT int ump_dd_retain(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function must be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * One can only call ump_release when matched with a successful ump_dd_retain, ump_dd_allocate_64 or ump_dd_from_secure_id
+ * If called on an @a UMP_DD_INVALID_MEMORY_HANDLE the function will early out.
+ *
+ * @note There is a user space equivalent function called @ref ump_release
+ *
+ * @see ump_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_release(ump_dd_handle mem);
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ * Once wrapped the memory acts just like a normal allocation coming from @ref ump_dd_allocate_64.
+ * The only exception is that the freed physical memory is not put into the pool of free memory, but instead considered returned to the caller once @a final_release_func returns.
+ * The blocks array will be copied, so no need to hold on to it after this function returns.
+ * @param[in] blocks Array of @ref ump_dd_physical_block_64
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ * @param flags Allocation flags to mark the handle with
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed, just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/** @name UMP v1 API
+ * Functions provided to support compatibility with UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_HANDLE_INVALID UMP_DD_INVALID_MEMORY_HANDLE
+
+/**
+ * UMP error codes for kernel space.
+ */
+typedef enum
+{
+	UMP_DD_SUCCESS, /**< indicates success */
+	UMP_DD_INVALID /**< indicates failure */
+} ump_dd_status_code;
+
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block
+{
+	unsigned long addr; /**< The physical address of the block */
+	unsigned long size; /**< The length of the block, typically page aligned */
+} ump_dd_physical_block;
+
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_reference_release "ump_dd_reference_release" when there is no longer any
+ * use for the retrieved handle.
+ *
+ * @note There is a user space equivalent function called @ref ump_handle_create_from_secure_id "ump_handle_create_from_secure_id"
+ *
+ * @see ump_dd_reference_release
+ * @see ump_handle_create_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get "ump_secure_id_get " function.
+ *
+ * @return UMP_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id);
+
+
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ *
+ * @param[in] blocks Array of @ref ump_dd_physical_block
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ *
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the number of physical blocks used by the specified UMP memory.
+ *
+ * This function retrieves the number of @ref ump_dd_physical_block "ump_dd_physical_block" structs needed
+ * to describe the physical memory layout of the given UMP memory. This can later be used when calling
+ * the functions @ref ump_dd_phys_blocks_get "ump_dd_phys_blocks_get" and
+ * @ref ump_dd_phys_block_get "ump_dd_phys_block_get".
+ *
+ * @see ump_dd_phys_blocks_get
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return The number of ump_dd_physical_block structs required to describe the physical memory layout of the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will fail if the num_blocks parameter is either to large or to small.
+ *
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param blocks An array of @ref ump_dd_physical_block "ump_dd_physical_block" structs that will receive the physical description.
+ * @param num_blocks The number of blocks to return in the blocks array. Use the function
+ *                   @ref ump_dd_phys_block_count_get "ump_dd_phys_block_count_get" first to determine the number of blocks required.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the physical memory block information for specified block for the specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will return UMP_DD_INVALID if the specified index is out of range.
+ *
+ * @see ump_dd_phys_blocks_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param index Which physical info block to retrieve.
+ * @param block Pointer to a @ref ump_dd_physical_block "ump_dd_physical_block" struct which will receive the requested information.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block);
+
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get "ump_size_get"
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory.
+ *
+ * This function adds an extra reference to the specified UMP memory. This function should
+ * be used every time a UMP memory handle is duplicated, that is, assigned to another ump_dd_handle
+ * variable. The function @ref ump_dd_reference_release "ump_dd_reference_release" must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * @note You are not required to call @ref ump_dd_reference_add "ump_dd_reference_add"
+ * for UMP handles returned from
+ * @ref ump_dd_handle_create_from_secure_id "ump_dd_handle_create_from_secure_id",
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_add "ump_reference_add"
+ *
+ * @see ump_reference_add
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function should be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_release "ump_reference_release"
+ *
+ * @see ump_reference_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle mem);
+
+/* @} */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/** @} */ /* end group ump_kernel_space_api */
+
+/** @} */ /* end group ump_api */
+
+#endif /* _UMP_KERNEL_INTERFACE_H_ */
diff --git a/midgard/r12p0/kernel/license.txt b/midgard/r12p0/kernel/license.txt
new file mode 100755
index 0000000..77c14bd
--- /dev/null
+++ b/midgard/r12p0/kernel/license.txt
@@ -0,0 +1,198 @@
+GPLV2 LICENCE AGREEMENT FOR MALI GPUS LINUX KERNEL DEVICE DRIVERS SOURCE CODE
+
+THE USE OF THE SOFTWARE ACCOMPANYING THIS DOCUMENT IS EXPRESSLY SUBJECT TO THE TERMS OF THE GNU GENERAL PUBLIC LICENSE VERSION 2 AS PUBLISHED BY THE FREE SOFTWARE FOUNDATION AND SET OUT BELOW FOR REFERENCE (?GPL LICENCE?). ARM IS ONLY WILLING TO DISTRIBUTE THE SOFTWARE TO YOU ON CONDITION THAT YOU ACCEPT ALL OF THE TERMS IN THE GPL LICENCE PRIOR TO MODIFYING OR DISTRIBUTING THE SOFTWARE.
+
+
+
+Further for the period of three (3) years, ARM hereby offers to make available the source code of any part of the software program that is supplied as object code or in executable form.
+
+
+
+GPL Licence
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+Version 2, June 1991
+
+
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+
+
+
+Preamble
+
+
+
+The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too.
+
+
+
+When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things.
+
+
+
+To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it.
+
+
+
+For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
+
+
+
+We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
+
+
+
+Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations.
+
+
+
+Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all.
+
+
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+
+
+0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you".
+
+
+
+Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program).
+
+Whether that is true depends on what the Program does.
+
+
+
+1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty;
+
+and give any other recipients of the Program a copy of this License along with the Program.
+
+
+
+You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee.
+
+
+
+2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
+
+
+
+a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change.
+
+
+
+b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License.
+
+
+
+c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.)
+
+
+
+These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.
+
+
+
+Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program.
+
+
+
+In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.
+
+
+
+3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following:
+
+
+
+a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.)
+
+
+
+The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.
+
+
+
+If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code.
+
+
+
+4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.
+
+
+
+5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the
+
+Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it.
+
+
+
+6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein.
+
+You are not responsible for enforcing compliance by third parties to this License.
+
+
+
+7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program.
+
+
+
+If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances.
+
+
+
+It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.
+
+
+
+This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
+
+
+
+8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.
+
+
+
+9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+
+
+
+Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation.
+
+
+
+10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.
+
+
+
+NO WARRANTY
+
+
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+/end
+
diff --git a/midgard/r12p0/kernel/patches/integrate_kds_with_dma_buf.patch b/midgard/r12p0/kernel/patches/integrate_kds_with_dma_buf.patch
new file mode 100755
index 0000000..37458cf
--- /dev/null
+++ b/midgard/r12p0/kernel/patches/integrate_kds_with_dma_buf.patch
@@ -0,0 +1,188 @@
+diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
+index 9aa618a..30e07ea 100644
+--- a/drivers/base/Kconfig
++++ b/drivers/base/Kconfig
+@@ -192,4 +192,12 @@ config DMA_SHARED_BUFFER
+ 	  APIs extension; the file's descriptor can then be passed on to other
+ 	  driver.
+ 
++config DMA_SHARED_BUFFER_USES_KDS
++	bool "Synchronize access to dma_buf with KDS"
++	default n
++	select KDS
++	depends on DMA_SHARED_BUFFER
++	help
++	  This option adds a KDS resource within every dma_buf allocation.
++
+ endmenu
+diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
+index 20258e1..e4083fd 100644
+--- a/drivers/base/dma-buf.c
++++ b/drivers/base/dma-buf.c
+@@ -27,6 +27,8 @@
+ #include <linux/dma-buf.h>
+ #include <linux/anon_inodes.h>
+ #include <linux/export.h>
++#include <linux/poll.h>
++#include <linux/sched.h>
+ 
+ static inline int is_dma_buf_file(struct file *);
+ 
+@@ -40,6 +42,8 @@ static int dma_buf_release(struct inode *inode, struct file *file)
+ 	dmabuf = file->private_data;
+ 
+ 	dmabuf->ops->release(dmabuf);
++	kds_callback_term(&dmabuf->kds_cb);
++	kds_resource_term(&dmabuf->kds);
+ 	kfree(dmabuf);
+ 	return 0;
+ }
+@@ -61,9 +65,90 @@ static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
+ 	return dmabuf->ops->mmap(dmabuf, vma);
+ }
+ 
++
++static void dma_buf_kds_cb_fn (void *param1, void *param2)
++{
++	struct kds_resource_set **rset_ptr = param1;
++	struct kds_resource_set *rset = *rset_ptr;
++	wait_queue_head_t *wait_queue = param2;
++
++	kfree(rset_ptr);
++	kds_resource_set_release(&rset);
++	wake_up(wait_queue);
++}
++
++static int dma_buf_kds_check(struct kds_resource *kds,
++                             long unsigned int exclusive, int *poll_ret)
++{
++	/* Synchronous wait with 0 timeout - poll availability */
++	struct kds_resource_set *rset = kds_waitall(1,&exclusive,&kds,0);
++
++	if (IS_ERR(rset))
++		return POLLERR;
++
++	if (rset){
++		kds_resource_set_release(&rset);
++		*poll_ret = POLLIN | POLLRDNORM;
++		if (exclusive)
++			*poll_ret |=  POLLOUT | POLLWRNORM;
++		return 1;
++	}else{
++		return 0;
++	}
++}
++
++static unsigned int dma_buf_poll(struct file *file,
++                                 struct poll_table_struct *wait)
++{
++	struct dma_buf *dmabuf;
++	struct kds_resource *kds;
++	unsigned int ret = 0;
++
++	if (!is_dma_buf_file(file))
++		return POLLERR;
++
++	dmabuf = file->private_data;
++	kds    = &dmabuf->kds;
++
++	if (poll_does_not_wait(wait)){
++		/* Check for exclusive access (superset of shared) first */
++		if(!dma_buf_kds_check(kds, 1ul, &ret))
++			dma_buf_kds_check(kds, 0ul, &ret);
++	}else{
++		int events = poll_requested_events(wait);
++		unsigned long exclusive;
++		wait_queue_head_t *wq;
++		struct kds_resource_set **rset_ptr = kmalloc(sizeof(*rset_ptr), GFP_KERNEL);
++
++		if (!rset_ptr)
++			return POLL_ERR;
++
++		if (events & POLLOUT){
++			wq = &dmabuf->wq_exclusive;
++			exclusive = 1;
++		}else{
++			wq = &dmabuf->wq_shared;
++			exclusive = 0;
++		}
++		poll_wait(file, wq, wait);
++		ret = kds_async_waitall(rset_ptr, KDS_FLAG_LOCKED_WAIT, &dmabuf->kds_cb,
++		                        rset_ptr, wq, 1, &exclusive, &kds);
++
++		if (IS_ERR_VALUE(ret)){
++			ret = POLL_ERR;
++			kfree(rset_ptr);
++		}else{
++			/* Can't allow access until callback */
++			ret = 0;
++		}
++	}
++	return ret;
++}
++
+ static const struct file_operations dma_buf_fops = {
+ 	.release	= dma_buf_release,
+ 	.mmap		= dma_buf_mmap_internal,
++	.poll		= dma_buf_poll,
+ };
+ 
+ /*
+@@ -120,6 +205,11 @@ struct dma_buf *dma_buf_export(void *priv, const struct dma_buf_ops *ops,
+ 	mutex_init(&dmabuf->lock);
+ 	INIT_LIST_HEAD(&dmabuf->attachments);
+ 
++	init_waitqueue_head(&dmabuf->wq_exclusive);
++	init_waitqueue_head(&dmabuf->wq_shared);
++	kds_resource_init(&dmabuf->kds);
++	kds_callback_init(&dmabuf->kds_cb, 1, dma_buf_kds_cb_fn);
++
+ 	return dmabuf;
+ }
+ EXPORT_SYMBOL_GPL(dma_buf_export);
+diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
+index eb48f38..6e824d2 100644
+--- a/include/linux/dma-buf.h
++++ b/include/linux/dma-buf.h
+@@ -30,6 +30,8 @@
+ #include <linux/list.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/fs.h>
++#include <linux/kds.h>
++#include <linux/wait.h>
+ 
+ struct device;
+ struct dma_buf;
+@@ -121,6 +123,10 @@ struct dma_buf {
+ 	const struct dma_buf_ops *ops;
+ 	/* mutex to serialize list manipulation and attach/detach */
+ 	struct mutex lock;
++	struct kds_resource kds;
++	wait_queue_head_t wq_exclusive;
++	wait_queue_head_t wq_shared;
++	struct kds_callback kds_cb;
+ 	void *priv;
+ };
+ 
+@@ -156,6 +162,21 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
+ 	get_file(dmabuf->file);
+ }
+ 
++/**
++ * get_dma_buf_kds_resource - get a KDS resource for this dma-buf
++ * @dmabuf:	[in]	pointer to dma_buf
++ *
++ * Returns a KDS resource that represents the dma-buf. This should be used by
++ * drivers to synchronize access to the buffer. Note that the caller should
++ * ensure that a reference to the dma-buf exists from the call to
++ * kds_async_wait until kds_resource_set_release is called.
++ */
++static inline struct kds_resource *
++	get_dma_buf_kds_resource(struct dma_buf *dmabuf)
++{
++	return &dmabuf->kds;
++}
++
+ #ifdef CONFIG_DMA_SHARED_BUFFER
+ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+ 							struct device *dev);
diff --git a/midgard/r12p0/kernel/sconscript b/midgard/r12p0/kernel/sconscript
new file mode 100755
index 0000000..e6be64c
--- /dev/null
+++ b/midgard/r12p0/kernel/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+if env['backend'] == 'kernel' and int(env['kernel_modules']) == 1 and env['gpu'] != 'none':
+	SConscript('drivers/sconscript')
+
+	if Glob('tests/sconscript'):
+		SConscript('tests/sconscript')
diff --git a/midgard/r13p0/android/patches/K/android-k.sh b/midgard/r13p0/android/patches/K/android-k.sh
new file mode 100755
index 0000000..4e44d5e
--- /dev/null
+++ b/midgard/r13p0/android/patches/K/android-k.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+
+#
+# Copyright (C) 2014-2015 ARM Limited. All rights reserved.
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Android K script for LLVM 3.5 and above support
+#
+# This script applies two minor patches and checks-out libcxx and libcxxabi libraries
+# adding C++11 support in Android K required for LLVM3.5 and above.
+
+
+if [ -z "$ANDROID_BUILD_TOP" ]; then
+    echo "[ANDROID-PATCH] This script must be executed in an Android build environment"
+    echo "                ANDROID_BUILD_TOP is undefined"
+    echo "                please do \"source build/envseup.sh; lunch\"."
+    exit 1
+fi
+
+ANDROID_REPOSITORY_URL="https://android.googlesource.com/"
+
+REFERENCE_ARGUMENT=""
+
+# Proccess arguments. Valid arguments are:
+#        -u ARG      Override the Android repository url
+#        -r ARG      Provide a path to a reference git repository
+while getopts "u:r:" opt; do
+  case $opt in
+    u)
+      ANDROID_REPOSITORY_URL=$OPTARG
+      ;;
+
+    r)
+      REFERENCE_ARGUMENT="--reference "$OPTARG
+      ;;
+
+    \?)
+      echo "Invalid option: -$OPTARG" >&2
+      exit 1
+
+      ;;
+    :)
+      echo "Option -$OPTARG requires an argument." >&2
+      exit 1
+      ;;
+
+  esac
+done
+
+MIDGARD_DDK_WD=$(pwd)
+
+cd $ANDROID_BUILD_TOP/external
+
+#Checkout libcxx from Android Master required by LLVM due to c++11
+git clone $ANDROID_REPOSITORY_URL/platform/external/libcxx $REFERENCE_ARGUMENT
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] git clone of libcxx failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+cd libcxx
+git checkout 27ae7cb782821a4f2d3813522ee411cd978bcd85 -q
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] checkout of libcxx@27ae7cb782821a4f2d3813522ee411cd978bcd85 revision failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/libcxx.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Patching libcxx failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Checkout libcxxabi from Android Master required by LLVM due to c++11
+cd $ANDROID_BUILD_TOP/external
+git clone $ANDROID_REPOSITORY_URL/platform/external/libcxxabi $REFERENCE_ARGUMENT
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] git clone of libcxxabi failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+cd libcxxabi
+git checkout 285d67f35f6044cf733091e36248405ca967c62c -q
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] checkout of libcxxabi@285d67f35f6044cf733091e36248405ca967c62c revision failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+
+#Apply patch to bioncs
+cd $ANDROID_BUILD_TOP/bionic/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/bionics.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Patching bionics failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Add locale support to bioncs
+cd $ANDROID_BUILD_TOP/bionic/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/locale.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Adding locale support failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Add LLVM backend ARM fix to support -O0 compilation
+cd $ANDROID_BUILD_TOP/external/llvm/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/llvm.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Fixing ARM LLVM backend failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+echo "[ANDROID-PATCH] Patching completed successfully"
+cd $MIDGARD_DDK_WD
diff --git a/midgard/r13p0/android/patches/K/bionics.diff b/midgard/r13p0/android/patches/K/bionics.diff
new file mode 100755
index 0000000..b86cedf
--- /dev/null
+++ b/midgard/r13p0/android/patches/K/bionics.diff
@@ -0,0 +1,202 @@
+diff --git a/libc/Android.mk b/libc/Android.mk
+index 9610c14..d3dbe81 100644
+--- a/libc/Android.mk
++++ b/libc/Android.mk
+@@ -214,6 +215,7 @@ libc_bionic_src_:= \
+     bionic/libc_init_common.cpp \
+     bionic/libc_logging.cpp \
+     bionic/libgen.cpp \
++    bionic/locale.cpp \
+     bionic/mmap.cpp \
+     bionic/pthread_attr.cpp \
+     bionic/pthread_detach.cpp \
+@@ -232,7 +232,6 @@ libc_bionic_src_files := \
+     bionic/scandir.cpp \
+     bionic/sched_getaffinity.cpp \
+     bionic/__set_errno.cpp \
+-    bionic/setlocale.cpp \
+     bionic/signalfd.cpp \
+     bionic/sigwait.cpp \
+     bionic/statvfs.cpp \
+diff --git a/libc/bionic/wchar.cpp b/libc/bionic/wchar.cpp
+index 50a3875..cc830b7 100644
+--- a/libc/bionic/wchar.cpp
++++ b/libc/bionic/wchar.cpp
+@@ -96,6 +96,7 @@ int swscanf(const wchar_t* /*s*/, const wchar_t* /*format*/, ... ) {
+
+ int iswalnum(wint_t wc) { return isalnum(wc); }
+ int iswalpha(wint_t wc) { return isalpha(wc); }
++int iswblank(wint_t wc) { return isblank(wc); }
+ int iswcntrl(wint_t wc) { return iscntrl(wc); }
+ int iswdigit(wint_t wc) { return isdigit(wc); }
+ int iswgraph(wint_t wc) { return isgraph(wc); }
+diff --git a/libc/include/assert.h b/libc/include/assert.h
+index 62470f5..361a5ff 100644
+--- a/libc/include/assert.h
++++ b/libc/include/assert.h
+@@ -60,6 +60,6 @@
+ #endif
+
+ __BEGIN_DECLS
+-__dead void __assert(const char *, int, const char *);
+-__dead void __assert2(const char *, int, const char *, const char *);
++__dead void __assert(const char *, int, const char *) __noreturn;
++__dead void __assert2(const char *, int, const char *, const char *) __noreturn;
+ __END_DECLS
+diff --git a/libc/include/ctype.h b/libc/include/ctype.h
+index 5557e31..a66df12 100644
+--- a/libc/include/ctype.h
++++ b/libc/include/ctype.h
+@@ -45,11 +45,14 @@
+ #define	_CTYPE_U	0x01
+ #define	_CTYPE_L	0x02
+ #define	_CTYPE_N	0x04
++#define	_CTYPE_D	0x04
+ #define	_CTYPE_S	0x08
+ #define	_CTYPE_P	0x10
+ #define	_CTYPE_C	0x20
+ #define	_CTYPE_X	0x40
+ #define	_CTYPE_B	0x80
++#define	_CTYPE_R	(_CTYPE_P|_CTYPE_U|_CTYPE_L|_CTYPE_D|_CTYPE_B)
++#define	_CTYPE_A	(_CTYPE_L|_CTYPE_U)
+
+ __BEGIN_DECLS
+
+diff --git a/libc/include/locale.h b/libc/include/locale.h
+index 65b5c7d..6989851 100644
+--- a/libc/include/locale.h
++++ b/libc/include/locale.h
+@@ -29,6 +29,7 @@
+ #define _LOCALE_H_
+
+ #include <sys/cdefs.h>
++#include <xlocale.h>
+
+ __BEGIN_DECLS
+
+@@ -43,17 +44,65 @@ enum {
+     LC_PAPER     = 7,
+     LC_NAME      = 8,
+     LC_ADDRESS   = 9,
+-
+     LC_TELEPHONE      = 10,
+     LC_MEASUREMENT    = 11,
+     LC_IDENTIFICATION = 12
+ };
+
+-extern char *setlocale(int category, const char *locale);
++#define LC_CTYPE_MASK          (1 << LC_CTYPE)
++#define LC_NUMERIC_MASK        (1 << LC_NUMERIC)
++#define LC_TIME_MASK           (1 << LC_TIME)
++#define LC_COLLATE_MASK        (1 << LC_COLLATE)
++#define LC_MONETARY_MASK       (1 << LC_MONETARY)
++#define LC_MESSAGES_MASK       (1 << LC_MESSAGES)
++#define LC_PAPER_MASK          (1 << LC_PAPER)
++#define LC_NAME_MASK           (1 << LC_NAME)
++#define LC_ADDRESS_MASK        (1 << LC_ADDRESS)
++#define LC_TELEPHONE_MASK      (1 << LC_TELEPHONE)
++#define LC_MEASUREMENT_MASK    (1 << LC_MEASUREMENT)
++#define LC_IDENTIFICATION_MASK (1 << LC_IDENTIFICATION)
++
++#define LC_ALL_MASK (LC_CTYPE_MASK | LC_NUMERIC_MASK | LC_TIME_MASK | LC_COLLATE_MASK | \
++                     LC_MONETARY_MASK | LC_MESSAGES_MASK | LC_PAPER_MASK | LC_NAME_MASK | \
++                     LC_ADDRESS_MASK | LC_TELEPHONE_MASK | LC_MEASUREMENT_MASK | \
++                     LC_IDENTIFICATION_MASK)
++
++struct lconv {
++    char* decimal_point;
++    char* thousands_sep;
++    char* grouping;
++    char* int_curr_symbol;
++    char* currency_symbol;
++    char* mon_decimal_point;
++    char* mon_thousands_sep;
++    char* mon_grouping;
++    char* positive_sign;
++    char* negative_sign;
++    char  int_frac_digits;
++    char  frac_digits;
++    char  p_cs_precedes;
++    char  p_sep_by_space;
++    char  n_cs_precedes;
++    char  n_sep_by_space;
++    char  p_sign_posn;
++    char  n_sign_posn;
++    char  int_p_cs_precedes;
++    char  int_p_sep_by_space;
++    char  int_n_cs_precedes;
++    char  int_n_sep_by_space;
++    char  int_p_sign_posn;
++    char  int_n_sign_posn;
++};
++
++struct lconv* localeconv(void);
++
++locale_t duplocale(locale_t);
++void freelocale(locale_t);
++locale_t newlocale(int, const char*, locale_t);
++char* setlocale(int, const char*);
++locale_t uselocale(locale_t);
+
+-/* Make libstdc++-v3 happy.  */
+-struct lconv { };
+-struct lconv *localeconv(void);
++#define LC_GLOBAL_LOCALE ((locale_t) -1L)
+
+ __END_DECLS
+
+diff --git a/libc/include/stdio.h b/libc/include/stdio.h
+index 23fc944..28685c7 100644
+--- a/libc/include/stdio.h
++++ b/libc/include/stdio.h
+@@ -469,7 +469,10 @@ int vsprintf(char *dest, const char *format, __va_list ap)
+ }
+
+ #if defined(__clang__)
+-#define snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
++  #if !defined(snprintf)
++    #define __wrap_snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
++    #define snprintf(...) __wrap_snprintf(__VA_ARGS__)
++  #endif
+ #else
+ __BIONIC_FORTIFY_INLINE
+ __printflike(3, 4)
+@@ -481,7 +484,10 @@ int snprintf(char *dest, size_t size, const char *format, ...)
+ #endif
+
+ #if defined(__clang__)
+-#define sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
++  #if !defined(sprintf)
++    #define __wrap_sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
++    #define sprintf(...) __wrap_sprintf(__VA_ARGS__)
++  #endif
+ #else
+ __BIONIC_FORTIFY_INLINE
+ __printflike(2, 3)
+diff --git a/libc/include/stdlib.h b/libc/include/stdlib.h
+index 9fa84c1..188da69 100644
+--- a/libc/include/stdlib.h
++++ b/libc/include/stdlib.h
+@@ -68,6 +68,10 @@ static __inline__ float strtof(const char *nptr, char **endptr)
+     return (float)strtod(nptr, endptr);
+ }
+
++static __inline__ long double strtold(const char* s, char** end_ptr) {
++  return (long double)strtod(s, end_ptr);
++}
++
+ extern int atoi(const char *) __purefunc;
+ extern long atol(const char *) __purefunc;
+ extern long long atoll(const char *) __purefunc;
+diff --git a/libc/include/wchar.h b/libc/include/wchar.h
+index 76ac02c..c413806 100644
+--- a/libc/include/wchar.h
++++ b/libc/include/wchar.h
+@@ -77,6 +77,7 @@ extern int               fwprintf(FILE *, const wchar_t *, ...);
+ extern int               fwscanf(FILE *, const wchar_t *, ...);
+ extern int               iswalnum(wint_t);
+ extern int               iswalpha(wint_t);
++extern int               iswblank(wint_t);
+ extern int               iswcntrl(wint_t);
+ extern int               iswdigit(wint_t);
+ extern int               iswgraph(wint_t);
diff --git a/midgard/r13p0/android/patches/K/libcxx.diff b/midgard/r13p0/android/patches/K/libcxx.diff
new file mode 100755
index 0000000..a10d75c
--- /dev/null
+++ b/midgard/r13p0/android/patches/K/libcxx.diff
@@ -0,0 +1,13 @@
+diff --git a/include/cstdlib b/include/cstdlib
+index 152b891..e45a8fd 100644
+--- a/include/cstdlib
++++ b/include/cstdlib
+@@ -126,7 +126,7 @@ using ::realloc;
+ using ::abort;
+ using ::atexit;
+ using ::exit;
+-using ::_Exit;
++
+ using ::getenv;
+ using ::system;
+ using ::bsearch;
diff --git a/midgard/r13p0/android/patches/K/license.txt b/midgard/r13p0/android/patches/K/license.txt
new file mode 100755
index 0000000..d645695
--- /dev/null
+++ b/midgard/r13p0/android/patches/K/license.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/midgard/r13p0/android/patches/K/llvm.diff b/midgard/r13p0/android/patches/K/llvm.diff
new file mode 100755
index 0000000..d41d279
--- /dev/null
+++ b/midgard/r13p0/android/patches/K/llvm.diff
@@ -0,0 +1,14 @@
+diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
+index 8cdb853..868dd47 100644
+--- a/lib/Target/ARM/ARMInstrInfo.cpp
++++ b/lib/Target/ARM/ARMInstrInfo.cpp
+@@ -130,6 +130,10 @@ namespace {
+         MIB.addImm(0);
+       AddDefaultPred(MIB);
+
++      // Fix the GOT address by adding pc.
++      BuildMI(FirstMBB, MBBI, DL, TII.get(ARM::tPICADD), GlobalBaseReg)
++          .addReg(GlobalBaseReg).addImm(ARMPCLabelIndex);
++
+       return true;
+     }
diff --git a/midgard/r13p0/android/patches/K/locale.diff b/midgard/r13p0/android/patches/K/locale.diff
new file mode 100755
index 0000000..9c4c140
--- /dev/null
+++ b/midgard/r13p0/android/patches/K/locale.diff
@@ -0,0 +1,204 @@
+diff --git a/libc/bionic/locale.cpp b/libc/bionic/locale.cpp
+new file mode 100644
+index 0000000..4fade84
+--- /dev/null
++++ b/libc/bionic/locale.cpp
+@@ -0,0 +1,156 @@
++/*
++ * Copyright (C) 2008 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++#include <locale.h>
++#include <pthread.h>
++#include <stdlib.h>
++
++// We currently support a single locale, the "C" locale (also known as "POSIX").
++
++struct __locale_t {
++  // Because we only support one locale, these are just tokens with no data.
++};
++
++static pthread_once_t gLocaleOnce = PTHREAD_ONCE_INIT;
++static lconv gLocale;
++
++static pthread_once_t gUselocaleKeyOnce = PTHREAD_ONCE_INIT;
++static pthread_key_t gUselocaleKey;
++
++static void __locale_init() {
++  gLocale.decimal_point = const_cast<char*>(".");
++
++  char* not_available = const_cast<char*>("");
++  gLocale.thousands_sep = not_available;
++  gLocale.grouping = not_available;
++  gLocale.int_curr_symbol = not_available;
++  gLocale.currency_symbol = not_available;
++  gLocale.mon_decimal_point = not_available;
++  gLocale.mon_thousands_sep = not_available;
++  gLocale.mon_grouping = not_available;
++  gLocale.positive_sign = not_available;
++  gLocale.negative_sign = not_available;
++
++  gLocale.int_frac_digits = CHAR_MAX;
++  gLocale.frac_digits = CHAR_MAX;
++  gLocale.p_cs_precedes = CHAR_MAX;
++  gLocale.p_sep_by_space = CHAR_MAX;
++  gLocale.n_cs_precedes = CHAR_MAX;
++  gLocale.n_sep_by_space = CHAR_MAX;
++  gLocale.p_sign_posn = CHAR_MAX;
++  gLocale.n_sign_posn = CHAR_MAX;
++  gLocale.int_p_cs_precedes = CHAR_MAX;
++  gLocale.int_p_sep_by_space = CHAR_MAX;
++  gLocale.int_n_cs_precedes = CHAR_MAX;
++  gLocale.int_n_sep_by_space = CHAR_MAX;
++  gLocale.int_p_sign_posn = CHAR_MAX;
++  gLocale.int_n_sign_posn = CHAR_MAX;
++}
++
++static void __uselocale_key_init() {
++  pthread_key_create(&gUselocaleKey, NULL);
++}
++
++static bool __is_supported_locale(const char* locale) {
++  return (strcmp(locale, "") == 0 || strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0);
++}
++
++static locale_t __new_locale() {
++  return reinterpret_cast<locale_t>(malloc(sizeof(__locale_t)));
++}
++
++lconv* localeconv() {
++  pthread_once(&gLocaleOnce, __locale_init);
++  return &gLocale;
++}
++
++locale_t duplocale(locale_t l) {
++  locale_t clone = __new_locale();
++  if (clone != NULL && l != LC_GLOBAL_LOCALE) {
++    *clone = *l;
++  }
++  return clone;
++}
++
++void freelocale(locale_t l) {
++  free(l);
++}
++
++locale_t newlocale(int category_mask, const char* locale_name, locale_t /*base*/) {
++  // Is 'category_mask' valid?
++  if ((category_mask & ~LC_ALL_MASK) != 0) {
++    errno = EINVAL;
++    return NULL;
++  }
++
++  if (!__is_supported_locale(locale_name)) {
++    errno = ENOENT;
++    return NULL;
++  }
++
++  return __new_locale();
++}
++
++char* setlocale(int category, char const* locale_name) {
++  // Is 'category' valid?
++  if (category < LC_CTYPE || category > LC_IDENTIFICATION) {
++    errno = EINVAL;
++    return NULL;
++  }
++
++  // Caller just wants to query the current locale?
++  if (locale_name == NULL) {
++    return const_cast<char*>("C");
++  }
++
++  // Caller wants one of the mandatory POSIX locales?
++  if (__is_supported_locale(locale_name)) {
++    return const_cast<char*>("C");
++  }
++
++  // We don't support any other locales.
++  errno = ENOENT;
++  return NULL;
++}
++
++locale_t uselocale(locale_t new_locale) {
++  pthread_once(&gUselocaleKeyOnce, __uselocale_key_init);
++
++  locale_t old_locale = static_cast<locale_t>(pthread_getspecific(gUselocaleKey));
++
++  // If this is the first call to uselocale(3) on this thread, we return LC_GLOBAL_LOCALE.
++  if (old_locale == NULL) {
++    old_locale = LC_GLOBAL_LOCALE;
++  }
++
++  if (new_locale != NULL) {
++    pthread_setspecific(gUselocaleKey, new_locale);
++  }
++
++  return old_locale;
++}
+diff --git a/libc/include/xlocale.h b/libc/include/xlocale.h
+new file mode 100644
+index 0000000..f7eb8f4
+--- /dev/null
++++ b/libc/include/xlocale.h
+@@ -0,0 +1,36 @@
++/*
++ * Copyright (C) 2014 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++#ifndef _XLOCALE_H_
++#define _XLOCALE_H_
++
++/* If we just use void* here, GCC exposes that in error messages. */
++struct __locale_t;
++typedef struct __locale_t* locale_t;
++
++#endif /* _XLOCALE_H_ */
diff --git a/midgard/r13p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt b/midgard/r13p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
new file mode 100755
index 0000000..46b704b
--- /dev/null
+++ b/midgard/r13p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
@@ -0,0 +1,92 @@
+#
+# (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+* ARM Mali Midgard devices
+
+
+Required properties:
+
+- compatible : Should be mali<chip>, replacing digits with x from the back,
+until malit<Major>xx, ending with arm,mali-midgard, the latter not optional.
+- reg : Physical base address of the device and length of the register area.
+- interrupts : Contains the three IRQ lines required by T-6xx devices
+- interrupt-names : Contains the names of IRQ resources in the order they were
+provided in the interrupts property. Must contain: "JOB, "MMU", "GPU".
+
+Optional:
+
+- clocks : Phandle to clock for the Mali T-6xx device.
+- clock-names : Shall be "clk_mali".
+- mali-supply : Phandle to regulator for the Mali device. Refer to
+Documentation/devicetree/bindings/regulator/regulator.txt for details.
+- operating-points : Refer to Documentation/devicetree/bindings/power/opp.txt
+for details.
+- snoop_enable_smc : SMC function ID to enable CCI snooping on the GPU port(s).
+- snoop_disable_smc : SMC function ID to disable CCI snooping on the GPU port(s).
+- jm_config : For T860/T880. Sets job manager configuration. An array containing:
+	- 1 to override the TIMESTAMP value, 0 otherwise.
+	- 1 to override clock gate, forcing them to be always on, 0 otherwise.
+	- 1 to enable job throttle, limiting the number of cores that can be started
+	  simultaneously, 0 otherwise.
+	- Value between 0 and 63 (including). If job throttle is enabled, this is one
+	  less than the number of cores that can be started simultaneously.
+- power_model : Sets power model parameters. Note that this model was designed for the Juno
+	        platform, and may not be suitable for other platforms. A structure containing :
+	- compatible: Should be arm,mali-simple-power-model
+	- voltage: Voltage at reference point. Specified in mV.
+	- frequency: Frequency at reference point. Specified in MHz.
+	- dynamic-power: Dynamic power at reference frequency and voltage. Specified in mW.
+	- static-power: Static power at reference frequency. Specified in mW.
+	- ts: An array containing coefficients for the temperature scaling factor.
+	  Used as : tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0], where T = temperature
+	- thermal-zone: A string identifying the thermal zone used for the GPU
+- system-coherency : Sets the coherency protocol to be used for coherent
+		     accesses made from the GPU.
+		     If not set then no coherency is used.
+	- 0  : ACE-Lite
+	- 1  : ACE
+	- 31 : No coherency
+
+Example for a Mali-T602:
+
+gpu@0xfc010000 {
+	compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
+	reg = <0xfc010000 0x4000>;
+	interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+	interrupt-names = "JOB", "MMU", "GPU";
+
+	clocks = <&pclk_mali>;
+	clock-names = "clk_mali";
+	mali-supply = <&vdd_mali>;
+	operating-points = <
+		/* KHz   uV */
+		533000 1250000,
+		450000 1150000,
+		400000 1125000,
+		350000 1075000,
+		266000 1025000,
+		160000  925000,
+		100000  912500,
+	>;
+	power_model {
+		compatible = "arm,mali-simple-power-model";
+		voltage = <800>;
+		frequency = <500>;
+		static-power = <500>;
+		dynamic-power = <1500>;
+		ts = <20000 2000 (-20) 2>;
+		thermal-zone = "gpu";
+	};
+};
diff --git a/midgard/r13p0/kernel/Documentation/dma-buf-test-exporter.txt b/midgard/r13p0/kernel/Documentation/dma-buf-test-exporter.txt
new file mode 100755
index 0000000..4208010
--- /dev/null
+++ b/midgard/r13p0/kernel/Documentation/dma-buf-test-exporter.txt
@@ -0,0 +1,40 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+=====================
+dma-buf-test-exporter
+=====================
+
+Overview
+--------
+
+The dma-buf-test-exporter is a simple exporter of dma_buf objects.
+It has a private API to allocate and manipulate the buffers which are represented as dma_buf fds.
+The private API allows:
+* simple allocation of physically non-contiguous buffers
+* simple allocation of physically contiguous buffers
+* query kernel side API usage stats (number of attachments, number of mappings, mmaps)
+* failure mode configuration (fail attach, mapping, mmap)
+* kernel side memset of buffers
+
+The buffers support all of the dma_buf API, including mmap.
+
+It supports being compiled as a module both in-tree and out-of-tree.
+
+See include/linux/dma-buf-test-exporter.h for the ioctl interface.
+See Documentation/dma-buf-sharing.txt for details on dma_buf.
+
+
diff --git a/midgard/r13p0/kernel/Documentation/kds.txt b/midgard/r13p0/kernel/Documentation/kds.txt
new file mode 100755
index 0000000..639288c
--- /dev/null
+++ b/midgard/r13p0/kernel/Documentation/kds.txt
@@ -0,0 +1,268 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+==============================
+kds - Kernel Dependency System
+==============================
+
+Introduction
+------------
+kds provides a mechanism for clients to atomically lock down multiple abstract resources.
+This can be done either synchronously or asynchronously.
+Abstract resources is used to allow a set of clients to use kds to control access to any
+resource, an example is structured memory buffers.
+
+kds supports that buffer is locked for exclusive access and sharing of buffers.
+
+kds can be built as either a integrated feature of the kernel or as a module.
+It supports being compiled as a module both in-tree and out-of-tree.
+
+
+Concepts
+--------
+A core concept in kds is abstract resources.
+A kds resource is just an abstraction for some client object, kds doesn't care what it is.
+Typically EGL will consider UMP buffers as being a resource, thus each UMP buffer has
+a kds resource for synchronization to the buffer.
+
+kds allows a client to create and destroy the abstract resource objects.
+A new resource object is made available asap (it is just a simple malloc with some initializations),
+while destroy it requires some external synchronization.
+
+The other core concept in kds is consumer of resources.
+kds is requested to allow a client to consume a set of resources and the client will be notified when it can consume the resources.
+
+Exclusive access allows only one client to consume a resource.
+Shared access permits multiple consumers to acceess a resource concurrently.
+
+
+APIs
+----
+kds provides simple resource allocate and destroy functions.
+Clients use this to instantiate and control the lifetime of the resources kds manages.
+
+kds provides two ways to wait for resources:
+- Asynchronous wait: the client specifies a function pointer to be called when wait is over
+- Synchronous wait: Function blocks until access is gained.
+
+The synchronous API has a timeout for the wait.
+The call can early out if a signal is delivered.
+
+After a client is done consuming the resource kds must be notified to release the resources and let some other client take ownership.
+This is done via resource set release call.
+
+A Windows comparison:
+kds implements WaitForMultipleObjectsEx(..., bWaitAll = TRUE, ...) but also has an asynchronous version in addition.
+kds resources can be seen as being the same as NT object manager resources.
+
+Internals
+---------
+kds guarantees atomicity when a set of resources is operated on.
+This is implemented via a global resource lock which is taken by kds when it updates resource objects.
+
+Internally a resource in kds is a linked list head with some flags.
+
+When a consumer requests access to a set of resources it is queued on each of the resources.
+The link from the consumer to the resources can be triggered. Once all links are triggered
+the registered callback is called or the blocking function returns.
+A link is considered triggered if it is the first on the list of consumers of a resource,
+or if all the links ahead of it is marked as shared and itself is of the type shared.
+
+When the client is done consuming the consumer object is removed from the linked lists of
+the resources and a potential new consumer becomes the head of the resources.
+As we add and remove consumers atomically across all resources we can guarantee that
+we never introduces a A->B + B->A type of loops/deadlocks.
+
+
+kbase/base implementation
+-------------------------
+A HW job needs access to a set of shared resources.
+EGL tracks this and encodes the set along with the atom in the ringbuffer.
+EGL allocates a (k)base dep object to represent the dependency to the set of resources and encodes that along with the list of resources.
+This dep object is use to create a dependency from a job chain(atom) to the resources it needs to run.
+When kbase decodes the atom in the ringbuffer it finds the set of resources and calls kds to request all the needed resources.
+As EGL needs to know when the kds request is delivered a new base event object is needed: atom enqueued. This event is only delivered for atoms which uses kds.
+The callback kbase registers trigger the dependency object described which would trigger the existing JD system to release the job chain.
+When the atom is done kds resource set release is call to release the resources.
+
+EGL will typically use exclusive access to the render target, while all buffers used as input can be marked as shared.
+
+
+Buffer publish/vsync
+--------------------
+EGL will use a separate ioctl or DRM flip to request the flip.
+If the LCD driver is integrated with kds EGL can do these operations early.
+The LCD driver must then implement the ioctl or DRM flip to be asynchronous with kds async call.
+The LCD driver binds a kds resource to each virtual buffer (2 buffers in case of double-buffering).
+EGL will make a dependency to the target kds resource in the kbase atom.
+After EGL receives a atom enqueued event it can ask the LCD driver to pan to the target kds resource.
+When the atom is completed it'll release the resource and the LCD driver will get its callback.
+In the callback it'll load the target buffer into the DMA unit of the LCD hardware.
+The LCD driver will be the consumer of both buffers for a short period.
+The LCD driver will call kds resource set release on the previous on-screen buffer when the next vsync/dma read end is handled.
+
+===============================================
+Kernel driver kds client design considerations
+=============================================== 
+
+Number of resources
+--------------------
+
+The kds api allows a client to wait for ownership of a number of resources, where by the client does not take on ownership of any of the resources in the resource set 
+until all of the resources in the set are released.  Consideration must be made with respect to performance, as waiting on large number of resources will incur
+a greater overhead and may increase system latency.  It may be worth considering how independent each of the resources are, for example if the same set of resources 
+are waited upon by each of the clients, then it may be possible to aggregate these into one resource that each client waits upon.
+
+Clients with shared access
+---------------------------
+
+The kds api allows a number of clients to gain shared access to a resource simultaneously, consideration must be made with respect to performance, large numbers of clients
+wanting shared access can incur a performance penalty and may increase system latency, specifically when the clients are granted access. Having an excessively high 
+number of clients with shared access should be avoided, consideration should be made to the call back configuration being used. See Callbacks and Scenario 1 below.
+
+Callbacks
+----------
+
+Careful consideration must be made as to which callback type is most appropriate for kds clients, direct callbacks are called immediately from the context in which the
+ownership of the resource is passed to the next waiter in the list.  Where as when using deferred callbacks the callback is deferred and called from outside the context
+that is relinquishing ownership, while this reduces the latency in the releasing clients context it does incur a cost as there is more latency between a resource 
+becoming free and the new client owning the resource callback being executed.  
+
+Obviously direct callbacks have a performance advantage, as the call back is immediate and does not have to wait for the kernel to context switch to schedule in the 
+execution of the callback.  
+
+However as the callback is immediate and within the context that is granting ownership it is important that the callback perform the MINIMUM amount of work necessary, 
+long call backs could cause poor system latency.  Special care and attention must be taken if the direct callbacks can be called from IRQ handlers, such as when 
+kds_resource_set_release is called from an IRQ handler, in this case you have to avoid any calls that may sleep.  
+
+Deferred contexts have the advantage that the call backs are deferred until they are scheduled by the kernel, therefore they are allowed to call functions that may sleep
+and if scheduled from IRQ context not incur as much system latency as would be seen with direct callbacks from within the IRQ.
+
+Once the clients callback has been called, the client is considered to be owning the resource.  Within the callback the client may only need to perform a small amount of work
+before the client need to give up owner ship.  The kds_resource_release function may be called from with in the call back, but consideration must be made when using direct 
+callbacks, with both respect to execution time and stack usage. Consider the example in Scenario 2 with direct callbacks:
+
+Scenario 1 - Shared client access - direct callbacks:
+
+Resources: X
+Clients: A(S), B(S), C(S), D(S), E(E)
+where: (S) = shared user, (E) = exclusive
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+}
+
+Where <client> is either A,B,C,D
+                                        Queue   |Owner 
+1.  E Requests X exclusive                      |
+2.  E Owns     X exclusive                      |E
+3.  A Requests X shared                        A|E
+4.  B Requests X shared                       BA|E
+5.  C Requests X shared                      CBA|E
+6.  D Requests X shared                     DCBA|E
+7.  E Releases X                                |DCBA
+8.  A Owns X shared                             |DCBA
+9.  B Owns X shared                             |DCBA
+10. C Owns X shared                             |DCBA
+11. D Owns X shared                             |DCBA
+
+At point 7 it is important to note that when E releases X; A,B,C and D become the new shared owners of X and the call back for each of the client(A,B,C,D) triggered, so consideration
+must be made as to whether a direct or deferred callback is suitable, using direct callbacks would result in the call graph.
+
+Call graph when E releases X:
+    kds_resource_set_release( .. )
+        +->client_A_cb( .. )
+        +->client_B_cb( .. )
+        +->client_C_cb( .. )
+        +->client_D_cb( .. )
+
+
+Scenario 2 - Immediate resource release - direct callbacks:
+
+Resource: X
+Clients: A, B, C, D
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+    kds_resource_set_release( .. );
+}
+
+Where <client> is either A,B,C,D
+
+1. A Owns     X exclusive
+2. B Requests X exclusive	(direct callback)
+3. C Requests X exclusive	(direct callback)
+4. D Requests X exclusive	(direct callback)
+5. A Releases X
+
+Call graph when A releases X:
+    kds_resource_set_release( .. )
+        +->client_B_cb( .. )
+            +->kds_resource_set_release( .. )
+                +->client_C_cb( .. )
+                    +->kds_resource_set_release( .. )
+                        +->client_D_cb( .. )
+
+As can be seen when a client releases the resource, with direct call backs it is possible to create nested calls
+
+IRQ Considerations
+-------------------
+
+Usage of kds_resource_release in IRQ handlers should be carefully considered.
+
+Things to keep in mind:
+
+1.) Are you using direct or deferred callbacks?
+2.) How many resources are you releasing?
+3.) How many shared waiters are pending on the resource?
+
+Releasing ownership and wait cancellation
+------------------------------------------
+
+Client Wait Cancellation
+-------------------------
+
+It may be necessary in certain circumstances for the client to cancel the wait for kds resources for error handling, process termination etc.  Cancellation is 
+performed using kds_resource_set_release or kds_resource_set_release_sync using the rset that was received from kds_async_waitall, kds_resource_set_release_sync 
+being used for waits which are using deferred callbacks.
+
+It is possible that while the request to cancel the wait is being issued by the client, the client is granted access to the resources.  Normally after the client
+has taken ownership and finishes with that resource, it will release ownership to signal other waiters which are pending, this causes a race with the cancellation.
+To prevent KDS trying to remove a wait twice from the internal list and accessing memory that is potentially freed, it is very important that all releasers use the
+same rset pointer.  Here is a simplified example of bad usage that must be avoided in any client implementation:
+
+Senario 3 - Bad release from multiple contexts:
+
+	This scenaro is highlighting bad usage of the kds API
+
+	kds_resource_set * rset;
+	kds_resource_set * rset_copy;
+
+	kds_async_waitall( &rset, ... ... ... );
+
+	/* Don't do this */
+	rset_copy = rset;
+
+Context A:
+	kds_resource_set_release( &rset )
+
+Context B:
+	kds_resource_set_release( &rset_copy )
+
diff --git a/midgard/r13p0/kernel/drivers/base/dma_buf_lock/sconscript b/midgard/r13p0/kernel/drivers/base/dma_buf_lock/sconscript
new file mode 100755
index 0000000..8212693
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/dma_buf_lock/sconscript
@@ -0,0 +1,27 @@
+#
+# (C) COPYRIGHT 2012, 2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+search_term = '^[\ ]*CONFIG_DMA_SHARED_BUFFER_USES_KDS[\ ]*=[\ ]*y'
+for line in open(linux_config_file, 'r'):
+	if re.search(search_term, line):
+		SConscript( 'src/sconscript' )
+		if env['tests'] and Glob('tests/sconscript'):
+			SConscript( 'tests/sconscript' )
+		break
diff --git a/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/Kbuild b/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
new file mode 100755
index 0000000..68dad07
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-m := dma_buf_lock.o
+endif
diff --git a/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/Makefile b/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/Makefile
new file mode 100755
index 0000000..cf76132
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: dma_buf_lock
+
+dma_buf_lock:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include"
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c b/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
new file mode 100755
index 0000000..9613ffc
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/dma-buf.h>
+#include <linux/kds.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+
+#include "dma_buf_lock.h"
+
+/* Maximum number of buffers that a single handle can address */
+#define DMA_BUF_LOCK_BUF_MAX 32
+
+#define DMA_BUF_LOCK_DEBUG 1
+
+static dev_t dma_buf_lock_dev;
+static struct cdev dma_buf_lock_cdev;
+static struct class *dma_buf_lock_class;
+static char dma_buf_lock_dev_name[] = "dma_buf_lock";
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static struct file_operations dma_buf_lock_fops =
+{
+	.owner   = THIS_MODULE,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = dma_buf_lock_ioctl,
+#else
+	.ioctl   = dma_buf_lock_ioctl,
+#endif
+	.compat_ioctl   = dma_buf_lock_ioctl,
+};
+
+typedef struct dma_buf_lock_resource
+{
+	int *list_of_dma_buf_fds;               /* List of buffers copied from userspace */
+	atomic_t locked;                        /* Status of lock */
+	struct dma_buf **dma_bufs;
+	struct kds_resource **kds_resources;    /* List of KDS resources associated with buffers */
+	struct kds_resource_set *resource_set;
+	unsigned long exclusive;                /* Exclusive access bitmap */
+	wait_queue_head_t wait;
+	struct kds_callback cb;
+	struct kref refcount;
+	struct list_head link;
+	int count;
+} dma_buf_lock_resource;
+
+static LIST_HEAD(dma_buf_lock_resource_list);
+static DEFINE_MUTEX(dma_buf_lock_mutex);
+
+static inline int is_dma_buf_lock_file(struct file *);
+static void dma_buf_lock_dounlock(struct kref *ref);
+
+static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
+{
+	dma_buf_lock_resource *resource;
+
+	if (!is_dma_buf_lock_file(file))
+		return -EINVAL;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_release\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return 0;
+}
+
+static void dma_buf_lock_kds_callback(void *param1, void *param2)
+{
+	dma_buf_lock_resource *resource = param1;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_kds_callback\n");
+#endif
+	atomic_set(&resource->locked, 1);
+
+	wake_up(&resource->wait);
+}
+
+static unsigned int dma_buf_lock_handle_poll(struct file *file,
+                                             struct poll_table_struct *wait)
+{
+	dma_buf_lock_resource *resource;
+	unsigned int ret = 0;
+
+	if (!is_dma_buf_lock_file(file))
+		return POLLERR;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll\n");
+#endif
+	if (1 == atomic_read(&resource->locked))
+	{
+		/* Resources have been locked */
+		ret = POLLIN | POLLRDNORM;
+		if (resource->exclusive)
+		{
+			ret |=  POLLOUT | POLLWRNORM;
+		}
+	}
+	else
+	{
+		if (!poll_does_not_wait(wait)) 
+		{
+			poll_wait(file, &resource->wait, wait);
+		}
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll : return %i\n", ret);
+#endif
+	return ret;
+}
+
+static const struct file_operations dma_buf_lock_handle_fops = {
+	.release	= dma_buf_lock_handle_release,
+	.poll		= dma_buf_lock_handle_poll,
+};
+
+/*
+ * is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock
+ */
+static inline int is_dma_buf_lock_file(struct file *file)
+{
+	return file->f_op == &dma_buf_lock_handle_fops;
+}
+
+
+
+/*
+ * Start requested lock.
+ *
+ * Allocates required memory, copies dma_buf_fd list from userspace,
+ * acquires related KDS resources, and starts the lock.
+ */
+static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
+{
+	dma_buf_lock_resource *resource;
+	int size;
+	int fd;
+	int i;
+	int ret;
+
+	if (NULL == request->list_of_dma_buf_fds)
+	{
+		return -EINVAL;
+	}
+	if (request->count <= 0)
+	{
+		return -EINVAL;
+	}
+	if (request->count > DMA_BUF_LOCK_BUF_MAX)
+	{
+		return -EINVAL;
+	}
+	if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE &&
+	    request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
+	{
+		return -EINVAL;
+	}
+
+	resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
+	if (NULL == resource)
+	{
+		return -ENOMEM;
+	}
+
+	atomic_set(&resource->locked, 0);
+	kref_init(&resource->refcount);
+	INIT_LIST_HEAD(&resource->link);
+	resource->count = request->count;
+
+	/* Allocate space to store dma_buf_fds received from user space */
+	size = request->count * sizeof(int);
+	resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->list_of_dma_buf_fds)
+	{
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Allocate space to store dma_buf pointers associated with dma_buf_fds */
+	size = sizeof(struct dma_buf *) * request->count;
+	resource->dma_bufs = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->dma_bufs)
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+	/* Allocate space to store kds_resources associated with dma_buf_fds */
+	size = sizeof(struct kds_resource *) * request->count;
+	resource->kds_resources = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->kds_resources)
+	{
+		kfree(resource->dma_bufs);
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Copy requested list of dma_buf_fds from user space */
+	size = request->count * sizeof(int);
+	if (0 != copy_from_user(resource->list_of_dma_buf_fds, (void __user *)request->list_of_dma_buf_fds, size))
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource->dma_bufs);
+		kfree(resource->kds_resources);
+		kfree(resource);
+		return -ENOMEM;
+	}
+#if DMA_BUF_LOCK_DEBUG
+	for (i = 0; i < request->count; i++)
+	{
+		printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
+	}
+#endif
+
+	/* Add resource to global list */
+	mutex_lock(&dma_buf_lock_mutex);
+
+	list_add(&resource->link, &dma_buf_lock_resource_list);
+
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	for (i = 0; i < request->count; i++)
+	{
+		/* Convert fd into dma_buf structure */
+		resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
+
+		if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i])))
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+
+		/*Get kds_resource associated with dma_buf */
+		resource->kds_resources[i] = get_dma_buf_kds_resource(resource->dma_bufs[i]);
+
+		if (NULL == resource->kds_resources[i])
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+#if DMA_BUF_LOCK_DEBUG
+		printk("dma_buf_lock_dolock : dma_buf_fd %i dma_buf %X kds_resource %X\n", resource->list_of_dma_buf_fds[i],
+		       (unsigned int)resource->dma_bufs[i], (unsigned int)resource->kds_resources[i]);
+#endif	
+	}
+
+	kds_callback_init(&resource->cb, 1, dma_buf_lock_kds_callback);
+	init_waitqueue_head(&resource->wait);
+
+	kref_get(&resource->refcount);
+
+	/* Create file descriptor associated with lock request */
+	fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops, 
+	                      (void *)resource, 0);
+	if (fd < 0)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+		return fd;
+	}
+
+	resource->exclusive = request->exclusive;
+
+	/* Start locking process */
+	ret = kds_async_waitall(&resource->resource_set,
+	                        &resource->cb, resource, NULL,
+	                        request->count,  &resource->exclusive,
+	                        resource->kds_resources);
+
+	if (IS_ERR_VALUE(ret))
+	{
+		put_unused_fd(fd);
+
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+
+		return ret;
+	}
+
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_dolock : complete\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return fd;
+}
+
+static void dma_buf_lock_dounlock(struct kref *ref)
+{
+	int i;
+	dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
+
+	atomic_set(&resource->locked, 0);
+
+	kds_callback_term(&resource->cb);
+
+	kds_resource_set_release(&resource->resource_set);
+
+	list_del(&resource->link);
+
+	for (i = 0; i < resource->count; i++)
+	{
+		dma_buf_put(resource->dma_bufs[i]);
+	}
+
+	kfree(resource->kds_resources);
+	kfree(resource->dma_bufs);
+	kfree(resource->list_of_dma_buf_fds);
+	kfree(resource);
+}
+
+static int __init dma_buf_lock_init(void)
+{
+	int err;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init\n");
+#endif
+	err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
+
+	if (0 == err)
+	{
+		cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops);
+
+		err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1);
+
+		if (0 == err)
+		{
+			dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
+			if (IS_ERR(dma_buf_lock_class))
+			{
+				err = PTR_ERR(dma_buf_lock_class);
+			}
+			else
+			{
+				struct device *mdev;
+				mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev, NULL, dma_buf_lock_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return 0;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(dma_buf_lock_class);
+			}
+			cdev_del(&dma_buf_lock_cdev);
+		}
+
+		unregister_chrdev_region(dma_buf_lock_dev, 1);
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init failed\n");
+#endif
+	return err;
+}
+
+static void __exit dma_buf_lock_exit(void)
+{
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_exit\n");
+#endif
+
+	/* Unlock all outstanding references */
+	while (1)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		if (list_empty(&dma_buf_lock_resource_list))
+		{
+			mutex_unlock(&dma_buf_lock_mutex);
+			break;
+		}
+		else
+		{
+			dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next, 
+			                                             dma_buf_lock_resource, link);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+		}
+	}
+
+	device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
+
+	class_destroy(dma_buf_lock_class);
+
+	cdev_del(&dma_buf_lock_cdev);
+
+	unregister_chrdev_region(dma_buf_lock_dev, 1);
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	dma_buf_lock_k_request request;
+	int size = _IOC_SIZE(cmd);
+
+	if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
+	{
+		return -ENOTTY;
+	}
+
+	switch (cmd)
+	{
+		case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
+			if (size != sizeof(dma_buf_lock_k_request))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&request, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+#if DMA_BUF_LOCK_DEBUG
+			printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
+#endif
+			return dma_buf_lock_dolock(&request);
+	}
+
+	return -ENOTTY;
+}
+
+module_init(dma_buf_lock_init);
+module_exit(dma_buf_lock_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h b/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
new file mode 100755
index 0000000..e1b9348
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _DMA_BUF_LOCK_H
+#define _DMA_BUF_LOCK_H
+
+typedef enum dma_buf_lock_exclusive
+{
+	DMA_BUF_LOCK_NONEXCLUSIVE = 0,
+	DMA_BUF_LOCK_EXCLUSIVE = -1
+} dma_buf_lock_exclusive;
+
+typedef struct dma_buf_lock_k_request
+{
+	int count;
+	int *list_of_dma_buf_fds;
+	int timeout;
+	dma_buf_lock_exclusive exclusive;
+} dma_buf_lock_k_request;
+
+#define DMA_BUF_LOCK_IOC_MAGIC '~'
+
+#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC       _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
+
+#define DMA_BUF_LOCK_IOC_MINNR 11
+#define DMA_BUF_LOCK_IOC_MAXNR 11
+
+#endif /* _DMA_BUF_LOCK_H */
diff --git a/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/sconscript b/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/sconscript
new file mode 100755
index 0000000..b8724f1
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/dma_buf_lock/src/sconscript
@@ -0,0 +1,33 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+src = [Glob('#kernel/drivers/base/dma_buf_lock/src/*.c'), Glob('#kernel/drivers/base/dma_buf_lock/src/*.h'), Glob('#kernel/drivers/base/dma_buf_lock/src/K*')]
+
+if env.GetOption('clean') :
+	# Clean module
+	env.Execute(Action("make clean", '[CLEAN] dma_buf_lock'))
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
+else:
+	# Build module
+	makeAction=Action("cd ${SOURCE.dir} && make dma_buf_lock && cp dma_buf_lock.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [makeAction])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
diff --git a/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild b/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
new file mode 100755
index 0000000..56b9f86
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma-buf-test-exporter.o
+endif
diff --git a/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig b/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
new file mode 100755
index 0000000..974f0c2
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+config DMA_SHARED_BUFFER_TEST_EXPORTER
+	tristate "Test exporter for the dma-buf framework"
+	depends on DMA_SHARED_BUFFER
+	help
+	  This option enables the test exporter usable to help test importerts.
diff --git a/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/Makefile b/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
new file mode 100755
index 0000000..06e3d5c
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
new file mode 100755
index 0000000..f4b6fd6
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
@@ -0,0 +1,679 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/dma-buf-test-exporter.h>
+#include <linux/dma-buf.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/atomic.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#include <linux/dma-attrs.h>
+#include <linux/dma-mapping.h>
+#endif
+
+struct dma_buf_te_alloc {
+	/* the real alloc */
+	int nr_pages;
+	struct page **pages;
+
+	/* the debug usage tracking */
+	int nr_attached_devices;
+	int nr_device_mappings;
+	int nr_cpu_mappings;
+
+	/* failure simulation */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+
+	bool contiguous;
+	dma_addr_t contig_dma_addr;
+	void *contig_cpu_addr;
+};
+
+static struct miscdevice te_device;
+
+static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc	*alloc;
+	alloc = buf->priv;
+
+	if (alloc->fail_attach)
+		return -EFAULT;
+
+	/* dma_buf is externally locked during call */
+	alloc->nr_attached_devices++;
+	return 0;
+}
+
+static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* dma_buf is externally locked during call */
+
+	alloc->nr_attached_devices--;
+}
+
+static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction)
+{
+	struct sg_table *sg;
+	struct scatterlist *iter;
+	struct dma_buf_te_alloc	*alloc;
+	int i;
+	int ret;
+
+	alloc = attachment->dmabuf->priv;
+
+	if (alloc->fail_map)
+		return ERR_PTR(-ENOMEM);
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* if the ARCH can't chain we can't have allocs larger than a single sg can hold */
+	if (alloc->nr_pages > SG_MAX_SINGLE_ALLOC)
+		return ERR_PTR(-EINVAL);
+#endif
+
+	sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (!sg)
+		return ERR_PTR(-ENOMEM);
+
+	/* from here we access the allocation object, so lock the dmabuf pointing to it */
+	mutex_lock(&attachment->dmabuf->lock);
+
+	if (alloc->contiguous)
+		ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+	else
+		ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL);
+	if (ret) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		kfree(sg);
+		return ERR_PTR(ret);
+	}
+
+	if (alloc->contiguous) {
+		sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE;
+		sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0);
+		sg_dma_address(sg->sgl) = alloc->contig_dma_addr;
+	} else {
+		for_each_sg(sg->sgl, iter, alloc->nr_pages, i)
+			sg_set_page(iter, alloc->pages[i], PAGE_SIZE, 0);
+	}
+
+	if (!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		sg_free_table(sg);
+		kfree(sg);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	alloc->nr_device_mappings++;
+	mutex_unlock(&attachment->dmabuf->lock);
+	return sg;
+}
+
+static void dma_buf_te_unmap(struct dma_buf_attachment *attachment,
+							 struct sg_table *sg, enum dma_data_direction direction)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = attachment->dmabuf->priv;
+
+	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction);
+	sg_free_table(sg);
+	kfree(sg);
+
+	mutex_lock(&attachment->dmabuf->lock);
+	alloc->nr_device_mappings--;
+	mutex_unlock(&attachment->dmabuf->lock);
+}
+
+static void dma_buf_te_release(struct dma_buf *buf)
+{
+	int i;
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* no need for locking */
+
+	if (alloc->contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++)
+			__free_page(alloc->pages[i]);
+	}
+	kfree(alloc->pages);
+	kfree(alloc);
+}
+
+
+static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings++;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	BUG_ON(alloc->nr_cpu_mappings <= 0);
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings--;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static int dma_buf_te_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dmabuf;
+	struct page *pageptr;
+
+	dmabuf = vma->vm_private_data;
+	alloc = dmabuf->priv;
+
+	if (vmf->pgoff > alloc->nr_pages)
+		return VM_FAULT_SIGBUS;
+
+	pageptr = alloc->pages[vmf->pgoff];
+
+	BUG_ON(!pageptr);
+
+	get_page(pageptr);
+	vmf->page = pageptr;
+
+	return 0;
+}
+
+struct vm_operations_struct dma_buf_te_vm_ops = {
+	.open = dma_buf_te_mmap_open,
+	.close = dma_buf_te_mmap_close,
+	.fault = dma_buf_te_mmap_fault
+};
+
+static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = dmabuf->priv;
+
+	if (alloc->fail_mmap)
+		return -ENOMEM;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTEXPAND;
+#endif
+	vma->vm_ops = &dma_buf_te_vm_ops;
+	vma->vm_private_data = dmabuf;
+
+	/*  we fault in the pages on access */
+
+	/* call open to do the ref-counting */
+	dma_buf_te_vm_ops.open(vma);
+
+	return 0;
+}
+
+static void *dma_buf_te_kmap_atomic(struct dma_buf *buf, unsigned long page_num)
+{
+	/* IGNORE */
+	return NULL;
+}
+
+static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long page_num)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return NULL;
+
+	return kmap(alloc->pages[page_num]);
+}
+static void dma_buf_te_kunmap(struct dma_buf *buf,
+		unsigned long page_num, void *addr)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return;
+
+	kunmap(alloc->pages[page_num]);
+	return;
+}
+
+static struct dma_buf_ops dma_buf_te_ops = {
+	/* real handlers */
+	.attach = dma_buf_te_attach,
+	.detach = dma_buf_te_detach,
+	.map_dma_buf = dma_buf_te_map,
+	.unmap_dma_buf = dma_buf_te_unmap,
+	.release = dma_buf_te_release,
+	.mmap = dma_buf_te_mmap,
+	.kmap = dma_buf_te_kmap,
+	.kunmap = dma_buf_te_kunmap,
+
+	/* nop handlers for mandatory functions we ignore */
+	.kmap_atomic = dma_buf_te_kmap_atomic
+};
+
+static int do_dma_buf_te_ioctl_version(struct dma_buf_te_ioctl_version __user *buf)
+{
+	struct dma_buf_te_ioctl_version v;
+
+	if (copy_from_user(&v, buf, sizeof(v)))
+		return -EFAULT;
+
+	if (v.op != DMA_BUF_TE_ENQ)
+		return -EFAULT;
+
+	v.op = DMA_BUF_TE_ACK;
+	v.major = DMA_BUF_TE_VER_MAJOR;
+	v.minor = DMA_BUF_TE_VER_MINOR;
+
+	if (copy_to_user(buf, &v, sizeof(v)))
+		return -EFAULT;
+	else
+		return 0;
+}
+
+static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, bool contiguous)
+{
+	struct dma_buf_te_ioctl_alloc alloc_req;
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dma_buf;
+	int i = 0;
+	int fd;
+
+	if (copy_from_user(&alloc_req, buf, sizeof(alloc_req))) {
+		dev_err(te_device.this_device, "%s: couldn't get user data", __func__);
+		goto no_input;
+	}
+
+	if (!alloc_req.size) {
+		dev_err(te_device.this_device, "%s: no size specified", __func__);
+		goto invalid_size;
+	}
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* Whilst it is possible to allocate larger buffer, we won't be able to
+	 * map it during actual usage (mmap() still succeeds). We fail here so
+	 * userspace code can deal with it early than having driver failure
+	 * later on. */
+	if (alloc_req.size > SG_MAX_SINGLE_ALLOC) {
+		dev_err(te_device.this_device, "%s: buffer size of %llu pages exceeded the mapping limit of %lu pages",
+				__func__, alloc_req.size, SG_MAX_SINGLE_ALLOC);
+		goto invalid_size;
+	}
+#endif
+
+	alloc = kzalloc(sizeof(struct dma_buf_te_alloc), GFP_KERNEL);
+	if (NULL == alloc) {
+		dev_err(te_device.this_device, "%s: couldn't alloc object", __func__);
+		goto no_alloc_object;
+	}
+
+	alloc->nr_pages = alloc_req.size;
+	alloc->contiguous = contiguous;
+
+	alloc->pages = kzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL);
+	if (!alloc->pages) {
+		dev_err(te_device.this_device,
+				"%s: couldn't alloc %d page structures", __func__,
+				alloc->nr_pages);
+		goto free_alloc_object;
+	}
+
+	if (contiguous) {
+		dma_addr_t dma_aux;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO, &attrs);
+#else
+		alloc->contig_cpu_addr = dma_alloc_writecombine(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO);
+#endif
+		if (!alloc->contig_cpu_addr) {
+			dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %d pages", __func__, alloc->nr_pages);
+			goto free_page_struct;
+		}
+		dma_aux = alloc->contig_dma_addr;
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = pfn_to_page(PFN_DOWN(dma_aux));
+			dma_aux += PAGE_SIZE;
+		}
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+			if (NULL == alloc->pages[i]) {
+				dev_err(te_device.this_device, "%s: couldn't alloc page", __func__);
+				goto no_page;
+			}
+		}
+	}
+
+	/* alloc ready, let's export it */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0))
+	{
+		struct dma_buf_export_info export_info = {
+			.exp_name = "dma_buf_te",
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0))
+			.owner = THIS_MODULE,
+#endif
+			.ops = &dma_buf_te_ops,
+			.size = alloc->nr_pages << PAGE_SHIFT,
+			.flags = O_CLOEXEC | O_RDWR,
+			.priv = alloc,
+		};
+
+		dma_buf = dma_buf_export(&export_info);
+	}
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0))
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR, NULL);
+#else
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR);
+#endif
+
+	if (IS_ERR_OR_NULL(dma_buf)) {
+		dev_err(te_device.this_device, "%s: couldn't export dma_buf", __func__);
+		goto no_export;
+	}
+
+	/* get fd for buf */
+	fd = dma_buf_fd(dma_buf, O_CLOEXEC);
+
+	if (fd < 0) {
+		dev_err(te_device.this_device, "%s: couldn't get fd from dma_buf", __func__);
+		goto no_fd;
+	}
+
+	return fd;
+
+no_fd:
+	dma_buf_put(dma_buf);
+no_export:
+	/* i still valid */
+no_page:
+	if (contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		while (i-- > 0)
+			__free_page(alloc->pages[i]);
+	}
+free_page_struct:
+	kfree(alloc->pages);
+free_alloc_object:
+	kfree(alloc);
+no_alloc_object:
+invalid_size:
+no_input:
+	return -EFAULT;
+}
+
+static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg)
+{
+	struct dma_buf_te_ioctl_status status;
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&status, arg, sizeof(status)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(status.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, get the current status */
+	alloc = dmabuf->priv;
+
+	/* lock while reading status to take a snapshot */
+	mutex_lock(&dmabuf->lock);
+	status.attached_devices = alloc->nr_attached_devices;
+	status.device_mappings = alloc->nr_device_mappings;
+	status.cpu_mappings = alloc->nr_cpu_mappings;
+	mutex_unlock(&dmabuf->lock);
+
+	if (copy_to_user(arg, &status, sizeof(status)))
+		goto err_have_dmabuf;
+
+	/* All OK */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing __user *arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_set_failing f;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, set the fail modes */
+	alloc = dmabuf->priv;
+	/* lock to set the fail modes atomically */
+	mutex_lock(&dmabuf->lock);
+	alloc->fail_attach = f.fail_attach;
+	alloc->fail_map    = f.fail_map;
+	alloc->fail_mmap   = f.fail_mmap;
+	mutex_unlock(&dmabuf->lock);
+
+	/* success */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
+{
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	unsigned int count;
+	unsigned int offset = 0;
+	int ret = 0;
+	int i;
+
+	attachment = dma_buf_attach(dma_buf, te_device.this_device);
+	if (IS_ERR_OR_NULL(attachment))
+		return -EBUSY;
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto no_import;
+	}
+
+	ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+	if (ret)
+		goto no_cpu_access;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		for (i = 0; i < sg_dma_len(sg); i = i + PAGE_SIZE) {
+			void *addr;
+
+			addr = dma_buf_kmap(dma_buf, i >> PAGE_SHIFT);
+			if (!addr) {
+				/* dma_buf_kmap is unimplemented in exynos and returns NULL */
+				ret = -EPERM;
+				goto no_kmap;
+			}
+			memset(addr, value, PAGE_SIZE);
+			dma_buf_kunmap(dma_buf, i >> PAGE_SHIFT, addr);
+		}
+		offset += sg_dma_len(sg);
+	}
+
+no_kmap:
+	dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+no_cpu_access:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+no_import:
+	dma_buf_detach(dma_buf, attachment);
+	return ret;
+}
+
+static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
+{
+
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_fill f;
+	int ret;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	ret = dma_te_buf_fill(dmabuf, f.value);
+	dma_buf_put(dmabuf);
+
+	return ret;
+}
+
+static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case DMA_BUF_TE_VERSION:
+		return do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg);
+	case DMA_BUF_TE_ALLOC:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false);
+	case DMA_BUF_TE_ALLOC_CONT:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true);
+	case DMA_BUF_TE_QUERY:
+		return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg);
+	case DMA_BUF_TE_SET_FAILING:
+		return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg);
+	case DMA_BUF_TE_FILL:
+		return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations dma_buf_te_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = dma_buf_te_ioctl,
+	.compat_ioctl = dma_buf_te_ioctl,
+};
+
+static int __init dma_buf_te_init(void)
+{
+	int res;
+	te_device.minor = MISC_DYNAMIC_MINOR;
+	te_device.name = "dma_buf_te";
+	te_device.fops = &dma_buf_te_fops;
+
+	res = misc_register(&te_device);
+	if (res) {
+		printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n");
+		return res;
+	}
+	te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	dev_info(te_device.this_device, "dma_buf_te ready\n");
+	return 0;
+
+}
+
+static void __exit dma_buf_te_exit(void)
+{
+	misc_deregister(&te_device);
+}
+
+module_init(dma_buf_te_init);
+module_exit(dma_buf_te_exit);
+MODULE_LICENSE("GPL");
+
diff --git a/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/sconscript b/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
new file mode 100755
index 0000000..09fe7f3
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
@@ -0,0 +1,33 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+Import('env')
+
+src = [Glob('#kernel/drivers/base/dma_buf_test_exporter/*.c'), Glob('#kernel/include/linux/*.h'), Glob('#kernel/drivers/base/dma_buf_test_exporter/K*')]
+
+env.Append( CPPPATH = '#kernel/include' )
+
+if env.GetOption('clean') :
+	env.Execute(Action("make clean", '[CLEAN] dma-buf-test-exporter'))
+	cmd = env.Command('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', src, [])
+	env.KernelObjTarget('dma-buf-test-exporter', cmd)
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make && ( ( [ -f dma-buf-test-exporter.ko ] && cp dma-buf-test-exporter.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/dma-buf-test-exporter.ko)", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', src, [makeAction])
+	env.KernelObjTarget('dma-buf-test-exporter', cmd)
+
+	
diff --git a/midgard/r13p0/kernel/drivers/base/kds/Kbuild b/midgard/r13p0/kernel/drivers/base/kds/Kbuild
new file mode 100755
index 0000000..a88acd8
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/kds/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_KDS) += kds.o
+obj-$(CONFIG_KDS_TEST) += kds_test.o
diff --git a/midgard/r13p0/kernel/drivers/base/kds/Kconfig b/midgard/r13p0/kernel/drivers/base/kds/Kconfig
new file mode 100755
index 0000000..5f96165
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/kds/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config KDS
+	tristate "Kernel dependency system"
+	help
+	  This option enables the generic kernel dependency system
diff --git a/midgard/r13p0/kernel/drivers/base/kds/Makefile b/midgard/r13p0/kernel/drivers/base/kds/Makefile
new file mode 100755
index 0000000..364d151
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/kds/Makefile
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+CONFIG_KDS_TEST ?= n
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: kds
+
+kds:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS=m
+
+kds_test:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS_TEST=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r13p0/kernel/drivers/base/kds/kds.c b/midgard/r13p0/kernel/drivers/base/kds/kds.c
new file mode 100755
index 0000000..62612d4
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/kds/kds.c
@@ -0,0 +1,559 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/kds.h>
+#include <linux/kref.h>
+
+#include <asm/atomic.h>
+
+#define KDS_LINK_TRIGGERED (1u << 0)
+#define KDS_LINK_EXCLUSIVE (1u << 1)
+
+#define KDS_INVALID (void *)-2
+#define KDS_RESOURCE (void *)-1
+
+struct kds_resource_set
+{
+	unsigned long         num_resources;
+	unsigned long         pending;
+	struct kds_callback  *cb;
+	void                 *callback_parameter;
+	void                 *callback_extra_parameter;
+	struct list_head      callback_link;
+	struct work_struct    callback_work;
+	atomic_t              cb_queued;
+	/* This resource set will be freed when there are no pending
+	 * callbacks */
+	struct kref           refcount;
+
+	/* This is only initted when kds_waitall() is called. */
+	wait_queue_head_t     wake;
+
+	struct kds_link       resources[0];
+
+};
+
+static DEFINE_SPINLOCK(kds_lock);
+
+static void __resource_set_release(struct kref *ref)
+{
+	struct kds_resource_set *rset = container_of(ref,
+			struct kds_resource_set, refcount);
+
+	kfree(rset);
+}
+
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb)
+{
+	int ret = 0;
+
+	cb->direct = direct;
+	cb->user_cb = user_cb;
+
+	if (!direct)
+	{
+		cb->wq = alloc_workqueue("kds", WQ_UNBOUND | WQ_HIGHPRI, WQ_UNBOUND_MAX_ACTIVE);
+		if (!cb->wq)
+			ret = -ENOMEM;
+	}
+	else
+	{
+		cb->wq = NULL;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(kds_callback_init);
+
+void kds_callback_term(struct kds_callback *cb)
+{
+	if (!cb->direct)
+	{
+		BUG_ON(!cb->wq);
+		destroy_workqueue(cb->wq);
+	}
+	else
+	{
+		BUG_ON(cb->wq);
+	}
+}
+
+EXPORT_SYMBOL(kds_callback_term);
+
+static void kds_do_user_callback(struct kds_resource_set *rset)
+{
+	rset->cb->user_cb(rset->callback_parameter, rset->callback_extra_parameter);
+}
+
+static void kds_queued_callback(struct work_struct *work)
+{
+	struct kds_resource_set *rset;
+	rset = container_of(work, struct kds_resource_set, callback_work);
+
+	atomic_dec(&rset->cb_queued);
+
+	kds_do_user_callback(rset);
+}
+
+static void kds_callback_perform(struct kds_resource_set *rset)
+{
+	if (rset->cb->direct)
+		kds_do_user_callback(rset);
+	else
+	{
+		int result;
+
+		atomic_inc(&rset->cb_queued);
+
+		result = queue_work(rset->cb->wq, &rset->callback_work);
+		/* if we got a 0 return it means we've triggered the same rset twice! */
+		WARN_ON(!result);
+	}
+}
+
+void kds_resource_init(struct kds_resource * const res)
+{
+	BUG_ON(!res);
+	INIT_LIST_HEAD(&res->waiters.link);
+	res->waiters.parent = KDS_RESOURCE;
+}
+EXPORT_SYMBOL(kds_resource_init);
+
+int kds_resource_term(struct kds_resource *res)
+{
+	unsigned long lflags;
+	BUG_ON(!res);
+	spin_lock_irqsave(&kds_lock, lflags);
+	if (!list_empty(&res->waiters.link))
+	{
+		spin_unlock_irqrestore(&kds_lock, lflags);
+		printk(KERN_ERR "ERROR: KDS resource is still in use\n");
+		return -EBUSY;
+	}
+	res->waiters.parent = KDS_INVALID;
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	return 0;
+}
+EXPORT_SYMBOL(kds_resource_term);
+
+int kds_async_waitall(
+		struct kds_resource_set ** const pprset,
+		struct kds_callback      *cb,
+		void                     *callback_parameter,
+		void                     *callback_extra_parameter,
+		int                       number_resources,
+		unsigned long            *exclusive_access_bitmap,
+		struct kds_resource     **resource_list)
+{
+	struct kds_resource_set *rset = NULL;
+	unsigned long lflags;
+	int i;
+	int triggered;
+	int err = -EFAULT;
+
+	BUG_ON(!pprset);
+	BUG_ON(!resource_list);
+	BUG_ON(!cb);
+
+	WARN_ONCE(number_resources > 10, "Waiting on a high numbers of resources may increase latency, see documentation.");
+
+	rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+	if (!rset)
+	{
+		return -ENOMEM;
+	}
+
+	rset->num_resources = number_resources;
+	rset->pending = number_resources;
+	rset->cb = cb;
+	rset->callback_parameter = callback_parameter;
+	rset->callback_extra_parameter = callback_extra_parameter;
+	INIT_LIST_HEAD(&rset->callback_link);
+	INIT_WORK(&rset->callback_work, kds_queued_callback);
+	atomic_set(&rset->cb_queued, 0);
+	kref_init(&rset->refcount);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		INIT_LIST_HEAD(&rset->resources[i].link);
+		rset->resources[i].parent = rset;
+	}
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		unsigned long link_state = 0;
+
+		if (test_bit(i, exclusive_access_bitmap))
+		{
+			link_state |= KDS_LINK_EXCLUSIVE;
+		}
+
+		/* no-one else waiting? */
+		if (list_empty(&resource_list[i]->waiters.link))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		/* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+		else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+				(((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		rset->resources[i].state = link_state;
+
+		/* avoid double wait (hang) */
+		if (!list_empty(&resource_list[i]->waiters.link))
+		{
+			/* adding same rset again? */
+			if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+			{
+				goto roll_back;
+			}
+		}
+		list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+	}
+
+	triggered = (rset->pending == 0);
+
+	/* set the pointer before the callback is called so it sees it */
+	*pprset = rset;
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	if (triggered)
+	{
+		/* all resources obtained, trigger callback */
+		kds_callback_perform(rset);
+	}
+
+	return 0;
+
+roll_back:
+	/* roll back */
+	while (i-- > 0)
+	{
+		list_del(&rset->resources[i].link);
+	}
+	err = -EINVAL;
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	kfree(rset);
+	return err;
+}
+EXPORT_SYMBOL(kds_async_waitall);
+
+static void wake_up_sync_call(void *callback_parameter, void *callback_extra_parameter)
+{
+	wait_queue_head_t *wait = (wait_queue_head_t *)callback_parameter;
+	wake_up(wait);
+}
+
+static struct kds_callback sync_cb =
+{
+	wake_up_sync_call,
+	1,
+	NULL,
+};
+
+struct kds_resource_set *kds_waitall(
+		int                   number_resources,
+		unsigned long        *exclusive_access_bitmap,
+		struct kds_resource **resource_list,
+		unsigned long         jiffies_timeout)
+{
+	struct kds_resource_set *rset;
+	unsigned long lflags;
+	int i;
+	int triggered = 0;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+	if (!rset)
+		return rset;
+
+	rset->num_resources = number_resources;
+	rset->pending = number_resources;
+	init_waitqueue_head(&rset->wake);
+	INIT_LIST_HEAD(&rset->callback_link);
+	INIT_WORK(&rset->callback_work, kds_queued_callback);
+	atomic_set(&rset->cb_queued, 0);
+	kref_init(&rset->refcount);
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		unsigned long link_state = 0;
+
+		if (test_bit(i, exclusive_access_bitmap))
+		{
+			link_state |= KDS_LINK_EXCLUSIVE;
+		}
+
+		if (list_empty(&resource_list[i]->waiters.link))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		/* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+		else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+				(((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+
+		INIT_LIST_HEAD(&rset->resources[i].link);
+		rset->resources[i].parent = rset;
+		rset->resources[i].state = link_state;
+
+		/* avoid double wait (hang) */
+		if (!list_empty(&resource_list[i]->waiters.link))
+		{
+			/* adding same rset again? */
+			if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+			{
+				goto roll_back;
+			}
+		}
+
+		list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+	}
+
+	if (rset->pending == 0)
+		triggered = 1;
+	else
+	{
+		rset->cb = &sync_cb;
+		rset->callback_parameter = &rset->wake;
+		rset->callback_extra_parameter = NULL;
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	if (!triggered)
+	{
+		long wait_res = 0;
+		long timeout = (jiffies_timeout == KDS_WAIT_BLOCKING) ?
+				MAX_SCHEDULE_TIMEOUT : jiffies_timeout;
+
+		if (timeout)
+		{
+			wait_res = wait_event_interruptible_timeout(rset->wake,
+					rset->pending == 0, timeout);
+		}
+
+		if ((wait_res == -ERESTARTSYS) || (wait_res == 0))
+		{
+			/* use \a kds_resource_set_release to roll back */
+			kds_resource_set_release(&rset);
+			return ERR_PTR(wait_res);
+		}
+	}
+	return rset;
+
+roll_back:
+	/* roll back */
+	while (i-- > 0)
+	{
+		list_del(&rset->resources[i].link);
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	kfree(rset);
+	return ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL(kds_waitall);
+
+static void trigger_new_rset_owner(struct kds_resource_set *rset,
+		struct list_head *triggered)
+{
+	if (0 == --rset->pending) {
+		/* new owner now triggered, track for callback later */
+		kref_get(&rset->refcount);
+		list_add(&rset->callback_link, triggered);
+	}
+}
+
+static void __kds_resource_set_release_common(struct kds_resource_set *rset)
+{
+	struct list_head triggered = LIST_HEAD_INIT(triggered);
+	struct kds_resource_set *it;
+	unsigned long lflags;
+	int i;
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < rset->num_resources; i++)
+	{
+		struct kds_resource *resource;
+		struct kds_link *it = NULL;
+
+		/* fetch the previous entry on the linked list */
+		it = list_entry(rset->resources[i].link.prev, struct kds_link, link);
+		/* unlink ourself */
+		list_del(&rset->resources[i].link);
+
+		/* any waiters? */
+		if (list_empty(&it->link))
+			continue;
+
+		/* were we the head of the list? (head if prev is a resource) */
+		if (it->parent != KDS_RESOURCE)
+		{
+			if ((it->state & KDS_LINK_TRIGGERED) && !(it->state & KDS_LINK_EXCLUSIVE))
+			{
+				/*
+				 * previous was triggered and not exclusive, so we
+				 * trigger non-exclusive until end-of-list or first
+				 * exclusive
+				 */
+
+				struct kds_link *it_waiting = it;
+
+				list_for_each_entry(it, &it_waiting->link, link)
+				{
+					/* exclusive found, stop triggering */
+					if (it->state & KDS_LINK_EXCLUSIVE)
+						break;
+
+					it->state |= KDS_LINK_TRIGGERED;
+					/* a parent to update? */
+					if (it->parent != KDS_RESOURCE)
+						trigger_new_rset_owner(
+								it->parent,
+								&triggered);
+				}
+			}
+			continue;
+		}
+
+		/* we were the head, find the kds_resource */
+		resource = container_of(it, struct kds_resource, waiters);
+
+		/* we know there is someone waiting from the any-waiters test above */
+
+		/* find the head of the waiting list */
+		it = list_first_entry(&resource->waiters.link, struct kds_link, link);
+
+		/* new exclusive owner? */
+		if (it->state & KDS_LINK_EXCLUSIVE)
+		{
+			/* link now triggered */
+			it->state |= KDS_LINK_TRIGGERED;
+			/* a parent to update? */
+			trigger_new_rset_owner(it->parent, &triggered);
+		}
+		/* exclusive releasing ? */
+		else if (rset->resources[i].state & KDS_LINK_EXCLUSIVE)
+		{
+			/* trigger non-exclusive until end-of-list or first exclusive */
+			list_for_each_entry(it, &resource->waiters.link, link)
+			{
+				/* exclusive found, stop triggering */
+				if (it->state & KDS_LINK_EXCLUSIVE)
+					break;
+
+				it->state |= KDS_LINK_TRIGGERED;
+				/* a parent to update? */
+				trigger_new_rset_owner(it->parent, &triggered);
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	while (!list_empty(&triggered))
+	{
+		it = list_first_entry(&triggered, struct kds_resource_set, callback_link);
+		list_del(&it->callback_link);
+		kds_callback_perform(it);
+
+		/* Free the resource set if no callbacks pending */
+		kref_put(&it->refcount, &__resource_set_release);
+	}
+}
+
+void kds_resource_set_release(struct kds_resource_set **pprset)
+{
+	struct kds_resource_set *rset;
+	int queued;
+
+	rset = cmpxchg(pprset, *pprset, NULL);
+
+	if (!rset)
+	{
+		/* caught a race between a cancelation
+		 * and a completion, nothing to do */
+		return;
+	}
+
+	__kds_resource_set_release_common(rset);
+
+	/*
+	 * Caller is responsible for guaranteeing that callback work is not
+	 * pending (i.e. its running or completed) prior to calling release.
+	 */
+	queued = atomic_read(&rset->cb_queued);
+	BUG_ON(queued);
+
+	kref_put(&rset->refcount, &__resource_set_release);
+}
+EXPORT_SYMBOL(kds_resource_set_release);
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset)
+{
+	struct kds_resource_set *rset;
+
+	rset = cmpxchg(pprset, *pprset, NULL);
+	if (!rset)
+	{
+		/* caught a race between a cancelation
+		 * and a completion, nothing to do */
+		return;
+	}
+
+	__kds_resource_set_release_common(rset);
+
+	/*
+	 * In the case of a kds async wait cancellation ensure the deferred
+	 * call back does not get scheduled if a trigger fired at the same time
+	 * to release the wait.
+	 */
+	cancel_work_sync(&rset->callback_work);
+
+	kref_put(&rset->refcount, &__resource_set_release);
+}
+EXPORT_SYMBOL(kds_resource_set_release_sync);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/midgard/r13p0/kernel/drivers/base/kds/sconscript b/midgard/r13p0/kernel/drivers/base/kds/sconscript
new file mode 100755
index 0000000..91a79fd
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/kds/sconscript
@@ -0,0 +1,63 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+import re
+Import('env')
+
+#Android uses sync_pt to accomplish KDS functionality.
+#Midgard KDS is not used by Android
+if env['os'] == 'android':
+	Return()
+
+# If KDS is built into the kernel already we skip building the module ourselves
+linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+search_term = '^[\ ]*CONFIG_KDS[\ ]*=[\ ]*y'
+build_kds = 1
+for line in open(linux_config_file, 'r'):
+	if re.search(search_term, line):
+		# KDS in kernel. Do not build KDS kernel module but
+		# still allow for building kds_test module.
+		build_kds = 0
+
+src = [Glob('#kernel/drivers/base/kds/*.c'), Glob('#kernel/include/linux/*.h'), Glob('#kernel/drivers/base/kds/K*')]
+
+env.Append( CPPPATH = '#kernel/include' )
+
+if Glob('tests/sconscript'):
+	SConscript( 'tests/sconscript' )
+
+if env.GetOption('clean') :
+	# Clean KDS module
+	if build_kds or (int(env['unit']) == 1):
+		env.Execute(Action("make clean", '[CLEAN] kds'))
+		cmd = env.Command('$STATIC_LIB_PATH/kds.ko', src, [])
+		env.KernelObjTarget('kds', cmd)
+else:
+	# Build KDS module
+	if build_kds:
+		makeAction=Action("cd ${SOURCE.dir} && make kds && cp kds.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+		cmd = env.Command('$STATIC_LIB_PATH/kds.ko', src, [makeAction])
+		env.KernelObjTarget('kds', cmd)
+
+	# Build KDS test module
+	if int(env['unit']) == 1:
+		makeActionTest=Action("cd ${SOURCE.dir} && make kds_test && cp kds_test.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+		cmdTest = env.Command('$STATIC_LIB_PATH/kds_test.ko', src, [makeActionTest])
+		env.KernelObjTarget('kds', cmdTest)
+		if build_kds:
+			env.Depends(cmdTest, cmd)
+
diff --git a/midgard/r13p0/kernel/drivers/base/sconscript b/midgard/r13p0/kernel/drivers/base/sconscript
new file mode 100755
index 0000000..84de8c4
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/sconscript
@@ -0,0 +1,36 @@
+#
+# (C) COPYRIGHT 2010-2014, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import( 'env' )
+
+if Glob('bus_logger/sconscript'):
+	if env['buslog'] == '1':
+		SConscript('bus_logger/sconscript')
+
+if Glob('mali_fpga_sysctl/sconscript'):
+	SConscript('mali_fpga_sysctl/sconscript')
+
+if Glob('dma_buf_lock/sconscript'):
+	SConscript('dma_buf_lock/sconscript')
+
+if Glob('kds/sconscript'):
+	SConscript('kds/sconscript')
+
+if Glob('ump/sconscript'):
+	SConscript('ump/sconscript')
+
+if Glob('dma_buf_test_exporter/sconscript'):
+	SConscript('dma_buf_test_exporter/sconscript')
+
diff --git a/midgard/r13p0/kernel/drivers/base/ump/Kbuild b/midgard/r13p0/kernel/drivers/base/ump/Kbuild
new file mode 100755
index 0000000..2bbdba2
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += src/
+
diff --git a/midgard/r13p0/kernel/drivers/base/ump/Kconfig b/midgard/r13p0/kernel/drivers/base/ump/Kconfig
new file mode 100755
index 0000000..f7451e6
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/Kconfig
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config UMP
+	tristate "Enable Unified Memory Provider (UMP) support"
+	default n
+    help
+	  Enable this option to build support for the ARM UMP module.
+	  UMP can be used by the Mali T6xx module to improve performance
+	  by reducing the copying of data by sharing memory.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate one module, called ump.
diff --git a/midgard/r13p0/kernel/drivers/base/ump/docs/Doxyfile b/midgard/r13p0/kernel/drivers/base/ump/docs/Doxyfile
new file mode 100755
index 0000000..fbec8eb
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/docs/Doxyfile
@@ -0,0 +1,125 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/include/linux/ump-common.h ../../kernel/include/linux/ump.h
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           += ../../kernel/drivers/base/ump
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           +=
diff --git a/midgard/r13p0/kernel/drivers/base/ump/docs/sconscript b/midgard/r13p0/kernel/drivers/base/ump/docs/sconscript
new file mode 100755
index 0000000..521278d
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/docs/sconscript
@@ -0,0 +1,31 @@
+#
+# (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+doxygen_sources = [
+	'Doxyfile',
+	Glob('*.png'),
+	Glob('../*.h'),
+	Glob('../src/*.h') ]
+
+if env['doc'] == '1':
+        doxygen_target = env.Command('doxygen/html/index.html', doxygen_sources,
+                                     ['cd ${SOURCE.dir} && doxygen'])
+        env.Clean(doxygen_target, './doxygen')
+
+        Alias('doxygen', doxygen_target)
+
diff --git a/midgard/r13p0/kernel/drivers/base/ump/example_kernel_api.c b/midgard/r13p0/kernel/drivers/base/ump/example_kernel_api.c
new file mode 100755
index 0000000..858dd8b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/example_kernel_api.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Example routine to display information about an UMP allocation
+ * The routine takes an secure_id which can come from a different kernel module
+ * or from a client application (i.e. an ioctl).
+ * It creates a ump handle from the secure id (which validates the secure id)
+ * and if successful dumps the physical memory information.
+ * It follows the API and pins the memory while "using" the physical memory.
+ * Finally it calls the release function to indicate it's finished with the handle.
+ *
+ * If the function can't look up the handle it fails with return value -1.
+ * If the testy succeeds then it return 0.
+ * */
+
+static int display_ump_memory_information(ump_secure_id secure_id)
+{
+	const ump_dd_physical_block_64 * ump_blocks = NULL;
+	ump_dd_handle ump_mem;
+	uint64_t nr_blocks;
+	int i;
+	ump_alloc_flags flags;
+
+	/* get a handle from the secure id */
+	ump_mem = ump_dd_from_secure_id(secure_id);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE == ump_mem)
+	{
+		/* invalid ID received */
+		return -1;
+	}
+
+	/* at this point we know we've added a reference to the ump allocation, so we must release it with ump_dd_release */
+
+	ump_dd_phys_blocks_get_64(ump_mem, &nr_blocks, &ump_blocks);
+	flags = ump_dd_allocation_flags_get(ump_mem);
+
+	printf("UMP allocation with secure ID %u consists of %zd physical block(s):\n", secure_id, nr_blocks);
+
+	for(i=0; i<nr_blocks; ++i)
+	{
+		printf("\tBlock %d: 0x%08zX size 0x%08zX\n", i, ump_blocks[i].addr, ump_blocks[i].size);
+	}
+
+	printf("and was allocated using the following bitflag combo:  0x%lX\n", flags);
+
+	ump_dd_release(ump_mem);
+
+	return 0;
+}
+
diff --git a/midgard/r13p0/kernel/drivers/base/ump/example_user_api.c b/midgard/r13p0/kernel/drivers/base/ump/example_user_api.c
new file mode 100755
index 0000000..d143a64
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/example_user_api.c
@@ -0,0 +1,153 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <ump/ump.h>
+#include <memory.h>
+#include <stdio.h>
+
+/*
+ * Example routine to exercise the user space UMP api.
+ * This routine initializes the UMP api and allocates some CPU+device X memory.
+ * No usage hints are given, so the driver will use the default cacheability policy.
+ * With the allocation it creates a duplicate handle and plays with the reference count.
+ * Then it simulates interacting with a device and contains pseudo code for the device.
+ *
+ * If any error is detected correct cleanup will be performed and -1 will be returned.
+ * If successful then 0 will be returned.
+ */
+
+static int test_ump_user_api(void)
+{
+	/* This is the size we try to allocate*/
+	const size_t alloc_size = 4096;
+
+	ump_handle h = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_copy = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+	void * mapping = NULL;
+
+	ump_result ump_api_res;
+	int result = -1;
+
+	ump_secure_id id;
+
+	size_t size_returned;
+
+	ump_api_res = ump_open();
+	if (UMP_OK != ump_api_res)
+	{
+		/* failed to open an ump session */
+		/* early out */
+		return -1;
+	}
+
+	h = ump_allocate_64(alloc_size, UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | UMP_PROT_X_RD | UMP_PROT_X_WR);
+	/* the refcount is now 1 */
+	if (UMP_INVALID_MEMORY_HANDLE == h)
+	{
+		/* allocation failure */
+		goto cleanup;
+	}
+
+	/* this is how we could share this allocation with another process */
+
+	/* in process A: */
+	id = ump_secure_id_get(h);
+	/* still ref count 1 */
+	/* send the id to process B */
+
+	/* in process B: */
+	/* receive the id from A */
+	h_clone = ump_from_secure_id(id);
+	/* the ref count of the allocation is now 2 (one from each handle to it) */
+	/* do something ... */
+	/* release our clone */
+	ump_release(h_clone); /* safe to call even if ump_from_secure_id failed */
+	h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+
+	/* a simple save-for-future-use logic inside the driver would just copy the handle (but add a ref manually too!) */
+	/*
+	 * void assign_memory_to_job(h)
+	 * {
+	  */
+	h_copy = h;
+	ump_retain(h_copy); /* manual retain needed as we just assigned the handle, now 2 */
+	/*
+	 * }
+	 *
+	 * void job_completed(void)
+	 * {
+	 */
+	 ump_release(h_copy); /* normal handle release as if we got via an ump_allocate */
+	 h_copy = UMP_INVALID_MEMORY_HANDLE;
+	 /*
+	 * }
+	 */
+	
+	/* we're now back at ref count 1, and only h is a valid handle */
+	/* enough handle duplication show-off, let's play with the contents instead */
+
+	mapping = ump_map(h, 0, alloc_size);
+	if (NULL == mapping)
+	{
+		/* mapping failure, either out of address space or some other error */
+		goto cleanup;
+	}
+
+	memset(mapping, 0, alloc_size);
+
+	/* let's pretend we're going to start some hw device on this buffer and read the result afterwards */
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN, mapping, alloc_size);
+ 	/*
+		device cache invalidate
+
+		memory barrier
+
+		start device
+
+		memory barrier
+
+		wait for device
+
+		memory barrier
+
+		device cache clean
+
+		memory barrier
+	*/
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN_AND_INVALIDATE, mapping, alloc_size);
+
+	/* we could now peek at the result produced by the hw device, which is now accessible via our mapping */
+
+	/* unmap the buffer when we're done with it */
+	ump_unmap(h, mapping, alloc_size);
+
+	result = 0;
+
+cleanup:
+	ump_release(h);
+	h = UMP_INVALID_MEMORY_HANDLE;
+
+	ump_close();
+
+	return result;
+}
+
diff --git a/midgard/r13p0/kernel/drivers/base/ump/sconscript b/midgard/r13p0/kernel/drivers/base/ump/sconscript
new file mode 100755
index 0000000..b4aa8b6
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+if Glob('src/sconscript') and int(env['ump']) == 1:
+	SConscript( 'src/sconscript' )
+	SConscript( 'docs/sconscript' )
+
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/Kbuild b/midgard/r13p0/kernel/drivers/base/ump/src/Kbuild
new file mode 100755
index 0000000..de6d307
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/Kbuild
@@ -0,0 +1,50 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Paths required for build
+UMP_PATH = $(src)/../..
+UMP_DEVICEDRV_PATH = $(src)/.
+
+# Set up defaults if not defined by the user
+MALI_UNIT_TEST ?= 0
+
+SRC :=\
+	common/ump_kernel_core.c \
+	common/ump_kernel_descriptor_mapping.c \
+	linux/ump_kernel_linux.c \
+	linux/ump_kernel_linux_mem.c
+
+UNIT_TEST_DEFINES=
+ifeq ($(MALI_UNIT_TEST), 1)
+	MALI_DEBUG ?= 1
+
+	UNIT_TEST_DEFINES = -DMALI_UNIT_TEST=1 \
+	                    -DMALI_DEBUG=$(MALI_DEBUG)
+endif
+
+# Use our defines when compiling
+ccflags-y += -I$(UMP_PATH) -I$(UMP_DEVICEDRV_PATH) $(UNIT_TEST_DEFINES)
+
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_UMP) += ump.o
+ifeq ($(CONFIG_ION),y)
+ccflags-y += -I$(srctree)/drivers/staging/android/ion -I$(srctree)/include/linux
+obj-$(CONFIG_UMP) += imports/ion/ump_kernel_import_ion.o
+endif
+
+# Tell the Linux build system to enable building of our .c files
+ump-y := $(SRC:.c=.o)
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/Makefile b/midgard/r13p0/kernel/drivers/base/ump/src/Makefile
new file mode 100755
index 0000000..45428ad
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/Makefile
@@ -0,0 +1,81 @@
+#
+# (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+ifneq ($(KBUILD_EXTMOD),)
+include $(KBUILD_EXTMOD)/Makefile.common
+else
+include ./Makefile.common
+endif
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+RELATIVE_ROOT=../../../../..
+ROOT = $(CURDIR)/$(RELATIVE_ROOT)
+
+EXTRA_CFLAGS=-I$(CURDIR)/../../../../include
+
+ifeq ($(MALI_UNIT_TEST),1)
+	EXTRA_CFLAGS += -DMALI_UNIT_TEST=$(MALI_UNIT_TEST)
+endif
+
+# Get any user defined KDIR-<names> or maybe even a hardcoded KDIR
+-include KDIR_CONFIGURATION
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+CONFIG ?= $(ARCH)
+
+# default cpu to select
+CPU ?= $(shell uname -m)
+
+# look up KDIR based om CPU selection
+KDIR ?= $(KDIR-$(CPU))
+
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(CPU))
+endif
+
+# Validate selected config
+ifneq ($(shell [ -d arch-$(CONFIG) ] && [ -f arch-$(CONFIG)/config.h ] && echo "OK"), OK)
+$(warning Current directory is $(shell pwd))
+$(error No configuration found for config $(CONFIG). Check that arch-$(CONFIG)/config.h exists)
+else
+# Link arch to the selected arch-config directory
+$(shell [ -L arch ] && rm arch)
+$(shell ln -sf arch-$(CONFIG) arch)
+$(shell touch arch/config.h)
+endif
+
+EXTRA_SYMBOLS=
+
+ifeq ($(MALI_UNIT_TEST),1)
+	KBASE_PATH=$(ROOT)/kernel/drivers/gpu/arm/midgard
+	EXTRA_SYMBOLS+=$(KBASE_PATH)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+KDS_PATH=$(ROOT)/kernel/drivers/base/kds
+EXTRA_SYMBOLS+=$(KDS_PATH)/Module.symvers
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="$(EXTRA_CFLAGS) $(SCONS_CFLAGS)" CONFIG_UMP=m KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+kernelrelease:
+	$(MAKE) -C $(KDIR) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" kernelrelease
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/Makefile.common b/midgard/r13p0/kernel/drivers/base/ump/src/Makefile.common
new file mode 100755
index 0000000..f29a4c1
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/Makefile.common
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2008-2010, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+SRC = $(UMP_FILE_PREFIX)/common/ump_kernel_core.c \
+      $(UMP_FILE_PREFIX)/common/ump_kernel_descriptor_mapping.c
+
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/arch-arm/config.h b/midgard/r13p0/kernel/drivers/base/ump/src/arch-arm/config.h
new file mode 100755
index 0000000..152d98f
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/arch-arm/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/arch-arm64/config.h b/midgard/r13p0/kernel/drivers/base/ump/src/arch-arm64/config.h
new file mode 100755
index 0000000..cfb14ca
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/arch-arm64/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c b/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c
new file mode 100755
index 0000000..07aa077
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c
@@ -0,0 +1,756 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* module headers */
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+/* local headers */
+#include <common/ump_kernel_core.h>
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+#define UMP_FLAGS_RANGE ((UMP_PROT_SHAREABLE<<1) - 1u)
+
+static umpp_device device;
+
+ump_result umpp_core_constructor(void)
+{
+	mutex_init(&device.secure_id_map_lock);
+	device.secure_id_map = umpp_descriptor_mapping_create(UMP_EXPECTED_IDS, UMP_MAX_IDS);
+	if (NULL != device.secure_id_map)
+	{
+		if (UMP_OK == umpp_device_initialize())
+		{
+			return UMP_OK;
+		}
+		umpp_descriptor_mapping_destroy(device.secure_id_map);
+	}
+	mutex_destroy(&device.secure_id_map_lock);
+
+	return UMP_ERROR;
+}
+
+void umpp_core_destructor(void)
+{
+	umpp_device_terminate();
+	umpp_descriptor_mapping_destroy(device.secure_id_map);
+	mutex_destroy(&device.secure_id_map_lock);
+}
+
+umpp_session *umpp_core_session_start(void)
+{
+	umpp_session * session;
+
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
+	if (NULL != session)
+	{
+		mutex_init(&session->session_lock);
+
+		INIT_LIST_HEAD(&session->memory_usage);
+
+		/* try to create import client session, not a failure if they fail to initialize */
+		umpp_import_handlers_init(session);
+	}
+
+	return session;
+}
+
+void umpp_core_session_end(umpp_session *session)
+{
+	umpp_session_memory_usage * usage, *_usage;
+	UMP_ASSERT(session);
+
+	list_for_each_entry_safe(usage, _usage, &session->memory_usage, link)
+	{
+		printk(KERN_WARNING "UMP: Memory usage cleanup, releasing secure ID %d\n", ump_dd_secure_id_get(usage->mem));
+		ump_dd_release(usage->mem);
+		kfree(usage);
+
+	}
+
+	/* we should now not hold any imported memory objects,
+	 * detatch all import handlers */
+	umpp_import_handlers_term(session);
+
+	mutex_destroy(&session->session_lock);
+	kfree(session);
+}
+
+ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	umpp_allocation * alloc;
+	int i;
+
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD ) ) == 0 ||
+	    ( flags & (UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read and one write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL | __GFP_HARDWALL);
+
+	if (NULL == alloc)
+		goto out1;
+
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+	alloc->size = size;
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+#ifdef CONFIG_KDS
+	kds_resource_init(&alloc->kds_res);
+#endif
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	if (0 != umpp_phys_commit(alloc))
+	{
+		goto out2;
+	}
+
+	/* all set up, allocate an ID for it */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	return alloc;
+
+out3:
+	umpp_phys_free(alloc);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+uint64_t ump_dd_size_get_64(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->size;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_size_get(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->size <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->size;
+}
+
+ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->id;
+}
+
+#ifdef CONFIG_KDS
+struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return &alloc->kds_res;
+}
+#endif
+
+ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	return alloc->flags;
+}
+
+ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id)
+{
+	umpp_allocation * alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+
+	mutex_lock(&device.secure_id_map_lock);
+
+	if (0 == umpp_descriptor_mapping_lookup(device.secure_id_map, secure_id, (void**)&alloc))
+	{
+		if (NULL != alloc->filter_func)
+		{
+			if (!alloc->filter_func(secure_id, alloc, alloc->callback_data))
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /* the filter denied access */
+			}
+		}
+
+		/* check permission to access it */
+		if ((UMP_DD_INVALID_MEMORY_HANDLE != alloc) && !(alloc->flags & UMP_PROT_SHAREABLE))
+		{
+			if (alloc->owner != get_current()->pid)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /*no rights for the current process*/
+			}
+		}
+
+		if (UMP_DD_INVALID_MEMORY_HANDLE != alloc)
+		{
+			if( ump_dd_retain(alloc) != UMP_DD_SUCCESS)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+			}
+		}
+	}
+	mutex_unlock(&device.secure_id_map_lock);
+
+	return alloc;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id)
+{
+	return ump_dd_from_secure_id(secure_id);
+}
+
+int ump_dd_retain(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* check for overflow */
+	while(1)
+	{
+		int refcnt = atomic_read(&alloc->refcount);
+		if (refcnt + 1 > 0)
+		{
+			if(atomic_cmpxchg(&alloc->refcount, refcnt, refcnt + 1) == refcnt)
+			{
+				return 0;
+			}
+		}
+		else
+		{
+			return -EBUSY;
+		}
+	}
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_add(ump_dd_handle mem)
+{
+	ump_dd_retain(mem);
+}
+
+
+void ump_dd_release(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+	uint32_t new_cnt;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* secure the id for lookup while releasing */
+	mutex_lock(&device.secure_id_map_lock);
+
+	/* do the actual release */
+	new_cnt = atomic_sub_return(1, &alloc->refcount);
+	if (0 == new_cnt)
+	{
+		/* remove from the table as this was the last ref */
+		umpp_descriptor_mapping_remove(device.secure_id_map, alloc->id);
+	}
+
+	/* release the lock as early as possible */
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if (0 != new_cnt)
+	{
+		/* exit if still have refs */
+		return;
+	}
+
+	UMP_ASSERT(list_empty(&alloc->map_list));
+
+#ifdef CONFIG_KDS
+	if (kds_resource_term(&alloc->kds_res))
+	{
+		printk(KERN_ERR "ump_dd_release: kds_resource_term failed,"
+				"unable to release UMP allocation\n");
+		return;
+	}
+#endif
+	/* cleanup */
+	if (NULL != alloc->final_release_func)
+	{
+		alloc->final_release_func(alloc, alloc->callback_data);
+	}
+
+	if (0 == (alloc->management_flags & UMP_MGMT_EXTERNAL))
+	{
+		umpp_phys_free(alloc);
+	}
+	else
+	{
+		kfree(alloc->block_array);
+	}
+
+	mutex_destroy(&alloc->map_list_lock);
+
+	kfree(alloc);
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_release(ump_dd_handle mem)
+{
+	ump_dd_release(mem);
+}
+
+void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(pCount);
+	UMP_ASSERT(pArray);
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+	*pCount = alloc->blocksCount;
+	*pArray = alloc->block_array;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks)
+{
+	const umpp_allocation * alloc;
+	unsigned long i;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	if((uint64_t)num_blocks != alloc->blocksCount)
+	{
+		return UMP_DD_INVALID;
+	}
+
+	for( i = 0; i < num_blocks; i++)
+	{
+		UMP_ASSERT(alloc->block_array[i].addr <= UMP_UINT32_MAX);
+		UMP_ASSERT(alloc->block_array[i].size <= UMP_UINT32_MAX);
+
+		blocks[i].addr = (unsigned long)alloc->block_array[i].addr;
+		blocks[i].size = (unsigned long)alloc->block_array[i].size;
+	}
+
+	return UMP_DD_SUCCESS;
+}
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(block);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	UMP_ASSERT(alloc->block_array[index].addr <= UMP_UINT32_MAX);
+	UMP_ASSERT(alloc->block_array[index].size <= UMP_UINT32_MAX);
+
+	block->addr = (unsigned long)alloc->block_array[index].addr;
+	block->size = (unsigned long)alloc->block_array[index].size;
+
+	return UMP_DD_SUCCESS;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->blocksCount <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->blocksCount;
+}
+
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void *uaddr, size_t size)
+{
+	umpp_cpu_mapping *map;
+
+	void *target_first = uaddr;
+	void *target_last = (void*)((uintptr_t)uaddr - 1 + size);
+
+	if (target_last < target_first) /* wrapped */
+	{
+		return NULL;
+	}
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if ( map->vaddr_start <= target_first &&
+		   (void*)((uintptr_t)map->vaddr_start + (map->nr_pages << PAGE_SHIFT) - 1) >= target_last)
+		{
+			goto out;
+		}
+	}
+	map = NULL;
+out:
+	mutex_unlock(&alloc->map_list_lock);
+
+	return map;
+}
+
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map)
+{
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(map);
+	mutex_lock(&alloc->map_list_lock);
+	list_add(&map->link, &alloc->map_list);
+	mutex_unlock(&alloc->map_list_lock);
+}
+
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target)
+{
+	umpp_cpu_mapping * map;
+
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(target);
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if (map == target)
+		{
+			list_del(&target->link);
+			kfree(target);
+			mutex_unlock(&alloc->map_list_lock);
+			return;
+		}
+	}
+
+	/* not found, error */
+	UMP_ASSERT(0);
+}
+
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const  block_index, uint64_t * const block_internal_offset)
+{
+	uint64_t i;
+
+	for (i = 0 ; i < alloc->blocksCount; i++)
+	{
+		if (offset < alloc->block_array[i].size)
+		{
+			/* found the block_array element containing this offset */
+			*block_index = i;
+			*block_internal_offset = offset;
+			return 0;
+		}
+		offset -= alloc->block_array[i].size;
+	}
+
+	return -ENXIO;
+}
+
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size)
+{
+	umpp_allocation * alloc;
+	void *vaddr;
+	umpp_cpu_mapping * mapping;
+	uint64_t virt_page_off; /* offset of given address from beginning of the virtual mapping */
+	uint64_t phys_page_off; /* offset of the virtual mapping from the beginning of the physical buffer */
+	uint64_t page_count; /* number of pages to sync */
+	uint64_t i;
+	uint64_t block_idx;
+	uint64_t block_offset;
+	uint64_t paddr;
+
+	UMP_ASSERT((UMP_MSYNC_CLEAN == op) || (UMP_MSYNC_CLEAN_AND_INVALIDATE == op));
+
+	alloc = (umpp_allocation*)mem;
+	vaddr = (void*)(uintptr_t)address;
+
+	if((alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+	{
+		/* mapping is not cached */
+		return;
+	}
+
+	mapping = umpp_dd_find_enclosing_mapping(alloc, vaddr, size);
+	if (NULL == mapping)
+	{
+		printk(KERN_WARNING "UMP: Illegal cache sync address %lx\n", (uintptr_t)vaddr);
+		return; /* invalid pointer or size causes out-of-bounds */
+	}
+
+	/* we already know that address + size don't wrap around as umpp_dd_find_enclosing_mapping didn't fail */
+	page_count = ((((((uintptr_t)address + size - 1) & PAGE_MASK) - ((uintptr_t)address & PAGE_MASK))) >> PAGE_SHIFT) + 1;
+	virt_page_off = (vaddr - mapping->vaddr_start) >> PAGE_SHIFT;
+	phys_page_off = mapping->page_off;
+
+	if (umpp_dd_find_start_block(alloc, (virt_page_off + phys_page_off) << PAGE_SHIFT, &block_idx, &block_offset))
+	{
+		/* should not fail as a valid mapping was found, so the phys mem must exists */
+		printk(KERN_WARNING "UMP: Unable to find physical start block with offset %llx\n", virt_page_off + phys_page_off);
+		UMP_ASSERT(0);
+		return;
+	}
+
+	paddr = alloc->block_array[block_idx].addr + block_offset + (((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1));
+
+	for (i = 0; i < page_count; i++)
+	{
+		size_t offset = ((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1);
+		size_t sz = min((size_t)PAGE_SIZE - offset, size);
+
+		/* check if we've overrrun the current block, if so move to the next block */
+		if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+		{
+			block_idx++;
+			UMP_ASSERT(block_idx < alloc->blocksCount);
+			paddr = alloc->block_array[block_idx].addr;
+		}
+
+		if (UMP_MSYNC_CLEAN == op)
+		{
+			ump_sync_to_memory(paddr, vaddr, sz);
+		}
+		else /* (UMP_MSYNC_CLEAN_AND_INVALIDATE == op) already validated on entry */
+		{
+			ump_sync_to_cpu(paddr, vaddr, sz);
+		}
+
+		/* advance to next page  */
+		vaddr = (void*)((uintptr_t)vaddr + sz);
+		size -= sz;
+		paddr += sz;
+	}
+}
+
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	uint64_t size = 0;
+	uint64_t i;
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	for (i = 0; i < num_blocks; i++)
+	{
+		size += blocks[i].size;
+	}
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD
+	    | UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read or write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc),__GFP_HARDWALL | GFP_KERNEL);
+
+	if (NULL == alloc)
+	{
+		goto out1;
+	}
+
+	alloc->block_array = kzalloc(sizeof(ump_dd_physical_block_64) * num_blocks,__GFP_HARDWALL | GFP_KERNEL);
+	if (NULL == alloc->block_array)
+	{
+		goto out2;
+	}
+
+	memcpy(alloc->block_array, blocks, sizeof(ump_dd_physical_block_64) * num_blocks);
+
+#ifdef CONFIG_KDS
+	kds_resource_init(&alloc->kds_res);
+#endif
+	alloc->size = size;
+	alloc->blocksCount = num_blocks;
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+	/* all set up, allocate an ID */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	alloc->management_flags |= UMP_MGMT_EXTERNAL;
+
+	return alloc;
+
+out3:
+	kfree(alloc->block_array);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+
+/*
+ * UMP v1 API
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks)
+{
+	ump_dd_handle mem;
+	ump_dd_physical_block_64 *block_64_array;
+	ump_alloc_flags flags = UMP_V1_API_DEFAULT_ALLOCATION_FLAGS;
+	unsigned long i;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	block_64_array = kzalloc(num_blocks * sizeof(*block_64_array), __GFP_HARDWALL | GFP_KERNEL);
+
+	if(block_64_array == NULL)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/* copy physical blocks */
+	for( i = 0; i < num_blocks; i++)
+	{
+		block_64_array[i].addr = blocks[i].addr;
+		block_64_array[i].size = blocks[i].size;
+	}
+
+	mem = ump_dd_create_from_phys_blocks_64(block_64_array, num_blocks, flags, NULL, NULL, NULL);
+
+	kfree(block_64_array);
+
+	return mem;
+
+}
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h b/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h
new file mode 100755
index 0000000..8cb424d
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h
@@ -0,0 +1,228 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_CORE_H_
+#define _UMP_KERNEL_CORE_H_
+
+
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <asm/atomic.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/cred.h>
+#include <asm/mmu_context.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+#include <linux/ump-common.h>
+#include <ump/src/common/ump_kernel_descriptor_mapping.h>
+
+/* forward decl */
+struct umpp_session;
+
+/**
+ * UMP handle metadata.
+ * Tracks various data about a handle not of any use to user space
+ */
+typedef enum
+{
+	UMP_MGMT_EXTERNAL = (1ul << 0) /**< Handle created via the ump_dd_create_from_phys_blocks interface */
+	/* (1ul << 31) not to be used */
+} umpp_management_flags;
+
+/**
+ * Structure tracking the single global UMP device.
+ * Holds global data like the ID map
+ */
+typedef struct umpp_device
+{
+	struct mutex secure_id_map_lock; /**< Lock protecting access to the map */
+	umpp_descriptor_mapping * secure_id_map; /**< Map of all known secure IDs on the system */
+} umpp_device;
+
+/**
+ * Structure tracking all memory allocations of a UMP allocation.
+ * Tracks info about an mapping so we can verify cache maintenace
+ * operations and help in the unmap cleanup.
+ */
+typedef struct umpp_cpu_mapping
+{
+	struct list_head        link; /**< link to list of mappings for an allocation */
+	void                  *vaddr_start; /**< CPU VA start of the mapping */
+	size_t                nr_pages; /**< Size (in pages) of the mapping */
+	uint64_t              page_off; /**< Offset (in pages) from start of the allocation where the mapping starts */
+	ump_dd_handle         handle; /**< Which handle this mapping is linked to */
+	struct umpp_session * session; /**< Which session created the mapping */
+} umpp_cpu_mapping;
+
+/**
+ * Structure tracking UMP allocation.
+ * Represent a memory allocation with its ID.
+ * Tracks all needed meta-data about an allocation.
+ * */
+typedef struct umpp_allocation
+{
+	ump_secure_id id; /**< Secure ID of the allocation */
+	atomic_t refcount; /**< Usage count */
+
+	ump_alloc_flags flags; /**< Flags for all supported devices */
+	uint32_t management_flags; /**< Managment flags tracking */
+
+	pid_t owner; /**< The process ID owning the memory if not sharable */
+
+	ump_dd_security_filter filter_func; /**< Hook to verify use, called during retains from new clients */
+	ump_dd_final_release_callback final_release_func; /**< Hook called when the last reference is removed */
+	void* callback_data; /**< Additional data given to release hook */
+
+	uint64_t size; /**< Size (in bytes) of the allocation */
+	uint64_t blocksCount; /**< Number of physsical blocks the allocation is built up of */
+	ump_dd_physical_block_64 * block_array; /**< Array, one entry per block, describing block start and length */
+
+	struct mutex     map_list_lock; /**< Lock protecting the map_list */
+	struct list_head map_list; /**< Tracks all CPU VA mappings of this allocation */
+
+#ifdef CONFIG_KDS
+	struct kds_resource kds_res; /**< The KDS resource controlling access to this allocation */
+#endif
+
+	void * backendData; /**< Physical memory backend meta-data */
+} umpp_allocation;
+
+/**
+ * Structure tracking use of UMP memory by a session.
+ * Tracks the use of an allocation by a session so session termination can clean up any outstanding references.
+ * Also protects agains non-matched release calls from user space.
+ */
+typedef struct umpp_session_memory_usage
+{
+	ump_secure_id id; /**< ID being used. For quick look-up */
+	ump_dd_handle mem; /**< Handle being used. */
+
+	/**
+	 * Track how many times has the process retained this handle in the kernel.
+	 * This should usually just be 1(allocated or resolved) or 2(mapped),
+	 * but could be more if someone is playing with the low-level API
+	 * */
+	atomic_t process_usage_count;
+
+	struct list_head link; /**< link to other usage trackers for a session */
+} umpp_session_memory_usage;
+
+/**
+ * Structure representing a session/client.
+ * Tracks the UMP allocations being used by this client.
+ */
+typedef struct umpp_session
+{
+	struct mutex session_lock; /**< Lock for memory usage manipulation */
+	struct list_head memory_usage; /**< list of memory currently being used by the this session */
+	void*  import_handler_data[UMPP_EXTERNAL_MEM_COUNT]; /**< Import modules per-session data pointer */
+} umpp_session;
+
+/**
+ * UMP core setup.
+ * Called by any OS specific startup function to initialize the common part.
+ * @return UMP_OK if core initialized correctly, any other value for errors
+ */
+ump_result umpp_core_constructor(void);
+
+/**
+ * UMP core teardown.
+ * Called by any OS specific unload function to clean up the common part.
+ */
+void umpp_core_destructor(void);
+
+/**
+ * UMP session start.
+ * Called by any OS specific session handler when a new session is detected
+ * @return Non-NULL if a matching core session could be set up. NULL on failure
+ */
+umpp_session *umpp_core_session_start(void);
+
+/**
+ * UMP session end.
+ * Called by any OS specific session handler when a session is ended/terminated.
+ * @param session The core session object returned by ump_core_session_start
+ */
+void umpp_core_session_end(umpp_session *session);
+
+/**
+ * Find a mapping object (if any) for this allocation.
+ * Called by any function needing to identify a mapping from a user virtual address.
+ * Verifies that the whole range to be within a mapping object.
+ * @param alloc The UMP allocation to find a matching mapping object of
+ * @param uaddr User mapping address to find the mapping object for
+ * @param size Length of the mapping
+ * @return NULL on error (no match found), pointer to mapping object if match found
+ */
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void* uaddr, size_t size);
+
+/**
+ * Register a new mapping of an allocation.
+ * Called by functions creating a new mapping of an allocation, typically OS specific handlers.
+ * @param alloc The allocation object which has been mapped
+ * @param map Info about the mapping
+ */
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map);
+
+/**
+ * Remove and free mapping object from an allocation.
+ * @param alloc The allocation object to remove the mapping info from
+ * @param target The mapping object to remove
+ */
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target);
+
+/**
+ * Helper to find a block in the blockArray which holds a given byte offset.
+ * @param alloc The allocation object to find the block in
+ * @param offset Offset (in bytes) from allocation start to find the block of
+ * @param[out] block_index Pointer to the index of the block matching
+ * @param[out] block_internal_offset Offset within the returned block of the searched offset
+ * @return 0 if a matching block was found, any other value for error
+ */
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const block_index, uint64_t * const block_internal_offset);
+
+/**
+ * Cache maintenance helper.
+ * Performs the requested cache operation on the given handle.
+ * @param mem Allocation handle
+ * @param op Cache maintenance operation to perform
+ * @param address User mapping at which to do the operation
+ * @param size Length (in bytes) of the range to do the operation on
+ */
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size);
+
+/**
+ * Import module session early init.
+ * Calls session_begin on all installed import modules.
+ * @param session The core session object to initialized the import handler for
+ * */
+void umpp_import_handlers_init(umpp_session * session);
+
+/**
+ * Import module session cleanup.
+ * Calls session_end on all import modules bound to the session.
+ * @param session The core session object to initialized the import handler for
+ */
+void umpp_import_handlers_term(umpp_session * session);
+
+#endif /* _UMP_KERNEL_CORE_H_ */
+
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c b/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
new file mode 100755
index 0000000..c5b0d74
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
@@ -0,0 +1,162 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <common/ump_kernel_priv.h>
+
+#define MALI_PAD_INT(x) (((x) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1))
+
+/**
+ * Allocate a descriptor table capable of holding 'count' mappings
+ * @param count Number of mappings in the table
+ * @return Pointer to a new table, NULL on error
+ */
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count);
+
+/**
+ * Free a descriptor table
+ * @param table The table to free
+ */
+static void descriptor_table_free(umpp_descriptor_table * table);
+
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries)
+{
+	umpp_descriptor_mapping * map = kzalloc(sizeof(umpp_descriptor_mapping), GFP_KERNEL);
+
+	init_entries = MALI_PAD_INT(init_entries);
+	max_entries = MALI_PAD_INT(max_entries);
+
+	if (NULL != map)
+	{
+		map->table = descriptor_table_alloc(init_entries);
+		if (NULL != map->table)
+		{
+			init_rwsem( &map->lock);
+			set_bit(0, map->table->usage);
+			map->max_nr_mappings_allowed = max_entries;
+			map->current_nr_mappings = init_entries;
+			return map;
+
+			descriptor_table_free(map->table);
+		}
+		kfree(map);
+	}
+	return NULL;
+}
+
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map)
+{
+	UMP_ASSERT(NULL != map);
+	descriptor_table_free(map->table);
+	kfree(map);
+}
+
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target)
+{
+ 	int descriptor = 0;
+	UMP_ASSERT(NULL != map);
+	down_write( &map->lock);
+	descriptor = find_first_zero_bit(map->table->usage, map->current_nr_mappings);
+	if (descriptor == map->current_nr_mappings)
+	{
+		/* no free descriptor, try to expand the table */
+		umpp_descriptor_table * new_table;
+		umpp_descriptor_table * old_table = map->table;
+		int nr_mappings_new = map->current_nr_mappings + BITS_PER_LONG;
+
+		if (map->current_nr_mappings >= map->max_nr_mappings_allowed)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+		new_table = descriptor_table_alloc(nr_mappings_new);
+		if (NULL == new_table)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+ 		memcpy(new_table->usage, old_table->usage, (sizeof(unsigned long)*map->current_nr_mappings) / BITS_PER_LONG);
+ 		memcpy(new_table->mappings, old_table->mappings, map->current_nr_mappings * sizeof(void*));
+
+ 		map->table = new_table;
+		map->current_nr_mappings = nr_mappings_new;
+		descriptor_table_free(old_table);
+	}
+
+	/* we have found a valid descriptor, set the value and usage bit */
+	set_bit(descriptor, map->table->usage);
+	map->table->mappings[descriptor] = target;
+
+unlock_and_exit:
+	up_write(&map->lock);
+	return descriptor;
+}
+
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target)
+{
+	int result = -EINVAL;
+ 	UMP_ASSERT(map);
+	UMP_ASSERT(target);
+ 	down_read(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		*target = map->table->mappings[descriptor];
+		result = 0;
+	}
+	/* keep target untouched if the descriptor was not found */
+	up_read(&map->lock);
+	return result;
+}
+
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor)
+{
+	UMP_ASSERT(map);
+ 	down_write(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		map->table->mappings[descriptor] = NULL;
+		clear_bit(descriptor, map->table->usage);
+	}
+	up_write(&map->lock);
+}
+
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count)
+{
+	umpp_descriptor_table * table;
+
+	table = kzalloc(sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG) + (sizeof(void*) * count), __GFP_HARDWALL | GFP_KERNEL );
+
+	if (NULL != table)
+	{
+		table->usage = (unsigned long*)((u8*)table + sizeof(umpp_descriptor_table));
+		table->mappings = (void**)((u8*)table + sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG));
+	}
+
+	return table;
+}
+
+static void descriptor_table_free(umpp_descriptor_table * table)
+{
+ 	UMP_ASSERT(table);
+	kfree(table);
+}
+
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h b/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
new file mode 100755
index 0000000..d06c145
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_kernel_descriptor_mapping.h
+ */
+
+#ifndef _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+#define _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+/**
+ * The actual descriptor mapping table, never directly accessed by clients
+ */
+typedef struct umpp_descriptor_table
+{
+	/* keep as a unsigned long to rely on the OS's bitops support */
+	unsigned long * usage; /**< Pointer to bitpattern indicating if a descriptor is valid/used(1) or not(0) */
+	void** mappings; /**< Array of the pointers the descriptors map to */
+} umpp_descriptor_table;
+
+/**
+ * The descriptor mapping object
+ * Provides a separate namespace where we can map an integer to a pointer
+ */
+typedef struct umpp_descriptor_mapping
+{
+	struct rw_semaphore lock; /**< Lock protecting access to the mapping object */
+	unsigned int max_nr_mappings_allowed; /**< Max number of mappings to support in this namespace */
+	unsigned int current_nr_mappings; /**< Current number of possible mappings */
+	umpp_descriptor_table * table; /**< Pointer to the current mapping table */
+} umpp_descriptor_mapping;
+
+/**
+ * Create a descriptor mapping object.
+ * Create a descriptor mapping capable of holding init_entries growable to max_entries.
+ * ID 0 is reserved so the number of available entries will be max - 1.
+ * @param init_entries Number of entries to preallocate memory for
+ * @param max_entries Number of entries to max support
+ * @return Pointer to a descriptor mapping object, NULL on failure
+ */
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries);
+
+/**
+ * Destroy a descriptor mapping object
+ * @param[in] map The map to free
+ */
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map);
+
+/**
+ * Allocate a new mapping entry (descriptor ID)
+ * Allocates a new entry in the map.
+ * @param[in] map The map to allocate a new entry in
+ * @param[in] target The value to map to
+ * @return The descriptor allocated, ID 0 on failure.
+ */
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target);
+
+/**
+ * Get the value mapped to by a descriptor ID
+ * @param[in] map The map to lookup the descriptor id in
+ * @param[in] descriptor The descriptor ID to lookup
+ * @param[out] target Pointer to a pointer which will receive the stored value
+ *
+ * @return 0 on success lookup, -EINVAL on lookup failure.
+ */
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target);
+
+/**
+ * Free the descriptor ID
+ * For the descriptor to be reused it has to be freed
+ * @param[in] map The map to free the descriptor from
+ * @param descriptor The descriptor ID to free
+ */
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor);
+
+#endif /* _UMP_KERNEL_DESCRIPTOR_MAPPING_H_ */
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h b/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
new file mode 100755
index 0000000..38b6f1b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_PRIV_H_
+#define _UMP_KERNEL_PRIV_H_
+
+#ifdef __KERNEL__
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <asm/cacheflush.h>
+#endif
+
+
+#define UMP_EXPECTED_IDS 64
+#define UMP_MAX_IDS 32768
+
+#ifdef __KERNEL__
+#define UMP_ASSERT(expr) \
+		if (!(expr)) { \
+			printk(KERN_ERR "UMP: Assertion failed! %s,%s,%s,line=%d\n",\
+					#expr,__FILE__,__func__,__LINE__); \
+					BUG(); \
+		}
+
+static inline void ump_sync_to_memory(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/*TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_TO_DEVICE);
+	mb(); /* for outer_sync (if needed) */
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+
+static inline void ump_sync_to_cpu(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/* TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_FROM_DEVICE);
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+#endif /* __KERNEL__*/
+#endif /* _UMP_KERNEL_PRIV_H_ */
+
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/imports/ion/Makefile b/midgard/r13p0/kernel/drivers/base/ump/src/imports/ion/Makefile
new file mode 100755
index 0000000..ef74b27
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/imports/ion/Makefile
@@ -0,0 +1,53 @@
+#
+# (C) COPYRIGHT 2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+
+ifneq ($(KERNELRELEASE),)
+# Inside the kernel build system
+
+EXTRA_CFLAGS += -I$(KBUILD_EXTMOD) -I$(KBUILD_EXTMOD)/../../../../..
+KBUILD_EXTRA_SYMBOLS += "$(KBUILD_EXTMOD)/../../Module.symvers"
+
+SRC += ump_kernel_import_ion.c
+
+MODULE:=ump_ion_import.ko
+
+obj-m := $(MODULE:.ko=.o)
+$(MODULE:.ko=-y) := $(SRC:.c=.o)
+$(MODULE:.ko=-objs) := $(SRC:.c=.o)
+
+else
+# Outside the kernel build system
+#
+#
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR)
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+endif
+
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/imports/ion/sconscript b/midgard/r13p0/kernel/drivers/base/ump/src/imports/ion/sconscript
new file mode 100755
index 0000000..cff24c8
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/imports/ion/sconscript
@@ -0,0 +1,49 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ion = env.Clone()
+
+if env_ion['ump_ion'] != '1':
+	Return()
+
+# Source files required for UMP.
+ion_src = [Glob('#kernel/drivers/base/ump/src/imports/ion/*.c')]
+
+# Note: cleaning via the Linux kernel build system does not yet work
+if env_ion.GetOption('clean') :
+	makeAction=Action("cd ${SOURCE.dir} && make clean", '$MAKECOMSTR')
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make PLATFORM=${platform} && cp ump_ion_import.ko $STATIC_LIB_PATH/ump_ion_import.ko", '$MAKECOMSTR')
+# The target is ump_import_ion.ko, built from the source in ion_src, via the action makeAction
+# ump_import_ion.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+# kernel build system, after which it can be installed to the directory specified if
+# "libs_install" is set; this is done by LibTarget.
+cmd = env_ion.Command('$STATIC_LIB_PATH/ump_ion_import.ko', ion_src, [makeAction])
+
+# Until we fathom out how the invoke the Linux build system to clean, we can use Clean
+# to remove generated files.
+
+patterns = ['*.mod.c', '*.o', '*.ko', '*.a', '.*.cmd', 'modules.order', '.tmp_versions', 'Module.symvers']
+
+for p in patterns:
+	Clean(cmd, Glob('#kernel/drivers/base/ump/src/imports/ion/%s' % p))
+
+env_ion.Depends('$STATIC_LIB_PATH/ump_ion_import.ko', '$STATIC_LIB_PATH/ump.ko')
+env_ion.KernelObjTarget('ump', cmd)
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c b/midgard/r13p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
new file mode 100755
index 0000000..12b2e32
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/dma-mapping.h>
+#include "ion.h"
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+struct ion_wrapping_info
+{
+	struct ion_client *   ion_client;
+	struct ion_handle *   ion_handle;
+	int                   num_phys_blocks;
+	struct scatterlist *  sglist;
+};
+
+static struct ion_device * ion_device_get(void)
+{
+	/* < Customer to provide implementation >
+	 * Return a pointer to the global ion_device on the system
+	 */
+	return NULL;
+}
+
+static int import_ion_client_create(void** const custom_session_data)
+{
+	struct ion_client ** ion_client;
+
+	ion_client = (struct ion_client**)custom_session_data;
+
+	*ion_client = ion_client_create(ion_device_get(), "ump");
+
+	return PTR_RET(*ion_client);
+}
+
+
+static void import_ion_client_destroy(void* custom_session_data)
+{
+	struct ion_client * ion_client;
+
+	ion_client = (struct ion_client*)custom_session_data;
+	BUG_ON(!ion_client);
+
+	ion_client_destroy(ion_client);
+}
+
+
+static void import_ion_final_release_callback(const ump_dd_handle handle, void * info)
+{
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!info);
+
+	(void)handle;
+	ion_info = (struct ion_wrapping_info*)info;
+
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+	kfree(ion_info);
+	module_put(THIS_MODULE);
+}
+
+static ump_dd_handle import_ion_import(void * custom_session_data, void * pfd, ump_alloc_flags flags)
+{
+	int fd;
+	ump_dd_handle ump_handle;
+	struct scatterlist * sg;
+	int num_dma_blocks;
+	ump_dd_physical_block_64 * phys_blocks;
+	unsigned long i;
+	struct sg_table * sgt;
+
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!custom_session_data);
+	BUG_ON(!pfd);
+
+	ion_info = kzalloc(GFP_KERNEL, sizeof(*ion_info));
+	if (NULL == ion_info)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	ion_info->ion_client = (struct ion_client*)custom_session_data;
+
+	if (get_user(fd, (int*)pfd))
+	{
+		goto out;
+	}
+
+	ion_info->ion_handle = ion_import_dma_buf(ion_info->ion_client, fd);
+
+	if (IS_ERR_OR_NULL(ion_info->ion_handle))
+	{
+		goto out;
+	}
+
+	sgt = ion_sg_table(ion_info->ion_client, ion_info->ion_handle);
+	if (IS_ERR_OR_NULL(sgt))
+	{
+		goto ion_dma_map_failed;
+	}
+
+	ion_info->sglist = sgt->sgl;
+
+	sg = ion_info->sglist;
+	while (sg)
+	{
+		ion_info->num_phys_blocks++;
+		sg = sg_next(sg);
+	}
+
+	num_dma_blocks = dma_map_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	if (0 == num_dma_blocks)
+	{
+		goto linux_dma_map_failed;
+	}
+
+	phys_blocks = vmalloc(num_dma_blocks * sizeof(*phys_blocks));
+	if (NULL == phys_blocks)
+	{
+		goto vmalloc_failed;
+	}
+
+	for_each_sg(ion_info->sglist, sg, num_dma_blocks, i)
+	{
+		phys_blocks[i].addr = sg_phys(sg);
+		phys_blocks[i].size = sg_dma_len(sg);
+	}
+
+	ump_handle = ump_dd_create_from_phys_blocks_64(phys_blocks, num_dma_blocks, flags, NULL, import_ion_final_release_callback, ion_info);
+
+	vfree(phys_blocks);
+
+	if (ump_handle != UMP_DD_INVALID_MEMORY_HANDLE)
+	{
+		/*
+		 * As we have a final release callback installed
+		 * we must keep the module locked until
+		 * the callback has been triggered
+		 * */
+		__module_get(THIS_MODULE);
+		return ump_handle;
+	}
+
+	/* failed*/
+vmalloc_failed:
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+linux_dma_map_failed:
+ion_dma_map_failed:
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+out:
+	kfree(ion_info);
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+struct ump_import_handler import_handler_ion =
+{
+	.linux_module =  THIS_MODULE,
+	.session_begin = import_ion_client_create,
+	.session_end =   import_ion_client_destroy,
+	.import =        import_ion_import
+};
+
+static int __init import_ion_initialize_module(void)
+{
+	/* register with UMP */
+	return ump_import_module_register(UMP_EXTERNAL_MEM_TYPE_ION, &import_handler_ion);
+}
+
+static void __exit import_ion_cleanup_module(void)
+{
+	/* unregister import handler */
+	ump_import_module_unregister(UMP_EXTERNAL_MEM_TYPE_ION);
+}
+
+/* Setup init and exit functions for this module */
+module_init(import_ion_initialize_module);
+module_exit(import_ion_cleanup_module);
+
+/* And some module information */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/imports/sconscript b/midgard/r13p0/kernel/drivers/base/ump/src/imports/sconscript
new file mode 100755
index 0000000..8f50683
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/imports/sconscript
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os, sys
+Import('env')
+
+import_modules = [ os.path.join( path, 'sconscript' ) for path in sorted(os.listdir( os.getcwd() )) ]
+
+for m in import_modules:
+	if os.path.exists(m):
+		SConscript( m, variant_dir=os.path.join( env['BUILD_DIR_PATH'], os.path.dirname(m) ), duplicate=0 )
+
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c b/midgard/r13p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
new file mode 100755
index 0000000..d6c3c53
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
@@ -0,0 +1,831 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump-ioctl.h>
+#include <linux/ump.h>
+
+#include <asm/uaccess.h>	         /* copy_*_user */
+#include <linux/compat.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/device.h>            /* class registration support */
+
+#include <common/ump_kernel_core.h>
+
+#include "ump_kernel_linux_mem.h"
+#include <ump_arch.h>
+
+
+struct ump_linux_device
+{
+	struct cdev cdev;
+	struct class * ump_class;
+};
+
+/* Name of the UMP device driver */
+static char ump_dev_name[] = "ump"; /* should be const, but the functions we call requires non-cost */
+
+/* Module parameter to control log level */
+int ump_debug_level = 2;
+module_param(ump_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(ump_debug_level, "Higher number, more dmesg output");
+
+/* By default the module uses any available major, but it's possible to set it at load time to a specific number */
+int ump_major = 0;
+module_param(ump_major, int, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_major, "Device major number");
+
+#define UMP_REV_STRING "1.0"
+
+char * ump_revision = UMP_REV_STRING;
+module_param(ump_revision, charp, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_revision, "Revision info");
+
+static int umpp_linux_open(struct inode *inode, struct file *filp);
+static int umpp_linux_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+/* This variable defines the file operations this UMP device driver offers */
+static struct file_operations ump_fops =
+{
+	.owner   = THIS_MODULE,
+	.open    = umpp_linux_open,
+	.release = umpp_linux_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = umpp_linux_ioctl,
+#else
+	.ioctl   = umpp_linux_ioctl,
+#endif
+	.compat_ioctl = umpp_linux_ioctl,
+	.mmap = umpp_linux_mmap
+};
+
+/* import module handling */
+DEFINE_MUTEX(import_list_lock);
+struct ump_import_handler *  import_handlers[UMPP_EXTERNAL_MEM_COUNT];
+
+/* The global variable containing the global device data */
+static struct ump_linux_device ump_linux_device;
+
+#define DBG_MSG(level, ...) do { \
+if ((level) <=  ump_debug_level)\
+{\
+printk(KERN_DEBUG "UMP<" #level ">:\n" __VA_ARGS__);\
+} \
+} while (0)
+
+#define MSG_ERR(...) do{ \
+printk(KERN_ERR "UMP: ERR: %s\n           %s()%4d\n", __FILE__, __func__  , __LINE__) ; \
+printk(KERN_ERR __VA_ARGS__); \
+printk(KERN_ERR "\n"); \
+} while(0)
+
+#define MSG(...) do{ \
+printk(KERN_INFO "UMP: " __VA_ARGS__);\
+} while (0)
+
+/*
+ * This function is called by Linux to initialize this module.
+ * All we do is initialize the UMP device driver.
+ */
+static int __init umpp_linux_initialize_module(void)
+{
+	ump_result err;
+
+	err = umpp_core_constructor();
+	if (UMP_OK != err)
+	{
+		MSG_ERR("UMP device driver init failed\n");
+		return -ENOTTY;
+	}
+
+	MSG("UMP device driver %s loaded\n", UMP_REV_STRING);
+	return 0;
+}
+
+
+
+/*
+ * This function is called by Linux to unload/terminate/exit/cleanup this module.
+ * All we do is terminate the UMP device driver.
+ */
+static void __exit umpp_linux_cleanup_module(void)
+{
+	DBG_MSG(2, "Unloading UMP device driver\n");
+	umpp_core_destructor();
+	DBG_MSG(2, "Module unloaded\n");
+}
+
+
+
+/*
+ * Initialize the UMP device driver.
+ */
+ump_result umpp_device_initialize(void)
+{
+	int err;
+	dev_t dev = 0;
+
+	if (0 == ump_major)
+	{
+		/* auto select a major */
+		err = alloc_chrdev_region(&dev, 0, 1, ump_dev_name);
+		ump_major = MAJOR(dev);
+	}
+	else
+	{
+		/* use load time defined major number */
+		dev = MKDEV(ump_major, 0);
+		err = register_chrdev_region(dev, 1, ump_dev_name);
+	}
+
+	if (0 == err)
+	{
+		memset(&ump_linux_device, 0, sizeof(ump_linux_device));
+
+		/* initialize our char dev data */
+		cdev_init(&ump_linux_device.cdev, &ump_fops);
+		ump_linux_device.cdev.owner = THIS_MODULE;
+		ump_linux_device.cdev.ops = &ump_fops;
+
+		/* register char dev with the kernel */
+		err = cdev_add(&ump_linux_device.cdev, dev, 1/*count*/);
+		if (0 == err)
+		{
+
+			ump_linux_device.ump_class = class_create(THIS_MODULE, ump_dev_name);
+			if (IS_ERR(ump_linux_device.ump_class))
+			{
+				err = PTR_ERR(ump_linux_device.ump_class);
+			}
+			else
+			{
+				struct device * mdev;
+				mdev = device_create(ump_linux_device.ump_class, NULL, dev, NULL, ump_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return UMP_OK;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(ump_linux_device.ump_class);
+			}
+			cdev_del(&ump_linux_device.cdev);
+		}
+
+		unregister_chrdev_region(dev, 1);
+	}
+
+	return UMP_ERROR;
+}
+
+
+
+/*
+ * Terminate the UMP device driver
+ */
+void umpp_device_terminate(void)
+{
+	dev_t dev = MKDEV(ump_major, 0);
+
+	device_destroy(ump_linux_device.ump_class, dev);
+	class_destroy(ump_linux_device.ump_class);
+
+	/* unregister char device */
+	cdev_del(&ump_linux_device.cdev);
+
+	/* free major */
+	unregister_chrdev_region(dev, 1);
+}
+
+
+static int umpp_linux_open(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = umpp_core_session_start();
+	if (NULL == session)
+	{
+		return -EFAULT;
+	}
+	
+	filp->private_data = session;
+
+	return 0;
+}
+
+static int umpp_linux_release(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = filp->private_data;
+
+	umpp_core_session_end(session);
+
+	filp->private_data = NULL;
+
+	return 0;
+}
+
+/**************************/
+/*ioctl specific functions*/
+/**************************/
+static int do_ump_dd_allocate(umpp_session * session, ump_k_allocate * params)
+{
+	ump_dd_handle new_allocation;
+	new_allocation = ump_dd_allocate_64(params->size, params->alloc_flags, NULL, NULL, NULL);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	printk(KERN_WARNING "UMP: Allocation FAILED\n");
+	return -ENOMEM;
+}
+
+static int do_ump_dd_retain(umpp_session * session, ump_k_retain * params)
+{
+	umpp_session_memory_usage * it;
+
+	mutex_lock(&session->session_lock);
+
+	/* try to find it on the session usage list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found to already be in use */
+			/* check for overflow */
+			while(1)
+			{
+				int refcnt = atomic_read(&it->process_usage_count);
+				if (refcnt + 1 > 0)
+				{
+					/* add a process local ref */
+					if(atomic_cmpxchg(&it->process_usage_count, refcnt, refcnt + 1) == refcnt)
+					{
+						mutex_unlock(&session->session_lock);
+						return 0;
+					}
+				}
+				else
+				{
+					/* maximum usage cap reached */
+					mutex_unlock(&session->session_lock);
+					return -EBUSY;
+				}
+			}
+		}
+	}
+	/* try to look it up globally */
+
+	it = kmalloc(sizeof(*it), GFP_KERNEL);
+
+	if (NULL != it)
+	{
+		it->mem = ump_dd_from_secure_id(params->secure_id);
+		if (UMP_DD_INVALID_MEMORY_HANDLE != it->mem)
+		{
+			/* found, add it to the session usage list */
+			it->id = params->secure_id;
+			atomic_set(&it->process_usage_count, 1);
+			list_add(&it->link, &session->memory_usage);
+		}
+		else
+		{
+			/* not found */
+			kfree(it);
+			it = NULL;
+		}
+	}
+
+	mutex_unlock(&session->session_lock);
+
+	return (NULL != it) ? 0 : -ENODEV;
+}
+
+
+static int do_ump_dd_release(umpp_session * session, ump_k_release * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only do a release if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			result = 0;
+
+			if (0 == atomic_sub_return(1, &it->process_usage_count))
+			{
+				/* last ref in this process remove from the usage list and remove the underlying ref */
+				list_del(&it->link);
+				ump_dd_release(it->mem);
+				kfree(it);
+			}
+
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_sizequery(umpp_session * session, ump_k_sizequery * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->size = ump_dd_size_get_64(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_allocation_flags_get(umpp_session * session, ump_k_allocation_flags * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->alloc_flags = ump_dd_allocation_flags_get(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_msync_now(umpp_session * session, ump_k_msync * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, do the cache op */
+#ifdef CONFIG_COMPAT
+			if (is_compat_task())
+			{
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, compat_ptr(params->mapped_ptr.compat_value), params->size);
+				result = 0;
+			}
+			else
+			{
+#endif
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, params->mapped_ptr.value, params->size);
+				result = 0;
+#ifdef CONFIG_COMPAT
+			}
+#endif
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+
+void umpp_import_handlers_init(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		if (import_handlers[i])
+		{
+			import_handlers[i]->session_begin(&session->import_handler_data[i]);
+			/* It is OK if session_begin returned an error.
+			 * We won't do any import calls if so */
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+void umpp_import_handlers_term(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		/* only call if session_begin succeeded */
+		if (session->import_handler_data[i] != NULL)
+		{
+			/* if session_beging succeeded the handler
+			 * should not have unregistered with us */
+			BUG_ON(!import_handlers[i]);
+			import_handlers[i]->session_end(session->import_handler_data[i]);
+			session->import_handler_data[i] = NULL;
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler)
+{
+	int res = -EEXIST;
+
+	/* validate input */
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+	BUG_ON(!handler);
+	BUG_ON(!handler->linux_module);
+	BUG_ON(!handler->session_begin);
+	BUG_ON(!handler->session_end);
+	BUG_ON(!handler->import);
+
+	mutex_lock(&import_list_lock);
+
+	if (!import_handlers[type])
+	{
+		import_handlers[type] = handler;
+		res = 0;
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return res;
+}
+
+void ump_import_module_unregister(enum ump_external_memory_type type)
+{
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+
+	mutex_lock(&import_list_lock);
+	/* an error to call this if ump_import_module_register didn't succeed */
+	BUG_ON(!import_handlers[type]);
+	import_handlers[type] = NULL;
+	mutex_unlock(&import_list_lock);
+}
+
+static struct ump_import_handler * import_handler_get(unsigned int type_id)
+{
+	enum ump_external_memory_type type;
+	struct ump_import_handler * handler;
+
+	/* validate and convert input */
+	/* handle bad data here, not just BUG_ON */
+	if (type_id == 0 || type_id >= UMPP_EXTERNAL_MEM_COUNT)
+		return NULL;
+
+	type = (enum ump_external_memory_type)type_id;
+
+	/* find the handler */
+	mutex_lock(&import_list_lock);
+
+	handler = import_handlers[type];
+
+	if (handler)
+	{
+		if (!try_module_get(handler->linux_module))
+		{
+			handler = NULL;
+		}
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return handler;
+}
+
+static void import_handler_put(struct ump_import_handler * handler)
+{
+	module_put(handler->linux_module);
+}
+
+static int do_ump_dd_import(umpp_session * session, ump_k_import * params)
+{
+	ump_dd_handle new_allocation = UMP_DD_INVALID_MEMORY_HANDLE;
+	struct ump_import_handler * handler;
+
+	handler = import_handler_get(params->type);
+
+	if (handler)
+	{
+		/* try late binding if not already bound */
+		if (!session->import_handler_data[params->type])
+		{
+			handler->session_begin(&session->import_handler_data[params->type]);
+		}
+
+		/* do we have a bound session? */
+		if (session->import_handler_data[params->type])
+		{
+			new_allocation = handler->import( session->import_handler_data[params->type],
+		                                      params->phandle.value,
+		                                      params->alloc_flags);
+		}
+
+		/* done with the handler */
+		import_handler_put(handler);
+	}
+
+	/* did the import succeed? */
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	return -ENOMEM;
+
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	int ret;
+	uint64_t msg[(UMP_CALL_MAX_SIZE+7)>>3]; /* alignment fixup */
+	uint32_t size = _IOC_SIZE(cmd);
+	struct umpp_session *session = filp->private_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+	(void)inode; /* unused arg */
+#endif
+
+	/*
+	 * extract the type and number bitfields, and don't decode
+	 * wrong cmds: return ENOTTY (inappropriate ioctl) before access_ok()
+	 */
+	if (_IOC_TYPE(cmd) != UMP_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if (_IOC_NR(cmd) > UMP_IOC_MAXNR)
+	{
+		return -ENOTTY;
+	}
+
+	switch(cmd)
+	{
+		case UMP_FUNC_ALLOCATE:
+			if (size != sizeof(ump_k_allocate))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocate(session, (ump_k_allocate *)&msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_SIZEQUERY:
+			if (size != sizeof(ump_k_sizequery))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_sizequery(session,(ump_k_sizequery*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_MSYNC:
+			if (size != sizeof(ump_k_msync))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_msync_now(session,(ump_k_msync*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_IMPORT:
+			if (size != sizeof(ump_k_import))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user*)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_import(session, (ump_k_import*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		/* used only by v1 API */
+		case UMP_FUNC_ALLOCATION_FLAGS_GET:
+			if (size != sizeof(ump_k_allocation_flags))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocation_flags_get(session,(ump_k_allocation_flags*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_RETAIN:
+			if (size != sizeof(ump_k_retain))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_retain(session,(ump_k_retain*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		case UMP_FUNC_RELEASE:
+			if (size != sizeof(ump_k_release))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_release(session,(ump_k_release*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		default:
+			/* not ours */
+			return -ENOTTY;
+	}
+	/*redundant below*/
+	return -ENOTTY;
+}
+
+
+/* Export UMP kernel space API functions */
+EXPORT_SYMBOL(ump_dd_allocate_64);
+EXPORT_SYMBOL(ump_dd_allocation_flags_get);
+EXPORT_SYMBOL(ump_dd_secure_id_get);
+EXPORT_SYMBOL(ump_dd_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get_64);
+EXPORT_SYMBOL(ump_dd_size_get_64);
+EXPORT_SYMBOL(ump_dd_retain);
+EXPORT_SYMBOL(ump_dd_release);
+EXPORT_SYMBOL(ump_dd_create_from_phys_blocks_64);
+#ifdef CONFIG_KDS
+EXPORT_SYMBOL(ump_dd_kds_resource_get);
+#endif
+
+/* import API */
+EXPORT_SYMBOL(ump_import_module_register);
+EXPORT_SYMBOL(ump_import_module_unregister);
+
+
+
+/* V1 API */
+EXPORT_SYMBOL(ump_dd_handle_create_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_block_count_get);
+EXPORT_SYMBOL(ump_dd_phys_block_get);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get);
+EXPORT_SYMBOL(ump_dd_size_get);
+EXPORT_SYMBOL(ump_dd_reference_add);
+EXPORT_SYMBOL(ump_dd_reference_release);
+EXPORT_SYMBOL(ump_dd_handle_create_from_phys_blocks);
+
+
+/* Setup init and exit functions for this module */
+module_init(umpp_linux_initialize_module);
+module_exit(umpp_linux_cleanup_module);
+
+/* And some module informatio */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(UMP_REV_STRING);
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c b/midgard/r13p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
new file mode 100755
index 0000000..38db91e
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
@@ -0,0 +1,250 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+#include <linux/version.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/mm.h> /* memory mananger definitions */
+#include <linux/pfn.h>
+#include <linux/highmem.h> /*kmap*/
+
+#include <linux/compat.h> /* is_compat_task */
+
+#include <common/ump_kernel_core.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+static void umpp_vm_close(struct vm_area_struct *vma)
+{
+	umpp_cpu_mapping * mapping;
+	umpp_session * session;
+	ump_dd_handle handle;
+
+	mapping = (umpp_cpu_mapping*)vma->vm_private_data;
+	UMP_ASSERT(mapping);
+	
+	session = mapping->session;
+	handle = mapping->handle;
+
+	umpp_dd_remove_cpu_mapping(mapping->handle, mapping); /* will free the mapping object */
+	ump_dd_release(handle);
+}
+
+
+static const struct vm_operations_struct umpp_vm_ops = {
+	.close = umpp_vm_close
+};
+
+int umpp_phys_commit(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	/* round up to a page boundary */
+	alloc->size = (alloc->size + PAGE_SIZE - 1) & ~((uint64_t)PAGE_SIZE-1) ;
+	/* calculate number of pages */
+	alloc->blocksCount = alloc->size >> PAGE_SHIFT;
+
+	if( (sizeof(ump_dd_physical_block_64) * alloc->blocksCount) > ((size_t)-1))
+	{
+		printk(KERN_WARNING "UMP: umpp_phys_commit - trying to allocate more than possible\n");
+		return -ENOMEM;
+	}
+
+	alloc->block_array = kmalloc(sizeof(ump_dd_physical_block_64) * alloc->blocksCount, __GFP_HARDWALL | GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (NULL == alloc->block_array)
+	{
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		void * mp;
+		struct page * page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | __GFP_NOWARN | __GFP_COLD);
+		if (NULL == page)
+		{
+			break;
+		}
+
+		alloc->block_array[i].addr = page_to_pfn(page) << PAGE_SHIFT;
+		alloc->block_array[i].size = PAGE_SIZE;
+
+		mp = kmap(page);
+		if (NULL == mp)
+		{
+			__free_page(page);
+			break;
+		}
+
+		memset(mp, 0x00, PAGE_SIZE); /* instead of __GFP_ZERO, so we can do cache maintenance */
+		ump_sync_to_memory(PFN_PHYS(page_to_pfn(page)), mp, PAGE_SIZE);
+		kunmap(page);
+	}
+
+	if (i == alloc->blocksCount)
+	{
+		return 0;
+	}
+	else
+	{
+		uint64_t j;
+		for (j = 0; j < i; j++)
+		{
+			struct page * page;
+			page = pfn_to_page(alloc->block_array[j].addr >> PAGE_SHIFT);
+			__free_page(page);
+		}
+		
+		kfree(alloc->block_array);
+
+		return -ENOMEM;
+	}
+}
+
+void umpp_phys_free(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		__free_page(pfn_to_page(alloc->block_array[i].addr >> PAGE_SHIFT));
+	}
+
+	kfree(alloc->block_array);
+}
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma)
+{
+	ump_secure_id id;
+	ump_dd_handle h;
+	size_t offset;
+	int err = -EINVAL;
+	size_t length = vma->vm_end - vma->vm_start;
+
+	umpp_cpu_mapping * map = NULL;
+	umpp_session *session = filp->private_data;
+
+	if ( 0 == length )
+	{
+		return -EINVAL;
+	}
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (NULL == map)
+	{
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* unpack our arg */
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	if (is_compat_task())
+	{
+#endif
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_32;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_32;
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	}
+	else
+	{
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_64;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_64;
+	}
+#endif
+
+	h = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE != h)
+	{
+		uint64_t i;
+		uint64_t block_idx;
+		uint64_t block_offset;
+		uint64_t paddr;
+		umpp_allocation * alloc;
+		uint64_t last_byte;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0))
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO | VM_MIXEDMAP | VM_DONTDUMP;
+#else
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO | VM_MIXEDMAP;
+#endif
+		vma->vm_ops = &umpp_vm_ops;
+		vma->vm_private_data = map;
+
+		alloc = (umpp_allocation*)h;
+
+		if( (alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+		{
+			/* cache disabled flag set, disable caching for cpu mappings */
+			vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		}
+
+		last_byte = length + (offset << PAGE_SHIFT) - 1;
+		if (last_byte >= alloc->size || last_byte < (offset << PAGE_SHIFT))
+		{
+			goto err_out;
+		}
+
+		if (umpp_dd_find_start_block(alloc, offset << PAGE_SHIFT, &block_idx, &block_offset))
+		{
+			goto err_out;
+		}
+
+		paddr = alloc->block_array[block_idx].addr + block_offset;
+
+		for (i = 0; i < (length >> PAGE_SHIFT); i++)
+		{
+			/* check if we've overrrun the current block, if so move to the next block */
+			if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+			{
+				block_idx++;
+				UMP_ASSERT(block_idx < alloc->blocksCount);
+				paddr = alloc->block_array[block_idx].addr;
+			}
+
+			err = vm_insert_mixed(vma, vma->vm_start + (i << PAGE_SHIFT), paddr >> PAGE_SHIFT);
+			paddr += PAGE_SIZE;
+		}
+
+		map->vaddr_start = (void*)vma->vm_start;
+		map->nr_pages = length >> PAGE_SHIFT;
+		map->page_off = offset;
+		map->handle = h;
+		map->session = session;
+
+		umpp_dd_add_cpu_mapping(h, map);
+
+		return 0;
+
+		err_out:
+
+		ump_dd_release(h);
+	}
+
+	kfree(map);
+
+out:
+
+	return err;
+}
+
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h b/midgard/r13p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
new file mode 100755
index 0000000..4fbf3b2
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_LINUX_MEM_H_
+#define _UMP_KERNEL_LINUX_MEM_H_
+
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma);
+
+#endif /* _UMP_KERNEL_LINUX_MEM_H_ */
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/sconscript b/midgard/r13p0/kernel/drivers/base/ump/src/sconscript
new file mode 100755
index 0000000..d706e1e
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/sconscript
@@ -0,0 +1,61 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+import re
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ump = env.Clone()
+
+# Source files required for UMP.
+ump_src = [Glob('#kernel/drivers/base/ump/src/linux/*.c'), Glob('#kernel/drivers/base/ump/src/common/*.c'), Glob('#kernel/drivers/base/ump/src/imports/*/*.c')]
+
+env_ump.Append( CPPPATH='#kernel/drivers/base/ump/src/' )
+
+if env_ump.GetOption('clean') :
+	env_ump.Execute(Action("make clean", '[clean] ump'))
+	cmd = env_ump.Command('$STATIC_LIB_PATH/ump.ko', ump_src, [])
+else:
+	if env['unit'] == '1':
+		makeAction=Action("cd ${SOURCE.dir}/.. && make MALI_UNIT_TEST=${unit} PLATFORM=${platform} %s && cp ump.ko $STATIC_LIB_PATH/ump.ko" % env.kernel_get_config_defines(), '$MAKECOMSTR')
+	else:
+		# The target is ump.ko, built from the source in ump_src, via the action makeAction
+		# ump.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+		# kernel build system, after which it can be installed to the directory specified if
+		# "libs_install" is set; this is done by LibTarget.
+		makeAction=Action("cd ${SOURCE.dir}/.. && make PLATFORM=${platform} %s && cp ump.ko $STATIC_LIB_PATH/ump.ko" % env.kernel_get_config_defines(), '$MAKECOMSTR')
+	cmd = env_ump.Command('$STATIC_LIB_PATH/ump.ko', ump_src, [makeAction])
+
+# Add a dependency on kds.ko only when the build is not Android
+# Android uses sync_pt instead of Midgard KDS to achieve KDS functionality
+# Only necessary when KDS is not built into the kernel.
+#
+if env['os'] != 'android':
+	linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+	search_term = '^[\ ]*CONFIG_KDS[\ ]*=[\ ]*y'
+	kds_in_kernel = 0
+	for line in open(linux_config_file, 'r'):
+		if re.search(search_term, line):
+			# KDS in kernel.
+			kds_in_kernel = 1
+	if not kds_in_kernel:
+		env.Depends('$STATIC_LIB_PATH/ump.ko', '$STATIC_LIB_PATH/kds.ko')
+
+env_ump.KernelObjTarget('ump', cmd)
+
+SConscript( 'imports/sconscript' )
+
diff --git a/midgard/r13p0/kernel/drivers/base/ump/src/ump_arch.h b/midgard/r13p0/kernel/drivers/base/ump/src/ump_arch.h
new file mode 100755
index 0000000..2303d56
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/src/ump_arch.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_ARCH_H_
+#define _UMP_ARCH_H_
+
+#include <common/ump_kernel_core.h>
+
+/**
+ * Device specific setup.
+ * Called by the UMP core code to to host OS/device specific setup.
+ * Typical use case is device node creation for talking to user space.
+ * @return UMP_OK on success, any other value on failure
+ */
+extern ump_result umpp_device_initialize(void);
+
+/**
+ * Device specific teardown.
+ * Undo any things done by ump_device_initialize.
+ */
+extern void umpp_device_terminate(void);
+
+extern int umpp_phys_commit(umpp_allocation * alloc);
+extern void umpp_phys_free(umpp_allocation * alloc);
+
+#endif /* _UMP_ARCH_H_ */
diff --git a/midgard/r13p0/kernel/drivers/base/ump/ump_ref_drv.h b/midgard/r13p0/kernel/drivers/base/ump/ump_ref_drv.h
new file mode 100755
index 0000000..9a265fe
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/base/ump/ump_ref_drv.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_ref_drv.h
+ *
+ * This file contains the link to user space part of the UMP API for usage by MALI 400 gralloc.
+ *
+ */
+
+#ifndef _UMP_REF_DRV_H_
+#define _UMP_REF_DRV_H_
+
+#include <ump/ump.h>
+
+
+#endif /* _UMP_REF_DRV_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/Kbuild b/midgard/r13p0/kernel/drivers/gpu/arm/Kbuild
new file mode 100755
index 0000000..19c7e9a
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/Kbuild
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/Kconfig b/midgard/r13p0/kernel/drivers/gpu/arm/Kconfig
new file mode 100755
index 0000000..1f30eb5
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/Kconfig
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menu "ARM GPU Configuration"
+source "drivers/gpu/arm/midgard/Kconfig"
+endmenu
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/Kbuild b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/Kbuild
new file mode 100755
index 0000000..e8e581b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/Kbuild
@@ -0,0 +1,241 @@
+#
+# (C) COPYRIGHT 2012,2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r13p0-00rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+ifeq ($(CONFIG_MALI_ERROR_INJECTION),y)
+MALI_ERROR_INJECT_ON = 1
+endif
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_ERROR_INJECT_ON ?= 0
+MALI_MOCK_TEST ?= 0
+MALI_COVERAGE ?= 0
+MALI_INSTRUMENTATION_LEVEL ?= 0
+# This workaround is for what seems to be a compiler bug we observed in
+# GCC 4.7 on AOSP 4.3.  The bug caused an intermittent failure compiling
+# the "_Pragma" syntax, where an error message is returned:
+#
+# "internal compiler error: unspellable token PRAGMA"
+#
+# This regression has thus far only been seen on the GCC 4.7 compiler bundled
+# with AOSP 4.3.0.  So this makefile, intended for in-tree kernel builds
+# which are not known to be used with AOSP, is hardcoded to disable the
+# workaround, i.e. set the define to 0.
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
+	-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
+	-DMALI_COVERAGE=$(MALI_COVERAGE) \
+	-DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \
+	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+	-DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+
+# Use our defines when compiling
+ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+	mali_kbase_device.c \
+	mali_kbase_cache_policy.c \
+	mali_kbase_mem.c \
+	mali_kbase_mmu.c \
+	mali_kbase_ipa.c \
+	mali_kbase_jd.c \
+	mali_kbase_jd_debugfs.c \
+	mali_kbase_jm.c \
+	mali_kbase_gpuprops.c \
+	mali_kbase_js.c \
+	mali_kbase_js_ctx_attr.c \
+	mali_kbase_event.c \
+	mali_kbase_context.c \
+	mali_kbase_pm.c \
+	mali_kbase_config.c \
+	mali_kbase_vinstr.c \
+	mali_kbase_softjobs.c \
+	mali_kbase_10969_workaround.c \
+	mali_kbase_hw.c \
+	mali_kbase_utility.c \
+	mali_kbase_debug.c \
+	mali_kbase_trace_timeline.c \
+	mali_kbase_gpu_memory_debugfs.c \
+	mali_kbase_mem_linux.c \
+	mali_kbase_core_linux.c \
+	mali_kbase_sync.c \
+	mali_kbase_sync_user.c \
+	mali_kbase_replay.c \
+	mali_kbase_mem_profile_debugfs.c \
+	mali_kbase_mmu_mode_lpae.c \
+	mali_kbase_mmu_mode_aarch64.c \
+	mali_kbase_disjoint_events.c \
+	mali_kbase_gator_api.c \
+	mali_kbase_debug_mem_view.c \
+	mali_kbase_debug_job_fault.c \
+	mali_kbase_smc.c \
+	mali_kbase_mem_pool.c \
+	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_tlstream.c \
+	mali_kbase_strings.c \
+	mali_kbase_as_fault_debugfs.c
+
+ifeq ($(MALI_UNIT_TEST),1)
+	SRC += mali_kbase_tlstream_test.c
+endif
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+	SRC += mali_kbase_regs_dump_debugfs.c
+endif
+
+
+# Job Scheduler Policy: Completely Fair Scheduler
+SRC += mali_kbase_js_policy_cfs.c
+
+ccflags-y += -I$(KBASE_PATH)
+
+ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y)
+	SRC += mali_kbase_platform_fake.c
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y)
+		SRC += platform/vexpress/mali_kbase_config_vexpress.c \
+		platform/vexpress/mali_kbase_cpu_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y)
+		SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/rtsm_ve
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_JUNO),y)
+		SRC += platform/juno/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/juno
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_JUNO_SOC),y)
+		SRC += platform/juno_soc/mali_kbase_config_juno_soc.c
+		ccflags-y += -I$(src)/platform/juno_soc
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_1XV7_A57),y)
+		SRC += platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress_1xv7_a57
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y)
+		SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \
+		platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress_6xvirtex7_10mhz
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_A7_KIPLING),y)
+		SRC += platform/a7_kipling/mali_kbase_config_a7_kipling.c \
+		platform/a7_kipling/mali_kbase_cpu_a7_kipling.c
+		ccflags-y += -I$(src)/platform/a7_kipling
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+	# remove begin and end quotes from the Kconfig string type
+	platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+	MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+	ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+	ifeq ($(CONFIG_MALI_MIDGARD),m)
+	include  $(src)/platform/$(platform_name)/Kbuild
+	else ifeq ($(CONFIG_MALI_MIDGARD),y)
+	obj-$(CONFIG_MALI_MIDGARD) += platform/
+	endif
+	endif
+endif # CONFIG_MALI_PLATFORM_FAKE=y
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+ifeq ($(CONFIG_MALI_MIDGARD),m)
+include  $(src)/platform/$(platform_name)/Kbuild
+else ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-$(CONFIG_MALI_MIDGARD) += platform/
+endif
+endif
+
+ifeq ($(CONFIG_MALI_PLATFORM_DEVICETREE),y)
+	ccflags-y += -I$(src)/platform/devicetree
+endif
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+mali_kbase-y := $(SRC:.c=.o)
+
+mali_kbase-$(CONFIG_MALI_DMA_FENCE) += mali_kbase_dma_fence.o
+
+MALI_BACKEND_PATH ?= backend
+CONFIG_MALI_BACKEND ?= gpu
+CONFIG_MALI_BACKEND_REAL ?= $(CONFIG_MALI_BACKEND)
+
+ifeq ($(MALI_MOCK_TEST),1)
+ifeq ($(CONFIG_MALI_BACKEND_REAL),gpu)
+# Test functionality
+mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
+endif
+endif
+
+include  $(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)/Kbuild
+mali_kbase-y += $(BACKEND:.c=.o)
+
+ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+
+# Default to devicetree platform if neither a fake platform or a thirdparty
+# platform is configured.
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY)$(CONFIG_MALI_PLATFORM_FAKE),)
+CONFIG_MALI_PLATFORM_DEVICETREE := y
+endif
+
+mali_kbase-$(CONFIG_MALI_PLATFORM_DEVICETREE) += \
+    platform/devicetree/mali_clock.o \
+    platform/devicetree/mpgpu.o \
+    platform/devicetree/meson_main2.o \
+    platform/devicetree/platform_gx.o \
+    platform/devicetree/scaling.o \
+	platform/devicetree/mali_kbase_runtime_pm.o \
+	platform/devicetree/mali_kbase_config_devicetree.o
+ccflags-$(CONFIG_MALI_PLATFORM_DEVICETREE) += -I$(src)/platform/devicetree
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/Kconfig b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/Kconfig
new file mode 100755
index 0000000..201832b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/Kconfig
@@ -0,0 +1,225 @@
+#
+# (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menuconfig MALI_MIDGARD
+	tristate "Mali Midgard series support"
+	select GPU_TRACEPOINTS if ANDROID
+	default n
+	help
+	  Enable this option to build support for a ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+	  You will need the Gator device driver already loaded before loading this driver when enabling
+	  Streamline debug support.
+	  This is a legacy interface required by older versions of Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ && !MALI_PLATFORM_DEVICETREE
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD && PM_DEVFREQ
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali"
+	depends on MALI_MIDGARD && !KDS
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if KDS is not present and
+	  the Linux Kernel has built in support for DMA_BUF fences.
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default n
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
+
+config MALI_PLATFORM_FAKE
+	bool "Enable fake platform device support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  When you start to work with the Mali Midgard series device driver the platform-specific code of
+	  the Linux kernel for your platform may not be complete. In this situation the kernel device driver
+	  supports creating the platform device outside of the Linux platform-specific code.
+	  Enable this option if would like to use a platform device configuration from within the device driver.
+
+choice
+	prompt "Platform configuration"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default MALI_PLATFORM_DEVICETREE
+	help
+	  Select the SOC platform that contains a Mali Midgard GPU
+
+config MALI_PLATFORM_DEVICETREE
+	bool "Device Tree platform"
+	depends on OF
+	help
+	  Select this option to use Device Tree with the Mali driver.
+
+	  When using this option the Mali driver will get the details of the
+	  GPU hardware from the Device Tree. This means that the same driver
+	  binary can run on multiple platforms as long as all the GPU hardware
+	  details are described in the device tree.
+
+	  Device Tree is the recommended method for the Mali driver platform
+	  integration.
+
+config MALI_PLATFORM_VEXPRESS
+	depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+	bool "Versatile Express"
+config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ
+	depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+	bool "Versatile Express w/Virtex7 @ 40Mhz"
+config MALI_PLATFORM_GOLDFISH
+	depends on ARCH_GOLDFISH
+	bool "Android Goldfish virtual CPU"
+config MALI_PLATFORM_PBX
+	depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX
+	bool "Realview PBX-A9"
+config MALI_PLATFORM_THIRDPARTY
+	bool "Third Party Platform"
+endchoice
+
+config MALI_PLATFORM_THIRDPARTY_NAME
+	depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT
+	string "Third party platform name"
+	help
+	  Enter the name of a third party platform that is supported. The third part configuration
+	  file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name
+	  specified here.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_FENCE_DEBUG
+	bool "Debug sync fence usage"
+	depends on MALI_MIDGARD && MALI_EXPERT && SYNC
+	default y if MALI_DEBUG
+	help
+	  Select this option to enable additional checking and reporting on the
+	  use of sync fences in the Mali driver.
+
+	  This will add a 3s timeout to all sync fence waits in the Mali
+	  driver, so that when work for Mali has been waiting on a sync fence
+	  for a long time a debug message will be printed, detailing what fence
+	  is causing the block, and which dependent Mali atoms are blocked as a
+	  result of this.
+
+	  The timeout can be changed at runtime through the js_soft_timeout
+	  device attribute, where the timeout is specified in milliseconds.
+
+config MALI_NO_MALI
+	bool "No Mali"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This can be used to test the driver in a simulated environment
+	  whereby the hardware is not physically present. If the hardware is physically
+	  present it will not be used. This can be used to test the majority of the
+	  driver without needing actual hardware or for software benchmarking.
+	  All calls to the simulated hardware will complete immediately as if the hardware
+	  completed the task.
+
+config MALI_ERROR_INJECT
+	bool "Error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+	default n
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_TRACE_TIMELINE
+	bool "Timeline tracing"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enables timeline tracing through the kernel tracepoint system.
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event.	This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_GPU_MMU_AARCH64
+	bool "Use AArch64 page tables"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Use AArch64 format page tables for the GPU instead of LPAE-style.
+	  The two formats have the same functionality and performance but a
+	  future GPU may deprecate or remove the legacy LPAE-style format.
+
+	  The LPAE-style format is supported on all Midgard and current Bifrost
+	  GPUs. Enabling AArch64 format restricts the driver to only supporting
+	  Bifrost GPUs.
+
+	  If in doubt, say N.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/Makefile b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/Makefile
new file mode 100755
index 0000000..e1625e6
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/Makefile
@@ -0,0 +1,42 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
+UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump
+KBASE_PATH_RELATIVE = $(CURDIR)
+KDS_PATH_RELATIVE = $(CURDIR)/../../../..
+EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers
+
+ifeq ($(MALI_UNIT_TEST), 1)
+	EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+
+ifeq ($(MALI_BUS_LOG), 1)
+#Add bus logger symbols
+EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
+endif
+
+# GPL driver supports KDS
+EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
new file mode 100755
index 0000000..2bef9c2
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
new file mode 100755
index 0000000..e38120a
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -0,0 +1,63 @@
+#
+# (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+BACKEND += \
+	backend/gpu/mali_kbase_cache_policy_backend.c \
+	backend/gpu/mali_kbase_device_hw.c \
+	backend/gpu/mali_kbase_gpu.c \
+	backend/gpu/mali_kbase_gpuprops_backend.c \
+	backend/gpu/mali_kbase_debug_job_fault_backend.c \
+	backend/gpu/mali_kbase_irq_linux.c \
+	backend/gpu/mali_kbase_instr_backend.c \
+	backend/gpu/mali_kbase_jm_as.c \
+	backend/gpu/mali_kbase_jm_hw.c \
+	backend/gpu/mali_kbase_jm_rb.c \
+	backend/gpu/mali_kbase_js_affinity.c \
+	backend/gpu/mali_kbase_js_backend.c \
+	backend/gpu/mali_kbase_mmu_hw_direct.c \
+	backend/gpu/mali_kbase_pm_backend.c \
+	backend/gpu/mali_kbase_pm_driver.c \
+	backend/gpu/mali_kbase_pm_metrics.c \
+	backend/gpu/mali_kbase_pm_ca.c \
+	backend/gpu/mali_kbase_pm_ca_fixed.c \
+	backend/gpu/mali_kbase_pm_always_on.c \
+	backend/gpu/mali_kbase_pm_coarse_demand.c \
+	backend/gpu/mali_kbase_pm_demand.c \
+	backend/gpu/mali_kbase_pm_policy.c \
+	backend/gpu/mali_kbase_time.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+BACKEND += \
+	backend/gpu/mali_kbase_pm_ca_random.c \
+	backend/gpu/mali_kbase_pm_ca_demand.c \
+	backend/gpu/mali_kbase_pm_demand_always_powered.c \
+	backend/gpu/mali_kbase_pm_fast_start.c
+endif
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+BACKEND += backend/gpu/mali_kbase_devfreq.c
+endif
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+	# Dummy model
+	BACKEND += backend/gpu/mali_kbase_model_dummy.c
+	BACKEND += backend/gpu/mali_kbase_model_linux.c
+	# HW error simulation
+	BACKEND += backend/gpu/mali_kbase_model_error_generator.c
+endif
+
+ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
+	BACKEND += backend/gpu/mali_kbase_power_model_simple.c
+endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
new file mode 100755
index 0000000..c8ae87e
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100755
index 0000000..c686253
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100755
index 0000000..fe98691
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+  * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+  *			in the GPU.
+  * @kbdev:	Device pointer
+  * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+  */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
new file mode 100755
index 0000000..7851ea6
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_debug_job_fault.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+/*GPU_CONTROL_REG(r)*/
+static int gpu_control_reg_snapshot[] = {
+	GPU_ID,
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI
+};
+
+/* JOB_CONTROL_REG(r) */
+static int job_control_reg_snapshot[] = {
+	JOB_IRQ_MASK,
+	JOB_IRQ_STATUS
+};
+
+/* JOB_SLOT_REG(n,r) */
+static int job_slot_reg_snapshot[] = {
+	JS_HEAD_LO,
+	JS_HEAD_HI,
+	JS_TAIL_LO,
+	JS_TAIL_HI,
+	JS_AFFINITY_LO,
+	JS_AFFINITY_HI,
+	JS_CONFIG,
+	JS_STATUS,
+	JS_HEAD_NEXT_LO,
+	JS_HEAD_NEXT_HI,
+	JS_AFFINITY_NEXT_LO,
+	JS_AFFINITY_NEXT_HI,
+	JS_CONFIG_NEXT
+};
+
+/*MMU_REG(r)*/
+static int mmu_reg_snapshot[] = {
+	MMU_IRQ_MASK,
+	MMU_IRQ_STATUS
+};
+
+/* MMU_AS_REG(n,r) */
+static int as_reg_snapshot[] = {
+	AS_TRANSTAB_LO,
+	AS_TRANSTAB_HI,
+	AS_MEMATTR_LO,
+	AS_MEMATTR_HI,
+	AS_FAULTSTATUS,
+	AS_FAULTADDRESS_LO,
+	AS_FAULTADDRESS_HI,
+	AS_STATUS
+};
+
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range)
+{
+	int i, j;
+	int offset = 0;
+	int slot_number;
+	int as_number;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	slot_number = kctx->kbdev->gpu_props.num_job_slots;
+	as_number = kctx->kbdev->gpu_props.num_address_spaces;
+
+	/* get the GPU control registers*/
+	for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job control registers*/
+	for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				JOB_CONTROL_REG(job_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job Slot registers*/
+	for (j = 0; j < slot_number; j++)	{
+		for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+			JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	/* get the MMU registers*/
+	for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Address space registers*/
+	for (j = 0; j < as_number; j++) {
+		for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+					MMU_AS_REG(j, as_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	WARN_ON(offset >= (reg_range*2/4));
+
+	/* set the termination flag*/
+	kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
+	kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
+			offset);
+
+	return true;
+}
+
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
+{
+	int offset = 0;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
+		kctx->reg_dump[offset+1] =
+				kbase_reg_read(kctx->kbdev,
+						kctx->reg_dump[offset], NULL);
+		offset += 2;
+	}
+	return true;
+}
+
+
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
new file mode 100755
index 0000000..696dd50
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -0,0 +1,401 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <backend/gpu/mali_kbase_power_model_simple.h>
+#endif
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#endif /* Linux >= 3.13 */
+
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
+static struct devfreq_simple_ondemand_data ondemand_data = {
+		.upthreshold = 90,
+		.downdifferential = 5,
+};
+
+#define DEV_FREQ_GOV_DATA		(&ondemand_data)
+
+static ssize_t upthreshold_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ondemand_data.upthreshold);
+}
+
+static ssize_t upthreshold_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	unsigned int t;
+	int ret;
+
+	ret = sscanf(buf, "%u", &t);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (t >= 100 || t == 0) {
+		dev_err(dev, "invalid input:%s\n", buf);
+		return -EINVAL;
+	}
+	ondemand_data.upthreshold = t;
+	return count;
+
+}
+static DEVICE_ATTR_RW(upthreshold);
+
+static ssize_t downdifferential_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ondemand_data.downdifferential);
+}
+
+static ssize_t downdifferential_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	unsigned int t;
+	int ret;
+
+	ret = sscanf(buf, "%u", &t);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (t >= 100 || t == 0) {
+		dev_err(dev, "invalid input:%s\n", buf);
+		return -EINVAL;
+	}
+	ondemand_data.downdifferential = t;
+	return count;
+
+}
+static DEVICE_ATTR_RW(downdifferential);
+#else
+#define DEV_FREQ_GOV_DATA		(NULL)
+#endif /* CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND */
+
+static ssize_t utilisation_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev->parent);
+	unsigned long total_time = 0, busy_time = 0;
+	unsigned long utilise;
+
+	kbase_pm_get_dvfs_utilisation(kbdev, &total_time, &busy_time);
+
+	if (total_time == 0) {
+		utilise = 0;
+	} else {
+		/* round up */
+		utilise = (busy_time * 100 + (total_time >> 1)) / total_time;
+	}
+	return sprintf(buf, "%ld\n", utilise);
+}
+static DEVICE_ATTR_RO(utilisation);
+
+static struct device_attribute *kbase_dev_attr[] = {
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
+	&dev_attr_downdifferential,
+	&dev_attr_upthreshold,
+#endif
+	&dev_attr_utilisation
+};
+
+static int kbase_dev_add_attr(struct device *dev)
+{
+	int i, ret;
+	for (i = 0; i <  ARRAY_SIZE(kbase_dev_attr); i++) {
+		if (!kbase_dev_attr[i])
+			continue;
+		ret = device_create_file(dev, kbase_dev_attr[i]);
+		if (ret)
+			return ret;
+	}
+	return ret;
+}
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+	if (IS_ERR_OR_NULL(opp)) {
+		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (kbdev->current_freq == freq) {
+		*target_freq = freq;
+		return 0;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq < freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to increase voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(kbdev->clock, freq);
+	if (err) {
+		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+				freq, *target_freq);
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq > freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	*target_freq = freq;
+	kbdev->current_voltage = voltage;
+	kbdev->current_freq = freq;
+
+	kbase_pm_reset_dvfs_utilisation(kbdev);
+
+	return err;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	*freq = kbdev->current_freq;
+
+	return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	stat->current_frequency = kbdev->current_freq;
+
+	kbase_pm_get_dvfs_utilisation(kbdev,
+			&stat->total_time, &stat->busy_time);
+
+	stat->private_data = NULL;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	if (kbdev->devfreq_cooling)
+		memcpy(&kbdev->devfreq_cooling->last_status, stat,
+				sizeof(*stat));
+#endif
+#endif
+
+	return 0;
+}
+
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+		struct devfreq_dev_profile *dp)
+{
+	int count;
+	int i = 0;
+	unsigned long freq = 0;
+	struct dev_pm_opp *opp;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	if (count < 0) {
+		rcu_read_unlock();
+		return count;
+	}
+	rcu_read_unlock();
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	for (i = 0; i < count; i++, freq++) {
+		opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
+				count, i);
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	kbase_devfreq_term_freq_table(kbdev);
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	int err;
+
+	if (!kbdev->clock)
+		return -ENODEV;
+
+	kbdev->current_freq = clk_get_rate(kbdev->clock);
+
+	dp = &kbdev->devfreq_profile;
+
+	dp->initial_freq = kbdev->current_freq;
+	dp->polling_ms = 100;
+	dp->target = kbase_devfreq_target;
+	dp->get_dev_status = kbase_devfreq_status;
+	dp->get_cur_freq = kbase_devfreq_cur_freq;
+	dp->exit = kbase_devfreq_exit;
+
+	if (kbase_devfreq_init_freq_table(kbdev, dp))
+		return -EFAULT;
+
+	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+				"simple_ondemand", DEV_FREQ_GOV_DATA);
+	if (IS_ERR(kbdev->devfreq)) {
+		kbase_devfreq_term_freq_table(kbdev);
+		return PTR_ERR(kbdev->devfreq);
+	}
+
+	kbase_dev_add_attr(&kbdev->devfreq->dev);
+	err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to register OPP notifier (%d)\n", err);
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	err = kbase_power_model_simple_init(kbdev);
+	if (err && err != -ENODEV && err != -EPROBE_DEFER) {
+		dev_err(kbdev->dev,
+			"Failed to initialize simple power model (%d)\n",
+			err);
+		goto cooling_failed;
+	}
+	if (err == -EPROBE_DEFER)
+		goto cooling_failed;
+	if (err != -ENODEV) {
+		kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+				kbdev->dev->of_node,
+				kbdev->devfreq,
+				&power_model_simple_ops);
+		if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+			err = PTR_ERR(kbdev->devfreq_cooling);
+			dev_err(kbdev->dev,
+				"Failed to register cooling device (%d)\n",
+				err);
+			goto cooling_failed;
+		}
+	} else {
+		err = 0;
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	if (devfreq_remove_device(kbdev->devfreq))
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	return err;
+}
+
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+	int err;
+
+	dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->devfreq_cooling)
+		devfreq_cooling_unregister(kbdev->devfreq_cooling);
+#endif
+
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+	err = devfreq_remove_device(kbdev->devfreq);
+	if (err)
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
new file mode 100755
index 0000000..c0bf8b1
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+int kbase_devfreq_init(struct kbase_device *kbdev);
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _BASE_DEVFREQ_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
new file mode 100755
index 0000000..b9238a3
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -0,0 +1,120 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ *
+ */
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+
+	writel(value, kbdev->reg + offset);
+
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_WRITE, offset,
+									value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx)
+{
+	u32 val;
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	val = readl(kbdev->reg + offset);
+
+	dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_READ, offset, val);
+	return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
+
+/**
+ * kbase_report_gpu_fault - Report a GPU fault.
+ * @kbdev:    Kbase device pointer
+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
+ *            was also set
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using dev_warn().
+ */
+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+	u32 status;
+	u64 address;
+
+	status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+	address = (u64) kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
+	address |= kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+
+	dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+			status & 0xFF,
+			kbase_exception_name(kbdev, status),
+			address);
+	if (multiple)
+		dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+	if (val & GPU_FAULT)
+		kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+	if (val & RESET_COMPLETED)
+		kbase_pm_reset_done(kbdev);
+
+	if (val & PRFCNT_SAMPLE_COMPLETED)
+		kbase_instr_hwcnt_sample_done(kbdev);
+
+	if (val & CLEAN_CACHES_COMPLETED)
+		kbase_clean_caches_done(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+
+	/* kbase_pm_check_transitions must be called after the IRQ has been
+	 * cleared. This is because it might trigger further power transitions
+	 * and we don't want to miss the interrupt raised to notify us that
+	 * these further transitions have finished.
+	 */
+	if (val & POWER_CHANGED_ALL)
+		kbase_pm_power_changed(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
new file mode 100755
index 0000000..5b20445
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access device APIs
+ */
+
+#ifndef _KBASE_DEVICE_INTERNAL_H_
+#define _KBASE_DEVICE_INTERNAL_H_
+
+/**
+ * kbase_reg_write - write to GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @value:  Value to write
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_reg_read - read from GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ *
+ * Return: Value in desired register
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+
+/**
+ * kbase_gpu_interrupt - GPU interrupt handler
+ * @kbdev: Kbase device pointer
+ * @val:   The value of the GPU IRQ status register which triggered the call
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be
+ * handled.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+#endif /* _KBASE_DEVICE_INTERNAL_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
new file mode 100755
index 0000000..d578fd7
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbasep_platform_device_init(kbdev);
+	if (err)
+		return err;
+
+	/* Ensure we can access the GPU registers */
+	kbase_pm_register_access_enable(kbdev);
+
+	/* Find out GPU properties based on the GPU feature registers */
+	kbase_gpuprops_set(kbdev);
+
+	/* We're done accessing the GPU registers for now. */
+	kbase_pm_register_access_disable(kbdev);
+
+	err = kbase_hwaccess_pm_init(kbdev);
+	if (err)
+		goto fail_pm;
+
+	err = kbase_install_interrupts(kbdev);
+	if (err)
+		goto fail_interrupts;
+
+	return 0;
+
+fail_interrupts:
+	kbase_hwaccess_pm_term(kbdev);
+fail_pm:
+	kbasep_platform_device_term(kbdev);
+
+	return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+	kbase_release_interrupts(kbdev);
+	kbase_hwaccess_pm_term(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		return err;
+
+	err = kbase_backend_timer_init(kbdev);
+	if (err)
+		goto fail_timer;
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+		dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+		err = -EINVAL;
+		goto fail_interrupt_test;
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = kbase_job_slot_init(kbdev);
+	if (err)
+		goto fail_job_slot;
+
+	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+	return 0;
+
+fail_job_slot:
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	kbase_backend_timer_term(kbdev);
+fail_timer:
+	kbase_hwaccess_pm_halt(kbdev);
+
+	return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+	kbase_job_slot_halt(kbdev);
+	kbase_job_slot_term(kbdev);
+	kbase_backend_timer_term(kbdev);
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100755
index 0000000..d410cd2
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,105 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query backend APIs
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	int i;
+
+	/* Fill regdump with the content of the relevant registers */
+	regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
+
+	regdump->l2_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_FEATURES), NULL);
+	regdump->suspend_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SUSPEND_SIZE), NULL);
+	regdump->tiler_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_FEATURES), NULL);
+	regdump->mem_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MEM_FEATURES), NULL);
+	regdump->mmu_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MMU_FEATURES), NULL);
+	regdump->as_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(AS_PRESENT), NULL);
+	regdump->js_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_PRESENT), NULL);
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		regdump->js_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		regdump->texture_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
+
+	regdump->thread_max_threads = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
+	regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
+									NULL);
+	regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
+	regdump->thread_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_FEATURES), NULL);
+
+	regdump->shader_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
+	regdump->shader_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
+
+	regdump->tiler_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
+	regdump->tiler_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
+
+	regdump->l2_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
+	regdump->l2_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
+}
+
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
+		/* Ensure we can access the GPU registers */
+		kbase_pm_register_access_enable(kbdev);
+
+		regdump->coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+		/* We're done accessing the GPU registers for now. */
+		kbase_pm_register_access_disable(kbdev);
+	} else {
+		/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
+		regdump->coherency_features =
+				COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+	}
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
new file mode 100755
index 0000000..3f06a10
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -0,0 +1,490 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * GPU backend instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+
+/**
+ * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
+ * hardware
+ *
+ * @kbdev: Kbase device
+ */
+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long pm_flags;
+	u32 irq_mask;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask | CLEAN_CACHES_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+	/* clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	int ret;
+	u64 shader_cores_needed;
+	u32 prfcnt_config;
+
+	shader_cores_needed = kbase_pm_get_present_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* alignment failure */
+	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
+		goto out_err;
+
+	/* Override core availability policy to ensure all cores are available
+	 */
+	kbase_pm_ca_instr_enable(kbdev);
+
+	/* Request the cores early on synchronously - we'll release them on any
+	 * errors (e.g. instrumentation already active) */
+	kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		/* Instrumentation is already enabled */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		goto out_unrequest_cores;
+	}
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
+						PRFCNT_SAMPLE_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+	/* In use, this context is the owner */
+	kbdev->hwcnt.kctx = kctx;
+	/* Remember the dump address so we can reprogram it later */
+	kbdev->hwcnt.addr = setup->dump_buffer;
+
+	/* Request the clean */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+	kbdev->hwcnt.backend.triggered = 0;
+	/* Clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+	KBASE_DEBUG_ASSERT(ret);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Wait for cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	/* Configure */
+	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	{
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+	}
+#endif
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					setup->dump_buffer & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					setup->dump_buffer >> 32,        kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+					setup->jm_bm,                    kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+					setup->shader_bm,                kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+					setup->mmu_l2_bm,                kctx);
+	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
+	 * HW counter dump. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
+									kctx);
+	else
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	err = 0;
+
+	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+	return err;
+ out_unrequest_cores:
+	kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+ out_err:
+	return err;
+}
+
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	while (1) {
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
+			/* Instrumentation is not enabled */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.kctx != kctx) {
+			/* Instrumentation has been setup for another context */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
+			break;
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+		/* Ongoing dump/setup - wait for its completion */
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Disable interrupt */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+	/* Disable the counters */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+
+	kbdev->hwcnt.kctx = NULL;
+	kbdev->hwcnt.addr = 0ULL;
+
+	kbase_pm_ca_instr_disable(kbdev);
+
+	kbase_pm_unrequest_cores(kbdev, true,
+		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	kbase_pm_release_l2_caches(kbdev);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+									kctx);
+
+	err = 0;
+
+ out:
+	return err;
+}
+
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.kctx != kctx) {
+		/* The instrumentation has been setup for another context */
+		goto unlock;
+	}
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
+		/* HW counters are disabled or another dump is ongoing, or we're
+		 * resetting */
+		goto unlock;
+	}
+
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Mark that we're dumping - the PF handler can signal that we faulted
+	 */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+	/* Reconfigure the dump address */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					kbdev->hwcnt.addr >> 32, NULL);
+
+	/* Start dumping */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
+					kbdev->hwcnt.addr, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+
+	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+
+	err = 0;
+
+ unlock:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
+
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success)
+{
+	unsigned long flags;
+	bool complete = false;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+		*success = true;
+		complete = true;
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		*success = false;
+		complete = true;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
+
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(data, struct kbase_device,
+						hwcnt.backend.cache_clean_work);
+
+	mutex_lock(&kbdev->cacheclean_lock);
+	kbasep_instr_hwcnt_cacheclean(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	/* Wait for our condition, and any reset to complete */
+	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+				kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_CLEANED);
+
+	/* All finished and idle */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
+		int ret;
+		/* Always clean and invalidate the cache after a successful dump
+		 */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+		ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+		KBASE_DEBUG_ASSERT(ret);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		unsigned long flags;
+		unsigned long pm_flags;
+
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+		/* Disable interrupt */
+		spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+									NULL);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+		/* Wakeup... */
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+			/* Only wake if we weren't resetting */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+			wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+		}
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	}
+}
+
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long flags;
+	int err;
+
+	/* Wait for dump & cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		err = -EINVAL;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	} else {
+		/* Dump done */
+		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+		err = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return err;
+}
+
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* Check it's the context previously set up and we're not already
+	 * dumping */
+	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
+							KBASE_INSTR_STATE_IDLE)
+		goto out;
+
+	/* Clear the counters */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_PRFCNT_CLEAR, kctx);
+
+	err = 0;
+
+out:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+int kbase_instr_backend_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
+	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
+	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
+						kbasep_cache_clean_worker);
+	kbdev->hwcnt.backend.triggered = 0;
+
+	kbdev->hwcnt.backend.cache_clean_wq =
+			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
+	if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+void kbase_instr_backend_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
new file mode 100755
index 0000000..4794672
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+	/* State where instrumentation is not active */
+	KBASE_INSTR_STATE_DISABLED = 0,
+	/* State machine is active and ready for a command. */
+	KBASE_INSTR_STATE_IDLE,
+	/* Hardware is currently dumping a frame. */
+	KBASE_INSTR_STATE_DUMPING,
+	/* We've requested a clean to occur on a workqueue */
+	KBASE_INSTR_STATE_REQUEST_CLEAN,
+	/* Hardware is currently cleaning and invalidating caches. */
+	KBASE_INSTR_STATE_CLEANING,
+	/* Cache clean completed, and either a) a dump is complete, or
+	 * b) instrumentation can now be setup. */
+	KBASE_INSTR_STATE_CLEANED,
+	/* An error has occured during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+	wait_queue_head_t wait;
+	int triggered;
+
+	enum kbase_instr_state state;
+	wait_queue_head_t cache_clean_wait;
+	struct workqueue_struct *cache_clean_wq;
+	struct work_struct  cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100755
index 0000000..e96aeae
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100755
index 0000000..8781561
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific IRQ APIs
+ */
+
+#ifndef _KBASE_IRQ_INTERNAL_H_
+#define _KBASE_IRQ_INTERNAL_H_
+
+int kbase_install_interrupts(struct kbase_device *kbdev);
+
+void kbase_release_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
+ *                          execution
+ * @kbdev: The kbase device
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev);
+
+#endif /* _KBASE_IRQ_INTERNAL_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
new file mode 100755
index 0000000..b891b12
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -0,0 +1,467 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+
+#include <linux/interrupt.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static void *kbase_tag(void *ptr, u32 tag)
+{
+	return (void *)(((uintptr_t) ptr) | tag);
+}
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_job_done(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	atomic_inc(&kbdev->faults_pending);
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val) {
+		atomic_dec(&kbdev->faults_pending);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_mmu_interrupt(kbdev, val);
+
+	atomic_dec(&kbdev->faults_pending);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_gpu_interrupt(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+static irq_handler_t kbase_handler_table[] = {
+	[JOB_IRQ_TAG] = kbase_job_irq_handler,
+	[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+	[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
+
+#ifdef CONFIG_MALI_DEBUG
+#define  JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define  MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define  GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * kbase_set_custom_irq_handler - Set a custom IRQ handler
+ * @kbdev: Device for which the handler is to be registered
+ * @custom_handler: Handler to be registered
+ * @irq_type: Interrupt type
+ *
+ * Registers given interrupt handler for requested interrupt type
+ * In the case where irq handler is not specified, the default handler shall be
+ * registered
+ *
+ * Return: 0 case success, error code otherwise
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+					irq_handler_t custom_handler,
+					int irq_type)
+{
+	int result = 0;
+	irq_handler_t requested_irq_handler = NULL;
+
+	KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
+						(GPU_IRQ_HANDLER >= irq_type));
+
+	/* Release previous handler */
+	if (kbdev->irqs[irq_type].irq)
+		free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
+
+	requested_irq_handler = (NULL != custom_handler) ? custom_handler :
+						kbase_handler_table[irq_type];
+
+	if (0 != request_irq(kbdev->irqs[irq_type].irq,
+			requested_irq_handler,
+			kbdev->irqs[irq_type].flags | IRQF_SHARED,
+			dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+		result = -EINVAL;
+		dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+					kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+		dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/* test correct interrupt assigment and reception by cpu */
+struct kbasep_irq_test {
+	struct hrtimer timer;
+	wait_queue_head_t wait;
+	int triggered;
+	u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT    500
+
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+	struct kbasep_irq_test *test_data = container_of(timer,
+						struct kbasep_irq_test, timer);
+
+	test_data->timeout = 1;
+	test_data->triggered = 1;
+	wake_up(&test_data->wait);
+	return HRTIMER_NORESTART;
+}
+
+static int kbasep_common_test_interrupt(
+				struct kbase_device * const kbdev, u32 tag)
+{
+	int err = 0;
+	irq_handler_t test_handler;
+
+	u32 old_mask_val;
+	u16 mask_offset;
+	u16 rawstat_offset;
+
+	switch (tag) {
+	case JOB_IRQ_TAG:
+		test_handler = kbase_job_irq_test_handler;
+		rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+		mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+		break;
+	case MMU_IRQ_TAG:
+		test_handler = kbase_mmu_irq_test_handler;
+		rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+		mask_offset = MMU_REG(MMU_IRQ_MASK);
+		break;
+	case GPU_IRQ_TAG:
+		/* already tested by pm_driver - bail out */
+	default:
+		return 0;
+	}
+
+	/* store old mask */
+	old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+	/* mask interrupts */
+	kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+	if (kbdev->irqs[tag].irq) {
+		/* release original handler and install test handler */
+		if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
+			err = -EINVAL;
+		} else {
+			kbasep_irq_test_data.timeout = 0;
+			hrtimer_init(&kbasep_irq_test_data.timer,
+					CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+			kbasep_irq_test_data.timer.function =
+						kbasep_test_interrupt_timeout;
+
+			/* trigger interrupt */
+			kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
+			kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+
+			hrtimer_start(&kbasep_irq_test_data.timer,
+					HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
+					HRTIMER_MODE_REL);
+
+			wait_event(kbasep_irq_test_data.wait,
+					kbasep_irq_test_data.triggered != 0);
+
+			if (kbasep_irq_test_data.timeout != 0) {
+				dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+				err = -EINVAL;
+			} else {
+				dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+			}
+
+			hrtimer_cancel(&kbasep_irq_test_data.timer);
+			kbasep_irq_test_data.triggered = 0;
+
+			/* mask interrupts */
+			kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+			/* release test handler */
+			free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+		}
+
+		/* restore original interrupt */
+		if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
+				kbdev->irqs[tag].flags | IRQF_SHARED,
+				dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+			dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
+						kbdev->irqs[tag].irq, tag);
+			err = -EINVAL;
+		}
+	}
+	/* restore old mask */
+	kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+
+	return err;
+}
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev)
+{
+	int err;
+
+	init_waitqueue_head(&kbasep_irq_test_data.wait);
+	kbasep_irq_test_data.triggered = 0;
+
+	/* A suspend won't happen during startup/insmod */
+	kbase_pm_context_active(kbdev);
+
+	err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+
+ out:
+	kbase_pm_context_idle(kbdev);
+
+	return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	int err;
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
+				kbdev->irqs[i].flags | IRQF_SHARED,
+				dev_name(kbdev->dev),
+				kbase_tag(kbdev, i));
+		if (err) {
+			dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+							kbdev->irqs[i].irq, i);
+#ifdef CONFIG_SPARSE_IRQ
+			dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+			goto release;
+		}
+	}
+
+	return 0;
+
+ release:
+	while (i-- > 0)
+		free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+
+	return err;
+}
+
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+	}
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			synchronize_irq(kbdev->irqs[i].irq);
+	}
+}
+
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
new file mode 100755
index 0000000..f216788
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -0,0 +1,385 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register backend context / address space management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+/**
+ * assign_and_activate_kctx_addr_space - Assign an AS to a context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @current_as: Address Space to assign
+ *
+ * Assign an Address Space (AS) to a context, and add the context to the Policy.
+ *
+ * This includes
+ *   setting up the global runpool_irq structure and the context on the AS,
+ *   Activating the MMU on the AS,
+ *   Allowing jobs to be submitted on the AS.
+ *
+ * Context:
+ *   kbasep_js_kctx_info.jsctx_mutex held,
+ *   kbasep_js_device_data.runpool_mutex held,
+ *   AS transaction mutex held,
+ *   Runpool IRQ lock held
+ */
+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_as *current_as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_per_as_data *js_per_as_data;
+	int as_nr = current_as->number;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&current_as->transaction_mutex);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	/* Attribute handling */
+	kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+	/* Assign addr space */
+	kctx->as_nr = as_nr;
+
+	/* If the GPU is currently powered, activate this address space on the
+	 * MMU */
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_mmu_update(kctx);
+	/* If the GPU was not powered then the MMU will be reprogrammed on the
+	 * next pm_context_active() */
+
+	/* Allow it to run jobs */
+	kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+	/* Book-keeping */
+	js_per_as_data->kctx = kctx;
+	js_per_as_data->as_busy_refcount = 0;
+
+	kbase_js_runpool_inc_context_count(kbdev, kctx);
+}
+
+/**
+ * release_addr_space - Release an address space
+ * @kbdev: Kbase device
+ * @kctx_as_nr: Address space of context to release
+ * @kctx: Context being released
+ *
+ * Context: kbasep_js_device_data.runpool_mutex must be held
+ *
+ * Release an address space, making it available for being picked again.
+ */
+static void release_addr_space(struct kbase_device *kbdev, int kctx_as_nr,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u16 as_bit = (1u << kctx_as_nr);
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* The address space must not already be free */
+	KBASE_DEBUG_ASSERT(!(js_devdata->as_free & as_bit));
+
+	js_devdata->as_free |= as_bit;
+
+	kbase_js_runpool_dec_context_count(kbdev, kctx);
+}
+
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	int i;
+
+	if (kbdev->hwaccess.active_kctx == kctx) {
+		/* Context is already active */
+		return true;
+	}
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+				&kbdev->js_data.runpool_irq.per_as_data[i];
+
+		if (js_per_as_data->kctx == kctx) {
+			/* Context already has ASID - mark as active */
+			return true;
+		}
+	}
+
+	/* Context does not have address space assigned */
+	return false;
+}
+
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_per_as_data *js_per_as_data;
+	int as_nr = kctx->as_nr;
+
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Attempting to release context without ASID\n");
+		return;
+	}
+
+	lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr];
+	if (js_per_as_data->as_busy_refcount != 0) {
+		WARN(1, "Attempting to release active ASID\n");
+		return;
+	}
+
+	/* Release context from address space */
+	js_per_as_data->kctx = NULL;
+
+	kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
+	/* If the GPU is currently powered, de-activate this address space on
+	 * the MMU */
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_mmu_disable(kctx);
+	/* If the GPU was not powered then the MMU will be reprogrammed on the
+	 * next pm_context_active() */
+
+	release_addr_space(kbdev, as_nr, kctx);
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+}
+
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+}
+
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+								int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	js_devdata->as_free |= (1 << as_nr);
+}
+
+/**
+ * check_is_runpool_full - check whether the runpool is full for a specified
+ * context
+ * @kbdev: Kbase device
+ * @kctx:  Kbase context
+ *
+ * If kctx == NULL, then this makes the least restrictive check on the
+ * runpool. A specific context that is supplied immediately after could fail
+ * the check, even under the same conditions.
+ *
+ * Therefore, once a context is obtained you \b must re-check it with this
+ * function, since the return value could change to false.
+ *
+ * Context:
+ *   In all cases, the caller must hold kbasep_js_device_data.runpool_mutex.
+ *   When kctx != NULL the caller must hold the
+ *   kbasep_js_kctx_info.ctx.jsctx_mutex.
+ *   When kctx == NULL, then the caller need not hold any jsctx_mutex locks (but
+ *   it doesn't do any harm to do so).
+ *
+ * Return: true if the runpool is full
+ */
+static bool check_is_runpool_full(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	bool is_runpool_full;
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Regardless of whether a context is submitting or not, can't have more
+	 * than there are HW address spaces */
+	is_runpool_full = (bool) (js_devdata->nr_all_contexts_running >=
+						kbdev->nr_hw_address_spaces);
+
+	if (kctx != NULL && (kctx->jctx.sched_info.ctx.flags &
+					KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+		lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		/* Contexts that submit might use less of the address spaces
+		 * available, due to HW workarounds.  In which case, the runpool
+		 * is also full when the number of submitting contexts exceeds
+		 * the number of submittable address spaces.
+		 *
+		 * Both checks must be made: can have nr_user_address_spaces ==
+		 * nr_hw_address spaces, and at the same time can have
+		 * nr_user_contexts_running < nr_all_contexts_running. */
+		is_runpool_full |= (bool)
+					(js_devdata->nr_user_contexts_running >=
+						kbdev->nr_user_address_spaces);
+	}
+
+	return is_runpool_full;
+}
+
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	int i;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* First try to find a free address space */
+	if (check_is_runpool_full(kbdev, kctx))
+		i = -1;
+	else
+		i = ffs(js_devdata->as_free) - 1;
+
+	if (i >= 0 && i < kbdev->nr_hw_address_spaces) {
+		js_devdata->as_free &= ~(1 << i);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return i;
+	}
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* No address space currently free, see if we can release one */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+		struct kbasep_js_kctx_info *as_js_kctx_info;
+		struct kbase_context *as_kctx;
+
+		js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[i];
+		as_kctx = js_per_as_data->kctx;
+		as_js_kctx_info = &as_kctx->jctx.sched_info;
+
+		/* Don't release privileged or active contexts, or contexts with
+		 * jobs running */
+		if (as_kctx && !(as_kctx->jctx.sched_info.ctx.flags &
+						KBASE_CTX_FLAG_PRIVILEGED) &&
+			js_per_as_data->as_busy_refcount == 0) {
+			if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
+								as_kctx)) {
+				WARN(1, "Failed to retain active context\n");
+
+				spin_unlock_irqrestore(
+						&js_devdata->runpool_irq.lock,
+									flags);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				return KBASEP_AS_NR_INVALID;
+			}
+
+			kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
+
+			/* Drop and retake locks to take the jsctx_mutex on the
+			 * context we're about to release without violating lock
+			 * ordering
+			 */
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+									flags);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+
+			/* Release context from address space */
+			mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+
+			kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
+
+			if (!as_js_kctx_info->ctx.is_scheduled) {
+				kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
+								as_kctx,
+								true);
+
+				js_devdata->as_free &= ~(1 << i);
+
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+				return i;
+			}
+
+			/* Context was retained while locks were dropped,
+			 * continue looking for free AS */
+
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+		}
+	}
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_as *new_address_space = NULL;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if (kbdev->hwaccess.active_kctx == kctx ||
+	    kctx->as_nr != KBASEP_AS_NR_INVALID ||
+	    as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Invalid parameters to use_ctx()\n");
+		return false;
+	}
+
+	new_address_space = &kbdev->as[as_nr];
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&new_address_space->transaction_mutex);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0) {
+		/* We need to retain it to keep the corresponding address space
+		 */
+		kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	}
+
+	return true;
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
new file mode 100755
index 0000000..83d4778
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific definitions
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
+#define _KBASE_HWACCESS_GPU_DEFS_H_
+
+/* SLOT_RB_SIZE must be < 256 */
+#define SLOT_RB_SIZE 2
+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
+
+/**
+ * struct rb_entry - Ringbuffer entry
+ * @katom:	Atom associated with this entry
+ */
+struct rb_entry {
+	struct kbase_jd_atom *katom;
+};
+
+/**
+ * struct slot_rb - Slot ringbuffer
+ * @entries:		Ringbuffer entries
+ * @last_context:	The last context to submit a job on this slot
+ * @read_idx:		Current read index of buffer
+ * @write_idx:		Current write index of buffer
+ * @job_chain_flag:	Flag used to implement jobchain disambiguation
+ */
+struct slot_rb {
+	struct rb_entry entries[SLOT_RB_SIZE];
+
+	struct kbase_context *last_context;
+
+	u8 read_idx;
+	u8 write_idx;
+
+	u8 job_chain_flag;
+};
+
+/**
+ * struct kbase_backend_data - GPU backend specific data for HW access layer
+ * @slot_rb:			Slot ringbuffers
+ * @rmu_workaround_flag:	When PRLAM-8987 is present, this flag determines
+ *				whether slots 0/1 or slot 2 are currently being
+ *				pulled from
+ * @scheduling_timer:		The timer tick used for rescheduling jobs
+ * @timer_running:		Is the timer running? The runpool_mutex must be
+ *				held whilst modifying this.
+ * @suspend_timer:              Is the timer suspended? Set when a suspend
+ *                              occurs and cleared on resume. The runpool_mutex
+ *                              must be held whilst modifying this.
+ * @reset_gpu:			Set to a KBASE_RESET_xxx value (see comments)
+ * @reset_workq:		Work queue for performing the reset
+ * @reset_work:			Work item for performing the reset
+ * @reset_wait:			Wait event signalled when the reset is complete
+ * @reset_timer:		Timeout for soft-stops before the reset
+ * @timeouts_updated:           Have timeout values just been updated?
+ *
+ * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
+ * accessing this structure
+ */
+struct kbase_backend_data {
+	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
+
+	bool rmu_workaround_flag;
+
+	struct hrtimer scheduling_timer;
+
+	bool timer_running;
+	bool suspend_timer;
+
+	atomic_t reset_gpu;
+
+/* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_NOT_PENDING     0
+/* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_PREPARED        1
+/* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_COMMITTED       2
+/* The GPU reset process is currently occuring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+#define KBASE_RESET_GPU_HAPPENING       3
+/* Reset the GPU silently, used when resetting the GPU as part of normal
+ * behavior (e.g. when exiting protected mode). */
+#define KBASE_RESET_GPU_SILENT          4
+	struct workqueue_struct *reset_workq;
+	struct work_struct reset_work;
+	wait_queue_head_t reset_wait;
+	struct hrtimer reset_timer;
+
+	bool timeouts_updated;
+};
+
+/**
+ * struct kbase_jd_atom_backend - GPU backend specific katom data
+ */
+struct kbase_jd_atom_backend {
+};
+
+/**
+ * struct kbase_context_backend - GPU backend specific context data
+ */
+struct kbase_context_backend {
+};
+
+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
new file mode 100755
index 0000000..00900a9
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -0,0 +1,1518 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel job manager APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_vinstr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+#define beenthere(kctx, f, a...) \
+			dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if KBASE_GPU_RESET_EN
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
+static void kbasep_reset_timeout_worker(struct work_struct *data);
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
+#endif /* KBASE_GPU_RESET_EN */
+
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
+						struct kbase_context *kctx)
+{
+	return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
+}
+
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js)
+{
+	struct kbase_context *kctx;
+	u32 cfg;
+	u64 jc_head = katom->jc;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	/* Command register must be available */
+	KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+	/* Affinity is not violating */
+	kbase_js_debug_log_current_affinities(kbdev);
+	KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
+							katom->affinity));
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
+						jc_head & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
+						jc_head >> 32, kctx);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+					katom->affinity & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+					katom->affinity >> 32, kctx);
+
+	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
+	 * start */
+	cfg = kctx->as_nr;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
+
+#ifndef CONFIG_MALI_COH_GPU
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START))
+		cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END))
+		cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+#endif /* CONFIG_MALI_COH_GPU */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649) ||
+		!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3982))
+		cfg |= JS_CONFIG_START_MMU;
+
+	cfg |= JS_CONFIG_THREAD_PRI(8);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) &&
+		(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED))
+		cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+		if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+			cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								true;
+		} else {
+			katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								false;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
+				katom->flush_id, kctx);
+
+	/* Write an approximate start timestamp.
+	 * It's approximate because there might be a job in the HEAD register.
+	 * In such cases, we'll try to make a better approximation in the IRQ
+	 * handler (up to the KBASE_JS_IRQ_THROTTLE_TIME_US). */
+	katom->start_timestamp = ktime_get();
+
+	/* GO ! */
+	dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx",
+				katom, kctx, js, jc_head, katom->affinity);
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
+							(u32) katom->affinity);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(
+				GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
+				kctx, kbase_jd_atom_id(kctx, katom));
+#endif
+	kbase_tlstream_tl_attrib_atom_config(katom, jc_head,
+			katom->affinity, cfg);
+	kbase_tlstream_tl_ret_ctx_lpu(
+		kctx,
+		&kbdev->gpu_props.props.raw_props.js_features[
+			katom->slot_nr]);
+	kbase_tlstream_tl_ret_atom_as(katom, &kbdev->as[kctx->as_nr]);
+	kbase_tlstream_tl_ret_atom_lpu(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[js],
+			"ctx_nr,atom_nr");
+#ifdef CONFIG_GPU_TRACEPOINTS
+	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
+		/* If this is the only job on the slot, trace it as starting */
+		char js_string[16];
+
+		trace_gpu_sched_switch(
+				kbasep_make_job_slot_string(js, js_string),
+				ktime_to_ns(katom->start_timestamp),
+				(u32)katom->kctx->id, 0, katom->work_id);
+		kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
+	}
+#endif
+	kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+						JS_COMMAND_START, katom->kctx);
+}
+
+/**
+ * kbasep_job_slot_update_head_start_timestamp - Update timestamp
+ * @kbdev: kbase device
+ * @js: job slot
+ * @end_timestamp: timestamp
+ *
+ * Update the start_timestamp of the job currently in the HEAD, based on the
+ * fact that we got an IRQ for the previous set of completed jobs.
+ *
+ * The estimate also takes into account the %KBASE_JS_IRQ_THROTTLE_TIME_US and
+ * the time the job was submitted, to work out the best estimate (which might
+ * still result in an over-estimate to the calculated time spent)
+ */
+static void kbasep_job_slot_update_head_start_timestamp(
+						struct kbase_device *kbdev,
+						int js,
+						ktime_t end_timestamp)
+{
+	if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) {
+		struct kbase_jd_atom *katom;
+		ktime_t new_timestamp;
+		ktime_t timestamp_diff;
+		/* The atom in the HEAD */
+		katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		KBASE_DEBUG_ASSERT(katom != NULL);
+
+		/* Account for any IRQ Throttle time - makes an overestimate of
+		 * the time spent by the job */
+		new_timestamp = ktime_sub_ns(end_timestamp,
+					KBASE_JS_IRQ_THROTTLE_TIME_US * 1000);
+		timestamp_diff = ktime_sub(new_timestamp,
+							katom->start_timestamp);
+		if (ktime_to_ns(timestamp_diff) >= 0) {
+			/* Only update the timestamp if it's a better estimate
+			 * than what's currently stored. This is because our
+			 * estimate that accounts for the throttle time may be
+			 * too much of an overestimate */
+			katom->start_timestamp = new_timestamp;
+		}
+	}
+}
+
+/**
+ * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint
+ * @kbdev: kbase device
+ * @i: job slot
+ *
+ * Get kbase atom by calling kbase_gpu_inspect for given job slot.
+ * Then use obtained katom and name of slot associated with the given
+ * job slot number in tracepoint call to the instrumentation module
+ * informing that given atom is no longer executed on given lpu (job slot).
+ */
+static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, i, 0);
+
+	kbase_tlstream_tl_nret_atom_lpu(katom,
+		&kbdev->gpu_props.props.raw_props.js_features[i]);
+}
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done)
+{
+	unsigned long flags;
+	int i;
+	u32 count = 0;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
+
+	memset(&kbdev->slot_submit_count_irq[0], 0,
+					sizeof(kbdev->slot_submit_count_irq));
+
+	/* write irq throttle register, this will prevent irqs from occurring
+	 * until the given number of gpu clock cycles have passed */
+	{
+		int irq_throttle_cycles =
+				atomic_read(&kbdev->irq_throttle_cycles);
+
+		kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE),
+						irq_throttle_cycles, NULL);
+	}
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	while (done) {
+		u32 failed = done >> 16;
+
+		/* treat failed slots as finished slots */
+		u32 finished = (done & 0xFFFF) | failed;
+
+		/* Note: This is inherently unfair, as we always check
+		 * for lower numbered interrupts before the higher
+		 * numbered ones.*/
+		i = ffs(finished) - 1;
+		KBASE_DEBUG_ASSERT(i >= 0);
+
+		do {
+			int nr_done;
+			u32 active;
+			u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */
+			u64 job_tail = 0;
+
+			if (failed & (1u << i)) {
+				/* read out the job slot status code if the job
+				 * slot reported failure */
+				completion_code = kbase_reg_read(kbdev,
+					JOB_SLOT_REG(i, JS_STATUS), NULL);
+
+				switch (completion_code) {
+				case BASE_JD_EVENT_STOPPED:
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+					kbase_trace_mali_job_slots_event(
+						GATOR_MAKE_EVENT(
+						GATOR_JOB_SLOT_SOFT_STOPPED, i),
+								NULL, 0);
+#endif
+
+					kbase_tlstream_aux_job_softstop(i);
+
+					kbasep_trace_tl_nret_atom_lpu(
+						kbdev, i);
+
+					/* Soft-stopped job - read the value of
+					 * JS<n>_TAIL so that the job chain can
+					 * be resumed */
+					job_tail = (u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_LO),
+									NULL) |
+						((u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_HI),
+								NULL) << 32);
+					break;
+				case BASE_JD_EVENT_NOT_STARTED:
+					/* PRLAM-10673 can cause a TERMINATED
+					 * job to come back as NOT_STARTED, but
+					 * the error interrupt helps us detect
+					 * it */
+					completion_code =
+						BASE_JD_EVENT_TERMINATED;
+					/* fall through */
+				default:
+					dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+							i, completion_code,
+							kbase_exception_name
+							(kbdev,
+							completion_code));
+				}
+
+				kbase_gpu_irq_evict(kbdev, i);
+			}
+
+			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+					done & ((1 << i) | (1 << (i + 16))),
+					NULL);
+			active = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_JS_STATE),
+					NULL);
+
+			if (((active >> i) & 1) == 0 &&
+					(((done >> (i + 16)) & 1) == 0)) {
+				/* There is a potential race we must work
+				 * around:
+				 *
+				 *  1. A job slot has a job in both current and
+				 *     next registers
+				 *  2. The job in current completes
+				 *     successfully, the IRQ handler reads
+				 *     RAWSTAT and calls this function with the
+				 *     relevant bit set in "done"
+				 *  3. The job in the next registers becomes the
+				 *     current job on the GPU
+				 *  4. Sometime before the JOB_IRQ_CLEAR line
+				 *     above the job on the GPU _fails_
+				 *  5. The IRQ_CLEAR clears the done bit but not
+				 *     the failed bit. This atomically sets
+				 *     JOB_IRQ_JS_STATE. However since both jobs
+				 *     have now completed the relevant bits for
+				 *     the slot are set to 0.
+				 *
+				 * If we now did nothing then we'd incorrectly
+				 * assume that _both_ jobs had completed
+				 * successfully (since we haven't yet observed
+				 * the fail bit being set in RAWSTAT).
+				 *
+				 * So at this point if there are no active jobs
+				 * left we check to see if RAWSTAT has a failure
+				 * bit set for the job slot. If it does we know
+				 * that there has been a new failure that we
+				 * didn't previously know about, so we make sure
+				 * that we record this in active (but we wait
+				 * for the next loop to deal with it).
+				 *
+				 * If we were handling a job failure (i.e. done
+				 * has the relevant high bit set) then we know
+				 * that the value read back from
+				 * JOB_IRQ_JS_STATE is the correct number of
+				 * remaining jobs because the failed job will
+				 * have prevented any futher jobs from starting
+				 * execution.
+				 */
+				u32 rawstat = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+				if ((rawstat >> (i + 16)) & 1) {
+					/* There is a failed job that we've
+					 * missed - add it back to active */
+					active |= (1u << i);
+				}
+			}
+
+			dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+							completion_code);
+
+			nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+			nr_done -= (active >> i) & 1;
+			nr_done -= (active >> (i + 16)) & 1;
+
+			if (nr_done <= 0) {
+				dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+									i);
+
+				goto spurious;
+			}
+
+			count += nr_done;
+
+			while (nr_done) {
+				if (nr_done == 1) {
+					kbase_gpu_complete_hw(kbdev, i,
+								completion_code,
+								job_tail,
+								&end_timestamp);
+					kbase_jm_try_kick_all(kbdev);
+				} else {
+					/* More than one job has completed.
+					 * Since this is not the last job being
+					 * reported this time it must have
+					 * passed. This is because the hardware
+					 * will not allow further jobs in a job
+					 * slot to complete until the failed job
+					 * is cleared from the IRQ status.
+					 */
+					kbase_gpu_complete_hw(kbdev, i,
+							BASE_JD_EVENT_DONE,
+							0,
+							&end_timestamp);
+				}
+				nr_done--;
+			}
+ spurious:
+			done = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
+				/* Workaround for missing interrupt caused by
+				 * PRLAM-10883 */
+				if (((active >> i) & 1) && (0 ==
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(i,
+							JS_STATUS), NULL))) {
+					/* Force job slot to be processed again
+					 */
+					done |= (1u << i);
+				}
+			}
+
+			failed = done >> 16;
+			finished = (done & 0xFFFF) | failed;
+			if (done)
+				end_timestamp = ktime_get();
+		} while (finished & (1 << i));
+
+		kbasep_job_slot_update_head_start_timestamp(kbdev, i,
+								end_timestamp);
+	}
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+#if KBASE_GPU_RESET_EN
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_COMMITTED) {
+		/* If we're trying to reset the GPU then we might be able to do
+		 * it early (without waiting for a timeout) because some jobs
+		 * have completed
+		 */
+		kbasep_try_reset_gpu_early(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
+}
+KBASE_EXPORT_TEST_API(kbase_job_done);
+
+static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	bool soft_stops_allowed = true;
+
+	if (kbase_jd_katom_is_protected(katom)) {
+		soft_stops_allowed = false;
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+		if ((katom->core_req & BASE_JD_REQ_T) != 0)
+			soft_stops_allowed = false;
+	}
+	return soft_stops_allowed;
+}
+
+static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
+						base_jd_core_req core_reqs)
+{
+	bool hard_stops_allowed = true;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
+		if ((core_reqs & BASE_JD_REQ_T) != 0)
+			hard_stops_allowed = false;
+	}
+	return hard_stops_allowed;
+}
+
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom)
+{
+	struct kbase_context *kctx = target_katom->kctx;
+#if KBASE_TRACE_ENABLE
+	u32 status_reg_before;
+	u64 job_in_head_before;
+	u32 status_reg_after;
+
+	KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+
+	/* Check the head pointer */
+	job_in_head_before = ((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
+			| (((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_HI), NULL))
+									<< 32);
+	status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+#endif
+
+	if (action == JS_COMMAND_SOFT_STOP) {
+		bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
+								target_katom);
+
+		if (!soft_stop_allowed) {
+#ifdef CONFIG_MALI_DEBUG
+			dev_dbg(kbdev->dev,
+					"Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+
+		/* We are about to issue a soft stop, so mark the atom as having
+		 * been soft stopped */
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+
+		/* Mark the point where we issue the soft-stop command */
+		kbase_tlstream_aux_issue_job_softstop(target_katom);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+			int i;
+
+			for (i = 0;
+			     i < kbase_backend_nr_atoms_submitted(kbdev, js);
+			     i++) {
+				struct kbase_jd_atom *katom;
+
+				katom = kbase_gpu_inspect(kbdev, js, i);
+
+				KBASE_DEBUG_ASSERT(katom);
+
+				/* For HW_ISSUE_8316, only 'bad' jobs attacking
+				 * the system can cause this issue: normally,
+				 * all memory should be allocated in multiples
+				 * of 4 pages, and growable memory should be
+				 * changed size in multiples of 4 pages.
+				 *
+				 * Whilst such 'bad' jobs can be cleared by a
+				 * GPU reset, the locking up of a uTLB entry
+				 * caused by the bad job could also stall other
+				 * ASs, meaning that other ASs' jobs don't
+				 * complete in the 'grace' period before the
+				 * reset. We don't want to lose other ASs' jobs
+				 * when they would normally complete fine, so we
+				 * must 'poke' the MMU regularly to help other
+				 * ASs complete */
+				kbase_as_poking_timer_retain_atom(
+						kbdev, katom->kctx, katom);
+			}
+		}
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_SOFT_STOP_1 :
+				JS_COMMAND_SOFT_STOP_0;
+		}
+	} else if (action == JS_COMMAND_HARD_STOP) {
+		bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
+								core_reqs);
+
+		if (!hard_stop_allowed) {
+			/* Jobs can be hard-stopped for the following reasons:
+			 *  * CFS decides the job has been running too long (and
+			 *    soft-stop has not occurred). In this case the GPU
+			 *    will be reset by CFS if the job remains on the
+			 *    GPU.
+			 *
+			 *  * The context is destroyed, kbase_jd_zap_context
+			 *    will attempt to hard-stop the job. However it also
+			 *    has a watchdog which will cause the GPU to be
+			 *    reset if the job remains on the GPU.
+			 *
+			 *  * An (unhandled) MMU fault occurred. As long as
+			 *    BASE_HW_ISSUE_8245 is defined then the GPU will be
+			 *    reset.
+			 *
+			 * All three cases result in the GPU being reset if the
+			 * hard-stop fails, so it is safe to just return and
+			 * ignore the hard-stop request.
+			 */
+			dev_warn(kbdev->dev,
+					"Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+			return;
+		}
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_HARD_STOP_1 :
+				JS_COMMAND_HARD_STOP_0;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
+
+#if KBASE_TRACE_ENABLE
+	status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+	if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
+		struct kbase_jd_atom *head;
+		struct kbase_context *head_kctx;
+
+		head = kbase_gpu_inspect(kbdev, js, 0);
+		head_kctx = head->kctx;
+
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx,
+						head, job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+						0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	} else {
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+#endif
+}
+
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	/* Cancel any remaining running jobs for this kctx  */
+	mutex_lock(&kctx->jctx.lock);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* Invalidate all jobs in context, to prevent re-submitting */
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		if (!work_pending(&kctx->jctx.atoms[i].work))
+			kctx->jctx.atoms[i].event_code =
+						BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_hardstop(kctx, i, NULL);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev;
+	int js = target_katom->slot_nr;
+	int priority = target_katom->sched_priority;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		if (katom->kctx != kctx)
+			continue;
+
+		if (katom->sched_priority > priority)
+			kbase_job_slot_softstop(kbdev, js, katom);
+	}
+}
+
+struct zap_reset_data {
+	/* The stages are:
+	 * 1. The timer has never been called
+	 * 2. The zap has timed out, all slots are soft-stopped - the GPU reset
+	 *    will happen. The GPU has been reset when
+	 *    kbdev->hwaccess.backend.reset_waitq is signalled
+	 *
+	 * (-1 - The timer has been cancelled)
+	 */
+	int stage;
+	struct kbase_device *kbdev;
+	struct hrtimer timer;
+	spinlock_t lock; /* protects updates to stage member */
+};
+
+static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer)
+{
+	struct zap_reset_data *reset_data = container_of(timer,
+						struct zap_reset_data, timer);
+	struct kbase_device *kbdev = reset_data->kbdev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&reset_data->lock, flags);
+
+	if (reset_data->stage == -1)
+		goto out;
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_prepare_to_reset_gpu(kbdev)) {
+		dev_err(kbdev->dev, "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+								ZAP_TIMEOUT);
+		kbase_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	reset_data->stage = 2;
+
+ out:
+	spin_unlock_irqrestore(&reset_data->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct zap_reset_data reset_data;
+	unsigned long flags;
+
+	hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	reset_data.timer.function = zap_timeout_callback;
+
+	spin_lock_init(&reset_data.lock);
+
+	reset_data.kbdev = kbdev;
+	reset_data.stage = 1;
+
+	hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for all jobs to finish, and for the context to be not-scheduled
+	 * (due to kbase_job_zap_context(), we also guarentee it's not in the JS
+	 * policy queue either */
+	wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
+	wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			kctx->jctx.sched_info.ctx.is_scheduled == false);
+
+	spin_lock_irqsave(&reset_data.lock, flags);
+	if (reset_data.stage == 1) {
+		/* The timer hasn't run yet - so cancel it */
+		reset_data.stage = -1;
+	}
+	spin_unlock_irqrestore(&reset_data.lock, flags);
+
+	hrtimer_cancel(&reset_data.timer);
+
+	if (reset_data.stage == 2) {
+		/* The reset has already started.
+		 * Wait for the reset to complete
+		 */
+		wait_event(kbdev->hwaccess.backend.reset_wait,
+				atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+						== KBASE_RESET_GPU_NOT_PENDING);
+	}
+	destroy_hrtimer_on_stack(&reset_data.timer);
+
+	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+	/* Ensure that the signallers of the waitqs have finished */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
+{
+	u32 flush_id = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.backend.gpu_powered)
+			flush_id = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return flush_id;
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (NULL == kbdev->hwaccess.backend.reset_workq)
+		return -EINVAL;
+
+	KBASE_DEBUG_ASSERT(0 ==
+		object_is_on_stack(&kbdev->hwaccess.backend.reset_work));
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+#endif
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
+ * @kbdev: kbase device pointer
+ * @kctx:  context to check against
+ * @js:	   slot to check
+ * @target_katom: An atom to check, or NULL if all atoms from @kctx on
+ *                slot @js should be checked
+ *
+ * This checks are based upon parameters that would normally be passed to
+ * kbase_job_slot_hardstop().
+ *
+ * In the event of @target_katom being NULL, this will check the last jobs that
+ * are likely to be running on the slot to see if a) they belong to kctx, and
+ * so would be stopped, and b) whether they have AFBC
+ *
+ * In that case, It's guaranteed that a job currently executing on the HW with
+ * AFBC will be detected. However, this is a conservative check because it also
+ * detects jobs that have just completed too.
+ *
+ * Return: true when hard-stop _might_ stop an afbc atom, else false.
+ */
+static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
+		struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom)
+{
+	bool ret = false;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	/* When we have an atom the decision can be made straight away. */
+	if (target_katom)
+		return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC);
+
+	/* Otherwise, we must chweck the hardware to see if it has atoms from
+	 * this context with AFBC. */
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		/* Ignore atoms from other contexts, they won't be stopped when
+		 * we use this for checking if we should hard-stop them */
+		if (katom->kctx != kctx)
+			continue;
+
+		/* An atom on this slot and this context: check for AFBC */
+		if (katom->core_req & BASE_JD_REQ_FS_AFBC) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
+}
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * @sw_flags:      Flags to pass in about the soft-stop
+ *
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+			struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+	KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+	kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
+			JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool stopped;
+#if KBASE_GPU_RESET_EN
+	/* We make the check for AFBC before evicting/stopping atoms.  Note
+	 * that no other thread can modify the slots whilst we have the
+	 * runpool_irq lock. */
+	int needs_workaround_for_afbc =
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
+			&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
+					 target_katom);
+#endif
+
+	stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
+							target_katom,
+							JS_COMMAND_HARD_STOP);
+#if KBASE_GPU_RESET_EN
+	if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+			needs_workaround_for_afbc)) {
+		/* MIDBASE-2916 if a fragment job with AFBC encoding is
+		 * hardstopped, ensure to do a soft reset also in order to
+		 * clear the GPU status.
+		 * Workaround for HW issue 8401 has an issue,so after
+		 * hard-stopping just reset the GPU. This will ensure that the
+		 * jobs leave the GPU.*/
+		if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+			dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue");
+			kbase_reset_gpu_locked(kbdev);
+		}
+	}
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	/* For hard-stop, don't enter if hard-stop not allowed */
+	if (hw_action == JS_COMMAND_HARD_STOP &&
+			!kbasep_hard_stop_allowed(kbdev, core_reqs))
+		return;
+
+	/* For soft-stop, don't enter if soft-stop not allowed, or isn't
+	 * causing disjoint */
+	if (hw_action == JS_COMMAND_SOFT_STOP &&
+			!(kbasep_soft_stop_allowed(kbdev, target_katom) &&
+			  (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+		return;
+
+	/* Nothing to do if already logged disjoint state on this atom */
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+		return;
+
+	target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+	kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom)
+{
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+		target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+		kbase_disjoint_state_down(kbdev);
+	}
+}
+
+
+#if KBASE_GPU_RESET_EN
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+	int i;
+
+	dev_err(kbdev->dev, "Register state:");
+	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+	dev_err(kbdev->dev, "  JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x JOB_IRQ_THROTTLE=0x%08x",
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), NULL));
+	for (i = 0; i < 3; i++) {
+		dev_err(kbdev->dev, "  JS%d_STATUS=0x%08x      JS%d_HEAD_LO=0x%08x",
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
+					NULL),
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
+					NULL));
+	}
+	dev_err(kbdev->dev, "  MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+	dev_err(kbdev->dev, "  GPU_IRQ_MASK=0x%08x    JOB_IRQ_MASK=0x%08x     MMU_IRQ_MASK=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+	dev_err(kbdev->dev, "  PWR_OVERRIDE0=0x%08x   PWR_OVERRIDE1=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+	dev_err(kbdev->dev, "  SHADER_CONFIG=0x%08x   L2_MMU_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+}
+
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+	unsigned long flags, mmu_flags;
+	struct kbase_device *kbdev;
+	int i;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+	bool try_schedule = false;
+	bool silent = false;
+
+	KBASE_DEBUG_ASSERT(data);
+
+	kbdev = container_of(data, struct kbase_device,
+						hwaccess.backend.reset_work);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_SILENT)
+		silent = true;
+
+	KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	/* Make sure the timer has completed - this cannot be done from
+	 * interrupt context, so this cannot be done within
+	 * kbasep_try_reset_gpu_early. */
+	hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* This would re-activate the GPU. Since it's already idle,
+		 * there's no need to reset it */
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+		kbase_disjoint_state_down(kbdev);
+		wake_up(&kbdev->hwaccess.backend.reset_wait);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, mmu_flags);
+	/* We're about to flush out the IRQs and their bottom half's */
+	kbdev->irq_reset_flush = true;
+
+	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
+	 * spinlock; this also clears any outstanding interrupts */
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	kbase_pm_disable_interrupts(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, mmu_flags);
+
+	/* Ensure that any IRQ handlers have finished
+	 * Must be done without any locks IRQ handlers will take */
+	kbase_synchronize_irqs(kbdev);
+
+	/* Flush out any in-flight work items */
+	kbase_flush_mmu_wqs(kbdev);
+
+	/* The flush has completed so reset the active indicator */
+	kbdev->irq_reset_flush = false;
+
+	mutex_lock(&kbdev->pm.lock);
+	/* We hold the pm lock, so there ought to be a current policy */
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+	/* All slot have been soft-stopped and we've waited
+	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
+	 * assume that anything that is still left on the GPU is stuck there and
+	 * we'll kill it when we reset the GPU */
+
+	if (!silent)
+		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+								RESET_TIMEOUT);
+
+	/* Output the state of some interesting registers to help in the
+	 * debugging of GPU resets */
+	if (!silent)
+		kbase_debug_dump_registers(kbdev);
+
+	/* Reset the GPU */
+	kbase_pm_init_hw(kbdev, 0);
+
+	/* Complete any jobs that were still on the GPU */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	kbase_backend_reset(kbdev, &end_timestamp);
+	kbase_pm_metrics_update(kbdev, NULL);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Reprogram the GPU's MMU */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		mutex_lock(&as->transaction_mutex);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		if (js_devdata->runpool_irq.per_as_data[i].kctx)
+			kbase_mmu_update(
+				js_devdata->runpool_irq.per_as_data[i].kctx);
+		else
+			kbase_mmu_disable_as(kbdev, i);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&as->transaction_mutex);
+	}
+
+	kbase_pm_enable_interrupts(kbdev);
+
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+
+	kbase_disjoint_state_down(kbdev);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	if (!silent)
+		dev_err(kbdev->dev, "Reset complete");
+
+	if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
+		try_schedule = true;
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Find out what cores are required now */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* Synchronously request and wait for those cores, because if
+	 * instrumentation is enabled it would need them immediately. */
+	kbase_pm_check_transitions_sync(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Try submitting some jobs to restart processing */
+	if (try_schedule) {
+		KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
+									0);
+		kbase_js_sched_all(kbdev);
+	}
+
+	kbase_pm_context_idle(kbdev);
+
+	/* Release vinstr */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+						hwaccess.backend.reset_timer);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Reset still pending? */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+						KBASE_RESET_GPU_COMMITTED)
+		queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+	int i;
+	int pending_jobs = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Count the number of jobs */
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+	if (pending_jobs > 0) {
+		/* There are still jobs on the GPU - wait */
+		return;
+	}
+
+	/* To prevent getting incorrect registers when dumping failed job,
+	 * skip early reset.
+	 */
+	if (kbdev->job_fault_debug != false)
+		return;
+
+	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+	 * been called), and that no other thread beat this thread to starting
+	 * the reset */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+						KBASE_RESET_GPU_COMMITTED) {
+		/* Reset has already occurred */
+		return;
+	}
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	kbasep_try_reset_gpu_early_locked(kbdev);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ *   The function returns a boolean which should be interpreted as follows:
+ *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
+ *   false - Another thread is performing a reset, kbase_reset_gpu should
+ *   not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_PREPARED) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return false;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_softstop(kbdev, i, NULL);
+
+	return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	bool ret;
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early_locked(kbdev);
+}
+
+void kbase_reset_gpu_silent(struct kbase_device *kbdev)
+{
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_SILENT) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+			&kbdev->hwaccess.backend.reset_work);
+}
+
+bool kbase_reset_gpu_active(struct kbase_device *kbdev)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_NOT_PENDING)
+		return false;
+
+	return true;
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100755
index 0000000..8f1e561
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,155 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpoool_irq::lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev:		Device pointer
+ * @s:			Job slot
+ * @completion_code:	Completion code of job reported by GPU
+ * @job_tail:		Job tail address reported by GPU
+ * @end_timestamp:	Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+					u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static inline char *kbasep_make_job_slot_string(int js, char *js_string)
+{
+	sprintf(js_string, "job_slot_%i", js);
+	return js_string;
+}
+#endif
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpoool_irq::lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ *						   on the specified atom
+ * @kbdev:		Device pointer
+ * @js:			Job slot to stop on
+ * @action:		The action to perform, either JSn_COMMAND_HARD_STOP or
+ *			JSn_COMMAND_SOFT_STOP
+ * @core_reqs:		Core requirements of atom to stop
+ * @target_katom:	Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ *					 slot belonging to a given context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @katom:	Specific atom to stop. May be NULL
+ * @js:		Job slot to hard stop
+ * @action:	The action to perform, either JSn_COMMAND_HARD_STOP or
+ *		JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
new file mode 100755
index 0000000..da7c4df
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -0,0 +1,1669 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_10969_workaround.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Return whether the specified ringbuffer is empty. HW access lock must be
+ * held */
+#define SLOT_RB_EMPTY(rb)   (rb->write_idx == rb->read_idx)
+/* Return number of atoms currently in the specified ringbuffer. HW access lock
+ * must be held */
+#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer
+ * @kbdev: Device pointer
+ * @katom: Atom to enqueue
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr];
+
+	WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
+	rb->write_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+/**
+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once
+ * it has been completed
+ * @kbdev:         Device pointer
+ * @js:            Job slot to remove atom from
+ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in
+ *                 which case current time will be used.
+ *
+ * Context: Caller must hold the HW access lock
+ *
+ * Return: Atom removed from ringbuffer
+ */
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
+						int js,
+						ktime_t *end_timestamp)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+	struct kbase_jd_atom *katom;
+
+	if (SLOT_RB_EMPTY(rb)) {
+		WARN(1, "GPU ringbuffer unexpectedly empty\n");
+		return NULL;
+	}
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
+
+	kbase_gpu_release_atom(kbdev, katom, end_timestamp);
+
+	rb->read_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
+
+	kbase_js_debug_log_current_affinities(kbdev);
+
+	return katom;
+}
+
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
+		return NULL; /* idx out of range */
+
+	return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js)
+{
+	return kbase_gpu_inspect(kbdev, js, 0);
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	if (SLOT_RB_EMPTY(rb))
+		return NULL;
+
+	return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom;
+}
+
+/**
+ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently
+ * on the GPU
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ *
+ * Return: true if there are atoms on the GPU for slot js,
+ *         false otherwise
+ */
+static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (!katom)
+			return false;
+		if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED ||
+				katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
+ * currently on the GPU
+ * @kbdev:  Device pointer
+ *
+ * Return: true if there are any atoms on the GPU, false otherwise
+ */
+static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
+{
+	int js;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+			if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+				return true;
+		}
+	}
+	return false;
+}
+
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED))
+			nr++;
+	}
+
+	return nr;
+}
+
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		if (kbase_gpu_inspect(kbdev, js, i))
+			nr++;
+	}
+
+	return nr;
+}
+
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
+				enum kbase_atom_gpu_rb_state min_rb_state)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state >= min_rb_state))
+			nr++;
+	}
+
+	return nr;
+}
+
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* The GPU is being reset - so prevent submission */
+		return 0;
+	}
+
+	return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
+}
+
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom);
+
+static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
+						int js,
+						struct kbase_jd_atom *katom)
+{
+	/* The most recently checked affinity. Having this at this scope allows
+	 * us to guarantee that we've checked the affinity in this function
+	 * call.
+	 */
+	u64 recently_chosen_affinity = 0;
+	bool chosen_affinity = false;
+	bool retry;
+
+	do {
+		retry = false;
+
+		/* NOTE: The following uses a number of FALLTHROUGHs to optimize
+		 * the calls to this function. Ending of the function is
+		 * indicated by BREAK OUT */
+		switch (katom->coreref_state) {
+			/* State when job is first attempted to be run */
+		case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+			KBASE_DEBUG_ASSERT(katom->affinity == 0);
+
+			/* Compute affinity */
+			if (false == kbase_js_choose_affinity(
+					&recently_chosen_affinity, kbdev, katom,
+									js)) {
+				/* No cores are currently available */
+				/* *** BREAK OUT: No state transition *** */
+				break;
+			}
+
+			chosen_affinity = true;
+
+			/* Request the cores */
+			kbase_pm_request_cores(kbdev,
+					katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+			katom->affinity = recently_chosen_affinity;
+
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+			{
+				enum kbase_pm_cores_ready cores_ready;
+
+				KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+				cores_ready = kbase_pm_register_inuse_cores(
+						kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						katom->affinity);
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Stay in this state and return, to
+					 * retry at this state later */
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: No state transition
+					 * *** */
+					break;
+				}
+				/* Proceed to next state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+			}
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+			/* Optimize out choosing the affinity twice in the same
+			 * function call */
+			if (chosen_affinity == false) {
+				/* See if the affinity changed since a previous
+				 * call. */
+				if (false == kbase_js_choose_affinity(
+						&recently_chosen_affinity,
+							kbdev, katom, js)) {
+					/* No cores are currently available */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REQUEST_ON_RECHECK_FAILED,
+						katom->kctx, katom,
+						katom->jc, js,
+						(u32) recently_chosen_affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+				chosen_affinity = true;
+			}
+
+			/* Now see if this requires a different set of cores */
+			if (recently_chosen_affinity != katom->affinity) {
+				enum kbase_pm_cores_ready cores_ready;
+
+				kbase_pm_request_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+				/* Register new cores whilst we still hold the
+				 * old ones, to minimize power transitions */
+				cores_ready =
+					kbase_pm_register_inuse_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+
+				/* Fixup the state that was reduced by
+				 * deref_cores: */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				katom->affinity = recently_chosen_affinity;
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				/* Now might be waiting for powerup again, with
+				 * a new affinity */
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Return to previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_ON_RECHECK_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+			}
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+		case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+			KBASE_DEBUG_ASSERT(katom->affinity ==
+						recently_chosen_affinity);
+
+			/* Note: this is where the caller must've taken the
+			 * runpool_irq.lock */
+
+			/* Check for affinity violations - if there are any,
+			 * then we just ask the caller to requeue and try again
+			 * later */
+			if (kbase_js_affinity_would_violate(kbdev, js,
+					katom->affinity) != false) {
+				/* Return to previous state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				/* *** BREAK OUT: Transition to lower state ***
+				 */
+				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_AFFINITY_WOULD_VIOLATE,
+					katom->kctx, katom, katom->jc, js,
+					(u32) katom->affinity);
+				break;
+			}
+
+			/* No affinity violations would result, so the cores are
+			 * ready */
+			katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
+			/* *** BREAK OUT: Cores Ready *** */
+			break;
+
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false,
+					"Unhandled kbase_atom_coreref_state %d",
+							katom->coreref_state);
+			break;
+		}
+	} while (retry != false);
+
+	return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
+}
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	switch (katom->coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(katom->affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							katom->coreref_state);
+		break;
+	}
+
+	katom->affinity = 0;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+}
+
+static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							coreref_state);
+		break;
+	}
+}
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp)
+{
+	switch (katom->gpu_rb_state) {
+	case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+		/* Should be impossible */
+		WARN(1, "Attempting to release atom not in ringbuffer\n");
+		break;
+
+	case KBASE_ATOM_GPU_RB_SUBMITTED:
+		/* Inform power management at start/finish of atom so it can
+		 * update its GPU utilisation metrics. Mark atom as not
+		 * submitted beforehand. */
+		katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		kbase_pm_metrics_update(kbdev, end_timestamp);
+
+		if (katom->core_req & BASE_JD_REQ_PERMON)
+			kbase_pm_release_gpu_cycle_counter(kbdev);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_READY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+		kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
+							katom->affinity);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+		break;
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+		break;
+	}
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	kbase_gpu_release_atom(kbdev, katom, NULL);
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+}
+
+static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	bool slot_busy[3];
+
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+	slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+
+	if ((js == 2 && !(slot_busy[0] || slot_busy[1])) ||
+		(js != 2 && !slot_busy[2]))
+		return true;
+
+	/* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */
+	if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1) ||
+			backend->rmu_workaround_flag))
+		return false;
+
+	/* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */
+	if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) ||
+			!backend->rmu_workaround_flag))
+		return false;
+
+	backend->rmu_workaround_flag = !backend->rmu_workaround_flag;
+
+	return true;
+}
+
+static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev)
+{
+	return kbdev->protected_mode;
+}
+
+static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot enter protected mode: protected callbacks not specified.\n");
+
+	if (kbdev->protected_ops) {
+		/* Switch GPU to protected mode */
+		err = kbdev->protected_ops->protected_mode_enter(kbdev);
+
+		if (err)
+			dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n",
+					err);
+		else
+			kbdev->protected_mode = true;
+	}
+
+	return err;
+}
+
+static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot exit protected mode: protected callbacks not specified.\n");
+
+	if (!kbdev->protected_ops)
+		return -EINVAL;
+
+	kbdev->protected_mode_transition = true;
+	kbase_reset_gpu_silent(kbdev);
+
+	return 0;
+}
+
+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	switch (katom[idx]->exit_protected_state) {
+	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
+		/*
+		 * If the atom ahead of this one hasn't got to being
+		 * submitted yet then bail.
+		 */
+		if (idx == 1 &&
+			(katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED &&
+			katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+			return -EAGAIN;
+
+		/* If we're not exiting protected mode then we're done here. */
+		if (!(kbase_gpu_in_protected_mode(kbdev) &&
+				!kbase_jd_katom_is_protected(katom[idx])))
+			return 0;
+
+		/*
+		 * If there is a transition in progress, or work still
+		 * on the GPU try again later.
+		 */
+		if (kbdev->protected_mode_transition ||
+				kbase_gpu_atoms_submitted_any(kbdev))
+			return -EAGAIN;
+
+		/*
+		 * Exiting protected mode requires a reset, but first the L2
+		 * needs to be powered down to ensure it's not active when the
+		 * reset is issued.
+		 */
+		katom[idx]->exit_protected_state =
+				KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
+		if (kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/*
+			 * The L2 is still powered, wait for all the users to
+			 * finish with it before doing the actual reset.
+			 */
+			return -EAGAIN;
+		}
+		katom[idx]->exit_protected_state =
+				KBASE_ATOM_EXIT_PROTECTED_RESET;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET:
+		/* Issue the reset to the GPU */
+		err = kbase_gpu_protected_mode_reset(kbdev);
+		if (err) {
+			/* Failed to exit protected mode, fail atom */
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+			return -EINVAL;
+		}
+
+		katom[idx]->exit_protected_state =
+				KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT:
+		if (kbase_reset_gpu_active(kbdev))
+			return -EAGAIN;
+
+		/* protected mode sanity checks */
+		KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev));
+		KBASE_DEBUG_ASSERT_MSG(
+			(kbase_jd_katom_is_protected(katom[idx]) && js == 0) ||
+			!kbase_jd_katom_is_protected(katom[idx]),
+			"Protected atom on JS%d not supported", js);
+	}
+
+	return 0;
+}
+
+void kbase_gpu_slot_update(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_jd_atom *katom[2];
+		int idx;
+
+		katom[0] = kbase_gpu_inspect(kbdev, js, 0);
+		katom[1] = kbase_gpu_inspect(kbdev, js, 1);
+		WARN_ON(katom[1] && !katom[0]);
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			bool cores_ready;
+			int ret;
+
+			if (!katom[idx])
+				continue;
+
+			switch (katom[idx]->gpu_rb_state) {
+			case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+				/* Should be impossible */
+				WARN(1, "Attempting to update atom not in ringbuffer\n");
+				break;
+
+			case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+				if (katom[idx]->atom_flags &
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED)
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT:
+				/*
+				 * Exiting protected mode must be done before
+				 * the references on the cores are taken as
+				 * a power down the L2 is required which
+				 * can't happen after the references for this
+				 * atom are taken.
+				 */
+				ret = kbase_jm_exit_protected_mode(kbdev,
+						katom, idx, js);
+				if (ret)
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+				if (katom[idx]->will_fail_event_code) {
+					kbase_gpu_mark_atom_for_return(kbdev,
+							katom[idx]);
+					/* Set EVENT_DONE so this atom will be
+					   completed, not unpulled. */
+					katom[idx]->event_code =
+						BASE_JD_EVENT_DONE;
+					/* Only return if head atom or previous
+					 * atom already removed - as atoms must
+					 * be returned in order. */
+					if (idx == 0 ||	katom[0]->gpu_rb_state ==
+							KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+						kbase_gpu_dequeue_atom(kbdev, js, NULL);
+						kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+					}
+					break;
+				}
+
+
+				cores_ready =
+					kbasep_js_job_check_ref_cores(kbdev, js,
+								katom[idx]);
+
+				if (katom[idx]->event_code ==
+						BASE_JD_EVENT_PM_EVENT) {
+					katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+					break;
+				}
+
+				if (!cores_ready)
+					break;
+
+				kbase_js_affinity_retain_slot_cores(kbdev, js,
+							katom[idx]->affinity);
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+				if (!kbase_gpu_rmu_workaround(kbdev, js))
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY:
+
+				/* Only submit if head atom or previous atom
+				 * already submitted */
+				if (idx == 1 &&
+					(katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED &&
+					katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+					break;
+
+				/*
+				 * If the GPU is transitioning protected mode
+				 * then bail now and we'll be called when the
+				 * new state has settled.
+				 */
+				if (kbdev->protected_mode_transition)
+					break;
+
+				if (!kbase_gpu_in_protected_mode(kbdev) && kbase_jd_katom_is_protected(katom[idx])) {
+					int err = 0;
+
+					/* Not in correct mode, take action */
+					if (kbase_gpu_atoms_submitted_any(kbdev)) {
+						/*
+						 * We are not in the correct
+						 * GPU mode for this job, and
+						 * we can't switch now because
+						 * there are jobs already
+						 * running.
+						 */
+						break;
+					}
+					if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
+						/*
+						 * We can't switch now because
+						 * the vinstr core state switch
+						 * is not done yet.
+						 */
+						break;
+					}
+					/* Once reaching this point GPU must be
+					 * switched to protected mode or vinstr
+					 * re-enabled. */
+
+					/* No jobs running, so we can switch GPU mode right now */
+					err = kbase_gpu_protected_mode_enter(kbdev);
+					if (err) {
+						/*
+						 * Failed to switch into protected mode, resume
+						 * vinstr core and fail atom.
+						 */
+						kbase_vinstr_resume(kbdev->vinstr_ctx);
+						katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+						kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+						/* Only return if head atom or previous atom
+						 * already removed - as atoms must be returned
+						 * in order */
+						if (idx == 0 || katom[0]->gpu_rb_state ==
+								KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+							kbase_gpu_dequeue_atom(kbdev, js, NULL);
+							kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+						}
+						break;
+					}
+				}
+
+				/* Protected mode sanity checks */
+				KBASE_DEBUG_ASSERT_MSG(
+					kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev),
+					"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+					kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev));
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_READY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_READY:
+
+				/* Only submit if head atom or previous atom
+				 * already submitted */
+				if (idx == 1 &&
+					(katom[0]->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED &&
+					katom[0]->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+					break;
+
+				/* Check if this job needs the cycle counter
+				 * enabled before submission */
+				if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+					kbase_pm_request_gpu_cycle_counter_l2_is_on(
+									kbdev);
+
+				kbase_job_hw_submit(kbdev, katom[idx], js);
+				katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_SUBMITTED;
+
+				/* Inform power management at start/finish of
+				 * atom so it can update its GPU utilisation
+				 * metrics. */
+				kbase_pm_metrics_update(kbdev,
+						&katom[idx]->start_timestamp);
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_SUBMITTED:
+				/* Atom submitted to HW, nothing else to do */
+				break;
+
+			case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+				/* Only return if head atom or previous atom
+				 * already removed - as atoms must be returned
+				 * in order */
+				if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					kbase_gpu_dequeue_atom(kbdev, js, NULL);
+					kbase_jm_return_atom_to_js(kbdev,
+								katom[idx]);
+				}
+				break;
+			}
+		}
+	}
+
+	/* Warn if PRLAM-8987 affinity restrictions are violated */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1)) &&
+			kbase_gpu_atoms_submitted(kbdev, 2));
+}
+
+
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	kbase_gpu_enqueue_atom(kbdev, katom);
+	kbase_gpu_slot_update(kbdev);
+}
+
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_atom *next_katom;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	katom = kbase_gpu_inspect(kbdev, js, 0);
+	next_katom = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (next_katom && katom->kctx == next_katom->kctx &&
+		next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+		(kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+		kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL)
+									!= 0)) {
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+				JS_COMMAND_NOP, NULL);
+		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		return true;
+	}
+
+	return false;
+}
+
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) &&
+			completion_code != BASE_JD_EVENT_DONE &&
+			!(completion_code & BASE_JD_SW_EVENT)) {
+		katom->need_cache_flush_cores_retained = katom->affinity;
+		kbase_pm_request_cores(kbdev, false, katom->affinity);
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
+		if (kbdev->gpu_props.num_core_groups > 1 &&
+			!(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[0].core_mask
+									) &&
+			(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[1].core_mask
+									)) {
+			dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
+			katom->need_cache_flush_cores_retained =
+								katom->affinity;
+			kbase_pm_request_cores(kbdev, false,
+							katom->affinity);
+		}
+	}
+
+	katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+	kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);
+	kbase_tlstream_tl_nret_atom_lpu(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[
+				katom->slot_nr]);
+	kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]);
+	kbase_tlstream_tl_nret_ctx_lpu(
+			kctx,
+			&kbdev->gpu_props.props.raw_props.js_features[
+				katom->slot_nr]);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED) {
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		/*
+		 * Dequeue next atom from ringbuffers on same slot if required.
+		 * This atom will already have been removed from the NEXT
+		 * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
+		 * the atoms on this slot are returned in the correct order.
+		 */
+		if (next_katom && katom->kctx == next_katom->kctx) {
+			kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+			kbase_jm_return_atom_to_js(kbdev, next_katom);
+		}
+	} else if (completion_code != BASE_JD_EVENT_DONE) {
+		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+		int i;
+
+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
+		KBASE_TRACE_DUMP(kbdev);
+#endif
+		kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+		/*
+		 * Remove all atoms on the same context from ringbuffers. This
+		 * will not remove atoms that are already on the GPU, as these
+		 * are guaranteed not to have fail dependencies on the failed
+		 * atom.
+		 */
+		for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+			struct kbase_jd_atom *katom_idx0 =
+						kbase_gpu_inspect(kbdev, i, 0);
+			struct kbase_jd_atom *katom_idx1 =
+						kbase_gpu_inspect(kbdev, i, 1);
+
+			if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
+				katom_idx0->gpu_rb_state !=
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Dequeue katom_idx0 from ringbuffer */
+				kbase_gpu_dequeue_atom(kbdev, i, end_timestamp);
+
+				if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					katom_idx0->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+					/* Dequeue katom_idx1 from ringbuffer */
+					kbase_gpu_dequeue_atom(kbdev, i,
+							end_timestamp);
+
+					katom_idx1->event_code =
+							BASE_JD_EVENT_STOPPED;
+					kbase_jm_return_atom_to_js(kbdev,
+								katom_idx1);
+				}
+				katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+
+			} else if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					katom_idx1->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Can not dequeue this atom yet - will be
+				 * dequeued when atom at idx0 completes */
+				katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_gpu_mark_atom_for_return(kbdev,
+								katom_idx1);
+			}
+		}
+	}
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
+					js, completion_code);
+
+	if (job_tail != 0 && job_tail != katom->jc) {
+		bool was_updated = (job_tail != katom->jc);
+
+		/* Some of the job has been executed, so we update the job chain
+		 * address to where we should resume from */
+		katom->jc = job_tail;
+		if (was_updated)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+						katom, job_tail, js);
+	}
+
+	/* Only update the event code for jobs that weren't cancelled */
+	if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+		katom->event_code = (base_jd_event_code)completion_code;
+
+	kbase_device_trace_register_access(kctx, REG_WRITE,
+						JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+						1 << js);
+
+	/* Complete the job, and start new ones
+	 *
+	 * Also defer remaining work onto the workqueue:
+	 * - Re-queue Soft-stopped jobs
+	 * - For any other jobs, queue the job back into the dependency system
+	 * - Schedule out the parent context if necessary, and schedule a new
+	 *   one in.
+	 */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	{
+		/* The atom in the HEAD */
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		if (next_katom && next_katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+								js_string),
+						ktime_to_ns(*end_timestamp),
+						(u32)next_katom->kctx->id, 0,
+						next_katom->work_id);
+			kbdev->hwaccess.backend.slot_rb[js].last_context =
+							next_katom->kctx;
+		} else {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+								js_string),
+						ktime_to_ns(ktime_get()), 0, 0,
+						0);
+			kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
+		}
+	}
+#endif
+
+	if (completion_code == BASE_JD_EVENT_STOPPED)
+		kbase_jm_return_atom_to_js(kbdev, katom);
+	else
+		kbase_jm_complete(kbdev, katom, end_timestamp);
+
+	/* Job completion may have unblocked other atoms. Try to update all job
+	 * slots */
+	kbase_gpu_slot_update(kbdev);
+}
+
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int idx;
+
+		for (idx = 0; idx < 2; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+									js, 0);
+			bool keep_in_jm_rb = false;
+
+			if (!katom)
+				continue;
+
+			if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED)
+				keep_in_jm_rb = true;
+
+			kbase_gpu_release_atom(kbdev, katom, NULL);
+
+			/*
+			 * If the atom wasn't on HW when the reset was issued
+			 * then leave it in the RB and next time we're kicked
+			 * it will be processed again from the starting state.
+			 */
+			if (keep_in_jm_rb) {
+				katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+				katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+				continue;
+			}
+
+			/*
+			 * The atom was on the HW when the reset was issued
+			 * all we can do is fail the atom.
+			 */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			kbase_jm_complete(kbdev, katom, end_timestamp);
+		}
+	}
+}
+
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
+	kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
+							katom->core_req, katom);
+	kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+}
+
+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom,
+						u32 action,
+						bool disjoint)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+	kbase_gpu_mark_atom_for_return(kbdev, katom);
+	kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+	if (disjoint)
+		kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
+									katom);
+}
+
+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
+{
+	if (katom->x_post_dep) {
+		struct kbase_jd_atom *dep_atom = katom->x_post_dep;
+
+		if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB &&
+			dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_RETURN_TO_JS)
+			return dep_atom->slot_nr;
+	}
+	return -1;
+}
+
+static void kbase_job_evicted(struct kbase_jd_atom *katom)
+{
+	kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom,
+			katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
+}
+
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	struct kbase_jd_atom *katom_idx0;
+	struct kbase_jd_atom *katom_idx1;
+
+	bool katom_idx0_valid, katom_idx1_valid;
+
+	bool ret = false;
+
+	int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
+	katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (katom) {
+		katom_idx0_valid = (katom_idx0 == katom);
+		/* If idx0 is to be removed and idx1 is on the same context,
+		 * then idx1 must also be removed otherwise the atoms might be
+		 * returned out of order */
+		if (katom_idx1)
+			katom_idx1_valid = (katom_idx1 == katom) ||
+						(katom_idx0_valid &&
+							(katom_idx0->kctx ==
+							katom_idx1->kctx));
+		else
+			katom_idx1_valid = false;
+	} else {
+		katom_idx0_valid = (katom_idx0 &&
+					(!kctx || katom_idx0->kctx == kctx));
+		katom_idx1_valid = (katom_idx1 &&
+					(!kctx || katom_idx1->kctx == kctx));
+	}
+
+	if (katom_idx0_valid)
+		stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
+	if (katom_idx1_valid)
+		stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1);
+
+	if (katom_idx0_valid) {
+		if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Simple case - just dequeue and return */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			if (katom_idx1_valid) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				katom_idx1->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx1);
+				kbasep_js_clear_submit_allowed(js_devdata,
+							katom_idx1->kctx);
+			}
+
+			katom_idx0->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+			kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+			kbasep_js_clear_submit_allowed(js_devdata,
+							katom_idx0->kctx);
+		} else {
+			/* katom_idx0 is on GPU */
+			if (katom_idx1 && katom_idx1->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* katom_idx0 and katom_idx1 are on GPU */
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+					/* idx0 has already completed - stop
+					 * idx1 if needed*/
+					if (katom_idx1_valid) {
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				} else {
+					/* idx1 is in NEXT registers - attempt
+					 * to remove */
+					kbase_reg_write(kbdev,
+							JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+					if (kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_HI), NULL)
+									!= 0) {
+						/* idx1 removed successfully,
+						 * will be handled in IRQ */
+						kbase_job_evicted(katom_idx1);
+						kbase_gpu_remove_atom(kbdev,
+								katom_idx1,
+								action, true);
+						stop_x_dep_idx1 =
+					should_stop_x_dep_slot(katom_idx1);
+
+						/* stop idx0 if still on GPU */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx0,
+								action);
+						ret = true;
+					} else if (katom_idx1_valid) {
+						/* idx0 has already completed,
+						 * stop idx1 if needed */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				}
+			} else if (katom_idx1_valid) {
+				/* idx1 not on GPU but must be dequeued*/
+
+				/* idx1 will be handled in IRQ */
+				kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+				/* stop idx0 */
+				/* This will be repeated for anything removed
+				 * from the next registers, since their normal
+				 * flow was also interrupted, and this function
+				 * might not enter disjoint state e.g. if we
+				 * don't actually do a hard stop on the head
+				 * atom */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			} else {
+				/* no atom in idx1 */
+				/* just stop idx0 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			}
+		}
+	} else if (katom_idx1_valid) {
+		if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Mark for return */
+			/* idx1 will be returned once idx0 completes */
+			kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+		} else {
+			/* idx1 is on GPU */
+			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+				/* idx0 has already completed - stop idx1 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx1,
+									action);
+				ret = true;
+			} else {
+				/* idx1 is in NEXT registers - attempt to
+				 * remove */
+				kbase_reg_write(kbdev, JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_LO), NULL) != 0 ||
+				    kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_HI), NULL) != 0) {
+					/* idx1 removed successfully, will be
+					 * handled in IRQ once idx0 completes */
+					kbase_job_evicted(katom_idx1);
+					kbase_gpu_remove_atom(kbdev, katom_idx1,
+									action,
+									false);
+				} else {
+					/* idx0 has already completed - stop
+					 * idx1 */
+					kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+					ret = true;
+				}
+			}
+		}
+	}
+
+
+	if (stop_x_dep_idx0 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0,
+								NULL, action);
+
+	if (stop_x_dep_idx1 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1,
+								NULL, action);
+
+	return ret;
+}
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	/* Limit the number of loops to avoid a hang if the interrupt is missed
+	 */
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	mutex_lock(&kbdev->cacheclean_lock);
+
+	/* use GPU_COMMAND completion solution */
+	/* clean & invalidate the caches */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+	/* wait for cache flush to complete before continuing */
+	while (--max_loops &&
+		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+						CLEAN_CACHES_COMPLETED) == 0)
+		;
+
+	/* clear the CLEAN_CACHES_COMPLETED irq */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
+							CLEAN_CACHES_COMPLETED);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+						CLEAN_CACHES_COMPLETED, NULL);
+	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING,
+	    "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+	mutex_unlock(&kbdev->cacheclean_lock);
+
+	kbase_pm_unrequest_cores(kbdev, false,
+					katom->need_cache_flush_cores_retained);
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	/*
+	 * If cache flush required due to HW workaround then perform the flush
+	 * now
+	 */
+	if (katom->need_cache_flush_cores_retained) {
+		kbase_gpu_cacheclean(kbdev, katom);
+		katom->need_cache_flush_cores_retained = 0;
+	}
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969)            &&
+	    (katom->core_req & BASE_JD_REQ_FS)                        &&
+	    katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT       &&
+	    (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+	    !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+		dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+		if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after was soft-stopped
+			 * Due to an HW issue we try to execute the job again.
+			 */
+			dev_dbg(kbdev->dev,
+				"Clamping has been executed, try to rerun the job\n"
+			);
+			katom->event_code = BASE_JD_EVENT_STOPPED;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+		}
+	}
+
+	/* Clear the coreref_state now - while check_deref_cores() may not have
+	 * been called yet, the caller will have taken a copy of this field. If
+	 * this is not done, then if the atom is re-scheduled (following a soft
+	 * stop) then the core reference would not be retaken. */
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->affinity = 0;
+}
+
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity,
+			coreref_state);
+
+	if (!kbdev->pm.active_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		mutex_lock(&kbdev->pm.lock);
+		kbase_pm_update_active(kbdev);
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+void kbase_gpu_dump_slots(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	unsigned long flags;
+	int js;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+									js,
+									idx);
+
+			if (katom)
+				dev_info(kbdev->dev,
+				"  js%d idx%d : katom=%p gpu_rb_state=%d\n",
+				js, idx, katom, katom->gpu_rb_state);
+			else
+				dev_info(kbdev->dev, "  js%d idx%d : empty\n",
+								js, idx);
+		}
+	}
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
+
+
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
new file mode 100755
index 0000000..102d94b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -0,0 +1,87 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_H_
+#define _KBASE_HWACCESS_GPU_H_
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/**
+ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot
+ *
+ * @kbdev:         Device pointer
+ * @js:            Job slot to evict from
+ *
+ * Evict the atom in the NEXT slot for the specified job slot. This function is
+ * called from the job complete IRQ handler when the previous job has failed.
+ *
+ * Return: true if job evicted from NEXT registers, false otherwise
+ */
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_gpu_complete_hw - Complete an atom on job slot js
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot that has completed
+ * @completion_code: Event code from job that has completed
+ * @job_tail:        The tail address from the hardware if the job has partially
+ *                   completed
+ * @end_timestamp:   Time of completion
+ */
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
+ *
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ * @idx:    Index into ringbuffer. 0 is the job currently running on
+ *          the slot, 1 is the job waiting, all other values are invalid.
+ * Return:  The atom at that position in the ringbuffer
+ *          or NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx);
+
+/**
+ * kbase_gpu_slot_update - Update state based on slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ *
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ */
+void kbase_gpu_slot_update(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
new file mode 100755
index 0000000..d665420
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
@@ -0,0 +1,308 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+#include "mali_kbase_hw.h"
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+									int js)
+{
+	/*
+	 * Here are the reasons for using job slot 2:
+	 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+	 * - In absence of the above, then:
+	 *  - Atoms with BASE_JD_REQ_COHERENT_GROUP
+	 *  - But, only when there aren't contexts with
+	 *  KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+	 *  all cores on slot 1 could be blocked by those using a coherent group
+	 *  on slot 2
+	 *  - And, only when you actually have 2 or more coregroups - if you
+	 *  only have 1 coregroup, then having jobs for slot 2 implies they'd
+	 *  also be for slot 1, meaning you'll get interference from them. Jobs
+	 *  able to run on slot 2 could also block jobs that can only run on
+	 *  slot 1 (tiler jobs)
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+
+	if (js != 2)
+		return true;
+
+	/* Only deal with js==2 now: */
+	if (kbdev->gpu_props.num_core_groups > 1) {
+		/* Only use slot 2 in the 2+ coregroup case */
+		if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+					KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
+								false) {
+			/* ...But only when we *don't* have atoms that run on
+			 * all cores */
+
+			/* No specific check for BASE_JD_REQ_COHERENT_GROUP
+			 * atoms - the policy will sort that out */
+			return true;
+		}
+	}
+
+	/* Above checks failed mean we shouldn't use slot 2 */
+	return false;
+}
+
+/*
+ * As long as it has been decided to have a deeper modification of
+ * what job scheduler, power manager and affinity manager will
+ * implement, this function is just an intermediate step that
+ * assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ *   and high cores in a core split will be evently distributed.
+ * - Odd combinations of core requirements have been filtered out
+ *   and do not get to this function (e.g. CS+T+NSS is not
+ *   supported here).
+ * - This function is frequently called and can be optimized,
+ *   (see notes in loops), but as the functionallity will likely
+ *   be modified, optimization has not been addressed.
+*/
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js)
+{
+	base_jd_core_req core_req = katom->core_req;
+	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+	u64 core_availability_mask;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+	/*
+	 * If no cores are currently available (core availability policy is
+	 * transitioning) then fail.
+	 */
+	if (0 == core_availability_mask) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+		*affinity = 0;
+		return false;
+	}
+
+	KBASE_DEBUG_ASSERT(js >= 0);
+
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+								BASE_JD_REQ_T) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+		 /* If the hardware supports XAFFINITY then we'll only enable
+		  * the tiler (which is the default so this is a no-op),
+		  * otherwise enable shader core 0. */
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = 1;
+		else
+			*affinity = 0;
+
+		return true;
+	}
+
+	if (1 == kbdev->gpu_props.num_cores) {
+		/* trivial case only one core, nothing to do */
+		*affinity = core_availability_mask &
+				kbdev->pm.debug_core_mask[js];
+	} else {
+		if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+			if (js == 0 || num_core_groups == 1) {
+				/* js[0] and single-core-group systems just get
+				 * the first core group */
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[0].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+			} else {
+				/* js[1], js[2] use core groups 0, 1 for
+				 * dual-core-group systems */
+				u32 core_group_idx = ((u32) js) - 1;
+
+				KBASE_DEBUG_ASSERT(core_group_idx <
+							num_core_groups);
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+
+				/* If the job is specifically targeting core
+				 * group 1 and the core availability policy is
+				 * keeping that core group off, then fail */
+				if (*affinity == 0 && core_group_idx == 1 &&
+						kbdev->pm.backend.cg1_disabled
+								== true)
+					katom->event_code =
+							BASE_JD_EVENT_PM_EVENT;
+			}
+		} else {
+			/* All cores are available when no core split is
+			 * required */
+			*affinity = core_availability_mask &
+					kbdev->pm.debug_core_mask[js];
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	/*
+	 * If no cores are currently available in the desired core group(s)
+	 * (core availability policy is transitioning) then fail.
+	 */
+	if (*affinity == 0)
+		return false;
+
+	/* Enable core 0 if tiler required for hardware without XAFFINITY
+	 * support (notes above) */
+	if (core_req & BASE_JD_REQ_T) {
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = *affinity | 1;
+	}
+
+	return true;
+}
+
+static inline bool kbase_js_affinity_is_violating(
+						struct kbase_device *kbdev,
+								u64 *affinities)
+{
+	/* This implementation checks whether the two slots involved in Generic
+	 * thread creation have intersecting affinity. This is due to micro-
+	 * architectural issues where a job in slot A targetting cores used by
+	 * slot B could prevent the job in slot B from making progress until the
+	 * job in slot A has completed.
+	 */
+	u64 affinity_set_left;
+	u64 affinity_set_right;
+	u64 intersection;
+
+	KBASE_DEBUG_ASSERT(affinities != NULL);
+
+	affinity_set_left = affinities[1];
+
+	affinity_set_right = affinities[2];
+
+	/* A violation occurs when any bit in the left_set is also in the
+	 * right_set */
+	intersection = affinity_set_left & affinity_set_right;
+
+	return (bool) (intersection != (u64) 0u);
+}
+
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
+			sizeof(js_devdata->runpool_irq.slot_affinities));
+
+	new_affinities[js] |= affinity;
+
+	return kbase_js_affinity_is_violating(kbdev, new_affinities);
+}
+
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
+								== false);
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		cnt =
+		++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (cnt == 1)
+			js_devdata->runpool_irq.slot_affinities[js] |= bit;
+
+		cores &= ~bit;
+	}
+}
+
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		KBASE_DEBUG_ASSERT(
+		js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
+
+		cnt =
+		--(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (0 == cnt)
+			js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
+
+		cores &= ~bit;
+	}
+}
+
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int slot_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	for (slot_nr = 0; slot_nr < 3; ++slot_nr)
+		KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
+							NULL, 0u, slot_nr,
+			(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
new file mode 100755
index 0000000..fbffa3b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Affinity Manager internal APIs.
+ */
+
+#ifndef _KBASE_JS_AFFINITY_H_
+#define _KBASE_JS_AFFINITY_H_
+
+/**
+ * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
+ * submit a job to a particular job slot in the current status
+ *
+ * @kbdev: The kbase device structure of the device
+ * @js:    Job slot number to check for allowance
+ *
+ * Will check if submitting to the given job slot is allowed in the current
+ * status.  For example using job slot 2 while in soft-stoppable state and only
+ * having 1 coregroup is not allowed by the policy. This function should be
+ * called prior to submitting a job to a slot to make sure policy rules are not
+ * violated.
+ *
+ * The following locking conditions are made on the caller
+ * - it must hold kbasep_js_device_data.runpool_irq.lock
+ */
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+									int js);
+
+/**
+ * kbase_js_choose_affinity - Compute affinity for a given job.
+ *
+ * @affinity: Affinity bitmap computed
+ * @kbdev:    The kbase device structure of the device
+ * @katom:    Job chain of which affinity is going to be found
+ * @js:       Slot the job chain is being submitted
+ *
+ * Currently assumes an all-on/all-off power management policy.
+ * Also assumes there is at least one core with tiler available.
+ *
+ * Returns true if a valid affinity was chosen, false if
+ * no cores were available.
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					int js);
+
+/**
+ * kbase_js_affinity_would_violate - Determine whether a proposed affinity on
+ * job slot @js would cause a violation of affinity restrictions.
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot to test
+ * @affinity: The affinity mask to test
+ *
+ * The following locks must be held by the caller
+ * - kbasep_js_device_data.runpool_irq.lock
+ *
+ * Return: true if the affinity would violate the restrictions
+ */
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
+ *                                       a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot retaining the cores
+ * @affinity: The cores to retain
+ *
+ * The following locks must be held by the caller
+ * - kbasep_js_device_data.runpool_irq.lock
+ */
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
+ *                                        by a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       Job slot
+ * @affinity: Bit mask of core to be released
+ *
+ * Cores must be released as soon as a job is dequeued from a slot's 'submit
+ * slots', and before another job is submitted to those slots. Otherwise, the
+ * refcount could exceed the maximum number submittable to a slot,
+ * %BASE_JM_SUBMIT_SLOTS.
+ *
+ * The following locks must be held by the caller
+ * - kbasep_js_device_data.runpool_irq.lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_debug_log_current_affinities - log the current affinities
+ *
+ * @kbdev:  Kbase device structure
+ *
+ * Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else				/*  KBASE_TRACE_ENABLE  */
+static inline void
+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+}
+#endif				/*  KBASE_TRACE_ENABLE  */
+
+#endif				/* _KBASE_JS_AFFINITY_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100755
index 0000000..a23deb4
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,357 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is
+ * enabled for a particular level is down to the integrator. However this is
+ * being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	s8 nr_running_ctxs;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* Timer must stop if we are suspending */
+	if (backend->suspend_timer)
+		return false;
+
+	/* nr_contexts_pullable is updated with the runpool_mutex. However, the
+	 * locking in the caller gives us a barrier that ensures
+	 * nr_contexts_pullable is up-to-date for reading */
+	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (kbdev->js_data.softstop_always) {
+		/* Debug support for allowing soft-stop on a single context */
+		return true;
+	}
+#endif				/* CONFIG_MALI_DEBUG */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+		/* Timeouts would have to be 4x longer (due to micro-
+		 * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there's:
+		 * - 2 or more CL contexts
+		 * - 1 or more GLES contexts
+		 *
+		 * NOTE: We will treat a context that has both Compute and Non-
+		 * Compute jobs will be treated as an OpenCL context (hence, we
+		 * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+		 */
+		{
+			s8 nr_compute_ctxs =
+				kbasep_js_ctx_attr_count_on_runpool(kbdev,
+						KBASEP_JS_CTX_ATTR_COMPUTE);
+			s8 nr_noncompute_ctxs = nr_running_ctxs -
+							nr_compute_ctxs;
+
+			return (bool) (nr_compute_ctxs >= 2 ||
+							nr_noncompute_ctxs > 0);
+		}
+	} else {
+		/* Run the timer callback whenever you have at least 1 context
+		 */
+		return (bool) (nr_running_ctxs > 0);
+	}
+}
+
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_backend_data *backend;
+	int s;
+	bool reset_needed = false;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	backend = container_of(timer, struct kbase_backend_data,
+							scheduling_timer);
+	kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
+	js_devdata = &kbdev->js_data;
+
+	/* Loop through the slots */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+		struct kbase_jd_atom *atom = NULL;
+
+		if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
+			atom = kbase_gpu_inspect(kbdev, s, 0);
+			KBASE_DEBUG_ASSERT(atom != NULL);
+		}
+
+		if (atom != NULL) {
+			/* The current version of the model doesn't support
+			 * Soft-Stop */
+			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+				u32 ticks = atom->sched_info.cfs.ticks++;
+
+#if !CINSTR_DUMPING_ENABLED
+				u32 soft_stop_ticks, hard_stop_ticks,
+								gpu_reset_ticks;
+				if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks_cl;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_cl;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_cl;
+				} else {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_ss;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_ss;
+				}
+
+				/* If timeouts have been changed then ensure
+				 * that atom tick count is not greater than the
+				 * new soft_stop timeout. This ensures that
+				 * atoms do not miss any of the timeouts due to
+				 * races between this worker and the thread
+				 * changing the timeouts. */
+				if (backend->timeouts_updated &&
+						ticks > soft_stop_ticks)
+					ticks = atom->sched_info.cfs.ticks =
+							soft_stop_ticks;
+
+				/* Job is Soft-Stoppable */
+				if (ticks == soft_stop_ticks) {
+					int disjoint_threshold =
+		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+					u32 softstop_flags = 0u;
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks ticks.
+					 * Soft stop the slot so we can run
+					 * other jobs.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+					/* nr_user_contexts_running is updated
+					 * with the runpool_mutex, but we can't
+					 * take that here.
+					 *
+					 * However, if it's about to be
+					 * increased then the new context can't
+					 * run any jobs until they take the
+					 * runpool_irq lock, so it's OK to
+					 * observe the older value.
+					 *
+					 * Similarly, if it's about to be
+					 * decreased, the last job from another
+					 * context has already finished, so it's
+					 * not too bad that we observe the older
+					 * value and register a disjoint event
+					 * when we try soft-stopping */
+					if (js_devdata->nr_user_contexts_running
+							>= disjoint_threshold)
+						softstop_flags |=
+						JS_COMMAND_SW_CAUSES_DISJOINT;
+
+					kbase_job_slot_softstop_swflags(kbdev,
+						s, atom, softstop_flags);
+#endif
+				} else if (ticks == hard_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_ss ticks.
+					 * It should have been soft-stopped by
+					 * now. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks == gpu_reset_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_ss ticks.
+					 * It should have left the GPU by now.
+					 * Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#else				/* !CINSTR_DUMPING_ENABLED */
+				/* NOTE: During CINSTR_DUMPING_ENABLED, we use
+				 * the alternate timeouts, which makes the hard-
+				 * stop and GPU reset timeout much longer. We
+				 * also ensure that we don't soft-stop at all.
+				 */
+				if (ticks == js_devdata->soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks. We do
+					 * not soft-stop during
+					 * CINSTR_DUMPING_ENABLED, however.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+				} else if (ticks ==
+					js_devdata->hard_stop_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_dumping
+					 * ticks. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks ==
+					js_devdata->gpu_reset_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_dumping
+					 * ticks. It should have left the GPU by
+					 * now. Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#endif				/* !CINSTR_DUMPING_ENABLED */
+			}
+		}
+	}
+#if KBASE_GPU_RESET_EN
+	if (reset_needed) {
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* the timer is re-issued if there is contexts in the run-pool */
+
+	if (backend->timer_running)
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+			HRTIMER_MODE_REL);
+
+	backend->timeouts_updated = false;
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	unsigned long flags;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	if (!timer_callback_should_run(kbdev)) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+		backend->timer_running = false;
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		/* From now on, return value of timer_callback_should_run() will
+		 * also cause the timer to not requeue itself. Its return value
+		 * cannot change, because it depends on variables updated with
+		 * the runpool_mutex held, which the caller of this must also
+		 * hold */
+		hrtimer_cancel(&backend->scheduling_timer);
+	}
+
+	if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+		backend->timer_running = true;
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+							HRTIMER_MODE_REL);
+
+		KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
+									0u);
+	}
+}
+
+int kbase_backend_timer_init(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	backend->scheduling_timer.function = timer_callback;
+
+	backend->timer_running = false;
+
+	return 0;
+}
+
+void kbase_backend_timer_term(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_cancel(&backend->scheduling_timer);
+}
+
+void kbase_backend_timer_suspend(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = true;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timer_resume(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = false;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->timeouts_updated = true;
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
new file mode 100755
index 0000000..3f53779
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#ifndef _KBASE_JS_BACKEND_H_
+#define _KBASE_JS_BACKEND_H_
+
+/**
+ * kbase_backend_timer_init() - Initialise the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_term() - Terminate the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver termination
+ */
+void kbase_backend_timer_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
+ *                               timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on suspend, after the active count has reached
+ * zero. This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ *                              scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that is is not guaranteed to
+ * re-start the timer, only evalute whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100755
index 0000000..4a3572d
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,368 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+		u32 num_pages)
+{
+	u64 region;
+
+	/* can't lock a zero sized range */
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	region = pfn << PAGE_SHIFT;
+	/*
+	 * fls returns (given the ASSERT above):
+	 * 1 .. 32
+	 *
+	 * 10 + fls(num_pages)
+	 * results in the range (11 .. 42)
+	 */
+
+	/* gracefully handle num_pages being zero */
+	if (0 == num_pages) {
+		region |= 11;
+	} else {
+		u8 region_width;
+
+		region_width = 10 + fls(num_pages);
+		if (num_pages != (1ul << (region_width - 11))) {
+			/* not pow2, so must go up to the next pow2 */
+			region_width += 1;
+		}
+		KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+		KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+		region |= region_width;
+	}
+
+	return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+		unsigned int as_nr, struct kbase_context *kctx)
+{
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending. Do not log remaining register accesses. */
+	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
+
+	if (max_loops == 0) {
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		return -1;
+	}
+
+	/* If waiting in loop was performed, log last read value. */
+	if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
+		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	return 0;
+}
+
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
+		struct kbase_context *kctx)
+{
+	int status;
+
+	/* write AS_COMMAND when MMU is ready to accept another command */
+	status = wait_ready(kbdev, as_nr, kctx);
+	if (status == 0)
+		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
+									kctx);
+
+	return status;
+}
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+	const int num_as = 16;
+	const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+	const int pf_shift = 0;
+	const unsigned long as_bit_mask = (1UL << num_as) - 1;
+	unsigned long flags;
+	u32 new_mask;
+	u32 tmp;
+
+	/* bus faults */
+	u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+	/* page faults (note: Ignore ASes with both pf and bf) */
+	u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	/* remember current mask */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	/* mask interrupts for now */
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+	while (bf_bits | pf_bits) {
+		struct kbase_as *as;
+		int as_no;
+		struct kbase_context *kctx;
+
+		/*
+		 * the while logic ensures we have a bit set, no need to check
+		 * for not-found here
+		 */
+		as_no = ffs(bf_bits | pf_bits) - 1;
+		as = &kbdev->as[as_no];
+
+		/*
+		 * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+		 * Bus/Page faults _should_ only occur whilst jobs are running,
+		 * and a job causing the Bus/Page fault shouldn't complete until
+		 * the MMU is updated
+		 */
+		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+
+		/* find faulting address */
+		as->fault_addr = kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_HI),
+						kctx);
+		as->fault_addr <<= 32;
+		as->fault_addr |= kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_LO),
+						kctx);
+
+		/* report the fault to debugfs */
+		kbase_as_fault_debugfs_new(kbdev, as_no);
+
+		/* record the fault status */
+		as->fault_status = kbase_reg_read(kbdev,
+						  MMU_AS_REG(as_no,
+							AS_FAULTSTATUS),
+						  kctx);
+
+		/* find the fault type */
+		as->fault_type = (bf_bits & (1 << as_no)) ?
+				KBASE_MMU_FAULT_TYPE_BUS :
+				KBASE_MMU_FAULT_TYPE_PAGE;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		as->fault_extra_addr = kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
+				kctx);
+		as->fault_extra_addr <<= 32;
+		as->fault_extra_addr |= kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
+				kctx);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+		if (kbase_as_has_bus_fault(as)) {
+			/* Mark bus fault as handled.
+			 * Note that a bus fault is processed first in case
+			 * where both a bus fault and page fault occur.
+			 */
+			bf_bits &= ~(1UL << as_no);
+
+			/* remove the queued BF (and PF) from the mask */
+			new_mask &= ~(MMU_BUS_ERROR(as_no) |
+					MMU_PAGE_FAULT(as_no));
+		} else {
+			/* Mark page fault as handled */
+			pf_bits &= ~(1UL << as_no);
+
+			/* remove the queued PF from the mask */
+			new_mask &= ~MMU_PAGE_FAULT(as_no);
+		}
+
+		/* Process the interrupt for this address space */
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+		kbase_mmu_interrupt_process(kbdev, kctx, as);
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock,
+				flags);
+	}
+
+	/* reenable interrupts */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	new_mask |= tmp;
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx)
+{
+	struct kbase_mmu_setup *current_setup = &as->current_setup;
+	u32 transcfg = 0;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+
+	/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
+	/* Clear PTW_MEMATTR bits */
+	transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+	/* Enable correct PTW_MEMATTR bits */
+	transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
+		/* Clear PTW_SH bits */
+		transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+		/* Enable correct PTW_SH bits */
+		transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
+	}
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+			transcfg, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+			(current_setup->transcfg >> 32) & 0xFFFFFFFFUL, kctx);
+
+#else /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+			current_setup->transtab & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+			(current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+			current_setup->memattr & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+			(current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_tlstream_tl_attrib_as_config(as,
+			current_setup->transtab,
+			current_setup->memattr,
+			transcfg);
+
+	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+}
+
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+		unsigned int handling_irq)
+{
+	int ret;
+
+	if (op == AS_COMMAND_UNLOCK) {
+		/* Unlock doesn't require a lock first */
+		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+	} else {
+		u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+		/* Lock the region that needs to be updated */
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+				lock_addr & 0xFFFFFFFFUL, kctx);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+				(lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
+		write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+
+		/* Run the MMU operation */
+		write_cmd(kbdev, as->number, op, kctx);
+
+		/* Wait for the flush to complete */
+		ret = wait_ready(kbdev, as->number, kctx);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+			/* Issue an UNLOCK command to ensure that valid page
+			   tables are re-read by the GPU after an update.
+			   Note that, the FLUSH command should perform all the
+			   actions necessary, however the bus logs show that if
+			   multiple page faults occur within an 8 page region
+			   the MMU does not always re-read the updated page
+			   table entries for later faults or is only partially
+			   read, it subsequently raises the page fault IRQ for
+			   the same addresses, the unlock ensures that the MMU
+			   cache is flushed, so updates can be re-read.  As the
+			   region is now unlocked we need to issue 2 UNLOCK
+			   commands in order to flush the MMU/uTLB,
+			   see PRLAM-8812.
+			 */
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+		}
+	}
+
+	return ret;
+}
+
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 pf_bf_mask;
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	/* Clear the page (and bus fault IRQ as well in case one occurred) */
+	pf_bf_mask = MMU_PAGE_FAULT(as->number);
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		pf_bf_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 irq_mask;
+
+	/* Enable the page fault IRQ (and bus fault IRQ as well in case one
+	 * occurred) */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
+			MMU_PAGE_FAULT(as->number);
+
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		irq_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
new file mode 100755
index 0000000..c02253c
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Interface file for the direct implementation for MMU hardware access
+ *
+ * Direct MMU hardware interface
+ *
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
+#define _MALI_KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * kbase_mmu_interrupt - Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the &kbase_device.
+ *
+ * @kbdev:          kbase context to clear the fault from.
+ * @irq_stat:       Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+#endif	/* _MALI_KBASE_MMU_HW_DIRECT_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
new file mode 100755
index 0000000..0614348
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static void always_on_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void always_on_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+	"always_on",			/* name */
+	always_on_init,			/* init */
+	always_on_term,			/* term */
+	always_on_get_core_mask,	/* get_core_mask */
+	always_on_get_core_active,	/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_ALWAYS_ON,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
new file mode 100755
index 0000000..f9d244b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -0,0 +1,77 @@
+
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * DOC:
+ * The "Always on" power management policy has the following
+ * characteristics:
+ *
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *    All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *    All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *    The Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed. The GPU itself is also kept powered, even though it is not
+ *    needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * Note:
+ *
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data
+ * @dummy: unused dummy variable
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_always_on {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
new file mode 100755
index 0000000..711e44c
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -0,0 +1,388 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * GPU backend implementation of base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_on_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = true;
+}
+
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_off_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = false;
+}
+
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+	struct kbase_pm_callback_conf *callbacks;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_init(&kbdev->pm.lock);
+
+	kbdev->pm.backend.gpu_powered = false;
+	kbdev->pm.suspending = false;
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->pm.backend.driver_ready_for_irqs = false;
+#endif /* CONFIG_MALI_DEBUG */
+	kbdev->pm.backend.gpu_in_desired_state = true;
+	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+	if (callbacks) {
+		kbdev->pm.backend.callback_power_on =
+					callbacks->power_on_callback;
+		kbdev->pm.backend.callback_power_off =
+					callbacks->power_off_callback;
+		kbdev->pm.backend.callback_power_suspend =
+					callbacks->power_suspend_callback;
+		kbdev->pm.backend.callback_power_resume =
+					callbacks->power_resume_callback;
+		kbdev->pm.callback_power_runtime_init =
+					callbacks->power_runtime_init_callback;
+		kbdev->pm.callback_power_runtime_term =
+					callbacks->power_runtime_term_callback;
+		kbdev->pm.backend.callback_power_runtime_on =
+					callbacks->power_runtime_on_callback;
+		kbdev->pm.backend.callback_power_runtime_off =
+					callbacks->power_runtime_off_callback;
+		kbdev->pm.backend.callback_power_runtime_idle =
+					callbacks->power_runtime_idle_callback;
+	} else {
+		kbdev->pm.backend.callback_power_on = NULL;
+		kbdev->pm.backend.callback_power_off = NULL;
+		kbdev->pm.backend.callback_power_suspend = NULL;
+		kbdev->pm.backend.callback_power_resume = NULL;
+		kbdev->pm.callback_power_runtime_init = NULL;
+		kbdev->pm.callback_power_runtime_term = NULL;
+		kbdev->pm.backend.callback_power_runtime_on = NULL;
+		kbdev->pm.backend.callback_power_runtime_off = NULL;
+		kbdev->pm.backend.callback_power_runtime_idle = NULL;
+	}
+
+	/* Initialise the metrics subsystem */
+	ret = kbasep_pm_metrics_init(kbdev);
+	if (ret)
+		return ret;
+
+	init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
+	kbdev->pm.backend.l2_powered = 0;
+
+	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
+	kbdev->pm.backend.reset_done = false;
+
+	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+	kbdev->pm.active_count = 0;
+
+	spin_lock_init(&kbdev->pm.power_change_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+	if (kbase_pm_ca_init(kbdev) != 0)
+		goto workq_fail;
+
+	if (kbase_pm_policy_init(kbdev) != 0)
+		goto pm_policy_fail;
+
+	return 0;
+
+pm_policy_fail:
+	kbase_pm_ca_term(kbdev);
+workq_fail:
+	kbasep_pm_metrics_term(kbdev);
+	return -EINVAL;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Turn clocks and interrupts on - no-op if we haven't done a previous
+	 * kbase_pm_clock_off() */
+	kbase_pm_clock_on(kbdev, is_resume);
+
+	/* Update core status as required by the policy */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+	kbase_pm_update_cores_state(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+	/* NOTE: We don't wait to reach the desired state, since running atoms
+	 * will wait for that state to be reached anyway */
+}
+
+bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+	bool cores_are_available;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Force all cores off */
+	kbdev->pm.backend.desired_shader_state = 0;
+	kbdev->pm.backend.desired_tiler_state = 0;
+
+	/* Force all cores to be unavailable, in the situation where
+	 * transitions are in progress for some cores but not others,
+	 * and kbase_pm_check_transitions_nolock can not immediately
+	 * power off the cores */
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_available_bitmap = 0;
+
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	/* NOTE: We won't wait to reach the core's desired state, even if we're
+	 * powering off the GPU itself too. It's safe to cut the power whilst
+	 * they're transitioning to off, because the cores should be idle and
+	 * all cache flushes should already have occurred */
+
+	/* Consume any change-state events */
+	kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	/* Disable interrupts and turn the clock off */
+	return kbase_pm_clock_off(kbdev, is_suspend);
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, don't enable IRQs as we are not ready to receive
+	 * them. */
+	ret = kbase_pm_init_hw(kbdev, flags);
+	if (ret) {
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		return ret;
+	}
+
+	kbasep_pm_read_present_cores(kbdev);
+
+	kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
+			kbdev->pm.debug_core_mask[1] =
+			kbdev->pm.debug_core_mask[2] =
+			kbdev->gpu_props.props.raw_props.shader_present;
+
+	/* Pretend the GPU is active to prevent a power policy turning the GPU
+	 * cores off */
+	kbdev->pm.active_count = 1;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+	/* Ensure cycle counter is off */
+	kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+	/* We are ready to receive IRQ's now as power policy is set up, so
+	 * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+	kbdev->pm.backend.driver_ready_for_irqs = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+#endif
+	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the GPU and any cores needed by the policy */
+	kbase_pm_do_poweron(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	kbase_pm_context_idle(kbdev);
+
+	return 0;
+}
+
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	if (!kbase_pm_do_poweroff(kbdev, false)) {
+		/* Page/bus faults are pending, must drop pm.lock to process.
+		 * Interrupts are disabled so no more faults should be
+		 * generated at this point */
+		mutex_unlock(&kbdev->pm.lock);
+		kbase_flush_mmu_wqs(kbdev);
+		mutex_lock(&kbdev->pm.lock);
+		WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
+	}
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
+
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
+
+	/* Free any resources the policy allocated */
+	kbase_pm_policy_term(kbdev);
+	kbase_pm_ca_term(kbdev);
+
+	/* Shut down the metrics subsystem */
+	kbasep_pm_metrics_term(kbdev);
+}
+
+void kbase_pm_power_changed(struct kbase_device *kbdev)
+{
+	bool cores_are_available;
+	unsigned long flags;
+
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
+
+	if (cores_are_available) {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+		kbase_gpu_slot_update(kbdev);
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+	}
+}
+
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2)
+{
+	kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
+	kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
+	kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
+	kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
+			new_core_mask_js2;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	/* Force power off the GPU and all cores (regardless of policy), only
+	 * after the PM active count reaches zero (otherwise, we risk turning it
+	 * off prematurely) */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	if (!kbase_pm_do_poweroff(kbdev, true)) {
+		/* Page/bus faults are pending, must drop pm.lock to process.
+		 * Interrupts are disabled so no more faults should be
+		 * generated at this point */
+		mutex_unlock(&kbdev->pm.lock);
+		kbase_flush_mmu_wqs(kbdev);
+		mutex_lock(&kbdev->pm.lock);
+		WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
+	}
+
+	kbase_backend_timer_suspend(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbdev->pm.suspending = false;
+	kbase_pm_do_poweron(kbdev, true);
+
+	kbase_backend_timer_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100755
index 0000000..4eada33
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,183 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+	&kbase_pm_ca_fixed_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_ca_demand_policy_ops,
+	&kbase_pm_ca_random_policy_ops,
+#endif
+};
+
+/**
+ * POLICY_COUNT - The number of policies available in the system.
+ *
+ * This is derived from the number of functions listed in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.ca_current_policy = policy_list[0];
+
+	kbdev->pm.backend.ca_current_policy->init(kbdev);
+
+	return 0;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_current_policy->term(kbdev);
+}
+
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
+
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.ca_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
+
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *new_policy)
+{
+	const struct kbase_pm_ca_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
+								new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	old_policy = kbdev->pm.backend.ca_current_policy;
+	kbdev->pm.backend.ca_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.ca_current_policy = new_policy;
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+					kbdev->shader_ready_bitmap,
+					kbdev->shader_transitioning_bitmap);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
+
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	/* All cores must be enabled when instrumentation is in use */
+	if (kbdev->pm.backend.instr_enabled)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	if (kbdev->pm.backend.ca_current_policy == NULL)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
+						kbdev->pm.debug_core_mask_all;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
+
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+							u64 cores_transitioning)
+{
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	if (kbdev->pm.backend.ca_current_policy != NULL)
+		kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+							cores_ready,
+							cores_transitioning);
+}
+
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.instr_enabled = true;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.instr_enabled = false;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
new file mode 100755
index 0000000..ee9e751
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+/**
+ * kbase_pm_ca_init - Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the core availability framework was successfully initialized,
+ *         -errno otherwise
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_term - Terminate core availability framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the currently available shader cores.
+ * Calls into the core availability policy
+ *
+ * Return: The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_update_core_status - Update core status
+ *
+ * @kbdev:               The kbase device structure for the device (must be
+ *                       a valid pointer)
+ * @cores_ready:         The bit mask of cores ready for job submission
+ * @cores_transitioning: The bit mask of cores that are transitioning power
+ *                       state
+ *
+ * Update core availability policy with current core power status
+ *
+ * Calls into the core availability policy
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+/**
+ * kbase_pm_ca_instr_enable - Enable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This overrides the output of the core availability policy, ensuring that all
+ * cores are available
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_instr_disable - Disable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This disables any previously enabled override, and resumes normal policy
+ * functionality
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_CA_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
new file mode 100644
index 0000000..19f1f73
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
@@ -0,0 +1,246 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation.
+ *
+ * This policy periodically selects a new core mask at demand. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/printk.h>
+
+#ifndef MALI_CA_TIMER
+static struct workqueue_struct *mali_ca_wq = NULL;
+#endif
+static int num_shader_cores_to_be_enabled = 0;
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+#ifndef MALI_CA_TIMER
+	struct kbase_device *kbdev = (struct kbase_device *)priv;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+#endif
+
+    num_shader_cores_to_be_enabled = cores -1;
+    printk("pp num=%d\n", num_shader_cores_to_be_enabled);
+    if (num_shader_cores_to_be_enabled < 1)
+        num_shader_cores_to_be_enabled = 1;
+
+#ifndef MALI_CA_TIMER
+    queue_work(mali_ca_wq, &data->wq_work);
+#endif
+
+    return 0;
+}
+
+#ifdef MALI_CA_TIMER
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void demand_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+#if 0
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+#endif
+    data->cores_desired = num_shader_cores_to_be_enabled;
+
+	if (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+            dev_info(kbdev->dev, "error, ca demand policy : new core mask=%llX\n",
+				data->cores_desired);
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "ca demand policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(demand_timer_callback);
+#else
+static void do_ca_scaling(struct work_struct *work)
+{
+    //unsigned long flags;
+	struct kbase_device *kbdev = = container_of(work,
+            struct kbasep_pm_ca_policy_demand, wq_work);
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+    data->cores_desired =  num_shader_cores_to_be_enabled;
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+
+}
+#endif
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+#ifdef MALI_CA_TIMER
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = demand_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+#else
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+	mali_ca_wq = alloc_workqueue("gpu_ca_wq", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+	mali_ca_wq = create_workqueue("gpu_ca_wq");
+#endif
+	INIT_WORK(&data->wq_work, do_ca_scaling);
+    if (mali_ca_wq == NULL) printk("Unable to create gpu scaling workqueue\n");
+#endif
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+#ifdef MALI_CA_TIMER
+	del_timer(&data->core_change_timer);
+#else
+    if (mali_ca_wq) {
+        flush_workqueue(mali_ca_wq);
+        destroy_workqueue(mali_ca_wq);
+        mali_ca_wq = NULL;
+    }
+#endif
+}
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.demand.cores_enabled;
+}
+
+static void demand_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the demand core availability
+ * policy.
+ *
+ * This is the static structure that defines the demand core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_demand_policy_ops);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
new file mode 100644
index 0000000..a3dfe2a
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_DEMAND_H
+#define MALI_KBASE_PM_CA_DEMAND_H
+#include <linux/workqueue.h>
+/**
+ * struct kbasep_pm_ca_policy_demand - Private structure for demand ca policy
+ *
+ * This contains data that is private to the demand core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+#define MALI_CA_TIMER 1
+struct kbasep_pm_ca_policy_demand {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+#ifdef MALI_CA_TIMER
+	struct timer_list core_change_timer;
+#else
+	struct work_struct wq_work;
+#endif
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops;
+extern int mali_perf_set_num_pp_cores(int cores);
+
+#endif /* MALI_KBASE_PM_CA_DEMAND_H */
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
new file mode 100755
index 0000000..1db6586
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static void fixed_init(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev,
+					u64 cores_ready,
+					u64 cores_transitioning)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(cores_ready);
+	CSTD_UNUSED(cores_transitioning);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the fixed power policy.
+ *
+ * This is the static structure that defines the fixed power policy's callback
+ * and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+	"fixed",			/* name */
+	fixed_init,			/* init */
+	fixed_term,			/* term */
+	fixed_get_core_mask,		/* get_core_mask */
+	fixed_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_FIXED,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
new file mode 100755
index 0000000..a763155
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Dummy member - no state is needed
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+	int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
new file mode 100644
index 0000000..0d1b220
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation.
+ *
+ * This policy periodically selects a new core mask at random. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void random_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(random_timer_callback);
+
+static void random_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = random_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+}
+
+static void random_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	del_timer(&data->core_change_timer);
+}
+
+static u64 random_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.random.cores_enabled;
+}
+
+static void random_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the random core availability
+ * policy.
+ *
+ * This is the static structure that defines the random core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops = {
+	"random",			/* name */
+	random_init,			/* init */
+	random_term,			/* term */
+	random_get_core_mask,		/* get_core_mask */
+	random_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_RANDOM,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_random_policy_ops);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
new file mode 100644
index 0000000..a8c9b15
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_RANDOM_H
+#define MALI_KBASE_PM_CA_RANDOM_H
+
+/**
+ * struct kbasep_pm_ca_policy_random - Private structure for random ca policy
+ *
+ * This contains data that is private to the random core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+struct kbasep_pm_ca_policy_random {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+	struct timer_list core_change_timer;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_RANDOM_H */
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100755
index 0000000..f891fa2
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count == 0)
+		return 0;
+
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	"coarse_demand",			/* name */
+	coarse_demand_init,			/* init */
+	coarse_demand_term,			/* term */
+	coarse_demand_get_core_mask,		/* get_core_mask */
+	coarse_demand_get_core_active,		/* get_core_active */
+	0u,					/* flags */
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100755
index 0000000..749d305
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * DOC:
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
+ *                                         policy
+ *
+ * This contains data that is private to the coarse demand power policy.
+ *
+ * @dummy: Dummy member - no state needed
+ */
+struct kbasep_pm_policy_coarse_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100755
index 0000000..8cb1dd7
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,489 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#include "mali_kbase_pm_ca_demand.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/**
+ * enum kbase_pm_core_type - The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to
+ * - kbase_pm_get_present_cores()
+ * - kbase_pm_get_active_cores()
+ * - kbase_pm_get_trans_cores()
+ * - kbase_pm_get_ready_cores().
+ *
+ * They specify which type of core should be acted on.  These values are set in
+ * a manner that allows core_type_to_reg() function to be simpler and more
+ * efficient.
+ *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ */
+enum kbase_pm_core_type {
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO
+};
+
+/**
+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power
+ *                                 management framework.
+ *
+ *  @time_period_start: time at which busy/idle measurements started
+ *  @time_busy: number of ns the GPU was busy executing jobs since the
+ *          @time_period_start timestamp.
+ *  @time_idle: number of ns since time_period_start the GPU was not executing
+ *          jobs since the @time_period_start timestamp.
+ *  @prev_busy: busy time in ns of previous time period.
+ *           Updated when metrics are reset.
+ *  @prev_idle: idle time in ns of previous time period
+ *           Updated when metrics are reset.
+ *  @gpu_active: true when the GPU is executing jobs. false when
+ *           not. Updated when the job scheduler informs us a job in submitted
+ *           or removed from a GPU slot.
+ *  @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
+ *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
+ *           GL jobs never run on slot 2 this slot is not recorded.
+ *  @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ *  @timer: timer to regularly make DVFS decisions based on the power
+ *           management metrics.
+ *  @timer_active: boolean indicating @timer is running
+ *  @platform_data: pointer to data controlled by platform specific code
+ *  @kbdev: pointer to kbase device for which metrics are collected
+ *
+ */
+struct kbasep_pm_metrics_data {
+	ktime_t time_period_start;
+	u32 time_busy;
+	u32 time_idle;
+	u32 prev_busy;
+	u32 prev_idle;
+	bool gpu_active;
+	u32 busy_cl[2];
+	u32 busy_gl;
+	u32 active_cl_ctx[2];
+	u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
+	spinlock_t lock;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	struct hrtimer timer;
+	bool timer_active;
+#endif
+
+	void *platform_data;
+	struct kbase_device *kbdev;
+};
+
+union kbase_pm_policy_data {
+	struct kbasep_pm_policy_always_on always_on;
+	struct kbasep_pm_policy_coarse_demand coarse_demand;
+	struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+	struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+union kbase_pm_ca_policy_data {
+	struct kbasep_pm_ca_policy_fixed fixed;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_ca_policy_random random;
+	struct kbasep_pm_ca_policy_demand demand;
+#endif
+};
+
+/**
+ * struct kbase_pm_backend_data - Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ *
+ * @ca_current_policy: The policy that is currently actively controlling core
+ *                     availability.
+ * @pm_current_policy: The policy that is currently actively controlling the
+ *                     power state.
+ * @ca_policy_data:    Private data for current CA policy
+ * @pm_policy_data:    Private data for current PM policy
+ * @ca_in_transition:  Flag indicating when core availability policy is
+ *                     transitioning cores. The core availability policy must
+ *                     set this when a change in core availability is occurring.
+ *                     power_change_lock must be held when accessing this.
+ * @reset_done:        Flag when a reset is complete
+ * @reset_done_wait:   Wait queue to wait for changes to @reset_done
+ * @l2_powered_wait:   Wait queue for whether the l2 cache has been powered as
+ *                     requested
+ * @l2_powered:        State indicating whether all the l2 caches are powered.
+ *                     Non-zero indicates they're *all* powered
+ *                     Zero indicates that some (or all) are not powered
+ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
+ *                              users
+ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
+ * @desired_shader_state: A bit mask identifying the shader cores that the
+ *                        power policy would like to be on. The current state
+ *                        of the cores may be different, but there should be
+ *                        transitions in progress that will eventually achieve
+ *                        this state (assuming that the policy doesn't change
+ *                        its mind in the mean time).
+ * @powering_on_shader_state: A bit mask indicating which shader cores are
+ *                            currently in a power-on transition
+ * @desired_tiler_state: A bit mask identifying the tiler cores that the power
+ *                       policy would like to be on. See @desired_shader_state
+ * @powering_on_tiler_state: A bit mask indicating which tiler core are
+ *                           currently in a power-on transition
+ * @powering_on_l2_state: A bit mask indicating which l2-caches are currently
+ *                        in a power-on transition
+ * @gpu_in_desired_state: This flag is set if the GPU is powered as requested
+ *                        by the desired_xxx_state variables
+ * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
+ * @gpu_powered:       Set to true when the GPU is powered and register
+ *                     accesses are possible, false otherwise
+ * @instr_enabled:     Set to true when instrumentation is enabled,
+ *                     false otherwise
+ * @cg1_disabled:      Set if the policy wants to keep the second core group
+ *                     powered off
+ * @driver_ready_for_irqs: Debug state indicating whether sufficient
+ *                         initialization of the driver has occurred to handle
+ *                         IRQs
+ * @gpu_powered_lock:  Spinlock that must be held when writing @gpu_powered or
+ *                     accessing @driver_ready_for_irqs
+ * @metrics:           Structure to hold metrics for the GPU
+ * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
+ *                        powered off
+ * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
+ *                        and/or timers are powered off
+ * @gpu_poweroff_timer: Timer for powering off GPU
+ * @gpu_poweroff_wq:   Workqueue to power off GPU on when timer fires
+ * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
+ * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
+ *                           timer callback
+ * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
+ *                          callback
+ * @poweroff_timer_needed: true if the poweroff timer is currently required,
+ *                         false otherwise
+ * @poweroff_timer_running: true if the poweroff timer is currently running,
+ *                          false otherwise
+ *                          power_change_lock should be held when accessing,
+ *                          unless there is no way the timer can be running (eg
+ *                          hrtimer_cancel() was called immediately before)
+ * @callback_power_on: Callback when the GPU needs to be turned on. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_off: Callback when the GPU may be turned off. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to
+ *                          be turned off. See &struct kbase_pm_callback_conf
+ * @callback_power_resume: Callback when a resume occurs and the GPU needs to
+ *                          be turned on. See &struct kbase_pm_callback_conf
+ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See
+ *                             &struct kbase_pm_callback_conf
+ * @callback_power_runtime_off: Callback when the GPU may be turned off. See
+ *                              &struct kbase_pm_callback_conf
+ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
+ *                              &struct kbase_pm_callback_conf
+ *
+ * Note:
+ * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
+ * policy is being changed with kbase_pm_ca_set_policy() or
+ * kbase_pm_set_policy(). The change is protected under
+ * kbase_device.pm.power_change_lock. Direct access to this
+ * from IRQ context must therefore check for NULL. If NULL, then
+ * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
+ * functions that would have been done under IRQ.
+ */
+struct kbase_pm_backend_data {
+	const struct kbase_pm_ca_policy *ca_current_policy;
+	const struct kbase_pm_policy *pm_current_policy;
+	union kbase_pm_ca_policy_data ca_policy_data;
+	union kbase_pm_policy_data pm_policy_data;
+	bool ca_in_transition;
+	bool reset_done;
+	wait_queue_head_t reset_done_wait;
+	wait_queue_head_t l2_powered_wait;
+	int l2_powered;
+	int gpu_cycle_counter_requests;
+	spinlock_t gpu_cycle_counter_requests_lock;
+
+	u64 desired_shader_state;
+	u64 powering_on_shader_state;
+	u64 desired_tiler_state;
+	u64 powering_on_tiler_state;
+	u64 powering_on_l2_state;
+
+	bool gpu_in_desired_state;
+	wait_queue_head_t gpu_in_desired_state_wait;
+
+	bool gpu_powered;
+
+	bool instr_enabled;
+
+	bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+	bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+	spinlock_t gpu_powered_lock;
+
+
+	struct kbasep_pm_metrics_data metrics;
+
+	int gpu_poweroff_pending;
+	int shader_poweroff_pending_time;
+
+	struct hrtimer gpu_poweroff_timer;
+	struct workqueue_struct *gpu_poweroff_wq;
+	struct work_struct gpu_poweroff_work;
+
+	u64 shader_poweroff_pending;
+	u64 tiler_poweroff_pending;
+
+	bool poweroff_timer_needed;
+	bool poweroff_timer_running;
+
+	int (*callback_power_on)(struct kbase_device *kbdev);
+	void (*callback_power_off)(struct kbase_device *kbdev);
+	void (*callback_power_suspend)(struct kbase_device *kbdev);
+	void (*callback_power_resume)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
+};
+
+
+/* List of policy IDs */
+enum kbase_pm_policy_id {
+	KBASE_PM_POLICY_ID_DEMAND = 1,
+	KBASE_PM_POLICY_ID_ALWAYS_ON,
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
+	KBASE_PM_POLICY_ID_FAST_START
+#endif
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+/**
+ * struct kbase_pm_policy - Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core mask
+ * @get_core_active:    Function called to get the current overall GPU power
+ *                      state
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.pm_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core mask
+	 *
+	 * The returned mask should meet or exceed (kbdev->shader_needed_bitmap
+	 * | kbdev->shader_inuse_bitmap).
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: The mask of shader cores to be powered
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current overall GPU power state
+	 *
+	 * This function should consider the state of kbdev->pm.active_count. If
+	 * this count is greater than 0 then there is at least one active
+	 * context on the device and the GPU should be powered. If it is equal
+	 * to 0 then there are no active contexts and the GPU could be powered
+	 * off if desired.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: true if the GPU should be powered, false otherwise
+	 */
+	bool (*get_core_active)(struct kbase_device *kbdev);
+
+	kbase_pm_policy_flags flags;
+	enum kbase_pm_policy_id id;
+};
+
+
+enum kbase_pm_ca_policy_id {
+	KBASE_PM_CA_POLICY_ID_FIXED = 1,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_CA_POLICY_ID_RANDOM,
+	KBASE_PM_CA_POLICY_ID_DEMAND,
+#endif
+};
+
+typedef u32 kbase_pm_ca_policy_flags;
+
+/**
+ * struct kbase_pm_ca_policy - Core availability policy structure.
+ *
+ * Each core availability policy exposes a (static) instance of this structure
+ * which contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core
+ *                      availability mask
+ * @update_core_status: Function called to update the current core status
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_ca_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.ca_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core availability mask
+	 *
+	 * When a change in core availability is occurring, the policy must set
+	 * kbdev->pm.ca_in_transition to true. This is to indicate that
+	 * reporting changes in power state cannot be optimized out, even if
+	 * kbdev->pm.desired_shader_state remains unchanged. This must be done
+	 * by any functions internal to the Core Availability Policy that change
+	 * the return value of kbase_pm_ca_policy::get_core_mask.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *              valid pointer)
+	 *
+	 * Return: The current core availability mask
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to update the current core status
+	 *
+	 * If none of the cores in core group 0 are ready or transitioning, then
+	 * the policy must ensure that the next call to get_core_mask does not
+	 * return 0 for all cores in core group 0. It is an error to disable
+	 * core group 0 through the core availability policy.
+	 *
+	 * When a change in core availability has finished, the policy must set
+	 * kbdev->pm.ca_in_transition to false. This is to indicate that
+	 * changes in power state can once again be optimized out when
+	 * kbdev->pm.desired_shader_state is unchanged.
+	 *
+	 * @kbdev:               The kbase device structure for the device
+	 *                       (must be a valid pointer)
+	 * @cores_ready:         The mask of cores currently powered and
+	 *                       ready to run jobs
+	 * @cores_transitioning: The mask of cores currently transitioning
+	 *                       power state
+	 */
+	void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+	kbase_pm_ca_policy_flags flags;
+
+	/**
+	 * Field indicating an ID for this policy. This is not necessarily the
+	 * same as its index in the list returned by kbase_pm_list_policies().
+	 * It is used purely for debugging.
+	 */
+	enum kbase_pm_ca_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
new file mode 100755
index 0000000..81322fd
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
+
+	if (0 == kbdev->pm.active_count)
+		return 0;
+
+	return desired;
+}
+
+static bool demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_get_core_active,		/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
new file mode 100755
index 0000000..c0c84b6
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#ifndef MALI_KBASE_PM_DEMAND_H
+#define MALI_KBASE_PM_DEMAND_H
+
+/**
+ * DOC: Demand power management policy
+ *
+ * The demand power management policy has the following characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - The Shader Cores are not powered up
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - Only those Shader Cores are powered up
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * Note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_demand - Private structure for policy instance data
+ *
+ * @dummy: No state is needed, a dummy variable
+ *
+ * This contains data that is private to the demand power policy.
+ */
+struct kbasep_pm_policy_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_DEMAND_H */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
new file mode 100755
index 0000000..dcde2d9
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -0,0 +1,1582 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel Power Management hardware control
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_smc.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+
+#if MALI_MOCK_TEST
+#define MOCKABLE(function) function##_original
+#else
+#define MOCKABLE(function) function
+#endif				/* MALI_MOCK_TEST */
+
+/* Special value to indicate that the JM_CONFIG reg isn't currently used. */
+#define KBASE_JM_CONFIG_UNUSED (1<<31)
+
+static u64 kbase_pm_get_state(
+		struct kbase_device *kbdev,
+		enum kbase_pm_core_type core_type,
+		enum kbasep_pm_action action);
+
+/**
+ * core_type_to_reg - Decode a core type and action to a register.
+ *
+ * Given a core type (defined by kbase_pm_core_type) and an action (defined
+ * by kbasep_pm_action) this function will return the register offset that
+ * will perform the action on the core type. The register returned is the _LO
+ * register and an offset must be applied to use the _HI register.
+ *
+ * @core_type: The type of core
+ * @action:    The type of action
+ *
+ * Return: The register offset of the _LO register that performs an action of
+ * type @action on a core of type @core_type.
+ */
+static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
+						enum kbasep_pm_action action)
+{
+	return (u32)core_type + (u32)action;
+}
+
+#ifdef CONFIG_ARM64
+static void mali_cci_flush_l2(struct kbase_device *kbdev)
+{
+	const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED;
+	u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+	u32 raw;
+
+	/*
+	 * Note that we don't take the cache flush mutex here since
+	 * we expect to be the last user of the L2, all other L2 users
+	 * would have dropped their references, to initiate L2 power
+	 * down, L2 power down being the only valid place for this
+	 * to be called from.
+	 */
+
+	kbase_reg_write(kbdev,
+			GPU_CONTROL_REG(GPU_COMMAND),
+			GPU_COMMAND_CLEAN_INV_CACHES,
+			NULL);
+
+	raw = kbase_reg_read(kbdev,
+		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+		NULL);
+
+	/* Wait for cache flush to complete before continuing, exit on
+	 * gpu resets or loop expiry. */
+	while (((raw & mask) == 0) && --loops) {
+		raw = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+					NULL);
+	}
+}
+#endif
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo = cores & 0xFFFFFFFF;
+	u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	if (cores) {
+		if (action == ACTION_PWRON)
+			kbase_trace_mali_pm_power_on(core_type, cores);
+		else if (action == ACTION_PWROFF)
+			kbase_trace_mali_pm_power_off(core_type, cores);
+	}
+#endif
+
+	if (cores) {
+		u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
+
+		if (action == ACTION_PWRON)
+			state |= cores;
+		else if (action == ACTION_PWROFF)
+			state &= ~cores;
+		kbase_tlstream_aux_pm_state(core_type, state);
+	}
+
+	/* Tracing */
+	if (cores) {
+		if (action == ACTION_PWRON)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u,
+									lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL,
+									0u, lo);
+				break;
+			default:
+				break;
+			}
+		else if (action == ACTION_PWROFF)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL,
+									0u, lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
+									0u, lo);
+				/* disable snoops before L2 is turned off */
+				kbase_pm_cache_snoop_disable(kbdev);
+				break;
+			default:
+				break;
+			}
+	}
+
+	if (lo != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+	if (hi != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_invoke);
+
+/**
+ * kbase_pm_get_state - Get information about a core set
+ *
+ * This function gets information (chosen by @action) about a set of cores of
+ * a type given by @core_type. It is a static function used by
+ * kbase_pm_get_present_cores(), kbase_pm_get_active_cores(),
+ * kbase_pm_get_trans_cores() and kbase_pm_get_ready_cores().
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the should be queried
+ * @action:    The property of the cores to query
+ *
+ * Return: A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo, hi;
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+
+	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+	return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev)
+{
+	kbdev->shader_inuse_bitmap = 0;
+	kbdev->shader_needed_bitmap = 0;
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_users_count = 0;
+	kbdev->l2_available_bitmap = 0;
+	kbdev->tiler_needed_cnt = 0;
+	kbdev->tiler_inuse_cnt = 0;
+
+	memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_read_present_cores);
+
+/**
+ * kbase_pm_get_present_cores - Get the cores that are present
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (type) {
+	case KBASE_PM_CORE_L2:
+		return kbdev->gpu_props.props.raw_props.l2_present;
+	case KBASE_PM_CORE_SHADER:
+		return kbdev->gpu_props.props.raw_props.shader_present;
+	case KBASE_PM_CORE_TILER:
+		return kbdev->gpu_props.props.raw_props.tiler_present;
+	}
+	KBASE_DEBUG_ASSERT(0);
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * kbase_pm_get_active_cores - Get the cores that are "active"
+ *                             (busy processing work)
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are active
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between
+ *                            power states
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are transitioning
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * kbase_pm_get_ready_cores - Get the cores that are powered on
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are ready (powered on)
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	u64 result;
+
+	result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+	switch (type) {
+	case KBASE_PM_CORE_SHADER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_TILER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_L2:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * kbase_pm_transition_core_type - Perform power transitions for a particular
+ *                                 core type.
+ *
+ * This function will perform any available power transitions to make the actual
+ * hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that call cannot be made
+ * until the transition has finished. Cores which are not present in the
+ * hardware are ignored if they are specified in the desired_state bitmask,
+ * however the return value will always be 0 in this case.
+ *
+ * @kbdev:             The kbase device
+ * @type:              The core type to perform transitions for
+ * @desired_state:     A bit mask of the desired state of the cores
+ * @in_use:            A bit mask of the cores that are currently running
+ *                     jobs. These cores have to be kept powered up because
+ *                     there are jobs running (or about to run) on them.
+ * @available:         Receives a bit mask of the cores that the job
+ *                     scheduler can use to submit jobs to. May be NULL if
+ *                     this is not needed.
+ * @powering_on:       Bit mask to update with cores that are
+ *                    transitioning to a power-on state.
+ *
+ * Return: true if the desired state has been reached, false otherwise
+ */
+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type,
+						u64 desired_state,
+						u64 in_use,
+						u64 * const available,
+						u64 *powering_on)
+{
+	u64 present;
+	u64 ready;
+	u64 trans;
+	u64 powerup;
+	u64 powerdown;
+	u64 powering_on_trans;
+	u64 desired_state_in_use;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	/* Get current state */
+	present = kbase_pm_get_present_cores(kbdev, type);
+	trans = kbase_pm_get_trans_cores(kbdev, type);
+	ready = kbase_pm_get_ready_cores(kbdev, type);
+	/* mask off ready from trans in case transitions finished between the
+	 * register reads */
+	trans &= ~ready;
+
+	powering_on_trans = trans & *powering_on;
+	*powering_on = powering_on_trans;
+
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	/* Update desired state to include the in-use cores. These have to be
+	 * kept powered up because there are jobs running or about to run on
+	 * these cores
+	 */
+	desired_state_in_use = desired_state | in_use;
+
+	/* Update state of whether l2 caches are powered */
+	if (type == KBASE_PM_CORE_L2) {
+		if ((ready == present) && (desired_state_in_use == ready) &&
+								(trans == 0)) {
+			/* All are ready, none will be turned off, and none are
+			 * transitioning */
+			kbdev->pm.backend.l2_powered = 1;
+			/*
+			 * Ensure snoops are enabled after L2 is powered up,
+			 * note that kbase keeps track of the snoop state, so
+			 * safe to repeatedly call.
+			 */
+			kbase_pm_cache_snoop_enable(kbdev);
+			if (kbdev->l2_users_count > 0) {
+				/* Notify any registered l2 cache users
+				 * (optimized out when no users waiting) */
+				wake_up(&kbdev->pm.backend.l2_powered_wait);
+			}
+		} else
+			kbdev->pm.backend.l2_powered = 0;
+	}
+
+	if (desired_state_in_use == ready && (trans == 0))
+		return true;
+
+	/* Restrict the cores to those that are actually present */
+	powerup = desired_state_in_use & present;
+	powerdown = (~desired_state_in_use) & present;
+
+	/* Restrict to cores that are not already in the desired state */
+	powerup &= ~ready;
+	powerdown &= ready;
+
+	/* Don't transition any cores that are already transitioning, except for
+	 * Mali cores that support the following case:
+	 *
+	 * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
+	 * a core that is currently transitioning to power off, then this is
+	 * remembered and the shader core is automatically powered up again once
+	 * the original transition completes. Once the automatic power on is
+	 * complete any job scheduled on the shader core should start.
+	 */
+	powerdown &= ~trans;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
+		if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
+			trans = powering_on_trans; /* for exception cases, only
+						    * mask off cores in power on
+						    * transitions */
+
+	powerup &= ~trans;
+
+	/* Perform transitions if any */
+	kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
+	//kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
+
+	/* Recalculate cores transitioning on, and re-evaluate our state */
+	powering_on_trans |= powerup;
+	*powering_on = powering_on_trans;
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	return false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
+
+/**
+ * get_desired_cache_status - Determine which caches should be on for a
+ *                            particular core state
+ *
+ * This function takes a bit mask of the present caches and the cores (or
+ * caches) that are attached to the caches that will be powered. It then
+ * computes which caches should be turned on to allow the cores requested to be
+ * powered up.
+ *
+ * @present:       The bit mask of present caches
+ * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
+ *                 be powered
+ * @tilers_powered: The bit mask of tilers that are desired to be powered
+ *
+ * Return: A bit mask of the caches that should be turned on
+ */
+static u64 get_desired_cache_status(u64 present, u64 cores_powered,
+		u64 tilers_powered)
+{
+	u64 desired = 0;
+
+	while (present) {
+		/* Find out which is the highest set bit */
+		u64 bit = fls64(present) - 1;
+		u64 bit_mask = 1ull << bit;
+		/* Create a mask which has all bits from 'bit' upwards set */
+
+		u64 mask = ~(bit_mask - 1);
+
+		/* If there are any cores powered at this bit or above (that
+		 * haven't previously been processed) then we need this core on
+		 */
+		if (cores_powered & mask)
+			desired |= bit_mask;
+
+		/* Remove bits from cores_powered and present */
+		cores_powered &= ~mask;
+		present &= ~bit_mask;
+	}
+
+	/* Power up the required L2(s) for the tiler */
+	if (tilers_powered)
+		desired |= 1;
+
+	return desired;
+}
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status);
+
+bool
+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+	bool cores_are_available = false;
+	bool in_desired_state = true;
+	u64 desired_l2_state;
+	u64 cores_powered;
+	u64 tilers_powered;
+	u64 tiler_available_bitmap;
+	u64 shader_available_bitmap;
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+	u64 l2_available_bitmap;
+	u64 prev_l2_available_bitmap;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+	if (kbdev->pm.backend.gpu_powered == false) {
+		spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+		if (kbdev->pm.backend.desired_shader_state == 0 &&
+				kbdev->pm.backend.desired_tiler_state == 0)
+			return true;
+		return false;
+	}
+
+	/* Trace that a change-state is being requested, and that it took
+	 * (effectively) no time to start it. This is useful for counting how
+	 * many state changes occurred, in a way that's backwards-compatible
+	 * with processing the trace data */
+	kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+	kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+
+	/* If any cores are already powered then, we must keep the caches on */
+	cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+
+	cores_powered |= kbdev->pm.backend.desired_shader_state;
+
+	/* Work out which tilers want to be powered */
+	tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+	tilers_powered |= kbdev->pm.backend.desired_tiler_state;
+
+	/* If there are l2 cache users registered, keep all l2s powered even if
+	 * all other cores are off. */
+	if (kbdev->l2_users_count > 0)
+		cores_powered |= kbdev->gpu_props.props.raw_props.l2_present;
+
+	desired_l2_state = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered, tilers_powered);
+
+	/* If any l2 cache is on, then enable l2 #0, for use by job manager */
+	if (0 != desired_l2_state)
+		desired_l2_state |= 1;
+
+	prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+	in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_L2, desired_l2_state, 0,
+				&l2_available_bitmap,
+				&kbdev->pm.backend.powering_on_l2_state);
+
+	if (kbdev->l2_available_bitmap != l2_available_bitmap)
+		KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
+
+	kbdev->l2_available_bitmap = l2_available_bitmap;
+
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_TILER,
+				kbdev->pm.backend.desired_tiler_state,
+				0, &tiler_available_bitmap,
+				&kbdev->pm.backend.powering_on_tiler_state);
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_SHADER,
+				kbdev->pm.backend.desired_shader_state,
+				kbdev->shader_inuse_bitmap,
+				&shader_available_bitmap,
+				&kbdev->pm.backend.powering_on_shader_state);
+
+		if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u,
+						(u32) shader_available_bitmap);
+			KBASE_TIMELINE_POWER_SHADER(kbdev,
+						shader_available_bitmap);
+		}
+
+		kbdev->shader_available_bitmap = shader_available_bitmap;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u,
+						(u32) tiler_available_bitmap);
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+		}
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+	} else if ((l2_available_bitmap &
+			kbdev->gpu_props.props.raw_props.tiler_present) !=
+			kbdev->gpu_props.props.raw_props.tiler_present) {
+		tiler_available_bitmap = 0;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+	}
+
+	/* State updated for slow-path waiters */
+	kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+
+	shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* Determine whether the cores are now available (even if the set of
+	 * available cores is empty). Note that they can be available even if
+	 * we've not finished transitioning to the desired state */
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state) {
+		cores_are_available = true;
+
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+				(u32)(kbdev->shader_available_bitmap &
+				kbdev->pm.backend.desired_shader_state));
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+				(u32)(kbdev->tiler_available_bitmap &
+				kbdev->pm.backend.desired_tiler_state));
+
+		/* Log timelining information about handling events that power
+		 * up cores, to match up either with immediate submission either
+		 * because cores already available, or from PM IRQ */
+		if (!in_desired_state)
+			kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+
+	if (in_desired_state) {
+		KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_L2));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_TILER));
+#endif
+
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_L2,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_L2));
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_SHADER,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_SHADER));
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_TILER,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_TILER));
+
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+				kbdev->pm.backend.gpu_in_desired_state,
+				(u32)kbdev->pm.backend.desired_shader_state);
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+				(u32)kbdev->pm.backend.desired_tiler_state);
+
+		/* Log timelining information for synchronous waiters */
+		kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+		/* Wake slow-path waiters. Job scheduler does not use this. */
+		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
+	}
+
+	spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+
+	/* kbase_pm_ca_update_core_status can cause one-level recursion into
+	 * this function, so it must only be called once all changes to kbdev
+	 * have been committed, and after the gpu_powered_lock has been
+	 * dropped. */
+	if (kbdev->shader_ready_bitmap != shader_ready_bitmap ||
+	    kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) {
+		kbdev->shader_ready_bitmap = shader_ready_bitmap;
+		kbdev->shader_transitioning_bitmap =
+						shader_transitioning_bitmap;
+
+		kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap,
+						shader_transitioning_bitmap);
+	}
+
+	/* The core availability policy is not allowed to keep core group 0
+	 * turned off (unless it was changing the l2 power state) */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
+		(prev_l2_available_bitmap == desired_l2_state) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+		BUG();
+
+	/* The core availability policy is allowed to keep core group 1 off,
+	 * but all jobs specifically targeting CG1 must fail */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask))
+		kbdev->pm.backend.cg1_disabled = true;
+	else
+		kbdev->pm.backend.cg1_disabled = false;
+
+	return cores_are_available;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock);
+
+/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has
+ * aborted due to a fatal signal. If the time spent waiting has exceeded this
+ * threshold then there is most likely a hardware issue. */
+#define PM_TIMEOUT (5*HZ) /* 5s */
+
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long timeout;
+	bool cores_are_available;
+	int ret;
+
+	/* Force the transition to be checked and reported - the cores may be
+	 * 'available' (for job submission) but not fully powered up. */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	timeout = jiffies + PM_TIMEOUT;
+
+	/* Wait for cores */
+	ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+			kbdev->pm.backend.gpu_in_desired_state);
+
+	if (ret < 0 && time_after(jiffies, timeout)) {
+		dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+		dev_err(kbdev->dev, "Desired state :\n");
+		dev_err(kbdev->dev, "\tShader=%016llx\n",
+				kbdev->pm.backend.desired_shader_state);
+		dev_err(kbdev->dev, "\tTiler =%016llx\n",
+				kbdev->pm.backend.desired_tiler_state);
+		dev_err(kbdev->dev, "Current state :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_LO),
+					NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_LO), NULL));
+		dev_err(kbdev->dev, "Cores transitioning :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_LO), NULL));
+#if KBASE_GPU_RESET_EN
+		dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+		if (kbase_prepare_to_reset_gpu(kbdev))
+			kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+	} else {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
+
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Clear all interrupts,
+	 * and unmask them all.
+	 */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
+									NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
+
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Mask all interrupts,
+	 * and clear them all.
+	 */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
+
+/*
+ * pmu layout:
+ * 0x0000: PMU TAG (RO) (0xCAFECAFE)
+ * 0x0004: PMU VERSION ID (RO) (0x00000000)
+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE)
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
+{
+	bool reset_required = is_resume;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_powered) {
+		/* Already turned on */
+		if (kbdev->poweroff_pending)
+			kbase_pm_enable_interrupts(kbdev);
+		kbdev->poweroff_pending = false;
+		KBASE_DEBUG_ASSERT(!is_resume);
+		return;
+	}
+
+	kbdev->poweroff_pending = false;
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u);
+
+	if (is_resume && kbdev->pm.backend.callback_power_resume) {
+		kbdev->pm.backend.callback_power_resume(kbdev);
+		return;
+	} else if (kbdev->pm.backend.callback_power_on) {
+		kbdev->pm.backend.callback_power_on(kbdev);
+		/* If your platform properly keeps the GPU state you may use the
+		 * return value of the callback_power_on function to
+		 * conditionally reset the GPU on power up. Currently we are
+		 * conservative and always reset the GPU. */
+		reset_required = true;
+	}
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	kbdev->pm.backend.gpu_powered = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (reset_required) {
+		/* GPU state was lost, reset GPU to ensure it is in a
+		 * consistent state */
+		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
+	}
+
+	/* Reprogram the GPU's MMU */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		mutex_lock(&as->transaction_mutex);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		if (js_devdata->runpool_irq.per_as_data[i].kctx)
+			kbase_mmu_update(
+				js_devdata->runpool_irq.per_as_data[i].kctx);
+		else
+			kbase_mmu_disable_as(kbdev, i);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&as->transaction_mutex);
+	}
+
+	/* Lastly, enable the interrupts */
+	kbase_pm_enable_interrupts(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
+
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* ASSERT that the cores should now be unavailable. No lock needed. */
+	KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+
+	kbdev->poweroff_pending = true;
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* Already turned off */
+		if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+			kbdev->pm.backend.callback_power_suspend(kbdev);
+		return true;
+	}
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u);
+
+	/* Disable interrupts. This also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure that any IRQ handlers have finished */
+	kbase_synchronize_irqs(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (atomic_read(&kbdev->faults_pending)) {
+		/* Page/bus faults are still being processed. The GPU can not
+		 * be powered off until they have completed */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return false;
+	}
+
+	kbase_pm_cache_snoop_disable(kbdev);
+
+	/* The GPU power may be turned off from this point */
+	kbdev->pm.backend.gpu_powered = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+		kbdev->pm.backend.callback_power_suspend(kbdev);
+	else if (kbdev->pm.backend.callback_power_off)
+		kbdev->pm.backend.callback_power_off(kbdev);
+	return true;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_off);
+
+struct kbasep_reset_timeout_data {
+	struct hrtimer timer;
+	bool timed_out;
+	struct kbase_device *kbdev;
+};
+
+void kbase_pm_reset_done(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	kbdev->pm.backend.reset_done = true;
+	wake_up(&kbdev->pm.backend.reset_done_wait);
+}
+
+/**
+ * kbase_pm_wait_for_reset - Wait for a reset to happen
+ *
+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
+ *
+ * @kbdev: Kbase device
+ */
+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	wait_event(kbdev->pm.backend.reset_done_wait,
+						(kbdev->pm.backend.reset_done));
+	kbdev->pm.backend.reset_done = false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done);
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+	struct kbasep_reset_timeout_data *rtdata =
+		container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+	rtdata->timed_out = 1;
+
+	/* Set the wait queue to wake up kbase_pm_init_hw even though the reset
+	 * hasn't completed */
+	kbase_pm_reset_done(rtdata->kbdev);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+	u32 jm_values[4];
+	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >>
+		GPU_ID_VERSION_MAJOR_SHIFT;
+
+	kbdev->hw_quirks_sc = 0;
+
+	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443.
+	 * and needed due to MIDGLES-3539. See PRLAM-11035 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+		kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
+
+	/* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+		kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	/* Enable alternative hardware counter selection if configured. */
+	if (!GPU_ID_IS_NEW_FORMAT(prod_id))
+		kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+#endif
+
+	/* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+		kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
+
+	if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
+		if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */
+			kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
+		else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */
+			kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
+	}
+
+	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(TILER_CONFIG), NULL);
+
+	/* Set tiler clock gate override if required */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+		kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+
+	/* Limit the GPU bus bandwidth if the platform needs this. */
+	kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(L2_MMU_CONFIG), NULL);
+
+	/* Limit read ID width for AXI */
+	kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS);
+	kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT;
+
+	/* Limit write ID width for AXI */
+	kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+	kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Allow memory configuration disparity to be ignored, we
+		 * optimize the use of shared memory and thus we expect
+		 * some disparity in the memory configuration */
+		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
+	}
+
+	/* Only for T86x/T88x-based products after r2p0 */
+	if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) {
+		/* The JM_CONFIG register is specified as follows in the
+		 T86x/T88x Engineering Specification Supplement:
+		 The values are read from device tree in order.
+		*/
+#define TIMESTAMP_OVERRIDE  1
+#define CLOCK_GATE_OVERRIDE (1<<1)
+#define JOB_THROTTLE_ENABLE (1<<2)
+#define JOB_THROTTLE_LIMIT_SHIFT 3
+
+		/* 6 bits in the register */
+		const u32 jm_max_limit = 0x3F;
+
+		if (of_property_read_u32_array(np,
+					"jm_config",
+					&jm_values[0],
+					ARRAY_SIZE(jm_values))) {
+			/* Entry not in device tree, use defaults  */
+			jm_values[0] = 0;
+			jm_values[1] = 0;
+			jm_values[2] = 0;
+			jm_values[3] = jm_max_limit; /* Max value */
+		}
+
+		/* Limit throttle limit to 6 bits*/
+		if (jm_values[3] > jm_max_limit) {
+			dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63).");
+			jm_values[3] = jm_max_limit;
+		}
+
+		/* Aggregate to one integer. */
+		kbdev->hw_quirks_jm = (jm_values[0] ? TIMESTAMP_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[1] ? CLOCK_GATE_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[2] ? JOB_THROTTLE_ENABLE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[3] <<
+				JOB_THROTTLE_LIMIT_SHIFT);
+	} else {
+		kbdev->hw_quirks_jm = KBASE_JM_CONFIG_UNUSED;
+	}
+
+
+}
+
+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
+{
+	if (kbdev->hw_quirks_sc)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
+				kbdev->hw_quirks_sc, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
+			kbdev->hw_quirks_tiler, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
+			kbdev->hw_quirks_mmu, NULL);
+
+
+	if (kbdev->hw_quirks_jm != KBASE_JM_CONFIG_UNUSED)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
+				kbdev->hw_quirks_jm, NULL);
+
+}
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE) &&
+		!kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_enable_smc != 0)
+			kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0);
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n");
+		kbdev->cci_snoop_enabled = true;
+	}
+}
+
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE) &&
+		kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_disable_smc != 0) {
+			mali_cci_flush_l2(kbdev);
+			kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0);
+		}
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n");
+		kbdev->cci_snoop_enabled = false;
+	}
+}
+
+static int kbase_pm_reset_do_normal(struct kbase_device *kbdev)
+{
+	struct kbasep_reset_timeout_data rtdata;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+	kbase_tlstream_jd_gpu_soft_reset(kbdev);
+#endif
+#if 0
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_SOFT_RESET, NULL);
+#else
+  //dev_err(kbdev->dev, "%s,  %d \n", __FILE__, __LINE__);
+  core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+  l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+  tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+
+  //printk("core_ready=%ld, l2_ready=%ld, tiler_ready=%ld\n", core_ready, l2_ready, tiler_ready);
+  if (core_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, core_ready, ACTION_PWROFF);
+
+  if (l2_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_ready, ACTION_PWROFF);
+
+  if (tiler_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_ready, ACTION_PWROFF);
+
+  /* do external reset */
+
+//JOHNT
+#if 0
+    // Level reset mail
+    Wr(P_RESET0_MASK, ~(0x1<<20));
+    Wr(P_RESET0_LEVEL, ~(0x1<<20));
+
+    // Level reset mail
+    Wr(P_RESET2_MASK, ~(0x1<<14));
+    Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    Wr(P_RESET2_LEVEL, 0xffffffff);
+    Wr(P_RESET0_LEVEL, 0xffffffff);
+#else
+    if (reg_base_hiubus) {
+        value = Rd(RESET0_MASK);
+        value = value & (~(0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_MASK, value);
+
+        value = Rd(RESET0_LEVEL);
+        value = value & (~(0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_LEVEL, value);
+    ///////////////
+        value = Rd(RESET2_MASK);
+        value = value & (~(0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_MASK, value);
+
+        value = Rd(RESET2_LEVEL);
+        value = value & (~(0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_LEVEL, value);
+
+        value = Rd(RESET0_LEVEL);
+        value = value | ((0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_LEVEL, value);
+
+        value = Rd(RESET2_LEVEL);
+        value = value | ((0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_LEVEL, value);
+    } else {
+		dev_err(kbdev->dev, "reg_base_hiubus is null !!!\n");
+    }
+#endif
+  //writel(..../*e.g. PWR_RESET, EXTERNAL_PWR_REGISTER*/);
+
+  /* any other action necessary, like a simple delay */
+  kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+  kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+#endif
+
+	/* Unmask the reset complete interrupt only */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED,
+									NULL);
+
+	/* Initialize a structure for tracking the status of the reset */
+	rtdata.kbdev = kbdev;
+	rtdata.timed_out = 0;
+
+	/* Create a timer to use as a timeout on the reset */
+	hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rtdata.timer.function = kbasep_reset_timeout;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	/* No interrupt has been received - check if the RAWSTAT register says
+	 * the reset has completed */
+	if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+							RESET_COMPLETED) {
+		/* The interrupt is set in the RAWSTAT; this suggests that the
+		 * interrupts are not getting to the CPU */
+		dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+		/* If interrupts aren't working we can't continue. */
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return -EINVAL;
+	}
+
+	/* The GPU doesn't seem to be responding to the reset so try a hard
+	 * reset */
+	dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+								RESET_TIMEOUT);
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET, NULL);
+
+	/* Restart the timer to wait for the hard reset to complete */
+	rtdata.timed_out = 0;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	destroy_hrtimer_on_stack(&rtdata.timer);
+
+	dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+								RESET_TIMEOUT);
+
+	return -EINVAL;
+}
+
+static int kbase_pm_reset_do_protected(struct kbase_device *kbdev)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+	kbase_tlstream_jd_gpu_soft_reset(kbdev);
+
+	return kbdev->protected_ops->protected_mode_reset(kbdev);
+}
+
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+{
+	unsigned long irq_flags;
+	int err;
+	bool resume_vinstr = false;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Ensure the clock is on before attempting to access the hardware */
+	if (!kbdev->pm.backend.gpu_powered) {
+		if (kbdev->pm.backend.callback_power_on)
+			kbdev->pm.backend.callback_power_on(kbdev);
+
+		spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+		kbdev->pm.backend.gpu_powered = true;
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+	}
+
+	/* Ensure interrupts are off to begin with, this also clears any
+	 * outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure cache snoops are disabled before reset. */
+	kbase_pm_cache_snoop_disable(kbdev);
+	/* Prepare for the soft-reset */
+	kbdev->pm.backend.reset_done = false;
+
+	/* The cores should be made unavailable due to the reset */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags);
+	if (kbdev->shader_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u, (u32)0u);
+	if (kbdev->tiler_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u, (u32)0u);
+	kbdev->shader_available_bitmap = 0u;
+	kbdev->tiler_available_bitmap = 0u;
+	kbdev->l2_available_bitmap = 0u;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags);
+
+	/* Soft reset the GPU */
+	if (kbdev->protected_mode_support &&
+			kbdev->protected_ops->protected_mode_reset)
+		err = kbase_pm_reset_do_protected(kbdev);
+	else
+		err = kbase_pm_reset_do_normal(kbdev);
+
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, irq_flags);
+	if (kbdev->protected_mode)
+		resume_vinstr = true;
+	kbdev->protected_mode_transition = false;
+	kbdev->protected_mode = false;
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, irq_flags);
+
+	if (err)
+		goto exit;
+
+	if (flags & PM_HW_ISSUES_DETECT)
+		kbase_pm_hw_issues_detect(kbdev);
+
+	kbase_pm_hw_issues_apply(kbdev);
+
+	kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
+
+	/* Sanity check protected mode was left after reset */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		u32 gpu_status = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS), NULL);
+
+		WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE);
+	}
+
+	/* If cycle counter was in use re-enable it, enable_irqs will only be
+	 * false when called from kbase_pm_powerup */
+	if (kbdev->pm.backend.gpu_cycle_counter_requests &&
+						(flags & PM_ENABLE_IRQS)) {
+		/* enable interrupts as the L2 may have to be powered on */
+		kbase_pm_enable_interrupts(kbdev);
+		kbase_pm_request_l2_caches(kbdev);
+
+		/* Re-enable the counters if we need to */
+		spin_lock_irqsave(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+		if (kbdev->pm.backend.gpu_cycle_counter_requests)
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+		spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+		kbase_pm_release_l2_caches(kbdev);
+		kbase_pm_disable_interrupts(kbdev);
+	}
+
+	if (flags & PM_ENABLE_IRQS)
+		kbase_pm_enable_interrupts(kbdev);
+
+exit:
+	/* If GPU is leaving protected mode resume vinstr operation. */
+	if (kbdev->vinstr_ctx && resume_vinstr)
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	return err;
+}
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ *
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * kbase_pm_request_gpu_cycle_counter() or
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only
+ *
+ * When this function is called the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to (
+ * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() )
+ *
+ * @kbdev:     The kbase device structure of the device
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+	--kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	kbase_pm_release_l2_caches(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100755
index 0000000..477bea7
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,566 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * enum kbasep_pm_action - Actions that can be performed on a core.
+ *
+ * This enumeration is private to the file. Its values are set to allow
+ * core_type_to_reg() function, which decodes this enumeration, to be simpler
+ * and more efficient.
+ *
+ * @ACTION_PRESENT: The cores that are present
+ * @ACTION_READY: The cores that are ready
+ * @ACTION_PWRON: Power on the cores specified
+ * @ACTION_PWROFF: Power off the cores specified
+ * @ACTION_PWRTRANS: The cores that are transitioning
+ * @ACTION_PWRACTIVE: The cores that are active
+ */
+enum kbasep_pm_action {
+	ACTION_PRESENT = 0,
+	ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO),
+	ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO),
+	ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO),
+	ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO),
+	ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO)
+};
+
+
+
+/**
+ * kbase_pm_dev_idle - The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_dev_activate - The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_get_present_cores - Get details of the cores that are present in
+ *                              the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid
+ *         pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_active_cores - Get details of the cores that are currently
+ *                             active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_trans_cores - Get details of the cores that are currently
+ *                            transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_ready_cores - Get details of the cores that are currently
+ *                            powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are powered and ready for jobs (they may
+ * or may not be currently executing jobs).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_clock_on - Turn the clock for the device on, and enable device
+ *                     interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if clock on due to resume after suspend, false otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
+ *                      device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU
+ * off. It should be modified during integration to perform the necessary
+ * actions to turn the clock off (if this is possible in the integration).
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if clock off due to suspend, false otherwise
+ *
+ * Return: true  if clock was turned off, or
+ *         false if clock can not be turned off due to pending page/bus fault
+ *               workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
+
+/**
+ * kbase_pm_enable_interrupts - Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts - Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * kbase_pm_reset_done - The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_pm_check_transitions_nolock - Check if there are any power transitions
+ *                                     to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state has no
+ * cores. That is: of the no cores desired, none were *un*available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation)
+ *
+ * The caller must hold kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return:      non-zero when all desired cores are available. That is,
+ *              it's worthwhile for the caller to submit a job.
+ *              false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_check_transitions_sync - Synchronous and locking variant of
+ *                                   kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device.pm.power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ *                                      where the caller must hold
+ *                                      kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ *                               the Power Policy, and begin any power
+ *                               transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware acheive the desired shader
+ * core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
+ *                                     the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+
+void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action);
+
+/**
+ * kbasep_pm_read_present_cores - Read the bitmasks of present cores.
+ *
+ * This information is cached to avoid having to perform register reads whenever
+ * the information is required.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init()) after calling this function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to
+ *                         update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of @buffer_updated differs from a previous call.
+ *
+ * @kbdev:          The kbase device structure for the device (must be a
+ *                  valid pointer)
+ * @buffer_updated: True if the buffer has been updated on this VSync,
+ *                  false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change
+ *                            the clock speed of the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating.
+ */
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
+ *                                      needed
+ *
+ * If the caller is the first caller then the GPU cycle counters will be enabled
+ * along with the l2 cache
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
+ *                                               needed (l2 cache already on)
+ *
+ * This is a version of the above function
+ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
+ * l2 cache is known to be on and assured to be on until the subsequent call of
+ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does
+ * not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
+ *                                      longer in use
+ *
+ * If the caller is the
+ * last caller then the GPU cycle counters will be disabled. A request must have
+ * been made before a call to this.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * Access to registers should be done using kbase_os_reg_read()/write() at this
+ * stage, not kbase_reg_read()/write().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ *                              collection is active.
+ *
+ * Note that this returns if the power management metrics collection was
+ * active at the time of calling, it is possible that after the call the metrics
+ * collection enable may have changed state.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if power on due to resume after suspend,
+ *             false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been
+ *                        requested.
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if power off due to suspend,
+ *              false otherwise
+ * Return:
+ *         true      if power was turned off, else
+ *         false     if power can not be turned off due to pending page/bus
+ *                   fault workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+
+#ifdef CONFIG_PM_DEVFREQ
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total, unsigned long *busy);
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @kbdev:         The kbase device structure for the device (must be a
+ *                 valid pointer)
+ * @utilisation:   The current calculated utilisation by the metrics system.
+ * @util_gl_share: The current calculated gl share of utilisation.
+ * @util_cl_share: The current calculated cl share of utilisation per core
+ *                 group.
+ * Return:         Returns 0 on failure and non zero on success.
+ */
+
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2]);
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_metrics_update - Inform the metrics system that an atom is either
+ *                           about to be run or has just completed.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @now:   Pointer to the timestamp of the change, or NULL to use current time
+ *
+ * Caller must hold runpool_irq.lock
+ */
+void kbase_pm_metrics_update(struct kbase_device *kbdev,
+				ktime_t *now);
+
+/**
+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called after L2 power up.
+ */
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called before L2 power off.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
new file mode 100755
index 0000000..ae63256
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -0,0 +1,400 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION          70
+#define KBASE_PM_VSYNC_MAX_UTILISATION          90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION       10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION       40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
+ * under 11s. Exceeding this will cause overflow */
+#define KBASE_PM_TIME_SHIFT			8
+
+/* Maximum time between sampling of utilization data, without resetting the
+ * counters. */
+#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbasep_pm_metrics_data *metrics;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	metrics = container_of(timer, struct kbasep_pm_metrics_data, timer);
+	kbase_pm_get_dvfs_action(metrics->kbdev);
+
+	spin_lock_irqsave(&metrics->lock, flags);
+
+	if (metrics->timer_active)
+		hrtimer_start(timer,
+			HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&metrics->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+int kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.metrics.kbdev = kbdev;
+
+	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.prev_busy = 0;
+	kbdev->pm.backend.metrics.prev_idle = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+
+	spin_lock_init(&kbdev->pm.backend.metrics.lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbdev->pm.backend.metrics.timer_active = true;
+	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
+
+	hrtimer_start(&kbdev->pm.backend.metrics.timer,
+			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
+
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbdev->pm.backend.metrics.timer_active = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	ktime_t diff;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+	if (ktime_to_ns(diff) < 0)
+		return;
+
+	if (kbdev->pm.backend.metrics.gpu_active) {
+		u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+
+		kbdev->pm.backend.metrics.time_busy += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[0])
+			kbdev->pm.backend.metrics.busy_cl[0] += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[1])
+			kbdev->pm.backend.metrics.busy_cl[1] += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[0])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+	} else {
+		kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff)
+							>> KBASE_PM_TIME_SHIFT);
+	}
+
+	kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+#if defined(CONFIG_PM_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function.
+ */
+static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	/* Store previous value */
+	kbdev->pm.backend.metrics.prev_idle =
+					kbdev->pm.backend.metrics.time_idle;
+	kbdev->pm.backend.metrics.prev_busy =
+					kbdev->pm.backend.metrics.time_busy;
+
+	/* Reset current values */
+	kbdev->pm.backend.metrics.time_period_start = now;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+}
+
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get());
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total_out, unsigned long *busy_out)
+{
+	ktime_t now = ktime_get();
+	unsigned long flags, busy, total;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	busy = kbdev->pm.backend.metrics.time_busy;
+	total = busy + kbdev->pm.backend.metrics.time_idle;
+
+	/* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
+	 * 100ms) */
+	if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+		kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+	} else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+		total += kbdev->pm.backend.metrics.prev_idle +
+				kbdev->pm.backend.metrics.prev_busy;
+		busy += kbdev->pm.backend.metrics.prev_busy;
+	}
+
+	*total_out = total;
+	*busy_out = busy;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev,
+					int *util_gl_share,
+					int util_cl_share[2],
+					ktime_t now)
+{
+	int utilisation;
+	int busy;
+
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	if (kbdev->pm.backend.metrics.time_idle +
+				kbdev->pm.backend.metrics.time_busy == 0) {
+		/* No data - so we return NOP */
+		utilisation = -1;
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+		goto out;
+	}
+
+	utilisation = (100 * kbdev->pm.backend.metrics.time_busy) /
+			(kbdev->pm.backend.metrics.time_idle +
+			 kbdev->pm.backend.metrics.time_busy);
+
+	busy = kbdev->pm.backend.metrics.busy_gl +
+		kbdev->pm.backend.metrics.busy_cl[0] +
+		kbdev->pm.backend.metrics.busy_cl[1];
+
+	if (busy != 0) {
+		if (util_gl_share)
+			*util_gl_share =
+				(100 * kbdev->pm.backend.metrics.busy_gl) /
+									busy;
+		if (util_cl_share) {
+			util_cl_share[0] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[0]) /
+									busy;
+			util_cl_share[1] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[1]) /
+									busy;
+		}
+	} else {
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+	}
+
+out:
+	return utilisation;
+}
+
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	int utilisation, util_gl_share;
+	int util_cl_share[2];
+	ktime_t now;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	now = ktime_get();
+
+	utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share,
+			util_cl_share, now);
+
+	if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 ||
+							util_cl_share[1] < 0) {
+		utilisation = 0;
+		util_gl_share = 0;
+		util_cl_share[0] = 0;
+		util_cl_share[1] = 0;
+		goto out;
+	}
+
+out:
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
+								util_cl_share);
+#endif				/*CONFIG_MALI_MIDGARD_DVFS */
+
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+	bool isactive;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	isactive = kbdev->pm.backend.metrics.timer_active;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	return isactive;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/**
+ * kbase_pm_metrics_active_calc - Update PM active counts based on currently
+ *                                running atoms
+ * @kbdev: Device pointer
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ */
+static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		/* Head atom may have just completed, so if it isn't running
+		 * then try the next atom */
+		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
+			katom = kbase_gpu_inspect(kbdev, js, 1);
+
+		if (katom && katom->gpu_rb_state ==
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+			if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+				int device_nr = (katom->core_req &
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+						? katom->device_nr : 0;
+				WARN_ON(device_nr >= 2);
+				kbdev->pm.backend.metrics.active_cl_ctx[
+						device_nr] = 1;
+			} else {
+				/* Slot 2 should not be running non-compute
+				 * atoms */
+				WARN_ON(js >= 2);
+				kbdev->pm.backend.metrics.active_gl_ctx[js] = 1;
+			}
+			kbdev->pm.backend.metrics.gpu_active = true;
+		}
+	}
+}
+
+/* called when job is submitted to or removed from a GPU slot */
+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
+{
+	unsigned long flags;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	if (!timestamp) {
+		now = ktime_get();
+		timestamp = &now;
+	}
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
+
+	kbase_pm_metrics_active_calc(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100755
index 0000000..6d4b00d
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,1021 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#else				/* CONFIG_MALI_NO_MALI */
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/* The number of policies available in the system.
+ * This is derived from the number of functions listed in policy_get_functions.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+	 * expect to hit it nor tend to hit it very much anyway. We can detect
+	 * whether we need more instrumentation by a difference between
+	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
+
+	/* Must be the last */
+	KBASE_PM_FUNC_ID_COUNT
+};
+
+
+/* State changes during request/unrequest/release-ing cores */
+enum {
+	KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
+	KBASE_PM_CHANGE_STATE_TILER  = (1u << 1),
+
+	/* These two must be last */
+	KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
+						KBASE_PM_CHANGE_STATE_SHADER),
+	KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
+};
+typedef u32 kbase_pm_change_state;
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+/* Timeline Trace code lookups for each function */
+static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
+					[KBASE_PM_CHANGE_STATE_COUNT] = {
+	/* kbase_pm_request_cores */
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
+
+	/* kbase_pm_release_cores */
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
+};
+
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id,
+		kbase_pm_change_state state)
+{
+	int trace_code;
+
+	KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
+	KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
+									state);
+
+	trace_code = kbase_pm_change_state_trace_code[func_id][state];
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
+}
+
+#else /* CONFIG_MALI_TRACE_TIMELINE */
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id, kbase_pm_change_state state)
+{
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+/**
+ * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
+ *                               requested shader cores
+ * @kbdev: Device pointer
+ */
+static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
+{
+	u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
+	u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	kbdev->pm.backend.desired_shader_state &=
+			~kbdev->pm.backend.shader_poweroff_pending;
+	kbdev->pm.backend.desired_tiler_state &=
+			~kbdev->pm.backend.tiler_poweroff_pending;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+
+	if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
+			prev_tiler_state !=
+				kbdev->pm.backend.desired_tiler_state ||
+			kbdev->pm.backend.ca_in_transition) {
+		bool cores_are_available;
+
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+		/* Don't need 'cores_are_available',
+		 * because we don't return anything */
+		CSTD_UNUSED(cores_are_available);
+	}
+}
+
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(timer, struct kbase_device,
+						pm.backend.gpu_poweroff_timer);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* It is safe for this call to do nothing if the work item is already
+	 * queued. The worker function will read the must up-to-date state of
+	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
+	 *
+	 * If a state change occurs while the worker function is processing,
+	 * this call will succeed as a work item can be requeued once it has
+	 * started processing.
+	 */
+	if (kbdev->pm.backend.gpu_poweroff_pending)
+		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+					&kbdev->pm.backend.gpu_poweroff_work);
+
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending_time--;
+
+		KBASE_DEBUG_ASSERT(
+				kbdev->pm.backend.shader_poweroff_pending_time
+									>= 0);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending_time)
+			kbasep_pm_do_poweroff_cores(kbdev);
+	}
+
+	if (kbdev->pm.backend.poweroff_timer_needed) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+		return HRTIMER_RESTART;
+	}
+
+	kbdev->pm.backend.poweroff_timer_running = false;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	bool do_poweroff = false;
+
+	kbdev = container_of(data, struct kbase_device,
+						pm.backend.gpu_poweroff_work);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	kbdev->pm.backend.gpu_poweroff_pending--;
+
+	if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Only power off the GPU if a request is still pending */
+	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+		do_poweroff = true;
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	if (do_poweroff) {
+		kbdev->pm.backend.poweroff_timer_needed = false;
+		hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+		kbdev->pm.backend.poweroff_timer_running = false;
+
+		/* Power off the GPU */
+		if (!kbase_pm_do_poweroff(kbdev, false)) {
+			/* GPU can not be powered off at present */
+			kbdev->pm.backend.poweroff_timer_needed = true;
+			kbdev->pm.backend.poweroff_timer_running = true;
+			hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer,
+					kbdev->pm.gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("kbase_pm_do_poweroff",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!wq)
+		return -ENOMEM;
+
+	kbdev->pm.backend.gpu_poweroff_wq = wq;
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
+			kbasep_pm_do_gpu_poweroff_wq);
+	hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	kbdev->pm.backend.gpu_poweroff_timer.function =
+			kbasep_pm_do_gpu_poweroff_callback;
+	kbdev->pm.backend.pm_current_policy = policy_list[0];
+	kbdev->pm.backend.pm_current_policy->init(kbdev);
+	kbdev->pm.gpu_poweroff_time =
+			HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+	kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+	kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
+
+	return 0;
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.pm_current_policy->term(kbdev);
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
+}
+
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	kbdev->pm.backend.poweroff_timer_needed = false;
+	hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.poweroff_timer_running = false;
+
+	/* If wq is already running but is held off by pm.lock, make sure it has
+	 * no effect */
+	kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+	kbdev->pm.backend.shader_poweroff_pending_time = 0;
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+	bool active;
+
+	lockdep_assert_held(&pm->lock);
+
+	/* pm_current_policy will never be NULL while pm.lock is held */
+	KBASE_DEBUG_ASSERT(backend->pm_current_policy);
+
+	spin_lock_irqsave(&pm->power_change_lock, flags);
+
+	active = backend->pm_current_policy->get_core_active(kbdev);
+
+	if (active) {
+		if (backend->gpu_poweroff_pending) {
+			/* Cancel any pending power off request */
+			backend->gpu_poweroff_pending = 0;
+
+			/* If a request was pending then the GPU was still
+			 * powered, so no need to continue */
+			if (!kbdev->poweroff_pending) {
+				spin_unlock_irqrestore(&pm->power_change_lock,
+						flags);
+				return;
+			}
+		}
+
+		if (!backend->poweroff_timer_running && !backend->gpu_powered &&
+				(pm->poweroff_gpu_ticks ||
+				pm->poweroff_shader_ticks)) {
+			backend->poweroff_timer_needed = true;
+			backend->poweroff_timer_running = true;
+			hrtimer_start(&backend->gpu_poweroff_timer,
+					pm->gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+
+		spin_unlock_irqrestore(&pm->power_change_lock, flags);
+
+		/* Power on the GPU and any cores requested by the policy */
+		kbase_pm_do_poweron(kbdev, false);
+	} else {
+		/* It is an error for the power policy to power off the GPU
+		 * when there are contexts active */
+		KBASE_DEBUG_ASSERT(pm->active_count == 0);
+
+		if (backend->shader_poweroff_pending ||
+				backend->tiler_poweroff_pending) {
+			backend->shader_poweroff_pending = 0;
+			backend->tiler_poweroff_pending = 0;
+			backend->shader_poweroff_pending_time = 0;
+		}
+
+		/* Request power off */
+		if (pm->backend.gpu_powered) {
+			if (pm->poweroff_gpu_ticks) {
+				backend->gpu_poweroff_pending =
+						pm->poweroff_gpu_ticks;
+				backend->poweroff_timer_needed = true;
+				if (!backend->poweroff_timer_running) {
+					/* Start timer if not running (eg if
+					 * power policy has been changed from
+					 * always_on to something else). This
+					 * will ensure the GPU is actually
+					 * powered off */
+					backend->poweroff_timer_running
+							= true;
+					hrtimer_start(
+						&backend->gpu_poweroff_timer,
+						pm->gpu_poweroff_time,
+						HRTIMER_MODE_REL);
+				}
+				spin_unlock_irqrestore(&pm->power_change_lock,
+						flags);
+			} else {
+				spin_unlock_irqrestore(&pm->power_change_lock,
+						flags);
+
+				/* Power off the GPU immediately */
+				if (!kbase_pm_do_poweroff(kbdev, false)) {
+					/* GPU can not be powered off at present
+					 */
+					spin_lock_irqsave(
+							&pm->power_change_lock,
+							flags);
+					backend->poweroff_timer_needed = true;
+					if (!backend->poweroff_timer_running) {
+						backend->poweroff_timer_running
+								= true;
+						hrtimer_start(
+						&backend->gpu_poweroff_timer,
+							pm->gpu_poweroff_time,
+							HRTIMER_MODE_REL);
+					}
+					spin_unlock_irqrestore(
+							&pm->power_change_lock,
+							flags);
+				}
+			}
+		} else {
+			spin_unlock_irqrestore(&pm->power_change_lock, flags);
+		}
+	}
+}
+
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+	u64 desired_bitmap;
+	u64 desired_tiler_bitmap;
+	bool cores_are_available;
+	bool do_poweroff = false;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	if (kbdev->pm.backend.pm_current_policy == NULL)
+		return;
+
+	desired_bitmap =
+		kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
+	desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+
+	if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+		desired_tiler_bitmap = 1;
+	else
+		desired_tiler_bitmap = 0;
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
+		/* Unless XAFFINITY is supported, enable core 0 if tiler
+		 * required, regardless of core availability */
+		if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+			desired_bitmap |= 1;
+	}
+
+	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
+							(u32)desired_bitmap);
+	/* Are any cores being powered on? */
+	if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
+	    ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
+	    kbdev->pm.backend.ca_in_transition) {
+		/* Check if we are powering off any cores before updating shader
+		 * state */
+		if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+			/* Start timer to power off cores */
+			kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+			kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+
+			if (kbdev->pm.poweroff_shader_ticks)
+				kbdev->pm.backend.shader_poweroff_pending_time =
+						kbdev->pm.poweroff_shader_ticks;
+			else
+				do_poweroff = true;
+		}
+
+		kbdev->pm.backend.desired_shader_state = desired_bitmap;
+		kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
+
+		/* If any cores are being powered on, transition immediately */
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+		/* Start timer to power off cores */
+		kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+		if (kbdev->pm.poweroff_shader_ticks)
+			kbdev->pm.backend.shader_poweroff_pending_time =
+					kbdev->pm.poweroff_shader_ticks;
+		else
+			kbasep_pm_do_poweroff_cores(kbdev);
+	} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
+			desired_tiler_bitmap != 0 &&
+			kbdev->pm.backend.poweroff_timer_needed) {
+		/* If power policy is keeping cores on despite there being no
+		 * active contexts then disable poweroff timer as it isn't
+		 * required.
+		 * Only reset poweroff_timer_needed if we're not in the middle
+		 * of the power off callback */
+		kbdev->pm.backend.poweroff_timer_needed = false;
+	}
+
+	/* Ensure timer does not power off wanted cores and make sure to power
+	 * off unwanted cores */
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending &=
+				~(kbdev->pm.backend.desired_shader_state &
+								desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending &=
+				~(kbdev->pm.backend.desired_tiler_state &
+				desired_tiler_bitmap);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending &&
+				!kbdev->pm.backend.tiler_poweroff_pending)
+			kbdev->pm.backend.shader_poweroff_pending_time = 0;
+	}
+
+	/* Shader poweroff is deferred to the end of the function, to eliminate
+	 * issues caused by the core availability policy recursing into this
+	 * function */
+	if (do_poweroff)
+		kbasep_pm_do_poweroff_cores(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+}
+
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
+
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
+
+int set_policy_by_name(struct kbase_device *kbdev, const char *name)
+{
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, name)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		printk("power_policy: policy not found\n");
+		return -EINVAL;
+	}
+	trace_printk("policy name=%s\n", name);
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(set_policy_by_name);
+
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_policy *new_policy)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	const struct kbase_pm_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	old_policy = kbdev->pm.backend.pm_current_policy;
+	kbdev->pm.backend.pm_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
+								old_policy->id);
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
+								new_policy->id);
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.pm_current_policy = new_policy;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_active(kbdev);
+	kbase_pm_update_cores_state(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
+
+/* Check whether a state change has finished, and trace it as completed */
+static void
+kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
+{
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state)
+		kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	unsigned long flags;
+	u64 cores;
+
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	cores = shader_cores;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+
+		/* It should be almost impossible for this to overflow. It would
+		 * require 2^32 atoms to request a particular core, which would
+		 * require 2^24 contexts to submit. This would require an amount
+		 * of memory that is impossible on a 32-bit system and extremely
+		 * unlikely on a 64-bit system. */
+		int cnt = ++kbdev->shader_needed_cnt[bitnum];
+
+		if (1 == cnt) {
+			kbdev->shader_needed_bitmap |= bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt = ++kbdev->tiler_needed_cnt;
+
+		if (1 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+							change_gpu_state);
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
+
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	unsigned long flags;
+
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_needed_bitmap &= ~bit;
+
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		cnt = --kbdev->tiler_needed_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* Trace that any state change effectively completes immediately
+		 * - no-one will wait on the state change */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
+
+enum kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	unsigned long flags;
+	u64 prev_shader_needed;	/* Just for tracing */
+	u64 prev_shader_inuse;	/* Just for tracing */
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	prev_shader_needed = kbdev->shader_needed_bitmap;
+	prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+	/* If desired_shader_state does not contain the requested cores, then
+	 * power management is not attempting to powering those cores (most
+	 * likely due to core availability policy) and a new job affinity must
+	 * be chosen */
+	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+							shader_cores) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+		return KBASE_NEW_AFFINITY;
+	}
+
+	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+	    (tiler_required && !kbdev->tiler_available_bitmap)) {
+		/* Trace ongoing core transition */
+		kbase_timeline_pm_l2_transition_start(kbdev);
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+		return KBASE_CORES_NOT_READY;
+	}
+
+	/* If we started to trace a state change, then trace it has being
+	 * finished by now, at the very latest */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+	/* Trace core transition done */
+	kbase_timeline_pm_l2_transition_done(kbdev);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt)
+			kbdev->shader_needed_bitmap &= ~bit;
+
+		/* shader_inuse_cnt should not overflow because there can only
+		 * be a very limited number of jobs on the h/w at one time */
+
+		kbdev->shader_inuse_cnt[bitnum]++;
+		kbdev->shader_inuse_bitmap |= bit;
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		--kbdev->tiler_needed_cnt;
+
+		kbdev->tiler_inuse_cnt++;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+	}
+
+	if (prev_shader_needed != kbdev->shader_needed_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	unsigned long flags;
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_inuse_bitmap &= ~bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+		cnt = --kbdev->tiler_inuse_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+							change_gpu_state);
+
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
+
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+					bool tiler_required,
+					u64 shader_cores)
+{
+	kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+
+	kbase_pm_check_transitions_sync(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
+
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 prior_l2_users_count;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	prior_l2_users_count = kbdev->l2_users_count++;
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
+
+	/* if the GPU is reset while the l2 is on, l2 will be off but
+	 * prior_l2_users_count will be > 0. l2_available_bitmap will have been
+	 * set to 0 though by kbase_pm_init_hw */
+	if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
+		kbase_pm_check_transitions_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+	wait_event(kbdev->pm.backend.l2_powered_wait,
+					kbdev->pm.backend.l2_powered == 1);
+
+	/* Trace that any state change completed immediately */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
+
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	kbdev->l2_users_count++;
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
+
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
+
+	--kbdev->l2_users_count;
+
+	if (!kbdev->l2_users_count) {
+		kbase_pm_check_transitions_nolock(kbdev);
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
new file mode 100755
index 0000000..611a90e
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -0,0 +1,227 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ *         initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+	KBASE_CORES_NOT_READY = 0,
+	KBASE_NEW_AFFINITY = 1,
+	KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ *                          for jobs to be submitted
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete/the cores might not be available
+ * until a Power Management IRQ.
+ *
+ * Return: 0 if the cores were successfully requested, or -errno otherwise.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ *                            jobs to be submitted.
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job is
+ * cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and if so update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ *
+ *         %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
+ *
+ *         %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+						struct kbase_device *kbdev,
+						bool tiler_required,
+						u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_register_inuse_cores() )
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
+ * may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups, power up, wait and prevent
+ * the power manager from powering down the l2 caches.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2
+ *
+ * It is the callers responsibility to ensure that the l2 is already powered up
+ * and to eventually call kbase_pm_release_l2_caches()
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power manager
+ * to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c
new file mode 100644
index 0000000..d965033
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c
@@ -0,0 +1,169 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+#include <linux/of.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <backend/gpu/mali_kbase_power_model_simple.h>
+
+/*
+ * This model is primarily designed for the Juno platform. It may not be
+ * suitable for other platforms.
+ */
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static u32 dynamic_coefficient;
+static u32 static_coefficient;
+static s32 ts[4];
+static struct thermal_zone_device *gpu_tz;
+
+static unsigned long model_static_power(unsigned long voltage)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	unsigned long temperature;
+#else
+	int temperature;
+#endif
+	unsigned long temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	if (gpu_tz) {
+		int ret;
+
+		ret = gpu_tz->ops->get_temp(gpu_tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	} else {
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(ts[3] * temp_cubed)
+			+ (ts[2] * temp_squared)
+			+ (ts[1] * temp)
+			+ ts[0];
+
+	return (((static_coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+}
+
+static unsigned long model_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+
+	return (dynamic_coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+struct devfreq_cooling_ops power_model_simple_ops = {
+#else
+struct devfreq_cooling_power power_model_simple_ops = {
+#endif
+	.get_static_power = model_static_power,
+	.get_dynamic_power = model_dynamic_power,
+};
+
+int kbase_power_model_simple_init(struct kbase_device *kbdev)
+{
+	struct device_node *power_model_node;
+	const char *tz_name;
+	u32 static_power, dynamic_power;
+	u32 voltage, voltage_squared, voltage_cubed, frequency;
+
+	power_model_node = of_get_child_by_name(kbdev->dev->of_node,
+			"power_model");
+	if (!power_model_node) {
+		dev_err(kbdev->dev, "could not find power_model node\n");
+		return -ENODEV;
+	}
+	if (!of_device_is_compatible(power_model_node,
+			"arm,mali-simple-power-model")) {
+		dev_err(kbdev->dev, "power_model incompatible with simple power model\n");
+		return -ENODEV;
+	}
+
+	if (of_property_read_string(power_model_node, "thermal-zone",
+			&tz_name)) {
+		dev_err(kbdev->dev, "ts in power_model not available\n");
+		return -EINVAL;
+	}
+
+	gpu_tz = thermal_zone_get_zone_by_name(tz_name);
+	if (IS_ERR(gpu_tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(gpu_tz));
+		gpu_tz = NULL;
+
+		return -EPROBE_DEFER;
+	}
+
+	if (of_property_read_u32(power_model_node, "static-power",
+			&static_power)) {
+		dev_err(kbdev->dev, "static-power in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "dynamic-power",
+			&dynamic_power)) {
+		dev_err(kbdev->dev, "dynamic-power in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "voltage",
+			&voltage)) {
+		dev_err(kbdev->dev, "voltage in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "frequency",
+			&frequency)) {
+		dev_err(kbdev->dev, "frequency in power_model not available\n");
+		return -EINVAL;
+	}
+	voltage_squared = (voltage * voltage) / 1000;
+	voltage_cubed = voltage * voltage * voltage;
+	static_coefficient = (static_power << 20) / (voltage_cubed >> 10);
+	dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared)
+			* 1000) / frequency;
+
+	if (of_property_read_u32_array(power_model_node, "ts", (u32 *)ts, 4)) {
+		dev_err(kbdev->dev, "ts in power_model not available\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h
new file mode 100644
index 0000000..9b5e69a
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_POWER_MODEL_SIMPLE_H_
+#define _BASE_POWER_MODEL_SIMPLE_H_
+
+/**
+ * kbase_power_model_simple_init - Initialise the simple power model
+ * @kbdev: Device pointer
+ *
+ * The simple power model estimates power based on current voltage, temperature,
+ * and coefficients read from device tree. It does not take utilization into
+ * account.
+ *
+ * The power model requires coefficients from the power_model node in device
+ * tree. The absence of this node will prevent the model from functioning, but
+ * should not prevent the rest of the driver from running.
+ *
+ * Return: 0 on success
+ *         -ENOSYS if the power_model node is not present in device tree
+ *         -EPROBE_DEFER if the thermal zone specified in device tree is not
+ *         currently available
+ *         Any other negative value on failure
+ */
+int kbase_power_model_simple_init(struct kbase_device *kbdev);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+extern struct devfreq_cooling_ops power_model_simple_ops;
+#else
+extern struct devfreq_cooling_power power_model_simple_ops;
+#endif
+
+#endif /* _BASE_POWER_MODEL_SIMPLE_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100755
index 0000000..d992989
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,103 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush -  Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367
+ *
+ * Note : If GPU resets occur then the counters are reset to zero, the delay may
+ * not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+	u32 base_count = 0;
+
+	/*
+	 * The caller must be holding onto the kctx or the call is from
+	 * userspace.
+	 */
+	kbase_pm_context_active(kctx->kbdev);
+	kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+	while (true) {
+		u32 new_count;
+
+		new_count = kbase_reg_read(kctx->kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		/* First time around, just store the count. */
+		if (base_count == 0) {
+			base_count = new_count;
+			continue;
+		}
+
+		/* No need to handle wrapping, unsigned maths works for this. */
+		if ((new_count - base_count) > 1000)
+			break;
+	}
+
+	kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+	kbase_pm_context_idle(kctx->kbdev);
+}
+#endif				/* CONFIG_MALI_NO_MALI */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
new file mode 100755
index 0000000..35088ab
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
new file mode 100755
index 0000000..35ff2f1
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -0,0 +1,126 @@
+#
+# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/drivers/gpu/arm/midgard/ 
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE                += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           += ../../kernel/drivers/gpu/arm/midgard/docs
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
new file mode 100755
index 0000000..7ae05c2
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR;
+	size="12,8";
+	compound=true;
+
+	node [ shape = box ];
+
+	subgraph cluster_policy_queues {
+		low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+		queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+		rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+		label = "Policy's Queue(s)";
+	}
+
+	call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+	{
+		rank=same;
+		ordering=out;
+		call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+		call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+		call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+	}
+
+	subgraph cluster_runpool {
+
+		as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+		as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+		as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+		as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+		label = "Policy's Run Pool";
+	}
+
+	{
+		rank=same;
+		call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+		sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+	}
+
+	{
+		rank=same;
+		ordering=out;
+		sstop [ shape=ellipse label="SS-Timer expires" ]
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		irq [ label="IRQ" shape=ellipse ];
+
+		job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+	}
+
+	hstop [ shape=ellipse label="HS-Timer expires" ]
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+	low_queue:qr -> call_dequeue:w;
+	rt_queue:qr -> call_dequeue:w;
+
+	call_dequeue -> as1 [lhead=cluster_runpool];
+
+	as1->call_jdequeue         [ltail=cluster_runpool];
+	call_jdequeue->jobslots:0;
+	call_jdequeue->sstop_dotfixup [ arrowhead=none];
+	sstop_dotfixup->sstop      [label="Spawn SS-Timer"];
+	sstop->jobslots            [label="SoftStop"];
+	sstop->hstop               [label="Spawn HS-Timer"];
+	hstop->jobslots:ne            [label="HardStop"];
+
+
+	as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+	call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+	call_ctxfinish->call_ctxdone [constraint=false];
+
+	call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+	{
+	jobslots->irq   [constraint=false];
+
+	irq->job_finish [constraint=false];
+	}
+
+	irq->as2  [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
new file mode 100755
index 0000000..159b993
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR
+	size="6,6"
+	compound=true;
+
+	node [ shape = box ];
+
+	call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+	policy_queue [ label="Policy's Queue" ];
+
+	{
+		rank=same;
+		runpool [ label="Policy's Run Pool" ];
+
+		ctx_finish [ label="ctx finished" ];
+	}
+
+	{
+		rank=same;
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		job_finish [ label="Job finished" ];
+	}
+
+
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> policy_queue;
+
+	policy_queue->runpool [label="dequeue ctx" weight=0.1];
+	runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+	runpool->ctx_finish [ style=dotted ];
+
+	runpool->jobslots  [label="dequeue job" weight=0.1];
+	jobslots->runpool  [label="requeue job" weight=0.1];
+
+	jobslots->job_finish [ style=dotted ];
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
new file mode 100755
index 0000000..f7c0ff6
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -0,0 +1,197 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t83x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t82x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tMIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
new file mode 100755
index 0000000..149f44c
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -0,0 +1,987 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_ISSUES_H_
+#define _BASE_HWCONFIG_ISSUES_H_
+
+enum base_hw_issue {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8879,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_generic[] = {
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t72x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t76x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t60x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t62x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t86x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_T76X_3982,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t83x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t82x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+
+
+
+
+#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
new file mode 100755
index 0000000..749dd9a
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,1810 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _BASE_KERNEL_H_
+#define _BASE_KERNEL_H_
+
+#ifndef __user
+#define __user
+#endif
+
+/* Support UK6 IOCTLS */
+#define BASE_LEGACY_UK6_SUPPORT 1
+
+/* Support UK7 IOCTLS */
+/* NB: To support UK6 we also need to support UK7 */
+#define BASE_LEGACY_UK7_SUPPORT 1
+
+/* Support UK8 IOCTLS */
+#define BASE_LEGACY_UK8_SUPPORT 1
+
+/* Support UK9 IOCTLS */
+#define BASE_LEGACY_UK9_SUPPORT 1
+
+/* Support UK10_2 IOCTLS */
+#define BASE_LEGACY_UK10_2_SUPPORT 1
+
+typedef struct base_mem_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_mem_handle;
+
+#include "mali_base_mem_priv.h"
+#include "mali_kbase_profiling_gator_api.h"
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT              256
+
+#define BASEP_JD_SEM_PER_WORD_LOG2      5
+#define BASEP_JD_SEM_PER_WORD           (1 << BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_WORD_NR(x)         ((x) >> BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_MASK_IN_WORD(x)    (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1)))
+#define BASEP_JD_SEM_ARRAY_SIZE         BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT)
+
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET             ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET           ((unsigned char)0)
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined CDBG_ASSERT
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined KBASE_DEBUG_ASSERT
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#error assert macro not defined!
+#endif
+
+#if defined PAGE_MASK
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union kbase_pointer {
+	void __user *value;	  /**< client should store their pointers here */
+	u32 compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	u64 sizer;	  /**< Force 64-bit storage for all clients regardless */
+} kbase_pointer;
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (eg @c MEM_PROT_CPU_WR | @c MEM_HINT_CPU_RD),
+ * which defines a @a write-only region on the CPU side, which is
+ * heavily read by the CPU...
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see ::BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * See ::base_mem_alloc_flags.
+ *
+ */
+enum {
+/* IN */
+	BASE_MEM_PROT_CPU_RD = (1U << 0),      /**< Read access CPU side */
+	BASE_MEM_PROT_CPU_WR = (1U << 1),      /**< Write access CPU side */
+	BASE_MEM_PROT_GPU_RD = (1U << 2),      /**< Read access GPU side */
+	BASE_MEM_PROT_GPU_WR = (1U << 3),      /**< Write access GPU side */
+	BASE_MEM_PROT_GPU_EX = (1U << 4),      /**< Execute allowed on the GPU
+						    side */
+
+	/* BASE_MEM_HINT flags have been removed, but their values are reserved
+	 * for backwards compatibility with older user-space drivers. The values
+	 * can be re-used once support for r5p0 user-space drivers is removed,
+	 * presumably in r7p0.
+	 *
+	 * RESERVED: (1U << 5)
+	 * RESERVED: (1U << 6)
+	 * RESERVED: (1U << 7)
+	 * RESERVED: (1U << 8)
+	 */
+
+	BASE_MEM_GROW_ON_GPF = (1U << 9),      /**< Grow backing store on GPU
+						    Page Fault */
+
+	BASE_MEM_COHERENT_SYSTEM = (1U << 10), /**< Page coherence Outer
+						    shareable, if available */
+	BASE_MEM_COHERENT_LOCAL = (1U << 11),  /**< Page coherence Inner
+						    shareable */
+	BASE_MEM_CACHED_CPU = (1U << 12),      /**< Should be cached on the
+						    CPU */
+
+/* IN/OUT */
+	BASE_MEM_SAME_VA = (1U << 13), /**< Must have same VA on both the GPU
+					    and the CPU */
+/* OUT */
+	BASE_MEM_NEED_MMAP = (1U << 14), /**< Must call mmap to aquire a GPU
+					     address for the alloc */
+/* IN */
+	BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence
+					     Outer shareable, required. */
+	BASE_MEM_SECURE = (1U << 16),          /**< Secure memory */
+	BASE_MEM_DONT_NEED = (1U << 17),       /**< Not needed physical
+						    memory */
+	BASE_MEM_IMPORT_SHARED = (1U << 18),   /**< Must use shared CPU/GPU zone
+						    (SAME_VA zone) but doesn't
+						    require the addresses to
+						    be the same */
+};
+
+/**
+ * @brief Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the ::base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 19
+
+/**
+  * A mask for all output bits, excluding IN/OUT bits.
+  */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/**
+  * A mask for all input bits, including IN/OUT bits.
+  */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/**
+ * A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+	 BASE_MEM_COHERENT_LOCAL)
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMP: UMP import. Handle type is ump_secure_id.
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	BASE_MEM_IMPORT_TYPE_UMP = 1,
+	BASE_MEM_IMPORT_TYPE_UMM = 2,
+	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+} base_mem_import_type;
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:	kbase_pointer to imported user buffer
+ * @length:	length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+	kbase_pointer ptr;
+	u64 length;
+};
+
+/**
+ * @brief Invalid memory handle.
+ *
+ * Return value from functions returning @ref base_mem_handle on error.
+ *
+ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} })
+
+/**
+ * @brief Special write-alloc memory handle.
+ *
+ * A special handle is used to represent a region where a special page is mapped
+ * with a write-alloc cache setup, typically used when the write result of the
+ * GPU isn't needed, but the GPU must write anyway.
+ *
+ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} })
+
+#define BASEP_MEM_INVALID_HANDLE               (0ull  << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE               (1ull  << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
+/* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
+						BASE_MEM_COOKIE_BASE)
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB  0xfffff000UL
+
+
+/* Bit mask of cookies used for for memory allocation setup */
+#define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
+
+
+/**
+ * @brief Result codes of changing the size of the backing store allocated to a tmem region
+ */
+typedef enum base_backing_threshold_status {
+	BASE_BACKING_THRESHOLD_OK = 0,			    /**< Resize successful */
+	BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1,	    /**< Not a growable tmem object */
+	BASE_BACKING_THRESHOLD_ERROR_OOM = -2,		    /**< Increase failed due to an out-of-memory condition */
+	BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
+} base_backing_threshold_status;
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief a basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+	struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent imported memory object.
+ * Simple opague handle to imported memory, can't be used
+ * with anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+	struct {
+		int fd;
+	} basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
+ */
+typedef struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completly unused
+ * by the Base driver. It can be used to store things such as callback
+ * function pointer, data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+	u64 blob[2];	 /**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ *         Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *         specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+	base_mem_handle handle;
+	u64 offset;
+	u64 length;
+};
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extent:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ */
+struct base_jit_alloc_info {
+	u64 gpu_alloc_addr;
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	u8 id;
+};
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
+ * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without
+ * changing the structure size).
+ * When the flag is set for a particular dependency to signal that it is an ordering only dependency then
+ * errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID  (0)       /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA     (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER    (1U << 1) /**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly.  By not specifying the
+ * correct settings can/will cause an early job termination.  Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u32 base_jd_core_req;
+
+/* Requirements that come from the HW */
+
+/**
+ * No requirement, dependency only
+ */
+#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
+
+/**
+ * Requires fragment shaders
+ */
+#define BASE_JD_REQ_FS  ((base_jd_core_req)1 << 0)
+
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS  ((base_jd_core_req)1 << 1)
+#define BASE_JD_REQ_T   ((base_jd_core_req)1 << 2)   /**< Requires tiling */
+#define BASE_JD_REQ_CF  ((base_jd_core_req)1 << 3)   /**< Requires cache flushes */
+#define BASE_JD_REQ_V   ((base_jd_core_req)1 << 4)   /**< Requires value writeback */
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC  ((base_jd_core_req)1 << 13)
+
+/**
+ * SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP  ((base_jd_core_req)1 << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON               ((base_jd_core_req)1 << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced no syncsets can be bundled with the atom
+ * but should instead be part of a NULL jobs inserted into the dependency tree.
+ * The first pre_dep object must be configured for the external resouces to use,
+ * the second pre_dep object can be used to create other dependencies.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES   ((base_jd_core_req)1 << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB        ((base_jd_core_req)1 << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME      (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER          (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/**
+ * SW Only requirement : Replay job.
+ *
+ * If the preceeding job fails, the replay job will cause the jobs specified in
+ * the list of base_jd_replay_payload pointed to by the jc pointer to be
+ * replayed.
+ *
+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
+ * job is failed, as well as any following dependencies.
+ *
+ * The replayed jobs will require a number of atom IDs. If there are not enough
+ * free atom IDs then the replay job will fail.
+ *
+ * If the preceeding job does not fail, then the replay job is returned as
+ * completed.
+ *
+ * The replayed jobs will never be returned to userspace. The preceeding failed
+ * job will be returned to userspace as failed; the status of this job should
+ * be ignored. Completion should be determined by the status of the replay soft
+ * job.
+ *
+ * In order for the jobs to be replayed, the job headers will have to be
+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type
+ * field indicates a Vertex Shader Job then it will be changed to Null Job.
+ *
+ * The replayed jobs have the following assumptions :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - Pre-dependencies are created based on job order.
+ * - Atom numbers are automatically assigned.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ */
+#define BASE_JD_REQ_SOFT_REPLAY                 (BASE_JD_REQ_SOFT_JOB | 0x4)
+/**
+ * SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ *   other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ *   possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET              (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET            (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY             (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/**
+ * SW only requirement: Just In Time allocation
+ *
+ * This job requests a JIT allocation based on the request in the
+ * @base_jit_alloc_info structure which is passed via the jc element of
+ * the atom.
+ *
+ * It should be noted that the id entry in @base_jit_alloc_info must not
+ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job to free the JIT allocation is still made.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC              (BASE_JD_REQ_SOFT_JOB | 0x9)
+/**
+ * SW only requirement: Just In Time free
+ *
+ * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC
+ * to be freed. The ID of the JIT allocation is passed via the jc element of
+ * the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE               (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/**
+ * SW only requirement: Map external resource
+ *
+ * This job requests external resource(s) are mapped once the dependencies
+ * of the job have been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP            (BASE_JD_REQ_SOFT_JOB | 0xb)
+/**
+ * SW only requirement: Unmap external resource
+ *
+ * This job requests external resource(s) are unmapped once the dependencies
+ * of the job has been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP          (BASE_JD_REQ_SOFT_JOB | 0xc)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE    ((base_jd_core_req)1 << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE   ((base_jd_core_req)1 << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job starts which does not have this bit set or a job completes
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if
+ * the CPU may have written to memory addressed by the job since the last job
+ * without this bit set was submitted.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job completes which does not have this bit set or a job starts
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if
+ * the CPU may read from or partially overwrite memory addressed by the job
+ * before the next job without this bit set completes.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
+
+/**
+ * These requirement bits are currently unused in base_jd_core_req
+ */
+#define BASEP_JD_REQ_RESERVED \
+	(~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
+	BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
+	BASE_JD_REQ_EVENT_COALESCE | \
+	BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
+	BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
+	BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END))
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASE_JD_REQ_ATOM_TYPE \
+	(BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
+	BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of a soft job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
+
+/**
+ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
+ * handles retaining cores for power management and affinity management.
+ *
+ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
+ * where lots of atoms could be submitted before powerup, and each has an
+ * affinity chosen that causes other atoms to have an affinity
+ * violation. Whilst the affinity was not causing violations at the time it
+ * was chosen, it could cause violations thereafter. For example, 1000 jobs
+ * could have had their affinity chosen during the powerup time, so any of
+ * those 1000 jobs could cause an affinity violation later on.
+ *
+ * The attack would otherwise occur because other atoms/contexts have to wait for:
+ * -# the currently running atoms (which are causing the violation) to
+ * finish
+ * -# and, the atoms that had their affinity chosen during powerup to
+ * finish. These are run preferrentially because they don't cause a
+ * violation, but instead continue to cause the violation in others.
+ * -# or, the attacker is scheduled out (which might not happen for just 2
+ * contexts)
+ *
+ * By re-choosing the affinity (which is designed to avoid violations at the
+ * time it's chosen), we break condition (2) of the wait, which minimizes the
+ * problem to just waiting for current jobs to finish (which can be bounded if
+ * the Job Scheduling Policy has a timer).
+ */
+enum kbase_atom_coreref_state {
+	/** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
+	KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
+	/** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
+	KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
+	/** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
+	KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
+	/** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
+	KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
+	/** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
+	KBASE_ATOM_COREREF_STATE_READY
+};
+
+/*
+ * Base Atom priority
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling between atoms of the same type within
+ * a base context, and only after the atoms have had dependencies resolved.
+ * Fragment atoms does not affect non-frament atoms with lower priorities, and
+ * the other way around. For example, a low priority atom that has had its
+ * dependencies resolved might run before a higher priority atom that has not
+ * had its dependencies resolved.
+ *
+ * The scheduling between base contexts/processes and between atoms from
+ * different base contexts/processes is unaffected by atom priority.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
+ *   resolved, and atom 'X' has a higher priority than atom 'Y'
+ * - If atom 'Y' is currently running on the HW, then it is interrupted to
+ *   allow atom 'X' to run soon after
+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ *   the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * - Any two atoms that have the same priority could run in any order with
+ *   respect to each other. That is, there is no ordering constraint between
+ *   atoms of the same priority.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM  ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_HIGH    ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW     ((base_jd_prio)2)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+enum kbase_jd_atom_state {
+	/** Atom is not used */
+	KBASE_JD_ATOM_STATE_UNUSED,
+	/** Atom is queued in JD */
+	KBASE_JD_ATOM_STATE_QUEUED,
+	/** Atom has been given to JS (is runnable/running) */
+	KBASE_JD_ATOM_STATE_IN_JS,
+	/** Atom has been completed, but not yet handed back to job dispatcher
+	 *  for dependency resolution */
+	KBASE_JD_ATOM_STATE_HW_COMPLETED,
+	/** Atom has been completed, but not yet handed back to userspace */
+	KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+	base_atom_id  atom_id;               /**< An atom number */
+	base_jd_dep_type dependency_type;    /**< Dependency type */
+};
+
+/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value.
+ * In order to keep the size of the structure same, padding field has been adjusted
+ * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines)
+ * is added at the end of the structure. Place in the structure previously occupied by u16 core_req
+ * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission
+ * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left
+ * for possible future use. */
+typedef struct base_jd_atom_v2 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	kbase_pointer extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	u16 compat_core_req;	            /**< core requirements which correspond to the legacy support for UK 10.2 */
+	struct base_dependency pre_dep[2];  /**< pre-dependencies, one need to use SETTER function to assign this field,
+	this is done in order to reduce possibility of improper assigment of a dependency field */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;                  /**< Atom priority. Refer to @ref base_jd_prio for more details */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[1];
+	base_jd_core_req core_req;          /**< core requirements */
+} base_jd_atom_v2;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+struct base_jd_atom_v2_uk6 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	kbase_pointer extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	u16 core_req;                       /**< core requirements */
+	base_atom_id pre_dep[2]; /**< pre-dependencies */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;		    /**< priority - smaller is higher priority */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[7];
+};
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+typedef enum base_external_resource_access {
+	BASE_EXT_RES_ACCESS_SHARED,
+	BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+	u64 ext_resource;
+} base_external_resource;
+
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+	u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	u64 address;
+	u64 size;
+	struct base_external_resource extres;
+};
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] dep          The kbase jd atom dependency to be initialized.
+ * @param     id           The atom_id to be assigned.
+ * @param     dep_type     The dep_type to be assigned.
+ *
+ */
+static inline void base_jd_atom_dep_set(struct base_dependency *dep,
+		base_atom_id id, base_jd_dep_type dep_type)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	/*
+	 * make sure we don't set not allowed combinations
+	 * of atom_id/dependency_type.
+	 */
+	LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+			(id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
+
+	dep->atom_id = id;
+	dep->dependency_type = dep_type;
+}
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] dep          The kbase jd atom dependency to be written.
+ * @param[in]     from         The dependency to make a copy from.
+ *
+ */
+static inline void base_jd_atom_dep_copy(struct base_dependency *dep,
+		const struct base_dependency *from)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type);
+}
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ */
+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+	LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init or @a base_fence_import.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ */
+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up an external resource object to reference
+ * a memory allocation and the type of access requested.
+ *
+ * @param[in] res     The resource object to initialize
+ * @param     handle  The handle to the imported memory object, must be
+ *                    obtained by calling @ref base_mem_as_import_handle().
+ * @param     access  The type of access requested
+ */
+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
+{
+	u64 address;
+
+	address = handle.basep.handle;
+
+	LOCAL_ASSERT(res != NULL);
+	LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+	LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+	res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+	BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
+	BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
+	BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */
+	BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
+	BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
+	BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
+	BASE_JD_SW_EVENT_RESERVED = (3u << 11),	/**< Reserved event type */
+	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)	    /**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0  - subtype
+ * @li 12:11 - type
+ * @li 13    - SW success (only valid if the SW bit is set)
+ * @li 14    - SW event (HW event if not set)
+ * @li 15    - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+typedef enum base_jd_event_code {
+	/* HW defined exceptions */
+
+	/** Start of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+	/* non-fatal exceptions */
+	BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
+	BASE_JD_EVENT_DONE = 0x01,
+	BASE_JD_EVENT_STOPPED = 0x03,	  /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
+	BASE_JD_EVENT_TERMINATED = 0x04,  /**< This is actually a fault status code - the job was hard stopped */
+	BASE_JD_EVENT_ACTIVE = 0x08,	  /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
+
+	/** End of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+
+	/** Start of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+	/* job exceptions */
+	BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+	BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+	BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+	BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+	BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+	BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+	BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+	BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+	BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+	BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+	BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+	BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+	BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+	BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+	BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+	BASE_JD_EVENT_STATE_FAULT = 0x5A,
+	BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+	BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+	/* GPU exceptions */
+	BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+	BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+	/* MMU exceptions */
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+	BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+	BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+	/* SW defined exceptions */
+	BASE_JD_EVENT_MEM_GROWTH_FAILED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_TIMED_OUT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+	BASE_JD_EVENT_JOB_CANCELLED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+	BASE_JD_EVENT_JOB_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+	BASE_JD_EVENT_PM_EVENT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+	BASE_JD_EVENT_FORCE_REPLAY	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
+
+	BASE_JD_EVENT_BAG_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+	/** End of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+	BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
+	BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+	/** End of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of Kernel-only status codes. Such codes are never returned to user-space */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+	BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+	/** End of Kernel-only status codes. */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. The can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (ie all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+	base_jd_event_code event_code;  /**< event code */
+	base_atom_id atom_number;       /**< the atom number that has completed */
+	struct base_jd_udata udata;     /**< user data */
+} base_jd_event_v2;
+
+/**
+ * Padding required to ensure that the @ref struct base_dump_cpu_gpu_counters structure fills
+ * a full cache line.
+ */
+
+#define BASE_CPU_GPU_CACHE_LINE_PADDING (36)
+
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field of @ref base_jd_atom.
+ *
+ * This structure must be padded to ensure that it will occupy whole cache lines. This is to avoid
+ * cases where access to pages containing the structure is shared between cached and un-cached
+ * memory regions, which would cause memory corruption.  Here we set the structure size to be 64 bytes
+ * which is the cache line for ARM A15 processors.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+	u64 system_time;
+	u64 cycle_counter;
+	u64 sec;
+	u32 usec;
+	u8 padding[BASE_CPU_GPU_CACHE_LINE_PADDING];
+} base_dump_cpu_gpu_counters;
+
+
+
+/** @} end group base_user_api_job_dispatch */
+
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistant guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ *  - Part of a critical inner-loop
+ *  - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU  registers themselves.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * _mali_base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to _mali_base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties
+ *
+ * The Platform Config File sets up gpu properties that are specific to a
+ * certain platform. Properties that are 'Implementation Defined' in the
+ * Midgard Architecture spec are placed here.
+ *
+ * @note Reference configurations are provided for Midgard Implementations, such as
+ * the Mali-T600 family. The customer need not repeat this information, and can select one of
+ * these reference configurations. For example, VA_BITS, PA_BITS and the
+ * maximum number of samples per pixel might vary between Midgard Implementations, but
+ * \b not for platforms using the Mali-T604. This information is placed in
+ * the reference configuration files.
+ *
+ * The System Integrator creates the following structure:
+ * - platform_XYZ
+ * - platform_XYZ/plat
+ * - platform_XYZ/plat/plat_config.h
+ *
+ * They then edit plat_config.h, using the example plat_config.h files as a
+ * guide.
+ *
+ * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will
+ * receive a helpful \#error message if they do not do this correctly. This
+ * selects the Reference Configuration for the Midgard Implementation. The rationale
+ * behind this decision (against asking the customer to write \#include
+ * <gpus/mali_t600.h> in their plat_config.h) is as follows:
+ * - This mechanism 'looks' like a regular config file (such as Linux's
+ * .config)
+ * - It is difficult to get wrong in a way that will produce strange build
+ * errors:
+ *  - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and
+ *  so they won't accidentally pick another file with 'mali_t600' in its name
+ *  - When the build doesn't work, the System Integrator may think the DDK is
+ *  doesn't work, and attempt to fix it themselves:
+ *   - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the
+ *   error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells
+ *   you.
+ *   - For a \#include mechanism, checks must still be made elsewhere, which the
+ *   System Integrator may try working around by setting \#defines (such as
+ *   VA_BITS) themselves in their plat_config.h. In the  worst case, they may
+ *   set the prevention-mechanism \#define of
+ *   "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN".
+ *   - In this case, they would believe they are on the right track, because
+ *   the build progresses with their fix, but with errors elsewhere.
+ *
+ * However, there is nothing to prevent the customer using \#include to organize
+ * their own configurations files hierarchically.
+ *
+ * The mechanism for the header file processing is as follows:
+ *
+ * @dot
+   digraph plat_config_mechanism {
+	   rankdir=BT
+	   size="6,6"
+
+       "mali_base.h";
+	   "gpu/mali_gpu.h";
+
+	   node [ shape=box ];
+	   {
+	       rank = same; ordering = out;
+
+		   "gpu/mali_gpu_props.h";
+		   "base/midg_gpus/mali_t600.h";
+		   "base/midg_gpus/other_midg_gpu.h";
+	   }
+	   { rank = same; "plat/plat_config.h"; }
+	   {
+	       rank = same;
+		   "gpu/mali_gpu.h" [ shape=box ];
+		   gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ];
+		   select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ;
+	   }
+	   node [ shape=box ];
+	   { rank = same; "plat/plat_config.h"; }
+	   { rank = same; "mali_base.h"; }
+
+	   "mali_base.h" -> "gpu/mali_gpu.h" -> "gpu/mali_gpu_props.h";
+	   "mali_base.h" -> "plat/plat_config.h" ;
+	   "mali_base.h" -> select_gpu ;
+
+	   "plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ;
+	   gpu_chooser -> select_gpu [style="dotted,bold"] ;
+
+	   select_gpu -> "base/midg_gpus/mali_t600.h" ;
+	   select_gpu -> "base/midg_gpus/other_midg_gpu.h" ;
+   }
+   @enddot
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided the entire processed @ref base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algoirthm can be determined either by a u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * requried for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+struct mali_base_gpu_core_props {
+	/**
+	 * Product specific value.
+	 */
+	u32 product_id;
+
+	/**
+	 * Status of the GPU release.
+     * No defined values, but starts at 0 and increases by one for each release
+     * status (alpha, beta, EAC, etc.).
+     * 4 bit values (0-15).
+	 */
+	u16 version_status;
+
+	/**
+	 * Minor release number of the GPU. "P" part of an "RnPn" release number.
+     * 8 bit values (0-255).
+	 */
+	u16 minor_revision;
+
+	/**
+	 * Major release number of the GPU. "R" part of an "RnPn" release number.
+     * 4 bit values (0-15).
+	 */
+	u16 major_revision;
+
+	u16 padding;
+
+	/**
+	 * @usecase GPU clock speed is not specified in the Midgard Architecture, but is
+	 * <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+	 */
+	u32 gpu_speed_mhz;
+
+	/**
+	 * @usecase GPU clock max/min speed is required for computing best/worst case
+	 * in tasks as job scheduling ant irq_throttling. (It is not specified in the
+	 *  Midgard Architecture).
+	 */
+	u32 gpu_freq_khz_max;
+	u32 gpu_freq_khz_min;
+
+	/**
+	 * Size of the shader program counter, in bits.
+	 */
+	u32 log2_program_counter_size;
+
+	/**
+	 * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+	 * bitpattern where a set bit indicates that the format is supported.
+	 *
+	 * Before using a texture format, it is recommended that the corresponding
+	 * bit be checked.
+	 */
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	/**
+	 * Theoretical maximum memory available to the GPU. It is unlikely that a
+	 * client will be able to allocate all of this memory for their own
+	 * purposes, but this at least provides an upper bound on the memory
+	 * available to the GPU.
+	 *
+	 * This is required for OpenCL's clGetDeviceInfo() call when
+	 * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+	 * client will not be expecting to allocate anywhere near this value.
+	 */
+	u64 gpu_available_memory_size;
+};
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+	u8 log2_line_size;
+	u8 log2_cache_size;
+	u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+	u32 bin_size_bytes;	/* Max is 4*2^15 */
+	u32 max_active_levels;	/* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+	u32 max_threads;            /* Max. number of threads per core */
+	u32 max_workgroup_size;     /* Max. number of threads per workgroup */
+	u32 max_barrier_size;       /* Max. number of threads that can synchronize on a simple barrier */
+	u16 max_registers;          /* Total size [1..65535] of the register file available per core. */
+	u8  max_task_queue;         /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+	u8  max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+	u8  impl_tech;              /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+	u8  padding[7];
+};
+
+/**
+ * @brief descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+	u64 core_mask;	       /**< Core restriction mask required for the group */
+	u16 num_cores;	       /**< Number of cores in the group */
+	u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+	u32 num_groups;
+
+	/**
+	 * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+	 *
+	 * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+	 * whether the core groups are coherent or not. Hence this member is needed
+	 * to calculate how much memory is required for dumping.
+	 *
+	 * @note Do not use it to work out how many valid elements are in the
+	 * group[] member. Use num_groups instead.
+	 */
+	u32 num_core_groups;
+
+	/**
+	 * Coherency features of the memory, accessed by @ref gpu_mem_features
+	 * methods
+	 */
+	u32 coherency;
+
+	u32 padding;
+
+	/**
+	 * Descriptors of coherent groups
+	 */
+	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+	u64 shader_present;
+	u64 tiler_present;
+	u64 l2_present;
+	u64 unused_1; /* keep for backward compatibility */
+
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 mem_features;
+	u32 mmu_features;
+
+	u32 as_present;
+
+	u32 js_present;
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 tiler_features;
+	u32 texture_features[3];
+
+	u32 gpu_id;
+
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+
+	/*
+	 * Note: This is the _selected_ coherency mode rather than the
+	 * available modes as exposed in the coherency_features register.
+	 */
+	u32 coherency_mode;
+};
+
+/**
+ * Return structure for _mali_base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this datastructure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct mali_base_gpu_props {
+	struct mali_base_gpu_core_props core_props;
+	struct mali_base_gpu_l2_cache_props l2_props;
+	u64 unused_1; /* keep for backwards compatibility */
+	struct mali_base_gpu_tiler_props tiler_props;
+	struct mali_base_gpu_thread_props thread_props;
+
+	/** This member is large, likely to be 128 bytes */
+	struct gpu_raw_gpu_props raw_props;
+
+	/** This must be last member of the structure */
+	struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * \enum base_context_create_flags
+ *
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+enum base_context_create_flags {
+	/** No flags set */
+	BASE_CONTEXT_CREATE_FLAG_NONE = 0,
+
+	/** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */
+	BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0),
+
+	/** Base context is a 'System Monitor' context for Hardware counters.
+	 *
+	 * One important side effect of this is that job submission is disabled. */
+	BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1)
+};
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init()
+ */
+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
+	(((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
+	  ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
+ */
+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
+	((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
+
+/*
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+/** Private flag tracking whether job descriptor dumping is disabled */
+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31))
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C Pre-processor macros are exposed here to do with Platform
+ * Config.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revison.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/**
+ * @brief The payload for a replay job. This must be in GPU memory.
+ */
+typedef struct base_jd_replay_payload {
+	/**
+	 * Pointer to the first entry in the base_jd_replay_jc list.  These
+	 * will be replayed in @b reverse order (so that extra ones can be added
+	 * to the head in future soft jobs without affecting this soft job)
+	 */
+	u64 tiler_jc_list;
+
+	/**
+	 * Pointer to the fragment job chain.
+	 */
+	u64 fragment_jc;
+
+	/**
+	 * Pointer to the tiler heap free FBD field to be modified.
+	 */
+	u64 tiler_heap_free;
+
+	/**
+	 * Hierarchy mask for the replayed fragment jobs. May be zero.
+	 */
+	u16 fragment_hierarchy_mask;
+
+	/**
+	 * Hierarchy mask for the replayed tiler jobs. May be zero.
+	 */
+	u16 tiler_hierarchy_mask;
+
+	/**
+	 * Default weight to be used for hierarchy levels not in the original
+	 * mask.
+	 */
+	u32 hierarchy_default_weight;
+
+	/**
+	 * Core requirements for the tiler job chain
+	 */
+	base_jd_core_req tiler_core_req;
+
+	/**
+	 * Core requirements for the fragment job chain
+	 */
+	base_jd_core_req fragment_core_req;
+} base_jd_replay_payload;
+
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+typedef struct base_jd_replay_payload_uk10_2 {
+	u64 tiler_jc_list;
+	u64 fragment_jc;
+	u64 tiler_heap_free;
+	u16 fragment_hierarchy_mask;
+	u16 tiler_hierarchy_mask;
+	u32 hierarchy_default_weight;
+	u16 tiler_core_req;
+	u16 fragment_core_req;
+	u8 padding[4];
+} base_jd_replay_payload_uk10_2;
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
+
+/**
+ * @brief An entry in the linked list of job chains to be replayed. This must
+ *        be in GPU memory.
+ */
+typedef struct base_jd_replay_jc {
+	/**
+	 * Pointer to next entry in the list. A setting of NULL indicates the
+	 * end of the list.
+	 */
+	u64 next;
+
+	/**
+	 * Pointer to the job chain.
+	 */
+	u64 jc;
+
+} base_jd_replay_jc;
+
+/* Maximum number of jobs allowed in a fragment chain in the payload of a
+ * replay job */
+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
+
+/** @} end group base_api */
+
+typedef struct base_profiling_controls {
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+} base_profiling_controls;
+
+#endif				/* _BASE_KERNEL_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel_sync.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
new file mode 100755
index 0000000..a24791f
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base cross-proccess sync API.
+ */
+
+#ifndef _BASE_KERNEL_SYNC_H_
+#define _BASE_KERNEL_SYNC_H_
+
+#include <linux/ioctl.h>
+
+#define STREAM_IOC_MAGIC '~'
+
+/* Fence insert.
+ *
+ * Inserts a fence on the stream operated on.
+ * Fence can be waited via a base fence wait soft-job
+ * or triggered via a base fence trigger soft-job.
+ *
+ * Fences must be cleaned up with close when no longer needed.
+ *
+ * No input/output arguments.
+ * Returns
+ * >=0 fd
+ * <0  error code
+ */
+#define STREAM_IOC_FENCE_INSERT _IO(STREAM_IOC_MAGIC, 0)
+
+#endif				/* _BASE_KERNEL_SYNC_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100755
index 0000000..4a98a72
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC	(1U << 0)
+#define BASE_SYNCSET_OP_CSYNC	(1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	base_mem_handle mem_handle;
+	u64 user_addr;
+	u64 size;
+	u8 type;
+	u8 padding[7];
+};
+
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
new file mode 100755
index 0000000..be454a2
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif	/*_BASE_VENDOR_SPEC_FUNC_H_*/
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100755
index 0000000..e02d9c9
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,570 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <asm/page.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_uku.h>
+#include <mali_kbase_linux.h>
+
+#include "mali_kbase_strings.h"
+#include "mali_kbase_pm.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_trace_timeline.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_debug_job_fault.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_vinstr.h"
+#include "mali_kbase_ipa.h"
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+/**
+ * @page page_base_kernel_main Kernel-side Base (KBase) APIs
+ *
+ * The Kernel-side Base (KBase) APIs are divided up as follows:
+ * - @subpage page_kbase_js_policy
+ */
+
+/**
+ * @defgroup base_kbase_api Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: configuration attributes member of kbdev needs to have
+* been setup before calling kbase_device_init
+*/
+
+/*
+* API to acquire device list semaphore and return pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control);
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+void kbase_destroy_context(struct kbase_context *kctx);
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data,
+		int uk6_atom);
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data);
+#endif
+
+/**
+ * kbase_jd_done_worker - Handle a job completion
+ * @data: a &struct work_struct
+ *
+ * This function requeues the job from the runpool (if it was soft-stopped or
+ * removed from NEXT registers).
+ *
+ * Removes it from the system if it finished/failed/was cancelled.
+ *
+ * Resolves dependencies to add dependent jobs to the context, potentially
+ * starting them if necessary (which may add more references to the context)
+ *
+ * Releases the reference to the context from the no-longer-running job.
+ *
+ * Handles retrying submission outside of IRQ context if it failed from within
+ * IRQ context.
+ */
+void kbase_jd_done_worker(struct work_struct *data);
+
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+		kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+bool jd_submit_atom(struct kbase_context *kctx,
+			 const struct base_jd_atom_v2 *user_atom,
+			 struct kbase_jd_atom *katom);
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom);
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ *                                               and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The js_data.runpool_irq.lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom);
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom);
+int kbase_soft_event_update(struct kbase_context *kctx,
+			    u64 event,
+			    unsigned char new_status);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+void kbasep_soft_job_timeout_worker(unsigned long data);
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
+
+/* api used internally for register access. Contains validation and tracing */
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size);
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
+
+/* api to be ported per OS, only need to do the raw register access */
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value);
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset);
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev     The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code  exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+		u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or has already been suspended
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+	return kbdev->pm.suspending;
+}
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int result;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+	result = katom - &kctx->jctx.atoms[0];
+	KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
+	return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id:   ID of atom to retrieve
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+		struct kbase_context *kctx, int id)
+{
+	return &kctx->jctx.atoms[id];
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just increment
+ * the counter during disjoint events we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down
+ * should be called
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if !defined(UINT64_MAX)
+	#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#endif
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)     \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+	kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+	kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+	trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(subcode);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_DEBUG
+/**
+ * kbase_set_driver_inactive - Force driver to go inactive
+ * @kbdev:    Device pointer
+ * @inactive: true if driver should go inactive, false otherwise
+ *
+ * Forcing the driver inactive will cause all future IOCTLs to wait until the
+ * driver is made active again. This is intended solely for the use of tests
+ * which require that no jobs are running while the test executes.
+ */
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifndef RESET0_LEVEL
+extern u32  override_value_aml;
+extern void *reg_base_hiubus;
+#define RESET0_MASK    0x01
+#define RESET1_MASK    0x02
+#define RESET2_MASK    0x03
+
+#define RESET0_LEVEL    0x10
+#define RESET1_LEVEL    0x11
+#define RESET2_LEVEL    0x12
+#define Rd(r)                           readl((reg_base_hiubus) + ((r)<<2))
+#define Wr(r, v)                        writel((v), ((reg_base_hiubus) + ((r)<<2)))
+#define Mali_WrReg(unit, core, regnum, value)   kbase_reg_write(kbdev, (regnum), (value), NULL)
+#define Mali_RdReg(unit, core, regnum)          kbase_reg_read(kbdev, (regnum), NULL)
+#define stimulus_print   printk
+#define stimulus_display printk
+#define Mali_pwr_on(x)   Mali_pwr_on_with_kdev(kbdev, (x))
+#define Mali_pwr_off(x)  Mali_pwr_off_with_kdev(kbdev, (x))
+#endif
+
+
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100755
index 0000000..933aa28
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,209 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* This function is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the
+ * restart index is out of bounds and the rerun causes a tile range fault. If this happens
+ * we try to clamp the restart index to a correct value and rerun the job.
+ */
+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+	struct device *dev = katom->kctx->kbdev->dev;
+	u32   clamped = 0;
+	struct kbase_va_region *region;
+	phys_addr_t *page_array;
+	u64 page_index;
+	u32 offset = katom->jc & (~PAGE_MASK);
+	u32 *page_1 = NULL;
+	u32 *page_2 = NULL;
+	u32   job_header[JOB_HEADER_SIZE_IN_WORDS];
+	void *dst = job_header;
+	u32 minX, minY, maxX, maxY;
+	u32 restartX, restartY;
+	struct page *p;
+	u32 copy_size;
+
+	dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+	if (!(katom->core_req & BASE_JD_REQ_FS))
+		return 0;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+			katom->jc);
+	if (!region || (region->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(region);
+	if (!page_array)
+		goto out_unlock;
+
+	page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+	p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+	/* we need the first 10 words of the fragment shader job descriptor.
+	 * We need to check that the offset + 10 words is less that the page
+	 * size otherwise we need to load the next page.
+	 * page_size_overflow will be equal to 0 in case the whole descriptor
+	 * is within the page > 0 otherwise.
+	 */
+	copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+	page_1 = kmap_atomic(p);
+
+	/* page_1 is a u32 pointer, offset is expressed in bytes */
+	page_1 += offset>>2;
+
+	kbase_sync_single_for_cpu(katom->kctx->kbdev,
+			kbase_dma_addr(p) + offset,
+			copy_size, DMA_BIDIRECTIONAL);
+
+	memcpy(dst, page_1, copy_size);
+
+	/* The data needed overflows page the dimension,
+	 * need to map the subsequent page */
+	if (copy_size < JOB_HEADER_SIZE) {
+		p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+		page_2 = kmap_atomic(p);
+
+		kbase_sync_single_for_cpu(katom->kctx->kbdev,
+				kbase_dma_addr(p),
+				JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+		memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+	}
+
+	/* We managed to correctly map one or two pages (in case of overflow) */
+	/* Get Bounding Box data and restart index from fault address low word */
+	minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+	restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+	dev_warn(dev, "Before Clamping:\n"
+			"Jobstatus: %08x\n"
+			"restartIdx: %08x\n"
+			"Fault_addr_low: %08x\n"
+			"minCoordsX: %08x minCoordsY: %08x\n"
+			"maxCoordsX: %08x maxCoordsY: %08x\n",
+			job_header[JOB_DESC_STATUS_WORD],
+			job_header[JOB_DESC_RESTART_INDEX_WORD],
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+			minX, minY,
+			maxX, maxY);
+
+	/* Set the restart index to the one which generated the fault*/
+	job_header[JOB_DESC_RESTART_INDEX_WORD] =
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+	if (restartX < minX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to minimum. %08x clamped to %08x\n",
+			restartX, minX);
+		clamped =  1;
+	}
+	if (restartY < minY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to minimum. %08x clamped to %08x\n",
+			restartY, minY);
+		clamped =  1;
+	}
+	if (restartX > maxX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to maximum. %08x clamped to %08x\n",
+			restartX, maxX);
+		clamped =  1;
+	}
+	if (restartY > maxY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to maximum. %08x clamped to %08x\n",
+			restartY, maxY);
+		clamped =  1;
+	}
+
+	if (clamped) {
+		/* Reset the fault address low word
+		 * and set the job status to STOPPED */
+		job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+		job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+		dev_warn(dev, "After Clamping:\n"
+				"Jobstatus: %08x\n"
+				"restartIdx: %08x\n"
+				"Fault_addr_low: %08x\n"
+				"minCoordsX: %08x minCoordsY: %08x\n"
+				"maxCoordsX: %08x maxCoordsY: %08x\n",
+				job_header[JOB_DESC_STATUS_WORD],
+				job_header[JOB_DESC_RESTART_INDEX_WORD],
+				job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+				minX, minY,
+				maxX, maxY);
+
+		/* Flush CPU cache to update memory for future GPU reads*/
+		memcpy(page_1, dst, copy_size);
+		p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+		kbase_sync_single_for_device(katom->kctx->kbdev,
+				kbase_dma_addr(p) + offset,
+				copy_size, DMA_TO_DEVICE);
+
+		if (copy_size < JOB_HEADER_SIZE) {
+			memcpy(page_2, dst + copy_size,
+					JOB_HEADER_SIZE - copy_size);
+			p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+
+			kbase_sync_single_for_device(katom->kctx->kbdev,
+					kbase_dma_addr(p),
+					JOB_HEADER_SIZE - copy_size,
+					DMA_TO_DEVICE);
+		}
+	}
+	if (copy_size < JOB_HEADER_SIZE)
+		kunmap_atomic(page_2);
+
+	kunmap_atomic(page_1);
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+	return clamped;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100755
index 0000000..099a298
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
new file mode 100644
index 0000000..f910fe9
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+
+static int kbase_as_fault_read(struct seq_file *sfile, void *data)
+{
+	uintptr_t as_no = (uintptr_t) sfile->private;
+
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+	struct kbase_device *kbdev = NULL;
+
+	kbdev_list = kbase_dev_list_get();
+
+	list_for_each(entry, kbdev_list) {
+		kbdev = list_entry(entry, struct kbase_device, entry);
+
+		if(kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) {
+
+			/* don't show this one again until another fault occors */
+			kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no);
+
+			/* output the last page fault addr */
+			seq_printf(sfile, "%llu\n", (u64) kbdev->as[as_no].fault_addr);
+		}
+
+	}
+
+	kbase_dev_list_put(kbdev_list);
+
+	return 0;
+}
+
+static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbase_as_fault_read , in->i_private);
+}
+
+static const struct file_operations as_fault_fops = {
+	.open = kbase_as_fault_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ *  Initialize debugfs entry for each address space
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	uint i;
+	char as_name[64];
+	struct dentry *debugfs_directory;
+
+	kbdev->debugfs_as_read_bitmap = 0ULL;
+
+	KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces);
+	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));
+
+	debugfs_directory = debugfs_create_dir("address_spaces",
+		kbdev->mali_debugfs_directory);
+
+	if(debugfs_directory) {
+		for(i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+			snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
+			debugfs_create_file(as_name, S_IRUGO,
+				debugfs_directory, (void*) ((uintptr_t) i), &as_fault_fops);
+		}
+	}
+	else
+		dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory");
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+	return;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
new file mode 100644
index 0000000..3ed2248
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_AS_FAULT_DEBUG_FS_H
+#define _KBASE_AS_FAULT_DEBUG_FS_H
+
+/**
+ * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults
+ *
+ * @kbdev: Pointer to kbase_device
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_as_fault_debugfs_new() - make the last fault available on debugfs
+ *
+ * @kbdev: Pointer to kbase_device
+ * @as_no: The address space the fault occurred on
+ */
+static inline void
+kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
+#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_MALI_DEBUG */
+	return;
+}
+
+#endif  /*_KBASE_AS_FAULT_DEBUG_FS_H*/
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100755
index 0000000..c67b3e9
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+	u32 cache_flags = 0;
+
+	CSTD_UNUSED(nr_pages);
+
+	if (flags & BASE_MEM_CACHED_CPU)
+		cache_flags |= KBASE_REG_CPU_CACHED;
+
+	return cache_flags;
+}
+
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		return;
+#endif /* CONFIG_MALI_COH_KERN */
+	dma_sync_single_for_device(kbdev->dev, handle, size, dir);
+}
+
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		return;
+#endif /* CONFIG_MALI_COH_KERN */
+	dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
new file mode 100755
index 0000000..0c18bdb
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * kbase_cache_enabled - Choose the cache policy for a specific region
+ * @flags:    flags describing attributes of the region
+ * @nr_pages: total number of pages (backed or not) for the region
+ *
+ * Tells whether the CPU and GPU caches should be enabled or not for a specific
+ * region.
+ * This function can be modified to customize the cache policy depending on the
+ * flags and size of the region.
+ *
+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED
+ *         depending on the cache policy
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
new file mode 100755
index 0000000..fb615ae
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
@@ -0,0 +1,51 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+int kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_init_func)
+		return platform_funcs_p->platform_init_func(kbdev);
+
+	return 0;
+}
+
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_term_func)
+		platform_funcs_p->platform_term_func(kbdev);
+}
+
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed)
+{
+	KBASE_DEBUG_ASSERT(NULL != clock_speed);
+
+	*clock_speed = 100;
+	return 0;
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
new file mode 100755
index 0000000..356d52b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
@@ -0,0 +1,345 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <asm/page.h>
+
+#include <mali_malisw.h>
+#include <mali_kbase_backend_config.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+#include <linux/rbtree.h>
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/**
+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ *
+ * Specifies the functions pointers for platform specific initialization and
+ * termination. By default no functions are required. No additional platform
+ * specific control is necessary.
+ */
+struct kbase_platform_funcs_conf {
+	/**
+	 * platform_init_func - platform specific init function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Returns 0 on success, negative error code otherwise.
+	 *
+	 * Function pointer for platform specific initialization or NULL if no
+	 * initialization function is required. At the point this the GPU is
+	 * not active and its power and clocks are in unknown (platform specific
+	 * state) as kbase doesn't yet have control of power and clocks.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly initialized) in here.
+	 */
+	int (*platform_init_func)(struct kbase_device *kbdev);
+	/**
+	 * platform_term_func - platform specific termination function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Function pointer for platform specific termination or NULL if no
+	 * termination function is required. At the point this the GPU will be
+	 * idle but still powered and clocked.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly terminated) in here.
+	 */
+	void (*platform_term_func)(struct kbase_device *kbdev);
+};
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+	/** Callback for when the GPU is idle and the power to it can be switched off.
+	 *
+	 * The system integrator can decide whether to either do nothing, just switch off
+	 * the clocks to the GPU, or to completely power down the GPU.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the GPU is about to become active and power must be supplied.
+	 *
+	 * This function must not return until the GPU is powered and clocked sufficiently for register access to
+	 * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
+	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 *
+	 * The return value of the first call to this function is ignored.
+	 *
+	 * @return 1 if the GPU state may have been lost, 0 otherwise.
+	 */
+	int (*power_on_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is requesting a suspend and GPU power
+	 * must be switched off.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a preceding call to power_off_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_off_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is resuming from a suspend and GPU
+	 * power must be switched on.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a following call to power_on_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_on_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_resume_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management initialization.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * will become active from calls made to the OS from within this function.
+	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else int error code.
+	 */
+	 int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management termination.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * should no longer be called by the OS on completion of this function.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-off power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_suspend callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
+	 */
+	void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-on power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_resume callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+
+	/*
+	 * Optional callback for checking if GPU can be suspended when idle
+	 *
+	 * This callback will be called by the runtime power management core
+	 * when the reference count goes to 0 to provide notification that the
+	 * GPU now seems idle.
+	 *
+	 * If this callback finds that the GPU can't be powered off, or handles
+	 * suspend by powering off directly or queueing up a power off, a
+	 * non-zero value must be returned to prevent the runtime PM core from
+	 * also triggering a suspend.
+	 *
+	 * Returning 0 will cause the runtime PM core to conduct a regular
+	 * autosuspend.
+	 *
+	 * This callback is optional and if not provided regular autosuspend
+	 * will be triggered.
+	 *
+	 * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+	 * this feature.
+	 *
+	 * Return 0 if GPU can be suspended, positive value if it can not be
+	 * suspeneded by runtime PM, else OS error code
+	 */
+	int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
+};
+
+/**
+ * kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC
+ * @clock_speed - see  kbase_cpu_clk_speed_func for details on the parameters
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * Default implementation of CPU_SPEED_FUNC. This function sets clock_speed
+ * to 100, so will be an underestimate for any real system.
+ */
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed);
+
+/**
+ * kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current CPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ *
+ * This is mainly used to implement OpenCL's clGetDeviceInfo().
+ */
+typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed);
+
+/**
+ * kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current GPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ * When an error is returned the caller assumes maximum GPU speed stored in
+ * gpu_freq_khz_max.
+ *
+ * If the system timer is not available then this function is required
+ * for the OpenCL queue profiling to return correct timing information.
+ *
+ */
+typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed);
+
+#ifdef CONFIG_OF
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+	u64 start;
+	u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ */
+struct kbase_io_resources {
+	u32                      job_irq_number;
+	u32                      mmu_irq_number;
+	u32                      gpu_irq_number;
+	struct kbase_io_memory_region io_memory_region;
+};
+
+struct kbase_platform_config {
+	const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init: - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes.  The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ *         Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that are outside of the Power Management
+ * callbacks.
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_platform_early_init - Early initialisation of the platform code
+ *
+ * This function will be called when the module is loaded to perform any
+ * early initialisation required by the platform code. Such as reading
+ * platform specific device tree entries for the GPU.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_early_init(void);
+
+#ifndef CONFIG_OF
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+/**
+ * kbase_platform_fake_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_fake_register()
+ */
+void kbase_platform_fake_unregister(void);
+#endif
+#endif
+
+	  /** @} *//* end group kbase_config */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_CONFIG_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100755
index 0000000..739ac83
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,260 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+ * Irq throttle. It is the minimum desired time in between two
+ * consecutive gpu interrupts (given in 'us'). The irq throttle
+ * gpu register will be configured after this, taking into
+ * account the configured max frequency.
+ *
+ * Attached value: number in micro seconds
+ */
+#define DEFAULT_IRQ_THROTTLE_TIME_US 20
+
+/**
+ *  Default Job Scheduler initial runtime of a context for the CFS Policy,
+ *  in time-slices.
+ *
+ * This value is relative to that of the least-run context, and defines
+ * where in the CFS queue a new context is added. A value of 1 means 'after
+ * the least-run context has used its timeslice'. Therefore, when all
+ * contexts consistently use the same amount of time, a value of 1 models a
+ * FIFO. A value of 0 would model a LIFO.
+ *
+ * The value is represented in "numbers of time slices". Multiply this
+ * value by that defined in @ref DEFAULT_JS_CTX_TIMESLICE_NS to get
+ * the time value for this in nanoseconds.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES 1
+
+/**
+ * Default Job Scheduler minimum runtime value of a context for CFS, in
+ * time_slices relative to that of the least-run context.
+ *
+ * This is a measure of how much preferrential treatment is given to a
+ * context that is not run very often.
+ *
+ * Specficially, this value defines how many timeslices such a context is
+ * (initially) allowed to use at once. Such contexts (e.g. 'interactive'
+ * processes) will appear near the front of the CFS queue, and can initially
+ * use more time than contexts that run continuously (e.g. 'batch'
+ * processes).
+ *
+ * This limit \b prevents a "stored-up timeslices" DoS attack, where a ctx
+ * not run for a long time attacks the system by using a very large initial
+ * number of timeslices when it finally does run.
+ *
+ * @note A value of zero allows not-run-often contexts to get scheduled in
+ * quickly, but to only use a single timeslice when they get scheduled in.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES 2
+
+/**
+* Boolean indicating whether the driver is configured to be secure at
+* a potential loss of performance.
+*
+* This currently affects only r0p0-15dev0 HW and earlier.
+*
+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+* performance:
+*
+* - When this is set to true, the driver remains fully secure,
+* but potentially loses performance compared with setting this to
+* false.
+* - When set to false, the driver is open to certain security
+* attacks.
+*
+* From r0p0-00rel0 and onwards, there is no security loss by setting
+* this to false, and no performance loss by setting it to
+* true.
+*/
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 */
+	KBASE_AID_32 = 0x0,
+
+	/**
+	 * Restrict GPU to a half of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_16 = 0x3,
+
+	/**
+	 * Restrict GPU to a quarter of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_8  = 0x2,
+
+	/**
+	 * Restrict GPU to an eighth of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_4  = 0x1
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
+ * defines which UMP device this GPU should be mapped to.
+ */
+#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
+
+/*
+ * Default period for DVFS sampling
+ */
+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+
+/*
+ * Power Management poweroff tick granuality. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/*
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/*
+ * Power Manager number of ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
+
+/*
+ * Default scheduling tick granuality
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
+
+/*
+ * Default timeout for some software jobs, after which the software event wait
+ * jobs will be cancelled.
+ */
+#define DEFAULT_JS_SOFT_JOB_TIMEOUT ((u32)3000) /* 3s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
+ */
+#define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */
+
+/*
+ * Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
+/*
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
new file mode 100755
index 0000000..344a1f1
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -0,0 +1,318 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_mem_linux.h>
+
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context.
+ *
+ * Return: new kbase context
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+{
+	struct kbase_context *kctx;
+	int err;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* zero-inited as lot of code assume it's zero'ed out on create */
+	kctx = vzalloc(sizeof(*kctx));
+
+	if (!kctx)
+		goto out;
+
+	/* creating a context is considered a disjoint event */
+	kbase_disjoint_event(kbdev);
+
+	kctx->kbdev = kbdev;
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+	kctx->is_compat = is_compat;
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kctx->timeline.owner_tgid = task_tgid_nr(current);
+#endif
+	atomic_set(&kctx->setup_complete, 0);
+	atomic_set(&kctx->setup_in_progress, 0);
+	kctx->infinite_cache_active = 0;
+	spin_lock_init(&kctx->mm_update_lock);
+	kctx->process_mm = NULL;
+	atomic_set(&kctx->nonmapped_pages, 0);
+	kctx->slots_pullable = 0;
+	kctx->tgid = current->tgid;
+	kctx->pid = current->pid;
+
+	err = kbase_mem_pool_init(&kctx->mem_pool,
+			kbdev->mem_pool_max_size_default,
+			kctx->kbdev, &kbdev->mem_pool);
+	if (err)
+		goto free_kctx;
+
+	err = kbase_mem_evictable_init(kctx);
+	if (err)
+		goto free_pool;
+
+	atomic_set(&kctx->used_pages, 0);
+
+	err = kbase_jd_init(kctx);
+	if (err)
+		goto deinit_evictable;
+
+	err = kbasep_js_kctx_init(kctx);
+	if (err)
+		goto free_jd;	/* safe to call kbasep_js_kctx_term  in this case */
+
+	err = kbase_event_init(kctx);
+	if (err)
+		goto free_jd;
+
+	atomic_set(&kctx->drain_pending, 0);
+
+	mutex_init(&kctx->reg_lock);
+
+	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+	spin_lock_init(&kctx->waiting_soft_jobs_lock);
+#ifdef CONFIG_KDS
+	INIT_LIST_HEAD(&kctx->waiting_kds_resource);
+#endif
+	err = kbase_dma_fence_init(kctx);
+	if (err)
+		goto free_event;
+
+	err = kbase_mmu_init(kctx);
+	if (err)
+		goto term_dma_fence;
+
+	kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+	if (!kctx->pgd)
+		goto free_mmu;
+
+	kctx->aliasing_sink_page = kbase_mem_pool_alloc(&kctx->mem_pool);
+	if (!kctx->aliasing_sink_page)
+		goto no_sink_page;
+
+	init_waitqueue_head(&kctx->event_queue);
+
+	kctx->cookies = KBASE_COOKIE_MASK;
+
+	/* Make sure page 0 is not used... */
+	err = kbase_region_tracker_init(kctx);
+	if (err)
+		goto no_region_tracker;
+
+	err = kbase_sticky_resource_init(kctx);
+	if (err)
+		goto no_sticky;
+
+	err = kbase_jit_init(kctx);
+	if (err)
+		goto no_jit;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_set(&kctx->jctx.work_id, 0);
+#endif
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
+#endif
+
+	kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+	mutex_init(&kctx->vinstr_cli_lock);
+
+	setup_timer(&kctx->soft_job_timeout,
+		    kbasep_soft_job_timeout_worker,
+		    (uintptr_t)kctx);
+
+	return kctx;
+
+no_jit:
+	kbase_gpu_vm_lock(kctx);
+	kbase_sticky_resource_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+no_sticky:
+	kbase_region_tracker_term(kctx);
+no_region_tracker:
+	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
+no_sink_page:
+	/* VM lock needed for the call to kbase_mmu_free_pgd */
+	kbase_gpu_vm_lock(kctx);
+	kbase_mmu_free_pgd(kctx);
+	kbase_gpu_vm_unlock(kctx);
+free_mmu:
+	kbase_mmu_term(kctx);
+term_dma_fence:
+	kbase_dma_fence_term(kctx);
+free_event:
+	kbase_event_cleanup(kctx);
+free_jd:
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+	kbase_jd_exit(kctx);
+deinit_evictable:
+	kbase_mem_evictable_deinit(kctx);
+free_pool:
+	kbase_mem_pool_term(&kctx->mem_pool);
+free_kctx:
+	vfree(kctx);
+out:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context);
+
+static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+{
+	dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+}
+
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Calls kbase_destroy_os_context() to free OS specific structures.
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	int pages;
+	unsigned long pending_regions_to_clean;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+	/* Ensure the core is powered up for the destroy process */
+	/* A suspend won't happen here, because we're in a syscall from a userspace
+	 * thread. */
+	kbase_pm_context_active(kbdev);
+
+	kbase_jd_zap_context(kctx);
+	kbase_event_cleanup(kctx);
+
+	/*
+	 * JIT must be terminated before the code below as it must be called
+	 * without the region lock being held.
+	 * The code above ensures no new JIT allocations can be made by
+	 * by the time we get to this point of context tear down.
+	 */
+	kbase_jit_term(kctx);
+
+	kbase_gpu_vm_lock(kctx);
+
+	kbase_sticky_resource_term(kctx);
+
+	/* MMU is disabled as part of scheduling out the context */
+	kbase_mmu_free_pgd(kctx);
+
+	/* drop the aliasing sink page now that it can't be mapped anymore */
+	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
+
+	/* free pending region setups */
+	pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+	while (pending_regions_to_clean) {
+		unsigned int cookie = __ffs(pending_regions_to_clean);
+
+		BUG_ON(!kctx->pending_regions[cookie]);
+
+		kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+
+		kctx->pending_regions[cookie] = NULL;
+		pending_regions_to_clean &= ~(1UL << cookie);
+	}
+
+	kbase_region_tracker_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+
+	kbase_jd_exit(kctx);
+
+	kbase_pm_context_idle(kbdev);
+
+	kbase_dma_fence_term(kctx);
+
+	kbase_mmu_term(kctx);
+
+	pages = atomic_read(&kctx->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_evictable_deinit(kctx);
+	kbase_mem_pool_term(&kctx->mem_pool);
+	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+	vfree(kctx);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context);
+
+/**
+ * kbase_context_set_create_flags - Set creation flags on a context
+ * @kctx: Kbase context
+ * @flags: Flags to set
+ *
+ * Return: 0 on success
+ */
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
+{
+	int err = 0;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long irq_flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Validate flags */
+	if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+
+	/* Translate the flags */
+	if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+		js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED);
+
+	/* Latch the initial attributes into the Job Scheduler */
+	kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
+
+	spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock,
+			irq_flags);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ out:
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
new file mode 100755
index 0000000..ffa889b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -0,0 +1,3971 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_uku.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_mem_linux.h>
+#ifdef CONFIG_MALI_DEVFREQ
+#include <backend/gpu/mali_kbase_devfreq.h>
+#endif /* CONFIG_MALI_DEVFREQ */
+#ifdef CONFIG_MALI_NO_MALI
+#include "mali_kbase_model_linux.h"
+#endif /* CONFIG_MALI_NO_MALI */
+#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_debugfs.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_regs_dump_debugfs.h"
+#endif /* !MALI_CUSTOMER_RELEASE */
+#include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#include <linux/anon_inodes.h>
+#include <linux/syscalls.h>
+#endif /* CONFIG_KDS */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/compat.h>	/* is_compat_task */
+#include <linux/mman.h>
+#include <linux/version.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+#include <mali_kbase_hw.h>
+#include <platform/mali_kbase_platform_common.h>
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+#include <platform/mali_kbase_platform_fake.h>
+#endif /*CONFIG_MALI_PLATFORM_FAKE */
+#ifdef CONFIG_SYNC
+#include <mali_kbase_sync.h>
+#endif /* CONFIG_SYNC */
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_PM_DEVFREQ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+
+#include <mali_kbase_config.h>
+
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif
+
+#include <mali_kbase_tlstream.h>
+
+#include <mali_kbase_as_fault_debugfs.h>
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+#if MALI_UNIT_TEST
+static struct kbase_exported_test_data shared_kernel_test_data;
+EXPORT_SYMBOL(shared_kernel_test_data);
+#endif /* MALI_UNIT_TEST */
+
+static int kbase_dev_nr;
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+extern int mali_pm_statue;
+#endif
+
+static DEFINE_MUTEX(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+
+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
+static inline void __compile_time_asserts(void)
+{
+	CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE);
+}
+
+#ifdef CONFIG_KDS
+
+struct kbasep_kds_resource_set_file_data {
+	struct kds_resource_set *lock;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file);
+
+static const struct file_operations kds_resource_fops = {
+	.release = kds_resource_release
+};
+
+struct kbase_kds_resource_list_data {
+	struct kds_resource **kds_resources;
+	unsigned long *kds_access_bitmap;
+	int num_elems;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file)
+{
+	struct kbasep_kds_resource_set_file_data *data;
+
+	data = (struct kbasep_kds_resource_set_file_data *)file->private_data;
+	if (NULL != data) {
+		if (NULL != data->lock)
+			kds_resource_set_release(&data->lock);
+
+		kfree(data);
+	}
+	return 0;
+}
+#endif /* CONFIG_KDS */
+
+static void kbase_create_timeline_objects(struct kbase_context *kctx)
+{
+	struct kbase_device             *kbdev = kctx->kbdev;
+	unsigned int                    lpu_id;
+	unsigned int                    as_nr;
+	struct kbasep_kctx_list_element *element;
+
+	/* Create LPU objects. */
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		u32 *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		kbase_tlstream_tl_summary_new_lpu(lpu, lpu_id, *lpu);
+	}
+
+	/* Create Address Space objects. */
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		kbase_tlstream_tl_summary_new_as(&kbdev->as[as_nr], as_nr);
+
+	/* Create GPU object and make it retain all LPUs and address spaces. */
+	kbase_tlstream_tl_summary_new_gpu(
+			kbdev,
+			kbdev->gpu_props.props.raw_props.gpu_id,
+			kbdev->gpu_props.num_cores);
+
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		void *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, kbdev);
+	}
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		kbase_tlstream_tl_summary_lifelink_as_gpu(
+				&kbdev->as[as_nr],
+				kbdev);
+
+	/* Create object for each known context. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry(element, &kbdev->kctx_list, link) {
+		kbase_tlstream_tl_summary_new_ctx(
+				element->kctx,
+				(u32)(element->kctx->id),
+				(u32)(element->kctx->tgid));
+	}
+	/* Before releasing the lock, reset body stream buffers.
+	 * This will prevent context creation message to be directed to both
+	 * summary and body stream. */
+	kbase_tlstream_reset_body_streams();
+	mutex_unlock(&kbdev->kctx_list_lock);
+	/* Static object are placed into summary packet that needs to be
+	 * transmitted first. Flush all streams to make it available to
+	 * user space. */
+	kbase_tlstream_flush_streams();
+}
+
+static void kbase_api_handshake(struct uku_version_check_args *version)
+{
+	switch (version->major) {
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	case 6:
+		/* We are backwards compatible with version 6,
+		 * so pretend to be the old version */
+		version->major = 6;
+		version->minor = 1;
+		break;
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+#ifdef BASE_LEGACY_UK7_SUPPORT
+	case 7:
+		/* We are backwards compatible with version 7,
+		 * so pretend to be the old version */
+		version->major = 7;
+		version->minor = 1;
+		break;
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	case 8:
+		/* We are backwards compatible with version 8,
+		 * so pretend to be the old version */
+		version->major = 8;
+		version->minor = 4;
+		break;
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+#ifdef BASE_LEGACY_UK9_SUPPORT
+	case 9:
+		/* We are backwards compatible with version 9,
+		 * so pretend to be the old version */
+		version->major = 9;
+		version->minor = 0;
+		break;
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+	case BASE_UK_VERSION_MAJOR:
+		/* set minor to be the lowest common */
+		version->minor = min_t(int, BASE_UK_VERSION_MINOR,
+				(int)version->minor);
+		break;
+	default:
+		/* We return our actual version regardless if it
+		 * matches the version returned by userspace -
+		 * userspace can bail if it can't handle this
+		 * version */
+		version->major = BASE_UK_VERSION_MAJOR;
+		version->minor = BASE_UK_VERSION_MINOR;
+		break;
+	}
+}
+
+/**
+ * enum mali_error - Mali error codes shared with userspace
+ *
+ * This is subset of those common Mali errors that can be returned to userspace.
+ * Values of matching user and kernel space enumerators MUST be the same.
+ * MALI_ERROR_NONE is guaranteed to be 0.
+ */
+enum mali_error {
+	MALI_ERROR_NONE = 0,
+	MALI_ERROR_OUT_OF_GPU_MEMORY,
+	MALI_ERROR_OUT_OF_MEMORY,
+	MALI_ERROR_FUNCTION_FAILED,
+};
+
+enum {
+	inited_mem = (1u << 0),
+	inited_js = (1u << 1),
+	inited_pm_runtime_init = (1u << 2),
+#ifdef CONFIG_MALI_DEVFREQ
+	inited_devfreq = (1u << 3),
+#endif /* CONFIG_MALI_DEVFREQ */
+	inited_tlstream = (1u << 4),
+	inited_backend_early = (1u << 5),
+	inited_backend_late = (1u << 6),
+	inited_device = (1u << 7),
+	inited_vinstr = (1u << 8),
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	inited_ipa = (1u << 9),
+#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+	inited_job_fault = (1u << 10),
+	inited_misc_register = (1u << 11),
+	inited_get_device = (1u << 12),
+	inited_sysfs_group = (1u << 13),
+	inited_dev_list = (1u << 14),
+	inited_debugfs = (1u << 15),
+	inited_gpu_device = (1u << 16),
+	inited_registers_map = (1u << 17),
+	inited_power_control = (1u << 19),
+	inited_buslogger = (1u << 20)
+};
+
+
+#ifdef CONFIG_MALI_DEBUG
+#define INACTIVE_WAIT_MS (5000)
+
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive)
+{
+	kbdev->driver_inactive = inactive;
+	wake_up(&kbdev->driver_inactive_wait);
+
+	/* Wait for any running IOCTLs to complete */
+	if (inactive)
+		msleep(INACTIVE_WAIT_MS);
+}
+KBASE_EXPORT_TEST_API(kbase_set_driver_inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 args_size)
+{
+	struct kbase_device *kbdev;
+	union uk_header *ukh = args;
+	u32 id;
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(ukh != NULL);
+
+	kbdev = kctx->kbdev;
+	id = ukh->id;
+	ukh->ret = MALI_ERROR_NONE; /* Be optimistic */
+
+#ifdef CONFIG_MALI_DEBUG
+	wait_event(kbdev->driver_inactive_wait,
+			kbdev->driver_inactive == false);
+#endif /* CONFIG_MALI_DEBUG */
+
+	if (UKP_FUNC_ID_CHECK_VERSION == id) {
+		struct uku_version_check_args *version_check;
+
+		if (args_size != sizeof(struct uku_version_check_args)) {
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			return 0;
+		}
+		version_check = (struct uku_version_check_args *)args;
+		kbase_api_handshake(version_check);
+		/* save the proposed version number for later use */
+		kctx->api_version = KBASE_API_VERSION(version_check->major,
+				version_check->minor);
+		ukh->ret = MALI_ERROR_NONE;
+		return 0;
+	}
+
+	/* block calls until version handshake */
+	if (kctx->api_version == 0)
+		return -EINVAL;
+
+	if (!atomic_read(&kctx->setup_complete)) {
+		struct kbase_uk_set_flags *kbase_set_flags;
+
+		/* setup pending, try to signal that we'll do the setup,
+		 * if setup was already in progress, err this call
+		 */
+		if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0)
+			return -EINVAL;
+
+		/* if unexpected call, will stay stuck in setup mode
+		 * (is it the only call we accept?)
+		 */
+		if (id != KBASE_FUNC_SET_FLAGS)
+			return -EINVAL;
+
+		kbase_set_flags = (struct kbase_uk_set_flags *)args;
+
+		/* if not matching the expected call, stay in setup mode */
+		if (sizeof(*kbase_set_flags) != args_size)
+			goto bad_size;
+
+		/* if bad flags, will stay stuck in setup mode */
+		if (kbase_context_set_create_flags(kctx,
+				kbase_set_flags->create_flags) != 0)
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+		atomic_set(&kctx->setup_complete, 1);
+		return 0;
+	}
+
+	/* setup complete, perform normal operation */
+	switch (id) {
+	case KBASE_FUNC_MEM_JIT_INIT:
+		{
+			struct kbase_uk_mem_jit_init *jit_init = args;
+
+			if (sizeof(*jit_init) != args_size)
+				goto bad_size;
+
+			if (kbase_region_tracker_init_jit(kctx,
+					jit_init->va_pages))
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_MEM_ALLOC:
+		{
+			struct kbase_uk_mem_alloc *mem = args;
+			struct kbase_va_region *reg;
+
+			if (sizeof(*mem) != args_size)
+				goto bad_size;
+
+#if defined(CONFIG_64BIT)
+			if (!kctx->is_compat) {
+				/* force SAME_VA if a 64-bit client */
+				mem->flags |= BASE_MEM_SAME_VA;
+			}
+#endif
+
+			reg = kbase_mem_alloc(kctx, mem->va_pages,
+					mem->commit_pages, mem->extent,
+					&mem->flags, &mem->gpu_va,
+					&mem->va_alignment);
+			if (!reg)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_MEM_IMPORT: {
+			struct kbase_uk_mem_import *mem_import = args;
+			void __user *phandle;
+
+			if (sizeof(*mem_import) != args_size)
+				goto bad_size;
+#ifdef CONFIG_COMPAT
+			if (kctx->is_compat)
+				phandle = compat_ptr(mem_import->phandle.compat_value);
+			else
+#endif
+				phandle = mem_import->phandle.value;
+
+			if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_import(kctx,
+					(enum base_mem_import_type)
+					mem_import->type,
+					phandle,
+					&mem_import->gpu_va,
+					&mem_import->va_pages,
+					&mem_import->flags)) {
+				mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID;
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			}
+			break;
+	}
+	case KBASE_FUNC_MEM_ALIAS: {
+			struct kbase_uk_mem_alias *alias = args;
+			struct base_mem_aliasing_info __user *user_ai;
+			struct base_mem_aliasing_info *ai;
+
+			if (sizeof(*alias) != args_size)
+				goto bad_size;
+
+			if (alias->nents > 2048) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+			if (!alias->nents) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+#ifdef CONFIG_COMPAT
+			if (kctx->is_compat)
+				user_ai = compat_ptr(alias->ai.compat_value);
+			else
+#endif
+				user_ai = alias->ai.value;
+
+			ai = vmalloc(sizeof(*ai) * alias->nents);
+
+			if (!ai) {
+				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+				break;
+			}
+
+			if (copy_from_user(ai, user_ai,
+					   sizeof(*ai) * alias->nents)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				goto copy_failed;
+			}
+
+			alias->gpu_va = kbase_mem_alias(kctx, &alias->flags,
+							alias->stride,
+							alias->nents, ai,
+							&alias->va_pages);
+			if (!alias->gpu_va) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				goto no_alias;
+			}
+no_alias:
+copy_failed:
+			vfree(ai);
+			break;
+		}
+	case KBASE_FUNC_MEM_COMMIT:
+		{
+			struct kbase_uk_mem_commit *commit = args;
+
+			if (sizeof(*commit) != args_size)
+				goto bad_size;
+
+			if (commit->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_COMMIT: commit->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_commit(kctx, commit->gpu_addr,
+					commit->pages,
+					(base_backing_threshold_status *)
+					&commit->result_subcode) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+
+	case KBASE_FUNC_MEM_QUERY:
+		{
+			struct kbase_uk_mem_query *query = args;
+
+			if (sizeof(*query) != args_size)
+				goto bad_size;
+
+			if (query->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+			if (query->query != KBASE_MEM_QUERY_COMMIT_SIZE &&
+			    query->query != KBASE_MEM_QUERY_VA_SIZE &&
+				query->query != KBASE_MEM_QUERY_FLAGS) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->query = %lld unknown", (unsigned long long)query->query);
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_query(kctx, query->gpu_addr,
+					query->query, &query->value) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			break;
+		}
+		break;
+
+	case KBASE_FUNC_MEM_FLAGS_CHANGE:
+		{
+			struct kbase_uk_mem_flags_change *fc = args;
+
+			if (sizeof(*fc) != args_size)
+				goto bad_size;
+
+			if ((fc->gpu_va & ~PAGE_MASK) && (fc->gpu_va >= PAGE_SIZE)) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FLAGS_CHANGE: mem->gpu_va: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_flags_change(kctx, fc->gpu_va,
+					fc->flags, fc->mask) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+	case KBASE_FUNC_MEM_FREE:
+		{
+			struct kbase_uk_mem_free *mem = args;
+
+			if (sizeof(*mem) != args_size)
+				goto bad_size;
+
+			if ((mem->gpu_addr & ~PAGE_MASK) && (mem->gpu_addr >= PAGE_SIZE)) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FREE: mem->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_free(kctx, mem->gpu_addr) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+
+	case KBASE_FUNC_JOB_SUBMIT:
+		{
+			struct kbase_uk_job_submit *job = args;
+
+			if (sizeof(*job) != args_size)
+				goto bad_size;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+			if (kbase_jd_submit(kctx, job, 0) != 0)
+#else
+			if (kbase_jd_submit(kctx, job) != 0)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	case KBASE_FUNC_JOB_SUBMIT_UK6:
+		{
+			struct kbase_uk_job_submit *job = args;
+
+			if (sizeof(*job) != args_size)
+				goto bad_size;
+
+			if (kbase_jd_submit(kctx, job, 1) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+#endif
+
+	case KBASE_FUNC_SYNC:
+		{
+			struct kbase_uk_sync_now *sn = args;
+
+			if (sizeof(*sn) != args_size)
+				goto bad_size;
+
+			if (sn->sset.basep_sset.mem_handle.basep.handle & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+#ifndef CONFIG_MALI_COH_USER
+			if (kbase_sync_now(kctx, &sn->sset) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif
+			break;
+		}
+
+	case KBASE_FUNC_DISJOINT_QUERY:
+		{
+			struct kbase_uk_disjoint_query *dquery = args;
+
+			if (sizeof(*dquery) != args_size)
+				goto bad_size;
+
+			/* Get the disjointness counter value. */
+			dquery->counter = kbase_disjoint_event_get(kctx->kbdev);
+			break;
+		}
+
+	case KBASE_FUNC_POST_TERM:
+		{
+			kbase_event_close(kctx);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_SETUP:
+		{
+			struct kbase_uk_hwcnt_setup *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_legacy_hwc_setup(kbdev->vinstr_ctx,
+					&kctx->vinstr_cli, setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_DUMP:
+		{
+			/* args ignored */
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwc_dump(kctx->vinstr_cli,
+					BASE_HWCNT_READER_EVENT_MANUAL) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_CLEAR:
+		{
+			/* args ignored */
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwc_clear(kctx->vinstr_cli) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_READER_SETUP:
+		{
+			struct kbase_uk_hwcnt_reader_setup *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwcnt_reader_setup(kbdev->vinstr_ctx,
+					setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_GPU_PROPS_REG_DUMP:
+		{
+			struct kbase_uk_gpuprops *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			if (kbase_gpuprops_uk_get_props(kctx, setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_FIND_CPU_OFFSET:
+		{
+			struct kbase_uk_find_cpu_offset *find = args;
+
+			if (sizeof(*find) != args_size)
+				goto bad_size;
+
+			if (find->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_FIND_CPU_OFFSET: find->gpu_addr: passed parameter is invalid");
+				goto out_bad;
+			}
+
+			if (find->size > SIZE_MAX || find->cpu_addr > ULONG_MAX) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else {
+				int err;
+
+				err = kbasep_find_enclosing_cpu_mapping_offset(
+						kctx,
+						find->gpu_addr,
+						(uintptr_t) find->cpu_addr,
+						(size_t) find->size,
+						&find->offset);
+
+				if (err)
+					ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			}
+			break;
+		}
+	case KBASE_FUNC_GET_VERSION:
+		{
+			struct kbase_uk_get_ddk_version *get_version = (struct kbase_uk_get_ddk_version *)args;
+
+			if (sizeof(*get_version) != args_size)
+				goto bad_size;
+
+			/* version buffer size check is made in compile time assert */
+			memcpy(get_version->version_buffer, KERNEL_SIDE_DDK_VERSION_STRING, sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+			get_version->version_string_size = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+			break;
+		}
+
+	case KBASE_FUNC_STREAM_CREATE:
+		{
+#ifdef CONFIG_SYNC
+			struct kbase_uk_stream_create *screate = (struct kbase_uk_stream_create *)args;
+
+			if (sizeof(*screate) != args_size)
+				goto bad_size;
+
+			if (strnlen(screate->name, sizeof(screate->name)) >= sizeof(screate->name)) {
+				/* not NULL terminated */
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_stream_create(screate->name, &screate->fd) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+#else /* CONFIG_SYNC */
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif /* CONFIG_SYNC */
+			break;
+		}
+	case KBASE_FUNC_FENCE_VALIDATE:
+		{
+#ifdef CONFIG_SYNC
+			struct kbase_uk_fence_validate *fence_validate = (struct kbase_uk_fence_validate *)args;
+
+			if (sizeof(*fence_validate) != args_size)
+				goto bad_size;
+
+			if (kbase_fence_validate(fence_validate->fd) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+#endif /* CONFIG_SYNC */
+			break;
+		}
+
+	case KBASE_FUNC_SET_TEST_DATA:
+		{
+#if MALI_UNIT_TEST
+			struct kbase_uk_set_test_data *set_data = args;
+
+			shared_kernel_test_data = set_data->test_data;
+			shared_kernel_test_data.kctx.value = (void __user *)kctx;
+			shared_kernel_test_data.mm.value = (void __user *)current->mm;
+			ukh->ret = MALI_ERROR_NONE;
+#endif /* MALI_UNIT_TEST */
+			break;
+		}
+
+	case KBASE_FUNC_INJECT_ERROR:
+		{
+#ifdef CONFIG_MALI_ERROR_INJECT
+			unsigned long flags;
+			struct kbase_error_params params = ((struct kbase_uk_error_params *)args)->params;
+
+			/*mutex lock */
+			spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+			if (job_atom_inject_error(&params) != 0)
+				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+			/*mutex unlock */
+#endif /* CONFIG_MALI_ERROR_INJECT */
+			break;
+		}
+
+	case KBASE_FUNC_MODEL_CONTROL:
+		{
+#ifdef CONFIG_MALI_NO_MALI
+			unsigned long flags;
+			struct kbase_model_control_params params =
+					((struct kbase_uk_model_control_params *)args)->params;
+
+			/*mutex lock */
+			spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+			if (gpu_model_control(kbdev->model, &params) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+			/*mutex unlock */
+#endif /* CONFIG_MALI_NO_MALI */
+			break;
+		}
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	case KBASE_FUNC_KEEP_GPU_POWERED:
+		{
+			dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_KEEP_GPU_POWERED: function is deprecated and disabled\n");
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+	case KBASE_FUNC_GET_PROFILING_CONTROLS:
+		{
+			struct kbase_uk_profiling_controls *controls =
+					(struct kbase_uk_profiling_controls *)args;
+			u32 i;
+
+			if (sizeof(*controls) != args_size)
+				goto bad_size;
+
+			for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+				controls->profiling_controls[i] = kbase_get_profiling_control(kbdev, i);
+
+			break;
+		}
+
+	/* used only for testing purposes; these controls are to be set by gator through gator API */
+	case KBASE_FUNC_SET_PROFILING_CONTROLS:
+		{
+			struct kbase_uk_profiling_controls *controls =
+					(struct kbase_uk_profiling_controls *)args;
+			u32 i;
+
+			if (sizeof(*controls) != args_size)
+				goto bad_size;
+
+			for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+				_mali_profiling_control(i, controls->profiling_controls[i]);
+
+			break;
+		}
+
+	case KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD:
+		{
+			struct kbase_uk_debugfs_mem_profile_add *add_data =
+					(struct kbase_uk_debugfs_mem_profile_add *)args;
+			char *buf;
+			char __user *user_buf;
+
+			if (sizeof(*add_data) != args_size)
+				goto bad_size;
+
+			if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+				dev_err(kbdev->dev, "buffer too big\n");
+				goto out_bad;
+			}
+
+#ifdef CONFIG_COMPAT
+			if (kctx->is_compat)
+				user_buf = compat_ptr(add_data->buf.compat_value);
+			else
+#endif
+				user_buf = add_data->buf.value;
+
+			buf = kmalloc(add_data->len, GFP_KERNEL);
+			if (!buf)
+				goto out_bad;
+
+			if (0 != copy_from_user(buf, user_buf, add_data->len)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				kfree(buf);
+				goto out_bad;
+			}
+
+			if (kbasep_mem_profile_debugfs_insert(kctx, buf,
+							add_data->len)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				kfree(buf);
+				goto out_bad;
+			}
+
+			break;
+		}
+
+#ifdef CONFIG_MALI_NO_MALI
+	case KBASE_FUNC_SET_PRFCNT_VALUES:
+		{
+
+			struct kbase_uk_prfcnt_values *params =
+			  ((struct kbase_uk_prfcnt_values *)args);
+			gpu_model_set_dummy_prfcnt_sample(params->data,
+					params->size);
+
+			break;
+		}
+#endif /* CONFIG_MALI_NO_MALI */
+
+	case KBASE_FUNC_TLSTREAM_ACQUIRE:
+		{
+			struct kbase_uk_tlstream_acquire *tlstream_acquire =
+				args;
+
+			if (sizeof(*tlstream_acquire) != args_size)
+				goto bad_size;
+
+			if (0 != kbase_tlstream_acquire(
+						kctx,
+						&tlstream_acquire->fd)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else if (0 <= tlstream_acquire->fd) {
+				/* Summary stream was cleared during acquire.
+				 * Create static timeline objects that will be
+				 * read by client. */
+				kbase_create_timeline_objects(kctx);
+			}
+			break;
+		}
+	case KBASE_FUNC_TLSTREAM_FLUSH:
+		{
+			struct kbase_uk_tlstream_flush *tlstream_flush =
+				args;
+
+			if (sizeof(*tlstream_flush) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_flush_streams();
+			break;
+		}
+#if MALI_UNIT_TEST
+	case KBASE_FUNC_TLSTREAM_TEST:
+		{
+			struct kbase_uk_tlstream_test *tlstream_test = args;
+
+			if (sizeof(*tlstream_test) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_test(
+					tlstream_test->tpw_count,
+					tlstream_test->msg_delay,
+					tlstream_test->msg_count,
+					tlstream_test->aux_msg);
+			break;
+		}
+	case KBASE_FUNC_TLSTREAM_STATS:
+		{
+			struct kbase_uk_tlstream_stats *tlstream_stats = args;
+
+			if (sizeof(*tlstream_stats) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_stats(
+					&tlstream_stats->bytes_collected,
+					&tlstream_stats->bytes_generated);
+			break;
+		}
+#endif /* MALI_UNIT_TEST */
+
+	case KBASE_FUNC_GET_CONTEXT_ID:
+		{
+			struct kbase_uk_context_id *info = args;
+
+			info->id = kctx->id;
+			break;
+		}
+
+	case KBASE_FUNC_SOFT_EVENT_UPDATE:
+		{
+			struct kbase_uk_soft_event_update *update = args;
+
+			if (sizeof(*update) != args_size)
+				goto bad_size;
+
+			if (((update->new_status != BASE_JD_SOFT_EVENT_SET) &&
+			    (update->new_status != BASE_JD_SOFT_EVENT_RESET)) ||
+			    (update->flags != 0))
+				goto out_bad;
+
+			if (kbase_soft_event_update(kctx, update->evt,
+						update->new_status))
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+
+	default:
+		dev_err(kbdev->dev, "unknown ioctl %u\n", id);
+		goto out_bad;
+	}
+
+	return ret;
+
+ bad_size:
+	dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id);
+ out_bad:
+	return -EINVAL;
+}
+
+static struct kbase_device *to_kbase_device(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
+
+static int assign_irqs(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int i;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* 3 IRQ resources */
+	for (i = 0; i < 3; i++) {
+		struct resource *irq_res;
+		int irqtag;
+
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res) {
+			dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+			return -ENOENT;
+		}
+
+#ifdef CONFIG_OF
+		if (!strcmp(irq_res->name, "JOB")) {
+			irqtag = JOB_IRQ_TAG;
+		} else if (!strcmp(irq_res->name, "MMU")) {
+			irqtag = MMU_IRQ_TAG;
+		} else if (!strcmp(irq_res->name, "GPU")) {
+			irqtag = GPU_IRQ_TAG;
+		} else {
+			dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+				irq_res->name);
+			return -EINVAL;
+		}
+#else
+		irqtag = i;
+#endif /* CONFIG_OF */
+		kbdev->irqs[irqtag].irq = irq_res->start;
+		kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK;
+	}
+
+	return 0;
+}
+
+/*
+ * API to acquire device list mutex and
+ * return pointer to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void)
+{
+	mutex_lock(&kbase_dev_list_lock);
+	return &kbase_dev_list;
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_get);
+
+/* API to release the device list mutex */
+void kbase_dev_list_put(const struct list_head *dev_list)
+{
+	mutex_unlock(&kbase_dev_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_put);
+
+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */
+struct kbase_device *kbase_find_device(int minor)
+{
+	struct kbase_device *kbdev = NULL;
+	struct list_head *entry;
+	const struct list_head *dev_list = kbase_dev_list_get();
+
+	list_for_each(entry, dev_list) {
+		struct kbase_device *tmp;
+
+		tmp = list_entry(entry, struct kbase_device, entry);
+		if (tmp->mdev.minor == minor || minor == -1) {
+			kbdev = tmp;
+			get_device(kbdev->dev);
+			break;
+		}
+	}
+	kbase_dev_list_put(dev_list);
+
+	return kbdev;
+}
+EXPORT_SYMBOL(kbase_find_device);
+
+void kbase_release_device(struct kbase_device *kbdev)
+{
+	put_device(kbdev->dev);
+}
+EXPORT_SYMBOL(kbase_release_device);
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+	struct kbase_device *kbdev = NULL;
+	struct kbase_context *kctx;
+	int ret = 0;
+#ifdef CONFIG_DEBUG_FS
+	char kctx_name[64];
+#endif
+
+	kbdev = kbase_find_device(iminor(inode));
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kctx = kbase_create_context(kbdev, is_compat_task());
+	if (!kctx) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	init_waitqueue_head(&kctx->event_queue);
+	filp->private_data = kctx;
+	kctx->filp = filp;
+
+	kctx->infinite_cache_active = kbdev->infinite_cache_active_default;
+
+#ifdef CONFIG_DEBUG_FS
+	snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+
+	kctx->kctx_dentry = debugfs_create_dir(kctx_name,
+			kbdev->debugfs_ctx_directory);
+
+	if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+#ifdef CONFIG_MALI_COH_USER
+	 /* if cache is completely coherent at hardware level, then remove the
+	  * infinite cache control support from debugfs.
+	  */
+#else
+	debugfs_create_bool("infinite_cache", 0644, kctx->kctx_dentry,
+			&kctx->infinite_cache_active);
+#endif /* CONFIG_MALI_COH_USER */
+
+	mutex_init(&kctx->mem_profile_lock);
+
+	kbasep_jd_debugfs_ctx_add(kctx);
+	kbase_debug_mem_view_init(filp);
+
+	kbase_debug_job_fault_context_init(kctx);
+
+	kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool);
+
+	kbase_jit_debugfs_add(kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+	dev_dbg(kbdev->dev, "created base context\n");
+
+	{
+		struct kbasep_kctx_list_element *element;
+
+		element = kzalloc(sizeof(*element), GFP_KERNEL);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			element->kctx = kctx;
+			list_add(&element->link, &kbdev->kctx_list);
+			kbase_tlstream_tl_new_ctx(
+					element->kctx,
+					(u32)(element->kctx->id),
+					(u32)(element->kctx->tgid));
+			mutex_unlock(&kbdev->kctx_list_lock);
+		} else {
+			/* we don't treat this as a fail - just warn about it */
+			dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n");
+		}
+	}
+	return 0;
+
+ out:
+	kbase_release_device(kbdev);
+	return ret;
+}
+
+static int kbase_release(struct inode *inode, struct file *filp)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_kctx_list_element *element, *tmp;
+	bool found_element = false;
+
+	kbase_tlstream_tl_del_ctx(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	debugfs_remove_recursive(kctx->kctx_dentry);
+	kbasep_mem_profile_debugfs_remove(kctx);
+	kbase_debug_job_fault_context_term(kctx);
+#endif
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found_element = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+	if (!found_element)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	filp->private_data = NULL;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	/* If this client was performing hwcnt dumping and did not explicitly
+	 * detach itself, remove it from the vinstr core now */
+	if (kctx->vinstr_cli) {
+		struct kbase_uk_hwcnt_setup setup;
+
+		setup.dump_buffer = 0llu;
+		kbase_vinstr_legacy_hwc_setup(
+				kbdev->vinstr_ctx, &kctx->vinstr_cli, &setup);
+	}
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	kbase_destroy_context(kctx);
+
+	dev_dbg(kbdev->dev, "deleted base context\n");
+	kbase_release_device(kbdev);
+	return 0;
+}
+
+#define CALL_MAX_SIZE 536
+
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	u64 msg[(CALL_MAX_SIZE + 7) >> 3] = { 0xdeadbeefdeadbeefull };	/* alignment fixup */
+	u32 size = _IOC_SIZE(cmd);
+	struct kbase_context *kctx = filp->private_data;
+
+	if (size > CALL_MAX_SIZE)
+		return -ENOTTY;
+
+	if (0 != copy_from_user(&msg, (void __user *)arg, size)) {
+		dev_err(kctx->kbdev->dev, "failed to copy ioctl argument into kernel space\n");
+		return -EFAULT;
+	}
+
+	if (kbase_dispatch(kctx, &msg, size) != 0)
+		return -EFAULT;
+
+	if (0 != copy_to_user((void __user *)arg, &msg, size)) {
+		dev_err(kctx->kbdev->dev, "failed to copy results of UK call back to user space\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct base_jd_event_v2 uevent;
+	int out_count = 0;
+
+	if (count < sizeof(uevent))
+		return -ENOBUFS;
+
+	do {
+		while (kbase_event_dequeue(kctx, &uevent)) {
+			if (out_count > 0)
+				goto out;
+
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			if (wait_event_interruptible(kctx->event_queue,
+					kbase_event_pending(kctx)) != 0)
+				return -ERESTARTSYS;
+		}
+		if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+			if (out_count == 0)
+				return -EPIPE;
+			goto out;
+		}
+
+		if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0)
+			return -EFAULT;
+
+		buf += sizeof(uevent);
+		out_count++;
+		count -= sizeof(uevent);
+	} while (count >= sizeof(uevent));
+
+ out:
+	return out_count * sizeof(uevent);
+}
+
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+	struct kbase_context *kctx = filp->private_data;
+
+	poll_wait(filp, &kctx->event_queue, wait);
+	if (kbase_event_pending(kctx))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup);
+
+static int kbase_check_flags(int flags)
+{
+	/* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
+	 * closes the file descriptor in a child process.
+	 */
+	if (0 == (flags & O_CLOEXEC))
+		return -EINVAL;
+
+	return 0;
+}
+
+#ifdef CONFIG_64BIT
+/* The following function is taken from the kernel and just
+ * renamed. As it's not exported to modules we must copy-paste it here.
+ */
+
+static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
+		*info)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+
+	/* Adjust search length to account for worst case alignment overhead */
+	length = info->length + info->align_mask;
+	if (length < info->length)
+		return -ENOMEM;
+
+	/*
+	 * Adjust search limits by the desired length.
+	 * See implementation comment at top of unmapped_area().
+	 */
+	gap_end = info->high_limit;
+	if (gap_end < length)
+		return -ENOMEM;
+	high_limit = gap_end - length;
+
+	if (info->low_limit > high_limit)
+		return -ENOMEM;
+	low_limit = info->low_limit + length;
+
+	/* Check highest gap, which does not precede any rbtree node */
+	gap_start = mm->highest_vm_end;
+	if (gap_start <= high_limit)
+		goto found_highest;
+
+	/* Check if rbtree root looks promising */
+	if (RB_EMPTY_ROOT(&mm->mm_rb))
+		return -ENOMEM;
+	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+	if (vma->rb_subtree_gap < length)
+		return -ENOMEM;
+
+	while (true) {
+		/* Visit right subtree if it looks promising */
+		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+			struct vm_area_struct *right =
+				rb_entry(vma->vm_rb.rb_right,
+					 struct vm_area_struct, vm_rb);
+			if (right->rb_subtree_gap >= length) {
+				vma = right;
+				continue;
+			}
+		}
+
+check_current:
+		/* Check if current node has a suitable gap */
+		gap_end = vma->vm_start;
+		if (gap_end < low_limit)
+			return -ENOMEM;
+		if (gap_start <= high_limit && gap_end - gap_start >= length)
+			goto found;
+
+		/* Visit left subtree if it looks promising */
+		if (vma->vm_rb.rb_left) {
+			struct vm_area_struct *left =
+				rb_entry(vma->vm_rb.rb_left,
+					 struct vm_area_struct, vm_rb);
+			if (left->rb_subtree_gap >= length) {
+				vma = left;
+				continue;
+			}
+		}
+
+		/* Go back up the rbtree to find next candidate node */
+		while (true) {
+			struct rb_node *prev = &vma->vm_rb;
+			if (!rb_parent(prev))
+				return -ENOMEM;
+			vma = rb_entry(rb_parent(prev),
+				       struct vm_area_struct, vm_rb);
+			if (prev == vma->vm_rb.rb_right) {
+				gap_start = vma->vm_prev ?
+					vma->vm_prev->vm_end : 0;
+				goto check_current;
+			}
+		}
+	}
+
+found:
+	/* We found a suitable gap. Clip it with the original high_limit. */
+	if (gap_end > info->high_limit)
+		gap_end = info->high_limit;
+
+found_highest:
+	/* Compute highest gap address at the desired alignment */
+	gap_end -= info->length;
+	gap_end -= (gap_end - info->align_offset) & info->align_mask;
+
+	VM_BUG_ON(gap_end < info->low_limit);
+	VM_BUG_ON(gap_end < gap_start);
+	return gap_end;
+}
+
+
+static unsigned long kbase_get_unmapped_area(struct file *filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	/* based on get_unmapped_area, but simplified slightly due to that some
+	 * values are known in advance */
+	struct kbase_context *kctx = filp->private_data;
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+
+	/* err on fixed address */
+	if ((flags & MAP_FIXED) || addr)
+		return -EINVAL;
+
+	/* too big? */
+	if (len > TASK_SIZE - SZ_2M)
+		return -ENOMEM;
+
+	if (kctx->is_compat)
+		return current->mm->get_unmapped_area(filp, addr, len, pgoff,
+				flags);
+
+	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) {
+		info.high_limit = kctx->same_va_end << PAGE_SHIFT;
+		info.align_mask = 0;
+		info.align_offset = 0;
+	} else {
+		info.high_limit = min_t(unsigned long, mm->mmap_base,
+					(kctx->same_va_end << PAGE_SHIFT));
+		if (len >= SZ_2M) {
+			info.align_offset = SZ_2M;
+			info.align_mask = SZ_2M - 1;
+		} else {
+			info.align_mask = 0;
+			info.align_offset = 0;
+		}
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = SZ_2M;
+	return kbase_unmapped_area_topdown(&info);
+}
+#endif
+
+static const struct file_operations kbase_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_open,
+	.release = kbase_release,
+	.read = kbase_read,
+	.poll = kbase_poll,
+	.unlocked_ioctl = kbase_ioctl,
+	.compat_ioctl = kbase_ioctl,
+	.mmap = kbase_mmap,
+	.check_flags = kbase_check_flags,
+#ifdef CONFIG_64BIT
+	.get_unmapped_area = kbase_get_unmapped_area,
+#endif
+};
+
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value)
+{
+	writel(value, kbdev->reg + offset);
+}
+
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset)
+{
+	return readl(kbdev->reg + offset);
+}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+/** Show callback for the @c power_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *current_policy;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_get_policy(kbdev);
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c power_policy sysfs file.
+ *
+ * This function is called when the @c power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls @ref kbase_pm_set_policy to change the
+ * policy.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "power_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/** The sysfs file @c power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+
+/** Show callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c core_availability_policy
+ * sysfs file. This is a list of the available policies with the currently
+ * active one surrounded by square brackets.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *current_policy;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_ca_get_policy(kbdev);
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called when the @c core_availability_policy sysfs file is
+ * written to. It matches the requested policy against the available policies
+ * and if a matching policy is found calls @ref kbase_pm_set_policy to change
+ * the policy.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *new_policy = NULL;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "core_availability_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_ca_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/** The sysfs file @c core_availability_policy
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy);
+
+/** Show callback for the @c core_mask sysfs file.
+ *
+ * This function is called to get the contents of the @c core_mask sysfs
+ * file.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS0) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[0]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS1) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[1]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS2) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[2]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Available core mask : 0x%llX\n",
+			kbdev->gpu_props.props.raw_props.shader_present);
+
+	return ret;
+}
+
+/** Store callback for the @c core_mask sysfs file.
+ *
+ * This function is called when the @c core_mask sysfs file is written to.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	u64 new_core_mask[3];
+	int items;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llx %llx %llx",
+			&new_core_mask[0], &new_core_mask[1],
+			&new_core_mask[2]);
+
+	if (items == 1)
+		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+
+	if (items == 1 || items == 3) {
+		u64 shader_present =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		u64 group0_core_mask =
+				kbdev->gpu_props.props.coherency_info.group[0].
+				core_mask;
+
+		if ((new_core_mask[0] & shader_present) != new_core_mask[0] ||
+				!(new_core_mask[0] & group0_core_mask) ||
+			(new_core_mask[1] & shader_present) !=
+						new_core_mask[1] ||
+				!(new_core_mask[1] & group0_core_mask) ||
+			(new_core_mask[2] & shader_present) !=
+						new_core_mask[2] ||
+				!(new_core_mask[2] & group0_core_mask)) {
+			dev_err(dev, "power_policy: invalid core specification\n");
+			return -EINVAL;
+		}
+
+		if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
+				kbdev->pm.debug_core_mask[1] !=
+						new_core_mask[1] ||
+				kbdev->pm.debug_core_mask[2] !=
+						new_core_mask[2]) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+			kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+					new_core_mask[1], new_core_mask[2]);
+
+			spin_unlock_irqrestore(&kbdev->pm.power_change_lock,
+					flags);
+		}
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n"
+		"Use format <core_mask>\n"
+		"or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+	return -EINVAL;
+}
+
+/** The sysfs file @c core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+
+/**
+ * set_soft_job_timeout() - Store callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This allows setting the timeout for software jobs. Waiting soft event wait
+ * jobs will be cancelled after this period expires, while soft fence wait jobs
+ * will print debug information if the fence debug feature is enabled.
+ *
+ * This is expressed in milliseconds.
+ *
+ * Return: count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_soft_job_timeout(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int soft_job_timeout_ms;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) ||
+	    (soft_job_timeout_ms <= 0))
+		return -EINVAL;
+
+	atomic_set(&kbdev->js_data.soft_job_timeout_ms,
+		   soft_job_timeout_ms);
+
+	return count;
+}
+
+/**
+ * show_soft_job_timeout() - Show callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * This will return the timeout for the software jobs.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_soft_job_timeout(struct device *dev,
+				       struct device_attribute *attr,
+				       char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n",
+			 atomic_read(&kbdev->js_data.soft_job_timeout_ms));
+}
+
+static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR,
+		   show_soft_job_timeout, set_soft_job_timeout);
+
+static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms,
+				int default_ticks, u32 old_ticks)
+{
+	if (timeout_ms > 0) {
+		u64 ticks = timeout_ms * 1000000ULL;
+		do_div(ticks, kbdev->js_data.scheduling_period_ns);
+		if (!ticks)
+			return 1;
+		return ticks;
+	} else if (timeout_ms < 0) {
+		return default_ticks;
+	} else {
+		return old_ticks;
+	}
+}
+
+/** Store callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the @c js_timeouts sysfs
+ * file. This file contains five values separated by whitespace. The values
+ * are basically the same as JS_SOFT_STOP_TICKS, JS_HARD_STOP_TICKS_SS,
+ * JS_HARD_STOP_TICKS_DUMPING, JS_RESET_TICKS_SS, JS_RESET_TICKS_DUMPING
+ * configuration values (in that order), with the difference that the js_timeout
+ * values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfile file allows the current values in
+ * use by the job scheduler to get override. Note that a value needs to
+ * be other than 0 for it to override the current job scheduler value.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	long js_soft_stop_ms;
+	long js_soft_stop_ms_cl;
+	long js_hard_stop_ms_ss;
+	long js_hard_stop_ms_cl;
+	long js_hard_stop_ms_dumping;
+	long js_reset_ms_ss;
+	long js_reset_ms_cl;
+	long js_reset_ms_dumping;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld",
+			&js_soft_stop_ms, &js_soft_stop_ms_cl,
+			&js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+			&js_hard_stop_ms_dumping, &js_reset_ms_ss,
+			&js_reset_ms_cl, &js_reset_ms_dumping);
+
+	if (items == 8) {
+		struct kbasep_js_device_data *js_data = &kbdev->js_data;
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+
+#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\
+	js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \
+			default, js_data->ticks_name); \
+	dev_dbg(kbdev->dev, "Overriding " #ticks_name \
+			" with %lu ticks (%lu ms)\n", \
+			(unsigned long)js_data->ticks_name, \
+			ms_name); \
+	} while (0)
+
+		UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms,
+				DEFAULT_JS_SOFT_STOP_TICKS);
+		UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl,
+				DEFAULT_JS_SOFT_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_HARD_STOP_TICKS_SS_8408 :
+				DEFAULT_JS_HARD_STOP_TICKS_SS);
+		UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl,
+				DEFAULT_JS_HARD_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_dumping,
+				js_hard_stop_ms_dumping,
+				DEFAULT_JS_HARD_STOP_TICKS_DUMPING);
+		UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_RESET_TICKS_SS_8408 :
+				DEFAULT_JS_RESET_TICKS_SS);
+		UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl,
+				DEFAULT_JS_RESET_TICKS_CL);
+		UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping,
+				DEFAULT_JS_RESET_TICKS_DUMPING);
+
+		kbase_js_set_timeouts(kbdev);
+
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n"
+			"Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n"
+			"Write 0 for no change, -1 to restore default timeout\n");
+	return -EINVAL;
+}
+
+static unsigned long get_js_timeout_in_ms(
+		u32 scheduling_period_ns,
+		u32 ticks)
+{
+	u64 ms = (u64)ticks * scheduling_period_ns;
+
+	do_div(ms, 1000000UL);
+	return ms;
+}
+
+/** Show callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the @c js_timeouts sysfs
+ * file. It returns the last set values written to the js_timeouts sysfs file.
+ * If the file didn't get written yet, the values will be current setting in
+ * use.
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+	unsigned long js_soft_stop_ms;
+	unsigned long js_soft_stop_ms_cl;
+	unsigned long js_hard_stop_ms_ss;
+	unsigned long js_hard_stop_ms_cl;
+	unsigned long js_hard_stop_ms_dumping;
+	unsigned long js_reset_ms_ss;
+	unsigned long js_reset_ms_cl;
+	unsigned long js_reset_ms_dumping;
+	u32 scheduling_period_ns;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+
+#define GET_TIMEOUT(name) get_js_timeout_in_ms(\
+		scheduling_period_ns, \
+		kbdev->js_data.name)
+
+	js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks);
+	js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl);
+	js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss);
+	js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl);
+	js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping);
+	js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss);
+	js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl);
+	js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef GET_TIMEOUT
+
+	ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+			js_soft_stop_ms, js_soft_stop_ms_cl,
+			js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+			js_hard_stop_ms_dumping, js_reset_ms_ss,
+			js_reset_ms_cl, js_reset_ms_dumping);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** The sysfs file @c js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * JS_STOP_STOP_TICKS_SS
+ * JS_STOP_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
+
+static u32 get_new_js_timeout(
+		u32 old_period,
+		u32 old_ticks,
+		u32 new_scheduling_period_ns)
+{
+	u64 ticks = (u64)old_period * (u64)old_ticks;
+	do_div(ticks, new_scheduling_period_ns);
+	return ticks?ticks:1;
+}
+
+/**
+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ *                            file
+ * @dev:   The device the sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the js_scheduling_period sysfs file is written
+ * to. It checks the data written, and if valid updates the js_scheduling_period
+ * value
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	unsigned int js_scheduling_period;
+	u32 new_scheduling_period_ns;
+	u32 old_period;
+	struct kbasep_js_device_data *js_data;
+	unsigned long flags;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	js_data = &kbdev->js_data;
+
+	ret = kstrtouint(buf, 0, &js_scheduling_period);
+	if (ret || !js_scheduling_period) {
+		dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
+				"Use format <js_scheduling_period_ms>\n");
+		return -EINVAL;
+	}
+
+	new_scheduling_period_ns = js_scheduling_period * 1000000;
+
+	/* Update scheduling timeouts */
+	mutex_lock(&js_data->runpool_mutex);
+	spin_lock_irqsave(&js_data->runpool_irq.lock, flags);
+
+	/* If no contexts have been scheduled since js_timeouts was last written
+	 * to, the new timeouts might not have been latched yet. So check if an
+	 * update is pending and use the new values if necessary. */
+
+	/* Use previous 'new' scheduling period as a base if present. */
+	old_period = js_data->scheduling_period_ns;
+
+#define SET_TIMEOUT(name) \
+		(js_data->name = get_new_js_timeout(\
+				old_period, \
+				kbdev->js_data.name, \
+				new_scheduling_period_ns))
+
+	SET_TIMEOUT(soft_stop_ticks);
+	SET_TIMEOUT(soft_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_ss);
+	SET_TIMEOUT(hard_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_dumping);
+	SET_TIMEOUT(gpu_reset_ticks_ss);
+	SET_TIMEOUT(gpu_reset_ticks_cl);
+	SET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef SET_TIMEOUT
+
+	js_data->scheduling_period_ns = new_scheduling_period_ns;
+
+	kbase_js_set_timeouts(kbdev);
+
+	spin_unlock_irqrestore(&js_data->runpool_irq.lock, flags);
+	mutex_unlock(&js_data->runpool_mutex);
+
+	dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
+			js_scheduling_period);
+
+	return count;
+}
+
+/**
+ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs
+ *                             entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the JS scheduling
+ * period.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	u32 period;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	period = kbdev->js_data.scheduling_period_ns;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+			period / 1000000);
+
+	return ret;
+}
+
+static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
+		show_js_scheduling_period, set_js_scheduling_period);
+
+#if !MALI_CUSTOMER_RELEASE
+/** Store callback for the @c force_replay sysfs file.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (!strncmp("limit=", buf, MIN(6, count))) {
+		int force_replay_limit;
+		int items = sscanf(buf, "limit=%u", &force_replay_limit);
+
+		if (items == 1) {
+			kbdev->force_replay_random = false;
+			kbdev->force_replay_limit = force_replay_limit;
+			kbdev->force_replay_count = 0;
+
+			return count;
+		}
+	} else if (!strncmp("random_limit", buf, MIN(12, count))) {
+		kbdev->force_replay_random = true;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("norandom_limit", buf, MIN(14, count))) {
+		kbdev->force_replay_random = false;
+		kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("core_req=", buf, MIN(9, count))) {
+		unsigned int core_req;
+		int items = sscanf(buf, "core_req=%x", &core_req);
+
+		if (items == 1) {
+			kbdev->force_replay_core_req = (base_jd_core_req)core_req;
+
+			return count;
+		}
+	}
+	dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n");
+	return -EINVAL;
+}
+
+/** Show callback for the @c force_replay sysfs file.
+ *
+ * This function is called to get the contents of the @c force_replay sysfs
+ * file. It returns the last set value written to the force_replay sysfs file.
+ * If the file didn't get written yet, the values will be 0.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_force_replay(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->force_replay_random)
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=0\nrandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_core_req);
+	else
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=%u\nnorandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_limit,
+				kbdev->force_replay_core_req);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** The sysfs file @c force_replay.
+ *
+ */
+static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay,
+		set_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG
+static ssize_t set_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int softstop_always;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &softstop_always);
+	if (ret || ((softstop_always != 0) && (softstop_always != 1))) {
+		dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n"
+				"Use format <soft_stop_always>\n");
+		return -EINVAL;
+	}
+
+	kbdev->js_data.softstop_always = (bool) softstop_always;
+	dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n",
+			(kbdev->js_data.softstop_always) ?
+			"Enabled" : "Disabled");
+	return count;
+}
+
+static ssize_t show_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * By default, soft-stops are disabled when only a single context is present. The ability to
+ * enable soft-stop when only a single context is present can be used for debug and unit-testing purposes.
+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.)
+ */
+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG
+typedef void (kbasep_debug_command_func) (struct kbase_device *);
+
+enum kbasep_debug_command_code {
+	KBASEP_DEBUG_COMMAND_DUMPTRACE,
+
+	/* This must be the last enum */
+	KBASEP_DEBUG_COMMAND_COUNT
+};
+
+struct kbasep_debug_command {
+	char *str;
+	kbasep_debug_command_func *func;
+};
+
+/** Debug commands supported by the driver */
+static const struct kbasep_debug_command debug_commands[] = {
+	{
+	 .str = "dumptrace",
+	 .func = &kbasep_trace_dump,
+	 }
+};
+
+/** Show callback for the @c debug_command sysfs file.
+ *
+ * This function is called to get the contents of the @c debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c debug_command sysfs file.
+ *
+ * This function is called when the @c debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @ref debug_commands to issue the command.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) {
+		if (sysfs_streq(debug_commands[i].str, buf)) {
+			debug_commands[i].func(kbdev);
+			return count;
+		}
+	}
+
+	/* Debug Command not found */
+	dev_err(dev, "debug_command: command not known\n");
+	return -EINVAL;
+}
+
+/** The sysfs file @c debug_command.
+ *
+ * This is used to issue general debug commands to the device driver.
+ * Reading it will produce a list of debug commands, separated by newlines.
+ * Writing to it with one of those commands will issue said command.
+ */
+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
+#endif /* CONFIG_MALI_DEBUG */
+
+/**
+ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get a description of the present Mali
+ * GPU via the gpuinfo sysfs entry.  This includes the GPU family, the
+ * number of cores, the hardware version and the raw product id.  For
+ * example:
+ *
+ *    Mali-T60x MP4 r0p0 0x6956
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t kbase_show_gpuinfo(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	static const struct gpu_product_id_name {
+		unsigned id;
+		char *name;
+	} gpu_product_id_names[] = {
+		{ .id = GPU_ID_PI_T60X, .name = "Mali-T60x" },
+		{ .id = GPU_ID_PI_T62X, .name = "Mali-T62x" },
+		{ .id = GPU_ID_PI_T72X, .name = "Mali-T72x" },
+		{ .id = GPU_ID_PI_T76X, .name = "Mali-T76x" },
+		{ .id = GPU_ID_PI_T82X, .name = "Mali-T82x" },
+		{ .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
+		{ .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
+		{ .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+		{ .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G71" },
+	};
+	const char *product_name = "(Unknown Mali GPU)";
+	struct kbase_device *kbdev;
+	u32 gpu_id;
+	unsigned product_id, product_id_mask;
+	unsigned i;
+	bool is_new_format;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	is_new_format = GPU_ID_IS_NEW_FORMAT(product_id);
+	product_id_mask =
+		(is_new_format ?
+			GPU_ID2_PRODUCT_MODEL :
+			GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
+		const struct gpu_product_id_name *p = &gpu_product_id_names[i];
+
+		if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) &&
+		    (p->id & product_id_mask) ==
+		    (product_id & product_id_mask)) {
+			product_name = p->name;
+			break;
+		}
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%s MP%d r%dp%d 0x%04X\n",
+		product_name, kbdev->gpu_props.num_cores,
+		(gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT,
+		(gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT,
+		product_id);
+}
+static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
+
+/**
+ * set_dvfs_period - Store callback for the dvfs_period sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the dvfs_period sysfs file is written to. It
+ * checks the data written, and if valid updates the DVFS period variable,
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_dvfs_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int dvfs_period;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &dvfs_period);
+	if (ret || dvfs_period <= 0) {
+		dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n"
+				"Use format <dvfs_period_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.dvfs_period = dvfs_period;
+	dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period);
+
+	return count;
+}
+
+/**
+ * show_dvfs_period - Show callback for the dvfs_period sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_dvfs_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period);
+
+	return ret;
+}
+
+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
+		set_dvfs_period);
+
+/**
+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the pm_poweroff sysfs file is written to.
+ *
+ * This file contains three values separated by whitespace. The values
+ * are gpu_poweroff_time (the period of the poweroff timer, in ns),
+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle
+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer
+ * ticks before the GPU is powered off), in that order.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	s64 gpu_poweroff_time;
+	int poweroff_shader_ticks, poweroff_gpu_ticks;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time,
+			&poweroff_shader_ticks,
+			&poweroff_gpu_ticks);
+	if (items != 3) {
+		dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n"
+				"Use format <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+	kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
+	kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+
+	return count;
+}
+
+/**
+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
+			ktime_to_ns(kbdev->pm.gpu_poweroff_time),
+			kbdev->pm.poweroff_shader_ticks,
+			kbdev->pm.poweroff_gpu_ticks);
+
+	return ret;
+}
+
+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
+		set_pm_poweroff);
+
+/**
+ * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the reset_timeout sysfs file is written to. It
+ * checks the data written, and if valid updates the reset timeout.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_reset_timeout(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int reset_timeout;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &reset_timeout);
+	if (ret || reset_timeout <= 0) {
+		dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n"
+				"Use format <reset_timeout_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->reset_timeout_ms = reset_timeout;
+	dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout);
+
+	return count;
+}
+
+/**
+ * show_reset_timeout - Show callback for the reset_timeout sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current reset timeout.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_reset_timeout(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms);
+
+	return ret;
+}
+
+static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout,
+		set_reset_timeout);
+
+
+
+static ssize_t show_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_size);
+	if (err)
+		return err;
+
+	kbase_mem_pool_trim(&kbdev->mem_pool, new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size,
+		set_mem_pool_size);
+
+static ssize_t show_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_max_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_max_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_max_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
+		set_mem_pool_max_size);
+
+
+static int kbasep_protected_mode_enter(struct kbase_device *kbdev)
+{
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+		GPU_COMMAND_SET_PROTECTED_MODE, NULL);
+	return 0;
+}
+
+static bool kbasep_protected_mode_supported(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static struct kbase_protected_ops kbasep_protected_ops = {
+	.protected_mode_enter = kbasep_protected_mode_enter,
+	.protected_mode_reset = NULL,
+	.protected_mode_supported = kbasep_protected_mode_supported,
+};
+
+static void kbasep_protected_mode_init(struct kbase_device *kbdev)
+{
+	kbdev->protected_ops = NULL;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		/* Use native protected ops */
+		kbdev->protected_ops = &kbasep_protected_ops;
+	}
+#ifdef PROTECTED_CALLBACKS
+	else
+		kbdev->protected_ops = PROTECTED_CALLBACKS;
+#endif
+
+	if (kbdev->protected_ops)
+		kbdev->protected_mode_support =
+				kbdev->protected_ops->protected_mode_supported(kbdev);
+	else
+		kbdev->protected_mode_support = false;
+}
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	int err = -ENOMEM;
+
+	if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) {
+		dev_err(kbdev->dev, "Register window unavailable\n");
+		err = -EIO;
+		goto out_region;
+	}
+
+	kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+	if (!kbdev->reg) {
+		dev_err(kbdev->dev, "Can't remap register window\n");
+		err = -EINVAL;
+		goto out_ioremap;
+	}
+
+	return 0;
+
+ out_ioremap:
+	release_mem_region(kbdev->reg_start, kbdev->reg_size);
+ out_region:
+	return err;
+}
+
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+	if (kbdev->reg) {
+		iounmap(kbdev->reg);
+		release_mem_region(kbdev->reg_start, kbdev->reg_size);
+		kbdev->reg = NULL;
+		kbdev->reg_start = 0;
+		kbdev->reg_size = 0;
+	}
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+static int registers_map(struct kbase_device * const kbdev)
+{
+
+		/* the first memory resource is the physical address of the GPU
+		 * registers */
+		struct platform_device *pdev = to_platform_device(kbdev->dev);
+		struct resource *reg_res;
+		int err;
+
+		reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!reg_res) {
+			dev_err(kbdev->dev, "Invalid register resource\n");
+			return -ENOENT;
+		}
+
+		kbdev->reg_start = reg_res->start;
+		kbdev->reg_size = resource_size(reg_res);
+
+		err = kbase_common_reg_map(kbdev);
+		if (err) {
+			dev_err(kbdev->dev, "Failed to map registers\n");
+			return err;
+		}
+
+	return 0;
+}
+
+static void registers_unmap(struct kbase_device *kbdev)
+{
+	kbase_common_reg_unmap(kbdev);
+}
+
+static int power_control_init(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int err = 0;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
+	if (IS_ERR_OR_NULL(kbdev->regulator)) {
+		err = PTR_ERR(kbdev->regulator);
+		kbdev->regulator = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get regulator\n");
+			return err;
+		}
+		dev_info(kbdev->dev,
+			"Continuing without Mali regulator control\n");
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+	kbdev->clock = clk_get(kbdev->dev, "clk_mali");
+	if (IS_ERR_OR_NULL(kbdev->clock)) {
+		err = PTR_ERR(kbdev->clock);
+		kbdev->clock = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get clock\n");
+			goto fail;
+		}
+		dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare_enable(kbdev->clock);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Failed to prepare and enable clock (%d)\n",
+				err);
+			goto fail;
+		}
+	}
+
+#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	err = dev_pm_opp_of_add_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	err = of_init_opp_table(kbdev->dev);
+#else
+	err = 0;
+#endif /* LINUX_VERSION_CODE */
+	if (err)
+		dev_dbg(kbdev->dev, "OPP table not found\n");
+#endif /* CONFIG_OF && CONFIG_PM_OPP */
+
+	return 0;
+
+fail:
+
+if (kbdev->clock != NULL) {
+	clk_put(kbdev->clock);
+	kbdev->clock = NULL;
+}
+
+#ifdef CONFIG_REGULATOR
+	if (NULL != kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif
+
+	return err;
+}
+
+static void power_control_term(struct kbase_device *kbdev)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	dev_pm_opp_of_remove_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+	of_free_opp_table(kbdev->dev);
+#endif
+
+	if (kbdev->clock) {
+		clk_disable_unprepare(kbdev->clock);
+		clk_put(kbdev->clock);
+		kbdev->clock = NULL;
+	}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	if (kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#if KBASE_GPU_RESET_EN
+#include <mali_kbase_hwaccess_jm.h>
+
+static void trigger_quirks_reload(struct kbase_device *kbdev)
+{
+	kbase_pm_context_active(kbdev);
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	kbase_pm_context_idle(kbdev);
+}
+
+#define MAKE_QUIRK_ACCESSORS(type) \
+static int type##_quirks_set(void *data, u64 val) \
+{ \
+	struct kbase_device *kbdev; \
+	kbdev = (struct kbase_device *)data; \
+	kbdev->hw_quirks_##type = (u32)val; \
+	trigger_quirks_reload(kbdev); \
+	return 0;\
+} \
+\
+static int type##_quirks_get(void *data, u64 *val) \
+{ \
+	struct kbase_device *kbdev;\
+	kbdev = (struct kbase_device *)data;\
+	*val = kbdev->hw_quirks_##type;\
+	return 0;\
+} \
+DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
+		type##_quirks_set, "%llu\n")
+
+MAKE_QUIRK_ACCESSORS(sc);
+MAKE_QUIRK_ACCESSORS(tiler);
+MAKE_QUIRK_ACCESSORS(mmu);
+
+#endif /* KBASE_GPU_RESET_EN */
+
+static int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	struct dentry *debugfs_ctx_defaults_directory;
+	int err;
+
+	kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
+			NULL);
+	if (!kbdev->mali_debugfs_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
+			kbdev->mali_debugfs_directory);
+	if (!kbdev->debugfs_ctx_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
+			kbdev->debugfs_ctx_directory);
+	if (!debugfs_ctx_defaults_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+#if !MALI_CUSTOMER_RELEASE
+	kbasep_regs_dump_debugfs_add(kbdev);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+	kbase_debug_job_fault_debugfs_init(kbdev);
+	kbasep_gpu_memory_debugfs_init(kbdev);
+	kbase_as_fault_debugfs_init(kbdev);
+#if KBASE_GPU_RESET_EN
+	debugfs_create_file("quirks_sc", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_sc_quirks);
+	debugfs_create_file("quirks_tiler", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_tiler_quirks);
+	debugfs_create_file("quirks_mmu", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_mmu_quirks);
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_COH_USER
+	debugfs_create_bool("infinite_cache", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->infinite_cache_active_default);
+#endif /* CONFIG_MALI_COH_USER */
+
+	debugfs_create_size_t("mem_pool_max_size", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->mem_pool_max_size_default);
+
+#if KBASE_TRACE_ENABLE
+	kbasep_trace_debugfs_init(kbdev);
+#endif /* KBASE_TRACE_ENABLE */
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kbasep_trace_timeline_debugfs_init(kbdev);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+	return 0;
+
+out:
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+	return err;
+}
+
+static void kbase_device_debugfs_term(struct kbase_device *kbdev)
+{
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+}
+
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	return 0;
+}
+
+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
+#endif /* CONFIG_DEBUG_FS */
+
+static void kbase_device_coherency_init(struct kbase_device *kbdev, u32 gpu_id)
+{
+#ifdef CONFIG_OF
+	u32 supported_coherency_bitmap =
+		kbdev->gpu_props.props.raw_props.coherency_mode;
+	const void *coherency_override_dts;
+	u32 override_coherency;
+#endif /* CONFIG_OF */
+
+	kbdev->system_coherency = COHERENCY_NONE;
+
+	/* device tree may override the coherency */
+#ifdef CONFIG_OF
+	coherency_override_dts = of_get_property(kbdev->dev->of_node,
+						"system-coherency",
+						NULL);
+	if (coherency_override_dts) {
+
+		override_coherency = be32_to_cpup(coherency_override_dts);
+
+		if ((override_coherency <= COHERENCY_NONE) &&
+			(supported_coherency_bitmap &
+			 COHERENCY_FEATURE_BIT(override_coherency))) {
+
+			kbdev->system_coherency = override_coherency;
+
+			dev_info(kbdev->dev,
+				"Using coherency mode %u set from dtb",
+				override_coherency);
+		} else
+			dev_warn(kbdev->dev,
+				"Ignoring unsupported coherency mode %u set from dtb",
+				override_coherency);
+	}
+
+#endif /* CONFIG_OF */
+
+	kbdev->gpu_props.props.raw_props.coherency_mode =
+		kbdev->system_coherency;
+}
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+
+/* Callback used by the kbase bus logger client, to initiate a GPU reset
+ * when the bus log is restarted.  GPU reset is used as reference point
+ * in HW bus log analyses.
+ */
+static void kbase_logging_started_cb(void *data)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)data;
+
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	dev_info(kbdev->dev, "KBASE - Bus logger restarted\n");
+}
+#endif
+
+static struct attribute *kbase_attrs[] = {
+#ifdef CONFIG_MALI_DEBUG
+	&dev_attr_debug_command.attr,
+	&dev_attr_js_softstop_always.attr,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&dev_attr_force_replay.attr,
+#endif
+	&dev_attr_js_timeouts.attr,
+	&dev_attr_soft_job_timeout.attr,
+	&dev_attr_gpuinfo.attr,
+	&dev_attr_dvfs_period.attr,
+	&dev_attr_pm_poweroff.attr,
+	&dev_attr_reset_timeout.attr,
+	&dev_attr_js_scheduling_period.attr,
+	&dev_attr_power_policy.attr,
+	&dev_attr_core_availability_policy.attr,
+	&dev_attr_core_mask.attr,
+	&dev_attr_mem_pool_size.attr,
+	&dev_attr_mem_pool_max_size.attr,
+	NULL
+};
+
+static const struct attribute_group kbase_attr_group = {
+	.attrs = kbase_attrs,
+};
+
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	const struct list_head *dev_list;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	if (kbdev->inited_subsys & inited_buslogger) {
+		bl_core_client_unregister(kbdev->buslogger);
+		kbdev->inited_subsys &= ~inited_buslogger;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_sysfs_group) {
+		sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+		kbdev->inited_subsys &= ~inited_sysfs_group;
+	}
+
+	if (kbdev->inited_subsys & inited_dev_list) {
+		dev_list = kbase_dev_list_get();
+		list_del(&kbdev->entry);
+		kbase_dev_list_put(dev_list);
+		kbdev->inited_subsys &= ~inited_dev_list;
+	}
+
+	if (kbdev->inited_subsys & inited_misc_register) {
+		misc_deregister(&kbdev->mdev);
+		kbdev->inited_subsys &= ~inited_misc_register;
+	}
+
+	if (kbdev->inited_subsys & inited_get_device) {
+		put_device(kbdev->dev);
+		kbdev->inited_subsys &= ~inited_get_device;
+	}
+
+	if (kbdev->inited_subsys & inited_debugfs) {
+		kbase_device_debugfs_term(kbdev);
+		kbdev->inited_subsys &= ~inited_debugfs;
+	}
+
+	if (kbdev->inited_subsys & inited_job_fault) {
+		kbase_debug_job_fault_dev_term(kbdev);
+		kbdev->inited_subsys &= ~inited_job_fault;
+	}
+
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	if (kbdev->inited_subsys & inited_ipa) {
+		kbase_ipa_term(kbdev->ipa_ctx);
+		kbdev->inited_subsys &= ~inited_ipa;
+	}
+#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+
+	if (kbdev->inited_subsys & inited_vinstr) {
+		kbase_vinstr_term(kbdev->vinstr_ctx);
+		kbdev->inited_subsys &= ~inited_vinstr;
+	}
+
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_term(kbdev);
+		kbdev->inited_subsys &= ~inited_devfreq;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_backend_late) {
+		kbase_backend_late_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_late;
+	}
+
+	if (kbdev->inited_subsys & inited_tlstream) {
+		kbase_tlstream_term();
+		kbdev->inited_subsys &= ~inited_tlstream;
+	}
+
+	/* Bring job and mem sys to a halt before we continue termination */
+
+	if (kbdev->inited_subsys & inited_js)
+		kbasep_js_devdata_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_mem)
+		kbase_mem_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_js) {
+		kbasep_js_devdata_term(kbdev);
+		kbdev->inited_subsys &= ~inited_js;
+	}
+
+	if (kbdev->inited_subsys & inited_mem) {
+		kbase_mem_term(kbdev);
+		kbdev->inited_subsys &= ~inited_mem;
+	}
+
+	if (kbdev->inited_subsys & inited_pm_runtime_init) {
+		kbdev->pm.callback_power_runtime_term(kbdev);
+		kbdev->inited_subsys &= ~inited_pm_runtime_init;
+	}
+
+	if (kbdev->inited_subsys & inited_device) {
+		kbase_device_term(kbdev);
+		kbdev->inited_subsys &= ~inited_device;
+	}
+
+	if (kbdev->inited_subsys & inited_backend_early) {
+		kbase_backend_early_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_early;
+	}
+
+	if (kbdev->inited_subsys & inited_power_control) {
+		power_control_term(kbdev);
+		kbdev->inited_subsys &= ~inited_power_control;
+	}
+
+	if (kbdev->inited_subsys & inited_registers_map) {
+		registers_unmap(kbdev);
+		kbdev->inited_subsys &= ~inited_registers_map;
+	}
+
+#ifdef CONFIG_MALI_NO_MALI
+	if (kbdev->inited_subsys & inited_gpu_device) {
+		gpu_device_destroy(kbdev);
+		kbdev->inited_subsys &= ~inited_gpu_device;
+	}
+#endif /* CONFIG_MALI_NO_MALI */
+
+	if (kbdev->inited_subsys != 0)
+		dev_err(kbdev->dev, "Missing sub system termination\n");
+
+	kbase_device_free(kbdev);
+
+	return 0;
+}
+
+static int kbase_platform_device_probe(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev;
+	struct mali_base_gpu_core_props *core_props;
+	u32 gpu_id;
+	const struct list_head *dev_list;
+	int err = 0;
+
+#ifdef CONFIG_OF
+	err = kbase_platform_early_init();
+	if (err) {
+		dev_err(&pdev->dev, "Early platform initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+#endif
+
+	kbdev = kbase_device_alloc();
+	if (!kbdev) {
+		dev_err(&pdev->dev, "Allocate device failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+
+	kbdev->dev = &pdev->dev;
+	dev_set_drvdata(kbdev->dev, kbdev);
+
+#ifdef CONFIG_MALI_NO_MALI
+	err = gpu_device_create(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Dummy model initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_gpu_device;
+#endif /* CONFIG_MALI_NO_MALI */
+
+	err = assign_irqs(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "IRQ search failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	err = registers_map(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Register map failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_registers_map;
+
+	err = power_control_init(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Power control initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_power_control;
+
+	err = kbase_backend_early_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Early backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_early;
+
+	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+			kbase_dev_nr);
+
+	kbase_disjoint_init(kbdev);
+
+	/* obtain min/max configured gpu frequencies */
+	core_props = &(kbdev->gpu_props.props.core_props);
+	core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+	core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+
+	kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US;
+
+	err = kbase_device_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Device initialization failed (%d)\n", err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_device;
+
+	if (kbdev->pm.callback_power_runtime_init) {
+		err = kbdev->pm.callback_power_runtime_init(kbdev);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Runtime PM initialization failed\n");
+			kbase_platform_device_remove(pdev);
+			return err;
+		}
+		kbdev->inited_subsys |= inited_pm_runtime_init;
+	}
+
+	err = kbase_mem_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Memory subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_mem;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+	gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	kbase_device_coherency_init(kbdev, gpu_id);
+
+	kbasep_protected_mode_init(kbdev);
+
+	err = kbasep_js_devdata_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job JS devdata initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_js;
+
+	err = kbase_tlstream_init();
+	if (err) {
+		dev_err(kbdev->dev, "Timeline stream initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_tlstream;
+
+	err = kbase_backend_late_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Late backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_late;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	err = kbase_devfreq_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Fevfreq initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_devfreq;
+#endif /* CONFIG_MALI_DEVFREQ */
+
+	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+	if (!kbdev->vinstr_ctx) {
+		dev_err(kbdev->dev,
+			"Virtual instrumentation initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+	kbdev->inited_subsys |= inited_vinstr;
+
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	kbdev->ipa_ctx = kbase_ipa_init(kbdev);
+	if (!kbdev->ipa_ctx) {
+		dev_err(kbdev->dev, "IPA initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+
+	kbdev->inited_subsys |= inited_ipa;
+#endif  /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+
+	err = kbase_debug_job_fault_dev_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job fault debug initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_job_fault;
+
+	err = kbase_device_debugfs_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "DebugFS initialization failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_debugfs;
+
+	/* initialize the kctx list */
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
+
+	kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+	kbdev->mdev.name = kbdev->devname;
+	kbdev->mdev.fops = &kbase_fops;
+	kbdev->mdev.parent = get_device(kbdev->dev);
+	kbdev->inited_subsys |= inited_get_device;
+
+	err = misc_register(&kbdev->mdev);
+	if (err) {
+		dev_err(kbdev->dev, "Misc device registration failed for %s\n",
+			kbdev->devname);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_misc_register;
+
+	dev_list = kbase_dev_list_get();
+	list_add(&kbdev->entry, &kbase_dev_list);
+	kbase_dev_list_put(dev_list);
+	kbdev->inited_subsys |= inited_dev_list;
+
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+	if (err) {
+		dev_err(&pdev->dev, "SysFS group creation failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_sysfs_group;
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	err = bl_core_client_register(kbdev->devname,
+						kbase_logging_started_cb,
+						kbdev, &kbdev->buslogger,
+						THIS_MODULE, NULL);
+	if (err == 0) {
+		kbdev->inited_subsys |= inited_buslogger;
+		bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
+	} else {
+		dev_warn(kbdev->dev, "Bus log client registration failed\n");
+		err = 0;
+	}
+#endif
+
+	dev_info(kbdev->dev,
+			"Probed as %s\n", dev_name(kbdev->mdev.this_device));
+
+	kbase_dev_nr++;
+
+	return err;
+}
+
+/** Suspend callback from the OS.
+ *
+ * This is called by Linux when the device should suspend.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 1;
+#endif
+
+	kbase_pm_suspend(kbdev);
+	return 0;
+}
+
+/** Resume callback from the OS.
+ *
+ * This is called by Linux when the device should resume from suspension.
+ *
+ * @param dev  The device to resume
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_resume(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kbase_pm_resume(kbdev);
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 0;
+#endif
+	return 0;
+}
+
+/** Runtime suspend callback from the OS.
+ *
+ * This is called by Linux when the device should prepare for a condition in which it will
+ * not be able to communicate with the CPU(s) and RAM due to power management.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 1;
+#endif
+
+	if (kbdev->pm.backend.callback_power_runtime_off) {
+		kbdev->pm.backend.callback_power_runtime_off(kbdev);
+		dev_dbg(dev, "runtime suspend\n");
+	}
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/** Runtime resume callback from the OS.
+ *
+ * This is called by Linux when the device should go into a fully active state.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_resume(struct device *dev)
+{
+	int ret = 0;
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->pm.backend.callback_power_runtime_on) {
+		ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
+		dev_dbg(dev, "runtime resume\n");
+	}
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 0;
+#endif
+
+	return ret;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_device_runtime_idle - Runtime idle callback from the OS.
+ * @dev: The device to suspend
+ *
+ * This is called by Linux when the device appears to be inactive and it might
+ * be placed into a low power state.
+ *
+ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend,
+ * otherwise a standard Linux error code
+ */
+static int kbase_device_runtime_idle(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* Use platform specific implementation if it exists. */
+	if (kbdev->pm.backend.callback_power_runtime_idle)
+		return kbdev->pm.backend.callback_power_runtime_idle(kbdev);
+
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+#ifndef CONFIG_MALI_DEVFREQ
+static int mali_os_freeze(struct device *device)
+{
+	mali_dev_freeze();
+	return kbase_device_suspend(device);
+}
+
+static int mali_os_restore(struct device *device)
+{
+	mali_dev_restore();
+	return kbase_device_resume(device);
+}
+#endif
+
+
+/** The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+	.suspend = kbase_device_suspend,
+	.resume = kbase_device_resume,
+#ifndef CONFIG_MALI_DEVFREQ
+	.freeze = mali_os_freeze,
+	.thaw = kbase_device_resume,
+	.restore = mali_os_restore,
+#endif
+#ifdef KBASE_PM_RUNTIME
+	.runtime_suspend = kbase_device_runtime_suspend,
+	.runtime_resume = kbase_device_runtime_resume,
+	.runtime_idle = kbase_device_runtime_idle,
+#endif /* KBASE_PM_RUNTIME */
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+	{ .compatible = "arm,malit6xx" },
+	{ .compatible = "arm,mali-midgard" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
+
+static struct platform_driver kbase_platform_driver = {
+	.probe = kbase_platform_device_probe,
+	.remove = kbase_platform_device_remove,
+	.driver = {
+		   .name = kbase_drv_name,
+		   .owner = THIS_MODULE,
+		   .pm = &kbase_pm_ops,
+		   .of_match_table = of_match_ptr(kbase_dt_ids),
+	},
+};
+
+/*
+ * The driver will not provide a shortcut to create the Mali platform device
+ * anymore when using Device Tree.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else
+
+static int __init kbase_driver_init(void)
+{
+	int ret;
+
+	ret = kbase_platform_early_init();
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	ret = kbase_platform_fake_register();
+	if (ret)
+		return ret;
+#endif
+	ret = platform_driver_register(&kbase_platform_driver);
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	if (ret)
+		kbase_platform_fake_unregister();
+#endif
+	return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+	platform_driver_unregister(&kbase_platform_driver);
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	kbase_platform_fake_unregister();
+#endif
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
+		__stringify(BASE_UK_VERSION_MAJOR) "." \
+		__stringify(BASE_UK_VERSION_MINOR) ")");
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
+#define CREATE_TRACE_POINTS
+#endif
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Create the trace points (otherwise we just get code to call a tracepoint) */
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
+
+void kbase_trace_mali_pm_status(u32 event, u64 value)
+{
+	trace_mali_pm_status(event, value);
+}
+
+void kbase_trace_mali_pm_power_off(u32 event, u64 value)
+{
+	trace_mali_pm_power_off(event, value);
+}
+
+void kbase_trace_mali_pm_power_on(u32 event, u64 value)
+{
+	trace_mali_pm_power_on(event, value);
+}
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id)
+{
+	trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id);
+}
+
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value)
+{
+	trace_mali_page_fault_insert_pages(event, value);
+}
+
+void kbase_trace_mali_mmu_as_in_use(int event)
+{
+	trace_mali_mmu_as_in_use(event);
+}
+
+void kbase_trace_mali_mmu_as_released(int event)
+{
+	trace_mali_mmu_as_released(event);
+}
+
+void kbase_trace_mali_total_alloc_pages_change(long long int event)
+{
+	trace_mali_total_alloc_pages_change(event);
+}
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_MALI_SYSTEM_TRACE
+#include "mali_linux_kbase_trace.h"
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100755
index 0000000..fb57ac2
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+	NULL,
+	NULL
+};
+
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+	kbasep_debug_assert_registered_cb.func = func;
+	kbasep_debug_assert_registered_cb.param = param;
+}
+
+void kbasep_debug_assert_call_hook(void)
+{
+	if (kbasep_debug_assert_registered_cb.func != NULL)
+		kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
new file mode 100755
index 0000000..5fff289
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If different from 0, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+struct kbasep_debug_assert_cb {
+	kbase_debug_assert_hook *func;
+	void *param;
+};
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+		"In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION __func__
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated to the @see KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+		do { \
+			pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+			pr_err(__VA_ARGS__);\
+			pr_err("\n");\
+		} while (false)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @see KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+	KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+	/**
+	 * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+	 * @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+	 *
+	 * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+	 *
+	 * @param expr Boolean expression
+	 * @param ...  Message to display when @a expr is false, as a format string followed by format arguments.
+	 */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+		do { \
+			if (!(expr)) { \
+				KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+				KBASE_CALL_ASSERT_HOOK();\
+				BUG();\
+			} \
+		} while (false)
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif				/* CONFIG_MALI_DEBUG */
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif				/* _KBASE_DEBUG_H */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
new file mode 100755
index 0000000..83c5c37
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
@@ -0,0 +1,502 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/spinlock.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+	bool             ret;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	ret = !list_empty(event_list);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return ret;
+}
+
+static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
+	struct base_job_fault_event *event;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		return true;
+	}
+	list_for_each_entry(event, event_list, head) {
+		if (event->katom->kctx == kctx) {
+			spin_unlock_irqrestore(&kbdev->job_fault_event_lock,
+					flags);
+			return false;
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+	return true;
+}
+
+/* wait until the fault happen and copy the event */
+static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
+		struct base_job_fault_event *event)
+{
+	struct list_head            *event_list = &kbdev->job_fault_event_list;
+	struct base_job_fault_event *event_in;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		if (wait_event_interruptible(kbdev->job_fault_wq,
+				 kbase_is_job_fault_event_pending(kbdev)))
+			return -ERESTARTSYS;
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+
+	event_in = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	event->event_code = event_in->event_code;
+	event->katom = event_in->katom;
+
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return 0;
+
+}
+
+/* remove the event from the queue */
+static struct base_job_fault_event *kbase_job_fault_event_dequeue(
+		struct kbase_device *kbdev, struct list_head *event_list)
+{
+	struct base_job_fault_event *event;
+
+	event = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	list_del(event_list->next);
+
+	return event;
+
+}
+
+/* Remove all the following atoms after the failed atom in the same context
+ * Call the postponed bottom half of job done.
+ * Then, this context could be rescheduled.
+ */
+static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
+{
+	struct list_head *event_list = &kctx->job_fault_resume_event_list;
+
+	while (!list_empty(event_list)) {
+		struct base_job_fault_event *event;
+
+		event = kbase_job_fault_event_dequeue(kctx->kbdev,
+				&kctx->job_fault_resume_event_list);
+		kbase_jd_done_worker(&event->katom->work);
+	}
+
+}
+
+/* Remove all the failed atoms that belong to different contexts
+ * Resume all the contexts that were suspend due to failed job
+ */
+static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	while (!list_empty(event_list)) {
+		kbase_job_fault_event_dequeue(kbdev, event_list);
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		wake_up(&kbdev->job_fault_resume_wq);
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+}
+
+static void kbase_job_fault_resume_worker(struct work_struct *data)
+{
+	struct base_job_fault_event *event = container_of(data,
+			struct base_job_fault_event, job_fault_work);
+	struct kbase_context *kctx;
+	struct kbase_jd_atom *katom;
+
+	katom = event->katom;
+	kctx = katom->kctx;
+
+	dev_info(kctx->kbdev->dev, "Job dumping wait\n");
+
+	/* When it was waked up, it need to check if queue is empty or the
+	 * failed atom belongs to different context. If yes, wake up. Both
+	 * of them mean the failed job has been dumped. Please note, it
+	 * should never happen that the job_fault_event_list has the two
+	 * atoms belong to the same context.
+	 */
+	wait_event(kctx->kbdev->job_fault_resume_wq,
+			 kbase_ctx_has_no_event_pending(kctx));
+
+	atomic_set(&kctx->job_fault_count, 0);
+	kbase_jd_done_worker(&katom->work);
+
+	/* In case the following atoms were scheduled during failed job dump
+	 * the job_done_worker was held. We need to rerun it after the dump
+	 * was finished
+	 */
+	kbase_job_fault_resume_event_cleanup(kctx);
+
+	dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
+}
+
+static struct base_job_fault_event *kbase_job_fault_event_queue(
+		struct list_head *event_list,
+		struct kbase_jd_atom *atom,
+		u32 completion_code)
+{
+	struct base_job_fault_event *event;
+
+	event = &atom->fault_event;
+
+	event->katom = atom;
+	event->event_code = completion_code;
+
+	list_add_tail(&event->head, event_list);
+
+	return event;
+
+}
+
+static void kbase_job_fault_event_post(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, u32 completion_code)
+{
+	struct base_job_fault_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
+				katom, completion_code);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	wake_up_interruptible(&kbdev->job_fault_wq);
+
+	INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
+	queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
+
+	dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
+			katom->kctx->tgid, katom->kctx->id);
+
+}
+
+/*
+ * This function will process the job fault
+ * Get the register copy
+ * Send the failed job dump event
+ * Create a Wait queue to wait until the job dump finish
+ */
+
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Check if dumping is in the process
+	 * only one atom of each context can be dumped at the same time
+	 * If the atom belongs to different context, it can be dumped
+	 */
+	if (atomic_read(&kctx->job_fault_count) > 0) {
+		kbase_job_fault_event_queue(
+				&kctx->job_fault_resume_event_list,
+				katom, completion_code);
+		dev_info(kctx->kbdev->dev, "queue:%d\n",
+				kbase_jd_atom_id(kctx, katom));
+		return true;
+	}
+
+	if (kctx->kbdev->job_fault_debug == true) {
+
+		if (completion_code != BASE_JD_EVENT_DONE) {
+
+			if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
+				dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
+				return false;
+			}
+
+			kbase_job_fault_event_post(kctx->kbdev, katom,
+					completion_code);
+			atomic_inc(&kctx->job_fault_count);
+			dev_info(kctx->kbdev->dev, "post:%d\n",
+					kbase_jd_atom_id(kctx, katom));
+			return true;
+
+		}
+	}
+	return false;
+
+}
+
+static int debug_job_fault_show(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+	struct kbase_context *kctx = event->katom->kctx;
+	int i;
+
+	dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
+			kctx->tgid, kctx->id, event->reg_offset);
+
+	if (kctx->reg_dump == NULL) {
+		dev_warn(kbdev->dev, "reg dump is NULL");
+		return -1;
+	}
+
+	if (kctx->reg_dump[event->reg_offset] ==
+			REGISTER_DUMP_TERMINATION_FLAG) {
+		/* Return the error here to stop the read. And the
+		 * following next() will not be called. The stop can
+		 * get the real event resource and release it
+		 */
+		return -1;
+	}
+
+	if (event->reg_offset == 0)
+		seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
+
+	for (i = 0; i < 50; i++) {
+		if (kctx->reg_dump[event->reg_offset] ==
+				REGISTER_DUMP_TERMINATION_FLAG) {
+			break;
+		}
+		seq_printf(m, "%08x: %08x\n",
+				kctx->reg_dump[event->reg_offset],
+				kctx->reg_dump[1+event->reg_offset]);
+		event->reg_offset += 2;
+
+	}
+
+
+	return 0;
+}
+static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+
+	dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
+			event->reg_offset, (int)*pos);
+
+	return event;
+}
+
+static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event;
+
+	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);
+
+	/* The condition is trick here. It needs make sure the
+	 * fault hasn't happened and the dumping hasn't been started,
+	 * or the dumping has finished
+	 */
+	if (*pos == 0) {
+		event = kmalloc(sizeof(*event), GFP_KERNEL);
+		if (!event)
+			return NULL;
+		event->reg_offset = 0;
+		if (kbase_job_fault_event_wait(kbdev, event)) {
+			kfree(event);
+			return NULL;
+		}
+
+		/* The cache flush workaround is called in bottom half of
+		 * job done but we delayed it. Now we should clean cache
+		 * earlier. Then the GPU memory dump should be correct.
+		 */
+		if (event->katom->need_cache_flush_cores_retained) {
+			kbase_gpu_cacheclean(kbdev, event->katom);
+			event->katom->need_cache_flush_cores_retained = 0;
+		}
+
+	} else
+		return NULL;
+
+	return event;
+}
+
+static void debug_job_fault_stop(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+
+	/* here we wake up the kbase_jd_done_worker after stop, it needs
+	 * get the memory dump before the register dump in debug daemon,
+	 * otherwise, the memory dump may be incorrect.
+	 */
+
+	if (v != NULL) {
+		kfree(v);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 1");
+
+	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+		if (!list_empty(&kbdev->job_fault_event_list)) {
+			kbase_job_fault_event_dequeue(kbdev,
+				&kbdev->job_fault_event_list);
+			wake_up(&kbdev->job_fault_resume_wq);
+		}
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
+	}
+
+}
+
+static const struct seq_operations ops = {
+	.start = debug_job_fault_start,
+	.next = debug_job_fault_next,
+	.stop = debug_job_fault_stop,
+	.show = debug_job_fault_show,
+};
+
+static int debug_job_fault_open(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_open(file, &ops);
+
+	((struct seq_file *)file->private_data)->private = kbdev;
+	dev_info(kbdev->dev, "debug job fault seq open");
+
+	kbdev->job_fault_debug = true;
+
+	return 0;
+
+}
+
+static int debug_job_fault_release(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_release(in, file);
+
+	kbdev->job_fault_debug = false;
+
+	/* Clean the unprocessed job fault. After that, all the suspended
+	 * contexts could be rescheduled.
+	 */
+	kbase_job_fault_event_cleanup(kbdev);
+
+	dev_info(kbdev->dev, "debug job fault seq close");
+
+	return 0;
+}
+
+static const struct file_operations kbasep_debug_job_fault_fops = {
+	.open = debug_job_fault_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = debug_job_fault_release,
+};
+
+/*
+ *  Initialize debugfs entry for job fault dump
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("job_fault", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_debug_job_fault_fops);
+}
+
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+
+	INIT_LIST_HEAD(&kbdev->job_fault_event_list);
+
+	init_waitqueue_head(&(kbdev->job_fault_wq));
+	init_waitqueue_head(&(kbdev->job_fault_resume_wq));
+	spin_lock_init(&kbdev->job_fault_event_lock);
+
+	kbdev->job_fault_resume_workq = alloc_workqueue(
+			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
+	if (!kbdev->job_fault_resume_workq)
+		return -ENOMEM;
+
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+/*
+ * Release the relevant resource per device
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->job_fault_resume_workq);
+}
+
+
+/*
+ *  Initialize the relevant data structure per context
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
+{
+
+	/* We need allocate double size register range
+	 * Because this memory will keep the register address and value
+	 */
+	kctx->reg_dump = vmalloc(0x4000 * 2);
+	if (kctx->reg_dump == NULL)
+		return;
+
+	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
+		vfree(kctx->reg_dump);
+		kctx->reg_dump = NULL;
+	}
+	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
+	atomic_set(&kctx->job_fault_count, 0);
+
+}
+
+/*
+ *  release the relevant resource per context
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
+{
+	vfree(kctx->reg_dump);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
new file mode 100755
index 0000000..a2bf898
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
@@ -0,0 +1,96 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_JOB_FAULT_H
+#define _KBASE_DEBUG_JOB_FAULT_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
+
+/**
+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue
+ *		per device and initialize the required lists.
+ * @kbdev:	Device pointer
+ *
+ * Return: Zero on success or a negative error code.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_dev_term - Clean up resources created in
+ *		kbase_debug_job_fault_dev_init.
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_context_init - Initialize the relevant
+ *		data structure per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_context_term - Release the relevant
+ *		resource per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_process - Process the failed job.
+ *      It will send a event and wake up the job fault waiting queue
+ *      Then create a work queue to wait for job dump finish
+ *      This function should be called in the interrupt handler and before
+ *      jd_done that make sure the jd_done_worker will be delayed until the
+ *      job dump finish
+ * @katom: The failed atom pointer
+ * @completion_code: the job status
+ * @return true if dump is going on
+ */
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code);
+
+
+/**
+ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
+ *      address during the job fault process, the relevant registers will
+ *      be saved when a job fault happen
+ * @kctx: KBase context pointer
+ * @reg_range: Maximum register address space
+ * @return true if initializing successfully
+ */
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range);
+
+/**
+ * kbase_job_fault_get_reg_snapshot - Read the interested registers for
+ *      failed job dump
+ * @kctx: KBase context pointer
+ * @return true if getting registers successfully
+ */
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
+
+#endif  /*_KBASE_DEBUG_JOB_FAULT_H*/
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
new file mode 100755
index 0000000..a98355e
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -0,0 +1,279 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+struct debug_mem_mapping {
+	struct list_head node;
+
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long flags;
+
+	u64 start_pfn;
+	size_t nr_pages;
+};
+
+struct debug_mem_data {
+	struct list_head mapping_list;
+	struct kbase_context *kctx;
+};
+
+struct debug_mem_seq_off {
+	struct list_head *lh;
+	size_t offset;
+};
+
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data;
+	struct debug_mem_mapping *map;
+	loff_t pos = *_pos;
+
+	list_for_each_entry(map, &mem_data->mapping_list, node) {
+		if (pos >= map->nr_pages) {
+			pos -= map->nr_pages;
+		} else {
+			data = kmalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				return NULL;
+			data->lh = &map->node;
+			data->offset = pos;
+			return data;
+		}
+	}
+
+	/* Beyond the end */
+	return NULL;
+}
+
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+	kfree(v);
+}
+
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	if (data->offset < map->nr_pages - 1) {
+		data->offset++;
+		++*pos;
+		return data;
+	}
+
+	if (list_is_last(data->lh, &mem_data->mapping_list))
+		return NULL;
+
+	data->lh = data->lh->next;
+	data->offset = 0;
+	++*pos;
+
+	return data;
+}
+
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+	int i, j;
+	struct page *page;
+	uint32_t *mapping;
+	pgprot_t prot = PAGE_KERNEL;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	kbase_gpu_vm_lock(mem_data->kctx);
+
+	if (data->offset >= map->alloc->nents) {
+		seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+				data->offset) << PAGE_SHIFT);
+		goto out;
+	}
+
+	if (!(map->flags & KBASE_REG_CPU_CACHED))
+		prot = pgprot_writecombine(prot);
+
+	page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset]));
+	mapping = vmap(&page, 1, VM_MAP, prot);
+	if (!mapping)
+		goto out;
+
+	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+		seq_printf(m, "%016llx:", i + ((map->start_pfn +
+				data->offset) << PAGE_SHIFT));
+
+		for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+			seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+		seq_putc(m, '\n');
+	}
+
+	vunmap(mapping);
+
+	seq_putc(m, '\n');
+
+out:
+	kbase_gpu_vm_unlock(mem_data->kctx);
+	return 0;
+}
+
+static const struct seq_operations ops = {
+	.start = debug_mem_start,
+	.next = debug_mem_next,
+	.stop = debug_mem_stop,
+	.show = debug_mem_show,
+};
+
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+	struct file *kctx_file = i->i_private;
+	struct kbase_context *kctx = kctx_file->private_data;
+	struct rb_node *p;
+	struct debug_mem_data *mem_data;
+	int ret;
+
+	ret = seq_open(file, &ops);
+	if (ret)
+		return ret;
+
+	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	if (!mem_data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mem_data->kctx = kctx;
+
+	INIT_LIST_HEAD(&mem_data->mapping_list);
+
+	get_file(kctx_file);
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (p = rb_first(&kctx->reg_rbtree); p; p = rb_next(p)) {
+		struct kbase_va_region *reg;
+		struct debug_mem_mapping *mapping;
+
+		reg = rb_entry(p, struct kbase_va_region, rblink);
+
+		if (reg->gpu_alloc == NULL)
+			/* Empty region - ignore */
+			continue;
+
+		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping) {
+			ret = -ENOMEM;
+			kbase_gpu_vm_unlock(kctx);
+			goto out;
+		}
+
+		mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		mapping->start_pfn = reg->start_pfn;
+		mapping->nr_pages = reg->nr_pages;
+		mapping->flags = reg->flags;
+		list_add_tail(&mapping->node, &mem_data->mapping_list);
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	((struct seq_file *)file->private_data)->private = mem_data;
+
+	return 0;
+
+out:
+	if (mem_data) {
+		while (!list_empty(&mem_data->mapping_list)) {
+			struct debug_mem_mapping *mapping;
+
+			mapping = list_first_entry(&mem_data->mapping_list,
+					struct debug_mem_mapping, node);
+			kbase_mem_phy_alloc_put(mapping->alloc);
+			list_del(&mapping->node);
+			kfree(mapping);
+		}
+		fput(kctx_file);
+		kfree(mem_data);
+	}
+	seq_release(i, file);
+	return ret;
+}
+
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+	struct file *kctx_file = inode->i_private;
+	struct seq_file *sfile = file->private_data;
+	struct debug_mem_data *mem_data = sfile->private;
+	struct debug_mem_mapping *mapping;
+
+	seq_release(inode, file);
+
+	while (!list_empty(&mem_data->mapping_list)) {
+		mapping = list_first_entry(&mem_data->mapping_list,
+				struct debug_mem_mapping, node);
+		kbase_mem_phy_alloc_put(mapping->alloc);
+		list_del(&mapping->node);
+		kfree(mapping);
+	}
+
+	kfree(mem_data);
+
+	fput(kctx_file);
+
+	return 0;
+}
+
+static const struct file_operations kbase_debug_mem_view_fops = {
+	.open = debug_mem_open,
+	.release = debug_mem_release,
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+/**
+ * kbase_debug_mem_view_init - Initialise the mem_view sysfs file
+ * @kctx_file: The /dev/mali0 file instance for the context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct file *kctx_file)
+{
+	struct kbase_context *kctx = kctx_file->private_data;
+
+	debugfs_create_file("mem_view", S_IRUGO, kctx->kctx_dentry, kctx_file,
+			&kbase_debug_mem_view_fops);
+}
+
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
new file mode 100755
index 0000000..20ab51a
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -0,0 +1,25 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+void kbase_debug_mem_view_init(struct file *kctx_file);
+
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100755
index 0000000..d8f6f75
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,1449 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Defintions (types, defines, etcs) common to Kbase. They are placed here to
+ * allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_mmu_mode.h>
+#include <mali_kbase_instr_defs.h>
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#include <linux/bus_logger.h>
+#endif
+
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#endif				/* CONFIG_SYNC */
+
+#include "mali_kbase_dma_fence.h"
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif				/* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_DEVFREQ */
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#if defined(CONFIG_PM_RUNTIME) || \
+	(defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+#define KBASE_PM_RUNTIME 1
+#endif
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif				/* CONFIG_MALI_DEBUG */
+#endif				/* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT             1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT           500
+
+/**
+ * Prevent soft-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * Therefore, soft stop may still be disabled due to HW issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+
+/**
+ * Prevent hard-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS        3
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS              16
+
+/* mmu */
+#define MIDGARD_MMU_VA_BITS 48
+
+#if MIDGARD_MMU_VA_BITS > 39
+#define MIDGARD_MMU_TOPLEVEL    0
+#else
+#define MIDGARD_MMU_TOPLEVEL    1
+#endif
+
+#define MIDGARD_MMU_BOTTOMLEVEL 3
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID     (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8	/* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_hwaccess_defs.h"
+
+#define KBASEP_FORCE_REPLAY_DISABLED 0
+
+/* Maximum force replay limit when randomization is enabled */
+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+
+/** Atom has been previously soft-stoppped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has been previously retried to execute */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has fail dependency on cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom is currently in the list of atoms blocked on cross-slot dependencies */
+#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in protected mode */
+#define KBASE_KATOM_FLAG_PROTECTED (1<<11)
+/* Atom has been stored in runnable_tree */
+#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS  (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ *  entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+		(JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+#ifdef CONFIG_DEBUG_FS
+struct base_job_fault_event {
+
+	u32 event_code;
+	struct kbase_jd_atom *katom;
+	struct work_struct job_fault_work;
+	struct list_head head;
+	int reg_offset;
+};
+
+#endif
+
+struct kbase_jd_atom_dependency {
+	struct kbase_jd_atom *atom;
+	u8 dep_type;
+};
+
+/**
+ * @brief The function retrieves a read-only reference to the atom field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return readonly reference to dependent ATOM.
+ */
+static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return (const struct kbase_jd_atom *)(dep->atom);
+}
+
+/**
+ * @brief The function retrieves a read-only reference to the dependency type field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return A dependency type value.
+ */
+static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return dep->dep_type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency.
+ * @param[in] a      The ATOM to be set as a dependency.
+ * @param     type   The ATOM dependency type to be set.
+ *
+ */
+static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
+		struct kbase_jd_atom *a, u8 type)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = a;
+	dep->dep_type = type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency to be cleared.
+ *
+ */
+static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = NULL;
+	dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
+
+enum kbase_atom_gpu_rb_state {
+	/* Atom is not currently present in slot ringbuffer */
+	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
+	/* Atom is in slot ringbuffer but is blocked on a previous atom */
+	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+	/* Atom is in slot ringbuffer but is waiting for proected mode exit */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT,
+	/* Atom is in slot ringbuffer but is waiting for cores to become
+	 * available */
+	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
+	/* Atom is in slot ringbuffer but is blocked on affinity */
+	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
+	/* Atom is in slot ringbuffer but is waiting for protected mode entry */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY,
+	/* Atom is in slot ringbuffer and ready to run */
+	KBASE_ATOM_GPU_RB_READY,
+	/* Atom is in slot ringbuffer and has been submitted to the GPU */
+	KBASE_ATOM_GPU_RB_SUBMITTED,
+	/* Atom must be returned to JS as soon as it reaches the head of the
+	 * ringbuffer due to a previous failure */
+	KBASE_ATOM_GPU_RB_RETURN_TO_JS
+};
+
+enum kbase_atom_exit_protected_state {
+	/*
+	 * Starting state:
+	 * Check if a transition out of protected mode is required.
+	 */
+	KBASE_ATOM_EXIT_PROTECTED_CHECK,
+	/* Wait for the L2 to become idle in preparation for the reset. */
+	KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
+	/* Issue the protected reset. */
+	KBASE_ATOM_EXIT_PROTECTED_RESET,
+	/*
+	 * End state;
+	 * Wait for the reset to complete.
+	 */
+	KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
+};
+
+struct kbase_ext_res {
+	u64 gpu_address;
+	struct kbase_mem_phy_alloc *alloc;
+};
+
+struct kbase_jd_atom {
+	struct work_struct work;
+	ktime_t start_timestamp;
+	u64 time_spent_us; /**< Total time spent on the GPU in microseconds */
+
+	struct base_jd_udata udata;
+	struct kbase_context *kctx;
+
+	struct list_head dep_head[2];
+	struct list_head dep_item[2];
+	const struct kbase_jd_atom_dependency dep[2];
+	/* List head used during job dispatch job_done processing - as
+	 * dependencies may not be entirely resolved at this point, we need to
+	 * use a separate list head. */
+	struct list_head jd_item;
+	/* true if atom's jd_item is currently on a list. Prevents atom being
+	 * processed twice. */
+	bool in_jd_list;
+
+	u16 nr_extres;
+	struct kbase_ext_res *extres;
+
+	u32 device_nr;
+	u64 affinity;
+	u64 jc;
+	enum kbase_atom_coreref_state coreref_state;
+#ifdef CONFIG_KDS
+	struct list_head node;
+	struct kds_resource_set *kds_rset;
+	bool kds_dep_satisfied;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_SYNC
+	struct sync_fence *fence;
+	struct sync_fence_waiter sync_waiter;
+#endif				/* CONFIG_SYNC */
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		/* This points to the dma-buf fence for this atom. If this is
+		 * NULL then there is no fence for this atom and the other
+		 * fields related to dma_fence may have invalid data.
+		 *
+		 * The context and seqno fields contain the details for this
+		 * fence.
+		 *
+		 * This fence is signaled when the katom is completed,
+		 * regardless of the event_code of the katom (signal also on
+		 * failure).
+		 */
+		struct fence *fence;
+		/* The dma-buf fence context number for this atom. A unique
+		 * context number is allocated to each katom in the context on
+		 * context creation.
+		 */
+		unsigned int context;
+		/* The dma-buf fence sequence number for this atom. This is
+		 * increased every time this katom uses dma-buf fence.
+		 */
+		atomic_t seqno;
+		/* This contains a list of all callbacks set up to wait on
+		 * other fences.  This atom must be held back from JS until all
+		 * these callbacks have been called and dep_count have reached
+		 * 0. The initial value of dep_count must be equal to the
+		 * number of callbacks on this list.
+		 *
+		 * This list is protected by jctx.lock. Callbacks are added to
+		 * this list when the atom is built and the wait are set up.
+		 * All the callbacks then stay on the list until all callbacks
+		 * have been called and the atom is queued, or cancelled, and
+		 * then all callbacks are taken off the list and freed.
+		 */
+		struct list_head callbacks;
+		/* Atomic counter of number of outstandind dma-buf fence
+		 * dependencies for this atom. When dep_count reaches 0 the
+		 * atom may be queued.
+		 *
+		 * The special value "-1" may only be set after the count
+		 * reaches 0, while holding jctx.lock. This indicates that the
+		 * atom has been handled, either queued in JS or cancelled.
+		 *
+		 * If anyone but the dma-fence worker sets this to -1 they must
+		 * ensure that any potentially queued worker must have
+		 * completed before allowing the atom to be marked as unused.
+		 * This can be done by flushing the fence work queue:
+		 * kctx->dma_fence.wq.
+		 */
+		atomic_t dep_count;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
+	enum base_jd_event_code event_code;
+	base_jd_core_req core_req;	    /**< core requirements */
+	/** Job Slot to retry submitting to if submission from IRQ handler failed
+	 *
+	 * NOTE: see if this can be unified into the another member e.g. the event */
+	int retry_submit_on_slot;
+
+	union kbasep_js_policy_job_info sched_info;
+	/* JS atom priority with respect to other atoms on its kctx. */
+	int sched_priority;
+
+	int poking;		/* BASE_HW_ISSUE_8316 */
+
+	wait_queue_head_t completed;
+	enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	int work_id;
+#endif
+	/* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
+	int slot_nr;
+
+	u32 atom_flags;
+
+	/* Number of times this atom has been retried. Used by replay soft job.
+	 */
+	int retry_count;
+
+	enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+	u64 need_cache_flush_cores_retained;
+
+	atomic_t blocked;
+
+	/* Pointer to atom that this atom has same-slot dependency on */
+	struct kbase_jd_atom *pre_dep;
+	/* Pointer to atom that has same-slot dependency on this atom */
+	struct kbase_jd_atom *post_dep;
+
+	/* Pointer to atom that this atom has cross-slot dependency on */
+	struct kbase_jd_atom *x_pre_dep;
+	/* Pointer to atom that has cross-slot dependency on this atom */
+	struct kbase_jd_atom *x_post_dep;
+
+	/* The GPU's flush count recorded at the time of submission, used for
+	 * the cache flush optimisation */
+	u32 flush_id;
+
+	struct kbase_jd_atom_backend backend;
+#ifdef CONFIG_DEBUG_FS
+	struct base_job_fault_event fault_event;
+#endif
+
+	/* List head used for two different purposes:
+	 *  1. Overflow list for JS ring buffers. If an atom is ready to run,
+	 *     but there is no room in the JS ring buffer, then the atom is put
+	 *     on the ring buffer's overflow list using this list node.
+	 *  2. List of waiting soft jobs.
+	 */
+	struct list_head queue;
+
+	struct kbase_va_region *jit_addr_reg;
+
+	/* If non-zero, this indicates that the atom will fail with the set
+	 * event_code when the atom is processed. */
+	enum base_jd_event_code will_fail_event_code;
+
+	enum kbase_atom_exit_protected_state exit_protected_state;
+
+	struct rb_node runnable_tree_node;
+
+	/* 'Age' of atom relative to other atoms in the context. */
+	u32 age;
+};
+
+static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
+{
+	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+struct kbase_jd_context {
+	struct mutex lock;
+	struct kbasep_js_kctx_info sched_info;
+	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+	/** Tracks all job-dispatch jobs.  This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	u32 job_nr;
+
+	/** Waitq that reflects whether there are no jobs (including SW-only
+	 * dependency jobs). This is set when no jobs are present on the ctx,
+	 * and clear when there are jobs.
+	 *
+	 * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
+	 * the Job Scheduler is unaware of jobs that are blocked on dependencies,
+	 * and SW-only dependency jobs.
+	 *
+	 * This waitq can be waited upon to find out when the context jobs are all
+	 * done/cancelled (including those that might've been blocked on
+	 * dependencies) - and so, whether it can be terminated. However, it should
+	 * only be terminated once it is neither present in the policy-queue (see
+	 * kbasep_js_policy_try_evict_ctx() ) nor the run-pool (see
+	 * kbasep_js_kctx_info::ctx::is_scheduled).
+	 *
+	 * Since the waitq is only set under kbase_jd_context::lock,
+	 * the waiter should also briefly obtain and drop kbase_jd_context::lock to
+	 * guarentee that the setter has completed its work on the kbase_context
+	 *
+	 * This must be updated atomically with:
+	 * - kbase_jd_context::job_nr */
+	wait_queue_head_t zero_jobs_wait;
+
+	/** Job Done workqueue. */
+	struct workqueue_struct *job_done_wq;
+
+	spinlock_t tb_lock;
+	u32 *tb;
+	size_t tb_wrap_offset;
+
+#ifdef CONFIG_KDS
+	struct kds_callback kds_cb;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_t work_id;
+#endif
+};
+
+struct kbase_device_info {
+	u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+enum {
+	KBASE_AS_POKE_STATE_IN_FLIGHT     = 1<<0,
+	KBASE_AS_POKE_STATE_KILLING_POKE  = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+	u64	transtab;
+	u64	memattr;
+	u64	transcfg;
+};
+
+/**
+ * Important: Our code makes assumptions that a struct kbase_as structure is always at
+ * kbase_device->as[number]. This is used to recover the containing
+ * struct kbase_device from a struct kbase_as structure.
+ *
+ * Therefore, struct kbase_as structures must not be allocated anywhere else.
+ */
+struct kbase_as {
+	int number;
+
+	struct workqueue_struct *pf_wq;
+	struct work_struct work_pagefault;
+	struct work_struct work_busfault;
+	enum kbase_mmu_fault_type fault_type;
+	u32 fault_status;
+	u64 fault_addr;
+	u64 fault_extra_addr;
+	struct mutex transaction_mutex;
+
+	struct kbase_mmu_setup current_setup;
+
+	/* BASE_HW_ISSUE_8316  */
+	struct workqueue_struct *poke_wq;
+	struct work_struct poke_work;
+	/** Protected by kbasep_js_device_data::runpool_irq::lock */
+	int poke_refcount;
+	/** Protected by kbasep_js_device_data::runpool_irq::lock */
+	kbase_as_poke_state poke_state;
+	struct hrtimer poke_timer;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+struct kbasep_mem_device {
+	atomic_t used_pages;   /* Tracks usage of OS shared memory. Updated
+				   when OS memory is allocated/freed. */
+
+};
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+	/* Comma on its own, to extend the list */
+	,
+	/* Must be the last in the enum */
+	KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT  (((u8)1) << 1)
+
+struct kbase_trace {
+	struct timespec timestamp;
+	u32 thread_id;
+	u32 cpu;
+	void *ctx;
+	bool katom;
+	int atom_number;
+	u64 atom_udata[2];
+	u64 gpu_addr;
+	unsigned long info_val;
+	u8 code;
+	u8 jobslot;
+	u8 refcount;
+	u8 flags;
+};
+
+/** Event IDs for the power management framework.
+ *
+ * Any of these events might be missed, so they should not be relied upon to
+ * find the precise state of the GPU at a particular time in the
+ * trace. Overall, we should get a high percentage of these events for
+ * statisical purposes, and so a few missing should not be a problem */
+enum kbase_timeline_pm_event {
+	/* helper for tests */
+	KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** Event reserved for backwards compatibility with 'init' events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** The power state of the device has changed.
+	 *
+	 * Specifically, the device has reached a desired or available state.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
+
+	/** The GPU is becoming active.
+	 *
+	 * This event is sent when the first context is about to use the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
+
+	/** The GPU is becoming idle.
+	 *
+	 * This event is sent when the last context has finished using the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
+
+	/** Event reserved for backwards compatibility with 'policy_change'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_4,
+
+	/** Event reserved for backwards compatibility with 'system_suspend'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_5,
+
+	/** Event reserved for backwards compatibility with 'system_resume'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_6,
+
+	/** The job scheduler is requesting to power up/down cores.
+	 *
+	 * This event is sent when:
+	 * - powered down cores are needed to complete a job
+	 * - powered up cores are not needed anymore
+	 */
+	KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+
+	KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+};
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+struct kbase_trace_kctx_timeline {
+	atomic_t jd_atoms_in_flight;
+	u32 owner_tgid;
+};
+
+struct kbase_trace_kbdev_timeline {
+	/* Note: strictly speaking, not needed, because it's in sync with
+	 * kbase_device::jm_slots[]::submitted_nr
+	 *
+	 * But it's kept as an example of how to add global timeline tracking
+	 * information
+	 *
+	 * The caller must hold kbasep_js_device_data::runpool_irq::lock when
+	 * accessing this */
+	u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
+
+	/* Last UID for each PM event */
+	atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
+	/* Counter for generating PM event UIDs */
+	atomic_t pm_event_uid_counter;
+	/*
+	 * L2 transition state - true indicates that the transition is ongoing
+	 * Expected to be protected by pm.power_change_lock */
+	bool l2_transitioning;
+};
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+
+struct kbasep_kctx_list_element {
+	struct list_head link;
+	struct kbase_context *kctx;
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_device_data {
+	/**
+	 * The lock protecting Power Management structures accessed outside of
+	 * IRQ.
+	 *
+	 * This lock must also be held whenever the GPU is being powered on or
+	 * off.
+	 */
+	struct mutex lock;
+
+	/** The reference count of active contexts on this device. */
+	int active_count;
+	/** Flag indicating suspending/suspended */
+	bool suspending;
+	/* Wait queue set when active_count == 0 */
+	wait_queue_head_t zero_active_count_wait;
+
+	/**
+	 * Bit masks identifying the available shader cores that are specified
+	 * via sysfs. One mask per job slot.
+	 */
+	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
+	u64 debug_core_mask_all;
+
+	/**
+	 * Lock protecting the power state of the device.
+	 *
+	 * This lock must be held when accessing the shader_available_bitmap,
+	 * tiler_available_bitmap, l2_available_bitmap, shader_inuse_bitmap and
+	 * tiler_inuse_bitmap fields of kbase_device, and the ca_in_transition
+	 * and shader_poweroff_pending fields of kbase_pm_device_data. It is
+	 * also held when the hardware power registers are being written to, to
+	 * ensure that two threads do not conflict over the power transitions
+	 * that the hardware should make.
+	 */
+	spinlock_t power_change_lock;
+
+	/**
+	 * Callback for initializing the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 *
+	 * @return 0 on success, else error code
+	 */
+	 int (*callback_power_runtime_init)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback for terminating the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 */
+	void (*callback_power_runtime_term)(struct kbase_device *kbdev);
+
+	/* Time in milliseconds between each dvfs sample */
+	u32 dvfs_period;
+
+	/* Period of GPU poweroff timer */
+	ktime_t gpu_poweroff_time;
+
+	/* Number of ticks of GPU poweroff timer before shader is powered off */
+	int poweroff_shader_ticks;
+
+	/* Number of ticks of GPU poweroff timer before GPU is powered off */
+	int poweroff_gpu_ticks;
+
+	struct kbase_pm_backend_data backend;
+};
+
+/**
+ * struct kbase_protected_ops - Platform specific functions for GPU protected
+ * mode operations
+ * @protected_mode_enter: Callback to enter protected mode on the GPU
+ * @protected_mode_reset: Callback to reset the GPU and exit protected mode.
+ * @protected_mode_supported: Callback to check if protected mode is supported.
+ */
+struct kbase_protected_ops {
+	/**
+	 * protected_mode_enter() - Enter protected mode on the GPU
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enter)(struct kbase_device *kbdev);
+
+	/**
+	 * protected_mode_reset() - Reset the GPU and exit protected mode
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_reset)(struct kbase_device *kbdev);
+
+	/**
+	 * protected_mode_supported() - Check if protected mode is supported
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	bool (*protected_mode_supported)(struct kbase_device *kbdev);
+};
+
+
+/**
+ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
+ * @kbdev:     Kbase device where memory is used
+ * @cur_size:  Number of free pages currently in the pool (may exceed @max_size
+ *             in some corner cases)
+ * @max_size:  Maximum number of free pages in the pool
+ * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size
+ *             and @page_list
+ * @page_list: List of free pages in the pool
+ * @reclaim:   Shrinker for kernel reclaim of free pages
+ * @next_pool: Pointer to next pool where pages can be allocated when this pool
+ *             is empty. Pages will spill over to the next pool when this pool
+ *             is full. Can be NULL if there is no next pool.
+ */
+struct kbase_mem_pool {
+	struct kbase_device *kbdev;
+	size_t              cur_size;
+	size_t              max_size;
+	spinlock_t          pool_lock;
+	struct list_head    page_list;
+	struct shrinker     reclaim;
+
+	struct kbase_mem_pool *next_pool;
+};
+
+
+#define DEVNAME_SIZE	16
+
+struct kbase_device {
+	s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];
+
+	u32 hw_quirks_sc;
+	u32 hw_quirks_tiler;
+	u32 hw_quirks_mmu;
+	u32 hw_quirks_jm;
+
+	struct list_head entry;
+	struct device *dev;
+	struct miscdevice mdev;
+	u64 reg_start;
+	size_t reg_size;
+	void __iomem *reg;
+
+	struct {
+		int irq;
+		int flags;
+	} irqs[3];
+
+	struct clk *clock;
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulator;
+#endif
+	char devname[DEVNAME_SIZE];
+
+#ifdef CONFIG_MALI_NO_MALI
+	void *model;
+	struct kmem_cache *irq_slab;
+	struct workqueue_struct *irq_workq;
+	atomic_t serving_job_irq;
+	atomic_t serving_gpu_irq;
+	atomic_t serving_mmu_irq;
+	spinlock_t reg_op_lock;
+#endif	/* CONFIG_MALI_NO_MALI */
+
+	struct kbase_pm_device_data pm;
+	struct kbasep_js_device_data js_data;
+	struct kbase_mem_pool mem_pool;
+	struct kbasep_mem_device memdev;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	struct kbase_as as[BASE_MAX_NR_AS];
+
+	spinlock_t mmu_mask_change;
+
+	struct kbase_gpu_props gpu_props;
+
+	/** List of SW workarounds for HW issues */
+	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+	/** List of features available */
+	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+
+	/* Bitmaps of cores that are currently in use (running jobs).
+	 * These should be kept up to date by the job scheduler.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 *
+	 * kbase_pm_check_transitions_nolock() should be called when bits are
+	 * cleared to update the power management system and allow transitions to
+	 * occur. */
+	u64 shader_inuse_bitmap;
+
+	/* Refcount for cores in use */
+	u32 shader_inuse_cnt[64];
+
+	/* Bitmaps of cores the JS needs for jobs ready to run */
+	u64 shader_needed_bitmap;
+
+	/* Refcount for cores needed */
+	u32 shader_needed_cnt[64];
+
+	u32 tiler_inuse_cnt;
+
+	u32 tiler_needed_cnt;
+
+	/* struct for keeping track of the disjoint information
+	 *
+	 * The state  is > 0 if the GPU is in a disjoint state. Otherwise 0
+	 * The count is the number of disjoint events that have occurred on the GPU
+	 */
+	struct {
+		atomic_t count;
+		atomic_t state;
+	} disjoint_event;
+
+	/* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
+	u32 l2_users_count;
+
+	/* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be
+	 * submitted to these cores. These are updated by the power management code. The job scheduler should avoid
+	 * submitting new jobs to any cores that are not marked as available.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 */
+	u64 shader_available_bitmap;
+	u64 tiler_available_bitmap;
+	u64 l2_available_bitmap;
+
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+
+	s8 nr_hw_address_spaces;			  /**< Number of address spaces in the GPU (constant after driver initialisation) */
+	s8 nr_user_address_spaces;			  /**< Number of address spaces available to user contexts */
+
+	/* Structure used for instrumentation and HW counters dumping */
+	struct kbase_hwcnt {
+		/* The lock should be used when accessing any of the following members */
+		spinlock_t lock;
+
+		struct kbase_context *kctx;
+		u64 addr;
+
+		struct kbase_instr_backend backend;
+	} hwcnt;
+
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	/*value to be written to the irq_throttle register each time an irq is served */
+	atomic_t irq_throttle_cycles;
+
+#if KBASE_TRACE_ENABLE
+	spinlock_t              trace_lock;
+	u16                     trace_first_out;
+	u16                     trace_next_in;
+	struct kbase_trace            *trace_rbuf;
+#endif
+
+	u32 reset_timeout_ms;
+
+	struct mutex cacheclean_lock;
+
+	/* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
+	void *platform_context;
+
+	/* List of kbase_contexts created */
+	struct list_head        kctx_list;
+	struct mutex            kctx_list_lock;
+
+#ifdef CONFIG_PM_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freq;
+	unsigned long current_voltage;
+#ifdef CONFIG_DEVFREQ_THERMAL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+	struct devfreq_cooling_device *devfreq_cooling;
+#else
+	struct thermal_cooling_device *devfreq_cooling;
+#endif
+#endif
+#endif
+
+	struct kbase_ipa_context *ipa_ctx;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kbdev_timeline timeline;
+#endif
+
+	/*
+	 * Control for enabling job dump on failure, set when control debugfs
+	 * is opened.
+	 */
+	bool job_fault_debug;
+
+#ifdef CONFIG_DEBUG_FS
+	/* directory for debugfs entries */
+	struct dentry *mali_debugfs_directory;
+	/* Root directory for per context entry */
+	struct dentry *debugfs_ctx_directory;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* bit for each as, set if there is new data to report */
+	u64 debugfs_as_read_bitmap;
+#endif /* CONFIG_MALI_DEBUG */
+
+	/* failed job dump, used for separate debug process */
+	wait_queue_head_t job_fault_wq;
+	wait_queue_head_t job_fault_resume_wq;
+	struct workqueue_struct *job_fault_resume_workq;
+	struct list_head job_fault_event_list;
+	spinlock_t job_fault_event_lock;
+	struct kbase_context *kctx_fault;
+
+#if !MALI_CUSTOMER_RELEASE
+	/* Per-device data for register dumping interface */
+	struct {
+		u16 reg_offset; /* Offset of a GPU_CONTROL register to be
+				   dumped upon request */
+	} regs_dump_debugfs_data;
+#endif /* !MALI_CUSTOMER_RELEASE */
+#endif /* CONFIG_DEBUG_FS */
+
+	/* fbdump profiling controls set by gator */
+	u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
+
+
+#if MALI_CUSTOMER_RELEASE == 0
+	/* Number of jobs that are run before a job is forced to fail and
+	 * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
+	 * failures. */
+	int force_replay_limit;
+	/* Count of jobs between forced failures. Incremented on each job. A
+	 * job is forced to fail once this is greater than or equal to
+	 * force_replay_limit. */
+	int force_replay_count;
+	/* Core requirement for jobs to be failed and replayed. May be zero. */
+	base_jd_core_req force_replay_core_req;
+	/* true if force_replay_limit should be randomized. The random
+	 * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+	 */
+	bool force_replay_random;
+#endif
+
+	/* Total number of created contexts */
+	atomic_t ctx_num;
+
+	struct kbase_hwaccess_data hwaccess;
+
+	/* Count of page/bus faults waiting for workqueues to process */
+	atomic_t faults_pending;
+
+	/* true if GPU is powered off or power off operation is in progress */
+	bool poweroff_pending;
+
+
+	/* defaults for new context created for this device */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active_default;
+#else
+	u32 infinite_cache_active_default;
+#endif
+	size_t mem_pool_max_size_default;
+
+	/* system coherency mode  */
+	u32 system_coherency;
+	/* Flag to track when cci snoops have been enabled on the interface */
+	bool cci_snoop_enabled;
+
+	/* SMC function IDs to call into Trusted firmware to enable/disable
+	 * cache snooping. Value of 0 indicates that they are not used
+	 */
+	u32 snoop_enable_smc;
+	u32 snoop_disable_smc;
+
+	/* Protected operations */
+	struct kbase_protected_ops *protected_ops;
+
+	/*
+	 * true when GPU is put into protected mode
+	 */
+	bool protected_mode;
+
+	/*
+	 * true when GPU is transitioning into or out of protected mode
+	 */
+	bool protected_mode_transition;
+
+	/*
+	 * true if protected mode is supported
+	 */
+	bool protected_mode_support;
+
+
+#ifdef CONFIG_MALI_DEBUG
+	wait_queue_head_t driver_inactive_wait;
+	bool driver_inactive;
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	/*
+	 * Bus logger integration.
+	 */
+	struct bus_logger_client *buslogger;
+#endif
+	/* Boolean indicating if an IRQ flush during reset is in progress. */
+	bool irq_reset_flush;
+
+	/* list of inited sub systems. Used during terminate/error recovery */
+	u32 inited_subsys;
+};
+
+/**
+ * struct jsctx_queue - JS context atom queue
+ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this
+ *                 job slot.
+ * @x_dep_head:    Head item of the linked list of atoms blocked on cross-slot
+ *                 dependencies. Atoms on this list will be moved to the
+ *                 runnable_tree when the blocking atom completes.
+ *
+ * runpool_irq.lock must be held when accessing this structure.
+ */
+struct jsctx_queue {
+	struct rb_root runnable_tree;
+	struct list_head x_dep_head;
+};
+
+
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
+					 (((minor) & 0xFFF) << 8) | \
+					 ((0 & 0xFF) << 0))
+
+struct kbase_context {
+	struct file *filp;
+	struct kbase_device *kbdev;
+	int id; /* System wide unique id */
+	unsigned long api_version;
+	phys_addr_t pgd;
+	struct list_head event_list;
+	struct list_head event_coalesce_list;
+	struct mutex event_mutex;
+	atomic_t event_closed;
+	struct workqueue_struct *event_workq;
+	atomic_t event_count;
+	int event_coalesce_count;
+
+	bool is_compat;
+
+	atomic_t                setup_complete;
+	atomic_t                setup_in_progress;
+
+	u64 *mmu_teardown_pages;
+
+	struct page *aliasing_sink_page;
+
+	struct mutex            mmu_lock;
+	struct mutex            reg_lock; /* To be converted to a rwlock? */
+	struct rb_root          reg_rbtree; /* Red-Black tree of GPU regions (live regions) */
+
+	unsigned long    cookies;
+	struct kbase_va_region *pending_regions[BITS_PER_LONG];
+
+	wait_queue_head_t event_queue;
+	pid_t tgid;
+	pid_t pid;
+
+	struct kbase_jd_context jctx;
+	atomic_t used_pages;
+	atomic_t         nonmapped_pages;
+
+	struct kbase_mem_pool mem_pool;
+
+	struct shrinker         reclaim;
+	struct list_head        evict_list;
+	struct mutex            evict_lock;
+
+	struct list_head waiting_soft_jobs;
+	spinlock_t waiting_soft_jobs_lock;
+#ifdef CONFIG_KDS
+	struct list_head waiting_kds_resource;
+#endif
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		struct list_head waiting_resource;
+		struct workqueue_struct *wq;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+	/** This is effectively part of the Run Pool, because it only has a valid
+	 * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
+	 *
+	 * The kbasep_js_device_data::runpool_irq::lock must be held whilst accessing
+	 * this.
+	 *
+	 * If the context relating to this as_nr is required, you must use
+	 * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
+	 * whilst you're using it. Alternatively, just hold the kbasep_js_device_data::runpool_irq::lock
+	 * to ensure the context doesn't disappear (but this has restrictions on what other locks
+	 * you can take whilst doing this) */
+	int as_nr;
+
+	/* NOTE:
+	 *
+	 * Flags are in jctx.sched_info.ctx.flags
+	 * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
+	 *
+	 * All other flags must be added there */
+	spinlock_t         mm_update_lock;
+	struct mm_struct *process_mm;
+	/* End of the SAME_VA zone */
+	u64 same_va_end;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kctx_timeline timeline;
+#endif
+#ifdef CONFIG_DEBUG_FS
+	/* Content of mem_profile file */
+	char *mem_profile_data;
+	/* Size of @c mem_profile_data */
+	size_t mem_profile_size;
+	/* Mutex guarding memory profile state */
+	struct mutex mem_profile_lock;
+	/* Memory profile file created */
+	bool mem_profile_initialized;
+	struct dentry *kctx_dentry;
+
+	/* for job fault debug */
+	unsigned int *reg_dump;
+	atomic_t job_fault_count;
+	/* This list will keep the following atoms during the dump
+	 * in the same context
+	 */
+	struct list_head job_fault_resume_event_list;
+
+#endif /* CONFIG_DEBUG_FS */
+
+	struct jsctx_queue jsctx_queue
+		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+
+	/* Number of atoms currently pulled from this context */
+	atomic_t atoms_pulled;
+	/* Number of atoms currently pulled from this context, per slot */
+	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+	/* true if last kick() caused atoms to be pulled from this context */
+	bool pulled;
+	/* true if infinite cache is to be enabled for new allocations. Existing
+	 * allocations will not change. bool stored as a u32 per Linux API */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active;
+#else
+	u32 infinite_cache_active;
+#endif
+	/* Bitmask of slots that can be pulled from */
+	u32 slots_pullable;
+
+	/* Backend specific data */
+	struct kbase_context_backend backend;
+
+	/* Work structure used for deferred ASID assignment */
+	struct work_struct work;
+
+	/* Only one userspace vinstr client per kbase context */
+	struct kbase_vinstr_client *vinstr_cli;
+	struct mutex vinstr_cli_lock;
+
+	/* Must hold queue_mutex when accessing */
+	bool ctx_active;
+
+	/* List of completed jobs waiting for events to be posted */
+	struct list_head completed_jobs;
+	/* Number of work items currently pending on job_done_wq */
+	atomic_t work_count;
+
+	/* true if context is counted in kbdev->js_data.nr_contexts_runnable */
+	bool ctx_runnable_ref;
+
+	/* Waiting soft-jobs will fail when this timer expires */
+	struct timer_list soft_job_timeout;
+
+	/* JIT allocation management */
+	struct kbase_va_region *jit_alloc[256];
+	struct list_head jit_active_head;
+	struct list_head jit_pool_head;
+	struct list_head jit_destroy_head;
+	struct mutex jit_lock;
+	struct work_struct jit_work;
+
+	/* External sticky resource management */
+	struct list_head ext_res_meta_head;
+
+	/* Used to record that a drain was requested from atomic context */
+	atomic_t drain_pending;
+
+	/* Current age count, used to determine age for newly submitted atoms */
+	u32 age_count;
+};
+
+/**
+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
+ *                                 to a @kbase_context.
+ * @ext_res_node:                  List head for adding the metadata to a
+ *                                 @kbase_context.
+ * @alloc:                         The physical memory allocation structure
+ *                                 which is mapped.
+ * @gpu_addr:                      The GPU virtual address the resource is
+ *                                 mapped to.
+ *
+ * External resources can be mapped into multiple contexts as well as the same
+ * context multiple times.
+ * As kbase_va_region itself isn't refcounted we can't attach our extra
+ * information to it as it could be removed under our feet leaving external
+ * resources pinned.
+ * This metadata structure binds a single external resource to a single
+ * context, ensuring that per context mapping is tracked separately so it can
+ * be overridden when needed and abuses by the application (freeing the resource
+ * multiple times) don't effect the refcount of the physical allocation.
+ */
+struct kbase_ctx_ext_res_meta {
+	struct list_head ext_res_node;
+	struct kbase_mem_phy_alloc *alloc;
+	u64 gpu_addr;
+};
+
+enum kbase_reg_access_type {
+	REG_READ,
+	REG_WRITE
+};
+
+enum kbase_share_attr_bits {
+	/* (1ULL << 8) bit is reserved */
+	SHARE_BOTH_BITS = (2ULL << 8),	/* inner and outer shareable coherency */
+	SHARE_INNER_BITS = (3ULL << 8)	/* inner shareable coherency */
+};
+
+/**
+ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
+ * @kbdev: kbase device
+ *
+ * Return: true if the device access are coherent, false if not.
+ */
+static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
+			(kbdev->system_coherency == COHERENCY_ACE))
+		return true;
+
+	return false;
+}
+
+/* Conversion helpers for setting up high resolution timers */
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
+
+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
+#define KBASE_CLEAN_CACHE_MAX_LOOPS     100000
+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
+#define KBASE_AS_INACTIVE_MAX_LOOPS     100000
+
+/* Maximum number of times a job can be replayed */
+#define BASEP_JD_REPLAY_LIMIT 15
+
+/* JobDescriptorHeader - taken from the architecture specifications, the layout
+ * is currently identical for all GPU archs. */
+struct job_descriptor_header {
+	u32 exception_status;
+	u32 first_incomplete_task;
+	u64 fault_pointer;
+	u8 job_descriptor_size : 1;
+	u8 job_type : 7;
+	u8 job_barrier : 1;
+	u8 _reserved_01 : 1;
+	u8 _reserved_1 : 1;
+	u8 _reserved_02 : 1;
+	u8 _reserved_03 : 1;
+	u8 _reserved_2 : 1;
+	u8 _reserved_04 : 1;
+	u8 _reserved_05 : 1;
+	u16 job_index;
+	u16 job_dependency_index_1;
+	u16 job_dependency_index_2;
+	union {
+		u64 _64;
+		u32 _32;
+	} next_job;
+};
+
+#endif				/* _KBASE_DEFS_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
new file mode 100755
index 0000000..62ab0ca
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -0,0 +1,697 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_profiling_gator_api.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports tracing feature provided in the base module.
+ * Please keep it in sync with the value of base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if KBASE_TRACE_ENABLE
+static const char *kbasep_trace_code_string[] = {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+static int kbasep_trace_init(struct kbase_device *kbdev);
+static void kbasep_trace_term(struct kbase_device *kbdev);
+static void kbasep_trace_hook_wrapper(void *param);
+
+struct kbase_device *kbase_device_alloc(void)
+{
+	return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+	const char format[] = "mali_mmu%d";
+	char name[sizeof(format)];
+	const char poke_format[] = "mali_mmu%d_poker";
+	char poke_name[sizeof(poke_format)];
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		snprintf(poke_name, sizeof(poke_name), poke_format, i);
+
+	snprintf(name, sizeof(name), format, i);
+
+	kbdev->as[i].number = i;
+	kbdev->as[i].fault_addr = 0ULL;
+
+	kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+	if (!kbdev->as[i].pf_wq)
+		return -EINVAL;
+
+	mutex_init(&kbdev->as[i].transaction_mutex);
+	INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+	INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+		struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
+		struct work_struct *poke_work = &kbdev->as[i].poke_work;
+
+		kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+		if (!kbdev->as[i].poke_wq) {
+			destroy_workqueue(kbdev->as[i].pf_wq);
+			return -EINVAL;
+		}
+		KBASE_DEBUG_ASSERT(!object_is_on_stack(poke_work));
+		INIT_WORK(poke_work, kbasep_as_do_poke);
+
+		hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+		poke_timer->function = kbasep_as_poke_timer_callback;
+
+		kbdev->as[i].poke_refcount = 0;
+		kbdev->as[i].poke_state = 0u;
+	}
+
+	return 0;
+}
+
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+	destroy_workqueue(kbdev->as[i].pf_wq);
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		destroy_workqueue(kbdev->as[i].poke_wq);
+}
+
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+	int i, err;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		err = kbase_device_as_init(kbdev, i);
+		if (err)
+			goto free_workqs;
+	}
+
+	return 0;
+
+free_workqs:
+	for (; i > 0; i--)
+		kbase_device_as_term(kbdev, i);
+
+	return err;
+}
+
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+		kbase_device_as_term(kbdev, i);
+}
+
+int kbase_device_init(struct kbase_device * const kbdev)
+{
+	int i, err;
+#ifdef CONFIG_ARM64
+	struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
+
+	spin_lock_init(&kbdev->mmu_mask_change);
+#ifdef CONFIG_ARM64
+	kbdev->cci_snoop_enabled = false;
+	np = kbdev->dev->of_node;
+	if (np != NULL) {
+		if (of_property_read_u32(np, "snoop_enable_smc",
+					&kbdev->snoop_enable_smc))
+			kbdev->snoop_enable_smc = 0;
+		if (of_property_read_u32(np, "snoop_disable_smc",
+					&kbdev->snoop_disable_smc))
+			kbdev->snoop_disable_smc = 0;
+		/* Either both or none of the calls should be provided. */
+		if (!((kbdev->snoop_disable_smc == 0
+			&& kbdev->snoop_enable_smc == 0)
+			|| (kbdev->snoop_disable_smc != 0
+			&& kbdev->snoop_enable_smc != 0))) {
+			WARN_ON(1);
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+#endif /* CONFIG_ARM64 */
+	/* Get the list of workarounds for issues on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	err = kbase_hw_set_issues_mask(kbdev);
+	if (err)
+		goto fail;
+
+	/* Set the list of features available on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	kbase_hw_set_features_mask(kbdev);
+
+	kbase_gpuprops_set_features(kbdev);
+
+	/* On Linux 4.0+, dma coherency is determined from device tree */
+#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+	set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif
+
+	/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+	 * device structure was created by device-tree
+	 */
+	if (!kbdev->dev->dma_mask)
+		kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+	err = dma_set_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	err = dma_set_coherent_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+	err = kbase_device_all_as_init(kbdev);
+	if (err)
+		goto as_init_failed;
+
+	spin_lock_init(&kbdev->hwcnt.lock);
+
+	err = kbasep_trace_init(kbdev);
+	if (err)
+		goto term_as;
+
+	mutex_init(&kbdev->cacheclean_lock);
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		kbdev->timeline.slot_atoms_submitted[i] = 0;
+
+	for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
+		atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+	/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
+	for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
+		kbdev->kbase_profiling_controls[i] = 0;
+
+	kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+	atomic_set(&kbdev->ctx_num, 0);
+
+	err = kbase_instr_backend_init(kbdev);
+	if (err)
+		goto term_trace;
+
+	kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+#else
+	kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+#ifdef CONFIG_MALI_DEBUG
+	init_waitqueue_head(&kbdev->driver_inactive_wait);
+#endif /* CONFIG_MALI_DEBUG */
+
+	return 0;
+term_trace:
+	kbasep_trace_term(kbdev);
+term_as:
+	kbase_device_all_as_term(kbdev);
+as_init_failed:
+dma_set_mask_failed:
+fail:
+	return err;
+}
+
+void kbase_device_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+#if KBASE_TRACE_ENABLE
+	kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+	kbase_instr_backend_term(kbdev);
+
+	kbasep_trace_term(kbdev);
+
+	kbase_device_all_as_term(kbdev);
+}
+
+void kbase_device_free(struct kbase_device *kbdev)
+{
+	kfree(kbdev);
+}
+
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(tb);
+
+	/* Interface uses 16-bit value to track last accessed entry. Each entry
+	 * is composed of two 32-bit words.
+	 * This limits the size that can be handled without an overflow. */
+	if (0xFFFF * (2 * sizeof(u32)) < size)
+		return -EINVAL;
+
+	/* set up the header */
+	/* magic number in the first 4 bytes */
+	tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
+	/* Store (write offset = 0, wrap counter = 0, transaction active = no)
+	 * write offset 0 means never written.
+	 * Offsets 1 to (wrap_offset - 1) used to store values when trace started
+	 */
+	tb[1] = 0;
+
+	/* install trace buffer */
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb_wrap_offset = size / 8;
+	kctx->jctx.tb = tb;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+
+	return 0;
+}
+
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb = NULL;
+	kctx->jctx.tb_wrap_offset = 0;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	if (kctx->jctx.tb) {
+		u16 wrap_count;
+		u16 write_offset;
+		u32 *tb = kctx->jctx.tb;
+		u32 header_word;
+
+		header_word = tb[1];
+		KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
+
+		wrap_count = (header_word >> 1) & 0x7FFF;
+		write_offset = (header_word >> 16) & 0xFFFF;
+
+		/* mark as transaction in progress */
+		tb[1] |= 0x1;
+		mb();
+
+		/* calculate new offset */
+		write_offset++;
+		if (write_offset == kctx->jctx.tb_wrap_offset) {
+			/* wrap */
+			write_offset = 1;
+			wrap_count++;
+			wrap_count &= 0x7FFF;	/* 15bit wrap counter */
+		}
+
+		/* store the trace entry at the selected offset */
+		tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
+		tb[write_offset * 2 + 1] = reg_value;
+		mb();
+
+		/* new header word */
+		header_word = (write_offset << 16) | (wrap_count << 1) | 0x0;	/* transaction complete */
+		tb[1] = header_word;
+	}
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	struct kbase_trace *rbuf;
+
+	rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+	if (!rbuf)
+		return -EINVAL;
+
+	kbdev->trace_rbuf = rbuf;
+	spin_lock_init(&kbdev->trace_lock);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	kfree(kbdev->trace_rbuf);
+}
+
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+	s32 written = 0;
+
+	/* Initial part of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
+
+	if (trace_msg->katom)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
+
+	/* NOTE: Could add function callbacks to handle different message types */
+	/* Jobslot present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Refcount present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Rest of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
+}
+
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	dev_dbg(kbdev->dev, "%s", buffer);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+	unsigned long irqflags;
+	struct kbase_trace *trace_msg;
+
+	spin_lock_irqsave(&kbdev->trace_lock, irqflags);
+
+	trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+	/* Fill the message */
+	trace_msg->thread_id = task_pid_nr(current);
+	trace_msg->cpu = task_cpu(current);
+
+	getnstimeofday(&trace_msg->timestamp);
+
+	trace_msg->code = code;
+	trace_msg->ctx = ctx;
+
+	if (NULL == katom) {
+		trace_msg->katom = false;
+	} else {
+		trace_msg->katom = true;
+		trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+		trace_msg->atom_udata[0] = katom->udata.blob[0];
+		trace_msg->atom_udata[1] = katom->udata.blob[1];
+	}
+
+	trace_msg->gpu_addr = gpu_addr;
+	trace_msg->jobslot = jobslot;
+	trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
+	trace_msg->info_val = info_val;
+	trace_msg->flags = flags;
+
+	/* Update the ringbuffer indices */
+	kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+	if (kbdev->trace_next_in == kbdev->trace_first_out)
+		kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+	/* Done */
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	kbdev->trace_first_out = kbdev->trace_next_in;
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 start;
+	u32 end;
+
+	dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	start = kbdev->trace_first_out;
+	end = kbdev->trace_next_in;
+
+	while (start != end) {
+		struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
+
+		kbasep_trace_dump_msg(kbdev, trace_msg);
+
+		start = (start + 1) & KBASE_TRACE_MASK;
+	}
+	dev_dbg(kbdev->dev, "TRACE_END");
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	KBASE_TRACE_CLEAR(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)param;
+
+	kbasep_trace_dump(kbdev);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct trace_seq_state {
+	struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+	u32 start;
+	u32 end;
+};
+
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	if (*pos > KBASE_TRACE_SIZE)
+		return NULL;
+	i = state->start + *pos;
+	if ((state->end >= state->start && i >= state->end) ||
+			i >= state->end + KBASE_TRACE_SIZE)
+		return NULL;
+
+	i &= KBASE_TRACE_MASK;
+
+	return &state->trace_buf[i];
+}
+
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	(*pos)++;
+
+	i = (state->start + *pos) & KBASE_TRACE_MASK;
+	if (i == state->end)
+		return NULL;
+
+	return &state->trace_buf[i];
+}
+
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace *trace_msg = data;
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	seq_printf(s, "%s\n", buffer);
+	return 0;
+}
+
+static const struct seq_operations kbasep_trace_seq_ops = {
+	.start = kbasep_trace_seq_start,
+	.next = kbasep_trace_seq_next,
+	.stop = kbasep_trace_seq_stop,
+	.show = kbasep_trace_seq_show,
+};
+
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct kbase_device *kbdev = inode->i_private;
+	unsigned long flags;
+
+	struct trace_seq_state *state;
+
+	state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
+	if (!state)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	state->start = kbdev->trace_first_out;
+	state->end = kbdev->trace_next_in;
+	memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	return 0;
+}
+
+static const struct file_operations kbasep_trace_debugfs_fops = {
+	.open = kbasep_trace_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_trace", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_trace_debugfs_fops);
+}
+
+#else
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_DEBUG_FS */
+
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	CSTD_UNUSED(param);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
+{
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		kbdev->kbase_profiling_controls[control] = value;
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+}
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control)
+{
+	u32 ret_value = 0;
+
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		ret_value = kbdev->kbase_profiling_controls[control];
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+
+	return ret_value;
+}
+
+/*
+ * Called by gator to control the production of
+ * profiling information at runtime
+ * */
+
+void _mali_profiling_control(u32 action, u32 value)
+{
+	struct kbase_device *kbdev = NULL;
+
+	/* find the first i.e. call with -1 */
+	kbdev = kbase_find_device(-1);
+
+	if (NULL != kbdev)
+		kbase_set_profiling_control(kbdev, action, value);
+}
+KBASE_EXPORT_SYMBOL(_mali_profiling_control);
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
new file mode 100755
index 0000000..f70bccc
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_set(&kbdev->disjoint_event.count, 0);
+	atomic_set(&kbdev->disjoint_event.state, 0);
+}
+
+/* increment the disjoint event count */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* increment the state and the event counter */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.state);
+
+	kbase_disjoint_event(kbdev);
+}
+
+/* decrement the state */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+	kbase_disjoint_event(kbdev);
+
+	atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* increments the count only if the state is > 0 */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	if (atomic_read(&kbdev->disjoint_event.state))
+		kbase_disjoint_event(kbdev);
+}
+
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
new file mode 100644
index 0000000..4d3836a
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
@@ -0,0 +1,571 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as
+ * it will be set there.
+ */
+#include "mali_kbase_dma_fence.h"
+
+#include <linux/atomic.h>
+#include <linux/fence.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/reservation.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/ww_mutex.h>
+
+#include <mali_kbase.h>
+
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(kbase_dma_fence_lock);
+
+
+static void
+kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource);
+}
+
+void
+kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom)
+{
+	list_del(&katom->queue);
+}
+
+static const char *
+kbase_dma_fence_get_driver_name(struct fence *fence)
+{
+	return kbase_drv_name;
+}
+
+static const char *
+kbase_dma_fence_get_timeline_name(struct fence *fence)
+{
+	return kbase_timeline_name;
+}
+
+static bool
+kbase_dma_fence_enable_signaling(struct fence *fence)
+{
+	/* If in the future we need to add code here remember to
+	 * to get a reference to the fence and release it when signaling
+	 * as stated in fence.h
+	 */
+	return true;
+}
+
+static void
+kbase_dma_fence_fence_value_str(struct fence *fence, char *str, int size)
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+static const struct fence_ops kbase_dma_fence_ops = {
+	.get_driver_name = kbase_dma_fence_get_driver_name,
+	.get_timeline_name = kbase_dma_fence_get_timeline_name,
+	.enable_signaling = kbase_dma_fence_enable_signaling,
+	/* Use the default wait */
+	.wait = fence_default_wait,
+	.fence_value_str = kbase_dma_fence_fence_value_str,
+};
+
+static struct fence *
+kbase_dma_fence_new(unsigned int context, unsigned int seqno)
+{
+	struct fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	fence_init(fence,
+		   &kbase_dma_fence_ops,
+		   &kbase_dma_fence_lock,
+		   context,
+		   seqno);
+
+	return fence;
+}
+
+static int
+kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
+				  struct ww_acquire_ctx *ctx)
+{
+	struct reservation_object *content_res = NULL;
+	unsigned int content_res_idx = 0;
+	unsigned int r;
+	int err = 0;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < info->dma_fence_resv_count; r++) {
+		if (info->resv_objs[r] == content_res) {
+			content_res = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	content_res_idx = r;
+
+	/* Unlock the locked one ones */
+	while (r--)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+
+	if (content_res)
+		ww_mutex_unlock(&content_res->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+		content_res = info->resv_objs[content_res_idx];
+		ww_mutex_lock_slow(&content_res->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+static void
+kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
+				    struct ww_acquire_ctx *ctx)
+{
+	unsigned int r;
+
+	for (r = 0; r < info->dma_fence_resv_count; r++)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+	ww_acquire_fini(ctx);
+}
+
+/**
+ * kbase_dma_fence_free_callbacks - Free dma-fence callbacks on a katom
+ * @katom: Pointer to katom
+ *
+ * This function will free all fence callbacks on the katom's list of
+ * callbacks. Callbacks that have not yet been called, because their fence
+ * hasn't yet signaled, will first be removed from the fence.
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom)
+{
+	struct kbase_dma_fence_cb *cb, *tmp;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Clean up and free callbacks. */
+	list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) {
+		bool ret;
+
+		/* Cancel callbacks that hasn't been called yet. */
+		ret = fence_remove_callback(cb->fence, &cb->fence_cb);
+		if (ret) {
+			/* Fence had not signaled, clean up after
+			 * canceling.
+			 */
+			atomic_dec(&katom->dma_fence.dep_count);
+		}
+
+		/*
+		 * Release the reference taken in
+		 * kbase_dma_fence_add_callback().
+		 */
+		fence_put(cb->fence);
+		list_del(&cb->node);
+		kfree(cb);
+	}
+}
+
+/**
+ * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom
+ * @katom:	Katom to cancel
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Cancel callbacks and clean up. */
+	kbase_dma_fence_free_callbacks(katom);
+
+	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == 0);
+
+	/* Mark the atom as handled in case all fences signaled just before
+	 * canceling the callbacks and the worker was queued.
+	 */
+	atomic_set(&katom->dma_fence.dep_count, -1);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 * there is a race between job completion and cancellation.
+	 */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+
+/**
+ * kbase_dma_fence_work() - Worker thread called when a fence is signaled
+ * @pwork:	work_struct containing a pointer to a katom
+ *
+ * This function will clean and mark all dependencies as satisfied
+ */
+static void
+kbase_dma_fence_work(struct work_struct *pwork)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = container_of(pwork, struct kbase_jd_atom, work);
+	ctx = &katom->kctx->jctx;
+
+	mutex_lock(&ctx->lock);
+	if (atomic_read(&katom->dma_fence.dep_count) != 0)
+		goto out;
+
+	atomic_set(&katom->dma_fence.dep_count, -1);
+
+	/* Remove atom from list of dma-fence waiting atoms. */
+	kbase_dma_fence_waiters_remove(katom);
+	/* Cleanup callbacks. */
+	kbase_dma_fence_free_callbacks(katom);
+	/* Queue atom on GPU. */
+	kbase_jd_dep_clear_locked(katom);
+
+out:
+	mutex_unlock(&ctx->lock);
+}
+
+/**
+ * kbase_dma_fence_add_callback() - Add callback on @fence to block @katom
+ * @katom: Pointer to katom that will be blocked by @fence
+ * @fence: Pointer to fence on which to set up the callback
+ * @callback: Pointer to function to be called when fence is signaled
+ *
+ * Caller needs to hold a reference to @fence when calling this function, and
+ * the caller is responsible for releasing that reference.  An additional
+ * reference to @fence will be taken when the callback was successfully set up
+ * and @fence needs to be kept valid until the callback has been called and
+ * cleanup have been done.
+ *
+ * Return: 0 on success: fence was either already signalled, or callback was
+ * set up. Negative error code is returned on error.
+ */
+static int
+kbase_dma_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct fence *fence,
+			     fence_func_t callback)
+{
+	int err = 0;
+	struct kbase_dma_fence_cb *kbase_fence_cb;
+
+	kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL);
+	if (!kbase_fence_cb)
+		return -ENOMEM;
+
+	kbase_fence_cb->fence = fence;
+	kbase_fence_cb->katom = katom;
+	INIT_LIST_HEAD(&kbase_fence_cb->node);
+
+	err = fence_add_callback(fence, &kbase_fence_cb->fence_cb, callback);
+	if (err == -ENOENT) {
+		/* Fence signaled, clear the error and return */
+		err = 0;
+		kbase_fence_cb->fence = NULL;
+		kfree(kbase_fence_cb);
+	} else if (err) {
+		kfree(kbase_fence_cb);
+	} else {
+		/*
+		 * Get reference to fence that will be kept until callback gets
+		 * cleaned up in kbase_dma_fence_free_callbacks().
+		 */
+		fence_get(fence);
+		atomic_inc(&katom->dma_fence.dep_count);
+		/* Add callback to katom's list of callbacks */
+		list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks);
+	}
+
+	return err;
+}
+
+static void
+kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
+{
+	struct kbase_dma_fence_cb *kcb = container_of(cb,
+				struct kbase_dma_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* If the atom is zapped dep_count will be forced to a negative number
+	 * preventing this callback from ever scheduling work. Which in turn
+	 * would reschedule the atom.
+	 */
+	if (atomic_dec_and_test(&katom->dma_fence.dep_count)) {
+		bool ret;
+
+		INIT_WORK(&katom->work, kbase_dma_fence_work);
+		ret = queue_work(kctx->dma_fence.wq, &katom->work);
+		/* Warn if work was already queued, that should not happen. */
+		WARN_ON(!ret);
+	}
+}
+
+static int
+kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
+					 struct reservation_object *resv,
+					 bool exclusive)
+{
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = reservation_object_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = kbase_dma_fence_add_callback(katom,
+						   excl_fence,
+						   kbase_dma_fence_cb);
+
+		/* Release our reference, taken by reservation_object_get_fences_rcu(),
+		 * to the fence. We have set up our callback (if that was possible),
+		 * and it's the fence's owner is responsible for singling the fence
+		 * before allowing it to disappear.
+		 */
+		fence_put(excl_fence);
+
+		if (err)
+			goto out;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = kbase_dma_fence_add_callback(katom,
+							   shared_fences[i],
+							   kbase_dma_fence_cb);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Release all our references to the shared fences, taken by
+	 * reservation_object_get_fences_rcu(). We have set up our callback (if
+	 * that was possible), and it's the fence's owner is responsible for
+	 * signaling the fence before allowing it to disappear.
+	 */
+out:
+	for (i = 0; i < shared_count; i++)
+		fence_put(shared_fences[i]);
+	kfree(shared_fences);
+
+	if (err) {
+		/*
+		 * On error, cancel and clean up all callbacks that was set up
+		 * before the error.
+		 */
+		kbase_dma_fence_free_callbacks(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive)
+{
+	unsigned int i;
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		/* Duplicate resource, ignore */
+		if (info->resv_objs[i] == resv)
+			return;
+	}
+
+	info->resv_objs[info->dma_fence_resv_count] = resv;
+	if (exclusive)
+		set_bit(info->dma_fence_resv_count,
+			info->dma_fence_excl_bitmap);
+	(info->dma_fence_resv_count)++;
+}
+
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info)
+{
+	int err, i;
+	struct fence *fence;
+	struct ww_acquire_ctx ww_ctx;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	fence = kbase_dma_fence_new(katom->dma_fence.context,
+				    atomic_inc_return(&katom->dma_fence.seqno));
+	if (!fence) {
+		err = -ENOMEM;
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d creating fence.\n", err);
+		return err;
+	}
+
+	katom->dma_fence.fence = fence;
+	atomic_set(&katom->dma_fence.dep_count, 1);
+
+	err = kbase_dma_fence_lock_reservations(info, &ww_ctx);
+	if (err) {
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d locking reservations.\n", err);
+		atomic_set(&katom->dma_fence.dep_count, -1);
+		fence_put(fence);
+		return err;
+	}
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		struct reservation_object *obj = info->resv_objs[i];
+
+		if (!test_bit(i, info->dma_fence_excl_bitmap)) {
+			err = reservation_object_reserve_shared(obj);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d reserving space for shared fence.\n", err);
+				goto end;
+			}
+
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, false);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_shared_fence(obj, katom->dma_fence.fence);
+		} else {
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, true);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_excl_fence(obj, katom->dma_fence.fence);
+		}
+	}
+
+end:
+	kbase_dma_fence_unlock_reservations(info, &ww_ctx);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (atomic_dec_and_test(&katom->dma_fence.dep_count)) {
+			atomic_set(&katom->dma_fence.dep_count, -1);
+			kbase_dma_fence_free_callbacks(katom);
+		} else {
+			/* Add katom to the list of dma-buf fence waiting atoms
+			 * only if it is still waiting.
+			 */
+			kbase_dma_fence_waiters_add(katom);
+		}
+	} else {
+		/* There was an error, cancel callbacks, set dep_count to -1 to
+		 * indicate that the atom has been handled (the caller will
+		 * kill it for us), signal the fence, free callbacks and the
+		 * fence.
+		 */
+		kbase_dma_fence_free_callbacks(katom);
+		atomic_set(&katom->dma_fence.dep_count, -1);
+		kbase_dma_fence_signal(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
+{
+	struct kbase_jd_atom *katom, *katom_tmp;
+
+	list_for_each_entry_safe(katom, katom_tmp,
+				 &kctx->dma_fence.waiting_resource, queue) {
+		kbase_dma_fence_waiters_remove(katom);
+		kbase_dma_fence_cancel_atom(katom);
+	}
+}
+
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
+{
+	/* Cancel callbacks and clean up. */
+	kbase_dma_fence_free_callbacks(katom);
+}
+
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
+{
+	if (!katom->dma_fence.fence)
+		return;
+
+	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == -1);
+
+	/* Signal the atom's fence. */
+	fence_signal(katom->dma_fence.fence);
+	fence_put(katom->dma_fence.fence);
+	katom->dma_fence.fence = NULL;
+
+	kbase_dma_fence_free_callbacks(katom);
+}
+
+void kbase_dma_fence_term(struct kbase_context *kctx)
+{
+	destroy_workqueue(kctx->dma_fence.wq);
+	kctx->dma_fence.wq = NULL;
+}
+
+int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);
+
+	kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d",
+					     WQ_UNBOUND, 1, kctx->pid);
+	if (!kctx->dma_fence.wq)
+		return -ENOMEM;
+
+	return 0;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
new file mode 100644
index 0000000..3b0a69b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
@@ -0,0 +1,150 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DMA_FENCE_H_
+#define _KBASE_DMA_FENCE_H_
+
+#ifdef CONFIG_MALI_DMA_FENCE
+
+#include <linux/fence.h>
+#include <linux/list.h>
+#include <linux/reservation.h>
+
+
+/* Forward declaration from mali_kbase_defs.h */
+struct kbase_jd_atom;
+struct kbase_context;
+
+/**
+ * struct kbase_dma_fence_cb - Mali dma-fence callback data struct
+ * @fence_cb: Callback function
+ * @katom:    Pointer to katom that is waiting on this callback
+ * @fence:    Pointer to the fence object on which this callback is waiting
+ * @node:     List head for linking this callback to the katom
+ */
+struct kbase_dma_fence_cb {
+	struct fence_cb fence_cb;
+	struct kbase_jd_atom *katom;
+	struct fence *fence;
+	struct list_head node;
+};
+
+/**
+ * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
+ * @resv_objs:             Array of reservation objects to attach the
+ *                         new fence to.
+ * @dma_fence_resv_count:  Number of reservation objects in the array.
+ * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive.
+ *
+ * This is used by some functions to pass around a collection of data about
+ * reservation objects.
+ */
+struct kbase_dma_fence_resv_info {
+	struct reservation_object **resv_objs;
+	unsigned int dma_fence_resv_count;
+	unsigned long *dma_fence_excl_bitmap;
+};
+
+/**
+ * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
+ * @resv:      Reservation object to add to the array.
+ * @info:      Pointer to struct with current reservation info
+ * @exclusive: Boolean indicating if exclusive access is needed
+ *
+ * The function adds a new reservation_object to an existing array of
+ * reservation_objects. At the same time keeps track of which objects require
+ * exclusive access in dma_fence_excl_bitmap.
+ */
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive);
+
+/**
+ * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
+ * @katom: Katom with the external dependency.
+ * @info:  Pointer to struct with current reservation info
+ *
+ * Return: An error code or 0 if succeeds
+ */
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info);
+
+/**
+ * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx
+ * @kctx: Pointer to kbase context
+ *
+ * This function will cancel and clean up all katoms on @kctx that is waiting
+ * on dma-buf fences.
+ *
+ * Locking: jctx.lock needs to be held when calling this function.
+ */
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
+ * @katom: Pointer to katom whose callbacks are to be canceled
+ *
+ * This function cancels all dma-buf fence callbacks on @katom, but does not
+ * cancel the katom itself.
+ *
+ * The caller is responsible for ensuring that jd_done_nolock is called on
+ * @katom.
+ *
+ * Locking: jctx.lock must be held when calling this function.
+ */
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
+ * @katom: Pointer to katom to signal and clean up
+ *
+ * This function will signal the @katom's fence, if it has one, and clean up
+ * the callback data from the katom's wait on earlier fences.
+ *
+ * Locking: jctx.lock must be held while calling this function.
+ */
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_term() - Terminate Mali dma-fence context
+ * @kctx: kbase context to terminate
+ */
+void kbase_dma_fence_term(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_init() - Initialize Mali dma-fence context
+ * @kctx: kbase context to initialize
+ */
+int kbase_dma_fence_init(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_waiters_remove()- Remove katom from dma-fence wait list
+ * @katom: Pointer to katom to remove from list
+ */
+void kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom);
+
+#else /* CONFIG_MALI_DMA_FENCE */
+/* Dummy functions for when dma-buf fence isn't enabled. */
+
+static inline int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	return 0;
+}
+
+static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
new file mode 100755
index 0000000..bf8c304
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -0,0 +1,259 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+#include <mali_kbase_tlstream.h>
+
+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct base_jd_udata data;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+	data = katom->udata;
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
+
+	kbase_tlstream_tl_nret_atom_ctx(katom, kctx);
+	kbase_tlstream_tl_del_atom(katom);
+
+	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+
+	wake_up(&katom->completed);
+
+	return data;
+}
+
+int kbase_event_pending(struct kbase_context *ctx)
+{
+	KBASE_DEBUG_ASSERT(ctx);
+
+	return (atomic_read(&ctx->event_count) != 0) ||
+			(atomic_read(&ctx->event_closed) != 0);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+	struct kbase_jd_atom *atom;
+
+	KBASE_DEBUG_ASSERT(ctx);
+
+	mutex_lock(&ctx->event_mutex);
+
+	if (list_empty(&ctx->event_list)) {
+		if (!atomic_read(&ctx->event_closed)) {
+			mutex_unlock(&ctx->event_mutex);
+			return -1;
+		}
+
+		/* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+		mutex_unlock(&ctx->event_mutex);
+		uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+		memset(&uevent->udata, 0, sizeof(uevent->udata));
+		dev_dbg(ctx->kbdev->dev,
+				"event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+				BASE_JD_EVENT_DRV_TERMINATED);
+		return 0;
+	}
+
+	/* normal event processing */
+	atomic_dec(&ctx->event_count);
+	atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
+	list_del(ctx->event_list.next);
+
+	mutex_unlock(&ctx->event_mutex);
+
+	dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+	uevent->event_code = atom->event_code;
+	uevent->atom_number = (atom - ctx->jctx.atoms);
+
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(atom);
+
+	mutex_lock(&ctx->jctx.lock);
+	uevent->udata = kbase_event_process(ctx, atom);
+	mutex_unlock(&ctx->jctx.lock);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue);
+
+/**
+ * kbase_event_process_noreport_worker - Worker for processing atoms that do not
+ *                                       return an event but do have external
+ *                                       resources
+ * @data:  Work structure
+ */
+static void kbase_event_process_noreport_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(katom);
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_event_process(kctx, katom);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+/**
+ * kbase_event_process_noreport - Process atoms that do not return an event
+ * @kctx:  Context pointer
+ * @katom: Atom to be processed
+ *
+ * Atoms that do not have external resources will be processed immediately.
+ * Atoms that do have external resources will be processed on a workqueue, in
+ * order to avoid locking issues.
+ */
+static void kbase_event_process_noreport(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		INIT_WORK(&katom->work, kbase_event_process_noreport_worker);
+		queue_work(kctx->event_workq, &katom->work);
+	} else {
+		kbase_event_process(kctx, katom);
+	}
+}
+
+/**
+ * kbase_event_coalesce - Move pending events to the main event list
+ * @kctx:  Context pointer
+ *
+ * kctx->event_list and kctx->event_coalesce_count must be protected
+ * by a lock unless this is the last thread using them
+ * (and we're about to terminate the lock).
+ *
+ * Return: The number of pending events moved to the main event list
+ */
+static int kbase_event_coalesce(struct kbase_context *kctx)
+{
+	const int event_count = kctx->event_coalesce_count;
+
+	/* Join the list of pending events onto the tail of the main list
+	   and reset it */
+	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
+	kctx->event_coalesce_count = 0;
+
+	/* Return the number of events moved */
+	return event_count;
+}
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+	if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
+		if (atom->event_code == BASE_JD_EVENT_DONE) {
+			/* Don't report the event */
+			kbase_event_process_noreport(ctx, atom);
+			return;
+		}
+	}
+
+	if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
+		/* Don't report the event */
+		kbase_event_process_noreport(ctx, atom);
+		return;
+	}
+
+	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
+		/* Don't report the event until other event(s) have completed */
+		mutex_lock(&ctx->event_mutex);
+		list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
+		++ctx->event_coalesce_count;
+		mutex_unlock(&ctx->event_mutex);
+	} else {
+		/* Report the event and any pending events now */
+		int event_count = 1;
+
+		mutex_lock(&ctx->event_mutex);
+		event_count += kbase_event_coalesce(ctx);
+		list_add_tail(&atom->dep_item[0], &ctx->event_list);
+		atomic_add(event_count, &ctx->event_count);
+		mutex_unlock(&ctx->event_mutex);
+
+		kbase_event_wakeup(ctx);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_event_post);
+
+void kbase_event_close(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->event_mutex);
+	atomic_set(&kctx->event_closed, true);
+	mutex_unlock(&kctx->event_mutex);
+	kbase_event_wakeup(kctx);
+}
+
+int kbase_event_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	INIT_LIST_HEAD(&kctx->event_list);
+	INIT_LIST_HEAD(&kctx->event_coalesce_list);
+	mutex_init(&kctx->event_mutex);
+	atomic_set(&kctx->event_count, 0);
+	kctx->event_coalesce_count = 0;
+	atomic_set(&kctx->event_closed, false);
+	kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+	if (NULL == kctx->event_workq)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init);
+
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+	int event_count;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+	flush_workqueue(kctx->event_workq);
+	destroy_workqueue(kctx->event_workq);
+
+	/* We use kbase_event_dequeue to remove the remaining events as that
+	 * deals with all the cleanup needed for the atoms.
+	 *
+	 * Note: use of kctx->event_list without a lock is safe because this must be the last
+	 * thread using it (because we're about to terminate the lock)
+	 */
+	event_count = kbase_event_coalesce(kctx);
+	atomic_add(event_count, &kctx->event_count);
+
+	while (!list_empty(&kctx->event_list)) {
+		struct base_jd_event_v2 event;
+
+		kbase_event_dequeue(kctx, &event);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
new file mode 100755
index 0000000..ce65b55
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* NB taken from gator  */
+/*
+ * List of possible actions to be controlled by DS-5 Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping
+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
+ * they are unknown inside gator.
+ */
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP  2
+#define GATOR_JOB_SLOT_SOFT_STOPPED  3
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 event, u64 value);
+void kbase_trace_mali_pm_power_off(u32 event, u64 value);
+void kbase_trace_mali_pm_power_on(u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
+void kbase_trace_mali_mmu_as_in_use(int event);
+void kbase_trace_mali_mmu_as_released(int event);
+void kbase_trace_mali_total_alloc_pages_change(long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif  /* _KBASE_GATOR_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
new file mode 100755
index 0000000..4af3e48
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -0,0 +1,329 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_gator_api.h"
+#include "mali_kbase_gator_hwcnt_names.h"
+
+#define MALI_MAX_CORES_PER_GROUP		4
+#define MALI_MAX_NUM_BLOCKS_PER_GROUP	8
+#define MALI_COUNTERS_PER_BLOCK			64
+#define MALI_BYTES_PER_COUNTER			4
+
+struct kbase_gator_hwcnt_handles {
+	struct kbase_device *kbdev;
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct work_struct dump_work;
+	int dump_complete;
+	spinlock_t dump_lock;
+};
+
+static void dump_worker(struct work_struct *work);
+
+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
+{
+	const char * const *hardware_counters;
+	struct kbase_device *kbdev;
+	uint32_t gpu_id;
+	uint32_t product_id;
+	uint32_t count;
+
+	if (!total_counters)
+		return NULL;
+
+	/* Get the first device - it doesn't matter in this case */
+	kbdev = kbase_find_device(-1);
+	if (!kbdev)
+		return NULL;
+
+	gpu_id = kbdev->gpu_props.props.core_props.product_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			hardware_counters = hardware_counters_mali_tMIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tMIx);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n",
+				gpu_id);
+			break;
+		}
+	} else {
+		switch (gpu_id) {
+			/* If we are using a Mali-T60x device */
+		case GPU_ID_PI_T60X:
+			hardware_counters = hardware_counters_mali_t60x;
+			count = ARRAY_SIZE(hardware_counters_mali_t60x);
+			break;
+			/* If we are using a Mali-T62x device */
+		case GPU_ID_PI_T62X:
+			hardware_counters = hardware_counters_mali_t62x;
+			count = ARRAY_SIZE(hardware_counters_mali_t62x);
+			break;
+			/* If we are using a Mali-T72x device */
+		case GPU_ID_PI_T72X:
+			hardware_counters = hardware_counters_mali_t72x;
+			count = ARRAY_SIZE(hardware_counters_mali_t72x);
+			break;
+			/* If we are using a Mali-T76x device */
+		case GPU_ID_PI_T76X:
+			hardware_counters = hardware_counters_mali_t76x;
+			count = ARRAY_SIZE(hardware_counters_mali_t76x);
+			break;
+			/* If we are using a Mali-T82x device */
+		case GPU_ID_PI_T82X:
+			hardware_counters = hardware_counters_mali_t82x;
+			count = ARRAY_SIZE(hardware_counters_mali_t82x);
+			break;
+			/* If we are using a Mali-T83x device */
+		case GPU_ID_PI_T83X:
+			hardware_counters = hardware_counters_mali_t83x;
+			count = ARRAY_SIZE(hardware_counters_mali_t83x);
+			break;
+			/* If we are using a Mali-T86x device */
+		case GPU_ID_PI_T86X:
+			hardware_counters = hardware_counters_mali_t86x;
+			count = ARRAY_SIZE(hardware_counters_mali_t86x);
+			break;
+			/* If we are using a Mali-T88x device */
+		case GPU_ID_PI_TFRX:
+			hardware_counters = hardware_counters_mali_t88x;
+			count = ARRAY_SIZE(hardware_counters_mali_t88x);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n",
+				gpu_id);
+			break;
+		}
+	}
+
+	/* Release the kbdev reference. */
+	kbase_release_device(kbdev);
+
+	*total_counters = count;
+
+	/* If we return a string array take a reference on the module (or fail). */
+	if (hardware_counters && !try_module_get(THIS_MODULE))
+		return NULL;
+
+	return hardware_counters;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
+
+void kbase_gator_hwcnt_term_names(void)
+{
+	/* Release the module reference. */
+	module_put(THIS_MODULE);
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
+
+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	uint32_t dump_size = 0, i = 0;
+
+	if (!in_out_info)
+		return NULL;
+
+	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
+	if (!hand)
+		return NULL;
+
+	INIT_WORK(&hand->dump_work, dump_worker);
+	spin_lock_init(&hand->dump_lock);
+
+	/* Get the first device */
+	hand->kbdev = kbase_find_device(-1);
+	if (!hand->kbdev)
+		goto free_hand;
+
+	dump_size = kbase_vinstr_dump_size(hand->kbdev);
+	hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!hand->vinstr_buffer)
+		goto release_device;
+	in_out_info->kernel_dump_buffer = hand->vinstr_buffer;
+
+	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
+	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
+	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
+
+	/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
+	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
+		uint32_t cg, j;
+		uint64_t core_mask;
+
+		/* There are 8 hardware counters blocks per core group */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			in_out_info->nr_core_groups, GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = in_out_info->nr_core_groups *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			MALI_COUNTERS_PER_BLOCK *
+			MALI_BYTES_PER_COUNTER;
+
+		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
+			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
+
+			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
+				if (core_mask & (1u << j))
+					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+				else
+					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			}
+
+			in_out_info->hwc_layout[i++] = TILER_BLOCK;
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+
+			if (0 == cg)
+				in_out_info->hwc_layout[i++] = JM_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+		}
+	/* If we are using any other device */
+	} else {
+		uint32_t nr_l2, nr_sc_bits, j;
+		uint64_t core_mask;
+
+		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
+
+		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+		nr_sc_bits = fls64(core_mask);
+
+		/* The job manager and tiler sets of counters
+		 * are always present */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+
+		in_out_info->hwc_layout[i++] = JM_BLOCK;
+		in_out_info->hwc_layout[i++] = TILER_BLOCK;
+
+		for (j = 0; j < nr_l2; j++)
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+		while (core_mask != 0ull) {
+			if ((core_mask & 1ull) != 0ull)
+				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			core_mask >>= 1;
+		}
+	}
+
+	in_out_info->nr_hwc_blocks = i;
+	in_out_info->size = dump_size;
+
+	setup.jm_bm = in_out_info->bitmask[0];
+	setup.tiler_bm = in_out_info->bitmask[1];
+	setup.shader_bm = in_out_info->bitmask[2];
+	setup.mmu_l2_bm = in_out_info->bitmask[3];
+	hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx,
+			&setup, hand->vinstr_buffer);
+	if (!hand->vinstr_cli) {
+		dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core");
+		goto free_layout;
+	}
+
+	return hand;
+
+free_layout:
+	kfree(in_out_info->hwc_layout);
+
+free_vinstr_buffer:
+	kfree(hand->vinstr_buffer);
+
+release_device:
+	kbase_release_device(hand->kbdev);
+
+free_hand:
+	kfree(hand);
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
+
+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (in_out_info)
+		kfree(in_out_info->hwc_layout);
+
+	if (opaque_handles) {
+		cancel_work_sync(&opaque_handles->dump_work);
+		kbase_vinstr_detach_client(opaque_handles->vinstr_cli);
+		kfree(opaque_handles->vinstr_buffer);
+		kbase_release_device(opaque_handles->kbdev);
+		kfree(opaque_handles);
+	}
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
+
+static void dump_worker(struct work_struct *work)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+
+	hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work);
+	if (!kbase_vinstr_hwc_dump(hand->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL)) {
+		spin_lock_bh(&hand->dump_lock);
+		hand->dump_complete = 1;
+		spin_unlock_bh(&hand->dump_lock);
+	} else {
+		schedule_work(&hand->dump_work);
+	}
+}
+
+uint32_t kbase_gator_instr_hwcnt_dump_complete(
+		struct kbase_gator_hwcnt_handles *opaque_handles,
+		uint32_t * const success)
+{
+
+	if (opaque_handles && success) {
+		*success = opaque_handles->dump_complete;
+		opaque_handles->dump_complete = 0;
+		return *success;
+	}
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
+
+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (opaque_handles)
+		schedule_work(&opaque_handles->dump_work);
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
new file mode 100755
index 0000000..ef9ac0f
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
@@ -0,0 +1,219 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to fetch hardware counters.
+ */
+
+/* This define is used by the gator kernel module compile to select which DDK
+ * API calling convention to use. If not defined (legacy DDK) gator assumes
+ * version 1. The version to DDK release mapping is:
+ *     Version 1 API: DDK versions r1px, r2px
+ *     Version 2 API: DDK versions r3px, r4px
+ *     Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a
+ * counter dump. If this returns a non-zero value the request has been queued,
+ * otherwise the driver has been unable to do so (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the  previously
+ * requested dump has been succesful. If this returns non-zero the counter dump
+ * has resolved, but the value of *success must also be tested as the dump
+ * may have not been successful. If it returns zero the counter dump was
+ * abandoned due to the device being busy (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ *        kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ *    In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ *        hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ *        hwcnt_name # pointer to name list
+ *
+ *        u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ *        # Iterate over each 64-counter block in this GPU configuration
+ *        for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ *            hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ *            # Skip reserved type blocks - they contain no counters at all
+ *            if( type == RESERVED_BLOCK ) {
+ *                continue;
+ *            }
+ *
+ *            size_t name_offset = type * 64;
+ *            size_t data_offset = i * 64;
+ *
+ *            # Iterate over the names of the counters in this block type
+ *            for( j = 0; j < 64; j++) {
+ *                const char * name = hwcnt_name[name_offset+j];
+ *
+ *                # Skip empty name strings - there is no counter here
+ *                if( name[0] == '\0' ) {
+ *                    continue;
+ *                }
+ *
+ *                u32 data = hwcnt_data[data_offset+j];
+ *
+ *                printk( "COUNTER: %s DATA: %u\n", name, data );
+ *            }
+ *        }
+ *
+ *
+ *     Note that in most implementations you typically want to either SUM or
+ *     AVERAGE multiple instances of the same counter if, for example, you have
+ *     multiple shader cores or multiple L2 caches. The most sensible view for
+ *     analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ *     counters.
+ *
+ * 7] Goto 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling
+ *    kbase_gator_hwcnt_term_names(). This function must only be called if
+ *    init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+enum hwc_type {
+	JM_BLOCK = 0,
+	TILER_BLOCK,
+	SHADER_BLOCK,
+	MMU_L2_BLOCK,
+	RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+	/* Passed from Gator to kbase */
+
+	/* the bitmask of enabled hardware counters for each counter block */
+	uint16_t bitmask[4];
+
+	/* Passed from kbase to Gator */
+
+	/* ptr to counter dump memory */
+	void *kernel_dump_buffer;
+
+	/* size of counter dump memory */
+	uint32_t size;
+
+	/* the ID of the Mali device */
+	uint32_t gpu_id;
+
+	/* the number of shader cores in the GPU */
+	uint32_t nr_cores;
+
+	/* the number of core groups */
+	uint32_t nr_core_groups;
+
+	/* the memory layout of the performance counters */
+	enum hwc_type *hwc_layout;
+
+	/* the total number of hardware couter blocks */
+	uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info   A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ *                      specific information that will be returned to Gator. On entry Gator must have populated the
+ *                      'bitmask' field with the counters it wishes to enable for each class of counter block.
+ *                      Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ *                      enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ *                      the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ *                      kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU.
+ *
+ * @return              Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info       A pointer to a structure containing the enabled counters passed from Gator and all the
+ *                          Mali specific information that will be returned to Gator.
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ * @param[out] success      Non-zero on success, zero on failure.
+ *
+ * @return                  Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ *                          completed dump may not have dumped succesfully, so the caller must test for both
+ *                          a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ *
+ * @return                  Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_counters The total number of counters short names in the Mali devices' list.
+ *
+ * @return                    Pointer to an array of strings of length *total_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
new file mode 100755
index 0000000..c247dd6
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -0,0 +1,2163 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_H_
+#define _KBASE_GATOR_HWCNT_NAMES_H_
+
+/*
+ * "Short names" for hardware counters used by Streamline. Counters names are
+ * stored in accordance with their memory layout in the binary counter block
+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
+ * of 64 counters, and each GPU implements the same set of "masters" although
+ * the counters each master exposes within its block of 64 may vary.
+ *
+ * Counters which are an empty string are simply "holes" in the counter memory
+ * where no counter exists.
+ */
+
+static const char * const hardware_counters_mali_t60x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MESSAGES_SENT",
+	"T60x_MESSAGES_RECEIVED",
+	"T60x_GPU_ACTIVE",
+	"T60x_IRQ_ACTIVE",
+	"T60x_JS0_JOBS",
+	"T60x_JS0_TASKS",
+	"T60x_JS0_ACTIVE",
+	"",
+	"T60x_JS0_WAIT_READ",
+	"T60x_JS0_WAIT_ISSUE",
+	"T60x_JS0_WAIT_DEPEND",
+	"T60x_JS0_WAIT_FINISH",
+	"T60x_JS1_JOBS",
+	"T60x_JS1_TASKS",
+	"T60x_JS1_ACTIVE",
+	"",
+	"T60x_JS1_WAIT_READ",
+	"T60x_JS1_WAIT_ISSUE",
+	"T60x_JS1_WAIT_DEPEND",
+	"T60x_JS1_WAIT_FINISH",
+	"T60x_JS2_JOBS",
+	"T60x_JS2_TASKS",
+	"T60x_JS2_ACTIVE",
+	"",
+	"T60x_JS2_WAIT_READ",
+	"T60x_JS2_WAIT_ISSUE",
+	"T60x_JS2_WAIT_DEPEND",
+	"T60x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T60x_TI_JOBS_PROCESSED",
+	"T60x_TI_TRIANGLES",
+	"T60x_TI_QUADS",
+	"T60x_TI_POLYGONS",
+	"T60x_TI_POINTS",
+	"T60x_TI_LINES",
+	"T60x_TI_VCACHE_HIT",
+	"T60x_TI_VCACHE_MISS",
+	"T60x_TI_FRONT_FACING",
+	"T60x_TI_BACK_FACING",
+	"T60x_TI_PRIM_VISIBLE",
+	"T60x_TI_PRIM_CULLED",
+	"T60x_TI_PRIM_CLIPPED",
+	"T60x_TI_LEVEL0",
+	"T60x_TI_LEVEL1",
+	"T60x_TI_LEVEL2",
+	"T60x_TI_LEVEL3",
+	"T60x_TI_LEVEL4",
+	"T60x_TI_LEVEL5",
+	"T60x_TI_LEVEL6",
+	"T60x_TI_LEVEL7",
+	"T60x_TI_COMMAND_1",
+	"T60x_TI_COMMAND_2",
+	"T60x_TI_COMMAND_3",
+	"T60x_TI_COMMAND_4",
+	"T60x_TI_COMMAND_4_7",
+	"T60x_TI_COMMAND_8_15",
+	"T60x_TI_COMMAND_16_63",
+	"T60x_TI_COMMAND_64",
+	"T60x_TI_COMPRESS_IN",
+	"T60x_TI_COMPRESS_OUT",
+	"T60x_TI_COMPRESS_FLUSH",
+	"T60x_TI_TIMESTAMPS",
+	"T60x_TI_PCACHE_HIT",
+	"T60x_TI_PCACHE_MISS",
+	"T60x_TI_PCACHE_LINE",
+	"T60x_TI_PCACHE_STALL",
+	"T60x_TI_WRBUF_HIT",
+	"T60x_TI_WRBUF_MISS",
+	"T60x_TI_WRBUF_LINE",
+	"T60x_TI_WRBUF_PARTIAL",
+	"T60x_TI_WRBUF_STALL",
+	"T60x_TI_ACTIVE",
+	"T60x_TI_LOADING_DESC",
+	"T60x_TI_INDEX_WAIT",
+	"T60x_TI_INDEX_RANGE_WAIT",
+	"T60x_TI_VERTEX_WAIT",
+	"T60x_TI_PCACHE_WAIT",
+	"T60x_TI_WRBUF_WAIT",
+	"T60x_TI_BUS_READ",
+	"T60x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_TI_UTLB_STALL",
+	"T60x_TI_UTLB_REPLAY_MISS",
+	"T60x_TI_UTLB_REPLAY_FULL",
+	"T60x_TI_UTLB_NEW_MISS",
+	"T60x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T60x_FRAG_ACTIVE",
+	"T60x_FRAG_PRIMITIVES",
+	"T60x_FRAG_PRIMITIVES_DROPPED",
+	"T60x_FRAG_CYCLES_DESC",
+	"T60x_FRAG_CYCLES_PLR",
+	"T60x_FRAG_CYCLES_VERT",
+	"T60x_FRAG_CYCLES_TRISETUP",
+	"T60x_FRAG_CYCLES_RAST",
+	"T60x_FRAG_THREADS",
+	"T60x_FRAG_DUMMY_THREADS",
+	"T60x_FRAG_QUADS_RAST",
+	"T60x_FRAG_QUADS_EZS_TEST",
+	"T60x_FRAG_QUADS_EZS_KILLED",
+	"T60x_FRAG_THREADS_LZS_TEST",
+	"T60x_FRAG_THREADS_LZS_KILLED",
+	"T60x_FRAG_CYCLES_NO_TILE",
+	"T60x_FRAG_NUM_TILES",
+	"T60x_FRAG_TRANS_ELIM",
+	"T60x_COMPUTE_ACTIVE",
+	"T60x_COMPUTE_TASKS",
+	"T60x_COMPUTE_THREADS",
+	"T60x_COMPUTE_CYCLES_DESC",
+	"T60x_TRIPIPE_ACTIVE",
+	"T60x_ARITH_WORDS",
+	"T60x_ARITH_CYCLES_REG",
+	"T60x_ARITH_CYCLES_L0",
+	"T60x_ARITH_FRAG_DEPEND",
+	"T60x_LS_WORDS",
+	"T60x_LS_ISSUES",
+	"T60x_LS_RESTARTS",
+	"T60x_LS_REISSUES_MISS",
+	"T60x_LS_REISSUES_VD",
+	"T60x_LS_REISSUE_ATTRIB_MISS",
+	"T60x_LS_NO_WB",
+	"T60x_TEX_WORDS",
+	"T60x_TEX_BUBBLES",
+	"T60x_TEX_WORDS_L0",
+	"T60x_TEX_WORDS_DESC",
+	"T60x_TEX_ISSUES",
+	"T60x_TEX_RECIRC_FMISS",
+	"T60x_TEX_RECIRC_DESC",
+	"T60x_TEX_RECIRC_MULTI",
+	"T60x_TEX_RECIRC_PMISS",
+	"T60x_TEX_RECIRC_CONF",
+	"T60x_LSC_READ_HITS",
+	"T60x_LSC_READ_MISSES",
+	"T60x_LSC_WRITE_HITS",
+	"T60x_LSC_WRITE_MISSES",
+	"T60x_LSC_ATOMIC_HITS",
+	"T60x_LSC_ATOMIC_MISSES",
+	"T60x_LSC_LINE_FETCHES",
+	"T60x_LSC_DIRTY_LINE",
+	"T60x_LSC_SNOOPS",
+	"T60x_AXI_TLB_STALL",
+	"T60x_AXI_TLB_MISS",
+	"T60x_AXI_TLB_TRANSACTION",
+	"T60x_LS_TLB_MISS",
+	"T60x_LS_TLB_HIT",
+	"T60x_AXI_BEATS_READ",
+	"T60x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MMU_HIT",
+	"T60x_MMU_NEW_MISS",
+	"T60x_MMU_REPLAY_FULL",
+	"T60x_MMU_REPLAY_MISS",
+	"T60x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_UTLB_HIT",
+	"T60x_UTLB_NEW_MISS",
+	"T60x_UTLB_REPLAY_FULL",
+	"T60x_UTLB_REPLAY_MISS",
+	"T60x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_L2_EXT_WRITE_BEATS",
+	"T60x_L2_EXT_READ_BEATS",
+	"T60x_L2_ANY_LOOKUP",
+	"T60x_L2_READ_LOOKUP",
+	"T60x_L2_SREAD_LOOKUP",
+	"T60x_L2_READ_REPLAY",
+	"T60x_L2_READ_SNOOP",
+	"T60x_L2_READ_HIT",
+	"T60x_L2_CLEAN_MISS",
+	"T60x_L2_WRITE_LOOKUP",
+	"T60x_L2_SWRITE_LOOKUP",
+	"T60x_L2_WRITE_REPLAY",
+	"T60x_L2_WRITE_SNOOP",
+	"T60x_L2_WRITE_HIT",
+	"T60x_L2_EXT_READ_FULL",
+	"T60x_L2_EXT_READ_HALF",
+	"T60x_L2_EXT_WRITE_FULL",
+	"T60x_L2_EXT_WRITE_HALF",
+	"T60x_L2_EXT_READ",
+	"T60x_L2_EXT_READ_LINE",
+	"T60x_L2_EXT_WRITE",
+	"T60x_L2_EXT_WRITE_LINE",
+	"T60x_L2_EXT_WRITE_SMALL",
+	"T60x_L2_EXT_BARRIER",
+	"T60x_L2_EXT_AR_STALL",
+	"T60x_L2_EXT_R_BUF_FULL",
+	"T60x_L2_EXT_RD_BUF_FULL",
+	"T60x_L2_EXT_R_RAW",
+	"T60x_L2_EXT_W_STALL",
+	"T60x_L2_EXT_W_BUF_FULL",
+	"T60x_L2_EXT_R_W_HAZARD",
+	"T60x_L2_TAG_HAZARD",
+	"T60x_L2_SNOOP_FULL",
+	"T60x_L2_REPLAY_FULL"
+};
+static const char * const hardware_counters_mali_t62x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MESSAGES_SENT",
+	"T62x_MESSAGES_RECEIVED",
+	"T62x_GPU_ACTIVE",
+	"T62x_IRQ_ACTIVE",
+	"T62x_JS0_JOBS",
+	"T62x_JS0_TASKS",
+	"T62x_JS0_ACTIVE",
+	"",
+	"T62x_JS0_WAIT_READ",
+	"T62x_JS0_WAIT_ISSUE",
+	"T62x_JS0_WAIT_DEPEND",
+	"T62x_JS0_WAIT_FINISH",
+	"T62x_JS1_JOBS",
+	"T62x_JS1_TASKS",
+	"T62x_JS1_ACTIVE",
+	"",
+	"T62x_JS1_WAIT_READ",
+	"T62x_JS1_WAIT_ISSUE",
+	"T62x_JS1_WAIT_DEPEND",
+	"T62x_JS1_WAIT_FINISH",
+	"T62x_JS2_JOBS",
+	"T62x_JS2_TASKS",
+	"T62x_JS2_ACTIVE",
+	"",
+	"T62x_JS2_WAIT_READ",
+	"T62x_JS2_WAIT_ISSUE",
+	"T62x_JS2_WAIT_DEPEND",
+	"T62x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T62x_TI_JOBS_PROCESSED",
+	"T62x_TI_TRIANGLES",
+	"T62x_TI_QUADS",
+	"T62x_TI_POLYGONS",
+	"T62x_TI_POINTS",
+	"T62x_TI_LINES",
+	"T62x_TI_VCACHE_HIT",
+	"T62x_TI_VCACHE_MISS",
+	"T62x_TI_FRONT_FACING",
+	"T62x_TI_BACK_FACING",
+	"T62x_TI_PRIM_VISIBLE",
+	"T62x_TI_PRIM_CULLED",
+	"T62x_TI_PRIM_CLIPPED",
+	"T62x_TI_LEVEL0",
+	"T62x_TI_LEVEL1",
+	"T62x_TI_LEVEL2",
+	"T62x_TI_LEVEL3",
+	"T62x_TI_LEVEL4",
+	"T62x_TI_LEVEL5",
+	"T62x_TI_LEVEL6",
+	"T62x_TI_LEVEL7",
+	"T62x_TI_COMMAND_1",
+	"T62x_TI_COMMAND_2",
+	"T62x_TI_COMMAND_3",
+	"T62x_TI_COMMAND_4",
+	"T62x_TI_COMMAND_5_7",
+	"T62x_TI_COMMAND_8_15",
+	"T62x_TI_COMMAND_16_63",
+	"T62x_TI_COMMAND_64",
+	"T62x_TI_COMPRESS_IN",
+	"T62x_TI_COMPRESS_OUT",
+	"T62x_TI_COMPRESS_FLUSH",
+	"T62x_TI_TIMESTAMPS",
+	"T62x_TI_PCACHE_HIT",
+	"T62x_TI_PCACHE_MISS",
+	"T62x_TI_PCACHE_LINE",
+	"T62x_TI_PCACHE_STALL",
+	"T62x_TI_WRBUF_HIT",
+	"T62x_TI_WRBUF_MISS",
+	"T62x_TI_WRBUF_LINE",
+	"T62x_TI_WRBUF_PARTIAL",
+	"T62x_TI_WRBUF_STALL",
+	"T62x_TI_ACTIVE",
+	"T62x_TI_LOADING_DESC",
+	"T62x_TI_INDEX_WAIT",
+	"T62x_TI_INDEX_RANGE_WAIT",
+	"T62x_TI_VERTEX_WAIT",
+	"T62x_TI_PCACHE_WAIT",
+	"T62x_TI_WRBUF_WAIT",
+	"T62x_TI_BUS_READ",
+	"T62x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_TI_UTLB_STALL",
+	"T62x_TI_UTLB_REPLAY_MISS",
+	"T62x_TI_UTLB_REPLAY_FULL",
+	"T62x_TI_UTLB_NEW_MISS",
+	"T62x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"T62x_SHADER_CORE_ACTIVE",
+	"T62x_FRAG_ACTIVE",
+	"T62x_FRAG_PRIMITIVES",
+	"T62x_FRAG_PRIMITIVES_DROPPED",
+	"T62x_FRAG_CYCLES_DESC",
+	"T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T62x_FRAG_CYCLES_VERT",
+	"T62x_FRAG_CYCLES_TRISETUP",
+	"T62x_FRAG_CYCLES_EZS_ACTIVE",
+	"T62x_FRAG_THREADS",
+	"T62x_FRAG_DUMMY_THREADS",
+	"T62x_FRAG_QUADS_RAST",
+	"T62x_FRAG_QUADS_EZS_TEST",
+	"T62x_FRAG_QUADS_EZS_KILLED",
+	"T62x_FRAG_THREADS_LZS_TEST",
+	"T62x_FRAG_THREADS_LZS_KILLED",
+	"T62x_FRAG_CYCLES_NO_TILE",
+	"T62x_FRAG_NUM_TILES",
+	"T62x_FRAG_TRANS_ELIM",
+	"T62x_COMPUTE_ACTIVE",
+	"T62x_COMPUTE_TASKS",
+	"T62x_COMPUTE_THREADS",
+	"T62x_COMPUTE_CYCLES_DESC",
+	"T62x_TRIPIPE_ACTIVE",
+	"T62x_ARITH_WORDS",
+	"T62x_ARITH_CYCLES_REG",
+	"T62x_ARITH_CYCLES_L0",
+	"T62x_ARITH_FRAG_DEPEND",
+	"T62x_LS_WORDS",
+	"T62x_LS_ISSUES",
+	"T62x_LS_RESTARTS",
+	"T62x_LS_REISSUES_MISS",
+	"T62x_LS_REISSUES_VD",
+	"T62x_LS_REISSUE_ATTRIB_MISS",
+	"T62x_LS_NO_WB",
+	"T62x_TEX_WORDS",
+	"T62x_TEX_BUBBLES",
+	"T62x_TEX_WORDS_L0",
+	"T62x_TEX_WORDS_DESC",
+	"T62x_TEX_ISSUES",
+	"T62x_TEX_RECIRC_FMISS",
+	"T62x_TEX_RECIRC_DESC",
+	"T62x_TEX_RECIRC_MULTI",
+	"T62x_TEX_RECIRC_PMISS",
+	"T62x_TEX_RECIRC_CONF",
+	"T62x_LSC_READ_HITS",
+	"T62x_LSC_READ_MISSES",
+	"T62x_LSC_WRITE_HITS",
+	"T62x_LSC_WRITE_MISSES",
+	"T62x_LSC_ATOMIC_HITS",
+	"T62x_LSC_ATOMIC_MISSES",
+	"T62x_LSC_LINE_FETCHES",
+	"T62x_LSC_DIRTY_LINE",
+	"T62x_LSC_SNOOPS",
+	"T62x_AXI_TLB_STALL",
+	"T62x_AXI_TLB_MISS",
+	"T62x_AXI_TLB_TRANSACTION",
+	"T62x_LS_TLB_MISS",
+	"T62x_LS_TLB_HIT",
+	"T62x_AXI_BEATS_READ",
+	"T62x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MMU_HIT",
+	"T62x_MMU_NEW_MISS",
+	"T62x_MMU_REPLAY_FULL",
+	"T62x_MMU_REPLAY_MISS",
+	"T62x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_UTLB_HIT",
+	"T62x_UTLB_NEW_MISS",
+	"T62x_UTLB_REPLAY_FULL",
+	"T62x_UTLB_REPLAY_MISS",
+	"T62x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_L2_EXT_WRITE_BEATS",
+	"T62x_L2_EXT_READ_BEATS",
+	"T62x_L2_ANY_LOOKUP",
+	"T62x_L2_READ_LOOKUP",
+	"T62x_L2_SREAD_LOOKUP",
+	"T62x_L2_READ_REPLAY",
+	"T62x_L2_READ_SNOOP",
+	"T62x_L2_READ_HIT",
+	"T62x_L2_CLEAN_MISS",
+	"T62x_L2_WRITE_LOOKUP",
+	"T62x_L2_SWRITE_LOOKUP",
+	"T62x_L2_WRITE_REPLAY",
+	"T62x_L2_WRITE_SNOOP",
+	"T62x_L2_WRITE_HIT",
+	"T62x_L2_EXT_READ_FULL",
+	"T62x_L2_EXT_READ_HALF",
+	"T62x_L2_EXT_WRITE_FULL",
+	"T62x_L2_EXT_WRITE_HALF",
+	"T62x_L2_EXT_READ",
+	"T62x_L2_EXT_READ_LINE",
+	"T62x_L2_EXT_WRITE",
+	"T62x_L2_EXT_WRITE_LINE",
+	"T62x_L2_EXT_WRITE_SMALL",
+	"T62x_L2_EXT_BARRIER",
+	"T62x_L2_EXT_AR_STALL",
+	"T62x_L2_EXT_R_BUF_FULL",
+	"T62x_L2_EXT_RD_BUF_FULL",
+	"T62x_L2_EXT_R_RAW",
+	"T62x_L2_EXT_W_STALL",
+	"T62x_L2_EXT_W_BUF_FULL",
+	"T62x_L2_EXT_R_W_HAZARD",
+	"T62x_L2_TAG_HAZARD",
+	"T62x_L2_SNOOP_FULL",
+	"T62x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t72x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T72x_GPU_ACTIVE",
+	"T72x_IRQ_ACTIVE",
+	"T72x_JS0_JOBS",
+	"T72x_JS0_TASKS",
+	"T72x_JS0_ACTIVE",
+	"T72x_JS1_JOBS",
+	"T72x_JS1_TASKS",
+	"T72x_JS1_ACTIVE",
+	"T72x_JS2_JOBS",
+	"T72x_JS2_TASKS",
+	"T72x_JS2_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T72x_TI_JOBS_PROCESSED",
+	"T72x_TI_TRIANGLES",
+	"T72x_TI_QUADS",
+	"T72x_TI_POLYGONS",
+	"T72x_TI_POINTS",
+	"T72x_TI_LINES",
+	"T72x_TI_FRONT_FACING",
+	"T72x_TI_BACK_FACING",
+	"T72x_TI_PRIM_VISIBLE",
+	"T72x_TI_PRIM_CULLED",
+	"T72x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T72x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T72x_FRAG_ACTIVE",
+	"T72x_FRAG_PRIMITIVES",
+	"T72x_FRAG_PRIMITIVES_DROPPED",
+	"T72x_FRAG_THREADS",
+	"T72x_FRAG_DUMMY_THREADS",
+	"T72x_FRAG_QUADS_RAST",
+	"T72x_FRAG_QUADS_EZS_TEST",
+	"T72x_FRAG_QUADS_EZS_KILLED",
+	"T72x_FRAG_THREADS_LZS_TEST",
+	"T72x_FRAG_THREADS_LZS_KILLED",
+	"T72x_FRAG_CYCLES_NO_TILE",
+	"T72x_FRAG_NUM_TILES",
+	"T72x_FRAG_TRANS_ELIM",
+	"T72x_COMPUTE_ACTIVE",
+	"T72x_COMPUTE_TASKS",
+	"T72x_COMPUTE_THREADS",
+	"T72x_TRIPIPE_ACTIVE",
+	"T72x_ARITH_WORDS",
+	"T72x_ARITH_CYCLES_REG",
+	"T72x_LS_WORDS",
+	"T72x_LS_ISSUES",
+	"T72x_LS_RESTARTS",
+	"T72x_LS_REISSUES_MISS",
+	"T72x_TEX_WORDS",
+	"T72x_TEX_BUBBLES",
+	"T72x_TEX_ISSUES",
+	"T72x_LSC_READ_HITS",
+	"T72x_LSC_READ_MISSES",
+	"T72x_LSC_WRITE_HITS",
+	"T72x_LSC_WRITE_MISSES",
+	"T72x_LSC_ATOMIC_HITS",
+	"T72x_LSC_ATOMIC_MISSES",
+	"T72x_LSC_LINE_FETCHES",
+	"T72x_LSC_DIRTY_LINE",
+	"T72x_LSC_SNOOPS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T72x_L2_EXT_WRITE_BEAT",
+	"T72x_L2_EXT_READ_BEAT",
+	"T72x_L2_READ_SNOOP",
+	"T72x_L2_READ_HIT",
+	"T72x_L2_WRITE_SNOOP",
+	"T72x_L2_WRITE_HIT",
+	"T72x_L2_EXT_WRITE_SMALL",
+	"T72x_L2_EXT_BARRIER",
+	"T72x_L2_EXT_AR_STALL",
+	"T72x_L2_EXT_W_STALL",
+	"T72x_L2_SNOOP_FULL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	""
+};
+
+static const char * const hardware_counters_mali_t76x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MESSAGES_SENT",
+	"T76x_MESSAGES_RECEIVED",
+	"T76x_GPU_ACTIVE",
+	"T76x_IRQ_ACTIVE",
+	"T76x_JS0_JOBS",
+	"T76x_JS0_TASKS",
+	"T76x_JS0_ACTIVE",
+	"",
+	"T76x_JS0_WAIT_READ",
+	"T76x_JS0_WAIT_ISSUE",
+	"T76x_JS0_WAIT_DEPEND",
+	"T76x_JS0_WAIT_FINISH",
+	"T76x_JS1_JOBS",
+	"T76x_JS1_TASKS",
+	"T76x_JS1_ACTIVE",
+	"",
+	"T76x_JS1_WAIT_READ",
+	"T76x_JS1_WAIT_ISSUE",
+	"T76x_JS1_WAIT_DEPEND",
+	"T76x_JS1_WAIT_FINISH",
+	"T76x_JS2_JOBS",
+	"T76x_JS2_TASKS",
+	"T76x_JS2_ACTIVE",
+	"",
+	"T76x_JS2_WAIT_READ",
+	"T76x_JS2_WAIT_ISSUE",
+	"T76x_JS2_WAIT_DEPEND",
+	"T76x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T76x_TI_JOBS_PROCESSED",
+	"T76x_TI_TRIANGLES",
+	"T76x_TI_QUADS",
+	"T76x_TI_POLYGONS",
+	"T76x_TI_POINTS",
+	"T76x_TI_LINES",
+	"T76x_TI_VCACHE_HIT",
+	"T76x_TI_VCACHE_MISS",
+	"T76x_TI_FRONT_FACING",
+	"T76x_TI_BACK_FACING",
+	"T76x_TI_PRIM_VISIBLE",
+	"T76x_TI_PRIM_CULLED",
+	"T76x_TI_PRIM_CLIPPED",
+	"T76x_TI_LEVEL0",
+	"T76x_TI_LEVEL1",
+	"T76x_TI_LEVEL2",
+	"T76x_TI_LEVEL3",
+	"T76x_TI_LEVEL4",
+	"T76x_TI_LEVEL5",
+	"T76x_TI_LEVEL6",
+	"T76x_TI_LEVEL7",
+	"T76x_TI_COMMAND_1",
+	"T76x_TI_COMMAND_2",
+	"T76x_TI_COMMAND_3",
+	"T76x_TI_COMMAND_4",
+	"T76x_TI_COMMAND_5_7",
+	"T76x_TI_COMMAND_8_15",
+	"T76x_TI_COMMAND_16_63",
+	"T76x_TI_COMMAND_64",
+	"T76x_TI_COMPRESS_IN",
+	"T76x_TI_COMPRESS_OUT",
+	"T76x_TI_COMPRESS_FLUSH",
+	"T76x_TI_TIMESTAMPS",
+	"T76x_TI_PCACHE_HIT",
+	"T76x_TI_PCACHE_MISS",
+	"T76x_TI_PCACHE_LINE",
+	"T76x_TI_PCACHE_STALL",
+	"T76x_TI_WRBUF_HIT",
+	"T76x_TI_WRBUF_MISS",
+	"T76x_TI_WRBUF_LINE",
+	"T76x_TI_WRBUF_PARTIAL",
+	"T76x_TI_WRBUF_STALL",
+	"T76x_TI_ACTIVE",
+	"T76x_TI_LOADING_DESC",
+	"T76x_TI_INDEX_WAIT",
+	"T76x_TI_INDEX_RANGE_WAIT",
+	"T76x_TI_VERTEX_WAIT",
+	"T76x_TI_PCACHE_WAIT",
+	"T76x_TI_WRBUF_WAIT",
+	"T76x_TI_BUS_READ",
+	"T76x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_TI_UTLB_HIT",
+	"T76x_TI_UTLB_NEW_MISS",
+	"T76x_TI_UTLB_REPLAY_FULL",
+	"T76x_TI_UTLB_REPLAY_MISS",
+	"T76x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T76x_FRAG_ACTIVE",
+	"T76x_FRAG_PRIMITIVES",
+	"T76x_FRAG_PRIMITIVES_DROPPED",
+	"T76x_FRAG_CYCLES_DESC",
+	"T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T76x_FRAG_CYCLES_VERT",
+	"T76x_FRAG_CYCLES_TRISETUP",
+	"T76x_FRAG_CYCLES_EZS_ACTIVE",
+	"T76x_FRAG_THREADS",
+	"T76x_FRAG_DUMMY_THREADS",
+	"T76x_FRAG_QUADS_RAST",
+	"T76x_FRAG_QUADS_EZS_TEST",
+	"T76x_FRAG_QUADS_EZS_KILLED",
+	"T76x_FRAG_THREADS_LZS_TEST",
+	"T76x_FRAG_THREADS_LZS_KILLED",
+	"T76x_FRAG_CYCLES_NO_TILE",
+	"T76x_FRAG_NUM_TILES",
+	"T76x_FRAG_TRANS_ELIM",
+	"T76x_COMPUTE_ACTIVE",
+	"T76x_COMPUTE_TASKS",
+	"T76x_COMPUTE_THREADS",
+	"T76x_COMPUTE_CYCLES_DESC",
+	"T76x_TRIPIPE_ACTIVE",
+	"T76x_ARITH_WORDS",
+	"T76x_ARITH_CYCLES_REG",
+	"T76x_ARITH_CYCLES_L0",
+	"T76x_ARITH_FRAG_DEPEND",
+	"T76x_LS_WORDS",
+	"T76x_LS_ISSUES",
+	"T76x_LS_REISSUE_ATTR",
+	"T76x_LS_REISSUES_VARY",
+	"T76x_LS_VARY_RV_MISS",
+	"T76x_LS_VARY_RV_HIT",
+	"T76x_LS_NO_UNPARK",
+	"T76x_TEX_WORDS",
+	"T76x_TEX_BUBBLES",
+	"T76x_TEX_WORDS_L0",
+	"T76x_TEX_WORDS_DESC",
+	"T76x_TEX_ISSUES",
+	"T76x_TEX_RECIRC_FMISS",
+	"T76x_TEX_RECIRC_DESC",
+	"T76x_TEX_RECIRC_MULTI",
+	"T76x_TEX_RECIRC_PMISS",
+	"T76x_TEX_RECIRC_CONF",
+	"T76x_LSC_READ_HITS",
+	"T76x_LSC_READ_OP",
+	"T76x_LSC_WRITE_HITS",
+	"T76x_LSC_WRITE_OP",
+	"T76x_LSC_ATOMIC_HITS",
+	"T76x_LSC_ATOMIC_OP",
+	"T76x_LSC_LINE_FETCHES",
+	"T76x_LSC_DIRTY_LINE",
+	"T76x_LSC_SNOOPS",
+	"T76x_AXI_TLB_STALL",
+	"T76x_AXI_TLB_MISS",
+	"T76x_AXI_TLB_TRANSACTION",
+	"T76x_LS_TLB_MISS",
+	"T76x_LS_TLB_HIT",
+	"T76x_AXI_BEATS_READ",
+	"T76x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MMU_HIT",
+	"T76x_MMU_NEW_MISS",
+	"T76x_MMU_REPLAY_FULL",
+	"T76x_MMU_REPLAY_MISS",
+	"T76x_MMU_TABLE_WALK",
+	"T76x_MMU_REQUESTS",
+	"",
+	"",
+	"T76x_UTLB_HIT",
+	"T76x_UTLB_NEW_MISS",
+	"T76x_UTLB_REPLAY_FULL",
+	"T76x_UTLB_REPLAY_MISS",
+	"T76x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_L2_EXT_WRITE_BEATS",
+	"T76x_L2_EXT_READ_BEATS",
+	"T76x_L2_ANY_LOOKUP",
+	"T76x_L2_READ_LOOKUP",
+	"T76x_L2_SREAD_LOOKUP",
+	"T76x_L2_READ_REPLAY",
+	"T76x_L2_READ_SNOOP",
+	"T76x_L2_READ_HIT",
+	"T76x_L2_CLEAN_MISS",
+	"T76x_L2_WRITE_LOOKUP",
+	"T76x_L2_SWRITE_LOOKUP",
+	"T76x_L2_WRITE_REPLAY",
+	"T76x_L2_WRITE_SNOOP",
+	"T76x_L2_WRITE_HIT",
+	"T76x_L2_EXT_READ_FULL",
+	"",
+	"T76x_L2_EXT_WRITE_FULL",
+	"T76x_L2_EXT_R_W_HAZARD",
+	"T76x_L2_EXT_READ",
+	"T76x_L2_EXT_READ_LINE",
+	"T76x_L2_EXT_WRITE",
+	"T76x_L2_EXT_WRITE_LINE",
+	"T76x_L2_EXT_WRITE_SMALL",
+	"T76x_L2_EXT_BARRIER",
+	"T76x_L2_EXT_AR_STALL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_EXT_RD_BUF_FULL",
+	"T76x_L2_EXT_R_RAW",
+	"T76x_L2_EXT_W_STALL",
+	"T76x_L2_EXT_W_BUF_FULL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_TAG_HAZARD",
+	"T76x_L2_SNOOP_FULL",
+	"T76x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t82x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MESSAGES_SENT",
+	"T82x_MESSAGES_RECEIVED",
+	"T82x_GPU_ACTIVE",
+	"T82x_IRQ_ACTIVE",
+	"T82x_JS0_JOBS",
+	"T82x_JS0_TASKS",
+	"T82x_JS0_ACTIVE",
+	"",
+	"T82x_JS0_WAIT_READ",
+	"T82x_JS0_WAIT_ISSUE",
+	"T82x_JS0_WAIT_DEPEND",
+	"T82x_JS0_WAIT_FINISH",
+	"T82x_JS1_JOBS",
+	"T82x_JS1_TASKS",
+	"T82x_JS1_ACTIVE",
+	"",
+	"T82x_JS1_WAIT_READ",
+	"T82x_JS1_WAIT_ISSUE",
+	"T82x_JS1_WAIT_DEPEND",
+	"T82x_JS1_WAIT_FINISH",
+	"T82x_JS2_JOBS",
+	"T82x_JS2_TASKS",
+	"T82x_JS2_ACTIVE",
+	"",
+	"T82x_JS2_WAIT_READ",
+	"T82x_JS2_WAIT_ISSUE",
+	"T82x_JS2_WAIT_DEPEND",
+	"T82x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T82x_TI_JOBS_PROCESSED",
+	"T82x_TI_TRIANGLES",
+	"T82x_TI_QUADS",
+	"T82x_TI_POLYGONS",
+	"T82x_TI_POINTS",
+	"T82x_TI_LINES",
+	"T82x_TI_FRONT_FACING",
+	"T82x_TI_BACK_FACING",
+	"T82x_TI_PRIM_VISIBLE",
+	"T82x_TI_PRIM_CULLED",
+	"T82x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T82x_FRAG_ACTIVE",
+	"T82x_FRAG_PRIMITIVES",
+	"T82x_FRAG_PRIMITIVES_DROPPED",
+	"T82x_FRAG_CYCLES_DESC",
+	"T82x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T82x_FRAG_CYCLES_VERT",
+	"T82x_FRAG_CYCLES_TRISETUP",
+	"T82x_FRAG_CYCLES_EZS_ACTIVE",
+	"T82x_FRAG_THREADS",
+	"T82x_FRAG_DUMMY_THREADS",
+	"T82x_FRAG_QUADS_RAST",
+	"T82x_FRAG_QUADS_EZS_TEST",
+	"T82x_FRAG_QUADS_EZS_KILLED",
+	"T82x_FRAG_THREADS_LZS_TEST",
+	"T82x_FRAG_THREADS_LZS_KILLED",
+	"T82x_FRAG_CYCLES_NO_TILE",
+	"T82x_FRAG_NUM_TILES",
+	"T82x_FRAG_TRANS_ELIM",
+	"T82x_COMPUTE_ACTIVE",
+	"T82x_COMPUTE_TASKS",
+	"T82x_COMPUTE_THREADS",
+	"T82x_COMPUTE_CYCLES_DESC",
+	"T82x_TRIPIPE_ACTIVE",
+	"T82x_ARITH_WORDS",
+	"T82x_ARITH_CYCLES_REG",
+	"T82x_ARITH_CYCLES_L0",
+	"T82x_ARITH_FRAG_DEPEND",
+	"T82x_LS_WORDS",
+	"T82x_LS_ISSUES",
+	"T82x_LS_REISSUE_ATTR",
+	"T82x_LS_REISSUES_VARY",
+	"T82x_LS_VARY_RV_MISS",
+	"T82x_LS_VARY_RV_HIT",
+	"T82x_LS_NO_UNPARK",
+	"T82x_TEX_WORDS",
+	"T82x_TEX_BUBBLES",
+	"T82x_TEX_WORDS_L0",
+	"T82x_TEX_WORDS_DESC",
+	"T82x_TEX_ISSUES",
+	"T82x_TEX_RECIRC_FMISS",
+	"T82x_TEX_RECIRC_DESC",
+	"T82x_TEX_RECIRC_MULTI",
+	"T82x_TEX_RECIRC_PMISS",
+	"T82x_TEX_RECIRC_CONF",
+	"T82x_LSC_READ_HITS",
+	"T82x_LSC_READ_OP",
+	"T82x_LSC_WRITE_HITS",
+	"T82x_LSC_WRITE_OP",
+	"T82x_LSC_ATOMIC_HITS",
+	"T82x_LSC_ATOMIC_OP",
+	"T82x_LSC_LINE_FETCHES",
+	"T82x_LSC_DIRTY_LINE",
+	"T82x_LSC_SNOOPS",
+	"T82x_AXI_TLB_STALL",
+	"T82x_AXI_TLB_MISS",
+	"T82x_AXI_TLB_TRANSACTION",
+	"T82x_LS_TLB_MISS",
+	"T82x_LS_TLB_HIT",
+	"T82x_AXI_BEATS_READ",
+	"T82x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MMU_HIT",
+	"T82x_MMU_NEW_MISS",
+	"T82x_MMU_REPLAY_FULL",
+	"T82x_MMU_REPLAY_MISS",
+	"T82x_MMU_TABLE_WALK",
+	"T82x_MMU_REQUESTS",
+	"",
+	"",
+	"T82x_UTLB_HIT",
+	"T82x_UTLB_NEW_MISS",
+	"T82x_UTLB_REPLAY_FULL",
+	"T82x_UTLB_REPLAY_MISS",
+	"T82x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_L2_EXT_WRITE_BEATS",
+	"T82x_L2_EXT_READ_BEATS",
+	"T82x_L2_ANY_LOOKUP",
+	"T82x_L2_READ_LOOKUP",
+	"T82x_L2_SREAD_LOOKUP",
+	"T82x_L2_READ_REPLAY",
+	"T82x_L2_READ_SNOOP",
+	"T82x_L2_READ_HIT",
+	"T82x_L2_CLEAN_MISS",
+	"T82x_L2_WRITE_LOOKUP",
+	"T82x_L2_SWRITE_LOOKUP",
+	"T82x_L2_WRITE_REPLAY",
+	"T82x_L2_WRITE_SNOOP",
+	"T82x_L2_WRITE_HIT",
+	"T82x_L2_EXT_READ_FULL",
+	"",
+	"T82x_L2_EXT_WRITE_FULL",
+	"T82x_L2_EXT_R_W_HAZARD",
+	"T82x_L2_EXT_READ",
+	"T82x_L2_EXT_READ_LINE",
+	"T82x_L2_EXT_WRITE",
+	"T82x_L2_EXT_WRITE_LINE",
+	"T82x_L2_EXT_WRITE_SMALL",
+	"T82x_L2_EXT_BARRIER",
+	"T82x_L2_EXT_AR_STALL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_EXT_RD_BUF_FULL",
+	"T82x_L2_EXT_R_RAW",
+	"T82x_L2_EXT_W_STALL",
+	"T82x_L2_EXT_W_BUF_FULL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_TAG_HAZARD",
+	"T82x_L2_SNOOP_FULL",
+	"T82x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t83x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MESSAGES_SENT",
+	"T83x_MESSAGES_RECEIVED",
+	"T83x_GPU_ACTIVE",
+	"T83x_IRQ_ACTIVE",
+	"T83x_JS0_JOBS",
+	"T83x_JS0_TASKS",
+	"T83x_JS0_ACTIVE",
+	"",
+	"T83x_JS0_WAIT_READ",
+	"T83x_JS0_WAIT_ISSUE",
+	"T83x_JS0_WAIT_DEPEND",
+	"T83x_JS0_WAIT_FINISH",
+	"T83x_JS1_JOBS",
+	"T83x_JS1_TASKS",
+	"T83x_JS1_ACTIVE",
+	"",
+	"T83x_JS1_WAIT_READ",
+	"T83x_JS1_WAIT_ISSUE",
+	"T83x_JS1_WAIT_DEPEND",
+	"T83x_JS1_WAIT_FINISH",
+	"T83x_JS2_JOBS",
+	"T83x_JS2_TASKS",
+	"T83x_JS2_ACTIVE",
+	"",
+	"T83x_JS2_WAIT_READ",
+	"T83x_JS2_WAIT_ISSUE",
+	"T83x_JS2_WAIT_DEPEND",
+	"T83x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T83x_TI_JOBS_PROCESSED",
+	"T83x_TI_TRIANGLES",
+	"T83x_TI_QUADS",
+	"T83x_TI_POLYGONS",
+	"T83x_TI_POINTS",
+	"T83x_TI_LINES",
+	"T83x_TI_FRONT_FACING",
+	"T83x_TI_BACK_FACING",
+	"T83x_TI_PRIM_VISIBLE",
+	"T83x_TI_PRIM_CULLED",
+	"T83x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T83x_FRAG_ACTIVE",
+	"T83x_FRAG_PRIMITIVES",
+	"T83x_FRAG_PRIMITIVES_DROPPED",
+	"T83x_FRAG_CYCLES_DESC",
+	"T83x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T83x_FRAG_CYCLES_VERT",
+	"T83x_FRAG_CYCLES_TRISETUP",
+	"T83x_FRAG_CYCLES_EZS_ACTIVE",
+	"T83x_FRAG_THREADS",
+	"T83x_FRAG_DUMMY_THREADS",
+	"T83x_FRAG_QUADS_RAST",
+	"T83x_FRAG_QUADS_EZS_TEST",
+	"T83x_FRAG_QUADS_EZS_KILLED",
+	"T83x_FRAG_THREADS_LZS_TEST",
+	"T83x_FRAG_THREADS_LZS_KILLED",
+	"T83x_FRAG_CYCLES_NO_TILE",
+	"T83x_FRAG_NUM_TILES",
+	"T83x_FRAG_TRANS_ELIM",
+	"T83x_COMPUTE_ACTIVE",
+	"T83x_COMPUTE_TASKS",
+	"T83x_COMPUTE_THREADS",
+	"T83x_COMPUTE_CYCLES_DESC",
+	"T83x_TRIPIPE_ACTIVE",
+	"T83x_ARITH_WORDS",
+	"T83x_ARITH_CYCLES_REG",
+	"T83x_ARITH_CYCLES_L0",
+	"T83x_ARITH_FRAG_DEPEND",
+	"T83x_LS_WORDS",
+	"T83x_LS_ISSUES",
+	"T83x_LS_REISSUE_ATTR",
+	"T83x_LS_REISSUES_VARY",
+	"T83x_LS_VARY_RV_MISS",
+	"T83x_LS_VARY_RV_HIT",
+	"T83x_LS_NO_UNPARK",
+	"T83x_TEX_WORDS",
+	"T83x_TEX_BUBBLES",
+	"T83x_TEX_WORDS_L0",
+	"T83x_TEX_WORDS_DESC",
+	"T83x_TEX_ISSUES",
+	"T83x_TEX_RECIRC_FMISS",
+	"T83x_TEX_RECIRC_DESC",
+	"T83x_TEX_RECIRC_MULTI",
+	"T83x_TEX_RECIRC_PMISS",
+	"T83x_TEX_RECIRC_CONF",
+	"T83x_LSC_READ_HITS",
+	"T83x_LSC_READ_OP",
+	"T83x_LSC_WRITE_HITS",
+	"T83x_LSC_WRITE_OP",
+	"T83x_LSC_ATOMIC_HITS",
+	"T83x_LSC_ATOMIC_OP",
+	"T83x_LSC_LINE_FETCHES",
+	"T83x_LSC_DIRTY_LINE",
+	"T83x_LSC_SNOOPS",
+	"T83x_AXI_TLB_STALL",
+	"T83x_AXI_TLB_MISS",
+	"T83x_AXI_TLB_TRANSACTION",
+	"T83x_LS_TLB_MISS",
+	"T83x_LS_TLB_HIT",
+	"T83x_AXI_BEATS_READ",
+	"T83x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MMU_HIT",
+	"T83x_MMU_NEW_MISS",
+	"T83x_MMU_REPLAY_FULL",
+	"T83x_MMU_REPLAY_MISS",
+	"T83x_MMU_TABLE_WALK",
+	"T83x_MMU_REQUESTS",
+	"",
+	"",
+	"T83x_UTLB_HIT",
+	"T83x_UTLB_NEW_MISS",
+	"T83x_UTLB_REPLAY_FULL",
+	"T83x_UTLB_REPLAY_MISS",
+	"T83x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_L2_EXT_WRITE_BEATS",
+	"T83x_L2_EXT_READ_BEATS",
+	"T83x_L2_ANY_LOOKUP",
+	"T83x_L2_READ_LOOKUP",
+	"T83x_L2_SREAD_LOOKUP",
+	"T83x_L2_READ_REPLAY",
+	"T83x_L2_READ_SNOOP",
+	"T83x_L2_READ_HIT",
+	"T83x_L2_CLEAN_MISS",
+	"T83x_L2_WRITE_LOOKUP",
+	"T83x_L2_SWRITE_LOOKUP",
+	"T83x_L2_WRITE_REPLAY",
+	"T83x_L2_WRITE_SNOOP",
+	"T83x_L2_WRITE_HIT",
+	"T83x_L2_EXT_READ_FULL",
+	"",
+	"T83x_L2_EXT_WRITE_FULL",
+	"T83x_L2_EXT_R_W_HAZARD",
+	"T83x_L2_EXT_READ",
+	"T83x_L2_EXT_READ_LINE",
+	"T83x_L2_EXT_WRITE",
+	"T83x_L2_EXT_WRITE_LINE",
+	"T83x_L2_EXT_WRITE_SMALL",
+	"T83x_L2_EXT_BARRIER",
+	"T83x_L2_EXT_AR_STALL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_EXT_RD_BUF_FULL",
+	"T83x_L2_EXT_R_RAW",
+	"T83x_L2_EXT_W_STALL",
+	"T83x_L2_EXT_W_BUF_FULL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_TAG_HAZARD",
+	"T83x_L2_SNOOP_FULL",
+	"T83x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t86x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MESSAGES_SENT",
+	"T86x_MESSAGES_RECEIVED",
+	"T86x_GPU_ACTIVE",
+	"T86x_IRQ_ACTIVE",
+	"T86x_JS0_JOBS",
+	"T86x_JS0_TASKS",
+	"T86x_JS0_ACTIVE",
+	"",
+	"T86x_JS0_WAIT_READ",
+	"T86x_JS0_WAIT_ISSUE",
+	"T86x_JS0_WAIT_DEPEND",
+	"T86x_JS0_WAIT_FINISH",
+	"T86x_JS1_JOBS",
+	"T86x_JS1_TASKS",
+	"T86x_JS1_ACTIVE",
+	"",
+	"T86x_JS1_WAIT_READ",
+	"T86x_JS1_WAIT_ISSUE",
+	"T86x_JS1_WAIT_DEPEND",
+	"T86x_JS1_WAIT_FINISH",
+	"T86x_JS2_JOBS",
+	"T86x_JS2_TASKS",
+	"T86x_JS2_ACTIVE",
+	"",
+	"T86x_JS2_WAIT_READ",
+	"T86x_JS2_WAIT_ISSUE",
+	"T86x_JS2_WAIT_DEPEND",
+	"T86x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T86x_TI_JOBS_PROCESSED",
+	"T86x_TI_TRIANGLES",
+	"T86x_TI_QUADS",
+	"T86x_TI_POLYGONS",
+	"T86x_TI_POINTS",
+	"T86x_TI_LINES",
+	"T86x_TI_VCACHE_HIT",
+	"T86x_TI_VCACHE_MISS",
+	"T86x_TI_FRONT_FACING",
+	"T86x_TI_BACK_FACING",
+	"T86x_TI_PRIM_VISIBLE",
+	"T86x_TI_PRIM_CULLED",
+	"T86x_TI_PRIM_CLIPPED",
+	"T86x_TI_LEVEL0",
+	"T86x_TI_LEVEL1",
+	"T86x_TI_LEVEL2",
+	"T86x_TI_LEVEL3",
+	"T86x_TI_LEVEL4",
+	"T86x_TI_LEVEL5",
+	"T86x_TI_LEVEL6",
+	"T86x_TI_LEVEL7",
+	"T86x_TI_COMMAND_1",
+	"T86x_TI_COMMAND_2",
+	"T86x_TI_COMMAND_3",
+	"T86x_TI_COMMAND_4",
+	"T86x_TI_COMMAND_5_7",
+	"T86x_TI_COMMAND_8_15",
+	"T86x_TI_COMMAND_16_63",
+	"T86x_TI_COMMAND_64",
+	"T86x_TI_COMPRESS_IN",
+	"T86x_TI_COMPRESS_OUT",
+	"T86x_TI_COMPRESS_FLUSH",
+	"T86x_TI_TIMESTAMPS",
+	"T86x_TI_PCACHE_HIT",
+	"T86x_TI_PCACHE_MISS",
+	"T86x_TI_PCACHE_LINE",
+	"T86x_TI_PCACHE_STALL",
+	"T86x_TI_WRBUF_HIT",
+	"T86x_TI_WRBUF_MISS",
+	"T86x_TI_WRBUF_LINE",
+	"T86x_TI_WRBUF_PARTIAL",
+	"T86x_TI_WRBUF_STALL",
+	"T86x_TI_ACTIVE",
+	"T86x_TI_LOADING_DESC",
+	"T86x_TI_INDEX_WAIT",
+	"T86x_TI_INDEX_RANGE_WAIT",
+	"T86x_TI_VERTEX_WAIT",
+	"T86x_TI_PCACHE_WAIT",
+	"T86x_TI_WRBUF_WAIT",
+	"T86x_TI_BUS_READ",
+	"T86x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_TI_UTLB_HIT",
+	"T86x_TI_UTLB_NEW_MISS",
+	"T86x_TI_UTLB_REPLAY_FULL",
+	"T86x_TI_UTLB_REPLAY_MISS",
+	"T86x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T86x_FRAG_ACTIVE",
+	"T86x_FRAG_PRIMITIVES",
+	"T86x_FRAG_PRIMITIVES_DROPPED",
+	"T86x_FRAG_CYCLES_DESC",
+	"T86x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T86x_FRAG_CYCLES_VERT",
+	"T86x_FRAG_CYCLES_TRISETUP",
+	"T86x_FRAG_CYCLES_EZS_ACTIVE",
+	"T86x_FRAG_THREADS",
+	"T86x_FRAG_DUMMY_THREADS",
+	"T86x_FRAG_QUADS_RAST",
+	"T86x_FRAG_QUADS_EZS_TEST",
+	"T86x_FRAG_QUADS_EZS_KILLED",
+	"T86x_FRAG_THREADS_LZS_TEST",
+	"T86x_FRAG_THREADS_LZS_KILLED",
+	"T86x_FRAG_CYCLES_NO_TILE",
+	"T86x_FRAG_NUM_TILES",
+	"T86x_FRAG_TRANS_ELIM",
+	"T86x_COMPUTE_ACTIVE",
+	"T86x_COMPUTE_TASKS",
+	"T86x_COMPUTE_THREADS",
+	"T86x_COMPUTE_CYCLES_DESC",
+	"T86x_TRIPIPE_ACTIVE",
+	"T86x_ARITH_WORDS",
+	"T86x_ARITH_CYCLES_REG",
+	"T86x_ARITH_CYCLES_L0",
+	"T86x_ARITH_FRAG_DEPEND",
+	"T86x_LS_WORDS",
+	"T86x_LS_ISSUES",
+	"T86x_LS_REISSUE_ATTR",
+	"T86x_LS_REISSUES_VARY",
+	"T86x_LS_VARY_RV_MISS",
+	"T86x_LS_VARY_RV_HIT",
+	"T86x_LS_NO_UNPARK",
+	"T86x_TEX_WORDS",
+	"T86x_TEX_BUBBLES",
+	"T86x_TEX_WORDS_L0",
+	"T86x_TEX_WORDS_DESC",
+	"T86x_TEX_ISSUES",
+	"T86x_TEX_RECIRC_FMISS",
+	"T86x_TEX_RECIRC_DESC",
+	"T86x_TEX_RECIRC_MULTI",
+	"T86x_TEX_RECIRC_PMISS",
+	"T86x_TEX_RECIRC_CONF",
+	"T86x_LSC_READ_HITS",
+	"T86x_LSC_READ_OP",
+	"T86x_LSC_WRITE_HITS",
+	"T86x_LSC_WRITE_OP",
+	"T86x_LSC_ATOMIC_HITS",
+	"T86x_LSC_ATOMIC_OP",
+	"T86x_LSC_LINE_FETCHES",
+	"T86x_LSC_DIRTY_LINE",
+	"T86x_LSC_SNOOPS",
+	"T86x_AXI_TLB_STALL",
+	"T86x_AXI_TLB_MISS",
+	"T86x_AXI_TLB_TRANSACTION",
+	"T86x_LS_TLB_MISS",
+	"T86x_LS_TLB_HIT",
+	"T86x_AXI_BEATS_READ",
+	"T86x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MMU_HIT",
+	"T86x_MMU_NEW_MISS",
+	"T86x_MMU_REPLAY_FULL",
+	"T86x_MMU_REPLAY_MISS",
+	"T86x_MMU_TABLE_WALK",
+	"T86x_MMU_REQUESTS",
+	"",
+	"",
+	"T86x_UTLB_HIT",
+	"T86x_UTLB_NEW_MISS",
+	"T86x_UTLB_REPLAY_FULL",
+	"T86x_UTLB_REPLAY_MISS",
+	"T86x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_L2_EXT_WRITE_BEATS",
+	"T86x_L2_EXT_READ_BEATS",
+	"T86x_L2_ANY_LOOKUP",
+	"T86x_L2_READ_LOOKUP",
+	"T86x_L2_SREAD_LOOKUP",
+	"T86x_L2_READ_REPLAY",
+	"T86x_L2_READ_SNOOP",
+	"T86x_L2_READ_HIT",
+	"T86x_L2_CLEAN_MISS",
+	"T86x_L2_WRITE_LOOKUP",
+	"T86x_L2_SWRITE_LOOKUP",
+	"T86x_L2_WRITE_REPLAY",
+	"T86x_L2_WRITE_SNOOP",
+	"T86x_L2_WRITE_HIT",
+	"T86x_L2_EXT_READ_FULL",
+	"",
+	"T86x_L2_EXT_WRITE_FULL",
+	"T86x_L2_EXT_R_W_HAZARD",
+	"T86x_L2_EXT_READ",
+	"T86x_L2_EXT_READ_LINE",
+	"T86x_L2_EXT_WRITE",
+	"T86x_L2_EXT_WRITE_LINE",
+	"T86x_L2_EXT_WRITE_SMALL",
+	"T86x_L2_EXT_BARRIER",
+	"T86x_L2_EXT_AR_STALL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_EXT_RD_BUF_FULL",
+	"T86x_L2_EXT_R_RAW",
+	"T86x_L2_EXT_W_STALL",
+	"T86x_L2_EXT_W_BUF_FULL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_TAG_HAZARD",
+	"T86x_L2_SNOOP_FULL",
+	"T86x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t88x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MESSAGES_SENT",
+	"T88x_MESSAGES_RECEIVED",
+	"T88x_GPU_ACTIVE",
+	"T88x_IRQ_ACTIVE",
+	"T88x_JS0_JOBS",
+	"T88x_JS0_TASKS",
+	"T88x_JS0_ACTIVE",
+	"",
+	"T88x_JS0_WAIT_READ",
+	"T88x_JS0_WAIT_ISSUE",
+	"T88x_JS0_WAIT_DEPEND",
+	"T88x_JS0_WAIT_FINISH",
+	"T88x_JS1_JOBS",
+	"T88x_JS1_TASKS",
+	"T88x_JS1_ACTIVE",
+	"",
+	"T88x_JS1_WAIT_READ",
+	"T88x_JS1_WAIT_ISSUE",
+	"T88x_JS1_WAIT_DEPEND",
+	"T88x_JS1_WAIT_FINISH",
+	"T88x_JS2_JOBS",
+	"T88x_JS2_TASKS",
+	"T88x_JS2_ACTIVE",
+	"",
+	"T88x_JS2_WAIT_READ",
+	"T88x_JS2_WAIT_ISSUE",
+	"T88x_JS2_WAIT_DEPEND",
+	"T88x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T88x_TI_JOBS_PROCESSED",
+	"T88x_TI_TRIANGLES",
+	"T88x_TI_QUADS",
+	"T88x_TI_POLYGONS",
+	"T88x_TI_POINTS",
+	"T88x_TI_LINES",
+	"T88x_TI_VCACHE_HIT",
+	"T88x_TI_VCACHE_MISS",
+	"T88x_TI_FRONT_FACING",
+	"T88x_TI_BACK_FACING",
+	"T88x_TI_PRIM_VISIBLE",
+	"T88x_TI_PRIM_CULLED",
+	"T88x_TI_PRIM_CLIPPED",
+	"T88x_TI_LEVEL0",
+	"T88x_TI_LEVEL1",
+	"T88x_TI_LEVEL2",
+	"T88x_TI_LEVEL3",
+	"T88x_TI_LEVEL4",
+	"T88x_TI_LEVEL5",
+	"T88x_TI_LEVEL6",
+	"T88x_TI_LEVEL7",
+	"T88x_TI_COMMAND_1",
+	"T88x_TI_COMMAND_2",
+	"T88x_TI_COMMAND_3",
+	"T88x_TI_COMMAND_4",
+	"T88x_TI_COMMAND_5_7",
+	"T88x_TI_COMMAND_8_15",
+	"T88x_TI_COMMAND_16_63",
+	"T88x_TI_COMMAND_64",
+	"T88x_TI_COMPRESS_IN",
+	"T88x_TI_COMPRESS_OUT",
+	"T88x_TI_COMPRESS_FLUSH",
+	"T88x_TI_TIMESTAMPS",
+	"T88x_TI_PCACHE_HIT",
+	"T88x_TI_PCACHE_MISS",
+	"T88x_TI_PCACHE_LINE",
+	"T88x_TI_PCACHE_STALL",
+	"T88x_TI_WRBUF_HIT",
+	"T88x_TI_WRBUF_MISS",
+	"T88x_TI_WRBUF_LINE",
+	"T88x_TI_WRBUF_PARTIAL",
+	"T88x_TI_WRBUF_STALL",
+	"T88x_TI_ACTIVE",
+	"T88x_TI_LOADING_DESC",
+	"T88x_TI_INDEX_WAIT",
+	"T88x_TI_INDEX_RANGE_WAIT",
+	"T88x_TI_VERTEX_WAIT",
+	"T88x_TI_PCACHE_WAIT",
+	"T88x_TI_WRBUF_WAIT",
+	"T88x_TI_BUS_READ",
+	"T88x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_TI_UTLB_HIT",
+	"T88x_TI_UTLB_NEW_MISS",
+	"T88x_TI_UTLB_REPLAY_FULL",
+	"T88x_TI_UTLB_REPLAY_MISS",
+	"T88x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T88x_FRAG_ACTIVE",
+	"T88x_FRAG_PRIMITIVES",
+	"T88x_FRAG_PRIMITIVES_DROPPED",
+	"T88x_FRAG_CYCLES_DESC",
+	"T88x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T88x_FRAG_CYCLES_VERT",
+	"T88x_FRAG_CYCLES_TRISETUP",
+	"T88x_FRAG_CYCLES_EZS_ACTIVE",
+	"T88x_FRAG_THREADS",
+	"T88x_FRAG_DUMMY_THREADS",
+	"T88x_FRAG_QUADS_RAST",
+	"T88x_FRAG_QUADS_EZS_TEST",
+	"T88x_FRAG_QUADS_EZS_KILLED",
+	"T88x_FRAG_THREADS_LZS_TEST",
+	"T88x_FRAG_THREADS_LZS_KILLED",
+	"T88x_FRAG_CYCLES_NO_TILE",
+	"T88x_FRAG_NUM_TILES",
+	"T88x_FRAG_TRANS_ELIM",
+	"T88x_COMPUTE_ACTIVE",
+	"T88x_COMPUTE_TASKS",
+	"T88x_COMPUTE_THREADS",
+	"T88x_COMPUTE_CYCLES_DESC",
+	"T88x_TRIPIPE_ACTIVE",
+	"T88x_ARITH_WORDS",
+	"T88x_ARITH_CYCLES_REG",
+	"T88x_ARITH_CYCLES_L0",
+	"T88x_ARITH_FRAG_DEPEND",
+	"T88x_LS_WORDS",
+	"T88x_LS_ISSUES",
+	"T88x_LS_REISSUE_ATTR",
+	"T88x_LS_REISSUES_VARY",
+	"T88x_LS_VARY_RV_MISS",
+	"T88x_LS_VARY_RV_HIT",
+	"T88x_LS_NO_UNPARK",
+	"T88x_TEX_WORDS",
+	"T88x_TEX_BUBBLES",
+	"T88x_TEX_WORDS_L0",
+	"T88x_TEX_WORDS_DESC",
+	"T88x_TEX_ISSUES",
+	"T88x_TEX_RECIRC_FMISS",
+	"T88x_TEX_RECIRC_DESC",
+	"T88x_TEX_RECIRC_MULTI",
+	"T88x_TEX_RECIRC_PMISS",
+	"T88x_TEX_RECIRC_CONF",
+	"T88x_LSC_READ_HITS",
+	"T88x_LSC_READ_OP",
+	"T88x_LSC_WRITE_HITS",
+	"T88x_LSC_WRITE_OP",
+	"T88x_LSC_ATOMIC_HITS",
+	"T88x_LSC_ATOMIC_OP",
+	"T88x_LSC_LINE_FETCHES",
+	"T88x_LSC_DIRTY_LINE",
+	"T88x_LSC_SNOOPS",
+	"T88x_AXI_TLB_STALL",
+	"T88x_AXI_TLB_MISS",
+	"T88x_AXI_TLB_TRANSACTION",
+	"T88x_LS_TLB_MISS",
+	"T88x_LS_TLB_HIT",
+	"T88x_AXI_BEATS_READ",
+	"T88x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MMU_HIT",
+	"T88x_MMU_NEW_MISS",
+	"T88x_MMU_REPLAY_FULL",
+	"T88x_MMU_REPLAY_MISS",
+	"T88x_MMU_TABLE_WALK",
+	"T88x_MMU_REQUESTS",
+	"",
+	"",
+	"T88x_UTLB_HIT",
+	"T88x_UTLB_NEW_MISS",
+	"T88x_UTLB_REPLAY_FULL",
+	"T88x_UTLB_REPLAY_MISS",
+	"T88x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_L2_EXT_WRITE_BEATS",
+	"T88x_L2_EXT_READ_BEATS",
+	"T88x_L2_ANY_LOOKUP",
+	"T88x_L2_READ_LOOKUP",
+	"T88x_L2_SREAD_LOOKUP",
+	"T88x_L2_READ_REPLAY",
+	"T88x_L2_READ_SNOOP",
+	"T88x_L2_READ_HIT",
+	"T88x_L2_CLEAN_MISS",
+	"T88x_L2_WRITE_LOOKUP",
+	"T88x_L2_SWRITE_LOOKUP",
+	"T88x_L2_WRITE_REPLAY",
+	"T88x_L2_WRITE_SNOOP",
+	"T88x_L2_WRITE_HIT",
+	"T88x_L2_EXT_READ_FULL",
+	"",
+	"T88x_L2_EXT_WRITE_FULL",
+	"T88x_L2_EXT_R_W_HAZARD",
+	"T88x_L2_EXT_READ",
+	"T88x_L2_EXT_READ_LINE",
+	"T88x_L2_EXT_WRITE",
+	"T88x_L2_EXT_WRITE_LINE",
+	"T88x_L2_EXT_WRITE_SMALL",
+	"T88x_L2_EXT_BARRIER",
+	"T88x_L2_EXT_AR_STALL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_EXT_RD_BUF_FULL",
+	"T88x_L2_EXT_R_RAW",
+	"T88x_L2_EXT_W_STALL",
+	"T88x_L2_EXT_W_BUF_FULL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_TAG_HAZARD",
+	"T88x_L2_SNOOP_FULL",
+	"T88x_L2_REPLAY_FULL"
+};
+
+#include "mali_kbase_gator_hwcnt_names_tmix.h"
+
+
+
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
new file mode 100644
index 0000000..5ea0677
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+
+static const char * const hardware_counters_mali_tMIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MESSAGES_SENT",
+	"TMIx_MESSAGES_RECEIVED",
+	"TMIx_GPU_ACTIVE",
+	"TMIx_IRQ_ACTIVE",
+	"TMIx_JS0_JOBS",
+	"TMIx_JS0_TASKS",
+	"TMIx_JS0_ACTIVE",
+	"",
+	"TMIx_JS0_WAIT_READ",
+	"TMIx_JS0_WAIT_ISSUE",
+	"TMIx_JS0_WAIT_DEPEND",
+	"TMIx_JS0_WAIT_FINISH",
+	"TMIx_JS1_JOBS",
+	"TMIx_JS1_TASKS",
+	"TMIx_JS1_ACTIVE",
+	"",
+	"TMIx_JS1_WAIT_READ",
+	"TMIx_JS1_WAIT_ISSUE",
+	"TMIx_JS1_WAIT_DEPEND",
+	"TMIx_JS1_WAIT_FINISH",
+	"TMIx_JS2_JOBS",
+	"TMIx_JS2_TASKS",
+	"TMIx_JS2_ACTIVE",
+	"",
+	"TMIx_JS2_WAIT_READ",
+	"TMIx_JS2_WAIT_ISSUE",
+	"TMIx_JS2_WAIT_DEPEND",
+	"TMIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_TILER_ACTIVE",
+	"TMIx_JOBS_PROCESSED",
+	"TMIx_TRIANGLES",
+	"TMIx_LINES",
+	"TMIx_POINTS",
+	"TMIx_FRONT_FACING",
+	"TMIx_BACK_FACING",
+	"TMIx_PRIM_VISIBLE",
+	"TMIx_PRIM_CULLED",
+	"TMIx_PRIM_CLIPPED",
+	"TMIx_PRIM_SAT_CULLED",
+	"",
+	"",
+	"TMIx_BUS_READ",
+	"",
+	"TMIx_BUS_WRITE",
+	"TMIx_LOADING_DESC",
+	"TMIx_IDVS_POS_SHAD_REQ",
+	"TMIx_IDVS_POS_SHAD_WAIT",
+	"TMIx_IDVS_POS_SHAD_STALL",
+	"TMIx_IDVS_POS_FIFO_FULL",
+	"TMIx_PREFETCH_STALL",
+	"TMIx_VCACHE_HIT",
+	"TMIx_VCACHE_MISS",
+	"TMIx_VCACHE_LINE_WAIT",
+	"TMIx_VFETCH_POS_READ_WAIT",
+	"TMIx_VFETCH_VERTEX_WAIT",
+	"TMIx_VFETCH_STALL",
+	"TMIx_PRIMASSY_STALL",
+	"TMIx_BBOX_GEN_STALL",
+	"TMIx_IDVS_VBU_HIT",
+	"TMIx_IDVS_VBU_MISS",
+	"TMIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TMIx_IDVS_VAR_SHAD_REQ",
+	"TMIx_IDVS_VAR_SHAD_STALL",
+	"TMIx_BINNER_STALL",
+	"TMIx_ITER_STALL",
+	"TMIx_COMPRESS_MISS",
+	"TMIx_COMPRESS_STALL",
+	"TMIx_PCACHE_HIT",
+	"TMIx_PCACHE_MISS",
+	"TMIx_PCACHE_MISS_STALL",
+	"TMIx_PCACHE_EVICT_STALL",
+	"TMIx_PMGR_PTR_WR_STALL",
+	"TMIx_PMGR_PTR_RD_STALL",
+	"TMIx_PMGR_CMD_WR_STALL",
+	"TMIx_WRBUF_ACTIVE",
+	"TMIx_WRBUF_HIT",
+	"TMIx_WRBUF_MISS",
+	"TMIx_WRBUF_NO_FREE_LINE_STALL",
+	"TMIx_WRBUF_NO_AXI_ID_STALL",
+	"TMIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TMIx_UTLB_TRANS",
+	"TMIx_UTLB_TRANS_HIT",
+	"TMIx_UTLB_TRANS_STALL",
+	"TMIx_UTLB_TRANS_MISS_DELAY",
+	"TMIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_FRAG_ACTIVE",
+	"TMIx_FRAG_PRIMITIVES",
+	"TMIx_FRAG_PRIM_RAST",
+	"TMIx_FRAG_FPK_ACTIVE",
+	"TMIx_FRAG_STARVING",
+	"TMIx_FRAG_WARPS",
+	"TMIx_FRAG_PARTIAL_WARPS",
+	"TMIx_FRAG_QUADS_RAST",
+	"TMIx_FRAG_QUADS_EZS_TEST",
+	"TMIx_FRAG_QUADS_EZS_UPDATE",
+	"TMIx_FRAG_QUADS_EZS_KILL",
+	"TMIx_FRAG_LZS_TEST",
+	"TMIx_FRAG_LZS_KILL",
+	"",
+	"TMIx_FRAG_PTILES",
+	"TMIx_FRAG_TRANS_ELIM",
+	"TMIx_QUAD_FPK_KILLER",
+	"",
+	"TMIx_COMPUTE_ACTIVE",
+	"TMIx_COMPUTE_TASKS",
+	"TMIx_COMPUTE_WARPS",
+	"TMIx_COMPUTE_STARVING",
+	"TMIx_EXEC_CORE_ACTIVE",
+	"TMIx_EXEC_ACTIVE",
+	"TMIx_EXEC_INSTR_COUNT",
+	"TMIx_EXEC_INSTR_DIVERGED",
+	"TMIx_EXEC_INSTR_STARVING",
+	"TMIx_ARITH_INSTR_SINGLE_FMA",
+	"TMIx_ARITH_INSTR_DOUBLE",
+	"TMIx_ARITH_INSTR_MSG",
+	"TMIx_ARITH_INSTR_MSG_ONLY",
+	"TMIx_TEX_INSTR",
+	"TMIx_TEX_INSTR_MIPMAP",
+	"TMIx_TEX_INSTR_COMPRESSED",
+	"TMIx_TEX_INSTR_3D",
+	"TMIx_TEX_INSTR_TRILINEAR",
+	"TMIx_TEX_COORD_ISSUE",
+	"TMIx_TEX_COORD_STALL",
+	"TMIx_TEX_STARVE_CACHE",
+	"TMIx_TEX_STARVE_FILTER",
+	"TMIx_LS_MEM_READ_FULL",
+	"TMIx_LS_MEM_READ_SHORT",
+	"TMIx_LS_MEM_WRITE_FULL",
+	"TMIx_LS_MEM_WRITE_SHORT",
+	"TMIx_LS_MEM_ATOMIC",
+	"TMIx_VARY_INSTR",
+	"TMIx_VARY_SLOT_32",
+	"TMIx_VARY_SLOT_16",
+	"TMIx_ATTR_INSTR",
+	"TMIx_ARITH_INSTR_FP_MUL",
+	"TMIx_BEATS_RD_FTC",
+	"TMIx_BEATS_RD_FTC_EXT",
+	"TMIx_BEATS_RD_LSC",
+	"TMIx_BEATS_RD_LSC_EXT",
+	"TMIx_BEATS_RD_TEX",
+	"TMIx_BEATS_RD_TEX_EXT",
+	"TMIx_BEATS_RD_OTHER",
+	"TMIx_BEATS_WR_LSC",
+	"TMIx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TMIx_L2_RD_MSG_IN",
+	"TMIx_L2_RD_MSG_IN_STALL",
+	"TMIx_L2_WR_MSG_IN",
+	"TMIx_L2_WR_MSG_IN_STALL",
+	"TMIx_L2_SNP_MSG_IN",
+	"TMIx_L2_SNP_MSG_IN_STALL",
+	"TMIx_L2_RD_MSG_OUT",
+	"TMIx_L2_RD_MSG_OUT_STALL",
+	"TMIx_L2_WR_MSG_OUT",
+	"TMIx_L2_ANY_LOOKUP",
+	"TMIx_L2_READ_LOOKUP",
+	"TMIx_L2_WRITE_LOOKUP",
+	"TMIx_L2_EXT_SNOOP_LOOKUP",
+	"TMIx_L2_EXT_READ",
+	"TMIx_L2_EXT_READ_NOSNP",
+	"TMIx_L2_EXT_READ_UNIQUE",
+	"TMIx_L2_EXT_READ_BEATS",
+	"TMIx_L2_EXT_AR_STALL",
+	"TMIx_L2_EXT_AR_CNT_Q1",
+	"TMIx_L2_EXT_AR_CNT_Q2",
+	"TMIx_L2_EXT_AR_CNT_Q3",
+	"TMIx_L2_EXT_RRESP_0_127",
+	"TMIx_L2_EXT_RRESP_128_191",
+	"TMIx_L2_EXT_RRESP_192_255",
+	"TMIx_L2_EXT_RRESP_256_319",
+	"TMIx_L2_EXT_RRESP_320_383",
+	"TMIx_L2_EXT_WRITE",
+	"TMIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TMIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TMIx_L2_EXT_WRITE_SNP_FULL",
+	"TMIx_L2_EXT_WRITE_SNP_PTL",
+	"TMIx_L2_EXT_WRITE_BEATS",
+	"TMIx_L2_EXT_W_STALL",
+	"TMIx_L2_EXT_AW_CNT_Q1",
+	"TMIx_L2_EXT_AW_CNT_Q2",
+	"TMIx_L2_EXT_AW_CNT_Q3",
+	"TMIx_L2_EXT_SNOOP",
+	"TMIx_L2_EXT_SNOOP_STALL",
+	"TMIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TMIx_L2_EXT_SNOOP_RESP_DATA",
+	"TMIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
new file mode 100644
index 0000000..a962ecb
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#ifndef _KBASE_GPU_ID_H_
+#define _KBASE_GPU_ID_H_
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT       0
+#define GPU_ID_VERSION_MINOR_SHIFT        4
+#define GPU_ID_VERSION_MAJOR_SHIFT        12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
+#define GPU_ID_VERSION_STATUS             (0xF  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xF  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X                    0x6956
+#define GPU_ID_PI_T62X                    0x0620
+#define GPU_ID_PI_T76X                    0x0750
+#define GPU_ID_PI_T72X                    0x0720
+#define GPU_ID_PI_TFRX                    0x0880
+#define GPU_ID_PI_T86X                    0x0860
+#define GPU_ID_PI_T82X                    0x0820
+#define GPU_ID_PI_T83X                    0x0830
+
+/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
+#define GPU_ID_PI_NEW_FORMAT_START        0x1000
+#define GPU_ID_IS_NEW_FORMAT(product_id)  ((product_id) != GPU_ID_PI_T60X && \
+						(product_id) >= \
+						GPU_ID_PI_NEW_FORMAT_START)
+
+#define GPU_ID2_VERSION_STATUS_SHIFT      0
+#define GPU_ID2_VERSION_MINOR_SHIFT       4
+#define GPU_ID2_VERSION_MAJOR_SHIFT       12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT       16
+#define GPU_ID2_ARCH_REV_SHIFT            20
+#define GPU_ID2_ARCH_MINOR_SHIFT          24
+#define GPU_ID2_ARCH_MAJOR_SHIFT          28
+#define GPU_ID2_VERSION_STATUS            (0xF << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFF << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xF << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xF << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xF << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xF << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+   a product ignoring its version. */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+   revision (major, minor, status) of a product */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+		(((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
+	version_major, version_minor, version_status) \
+		(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+			product_major) | \
+		 GPU_ID2_VERSION_MAKE(version_major, version_minor,     \
+			version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+   a particular GPU model by its arch_major and product_major. */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+   for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+   model. */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+		(((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		    GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0                   0x1
+#define GPU_ID_S_EAC                      0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major,
+   minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+		(((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		((major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		((minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		((status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+#endif /* _KBASE_GPU_ID_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
new file mode 100755
index 0000000..6df0a1c
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+/** Show callback for the @c gpu_memory debugfs file.
+ *
+ * This function is called to get the contents of the @c gpu_memory debugfs
+ * file. This is a report of current gpu memory usage.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if successfully prints data in debugfs entry file
+ *         -1 if it encountered an error
+ */
+
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+
+	kbdev_list = kbase_dev_list_get();
+	list_for_each(entry, kbdev_list) {
+		struct kbase_device *kbdev = NULL;
+		struct kbasep_kctx_list_element *element;
+
+		kbdev = list_entry(entry, struct kbase_device, entry);
+		/* output the total memory usage and cap for this device */
+		seq_printf(sfile, "%-16s  %10u\n",
+				kbdev->devname,
+				atomic_read(&(kbdev->memdev.used_pages)));
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_for_each_entry(element, &kbdev->kctx_list, link) {
+			/* output the memory usage and cap for each kctx
+			* opened on this device */
+			seq_printf(sfile, "  %s-0x%p %10u\n",
+				"kctx",
+				element->kctx,
+				atomic_read(&(element->kctx->used_pages)));
+		}
+		mutex_unlock(&kbdev->kctx_list_lock);
+	}
+	kbase_dev_list_put(kbdev_list);
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for gpu_memory
+ */
+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_gpu_memory_seq_show , NULL);
+}
+
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+	.open = kbasep_gpu_memory_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ *  Initialize debugfs entry for gpu_memory
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("gpu_memory", S_IRUGO,
+			kbdev->mali_debugfs_directory, NULL,
+			&kbasep_gpu_memory_debugfs_fops);
+	return;
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	return;
+}
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
new file mode 100755
index 0000000..7045693
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H
+#define _KBASE_GPU_MEMORY_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+#endif  /*_KBASE_GPU_MEMORY_DEBUGFS_H*/
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
new file mode 100755
index 0000000..7f77dba
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -0,0 +1,314 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include <linux/clk.h>
+
+/**
+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
+ * @value:  The value from which to extract bits.
+ * @offset: The first bit to extract (0 being the LSB).
+ * @size:   The number of bits to extract.
+ *
+ * Context: @offset + @size <= 32.
+ *
+ * Return: Bits [@offset, @offset + @size) from @value.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+	(((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
+
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props)
+{
+	kbase_gpu_clk_speed_func get_gpu_speed_mhz;
+	u32 gpu_speed_mhz;
+	int rc = 1;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kbase_props);
+
+	/* Current GPU speed is requested from the system integrator via the GPU_SPEED_FUNC function.
+	 * If that function fails, or the function is not provided by the system integrator, we report the maximum
+	 * GPU speed as specified by GPU_FREQ_KHZ_MAX.
+	 */
+	get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC;
+	if (get_gpu_speed_mhz != NULL) {
+		rc = get_gpu_speed_mhz(&gpu_speed_mhz);
+#ifdef CONFIG_MALI_DEBUG
+		/* Issue a warning message when the reported GPU speed falls outside the min/max range */
+		if (rc == 0) {
+			u32 gpu_speed_khz = gpu_speed_mhz * 1000;
+
+			if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min ||
+					gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
+				dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n",
+						(unsigned long)gpu_speed_khz,
+						(unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min,
+						(unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
+		}
+#endif				/* CONFIG_MALI_DEBUG */
+	}
+	if (kctx->kbdev->clock) {
+		gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000;
+		rc = 0;
+	}
+	if (rc != 0)
+		gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
+
+	kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz;
+
+	memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
+
+	/* Before API 8.2 they expect L3 cache info here, which was always 0 */
+	if (kctx->api_version < KBASE_API_VERSION(8, 2))
+		kbase_props->props.raw_props.suspend_size = 0;
+
+	return 0;
+}
+
+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+	struct mali_base_gpu_coherent_group *current_group;
+	u64 group_present;
+	u64 group_mask;
+	u64 first_set, first_set_prev;
+	u32 num_groups = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != props);
+
+	props->coherency_info.coherency = props->raw_props.mem_features;
+	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+	if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+		/* Group is l2 coherent */
+		group_present = props->raw_props.l2_present;
+	} else {
+		/* Group is l1 coherent */
+		group_present = props->raw_props.shader_present;
+	}
+
+	/*
+	 * The coherent group mask can be computed from the l2 present
+	 * register.
+	 *
+	 * For the coherent group n:
+	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
+	 * where first_set is group_present with only its nth set-bit kept
+	 * (i.e. the position from where a new group starts).
+	 *
+	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
+	 * The first mask is:
+	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
+	 *               = (0x0..010     - 1) & ~(0x0..01      - 1)
+	 *               =  0x0..00f
+	 * The second mask is:
+	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
+	 *               = (0x0..100     - 1) & ~(0x0..010     - 1)
+	 *               =  0x0..0f0
+	 * And so on until all the bits from group_present have been cleared
+	 * (i.e. there is no group left).
+	 */
+
+	current_group = props->coherency_info.group;
+	first_set = group_present & ~(group_present - 1);
+
+	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
+		group_present -= first_set;	/* Clear the current group bit */
+		first_set_prev = first_set;
+
+		first_set = group_present & ~(group_present - 1);
+		group_mask = (first_set - 1) & ~(first_set_prev - 1);
+
+		/* Populate the coherent_group structure for each group */
+		current_group->core_mask = group_mask & props->raw_props.shader_present;
+		current_group->num_cores = hweight64(current_group->core_mask);
+
+		num_groups++;
+		current_group++;
+	}
+
+	if (group_present != 0)
+		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+	props->coherency_info.num_groups = num_groups;
+}
+
+/**
+ * kbase_gpuprops_get_props - Get the GPU configuration
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values from the GPU configuration
+ * registers. Only the raw properties are filled in this function
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	struct kbase_gpuprops_regdump regdump;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get(kbdev, &regdump);
+
+	gpu_props->raw_props.gpu_id = regdump.gpu_id;
+	gpu_props->raw_props.tiler_features = regdump.tiler_features;
+	gpu_props->raw_props.mem_features = regdump.mem_features;
+	gpu_props->raw_props.mmu_features = regdump.mmu_features;
+	gpu_props->raw_props.l2_features = regdump.l2_features;
+	gpu_props->raw_props.suspend_size = regdump.suspend_size;
+
+	gpu_props->raw_props.as_present = regdump.as_present;
+	gpu_props->raw_props.js_present = regdump.js_present;
+	gpu_props->raw_props.shader_present = ((u64) regdump.shader_present_hi << 32) + regdump.shader_present_lo;
+	gpu_props->raw_props.tiler_present = ((u64) regdump.tiler_present_hi << 32) + regdump.tiler_present_lo;
+	gpu_props->raw_props.l2_present = ((u64) regdump.l2_present_hi << 32) + regdump.l2_present_lo;
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		gpu_props->raw_props.js_features[i] = regdump.js_features[i];
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
+
+	gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
+	gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
+	gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
+	gpu_props->raw_props.thread_features = regdump.thread_features;
+}
+
+/**
+ * kbase_gpuprops_calculate_props - Calculate the derived properties
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev:     The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values derived from the GPU
+ * configuration registers
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	int i;
+
+	/* Populate the base_gpu_props structure */
+	gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+	gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+	gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+	gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+
+	/* Field with number of l2 slices is added to MEM_FEATURES register
+	 * since t76x. Below code assumes that for older GPU reserved bits will
+	 * be read as zero. */
+	gpu_props->l2_props.num_l2_slices =
+		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+
+	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+	if (gpu_props->raw_props.thread_max_threads == 0)
+		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+	else
+		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+	else
+		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+	if (gpu_props->raw_props.thread_max_barrier_size == 0)
+		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+	else
+		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+	gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+
+	/* If values are not specified, then use defaults */
+	if (gpu_props->thread_props.max_registers == 0) {
+		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+	}
+	/* Initialize the coherent_group structure for each group */
+	kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *gpu_props;
+	struct gpu_raw_gpu_props *raw;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	gpu_props = &kbdev->gpu_props;
+	raw = &gpu_props->props.raw_props;
+
+	/* Initialize the base_gpu_props structure from the hardware */
+	kbase_gpuprops_get_props(&gpu_props->props, kbdev);
+
+	/* Populate the derived properties */
+	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
+
+	/* Populate kbase-only fields */
+	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
+	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
+
+	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
+
+	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
+	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
+
+	gpu_props->num_cores = hweight64(raw->shader_present);
+	gpu_props->num_core_groups = hweight64(raw->l2_present);
+	gpu_props->num_address_spaces = hweight32(raw->as_present);
+	gpu_props->num_job_slots = hweight32(raw->js_present);
+}
+
+void kbase_gpuprops_set_features(struct kbase_device *kbdev)
+{
+	base_gpu_props *gpu_props;
+	struct kbase_gpuprops_regdump regdump;
+
+	gpu_props = &kbdev->gpu_props.props;
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get_features(kbdev, &regdump);
+
+	/*
+	 * Copy the raw value from the register, later this will get turned
+	 * into the selected coherency mode.
+	 */
+	gpu_props->raw_props.coherency_mode = regdump.coherency_features;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
new file mode 100755
index 0000000..f3c95cc
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev		The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_set_features - Set up Kbase GPU properties
+ * @kbdev:   Device pointer
+ *
+ * This function sets up GPU properties that are dependent on the hardware
+ * features bitmask. This function must be preceeded by a call to
+ * kbase_hw_set_features_mask().
+ */
+void kbase_gpuprops_set_features(struct kbase_device *kbdev);
+
+/**
+ * @brief Provide GPU properties to userside through UKU call.
+ *
+ * Fill the struct kbase_uk_gpuprops with values from GPU configuration registers.
+ *
+ * @param kctx		The struct kbase_context structure
+ * @param kbase_props	A copy of the struct kbase_uk_gpuprops structure from userspace
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props);
+
+#endif				/* _KBASE_GPUPROPS_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
new file mode 100755
index 0000000..f42e91b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops_types.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_TYPES_H_
+#define _KBASE_GPUPROPS_TYPES_H_
+
+#include "mali_base_kernel.h"
+
+#define KBASE_GPU_SPEED_MHZ    123
+#define KBASE_GPU_PC_SIZE_LOG2 24U
+
+struct kbase_gpuprops_regdump {
+	u32 gpu_id;
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 tiler_features;
+	u32 mem_features;
+	u32 mmu_features;
+	u32 as_present;
+	u32 js_present;
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 shader_present_lo;
+	u32 shader_present_hi;
+	u32 tiler_present_lo;
+	u32 tiler_present_hi;
+	u32 l2_present_lo;
+	u32 l2_present_hi;
+	u32 coherency_features;
+};
+
+struct kbase_gpu_cache_props {
+	u8 associativity;
+	u8 external_bus_width;
+};
+
+struct kbase_gpu_mem_props {
+	u8 core_group;
+};
+
+struct kbase_gpu_mmu_props {
+	u8 va_bits;
+	u8 pa_bits;
+};
+
+struct kbase_gpu_props {
+	/* kernel-only properties */
+	u8 num_cores;
+	u8 num_core_groups;
+	u8 num_address_spaces;
+	u8 num_job_slots;
+
+	struct kbase_gpu_cache_props l2_props;
+
+	struct kbase_gpu_mem_props mem;
+	struct kbase_gpu_mmu_props mmu;
+
+	/**
+	 * Implementation specific irq throttle value (us), should be adjusted during integration.
+	 */
+	int irq_throttle_time_us;
+
+	/* Properties shared with userspace */
+	base_gpu_props props;
+};
+
+#endif				/* _KBASE_GPUPROPS_TYPES_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
new file mode 100755
index 0000000..de2461f
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -0,0 +1,263 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Run-time work-arounds helpers
+ */
+
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_midg_regmap.h>
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+
+void kbase_hw_set_features_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_feature *features;
+	u32 gpu_id;
+	u32 product_id;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			features = base_hw_features_tMIx;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	} else {
+		switch (product_id) {
+		case GPU_ID_PI_TFRX:
+			/* FALLTHROUGH */
+		case GPU_ID_PI_T86X:
+			features = base_hw_features_tFxx;
+			break;
+		case GPU_ID_PI_T83X:
+			features = base_hw_features_t83x;
+			break;
+		case GPU_ID_PI_T82X:
+			features = base_hw_features_t82x;
+			break;
+		case GPU_ID_PI_T76X:
+			features = base_hw_features_t76x;
+			break;
+		case GPU_ID_PI_T72X:
+			features = base_hw_features_t72x;
+			break;
+		case GPU_ID_PI_T62X:
+			features = base_hw_features_t62x;
+			break;
+		case GPU_ID_PI_T60X:
+			features = base_hw_features_t60x;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	}
+
+	for (; *features != BASE_HW_FEATURE_END; features++)
+		set_bit(*features, &kbdev->hw_features_mask[0]);
+}
+
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues;
+	u32 gpu_id;
+	u32 product_id;
+	u32 impl_tech;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
+
+	if (impl_tech != IMPLEMENTATION_MODEL) {
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id) {
+			case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 1):
+				issues = base_hw_issues_tMIx_r0p0_05dev0;
+				break;
+			case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 2):
+				issues = base_hw_issues_tMIx_r0p0;
+				break;
+			default:
+				if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+							GPU_ID2_PRODUCT_TMIX) {
+					issues = base_hw_issues_tMIx_r0p0;
+				} else {
+					dev_err(kbdev->dev,
+						"Unknown GPU ID %x", gpu_id);
+					return -EINVAL;
+				}
+			}
+		} else {
+			switch (gpu_id) {
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+				issues = base_hw_issues_t60x_r0p0_15dev0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+				issues = base_hw_issues_t60x_r0p0_eac;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+				issues = base_hw_issues_t60x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+				issues = base_hw_issues_t62x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+				issues = base_hw_issues_t62x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+				issues = base_hw_issues_t62x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+				issues = base_hw_issues_t76x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+				issues = base_hw_issues_t76x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+				issues = base_hw_issues_t76x_r0p1_50rel0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+				issues = base_hw_issues_t76x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+				issues = base_hw_issues_t76x_r0p3;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+				issues = base_hw_issues_t76x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+				issues = base_hw_issues_t72x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+				issues = base_hw_issues_t72x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+				issues = base_hw_issues_t72x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+				issues = base_hw_issues_tFRx_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+				issues = base_hw_issues_tFRx_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8):
+				issues = base_hw_issues_tFRx_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
+				issues = base_hw_issues_tFRx_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+				issues = base_hw_issues_t86x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8):
+				issues = base_hw_issues_t86x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
+				issues = base_hw_issues_t86x_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
+				issues = base_hw_issues_t83x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8):
+				issues = base_hw_issues_t83x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
+				issues = base_hw_issues_t82x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
+				issues = base_hw_issues_t82x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8):
+				issues = base_hw_issues_t82x_r1p0;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		}
+	} else {
+		/* Software model */
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+			case GPU_ID2_PRODUCT_TMIX:
+				issues = base_hw_issues_model_tMIx;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		} else {
+			switch (product_id) {
+			case GPU_ID_PI_T60X:
+				issues = base_hw_issues_model_t60x;
+				break;
+			case GPU_ID_PI_T62X:
+				issues = base_hw_issues_model_t62x;
+				break;
+			case GPU_ID_PI_T72X:
+				issues = base_hw_issues_model_t72x;
+				break;
+			case GPU_ID_PI_T76X:
+				issues = base_hw_issues_model_t76x;
+				break;
+			case GPU_ID_PI_TFRX:
+				issues = base_hw_issues_model_tFRx;
+				break;
+			case GPU_ID_PI_T86X:
+				issues = base_hw_issues_model_t86x;
+				break;
+			case GPU_ID_PI_T83X:
+				issues = base_hw_issues_model_t83x;
+				break;
+			case GPU_ID_PI_T82X:
+				issues = base_hw_issues_model_t82x;
+				break;
+			default:
+				dev_err(kbdev->dev, "Unknown GPU ID %x",
+					gpu_id);
+				return -EINVAL;
+			}
+		}
+	}
+
+	dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT);
+
+	for (; *issues != BASE_HW_ISSUE_END; issues++)
+		set_bit(*issues, &kbdev->hw_issues_mask[0]);
+
+	return 0;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
new file mode 100755
index 0000000..fce7d29
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Run-time work-arounds helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+	test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+	test_bit(feature, &(kbdev)->hw_features_mask[0])
+
+/**
+ * @brief Set the HW issues mask depending on the GPU ID
+ */
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif				/* _KBASE_HW_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
new file mode 100755
index 0000000..b09be99
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access backend common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_BACKEND_H_
+#define _KBASE_HWACCESS_BACKEND_H_
+
+/**
+ * kbase_backend_early_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_early_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
new file mode 100755
index 0000000..261453e
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_gpu_defs.h
+ * HW access common definitions
+ */
+
+#ifndef _KBASE_HWACCESS_DEFS_H_
+#define _KBASE_HWACCESS_DEFS_H_
+
+#include <mali_kbase_jm_defs.h>
+
+/* The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
+ * accessing this structure */
+struct kbase_hwaccess_data {
+	struct kbase_context *active_kctx;
+
+	struct kbase_backend_data backend;
+};
+
+#endif /* _KBASE_HWACCESS_DEFS_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
new file mode 100755
index 0000000..cf8a813
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ *				  GPU
+ * @kbdev:	Device pointer
+ * @regdump:	Pointer to struct kbase_gpuprops_regdump structure
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+/**
+ * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU
+ * @kbdev:   Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads GPU properties that are dependent on the hardware
+ * features bitmask
+ */
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
new file mode 100755
index 0000000..5de2b75
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include <mali_kbase_instr_defs.h>
+
+/**
+ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * @kbdev:	Kbase device
+ * @kctx:	Kbase context
+ * @setup:	HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx:	Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true,
+ * of call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ *				       completed.
+ * @kctx:	Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ *				     completed
+ * @kctx:	Kbase context
+ * @success:	Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx:	Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_init() - Terminate the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
new file mode 100755
index 0000000..abe6607
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -0,0 +1,366 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_JM_H_
+#define _KBASE_HWACCESS_JM_H_
+
+/**
+ * kbase_backend_run_atom() - Run an atom on the GPU
+ * @kbdev:	Device pointer
+ * @atom:	Atom to run
+ *
+ * Caller must hold the HW access lock
+ */
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_find_free_address_space() - Find a free address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * If no address spaces are currently free, then this function can evict an
+ * idle context from the runpool, freeing up the address space it was using.
+ *
+ * The address space is marked as in use. The caller must either assign a
+ * context using kbase_gpu_use_ctx(), or release it using
+ * kbase_gpu_release_free_address_space()
+ *
+ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none
+ *	   available
+ */
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_free_address_space() - Release an address space.
+ * @kbdev:	Device pointer
+ * @as_nr:	Address space to release
+ *
+ * The address space must have been returned by
+ * kbase_gpu_find_free_address_space().
+ */
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+						int as_nr);
+
+/**
+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the
+ *			     provided address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @as_nr:	Free address space to use
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * Return: true if successful, false if ASID not assigned.
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr);
+
+/**
+ * kbase_backend_use_ctx_sched() - Activate a context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * The context must already be scheduled and assigned to an address space. If
+ * the context is not scheduled, then kbase_gpu_use_ctx() should be used
+ * instead.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if context is now active, false otherwise (ie if context does
+ *	   not have an address space assigned)
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+					struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
+ *                                 de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold as->transaction_mutex and runpool_irq.lock
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+				struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will
+ *                                   de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold as->transaction_mutex
+ *
+ * This function must perform any operations that could not be performed in IRQ
+ * context by kbase_backend_release_ctx_irq().
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_complete_wq() - Perform backend-specific actions required on
+ *				 completing an atom.
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the atom to complete
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ *
+ * Return: true if atom has completed, false if atom should be re-submitted
+ */
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions
+ *                                        required on completing an atom, after
+ *                                        any scheduling has taken place.
+ * @kbdev:         Device pointer
+ * @core_req:      Core requirements of atom
+ * @affinity:      Affinity of atom
+ * @coreref_state: Coreref state of atom
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ */
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state);
+
+/**
+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
+ *			   and remove any others from the ringbuffers.
+ * @kbdev:		Device pointer
+ * @end_timestamp:	Timestamp of reset
+ */
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
+
+/**
+ * kbase_backend_inspect_head() - Return the atom currently at the head of slot
+ *				  @js
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
+ *                              @js
+ * @kbdev: Device pointer
+ * @js:    Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
+ *				      slot.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot
+ */
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
+ *					that are currently on the GPU.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot @js that are currently on the GPU.
+ */
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
+ *				       has changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg starting/stopping
+ * scheduling timers).
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg updating timeouts of
+ * currently running atoms).
+ */
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_slot_free() - Return the number of jobs that can be currently
+ *			       submitted to slot @js.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of jobs that can be submitted.
+ */
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
+ *                                        running from a context
+ * @kctx: Context pointer
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting
+ * context from the hardware.
+ */
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+
+/**
+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and
+ *                               to be descheduled.
+ * @kctx: Context pointer
+ *
+ * This should be called following kbase_js_zap_context(), to ensure the context
+ * can be safely destroyed.
+ */
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
+
+/**
+ * kbase_backend_get_current_flush_id - Return the current flush ID
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: the current flush ID to be recorded for each job chain
+ */
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it returns
+ * true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_locked - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_silent - Reset the GPU silently
+ * @kbdev: Device pointer
+ *
+ * Reset the GPU without trying to cancel jobs and don't emit messages into
+ * the kernel log while doing the reset.
+ *
+ * This function should be used in cases where we are doing a controlled reset
+ * of the GPU as part of normal processing (e.g. exiting protected mode) where
+ * the driver will have ensured the scheduler has been idled and all other
+ * users of the GPU (e.g. instrumentation) have been suspended.
+ */
+void kbase_reset_gpu_silent(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_active - Reports if the GPU is being reset
+ * @kbdev: Device pointer
+ *
+ * Return: True if the GPU is in the process of being reset.
+ */
+bool kbase_reset_gpu_active(struct kbase_device *kbdev);
+#endif
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom);
+
+#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
new file mode 100755
index 0000000..bae7c0d
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_pm.h
+ * HW access power manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_PM_H_
+#define _KBASE_HWACCESS_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+#include <mali_kbase_pm_defs.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/* Functions common to all HW access backends */
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return 0 if the power management framework was successfully
+ *         initialized.
+ */
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_hwaccess_pm_powerup - Power up the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
+ *
+ * Power up GPU after all modules have been initialized and interrupt handlers
+ * installed.
+ *
+ * Return: 0 if powerup was successful.
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ *
+ * Should ensure that no new interrupts are generated, but allow any currently
+ * running interrupt handlers to complete successfully. The GPU is forced off by
+ * the time this function returns, regardless of whether or not the active power
+ * policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to suspend the GPU
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to resume the GPU from a suspend
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for activating the GPU. Called when the first
+ * context goes active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for idling the GPU. Called when the last
+ * context goes idle.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev         The kbase device structure for the device (must be a
+ *                      valid pointer)
+ * @param new_core_mask_js0 The core mask to use for job slot 0
+ * @param new_core_mask_js0 The core mask to use for job slot 1
+ * @param new_core_mask_js0 The core mask to use for job slot 2
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+int set_policy_by_name(struct kbase_device *kbdev, const char *name);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+					const struct kbase_pm_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
+
+#endif /* _KBASE_HWACCESS_PM_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
new file mode 100755
index 0000000..89d26ea
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
new file mode 100755
index 0000000..cf7bf1b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,66 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_HWCNT_READER_H_
+#define _KBASE_HWCNT_READER_H_
+
+/* The ids of ioctl commands. */
+#define KBASE_HWCNT_READER 0xBE
+#define KBASE_HWCNT_READER_GET_HWVER       _IOR(KBASE_HWCNT_READER, 0x00, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32)
+#define KBASE_HWCNT_READER_DUMP            _IOW(KBASE_HWCNT_READER, 0x10, u32)
+#define KBASE_HWCNT_READER_CLEAR           _IOW(KBASE_HWCNT_READER, 0x11, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER      _IOR(KBASE_HWCNT_READER, 0x20,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_PUT_BUFFER      _IOW(KBASE_HWCNT_READER, 0x21,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_SET_INTERVAL    _IOW(KBASE_HWCNT_READER, 0x30, u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT    _IOW(KBASE_HWCNT_READER, 0x40, u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT   _IOW(KBASE_HWCNT_READER, 0x41, u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32)
+
+/**
+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
+ * @timestamp:  time when sample was collected
+ * @event_id:   id of an event that triggered sample collection
+ * @buffer_idx: position in sampling area where sample buffer was stored
+ */
+struct kbase_hwcnt_reader_metadata {
+	u64 timestamp;
+	u32 event_id;
+	u32 buffer_idx;
+};
+
+/**
+ * enum base_hwcnt_reader_event - hwcnt dumping events
+ * @BASE_HWCNT_READER_EVENT_MANUAL:   manual request for dump
+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
+ * @BASE_HWCNT_READER_EVENT_PREJOB:   prejob dump request
+ * @BASE_HWCNT_READER_EVENT_POSTJOB:  postjob dump request
+ * @BASE_HWCNT_READER_EVENT_COUNT:    number of supported events
+ */
+enum base_hwcnt_reader_event {
+	BASE_HWCNT_READER_EVENT_MANUAL,
+	BASE_HWCNT_READER_EVENT_PERIODIC,
+	BASE_HWCNT_READER_EVENT_PREJOB,
+	BASE_HWCNT_READER_EVENT_POSTJOB,
+
+	BASE_HWCNT_READER_EVENT_COUNT
+};
+
+#endif /* _KBASE_HWCNT_READER_H_ */
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c
new file mode 100755
index 0000000..c579d0a
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c
@@ -0,0 +1,431 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/of.h>
+#include <linux/sysfs.h>
+
+#include <mali_kbase.h>
+
+#define NR_IPA_GROUPS 8
+
+struct kbase_ipa_context;
+
+/**
+ * struct ipa_group - represents a single IPA group
+ * @name:               name of the IPA group
+ * @capacitance:        capacitance constant for IPA group
+ * @calc_power:         function to calculate power for IPA group
+ */
+struct ipa_group {
+	const char *name;
+	u32 capacitance;
+	u32 (*calc_power)(struct kbase_ipa_context *,
+			struct ipa_group *);
+};
+
+#include <mali_kbase_ipa_tables.h>
+
+/**
+ * struct kbase_ipa_context - IPA context per device
+ * @kbdev:              pointer to kbase device
+ * @groups:             array of IPA groups for this context
+ * @vinstr_cli:         vinstr client handle
+ * @vinstr_buffer:      buffer to dump hardware counters onto
+ * @ipa_lock:           protects the entire IPA context
+ */
+struct kbase_ipa_context {
+	struct kbase_device *kbdev;
+	struct ipa_group groups[NR_IPA_GROUPS];
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct mutex ipa_lock;
+};
+
+static ssize_t show_ipa_group(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbase_ipa_context *ctx = kbdev->ipa_ctx;
+	ssize_t count = -EINVAL;
+	size_t i;
+
+	mutex_lock(&ctx->ipa_lock);
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		if (!strcmp(ctx->groups[i].name, attr->attr.name)) {
+			count = snprintf(buf, PAGE_SIZE, "%lu\n",
+				(unsigned long)ctx->groups[i].capacitance);
+			break;
+		}
+	}
+	mutex_unlock(&ctx->ipa_lock);
+	return count;
+}
+
+static ssize_t set_ipa_group(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbase_ipa_context *ctx = kbdev->ipa_ctx;
+	unsigned long capacitance;
+	size_t i;
+	int err;
+
+	err = kstrtoul(buf, 0, &capacitance);
+	if (err < 0)
+		return err;
+	if (capacitance > U32_MAX)
+		return -ERANGE;
+
+	mutex_lock(&ctx->ipa_lock);
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		if (!strcmp(ctx->groups[i].name, attr->attr.name)) {
+			ctx->groups[i].capacitance = capacitance;
+			mutex_unlock(&ctx->ipa_lock);
+			return count;
+		}
+	}
+	mutex_unlock(&ctx->ipa_lock);
+	return -EINVAL;
+}
+
+static DEVICE_ATTR(group0, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group1, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group2, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group3, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group4, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group5, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group6, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group7, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+
+static struct attribute *kbase_ipa_attrs[] = {
+	&dev_attr_group0.attr,
+	&dev_attr_group1.attr,
+	&dev_attr_group2.attr,
+	&dev_attr_group3.attr,
+	&dev_attr_group4.attr,
+	&dev_attr_group5.attr,
+	&dev_attr_group6.attr,
+	&dev_attr_group7.attr,
+	NULL,
+};
+
+static struct attribute_group kbase_ipa_attr_group = {
+	.name = "ipa",
+	.attrs = kbase_ipa_attrs,
+};
+
+static void init_ipa_groups(struct kbase_ipa_context *ctx)
+{
+	memcpy(ctx->groups, ipa_groups_def, sizeof(ctx->groups));
+}
+
+#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	struct device_node *np, *child;
+	struct ipa_group *group;
+	size_t nr_groups;
+	size_t i;
+	int err;
+
+	np = of_get_child_by_name(kbdev->dev->of_node, "ipa-groups");
+	if (!np)
+		return 0;
+
+	nr_groups = 0;
+	for_each_available_child_of_node(np, child)
+		nr_groups++;
+	if (!nr_groups || nr_groups > ARRAY_SIZE(ctx->groups)) {
+		dev_err(kbdev->dev, "invalid number of IPA groups: %zu", nr_groups);
+		err = -EINVAL;
+		goto err0;
+	}
+
+	for_each_available_child_of_node(np, child) {
+		const char *name;
+		u32 capacitance;
+
+		name = of_get_property(child, "label", NULL);
+		if (!name) {
+			dev_err(kbdev->dev, "label missing for IPA group");
+			err = -EINVAL;
+			goto err0;
+		}
+		err = of_property_read_u32(child, "capacitance",
+				&capacitance);
+		if (err < 0) {
+			dev_err(kbdev->dev, "capacitance missing for IPA group");
+			goto err0;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+			group = &ctx->groups[i];
+			if (!strcmp(group->name, name)) {
+				group->capacitance = capacitance;
+				break;
+			}
+		}
+	}
+
+	of_node_put(np);
+	return 0;
+err0:
+	of_node_put(np);
+	return err;
+}
+#else
+static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx)
+{
+	return 0;
+}
+#endif
+
+static int reset_ipa_groups(struct kbase_ipa_context *ctx)
+{
+	init_ipa_groups(ctx);
+	return update_ipa_groups_from_dt(ctx);
+}
+
+static inline u32 read_hwcnt(struct kbase_ipa_context *ctx,
+	u32 offset)
+{
+	u8 *p = ctx->vinstr_buffer;
+
+	return *(u32 *)&p[offset];
+}
+
+static inline u32 add_saturate(u32 a, u32 b)
+{
+	if (U32_MAX - a < b)
+		return U32_MAX;
+	return a + b;
+}
+
+/*
+ * Calculate power estimation based on hardware counter `c'
+ * across all shader cores.
+ */
+static u32 calc_power_sc_single(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	u64 core_mask;
+	u32 base = 0, r = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			u64 n = read_hwcnt(ctx, base + c);
+			u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+			u32 s = group->capacitance;
+
+			r = add_saturate(r, div_u64(n * s, d));
+		}
+		base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+		core_mask >>= 1;
+	}
+	return r;
+}
+
+/*
+ * Calculate power estimation based on hardware counter `c1'
+ * and `c2' across all shader cores.
+ */
+static u32 calc_power_sc_double(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c1, u32 c2)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	u64 core_mask;
+	u32 base = 0, r = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			u64 n = read_hwcnt(ctx, base + c1);
+			u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+			u32 s = group->capacitance;
+
+			r = add_saturate(r, div_u64(n * s, d));
+			n = read_hwcnt(ctx, base + c2);
+			r = add_saturate(r, div_u64(n * s, d));
+		}
+		base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+		core_mask >>= 1;
+	}
+	return r;
+}
+
+static u32 calc_power_single(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c)
+{
+	u64 n = read_hwcnt(ctx, c);
+	u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+	u32 s = group->capacitance;
+
+	return div_u64(n * s, d);
+}
+
+static u32 calc_power_group0(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_single(ctx, group, L2_ANY_LOOKUP);
+}
+
+static u32 calc_power_group1(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_single(ctx, group, TILER_ACTIVE);
+}
+
+static u32 calc_power_group2(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, FRAG_ACTIVE);
+}
+
+static u32 calc_power_group3(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_double(ctx, group, VARY_SLOT_32,
+			VARY_SLOT_16);
+}
+
+static u32 calc_power_group4(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, TEX_COORD_ISSUE);
+}
+
+static u32 calc_power_group5(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, EXEC_INSTR_COUNT);
+}
+
+static u32 calc_power_group6(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_double(ctx, group, BEATS_RD_LSC,
+			BEATS_WR_LSC);
+}
+
+static u32 calc_power_group7(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, EXEC_CORE_ACTIVE);
+}
+
+static int attach_vinstr(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	size_t dump_size;
+
+	dump_size = kbase_vinstr_dump_size(kbdev);
+	ctx->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!ctx->vinstr_buffer) {
+		dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
+		return -1;
+	}
+
+	setup.jm_bm = ~0u;
+	setup.shader_bm = ~0u;
+	setup.tiler_bm = ~0u;
+	setup.mmu_l2_bm = ~0u;
+	ctx->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx,
+			&setup, ctx->vinstr_buffer);
+	if (!ctx->vinstr_cli) {
+		dev_err(kbdev->dev, "Failed to register IPA with vinstr core");
+		kfree(ctx->vinstr_buffer);
+		ctx->vinstr_buffer = NULL;
+		return -1;
+	}
+	return 0;
+}
+
+static void detach_vinstr(struct kbase_ipa_context *ctx)
+{
+	if (ctx->vinstr_cli)
+		kbase_vinstr_detach_client(ctx->vinstr_cli);
+	ctx->vinstr_cli = NULL;
+	kfree(ctx->vinstr_buffer);
+	ctx->vinstr_buffer = NULL;
+}
+
+struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_context *ctx;
+	int err;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return NULL;
+
+	mutex_init(&ctx->ipa_lock);
+	ctx->kbdev = kbdev;
+
+	err = reset_ipa_groups(ctx);
+	if (err < 0)
+		goto err0;
+
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_ipa_attr_group);
+	if (err < 0)
+		goto err0;
+
+	return ctx;
+err0:
+	kfree(ctx);
+	return NULL;
+}
+
+void kbase_ipa_term(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+
+	detach_vinstr(ctx);
+	sysfs_remove_group(&kbdev->dev->kobj, &kbase_ipa_attr_group);
+	kfree(ctx);
+}
+
+u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err)
+{
+	struct ipa_group *group;
+	u32 power = 0;
+	size_t i;
+
+	mutex_lock(&ctx->ipa_lock);
+	if (!ctx->vinstr_cli) {
+		*err = attach_vinstr(ctx);
+		if (*err < 0)
+			goto err0;
+	}
+	*err = kbase_vinstr_hwc_dump(ctx->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL);
+	if (*err)
+		goto err0;
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		group = &ctx->groups[i];
+		power = add_saturate(power, group->calc_power(ctx, group));
+	}
+err0:
+	mutex_unlock(&ctx->ipa_lock);
+	return power;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_dynamic_power);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h
new file mode 100755
index 0000000..e2234d1
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h
@@ -0,0 +1,41 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+struct kbase_ipa_context;
+
+/**
+ * kbase_ipa_init - initialize the kbase ipa core
+ * @kbdev:      kbase device
+ *
+ * Return:      pointer to the IPA context or NULL on failure
+ */
+struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_term - terminate the kbase ipa core
+ * @ctx:        pointer to the IPA context
+ */
+void kbase_ipa_term(struct kbase_ipa_context *ctx);
+
+/**
+ * kbase_ipa_dynamic_power - calculate power
+ * @ctx:        pointer to the IPA context
+ * @err:        0 on success, negative on failure
+ *
+ * Return:      returns power consumption as mw @ 1GHz @ 1V
+ */
+u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h
new file mode 100644
index 0000000..101abfe
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h
@@ -0,0 +1,104 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#define NR_BYTES_PER_CNT  4
+#define NR_CNT_PER_BLOCK 64
+
+#define JM_BASE    (0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define TILER_BASE (1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define MMU_BASE   (2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define SC0_BASE   (3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+
+#define GPU_ACTIVE       (JM_BASE    + NR_BYTES_PER_CNT *  6)
+#define TILER_ACTIVE     (TILER_BASE + NR_BYTES_PER_CNT * 45)
+#define L2_ANY_LOOKUP    (MMU_BASE   + NR_BYTES_PER_CNT * 25)
+#define FRAG_ACTIVE      (SC0_BASE   + NR_BYTES_PER_CNT *  4)
+#define EXEC_CORE_ACTIVE (SC0_BASE   + NR_BYTES_PER_CNT * 26)
+#define EXEC_INSTR_COUNT (SC0_BASE   + NR_BYTES_PER_CNT * 28)
+#define TEX_COORD_ISSUE  (SC0_BASE   + NR_BYTES_PER_CNT * 40)
+#define VARY_SLOT_32     (SC0_BASE   + NR_BYTES_PER_CNT * 50)
+#define VARY_SLOT_16     (SC0_BASE   + NR_BYTES_PER_CNT * 51)
+#define BEATS_RD_LSC     (SC0_BASE   + NR_BYTES_PER_CNT * 56)
+#define BEATS_WR_LSC     (SC0_BASE   + NR_BYTES_PER_CNT * 61)
+
+static u32 calc_power_group0(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group1(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group2(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group3(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group4(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group5(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group6(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group7(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+
+static struct ipa_group ipa_groups_def[] = {
+	/* L2 */
+	{
+		.name = "group0",
+		.capacitance = 687,
+		.calc_power = calc_power_group0,
+	},
+	/* TILER */
+	{
+		.name = "group1",
+		.capacitance = 0,
+		.calc_power = calc_power_group1,
+	},
+	/* FRAG */
+	{
+		.name = "group2",
+		.capacitance = 23,
+		.calc_power = calc_power_group2,
+	},
+	/* VARY */
+	{
+		.name = "group3",
+		.capacitance = 108,
+		.calc_power = calc_power_group3,
+	},
+	/* TEX */
+	{
+		.name = "group4",
+		.capacitance = 128,
+		.calc_power = calc_power_group4,
+	},
+	/* EXEC INSTR */
+	{
+		.name = "group5",
+		.capacitance = 249,
+		.calc_power = calc_power_group5,
+	},
+	/* LSC */
+	{
+		.name = "group6",
+		.capacitance = 0,
+		.calc_power = calc_power_group6,
+	},
+	/* EXEC OVERHEAD */
+	{
+		.name = "group7",
+		.capacitance = 29,
+		.calc_power = calc_power_group7,
+	},
+};
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100755
index 0000000..3e0a589
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1829 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <mali_kbase.h>
+#include <mali_kbase_uku.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/ratelimit.h>
+
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_tlstream.h>
+
+#include "mali_kbase_dma_fence.h"
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/* Return whether katom will run on the GPU or not. Currently only soft jobs and
+ * dependency-only atoms do not run on the GPU */
+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) ||  \
+			((katom->core_req & BASE_JD_REQ_ATOM_TYPE) ==    \
+							BASE_JD_REQ_DEP)))
+/*
+ * This is the kernel side of the API. Only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ *   completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p)
+{
+#ifdef CONFIG_COMPAT
+	if (kctx->is_compat)
+		return compat_ptr(p->compat_value);
+#endif
+	return p->value;
+}
+
+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
+ */
+static int jd_run_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+	if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+		/* Dependency only atom */
+		katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		return 0;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		/* Soft-job */
+		if (katom->will_fail_event_code) {
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			return 0;
+		}
+		if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+			if (!kbase_replay_process(katom))
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		} else if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		}
+		return 0;
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+	/* Queue an action about whether we should try scheduling a context */
+	return kbasep_js_add_job(kctx, katom);
+}
+
+#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE)
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kbdev = katom->kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Check whether the atom's other dependencies were already met. If
+	 * katom is a GPU atom then the job scheduler may be able to represent
+	 * the dependencies, hence we may attempt to submit it before they are
+	 * met. Other atoms must have had both dependencies resolved.
+	 */
+	if (IS_GPU_ATOM(katom) ||
+			(!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+			!kbase_jd_katom_dep_atom(&katom->dep[1]))) {
+		/* katom dep complete, attempt to run it */
+		bool resched = false;
+
+		resched = jd_run_atom(katom);
+
+		if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+			/* The atom has already finished */
+			resched |= jd_done_nolock(katom, NULL);
+		}
+
+		if (resched)
+			kbase_js_sched_all(kbdev);
+	}
+}
+#endif
+
+#ifdef CONFIG_KDS
+
+/* Add the katom to the kds waiting list.
+ * Atoms must be added to the waiting list after a successful call to kds_async_waitall.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	list_add_tail(&katom->node, &kctx->waiting_kds_resource);
+}
+
+/* Remove the katom from the kds waiting list.
+ * Atoms must be removed from the waiting list before a call to kds_resource_set_release_sync.
+ * The supplied katom must first have been added to the list with a call to kbase_jd_kds_waiters_add.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_remove(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	list_del(&katom->node);
+}
+
+static void kds_dep_clear(void *callback_parameter, void *callback_extra_parameter)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = (struct kbase_jd_atom *)callback_parameter;
+	KBASE_DEBUG_ASSERT(katom);
+
+	ctx = &katom->kctx->jctx;
+
+	/* If KDS resource has already been satisfied (e.g. due to zapping)
+	 * do nothing.
+	 */
+	mutex_lock(&ctx->lock);
+	if (!katom->kds_dep_satisfied) {
+		katom->kds_dep_satisfied = true;
+		kbase_jd_dep_clear_locked(katom);
+	}
+	mutex_unlock(&ctx->lock);
+}
+
+static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 *  there is a race between job completion and cancellation */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+#endif				/* CONFIG_KDS */
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_KDS
+	if (katom->kds_rset) {
+		struct kbase_jd_context *jctx = &katom->kctx->jctx;
+
+		/*
+		 * As the atom is no longer waiting, remove it from
+		 * the waiting list.
+		 */
+
+		mutex_lock(&jctx->lock);
+		kbase_jd_kds_waiters_remove(katom);
+		mutex_unlock(&jctx->lock);
+
+		/* Release the kds resource or cancel if zapping */
+		kds_resource_set_release_sync(&katom->kds_rset);
+	}
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 * Any successfully completed atom would have had all it's callbacks
+	 * completed before the atom was run, so only flush for failed atoms.
+	 */
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		flush_workqueue(katom->kctx->dma_fence.wq);
+#endif /* CONFIG_MALI_DMA_FENCE */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_KDS
+	/* Prevent the KDS resource from triggering the atom in case of zapping */
+	if (katom->kds_rset)
+		katom->kds_dep_satisfied = true;
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_signal(katom);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	kbase_gpu_vm_lock(katom->kctx);
+	/* only roll back if extres is non-NULL */
+	if (katom->extres) {
+		u32 res_no;
+
+		res_no = katom->nr_extres;
+		while (res_no-- > 0) {
+			struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+			struct kbase_va_region *reg;
+
+			reg = kbase_region_tracker_find_region_base_address(
+					katom->kctx,
+					katom->extres[res_no].gpu_address);
+			kbase_unmap_external_resource(katom->kctx, reg, alloc);
+		}
+		kfree(katom->extres);
+		katom->extres = NULL;
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+}
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ */
+
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+	int err_ret_val = -EINVAL;
+	u32 res_no;
+#ifdef CONFIG_KDS
+	u32 kds_res_count = 0;
+	struct kds_resource **kds_resources = NULL;
+	unsigned long *kds_access_bitmap = NULL;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct kbase_dma_fence_resv_info info = {
+		.dma_fence_resv_count = 0,
+	};
+#endif
+	struct base_external_resource *input_extres;
+
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+	/* no resources encoded, early out */
+	if (!katom->nr_extres)
+		return -EINVAL;
+
+	katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+	if (NULL == katom->extres) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	/* copy user buffer to the end of our real buffer.
+	 * Make sure the struct sizes haven't changed in a way
+	 * we don't support */
+	BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+	input_extres = (struct base_external_resource *)
+			(((unsigned char *)katom->extres) +
+			(sizeof(*katom->extres) - sizeof(*input_extres)) *
+			katom->nr_extres);
+
+	if (copy_from_user(input_extres,
+			get_compat_pointer(katom->kctx, &user_atom->extres_list),
+			sizeof(*input_extres) * katom->nr_extres) != 0) {
+		err_ret_val = -EINVAL;
+		goto early_err_out;
+	}
+#ifdef CONFIG_KDS
+	/* assume we have to wait for all */
+	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+	kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL);
+
+	if (!kds_resources) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+	kds_access_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres),
+				    sizeof(unsigned long),
+				    GFP_KERNEL);
+	if (!kds_access_bitmap) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	info.resv_objs = kmalloc_array(katom->nr_extres,
+				       sizeof(struct reservation_object *),
+				       GFP_KERNEL);
+	if (!info.resv_objs) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	info.dma_fence_excl_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres),
+					     sizeof(unsigned long),
+					     GFP_KERNEL);
+	if (!info.dma_fence_excl_bitmap) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Take the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* need to keep the GPU VM locked while we set up UMM buffers */
+	kbase_gpu_vm_lock(katom->kctx);
+	for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+		struct base_external_resource *res;
+		struct kbase_va_region *reg;
+		struct kbase_mem_phy_alloc *alloc;
+		bool exclusive;
+
+		res = &input_extres[res_no];
+		exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
+				? true : false;
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx,
+				res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+		/* did we find a matching region object? */
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE)) {
+			/* roll back */
+			goto failed_loop;
+		}
+
+		if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
+				(reg->flags & KBASE_REG_SECURE)) {
+			katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED;
+		}
+
+		alloc = kbase_map_external_resource(katom->kctx, reg,
+				current->mm
+#ifdef CONFIG_KDS
+				, &kds_res_count, kds_resources,
+				kds_access_bitmap, exclusive
+#endif
+				);
+		if (!alloc) {
+			err_ret_val = -EINVAL;
+			goto failed_loop;
+		}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+			struct reservation_object *resv;
+
+			resv = reg->gpu_alloc->imported.umm.dma_buf->resv;
+			if (resv)
+				kbase_dma_fence_add_reservation(resv, &info,
+								exclusive);
+		}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+		/* finish with updating out array with the data we found */
+		/* NOTE: It is important that this is the last thing we do (or
+		 * at least not before the first write) as we overwrite elements
+		 * as we loop and could be overwriting ourself, so no writes
+		 * until the last read for an element.
+		 * */
+		katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+		katom->extres[res_no].alloc = alloc;
+	}
+	/* successfully parsed the extres array */
+	/* drop the vm lock before we call into kds */
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+#ifdef CONFIG_KDS
+	if (kds_res_count) {
+		int wait_failed;
+
+		/* We have resources to wait for with kds */
+		katom->kds_dep_satisfied = false;
+
+		wait_failed = kds_async_waitall(&katom->kds_rset,
+				&katom->kctx->jctx.kds_cb, katom, NULL,
+				kds_res_count, kds_access_bitmap,
+				kds_resources);
+
+		if (wait_failed)
+			goto failed_kds_setup;
+		else
+			kbase_jd_kds_waiters_add(katom);
+	} else {
+		/* Nothing to wait for, so kds dep met */
+		katom->kds_dep_satisfied = true;
+	}
+	kfree(kds_resources);
+	kfree(kds_access_bitmap);
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (info.dma_fence_resv_count) {
+		int ret;
+
+		ret = kbase_dma_fence_wait(katom, &info);
+		if (ret < 0)
+			goto failed_dma_fence_setup;
+	}
+
+	kfree(info.resv_objs);
+	kfree(info.dma_fence_excl_bitmap);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* all done OK */
+	return 0;
+
+/* error handling section */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+failed_dma_fence_setup:
+#ifdef CONFIG_KDS
+	/* If we are here, dma_fence setup failed but KDS didn't.
+	 * Revert KDS setup if any.
+	 */
+	if (kds_res_count) {
+		mutex_unlock(&katom->kctx->jctx.lock);
+		kds_resource_set_release_sync(&katom->kds_rset);
+		mutex_lock(&katom->kctx->jctx.lock);
+
+		kbase_jd_kds_waiters_remove(katom);
+		katom->kds_dep_satisfied = true;
+	}
+#endif /* CONFIG_KDS */
+#endif /* CONFIG_MALI_DMA_FENCE */
+#ifdef CONFIG_KDS
+failed_kds_setup:
+#endif
+#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE)
+	/* Lock the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* lock before we unmap */
+	kbase_gpu_vm_lock(katom->kctx);
+#endif
+
+ failed_loop:
+	/* undo the loop work */
+	while (res_no-- > 0) {
+		struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+
+		kbase_unmap_external_resource(katom->kctx, NULL, alloc);
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+ early_err_out:
+	kfree(katom->extres);
+	katom->extres = NULL;
+#ifdef CONFIG_KDS
+	kfree(kds_resources);
+	kfree(kds_access_bitmap);
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	kfree(info.resv_objs);
+	kfree(info.dma_fence_excl_bitmap);
+#endif
+	return err_ret_val;
+}
+
+static inline void jd_resolve_dep(struct list_head *out_list,
+					struct kbase_jd_atom *katom,
+					u8 d, bool ctx_is_dying)
+{
+	u8 other_d = !d;
+
+	while (!list_empty(&katom->dep_head[d])) {
+		struct kbase_jd_atom *dep_atom;
+		struct kbase_jd_atom *other_dep_atom;
+		u8 dep_type;
+
+		dep_atom = list_entry(katom->dep_head[d].next,
+				struct kbase_jd_atom, dep_item[d]);
+		list_del(katom->dep_head[d].next);
+
+		dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+		kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+		if (katom->event_code != BASE_JD_EVENT_DONE &&
+			(dep_type != BASE_JD_DEP_TYPE_ORDER)) {
+#ifdef CONFIG_KDS
+			if (!dep_atom->kds_dep_satisfied) {
+				/* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and
+				 * do nothing. If the callback doesn't happen then kbase_jd_post_external_resources will clean up
+				 */
+				dep_atom->kds_dep_satisfied = true;
+			}
+#endif
+
+#ifdef CONFIG_MALI_DMA_FENCE
+			kbase_dma_fence_cancel_callbacks(dep_atom);
+#endif
+
+			dep_atom->event_code = katom->event_code;
+			KBASE_DEBUG_ASSERT(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED);
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY)
+					!= BASE_JD_REQ_SOFT_REPLAY) {
+				dep_atom->will_fail_event_code =
+					dep_atom->event_code;
+			} else {
+				dep_atom->status =
+					KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+		}
+		other_dep_atom = (struct kbase_jd_atom *)
+			kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]);
+
+		if (!dep_atom->in_jd_list && (!other_dep_atom ||
+				(IS_GPU_ATOM(dep_atom) && !ctx_is_dying &&
+				!dep_atom->will_fail_event_code &&
+				!other_dep_atom->will_fail_event_code))) {
+			bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+			int dep_count;
+
+			dep_count = atomic_read(&dep_atom->dma_fence.dep_count);
+			if (likely(dep_count == -1)) {
+				dep_satisfied = true;
+			} else if (dep_count == 0) {
+				/*
+				 * All fences for this atom has signaled, but
+				 * the worker that will queue the atom has not
+				 * yet run.
+				 *
+				 * Mark the atom as handled by setting
+				 * dep_count to -1 so that the worker doesn't
+				 * queue the atom again.
+				 */
+				atomic_set(&dep_atom->dma_fence.dep_count, -1);
+				/*
+				 * Remove the atom from the list of dma-fence
+				 * waiting atoms.
+				 */
+				kbase_dma_fence_waiters_remove(dep_atom);
+				dep_satisfied = true;
+			} else {
+				dep_satisfied = false;
+			}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+#ifdef CONFIG_KDS
+			dep_satisfied = dep_satisfied && dep_atom->kds_dep_satisfied;
+#endif
+
+			if (dep_satisfied) {
+				dep_atom->in_jd_list = true;
+				list_add_tail(&dep_atom->jd_item, out_list);
+			}
+		}
+	}
+}
+
+KBASE_EXPORT_TEST_API(jd_resolve_dep);
+
+#if MALI_CUSTOMER_RELEASE == 0
+static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	kbdev->force_replay_count++;
+
+	if (kbdev->force_replay_count >= kbdev->force_replay_limit) {
+		kbdev->force_replay_count = 0;
+		katom->event_code = BASE_JD_EVENT_FORCE_REPLAY;
+
+		if (kbdev->force_replay_random)
+			kbdev->force_replay_limit =
+			   (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1;
+
+		dev_info(kbdev->dev, "force_replay : promoting to error\n");
+	}
+}
+
+/** Test to see if atom should be forced to fail.
+ *
+ * This function will check if an atom has a replay job as a dependent. If so
+ * then it will be considered for forced failure. */
+static void jd_check_force_failure(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int i;
+
+	if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) ||
+	    (katom->core_req & BASEP_JD_REQ_EVENT_NEVER))
+		return;
+
+	for (i = 1; i < BASE_JD_ATOM_COUNT; i++) {
+		if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom ||
+		    kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
+			struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
+						     BASE_JD_REQ_SOFT_REPLAY &&
+			    (dep_atom->core_req & kbdev->force_replay_core_req)
+					     == kbdev->force_replay_core_req) {
+				jd_force_failure(kbdev, katom);
+				return;
+			}
+		}
+	}
+}
+#endif
+
+static void jd_try_submitting_deps(struct list_head *out_list,
+		struct kbase_jd_atom *node)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct list_head *pos;
+
+		list_for_each(pos, &node->dep_head[i]) {
+			struct kbase_jd_atom *dep_atom = list_entry(pos,
+					struct kbase_jd_atom, dep_item[i]);
+
+			if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) {
+				/*Check if atom deps look sane*/
+				bool dep0_valid = !dep_atom->dep[0].atom ||
+						(dep_atom->dep[0].atom->status
+						>= KBASE_JD_ATOM_STATE_IN_JS);
+				bool dep1_valid = !dep_atom->dep[1].atom ||
+						(dep_atom->dep[1].atom->status
+						>= KBASE_JD_ATOM_STATE_IN_JS);
+
+				if (dep0_valid && dep1_valid) {
+					dep_atom->in_jd_list = true;
+					list_add(&dep_atom->jd_item, out_list);
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head completed_jobs;
+	struct list_head runnable_jobs;
+	bool need_to_try_schedule_context = false;
+	int i;
+
+	INIT_LIST_HEAD(&completed_jobs);
+	INIT_LIST_HEAD(&runnable_jobs);
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+#if MALI_CUSTOMER_RELEASE == 0
+	jd_check_force_failure(katom);
+#endif
+
+	/* This is needed in case an atom is failed due to being invalid, this
+	 * can happen *before* the jobs that the atom depends on have completed */
+	for (i = 0; i < 2; i++) {
+		if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
+			list_del(&katom->dep_item[i]);
+			kbase_jd_katom_dep_clear(&katom->dep[i]);
+		}
+	}
+
+	/* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with
+	 * BASE_JD_EVENT_TILE_RANGE_FAULT.
+	 *
+	 * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the
+	 * error code to BASE_JD_EVENT_DONE
+	 */
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) &&
+		  katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) {
+		if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) {
+			/* Promote the failure to job done */
+			katom->event_code = BASE_JD_EVENT_DONE;
+			katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED);
+		}
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+	list_add_tail(&katom->jd_item, &completed_jobs);
+
+	while (!list_empty(&completed_jobs)) {
+		katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item);
+		list_del(completed_jobs.prev);
+		KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+		for (i = 0; i < 2; i++)
+			jd_resolve_dep(&runnable_jobs, katom, i,
+					kctx->jctx.sched_info.ctx.is_dying);
+
+		if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+			kbase_jd_post_external_resources(katom);
+
+		while (!list_empty(&runnable_jobs)) {
+			struct kbase_jd_atom *node;
+
+			node = list_entry(runnable_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(runnable_jobs.next);
+			node->in_jd_list = false;
+
+			KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+			if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
+					!kctx->jctx.sched_info.ctx.is_dying) {
+				need_to_try_schedule_context |= jd_run_atom(node);
+			} else {
+				node->event_code = katom->event_code;
+
+				if ((node->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE) ==
+					BASE_JD_REQ_SOFT_REPLAY) {
+					if (kbase_replay_process(node))
+						/* Don't complete this atom */
+						continue;
+				} else if (node->core_req &
+							BASE_JD_REQ_SOFT_JOB) {
+					/* If this is a fence wait soft job
+					 * then remove it from the list of sync
+					 * waiters.
+					 */
+					if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
+						kbasep_remove_waiting_soft_job(node);
+
+					kbase_finish_soft_job(node);
+				}
+				node->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+
+			if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+				list_add_tail(&node->jd_item, &completed_jobs);
+			} else if (node->status == KBASE_JD_ATOM_STATE_IN_JS &&
+					!node->will_fail_event_code) {
+				/* Node successfully submitted, try submitting
+				 * dependencies as they may now be representable
+				 * in JS */
+				jd_try_submitting_deps(&runnable_jobs, node);
+			}
+		}
+
+		/* Register a completed job as a disjoint event when the GPU
+		 * is in a disjoint state (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kctx->kbdev);
+		if (completed_jobs_ctx)
+			list_add_tail(&katom->jd_item, completed_jobs_ctx);
+		else
+			kbase_event_post(kctx, katom);
+
+		/* Decrement and check the TOTAL number of jobs. This includes
+		 * those not tracked by the scheduler: 'not ready to run' and
+		 * 'dependency-only' jobs. */
+		if (--kctx->jctx.job_nr == 0)
+			wake_up(&kctx->jctx.zero_jobs_wait);	/* All events are safely queued now, and we can signal any waiter
+								 * that we've got no more jobs (so we can be safely terminated) */
+	}
+
+	return need_to_try_schedule_context;
+}
+
+KBASE_EXPORT_TEST_API(jd_done_nolock);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+enum {
+	CORE_REQ_DEP_ONLY,
+	CORE_REQ_SOFT,
+	CORE_REQ_COMPUTE,
+	CORE_REQ_FRAGMENT,
+	CORE_REQ_VERTEX,
+	CORE_REQ_TILER,
+	CORE_REQ_FRAGMENT_VERTEX,
+	CORE_REQ_FRAGMENT_VERTEX_TILER,
+	CORE_REQ_FRAGMENT_TILER,
+	CORE_REQ_VERTEX_TILER,
+	CORE_REQ_UNKNOWN
+};
+static const char * const core_req_strings[] = {
+	"Dependency Only Job",
+	"Soft Job",
+	"Compute Shader Job",
+	"Fragment Shader Job",
+	"Vertex/Geometry Shader Job",
+	"Tiler Job",
+	"Fragment Shader + Vertex/Geometry Shader Job",
+	"Fragment Shader + Vertex/Geometry Shader Job + Tiler Job",
+	"Fragment Shader + Tiler Job",
+	"Vertex/Geometry Shader Job + Tiler Job",
+	"Unknown Job"
+};
+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
+{
+	if (core_req & BASE_JD_REQ_SOFT_JOB)
+		return core_req_strings[CORE_REQ_SOFT];
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		return core_req_strings[CORE_REQ_COMPUTE];
+	switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) {
+	case BASE_JD_REQ_DEP:
+		return core_req_strings[CORE_REQ_DEP_ONLY];
+	case BASE_JD_REQ_FS:
+		return core_req_strings[CORE_REQ_FRAGMENT];
+	case BASE_JD_REQ_CS:
+		return core_req_strings[CORE_REQ_VERTEX];
+	case BASE_JD_REQ_T:
+		return core_req_strings[CORE_REQ_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_TILER];
+	case (BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_VERTEX_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER];
+	}
+	return core_req_strings[CORE_REQ_UNKNOWN];
+}
+#endif
+
+bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int queued = 0;
+	int i;
+	int sched_prio;
+	bool ret;
+	bool will_fail = false;
+
+	/* Update the TOTAL number of jobs. This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	jctx->job_nr++;
+
+	katom->start_timestamp.tv64 = 0;
+	katom->time_spent_us = 0;
+	katom->udata = user_atom->udata;
+	katom->kctx = kctx;
+	katom->nr_extres = user_atom->nr_extres;
+	katom->extres = NULL;
+	katom->device_nr = user_atom->device_nr;
+	katom->affinity = 0;
+	katom->jc = user_atom->jc;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->core_req = user_atom->core_req;
+	katom->atom_flags = 0;
+	katom->retry_count = 0;
+	katom->need_cache_flush_cores_retained = 0;
+	katom->pre_dep = NULL;
+	katom->post_dep = NULL;
+	katom->x_pre_dep = NULL;
+	katom->x_post_dep = NULL;
+	katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED;
+	katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+	katom->age = kctx->age_count++;
+
+	INIT_LIST_HEAD(&katom->jd_item);
+#ifdef CONFIG_KDS
+	/* Start by assuming that the KDS dependencies are satisfied,
+	 * kbase_jd_pre_external_resources will correct this if there are dependencies */
+	katom->kds_dep_satisfied = true;
+	katom->kds_rset = NULL;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	atomic_set(&katom->dma_fence.dep_count, -1);
+#endif
+
+	/* Don't do anything if there is a mess up with dependencies.
+	   This is done in a separate cycle to check both the dependencies at ones, otherwise
+	   it will be extra complexity to deal with 1st dependency ( just added to the list )
+	   if only the 2nd one has invalid config.
+	 */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+		if (dep_atom_number) {
+			if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+				katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+
+				/* Wrong dependency setup. Atom will be sent
+				 * back to user space. Do not record any
+				 * dependencies. */
+				kbase_tlstream_tl_new_atom(
+						katom,
+						kbase_jd_atom_id(kctx, katom));
+				kbase_tlstream_tl_ret_atom_ctx(
+						katom, kctx);
+
+				ret = jd_done_nolock(katom, NULL);
+				goto out;
+			}
+		}
+	}
+
+	/* Add dependencies */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type;
+		struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number];
+
+		dep_atom_type = user_atom->pre_dep[i].dependency_type;
+		kbase_jd_katom_dep_clear(&katom->dep[i]);
+
+		if (!dep_atom_number)
+			continue;
+
+		if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED ||
+				dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+
+			if (dep_atom->event_code == BASE_JD_EVENT_DONE)
+				continue;
+			/* don't stop this atom if it has an order dependency
+			 * only to the failed one, try to submit it through
+			 * the normal path
+			 */
+			if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom->event_code > BASE_JD_EVENT_ACTIVE) {
+				continue;
+			}
+
+			/* Atom has completed, propagate the error code if any */
+			katom->event_code = dep_atom->event_code;
+			katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+			/* This atom is going through soft replay or
+			 * will be sent back to user space. Do not record any
+			 * dependencies. */
+			kbase_tlstream_tl_new_atom(
+					katom,
+					kbase_jd_atom_id(kctx, katom));
+			kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
+
+			if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					 == BASE_JD_REQ_SOFT_REPLAY) {
+				if (kbase_replay_process(katom)) {
+					ret = false;
+					goto out;
+				}
+			}
+			will_fail = true;
+
+		} else {
+			/* Atom is in progress, add this atom to the list */
+			list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
+			kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type);
+			queued = 1;
+		}
+	}
+
+	if (will_fail) {
+		if (!queued) {
+			ret = jd_done_nolock(katom, NULL);
+
+			goto out;
+		} else {
+			katom->will_fail_event_code = katom->event_code;
+			ret = false;
+
+			goto out;
+		}
+	} else {
+		/* These must occur after the above loop to ensure that an atom
+		 * that depends on a previous atom with the same number behaves
+		 * as expected */
+		katom->event_code = BASE_JD_EVENT_DONE;
+		katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	}
+
+	/* Create a new atom recording all dependencies it was set up with. */
+	kbase_tlstream_tl_new_atom(
+			katom,
+			kbase_jd_atom_id(kctx, katom));
+	kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
+	for (i = 0; i < 2; i++)
+		if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
+					&katom->dep[i])) {
+			kbase_tlstream_tl_dep_atom_atom(
+					(void *)kbase_jd_katom_dep_atom(
+						&katom->dep[i]),
+					(void *)katom);
+		} else if (BASE_JD_DEP_TYPE_INVALID !=
+				user_atom->pre_dep[i].dependency_type) {
+			/* Resolved dependency. */
+			int dep_atom_number =
+				user_atom->pre_dep[i].atom_id;
+			struct kbase_jd_atom *dep_atom =
+				&jctx->atoms[dep_atom_number];
+
+			kbase_tlstream_tl_rdep_atom_atom(
+					(void *)dep_atom,
+					(void *)katom);
+		}
+
+	/* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
+	if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with an invalid device_nr */
+	if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
+	    (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid device_nr %d",
+				katom->device_nr);
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with invalid core requirements */
+	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
+			(katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid core requirements");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* For invalid priority, be most lenient and choose the default */
+	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+	katom->sched_priority = sched_prio;
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		/* handle what we need to do to access the external resources */
+		if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
+			/* setup failed (no access, bad resource, unknown resource types, etc.) */
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+	/* Validate the atom. Function will return error if the atom is
+	 * malformed.
+	 *
+	 * Soft-jobs never enter the job scheduler but have their own initialize method.
+	 *
+	 * If either fail then we immediately complete the atom with an error.
+	 */
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
+		if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	} else {
+		/* Soft-job */
+		if (kbase_prepare_soft_job(katom) != 0) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	katom->work_id = atomic_inc_return(&jctx->work_id);
+	trace_gpu_job_enqueue((u32)kctx->id, katom->work_id,
+			kbasep_map_core_reqs_to_string(katom->core_req));
+#endif
+
+	if (queued && !IS_GPU_ATOM(katom)) {
+		ret = false;
+		goto out;
+	}
+#ifdef CONFIG_KDS
+	if (!katom->kds_dep_satisfied) {
+		/* Queue atom due to KDS dependency */
+		ret = false;
+		goto out;
+	}
+#endif				/* CONFIG_KDS */
+
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (atomic_read(&katom->dma_fence.dep_count) != -1) {
+		ret = false;
+		goto out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+		if (kbase_replay_process(katom))
+			ret = false;
+		else
+			ret = jd_done_nolock(katom, NULL);
+
+		goto out;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+
+		ret = false;
+	} else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		ret = kbasep_js_add_job(kctx, katom);
+		/* If job was cancelled then resolve immediately */
+		if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED)
+			ret = jd_done_nolock(katom, NULL);
+	} else {
+		/* This is a pure dependency. Resolve it immediately */
+		ret = jd_done_nolock(katom, NULL);
+	}
+
+ out:
+	return ret;
+}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data,
+		int uk6_atom)
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int err = 0;
+	int i;
+	bool need_to_try_schedule_context = false;
+	struct kbase_device *kbdev;
+	void __user *user_addr;
+	u32 latest_flush;
+
+	/*
+	 * kbase_jd_submit isn't expected to fail and so all errors with the jobs
+	 * are reported by immediately falling them (through event system)
+	 */
+	kbdev = kctx->kbdev;
+
+	beenthere(kctx, "%s", "Enter");
+
+	if ((kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != 0) {
+		dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+		return -EINVAL;
+	}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	if ((uk6_atom && submit_data->stride !=
+			sizeof(struct base_jd_atom_v2_uk6)) ||
+			submit_data->stride != sizeof(base_jd_atom_v2)) {
+#else
+	if (submit_data->stride != sizeof(base_jd_atom_v2)) {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+		dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+		return -EINVAL;
+	}
+
+	user_addr = get_compat_pointer(kctx, &submit_data->addr);
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight));
+
+	/* All atoms submitted in this call have the same flush ID */
+	latest_flush = kbase_backend_get_current_flush_id(kbdev);
+
+	for (i = 0; i < submit_data->nr_atoms; i++) {
+		struct base_jd_atom_v2 user_atom;
+		struct kbase_jd_atom *katom;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+		if (uk6_atom) {
+			struct base_jd_atom_v2_uk6 user_atom_v6;
+			base_jd_dep_type dep_types[2] = {BASE_JD_DEP_TYPE_DATA, BASE_JD_DEP_TYPE_DATA};
+
+			if (copy_from_user(&user_atom_v6, user_addr,
+					sizeof(user_atom_v6))) {
+				err = -EINVAL;
+				KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx,
+					atomic_sub_return(
+					submit_data->nr_atoms - i,
+					&kctx->timeline.jd_atoms_in_flight));
+				break;
+			}
+			/* Convert from UK6 atom format to UK7 format */
+			user_atom.jc = user_atom_v6.jc;
+			user_atom.udata = user_atom_v6.udata;
+			user_atom.extres_list = user_atom_v6.extres_list;
+			user_atom.nr_extres = user_atom_v6.nr_extres;
+			user_atom.core_req = (u32)(user_atom_v6.core_req & 0x7fff);
+
+			/* atom number 0 is used for no dependency atoms */
+			if (!user_atom_v6.pre_dep[0])
+				dep_types[0] = BASE_JD_DEP_TYPE_INVALID;
+
+			base_jd_atom_dep_set(&user_atom.pre_dep[0],
+					user_atom_v6.pre_dep[0],
+					dep_types[0]);
+
+			/* atom number 0 is used for no dependency atoms */
+			if (!user_atom_v6.pre_dep[1])
+				dep_types[1] = BASE_JD_DEP_TYPE_INVALID;
+
+			base_jd_atom_dep_set(&user_atom.pre_dep[1],
+					user_atom_v6.pre_dep[1],
+					dep_types[1]);
+
+			user_atom.atom_number = user_atom_v6.atom_number;
+			user_atom.prio = user_atom_v6.prio;
+			user_atom.device_nr = user_atom_v6.device_nr;
+		} else {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+		if (copy_from_user(&user_atom, user_addr, sizeof(user_atom)) != 0) {
+			err = -EINVAL;
+			KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(submit_data->nr_atoms - i, &kctx->timeline.jd_atoms_in_flight));
+			break;
+		}
+#ifdef BASE_LEGACY_UK6_SUPPORT
+		}
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+		if (KBASE_API_VERSION(10, 3) > kctx->api_version)
+			user_atom.core_req = (u32)(user_atom.compat_core_req
+					      & 0x7fff);
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
+
+		user_addr = (void __user *)((uintptr_t) user_addr + submit_data->stride);
+
+		mutex_lock(&jctx->lock);
+#ifndef compiletime_assert
+#define compiletime_assert_defined
+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \
+while (false)
+#endif
+		compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) ==
+					BASE_JD_ATOM_COUNT,
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+		compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) ==
+					sizeof(user_atom.atom_number),
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+#ifdef compiletime_assert_defined
+#undef compiletime_assert
+#undef compiletime_assert_defined
+#endif
+		katom = &jctx->atoms[user_atom.atom_number];
+
+		/* Record the flush ID for the cache flush optimisation */
+		katom->flush_id = latest_flush;
+
+		while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
+			/* Atom number is already in use, wait for the atom to
+			 * complete
+			 */
+			mutex_unlock(&jctx->lock);
+
+			/* This thread will wait for the atom to complete. Due
+			 * to thread scheduling we are not sure that the other
+			 * thread that owns the atom will also schedule the
+			 * context, so we force the scheduler to be active and
+			 * hence eventually schedule this context at some point
+			 * later.
+			 */
+			kbase_js_sched_all(kbdev);
+
+			if (wait_event_killable(katom->completed,
+					katom->status ==
+					KBASE_JD_ATOM_STATE_UNUSED) != 0) {
+				/* We're being killed so the result code
+				 * doesn't really matter
+				 */
+				return 0;
+			}
+			mutex_lock(&jctx->lock);
+		}
+
+		need_to_try_schedule_context |=
+				       jd_submit_atom(kctx, &user_atom, katom);
+
+		/* Register a completed job as a disjoint event when the GPU is in a disjoint state
+		 * (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kbdev);
+
+		mutex_unlock(&jctx->lock);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kbdev);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_submit);
+
+void kbase_jd_done_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	u64 cache_jc = katom->jc;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool schedule = false;
+	bool context_idle;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+	kbdev = kctx->kbdev;
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	/*
+	 * Begin transaction on JD context and JS context
+	 */
+	mutex_lock(&jctx->lock);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* This worker only gets called on contexts that are scheduled *in*. This is
+	 * because it only happens in response to an IRQ from a job that was
+	 * running.
+	 */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
+
+	if (katom->event_code == BASE_JD_EVENT_STOPPED) {
+		/* Atom has been promoted to stopped */
+		unsigned long flags;
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		kbase_js_unpull(kctx, katom);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&jctx->lock);
+
+		return;
+	}
+
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		dev_err(kbdev->dev,
+			"t6xx: GPU fault 0x%02lx from job slot %d\n",
+					(unsigned long)katom->event_code,
+								katom->slot_nr);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	/* Retain state before the katom disappears */
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+
+	if (!kbasep_js_has_atom_finished(&katom_retained_state)) {
+		mutex_lock(&js_devdata->runpool_mutex);
+		kbasep_js_clear_job_retry_submit(katom);
+		/* An atom that has been hard-stopped might have previously
+		 * been soft-stopped and has just finished before the hard-stop
+		 * occurred. For this reason, clear the hard-stopped flag */
+		katom->atom_flags &= ~(KBASE_KATOM_FLAG_BEEN_HARD_STOPPED);
+		mutex_unlock(&js_devdata->runpool_mutex);
+	}
+
+	if (kbasep_js_has_atom_finished(&katom_retained_state))
+		schedule = true;
+
+	context_idle = kbase_js_complete_atom_wq(kctx, katom);
+
+	KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
+
+	kbasep_js_remove_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+	schedule |= jd_done_nolock(katom, &kctx->completed_jobs);
+
+	/* katom may have been freed now, do not use! */
+
+	if (context_idle) {
+		unsigned long flags;
+
+		mutex_lock(&js_devdata->queue_mutex);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		/* If kbase_sched() has scheduled this context back in then
+		 * ctx_active will have been set after we marked it as inactive,
+		 * and another pm reference will have been taken, so drop our
+		 * reference. But do not call kbase_jm_idle_ctx(), as the
+		 * context is active and fast-starting is allowed.
+		 *
+		 * If an atom has been fast-started then kctx->atoms_pulled will
+		 * be non-zero but ctx_active will still be false (as the
+		 * previous pm reference has been inherited). Do NOT drop our
+		 * reference, as it has been re-used, and leave the context as
+		 * active.
+		 *
+		 * If no new atoms have been started then ctx_active will still
+		 * be false and atoms_pulled will be zero, so drop the reference
+		 * and call kbase_jm_idle_ctx().
+		 *
+		 * As the checks are done under both the queue_mutex and
+		 * runpool_irq.lock is should be impossible for this to race
+		 * with the scheduler code.
+		 */
+		if (kctx->ctx_active || !atomic_read(&kctx->atoms_pulled)) {
+			/* Calling kbase_jm_idle_ctx() here will ensure that
+			 * atoms are not fast-started when we drop the
+			 * runpool_irq.lock. This is not performed if ctx_active
+			 * is set as in that case another pm reference has been
+			 * taken and a fast-start would be valid.
+			 */
+			if (!kctx->ctx_active)
+				kbase_jm_idle_ctx(kbdev, kctx);
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+					flags);
+
+			kbase_pm_context_idle(kbdev);
+		} else {
+			kctx->ctx_active = true;
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+					flags);
+		}
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+
+	/*
+	 * Transaction complete
+	 */
+	mutex_unlock(&jctx->lock);
+
+	/* Job is now no longer running, so can now safely release the context
+	 * reference, and handle any actions that were logged against the atom's retained state */
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+
+	if (schedule)
+		kbase_js_sched_all(kbdev);
+
+	if (!atomic_dec_return(&kctx->work_count)) {
+		/* If worker now idle then post all events that jd_done_nolock()
+		 * has queued */
+		mutex_lock(&jctx->lock);
+		while (!list_empty(&kctx->completed_jobs)) {
+			struct kbase_jd_atom *atom = list_entry(
+					kctx->completed_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(kctx->completed_jobs.next);
+
+			kbase_event_post(kctx, atom);
+		}
+		mutex_unlock(&jctx->lock);
+	}
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+}
+
+/**
+ * jd_cancel_worker - Work queue job cancel function.
+ * @data: a &struct work_struct
+ *
+ * Only called as part of 'Zapping' a context (which occurs on termination).
+ * Operates serially with the kbase_jd_done_worker() on the work queue.
+ *
+ * This can only be called on contexts that aren't scheduled.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be
+ * running (by virtue of only being called on contexts that aren't
+ * scheduled).
+ */
+static void jd_cancel_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool need_to_try_schedule_context;
+	bool attr_state_changed;
+	struct kbase_device *kbdev;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	kbdev = kctx->kbdev;
+	jctx = &kctx->jctx;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+	/* This only gets called on contexts that are scheduled out. Hence, we must
+	 * make sure we don't de-ref the number of running jobs (there aren't
+	 * any), nor must we try to schedule out the context (it's already
+	 * scheduled out).
+	 */
+	KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+	/* Scheduler: Remove the job from the system */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&jctx->lock);
+
+	need_to_try_schedule_context = jd_done_nolock(katom, NULL);
+	/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
+	 * schedule the context. There's also no need for the jsctx_mutex to have been taken
+	 * around this too. */
+	KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
+
+	/* katom may have been freed now, do not use! */
+	mutex_unlock(&jctx->lock);
+
+	if (attr_state_changed)
+		kbase_js_sched_all(kbdev);
+}
+
+/**
+ * kbase_jd_done - Complete a job that has been removed from the Hardware
+ * @katom: atom which has been completed
+ * @slot_nr: slot the atom was on
+ * @end_timestamp: completion time
+ * @done_code: completion code
+ *
+ * This must be used whenever a job has been removed from the Hardware, e.g.:
+ * An IRQ indicates that the job finished (for both error and 'done' codes), or
+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop.
+ *
+ * Some work is carried out immediately, and the rest is deferred onto a
+ * workqueue
+ *
+ * Context:
+ *   This can be called safely from atomic context.
+ *   The caller must hold kbasep_js_device_data.runpool_irq.lock
+ */
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
+		ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
+{
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(kctx);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
+		katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0);
+
+	kbase_job_check_leave_disjoint(kbdev, katom);
+
+	katom->slot_nr = slot_nr;
+
+	atomic_inc(&kctx->work_count);
+
+#ifdef CONFIG_DEBUG_FS
+	/* a failed job happened and is waiting for dumping*/
+	if (!katom->will_fail_event_code &&
+			kbase_debug_job_fault_process(katom, katom->event_code))
+		return;
+#endif
+
+	WARN_ON(work_pending(&katom->work));
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, kbase_jd_done_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_done);
+
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+	/* This should only be done from a context that is not scheduled */
+	KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+	WARN_ON(work_pending(&katom->work));
+
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, jd_cancel_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+
+void kbase_jd_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_jd_atom *katom;
+	struct list_head *entry, *tmp;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u);
+
+	kbase_js_zap_context(kctx);
+
+	mutex_lock(&kctx->jctx.lock);
+
+	/*
+	 * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are
+	 * queued outside the job scheduler.
+	 */
+
+	del_timer_sync(&kctx->soft_job_timeout);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		katom = list_entry(entry, struct kbase_jd_atom, queue);
+		kbase_cancel_soft_job(katom);
+	}
+
+
+#ifdef CONFIG_KDS
+
+	/* For each job waiting on a kds resource, cancel the wait and force the job to
+	 * complete early, this is done so that we don't leave jobs outstanding waiting
+	 * on kds resources which may never be released when contexts are zapped, resulting
+	 * in a hang.
+	 *
+	 * Note that we can safely iterate over the list as the struct kbase_jd_context lock is held,
+	 * this prevents items being removed when calling job_done_nolock in kbase_cancel_kds_wait_job.
+	 */
+
+	list_for_each(entry, &kctx->waiting_kds_resource) {
+		katom = list_entry(entry, struct kbase_jd_atom, node);
+
+		kbase_cancel_kds_wait_job(katom);
+	}
+#endif
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_cancel_all_atoms(kctx);
+#endif
+
+	mutex_unlock(&kctx->jctx.lock);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 */
+	flush_workqueue(kctx->dma_fence.wq);
+#endif
+
+	kbase_jm_wait_for_zero_jobs(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_zap_context);
+
+int kbase_jd_init(struct kbase_context *kctx)
+{
+	int i;
+	int mali_err = 0;
+#ifdef CONFIG_KDS
+	int err;
+#endif				/* CONFIG_KDS */
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kctx->jctx.job_done_wq = alloc_workqueue("mali_jd",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (NULL == kctx->jctx.job_done_wq) {
+		mali_err = -ENOMEM;
+		goto out1;
+	}
+
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		init_waitqueue_head(&kctx->jctx.atoms[i].completed);
+
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]);
+
+		/* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
+		kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
+		kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		kctx->jctx.atoms[i].dma_fence.context = fence_context_alloc(1);
+		atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks);
+#endif
+	}
+
+	mutex_init(&kctx->jctx.lock);
+
+	init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
+
+	spin_lock_init(&kctx->jctx.tb_lock);
+
+#ifdef CONFIG_KDS
+	err = kds_callback_init(&kctx->jctx.kds_cb, 0, kds_dep_clear);
+	if (0 != err) {
+		mali_err = -EINVAL;
+		goto out2;
+	}
+#endif				/* CONFIG_KDS */
+
+	kctx->jctx.job_nr = 0;
+	INIT_LIST_HEAD(&kctx->completed_jobs);
+	atomic_set(&kctx->work_count, 0);
+
+	return 0;
+
+#ifdef CONFIG_KDS
+ out2:
+	destroy_workqueue(kctx->jctx.job_done_wq);
+#endif				/* CONFIG_KDS */
+ out1:
+	return mali_err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_init);
+
+void kbase_jd_exit(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+#ifdef CONFIG_KDS
+	kds_callback_term(&kctx->jctx.kds_cb);
+#endif				/* CONFIG_KDS */
+	/* Work queue is emptied by this */
+	destroy_workqueue(kctx->jctx.job_done_wq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_exit);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
new file mode 100755
index 0000000..0cf75f5
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/seq_file.h>
+
+#include <mali_kbase.h>
+
+#include <mali_kbase_jd_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/**
+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file.
+ * @sfile: The debugfs entry
+ * @data:  Data associated with the entry
+ *
+ * This function is called to get the contents of the JD atoms debugfs file.
+ * This is a report of all atoms managed by kbase_jd_context.atoms
+ *
+ * Return: 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+	struct kbase_jd_atom *atoms;
+	unsigned long irq_flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Print version */
+	seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION);
+
+	/* Print U/K API version */
+	seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR,
+			BASE_UK_VERSION_MINOR);
+
+	/* Print table heading */
+	seq_puts(sfile, "atom id,core reqs,status,coreref status,predeps,start time,time on gpu\n");
+
+	atoms = kctx->jctx.atoms;
+	/* General atom states */
+	mutex_lock(&kctx->jctx.lock);
+	/* JS-related states */
+	spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+	for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
+		struct kbase_jd_atom *atom = &atoms[i];
+		s64 start_timestamp = 0;
+
+		if (atom->status == KBASE_JD_ATOM_STATE_UNUSED)
+			continue;
+
+		/* start_timestamp is cleared as soon as the atom leaves UNUSED state
+		 * and set before a job is submitted to the h/w, a non-zero value means
+		 * it is valid */
+		if (ktime_to_ns(atom->start_timestamp))
+			start_timestamp = ktime_to_ns(
+					ktime_sub(ktime_get(), atom->start_timestamp));
+
+		seq_printf(sfile,
+				"%i,%u,%u,%u,%u %u,%lli,%llu\n",
+				i, atom->core_req, atom->status, atom->coreref_state,
+				(unsigned)(atom->dep[0].atom ?
+						atom->dep[0].atom - atoms : 0),
+				(unsigned)(atom->dep[1].atom ?
+						atom->dep[1].atom - atoms : 0),
+				(signed long long)start_timestamp,
+				(unsigned long long)(atom->time_spent_us ?
+					atom->time_spent_us * 1000 : start_timestamp)
+				);
+	}
+	spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+	mutex_unlock(&kctx->jctx.lock);
+
+	return 0;
+}
+
+
+/**
+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * Return: file descriptor
+ */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private);
+}
+
+static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+	.open = kbasep_jd_debugfs_atoms_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Expose all atoms */
+	debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx,
+			&kbasep_jd_debugfs_atoms_fops);
+
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
new file mode 100755
index 0000000..bc1878f
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_jd_debugfs.h
+ * Header file for job dispatcher-related entries in debugfs
+ */
+
+#ifndef _KBASE_JD_DEBUGFS_H
+#define _KBASE_JD_DEBUGFS_H
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+
+#define MALI_JD_DEBUGFS_VERSION 1
+
+/**
+ * kbasep_jd_debugfs_ctx_add() - Add debugfs entries for JD system
+ *
+ * @kctx Pointer to kbase_context
+ */
+void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx);
+
+#endif  /*_KBASE_JD_DEBUGFS_H*/
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
new file mode 100755
index 0000000..6342532
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_hwaccess_jm.h"
+#include "mali_kbase_jm.h"
+
+/**
+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot
+ *			 @js on the active context.
+ * @kbdev:		Device pointer
+ * @js:			Job slot to run on
+ * @nr_jobs_to_submit:	Number of jobs to attempt to submit
+ *
+ * Return: true if slot can still be submitted on, false if slot is now full.
+ */
+static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
+				int nr_jobs_to_submit)
+{
+	struct kbase_context *kctx;
+	int i;
+
+	kctx = kbdev->hwaccess.active_kctx;
+
+	if (!kctx)
+		return true;
+
+	for (i = 0; i < nr_jobs_to_submit; i++) {
+		struct kbase_jd_atom *katom = kbase_js_pull(kctx, js);
+
+		if (!katom)
+			return true; /* Context has no jobs on this slot */
+
+		kbase_backend_run_atom(kbdev, katom);
+	}
+
+	return false; /* Slot ringbuffer should now be full */
+}
+
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	u32 ret_mask = 0;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	while (js_mask) {
+		int js = ffs(js_mask) - 1;
+		int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
+
+		if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
+			ret_mask |= (1 << js);
+
+		js_mask &= ~(1 << js);
+	}
+
+	return ret_mask;
+}
+
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick(kbdev, js_mask);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_try_kick_all(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick_all(kbdev);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (kbdev->hwaccess.active_kctx == kctx)
+		kbdev->hwaccess.active_kctx = NULL;
+}
+
+void kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (katom->event_code != BASE_JD_EVENT_STOPPED &&
+			katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
+		kbase_js_complete_atom(katom, NULL);
+	} else {
+		kbase_js_unpull(katom->kctx, katom);
+	}
+}
+
+void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
+			ktime_t *end_timestamp)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	kbase_js_complete_atom(katom, end_timestamp);
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
new file mode 100755
index 0000000..27aca3a
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -0,0 +1,106 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Job manager common APIs
+ */
+
+#ifndef _KBASE_JM_H_
+#define _KBASE_JM_H_
+
+/**
+ * kbase_jm_kick() - Indicate that there are jobs ready to run.
+ * @kbdev:	Device pointer
+ * @js_mask:	Mask of the job slots that can be pulled from.
+ *
+ * Caller must hold the runpool_irq lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job
+ *			 slots.
+ * @kbdev:	Device pointer
+ *
+ * Caller must hold the runpool_irq lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
+{
+	return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+/**
+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick
+ * @kbdev:   Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from
+ * Context: Caller must hold runpool_irq lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
+ * @kbdev:  Device pointer
+ * Context: Caller must hold runpool_irq lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick_all() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick_all(struct kbase_device *kbdev);
+
+/**
+ * kbase_jm_idle_ctx() - Mark a context as idle.
+ * @kbdev:	Device pointer
+ * @kctx:	Context to mark as idle
+ *
+ * No more atoms will be pulled from this context until it is marked as active
+ * by kbase_js_use_ctx().
+ *
+ * The context should have no atoms currently pulled from it
+ * (kctx->atoms_pulled == 0).
+ *
+ * Caller must hold the runpool_irq lock
+ */
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
+ *				  been soft-stopped or will fail due to a
+ *				  dependency
+ * @kbdev:	Device pointer
+ * @katom:	Atom that has been stopped or will be failed
+ */
+void kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_jm_complete() - Complete an atom
+ * @kbdev:		Device pointer
+ * @katom:		Atom that has completed
+ * @end_timestamp:	Timestamp of atom completion
+ */
+void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
+			ktime_t *end_timestamp);
+
+#endif /* _KBASE_JM_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
new file mode 100755
index 0000000..ac6c3ce
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -0,0 +1,2944 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_hw.h>
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_jm.h"
+#include "mali_kbase_hwaccess_jm.h"
+
+/*
+ * Private types
+ */
+
+/* Bitpattern indicating the result of releasing a context */
+enum {
+	/* The context was descheduled - caller should try scheduling in a new
+	 * one to keep the runpool full */
+	KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+	/* Ctx attributes were changed - caller should try scheduling all
+	 * contexts */
+	KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
+};
+
+typedef u32 kbasep_js_release_result;
+
+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
+	KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
+	KBASE_JS_ATOM_SCHED_PRIO_LOW  /* BASE_JD_PRIO_LOW */
+};
+
+const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
+	BASE_JD_PRIO_HIGH,   /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+	BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+	BASE_JD_PRIO_LOW     /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+};
+
+
+/*
+ * Private function prototypes
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state);
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_policy_ctx_job_cb callback);
+
+/* Helper for trace subcodes */
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return refcnt;
+}
+
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+/*
+ * Private types
+ */
+enum {
+	JS_DEVDATA_INIT_NONE = 0,
+	JS_DEVDATA_INIT_CONSTANTS = (1 << 0),
+	JS_DEVDATA_INIT_POLICY = (1 << 1),
+	JS_DEVDATA_INIT_ALL = ((1 << 2) - 1)
+};
+
+enum {
+	JS_KCTX_INIT_NONE = 0,
+	JS_KCTX_INIT_CONSTANTS = (1 << 0),
+	JS_KCTX_INIT_POLICY = (1 << 1),
+	JS_KCTX_INIT_ALL = ((1 << 2) - 1)
+};
+
+/*
+ * Private functions
+ */
+
+/**
+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
+ * @features: JSn_FEATURE register value
+ *
+ * Given a JSn_FEATURE register value returns the core requirements that match
+ *
+ * Return: Core requirement bit mask
+ */
+static base_jd_core_req core_reqs_from_jsn_features(u16 features)
+{
+	base_jd_core_req core_req = 0u;
+
+	if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+		core_req |= BASE_JD_REQ_V;
+
+	if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+		core_req |= BASE_JD_REQ_CF;
+
+	if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+		core_req |= BASE_JD_REQ_CS;
+
+	if ((features & JS_FEATURE_TILER_JOB) != 0)
+		core_req |= BASE_JD_REQ_T;
+
+	if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+		core_req |= BASE_JD_REQ_FS;
+
+	return core_req;
+}
+
+static void kbase_js_sync_timers(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+	kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
+
+/* Hold the kbasep_js_device_data::runpool_irq::lock for this */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_per_as_data *js_per_as_data;
+	bool result = false;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		int new_refcnt;
+
+		KBASE_DEBUG_ASSERT(as_nr >= 0);
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		KBASE_DEBUG_ASSERT(js_per_as_data->kctx != NULL);
+
+		new_refcnt = ++(js_per_as_data->as_busy_refcount);
+
+		KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
+				NULL, 0u, new_refcnt);
+		result = true;
+	}
+
+	return result;
+}
+
+/**
+ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority to check.
+ *
+ * Return true if there are no atoms to pull. There may be running atoms in the
+ * ring buffer even if there are no atoms to pull. It is also possible for the
+ * ring buffer to be full (with running atoms) when this functions returns
+ * true.
+ *
+ * Return: true if there are no atoms to pull, false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	return RB_EMPTY_ROOT(&rb->runnable_tree);
+}
+
+/**
+ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no
+ * pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if the ring buffers for all priorities have no pullable atoms,
+ *	   false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue.
+ * @kctx:     Pointer to kbase context with the queue.
+ * @js:       Job slot id to iterate.
+ * @prio:     Priority id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over a queue and invoke @callback for each entry in the queue, and
+ * remove the entry from the queue.
+ *
+ * If entries are added to the queue while this is running those entries may, or
+ * may not be covered. To ensure that all entries in the buffer have been
+ * enumerated when this function returns jsctx->lock must be held when calling
+ * this function.
+ *
+ * The HW access lock, js_data.runpool_irq.lock, must always be held when
+ * calling this function.
+ */
+static void
+jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
+		kbasep_js_policy_ctx_job_cb callback)
+{
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	while (!RB_EMPTY_ROOT(&queue->runnable_tree)) {
+		struct rb_node *node = rb_first(&queue->runnable_tree);
+		struct kbase_jd_atom *entry = rb_entry(node,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		rb_erase(node, &queue->runnable_tree);
+		callback(kctx->kbdev, entry);
+	}
+
+	while (!list_empty(&queue->x_dep_head)) {
+		struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next,
+				struct kbase_jd_atom, queue);
+
+		list_del(queue->x_dep_head.next);
+
+		callback(kctx->kbdev, entry);
+	}
+}
+
+/**
+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue
+ * @kctx:     Pointer to kbase context with queue.
+ * @js:       Job slot id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over all the different priorities, and for each call
+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
+ * for each entry, and remove the entry from the queue.
+ */
+static inline void
+jsctx_queue_foreach(struct kbase_context *kctx, int js,
+		kbasep_js_policy_ctx_job_cb callback)
+{
+	int prio;
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+		jsctx_queue_foreach_prio(kctx, js, prio, callback);
+}
+
+/**
+ * jsctx_rb_peek_prio(): - Check buffer and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a pointer to
+ * the next atom, unless the ring buffer is empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	struct rb_node *node;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	node = rb_first(&rb->runnable_tree);
+	if (!node)
+		return NULL;
+
+	return rb_entry(node, struct kbase_jd_atom, runnable_tree_node);
+}
+
+/**
+ * jsctx_rb_peek(): - Check all priority buffers and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Check the ring buffers for all priorities, starting from
+ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
+ * pointer to the next atom, unless all the priority's ring buffers are empty.
+ *
+ * Caller must hold the runpool_irq.lock.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		struct kbase_jd_atom *katom;
+
+		katom = jsctx_rb_peek_prio(kctx, js, prio);
+		if (katom)
+			return katom;
+	}
+
+	return NULL;
+}
+
+/**
+ * jsctx_rb_pull(): - Mark atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to pull.
+ *
+ * Mark an atom previously obtained from jsctx_rb_peek() as running.
+ *
+ * @katom must currently be at the head of the ring buffer.
+ */
+static inline void
+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	/* Atoms must be pulled in the correct order. */
+	WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
+
+	rb_erase(&katom->runnable_tree_node, &rb->runnable_tree);
+}
+
+#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0)
+
+static void
+jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+	struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	while (*new) {
+		struct kbase_jd_atom *entry = container_of(*new,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		parent = *new;
+		if (LESS_THAN_WRAP(katom->age, entry->age))
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&katom->runnable_tree_node, parent, new);
+	rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree);
+}
+
+/**
+ * jsctx_rb_unpull(): - Undo marking of atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to unpull.
+ *
+ * Undo jsctx_rb_pull() and put @katom back in the queue.
+ *
+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
+ * pulled.
+ */
+static inline void
+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	jsctx_tree_add(kctx, katom);
+}
+
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
+					int js,
+					bool is_scheduled);
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+
+/*
+ * Functions private to KBase ('Protected' functions)
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev)
+{
+	struct kbasep_js_device_data *jsdd;
+	int err;
+	int i;
+	u16 as_present;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	jsdd = &kbdev->js_data;
+
+	KBASE_DEBUG_ASSERT(jsdd->init_status == JS_DEVDATA_INIT_NONE);
+
+	/* These two must be recalculated if nr_hw_address_spaces changes
+	 * (e.g. for HW workarounds) */
+	as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
+	kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+		bool use_workaround;
+
+		use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
+		if (use_workaround) {
+			dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
+			kbdev->nr_user_address_spaces = 1;
+		}
+	}
+#ifdef CONFIG_MALI_DEBUG
+	/* Soft-stop will be disabled on a single context by default unless
+	 * softstop_always is set */
+	jsdd->softstop_always = false;
+#endif				/* CONFIG_MALI_DEBUG */
+	jsdd->nr_all_contexts_running = 0;
+	jsdd->nr_user_contexts_running = 0;
+	jsdd->nr_contexts_pullable = 0;
+	atomic_set(&jsdd->nr_contexts_runnable, 0);
+	/* All ASs initially free */
+	jsdd->as_free = as_present;
+	/* No ctx allowed to submit */
+	jsdd->runpool_irq.submit_allowed = 0u;
+	memset(jsdd->runpool_irq.ctx_attr_ref_count, 0,
+			sizeof(jsdd->runpool_irq.ctx_attr_ref_count));
+	memset(jsdd->runpool_irq.slot_affinities, 0,
+			sizeof(jsdd->runpool_irq.slot_affinities));
+	memset(jsdd->runpool_irq.slot_affinity_refcount, 0,
+			sizeof(jsdd->runpool_irq.slot_affinity_refcount));
+	INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list);
+
+	/* Config attributes */
+	jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+	jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+	jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+	else
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS;
+	jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+	jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408;
+	else
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+	jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+	jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
+	jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
+	jsdd->cfs_ctx_runtime_init_slices =
+		DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES;
+	jsdd->cfs_ctx_runtime_min_slices =
+		DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES;
+	atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
+
+	dev_dbg(kbdev->dev, "JS Config Attribs: ");
+	dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
+			jsdd->scheduling_period_ns);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u",
+			jsdd->soft_stop_ticks);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u",
+			jsdd->soft_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u",
+			jsdd->hard_stop_ticks_ss);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u",
+			jsdd->hard_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u",
+			jsdd->hard_stop_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u",
+			jsdd->gpu_reset_ticks_ss);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u",
+			jsdd->gpu_reset_ticks_cl);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u",
+			jsdd->gpu_reset_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u",
+			jsdd->ctx_timeslice_ns);
+	dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_init_slices:%u",
+			jsdd->cfs_ctx_runtime_init_slices);
+	dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u",
+			jsdd->cfs_ctx_runtime_min_slices);
+	dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i",
+		atomic_read(&jsdd->soft_job_timeout_ms));
+
+	if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
+			jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
+			jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping &&
+			jsdd->hard_stop_ticks_dumping <
+			jsdd->gpu_reset_ticks_dumping)) {
+		dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n");
+		return -EINVAL;
+	}
+
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Policy Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.",
+			jsdd->soft_stop_ticks,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Policy Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.",
+			jsdd->hard_stop_ticks_ss,
+			jsdd->hard_stop_ticks_dumping,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Note: The JS policy's tick timer (if coded) will still be run, but do nothing.");
+#endif
+
+	/* setup the number of irq throttle cycles base on given time */
+	{
+		int time_us = kbdev->gpu_props.irq_throttle_time_us;
+		int cycles = kbasep_js_convert_us_to_gpu_ticks_max_freq(kbdev,
+				time_us);
+
+		atomic_set(&kbdev->irq_throttle_cycles, cycles);
+	}
+
+	/* Clear the AS data, including setting NULL pointers */
+	memset(&jsdd->runpool_irq.per_as_data[0], 0,
+			sizeof(jsdd->runpool_irq.per_as_data));
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i)
+		jsdd->js_reqs[i] = core_reqs_from_jsn_features(
+			kbdev->gpu_props.props.raw_props.js_features[i]);
+
+	jsdd->init_status |= JS_DEVDATA_INIT_CONSTANTS;
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+
+	mutex_init(&jsdd->runpool_mutex);
+	mutex_init(&jsdd->queue_mutex);
+	spin_lock_init(&jsdd->runpool_irq.lock);
+	sema_init(&jsdd->schedule_sem, 1);
+
+	err = kbasep_js_policy_init(kbdev);
+	if (!err)
+		jsdd->init_status |= JS_DEVDATA_INIT_POLICY;
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
+		INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]);
+		INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]);
+	}
+
+	/* On error, do no cleanup; this will be handled by the caller(s), since
+	 * we've designed this resource to be safe to terminate on init-fail */
+	if (jsdd->init_status != JS_DEVDATA_INIT_ALL)
+		return -EINVAL;
+
+	return 0;
+}
+
+void kbasep_js_devdata_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbasep_js_devdata_term(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	if ((js_devdata->init_status & JS_DEVDATA_INIT_CONSTANTS)) {
+		s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+		/* The caller must de-register all contexts before calling this
+		 */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
+		KBASE_DEBUG_ASSERT(memcmp(
+				js_devdata->runpool_irq.ctx_attr_ref_count,
+				zero_ctx_attr_ref_count,
+				sizeof(zero_ctx_attr_ref_count)) == 0);
+		CSTD_UNUSED(zero_ctx_attr_ref_count);
+	}
+	if ((js_devdata->init_status & JS_DEVDATA_INIT_POLICY))
+		kbasep_js_policy_term(&js_devdata->policy);
+
+	js_devdata->init_status = JS_DEVDATA_INIT_NONE;
+}
+
+int kbasep_js_kctx_init(struct kbase_context * const kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int err;
+	int i, j;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	KBASE_DEBUG_ASSERT(js_kctx_info->init_status == JS_KCTX_INIT_NONE);
+
+	js_kctx_info->ctx.nr_jobs = 0;
+	js_kctx_info->ctx.is_scheduled = false;
+	js_kctx_info->ctx.is_dying = false;
+	memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
+			sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
+
+	/* Initially, the context is disabled from submission until the create
+	 * flags are set */
+	js_kctx_info->ctx.flags = KBASE_CTX_FLAG_SUBMIT_DISABLED;
+
+	js_kctx_info->init_status |= JS_KCTX_INIT_CONSTANTS;
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+	mutex_init(&js_kctx_info->ctx.jsctx_mutex);
+
+	init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
+
+	err = kbasep_js_policy_init_ctx(kbdev, kctx);
+	if (!err)
+		js_kctx_info->init_status |= JS_KCTX_INIT_POLICY;
+
+	/* On error, do no cleanup; this will be handled by the caller(s), since
+	 * we've designed this resource to be safe to terminate on init-fail */
+	if (js_kctx_info->init_status != JS_KCTX_INIT_ALL)
+		return -EINVAL;
+
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
+			INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head);
+			kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT;
+		}
+	}
+
+	return 0;
+}
+
+void kbasep_js_kctx_term(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	union kbasep_js_policy *js_policy;
+	int js;
+	bool update_ctx_count = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if ((js_kctx_info->init_status & JS_KCTX_INIT_CONSTANTS)) {
+		/* The caller must de-register all jobs before calling this */
+		KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+		KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
+	}
+
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kctx->ctx_runnable_ref) {
+		WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
+		atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		update_ctx_count = true;
+		kctx->ctx_runnable_ref = false;
+	}
+
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if ((js_kctx_info->init_status & JS_KCTX_INIT_POLICY))
+		kbasep_js_policy_term_ctx(js_policy, kctx);
+
+	js_kctx_info->init_status = JS_KCTX_INIT_NONE;
+
+	if (update_ctx_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		kbase_backend_ctx_count_changed(kbdev);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_nolock - Variant of
+ *                                         kbase_jd_ctx_list_add_pullable()
+ *                                         where the caller must hold
+ *                                         runpool_irq.lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = true;
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head_nolock - Variant of
+ *                                              kbase_js_ctx_list_add_pullable_head()
+ *                                              where the caller must hold
+ *                                              runpool_irq.lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head_nolock(
+		struct kbase_device *kbdev, struct kbase_context *kctx, int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = true;
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
+ *                                       per-slot pullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the head.
+ *
+ * This function should be used when a context has been scheduled, but no jobs
+ * can currently be pulled from it.
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+	ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js);
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the
+ *                                           per-slot unpullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on the per-slot pullable queue. It will be
+ * removed from the pullable queue before being added to the unpullable queue.
+ *
+ * This function should be used when a context has been pulled from, and there
+ * are no jobs remaining on the specified slot.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+				&kbdev->js_data.ctx_list_unpullable[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable
+ *                                   or unpullable context queues
+ * @kbdev:  Device pointer
+ * @kctx:   Context to remove from queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on one of the queues.
+ *
+ * This function should be used when a context has no jobs on the GPU, and no
+ * jobs remaining for the specified slot.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head()
+ *                                     where the caller must hold
+ *                                     runpool_irq.lock
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
+						struct kbase_device *kbdev,
+						int js)
+{
+	struct kbase_context *kctx;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (list_empty(&kbdev->js_data.ctx_list_pullable[js]))
+		return NULL;
+
+	kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next,
+					struct kbase_context,
+					jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
+ *                              queue.
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head(
+		struct kbase_device *kbdev, int js)
+{
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+	kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js);
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the
+ *                         specified slot
+ * @kctx:          Context pointer
+ * @js:            Job slot to use
+ * @is_scheduled:  true if the context is currently scheduled
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return:         true if context can be pulled from on specified slot
+ *                 false otherwise
+ */
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
+					bool is_scheduled)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_jd_atom *katom;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	js_devdata = &kctx->kbdev->js_data;
+
+	if (is_scheduled) {
+		if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+			return false;
+	}
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return false; /* No pullable atoms */
+	if (atomic_read(&katom->blocked))
+		return false; /* next atom blocked */
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return false;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return false;
+	}
+
+	return true;
+}
+
+static bool kbase_js_dep_validate(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool ret = true;
+	bool has_dep = false, has_x_dep = false;
+	int js = kbase_js_get_slot(kbdev, katom);
+	int prio = katom->sched_priority;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+		if (dep_atom) {
+			int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+			int dep_prio = dep_atom->sched_priority;
+
+			/* Dependent atom must already have been submitted */
+			if (!(dep_atom->atom_flags &
+					KBASE_KATOM_FLAG_JSCTX_IN_TREE)) {
+				ret = false;
+				break;
+			}
+
+			/* Dependencies with different priorities can't
+			  be represented in the ringbuffer */
+			if (prio != dep_prio) {
+				ret = false;
+				break;
+			}
+
+			if (js == dep_js) {
+				/* Only one same-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * same-slot dependency */
+				if (dep_atom->post_dep) {
+					ret = false;
+					break;
+				}
+				has_dep = true;
+			} else {
+				/* Only one cross-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_x_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * cross-slot dependency */
+				if (dep_atom->x_post_dep) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already be in the
+				 * HW access ringbuffer */
+				if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already have
+				 * completed */
+				if (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_IN_JS) {
+					ret = false;
+					break;
+				}
+				/* Cross-slot dependencies must not violate
+				 * PRLAM-8987 affinity restrictions */
+				if (kbase_hw_has_issue(kbdev,
+							BASE_HW_ISSUE_8987) &&
+						(js == 2 || dep_js == 2)) {
+					ret = false;
+					break;
+				}
+				has_x_dep = true;
+			}
+
+			/* Dependency can be represented in ringbuffers */
+		}
+	}
+
+	/* If dependencies can be represented by ringbuffer then clear them from
+	 * atom structure */
+	if (ret) {
+		for (i = 0; i < 2; i++) {
+			struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+			if (dep_atom) {
+				int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+
+				if ((js != dep_js) &&
+					(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_COMPLETED)
+					&& (dep_atom->status !=
+					KBASE_JD_ATOM_STATE_HW_COMPLETED)
+					&& (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED)) {
+
+					katom->atom_flags |=
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+					katom->x_pre_dep = dep_atom;
+					dep_atom->x_post_dep = katom;
+					if (kbase_jd_katom_dep_type(
+							&katom->dep[i]) ==
+							BASE_JD_DEP_TYPE_DATA)
+						katom->atom_flags |=
+						KBASE_KATOM_FLAG_FAIL_BLOCKER;
+				}
+				if ((kbase_jd_katom_dep_type(&katom->dep[i])
+						== BASE_JD_DEP_TYPE_DATA) &&
+						(js == dep_js)) {
+					katom->pre_dep = dep_atom;
+					dep_atom->post_dep = katom;
+				}
+
+				list_del(&katom->dep_item[i]);
+				kbase_jd_katom_dep_clear(&katom->dep[i]);
+			}
+		}
+	}
+
+	return ret;
+}
+
+bool kbasep_js_add_job(struct kbase_context *kctx,
+		struct kbase_jd_atom *atom)
+{
+	unsigned long flags;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	union kbasep_js_policy *js_policy;
+
+	bool enqueue_required = false;
+	bool timer_sync = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/*
+	 * Begin Runpool transaction
+	 */
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
+	++(js_kctx_info->ctx.nr_jobs);
+
+	/* Setup any scheduling information */
+	kbasep_js_clear_job_retry_submit(atom);
+
+	/* Lock for state available during IRQ */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	if (!kbase_js_dep_validate(kctx, atom)) {
+		/* Dependencies could not be represented */
+		--(js_kctx_info->ctx.nr_jobs);
+
+		/* Setting atom status back to queued as it still has unresolved
+		 * dependencies */
+		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		goto out_unlock;
+	}
+
+	KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
+
+	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
+				kbasep_js_trace_get_refcnt_nolock(kbdev, kctx));
+
+	/* Context Attribute Refcounting */
+	kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom);
+
+	if (enqueue_required) {
+		if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false))
+			timer_sync = kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+		else
+			timer_sync = kbase_js_ctx_list_add_unpullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+	}
+	/* If this context is active and the atom is the first on its slot,
+	 * kick the job manager to attempt to fast-start the atom */
+	if (enqueue_required && kctx == kbdev->hwaccess.active_kctx)
+		kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+	/* End runpool transaction */
+
+	if (!js_kctx_info->ctx.is_scheduled) {
+		if (js_kctx_info->ctx.is_dying) {
+			/* A job got added while/after kbase_job_zap_context()
+			 * was called on a non-scheduled context (e.g. KDS
+			 * dependency resolved). Kill that job by killing the
+			 * context. */
+			kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx,
+					false);
+		} else if (js_kctx_info->ctx.nr_jobs == 1) {
+			/* Handle Refcount going from 0 to 1: schedule the
+			 * context on the Policy Queue */
+			KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+			dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+
+			/* Policy Queue was updated - caller must try to
+			 * schedule the head context */
+			WARN_ON(!enqueue_required);
+		}
+	}
+out_unlock:
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return enqueue_required;
+}
+
+void kbasep_js_remove_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	union kbasep_js_policy *js_policy;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc,
+			kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* De-refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
+	--(js_kctx_info->ctx.nr_jobs);
+}
+
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	unsigned long flags;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	struct kbasep_js_device_data *js_devdata;
+	bool attr_state_changed;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+	kbasep_js_remove_job(kbdev, kctx, katom);
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* The atom has 'finished' (will not be re-run), so no need to call
+	 * kbasep_js_has_atom_finished().
+	 *
+	 * This is because it returns false for soft-stopped atoms, but we
+	 * want to override that, because we're cancelling an atom regardless of
+	 * whether it was soft-stopped or not */
+	attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
+			&katom_retained_state);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return attr_state_changed;
+}
+
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	bool result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	/* KBASE_TRACE_ADD_REFCOUNT( kbdev, JS_RETAIN_CTX, kctx, NULL, 0,
+	   kbasep_js_trace_get_refcnt(kbdev, kctx)); */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return result;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+		int as_nr)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx = NULL;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	found_kctx = js_per_as_data->kctx;
+
+	if (found_kctx != NULL)
+		++(js_per_as_data->as_busy_refcount);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return found_kctx;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+		struct kbase_device *kbdev, int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx = NULL;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	found_kctx = js_per_as_data->kctx;
+
+	if (found_kctx != NULL)
+		++(js_per_as_data->as_busy_refcount);
+
+	return found_kctx;
+}
+
+/**
+ * kbasep_js_release_result - Try running more jobs after releasing a context
+ *                            and/or atom
+ *
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed
+ *
+ * This collates a set of actions that must happen whilst
+ * kbasep_js_device_data.runpool_irq.lock is held.
+ *
+ * This includes running more jobs when:
+ * - The previously released kctx caused a ctx attribute change,
+ * - The released atom caused a ctx attribute change,
+ * - Slots were previously blocked due to affinity restrictions,
+ * - Submission during IRQ handling failed.
+ *
+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were
+ *         changed. The caller should try scheduling all contexts
+ */
+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state,
+		bool runpool_ctx_attr_change)
+{
+	struct kbasep_js_device_data *js_devdata;
+	kbasep_js_release_result result = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom_retained_state != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	if (js_devdata->nr_user_contexts_running != 0) {
+		bool retry_submit = false;
+		int retry_jobslot = 0;
+
+		if (katom_retained_state)
+			retry_submit = kbasep_js_get_atom_retry_submit_slot(
+					katom_retained_state, &retry_jobslot);
+
+		if (runpool_ctx_attr_change || retry_submit) {
+			/* A change in runpool ctx attributes might mean we can
+			 * run more jobs than before  */
+			result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+
+			KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB,
+						kctx, NULL, 0u, retry_jobslot);
+		}
+	}
+	return result;
+}
+
+/*
+ * Internal function to release the reference on a ctx and an atom's "retained
+ * state", only taking the runpool and as transaction mutexes
+ *
+ * This also starts more jobs running in the case of an ctx-attribute state
+ * change
+ *
+ * This does none of the followup actions for scheduling:
+ * - It does not schedule in a new context
+ * - It does not requeue or handle dying contexts
+ *
+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead
+ *
+ * Requires:
+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
+ * - Context has a non-zero refcount
+ * - Caller holds js_kctx_info->ctx.jsctx_mutex
+ * - Caller holds js_devdata->runpool_mutex
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	union kbasep_js_policy *js_policy;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	kbasep_js_release_result release_result = 0u;
+	bool runpool_ctx_attr_change = false;
+	int kctx_as_nr;
+	struct kbase_as *current_as;
+	int new_ref_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
+
+	/* kctx->as_nr and js_per_as_data are only read from here. The caller's
+	 * js_ctx_mutex provides a barrier that ensures they are up-to-date.
+	 *
+	 * They will not change whilst we're reading them, because the refcount
+	 * is non-zero (and we ASSERT on that last fact).
+	 */
+	kctx_as_nr = kctx->as_nr;
+	KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID);
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[kctx_as_nr];
+	KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+	/*
+	 * Transaction begins on AS and runpool_irq
+	 *
+	 * Assert about out calling contract
+	 */
+	current_as = &kbdev->as[kctx_as_nr];
+	mutex_lock(&kbdev->pm.lock);
+	mutex_lock(&current_as->transaction_mutex);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
+	KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+	/* Update refcount */
+	new_ref_count = --(js_per_as_data->as_busy_refcount);
+
+	/* Release the atom if it finished (i.e. wasn't soft-stopped) */
+	if (kbasep_js_has_atom_finished(katom_retained_state))
+		runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(
+				kbdev, kctx, katom_retained_state);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u,
+			new_ref_count);
+
+	if (new_ref_count == 1 && kctx->jctx.sched_info.ctx.flags &
+			KBASE_CTX_FLAG_PRIVILEGED &&
+			!kbase_pm_is_suspending(kbdev)) {
+		/* Context is kept scheduled into an address space even when
+		 * there are no jobs, in this case we have to handle the
+		 * situation where all jobs have been evicted from the GPU and
+		 * submission is disabled.
+		 *
+		 * At this point we re-enable submission to allow further jobs
+		 * to be executed
+		 */
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+	}
+
+	/* Make a set of checks to see if the context should be scheduled out */
+	if (new_ref_count == 0 &&
+		(!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
+							kbdev->pm.suspending)) {
+		/* Last reference, and we've been told to remove this context
+		 * from the Run Pool */
+		dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because as_busy_refcount=%d, jobs=%d, allowed=%d",
+				kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
+				kbasep_js_is_submit_allowed(js_devdata, kctx));
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_mmu_as_released(kctx->as_nr);
+#endif
+		kbase_tlstream_tl_nret_as_ctx(&kbdev->as[kctx->as_nr], kctx);
+
+		kbase_backend_release_ctx_irq(kbdev, kctx);
+
+		if (kbdev->hwaccess.active_kctx == kctx)
+			kbdev->hwaccess.active_kctx = NULL;
+
+		/* Ctx Attribute handling
+		 *
+		 * Releasing atoms attributes must either happen before this, or
+		 * after 'is_scheduled' is changed, otherwise we double-decount
+		 * the attributes */
+		runpool_ctx_attr_change |=
+			kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
+
+		/* Releasing the context and katom retained state can allow
+		 * more jobs to run */
+		release_result |=
+			kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev,
+						kctx, katom_retained_state,
+						runpool_ctx_attr_change);
+
+		/*
+		 * Transaction ends on AS and runpool_irq:
+		 *
+		 * By this point, the AS-related data is now clear and ready
+		 * for re-use.
+		 *
+		 * Since releases only occur once for each previous successful
+		 * retain, and no more retains are allowed on this context, no
+		 * other thread will be operating in this
+		 * code whilst we are
+		 */
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+		kbase_backend_release_ctx_noirq(kbdev, kctx);
+
+		mutex_unlock(&current_as->transaction_mutex);
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* Note: Don't reuse kctx_as_nr now */
+
+		/* Synchronize with any policy timers */
+		kbase_backend_ctx_count_changed(kbdev);
+
+		/* update book-keeping info */
+		js_kctx_info->ctx.is_scheduled = false;
+		/* Signal any waiter that the context is not scheduled, so is
+		 * safe for termination - once the jsctx_mutex is also dropped,
+		 * and jobs have finished. */
+		wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+		/* Queue an action to occur after we've dropped the lock */
+		release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED |
+			KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+	} else {
+		kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
+				katom_retained_state, runpool_ctx_attr_change);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&current_as->transaction_mutex);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return release_result;
+}
+
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	/* Setup a dummy katom_retained_state */
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+							&katom_retained_state);
+}
+
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx, bool has_pm_ref)
+{
+	struct kbasep_js_device_data *js_devdata;
+	union kbasep_js_policy *js_policy;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_policy = &kbdev->js_data.policy;
+	js_devdata = &kbdev->js_data;
+
+	/* This is called if and only if you've you've detached the context from
+	 * the Runpool or the Policy Queue, and not added it back to the Runpool
+	 */
+	KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+	if (js_kctx_info->ctx.is_dying) {
+		/* Dying: don't requeue, but kill all jobs on the context. This
+		 * happens asynchronously */
+		dev_dbg(kbdev->dev,
+			"JS: ** Killing Context %p on RunPool Remove **", kctx);
+		kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
+	}
+}
+
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL)
+		kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+			&katom_retained_state);
+}
+
+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into
+ * kbase_js_sched_all() */
+static void kbasep_js_runpool_release_ctx_no_schedule(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+	struct kbasep_js_atom_retained_state katom_retained_state_struct;
+	struct kbasep_js_atom_retained_state *katom_retained_state =
+		&katom_retained_state_struct;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	kbasep_js_atom_retained_state_init_invalid(katom_retained_state);
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* NOTE: could return release_result if the caller would like to know
+	 * whether it should schedule a new context, but currently no callers do
+	 */
+}
+
+void kbase_js_set_timeouts(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	kbase_backend_timeouts_changed(kbdev);
+}
+
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	union kbasep_js_policy *js_policy;
+	struct kbase_as *new_address_space = NULL;
+	unsigned long flags;
+	bool kctx_suspended = false;
+	int as_nr;
+
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Pick available address space for this context */
+	as_nr = kbase_backend_find_free_address_space(kbdev, kctx);
+
+	if (as_nr == KBASEP_AS_NR_INVALID)
+		return false; /* No address spaces currently available */
+
+	new_address_space = &kbdev->as[as_nr];
+
+	/*
+	 * Atomic transaction on the Context and Run Pool begins
+	 */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Check to see if context is dying due to kbase_job_zap_context() */
+	if (js_kctx_info->ctx.is_dying) {
+		/* Roll back the transaction so far and return */
+		kbase_backend_release_free_address_space(kbdev, as_nr);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL,
+				0u,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	js_kctx_info->ctx.is_scheduled = true;
+
+	mutex_lock(&new_address_space->transaction_mutex);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* Assign context to previously chosen address space */
+	if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&new_address_space->transaction_mutex);
+		/* Roll back the transaction so far and return */
+		js_kctx_info->ctx.is_scheduled = false;
+
+		kbase_backend_release_free_address_space(kbdev, as_nr);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		return false;
+	}
+
+	kbdev->hwaccess.active_kctx = kctx;
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
+#endif
+	kbase_tlstream_tl_ret_as_ctx(&kbdev->as[kctx->as_nr], kctx);
+
+	/* Cause any future waiter-on-termination to wait until the context is
+	 * descheduled */
+	wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+	/* Re-check for suspending: a suspend could've occurred, and all the
+	 * contexts could've been removed from the runpool before we took this
+	 * lock. In this case, we don't want to allow this context to run jobs,
+	 * we just want it out immediately.
+	 *
+	 * The DMB required to read the suspend flag was issued recently as part
+	 * of the runpool_irq locking. If a suspend occurs *after* that lock was
+	 * taken (i.e. this condition doesn't execute), then the
+	 * kbasep_js_suspend() code will cleanup this context instead (by virtue
+	 * of it being called strictly after the suspend flag is set, and will
+	 * wait for this lock to drop) */
+	if (kbase_pm_is_suspending(kbdev)) {
+		/* Cause it to leave at some later point */
+		bool retained;
+
+		retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+		KBASE_DEBUG_ASSERT(retained);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+		kctx_suspended = true;
+	}
+
+	/* Transaction complete */
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	mutex_unlock(&new_address_space->transaction_mutex);
+
+	/* Synchronize with any policy timers */
+	kbase_backend_ctx_count_changed(kbdev);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	/* Note: after this point, the context could potentially get scheduled
+	 * out immediately */
+
+	if (kctx_suspended) {
+		/* Finishing forcing out the context due to a suspend. Use a
+		 * variant of kbasep_js_runpool_release_ctx() that doesn't
+		 * schedule a new context, to prevent a risk of recursion back
+		 * into this function */
+		kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx);
+		return false;
+	}
+	return true;
+}
+
+static bool kbase_js_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	if (kbase_backend_use_ctx_sched(kbdev, kctx)) {
+		/* Context already has ASID - mark as active */
+		kbdev->hwaccess.active_kctx = kctx;
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		return true; /* Context already scheduled */
+	}
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return kbasep_js_schedule_ctx(kbdev, kctx);
+}
+
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	bool is_scheduled;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* This must never be attempted whilst suspending - i.e. it should only
+	 * happen in response to a syscall from a user-space thread */
+	BUG_ON(kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Mark the context as privileged */
+	js_kctx_info->ctx.flags |= KBASE_CTX_FLAG_PRIVILEGED;
+
+	is_scheduled = js_kctx_info->ctx.is_scheduled;
+	if (!is_scheduled) {
+		/* Add the context to the pullable list */
+		if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0))
+			kbase_js_sync_timers(kbdev);
+
+		/* Fast-starting requires the jsctx_mutex to be dropped,
+		 * because it works on multiple ctxs */
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		/* Try to schedule the context in */
+		kbase_js_sched_all(kbdev);
+
+		/* Wait for the context to be scheduled in */
+		wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			kctx->jctx.sched_info.ctx.is_scheduled);
+	} else {
+		/* Already scheduled in - We need to retain it to keep the
+		 * corresponding address space */
+		kbasep_js_runpool_retain_ctx(kbdev, kctx);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+}
+KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx);
+
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* We don't need to use the address space anymore */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Release the context - it will be scheduled out */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	kbase_js_sched_all(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx);
+
+void kbasep_js_suspend(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+	u16 retained = 0u;
+	int nr_privileged_ctx = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	/* Prevent all contexts from submitting */
+	js_devdata->runpool_irq.submit_allowed = 0;
+
+	/* Retain each of the contexts, so we can cause it to leave even if it
+	 * had no refcount to begin with */
+	for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+			&js_devdata->runpool_irq.per_as_data[i];
+		struct kbase_context *kctx = js_per_as_data->kctx;
+
+		retained = retained << 1;
+
+		if (kctx) {
+			++(js_per_as_data->as_busy_refcount);
+			retained |= 1u;
+			/* We can only cope with up to 1 privileged context -
+			 * the instrumented context. It'll be suspended by
+			 * disabling instrumentation */
+			if (kctx->jctx.sched_info.ctx.flags &
+					KBASE_CTX_FLAG_PRIVILEGED) {
+				++nr_privileged_ctx;
+				WARN_ON(nr_privileged_ctx != 1);
+			}
+		}
+	}
+	CSTD_UNUSED(nr_privileged_ctx);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	/* De-ref the previous retain to ensure each context gets pulled out
+	 * sometime later. */
+	for (i = 0;
+		 i < BASE_MAX_NR_AS;
+		 ++i, retained = retained >> 1) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+			&js_devdata->runpool_irq.per_as_data[i];
+		struct kbase_context *kctx = js_per_as_data->kctx;
+
+		if (retained & 1u)
+			kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	/* Caller must wait for all Power Manager active references to be
+	 * dropped */
+}
+
+void kbasep_js_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int js;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_context *kctx, *n;
+
+		list_for_each_entry_safe(kctx, n,
+				&kbdev->js_data.ctx_list_unpullable[js],
+				jctx.sched_info.ctx.ctx_list_entry[js]) {
+			struct kbasep_js_kctx_info *js_kctx_info;
+			unsigned long flags;
+			bool timer_sync = false;
+
+			js_kctx_info = &kctx->jctx.sched_info;
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+			if (!js_kctx_info->ctx.is_scheduled &&
+				kbase_js_ctx_pullable(kctx, js, false))
+				timer_sync =
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+									flags);
+			if (timer_sync)
+				kbase_backend_ctx_count_changed(kbdev);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		}
+	}
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	/* Restart atom processing */
+	kbase_js_sched_all(kbdev);
+
+	/* JS Resume complete */
+}
+
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if ((katom->core_req & BASE_JD_REQ_FS) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE |
+								BASE_JD_REQ_T)))
+		return false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) &&
+	    (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T)))
+		return false;
+
+	return true;
+}
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_FS)
+		return 0;
+
+	if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+		if (katom->device_nr == 1 &&
+				kbdev->gpu_props.num_core_groups == 2)
+			return 2;
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+			return 2;
+	}
+
+	return 1;
+}
+
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom)
+{
+	bool enqueue_required;
+
+	katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	/* If slot will transition from unpullable to pullable then add to
+	 * pullable list */
+	if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
+		enqueue_required = true;
+	} else {
+		enqueue_required = false;
+	}
+	/* Check if there are lower priority jobs to soft stop */
+	kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+	if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
+			(katom->pre_dep && (katom->pre_dep->atom_flags &
+			KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		int prio = katom->sched_priority;
+		int js = katom->slot_nr;
+		struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+		list_add_tail(&katom->queue, &queue->x_dep_head);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+		enqueue_required = false;
+	} else {
+		/* Add atom to ring buffer. */
+		jsctx_tree_add(kctx, katom);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+	}
+
+	return enqueue_required;
+}
+
+/**
+ * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the
+ *                         runnable_tree, ready for execution
+ * @katom: Atom to submit
+ *
+ * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set,
+ * but is still present in the x_dep list. If @katom has a same-slot dependent
+ * atom then that atom (and any dependents) will also be moved.
+ */
+static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->kbdev->js_data.runpool_irq.lock);
+
+	while (katom) {
+		WARN_ON(!(katom->atom_flags &
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
+
+		if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+			list_del(&katom->queue);
+			katom->atom_flags &=
+					~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+			jsctx_tree_add(katom->kctx, katom);
+			katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+		} else {
+			break;
+		}
+
+		katom = katom->post_dep;
+	}
+}
+
+
+/**
+ * kbase_js_evict_deps - Evict dependencies of a failed atom.
+ * @kctx:       Context pointer
+ * @katom:      Pointer to the atom that has failed.
+ * @js:         The job slot the katom was run on.
+ * @prio:       Priority of the katom.
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propogated to all dependent atoms.
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom, int js, int prio)
+{
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+	struct kbase_jd_atom *next_katom = katom->post_dep;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	if (next_katom) {
+		KBASE_DEBUG_ASSERT(next_katom->status !=
+				KBASE_JD_ATOM_STATE_HW_COMPLETED);
+		next_katom->will_fail_event_code = katom->event_code;
+
+	}
+
+	/* Has cross slot depenency. */
+	if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE |
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		/* Remove dependency.*/
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+		/* Fail if it had a data dependency. */
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
+			x_dep->will_fail_event_code = katom->event_code;
+		}
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)
+			kbase_js_move_to_tree(x_dep);
+	}
+}
+
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbasep_js_device_data *js_devdata;
+	int pulled;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	js_devdata = &kctx->kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+		return NULL;
+	if (kbase_pm_is_suspending(kctx->kbdev))
+		return NULL;
+
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return NULL;
+
+	if (atomic_read(&katom->blocked))
+		return NULL;
+
+	/* Due to ordering restrictions when unpulling atoms on failure, we do
+	 * not allow multiple runs of fail-dep atoms from the same context to be
+	 * present on the same slot */
+	if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) {
+		struct kbase_jd_atom *prev_atom =
+				kbase_backend_inspect_tail(kctx->kbdev, js);
+
+		if (prev_atom && prev_atom->kctx != kctx)
+			return NULL;
+	}
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return NULL;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return NULL;
+	}
+
+	kctx->pulled = true;
+	pulled = atomic_inc_return(&kctx->atoms_pulled);
+	if (pulled == 1 && !kctx->slots_pullable) {
+		WARN_ON(kctx->ctx_runnable_ref);
+		kctx->ctx_runnable_ref = true;
+		atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable);
+	}
+	atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
+	jsctx_rb_pull(kctx, katom);
+
+	kbasep_js_runpool_retain_ctx_nolock(kctx->kbdev, kctx);
+	katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+
+	katom->sched_info.cfs.ticks = 0;
+
+	return katom;
+}
+
+
+static void js_return_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+									work);
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	struct kbasep_js_atom_retained_state retained_state;
+	int js = katom->slot_nr;
+	bool timer_sync = false;
+	bool context_idle = false;
+	unsigned long flags;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	kbase_tlstream_aux_job_softstop_ex(katom);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	kbasep_js_atom_retained_state_copy(&retained_state, katom);
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	atomic_dec(&kctx->atoms_pulled);
+	atomic_dec(&kctx->atoms_pulled_slot[js]);
+
+	atomic_dec(&katom->blocked);
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
+			jsctx_rb_none_to_pull(kctx, js))
+		timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js);
+
+	if (!atomic_read(&kctx->atoms_pulled)) {
+		if (!kctx->slots_pullable) {
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
+				!js_kctx_info->ctx.is_dying) {
+			int num_slots = kbdev->gpu_props.num_job_slots;
+			int slot;
+
+			if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+				kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+			for (slot = 0; slot < num_slots; slot++) {
+				if (kbase_js_ctx_pullable(kctx, slot, true))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, slot);
+			}
+		}
+
+		kbase_jm_idle_ctx(kbdev, kctx);
+
+		context_idle = true;
+	}
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	if (context_idle) {
+		WARN_ON(!kctx->ctx_active);
+		kctx->ctx_active = false;
+		kbase_pm_context_idle(kbdev);
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+							&retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+}
+
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	jsctx_rb_unpull(kctx, katom);
+
+	WARN_ON(work_pending(&katom->work));
+
+	/* Block re-submission until workqueue has run */
+	atomic_inc(&katom->blocked);
+
+	kbase_job_check_leave_disjoint(kctx->kbdev, katom);
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, js_return_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+						struct kbase_jd_atom *katom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	bool timer_sync = false;
+	int atom_slot;
+	bool context_idle = false;
+
+	kbdev = kctx->kbdev;
+	atom_slot = katom->slot_nr;
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
+		context_idle = !atomic_dec_return(&kctx->atoms_pulled);
+		atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
+
+		if (!atomic_read(&kctx->atoms_pulled) &&
+				!kctx->slots_pullable) {
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+	}
+	WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE));
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
+			jsctx_rb_none_to_pull(kctx, atom_slot)) {
+		if (!list_empty(
+			&kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot]))
+			timer_sync |= kbase_js_ctx_list_remove_nolock(
+					kctx->kbdev, kctx, atom_slot);
+	}
+
+	/*
+	 * If submission is disabled on this context (most likely due to an
+	 * atom failure) and there are now no atoms left in the system then
+	 * re-enable submission so that context can be scheduled again.
+	 */
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
+					!atomic_read(&kctx->atoms_pulled) &&
+					!js_kctx_info->ctx.is_dying) {
+		int js;
+
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	} else if (katom->x_post_dep &&
+			kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+		int js;
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	}
+
+	/* Mark context as inactive. The pm reference will be dropped later in
+	 * jd_done_worker().
+	 */
+	if (context_idle)
+		kctx->ctx_active = false;
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return context_idle;
+}
+
+void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp)
+{
+	u64 microseconds_spent = 0;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx = katom->kctx;
+	union kbasep_js_policy *js_policy;
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+
+	kbdev = kctx->kbdev;
+
+	js_policy = &kbdev->js_data.policy;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	if (katom->will_fail_event_code)
+		katom->event_code = katom->will_fail_event_code;
+
+	katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
+
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_js_evict_deps(kctx, katom, katom->slot_nr,
+				katom->sched_priority);
+	}
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
+				katom->slot_nr), NULL, 0);
+#endif
+
+	/* Calculate the job's time used */
+	if (end_timestamp != NULL) {
+		/* Only calculating it for jobs that really run on the HW (e.g.
+		 * removed from next jobs never actually ran, so really did take
+		 * zero time) */
+		ktime_t tick_diff = ktime_sub(*end_timestamp,
+							katom->start_timestamp);
+
+		microseconds_spent = ktime_to_ns(tick_diff);
+
+		do_div(microseconds_spent, 1000);
+
+		/* Round up time spent to the minimum timer resolution */
+		if (microseconds_spent < KBASEP_JS_TICK_RESOLUTION_US)
+			microseconds_spent = KBASEP_JS_TICK_RESOLUTION_US;
+	}
+
+	/* Log the result of the job (completion status, and time spent). */
+	kbasep_js_policy_log_job_result(js_policy, katom, microseconds_spent);
+
+	kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
+
+	/* Unblock cross dependency if present */
+	if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
+			!(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) &&
+			(x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+		bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false);
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+		kbase_js_move_to_tree(x_dep);
+		if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false))
+			kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx,
+					x_dep->slot_nr);
+	}
+}
+
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+{
+	struct kbasep_js_device_data *js_devdata;
+	bool timer_sync = false;
+
+	js_devdata = &kbdev->js_data;
+
+	down(&js_devdata->schedule_sem);
+	mutex_lock(&js_devdata->queue_mutex);
+
+	while (js_mask) {
+		int js;
+
+		js = ffs(js_mask) - 1;
+
+		while (1) {
+			struct kbase_context *kctx;
+			unsigned long flags;
+			bool context_idle = false;
+
+			kctx = kbase_js_ctx_list_pop_head(kbdev, js);
+
+			if (!kctx) {
+				js_mask &= ~(1 << js);
+				break; /* No contexts on pullable list */
+			}
+
+			if (!kctx->ctx_active) {
+				context_idle = true;
+
+				if (kbase_pm_context_active_handle_suspend(
+									kbdev,
+				      KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+					/* Suspend pending - return context to
+					 * queue and stop scheduling */
+					mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					if (kbase_js_ctx_list_add_pullable_head(
+						kctx->kbdev, kctx, js))
+						kbase_js_sync_timers(kbdev);
+					mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					mutex_unlock(&js_devdata->queue_mutex);
+					up(&js_devdata->schedule_sem);
+					return;
+				}
+				kctx->ctx_active = true;
+			}
+
+			if (!kbase_js_use_ctx(kbdev, kctx)) {
+				mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				/* Context can not be used at this time */
+				spin_lock_irqsave(&js_devdata->runpool_irq.lock,
+									flags);
+				if (kbase_js_ctx_pullable(kctx, js, false)
+					|| (kctx->jctx.sched_info.ctx.flags &
+						KBASE_CTX_FLAG_PRIVILEGED))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev, kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+				spin_unlock_irqrestore(
+					&js_devdata->runpool_irq.lock, flags);
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				if (context_idle) {
+					WARN_ON(!kctx->ctx_active);
+					kctx->ctx_active = false;
+					kbase_pm_context_idle(kbdev);
+				}
+
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+				break;
+			}
+			mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+			spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+			kctx->pulled = false;
+
+			if (!kbase_jm_kick(kbdev, 1 << js))
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+
+			if (!kctx->pulled) {
+				/* Failed to pull jobs - push to head of list */
+				if (kbase_js_ctx_pullable(kctx, js, true))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+								kctx->kbdev,
+								kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+								kctx->kbdev,
+								kctx, js);
+
+				if (context_idle) {
+					kbase_jm_idle_ctx(kbdev, kctx);
+					spin_unlock_irqrestore(
+						&js_devdata->runpool_irq.lock,
+									flags);
+					WARN_ON(!kctx->ctx_active);
+					kctx->ctx_active = false;
+					kbase_pm_context_idle(kbdev);
+				} else {
+					spin_unlock_irqrestore(
+						&js_devdata->runpool_irq.lock,
+									flags);
+				}
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+				js_mask &= ~(1 << js);
+				break; /* Could not run atoms on this slot */
+			}
+
+			/* Push to back of list */
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev, kctx, js);
+			else
+				timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+									flags);
+			mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		}
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+	up(&js_devdata->schedule_sem);
+}
+
+void kbase_js_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	int js;
+
+	/*
+	 * Critical assumption: No more submission is possible outside of the
+	 * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs)
+	 * whilst the struct kbase_context is terminating.
+	 */
+
+	/* First, atomically do the following:
+	 * - mark the context as dying
+	 * - try to evict it from the policy queue */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	js_kctx_info->ctx.is_dying = true;
+
+	dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+
+	/*
+	 * At this point we know:
+	 * - If eviction succeeded, it was in the policy queue, but now no
+	 *   longer is
+	 *  - We must cancel the jobs here. No Power Manager active reference to
+	 *    release.
+	 *  - This happens asynchronously - kbase_jd_zap_context() will wait for
+	 *    those jobs to be killed.
+	 * - If eviction failed, then it wasn't in the policy queue. It is one
+	 *   of the following:
+	 *  - a. it didn't have any jobs, and so is not in the Policy Queue or
+	 *       the Run Pool (not scheduled)
+	 *   - Hence, no more work required to cancel jobs. No Power Manager
+	 *     active reference to release.
+	 *  - b. it was in the middle of a scheduling transaction (and thus must
+	 *       have at least 1 job). This can happen from a syscall or a
+	 *       kernel thread. We still hold the jsctx_mutex, and so the thread
+	 *       must be waiting inside kbasep_js_try_schedule_head_ctx(),
+	 *       before checking whether the runpool is full. That thread will
+	 *       continue after we drop the mutex, and will notice the context
+	 *       is dying. It will rollback the transaction, killing all jobs at
+	 *       the same time. kbase_jd_zap_context() will wait for those jobs
+	 *       to be killed.
+	 *   - Hence, no more work required to cancel jobs, or to release the
+	 *     Power Manager active reference.
+	 *  - c. it is scheduled, and may or may not be running jobs
+	 * - We must cause it to leave the runpool by stopping it from
+	 * submitting any more jobs. When it finally does leave,
+	 * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs
+	 * (because it is dying), release the Power Manager active reference,
+	 * and will not requeue the context in the policy queue.
+	 * kbase_jd_zap_context() will wait for those jobs to be killed.
+	 *  - Hence, work required just to make it leave the runpool. Cancelling
+	 *    jobs and releasing the Power manager active reference will be
+	 *    handled when it leaves the runpool.
+	 */
+	if (!js_kctx_info->ctx.is_scheduled) {
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (!list_empty(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+				list_del_init(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+		}
+
+		/* The following events require us to kill off remaining jobs
+		 * and update PM book-keeping:
+		 * - we evicted it correctly (it must have jobs to be in the
+		 *   Policy Queue)
+		 *
+		 * These events need no action, but take this path anyway:
+		 * - Case a: it didn't have any jobs, and was never in the Queue
+		 * - Case b: scheduling transaction will be partially rolled-
+		 *           back (this already cancels the jobs)
+		 */
+
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u,
+						js_kctx_info->ctx.is_scheduled);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+
+		/* Only cancel jobs when we evicted from the policy
+		 * queue. No Power Manager active reference was held.
+		 *
+		 * Having is_dying set ensures that this kills, and
+		 * doesn't requeue */
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+	} else {
+		unsigned long flags;
+		bool was_retained;
+
+		/* Case c: didn't evict, but it is scheduled - it's in the Run
+		 * Pool */
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u,
+						js_kctx_info->ctx.is_scheduled);
+		dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+
+		/* Disable the ctx from submitting any more jobs */
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		/* Retain and (later) release the context whilst it is is now
+		 * disallowed from submitting jobs - ensures that someone
+		 * somewhere will be removing the context later on */
+		was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+		/* Since it's scheduled and we have the jsctx_mutex, it must be
+		 * retained successfully */
+		KBASE_DEBUG_ASSERT(was_retained);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+
+		/* Cancel any remaining running jobs for this kctx - if any.
+		 * Submit is disallowed which takes effect immediately, so no
+		 * more new jobs will appear after we do this. */
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+			kbase_job_slot_hardstop(kctx, js, NULL);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+									kctx);
+
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u);
+
+	/* After this, you must wait on both the
+	 * kbase_jd_context::zero_jobs_wait and the
+	 * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs
+	 * to be destroyed, and the context to be de-scheduled (if it was on the
+	 * runpool).
+	 *
+	 * kbase_jd_zap_context() will do this. */
+}
+
+static inline int trace_get_refcnt(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+
+/**
+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context
+ * @kctx:     Pointer to context.
+ * @callback: Pointer to function to call for each job.
+ *
+ * Call a function on all jobs belonging to a non-queued, non-running
+ * context, and detach the jobs from the context as it goes.
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * Atoms will be removed from the queue, so this must only be called when
+ * cancelling jobs (which occurs as part of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_policy_ctx_job_cb callback)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	unsigned long flags;
+	u32 js;
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL,
+					0u, trace_get_refcnt(kbdev, kctx));
+
+	/* Invoke callback on jobs on each slot in turn */
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		jsctx_queue_foreach(kctx, js, callback);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
new file mode 100755
index 0000000..66b2132
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -0,0 +1,1058 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler APIs.
+ */
+
+#ifndef _KBASE_JS_H_
+#define _KBASE_JS_H_
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_js_policy.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_debug.h"
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js Job Scheduler Internal APIs
+ * @{
+ *
+ * These APIs are Internal to KBase and are available for use by the
+ * @ref kbase_js_policy "Job Scheduler Policy APIs"
+ */
+
+/**
+ * @brief Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * @brief Halt the Job Scheduler.
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero intitialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the struct kbase_context must be zero intitialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the context is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guarenteed not to update the Policy Queue. And so, the caller is
+ * guarenteed to not need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is false.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return true indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return false indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
+ *   the atom
+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ *   remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions.
+ *
+ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it has not been removed with kbasep_js_policy_dequeue_job()
+ *  - or, it has not been removed with kbasep_js_policy_dequeue_job_irq()
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Completely remove a job chain from the Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it is not being killed with kbasep_jd_cancel()
+ *  - or, it has not been removed with kbasep_js_policy_dequeue_job()
+ *  - or, it has not been removed with kbasep_js_policy_dequeue_job_irq()
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * @return true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs
+ * @return false otherwise
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_jd_atom *katom);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that is stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpoool_irq::lock, because
+ * it will be used internally. If the runpool_irq::lock is already held, then
+ * the caller should use kbasep_js_runpool_lookup_ctx_nolock() instead.
+ *
+ * @return a valid struct kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
+
+/**
+ * kbasep_js_runpool_lookup_ctx_nolock - Lookup a context in the Run Pool based
+ *         upon its current address space and ensure that is stays scheduled in.
+ * @kbdev: Device pointer
+ * @as_nr: Address space to lookup
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * Note: This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must the kbasep_js_device_data::runpoool_irq::lock.
+ *
+ * Return: a valid struct kbase_context on success, which has been refcounted as
+ *         being busy.
+ *         NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+		struct kbase_device *kbdev, int as_nr);
+
+/**
+ * @brief Handling the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.  \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero and the context \em might be scheduled out
+ * (depending on whether the Scheudling Policy has deemed it so, or if it has run
+ * out of jobs).
+ *
+ * If the context does get scheduled out, then The following actions will be
+ * taken as part of deschduling a context:
+ * - For the context being descheduled:
+ *  - If the context is in the processing of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ *  - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ *  - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ *  - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * - Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * - Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * - Attempt scheduling in a new context (if one is available), and if necessary,
+ * running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function \b cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a \a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * - Releasing the atom's context attributes
+ * - Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * @brief Variant of kbase_js_runpool_release_ctx() that assumes that
+ * kbasep_js_device_data::runpool_mutex and
+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
+ * attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * @brief Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the function will wait
+ * for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space reserved) until
+ * kbasep_js_release_privileged_ctx is called).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Release a privileged context, allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Mangement active refcount to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduling Policy's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time whenever the Job
+ * Scheduling Policy implements Job Timeouts (such as those done by CFS).
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * @brief Resume the Job Scheduler after a Power Management Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * - Schedules contexts back into the runpool
+ * - Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * @brief Submit an atom to the job scheduler.
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to submit
+ *
+ * @return Whether the context requires to be enqueued. */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom);
+
+/**
+  * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
+  * @kctx:  Context Pointer
+  * @prio:  Priority (specifies the queue together with js).
+  * @js:    Job slot (specifies the queue together with prio).
+  *
+  * Pushes all possible atoms from the linked list to the ringbuffer.
+  * Number of atoms are limited to free space in the ringbuffer and
+  * number of available atoms in the linked list.
+  *
+  */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
+/**
+ * @brief Pull an atom from a context in the job scheduler for execution.
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context to pull from
+ * @param[in] js    Job slot to pull from
+ * @return          Pointer to an atom, or NULL if there are no atoms for this
+ *                  slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * @brief Return an atom to the job scheduler ringbuffer.
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to unpull
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom from jd_done_worker(), removing it from the job
+ * scheduler ringbuffer.
+ *
+ * If the atom failed then all dependee atoms marked for failure propagation
+ * will also fail.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] katom Pointer to the atom to complete
+ * @return true if the context is now idle (no jobs pulled)
+ *         false otherwise
+ */
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom.
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] katom         Pointer to the atom to complete
+ * @param[in] end_timestamp The time that the atom completed (may be NULL)
+ */
+void kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp);
+
+/**
+ * @brief Submit atoms from all available contexts.
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ *
+ * @param[in] kbdev    Device pointer
+ * @param[in] js_mask  Mask of job slots to submit to
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_jd_zap_context - Attempt to deschedule a context that is being
+ *                        destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove a context from any internal job scheduler queues
+ * and perform any other actions to ensure a context will not be submitted
+ * from.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * @brief Validate an atom
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * @param[in] kbdev  Device pointer
+ * @param[in] katom  Atom to validate
+ * @return           true if atom is valid
+ *                   false otherwise
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user specified data
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used, if a timeout
+ * is negative then the default is set.
+ */
+void kbase_js_set_timeouts(struct kbase_device *kbdev);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * @brief Check that a context is allowed to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * As with any bool, never test the return value with true.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 test_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+	test_bit = (u16) (1u << kctx->as_nr);
+
+	return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+}
+
+/**
+ * @brief Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 set_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+	set_bit = (u16) (1u << kctx->as_nr);
+
+	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * @brief Prevent a context from submitting more jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 clear_bit;
+	u16 clear_mask;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+	clear_bit = (u16) (1u << kctx->as_nr);
+	clear_mask = ~clear_bit;
+
+	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
+
+/**
+ * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom
+ */
+static inline void kbasep_js_clear_job_retry_submit(struct kbase_jd_atom *atom)
+{
+	atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Mark a slot as requiring resubmission by carrying that information on a
+ * completing atom.
+ *
+ * @note This can ASSERT in debug builds if the submit slot has been set to
+ * something other than the current value for @a js. This is because you might
+ * be unintentionally stopping more jobs being submitted on the old submit
+ * slot, and that might cause a scheduling-hang.
+ *
+ * @note If you can guarantee that the atoms for the original slot will be
+ * submitted on some other slot, then call kbasep_js_clear_job_retry_submit()
+ * first to silence the ASSERT.
+ */
+static inline void kbasep_js_set_job_retry_submit_slot(struct kbase_jd_atom *atom, int js)
+{
+	KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS);
+	KBASE_DEBUG_ASSERT((atom->retry_submit_on_slot ==
+					KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID)
+				|| (atom->retry_submit_on_slot == js));
+
+	atom->retry_submit_on_slot = js;
+}
+
+/**
+ * Create an initial 'invalid' atom retained state, that requires no
+ * atom-related work to be done on releasing with
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
+{
+	retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+	retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+	retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Copy atom state that can be made available after jd_done_nolock() is called
+ * on that atom.
+ */
+static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
+{
+	retained_state->event_code = katom->event_code;
+	retained_state->core_req = katom->core_req;
+	retained_state->retry_submit_on_slot = katom->retry_submit_on_slot;
+	retained_state->sched_priority = katom->sched_priority;
+	retained_state->device_nr = katom->device_nr;
+}
+
+/**
+ * @brief Determine whether an atom has finished (given its retained state),
+ * and so should be given back to userspace/removed from the system.
+ *
+ * Reasons for an atom not finishing include:
+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ *
+ * @param[in] katom_retained_state the retained state of the atom to check
+ * @return    false if the atom has not finished
+ * @return    !=false if the atom has finished
+ */
+static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
+}
+
+/**
+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
+ * code should just ignore it.
+ *
+ * @param[in] katom_retained_state the atom's retained state to check
+ * @return    false if the retained state is invalid, and can be ignored
+ * @return    !=false if the retained state is valid
+ */
+static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_atom_retained_state *katom_retained_state, int *res)
+{
+	int js = katom_retained_state->retry_submit_on_slot;
+
+	*res = js;
+	return (bool) (js >= 0);
+}
+
+#if KBASE_DEBUG_DISABLE_ASSERTS == 0
+/**
+ * Debug Check the refcount of a context. Only use within ASSERTs
+ *
+ * Obtains kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return negative value if the context is not scheduled in
+ * @return current refcount of the context if it is scheduled in. The refcount
+ * is not guarenteed to be kept constant.
+ */
+static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int result = -1;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID)
+		result = js_devdata->runpool_irq.per_as_data[as_nr].as_busy_refcount;
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return result;
+}
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS == 0 */
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guarenteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpoool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guarenteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	found_kctx = js_per_as_data->kctx;
+	KBASE_DEBUG_ASSERT(found_kctx == NULL || js_per_as_data->as_busy_refcount > 0);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return found_kctx;
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: when you need the number of cycles to guarantee you won't wait for
+ * longer than 'us' time (you might have a shorter wait).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_min_freq(struct kbase_device *kbdev, u32 us)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return us * (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need the number of cycles to guarantee you'll wait at least
+ * 'us' amount of time (but you might wait longer).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_max_freq(struct kbase_device *kbdev, u32 us)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return us * (u32) (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the worst-case wait that 'ticks' cycles will
+ * take (you guarantee that you won't wait any longer than this, but it may
+ * be shorter).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_min_freq(struct kbase_device *kbdev, u32 ticks)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return ticks / gpu_freq * 1000;
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the best-case wait for 'tick' cycles (you
+ * guarantee to be waiting for at least this long, but it may be longer).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_max_freq(struct kbase_device *kbdev, u32 ticks)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return ticks / gpu_freq * 1000;
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+	++(js_devdata->nr_all_contexts_running);
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+		/* Track contexts that can submit jobs */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+									S8_MAX);
+		++(js_devdata->nr_user_contexts_running);
+	}
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	--(js_devdata->nr_all_contexts_running);
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+		/* Track contexts that can submit jobs */
+		--(js_devdata->nr_user_contexts_running);
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+	}
+}
+
+
+/**
+ * @brief Submit atoms from all available contexts to all job slots.
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ *
+ * @param[in] kbdev    Device pointer
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+	kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
+ *                                        to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treates priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ *         0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ *         KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+	if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+		return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+	return kbasep_js_atom_priority_to_relative[atom_prio];
+}
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+	unsigned int prio_idx;
+
+	KBASE_DEBUG_ASSERT(0 <= sched_prio
+			&& sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+	prio_idx = (unsigned int)sched_prio;
+
+	return kbasep_js_relative_priority_to_atom[prio_idx];
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100755
index 0000000..e6e611b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,303 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+
+/*
+ * Private functions follow
+ */
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false);
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
+		++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) {
+			/* First refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false);
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+		--(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+			/* Last de-refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+	++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		/* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+	}
+
+	return runpool_state_changed;
+}
+
+/*
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+	if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+		/* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+	}
+
+	/* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */
+	--(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != false) {
+		/* This context never submits, so don't track any scheduling attributes */
+		return;
+	}
+
+	/* Transfer attributes held in the context flags for contexts that have submit enabled */
+
+	/* ... More attributes can be added here ... */
+
+	/* The context should not have been scheduled yet, so ASSERT if this caused
+	 * runpool state changes (note that other threads *can't* affect the value
+	 * of runpool_state_changed, due to how it's calculated) */
+	KBASE_DEBUG_ASSERT(runpool_state_changed == false);
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed;
+	int i;
+
+	/* Retain any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled in, so update the runpool with the new attributes */
+			runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+
+			/* We don't need to know about state changed, because retaining a
+			 * context occurs on scheduling it, and that itself will also try
+			 * to run new atoms */
+			CSTD_UNUSED(runpool_state_changed);
+		}
+	}
+}
+
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+	int i;
+
+	/* Release any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled out, so update the runpool on the removed attributes */
+			runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom);
+	core_req = katom->core_req;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	/* We don't need to know about state changed, because retaining an
+	 * atom occurs on adding it, and that itself will also try to run
+	 * new atoms */
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom_retained_state);
+	core_req = katom_retained_state->core_req;
+
+	/* No-op for invalid atoms */
+	if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false)
+		return false;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	return runpool_state_changed;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
new file mode 100755
index 0000000..ce91833
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -0,0 +1,158 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_ctx_attr.h
+ * Job Scheduler Context Attribute APIs
+ */
+
+#ifndef _KBASE_JS_CTX_ATTR_H_
+#define _KBASE_JS_CTX_ATTR_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+
+/**
+ * Set the initial attributes of a context (when context create flags are set)
+ *
+ * Requires:
+ * - Hold the jsctx_mutex
+ */
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of a context
+ *
+ * This occurs on scheduling in the context on the runpool (but after
+ * is_scheduled is set)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ */
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Release all attributes of a context
+ *
+ * This occurs on scheduling out the context from the runpool (but before
+ * is_scheduled is cleared)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of an atom
+ *
+ * This occurs on adding an atom to a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ */
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * Release all attributes of an atom, given its retained state.
+ *
+ * This occurs after (permanently) removing an atom from a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * This is a no-op when \a katom_retained_state is invalid.
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+
+	return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	/* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+	return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+	return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100755
index 0000000..e134204
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,506 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+/* Types used by the policies must go here */
+enum {
+	/** Context will not submit any jobs */
+	KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0),
+
+	/** Set if the context uses an address space and should be kept scheduled in */
+	KBASE_CTX_FLAG_PRIVILEGED = (1u << 1)
+
+	    /* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */
+};
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+	base_jd_core_req core_req;
+	kbase_context_flags ctx_req;
+	u32 device_nr;
+};
+
+#include "mali_kbase_js_policy_cfs.h"
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy {
+	struct kbasep_js_policy_cfs cfs;
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_ctx_info {
+	struct kbasep_js_policy_cfs_ctx cfs;
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_job_info {
+	struct kbasep_js_policy_cfs_job cfs;
+};
+
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_policy_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
+
+/**
+ * @brief the IRQ_THROTTLE time in microseconds
+ *
+ * This will be converted via the GPU's clock frequency into a cycle-count.
+ *
+ * @note we can make an estimate of the GPU's frequency by periodically
+ * sampling its CYCLE_COUNT register
+ */
+#define KBASE_JS_IRQ_THROTTLE_TIME_US 20
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affect scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accomodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ */
+enum kbasep_js_ctx_attr {
+	/** Attribute indicating a context that contains Compute jobs. That is,
+	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE,
+
+	/** Attribute indicating a context that contains Non-Compute jobs. That is,
+	 * the context has some jobs that are \b not of type @ref
+	 * BASE_JD_REQ_ONLY_COMPUTE. The context usually has
+	 * BASE_CONTEXT_HINT_COMPUTE \b clear, but this depends on the HW
+	 * workarounds in use in the Job Scheduling Policy.
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+	/** Attribute indicating that a context contains compute-job atoms that
+	 * aren't restricted to a coherent group, and can run on all cores.
+	 *
+	 * Specifically, this is when the atom's \a core_req satisfy:
+	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
+	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+	 *
+	 * Such atoms could be blocked from running if one of the coherent groups
+	 * is being used by another job slot, so tracking this context attribute
+	 * allows us to prevent such situations.
+	 *
+	 * @note This doesn't take into account the 1-coregroup case, where all
+	 * compute atoms would effectively be able to run on 'all cores', but
+	 * contexts will still not always get marked with this attribute. Instead,
+	 * it is the caller's responsibility to take into account the number of
+	 * coregroups when interpreting this attribute.
+	 *
+	 * @note Whilst Tiler atoms are normally combined with
+	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+	 * enough to handle anyway.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+	/** Must be the last in the enum */
+	KBASEP_JS_CTX_ATTR_COUNT
+};
+
+enum {
+	/** Bit indicating that new atom should be started because this atom completed */
+	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
+	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
+
+/**
+ * Data used by the scheduler that is unique for each Address Space.
+ *
+ * This is used in IRQ context and kbasep_js_device_data::runpoool_irq::lock
+ * must be held whilst accessing this data (inculding reads and atomic
+ * decisions based on the read).
+ */
+struct kbasep_js_per_as_data {
+	/**
+	 * Ref count of whether this AS is busy, and must not be scheduled out
+	 *
+	 * When jobs are running this is always positive. However, it can still be
+	 * positive when no jobs are running. If all you need is a heuristic to
+	 * tell you whether jobs might be running, this should be sufficient.
+	 */
+	int as_busy_refcount;
+
+	/** Pointer to the current context on this address space, or NULL for no context */
+	struct kbase_context *kctx;
+};
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not rollover (which would be undefined behavior), and so under
+ * the Total License model, it is free to make optimizations based on that (i.e.
+ * to remove masking).
+ */
+struct kbasep_js_device_data {
+	/** Sub-structure to collect together Job Scheduling data used in IRQ context */
+	struct runpool_irq {
+		/**
+		 * Lock for accessing Job Scheduling data used in IRQ context
+		 *
+		 * This lock must be held whenever this data is accessed (read, or
+		 * write). Even for read-only access, memory barriers would be needed.
+		 * In any case, it is likely that decisions based on only reading must
+		 * also be atomic with respect to data held here and elsewhere in the
+		 * Job Scheduler.
+		 *
+		 * This lock must also be held for accessing:
+		 * - kbase_context::as_nr
+		 * - kbase_device::jm_slots
+		 * - Parts of the kbasep_js_policy, dependent on the policy (refer to
+		 * the policy in question for more information)
+		 * - Parts of kbasep_js_policy_ctx_info, dependent on the policy (refer to
+		 * the policy in question for more information)
+		 */
+		spinlock_t lock;
+
+		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
+		 * When bit 'N' is set in this, it indicates whether the context bound to address space
+		 * 'N' (per_as_data[N].kctx) is allowed to submit jobs.
+		 *
+		 * It is placed here because it's much more memory efficient than having a u8 in
+		 * struct kbasep_js_per_as_data to store this flag  */
+		u16 submit_allowed;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of contexts
+		 * that can fit into the runpool. This is currently BASE_MAX_NR_AS
+		 *
+		 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+		 * the refcount. Hence, it's not worthwhile reducing this to
+		 * bit-manipulation on u32s to save space (where in contrast, 4 bit
+		 * sub-fields would be easy to do and would save space).
+		 *
+		 * Whilst this must not become negative, the sign bit is used for:
+		 * - error detection in debug builds
+		 * - Optimization: it is undefined for a signed int to overflow, and so
+		 * the compiler can optimize for that never happening (thus, no masking
+		 * is required on updating the variable) */
+		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/** Data that is unique for each AS */
+		struct kbasep_js_per_as_data per_as_data[BASE_MAX_NR_AS];
+
+		/*
+		 * Affinity management and tracking
+		 */
+		/** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
+		 * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
+		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
+		/** Refcount for each core owned by each slot. Used to generate the
+		 * slot_affinities array of bitvectors
+		 *
+		 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+		 * because it is refcounted only when a job is definitely about to be
+		 * submitted to a slot, and is de-refcounted immediately after a job
+		 * finishes */
+		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
+	} runpool_irq;
+
+	/**
+	 * Run Pool mutex, for managing contexts within the runpool.
+	 * Unless otherwise specified, you must hold this lock whilst accessing any
+	 * members that follow
+	 *
+	 * In addition, this is used to access:
+	 * - the kbasep_js_kctx_info::runpool substructure
+	 */
+	struct mutex runpool_mutex;
+
+	/**
+	 * Queue Lock, used to access the Policy's queue of contexts independently
+	 * of the Run Pool.
+	 *
+	 * Of course, you don't need the Run Pool lock to access this.
+	 */
+	struct mutex queue_mutex;
+
+	/**
+	 * Scheduling semaphore. This must be held when calling
+	 * kbase_jm_kick()
+	 */
+	struct semaphore schedule_sem;
+
+	/**
+	 * List of contexts that can currently be pulled from
+	 */
+	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
+	/**
+	 * List of contexts that can not currently be pulled from, but have
+	 * jobs currently running.
+	 */
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
+
+	u16 as_free;				/**< Bitpattern of free Address Spaces */
+
+	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+	s8 nr_user_contexts_running;
+	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+	s8 nr_all_contexts_running;
+
+	/**
+	 * Policy-specific information.
+	 *
+	 * Refer to the structure defined by the current policy to determine which
+	 * locks must be held when accessing this.
+	 */
+	union kbasep_js_policy policy;
+
+	/** Core Requirements to match up with base_js_atom's core_req memeber
+	 * @note This is a write-once member, and so no locking is required to read */
+	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+	u32 scheduling_period_ns;    /*< Value for JS_SCHEDULING_PERIOD_NS */
+	u32 soft_stop_ticks;	     /*< Value for JS_SOFT_STOP_TICKS */
+	u32 soft_stop_ticks_cl;	     /*< Value for JS_SOFT_STOP_TICKS_CL */
+	u32 hard_stop_ticks_ss;	     /*< Value for JS_HARD_STOP_TICKS_SS */
+	u32 hard_stop_ticks_cl;	     /*< Value for JS_HARD_STOP_TICKS_CL */
+	u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */
+	u32 gpu_reset_ticks_ss;	     /*< Value for JS_RESET_TICKS_SS */
+	u32 gpu_reset_ticks_cl;	     /*< Value for JS_RESET_TICKS_CL */
+	u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
+	u32 ctx_timeslice_ns;		 /**< Value for JS_CTX_TIMESLICE_NS */
+	u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */
+	u32 cfs_ctx_runtime_min_slices;	 /**< Value for  DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */
+
+	/**< Value for JS_SOFT_JOB_TIMEOUT */
+	atomic_t soft_job_timeout_ms;
+
+	/** List of suspended soft jobs */
+	struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Support soft-stop on a single context */
+	bool softstop_always;
+#endif				/* CONFIG_MALI_DEBUG */
+
+	/** The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths).
+	 * @note This is a write-once member, and so no locking is required to read */
+	int init_status;
+
+	/* Number of contexts that can currently be pulled from */
+	u32 nr_contexts_pullable;
+
+	/* Number of contexts that can either be pulled from or are currently
+	 * running */
+	atomic_t nr_contexts_runnable;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+	/**
+	 * Runpool substructure. This must only be accessed whilst the Run Pool
+	 * mutex ( kbasep_js_device_data::runpool_mutex ) is held.
+	 *
+	 * In addition, the kbasep_js_device_data::runpool_irq::lock may need to be
+	 * held for certain sub-members.
+	 *
+	 * @note some of the members could be moved into struct kbasep_js_device_data for
+	 * improved d-cache/tlb efficiency.
+	 */
+	struct {
+		union kbasep_js_policy_ctx_info policy_ctx;	/**< Policy-specific context */
+	} runpool;
+
+	/**
+	 * Job Scheduler Context information sub-structure. These members are
+	 * accessed regardless of whether the context is:
+	 * - In the Policy's Run Pool
+	 * - In the Policy's Queue
+	 * - Not queued nor in the Run Pool.
+	 *
+	 * You must obtain the jsctx_mutex before accessing any other members of
+	 * this substructure.
+	 *
+	 * You may not access any of these members from IRQ context.
+	 */
+	struct kbase_jsctx {
+		struct mutex jsctx_mutex;		    /**< Job Scheduler Context lock */
+
+		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+		 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
+		 * for such jobs*/
+		u32 nr_jobs;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of atoms on
+		 * the context. **/
+		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		kbase_context_flags flags;
+		/* NOTE: Unify the following flags into kbase_context_flags */
+		/**
+		 * Is the context scheduled on the Run Pool?
+		 *
+		 * This is only ever updated whilst the jsctx_mutex is held.
+		 */
+		bool is_scheduled;
+		/**
+		 * Wait queue to wait for is_scheduled state changes.
+		 * */
+		wait_queue_head_t is_scheduled_wait;
+
+		bool is_dying;			/**< Is the context in the process of being evicted? */
+
+		/** Link implementing JS queues. Context can be present on one
+		 * list per job slot
+		 */
+		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
+	} ctx;
+
+	/* The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths) */
+	int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+	/** Event code - to determine whether the atom has finished */
+	enum base_jd_event_code event_code;
+	/** core requirements */
+	base_jd_core_req core_req;
+	/* priority */
+	int sched_priority;
+	/** Job Slot to retry submitting to if submission from IRQ handler failed */
+	int retry_submit_on_slot;
+	/* Core group atom was executed on */
+	u32 device_nr;
+
+};
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbase_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
+ *
+ * @see kbase_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_MED,
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,
+	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
new file mode 100755
index 0000000..debd011
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
@@ -0,0 +1,763 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_policy.h
+ * Job Scheduler Policy APIs.
+ */
+
+#ifndef _KBASE_JS_POLICY_H_
+#define _KBASE_JS_POLICY_H_
+
+/**
+ * @page page_kbase_js_policy Job Scheduling Policies
+ * The Job Scheduling system is described in the following:
+ * - @subpage page_kbase_js_policy_overview
+ * - @subpage page_kbase_js_policy_operation
+ *
+ * The API details are as follows:
+ * - @ref kbase_jm
+ * - @ref kbase_js
+ * - @ref kbase_js_policy
+ */
+
+/**
+ * @page page_kbase_js_policy_overview Overview of the Policy System
+ *
+ * The Job Scheduler Policy manages:
+ * - The assigning of KBase Contexts to GPU Address Spaces (\em ASs)
+ * - The choosing of Job Chains (\em Jobs) from a KBase context, to run on the
+ * GPU's Job Slots (\em JSs).
+ * - The amount of \em time a context is assigned to (<em>scheduled on</em>) an
+ * Address Space
+ * - The amount of \em time a Job spends running on the GPU
+ *
+ * The Policy implements this management via 2 components:
+ * - A Policy Queue, which manages a set of contexts that are ready to run,
+ * but not currently running.
+ * - A Policy Run Pool, which manages the currently running contexts (one per Address
+ * Space) and the jobs to run on the Job Slots.
+ *
+ * Each Graphics Process in the system has at least one KBase Context. Therefore,
+ * the Policy Queue can be seen as a queue of Processes waiting to run Jobs on
+ * the GPU.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_overview.dot "Diagram showing a very simplified overview of the Policy System. IRQ handling, soft/hard-stopping, contexts re-entering the system and Policy details are omitted"
+ *
+ * The main operations on the queue are:
+ * - Enqueuing a Context to it
+ * - Dequeuing a Context from it, to run it.
+ * - Note: requeuing a context is much the same as enqueuing a context, but
+ * occurs when a context is scheduled out of the system to allow other contexts
+ * to run.
+ *
+ * These operations have much the same meaning for the Run Pool - Jobs are
+ * dequeued to run on a Jobslot, and requeued when they are scheduled out of
+ * the GPU.
+ *
+ * @note This is an over-simplification of the Policy APIs - there are more
+ * operations than 'Enqueue'/'Dequeue', and a Dequeue from the Policy Queue
+ * takes at least two function calls: one to Dequeue from the Queue, one to add
+ * to the Run Pool.
+ *
+ * As indicated on the diagram, Jobs permanently leave the scheduling system
+ * when they are completed, otherwise they get dequeued/requeued until this
+ * happens. Similarly, Contexts leave the scheduling system when their jobs
+ * have all completed. However, Contexts may later return to the scheduling
+ * system (not shown on the diagram) if more Bags of Jobs are submitted to
+ * them.
+ */
+
+/**
+ * @page page_kbase_js_policy_operation Policy Operation
+ *
+ * We describe the actions that the Job Scheduler Core takes on the Policy in
+ * the following cases:
+ * - The IRQ Path
+ * - The Job Submission Path
+ * - The High Priority Job Submission Path
+ *
+ * This shows how the Policy APIs will be used by the Job Scheduler core.
+ *
+ * The following diagram shows an example Policy that contains a Low Priority
+ * queue, and a Real-time (High Priority) Queue. The RT queue is examined
+ * before the LowP one on dequeuing from the head. The Low Priority Queue is
+ * ordered by time, and the RT queue is ordered by time weighted by
+ * RT-priority. In addition, it shows that the Job Scheduler Core will start a
+ * Soft-Stop Timer (SS-Timer) when it dequeue's and submits a job. The
+ * Soft-Stop time is set by a global configuration value, and must be a value
+ * appropriate for the policy. For example, this could include "don't run a
+ * soft-stop timer" for a First-Come-First-Served (FCFS) policy.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_operation_diagram.dot "Diagram showing the objects managed by an Example Policy, and the operations made upon these objects by the Job Scheduler Core."
+ *
+ * @section sec_kbase_js_policy_operation_prio Dealing with Priority
+ *
+ * Priority applies separately to a context as a whole, and to the jobs within
+ * a context. The jobs specify a priority in the base_jd_atom::prio member, but
+ * it is independent of the context priority. That is, it only affects
+ * scheduling of atoms within a context. Refer to @ref base_jd_prio for more
+ * details. The meaning of the context's priority value is up to the policy
+ * itself, and could be a logarithmic scale instead of a linear scale (e.g. the
+ * policy could implement an increase/decrease in priority by 1 results in an
+ * increase/decrease in \em proportion of time spent scheduled in by 25%, an
+ * effective change in timeslice by 11%).
+ *
+ * It is up to the policy whether a boost in priority boosts the priority of
+ * the entire context (e.g. to such an extent where it may pre-empt other
+ * running contexts). If it chooses to do this, the Policy must make sure that
+ * only jobs from high-priority contexts are run, and that the context is
+ * scheduled out once only jobs from low priority contexts remain. This ensures
+ * that the low priority contexts do not gain from the priority boost, yet they
+ * still get scheduled correctly with respect to other low priority contexts.
+ *
+ *
+ * @section sec_kbase_js_policy_operation_irq IRQ Path
+ *
+ * The following happens on the IRQ path from the Job Scheduler Core:
+ * - Note the slot that completed (for later)
+ * - Log the time spent by the job (and implicitly, the time spent by the
+ * context)
+ *  - call kbasep_js_policy_log_job_result() <em>in the context of the irq
+ * handler.</em>
+ *  - This must happen regardless of whether the job completed successfully or
+ * not (otherwise the context gets away with DoS'ing the system with faulty jobs)
+ * - What was the result of the job?
+ *  - If Completed: job is just removed from the system
+ *  - If Hard-stop or failure: job is removed from the system
+ *  - If Soft-stop: queue the book-keeping work onto a work-queue: have a
+ * work-queue call kbasep_js_policy_enqueue_job()
+ * - Check the timeslice used by the owning context
+ *  - call kbasep_js_policy_should_remove_ctx() <em>in the context of the irq
+ * handler.</em>
+ *  - If this returns true, clear the "allowed" flag.
+ * - Check the ctx's flags for "allowed", "has jobs to run" and "is running
+ * jobs"
+ * - And so, should the context stay scheduled in?
+ *  - If No, push onto a work-queue the work of scheduling out the old context,
+ * and getting a new one. That is:
+ *   - kbasep_js_policy_runpool_remove_ctx() on old_ctx
+ *   - kbasep_js_policy_enqueue_ctx() on old_ctx
+ *   - kbasep_js_policy_dequeue_head_ctx() to get new_ctx
+ *   - kbasep_js_policy_runpool_add_ctx() on new_ctx
+ *   - (all of this work is deferred on a work-queue to keep the IRQ handler quick)
+ * - If there is space in the completed job slots' HEAD/NEXT registers, run the next job:
+ *  - kbasep_js_policy_dequeue_job() <em>in the context of the irq
+ * handler</em> with core_req set to that of the completing slot
+ *  - if this returned true, submit the job to the completed slot.
+ *  - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * false, or the job slot has a job queued on both the HEAD and NEXT registers.
+ *  - If kbasep_js_policy_dequeue_job() returned false, submit some work to
+ * the work-queue to retry from outside of IRQ context (calling
+ * kbasep_js_policy_dequeue_job() from a work-queue).
+ *
+ * Since the IRQ handler submits new jobs \em and re-checks the IRQ_RAWSTAT,
+ * this sequence could loop a large number of times: this could happen if
+ * the jobs submitted completed on the GPU very quickly (in a few cycles), such
+ * as GPU NULL jobs. Then, the HEAD/NEXT registers will always be free to take
+ * more jobs, causing us to loop until we run out of jobs.
+ *
+ * To mitigate this, we must limit the number of jobs submitted per slot during
+ * the IRQ handler - for example, no more than 2 jobs per slot per IRQ should
+ * be sufficient (to fill up the HEAD + NEXT registers in normal cases). For
+ * Mali-T600 with 3 job slots, this means that up to 6 jobs could be submitted per
+ * slot. Note that IRQ Throttling can make this situation commonplace: 6 jobs
+ * could complete but the IRQ for each of them is delayed by the throttling. By
+ * the time you get the IRQ, all 6 jobs could've completed, meaning you can
+ * submit jobs to fill all 6 HEAD+NEXT registers again.
+ *
+ * @note As much work is deferred as possible, which includes the scheduling
+ * out of a context and scheduling in a new context. However, we can still make
+ * starting a single high-priorty context quick despite this:
+ * - On Mali-T600 family, there is one more AS than JSs.
+ * - This means we can very quickly schedule out one AS, no matter what the
+ * situation (because there will always be one AS that's not currently running
+ * on the job slot - it can only have a job in the NEXT register).
+ *  - Even with this scheduling out, fair-share can still be guaranteed e.g. by
+ * a timeline-based Completely Fair Scheduler.
+ * - When our high-priority context comes in, we can do this quick-scheduling
+ * out immediately, and then schedule in the high-priority context without having to block.
+ * - This all assumes that the context to schedule out is of lower
+ * priority. Otherwise, we will have to block waiting for some other low
+ * priority context to finish its jobs. Note that it's likely (but not
+ * impossible) that the high-priority context \b is running jobs, by virtue of
+ * it being high priority.
+ * - Therefore, we can give a high liklihood that on Mali-T600 at least one
+ * high-priority context can be started very quickly. For the general case, we
+ * can guarantee starting (no. ASs) - (no. JSs) high priority contexts
+ * quickly. In any case, there is a high likelihood that we're able to start
+ * more than one high priority context quickly.
+ *
+ * In terms of the functions used in the IRQ handler directly, these are the
+ * perfomance considerations:
+ * - kbase_js_policy_log_job_result():
+ *  - This is just adding to a 64-bit value (possibly even a 32-bit value if we
+ * only store the time the job's recently spent - see below on 'priority weighting')
+ *  - For priority weighting, a divide operation ('div') could happen, but
+ * this can happen in a deferred context (outside of IRQ) when scheduling out
+ * the ctx; as per our Engineering Specification, the contexts of different
+ * priority still stay scheduled in for the same timeslice, but higher priority
+ * ones scheduled back in more often.
+ *  - That is, the weighted and unweighted times must be stored separately, and
+ * the weighted time is only updated \em outside of IRQ context.
+ *  - Of course, this divide is more likely to be a 'multiply by inverse of the
+ * weight', assuming that the weight (priority) doesn't change.
+ * - kbasep_js_policy_should_remove_ctx():
+ *  - This is usually just a comparison of the stored time value against some
+ * maximum value.
+ *
+ * @note all deferred work can be wrapped up into one call - we usually need to
+ * indicate that a job/bag is done outside of IRQ context anyway.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit Submission path
+ *
+ * Start with a Context with no jobs present, and assume equal priority of all
+ * contexts in the system. The following work all happens outside of IRQ
+ * Context :
+ * - As soon as job is made 'ready to 'run', then is must be registerd with the Job
+ * Scheduler Policy:
+ *  - 'Ready to run' means they've satisified their dependencies in the
+ * Kernel-side Job Dispatch system.
+ *  - Call kbasep_js_policy_enqueue_job()
+ *  - This indicates that the job should be scheduled (it is ready to run).
+ * - As soon as a ctx changes from having 0 jobs 'ready to run' to >0 jobs
+ * 'ready to run', we enqueue the context on the policy queue:
+ *  - Call kbasep_js_policy_enqueue_ctx()
+ *  - This indicates that the \em ctx should be scheduled (it is ready to run)
+ *
+ * Next, we need to handle adding a context to the Run Pool - if it's sensible
+ * to do so. This can happen due to two reasons:
+ * -# A context is enqueued as above, and there are ASs free for it to run on
+ * (e.g. it is the first context to be run, in which case it can be added to
+ * the Run Pool immediately after enqueuing on the Policy Queue)
+ * -# A previous IRQ caused another ctx to be scheduled out, requiring that the
+ * context at the head of the queue be scheduled in. Such steps would happen in
+ * a work queue (work deferred from the IRQ context).
+ *
+ * In both cases, we'd handle it as follows:
+ * - Get the context at the Head of the Policy Queue:
+ *  - Call kbasep_js_policy_dequeue_head_ctx()
+ * - Assign the Context an Address Space (Assert that there will be one free,
+ * given the above two reasons)
+ * - Add this context to the Run Pool:
+ *  - Call kbasep_js_policy_runpool_add_ctx()
+ * - Now see if a job should be run:
+ *  - Mostly, this will be done in the IRQ handler at the completion of a
+ * previous job.
+ *  - However, there are two cases where this cannot be done: a) The first job
+ * enqueued to the system (there is no previous IRQ to act upon) b) When jobs
+ * are submitted at a low enough rate to not fill up all Job Slots (or, not to
+ * fill both the 'HEAD' and 'NEXT' registers in the job-slots)
+ *  - Hence, on each ctx <b>and job</b> submission we should try to see if we
+ * can run a job:
+ *  - For each job slot that has free space (in NEXT or HEAD+NEXT registers):
+ *   - Call kbasep_js_policy_dequeue_job() with core_req set to that of the
+ * slot
+ *   - if we got one, submit it to the job slot.
+ *   - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * false, or the job slot has a job queued on both the HEAD and NEXT registers.
+ *
+ * The above case shows that we should attempt to run jobs in cases where a) a ctx
+ * has been added to the Run Pool, and b) new jobs have been added to a context
+ * in the Run Pool:
+ * - In the latter case, the context is in the runpool because it's got a job
+ * ready to run, or is already running a job
+ * - We could just wait until the IRQ handler fires, but for certain types of
+ * jobs this can take comparatively a long time to complete, e.g. GLES FS jobs
+ * generally take much longer to run that GLES CS jobs, which are vertex shader
+ * jobs.
+ * - Therefore, when a new job appears in the ctx, we must check the job-slots
+ * to see if they're free, and run the jobs as before.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit_hipri Submission path for High Priority Contexts
+ *
+ * For High Priority Contexts on Mali-T600, we can make sure that at least 1 of
+ * them can be scheduled in immediately to start high prioriy jobs. In general,
+ * (no. ASs) - (no JSs) high priority contexts may be started immediately. The
+ * following describes how this happens:
+ *
+ * Similar to the previous section, consider what happens with a high-priority
+ * context (a context with a priority higher than that of any in the Run Pool)
+ * that starts out with no jobs:
+ * - A job becomes ready to run on the context, and so we enqueue the context
+ * on the Policy's Queue.
+ * - However, we'd like to schedule in this context immediately, instead of
+ * waiting for one of the Run Pool contexts' timeslice to expire
+ * - The policy's Enqueue function must detect this (because it is the policy
+ * that embodies the concept of priority), and take appropriate action
+ *  - That is, kbasep_js_policy_enqueue_ctx() should check the Policy's Run
+ * Pool to see if a lower priority context should be scheduled out, and then
+ * schedule in the High Priority context.
+ *  - For Mali-T600, we can always pick a context to schedule out immediately
+ * (because there are more ASs than JSs), and so scheduling out a victim context
+ * and scheduling in the high priority context can happen immediately.
+ *   - If a policy implements fair-sharing, then this can still ensure the
+ * victim later on gets a fair share of the GPU.
+ *   - As a note, consider whether the victim can be of equal/higher priority
+ * than the incoming context:
+ *   - Usually, higher priority contexts will be the ones currently running
+ * jobs, and so the context with the lowest priority is usually not running
+ * jobs.
+ *   - This makes it likely that the victim context is low priority, but
+ * it's not impossible for it to be a high priority one:
+ *    - Suppose 3 high priority contexts are submitting only FS jobs, and one low
+ * priority context submitting CS jobs. Then, the context not running jobs will
+ * be one of the hi priority contexts (because only 2 FS jobs can be
+ * queued/running on the GPU HW for Mali-T600).
+ *   - The problem can be mitigated by extra action, but it's questionable
+ * whether we need to: we already have a high likelihood that there's at least
+ * one high priority context - that should be good enough.
+ *   - And so, this method makes sure that at least one high priority context
+ * can be started very quickly, but more than one high priority contexts could be
+ * delayed (up to one timeslice).
+ *   - To improve this, use a GPU with a higher number of Address Spaces vs Job
+ * Slots.
+ * - At this point, let's assume this high priority context has been scheduled
+ * in immediately. The next step is to ensure it can start some jobs quickly.
+ *  - It must do this by Soft-Stopping jobs on any of the Job Slots that it can
+ * submit to.
+ *  - The rest of the logic for starting the jobs is taken care of by the IRQ
+ * handler. All the policy needs to do is ensure that
+ * kbasep_js_policy_dequeue_job() will return the jobs from the high priority
+ * context.
+ *
+ * @note in SS state, we currently only use 2 job-slots (even for T608, but
+ * this might change in future). In this case, it's always possible to schedule
+ * out 2 ASs quickly (their jobs won't be in the HEAD registers). At the same
+ * time, this maximizes usage of the job-slots (only 2 are in use), because you
+ * can guarantee starting of the jobs from the High Priority contexts immediately too.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_notes Notes
+ *
+ * - In this design, a separate 'init' is needed from dequeue/requeue, so that
+ * information can be retained between the dequeue/requeue calls. For example,
+ * the total time spent for a context/job could be logged between
+ * dequeue/requeuing, to implement Fair Sharing. In this case, 'init' just
+ * initializes that information to some known state.
+ *
+ *
+ *
+ */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js_policy Job Scheduler Policy APIs
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy structure
+ */
+union kbasep_js_policy;
+
+/**
+ * @brief Initialize the Job Scheduler Policy
+ */
+int kbasep_js_policy_init(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler Policy
+ */
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy);
+
+/**
+ * @addtogroup kbase_js_policy_ctx Job Scheduler Policy, Context Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Ctx Info structure
+ *
+ * This structure is embedded in the struct kbase_context structure. It is used to:
+ * - track information needed for the policy to schedule the context (e.g. time
+ * used, OS priority etc.)
+ * - link together kbase_contexts into a queue, so that a struct kbase_context can be
+ * obtained as the container of the policy ctx info. This allows the API to
+ * return what "the next context" should be.
+ * - obtain other information already stored in the struct kbase_context for
+ * scheduling purposes (e.g process ID to get the priority of the originating
+ * process)
+ */
+union kbasep_js_policy_ctx_info;
+
+/**
+ * @brief Initialize a ctx for use with the Job Scheduler Policy
+ *
+ * This effectively initializes the union kbasep_js_policy_ctx_info structure within
+ * the struct kbase_context (itself located within the kctx->jctx.sched_info structure).
+ */
+int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Terminate resources associated with using a ctx in the Job Scheduler
+ * Policy.
+ */
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Enqueue a context onto the Job Scheduler Policy Queue
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out a low
+ * priority context from the Run Pool to allow the high priority context to be
+ * scheduled in.
+ *
+ * If the context has the privileged flag set, it will always be kept at the
+ * head of the queue.
+ *
+ * The caller will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ */
+void kbasep_js_policy_enqueue_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Dequeue a context from the Head of the Job Scheduler Policy Queue
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return true if a context was available, and *kctx_ptr points to
+ * the kctx dequeued.
+ * @return false if no contexts were available.
+ */
+bool kbasep_js_policy_dequeue_head_ctx(union kbasep_js_policy *js_policy, struct kbase_context ** const kctx_ptr);
+
+/**
+ * @brief Evict a context from the Job Scheduler Policy Queue
+ *
+ * This is only called as part of destroying a kbase_context.
+ *
+ * There are many reasons why this might fail during the lifetime of a
+ * context. For example, the context is in the process of being scheduled. In
+ * that case a thread doing the scheduling might have a pointer to it, but the
+ * context is neither in the Policy Queue, nor is it in the Run
+ * Pool. Crucially, neither the Policy Queue, Run Pool, or the Context itself
+ * are locked.
+ *
+ * Hence to find out where in the system the context is, it is important to do
+ * more than just check the kbasep_js_kctx_info::ctx::is_scheduled member.
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return true if the context was evicted from the Policy Queue
+ * @return false if the context was not found in the Policy Queue
+ */
+bool kbasep_js_policy_try_evict_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Call a function on all jobs belonging to a non-queued, non-running
+ * context, optionally detaching the jobs from the context as it goes.
+ *
+ * At the time of the call, the context is guarenteed to be not-currently
+ * scheduled on the Run Pool (is_scheduled == false), and not present in
+ * the Policy Queue. This is because one of the following functions was used
+ * recently on the context:
+ * - kbasep_js_policy_evict_ctx()
+ * - kbasep_js_policy_runpool_remove_ctx()
+ *
+ * In both cases, no subsequent call was made on the context to any of:
+ * - kbasep_js_policy_runpool_add_ctx()
+ * - kbasep_js_policy_enqueue_ctx()
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * \a detach_jobs must only be set when cancelling jobs (which occurs as part
+ * of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+void kbasep_js_policy_foreach_ctx_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx,
+	kbasep_js_policy_ctx_job_cb callback, bool detach_jobs);
+
+/**
+ * @brief Add a context to the Job Scheduler Policy's Run Pool
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out low
+ * priority jobs that are currently running on the GPU.
+ *
+ * The number of contexts present in the Run Pool will never be more than the
+ * number of Address Spaces.
+ *
+ * The following guarentees are made about the state of the system when this
+ * is called:
+ * - kctx->as_nr member is valid
+ * - the context has its submit_allowed flag set
+ * - kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is valid
+ * - The refcount of the context is guarenteed to be zero.
+ * - kbasep_js_kctx_info::ctx::is_scheduled will be true.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_add_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Remove a context from the Job Scheduler Policy's Run Pool
+ *
+ * The kctx->as_nr member is valid and the context has its submit_allowed flag
+ * set when this is called. The state of
+ * kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is also
+ * valid. The refcount of the context is guarenteed to be zero.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Indicate whether a context should be removed from the Run Pool
+ * (should be scheduled out).
+ *
+ * The kbasep_js_device_data::runpool_irq::lock will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ */
+bool kbasep_js_policy_should_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Synchronize with any timers acting upon the runpool
+ *
+ * The policy should check whether any timers it owns should be running. If
+ * they should not, the policy must cancel such timers and ensure they are not
+ * re-run by the time this function finishes.
+ *
+ * In particular, the timers must not be running when there are no more contexts
+ * on the runpool, because the GPU could be powered off soon after this call.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ */
+void kbasep_js_policy_runpool_timers_sync(union kbasep_js_policy *js_policy);
+
+
+/**
+ * @brief Indicate whether a new context has an higher priority than the current context.
+ *
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held for \a new_ctx
+ *
+ * This function must not sleep, because an IRQ spinlock might be held whilst
+ * this is called.
+ *
+ * @note There is nothing to stop the priority of \a current_ctx changing
+ * during or immediately after this function is called (because its jsctx_mutex
+ * cannot be held). Therefore, this function should only be seen as a heuristic
+ * guide as to whether \a new_ctx is higher priority than \a current_ctx
+ */
+bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx);
+
+	  /** @} *//* end group kbase_js_policy_ctx */
+
+/**
+ * @addtogroup kbase_js_policy_job Job Scheduler Policy, Job Chain Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Job Info structure
+ *
+ * This structure is embedded in the struct kbase_jd_atom structure. It is used to:
+ * - track information needed for the policy to schedule the job (e.g. time
+ * used, etc.)
+ * - link together jobs into a queue/buffer, so that a struct kbase_jd_atom can be
+ * obtained as the container of the policy job info. This allows the API to
+ * return what "the next job" should be.
+ */
+union kbasep_js_policy_job_info;
+
+/**
+ * @brief Initialize a job for use with the Job Scheduler Policy
+ *
+ * This function initializes the union kbasep_js_policy_job_info structure within the
+ * kbase_jd_atom. It will only initialize/allocate resources that are specific
+ * to the job.
+ *
+ * That is, this function makes \b no attempt to:
+ * - initialize any context/policy-wide information
+ * - enqueue the job on the policy.
+ *
+ * At some later point, the following functions must be called on the job, in this order:
+ * - kbasep_js_policy_register_job() to register the job and initialize policy/context wide data.
+ * - kbasep_js_policy_enqueue_job() to enqueue the job
+ *
+ * A job must only ever be initialized on the Policy once, and must be
+ * terminated on the Policy before the job is freed.
+ *
+ * The caller will not be holding any locks, and so this function will not
+ * modify any information in \a kctx or \a js_policy.
+ *
+ * @return 0 if initialization was correct.
+ */
+int kbasep_js_policy_init_job(const union kbasep_js_policy *js_policy, const struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Register context/policy-wide information for a job on the Job Scheduler Policy.
+ *
+ * Registers the job with the policy. This is used to track the job before it
+ * has been enqueued/requeued by kbasep_js_policy_enqueue_job(). Specifically,
+ * it is used to update information under a lock that could not be updated at
+ * kbasep_js_policy_init_job() time (such as context/policy-wide data).
+ *
+ * @note This function will not fail, and hence does not allocate any
+ * resources. Any failures that could occur on registration will be caught
+ * during kbasep_js_policy_init_job() instead.
+ *
+ * A job must only ever be registerd on the Policy once, and must be
+ * deregistered on the Policy on completion (whether or not that completion was
+ * success/failure).
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_register_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief De-register context/policy-wide information for a on the Job Scheduler Policy.
+ *
+ * This must be used before terminating the resources associated with using a
+ * job in the Job Scheduler Policy. This function does not itself terminate any
+ * resources, at most it just updates information in the policy and context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_deregister_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Dequeue a Job for a job slot from the Job Scheduler Policy Run Pool
+ *
+ * The job returned by the policy will match at least one of the bits in the
+ * job slot's core requirements (but it may match more than one, or all @ref
+ * base_jd_core_req bits supported by the job slot).
+ *
+ * In addition, the requirements of the job returned will be a subset of those
+ * requested - the job returned will not have requirements that \a job_slot_idx
+ * cannot satisfy.
+ *
+ * The caller will submit the job to the GPU as soon as the GPU's NEXT register
+ * for the corresponding slot is empty. Of course, the GPU will then only run
+ * this new job when the currently executing job (in the jobslot's HEAD
+ * register) has completed.
+ *
+ * @return true if a job was available, and *kctx_ptr points to
+ * the kctx dequeued.
+ * @return false if no jobs were available among all ctxs in the Run Pool.
+ *
+ * @note base_jd_core_req is currently a u8 - beware of type conversion.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_lock::irq will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex. will be held
+ */
+bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, struct kbase_jd_atom ** const katom_ptr);
+
+/**
+ * @brief Requeue a Job back into the Job Scheduler Policy Run Pool
+ *
+ * This will be used to enqueue a job after its creation and also to requeue
+ * a job into the Run Pool that was previously dequeued (running). It notifies
+ * the policy that the job should be run again at some point later.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock (a spinlock) will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_enqueue_job(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Log the result of a job: the time spent on a job/context, and whether
+ * the job failed or not.
+ *
+ * Since a struct kbase_jd_atom contains a pointer to the struct kbase_context owning it,
+ * then this can also be used to log time on either/both the job and the
+ * containing context.
+ *
+ * The completion state of the job can be found by examining \a katom->event.event_code
+ *
+ * If the Job failed and the policy is implementing fair-sharing, then the
+ * policy must penalize the failing job/context:
+ * - At the very least, it should penalize the time taken by the amount of
+ * time spent processing the IRQ in SW. This because a job in the NEXT slot
+ * waiting to run will be delayed until the failing job has had the IRQ
+ * cleared.
+ * - \b Optionally, the policy could apply other penalties. For example, based
+ * on a threshold of a number of failing jobs, after which a large penalty is
+ * applied.
+ *
+ * The kbasep_js_device_data::runpool_mutex will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock will be held.
+ *
+ * @param js_policy     job scheduler policy
+ * @param katom         job dispatch atom
+ * @param time_spent_us the time spent by the job, in microseconds (10^-6 seconds).
+ */
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us);
+
+	  /** @} *//* end group kbase_js_policy_job */
+
+	  /** @} *//* end group kbase_js_policy */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_POLICY_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
new file mode 100755
index 0000000..90c1345
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
@@ -0,0 +1,292 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler: Completely Fair Policy Implementation
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_js_policy_cfs.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
+#include <linux/sched/rt.h>
+#endif
+
+/**
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is enabled for a particular level is down to the integrator.
+ * However this is being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/* Fixed point constants used for runtime weight calculations */
+#define WEIGHT_FIXEDPOINT_SHIFT 10
+#define WEIGHT_TABLE_SIZE       40
+#define WEIGHT_0_NICE           (WEIGHT_TABLE_SIZE/2)
+#define WEIGHT_0_VAL            (1 << WEIGHT_FIXEDPOINT_SHIFT)
+
+#define PROCESS_PRIORITY_MIN (-20)
+#define PROCESS_PRIORITY_MAX  (19)
+
+/* Defines for easy asserts 'is scheduled'/'is queued'/'is neither queued norscheduled' */
+#define KBASEP_JS_CHECKFLAG_QUEUED       (1u << 0) /**< Check the queued state */
+#define KBASEP_JS_CHECKFLAG_SCHEDULED    (1u << 1) /**< Check the scheduled state */
+#define KBASEP_JS_CHECKFLAG_IS_QUEUED    (1u << 2) /**< Expect queued state to be set */
+#define KBASEP_JS_CHECKFLAG_IS_SCHEDULED (1u << 3) /**< Expect scheduled state to be set */
+
+enum {
+	KBASEP_JS_CHECK_NOTQUEUED = KBASEP_JS_CHECKFLAG_QUEUED,
+	KBASEP_JS_CHECK_NOTSCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED,
+	KBASEP_JS_CHECK_QUEUED = KBASEP_JS_CHECKFLAG_QUEUED | KBASEP_JS_CHECKFLAG_IS_QUEUED,
+	KBASEP_JS_CHECK_SCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED | KBASEP_JS_CHECKFLAG_IS_SCHEDULED
+};
+
+typedef u32 kbasep_js_check;
+
+/*
+ * Private Functions
+ */
+
+/* Table autogenerated using util built from: base/tools/gen_cfs_weight_of_prio/ */
+
+/* weight = 1.25 */
+static const int weight_of_priority[] = {
+	/*  -20 */ 11, 14, 18, 23,
+	/*  -16 */ 29, 36, 45, 56,
+	/*  -12 */ 70, 88, 110, 137,
+	/*   -8 */ 171, 214, 268, 335,
+	/*   -4 */ 419, 524, 655, 819,
+	/*    0 */ 1024, 1280, 1600, 2000,
+	/*    4 */ 2500, 3125, 3906, 4883,
+	/*    8 */ 6104, 7630, 9538, 11923,
+	/*   12 */ 14904, 18630, 23288, 29110,
+	/*   16 */ 36388, 45485, 56856, 71070
+};
+
+/*
+ * Note: There is nothing to stop the priority of the ctx containing
+ * ctx_info changing during or immediately after this function is called
+ * (because its jsctx_mutex cannot be held during IRQ). Therefore, this
+ * function should only be seen as a heuristic guide as to the priority weight
+ * of the context.
+ */
+static u64 priority_weight(struct kbasep_js_policy_cfs_ctx *ctx_info, u64 time_us)
+{
+	u64 time_delta_us;
+	int priority;
+
+	priority = ctx_info->process_priority;
+
+	/* Adjust runtime_us using priority weight if required */
+	if (priority != 0 && time_us != 0) {
+		int clamped_priority;
+
+		/* Clamp values to min..max weights */
+		if (priority > PROCESS_PRIORITY_MAX)
+			clamped_priority = PROCESS_PRIORITY_MAX;
+		else if (priority < PROCESS_PRIORITY_MIN)
+			clamped_priority = PROCESS_PRIORITY_MIN;
+		else
+			clamped_priority = priority;
+
+		/* Fixed point multiplication */
+		time_delta_us = (time_us * weight_of_priority[WEIGHT_0_NICE + clamped_priority]);
+		/* Remove fraction */
+		time_delta_us = time_delta_us >> WEIGHT_FIXEDPOINT_SHIFT;
+		/* Make sure the time always increases */
+		if (0 == time_delta_us)
+			time_delta_us++;
+	} else {
+		time_delta_us = time_us;
+	}
+
+	return time_delta_us;
+}
+
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_policy_trace_get_refcnt_nolock(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+
+static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	refcnt = kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return refcnt;
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+
+/*
+ * Non-private functions
+ */
+
+int kbasep_js_policy_init(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_policy_cfs *policy_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+	policy_info = &js_devdata->policy.cfs;
+
+	atomic64_set(&policy_info->least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+	atomic64_set(&policy_info->rt_least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+
+	policy_info->head_runtime_us = 0;
+
+	return 0;
+}
+
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy)
+{
+	CSTD_UNUSED(js_policy);
+}
+
+int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_policy_cfs_ctx *ctx_info;
+	struct kbasep_js_policy_cfs *policy_info;
+	int policy;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	policy_info = &kbdev->js_data.policy.cfs;
+	ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_INIT_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+	policy = current->policy;
+	if (policy == SCHED_FIFO || policy == SCHED_RR) {
+		ctx_info->process_rt_policy = true;
+		ctx_info->process_priority = (((MAX_RT_PRIO - 1) - current->rt_priority) / 5) - 20;
+	} else {
+		ctx_info->process_rt_policy = false;
+		ctx_info->process_priority = (current->static_prio - MAX_RT_PRIO) - 20;
+	}
+
+	/* Initial runtime (relative to least-run context runtime)
+	 *
+	 * This uses the Policy Queue's most up-to-date head_runtime_us by using the
+	 * queue mutex to issue memory barriers - also ensure future updates to
+	 * head_runtime_us occur strictly after this context is initialized */
+	mutex_lock(&js_devdata->queue_mutex);
+
+	/* No need to hold the the runpool_irq.lock here, because we're initializing
+	 * the value, and the context is definitely not being updated in the
+	 * runpool at this point. The queue_mutex ensures the memory barrier. */
+	ctx_info->runtime_us = policy_info->head_runtime_us + priority_weight(ctx_info, (u64) js_devdata->cfs_ctx_runtime_init_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u));
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return 0;
+}
+
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(js_policy != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_TERM_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+	/* No work to do */
+}
+
+/*
+ * Job Chain Management
+ */
+
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us)
+{
+	struct kbasep_js_policy_cfs_ctx *ctx_info;
+	struct kbase_context *parent_ctx;
+
+	KBASE_DEBUG_ASSERT(js_policy != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	CSTD_UNUSED(js_policy);
+
+	parent_ctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(parent_ctx != NULL);
+
+	ctx_info = &parent_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	ctx_info->runtime_us += priority_weight(ctx_info, time_spent_us);
+
+	katom->time_spent_us += time_spent_us;
+}
+
+bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx)
+{
+	struct kbasep_js_policy_cfs_ctx *current_ctx_info;
+	struct kbasep_js_policy_cfs_ctx *new_ctx_info;
+
+	KBASE_DEBUG_ASSERT(current_ctx != NULL);
+	KBASE_DEBUG_ASSERT(new_ctx != NULL);
+	CSTD_UNUSED(js_policy);
+
+	current_ctx_info = &current_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+	new_ctx_info = &new_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	if (!current_ctx_info->process_rt_policy && new_ctx_info->process_rt_policy)
+		return true;
+
+	if (current_ctx_info->process_rt_policy ==
+			new_ctx_info->process_rt_policy)
+		return true;
+
+	return false;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
new file mode 100755
index 0000000..b457d82
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Completely Fair Job Scheduler Policy structure definitions
+ */
+
+#ifndef _KBASE_JS_POLICY_CFS_H_
+#define _KBASE_JS_POLICY_CFS_H_
+
+#define KBASEP_JS_RUNTIME_EMPTY ((u64)-1)
+
+/**
+ * struct kbasep_js_policy_cfs - Data for the CFS policy
+ * @head_runtime_us:  Number of microseconds the least-run context has been
+ *                    running for. The kbasep_js_device_data.queue_mutex must
+ *                    be held whilst updating this.
+ *                    Reads are possible without this mutex, but an older value
+ *                    might be read if no memory barries are issued beforehand.
+ * @least_runtime_us: Number of microseconds the least-run context in the
+ *                    context queue has been running for.
+ *                    -1 if context queue is empty.
+ * @rt_least_runtime_us: Number of microseconds the least-run context in the
+ *                       realtime (priority) context queue has been running for.
+ *                       -1 if realtime context queue is empty
+ */
+struct kbasep_js_policy_cfs {
+	u64 head_runtime_us;
+	atomic64_t least_runtime_us;
+	atomic64_t rt_least_runtime_us;
+};
+
+/**
+ * struct kbasep_js_policy_cfs_ctx - a single linked list of all contexts
+ * @runtime_us:        microseconds this context has been running for
+ * @process_rt_policy: set if calling process policy scheme is a realtime
+ *                     scheduler and will use the priority queue. Non-mutable
+ *                     after ctx init
+ * @process_priority:  calling process NICE priority, in the range -20..19
+ *
+ * &kbasep_js_device_data.runpool_irq.lock must be held when updating
+ * @runtime_us. Initializing will occur on context init and context enqueue
+ * (which can only occur in one thread at a time), but multi-thread access only
+ * occurs while the context is in the runpool.
+ *
+ * Reads are possible without the spinlock, but an older value might be read if
+ * no memory barries are issued beforehand.
+ */
+struct kbasep_js_policy_cfs_ctx {
+	u64 runtime_us;
+	bool process_rt_policy;
+	int process_priority;
+};
+
+/**
+ * struct kbasep_js_policy_cfs_job - per job information for CFS
+ * @ticks: number of ticks that this job has been executing for
+ *
+ * &kbasep_js_device_data.runpool_irq.lock must be held when accessing @ticks.
+ */
+struct kbasep_js_policy_cfs_job {
+	u32 ticks;
+};
+
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100755
index 0000000..6d1e61f
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port. */
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+
+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+	#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+#else
+	#define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100755
index 0000000..709ff3e
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,2490 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/compat.h>
+#include <linux/version.h>
+
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_tlstream.h>
+
+/**
+ * @brief Check the zone compatibility of two regions.
+ */
+static int kbase_region_tracker_match_zone(struct kbase_va_region *reg1,
+		struct kbase_va_region *reg2)
+{
+	return ((reg1->flags & KBASE_REG_ZONE_MASK) ==
+		(reg2->flags & KBASE_REG_ZONE_MASK));
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_match_zone);
+
+/* This function inserts a region into the tree. */
+static void kbase_region_tracker_insert(struct kbase_context *kctx, struct kbase_va_region *new_reg)
+{
+	u64 start_pfn = new_reg->start_pfn;
+	struct rb_node **link = &(kctx->reg_rbtree.rb_node);
+	struct rb_node *parent = NULL;
+
+	/* Find the right place in the tree using tree search */
+	while (*link) {
+		struct kbase_va_region *old_reg;
+
+		parent = *link;
+		old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+		/* RBTree requires no duplicate entries. */
+		KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+		if (old_reg->start_pfn > start_pfn)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Put the new node there, and rebalance tree */
+	rb_link_node(&(new_reg->rblink), parent, link);
+	rb_insert_color(&(new_reg->rblink), &(kctx->reg_rbtree));
+}
+
+/* Find allocated region enclosing free range. */
+static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
+		struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	u64 end_pfn = start_pfn + nr_pages;
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (start_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (end_pfn > tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (gpu_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (gpu_pfn >= tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->start_pfn > gpu_pfn)
+			rbnode = rbnode->rb_left;
+		else if (reg->start_pfn < gpu_pfn)
+			rbnode = rbnode->rb_right;
+		else
+			return reg;
+
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	/* Note that this search is a linear search, as we do not have a target
+	   address in mind, so does not benefit from the rbtree search */
+	rbnode = rb_first(&(kctx->reg_rbtree));
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if ((reg->nr_pages >= nr_pages) &&
+				(reg->flags & KBASE_REG_FREE) &&
+				kbase_region_tracker_match_zone(reg, reg_reqs)) {
+			/* Check alignment */
+			u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1);
+
+			if ((start_pfn >= reg->start_pfn) &&
+					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1)))
+				return reg;
+		}
+		rbnode = rb_next(rbnode);
+	}
+
+	return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions.  It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ */
+static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	struct rb_node *rbprev;
+	struct kbase_va_region *prev = NULL;
+	struct rb_node *rbnext;
+	struct kbase_va_region *next = NULL;
+
+	int merged_front = 0;
+	int merged_back = 0;
+	int err = 0;
+
+	/* Try to merge with the previous block first */
+	rbprev = rb_prev(&(reg->rblink));
+	if (rbprev) {
+		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+		if ((prev->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(prev, reg)) {
+			/* We're compatible with the previous VMA, merge with it */
+			prev->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+			reg = prev;
+			merged_front = 1;
+		}
+	}
+
+	/* Try to merge with the next block second */
+	/* Note we do the lookup here as the tree may have been rebalanced. */
+	rbnext = rb_next(&(reg->rblink));
+	if (rbnext) {
+		/* We're compatible with the next VMA, merge with it */
+		next = rb_entry(rbnext, struct kbase_va_region, rblink);
+		if ((next->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(next, reg)) {
+			next->start_pfn = reg->start_pfn;
+			next->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+			merged_back = 1;
+			if (merged_front) {
+				/* We already merged with prev, free it */
+				kbase_free_alloced_region(reg);
+			}
+		}
+	}
+
+	/* If we failed to merge then we need to add a new block */
+	if (!(merged_front || merged_back)) {
+		/*
+		 * We didn't merge anything. Add a new free
+		 * placeholder and remove the original one.
+		 */
+		struct kbase_va_region *free_reg;
+
+		free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+		if (!free_reg) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		rb_replace_node(&(reg->rblink), &(free_reg->rblink), &(kctx->reg_rbtree));
+	}
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * @brief Insert a VA region to the list, replacing the current at_reg.
+ */
+static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+	int err = 0;
+
+	/* Must be a free region */
+	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+	/* start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+	/* at least nr_pages from start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	/* Regions are a whole use, so swap and delete old one. */
+	if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+		rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), &(kctx->reg_rbtree));
+		kbase_free_alloced_region(at_reg);
+	}
+	/* New region replaces the start of the old one, so insert before. */
+	else if (at_reg->start_pfn == start_pfn) {
+		at_reg->start_pfn += nr_pages;
+		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region replaces the end of the old one, so insert after. */
+	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region splits the old one, so insert and create new */
+	else {
+		struct kbase_va_region *new_front_reg;
+
+		new_front_reg = kbase_alloc_free_region(kctx,
+				at_reg->start_pfn,
+				start_pfn - at_reg->start_pfn,
+				at_reg->flags & KBASE_REG_ZONE_MASK);
+
+		if (new_front_reg) {
+			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+			at_reg->start_pfn = start_pfn + nr_pages;
+
+			kbase_region_tracker_insert(kctx, new_front_reg);
+			kbase_region_tracker_insert(kctx, new_reg);
+		} else {
+			err = -ENOMEM;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * @brief Add a VA region to the list.
+ */
+int kbase_add_va_region(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 addr,
+		size_t nr_pages, size_t align)
+{
+	struct kbase_va_region *tmp;
+	u64 gpu_pfn = addr >> PAGE_SHIFT;
+	int err = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!align)
+		align = 1;
+
+	/* must be a power of 2 */
+	KBASE_DEBUG_ASSERT((align & (align - 1)) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+	/* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
+	if (gpu_pfn) {
+		struct device *dev = kctx->kbdev->dev;
+
+		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+		tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
+		if (!tmp) {
+			dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		if ((!kbase_region_tracker_match_zone(tmp, reg)) ||
+				(!(tmp->flags & KBASE_REG_FREE))) {
+			dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
+			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
+			dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
+		if (err) {
+			dev_warn(dev, "Failed to insert va region");
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		goto exit;
+	}
+
+	/* Path 2: Map any free address which meets the requirements.  */
+	{
+		u64 start_pfn;
+
+		/*
+		 * Depending on the zone the allocation request is for
+		 * we might need to retry it.
+		 */
+		do {
+			tmp = kbase_region_tracker_find_region_meeting_reqs(
+					kctx, reg, nr_pages, align);
+			if (tmp) {
+				start_pfn = (tmp->start_pfn + align - 1) &
+						~(align - 1);
+				err = kbase_insert_va_region_nolock(kctx, reg,
+						tmp, start_pfn, nr_pages);
+				break;
+			}
+
+			/*
+			 * If the allocation is not from the same zone as JIT
+			 * then don't retry, we're out of VA and there is
+			 * nothing which can be done about it.
+			 */
+			if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+					KBASE_REG_ZONE_CUSTOM_VA)
+				break;
+		} while (kbase_jit_evict(kctx));
+
+		if (!tmp)
+			err = -ENOMEM;
+	}
+
+ exit:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+		struct kbase_va_region *same_va_reg,
+		struct kbase_va_region *exec_reg,
+		struct kbase_va_region *custom_va_reg)
+{
+	kctx->reg_rbtree = RB_ROOT;
+	kbase_region_tracker_insert(kctx, same_va_reg);
+
+	/* exec and custom_va_reg doesn't always exist */
+	if (exec_reg && custom_va_reg) {
+		kbase_region_tracker_insert(kctx, exec_reg);
+		kbase_region_tracker_insert(kctx, custom_va_reg);
+	}
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	do {
+		rbnode = rb_first(&(kctx->reg_rbtree));
+		if (rbnode) {
+			rb_erase(rbnode, &(kctx->reg_rbtree));
+			reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+			kbase_free_alloced_region(reg);
+		}
+	} while (rbnode);
+}
+
+/**
+ * Initialize the region tracker data structure.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+	struct kbase_va_region *same_va_reg;
+	struct kbase_va_region *exec_reg = NULL;
+	struct kbase_va_region *custom_va_reg = NULL;
+	size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+	u64 same_va_pages;
+	int err;
+
+	/* Take the lock as kbase_free_alloced_region requires it */
+	kbase_gpu_vm_lock(kctx);
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+#ifdef CONFIG_64BIT
+	if (kctx->is_compat)
+		same_va_bits = 32;
+	else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+#endif
+
+	if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) {
+		err = -EINVAL;
+		goto fail_unlock;
+	}
+
+	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+	/* all have SAME_VA */
+	same_va_reg = kbase_alloc_free_region(kctx, 1,
+			same_va_pages,
+			KBASE_REG_ZONE_SAME_VA);
+
+	if (!same_va_reg) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+#ifdef CONFIG_64BIT
+	/* 32-bit clients have exec and custom VA zones */
+	if (kctx->is_compat) {
+#endif
+		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+			err = -EINVAL;
+			goto fail_free_same_va;
+		}
+		/* If the current size of TMEM is out of range of the
+		 * virtual address space addressable by the MMU then
+		 * we should shrink it to fit
+		 */
+		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+		exec_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_EXEC_BASE,
+				KBASE_REG_ZONE_EXEC_SIZE,
+				KBASE_REG_ZONE_EXEC);
+
+		if (!exec_reg) {
+			err = -ENOMEM;
+			goto fail_free_same_va;
+		}
+
+		custom_va_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_CUSTOM_VA_BASE,
+				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+		if (!custom_va_reg) {
+			err = -ENOMEM;
+			goto fail_free_exec;
+		}
+#ifdef CONFIG_64BIT
+	}
+#endif
+
+	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+
+	kctx->same_va_end = same_va_pages + 1;
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_free_exec:
+	kbase_free_alloced_region(exec_reg);
+fail_free_same_va:
+	kbase_free_alloced_region(same_va_reg);
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
+{
+#ifdef CONFIG_64BIT
+	struct kbase_va_region *same_va;
+	struct kbase_va_region *custom_va_reg;
+	u64 same_va_bits;
+	u64 total_va_size;
+	int err;
+
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+	if (kctx->is_compat)
+		return 0;
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+
+	total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/*
+	 * Modify the same VA free region after creation. Be careful to ensure
+	 * that allocations haven't been made as they could cause an overlap
+	 * to happen with existing same VA allocations and the custom VA zone.
+	 */
+	same_va = kbase_region_tracker_find_region_base_address(kctx,
+			PAGE_SIZE);
+	if (!same_va) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* The region flag or region size has changed since creation so bail. */
+	if ((!(same_va->flags & KBASE_REG_FREE)) ||
+			(same_va->nr_pages != total_va_size)) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	if (same_va->nr_pages < jit_va_pages ||
+			kctx->same_va_end < jit_va_pages) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* It's safe to adjust the same VA zone now */
+	same_va->nr_pages -= jit_va_pages;
+	kctx->same_va_end -= jit_va_pages;
+
+	/*
+	 * Create a custom VA zone at the end of the VA for allocations which
+	 * JIT can use so it doesn't have to allocate VA from the kernel.
+	 */
+	custom_va_reg = kbase_alloc_free_region(kctx,
+				kctx->same_va_end,
+				jit_va_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+	if (!custom_va_reg) {
+		/*
+		 * The context will be destroyed if we fail here so no point
+		 * reverting the change we made to same_va.
+		 */
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	kbase_region_tracker_insert(kctx, custom_va_reg);
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+#else
+	return 0;
+#endif
+}
+
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+	kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX;
+
+	/* Initialize memory usage */
+	atomic_set(&memdev->used_pages, 0);
+
+	return kbase_mem_pool_init(&kbdev->mem_pool,
+			KBASE_MEM_POOL_MAX_SIZE_KBDEV, kbdev, NULL);
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int pages;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	pages = atomic_read(&memdev->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_pool_term(&kbdev->mem_pool);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_term);
+
+
+
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+{
+	struct kbase_va_region *new_reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* zone argument should only contain zone related region flags */
+	KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
+
+	new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+	if (!new_reg)
+		return NULL;
+
+	new_reg->cpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->gpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->kctx = kctx;
+	new_reg->flags = zone | KBASE_REG_FREE;
+
+	new_reg->flags |= KBASE_REG_GROWABLE;
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions.
+ *
+ */
+void kbase_free_alloced_region(struct kbase_va_region *reg)
+{
+	if (!(reg->flags & KBASE_REG_FREE)) {
+		/*
+		 * The physical allocation should have been removed from the
+		 * eviction list before this function is called. However, in the
+		 * case of abnormal process termination or the app leaking the
+		 * memory kbase_mem_free_region is not called so it can still be
+		 * on the list at termination time of the region tracker.
+		 */
+		if (!list_empty(&reg->gpu_alloc->evict_node)) {
+			/*
+			 * Unlink the physical allocation before unmaking it
+			 * evictable so that the allocation isn't grown back to
+			 * its last backed size as we're going to unmap it
+			 * anyway.
+			 */
+			reg->cpu_alloc->reg = NULL;
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				reg->gpu_alloc->reg = NULL;
+
+			/*
+			 * If a region has been made evictable then we must
+			 * unmake it before trying to free it.
+			 * If the memory hasn't been reclaimed it will be
+			 * unmapped and freed below, if it has been reclaimed
+			 * then the operations below are no-ops.
+			 */
+			if (reg->flags & KBASE_REG_DONT_NEED) {
+				KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+						   KBASE_MEM_TYPE_NATIVE);
+				kbase_mem_evictable_unmake(reg->gpu_alloc);
+			}
+		}
+
+		/*
+		 * Remove the region from the sticky resource metadata
+		 * list should it be there.
+		 */
+		kbase_sticky_resource_release(reg->kctx, NULL,
+				reg->start_pfn << PAGE_SHIFT);
+
+		kbase_mem_phy_alloc_put(reg->cpu_alloc);
+		kbase_mem_phy_alloc_put(reg->gpu_alloc);
+		/* To detect use-after-free in debug builds */
+		KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
+	}
+	kfree(reg);
+}
+
+KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+{
+	int err;
+	size_t i = 0;
+	unsigned long attr;
+	unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+
+	if ((kctx->kbdev->system_coherency == COHERENCY_ACE) &&
+		(reg->flags & KBASE_REG_SHARE_BOTH))
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA);
+	else
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
+	if (err)
+		return err;
+
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+		struct kbase_mem_phy_alloc *alloc;
+
+		alloc = reg->gpu_alloc;
+		stride = alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
+		for (i = 0; i < alloc->imported.alias.nents; i++) {
+			if (alloc->imported.alias.aliased[i].alloc) {
+				err = kbase_mmu_insert_pages(kctx,
+						reg->start_pfn + (i * stride),
+						alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
+						alloc->imported.alias.aliased[i].length,
+						reg->flags);
+				if (err)
+					goto bad_insert;
+
+				kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc);
+			} else {
+				err = kbase_mmu_insert_single_page(kctx,
+					reg->start_pfn + i * stride,
+					page_to_phys(kctx->aliasing_sink_page),
+					alloc->imported.alias.aliased[i].length,
+					(reg->flags & mask) | attr);
+
+				if (err)
+					goto bad_insert;
+			}
+		}
+	} else {
+		err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+				kbase_get_gpu_phy_pages(reg),
+				kbase_reg_current_backed_size(reg),
+				reg->flags);
+		if (err)
+			goto bad_insert;
+		kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
+	}
+
+	return err;
+
+bad_insert:
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+
+		stride = reg->gpu_alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		while (i--)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
+				kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length);
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+			}
+	}
+
+	kbase_remove_va_region(kctx, reg);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
+
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	if (reg->start_pfn == 0)
+		return 0;
+
+	if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		size_t i;
+
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+	} else {
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
+	}
+
+	if (err)
+		return err;
+
+	err = kbase_remove_va_region(kctx, reg);
+	return err;
+}
+
+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping_of_region(const struct kbase_va_region *reg, unsigned long uaddr, size_t size)
+{
+	struct kbase_cpu_mapping *map;
+	struct list_head *pos;
+
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+
+	if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
+		return NULL;
+
+	list_for_each(pos, &reg->cpu_alloc->mappings) {
+		map = list_entry(pos, struct kbase_cpu_mapping, mappings_list);
+		if (map->vm_start <= uaddr && map->vm_end >= uaddr + size)
+			return map;
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_of_region);
+
+int kbasep_find_enclosing_cpu_mapping_offset(
+	struct kbase_context *kctx, u64 gpu_addr,
+	unsigned long uaddr, size_t size, u64 * offset)
+{
+	struct kbase_cpu_mapping *map = NULL;
+	const struct kbase_va_region *reg;
+	int err = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+	if (reg && !(reg->flags & KBASE_REG_FREE)) {
+		map = kbasep_find_enclosing_cpu_mapping_of_region(reg, uaddr,
+				size);
+		if (map) {
+			*offset = (uaddr - PTR_TO_U64(map->vm_start)) +
+						 (map->page_off << PAGE_SHIFT);
+			err = 0;
+		}
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
+
+void kbase_sync_single(struct kbase_context *kctx,
+		phys_addr_t cpu_pa, phys_addr_t gpu_pa,
+		off_t offset, size_t size, enum kbase_sync_type sync_fn)
+{
+	struct page *cpu_page;
+
+	cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
+
+	if (likely(cpu_pa == gpu_pa)) {
+		dma_addr_t dma_addr;
+
+		BUG_ON(!cpu_page);
+		BUG_ON(offset + size > PAGE_SIZE);
+
+		dma_addr = kbase_dma_addr(cpu_page) + offset;
+		if (sync_fn == KBASE_SYNC_TO_CPU)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+		else if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+	} else {
+		void *src = NULL;
+		void *dst = NULL;
+		struct page *gpu_page;
+
+		if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
+			return;
+
+		gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+
+		if (sync_fn == KBASE_SYNC_TO_DEVICE) {
+			src = ((unsigned char *)kmap(cpu_page)) + offset;
+			dst = ((unsigned char *)kmap(gpu_page)) + offset;
+		} else if (sync_fn == KBASE_SYNC_TO_CPU) {
+			dma_sync_single_for_cpu(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+			src = ((unsigned char *)kmap(gpu_page)) + offset;
+			dst = ((unsigned char *)kmap(cpu_page)) + offset;
+		}
+		memcpy(dst, src, size);
+		kunmap(gpu_page);
+		kunmap(cpu_page);
+		if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+	}
+}
+
+static int kbase_do_syncset(struct kbase_context *kctx,
+		struct base_syncset *set, enum kbase_sync_type sync_fn)
+{
+	int err = 0;
+	struct basep_syncset *sset = &set->basep_sset;
+	struct kbase_va_region *reg;
+	struct kbase_cpu_mapping *map;
+	unsigned long start;
+	size_t size;
+	phys_addr_t *cpu_pa;
+	phys_addr_t *gpu_pa;
+	u64 page_off, page_count;
+	u64 i;
+	off_t offset;
+
+	kbase_os_mem_map_lock(kctx);
+	kbase_gpu_vm_lock(kctx);
+
+	/* find the region where the virtual address is contained */
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			sset->mem_handle.basep.handle);
+	if (!reg) {
+		dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX",
+				sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED))
+		goto out_unlock;
+
+	start = (uintptr_t)sset->user_addr;
+	size = (size_t)sset->size;
+
+	map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size);
+	if (!map) {
+		dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
+				start, sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	offset = start & (PAGE_SIZE - 1);
+	page_off = map->page_off + ((start - map->vm_start) >> PAGE_SHIFT);
+	page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	cpu_pa = kbase_get_cpu_phy_pages(reg);
+	gpu_pa = kbase_get_gpu_phy_pages(reg);
+
+	/* Sync first page */
+	if (cpu_pa[page_off]) {
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+
+		kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
+				offset, sz, sync_fn);
+	}
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		/* we grow upwards, so bail on first non-present page */
+		if (!cpu_pa[page_off + i])
+			break;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + i],
+				gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1 && cpu_pa[page_off + page_count - 1]) {
+		size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
+				gpu_pa[page_off + page_count - 1], 0, sz,
+				sync_fn);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_os_mem_map_unlock(kctx);
+	return err;
+}
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset)
+{
+	int err = -EINVAL;
+	struct basep_syncset *sset;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != syncset);
+
+	sset = &syncset->basep_sset;
+
+	switch (sset->type) {
+	case BASE_SYNCSET_OP_MSYNC:
+		err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_DEVICE);
+		break;
+
+	case BASE_SYNCSET_OP_CSYNC:
+		err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_CPU);
+		break;
+
+	default:
+		dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
+		break;
+	}
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_sync_now);
+
+/* vm lock must be held */
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Unlink the physical allocation before unmaking it evictable so
+	 * that the allocation isn't grown back to its last backed size
+	 * as we're going to unmap it anyway.
+	 */
+	reg->cpu_alloc->reg = NULL;
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		reg->gpu_alloc->reg = NULL;
+
+	/*
+	 * If a region has been made evictable then we must unmake it
+	 * before trying to free it.
+	 * If the memory hasn't been reclaimed it will be unmapped and freed
+	 * below, if it has been reclaimed then the operations below are no-ops.
+	 */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+				   KBASE_MEM_TYPE_NATIVE);
+		kbase_mem_evictable_unmake(reg->gpu_alloc);
+	}
+
+	err = kbase_gpu_munmap(kctx, reg);
+	if (err) {
+		dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+		goto out;
+	}
+
+	/* This will also free the physical pages */
+	kbase_free_alloced_region(reg);
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free_region);
+
+/**
+ * @brief Free the region from the GPU and unregister it.
+ *
+ * This function implements the free operation on a memory segment.
+ * It will loudly fail if called with outstanding mappings.
+ */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if (0 == gpu_addr) {
+		dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+		return -EINVAL;
+	}
+	kbase_gpu_vm_lock(kctx);
+
+	if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
+	    gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
+		int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
+
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* ask to unlink the cookie as we'll free it */
+
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		kbase_free_alloced_region(reg);
+	} else {
+		/* A real GPU va */
+		/* Validate the region */
+		reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+		if (!reg || (reg->flags & KBASE_REG_FREE)) {
+			dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+			/* SAME_VA must be freed through munmap */
+			dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		err = kbase_mem_free_region(kctx, reg);
+	}
+
+ out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free);
+
+void kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
+
+	reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
+	/* all memory is now growable */
+	reg->flags |= KBASE_REG_GROWABLE;
+
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		reg->flags |= KBASE_REG_PF_GROW;
+
+	if (flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (0 == (flags & BASE_MEM_PROT_GPU_EX))
+		reg->flags |= KBASE_REG_GPU_NX;
+
+	if (flags & BASE_MEM_COHERENT_SYSTEM ||
+			flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED)
+		reg->flags |= KBASE_REG_SHARE_BOTH;
+	else if (flags & BASE_MEM_COHERENT_LOCAL)
+		reg->flags |= KBASE_REG_SHARE_IN;
+
+	/* Set up default MEMATTR usage */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
+		(reg->flags & KBASE_REG_SHARE_BOTH)) {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
+	} else {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_update_region_flags);
+
+int kbase_alloc_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_requested)
+{
+	int new_page_count __maybe_unused;
+	size_t old_page_count = alloc->nents;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = kbase_atomic_add_pages(
+			nr_pages_requested, &alloc->imported.kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer */
+	kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested);
+
+	if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool,
+			nr_pages_requested, alloc->pages + old_page_count) != 0)
+		goto no_alloc;
+
+	/*
+	 * Request a zone cache update, this scans only the new pages an
+	 * appends their information to the zone cache. if the update
+	 * fails then clear the cache so we fall-back to doing things
+	 * page by page.
+	 */
+	if (kbase_zone_cache_update(alloc, old_page_count) != 0)
+		kbase_zone_cache_clear(alloc);
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)alloc->imported.kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return 0;
+
+no_alloc:
+	kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+	return -ENOMEM;
+}
+
+int kbase_free_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	phys_addr_t *start_free;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	/* early out if nothing to do */
+	if (0 == nr_pages_to_free)
+		return 0;
+
+	start_free = alloc->pages + alloc->nents - nr_pages_to_free;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/*
+	 * Clear the zone cache, we don't expect JIT allocations to be
+	 * shrunk in parts so there is no point trying to optimize for that
+	 * by scanning for the changes caused by freeing this memory and
+	 * updating the existing cache entries.
+	 */
+	kbase_zone_cache_clear(alloc);
+
+	kbase_mem_pool_free_pages(&kctx->mem_pool,
+				  nr_pages_to_free,
+				  start_free,
+				  syncback,
+				  reclaimed);
+
+	alloc->nents -= nr_pages_to_free;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		kbase_process_page_usage_dec(kctx, nr_pages_to_free);
+		new_page_count = kbase_atomic_sub_pages(nr_pages_to_free,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(nr_pages_to_free,
+				       &kctx->kbdev->memdev.used_pages);
+
+		kbase_tlstream_aux_pagesalloc(
+				(u32)kctx->id,
+				(u64)new_page_count);
+	}
+
+	return 0;
+}
+
+void kbase_mem_kref_free(struct kref *kref)
+{
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
+
+	switch (alloc->type) {
+	case KBASE_MEM_TYPE_NATIVE: {
+		WARN_ON(!alloc->imported.kctx);
+		/*
+		 * The physical allocation must have been removed from the
+		 * eviction list before trying to free it.
+		 */
+		WARN_ON(!list_empty(&alloc->evict_node));
+		kbase_free_phy_pages_helper(alloc, alloc->nents);
+		break;
+	}
+	case KBASE_MEM_TYPE_ALIAS: {
+		/* just call put on the underlying phy allocs */
+		size_t i;
+		struct kbase_aliased *aliased;
+
+		aliased = alloc->imported.alias.aliased;
+		if (aliased) {
+			for (i = 0; i < alloc->imported.alias.nents; i++)
+				if (aliased[i].alloc)
+					kbase_mem_phy_alloc_put(aliased[i].alloc);
+			vfree(aliased);
+		}
+		break;
+	}
+	case KBASE_MEM_TYPE_RAW:
+		/* raw pages, external cleanup */
+		break;
+ #ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ump_dd_release(alloc->imported.ump_handle);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		dma_buf_detach(alloc->imported.umm.dma_buf,
+			       alloc->imported.umm.dma_attachment);
+		dma_buf_put(alloc->imported.umm.dma_buf);
+		break;
+#endif
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		if (alloc->imported.user_buf.mm)
+			mmdrop(alloc->imported.user_buf.mm);
+		kfree(alloc->imported.user_buf.pages);
+		break;
+	case KBASE_MEM_TYPE_TB:{
+		void *tb;
+
+		tb = alloc->imported.kctx->jctx.tb;
+		kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
+		vfree(tb);
+		break;
+	}
+	default:
+		WARN(1, "Unexecpted free of type %d\n", alloc->type);
+		break;
+	}
+
+	/* Free based on allocation type */
+	if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+		vfree(alloc);
+	else
+		kfree(alloc);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(vsize > 0);
+
+	/* validate user provided arguments */
+	if (size > vsize || vsize > reg->nr_pages)
+		goto out_term;
+
+	/* Prevent vsize*sizeof from wrapping around.
+	 * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail.
+	 */
+	if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
+		goto out_term;
+
+	KBASE_DEBUG_ASSERT(0 != vsize);
+
+	if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
+		goto out_term;
+
+	reg->cpu_alloc->reg = reg;
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
+			goto out_rollback;
+		reg->gpu_alloc->reg = reg;
+	}
+
+	return 0;
+
+out_rollback:
+	kbase_free_phy_pages_helper(reg->cpu_alloc, size);
+out_term:
+	return -1;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+
+bool kbase_check_alloc_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Either the GPU or CPU must be reading from the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
+		return false;
+
+	/* Either the GPU or CPU must be writing to the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for allocating. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* BASE_MEM_IMPORT_SHARED is only valid for imported memory */
+	if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
+		return false;
+
+	return true;
+}
+
+bool kbase_check_import_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Imported memory cannot be GPU executable */
+	if (flags & BASE_MEM_PROT_GPU_EX)
+		return false;
+
+	/* Imported memory cannot grow on page fault */
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for importing. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* Secure memory cannot be read by the CPU */
+	if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD))
+		return false;
+
+	return true;
+}
+
+/**
+ * @brief Acquire the per-context region list lock
+ */
+void kbase_gpu_vm_lock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_lock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
+
+/**
+ * @brief Release the per-context region list lock
+ */
+void kbase_gpu_vm_unlock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_unlock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
+
+#ifdef CONFIG_DEBUG_FS
+struct kbase_jit_debugfs_data {
+	int (*func)(struct kbase_jit_debugfs_data *);
+	struct mutex lock;
+	struct kbase_context *kctx;
+	u64 active_value;
+	u64 pool_value;
+	u64 destroy_value;
+	char buffer[50];
+};
+
+static int kbase_jit_debugfs_common_open(struct inode *inode,
+		struct file *file, int (*func)(struct kbase_jit_debugfs_data *))
+{
+	struct kbase_jit_debugfs_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->func = func;
+	mutex_init(&data->lock);
+	data->kctx = (struct kbase_context *) inode->i_private;
+
+	file->private_data = data;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t kbase_jit_debugfs_common_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_jit_debugfs_data *data;
+	size_t size;
+	int ret;
+
+	data = (struct kbase_jit_debugfs_data *) file->private_data;
+	mutex_lock(&data->lock);
+
+	if (*ppos) {
+		size = strnlen(data->buffer, sizeof(data->buffer));
+	} else {
+		if (!data->func) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		if (data->func(data)) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		size = scnprintf(data->buffer, sizeof(data->buffer),
+				"%llu,%llu,%llu", data->active_value,
+				data->pool_value, data->destroy_value);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size);
+
+out_unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+static int kbase_jit_debugfs_common_release(struct inode *inode,
+		struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+	return kbase_jit_debugfs_common_open(inode, file, __func); \
+} \
+static const struct file_operations __fops = { \
+	.owner = THIS_MODULE, \
+	.open = __fops ## _open, \
+	.release = kbase_jit_debugfs_common_release, \
+	.read = kbase_jit_debugfs_common_read, \
+	.write = NULL, \
+	.llseek = generic_file_llseek, \
+}
+
+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct list_head *tmp;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each(tmp, &kctx->jit_active_head) {
+		data->active_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_pool_head) {
+		data->pool_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_destroy_head) {
+		data->destroy_value++;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
+		kbase_jit_debugfs_count_get);
+
+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->nr_pages;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops,
+		kbase_jit_debugfs_vm_get);
+
+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->gpu_alloc->nents;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
+		kbase_jit_debugfs_phys_get);
+
+void kbase_jit_debugfs_add(struct kbase_context *kctx)
+{
+	/* Debugfs entry for getting the number of JIT allocations. */
+	debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_count_fops);
+
+	/*
+	 * Debugfs entry for getting the total number of virtual pages
+	 * used by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_vm_fops);
+
+	/*
+	 * Debugfs entry for getting the number of physical pages used
+	 * by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_phys_fops);
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations
+ * @work: Work item
+ *
+ * This function does the work of freeing JIT allocations whose physical
+ * backing has been released.
+ */
+static void kbase_jit_destroy_worker(struct work_struct *work)
+{
+	struct kbase_context *kctx;
+	struct kbase_va_region *reg;
+
+	kctx = container_of(work, struct kbase_context, jit_work);
+	do {
+		mutex_lock(&kctx->jit_lock);
+		if (list_empty(&kctx->jit_destroy_head))
+			reg = NULL;
+		else
+			reg = list_first_entry(&kctx->jit_destroy_head,
+				struct kbase_va_region, jit_node);
+
+		if (reg) {
+			list_del(&reg->jit_node);
+			mutex_unlock(&kctx->jit_lock);
+
+			kbase_gpu_vm_lock(kctx);
+			kbase_mem_free_region(kctx, reg);
+			kbase_gpu_vm_unlock(kctx);
+		} else
+			mutex_unlock(&kctx->jit_lock);
+	} while (reg);
+}
+
+int kbase_jit_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->jit_active_head);
+	INIT_LIST_HEAD(&kctx->jit_pool_head);
+	INIT_LIST_HEAD(&kctx->jit_destroy_head);
+	mutex_init(&kctx->jit_lock);
+	INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
+
+	return 0;
+}
+
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info)
+{
+	struct kbase_va_region *reg = NULL;
+	struct kbase_va_region *walker;
+	struct kbase_va_region *temp;
+	size_t current_diff = SIZE_MAX;
+
+	int ret;
+
+	mutex_lock(&kctx->jit_lock);
+	/*
+	 * Scan the pool for an existing allocation which meets our
+	 * requirements and remove it.
+	 */
+	list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) {
+
+		if (walker->nr_pages >= info->va_pages) {
+			size_t min_size, max_size, diff;
+
+			/*
+			 * The JIT allocations VA requirements have been
+			 * meet, it's suitable but other allocations
+			 * might be a better fit.
+			 */
+			min_size = min_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			max_size = max_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			diff = max_size - min_size;
+
+			if (current_diff > diff) {
+				current_diff = diff;
+				reg = walker;
+			}
+
+			/* The allocation is an exact match, stop looking */
+			if (current_diff == 0)
+				break;
+		}
+	}
+
+	if (reg) {
+		/*
+		 * Remove the found region from the pool and add it to the
+		 * active list.
+		 */
+		list_del_init(&reg->jit_node);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+
+		/* Release the jit lock before modifying the allocation */
+		mutex_unlock(&kctx->jit_lock);
+
+		kbase_gpu_vm_lock(kctx);
+
+		/* Make the physical backing no longer reclaimable */
+		if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+			goto update_failed;
+
+		/* Grow the backing if required */
+		if (reg->gpu_alloc->nents < info->commit_pages) {
+			size_t delta;
+			size_t old_size = reg->gpu_alloc->nents;
+
+			/* Allocate some more pages */
+			delta = info->commit_pages - reg->gpu_alloc->nents;
+			if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta)
+					!= 0)
+				goto update_failed;
+
+			if (reg->cpu_alloc != reg->gpu_alloc) {
+				if (kbase_alloc_phy_pages_helper(
+						reg->cpu_alloc, delta) != 0) {
+					kbase_free_phy_pages_helper(
+							reg->gpu_alloc, delta);
+					goto update_failed;
+				}
+			}
+
+			ret = kbase_mem_grow_gpu_mapping(kctx, reg,
+					info->commit_pages, old_size);
+			/*
+			 * The grow failed so put the allocation back in the
+			 * pool and return failure.
+			 */
+			if (ret)
+				goto update_failed;
+		}
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* No suitable JIT allocation was found so create a new one */
+		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+				BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+				BASE_MEM_COHERENT_LOCAL;
+		u64 gpu_addr;
+		u16 alignment;
+
+		mutex_unlock(&kctx->jit_lock);
+
+		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
+				info->extent, &flags, &gpu_addr, &alignment);
+		if (!reg)
+			goto out_unlocked;
+
+		mutex_lock(&kctx->jit_lock);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+		mutex_unlock(&kctx->jit_lock);
+	}
+
+	return reg;
+
+update_failed:
+	/*
+	 * An update to an allocation from the pool failed, chances
+	 * are slim a new allocation would fair any better so return
+	 * the allocation to the pool and return the function with failure.
+	 */
+	kbase_gpu_vm_unlock(kctx);
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_lock);
+out_unlocked:
+	return NULL;
+}
+
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	/* The physical backing of memory in the pool is always reclaimable */
+	down_read(&kctx->process_mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+	kbase_mem_evictable_make(reg->gpu_alloc);
+	kbase_gpu_vm_unlock(kctx);
+	up_read(&kctx->process_mm->mmap_sem);
+
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_lock);
+}
+
+void kbase_jit_backing_lost(struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = reg->kctx;
+
+	/*
+	 * JIT allocations will always be on a list, if the region
+	 * is not on a list then it's not a JIT allocation.
+	 */
+	if (list_empty(&reg->jit_node))
+		return;
+
+	/*
+	 * Freeing the allocation requires locks we might not be able
+	 * to take now, so move the allocation to the free list and kick
+	 * the worker which will do the freeing.
+	 */
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_destroy_head);
+	mutex_unlock(&kctx->jit_lock);
+
+	schedule_work(&kctx->jit_work);
+}
+
+bool kbase_jit_evict(struct kbase_context *kctx)
+{
+	struct kbase_va_region *reg = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Free the oldest allocation from the pool */
+	mutex_lock(&kctx->jit_lock);
+	if (!list_empty(&kctx->jit_pool_head)) {
+		reg = list_entry(kctx->jit_pool_head.prev,
+				struct kbase_va_region, jit_node);
+		list_del(&reg->jit_node);
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	if (reg)
+		kbase_mem_free_region(kctx, reg);
+
+	return (reg != NULL);
+}
+
+void kbase_jit_term(struct kbase_context *kctx)
+{
+	struct kbase_va_region *walker;
+
+	/* Free all allocations for this context */
+
+	/*
+	 * Flush the freeing of allocations whose backing has been freed
+	 * (i.e. everything in jit_destroy_head).
+	 */
+	cancel_work_sync(&kctx->jit_work);
+
+	kbase_gpu_vm_lock(kctx);
+	/* Free all allocations from the pool */
+	while (!list_empty(&kctx->jit_pool_head)) {
+		walker = list_first_entry(&kctx->jit_pool_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		kbase_mem_free_region(kctx, walker);
+	}
+
+	/* Free all allocations from active list */
+	while (!list_empty(&kctx->jit_active_head)) {
+		walker = list_first_entry(&kctx->jit_active_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		kbase_mem_free_region(kctx, walker);
+	}
+	kbase_gpu_vm_unlock(kctx);
+}
+
+static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	long pinned_pages;
+	struct kbase_mem_phy_alloc *alloc;
+	struct page **pages;
+	phys_addr_t *pa;
+	long i;
+	int err = -ENOMEM;
+	unsigned long address;
+	struct mm_struct *mm;
+	struct device *dev;
+	unsigned long offset;
+	unsigned long local_size;
+
+	alloc = reg->gpu_alloc;
+	pa = kbase_get_gpu_phy_pages(reg);
+	address = alloc->imported.user_buf.address;
+	mm = alloc->imported.user_buf.mm;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+
+	pages = alloc->imported.user_buf.pages;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	pinned_pages = get_user_pages(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#else
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#endif
+
+	if (pinned_pages <= 0)
+		return pinned_pages;
+
+	if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+		for (i = 0; i < pinned_pages; i++)
+			put_page(pages[i]);
+		return -ENOMEM;
+	}
+
+	dev = kctx->kbdev->dev;
+	offset = address & ~PAGE_MASK;
+	local_size = alloc->imported.user_buf.size;
+
+	for (i = 0; i < pinned_pages; i++) {
+		dma_addr_t dma_addr;
+		unsigned long min;
+
+		min = MIN(PAGE_SIZE - offset, local_size);
+		dma_addr = dma_map_page(dev, pages[i],
+				offset, min,
+				DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, dma_addr))
+			goto unwind;
+
+		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
+		pa[i] = page_to_phys(pages[i]);
+
+		local_size -= min;
+		offset = 0;
+	}
+
+	alloc->nents = pinned_pages;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
+			kbase_reg_current_backed_size(reg),
+			reg->flags);
+	if (err == 0)
+		return 0;
+
+	alloc->nents = 0;
+	/* fall down */
+unwind:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				alloc->imported.user_buf.dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		put_page(pages[i]);
+		pages[i] = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable)
+{
+	long i;
+	struct page **pages;
+	unsigned long size = alloc->imported.user_buf.size;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	pages = alloc->imported.user_buf.pages;
+	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
+		unsigned long local_size;
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
+		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+				DMA_BIDIRECTIONAL);
+		if (writeable)
+			set_page_dirty_lock(pages[i]);
+		put_page(pages[i]);
+		pages[i] = NULL;
+
+		size -= local_size;
+	}
+	alloc->nents = 0;
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kbase_jd_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	struct sg_table *sgt;
+	struct scatterlist *s;
+	int i;
+	phys_addr_t *pa;
+	int err;
+	size_t count = 0;
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = reg->gpu_alloc;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+	KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
+	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
+			DMA_BIDIRECTIONAL);
+
+	if (IS_ERR_OR_NULL(sgt))
+		return -EINVAL;
+
+	/* save for later */
+	alloc->imported.umm.sgt = sgt;
+
+	pa = kbase_get_gpu_phy_pages(reg);
+	KBASE_DEBUG_ASSERT(pa);
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		int j;
+		size_t pages = PFN_UP(sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+		"sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+		sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+		(unsigned long long) sg_dma_address(s));
+
+		for (j = 0; (j < pages) && (count < reg->nr_pages); j++,
+				count++)
+			*pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
+		WARN_ONCE(j < pages,
+		"sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+		alloc->imported.umm.dma_buf->size);
+	}
+
+	if (WARN_ONCE(count < reg->nr_pages,
+			"sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+			alloc->imported.umm.dma_buf->size)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Update nents as we now have pages to map */
+	alloc->nents = count;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+			kbase_get_gpu_phy_pages(reg),
+			kbase_reg_current_backed_size(reg),
+			reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+
+out:
+	if (err) {
+		dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+				alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+		alloc->imported.umm.sgt = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_umm_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(alloc);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+	    alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+	alloc->nents = 0;
+}
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+
+#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) \
+		|| defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS)
+static void add_kds_resource(struct kds_resource *kds_res,
+		struct kds_resource **kds_resources, u32 *kds_res_count,
+		unsigned long *kds_access_bitmap, bool exclusive)
+{
+	u32 i;
+
+	for (i = 0; i < *kds_res_count; i++) {
+		/* Duplicate resource, ignore */
+		if (kds_resources[i] == kds_res)
+			return;
+	}
+
+	kds_resources[*kds_res_count] = kds_res;
+	if (exclusive)
+		set_bit(*kds_res_count, kds_access_bitmap);
+	(*kds_res_count)++;
+}
+#endif
+
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+		, u32 *kds_res_count, struct kds_resource **kds_resources,
+		unsigned long *kds_access_bitmap, bool exclusive
+#endif
+		)
+{
+	int err;
+
+	/* decide what needs to happen for this resource */
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		if (reg->gpu_alloc->imported.user_buf.mm != locked_mm)
+			goto exit;
+
+		reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
+			err = kbase_jd_user_buf_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+	}
+	break;
+	case KBASE_MEM_TYPE_IMPORTED_UMP: {
+#if defined(CONFIG_KDS) && defined(CONFIG_UMP)
+		if (kds_res_count) {
+			struct kds_resource *kds_res;
+
+			kds_res = ump_dd_kds_resource_get(
+					reg->gpu_alloc->imported.ump_handle);
+			if (kds_res)
+				add_kds_resource(kds_res, kds_resources,
+						kds_res_count,
+						kds_access_bitmap, exclusive);
+		}
+#endif				/*defined(CONFIG_KDS) && defined(CONFIG_UMP) */
+		break;
+	}
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+		if (kds_res_count) {
+			struct kds_resource *kds_res;
+
+			kds_res = get_dma_buf_kds_resource(
+					reg->gpu_alloc->imported.umm.dma_buf);
+			if (kds_res)
+				add_kds_resource(kds_res, kds_resources,
+						kds_res_count,
+						kds_access_bitmap, exclusive);
+		}
+#endif
+		reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+			err = kbase_jd_umm_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+		break;
+	}
+#endif
+	default:
+		goto exit;
+	}
+
+	return kbase_mem_phy_alloc_get(reg->gpu_alloc);
+exit:
+	return NULL;
+}
+
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+	switch (alloc->type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		alloc->imported.umm.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.umm.current_mapping_usage_count) {
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			kbase_jd_umm_unmap(kctx, alloc);
+		}
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		alloc->imported.user_buf.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
+			bool writeable = true;
+
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+				writeable = false;
+
+			kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+		}
+	}
+	break;
+	default:
+	break;
+	}
+	kbase_mem_phy_alloc_put(alloc);
+}
+
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *meta = NULL;
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Walk the per context external resource metadata list for the
+	 * metadata which matches the region which is being acquired.
+	 */
+	list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
+		if (walker->gpu_addr == gpu_addr) {
+			meta = walker;
+			break;
+		}
+	}
+
+	/* No metadata exists so create one. */
+	if (!meta) {
+		struct kbase_va_region *reg;
+
+		/* Find the region */
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				kctx, gpu_addr);
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+			goto failed;
+
+		/* Allocate the metadata object */
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			goto failed;
+
+		/*
+		 * Fill in the metadata object and acquire a reference
+		 * for the physical resource.
+		 */
+		meta->alloc = kbase_map_external_resource(kctx, reg, NULL
+#ifdef CONFIG_KDS
+				, NULL, NULL,
+				NULL, false
+#endif
+				);
+
+		if (!meta->alloc)
+			goto fail_map;
+
+		meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+
+		list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
+	}
+
+	return meta;
+
+fail_map:
+	kfree(meta);
+failed:
+	return NULL;
+}
+
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+	struct kbase_va_region *reg;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Search of the metadata if one isn't provided. */
+	if (!meta) {
+		/*
+		 * Walk the per context external resource metadata list for the
+		 * metadata which matches the region which is being released.
+		 */
+		list_for_each_entry(walker, &kctx->ext_res_meta_head,
+				ext_res_node) {
+			if (walker->gpu_addr == gpu_addr) {
+				meta = walker;
+				break;
+			}
+		}
+	}
+
+	/* No metadata so just return. */
+	if (!meta)
+		return false;
+
+	/* Drop the physical memory reference and free the metadata. */
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx,
+			meta->gpu_addr);
+
+	kbase_unmap_external_resource(kctx, reg, meta->alloc);
+	list_del(&meta->ext_res_node);
+	kfree(meta);
+
+	return true;
+}
+
+int kbase_sticky_resource_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->ext_res_meta_head);
+
+	return 0;
+}
+
+void kbase_sticky_resource_term(struct kbase_context *kctx)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Free any sticky resources which haven't been unmapped.
+	 *
+	 * Note:
+	 * We don't care about refcounts at this point as no future
+	 * references to the meta data will be made.
+	 * Region termination would find these if we didn't free them
+	 * here, but it's more efficient if we do the clean up here.
+	 */
+	while (!list_empty(&kctx->ext_res_meta_head)) {
+		walker = list_first_entry(&kctx->ext_res_meta_head,
+				struct kbase_ctx_ext_res_meta, ext_res_node);
+
+		kbase_sticky_resource_release(kctx, walker, 0);
+	}
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
new file mode 100755
index 0000000..7b2433e
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -0,0 +1,1057 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/kref.h>
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include "mali_kbase_gator.h"
+#endif
+/* Required for kbase_mem_evictable_unmake */
+#include "mali_kbase_mem_linux.h"
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)	/* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
+The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and
+page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table
+updates and generates duplicate page faults as the page table information used by the MMU is not valid.   */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3)	/* round to 8 pages */
+
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0)	/* round to 1 page */
+
+/* This must always be a power of 2 */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/**
+ * A CPU mapping
+ */
+struct kbase_cpu_mapping {
+	struct   list_head mappings_list;
+	struct   kbase_mem_phy_alloc *alloc;
+	struct   kbase_context *kctx;
+	struct   kbase_va_region *region;
+	pgoff_t  page_off;
+	int      count;
+	unsigned long vm_start;
+	unsigned long vm_end;
+};
+
+enum kbase_memory_type {
+	KBASE_MEM_TYPE_NATIVE,
+	KBASE_MEM_TYPE_IMPORTED_UMP,
+	KBASE_MEM_TYPE_IMPORTED_UMM,
+	KBASE_MEM_TYPE_IMPORTED_USER_BUF,
+	KBASE_MEM_TYPE_ALIAS,
+	KBASE_MEM_TYPE_TB,
+	KBASE_MEM_TYPE_RAW
+};
+
+/* internal structure, mirroring base_mem_aliasing_info,
+ * but with alloc instead of a gpu va (handle) */
+struct kbase_aliased {
+	struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
+	u64 offset; /* in pages */
+	u64 length; /* in pages */
+};
+
+/**
+ * @brief Physical pages tracking object properties
+  */
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED  (1ul << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE            (1ul << 1)
+
+/* physical pages tracking object.
+ * Set up to track N pages.
+ * N not stored here, the creator holds that info.
+ * This object only tracks how many elements are actually valid (present).
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not
+ * shared with another region or client. CPU mappings are OK to exist when changing, as
+ * long as the tracked mappings objects are updated as part of the change.
+ */
+struct kbase_mem_phy_alloc {
+	struct kref           kref; /* number of users of this alloc */
+	atomic_t              gpu_mappings;
+	size_t                nents; /* 0..N */
+	phys_addr_t           *pages; /* N elements, only 0..nents are valid */
+
+	/* kbase_cpu_mappings */
+	struct list_head      mappings;
+
+	/* Node used to store this allocation on the eviction list */
+	struct list_head      evict_node;
+	/* Physical backing size when the pages where evicted */
+	size_t                evicted;
+	/*
+	 * Back reference to the region structure which created this
+	 * allocation, or NULL if it has been freed.
+	 */
+	struct kbase_va_region *reg;
+
+	/* type of buffer */
+	enum kbase_memory_type type;
+
+	unsigned long properties;
+
+	struct list_head       zone_cache;
+
+	/* member in union valid based on @a type */
+	union {
+#ifdef CONFIG_UMP
+		ump_dd_handle ump_handle;
+#endif /* CONFIG_UMP */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		struct {
+			struct dma_buf *dma_buf;
+			struct dma_buf_attachment *dma_attachment;
+			unsigned int current_mapping_usage_count;
+			struct sg_table *sgt;
+		} umm;
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+		struct {
+			u64 stride;
+			size_t nents;
+			struct kbase_aliased *aliased;
+		} alias;
+		/* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
+		struct kbase_context *kctx;
+		struct {
+			unsigned long address;
+			unsigned long size;
+			unsigned long nr_pages;
+			struct page **pages;
+			unsigned int current_mapping_usage_count;
+			struct mm_struct *mm;
+			dma_addr_t *dma_addrs;
+		} user_buf;
+	} imported;
+};
+
+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		atomic_inc(&alloc->gpu_mappings);
+}
+
+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+			pr_err("Mismatched %s:\n", __func__);
+			dump_stack();
+		}
+}
+
+void kbase_mem_kref_free(struct kref *kref);
+
+int kbase_mem_init(struct kbase_device *kbdev);
+void kbase_mem_halt(struct kbase_device *kbdev);
+void kbase_mem_term(struct kbase_device *kbdev);
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_get(&alloc->kref);
+	return alloc;
+}
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_put(&alloc->kref, kbase_mem_kref_free);
+	return NULL;
+}
+
+/**
+ * A GPU memory region, and attributes for CPU mappings.
+ */
+struct kbase_va_region {
+	struct rb_node rblink;
+	struct list_head link;
+
+	struct kbase_context *kctx;	/* Backlink to base context */
+
+	u64 start_pfn;		/* The PFN in GPU space */
+	size_t nr_pages;
+
+/* Free region */
+#define KBASE_REG_FREE              (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR            (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR            (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX            (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED        (1ul << 4)
+/* Is GPU cached? */
+#define KBASE_REG_GPU_CACHED        (1ul << 5)
+
+#define KBASE_REG_GROWABLE          (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW           (1ul << 7)
+
+/* VA managed by us */
+#define KBASE_REG_CUSTOM_VA         (1ul << 8)
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN          (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH        (1ul << 10)
+
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_MASK         (3ul << 11)
+#define KBASE_REG_ZONE(x)           (((x) & 3) << 11)
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD            (1ul<<13)
+/* CPU read access */
+#define KBASE_REG_CPU_RD            (1ul<<14)
+
+/* Aligned for GPU EX in SAME_VA */
+#define KBASE_REG_ALIGNED           (1ul<<15)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK      (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x)  (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x)  (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_SECURE            (1ul << 19)
+
+#define KBASE_REG_DONT_NEED         (1ul << 20)
+
+#define KBASE_REG_ZONE_SAME_VA      KBASE_REG_ZONE(0)
+
+/* only used with 32-bit clients */
+/*
+ * On a 32bit platform, custom VA should be wired from (4GB + shader region)
+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink
+ * it down, if required by the GPU, during initialization.
+ */
+
+/*
+ * Dedicated 16MB region for shader code:
+ * VA range 0x101000000-0x102000000
+ */
+#define KBASE_REG_ZONE_EXEC         KBASE_REG_ZONE(1)
+#define KBASE_REG_ZONE_EXEC_BASE    (0x101000000ULL >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_SIZE    ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
+
+#define KBASE_REG_ZONE_CUSTOM_VA         KBASE_REG_ZONE(2)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE    (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE    (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
+
+	unsigned long flags;
+
+	size_t extent; /* nr of pages alloc'd on PF */
+
+	struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */
+	struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */
+
+	/* non-NULL if this memory object is a kds_resource */
+	struct kds_resource *kds_res;
+
+	/* List head used to store the region in the JIT allocation pool */
+	struct list_head jit_node;
+};
+
+/* Common functions */
+static inline phys_addr_t *kbase_get_cpu_phy_pages(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->pages;
+}
+
+static inline phys_addr_t *kbase_get_gpu_phy_pages(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->gpu_alloc->pages;
+}
+
+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	/* if no alloc object the backed size naturally is 0 */
+	if (!reg->cpu_alloc)
+		return 0;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->nents;
+}
+
+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
+
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
+{
+	struct kbase_mem_phy_alloc *alloc;
+	size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+	size_t per_page_size = sizeof(*alloc->pages);
+
+	/* Imported pages may have page private data already in use */
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		alloc_size += nr_pages *
+				sizeof(*alloc->imported.user_buf.dma_addrs);
+		per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
+	}
+
+	/*
+	 * Prevent nr_pages*per_page_size + sizeof(*alloc) from
+	 * wrapping around.
+	 */
+	if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+			/ per_page_size))
+		return ERR_PTR(-ENOMEM);
+
+	/* Allocate based on the size to reduce internal fragmentation of vmem */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc = vzalloc(alloc_size);
+	else
+		alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+	if (!alloc)
+		return ERR_PTR(-ENOMEM);
+
+	/* Store allocation method */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+	kref_init(&alloc->kref);
+	atomic_set(&alloc->gpu_mappings, 0);
+	alloc->nents = 0;
+	alloc->pages = (void *)(alloc + 1);
+	INIT_LIST_HEAD(&alloc->mappings);
+	alloc->type = type;
+	INIT_LIST_HEAD(&alloc->zone_cache);
+
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
+		alloc->imported.user_buf.dma_addrs =
+				(void *) (alloc->pages + nr_pages);
+
+	return alloc;
+}
+
+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
+		struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+	reg->cpu_alloc = kbase_alloc_create(reg->nr_pages,
+			KBASE_MEM_TYPE_NATIVE);
+	if (IS_ERR(reg->cpu_alloc))
+		return PTR_ERR(reg->cpu_alloc);
+	else if (!reg->cpu_alloc)
+		return -ENOMEM;
+	reg->cpu_alloc->imported.kctx = kctx;
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+	if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) {
+		reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
+				KBASE_MEM_TYPE_NATIVE);
+		reg->gpu_alloc->imported.kctx = kctx;
+		INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+	} else {
+		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	}
+
+	INIT_LIST_HEAD(&reg->jit_node);
+	reg->flags &= ~KBASE_REG_FREE;
+	return 0;
+}
+
+static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_add_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_sub_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+/*
+ * Max size for kbdev memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * Max size for kctx memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KCTX  (SZ_64M >> PAGE_SHIFT)
+
+/**
+ * kbase_mem_pool_init - Create a memory pool for a kbase device
+ * @pool:      Memory pool to initialize
+ * @max_size:  Maximum number of free pages the pool can hold
+ * @kbdev:     Kbase device where memory is used
+ * @next_pool: Pointer to the next pool or NULL.
+ *
+ * Allocations from @pool are in whole pages. Each @pool has a free list where
+ * pages can be quickly allocated from. The free list is initially empty and
+ * filled whenever pages are freed back to the pool. The number of free pages
+ * in the pool will in general not exceed @max_size, but the pool may in
+ * certain corner cases grow above @max_size.
+ *
+ * If @next_pool is not NULL, we will allocate from @next_pool before going to
+ * the kernel allocator. Similarily pages can spill over to @next_pool when
+ * @pool is full. Pages are zeroed before they spill over to another pool, to
+ * prevent leaking information between applications.
+ *
+ * A shrinker is registered so that Linux mm can reclaim pages from the pool as
+ * needed.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool);
+
+/**
+ * kbase_mem_pool_term - Destroy a memory pool
+ * @pool:  Memory pool to destroy
+ *
+ * Pages in the pool will spill over to @next_pool (if available) or freed to
+ * the kernel.
+ */
+void kbase_mem_pool_term(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_alloc - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * Allocations from the pool are made as follows:
+ * 1. If there are free pages in the pool, allocate a page from @pool.
+ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page
+ *    from @next_pool.
+ * 3. Finally, allocate a page from the kernel.
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ */
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_free - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @page:  Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * Pages are freed to the pool as follows:
+ * 1. If @pool is not full, add @page to @pool.
+ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to
+ *    @next_pool.
+ * 3. Finally, free @page to the kernel.
+ */
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page,
+		bool dirty);
+
+/**
+ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool
+ * @pool:     Memory pool to allocate from
+ * @nr_pages: Number of pages to allocate
+ * @pages:    Pointer to array where the physical address of the allocated
+ *            pages will be stored.
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages);
+
+/**
+ * kbase_mem_pool_free_pages - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages, bool dirty, bool reclaimed);
+
+/**
+ * kbase_mem_pool_size - Get number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Note: the size of the pool may in certain corner cases exceed @max_size!
+ *
+ * Return: Number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool)
+{
+	return ACCESS_ONCE(pool->cur_size);
+}
+
+/**
+ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Return: Maximum number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool)
+{
+	return pool->max_size;
+}
+
+
+/**
+ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool
+ * @pool:     Memory pool to inspect
+ * @max_size: Maximum number of free pages the pool can hold
+ *
+ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit.
+ * For details see kbase_mem_pool_shrink().
+ */
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
+
+/**
+ * kbase_mem_pool_trim - Grow or shrink the pool to a new size
+ * @pool:     Memory pool to trim
+ * @new_size: New number of pages in the pool
+ *
+ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but
+ * not above @max_size.
+ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel.
+ *
+ * Return: The new size of the pool
+ */
+size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
+
+
+int kbase_region_tracker_init(struct kbase_context *kctx);
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages);
+void kbase_region_tracker_term(struct kbase_context *kctx);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * @brief Check that a pointer is actually a valid region.
+ *
+ * Must be called with context lock held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr);
+
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone);
+void kbase_free_alloced_region(struct kbase_va_region *reg);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+bool kbase_check_alloc_flags(unsigned long flags);
+bool kbase_check_import_flags(unsigned long flags);
+void kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags);
+
+void kbase_gpu_vm_lock(struct kbase_context *kctx);
+void kbase_gpu_vm_unlock(struct kbase_context *kctx);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
+
+int kbase_mmu_init(struct kbase_context *kctx);
+void kbase_mmu_term(struct kbase_context *kctx);
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
+void kbase_mmu_free_pgd(struct kbase_context *kctx);
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					phys_addr_t phys, size_t nr,
+					unsigned long flags);
+
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr);
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags);
+
+/**
+ * @brief Register region and map it on the GPU.
+ *
+ * Call kbase_add_va_region() and map the region on the GPU.
+ */
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+/**
+ * @brief Remove the region from the GPU and unregister it.
+ *
+ * Must be called with context lock held.
+ */
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_as::transaction_mutex on kctx's address space
+ * - It must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_mmu_update(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable() - Disable the MMU for a previously active kbase context.
+ * @kctx:	Kbase context
+ *
+ * Disable and perform the required cache maintenance to remove the all
+ * data from provided kbase context from the GPU caches.
+ *
+ * The caller has the following locking conditions:
+ * - It must hold kbase_as::transaction_mutex on kctx's address space
+ * - It must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_mmu_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified
+ * address space.
+ * @kbdev:	Kbase device
+ * @as_nr:	The address space number to set to unmapped.
+ *
+ * This function must only be called during reset/power-up and it used to
+ * ensure the registers are in a known state.
+ *
+ * The caller must hold kbdev->as[as_nr].transaction_mutex.
+ */
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr);
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** Dump the MMU tables to a buffer
+ *
+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the
+ * buffer is too small then the return value will be NULL.
+ *
+ * The GPU vm lock must be held when calling this function.
+ *
+ * The buffer returned should be freed with @ref vfree when it is no longer required.
+ *
+ * @param[in]   kctx        The kbase context to dump
+ * @param[in]   nr_pages    The number of pages to allocate for the buffer.
+ *
+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too
+ * small)
+ */
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages);
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset);
+void kbase_sync_single(struct kbase_context *kctx, phys_addr_t cpu_pa,
+		phys_addr_t gpu_pa, off_t offset, size_t size,
+		enum kbase_sync_type sync_fn);
+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+
+/* OS specific functions */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr);
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg);
+void kbase_os_mem_map_lock(struct kbase_context *kctx);
+void kbase_os_mem_map_unlock(struct kbase_context *kctx);
+
+/**
+ * @brief Update the memory allocation counters for the current process
+ *
+ * OS specific call to updates the current memory allocation counters for the current process with
+ * the supplied delta.
+ *
+ * @param[in] kctx  The kbase context
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
+
+/**
+ * @brief Add to the memory allocation counters for the current process
+ *
+ * OS specific call to add to the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, pages);
+}
+
+/**
+ * @brief Subtract from the memory allocation counters for the current process
+ *
+ * OS specific call to subtract from the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, 0 - pages);
+}
+
+/**
+ * @brief Find the offset of the CPU mapping of a memory allocation containing
+ *        a given address range
+ *
+ * Searches for a CPU mapping of any part of the region starting at @p gpu_addr
+ * that fully encloses the CPU virtual address range specified by @p uaddr and
+ * @p size. Returns a failure indication if only part of the address range lies
+ * within a CPU mapping, or the address range lies within a CPU mapping of a
+ * different region.
+ *
+ * @param[in,out] kctx      The kernel base context used for the allocation.
+ * @param[in]     gpu_addr  GPU address of the start of the allocated region
+ *                          within which to search.
+ * @param[in]     uaddr     Start of the CPU virtual address range.
+ * @param[in]     size      Size of the CPU virtual address range (in bytes).
+ * @param[out]    offset    The offset from the start of the allocation to the
+ *                          specified CPU virtual address.
+ *
+ * @return 0 if offset was obtained successfully. Error code
+ *         otherwise.
+ */
+int kbasep_find_enclosing_cpu_mapping_offset(struct kbase_context *kctx,
+							u64 gpu_addr,
+							unsigned long uaddr,
+							size_t size,
+							u64 *offset);
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer);
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+* @brief Allocates physical pages.
+*
+* Allocates \a nr_pages_requested and updates the alloc object.
+*
+* @param[in] alloc allocation object to add pages to
+* @param[in] nr_pages_requested number of physical pages to allocate
+*
+* @return 0 if all pages have been successfully allocated. Error code otherwise
+*/
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested);
+
+/**
+* @brief Free physical pages.
+*
+* Frees \a nr_pages and updates the alloc object.
+*
+* @param[in] alloc allocation object to free pages from
+* @param[in] nr_pages_to_free number of physical pages to free
+*/
+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
+
+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+{
+	SetPagePrivate(p);
+	if (sizeof(dma_addr_t) > sizeof(p->private)) {
+		/* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
+		 * private field stays the same. So we have to be clever and
+		 * use the fact that we only store DMA addresses of whole pages,
+		 * so the low bits should be zero */
+		KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
+		set_page_private(p, dma_addr >> PAGE_SHIFT);
+	} else {
+		set_page_private(p, dma_addr);
+	}
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+	if (sizeof(dma_addr_t) > sizeof(p->private))
+		return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
+
+	return (dma_addr_t)page_private(p);
+}
+
+static inline void kbase_clear_dma_addr(struct page *p)
+{
+	ClearPagePrivate(p);
+}
+
+/**
+* @brief Process a bus or page fault.
+*
+* This function will process a fault on a specific address space
+*
+* @param[in] kbdev   The @ref kbase_device the fault happened on
+* @param[in] kctx    The @ref kbase_context for the faulting address space if
+*                    one was found.
+* @param[in] as      The address space that has the fault
+*/
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_as *as);
+
+/**
+ * @brief Process a page fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void page_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Process a bus fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void bus_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Flush MMU workqueues.
+ *
+ * This function will cause any outstanding page or bus faults to be processed.
+ * It should be called prior to powering off the GPU.
+ *
+ * @param[in] kbdev   Device pointer
+ */
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev);
+
+/**
+ * kbase_sync_single_for_device - update physical memory and give GPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * kbase_jit_debugfs_add - Add per context debugfs entry for JIT.
+ * @kctx: kbase context
+ */
+void kbase_jit_debugfs_add(struct kbase_context *kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_init - Initialize the JIT memory pool management
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_jit_init(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_allocate - Allocate JIT memory
+ * @kctx: kbase context
+ * @info: JIT allocation information
+ *
+ * Return: JIT allocation on success or NULL on failure.
+ */
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info);
+
+/**
+ * kbase_jit_free - Free a JIT allocation
+ * @kctx: kbase context
+ * @reg: JIT allocation
+ *
+ * Frees a JIT allocation and places it into the free pool for later reuse.
+ */
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing
+ * @reg: JIT allocation
+ */
+void kbase_jit_backing_lost(struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_evict - Evict a JIT allocation from the pool
+ * @kctx: kbase context
+ *
+ * Evict the least recently used JIT allocation from the pool. This can be
+ * required if normal VA allocations are failing due to VA exhaustion.
+ *
+ * Return: True if a JIT allocation was freed, false otherwise.
+ */
+bool kbase_jit_evict(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_term - Terminate the JIT memory pool management
+ * @kctx: kbase context
+ */
+void kbase_jit_term(struct kbase_context *kctx);
+
+/**
+ * kbase_map_external_resource - Map an external resource to the GPU.
+ * @kctx:              kbase context.
+ * @reg:               The region to map.
+ * @locked_mm:         The mm_struct which has been locked for this operation.
+ * @kds_res_count:     The number of KDS resources.
+ * @kds_resources:     Array of KDS resources.
+ * @kds_access_bitmap: Access bitmap for KDS.
+ * @exclusive:         If the KDS resource requires exclusive access.
+ *
+ * Return: The physical allocation which backs the region on success or NULL
+ * on failure.
+ */
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+		, u32 *kds_res_count, struct kds_resource **kds_resources,
+		unsigned long *kds_access_bitmap, bool exclusive
+#endif
+		);
+
+/**
+ * kbase_unmap_external_resource - Unmap an external resource from the GPU.
+ * @kctx:  kbase context.
+ * @reg:   The region to unmap or NULL if it has already been released.
+ * @alloc: The physical allocation being unmapped.
+ */
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_sticky_resource_init - Initialize sticky resource management.
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_sticky_resource_init(struct kbase_context *kctx);
+
+/**
+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @gpu_addr: The GPU address of the external resource.
+ *
+ * Return: The metadata object which represents the binding between the
+ * external resource and the kbase context on success or NULL on failure.
+ */
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_release - Release a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @meta:     Binding metadata.
+ * @gpu_addr: GPU address of the external resource.
+ *
+ * If meta is NULL then gpu_addr will be used to scan the metadata list and
+ * find the matching metadata (if any), otherwise the provided meta will be
+ * used and gpu_addr will be ignored.
+ *
+ * Return: True if the release found the metadata and the reference was dropped.
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_term - Terminate sticky resource management.
+ * @kctx: kbase context
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx);
+
+/**
+ * kbase_zone_cache_update - Update the memory zone cache after new pages have
+ * been added.
+ * @alloc:        The physical memory allocation to build the cache for.
+ * @start_offset: Offset to where the new pages start.
+ *
+ * Updates an existing memory zone cache, updating the counters for the
+ * various zones.
+ * If the memory allocation doesn't already have a zone cache assume that
+ * one isn't created and thus don't do anything.
+ *
+ * Return: Zero cache was updated, negative error code on error.
+ */
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset);
+
+/**
+ * kbase_zone_cache_build - Build the memory zone cache.
+ * @alloc:        The physical memory allocation to build the cache for.
+ *
+ * Create a new zone cache for the provided physical memory allocation if
+ * one doesn't already exist, if one does exist then just return.
+ *
+ * Return: Zero if the zone cache was created, negative error code on error.
+ */
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_zone_cache_clear - Clear the memory zone cache.
+ * @alloc:        The physical memory allocation to clear the cache on.
+ */
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc);
+
+#endif				/* _KBASE_MEM_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
new file mode 100755
index 0000000..f91d3c9
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -0,0 +1,2829 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.c
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	#include <linux/dma-attrs.h>
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)  */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/shrinker.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_tlstream.h>
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+static const struct vm_operations_struct kbase_vm_ops;
+
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ *
+ * Note: Caller must be holding the processes mmap_sem lock.
+ */
+static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment)
+{
+	int zone;
+	int gpu_pc_bits;
+	int cpu_va_bits;
+	struct kbase_va_region *reg;
+	struct device *dev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_alignment);
+
+	dev = kctx->kbdev->dev;
+	*va_alignment = 0; /* no alignment by default */
+	*gpu_va = 0; /* return 0 on failure */
+
+	gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	cpu_va_bits = BITS_PER_LONG;
+
+	if (0 == va_pages) {
+		dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!");
+		goto bad_size;
+	}
+
+	if (va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+#if defined(CONFIG_64BIT)
+	if (kctx->is_compat)
+		cpu_va_bits = 32;
+#endif
+
+	if (!kbase_check_alloc_flags(*flags)) {
+		dev_warn(dev,
+				"kbase_mem_alloc called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	/* Limit GPU executable allocs to GPU PC size */
+	if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+	    (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT)))
+		goto bad_ex_size;
+
+	/* find out which VA zone to use */
+	if (*flags & BASE_MEM_SAME_VA)
+		zone = KBASE_REG_ZONE_SAME_VA;
+	else if (*flags & BASE_MEM_PROT_GPU_EX)
+		zone = KBASE_REG_ZONE_EXEC;
+	else
+		zone = KBASE_REG_ZONE_CUSTOM_VA;
+
+	reg = kbase_alloc_free_region(kctx, 0, va_pages, zone);
+	if (!reg) {
+		dev_err(dev, "Failed to allocate free region");
+		goto no_region;
+	}
+
+	kbase_update_region_flags(kctx, reg, *flags);
+
+	if (kbase_reg_prepare_native(reg, kctx) != 0) {
+		dev_err(dev, "Failed to prepare region");
+		goto prepare_failed;
+	}
+
+	if (*flags & BASE_MEM_GROW_ON_GPF)
+		reg->extent = extent;
+	else
+		reg->extent = 0;
+
+	if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) {
+		dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)",
+				(unsigned long long)commit_pages,
+				(unsigned long long)va_pages);
+		goto no_mem;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & BASE_MEM_SAME_VA) {
+		unsigned long prot = PROT_NONE;
+		unsigned long va_size = va_pages << PAGE_SHIFT;
+		unsigned long va_map = va_size;
+		unsigned long cookie, cookie_nr;
+		unsigned long cpu_addr;
+
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(dev, "No cookies available for allocation!");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_cookie;
+		}
+		/* return a cookie */
+		cookie_nr = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << cookie_nr);
+		BUG_ON(kctx->pending_regions[cookie_nr]);
+		kctx->pending_regions[cookie_nr] = reg;
+
+		kbase_gpu_vm_unlock(kctx);
+
+		/* relocate to correct base */
+		cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		cookie <<= PAGE_SHIFT;
+
+		/* See if we must align memory due to GPU PC bits vs CPU VA */
+		if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+		    (cpu_va_bits > gpu_pc_bits)) {
+			*va_alignment = gpu_pc_bits;
+			reg->flags |= KBASE_REG_ALIGNED;
+		}
+
+		/*
+		 * Pre-10.1 UKU userland calls mmap for us so return the
+		 * unaligned address and skip the map.
+		 */
+		if (kctx->api_version < KBASE_API_VERSION(10, 1)) {
+			*gpu_va = (u64) cookie;
+			return reg;
+		}
+
+		/*
+		 * GPUCORE-2190:
+		 *
+		 * We still need to return alignment for old userspace.
+		 */
+		if (*va_alignment)
+			va_map += 3 * (1UL << *va_alignment);
+
+		if (*flags & BASE_MEM_PROT_CPU_RD)
+			prot |= PROT_READ;
+		if (*flags & BASE_MEM_PROT_CPU_WR)
+			prot |= PROT_WRITE;
+
+		cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot,
+				MAP_SHARED, cookie);
+
+		if (IS_ERR_VALUE(cpu_addr)) {
+			kctx->pending_regions[cookie_nr] = NULL;
+			kctx->cookies |= (1UL << cookie_nr);
+			goto no_mmap;
+		}
+
+		/*
+		 * If we had to allocate extra VA space to force the
+		 * alignment release it.
+		 */
+		if (*va_alignment) {
+			unsigned long alignment = 1UL << *va_alignment;
+			unsigned long align_mask = alignment - 1;
+			unsigned long addr;
+			unsigned long addr_end;
+			unsigned long aligned_addr;
+			unsigned long aligned_addr_end;
+
+			addr = cpu_addr;
+			addr_end = addr + va_map;
+
+			aligned_addr = (addr + align_mask) &
+					~((u64) align_mask);
+			aligned_addr_end = aligned_addr + va_size;
+
+			if ((aligned_addr_end & BASE_MEM_MASK_4GB) == 0) {
+				/*
+				 * Can't end at 4GB boundary on some GPUs as
+				 * it will halt the shader.
+				 */
+				aligned_addr += 2 * alignment;
+				aligned_addr_end += 2 * alignment;
+			} else if ((aligned_addr & BASE_MEM_MASK_4GB) == 0) {
+				/*
+				 * Can't start at 4GB boundary on some GPUs as
+				 * it will halt the shader.
+				 */
+				aligned_addr += alignment;
+				aligned_addr_end += alignment;
+			}
+
+			/* anything to chop off at the start? */
+			if (addr != aligned_addr)
+				vm_munmap(addr, aligned_addr - addr);
+
+			/* anything at the end? */
+			if (addr_end != aligned_addr_end)
+				vm_munmap(aligned_addr_end,
+						addr_end - aligned_addr_end);
+
+			*gpu_va = (u64) aligned_addr;
+		} else
+			*gpu_va = (u64) cpu_addr;
+	} else /* we control the VA */ {
+		if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
+			dev_warn(dev, "Failed to map memory on GPU");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+
+		kbase_gpu_vm_unlock(kctx);
+	}
+
+	return reg;
+
+no_mmap:
+no_cookie:
+no_mem:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+prepare_failed:
+	kfree(reg);
+no_region:
+bad_ex_size:
+bad_flags:
+bad_size:
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_alloc);
+
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(out);
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	switch (query) {
+	case KBASE_MEM_QUERY_COMMIT_SIZE:
+		if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) {
+			*out = kbase_reg_current_backed_size(reg);
+		} else {
+			size_t i;
+			struct kbase_aliased *aliased;
+			*out = 0;
+			aliased = reg->cpu_alloc->imported.alias.aliased;
+			for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++)
+				*out += aliased[i].length;
+		}
+		break;
+	case KBASE_MEM_QUERY_VA_SIZE:
+		*out = reg->nr_pages;
+		break;
+	case KBASE_MEM_QUERY_FLAGS:
+	{
+		*out = 0;
+		if (KBASE_REG_CPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_WR;
+		if (KBASE_REG_CPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_RD;
+		if (KBASE_REG_CPU_CACHED & reg->flags)
+			*out |= BASE_MEM_CACHED_CPU;
+		if (KBASE_REG_GPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_WR;
+		if (KBASE_REG_GPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_RD;
+		if (!(KBASE_REG_GPU_NX & reg->flags))
+			*out |= BASE_MEM_PROT_GPU_EX;
+		if (KBASE_REG_SHARE_BOTH & reg->flags)
+			*out |= BASE_MEM_COHERENT_SYSTEM;
+		if (KBASE_REG_SHARE_IN & reg->flags)
+			*out |= BASE_MEM_COHERENT_LOCAL;
+		break;
+	}
+	default:
+		*out = 0;
+		goto out_unlock;
+	}
+
+	ret = 0;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the
+ * Ephemeral memory eviction list.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages which can be freed.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long pages = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+
+	mutex_lock(&kctx->evict_lock);
+
+	list_for_each_entry(alloc, &kctx->evict_list, evict_node)
+		pages += alloc->nents;
+
+	mutex_unlock(&kctx->evict_lock);
+	return pages;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction
+ * list for pages and try to reclaim them.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages freed (can be less then requested) or -1 if the
+ * shrinker failed to free pages in its pool.
+ *
+ * Note:
+ * This function accesses region structures without taking the region lock,
+ * this is required as the OOM killer can call the shrinker after the region
+ * lock has already been held.
+ * This is safe as we can guarantee that a region on the eviction list will
+ * not be freed (kbase_mem_free_region removes the allocation from the list
+ * before destroying it), or modified by other parts of the driver.
+ * The eviction list itself is guarded by the eviction lock and the MMU updates
+ * are protected by their own lock.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	struct kbase_mem_phy_alloc *tmp;
+	unsigned long freed = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+	mutex_lock(&kctx->evict_lock);
+
+	list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
+		int err;
+
+		err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
+				0, alloc->nents);
+		if (err != 0) {
+			/*
+			 * Failed to remove GPU mapping, tell the shrinker
+			 * to stop trying to shrink our slab even though we
+			 * have pages in it.
+			 */
+			freed = -1;
+			goto out_unlock;
+		}
+
+		/*
+		 * Update alloc->evicted before freeing the backing so the
+		 * helper can determine that it needs to bypass the accounting
+		 * and memory pool.
+		 */
+		alloc->evicted = alloc->nents;
+
+		kbase_free_phy_pages_helper(alloc, alloc->evicted);
+		freed += alloc->evicted;
+		list_del_init(&alloc->evict_node);
+
+		/*
+		 * Inform the JIT allocator this region has lost backing
+		 * as it might need to free the allocation.
+		 */
+		kbase_jit_backing_lost(alloc->reg);
+
+		/* Enough pages have been freed so stop now */
+		if (freed > sc->nr_to_scan)
+			break;
+	}
+out_unlock:
+	mutex_unlock(&kctx->evict_lock);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_evictable_reclaim_count_objects(s, sc);
+
+	return kbase_mem_evictable_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_evictable_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->evict_list);
+	mutex_init(&kctx->evict_lock);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink;
+#else
+	kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
+	kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+#endif
+	kctx->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	kctx->reclaim.batch = 0;
+#endif
+	register_shrinker(&kctx->reclaim);
+	return 0;
+}
+
+void kbase_mem_evictable_deinit(struct kbase_context *kctx)
+{
+	unregister_shrinker(&kctx->reclaim);
+}
+
+struct kbase_mem_zone_cache_entry {
+	/* List head used to link the cache entry to the memory allocation. */
+	struct list_head zone_node;
+	/* The zone the cacheline is for. */
+	struct zone *zone;
+	/* The number of pages in the allocation which belong to this zone. */
+	u64 count;
+};
+
+static bool kbase_zone_cache_builder(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	struct kbase_mem_zone_cache_entry *cache = NULL;
+	size_t i;
+	int ret = 0;
+
+	for (i = start_offset; i < alloc->nents; i++) {
+		struct page *p = phys_to_page(alloc->pages[i]);
+		struct zone *zone = page_zone(p);
+		bool create = true;
+
+		if (cache && (cache->zone == zone)) {
+			/*
+			 * Fast path check as most of the time adjacent
+			 * pages come from the same zone.
+			 */
+			create = false;
+		} else {
+			/*
+			 * Slow path check, walk all the cache entries to see
+			 * if we already know about this zone.
+			 */
+			list_for_each_entry(cache, &alloc->zone_cache, zone_node) {
+				if (cache->zone == zone) {
+					create = false;
+					break;
+				}
+			}
+		}
+
+		/* This zone wasn't found in the cache, create an entry for it */
+		if (create) {
+			cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+			if (!cache) {
+				ret = -ENOMEM;
+				goto bail;
+			}
+			cache->zone = zone;
+			cache->count = 0;
+			list_add(&cache->zone_node, &alloc->zone_cache);
+		}
+
+		cache->count++;
+	}
+	return 0;
+
+bail:
+	return ret;
+}
+
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	/*
+	 * Bail if the zone cache is empty, only update the cache if it
+	 * existed in the first place.
+	 */
+	if (list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, start_offset);
+}
+
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc)
+{
+	/* Bail if the zone cache already exists */
+	if (!list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, 0);
+}
+
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_mem_zone_cache_entry *walker;
+
+	while(!list_empty(&alloc->zone_cache)){
+		walker = list_first_entry(&alloc->zone_cache,
+				struct kbase_mem_zone_cache_entry, zone_node);
+		list_del(&walker->zone_node);
+		kfree(walker);
+	}
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p = phys_to_page(alloc->pages[i]);
+			struct zone *zone = page_zone(p);
+
+			zone_page_state_add(1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	kbase_process_page_usage_dec(kctx, alloc->nents);
+	new_page_count = kbase_atomic_sub_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+/**
+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable.
+ * @alloc: The physical allocation
+ */
+static
+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	new_page_count = kbase_atomic_add_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters so that the allocation is accounted for
+	 * against the process and thus is visible to the OOM killer,
+	 * then remove it from the reclaimable accounting. */
+	kbase_process_page_usage_inc(kctx, alloc->nents);
+
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(-zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p = phys_to_page(alloc->pages[i]);
+			struct zone *zone = page_zone(p);
+
+			zone_page_state_add(-1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* This alloction can't already be on a list. */
+	WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
+	/*
+	 * Try to shrink the CPU mappings as required, if we fail then
+	 * fail the process of making this allocation evictable.
+	 */
+	err = kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
+			0, gpu_alloc->nents);
+	if (err)
+		return -EINVAL;
+
+	/*
+	 * Add the allocation to the eviction list, after this point the shrink
+	 * can reclaim it.
+	 */
+	mutex_lock(&kctx->evict_lock);
+	list_add(&gpu_alloc->evict_node, &kctx->evict_list);
+	mutex_unlock(&kctx->evict_lock);
+	kbase_mem_evictable_mark_reclaim(gpu_alloc);
+
+	gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED;
+	return 0;
+}
+
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * First remove the allocation from the eviction list as it's no
+	 * longer eligible for eviction.
+	 */
+	mutex_lock(&kctx->evict_lock);
+	list_del_init(&gpu_alloc->evict_node);
+	mutex_unlock(&kctx->evict_lock);
+
+	if (gpu_alloc->evicted == 0) {
+		/*
+		 * The backing is still present, update the VM stats as it's
+		 * in use again.
+		 */
+		kbase_mem_evictable_unmark_reclaim(gpu_alloc);
+	} else {
+		/* If the region is still alive ... */
+		if (gpu_alloc->reg) {
+			/* ... allocate replacement backing ... */
+			err = kbase_alloc_phy_pages_helper(gpu_alloc,
+					gpu_alloc->evicted);
+
+			/*
+			 * ... and grow the mapping back to its
+			 * pre-eviction size.
+			 */
+			if (!err)
+				err = kbase_mem_grow_gpu_mapping(kctx,
+						gpu_alloc->reg,
+						gpu_alloc->evicted, 0);
+
+			gpu_alloc->evicted = 0;
+		}
+	}
+
+	/* If the region is still alive remove the DONT_NEED attribute. */
+	if (gpu_alloc->reg)
+		gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED;
+
+	return (err == 0);
+}
+
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+	unsigned int real_flags = 0;
+	unsigned int prev_flags = 0;
+	bool prev_needed, new_needed;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (!gpu_addr)
+		return -EINVAL;
+
+	/* nuke other bits */
+	flags &= mask;
+
+	/* check for only supported flags */
+	if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* mask covers bits we don't support? */
+	if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* convert flags */
+	if (BASE_MEM_COHERENT_SYSTEM & flags)
+		real_flags |= KBASE_REG_SHARE_BOTH;
+	else if (BASE_MEM_COHERENT_LOCAL & flags)
+		real_flags |= KBASE_REG_SHARE_IN;
+
+	/* now we can lock down the context, and find the region */
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	/* Is the region being transitioning between not needed and needed? */
+	prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
+	new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
+	if (prev_needed != new_needed) {
+		/* Aliased allocations can't be made ephemeral */
+		if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1)
+			goto out_unlock;
+
+		if (new_needed) {
+			/* Only native allocations can be marked not needed */
+			if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			ret = kbase_mem_evictable_make(reg->gpu_alloc);
+			if (ret)
+				goto out_unlock;
+		} else {
+			kbase_mem_evictable_unmake(reg->gpu_alloc);
+		}
+	}
+
+	/* limit to imported memory */
+	if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) &&
+	     (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
+		goto out_unlock;
+
+	/* no change? */
+	if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/* save for roll back */
+	prev_flags = reg->flags;
+	reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+	reg->flags |= real_flags;
+
+	/* Currently supporting only imported memory */
+	switch (reg->gpu_alloc->type) {
+#ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_cpu_phy_pages(reg), reg->gpu_alloc->nents, reg->flags);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		/* Future use will use the new flags, existing mapping will NOT be updated
+		 * as memory should not be in use by the GPU when updating the flags.
+		 */
+		ret = 0;
+		WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+		break;
+#endif
+	default:
+		break;
+	}
+
+	/* roll back on error, i.e. not UMP */
+	if (ret)
+		reg->flags = prev_flags;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	up_write(&current->mm->mmap_sem);
+out:
+	return ret;
+}
+
+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
+
+#ifdef CONFIG_UMP
+static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	ump_dd_handle umph;
+	u64 block_count;
+	const ump_dd_physical_block_64 *block_array;
+	u64 i, j;
+	int page = 0;
+	ump_alloc_flags ump_flags;
+	ump_alloc_flags cpu_flags;
+	ump_alloc_flags gpu_flags;
+
+	if (*flags & BASE_MEM_SECURE)
+		goto bad_flags;
+
+	umph = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE == umph)
+		goto bad_id;
+
+	ump_flags = ump_dd_allocation_flags_get(umph);
+	cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK;
+	gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) &
+			UMP_DEVICE_MASK;
+
+	*va_pages = ump_dd_size_get_64(umph);
+	*va_pages >>= PAGE_SHIFT;
+
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	if (*flags & BASE_MEM_SAME_VA)
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	else
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+
+	if (!reg)
+		goto no_region;
+
+	/* we've got pages to map now, and support SAME_VA */
+	*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	reg->gpu_alloc->imported.ump_handle = umph;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMP is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMP cannot be grown */
+
+	/* Override import flags based on UMP flags */
+	*flags &= ~(BASE_MEM_CACHED_CPU);
+	*flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR);
+	*flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR);
+
+	if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) {
+		reg->flags |= KBASE_REG_CPU_CACHED;
+		*flags |= BASE_MEM_CACHED_CPU;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_WR) {
+		reg->flags |= KBASE_REG_CPU_WR;
+		*flags |= BASE_MEM_PROT_CPU_WR;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_RD) {
+		reg->flags |= KBASE_REG_CPU_RD;
+		*flags |= BASE_MEM_PROT_CPU_RD;
+	}
+
+	if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR))
+		reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (gpu_flags & UMP_PROT_DEVICE_WR) {
+		reg->flags |= KBASE_REG_GPU_WR;
+		*flags |= BASE_MEM_PROT_GPU_WR;
+	}
+
+	if (gpu_flags & UMP_PROT_DEVICE_RD) {
+		reg->flags |= KBASE_REG_GPU_RD;
+		*flags |= BASE_MEM_PROT_GPU_RD;
+	}
+
+	/* ump phys block query */
+	ump_dd_phys_blocks_get_64(umph, &block_count, &block_array);
+
+	for (i = 0; i < block_count; i++) {
+		for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) {
+			reg->gpu_alloc->pages[page] = block_array[i].addr + (j << PAGE_SHIFT);
+			page++;
+		}
+	}
+	reg->gpu_alloc->nents = *va_pages;
+	reg->extent = 0;
+
+	return reg;
+
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	ump_dd_release(umph);
+bad_id:
+bad_flags:
+	return NULL;
+}
+#endif				/* CONFIG_UMP */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, int fd, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	struct dma_buf *dma_buf;
+	struct dma_buf_attachment *dma_attachment;
+	bool shared_zone = false;
+
+	dma_buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(dma_buf))
+		goto no_buf;
+
+	dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+	if (!dma_attachment)
+		goto no_attachment;
+
+	*va_pages = PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT;
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* ignore SAME_VA */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	} else {
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	/* No pages to map yet */
+	reg->gpu_alloc->nents = 0;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMM is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMM cannot be grown */
+	reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (*flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (*flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (*flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (*flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (*flags & BASE_MEM_SECURE)
+		reg->flags |= KBASE_REG_SECURE;
+
+	/* no read or write permission given on import, only on run do we give the right permissions */
+
+	reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM;
+	reg->gpu_alloc->imported.umm.sgt = NULL;
+	reg->gpu_alloc->imported.umm.dma_buf = dma_buf;
+	reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
+	reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
+	reg->extent = 0;
+
+	return reg;
+
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	dma_buf_detach(dma_buf, dma_attachment);
+no_attachment:
+	dma_buf_put(dma_buf);
+no_buf:
+	return NULL;
+}
+#endif  /* CONFIG_DMA_SHARED_BUFFER */
+
+
+static struct kbase_va_region *kbase_mem_from_user_buffer(
+		struct kbase_context *kctx, unsigned long address,
+		unsigned long size, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	long faulted_pages;
+	int zone = KBASE_REG_ZONE_CUSTOM_VA;
+	bool shared_zone = false;
+
+	*va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) -
+		PFN_DOWN(address);
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (UINT64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* SAME_VA generally not supported with imported memory (no known use cases) */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		zone = KBASE_REG_ZONE_SAME_VA;
+	}
+
+	reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone);
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages,
+			KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */
+
+	if (*flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (*flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (*flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (*flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	down_read(&current->mm->mmap_sem);
+
+	/* A sanity check that get_user_pages will work on the memory */
+	/* (so the initial import fails on weird memory regions rather than */
+	/* the job failing when we try to handle the external resources). */
+	/* It doesn't take a reference to the pages (because the page list is NULL). */
+	/* We can't really store the page list because that would involve */
+	/* keeping the pages pinned - instead we pin/unpin around the job */
+	/* (as part of the external resources handling code) */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL);
+#else
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL);
+#endif
+	up_read(&current->mm->mmap_sem);
+
+	if (faulted_pages != *va_pages)
+		goto fault_mismatch;
+
+	reg->gpu_alloc->imported.user_buf.size = size;
+	reg->gpu_alloc->imported.user_buf.address = address;
+	reg->gpu_alloc->imported.user_buf.nr_pages = faulted_pages;
+	reg->gpu_alloc->imported.user_buf.pages = kmalloc_array(faulted_pages,
+			sizeof(struct page *), GFP_KERNEL);
+	reg->gpu_alloc->imported.user_buf.mm = current->mm;
+	atomic_inc(&current->mm->mm_count);
+
+	if (!reg->gpu_alloc->imported.user_buf.pages)
+		goto no_page_array;
+
+	reg->gpu_alloc->nents = 0;
+	reg->extent = 0;
+
+	return reg;
+
+no_page_array:
+fault_mismatch:
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	return NULL;
+
+}
+
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+		    u64 nents, struct base_mem_aliasing_info *ai,
+		    u64 *num_pages)
+{
+	struct kbase_va_region *reg;
+	u64 gpu_va;
+	size_t i;
+	bool coherent;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(ai);
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	/* mask to only allowed flags */
+	*flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+		   BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
+		   BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+
+	if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_alias called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+	coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 ||
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0;
+
+	if (!stride)
+		goto bad_stride;
+
+	if (!nents)
+		goto bad_nents;
+
+	if ((nents * stride) > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* calculate the number of pages this alias will cover */
+	*num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat) {
+		/* 64-bit tasks must MMAP anyway, but not expose this address to
+		 * clients */
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_SAME_VA);
+	} else {
+#else
+	if (1) {
+#endif
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	/* zero-sized page array, as we don't need one/can support one */
+	reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	kbase_update_region_flags(kctx, reg, *flags);
+
+	reg->gpu_alloc->imported.alias.nents = nents;
+	reg->gpu_alloc->imported.alias.stride = stride;
+	reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents);
+	if (!reg->gpu_alloc->imported.alias.aliased)
+		goto no_aliased_array;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* validate and add src handles */
+	for (i = 0; i < nents; i++) {
+		if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+			if (ai[i].handle.basep.handle !=
+			    BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE)
+				goto bad_handle; /* unsupported magic handle */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+		} else {
+			struct kbase_va_region *aliasing_reg;
+			struct kbase_mem_phy_alloc *alloc;
+
+			aliasing_reg = kbase_region_tracker_find_region_base_address(
+				kctx,
+				(ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT);
+
+			/* validate found region */
+			if (!aliasing_reg)
+				goto bad_handle; /* Not found */
+			if (aliasing_reg->flags & KBASE_REG_FREE)
+				goto bad_handle; /* Free region */
+			if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+				goto bad_handle; /* Ephemeral region */
+			if (!aliasing_reg->gpu_alloc)
+				goto bad_handle; /* No alloc */
+			if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+				goto bad_handle; /* Not a native alloc */
+			if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0))
+				goto bad_handle;
+				/* Non-coherent memory cannot alias
+				   coherent memory, and vice versa.*/
+
+			/* check size against stride */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+
+			alloc = aliasing_reg->gpu_alloc;
+
+			/* check against the alloc's size */
+			if (ai[i].offset > alloc->nents)
+				goto bad_handle; /* beyond end */
+			if (ai[i].offset + ai[i].length > alloc->nents)
+				goto bad_handle; /* beyond end */
+
+			reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+			reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset;
+		}
+	}
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat) {
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
+			goto no_cookie;
+		}
+		/* return a cookie */
+		gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << gpu_va);
+		BUG_ON(kctx->pending_regions[gpu_va]);
+		kctx->pending_regions[gpu_va] = reg;
+
+		/* relocate to correct base */
+		gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		gpu_va <<= PAGE_SHIFT;
+	} else /* we control the VA */ {
+#else
+	if (1) {
+#endif
+		if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+			dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags &= ~KBASE_REG_GROWABLE;
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return gpu_va;
+
+#ifdef CONFIG_64BIT
+no_cookie:
+#endif
+no_mmap:
+bad_handle:
+	kbase_gpu_vm_unlock(kctx);
+no_aliased_array:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_reg:
+bad_size:
+bad_nents:
+bad_stride:
+bad_flags:
+	return 0;
+}
+
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u64 *gpu_va, u64 *va_pages,
+		u64 *flags)
+{
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_pages);
+	KBASE_DEBUG_ASSERT(flags);
+
+#ifdef CONFIG_64BIT
+	if (!kctx->is_compat)
+		*flags |= BASE_MEM_SAME_VA;
+#endif
+
+	if (!kbase_check_import_flags(*flags)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	switch (type) {
+#ifdef CONFIG_UMP
+	case BASE_MEM_IMPORT_TYPE_UMP: {
+		ump_secure_id id;
+
+		if (get_user(id, (ump_secure_id __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_ump(kctx, id, va_pages, flags);
+	}
+	break;
+#endif /* CONFIG_UMP */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case BASE_MEM_IMPORT_TYPE_UMM: {
+		int fd;
+
+		if (get_user(fd, (int __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_umm(kctx, fd, va_pages, flags);
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
+		struct base_mem_import_user_buffer user_buffer;
+		void __user *uptr;
+
+		if (copy_from_user(&user_buffer, phandle,
+				sizeof(user_buffer))) {
+			reg = NULL;
+		} else {
+#ifdef CONFIG_COMPAT
+			if (kctx->is_compat)
+				uptr = compat_ptr(user_buffer.ptr.compat_value);
+			else
+#endif
+				uptr = user_buffer.ptr.value;
+
+			reg = kbase_mem_from_user_buffer(kctx,
+					(unsigned long)uptr, user_buffer.length,
+					va_pages, flags);
+		}
+		break;
+	}
+	default: {
+		reg = NULL;
+		break;
+	}
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies)
+			goto no_cookie;
+		/* return a cookie */
+		*gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << *gpu_va);
+		BUG_ON(kctx->pending_regions[*gpu_va]);
+		kctx->pending_regions[*gpu_va] = reg;
+
+		/* relocate to correct base */
+		*gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		*gpu_va <<= PAGE_SHIFT;
+
+	} else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES)  {
+		/* we control the VA, mmap now to the GPU */
+		if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	} else {
+		/* we control the VA, but nothing to mmap yet */
+		if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	/* clear out private flags */
+	*flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+
+no_gpu_va:
+no_cookie:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+no_reg:
+bad_flags:
+	*gpu_va = 0;
+	*va_pages = 0;
+	*flags = 0;
+	return -ENOMEM;
+}
+
+
+static int zap_range_nolock(struct mm_struct *mm,
+		const struct vm_operations_struct *vm_ops,
+		unsigned long start, unsigned long end)
+{
+	struct vm_area_struct *vma;
+	int err = -EINVAL; /* in case end < start */
+
+	while (start < end) {
+		unsigned long local_start;
+		unsigned long local_end;
+
+		vma = find_vma_intersection(mm, start, end);
+		if (!vma)
+			break;
+
+		/* is it ours? */
+		if (vma->vm_ops != vm_ops)
+			goto try_next;
+
+		local_start = vma->vm_start;
+
+		if (start > local_start)
+			local_start = start;
+
+		local_end = vma->vm_end;
+
+		if (end < local_end)
+			local_end = end;
+
+		err = zap_vma_ptes(vma, local_start, local_end - local_start);
+		if (unlikely(err))
+			break;
+
+try_next:
+		/* go to next vma, if any */
+		start = vma->vm_end;
+	}
+
+	return err;
+}
+
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	phys_addr_t *phy_pages;
+	u64 delta = new_pages - old_pages;
+	int ret = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Map the new pages into the GPU */
+	phy_pages = kbase_get_gpu_phy_pages(reg);
+	ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
+			phy_pages + old_pages, delta, reg->flags);
+
+	return ret;
+}
+
+static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	struct kbase_mem_phy_alloc *cpu_alloc = reg->cpu_alloc;
+	struct kbase_cpu_mapping *mapping;
+	int err;
+
+	lockdep_assert_held(&kctx->process_mm->mmap_sem);
+
+	list_for_each_entry(mapping, &cpu_alloc->mappings, mappings_list) {
+		unsigned long mapping_size;
+
+		mapping_size = (mapping->vm_end - mapping->vm_start)
+				>> PAGE_SHIFT;
+
+		/* is this mapping affected ?*/
+		if ((mapping->page_off + mapping_size) > new_pages) {
+			unsigned long first_bad = 0;
+
+			if (new_pages > mapping->page_off)
+				first_bad = new_pages - mapping->page_off;
+
+			err = zap_range_nolock(current->mm,
+					&kbase_vm_ops,
+					mapping->vm_start +
+					(first_bad << PAGE_SHIFT),
+					mapping->vm_end);
+
+			WARN(err,
+			     "Failed to zap VA range (0x%lx - 0x%lx);\n",
+			     mapping->vm_start +
+			     (first_bad << PAGE_SHIFT),
+			     mapping->vm_end
+			     );
+
+			/* The zap failed, give up and exit */
+			if (err)
+				goto failed;
+		}
+	}
+
+	return 0;
+
+failed:
+	return err;
+}
+
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 delta = old_pages - new_pages;
+	int ret = 0;
+
+	ret = kbase_mmu_teardown_pages(kctx,
+			reg->start_pfn + new_pages, delta);
+
+	return ret;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason)
+{
+	u64 old_pages;
+	u64 delta;
+	int res = -EINVAL;
+	struct kbase_va_region *reg;
+	bool read_locked = false;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(failure_reason);
+	KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE)) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+		goto out_unlock;
+	}
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (0 == (reg->flags & KBASE_REG_GROWABLE)) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (new_pages > reg->nr_pages) {
+		/* Would overflow the VA region */
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+		goto out_unlock;
+	}
+
+	/* can't be mapped more than once on the GPU */
+	if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+	/* can't grow regions which are ephemeral */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (new_pages == reg->gpu_alloc->nents) {
+		/* no change */
+		res = 0;
+		goto out_unlock;
+	}
+
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (new_pages > old_pages) {
+		delta = new_pages - old_pages;
+
+		/*
+		 * No update to the mm so downgrade the writer lock to a read
+		 * lock so other readers aren't blocked after this point.
+		 */
+		downgrade_write(&current->mm->mmap_sem);
+		read_locked = true;
+
+		/* Allocate some more pages */
+		if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+		if (reg->cpu_alloc != reg->gpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					reg->gpu_alloc, delta) != 0) {
+				*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+				kbase_free_phy_pages_helper(reg->cpu_alloc,
+						delta);
+				goto out_unlock;
+			}
+		}
+
+		/* No update required for CPU mappings, that's done on fault. */
+
+		/* Update GPU mapping. */
+		res = kbase_mem_grow_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* On error free the new pages */
+		if (res) {
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+	} else {
+		delta = old_pages - new_pages;
+
+		/* Update all CPU mapping(s) */
+		res = kbase_mem_shrink_cpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+
+		/* Update the GPU mapping */
+		res = kbase_mem_shrink_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+
+		kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+		if (reg->cpu_alloc != reg->gpu_alloc)
+			kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	if (read_locked)
+		up_read(&current->mm->mmap_sem);
+	else
+		up_write(&current->mm->mmap_sem);
+
+	return res;
+}
+
+static void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* non-atomic as we're under Linux' mm lock */
+	map->count++;
+}
+
+static void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* non-atomic as we're under Linux' mm lock */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	kbase_gpu_vm_lock(map->kctx);
+
+	if (map->region) {
+		KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
+				KBASE_REG_ZONE_SAME_VA);
+		/* Avoid freeing memory on the process death which results in
+		 * GPU Page Fault. Memory will be freed in kbase_destroy_context
+		 */
+		if (!(current->flags & PF_EXITING))
+			kbase_mem_free_region(map->kctx, map->region);
+	}
+
+	list_del(&map->mappings_list);
+
+	kbase_gpu_vm_unlock(map->kctx);
+
+	kbase_mem_phy_alloc_put(map->alloc);
+	kfree(map);
+}
+
+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
+
+
+static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+	pgoff_t rel_pgoff;
+	size_t i;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	/* we don't use vmf->pgoff as it's affected by our mmap with
+	 * offset being a GPU VA or a cookie */
+	rel_pgoff = ((unsigned long)vmf->virtual_address - map->vm_start)
+			>> PAGE_SHIFT;
+
+	kbase_gpu_vm_lock(map->kctx);
+	if (map->page_off + rel_pgoff >= map->alloc->nents)
+		goto locked_bad_fault;
+
+	/* Fault on access to DONT_NEED regions */
+	if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED))
+		goto locked_bad_fault;
+
+	/* insert all valid pages from the fault location */
+	for (i = rel_pgoff;
+	     i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT,
+	     map->alloc->nents - map->page_off); i++) {
+		int ret = vm_insert_pfn(vma, map->vm_start + (i << PAGE_SHIFT),
+		    PFN_DOWN(map->alloc->pages[map->page_off + i]));
+		if (ret < 0 && ret != -EBUSY)
+			goto locked_bad_fault;
+	}
+
+	kbase_gpu_vm_unlock(map->kctx);
+	/* we resolved it, nothing for VM to do */
+	return VM_FAULT_NOPAGE;
+
+locked_bad_fault:
+	kbase_gpu_vm_unlock(map->kctx);
+	return VM_FAULT_SIGBUS;
+}
+
+static const struct vm_operations_struct kbase_vm_ops = {
+	.open  = kbase_cpu_vm_open,
+	.close = kbase_cpu_vm_close,
+	.fault = kbase_cpu_vm_fault
+};
+
+static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close)
+{
+	struct kbase_cpu_mapping *map;
+	u64 start_off = vma->vm_pgoff - reg->start_pfn;
+	phys_addr_t *page_array;
+	int err = 0;
+	int i;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+	if (!map) {
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+	 * VM_DONTEXPAND - disable mremap on this region
+	 * VM_IO - disables paging
+	 * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+	 * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+	 *               This is needed to support using the dedicated and
+	 *               the OS based memory backends together.
+	 */
+	/*
+	 * This will need updating to propagate coherency flags
+	 * See MIDBASE-1057
+	 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_ops;
+	vma->vm_private_data = map;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+	    (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+		/* We can't map vmalloc'd memory uncached.
+		 * Other memory will have been returned from
+		 * kbase_mem_pool which would be
+		 * suitable for mapping uncached.
+		 */
+		BUG_ON(kaddr);
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	if (!kaddr) {
+		unsigned long addr = vma->vm_start + aligned_offset;
+
+		vma->vm_flags |= VM_PFNMAP;
+		for (i = 0; i < nr_pages; i++) {
+			unsigned long pfn = PFN_DOWN(page_array[i + start_off]);
+
+			err = vm_insert_pfn(vma, addr, pfn);
+			if (WARN_ON(err))
+				break;
+
+			addr += PAGE_SIZE;
+		}
+	} else {
+		WARN_ON(aligned_offset);
+		/* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+		vma->vm_flags |= VM_MIXEDMAP;
+		/* vmalloc remaping is easy... */
+		err = remap_vmalloc_range(vma, kaddr, 0);
+		WARN_ON(err);
+	}
+
+	if (err) {
+		kfree(map);
+		goto out;
+	}
+
+	map->page_off = start_off;
+	map->region = free_on_close ? reg : NULL;
+	map->kctx = reg->kctx;
+	map->vm_start = vma->vm_start + aligned_offset;
+	if (aligned_offset) {
+		KBASE_DEBUG_ASSERT(!start_off);
+		map->vm_end = map->vm_start + (reg->nr_pages << PAGE_SHIFT);
+	} else {
+		map->vm_end = vma->vm_end;
+	}
+	map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->count = 1; /* start with one ref */
+
+	if (reg->flags & KBASE_REG_CPU_CACHED)
+		map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+	return err;
+}
+
+static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
+{
+	struct kbase_va_region *new_reg;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+	u32 *tb;
+	int owns_tb = 1;
+
+	dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	if (!kctx->jctx.tb) {
+		KBASE_DEBUG_ASSERT(0 != size);
+		tb = vmalloc_user(size);
+
+		if (NULL == tb) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = kbase_device_trace_buffer_install(kctx, tb, size);
+		if (err) {
+			vfree(tb);
+			goto out;
+		}
+	} else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	*kaddr = kctx->jctx.tb;
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_region;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->cpu_alloc->imported.kctx = kctx;
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+
+	/* alloc now owns the tb */
+	owns_tb = 0;
+
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_va_region;
+	}
+
+	*reg = new_reg;
+
+	/* map read only, noexec */
+	vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+	/* the rest of the flags is added by the cpu_mmap handler */
+
+	dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
+	return 0;
+
+out_no_va_region:
+out_no_alloc:
+	kbase_free_alloced_region(new_reg);
+out_no_region:
+	if (owns_tb) {
+		kbase_device_trace_buffer_uninstall(kctx);
+		vfree(tb);
+	}
+out:
+	return err;
+}
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
+{
+	struct kbase_va_region *new_reg;
+	void *kaddr;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	kaddr = kbase_mmu_dump(kctx, nr_pages);
+
+	if (!kaddr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_va_region;
+	}
+
+	*kmap_addr = kaddr;
+	*reg = new_reg;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+	return 0;
+
+out_no_alloc:
+out_va_region:
+	kbase_free_alloced_region(new_reg);
+out:
+	return err;
+}
+
+
+void kbase_os_mem_map_lock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	down_read(&mm->mmap_sem);
+}
+
+void kbase_os_mem_map_unlock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	up_read(&mm->mmap_sem);
+}
+
+#if defined(CONFIG_DMA_SHARED_BUFFER) && defined(CONFIG_MALI_TRACE_TIMELINE)
+/* This section is required only for instrumentation. */
+
+static void kbase_dma_buf_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* Non-atomic as we're under Linux's mm lock. */
+	map->count++;
+}
+
+static void kbase_dma_buf_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* Non-atomic as we're under Linux's mm lock. */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+
+	kbase_gpu_vm_lock(map->kctx);
+	list_del(&map->mappings_list);
+	kbase_gpu_vm_unlock(map->kctx);
+	kfree(map);
+}
+
+static const struct vm_operations_struct kbase_dma_mmap_ops = {
+	.open  = kbase_dma_buf_vm_open,
+	.close = kbase_dma_buf_vm_close,
+};
+#endif /* CONFIG_DMA_SHARED_BUFFER && CONFIG_MALI_TRACE_TIMELINE */
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx = file->private_data;
+	struct kbase_va_region *reg;
+	void *kaddr = NULL;
+	size_t nr_pages;
+	int err = 0;
+	int free_on_close = 0;
+	struct device *dev = kctx->kbdev->dev;
+	size_t aligned_offset = 0;
+
+	dev_dbg(dev, "kbase_mmap\n");
+	nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+	/* strip away corresponding VM_MAY% flags to the VM_% flags requested */
+	vma->vm_flags &= ~((vma->vm_flags & (VM_READ | VM_WRITE)) << 4);
+
+	if (0 == nr_pages) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
+		/* The non-mapped tracking helper page */
+		err = kbase_tracking_page_setup(kctx, vma);
+		goto out_unlock;
+	}
+
+	/* if not the MTP, verify that the MTP has been mapped */
+	rcu_read_lock();
+	/* catches both when the special page isn't present or
+	 * when we've forked */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		err = -EINVAL;
+		rcu_read_unlock();
+		goto out_unlock;
+	}
+	rcu_read_unlock();
+
+	switch (vma->vm_pgoff) {
+	case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
+	case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE):
+		/* Illegal handle for direct map */
+		err = -EINVAL;
+		goto out_unlock;
+	case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE):
+		err = kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
+		/* free the region on munmap */
+		free_on_close = 1;
+		goto map;
+	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+		/* MMU dump */
+		err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		goto map;
+	case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+	     PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+		/* SAME_VA stuff, fetch the right region */
+		int gpu_pc_bits;
+		int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+
+		gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		if (reg->flags & KBASE_REG_ALIGNED) {
+			/* nr_pages must be able to hold alignment pages
+			 * plus actual pages */
+			unsigned long align = 1ULL << gpu_pc_bits;
+			unsigned long extra_pages = 3 * PFN_DOWN(align);
+			unsigned long aligned_addr;
+			unsigned long aligned_addr_end;
+			unsigned long nr_bytes = reg->nr_pages << PAGE_SHIFT;
+
+			if (kctx->api_version < KBASE_API_VERSION(8, 5))
+				/* Maintain compatibility with old userspace */
+				extra_pages = PFN_DOWN(align);
+
+			if (nr_pages != reg->nr_pages + extra_pages) {
+				/* incorrect mmap size */
+				/* leave the cookie for a potential
+				 * later mapping, or to be reclaimed
+				 * later when the context is freed */
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
+			aligned_addr = ALIGN(vma->vm_start, align);
+			aligned_addr_end = aligned_addr + nr_bytes;
+
+			if (kctx->api_version >= KBASE_API_VERSION(8, 5)) {
+				if ((aligned_addr_end & BASE_MEM_MASK_4GB) == 0) {
+					/* Can't end at 4GB boundary */
+					aligned_addr += 2 * align;
+				} else if ((aligned_addr & BASE_MEM_MASK_4GB) == 0) {
+					/* Can't start at 4GB boundary */
+					aligned_addr += align;
+				}
+			}
+
+			aligned_offset = aligned_addr - vma->vm_start;
+		} else if (reg->nr_pages != nr_pages) {
+			/* incorrect mmap size */
+			/* leave the cookie for a potential later
+			 * mapping, or to be reclaimed later when the
+			 * context is freed */
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		if ((vma->vm_flags & VM_READ &&
+					!(reg->flags & KBASE_REG_CPU_RD)) ||
+				(vma->vm_flags & VM_WRITE &&
+				 !(reg->flags & KBASE_REG_CPU_WR))) {
+			/* VM flags inconsistent with region flags */
+			err = -EPERM;
+			dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+			goto out_unlock;
+		}
+
+		/* adjust down nr_pages to what we have physically */
+		nr_pages = kbase_reg_current_backed_size(reg);
+
+		if (kbase_gpu_mmap(kctx, reg,
+					vma->vm_start + aligned_offset,
+					reg->nr_pages, 1) != 0) {
+			dev_err(dev, "%s:%d\n", __FILE__, __LINE__);
+			/* Unable to map in GPU space. */
+			WARN_ON(1);
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		/* no need for the cookie anymore */
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		/*
+		 * Overwrite the offset with the
+		 * region start_pfn, so we effectively
+		 * map from offset 0 in the region.
+		 */
+		vma->vm_pgoff = reg->start_pfn;
+
+		/* free the region on munmap */
+		free_on_close = 1;
+		goto map;
+	}
+	default: {
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx, (u64)vma->vm_pgoff << PAGE_SHIFT);
+
+		if (reg && !(reg->flags & KBASE_REG_FREE)) {
+			/* will this mapping overflow the size of the region? */
+			if (nr_pages > (reg->nr_pages - (vma->vm_pgoff - reg->start_pfn)))
+				goto overflow;
+
+			if ((vma->vm_flags & VM_READ &&
+			     !(reg->flags & KBASE_REG_CPU_RD)) ||
+			    (vma->vm_flags & VM_WRITE &&
+			     !(reg->flags & KBASE_REG_CPU_WR))) {
+				/* VM flags inconsistent with region flags */
+				err = -EPERM;
+				dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+				goto out_unlock;
+			}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+			if (reg->cpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM)
+				goto dma_map;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+			/* limit what we map to the amount currently backed */
+			if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+				if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
+					nr_pages = 0;
+				else
+					nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn);
+			}
+
+			goto map;
+		}
+
+overflow:
+		err = -ENOMEM;
+		goto out_unlock;
+	} /* default */
+	} /* switch */
+map:
+	err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
+		/* MMU dump - userspace should now have a reference on
+		 * the pages, so we can now free the kernel mapping */
+		vfree(kaddr);
+	}
+	goto out_unlock;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+dma_map:
+	err = dma_buf_mmap(reg->cpu_alloc->imported.umm.dma_buf, vma, vma->vm_pgoff - reg->start_pfn);
+#if defined(CONFIG_MALI_TRACE_TIMELINE)
+	/* This section is required only for instrumentation. */
+	/* Add created mapping to imported region mapping list.
+	 * It is important to make it visible to dumping infrastructure.
+	 * Add mapping only if vm_ops structure is not used by memory owner. */
+	WARN_ON(vma->vm_ops);
+	WARN_ON(vma->vm_private_data);
+	if (!err && !vma->vm_ops && !vma->vm_private_data) {
+		struct kbase_cpu_mapping *map = kzalloc(
+			sizeof(*map),
+			GFP_KERNEL);
+
+		if (map) {
+			map->kctx     = reg->kctx;
+			map->region   = NULL;
+			map->page_off = vma->vm_pgoff;
+			map->vm_start = vma->vm_start;
+			map->vm_end   = vma->vm_end;
+			map->count    = 1; /* start with one ref */
+
+			vma->vm_ops          = &kbase_dma_mmap_ops;
+			vma->vm_private_data = map;
+
+			list_add(
+				&map->mappings_list,
+				&reg->cpu_alloc->mappings);
+		}
+	}
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+out:
+	if (err)
+		dev_err(dev, "mmap failed %d\n", err);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmap);
+
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map)
+{
+	struct kbase_va_region *reg;
+	unsigned long page_index;
+	unsigned int offset = gpu_addr & ~PAGE_MASK;
+	size_t page_count = PFN_UP(offset + size);
+	phys_addr_t *page_array;
+	struct page **pages;
+	void *cpu_addr = NULL;
+	pgprot_t prot;
+	size_t i;
+	bool sync_needed;
+
+	if (!size || !map)
+		return NULL;
+
+	/* check if page_count calculation will wrap */
+	if (size > ((size_t)-1 / PAGE_SIZE))
+		return NULL;
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn;
+
+	/* check if page_index + page_count will wrap */
+	if (-1UL - page_count < page_index)
+		goto out_unlock;
+
+	if (page_index + page_count > kbase_reg_current_backed_size(reg))
+		goto out_unlock;
+
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+	/* check access permissions can be satisfied
+	 * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */
+	if ((reg->flags & prot_request) != prot_request)
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+	if (!page_array)
+		goto out_unlock;
+
+	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out_unlock;
+
+	for (i = 0; i < page_count; i++)
+		pages[i] = pfn_to_page(PFN_DOWN(page_array[page_index + i]));
+
+	prot = PAGE_KERNEL;
+	if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
+		/* Map uncached */
+		prot = pgprot_writecombine(prot);
+	}
+	/* Note: enforcing a RO prot_request onto prot is not done, since:
+	 * - CPU-arch-specific integration required
+	 * - kbase_vmap() requires no access checks to be made/enforced */
+
+	cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+
+	kfree(pages);
+
+	if (!cpu_addr)
+		goto out_unlock;
+
+	map->gpu_addr = gpu_addr;
+	map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
+	map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+	map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
+	map->addr = (void *)((uintptr_t)cpu_addr + offset);
+	map->size = size;
+	map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0;
+	sync_needed = map->is_cached;
+
+#ifdef CONFIG_MALI_COH_KERN
+	/* kernel can use coherent memory if supported */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+		sync_needed = false;
+#endif
+
+	if (sync_needed) {
+		/* Sync first page */
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+		phys_addr_t cpu_pa = map->cpu_pages[0];
+		phys_addr_t gpu_pa = map->gpu_pages[0];
+
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+				KBASE_SYNC_TO_CPU);
+
+		/* Sync middle pages (if any) */
+		for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+			cpu_pa = map->cpu_pages[i];
+			gpu_pa = map->gpu_pages[i];
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+					KBASE_SYNC_TO_CPU);
+		}
+
+		/* Sync last page (if any) */
+		if (page_count > 1) {
+			cpu_pa = map->cpu_pages[page_count - 1];
+			gpu_pa = map->gpu_pages[page_count - 1];
+			sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+					KBASE_SYNC_TO_CPU);
+		}
+	}
+	kbase_gpu_vm_unlock(kctx);
+
+	return map->addr;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return NULL;
+}
+
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map)
+{
+	/* 0 is specified for prot_request to indicate no access checks should
+	 * be made.
+	 *
+	 * As mentioned in kbase_vmap_prot() this means that a kernel-side
+	 * CPU-RO mapping is not enforced to allow this to work */
+	return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map);
+}
+KBASE_EXPORT_TEST_API(kbase_vmap);
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+	void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+	bool sync_needed = map->is_cached;
+	vunmap(addr);
+#ifdef CONFIG_MALI_COH_KERN
+	/* kernel can use coherent memory if supported */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+		sync_needed = false;
+#endif
+	if (sync_needed) {
+		off_t offset = (uintptr_t)map->addr & ~PAGE_MASK;
+		size_t size = map->size;
+		size_t page_count = PFN_UP(offset + size);
+		size_t i;
+
+		/* Sync first page */
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+		phys_addr_t cpu_pa = map->cpu_pages[0];
+		phys_addr_t gpu_pa = map->gpu_pages[0];
+
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+				KBASE_SYNC_TO_DEVICE);
+
+		/* Sync middle pages (if any) */
+		for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+			cpu_pa = map->cpu_pages[i];
+			gpu_pa = map->gpu_pages[i];
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+					KBASE_SYNC_TO_DEVICE);
+		}
+
+		/* Sync last page (if any) */
+		if (page_count > 1) {
+			cpu_pa = map->cpu_pages[page_count - 1];
+			gpu_pa = map->gpu_pages[page_count - 1];
+			sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+					KBASE_SYNC_TO_DEVICE);
+		}
+	}
+	map->gpu_addr = 0;
+	map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+	map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+	map->cpu_pages = NULL;
+	map->gpu_pages = NULL;
+	map->addr = NULL;
+	map->size = 0;
+	map->is_cached = false;
+}
+KBASE_EXPORT_TEST_API(kbase_vunmap);
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
+{
+	struct mm_struct *mm;
+
+	rcu_read_lock();
+	mm = rcu_dereference(kctx->process_mm);
+	if (mm) {
+		atomic_add(pages, &kctx->nonmapped_pages);
+#ifdef SPLIT_RSS_COUNTING
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+#else
+		spin_lock(&mm->page_table_lock);
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+		spin_unlock(&mm->page_table_lock);
+#endif
+	}
+	rcu_read_unlock();
+}
+
+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
+{
+	int pages;
+	struct mm_struct *mm;
+
+	spin_lock(&kctx->mm_update_lock);
+	mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
+	if (!mm) {
+		spin_unlock(&kctx->mm_update_lock);
+		return;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, NULL);
+	spin_unlock(&kctx->mm_update_lock);
+	synchronize_rcu();
+
+	pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+#ifdef SPLIT_RSS_COUNTING
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+#else
+	spin_lock(&mm->page_table_lock);
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+	spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx;
+
+	kctx = vma->vm_private_data;
+	kbasep_os_process_page_usage_drain(kctx);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+	.close = kbase_special_vm_close,
+};
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
+{
+	/* check that this is the only tracking page */
+	spin_lock(&kctx->mm_update_lock);
+	if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
+		spin_unlock(&kctx->mm_update_lock);
+		return -EFAULT;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, current->mm);
+
+	spin_unlock(&kctx->mm_update_lock);
+
+	/* no real access */
+	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_special_ops;
+	vma->vm_private_data = kctx;
+
+	return 0;
+}
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
+{
+	int i;
+	int res;
+	void *va;
+	dma_addr_t  dma_pa;
+	struct kbase_va_region *reg;
+	phys_addr_t *page_array;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
+	u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+		    BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(0 != size);
+	KBASE_DEBUG_ASSERT(0 != pages);
+
+	if (size == 0)
+		goto err;
+
+	/* All the alloc calls return zeroed memory */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, &attrs);
+#else
+	va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
+#endif
+	if (!va)
+		goto err;
+
+	/* Store the state so we can free it later. */
+	handle->cpu_va = va;
+	handle->dma_pa = dma_pa;
+	handle->size   = size;
+
+
+	reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA);
+	if (!reg)
+		goto no_reg;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	kbase_update_region_flags(kctx, reg, flags);
+
+	reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(reg->cpu_alloc))
+		goto no_alloc;
+
+	reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	for (i = 0; i < pages; i++)
+		page_array[i] = dma_pa + (i << PAGE_SHIFT);
+
+	reg->cpu_alloc->nents = pages;
+
+	kbase_gpu_vm_lock(kctx);
+	res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
+	kbase_gpu_vm_unlock(kctx);
+	if (res)
+		goto no_mmap;
+
+	return va;
+
+no_mmap:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc:
+	kfree(reg);
+no_reg:
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
+#endif
+err:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_va_alloc);
+
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
+{
+	struct kbase_va_region *reg;
+	int err;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
+	KBASE_DEBUG_ASSERT(reg);
+	err = kbase_gpu_munmap(kctx, reg);
+	kbase_gpu_vm_unlock(kctx);
+	KBASE_DEBUG_ASSERT(!err);
+
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+			handle->cpu_va, handle->dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, handle->size,
+				handle->cpu_va, handle->dma_pa);
+#endif
+}
+KBASE_EXPORT_SYMBOL(kbase_va_free);
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
new file mode 100755
index 0000000..6471747
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -0,0 +1,216 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.h
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_MEM_LINUX_H_
+#define _KBASE_MEM_LINUX_H_
+
+/** A HWC dump mapping */
+struct kbase_hwc_dma_mapping {
+	void       *cpu_va;
+	dma_addr_t  dma_pa;
+	size_t      size;
+};
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment);
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages);
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u64 *gpu_va, u64 *va_pages,
+		u64 *flags);
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason);
+int kbase_mmap(struct file *file, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction
+ * mechanism.
+ * @kctx: The kbase context to initialize.
+ *
+ * Return: Zero on success or -errno on failure.
+ */
+int kbase_mem_evictable_init(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to de-initialize.
+ */
+void kbase_mem_evictable_deinit(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the grow
+ * @old_pages: The number of pages before the grow
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Expand the GPU mapping to encompass the new psychical pages which have
+ * been added to the allocation.
+ *
+ * Note: Caller must be holding the region lock.
+ */
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction
+ * @gpu_alloc: The physical allocation to make evictable
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Take the provided region and make all the physical pages within it
+ * reclaimable by the kernel, updating the per-process VM stats as well.
+ * Remove any CPU mappings (as these can't be removed in the shrinker callback
+ * as mmap_sem might already be taken) but leave the GPU mapping intact as
+ * and until the shrinker reclaims the allocation.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc);
+
+/**
+ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for
+ * eviction.
+ * @alloc: The physical allocation to remove eviction eligibility from.
+ *
+ * Return: True if the allocation had its backing restored and false if
+ * it hasn't.
+ *
+ * Make the physical pages in the region no longer reclaimable and update the
+ * per-process stats, if the shrinker has already evicted the memory then
+ * re-allocate it if the region is still alive.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
+
+struct kbase_vmap_struct {
+	u64 gpu_addr;
+	struct kbase_mem_phy_alloc *cpu_alloc;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+	phys_addr_t *cpu_pages;
+	phys_addr_t *gpu_pages;
+	void *addr;
+	size_t size;
+	bool is_cached;
+};
+
+
+/**
+ * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the
+ * requested access permissions are supported
+ * @kctx:         Context the VA range belongs to
+ * @gpu_addr:     Start address of VA range
+ * @size:         Size of VA range
+ * @prot_request: Flags indicating how the caller will then access the memory
+ * @map:          Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check
+ * whether the region should allow the intended access, and return an error if
+ * disallowed. This is essential for security of imported memory, particularly
+ * a user buf from SHM mapped into the process as RO. In that case, write
+ * access must be checked if the intention is for kernel to write to the
+ * memory.
+ *
+ * The checks are also there to help catch access errors on memory where
+ * security is not a concern: imported memory that is always RW, and memory
+ * that was allocated and owned by the process attached to @kctx. In this case,
+ * it helps to identify memory that was was mapped with the wrong access type.
+ *
+ * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases
+ * where either the security of memory is solely dependent on those flags, or
+ * when userspace code was expecting only the GPU to access the memory (e.g. HW
+ * workarounds).
+ *
+ */
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vmap - Map a GPU VA range into the kernel safely
+ * @kctx:     Context the VA range belongs to
+ * @gpu_addr: Start address of VA range
+ * @size:     Size of VA range
+ * @map:      Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no
+ * checks to ensure the security of e.g. imported user bufs from RO SHM.
+ */
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vunmap - Unmap a GPU VA range from the kernel
+ * @kctx: Context the VA range belongs to
+ * @map:  Structure describing the mapping from the corresponding kbase_vmap()
+ *        call
+ *
+ * Unmaps a GPU VA range from the kernel, given its @map structure obtained
+ * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * The reference taken on pages during kbase_vmap() is released.
+ */
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
+
+/** @brief Allocate memory from kernel space and map it onto the GPU
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param size   The size of the allocation in bytes
+ * @param handle An opaque structure used to contain the state needed to free the memory
+ * @return the VA for kernel space and GPU MMU
+ */
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
+
+/** @brief Free/unmap memory allocated by kbase_va_alloc
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param handle An opaque structure returned by the kbase_va_alloc function.
+ */
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
+
+#endif				/* _KBASE_MEM_LINUX_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
new file mode 100755
index 0000000..441180b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_MEM_LOWLEVEL_H
+#define _KBASE_MEM_LOWLEVEL_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/dma-mapping.h>
+
+/**
+ * @brief Flags for kbase_phy_allocator_pages_alloc
+ */
+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0)	/** Default allocation flag */
+#define KBASE_PHY_PAGES_FLAG_CLEAR   (1 << 0)	/** Clear the pages after allocation */
+#define KBASE_PHY_PAGES_FLAG_POISON  (1 << 1)	/** Fill the memory with a poison value */
+
+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON)
+
+#define KBASE_PHY_PAGES_POISON_VALUE  0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */
+
+enum kbase_sync_type {
+	KBASE_SYNC_TO_CPU,
+	KBASE_SYNC_TO_DEVICE
+};
+
+#endif				/* _KBASE_LOWLEVEL_H */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
new file mode 100755
index 0000000..9570618
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -0,0 +1,601 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/shrinker.h>
+#include <linux/atomic.h>
+#include <linux/version.h>
+
+/* This function is only provided for backwards compatibility with kernels
+ * which use the old carveout allocator.
+ *
+ * The forward declaration is to keep sparse happy.
+ */
+int __init kbase_carveout_mem_reserve(
+		phys_addr_t size);
+int __init kbase_carveout_mem_reserve(phys_addr_t size)
+{
+	return 0;
+}
+
+#define pool_dbg(pool, format, ...) \
+	dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format,	\
+		(pool->next_pool) ? "kctx" : "kbdev",	\
+		kbase_mem_pool_size(pool),	\
+		kbase_mem_pool_max_size(pool),	\
+		##__VA_ARGS__)
+
+#define NOT_DIRTY false
+#define NOT_RECLAIMED false
+
+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
+{
+	spin_lock(&pool->pool_lock);
+}
+
+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
+{
+	spin_unlock(&pool->pool_lock);
+}
+
+static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
+{
+	ssize_t max_size = kbase_mem_pool_max_size(pool);
+	ssize_t cur_size = kbase_mem_pool_size(pool);
+
+	return max(max_size - cur_size, (ssize_t)0);
+}
+
+static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool);
+}
+
+static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) == 0;
+}
+
+static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_add(&p->lru, &pool->page_list);
+	pool->cur_size++;
+
+	zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "added page\n");
+}
+
+static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_locked(pool, p);
+	kbase_mem_pool_unlock(pool);
+}
+
+static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_for_each_entry(p, page_list, lru) {
+		zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+	}
+
+	list_splice(page_list, &pool->page_list);
+	pool->cur_size += nr_pages;
+
+	pool_dbg(pool, "added %zu pages\n", nr_pages);
+}
+
+static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_list_locked(pool, page_list, nr_pages);
+	kbase_mem_pool_unlock(pool);
+}
+
+static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (kbase_mem_pool_is_empty(pool))
+		return NULL;
+
+	p = list_first_entry(&pool->page_list, struct page, lru);
+	list_del_init(&p->lru);
+	pool->cur_size--;
+
+	zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "removed page\n");
+
+	return p;
+}
+
+static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	kbase_mem_pool_lock(pool);
+	p = kbase_mem_pool_remove_locked(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return p;
+}
+
+static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+
+	dma_sync_single_for_device(dev, kbase_dma_addr(p),
+			PAGE_SIZE, DMA_BIDIRECTIONAL);
+}
+
+static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	clear_highpage(p);
+	kbase_mem_pool_sync_page(pool, p);
+}
+
+static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool,
+		struct page *p)
+{
+	/* Zero page before spilling */
+	kbase_mem_pool_zero_page(next_pool, p);
+
+	kbase_mem_pool_add(next_pool, p);
+}
+
+static struct page *kbase_mem_pool_alloc_page(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+	gfp_t gfp;
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \
+	LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+	/* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+	gfp = GFP_USER | __GFP_ZERO;
+#else
+	gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+	if (current->flags & PF_KTHREAD) {
+		/* Don't trigger OOM killer from kernel threads, e.g. when
+		 * growing memory on GPU page fault */
+		gfp |= __GFP_NORETRY;
+	}
+
+	p = alloc_page(gfp);
+	if (!p)
+		return NULL;
+
+	dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, dma_addr)) {
+		__free_page(p);
+		return NULL;
+	}
+
+	WARN_ON(dma_addr != page_to_phys(p));
+
+	kbase_set_dma_addr(p, dma_addr);
+
+	pool_dbg(pool, "alloced page from kernel\n");
+
+	return p;
+}
+
+static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr = kbase_dma_addr(p);
+
+	dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	kbase_clear_dma_addr(p);
+	__free_page(p);
+
+	pool_dbg(pool, "freed page to kernel\n");
+}
+
+static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	struct page *p;
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	return i;
+}
+
+static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	size_t nr_freed;
+
+	kbase_mem_pool_lock(pool);
+	nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	kbase_mem_pool_unlock(pool);
+
+	return nr_freed;
+}
+
+static size_t kbase_mem_pool_grow(struct kbase_mem_pool *pool,
+		size_t nr_to_grow)
+{
+	struct page *p;
+	size_t i;
+
+	for (i = 0; i < nr_to_grow && !kbase_mem_pool_is_full(pool); i++) {
+		p = kbase_mem_pool_alloc_page(pool);
+		if (!p)
+			break;
+		kbase_mem_pool_add(pool, p);
+	}
+
+	return i;
+}
+
+size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
+{
+	size_t cur_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	if (new_size < cur_size)
+		kbase_mem_pool_shrink(pool, cur_size - new_size);
+	else if (new_size > cur_size)
+		kbase_mem_pool_grow(pool, new_size - cur_size);
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	return cur_size;
+}
+
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size)
+{
+	size_t cur_size;
+	size_t nr_to_shrink;
+
+	kbase_mem_pool_lock(pool);
+
+	pool->max_size = max_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+	if (max_size < cur_size) {
+		nr_to_shrink = cur_size - max_size;
+		kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	}
+
+	kbase_mem_pool_unlock(pool);
+}
+
+
+static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+	pool_dbg(pool, "reclaim count: %zu\n", kbase_mem_pool_size(pool));
+	return kbase_mem_pool_size(pool);
+}
+
+static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+	unsigned long freed;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+
+	pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan);
+
+	freed = kbase_mem_pool_shrink(pool, sc->nr_to_scan);
+
+	pool_dbg(pool, "reclaim freed %ld pages\n", freed);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_pool_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_pool_reclaim_count_objects(s, sc);
+
+	return kbase_mem_pool_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool)
+{
+	pool->cur_size = 0;
+	pool->max_size = max_size;
+	pool->kbdev = kbdev;
+	pool->next_pool = next_pool;
+
+	spin_lock_init(&pool->pool_lock);
+	INIT_LIST_HEAD(&pool->page_list);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink;
+#else
+	pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects;
+	pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects;
+#endif
+	pool->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	pool->reclaim.batch = 0;
+#endif
+	register_shrinker(&pool->reclaim);
+
+	pool_dbg(pool, "initialized\n");
+
+	return 0;
+}
+
+void kbase_mem_pool_term(struct kbase_mem_pool *pool)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_spill = 0;
+	LIST_HEAD(spill_list);
+	int i;
+
+	pool_dbg(pool, "terminate()\n");
+
+	unregister_shrinker(&pool->reclaim);
+
+	kbase_mem_pool_lock(pool);
+	pool->max_size = 0;
+
+	if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool (may overspill) */
+		nr_to_spill = kbase_mem_pool_capacity(next_pool);
+		nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill);
+
+		/* Zero pages first without holding the next_pool lock */
+		for (i = 0; i < nr_to_spill; i++) {
+			p = kbase_mem_pool_remove_locked(pool);
+			kbase_mem_pool_zero_page(pool, p);
+			list_add(&p->lru, &spill_list);
+		}
+	}
+
+	while (!kbase_mem_pool_is_empty(pool)) {
+		/* Free remaining pages to kernel */
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	kbase_mem_pool_unlock(pool);
+
+	if (next_pool && nr_to_spill) {
+		/* Add new page list to next_pool */
+		kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill);
+
+		pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill);
+	}
+
+	pool_dbg(pool, "terminated\n");
+}
+
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	pool_dbg(pool, "alloc()\n");
+
+	p = kbase_mem_pool_remove(pool);
+
+	if (!p && pool->next_pool) {
+		/* Allocate via next pool */
+		return kbase_mem_pool_alloc(pool->next_pool);
+	}
+
+	if (!p) {
+		/* Get page from kernel */
+		p = kbase_mem_pool_alloc_page(pool);
+	}
+
+	return p;
+}
+
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+
+	pool_dbg(pool, "free()\n");
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add(pool, p);
+	} else if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool */
+		kbase_mem_pool_spill(next_pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages)
+{
+	struct page *p;
+	size_t nr_from_pool;
+	size_t i;
+	int err = -ENOMEM;
+
+	pool_dbg(pool, "alloc_pages(%zu):\n", nr_pages);
+
+	/* Get pages from this pool */
+	kbase_mem_pool_lock(pool);
+	nr_from_pool = min(nr_pages, kbase_mem_pool_size(pool));
+	for (i = 0; i < nr_from_pool; i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		pages[i] = page_to_phys(p);
+	}
+	kbase_mem_pool_unlock(pool);
+
+	if (i != nr_pages && pool->next_pool) {
+		/* Allocate via next pool */
+		err = kbase_mem_pool_alloc_pages(pool->next_pool,
+				nr_pages - i, pages + i);
+
+		if (err)
+			goto err_rollback;
+
+		i += nr_pages - i;
+	}
+
+	/* Get any remaining pages from kernel */
+	for (; i < nr_pages; i++) {
+		p = kbase_mem_pool_alloc_page(pool);
+		if (!p)
+			goto err_rollback;
+		pages[i] = page_to_phys(p);
+	}
+
+	pool_dbg(pool, "alloc_pages(%zu) done\n", nr_pages);
+
+	return 0;
+
+err_rollback:
+	kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED);
+	return err;
+}
+
+static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
+		size_t nr_pages, phys_addr_t *pages, bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first without holding the pool lock */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!pages[i]))
+			continue;
+
+		p = phys_to_page(pages[i]);
+
+		if (zero)
+			kbase_mem_pool_zero_page(pool, p);
+		else if (sync)
+			kbase_mem_pool_sync_page(pool, p);
+
+		list_add(&p->lru, &new_page_list);
+		nr_to_pool++;
+		pages[i] = 0;
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages, bool dirty, bool reclaimed)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	pool_dbg(pool, "free_pages(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
+
+		i += nr_to_pool;
+
+		if (i != nr_pages && next_pool) {
+			/* Spill to next pool (may overspill) */
+			nr_to_pool = kbase_mem_pool_capacity(next_pool);
+			nr_to_pool = min(nr_pages - i, nr_to_pool);
+
+			kbase_mem_pool_add_array(next_pool, nr_to_pool,
+					pages + i, true, dirty);
+			i += nr_to_pool;
+		}
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!pages[i]))
+			continue;
+
+		p = phys_to_page(pages[i]);
+		if (reclaimed)
+			zone_page_state_add(-1, page_zone(p),
+					NR_SLAB_RECLAIMABLE);
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = 0;
+	}
+
+	pool_dbg(pool, "free_pages(%zu) done\n", nr_pages);
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
new file mode 100755
index 0000000..493665b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <mali_kbase_mem_pool_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_trim(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops,
+		kbase_mem_pool_debugfs_size_get,
+		kbase_mem_pool_debugfs_size_set,
+		"%llu\n");
+
+static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_max_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_set_max_size(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops,
+		kbase_mem_pool_debugfs_max_size_get,
+		kbase_mem_pool_debugfs_max_size_set,
+		"%llu\n");
+
+void kbase_mem_pool_debugfs_add(struct dentry *parent,
+		struct kbase_mem_pool *pool)
+{
+	debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_size_fops);
+
+	debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_max_size_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
new file mode 100755
index 0000000..458f3f0
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_MEM_POOL_DEBUGFS_H
+#define _KBASE_MEM_POOL_DEBUGFS_H
+
+#include <mali_kbase.h>
+
+/**
+ * kbase_mem_pool_debugfs_add - add debugfs knobs for @pool
+ * @parent: Parent debugfs dentry
+ * @pool:   Memory pool to control
+ *
+ * Adds two debugfs files under @parent:
+ * - mem_pool_size: get/set the current size of @pool
+ * - mem_pool_max_size: get/set the max size of @pool
+ */
+void kbase_mem_pool_debugfs_add(struct dentry *parent,
+		struct kbase_mem_pool *pool);
+
+#endif  /*_KBASE_MEM_POOL_DEBUGFS_H*/
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
new file mode 100755
index 0000000..0359410
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,118 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Show callback for the @c mem_profile debugfs file.
+ *
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise
+ */
+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+
+	seq_putc(sfile, '\n');
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for mem_profile
+ */
+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_mem_profile_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+	.open = kbasep_mem_profile_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+				kctx->mem_profile_initialized);
+
+	if (!kctx->mem_profile_initialized) {
+		if (!debugfs_create_file("mem_profile", S_IRUGO,
+					kctx->kctx_dentry, kctx,
+					&kbasep_mem_profile_debugfs_fops)) {
+			err = -EAGAIN;
+		} else {
+			kctx->mem_profile_initialized = true;
+		}
+	}
+
+	if (kctx->mem_profile_initialized) {
+		kfree(kctx->mem_profile_data);
+		kctx->mem_profile_data = data;
+		kctx->mem_profile_size = size;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d",
+				err, kctx->mem_profile_initialized);
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return err;
+}
+
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+				kctx->mem_profile_initialized);
+
+	kfree(kctx->mem_profile_data);
+	kctx->mem_profile_data = NULL;
+	kctx->mem_profile_size = 0;
+
+	mutex_unlock(&kctx->mem_profile_lock);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	kfree(data);
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
new file mode 100755
index 0000000..a1dc2e0
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
@@ -0,0 +1,59 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs.h
+ * Header file for mem profiles entries in debugfs
+ *
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
+#define _KBASE_MEM_PROFILE_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Remove entry from Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
+
+/**
+ * @brief Insert @p data to the debugfs file so it can be read by userspace
+ *
+ * The function takes ownership of @p data and frees it later when new data
+ * is inserted.
+ *
+ * If the debugfs entry corresponding to the @p kctx doesn't exist,
+ * an attempt will be made to create it.
+ *
+ * @param kctx The context whose debugfs file @p data should be inserted to
+ * @param data A NULL-terminated string to be inserted to the debugfs file,
+ *             without the trailing new line character
+ * @param size The length of the @p data string
+ * @return 0 if @p data inserted correctly
+ *         -EAGAIN in case of error
+ * @post @ref mem_profile_initialized will be set to @c true
+ *       the first time this function succeeds.
+ */
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size);
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
new file mode 100755
index 0000000..82f0702
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs_buf_size.h
+ * Header file for the size of the buffer to accumulate the histogram report text in
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * The size of the buffer to accumulate the histogram report text in
+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56))
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
new file mode 100755
index 0000000..a78be98
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -0,0 +1,1992 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mmu.c
+ * Base kernel MMU management.
+ */
+
+/* #define DEBUG    1 */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_debug.h>
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_time.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+/**
+ * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs.
+ *
+ * If sync is not set then transactions still in flight when the flush is issued
+ * may use the old page tables and the data they write will not be written out
+ * to memory, this function returns after the flush has been issued but
+ * before all accesses which might effect the flushed region have completed.
+ *
+ * If sync is set then accesses in the flushed region will be drained
+ * before data is flush and invalidated through L1, L2 and into memory,
+ * after which point this function will return.
+ */
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync);
+
+/**
+ * kbase_mmu_sync_pgd - sync page directory to memory
+ * @kbdev:	Device pointer.
+ * @handle:	Address of DMA region.
+ * @size:       Size of the region to sync.
+ *
+ * This should be called after each page directory update.
+ */
+
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
+		dma_addr_t handle, size_t size)
+{
+	/* If page table is not coherent then ensure the gpu can read
+	 * the pages from memory
+	 */
+	if (kbdev->system_coherency != COHERENCY_ACE)
+		dma_sync_single_for_device(kbdev->dev, handle, size,
+				DMA_TO_DEVICE);
+}
+
+/*
+ * Definitions:
+ * - PGD: Page Directory.
+ * - PTE: Page Table Entry. A 64bit value pointing to the next
+ *        level of translation
+ * - ATE: Address Transation Entry. A 64bit value pointing to
+ *        a 4kB physical page.
+ */
+
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str);
+
+
+static size_t make_multiple(size_t minimum, size_t multiple)
+{
+	size_t remainder = minimum % multiple;
+
+	if (remainder == 0)
+		return minimum;
+
+	return minimum + multiple - remainder;
+}
+
+void page_fault_worker(struct work_struct *data)
+{
+	u64 fault_pfn;
+	u32 fault_status;
+	size_t new_pages;
+	size_t fault_rel_pfn;
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_va_region *region;
+	int err;
+	bool grown = false;
+
+	faulting_as = container_of(data, struct kbase_as, work_pagefault);
+	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+
+	fault_status = faulting_as->fault_status;
+	switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
+		/* need to check against the region to handle this one */
+		break;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Tranlation table bus fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
+		/* nothing to do, but we don't expect this fault currently */
+		dev_warn(kbdev->dev, "Access flag unexpectedly set");
+		goto fault_done;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
+
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Address size fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Memory attributes fault");
+		goto fault_done;
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	default:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Unknown fault code");
+		goto fault_done;
+	}
+
+	/* so we have a translation fault, let's see if it is for growable
+	 * memory */
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx,
+			faulting_as->fault_addr);
+	if (!region || region->flags & KBASE_REG_FREE) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if ((region->flags & GROWABLE_FLAGS_REQUIRED)
+			!= GROWABLE_FLAGS_REQUIRED) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not growable");
+		goto fault_done;
+	}
+
+	if ((region->flags & KBASE_REG_DONT_NEED)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Don't need memory can't be grown");
+		goto fault_done;
+	}
+
+	/* find the size we need to grow it by */
+	/* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address
+	 * validating the fault_adress to be within a size_t from the start_pfn */
+	fault_rel_pfn = fault_pfn - region->start_pfn;
+
+	if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
+		dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+				faulting_as->fault_addr, region->start_pfn,
+				region->start_pfn +
+				kbase_reg_current_backed_size(region));
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* [1] in case another page fault occurred while we were
+		 * handling the (duplicate) page fault we need to ensure we
+		 * don't loose the other page fault as result of us clearing
+		 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
+		 * an UNLOCK command that will retry any stalled memory
+		 * transaction (which should cause the other page fault to be
+		 * raised again).
+		 */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+
+		goto fault_done;
+	}
+
+	new_pages = make_multiple(fault_rel_pfn -
+			kbase_reg_current_backed_size(region) + 1,
+			region->extent);
+
+	/* cap to max vsize */
+	if (new_pages + kbase_reg_current_backed_size(region) >
+			region->nr_pages)
+		new_pages = region->nr_pages -
+				kbase_reg_current_backed_size(region);
+
+	if (0 == new_pages) {
+		/* Duplicate of a fault we've already handled, nothing to do */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* See comment [1] about UNLOCK usage */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+		goto fault_done;
+	}
+
+	if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) {
+		if (region->gpu_alloc != region->cpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					region->cpu_alloc, new_pages) == 0) {
+				grown = true;
+			} else {
+				kbase_free_phy_pages_helper(region->gpu_alloc,
+						new_pages);
+			}
+		} else {
+			grown = true;
+		}
+	}
+
+
+	if (grown) {
+		u64 pfn_offset;
+		u32 op;
+
+		/* alloc success */
+		KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
+
+		/* set up the new pages */
+		pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
+		/*
+		 * Note:
+		 * Issuing an MMU operation will unlock the MMU and cause the
+		 * translation to be replayed. If the page insertion fails then
+		 * rather then trying to continue the context should be killed
+		 * so the no_flush version of insert_pages is used which allows
+		 * us to unlock the MMU as we see fit.
+		 */
+		err = kbase_mmu_insert_pages_no_flush(kctx,
+				region->start_pfn + pfn_offset,
+				&kbase_get_gpu_phy_pages(region)[pfn_offset],
+				new_pages, region->flags);
+		if (err) {
+			kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
+			if (region->gpu_alloc != region->cpu_alloc)
+				kbase_free_phy_pages_helper(region->cpu_alloc,
+						new_pages);
+			kbase_gpu_vm_unlock(kctx);
+			/* The locked VA region will be unlocked and the cache invalidated in here */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page table update failure");
+			goto fault_done;
+		}
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
+#endif
+		kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages);
+
+		/* AS transaction begin */
+		mutex_lock(&faulting_as->transaction_mutex);
+
+		/* flush L2 and unlock the VA (resumes the MMU) */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+			op = AS_COMMAND_FLUSH;
+		else
+			op = AS_COMMAND_FLUSH_PT;
+
+		/* clear MMU interrupt - this needs to be done after updating
+		 * the page tables but before issuing a FLUSH command. The
+		 * FLUSH cmd has a side effect that it restarts stalled memory
+		 * transactions in other address spaces which may cause
+		 * another fault to occur. If we didn't clear the interrupt at
+		 * this stage a new IRQ might not be raised when the GPU finds
+		 * a MMU IRQ is already pending.
+		 */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
+					  faulting_as->fault_addr >> PAGE_SHIFT,
+					  new_pages,
+					  op, 1);
+
+		mutex_unlock(&faulting_as->transaction_mutex);
+		/* AS transaction end */
+
+		/* reenable this in the mask */
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* failed to extend, handle as a normal PF */
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Page allocation failure");
+	}
+
+fault_done:
+	/*
+	 * By this point, the fault was handled in some way,
+	 * so release the ctx refcount
+	 */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
+{
+	u64 *page;
+	int i;
+	struct page *p;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
+	kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	p = kbase_mem_pool_alloc(&kctx->mem_pool);
+	if (!p)
+		goto sub_pages;
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+
+	page = kmap(p);
+	if (NULL == page)
+		goto alloc_free;
+
+	kbase_process_page_usage_inc(kctx, 1);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
+		kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
+
+	kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+	kunmap(p);
+	return page_to_phys(p);
+
+alloc_free:
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+sub_pages:
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
+
+/* Given PGD PFN for level N, return PGD PFN for level N+1 */
+static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	p = pfn_to_page(PFN_DOWN(pgd));
+	page = kmap(p);
+	if (NULL == page) {
+		dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
+		return 0;
+	}
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+
+	if (!target_pgd) {
+		target_pgd = kbase_mmu_alloc_pgd(kctx);
+		if (!target_pgd) {
+			dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+			kunmap(p);
+			return 0;
+		}
+
+		kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+		/* Rely on the caller to update the address space flags. */
+	}
+
+	kunmap(p);
+	return target_pgd;
+}
+
+static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd = kctx->pgd;
+	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
+		pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l);
+		/* Handle failure condition */
+		if (!pgd) {
+			dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n");
+			return 0;
+		}
+	}
+
+	return pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+
+	KBASE_DEBUG_ASSERT(pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail */
+	KBASE_DEBUG_ASSERT(NULL != page);
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+	/* As we are recovering from what has already been set up, we should have a target_pgd */
+	KBASE_DEBUG_ASSERT(0 != target_pgd);
+	kunmap_atomic(page);
+	return target_pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd = kctx->pgd;
+
+	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
+		pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
+		/* Should never fail */
+		KBASE_DEBUG_ASSERT(0 != pgd);
+	}
+
+	return pgd;
+}
+
+static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vpfn,
+					      size_t nr)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn);
+		KBASE_DEBUG_ASSERT(0 != pgd);
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+
+		pgd_page = kmap_atomic(p);
+		KBASE_DEBUG_ASSERT(NULL != pgd_page);
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+		kunmap_atomic(pgd_page);
+	}
+}
+
+/*
+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					phys_addr_t phys, size_t nr,
+					unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_single_page only partially completes we need to be
+	 * able to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > remain)
+			count = remain;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+			kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+					phys, flags);
+		}
+
+		vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table.
+		 * If further pages need inserting and fail we need to undo what
+		 * has already taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_pages only partially completes we need to be able
+	 * to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > remain)
+			count = remain;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+			kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+					phys[i], flags);
+		}
+
+		phys += count;
+		vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table. If further pages
+		 * need inserting and fail we need to undo what has already
+		 * taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags)
+{
+	int err;
+
+	err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+
+/**
+ * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
+ * without retaining the kbase context.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
+ * other locking.
+ */
+static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+	u32 op;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	if (sync)
+		op = AS_COMMAND_FLUSH_MEM;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	err = kbase_mmu_hw_do_operation(kbdev,
+				&kbdev->as[kctx->as_nr],
+				kctx, vpfn, nr, op, 0);
+#if KBASE_GPU_RESET_EN
+	if (err) {
+		/* Flush failed to complete, assume the
+		 * GPU has hung and perform a reset to
+		 * recover */
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_NO_MALI
+	/*
+	 * As this function could be called in interrupt context the sync
+	 * request can't block. Instead log the request and the next flush
+	 * request will pick it up.
+	 */
+	if ((!err) && sync &&
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367))
+		atomic_set(&kctx->drain_pending, 1);
+#endif /* !CONFIG_MALI_NO_MALI */
+}
+
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev;
+	bool ctx_is_in_runpool;
+#ifndef CONFIG_MALI_NO_MALI
+	bool drain_pending = false;
+
+	if (atomic_xchg(&kctx->drain_pending, 0))
+		drain_pending = true;
+#endif /* !CONFIG_MALI_NO_MALI */
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	kbdev = kctx->kbdev;
+	ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
+
+	if (ctx_is_in_runpool) {
+		KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+		if (!kbase_pm_context_active_handle_suspend(kbdev,
+			KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+			int err;
+			u32 op;
+
+			/* AS transaction begin */
+			mutex_lock(&kbdev->as[
+					kctx->as_nr].transaction_mutex);
+
+			if (sync)
+				op = AS_COMMAND_FLUSH_MEM;
+			else
+				op = AS_COMMAND_FLUSH_PT;
+
+			err = kbase_mmu_hw_do_operation(kbdev,
+						&kbdev->as[kctx->as_nr],
+						kctx, vpfn, nr, op, 0);
+
+#if KBASE_GPU_RESET_EN
+			if (err) {
+				/* Flush failed to complete, assume the
+				 * GPU has hung and perform a reset to
+				 * recover */
+				dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
+
+				if (kbase_prepare_to_reset_gpu(kbdev))
+					kbase_reset_gpu(kbdev);
+			}
+#endif /* KBASE_GPU_RESET_EN */
+
+			mutex_unlock(&kbdev->as[
+					kctx->as_nr].transaction_mutex);
+			/* AS transaction end */
+
+#ifndef CONFIG_MALI_NO_MALI
+			/*
+			 * The transaction lock must be dropped before here
+			 * as kbase_wait_write_flush could take it if
+			 * the GPU was powered down (static analysis doesn't
+			 * know this can't happen).
+			 */
+			drain_pending |= (!err) && sync &&
+					kbase_hw_has_issue(kctx->kbdev,
+							BASE_HW_ISSUE_6367);
+			if (drain_pending) {
+				/* Wait for GPU to flush write buffer */
+				kbase_wait_write_flush(kctx);
+			}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+			kbase_pm_context_idle(kbdev);
+		}
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+}
+
+void kbase_mmu_update(struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the runpool_irq lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex);
+
+	kctx->kbdev->mmu_mode->update(kctx);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_update);
+
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	kbdev->mmu_mode->disable_as(kbdev, as_nr);
+}
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the runpool_irq lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	/*
+	 * The address space is being disabled, drain all knowledge of it out
+	 * from the caches as pages and page tables might be freed after this.
+	 *
+	 * The job scheduler code will already be holding the locks and context
+	 * so just do the flush.
+	 */
+	kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
+
+	kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+
+/*
+ * We actually only discard the ATE, and not the page table
+ * pages. There is a potential DoS here, as we'll leak memory by
+ * having PTEs that are potentially unused.  Will require physical
+ * page accounting, so MMU pages are part of the process allocation.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	struct kbase_device *kbdev;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
+
+	if (0 == nr) {
+		/* early out if nothing to do */
+		return 0;
+	}
+
+	mutex_lock(&kctx->mmu_lock);
+
+	kbdev = kctx->kbdev;
+	mmu_mode = kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
+			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+/**
+ * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
+ * This call is being triggered as a response to the changes of the mem attributes
+ *
+ * @pre : The caller is responsible for validating the memory attributes
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags on GPU PFN 0x%llx from phys %p, %zu pages",
+			vpfn, phys, nr);
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		size_t count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
+			dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i],
+					flags);
+
+		phys += count;
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(pfn_to_page(PFN_DOWN(pgd)));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
+}
+
+/* This is a debug feature only */
+static void mmu_check_unused(struct kbase_context *kctx, phys_addr_t pgd)
+{
+	u64 *page;
+	int i;
+
+	page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != page);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		if (kctx->kbdev->mmu_mode->ate_is_valid(page[i]))
+			beenthere(kctx, "live pte %016lx", (unsigned long)page[i]);
+	}
+	kunmap_atomic(page);
+}
+
+static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != pgd_page);
+	/* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */
+	memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+	kunmap_atomic(pgd_page);
+	pgd_page = pgd_page_buffer;
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+		if (target_pgd) {
+			if (level < (MIDGARD_MMU_BOTTOMLEVEL - 1)) {
+				mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
+			} else {
+				/*
+				 * So target_pte is a level-3 page.
+				 * As a leaf, it is safe to free it.
+				 * Unless we have live pages attached to it!
+				 */
+				mmu_check_unused(kctx, target_pgd);
+			}
+
+			beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1);
+			if (zap) {
+				struct page *p = phys_to_page(target_pgd);
+
+				kbase_mem_pool_free(&kctx->mem_pool, p, true);
+				kbase_process_page_usage_dec(kctx, 1);
+				kbase_atomic_sub_pages(1, &kctx->used_pages);
+				kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+			}
+		}
+	}
+}
+
+int kbase_mmu_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
+
+	mutex_init(&kctx->mmu_lock);
+
+	/* Preallocate MMU depth of four pages for mmu_teardown_level to use */
+	kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+
+	if (NULL == kctx->mmu_teardown_pages)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void kbase_mmu_term(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	kfree(kctx->mmu_teardown_pages);
+	kctx->mmu_teardown_pages = NULL;
+}
+
+void kbase_mmu_free_pgd(struct kbase_context *kctx)
+{
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	mutex_lock(&kctx->mmu_lock);
+	mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages);
+	mutex_unlock(&kctx->mmu_lock);
+
+	beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
+	kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true);
+	kbase_process_page_usage_dec(kctx, 1);
+	new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
+
+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
+	size_t dump_size;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+	if (!pgd_page) {
+		dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
+		return 0;
+	}
+
+	if (*size_left >= size) {
+		/* A modified physical address that contains the page table level */
+		u64 m_pgd = pgd | level;
+
+		/* Put the modified physical address in the output buffer */
+		memcpy(*buffer, &m_pgd, sizeof(m_pgd));
+		*buffer += sizeof(m_pgd);
+
+		/* Followed by the page table itself */
+		memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
+		*buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
+
+		*size_left -= size;
+	}
+
+	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
+		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+			if (mmu_mode->pte_is_valid(pgd_page[i])) {
+				target_pgd = mmu_mode->pte_to_phy_addr(
+						pgd_page[i]);
+
+				dump_size = kbasep_mmu_dump_level(kctx,
+						target_pgd, level + 1,
+						buffer, size_left);
+				if (!dump_size) {
+					kunmap(pfn_to_page(PFN_DOWN(pgd)));
+					return 0;
+				}
+				size += dump_size;
+			}
+		}
+	}
+
+	kunmap(pfn_to_page(PFN_DOWN(pgd)));
+
+	return size;
+}
+
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
+{
+	void *kaddr;
+	size_t size_left;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (0 == nr_pages) {
+		/* can't dump in a 0 sized buffer, early out */
+		return NULL;
+	}
+
+	mutex_lock(&kctx->mmu_lock);
+
+	size_left = nr_pages * PAGE_SIZE;
+
+	KBASE_DEBUG_ASSERT(0 != size_left);
+	kaddr = vmalloc_user(size_left);
+
+	if (kaddr) {
+		u64 end_marker = 0xFFULL;
+		char *buffer;
+		char *mmu_dump_buffer;
+		u64 config[3];
+		size_t size;
+
+		buffer = (char *)kaddr;
+		mmu_dump_buffer = buffer;
+
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
+			struct kbase_mmu_setup as_setup;
+
+			kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup);
+			config[0] = as_setup.transtab;
+			config[1] = as_setup.memattr;
+			config[2] = as_setup.transcfg;
+			memcpy(buffer, &config, sizeof(config));
+			mmu_dump_buffer += sizeof(config);
+			size_left -= sizeof(config);
+		}
+
+
+
+		size = kbasep_mmu_dump_level(kctx,
+				kctx->pgd,
+				MIDGARD_MMU_TOPLEVEL,
+				&mmu_dump_buffer,
+				&size_left);
+
+		if (!size)
+			goto fail_free;
+
+		/* Add on the size for the end marker */
+		size += sizeof(u64);
+		/* Add on the size for the config */
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4))
+			size += sizeof(config);
+
+
+		if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) {
+			/* The buffer isn't big enough - free the memory and return failure */
+			goto fail_free;
+		}
+
+		/* Add the end marker */
+		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return kaddr;
+
+fail_free:
+	vfree(kaddr);
+	mutex_unlock(&kctx->mmu_lock);
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_dump);
+
+void bus_fault_worker(struct work_struct *data)
+{
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif /* KBASE_GPU_RESET_EN */
+
+	faulting_as = container_of(data, struct kbase_as, work_busfault);
+
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
+	if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		unsigned long flags;
+
+		/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+		/* AS transaction begin */
+		mutex_lock(&kbdev->as[as_no].transaction_mutex);
+
+		/* Set the MMU into unmapped mode */
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+		kbase_mmu_disable(kctx);
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock,
+				flags);
+
+		mutex_unlock(&kbdev->as[as_no].transaction_mutex);
+		/* AS transaction end */
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+
+		kbase_pm_context_idle(kbdev);
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
+{
+	const char *e;
+
+	switch (exception_code) {
+		/* Non-Fault Status code */
+	case 0x00:
+		e = "NOT_STARTED/IDLE/OK";
+		break;
+	case 0x01:
+		e = "DONE";
+		break;
+	case 0x02:
+		e = "INTERRUPTED";
+		break;
+	case 0x03:
+		e = "STOPPED";
+		break;
+	case 0x04:
+		e = "TERMINATED";
+		break;
+	case 0x08:
+		e = "ACTIVE";
+		break;
+		/* Job exceptions */
+	case 0x40:
+		e = "JOB_CONFIG_FAULT";
+		break;
+	case 0x41:
+		e = "JOB_POWER_FAULT";
+		break;
+	case 0x42:
+		e = "JOB_READ_FAULT";
+		break;
+	case 0x43:
+		e = "JOB_WRITE_FAULT";
+		break;
+	case 0x44:
+		e = "JOB_AFFINITY_FAULT";
+		break;
+	case 0x48:
+		e = "JOB_BUS_FAULT";
+		break;
+	case 0x50:
+		e = "INSTR_INVALID_PC";
+		break;
+	case 0x51:
+		e = "INSTR_INVALID_ENC";
+		break;
+	case 0x52:
+		e = "INSTR_TYPE_MISMATCH";
+		break;
+	case 0x53:
+		e = "INSTR_OPERAND_FAULT";
+		break;
+	case 0x54:
+		e = "INSTR_TLS_FAULT";
+		break;
+	case 0x55:
+		e = "INSTR_BARRIER_FAULT";
+		break;
+	case 0x56:
+		e = "INSTR_ALIGN_FAULT";
+		break;
+	case 0x58:
+		e = "DATA_INVALID_FAULT";
+		break;
+	case 0x59:
+		e = "TILE_RANGE_FAULT";
+		break;
+	case 0x5A:
+		e = "ADDR_RANGE_FAULT";
+		break;
+	case 0x60:
+		e = "OUT_OF_MEMORY";
+		break;
+		/* GPU exceptions */
+	case 0x80:
+		e = "DELAYED_BUS_FAULT";
+		break;
+	case 0x88:
+		e = "SHAREABILITY_FAULT";
+		break;
+		/* MMU exceptions */
+	case 0xC0:
+	case 0xC1:
+	case 0xC2:
+	case 0xC3:
+	case 0xC4:
+	case 0xC5:
+	case 0xC6:
+	case 0xC7:
+		e = "TRANSLATION_FAULT";
+		break;
+	case 0xC8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xC9:
+	case 0xCA:
+	case 0xCB:
+	case 0xCC:
+	case 0xCD:
+	case 0xCE:
+	case 0xCF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		e = "PERMISSION_FAULT";
+		break;
+	case 0xD0:
+	case 0xD1:
+	case 0xD2:
+	case 0xD3:
+	case 0xD4:
+	case 0xD5:
+	case 0xD6:
+	case 0xD7:
+		e = "TRANSTAB_BUS_FAULT";
+		break;
+	case 0xD8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xD9:
+	case 0xDA:
+	case 0xDB:
+	case 0xDC:
+	case 0xDD:
+	case 0xDE:
+	case 0xDF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		e = "ACCESS_FLAG";
+		break;
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xE0:
+	case 0xE1:
+	case 0xE2:
+	case 0xE3:
+	case 0xE4:
+	case 0xE5:
+	case 0xE6:
+	case 0xE7:
+		e = "ADDRESS_SIZE_FAULT";
+		break;
+	case 0xE8:
+	case 0xE9:
+	case 0xEA:
+	case 0xEB:
+	case 0xEC:
+	case 0xED:
+	case 0xEE:
+	case 0xEF:
+		e = "MEMORY_ATTRIBUTES_FAULT";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		break;
+	default:
+		e = "UNKNOWN";
+		break;
+	};
+
+	return e;
+}
+
+static const char *access_type_name(struct kbase_device *kbdev,
+		u32 fault_status)
+{
+	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		return "ATOMIC";
+#else
+		return "UNKNOWN";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		return "READ";
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		return "WRITE";
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		return "EXECUTE";
+	default:
+		WARN_ON(1);
+		return NULL;
+	}
+}
+
+/**
+ * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on.
+ */
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str)
+{
+	unsigned long flags;
+	int exception_type;
+	int access_type;
+	int source_id;
+	int as_no;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif
+
+	as_no = as->number;
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	/* ASSERT that the context won't leave the runpool */
+	KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0);
+
+	/* decode the fault status */
+	exception_type = as->fault_status & 0xFF;
+	access_type = (as->fault_status >> 8) & 0x3;
+	source_id = (as->fault_status >> 16);
+
+	/* terminal fault, print info about the fault */
+	dev_err(kbdev->dev,
+		"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+		"Reason: %s\n"
+		"raw fault status 0x%X\n"
+		"decoded fault status: %s\n"
+		"exception type 0x%X: %s\n"
+		"access type 0x%X: %s\n"
+		"source id 0x%X\n"
+		"pid: %d\n",
+		as_no, as->fault_addr,
+		reason_str,
+		as->fault_status,
+		(as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+		exception_type, kbase_exception_name(kbdev, exception_type),
+		access_type, access_type_name(kbdev, as->fault_status),
+		source_id,
+		kctx->pid);
+
+	/* hardware counters dump fault handling */
+	if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
+			(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_DUMPING)) {
+		unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+
+		if ((as->fault_addr >= kbdev->hwcnt.addr) &&
+				(as->fault_addr < (kbdev->hwcnt.addr +
+						(num_core_groups * 2048))))
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+	}
+
+	/* Stop the kctx from submitting more jobs and cause it to be scheduled
+	 * out/rescheduled - this will occur on releasing the context's refcount */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	kbasep_js_clear_submit_allowed(js_devdata, kctx);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	/* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
+	 * context can appear in the job slots from this point on */
+	kbase_backend_jm_kill_jobs_from_kctx(kctx);
+	/* AS transaction begin */
+	mutex_lock(&as->transaction_mutex);
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	kbase_mmu_disable(kctx);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	mutex_unlock(&as->transaction_mutex);
+	/* AS transaction end */
+	/* Clear down the fault */
+	kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+	kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+}
+
+void kbasep_as_do_poke(struct work_struct *work)
+{
+	struct kbase_as *as;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(work);
+	as = container_of(work, struct kbase_as, poke_work);
+	kbdev = container_of(as, struct kbase_device, as[as->number]);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	/* GPU power will already be active by virtue of the caller holding a JS
+	 * reference on the address space, and will not release it until this worker
+	 * has finished */
+
+	/* Further to the comment above, we know that while this function is running
+	 * the AS will not be released as before the atom is released this workqueue
+	 * is flushed (in kbase_as_poking_timer_release_atom)
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
+
+	/* AS transaction begin */
+	mutex_lock(&as->transaction_mutex);
+	/* Force a uTLB invalidate */
+	kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
+				  AS_COMMAND_UNLOCK, 0);
+	mutex_unlock(&as->transaction_mutex);
+	/* AS transaction end */
+
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+	if (as->poke_refcount &&
+		!(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
+		/* Only queue up the timer if we need it, and we're not trying to kill it */
+		hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
+	}
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+}
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_as *as;
+	int queue_work_ret;
+
+	KBASE_DEBUG_ASSERT(NULL != timer);
+	as = container_of(timer, struct kbase_as, poke_timer);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+	KBASE_DEBUG_ASSERT(queue_work_ret);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * Retain the poking timer on an atom's context (if the atom hasn't already
+ * done so), and start the timer (if it's not already started).
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that's running on the GPU.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock
+ *
+ * This can be called safely from atomic context
+ */
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (katom->poking)
+		return;
+
+	katom->poking = 1;
+
+	/* It's safe to work on the as/as_nr without an explicit reference,
+	 * because the caller holds the runpool_irq lock, and the atom itself
+	 * was also running and had already taken a reference  */
+	as = &kbdev->as[kctx->as_nr];
+
+	if (++(as->poke_refcount) == 1) {
+		/* First refcount for poke needed: check if not already in flight */
+		if (!as->poke_state) {
+			/* need to start poking */
+			as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
+			queue_work(as->poke_wq, &as->poke_work);
+		}
+	}
+}
+
+/**
+ * If an atom holds a poking timer, release it and wait for it to finish
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that still has a JS reference on the context
+ *
+ * This must \b not be called from atomic context, since it can sleep.
+ */
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	if (!katom->poking)
+		return;
+
+	as = &kbdev->as[kctx->as_nr];
+
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+	KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	if (--(as->poke_refcount) == 0) {
+		as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+		hrtimer_cancel(&as->poke_timer);
+		flush_workqueue(as->poke_wq);
+
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+
+		/* Re-check whether it's still needed */
+		if (as->poke_refcount) {
+			int queue_work_ret;
+			/* Poking still needed:
+			 * - Another retain will not be starting the timer or queueing work,
+			 * because it's still marked as in-flight
+			 * - The hrtimer has finished, and has not started a new timer or
+			 * queued work because it's been marked as killing
+			 *
+			 * So whatever happens now, just queue the work again */
+			as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
+			queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+			KBASE_DEBUG_ASSERT(queue_work_ret);
+		} else {
+			/* It isn't - so mark it as not in flight, and not killing */
+			as->poke_state = 0u;
+
+			/* The poke associated with the atom has now finished. If this is
+			 * also the last atom on the context, then we can guarentee no more
+			 * pokes (and thus no more poking register accesses) will occur on
+			 * the context until new atoms are run */
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+	katom->poking = 0;
+}
+
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (!kctx) {
+		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n",
+				 kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
+				 as->number, as->fault_addr);
+
+		/* Since no ctx was found, the MMU must be disabled. */
+		WARN_ON(as->current_setup.transtab);
+
+		if (kbase_as_has_bus_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		} else if (kbase_as_has_page_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+		}
+
+#if KBASE_GPU_RESET_EN
+		if (kbase_as_has_bus_fault(as) &&
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+			bool reset_status;
+			/*
+			 * Reset the GPU, like in bus_fault_worker, in case an
+			 * earlier error hasn't been properly cleared by this
+			 * point.
+			 */
+			dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+			reset_status = kbase_prepare_to_reset_gpu_locked(kbdev);
+			if (reset_status)
+				kbase_reset_gpu_locked(kbdev);
+		}
+#endif /* KBASE_GPU_RESET_EN */
+
+		return;
+	}
+
+	if (kbase_as_has_bus_fault(as)) {
+		/*
+		 * hw counters dumping in progress, signal the
+		 * other thread that it failed
+		 */
+		if ((kbdev->hwcnt.kctx == kctx) &&
+		    (kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_DUMPING))
+			kbdev->hwcnt.backend.state =
+						KBASE_INSTR_STATE_FAULT;
+
+		/*
+		 * Stop the kctx from submitting more jobs and cause it
+		 * to be scheduled out/rescheduled when all references
+		 * to it are released
+		 */
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		dev_warn(kbdev->dev,
+				"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+				as->number, as->fault_addr,
+				as->fault_extra_addr);
+#else
+		dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+				as->number, as->fault_addr);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+		/*
+		 * We need to switch to UNMAPPED mode - but we do this in a
+		 * worker so that we can sleep
+		 */
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault));
+		WARN_ON(work_pending(&as->work_busfault));
+		queue_work(as->pf_wq, &as->work_busfault);
+		atomic_inc(&kbdev->faults_pending);
+	} else {
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault));
+		WARN_ON(work_pending(&as->work_pagefault));
+		queue_work(as->pf_wq, &as->work_pagefault);
+		atomic_inc(&kbdev->faults_pending);
+	}
+}
+
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		flush_workqueue(as->pf_wq);
+	}
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
new file mode 100755
index 0000000..986e959
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Interface file for accessing MMU hardware functionality
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_page MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_intro_sec Introduction
+ * This module provides an abstraction for accessing the functionality provided
+ * by the midgard MMU and thus allows all MMU HW access to be contained within
+ * one common place and allows for different backends (implementations) to
+ * be provided.
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_H_
+#define _MALI_KBASE_MMU_HW_H_
+
+/* Forward declarations */
+struct kbase_device;
+struct kbase_as;
+struct kbase_context;
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw  MMU access APIs
+ * @{
+ */
+
+/** @brief MMU fault type descriptor.
+ */
+enum kbase_mmu_fault_type {
+	KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
+	KBASE_MMU_FAULT_TYPE_PAGE,
+	KBASE_MMU_FAULT_TYPE_BUS,
+	KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED,
+	KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED
+};
+
+/** @brief Configure an address space for use.
+ *
+ * Configure the MMU using the address space details setup in the
+ * @ref kbase_context structure.
+ *
+ * @param[in]  kbdev          kbase device to configure.
+ * @param[in]  as             address space to configure.
+ * @param[in]  kctx           kbase context to configure.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev,
+		struct kbase_as *as, struct kbase_context *kctx);
+
+/** @brief Issue an operation to the MMU.
+ *
+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
+ * is associated with the provided @ref kbase_context over the specified range
+ *
+ * @param[in]  kbdev         kbase device to issue the MMU operation on.
+ * @param[in]  as            address space to issue the MMU operation on.
+ * @param[in]  kctx          kbase context to issue the MMU operation on.
+ * @param[in]  vpfn          MMU Virtual Page Frame Number to start the
+ *                           operation on.
+ * @param[in]  nr            Number of pages to work on.
+ * @param[in]  type          Operation type (written to ASn_COMMAND).
+ * @param[in]  handling_irq  Is this operation being called during the handling
+ *                           of an interrupt?
+ *
+ * @return Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type,
+		unsigned int handling_irq);
+
+/** @brief Clear a fault that has been previously reported by the MMU.
+ *
+ * Clear a bus error or page fault that has been reported by the MMU.
+ *
+ * @param[in]  kbdev         kbase device to  clear the fault from.
+ * @param[in]  as            address space to  clear the fault from.
+ * @param[in]  kctx          kbase context to clear the fault from or NULL.
+ * @param[in]  type          The type of fault that needs to be cleared.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @brief Enable fault that has been previously reported by the MMU.
+ *
+ * After a page fault or bus error has been reported by the MMU these
+ * will be disabled. After these are handled this function needs to be
+ * called to enable the page fault or bus error fault again.
+ *
+ * @param[in]  kbdev         kbase device to again enable the fault from.
+ * @param[in]  as            address space to again enable the fault from.
+ * @param[in]  kctx          kbase context to again enable the fault from.
+ * @param[in]  type          The type of fault that needs to be enabled again.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @} *//* end group mali_kbase_mmu_hw */
+/** @} *//* end group base_kbase_api */
+
+#endif	/* _MALI_KBASE_MMU_HW_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
new file mode 100755
index 0000000..2449c60
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _MALI_KBASE_MMU_MODE_
+#define _MALI_KBASE_MMU_MODE_
+
+#include <linux/types.h>
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_device;
+struct kbase_as;
+struct kbase_mmu_setup;
+
+struct kbase_mmu_mode {
+	void (*update)(struct kbase_context *kctx);
+	void (*get_as_setup)(struct kbase_context *kctx,
+			struct kbase_mmu_setup * const setup);
+	void (*disable_as)(struct kbase_device *kbdev, int as_nr);
+	phys_addr_t (*pte_to_phy_addr)(u64 entry);
+	int (*ate_is_valid)(u64 ate);
+	int (*pte_is_valid)(u64 pte);
+	void (*entry_set_ate)(u64 *entry, phys_addr_t phy, unsigned long flags);
+	void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+	void (*entry_invalidate)(u64 *entry);
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
+
+#endif /* _MALI_KBASE_MMU_MODE_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
new file mode 100644
index 0000000..791f3ed
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
@@ -0,0 +1,200 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase_mmu_mode.h"
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+/* For valid ATEs bit 1 = (level == 3) ? 1 : 0.
+ * The MMU is only ever configured by the driver so that ATEs
+ * are at level 3, so bit 1 should always be set
+ */
+#define ENTRY_IS_ATE        3ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_ACCESS_RW (1ULL << 6)     /* bits 6:7 */
+#define ENTRY_ACCESS_RO (3ULL << 6)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+	*pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler keeping cached copies of
+	 * memory, we have to explicitly say that we have updated memory.
+	 *
+	 * Note: We could manually move the data ourselves into R0 and
+	 * R1 by specifying register variables that are explicitly
+	 * given registers assignments, the down side of this is that
+	 * we have to assume cpu endianness.  To avoid this we can use
+	 * the ldrd to read the data from memory into R0 and R1 which
+	 * will respect the cpu endianness, we then use strd to make
+	 * the 64 bit assignment to the page table entry.
+	 */
+	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+			"strd r0, r1, [%[pte]]\n\t"
+			: "=m" (*pte)
+			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+			: "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register.
+	 */
+	setup->memattr =
+		(AS_MEMATTR_IMPL_DEF_CACHE_POLICY <<
+			(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_FORCE_TO_CACHE_ALL    <<
+			(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+		(AS_MEMATTR_WRITE_ALLOC           <<
+			(AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_IMPL_DEF   <<
+			(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_WA         <<
+			(AS_MEMATTR_INDEX_OUTER_WA * 8));
+
+	setup->transtab = (u64)kctx->pgd & AS_TRANSTAB_BASE_MASK;
+	setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K;
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = 0ULL;
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED;
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* Set access flags - note that AArch64 stage 1 does not support
+	 * write-only access, so we use read/write instead
+	 */
+	if (flags & KBASE_REG_GPU_WR)
+		mmu_flags |= ENTRY_ACCESS_RW;
+	else if (flags & KBASE_REG_GPU_RD)
+		mmu_flags |= ENTRY_ACCESS_RO;
+
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) |
+			get_mmu_flags(flags) |
+			ENTRY_ACCESS_BIT | ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) |
+			ENTRY_ACCESS_BIT | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const aarch64_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void)
+{
+	return &aarch64_mode;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
new file mode 100755
index 0000000..683cabb
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -0,0 +1,206 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase_mmu_mode.h"
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+#define ENTRY_IS_ATE        1ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_RD_BIT (1ULL << 6)
+#define ENTRY_WR_BIT (1ULL << 7)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
+		ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+	*pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler keeping cached copies of
+	 * memory, we have to explicitly say that we have updated
+	 * memory.
+	 *
+	 * Note: We could manually move the data ourselves into R0 and
+	 * R1 by specifying register variables that are explicitly
+	 * given registers assignments, the down side of this is that
+	 * we have to assume cpu endianness.  To avoid this we can use
+	 * the ldrd to read the data from memory into R0 and R1 which
+	 * will respect the cpu endianness, we then use strd to make
+	 * the 64 bit assignment to the page table entry.
+	 */
+	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+			"strd r0, r1, [%[pte]]\n\t"
+			: "=m" (*pte)
+			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+			: "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register. */
+	setup->memattr =
+		(AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
+		(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    <<
+		(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8))    |
+		(AS_MEMATTR_LPAE_WRITE_ALLOC           <<
+		(AS_MEMATTR_INDEX_WRITE_ALLOC * 8))           |
+		(AS_MEMATTR_LPAE_OUTER_IMPL_DEF        <<
+		(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8))        |
+		(AS_MEMATTR_LPAE_OUTER_WA              <<
+		(AS_MEMATTR_INDEX_OUTER_WA * 8))              |
+		0; /* The other indices are unused for now */
+
+	setup->transtab = ((u64)kctx->pgd &
+		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
+		AS_TRANSTAB_LPAE_ADRMODE_TABLE |
+		AS_TRANSTAB_LPAE_READ_INNER;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#else
+	setup->transcfg = 0;
+#endif
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#endif
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* write perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
+	/* read perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) |
+		get_mmu_flags(flags) |
+		ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const lpae_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
+{
+	return &lpae_mode;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
new file mode 100755
index 0000000..1a44957
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+
+/*
+ * This file is included only for type definitions and functions belonging to
+ * specific platform folders. Do not add dependencies with symbols that are
+ * defined somewhere else.
+ */
+#include <mali_kbase_config.h>
+
+#define PLATFORM_CONFIG_RESOURCE_COUNT 4
+#define PLATFORM_CONFIG_IRQ_RES_COUNT  3
+
+static struct platform_device *mali_device;
+
+#ifndef CONFIG_OF
+/**
+ * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources
+ *
+ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function
+ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT.
+ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
+ *
+ * @param[in]  io_resource      Input IO resource data
+ * @param[out] linux_resources  Pointer to output array of Linux resource structures
+ */
+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
+{
+	if (!io_resources || !linux_resources) {
+		pr_err("%s: couldn't find proper resources\n", __func__);
+		return;
+	}
+
+	memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource));
+
+	linux_resources[0].start = io_resources->io_memory_region.start;
+	linux_resources[0].end   = io_resources->io_memory_region.end;
+	linux_resources[0].flags = IORESOURCE_MEM;
+
+	linux_resources[1].start = io_resources->job_irq_number;
+	linux_resources[1].end   = io_resources->job_irq_number;
+	linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[2].start = io_resources->mmu_irq_number;
+	linux_resources[2].end   = io_resources->mmu_irq_number;
+	linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[3].start = io_resources->gpu_irq_number;
+	linux_resources[3].end   = io_resources->gpu_irq_number;
+	linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+}
+#endif /* CONFIG_OF */
+
+int kbase_platform_fake_register(void)
+{
+	struct kbase_platform_config *config;
+#ifndef CONFIG_OF
+	struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
+#endif
+	int err;
+
+	config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
+	if (config == NULL) {
+		pr_err("%s: couldn't get platform config\n", __func__);
+		return -ENODEV;
+	}
+
+	mali_device = platform_device_alloc("mali", 0);
+	if (mali_device == NULL)
+		return -ENOMEM;
+
+#ifndef CONFIG_OF
+	kbasep_config_parse_io_resources(config->io_resources, resources);
+	err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
+	if (err) {
+		platform_device_put(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+#endif /* CONFIG_OF */
+
+	err = platform_device_add(mali_device);
+	if (err) {
+		platform_device_unregister(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kbase_platform_fake_register);
+
+void kbase_platform_fake_unregister(void)
+{
+	if (mali_device)
+		platform_device_unregister(mali_device);
+}
+EXPORT_SYMBOL(kbase_platform_fake_unregister);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
new file mode 100755
index 0000000..97d5434
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -0,0 +1,205 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.c
+ * Base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_vinstr.h>
+
+#include <mali_kbase_pm.h>
+
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
+{
+	return kbase_hwaccess_pm_powerup(kbdev, flags);
+}
+
+void kbase_pm_halt(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
+void kbase_pm_context_active(struct kbase_device *kbdev)
+{
+	(void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+}
+
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerup. Sometimes the event might be missed due to reading the count
+	 * outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+	if (kbase_pm_is_suspending(kbdev)) {
+		switch (suspend_handler) {
+		case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+			if (kbdev->pm.active_count != 0)
+				break;
+			/* FALLTHROUGH */
+		case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			if (old_count == 0)
+				kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+			return 1;
+
+		case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
+			/* FALLTHROUGH */
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false, "unreachable");
+			break;
+		}
+	}
+	c = ++kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	if (c == 1)
+		/* First context active: Power on the GPU and any cores requested by
+		 * the policy */
+		kbase_hwaccess_pm_gpu_active(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_active);
+
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerdown. Sometimes the event might be missed due to reading the
+	 * count outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	c = --kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
+
+	KBASE_DEBUG_ASSERT(c >= 0);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	if (c == 0) {
+		/* Last context has gone idle */
+		kbase_hwaccess_pm_gpu_idle(kbdev);
+
+		/* Wake up anyone waiting for this to become 0 (e.g. suspend). The
+		 * waiters must synchronize with us by locking the pm.lock after
+		 * waiting */
+		wake_up(&kbdev->pm.zero_active_count_wait);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
+
+void kbase_pm_suspend(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	mutex_lock(&kbdev->pm.lock);
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+	kbdev->pm.suspending = true;
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* From now on, the active count will drop towards zero. Sometimes, it'll
+	 * go up briefly before going down again. However, once it reaches zero it
+	 * will stay there - guaranteeing that we've idled all pm references */
+
+	/* Suspend job scheduler and associated components, so that it releases all
+	 * the PM active count references */
+	kbasep_js_suspend(kbdev);
+
+	/* Wait for the active count to reach zero. This is not the same as
+	 * waiting for a power down, since not all policies power down when this
+	 * reaches zero. */
+	wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0);
+
+	/* NOTE: We synchronize with anything that was just finishing a
+	 * kbase_pm_context_idle() call by locking the pm.lock below */
+
+	kbase_hwaccess_pm_suspend(kbdev);
+}
+
+void kbase_pm_resume(struct kbase_device *kbdev)
+{
+	/* MUST happen before any pm_context_active calls occur */
+	kbase_hwaccess_pm_resume(kbdev);
+
+	/* Initial active call, to power on the GPU/cores if needed */
+	kbase_pm_context_active(kbdev);
+
+	/* Resume any blocked atoms (which may cause contexts to be scheduled in
+	 * and dependent atoms to run) */
+	kbase_resume_suspended_soft_jobs(kbdev);
+
+	/* Resume the Job Scheduler and associated components, and start running
+	 * atoms */
+	kbasep_js_resume(kbdev);
+
+	/* Matching idle call, to power off the GPU/cores if we didn't actually
+	 * need it and the policy doesn't want it on */
+	kbase_pm_context_idle(kbdev);
+
+	/* Resume vinstr operation */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
new file mode 100755
index 0000000..37fa247
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -0,0 +1,171 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include "mali_kbase_hwaccess_pm.h"
+
+#define PM_ENABLE_IRQS       0x01
+#define PM_HW_ISSUES_DETECT  0x02
+
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return 0 if the power management framework was successfully initialized.
+ */
+int kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @param flags     Flags to pass on to kbase_pm_init_hw
+ *
+ * @return 0 if powerup was successful.
+ */
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ * Should ensure that no new interrupts are generated,
+ * but allow any currently running interrupt handlers to complete successfully.
+ * The GPU is forced off by the time this function returns, regardless of
+ * whether or not the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init)
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It informs the active power policy that the
+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occuring/has occurred whilst this is
+ * in use. Use kbase_pm_contect_active_unless_suspending() instead.
+ *
+ * @note a Suspend is only visible to Kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+	/** A suspend is not expected/not possible - this is the same as
+	 * kbase_pm_context_active() */
+	KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+	/** If we're suspending, fail and don't increase the active count */
+	KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+	/** If we're suspending, succeed and allow the active count to increase iff
+	 * it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+	 *
+	 * This should only be used when there is a bounded time on the activation
+	 * (e.g. guarantee it's going to be idled very soon after) */
+	KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * We returns a status code indicating whether we're allowed to keep the GPU
+ * active during the suspend, depending on the handler code. If the status code
+ * indicates a failure, the caller must abort whatever operation it was
+ * attempting, and potentially queue it up for after the OS has resumed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero     Indicates success
+ * @return non-zero Indicates failure due to the system being suspending/suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power
+ * policy so the calling code should ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from Kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note the mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU e.g. via atom submission.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_vsync_callback - vsync callback
+ *
+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise
+ * @data: Pointer to the kbase device as returned by kbase_find_device()
+ *
+ * Callback function used to notify the power management code that a vsync has
+ * occurred on the display.
+ */
+void kbase_pm_vsync_callback(int buffer_updated, void *data);
+
+#endif				/* _KBASE_PM_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
new file mode 100755
index 0000000..7fb674e
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_profiling_gator_api.h
+ * Model interface
+ */
+
+#ifndef _KBASE_PROFILING_GATOR_API_H_
+#define _KBASE_PROFILING_GATOR_API_H_
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control
+ * the frame buffer dumping and s/w counter reporting.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define FBDUMP_CONTROL_MAX (5)
+#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE
+
+void _mali_profiling_control(u32 action, u32 value);
+
+#endif				/* _KBASE_PROFILING_GATOR_API */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
new file mode 100755
index 0000000..d3a3dbf
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -0,0 +1,1167 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_replay.c
+ * Replay soft job handlers
+ */
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mem_linux.h>
+
+#define JOB_NOT_STARTED 0
+#define JOB_TYPE_NULL      (1)
+#define JOB_TYPE_VERTEX    (5)
+#define JOB_TYPE_TILER     (7)
+#define JOB_TYPE_FUSED     (8)
+#define JOB_TYPE_FRAGMENT  (9)
+
+#define JOB_HEADER_32_FBD_OFFSET (31*4)
+#define JOB_HEADER_64_FBD_OFFSET (44*4)
+
+#define FBD_POINTER_MASK (~0x3f)
+
+#define SFBD_TILER_OFFSET (48*4)
+
+#define MFBD_TILER_OFFSET       (14*4)
+
+#define FBD_HIERARCHY_WEIGHTS 8
+#define FBD_HIERARCHY_MASK_MASK 0x1fff
+
+#define FBD_TYPE 1
+
+#define HIERARCHY_WEIGHTS 13
+
+#define JOB_HEADER_ID_MAX                 0xffff
+
+#define JOB_SOURCE_ID(status)		(((status) >> 16) & 0xFFFF)
+#define JOB_POLYGON_LIST		(0x03)
+
+struct fragment_job {
+	struct job_descriptor_header header;
+
+	u32 x[2];
+	union {
+		u64 _64;
+		u32 _32;
+	} fragment_fbd;
+};
+
+static void dump_job_head(struct kbase_context *kctx, char *head_str,
+		struct job_descriptor_header *job)
+{
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
+	dev_dbg(kctx->kbdev->dev,
+			"addr                  = %p\n"
+			"exception_status      = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n"
+			"first_incomplete_task = %x\n"
+			"fault_pointer         = %llx\n"
+			"job_descriptor_size   = %x\n"
+			"job_type              = %x\n"
+			"job_barrier           = %x\n"
+			"_reserved_01          = %x\n"
+			"_reserved_02          = %x\n"
+			"_reserved_03          = %x\n"
+			"_reserved_04/05       = %x,%x\n"
+			"job_index             = %x\n"
+			"dependencies          = %x,%x\n",
+			job, job->exception_status,
+			JOB_SOURCE_ID(job->exception_status),
+			(job->exception_status >> 8) & 0x3,
+			job->exception_status  & 0xFF,
+			job->first_incomplete_task,
+			job->fault_pointer, job->job_descriptor_size,
+			job->job_type, job->job_barrier, job->_reserved_01,
+			job->_reserved_02, job->_reserved_03,
+			job->_reserved_04, job->_reserved_05,
+			job->job_index,
+			job->job_dependency_index_1,
+			job->job_dependency_index_2);
+
+	if (job->job_descriptor_size)
+		dev_dbg(kctx->kbdev->dev, "next               = %llx\n",
+				job->next_job._64);
+	else
+		dev_dbg(kctx->kbdev->dev, "next               = %x\n",
+				job->next_job._32);
+#endif
+}
+
+static int kbasep_replay_reset_sfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct {
+		u32 padding_1[1];
+		u32 flags;
+		u64 padding_2[2];
+		u64 heap_free_address;
+		u32 padding[8];
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev,
+		"FBD tiler:\n"
+		"flags = %x\n"
+		"heap_free_address = %llx\n",
+		fbd_tiler->flags, fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = fbd_tiler->flags &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n",
+						i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n",
+			fbd_tiler->heap_free_address, fbd_tiler->flags);
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_replay_reset_mfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct kbase_vmap_struct map;
+	struct {
+		u32 padding_0;
+		u32 flags;
+		u64 padding_1[2];
+		u64 heap_free_address;
+		u64 padding_2;
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev,
+			       "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "FBD tiler:\n"
+			"flags = %x\n"
+			"heap_free_address = %llx\n",
+			fbd_tiler->flags,
+			fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = (fbd_tiler->flags) &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev,
+				" Writing hierarchy level %02d (%08x) to %d\n",
+							     i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of an FBD pointed to by a tiler job
+ *
+ * This performs two functions :
+ * - Set the hierarchy mask
+ * - Reset the tiler free heap address
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] job_header        Address of job header to reset.
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] job_64            true if this job is using 64-bit
+ *                              descriptors
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx,
+		u64 job_header,	u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight,	bool job_64)
+{
+	struct kbase_vmap_struct map;
+	u64 fbd_address;
+
+	if (job_64) {
+		u64 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_64_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	} else {
+		u32 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_32_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	if (fbd_address & FBD_TYPE) {
+		return kbasep_replay_reset_mfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	} else {
+		return kbasep_replay_reset_sfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	}
+}
+
+/**
+ * @brief Reset the status of a job
+ *
+ * This performs the following functions :
+ *
+ * - Reset the Job Status field of each job to NOT_STARTED.
+ * - Set the Job Type field of any Vertex Jobs to Null Job.
+ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of
+ *   the tiler_heap_free parameter, and set the hierarchy level mask to the
+ *   hier_mask parameter.
+ * - Offset HW dependencies by the hw_job_id_offset parameter
+ * - Set the Perform Job Barrier flag if this job is the first in the chain
+ * - Read the address of the next job header
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in,out] job_header    Address of job header to reset. Set to address
+ *                              of next job header on exit.
+ * @param[in] prev_jc           Previous job chain to link to, if this job is
+ *                              the last in the chain.
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] first_in_chain    true if this job is the first in the chain
+ * @param[in] fragment_chain    true if this job is in the fragment chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_job(struct kbase_context *kctx,
+		u64 *job_header, u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool first_in_chain, bool fragment_chain)
+{
+	struct fragment_job *frag_job;
+	struct job_descriptor_header *job;
+	u64 new_job_header;
+	struct kbase_vmap_struct map;
+
+	frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map);
+	if (!frag_job) {
+		dev_err(kctx->kbdev->dev,
+				 "kbasep_replay_parse_jc: failed to map jc\n");
+		return -EINVAL;
+	}
+	job = &frag_job->header;
+
+	dump_job_head(kctx, "Job header:", job);
+
+	if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) {
+		dev_err(kctx->kbdev->dev, "Job already not started\n");
+		goto out_unmap;
+	}
+	job->exception_status = JOB_NOT_STARTED;
+
+	if (job->job_type == JOB_TYPE_VERTEX)
+		job->job_type = JOB_TYPE_NULL;
+
+	if (job->job_type == JOB_TYPE_FUSED) {
+		dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
+		goto out_unmap;
+	}
+
+	if (first_in_chain)
+		job->job_barrier = 1;
+
+	if ((job->job_dependency_index_1 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_dependency_index_2 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+		dev_err(kctx->kbdev->dev,
+			     "Job indicies/dependencies out of valid range\n");
+		goto out_unmap;
+	}
+
+	if (job->job_dependency_index_1)
+		job->job_dependency_index_1 += hw_job_id_offset;
+	if (job->job_dependency_index_2)
+		job->job_dependency_index_2 += hw_job_id_offset;
+
+	job->job_index += hw_job_id_offset;
+
+	if (job->job_descriptor_size) {
+		new_job_header = job->next_job._64;
+		if (!job->next_job._64)
+			job->next_job._64 = prev_jc;
+	} else {
+		new_job_header = job->next_job._32;
+		if (!job->next_job._32)
+			job->next_job._32 = prev_jc;
+	}
+	dump_job_head(kctx, "Updated to:", job);
+
+	if (job->job_type == JOB_TYPE_TILER) {
+		bool job_64 = job->job_descriptor_size != 0;
+
+		if (kbasep_replay_reset_tiler_job(kctx, *job_header,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, job_64) != 0)
+			goto out_unmap;
+
+	} else if (job->job_type == JOB_TYPE_FRAGMENT) {
+		u64 fbd_address;
+
+		if (job->job_descriptor_size)
+			fbd_address = frag_job->fragment_fbd._64;
+		else
+			fbd_address = (u64)frag_job->fragment_fbd._32;
+
+		if (fbd_address & FBD_TYPE) {
+			if (kbasep_replay_reset_mfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		} else {
+			if (kbasep_replay_reset_sfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		}
+	}
+
+	kbase_vunmap(kctx, &map);
+
+	*job_header = new_job_header;
+
+	return 0;
+
+out_unmap:
+	kbase_vunmap(kctx, &map);
+	return -EINVAL;
+}
+
+/**
+ * @brief Find the highest job ID in a job chain
+ *
+ * @param[in] kctx        Context pointer
+ * @param[in] jc          Job chain start address
+ * @param[out] hw_job_id  Highest job ID in chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
+		u64 jc,	u16 *hw_job_id)
+{
+	while (jc) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		dev_dbg(kctx->kbdev->dev,
+			"kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc);
+
+		job = kbase_vmap(kctx, jc, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev, "failed to map jc\n");
+
+			return -EINVAL;
+		}
+
+		if (job->job_index > *hw_job_id)
+			*hw_job_id = job->job_index;
+
+		if (job->job_descriptor_size)
+			jc = job->next_job._64;
+		else
+			jc = job->next_job._32;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a number of jobs
+ *
+ * This function walks the provided job chain, and calls
+ * kbasep_replay_reset_job for each job. It also links the job chain to the
+ * provided previous job chain.
+ *
+ * The function will fail if any of the jobs passed already have status of
+ * NOT_STARTED.
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] jc                Job chain to be processed
+ * @param[in] prev_jc           Job chain to be added to. May be NULL
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] fragment_chain    true if this chain is the fragment chain
+ *
+ * @return 0 on success, error code otherwise
+ */
+static int kbasep_replay_parse_jc(struct kbase_context *kctx,
+		u64 jc,	u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool fragment_chain)
+{
+	bool first_in_chain = true;
+	int nr_jobs = 0;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n",
+			jc, hw_job_id_offset);
+
+	while (jc) {
+		dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc);
+
+		if (kbasep_replay_reset_job(kctx, &jc, prev_jc,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, hw_job_id_offset,
+				first_in_chain, fragment_chain) != 0)
+			return -EINVAL;
+
+		first_in_chain = false;
+
+		nr_jobs++;
+		if (fragment_chain &&
+		    nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) {
+			dev_err(kctx->kbdev->dev,
+				"Exceeded maximum number of jobs in fragment chain\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a replay job, and set up dependencies
+ *
+ * This performs the actions to allow the replay job to be re-run following
+ * completion of the passed dependency.
+ *
+ * @param[in] katom     The atom to be reset
+ * @param[in] dep_atom  The dependency to be attached to the atom
+ */
+static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom,
+		struct kbase_jd_atom *dep_atom)
+{
+	katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+	list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]);
+}
+
+/**
+ * @brief Allocate an unused katom
+ *
+ * This will search the provided context for an unused katom, and will mark it
+ * as KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * If no atoms are available then the function will fail.
+ *
+ * @param[in] kctx      Context pointer
+ * @return An atom ID, or -1 on failure
+ */
+static int kbasep_allocate_katom(struct kbase_context *kctx)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int i;
+
+	for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) {
+		if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) {
+			jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED;
+			dev_dbg(kctx->kbdev->dev,
+				  "kbasep_allocate_katom: Allocated atom %d\n",
+									    i);
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * @brief Release a katom
+ *
+ * This will mark the provided atom as available, and remove any dependencies.
+ *
+ * For use on error path.
+ *
+ * @param[in] kctx      Context pointer
+ * @param[in] atom_id   ID of atom to release
+ */
+static void kbasep_release_katom(struct kbase_context *kctx, int atom_id)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n",
+			atom_id);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[0]))
+		list_del(jctx->atoms[atom_id].dep_head[0].next);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[1]))
+		list_del(jctx->atoms[atom_id].dep_head[1].next);
+
+	jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED;
+}
+
+static void kbasep_replay_create_atom(struct kbase_context *kctx,
+				      struct base_jd_atom_v2 *atom,
+				      int atom_nr,
+				      base_jd_prio prio)
+{
+	atom->nr_extres = 0;
+	atom->extres_list.value = NULL;
+	atom->device_nr = 0;
+	atom->prio = prio;
+	atom->atom_number = atom_nr;
+
+	base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID);
+
+	atom->udata.blob[0] = 0;
+	atom->udata.blob[1] = 0;
+}
+
+/**
+ * @brief Create two atoms for the purpose of replaying jobs
+ *
+ * Two atoms are allocated and created. The jc pointer is not set at this
+ * stage. The second atom has a dependency on the first. The remaining fields
+ * are set up as follows :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ *
+ * @param[out] t_atom      Atom to use for tiler jobs
+ * @param[out] f_atom      Atom to use for fragment jobs
+ * @param[in]  prio        Priority of new atom (inherited from replay soft
+ *                         job)
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_create_atoms(struct kbase_context *kctx,
+		struct base_jd_atom_v2 *t_atom,
+		struct base_jd_atom_v2 *f_atom,
+		base_jd_prio prio)
+{
+	int t_atom_nr, f_atom_nr;
+
+	t_atom_nr = kbasep_allocate_katom(kctx);
+	if (t_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		return -EINVAL;
+	}
+
+	f_atom_nr = kbasep_allocate_katom(kctx);
+	if (f_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		kbasep_release_katom(kctx, t_atom_nr);
+		return -EINVAL;
+	}
+
+	kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
+	kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
+
+	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA);
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_DEBUG
+static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload)
+{
+	u64 next;
+
+	dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n");
+	next = payload->tiler_jc_list;
+
+	while (next) {
+		struct kbase_vmap_struct map;
+		base_jd_replay_jc *jc_struct;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map);
+
+		if (!jc_struct)
+			return;
+
+		dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n",
+				jc_struct, jc_struct->jc, jc_struct->next);
+
+		next = jc_struct->next;
+
+		kbase_vunmap(kctx, &map);
+	}
+}
+#endif
+
+/**
+ * @brief Parse a base_jd_replay_payload provided by userspace
+ *
+ * This will read the payload from userspace, and parse the job chains.
+ *
+ * @param[in] kctx         Context pointer
+ * @param[in] replay_atom  Replay soft job atom
+ * @param[in] t_atom       Atom to use for tiler jobs
+ * @param[in] f_atom       Atom to use for fragment jobs
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_parse_payload(struct kbase_context *kctx,
+					      struct kbase_jd_atom *replay_atom,
+					      struct base_jd_atom_v2 *t_atom,
+					      struct base_jd_atom_v2 *f_atom)
+{
+	base_jd_replay_payload *payload = NULL;
+	u64 next;
+	u64 prev_jc = 0;
+	u16 hw_job_id_offset = 0;
+	int ret = -EINVAL;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n",
+			replay_atom->jc, sizeof(payload));
+
+	payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n");
+		return -EINVAL;
+	}
+
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+	if (KBASE_API_VERSION(10, 3) > replay_atom->kctx->api_version) {
+		base_jd_replay_payload_uk10_2 *payload_uk10_2;
+		u16 tiler_core_req;
+		u16 fragment_core_req;
+
+		payload_uk10_2 = (base_jd_replay_payload_uk10_2 *) payload;
+		memcpy(&tiler_core_req, &payload_uk10_2->tiler_core_req,
+				sizeof(tiler_core_req));
+		memcpy(&fragment_core_req, &payload_uk10_2->fragment_core_req,
+				sizeof(fragment_core_req));
+		payload->tiler_core_req = (u32)(tiler_core_req & 0x7fff);
+		payload->fragment_core_req = (u32)(fragment_core_req & 0x7fff);
+	}
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload);
+	dev_dbg(kctx->kbdev->dev, "Payload structure:\n"
+				  "tiler_jc_list            = %llx\n"
+				  "fragment_jc              = %llx\n"
+				  "tiler_heap_free          = %llx\n"
+				  "fragment_hierarchy_mask  = %x\n"
+				  "tiler_hierarchy_mask     = %x\n"
+				  "hierarchy_default_weight = %x\n"
+				  "tiler_core_req           = %x\n"
+				  "fragment_core_req        = %x\n",
+							payload->tiler_jc_list,
+							  payload->fragment_jc,
+						      payload->tiler_heap_free,
+					      payload->fragment_hierarchy_mask,
+						 payload->tiler_hierarchy_mask,
+					     payload->hierarchy_default_weight,
+						       payload->tiler_core_req,
+						   payload->fragment_core_req);
+	payload_dump(kctx, payload);
+#endif
+	t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER;
+	f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER;
+
+	/* Sanity check core requirements*/
+	if ((t_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T ||
+	    (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS ||
+	     t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES ||
+	     f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+
+		int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP;
+		int f_atom_type = f_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC;
+		int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+		int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+
+		if (t_atom_type != BASE_JD_REQ_T) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. Was: 0x%x\n Expected: 0x%x",
+			    t_atom_type, BASE_JD_REQ_T);
+		}
+		if (f_atom_type != BASE_JD_REQ_FS) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. Was 0x%x Expected: 0x%x\n",
+			    f_atom_type, BASE_JD_REQ_FS);
+		}
+		if (t_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n");
+		}
+		if (f_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n");
+		}
+
+		goto out;
+	}
+
+	/* Process tiler job chains */
+	next = payload->tiler_jc_list;
+	if (!next) {
+		dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n");
+		goto out;
+	}
+
+	while (next) {
+		base_jd_replay_jc *jc_struct;
+		struct kbase_vmap_struct jc_map;
+		u64 jc;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map);
+
+		if (!jc_struct) {
+			dev_err(kctx->kbdev->dev, "Failed to map jc struct\n");
+			goto out;
+		}
+
+		jc = jc_struct->jc;
+		next = jc_struct->next;
+		if (next)
+			jc_struct->jc = 0;
+
+		kbase_vunmap(kctx, &jc_map);
+
+		if (jc) {
+			u16 max_hw_job_id = 0;
+
+			if (kbasep_replay_find_hw_job_id(kctx, jc,
+					&max_hw_job_id) != 0)
+				goto out;
+
+			if (kbasep_replay_parse_jc(kctx, jc, prev_jc,
+					payload->tiler_heap_free,
+					payload->tiler_hierarchy_mask,
+					payload->hierarchy_default_weight,
+					hw_job_id_offset, false) != 0) {
+				goto out;
+			}
+
+			hw_job_id_offset += max_hw_job_id;
+
+			prev_jc = jc;
+		}
+	}
+	t_atom->jc = prev_jc;
+
+	/* Process fragment job chain */
+	f_atom->jc = payload->fragment_jc;
+	if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0,
+			payload->tiler_heap_free,
+			payload->fragment_hierarchy_mask,
+			payload->hierarchy_default_weight, 0,
+			true) != 0) {
+		goto out;
+	}
+
+	if (!t_atom->jc || !f_atom->jc) {
+		dev_err(kctx->kbdev->dev, "Invalid payload\n");
+		goto out;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n",
+			t_atom->jc, f_atom->jc);
+	ret = 0;
+
+out:
+	kbase_vunmap(kctx, &map);
+
+	return ret;
+}
+
+static void kbase_replay_process_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+	struct kbase_jd_context *jctx;
+	bool need_to_try_schedule_context = false;
+
+	struct base_jd_atom_v2 t_atom, f_atom;
+	struct kbase_jd_atom *t_katom, *f_katom;
+	base_jd_prio atom_prio;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+
+	mutex_lock(&jctx->lock);
+
+	atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority);
+
+	if (kbasep_replay_create_atoms(
+			kctx, &t_atom, &f_atom, atom_prio) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	t_katom = &jctx->atoms[t_atom.atom_number];
+	f_katom = &jctx->atoms[f_atom.atom_number];
+
+	if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) {
+		kbasep_release_katom(kctx, t_atom.atom_number);
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	kbasep_replay_reset_softjob(katom, f_katom);
+
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom);
+	if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom);
+	if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+out:
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_disjoint_state_down(kctx->kbdev);
+
+		need_to_try_schedule_context |= jd_done_nolock(katom, NULL);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kctx->kbdev);
+
+	mutex_unlock(&jctx->lock);
+}
+
+/**
+ * @brief Check job replay fault
+ *
+ * This will read the job payload, checks fault type and source, then decides
+ * whether replay is required.
+ *
+ * @param[in] katom       The atom to be processed
+ * @return  true (success) if replay required or false on failure.
+ */
+static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	base_jd_replay_payload *payload;
+	u64 job_header;
+	u64 job_loop_detect;
+	struct job_descriptor_header *job;
+	struct kbase_vmap_struct job_map;
+	struct kbase_vmap_struct map;
+	bool err = false;
+
+	/* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or
+	 * if force_replay is enabled.
+	 */
+	if (BASE_JD_EVENT_TERMINATED == katom->event_code) {
+		return false;
+	} else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) {
+		return true;
+	} else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) {
+		katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT;
+		return true;
+	} else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) {
+		/* No replay for faults of type other than
+		 * BASE_JD_EVENT_DATA_INVALID_FAULT.
+		 */
+		return false;
+	}
+
+	/* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc
+	 * to find out whether the source of exception is POLYGON_LIST. Replay
+	 * is required if the source of fault is POLYGON_LIST.
+	 */
+	payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n");
+		return false;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload);
+	dev_dbg(dev, "\nPayload structure:\n"
+		     "fragment_jc              = 0x%llx\n"
+		     "fragment_hierarchy_mask  = 0x%x\n"
+		     "fragment_core_req        = 0x%x\n",
+		     payload->fragment_jc,
+		     payload->fragment_hierarchy_mask,
+		     payload->fragment_core_req);
+#endif
+	/* Process fragment job chain */
+	job_header      = (u64) payload->fragment_jc;
+	job_loop_detect = job_header;
+	while (job_header) {
+		job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map);
+		if (!job) {
+			dev_err(dev, "failed to map jc\n");
+			/* unmap payload*/
+			kbase_vunmap(kctx, &map);
+			return false;
+		}
+
+
+		dump_job_head(kctx, "\njob_head structure:\n", job);
+
+		/* Replay only when the polygon list reader caused the
+		 * DATA_INVALID_FAULT */
+		if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
+		   (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) {
+			err = true;
+			kbase_vunmap(kctx, &job_map);
+			break;
+		}
+
+		/* Move on to next fragment job in the list */
+		if (job->job_descriptor_size)
+			job_header = job->next_job._64;
+		else
+			job_header = job->next_job._32;
+
+		kbase_vunmap(kctx, &job_map);
+
+		/* Job chain loop detected */
+		if (job_header == job_loop_detect)
+			break;
+	}
+
+	/* unmap payload*/
+	kbase_vunmap(kctx, &map);
+
+	return err;
+}
+
+
+/**
+ * @brief Process a replay job
+ *
+ * Called from kbase_process_soft_job.
+ *
+ * On exit, if the job has completed, katom->event_code will have been updated.
+ * If the job has not completed, and is replaying jobs, then the atom status
+ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * @param[in] katom  The atom to be processed
+ * @return           false if the atom has completed
+ *                   true if the atom is replaying jobs
+ */
+bool kbase_replay_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	/* Don't replay this atom if these issues are not present in the
+	 * hardware */
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11020) &&
+			!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11024)) {
+		dev_dbg(kbdev->dev, "Hardware does not need replay workaround");
+
+		/* Signal failure to userspace */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+
+		return false;
+	}
+
+	if (katom->event_code == BASE_JD_EVENT_DONE) {
+		dev_dbg(kbdev->dev, "Previous job succeeded - not replaying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	if (jctx->sched_info.ctx.is_dying) {
+		dev_dbg(kbdev->dev, "Not replaying; context is dying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	/* Check job exception type and source before replaying. */
+	if (!kbase_replay_fault_check(katom)) {
+		dev_dbg(kbdev->dev,
+			"Replay cancelled on event %x\n", katom->event_code);
+		/* katom->event_code is already set to the failure code of the
+		 * previous job.
+		 */
+		return false;
+	}
+
+	dev_warn(kbdev->dev, "Replaying jobs retry=%d\n",
+			katom->retry_count);
+
+	katom->retry_count++;
+
+	if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) {
+		dev_err(kbdev->dev, "Replay exceeded limit - failing jobs\n");
+
+		kbase_disjoint_state_down(kbdev);
+
+		/* katom->event_code is already set to the failure code of the
+		   previous job */
+		return false;
+	}
+
+	/* only enter the disjoint state once for the whole time while the replay is ongoing */
+	if (katom->retry_count == 1)
+		kbase_disjoint_state_up(kbdev);
+
+	INIT_WORK(&katom->work, kbase_replay_process_worker);
+	queue_work(kctx->event_workq, &katom->work);
+
+	return true;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
new file mode 100755
index 0000000..43175c8
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
@@ -0,0 +1,74 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+#include <mali_kbase_smc.h>
+
+#include <linux/compiler.h>
+
+static noinline u64 invoke_smc_fid(u64 function_id,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	register u64 x0 asm("x0") = function_id;
+	register u64 x1 asm("x1") = arg0;
+	register u64 x2 asm("x2") = arg1;
+	register u64 x3 asm("x3") = arg2;
+
+	asm volatile(
+			__asmeq("%0", "x0")
+			__asmeq("%1", "x1")
+			__asmeq("%2", "x2")
+			__asmeq("%3", "x3")
+			"smc    #0\n"
+			: "+r" (x0)
+			: "r" (x1), "r" (x2), "r" (x3));
+
+	return x0;
+}
+
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2)
+{
+	/* Is fast call (bit 31 set) */
+	KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL);
+	/* bits 16-23 must be zero for fast calls */
+	KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0);
+
+	return invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	/* Only the six bits allowed should be used. */
+	KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0);
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return kbase_invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+#endif /* CONFIG_ARM64 */
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
new file mode 100755
index 0000000..9bff3d2
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_SMC_H_
+#define _KBASE_SMC_H_
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+
+#define SMC_FAST_CALL (1 << 31)
+#define SMC_64 (1 << 30)
+
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */
+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET)
+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET)
+
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @fid: The SMC function to call, see SMC Calling convention.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC.
+  */
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2);
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @oen: Owning Entity number (SIP, STD etc).
+  * @function_number: The function number within the OEN.
+  * @smc64: use SMC64 calling convention instead of SMC32.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC call.
+  */
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2);
+
+#endif /* CONFIG_ARM64 */
+
+#endif /* _KBASE_SMC_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
new file mode 100755
index 0000000..3bdfa0e
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -0,0 +1,1509 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#include <asm/cacheflush.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/dma-mapping.h>
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#include <linux/syscalls.h>
+#include "mali_kbase_sync.h"
+#endif
+#include <mali_base_kernel.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_mem_linux.h>
+#include <linux/version.h>
+#include <linux/ktime.h>
+#include <linux/pfn.h>
+#include <linux/sched.h>
+
+/* Mask to check cache alignment of data structures */
+#define KBASE_CACHE_ALIGNMENT_MASK		((1<<L1_CACHE_SHIFT)-1)
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_add_tail(&katom->queue, &kctx->waiting_soft_jobs);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_del(&katom->queue);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Record the start time of this atom so we could cancel it at
+	 * the right time.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* Add the atom to the waiting list before the timer is
+	 * (re)started to make sure that it gets processed.
+	 */
+	kbasep_add_waiting_soft_job(katom);
+
+	/* Schedule timeout of this atom after a period if it is not active */
+	if (!timer_pending(&kctx->soft_job_timeout)) {
+		int timeout_ms = atomic_read(
+				&kctx->kbdev->js_data.soft_job_timeout_ms);
+		mod_timer(&kctx->soft_job_timeout,
+			  jiffies + msecs_to_jiffies(timeout_ms));
+	}
+}
+
+static int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*status = *mapped_evt;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	if ((new_status != BASE_JD_SOFT_EVENT_SET) &&
+	    (new_status != BASE_JD_SOFT_EVENT_RESET))
+		return -EINVAL;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*mapped_evt = new_status;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+	struct kbase_vmap_struct map;
+	void *user_result;
+	struct timespec ts;
+	struct base_dump_cpu_gpu_counters data;
+	u64 system_time;
+	u64 cycle_counter;
+	u64 jc = katom->jc;
+	struct kbase_context *kctx = katom->kctx;
+	int pm_active_err;
+
+	memset(&data, 0, sizeof(data));
+
+	/* Take the PM active reference as late as possible - otherwise, it could
+	 * delay suspend until we process the atom (which may be at the end of a
+	 * long chain of dependencies */
+	pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+	if (pm_active_err) {
+		struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+		/* We're suspended - queue this on the list of suspended jobs
+		 * Use dep_item[1], because dep_item[0] was previously in use
+		 * for 'waiting_soft_jobs'.
+		 */
+		mutex_lock(&js_devdata->runpool_mutex);
+		list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		/* Also adding this to the list of waiting soft job */
+		kbasep_add_waiting_soft_job(katom);
+
+		return pm_active_err;
+	}
+
+	kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
+									&ts);
+
+	kbase_pm_context_idle(kctx->kbdev);
+
+	data.sec = ts.tv_sec;
+	data.usec = ts.tv_nsec / 1000;
+	data.system_time = system_time;
+	data.cycle_counter = cycle_counter;
+
+	/* Assume this atom will be cancelled until we know otherwise */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* GPU_WR access is checked on the range for returning the result to
+	 * userspace for the following reasons:
+	 * - security, this is currently how imported user bufs are checked.
+	 * - userspace ddk guaranteed to assume region was mapped as GPU_WR */
+	user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map);
+	if (!user_result)
+		return 0;
+
+	memcpy(user_result, &data, sizeof(data));
+
+	kbase_vunmap(kctx, &map);
+
+	/* Atom was fine - mark it as done */
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	return 0;
+}
+
+#ifdef CONFIG_SYNC
+
+static enum base_jd_event_code kbase_fence_trigger(struct kbase_jd_atom *katom, int result)
+{
+	struct sync_pt *pt;
+	struct sync_timeline *timeline;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	if (!list_is_singular(&katom->fence->pt_list_head)) {
+#else
+	if (katom->fence->num_fences != 1) {
+#endif
+		/* Not exactly one item in the list - so it didn't (directly) come from us */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	pt = list_first_entry(&katom->fence->pt_list_head, struct sync_pt, pt_list);
+#else
+	pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
+#endif
+	timeline = sync_pt_parent(pt);
+
+	if (!kbase_sync_timeline_is_ours(timeline)) {
+		/* Fence has a sync_pt which isn't ours! */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	kbase_sync_signal_pt(pt, result);
+
+	sync_timeline_signal(timeline);
+
+	return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+static void kbase_fence_wait_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(kctx->kbdev);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter)
+{
+	struct kbase_jd_atom *katom = container_of(waiter, struct kbase_jd_atom, sync_waiter);
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != katom);
+
+	kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	/* Propagate the fence status to the atom.
+	 * If negative then cancel this atom and its dependencies.
+	 */
+	if (kbase_fence_get_status(fence) < 0)
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue
+	 *
+	 * The issue is that we may signal the timeline while holding kctx->jctx.lock and
+	 * the callbacks are run synchronously from sync_timeline_signal. So we simply defer the work.
+	 */
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, kbase_fence_wait_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+static int kbase_fence_wait(struct kbase_jd_atom *katom)
+{
+	int ret;
+
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	KBASE_DEBUG_ASSERT(NULL != katom->kctx);
+
+	sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+	ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+	if (ret == 1) {
+		/* Already signalled */
+		return 0;
+	}
+
+	if (ret < 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+		INIT_WORK(&katom->work, kbase_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+	/* The timeout code will add this job to the list of waiting soft jobs.
+	 */
+	kbasep_add_waiting_with_timeout(katom);
+#else
+	kbasep_add_waiting_soft_job(katom);
+#endif
+
+	return 1;
+}
+
+static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+		/* The wait wasn't cancelled - leave the cleanup for kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+#endif /* CONFIG_SYNC */
+
+static void kbasep_soft_event_complete_job(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt)
+{
+	int cancel_timer = 1;
+	struct list_head *entry, *tmp;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, queue);
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			if (katom->jc == evt) {
+				list_del(&katom->queue);
+
+				katom->event_code = BASE_JD_EVENT_DONE;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				/* There are still other waiting jobs, we cannot
+				 * cancel the timer yet.
+				 */
+				cancel_timer = 0;
+			}
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			/* Keep the timer running if fence debug is enabled and
+			 * there are waiting fence jobs.
+			 */
+			cancel_timer = 0;
+			break;
+#endif
+		}
+	}
+
+	if (cancel_timer)
+		del_timer(&kctx->soft_job_timeout);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+static char *kbase_fence_debug_status_string(int status)
+{
+	if (status == 0)
+		return "signaled";
+	else if (status > 0)
+		return "active";
+	else
+		return "error";
+}
+
+static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep;
+
+		list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) {
+			if (dep->status == KBASE_JD_ATOM_STATE_UNUSED ||
+			    dep->status == KBASE_JD_ATOM_STATE_COMPLETED)
+				continue;
+
+			if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					== BASE_JD_REQ_SOFT_FENCE_TRIGGER) {
+				struct sync_fence *fence = dep->fence;
+				int status = kbase_fence_get_status(fence);
+
+				/* Found blocked trigger fence. */
+				dev_warn(dev,
+					 "\tVictim trigger atom %d fence [%p] %s: %s\n",
+					 kbase_jd_atom_id(kctx, dep),
+					 fence, fence->name,
+					 kbase_fence_debug_status_string(status));
+			}
+
+			kbase_fence_debug_check_atom(dep);
+		}
+	}
+}
+
+static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = katom->kctx->kbdev->dev;
+	struct sync_fence *fence = katom->fence;
+	int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms);
+	int status = kbase_fence_get_status(fence);
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+
+	dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n",
+		 kctx->tgid, kctx->id,
+		 kbase_jd_atom_id(kctx, katom),
+		 fence, timeout_ms);
+	dev_warn(dev, "\tGuilty fence [%p] %s: %s\n",
+		 fence, fence->name,
+		 kbase_fence_debug_status_string(status));
+
+	/* Search for blocked trigger atoms */
+	kbase_fence_debug_check_atom(katom);
+
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+
+	/* Dump out the full state of all the Android sync fences.
+	 * The function sync_dump() isn't exported to modules, so force
+	 * sync_fence_wait() to time out to trigger sync_dump().
+	 */
+	sync_fence_wait(fence, 1);
+}
+
+struct kbase_fence_debug_work {
+	struct kbase_jd_atom *katom;
+	struct work_struct work;
+};
+
+static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work)
+{
+	struct kbase_fence_debug_work *w = container_of(work,
+			struct kbase_fence_debug_work, work);
+	struct kbase_jd_atom *katom = w->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_fence_debug_wait_timeout(katom);
+	mutex_unlock(&kctx->jctx.lock);
+
+	kfree(w);
+}
+
+static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_debug_work *work;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Enqueue fence debug worker. Use job_done_wq to get
+	 * debug print ordered with job completion.
+	 */
+	work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC);
+	/* Ignore allocation failure. */
+	if (work) {
+		work->katom = katom;
+		INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker);
+		queue_work(kctx->jctx.job_done_wq, &work->work);
+	}
+}
+#endif /* CONFIG_MALI_FENCE_DEBUG */
+
+void kbasep_soft_job_timeout_worker(unsigned long data)
+{
+	struct kbase_context *kctx = (struct kbase_context *)data;
+	u32 timeout_ms = (u32)atomic_read(
+			&kctx->kbdev->js_data.soft_job_timeout_ms);
+	struct timer_list *timer = &kctx->soft_job_timeout;
+	ktime_t cur_time = ktime_get();
+	bool restarting = false;
+	unsigned long lflags;
+	struct list_head *entry, *tmp;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(entry,
+				struct kbase_jd_atom, queue);
+		s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time,
+					katom->start_timestamp));
+
+		if (elapsed_time < (s64)timeout_ms) {
+			restarting = true;
+			continue;
+		}
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			/* Take it out of the list to ensure that it
+			 * will be cancelled in all cases
+			 */
+			list_del(&katom->queue);
+
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			INIT_WORK(&katom->work, kbasep_soft_event_complete_job);
+			queue_work(kctx->jctx.job_done_wq, &katom->work);
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			kbase_fence_debug_timeout(katom);
+			break;
+#endif
+		}
+	}
+
+	if (restarting)
+		mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms));
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned char status;
+
+	/* The status of this soft-job is stored in jc */
+	if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return 0;
+	}
+
+	if (status == BASE_JD_SOFT_EVENT_SET)
+		return 0; /* Event already set, nothing to do */
+
+	kbasep_add_waiting_with_timeout(katom);
+
+	return 1;
+}
+
+static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom,
+				     unsigned char new_status)
+{
+	/* Complete jobs waiting on the same event */
+	struct kbase_context *kctx = katom->kctx;
+
+	if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, katom->jc);
+}
+
+/**
+ * kbase_soft_event_update() - Update soft event state
+ * @kctx: Pointer to context
+ * @event: Event to update
+ * @new_status: New status value of event
+ *
+ * Update the event, and wake up any atoms waiting for the event.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+int kbase_soft_event_update(struct kbase_context *kctx,
+			     u64 event,
+			     unsigned char new_status)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->jctx.lock);
+
+	if (kbasep_write_soft_event_status(kctx, event, new_status)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, event);
+
+out:
+	mutex_unlock(&kctx->jctx.lock);
+
+	return err;
+}
+
+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
+{
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+struct kbase_debug_copy_buffer {
+	size_t size;
+	struct page **pages;
+	int nr_pages;
+	size_t offset;
+	/*To find memory region*/
+	u64 gpu_addr;
+
+	struct page **extres_pages;
+	int nr_extres_pages;
+};
+
+static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer)
+{
+	struct page **pages = buffer->extres_pages;
+	int nr_pages = buffer->nr_extres_pages;
+
+	if (pages) {
+		int i;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *pg = pages[i];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(pages);
+	}
+}
+
+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+
+	if (!buffers)
+		return;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	for (i = 0; i < nr; i++) {
+		int p;
+		struct kbase_va_region *reg;
+
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, buffers[i].gpu_addr);
+
+		if (!buffers[i].pages)
+			break;
+		for (p = 0; p < buffers[i].nr_pages; p++) {
+			struct page *pg = buffers[i].pages[p];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(buffers[i].pages);
+		if (reg && reg->gpu_alloc) {
+			switch (reg->gpu_alloc->type) {
+			case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+			{
+				free_user_buffer(&buffers[i]);
+				break;
+			}
+			default:
+				/* Nothing to be done. */
+				break;
+			}
+			kbase_mem_phy_alloc_put(reg->gpu_alloc);
+		}
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+	kfree(buffers);
+
+	katom->jc = 0;
+}
+
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers;
+	struct base_jd_debug_copy_buffer *user_buffers = NULL;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+	int ret = 0;
+	void __user *user_structs = (void __user *)(uintptr_t)katom->jc;
+
+	if (!user_structs)
+		return -EINVAL;
+
+	buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers) {
+		ret = -ENOMEM;
+		katom->jc = 0;
+		goto out_cleanup;
+	}
+	katom->jc = (u64)(uintptr_t)buffers;
+
+	user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL);
+
+	if (!user_buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+
+	ret = copy_from_user(user_buffers, user_structs,
+			sizeof(*user_buffers)*nr);
+	if (ret)
+		goto out_cleanup;
+
+	for (i = 0; i < nr; i++) {
+		u64 addr = user_buffers[i].address;
+		u64 page_addr = addr & PAGE_MASK;
+		u64 end_page_addr = addr + user_buffers[i].size - 1;
+		u64 last_page_addr = end_page_addr & PAGE_MASK;
+		int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1;
+		int pinned_pages;
+		struct kbase_va_region *reg;
+		struct base_external_resource user_extres;
+
+		if (!addr)
+			continue;
+
+		buffers[i].nr_pages = nr_pages;
+		buffers[i].offset = addr & ~PAGE_MASK;
+		if (buffers[i].offset >= PAGE_SIZE) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+		buffers[i].size = user_buffers[i].size;
+
+		buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *),
+				GFP_KERNEL);
+		if (!buffers[i].pages) {
+			ret = -ENOMEM;
+			goto out_cleanup;
+		}
+
+		pinned_pages = get_user_pages_fast(page_addr,
+					nr_pages,
+					1, /* Write */
+					buffers[i].pages);
+		if (pinned_pages < 0) {
+			ret = pinned_pages;
+			goto out_cleanup;
+		}
+		if (pinned_pages != nr_pages) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		user_extres = user_buffers[i].extres;
+		if (user_extres.ext_resource == 0ULL) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		buffers[i].gpu_addr = user_extres.ext_resource &
+			~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		kbase_gpu_vm_lock(katom->kctx);
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, buffers[i].gpu_addr);
+
+		if (NULL == reg || NULL == reg->cpu_alloc ||
+				(reg->flags & KBASE_REG_FREE)) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+		buffers[i].nr_extres_pages = reg->nr_pages;
+		if (reg->nr_pages*PAGE_SIZE != buffers[i].size)
+			dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n");
+
+		switch (reg->gpu_alloc->type) {
+		case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		{
+			struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+			unsigned long nr_pages =
+				alloc->imported.user_buf.nr_pages;
+
+			if (alloc->imported.user_buf.mm != current->mm) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			buffers[i].extres_pages = kcalloc(nr_pages,
+					sizeof(struct page *), GFP_KERNEL);
+			if (!buffers[i].extres_pages) {
+				ret = -ENOMEM;
+				goto out_unlock;
+			}
+
+			ret = get_user_pages_fast(
+					alloc->imported.user_buf.address,
+					nr_pages, 0,
+					buffers[i].extres_pages);
+			if (ret != nr_pages)
+				goto out_unlock;
+			ret = 0;
+			break;
+		}
+		case KBASE_MEM_TYPE_IMPORTED_UMP:
+		{
+			dev_warn(katom->kctx->kbdev->dev,
+					"UMP is not supported for debug_copy jobs\n");
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		default:
+			/* Nothing to be done. */
+			break;
+		}
+		kbase_gpu_vm_unlock(katom->kctx);
+	}
+	kfree(user_buffers);
+
+	return ret;
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+
+out_cleanup:
+	kfree(buffers);
+	kfree(user_buffers);
+
+	/* Frees allocated memory for kbase_debug_copy_job struct, including
+	 * members, and sets jc to 0 */
+	kbase_debug_copy_finish(katom);
+	return ret;
+}
+
+static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+		void *extres_page, struct page **pages, unsigned int nr_pages,
+		unsigned int *target_page_nr, size_t offset, size_t *to_copy)
+{
+	void *target_page = kmap(pages[*target_page_nr]);
+	size_t chunk = PAGE_SIZE-offset;
+
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	chunk = min(chunk, *to_copy);
+
+	memcpy(target_page + offset, extres_page, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+
+	*target_page_nr += 1;
+	if (*target_page_nr >= nr_pages)
+		return;
+
+	target_page = kmap(pages[*target_page_nr]);
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(target_page);
+
+	chunk = min(offset, *to_copy);
+	memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+}
+
+static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data)
+{
+	unsigned int i;
+	unsigned int target_page_nr = 0;
+	struct kbase_va_region *reg;
+	struct page **pages = buf_data->pages;
+	u64 offset = buf_data->offset;
+	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
+	size_t to_copy = min(extres_size, buf_data->size);
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(pages != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx, buf_data->gpu_addr);
+
+	if (!reg) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+	{
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+			struct page *pg = buf_data->extres_pages[i];
+			void *extres_page = kmap(pg);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			kunmap(pg);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		break;
+	}
+	break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		struct dma_buf *dma_buf = reg->gpu_alloc->imported.umm.dma_buf;
+
+		KBASE_DEBUG_ASSERT(dma_buf != NULL);
+		KBASE_DEBUG_ASSERT(dma_buf->size ==
+				   buf_data->nr_extres_pages * PAGE_SIZE);
+
+		ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+				0, buf_data->nr_extres_pages*PAGE_SIZE,
+#endif
+				DMA_FROM_DEVICE);
+		if (ret)
+			goto out_unlock;
+
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+
+			void *extres_page = dma_buf_kmap(dma_buf, i);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			dma_buf_kunmap(dma_buf, i, extres_page);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+				0, buf_data->nr_extres_pages*PAGE_SIZE,
+#endif
+				DMA_FROM_DEVICE);
+		break;
+	}
+#endif
+	default:
+		ret = -EINVAL;
+	}
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+
+}
+
+static int kbase_debug_copy(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+
+	for (i = 0; i < katom->nr_extres; i++) {
+		int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]);
+
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	struct base_jit_alloc_info *info;
+	int ret;
+
+	/* Fail the job if there is no info structure */
+	if (!data) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(info, data, sizeof(*info)) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* If the ID is zero then fail the job */
+	if (info->id == 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Sanity check that the PA fits within the VA */
+	if (info->va_pages < info->commit_pages) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Ensure the GPU address is correctly aligned */
+	if ((info->gpu_alloc_addr & 0x7) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Replace the user pointer with our kernel allocated info structure */
+	katom->jc = (u64)(uintptr_t) info;
+
+	/*
+	 * Note:
+	 * The provided info->gpu_alloc_addr isn't validated here as
+	 * userland can cache allocations which means that even
+	 * though the region is valid it doesn't represent the
+	 * same thing it used to.
+	 *
+	 * Complete validation of va_pages, commit_pages and extent
+	 * isn't done here as it will be done during the call to
+	 * kbase_mem_alloc.
+	 */
+	return 0;
+
+free_info:
+	kfree(info);
+fail:
+	katom->jc = 0;
+	return ret;
+}
+
+static void kbase_jit_allocate_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct base_jit_alloc_info *info;
+	struct kbase_va_region *reg;
+	struct kbase_vmap_struct mapping;
+	u64 *ptr;
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+
+	/* The JIT ID is still in use so fail the allocation */
+	if (kctx->jit_alloc[info->id]) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return;
+	}
+
+	/*
+	 * Mark the allocation so we know it's in use even if the
+	 * allocation itself fails.
+	 */
+	kctx->jit_alloc[info->id] = (struct kbase_va_region *) -1;
+
+	/* Create a JIT allocation */
+	reg = kbase_jit_allocate(kctx, info);
+	if (!reg) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return;
+	}
+
+	/*
+	 * Write the address of the JIT allocation to the user provided
+	 * GPU allocation.
+	 */
+	ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+			&mapping);
+	if (!ptr) {
+		/*
+		 * Leave the allocation "live" as the JIT free jit will be
+		 * submitted anyway.
+		 */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	*ptr = reg->start_pfn << PAGE_SHIFT;
+	kbase_vunmap(kctx, &mapping);
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	/*
+	 * Bind it to the user provided ID. Do this last so we can check for
+	 * the JIT free racing this JIT alloc job.
+	 */
+	kctx->jit_alloc[info->id] = reg;
+}
+
+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom)
+{
+	struct base_jit_alloc_info *info;
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(info);
+}
+
+static void kbase_jit_free_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u8 id = (u8) katom->jc;
+
+	/*
+	 * If the ID is zero or it is not in use yet then fail the job.
+	 */
+	if ((id == 0) || (kctx->jit_alloc[id] == NULL)) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	/*
+	 * If the ID is valid but the allocation request failed still succeed
+	 * this soft job but don't try and free the allocation.
+	 */
+	if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1)
+		kbase_jit_free(kctx, kctx->jit_alloc[id]);
+
+	kctx->jit_alloc[id] = NULL;
+}
+
+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
+{
+	__user struct base_external_resource_list *user_ext_res;
+	struct base_external_resource_list *ext_res;
+	u64 count = 0;
+	size_t copy_size;
+	int ret;
+
+	user_ext_res = (__user struct base_external_resource_list *)
+			(uintptr_t) katom->jc;
+
+	/* Fail the job if there is no info structure */
+	if (!user_ext_res) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Is the number of external resources in range? */
+	if (!count || count > BASE_EXT_RES_COUNT_MAX) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	copy_size = sizeof(*ext_res);
+	copy_size += sizeof(struct base_external_resource) * (count - 1);
+	ext_res = kzalloc(copy_size, GFP_KERNEL);
+	if (!ext_res) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/*
+	 * Overwrite the count with the first value incase it was changed
+	 * after the fact.
+	 */
+	ext_res->count = count;
+
+	/*
+	 * Replace the user pointer with our kernel allocated
+	 * ext_res structure.
+	 */
+	katom->jc = (u64)(uintptr_t) ext_res;
+
+	return 0;
+
+free_info:
+	kfree(ext_res);
+fail:
+	return ret;
+}
+
+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
+{
+	struct base_external_resource_list *ext_res;
+	int i;
+	bool failed = false;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	if (!ext_res)
+		goto failed_jc;
+
+	kbase_gpu_vm_lock(katom->kctx);
+
+	for (i = 0; i < ext_res->count; i++) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		if (map) {
+			if (!kbase_sticky_resource_acquire(katom->kctx,
+					gpu_addr))
+				goto failed_loop;
+		} else
+			if (!kbase_sticky_resource_release(katom->kctx, NULL,
+					gpu_addr))
+				failed = true;
+	}
+
+	/*
+	 * In the case of unmap we continue unmapping other resources in the
+	 * case of failure but will always report failure if _any_ unmap
+	 * request fails.
+	 */
+	if (failed)
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	else
+		katom->event_code = BASE_JD_EVENT_DONE;
+
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	return;
+
+failed_loop:
+	while (--i > 0) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+
+		kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr);
+	}
+
+	katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	kbase_gpu_vm_unlock(katom->kctx);
+
+failed_jc:
+	return;
+}
+
+static void kbase_ext_res_finish(struct kbase_jd_atom *katom)
+{
+	struct base_external_resource_list *ext_res;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(ext_res);
+}
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		return kbase_dump_cpu_gpu_time(katom);
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		KBASE_DEBUG_ASSERT(katom->fence != NULL);
+		katom->event_code = kbase_fence_trigger(katom, katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+		/* Release the reference as we don't need it any more */
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		return kbase_fence_wait(katom);
+#endif				/* CONFIG_SYNC */
+	case BASE_JD_REQ_SOFT_REPLAY:
+		return kbase_replay_process(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		return kbasep_soft_event_wait(katom);
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET);
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+	{
+		int res = kbase_debug_copy(katom);
+
+		if (res)
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		break;
+	}
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_process(katom, true);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_process(katom, false);
+		break;
+	}
+
+	/* Atom is complete */
+	return 0;
+}
+
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		kbase_fence_cancel_wait(katom);
+		break;
+#endif
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		kbasep_soft_event_cancel_job(katom);
+		break;
+	default:
+		/* This soft-job doesn't support cancellation! */
+		KBASE_DEBUG_ASSERT(0);
+	}
+}
+
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		{
+			if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK))
+				return -EINVAL;
+		}
+		break;
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		{
+			struct base_fence fence;
+			int fd;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			fd = kbase_stream_create_fence(fence.basep.stream_fd);
+			if (fd < 0)
+				return -EINVAL;
+
+			katom->fence = sync_fence_fdget(fd);
+
+			if (katom->fence == NULL) {
+				/* The only way the fence can be NULL is if userspace closed it for us.
+				 * So we don't need to clear it up */
+				return -EINVAL;
+			}
+			fence.basep.fd = fd;
+			if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+				katom->fence = NULL;
+				sys_close(fd);
+				return -EINVAL;
+			}
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		{
+			struct base_fence fence;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			/* Get a reference to the fence object */
+			katom->fence = sync_fence_fdget(fence.basep.fd);
+			if (katom->fence == NULL)
+				return -EINVAL;
+		}
+		break;
+#endif				/* CONFIG_SYNC */
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_prepare(katom);
+	case BASE_JD_REQ_SOFT_REPLAY:
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		if (katom->jc == 0)
+			return -EINVAL;
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		return kbase_debug_copy_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		return kbase_ext_res_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		return kbase_ext_res_prepare(katom);
+	default:
+		/* Unsupported soft-job */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void kbase_finish_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		/* Nothing to do */
+		break;
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		/* If fence has not yet been signalled, do it now */
+		if (katom->fence) {
+			kbase_fence_trigger(katom, katom->event_code ==
+					BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+			sync_fence_put(katom->fence);
+			katom->fence = NULL;
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		/* Release the reference to the fence object */
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+		break;
+#endif				/* CONFIG_SYNC */
+
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		kbase_debug_copy_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_finish(katom);
+		break;
+	}
+}
+
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
+{
+	LIST_HEAD(local_suspended_soft_jobs);
+	struct kbase_jd_atom *tmp_iter;
+	struct kbase_jd_atom *katom_iter;
+	struct kbasep_js_device_data *js_devdata;
+	bool resched = false;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	js_devdata = &kbdev->js_data;
+
+	/* Move out the entire list */
+	mutex_lock(&js_devdata->runpool_mutex);
+	list_splice_init(&js_devdata->suspended_soft_jobs_list,
+			&local_suspended_soft_jobs);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/*
+	 * Each atom must be detached from the list and ran separately -
+	 * it could be re-added to the old list, but this is unlikely
+	 */
+	list_for_each_entry_safe(katom_iter, tmp_iter,
+			&local_suspended_soft_jobs, dep_item[1]) {
+		struct kbase_context *kctx = katom_iter->kctx;
+
+		mutex_lock(&kctx->jctx.lock);
+
+		/* Remove from the global list */
+		list_del(&katom_iter->dep_item[1]);
+		/* Remove from the context's list of waiting soft jobs */
+		kbasep_remove_waiting_soft_job(katom_iter);
+
+		if (kbase_process_soft_job(katom_iter) == 0) {
+			kbase_finish_soft_job(katom_iter);
+			resched |= jd_done_nolock(katom_iter, NULL);
+		} else {
+			KBASE_DEBUG_ASSERT((katom_iter->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE)
+					!= BASE_JD_REQ_SOFT_REPLAY);
+		}
+
+		mutex_unlock(&kctx->jctx.lock);
+	}
+
+	if (resched)
+		kbase_js_sched_all(kbdev);
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
new file mode 100644
index 0000000..c98762c
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
@@ -0,0 +1,23 @@
+ /*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include "mali_kbase_strings.h"
+
+#define KBASE_DRV_NAME "mali"
+#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline"
+
+const char kbase_drv_name[] = KBASE_DRV_NAME;
+const char kbase_timeline_name[] = KBASE_TIMELINE_NAME;
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
new file mode 100644
index 0000000..41b8fdb
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
@@ -0,0 +1,19 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+extern const char kbase_drv_name[];
+extern const char kbase_timeline_name[];
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c
new file mode 100755
index 0000000..c5fb981
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c
@@ -0,0 +1,182 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_SYNC
+
+#include <linux/seq_file.h>
+#include "sync.h"
+#include <mali_kbase.h>
+#include <mali_kbase_sync.h>
+
+struct mali_sync_timeline {
+	struct sync_timeline timeline;
+	atomic_t counter;
+	atomic_t signalled;
+};
+
+struct mali_sync_pt {
+	struct sync_pt pt;
+	int order;
+	int result;
+};
+
+static struct mali_sync_timeline *to_mali_sync_timeline(struct sync_timeline *timeline)
+{
+	return container_of(timeline, struct mali_sync_timeline, timeline);
+}
+
+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, pt);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_pt *new_mpt;
+	struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), sizeof(struct mali_sync_pt));
+
+	if (!new_pt)
+		return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+	new_mpt->order = mpt->order;
+	new_mpt->result = mpt->result;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+	int result = mpt->result;
+
+	int diff = atomic_read(&mtl->signalled) - mpt->order;
+
+	if (diff >= 0)
+		return (result < 0) ? result : 1;
+
+	return 0;
+}
+
+static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
+	struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
+
+	int diff = ma->order - mb->order;
+
+	if (diff == 0)
+		return 0;
+
+	return (diff < 0) ? -1 : 1;
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str,
+			       int size)
+{
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
+
+	snprintf(str, size, "%d", atomic_read(&mtl->signalled));
+}
+
+static void pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+
+	snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
+}
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name = "Mali",
+	.dup = timeline_dup,
+	.has_signaled = timeline_has_signaled,
+	.compare = timeline_compare,
+	.timeline_value_str = timeline_value_str,
+	.pt_value_str       = pt_value_str,
+};
+
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
+{
+	return timeline->ops == &mali_timeline_ops;
+}
+
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name)
+{
+	struct sync_timeline *tl;
+	struct mali_sync_timeline *mtl;
+
+	tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline), name);
+	if (!tl)
+		return NULL;
+
+	/* Set the counter in our private struct */
+	mtl = to_mali_sync_timeline(tl);
+	atomic_set(&mtl->counter, 0);
+	atomic_set(&mtl->signalled, 0);
+
+	return tl;
+}
+
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+	struct sync_pt *pt = sync_pt_create(parent, sizeof(struct mali_sync_pt));
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+	struct mali_sync_pt *mpt;
+
+	if (!pt)
+		return NULL;
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->order = atomic_inc_return(&mtl->counter);
+	mpt->result = 0;
+
+	return pt;
+}
+
+void kbase_sync_signal_pt(struct sync_pt *pt, int result)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+	int signalled;
+	int diff;
+
+	mpt->result = result;
+
+	do {
+		signalled = atomic_read(&mtl->signalled);
+
+		diff = signalled - mpt->order;
+
+		if (diff > 0) {
+			/* The timeline is already at or ahead of this point.
+			 * This should not happen unless userspace has been
+			 * signalling fences out of order, so warn but don't
+			 * violate the sync_pt API.
+			 * The warning is only in debug builds to prevent
+			 * a malicious user being able to spam dmesg.
+			 */
+#ifdef CONFIG_MALI_DEBUG
+			pr_err("Fences were triggered in a different order to allocation!");
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+	} while (atomic_cmpxchg(&mtl->signalled, signalled, mpt->order) != signalled);
+}
+
+#endif				/* CONFIG_SYNC */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
new file mode 100755
index 0000000..820bddc
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -0,0 +1,100 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync.h
+ *
+ */
+
+#ifndef MALI_KBASE_SYNC_H
+#define MALI_KBASE_SYNC_H
+
+#include "sync.h"
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+/* For backwards compatiblility with kernels before 3.17. After 3.17
+ * sync_pt_parent is included in the kernel. */
+static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
+{
+	return pt->parent;
+}
+#endif
+
+static inline int kbase_fence_get_status(struct sync_fence *fence)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	return fence->status;
+#else
+	return atomic_read(&fence->status);
+#endif
+}
+
+/*
+ * Create a stream object.
+ * Built on top of timeline object.
+ * Exposed as a file descriptor.
+ * Life-time controlled via the file descriptor:
+ * - dup to add a ref
+ * - close to remove a ref
+ */
+int kbase_stream_create(const char *name, int *const out_fd);
+
+/*
+ * Create a fence in a stream object
+ */
+int kbase_stream_create_fence(int tl_fd);
+
+/*
+ * Validate a fd to be a valid fence
+ * No reference is taken.
+ *
+ * This function is only usable to catch unintentional user errors early,
+ * it does not stop malicious code changing the fd after this function returns.
+ */
+int kbase_fence_validate(int fd);
+
+/* Returns true if the specified timeline is allocated by Mali */
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline);
+
+/* Allocates a timeline for Mali
+ *
+ * One timeline should be allocated per API context.
+ */
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name);
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by kbase_sync_timeline_alloc
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ */
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent);
+
+/* Signals a particular sync point
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ *
+ * If they are signalled in the wrong order then a message will be printed in debug
+ * builds and otherwise attempts to signal order sync_pts will be ignored.
+ *
+ * result can be negative to indicate error, any other value is interpreted as success.
+ */
+void kbase_sync_signal_pt(struct sync_pt *pt, int result);
+
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
new file mode 100755
index 0000000..b9baa91
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync_user.c
+ *
+ */
+
+#ifdef CONFIG_SYNC
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <mali_kbase_sync.h>
+
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+	struct sync_timeline *tl;
+
+	tl = (struct sync_timeline *)file->private_data;
+	BUG_ON(!tl);
+	sync_timeline_destroy(tl);
+	return 0;
+}
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE,
+	.release = kbase_stream_close,
+};
+
+int kbase_stream_create(const char *name, int *const out_fd)
+{
+	struct sync_timeline *tl;
+
+	BUG_ON(!out_fd);
+
+	tl = kbase_sync_timeline_alloc(name);
+	if (!tl)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY | O_CLOEXEC);
+
+	if (*out_fd < 0) {
+		sync_timeline_destroy(tl);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int kbase_stream_create_fence(int tl_fd)
+{
+	struct sync_timeline *tl;
+	struct sync_pt *pt;
+	struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+	struct files_struct *files;
+	struct fdtable *fdt;
+#endif
+	int fd;
+	struct file *tl_file;
+
+	tl_file = fget(tl_fd);
+	if (tl_file == NULL)
+		return -EBADF;
+
+	if (tl_file->f_op != &stream_fops) {
+		fd = -EBADF;
+		goto out;
+	}
+
+	tl = tl_file->private_data;
+
+	pt = kbase_sync_pt_alloc(tl);
+	if (!pt) {
+		fd = -EFAULT;
+		goto out;
+	}
+
+	fence = sync_fence_create("mali_fence", pt);
+	if (!fence) {
+		sync_pt_free(pt);
+		fd = -EFAULT;
+		goto out;
+	}
+
+	/* from here the fence owns the sync_pt */
+
+	/* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+#else
+	fd = get_unused_fd();
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+
+	files = current->files;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	__set_close_on_exec(fd, fdt);
+#else
+	FD_SET(fd, fdt->close_on_exec);
+#endif
+	spin_unlock(&files->file_lock);
+#endif  /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+	/* bind fence to the new fd */
+	sync_fence_install(fence, fd);
+
+ out:
+	fput(tl_file);
+
+	return fd;
+}
+
+int kbase_fence_validate(int fd)
+{
+	struct sync_fence *fence;
+
+	fence = sync_fence_fdget(fd);
+	if (!fence)
+		return -EINVAL;
+
+	sync_fence_put(fence);
+	return 0;
+}
+
+#endif				/* CONFIG_SYNC */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
new file mode 100755
index 0000000..ad88b7b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -0,0 +1,2245 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* The version of swtrace protocol used in timeline stream. */
+#define SWTRACE_VERSION    3
+
+/* The maximum expected length of string in tracepoint descriptor. */
+#define STRLEN_MAX         64 /* bytes */
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC       1000000000ull /* ns */
+
+/* The period of autoflush checker execution in milliseconds. */
+#define AUTOFLUSH_INTERVAL 1000 /* ms */
+
+/* The maximum size of a single packet used by timeline. */
+#define PACKET_SIZE        4096 /* bytes */
+
+/* The number of packets used by one timeline stream. */
+#define PACKET_COUNT       16
+
+/* The number of bytes reserved for packet header.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_HEADER_SIZE 8 /* bytes */
+
+/* The number of bytes reserved for packet sequence number.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_NUMBER_SIZE 4 /* bytes */
+
+/* Packet header - first word.
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_STREAMID_POS  0
+#define PACKET_STREAMID_LEN  8
+#define PACKET_RSVD1_POS     (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
+#define PACKET_RSVD1_LEN     8
+#define PACKET_TYPE_POS      (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
+#define PACKET_TYPE_LEN      3
+#define PACKET_CLASS_POS     (PACKET_TYPE_POS + PACKET_TYPE_LEN)
+#define PACKET_CLASS_LEN     7
+#define PACKET_FAMILY_POS    (PACKET_CLASS_POS + PACKET_CLASS_LEN)
+#define PACKET_FAMILY_LEN    6
+
+/* Packet header - second word
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_LENGTH_POS    0
+#define PACKET_LENGTH_LEN    24
+#define PACKET_SEQBIT_POS    (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
+#define PACKET_SEQBIT_LEN    1
+#define PACKET_RSVD2_POS     (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
+#define PACKET_RSVD2_LEN     7
+
+/* Types of streams generated by timeline.
+ * Order is significant! Header streams must precede respective body streams. */
+enum tl_stream_type {
+	TL_STREAM_TYPE_OBJ_HEADER,
+	TL_STREAM_TYPE_OBJ_SUMMARY,
+	TL_STREAM_TYPE_OBJ,
+	TL_STREAM_TYPE_AUX_HEADER,
+	TL_STREAM_TYPE_AUX,
+
+	TL_STREAM_TYPE_COUNT
+};
+
+/* Timeline packet family ids.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_family {
+	TL_PACKET_FAMILY_CTRL = 0, /* control packets */
+	TL_PACKET_FAMILY_TL   = 1, /* timeline packets */
+
+	TL_PACKET_FAMILY_COUNT
+};
+
+/* Packet classes used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_class {
+	TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
+	TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
+};
+
+/* Packet types used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_type {
+	TL_PACKET_TYPE_HEADER  = 0, /* stream's header/directory */
+	TL_PACKET_TYPE_BODY    = 1, /* stream's body */
+	TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
+};
+
+/* Message ids of trace events that are recorded in the timeline stream. */
+enum tl_msg_id_obj {
+	/* Timeline object events. */
+	KBASE_TL_NEW_CTX,
+	KBASE_TL_NEW_GPU,
+	KBASE_TL_NEW_LPU,
+	KBASE_TL_NEW_ATOM,
+	KBASE_TL_NEW_AS,
+	KBASE_TL_DEL_CTX,
+	KBASE_TL_DEL_ATOM,
+	KBASE_TL_LIFELINK_LPU_GPU,
+	KBASE_TL_LIFELINK_AS_GPU,
+	KBASE_TL_RET_CTX_LPU,
+	KBASE_TL_RET_ATOM_CTX,
+	KBASE_TL_RET_ATOM_LPU,
+	KBASE_TL_NRET_CTX_LPU,
+	KBASE_TL_NRET_ATOM_CTX,
+	KBASE_TL_NRET_ATOM_LPU,
+	KBASE_TL_RET_AS_CTX,
+	KBASE_TL_NRET_AS_CTX,
+	KBASE_TL_RET_ATOM_AS,
+	KBASE_TL_NRET_ATOM_AS,
+	KBASE_TL_DEP_ATOM_ATOM,
+	KBASE_TL_NDEP_ATOM_ATOM,
+	KBASE_TL_RDEP_ATOM_ATOM,
+	KBASE_TL_ATTRIB_ATOM_CONFIG,
+	KBASE_TL_ATTRIB_AS_CONFIG,
+
+	/* Job dump specific events. */
+	KBASE_JD_GPU_SOFT_RESET
+};
+
+/* Message ids of trace events that are recorded in the auxiliary stream. */
+enum tl_msg_id_aux {
+	KBASE_AUX_PM_STATE,
+	KBASE_AUX_ISSUE_JOB_SOFTSTOP,
+	KBASE_AUX_JOB_SOFTSTOP,
+	KBASE_AUX_JOB_SOFTSTOP_EX,
+	KBASE_AUX_PAGEFAULT,
+	KBASE_AUX_PAGESALLOC
+};
+
+/*****************************************************************************/
+
+/**
+ * struct tl_stream - timeline stream structure
+ * @lock: message order lock
+ * @buffer: array of buffers
+ * @wbi: write buffer index
+ * @rbi: read buffer index
+ * @numbered: if non-zero stream's packets are sequentially numbered
+ * @autoflush_counter: counter tracking stream's autoflush state
+ *
+ * This structure holds information needed to construct proper packets in the
+ * timeline stream. Each message in sequence must bear timestamp that is greater
+ * to one in previous message in the same stream. For this reason lock is held
+ * throughout the process of message creation. Each stream contains set of
+ * buffers. Each buffer will hold one MIPE packet. In case there is no free
+ * space required to store incoming message the oldest buffer is discarded.
+ * Each packet in timeline body stream has sequence number embedded (this value
+ * must increment monotonically and is used by packets receiver to discover
+ * buffer overflows.
+ * Autoflush counter is set to negative number when there is no data pending
+ * for flush and it is set to zero on every update of the buffer. Autoflush
+ * timer will increment the counter by one on every expiry. In case there will
+ * be no activity on the buffer during two consecutive timer expiries, stream
+ * buffer will be flushed.
+ */
+struct tl_stream {
+	spinlock_t lock;
+
+	struct {
+		atomic_t size;              /* number of bytes in buffer */
+		char     data[PACKET_SIZE]; /* buffer's data */
+	} buffer[PACKET_COUNT];
+
+	atomic_t wbi;
+	atomic_t rbi;
+
+	int      numbered;
+	atomic_t autoflush_counter;
+};
+
+/**
+ * struct tp_desc - tracepoint message descriptor structure
+ * @id:        tracepoint ID identifying message in stream
+ * @id_str:    human readable version of tracepoint ID
+ * @name:      tracepoint description
+ * @arg_types: tracepoint's arguments types declaration
+ * @arg_names: comma separated list of tracepoint's arguments names
+ */
+struct tp_desc {
+	u32        id;
+	const char *id_str;
+	const char *name;
+	const char *arg_types;
+	const char *arg_names;
+};
+
+/*****************************************************************************/
+
+/* Configuration of timeline streams generated by kernel.
+ * Kernel emit only streams containing either timeline object events or
+ * auxiliary events. All streams have stream id value of 1 (as opposed to user
+ * space streams that have value of 0). */
+static const struct {
+	enum tl_packet_family pkt_family;
+	enum tl_packet_class  pkt_class;
+	enum tl_packet_type   pkt_type;
+	unsigned int          stream_id;
+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY,    1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY,    1}
+};
+
+/* The timeline streams generated by kernel. */
+static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT];
+
+/* Autoflush timer. */
+static struct timer_list autoflush_timer;
+
+/* If non-zero autoflush timer is active. */
+static atomic_t autoflush_timer_active;
+
+/* Reader lock. Only one reader is allowed to have access to the timeline
+ * streams at any given time. */
+static DEFINE_MUTEX(tl_reader_lock);
+
+/* Timeline stream event queue. */
+static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
+
+/* The timeline stream file operations functions. */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos);
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait);
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations kbasep_tlstream_fops = {
+	.release = kbasep_tlstream_release,
+	.read    = kbasep_tlstream_read,
+	.poll    = kbasep_tlstream_poll,
+};
+
+/* Descriptors of timeline messages transmitted in object events stream. */
+static const struct tp_desc tp_desc_obj[] = {
+	{
+		KBASE_TL_NEW_CTX,
+		__stringify(KBASE_TL_NEW_CTX),
+		"object ctx is created",
+		"@pII",
+		"ctx,ctx_nr,tgid"
+	},
+	{
+		KBASE_TL_NEW_GPU,
+		__stringify(KBASE_TL_NEW_GPU),
+		"object gpu is created",
+		"@pII",
+		"gpu,gpu_id,core_count"
+	},
+	{
+		KBASE_TL_NEW_LPU,
+		__stringify(KBASE_TL_NEW_LPU),
+		"object lpu is created",
+		"@pII",
+		"lpu,lpu_nr,lpu_fn"
+	},
+	{
+		KBASE_TL_NEW_ATOM,
+		__stringify(KBASE_TL_NEW_ATOM),
+		"object atom is created",
+		"@pI",
+		"atom,atom_nr"
+	},
+	{
+		KBASE_TL_NEW_AS,
+		__stringify(KBASE_TL_NEW_AS),
+		"address space object is created",
+		"@pI",
+		"address_space,as_nr"
+	},
+	{
+		KBASE_TL_DEL_CTX,
+		__stringify(KBASE_TL_DEL_CTX),
+		"context is destroyed",
+		"@p",
+		"ctx"
+	},
+	{
+		KBASE_TL_DEL_ATOM,
+		__stringify(KBASE_TL_DEL_ATOM),
+		"atom is destroyed",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_LIFELINK_LPU_GPU,
+		__stringify(KBASE_TL_LIFELINK_LPU_GPU),
+		"lpu is deleted with gpu",
+		"@pp",
+		"lpu,gpu"
+	},
+	{
+		KBASE_TL_LIFELINK_AS_GPU,
+		__stringify(KBASE_TL_LIFELINK_AS_GPU),
+		"address space is deleted with gpu",
+		"@pp",
+		"address_space,gpu"
+	},
+	{
+		KBASE_TL_RET_CTX_LPU,
+		__stringify(KBASE_TL_RET_CTX_LPU),
+		"context is retained by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_RET_ATOM_CTX,
+		__stringify(KBASE_TL_RET_ATOM_CTX),
+		"atom is retained by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_LPU,
+		__stringify(KBASE_TL_RET_ATOM_LPU),
+		"atom is retained by lpu",
+		"@pps",
+		"atom,lpu,attrib_match_list"
+	},
+	{
+		KBASE_TL_NRET_CTX_LPU,
+		__stringify(KBASE_TL_NRET_CTX_LPU),
+		"context is released by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_NRET_ATOM_CTX,
+		__stringify(KBASE_TL_NRET_ATOM_CTX),
+		"atom is released by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_NRET_ATOM_LPU,
+		__stringify(KBASE_TL_NRET_ATOM_LPU),
+		"atom is released by lpu",
+		"@pp",
+		"atom,lpu"
+	},
+	{
+		KBASE_TL_RET_AS_CTX,
+		__stringify(KBASE_TL_RET_AS_CTX),
+		"address space is retained by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_NRET_AS_CTX,
+		__stringify(KBASE_TL_NRET_AS_CTX),
+		"address space is released by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_AS,
+		__stringify(KBASE_TL_RET_ATOM_AS),
+		"atom is retained by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_NRET_ATOM_AS,
+		__stringify(KBASE_TL_NRET_ATOM_AS),
+		"atom is released by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_DEP_ATOM_ATOM,
+		__stringify(KBASE_TL_DEP_ATOM_ATOM),
+		"atom2 depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_NDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_NDEP_ATOM_ATOM),
+		"atom2 no longer depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_RDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_RDEP_ATOM_ATOM),
+		"resolved dependecy of atom2 depending on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_ATOM_CONFIG),
+		"atom job slot attributes",
+		"@pLLI",
+		"atom,descriptor,affinity,config"
+	},
+	{
+		KBASE_TL_ATTRIB_AS_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_AS_CONFIG),
+		"address space attributes",
+		"@pLLL",
+		"address_space,transtab,memattr,transcfg"
+	},
+	{
+		KBASE_JD_GPU_SOFT_RESET,
+		__stringify(KBASE_JD_GPU_SOFT_RESET),
+		"gpu soft reset",
+		"@p",
+		"gpu"
+	},
+};
+
+/* Descriptors of timeline messages transmitted in auxiliary events stream. */
+static const struct tp_desc tp_desc_aux[] = {
+	{
+		KBASE_AUX_PM_STATE,
+		__stringify(KBASE_AUX_PM_STATE),
+		"PM state",
+		"@IL",
+		"core_type,core_state_bitset"
+	},
+	{
+		KBASE_AUX_ISSUE_JOB_SOFTSTOP,
+		__stringify(KBASE_AUX_ISSUE_JOB_SOFTSTOP),
+		"Issuing job soft stop",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_AUX_JOB_SOFTSTOP,
+		__stringify(KBASE_AUX_JOB_SOFTSTOP),
+		"Job soft stop",
+		"@I",
+		"tag_id"
+	},
+	{
+		KBASE_AUX_JOB_SOFTSTOP_EX,
+		__stringify(KBASE_AUX_JOB_SOFTSTOP_EX),
+		"Job soft stop, more details",
+		"@pI",
+		"atom,job_type"
+	},
+	{
+		KBASE_AUX_PAGEFAULT,
+		__stringify(KBASE_AUX_PAGEFAULT),
+		"Page fault",
+		"@IL",
+		"ctx_nr,page_cnt_change"
+	},
+	{
+		KBASE_AUX_PAGESALLOC,
+		__stringify(KBASE_AUX_PAGESALLOC),
+		"Total alloc pages change",
+		"@IL",
+		"ctx_nr,page_cnt"
+	}
+};
+
+#if MALI_UNIT_TEST
+/* Number of bytes read by user. */
+static atomic_t tlstream_bytes_collected = {0};
+
+/* Number of bytes generated by tracepoint messages. */
+static atomic_t tlstream_bytes_generated = {0};
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+/* Indicator of whether the timeline stream file descriptor is used. */
+atomic_t kbase_tlstream_enabled = {0};
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ * Return: timestamp value
+ */
+static u64 kbasep_tlstream_get_timestamp(void)
+{
+	struct timespec ts;
+	u64             timestamp;
+
+	getrawmonotonic(&ts);
+	timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+	return timestamp;
+}
+
+/**
+ * kbasep_tlstream_write_bytes - write data to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ * @bytes:  pointer to buffer holding data
+ * @len:    length of data to be written
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_bytes(
+		char       *buffer,
+		size_t     pos,
+		const void *bytes,
+		size_t     len)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(bytes);
+
+	memcpy(&buffer[pos], bytes, len);
+
+	return pos + len;
+}
+
+/**
+ * kbasep_tlstream_write_string - write string to message buffer
+ * @buffer:         buffer where data will be written
+ * @pos:            position in the buffer where to place data
+ * @string:         pointer to buffer holding the source string
+ * @max_write_size: number of bytes that can be stored in buffer
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_string(
+		char       *buffer,
+		size_t     pos,
+		const char *string,
+		size_t     max_write_size)
+{
+	u32 string_len;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(string);
+	/* Timeline string consists of at least string length and nul
+	 * terminator. */
+	KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
+	max_write_size -= sizeof(string_len);
+
+	string_len = strlcpy(
+			&buffer[pos + sizeof(string_len)],
+			string,
+			max_write_size);
+	string_len += sizeof(char);
+
+	/* Make sure that the source string fit into the buffer. */
+	KBASE_DEBUG_ASSERT(string_len <= max_write_size);
+
+	/* Update string length. */
+	memcpy(&buffer[pos], &string_len, sizeof(string_len));
+
+	return pos + sizeof(string_len) + string_len;
+}
+
+/**
+ * kbasep_tlstream_write_timestamp - write timestamp to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos)
+{
+	u64 timestamp = kbasep_tlstream_get_timestamp();
+
+	return kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&timestamp, sizeof(timestamp));
+}
+
+/**
+ * kbasep_tlstream_put_bits - put bits in a word
+ * @word:   pointer to the words being modified
+ * @value:  value that shall be written to given position
+ * @bitpos: position where value shall be written (in bits)
+ * @bitlen: length of value (in bits)
+ */
+static void kbasep_tlstream_put_bits(
+		u32          *word,
+		u32          value,
+		unsigned int bitpos,
+		unsigned int bitlen)
+{
+	const u32 mask = ((1 << bitlen) - 1) << bitpos;
+
+	KBASE_DEBUG_ASSERT(word);
+	KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen));
+	KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32);
+
+	*word &= ~mask;
+	*word |= ((value << bitpos) & mask);
+}
+
+/**
+ * kbasep_tlstream_packet_header_setup - setup the packet header
+ * @buffer:     pointer to the buffer
+ * @pkt_family: packet's family
+ * @pkt_type:   packet's type
+ * @pkt_class:  packet's class
+ * @stream_id:  stream id
+ * @numbered:   non-zero if this stream is numbered
+ *
+ * Function sets up immutable part of packet header in the given buffer.
+ */
+static void kbasep_tlstream_packet_header_setup(
+		char                  *buffer,
+		enum tl_packet_family pkt_family,
+		enum tl_packet_class  pkt_class,
+		enum tl_packet_type   pkt_type,
+		unsigned int          stream_id,
+		int                   numbered)
+{
+	u32 word0 = 0;
+	u32 word1 = 0;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL);
+	KBASE_DEBUG_ASSERT(
+			(pkt_type == TL_PACKET_TYPE_HEADER)  ||
+			(pkt_type == TL_PACKET_TYPE_SUMMARY) ||
+			(pkt_type == TL_PACKET_TYPE_BODY));
+	KBASE_DEBUG_ASSERT(
+			(pkt_class == TL_PACKET_CLASS_OBJ) ||
+			(pkt_class == TL_PACKET_CLASS_AUX));
+
+	kbasep_tlstream_put_bits(
+			&word0, pkt_family,
+			PACKET_FAMILY_POS, PACKET_FAMILY_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_class,
+			PACKET_CLASS_POS, PACKET_CLASS_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_type,
+			PACKET_TYPE_POS, PACKET_TYPE_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, stream_id,
+			PACKET_STREAMID_POS, PACKET_STREAMID_LEN);
+
+	if (numbered)
+		kbasep_tlstream_put_bits(
+				&word1, 1,
+				PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN);
+
+	memcpy(&buffer[0],             &word0, sizeof(word0));
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_header_update - update the packet header
+ * @buffer:    pointer to the buffer
+ * @data_size: amount of data carried in this packet
+ *
+ * Function updates mutable part of packet header in the given buffer.
+ * Note that value of data_size must not including size of the header.
+ */
+static void kbasep_tlstream_packet_header_update(
+		char   *buffer,
+		size_t data_size)
+{
+	u32 word0;
+	u32 word1;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	CSTD_UNUSED(word0);
+
+	memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1));
+
+	kbasep_tlstream_put_bits(
+			&word1, data_size,
+			PACKET_LENGTH_POS, PACKET_LENGTH_LEN);
+
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_number_update - update the packet number
+ * @buffer:  pointer to the buffer
+ * @counter: value of packet counter for this packet's stream
+ *
+ * Function updates packet number embedded within the packet placed in the
+ * given buffer.
+ */
+static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+
+	memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter));
+}
+
+/**
+ * kbasep_timeline_stream_reset - reset stream
+ * @stream:  pointer to the stream structure
+ *
+ * Function discards all pending messages and resets packet counters.
+ */
+static void kbasep_timeline_stream_reset(struct tl_stream *stream)
+{
+	unsigned int i;
+
+	for (i = 0; i < PACKET_COUNT; i++) {
+		if (stream->numbered)
+			atomic_set(
+					&stream->buffer[i].size,
+					PACKET_HEADER_SIZE +
+					PACKET_NUMBER_SIZE);
+		else
+			atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE);
+	}
+
+	atomic_set(&stream->wbi, 0);
+	atomic_set(&stream->rbi, 0);
+}
+
+/**
+ * kbasep_timeline_stream_init - initialize timeline stream
+ * @stream:      pointer to the stream structure
+ * @stream_type: stream type
+ */
+static void kbasep_timeline_stream_init(
+		struct tl_stream    *stream,
+		enum tl_stream_type stream_type)
+{
+	unsigned int i;
+
+	KBASE_DEBUG_ASSERT(stream);
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	spin_lock_init(&stream->lock);
+
+	/* All packets carrying tracepoints shall be numbered. */
+	if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type)
+		stream->numbered = 1;
+	else
+		stream->numbered = 0;
+
+	for (i = 0; i < PACKET_COUNT; i++)
+		kbasep_tlstream_packet_header_setup(
+				stream->buffer[i].data,
+				tl_stream_cfg[stream_type].pkt_family,
+				tl_stream_cfg[stream_type].pkt_class,
+				tl_stream_cfg[stream_type].pkt_type,
+				tl_stream_cfg[stream_type].stream_id,
+				stream->numbered);
+
+	kbasep_timeline_stream_reset(tl_stream[stream_type]);
+}
+
+/**
+ * kbasep_timeline_stream_term - terminate timeline stream
+ * @stream: pointer to the stream structure
+ */
+static void kbasep_timeline_stream_term(struct tl_stream *stream)
+{
+	KBASE_DEBUG_ASSERT(stream);
+}
+
+/**
+ * kbasep_tlstream_msgbuf_submit - submit packet to the user space
+ * @stream:     pointer to the stream structure
+ * @wb_idx_raw: write buffer index
+ * @wb_size:    length of data stored in current buffer
+ *
+ * Function updates currently written buffer with packet header. Then write
+ * index is incremented and buffer is handled to user space. Parameters
+ * of new buffer are returned using provided arguments.
+ *
+ * Return: length of data in new buffer
+ *
+ * Warning:  User must update the stream structure with returned value.
+ */
+static size_t kbasep_tlstream_msgbuf_submit(
+		struct tl_stream *stream,
+		unsigned int      wb_idx_raw,
+		unsigned int      wb_size)
+{
+	unsigned int rb_idx_raw = atomic_read(&stream->rbi);
+	unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
+
+	/* Set stream as flushed. */
+	atomic_set(&stream->autoflush_counter, -1);
+
+	kbasep_tlstream_packet_header_update(
+			stream->buffer[wb_idx].data,
+			wb_size - PACKET_HEADER_SIZE);
+
+	if (stream->numbered)
+		kbasep_tlstream_packet_number_update(
+				stream->buffer[wb_idx].data,
+				wb_idx_raw);
+
+	/* Increasing write buffer index will expose this packet to the reader.
+	 * As stream->lock is not taken on reader side we must make sure memory
+	 * is updated correctly before this will happen. */
+	smp_wmb();
+	wb_idx_raw++;
+	atomic_set(&stream->wbi, wb_idx_raw);
+
+	/* Inform user that packets are ready for reading. */
+	wake_up_interruptible(&tl_event_queue);
+
+	/* Detect and mark overflow in this stream. */
+	if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) {
+		/* Reader side depends on this increment to correctly handle
+		 * overflows. The value shall be updated only if it was not
+		 * modified by the reader. The data holding buffer will not be
+		 * updated before stream->lock is released, however size of the
+		 * buffer will. Make sure this increment is globally visible
+		 * before information about selected write buffer size. */
+		atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1);
+	}
+
+	wb_size = PACKET_HEADER_SIZE;
+	if (stream->numbered)
+		wb_size += PACKET_NUMBER_SIZE;
+
+	return wb_size;
+}
+
+/**
+ * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer
+ * @stream_type: type of the stream that shall be locked
+ * @msg_size:    message size
+ * @flags:       pointer to store flags passed back on stream release
+ *
+ * Function will lock the stream and reserve the number of bytes requested
+ * in msg_size for the user.
+ *
+ * Return: pointer to the buffer where message can be stored
+ *
+ * Warning: Stream must be released with kbasep_tlstream_msgbuf_release().
+ *          Only atomic operations are allowed while stream is locked
+ *          (i.e. do not use any operation that may sleep).
+ */
+static char *kbasep_tlstream_msgbuf_acquire(
+		enum tl_stream_type stream_type,
+		size_t              msg_size,
+		unsigned long       *flags) __acquires(&stream->lock)
+{
+	struct tl_stream *stream;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(
+			PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
+			msg_size);
+
+	stream = tl_stream[stream_type];
+
+	spin_lock_irqsave(&stream->lock, *flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	/* Select next buffer if data will not fit into current one. */
+	if (PACKET_SIZE < wb_size + msg_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx  = (wb_idx_raw + 1) % PACKET_COUNT;
+	}
+
+	/* Reserve space in selected buffer. */
+	atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size);
+
+#if MALI_UNIT_TEST
+	atomic_add(msg_size, &tlstream_bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+	return &stream->buffer[wb_idx].data[wb_size];
+}
+
+/**
+ * kbasep_tlstream_msgbuf_release - unlock selected stream
+ * @stream_type:  type of the stream that shall be locked
+ * @flags:        value obtained during stream acquire
+ *
+ * Function releases stream that has been previously locked with a call to
+ * kbasep_tlstream_msgbuf_acquire().
+ */
+static void kbasep_tlstream_msgbuf_release(
+		enum tl_stream_type stream_type,
+		unsigned long       flags) __releases(&stream->lock)
+{
+	struct tl_stream *stream;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	stream = tl_stream[stream_type];
+
+	/* Mark stream as containing unflushed data. */
+	atomic_set(&stream->autoflush_counter, 0);
+
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_flush_stream - flush stream
+ * @stype:  type of stream to be flushed
+ *
+ * Flush pending data in timeline stream.
+ */
+static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
+{
+	struct tl_stream *stream = tl_stream[stype];
+	unsigned long    flags;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+	size_t           min_size = PACKET_HEADER_SIZE;
+
+	if (stream->numbered)
+		min_size += PACKET_NUMBER_SIZE;
+
+	spin_lock_irqsave(&stream->lock, flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	if (wb_size > min_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+		atomic_set(&stream->buffer[wb_idx].size, wb_size);
+	}
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/**
+ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
+ * @data:  unused
+ *
+ * Timer is executed periodically to check if any of the stream contains
+ * buffer ready to be submitted to user space.
+ */
+static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
+{
+	enum tl_stream_type stype;
+	int                 rcode;
+
+	CSTD_UNUSED(data);
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
+		struct tl_stream *stream = tl_stream[stype];
+		unsigned long    flags;
+		unsigned int     wb_idx_raw;
+		unsigned int     wb_idx;
+		size_t           wb_size;
+		size_t           min_size = PACKET_HEADER_SIZE;
+
+		int af_cnt = atomic_read(&stream->autoflush_counter);
+
+		/* Check if stream contain unflushed data. */
+		if (0 > af_cnt)
+			continue;
+
+		/* Check if stream should be flushed now. */
+		if (af_cnt != atomic_cmpxchg(
+					&stream->autoflush_counter,
+					af_cnt,
+					af_cnt + 1))
+			continue;
+		if (!af_cnt)
+			continue;
+
+		/* Autoflush this stream. */
+		if (stream->numbered)
+			min_size += PACKET_NUMBER_SIZE;
+
+		spin_lock_irqsave(&stream->lock, flags);
+
+		wb_idx_raw = atomic_read(&stream->wbi);
+		wb_idx     = wb_idx_raw % PACKET_COUNT;
+		wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+		if (wb_size > min_size) {
+			wb_size = kbasep_tlstream_msgbuf_submit(
+					stream, wb_idx_raw, wb_size);
+			wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+			atomic_set(&stream->buffer[wb_idx].size,
+					wb_size);
+		}
+		spin_unlock_irqrestore(&stream->lock, flags);
+	}
+
+	if (atomic_read(&autoflush_timer_active))
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+	CSTD_UNUSED(rcode);
+}
+
+/**
+ * kbasep_tlstream_packet_pending - check timeline streams for pending packets
+ * @stype:      pointer to variable where stream type will be placed
+ * @rb_idx_raw: pointer to variable where read buffer index will be placed
+ *
+ * Function checks all streams for pending packets. It will stop as soon as
+ * packet ready to be submitted to user space is detected. Variables under
+ * pointers, passed as the parameters to this function will be updated with
+ * values pointing to right stream and buffer.
+ *
+ * Return: non-zero if any of timeline streams has at last one packet ready
+ */
+static int kbasep_tlstream_packet_pending(
+		enum tl_stream_type *stype,
+		unsigned int        *rb_idx_raw)
+{
+	int pending = 0;
+
+	KBASE_DEBUG_ASSERT(stype);
+	KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+	for (
+			*stype = 0;
+			(*stype < TL_STREAM_TYPE_COUNT) && !pending;
+			(*stype)++) {
+		if (NULL != tl_stream[*stype]) {
+			*rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi);
+			/* Read buffer index may be updated by writer in case of
+			 * overflow. Read and write buffer indexes must be
+			 * loaded in correct order. */
+			smp_rmb();
+			if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw)
+				pending = 1;
+		}
+	}
+	(*stype)--;
+
+	return pending;
+}
+
+/**
+ * kbasep_tlstream_read - copy data from streams to buffer provided by user
+ * @filp:   pointer to file structure (unused)
+ * @buffer: pointer to the buffer provided by user
+ * @size:   maximum amount of data that can be stored in the buffer
+ * @f_pos:  pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer
+ */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos)
+{
+	ssize_t copy_len = 0;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(f_pos);
+	CSTD_UNUSED(filp);
+
+	if ((0 > *f_pos) || (PACKET_SIZE > size))
+		return -EINVAL;
+
+	mutex_lock(&tl_reader_lock);
+
+	while (copy_len < size) {
+		enum tl_stream_type stype;
+		unsigned int        rb_idx_raw = 0;
+		unsigned int        rb_idx;
+		size_t              rb_size;
+
+		/* If we don't have any data yet, wait for packet to be
+		 * submitted. If we already read some packets and there is no
+		 * packet pending return back to user. */
+		if (0 < copy_len) {
+			if (!kbasep_tlstream_packet_pending(
+						&stype,
+						&rb_idx_raw))
+				break;
+		} else {
+			if (wait_event_interruptible(
+						tl_event_queue,
+						kbasep_tlstream_packet_pending(
+							&stype,
+							&rb_idx_raw))) {
+				copy_len = -ERESTARTSYS;
+				break;
+			}
+		}
+
+		/* Check if this packet fits into the user buffer.
+		 * If so copy its content. */
+		rb_idx = rb_idx_raw % PACKET_COUNT;
+		rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size);
+		if (rb_size > size - copy_len)
+			break;
+		if (copy_to_user(
+					&buffer[copy_len],
+					tl_stream[stype]->buffer[rb_idx].data,
+					rb_size)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* If the rbi still points to the packet we just processed
+		 * then there was no overflow so we add the copied size to
+		 * copy_len and move rbi on to the next packet
+		 */
+		smp_rmb();
+		if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
+			copy_len += rb_size;
+			atomic_inc(&tl_stream[stype]->rbi);
+
+#if MALI_UNIT_TEST
+			atomic_add(rb_size, &tlstream_bytes_collected);
+#endif /* MALI_UNIT_TEST */
+		}
+	}
+
+	mutex_unlock(&tl_reader_lock);
+
+	return copy_len;
+}
+
+/**
+ * kbasep_tlstream_poll - poll timeline stream for packets
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait)
+{
+	enum tl_stream_type stream_type;
+	unsigned int        rb_idx;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	poll_wait(filp, &tl_event_queue, wait);
+	if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_release - release timeline stream descriptor
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ *
+ * Return always return zero
+ */
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
+{
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+	CSTD_UNUSED(inode);
+	CSTD_UNUSED(filp);
+
+	/* Stop autoflush timer before releasing access to streams. */
+	atomic_set(&autoflush_timer_active, 0);
+	del_timer_sync(&autoflush_timer);
+
+	atomic_set(&kbase_tlstream_enabled, 0);
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_timeline_header - prepare timeline header stream packet
+ * @stream_type: type of the stream that will carry header data
+ * @tp_desc:     pointer to array with tracepoint descriptors
+ * @tp_count:    number of descriptors in the given array
+ *
+ * Functions fills in information about tracepoints stored in body stream
+ * associated with this header stream.
+ */
+static void kbasep_tlstream_timeline_header(
+		enum tl_stream_type  stream_type,
+		const struct tp_desc *tp_desc,
+		u32                  tp_count)
+{
+	const u8      tv = SWTRACE_VERSION; /* protocol version */
+	const u8      ps = sizeof(void *); /* pointer size */
+	size_t        msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
+	char          *buffer;
+	size_t        pos = 0;
+	unsigned long flags;
+	unsigned int  i;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(tp_desc);
+
+	/* Calculate the size of the timeline message. */
+	for (i = 0; i < tp_count; i++) {
+		msg_size += sizeof(tp_desc[i].id);
+		msg_size +=
+			strnlen(tp_desc[i].id_str,    STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].name,      STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_types, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_names, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+	}
+
+	KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size);
+
+	buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tp_count, sizeof(tp_count));
+
+	for (i = 0; i < tp_count; i++) {
+		pos = kbasep_tlstream_write_bytes(
+				buffer, pos,
+				&tp_desc[i].id, sizeof(tp_desc[i].id));
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].id_str, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].name, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_types, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_names, msg_size - pos);
+	}
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(stream_type, flags);
+
+	/* We don't expect any more data to be read in this stream.
+	 * As header stream must be read before its associated body stream,
+	 * make this packet visible to the user straightaway. */
+	kbasep_tlstream_flush_stream(stream_type);
+}
+
+/*****************************************************************************/
+
+int kbase_tlstream_init(void)
+{
+	enum tl_stream_type i;
+
+	/* Prepare stream structures. */
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL);
+		if (!tl_stream[i])
+			break;
+		kbasep_timeline_stream_init(tl_stream[i], i);
+	}
+	if (TL_STREAM_TYPE_COUNT > i) {
+		for (; i > 0; i--) {
+			kbasep_timeline_stream_term(tl_stream[i - 1]);
+			kfree(tl_stream[i - 1]);
+		}
+		return -ENOMEM;
+	}
+
+	/* Initialize autoflush timer. */
+	atomic_set(&autoflush_timer_active, 0);
+	setup_timer(&autoflush_timer,
+			kbasep_tlstream_autoflush_timer_callback,
+			0);
+
+	return 0;
+}
+
+void kbase_tlstream_term(void)
+{
+	enum tl_stream_type i;
+
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		kbasep_timeline_stream_term(tl_stream[i]);
+		kfree(tl_stream[i]);
+	}
+}
+
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd)
+{
+	if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, 1)) {
+		int rcode;
+
+		*fd = anon_inode_getfd(
+				"[mali_tlstream]",
+				&kbasep_tlstream_fops,
+				kctx,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd) {
+			atomic_set(&kbase_tlstream_enabled, 0);
+			return *fd;
+		}
+
+		/* Reset and initialize header streams. */
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_HEADER]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_AUX_HEADER]);
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_OBJ_HEADER,
+				tp_desc_obj,
+				ARRAY_SIZE(tp_desc_obj));
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_AUX_HEADER,
+				tp_desc_aux,
+				ARRAY_SIZE(tp_desc_aux));
+
+		/* Start autoflush timer. */
+		atomic_set(&autoflush_timer_active, 1);
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+		CSTD_UNUSED(rcode);
+
+	} else {
+		*fd = -EBUSY;
+	}
+
+	return 0;
+}
+
+void kbase_tlstream_flush_streams(void)
+{
+	enum tl_stream_type stype;
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
+		kbasep_tlstream_flush_stream(stype);
+}
+
+void kbase_tlstream_reset_body_streams(void)
+{
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_OBJ]);
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_AUX]);
+}
+
+#if MALI_UNIT_TEST
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
+{
+	KBASE_DEBUG_ASSERT(bytes_collected);
+	KBASE_DEBUG_ASSERT(bytes_generated);
+	*bytes_collected = atomic_read(&tlstream_bytes_collected);
+	*bytes_generated = atomic_read(&tlstream_bytes_generated);
+}
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
+{
+	const u32     msg_id = KBASE_TL_NEW_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) +
+		sizeof(core_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &id, sizeof(id));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_count, sizeof(core_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
+{
+	const u32     msg_id = KBASE_TL_NEW_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) +
+		sizeof(fn);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &fn, sizeof(fn));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_LPU_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_AS_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_ctx(void *context)
+{
+	const u32     msg_id = KBASE_TL_DEL_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_atom(void *atom)
+{
+	const u32     msg_id = KBASE_TL_DEL_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_RET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_LPU;
+	const size_t  msg_s0 = sizeof(u32) + sizeof(char) +
+			strnlen(attrib_match_list, STRLEN_MAX);
+	const size_t  msg_size =
+			sizeof(msg_id) + sizeof(u64) +
+			sizeof(atom) + sizeof(lpu) + msg_s0;
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_string(
+			buffer, pos, attrib_match_list, msg_s0);
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_DEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_NDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_RDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_RET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_NRET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+		sizeof(jd) + sizeof(affinity) + sizeof(config);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &jd, sizeof(jd));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &affinity, sizeof(affinity));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &config, sizeof(config));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) +
+		sizeof(transtab) + sizeof(memattr) + sizeof(transcfg);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transtab, sizeof(transtab));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &memattr, sizeof(memattr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transcfg, sizeof(transcfg));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
+{
+	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+{
+	const u32     msg_id = KBASE_AUX_PM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
+		sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_type, sizeof(core_type));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_issue_job_softstop(void *katom)
+{
+	const u32     msg_id = KBASE_AUX_ISSUE_JOB_SOFTSTOP;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(katom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_job_softstop(u32 js_id)
+{
+	const u32     msg_id = KBASE_AUX_JOB_SOFTSTOP;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(js_id);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &js_id, sizeof(js_id));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+/**
+ * __kbase_tlstream_aux_job_softstop_ex_record - record the trace point
+ * @katom: the atom that has been soft-stopped
+ * @job_type: the job type
+ */
+static void __kbase_tlstream_aux_job_softstop_ex_record(
+		void *katom, u32 job_type)
+{
+	const u32     msg_id = KBASE_AUX_JOB_SOFTSTOP_EX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(katom) + sizeof(job_type);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &job_type, sizeof(job_type));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u64 jd = katom->jc;
+
+	while (jd != 0) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		job = kbase_vmap(kctx, jd, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev,
+				"__kbase_tlstream_aux_job_softstop_ex: failed to map job descriptor 0x%llx for atom 0x%p\n",
+				jd, (void *)katom);
+			break;
+		}
+		if (job->exception_status != BASE_JD_EVENT_STOPPED) {
+			kbase_vunmap(kctx, &map);
+			break;
+		}
+
+		__kbase_tlstream_aux_job_softstop_ex_record(
+				katom, job->job_type);
+
+		jd = job->job_descriptor_size ?
+			job->next_job._64 : job->next_job._32;
+		kbase_vunmap(kctx, &map);
+	}
+}
+
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
+{
+	const u32     msg_id = KBASE_AUX_PAGEFAULT;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count_change);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&page_count_change, sizeof(page_count_change));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
+{
+	const u32     msg_id = KBASE_AUX_PAGESALLOC;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &page_count, sizeof(page_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
new file mode 100755
index 0000000..22a0d96
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -0,0 +1,509 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_KBASE_TLSTREAM_H)
+#define _KBASE_TLSTREAM_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_init - initialize timeline infrastructure in kernel
+ * Return: zero on success, negative number on error
+ */
+int kbase_tlstream_init(void);
+
+/**
+ * kbase_tlstream_term - terminate timeline infrastructure in kernel
+ *
+ * Timeline need have to been previously enabled with kbase_tlstream_init().
+ */
+void kbase_tlstream_term(void);
+
+/**
+ * kbase_tlstream_acquire - acquire timeline stream file descriptor
+ * @kctx: kernel common context
+ * @fd:   timeline stream file descriptor
+ *
+ * This descriptor is meant to be used by userspace timeline to gain access to
+ * kernel timeline stream. This stream is later broadcasted by user space to the
+ * timeline client.
+ * Only one entity can own the descriptor at any given time. Descriptor shall be
+ * closed if unused. If descriptor cannot be obtained (i.e. when it is already
+ * being used) argument fd will contain negative value.
+ *
+ * Return: zero on success (this does not necessarily mean that stream
+ *         descriptor could be returned), negative number on error
+ */
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd);
+
+/**
+ * kbase_tlstream_flush_streams - flush timeline streams.
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_tlstream_flush_streams(void);
+
+/**
+ * kbase_tlstream_reset_body_streams - reset timeline body streams.
+ *
+ * Function will discard pending data in all timeline body streams.
+ */
+void kbase_tlstream_reset_body_streams(void);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_tlstream_test - start timeline stream data generator
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay in milliseconds between trace points written by one
+ *             writer
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This test starts a requested number of asynchronous writers in both IRQ and
+ * thread context. Each writer will generate required number of test
+ * tracepoints (tracepoints with embedded information about writer that
+ * should be verified by user space reader). Tracepoints will be emitted in
+ * all timeline body streams. If aux_msg is non-zero writer will also
+ * generate not testable tracepoints (tracepoints without information about
+ * writer). These tracepoints are used to check correctness of remaining
+ * timeline message generating functions. Writer will wait requested time
+ * between generating another set of messages. This call blocks until all
+ * writers finish.
+ */
+void kbase_tlstream_test(
+		unsigned int tpw_count,
+		unsigned int msg_delay,
+		unsigned int msg_count,
+		int          aux_msg);
+
+/**
+ * kbase_tlstream_stats - read timeline stream statistics
+ * @bytes_collected: will hold number of bytes read by the user
+ * @bytes_generated: will hold number of bytes generated by trace points
+ */
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+void __kbase_tlstream_tl_del_ctx(void *context);
+void __kbase_tlstream_tl_del_atom(void *atom);
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list);
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config);
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg);
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+void __kbase_tlstream_aux_issue_job_softstop(void *katom);
+void __kbase_tlstream_aux_job_softstop(u32 js_id);
+void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom);
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
+
+extern atomic_t kbase_tlstream_enabled;
+
+#define __TRACE_IF_ENABLED(trace_name, ...)                         \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled)                                        \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_tl_summary_new_ctx - create context object in timeline
+ *                                     summary
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_ctx(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid)
+
+/**
+ * kbase_tlstream_tl_summary_new_gpu - create GPU object in timeline summary
+ * @gpu:        name of the GPU object
+ * @id:         id value of this GPU
+ * @core_count: number of cores this GPU hosts
+ *
+ * Function emits a timeline message informing about GPU creation. GPU is
+ * created with two attributes: id and core count.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_gpu(gpu, id, core_count) \
+	__TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count)
+
+/**
+ * kbase_tlstream_tl_summary_new_lpu - create LPU object in timeline summary
+ * @lpu: name of the Logical Processing Unit object
+ * @nr:  sequential number assigned to this LPU
+ * @fn:  property describing this LPU's functional abilities
+ *
+ * Function emits a timeline message informing about LPU creation. LPU is
+ * created with two attributes: number linking this LPU with GPU's job slot
+ * and function bearing information about this LPU abilities.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_lpu(lpu, nr, fn) \
+	__TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn)
+
+/**
+ * kbase_tlstream_tl_summary_lifelink_lpu_gpu - lifelink LPU object to GPU
+ * @lpu: name of the Logical Processing Unit object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that LPU object shall be deleted
+ * along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu)
+
+/**
+ * kbase_tlstream_tl_summary_new_as - create address space object in timeline summary
+ * @as: name of the address space object
+ * @nr: sequential number assigned to this address space
+ *
+ * Function emits a timeline message informing about address space creation.
+ * Address space is created with one attribute: number identifying this
+ * address space.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_as(as, nr) \
+	__TRACE_IF_ENABLED(tl_summary_new_as, as, nr)
+
+/**
+ * kbase_tlstream_tl_summary_lifelink_as_gpu - lifelink address space object to GPU
+ * @as:  name of the address space object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that address space object
+ * shall be deleted along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_lifelink_as_gpu(as, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu)
+
+/**
+ * kbase_tlstream_tl_new_ctx - create context object in timeline
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ */
+#define kbase_tlstream_tl_new_ctx(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid)
+
+/**
+ * kbase_tlstream_tl_new_atom - create atom object in timeline
+ * @atom: name of the atom object
+ * @nr:   sequential number assigned to this atom
+ *
+ * Function emits a timeline message informing about atom creation. Atom is
+ * created with atom number (its attribute) that links it with actual work
+ * bucket id understood by hardware.
+ */
+#define kbase_tlstream_tl_new_atom(atom, nr) \
+	__TRACE_IF_ENABLED(tl_new_atom, atom, nr)
+
+/**
+ * kbase_tlstream_tl_del_ctx - destroy context object in timeline
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that context object ceased to
+ * exist.
+ */
+#define kbase_tlstream_tl_del_ctx(context) \
+	__TRACE_IF_ENABLED(tl_del_ctx, context)
+
+/**
+ * kbase_tlstream_tl_del_atom - destroy atom object in timeline
+ * @atom: name of the atom object
+ *
+ * Function emits a timeline message informing that atom object ceased to
+ * exist.
+ */
+#define kbase_tlstream_tl_del_atom(atom) \
+	__TRACE_IF_ENABLED(tl_del_atom, atom)
+
+/**
+ * kbase_tlstream_tl_ret_ctx_lpu - retain context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_ctx_lpu(context, lpu) \
+	__TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu)
+
+/**
+ * kbase_tlstream_tl_ret_atom_ctx - retain atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by context and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_atom_ctx(atom, context) \
+	__TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context)
+
+/**
+ * kbase_tlstream_tl_ret_atom_lpu - retain atom by LPU
+ * @atom:              name of the atom object
+ * @lpu:               name of the Logical Processing Unit object
+ * @attrib_match_list: list containing match operator attributes
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_atom_lpu(atom, lpu, attrib_match_list) \
+	__TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list)
+
+/**
+ * kbase_tlstream_tl_nret_ctx_lpu - release context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being released
+ * by LPU object.
+ */
+#define kbase_tlstream_tl_nret_ctx_lpu(context, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu)
+
+/**
+ * kbase_tlstream_tl_nret_atom_ctx - release atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by context.
+ */
+#define kbase_tlstream_tl_nret_atom_ctx(atom, context) \
+	__TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context)
+
+/**
+ * kbase_tlstream_tl_nret_atom_lpu - release atom by LPU
+ * @atom: name of the atom object
+ * @lpu:  name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by LPU.
+ */
+#define kbase_tlstream_tl_nret_atom_lpu(atom, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu)
+
+/**
+ * kbase_tlstream_tl_ret_as_ctx - lifelink address space object to context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being held by the context object.
+ */
+#define kbase_tlstream_tl_ret_as_ctx(as, ctx) \
+	__TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx)
+
+/**
+ * kbase_tlstream_tl_nret_as_ctx - release address space by context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being released by atom.
+ */
+#define kbase_tlstream_tl_nret_as_ctx(as, ctx) \
+	__TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx)
+
+/**
+ * kbase_tlstream_tl_ret_atom_as - retain atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by address space and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_atom_as(atom, as) \
+	__TRACE_IF_ENABLED(tl_ret_atom_as, atom, as)
+
+/**
+ * kbase_tlstream_tl_nret_atom_as - release atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by address space.
+ */
+#define kbase_tlstream_tl_nret_atom_as(atom, as) \
+	__TRACE_IF_ENABLED(tl_nret_atom_as, atom, as)
+
+/**
+ * kbase_tlstream_tl_dep_atom_atom - parent atom depends on child atom
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depends on child atom
+ *
+ * Function emits a timeline message informing that parent atom waits for
+ * child atom object to be completed before start its execution.
+ */
+#define kbase_tlstream_tl_dep_atom_atom(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2)
+
+/**
+ * kbase_tlstream_tl_ndep_atom_atom - dependency between atoms resolved
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define kbase_tlstream_tl_ndep_atom_atom(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2)
+
+/**
+ * kbase_tlstream_tl_rdep_atom_atom - information about already resolved dependency between atoms
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define kbase_tlstream_tl_rdep_atom_atom(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2)
+
+/**
+ * kbase_tlstream_tl_attrib_atom_config - atom job slot attributes
+ * @atom:     name of the atom object
+ * @jd:       job descriptor address
+ * @affinity: job affinity
+ * @config:   job config
+ *
+ * Function emits a timeline message containing atom attributes.
+ */
+#define kbase_tlstream_tl_attrib_atom_config(atom, jd, affinity, config) \
+	__TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config)
+
+/**
+ * kbase_tlstream_tl_attrib_as_config - address space attributes
+ * @as:       assigned address space
+ * @transtab: configuration of the TRANSTAB register
+ * @memattr:  configuration of the MEMATTR register
+ * @transcfg: configuration of the TRANSCFG register (or zero if not present)
+ *
+ * Function emits a timeline message containing address space attributes.
+ */
+#define kbase_tlstream_tl_attrib_as_config(as, transtab, memattr, transcfg) \
+	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
+
+/**
+ * kbase_tlstream_jd_gpu_soft_reset - The GPU is being soft reset
+ * @gpu:        name of the GPU object
+ *
+ * This imperative tracepoint is specific to job dumping.
+ * Function emits a timeline message indicating GPU soft reset.
+ */
+#define kbase_tlstream_jd_gpu_soft_reset(gpu) \
+	__TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu)
+
+/**
+ * kbase_tlstream_aux_pm_state - timeline message: power management state
+ * @core_type: core type (shader, tiler, l2 cache, l3 cache)
+ * @state:     64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
+ */
+#define kbase_tlstream_aux_pm_state(core_type, state) \
+	__TRACE_IF_ENABLED(aux_pm_state, core_type, state)
+
+/**
+ * kbase_tlstream_aux_issue_job_softstop - a soft-stop command is being issued
+ * @katom: the atom that is being soft-stopped
+ */
+#define kbase_tlstream_aux_issue_job_softstop(katom) \
+	__TRACE_IF_ENABLED(aux_issue_job_softstop, katom)
+
+/**
+ * kbase_tlstream_aux_job_softstop - soft job stop occurred
+ * @js_id: job slot id
+ */
+#define kbase_tlstream_aux_job_softstop(js_id) \
+	__TRACE_IF_ENABLED(aux_job_softstop, js_id)
+
+/**
+ * kbase_tlstream_aux_job_softstop_ex - extra info about soft-stopped atom
+ * @katom: the atom that has been soft-stopped
+ *
+ * This trace point adds more details about the soft-stopped atom. These details
+ * can't be safety collected inside the interrupt handler so we're doing it
+ * inside a worker.
+ *
+ * Note: this is not the same information that is recorded in the trace point,
+ * refer to __kbase_tlstream_aux_job_softstop_ex() for more details.
+ */
+#define kbase_tlstream_aux_job_softstop_ex(katom) \
+	__TRACE_IF_ENABLED(aux_job_softstop_ex, katom)
+
+/**
+ * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event
+ *                                resulting in new pages being mapped
+ * @ctx_nr:            kernel context number
+ * @page_count_change: number of pages to be added
+ */
+#define kbase_tlstream_aux_pagefault(ctx_nr, page_count_change) \
+	__TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change)
+
+/**
+ * kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated
+ *                                 pages is changed
+ * @ctx_nr:     kernel context number
+ * @page_count: number of pages used by the context
+ */
+#define kbase_tlstream_aux_pagesalloc(ctx_nr, page_count) \
+	__TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count)
+
+#endif /* _KBASE_TLSTREAM_H */
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100755
index 0000000..e2e0544
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,264 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code idenitifers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE.
+ *
+ * e.g.:
+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X
+ * typedef enum
+ * {
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X )
+ * #include "mali_kbase_trace_defs.h"
+ * #undef  KBASE_TRACE_CODE_MAKE_CODE
+ * } kbase_trace_code;
+ *
+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE
+ *
+ *
+ * The use of the macro here is:
+ * - KBASE_TRACE_CODE_MAKE_CODE( X )
+ *
+ * Which produces:
+ * - For an enum, KBASE_TRACE_CODE_X
+ * - For a string, "X"
+ *
+ *
+ * For example:
+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to:
+ *  - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum
+ *  - "JM_JOB_COMPLETE" for the string
+ * - To use it to trace an event, do:
+ *  - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val );
+ */
+
+#if 0 /* Dummy section to avoid breaking formatting */
+int dummy_array[] = {
+#endif
+
+/*
+ * Core events
+ */
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ),
+	/* info_val == bits cleared */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR),
+	/* GPU addr==dump address */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES),
+/*
+ * Job Slot management events
+ */
+	/* info_val==irq rawstat at start */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ),
+	/* info_val==jobs processed */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END),
+/* In the following:
+ *
+ * - ctx is set if a corresponding job found (NULL otherwise, e.g. some soft-stop cases)
+ * - uatom==kernel-side mapped uatom address (for correlation with user-side)
+ */
+	/* info_val==exit code; gpu_addr==chain gpuaddr */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+	/* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT),
+	/* gpu_addr is as follows:
+	 * - If JS_STATUS active after soft-stop, val==gpu addr written to
+	 *   JS_HEAD on submit
+	 * - otherwise gpu_addr==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1),
+	/* gpu_addr==JS_TAIL read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD),
+/* gpu_addr is as follows:
+ * - If JS_STATUS active before soft-stop, val==JS_HEAD
+ * - otherwise gpu_addr==0
+ */
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE),
+	/* info_val == nr jobs submitted */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP),
+	/* gpu_addr==JS_HEAD_NEXT last written */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER),
+/*
+ * Job dispatch events
+ */
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==0, info_val==0, uatom==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER),
+/*
+ * Scheduler Core events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB),
+	/* gpu_addr==last value written/would be written to JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
+	/* kctx is the one being evicted, info_val == kctx to put in  */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE),
+	/* info_val == the ctx attribute now on ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX),
+	/* info_val == the ctx attribute now on runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL),
+	/* info_val == the ctx attribute now off ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
+	/* info_val == the ctx attribute now off runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+/*
+ * Scheduler Policy events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX),
+	/* info_val == whether it was evicted */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ),
+	/* gpu_addr==JS_HEAD to write if the job were run */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END),
+/*
+ * Power Management Events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
+	/* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF),
+	/* info_val == policy number, or -1 for "Already changing" */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+/* Unused code just to make it easier to not have a comma at the end.
+ * All other codes MUST come before this */
+	KBASE_TRACE_CODE_MAKE_CODE(DUMMY)
+
+#if 0 /* Dummy section to avoid breaking formatting */
+};
+#endif
+
+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
new file mode 100755
index 0000000..a606ae8
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
@@ -0,0 +1,232 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define CREATE_TRACE_POINTS
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+#include "mali_timeline.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active);
+
+struct kbase_trace_timeline_desc {
+	char *enum_str;
+	char *desc;
+	char *format;
+	char *format_desc;
+};
+
+static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc }
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table)
+
+static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos)
+{
+	if (*pos >= KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	(*pos)++;
+
+	if (*pos == KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace_timeline_desc *trace_desc = data;
+
+	seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc);
+	return 0;
+}
+
+
+static const struct seq_operations kbasep_trace_timeline_seq_ops = {
+	.start = kbasep_trace_timeline_seq_start,
+	.next = kbasep_trace_timeline_seq_next,
+	.stop = kbasep_trace_timeline_seq_stop,
+	.show = kbasep_trace_timeline_seq_show,
+};
+
+static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &kbasep_trace_timeline_seq_ops);
+}
+
+static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
+	.open = kbasep_trace_timeline_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_timeline_defs",
+			S_IRUGO, kbdev->mali_debugfs_directory, NULL,
+			&kbasep_trace_timeline_debugfs_fops);
+}
+
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
+	} else {
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1);
+		KBASE_TIMELINE_JOB_START(kctx, js, atom_number);
+	}
+	++kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
+	} else {
+		/* Job finished in JS_HEAD */
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0);
+		KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number);
+
+		/* see if we need to trace the job in JS_NEXT moving to JS_HEAD */
+		if (kbase_backend_nr_atoms_submitted(kbdev, js)) {
+			struct kbase_jd_atom *next_katom;
+			struct kbase_context *next_kctx;
+
+			/* Peek the next atom - note that the atom in JS_HEAD will already
+			 * have been dequeued */
+			next_katom = kbase_backend_inspect_head(kbdev, js);
+			WARN_ON(!next_katom);
+			next_kctx = next_katom->kctx;
+			KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0);
+			KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1);
+			KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom));
+		}
+	}
+
+	--kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+	int uid = 0;
+	int old_uid;
+
+	/* If a producer already exists for the event, try to use their UID (multiple-producers) */
+	uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]);
+	old_uid = uid;
+
+	/* Get a new non-zero UID if we don't have one yet */
+	while (!uid)
+		uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter);
+
+	/* Try to use this UID */
+	if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid))
+		/* If it changed, raced with another producer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid);
+}
+
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != 0) {
+		if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+			/* If it changed, raced with another consumer: we've lost this UID */
+			uid = 0;
+
+		KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+	}
+}
+
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+		/* If it changed, raced with another consumer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+}
+
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+	/* Simply log the start of the transition */
+	kbdev->timeline.l2_transitioning = true;
+	KBASE_TIMELINE_POWERING_L2(kbdev);
+}
+
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+	/* Simply log the end of the transition */
+	if (kbdev->timeline.l2_transitioning) {
+		kbdev->timeline.l2_transitioning = false;
+		KBASE_TIMELINE_POWERED_L2(kbdev);
+	}
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
new file mode 100755
index 0000000..22a3649
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
@@ -0,0 +1,356 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_KBASE_TRACE_TIMELINE_H)
+#define _KBASE_TRACE_TIMELINE_H
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+
+enum kbase_trace_timeline_code {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+/** Initialize Timeline DebugFS entries */
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
+
+/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
+ * functions.
+ * Output is timestamped by either sched_clock() (default), local_clock(), or
+ * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */
+#include "mali_timeline.h"
+
+/* Trace number of atoms in flight for kctx (atoms either not completed, or in
+   process of being returned to user */
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec,   \
+				(int)kctx->timeline.owner_tgid,              \
+				count);                                      \
+	} while (0)
+
+/* Trace atom_id being Ready to Run */
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id)                             \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec,              \
+				CTX_FLOW_ATOM_READY,                         \
+				(int)kctx->timeline.owner_tgid,              \
+				atom_id);                                    \
+	} while (0)
+
+/* Trace number of atoms submitted to job slot js
+ *
+ * NOTE: This uses a different tracepoint to the head/next/soft-stop actions,
+ * so that those actions can be filtered out separately from this
+ *
+ * This is because this is more useful, as we can use it to calculate general
+ * utilization easily and accurately */
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_ACTIVE,                      \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+
+/* Trace atoms present in JS_NEXT */
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_NEXT,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace atoms present in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_HEAD,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace that a soft stop/evict from next is being attempted on a slot */
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_STOPPING,                    \
+				(kctx) ? (int)kctx->timeline.owner_tgid : 0, \
+				js, count);                                  \
+	} while (0)
+
+
+
+/* Trace state of overall GPU power */
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active)                              \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_ACTIVE, active);            \
+	} while (0)
+
+/* Trace state of tiler power */
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap)                            \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_TILER_ACTIVE,               \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace number of shaders currently powered */
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap)                           \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_SHADER_ACTIVE,              \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 power */
+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap)                               \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_L2_ACTIVE,                  \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 cache*/
+#define KBASE_TIMELINE_POWERING_L2(kbdev)                                    \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_POWERING,               \
+				1);                                          \
+	} while (0)
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)                                     \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_ACTIVE,                 \
+				1);                                          \
+	} while (0)
+
+/* Trace kbase_pm_send_event message send */
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id)         \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_SEND_EVENT,                       \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+/* Trace kbase_pm_worker message receive */
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id)       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_HANDLE_EVENT,                     \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+
+/* Trace atom_id starting in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number)          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_START_GPU_JOB_CHAIN_SW_APPROX,            \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _consumerof_atom_number);                \
+	} while (0)
+
+/* Trace atom_id stopping on JS_HEAD */
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_STOP_GPU_JOB_CHAIN_SW_APPROX,             \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _producerof_atom_number_completed);      \
+	} while (0)
+
+/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
+ * certin caller */
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec,     \
+				trace_code, 1);                              \
+	} while (0)
+
+/* Trace number of contexts active */
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec,    \
+				count);                                      \
+	} while (0)
+
+
+/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
+
+/**
+ * Trace that an atom is starting on a job slot
+ *
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js);
+
+/**
+ * Trace that an atom has done on a job slot
+ *
+ * 'Done' in this sense can occur either because:
+ * - the atom in JS_HEAD finished
+ * - the atom in JS_NEXT was evicted
+ *
+ * Whether the atom finished or was evicted is passed in @a done_code
+ *
+ * It is assumed that the atom has already been removed from the submit slot,
+ * with either:
+ * - kbasep_jm_dequeue_submit_slot()
+ * - kbasep_jm_dequeue_tail_submit_slot()
+ *
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code);
+
+
+/** Trace a pm event starting */
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
+		enum kbase_timeline_pm_event event_sent);
+
+/** Trace a pm event finishing */
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Check whether a pm event was present, and if so trace finishing it */
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Trace L2 power-up start */
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
+
+/** Trace L2 power-up done */
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
+
+#else
+
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)  CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
+
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
+
+static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+}
+
+static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+}
+
+static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+}
+
+static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_MALI_TRACE_TIMELINE */
+
+#endif				/* _KBASE_TRACE_TIMELINE_H */
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
new file mode 100755
index 0000000..156a95a
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
@@ -0,0 +1,140 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * Conventions on Event Names:
+ *
+ * - The prefix determines something about how the timeline should be
+ *   displayed, and is split up into various parts, separated by underscores:
+ *  - 'SW' and 'HW' as the first part will be used to determine whether a
+ *     timeline is to do with Software or Hardware - effectively, separate
+ *     'channels' for Software and Hardware
+ *  - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
+ *    signify related pairs of events - these are optional.
+ *  - 'FLOW' indicates a generic event, which can use dependencies
+ * - This gives events such as:
+ *  - 'SW_ENTER_FOO'
+ *  - 'SW_LEAVE_FOO'
+ *  - 'SW_FLOW_BAR_1'
+ *  - 'SW_FLOW_BAR_2'
+ *  - 'HW_START_BAZ'
+ *  - 'HW_STOP_BAZ'
+ * - And an unadorned HW event:
+ *  - 'HW_BAZ_FROZBOZ'
+ */
+
+/*
+ * Conventions on parameter names:
+ * - anything with 'instance' in the name will have a separate timeline based
+ *   on that instances.
+ * - underscored-prefixed parameters will by hidden by default on timelines
+ *
+ * Hence:
+ * - Different job slots have their own 'instance', based on the instance value
+ * - Per-context info (e.g. atoms on a context) have their own 'instance'
+ *   (i.e. each context should be on a different timeline)
+ *
+ * Note that globally-shared resources can be tagged with a tgid, but we don't
+ * want an instance per context:
+ * - There's no point having separate Job Slot timelines for each context, that
+ *   would be confusing - there's only really 3 job slots!
+ * - There's no point having separate Shader-powered timelines for each
+ *   context, that would be confusing - all shader cores (whether it be 4, 8,
+ *   etc) are shared in the system.
+ */
+
+	/*
+	 * CTX events
+	 */
+	/* Separate timelines for each context 'instance'*/
+	KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT,     "CTX: Atoms in flight",            "%d,%d",    "_instance_tgid,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY,            "CTX: Atoms Ready to Run",         "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
+
+	/*
+	 * SW Events
+	 */
+	/* Separate timelines for each slot 'instance' */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE,         "SW: GPU slot active",             "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT,           "SW: GPU atom in NEXT",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD,           "SW: GPU atom in HEAD",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING,       "SW: Try Soft-Stop on GPU slot",   "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
+	/* Shader and overall power is shared - can't have separate instances of
+	 * it, just tagging with the context */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE,        "SW: GPU power active",            "%d,%d",    "_tgid,_value_is_power_active"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE,  "SW: GPU tiler powered",           "%d,%d",    "_tgid,_value_number_of_tilers"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered",         "%d,%d",    "_tgid,_value_number_of_shaders"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE,     "SW: GPU L2 powered",              "%d,%d",    "_tgid,_value_number_of_l2"),
+
+	/* SW Power event messaging. _event_type is one from the kbase_pm_event enum  */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT,          "SW: PM Send Event",               "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT,        "SW: PM Handle Event",             "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
+	/* SW L2 power events */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING,  "SW: GPU L2 powering",             "%d,%d", "_tgid,_writerof_l2_transitioning"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE,	  "SW: GPU L2 powering done",        "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
+
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE,          "SW: Context Active",              "%d,%d",    "_tgid,_value_active"),
+
+	/*
+	 * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END,   "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END,   "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END,   "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
+
+	/*
+	 * Significant Indirect callers of kbase_pm_check_transitions_nolock()
+	 */
+	/* kbase_pm_request_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	/* kbase_pm_release_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END,   "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	/*
+	 * END: SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+
+	/*
+	 * HW Events
+	 */
+	KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT,
+"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"),
+	KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain start (SW approximated)", "%d,%d,%d",
+"_tgid,job_slot,_consumerof_atom_number_ready"),
+	KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain stop (SW approximated)",  "%d,%d,%d",
+"_tgid,job_slot,_producerof_atom_number_completed")
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h
new file mode 100755
index 0000000..6036eed
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h
@@ -0,0 +1,521 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UKU_H_
+#define _KBASE_UKU_H_
+
+#include "mali_uk.h"
+#include "mali_base_kernel.h"
+
+/* This file needs to support being included from kernel and userside (which use different defines) */
+#if defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON
+#define SUPPORT_MALI_ERROR_INJECT
+#endif /* defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON */
+#if defined(CONFIG_MALI_NO_MALI)
+#define SUPPORT_MALI_NO_MALI
+#elif defined(MALI_NO_MALI)
+#if MALI_NO_MALI
+#define SUPPORT_MALI_NO_MALI
+#endif
+#endif
+
+#if defined(SUPPORT_MALI_NO_MALI) || defined(SUPPORT_MALI_ERROR_INJECT)
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+
+#include "mali_kbase_gpuprops_types.h"
+
+/*
+ * 10.1:
+ * - Do mmap in kernel for SAME_VA memory allocations rather then
+ *   calling back into the kernel as a 2nd stage of the allocation request.
+ *
+ * 10.2:
+ * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA
+ *   region for use with JIT (ignored on 32-bit platforms)
+ *
+ * 10.3:
+ * - base_jd_core_req typedef-ed to u32 (instead of to u16)
+ * - two flags added: BASE_JD_REQ_SKIP_CACHE_STAT / _END
+ *
+ * 10.4:
+ * - Removed KBASE_FUNC_EXT_BUFFER_LOCK used only in internal tests
+ */
+#define BASE_UK_VERSION_MAJOR 10
+#define BASE_UK_VERSION_MINOR 4
+
+struct kbase_uk_mem_alloc {
+	union uk_header header;
+	/* IN */
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	/* IN/OUT */
+	u64 flags;
+	/* OUT */
+	u64 gpu_va;
+	u16 va_alignment;
+	u8  padding[6];
+};
+
+struct kbase_uk_mem_free {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	/* OUT */
+};
+
+struct kbase_uk_mem_alias {
+	union uk_header header;
+	/* IN/OUT */
+	u64 flags;
+	/* IN */
+	u64 stride;
+	u64 nents;
+	union kbase_pointer ai;
+	/* OUT */
+	u64         gpu_va;
+	u64         va_pages;
+};
+
+struct kbase_uk_mem_import {
+	union uk_header header;
+	/* IN */
+	union kbase_pointer phandle;
+	u32 type;
+	u32 padding;
+	/* IN/OUT */
+	u64         flags;
+	/* OUT */
+	u64 gpu_va;
+	u64         va_pages;
+};
+
+struct kbase_uk_mem_flags_change {
+	union uk_header header;
+	/* IN */
+	u64 gpu_va;
+	u64 flags;
+	u64 mask;
+};
+
+struct kbase_uk_job_submit {
+	union uk_header header;
+	/* IN */
+	union kbase_pointer addr;
+	u32 nr_atoms;
+	u32 stride;		/* bytes between atoms, i.e. sizeof(base_jd_atom_v2) */
+	/* OUT */
+};
+
+struct kbase_uk_post_term {
+	union uk_header header;
+};
+
+struct kbase_uk_sync_now {
+	union uk_header header;
+
+	/* IN */
+	struct base_syncset sset;
+
+	/* OUT */
+};
+
+struct kbase_uk_hwcnt_setup {
+	union uk_header header;
+
+	/* IN */
+	u64 dump_buffer;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 unused_1; /* keep for backwards compatibility */
+	u32 mmu_l2_bm;
+	u32 padding;
+	/* OUT */
+};
+
+/**
+ * struct kbase_uk_hwcnt_reader_setup - User/Kernel space data exchange structure
+ * @header:       UK structure header
+ * @buffer_count: requested number of dumping buffers
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ * @fd:           dumping notification file descriptor
+ *
+ * This structure sets up HWC dumper/reader for this context.
+ * Multiple instances can be created for single context.
+ */
+struct kbase_uk_hwcnt_reader_setup {
+	union uk_header header;
+
+	/* IN */
+	u32 buffer_count;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 mmu_l2_bm;
+
+	/* OUT */
+	s32 fd;
+};
+
+struct kbase_uk_hwcnt_dump {
+	union uk_header header;
+};
+
+struct kbase_uk_hwcnt_clear {
+	union uk_header header;
+};
+
+struct kbase_uk_fence_validate {
+	union uk_header header;
+	/* IN */
+	s32 fd;
+	u32 padding;
+	/* OUT */
+};
+
+struct kbase_uk_stream_create {
+	union uk_header header;
+	/* IN */
+	char name[32];
+	/* OUT */
+	s32 fd;
+	u32 padding;
+};
+
+struct kbase_uk_gpuprops {
+	union uk_header header;
+
+	/* IN */
+	struct mali_base_gpu_props props;
+	/* OUT */
+};
+
+struct kbase_uk_mem_query {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+#define KBASE_MEM_QUERY_COMMIT_SIZE  1
+#define KBASE_MEM_QUERY_VA_SIZE      2
+#define KBASE_MEM_QUERY_FLAGS        3
+	u64         query;
+	/* OUT */
+	u64         value;
+};
+
+struct kbase_uk_mem_commit {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	u64         pages;
+	/* OUT */
+	u32 result_subcode;
+	u32 padding;
+};
+
+struct kbase_uk_find_cpu_offset {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	u64 cpu_addr;
+	u64 size;
+	/* OUT */
+	u64 offset;
+};
+
+#define KBASE_GET_VERSION_BUFFER_SIZE 64
+struct kbase_uk_get_ddk_version {
+	union uk_header header;
+	/* OUT */
+	char version_buffer[KBASE_GET_VERSION_BUFFER_SIZE];
+	u32 version_string_size;
+	u32 padding;
+};
+
+struct kbase_uk_disjoint_query {
+	union uk_header header;
+	/* OUT */
+	u32 counter;
+	u32 padding;
+};
+
+struct kbase_uk_set_flags {
+	union uk_header header;
+	/* IN */
+	u32 create_flags;
+	u32 padding;
+};
+
+#if MALI_UNIT_TEST
+#define TEST_ADDR_COUNT 4
+#define KBASE_TEST_BUFFER_SIZE 128
+struct kbase_exported_test_data {
+	u64 test_addr[TEST_ADDR_COUNT];		/**< memory address */
+	u32 test_addr_pages[TEST_ADDR_COUNT];		/**<  memory size in pages */
+	union kbase_pointer kctx;				/**<  base context created by process */
+	union kbase_pointer mm;				/**< pointer to process address space */
+	u8 buffer1[KBASE_TEST_BUFFER_SIZE];   /**<  unit test defined parameter */
+	u8 buffer2[KBASE_TEST_BUFFER_SIZE];   /**<  unit test defined parameter */
+};
+
+struct kbase_uk_set_test_data {
+	union uk_header header;
+	/* IN */
+	struct kbase_exported_test_data test_data;
+};
+
+#endif				/* MALI_UNIT_TEST */
+
+#ifdef SUPPORT_MALI_ERROR_INJECT
+struct kbase_uk_error_params {
+	union uk_header header;
+	/* IN */
+	struct kbase_error_params params;
+};
+#endif				/* SUPPORT_MALI_ERROR_INJECT */
+
+#ifdef SUPPORT_MALI_NO_MALI
+struct kbase_uk_model_control_params {
+	union uk_header header;
+	/* IN */
+	struct kbase_model_control_params params;
+};
+#endif				/* SUPPORT_MALI_NO_MALI */
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+struct kbase_uk_keep_gpu_powered {
+	union uk_header header;
+	u32       enabled;
+	u32       padding;
+};
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+struct kbase_uk_profiling_controls {
+	union uk_header header;
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+};
+
+struct kbase_uk_debugfs_mem_profile_add {
+	union uk_header header;
+	u32 len;
+	union kbase_pointer buf;
+};
+
+struct kbase_uk_context_id {
+	union uk_header header;
+	/* OUT */
+	int id;
+};
+
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+	!defined(MALI_MIPE_ENABLED)
+/**
+ * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @fd:     timeline stream file descriptor
+ *
+ * This structure is used used when performing a call to acquire kernel side
+ * timeline stream file descriptor.
+ */
+struct kbase_uk_tlstream_acquire {
+	union uk_header header;
+	/* IN */
+	/* OUT */
+	s32  fd;
+};
+
+/**
+ * struct kbase_uk_tlstream_flush - User/Kernel space data exchange structure
+ * @header: UK structure header
+ *
+ * This structure is used when performing a call to flush kernel side
+ * timeline streams.
+ */
+struct kbase_uk_tlstream_flush {
+	union uk_header header;
+	/* IN */
+	/* OUT */
+};
+
+#if MALI_UNIT_TEST
+/**
+ * struct kbase_uk_tlstream_test - User/Kernel space data exchange structure
+ * @header:    UK structure header
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay between tracepoints from one writer in milliseconds
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This structure is used when performing a call to start timeline stream test
+ * embedded in kernel.
+ */
+struct kbase_uk_tlstream_test {
+	union uk_header header;
+	/* IN */
+	u32 tpw_count;
+	u32 msg_delay;
+	u32 msg_count;
+	u32 aux_msg;
+	/* OUT */
+};
+
+/**
+ * struct kbase_uk_tlstream_stats - User/Kernel space data exchange structure
+ * @header:          UK structure header
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ *
+ * This structure is used when performing a call to obtain timeline stream
+ * statistics.
+ */
+struct kbase_uk_tlstream_stats {
+	union uk_header header; /**< UK structure header. */
+	/* IN */
+	/* OUT */
+	u32 bytes_collected;
+	u32 bytes_generated;
+};
+#endif /* MALI_UNIT_TEST */
+#endif /* MALI_MIPE_ENABLED */
+
+/**
+ * struct struct kbase_uk_prfcnt_value for the KBASE_FUNC_SET_PRFCNT_VALUES ioctl
+ * @header:          UK structure header
+ * @data:            Counter samples for the dummy model
+ * @size:............Size of the counter sample data
+ */
+struct kbase_uk_prfcnt_values {
+	union uk_header header;
+	/* IN */
+	u32 *data;
+	u32 size;
+};
+
+/**
+ * struct kbase_uk_soft_event_update - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @evt:        the GPU address containing the event
+ * @new_status: the new event status, must be either BASE_JD_SOFT_EVENT_SET or
+ *              BASE_JD_SOFT_EVENT_RESET
+ * @flags:      reserved for future uses, must be set to 0
+ *
+ * This structure is used to update the status of a software event. If the
+ * event's status is set to BASE_JD_SOFT_EVENT_SET, any job currently waiting
+ * on this event will complete.
+ */
+struct kbase_uk_soft_event_update {
+	union uk_header header;
+	/* IN */
+	u64 evt;
+	u32 new_status;
+	u32 flags;
+};
+
+/**
+ * struct kbase_uk_mem_jit_init - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @va_pages:   Number of virtual pages required for JIT
+ *
+ * This structure is used when requesting initialization of JIT.
+ */
+struct kbase_uk_mem_jit_init {
+	union uk_header header;
+	/* IN */
+	u64 va_pages;
+};
+
+enum kbase_uk_function_id {
+	KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0),
+	KBASE_FUNC_MEM_IMPORT = (UK_FUNC_ID + 1),
+	KBASE_FUNC_MEM_COMMIT = (UK_FUNC_ID + 2),
+	KBASE_FUNC_MEM_QUERY = (UK_FUNC_ID + 3),
+	KBASE_FUNC_MEM_FREE = (UK_FUNC_ID + 4),
+	KBASE_FUNC_MEM_FLAGS_CHANGE = (UK_FUNC_ID + 5),
+	KBASE_FUNC_MEM_ALIAS = (UK_FUNC_ID + 6),
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	KBASE_FUNC_JOB_SUBMIT_UK6 = (UK_FUNC_ID + 7),
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+	KBASE_FUNC_SYNC  = (UK_FUNC_ID + 8),
+
+	KBASE_FUNC_POST_TERM = (UK_FUNC_ID + 9),
+
+	KBASE_FUNC_HWCNT_SETUP = (UK_FUNC_ID + 10),
+	KBASE_FUNC_HWCNT_DUMP = (UK_FUNC_ID + 11),
+	KBASE_FUNC_HWCNT_CLEAR = (UK_FUNC_ID + 12),
+
+	KBASE_FUNC_GPU_PROPS_REG_DUMP = (UK_FUNC_ID + 14),
+
+	KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15),
+
+	KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16),
+	KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18),
+
+	KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19),
+	KBASE_FUNC_INJECT_ERROR = (UK_FUNC_ID + 20),
+	KBASE_FUNC_MODEL_CONTROL = (UK_FUNC_ID + 21),
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	KBASE_FUNC_KEEP_GPU_POWERED = (UK_FUNC_ID + 22),
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+	KBASE_FUNC_FENCE_VALIDATE = (UK_FUNC_ID + 23),
+	KBASE_FUNC_STREAM_CREATE = (UK_FUNC_ID + 24),
+	KBASE_FUNC_GET_PROFILING_CONTROLS = (UK_FUNC_ID + 25),
+	KBASE_FUNC_SET_PROFILING_CONTROLS = (UK_FUNC_ID + 26),
+					    /* to be used only for testing
+					    * purposes, otherwise these controls
+					    * are set through gator API */
+
+	KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD = (UK_FUNC_ID + 27),
+	KBASE_FUNC_JOB_SUBMIT = (UK_FUNC_ID + 28),
+	KBASE_FUNC_DISJOINT_QUERY = (UK_FUNC_ID + 29),
+
+	KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31),
+
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+	!defined(MALI_MIPE_ENABLED)
+	KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32),
+#if MALI_UNIT_TEST
+	KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33),
+	KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34),
+#endif /* MALI_UNIT_TEST */
+	KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35),
+#endif /* MALI_MIPE_ENABLED */
+
+	KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36),
+
+#ifdef SUPPORT_MALI_NO_MALI
+	KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37),
+#endif
+
+	KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38),
+
+	KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39),
+
+	KBASE_FUNC_MAX
+};
+
+#endif				/* _KBASE_UKU_H_ */
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
new file mode 100755
index 0000000..be474ff
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
+{
+	struct list_head *pos = base->next;
+
+	while (pos != base) {
+		if (pos == entry)
+			return true;
+
+		pos = pos->next;
+	}
+	return false;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100755
index 0000000..fd7252d
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+/** Test whether the given list entry is a member of the given list.
+ *
+ * @param base      The head of the list to be tested
+ * @param entry     The list entry to be tested
+ *
+ * @return          true if entry is a member of base
+ *                  false otherwise
+ */
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
+
+#endif				/* _KBASE_UTILITY_H */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
new file mode 100755
index 0000000..bd6095f
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -0,0 +1,2041 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/hrtimer.h>
+#include <linux/jiffies.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hwcnt_reader.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* Hwcnt reader API version */
+#define HWCNT_READER_API        1
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC            1000000000ull /* ns */
+
+/* The time resolution of dumping service. */
+#define DUMPING_RESOLUTION      500000ull /* ns */
+
+/* The maximal supported number of dumping buffers. */
+#define MAX_BUFFER_COUNT        32
+
+/* Size and number of hw counters blocks. */
+#define NR_CNT_BLOCKS_PER_GROUP 8
+#define NR_CNT_PER_BLOCK        64
+#define NR_BYTES_PER_CNT        4
+#define NR_BYTES_PER_HDR        16
+#define PRFCNT_EN_MASK_OFFSET   0x8
+
+/*****************************************************************************/
+
+enum {
+	SHADER_HWCNT_BM,
+	TILER_HWCNT_BM,
+	MMU_L2_HWCNT_BM,
+	JM_HWCNT_BM
+};
+
+enum vinstr_state {
+	VINSTR_IDLE,
+	VINSTR_DUMPING,
+	VINSTR_SUSPENDING,
+	VINSTR_SUSPENDED,
+	VINSTR_RESUMING
+};
+
+/**
+ * struct kbase_vinstr_context - vinstr context per device
+ * @lock:              protects the entire vinstr context
+ * @kbdev:             pointer to kbase device
+ * @kctx:              pointer to kbase context
+ * @vmap:              vinstr vmap for mapping hwcnt dump buffer
+ * @gpu_va:            GPU hwcnt dump buffer address
+ * @cpu_va:            the CPU side mapping of the hwcnt dump buffer
+ * @dump_size:         size of the dump buffer in bytes
+ * @bitmap:            current set of counters monitored, not always in sync
+ *                     with hardware
+ * @reprogram:         when true, reprogram hwcnt block with the new set of
+ *                     counters
+ * @state:             vinstr state
+ * @state_lock:        protects information about vinstr state
+ * @suspend_waitq:     notification queue to trigger state re-validation
+ * @suspend_cnt:       reference counter of vinstr's suspend state
+ * @suspend_work:      worker to execute on entering suspended state
+ * @resume_work:       worker to execute on leaving suspended state
+ * @nclients:          number of attached clients, pending or otherwise
+ * @waiting_clients:   head of list of clients being periodically sampled
+ * @idle_clients:      head of list of clients being idle
+ * @suspended_clients: head of list of clients being suspended
+ * @thread:            periodic sampling thread
+ * @waitq:             notification queue of sampling thread
+ * @request_pending:   request for action for sampling thread
+ */
+struct kbase_vinstr_context {
+	struct mutex             lock;
+	struct kbase_device      *kbdev;
+	struct kbase_context     *kctx;
+
+	struct kbase_vmap_struct vmap;
+	u64                      gpu_va;
+	void                     *cpu_va;
+	size_t                   dump_size;
+	u32                      bitmap[4];
+	bool                     reprogram;
+
+	enum vinstr_state        state;
+	struct spinlock          state_lock;
+	wait_queue_head_t        suspend_waitq;
+	unsigned int             suspend_cnt;
+	struct work_struct       suspend_work;
+	struct work_struct       resume_work;
+
+	u32                      nclients;
+	struct list_head         waiting_clients;
+	struct list_head         idle_clients;
+	struct list_head         suspended_clients;
+
+	struct task_struct       *thread;
+	wait_queue_head_t        waitq;
+	atomic_t                 request_pending;
+};
+
+/**
+ * struct kbase_vinstr_client - a vinstr client attached to a vinstr context
+ * @vinstr_ctx:    vinstr context client is attached to
+ * @list:          node used to attach this client to list in vinstr context
+ * @buffer_count:  number of buffers this client is using
+ * @event_mask:    events this client reacts to
+ * @dump_size:     size of one dump buffer in bytes
+ * @bitmap:        bitmap request for JM, TILER, SHADER and MMU counters
+ * @legacy_buffer: userspace hwcnt dump buffer (legacy interface)
+ * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface)
+ * @accum_buffer:  temporary accumulation buffer for preserving counters
+ * @dump_time:     next time this clients shall request hwcnt dump
+ * @dump_interval: interval between periodic hwcnt dumps
+ * @dump_buffers:  kernel hwcnt dump buffers allocated by this client
+ * @dump_buffers_meta: metadata of dump buffers
+ * @meta_idx:      index of metadata being accessed by userspace
+ * @read_idx:      index of buffer read by userspace
+ * @write_idx:     index of buffer being written by dumping service
+ * @waitq:         client's notification queue
+ * @pending:       when true, client has attached but hwcnt not yet updated
+ */
+struct kbase_vinstr_client {
+	struct kbase_vinstr_context        *vinstr_ctx;
+	struct list_head                   list;
+	unsigned int                       buffer_count;
+	u32                                event_mask;
+	size_t                             dump_size;
+	u32                                bitmap[4];
+	void __user                        *legacy_buffer;
+	void                               *kernel_buffer;
+	void                               *accum_buffer;
+	u64                                dump_time;
+	u32                                dump_interval;
+	char                               *dump_buffers;
+	struct kbase_hwcnt_reader_metadata *dump_buffers_meta;
+	atomic_t                           meta_idx;
+	atomic_t                           read_idx;
+	atomic_t                           write_idx;
+	wait_queue_head_t                  waitq;
+	bool                               pending;
+};
+
+/**
+ * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer
+ * @hrtimer:    high resolution timer
+ * @vinstr_ctx: vinstr context
+ */
+struct kbasep_vinstr_wake_up_timer {
+	struct hrtimer              hrtimer;
+	struct kbase_vinstr_context *vinstr_ctx;
+};
+
+/*****************************************************************************/
+
+static int kbasep_vinstr_service_task(void *data);
+
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+		struct file *filp,
+		poll_table  *wait);
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+		struct file   *filp,
+		unsigned int  cmd,
+		unsigned long arg);
+static int kbasep_vinstr_hwcnt_reader_mmap(
+		struct file           *filp,
+		struct vm_area_struct *vma);
+static int kbasep_vinstr_hwcnt_reader_release(
+		struct inode *inode,
+		struct file  *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations vinstr_client_fops = {
+	.poll           = kbasep_vinstr_hwcnt_reader_poll,
+	.unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,
+	.compat_ioctl   = kbasep_vinstr_hwcnt_reader_ioctl,
+	.mmap           = kbasep_vinstr_hwcnt_reader_mmap,
+	.release        = kbasep_vinstr_hwcnt_reader_release,
+};
+
+/*****************************************************************************/
+
+static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_uk_hwcnt_setup setup;
+	int err;
+
+	setup.dump_buffer = vinstr_ctx->gpu_va;
+	setup.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
+	setup.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
+	setup.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
+	setup.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
+
+	/* Mark the context as active so the GPU is kept turned on */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread. */
+	kbase_pm_context_active(kbdev);
+
+	/* Schedule the context in */
+	kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup);
+	if (err) {
+		/* Release the context. This had its own Power Manager Active
+		 * reference */
+		kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+		/* Also release our Power Manager Active reference */
+		kbase_pm_context_idle(kbdev);
+	}
+
+	return err;
+}
+
+static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+
+	err = kbase_instr_hwcnt_disable_internal(kctx);
+	if (err) {
+		dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)",
+				kctx);
+		return;
+	}
+
+	/* Release the context. This had its own Power Manager Active reference. */
+	kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+	/* Also release our Power Manager Active reference. */
+	kbase_pm_context_idle(kbdev);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx);
+}
+
+static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	disable_hwcnt(vinstr_ctx);
+	return enable_hwcnt(vinstr_ctx);
+}
+
+static void hwcnt_bitmap_set(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     = src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  = src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM];
+}
+
+static void hwcnt_bitmap_union(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     |= src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  |= src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM];
+}
+
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev)
+{
+	size_t dump_size;
+
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+		u32 nr_cg;
+
+		nr_cg = kbdev->gpu_props.num_core_groups;
+		dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	} else
+#endif /* CONFIG_MALI_NO_MALI */
+	{
+		/* assume v5 for now */
+		base_gpu_props *props = &kbdev->gpu_props.props;
+		u32 nr_l2 = props->l2_props.num_l2_slices;
+		u64 core_mask = props->coherency_info.group[0].core_mask;
+		u32 nr_blocks = fls64(core_mask);
+
+		/* JM and tiler counter blocks are always present */
+		dump_size = (2 + nr_l2 + nr_blocks) *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	}
+	return dump_size;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size);
+
+static size_t kbasep_vinstr_dump_size_ctx(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev);
+}
+
+static int kbasep_vinstr_map_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_va_region *reg;
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	u64 flags, nr_pages;
+	u16 va_align = 0;
+
+	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+	vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	nr_pages = PFN_UP(vinstr_ctx->dump_size);
+
+	reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+			&vinstr_ctx->gpu_va, &va_align);
+	if (!reg)
+		return -ENOMEM;
+
+	vinstr_ctx->cpu_va = kbase_vmap(
+			kctx,
+			vinstr_ctx->gpu_va,
+			vinstr_ctx->dump_size,
+			&vinstr_ctx->vmap);
+	if (!vinstr_ctx->cpu_va) {
+		kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void kbasep_vinstr_unmap_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+
+	kbase_vunmap(kctx, &vinstr_ctx->vmap);
+	kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+}
+
+/**
+ * kbasep_vinstr_create_kctx - create kernel context for vinstr
+ * @vinstr_ctx: vinstr context
+ * Return: zero on success
+ */
+static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	unsigned long flags;
+	bool enable_backend = false;
+	int err;
+
+	vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true);
+	if (!vinstr_ctx->kctx)
+		return -ENOMEM;
+
+	/* Map the master kernel dump buffer.  The HW dumps the counters
+	 * into this memory region. */
+	err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx);
+	if (err) {
+		kbase_destroy_context(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return err;
+	}
+
+	/* Add kernel context to list of contexts associated with device. */
+	element = kzalloc(sizeof(*element), GFP_KERNEL);
+	if (element) {
+		element->kctx = vinstr_ctx->kctx;
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_add(&element->link, &kbdev->kctx_list);
+
+		/* Inform timeline client about new context.
+		 * Do this while holding the lock to avoid tracepoint
+		 * being created in both body and summary stream. */
+		kbase_tlstream_tl_new_ctx(
+				vinstr_ctx->kctx,
+				(u32)(vinstr_ctx->kctx->id),
+				(u32)(vinstr_ctx->kctx->tgid));
+
+		mutex_unlock(&kbdev->kctx_list_lock);
+	} else {
+		/* Don't treat this as a fail - just warn about it. */
+		dev_warn(kbdev->dev,
+				"couldn't add kctx to kctx_list\n");
+	}
+
+	/* Don't enable hardware counters if vinstr is suspended.
+	 * Note that vinstr resume code is run under vinstr context lock,
+	 * lower layer will be enabled as needed on resume. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE == vinstr_ctx->state)
+		enable_backend = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (enable_backend)
+		err = enable_hwcnt(vinstr_ctx);
+
+	if (err) {
+		kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+		kbase_destroy_context(vinstr_ctx->kctx);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			list_del(&element->link);
+			kfree(element);
+			mutex_unlock(&kbdev->kctx_list_lock);
+		}
+		kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return err;
+	}
+
+	vinstr_ctx->thread = kthread_run(
+			kbasep_vinstr_service_task,
+			vinstr_ctx,
+			"mali_vinstr_service");
+	if (!vinstr_ctx->thread) {
+		disable_hwcnt(vinstr_ctx);
+		kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+		kbase_destroy_context(vinstr_ctx->kctx);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			list_del(&element->link);
+			kfree(element);
+			mutex_unlock(&kbdev->kctx_list_lock);
+		}
+		kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device             *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	struct kbasep_kctx_list_element *tmp;
+	bool                            found = false;
+
+	/* Release hw counters dumping resources. */
+	vinstr_ctx->thread = NULL;
+	disable_hwcnt(vinstr_ctx);
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+	kbase_destroy_context(vinstr_ctx->kctx);
+
+	/* Remove kernel context from the device's contexts list. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == vinstr_ctx->kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	if (!found)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	/* Inform timeline client about context destruction. */
+	kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
+
+	vinstr_ctx->kctx = NULL;
+}
+
+/**
+ * kbasep_vinstr_attach_client - Attach a client to the vinstr core
+ * @vinstr_ctx:    vinstr context
+ * @buffer_count:  requested number of dump buffers
+ * @bitmap:        bitmaps describing which counters should be enabled
+ * @argp:          pointer where notification descriptor shall be stored
+ * @kernel_buffer: pointer to kernel side buffer
+ *
+ * Return: vinstr opaque client handle or NULL on failure
+ */
+static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
+		struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count,
+		u32 bitmap[4], void *argp, void *kernel_buffer)
+{
+	struct task_struct         *thread = NULL;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	if (buffer_count > MAX_BUFFER_COUNT
+	    || (buffer_count & (buffer_count - 1)))
+		return NULL;
+
+	cli = kzalloc(sizeof(*cli), GFP_KERNEL);
+	if (!cli)
+		return NULL;
+
+	cli->vinstr_ctx   = vinstr_ctx;
+	cli->buffer_count = buffer_count;
+	cli->event_mask   =
+		(1 << BASE_HWCNT_READER_EVENT_MANUAL) |
+		(1 << BASE_HWCNT_READER_EVENT_PERIODIC);
+	cli->pending      = true;
+
+	hwcnt_bitmap_set(cli->bitmap, bitmap);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap);
+	vinstr_ctx->reprogram = true;
+
+	/* If this is the first client, create the vinstr kbase
+	 * context. This context is permanently resident until the
+	 * last client exits. */
+	if (!vinstr_ctx->nclients) {
+		hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap);
+		if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0)
+			goto error;
+
+		vinstr_ctx->reprogram = false;
+		cli->pending = false;
+	}
+
+	/* The GPU resets the counter block every time there is a request
+	 * to dump it. We need a per client kernel buffer for accumulating
+	 * the counters. */
+	cli->dump_size    = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
+	if (!cli->accum_buffer)
+		goto error;
+
+	/* Prepare buffers. */
+	if (cli->buffer_count) {
+		int *fd = (int *)argp;
+		size_t tmp;
+
+		/* Allocate area for buffers metadata storage. */
+		tmp = sizeof(struct kbase_hwcnt_reader_metadata) *
+			cli->buffer_count;
+		cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL);
+		if (!cli->dump_buffers_meta)
+			goto error;
+
+		/* Allocate required number of dumping buffers. */
+		cli->dump_buffers = (char *)__get_free_pages(
+				GFP_KERNEL | __GFP_ZERO,
+				get_order(cli->dump_size * cli->buffer_count));
+		if (!cli->dump_buffers)
+			goto error;
+
+		/* Create descriptor for user-kernel data exchange. */
+		*fd = anon_inode_getfd(
+				"[mali_vinstr_desc]",
+				&vinstr_client_fops,
+				cli,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd)
+			goto error;
+	} else if (kernel_buffer) {
+		cli->kernel_buffer = kernel_buffer;
+	} else {
+		cli->legacy_buffer = (void __user *)argp;
+	}
+
+	atomic_set(&cli->read_idx, 0);
+	atomic_set(&cli->meta_idx, 0);
+	atomic_set(&cli->write_idx, 0);
+	init_waitqueue_head(&cli->waitq);
+
+	vinstr_ctx->nclients++;
+	list_add(&cli->list, &vinstr_ctx->idle_clients);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return cli;
+
+error:
+	kfree(cli->dump_buffers_meta);
+	if (cli->dump_buffers)
+		free_pages(
+				(unsigned long)cli->dump_buffers,
+				get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	if (!vinstr_ctx->nclients && vinstr_ctx->kctx) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+	kfree(cli);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+
+	return NULL;
+}
+
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	struct kbase_vinstr_client  *iter, *tmp;
+	struct task_struct          *thread = NULL;
+	u32 zerobitmap[4] = { 0 };
+	int cli_found = 0;
+
+	KBASE_DEBUG_ASSERT(cli);
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) {
+		if (iter == cli) {
+			vinstr_ctx->reprogram = true;
+			cli_found = 1;
+			list_del(&iter->list);
+			break;
+		}
+	}
+	if (!cli_found) {
+		list_for_each_entry_safe(
+				iter, tmp, &vinstr_ctx->waiting_clients, list) {
+			if (iter == cli) {
+				vinstr_ctx->reprogram = true;
+				cli_found = 1;
+				list_del(&iter->list);
+				break;
+			}
+		}
+	}
+	KBASE_DEBUG_ASSERT(cli_found);
+
+	kfree(cli->dump_buffers_meta);
+	free_pages(
+			(unsigned long)cli->dump_buffers,
+			get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	kfree(cli);
+
+	vinstr_ctx->nclients--;
+	if (!vinstr_ctx->nclients) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+
+	/* Rebuild context bitmap now that the client has detached */
+	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client);
+
+/* Accumulate counters in the dump buffer */
+static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
+{
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+	u32 *d = dst;
+	u32 *s = src;
+	size_t i, j;
+
+	for (i = 0; i < dump_size; i += block_size) {
+		/* skip over the header block */
+		d += NR_BYTES_PER_HDR / sizeof(u32);
+		s += NR_BYTES_PER_HDR / sizeof(u32);
+		for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
+			/* saturate result if addition would result in wraparound */
+			if (U32_MAX - *d < *s)
+				*d = U32_MAX;
+			else
+				*d += *s;
+			d++;
+			s++;
+		}
+	}
+}
+
+/* This is the Midgard v4 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v4(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups;
+	size_t i, group_size, group;
+	enum {
+		SC0_BASE    = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC1_BASE    = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC2_BASE    = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC3_BASE    = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		TILER_BASE  = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		JM_BASE     = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
+	};
+
+	group_size = NR_CNT_BLOCKS_PER_GROUP *
+			NR_CNT_PER_BLOCK *
+			NR_BYTES_PER_CNT;
+	for (i = 0; i < nr_cg; i++) {
+		group = i * group_size;
+		/* copy shader core headers */
+		memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE],
+		      NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy tiler header */
+		memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy mmu header */
+		memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy job manager header */
+		memcpy(&dst[group + JM_BASE], &src[group + JM_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* patch the shader core enable mask */
+		mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+
+		/* patch the tiler core enable mask */
+		mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[TILER_HWCNT_BM];
+
+		/* patch the mmu core enable mask */
+		mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+
+		/* patch the job manager enable mask */
+		mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[JM_HWCNT_BM];
+	}
+}
+
+/* This is the Midgard v5 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v5(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev;
+	u32 i, nr_l2;
+	u64 core_mask;
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+
+	/* copy and patch job manager header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[JM_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch tiler header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[TILER_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch MMU/L2C headers */
+	nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+	for (i = 0; i < nr_l2; i++) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+		dst += block_size;
+		src += block_size;
+	}
+
+	/* copy and patch shader core headers */
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (0ull != core_mask) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		if (0ull != (core_mask & 1ull)) {
+			/* if block is not reserved update header */
+			mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+			*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		}
+		dst += block_size;
+		src += block_size;
+
+		core_mask >>= 1;
+	}
+}
+
+/**
+ * accum_clients - accumulate dumped hw counters for all known clients
+ * @vinstr_ctx: vinstr context
+ */
+static void accum_clients(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *iter;
+	int v4 = 0;
+
+#ifndef CONFIG_MALI_NO_MALI
+	v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4);
+#endif
+
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ *
+ * Return: timestamp value
+ */
+static u64 kbasep_vinstr_get_timestamp(void)
+{
+	struct timespec ts;
+
+	getrawmonotonic(&ts);
+	return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+}
+
+/**
+ * kbasep_vinstr_add_dump_request - register client's dumping request
+ * @cli:             requesting client
+ * @waiting_clients: list of pending dumping requests
+ */
+static void kbasep_vinstr_add_dump_request(
+		struct kbase_vinstr_client *cli,
+		struct list_head *waiting_clients)
+{
+	struct kbase_vinstr_client *tmp;
+
+	if (list_empty(waiting_clients)) {
+		list_add(&cli->list, waiting_clients);
+		return;
+	}
+	list_for_each_entry(tmp, waiting_clients, list) {
+		if (tmp->dump_time > cli->dump_time) {
+			list_add_tail(&cli->list, &tmp->list);
+			return;
+		}
+	}
+	list_add_tail(&cli->list, waiting_clients);
+}
+
+/**
+ * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level
+ *                                        dump and accumulate them for known
+ *                                        clients
+ * @vinstr_ctx: vinstr context
+ * @timestamp: pointer where collection timestamp will be recorded
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_collect_and_accumulate(
+		struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp)
+{
+	unsigned long flags;
+	int rcode;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* The dummy model needs the CPU mapping. */
+	gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va);
+#endif
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state) {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		return -EAGAIN;
+	} else {
+		vinstr_ctx->state = VINSTR_DUMPING;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Request HW counters dump.
+	 * Disable preemption to make dump timestamp more accurate. */
+	preempt_disable();
+	*timestamp = kbasep_vinstr_get_timestamp();
+	rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx);
+	preempt_enable();
+
+	if (!rcode)
+		rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
+	WARN_ON(rcode);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state)
+	{
+	case VINSTR_SUSPENDING:
+		schedule_work(&vinstr_ctx->suspend_work);
+		break;
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_IDLE;
+		wake_up_all(&vinstr_ctx->suspend_waitq);
+		break;
+	default:
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Accumulate values of collected counters. */
+	if (!rcode)
+		accum_clients(vinstr_ctx);
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel
+ *                                  buffer
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_fill_dump_buffer(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	unsigned int write_idx = atomic_read(&cli->write_idx);
+	unsigned int read_idx  = atomic_read(&cli->read_idx);
+
+	struct kbase_hwcnt_reader_metadata *meta;
+	void                               *buffer;
+
+	/* Check if there is a place to copy HWC block into. */
+	if (write_idx - read_idx == cli->buffer_count)
+		return -1;
+	write_idx %= cli->buffer_count;
+
+	/* Fill in dump buffer and its metadata. */
+	buffer = &cli->dump_buffers[write_idx * cli->dump_size];
+	meta   = &cli->dump_buffers_meta[write_idx];
+	meta->timestamp  = timestamp;
+	meta->event_id   = event_id;
+	meta->buffer_idx = write_idx;
+	memcpy(buffer, cli->accum_buffer, cli->dump_size);
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer
+ *                                         allocated in userspace
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of legacy ioctl interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_legacy(
+		struct kbase_vinstr_client *cli)
+{
+	void __user  *buffer = cli->legacy_buffer;
+	int          rcode;
+
+	/* Copy data to user buffer. */
+	rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size);
+	if (rcode)
+		pr_warn("error while copying buffer to user\n");
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer
+ *                                         allocated in kernel space
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of the kernel client interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_kernel(
+		struct kbase_vinstr_client *cli)
+{
+	memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_reprogram(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	bool suspended = false;
+
+	/* Don't enable hardware counters if vinstr is suspended. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state)
+		suspended = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (suspended)
+		return;
+
+	/* Change to suspended state is done while holding vinstr context
+	 * lock. Below code will then no re-enable the instrumentation. */
+
+	if (vinstr_ctx->reprogram) {
+		struct kbase_vinstr_client *iter;
+
+		if (!reprogram_hwcnt(vinstr_ctx)) {
+			vinstr_ctx->reprogram = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->idle_clients,
+					list)
+				iter->pending = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->waiting_clients,
+					list)
+				iter->pending = false;
+		}
+	}
+}
+
+/**
+ * kbasep_vinstr_update_client - copy accumulated counters to user readable
+ *                               buffer and notify the user
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_update_client(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	int rcode = 0;
+
+	/* Copy collected counters to user readable buffer. */
+	if (cli->buffer_count)
+		rcode = kbasep_vinstr_fill_dump_buffer(
+				cli, timestamp, event_id);
+	else if (cli->kernel_buffer)
+		rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli);
+	else
+		rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
+
+	if (rcode)
+		goto exit;
+
+
+	/* Notify client. Make sure all changes to memory are visible. */
+	wmb();
+	atomic_inc(&cli->write_idx);
+	wake_up_interruptible(&cli->waitq);
+
+	/* Prepare for next request. */
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+exit:
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function
+ *
+ * @hrtimer: high resolution timer
+ *
+ * Return: High resolution timer restart enum.
+ */
+static enum hrtimer_restart kbasep_vinstr_wake_up_callback(
+		struct hrtimer *hrtimer)
+{
+	struct kbasep_vinstr_wake_up_timer *timer =
+		container_of(
+			hrtimer,
+			struct kbasep_vinstr_wake_up_timer,
+			hrtimer);
+
+	KBASE_DEBUG_ASSERT(timer);
+
+	atomic_set(&timer->vinstr_ctx->request_pending, 1);
+	wake_up_all(&timer->vinstr_ctx->waitq);
+
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * kbasep_vinstr_service_task - HWC dumping service thread
+ *
+ * @data: Pointer to vinstr context structure.
+ *
+ * Return: Always returns zero.
+ */
+static int kbasep_vinstr_service_task(void *data)
+{
+	struct kbase_vinstr_context        *vinstr_ctx = data;
+	struct kbasep_vinstr_wake_up_timer timer;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	hrtimer_init(&timer.hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	timer.hrtimer.function = kbasep_vinstr_wake_up_callback;
+	timer.vinstr_ctx       = vinstr_ctx;
+
+	while (!kthread_should_stop()) {
+		struct kbase_vinstr_client *cli = NULL;
+		struct kbase_vinstr_client *tmp;
+		int                        rcode;
+
+		u64              timestamp = kbasep_vinstr_get_timestamp();
+		u64              dump_time = 0;
+		struct list_head expired_requests;
+
+		/* Hold lock while performing operations on lists of clients. */
+		mutex_lock(&vinstr_ctx->lock);
+
+		/* Closing thread must not interact with client requests. */
+		if (current == vinstr_ctx->thread) {
+			atomic_set(&vinstr_ctx->request_pending, 0);
+
+			if (!list_empty(&vinstr_ctx->waiting_clients)) {
+				cli = list_first_entry(
+						&vinstr_ctx->waiting_clients,
+						struct kbase_vinstr_client,
+						list);
+				dump_time = cli->dump_time;
+			}
+		}
+
+		if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) {
+			mutex_unlock(&vinstr_ctx->lock);
+
+			/* Sleep until next dumping event or service request. */
+			if (cli) {
+				u64 diff = dump_time - timestamp;
+
+				hrtimer_start(
+						&timer.hrtimer,
+						ns_to_ktime(diff),
+						HRTIMER_MODE_REL);
+			}
+			wait_event(
+					vinstr_ctx->waitq,
+					atomic_read(
+						&vinstr_ctx->request_pending) ||
+					kthread_should_stop());
+			hrtimer_cancel(&timer.hrtimer);
+			continue;
+		}
+
+		rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx,
+				&timestamp);
+
+		INIT_LIST_HEAD(&expired_requests);
+
+		/* Find all expired requests. */
+		list_for_each_entry_safe(
+				cli,
+				tmp,
+				&vinstr_ctx->waiting_clients,
+				list) {
+			s64 tdiff =
+				(s64)(timestamp + DUMPING_RESOLUTION) -
+				(s64)cli->dump_time;
+			if (tdiff >= 0ll) {
+				list_del(&cli->list);
+				list_add(&cli->list, &expired_requests);
+			} else {
+				break;
+			}
+		}
+
+		/* Fill data for each request found. */
+		list_for_each_entry_safe(cli, tmp, &expired_requests, list) {
+			/* Ensure that legacy buffer will not be used from
+			 * this kthread context. */
+			BUG_ON(0 == cli->buffer_count);
+			/* Expect only periodically sampled clients. */
+			BUG_ON(0 == cli->dump_interval);
+
+			if (!rcode)
+				kbasep_vinstr_update_client(
+						cli,
+						timestamp,
+						BASE_HWCNT_READER_EVENT_PERIODIC);
+
+			/* Set new dumping time. Drop missed probing times. */
+			do {
+				cli->dump_time += cli->dump_interval;
+			} while (cli->dump_time < timestamp);
+
+			list_del(&cli->list);
+			kbasep_vinstr_add_dump_request(
+					cli,
+					&vinstr_ctx->waiting_clients);
+		}
+
+		/* Reprogram counters set if required. */
+		kbasep_vinstr_reprogram(vinstr_ctx);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	}
+
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers
+ * @cli: pointer to vinstr client structure
+ *
+ * Return: non-zero if client has at least one dumping buffer filled that was
+ *         not notified to user yet
+ */
+static int kbasep_vinstr_hwcnt_reader_buffer_ready(
+		struct kbase_vinstr_client *cli)
+{
+	KBASE_DEBUG_ASSERT(cli);
+	return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int meta_idx = atomic_read(&cli->meta_idx);
+	unsigned int idx = meta_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx];
+
+	/* Metadata sanity check. */
+	KBASE_DEBUG_ASSERT(idx == meta->buffer_idx);
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if there is any buffer available. */
+	if (atomic_read(&cli->write_idx) == meta_idx)
+		return -EAGAIN;
+
+	/* Check if previously taken buffer was put back. */
+	if (atomic_read(&cli->read_idx) != meta_idx)
+		return -EBUSY;
+
+	/* Copy next available buffer's metadata to user. */
+	if (copy_to_user(buffer, meta, size))
+		return -EFAULT;
+
+	atomic_inc(&cli->meta_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int read_idx = atomic_read(&cli->read_idx);
+	unsigned int idx = read_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata meta;
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if any buffer was taken. */
+	if (atomic_read(&cli->meta_idx) == read_idx)
+		return -EPERM;
+
+	/* Check if correct buffer is put back. */
+	if (copy_from_user(&meta, buffer, size))
+		return -EFAULT;
+	if (idx != meta.buffer_idx)
+		return -EINVAL;
+
+	atomic_inc(&cli->read_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @interval: periodic dumping interval (disable periodic dumping if zero)
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+		struct kbase_vinstr_client *cli, u32 interval)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	list_del(&cli->list);
+
+	cli->dump_interval = interval;
+
+	/* If interval is non-zero, enable periodic dumping for this client. */
+	if (cli->dump_interval) {
+		if (DUMPING_RESOLUTION > cli->dump_interval)
+			cli->dump_interval = DUMPING_RESOLUTION;
+		cli->dump_time =
+			kbasep_vinstr_get_timestamp() + cli->dump_interval;
+
+		kbasep_vinstr_add_dump_request(
+				cli, &vinstr_ctx->waiting_clients);
+
+		atomic_set(&vinstr_ctx->request_pending, 1);
+		wake_up_all(&vinstr_ctx->waitq);
+	} else {
+		list_add(&cli->list, &vinstr_ctx->idle_clients);
+	}
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id
+ * @event_id: id of event
+ * Return: event_mask or zero if event is not supported or maskable
+ */
+static u32 kbasep_vinstr_hwcnt_reader_event_mask(
+		enum base_hwcnt_reader_event event_id)
+{
+	u32 event_mask = 0;
+
+	switch (event_id) {
+	case BASE_HWCNT_READER_EVENT_PREJOB:
+	case BASE_HWCNT_READER_EVENT_POSTJOB:
+		/* These event are maskable. */
+		event_mask = (1 << event_id);
+		break;
+
+	case BASE_HWCNT_READER_EVENT_MANUAL:
+	case BASE_HWCNT_READER_EVENT_PERIODIC:
+		/* These event are non-maskable. */
+	default:
+		/* These event are not supported. */
+		break;
+	}
+
+	return event_mask;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to enable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask |= event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to disable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask &= ~event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command
+ * @cli:   pointer to vinstr client structure
+ * @hwver: pointer to user buffer where hw version will be stored
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+		struct kbase_vinstr_client *cli, u32 __user *hwver)
+{
+#ifndef CONFIG_MALI_NO_MALI
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+#endif
+
+	u32                         ver = 5;
+
+#ifndef CONFIG_MALI_NO_MALI
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4))
+		ver = 4;
+#endif
+
+	return put_user(ver, hwver);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl
+ * @filp:   pointer to file structure
+ * @cmd:    user command
+ * @arg:    command's argument
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	long                       rcode = 0;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd)))
+		return -EINVAL;
+
+	switch (cmd) {
+	case KBASE_HWCNT_READER_GET_API_VERSION:
+		rcode = put_user(HWCNT_READER_API, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_HWVER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+				cli, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER_SIZE:
+		KBASE_DEBUG_ASSERT(cli->vinstr_ctx);
+		rcode = put_user(
+				(u32)cli->vinstr_ctx->dump_size,
+				(u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_DUMP:
+		rcode = kbase_vinstr_hwc_dump(
+				cli, BASE_HWCNT_READER_EVENT_MANUAL);
+		break;
+	case KBASE_HWCNT_READER_CLEAR:
+		rcode = kbase_vinstr_hwc_clear(cli);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_PUT_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_SET_INTERVAL:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+				cli, (u32)arg);
+		break;
+	case KBASE_HWCNT_READER_ENABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	case KBASE_HWCNT_READER_DISABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	default:
+		rcode = -EINVAL;
+		break;
+	}
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp,
+		poll_table *wait)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	poll_wait(filp, &cli->waitq, wait);
+	if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap
+ * @filp: pointer to file structure
+ * @vma:  pointer to vma structure
+ * Return: zero on success
+ */
+static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
+		struct vm_area_struct *vma)
+{
+	struct kbase_vinstr_client *cli;
+	unsigned long size, addr, pfn, offset;
+	unsigned long vm_size = vma->vm_end - vma->vm_start;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(vma);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	size = cli->buffer_count * cli->dump_size;
+
+	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
+		return -EINVAL;
+	if (vm_size > size)
+		return -EINVAL;
+
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	if ((vm_size + offset) > size)
+		return -EINVAL;
+
+	addr = __pa((unsigned long)cli->dump_buffers + offset);
+	pfn = addr >> PAGE_SHIFT;
+
+	return remap_pfn_range(
+			vma,
+			vma->vm_start,
+			pfn,
+			vm_size,
+			vma->vm_page_prot);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ * Return always return zero
+ */
+static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
+		struct file *filp)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	kbase_vinstr_detach_client(cli);
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_kick_scheduler - trigger scheduler cycle
+ * @kbdev: pointer to kbase device structure
+ */
+static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	down(&js_devdata->schedule_sem);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	kbase_jm_kick_all(kbdev);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	up(&js_devdata->schedule_sem);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker suspending vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_suspend_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			suspend_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		disable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_SUSPENDED;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Kick GPU scheduler to allow entering protected mode.
+	 * This must happen after vinstr was suspended. */
+	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker resuming vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_resume_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			resume_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		enable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_IDLE;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Kick GPU scheduler to allow entering protected mode.
+	 * Note that scheduler state machine might requested re-entry to
+	 * protected mode before vinstr was resumed.
+	 * This must happen after vinstr was release. */
+	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+}
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL);
+	if (!vinstr_ctx)
+		return NULL;
+
+	INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
+	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
+	mutex_init(&vinstr_ctx->lock);
+	spin_lock_init(&vinstr_ctx->state_lock);
+	vinstr_ctx->kbdev = kbdev;
+	vinstr_ctx->thread = NULL;
+	vinstr_ctx->state = VINSTR_IDLE;
+	vinstr_ctx->suspend_cnt = 0;
+	INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker);
+	INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker);
+	init_waitqueue_head(&vinstr_ctx->suspend_waitq);
+
+	atomic_set(&vinstr_ctx->request_pending, 0);
+	init_waitqueue_head(&vinstr_ctx->waitq);
+
+	return vinstr_ctx;
+}
+
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *cli;
+
+	/* Stop service thread first. */
+	if (vinstr_ctx->thread)
+		kthread_stop(vinstr_ctx->thread);
+
+	/* Wait for workers. */
+	flush_work(&vinstr_ctx->suspend_work);
+	flush_work(&vinstr_ctx->resume_work);
+
+	while (1) {
+		struct list_head *list = &vinstr_ctx->idle_clients;
+
+		if (list_empty(list)) {
+			list = &vinstr_ctx->waiting_clients;
+			if (list_empty(list))
+				break;
+		}
+
+		cli = list_first_entry(list, struct kbase_vinstr_client, list);
+		list_del(&cli->list);
+		kfree(cli->accum_buffer);
+		kfree(cli);
+		vinstr_ctx->nclients--;
+	}
+	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients);
+	if (vinstr_ctx->kctx)
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	kfree(vinstr_ctx);
+}
+
+int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup)
+{
+	struct kbase_vinstr_client  *cli;
+	u32                         bitmap[4];
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(setup->buffer_count);
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	cli = kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			setup->buffer_count,
+			bitmap,
+			&setup->fd,
+			NULL);
+
+	if (!cli)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup)
+{
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (setup->dump_buffer) {
+		u32 bitmap[4];
+
+		bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+		bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+		bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+		bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+		if (*cli)
+			return -EBUSY;
+
+		*cli = kbasep_vinstr_attach_client(
+				vinstr_ctx,
+				0,
+				bitmap,
+				(void *)(long)setup->dump_buffer,
+				NULL);
+
+		if (!(*cli))
+			return -ENOMEM;
+	} else {
+		if (!*cli)
+			return -EINVAL;
+
+		kbase_vinstr_detach_client(*cli);
+		*cli = NULL;
+	}
+
+	return 0;
+}
+
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer)
+{
+	u32 bitmap[4];
+
+	if (!vinstr_ctx || !setup || !kernel_buffer)
+		return NULL;
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	return kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			0,
+			bitmap,
+			NULL,
+			kernel_buffer);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
+
+int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	int                         rcode = 0;
+	struct kbase_vinstr_context *vinstr_ctx;
+	u64                         timestamp;
+	u32                         event_mask;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT);
+	event_mask = 1 << event_id;
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (event_mask & cli->event_mask) {
+		rcode = kbasep_vinstr_collect_and_accumulate(
+				vinstr_ctx,
+				&timestamp);
+		if (rcode)
+			goto exit;
+
+		rcode = kbasep_vinstr_update_client(cli, timestamp, event_id);
+		if (rcode)
+			goto exit;
+
+		kbasep_vinstr_reprogram(vinstr_ctx);
+	}
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump);
+
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	int                         rcode;
+	u64                         unused;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
+	if (rcode)
+		goto exit;
+	rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx);
+	if (rcode)
+		goto exit;
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+	kbasep_vinstr_reprogram(vinstr_ctx);
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	int ret = -EAGAIN;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+		vinstr_ctx->suspend_cnt++;
+		/* overflow shall not happen */
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		ret = 0;
+		break;
+
+	case VINSTR_IDLE:
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		schedule_work(&vinstr_ctx->suspend_work);
+		break;
+
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		break;
+
+	case VINSTR_SUSPENDING:
+		/* fall through */
+	case VINSTR_RESUMING:
+		break;
+
+	default:
+		BUG();
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	return ret;
+}
+
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	wait_event(vinstr_ctx->suspend_waitq,
+			(0 == kbase_vinstr_try_suspend(vinstr_ctx)));
+}
+
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state);
+	if (VINSTR_SUSPENDED == vinstr_ctx->state) {
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		vinstr_ctx->suspend_cnt--;
+		if (0 == vinstr_ctx->suspend_cnt) {
+			vinstr_ctx->state = VINSTR_RESUMING;
+			schedule_work(&vinstr_ctx->resume_work);
+		}
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
new file mode 100755
index 0000000..6207d25
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -0,0 +1,155 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwcnt_reader.h>
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context;
+struct kbase_vinstr_client;
+
+/*****************************************************************************/
+
+/**
+ * kbase_vinstr_init() - initialize the vinstr core
+ * @kbdev: kbase device
+ *
+ * Return: pointer to the vinstr context on success or NULL on failure
+ */
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_term() - terminate the vinstr core
+ * @vinstr_ctx: vinstr context
+ */
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader
+ * @vinstr_ctx: vinstr context
+ * @setup:      reader's configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwcnt_reader_setup(
+		struct kbase_vinstr_context        *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup);
+
+/**
+ * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping
+ * @vinstr_ctx: vinstr context
+ * @cli:        pointer where to store pointer to new vinstr client structure
+ * @setup:      hwc configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
+ *                                   client
+ * @vinstr_ctx:    vinstr context
+ * @setup:         reader's configuration
+ * @kernel_buffer: pointer to dump buffer
+ *
+ * setup->buffer_count and setup->fd are not used for kernel side clients.
+ *
+ * Return: pointer to client structure, or NULL on failure
+ */
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer);
+
+/**
+ * kbase_vinstr_hwc_dump - issue counter dump for vinstr client
+ * @cli:      pointer to vinstr client
+ * @event_id: id of event that triggered hwcnt dump
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_dump(
+		struct kbase_vinstr_client   *cli,
+		enum base_hwcnt_reader_event event_id);
+
+/**
+ * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for
+ *                          a given kbase context
+ * @cli: pointer to vinstr client
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Return: 0 on success, or negative if state change is in progress
+ *
+ * Warning: This API call is non-generic. It is meant to be used only by
+ *          job scheduler state machine.
+ *
+ * Function initiates vinstr switch to suspended state. Once it was called
+ * vinstr enters suspending state. If function return non-zero value, it
+ * indicates that state switch is not complete and function must be called
+ * again. On state switch vinstr will trigger job scheduler state machine
+ * cycle.
+ */
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_suspend - suspends operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function initiates vinstr switch to suspended state. Then it blocks until
+ * operation is completed.
+ */
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_resume - resumes operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function can be called only if it was preceded by a successful call
+ * to kbase_vinstr_suspend.
+ */
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_dump_size - Return required size of dump buffer
+ * @kbdev: device pointer
+ *
+ * Return : buffer size in bytes
+ */
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_detach_client - Detach a client from the vinstr core
+ * @cli: pointer to vinstr client
+ */
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
+
+#endif /* _KBASE_VINSTR_H_ */
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100755
index 0000000..5d6b402
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_KBASE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mali_slot_template,
+	TP_PROTO(int jobslot, unsigned int info_val),
+	TP_ARGS(jobslot, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, jobslot)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->jobslot = jobslot;
+		__entry->info_val = info_val;
+	),
+	TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val)
+);
+
+#define DEFINE_MALI_SLOT_EVENT(name) \
+DEFINE_EVENT(mali_slot_template, mali_##name, \
+	TP_PROTO(int jobslot, unsigned int info_val), \
+	TP_ARGS(jobslot, info_val))
+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT);
+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE);
+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT);
+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT);
+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+#undef DEFINE_MALI_SLOT_EVENT
+
+DECLARE_EVENT_CLASS(mali_refcount_template,
+	TP_PROTO(int refcount, unsigned int info_val),
+	TP_ARGS(refcount, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, refcount)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->refcount = refcount;
+		__entry->info_val = info_val;
+	),
+	TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val)
+);
+
+#define DEFINE_MALI_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(mali_refcount_template, mali_##name, \
+	TP_PROTO(int refcount, unsigned int info_val), \
+	TP_ARGS(refcount, info_val))
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK);
+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE);
+#undef DEFINE_MALI_REFCOUNT_EVENT
+
+DECLARE_EVENT_CLASS(mali_add_template,
+	TP_PROTO(int gpu_addr, unsigned int info_val),
+	TP_ARGS(gpu_addr, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, gpu_addr)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->gpu_addr = gpu_addr;
+		__entry->info_val = info_val;
+	),
+	TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val)
+);
+
+#define DEFINE_MALI_ADD_EVENT(name) \
+DEFINE_EVENT(mali_add_template, mali_##name, \
+	TP_PROTO(int gpu_addr, unsigned int info_val), \
+	TP_ARGS(gpu_addr, info_val))
+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY);
+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL);
+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT);
+DEFINE_MALI_ADD_EVENT(JM_IRQ);
+DEFINE_MALI_ADD_EVENT(JM_IRQ_END);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP);
+DEFINE_MALI_ADD_EVENT(PM_PWRON);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM);
+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+#undef DEFINE_MALI_ADD_EVENT
+
+#endif /* _TRACE_MALI_KBASE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100755
index 0000000..2be06a5
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,189 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#include <linux/tracepoint.h>
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - called from mali_kbase_core_linux.c
+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event,
+	TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid,
+			unsigned char job_id),
+	TP_ARGS(event_id, tgid, pid, job_id),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned int, tgid)
+		__field(unsigned int, pid)
+		__field(unsigned char, job_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->tgid = tgid;
+		__entry->pid = pid;
+		__entry->job_id = job_id;
+	),
+	TP_printk("event=%u tgid=%u pid=%u job_id=%u",
+		__entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+);
+
+/**
+ * mali_pm_status - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF)
+ */
+TRACE_EVENT(mali_pm_status,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power up
+ */
+TRACE_EVENT(mali_pm_power_on,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power down
+ */
+TRACE_EVENT(mali_pm_power_off,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_page_fault_insert_pages - Called by page_fault_worker()
+ * it reports an MMU page fault resulting in new pages being mapped.
+ * @event_id: MMU address space number.
+ * @value: number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages,
+	TP_PROTO(int event_id, unsigned long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+		__field(unsigned long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space()
+ * it reports that a certain MMU address space is in use now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_in_use,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal()
+ * it reports that a certain MMU address space has been released now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_released,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
+ *                                 and by kbase_atomic_sub_pages()
+ * it reports that the total number of allocated pages is changed.
+ * @event_id: number of pages to be added or subtracted (according to the sign).
+ */
+TRACE_EVENT(mali_total_alloc_pages_change,
+	TP_PROTO(long long int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(long long int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%lld", __entry->event_id)
+);
+
+#endif				/*  _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
new file mode 100755
index 0000000..9945293
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Kernel-wide include for common macros and types.
+ */
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define U8_MAX          ((u8)~0U)
+#define S8_MAX          ((s8)(U8_MAX>>1))
+#define S8_MIN          ((s8)(-S8_MAX - 1))
+#define U16_MAX         ((u16)~0U)
+#define S16_MAX         ((s16)(U16_MAX>>1))
+#define S16_MIN         ((s16)(-S16_MAX - 1))
+#define U32_MAX         ((u32)~0U)
+#define S32_MAX         ((s32)(U32_MAX>>1))
+#define S32_MIN         ((s32)(-S32_MAX - 1))
+#define U64_MAX         ((u64)~0ULL)
+#define S64_MAX         ((s64)(U64_MAX>>1))
+#define S64_MIN         ((s64)(-S64_MAX - 1))
+#endif /* LINUX_VERSION_CODE */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#define SIZE_MAX        (~(size_t)0)
+#endif /* LINUX_VERSION_CODE */
+
+/**
+ * MIN - Return the lesser of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * Refer to MAX macro for more details
+ */
+#define MIN(x, y)	((x) < (y) ? (x) : (y))
+
+/**
+ * MAX -  Return the greater of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as MIN it is guaranteed to return
+ * the one that MIN didn't return. This is significant for types where not
+ * all values are comparable e.g. NaNs in floating-point types. But if you want
+ * to retrieve the min and max of two values, consider using a conditional swap
+ * instead.
+ */
+#define MAX(x, y)	((x) < (y) ? (y) : (x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * much support API backwards compatibility.
+ */
+#define CSTD_UNUSED(x)	((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile time macros turn a function-like macro in to a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP(...)	((void)#__VA_ARGS__)
+
+/**
+ * Function-like macro for converting a pointer in to a u64 for storing into
+ * an external data structure. This is commonly used when pairing a 32-bit
+ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
+ * is complex and a straight cast does not work reliably as pointers are
+ * often considered as signed.
+ */
+#define PTR_TO_U64(x)	((uint64_t)((uintptr_t)(x)))
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1(x)	#x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be used
+ * if the macro is defined in a way which may have no value; use the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2(x)	CSTD_STR1(x)
+
+/**
+ * Specify an assertion value which is evaluated at compile time. Recommended
+ * usage is specification of a @c static @c INLINE function containing all of
+ * the assertions thus:
+ *
+ * @code
+ * static INLINE [module]_compile_time_assertions( void )
+ * {
+ *     COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
+ * }
+ * @endcode
+ *
+ * @note Use @c static not @c STATIC. We never want to turn off this @c static
+ * specification for testing purposes.
+ */
+#define CSTD_COMPILE_TIME_ASSERT(expr) \
+	do { switch (0) { case 0: case (expr):; } } while (false)
+
+#endif /* _MALISW_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
new file mode 100644
index 0000000..a509cbd
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDG_COHERENCY_H_
+#define _MIDG_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE      1
+#define COHERENCY_NONE     31
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+
+#endif /* _MIDG_COHERENCY_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100755
index 0000000..de6c206
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,579 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Begin Register Offsets
+ */
+
+#define GPU_CONTROL_BASE        0x0000
+#define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
+#define GPU_ID                  0x000	/* (RO) GPU and revision identifier */
+#define L2_FEATURES             0x004	/* (RO) Level 2 cache features */
+#define SUSPEND_SIZE            0x008   /* (RO) Fixed-function suspend buffer
+						size */
+#define TILER_FEATURES          0x00C	/* (RO) Tiler Features */
+#define MEM_FEATURES            0x010	/* (RO) Memory system features */
+#define MMU_FEATURES            0x014	/* (RO) MMU features */
+#define AS_PRESENT              0x018	/* (RO) Address space slots present */
+#define JS_PRESENT              0x01C	/* (RO) Job slots present */
+#define GPU_IRQ_RAWSTAT         0x020	/* (RW) */
+#define GPU_IRQ_CLEAR           0x024	/* (WO) */
+#define GPU_IRQ_MASK            0x028	/* (RW) */
+#define GPU_IRQ_STATUS          0x02C	/* (RO) */
+
+/* IRQ flags */
+#define GPU_FAULT               (1 << 0)	/* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS     (1 << 7)	/* More than one GPU Fault occurred. */
+#define RESET_COMPLETED         (1 << 8)	/* Set when a reset has completed. Intended to use with SOFT_RESET
+						   commands which may take time. */
+#define POWER_CHANGED_SINGLE    (1 << 9)	/* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL       (1 << 10)	/* Set when all cores have finished powering up or down
+						   and the power manager is idle. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16)	/* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED  (1 << 17)	/* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+			| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#define GPU_COMMAND             0x030	/* (WO) */
+#define GPU_STATUS              0x034	/* (RO) */
+#define LATEST_FLUSH            0x038	/* (RO) */
+
+#define GROUPS_L2_COHERENT      (1 << 0)	/* Cores groups are l2 coherent */
+
+#define GPU_FAULTSTATUS         0x03C	/* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO     0x040	/* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI     0x044	/* (RO) GPU exception fault address, high word */
+
+#define PWR_KEY                 0x050	/* (WO) Power manager key register */
+#define PWR_OVERRIDE0           0x054	/* (RW) Power manager override settings */
+#define PWR_OVERRIDE1           0x058	/* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO          0x060	/* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI          0x064	/* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG           0x068	/* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN            0x06C	/* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN        0x070	/* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN         0x074	/* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN        0x07C	/* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO          0x090	/* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI          0x094	/* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO            0x098	/* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI            0x09C	/* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS		0x0A0	/* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4	/* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8	/* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES         0x0AC	/* (RO) Thread features */
+
+#define TEXTURE_FEATURES_0      0x0B0	/* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1      0x0B4	/* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2      0x0B8	/* (RO) Support flags for indexed texture formats 64..95 */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define JS0_FEATURES            0x0C0	/* (RO) Features of job slot 0 */
+#define JS1_FEATURES            0x0C4	/* (RO) Features of job slot 1 */
+#define JS2_FEATURES            0x0C8	/* (RO) Features of job slot 2 */
+#define JS3_FEATURES            0x0CC	/* (RO) Features of job slot 3 */
+#define JS4_FEATURES            0x0D0	/* (RO) Features of job slot 4 */
+#define JS5_FEATURES            0x0D4	/* (RO) Features of job slot 5 */
+#define JS6_FEATURES            0x0D8	/* (RO) Features of job slot 6 */
+#define JS7_FEATURES            0x0DC	/* (RO) Features of job slot 7 */
+#define JS8_FEATURES            0x0E0	/* (RO) Features of job slot 8 */
+#define JS9_FEATURES            0x0E4	/* (RO) Features of job slot 9 */
+#define JS10_FEATURES           0x0E8	/* (RO) Features of job slot 10 */
+#define JS11_FEATURES           0x0EC	/* (RO) Features of job slot 11 */
+#define JS12_FEATURES           0x0F0	/* (RO) Features of job slot 12 */
+#define JS13_FEATURES           0x0F4	/* (RO) Features of job slot 13 */
+#define JS14_FEATURES           0x0F8	/* (RO) Features of job slot 14 */
+#define JS15_FEATURES           0x0FC	/* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n)      GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define SHADER_PRESENT_LO       0x100	/* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI       0x104	/* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO        0x110	/* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI        0x114	/* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO           0x120	/* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI           0x124	/* (RO) Level 2 cache present bitmap, high word */
+
+
+#define SHADER_READY_LO         0x140	/* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI         0x144	/* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO          0x150	/* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI          0x154	/* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO             0x160	/* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI             0x164	/* (RO) Level 2 cache ready bitmap, high word */
+
+
+#define SHADER_PWRON_LO         0x180	/* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI         0x184	/* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO          0x190	/* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI          0x194	/* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO             0x1A0	/* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI             0x1A4	/* (WO) Level 2 cache power on bitmap, high word */
+
+#define SHADER_PWROFF_LO        0x1C0	/* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI        0x1C4	/* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO         0x1D0	/* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI         0x1D4	/* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO            0x1E0	/* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI            0x1E4	/* (WO) Level 2 cache power off bitmap, high word */
+
+#define SHADER_PWRTRANS_LO      0x200	/* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI      0x204	/* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO       0x210	/* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI       0x214	/* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO          0x220	/* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI          0x224	/* (RO) Level 2 cache power transition bitmap, high word */
+
+#define SHADER_PWRACTIVE_LO     0x240	/* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI     0x244	/* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO      0x250	/* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI      0x254	/* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO         0x260	/* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI         0x264	/* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES      0x300	/* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304	/* (RW) Coherency enable */
+
+#define JM_CONFIG               0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
+#define SHADER_CONFIG           0xF04	/* (RW) Shader core configuration settings (Implementation specific register) */
+#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define L2_MMU_CONFIG           0xF0C	/* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+#define JOB_CONTROL_BASE        0x1000
+
+#define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT         0x000	/* Raw interrupt status register */
+#define JOB_IRQ_CLEAR           0x004	/* Interrupt clear register */
+#define JOB_IRQ_MASK            0x008	/* Interrupt mask register */
+#define JOB_IRQ_STATUS          0x00C	/* Interrupt status register */
+#define JOB_IRQ_JS_STATE        0x010	/* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE        0x014	/* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
+
+#define JOB_SLOT0               0x800	/* Configuration registers for job slot 0 */
+#define JOB_SLOT1               0x880	/* Configuration registers for job slot 1 */
+#define JOB_SLOT2               0x900	/* Configuration registers for job slot 2 */
+#define JOB_SLOT3               0x980	/* Configuration registers for job slot 3 */
+#define JOB_SLOT4               0xA00	/* Configuration registers for job slot 4 */
+#define JOB_SLOT5               0xA80	/* Configuration registers for job slot 5 */
+#define JOB_SLOT6               0xB00	/* Configuration registers for job slot 6 */
+#define JOB_SLOT7               0xB80	/* Configuration registers for job slot 7 */
+#define JOB_SLOT8               0xC00	/* Configuration registers for job slot 8 */
+#define JOB_SLOT9               0xC80	/* Configuration registers for job slot 9 */
+#define JOB_SLOT10              0xD00	/* Configuration registers for job slot 10 */
+#define JOB_SLOT11              0xD80	/* Configuration registers for job slot 11 */
+#define JOB_SLOT12              0xE00	/* Configuration registers for job slot 12 */
+#define JOB_SLOT13              0xE80	/* Configuration registers for job slot 13 */
+#define JOB_SLOT14              0xF00	/* Configuration registers for job slot 14 */
+#define JOB_SLOT15              0xF80	/* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r)      (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO             0x00	/* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI             0x04	/* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO             0x08	/* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI             0x0C	/* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
+#define JS_XAFFINITY           0x1C	/* (RO) Extended affinity mask for job
+					   slot n */
+
+#define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
+#define JS_STATUS              0x24	/* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO        0x40	/* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI        0x44	/* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
+#define JS_XAFFINITY_NEXT      0x5C	/* (RW) Next extended affinity mask for
+					   job slot n */
+
+#define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
+
+#define MEMORY_MANAGEMENT_BASE  0x2000
+#define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT         0x000	/* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR           0x004	/* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK            0x008	/* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS          0x00C	/* (RO) Interrupt status register */
+
+#define MMU_AS0                 0x400	/* Configuration registers for address space 0 */
+#define MMU_AS1                 0x440	/* Configuration registers for address space 1 */
+#define MMU_AS2                 0x480	/* Configuration registers for address space 2 */
+#define MMU_AS3                 0x4C0	/* Configuration registers for address space 3 */
+#define MMU_AS4                 0x500	/* Configuration registers for address space 4 */
+#define MMU_AS5                 0x540	/* Configuration registers for address space 5 */
+#define MMU_AS6                 0x580	/* Configuration registers for address space 6 */
+#define MMU_AS7                 0x5C0	/* Configuration registers for address space 7 */
+#define MMU_AS8                 0x600	/* Configuration registers for address space 8 */
+#define MMU_AS9                 0x640	/* Configuration registers for address space 9 */
+#define MMU_AS10                0x680	/* Configuration registers for address space 10 */
+#define MMU_AS11                0x6C0	/* Configuration registers for address space 11 */
+#define MMU_AS12                0x700	/* Configuration registers for address space 12 */
+#define MMU_AS13                0x740	/* Configuration registers for address space 13 */
+#define MMU_AS14                0x780	/* Configuration registers for address space 14 */
+#define MMU_AS15                0x7C0	/* Configuration registers for address space 15 */
+
+#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO         0x00	/* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI         0x04	/* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO          0x08	/* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI          0x0C	/* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO         0x10	/* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI         0x14	/* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND             0x18	/* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS         0x1C	/* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO     0x20	/* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
+#define AS_STATUS              0x28	/* (RO) Status flags for address space n */
+
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
+
+/* End Register Offsets */
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+   MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS   16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n)      (1UL << (n))
+#define MMU_BUS_ERROR(n)       (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK   0xfffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED  (0u << 0)
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY  (1u << 1)
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE     (3u << 0)
+#define AS_TRANSTAB_LPAE_READ_INNER        (1u << 2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER       (1u << 4)
+
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK      0x00000003
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                    (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT       (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT        (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT      (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG             (0x3<<3)
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT      (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK                  (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC                (0x0<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX                    (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ                  (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE                 (0x3<<8)
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
+
+/*
+ * Begin Command Values
+ */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP         0x00	/* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START       0x01	/* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP   0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK        0x07    /* Mask of bits currently in use by the HW */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP         0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK        0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK      0x03	/* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH       0x04	/* Flush all L2 caches then issue a flush region command to all MMUs
+					   (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT    0x04	/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM   0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
+					   flush all L2 caches then issue a flush region command to all MMUs */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION        (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN            (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU                    (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG               (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
+
+/* JS_XAFFINITY register values */
+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
+#define JS_XAFFINITY_TILER_ENABLE     (1u << 8)
+#define JS_XAFFINITY_CACHE_ENABLE     (1u << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
+ */
+
+/* Group of values representing the job status insead a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE   0x00
+#define JS_STATUS_INTERRUPTED         (JS_STATUS_NO_EXCEPTION_BASE + 0x02)	/* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED             (JS_STATUS_NO_EXCEPTION_BASE + 0x03)	/* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED          (JS_STATUS_NO_EXCEPTION_BASE + 0x04)	/* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE          0x40
+#define JS_STATUS_CONFIG_FAULT        (JS_STATUS_FAULT_BASE)	/* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT         (JS_STATUS_FAULT_BASE + 0x01)	/* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT          (JS_STATUS_FAULT_BASE + 0x02)	/* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT         (JS_STATUS_FAULT_BASE + 0x03)	/* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT      (JS_STATUS_FAULT_BASE + 0x04)	/* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT           (JS_STATUS_FAULT_BASE + 0x08)	/* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE  0x50
+#define JS_STATUS_INSTR_INVALID_PC        (JS_STATUS_INSTRUCTION_FAULT_BASE)	/* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01)	/* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02)	/* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03)	/* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT         (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04)	/* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05)	/* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06)	/* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT      (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08)	/* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT        (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09)	/* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A)	/* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE   0x60
+#define JS_STATUS_OUT_OF_MEMORY       (JS_STATUS_MEMORY_FAULT_BASE)	/* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN             0x7F	/* 0x7F means UNKNOWN */
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP                0x00	/* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET         0x01	/* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET         0x02	/* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR       0x03	/* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE      0x04	/* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START  0x05	/* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP   0x06	/* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES       0x07	/* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES   0x08	/* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09	/* Places the GPU in protected mode */
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE           (1 << 2)	/* Set if the performance counters are active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE   (1 << 7)	/* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT      0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT        4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+
+#define PRFCNT_CONFIG_MODE_OFF    0	/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_MANUAL 1	/* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
+#define PRFCNT_CONFIG_MODE_TILE   2	/* The performance counters are enabled, and are written out each time a tile finishes rendering. */
+
+/* AS<n>_MEMATTR values: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
+
+/* Use GPU implementation-defined  caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC           0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
+
+/* Symbol for default MEMATTR to use */
+
+/* Default is - HW implementation defined caching */
+#define AS_MEMATTR_INDEX_DEFAULT               0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE           3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL    1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC           2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF        3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA              4
+
+/* JS<n>_FEATURES register */
+
+#define JS_FEATURE_NULL_JOB              (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB         (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB       (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB           (1u << 4)
+#define JS_FEATURE_VERTEX_JOB            (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB          (1u << 6)
+#define JS_FEATURE_TILER_JOB             (1u << 7)
+#define JS_FEATURE_FUSED_JOB             (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB          (1u << 9)
+
+/* End JS<n>_FEATURES register */
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT        (24)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS              (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT       (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER      (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF         (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT       (26)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES             (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT      (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER     (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF        (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED  0
+#define IMPLEMENTATION_SILICON      1
+#define IMPLEMENTATION_FPGA         2
+#define IMPLEMENTATION_MODEL        3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT     256
+#define THREAD_MWS_DEFAULT    256
+#define THREAD_MBS_DEFAULT    256
+#define THREAD_MR_DEFAULT     1024
+#define THREAD_MTQ_DEFAULT    4
+#define THREAD_MTGS_DEFAULT   10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+
+#define SC_ALT_COUNTERS             (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL  (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD   (1ul << 6)
+#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
+#define SC_LS_PAUSEBUFFER_DISABLE   (1ul << 16)
+#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
+#define SC_ENABLE_TEXGRD_FLAGS      (1ul << 25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+
+#define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
+
+/* End TILER_CONFIG register */
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
new file mode 100755
index 0000000..bd5f661
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
@@ -0,0 +1,396 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali_timeline
+
+#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _MALI_TIMELINE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mali_timeline_atoms_in_flight,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int tgid,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		tgid,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, tgid)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->tgid = tgid;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT,
+				(int)__entry->ts_sec,
+				(int)__entry->ts_nsec,
+				__entry->tgid,
+				__entry->count)
+);
+
+
+TRACE_EVENT(mali_timeline_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->atom_id,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_action,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int active),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		active),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, active)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->active = active;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->active)
+
+);
+
+TRACE_EVENT(mali_timeline_l2_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int state),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		state),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, state)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->state = state;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->state)
+
+);
+TRACE_EVENT(mali_timeline_pm_event,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int pm_event_type,
+		unsigned int pm_event_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		pm_event_type,
+		pm_event_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, pm_event_type)
+			__field(unsigned int, pm_event_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->pm_event_type = pm_event_type;
+		__entry->pm_event_id = pm_event_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->pm_event_type, __entry->pm_event_id)
+
+);
+
+TRACE_EVENT(mali_timeline_slot_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_pm_checktrans,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int trans_code,
+		int trans_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		trans_code,
+		trans_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, trans_code)
+			__field(int, trans_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->trans_code = trans_code;
+		__entry->trans_id = trans_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->trans_id)
+
+);
+
+TRACE_EVENT(mali_timeline_context_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->count)
+);
+
+#endif /* _MALI_TIMELINE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_uk.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
new file mode 100755
index 0000000..841d03f
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,141 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif				/* __cplusplus */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
+ * identifier to select a UKK client to the uku_open() function.
+ *
+ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
+ * enumeration and the uku_open() implemenation for the various OS ports need to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+	/**
+	 * Value used to identify the Base driver UK client.
+	 */
+	UK_CLIENT_MALI_T600_BASE,
+
+	/** The number of uk clients supported. This must be the last member of the enum */
+	UK_CLIENT_COUNT
+};
+
+/**
+ * Each function callable through the UK interface has a unique number.
+ * Functions provided by UK clients start from number UK_FUNC_ID.
+ * Numbers below UK_FUNC_ID are used for internal UK functions.
+ */
+enum uk_func {
+	UKP_FUNC_ID_CHECK_VERSION,   /**< UKK Core internal function */
+	/**
+	 * Each UK client numbers the functions they provide starting from
+	 * number UK_FUNC_ID. This number is then eventually assigned to the
+	 * id field of the union uk_header structure when preparing to make a
+	 * UK call. See your UK client for a list of their function numbers.
+	 */
+	UK_FUNC_ID = 512
+};
+
+/**
+ * Arguments for a UK call are stored in a structure. This structure consists
+ * of a fixed size header and a payload. The header carries a 32-bit number
+ * identifying the UK function to be called (see uk_func). When the UKK client
+ * receives this header and executed the requested UK function, it will use
+ * the same header to store the result of the function in the form of a
+ * int return code. The size of this structure is such that the
+ * first member of the payload following the header can be accessed efficiently
+ * on a 32 and 64-bit kernel and the structure has the same size regardless
+ * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
+ * accordingly in the OS specific mali_uk_os.h header file.
+ */
+union uk_header {
+	/**
+	 * 32-bit number identifying the UK function to be called.
+	 * Also see uk_func.
+	 */
+	u32 id;
+	/**
+	 * The int return code returned by the called UK function.
+	 * See the specification of the particular UK function you are
+	 * calling for the meaning of the error codes returned. All
+	 * UK functions return 0 on success.
+	 */
+	u32 ret;
+	/*
+	 * Used to ensure 64-bit alignment of this union. Do not remove.
+	 * This field is used for padding and does not need to be initialized.
+	 */
+	u64 sizer;
+};
+
+/**
+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
+ */
+struct uku_version_check_args {
+	union uk_header header;
+		  /**< UK call header */
+	u16 major;
+	   /**< This field carries the user-side major version on input and the kernel-side major version on output */
+	u16 minor;
+	   /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
+	u8 padding[4];
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif				/* __cplusplus */
+#endif				/* _UK_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild
new file mode 100755
index 0000000..558657b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+	platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+	obj-y += $(platform_name)/
+endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100755
index 0000000..8fb4e91
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name is the name set in MALI_PLATFORM_THIRDPARTY_NAME
+#
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
new file mode 100755
index 0000000..0833cac
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-y += platform/devicetree/mali_clock.c
+obj-y += platform/devicetree/mpgpu.c
+obj-y += platform/devicetree/meson_main2.c
+obj-y += platform/devicetree/platform_gx.c
+obj-y += platform/devicetree/scaling.c
+obj-y += mali_kbase_runtime_pm.c
+obj-y += mali_kbase_config_devicetree.c
+else ifeq ($(CONFIG_MALI_MIDGARD),m)
+SRC += platform/devicetree/mali_clock.c
+SRC += platform/devicetree/mpgpu.c
+SRC += platform/devicetree/meson_main2.c
+SRC += platform/devicetree/platform_gx.c
+SRC += platform/devicetree/scaling.c
+SRC += platform/devicetree/mali_kbase_runtime_pm.c
+SRC += platform/devicetree/mali_kbase_config_devicetree.c
+endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
new file mode 100644
index 0000000..6ddf39f
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
@@ -0,0 +1,371 @@
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/clk-private.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+#ifndef AML_CLK_LOCK_ERROR
+#define AML_CLK_LOCK_ERROR 1
+#endif
+
+static unsigned gpu_dbg_level = 0;
+module_param(gpu_dbg_level, uint, 0644);
+MODULE_PARM_DESC(gpu_dbg_level, "gpu debug level");
+
+#define gpu_dbg(level, fmt, arg...)			\
+	do {			\
+		if (gpu_dbg_level >= (level))		\
+		printk("gpu_debug"fmt , ## arg); 	\
+	} while (0)
+
+#define GPU_CLK_DBG(fmt, arg...)	\
+	do {		  \
+		gpu_dbg(1, "line(%d), clk_cntl=0x%08x\n" fmt, __LINE__, mplt_read(HHI_MALI_CLK_CNTL), ## arg);\
+	} while (0)
+
+//disable print
+#define _dev_info(...)
+
+//static DEFINE_SPINLOCK(lock);
+static mali_plat_info_t* pmali_plat = NULL;
+//static u32 mali_extr_backup = 0;
+//static u32 mali_extr_sample_backup = 0;
+struct timeval start;
+struct timeval end;
+int mali_pm_statue = 0;
+
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali_0_parent = dvfs_tbl->clkp_handle;
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+#ifdef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+#endif
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_set_parent(clk_mali_0, clk_mali_0_parent);
+
+	clk_prepare_enable(clk_mali_0);
+
+	clk_set_parent(clk_mali, clk_mali_0);
+
+#ifdef AML_CLK_LOCK_ERROR
+	clk_set_parent(clk_mali_1, clk_mali_0_parent);
+	clk_prepare_enable(clk_mali_1);
+#endif
+
+	GPU_CLK_DBG("%s:enable(%d), %s:enable(%d)\n",
+	  clk_mali_0->name,  clk_mali_0->enable_count,
+	  clk_mali_0_parent->name, clk_mali_0_parent->enable_count);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+	struct clk *clk_mali_x   = NULL;
+	struct clk *clk_mali_x_parent = NULL;
+	struct clk *clk_mali_x_old = NULL;
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+	clk_mali_x_old  = clk_get_parent(clk_mali);
+
+	if (!clk_mali_x_old) {
+		printk("gpu: could not get clk_mali_x_old or clk_mali_x_old\n");
+		return 0;
+	}
+	if (clk_mali_x_old == clk_mali_0) {
+		clk_mali_x = clk_mali_1;
+	} else if (clk_mali_x_old == clk_mali_1) {
+		clk_mali_x = clk_mali_0;
+	} else {
+		printk("gpu: unmatched clk_mali_x_old\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG("idx=%d, clk_freq=%d\n", idx, dvfs_tbl->clk_freq);
+	clk_mali_x_parent = dvfs_tbl->clkp_handle;
+	if (!clk_mali_x_parent) {
+		printk("gpu: could not get clk_mali_x_parent\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x_parent, dvfs_tbl->clkp_freq);
+	GPU_CLK_DBG();
+	ret = clk_set_parent(clk_mali_x, clk_mali_x_parent);
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x, dvfs_tbl->clk_freq);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	ret = clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG("new %s:enable(%d)\n", clk_mali_x->name,  clk_mali_x->enable_count);
+	do_gettimeofday(&start);
+	udelay(1);// delay 10ns
+	do_gettimeofday(&end);
+	ret = clk_set_parent(clk_mali, clk_mali_x);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	GPU_CLK_DBG("old %s:enable(%d)\n", clk_mali_x_old->name,  clk_mali_x_old->enable_count);
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "ao io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	mpdata->dvfs_table_size = 0;
+
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+	dev_notice(&pdev->dev, "dvfs table is %d\n", mpdata->dvfs_table_size);
+	dev_notice(&pdev->dev, "dvfs table addr %p, ele size=%zd\n",
+			mpdata->dvfs_table,
+			sizeof(mpdata->dvfs_table[0]));
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "clk_gpu");
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_gpu_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_gpu_1");
+	if (IS_ERR(mpdata->clk_mali) || IS_ERR(mpdata->clk_mali_0) || IS_ERR(mpdata->clk_mali_1)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
new file mode 100644
index 0000000..9b8b392
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
@@ -0,0 +1,37 @@
+#ifndef __MALI_CLOCK_H__
+#define __MALI_CLOCK_H__
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <linux/clk.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 29))
+#include <linux/amlogic/iomap.h>
+#endif
+
+#ifndef HHI_MALI_CLK_CNTL
+#define HHI_MALI_CLK_CNTL   0x6C
+#define mplt_read(r)        readl((pmali_plat->reg_base_hiubus) + ((r)<<2))
+#define mplt_write(r, v)    writel((v), ((pmali_plat->reg_base_hiubus) + ((r)<<2)))
+#define mplt_setbits(r, m)  mplt_write((r), (mplt_read(r) | (m)));
+#define mplt_clrbits(r, m)  mplt_write((r), (mplt_read(r) & (~(m))));
+#endif
+
+//extern int mali_clock_init(struct platform_device *dev);
+int mali_clock_init_clk_tree(struct platform_device *pdev);
+
+typedef int (*critical_t)(size_t param);
+int mali_clock_critical(critical_t critical, size_t param);
+
+int mali_clock_init(mali_plat_info_t*);
+int mali_clock_set(unsigned int index);
+void disable_clock(void);
+void enable_clock(void);
+u32 get_mali_freq(u32 idx);
+void set_str_src(u32 data);
+int mali_dt_info(struct platform_device *pdev,
+        struct mali_plat_info_t *mpdata);
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
new file mode 100755
index 0000000..1267b6f
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
@@ -0,0 +1,109 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/version.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static unsigned long t83x_static_power(unsigned long voltage)
+{
+#if 0
+	struct thermal_zone_device *tz;
+	unsigned long temperature, temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10);
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	tz = thermal_zone_get_zone_by_name("gpu");
+	if (IS_ERR(tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(tz));
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	} else {
+		int ret;
+
+		ret = tz->ops->get_temp(tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(2 * temp_cubed)
+			- (80 * temp_squared)
+			+ (4700 * temp)
+			+ 32000;
+
+	return (((coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+#else
+	return 0;
+#endif
+}
+
+static unsigned long t83x_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+	const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */
+
+	return (coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+struct devfreq_cooling_ops t83x_model_ops = {
+#else
+struct devfreq_cooling_power t83x_model_ops = {
+#endif
+	.get_static_power = t83x_static_power,
+	.get_dynamic_power = t83x_dynamic_power,
+};
+
+#endif
+
+#include <mali_kbase_config.h>
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &dummy_platform_config;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
new file mode 100755
index 0000000..13af899
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (750000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (100000)
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (NULL)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+extern struct kbase_platform_funcs_conf dt_funcs_conf;
+#define PLATFORM_FUNCS (&dt_funcs_conf)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#ifdef CONFIG_DEVFREQ_THERMAL
+#define POWER_MODEL_CALLBACKS (&t83x_model_ops)
+extern struct devfreq_cooling_ops t83x_model_ops;
+#else
+#define POWER_MODEL_CALLBACKS (NULL)
+#endif
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
new file mode 100755
index 0000000..3b3f095
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
@@ -0,0 +1,244 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+
+void *reg_base_hiubus = NULL;
+u32  override_value_aml = 0;
+static int first = 1;
+
+static void  Mali_pwr_on_with_kdev ( struct kbase_device *kbdev, uint32_t  mask)
+{
+    uint32_t    part1_done;
+    part1_done = 0;
+    Mali_WrReg(0x0, 0x0, 0x0000024, 0xffffffff); // clear interrupts
+
+    if ((mask & 0x1) != 0 ) {
+        Mali_WrReg(0x0, 0x0, 0x00000190, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x00000194, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x000001a0, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x000001a4, 0xffffffff);    // Power on all cores
+    }
+
+    if ( (mask >> 1) != 0 ) {
+        Mali_WrReg(0x0, 0x0, 0x00000180, mask >> 1);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x00000184, 0x0);    // Power on all cores
+    }
+
+    if ( mask != 0 ) {
+        udelay(10);
+        part1_done = Mali_RdReg(0x0, 0x0, 0x0000020);
+        //printk("%s, %d\n", __func__, __LINE__);
+        while((part1_done ==0)) { part1_done = Mali_RdReg(0x0, 0x0, 0x00000020); udelay(10); }
+        //printk("Mali_pwr_on:gpu_irq : %x\n", part1_done);
+        Mali_WrReg(0x0, 0x0, 0x0000024, 0xffffffff); // clear interrupts
+    }
+}
+
+static void gpu_power_main( struct kbase_device *kbdev) {
+
+    Mali_pwr_on ( 0x7);
+
+    printk("%s, %d\n", __func__, __LINE__);
+}
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	int ret;
+	struct platform_device *pdev = to_platform_device(kbdev->dev);
+	struct resource *reg_res;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+    //printk("20151013, %s, %d\n", __FILE__, __LINE__);
+    if (first == 0) goto ret;
+
+    reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+    if (!reg_res) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) 
+        reg_base_hiubus = ioremap(reg_res->start, resource_size(reg_res));
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+
+//JOHNT
+    // Level reset mail
+
+    // Level reset mail
+    //Wr(P_RESET2_MASK, ~(0x1<<14));
+    //Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    //Wr(P_RESET2_LEVEL, 0xffffffff);
+    //Wr(P_RESET0_LEVEL, 0xffffffff);
+
+    value = Rd(RESET0_MASK);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_MASK, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+///////////////
+    value = Rd(RESET2_MASK);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_MASK, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value | ((0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value | ((0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    udelay(10); // OR POLL for reset done
+
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+
+    gpu_power_main(kbdev);
+    //printk("set PWR_ORRIDE, reg=%p, reg_start=%llx, reg_size=%zx, reg_mapped=%p\n",
+    //        kbdev->reg, kbdev->reg_start, kbdev->reg_size, reg_base_hiubus);
+	dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+			(void *)kbdev->dev->pm_domain);
+
+    first = 0;
+    //printk("%s, %d\n", __FILE__, __LINE__);
+ret:
+	ret = pm_runtime_get_sync(kbdev->dev);
+    udelay(100);
+#if 1
+
+    core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+    l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+    tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+    //printk("core_ready=%llx, l2_ready=%llx, tiler_ready=%llx\n", core_ready, l2_ready, tiler_ready);
+#endif
+	dev_dbg(kbdev->dev, "pm_runtime_get returned %d\n", ret);
+
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+    //printk("%s, %d\n", __FILE__, __LINE__);
+#if 0
+    iounmap(reg_base_hiubus);
+    reg_base_hiubus = NULL;
+#endif
+	u64 core_ready  = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+	u64 l2_ready    = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+	u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+
+	dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+	if (core_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, core_ready, ACTION_PWROFF);
+
+	if (l2_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_ready, ACTION_PWROFF);
+
+	if (tiler_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_ready, ACTION_PWROFF);
+
+	pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+	pm_runtime_enable(kbdev->dev);
+
+	return 0;
+}
+
+void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+	pm_runtime_disable(kbdev->dev);
+}
+
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+	return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+	int ret = pm_callback_runtime_on(kbdev);
+
+	WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+	pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback = pm_callback_suspend,
+	.power_resume_callback = pm_callback_resume,
+#ifdef KBASE_PM_RUNTIME
+	.power_runtime_init_callback = kbase_device_runtime_init,
+	.power_runtime_term_callback = kbase_device_runtime_disable,
+	.power_runtime_on_callback = pm_callback_runtime_on,
+	.power_runtime_off_callback = pm_callback_runtime_off,
+#else				/* KBASE_PM_RUNTIME */
+	.power_runtime_init_callback = NULL,
+	.power_runtime_term_callback = NULL,
+	.power_runtime_on_callback = NULL,
+	.power_runtime_off_callback = NULL,
+#endif				/* KBASE_PM_RUNTIME */
+};
+
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
new file mode 100644
index 0000000..41185d0
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
@@ -0,0 +1,15 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/kernel.h>
+#ifndef MALI_PLATFORM_H_
+#define MALI_PLATFORM_H_
+
+extern u32 mali_gp_reset_fail;
+extern u32 mali_core_timeout;
+
+#endif /* MALI_PLATFORM_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
new file mode 100644
index 0000000..dd2a53b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.h
+ * Example core scaling policy.
+ */
+
+#ifndef __ARM_CORE_SCALING_H__
+#define __ARM_CORE_SCALING_H__
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-private.h>
+
+enum mali_scale_mode_t {
+	MALI_PP_SCALING = 0,
+	MALI_PP_FS_SCALING,
+	MALI_SCALING_DISABLE,
+	MALI_TURBO_MODE,
+	MALI_SCALING_MODE_MAX
+};
+
+typedef struct mali_dvfs_threshold_table {
+	uint32_t    freq_index;
+	uint32_t    voltage;
+	uint32_t    keep_count;
+	uint32_t    downthreshold;
+	uint32_t    upthreshold;
+    uint32_t    clk_freq;
+    const char  *clk_parent;
+    struct clk  *clkp_handle;
+    uint32_t    clkp_freq;
+} mali_dvfs_threshold_table;
+
+/**
+ *  restrictions on frequency and number of pp.
+ */
+typedef struct mali_scale_info_t {
+	u32 minpp;
+	u32 maxpp;
+	u32 minclk;
+	u32 maxclk;
+} mali_scale_info_t;
+
+/**
+ * Platform spesific data for meson chips.
+ */
+typedef struct mali_plat_info_t {
+	u32 cfg_pp; /* number of pp. */
+	u32 cfg_min_pp;
+	u32 turbo_clock; /* reserved clock src. */
+	u32 def_clock; /* gpu clock used most of time.*/
+	u32 cfg_clock; /* max clock could be used.*/
+	u32 cfg_clock_bkup; /* same as cfg_clock, for backup. */
+	u32 cfg_min_clock;
+
+	u32  sc_mpp; /* number of pp used most of time.*/
+	u32  bst_gpu; /* threshold for boosting gpu. */
+	u32  bst_pp; /* threshold for boosting PP. */
+
+	u32 *clk;
+	u32 *clk_sample;
+	u32 clk_len;
+	u32 have_switch; /* have clock gate switch or not. */
+
+	mali_dvfs_threshold_table *dvfs_table;
+	u32 dvfs_table_size;
+
+	mali_scale_info_t scale_info;
+	u32 maxclk_sysfs;
+	u32 maxpp_sysfs;
+
+	/* set upper limit of pp or frequency, for THERMAL thermal or band width saving.*/
+	u32 limit_on;
+
+	/* for boost up gpu by user. */
+	void (*plat_preheat)(void);
+
+    struct platform_device *pdev;
+    void __iomem *reg_base_hiubus;
+    void __iomem *reg_base_aobus;
+	struct work_struct wq_work;
+    struct clk *clk_mali;
+    struct clk *clk_mali_0;
+    struct clk *clk_mali_1;
+} mali_plat_info_t;
+mali_plat_info_t* get_mali_plat_data(void);
+
+/**
+ * Initialize core scaling policy.
+ *
+ * @note The core scaling policy will assume that all PP cores are on initially.
+ *
+ * @param num_pp_cores Total number of PP cores.
+ */
+int mali_core_scaling_init(mali_plat_info_t*);
+
+/**
+ * Terminate core scaling policy.
+ */
+void mali_core_scaling_term(void);
+
+/**
+ * cancel and flush scaling job queue.
+ */
+void flush_scaling_job(void);
+
+/* get current state(pp, clk). */
+void get_mali_rt_clkpp(u32* clk, u32* pp);
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush);
+void revise_mali_rt(void);
+/* get max gpu clk level of this chip*/
+int get_gpu_max_clk_level(void);
+
+/* get or set the scale mode. */
+u32 get_mali_schel_mode(void);
+void set_mali_schel_mode(u32 mode);
+
+/* for frequency reporter in DS-5 streamline. */
+u32 get_current_frequency(void);
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+#endif /* __ARM_CORE_SCALING_H__ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
new file mode 100644
index 0000000..7687f92
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2010, 2012-2014 Amlogic Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ */
+
+/**
+ * @file mali_platform.c
+ * Platform specific Mali driver functions for:
+ * meson8m2 and the newer chip
+ */
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#include <asm/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/moduleparam.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_defs.h>
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+extern void mali_post_init(void);
+struct kbase_device;
+//static int gpu_dvfs_probe(struct platform_device *pdev)
+int platform_dt_init_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	int err = -1;
+
+	err = mali_meson_init_start(pdev);
+    mali_meson_init_finish(pdev);
+    mpgpu_class_init();
+    mali_post_init();
+    return err;
+}
+
+//static int gpu_dvfs_remove(struct platform_device *pdev)
+void platform_dt_term_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	printk("%s, %d\n", __FILE__, __LINE__);
+
+    mpgpu_class_exit();
+	mali_meson_uninit(pdev);
+
+}
+
+inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2])
+{
+    mali_gpu_utilization_callback(utilisation*255/100);
+    return 1;
+}
+
+struct kbase_platform_funcs_conf dt_funcs_conf = {
+    .platform_init_func = platform_dt_init_func,
+    .platform_term_func = platform_dt_term_func,
+};
+#if 0
+static const struct of_device_id gpu_dvfs_ids[] = {
+	{ .compatible = "meson, gpu-dvfs-1.00.a" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, gpu_dvfs_ids);
+
+static struct platform_driver gpu_dvfs_driver = {
+	.driver = {
+		.name = "meson-gpu-dvfs",
+		.owner = THIS_MODULE,
+		.of_match_table = gpu_dvfs_ids,
+	},
+	.probe = gpu_dvfs_probe,
+	.remove = gpu_dvfs_remove,
+};
+module_platform_driver(gpu_dvfs_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Amlogic SH, MM");
+MODULE_DESCRIPTION("Driver for the Meson GPU dvfs");
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
new file mode 100644
index 0000000..ca79752
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
@@ -0,0 +1,33 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#ifndef MESON_MAIN_H_
+#define MESON_MAIN_H_
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+u32 set_max_mali_freq(u32 idx);
+u32 get_max_mali_freq(void);
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev);
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev);
+int mali_meson_uninit(struct platform_device* ptr_plt_dev);
+int mpgpu_class_init(void);
+void mpgpu_class_exit(void);
+void mali_gpu_utilization_callback(int utilization_pp);
+
+#endif /* MESON_MAIN_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
new file mode 100644
index 0000000..0cc5035
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
@@ -0,0 +1,359 @@
+/*******************************************************************
+ *
+ *  Copyright C 2013 by Amlogic, Inc. All Rights Reserved.
+ *
+ *  Description:
+ *
+ *  Author: Amlogic Software
+ *  Created: 2010/4/1   19:46
+ *
+ *******************************************************************/
+/* Standard Linux headers */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/io.h>
+#include <plat/io.h>
+#include <asm/io.h>
+#endif
+
+#include "meson_main2.h"
+
+static ssize_t domain_stat_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	unsigned int val;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+	return sprintf(buf, "%x\n", val>>4);
+	return 0;
+}
+
+#define PREHEAT_CMD "preheat"
+#define PLL2_CMD "mpl2"  /* mpl2 [11] or [0xxxxxxx] */
+#define SCMPP_CMD "scmpp"  /* scmpp [number of pp your want in most of time]. */
+#define BSTGPU_CMD "bstgpu"  /* bstgpu [0-256] */
+#define BSTPP_CMD "bstpp"  /* bstpp [0-256] */
+#define LIMIT_CMD "lmt"  /* lmt [0 or 1] */
+#define MAX_TOKEN 20
+#define FULL_UTILIZATION 256
+
+static ssize_t mpgpu_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	char *pstart, *cprt = NULL;
+	u32 val = 0;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	cprt = skip_spaces(buf);
+	pstart = strsep(&cprt," ");
+	if (strlen(pstart) < 1)
+		goto quit;
+
+	if (!strncmp(pstart, PREHEAT_CMD, MAX_TOKEN)) {
+		if (pmali_plat->plat_preheat) {
+			pmali_plat->plat_preheat();
+		}
+	} else if (!strncmp(pstart, PLL2_CMD, MAX_TOKEN)) {
+		int base = 10;
+		if ((strlen(cprt) > 2) && (cprt[0] == '0') &&
+				(cprt[1] == 'x' || cprt[1] == 'X'))
+			base = 16;
+		if (kstrtouint(cprt, base, &val) <0)
+			goto quit;
+		if (val < 11)
+			pmali_plat->cfg_clock = pmali_plat->cfg_clock_bkup;
+		else
+			pmali_plat->cfg_clock = pmali_plat->turbo_clock;
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		set_str_src(val);
+	} else if (!strncmp(pstart, SCMPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < pmali_plat->cfg_pp)) {
+			pmali_plat->sc_mpp = val;
+		}
+	} else if (!strncmp(pstart, BSTGPU_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_gpu = val;
+		}
+	} else if (!strncmp(pstart, BSTPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_pp = val;
+		}
+	} else if (!strncmp(pstart, LIMIT_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+
+		if (val < 2) {
+			pmali_plat->limit_on = val;
+			if (val == 0) {
+				pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+				pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+				revise_mali_rt();
+			}
+		}
+	}
+quit:
+	return count;
+}
+
+static ssize_t scale_mode_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_mali_schel_mode());
+}
+
+static ssize_t scale_mode_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	set_mali_schel_mode(val);
+
+	return count;
+}
+
+#if 0
+static ssize_t max_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxpp:%d, maxpp_sysfs:%d, total=%d\n",
+			pmali_plat->scale_info.maxpp, pmali_plat->maxpp_sysfs,
+			pmali_plat->cfg_pp);
+	return sprintf(buf, "%d\n", pmali_plat->cfg_pp);
+}
+
+static ssize_t max_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_pp) || (val < pinfo->minpp))
+		return -EINVAL;
+
+	pmali_plat->maxpp_sysfs = val;
+	pinfo->maxpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minpp);
+}
+
+static ssize_t min_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxpp) || (val < 1))
+		return -EINVAL;
+
+	pinfo->minpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+#endif
+
+static ssize_t max_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxclk:%d, maxclk_sys:%d, max gpu level=%d\n",
+			pmali_plat->scale_info.maxclk, pmali_plat->maxclk_sysfs, get_gpu_max_clk_level());
+	return sprintf(buf, "%d\n", get_gpu_max_clk_level());
+}
+
+static ssize_t max_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_clock) || (val < pinfo->minclk))
+		return -EINVAL;
+
+	pmali_plat->maxclk_sysfs = val;
+	pinfo->maxclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minclk);
+}
+
+static ssize_t min_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxclk))
+		return -EINVAL;
+
+	pinfo->minclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_current_frequency());
+}
+
+static ssize_t freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(val, pp, 1);
+
+	return count;
+}
+
+#if 0
+static ssize_t current_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+	return sprintf(buf, "%d\n", pp);
+}
+
+static ssize_t current_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+
+	get_mali_rt_clkpp(&clk, &pp);
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(clk, val, 1);
+
+	return count;
+}
+#endif
+
+static struct class_attribute mali_class_attrs[] = {
+	__ATTR(domain_stat,	0644, domain_stat_read, NULL),
+	__ATTR(mpgpucmd,	0644, NULL,		mpgpu_write),
+	__ATTR(scale_mode,	0644, scale_mode_read,  scale_mode_write),
+	__ATTR(min_freq,	0644, min_freq_read,  	min_freq_write),
+	__ATTR(max_freq,	0644, max_freq_read,	max_freq_write),
+#if 0
+	__ATTR(min_pp,		0644, min_pp_read,	min_pp_write),
+	__ATTR(max_pp,		0644, max_pp_read,	max_pp_write),
+#endif
+	__ATTR(cur_freq,	0644, freq_read,	freq_write),
+#if 0
+	__ATTR(cur_pp,		0644, current_pp_read,	current_pp_write),
+#endif
+};
+
+static struct class mpgpu_class = {
+	.name = "mpgpu",
+};
+
+int mpgpu_class_init(void)
+{
+	int ret = 0;
+	int i;
+	int attr_num =  ARRAY_SIZE(mali_class_attrs);
+
+	ret = class_register(&mpgpu_class);
+	if (ret) {
+		printk(KERN_ERR "%s: class_register failed\n", __func__);
+		return ret;
+	}
+	for (i = 0; i< attr_num; i++) {
+		ret = class_create_file(&mpgpu_class, &mali_class_attrs[i]);
+		if (ret) {
+			printk(KERN_ERR "%d ST: class item failed to register\n", i);
+		}
+	}
+	return ret;
+}
+
+void  mpgpu_class_exit(void)
+{
+	class_unregister(&mpgpu_class);
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
new file mode 100644
index 0000000..d16675e
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
@@ -0,0 +1,246 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include <asm/io.h>
+#ifdef CONFIG_GPU_THERMAL
+#include <linux/gpu_cooling.h>
+#include <linux/gpucore_cooling.h>
+#include <linux/amlogic/aml_thermal_hw.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+/*
+ *    For Meson 8 M2.
+ *
+ */
+static void mali_plat_preheat(void);
+static mali_plat_info_t mali_plat_data = {
+    .bst_gpu = 210, /* threshold for boosting gpu. */
+    .bst_pp = 160, /* threshold for boosting PP. */
+    .have_switch = 1,
+    .limit_on = 1,
+    .plat_preheat = mali_plat_preheat,
+};
+
+static void mali_plat_preheat(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    u32 pre_fs;
+    u32 clk, pp;
+
+    if (get_mali_schel_mode() != MALI_PP_FS_SCALING)
+        return;
+
+    get_mali_rt_clkpp(&clk, &pp);
+    pre_fs = mali_plat_data.def_clock + 1;
+    if (clk < pre_fs)
+        clk = pre_fs;
+    if (pp < mali_plat_data.sc_mpp)
+        pp = mali_plat_data.sc_mpp;
+    set_mali_rt_clkpp(clk, pp, 1);
+#endif
+}
+
+mali_plat_info_t* get_mali_plat_data(void) {
+    return &mali_plat_data;
+}
+
+int get_mali_freq_level(int freq)
+{
+    int i = 0, level = -1;
+    int mali_freq_num;
+
+    if (freq < 0)
+        return level;
+
+    mali_freq_num = mali_plat_data.dvfs_table_size - 1;
+    if (freq < mali_plat_data.clk_sample[0])
+        level = mali_freq_num-1;
+    else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1])
+        level = 0;
+    else {
+        for (i=0; i<mali_freq_num - 1 ;i++) {
+            if (freq >= mali_plat_data.clk_sample[i] && freq < mali_plat_data.clk_sample[i + 1]) {
+                level = i;
+                level = mali_freq_num-level - 1;
+                break;
+            }
+        }
+    }
+    return level;
+}
+
+unsigned int get_mali_max_level(void)
+{
+    return mali_plat_data.dvfs_table_size - 1;
+}
+
+int get_gpu_max_clk_level(void)
+{
+    return mali_plat_data.cfg_clock;
+}
+
+#ifdef CONFIG_GPU_THERMAL
+static void set_limit_mali_freq(u32 idx)
+{
+    if (mali_plat_data.limit_on == 0)
+        return;
+    if (idx > mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk)
+        return;
+    if (idx > mali_plat_data.maxclk_sysfs) {
+        printk("idx > max freq\n");
+        return;
+    }
+    mali_plat_data.scale_info.maxclk= idx;
+    revise_mali_rt();
+}
+
+static u32 get_limit_mali_freq(void)
+{
+    return mali_plat_data.scale_info.maxclk;
+}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 get_mali_utilization(void)
+{
+#ifndef MESON_DRV_BRING
+    return 55;
+#else
+    return (_mali_ukk_utilization_pp() * 100) / 256;
+#endif
+}
+#endif
+#endif
+
+#ifdef CONFIG_GPU_THERMAL
+static u32 set_limit_pp_num(u32 num)
+{
+    u32 ret = -1;
+    if (mali_plat_data.limit_on == 0)
+        goto quit;
+    if (num > mali_plat_data.cfg_pp ||
+            num < mali_plat_data.scale_info.minpp)
+        goto quit;
+
+    if (num > mali_plat_data.maxpp_sysfs) {
+        printk("pp > sysfs set pp\n");
+        goto quit;
+    }
+
+    mali_plat_data.scale_info.maxpp = num;
+    revise_mali_rt();
+    ret = 0;
+quit:
+    return ret;
+}
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 mali_get_online_pp(void)
+{
+    unsigned int val;
+    mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+    val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+    if (val == 0x07)    /* No pp is working */
+        return 0;
+
+#ifndef MESON_DRV_BRING
+    return 2;
+#else
+    return mali_executor_get_num_cores_enabled();
+#endif
+}
+#endif
+#endif
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+    //dev_set_drvdata(&ptr_plt_dev->dev, &mali_plat_data);
+    mali_dt_info(ptr_plt_dev, &mali_plat_data);
+    mali_clock_init_clk_tree(ptr_plt_dev);
+    return 0;
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+    if (mali_core_scaling_init(&mali_plat_data) < 0)
+        return -1;
+    return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+    mali_core_scaling_term();
+    return 0;
+}
+
+void mali_post_init(void)
+{
+#ifdef CONFIG_GPU_THERMAL
+    int err;
+    struct gpufreq_cooling_device *gcdev = NULL;
+    struct gpucore_cooling_device *gccdev = NULL;
+
+    gcdev = gpufreq_cooling_alloc();
+    register_gpu_freq_info(get_current_frequency);
+    if (IS_ERR(gcdev))
+        printk("malloc gpu cooling buffer error!!\n");
+    else if (!gcdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gcdev->get_gpu_freq_level = get_mali_freq_level;
+        gcdev->get_gpu_max_level = get_mali_max_level;
+        gcdev->set_gpu_freq_idx = set_limit_mali_freq;
+        gcdev->get_gpu_current_max_level = get_limit_mali_freq;
+#ifdef CONFIG_DEVFREQ_THERMAL
+        gcdev->get_gpu_freq = get_mali_freq;
+        gcdev->get_gpu_loading = get_mali_utilization;
+        gcdev->get_online_pp = mali_get_online_pp;
+#endif
+        err = gpufreq_cooling_register(gcdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gcdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu cooling register okay with err=%d\n",err);
+    }
+
+    gccdev = gpucore_cooling_alloc();
+    if (IS_ERR(gccdev))
+        printk("malloc gpu core cooling buffer error!!\n");
+    else if (!gccdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gccdev->max_gpu_core_num=mali_plat_data.cfg_pp;
+        gccdev->set_max_pp_num=set_limit_pp_num;
+        err = (int)gpucore_cooling_register(gccdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gccdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu core cooling register okay with err=%d\n",err);
+    }
+#endif
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
new file mode 100644
index 0000000..16fe365
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
@@ -0,0 +1,552 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.c
+ * Example core scaling policy.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+
+#include <linux/amlogic/amports/gp_pll.h>
+#define LOG_MALI_SCALING 1
+#include "meson_main2.h"
+#include "mali_clock.h"
+
+static int currentStep;
+#ifndef CONFIG_MALI_DVFS
+static int num_cores_enabled;
+static int lastStep;
+static struct work_struct wq_work;
+static mali_plat_info_t* pmali_plat = NULL;
+#endif
+static int  scaling_mode = MALI_PP_FS_SCALING;
+extern int  mali_pm_statue;
+//static int  scaling_mode = MALI_SCALING_DISABLE;
+//static int  scaling_mode = MALI_PP_SCALING;
+
+static struct gp_pll_user_handle_s *gp_pll_user_gpu;
+static int is_gp_pll_get;
+static int is_gp_pll_put;
+
+static unsigned scaling_dbg_level = 0;
+module_param(scaling_dbg_level, uint, 0644);
+MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level");
+
+#define scalingdbg(level, fmt, arg...)				   \
+	do {											   \
+		if (scaling_dbg_level >= (level))			   \
+		printk(fmt , ## arg);						   \
+	} while (0)
+
+#ifndef CONFIG_MALI_DVFS
+static inline void mali_clk_exected(void)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	uint32_t execStep = currentStep;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[currentStep];
+
+	//if (pdvfs[currentStep].freq_index == pdvfs[lastStep].freq_index) return;
+	if ((pdvfs[execStep].freq_index == pdvfs[lastStep].freq_index) ||
+		(pdvfs[execStep].clk_freq == pdvfs[lastStep].clk_freq)){
+		return;
+	}
+
+	if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+		gp_pll_request(gp_pll_user_gpu);
+		if (!is_gp_pll_get) {
+			//printk("not get pll\n");
+			execStep = currentStep - 1;
+		}
+	} else {
+		//not get the gp pll, do need put
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+		gp_pll_release(gp_pll_user_gpu);
+	}
+
+	//mali_dev_pause();
+	mali_clock_set(pdvfs[execStep].freq_index);
+	//mali_dev_resume();
+	lastStep = execStep;
+	if (is_gp_pll_put) {
+		//printk("release gp0 pll\n");
+		gp_pll_release(gp_pll_user_gpu);
+		gp_pll_request(gp_pll_user_gpu);
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+	}
+
+}
+static int gp_pll_user_cb_gpu(struct gp_pll_user_handle_s *user,
+		int event)
+{
+	if (event == GP_PLL_USER_EVENT_GRANT) {
+		//printk("granted\n");
+		is_gp_pll_get = 1;
+		is_gp_pll_put = 0;
+		schedule_work(&wq_work);
+	} else if (event == GP_PLL_USER_EVENT_YIELD) {
+		//printk("ask for yield\n");
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 1;
+		schedule_work(&wq_work);
+	}
+
+	return 0;
+}
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+    cores = cores;
+    return 0;
+}
+
+static void do_scaling(struct work_struct *work)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	int err = mali_perf_set_num_pp_cores(num_cores_enabled);
+    if (err < 0) scalingdbg(1, "set pp failed");
+
+	scalingdbg(1, "set pp cores to %d\n", num_cores_enabled);
+	scalingdbg(1, "pdvfs[%d].freq_index=%d, pdvfs[%d].freq_index=%d\n",
+			currentStep, pdvfs[currentStep].freq_index,
+			lastStep, pdvfs[lastStep].freq_index);
+	mali_clk_exected();
+#ifdef CONFIG_MALI400_PROFILING
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+			MALI_PROFILING_EVENT_CHANNEL_GPU |
+			MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+			get_current_frequency(),
+			0,	0,	0,	0);
+#endif
+}
+#endif
+
+u32 revise_set_clk(u32 val, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+
+	pinfo = &pmali_plat->scale_info;
+
+	if (val < pinfo->minclk)
+		val = pinfo->minclk;
+	else if (val >  pinfo->maxclk)
+		val =  pinfo->maxclk;
+
+	if (val != currentStep) {
+		currentStep = val;
+		if (flush)
+			schedule_work(&wq_work);
+		else
+			ret = 1;
+	}
+#endif
+	return ret;
+}
+
+void get_mali_rt_clkpp(u32* clk, u32* pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	*clk = currentStep;
+	*pp = num_cores_enabled;
+#endif
+}
+
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+	u32 flush_work = 0;
+
+	pinfo = &pmali_plat->scale_info;
+	if (clk < pinfo->minclk)
+		clk = pinfo->minclk;
+	else if (clk >  pinfo->maxclk)
+		clk =  pinfo->maxclk;
+
+	if (clk != currentStep) {
+		currentStep = clk;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+	if (pp < pinfo->minpp)
+		pp = pinfo->minpp;
+	else if (pp > pinfo->maxpp)
+		pp = pinfo->maxpp;
+
+	if (pp != num_cores_enabled) {
+		num_cores_enabled = pp;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+
+	if (flush_work)
+		schedule_work(&wq_work);
+#endif
+	return ret;
+}
+
+void revise_mali_rt(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	set_mali_rt_clkpp(currentStep, num_cores_enabled, 1);
+#endif
+}
+
+void flush_scaling_job(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	cancel_work_sync(&wq_work);
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 enable_one_core(void)
+{
+	scalingdbg(2, "meson:	 one more pp, curent has %d pp cores\n",  num_cores_enabled + 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled + 1, 0);
+}
+
+static u32 disable_one_core(void)
+{
+	scalingdbg(2, "meson: disable one pp, current has %d pp cores\n",  num_cores_enabled - 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled - 1, 0);
+}
+
+static u32 enable_max_num_cores(void)
+{
+	return set_mali_rt_clkpp(currentStep, pmali_plat->scale_info.maxpp, 0);
+}
+
+static u32 enable_pp_cores(u32 val)
+{
+	scalingdbg(2, "meson: enable %d pp cores\n", val);
+	return set_mali_rt_clkpp(currentStep, val, 0);
+}
+#endif
+
+int mali_core_scaling_init(mali_plat_info_t *mali_plat)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_plat == NULL) {
+		scalingdbg(2, " Mali platform data is NULL!!!\n");
+		return -1;
+	}
+
+	pmali_plat = mali_plat;
+    printk("mali_plat=%p\n", mali_plat);
+	num_cores_enabled = pmali_plat->sc_mpp;
+	gp_pll_user_gpu = gp_pll_user_register("gpu", 1,
+		gp_pll_user_cb_gpu);
+	//not get the gp pll, do need put
+	is_gp_pll_get = 0;
+	is_gp_pll_put = 0;
+	if (gp_pll_user_gpu == NULL) printk("register gp pll user for gpu failed\n");
+
+	currentStep = pmali_plat->def_clock;
+	lastStep = currentStep;
+	INIT_WORK(&wq_work, do_scaling);
+#endif
+	return 0;
+	/* NOTE: Mali is not fully initialized at this point. */
+}
+
+void mali_core_scaling_term(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	flush_scheduled_work();
+	gp_pll_user_unregister(gp_pll_user_gpu);
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 mali_threshold [] = {
+	40, /* 40% */
+	50, /* 50% */
+	90, /* 90% */
+};
+#endif
+
+void mali_pp_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+
+	if (mali_threshold[2] < utilization_pp)
+		ret = enable_max_num_cores();
+	else if (mali_threshold[1]< utilization_pp)
+		ret = enable_one_core();
+	else if (0 < utilization_pp)
+		ret = disable_one_core();
+	if (ret == 1)
+		schedule_work(&wq_work);
+#endif
+}
+
+#if LOG_MALI_SCALING
+void trace_utilization(int utilization_gpu, u32 current_idx, u32 next,
+		u32 current_pp, u32 next_pp)
+{
+	char direction;
+	if (next > current_idx)
+		direction = '>';
+	else if ((current_idx > pmali_plat->scale_info.minpp) && (next < current_idx))
+		direction = '<';
+	else
+		direction = '~';
+
+	scalingdbg(2, "[SCALING]%c (%3d-->%3d)@%3d{%3d - %3d}. pp:(%d-->%d)\n",
+			direction,
+			get_mali_freq(current_idx),
+			get_mali_freq(next),
+			utilization_gpu,
+			pmali_plat->dvfs_table[current_idx].downthreshold,
+			pmali_plat->dvfs_table[current_idx].upthreshold,
+			current_pp, next_pp);
+}
+#endif
+
+#ifndef CONFIG_MALI_DVFS
+static int mali_stay_count = 0;
+static void mali_decide_next_status(int utilization_pp, int* next_fs_idx,
+		int* pp_change_flag)
+{
+	u32 mali_up_limit, decided_fs_idx;
+	u32 ld_left, ld_right;
+	u32 ld_up, ld_down;
+	u32 change_mode;
+
+	*pp_change_flag = 0;
+	change_mode = 0;
+
+	scalingdbg(5, "line(%d), scaling_mode=%d, MALI_TURBO_MODE=%d, turbo=%d, maxclk=%d\n",
+			__LINE__,  scaling_mode, MALI_TURBO_MODE,
+		    pmali_plat->turbo_clock, pmali_plat->scale_info.maxclk);
+
+	mali_up_limit = (scaling_mode ==  MALI_TURBO_MODE) ?
+		pmali_plat->turbo_clock : pmali_plat->scale_info.maxclk;
+	decided_fs_idx = currentStep;
+
+	ld_up = pmali_plat->dvfs_table[currentStep].upthreshold;
+	ld_down = pmali_plat->dvfs_table[currentStep].downthreshold;
+
+	scalingdbg(2, "utilization=%d,  ld_up=%d\n ", utilization_pp,  ld_up);
+	if (utilization_pp >= ld_up) { /* go up */
+
+		scalingdbg(2, "currentStep=%d,  mali_up_limit=%d\n ", currentStep, mali_up_limit);
+		if (currentStep < mali_up_limit) {
+			change_mode = 1;
+			if ((currentStep < pmali_plat->def_clock) && (utilization_pp > pmali_plat->bst_gpu))
+				decided_fs_idx = pmali_plat->def_clock;
+			else
+				decided_fs_idx++;
+		}
+		if ((utilization_pp >= ld_up) &&
+				(num_cores_enabled < pmali_plat->scale_info.maxpp)) {
+			if ((num_cores_enabled < pmali_plat->sc_mpp) && (utilization_pp >= pmali_plat->bst_pp)) {
+				*pp_change_flag = 1;
+				change_mode = 1;
+			} else if (change_mode == 0) {
+				*pp_change_flag = 2;
+				change_mode = 1;
+			}
+		}
+#if LOG_MALI_SCALING
+		scalingdbg(2, "[nexting..] [LD:%d]-> FS[CRNT:%d LMT:%d NEXT:%d] PP[NUM:%d LMT:%d MD:%d][F:%d]\n",
+				utilization_pp, currentStep, mali_up_limit, decided_fs_idx,
+				num_cores_enabled, pmali_plat->scale_info.maxpp, *pp_change_flag, change_mode);
+#endif
+	} else if (utilization_pp <= ld_down) { /* go down */
+		if (mali_stay_count > 0) {
+			*next_fs_idx = decided_fs_idx;
+			mali_stay_count--;
+			return;
+		}
+
+		if (num_cores_enabled > pmali_plat->sc_mpp) {
+			change_mode = 1;
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		} else if (currentStep > pmali_plat->scale_info.minclk) {
+			change_mode = 1;
+		} else if (num_cores_enabled > 1) { /* decrease PPS */
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				scalingdbg(2, "ld_left=%d, ld_right=%d\n", ld_left, ld_right);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		}
+
+		if (change_mode == 1) {
+			decided_fs_idx--;
+		} else if (change_mode == 2) { /* decrease PPS */
+			*pp_change_flag = -1;
+		}
+	}
+
+	if (decided_fs_idx < 0 ) {
+		printk("gpu debug, next index below 0\n");
+		decided_fs_idx = 0;
+	}
+	if (decided_fs_idx > pmali_plat->scale_info.maxclk) {
+		decided_fs_idx = pmali_plat->scale_info.maxclk;
+		printk("gpu debug, next index above max-1, set to %d\n", decided_fs_idx);
+	}
+
+	if (change_mode)
+		mali_stay_count = pmali_plat->dvfs_table[decided_fs_idx].keep_count;
+
+	*next_fs_idx = decided_fs_idx;
+}
+#endif
+
+void mali_pp_fs_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+	int pp_change_flag = 0;
+	u32 next_idx = 0;
+
+#if LOG_MALI_SCALING
+	u32 last_pp = num_cores_enabled;
+#endif
+	mali_decide_next_status(utilization_pp, &next_idx, &pp_change_flag);
+
+	if (pp_change_flag == 1)
+		ret = enable_pp_cores(pmali_plat->sc_mpp);
+	else if (pp_change_flag == 2)
+		ret = enable_one_core();
+	else if (pp_change_flag == -1) {
+		ret = disable_one_core();
+	}
+
+#if LOG_MALI_SCALING
+	if (pp_change_flag || (next_idx != currentStep))
+		trace_utilization(utilization_pp, currentStep, next_idx, last_pp, num_cores_enabled);
+#endif
+
+	if (next_idx != currentStep) {
+		ret = 1;
+		currentStep = next_idx;
+	}
+
+	if (ret == 1)
+		schedule_work(&wq_work);
+#ifdef CONFIG_MALI400_PROFILING
+	else
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				MALI_PROFILING_EVENT_CHANNEL_GPU |
+				MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				get_current_frequency(),
+				0,	0,	0,	0);
+#endif
+#endif
+}
+
+u32 get_mali_schel_mode(void)
+{
+	return scaling_mode;
+}
+
+void set_mali_schel_mode(u32 mode)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mode >= MALI_SCALING_MODE_MAX)
+		return;
+	scaling_mode = mode;
+
+	//disable thermal in turbo mode
+	if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->limit_on = 0;
+	} else {
+		pmali_plat->limit_on = 1;
+	}
+	/* set default performance range. */
+	pmali_plat->scale_info.minclk = pmali_plat->cfg_min_clock;
+	pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+	pmali_plat->scale_info.minpp = pmali_plat->cfg_min_pp;
+	pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+
+	/* set current status and tune max freq */
+	if (scaling_mode == MALI_PP_FS_SCALING) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_pp_cores(pmali_plat->sc_mpp);
+	} else if (scaling_mode == MALI_SCALING_DISABLE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_max_num_cores();
+	} else if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->turbo_clock;
+		enable_max_num_cores();
+	}
+	currentStep = pmali_plat->scale_info.maxclk;
+	schedule_work(&wq_work);
+#endif
+}
+
+u32 get_current_frequency(void)
+{
+	return get_mali_freq(currentStep);
+}
+
+void mali_gpu_utilization_callback(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_pm_statue)
+		return;
+
+	switch (scaling_mode) {
+		case MALI_PP_FS_SCALING:
+			mali_pp_fs_scaling_update(utilization_pp);
+			break;
+		case MALI_PP_SCALING:
+			mali_pp_scaling_update(utilization_pp);
+			break;
+		default:
+			break;
+	}
+#endif
+}
+static u32 clk_cntl_save = 0;
+void mali_dev_freeze(void)
+{
+	clk_cntl_save = mplt_read(HHI_MALI_CLK_CNTL);
+}
+
+void mali_dev_restore(void)
+{
+
+	mplt_write(HHI_MALI_CLK_CNTL, clk_cntl_save);
+	if (pmali_plat && pmali_plat->pdev) {
+		mali_clock_init_clk_tree(pmali_plat->pdev);
+	} else {
+		printk("error: init clock failed, pmali_plat=%p, pmali_plat->pdev=%p\n",
+				pmali_plat, pmali_plat == NULL ? NULL: pmali_plat->pdev);
+	}
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
new file mode 100755
index 0000000..5b6ef37
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_juno_soc.o
+
+
+obj-m += juno_mali_opp.o
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
new file mode 100755
index 0000000..ccfd8cc
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/scpi_protocol.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp_add opp_add
+#endif /* Linux >= 3.13 */
+
+
+static int init_juno_opps_from_scpi(struct device *dev)
+{
+	struct scpi_opp *sopp;
+	int i;
+
+	/* Hard coded for Juno. 2 is GPU domain */
+	sopp = scpi_dvfs_get_opps(2);
+	if (IS_ERR_OR_NULL(sopp))
+		return PTR_ERR(sopp);
+
+	for (i = 0; i < sopp->count; i++) {
+		struct scpi_opp_entry *e = &sopp->opp[i];
+
+		dev_info(dev, "Mali OPP from SCPI: %u Hz @ %u mV\n",
+				e->freq_hz, e->volt_mv);
+
+		dev_pm_opp_add(dev, e->freq_hz, e->volt_mv * 1000);
+	}
+
+	return 0;
+}
+
+static int juno_setup_opps(void)
+{
+	struct device_node *np;
+	struct platform_device *pdev;
+	int err;
+
+	np = of_find_node_by_name(NULL, "gpu");
+	if (!np) {
+		pr_err("Failed to find DT entry for Mali\n");
+		return -EFAULT;
+	}
+
+	pdev = of_find_device_by_node(np);
+	if (!pdev) {
+		pr_err("Failed to find device for Mali\n");
+		of_node_put(np);
+		return -EFAULT;
+	}
+
+	err = init_juno_opps_from_scpi(&pdev->dev);
+
+	of_node_put(np);
+
+	return err;
+}
+
+module_init(juno_setup_opps);
+MODULE_LICENSE("GPL");
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
new file mode 100755
index 0000000..c654818
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <linux/thermal.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_smc.h>
+
+/* Versatile Express (VE) Juno Development Platform */
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 65,
+	.mmu_irq_number = 66,
+	.gpu_irq_number = 64,
+	.io_memory_region = {
+			     .start = 0x2D000000,
+			     .end = 0x2D000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+/*
+ * Juno Protected Mode integration
+ */
+
+/* SMC Function Numbers */
+#define JUNO_SMC_PROTECTED_ENTER_FUNC  0xff06
+#define JUNO_SMC_PROTECTED_RESET_FUNC 0xff07
+
+static int juno_protected_mode_enter(struct kbase_device *kbdev)
+{
+	/* T62X in SoC detected */
+	u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+		JUNO_SMC_PROTECTED_ENTER_FUNC, false,
+		0, 0, 0);
+	return ret;
+}
+
+/* TODO: Remove these externs, reset should should be done by the firmware */
+extern void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+extern u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+static int juno_protected_mode_reset(struct kbase_device *kbdev)
+{
+
+	/* T62X in SoC detected */
+	u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+		JUNO_SMC_PROTECTED_RESET_FUNC, false,
+		0, 0, 0);
+
+	/* TODO: Remove this reset, it should be done by the firmware */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET, NULL);
+
+	while ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL)
+			& RESET_COMPLETED) != RESET_COMPLETED)
+		;
+
+	return ret;
+}
+
+static bool juno_protected_mode_supported(struct kbase_device *kbdev)
+{
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+
+	/*
+	 * Protected mode is only supported for the built in GPU
+	 * _and_ only if the right firmware is running.
+	 *
+	 * Given that at init time the GPU is not powered up the
+	 * juno_protected_mode_reset function can't be used as
+	 * is needs to access GPU registers.
+	 * However, although we don't want the GPU to boot into
+	 * protected mode we know a GPU reset will be done after
+	 * this function is called so although we set the GPU to
+	 * protected mode it will exit protected mode before the
+	 * driver is ready to run work.
+	 */
+	if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) &&
+			(kbdev->reg_start == 0x2d000000))
+		return juno_protected_mode_enter(kbdev) == 0;
+
+	return false;
+}
+
+struct kbase_protected_ops juno_protected_ops = {
+	.protected_mode_enter = juno_protected_mode_enter,
+	.protected_mode_reset = juno_protected_mode_reset,
+	.protected_mode_supported = juno_protected_mode_supported,
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
new file mode 100755
index 0000000..ab29e9d
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 600000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 600000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (&juno_protected_ops)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
+#ifdef CONFIG_DEVFREQ_THERMAL
+extern struct devfreq_cooling_ops juno_model_ops;
+#endif
+extern struct kbase_protected_ops juno_protected_ops;
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
new file mode 100755
index 0000000..7cb3be7
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @brief Entry point to transfer control to a platform for early initialization
+ *
+ * This function is called early on in the initialization during execution of
+ * @ref kbase_driver_init.
+ *
+ * @return Zero to indicate success non-zero for failure.
+ */
+int kbase_platform_early_init(void);
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
new file mode 100755
index 0000000..01f9dfc
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+/**
+ * kbase_platform_fake_register - Entry point for fake platform registration
+ *
+ * This function is called early on in the initialization during execution of
+ * kbase_driver_init.
+ *
+ * Return: 0 to indicate success, non-zero for failure.
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Entry point for fake platform unregistration
+ *
+ * This function is called in the termination during execution of
+ * kbase_driver_exit.
+ */
+void kbase_platform_fake_unregister(void);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
new file mode 100755
index 0000000..084a156
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
new file mode 100755
index 0000000..dc4471b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -0,0 +1,82 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq()
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq()
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..15ce2bc
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -0,0 +1,85 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+#include "mali_kbase_config_platform.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+	.start = 0xFC010000,
+	.end = 0xFC010000 + (4096 * 4) - 1
+	}
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
new file mode 100755
index 0000000..4665f98
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,279 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START	(0x10000000)
+#define SYS_CFGDATA_OFFSET		(0x000000A0)
+#define SYS_CFGCTRL_OFFSET		(0x000000A4)
+#define SYS_CFGSTAT_OFFSET		(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		(1 << 31)
+#define READ_REG_BIT_VALUE			(0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			(0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		(1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			(1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE	(0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE		(2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		(1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			(1 <<  1)
+
+#define FEED_REG_BIT_MASK			(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+/* the following three values used for reading
+ * HBI value of the LogicTile daughterboard */
+#define VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 (0x10000000)
+#define VE_SYS_PROC_ID1_OFFSET (0x00000088)
+#define VE_LOGIC_TILE_HBI_MASK (0x00000FFF)
+
+#define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos))
+
+/**
+ * Values used for determining the GPU frequency based on the LogicTile type
+ * Used by the function kbase_get_platform_logic_tile_type
+ */
+#define VE_VIRTEX6_GPU_FREQ_MIN 5000
+#define VE_VIRTEX6_GPU_FREQ_MAX 5000
+#define VE_VIRTEX7_GPU_FREQ_MIN 40000
+#define VE_VIRTEX7_GPU_FREQ_MAX 40000
+#define VE_DEFAULT_GPU_FREQ_MIN 5000
+#define VE_DEFAULT_GPU_FREQ_MAX 5000
+
+
+#define CPU_CLOCK_SPEED_UNDEFINED (0)
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed -Retrieves the CPU clock speed
+ * @cpu_clock - the value of CPU clock speed in MHz
+ *
+ * Returns 0 on success, error code otherwise.
+ *
+ * The implementation is platform specific.
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	int err = 0;
+	u32 reg_val = 0;
+	u32 osc2_value = 0;
+	u32 pa_divide = 0;
+	u32 pb_divide = 0;
+	u32 pc_divide = 0;
+	void __iomem *syscfg_reg = NULL;
+	void __iomem *scc_reg = NULL;
+
+	if (CPU_CLOCK_SPEED_UNDEFINED != cpu_clock_speed) {
+		*cpu_clock = cpu_clock_speed;
+		return 0;
+	}
+
+	/* Init the value in case something goes wrong */
+	*cpu_clock = 0;
+
+	/* Map CPU register into virtual memory */
+	syscfg_reg = ioremap(MOTHERBOARD_SYS_CFG_START, 0x1000);
+	if (syscfg_reg == NULL) {
+		err = -EIO;
+		goto syscfg_reg_map_failed;
+	}
+
+	scc_reg = ioremap(CORETILE_EXPRESS_A9X4_SCC_START, 0x1000);
+	if (scc_reg == NULL) {
+		err = -EIO;
+		goto scc_reg_map_failed;
+	}
+
+	raw_spin_lock(&syscfg_lock);
+
+	/* Read SYS regs - OSC2 */
+	reg_val = readl(syscfg_reg + SYS_CFGCTRL_OFFSET);
+
+	/* Check if there is any other undergoing request */
+	if (reg_val & SYS_CFGCTRL_START_BIT_VALUE) {
+		err = -EBUSY;
+		goto ongoing_request;
+	}
+	/* Reset the CGFGSTAT reg */
+	writel(0, (syscfg_reg + SYS_CFGSTAT_OFFSET));
+
+	writel(SYS_CFGCTRL_START_BIT_VALUE | READ_REG_BIT_VALUE |
+			DCC_DEFAULT_BIT_VALUE |
+			SYS_CFG_OSC_FUNC_BIT_VALUE |
+			SITE_DEFAULT_BIT_VALUE |
+			BOARD_STACK_POS_DEFAULT_BIT_VALUE |
+			DEVICE_DEFAULT_BIT_VALUE,
+			(syscfg_reg + SYS_CFGCTRL_OFFSET));
+	/* Wait for the transaction to complete */
+	while (!(readl(syscfg_reg + SYS_CFGSTAT_OFFSET) &
+			SYS_CFG_COMPLETE_BIT_VALUE))
+		;
+	/* Read SYS_CFGSTAT Register to get the status of submitted
+	 * transaction */
+	reg_val = readl(syscfg_reg + SYS_CFGSTAT_OFFSET);
+
+	if (reg_val & SYS_CFG_ERROR_BIT_VALUE) {
+		/* Error while setting register */
+		err = -EIO;
+		goto set_reg_error;
+	}
+
+	osc2_value = readl(syscfg_reg + SYS_CFGDATA_OFFSET);
+	/* Read the SCC CFGRW0 register */
+	reg_val = readl(scc_reg);
+
+	/*
+	 * Select the appropriate feed:
+	 * CFGRW0[0] - CLKOB
+	 * CFGRW0[1] - CLKOC
+	 * CFGRW0[2] - FACLK (CLK)B FROM AXICLK PLL)
+	 */
+	/* Calculate the  FCLK */
+	if (IS_SINGLE_BIT_SET(reg_val, 0)) {
+		/* CFGRW0[0] - CLKOB */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[10:7] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PB_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 1)) {
+		/* CFGRW0[1] - CLKOC */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[14:11] */
+		pc_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PC_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PC_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 2)) {
+		/* CFGRW0[2] - FACLK */
+		/* CFGRW0[18:15] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PA_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[22:19] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PB_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else {
+		err = -EIO;
+	}
+
+set_reg_error:
+ongoing_request:
+	raw_spin_unlock(&syscfg_lock);
+	*cpu_clock /= HZ_IN_MHZ;
+
+	if (!err)
+		cpu_clock_speed = *cpu_clock;
+
+	iounmap(scc_reg);
+
+scc_reg_map_failed:
+	iounmap(syscfg_reg);
+
+syscfg_reg_map_failed:
+
+	return err;
+}
+
+/**
+ * kbase_get_platform_logic_tile_type -  determines which LogicTile type
+ * is used by Versatile Express
+ *
+ * When platform_config build parameter is specified as vexpress, i.e.,
+ * platform_config=vexpress, GPU frequency may vary dependent on the
+ * particular platform. The GPU frequency depends on the LogicTile type.
+ *
+ * This function determines which LogicTile type is used by the platform by
+ * reading the HBI value of the daughterboard which holds the LogicTile:
+ *
+ * 0x217 HBI0217 Virtex-6
+ * 0x192 HBI0192 Virtex-5
+ * 0x247 HBI0247 Virtex-7
+ *
+ * Return: HBI value of the logic tile daughterboard, zero if not accessible
+ */
+static u32 kbase_get_platform_logic_tile_type(void)
+{
+	void __iomem *syscfg_reg = NULL;
+	u32 sys_procid1 = 0;
+
+	syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 + VE_SYS_PROC_ID1_OFFSET, 4);
+	if (NULL != syscfg_reg) {
+		sys_procid1 = readl(syscfg_reg);
+		iounmap(syscfg_reg);
+	}
+
+	return sys_procid1 & VE_LOGIC_TILE_HBI_MASK;
+}
+
+u32 kbase_get_platform_min_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MIN;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MIN;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MIN;
+	}
+}
+
+u32 kbase_get_platform_max_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MAX;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MAX;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MAX;
+	}
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
new file mode 100755
index 0000000..da86569
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+/**
+ * Get the minimum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_min_freq(void);
+
+/**
+ * Get the maximum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_max_freq(void);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
new file mode 100755
index 0000000..d9bfabc
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
@@ -0,0 +1,16 @@
+#
+# (C) COPYRIGHT 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_vexpress.o
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
new file mode 100755
index 0000000..b0490ca
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 5000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 5000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..3ff0930
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -0,0 +1,79 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+			     .start = 0x2f010000,
+			     .end = 0x2f010000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
new file mode 100755
index 0000000..0cb41ce
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
new file mode 100755
index 0000000..22ffccb
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -0,0 +1,82 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 10000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 10000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..76ffe4a
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 75,
+	.mmu_irq_number = 76,
+	.gpu_irq_number = 77,
+	.io_memory_region = {
+			     .start = 0x2F000000,
+			     .end = 0x2F000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
new file mode 100755
index 0000000..816dff4
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ					    (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START		(0x10000000)
+#define SYS_CFGDATA_OFFSET				(0x000000A0)
+#define SYS_CFGCTRL_OFFSET				(0x000000A4)
+#define SYS_CFGSTAT_OFFSET				(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		  (1 << 31)
+#define READ_REG_BIT_VALUE				  (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			  (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		  (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			  (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE	      (2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		  (1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			  (1 <<  1)
+
+#define FEED_REG_BIT_MASK				(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+#define IS_SINGLE_BIT_SET(val, pos)		(val&(1<<pos))
+
+#define CPU_CLOCK_SPEED_UNDEFINED 0
+
+#define CPU_CLOCK_SPEED_6XV7 50
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed
+ * @brief  Retrieves the CPU clock speed.
+ *         The implementation is platform specific.
+ * @param[out]    cpu_clock - the value of CPU clock speed in MHz
+ * @return        0 on success, 1 otherwise
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	/* TODO: MIDBASE-2873 - Provide runtime detection of CPU clock freq for 6XV7 board */
+	*cpu_clock = CPU_CLOCK_SPEED_6XV7;
+
+	return 0;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
new file mode 100755
index 0000000..23647cc
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
new file mode 100755
index 0000000..5fa9b39
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_ukk_os.h
+ * Types and definitions that are common for Linux OSs for the kernel side of the
+ * User-Kernel interface.
+ */
+
+#ifndef _UKK_OS_H_ /* Linux version */
+#define _UKK_OS_H_
+
+#include <linux/fs.h>
+
+/**
+ * @addtogroup uk_api User-Kernel Interface API
+ * @{
+ */
+
+/**
+ * @addtogroup uk_api_kernel UKK (Kernel side)
+ * @{
+ */
+
+/**
+ * Internal OS specific data structure associated with each UKK session. Part
+ * of a ukk_session object.
+ */
+typedef struct ukkp_session {
+	int dummy;     /**< No internal OS specific data at this time */
+} ukkp_session;
+
+/** @} end group uk_api_kernel */
+
+/** @} end group uk_api */
+
+#endif /* _UKK_OS_H__ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/midgard/sconscript b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/sconscript
new file mode 100755
index 0000000..c36e92d
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/midgard/sconscript
@@ -0,0 +1,127 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+import re
+import sys
+Import('env')
+
+
+if Glob('tests/sconscript'):
+	SConscript( 'tests/sconscript' )
+
+mock_test = 0
+
+# Fake platform is a transient solution for GPL drivers running in kernel that does not provide configuration via platform data.
+# For such kernels fake_platform_device should be set to 1. For kernels providing platform data fake_platform_device should be set to 0.
+if env['platform_config']=='devicetree':
+	fake_platform_device = 0
+else:
+	fake_platform_device = 1
+
+# Source files required for kbase.
+kbase_src = [Glob('#kernel/drivers/gpu/arm/midgard/*.c'),
+             Glob('#kernel/drivers/gpu/arm/midgard/*.c'),
+             Glob('#kernel/drivers/gpu/arm/midgard/backend/*/*.c'),
+             Glob('#kernel/drivers/gpu/arm/midgard/backend/*/*.h'),
+             Glob('#kernel/drivers/gpu/arm/midgard/platform/%s/*.c' % (env['platform_config'])),
+             Glob('#kernel/drivers/gpu/arm/midgard/*.h'),
+             Glob('#kernel/drivers/gpu/arm/midgard/*.h'),
+             Glob('#kernel/drivers/gpu/arm/midgard/Makefile',
+             Glob('#kernel/drivers/gpu/arm/midgard/K*'))
+             ]
+
+kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*.c')]
+
+if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1':
+	kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')]
+	mock_test = 1
+
+# we need platform config for GPL version using fake platform
+if fake_platform_device==1:
+	# Check if we are compiling for PBX
+	linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+	search_term = '^[\ ]*CONFIG_MACH_REALVIEW_PBX[\ ]*=[\ ]*y'
+	REALVIEW_PBX = 0
+	for line in open(linux_config_file, 'r'):
+		if re.search(search_term, line):
+			REALVIEW_PBX = 1
+			break
+	if REALVIEW_PBX == 1 and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'):
+		sys.stderr.write("WARNING: Building for a PBX kernel but with platform_config=vexpress*\n")
+	# if the file platform config file is in the tpip directory then use that, otherwise use the default config directory
+	if Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])):
+		kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config']))
+	else:
+		kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/*%s.c' % (env['platform_config']))
+	
+# Note: cleaning via the Linux kernel build system does not yet work
+if env.GetOption('clean') :
+	env.Execute(Action("make clean", '[clean] kbase'))
+	cmd = env.Command(['$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/mali_platform_fake.ko'], kbase_src, [])
+else:
+	if env['os'] == 'android':
+		env['android'] = 1
+	else:
+		env['android'] = 0
+
+	if env['unit'] == '1':
+		env['kernel_test'] = 1
+	else:
+		env['kernel_test'] = 0
+
+	#Extract environment options, note the trailing spaces are important
+	env_options = \
+	"PLATFORM=${platform} " +\
+	"MALI_ERROR_INJECT_ON=${error_inject} " +\
+	"MALI_ANDROID=${android} " +\
+	"MALI_KERNEL_TEST_API=${kernel_test} " +\
+	"MALI_UNIT_TEST=${unit} " +\
+	"MALI_RELEASE_NAME=\"${mali_release_name}\" "+\
+	"MALI_MOCK_TEST=%s " % mock_test +\
+	"MALI_CUSTOMER_RELEASE=${release} " +\
+	"MALI_INSTRUMENTATION_LEVEL=${instr} " +\
+	"MALI_COVERAGE=${coverage} " +\
+	"MALI_BUS_LOG=${buslog} "
+
+	make_action_start = "cd ${SOURCE.dir} && make -j%d " % GetOption('num_jobs')
+	make_action_end = "%s && cp mali_kbase.ko $STATIC_LIB_PATH/mali_kbase.ko" % env.kernel_get_config_defines(fake_platform_device)
+	make_action = make_action_start + env_options + make_action_end
+
+	makeAction=Action(make_action, '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src, [makeAction])
+
+# Add a dependency on kds.ko.
+# Only necessary when KDS is not built into the kernel.
+#
+if env['os'] != 'android':
+	linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+	search_term = '^[\ ]*CONFIG_KDS[\ ]*=[\ ]*y'
+	kds_in_kernel = 0
+	for line in open(linux_config_file, 'r'):
+		if re.search(search_term, line):
+	        # KDS in kernel.
+			kds_in_kernel = 1
+	if not kds_in_kernel:
+		env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/kds.ko')
+
+# need Module.symvers from ump.ko build
+if int(env['ump']) == 1:
+	env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/ump.ko')
+
+env.KernelObjTarget('kbase', cmd)
+
+env.AppendUnique(BASE=['cutils_linked_list'])
diff --git a/midgard/r13p0/kernel/drivers/gpu/arm/sconscript b/midgard/r13p0/kernel/drivers/gpu/arm/sconscript
new file mode 100755
index 0000000..924511b
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/arm/sconscript
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import glob
+
+
+SConscript('midgard/sconscript')
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/Kbuild b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/Kbuild
new file mode 100755
index 0000000..f10d58c
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/Kbuild
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+pl111_drm-y +=	pl111_drm_device.o \
+		pl111_drm_connector.o \
+		pl111_drm_crtc.o \
+		pl111_drm_cursor.o \
+		pl111_drm_dma_buf.o \
+		pl111_drm_encoder.o \
+		pl111_drm_fb.o \
+		pl111_drm_gem.o \
+		pl111_drm_pl111.o \
+		pl111_drm_platform.o \
+		pl111_drm_suspend.o \
+		pl111_drm_vma.o
+
+obj-$(CONFIG_DRM_PL111) += pl111_drm.o
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/Kconfig b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/Kconfig
new file mode 100755
index 0000000..60b465c
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/Kconfig
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+config DRM_PL111
+	tristate "DRM Support for PL111 CLCD Controller"
+	depends on DRM
+	select DRM_KMS_HELPER
+	select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+	help
+	  Choose this option for DRM support for the PL111 CLCD controller.
+	  If M is selected the module will be called pl111_drm.
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/Makefile b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/Makefile
new file mode 100755
index 0000000..2869f58
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: pl111_drm
+
+pl111_drm:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_USES_KDS=y CONFIG_DRM_PL111=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h
new file mode 100755
index 0000000..d3e0086
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h
@@ -0,0 +1,95 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+/**
+ * pl111_clcd_ext.h
+ * Extended CLCD register definitions
+ */
+
+#ifndef PL111_CLCD_EXT_H_
+#define PL111_CLCD_EXT_H_
+
+/* 
+ * PL111 cursor register definitions not defined in the kernel's clcd header.
+ * 
+ * TODO MIDEGL-1718: move to include/linux/amba/clcd.h
+ */
+
+#define CLCD_CRSR_IMAGE				0x00000800
+#define CLCD_CRSR_IMAGE_MAX_WORDS		256
+#define CLCD_CRSR_IMAGE_WORDS_PER_LINE		4
+#define CLCD_CRSR_IMAGE_PIXELS_PER_WORD	16
+
+#define CLCD_CRSR_LBBP_COLOR_MASK	0x00000003
+#define CLCD_CRSR_LBBP_BACKGROUND	0x0
+#define CLCD_CRSR_LBBP_FOREGROUND	0x1
+#define CLCD_CRSR_LBBP_TRANSPARENT	0x2
+#define CLCD_CRSR_LBBP_INVERSE		0x3
+
+
+#define CLCD_CRSR_CTRL			0x00000c00
+#define CLCD_CRSR_CONFIG		0x00000c04
+#define CLCD_CRSR_PALETTE_0		0x00000c08
+#define CLCD_CRSR_PALETTE_1		0x00000c0c
+#define CLCD_CRSR_XY			0x00000c10
+#define CLCD_CRSR_CLIP			0x00000c14
+#define CLCD_CRSR_IMSC			0x00000c20
+#define CLCD_CRSR_ICR			0x00000c24
+#define CLCD_CRSR_RIS			0x00000c28
+#define CLCD_MIS				0x00000c2c
+
+#define CRSR_CTRL_CRSR_ON		(1 << 0)
+#define CRSR_CTRL_CRSR_MAX		3
+#define CRSR_CTRL_CRSR_NUM_SHIFT	4
+#define CRSR_CTRL_CRSR_NUM_MASK		\
+	(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT)
+#define CRSR_CTRL_CURSOR_0		0
+#define CRSR_CTRL_CURSOR_1		1
+#define CRSR_CTRL_CURSOR_2		2
+#define CRSR_CTRL_CURSOR_3		3
+
+#define CRSR_CONFIG_CRSR_SIZE		(1 << 0)
+#define CRSR_CONFIG_CRSR_FRAME_SYNC	(1 << 1)
+
+#define CRSR_PALETTE_RED_SHIFT		0
+#define CRSR_PALETTE_GREEN_SHIFT	8
+#define CRSR_PALETTE_BLUE_SHIFT		16
+
+#define CRSR_PALETTE_RED_MASK		0x000000ff
+#define CRSR_PALETTE_GREEN_MASK		0x0000ff00
+#define CRSR_PALETTE_BLUE_MASK		0x00ff0000
+#define CRSR_PALETTE_MASK		(~0xff000000)
+
+#define CRSR_XY_MASK			0x000003ff
+#define CRSR_XY_X_SHIFT			0
+#define CRSR_XY_Y_SHIFT			16
+
+#define CRSR_XY_X_MASK			CRSR_XY_MASK
+#define CRSR_XY_Y_MASK			(CRSR_XY_MASK << CRSR_XY_Y_SHIFT)
+
+#define CRSR_CLIP_MASK			0x3f
+#define CRSR_CLIP_X_SHIFT		0
+#define CRSR_CLIP_Y_SHIFT		8
+
+#define CRSR_CLIP_X_MASK		CRSR_CLIP_MASK
+#define CRSR_CLIP_Y_MASK		(CRSR_CLIP_MASK << CRSR_CLIP_Y_SHIFT)
+
+#define CRSR_IMSC_CRSR_IM		(1<<0)
+#define CRSR_ICR_CRSR_IC		(1<<0)
+#define CRSR_RIS_CRSR_RIS		(1<<0)
+#define CRSR_MIS_CRSR_MIS		(1<<0)
+
+#endif /* PL111_CLCD_EXT_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h
new file mode 100755
index 0000000..64d87b6
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h
@@ -0,0 +1,270 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _PL111_DRM_H_
+#define _PL111_DRM_H_
+
+#define DRIVER_AUTHOR    "ARM Ltd."
+#define DRIVER_NAME      "pl111_drm"
+#define DRIVER_DESC      "DRM module for PL111"
+#define DRIVER_LICENCE   "GPL"
+#define DRIVER_ALIAS     "platform:pl111_drm"
+#define DRIVER_DATE      "20101111"
+#define DRIVER_VERSION   "0.2"
+#define DRIVER_MAJOR      2
+#define DRIVER_MINOR      1
+#define DRIVER_PATCHLEVEL 1
+
+/*
+ * Number of flips allowed in flight at any one time. Any more flips requested
+ * beyond this value will cause the caller to block until earlier flips have
+ * completed.
+ *
+ * For performance reasons, this must be greater than the number of buffers
+ * used in the rendering pipeline. Note that the rendering pipeline can contain
+ * different types of buffer, e.g.:
+ * - 2 final framebuffers
+ * - >2 geometry buffers for GPU use-cases
+ * - >2 vertex buffers for GPU use-cases
+ *
+ * For example, a system using 5 geometry buffers could have 5 flips in flight,
+ * and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 5 or greater.
+ *
+ * Whilst there may be more intermediate buffers (such as vertex/geometry) than
+ * final framebuffers, KDS is used to ensure that GPU rendering waits for the
+ * next off-screen buffer, so it doesn't overwrite an on-screen buffer and
+ * produce tearing.
+ */
+
+/*
+ * Here, we choose a conservative value. A lower value is most likely
+ * suitable for GPU use-cases.
+ */
+#define NR_FLIPS_IN_FLIGHT_THRESHOLD 16
+
+#define CLCD_IRQ_NEXTBASE_UPDATE (1u<<2)
+
+struct pl111_drm_flip_resource;
+
+struct pl111_gem_bo_dma {
+	dma_addr_t fb_dev_addr;
+	void *fb_cpu_addr;
+};
+
+struct pl111_gem_bo_shm {
+	struct page **pages;
+	dma_addr_t *dma_addrs;
+};
+
+struct pl111_gem_bo {
+	struct drm_gem_object gem_object;
+	u32 type;
+	union {
+		struct pl111_gem_bo_dma dma;
+		struct pl111_gem_bo_shm shm;
+	} backing_data;
+	struct sg_table *sgt;
+};
+
+extern struct pl111_drm_dev_private priv;
+
+struct pl111_drm_framebuffer {
+	struct drm_framebuffer fb;
+	struct pl111_gem_bo *bo;
+};
+
+struct pl111_drm_flip_resource {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* This is the kds set associated to the dma_buf we want to flip */
+	struct kds_resource_set *kds_res_set;
+#endif
+	struct drm_framebuffer *fb;
+	struct drm_crtc *crtc;
+	struct list_head link;
+	bool page_flip;
+	struct drm_pending_vblank_event *event;
+};
+
+struct pl111_drm_crtc {
+	struct drm_crtc crtc;
+	int crtc_index;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* This protects "old_kds_res_set" and "displaying_fb" */
+	spinlock_t current_displaying_lock;
+	/*
+	 * When a buffer is displayed its associated kds resource
+	 * will be obtained and stored here. Every time a buffer
+	 * flip is completed this old kds set is released and assigned
+	 * the kds set of the new buffer.
+	 */
+	struct kds_resource_set *old_kds_res_set;
+	/*
+	 * Stores which frame buffer is currently being displayed by
+	 * this CRTC or NULL if nothing is being displayed. It is used
+	 * to tell whether we need to obtain a set of kds resources for
+	 * exported buffer objects.
+	 */
+	struct drm_framebuffer *displaying_fb;
+#endif
+	struct drm_display_mode *new_mode;
+	struct drm_display_mode *current_mode;
+	int last_bpp;
+
+	/*
+	 * This spinlock protects "update_queue", "current_update_res"
+	 * and calls to do_flip_to_res() which updates the CLCD base
+	 * registers.
+	 */
+	spinlock_t base_update_lock;
+	/*
+	 * The resource that caused a base address update. Only one can be
+	 * pending, hence it's != NULL if there's a pending update
+	 */
+	struct pl111_drm_flip_resource *current_update_res;
+	/* Queue of things waiting to update the base address */
+	struct list_head update_queue;
+
+	void (*show_framebuffer_cb)(struct pl111_drm_flip_resource *flip_res,
+				struct drm_framebuffer *fb);
+};
+
+struct pl111_drm_connector {
+	struct drm_connector connector;
+};
+
+struct pl111_drm_encoder {
+	struct drm_encoder encoder;
+};
+
+struct pl111_drm_dev_private {
+	struct pl111_drm_crtc *pl111_crtc;
+
+	struct amba_device *amba_dev;
+	unsigned long mmio_start;
+	__u32 mmio_len;
+	void *regs;
+	struct clk *clk;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct kds_callback kds_cb;
+	struct kds_callback kds_obtain_current_cb;
+#endif
+	/*
+	 * Number of flips that were started in show_framebuffer_on_crtc(),
+	 * but haven't completed yet - because we do deferred flipping
+	 */
+	atomic_t nr_flips_in_flight;
+	wait_queue_head_t wait_for_flips;
+
+	/*
+	 * Used to prevent race between pl111_dma_buf_release and
+	 * drm_gem_prime_handle_to_fd
+	 */
+	struct mutex export_dma_buf_lock;
+
+	uint32_t number_crtcs;
+
+	/* Cache for flip resources used to avoid kmalloc on each page flip */
+	struct kmem_cache *page_flip_slab;
+};
+
+enum pl111_cursor_size {
+	CURSOR_32X32,
+	CURSOR_64X64
+};
+
+enum pl111_cursor_sync {
+	CURSOR_SYNC_NONE,
+	CURSOR_SYNC_VSYNC
+};
+
+
+/**
+ * Buffer allocation function which is more flexible than dumb_create(),
+ * it allows passing driver specific flags to control the kind of buffer
+ * to be allocated.
+ */
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file_priv);
+ 
+/****** TODO MIDEGL-1718: this should be moved to uapi/include/drm/pl111_drm.h ********/
+
+/* 
+ * Parameters for different buffer objects:
+ * bit [0]: backing storage
+ *	(0 -> SHM)
+ *	(1 -> DMA)
+ * bit [2:1]: kind of mapping
+ *	(0x0 -> uncached)
+ *	(0x1 -> write combine)
+ *	(0x2 -> cached)
+ */
+#define PL111_BOT_MASK		(0x7)
+#define PL111_BOT_SHM		(0x0 << 0)
+#define PL111_BOT_DMA		(0x1 << 0)
+#define PL111_BOT_UNCACHED	(0x0 << 1)
+#define PL111_BOT_WC		(0x1 << 1)
+#define PL111_BOT_CACHED	(0x2 << 1)
+
+/**
+ * User-desired buffer creation information structure.
+ *
+ * @size: user-desired memory allocation size.
+ *      - this size value would be page-aligned internally.
+ * @flags: user request for setting memory type or cache attributes as a bit op
+ *	- PL111_BOT_DMA / PL111_BOT_SHM
+ *	- PL111_BOT_UNCACHED / PL111_BOT_WC / PL111_BOT_CACHED
+ * @handle: returned a handle to created gem object.
+ *      - this handle will be set by gem module of kernel side.
+ */
+struct drm_pl111_gem_create {
+	uint32_t height;
+	uint32_t width;
+	uint32_t bpp;
+	uint32_t flags;
+	/* handle, pitch, size will be returned */
+	uint32_t handle;
+	uint32_t pitch;
+	uint64_t size;
+};
+
+#define DRM_PL111_GEM_CREATE		0x00
+
+#define DRM_IOCTL_PL111_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + \
+			DRM_PL111_GEM_CREATE, struct drm_pl111_gem_create)
+/****************************************************************************/
+
+#define PL111_FB_FROM_FRAMEBUFFER(drm_fb) \
+	(container_of(drm_fb, struct pl111_drm_framebuffer, fb))
+
+#define PL111_BO_FROM_FRAMEBUFFER(drm_fb) \
+	(container_of(drm_fb, struct pl111_drm_framebuffer, fb)->bo)
+
+#define PL111_BO_FROM_GEM(gem_obj) \
+	container_of(gem_obj, struct pl111_gem_bo, gem_object)
+
+#define to_pl111_crtc(x) container_of(x, struct pl111_drm_crtc, crtc)
+
+#define PL111_ENCODER_FROM_ENCODER(x) \
+	container_of(x, struct pl111_drm_encoder, encoder)
+
+#define PL111_CONNECTOR_FROM_CONNECTOR(x) \
+	container_of(x, struct pl111_drm_connector, connector)
+
+#include "pl111_drm_funcs.h"
+
+#endif /* _PL111_DRM_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c
new file mode 100755
index 0000000..c7c3a22
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c
@@ -0,0 +1,170 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_connector.c
+ * Implementation of the connector functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+
+static struct {
+	int w, h, type;
+} pl111_drm_modes[] = {
+	{ 640, 480,  DRM_MODE_TYPE_PREFERRED},
+	{ 800, 600,  0},
+	{1024, 768,  0},
+	{  -1,  -1, -1}
+};
+
+void pl111_connector_destroy(struct drm_connector *connector)
+{
+	struct pl111_drm_connector *pl111_connector =
+				PL111_CONNECTOR_FROM_CONNECTOR(connector);
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	kfree(pl111_connector);
+}
+
+enum drm_connector_status pl111_connector_detect(struct drm_connector
+							*connector, bool force)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return connector_status_connected;
+}
+
+void pl111_connector_dpms(struct drm_connector *connector, int mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+}
+
+struct drm_encoder *
+pl111_connector_helper_best_encoder(struct drm_connector *connector)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	if (connector->encoder != NULL) {
+		return connector->encoder; /* Return attached encoder */
+	} else {
+		/*
+		 * If there is no attached encoder we choose the best candidate
+		 * from the list.
+		 * For PL111 there is only one encoder so we return the first
+		 * one we find.
+		 * Other h/w would require a suitable criterion below.
+		 */
+		struct drm_encoder *encoder = NULL;
+		struct drm_device *dev = connector->dev;
+
+		list_for_each_entry(encoder, &dev->mode_config.encoder_list,
+					head) {
+			if (1) { /* criterion ? */
+				break;
+			}
+		}
+		return encoder; /* return best candidate encoder */
+	}
+}
+
+int pl111_connector_helper_get_modes(struct drm_connector *connector)
+{
+	int i = 0;
+	int count = 0;
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	while (pl111_drm_modes[i].w != -1) {
+		struct drm_display_mode *mode =
+				drm_mode_find_dmt(connector->dev,
+						pl111_drm_modes[i].w,
+						pl111_drm_modes[i].h,
+						60
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+						, false
+#endif
+						);
+
+		if (mode != NULL) {
+			mode->type |= pl111_drm_modes[i].type;
+			drm_mode_probed_add(connector, mode);
+			count++;
+		}
+
+		i++;
+	}
+
+	DRM_DEBUG_KMS("found %d modes\n", count);
+
+	return count;
+}
+
+int pl111_connector_helper_mode_valid(struct drm_connector *connector,
+					struct drm_display_mode *mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return MODE_OK;
+}
+
+const struct drm_connector_funcs connector_funcs = {
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = pl111_connector_destroy,
+	.detect = pl111_connector_detect,
+	.dpms = pl111_connector_dpms,
+};
+
+const struct drm_connector_helper_funcs connector_helper_funcs = {
+	.get_modes = pl111_connector_helper_get_modes,
+	.mode_valid = pl111_connector_helper_mode_valid,
+	.best_encoder = pl111_connector_helper_best_encoder,
+};
+
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev)
+{
+	struct pl111_drm_connector *pl111_connector;
+
+	pl111_connector = kzalloc(sizeof(struct pl111_drm_connector),
+					GFP_KERNEL);
+
+	if (pl111_connector == NULL) {
+		pr_err("Failed to allocated pl111_drm_connector\n");
+		return NULL;
+	}
+
+	drm_connector_init(dev, &pl111_connector->connector, &connector_funcs,
+				DRM_MODE_CONNECTOR_DVII);
+
+	drm_connector_helper_add(&pl111_connector->connector,
+					&connector_helper_funcs);
+
+	drm_sysfs_connector_add(&pl111_connector->connector);
+
+	return pl111_connector;
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c
new file mode 100755
index 0000000..ede07ff
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c
@@ -0,0 +1,449 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_crtc.c
+ * Implementation of the CRTC functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+static int pl111_crtc_num;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc)
+{
+	struct drm_device *dev = pl111_crtc->crtc.dev;
+	struct pl111_drm_flip_resource *old_flip_res;
+	struct pl111_gem_bo *bo;
+	unsigned long irq_flags;
+	int flips_in_flight;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	unsigned long flags;
+#endif
+
+	spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+
+	/*
+	 * Cache the flip resource that caused the IRQ since it will be
+	 * dispatched later. Early return if the IRQ isn't associated to
+	 * a base register update.
+	 * 
+	 * TODO MIDBASE-2790: disable IRQs when a flip is not pending.
+	 */
+	old_flip_res = pl111_crtc->current_update_res;
+	if (!old_flip_res) {
+		spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+		return;
+	}
+	pl111_crtc->current_update_res = NULL;
+
+	/* Prepare the next flip (if any) of the queue as soon as possible. */
+	if (!list_empty(&pl111_crtc->update_queue)) {
+		struct pl111_drm_flip_resource *flip_res;
+		/* Remove the head of the list */
+		flip_res = list_first_entry(&pl111_crtc->update_queue,
+			struct pl111_drm_flip_resource, link);
+		list_del(&flip_res->link);
+		do_flip_to_res(flip_res);
+		/*
+		 * current_update_res will be set, so guarentees that
+		 * another flip_res coming in gets queued instead of
+		 * handled immediately
+		 */
+	}
+	spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+	/* Finalize properly the flip that caused the IRQ */
+	DRM_DEBUG_KMS("DRM Finalizing old_flip_res=%p\n", old_flip_res);
+
+	bo = PL111_BO_FROM_FRAMEBUFFER(old_flip_res->fb);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+	release_kds_resource_and_display(old_flip_res);
+	spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+	/* Release DMA buffer on this flip */
+
+	if (bo->gem_object.export_dma_buf != NULL)
+		dma_buf_put(bo->gem_object.export_dma_buf);
+
+	drm_handle_vblank(dev, pl111_crtc->crtc_index);
+
+	/* Wake up any processes waiting for page flip event */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (old_flip_res->event) {
+		spin_lock_bh(&dev->event_lock);
+		drm_send_vblank_event(dev, pl111_crtc->crtc_index,
+					old_flip_res->event);
+		spin_unlock_bh(&dev->event_lock);
+	}
+#else
+	if (old_flip_res->event) {
+		struct drm_pending_vblank_event *e = old_flip_res->event;
+		struct timeval now;
+		unsigned int seq;
+
+		DRM_DEBUG_KMS("%s: wake up page flip event (%p)\n", __func__,
+				old_flip_res->event);
+
+		spin_lock_bh(&dev->event_lock);
+		seq = drm_vblank_count_and_time(dev, pl111_crtc->crtc_index,
+							&now);
+		e->pipe = pl111_crtc->crtc_index;
+		e->event.sequence = seq;
+		e->event.tv_sec = now.tv_sec;
+		e->event.tv_usec = now.tv_usec;
+
+		list_add_tail(&e->base.link,
+			&e->base.file_priv->event_list);
+
+		wake_up_interruptible(&e->base.file_priv->event_wait);
+		spin_unlock_bh(&dev->event_lock);
+	}
+#endif
+
+	drm_vblank_put(dev, pl111_crtc->crtc_index);
+
+	/*
+	 * workqueue.c:process_one_work():
+	 * "It is permissible to free the struct work_struct from
+	 *  inside the function that is called from it"
+	 */
+	kmem_cache_free(priv.page_flip_slab, old_flip_res);
+
+	flips_in_flight = atomic_dec_return(&priv.nr_flips_in_flight);
+	if (flips_in_flight == 0 ||
+			flips_in_flight == (NR_FLIPS_IN_FLIGHT_THRESHOLD - 1))
+		wake_up(&priv.wait_for_flips);
+
+	DRM_DEBUG_KMS("DRM release flip_res=%p\n", old_flip_res);
+}
+
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2)
+{
+	struct pl111_drm_flip_resource *flip_res = cb1;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+	pl111_crtc->show_framebuffer_cb(cb1, cb2);
+}
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+				struct drm_framebuffer *fb, bool page_flip,
+				struct drm_pending_vblank_event *event)
+{
+	struct pl111_gem_bo *bo;
+	struct pl111_drm_flip_resource *flip_res;
+	int flips_in_flight;
+	int old_flips_in_flight;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+	crtc->fb = fb;
+#else
+	crtc->primary->fb = fb;
+#endif
+
+	bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	if (bo == NULL) {
+		DRM_DEBUG_KMS("Failed to get pl111_gem_bo object\n");
+		return -EINVAL;
+	}
+
+	/* If this is a full modeset, wait for all outstanding flips to complete
+	 * before continuing. This avoids unnecessary complication from being
+	 * able to queue up multiple modesets and queues of mixed modesets and
+	 * page flips.
+	 *
+	 * Modesets should be uncommon and will not be performant anyway, so
+	 * making them synchronous should have negligible performance impact.
+	 */
+	if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+				atomic_read(&priv.nr_flips_in_flight) == 0);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * There can be more 'early display' flips in flight than there are
+	 * buffers, and there is (currently) no explicit bound on the number of
+	 * flips. Hence, we need a new allocation for each one.
+	 *
+	 * Note: this could be optimized down if we knew a bound on the flips,
+	 * since an application can only have so many buffers in flight to be
+	 * useful/not hog all the memory
+	 */
+	flip_res = kmem_cache_alloc(priv.page_flip_slab, GFP_KERNEL);
+	if (flip_res == NULL) {
+		pr_err("kmem_cache_alloc failed to alloc - flip ignored\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * increment flips in flight, whilst blocking when we reach
+	 * NR_FLIPS_IN_FLIGHT_THRESHOLD
+	 */
+	do {
+		/*
+		 * Note: use of assign-and-then-compare in the condition to set
+		 * flips_in_flight
+		 */
+		int ret = wait_event_killable(priv.wait_for_flips,
+				(flips_in_flight =
+					atomic_read(&priv.nr_flips_in_flight))
+				< NR_FLIPS_IN_FLIGHT_THRESHOLD);
+		if (ret != 0) {
+			kmem_cache_free(priv.page_flip_slab, flip_res);
+			return ret;
+		}
+
+		old_flips_in_flight = atomic_cmpxchg(&priv.nr_flips_in_flight,
+					flips_in_flight, flips_in_flight + 1);
+	} while (old_flips_in_flight != flips_in_flight);
+
+	flip_res->fb = fb;
+	flip_res->crtc = crtc;
+	flip_res->page_flip = page_flip;
+	flip_res->event = event;
+	INIT_LIST_HEAD(&flip_res->link);
+	DRM_DEBUG_KMS("DRM alloc flip_res=%p\n", flip_res);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	if (bo->gem_object.export_dma_buf != NULL) {
+		struct dma_buf *buf = bo->gem_object.export_dma_buf;
+		unsigned long shared[1] = { 0 };
+		struct kds_resource *resource_list[1] = {
+				get_dma_buf_kds_resource(buf) };
+		int err;
+
+		get_dma_buf(buf);
+		DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+
+		/* Wait for the KDS resource associated with this buffer */
+		err = kds_async_waitall(&flip_res->kds_res_set,
+					&priv.kds_cb, flip_res, fb, 1, shared,
+					resource_list);
+		BUG_ON(err);
+	} else {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+		DRM_DEBUG_KMS("No dma_buf for this flip\n");
+
+		/* No dma-buf attached so just call the callback directly */
+		flip_res->kds_res_set = NULL;
+		pl111_crtc->show_framebuffer_cb(flip_res, fb);
+	}
+#else
+	if (bo->gem_object.export_dma_buf != NULL) {
+		struct dma_buf *buf = bo->gem_object.export_dma_buf;
+
+		get_dma_buf(buf);
+		DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+	} else {
+		DRM_DEBUG_KMS("No dma_buf for this flip\n");
+	}
+
+	/* No dma-buf attached to this so just call the callback directly */
+	{
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		pl111_crtc->show_framebuffer_cb(flip_res, fb);
+	}
+#endif
+
+	/* For the same reasons as the wait at the start of this function,
+	 * wait for the modeset to complete before continuing.
+	 */
+	if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+				flips_in_flight == 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int pl111_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+			struct drm_pending_vblank_event *event)
+#else
+			struct drm_pending_vblank_event *event,
+			uint32_t flags)
+#endif
+{
+	DRM_DEBUG_KMS("%s: crtc=%p, fb=%p, event=%p\n",
+			__func__, crtc, fb, event);
+	return show_framebuffer_on_crtc(crtc, fb, true, event);
+}
+
+int pl111_crtc_helper_mode_set(struct drm_crtc *crtc,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode,
+				int x, int y, struct drm_framebuffer *old_fb)
+{
+	int ret;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+	struct drm_display_mode *duplicated_mode;
+
+	DRM_DEBUG_KMS("DRM crtc_helper_mode_set, x=%d y=%d bpp=%d\n",
+			adjusted_mode->hdisplay, adjusted_mode->vdisplay,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+			crtc->fb->bits_per_pixel);
+#else
+			crtc->primary->fb->bits_per_pixel);
+#endif
+
+	duplicated_mode = drm_mode_duplicate(crtc->dev, adjusted_mode);
+	if (!duplicated_mode)
+		return -ENOMEM;
+
+	pl111_crtc->new_mode = duplicated_mode;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+	ret = show_framebuffer_on_crtc(crtc, crtc->fb, false, NULL);
+#else
+	ret = show_framebuffer_on_crtc(crtc, crtc->primary->fb, false, NULL);
+#endif
+	if (ret != 0) {
+		pl111_crtc->new_mode = pl111_crtc->current_mode;
+		drm_mode_destroy(crtc->dev, duplicated_mode);
+	}
+
+	return ret;
+}
+
+void pl111_crtc_helper_prepare(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+void pl111_crtc_helper_commit(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+bool pl111_crtc_helper_mode_fixup(struct drm_crtc *crtc,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0))
+				const struct drm_display_mode *mode,
+#else
+				struct drm_display_mode *mode,
+#endif
+				struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+#ifdef CONFIG_ARCH_VEXPRESS
+	/*
+	 * 1024x768 with more than 16 bits per pixel may not work 
+	 * correctly on Versatile Express due to bandwidth issues
+	 */
+	if (mode->hdisplay == 1024 && mode->vdisplay == 768 &&
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+			crtc->fb->bits_per_pixel > 16) {
+#else
+			crtc->primary->fb->bits_per_pixel > 16) {
+#endif
+		DRM_INFO("*WARNING* 1024x768 at > 16 bpp may suffer corruption\n");
+	}
+#endif
+
+	return true;
+}
+
+void pl111_crtc_helper_disable(struct drm_crtc *crtc)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+	/* don't disable crtc until no flips in flight as irq will be disabled */
+	ret = wait_event_killable(priv.wait_for_flips, atomic_read(&priv.nr_flips_in_flight) == 0);
+	if(ret) {
+		pr_err("pl111_crtc_helper_disable failed\n");
+		return;
+	}
+	clcd_disable(crtc);
+}
+
+void pl111_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+	drm_crtc_cleanup(crtc);
+	kfree(pl111_crtc);
+}
+
+const struct drm_crtc_funcs crtc_funcs = {
+	.cursor_set = pl111_crtc_cursor_set,
+	.cursor_move = pl111_crtc_cursor_move,
+	.set_config = drm_crtc_helper_set_config,
+	.page_flip = pl111_crtc_page_flip,
+	.destroy = pl111_crtc_destroy
+};
+
+const struct drm_crtc_helper_funcs crtc_helper_funcs = {
+	.mode_set = pl111_crtc_helper_mode_set,
+	.prepare = pl111_crtc_helper_prepare,
+	.commit = pl111_crtc_helper_commit,
+	.mode_fixup = pl111_crtc_helper_mode_fixup,
+	.disable = pl111_crtc_helper_disable,
+};
+
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev)
+{
+	struct pl111_drm_crtc *pl111_crtc;
+
+	pl111_crtc = kzalloc(sizeof(struct pl111_drm_crtc), GFP_KERNEL);
+	if (pl111_crtc == NULL) {
+		pr_err("Failed to allocated pl111_drm_crtc\n");
+		return NULL;
+	}
+
+	drm_crtc_init(dev, &pl111_crtc->crtc, &crtc_funcs);
+	drm_crtc_helper_add(&pl111_crtc->crtc, &crtc_helper_funcs);
+
+	pl111_crtc->crtc_index = pl111_crtc_num;
+	pl111_crtc_num++;
+	pl111_crtc->crtc.enabled = 0;
+	pl111_crtc->last_bpp = 0;
+	pl111_crtc->current_update_res = NULL;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	pl111_crtc->displaying_fb = NULL;
+	pl111_crtc->old_kds_res_set = NULL;
+	spin_lock_init(&pl111_crtc->current_displaying_lock);
+#endif
+	pl111_crtc->show_framebuffer_cb = show_framebuffer_on_crtc_cb_internal;
+	INIT_LIST_HEAD(&pl111_crtc->update_queue);
+	spin_lock_init(&pl111_crtc->base_update_lock);
+
+	return pl111_crtc;
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c
new file mode 100755
index 0000000..4bf20fe
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c
@@ -0,0 +1,331 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_cursor.c
+ * Implementation of cursor functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_clcd_ext.h"
+#include "pl111_drm.h"
+
+#define PL111_MAX_CURSOR_WIDTH (64)
+#define PL111_MAX_CURSOR_HEIGHT (64)
+
+#define ARGB_2_LBBP_BINARY_THRESHOLD	(1 << 7)
+#define ARGB_ALPHA_SHIFT		24
+#define ARGB_ALPHA_MASK			(0xff << ARGB_ALPHA_SHIFT)
+#define ARGB_RED_SHIFT			16
+#define ARGB_RED_MASK			(0xff << ARGB_RED_SHIFT)
+#define ARGB_GREEN_SHIFT		8
+#define ARGB_GREEN_MASK			(0xff << ARGB_GREEN_SHIFT)
+#define ARGB_BLUE_SHIFT			0
+#define ARGB_BLUE_MASK			(0xff << ARGB_BLUE_SHIFT)
+
+
+void pl111_set_cursor_size(enum pl111_cursor_size size)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+	if (size == CURSOR_64X64)
+		reg_data |= CRSR_CONFIG_CRSR_SIZE;
+	else
+		reg_data &= ~CRSR_CONFIG_CRSR_SIZE;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor_sync(enum pl111_cursor_sync sync)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+	if (sync == CURSOR_SYNC_VSYNC)
+		reg_data |= CRSR_CONFIG_CRSR_FRAME_SYNC;
+	else
+		reg_data &= ~CRSR_CONFIG_CRSR_FRAME_SYNC;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor(u32 cursor)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+	reg_data &= ~(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT);
+	reg_data |= (cursor & CRSR_CTRL_CRSR_MAX) << CRSR_CTRL_CRSR_NUM_SHIFT;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_enable(bool enable)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+	if (enable)
+		reg_data |= CRSR_CTRL_CRSR_ON;
+	else
+		reg_data &= ~CRSR_CTRL_CRSR_ON;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_position(u32 x, u32 y)
+{
+	u32 reg_data = (x & CRSR_XY_MASK) |
+			((y & CRSR_XY_MASK) << CRSR_XY_Y_SHIFT);
+
+	writel(reg_data, priv.regs + CLCD_CRSR_XY);
+}
+
+void pl111_set_cursor_clipping(u32 x, u32 y)
+{
+	u32 reg_data;
+
+	/* 
+	 * Do not allow setting clipping values larger than
+	 * the cursor size since the cursor is already fully hidden
+	 * when x,y = PL111_MAX_CURSOR_WIDTH.
+	 */
+	if (x > PL111_MAX_CURSOR_WIDTH)
+		x = PL111_MAX_CURSOR_WIDTH;
+	if (y > PL111_MAX_CURSOR_WIDTH)
+		y = PL111_MAX_CURSOR_WIDTH;
+
+	reg_data = (x & CRSR_CLIP_MASK) |
+			((y & CRSR_CLIP_MASK) << CRSR_CLIP_Y_SHIFT);
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CLIP);
+}
+
+void pl111_set_cursor_palette(u32 color0, u32 color1)
+{
+	writel(color0 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_0);
+	writel(color1 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_1);
+}
+
+void pl111_cursor_enable(void)
+{
+	pl111_set_cursor_sync(CURSOR_SYNC_VSYNC);
+	pl111_set_cursor_size(CURSOR_64X64);
+	pl111_set_cursor_palette(0x0, 0x00ffffff);
+	pl111_set_cursor_enable(true);
+}
+
+void pl111_cursor_disable(void)
+{
+	pl111_set_cursor_enable(false);
+}
+
+/* shift required to locate pixel into the correct position in
+ * a cursor LBBP word, indexed by x mod 16.
+ */
+static const unsigned char
+x_mod_16_to_value_shift[CLCD_CRSR_IMAGE_PIXELS_PER_WORD] = {
+	6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24
+};
+
+/* Pack the pixel value into its correct position in the buffer as specified
+ * for LBBP */
+static inline void
+set_lbbp_pixel(uint32_t *buffer, unsigned int x, unsigned int y,
+				uint32_t value)
+{
+	u32 *cursor_ram = priv.regs + CLCD_CRSR_IMAGE;
+	uint32_t shift;
+	uint32_t data;
+
+	shift = x_mod_16_to_value_shift[x % CLCD_CRSR_IMAGE_PIXELS_PER_WORD];
+
+	/* Get the word containing this pixel */
+	cursor_ram = cursor_ram + (x >> CLCD_CRSR_IMAGE_WORDS_PER_LINE) + (y << 2);
+
+	/* Update pixel in cursor RAM */
+	data = readl(cursor_ram);
+	data &= ~(CLCD_CRSR_LBBP_COLOR_MASK << shift);
+	data |= value << shift;
+	writel(data, cursor_ram);
+}
+
+static u32 pl111_argb_to_lbbp(u32 argb_pix)
+{
+	u32 lbbp_pix = CLCD_CRSR_LBBP_TRANSPARENT;
+	u32 alpha = (argb_pix & ARGB_ALPHA_MASK) >> ARGB_ALPHA_SHIFT;
+	u32 red = (argb_pix & ARGB_RED_MASK) >> ARGB_RED_SHIFT;
+	u32 green = (argb_pix & ARGB_GREEN_MASK) >> ARGB_GREEN_SHIFT;
+	u32 blue =  (argb_pix & ARGB_BLUE_MASK) >> ARGB_BLUE_SHIFT;
+
+	/*
+	 * Converting from 8 pixel transparency to binary transparency
+	 * it's the best we can achieve.
+	 */
+	if (alpha & ARGB_2_LBBP_BINARY_THRESHOLD) {
+		u32 gray, max, min;
+
+		/*
+		 * Convert to gray using the lightness method:
+		 * gray = [max(R,G,B) + min(R,G,B)]/2
+		 */
+		min = min(red, green);
+		min = min(min, blue);
+		max = max(red, green);
+		max = max(max, blue);
+		gray = (min + max) >> 1; /* divide by 2 */
+		/* Apply binary threshold to the gray value calculated */
+		if (gray & ARGB_2_LBBP_BINARY_THRESHOLD)
+			lbbp_pix = CLCD_CRSR_LBBP_FOREGROUND;
+		else
+			lbbp_pix = CLCD_CRSR_LBBP_BACKGROUND;
+	}
+
+	return lbbp_pix;
+}
+
+/*
+ * The PL111 hardware cursor supports only LBBP which is a 2bpp format but
+ * the cursor format from userspace is ARGB8888 so we need to convert
+ *  to LBBP here.
+ */
+static void pl111_set_cursor_image(u32 *data)
+{
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+	/* Add 1 on width to insert trailing NULL */
+	char string_cursor[PL111_MAX_CURSOR_WIDTH + 1];
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+	unsigned int x;
+	unsigned int y;
+
+	for (y = 0; y < PL111_MAX_CURSOR_HEIGHT; y++) {
+		for (x = 0; x < PL111_MAX_CURSOR_WIDTH; x++) {
+			u32 value = pl111_argb_to_lbbp(*data);
+
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+			if (value == CLCD_CRSR_LBBP_TRANSPARENT)
+				string_cursor[x] = 'T';
+			else if (value == CLCD_CRSR_LBBP_FOREGROUND)
+				string_cursor[x] = 'F';
+			else if (value == CLCD_CRSR_LBBP_INVERSE)
+				string_cursor[x] = 'I';
+			else
+				string_cursor[x] = 'B';
+
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+			set_lbbp_pixel(data, x, y, value);
+			++data;
+		}
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+		string_cursor[PL111_MAX_CURSOR_WIDTH] = '\0';
+		DRM_INFO("%s\n", string_cursor);
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+	}
+}
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+			   struct drm_file *file_priv,
+			   uint32_t handle,
+			   uint32_t width,
+			   uint32_t height)
+{
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+
+	DRM_DEBUG_KMS("handle = %u, width = %u, height = %u\n",
+		handle, width, height);
+
+	if (!handle) {
+		pl111_cursor_disable();
+		return 0;
+	}
+
+	if ((width != PL111_MAX_CURSOR_WIDTH) ||
+	    (height != PL111_MAX_CURSOR_HEIGHT))
+		return -EINVAL;
+
+	obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
+	if (!obj) {
+		DRM_ERROR("Cannot find cursor object for handle = %d\n",
+			  handle);
+		return -ENOENT;
+	}
+
+	/*
+	 * We expect a PL111_MAX_CURSOR_WIDTH x PL111_MAX_CURSOR_HEIGHT
+	 * ARGB888 buffer object in the input.
+	 *
+	 */
+	if (obj->size < (PL111_MAX_CURSOR_WIDTH * PL111_MAX_CURSOR_HEIGHT * 4)) {
+		DRM_ERROR("Cannot set cursor with an obj size = %d\n",
+			  obj->size);
+		drm_gem_object_unreference_unlocked(obj);
+		return -EINVAL;
+	}
+
+	bo = PL111_BO_FROM_GEM(obj);
+	if (!(bo->type & PL111_BOT_DMA)) {
+		DRM_ERROR("Tried to set cursor with non DMA backed obj = %p\n",
+			  obj);
+		drm_gem_object_unreference_unlocked(obj);
+		return -EINVAL;
+	}
+
+	pl111_set_cursor_image(bo->backing_data.dma.fb_cpu_addr);
+
+	/*
+	 * Since we copy the contents of the buffer to the HW cursor internal
+	 * memory this GEM object is not needed anymore.
+	 */
+	drm_gem_object_unreference_unlocked(obj);
+
+	pl111_cursor_enable();
+
+	return 0;
+}
+
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+			    int x, int y)
+{
+	int x_clip = 0;
+	int y_clip = 0;
+
+	DRM_DEBUG("x %d y %d\n", x, y);
+
+	/*
+	 * The cursor image is clipped automatically at the screen limits when
+	 * it extends beyond the screen image to the right or bottom but
+	 * we must clip it using pl111 HW features for negative values.
+	 */
+	if (x < 0) {
+		x_clip = -x;
+		x = 0;
+	}
+	if (y < 0) {
+		y_clip = -y;
+		y = 0;
+	}
+
+	pl111_set_cursor_clipping(x_clip, y_clip);
+	pl111_set_cursor_position(x, y);
+
+	return 0;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c
new file mode 100755
index 0000000..6619c07
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c
@@ -0,0 +1,338 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_device.c
+ * Implementation of the Linux device driver entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+struct pl111_drm_dev_private priv;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void initial_kds_obtained(void *cb1, void *cb2)
+{
+	wait_queue_head_t *wait = (wait_queue_head_t *) cb1;
+	bool *cb_has_called = (bool *) cb2;
+
+	*cb_has_called = true;
+	wake_up(wait);
+}
+
+/* Must be called from within current_displaying_lock spinlock */
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	pl111_crtc->displaying_fb = flip_res->fb;
+
+	/* Release the previous buffer */
+	if (pl111_crtc->old_kds_res_set != NULL) {
+		/*
+		 * Can flip to the same buffer, but must not release the current
+		 * resource set
+		 */
+		BUG_ON(pl111_crtc->old_kds_res_set == flip_res->kds_res_set);
+		kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+	}
+	/* Record the current buffer, to release on the next buffer flip */
+	pl111_crtc->old_kds_res_set = flip_res->kds_res_set;
+}
+#endif
+
+void pl111_drm_preclose(struct drm_device *dev, struct drm_file *file_priv)
+{
+	DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+void pl111_drm_lastclose(struct drm_device *dev)
+{
+	DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+/*
+ * pl111 does not have a proper HW counter for vblank IRQs so enable_vblank
+ * and disable_vblank are just no op callbacks.
+ */
+static int pl111_enable_vblank(struct drm_device *dev, int crtc)
+{
+	DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+	return 0;
+}
+
+static void pl111_disable_vblank(struct drm_device *dev, int crtc)
+{
+	DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+}
+
+struct drm_mode_config_funcs mode_config_funcs = {
+	.fb_create = pl111_fb_create,
+};
+
+static int pl111_modeset_init(struct drm_device *dev)
+{
+	struct drm_mode_config *mode_config;
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	struct pl111_drm_connector *pl111_connector;
+	struct pl111_drm_encoder *pl111_encoder;
+	int ret = 0;
+
+	if (priv == NULL)
+		return -EINVAL;
+
+	drm_mode_config_init(dev);
+	mode_config = &dev->mode_config;
+	mode_config->funcs = &mode_config_funcs;
+	mode_config->min_width = 1;
+	mode_config->max_width = 1024;
+	mode_config->min_height = 1;
+	mode_config->max_height = 768;
+
+	priv->pl111_crtc = pl111_crtc_create(dev);
+	if (priv->pl111_crtc == NULL) {
+		pr_err("Failed to create pl111_drm_crtc\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	priv->number_crtcs = 1;
+
+	pl111_connector = pl111_connector_create(dev);
+	if (pl111_connector == NULL) {
+		pr_err("Failed to create pl111_drm_connector\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	pl111_encoder = pl111_encoder_create(dev, 1);
+	if (pl111_encoder == NULL) {
+		pr_err("Failed to create pl111_drm_encoder\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	ret = drm_mode_connector_attach_encoder(&pl111_connector->connector,
+						&pl111_encoder->encoder);
+	if (ret != 0) {
+		DRM_ERROR("Failed to attach encoder\n");
+		goto out_config;
+	}
+
+	pl111_connector->connector.encoder = &pl111_encoder->encoder;
+
+	pl111_encoder->encoder.crtc = &priv->pl111_crtc->crtc;
+
+	goto finish;
+
+out_config:
+	drm_mode_config_cleanup(dev);
+finish:
+	DRM_DEBUG("%s returned %d\n", __func__, ret);
+	return ret;
+}
+
+static void pl111_modeset_fini(struct drm_device *dev)
+{
+	drm_mode_config_cleanup(dev);
+}
+
+static int pl111_drm_load(struct drm_device *dev, unsigned long chipset)
+{
+	int ret = 0;
+
+	pr_info("DRM %s\n", __func__);
+
+	mutex_init(&priv.export_dma_buf_lock);
+	atomic_set(&priv.nr_flips_in_flight, 0);
+	init_waitqueue_head(&priv.wait_for_flips);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	ret = kds_callback_init(&priv.kds_cb, 1, show_framebuffer_on_crtc_cb);
+	if (ret != 0) {
+		pr_err("Failed to initialise KDS callback\n");
+		goto finish;
+	}
+
+	ret = kds_callback_init(&priv.kds_obtain_current_cb, 1,
+				initial_kds_obtained);
+	if (ret != 0) {
+		pr_err("Failed to init KDS obtain callback\n");
+		kds_callback_term(&priv.kds_cb);
+		goto finish;
+	}
+#endif
+
+	/* Create a cache for page flips */
+	priv.page_flip_slab = kmem_cache_create("page flip slab",
+			sizeof(struct pl111_drm_flip_resource), 0, 0, NULL);
+	if (priv.page_flip_slab == NULL) {
+		DRM_ERROR("Failed to create slab\n");
+		ret = -ENOMEM;
+		goto out_kds_callbacks;
+	}
+
+	dev->dev_private = &priv;
+
+	ret = pl111_modeset_init(dev);
+	if (ret != 0) {
+		pr_err("Failed to init modeset\n");
+		goto out_slab;
+	}
+
+	ret = pl111_device_init(dev);
+	if (ret != 0) {
+		DRM_ERROR("Failed to init MMIO and IRQ\n");
+		goto out_modeset;
+	}
+
+	ret = drm_vblank_init(dev, 1);
+	if (ret != 0) {
+		DRM_ERROR("Failed to init vblank\n");
+		goto out_vblank;
+	}
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(3, 13, 0))
+	platform_set_drvdata(dev->platformdev, dev);
+#endif
+
+	goto finish;
+
+out_vblank:
+	pl111_device_fini(dev);
+out_modeset:
+	pl111_modeset_fini(dev);
+out_slab:
+	kmem_cache_destroy(priv.page_flip_slab);
+out_kds_callbacks:
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	kds_callback_term(&priv.kds_obtain_current_cb);
+	kds_callback_term(&priv.kds_cb);
+#endif
+finish:
+	DRM_DEBUG_KMS("pl111_drm_load returned %d\n", ret);
+	return ret;
+}
+
+static int pl111_drm_unload(struct drm_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+
+	kmem_cache_destroy(priv.page_flip_slab);
+
+	drm_vblank_cleanup(dev);
+	pl111_modeset_fini(dev);
+	pl111_device_fini(dev);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	kds_callback_term(&priv.kds_obtain_current_cb);
+	kds_callback_term(&priv.kds_cb);
+#endif
+	return 0;
+}
+
+static struct vm_operations_struct pl111_gem_vm_ops = {
+	.fault = pl111_gem_fault,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+#else
+	.open = pl111_gem_vm_open,
+	.close = pl111_gem_vm_close,
+#endif
+};
+
+static const struct file_operations drm_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = pl111_gem_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+};
+
+static struct drm_ioctl_desc pl111_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(PL111_GEM_CREATE, pl111_drm_gem_create_ioctl,
+		DRM_CONTROL_ALLOW | DRM_UNLOCKED),
+};
+
+static struct drm_driver driver = {
+	.driver_features =
+		DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME,
+	.load = pl111_drm_load,
+	.unload = pl111_drm_unload,
+	.context_dtor = NULL,
+	.preclose = pl111_drm_preclose,
+	.lastclose = pl111_drm_lastclose,
+	.suspend = pl111_drm_suspend,
+	.resume = pl111_drm_resume,
+	.get_vblank_counter = drm_vblank_count,
+	.enable_vblank = pl111_enable_vblank,
+	.disable_vblank = pl111_disable_vblank,
+	.ioctls = pl111_ioctls,
+	.fops = &drm_fops,
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+	.dumb_create = pl111_dumb_create,
+	.dumb_destroy = pl111_dumb_destroy,
+	.dumb_map_offset = pl111_dumb_map_offset,
+	.gem_free_object = pl111_gem_free_object,
+	.gem_vm_ops = &pl111_gem_vm_ops,
+	.prime_handle_to_fd = &pl111_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_export = &pl111_gem_prime_export,
+	.gem_prime_import = &pl111_gem_prime_import,
+};
+
+int pl111_drm_init(struct platform_device *dev)
+{
+	int ret;
+	pr_info("DRM %s\n", __func__);
+	pr_info("PL111 DRM initialize, driver name: %s, version %d.%d\n",
+		DRIVER_NAME, DRIVER_MAJOR, DRIVER_MINOR);
+	driver.num_ioctls = ARRAY_SIZE(pl111_ioctls);
+	ret = 0;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 15, 0))
+	driver.kdriver.platform_device = dev;
+#endif
+	return drm_platform_init(&driver, dev);
+
+}
+
+void pl111_drm_exit(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	drm_platform_exit(&driver, dev);
+#else
+	drm_put_dev(platform_get_drvdata(dev));
+#endif
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
new file mode 100755
index 0000000..1131f46
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
@@ -0,0 +1,625 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_dma_buf.c
+ * Implementation of the dma_buf functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void obtain_kds_if_currently_displayed(struct drm_device *dev,
+						struct pl111_gem_bo *bo,
+						struct dma_buf *dma_buf)
+{
+	unsigned long shared[1] = { 0 };
+	struct kds_resource *resource_list[1];
+	struct kds_resource_set *kds_res_set;
+	struct drm_crtc *crtc;
+	bool cb_has_called = false;
+	unsigned long flags;
+	int err;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	DRM_DEBUG_KMS("Obtaining initial KDS res for bo:%p dma_buf:%p\n",
+			bo, dma_buf);
+
+	resource_list[0] = get_dma_buf_kds_resource(dma_buf);
+	get_dma_buf(dma_buf);
+
+	/*
+	 * Can't use kds_waitall(), because kbase will be let through due to
+	 * locked ignore'
+	 */
+	err = kds_async_waitall(&kds_res_set,
+				&priv.kds_obtain_current_cb, &wake,
+				&cb_has_called, 1, shared, resource_list);
+	BUG_ON(err);
+	wait_event(wake, cb_has_called == true);
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb) {
+			struct pl111_drm_framebuffer *pl111_fb;
+			struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+			pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+
+			if (pl111_fb->bo == bo) {
+				DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+				DRM_DEBUG_KMS(" is being displayed, keeping\n");
+				/* There shouldn't be a previous buffer to release */
+				BUG_ON(pl111_crtc->old_kds_res_set);
+
+				if (kds_res_set == NULL) {
+					err = kds_async_waitall(&kds_res_set,
+							&priv.kds_obtain_current_cb,
+							&wake, &cb_has_called,
+							1, shared, resource_list);
+					BUG_ON(err);
+					wait_event(wake, cb_has_called == true);
+				}
+
+				/* Current buffer will need releasing on next flip */
+				pl111_crtc->old_kds_res_set = kds_res_set;
+
+				/*
+				* Clear kds_res_set, so a new kds_res_set is allocated
+				* for additional CRTCs
+				*/
+				kds_res_set = NULL;
+			}
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+	}
+
+	/* kds_res_set will be NULL here if any CRTCs are displaying fb */
+	if (kds_res_set != NULL) {
+		DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+		DRM_DEBUG_KMS(" not being displayed, discarding\n");
+		/* They're not being displayed, release them */
+		kds_resource_set_release(&kds_res_set);
+	}
+
+	dma_buf_put(dma_buf);
+}
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0))
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+			struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = buffer->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+	mutex_lock(&dev->struct_mutex);
+	ret = drm_gem_mmap_obj(obj, obj->size, vma);
+	mutex_unlock(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#else
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+			struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = buffer->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+
+	DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+	mutex_lock(&dev->struct_mutex);
+
+	/* Check for valid size. */
+	if (obj->size < vma->vm_end - vma->vm_start)
+		return -EINVAL;
+
+	BUG_ON(!dev->driver->gem_vm_ops);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+#endif
+
+	vma->vm_ops = dev->driver->gem_vm_ops;
+	vma->vm_private_data = obj;
+
+	/* Take a ref for this mapping of the object, so that the fault
+	* handler can dereference the mmap offset's pointer to the object.
+	* This reference is cleaned up by the corresponding vm_close
+	* (which should happen whether the vma was created by this call, or
+	* by a vm_open due to mremap or partial unmap or whatever).
+	*/
+	drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+	pl111_drm_vm_open_locked(dev, vma);
+#else
+	drm_vm_open_locked(dev, vma);
+#endif
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#endif /* KERNEL_VERSION */
+
+static void pl111_dma_buf_release(struct dma_buf *buf)
+{
+	/*
+	 * Need to release the dma_buf's reference on the gem object it was
+	 * exported from, and also clear the gem object's export_dma_buf
+	 * pointer to this dma_buf as it no longer exists
+	 */
+	struct drm_gem_object *obj = (struct drm_gem_object *)buf->priv;
+	struct pl111_gem_bo *bo;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct drm_crtc *crtc;
+	unsigned long flags;
+#endif
+	bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("Releasing dma_buf %p, drm_gem_obj=%p\n", buf, obj);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	list_for_each_entry(crtc, &bo->gem_object.dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb) {
+			struct pl111_drm_framebuffer *pl111_fb;
+			struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+			pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+			if (pl111_fb->bo == bo) {
+				kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+				pl111_crtc->old_kds_res_set = NULL;
+			}
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+	}
+#endif
+	mutex_lock(&priv.export_dma_buf_lock);
+
+	obj->export_dma_buf = NULL;
+	drm_gem_object_unreference_unlocked(obj);
+
+	mutex_unlock(&priv.export_dma_buf_lock);
+}
+
+static int pl111_dma_buf_attach(struct dma_buf *buf, struct device *dev,
+				struct dma_buf_attachment *attach)
+{
+	DRM_DEBUG_KMS("Attaching dma_buf %p to device %p attach=%p\n", buf,
+			dev, attach);
+
+	attach->priv = dev;
+
+	return 0;
+}
+
+static void pl111_dma_buf_detach(struct dma_buf *buf,
+				struct dma_buf_attachment *attach)
+{
+	DRM_DEBUG_KMS("Detaching dma_buf %p attach=%p\n", attach->dmabuf,
+			attach);
+}
+
+/* Heavily from exynos_drm_dmabuf.c */
+static struct sg_table *pl111_dma_buf_map_dma_buf(struct dma_buf_attachment
+						*attach,
+						enum dma_data_direction
+						direction)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int size, n_pages, nents;
+	struct scatterlist *s, *sg;
+	struct sg_table *sgt;
+	int ret, i;
+
+	DRM_DEBUG_KMS("Mapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+		      attach, bo);
+
+	/*
+	 * Nothing to do, if we are trying to map a dmabuf that has been imported.
+	 * Just return the existing sgt.
+	 */
+	if (obj->import_attach) {
+		BUG_ON(!bo->sgt);
+		return bo->sgt;
+	}
+
+	size = obj->size;
+	n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	if (bo->type & PL111_BOT_DMA) {
+		sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+		if (!sgt) {
+			DRM_ERROR("Failed to allocate sg_table\n");
+			return ERR_PTR(-ENOMEM);
+		}
+
+		ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+		if (ret < 0) {
+			DRM_ERROR("Failed to allocate page table\n");
+			return ERR_PTR(-ENOMEM);
+		}
+		sg_dma_len(sgt->sgl) = size;
+		/* We use DMA coherent mappings for PL111_BOT_DMA so we must
+		 * use the virtual address returned at buffer allocation */
+		sg_set_buf(sgt->sgl, bo->backing_data.dma.fb_cpu_addr, size);
+		sg_dma_address(sgt->sgl) = bo->backing_data.dma.fb_dev_addr;
+	} else { /* PL111_BOT_SHM */
+		struct page **pages;
+		int pg = 0;
+
+		mutex_lock(&dev->struct_mutex);
+		pages = get_pages(obj);
+		if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev, "could not get pages: %ld\n",
+				PTR_ERR(pages));
+			return ERR_CAST(pages);
+		}
+		sgt = drm_prime_pages_to_sg(pages, n_pages);
+		if (sgt == NULL)
+			return ERR_PTR(-ENOMEM);
+
+		pl111_gem_sync_to_dma(bo);
+
+		/*
+		 * At this point the pages have been dma-mapped by either
+		 * get_pages() for non cached maps or pl111_gem_sync_to_dma()
+		 * for cached. So the physical addresses can be assigned
+		 * to the sg entries.
+		 * drm_prime_pages_to_sg() may have combined contiguous pages
+		 * into chunks so we assign the physical address of the first
+		 * page of a chunk to the chunk and check that the physical
+		 * addresses of the rest of the pages in that chunk are also
+		 * contiguous.
+		 */
+		sg = sgt->sgl;
+		nents = sgt->nents;
+
+		for_each_sg(sg, s, nents, i) {
+			int j, n_pages_in_chunk = sg_dma_len(s) >> PAGE_SHIFT;
+
+			sg_dma_address(s) = bo->backing_data.shm.dma_addrs[pg];
+
+			for (j = pg+1; j < pg+n_pages_in_chunk; j++) {
+				BUG_ON(bo->backing_data.shm.dma_addrs[j] !=
+						bo->backing_data.shm.dma_addrs[j-1]+PAGE_SIZE);
+			}
+
+			pg += n_pages_in_chunk;
+		}
+
+		mutex_unlock(&dev->struct_mutex);
+	}
+	bo->sgt = sgt;
+	return sgt;
+}
+
+static void pl111_dma_buf_unmap_dma_buf(struct dma_buf_attachment *attach,
+					struct sg_table *sgt,
+					enum dma_data_direction direction)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("Unmapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+			attach, bo);
+
+	sg_free_table(sgt);
+	kfree(sgt);
+	bo->sgt = NULL;
+}
+
+/*
+ * There isn't any operation here that can sleep or fail so this callback can
+ * be used for both kmap and kmap_atomic implementations.
+ */
+static void *pl111_dma_buf_kmap(struct dma_buf *dma_buf, unsigned long pageno)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	void *vaddr = NULL;
+
+	/* Make sure we cannot access outside the memory range */
+	if (((pageno + 1) << PAGE_SHIFT) > bo->gem_object.size)
+		return NULL;
+
+	if (bo->type & PL111_BOT_DMA) {
+		vaddr = (bo->backing_data.dma.fb_cpu_addr +
+						(pageno << PAGE_SHIFT));
+	} else {
+		vaddr = page_address(bo->backing_data.shm.pages[pageno]);
+	}
+
+	return vaddr;
+}
+
+/*
+ * Find a scatterlist that starts in "start" and has "len"
+ * or return a NULL dma_handle.
+ */
+static dma_addr_t pl111_find_matching_sg(struct sg_table *sgt, size_t start,
+					size_t len)
+{
+	struct scatterlist *sg;
+	unsigned int count;
+	size_t size = 0;
+	dma_addr_t dma_handle = 0;
+
+	/* Find a scatterlist that starts in "start" and has "len"
+	* or return error */
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		if ((size == start) && (len == sg_dma_len(sg))) {
+			dma_handle = sg_dma_address(sg);
+			break;
+		}
+		size += sg_dma_len(sg);
+	}
+	return dma_handle;
+}
+
+static int pl111_dma_buf_begin_cpu(struct dma_buf *dma_buf,
+					size_t start, size_t len,
+					enum dma_data_direction dir)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	struct sg_table *sgt = bo->sgt;
+	dma_addr_t dma_handle;
+
+	if ((start + len) > bo->gem_object.size)
+		return -EINVAL;
+
+	if (!(bo->type & PL111_BOT_SHM)) {
+		struct device *dev = bo->gem_object.dev->dev;
+
+		dma_handle = pl111_find_matching_sg(sgt, start, len);
+		if (!dma_handle)
+			return -EINVAL;
+
+		dma_sync_single_range_for_cpu(dev, dma_handle, 0, len, dir);
+	}
+	/* PL111_BOT_DMA uses coherents mappings, no need to sync */
+	return 0;
+}
+
+static void pl111_dma_buf_end_cpu(struct dma_buf *dma_buf,
+					size_t start, size_t len,
+					enum dma_data_direction dir)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	struct sg_table *sgt = bo->sgt;
+	dma_addr_t dma_handle;
+
+	if ((start + len) > bo->gem_object.size)
+		return;
+
+	if (!(bo->type & PL111_BOT_DMA)) {
+		struct device *dev = bo->gem_object.dev->dev;
+
+		dma_handle = pl111_find_matching_sg(sgt, start, len);
+		if (!dma_handle)
+			return;
+
+		dma_sync_single_range_for_device(dev, dma_handle, 0, len, dir);
+	}
+	/* PL111_BOT_DMA uses coherents mappings, no need to sync */
+}
+
+static struct dma_buf_ops pl111_dma_buf_ops = {
+	.release = &pl111_dma_buf_release,
+	.attach = &pl111_dma_buf_attach,
+	.detach = &pl111_dma_buf_detach,
+	.map_dma_buf = &pl111_dma_buf_map_dma_buf,
+	.unmap_dma_buf = &pl111_dma_buf_unmap_dma_buf,
+	.kmap_atomic = &pl111_dma_buf_kmap,
+	.kmap = &pl111_dma_buf_kmap,
+	.begin_cpu_access = &pl111_dma_buf_begin_cpu,
+	.end_cpu_access = &pl111_dma_buf_end_cpu,
+	.mmap = &pl111_dma_buf_mmap,
+};
+
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+				       struct dma_buf *dma_buf)
+{
+	struct dma_buf_attachment *attachment;
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+	struct scatterlist *sgl;
+	struct sg_table *sgt;
+	dma_addr_t cont_phys;
+	int ret = 0;
+	int i;
+
+	DRM_DEBUG_KMS("DRM %s on dev=%p dma_buf=%p\n", __func__, dev, dma_buf);
+
+	/* is this one of own objects? */
+	if (dma_buf->ops == &pl111_dma_buf_ops) {
+		obj = dma_buf->priv;
+		/* is it from our device? */
+		if (obj->dev == dev) {
+			/*
+			* Importing dmabuf exported from our own gem increases
+			* refcount on gem itself instead of f_count of dmabuf.
+			*/
+			drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+			/* before v3.10.0 we assume the caller has taken a ref on the dma_buf
+			 * we don't want it for self-imported buffers so drop it here */
+			dma_buf_put(dma_buf);
+#endif
+
+			return obj;
+		}
+	}
+
+	attachment = dma_buf_attach(dma_buf, dev->dev);
+	if (IS_ERR(attachment))
+		return ERR_CAST(attachment);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+	/* from 3.10.0 we assume the caller has not taken a ref so we take one here */
+	get_dma_buf(dma_buf);
+#endif
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto err_buf_detach;
+	}
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo) {
+		DRM_ERROR("%s: failed to allocate buffer object.\n", __func__);
+		ret = -ENOMEM;
+		goto err_unmap_attach;
+	}
+
+	/* Find out whether the buffer is contiguous or not */
+	sgl = sgt->sgl;
+	cont_phys = sg_phys(sgl);
+	bo->type |= PL111_BOT_DMA;
+	for_each_sg(sgt->sgl, sgl, sgt->nents, i) {
+		dma_addr_t real_phys = sg_phys(sgl);
+		if (real_phys != cont_phys) {
+			bo->type &= ~PL111_BOT_DMA;
+			break;
+		}
+		cont_phys += (PAGE_SIZE - sgl->offset);
+	}
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	ret = drm_gem_private_object_init(dev, &bo->gem_object,
+					  dma_buf->size);
+	if (ret != 0) {
+		DRM_ERROR("DRM could not import DMA GEM obj\n");
+		goto err_free_buffer;
+	}
+#else
+	drm_gem_private_object_init(dev, &bo->gem_object, dma_buf->size);
+#endif
+
+	if (bo->type & PL111_BOT_DMA) {
+		bo->backing_data.dma.fb_cpu_addr = sg_virt(sgt->sgl);
+		bo->backing_data.dma.fb_dev_addr = sg_phys(sgt->sgl);
+		DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, contiguous import\n", __func__, bo);
+	} else { /* PL111_BOT_SHM */
+		DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, non contiguous import\n", __func__, bo);
+	}
+
+	bo->gem_object.import_attach = attachment;
+	bo->sgt = sgt;
+
+	return &bo->gem_object;
+
+err_free_buffer:
+	kfree(bo);
+	bo = NULL;
+err_unmap_attach:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+err_buf_detach:
+	dma_buf_detach(dma_buf, attachment);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+	/* from 3.10.0 we will have taken a ref so drop it here */
+	dma_buf_put(dma_buf);
+#endif
+	return ERR_PTR(ret);
+}
+
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+				      struct drm_gem_object *obj, int flags)
+{
+	struct dma_buf *new_buf;
+	struct pl111_gem_bo *bo;
+	size_t size;
+
+	DRM_DEBUG("DRM %s on dev=%p drm_gem_obj=%p\n", __func__, dev, obj);
+	size = obj->size;
+
+	new_buf = dma_buf_export(obj /*priv */ , &pl111_dma_buf_ops, size,
+					flags | O_RDWR);
+	bo = PL111_BO_FROM_GEM(new_buf->priv);
+
+	/*
+	 * bo->gem_object.export_dma_buf not setup until after gem_prime_export
+	 * finishes
+	 */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Ensure that we hold the kds resource if it's the currently
+	 * displayed buffer.
+	 */
+	obtain_kds_if_currently_displayed(dev, bo, new_buf);
+#endif
+
+	DRM_DEBUG("Created dma_buf %p\n", new_buf);
+
+	return new_buf;
+}
+
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+				uint32_t handle, uint32_t flags, int *prime_fd)
+{
+	int result;
+	/*
+	 * This will re-use any existing exports, and calls
+	 * driver->gem_prime_export to do the first export when needed
+	 */
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p, handle=0x%.8x\n", __func__,
+			file_priv, handle);
+
+	mutex_lock(&priv.export_dma_buf_lock);
+	result = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags,
+						prime_fd);
+	mutex_unlock(&priv.export_dma_buf_lock);
+
+	return result;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c
new file mode 100755
index 0000000..78c91c0
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_encoder.c
+ * Implementation of the encoder functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+bool pl111_encoder_helper_mode_fixup(struct drm_encoder *encoder,
+					struct drm_display_mode *mode,
+					struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+	return true;
+}
+
+void pl111_encoder_helper_prepare(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_commit(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_mode_set(struct drm_encoder *encoder,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_disable(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct pl111_drm_encoder *pl111_encoder =
+					PL111_ENCODER_FROM_ENCODER(encoder);
+
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+
+	drm_encoder_cleanup(encoder);
+	kfree(pl111_encoder);
+}
+
+const struct drm_encoder_funcs encoder_funcs = {
+	.destroy = pl111_encoder_destroy,
+};
+
+const struct drm_encoder_helper_funcs encoder_helper_funcs = {
+	.mode_fixup = pl111_encoder_helper_mode_fixup,
+	.prepare = pl111_encoder_helper_prepare,
+	.commit = pl111_encoder_helper_commit,
+	.mode_set = pl111_encoder_helper_mode_set,
+	.disable = pl111_encoder_helper_disable,
+};
+
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+						int possible_crtcs)
+{
+	struct pl111_drm_encoder *pl111_encoder;
+
+	pl111_encoder = kzalloc(sizeof(struct pl111_drm_encoder), GFP_KERNEL);
+	if (pl111_encoder == NULL) {
+		pr_err("Failed to allocated pl111_drm_encoder\n");
+		return NULL;
+	}
+
+	drm_encoder_init(dev, &pl111_encoder->encoder, &encoder_funcs,
+				DRM_MODE_ENCODER_DAC);
+
+	drm_encoder_helper_add(&pl111_encoder->encoder, &encoder_helper_funcs);
+
+	pl111_encoder->encoder.possible_crtcs = possible_crtcs;
+
+	return pl111_encoder;
+}
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c
new file mode 100755
index 0000000..f575c9e
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c
@@ -0,0 +1,202 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_fb.c
+ * Implementation of the framebuffer functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_crtc.h>
+#include "pl111_drm.h"
+
+static void pl111_fb_destroy(struct drm_framebuffer *framebuffer)
+{
+	struct pl111_drm_framebuffer *pl111_fb;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct drm_crtc *crtc;
+	unsigned long flags;
+#endif
+	DRM_DEBUG_KMS("Destroying framebuffer 0x%p...\n", framebuffer);
+
+	pl111_fb = PL111_FB_FROM_FRAMEBUFFER(framebuffer);
+
+	/*
+	 * Because flips are deferred, wait for all previous flips to complete
+	 */
+	wait_event(priv.wait_for_flips,
+			atomic_read(&priv.nr_flips_in_flight) == 0);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Release KDS resources if it's currently being displayed. Only occurs
+	 * when the last framebuffer is destroyed.
+	 */
+	list_for_each_entry(crtc, &framebuffer->dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb == framebuffer) {
+			/* Release the current buffers */
+			if (pl111_crtc->old_kds_res_set != NULL) {
+				DRM_DEBUG_KMS("Releasing KDS resources for ");
+				DRM_DEBUG_KMS("displayed 0x%p\n", framebuffer);
+				kds_resource_set_release(
+					&pl111_crtc->old_kds_res_set);
+			}
+			pl111_crtc->old_kds_res_set = NULL;
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock,
+								flags);
+	}
+#endif
+	drm_framebuffer_cleanup(framebuffer);
+
+	if ((pl111_fb->bo != NULL) && (&pl111_fb->bo->gem_object != NULL))
+		drm_gem_object_unreference_unlocked(&pl111_fb->bo->gem_object);
+
+	kfree(pl111_fb);
+
+	DRM_DEBUG_KMS("Destroyed framebuffer 0x%p\n", framebuffer);
+}
+
+static int pl111_fb_create_handle(struct drm_framebuffer *fb,
+				struct drm_file *file_priv,
+				unsigned int *handle)
+{
+	struct pl111_gem_bo *bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	DRM_DEBUG_KMS("DRM %s on fb=%p\n", __func__, fb);
+
+	if (bo == NULL)
+		return -EINVAL;
+
+	return drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+}
+
+const struct drm_framebuffer_funcs fb_funcs = {
+	.destroy = pl111_fb_destroy,
+	.create_handle = pl111_fb_create_handle,
+};
+
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct pl111_drm_framebuffer *pl111_fb = NULL;
+	struct drm_framebuffer *fb = NULL;
+	struct drm_gem_object *gem_obj;
+	struct pl111_gem_bo *bo;
+	int err = 0;
+	size_t min_size;
+	int bpp;
+	int depth;
+
+	pr_info("DRM %s\n", __func__);
+	gem_obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
+	if (gem_obj == NULL) {
+		DRM_ERROR("Could not get gem obj from handle to create fb\n");
+		err = -ENOENT;
+		goto error;
+	}
+
+	bo = PL111_BO_FROM_GEM(gem_obj);
+	drm_fb_get_bpp_depth(mode_cmd->pixel_format, &depth, &bpp);
+
+	if (mode_cmd->pitches[0] < mode_cmd->width * (bpp >> 3)) {
+		DRM_ERROR("bad pitch %u for plane 0\n", mode_cmd->pitches[0]);
+		err = -EINVAL;
+		goto error;
+	}
+
+	min_size = (mode_cmd->height - 1) * mode_cmd->pitches[0]
+				+ mode_cmd->width * (bpp >> 3);
+
+	if (bo->gem_object.size < min_size) {
+		DRM_ERROR("gem obj size < min size\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* We can't scan out SHM so we can't create an fb for it */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		DRM_ERROR("Can't create FB for non-scanout buffer\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	switch ((char)(mode_cmd->pixel_format & 0xFF)) {
+	case 'Y':
+	case 'U':
+	case 'V':
+	case 'N':
+	case 'T':
+		DRM_ERROR("YUV formats not supported\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	pl111_fb = kzalloc(sizeof(struct pl111_drm_framebuffer), GFP_KERNEL);
+	if (pl111_fb == NULL) {
+		DRM_ERROR("Could not allocate pl111_drm_framebuffer\n");
+		err = -ENOMEM;
+		goto error;
+	}
+	fb = &pl111_fb->fb;
+
+	err = drm_framebuffer_init(dev, fb, &fb_funcs);
+	if (err) {
+		DRM_ERROR("drm_framebuffer_init failed\n");
+		kfree(fb);
+		fb = NULL;
+		goto error;
+	}
+
+	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+	/* The only framebuffer formats supported by pl111
+	 * are 16 bpp or 32 bpp with 24 bit depth.
+	 * See clcd_enable()
+	 */
+	if (!((fb->bits_per_pixel == 16) ||
+		  (fb->bits_per_pixel == 32 && fb->depth == 24))) {
+		DRM_DEBUG_KMS("unsupported pixel format bpp=%d, depth=%d\n", fb->bits_per_pixel, fb->depth);
+		drm_framebuffer_cleanup(fb);
+		kfree(fb);
+		fb = NULL;
+		err = -EINVAL;
+		goto error;
+	}
+
+	pl111_fb->bo = bo;
+
+	DRM_DEBUG_KMS("Created fb 0x%p with gem_obj 0x%p physaddr=0x%.8x\n",
+			fb, gem_obj, bo->backing_data.dma.fb_dev_addr);
+
+	return fb;
+
+error:
+	if (gem_obj != NULL)
+		drm_gem_object_unreference_unlocked(gem_obj);
+
+	return ERR_PTR(err);
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h
new file mode 100755
index 0000000..494baa0
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_funcs.h
+ * Function prototypes for PL111 DRM
+ */
+
+#ifndef PL111_DRM_FUNCS_H_
+#define PL111_DRM_FUNCS_H_
+
+/* Platform Initialisation */
+int pl111_drm_init(struct platform_device *dev);
+void pl111_drm_exit(struct platform_device *dev);
+
+/* KDS Callbacks */
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2);
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res);
+
+/* CRTC Functions */
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev);
+struct pl111_drm_crtc *pl111_crtc_dummy_create(struct drm_device *dev);
+void pl111_crtc_destroy(struct drm_crtc *crtc);
+
+bool pl111_crtc_is_fb_currently_displayed(struct drm_device *dev,
+					struct drm_framebuffer *fb);
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+			struct drm_framebuffer *fb, bool page_flip,
+			struct drm_pending_vblank_event *event);
+
+/* Common IRQ handler */
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc);
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+			   struct drm_file *file_priv,
+			   uint32_t handle,
+			   uint32_t width,
+			   uint32_t height);
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+			    int x, int y);
+
+/* Connector Functions */
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev);
+void pl111_connector_destroy(struct drm_connector *connector);
+struct pl111_drm_connector *pl111_connector_dummy_create(struct drm_device
+								*dev);
+
+/* Encoder Functions */
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+						int possible_crtcs);
+struct pl111_drm_encoder *pl111_encoder_dummy_create(struct drm_device *dev,
+							int possible_crtcs);
+void pl111_encoder_destroy(struct drm_encoder *encoder);
+
+/* Frame Buffer Functions */
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd);
+
+/* VMA Functions */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma);
+struct page **get_pages(struct drm_gem_object *obj);
+void put_pages(struct drm_gem_object *obj, struct page **pages);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+                struct vm_area_struct *vma);
+void pl111_gem_vm_open(struct vm_area_struct *vma);
+void pl111_gem_vm_close(struct vm_area_struct *vma);
+#endif
+
+/* Suspend Functions */
+int pl111_drm_resume(struct drm_device *dev);
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state);
+
+/* GEM Functions */
+int pl111_dumb_create(struct drm_file *file_priv,
+			struct drm_device *dev,
+			struct drm_mode_create_dumb *args);
+int pl111_dumb_destroy(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle);
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle,
+			uint64_t *offset);
+void pl111_gem_free_object(struct drm_gem_object *obj);
+
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+			struct vm_area_struct *vma, size_t size);
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff);
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo);
+
+/* DMA BUF Functions */
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+				       struct dma_buf *dma_buf);
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+			uint32_t handle, uint32_t flags, int *prime_fd);
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+				struct drm_gem_object *obj, int flags);
+
+/* Pl111 Functions */
+void show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource
+					*flip_res, struct drm_framebuffer *fb);
+int clcd_disable(struct drm_crtc *crtc);
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res);
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id);
+int pl111_amba_remove(struct amba_device *dev);
+
+int pl111_device_init(struct drm_device *dev);
+void pl111_device_fini(struct drm_device *dev);
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+					struct clcd_regs *timing);
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+					struct drm_display_mode *mode);
+#endif /* PL111_DRM_FUNCS_H_ */
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c
new file mode 100755
index 0000000..13fb256
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c
@@ -0,0 +1,476 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_gem.c
+ * Implementation of the GEM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <asm/cacheflush.h>
+#include <asm/outercache.h>
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#include <linux/dma-attrs.h>
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0))
+#include <drm/drm_vma_manager.h>
+#endif
+
+void pl111_gem_free_object(struct drm_gem_object *obj)
+{
+	struct pl111_gem_bo *bo;
+	struct drm_device *dev = obj->dev;
+	DRM_DEBUG_KMS("DRM %s on drm_gem_object=%p\n", __func__, obj);
+
+	bo = PL111_BO_FROM_GEM(obj);
+
+	if (obj->import_attach)
+		drm_prime_gem_destroy(obj, bo->sgt);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	if (obj->map_list.map != NULL)
+		drm_gem_free_mmap_offset(obj);
+#else
+	drm_gem_free_mmap_offset(obj);
+#endif
+	/*
+	 * Only free the backing memory if the object has not been imported.
+	 * If it has been imported, the exporter is in charge to free that
+	 * once dmabuf's refcount becomes 0.
+	 */
+	if (obj->import_attach)
+		goto imported_out;
+
+	if (bo->type & PL111_BOT_DMA) {
+		dma_free_writecombine(dev->dev, obj->size,
+				bo->backing_data.dma.fb_cpu_addr,
+				bo->backing_data.dma.fb_dev_addr);
+	} else if (bo->backing_data.shm.pages != NULL) {
+		put_pages(obj, bo->backing_data.shm.pages);
+	}
+
+imported_out:
+	drm_gem_object_release(obj);
+
+	kfree(bo);
+
+	DRM_DEBUG_KMS("Destroyed dumb_bo handle 0x%p\n", bo);
+}
+
+static int pl111_gem_object_create(struct drm_device *dev, u64 size,
+				   u32 flags, struct drm_file *file_priv,
+				   u32 *handle)
+{
+	int ret = 0;
+	struct pl111_gem_bo *bo = NULL;
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (bo == NULL) {
+		ret = -ENOMEM;
+		goto finish;
+	}
+
+	bo->type = flags;
+
+#ifndef ARCH_HAS_SG_CHAIN
+	/*
+	 * If the ARCH can't chain we can't have non-contiguous allocs larger
+	 * than a single sg can hold.
+	 * In this case we fall back to using contiguous memory
+	 */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		long unsigned int n_pages =
+				PAGE_ALIGN(size) >> PAGE_SHIFT;
+		if (n_pages > SG_MAX_SINGLE_ALLOC) {
+			bo->type |= PL111_BOT_DMA;
+			/*
+			 * Non-contiguous allocation request changed to
+			 * contigous
+			 */
+			DRM_INFO("non-contig alloc to contig %lu > %lu pages.",
+					n_pages, SG_MAX_SINGLE_ALLOC);
+		}
+	}
+#endif
+	if (bo->type & PL111_BOT_DMA) {
+		/* scanout compatible - use physically contiguous buffer */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_attrs(dev->dev, size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL,
+					&attrs);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_attrs failed\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+#else
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_writecombine(dev->dev, size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_writecombine failed\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+		ret = drm_gem_private_object_init(dev, &bo->gem_object,
+							size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not initialise GEM object\n");
+			goto free_dma;
+		}
+#else
+		drm_gem_private_object_init(dev, &bo->gem_object, size);
+#endif
+
+	} else { /* PL111_BOT_SHM */
+		/* not scanout compatible - use SHM backed object */
+		ret = drm_gem_object_init(dev, &bo->gem_object, size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not init SHM backed GEM obj\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+		DRM_DEBUG_KMS("Num bytes: %d\n", bo->gem_object.size);
+	}
+
+	DRM_DEBUG("s=%llu, flags=0x%x, %s 0x%.8lx, type=%d\n",
+		size, flags,
+		(bo->type & PL111_BOT_DMA) ? "physaddr" : "shared page array",
+		(bo->type & PL111_BOT_DMA) ?
+			(unsigned long)bo->backing_data.dma.fb_dev_addr:
+			(unsigned long)bo->backing_data.shm.pages,
+			bo->type);
+
+	ret = drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+	if (ret != 0) {
+		DRM_ERROR("DRM failed to create GEM handle\n");
+		goto obj_release;
+	}
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&bo->gem_object);
+	
+	return 0;
+
+obj_release:
+	drm_gem_object_release(&bo->gem_object);
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+free_dma:
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	if (bo->type & PL111_BOT_DMA) {
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(dev->dev, size,
+			bo->backing_data.dma.fb_cpu_addr,
+			bo->backing_data.dma.fb_dev_addr,
+			&attrs);
+		}
+#else
+	if (bo->type & PL111_BOT_DMA)
+		dma_free_writecombine(dev->dev, size,
+			bo->backing_data.dma.fb_cpu_addr,
+			bo->backing_data.dma.fb_dev_addr);
+#endif
+free_bo:
+	kfree(bo);
+finish:
+	return ret;
+}
+
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv)
+{
+	struct drm_pl111_gem_create *args = data;
+	uint32_t bytes_pp;
+
+	/* Round bpp up, to allow for case where bpp<8 */
+	bytes_pp = args->bpp >> 3;
+	if (args->bpp & ((1 << 3) - 1))
+		bytes_pp++;
+
+	if (args->flags & ~PL111_BOT_MASK) {
+		DRM_ERROR("wrong flags: 0x%x\n", args->flags);
+		return -EINVAL;
+	}
+
+	args->pitch = ALIGN(args->width * bytes_pp, 64);
+	args->size = PAGE_ALIGN(args->pitch * args->height);
+
+	DRM_DEBUG_KMS("gem_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+			args->width, args->height, args->pitch, args->bpp,
+			bytes_pp, args->size, args->flags);
+
+	return pl111_gem_object_create(dev, args->size, args->flags, file_priv,
+					&args->handle);
+}
+
+int pl111_dumb_create(struct drm_file *file_priv,
+		struct drm_device *dev, struct drm_mode_create_dumb *args)
+{
+	uint32_t bytes_pp;
+
+	/* Round bpp up, to allow for case where bpp<8 */
+	bytes_pp = args->bpp >> 3;
+	if (args->bpp & ((1 << 3) - 1))
+		bytes_pp++;
+
+	if (args->flags) {
+		DRM_ERROR("flags must be zero: 0x%x\n", args->flags);
+		return -EINVAL;
+	}
+
+	args->pitch = ALIGN(args->width * bytes_pp, 64);
+	args->size = PAGE_ALIGN(args->pitch * args->height);
+
+	DRM_DEBUG_KMS("dumb_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+			args->width, args->height, args->pitch, args->bpp,
+			bytes_pp, args->size, args->flags);
+
+	return pl111_gem_object_create(dev, args->size,
+				       PL111_BOT_DMA | PL111_BOT_UNCACHED,
+				       file_priv, &args->handle);
+}
+
+int pl111_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
+		uint32_t handle)
+{
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+			file_priv, handle);
+	return drm_gem_handle_delete(file_priv, handle);
+}
+
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle,
+			uint64_t *offset)
+{
+	struct drm_gem_object *obj;
+	int ret = 0;
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+			file_priv, handle);
+
+	/* GEM does all our handle to object mapping */
+	obj = drm_gem_object_lookup(dev, file_priv, handle);
+	if (obj == NULL) {
+		ret = -ENOENT;
+		goto fail;
+	}
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	if (obj->map_list.map == NULL) {
+		ret = drm_gem_create_mmap_offset(obj);
+		if (ret != 0) {
+			drm_gem_object_unreference_unlocked(obj);
+			goto fail;
+		}
+	}
+#else
+	ret = drm_gem_create_mmap_offset(obj);
+	if (ret != 0) {
+		drm_gem_object_unreference_unlocked(obj);
+		goto fail;
+	}
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	*offset = (uint64_t) obj->map_list.hash.key << PAGE_SHIFT;
+#else
+	*offset = drm_vma_node_offset_addr(&obj->vma_node);
+	DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
+#endif
+
+	drm_gem_object_unreference_unlocked(obj);
+fail:
+	return ret;
+}
+
+/* sync the buffer for DMA access */
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo)
+{
+	struct drm_device *dev = bo->gem_object.dev;
+
+	if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED)) {
+		int i, npages = bo->gem_object.size >> PAGE_SHIFT;
+		struct page **pages = bo->backing_data.shm.pages;
+		bool dirty = false;
+
+		for (i = 0; i < npages; i++) {
+			if (!bo->backing_data.shm.dma_addrs[i]) {
+				DRM_DEBUG("%s: dma map page=%d bo=%p\n", __func__, i, bo);
+				 bo->backing_data.shm.dma_addrs[i] =
+					dma_map_page(dev->dev, pages[i], 0,
+					PAGE_SIZE, DMA_BIDIRECTIONAL);
+				dirty = true;
+			}
+		}
+
+		if (dirty) {
+			DRM_DEBUG("%s: zap ptes (and flush cache) bo=%p\n", __func__, bo);
+			/*
+			 * TODO MIDEGL-1813
+			 * 
+			 * Use flush_cache_page() and outer_flush_range() to
+			 * flush only the user space mappings of the dirty pages
+			 */
+			flush_cache_all();
+			outer_flush_all();
+			unmap_mapping_range(bo->gem_object.filp->f_mapping, 0,
+					bo->gem_object.size, 1);
+		}
+	}
+}
+
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff)
+{
+	struct drm_device *dev = bo->gem_object.dev;
+
+	/*
+	 * TODO MIDEGL-1808
+	 * 
+	 * The following check was meant to detect if the CPU is trying to access
+	 * a buffer that is currently mapped for DMA accesses, which is illegal
+	 * as described by the DMA-API.
+	 * 
+	 * However, some of our tests are trying to do that, which triggers the message
+	 * below and avoids dma-unmapping the pages not to annoy the DMA device but that
+	 * leads to the test failing because of caches not being properly flushed.
+	 */
+
+	/*
+	if (bo->sgt) {
+		DRM_ERROR("%s: the CPU is trying to access a dma-mapped buffer\n",  __func__);
+		return;
+	}
+	*/
+
+	if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED) &&
+	     bo->backing_data.shm.dma_addrs[pgoff]) {
+		DRM_DEBUG("%s: unmap bo=%p (s=%d), paddr=%08x\n",
+			  __func__, bo,  bo->gem_object.size,
+			  bo->backing_data.shm.dma_addrs[pgoff]);
+		dma_unmap_page(dev->dev, bo->backing_data.shm.dma_addrs[pgoff],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		bo->backing_data.shm.dma_addrs[pgoff] = 0;
+	}
+}
+
+/* Based on omapdrm driver */
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+		 struct vm_area_struct *vma, size_t size)
+{
+	DRM_DEBUG("DRM %s on drm_gem_object=%p, pl111_gem_bo=%p type=%08x\n",
+			__func__, obj, bo, bo->type);
+
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_flags |= VM_MIXEDMAP;
+
+	if (bo->type & PL111_BOT_WC) {
+		vma->vm_page_prot =
+			pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	} else if (bo->type & PL111_BOT_CACHED) {
+		/*
+		 * Objects that do not have a filp (DMA backed) can't be
+		 * mapped as cached now. Write-combine should be enough.
+		 */
+		if (WARN_ON(!obj->filp))
+			return -EINVAL;
+
+		/*
+		 * As explained in Documentation/dma-buf-sharing.txt
+		 * we need this trick so that we can manually zap ptes
+		 * in order to fake coherency.
+		 */
+		fput(vma->vm_file);
+		get_file(obj->filp);
+		vma->vm_file = obj->filp;
+
+		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	} else { /* PL111_BOT_UNCACHED */
+		vma->vm_page_prot =
+			pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+	}
+	return 0;
+}
+
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma)
+{
+	int ret;
+	struct drm_file *priv = file_priv->private_data;
+	struct drm_device *dev = priv->minor->dev;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_hash_item *hash;
+	struct drm_local_map *map = NULL;
+#else
+	struct drm_vma_offset_node *node;
+#endif
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+
+	DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash);
+	map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+	obj = map->handle;
+#else
+	node = drm_vma_offset_exact_lookup(dev->vma_offset_manager,
+			vma->vm_pgoff,
+			vma_pages(vma));
+	obj = container_of(node, struct drm_gem_object, vma_node);
+#endif
+	bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("DRM %s on pl111_gem_bo %p bo->type 0x%08x\n", __func__, bo, bo->type);
+
+	/* for an imported buffer we let the exporter handle the mmap */
+	if (obj->import_attach)
+		return dma_buf_mmap(obj->import_attach->dmabuf, vma, 0);
+
+	ret = drm_gem_mmap(file_priv, vma);
+	if (ret < 0) {
+		DRM_ERROR("failed to mmap\n");
+		return ret;
+	}
+
+	/* Our page fault handler uses the page offset calculated from the vma,
+	 * so we need to remove the gem cookie offset specified in the call.
+	 */
+	vma->vm_pgoff = 0;
+
+	return pl111_bo_mmap(obj, bo, vma, vma->vm_end - vma->vm_start);
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c
new file mode 100755
index 0000000..1d613d0
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c
@@ -0,0 +1,417 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_pl111.c
+ * PL111 specific functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+/* This can't be called from IRQ context, due to clk_get() and board->enable */
+static int clcd_enable(struct drm_framebuffer *fb)
+{
+	__u32 cntl;
+	struct clcd_board *board;
+
+	pr_info("DRM %s\n", __func__);
+
+	clk_prepare_enable(priv.clk);
+
+	/* Enable and Power Up */
+	cntl = CNTL_LCDEN | CNTL_LCDTFT | CNTL_LCDPWR | CNTL_LCDVCOMP(1);
+	DRM_DEBUG_KMS("fb->bits_per_pixel = %d\n", fb->bits_per_pixel);
+	if (fb->bits_per_pixel == 16)
+		cntl |= CNTL_LCDBPP16_565;
+	else if (fb->bits_per_pixel == 32 && fb->depth == 24)
+		cntl |= CNTL_LCDBPP24;
+	else
+		BUG_ON(1);
+
+	cntl |= CNTL_BGR;
+
+	writel(cntl, priv.regs + CLCD_PL111_CNTL);
+
+	if (priv.amba_dev->dev.platform_data) {
+		board = priv.amba_dev->dev.platform_data;
+
+		if (board->enable)
+			board->enable(NULL);
+	}
+
+	/* Enable Interrupts */
+	writel(CLCD_IRQ_NEXTBASE_UPDATE, priv.regs + CLCD_PL111_IENB);
+
+	return 0;
+}
+
+int clcd_disable(struct drm_crtc *crtc)
+{
+	struct clcd_board *board;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	unsigned long flags;
+#endif
+
+	pr_info("DRM %s\n", __func__);
+
+	/* Disable Interrupts */
+	writel(0x00000000, priv.regs + CLCD_PL111_IENB);
+
+	if (priv.amba_dev->dev.platform_data) {
+		board = priv.amba_dev->dev.platform_data;
+
+		if (board->disable)
+			board->disable(NULL);
+	}
+
+	/* Disable and Power Down */
+	writel(0, priv.regs + CLCD_PL111_CNTL);
+
+	/* Disable clock */
+	clk_disable_unprepare(priv.clk);
+
+	pl111_crtc->last_bpp = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+	/* Release the previous buffers */
+	if (pl111_crtc->old_kds_res_set != NULL)
+		kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+
+	pl111_crtc->old_kds_res_set = NULL;
+	spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+	return 0;
+}
+
+/*
+ * To avoid a possible race where "pl111_crtc->current_update_res" has
+ * been updated (non NULL) but the corresponding scanout buffer has not been
+ * written to the base registers we must always call this function holding
+ * the "base_update_lock" spinlock with IRQs disabled (spin_lock_irqsave()).
+ */
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	struct drm_framebuffer *fb;
+	struct pl111_gem_bo *bo;
+	size_t min_size;
+	fb = flip_res->fb;
+	bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+
+
+
+	min_size = (fb->height - 1) * fb->pitches[0]
+	         + fb->width * (fb->bits_per_pixel >> 3);
+
+	BUG_ON(bo->gem_object.size < min_size);
+
+	/* Don't even attempt PL111_BOT_SHM, it's not contiguous */
+	BUG_ON(bo->type != PL111_BOT_DMA);
+
+	/*
+	 * Note the buffer for releasing after IRQ, and don't allow any more
+	 * updates until then.
+	 *
+	 * This clcd controller latches the new address on next vsync. Address
+	 * latching is indicated by CLCD_IRQ_NEXTBASE_UPDATE, and so we must
+	 * wait for that before releasing the previous buffer's kds
+	 * resources. Otherwise, we'll allow writers to write to the old buffer
+	 * whilst it is still being displayed
+	 */
+	pl111_crtc->current_update_res = flip_res;
+
+	DRM_DEBUG_KMS("Displaying fb 0x%p, dumb_bo 0x%p, physaddr %.8x\n",
+			fb, bo, bo->backing_data.dma.fb_dev_addr);
+	
+	if (drm_vblank_get(pl111_crtc->crtc.dev, pl111_crtc->crtc_index) < 0)
+		DRM_ERROR("Could not get vblank reference for crtc %d\n",
+				pl111_crtc->crtc_index);
+
+	/* Set the scanout buffer */
+	writel(bo->backing_data.dma.fb_dev_addr, priv.regs + CLCD_UBAS);
+	writel(bo->backing_data.dma.fb_dev_addr +
+		((fb->height - 1) * fb->pitches[0]), priv.regs + CLCD_LBAS);
+}
+
+void
+show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource *flip_res,
+					struct drm_framebuffer *fb)
+{
+	unsigned long irq_flags;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+	spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+	if (list_empty(&pl111_crtc->update_queue) &&
+			!pl111_crtc->current_update_res) {
+		do_flip_to_res(flip_res);
+	} else {
+		/*
+		 * Enqueue the update to occur on a future IRQ
+		 * This only happens on triple-or-greater buffering
+		 */
+		DRM_DEBUG_KMS("Deferring 3+ buffered flip to fb %p to IRQ\n",
+				fb);
+		list_add_tail(&flip_res->link, &pl111_crtc->update_queue);
+	}
+	spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+	if (!flip_res->page_flip && (pl111_crtc->last_bpp == 0 ||
+			pl111_crtc->last_bpp != fb->bits_per_pixel ||
+			!drm_mode_equal(pl111_crtc->new_mode,
+					pl111_crtc->current_mode))) {
+		struct clcd_regs timing;
+
+		pl111_convert_drm_mode_to_timing(pl111_crtc->new_mode, &timing);
+
+		DRM_DEBUG_KMS("Set timing: %08X:%08X:%08X:%08X clk=%ldHz\n",
+				timing.tim0, timing.tim1, timing.tim2,
+				timing.tim3, timing.pixclock);
+
+		/* This is the actual mode setting part */
+		clk_set_rate(priv.clk, timing.pixclock);
+
+		writel(timing.tim0, priv.regs + CLCD_TIM0);
+		writel(timing.tim1, priv.regs + CLCD_TIM1);
+		writel(timing.tim2, priv.regs + CLCD_TIM2);
+		writel(timing.tim3, priv.regs + CLCD_TIM3);
+
+		clcd_enable(fb);
+		pl111_crtc->last_bpp = fb->bits_per_pixel;
+	}
+
+	if (!flip_res->page_flip) {
+		drm_mode_destroy(flip_res->crtc->dev, pl111_crtc->current_mode);
+		pl111_crtc->current_mode = pl111_crtc->new_mode;
+		pl111_crtc->new_mode = NULL;
+	}
+
+	BUG_ON(!pl111_crtc->current_mode);
+
+	/*
+	 * If IRQs weren't enabled before, they are now. This will eventually
+	 * cause flip_res to be released via pl111_common_irq, which updates
+	 * every time the Base Address is latched (i.e. every frame, regardless
+	 * of whether we update the base address or not)
+	 */
+}
+
+irqreturn_t pl111_irq(int irq, void *data)
+{
+	u32 irq_stat;
+	struct pl111_drm_crtc *pl111_crtc = priv.pl111_crtc;
+
+	irq_stat = readl(priv.regs + CLCD_PL111_MIS);
+
+	if (!irq_stat)
+		return IRQ_NONE;
+
+	if (irq_stat & CLCD_IRQ_NEXTBASE_UPDATE)
+		pl111_common_irq(pl111_crtc);
+
+	/* Clear the interrupt once done */
+	writel(irq_stat, priv.regs + CLCD_PL111_ICR);
+
+	return IRQ_HANDLED;
+}
+
+int pl111_device_init(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	int ret;
+
+	if (priv == NULL) {
+		pr_err("%s no private data\n", __func__);
+		return -EINVAL;
+	}
+
+	if (priv->amba_dev == NULL) {
+		pr_err("%s no amba device found\n", __func__);
+		return -EINVAL;
+	}
+
+	/* set up MMIO for register access */
+	priv->mmio_start = priv->amba_dev->res.start;
+	priv->mmio_len = resource_size(&priv->amba_dev->res);
+
+	DRM_DEBUG_KMS("mmio_start=%lu, mmio_len=%u\n", priv->mmio_start,
+			priv->mmio_len);
+
+	priv->regs = ioremap(priv->mmio_start, priv->mmio_len);
+	if (priv->regs == NULL) {
+		pr_err("%s failed mmio\n", __func__);
+		return -EINVAL;
+	}
+
+	/* turn off interrupts */
+	writel(0, priv->regs + CLCD_PL111_IENB);
+
+	ret = request_irq(priv->amba_dev->irq[0], pl111_irq, 0,
+				"pl111_irq_handler", NULL);
+	if (ret != 0) {
+		pr_err("%s failed %d\n", __func__, ret);
+		goto out_mmio;
+	}
+
+	goto finish;
+
+out_mmio:
+	iounmap(priv->regs);
+finish:
+	DRM_DEBUG_KMS("pl111_device_init returned %d\n", ret);
+	return ret;
+}
+
+void pl111_device_fini(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	u32 cntl;
+
+	if (priv == NULL || priv->regs == NULL)
+		return;
+
+	free_irq(priv->amba_dev->irq[0], NULL);
+
+	cntl = readl(priv->regs + CLCD_PL111_CNTL);
+
+	cntl &= ~CNTL_LCDEN;
+	writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+	cntl &= ~CNTL_LCDPWR;
+	writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+	iounmap(priv->regs);
+}
+
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id)
+{
+	struct clcd_board *board = dev->dev.platform_data;
+	int ret;
+	pr_info("DRM %s\n", __func__);
+
+	if (!board)
+		dev_warn(&dev->dev, "board data not available\n");
+
+	ret = amba_request_regions(dev, NULL);
+	if (ret != 0) {
+		DRM_ERROR("CLCD: unable to reserve regs region\n");
+		goto out;
+	}
+
+	priv.amba_dev = dev;
+
+	priv.clk = clk_get(&priv.amba_dev->dev, NULL);
+	if (IS_ERR(priv.clk)) {
+		DRM_ERROR("CLCD: unable to get clk.\n");
+		ret = PTR_ERR(priv.clk);
+		goto clk_err;
+	}
+
+	return 0;
+
+clk_err:
+	amba_release_regions(dev);
+out:
+	return ret;
+}
+
+int pl111_amba_remove(struct amba_device *dev)
+{
+	DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+	clk_put(priv.clk);
+
+	amba_release_regions(dev);
+
+	priv.amba_dev = NULL;
+
+	return 0;
+}
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+					struct clcd_regs *timing)
+{
+	unsigned int ppl, hsw, hfp, hbp;
+	unsigned int lpp, vsw, vfp, vbp;
+	unsigned int cpl;
+
+	memset(timing, 0, sizeof(struct clcd_regs));
+
+	ppl = (mode->hdisplay / 16) - 1;
+	hsw = mode->hsync_end - mode->hsync_start - 1;
+	hfp = mode->hsync_start - mode->hdisplay - 1;
+	hbp = mode->htotal - mode->hsync_end - 1;
+
+	lpp = mode->vdisplay - 1;
+	vsw = mode->vsync_end - mode->vsync_start - 1;
+	vfp = mode->vsync_start - mode->vdisplay;
+	vbp = mode->vtotal - mode->vsync_end;
+
+	cpl = mode->hdisplay - 1;
+
+	timing->tim0 = (ppl << 2) | (hsw << 8) | (hfp << 16) | (hbp << 24);
+	timing->tim1 = lpp | (vsw << 10) | (vfp << 16) | (vbp << 24);
+	timing->tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC | TIM2_BCD | (cpl << 16);
+	timing->tim3 = 0;
+
+	timing->pixclock = mode->clock * 1000;
+}
+
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+					struct drm_display_mode *mode)
+{
+	unsigned int ppl, hsw, hfp, hbp;
+	unsigned int lpp, vsw, vfp, vbp;
+
+	ppl = (timing->tim0 >> 2) & 0x3f;
+	hsw = (timing->tim0 >> 8) & 0xff;
+	hfp = (timing->tim0 >> 16) & 0xff;
+	hbp = (timing->tim0 >> 24) & 0xff;
+
+	lpp = timing->tim1 & 0x3ff;
+	vsw = (timing->tim1 >> 10) & 0x3f;
+	vfp = (timing->tim1 >> 16) & 0xff;
+	vbp = (timing->tim1 >> 24) & 0xff;
+
+	mode->hdisplay    = (ppl + 1) * 16;
+	mode->hsync_start = ((ppl + 1) * 16) + hfp + 1;
+	mode->hsync_end   = ((ppl + 1) * 16) + hfp + hsw + 2;
+	mode->htotal      = ((ppl + 1) * 16) + hfp + hsw + hbp + 3;
+	mode->hskew       = 0;
+
+	mode->vdisplay    = lpp + 1;
+	mode->vsync_start = lpp + vfp + 1;
+	mode->vsync_end   = lpp + vfp + vsw + 2;
+	mode->vtotal      = lpp + vfp + vsw + vbp + 2;
+
+	mode->flags = 0;
+
+	mode->width_mm = 0;
+	mode->height_mm = 0;
+
+	mode->clock = timing->pixclock / 1000;
+	mode->hsync = timing->pixclock / mode->htotal;
+	mode->vrefresh = mode->hsync / mode->vtotal;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c
new file mode 100755
index 0000000..9d5ec0c
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c
@@ -0,0 +1,151 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_platform.c
+ * Implementation of the Linux platform device entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+static int pl111_platform_drm_suspend(struct platform_device *dev,
+					pm_message_t state)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+static int pl111_platform_drm_resume(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+int pl111_platform_drm_probe(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return pl111_drm_init(dev);
+}
+
+static int pl111_platform_drm_remove(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	pl111_drm_exit(dev);
+
+	return 0;
+}
+
+static struct amba_id pl111_id_table[] = {
+	{
+	.id = 0x00041110,
+	.mask = 0x000ffffe,
+	},
+	{0, 0},
+};
+
+static struct amba_driver pl111_amba_driver = {
+	.drv = {
+		.name = "clcd-pl11x",
+		},
+	.probe = pl111_amba_probe,
+	.remove = pl111_amba_remove,
+	.id_table = pl111_id_table,
+};
+
+static struct platform_driver platform_drm_driver = {
+	.probe = pl111_platform_drm_probe,
+	.remove = pl111_platform_drm_remove,
+	.suspend = pl111_platform_drm_suspend,
+	.resume = pl111_platform_drm_resume,
+	.driver = {
+			.owner = THIS_MODULE,
+			.name = DRIVER_NAME,
+		},
+};
+
+static const struct platform_device_info pl111_drm_pdevinfo = {
+	.name = DRIVER_NAME,
+	.id = -1,
+	.dma_mask = ~0UL
+};
+
+static struct platform_device *pl111_drm_device;
+
+static int __init pl111_platform_drm_init(void)
+{
+	int ret;
+
+	pr_info("DRM %s\n", __func__);
+
+	pl111_drm_device = platform_device_register_full(&pl111_drm_pdevinfo);
+	if (pl111_drm_device == NULL) {
+		pr_err("DRM platform_device_register_full() failed\n");
+		return -ENOMEM;
+	}
+
+	ret = amba_driver_register(&pl111_amba_driver);
+	if (ret != 0) {
+		pr_err("DRM amba_driver_register() failed %d\n", ret);
+		goto err_amba_reg;
+	}
+
+	ret = platform_driver_register(&platform_drm_driver);
+	if (ret != 0) {
+		pr_err("DRM platform_driver_register() failed %d\n", ret);
+		goto err_pdrv_reg;
+	}
+
+	return 0;
+
+err_pdrv_reg:
+	amba_driver_unregister(&pl111_amba_driver);
+err_amba_reg:
+	platform_device_unregister(pl111_drm_device);
+
+	return ret;
+}
+
+static void __exit pl111_platform_drm_exit(void)
+{
+	pr_info("DRM %s\n", __func__);
+
+	platform_device_unregister(pl111_drm_device);
+	amba_driver_unregister(&pl111_amba_driver);
+	platform_driver_unregister(&platform_drm_driver);
+}
+
+#ifdef MODULE
+module_init(pl111_platform_drm_init);
+#else
+late_initcall(pl111_platform_drm_init);
+#endif
+module_exit(pl111_platform_drm_exit);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_LICENSE(DRIVER_LICENCE);
+MODULE_ALIAS(DRIVER_ALIAS);
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c
new file mode 100755
index 0000000..d033566
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_suspend.c
+ * Implementation of the suspend/resume functions for PL111 DRM
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+int pl111_drm_resume(struct drm_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c
new file mode 100755
index 0000000..ff602ef
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c
@@ -0,0 +1,308 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_vma.c
+ * Implementation of the VM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+/* BEGIN drivers/staging/omapdrm/omap_gem_helpers.c */
+/**
+ * drm_gem_put_pages - helper to free backing pages for a GEM object
+ * @obj: obj in question
+ * @pages: pages to free
+ */
+static void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
+				bool dirty, bool accessed)
+{
+	int i, npages;
+	struct pl111_gem_bo *bo;
+	npages = obj->size >> PAGE_SHIFT;
+	bo = PL111_BO_FROM_GEM(obj);
+	for (i = 0; i < npages; i++) {
+		if (dirty)
+			set_page_dirty(pages[i]);
+		if (accessed)
+			mark_page_accessed(pages[i]);
+		/* Undo the reference we took when populating the table */
+		page_cache_release(pages[i]);
+	}
+	drm_free_large(pages);
+}
+
+void put_pages(struct drm_gem_object *obj, struct page **pages)
+{
+	int i, npages;
+	struct pl111_gem_bo *bo;
+	npages = obj->size >> PAGE_SHIFT;
+	bo = PL111_BO_FROM_GEM(obj);
+	_drm_gem_put_pages(obj, pages, true, true);
+	if (bo->backing_data.shm.dma_addrs) {
+		for (i = 0; i < npages; i++) {
+			/* Filter pages unmapped because of CPU accesses */
+			if (!bo->backing_data.shm.dma_addrs[i])
+				continue;
+			if (!dma_mapping_error(obj->dev->dev,
+					bo->backing_data.shm.dma_addrs[i])) {
+				dma_unmap_page(obj->dev->dev,
+					bo->backing_data.shm.dma_addrs[i],
+					PAGE_SIZE,
+					DMA_BIDIRECTIONAL);
+			}
+		}
+		kfree(bo->backing_data.shm.dma_addrs);
+		bo->backing_data.shm.dma_addrs = NULL;
+	}
+}
+
+/**
+ * drm_gem_get_pages - helper to allocate backing pages for a GEM object
+ * @obj: obj in question
+ * @gfpmask: gfp mask of requested pages
+ */
+static struct page **_drm_gem_get_pages(struct drm_gem_object *obj,
+					gfp_t gfpmask)
+{
+	struct inode *inode;
+	struct address_space *mapping;
+	struct page *p, **pages;
+	int i, npages;
+
+	/* This is the shared memory object that backs the GEM resource */
+	inode = obj->filp->f_path.dentry->d_inode;
+	mapping = inode->i_mapping;
+
+	npages = obj->size >> PAGE_SHIFT;
+
+	pages = drm_malloc_ab(npages, sizeof(struct page *));
+	if (pages == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	gfpmask |= mapping_gfp_mask(mapping);
+
+	for (i = 0; i < npages; i++) {
+		p = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
+		if (IS_ERR(p))
+			goto fail;
+		pages[i] = p;
+
+		/*
+		 * There is a hypothetical issue w/ drivers that require
+		 * buffer memory in the low 4GB.. if the pages are un-
+		 * pinned, and swapped out, they can end up swapped back
+		 * in above 4GB.  If pages are already in memory, then
+		 * shmem_read_mapping_page_gfp will ignore the gfpmask,
+		 * even if the already in-memory page disobeys the mask.
+		 *
+		 * It is only a theoretical issue today, because none of
+		 * the devices with this limitation can be populated with
+		 * enough memory to trigger the issue.  But this BUG_ON()
+		 * is here as a reminder in case the problem with
+		 * shmem_read_mapping_page_gfp() isn't solved by the time
+		 * it does become a real issue.
+		 *
+		 * See this thread: http://lkml.org/lkml/2011/7/11/238
+		 */
+		BUG_ON((gfpmask & __GFP_DMA32) &&
+			(page_to_pfn(p) >= 0x00100000UL));
+	}
+
+	return pages;
+
+fail:
+	while (i--)
+		page_cache_release(pages[i]);
+
+	drm_free_large(pages);
+	return ERR_PTR(PTR_ERR(p));
+}
+
+struct page **get_pages(struct drm_gem_object *obj)
+{
+	struct pl111_gem_bo *bo;
+	bo = PL111_BO_FROM_GEM(obj);
+
+	if (bo->backing_data.shm.pages == NULL) {
+		struct page **p;
+		int npages = obj->size >> PAGE_SHIFT;
+		int i;
+
+		p = _drm_gem_get_pages(obj, GFP_KERNEL);
+		if (IS_ERR(p))
+			return ERR_PTR(-ENOMEM);
+
+		bo->backing_data.shm.pages = p;
+
+		if (bo->backing_data.shm.dma_addrs == NULL) {
+			bo->backing_data.shm.dma_addrs =
+				kzalloc(npages * sizeof(dma_addr_t),
+					GFP_KERNEL);
+			if (bo->backing_data.shm.dma_addrs == NULL)
+				goto error_out;
+		}
+
+		if (!(bo->type & PL111_BOT_CACHED)) {
+			for (i = 0; i < npages; ++i) {
+				bo->backing_data.shm.dma_addrs[i] =
+					dma_map_page(obj->dev->dev, p[i], 0, PAGE_SIZE,
+						DMA_BIDIRECTIONAL);
+				if (dma_mapping_error(obj->dev->dev,
+						bo->backing_data.shm.dma_addrs[i]))
+					goto error_out;
+			}
+		}
+	}
+
+	return bo->backing_data.shm.pages;
+
+error_out:
+	put_pages(obj, bo->backing_data.shm.pages);
+	bo->backing_data.shm.pages = NULL;
+	return ERR_PTR(-ENOMEM);
+}
+
+/* END drivers/staging/omapdrm/omap_gem_helpers.c */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page **pages;
+	unsigned long pfn;
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	mutex_lock(&dev->struct_mutex);
+
+	/*
+	 * Our mmap calls setup a valid vma->vm_pgoff
+	 * so we can use vmf->pgoff
+	 */
+
+	if (bo->type & PL111_BOT_DMA) {
+		pfn = (bo->backing_data.dma.fb_dev_addr >> PAGE_SHIFT) +
+				vmf->pgoff;
+	} else { /* PL111_BOT_SHM */
+		pages = get_pages(obj);
+		if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev,
+				"could not get pages: %ld\n", PTR_ERR(pages));
+			ret = PTR_ERR(pages);
+			goto error;
+		}
+		pfn = page_to_pfn(pages[vmf->pgoff]);
+		pl111_gem_sync_to_cpu(bo, vmf->pgoff);
+	}
+
+	DRM_DEBUG("bo=%p physaddr=0x%.8x for offset 0x%x\n",
+				bo, PFN_PHYS(pfn), PFN_PHYS(vmf->pgoff));
+
+	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
+
+error:
+	mutex_unlock(&dev->struct_mutex);
+
+	switch (ret) {
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+/*
+ * The core drm_vm_ functions in kernel 3.4 are not ready
+ * to handle dma_buf cases where vma->vm_file->private_data
+ * cannot be accessed to get the device.
+ * 
+ * We use these functions from 3.5 instead where the device
+ * pointer is passed explicitly.
+ *
+ * However they aren't exported from the kernel until 3.10
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+                struct vm_area_struct *vma)
+{
+	struct drm_vma_entry *vma_entry;
+
+	DRM_DEBUG("0x%08lx,0x%08lx\n",
+		vma->vm_start, vma->vm_end - vma->vm_start);
+	atomic_inc(&dev->vma_count);
+
+	vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL);
+	if (vma_entry) {
+		vma_entry->vma = vma;
+		vma_entry->pid = current->pid;
+		list_add(&vma_entry->head, &dev->vmalist);
+	}
+}
+
+void pl111_drm_vm_close_locked(struct drm_device *dev,
+                struct vm_area_struct *vma)
+{
+	struct drm_vma_entry *pt, *temp;
+
+	DRM_DEBUG("0x%08lx,0x%08lx\n",
+		vma->vm_start, vma->vm_end - vma->vm_start);
+	atomic_dec(&dev->vma_count);
+
+	list_for_each_entry_safe(pt, temp, &dev->vmalist, head) {
+		if (pt->vma == vma) {
+			list_del(&pt->head);
+			kfree(pt);
+			break;
+		}
+	}
+}
+
+void pl111_gem_vm_open(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+
+	drm_gem_object_reference(obj);
+
+	mutex_lock(&obj->dev->struct_mutex);
+	pl111_drm_vm_open_locked(obj->dev, vma);
+	mutex_unlock(&obj->dev->struct_mutex);
+}
+
+void pl111_gem_vm_close(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct drm_device *dev = obj->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	pl111_drm_vm_close_locked(obj->dev, vma);
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&dev->struct_mutex);
+}
+#endif
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/pl111/sconscript b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/sconscript
new file mode 100755
index 0000000..5c47de7
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/pl111/sconscript
@@ -0,0 +1,52 @@
+#
+# (C) COPYRIGHT 2010-2013, 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import shutil
+Import('env')
+
+# Generate a build environment for the integration tests, taking a copy of the top-level build environment 
+# (env) as a start.
+drm_env = env.Clone()
+
+# Xorg uses C++ style comments and 'inline' keyword
+if '-std=c89' in drm_env['CFLAGS']:
+	drm_env['CFLAGS'].remove('-std=c89')
+
+# X11 generates a lot of warnings
+if '-Werror' in drm_env['CCFLAGS']:
+	drm_env['CCFLAGS'].remove('-Werror')
+
+#remove the 'lib'prefix
+drm_env['LIBPREFIX'] = ''
+
+src = Glob('*.c')
+
+if drm_env.GetOption('clean') :
+	drm_env.Execute(Action("make clean", 'clean [pl111]'))
+	cmd = drm_env.Command('$STATIC_LIB_PATH/mali_drm.ko', src, [])
+else:
+	# The target is mali_drm.ko, built from the source in pl111_drm/pl111, via the action makeAction
+	# mali_drm.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+	# kernel build system, after which it can be installed to the directory specified if
+	# "libs_install" is set; this is done by LibTarget.
+	makeAction=Action("cd ${SOURCE.dir} && make MALI_DEBUG=${debug} && cp pl111_drm.ko $STATIC_LIB_PATH/mali_drm.ko", '$MAKECOMSTR')
+	cmd = drm_env.Command('$STATIC_LIB_PATH/mali_drm.ko', src, [makeAction])
+
+# need Module.symvers from drm.ko
+#drm_env.Depends('$STATIC_LIB_PATH/pl111_drm.ko', '$STATIC_LIB_PATH/drm.ko')
+
+drm_env.KernelObjTarget('x11', cmd)
+
diff --git a/midgard/r13p0/kernel/drivers/gpu/drm/sconscript b/midgard/r13p0/kernel/drivers/gpu/drm/sconscript
new file mode 100755
index 0000000..a90fa89
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/drm/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+if 'x11' in env['winsys'] or 'gbm' in env['winsys']:
+	# pl111_drm isn't released so only try to build it if it's there
+	if Glob('pl111/sconscript') and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'):
+		SConscript('pl111/sconscript')
+
+#SConscript('dummy_drm/sconscript')
diff --git a/midgard/r13p0/kernel/drivers/gpu/sconscript b/midgard/r13p0/kernel/drivers/gpu/sconscript
new file mode 100755
index 0000000..729dbda
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/gpu/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+SConscript('arm/sconscript')
+
+if Glob('drm/sconscript'):
+	SConscript('drm/sconscript')
diff --git a/midgard/r13p0/kernel/drivers/sconscript b/midgard/r13p0/kernel/drivers/sconscript
new file mode 100755
index 0000000..dd6a637
--- /dev/null
+++ b/midgard/r13p0/kernel/drivers/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+if Glob('base/sconscript'):
+	SConscript('base/sconscript')
+
+if Glob('video/sconscript'):
+	SConscript('video/sconscript')
+
+SConscript('gpu/sconscript')
diff --git a/midgard/r13p0/kernel/include/linux/dma-buf-test-exporter.h b/midgard/r13p0/kernel/include/linux/dma-buf-test-exporter.h
new file mode 100755
index 0000000..98984ba
--- /dev/null
+++ b/midgard/r13p0/kernel/include/linux/dma-buf-test-exporter.h
@@ -0,0 +1,78 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_
+#define _LINUX_DMA_BUF_TEST_EXPORTER_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+#define DMA_BUF_TE_VER_MAJOR 1
+#define DMA_BUF_TE_VER_MINOR 0
+#define DMA_BUF_TE_ENQ 0x642d7465
+#define DMA_BUF_TE_ACK 0x68692100
+
+struct dma_buf_te_ioctl_version
+{
+	int op;    /**< Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */
+	int major; /**< Major version */
+	int minor; /**< Minor version */
+};
+
+struct dma_buf_te_ioctl_alloc
+{
+	__u64 size; /* size of buffer to allocate, in pages */
+};
+
+struct dma_buf_te_ioctl_status
+{
+	/* in */
+	int fd; /* the dma_buf to query, only dma_buf objects exported by this driver is supported */
+	/* out */
+	int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */
+	int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */
+	int cpu_mappings;    /* number of cpu mappings (active 'mmap's) */
+};
+
+struct dma_buf_te_ioctl_set_failing
+{
+	/* in */
+	int fd; /* the dma_buf to set failure mode for, only dma_buf objects exported by this driver is supported */
+
+	/* zero = no fail injection, non-zero = inject failure */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+};
+
+struct dma_buf_te_ioctl_fill
+{
+	int fd;
+	unsigned int value;
+};
+
+#define DMA_BUF_TE_IOCTL_BASE 'E'
+/* Below all returning 0 if successful or -errcode except DMA_BUF_TE_ALLOC which will return fd or -errcode */
+#define DMA_BUF_TE_VERSION         _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version)
+#define DMA_BUF_TE_ALLOC           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_QUERY           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status)
+#define DMA_BUF_TE_SET_FAILING     _IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing)
+#define DMA_BUF_TE_ALLOC_CONT      _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_FILL            _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill)
+
+#endif /* _LINUX_DMA_BUF_TEST_EXPORTER_H_ */
diff --git a/midgard/r13p0/kernel/include/linux/kds.h b/midgard/r13p0/kernel/include/linux/kds.h
new file mode 100755
index 0000000..1346eda
--- /dev/null
+++ b/midgard/r13p0/kernel/include/linux/kds.h
@@ -0,0 +1,173 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KDS_H_
+#define _KDS_H_
+
+#include <linux/list.h>
+#include <linux/workqueue.h>
+
+#define KDS_WAIT_BLOCKING (ULONG_MAX)
+
+struct kds_resource_set;
+
+typedef void (*kds_callback_fn) (void *callback_parameter, void *callback_extra_parameter);
+
+struct kds_callback
+{
+	kds_callback_fn  user_cb; /* real cb */
+	int direct;               /* do direct or queued call? */
+	struct workqueue_struct *wq;
+};
+
+struct kds_link
+{
+	struct kds_resource_set *parent;
+	struct list_head         link;
+	unsigned long            state;
+};
+
+struct kds_resource
+{
+	struct kds_link waiters;
+};
+
+/* callback API */
+
+/* Initialize a callback object.
+ *
+ * Typically created per context or per hw resource.
+ *
+ * Callbacks can be performed directly if no nested locking can
+ * happen in the client.
+ *
+ * Nested locking can occur when a lock is held during the kds_async_waitall or
+ * kds_resource_set_release call. If the callback needs to take the same lock
+ * nested locking will happen.
+ *
+ * If nested locking could happen non-direct callbacks can be requested.
+ * Callbacks will then be called asynchronous to the triggering call.
+ */
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb);
+
+/* Terminate the use of a callback object.
+ *
+ * If the callback object was set up as non-direct
+ * any pending callbacks will be flushed first.
+ * Note that to avoid a deadlock the lock callbacks needs
+ * can't be held when a callback object is terminated.
+ */
+void kds_callback_term(struct kds_callback *cb);
+
+
+/* resource object API */
+
+/* initialize a resource handle for a shared resource */
+void kds_resource_init(struct kds_resource * const resource);
+
+/*
+ * Will return 0 on success.
+ * If the resource is being used or waited -EBUSY is returned.
+ * The caller should NOT try to terminate a resource that could still have clients.
+ * After the function returns the resource is no longer known by kds.
+ */
+int kds_resource_term(struct kds_resource *resource);
+
+/* Asynchronous wait for a set of resources.
+ * Callback will be called when all resources are available.
+ * If all the resources was available the callback will be called before kds_async_waitall returns.
+ * So one must not hold any locks the callback code-flow can take when calling kds_async_waitall.
+ * Caller considered to own/use the resources until \a kds_rset_release is called.
+ * exclusive_access_bitmap is a bitmap where a high bit means exclusive access while a low bit means shared access.
+ * Use the Linux __set_bit API, where the index of the buffer to control is used as the bit index.
+ *
+ * Standard Linux error return value.
+ */
+int kds_async_waitall(
+		struct kds_resource_set ** const pprset,
+		struct kds_callback      *cb,
+		void                     *callback_parameter,
+		void                     *callback_extra_parameter,
+		int                       number_resources,
+		unsigned long            *exclusive_access_bitmap,
+		struct kds_resource     **resource_list);
+
+/* Synchronous wait for a set of resources.
+ * Function will return when one of these have happened:
+ * - all resources have been obtained
+ * - timeout lapsed while waiting
+ * - a signal was received while waiting
+ *
+ * To wait without a timeout, specify KDS_WAIT_BLOCKING for \a jifies_timeout, otherwise
+ * the timeout in jiffies. A zero timeout attempts to obtain all resources and returns
+ * immediately with a timeout if all resources could not be obtained.
+ *
+ * Caller considered to own/use the resources when the function returns.
+ * Caller must release the resources using \a kds_rset_release.
+ *
+ * Calling this function while holding already locked resources or other locking primitives is dangerous.
+ * One must if this is needed decide on a lock order of the resources and/or the other locking primitives
+ * and always take the resources/locking primitives in the specific order.
+ *
+ * Use the ERR_PTR framework to decode the return value.
+ * NULL = time out
+ * If IS_ERR then PTR_ERR gives:
+ *  ERESTARTSYS = signal received, retry call after signal
+ *  all other values = internal error, lock failed
+ * Other values  = successful wait, now the owner, must call kds_resource_set_release
+ */
+struct kds_resource_set *kds_waitall(
+		int                   number_resources,
+		unsigned long        *exclusive_access_bitmap,
+		struct kds_resource **resource_list,
+		unsigned long         jifies_timeout);
+
+/* Release resources after use.
+ * Caller must handle that other async callbacks will trigger,
+ * so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your poiner to handle a race
+ * between a cancelation and a completion.
+ *
+ * If the caller can't guarantee that a race can't occur then
+ * the passed in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ */
+void kds_resource_set_release(struct kds_resource_set **pprset);
+
+/* Release resources after use and wait callbacks to complete.
+ * Caller must handle that other async callbacks will trigger,
+ * so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your poiner to handle a race
+ * between a cancelation and a completion.
+ *
+ * If the caller can't guarantee that a race can't occur then
+ * the passed in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ *
+ * This should be used to cancel waits which are pending on a kds
+ * resource.
+ *
+ * It is a bug to call this from atomic contexts and from within
+ * a kds callback that now owns the kds_rseource.
+ */
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset);
+#endif /* _KDS_H_ */
diff --git a/midgard/r13p0/kernel/include/linux/ump-common.h b/midgard/r13p0/kernel/include/linux/ump-common.h
new file mode 100755
index 0000000..0015bda
--- /dev/null
+++ b/midgard/r13p0/kernel/include/linux/ump-common.h
@@ -0,0 +1,252 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump-common.h
+ *
+ * This file contains some common enum values used both in both the user and kernel space side of UMP.
+ */
+
+#ifndef _UMP_COMMON_H_
+#define _UMP_COMMON_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#define UMP_UINT32_MAX (4294967295U)
+#define UMP_UINT64_MAX (18446744073709551615ULL)
+
+#ifdef __GNUC__
+	#define CHECK_RESULT        __attribute__((__warn_unused_result__))
+	#define INLINE	__inline__
+#else
+	#define CHECK_RESULT
+	#define INLINE __inline
+#endif
+
+#ifndef STATIC
+	#define STATIC static
+#endif
+
+/**
+ * Values to identify major and minor version of UMP
+ */
+#define UMP_VERSION_MAJOR 2
+#define UMP_VERSION_MINOR 0
+
+/**
+ * Typedef for a secure ID, a system wide identifier for UMP memory buffers.
+ */
+typedef int32_t ump_secure_id;
+
+/**
+ * Value to indicate an invalid secure Id.
+ */
+#define UMP_INVALID_SECURE_ID  ((ump_secure_id)0)
+
+/**
+ * UMP error codes.
+ */
+typedef enum
+{
+	UMP_OK    = 0, /**< indicates success */
+	UMP_ERROR = 1  /**< indicates failure */
+} ump_result;
+
+/**
+ * Allocation flag bits.
+ *
+ * ump_allocate accepts zero or more flags to specify the type of memory to allocate and how to expose it to devices.
+ *
+ * For each supported device there are 4 flags to control access permissions and give usage characteristic hints to optimize the allocation/mapping.
+ * They are;
+ * @li @a UMP_PROT_<device>_RD   read permission
+ * @li @a UMP_PROT_<device>_WR   write permission
+ * @li @a UMP_HINT_<device>_RD   read often
+ * @li @a UMP_HINT_<device>_WR   written often
+ *
+ * 5 devices are currently supported, with a device being the CPU itself.
+ * The other 4 devices will be mapped to real devices per SoC design.
+ * They are just named W,X,Y,Z by UMP as it has no knowledge of their real names/identifiers.
+ * As an example device W could be a camera device while device Z could be an ARM GPU device, leaving X and Y unused.
+ *
+ * 2 additional flags control the allocation;
+ * @li @a UMP_CONSTRAINT_PHYSICALLY_LINEAR   the allocation must be physical linear. Typical for devices without an MMU and no IOMMU to help it.
+ * @li @a UMP_PROT_SHAREABLE                 the allocation can be shared with other processes on the system. Without this flag the returned allocation won't be resolvable in other processes.
+ *
+ * All UMP allocation are growable unless they're @a UMP_PROT_SHAREABLE.
+ * The hint bits should be used to indicate the access pattern so the driver can select the most optimal memory type and cache settings based on the what the system supports.
+ */
+typedef enum
+{
+	/* Generic helpers */
+	UMP_PROT_DEVICE_RD = (1u << 0),
+	UMP_PROT_DEVICE_WR = (1u << 1),
+	UMP_HINT_DEVICE_RD = (1u << 2),
+	UMP_HINT_DEVICE_WR = (1u << 3),
+	UMP_DEVICE_MASK = 0xF,
+	UMP_DEVICE_CPU_SHIFT = 0,
+	UMP_DEVICE_W_SHIFT = 4,
+	UMP_DEVICE_X_SHIFT = 8,
+	UMP_DEVICE_Y_SHIFT = 12,
+	UMP_DEVICE_Z_SHIFT = 16,
+
+	/* CPU protection and hints. */
+	UMP_PROT_CPU_RD = (1u <<  0),
+	UMP_PROT_CPU_WR = (1u <<  1),
+	UMP_HINT_CPU_RD = (1u <<  2),
+	UMP_HINT_CPU_WR = (1u <<  3),
+
+	/* device W */
+	UMP_PROT_W_RD = (1u <<  4),
+	UMP_PROT_W_WR = (1u <<  5),
+	UMP_HINT_W_RD = (1u <<  6),
+	UMP_HINT_W_WR = (1u <<  7),
+
+	/* device X */
+	UMP_PROT_X_RD = (1u <<  8),
+	UMP_PROT_X_WR = (1u <<  9),
+	UMP_HINT_X_RD = (1u << 10),
+	UMP_HINT_X_WR = (1u << 11),
+	
+	/* device Y */
+	UMP_PROT_Y_RD = (1u << 12),
+	UMP_PROT_Y_WR = (1u << 13),
+	UMP_HINT_Y_RD = (1u << 14),
+	UMP_HINT_Y_WR = (1u << 15),
+
+	/* device Z */
+	UMP_PROT_Z_RD = (1u << 16),
+	UMP_PROT_Z_WR = (1u << 17),
+	UMP_HINT_Z_RD = (1u << 18),
+	UMP_HINT_Z_WR = (1u << 19),
+
+	/* 20-26 reserved for future use */
+	UMPP_ALLOCBITS_UNUSED = (0x7Fu << 20),
+	/** Allocations marked as @ UMP_CONSTRAINT_UNCACHED won't be mapped as cached by the cpu  */
+	UMP_CONSTRAINT_UNCACHED = (1u << 27),
+	/** Require 32-bit physically addressable memory */
+	UMP_CONSTRAINT_32BIT_ADDRESSABLE = (1u << 28),
+	/** For devices without an MMU and with no IOMMU assistance. */
+	UMP_CONSTRAINT_PHYSICALLY_LINEAR = (1u << 29),
+	/** Shareable must be set to allow the allocation to be used by other processes, the default is non-shared */
+	UMP_PROT_SHAREABLE = (1u << 30)
+	/* (1u << 31) should not be used to ensure compiler portability */
+} ump_allocation_bits;
+
+/**
+ * ump_allocation_bits combination argument type.
+ *
+ * Type used to pass zero or more bits from the @ref ump_allocation_bits enum
+ */
+typedef uint32_t ump_alloc_flags;
+
+
+/**
+ *  Default allocation flags for UMP v1 compatible allocations.
+ */
+#define UMP_V1_API_DEFAULT_ALLOCATION_FLAGS		UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | \
+												UMP_PROT_W_RD | UMP_PROT_W_WR |	\
+												UMP_PROT_X_RD | UMP_PROT_X_WR |	\
+												UMP_PROT_Y_RD | UMP_PROT_Y_WR |	\
+												UMP_PROT_Z_RD | UMP_PROT_Z_WR |	\
+												UMP_PROT_SHAREABLE |	\
+												UMP_CONSTRAINT_32BIT_ADDRESSABLE
+
+/**
+ * CPU cache sync operations.
+ *
+ * Cache synchronization operations to pass to @ref ump_cpu_msync_now
+ */
+enum
+{
+	/**
+	 * Cleans any dirty cache lines to main memory, but the data will still be available in the cache.
+	 * After a clean the contents of memory is considered to be "owned" by the device.
+	 * */
+	UMP_MSYNC_CLEAN = 1,
+
+	/** Cleans any dirty cache lines to main memory and Ejects all lines from the cache.
+	 * After an clean&invalidate the contents of memory is considered to be "owned" by the CPU.
+	 * Any subsequent access will fetch data from main memory.
+	 *
+	 * @note Due to CPUs doing speculative prefetching a UMP_MSYNC_CLEAN_AND_INVALIDATE must be done before and after interacting with hardware.
+	 * */
+	UMP_MSYNC_CLEAN_AND_INVALIDATE
+
+};
+
+typedef uint32_t ump_cpu_msync_op;
+
+/**
+ * Memory import types supported.
+ * If new import types are added they will appear here.
+ * They must be added before UMPP_EXTERNAL_MEM_COUNT and
+ * must be assigned an explicit sequantial number.
+ *
+ * @li UMP_EXTERNAL_MEM_TYPE_ION - Import an ION allocation
+ *                                 Takes a int* (pointer to a file descriptor)
+ *                                 Another ION reference is taken which is released on the final ump_release
+ */
+enum ump_external_memory_type
+{
+	UMPP_EXTERNAL_MEM_TYPE_UNUSED = 0, /* reserve type 0 */
+	UMP_EXTERNAL_MEM_TYPE_ION = 1,
+	UMPP_EXTERNAL_MEM_COUNT
+};
+
+/** @name UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Allocation constraints.
+ *
+ * Allocation flags to pass @ref ump_ref_drv_allocate
+ *
+ * UMP v1 API only.
+ */
+typedef enum
+{
+	/** the allocation is mapped as noncached. */
+	UMP_REF_DRV_CONSTRAINT_NONE = 0,
+	/** not supported. */
+	UMP_REF_DRV_CONSTRAINT_PHYSICALLY_LINEAR = 1,
+	/** the allocation is mapped as cached by the cpu. */
+	UMP_REF_DRV_CONSTRAINT_USE_CACHE = 4
+} ump_alloc_constraints;
+
+/* @} */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _UMP_COMMON_H_ */
diff --git a/midgard/r13p0/kernel/include/linux/ump-import.h b/midgard/r13p0/kernel/include/linux/ump-import.h
new file mode 100755
index 0000000..89ce727
--- /dev/null
+++ b/midgard/r13p0/kernel/include/linux/ump-import.h
@@ -0,0 +1,99 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IMPORT_H_
+#define _UMP_IMPORT_H_
+
+#include <linux/ump.h>
+#include <linux/module.h>
+
+/**
+ * UMP import module info.
+ * Contains information about the Linux module providing the import module,
+ * used to block unloading of the Linux module while imported memory exists.
+ * Lists the functions implementing the UMP import functions.
+ */
+struct ump_import_handler
+{
+	/**
+	 * Linux module of the import handler
+	 */
+	struct module * linux_module;
+
+	/**
+	 * UMP session usage begin.
+	 *
+	 * Called when a UMP session first is bound to the handler.
+	 * Typically used to set up any import module specific per-session data.
+	 * The function returns a pointer to this data in the output pointer custom_session_data
+	 * which will be passed to \a session_end and \a import.
+	 *
+	 * Note: custom_session_data must be set to non-NULL if successful.
+	 * If no pointer is needed set it a magic value to validate instead.
+	 *
+	 * @param[out] custom_session_data Pointer to a generic pointer where any data can be stored
+	 * @return 0 on success, error code if the session could not be initiated.
+	 */
+	int (*session_begin)(void ** custom_session_data);
+
+	/**
+	 * UMP session usage end.
+	 *
+	 * Called when a UMP session is no longer using the handler.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 */
+	void (*session_end)(void * custom_session_data);
+
+	/**
+	 * Import request.
+	 *
+	 * Called when a client has asked to import a resource of the type the import module was installed for.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * The requested flags must be verified to be valid to apply to the imported memory.
+	 * If not valid return UMP_DD_INVALID_MEMORY_HANDLE.
+	 * If the flags are found to be valid call \a ump_dd_create_from_phys_blocks_64 to create a handle.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 * @param[in] phandle Pointer to the handle to import
+	 * @param     flags The requested UMPv2 flags to assign to the imported handle
+	 * @return UMP_DD_INVALID_MEMORY_HANDLE if the import failed, a valid ump handle on success
+	 */
+	ump_dd_handle (*import)(void * custom_session_data, void * phandle, ump_alloc_flags flags);
+};
+
+/**
+ * Import module registration.
+ * Registers a ump_import_handler structure for a memory type.
+ * @param     type    Type of the memory to register a handler for
+ * @param[in] handler Handler strcture to install
+ * @return 0 on success, a Linux error code on failure
+ */
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler);
+
+/**
+ * Import module deregistration.
+ * Uninstalls the handler for the given memory type.
+ * @param type Type of the memory to unregister the handler for
+ */
+void ump_import_module_unregister(enum ump_external_memory_type type);
+
+#endif /* _UMP_IMPORT_H_ */
diff --git a/midgard/r13p0/kernel/include/linux/ump-ioctl.h b/midgard/r13p0/kernel/include/linux/ump-ioctl.h
new file mode 100755
index 0000000..451403e
--- /dev/null
+++ b/midgard/r13p0/kernel/include/linux/ump-ioctl.h
@@ -0,0 +1,152 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IOCTL_H_
+#define _UMP_IOCTL_H
+
+#include <linux/ump-common.h>
+
+/*
+ * The order and size of the members of these have been chosen so the structures look the same in 32-bit and 64-bit builds.
+ * If any changes are done build the ump_struct_size_checker test for 32-bit and 64-bit targets. Both must compile successfully to commit.
+ */
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union ump_pointer
+{
+	void * value; /**< client should store their pointers here */
+	uint32_t compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	uint64_t sizer; /**< Force 64-bit storage for all clients regardless */
+} ump_pointer;
+
+/**
+ * UMP allocation request.
+ * Used when performing ump_allocate
+ */
+typedef struct ump_k_allocate
+{
+	uint64_t size; /**< [in] Size in bytes to allocate */
+	ump_secure_id secure_id; /**< [out] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [in] Flags to use when allocating */
+} ump_k_allocate;
+
+/**
+ * UMP size query request.
+ * Used when performing ump_size_get
+ */
+typedef struct ump_k_sizequery
+{
+	uint64_t size; /**< [out] Size of allocation */
+	ump_secure_id secure_id; /**< [in] ID of allocation to query the size of */
+	uint32_t padding; /* don't remove */
+} ump_k_sizequery;
+
+/**
+ * UMP cache synchronization request.
+ * Used when performing ump_cpu_msync_now
+ */
+typedef struct ump_k_msync
+{
+	ump_pointer mapped_ptr; /**< [in] CPU VA to perform cache operation on */
+	ump_secure_id secure_id; /**< [in] ID of allocation to perform cache operation on */
+	ump_cpu_msync_op cache_operation; /**< [in] Cache operation to perform */
+	uint64_t size; /**< [in] Size in bytes of the range to synchronize */
+} ump_k_msync;
+
+/**
+ * UMP memory retain request.
+ * Used when performing ump_retain
+ */
+typedef struct ump_k_retain
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to retain a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_retain;
+
+/**
+ * UMP memory release request.
+ * Used when performing ump_release
+ */
+typedef struct ump_k_release
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to release a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_release;
+
+typedef struct ump_k_import
+{
+	ump_pointer phandle;                /**< [in]  Pointer to handle to import */
+	uint32_t type;                           /**< [in]  Type of handle to import */
+	ump_alloc_flags alloc_flags;        /**< [in]  Flags to assign to the imported memory */
+	ump_secure_id secure_id;            /**< [out] UMP ID representing the imported memory */
+	uint32_t padding;                        /* don't remove */
+} ump_k_import;
+
+/**
+ * UMP allocation flags request.
+ * Used when performing umpp_get_allocation_flags
+ *
+ * used only by v1 API
+ */
+typedef struct ump_k_allocation_flags
+{
+	ump_secure_id secure_id; /**< [in] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [out] Flags to use when allocating */
+} ump_k_allocation_flags;
+
+#define UMP_CALL_MAX_SIZE 512
+/*
+ * Ioctl definitions
+ */
+
+/* Use '~' as magic number */
+
+#define UMP_IOC_MAGIC  '~'
+
+#define UMP_FUNC_ALLOCATE _IOWR(UMP_IOC_MAGIC,  1, ump_k_allocate)
+#define UMP_FUNC_SIZEQUERY _IOWR(UMP_IOC_MAGIC,  2, ump_k_sizequery)
+#define UMP_FUNC_MSYNC _IOWR(UMP_IOC_MAGIC,  3, ump_k_msync)
+#define UMP_FUNC_RETAIN _IOW(UMP_IOC_MAGIC,  4, ump_k_retain)
+#define UMP_FUNC_RELEASE _IOW(UMP_IOC_MAGIC,  5, ump_k_release)
+#define UMP_FUNC_ALLOCATION_FLAGS_GET _IOWR(UMP_IOC_MAGIC,  6, ump_k_allocation_flags)
+#define UMP_FUNC_IMPORT _IOWR(UMP_IOC_MAGIC, 7, ump_k_import)
+
+/*max ioctl sequential number*/
+#define UMP_IOC_MAXNR 7
+
+/* 15 bits for the UMP ID (allowing 32768 IDs) */
+#define UMP_LINUX_ID_BITS 15
+#define UMP_LINUX_ID_MASK ((1ULL << UMP_LINUX_ID_BITS) - 1ULL)
+
+/* 64-bit (really 52 bits) encoding: 15 bits for the ID, 37 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_64 37
+#define UMP_LINUX_OFFSET_MASK_64 ((1ULL << UMP_LINUX_OFFSET_BITS_64)-1)
+/* 32-bit encoding: 15 bits for the ID, 17 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_32 17
+#define UMP_LINUX_OFFSET_MASK_32 ((1ULL << UMP_LINUX_OFFSET_BITS_32)-1)
+
+#if __SIZEOF_LONG__ == 8
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_64
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_64
+#else
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_32
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_32
+#endif
+
+#endif /* _UMP_IOCTL_H_ */
diff --git a/midgard/r13p0/kernel/include/linux/ump.h b/midgard/r13p0/kernel/include/linux/ump.h
new file mode 100755
index 0000000..16f9c39
--- /dev/null
+++ b/midgard/r13p0/kernel/include/linux/ump.h
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ *
+ * This file contains the kernel space part of the UMP API.
+ *
+ */
+
+#ifndef _UMP_KERNEL_INTERFACE_H_
+#define _UMP_KERNEL_INTERFACE_H_
+
+/**
+ * @addtogroup ump_api
+ * @{
+ */
+
+/** @defgroup ump_kernel_space_api UMP Kernel Space API
+ * @{ */
+
+/**
+ * External representation of a UMP handle in kernel space.
+ */
+typedef void * ump_dd_handle;
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+
+#include <linux/ump-common.h>
+
+#define UMP_KERNEL_API_EXPORT
+
+#if defined(__KERNEL__)
+#include <linux/ump-import.h>
+#else
+#include <ump/src/library/common/ump_user.h>
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_INVALID_MEMORY_HANDLE ((ump_dd_handle)0)
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block_64
+{
+	uint64_t addr; /**< The physical address of the block */
+	uint64_t size; /**< The length of the block, in bytes, typically page aligned */
+} ump_dd_physical_block_64;
+
+/**
+ * Security filter hook.
+ *
+ * Each allocation can have a security filter attached to it.@n
+ * The hook receives
+ * @li the secure ID
+ * @li a handle to the allocation
+ * @li  the callback_data argument provided to @ref ump_dd_allocate_64 or @ref ump_dd_create_from_phys_blocks_64
+ *
+ * The hook must return @a true to indicate that access to the handle is allowed or @n
+ * @a false to state that no access is permitted.@n
+ * This hook is guaranteed to be called in the context of the requesting process/address space.
+ *
+ * The arguments provided to the hook are;
+ * @li the secure ID
+ * @li handle to the allocation
+ * @li the callback_data set when registering the hook
+ *
+ * Return value;
+ * @li @a true to permit access
+ * @li @a false to deny access
+ */
+typedef bool (*ump_dd_security_filter)(ump_secure_id, ump_dd_handle, void *);
+
+/**
+ * Final release notify hook.
+ *
+ * Allocations can have a hook attached to them which is called when the last reference to the allocation is released.
+ * No reference count manipulation is allowed on the provided handle, just property querying (ID get, size get, phys block get).
+ * This is similar to finalizers in OO languages.
+ *
+ * The arguments provided to the hook are;
+ * * handle to the allocation
+ * * the callback_data set when registering the hook
+ */
+typedef void (*ump_dd_final_release_callback)(const ump_dd_handle, void *);
+
+/**
+ * Allocate a buffer.
+ * The lifetime of the allocation is controlled by a reference count.
+ * The reference count of the returned buffer is set to 1.
+ * The memory will be freed once the reference count reaches 0.
+ * Use @ref ump_dd_retain and @ref ump_dd_release to control the reference count.
+ * @param size Number of bytes to allocate. Will be padded up to a multiple of the page size.
+ * @param flags Bit-wise OR of zero or more of the allocation flag bits.
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a
+ * secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed,
+ * just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the new allocation, or @a UMP_DD_INVALID_MEMORY_HANDLE on allocation failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/**
+ * Allocation bits getter.
+ * Retrieves the allocation flags used when instantiating the given handle.
+ * Just a copy of the flag given to @ref ump_dd_allocate_64 and @ref ump_dd_create_from_phys_blocks_64
+ * @param mem The handle retrieve the bits for
+ * @return The allocation bits used to instantiate the allocation
+ */
+UMP_KERNEL_API_EXPORT ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem);
+
+
+/**
+ * Retrieves the secure ID for the specified UMP memory.
+ *
+ * This identifier is unique across the entire system, and uniquely identifies
+ * the specified UMP memory allocation. This identifier can later be used through the
+ * @ref ump_dd_from_secure_id or
+ * @ref ump_from_secure_id
+ * functions in order to access this UMP memory, for instance from another process (if shared of course).
+ * Unless the allocation was marked as shared the returned ID will only be resolvable in the same process as did the allocation.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE will result in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_secure_id_get
+ *
+ * @see ump_dd_from_secure_id
+ * @see ump_from_secure_id
+ * @see ump_secure_id_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the secure ID for the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem);
+
+#ifdef CONFIG_KDS
+/**
+ * Retrieve the KDS resource for the specified UMP memory.
+ *
+ * The KDS resource should be used to synchronize access to the UMP allocation.
+ * See the KDS API for how to do that.
+ *
+ * @param mem Handle to the UMP memory to query.
+ * @return Pointer to the KDS resource controlling access to the UMP memory.
+ */
+UMP_KERNEL_API_EXPORT struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem);
+#endif
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_release when there is no longer any
+ * use for the retrieved handle.
+ *
+ * If called on an non-shared allocation and this is a different process @a UMP_DD_INVALID_MEMORY_HANDLE will be returned.
+ *
+ * Calling on an @a UMP_INVALID_SECURE_ID will return @a UMP_DD_INVALID_MEMORY_HANDLE
+ *
+ * @note There is a user space equivalent function called @ref ump_from_secure_id
+ *
+ * @see ump_dd_release
+ * @see ump_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get function.
+ *
+ * @return @a UMP_DD_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ * This function will return a pointer to an array of @ref ump_dd_physical_block_64 in @a pArray and the number of array elements in @a pCount
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @param mem Handle to UMP memory.
+ * @param[out] pCount Pointer to where to store the number of items in the returned array
+ * @param[out] pArray Pointer to where to store a pointer to the physical blocks array
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray);
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT uint64_t ump_dd_size_get_64(const ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory allocation.
+ *
+ * The function @ref ump_dd_release must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note You are not required to call @ref ump_dd_retain
+ * for UMP handles returned from
+ * @ref ump_dd_from_secure_id,
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_retain
+ *
+ * @see ump_retain
+ *
+ * @param mem Handle to UMP memory.
+ * @return 0 indicates success, any other value indicates failure.
+ */
+UMP_KERNEL_API_EXPORT int ump_dd_retain(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function must be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * One can only call ump_release when matched with a successful ump_dd_retain, ump_dd_allocate_64 or ump_dd_from_secure_id
+ * If called on an @a UMP_DD_INVALID_MEMORY_HANDLE the function will early out.
+ *
+ * @note There is a user space equivalent function called @ref ump_release
+ *
+ * @see ump_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_release(ump_dd_handle mem);
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ * Once wrapped the memory acts just like a normal allocation coming from @ref ump_dd_allocate_64.
+ * The only exception is that the freed physical memory is not put into the pool of free memory, but instead considered returned to the caller once @a final_release_func returns.
+ * The blocks array will be copied, so no need to hold on to it after this function returns.
+ * @param[in] blocks Array of @ref ump_dd_physical_block_64
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ * @param flags Allocation flags to mark the handle with
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed, just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/** @name UMP v1 API
+ * Functions provided to support compatibility with UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_HANDLE_INVALID UMP_DD_INVALID_MEMORY_HANDLE
+
+/**
+ * UMP error codes for kernel space.
+ */
+typedef enum
+{
+	UMP_DD_SUCCESS, /**< indicates success */
+	UMP_DD_INVALID /**< indicates failure */
+} ump_dd_status_code;
+
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block
+{
+	unsigned long addr; /**< The physical address of the block */
+	unsigned long size; /**< The length of the block, typically page aligned */
+} ump_dd_physical_block;
+
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_reference_release "ump_dd_reference_release" when there is no longer any
+ * use for the retrieved handle.
+ *
+ * @note There is a user space equivalent function called @ref ump_handle_create_from_secure_id "ump_handle_create_from_secure_id"
+ *
+ * @see ump_dd_reference_release
+ * @see ump_handle_create_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get "ump_secure_id_get " function.
+ *
+ * @return UMP_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id);
+
+
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ *
+ * @param[in] blocks Array of @ref ump_dd_physical_block
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ *
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the number of physical blocks used by the specified UMP memory.
+ *
+ * This function retrieves the number of @ref ump_dd_physical_block "ump_dd_physical_block" structs needed
+ * to describe the physical memory layout of the given UMP memory. This can later be used when calling
+ * the functions @ref ump_dd_phys_blocks_get "ump_dd_phys_blocks_get" and
+ * @ref ump_dd_phys_block_get "ump_dd_phys_block_get".
+ *
+ * @see ump_dd_phys_blocks_get
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return The number of ump_dd_physical_block structs required to describe the physical memory layout of the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will fail if the num_blocks parameter is either to large or to small.
+ *
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param blocks An array of @ref ump_dd_physical_block "ump_dd_physical_block" structs that will receive the physical description.
+ * @param num_blocks The number of blocks to return in the blocks array. Use the function
+ *                   @ref ump_dd_phys_block_count_get "ump_dd_phys_block_count_get" first to determine the number of blocks required.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the physical memory block information for specified block for the specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will return UMP_DD_INVALID if the specified index is out of range.
+ *
+ * @see ump_dd_phys_blocks_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param index Which physical info block to retrieve.
+ * @param block Pointer to a @ref ump_dd_physical_block "ump_dd_physical_block" struct which will receive the requested information.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block);
+
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get "ump_size_get"
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory.
+ *
+ * This function adds an extra reference to the specified UMP memory. This function should
+ * be used every time a UMP memory handle is duplicated, that is, assigned to another ump_dd_handle
+ * variable. The function @ref ump_dd_reference_release "ump_dd_reference_release" must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * @note You are not required to call @ref ump_dd_reference_add "ump_dd_reference_add"
+ * for UMP handles returned from
+ * @ref ump_dd_handle_create_from_secure_id "ump_dd_handle_create_from_secure_id",
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_add "ump_reference_add"
+ *
+ * @see ump_reference_add
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function should be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_release "ump_reference_release"
+ *
+ * @see ump_reference_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle mem);
+
+/* @} */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/** @} */ /* end group ump_kernel_space_api */
+
+/** @} */ /* end group ump_api */
+
+#endif /* _UMP_KERNEL_INTERFACE_H_ */
diff --git a/midgard/r13p0/kernel/license.txt b/midgard/r13p0/kernel/license.txt
new file mode 100755
index 0000000..77c14bd
--- /dev/null
+++ b/midgard/r13p0/kernel/license.txt
@@ -0,0 +1,198 @@
+GPLV2 LICENCE AGREEMENT FOR MALI GPUS LINUX KERNEL DEVICE DRIVERS SOURCE CODE
+
+THE USE OF THE SOFTWARE ACCOMPANYING THIS DOCUMENT IS EXPRESSLY SUBJECT TO THE TERMS OF THE GNU GENERAL PUBLIC LICENSE VERSION 2 AS PUBLISHED BY THE FREE SOFTWARE FOUNDATION AND SET OUT BELOW FOR REFERENCE (?GPL LICENCE?). ARM IS ONLY WILLING TO DISTRIBUTE THE SOFTWARE TO YOU ON CONDITION THAT YOU ACCEPT ALL OF THE TERMS IN THE GPL LICENCE PRIOR TO MODIFYING OR DISTRIBUTING THE SOFTWARE.
+
+
+
+Further for the period of three (3) years, ARM hereby offers to make available the source code of any part of the software program that is supplied as object code or in executable form.
+
+
+
+GPL Licence
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+Version 2, June 1991
+
+
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+
+
+
+Preamble
+
+
+
+The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too.
+
+
+
+When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things.
+
+
+
+To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it.
+
+
+
+For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
+
+
+
+We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
+
+
+
+Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations.
+
+
+
+Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all.
+
+
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+
+
+0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you".
+
+
+
+Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program).
+
+Whether that is true depends on what the Program does.
+
+
+
+1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty;
+
+and give any other recipients of the Program a copy of this License along with the Program.
+
+
+
+You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee.
+
+
+
+2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
+
+
+
+a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change.
+
+
+
+b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License.
+
+
+
+c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.)
+
+
+
+These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.
+
+
+
+Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program.
+
+
+
+In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.
+
+
+
+3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following:
+
+
+
+a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.)
+
+
+
+The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.
+
+
+
+If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code.
+
+
+
+4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.
+
+
+
+5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the
+
+Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it.
+
+
+
+6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein.
+
+You are not responsible for enforcing compliance by third parties to this License.
+
+
+
+7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program.
+
+
+
+If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances.
+
+
+
+It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.
+
+
+
+This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
+
+
+
+8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.
+
+
+
+9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+
+
+
+Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation.
+
+
+
+10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.
+
+
+
+NO WARRANTY
+
+
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+/end
+
diff --git a/midgard/r13p0/kernel/patches/integrate_kds_with_dma_buf.patch b/midgard/r13p0/kernel/patches/integrate_kds_with_dma_buf.patch
new file mode 100755
index 0000000..37458cf
--- /dev/null
+++ b/midgard/r13p0/kernel/patches/integrate_kds_with_dma_buf.patch
@@ -0,0 +1,188 @@
+diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
+index 9aa618a..30e07ea 100644
+--- a/drivers/base/Kconfig
++++ b/drivers/base/Kconfig
+@@ -192,4 +192,12 @@ config DMA_SHARED_BUFFER
+ 	  APIs extension; the file's descriptor can then be passed on to other
+ 	  driver.
+ 
++config DMA_SHARED_BUFFER_USES_KDS
++	bool "Synchronize access to dma_buf with KDS"
++	default n
++	select KDS
++	depends on DMA_SHARED_BUFFER
++	help
++	  This option adds a KDS resource within every dma_buf allocation.
++
+ endmenu
+diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
+index 20258e1..e4083fd 100644
+--- a/drivers/base/dma-buf.c
++++ b/drivers/base/dma-buf.c
+@@ -27,6 +27,8 @@
+ #include <linux/dma-buf.h>
+ #include <linux/anon_inodes.h>
+ #include <linux/export.h>
++#include <linux/poll.h>
++#include <linux/sched.h>
+ 
+ static inline int is_dma_buf_file(struct file *);
+ 
+@@ -40,6 +42,8 @@ static int dma_buf_release(struct inode *inode, struct file *file)
+ 	dmabuf = file->private_data;
+ 
+ 	dmabuf->ops->release(dmabuf);
++	kds_callback_term(&dmabuf->kds_cb);
++	kds_resource_term(&dmabuf->kds);
+ 	kfree(dmabuf);
+ 	return 0;
+ }
+@@ -61,9 +65,90 @@ static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
+ 	return dmabuf->ops->mmap(dmabuf, vma);
+ }
+ 
++
++static void dma_buf_kds_cb_fn (void *param1, void *param2)
++{
++	struct kds_resource_set **rset_ptr = param1;
++	struct kds_resource_set *rset = *rset_ptr;
++	wait_queue_head_t *wait_queue = param2;
++
++	kfree(rset_ptr);
++	kds_resource_set_release(&rset);
++	wake_up(wait_queue);
++}
++
++static int dma_buf_kds_check(struct kds_resource *kds,
++                             long unsigned int exclusive, int *poll_ret)
++{
++	/* Synchronous wait with 0 timeout - poll availability */
++	struct kds_resource_set *rset = kds_waitall(1,&exclusive,&kds,0);
++
++	if (IS_ERR(rset))
++		return POLLERR;
++
++	if (rset){
++		kds_resource_set_release(&rset);
++		*poll_ret = POLLIN | POLLRDNORM;
++		if (exclusive)
++			*poll_ret |=  POLLOUT | POLLWRNORM;
++		return 1;
++	}else{
++		return 0;
++	}
++}
++
++static unsigned int dma_buf_poll(struct file *file,
++                                 struct poll_table_struct *wait)
++{
++	struct dma_buf *dmabuf;
++	struct kds_resource *kds;
++	unsigned int ret = 0;
++
++	if (!is_dma_buf_file(file))
++		return POLLERR;
++
++	dmabuf = file->private_data;
++	kds    = &dmabuf->kds;
++
++	if (poll_does_not_wait(wait)){
++		/* Check for exclusive access (superset of shared) first */
++		if(!dma_buf_kds_check(kds, 1ul, &ret))
++			dma_buf_kds_check(kds, 0ul, &ret);
++	}else{
++		int events = poll_requested_events(wait);
++		unsigned long exclusive;
++		wait_queue_head_t *wq;
++		struct kds_resource_set **rset_ptr = kmalloc(sizeof(*rset_ptr), GFP_KERNEL);
++
++		if (!rset_ptr)
++			return POLL_ERR;
++
++		if (events & POLLOUT){
++			wq = &dmabuf->wq_exclusive;
++			exclusive = 1;
++		}else{
++			wq = &dmabuf->wq_shared;
++			exclusive = 0;
++		}
++		poll_wait(file, wq, wait);
++		ret = kds_async_waitall(rset_ptr, KDS_FLAG_LOCKED_WAIT, &dmabuf->kds_cb,
++		                        rset_ptr, wq, 1, &exclusive, &kds);
++
++		if (IS_ERR_VALUE(ret)){
++			ret = POLL_ERR;
++			kfree(rset_ptr);
++		}else{
++			/* Can't allow access until callback */
++			ret = 0;
++		}
++	}
++	return ret;
++}
++
+ static const struct file_operations dma_buf_fops = {
+ 	.release	= dma_buf_release,
+ 	.mmap		= dma_buf_mmap_internal,
++	.poll		= dma_buf_poll,
+ };
+ 
+ /*
+@@ -120,6 +205,11 @@ struct dma_buf *dma_buf_export(void *priv, const struct dma_buf_ops *ops,
+ 	mutex_init(&dmabuf->lock);
+ 	INIT_LIST_HEAD(&dmabuf->attachments);
+ 
++	init_waitqueue_head(&dmabuf->wq_exclusive);
++	init_waitqueue_head(&dmabuf->wq_shared);
++	kds_resource_init(&dmabuf->kds);
++	kds_callback_init(&dmabuf->kds_cb, 1, dma_buf_kds_cb_fn);
++
+ 	return dmabuf;
+ }
+ EXPORT_SYMBOL_GPL(dma_buf_export);
+diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
+index eb48f38..6e824d2 100644
+--- a/include/linux/dma-buf.h
++++ b/include/linux/dma-buf.h
+@@ -30,6 +30,8 @@
+ #include <linux/list.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/fs.h>
++#include <linux/kds.h>
++#include <linux/wait.h>
+ 
+ struct device;
+ struct dma_buf;
+@@ -121,6 +123,10 @@ struct dma_buf {
+ 	const struct dma_buf_ops *ops;
+ 	/* mutex to serialize list manipulation and attach/detach */
+ 	struct mutex lock;
++	struct kds_resource kds;
++	wait_queue_head_t wq_exclusive;
++	wait_queue_head_t wq_shared;
++	struct kds_callback kds_cb;
+ 	void *priv;
+ };
+ 
+@@ -156,6 +162,21 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
+ 	get_file(dmabuf->file);
+ }
+ 
++/**
++ * get_dma_buf_kds_resource - get a KDS resource for this dma-buf
++ * @dmabuf:	[in]	pointer to dma_buf
++ *
++ * Returns a KDS resource that represents the dma-buf. This should be used by
++ * drivers to synchronize access to the buffer. Note that the caller should
++ * ensure that a reference to the dma-buf exists from the call to
++ * kds_async_wait until kds_resource_set_release is called.
++ */
++static inline struct kds_resource *
++	get_dma_buf_kds_resource(struct dma_buf *dmabuf)
++{
++	return &dmabuf->kds;
++}
++
+ #ifdef CONFIG_DMA_SHARED_BUFFER
+ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+ 							struct device *dev);
diff --git a/midgard/r13p0/kernel/sconscript b/midgard/r13p0/kernel/sconscript
new file mode 100755
index 0000000..e6be64c
--- /dev/null
+++ b/midgard/r13p0/kernel/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+if env['backend'] == 'kernel' and int(env['kernel_modules']) == 1 and env['gpu'] != 'none':
+	SConscript('drivers/sconscript')
+
+	if Glob('tests/sconscript'):
+		SConscript('tests/sconscript')
diff --git a/midgard/r14p0/android/patches/K/android-k.sh b/midgard/r14p0/android/patches/K/android-k.sh
new file mode 100755
index 0000000..4e44d5e
--- /dev/null
+++ b/midgard/r14p0/android/patches/K/android-k.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+
+#
+# Copyright (C) 2014-2015 ARM Limited. All rights reserved.
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Android K script for LLVM 3.5 and above support
+#
+# This script applies two minor patches and checks-out libcxx and libcxxabi libraries
+# adding C++11 support in Android K required for LLVM3.5 and above.
+
+
+if [ -z "$ANDROID_BUILD_TOP" ]; then
+    echo "[ANDROID-PATCH] This script must be executed in an Android build environment"
+    echo "                ANDROID_BUILD_TOP is undefined"
+    echo "                please do \"source build/envseup.sh; lunch\"."
+    exit 1
+fi
+
+ANDROID_REPOSITORY_URL="https://android.googlesource.com/"
+
+REFERENCE_ARGUMENT=""
+
+# Proccess arguments. Valid arguments are:
+#        -u ARG      Override the Android repository url
+#        -r ARG      Provide a path to a reference git repository
+while getopts "u:r:" opt; do
+  case $opt in
+    u)
+      ANDROID_REPOSITORY_URL=$OPTARG
+      ;;
+
+    r)
+      REFERENCE_ARGUMENT="--reference "$OPTARG
+      ;;
+
+    \?)
+      echo "Invalid option: -$OPTARG" >&2
+      exit 1
+
+      ;;
+    :)
+      echo "Option -$OPTARG requires an argument." >&2
+      exit 1
+      ;;
+
+  esac
+done
+
+MIDGARD_DDK_WD=$(pwd)
+
+cd $ANDROID_BUILD_TOP/external
+
+#Checkout libcxx from Android Master required by LLVM due to c++11
+git clone $ANDROID_REPOSITORY_URL/platform/external/libcxx $REFERENCE_ARGUMENT
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] git clone of libcxx failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+cd libcxx
+git checkout 27ae7cb782821a4f2d3813522ee411cd978bcd85 -q
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] checkout of libcxx@27ae7cb782821a4f2d3813522ee411cd978bcd85 revision failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/libcxx.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Patching libcxx failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Checkout libcxxabi from Android Master required by LLVM due to c++11
+cd $ANDROID_BUILD_TOP/external
+git clone $ANDROID_REPOSITORY_URL/platform/external/libcxxabi $REFERENCE_ARGUMENT
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] git clone of libcxxabi failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+cd libcxxabi
+git checkout 285d67f35f6044cf733091e36248405ca967c62c -q
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] checkout of libcxxabi@285d67f35f6044cf733091e36248405ca967c62c revision failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+
+#Apply patch to bioncs
+cd $ANDROID_BUILD_TOP/bionic/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/bionics.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Patching bionics failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Add locale support to bioncs
+cd $ANDROID_BUILD_TOP/bionic/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/locale.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Adding locale support failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Add LLVM backend ARM fix to support -O0 compilation
+cd $ANDROID_BUILD_TOP/external/llvm/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/llvm.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Fixing ARM LLVM backend failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+echo "[ANDROID-PATCH] Patching completed successfully"
+cd $MIDGARD_DDK_WD
diff --git a/midgard/r14p0/android/patches/K/bionics.diff b/midgard/r14p0/android/patches/K/bionics.diff
new file mode 100755
index 0000000..b86cedf
--- /dev/null
+++ b/midgard/r14p0/android/patches/K/bionics.diff
@@ -0,0 +1,202 @@
+diff --git a/libc/Android.mk b/libc/Android.mk
+index 9610c14..d3dbe81 100644
+--- a/libc/Android.mk
++++ b/libc/Android.mk
+@@ -214,6 +215,7 @@ libc_bionic_src_:= \
+     bionic/libc_init_common.cpp \
+     bionic/libc_logging.cpp \
+     bionic/libgen.cpp \
++    bionic/locale.cpp \
+     bionic/mmap.cpp \
+     bionic/pthread_attr.cpp \
+     bionic/pthread_detach.cpp \
+@@ -232,7 +232,6 @@ libc_bionic_src_files := \
+     bionic/scandir.cpp \
+     bionic/sched_getaffinity.cpp \
+     bionic/__set_errno.cpp \
+-    bionic/setlocale.cpp \
+     bionic/signalfd.cpp \
+     bionic/sigwait.cpp \
+     bionic/statvfs.cpp \
+diff --git a/libc/bionic/wchar.cpp b/libc/bionic/wchar.cpp
+index 50a3875..cc830b7 100644
+--- a/libc/bionic/wchar.cpp
++++ b/libc/bionic/wchar.cpp
+@@ -96,6 +96,7 @@ int swscanf(const wchar_t* /*s*/, const wchar_t* /*format*/, ... ) {
+
+ int iswalnum(wint_t wc) { return isalnum(wc); }
+ int iswalpha(wint_t wc) { return isalpha(wc); }
++int iswblank(wint_t wc) { return isblank(wc); }
+ int iswcntrl(wint_t wc) { return iscntrl(wc); }
+ int iswdigit(wint_t wc) { return isdigit(wc); }
+ int iswgraph(wint_t wc) { return isgraph(wc); }
+diff --git a/libc/include/assert.h b/libc/include/assert.h
+index 62470f5..361a5ff 100644
+--- a/libc/include/assert.h
++++ b/libc/include/assert.h
+@@ -60,6 +60,6 @@
+ #endif
+
+ __BEGIN_DECLS
+-__dead void __assert(const char *, int, const char *);
+-__dead void __assert2(const char *, int, const char *, const char *);
++__dead void __assert(const char *, int, const char *) __noreturn;
++__dead void __assert2(const char *, int, const char *, const char *) __noreturn;
+ __END_DECLS
+diff --git a/libc/include/ctype.h b/libc/include/ctype.h
+index 5557e31..a66df12 100644
+--- a/libc/include/ctype.h
++++ b/libc/include/ctype.h
+@@ -45,11 +45,14 @@
+ #define	_CTYPE_U	0x01
+ #define	_CTYPE_L	0x02
+ #define	_CTYPE_N	0x04
++#define	_CTYPE_D	0x04
+ #define	_CTYPE_S	0x08
+ #define	_CTYPE_P	0x10
+ #define	_CTYPE_C	0x20
+ #define	_CTYPE_X	0x40
+ #define	_CTYPE_B	0x80
++#define	_CTYPE_R	(_CTYPE_P|_CTYPE_U|_CTYPE_L|_CTYPE_D|_CTYPE_B)
++#define	_CTYPE_A	(_CTYPE_L|_CTYPE_U)
+
+ __BEGIN_DECLS
+
+diff --git a/libc/include/locale.h b/libc/include/locale.h
+index 65b5c7d..6989851 100644
+--- a/libc/include/locale.h
++++ b/libc/include/locale.h
+@@ -29,6 +29,7 @@
+ #define _LOCALE_H_
+
+ #include <sys/cdefs.h>
++#include <xlocale.h>
+
+ __BEGIN_DECLS
+
+@@ -43,17 +44,65 @@ enum {
+     LC_PAPER     = 7,
+     LC_NAME      = 8,
+     LC_ADDRESS   = 9,
+-
+     LC_TELEPHONE      = 10,
+     LC_MEASUREMENT    = 11,
+     LC_IDENTIFICATION = 12
+ };
+
+-extern char *setlocale(int category, const char *locale);
++#define LC_CTYPE_MASK          (1 << LC_CTYPE)
++#define LC_NUMERIC_MASK        (1 << LC_NUMERIC)
++#define LC_TIME_MASK           (1 << LC_TIME)
++#define LC_COLLATE_MASK        (1 << LC_COLLATE)
++#define LC_MONETARY_MASK       (1 << LC_MONETARY)
++#define LC_MESSAGES_MASK       (1 << LC_MESSAGES)
++#define LC_PAPER_MASK          (1 << LC_PAPER)
++#define LC_NAME_MASK           (1 << LC_NAME)
++#define LC_ADDRESS_MASK        (1 << LC_ADDRESS)
++#define LC_TELEPHONE_MASK      (1 << LC_TELEPHONE)
++#define LC_MEASUREMENT_MASK    (1 << LC_MEASUREMENT)
++#define LC_IDENTIFICATION_MASK (1 << LC_IDENTIFICATION)
++
++#define LC_ALL_MASK (LC_CTYPE_MASK | LC_NUMERIC_MASK | LC_TIME_MASK | LC_COLLATE_MASK | \
++                     LC_MONETARY_MASK | LC_MESSAGES_MASK | LC_PAPER_MASK | LC_NAME_MASK | \
++                     LC_ADDRESS_MASK | LC_TELEPHONE_MASK | LC_MEASUREMENT_MASK | \
++                     LC_IDENTIFICATION_MASK)
++
++struct lconv {
++    char* decimal_point;
++    char* thousands_sep;
++    char* grouping;
++    char* int_curr_symbol;
++    char* currency_symbol;
++    char* mon_decimal_point;
++    char* mon_thousands_sep;
++    char* mon_grouping;
++    char* positive_sign;
++    char* negative_sign;
++    char  int_frac_digits;
++    char  frac_digits;
++    char  p_cs_precedes;
++    char  p_sep_by_space;
++    char  n_cs_precedes;
++    char  n_sep_by_space;
++    char  p_sign_posn;
++    char  n_sign_posn;
++    char  int_p_cs_precedes;
++    char  int_p_sep_by_space;
++    char  int_n_cs_precedes;
++    char  int_n_sep_by_space;
++    char  int_p_sign_posn;
++    char  int_n_sign_posn;
++};
++
++struct lconv* localeconv(void);
++
++locale_t duplocale(locale_t);
++void freelocale(locale_t);
++locale_t newlocale(int, const char*, locale_t);
++char* setlocale(int, const char*);
++locale_t uselocale(locale_t);
+
+-/* Make libstdc++-v3 happy.  */
+-struct lconv { };
+-struct lconv *localeconv(void);
++#define LC_GLOBAL_LOCALE ((locale_t) -1L)
+
+ __END_DECLS
+
+diff --git a/libc/include/stdio.h b/libc/include/stdio.h
+index 23fc944..28685c7 100644
+--- a/libc/include/stdio.h
++++ b/libc/include/stdio.h
+@@ -469,7 +469,10 @@ int vsprintf(char *dest, const char *format, __va_list ap)
+ }
+
+ #if defined(__clang__)
+-#define snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
++  #if !defined(snprintf)
++    #define __wrap_snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
++    #define snprintf(...) __wrap_snprintf(__VA_ARGS__)
++  #endif
+ #else
+ __BIONIC_FORTIFY_INLINE
+ __printflike(3, 4)
+@@ -481,7 +484,10 @@ int snprintf(char *dest, size_t size, const char *format, ...)
+ #endif
+
+ #if defined(__clang__)
+-#define sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
++  #if !defined(sprintf)
++    #define __wrap_sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
++    #define sprintf(...) __wrap_sprintf(__VA_ARGS__)
++  #endif
+ #else
+ __BIONIC_FORTIFY_INLINE
+ __printflike(2, 3)
+diff --git a/libc/include/stdlib.h b/libc/include/stdlib.h
+index 9fa84c1..188da69 100644
+--- a/libc/include/stdlib.h
++++ b/libc/include/stdlib.h
+@@ -68,6 +68,10 @@ static __inline__ float strtof(const char *nptr, char **endptr)
+     return (float)strtod(nptr, endptr);
+ }
+
++static __inline__ long double strtold(const char* s, char** end_ptr) {
++  return (long double)strtod(s, end_ptr);
++}
++
+ extern int atoi(const char *) __purefunc;
+ extern long atol(const char *) __purefunc;
+ extern long long atoll(const char *) __purefunc;
+diff --git a/libc/include/wchar.h b/libc/include/wchar.h
+index 76ac02c..c413806 100644
+--- a/libc/include/wchar.h
++++ b/libc/include/wchar.h
+@@ -77,6 +77,7 @@ extern int               fwprintf(FILE *, const wchar_t *, ...);
+ extern int               fwscanf(FILE *, const wchar_t *, ...);
+ extern int               iswalnum(wint_t);
+ extern int               iswalpha(wint_t);
++extern int               iswblank(wint_t);
+ extern int               iswcntrl(wint_t);
+ extern int               iswdigit(wint_t);
+ extern int               iswgraph(wint_t);
diff --git a/midgard/r14p0/android/patches/K/libcxx.diff b/midgard/r14p0/android/patches/K/libcxx.diff
new file mode 100755
index 0000000..a10d75c
--- /dev/null
+++ b/midgard/r14p0/android/patches/K/libcxx.diff
@@ -0,0 +1,13 @@
+diff --git a/include/cstdlib b/include/cstdlib
+index 152b891..e45a8fd 100644
+--- a/include/cstdlib
++++ b/include/cstdlib
+@@ -126,7 +126,7 @@ using ::realloc;
+ using ::abort;
+ using ::atexit;
+ using ::exit;
+-using ::_Exit;
++
+ using ::getenv;
+ using ::system;
+ using ::bsearch;
diff --git a/midgard/r14p0/android/patches/K/license.txt b/midgard/r14p0/android/patches/K/license.txt
new file mode 100755
index 0000000..d645695
--- /dev/null
+++ b/midgard/r14p0/android/patches/K/license.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/midgard/r14p0/android/patches/K/llvm.diff b/midgard/r14p0/android/patches/K/llvm.diff
new file mode 100755
index 0000000..d41d279
--- /dev/null
+++ b/midgard/r14p0/android/patches/K/llvm.diff
@@ -0,0 +1,14 @@
+diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
+index 8cdb853..868dd47 100644
+--- a/lib/Target/ARM/ARMInstrInfo.cpp
++++ b/lib/Target/ARM/ARMInstrInfo.cpp
+@@ -130,6 +130,10 @@ namespace {
+         MIB.addImm(0);
+       AddDefaultPred(MIB);
+
++      // Fix the GOT address by adding pc.
++      BuildMI(FirstMBB, MBBI, DL, TII.get(ARM::tPICADD), GlobalBaseReg)
++          .addReg(GlobalBaseReg).addImm(ARMPCLabelIndex);
++
+       return true;
+     }
diff --git a/midgard/r14p0/android/patches/K/locale.diff b/midgard/r14p0/android/patches/K/locale.diff
new file mode 100755
index 0000000..9c4c140
--- /dev/null
+++ b/midgard/r14p0/android/patches/K/locale.diff
@@ -0,0 +1,204 @@
+diff --git a/libc/bionic/locale.cpp b/libc/bionic/locale.cpp
+new file mode 100644
+index 0000000..4fade84
+--- /dev/null
++++ b/libc/bionic/locale.cpp
+@@ -0,0 +1,156 @@
++/*
++ * Copyright (C) 2008 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++#include <locale.h>
++#include <pthread.h>
++#include <stdlib.h>
++
++// We currently support a single locale, the "C" locale (also known as "POSIX").
++
++struct __locale_t {
++  // Because we only support one locale, these are just tokens with no data.
++};
++
++static pthread_once_t gLocaleOnce = PTHREAD_ONCE_INIT;
++static lconv gLocale;
++
++static pthread_once_t gUselocaleKeyOnce = PTHREAD_ONCE_INIT;
++static pthread_key_t gUselocaleKey;
++
++static void __locale_init() {
++  gLocale.decimal_point = const_cast<char*>(".");
++
++  char* not_available = const_cast<char*>("");
++  gLocale.thousands_sep = not_available;
++  gLocale.grouping = not_available;
++  gLocale.int_curr_symbol = not_available;
++  gLocale.currency_symbol = not_available;
++  gLocale.mon_decimal_point = not_available;
++  gLocale.mon_thousands_sep = not_available;
++  gLocale.mon_grouping = not_available;
++  gLocale.positive_sign = not_available;
++  gLocale.negative_sign = not_available;
++
++  gLocale.int_frac_digits = CHAR_MAX;
++  gLocale.frac_digits = CHAR_MAX;
++  gLocale.p_cs_precedes = CHAR_MAX;
++  gLocale.p_sep_by_space = CHAR_MAX;
++  gLocale.n_cs_precedes = CHAR_MAX;
++  gLocale.n_sep_by_space = CHAR_MAX;
++  gLocale.p_sign_posn = CHAR_MAX;
++  gLocale.n_sign_posn = CHAR_MAX;
++  gLocale.int_p_cs_precedes = CHAR_MAX;
++  gLocale.int_p_sep_by_space = CHAR_MAX;
++  gLocale.int_n_cs_precedes = CHAR_MAX;
++  gLocale.int_n_sep_by_space = CHAR_MAX;
++  gLocale.int_p_sign_posn = CHAR_MAX;
++  gLocale.int_n_sign_posn = CHAR_MAX;
++}
++
++static void __uselocale_key_init() {
++  pthread_key_create(&gUselocaleKey, NULL);
++}
++
++static bool __is_supported_locale(const char* locale) {
++  return (strcmp(locale, "") == 0 || strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0);
++}
++
++static locale_t __new_locale() {
++  return reinterpret_cast<locale_t>(malloc(sizeof(__locale_t)));
++}
++
++lconv* localeconv() {
++  pthread_once(&gLocaleOnce, __locale_init);
++  return &gLocale;
++}
++
++locale_t duplocale(locale_t l) {
++  locale_t clone = __new_locale();
++  if (clone != NULL && l != LC_GLOBAL_LOCALE) {
++    *clone = *l;
++  }
++  return clone;
++}
++
++void freelocale(locale_t l) {
++  free(l);
++}
++
++locale_t newlocale(int category_mask, const char* locale_name, locale_t /*base*/) {
++  // Is 'category_mask' valid?
++  if ((category_mask & ~LC_ALL_MASK) != 0) {
++    errno = EINVAL;
++    return NULL;
++  }
++
++  if (!__is_supported_locale(locale_name)) {
++    errno = ENOENT;
++    return NULL;
++  }
++
++  return __new_locale();
++}
++
++char* setlocale(int category, char const* locale_name) {
++  // Is 'category' valid?
++  if (category < LC_CTYPE || category > LC_IDENTIFICATION) {
++    errno = EINVAL;
++    return NULL;
++  }
++
++  // Caller just wants to query the current locale?
++  if (locale_name == NULL) {
++    return const_cast<char*>("C");
++  }
++
++  // Caller wants one of the mandatory POSIX locales?
++  if (__is_supported_locale(locale_name)) {
++    return const_cast<char*>("C");
++  }
++
++  // We don't support any other locales.
++  errno = ENOENT;
++  return NULL;
++}
++
++locale_t uselocale(locale_t new_locale) {
++  pthread_once(&gUselocaleKeyOnce, __uselocale_key_init);
++
++  locale_t old_locale = static_cast<locale_t>(pthread_getspecific(gUselocaleKey));
++
++  // If this is the first call to uselocale(3) on this thread, we return LC_GLOBAL_LOCALE.
++  if (old_locale == NULL) {
++    old_locale = LC_GLOBAL_LOCALE;
++  }
++
++  if (new_locale != NULL) {
++    pthread_setspecific(gUselocaleKey, new_locale);
++  }
++
++  return old_locale;
++}
+diff --git a/libc/include/xlocale.h b/libc/include/xlocale.h
+new file mode 100644
+index 0000000..f7eb8f4
+--- /dev/null
++++ b/libc/include/xlocale.h
+@@ -0,0 +1,36 @@
++/*
++ * Copyright (C) 2014 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++#ifndef _XLOCALE_H_
++#define _XLOCALE_H_
++
++/* If we just use void* here, GCC exposes that in error messages. */
++struct __locale_t;
++typedef struct __locale_t* locale_t;
++
++#endif /* _XLOCALE_H_ */
diff --git a/midgard/r14p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt b/midgard/r14p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
new file mode 100755
index 0000000..46b704b
--- /dev/null
+++ b/midgard/r14p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
@@ -0,0 +1,92 @@
+#
+# (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+* ARM Mali Midgard devices
+
+
+Required properties:
+
+- compatible : Should be mali<chip>, replacing digits with x from the back,
+until malit<Major>xx, ending with arm,mali-midgard, the latter not optional.
+- reg : Physical base address of the device and length of the register area.
+- interrupts : Contains the three IRQ lines required by T-6xx devices
+- interrupt-names : Contains the names of IRQ resources in the order they were
+provided in the interrupts property. Must contain: "JOB, "MMU", "GPU".
+
+Optional:
+
+- clocks : Phandle to clock for the Mali T-6xx device.
+- clock-names : Shall be "clk_mali".
+- mali-supply : Phandle to regulator for the Mali device. Refer to
+Documentation/devicetree/bindings/regulator/regulator.txt for details.
+- operating-points : Refer to Documentation/devicetree/bindings/power/opp.txt
+for details.
+- snoop_enable_smc : SMC function ID to enable CCI snooping on the GPU port(s).
+- snoop_disable_smc : SMC function ID to disable CCI snooping on the GPU port(s).
+- jm_config : For T860/T880. Sets job manager configuration. An array containing:
+	- 1 to override the TIMESTAMP value, 0 otherwise.
+	- 1 to override clock gate, forcing them to be always on, 0 otherwise.
+	- 1 to enable job throttle, limiting the number of cores that can be started
+	  simultaneously, 0 otherwise.
+	- Value between 0 and 63 (including). If job throttle is enabled, this is one
+	  less than the number of cores that can be started simultaneously.
+- power_model : Sets power model parameters. Note that this model was designed for the Juno
+	        platform, and may not be suitable for other platforms. A structure containing :
+	- compatible: Should be arm,mali-simple-power-model
+	- voltage: Voltage at reference point. Specified in mV.
+	- frequency: Frequency at reference point. Specified in MHz.
+	- dynamic-power: Dynamic power at reference frequency and voltage. Specified in mW.
+	- static-power: Static power at reference frequency. Specified in mW.
+	- ts: An array containing coefficients for the temperature scaling factor.
+	  Used as : tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0], where T = temperature
+	- thermal-zone: A string identifying the thermal zone used for the GPU
+- system-coherency : Sets the coherency protocol to be used for coherent
+		     accesses made from the GPU.
+		     If not set then no coherency is used.
+	- 0  : ACE-Lite
+	- 1  : ACE
+	- 31 : No coherency
+
+Example for a Mali-T602:
+
+gpu@0xfc010000 {
+	compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
+	reg = <0xfc010000 0x4000>;
+	interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+	interrupt-names = "JOB", "MMU", "GPU";
+
+	clocks = <&pclk_mali>;
+	clock-names = "clk_mali";
+	mali-supply = <&vdd_mali>;
+	operating-points = <
+		/* KHz   uV */
+		533000 1250000,
+		450000 1150000,
+		400000 1125000,
+		350000 1075000,
+		266000 1025000,
+		160000  925000,
+		100000  912500,
+	>;
+	power_model {
+		compatible = "arm,mali-simple-power-model";
+		voltage = <800>;
+		frequency = <500>;
+		static-power = <500>;
+		dynamic-power = <1500>;
+		ts = <20000 2000 (-20) 2>;
+		thermal-zone = "gpu";
+	};
+};
diff --git a/midgard/r14p0/kernel/Documentation/dma-buf-test-exporter.txt b/midgard/r14p0/kernel/Documentation/dma-buf-test-exporter.txt
new file mode 100755
index 0000000..4208010
--- /dev/null
+++ b/midgard/r14p0/kernel/Documentation/dma-buf-test-exporter.txt
@@ -0,0 +1,40 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+=====================
+dma-buf-test-exporter
+=====================
+
+Overview
+--------
+
+The dma-buf-test-exporter is a simple exporter of dma_buf objects.
+It has a private API to allocate and manipulate the buffers which are represented as dma_buf fds.
+The private API allows:
+* simple allocation of physically non-contiguous buffers
+* simple allocation of physically contiguous buffers
+* query kernel side API usage stats (number of attachments, number of mappings, mmaps)
+* failure mode configuration (fail attach, mapping, mmap)
+* kernel side memset of buffers
+
+The buffers support all of the dma_buf API, including mmap.
+
+It supports being compiled as a module both in-tree and out-of-tree.
+
+See include/linux/dma-buf-test-exporter.h for the ioctl interface.
+See Documentation/dma-buf-sharing.txt for details on dma_buf.
+
+
diff --git a/midgard/r14p0/kernel/Documentation/kds.txt b/midgard/r14p0/kernel/Documentation/kds.txt
new file mode 100755
index 0000000..639288c
--- /dev/null
+++ b/midgard/r14p0/kernel/Documentation/kds.txt
@@ -0,0 +1,268 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+==============================
+kds - Kernel Dependency System
+==============================
+
+Introduction
+------------
+kds provides a mechanism for clients to atomically lock down multiple abstract resources.
+This can be done either synchronously or asynchronously.
+Abstract resources is used to allow a set of clients to use kds to control access to any
+resource, an example is structured memory buffers.
+
+kds supports that buffer is locked for exclusive access and sharing of buffers.
+
+kds can be built as either a integrated feature of the kernel or as a module.
+It supports being compiled as a module both in-tree and out-of-tree.
+
+
+Concepts
+--------
+A core concept in kds is abstract resources.
+A kds resource is just an abstraction for some client object, kds doesn't care what it is.
+Typically EGL will consider UMP buffers as being a resource, thus each UMP buffer has
+a kds resource for synchronization to the buffer.
+
+kds allows a client to create and destroy the abstract resource objects.
+A new resource object is made available asap (it is just a simple malloc with some initializations),
+while destroy it requires some external synchronization.
+
+The other core concept in kds is consumer of resources.
+kds is requested to allow a client to consume a set of resources and the client will be notified when it can consume the resources.
+
+Exclusive access allows only one client to consume a resource.
+Shared access permits multiple consumers to acceess a resource concurrently.
+
+
+APIs
+----
+kds provides simple resource allocate and destroy functions.
+Clients use this to instantiate and control the lifetime of the resources kds manages.
+
+kds provides two ways to wait for resources:
+- Asynchronous wait: the client specifies a function pointer to be called when wait is over
+- Synchronous wait: Function blocks until access is gained.
+
+The synchronous API has a timeout for the wait.
+The call can early out if a signal is delivered.
+
+After a client is done consuming the resource kds must be notified to release the resources and let some other client take ownership.
+This is done via resource set release call.
+
+A Windows comparison:
+kds implements WaitForMultipleObjectsEx(..., bWaitAll = TRUE, ...) but also has an asynchronous version in addition.
+kds resources can be seen as being the same as NT object manager resources.
+
+Internals
+---------
+kds guarantees atomicity when a set of resources is operated on.
+This is implemented via a global resource lock which is taken by kds when it updates resource objects.
+
+Internally a resource in kds is a linked list head with some flags.
+
+When a consumer requests access to a set of resources it is queued on each of the resources.
+The link from the consumer to the resources can be triggered. Once all links are triggered
+the registered callback is called or the blocking function returns.
+A link is considered triggered if it is the first on the list of consumers of a resource,
+or if all the links ahead of it is marked as shared and itself is of the type shared.
+
+When the client is done consuming the consumer object is removed from the linked lists of
+the resources and a potential new consumer becomes the head of the resources.
+As we add and remove consumers atomically across all resources we can guarantee that
+we never introduces a A->B + B->A type of loops/deadlocks.
+
+
+kbase/base implementation
+-------------------------
+A HW job needs access to a set of shared resources.
+EGL tracks this and encodes the set along with the atom in the ringbuffer.
+EGL allocates a (k)base dep object to represent the dependency to the set of resources and encodes that along with the list of resources.
+This dep object is use to create a dependency from a job chain(atom) to the resources it needs to run.
+When kbase decodes the atom in the ringbuffer it finds the set of resources and calls kds to request all the needed resources.
+As EGL needs to know when the kds request is delivered a new base event object is needed: atom enqueued. This event is only delivered for atoms which uses kds.
+The callback kbase registers trigger the dependency object described which would trigger the existing JD system to release the job chain.
+When the atom is done kds resource set release is call to release the resources.
+
+EGL will typically use exclusive access to the render target, while all buffers used as input can be marked as shared.
+
+
+Buffer publish/vsync
+--------------------
+EGL will use a separate ioctl or DRM flip to request the flip.
+If the LCD driver is integrated with kds EGL can do these operations early.
+The LCD driver must then implement the ioctl or DRM flip to be asynchronous with kds async call.
+The LCD driver binds a kds resource to each virtual buffer (2 buffers in case of double-buffering).
+EGL will make a dependency to the target kds resource in the kbase atom.
+After EGL receives a atom enqueued event it can ask the LCD driver to pan to the target kds resource.
+When the atom is completed it'll release the resource and the LCD driver will get its callback.
+In the callback it'll load the target buffer into the DMA unit of the LCD hardware.
+The LCD driver will be the consumer of both buffers for a short period.
+The LCD driver will call kds resource set release on the previous on-screen buffer when the next vsync/dma read end is handled.
+
+===============================================
+Kernel driver kds client design considerations
+=============================================== 
+
+Number of resources
+--------------------
+
+The kds api allows a client to wait for ownership of a number of resources, where by the client does not take on ownership of any of the resources in the resource set 
+until all of the resources in the set are released.  Consideration must be made with respect to performance, as waiting on large number of resources will incur
+a greater overhead and may increase system latency.  It may be worth considering how independent each of the resources are, for example if the same set of resources 
+are waited upon by each of the clients, then it may be possible to aggregate these into one resource that each client waits upon.
+
+Clients with shared access
+---------------------------
+
+The kds api allows a number of clients to gain shared access to a resource simultaneously, consideration must be made with respect to performance, large numbers of clients
+wanting shared access can incur a performance penalty and may increase system latency, specifically when the clients are granted access. Having an excessively high 
+number of clients with shared access should be avoided, consideration should be made to the call back configuration being used. See Callbacks and Scenario 1 below.
+
+Callbacks
+----------
+
+Careful consideration must be made as to which callback type is most appropriate for kds clients, direct callbacks are called immediately from the context in which the
+ownership of the resource is passed to the next waiter in the list.  Where as when using deferred callbacks the callback is deferred and called from outside the context
+that is relinquishing ownership, while this reduces the latency in the releasing clients context it does incur a cost as there is more latency between a resource 
+becoming free and the new client owning the resource callback being executed.  
+
+Obviously direct callbacks have a performance advantage, as the call back is immediate and does not have to wait for the kernel to context switch to schedule in the 
+execution of the callback.  
+
+However as the callback is immediate and within the context that is granting ownership it is important that the callback perform the MINIMUM amount of work necessary, 
+long call backs could cause poor system latency.  Special care and attention must be taken if the direct callbacks can be called from IRQ handlers, such as when 
+kds_resource_set_release is called from an IRQ handler, in this case you have to avoid any calls that may sleep.  
+
+Deferred contexts have the advantage that the call backs are deferred until they are scheduled by the kernel, therefore they are allowed to call functions that may sleep
+and if scheduled from IRQ context not incur as much system latency as would be seen with direct callbacks from within the IRQ.
+
+Once the clients callback has been called, the client is considered to be owning the resource.  Within the callback the client may only need to perform a small amount of work
+before the client need to give up owner ship.  The kds_resource_release function may be called from with in the call back, but consideration must be made when using direct 
+callbacks, with both respect to execution time and stack usage. Consider the example in Scenario 2 with direct callbacks:
+
+Scenario 1 - Shared client access - direct callbacks:
+
+Resources: X
+Clients: A(S), B(S), C(S), D(S), E(E)
+where: (S) = shared user, (E) = exclusive
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+}
+
+Where <client> is either A,B,C,D
+                                        Queue   |Owner 
+1.  E Requests X exclusive                      |
+2.  E Owns     X exclusive                      |E
+3.  A Requests X shared                        A|E
+4.  B Requests X shared                       BA|E
+5.  C Requests X shared                      CBA|E
+6.  D Requests X shared                     DCBA|E
+7.  E Releases X                                |DCBA
+8.  A Owns X shared                             |DCBA
+9.  B Owns X shared                             |DCBA
+10. C Owns X shared                             |DCBA
+11. D Owns X shared                             |DCBA
+
+At point 7 it is important to note that when E releases X; A,B,C and D become the new shared owners of X and the call back for each of the client(A,B,C,D) triggered, so consideration
+must be made as to whether a direct or deferred callback is suitable, using direct callbacks would result in the call graph.
+
+Call graph when E releases X:
+    kds_resource_set_release( .. )
+        +->client_A_cb( .. )
+        +->client_B_cb( .. )
+        +->client_C_cb( .. )
+        +->client_D_cb( .. )
+
+
+Scenario 2 - Immediate resource release - direct callbacks:
+
+Resource: X
+Clients: A, B, C, D
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+    kds_resource_set_release( .. );
+}
+
+Where <client> is either A,B,C,D
+
+1. A Owns     X exclusive
+2. B Requests X exclusive	(direct callback)
+3. C Requests X exclusive	(direct callback)
+4. D Requests X exclusive	(direct callback)
+5. A Releases X
+
+Call graph when A releases X:
+    kds_resource_set_release( .. )
+        +->client_B_cb( .. )
+            +->kds_resource_set_release( .. )
+                +->client_C_cb( .. )
+                    +->kds_resource_set_release( .. )
+                        +->client_D_cb( .. )
+
+As can be seen when a client releases the resource, with direct call backs it is possible to create nested calls
+
+IRQ Considerations
+-------------------
+
+Usage of kds_resource_release in IRQ handlers should be carefully considered.
+
+Things to keep in mind:
+
+1.) Are you using direct or deferred callbacks?
+2.) How many resources are you releasing?
+3.) How many shared waiters are pending on the resource?
+
+Releasing ownership and wait cancellation
+------------------------------------------
+
+Client Wait Cancellation
+-------------------------
+
+It may be necessary in certain circumstances for the client to cancel the wait for kds resources for error handling, process termination etc.  Cancellation is 
+performed using kds_resource_set_release or kds_resource_set_release_sync using the rset that was received from kds_async_waitall, kds_resource_set_release_sync 
+being used for waits which are using deferred callbacks.
+
+It is possible that while the request to cancel the wait is being issued by the client, the client is granted access to the resources.  Normally after the client
+has taken ownership and finishes with that resource, it will release ownership to signal other waiters which are pending, this causes a race with the cancellation.
+To prevent KDS trying to remove a wait twice from the internal list and accessing memory that is potentially freed, it is very important that all releasers use the
+same rset pointer.  Here is a simplified example of bad usage that must be avoided in any client implementation:
+
+Senario 3 - Bad release from multiple contexts:
+
+	This scenaro is highlighting bad usage of the kds API
+
+	kds_resource_set * rset;
+	kds_resource_set * rset_copy;
+
+	kds_async_waitall( &rset, ... ... ... );
+
+	/* Don't do this */
+	rset_copy = rset;
+
+Context A:
+	kds_resource_set_release( &rset )
+
+Context B:
+	kds_resource_set_release( &rset_copy )
+
diff --git a/midgard/r14p0/kernel/drivers/base/dma_buf_lock/sconscript b/midgard/r14p0/kernel/drivers/base/dma_buf_lock/sconscript
new file mode 100755
index 0000000..99293c3
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/dma_buf_lock/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2012, 2014, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+if env.KernelConfigEnabled("CONFIG_DMA_SHARED_BUFFER_USES_KDS"):
+    SConscript("src/sconscript")
+    if env["tests"] and Glob("tests/sconscript"):
+        SConscript("tests/sconscript")
diff --git a/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/Kbuild b/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
new file mode 100755
index 0000000..68dad07
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-m := dma_buf_lock.o
+endif
diff --git a/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/Makefile b/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/Makefile
new file mode 100755
index 0000000..cf76132
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: dma_buf_lock
+
+dma_buf_lock:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include"
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c b/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
new file mode 100755
index 0000000..9613ffc
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/dma-buf.h>
+#include <linux/kds.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+
+#include "dma_buf_lock.h"
+
+/* Maximum number of buffers that a single handle can address */
+#define DMA_BUF_LOCK_BUF_MAX 32
+
+#define DMA_BUF_LOCK_DEBUG 1
+
+static dev_t dma_buf_lock_dev;
+static struct cdev dma_buf_lock_cdev;
+static struct class *dma_buf_lock_class;
+static char dma_buf_lock_dev_name[] = "dma_buf_lock";
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static struct file_operations dma_buf_lock_fops =
+{
+	.owner   = THIS_MODULE,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = dma_buf_lock_ioctl,
+#else
+	.ioctl   = dma_buf_lock_ioctl,
+#endif
+	.compat_ioctl   = dma_buf_lock_ioctl,
+};
+
+typedef struct dma_buf_lock_resource
+{
+	int *list_of_dma_buf_fds;               /* List of buffers copied from userspace */
+	atomic_t locked;                        /* Status of lock */
+	struct dma_buf **dma_bufs;
+	struct kds_resource **kds_resources;    /* List of KDS resources associated with buffers */
+	struct kds_resource_set *resource_set;
+	unsigned long exclusive;                /* Exclusive access bitmap */
+	wait_queue_head_t wait;
+	struct kds_callback cb;
+	struct kref refcount;
+	struct list_head link;
+	int count;
+} dma_buf_lock_resource;
+
+static LIST_HEAD(dma_buf_lock_resource_list);
+static DEFINE_MUTEX(dma_buf_lock_mutex);
+
+static inline int is_dma_buf_lock_file(struct file *);
+static void dma_buf_lock_dounlock(struct kref *ref);
+
+static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
+{
+	dma_buf_lock_resource *resource;
+
+	if (!is_dma_buf_lock_file(file))
+		return -EINVAL;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_release\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return 0;
+}
+
+static void dma_buf_lock_kds_callback(void *param1, void *param2)
+{
+	dma_buf_lock_resource *resource = param1;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_kds_callback\n");
+#endif
+	atomic_set(&resource->locked, 1);
+
+	wake_up(&resource->wait);
+}
+
+static unsigned int dma_buf_lock_handle_poll(struct file *file,
+                                             struct poll_table_struct *wait)
+{
+	dma_buf_lock_resource *resource;
+	unsigned int ret = 0;
+
+	if (!is_dma_buf_lock_file(file))
+		return POLLERR;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll\n");
+#endif
+	if (1 == atomic_read(&resource->locked))
+	{
+		/* Resources have been locked */
+		ret = POLLIN | POLLRDNORM;
+		if (resource->exclusive)
+		{
+			ret |=  POLLOUT | POLLWRNORM;
+		}
+	}
+	else
+	{
+		if (!poll_does_not_wait(wait)) 
+		{
+			poll_wait(file, &resource->wait, wait);
+		}
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll : return %i\n", ret);
+#endif
+	return ret;
+}
+
+static const struct file_operations dma_buf_lock_handle_fops = {
+	.release	= dma_buf_lock_handle_release,
+	.poll		= dma_buf_lock_handle_poll,
+};
+
+/*
+ * is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock
+ */
+static inline int is_dma_buf_lock_file(struct file *file)
+{
+	return file->f_op == &dma_buf_lock_handle_fops;
+}
+
+
+
+/*
+ * Start requested lock.
+ *
+ * Allocates required memory, copies dma_buf_fd list from userspace,
+ * acquires related KDS resources, and starts the lock.
+ */
+static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
+{
+	dma_buf_lock_resource *resource;
+	int size;
+	int fd;
+	int i;
+	int ret;
+
+	if (NULL == request->list_of_dma_buf_fds)
+	{
+		return -EINVAL;
+	}
+	if (request->count <= 0)
+	{
+		return -EINVAL;
+	}
+	if (request->count > DMA_BUF_LOCK_BUF_MAX)
+	{
+		return -EINVAL;
+	}
+	if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE &&
+	    request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
+	{
+		return -EINVAL;
+	}
+
+	resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
+	if (NULL == resource)
+	{
+		return -ENOMEM;
+	}
+
+	atomic_set(&resource->locked, 0);
+	kref_init(&resource->refcount);
+	INIT_LIST_HEAD(&resource->link);
+	resource->count = request->count;
+
+	/* Allocate space to store dma_buf_fds received from user space */
+	size = request->count * sizeof(int);
+	resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->list_of_dma_buf_fds)
+	{
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Allocate space to store dma_buf pointers associated with dma_buf_fds */
+	size = sizeof(struct dma_buf *) * request->count;
+	resource->dma_bufs = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->dma_bufs)
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+	/* Allocate space to store kds_resources associated with dma_buf_fds */
+	size = sizeof(struct kds_resource *) * request->count;
+	resource->kds_resources = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->kds_resources)
+	{
+		kfree(resource->dma_bufs);
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Copy requested list of dma_buf_fds from user space */
+	size = request->count * sizeof(int);
+	if (0 != copy_from_user(resource->list_of_dma_buf_fds, (void __user *)request->list_of_dma_buf_fds, size))
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource->dma_bufs);
+		kfree(resource->kds_resources);
+		kfree(resource);
+		return -ENOMEM;
+	}
+#if DMA_BUF_LOCK_DEBUG
+	for (i = 0; i < request->count; i++)
+	{
+		printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
+	}
+#endif
+
+	/* Add resource to global list */
+	mutex_lock(&dma_buf_lock_mutex);
+
+	list_add(&resource->link, &dma_buf_lock_resource_list);
+
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	for (i = 0; i < request->count; i++)
+	{
+		/* Convert fd into dma_buf structure */
+		resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
+
+		if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i])))
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+
+		/*Get kds_resource associated with dma_buf */
+		resource->kds_resources[i] = get_dma_buf_kds_resource(resource->dma_bufs[i]);
+
+		if (NULL == resource->kds_resources[i])
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+#if DMA_BUF_LOCK_DEBUG
+		printk("dma_buf_lock_dolock : dma_buf_fd %i dma_buf %X kds_resource %X\n", resource->list_of_dma_buf_fds[i],
+		       (unsigned int)resource->dma_bufs[i], (unsigned int)resource->kds_resources[i]);
+#endif	
+	}
+
+	kds_callback_init(&resource->cb, 1, dma_buf_lock_kds_callback);
+	init_waitqueue_head(&resource->wait);
+
+	kref_get(&resource->refcount);
+
+	/* Create file descriptor associated with lock request */
+	fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops, 
+	                      (void *)resource, 0);
+	if (fd < 0)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+		return fd;
+	}
+
+	resource->exclusive = request->exclusive;
+
+	/* Start locking process */
+	ret = kds_async_waitall(&resource->resource_set,
+	                        &resource->cb, resource, NULL,
+	                        request->count,  &resource->exclusive,
+	                        resource->kds_resources);
+
+	if (IS_ERR_VALUE(ret))
+	{
+		put_unused_fd(fd);
+
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+
+		return ret;
+	}
+
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_dolock : complete\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return fd;
+}
+
+static void dma_buf_lock_dounlock(struct kref *ref)
+{
+	int i;
+	dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
+
+	atomic_set(&resource->locked, 0);
+
+	kds_callback_term(&resource->cb);
+
+	kds_resource_set_release(&resource->resource_set);
+
+	list_del(&resource->link);
+
+	for (i = 0; i < resource->count; i++)
+	{
+		dma_buf_put(resource->dma_bufs[i]);
+	}
+
+	kfree(resource->kds_resources);
+	kfree(resource->dma_bufs);
+	kfree(resource->list_of_dma_buf_fds);
+	kfree(resource);
+}
+
+static int __init dma_buf_lock_init(void)
+{
+	int err;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init\n");
+#endif
+	err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
+
+	if (0 == err)
+	{
+		cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops);
+
+		err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1);
+
+		if (0 == err)
+		{
+			dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
+			if (IS_ERR(dma_buf_lock_class))
+			{
+				err = PTR_ERR(dma_buf_lock_class);
+			}
+			else
+			{
+				struct device *mdev;
+				mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev, NULL, dma_buf_lock_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return 0;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(dma_buf_lock_class);
+			}
+			cdev_del(&dma_buf_lock_cdev);
+		}
+
+		unregister_chrdev_region(dma_buf_lock_dev, 1);
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init failed\n");
+#endif
+	return err;
+}
+
+static void __exit dma_buf_lock_exit(void)
+{
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_exit\n");
+#endif
+
+	/* Unlock all outstanding references */
+	while (1)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		if (list_empty(&dma_buf_lock_resource_list))
+		{
+			mutex_unlock(&dma_buf_lock_mutex);
+			break;
+		}
+		else
+		{
+			dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next, 
+			                                             dma_buf_lock_resource, link);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+		}
+	}
+
+	device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
+
+	class_destroy(dma_buf_lock_class);
+
+	cdev_del(&dma_buf_lock_cdev);
+
+	unregister_chrdev_region(dma_buf_lock_dev, 1);
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	dma_buf_lock_k_request request;
+	int size = _IOC_SIZE(cmd);
+
+	if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
+	{
+		return -ENOTTY;
+	}
+
+	switch (cmd)
+	{
+		case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
+			if (size != sizeof(dma_buf_lock_k_request))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&request, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+#if DMA_BUF_LOCK_DEBUG
+			printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
+#endif
+			return dma_buf_lock_dolock(&request);
+	}
+
+	return -ENOTTY;
+}
+
+module_init(dma_buf_lock_init);
+module_exit(dma_buf_lock_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h b/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
new file mode 100755
index 0000000..e1b9348
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _DMA_BUF_LOCK_H
+#define _DMA_BUF_LOCK_H
+
+typedef enum dma_buf_lock_exclusive
+{
+	DMA_BUF_LOCK_NONEXCLUSIVE = 0,
+	DMA_BUF_LOCK_EXCLUSIVE = -1
+} dma_buf_lock_exclusive;
+
+typedef struct dma_buf_lock_k_request
+{
+	int count;
+	int *list_of_dma_buf_fds;
+	int timeout;
+	dma_buf_lock_exclusive exclusive;
+} dma_buf_lock_k_request;
+
+#define DMA_BUF_LOCK_IOC_MAGIC '~'
+
+#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC       _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
+
+#define DMA_BUF_LOCK_IOC_MINNR 11
+#define DMA_BUF_LOCK_IOC_MAXNR 11
+
+#endif /* _DMA_BUF_LOCK_H */
diff --git a/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/sconscript b/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/sconscript
new file mode 100755
index 0000000..b8724f1
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/dma_buf_lock/src/sconscript
@@ -0,0 +1,33 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+src = [Glob('#kernel/drivers/base/dma_buf_lock/src/*.c'), Glob('#kernel/drivers/base/dma_buf_lock/src/*.h'), Glob('#kernel/drivers/base/dma_buf_lock/src/K*')]
+
+if env.GetOption('clean') :
+	# Clean module
+	env.Execute(Action("make clean", '[CLEAN] dma_buf_lock'))
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
+else:
+	# Build module
+	makeAction=Action("cd ${SOURCE.dir} && make dma_buf_lock && cp dma_buf_lock.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [makeAction])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
diff --git a/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild b/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
new file mode 100755
index 0000000..56b9f86
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma-buf-test-exporter.o
+endif
diff --git a/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig b/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
new file mode 100755
index 0000000..974f0c2
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+config DMA_SHARED_BUFFER_TEST_EXPORTER
+	tristate "Test exporter for the dma-buf framework"
+	depends on DMA_SHARED_BUFFER
+	help
+	  This option enables the test exporter usable to help test importerts.
diff --git a/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/Makefile b/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
new file mode 100755
index 0000000..06e3d5c
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
new file mode 100755
index 0000000..08ab49a
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
@@ -0,0 +1,703 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/dma-buf-test-exporter.h>
+#include <linux/dma-buf.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/atomic.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif
+#include <linux/dma-mapping.h>
+#endif
+
+struct dma_buf_te_alloc {
+	/* the real alloc */
+	int nr_pages;
+	struct page **pages;
+
+	/* the debug usage tracking */
+	int nr_attached_devices;
+	int nr_device_mappings;
+	int nr_cpu_mappings;
+
+	/* failure simulation */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+
+	bool contiguous;
+	dma_addr_t contig_dma_addr;
+	void *contig_cpu_addr;
+};
+
+static struct miscdevice te_device;
+
+static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc	*alloc;
+	alloc = buf->priv;
+
+	if (alloc->fail_attach)
+		return -EFAULT;
+
+	/* dma_buf is externally locked during call */
+	alloc->nr_attached_devices++;
+	return 0;
+}
+
+static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* dma_buf is externally locked during call */
+
+	alloc->nr_attached_devices--;
+}
+
+static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction)
+{
+	struct sg_table *sg;
+	struct scatterlist *iter;
+	struct dma_buf_te_alloc	*alloc;
+	int i;
+	int ret;
+
+	alloc = attachment->dmabuf->priv;
+
+	if (alloc->fail_map)
+		return ERR_PTR(-ENOMEM);
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* if the ARCH can't chain we can't have allocs larger than a single sg can hold */
+	if (alloc->nr_pages > SG_MAX_SINGLE_ALLOC)
+		return ERR_PTR(-EINVAL);
+#endif
+
+	sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (!sg)
+		return ERR_PTR(-ENOMEM);
+
+	/* from here we access the allocation object, so lock the dmabuf pointing to it */
+	mutex_lock(&attachment->dmabuf->lock);
+
+	if (alloc->contiguous)
+		ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+	else
+		ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL);
+	if (ret) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		kfree(sg);
+		return ERR_PTR(ret);
+	}
+
+	if (alloc->contiguous) {
+		sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE;
+		sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0);
+		sg_dma_address(sg->sgl) = alloc->contig_dma_addr;
+	} else {
+		for_each_sg(sg->sgl, iter, alloc->nr_pages, i)
+			sg_set_page(iter, alloc->pages[i], PAGE_SIZE, 0);
+	}
+
+	if (!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		sg_free_table(sg);
+		kfree(sg);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	alloc->nr_device_mappings++;
+	mutex_unlock(&attachment->dmabuf->lock);
+	return sg;
+}
+
+static void dma_buf_te_unmap(struct dma_buf_attachment *attachment,
+							 struct sg_table *sg, enum dma_data_direction direction)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = attachment->dmabuf->priv;
+
+	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction);
+	sg_free_table(sg);
+	kfree(sg);
+
+	mutex_lock(&attachment->dmabuf->lock);
+	alloc->nr_device_mappings--;
+	mutex_unlock(&attachment->dmabuf->lock);
+}
+
+static void dma_buf_te_release(struct dma_buf *buf)
+{
+	int i;
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* no need for locking */
+
+	if (alloc->contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr,
+						alloc->contig_dma_addr,
+						DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++)
+			__free_page(alloc->pages[i]);
+	}
+	kfree(alloc->pages);
+	kfree(alloc);
+}
+
+
+static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings++;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	BUG_ON(alloc->nr_cpu_mappings <= 0);
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings--;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static int dma_buf_te_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dmabuf;
+	struct page *pageptr;
+
+	dmabuf = vma->vm_private_data;
+	alloc = dmabuf->priv;
+
+	if (vmf->pgoff > alloc->nr_pages)
+		return VM_FAULT_SIGBUS;
+
+	pageptr = alloc->pages[vmf->pgoff];
+
+	BUG_ON(!pageptr);
+
+	get_page(pageptr);
+	vmf->page = pageptr;
+
+	return 0;
+}
+
+struct vm_operations_struct dma_buf_te_vm_ops = {
+	.open = dma_buf_te_mmap_open,
+	.close = dma_buf_te_mmap_close,
+	.fault = dma_buf_te_mmap_fault
+};
+
+static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = dmabuf->priv;
+
+	if (alloc->fail_mmap)
+		return -ENOMEM;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTEXPAND;
+#endif
+	vma->vm_ops = &dma_buf_te_vm_ops;
+	vma->vm_private_data = dmabuf;
+
+	/*  we fault in the pages on access */
+
+	/* call open to do the ref-counting */
+	dma_buf_te_vm_ops.open(vma);
+
+	return 0;
+}
+
+static void *dma_buf_te_kmap_atomic(struct dma_buf *buf, unsigned long page_num)
+{
+	/* IGNORE */
+	return NULL;
+}
+
+static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long page_num)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return NULL;
+
+	return kmap(alloc->pages[page_num]);
+}
+static void dma_buf_te_kunmap(struct dma_buf *buf,
+		unsigned long page_num, void *addr)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return;
+
+	kunmap(alloc->pages[page_num]);
+	return;
+}
+
+static struct dma_buf_ops dma_buf_te_ops = {
+	/* real handlers */
+	.attach = dma_buf_te_attach,
+	.detach = dma_buf_te_detach,
+	.map_dma_buf = dma_buf_te_map,
+	.unmap_dma_buf = dma_buf_te_unmap,
+	.release = dma_buf_te_release,
+	.mmap = dma_buf_te_mmap,
+	.kmap = dma_buf_te_kmap,
+	.kunmap = dma_buf_te_kunmap,
+
+	/* nop handlers for mandatory functions we ignore */
+	.kmap_atomic = dma_buf_te_kmap_atomic
+};
+
+static int do_dma_buf_te_ioctl_version(struct dma_buf_te_ioctl_version __user *buf)
+{
+	struct dma_buf_te_ioctl_version v;
+
+	if (copy_from_user(&v, buf, sizeof(v)))
+		return -EFAULT;
+
+	if (v.op != DMA_BUF_TE_ENQ)
+		return -EFAULT;
+
+	v.op = DMA_BUF_TE_ACK;
+	v.major = DMA_BUF_TE_VER_MAJOR;
+	v.minor = DMA_BUF_TE_VER_MINOR;
+
+	if (copy_to_user(buf, &v, sizeof(v)))
+		return -EFAULT;
+	else
+		return 0;
+}
+
+static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, bool contiguous)
+{
+	struct dma_buf_te_ioctl_alloc alloc_req;
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dma_buf;
+	int i = 0;
+	int fd;
+
+	if (copy_from_user(&alloc_req, buf, sizeof(alloc_req))) {
+		dev_err(te_device.this_device, "%s: couldn't get user data", __func__);
+		goto no_input;
+	}
+
+	if (!alloc_req.size) {
+		dev_err(te_device.this_device, "%s: no size specified", __func__);
+		goto invalid_size;
+	}
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* Whilst it is possible to allocate larger buffer, we won't be able to
+	 * map it during actual usage (mmap() still succeeds). We fail here so
+	 * userspace code can deal with it early than having driver failure
+	 * later on. */
+	if (alloc_req.size > SG_MAX_SINGLE_ALLOC) {
+		dev_err(te_device.this_device, "%s: buffer size of %llu pages exceeded the mapping limit of %lu pages",
+				__func__, alloc_req.size, SG_MAX_SINGLE_ALLOC);
+		goto invalid_size;
+	}
+#endif
+
+	alloc = kzalloc(sizeof(struct dma_buf_te_alloc), GFP_KERNEL);
+	if (NULL == alloc) {
+		dev_err(te_device.this_device, "%s: couldn't alloc object", __func__);
+		goto no_alloc_object;
+	}
+
+	alloc->nr_pages = alloc_req.size;
+	alloc->contiguous = contiguous;
+
+	alloc->pages = kzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL);
+	if (!alloc->pages) {
+		dev_err(te_device.this_device,
+				"%s: couldn't alloc %d page structures", __func__,
+				alloc->nr_pages);
+		goto free_alloc_object;
+	}
+
+	if (contiguous) {
+		dma_addr_t dma_aux;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO,
+				DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO, &attrs);
+#else
+		alloc->contig_cpu_addr = dma_alloc_writecombine(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO);
+#endif
+		if (!alloc->contig_cpu_addr) {
+			dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %d pages", __func__, alloc->nr_pages);
+			goto free_page_struct;
+		}
+		dma_aux = alloc->contig_dma_addr;
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = pfn_to_page(PFN_DOWN(dma_aux));
+			dma_aux += PAGE_SIZE;
+		}
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+			if (NULL == alloc->pages[i]) {
+				dev_err(te_device.this_device, "%s: couldn't alloc page", __func__);
+				goto no_page;
+			}
+		}
+	}
+
+	/* alloc ready, let's export it */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0))
+	{
+		struct dma_buf_export_info export_info = {
+			.exp_name = "dma_buf_te",
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0))
+			.owner = THIS_MODULE,
+#endif
+			.ops = &dma_buf_te_ops,
+			.size = alloc->nr_pages << PAGE_SHIFT,
+			.flags = O_CLOEXEC | O_RDWR,
+			.priv = alloc,
+		};
+
+		dma_buf = dma_buf_export(&export_info);
+	}
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0))
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR, NULL);
+#else
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR);
+#endif
+
+	if (IS_ERR_OR_NULL(dma_buf)) {
+		dev_err(te_device.this_device, "%s: couldn't export dma_buf", __func__);
+		goto no_export;
+	}
+
+	/* get fd for buf */
+	fd = dma_buf_fd(dma_buf, O_CLOEXEC);
+
+	if (fd < 0) {
+		dev_err(te_device.this_device, "%s: couldn't get fd from dma_buf", __func__);
+		goto no_fd;
+	}
+
+	return fd;
+
+no_fd:
+	dma_buf_put(dma_buf);
+no_export:
+	/* i still valid */
+no_page:
+	if (contiguous) {
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr,
+						alloc->contig_dma_addr,
+						DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		while (i-- > 0)
+			__free_page(alloc->pages[i]);
+	}
+free_page_struct:
+	kfree(alloc->pages);
+free_alloc_object:
+	kfree(alloc);
+no_alloc_object:
+invalid_size:
+no_input:
+	return -EFAULT;
+}
+
+static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg)
+{
+	struct dma_buf_te_ioctl_status status;
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&status, arg, sizeof(status)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(status.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, get the current status */
+	alloc = dmabuf->priv;
+
+	/* lock while reading status to take a snapshot */
+	mutex_lock(&dmabuf->lock);
+	status.attached_devices = alloc->nr_attached_devices;
+	status.device_mappings = alloc->nr_device_mappings;
+	status.cpu_mappings = alloc->nr_cpu_mappings;
+	mutex_unlock(&dmabuf->lock);
+
+	if (copy_to_user(arg, &status, sizeof(status)))
+		goto err_have_dmabuf;
+
+	/* All OK */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing __user *arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_set_failing f;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, set the fail modes */
+	alloc = dmabuf->priv;
+	/* lock to set the fail modes atomically */
+	mutex_lock(&dmabuf->lock);
+	alloc->fail_attach = f.fail_attach;
+	alloc->fail_map    = f.fail_map;
+	alloc->fail_mmap   = f.fail_mmap;
+	mutex_unlock(&dmabuf->lock);
+
+	/* success */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
+{
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	unsigned int count;
+	unsigned int offset = 0;
+	int ret = 0;
+	int i;
+
+	attachment = dma_buf_attach(dma_buf, te_device.this_device);
+	if (IS_ERR_OR_NULL(attachment))
+		return -EBUSY;
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto no_import;
+	}
+
+	ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+	if (ret)
+		goto no_cpu_access;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		for (i = 0; i < sg_dma_len(sg); i = i + PAGE_SIZE) {
+			void *addr;
+
+			addr = dma_buf_kmap(dma_buf, i >> PAGE_SHIFT);
+			if (!addr) {
+				/* dma_buf_kmap is unimplemented in exynos and returns NULL */
+				ret = -EPERM;
+				goto no_kmap;
+			}
+			memset(addr, value, PAGE_SIZE);
+			dma_buf_kunmap(dma_buf, i >> PAGE_SHIFT, addr);
+		}
+		offset += sg_dma_len(sg);
+	}
+
+no_kmap:
+	dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+no_cpu_access:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+no_import:
+	dma_buf_detach(dma_buf, attachment);
+	return ret;
+}
+
+static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
+{
+
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_fill f;
+	int ret;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	ret = dma_te_buf_fill(dmabuf, f.value);
+	dma_buf_put(dmabuf);
+
+	return ret;
+}
+
+static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case DMA_BUF_TE_VERSION:
+		return do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg);
+	case DMA_BUF_TE_ALLOC:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false);
+	case DMA_BUF_TE_ALLOC_CONT:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true);
+	case DMA_BUF_TE_QUERY:
+		return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg);
+	case DMA_BUF_TE_SET_FAILING:
+		return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg);
+	case DMA_BUF_TE_FILL:
+		return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations dma_buf_te_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = dma_buf_te_ioctl,
+	.compat_ioctl = dma_buf_te_ioctl,
+};
+
+static int __init dma_buf_te_init(void)
+{
+	int res;
+	te_device.minor = MISC_DYNAMIC_MINOR;
+	te_device.name = "dma_buf_te";
+	te_device.fops = &dma_buf_te_fops;
+
+	res = misc_register(&te_device);
+	if (res) {
+		printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n");
+		return res;
+	}
+	te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	dev_info(te_device.this_device, "dma_buf_te ready\n");
+	return 0;
+
+}
+
+static void __exit dma_buf_te_exit(void)
+{
+	misc_deregister(&te_device);
+}
+
+module_init(dma_buf_te_init);
+module_exit(dma_buf_te_exit);
+MODULE_LICENSE("GPL");
+
diff --git a/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/sconscript b/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
new file mode 100755
index 0000000..5f42141
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+mod = env.BuildKernelModule('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', Glob('*.c'))
+env.KernelObjTarget('dma-buf-test-exporter', mod)
diff --git a/midgard/r14p0/kernel/drivers/base/kds/Kbuild b/midgard/r14p0/kernel/drivers/base/kds/Kbuild
new file mode 100755
index 0000000..a88acd8
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/kds/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_KDS) += kds.o
+obj-$(CONFIG_KDS_TEST) += kds_test.o
diff --git a/midgard/r14p0/kernel/drivers/base/kds/Kconfig b/midgard/r14p0/kernel/drivers/base/kds/Kconfig
new file mode 100755
index 0000000..5f96165
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/kds/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config KDS
+	tristate "Kernel dependency system"
+	help
+	  This option enables the generic kernel dependency system
diff --git a/midgard/r14p0/kernel/drivers/base/kds/Makefile b/midgard/r14p0/kernel/drivers/base/kds/Makefile
new file mode 100755
index 0000000..364d151
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/kds/Makefile
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+CONFIG_KDS_TEST ?= n
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: kds
+
+kds:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS=m
+
+kds_test:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS_TEST=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r14p0/kernel/drivers/base/kds/kds.c b/midgard/r14p0/kernel/drivers/base/kds/kds.c
new file mode 100755
index 0000000..62612d4
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/kds/kds.c
@@ -0,0 +1,559 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/kds.h>
+#include <linux/kref.h>
+
+#include <asm/atomic.h>
+
+#define KDS_LINK_TRIGGERED (1u << 0)
+#define KDS_LINK_EXCLUSIVE (1u << 1)
+
+#define KDS_INVALID (void *)-2
+#define KDS_RESOURCE (void *)-1
+
+struct kds_resource_set
+{
+	unsigned long         num_resources;
+	unsigned long         pending;
+	struct kds_callback  *cb;
+	void                 *callback_parameter;
+	void                 *callback_extra_parameter;
+	struct list_head      callback_link;
+	struct work_struct    callback_work;
+	atomic_t              cb_queued;
+	/* This resource set will be freed when there are no pending
+	 * callbacks */
+	struct kref           refcount;
+
+	/* This is only initted when kds_waitall() is called. */
+	wait_queue_head_t     wake;
+
+	struct kds_link       resources[0];
+
+};
+
+static DEFINE_SPINLOCK(kds_lock);
+
+static void __resource_set_release(struct kref *ref)
+{
+	struct kds_resource_set *rset = container_of(ref,
+			struct kds_resource_set, refcount);
+
+	kfree(rset);
+}
+
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb)
+{
+	int ret = 0;
+
+	cb->direct = direct;
+	cb->user_cb = user_cb;
+
+	if (!direct)
+	{
+		cb->wq = alloc_workqueue("kds", WQ_UNBOUND | WQ_HIGHPRI, WQ_UNBOUND_MAX_ACTIVE);
+		if (!cb->wq)
+			ret = -ENOMEM;
+	}
+	else
+	{
+		cb->wq = NULL;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(kds_callback_init);
+
+void kds_callback_term(struct kds_callback *cb)
+{
+	if (!cb->direct)
+	{
+		BUG_ON(!cb->wq);
+		destroy_workqueue(cb->wq);
+	}
+	else
+	{
+		BUG_ON(cb->wq);
+	}
+}
+
+EXPORT_SYMBOL(kds_callback_term);
+
+static void kds_do_user_callback(struct kds_resource_set *rset)
+{
+	rset->cb->user_cb(rset->callback_parameter, rset->callback_extra_parameter);
+}
+
+static void kds_queued_callback(struct work_struct *work)
+{
+	struct kds_resource_set *rset;
+	rset = container_of(work, struct kds_resource_set, callback_work);
+
+	atomic_dec(&rset->cb_queued);
+
+	kds_do_user_callback(rset);
+}
+
+static void kds_callback_perform(struct kds_resource_set *rset)
+{
+	if (rset->cb->direct)
+		kds_do_user_callback(rset);
+	else
+	{
+		int result;
+
+		atomic_inc(&rset->cb_queued);
+
+		result = queue_work(rset->cb->wq, &rset->callback_work);
+		/* if we got a 0 return it means we've triggered the same rset twice! */
+		WARN_ON(!result);
+	}
+}
+
+void kds_resource_init(struct kds_resource * const res)
+{
+	BUG_ON(!res);
+	INIT_LIST_HEAD(&res->waiters.link);
+	res->waiters.parent = KDS_RESOURCE;
+}
+EXPORT_SYMBOL(kds_resource_init);
+
+int kds_resource_term(struct kds_resource *res)
+{
+	unsigned long lflags;
+	BUG_ON(!res);
+	spin_lock_irqsave(&kds_lock, lflags);
+	if (!list_empty(&res->waiters.link))
+	{
+		spin_unlock_irqrestore(&kds_lock, lflags);
+		printk(KERN_ERR "ERROR: KDS resource is still in use\n");
+		return -EBUSY;
+	}
+	res->waiters.parent = KDS_INVALID;
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	return 0;
+}
+EXPORT_SYMBOL(kds_resource_term);
+
+int kds_async_waitall(
+		struct kds_resource_set ** const pprset,
+		struct kds_callback      *cb,
+		void                     *callback_parameter,
+		void                     *callback_extra_parameter,
+		int                       number_resources,
+		unsigned long            *exclusive_access_bitmap,
+		struct kds_resource     **resource_list)
+{
+	struct kds_resource_set *rset = NULL;
+	unsigned long lflags;
+	int i;
+	int triggered;
+	int err = -EFAULT;
+
+	BUG_ON(!pprset);
+	BUG_ON(!resource_list);
+	BUG_ON(!cb);
+
+	WARN_ONCE(number_resources > 10, "Waiting on a high numbers of resources may increase latency, see documentation.");
+
+	rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+	if (!rset)
+	{
+		return -ENOMEM;
+	}
+
+	rset->num_resources = number_resources;
+	rset->pending = number_resources;
+	rset->cb = cb;
+	rset->callback_parameter = callback_parameter;
+	rset->callback_extra_parameter = callback_extra_parameter;
+	INIT_LIST_HEAD(&rset->callback_link);
+	INIT_WORK(&rset->callback_work, kds_queued_callback);
+	atomic_set(&rset->cb_queued, 0);
+	kref_init(&rset->refcount);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		INIT_LIST_HEAD(&rset->resources[i].link);
+		rset->resources[i].parent = rset;
+	}
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		unsigned long link_state = 0;
+
+		if (test_bit(i, exclusive_access_bitmap))
+		{
+			link_state |= KDS_LINK_EXCLUSIVE;
+		}
+
+		/* no-one else waiting? */
+		if (list_empty(&resource_list[i]->waiters.link))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		/* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+		else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+				(((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		rset->resources[i].state = link_state;
+
+		/* avoid double wait (hang) */
+		if (!list_empty(&resource_list[i]->waiters.link))
+		{
+			/* adding same rset again? */
+			if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+			{
+				goto roll_back;
+			}
+		}
+		list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+	}
+
+	triggered = (rset->pending == 0);
+
+	/* set the pointer before the callback is called so it sees it */
+	*pprset = rset;
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	if (triggered)
+	{
+		/* all resources obtained, trigger callback */
+		kds_callback_perform(rset);
+	}
+
+	return 0;
+
+roll_back:
+	/* roll back */
+	while (i-- > 0)
+	{
+		list_del(&rset->resources[i].link);
+	}
+	err = -EINVAL;
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	kfree(rset);
+	return err;
+}
+EXPORT_SYMBOL(kds_async_waitall);
+
+static void wake_up_sync_call(void *callback_parameter, void *callback_extra_parameter)
+{
+	wait_queue_head_t *wait = (wait_queue_head_t *)callback_parameter;
+	wake_up(wait);
+}
+
+static struct kds_callback sync_cb =
+{
+	wake_up_sync_call,
+	1,
+	NULL,
+};
+
+struct kds_resource_set *kds_waitall(
+		int                   number_resources,
+		unsigned long        *exclusive_access_bitmap,
+		struct kds_resource **resource_list,
+		unsigned long         jiffies_timeout)
+{
+	struct kds_resource_set *rset;
+	unsigned long lflags;
+	int i;
+	int triggered = 0;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+	if (!rset)
+		return rset;
+
+	rset->num_resources = number_resources;
+	rset->pending = number_resources;
+	init_waitqueue_head(&rset->wake);
+	INIT_LIST_HEAD(&rset->callback_link);
+	INIT_WORK(&rset->callback_work, kds_queued_callback);
+	atomic_set(&rset->cb_queued, 0);
+	kref_init(&rset->refcount);
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		unsigned long link_state = 0;
+
+		if (test_bit(i, exclusive_access_bitmap))
+		{
+			link_state |= KDS_LINK_EXCLUSIVE;
+		}
+
+		if (list_empty(&resource_list[i]->waiters.link))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		/* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+		else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+				(((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+
+		INIT_LIST_HEAD(&rset->resources[i].link);
+		rset->resources[i].parent = rset;
+		rset->resources[i].state = link_state;
+
+		/* avoid double wait (hang) */
+		if (!list_empty(&resource_list[i]->waiters.link))
+		{
+			/* adding same rset again? */
+			if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+			{
+				goto roll_back;
+			}
+		}
+
+		list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+	}
+
+	if (rset->pending == 0)
+		triggered = 1;
+	else
+	{
+		rset->cb = &sync_cb;
+		rset->callback_parameter = &rset->wake;
+		rset->callback_extra_parameter = NULL;
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	if (!triggered)
+	{
+		long wait_res = 0;
+		long timeout = (jiffies_timeout == KDS_WAIT_BLOCKING) ?
+				MAX_SCHEDULE_TIMEOUT : jiffies_timeout;
+
+		if (timeout)
+		{
+			wait_res = wait_event_interruptible_timeout(rset->wake,
+					rset->pending == 0, timeout);
+		}
+
+		if ((wait_res == -ERESTARTSYS) || (wait_res == 0))
+		{
+			/* use \a kds_resource_set_release to roll back */
+			kds_resource_set_release(&rset);
+			return ERR_PTR(wait_res);
+		}
+	}
+	return rset;
+
+roll_back:
+	/* roll back */
+	while (i-- > 0)
+	{
+		list_del(&rset->resources[i].link);
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	kfree(rset);
+	return ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL(kds_waitall);
+
+static void trigger_new_rset_owner(struct kds_resource_set *rset,
+		struct list_head *triggered)
+{
+	if (0 == --rset->pending) {
+		/* new owner now triggered, track for callback later */
+		kref_get(&rset->refcount);
+		list_add(&rset->callback_link, triggered);
+	}
+}
+
+static void __kds_resource_set_release_common(struct kds_resource_set *rset)
+{
+	struct list_head triggered = LIST_HEAD_INIT(triggered);
+	struct kds_resource_set *it;
+	unsigned long lflags;
+	int i;
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < rset->num_resources; i++)
+	{
+		struct kds_resource *resource;
+		struct kds_link *it = NULL;
+
+		/* fetch the previous entry on the linked list */
+		it = list_entry(rset->resources[i].link.prev, struct kds_link, link);
+		/* unlink ourself */
+		list_del(&rset->resources[i].link);
+
+		/* any waiters? */
+		if (list_empty(&it->link))
+			continue;
+
+		/* were we the head of the list? (head if prev is a resource) */
+		if (it->parent != KDS_RESOURCE)
+		{
+			if ((it->state & KDS_LINK_TRIGGERED) && !(it->state & KDS_LINK_EXCLUSIVE))
+			{
+				/*
+				 * previous was triggered and not exclusive, so we
+				 * trigger non-exclusive until end-of-list or first
+				 * exclusive
+				 */
+
+				struct kds_link *it_waiting = it;
+
+				list_for_each_entry(it, &it_waiting->link, link)
+				{
+					/* exclusive found, stop triggering */
+					if (it->state & KDS_LINK_EXCLUSIVE)
+						break;
+
+					it->state |= KDS_LINK_TRIGGERED;
+					/* a parent to update? */
+					if (it->parent != KDS_RESOURCE)
+						trigger_new_rset_owner(
+								it->parent,
+								&triggered);
+				}
+			}
+			continue;
+		}
+
+		/* we were the head, find the kds_resource */
+		resource = container_of(it, struct kds_resource, waiters);
+
+		/* we know there is someone waiting from the any-waiters test above */
+
+		/* find the head of the waiting list */
+		it = list_first_entry(&resource->waiters.link, struct kds_link, link);
+
+		/* new exclusive owner? */
+		if (it->state & KDS_LINK_EXCLUSIVE)
+		{
+			/* link now triggered */
+			it->state |= KDS_LINK_TRIGGERED;
+			/* a parent to update? */
+			trigger_new_rset_owner(it->parent, &triggered);
+		}
+		/* exclusive releasing ? */
+		else if (rset->resources[i].state & KDS_LINK_EXCLUSIVE)
+		{
+			/* trigger non-exclusive until end-of-list or first exclusive */
+			list_for_each_entry(it, &resource->waiters.link, link)
+			{
+				/* exclusive found, stop triggering */
+				if (it->state & KDS_LINK_EXCLUSIVE)
+					break;
+
+				it->state |= KDS_LINK_TRIGGERED;
+				/* a parent to update? */
+				trigger_new_rset_owner(it->parent, &triggered);
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	while (!list_empty(&triggered))
+	{
+		it = list_first_entry(&triggered, struct kds_resource_set, callback_link);
+		list_del(&it->callback_link);
+		kds_callback_perform(it);
+
+		/* Free the resource set if no callbacks pending */
+		kref_put(&it->refcount, &__resource_set_release);
+	}
+}
+
+void kds_resource_set_release(struct kds_resource_set **pprset)
+{
+	struct kds_resource_set *rset;
+	int queued;
+
+	rset = cmpxchg(pprset, *pprset, NULL);
+
+	if (!rset)
+	{
+		/* caught a race between a cancelation
+		 * and a completion, nothing to do */
+		return;
+	}
+
+	__kds_resource_set_release_common(rset);
+
+	/*
+	 * Caller is responsible for guaranteeing that callback work is not
+	 * pending (i.e. its running or completed) prior to calling release.
+	 */
+	queued = atomic_read(&rset->cb_queued);
+	BUG_ON(queued);
+
+	kref_put(&rset->refcount, &__resource_set_release);
+}
+EXPORT_SYMBOL(kds_resource_set_release);
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset)
+{
+	struct kds_resource_set *rset;
+
+	rset = cmpxchg(pprset, *pprset, NULL);
+	if (!rset)
+	{
+		/* caught a race between a cancelation
+		 * and a completion, nothing to do */
+		return;
+	}
+
+	__kds_resource_set_release_common(rset);
+
+	/*
+	 * In the case of a kds async wait cancellation ensure the deferred
+	 * call back does not get scheduled if a trigger fired at the same time
+	 * to release the wait.
+	 */
+	cancel_work_sync(&rset->callback_work);
+
+	kref_put(&rset->refcount, &__resource_set_release);
+}
+EXPORT_SYMBOL(kds_resource_set_release_sync);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/midgard/r14p0/kernel/drivers/base/kds/sconscript b/midgard/r14p0/kernel/drivers/base/kds/sconscript
new file mode 100755
index 0000000..75add37
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/kds/sconscript
@@ -0,0 +1,42 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+#Android uses sync_pt to accomplish KDS functionality.
+#Midgard KDS is not used by Android
+if env['os'] == 'android':
+    Return()
+
+if Glob('tests/sconscript'):
+    SConscript('tests/sconscript')
+
+# If KDS is built into the kernel already we skip building the module ourselves
+build_kds = not env.KernelConfigEnabled("CONFIG_KDS")
+
+# Build KDS module
+if build_kds:
+    kds_mod = env.BuildKernelModule('$STATIC_LIB_PATH/kds.ko', ['kds.c'],
+                                    make_args = ['kds'])
+    env.KernelObjTarget('kds', kds_mod)
+
+# Build KDS test module
+if int(env['unit']) == 1:
+    kds_test_mod = env.BuildKernelModule('$STATIC_LIB_PATH/kds_test.ko',
+                                         ['kds_test.c'],
+                                         make_args = ['kds_test'])
+    env.KernelObjTarget('kds', kds_test_mod)
+    if build_kds:
+        env.Depends(kds_test_mod, kds_mod)
diff --git a/midgard/r14p0/kernel/drivers/base/sconscript b/midgard/r14p0/kernel/drivers/base/sconscript
new file mode 100755
index 0000000..84de8c4
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/sconscript
@@ -0,0 +1,36 @@
+#
+# (C) COPYRIGHT 2010-2014, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import( 'env' )
+
+if Glob('bus_logger/sconscript'):
+	if env['buslog'] == '1':
+		SConscript('bus_logger/sconscript')
+
+if Glob('mali_fpga_sysctl/sconscript'):
+	SConscript('mali_fpga_sysctl/sconscript')
+
+if Glob('dma_buf_lock/sconscript'):
+	SConscript('dma_buf_lock/sconscript')
+
+if Glob('kds/sconscript'):
+	SConscript('kds/sconscript')
+
+if Glob('ump/sconscript'):
+	SConscript('ump/sconscript')
+
+if Glob('dma_buf_test_exporter/sconscript'):
+	SConscript('dma_buf_test_exporter/sconscript')
+
diff --git a/midgard/r14p0/kernel/drivers/base/ump/Kbuild b/midgard/r14p0/kernel/drivers/base/ump/Kbuild
new file mode 100755
index 0000000..2bbdba2
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += src/
+
diff --git a/midgard/r14p0/kernel/drivers/base/ump/Kconfig b/midgard/r14p0/kernel/drivers/base/ump/Kconfig
new file mode 100755
index 0000000..f7451e6
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/Kconfig
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config UMP
+	tristate "Enable Unified Memory Provider (UMP) support"
+	default n
+    help
+	  Enable this option to build support for the ARM UMP module.
+	  UMP can be used by the Mali T6xx module to improve performance
+	  by reducing the copying of data by sharing memory.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate one module, called ump.
diff --git a/midgard/r14p0/kernel/drivers/base/ump/docs/Doxyfile b/midgard/r14p0/kernel/drivers/base/ump/docs/Doxyfile
new file mode 100755
index 0000000..fbec8eb
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/docs/Doxyfile
@@ -0,0 +1,125 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/include/linux/ump-common.h ../../kernel/include/linux/ump.h
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           += ../../kernel/drivers/base/ump
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           +=
diff --git a/midgard/r14p0/kernel/drivers/base/ump/docs/sconscript b/midgard/r14p0/kernel/drivers/base/ump/docs/sconscript
new file mode 100755
index 0000000..521278d
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/docs/sconscript
@@ -0,0 +1,31 @@
+#
+# (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+doxygen_sources = [
+	'Doxyfile',
+	Glob('*.png'),
+	Glob('../*.h'),
+	Glob('../src/*.h') ]
+
+if env['doc'] == '1':
+        doxygen_target = env.Command('doxygen/html/index.html', doxygen_sources,
+                                     ['cd ${SOURCE.dir} && doxygen'])
+        env.Clean(doxygen_target, './doxygen')
+
+        Alias('doxygen', doxygen_target)
+
diff --git a/midgard/r14p0/kernel/drivers/base/ump/example_kernel_api.c b/midgard/r14p0/kernel/drivers/base/ump/example_kernel_api.c
new file mode 100755
index 0000000..858dd8b
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/example_kernel_api.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Example routine to display information about an UMP allocation
+ * The routine takes an secure_id which can come from a different kernel module
+ * or from a client application (i.e. an ioctl).
+ * It creates a ump handle from the secure id (which validates the secure id)
+ * and if successful dumps the physical memory information.
+ * It follows the API and pins the memory while "using" the physical memory.
+ * Finally it calls the release function to indicate it's finished with the handle.
+ *
+ * If the function can't look up the handle it fails with return value -1.
+ * If the testy succeeds then it return 0.
+ * */
+
+static int display_ump_memory_information(ump_secure_id secure_id)
+{
+	const ump_dd_physical_block_64 * ump_blocks = NULL;
+	ump_dd_handle ump_mem;
+	uint64_t nr_blocks;
+	int i;
+	ump_alloc_flags flags;
+
+	/* get a handle from the secure id */
+	ump_mem = ump_dd_from_secure_id(secure_id);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE == ump_mem)
+	{
+		/* invalid ID received */
+		return -1;
+	}
+
+	/* at this point we know we've added a reference to the ump allocation, so we must release it with ump_dd_release */
+
+	ump_dd_phys_blocks_get_64(ump_mem, &nr_blocks, &ump_blocks);
+	flags = ump_dd_allocation_flags_get(ump_mem);
+
+	printf("UMP allocation with secure ID %u consists of %zd physical block(s):\n", secure_id, nr_blocks);
+
+	for(i=0; i<nr_blocks; ++i)
+	{
+		printf("\tBlock %d: 0x%08zX size 0x%08zX\n", i, ump_blocks[i].addr, ump_blocks[i].size);
+	}
+
+	printf("and was allocated using the following bitflag combo:  0x%lX\n", flags);
+
+	ump_dd_release(ump_mem);
+
+	return 0;
+}
+
diff --git a/midgard/r14p0/kernel/drivers/base/ump/example_user_api.c b/midgard/r14p0/kernel/drivers/base/ump/example_user_api.c
new file mode 100755
index 0000000..d143a64
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/example_user_api.c
@@ -0,0 +1,153 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <ump/ump.h>
+#include <memory.h>
+#include <stdio.h>
+
+/*
+ * Example routine to exercise the user space UMP api.
+ * This routine initializes the UMP api and allocates some CPU+device X memory.
+ * No usage hints are given, so the driver will use the default cacheability policy.
+ * With the allocation it creates a duplicate handle and plays with the reference count.
+ * Then it simulates interacting with a device and contains pseudo code for the device.
+ *
+ * If any error is detected correct cleanup will be performed and -1 will be returned.
+ * If successful then 0 will be returned.
+ */
+
+static int test_ump_user_api(void)
+{
+	/* This is the size we try to allocate*/
+	const size_t alloc_size = 4096;
+
+	ump_handle h = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_copy = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+	void * mapping = NULL;
+
+	ump_result ump_api_res;
+	int result = -1;
+
+	ump_secure_id id;
+
+	size_t size_returned;
+
+	ump_api_res = ump_open();
+	if (UMP_OK != ump_api_res)
+	{
+		/* failed to open an ump session */
+		/* early out */
+		return -1;
+	}
+
+	h = ump_allocate_64(alloc_size, UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | UMP_PROT_X_RD | UMP_PROT_X_WR);
+	/* the refcount is now 1 */
+	if (UMP_INVALID_MEMORY_HANDLE == h)
+	{
+		/* allocation failure */
+		goto cleanup;
+	}
+
+	/* this is how we could share this allocation with another process */
+
+	/* in process A: */
+	id = ump_secure_id_get(h);
+	/* still ref count 1 */
+	/* send the id to process B */
+
+	/* in process B: */
+	/* receive the id from A */
+	h_clone = ump_from_secure_id(id);
+	/* the ref count of the allocation is now 2 (one from each handle to it) */
+	/* do something ... */
+	/* release our clone */
+	ump_release(h_clone); /* safe to call even if ump_from_secure_id failed */
+	h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+
+	/* a simple save-for-future-use logic inside the driver would just copy the handle (but add a ref manually too!) */
+	/*
+	 * void assign_memory_to_job(h)
+	 * {
+	  */
+	h_copy = h;
+	ump_retain(h_copy); /* manual retain needed as we just assigned the handle, now 2 */
+	/*
+	 * }
+	 *
+	 * void job_completed(void)
+	 * {
+	 */
+	 ump_release(h_copy); /* normal handle release as if we got via an ump_allocate */
+	 h_copy = UMP_INVALID_MEMORY_HANDLE;
+	 /*
+	 * }
+	 */
+	
+	/* we're now back at ref count 1, and only h is a valid handle */
+	/* enough handle duplication show-off, let's play with the contents instead */
+
+	mapping = ump_map(h, 0, alloc_size);
+	if (NULL == mapping)
+	{
+		/* mapping failure, either out of address space or some other error */
+		goto cleanup;
+	}
+
+	memset(mapping, 0, alloc_size);
+
+	/* let's pretend we're going to start some hw device on this buffer and read the result afterwards */
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN, mapping, alloc_size);
+ 	/*
+		device cache invalidate
+
+		memory barrier
+
+		start device
+
+		memory barrier
+
+		wait for device
+
+		memory barrier
+
+		device cache clean
+
+		memory barrier
+	*/
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN_AND_INVALIDATE, mapping, alloc_size);
+
+	/* we could now peek at the result produced by the hw device, which is now accessible via our mapping */
+
+	/* unmap the buffer when we're done with it */
+	ump_unmap(h, mapping, alloc_size);
+
+	result = 0;
+
+cleanup:
+	ump_release(h);
+	h = UMP_INVALID_MEMORY_HANDLE;
+
+	ump_close();
+
+	return result;
+}
+
diff --git a/midgard/r14p0/kernel/drivers/base/ump/sconscript b/midgard/r14p0/kernel/drivers/base/ump/sconscript
new file mode 100755
index 0000000..b4aa8b6
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+if Glob('src/sconscript') and int(env['ump']) == 1:
+	SConscript( 'src/sconscript' )
+	SConscript( 'docs/sconscript' )
+
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/Kbuild b/midgard/r14p0/kernel/drivers/base/ump/src/Kbuild
new file mode 100755
index 0000000..de6d307
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/Kbuild
@@ -0,0 +1,50 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Paths required for build
+UMP_PATH = $(src)/../..
+UMP_DEVICEDRV_PATH = $(src)/.
+
+# Set up defaults if not defined by the user
+MALI_UNIT_TEST ?= 0
+
+SRC :=\
+	common/ump_kernel_core.c \
+	common/ump_kernel_descriptor_mapping.c \
+	linux/ump_kernel_linux.c \
+	linux/ump_kernel_linux_mem.c
+
+UNIT_TEST_DEFINES=
+ifeq ($(MALI_UNIT_TEST), 1)
+	MALI_DEBUG ?= 1
+
+	UNIT_TEST_DEFINES = -DMALI_UNIT_TEST=1 \
+	                    -DMALI_DEBUG=$(MALI_DEBUG)
+endif
+
+# Use our defines when compiling
+ccflags-y += -I$(UMP_PATH) -I$(UMP_DEVICEDRV_PATH) $(UNIT_TEST_DEFINES)
+
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_UMP) += ump.o
+ifeq ($(CONFIG_ION),y)
+ccflags-y += -I$(srctree)/drivers/staging/android/ion -I$(srctree)/include/linux
+obj-$(CONFIG_UMP) += imports/ion/ump_kernel_import_ion.o
+endif
+
+# Tell the Linux build system to enable building of our .c files
+ump-y := $(SRC:.c=.o)
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/Makefile b/midgard/r14p0/kernel/drivers/base/ump/src/Makefile
new file mode 100755
index 0000000..45428ad
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/Makefile
@@ -0,0 +1,81 @@
+#
+# (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+ifneq ($(KBUILD_EXTMOD),)
+include $(KBUILD_EXTMOD)/Makefile.common
+else
+include ./Makefile.common
+endif
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+RELATIVE_ROOT=../../../../..
+ROOT = $(CURDIR)/$(RELATIVE_ROOT)
+
+EXTRA_CFLAGS=-I$(CURDIR)/../../../../include
+
+ifeq ($(MALI_UNIT_TEST),1)
+	EXTRA_CFLAGS += -DMALI_UNIT_TEST=$(MALI_UNIT_TEST)
+endif
+
+# Get any user defined KDIR-<names> or maybe even a hardcoded KDIR
+-include KDIR_CONFIGURATION
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+CONFIG ?= $(ARCH)
+
+# default cpu to select
+CPU ?= $(shell uname -m)
+
+# look up KDIR based om CPU selection
+KDIR ?= $(KDIR-$(CPU))
+
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(CPU))
+endif
+
+# Validate selected config
+ifneq ($(shell [ -d arch-$(CONFIG) ] && [ -f arch-$(CONFIG)/config.h ] && echo "OK"), OK)
+$(warning Current directory is $(shell pwd))
+$(error No configuration found for config $(CONFIG). Check that arch-$(CONFIG)/config.h exists)
+else
+# Link arch to the selected arch-config directory
+$(shell [ -L arch ] && rm arch)
+$(shell ln -sf arch-$(CONFIG) arch)
+$(shell touch arch/config.h)
+endif
+
+EXTRA_SYMBOLS=
+
+ifeq ($(MALI_UNIT_TEST),1)
+	KBASE_PATH=$(ROOT)/kernel/drivers/gpu/arm/midgard
+	EXTRA_SYMBOLS+=$(KBASE_PATH)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+KDS_PATH=$(ROOT)/kernel/drivers/base/kds
+EXTRA_SYMBOLS+=$(KDS_PATH)/Module.symvers
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="$(EXTRA_CFLAGS) $(SCONS_CFLAGS)" CONFIG_UMP=m KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+kernelrelease:
+	$(MAKE) -C $(KDIR) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" kernelrelease
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/Makefile.common b/midgard/r14p0/kernel/drivers/base/ump/src/Makefile.common
new file mode 100755
index 0000000..f29a4c1
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/Makefile.common
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2008-2010, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+SRC = $(UMP_FILE_PREFIX)/common/ump_kernel_core.c \
+      $(UMP_FILE_PREFIX)/common/ump_kernel_descriptor_mapping.c
+
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/arch-arm/config.h b/midgard/r14p0/kernel/drivers/base/ump/src/arch-arm/config.h
new file mode 100755
index 0000000..152d98f
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/arch-arm/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/arch-arm64/config.h b/midgard/r14p0/kernel/drivers/base/ump/src/arch-arm64/config.h
new file mode 100755
index 0000000..cfb14ca
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/arch-arm64/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c b/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c
new file mode 100755
index 0000000..07aa077
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c
@@ -0,0 +1,756 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* module headers */
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+/* local headers */
+#include <common/ump_kernel_core.h>
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+#define UMP_FLAGS_RANGE ((UMP_PROT_SHAREABLE<<1) - 1u)
+
+static umpp_device device;
+
+ump_result umpp_core_constructor(void)
+{
+	mutex_init(&device.secure_id_map_lock);
+	device.secure_id_map = umpp_descriptor_mapping_create(UMP_EXPECTED_IDS, UMP_MAX_IDS);
+	if (NULL != device.secure_id_map)
+	{
+		if (UMP_OK == umpp_device_initialize())
+		{
+			return UMP_OK;
+		}
+		umpp_descriptor_mapping_destroy(device.secure_id_map);
+	}
+	mutex_destroy(&device.secure_id_map_lock);
+
+	return UMP_ERROR;
+}
+
+void umpp_core_destructor(void)
+{
+	umpp_device_terminate();
+	umpp_descriptor_mapping_destroy(device.secure_id_map);
+	mutex_destroy(&device.secure_id_map_lock);
+}
+
+umpp_session *umpp_core_session_start(void)
+{
+	umpp_session * session;
+
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
+	if (NULL != session)
+	{
+		mutex_init(&session->session_lock);
+
+		INIT_LIST_HEAD(&session->memory_usage);
+
+		/* try to create import client session, not a failure if they fail to initialize */
+		umpp_import_handlers_init(session);
+	}
+
+	return session;
+}
+
+void umpp_core_session_end(umpp_session *session)
+{
+	umpp_session_memory_usage * usage, *_usage;
+	UMP_ASSERT(session);
+
+	list_for_each_entry_safe(usage, _usage, &session->memory_usage, link)
+	{
+		printk(KERN_WARNING "UMP: Memory usage cleanup, releasing secure ID %d\n", ump_dd_secure_id_get(usage->mem));
+		ump_dd_release(usage->mem);
+		kfree(usage);
+
+	}
+
+	/* we should now not hold any imported memory objects,
+	 * detatch all import handlers */
+	umpp_import_handlers_term(session);
+
+	mutex_destroy(&session->session_lock);
+	kfree(session);
+}
+
+ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	umpp_allocation * alloc;
+	int i;
+
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD ) ) == 0 ||
+	    ( flags & (UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read and one write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL | __GFP_HARDWALL);
+
+	if (NULL == alloc)
+		goto out1;
+
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+	alloc->size = size;
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+#ifdef CONFIG_KDS
+	kds_resource_init(&alloc->kds_res);
+#endif
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	if (0 != umpp_phys_commit(alloc))
+	{
+		goto out2;
+	}
+
+	/* all set up, allocate an ID for it */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	return alloc;
+
+out3:
+	umpp_phys_free(alloc);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+uint64_t ump_dd_size_get_64(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->size;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_size_get(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->size <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->size;
+}
+
+ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->id;
+}
+
+#ifdef CONFIG_KDS
+struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return &alloc->kds_res;
+}
+#endif
+
+ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	return alloc->flags;
+}
+
+ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id)
+{
+	umpp_allocation * alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+
+	mutex_lock(&device.secure_id_map_lock);
+
+	if (0 == umpp_descriptor_mapping_lookup(device.secure_id_map, secure_id, (void**)&alloc))
+	{
+		if (NULL != alloc->filter_func)
+		{
+			if (!alloc->filter_func(secure_id, alloc, alloc->callback_data))
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /* the filter denied access */
+			}
+		}
+
+		/* check permission to access it */
+		if ((UMP_DD_INVALID_MEMORY_HANDLE != alloc) && !(alloc->flags & UMP_PROT_SHAREABLE))
+		{
+			if (alloc->owner != get_current()->pid)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /*no rights for the current process*/
+			}
+		}
+
+		if (UMP_DD_INVALID_MEMORY_HANDLE != alloc)
+		{
+			if( ump_dd_retain(alloc) != UMP_DD_SUCCESS)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+			}
+		}
+	}
+	mutex_unlock(&device.secure_id_map_lock);
+
+	return alloc;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id)
+{
+	return ump_dd_from_secure_id(secure_id);
+}
+
+int ump_dd_retain(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* check for overflow */
+	while(1)
+	{
+		int refcnt = atomic_read(&alloc->refcount);
+		if (refcnt + 1 > 0)
+		{
+			if(atomic_cmpxchg(&alloc->refcount, refcnt, refcnt + 1) == refcnt)
+			{
+				return 0;
+			}
+		}
+		else
+		{
+			return -EBUSY;
+		}
+	}
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_add(ump_dd_handle mem)
+{
+	ump_dd_retain(mem);
+}
+
+
+void ump_dd_release(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+	uint32_t new_cnt;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* secure the id for lookup while releasing */
+	mutex_lock(&device.secure_id_map_lock);
+
+	/* do the actual release */
+	new_cnt = atomic_sub_return(1, &alloc->refcount);
+	if (0 == new_cnt)
+	{
+		/* remove from the table as this was the last ref */
+		umpp_descriptor_mapping_remove(device.secure_id_map, alloc->id);
+	}
+
+	/* release the lock as early as possible */
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if (0 != new_cnt)
+	{
+		/* exit if still have refs */
+		return;
+	}
+
+	UMP_ASSERT(list_empty(&alloc->map_list));
+
+#ifdef CONFIG_KDS
+	if (kds_resource_term(&alloc->kds_res))
+	{
+		printk(KERN_ERR "ump_dd_release: kds_resource_term failed,"
+				"unable to release UMP allocation\n");
+		return;
+	}
+#endif
+	/* cleanup */
+	if (NULL != alloc->final_release_func)
+	{
+		alloc->final_release_func(alloc, alloc->callback_data);
+	}
+
+	if (0 == (alloc->management_flags & UMP_MGMT_EXTERNAL))
+	{
+		umpp_phys_free(alloc);
+	}
+	else
+	{
+		kfree(alloc->block_array);
+	}
+
+	mutex_destroy(&alloc->map_list_lock);
+
+	kfree(alloc);
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_release(ump_dd_handle mem)
+{
+	ump_dd_release(mem);
+}
+
+void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(pCount);
+	UMP_ASSERT(pArray);
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+	*pCount = alloc->blocksCount;
+	*pArray = alloc->block_array;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks)
+{
+	const umpp_allocation * alloc;
+	unsigned long i;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	if((uint64_t)num_blocks != alloc->blocksCount)
+	{
+		return UMP_DD_INVALID;
+	}
+
+	for( i = 0; i < num_blocks; i++)
+	{
+		UMP_ASSERT(alloc->block_array[i].addr <= UMP_UINT32_MAX);
+		UMP_ASSERT(alloc->block_array[i].size <= UMP_UINT32_MAX);
+
+		blocks[i].addr = (unsigned long)alloc->block_array[i].addr;
+		blocks[i].size = (unsigned long)alloc->block_array[i].size;
+	}
+
+	return UMP_DD_SUCCESS;
+}
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(block);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	UMP_ASSERT(alloc->block_array[index].addr <= UMP_UINT32_MAX);
+	UMP_ASSERT(alloc->block_array[index].size <= UMP_UINT32_MAX);
+
+	block->addr = (unsigned long)alloc->block_array[index].addr;
+	block->size = (unsigned long)alloc->block_array[index].size;
+
+	return UMP_DD_SUCCESS;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->blocksCount <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->blocksCount;
+}
+
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void *uaddr, size_t size)
+{
+	umpp_cpu_mapping *map;
+
+	void *target_first = uaddr;
+	void *target_last = (void*)((uintptr_t)uaddr - 1 + size);
+
+	if (target_last < target_first) /* wrapped */
+	{
+		return NULL;
+	}
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if ( map->vaddr_start <= target_first &&
+		   (void*)((uintptr_t)map->vaddr_start + (map->nr_pages << PAGE_SHIFT) - 1) >= target_last)
+		{
+			goto out;
+		}
+	}
+	map = NULL;
+out:
+	mutex_unlock(&alloc->map_list_lock);
+
+	return map;
+}
+
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map)
+{
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(map);
+	mutex_lock(&alloc->map_list_lock);
+	list_add(&map->link, &alloc->map_list);
+	mutex_unlock(&alloc->map_list_lock);
+}
+
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target)
+{
+	umpp_cpu_mapping * map;
+
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(target);
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if (map == target)
+		{
+			list_del(&target->link);
+			kfree(target);
+			mutex_unlock(&alloc->map_list_lock);
+			return;
+		}
+	}
+
+	/* not found, error */
+	UMP_ASSERT(0);
+}
+
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const  block_index, uint64_t * const block_internal_offset)
+{
+	uint64_t i;
+
+	for (i = 0 ; i < alloc->blocksCount; i++)
+	{
+		if (offset < alloc->block_array[i].size)
+		{
+			/* found the block_array element containing this offset */
+			*block_index = i;
+			*block_internal_offset = offset;
+			return 0;
+		}
+		offset -= alloc->block_array[i].size;
+	}
+
+	return -ENXIO;
+}
+
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size)
+{
+	umpp_allocation * alloc;
+	void *vaddr;
+	umpp_cpu_mapping * mapping;
+	uint64_t virt_page_off; /* offset of given address from beginning of the virtual mapping */
+	uint64_t phys_page_off; /* offset of the virtual mapping from the beginning of the physical buffer */
+	uint64_t page_count; /* number of pages to sync */
+	uint64_t i;
+	uint64_t block_idx;
+	uint64_t block_offset;
+	uint64_t paddr;
+
+	UMP_ASSERT((UMP_MSYNC_CLEAN == op) || (UMP_MSYNC_CLEAN_AND_INVALIDATE == op));
+
+	alloc = (umpp_allocation*)mem;
+	vaddr = (void*)(uintptr_t)address;
+
+	if((alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+	{
+		/* mapping is not cached */
+		return;
+	}
+
+	mapping = umpp_dd_find_enclosing_mapping(alloc, vaddr, size);
+	if (NULL == mapping)
+	{
+		printk(KERN_WARNING "UMP: Illegal cache sync address %lx\n", (uintptr_t)vaddr);
+		return; /* invalid pointer or size causes out-of-bounds */
+	}
+
+	/* we already know that address + size don't wrap around as umpp_dd_find_enclosing_mapping didn't fail */
+	page_count = ((((((uintptr_t)address + size - 1) & PAGE_MASK) - ((uintptr_t)address & PAGE_MASK))) >> PAGE_SHIFT) + 1;
+	virt_page_off = (vaddr - mapping->vaddr_start) >> PAGE_SHIFT;
+	phys_page_off = mapping->page_off;
+
+	if (umpp_dd_find_start_block(alloc, (virt_page_off + phys_page_off) << PAGE_SHIFT, &block_idx, &block_offset))
+	{
+		/* should not fail as a valid mapping was found, so the phys mem must exists */
+		printk(KERN_WARNING "UMP: Unable to find physical start block with offset %llx\n", virt_page_off + phys_page_off);
+		UMP_ASSERT(0);
+		return;
+	}
+
+	paddr = alloc->block_array[block_idx].addr + block_offset + (((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1));
+
+	for (i = 0; i < page_count; i++)
+	{
+		size_t offset = ((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1);
+		size_t sz = min((size_t)PAGE_SIZE - offset, size);
+
+		/* check if we've overrrun the current block, if so move to the next block */
+		if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+		{
+			block_idx++;
+			UMP_ASSERT(block_idx < alloc->blocksCount);
+			paddr = alloc->block_array[block_idx].addr;
+		}
+
+		if (UMP_MSYNC_CLEAN == op)
+		{
+			ump_sync_to_memory(paddr, vaddr, sz);
+		}
+		else /* (UMP_MSYNC_CLEAN_AND_INVALIDATE == op) already validated on entry */
+		{
+			ump_sync_to_cpu(paddr, vaddr, sz);
+		}
+
+		/* advance to next page  */
+		vaddr = (void*)((uintptr_t)vaddr + sz);
+		size -= sz;
+		paddr += sz;
+	}
+}
+
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	uint64_t size = 0;
+	uint64_t i;
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	for (i = 0; i < num_blocks; i++)
+	{
+		size += blocks[i].size;
+	}
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD
+	    | UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read or write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc),__GFP_HARDWALL | GFP_KERNEL);
+
+	if (NULL == alloc)
+	{
+		goto out1;
+	}
+
+	alloc->block_array = kzalloc(sizeof(ump_dd_physical_block_64) * num_blocks,__GFP_HARDWALL | GFP_KERNEL);
+	if (NULL == alloc->block_array)
+	{
+		goto out2;
+	}
+
+	memcpy(alloc->block_array, blocks, sizeof(ump_dd_physical_block_64) * num_blocks);
+
+#ifdef CONFIG_KDS
+	kds_resource_init(&alloc->kds_res);
+#endif
+	alloc->size = size;
+	alloc->blocksCount = num_blocks;
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+	/* all set up, allocate an ID */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	alloc->management_flags |= UMP_MGMT_EXTERNAL;
+
+	return alloc;
+
+out3:
+	kfree(alloc->block_array);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+
+/*
+ * UMP v1 API
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks)
+{
+	ump_dd_handle mem;
+	ump_dd_physical_block_64 *block_64_array;
+	ump_alloc_flags flags = UMP_V1_API_DEFAULT_ALLOCATION_FLAGS;
+	unsigned long i;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	block_64_array = kzalloc(num_blocks * sizeof(*block_64_array), __GFP_HARDWALL | GFP_KERNEL);
+
+	if(block_64_array == NULL)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/* copy physical blocks */
+	for( i = 0; i < num_blocks; i++)
+	{
+		block_64_array[i].addr = blocks[i].addr;
+		block_64_array[i].size = blocks[i].size;
+	}
+
+	mem = ump_dd_create_from_phys_blocks_64(block_64_array, num_blocks, flags, NULL, NULL, NULL);
+
+	kfree(block_64_array);
+
+	return mem;
+
+}
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h b/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h
new file mode 100755
index 0000000..8cb424d
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h
@@ -0,0 +1,228 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_CORE_H_
+#define _UMP_KERNEL_CORE_H_
+
+
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <asm/atomic.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/cred.h>
+#include <asm/mmu_context.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+#include <linux/ump-common.h>
+#include <ump/src/common/ump_kernel_descriptor_mapping.h>
+
+/* forward decl */
+struct umpp_session;
+
+/**
+ * UMP handle metadata.
+ * Tracks various data about a handle not of any use to user space
+ */
+typedef enum
+{
+	UMP_MGMT_EXTERNAL = (1ul << 0) /**< Handle created via the ump_dd_create_from_phys_blocks interface */
+	/* (1ul << 31) not to be used */
+} umpp_management_flags;
+
+/**
+ * Structure tracking the single global UMP device.
+ * Holds global data like the ID map
+ */
+typedef struct umpp_device
+{
+	struct mutex secure_id_map_lock; /**< Lock protecting access to the map */
+	umpp_descriptor_mapping * secure_id_map; /**< Map of all known secure IDs on the system */
+} umpp_device;
+
+/**
+ * Structure tracking all memory allocations of a UMP allocation.
+ * Tracks info about an mapping so we can verify cache maintenace
+ * operations and help in the unmap cleanup.
+ */
+typedef struct umpp_cpu_mapping
+{
+	struct list_head        link; /**< link to list of mappings for an allocation */
+	void                  *vaddr_start; /**< CPU VA start of the mapping */
+	size_t                nr_pages; /**< Size (in pages) of the mapping */
+	uint64_t              page_off; /**< Offset (in pages) from start of the allocation where the mapping starts */
+	ump_dd_handle         handle; /**< Which handle this mapping is linked to */
+	struct umpp_session * session; /**< Which session created the mapping */
+} umpp_cpu_mapping;
+
+/**
+ * Structure tracking UMP allocation.
+ * Represent a memory allocation with its ID.
+ * Tracks all needed meta-data about an allocation.
+ * */
+typedef struct umpp_allocation
+{
+	ump_secure_id id; /**< Secure ID of the allocation */
+	atomic_t refcount; /**< Usage count */
+
+	ump_alloc_flags flags; /**< Flags for all supported devices */
+	uint32_t management_flags; /**< Managment flags tracking */
+
+	pid_t owner; /**< The process ID owning the memory if not sharable */
+
+	ump_dd_security_filter filter_func; /**< Hook to verify use, called during retains from new clients */
+	ump_dd_final_release_callback final_release_func; /**< Hook called when the last reference is removed */
+	void* callback_data; /**< Additional data given to release hook */
+
+	uint64_t size; /**< Size (in bytes) of the allocation */
+	uint64_t blocksCount; /**< Number of physsical blocks the allocation is built up of */
+	ump_dd_physical_block_64 * block_array; /**< Array, one entry per block, describing block start and length */
+
+	struct mutex     map_list_lock; /**< Lock protecting the map_list */
+	struct list_head map_list; /**< Tracks all CPU VA mappings of this allocation */
+
+#ifdef CONFIG_KDS
+	struct kds_resource kds_res; /**< The KDS resource controlling access to this allocation */
+#endif
+
+	void * backendData; /**< Physical memory backend meta-data */
+} umpp_allocation;
+
+/**
+ * Structure tracking use of UMP memory by a session.
+ * Tracks the use of an allocation by a session so session termination can clean up any outstanding references.
+ * Also protects agains non-matched release calls from user space.
+ */
+typedef struct umpp_session_memory_usage
+{
+	ump_secure_id id; /**< ID being used. For quick look-up */
+	ump_dd_handle mem; /**< Handle being used. */
+
+	/**
+	 * Track how many times has the process retained this handle in the kernel.
+	 * This should usually just be 1(allocated or resolved) or 2(mapped),
+	 * but could be more if someone is playing with the low-level API
+	 * */
+	atomic_t process_usage_count;
+
+	struct list_head link; /**< link to other usage trackers for a session */
+} umpp_session_memory_usage;
+
+/**
+ * Structure representing a session/client.
+ * Tracks the UMP allocations being used by this client.
+ */
+typedef struct umpp_session
+{
+	struct mutex session_lock; /**< Lock for memory usage manipulation */
+	struct list_head memory_usage; /**< list of memory currently being used by the this session */
+	void*  import_handler_data[UMPP_EXTERNAL_MEM_COUNT]; /**< Import modules per-session data pointer */
+} umpp_session;
+
+/**
+ * UMP core setup.
+ * Called by any OS specific startup function to initialize the common part.
+ * @return UMP_OK if core initialized correctly, any other value for errors
+ */
+ump_result umpp_core_constructor(void);
+
+/**
+ * UMP core teardown.
+ * Called by any OS specific unload function to clean up the common part.
+ */
+void umpp_core_destructor(void);
+
+/**
+ * UMP session start.
+ * Called by any OS specific session handler when a new session is detected
+ * @return Non-NULL if a matching core session could be set up. NULL on failure
+ */
+umpp_session *umpp_core_session_start(void);
+
+/**
+ * UMP session end.
+ * Called by any OS specific session handler when a session is ended/terminated.
+ * @param session The core session object returned by ump_core_session_start
+ */
+void umpp_core_session_end(umpp_session *session);
+
+/**
+ * Find a mapping object (if any) for this allocation.
+ * Called by any function needing to identify a mapping from a user virtual address.
+ * Verifies that the whole range to be within a mapping object.
+ * @param alloc The UMP allocation to find a matching mapping object of
+ * @param uaddr User mapping address to find the mapping object for
+ * @param size Length of the mapping
+ * @return NULL on error (no match found), pointer to mapping object if match found
+ */
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void* uaddr, size_t size);
+
+/**
+ * Register a new mapping of an allocation.
+ * Called by functions creating a new mapping of an allocation, typically OS specific handlers.
+ * @param alloc The allocation object which has been mapped
+ * @param map Info about the mapping
+ */
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map);
+
+/**
+ * Remove and free mapping object from an allocation.
+ * @param alloc The allocation object to remove the mapping info from
+ * @param target The mapping object to remove
+ */
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target);
+
+/**
+ * Helper to find a block in the blockArray which holds a given byte offset.
+ * @param alloc The allocation object to find the block in
+ * @param offset Offset (in bytes) from allocation start to find the block of
+ * @param[out] block_index Pointer to the index of the block matching
+ * @param[out] block_internal_offset Offset within the returned block of the searched offset
+ * @return 0 if a matching block was found, any other value for error
+ */
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const block_index, uint64_t * const block_internal_offset);
+
+/**
+ * Cache maintenance helper.
+ * Performs the requested cache operation on the given handle.
+ * @param mem Allocation handle
+ * @param op Cache maintenance operation to perform
+ * @param address User mapping at which to do the operation
+ * @param size Length (in bytes) of the range to do the operation on
+ */
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size);
+
+/**
+ * Import module session early init.
+ * Calls session_begin on all installed import modules.
+ * @param session The core session object to initialized the import handler for
+ * */
+void umpp_import_handlers_init(umpp_session * session);
+
+/**
+ * Import module session cleanup.
+ * Calls session_end on all import modules bound to the session.
+ * @param session The core session object to initialized the import handler for
+ */
+void umpp_import_handlers_term(umpp_session * session);
+
+#endif /* _UMP_KERNEL_CORE_H_ */
+
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c b/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
new file mode 100755
index 0000000..c5b0d74
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
@@ -0,0 +1,162 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <common/ump_kernel_priv.h>
+
+#define MALI_PAD_INT(x) (((x) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1))
+
+/**
+ * Allocate a descriptor table capable of holding 'count' mappings
+ * @param count Number of mappings in the table
+ * @return Pointer to a new table, NULL on error
+ */
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count);
+
+/**
+ * Free a descriptor table
+ * @param table The table to free
+ */
+static void descriptor_table_free(umpp_descriptor_table * table);
+
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries)
+{
+	umpp_descriptor_mapping * map = kzalloc(sizeof(umpp_descriptor_mapping), GFP_KERNEL);
+
+	init_entries = MALI_PAD_INT(init_entries);
+	max_entries = MALI_PAD_INT(max_entries);
+
+	if (NULL != map)
+	{
+		map->table = descriptor_table_alloc(init_entries);
+		if (NULL != map->table)
+		{
+			init_rwsem( &map->lock);
+			set_bit(0, map->table->usage);
+			map->max_nr_mappings_allowed = max_entries;
+			map->current_nr_mappings = init_entries;
+			return map;
+
+			descriptor_table_free(map->table);
+		}
+		kfree(map);
+	}
+	return NULL;
+}
+
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map)
+{
+	UMP_ASSERT(NULL != map);
+	descriptor_table_free(map->table);
+	kfree(map);
+}
+
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target)
+{
+ 	int descriptor = 0;
+	UMP_ASSERT(NULL != map);
+	down_write( &map->lock);
+	descriptor = find_first_zero_bit(map->table->usage, map->current_nr_mappings);
+	if (descriptor == map->current_nr_mappings)
+	{
+		/* no free descriptor, try to expand the table */
+		umpp_descriptor_table * new_table;
+		umpp_descriptor_table * old_table = map->table;
+		int nr_mappings_new = map->current_nr_mappings + BITS_PER_LONG;
+
+		if (map->current_nr_mappings >= map->max_nr_mappings_allowed)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+		new_table = descriptor_table_alloc(nr_mappings_new);
+		if (NULL == new_table)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+ 		memcpy(new_table->usage, old_table->usage, (sizeof(unsigned long)*map->current_nr_mappings) / BITS_PER_LONG);
+ 		memcpy(new_table->mappings, old_table->mappings, map->current_nr_mappings * sizeof(void*));
+
+ 		map->table = new_table;
+		map->current_nr_mappings = nr_mappings_new;
+		descriptor_table_free(old_table);
+	}
+
+	/* we have found a valid descriptor, set the value and usage bit */
+	set_bit(descriptor, map->table->usage);
+	map->table->mappings[descriptor] = target;
+
+unlock_and_exit:
+	up_write(&map->lock);
+	return descriptor;
+}
+
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target)
+{
+	int result = -EINVAL;
+ 	UMP_ASSERT(map);
+	UMP_ASSERT(target);
+ 	down_read(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		*target = map->table->mappings[descriptor];
+		result = 0;
+	}
+	/* keep target untouched if the descriptor was not found */
+	up_read(&map->lock);
+	return result;
+}
+
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor)
+{
+	UMP_ASSERT(map);
+ 	down_write(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		map->table->mappings[descriptor] = NULL;
+		clear_bit(descriptor, map->table->usage);
+	}
+	up_write(&map->lock);
+}
+
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count)
+{
+	umpp_descriptor_table * table;
+
+	table = kzalloc(sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG) + (sizeof(void*) * count), __GFP_HARDWALL | GFP_KERNEL );
+
+	if (NULL != table)
+	{
+		table->usage = (unsigned long*)((u8*)table + sizeof(umpp_descriptor_table));
+		table->mappings = (void**)((u8*)table + sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG));
+	}
+
+	return table;
+}
+
+static void descriptor_table_free(umpp_descriptor_table * table)
+{
+ 	UMP_ASSERT(table);
+	kfree(table);
+}
+
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h b/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
new file mode 100755
index 0000000..d06c145
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_kernel_descriptor_mapping.h
+ */
+
+#ifndef _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+#define _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+/**
+ * The actual descriptor mapping table, never directly accessed by clients
+ */
+typedef struct umpp_descriptor_table
+{
+	/* keep as a unsigned long to rely on the OS's bitops support */
+	unsigned long * usage; /**< Pointer to bitpattern indicating if a descriptor is valid/used(1) or not(0) */
+	void** mappings; /**< Array of the pointers the descriptors map to */
+} umpp_descriptor_table;
+
+/**
+ * The descriptor mapping object
+ * Provides a separate namespace where we can map an integer to a pointer
+ */
+typedef struct umpp_descriptor_mapping
+{
+	struct rw_semaphore lock; /**< Lock protecting access to the mapping object */
+	unsigned int max_nr_mappings_allowed; /**< Max number of mappings to support in this namespace */
+	unsigned int current_nr_mappings; /**< Current number of possible mappings */
+	umpp_descriptor_table * table; /**< Pointer to the current mapping table */
+} umpp_descriptor_mapping;
+
+/**
+ * Create a descriptor mapping object.
+ * Create a descriptor mapping capable of holding init_entries growable to max_entries.
+ * ID 0 is reserved so the number of available entries will be max - 1.
+ * @param init_entries Number of entries to preallocate memory for
+ * @param max_entries Number of entries to max support
+ * @return Pointer to a descriptor mapping object, NULL on failure
+ */
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries);
+
+/**
+ * Destroy a descriptor mapping object
+ * @param[in] map The map to free
+ */
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map);
+
+/**
+ * Allocate a new mapping entry (descriptor ID)
+ * Allocates a new entry in the map.
+ * @param[in] map The map to allocate a new entry in
+ * @param[in] target The value to map to
+ * @return The descriptor allocated, ID 0 on failure.
+ */
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target);
+
+/**
+ * Get the value mapped to by a descriptor ID
+ * @param[in] map The map to lookup the descriptor id in
+ * @param[in] descriptor The descriptor ID to lookup
+ * @param[out] target Pointer to a pointer which will receive the stored value
+ *
+ * @return 0 on success lookup, -EINVAL on lookup failure.
+ */
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target);
+
+/**
+ * Free the descriptor ID
+ * For the descriptor to be reused it has to be freed
+ * @param[in] map The map to free the descriptor from
+ * @param descriptor The descriptor ID to free
+ */
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor);
+
+#endif /* _UMP_KERNEL_DESCRIPTOR_MAPPING_H_ */
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h b/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
new file mode 100755
index 0000000..38b6f1b
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_PRIV_H_
+#define _UMP_KERNEL_PRIV_H_
+
+#ifdef __KERNEL__
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <asm/cacheflush.h>
+#endif
+
+
+#define UMP_EXPECTED_IDS 64
+#define UMP_MAX_IDS 32768
+
+#ifdef __KERNEL__
+#define UMP_ASSERT(expr) \
+		if (!(expr)) { \
+			printk(KERN_ERR "UMP: Assertion failed! %s,%s,%s,line=%d\n",\
+					#expr,__FILE__,__func__,__LINE__); \
+					BUG(); \
+		}
+
+static inline void ump_sync_to_memory(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/*TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_TO_DEVICE);
+	mb(); /* for outer_sync (if needed) */
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+
+static inline void ump_sync_to_cpu(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/* TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_FROM_DEVICE);
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+#endif /* __KERNEL__*/
+#endif /* _UMP_KERNEL_PRIV_H_ */
+
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/imports/ion/Makefile b/midgard/r14p0/kernel/drivers/base/ump/src/imports/ion/Makefile
new file mode 100755
index 0000000..ef74b27
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/imports/ion/Makefile
@@ -0,0 +1,53 @@
+#
+# (C) COPYRIGHT 2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+
+ifneq ($(KERNELRELEASE),)
+# Inside the kernel build system
+
+EXTRA_CFLAGS += -I$(KBUILD_EXTMOD) -I$(KBUILD_EXTMOD)/../../../../..
+KBUILD_EXTRA_SYMBOLS += "$(KBUILD_EXTMOD)/../../Module.symvers"
+
+SRC += ump_kernel_import_ion.c
+
+MODULE:=ump_ion_import.ko
+
+obj-m := $(MODULE:.ko=.o)
+$(MODULE:.ko=-y) := $(SRC:.c=.o)
+$(MODULE:.ko=-objs) := $(SRC:.c=.o)
+
+else
+# Outside the kernel build system
+#
+#
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR)
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+endif
+
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/imports/ion/sconscript b/midgard/r14p0/kernel/drivers/base/ump/src/imports/ion/sconscript
new file mode 100755
index 0000000..cff24c8
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/imports/ion/sconscript
@@ -0,0 +1,49 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ion = env.Clone()
+
+if env_ion['ump_ion'] != '1':
+	Return()
+
+# Source files required for UMP.
+ion_src = [Glob('#kernel/drivers/base/ump/src/imports/ion/*.c')]
+
+# Note: cleaning via the Linux kernel build system does not yet work
+if env_ion.GetOption('clean') :
+	makeAction=Action("cd ${SOURCE.dir} && make clean", '$MAKECOMSTR')
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make PLATFORM=${platform} && cp ump_ion_import.ko $STATIC_LIB_PATH/ump_ion_import.ko", '$MAKECOMSTR')
+# The target is ump_import_ion.ko, built from the source in ion_src, via the action makeAction
+# ump_import_ion.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+# kernel build system, after which it can be installed to the directory specified if
+# "libs_install" is set; this is done by LibTarget.
+cmd = env_ion.Command('$STATIC_LIB_PATH/ump_ion_import.ko', ion_src, [makeAction])
+
+# Until we fathom out how the invoke the Linux build system to clean, we can use Clean
+# to remove generated files.
+
+patterns = ['*.mod.c', '*.o', '*.ko', '*.a', '.*.cmd', 'modules.order', '.tmp_versions', 'Module.symvers']
+
+for p in patterns:
+	Clean(cmd, Glob('#kernel/drivers/base/ump/src/imports/ion/%s' % p))
+
+env_ion.Depends('$STATIC_LIB_PATH/ump_ion_import.ko', '$STATIC_LIB_PATH/ump.ko')
+env_ion.KernelObjTarget('ump', cmd)
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c b/midgard/r14p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
new file mode 100755
index 0000000..12b2e32
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/dma-mapping.h>
+#include "ion.h"
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+struct ion_wrapping_info
+{
+	struct ion_client *   ion_client;
+	struct ion_handle *   ion_handle;
+	int                   num_phys_blocks;
+	struct scatterlist *  sglist;
+};
+
+static struct ion_device * ion_device_get(void)
+{
+	/* < Customer to provide implementation >
+	 * Return a pointer to the global ion_device on the system
+	 */
+	return NULL;
+}
+
+static int import_ion_client_create(void** const custom_session_data)
+{
+	struct ion_client ** ion_client;
+
+	ion_client = (struct ion_client**)custom_session_data;
+
+	*ion_client = ion_client_create(ion_device_get(), "ump");
+
+	return PTR_RET(*ion_client);
+}
+
+
+static void import_ion_client_destroy(void* custom_session_data)
+{
+	struct ion_client * ion_client;
+
+	ion_client = (struct ion_client*)custom_session_data;
+	BUG_ON(!ion_client);
+
+	ion_client_destroy(ion_client);
+}
+
+
+static void import_ion_final_release_callback(const ump_dd_handle handle, void * info)
+{
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!info);
+
+	(void)handle;
+	ion_info = (struct ion_wrapping_info*)info;
+
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+	kfree(ion_info);
+	module_put(THIS_MODULE);
+}
+
+static ump_dd_handle import_ion_import(void * custom_session_data, void * pfd, ump_alloc_flags flags)
+{
+	int fd;
+	ump_dd_handle ump_handle;
+	struct scatterlist * sg;
+	int num_dma_blocks;
+	ump_dd_physical_block_64 * phys_blocks;
+	unsigned long i;
+	struct sg_table * sgt;
+
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!custom_session_data);
+	BUG_ON(!pfd);
+
+	ion_info = kzalloc(GFP_KERNEL, sizeof(*ion_info));
+	if (NULL == ion_info)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	ion_info->ion_client = (struct ion_client*)custom_session_data;
+
+	if (get_user(fd, (int*)pfd))
+	{
+		goto out;
+	}
+
+	ion_info->ion_handle = ion_import_dma_buf(ion_info->ion_client, fd);
+
+	if (IS_ERR_OR_NULL(ion_info->ion_handle))
+	{
+		goto out;
+	}
+
+	sgt = ion_sg_table(ion_info->ion_client, ion_info->ion_handle);
+	if (IS_ERR_OR_NULL(sgt))
+	{
+		goto ion_dma_map_failed;
+	}
+
+	ion_info->sglist = sgt->sgl;
+
+	sg = ion_info->sglist;
+	while (sg)
+	{
+		ion_info->num_phys_blocks++;
+		sg = sg_next(sg);
+	}
+
+	num_dma_blocks = dma_map_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	if (0 == num_dma_blocks)
+	{
+		goto linux_dma_map_failed;
+	}
+
+	phys_blocks = vmalloc(num_dma_blocks * sizeof(*phys_blocks));
+	if (NULL == phys_blocks)
+	{
+		goto vmalloc_failed;
+	}
+
+	for_each_sg(ion_info->sglist, sg, num_dma_blocks, i)
+	{
+		phys_blocks[i].addr = sg_phys(sg);
+		phys_blocks[i].size = sg_dma_len(sg);
+	}
+
+	ump_handle = ump_dd_create_from_phys_blocks_64(phys_blocks, num_dma_blocks, flags, NULL, import_ion_final_release_callback, ion_info);
+
+	vfree(phys_blocks);
+
+	if (ump_handle != UMP_DD_INVALID_MEMORY_HANDLE)
+	{
+		/*
+		 * As we have a final release callback installed
+		 * we must keep the module locked until
+		 * the callback has been triggered
+		 * */
+		__module_get(THIS_MODULE);
+		return ump_handle;
+	}
+
+	/* failed*/
+vmalloc_failed:
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+linux_dma_map_failed:
+ion_dma_map_failed:
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+out:
+	kfree(ion_info);
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+struct ump_import_handler import_handler_ion =
+{
+	.linux_module =  THIS_MODULE,
+	.session_begin = import_ion_client_create,
+	.session_end =   import_ion_client_destroy,
+	.import =        import_ion_import
+};
+
+static int __init import_ion_initialize_module(void)
+{
+	/* register with UMP */
+	return ump_import_module_register(UMP_EXTERNAL_MEM_TYPE_ION, &import_handler_ion);
+}
+
+static void __exit import_ion_cleanup_module(void)
+{
+	/* unregister import handler */
+	ump_import_module_unregister(UMP_EXTERNAL_MEM_TYPE_ION);
+}
+
+/* Setup init and exit functions for this module */
+module_init(import_ion_initialize_module);
+module_exit(import_ion_cleanup_module);
+
+/* And some module information */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/imports/sconscript b/midgard/r14p0/kernel/drivers/base/ump/src/imports/sconscript
new file mode 100755
index 0000000..8f50683
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/imports/sconscript
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os, sys
+Import('env')
+
+import_modules = [ os.path.join( path, 'sconscript' ) for path in sorted(os.listdir( os.getcwd() )) ]
+
+for m in import_modules:
+	if os.path.exists(m):
+		SConscript( m, variant_dir=os.path.join( env['BUILD_DIR_PATH'], os.path.dirname(m) ), duplicate=0 )
+
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c b/midgard/r14p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
new file mode 100755
index 0000000..d6c3c53
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
@@ -0,0 +1,831 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump-ioctl.h>
+#include <linux/ump.h>
+
+#include <asm/uaccess.h>	         /* copy_*_user */
+#include <linux/compat.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/device.h>            /* class registration support */
+
+#include <common/ump_kernel_core.h>
+
+#include "ump_kernel_linux_mem.h"
+#include <ump_arch.h>
+
+
+struct ump_linux_device
+{
+	struct cdev cdev;
+	struct class * ump_class;
+};
+
+/* Name of the UMP device driver */
+static char ump_dev_name[] = "ump"; /* should be const, but the functions we call requires non-cost */
+
+/* Module parameter to control log level */
+int ump_debug_level = 2;
+module_param(ump_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(ump_debug_level, "Higher number, more dmesg output");
+
+/* By default the module uses any available major, but it's possible to set it at load time to a specific number */
+int ump_major = 0;
+module_param(ump_major, int, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_major, "Device major number");
+
+#define UMP_REV_STRING "1.0"
+
+char * ump_revision = UMP_REV_STRING;
+module_param(ump_revision, charp, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_revision, "Revision info");
+
+static int umpp_linux_open(struct inode *inode, struct file *filp);
+static int umpp_linux_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+/* This variable defines the file operations this UMP device driver offers */
+static struct file_operations ump_fops =
+{
+	.owner   = THIS_MODULE,
+	.open    = umpp_linux_open,
+	.release = umpp_linux_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = umpp_linux_ioctl,
+#else
+	.ioctl   = umpp_linux_ioctl,
+#endif
+	.compat_ioctl = umpp_linux_ioctl,
+	.mmap = umpp_linux_mmap
+};
+
+/* import module handling */
+DEFINE_MUTEX(import_list_lock);
+struct ump_import_handler *  import_handlers[UMPP_EXTERNAL_MEM_COUNT];
+
+/* The global variable containing the global device data */
+static struct ump_linux_device ump_linux_device;
+
+#define DBG_MSG(level, ...) do { \
+if ((level) <=  ump_debug_level)\
+{\
+printk(KERN_DEBUG "UMP<" #level ">:\n" __VA_ARGS__);\
+} \
+} while (0)
+
+#define MSG_ERR(...) do{ \
+printk(KERN_ERR "UMP: ERR: %s\n           %s()%4d\n", __FILE__, __func__  , __LINE__) ; \
+printk(KERN_ERR __VA_ARGS__); \
+printk(KERN_ERR "\n"); \
+} while(0)
+
+#define MSG(...) do{ \
+printk(KERN_INFO "UMP: " __VA_ARGS__);\
+} while (0)
+
+/*
+ * This function is called by Linux to initialize this module.
+ * All we do is initialize the UMP device driver.
+ */
+static int __init umpp_linux_initialize_module(void)
+{
+	ump_result err;
+
+	err = umpp_core_constructor();
+	if (UMP_OK != err)
+	{
+		MSG_ERR("UMP device driver init failed\n");
+		return -ENOTTY;
+	}
+
+	MSG("UMP device driver %s loaded\n", UMP_REV_STRING);
+	return 0;
+}
+
+
+
+/*
+ * This function is called by Linux to unload/terminate/exit/cleanup this module.
+ * All we do is terminate the UMP device driver.
+ */
+static void __exit umpp_linux_cleanup_module(void)
+{
+	DBG_MSG(2, "Unloading UMP device driver\n");
+	umpp_core_destructor();
+	DBG_MSG(2, "Module unloaded\n");
+}
+
+
+
+/*
+ * Initialize the UMP device driver.
+ */
+ump_result umpp_device_initialize(void)
+{
+	int err;
+	dev_t dev = 0;
+
+	if (0 == ump_major)
+	{
+		/* auto select a major */
+		err = alloc_chrdev_region(&dev, 0, 1, ump_dev_name);
+		ump_major = MAJOR(dev);
+	}
+	else
+	{
+		/* use load time defined major number */
+		dev = MKDEV(ump_major, 0);
+		err = register_chrdev_region(dev, 1, ump_dev_name);
+	}
+
+	if (0 == err)
+	{
+		memset(&ump_linux_device, 0, sizeof(ump_linux_device));
+
+		/* initialize our char dev data */
+		cdev_init(&ump_linux_device.cdev, &ump_fops);
+		ump_linux_device.cdev.owner = THIS_MODULE;
+		ump_linux_device.cdev.ops = &ump_fops;
+
+		/* register char dev with the kernel */
+		err = cdev_add(&ump_linux_device.cdev, dev, 1/*count*/);
+		if (0 == err)
+		{
+
+			ump_linux_device.ump_class = class_create(THIS_MODULE, ump_dev_name);
+			if (IS_ERR(ump_linux_device.ump_class))
+			{
+				err = PTR_ERR(ump_linux_device.ump_class);
+			}
+			else
+			{
+				struct device * mdev;
+				mdev = device_create(ump_linux_device.ump_class, NULL, dev, NULL, ump_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return UMP_OK;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(ump_linux_device.ump_class);
+			}
+			cdev_del(&ump_linux_device.cdev);
+		}
+
+		unregister_chrdev_region(dev, 1);
+	}
+
+	return UMP_ERROR;
+}
+
+
+
+/*
+ * Terminate the UMP device driver
+ */
+void umpp_device_terminate(void)
+{
+	dev_t dev = MKDEV(ump_major, 0);
+
+	device_destroy(ump_linux_device.ump_class, dev);
+	class_destroy(ump_linux_device.ump_class);
+
+	/* unregister char device */
+	cdev_del(&ump_linux_device.cdev);
+
+	/* free major */
+	unregister_chrdev_region(dev, 1);
+}
+
+
+static int umpp_linux_open(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = umpp_core_session_start();
+	if (NULL == session)
+	{
+		return -EFAULT;
+	}
+	
+	filp->private_data = session;
+
+	return 0;
+}
+
+static int umpp_linux_release(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = filp->private_data;
+
+	umpp_core_session_end(session);
+
+	filp->private_data = NULL;
+
+	return 0;
+}
+
+/**************************/
+/*ioctl specific functions*/
+/**************************/
+static int do_ump_dd_allocate(umpp_session * session, ump_k_allocate * params)
+{
+	ump_dd_handle new_allocation;
+	new_allocation = ump_dd_allocate_64(params->size, params->alloc_flags, NULL, NULL, NULL);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	printk(KERN_WARNING "UMP: Allocation FAILED\n");
+	return -ENOMEM;
+}
+
+static int do_ump_dd_retain(umpp_session * session, ump_k_retain * params)
+{
+	umpp_session_memory_usage * it;
+
+	mutex_lock(&session->session_lock);
+
+	/* try to find it on the session usage list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found to already be in use */
+			/* check for overflow */
+			while(1)
+			{
+				int refcnt = atomic_read(&it->process_usage_count);
+				if (refcnt + 1 > 0)
+				{
+					/* add a process local ref */
+					if(atomic_cmpxchg(&it->process_usage_count, refcnt, refcnt + 1) == refcnt)
+					{
+						mutex_unlock(&session->session_lock);
+						return 0;
+					}
+				}
+				else
+				{
+					/* maximum usage cap reached */
+					mutex_unlock(&session->session_lock);
+					return -EBUSY;
+				}
+			}
+		}
+	}
+	/* try to look it up globally */
+
+	it = kmalloc(sizeof(*it), GFP_KERNEL);
+
+	if (NULL != it)
+	{
+		it->mem = ump_dd_from_secure_id(params->secure_id);
+		if (UMP_DD_INVALID_MEMORY_HANDLE != it->mem)
+		{
+			/* found, add it to the session usage list */
+			it->id = params->secure_id;
+			atomic_set(&it->process_usage_count, 1);
+			list_add(&it->link, &session->memory_usage);
+		}
+		else
+		{
+			/* not found */
+			kfree(it);
+			it = NULL;
+		}
+	}
+
+	mutex_unlock(&session->session_lock);
+
+	return (NULL != it) ? 0 : -ENODEV;
+}
+
+
+static int do_ump_dd_release(umpp_session * session, ump_k_release * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only do a release if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			result = 0;
+
+			if (0 == atomic_sub_return(1, &it->process_usage_count))
+			{
+				/* last ref in this process remove from the usage list and remove the underlying ref */
+				list_del(&it->link);
+				ump_dd_release(it->mem);
+				kfree(it);
+			}
+
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_sizequery(umpp_session * session, ump_k_sizequery * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->size = ump_dd_size_get_64(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_allocation_flags_get(umpp_session * session, ump_k_allocation_flags * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->alloc_flags = ump_dd_allocation_flags_get(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_msync_now(umpp_session * session, ump_k_msync * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, do the cache op */
+#ifdef CONFIG_COMPAT
+			if (is_compat_task())
+			{
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, compat_ptr(params->mapped_ptr.compat_value), params->size);
+				result = 0;
+			}
+			else
+			{
+#endif
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, params->mapped_ptr.value, params->size);
+				result = 0;
+#ifdef CONFIG_COMPAT
+			}
+#endif
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+
+void umpp_import_handlers_init(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		if (import_handlers[i])
+		{
+			import_handlers[i]->session_begin(&session->import_handler_data[i]);
+			/* It is OK if session_begin returned an error.
+			 * We won't do any import calls if so */
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+void umpp_import_handlers_term(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		/* only call if session_begin succeeded */
+		if (session->import_handler_data[i] != NULL)
+		{
+			/* if session_beging succeeded the handler
+			 * should not have unregistered with us */
+			BUG_ON(!import_handlers[i]);
+			import_handlers[i]->session_end(session->import_handler_data[i]);
+			session->import_handler_data[i] = NULL;
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler)
+{
+	int res = -EEXIST;
+
+	/* validate input */
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+	BUG_ON(!handler);
+	BUG_ON(!handler->linux_module);
+	BUG_ON(!handler->session_begin);
+	BUG_ON(!handler->session_end);
+	BUG_ON(!handler->import);
+
+	mutex_lock(&import_list_lock);
+
+	if (!import_handlers[type])
+	{
+		import_handlers[type] = handler;
+		res = 0;
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return res;
+}
+
+void ump_import_module_unregister(enum ump_external_memory_type type)
+{
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+
+	mutex_lock(&import_list_lock);
+	/* an error to call this if ump_import_module_register didn't succeed */
+	BUG_ON(!import_handlers[type]);
+	import_handlers[type] = NULL;
+	mutex_unlock(&import_list_lock);
+}
+
+static struct ump_import_handler * import_handler_get(unsigned int type_id)
+{
+	enum ump_external_memory_type type;
+	struct ump_import_handler * handler;
+
+	/* validate and convert input */
+	/* handle bad data here, not just BUG_ON */
+	if (type_id == 0 || type_id >= UMPP_EXTERNAL_MEM_COUNT)
+		return NULL;
+
+	type = (enum ump_external_memory_type)type_id;
+
+	/* find the handler */
+	mutex_lock(&import_list_lock);
+
+	handler = import_handlers[type];
+
+	if (handler)
+	{
+		if (!try_module_get(handler->linux_module))
+		{
+			handler = NULL;
+		}
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return handler;
+}
+
+static void import_handler_put(struct ump_import_handler * handler)
+{
+	module_put(handler->linux_module);
+}
+
+static int do_ump_dd_import(umpp_session * session, ump_k_import * params)
+{
+	ump_dd_handle new_allocation = UMP_DD_INVALID_MEMORY_HANDLE;
+	struct ump_import_handler * handler;
+
+	handler = import_handler_get(params->type);
+
+	if (handler)
+	{
+		/* try late binding if not already bound */
+		if (!session->import_handler_data[params->type])
+		{
+			handler->session_begin(&session->import_handler_data[params->type]);
+		}
+
+		/* do we have a bound session? */
+		if (session->import_handler_data[params->type])
+		{
+			new_allocation = handler->import( session->import_handler_data[params->type],
+		                                      params->phandle.value,
+		                                      params->alloc_flags);
+		}
+
+		/* done with the handler */
+		import_handler_put(handler);
+	}
+
+	/* did the import succeed? */
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	return -ENOMEM;
+
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	int ret;
+	uint64_t msg[(UMP_CALL_MAX_SIZE+7)>>3]; /* alignment fixup */
+	uint32_t size = _IOC_SIZE(cmd);
+	struct umpp_session *session = filp->private_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+	(void)inode; /* unused arg */
+#endif
+
+	/*
+	 * extract the type and number bitfields, and don't decode
+	 * wrong cmds: return ENOTTY (inappropriate ioctl) before access_ok()
+	 */
+	if (_IOC_TYPE(cmd) != UMP_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if (_IOC_NR(cmd) > UMP_IOC_MAXNR)
+	{
+		return -ENOTTY;
+	}
+
+	switch(cmd)
+	{
+		case UMP_FUNC_ALLOCATE:
+			if (size != sizeof(ump_k_allocate))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocate(session, (ump_k_allocate *)&msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_SIZEQUERY:
+			if (size != sizeof(ump_k_sizequery))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_sizequery(session,(ump_k_sizequery*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_MSYNC:
+			if (size != sizeof(ump_k_msync))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_msync_now(session,(ump_k_msync*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_IMPORT:
+			if (size != sizeof(ump_k_import))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user*)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_import(session, (ump_k_import*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		/* used only by v1 API */
+		case UMP_FUNC_ALLOCATION_FLAGS_GET:
+			if (size != sizeof(ump_k_allocation_flags))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocation_flags_get(session,(ump_k_allocation_flags*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_RETAIN:
+			if (size != sizeof(ump_k_retain))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_retain(session,(ump_k_retain*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		case UMP_FUNC_RELEASE:
+			if (size != sizeof(ump_k_release))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_release(session,(ump_k_release*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		default:
+			/* not ours */
+			return -ENOTTY;
+	}
+	/*redundant below*/
+	return -ENOTTY;
+}
+
+
+/* Export UMP kernel space API functions */
+EXPORT_SYMBOL(ump_dd_allocate_64);
+EXPORT_SYMBOL(ump_dd_allocation_flags_get);
+EXPORT_SYMBOL(ump_dd_secure_id_get);
+EXPORT_SYMBOL(ump_dd_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get_64);
+EXPORT_SYMBOL(ump_dd_size_get_64);
+EXPORT_SYMBOL(ump_dd_retain);
+EXPORT_SYMBOL(ump_dd_release);
+EXPORT_SYMBOL(ump_dd_create_from_phys_blocks_64);
+#ifdef CONFIG_KDS
+EXPORT_SYMBOL(ump_dd_kds_resource_get);
+#endif
+
+/* import API */
+EXPORT_SYMBOL(ump_import_module_register);
+EXPORT_SYMBOL(ump_import_module_unregister);
+
+
+
+/* V1 API */
+EXPORT_SYMBOL(ump_dd_handle_create_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_block_count_get);
+EXPORT_SYMBOL(ump_dd_phys_block_get);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get);
+EXPORT_SYMBOL(ump_dd_size_get);
+EXPORT_SYMBOL(ump_dd_reference_add);
+EXPORT_SYMBOL(ump_dd_reference_release);
+EXPORT_SYMBOL(ump_dd_handle_create_from_phys_blocks);
+
+
+/* Setup init and exit functions for this module */
+module_init(umpp_linux_initialize_module);
+module_exit(umpp_linux_cleanup_module);
+
+/* And some module informatio */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(UMP_REV_STRING);
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c b/midgard/r14p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
new file mode 100755
index 0000000..38db91e
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
@@ -0,0 +1,250 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+#include <linux/version.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/mm.h> /* memory mananger definitions */
+#include <linux/pfn.h>
+#include <linux/highmem.h> /*kmap*/
+
+#include <linux/compat.h> /* is_compat_task */
+
+#include <common/ump_kernel_core.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+static void umpp_vm_close(struct vm_area_struct *vma)
+{
+	umpp_cpu_mapping * mapping;
+	umpp_session * session;
+	ump_dd_handle handle;
+
+	mapping = (umpp_cpu_mapping*)vma->vm_private_data;
+	UMP_ASSERT(mapping);
+	
+	session = mapping->session;
+	handle = mapping->handle;
+
+	umpp_dd_remove_cpu_mapping(mapping->handle, mapping); /* will free the mapping object */
+	ump_dd_release(handle);
+}
+
+
+static const struct vm_operations_struct umpp_vm_ops = {
+	.close = umpp_vm_close
+};
+
+int umpp_phys_commit(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	/* round up to a page boundary */
+	alloc->size = (alloc->size + PAGE_SIZE - 1) & ~((uint64_t)PAGE_SIZE-1) ;
+	/* calculate number of pages */
+	alloc->blocksCount = alloc->size >> PAGE_SHIFT;
+
+	if( (sizeof(ump_dd_physical_block_64) * alloc->blocksCount) > ((size_t)-1))
+	{
+		printk(KERN_WARNING "UMP: umpp_phys_commit - trying to allocate more than possible\n");
+		return -ENOMEM;
+	}
+
+	alloc->block_array = kmalloc(sizeof(ump_dd_physical_block_64) * alloc->blocksCount, __GFP_HARDWALL | GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (NULL == alloc->block_array)
+	{
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		void * mp;
+		struct page * page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | __GFP_NOWARN | __GFP_COLD);
+		if (NULL == page)
+		{
+			break;
+		}
+
+		alloc->block_array[i].addr = page_to_pfn(page) << PAGE_SHIFT;
+		alloc->block_array[i].size = PAGE_SIZE;
+
+		mp = kmap(page);
+		if (NULL == mp)
+		{
+			__free_page(page);
+			break;
+		}
+
+		memset(mp, 0x00, PAGE_SIZE); /* instead of __GFP_ZERO, so we can do cache maintenance */
+		ump_sync_to_memory(PFN_PHYS(page_to_pfn(page)), mp, PAGE_SIZE);
+		kunmap(page);
+	}
+
+	if (i == alloc->blocksCount)
+	{
+		return 0;
+	}
+	else
+	{
+		uint64_t j;
+		for (j = 0; j < i; j++)
+		{
+			struct page * page;
+			page = pfn_to_page(alloc->block_array[j].addr >> PAGE_SHIFT);
+			__free_page(page);
+		}
+		
+		kfree(alloc->block_array);
+
+		return -ENOMEM;
+	}
+}
+
+void umpp_phys_free(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		__free_page(pfn_to_page(alloc->block_array[i].addr >> PAGE_SHIFT));
+	}
+
+	kfree(alloc->block_array);
+}
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma)
+{
+	ump_secure_id id;
+	ump_dd_handle h;
+	size_t offset;
+	int err = -EINVAL;
+	size_t length = vma->vm_end - vma->vm_start;
+
+	umpp_cpu_mapping * map = NULL;
+	umpp_session *session = filp->private_data;
+
+	if ( 0 == length )
+	{
+		return -EINVAL;
+	}
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (NULL == map)
+	{
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* unpack our arg */
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	if (is_compat_task())
+	{
+#endif
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_32;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_32;
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	}
+	else
+	{
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_64;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_64;
+	}
+#endif
+
+	h = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE != h)
+	{
+		uint64_t i;
+		uint64_t block_idx;
+		uint64_t block_offset;
+		uint64_t paddr;
+		umpp_allocation * alloc;
+		uint64_t last_byte;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0))
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO | VM_MIXEDMAP | VM_DONTDUMP;
+#else
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO | VM_MIXEDMAP;
+#endif
+		vma->vm_ops = &umpp_vm_ops;
+		vma->vm_private_data = map;
+
+		alloc = (umpp_allocation*)h;
+
+		if( (alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+		{
+			/* cache disabled flag set, disable caching for cpu mappings */
+			vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		}
+
+		last_byte = length + (offset << PAGE_SHIFT) - 1;
+		if (last_byte >= alloc->size || last_byte < (offset << PAGE_SHIFT))
+		{
+			goto err_out;
+		}
+
+		if (umpp_dd_find_start_block(alloc, offset << PAGE_SHIFT, &block_idx, &block_offset))
+		{
+			goto err_out;
+		}
+
+		paddr = alloc->block_array[block_idx].addr + block_offset;
+
+		for (i = 0; i < (length >> PAGE_SHIFT); i++)
+		{
+			/* check if we've overrrun the current block, if so move to the next block */
+			if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+			{
+				block_idx++;
+				UMP_ASSERT(block_idx < alloc->blocksCount);
+				paddr = alloc->block_array[block_idx].addr;
+			}
+
+			err = vm_insert_mixed(vma, vma->vm_start + (i << PAGE_SHIFT), paddr >> PAGE_SHIFT);
+			paddr += PAGE_SIZE;
+		}
+
+		map->vaddr_start = (void*)vma->vm_start;
+		map->nr_pages = length >> PAGE_SHIFT;
+		map->page_off = offset;
+		map->handle = h;
+		map->session = session;
+
+		umpp_dd_add_cpu_mapping(h, map);
+
+		return 0;
+
+		err_out:
+
+		ump_dd_release(h);
+	}
+
+	kfree(map);
+
+out:
+
+	return err;
+}
+
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h b/midgard/r14p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
new file mode 100755
index 0000000..4fbf3b2
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_LINUX_MEM_H_
+#define _UMP_KERNEL_LINUX_MEM_H_
+
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma);
+
+#endif /* _UMP_KERNEL_LINUX_MEM_H_ */
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/sconscript b/midgard/r14p0/kernel/drivers/base/ump/src/sconscript
new file mode 100755
index 0000000..b34d499
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/sconscript
@@ -0,0 +1,46 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ump = env.Clone()
+
+# Source files required for UMP.
+ump_src = [
+    Glob('common/*.c'),
+    Glob('imports/*/*.c'),
+    Glob('linux/*.c'),
+]
+
+make_args = env_ump.kernel_get_config_defines(ret_list = True) + [
+    'PLATFORM=%s' % env_ump['platform'],
+    'MALI_UNIT_TEST=%s' % env_ump['unit'],
+]
+
+mod = env_ump.BuildKernelModule('$STATIC_LIB_PATH/ump.ko', ump_src,
+                                make_args = make_args)
+env_ump.KernelObjTarget('ump', mod)
+
+# Add a dependency on kds.ko only when the build is not Android
+# Android uses sync_pt instead of Midgard KDS to achieve KDS functionality
+# Only necessary when KDS is not built into the kernel.
+#
+if env_ump['os'] != 'android':
+    if not env_ump.KernelConfigEnabled('CONFIG_KDS'):
+        env_ump.Depends(mod, '$STATIC_LIB_PATH/kds.ko')
+
+SConscript( 'imports/sconscript' )
+
diff --git a/midgard/r14p0/kernel/drivers/base/ump/src/ump_arch.h b/midgard/r14p0/kernel/drivers/base/ump/src/ump_arch.h
new file mode 100755
index 0000000..2303d56
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/src/ump_arch.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_ARCH_H_
+#define _UMP_ARCH_H_
+
+#include <common/ump_kernel_core.h>
+
+/**
+ * Device specific setup.
+ * Called by the UMP core code to to host OS/device specific setup.
+ * Typical use case is device node creation for talking to user space.
+ * @return UMP_OK on success, any other value on failure
+ */
+extern ump_result umpp_device_initialize(void);
+
+/**
+ * Device specific teardown.
+ * Undo any things done by ump_device_initialize.
+ */
+extern void umpp_device_terminate(void);
+
+extern int umpp_phys_commit(umpp_allocation * alloc);
+extern void umpp_phys_free(umpp_allocation * alloc);
+
+#endif /* _UMP_ARCH_H_ */
diff --git a/midgard/r14p0/kernel/drivers/base/ump/ump_ref_drv.h b/midgard/r14p0/kernel/drivers/base/ump/ump_ref_drv.h
new file mode 100755
index 0000000..9a265fe
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/base/ump/ump_ref_drv.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_ref_drv.h
+ *
+ * This file contains the link to user space part of the UMP API for usage by MALI 400 gralloc.
+ *
+ */
+
+#ifndef _UMP_REF_DRV_H_
+#define _UMP_REF_DRV_H_
+
+#include <ump/ump.h>
+
+
+#endif /* _UMP_REF_DRV_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/Kbuild b/midgard/r14p0/kernel/drivers/gpu/arm/Kbuild
new file mode 100755
index 0000000..19c7e9a
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/Kbuild
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/Kconfig b/midgard/r14p0/kernel/drivers/gpu/arm/Kconfig
new file mode 100755
index 0000000..1f30eb5
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/Kconfig
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menu "ARM GPU Configuration"
+source "drivers/gpu/arm/midgard/Kconfig"
+endmenu
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/Kbuild b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/Kbuild
new file mode 100755
index 0000000..f213b4c
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/Kbuild
@@ -0,0 +1,233 @@
+#
+# (C) COPYRIGHT 2012,2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r14p0-01rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+ifeq ($(CONFIG_MALI_ERROR_INJECTION),y)
+MALI_ERROR_INJECT_ON = 1
+endif
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_ERROR_INJECT_ON ?= 0
+MALI_MOCK_TEST ?= 0
+MALI_COVERAGE ?= 0
+MALI_INSTRUMENTATION_LEVEL ?= 0
+# This workaround is for what seems to be a compiler bug we observed in
+# GCC 4.7 on AOSP 4.3.  The bug caused an intermittent failure compiling
+# the "_Pragma" syntax, where an error message is returned:
+#
+# "internal compiler error: unspellable token PRAGMA"
+#
+# This regression has thus far only been seen on the GCC 4.7 compiler bundled
+# with AOSP 4.3.0.  So this makefile, intended for in-tree kernel builds
+# which are not known to be used with AOSP, is hardcoded to disable the
+# workaround, i.e. set the define to 0.
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
+	-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
+	-DMALI_COVERAGE=$(MALI_COVERAGE) \
+	-DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \
+	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+	-DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+
+# Use our defines when compiling
+ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+	mali_kbase_device.c \
+	mali_kbase_cache_policy.c \
+	mali_kbase_mem.c \
+	mali_kbase_mmu.c \
+	mali_kbase_ipa.c \
+	mali_kbase_jd.c \
+	mali_kbase_jd_debugfs.c \
+	mali_kbase_jm.c \
+	mali_kbase_gpuprops.c \
+	mali_kbase_js.c \
+	mali_kbase_js_ctx_attr.c \
+	mali_kbase_event.c \
+	mali_kbase_context.c \
+	mali_kbase_pm.c \
+	mali_kbase_config.c \
+	mali_kbase_vinstr.c \
+	mali_kbase_softjobs.c \
+	mali_kbase_10969_workaround.c \
+	mali_kbase_hw.c \
+	mali_kbase_utility.c \
+	mali_kbase_debug.c \
+	mali_kbase_trace_timeline.c \
+	mali_kbase_gpu_memory_debugfs.c \
+	mali_kbase_mem_linux.c \
+	mali_kbase_core_linux.c \
+	mali_kbase_sync.c \
+	mali_kbase_sync_user.c \
+	mali_kbase_replay.c \
+	mali_kbase_mem_profile_debugfs.c \
+	mali_kbase_mmu_mode_lpae.c \
+	mali_kbase_mmu_mode_aarch64.c \
+	mali_kbase_disjoint_events.c \
+	mali_kbase_gator_api.c \
+	mali_kbase_debug_mem_view.c \
+	mali_kbase_debug_job_fault.c \
+	mali_kbase_smc.c \
+	mali_kbase_mem_pool.c \
+	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_tlstream.c \
+	mali_kbase_strings.c \
+	mali_kbase_as_fault_debugfs.c \
+	mali_kbase_regs_history_debugfs.c
+
+ifeq ($(MALI_UNIT_TEST),1)
+	SRC += mali_kbase_tlstream_test.c
+endif
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+	SRC += mali_kbase_regs_dump_debugfs.c
+endif
+
+
+# Job Scheduler Policy: Completely Fair Scheduler
+SRC += mali_kbase_js_policy_cfs.c
+
+ccflags-y += -I$(KBASE_PATH)
+
+ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y)
+	SRC += mali_kbase_platform_fake.c
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y)
+		SRC += platform/vexpress/mali_kbase_config_vexpress.c \
+		platform/vexpress/mali_kbase_cpu_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y)
+		SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/rtsm_ve
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_JUNO),y)
+		SRC += platform/juno/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/juno
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_JUNO_SOC),y)
+		SRC += platform/juno_soc/mali_kbase_config_juno_soc.c
+		ccflags-y += -I$(src)/platform/juno_soc
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_1XV7_A57),y)
+		SRC += platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress_1xv7_a57
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y)
+		SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \
+		platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress_6xvirtex7_10mhz
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_A7_KIPLING),y)
+		SRC += platform/a7_kipling/mali_kbase_config_a7_kipling.c \
+		platform/a7_kipling/mali_kbase_cpu_a7_kipling.c
+		ccflags-y += -I$(src)/platform/a7_kipling
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+	# remove begin and end quotes from the Kconfig string type
+	platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+	MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+	ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+	ifeq ($(CONFIG_MALI_MIDGARD),m)
+	include  $(src)/platform/$(platform_name)/Kbuild
+	else ifeq ($(CONFIG_MALI_MIDGARD),y)
+	obj-$(CONFIG_MALI_MIDGARD) += platform/
+	endif
+	endif
+endif # CONFIG_MALI_PLATFORM_FAKE=y
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+ifeq ($(CONFIG_MALI_MIDGARD),m)
+include  $(src)/platform/$(platform_name)/Kbuild
+else ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-$(CONFIG_MALI_MIDGARD) += platform/
+endif
+endif
+
+ifeq ($(CONFIG_MALI_PLATFORM_DEVICETREE),y)
+	ccflags-y += -I$(src)/platform/devicetree
+    include $(src)/platform/devicetree/Kbuild
+endif
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+mali_kbase-y := $(SRC:.c=.o)
+
+mali_kbase-$(CONFIG_MALI_DMA_FENCE) += mali_kbase_dma_fence.o
+
+MALI_BACKEND_PATH ?= backend
+CONFIG_MALI_BACKEND ?= gpu
+CONFIG_MALI_BACKEND_REAL ?= $(CONFIG_MALI_BACKEND)
+
+ifeq ($(MALI_MOCK_TEST),1)
+ifeq ($(CONFIG_MALI_BACKEND_REAL),gpu)
+# Test functionality
+mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
+endif
+endif
+
+include  $(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)/Kbuild
+mali_kbase-y += $(BACKEND:.c=.o)
+
+ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+
+# Default to devicetree platform if neither a fake platform or a thirdparty
+# platform is configured.
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY)$(CONFIG_MALI_PLATFORM_FAKE),)
+CONFIG_MALI_PLATFORM_DEVICETREE := y
+endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/Kconfig b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/Kconfig
new file mode 100755
index 0000000..201832b
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/Kconfig
@@ -0,0 +1,225 @@
+#
+# (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menuconfig MALI_MIDGARD
+	tristate "Mali Midgard series support"
+	select GPU_TRACEPOINTS if ANDROID
+	default n
+	help
+	  Enable this option to build support for a ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+	  You will need the Gator device driver already loaded before loading this driver when enabling
+	  Streamline debug support.
+	  This is a legacy interface required by older versions of Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ && !MALI_PLATFORM_DEVICETREE
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD && PM_DEVFREQ
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali"
+	depends on MALI_MIDGARD && !KDS
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if KDS is not present and
+	  the Linux Kernel has built in support for DMA_BUF fences.
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default n
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
+
+config MALI_PLATFORM_FAKE
+	bool "Enable fake platform device support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  When you start to work with the Mali Midgard series device driver the platform-specific code of
+	  the Linux kernel for your platform may not be complete. In this situation the kernel device driver
+	  supports creating the platform device outside of the Linux platform-specific code.
+	  Enable this option if would like to use a platform device configuration from within the device driver.
+
+choice
+	prompt "Platform configuration"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default MALI_PLATFORM_DEVICETREE
+	help
+	  Select the SOC platform that contains a Mali Midgard GPU
+
+config MALI_PLATFORM_DEVICETREE
+	bool "Device Tree platform"
+	depends on OF
+	help
+	  Select this option to use Device Tree with the Mali driver.
+
+	  When using this option the Mali driver will get the details of the
+	  GPU hardware from the Device Tree. This means that the same driver
+	  binary can run on multiple platforms as long as all the GPU hardware
+	  details are described in the device tree.
+
+	  Device Tree is the recommended method for the Mali driver platform
+	  integration.
+
+config MALI_PLATFORM_VEXPRESS
+	depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+	bool "Versatile Express"
+config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ
+	depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+	bool "Versatile Express w/Virtex7 @ 40Mhz"
+config MALI_PLATFORM_GOLDFISH
+	depends on ARCH_GOLDFISH
+	bool "Android Goldfish virtual CPU"
+config MALI_PLATFORM_PBX
+	depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX
+	bool "Realview PBX-A9"
+config MALI_PLATFORM_THIRDPARTY
+	bool "Third Party Platform"
+endchoice
+
+config MALI_PLATFORM_THIRDPARTY_NAME
+	depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT
+	string "Third party platform name"
+	help
+	  Enter the name of a third party platform that is supported. The third part configuration
+	  file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name
+	  specified here.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_FENCE_DEBUG
+	bool "Debug sync fence usage"
+	depends on MALI_MIDGARD && MALI_EXPERT && SYNC
+	default y if MALI_DEBUG
+	help
+	  Select this option to enable additional checking and reporting on the
+	  use of sync fences in the Mali driver.
+
+	  This will add a 3s timeout to all sync fence waits in the Mali
+	  driver, so that when work for Mali has been waiting on a sync fence
+	  for a long time a debug message will be printed, detailing what fence
+	  is causing the block, and which dependent Mali atoms are blocked as a
+	  result of this.
+
+	  The timeout can be changed at runtime through the js_soft_timeout
+	  device attribute, where the timeout is specified in milliseconds.
+
+config MALI_NO_MALI
+	bool "No Mali"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This can be used to test the driver in a simulated environment
+	  whereby the hardware is not physically present. If the hardware is physically
+	  present it will not be used. This can be used to test the majority of the
+	  driver without needing actual hardware or for software benchmarking.
+	  All calls to the simulated hardware will complete immediately as if the hardware
+	  completed the task.
+
+config MALI_ERROR_INJECT
+	bool "Error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+	default n
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_TRACE_TIMELINE
+	bool "Timeline tracing"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enables timeline tracing through the kernel tracepoint system.
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event.	This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_GPU_MMU_AARCH64
+	bool "Use AArch64 page tables"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Use AArch64 format page tables for the GPU instead of LPAE-style.
+	  The two formats have the same functionality and performance but a
+	  future GPU may deprecate or remove the legacy LPAE-style format.
+
+	  The LPAE-style format is supported on all Midgard and current Bifrost
+	  GPUs. Enabling AArch64 format restricts the driver to only supporting
+	  Bifrost GPUs.
+
+	  If in doubt, say N.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/Makefile b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/Makefile
new file mode 100755
index 0000000..e1625e6
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/Makefile
@@ -0,0 +1,42 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
+UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump
+KBASE_PATH_RELATIVE = $(CURDIR)
+KDS_PATH_RELATIVE = $(CURDIR)/../../../..
+EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers
+
+ifeq ($(MALI_UNIT_TEST), 1)
+	EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+
+ifeq ($(MALI_BUS_LOG), 1)
+#Add bus logger symbols
+EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
+endif
+
+# GPL driver supports KDS
+EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
new file mode 100755
index 0000000..2bef9c2
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
new file mode 100755
index 0000000..e38120a
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -0,0 +1,63 @@
+#
+# (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+BACKEND += \
+	backend/gpu/mali_kbase_cache_policy_backend.c \
+	backend/gpu/mali_kbase_device_hw.c \
+	backend/gpu/mali_kbase_gpu.c \
+	backend/gpu/mali_kbase_gpuprops_backend.c \
+	backend/gpu/mali_kbase_debug_job_fault_backend.c \
+	backend/gpu/mali_kbase_irq_linux.c \
+	backend/gpu/mali_kbase_instr_backend.c \
+	backend/gpu/mali_kbase_jm_as.c \
+	backend/gpu/mali_kbase_jm_hw.c \
+	backend/gpu/mali_kbase_jm_rb.c \
+	backend/gpu/mali_kbase_js_affinity.c \
+	backend/gpu/mali_kbase_js_backend.c \
+	backend/gpu/mali_kbase_mmu_hw_direct.c \
+	backend/gpu/mali_kbase_pm_backend.c \
+	backend/gpu/mali_kbase_pm_driver.c \
+	backend/gpu/mali_kbase_pm_metrics.c \
+	backend/gpu/mali_kbase_pm_ca.c \
+	backend/gpu/mali_kbase_pm_ca_fixed.c \
+	backend/gpu/mali_kbase_pm_always_on.c \
+	backend/gpu/mali_kbase_pm_coarse_demand.c \
+	backend/gpu/mali_kbase_pm_demand.c \
+	backend/gpu/mali_kbase_pm_policy.c \
+	backend/gpu/mali_kbase_time.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+BACKEND += \
+	backend/gpu/mali_kbase_pm_ca_random.c \
+	backend/gpu/mali_kbase_pm_ca_demand.c \
+	backend/gpu/mali_kbase_pm_demand_always_powered.c \
+	backend/gpu/mali_kbase_pm_fast_start.c
+endif
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+BACKEND += backend/gpu/mali_kbase_devfreq.c
+endif
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+	# Dummy model
+	BACKEND += backend/gpu/mali_kbase_model_dummy.c
+	BACKEND += backend/gpu/mali_kbase_model_linux.c
+	# HW error simulation
+	BACKEND += backend/gpu/mali_kbase_model_error_generator.c
+endif
+
+ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
+	BACKEND += backend/gpu/mali_kbase_power_model_simple.c
+endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
new file mode 100755
index 0000000..c8ae87e
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100755
index 0000000..fef9a2c
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode)
+{
+	kbdev->current_gpu_coherency_mode = mode;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100755
index 0000000..fe98691
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+  * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+  *			in the GPU.
+  * @kbdev:	Device pointer
+  * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+  */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
new file mode 100755
index 0000000..7851ea6
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_debug_job_fault.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+/*GPU_CONTROL_REG(r)*/
+static int gpu_control_reg_snapshot[] = {
+	GPU_ID,
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI
+};
+
+/* JOB_CONTROL_REG(r) */
+static int job_control_reg_snapshot[] = {
+	JOB_IRQ_MASK,
+	JOB_IRQ_STATUS
+};
+
+/* JOB_SLOT_REG(n,r) */
+static int job_slot_reg_snapshot[] = {
+	JS_HEAD_LO,
+	JS_HEAD_HI,
+	JS_TAIL_LO,
+	JS_TAIL_HI,
+	JS_AFFINITY_LO,
+	JS_AFFINITY_HI,
+	JS_CONFIG,
+	JS_STATUS,
+	JS_HEAD_NEXT_LO,
+	JS_HEAD_NEXT_HI,
+	JS_AFFINITY_NEXT_LO,
+	JS_AFFINITY_NEXT_HI,
+	JS_CONFIG_NEXT
+};
+
+/*MMU_REG(r)*/
+static int mmu_reg_snapshot[] = {
+	MMU_IRQ_MASK,
+	MMU_IRQ_STATUS
+};
+
+/* MMU_AS_REG(n,r) */
+static int as_reg_snapshot[] = {
+	AS_TRANSTAB_LO,
+	AS_TRANSTAB_HI,
+	AS_MEMATTR_LO,
+	AS_MEMATTR_HI,
+	AS_FAULTSTATUS,
+	AS_FAULTADDRESS_LO,
+	AS_FAULTADDRESS_HI,
+	AS_STATUS
+};
+
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range)
+{
+	int i, j;
+	int offset = 0;
+	int slot_number;
+	int as_number;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	slot_number = kctx->kbdev->gpu_props.num_job_slots;
+	as_number = kctx->kbdev->gpu_props.num_address_spaces;
+
+	/* get the GPU control registers*/
+	for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job control registers*/
+	for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				JOB_CONTROL_REG(job_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job Slot registers*/
+	for (j = 0; j < slot_number; j++)	{
+		for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+			JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	/* get the MMU registers*/
+	for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Address space registers*/
+	for (j = 0; j < as_number; j++) {
+		for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+					MMU_AS_REG(j, as_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	WARN_ON(offset >= (reg_range*2/4));
+
+	/* set the termination flag*/
+	kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
+	kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
+			offset);
+
+	return true;
+}
+
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
+{
+	int offset = 0;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
+		kctx->reg_dump[offset+1] =
+				kbase_reg_read(kctx->kbdev,
+						kctx->reg_dump[offset], NULL);
+		offset += 2;
+	}
+	return true;
+}
+
+
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
new file mode 100755
index 0000000..67d7d54
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -0,0 +1,404 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <backend/gpu/mali_kbase_power_model_simple.h>
+#endif
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#endif /* Linux >= 3.13 */
+
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
+static struct devfreq_simple_ondemand_data ondemand_data = {
+		.upthreshold = 90,
+		.downdifferential = 5,
+};
+
+#define DEV_FREQ_GOV_DATA		(&ondemand_data)
+
+static ssize_t upthreshold_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ondemand_data.upthreshold);
+}
+
+static ssize_t upthreshold_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	unsigned int t;
+	int ret;
+
+	ret = sscanf(buf, "%u", &t);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (t >= 100 || t == 0) {
+		dev_err(dev, "invalid input:%s\n", buf);
+		return -EINVAL;
+	}
+	ondemand_data.upthreshold = t;
+	return count;
+
+}
+static DEVICE_ATTR_RW(upthreshold);
+
+static ssize_t downdifferential_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ondemand_data.downdifferential);
+}
+
+static ssize_t downdifferential_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	unsigned int t;
+	int ret;
+
+	ret = sscanf(buf, "%u", &t);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (t >= 100 || t == 0) {
+		dev_err(dev, "invalid input:%s\n", buf);
+		return -EINVAL;
+	}
+	ondemand_data.downdifferential = t;
+	return count;
+
+}
+static DEVICE_ATTR_RW(downdifferential);
+#else
+#define DEV_FREQ_GOV_DATA		(NULL)
+#endif /* CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND */
+
+static ssize_t utilisation_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev->parent);
+	unsigned long total_time = 0, busy_time = 0;
+	unsigned long utilise;
+
+	kbase_pm_get_dvfs_utilisation(kbdev, &total_time, &busy_time);
+
+	if (total_time == 0) {
+		utilise = 0;
+	} else {
+		/* round up */
+		utilise = (busy_time * 100 + (total_time >> 1)) / total_time;
+	}
+	return sprintf(buf, "%ld\n", utilise);
+}
+static DEVICE_ATTR_RO(utilisation);
+
+static struct device_attribute *kbase_dev_attr[] = {
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
+	&dev_attr_downdifferential,
+	&dev_attr_upthreshold,
+#endif
+	&dev_attr_utilisation
+};
+
+static int kbase_dev_add_attr(struct device *dev)
+{
+	int i, ret;
+	for (i = 0; i <  ARRAY_SIZE(kbase_dev_attr); i++) {
+		if (!kbase_dev_attr[i])
+			continue;
+		ret = device_create_file(dev, kbase_dev_attr[i]);
+		if (ret)
+			return ret;
+	}
+	return ret;
+}
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+	if (IS_ERR_OR_NULL(opp)) {
+		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (kbdev->current_freq == freq) {
+		*target_freq = freq;
+		return 0;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq < freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to increase voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(kbdev->clock, freq);
+	if (err) {
+		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+				freq, *target_freq);
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq > freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	*target_freq = freq;
+	kbdev->current_voltage = voltage;
+	kbdev->current_freq = freq;
+
+	kbase_tlstream_aux_devfreq_target((u64)freq);
+
+	kbase_pm_reset_dvfs_utilisation(kbdev);
+
+	return err;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	*freq = kbdev->current_freq;
+
+	return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	stat->current_frequency = kbdev->current_freq;
+
+	kbase_pm_get_dvfs_utilisation(kbdev,
+			&stat->total_time, &stat->busy_time);
+
+	stat->private_data = NULL;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	if (kbdev->devfreq_cooling)
+		memcpy(&kbdev->devfreq_cooling->last_status, stat,
+				sizeof(*stat));
+#endif
+#endif
+
+	return 0;
+}
+
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+		struct devfreq_dev_profile *dp)
+{
+	int count;
+	int i = 0;
+	unsigned long freq = 0;
+	struct dev_pm_opp *opp;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	if (count < 0) {
+		rcu_read_unlock();
+		return count;
+	}
+	rcu_read_unlock();
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	for (i = 0; i < count; i++, freq++) {
+		opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
+				count, i);
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	kbase_devfreq_term_freq_table(kbdev);
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	int err;
+
+	if (!kbdev->clock)
+		return -ENODEV;
+
+	kbdev->current_freq = clk_get_rate(kbdev->clock);
+
+	dp = &kbdev->devfreq_profile;
+
+	dp->initial_freq = kbdev->current_freq;
+	dp->polling_ms = 100;
+	dp->target = kbase_devfreq_target;
+	dp->get_dev_status = kbase_devfreq_status;
+	dp->get_cur_freq = kbase_devfreq_cur_freq;
+	dp->exit = kbase_devfreq_exit;
+
+	if (kbase_devfreq_init_freq_table(kbdev, dp))
+		return -EFAULT;
+
+	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+				"simple_ondemand", DEV_FREQ_GOV_DATA);
+	if (IS_ERR(kbdev->devfreq)) {
+		kbase_devfreq_term_freq_table(kbdev);
+		return PTR_ERR(kbdev->devfreq);
+	}
+
+	kbase_dev_add_attr(&kbdev->devfreq->dev);
+	err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to register OPP notifier (%d)\n", err);
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	err = kbase_power_model_simple_init(kbdev);
+	if (err && err != -ENODEV && err != -EPROBE_DEFER) {
+		dev_err(kbdev->dev,
+			"Failed to initialize simple power model (%d)\n",
+			err);
+		goto cooling_failed;
+	}
+	if (err == -EPROBE_DEFER)
+		goto cooling_failed;
+	if (err != -ENODEV) {
+		kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+				kbdev->dev->of_node,
+				kbdev->devfreq,
+				&power_model_simple_ops);
+		if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+			err = PTR_ERR(kbdev->devfreq_cooling);
+			dev_err(kbdev->dev,
+				"Failed to register cooling device (%d)\n",
+				err);
+			goto cooling_failed;
+		}
+	} else {
+		err = 0;
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	if (devfreq_remove_device(kbdev->devfreq))
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	return err;
+}
+
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+	int err;
+
+	dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->devfreq_cooling)
+		devfreq_cooling_unregister(kbdev->devfreq_cooling);
+#endif
+
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+	err = devfreq_remove_device(kbdev->devfreq);
+	if (err)
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
new file mode 100755
index 0000000..c0bf8b1
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+int kbase_devfreq_init(struct kbase_device *kbdev);
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _BASE_DEVFREQ_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
new file mode 100755
index 0000000..dcdf15c
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -0,0 +1,255 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ *
+ */
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+
+#ifdef CONFIG_DEBUG_FS
+
+
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
+{
+	struct kbase_io_access *old_buf;
+	struct kbase_io_access *new_buf;
+	unsigned long flags;
+
+	if (!new_size)
+		goto out_err; /* The new size must not be 0 */
+
+	new_buf = vmalloc(new_size * sizeof(*h->buf));
+	if (!new_buf)
+		goto out_err;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	old_buf = h->buf;
+
+	/* Note: we won't bother with copying the old data over. The dumping
+	 * logic wouldn't work properly as it relies on 'count' both as a
+	 * counter and as an index to the buffer which would have changed with
+	 * the new array. This is a corner case that we don't need to support.
+	 */
+	h->count = 0;
+	h->size = new_size;
+	h->buf = new_buf;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+	vfree(old_buf);
+
+	return 0;
+
+out_err:
+	return -1;
+}
+
+
+int kbase_io_history_init(struct kbase_io_history *h, u16 n)
+{
+	h->enabled = false;
+	spin_lock_init(&h->lock);
+	h->count = 0;
+	h->size = 0;
+	h->buf = NULL;
+	if (kbase_io_history_resize(h, n))
+		return -1;
+
+	return 0;
+}
+
+
+void kbase_io_history_term(struct kbase_io_history *h)
+{
+	vfree(h->buf);
+	h->buf = NULL;
+}
+
+
+/* kbase_io_history_add - add new entry to the register access history
+ *
+ * @h: Pointer to the history data structure
+ * @addr: Register address
+ * @value: The value that is either read from or written to the register
+ * @write: 1 if it's a register write, 0 if it's a read
+ */
+static void kbase_io_history_add(struct kbase_io_history *h,
+		void __iomem const *addr, u32 value, u8 write)
+{
+	struct kbase_io_access *io;
+	unsigned long flags;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	io = &h->buf[h->count % h->size];
+	io->addr = (uintptr_t)addr | write;
+	io->value = value;
+	++h->count;
+	/* If count overflows, move the index by the buffer size so the entire
+	 * buffer will still be dumped later */
+	if (unlikely(!h->count))
+		h->count = h->size;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+void kbase_io_history_dump(struct kbase_device *kbdev)
+{
+	struct kbase_io_history *const h = &kbdev->io_history;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!unlikely(h->enabled))
+		return;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	dev_err(kbdev->dev, "Register IO History:");
+	iters = (h->size > h->count) ? h->count : h->size;
+	dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	writel(value, kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				value, 1);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_WRITE, offset,
+									value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx)
+{
+	u32 val;
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	val = readl(kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				val, 0);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_READ, offset, val);
+	return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
+
+/**
+ * kbase_report_gpu_fault - Report a GPU fault.
+ * @kbdev:    Kbase device pointer
+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
+ *            was also set
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using dev_warn().
+ */
+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+	u32 status;
+	u64 address;
+
+	status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+	address = (u64) kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
+	address |= kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+
+	dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+			status & 0xFF,
+			kbase_exception_name(kbdev, status),
+			address);
+	if (multiple)
+		dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+	if (val & GPU_FAULT)
+		kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+	if (val & RESET_COMPLETED)
+		kbase_pm_reset_done(kbdev);
+
+	if (val & PRFCNT_SAMPLE_COMPLETED)
+		kbase_instr_hwcnt_sample_done(kbdev);
+
+	if (val & CLEAN_CACHES_COMPLETED)
+		kbase_clean_caches_done(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+
+	/* kbase_pm_check_transitions must be called after the IRQ has been
+	 * cleared. This is because it might trigger further power transitions
+	 * and we don't want to miss the interrupt raised to notify us that
+	 * these further transitions have finished.
+	 */
+	if (val & POWER_CHANGED_ALL)
+		kbase_pm_power_changed(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
new file mode 100755
index 0000000..5b20445
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access device APIs
+ */
+
+#ifndef _KBASE_DEVICE_INTERNAL_H_
+#define _KBASE_DEVICE_INTERNAL_H_
+
+/**
+ * kbase_reg_write - write to GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @value:  Value to write
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_reg_read - read from GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ *
+ * Return: Value in desired register
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+
+/**
+ * kbase_gpu_interrupt - GPU interrupt handler
+ * @kbdev: Kbase device pointer
+ * @val:   The value of the GPU IRQ status register which triggered the call
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be
+ * handled.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+#endif /* _KBASE_DEVICE_INTERNAL_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
new file mode 100755
index 0000000..d578fd7
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbasep_platform_device_init(kbdev);
+	if (err)
+		return err;
+
+	/* Ensure we can access the GPU registers */
+	kbase_pm_register_access_enable(kbdev);
+
+	/* Find out GPU properties based on the GPU feature registers */
+	kbase_gpuprops_set(kbdev);
+
+	/* We're done accessing the GPU registers for now. */
+	kbase_pm_register_access_disable(kbdev);
+
+	err = kbase_hwaccess_pm_init(kbdev);
+	if (err)
+		goto fail_pm;
+
+	err = kbase_install_interrupts(kbdev);
+	if (err)
+		goto fail_interrupts;
+
+	return 0;
+
+fail_interrupts:
+	kbase_hwaccess_pm_term(kbdev);
+fail_pm:
+	kbasep_platform_device_term(kbdev);
+
+	return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+	kbase_release_interrupts(kbdev);
+	kbase_hwaccess_pm_term(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		return err;
+
+	err = kbase_backend_timer_init(kbdev);
+	if (err)
+		goto fail_timer;
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+		dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+		err = -EINVAL;
+		goto fail_interrupt_test;
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = kbase_job_slot_init(kbdev);
+	if (err)
+		goto fail_job_slot;
+
+	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+	return 0;
+
+fail_job_slot:
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	kbase_backend_timer_term(kbdev);
+fail_timer:
+	kbase_hwaccess_pm_halt(kbdev);
+
+	return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+	kbase_job_slot_halt(kbdev);
+	kbase_job_slot_term(kbdev);
+	kbase_backend_timer_term(kbdev);
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100755
index 0000000..d410cd2
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,105 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query backend APIs
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	int i;
+
+	/* Fill regdump with the content of the relevant registers */
+	regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
+
+	regdump->l2_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_FEATURES), NULL);
+	regdump->suspend_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SUSPEND_SIZE), NULL);
+	regdump->tiler_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_FEATURES), NULL);
+	regdump->mem_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MEM_FEATURES), NULL);
+	regdump->mmu_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MMU_FEATURES), NULL);
+	regdump->as_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(AS_PRESENT), NULL);
+	regdump->js_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_PRESENT), NULL);
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		regdump->js_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		regdump->texture_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
+
+	regdump->thread_max_threads = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
+	regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
+									NULL);
+	regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
+	regdump->thread_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_FEATURES), NULL);
+
+	regdump->shader_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
+	regdump->shader_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
+
+	regdump->tiler_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
+	regdump->tiler_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
+
+	regdump->l2_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
+	regdump->l2_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
+}
+
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
+		/* Ensure we can access the GPU registers */
+		kbase_pm_register_access_enable(kbdev);
+
+		regdump->coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+		/* We're done accessing the GPU registers for now. */
+		kbase_pm_register_access_disable(kbdev);
+	} else {
+		/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
+		regdump->coherency_features =
+				COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+	}
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
new file mode 100755
index 0000000..7ad309e
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -0,0 +1,492 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * GPU backend instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+
+/**
+ * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
+ * hardware
+ *
+ * @kbdev: Kbase device
+ */
+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long pm_flags;
+	u32 irq_mask;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask | CLEAN_CACHES_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	int ret;
+	u64 shader_cores_needed;
+	u32 prfcnt_config;
+
+	shader_cores_needed = kbase_pm_get_present_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* alignment failure */
+	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
+		goto out_err;
+
+	/* Override core availability policy to ensure all cores are available
+	 */
+	kbase_pm_ca_instr_enable(kbdev);
+
+	/* Request the cores early on synchronously - we'll release them on any
+	 * errors (e.g. instrumentation already active) */
+	kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		/* Instrumentation is already enabled */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		goto out_unrequest_cores;
+	}
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
+						PRFCNT_SAMPLE_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* In use, this context is the owner */
+	kbdev->hwcnt.kctx = kctx;
+	/* Remember the dump address so we can reprogram it later */
+	kbdev->hwcnt.addr = setup->dump_buffer;
+
+	/* Request the clean */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+	kbdev->hwcnt.backend.triggered = 0;
+	/* Clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+	KBASE_DEBUG_ASSERT(ret);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Wait for cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	/* Configure */
+	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	{
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+	}
+#endif
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					setup->dump_buffer & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					setup->dump_buffer >> 32,        kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+					setup->jm_bm,                    kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+					setup->shader_bm,                kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+					setup->mmu_l2_bm,                kctx);
+	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
+	 * HW counter dump. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
+									kctx);
+	else
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	err = 0;
+
+	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+	return err;
+ out_unrequest_cores:
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ out_err:
+	return err;
+}
+
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	while (1) {
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
+			/* Instrumentation is not enabled */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.kctx != kctx) {
+			/* Instrumentation has been setup for another context */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
+			break;
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+		/* Ongoing dump/setup - wait for its completion */
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Disable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+
+	/* Disable the counters */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+
+	kbdev->hwcnt.kctx = NULL;
+	kbdev->hwcnt.addr = 0ULL;
+
+	kbase_pm_ca_instr_disable(kbdev);
+
+	kbase_pm_unrequest_cores(kbdev, true,
+		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+
+	kbase_pm_release_l2_caches(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+									kctx);
+
+	err = 0;
+
+ out:
+	return err;
+}
+
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.kctx != kctx) {
+		/* The instrumentation has been setup for another context */
+		goto unlock;
+	}
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
+		/* HW counters are disabled or another dump is ongoing, or we're
+		 * resetting */
+		goto unlock;
+	}
+
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Mark that we're dumping - the PF handler can signal that we faulted
+	 */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+	/* Reconfigure the dump address */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					kbdev->hwcnt.addr >> 32, NULL);
+
+	/* Start dumping */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
+					kbdev->hwcnt.addr, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+
+	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+
+	err = 0;
+
+ unlock:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
+
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success)
+{
+	unsigned long flags;
+	bool complete = false;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+		*success = true;
+		complete = true;
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		*success = false;
+		complete = true;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
+
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(data, struct kbase_device,
+						hwcnt.backend.cache_clean_work);
+
+	mutex_lock(&kbdev->cacheclean_lock);
+	kbasep_instr_hwcnt_cacheclean(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	/* Wait for our condition, and any reset to complete */
+	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+				kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_CLEANED);
+
+	/* All finished and idle */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
+		int ret;
+		/* Always clean and invalidate the cache after a successful dump
+		 */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+		ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+		KBASE_DEBUG_ASSERT(ret);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		unsigned long flags;
+		unsigned long pm_flags;
+
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+		/* Disable interrupt */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+									NULL);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+		/* Wakeup... */
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+			/* Only wake if we weren't resetting */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+			wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+		}
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	}
+}
+
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long flags;
+	int err;
+
+	/* Wait for dump & cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		err = -EINVAL;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	} else {
+		/* Dump done */
+		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+		err = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return err;
+}
+
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* Check it's the context previously set up and we're not already
+	 * dumping */
+	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
+							KBASE_INSTR_STATE_IDLE)
+		goto out;
+
+	/* Clear the counters */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_PRFCNT_CLEAR, kctx);
+
+	err = 0;
+
+out:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+int kbase_instr_backend_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
+	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
+	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
+						kbasep_cache_clean_worker);
+	kbdev->hwcnt.backend.triggered = 0;
+
+	kbdev->hwcnt.backend.cache_clean_wq =
+			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
+	if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+void kbase_instr_backend_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
new file mode 100755
index 0000000..4794672
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+	/* State where instrumentation is not active */
+	KBASE_INSTR_STATE_DISABLED = 0,
+	/* State machine is active and ready for a command. */
+	KBASE_INSTR_STATE_IDLE,
+	/* Hardware is currently dumping a frame. */
+	KBASE_INSTR_STATE_DUMPING,
+	/* We've requested a clean to occur on a workqueue */
+	KBASE_INSTR_STATE_REQUEST_CLEAN,
+	/* Hardware is currently cleaning and invalidating caches. */
+	KBASE_INSTR_STATE_CLEANING,
+	/* Cache clean completed, and either a) a dump is complete, or
+	 * b) instrumentation can now be setup. */
+	KBASE_INSTR_STATE_CLEANED,
+	/* An error has occured during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+	wait_queue_head_t wait;
+	int triggered;
+
+	enum kbase_instr_state state;
+	wait_queue_head_t cache_clean_wait;
+	struct workqueue_struct *cache_clean_wq;
+	struct work_struct  cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100755
index 0000000..e96aeae
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100755
index 0000000..8781561
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific IRQ APIs
+ */
+
+#ifndef _KBASE_IRQ_INTERNAL_H_
+#define _KBASE_IRQ_INTERNAL_H_
+
+int kbase_install_interrupts(struct kbase_device *kbdev);
+
+void kbase_release_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
+ *                          execution
+ * @kbdev: The kbase device
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev);
+
+#endif /* _KBASE_IRQ_INTERNAL_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
new file mode 100755
index 0000000..8416b80
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -0,0 +1,469 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+
+#include <linux/interrupt.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static void *kbase_tag(void *ptr, u32 tag)
+{
+	return (void *)(((uintptr_t) ptr) | tag);
+}
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_job_done(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	atomic_inc(&kbdev->faults_pending);
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val) {
+		atomic_dec(&kbdev->faults_pending);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_mmu_interrupt(kbdev, val);
+
+	atomic_dec(&kbdev->faults_pending);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_gpu_interrupt(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler);
+
+static irq_handler_t kbase_handler_table[] = {
+	[JOB_IRQ_TAG] = kbase_job_irq_handler,
+	[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+	[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
+
+#ifdef CONFIG_MALI_DEBUG
+#define  JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define  MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define  GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * kbase_set_custom_irq_handler - Set a custom IRQ handler
+ * @kbdev: Device for which the handler is to be registered
+ * @custom_handler: Handler to be registered
+ * @irq_type: Interrupt type
+ *
+ * Registers given interrupt handler for requested interrupt type
+ * In the case where irq handler is not specified, the default handler shall be
+ * registered
+ *
+ * Return: 0 case success, error code otherwise
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+					irq_handler_t custom_handler,
+					int irq_type)
+{
+	int result = 0;
+	irq_handler_t requested_irq_handler = NULL;
+
+	KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
+						(GPU_IRQ_HANDLER >= irq_type));
+
+	/* Release previous handler */
+	if (kbdev->irqs[irq_type].irq)
+		free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
+
+	requested_irq_handler = (NULL != custom_handler) ? custom_handler :
+						kbase_handler_table[irq_type];
+
+	if (0 != request_irq(kbdev->irqs[irq_type].irq,
+			requested_irq_handler,
+			kbdev->irqs[irq_type].flags | IRQF_SHARED,
+			dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+		result = -EINVAL;
+		dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+					kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+		dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/* test correct interrupt assigment and reception by cpu */
+struct kbasep_irq_test {
+	struct hrtimer timer;
+	wait_queue_head_t wait;
+	int triggered;
+	u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT    500
+
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+	struct kbasep_irq_test *test_data = container_of(timer,
+						struct kbasep_irq_test, timer);
+
+	test_data->timeout = 1;
+	test_data->triggered = 1;
+	wake_up(&test_data->wait);
+	return HRTIMER_NORESTART;
+}
+
+static int kbasep_common_test_interrupt(
+				struct kbase_device * const kbdev, u32 tag)
+{
+	int err = 0;
+	irq_handler_t test_handler;
+
+	u32 old_mask_val;
+	u16 mask_offset;
+	u16 rawstat_offset;
+
+	switch (tag) {
+	case JOB_IRQ_TAG:
+		test_handler = kbase_job_irq_test_handler;
+		rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+		mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+		break;
+	case MMU_IRQ_TAG:
+		test_handler = kbase_mmu_irq_test_handler;
+		rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+		mask_offset = MMU_REG(MMU_IRQ_MASK);
+		break;
+	case GPU_IRQ_TAG:
+		/* already tested by pm_driver - bail out */
+	default:
+		return 0;
+	}
+
+	/* store old mask */
+	old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+	/* mask interrupts */
+	kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+	if (kbdev->irqs[tag].irq) {
+		/* release original handler and install test handler */
+		if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
+			err = -EINVAL;
+		} else {
+			kbasep_irq_test_data.timeout = 0;
+			hrtimer_init(&kbasep_irq_test_data.timer,
+					CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+			kbasep_irq_test_data.timer.function =
+						kbasep_test_interrupt_timeout;
+
+			/* trigger interrupt */
+			kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
+			kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+
+			hrtimer_start(&kbasep_irq_test_data.timer,
+					HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
+					HRTIMER_MODE_REL);
+
+			wait_event(kbasep_irq_test_data.wait,
+					kbasep_irq_test_data.triggered != 0);
+
+			if (kbasep_irq_test_data.timeout != 0) {
+				dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+				err = -EINVAL;
+			} else {
+				dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+			}
+
+			hrtimer_cancel(&kbasep_irq_test_data.timer);
+			kbasep_irq_test_data.triggered = 0;
+
+			/* mask interrupts */
+			kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+			/* release test handler */
+			free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+		}
+
+		/* restore original interrupt */
+		if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
+				kbdev->irqs[tag].flags | IRQF_SHARED,
+				dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+			dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
+						kbdev->irqs[tag].irq, tag);
+			err = -EINVAL;
+		}
+	}
+	/* restore old mask */
+	kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+
+	return err;
+}
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev)
+{
+	int err;
+
+	init_waitqueue_head(&kbasep_irq_test_data.wait);
+	kbasep_irq_test_data.triggered = 0;
+
+	/* A suspend won't happen during startup/insmod */
+	kbase_pm_context_active(kbdev);
+
+	err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+
+ out:
+	kbase_pm_context_idle(kbdev);
+
+	return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	int err;
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
+				kbdev->irqs[i].flags | IRQF_SHARED,
+				dev_name(kbdev->dev),
+				kbase_tag(kbdev, i));
+		if (err) {
+			dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+							kbdev->irqs[i].irq, i);
+#ifdef CONFIG_SPARSE_IRQ
+			dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+			goto release;
+		}
+	}
+
+	return 0;
+
+ release:
+	while (i-- > 0)
+		free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+
+	return err;
+}
+
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+	}
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			synchronize_irq(kbdev->irqs[i].irq);
+	}
+}
+
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
new file mode 100755
index 0000000..202dcfa
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -0,0 +1,378 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register backend context / address space management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+/**
+ * assign_and_activate_kctx_addr_space - Assign an AS to a context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @current_as: Address Space to assign
+ *
+ * Assign an Address Space (AS) to a context, and add the context to the Policy.
+ *
+ * This includes
+ *   setting up the global runpool_irq structure and the context on the AS,
+ *   Activating the MMU on the AS,
+ *   Allowing jobs to be submitted on the AS.
+ *
+ * Context:
+ *   kbasep_js_kctx_info.jsctx_mutex held,
+ *   kbasep_js_device_data.runpool_mutex held,
+ *   AS transaction mutex held,
+ *   Runpool IRQ lock held
+ */
+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_as *current_as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_per_as_data *js_per_as_data;
+	int as_nr = current_as->number;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	/* Attribute handling */
+	kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+	/* Assign addr space */
+	kctx->as_nr = as_nr;
+
+	/* If the GPU is currently powered, activate this address space on the
+	 * MMU */
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_mmu_update(kctx);
+	/* If the GPU was not powered then the MMU will be reprogrammed on the
+	 * next pm_context_active() */
+
+	/* Allow it to run jobs */
+	kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+	/* Book-keeping */
+	js_per_as_data->kctx = kctx;
+	js_per_as_data->as_busy_refcount = 0;
+
+	kbase_js_runpool_inc_context_count(kbdev, kctx);
+}
+
+/**
+ * release_addr_space - Release an address space
+ * @kbdev: Kbase device
+ * @kctx_as_nr: Address space of context to release
+ * @kctx: Context being released
+ *
+ * Context: kbasep_js_device_data.runpool_mutex must be held
+ *
+ * Release an address space, making it available for being picked again.
+ */
+static void release_addr_space(struct kbase_device *kbdev, int kctx_as_nr,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u16 as_bit = (1u << kctx_as_nr);
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* The address space must not already be free */
+	KBASE_DEBUG_ASSERT(!(js_devdata->as_free & as_bit));
+
+	js_devdata->as_free |= as_bit;
+
+	kbase_js_runpool_dec_context_count(kbdev, kctx);
+}
+
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	int i;
+
+	if (kbdev->hwaccess.active_kctx == kctx) {
+		/* Context is already active */
+		return true;
+	}
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+				&kbdev->js_data.runpool_irq.per_as_data[i];
+
+		if (js_per_as_data->kctx == kctx) {
+			/* Context already has ASID - mark as active */
+			return true;
+		}
+	}
+
+	/* Context does not have address space assigned */
+	return false;
+}
+
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_per_as_data *js_per_as_data;
+	int as_nr = kctx->as_nr;
+
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Attempting to release context without ASID\n");
+		return;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr];
+	if (js_per_as_data->as_busy_refcount != 0) {
+		WARN(1, "Attempting to release active ASID\n");
+		return;
+	}
+
+	/* Release context from address space */
+	js_per_as_data->kctx = NULL;
+
+	kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
+	/* If the GPU is currently powered, de-activate this address space on
+	 * the MMU */
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_mmu_disable(kctx);
+	/* If the GPU was not powered then the MMU will be reprogrammed on the
+	 * next pm_context_active() */
+
+	release_addr_space(kbdev, as_nr, kctx);
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+}
+
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+}
+
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+								int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	js_devdata->as_free |= (1 << as_nr);
+}
+
+/**
+ * check_is_runpool_full - check whether the runpool is full for a specified
+ * context
+ * @kbdev: Kbase device
+ * @kctx:  Kbase context
+ *
+ * If kctx == NULL, then this makes the least restrictive check on the
+ * runpool. A specific context that is supplied immediately after could fail
+ * the check, even under the same conditions.
+ *
+ * Therefore, once a context is obtained you \b must re-check it with this
+ * function, since the return value could change to false.
+ *
+ * Context:
+ *   In all cases, the caller must hold kbasep_js_device_data.runpool_mutex.
+ *   When kctx != NULL the caller must hold the
+ *   kbasep_js_kctx_info.ctx.jsctx_mutex.
+ *   When kctx == NULL, then the caller need not hold any jsctx_mutex locks (but
+ *   it doesn't do any harm to do so).
+ *
+ * Return: true if the runpool is full
+ */
+static bool check_is_runpool_full(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	bool is_runpool_full;
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Regardless of whether a context is submitting or not, can't have more
+	 * than there are HW address spaces */
+	is_runpool_full = (bool) (js_devdata->nr_all_contexts_running >=
+						kbdev->nr_hw_address_spaces);
+
+	if (kctx && !kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		/* Contexts that submit might use less of the address spaces
+		 * available, due to HW workarounds.  In which case, the runpool
+		 * is also full when the number of submitting contexts exceeds
+		 * the number of submittable address spaces.
+		 *
+		 * Both checks must be made: can have nr_user_address_spaces ==
+		 * nr_hw_address spaces, and at the same time can have
+		 * nr_user_contexts_running < nr_all_contexts_running. */
+		is_runpool_full |= (bool)
+					(js_devdata->nr_user_contexts_running >=
+						kbdev->nr_user_address_spaces);
+	}
+
+	return is_runpool_full;
+}
+
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	int i;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* First try to find a free address space */
+	if (check_is_runpool_full(kbdev, kctx))
+		i = -1;
+	else
+		i = ffs(js_devdata->as_free) - 1;
+
+	if (i >= 0 && i < kbdev->nr_hw_address_spaces) {
+		js_devdata->as_free &= ~(1 << i);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return i;
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* No address space currently free, see if we can release one */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+		struct kbasep_js_kctx_info *as_js_kctx_info;
+		struct kbase_context *as_kctx;
+
+		js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[i];
+		as_kctx = js_per_as_data->kctx;
+		as_js_kctx_info = &as_kctx->jctx.sched_info;
+
+		/* Don't release privileged or active contexts, or contexts with
+		 * jobs running */
+		if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) &&
+			js_per_as_data->as_busy_refcount == 0) {
+			if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
+								as_kctx)) {
+				WARN(1, "Failed to retain active context\n");
+
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				return KBASEP_AS_NR_INVALID;
+			}
+
+			kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
+
+			/* Drop and retake locks to take the jsctx_mutex on the
+			 * context we're about to release without violating lock
+			 * ordering
+			 */
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+
+			/* Release context from address space */
+			mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+
+			kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
+
+			if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) {
+				kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
+								as_kctx,
+								true);
+
+				js_devdata->as_free &= ~(1 << i);
+
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+				return i;
+			}
+
+			/* Context was retained while locks were dropped,
+			 * continue looking for free AS */
+
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_as *new_address_space = NULL;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if (kbdev->hwaccess.active_kctx == kctx ||
+	    kctx->as_nr != KBASEP_AS_NR_INVALID ||
+	    as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Invalid parameters to use_ctx()\n");
+		return false;
+	}
+
+	new_address_space = &kbdev->as[as_nr];
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
+
+	if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+		/* We need to retain it to keep the corresponding address space
+		 */
+		kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	}
+
+	return true;
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
new file mode 100755
index 0000000..08a7400
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific definitions
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
+#define _KBASE_HWACCESS_GPU_DEFS_H_
+
+/* SLOT_RB_SIZE must be < 256 */
+#define SLOT_RB_SIZE 2
+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
+
+/**
+ * struct rb_entry - Ringbuffer entry
+ * @katom:	Atom associated with this entry
+ */
+struct rb_entry {
+	struct kbase_jd_atom *katom;
+};
+
+/**
+ * struct slot_rb - Slot ringbuffer
+ * @entries:		Ringbuffer entries
+ * @last_context:	The last context to submit a job on this slot
+ * @read_idx:		Current read index of buffer
+ * @write_idx:		Current write index of buffer
+ * @job_chain_flag:	Flag used to implement jobchain disambiguation
+ */
+struct slot_rb {
+	struct rb_entry entries[SLOT_RB_SIZE];
+
+	struct kbase_context *last_context;
+
+	u8 read_idx;
+	u8 write_idx;
+
+	u8 job_chain_flag;
+};
+
+/**
+ * struct kbase_backend_data - GPU backend specific data for HW access layer
+ * @slot_rb:			Slot ringbuffers
+ * @rmu_workaround_flag:	When PRLAM-8987 is present, this flag determines
+ *				whether slots 0/1 or slot 2 are currently being
+ *				pulled from
+ * @scheduling_timer:		The timer tick used for rescheduling jobs
+ * @timer_running:		Is the timer running? The runpool_mutex must be
+ *				held whilst modifying this.
+ * @suspend_timer:              Is the timer suspended? Set when a suspend
+ *                              occurs and cleared on resume. The runpool_mutex
+ *                              must be held whilst modifying this.
+ * @reset_gpu:			Set to a KBASE_RESET_xxx value (see comments)
+ * @reset_workq:		Work queue for performing the reset
+ * @reset_work:			Work item for performing the reset
+ * @reset_wait:			Wait event signalled when the reset is complete
+ * @reset_timer:		Timeout for soft-stops before the reset
+ * @timeouts_updated:           Have timeout values just been updated?
+ *
+ * The hwaccess_lock (a spinlock) must be held when accessing this structure
+ */
+struct kbase_backend_data {
+	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
+
+	bool rmu_workaround_flag;
+
+	struct hrtimer scheduling_timer;
+
+	bool timer_running;
+	bool suspend_timer;
+
+	atomic_t reset_gpu;
+
+/* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_NOT_PENDING     0
+/* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_PREPARED        1
+/* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_COMMITTED       2
+/* The GPU reset process is currently occuring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+#define KBASE_RESET_GPU_HAPPENING       3
+/* Reset the GPU silently, used when resetting the GPU as part of normal
+ * behavior (e.g. when exiting protected mode). */
+#define KBASE_RESET_GPU_SILENT          4
+	struct workqueue_struct *reset_workq;
+	struct work_struct reset_work;
+	wait_queue_head_t reset_wait;
+	struct hrtimer reset_timer;
+
+	bool timeouts_updated;
+};
+
+/**
+ * struct kbase_jd_atom_backend - GPU backend specific katom data
+ */
+struct kbase_jd_atom_backend {
+};
+
+/**
+ * struct kbase_context_backend - GPU backend specific context data
+ */
+struct kbase_context_backend {
+};
+
+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
new file mode 100755
index 0000000..668258b
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -0,0 +1,1560 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel job manager APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_vinstr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+#define beenthere(kctx, f, a...) \
+			dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if KBASE_GPU_RESET_EN
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
+static void kbasep_reset_timeout_worker(struct work_struct *data);
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
+#endif /* KBASE_GPU_RESET_EN */
+
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
+						struct kbase_context *kctx)
+{
+	return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
+}
+
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js)
+{
+	struct kbase_context *kctx;
+	u32 cfg;
+	u64 jc_head = katom->jc;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	/* Command register must be available */
+	KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+	/* Affinity is not violating */
+	kbase_js_debug_log_current_affinities(kbdev);
+	KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
+							katom->affinity));
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
+						jc_head & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
+						jc_head >> 32, kctx);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+					katom->affinity & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+					katom->affinity >> 32, kctx);
+
+	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
+	 * start */
+	cfg = kctx->as_nr;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
+
+#ifndef CONFIG_MALI_COH_GPU
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START))
+		cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END))
+		cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+#endif /* CONFIG_MALI_COH_GPU */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649) ||
+		!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3982))
+		cfg |= JS_CONFIG_START_MMU;
+
+	cfg |= JS_CONFIG_THREAD_PRI(8);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) &&
+		(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED))
+		cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+		if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+			cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								true;
+		} else {
+			katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								false;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
+				katom->flush_id, kctx);
+
+	/* Write an approximate start timestamp.
+	 * It's approximate because there might be a job in the HEAD register.
+	 * In such cases, we'll try to make a better approximation in the IRQ
+	 * handler (up to the KBASE_JS_IRQ_THROTTLE_TIME_US). */
+	katom->start_timestamp = ktime_get();
+
+	/* GO ! */
+	dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx",
+				katom, kctx, js, jc_head, katom->affinity);
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
+							(u32) katom->affinity);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(
+				GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
+				kctx, kbase_jd_atom_id(kctx, katom));
+#endif
+	kbase_tlstream_tl_attrib_atom_config(katom, jc_head,
+			katom->affinity, cfg);
+	kbase_tlstream_tl_ret_ctx_lpu(
+		kctx,
+		&kbdev->gpu_props.props.raw_props.js_features[
+			katom->slot_nr]);
+	kbase_tlstream_tl_ret_atom_as(katom, &kbdev->as[kctx->as_nr]);
+	kbase_tlstream_tl_ret_atom_lpu(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[js],
+			"ctx_nr,atom_nr");
+#ifdef CONFIG_GPU_TRACEPOINTS
+	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
+		/* If this is the only job on the slot, trace it as starting */
+		char js_string[16];
+
+		trace_gpu_sched_switch(
+				kbasep_make_job_slot_string(js, js_string),
+				ktime_to_ns(katom->start_timestamp),
+				(u32)katom->kctx->id, 0, katom->work_id);
+		kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
+	}
+#endif
+	kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+						JS_COMMAND_START, katom->kctx);
+}
+
+/**
+ * kbasep_job_slot_update_head_start_timestamp - Update timestamp
+ * @kbdev: kbase device
+ * @js: job slot
+ * @end_timestamp: timestamp
+ *
+ * Update the start_timestamp of the job currently in the HEAD, based on the
+ * fact that we got an IRQ for the previous set of completed jobs.
+ *
+ * The estimate also takes into account the %KBASE_JS_IRQ_THROTTLE_TIME_US and
+ * the time the job was submitted, to work out the best estimate (which might
+ * still result in an over-estimate to the calculated time spent)
+ */
+static void kbasep_job_slot_update_head_start_timestamp(
+						struct kbase_device *kbdev,
+						int js,
+						ktime_t end_timestamp)
+{
+	if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) {
+		struct kbase_jd_atom *katom;
+		ktime_t new_timestamp;
+		ktime_t timestamp_diff;
+		/* The atom in the HEAD */
+		katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		KBASE_DEBUG_ASSERT(katom != NULL);
+
+		/* Account for any IRQ Throttle time - makes an overestimate of
+		 * the time spent by the job */
+		new_timestamp = ktime_sub_ns(end_timestamp,
+					KBASE_JS_IRQ_THROTTLE_TIME_US * 1000);
+		timestamp_diff = ktime_sub(new_timestamp,
+							katom->start_timestamp);
+		if (ktime_to_ns(timestamp_diff) >= 0) {
+			/* Only update the timestamp if it's a better estimate
+			 * than what's currently stored. This is because our
+			 * estimate that accounts for the throttle time may be
+			 * too much of an overestimate */
+			katom->start_timestamp = new_timestamp;
+		}
+	}
+}
+
+/**
+ * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint
+ * @kbdev: kbase device
+ * @js: job slot
+ *
+ * Get kbase atom by calling kbase_gpu_inspect for given job slot.
+ * Then use obtained katom and name of slot associated with the given
+ * job slot number in tracepoint call to the instrumentation module
+ * informing that given atom is no longer executed on given lpu (job slot).
+ */
+static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int js)
+{
+	int i;
+	for (i = 0;
+	     i < kbase_backend_nr_atoms_submitted(kbdev, js);
+	     i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		kbase_tlstream_tl_nret_atom_lpu(katom,
+			&kbdev->gpu_props.props.raw_props.js_features[js]);
+	}
+}
+
+/**
+ * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline
+ * tracepoint
+ * @kbdev: kbase device
+ * @js: job slot
+ *
+ * Make a tracepoint call to the instrumentation module informing that
+ * softstop happened on given lpu (job slot).
+ */
+static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
+					int js)
+{
+	kbase_tlstream_tl_event_lpu_softstop(
+		&kbdev->gpu_props.props.raw_props.js_features[js]);
+}
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done)
+{
+	unsigned long flags;
+	int i;
+	u32 count = 0;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
+
+	memset(&kbdev->slot_submit_count_irq[0], 0,
+					sizeof(kbdev->slot_submit_count_irq));
+
+	/* write irq throttle register, this will prevent irqs from occurring
+	 * until the given number of gpu clock cycles have passed */
+	{
+		int irq_throttle_cycles =
+				atomic_read(&kbdev->irq_throttle_cycles);
+
+		kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE),
+						irq_throttle_cycles, NULL);
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	while (done) {
+		u32 failed = done >> 16;
+
+		/* treat failed slots as finished slots */
+		u32 finished = (done & 0xFFFF) | failed;
+
+		/* Note: This is inherently unfair, as we always check
+		 * for lower numbered interrupts before the higher
+		 * numbered ones.*/
+		i = ffs(finished) - 1;
+		KBASE_DEBUG_ASSERT(i >= 0);
+
+		do {
+			int nr_done;
+			u32 active;
+			u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */
+			u64 job_tail = 0;
+
+			if (failed & (1u << i)) {
+				/* read out the job slot status code if the job
+				 * slot reported failure */
+				completion_code = kbase_reg_read(kbdev,
+					JOB_SLOT_REG(i, JS_STATUS), NULL);
+
+				switch (completion_code) {
+				case BASE_JD_EVENT_STOPPED:
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+					kbase_trace_mali_job_slots_event(
+						GATOR_MAKE_EVENT(
+						GATOR_JOB_SLOT_SOFT_STOPPED, i),
+								NULL, 0);
+#endif
+
+					kbasep_trace_tl_event_lpu_softstop(
+						kbdev, i);
+
+					kbasep_trace_tl_nret_atom_lpu(
+						kbdev, i);
+
+					/* Soft-stopped job - read the value of
+					 * JS<n>_TAIL so that the job chain can
+					 * be resumed */
+					job_tail = (u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_LO),
+									NULL) |
+						((u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_HI),
+								NULL) << 32);
+					break;
+				case BASE_JD_EVENT_NOT_STARTED:
+					/* PRLAM-10673 can cause a TERMINATED
+					 * job to come back as NOT_STARTED, but
+					 * the error interrupt helps us detect
+					 * it */
+					completion_code =
+						BASE_JD_EVENT_TERMINATED;
+					/* fall through */
+				default:
+					dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+							i, completion_code,
+							kbase_exception_name
+							(kbdev,
+							completion_code));
+				}
+
+				kbase_gpu_irq_evict(kbdev, i);
+			}
+
+			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+					done & ((1 << i) | (1 << (i + 16))),
+					NULL);
+			active = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_JS_STATE),
+					NULL);
+
+			if (((active >> i) & 1) == 0 &&
+					(((done >> (i + 16)) & 1) == 0)) {
+				/* There is a potential race we must work
+				 * around:
+				 *
+				 *  1. A job slot has a job in both current and
+				 *     next registers
+				 *  2. The job in current completes
+				 *     successfully, the IRQ handler reads
+				 *     RAWSTAT and calls this function with the
+				 *     relevant bit set in "done"
+				 *  3. The job in the next registers becomes the
+				 *     current job on the GPU
+				 *  4. Sometime before the JOB_IRQ_CLEAR line
+				 *     above the job on the GPU _fails_
+				 *  5. The IRQ_CLEAR clears the done bit but not
+				 *     the failed bit. This atomically sets
+				 *     JOB_IRQ_JS_STATE. However since both jobs
+				 *     have now completed the relevant bits for
+				 *     the slot are set to 0.
+				 *
+				 * If we now did nothing then we'd incorrectly
+				 * assume that _both_ jobs had completed
+				 * successfully (since we haven't yet observed
+				 * the fail bit being set in RAWSTAT).
+				 *
+				 * So at this point if there are no active jobs
+				 * left we check to see if RAWSTAT has a failure
+				 * bit set for the job slot. If it does we know
+				 * that there has been a new failure that we
+				 * didn't previously know about, so we make sure
+				 * that we record this in active (but we wait
+				 * for the next loop to deal with it).
+				 *
+				 * If we were handling a job failure (i.e. done
+				 * has the relevant high bit set) then we know
+				 * that the value read back from
+				 * JOB_IRQ_JS_STATE is the correct number of
+				 * remaining jobs because the failed job will
+				 * have prevented any futher jobs from starting
+				 * execution.
+				 */
+				u32 rawstat = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+				if ((rawstat >> (i + 16)) & 1) {
+					/* There is a failed job that we've
+					 * missed - add it back to active */
+					active |= (1u << i);
+				}
+			}
+
+			dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+							completion_code);
+
+			nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+			nr_done -= (active >> i) & 1;
+			nr_done -= (active >> (i + 16)) & 1;
+
+			if (nr_done <= 0) {
+				dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+									i);
+
+				goto spurious;
+			}
+
+			count += nr_done;
+
+			while (nr_done) {
+				if (nr_done == 1) {
+					kbase_gpu_complete_hw(kbdev, i,
+								completion_code,
+								job_tail,
+								&end_timestamp);
+					kbase_jm_try_kick_all(kbdev);
+				} else {
+					/* More than one job has completed.
+					 * Since this is not the last job being
+					 * reported this time it must have
+					 * passed. This is because the hardware
+					 * will not allow further jobs in a job
+					 * slot to complete until the failed job
+					 * is cleared from the IRQ status.
+					 */
+					kbase_gpu_complete_hw(kbdev, i,
+							BASE_JD_EVENT_DONE,
+							0,
+							&end_timestamp);
+				}
+				nr_done--;
+			}
+ spurious:
+			done = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
+				/* Workaround for missing interrupt caused by
+				 * PRLAM-10883 */
+				if (((active >> i) & 1) && (0 ==
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(i,
+							JS_STATUS), NULL))) {
+					/* Force job slot to be processed again
+					 */
+					done |= (1u << i);
+				}
+			}
+
+			failed = done >> 16;
+			finished = (done & 0xFFFF) | failed;
+			if (done)
+				end_timestamp = ktime_get();
+		} while (finished & (1 << i));
+
+		kbasep_job_slot_update_head_start_timestamp(kbdev, i,
+								end_timestamp);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#if KBASE_GPU_RESET_EN
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_COMMITTED) {
+		/* If we're trying to reset the GPU then we might be able to do
+		 * it early (without waiting for a timeout) because some jobs
+		 * have completed
+		 */
+		kbasep_try_reset_gpu_early(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
+}
+KBASE_EXPORT_TEST_API(kbase_job_done);
+
+static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	bool soft_stops_allowed = true;
+
+	if (kbase_jd_katom_is_protected(katom)) {
+		soft_stops_allowed = false;
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+		if ((katom->core_req & BASE_JD_REQ_T) != 0)
+			soft_stops_allowed = false;
+	}
+	return soft_stops_allowed;
+}
+
+static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
+						base_jd_core_req core_reqs)
+{
+	bool hard_stops_allowed = true;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
+		if ((core_reqs & BASE_JD_REQ_T) != 0)
+			hard_stops_allowed = false;
+	}
+	return hard_stops_allowed;
+}
+
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom)
+{
+	struct kbase_context *kctx = target_katom->kctx;
+#if KBASE_TRACE_ENABLE
+	u32 status_reg_before;
+	u64 job_in_head_before;
+	u32 status_reg_after;
+
+	KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+
+	/* Check the head pointer */
+	job_in_head_before = ((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
+			| (((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_HI), NULL))
+									<< 32);
+	status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+#endif
+
+	if (action == JS_COMMAND_SOFT_STOP) {
+		bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
+								target_katom);
+
+		if (!soft_stop_allowed) {
+#ifdef CONFIG_MALI_DEBUG
+			dev_dbg(kbdev->dev,
+					"Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+
+		/* We are about to issue a soft stop, so mark the atom as having
+		 * been soft stopped */
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+
+		/* Mark the point where we issue the soft-stop command */
+		kbase_tlstream_tl_event_atom_softstop_issue(target_katom);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+			int i;
+
+			for (i = 0;
+			     i < kbase_backend_nr_atoms_submitted(kbdev, js);
+			     i++) {
+				struct kbase_jd_atom *katom;
+
+				katom = kbase_gpu_inspect(kbdev, js, i);
+
+				KBASE_DEBUG_ASSERT(katom);
+
+				/* For HW_ISSUE_8316, only 'bad' jobs attacking
+				 * the system can cause this issue: normally,
+				 * all memory should be allocated in multiples
+				 * of 4 pages, and growable memory should be
+				 * changed size in multiples of 4 pages.
+				 *
+				 * Whilst such 'bad' jobs can be cleared by a
+				 * GPU reset, the locking up of a uTLB entry
+				 * caused by the bad job could also stall other
+				 * ASs, meaning that other ASs' jobs don't
+				 * complete in the 'grace' period before the
+				 * reset. We don't want to lose other ASs' jobs
+				 * when they would normally complete fine, so we
+				 * must 'poke' the MMU regularly to help other
+				 * ASs complete */
+				kbase_as_poking_timer_retain_atom(
+						kbdev, katom->kctx, katom);
+			}
+		}
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_SOFT_STOP_1 :
+				JS_COMMAND_SOFT_STOP_0;
+		}
+	} else if (action == JS_COMMAND_HARD_STOP) {
+		bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
+								core_reqs);
+
+		if (!hard_stop_allowed) {
+			/* Jobs can be hard-stopped for the following reasons:
+			 *  * CFS decides the job has been running too long (and
+			 *    soft-stop has not occurred). In this case the GPU
+			 *    will be reset by CFS if the job remains on the
+			 *    GPU.
+			 *
+			 *  * The context is destroyed, kbase_jd_zap_context
+			 *    will attempt to hard-stop the job. However it also
+			 *    has a watchdog which will cause the GPU to be
+			 *    reset if the job remains on the GPU.
+			 *
+			 *  * An (unhandled) MMU fault occurred. As long as
+			 *    BASE_HW_ISSUE_8245 is defined then the GPU will be
+			 *    reset.
+			 *
+			 * All three cases result in the GPU being reset if the
+			 * hard-stop fails, so it is safe to just return and
+			 * ignore the hard-stop request.
+			 */
+			dev_warn(kbdev->dev,
+					"Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+			return;
+		}
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_HARD_STOP_1 :
+				JS_COMMAND_HARD_STOP_0;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
+
+#if KBASE_TRACE_ENABLE
+	status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+	if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
+		struct kbase_jd_atom *head;
+		struct kbase_context *head_kctx;
+
+		head = kbase_gpu_inspect(kbdev, js, 0);
+		head_kctx = head->kctx;
+
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx,
+						head, job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+						0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	} else {
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+#endif
+}
+
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	/* Cancel any remaining running jobs for this kctx  */
+	mutex_lock(&kctx->jctx.lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Invalidate all jobs in context, to prevent re-submitting */
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		if (!work_pending(&kctx->jctx.atoms[i].work))
+			kctx->jctx.atoms[i].event_code =
+						BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_hardstop(kctx, i, NULL);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev;
+	int js = target_katom->slot_nr;
+	int priority = target_katom->sched_priority;
+	int i;
+	bool stop_sent = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		if (katom->kctx != kctx)
+			continue;
+
+		if (katom->sched_priority > priority) {
+			if (!stop_sent)
+				kbase_tlstream_tl_attrib_atom_priority_change(
+						target_katom);
+
+			kbase_job_slot_softstop(kbdev, js, katom);
+			stop_sent = true;
+		}
+	}
+}
+
+struct zap_reset_data {
+	/* The stages are:
+	 * 1. The timer has never been called
+	 * 2. The zap has timed out, all slots are soft-stopped - the GPU reset
+	 *    will happen. The GPU has been reset when
+	 *    kbdev->hwaccess.backend.reset_waitq is signalled
+	 *
+	 * (-1 - The timer has been cancelled)
+	 */
+	int stage;
+	struct kbase_device *kbdev;
+	struct hrtimer timer;
+	spinlock_t lock; /* protects updates to stage member */
+};
+
+static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer)
+{
+	struct zap_reset_data *reset_data = container_of(timer,
+						struct zap_reset_data, timer);
+	struct kbase_device *kbdev = reset_data->kbdev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&reset_data->lock, flags);
+
+	if (reset_data->stage == -1)
+		goto out;
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_prepare_to_reset_gpu(kbdev)) {
+		dev_err(kbdev->dev, "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+								ZAP_TIMEOUT);
+		kbase_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	reset_data->stage = 2;
+
+ out:
+	spin_unlock_irqrestore(&reset_data->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct zap_reset_data reset_data;
+	unsigned long flags;
+
+	hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	reset_data.timer.function = zap_timeout_callback;
+
+	spin_lock_init(&reset_data.lock);
+
+	reset_data.kbdev = kbdev;
+	reset_data.stage = 1;
+
+	hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for all jobs to finish, and for the context to be not-scheduled
+	 * (due to kbase_job_zap_context(), we also guarentee it's not in the JS
+	 * policy queue either */
+	wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
+	wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+		   !kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	spin_lock_irqsave(&reset_data.lock, flags);
+	if (reset_data.stage == 1) {
+		/* The timer hasn't run yet - so cancel it */
+		reset_data.stage = -1;
+	}
+	spin_unlock_irqrestore(&reset_data.lock, flags);
+
+	hrtimer_cancel(&reset_data.timer);
+
+	if (reset_data.stage == 2) {
+		/* The reset has already started.
+		 * Wait for the reset to complete
+		 */
+		wait_event(kbdev->hwaccess.backend.reset_wait,
+				atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+						== KBASE_RESET_GPU_NOT_PENDING);
+	}
+	destroy_hrtimer_on_stack(&reset_data.timer);
+
+	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+	/* Ensure that the signallers of the waitqs have finished */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
+{
+	u32 flush_id = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.backend.gpu_powered)
+			flush_id = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return flush_id;
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (NULL == kbdev->hwaccess.backend.reset_workq)
+		return -EINVAL;
+
+	KBASE_DEBUG_ASSERT(0 ==
+		object_is_on_stack(&kbdev->hwaccess.backend.reset_work));
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+#endif
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
+ * @kbdev: kbase device pointer
+ * @kctx:  context to check against
+ * @js:	   slot to check
+ * @target_katom: An atom to check, or NULL if all atoms from @kctx on
+ *                slot @js should be checked
+ *
+ * This checks are based upon parameters that would normally be passed to
+ * kbase_job_slot_hardstop().
+ *
+ * In the event of @target_katom being NULL, this will check the last jobs that
+ * are likely to be running on the slot to see if a) they belong to kctx, and
+ * so would be stopped, and b) whether they have AFBC
+ *
+ * In that case, It's guaranteed that a job currently executing on the HW with
+ * AFBC will be detected. However, this is a conservative check because it also
+ * detects jobs that have just completed too.
+ *
+ * Return: true when hard-stop _might_ stop an afbc atom, else false.
+ */
+static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
+		struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom)
+{
+	bool ret = false;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* When we have an atom the decision can be made straight away. */
+	if (target_katom)
+		return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC);
+
+	/* Otherwise, we must chweck the hardware to see if it has atoms from
+	 * this context with AFBC. */
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		/* Ignore atoms from other contexts, they won't be stopped when
+		 * we use this for checking if we should hard-stop them */
+		if (katom->kctx != kctx)
+			continue;
+
+		/* An atom on this slot and this context: check for AFBC */
+		if (katom->core_req & BASE_JD_REQ_FS_AFBC) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
+}
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * @sw_flags:      Flags to pass in about the soft-stop
+ *
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+			struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+	KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+	kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
+			JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool stopped;
+#if KBASE_GPU_RESET_EN
+	/* We make the check for AFBC before evicting/stopping atoms.  Note
+	 * that no other thread can modify the slots whilst we have the
+	 * hwaccess_lock. */
+	int needs_workaround_for_afbc =
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
+			&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
+					 target_katom);
+#endif
+
+	stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
+							target_katom,
+							JS_COMMAND_HARD_STOP);
+#if KBASE_GPU_RESET_EN
+	if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+			needs_workaround_for_afbc)) {
+		/* MIDBASE-2916 if a fragment job with AFBC encoding is
+		 * hardstopped, ensure to do a soft reset also in order to
+		 * clear the GPU status.
+		 * Workaround for HW issue 8401 has an issue,so after
+		 * hard-stopping just reset the GPU. This will ensure that the
+		 * jobs leave the GPU.*/
+		if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+			dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue");
+			kbase_reset_gpu_locked(kbdev);
+		}
+	}
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	/* For hard-stop, don't enter if hard-stop not allowed */
+	if (hw_action == JS_COMMAND_HARD_STOP &&
+			!kbasep_hard_stop_allowed(kbdev, core_reqs))
+		return;
+
+	/* For soft-stop, don't enter if soft-stop not allowed, or isn't
+	 * causing disjoint */
+	if (hw_action == JS_COMMAND_SOFT_STOP &&
+			!(kbasep_soft_stop_allowed(kbdev, target_katom) &&
+			  (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+		return;
+
+	/* Nothing to do if already logged disjoint state on this atom */
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+		return;
+
+	target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+	kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom)
+{
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+		target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+		kbase_disjoint_state_down(kbdev);
+	}
+}
+
+
+#if KBASE_GPU_RESET_EN
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+	int i;
+
+	kbase_io_history_dump(kbdev);
+
+	dev_err(kbdev->dev, "Register state:");
+	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+	dev_err(kbdev->dev, "  JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x JOB_IRQ_THROTTLE=0x%08x",
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), NULL));
+	for (i = 0; i < 3; i++) {
+		dev_err(kbdev->dev, "  JS%d_STATUS=0x%08x      JS%d_HEAD_LO=0x%08x",
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
+					NULL),
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
+					NULL));
+	}
+	dev_err(kbdev->dev, "  MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+	dev_err(kbdev->dev, "  GPU_IRQ_MASK=0x%08x    JOB_IRQ_MASK=0x%08x     MMU_IRQ_MASK=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+	dev_err(kbdev->dev, "  PWR_OVERRIDE0=0x%08x   PWR_OVERRIDE1=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+	dev_err(kbdev->dev, "  SHADER_CONFIG=0x%08x   L2_MMU_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+}
+
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	int i;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+	bool try_schedule = false;
+	bool silent = false;
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	KBASE_DEBUG_ASSERT(data);
+
+	kbdev = container_of(data, struct kbase_device,
+						hwaccess.backend.reset_work);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_SILENT)
+		silent = true;
+
+	KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	/* Make sure the timer has completed - this cannot be done from
+	 * interrupt context, so this cannot be done within
+	 * kbasep_try_reset_gpu_early. */
+	hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* This would re-activate the GPU. Since it's already idle,
+		 * there's no need to reset it */
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+		kbase_disjoint_state_down(kbdev);
+		wake_up(&kbdev->hwaccess.backend.reset_wait);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	spin_lock(&kbdev->hwaccess_lock);
+	spin_lock(&kbdev->mmu_mask_change);
+	/* We're about to flush out the IRQs and their bottom half's */
+	kbdev->irq_reset_flush = true;
+
+	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
+	 * spinlock; this also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts_nolock(kbdev);
+
+	spin_unlock(&kbdev->mmu_mask_change);
+	spin_unlock(&kbdev->hwaccess_lock);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Ensure that any IRQ handlers have finished
+	 * Must be done without any locks IRQ handlers will take */
+	kbase_synchronize_irqs(kbdev);
+
+	/* Flush out any in-flight work items */
+	kbase_flush_mmu_wqs(kbdev);
+
+	/* The flush has completed so reset the active indicator */
+	kbdev->irq_reset_flush = false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
+		/* Ensure that L2 is not transitioning when we send the reset
+		 * command */
+		while (--max_loops && kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_L2))
+			;
+
+		WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
+	}
+
+	mutex_lock(&kbdev->pm.lock);
+	/* We hold the pm lock, so there ought to be a current policy */
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+	/* All slot have been soft-stopped and we've waited
+	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
+	 * assume that anything that is still left on the GPU is stuck there and
+	 * we'll kill it when we reset the GPU */
+
+	if (!silent)
+		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+								RESET_TIMEOUT);
+
+	/* Output the state of some interesting registers to help in the
+	 * debugging of GPU resets */
+	if (!silent)
+		kbase_debug_dump_registers(kbdev);
+
+	/* Reset the GPU */
+	kbase_pm_init_hw(kbdev, 0);
+
+	/* Complete any jobs that were still on the GPU */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_reset(kbdev, &end_timestamp);
+	kbase_pm_metrics_update(kbdev, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	/* Reprogram the GPU's MMU */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		if (js_devdata->runpool_irq.per_as_data[i].kctx)
+			kbase_mmu_update(
+				js_devdata->runpool_irq.per_as_data[i].kctx);
+		else
+			kbase_mmu_disable_as(kbdev, i);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_pm_enable_interrupts(kbdev);
+
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+
+	kbase_disjoint_state_down(kbdev);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	if (!silent)
+		dev_err(kbdev->dev, "Reset complete");
+
+	if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
+		try_schedule = true;
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Find out what cores are required now */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* Synchronously request and wait for those cores, because if
+	 * instrumentation is enabled it would need them immediately. */
+	kbase_pm_check_transitions_sync(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Try submitting some jobs to restart processing */
+	if (try_schedule) {
+		KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
+									0);
+		kbase_js_sched_all(kbdev);
+	}
+
+	kbase_pm_context_idle(kbdev);
+
+	/* Release vinstr */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+						hwaccess.backend.reset_timer);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Reset still pending? */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+						KBASE_RESET_GPU_COMMITTED)
+		queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+	int i;
+	int pending_jobs = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Count the number of jobs */
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+	if (pending_jobs > 0) {
+		/* There are still jobs on the GPU - wait */
+		return;
+	}
+
+	/* To prevent getting incorrect registers when dumping failed job,
+	 * skip early reset.
+	 */
+	if (kbdev->job_fault_debug != false)
+		return;
+
+	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+	 * been called), and that no other thread beat this thread to starting
+	 * the reset */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+						KBASE_RESET_GPU_COMMITTED) {
+		/* Reset has already occurred */
+		return;
+	}
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_try_reset_gpu_early_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ *   The function returns a boolean which should be interpreted as follows:
+ *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
+ *   false - Another thread is performing a reset, kbase_reset_gpu should
+ *   not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_PREPARED) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return false;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_softstop(kbdev, i, NULL);
+
+	return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	bool ret;
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early_locked(kbdev);
+}
+
+void kbase_reset_gpu_silent(struct kbase_device *kbdev)
+{
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_SILENT) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+			&kbdev->hwaccess.backend.reset_work);
+}
+
+bool kbase_reset_gpu_active(struct kbase_device *kbdev)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_NOT_PENDING)
+		return false;
+
+	return true;
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100755
index 0000000..89b1288
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,155 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev:		Device pointer
+ * @s:			Job slot
+ * @completion_code:	Completion code of job reported by GPU
+ * @job_tail:		Job tail address reported by GPU
+ * @end_timestamp:	Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+					u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static inline char *kbasep_make_job_slot_string(int js, char *js_string)
+{
+	sprintf(js_string, "job_slot_%i", js);
+	return js_string;
+}
+#endif
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ *						   on the specified atom
+ * @kbdev:		Device pointer
+ * @js:			Job slot to stop on
+ * @action:		The action to perform, either JSn_COMMAND_HARD_STOP or
+ *			JSn_COMMAND_SOFT_STOP
+ * @core_reqs:		Core requirements of atom to stop
+ * @target_katom:	Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ *					 slot belonging to a given context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @katom:	Specific atom to stop. May be NULL
+ * @js:		Job slot to hard stop
+ * @action:	The action to perform, either JSn_COMMAND_HARD_STOP or
+ *		JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
new file mode 100755
index 0000000..d7b4d3f
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -0,0 +1,1818 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_10969_workaround.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Return whether the specified ringbuffer is empty. HW access lock must be
+ * held */
+#define SLOT_RB_EMPTY(rb)   (rb->write_idx == rb->read_idx)
+/* Return number of atoms currently in the specified ringbuffer. HW access lock
+ * must be held */
+#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer
+ * @kbdev: Device pointer
+ * @katom: Atom to enqueue
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr];
+
+	WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
+	rb->write_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+/**
+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once
+ * it has been completed
+ * @kbdev:         Device pointer
+ * @js:            Job slot to remove atom from
+ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in
+ *                 which case current time will be used.
+ *
+ * Context: Caller must hold the HW access lock
+ *
+ * Return: Atom removed from ringbuffer
+ */
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
+						int js,
+						ktime_t *end_timestamp)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+	struct kbase_jd_atom *katom;
+
+	if (SLOT_RB_EMPTY(rb)) {
+		WARN(1, "GPU ringbuffer unexpectedly empty\n");
+		return NULL;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
+
+	kbase_gpu_release_atom(kbdev, katom, end_timestamp);
+
+	rb->read_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
+
+	kbase_js_debug_log_current_affinities(kbdev);
+
+	return katom;
+}
+
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
+		return NULL; /* idx out of range */
+
+	return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js)
+{
+	return kbase_gpu_inspect(kbdev, js, 0);
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	if (SLOT_RB_EMPTY(rb))
+		return NULL;
+
+	return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom;
+}
+
+/**
+ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently
+ * on the GPU
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ *
+ * Return: true if there are atoms on the GPU for slot js,
+ *         false otherwise
+ */
+static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (!katom)
+			return false;
+		if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED ||
+				katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
+ * currently on the GPU
+ * @kbdev:  Device pointer
+ *
+ * Return: true if there are any atoms on the GPU, false otherwise
+ */
+static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
+{
+	int js;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+			if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+				return true;
+		}
+	}
+	return false;
+}
+
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED))
+			nr++;
+	}
+
+	return nr;
+}
+
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		if (kbase_gpu_inspect(kbdev, js, i))
+			nr++;
+	}
+
+	return nr;
+}
+
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
+				enum kbase_atom_gpu_rb_state min_rb_state)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state >= min_rb_state))
+			nr++;
+	}
+
+	return nr;
+}
+
+/**
+ * check_secure_atom - Check if the given atom is in the given secure state and
+ *                     has a ringbuffer state of at least
+ *                     KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @katom:  Atom pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if atom is in the given state, false otherwise
+ */
+static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure)
+{
+	if (katom->gpu_rb_state >=
+			KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION &&
+			((kbase_jd_katom_is_protected(katom) && secure) ||
+			(!kbase_jd_katom_is_protected(katom) && !secure)))
+		return true;
+
+	return false;
+}
+
+/**
+ * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given
+ *                                secure state in the ringbuffers of at least
+ *                                state
+ *                                KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE
+ * @kbdev:  Device pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if any atoms are in the given state, false otherwise
+ */
+static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
+		bool secure)
+{
+	int js, i;
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, i);
+
+			if (katom) {
+				if (check_secure_atom(katom, secure))
+					return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* The GPU is being reset - so prevent submission */
+		return 0;
+	}
+
+	return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
+}
+
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom);
+
+static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
+						int js,
+						struct kbase_jd_atom *katom)
+{
+	/* The most recently checked affinity. Having this at this scope allows
+	 * us to guarantee that we've checked the affinity in this function
+	 * call.
+	 */
+	u64 recently_chosen_affinity = 0;
+	bool chosen_affinity = false;
+	bool retry;
+
+	do {
+		retry = false;
+
+		/* NOTE: The following uses a number of FALLTHROUGHs to optimize
+		 * the calls to this function. Ending of the function is
+		 * indicated by BREAK OUT */
+		switch (katom->coreref_state) {
+			/* State when job is first attempted to be run */
+		case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+			KBASE_DEBUG_ASSERT(katom->affinity == 0);
+
+			/* Compute affinity */
+			if (false == kbase_js_choose_affinity(
+					&recently_chosen_affinity, kbdev, katom,
+									js)) {
+				/* No cores are currently available */
+				/* *** BREAK OUT: No state transition *** */
+				break;
+			}
+
+			chosen_affinity = true;
+
+			/* Request the cores */
+			kbase_pm_request_cores(kbdev,
+					katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+			katom->affinity = recently_chosen_affinity;
+
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+			{
+				enum kbase_pm_cores_ready cores_ready;
+
+				KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+				cores_ready = kbase_pm_register_inuse_cores(
+						kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						katom->affinity);
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Stay in this state and return, to
+					 * retry at this state later */
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: No state transition
+					 * *** */
+					break;
+				}
+				/* Proceed to next state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+			}
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+			/* Optimize out choosing the affinity twice in the same
+			 * function call */
+			if (chosen_affinity == false) {
+				/* See if the affinity changed since a previous
+				 * call. */
+				if (false == kbase_js_choose_affinity(
+						&recently_chosen_affinity,
+							kbdev, katom, js)) {
+					/* No cores are currently available */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REQUEST_ON_RECHECK_FAILED,
+						katom->kctx, katom,
+						katom->jc, js,
+						(u32) recently_chosen_affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+				chosen_affinity = true;
+			}
+
+			/* Now see if this requires a different set of cores */
+			if (recently_chosen_affinity != katom->affinity) {
+				enum kbase_pm_cores_ready cores_ready;
+
+				kbase_pm_request_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+				/* Register new cores whilst we still hold the
+				 * old ones, to minimize power transitions */
+				cores_ready =
+					kbase_pm_register_inuse_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+
+				/* Fixup the state that was reduced by
+				 * deref_cores: */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				katom->affinity = recently_chosen_affinity;
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				/* Now might be waiting for powerup again, with
+				 * a new affinity */
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Return to previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_ON_RECHECK_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+			}
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+		case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+			KBASE_DEBUG_ASSERT(katom->affinity ==
+						recently_chosen_affinity);
+
+			/* Note: this is where the caller must've taken the
+			 * hwaccess_lock */
+
+			/* Check for affinity violations - if there are any,
+			 * then we just ask the caller to requeue and try again
+			 * later */
+			if (kbase_js_affinity_would_violate(kbdev, js,
+					katom->affinity) != false) {
+				/* Return to previous state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				/* *** BREAK OUT: Transition to lower state ***
+				 */
+				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_AFFINITY_WOULD_VIOLATE,
+					katom->kctx, katom, katom->jc, js,
+					(u32) katom->affinity);
+				break;
+			}
+
+			/* No affinity violations would result, so the cores are
+			 * ready */
+			katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
+			/* *** BREAK OUT: Cores Ready *** */
+			break;
+
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false,
+					"Unhandled kbase_atom_coreref_state %d",
+							katom->coreref_state);
+			break;
+		}
+	} while (retry != false);
+
+	return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
+}
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	switch (katom->coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(katom->affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							katom->coreref_state);
+		break;
+	}
+
+	katom->affinity = 0;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+}
+
+static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							coreref_state);
+		break;
+	}
+}
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp)
+{
+	switch (katom->gpu_rb_state) {
+	case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+		/* Should be impossible */
+		WARN(1, "Attempting to release atom not in ringbuffer\n");
+		break;
+
+	case KBASE_ATOM_GPU_RB_SUBMITTED:
+		/* Inform power management at start/finish of atom so it can
+		 * update its GPU utilisation metrics. Mark atom as not
+		 * submitted beforehand. */
+		katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		kbase_pm_metrics_update(kbdev, end_timestamp);
+
+		if (katom->core_req & BASE_JD_REQ_PERMON)
+			kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_READY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+		kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
+							katom->affinity);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+		break;
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+		if (katom->protected_state.enter !=
+				KBASE_ATOM_ENTER_PROTECTED_CHECK ||
+				katom->protected_state.exit !=
+				KBASE_ATOM_EXIT_PROTECTED_CHECK)
+			kbdev->protected_mode_transition = false;
+
+		if (kbase_jd_katom_is_protected(katom) &&
+				(katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_IDLE_L2))
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+		break;
+	}
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	kbase_gpu_release_atom(kbdev, katom, NULL);
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+}
+
+static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	bool slot_busy[3];
+
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+	slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+
+	if ((js == 2 && !(slot_busy[0] || slot_busy[1])) ||
+		(js != 2 && !slot_busy[2]))
+		return true;
+
+	/* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */
+	if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1) ||
+			backend->rmu_workaround_flag))
+		return false;
+
+	/* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */
+	if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) ||
+			!backend->rmu_workaround_flag))
+		return false;
+
+	backend->rmu_workaround_flag = !backend->rmu_workaround_flag;
+
+	return true;
+}
+
+static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev)
+{
+	return kbdev->protected_mode;
+}
+
+static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot enter protected mode: protected callbacks not specified.\n");
+
+	/*
+	 * When entering into protected mode, we must ensure that the
+	 * GPU is not operating in coherent mode as well. This is to
+	 * ensure that no protected memory can be leaked.
+	 */
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE);
+
+	if (kbdev->protected_ops) {
+		/* Switch GPU to protected mode */
+		err = kbdev->protected_ops->protected_mode_enter(kbdev);
+
+		if (err)
+			dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n",
+					err);
+		else
+			kbdev->protected_mode = true;
+	}
+
+	return err;
+}
+
+static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot exit protected mode: protected callbacks not specified.\n");
+
+	if (!kbdev->protected_ops)
+		return -EINVAL;
+
+	kbase_reset_gpu_silent(kbdev);
+
+	return 0;
+}
+
+static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	switch (katom[idx]->protected_state.enter) {
+	case KBASE_ATOM_ENTER_PROTECTED_CHECK:
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		kbdev->protected_mode_transition = true;
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_VINSTR;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_VINSTR:
+		if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
+			/*
+			 * We can't switch now because
+			 * the vinstr core state switch
+			 * is not done yet.
+			 */
+			return -EAGAIN;
+		}
+
+		/* Once reaching this point GPU must be
+		 * switched to protected mode or vinstr
+		 * re-enabled. */
+
+		/*
+		 * Not in correct mode, begin protected mode switch.
+		 * Entering protected mode requires us to power down the L2,
+		 * and drop out of fully coherent mode.
+		 */
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
+		/* Avoid unnecessary waiting on non-ACE platforms. */
+		if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) {
+			if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+				/*
+				* The L2 is still powered, wait for all the users to
+				* finish with it before doing the actual reset.
+				*/
+				return -EAGAIN;
+			}
+		}
+
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_FINISHED;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
+
+		/* No jobs running, so we can switch GPU mode right now. */
+		err = kbase_gpu_protected_mode_enter(kbdev);
+
+		/*
+		 * Regardless of result, we are no longer transitioning
+		 * the GPU.
+		 */
+		kbdev->protected_mode_transition = false;
+
+		if (err) {
+			/*
+			 * Failed to switch into protected mode, resume
+			 * vinstr core and fail atom.
+			 */
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order. */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+			return -EINVAL;
+		}
+
+		/* Protected mode sanity checks. */
+		KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) ==
+			kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]),
+			kbase_gpu_in_protected_mode(kbdev));
+		katom[idx]->gpu_rb_state =
+			KBASE_ATOM_GPU_RB_READY;
+	}
+
+	return 0;
+}
+
+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+
+	switch (katom[idx]->protected_state.exit) {
+	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		/*
+		 * Exiting protected mode requires a reset, but first the L2
+		 * needs to be powered down to ensure it's not active when the
+		 * reset is issued.
+		 */
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
+
+		kbdev->protected_mode_transition = true;
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+	case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/*
+			 * The L2 is still powered, wait for all the users to
+			 * finish with it before doing the actual reset.
+			 */
+			return -EAGAIN;
+		}
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET:
+		/* Issue the reset to the GPU */
+		err = kbase_gpu_protected_mode_reset(kbdev);
+
+		if (err) {
+			kbdev->protected_mode_transition = false;
+
+			/* Failed to exit protected mode, fail atom */
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+			return -EINVAL;
+		}
+
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT:
+		if (kbase_reset_gpu_active(kbdev))
+			return -EAGAIN;
+
+		kbdev->protected_mode_transition = false;
+		kbdev->protected_mode = false;
+
+		/* protected mode sanity checks */
+		KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev));
+		KBASE_DEBUG_ASSERT_MSG(
+			(kbase_jd_katom_is_protected(katom[idx]) && js == 0) ||
+			!kbase_jd_katom_is_protected(katom[idx]),
+			"Protected atom on JS%d not supported", js);
+	}
+
+	return 0;
+}
+
+void kbase_backend_slot_update(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_jd_atom *katom[2];
+		int idx;
+
+		katom[0] = kbase_gpu_inspect(kbdev, js, 0);
+		katom[1] = kbase_gpu_inspect(kbdev, js, 1);
+		WARN_ON(katom[1] && !katom[0]);
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			bool cores_ready;
+			int ret;
+
+			if (!katom[idx])
+				continue;
+
+			switch (katom[idx]->gpu_rb_state) {
+			case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+				/* Should be impossible */
+				WARN(1, "Attempting to update atom not in ringbuffer\n");
+				break;
+
+			case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+				if (katom[idx]->atom_flags &
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED)
+					break;
+
+				katom[idx]->gpu_rb_state =
+				KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+				if (kbase_gpu_check_secure_atoms(kbdev,
+						!kbase_jd_katom_is_protected(
+						katom[idx])))
+					break;
+
+				if (kbdev->protected_mode_transition)
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+
+				/*
+				 * Exiting protected mode must be done before
+				 * the references on the cores are taken as
+				 * a power down the L2 is required which
+				 * can't happen after the references for this
+				 * atom are taken.
+				 */
+
+				if (!kbase_gpu_in_protected_mode(kbdev) &&
+					kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition into protected mode. */
+					ret = kbase_jm_enter_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				} else if (kbase_gpu_in_protected_mode(kbdev) &&
+					!kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition out of protected mode. */
+					ret = kbase_jm_exit_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				}
+				katom[idx]->protected_state.exit =
+						KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+				/* Atom needs no protected mode transition. */
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+				if (katom[idx]->will_fail_event_code) {
+					kbase_gpu_mark_atom_for_return(kbdev,
+							katom[idx]);
+					/* Set EVENT_DONE so this atom will be
+					   completed, not unpulled. */
+					katom[idx]->event_code =
+						BASE_JD_EVENT_DONE;
+					/* Only return if head atom or previous
+					 * atom already removed - as atoms must
+					 * be returned in order. */
+					if (idx == 0 ||	katom[0]->gpu_rb_state ==
+							KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+						kbase_gpu_dequeue_atom(kbdev, js, NULL);
+						kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+					}
+					break;
+				}
+
+				cores_ready =
+					kbasep_js_job_check_ref_cores(kbdev, js,
+								katom[idx]);
+
+				if (katom[idx]->event_code ==
+						BASE_JD_EVENT_PM_EVENT) {
+					katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+					break;
+				}
+
+				if (!cores_ready)
+					break;
+
+				kbase_js_affinity_retain_slot_cores(kbdev, js,
+							katom[idx]->affinity);
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+				if (!kbase_gpu_rmu_workaround(kbdev, js))
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_READY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_READY:
+
+				/* Only submit if head atom or previous atom
+				 * already submitted */
+				if (idx == 1 &&
+					(katom[0]->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED &&
+					katom[0]->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+					break;
+
+				/* Check if this job needs the cycle counter
+				 * enabled before submission */
+				if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+					kbase_pm_request_gpu_cycle_counter_l2_is_on(
+									kbdev);
+
+				kbase_job_hw_submit(kbdev, katom[idx], js);
+				katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_SUBMITTED;
+
+				/* Inform power management at start/finish of
+				 * atom so it can update its GPU utilisation
+				 * metrics. */
+				kbase_pm_metrics_update(kbdev,
+						&katom[idx]->start_timestamp);
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_SUBMITTED:
+				/* Atom submitted to HW, nothing else to do */
+				break;
+
+			case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+				/* Only return if head atom or previous atom
+				 * already removed - as atoms must be returned
+				 * in order */
+				if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					kbase_gpu_dequeue_atom(kbdev, js, NULL);
+					kbase_jm_return_atom_to_js(kbdev,
+								katom[idx]);
+				}
+				break;
+			}
+		}
+	}
+
+	/* Warn if PRLAM-8987 affinity restrictions are violated */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1)) &&
+			kbase_gpu_atoms_submitted(kbdev, 2));
+}
+
+
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbase_gpu_enqueue_atom(kbdev, katom);
+	kbase_backend_slot_update(kbdev);
+}
+
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_atom *next_katom;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = kbase_gpu_inspect(kbdev, js, 0);
+	next_katom = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (next_katom && katom->kctx == next_katom->kctx &&
+		next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+		(kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+		kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL)
+									!= 0)) {
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+				JS_COMMAND_NOP, NULL);
+		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		return true;
+	}
+
+	return false;
+}
+
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) &&
+			completion_code != BASE_JD_EVENT_DONE &&
+			!(completion_code & BASE_JD_SW_EVENT)) {
+		katom->need_cache_flush_cores_retained = katom->affinity;
+		kbase_pm_request_cores(kbdev, false, katom->affinity);
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
+		if (kbdev->gpu_props.num_core_groups > 1 &&
+			!(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[0].core_mask
+									) &&
+			(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[1].core_mask
+									)) {
+			dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
+			katom->need_cache_flush_cores_retained =
+								katom->affinity;
+			kbase_pm_request_cores(kbdev, false,
+							katom->affinity);
+		}
+	}
+
+	katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+	kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);
+	kbase_tlstream_tl_nret_atom_lpu(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[
+				katom->slot_nr]);
+	kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]);
+	kbase_tlstream_tl_nret_ctx_lpu(
+			kctx,
+			&kbdev->gpu_props.props.raw_props.js_features[
+				katom->slot_nr]);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED) {
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		/*
+		 * Dequeue next atom from ringbuffers on same slot if required.
+		 * This atom will already have been removed from the NEXT
+		 * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
+		 * the atoms on this slot are returned in the correct order.
+		 */
+		if (next_katom && katom->kctx == next_katom->kctx) {
+			kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+			kbase_jm_return_atom_to_js(kbdev, next_katom);
+		}
+	} else if (completion_code != BASE_JD_EVENT_DONE) {
+		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+		int i;
+
+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
+		KBASE_TRACE_DUMP(kbdev);
+#endif
+		kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+		/*
+		 * Remove all atoms on the same context from ringbuffers. This
+		 * will not remove atoms that are already on the GPU, as these
+		 * are guaranteed not to have fail dependencies on the failed
+		 * atom.
+		 */
+		for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+			struct kbase_jd_atom *katom_idx0 =
+						kbase_gpu_inspect(kbdev, i, 0);
+			struct kbase_jd_atom *katom_idx1 =
+						kbase_gpu_inspect(kbdev, i, 1);
+
+			if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
+				katom_idx0->gpu_rb_state !=
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Dequeue katom_idx0 from ringbuffer */
+				kbase_gpu_dequeue_atom(kbdev, i, end_timestamp);
+
+				if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					katom_idx0->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+					/* Dequeue katom_idx1 from ringbuffer */
+					kbase_gpu_dequeue_atom(kbdev, i,
+							end_timestamp);
+
+					katom_idx1->event_code =
+							BASE_JD_EVENT_STOPPED;
+					kbase_jm_return_atom_to_js(kbdev,
+								katom_idx1);
+				}
+				katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+
+			} else if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					katom_idx1->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Can not dequeue this atom yet - will be
+				 * dequeued when atom at idx0 completes */
+				katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_gpu_mark_atom_for_return(kbdev,
+								katom_idx1);
+			}
+		}
+	}
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
+					js, completion_code);
+
+	if (job_tail != 0 && job_tail != katom->jc) {
+		bool was_updated = (job_tail != katom->jc);
+
+		/* Some of the job has been executed, so we update the job chain
+		 * address to where we should resume from */
+		katom->jc = job_tail;
+		if (was_updated)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+						katom, job_tail, js);
+	}
+
+	/* Only update the event code for jobs that weren't cancelled */
+	if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+		katom->event_code = (base_jd_event_code)completion_code;
+
+	kbase_device_trace_register_access(kctx, REG_WRITE,
+						JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+						1 << js);
+
+	/* Complete the job, and start new ones
+	 *
+	 * Also defer remaining work onto the workqueue:
+	 * - Re-queue Soft-stopped jobs
+	 * - For any other jobs, queue the job back into the dependency system
+	 * - Schedule out the parent context if necessary, and schedule a new
+	 *   one in.
+	 */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	{
+		/* The atom in the HEAD */
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		if (next_katom && next_katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+								js_string),
+						ktime_to_ns(*end_timestamp),
+						(u32)next_katom->kctx->id, 0,
+						next_katom->work_id);
+			kbdev->hwaccess.backend.slot_rb[js].last_context =
+							next_katom->kctx;
+		} else {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+								js_string),
+						ktime_to_ns(ktime_get()), 0, 0,
+						0);
+			kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
+		}
+	}
+#endif
+
+	if (completion_code == BASE_JD_EVENT_STOPPED)
+		katom = kbase_jm_return_atom_to_js(kbdev, katom);
+	else
+		katom = kbase_jm_complete(kbdev, katom, end_timestamp);
+
+	if (katom) {
+		/* Cross-slot dependency has now become runnable. Try to submit
+		 * it. */
+
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		kbase_jm_try_kick(kbdev, 1 << katom->slot_nr);
+	}
+
+	/* Job completion may have unblocked other atoms. Try to update all job
+	 * slots */
+	kbase_backend_slot_update(kbdev);
+}
+
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int atom_idx = 0;
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, atom_idx);
+			bool keep_in_jm_rb = false;
+
+			if (!katom)
+				break;
+
+			if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED)
+				keep_in_jm_rb = true;
+
+			kbase_gpu_release_atom(kbdev, katom, NULL);
+
+			/*
+			 * If the atom wasn't on HW when the reset was issued
+			 * then leave it in the RB and next time we're kicked
+			 * it will be processed again from the starting state.
+			 */
+			if (keep_in_jm_rb) {
+				katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+				katom->affinity = 0;
+				katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+				/* As the atom was not removed, increment the
+				 * index so that we read the correct atom in the
+				 * next iteration. */
+				atom_idx++;
+				continue;
+			}
+
+			/*
+			 * The atom was on the HW when the reset was issued
+			 * all we can do is fail the atom.
+			 */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			kbase_jm_complete(kbdev, katom, end_timestamp);
+		}
+	}
+
+	kbdev->protected_mode_transition = false;
+	kbdev->protected_mode = false;
+}
+
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
+	kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
+							katom->core_req, katom);
+	kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+}
+
+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom,
+						u32 action,
+						bool disjoint)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+	kbase_gpu_mark_atom_for_return(kbdev, katom);
+	kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+	if (disjoint)
+		kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
+									katom);
+}
+
+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
+{
+	if (katom->x_post_dep) {
+		struct kbase_jd_atom *dep_atom = katom->x_post_dep;
+
+		if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB &&
+			dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_RETURN_TO_JS)
+			return dep_atom->slot_nr;
+	}
+	return -1;
+}
+
+static void kbase_job_evicted(struct kbase_jd_atom *katom)
+{
+	kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom,
+			katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
+}
+
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	struct kbase_jd_atom *katom_idx0;
+	struct kbase_jd_atom *katom_idx1;
+
+	bool katom_idx0_valid, katom_idx1_valid;
+
+	bool ret = false;
+
+	int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
+	katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (katom) {
+		katom_idx0_valid = (katom_idx0 == katom);
+		/* If idx0 is to be removed and idx1 is on the same context,
+		 * then idx1 must also be removed otherwise the atoms might be
+		 * returned out of order */
+		if (katom_idx1)
+			katom_idx1_valid = (katom_idx1 == katom) ||
+						(katom_idx0_valid &&
+							(katom_idx0->kctx ==
+							katom_idx1->kctx));
+		else
+			katom_idx1_valid = false;
+	} else {
+		katom_idx0_valid = (katom_idx0 &&
+					(!kctx || katom_idx0->kctx == kctx));
+		katom_idx1_valid = (katom_idx1 &&
+					(!kctx || katom_idx1->kctx == kctx));
+	}
+
+	if (katom_idx0_valid)
+		stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
+	if (katom_idx1_valid)
+		stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1);
+
+	if (katom_idx0_valid) {
+		if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Simple case - just dequeue and return */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			if (katom_idx1_valid) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				katom_idx1->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx1);
+				kbasep_js_clear_submit_allowed(js_devdata,
+							katom_idx1->kctx);
+			}
+
+			katom_idx0->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+			kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+			kbasep_js_clear_submit_allowed(js_devdata,
+							katom_idx0->kctx);
+		} else {
+			/* katom_idx0 is on GPU */
+			if (katom_idx1 && katom_idx1->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* katom_idx0 and katom_idx1 are on GPU */
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+					/* idx0 has already completed - stop
+					 * idx1 if needed*/
+					if (katom_idx1_valid) {
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				} else {
+					/* idx1 is in NEXT registers - attempt
+					 * to remove */
+					kbase_reg_write(kbdev,
+							JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+					if (kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_HI), NULL)
+									!= 0) {
+						/* idx1 removed successfully,
+						 * will be handled in IRQ */
+						kbase_job_evicted(katom_idx1);
+						kbase_gpu_remove_atom(kbdev,
+								katom_idx1,
+								action, true);
+						stop_x_dep_idx1 =
+					should_stop_x_dep_slot(katom_idx1);
+
+						/* stop idx0 if still on GPU */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx0,
+								action);
+						ret = true;
+					} else if (katom_idx1_valid) {
+						/* idx0 has already completed,
+						 * stop idx1 if needed */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				}
+			} else if (katom_idx1_valid) {
+				/* idx1 not on GPU but must be dequeued*/
+
+				/* idx1 will be handled in IRQ */
+				kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+				/* stop idx0 */
+				/* This will be repeated for anything removed
+				 * from the next registers, since their normal
+				 * flow was also interrupted, and this function
+				 * might not enter disjoint state e.g. if we
+				 * don't actually do a hard stop on the head
+				 * atom */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			} else {
+				/* no atom in idx1 */
+				/* just stop idx0 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			}
+		}
+	} else if (katom_idx1_valid) {
+		if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Mark for return */
+			/* idx1 will be returned once idx0 completes */
+			kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+		} else {
+			/* idx1 is on GPU */
+			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+				/* idx0 has already completed - stop idx1 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx1,
+									action);
+				ret = true;
+			} else {
+				/* idx1 is in NEXT registers - attempt to
+				 * remove */
+				kbase_reg_write(kbdev, JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_LO), NULL) != 0 ||
+				    kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_HI), NULL) != 0) {
+					/* idx1 removed successfully, will be
+					 * handled in IRQ once idx0 completes */
+					kbase_job_evicted(katom_idx1);
+					kbase_gpu_remove_atom(kbdev, katom_idx1,
+									action,
+									false);
+				} else {
+					/* idx0 has already completed - stop
+					 * idx1 */
+					kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+					ret = true;
+				}
+			}
+		}
+	}
+
+
+	if (stop_x_dep_idx0 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0,
+								NULL, action);
+
+	if (stop_x_dep_idx1 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1,
+								NULL, action);
+
+	return ret;
+}
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	/* Limit the number of loops to avoid a hang if the interrupt is missed
+	 */
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+	unsigned long flags;
+
+	mutex_lock(&kbdev->cacheclean_lock);
+
+	/* use GPU_COMMAND completion solution */
+	/* clean & invalidate the caches */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+	/* wait for cache flush to complete before continuing */
+	while (--max_loops &&
+		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+						CLEAN_CACHES_COMPLETED) == 0)
+		;
+
+	/* clear the CLEAN_CACHES_COMPLETED irq */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
+							CLEAN_CACHES_COMPLETED);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+						CLEAN_CACHES_COMPLETED, NULL);
+	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING,
+	    "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+	mutex_unlock(&kbdev->cacheclean_lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_unrequest_cores(kbdev, false,
+					katom->need_cache_flush_cores_retained);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	/*
+	 * If cache flush required due to HW workaround then perform the flush
+	 * now
+	 */
+	if (katom->need_cache_flush_cores_retained) {
+		kbase_gpu_cacheclean(kbdev, katom);
+		katom->need_cache_flush_cores_retained = 0;
+	}
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969)            &&
+	    (katom->core_req & BASE_JD_REQ_FS)                        &&
+	    katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT       &&
+	    (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+	    !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+		dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+		if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after was soft-stopped
+			 * Due to an HW issue we try to execute the job again.
+			 */
+			dev_dbg(kbdev->dev,
+				"Clamping has been executed, try to rerun the job\n"
+			);
+			katom->event_code = BASE_JD_EVENT_STOPPED;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+		}
+	}
+
+	/* Clear the coreref_state now - while check_deref_cores() may not have
+	 * been called yet, the caller will have taken a copy of this field. If
+	 * this is not done, then if the atom is re-scheduled (following a soft
+	 * stop) then the core reference would not be retaken. */
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->affinity = 0;
+}
+
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity,
+			coreref_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!kbdev->pm.active_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		mutex_lock(&kbdev->pm.lock);
+		kbase_pm_update_active(kbdev);
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+void kbase_gpu_dump_slots(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	unsigned long flags;
+	int js;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+									js,
+									idx);
+
+			if (katom)
+				dev_info(kbdev->dev,
+				"  js%d idx%d : katom=%p gpu_rb_state=%d\n",
+				js, idx, katom, katom->gpu_rb_state);
+			else
+				dev_info(kbdev->dev, "  js%d idx%d : empty\n",
+								js, idx);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
new file mode 100755
index 0000000..1e0e05a
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_H_
+#define _KBASE_HWACCESS_GPU_H_
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/**
+ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot
+ *
+ * @kbdev:         Device pointer
+ * @js:            Job slot to evict from
+ *
+ * Evict the atom in the NEXT slot for the specified job slot. This function is
+ * called from the job complete IRQ handler when the previous job has failed.
+ *
+ * Return: true if job evicted from NEXT registers, false otherwise
+ */
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_gpu_complete_hw - Complete an atom on job slot js
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot that has completed
+ * @completion_code: Event code from job that has completed
+ * @job_tail:        The tail address from the hardware if the job has partially
+ *                   completed
+ * @end_timestamp:   Time of completion
+ */
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
+ *
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ * @idx:    Index into ringbuffer. 0 is the job currently running on
+ *          the slot, 1 is the job waiting, all other values are invalid.
+ * Return:  The atom at that position in the ringbuffer
+ *          or NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx);
+
+/**
+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
new file mode 100755
index 0000000..54d8ddd
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
@@ -0,0 +1,303 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+#include "mali_kbase_hw.h"
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+									int js)
+{
+	/*
+	 * Here are the reasons for using job slot 2:
+	 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+	 * - In absence of the above, then:
+	 *  - Atoms with BASE_JD_REQ_COHERENT_GROUP
+	 *  - But, only when there aren't contexts with
+	 *  KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+	 *  all cores on slot 1 could be blocked by those using a coherent group
+	 *  on slot 2
+	 *  - And, only when you actually have 2 or more coregroups - if you
+	 *  only have 1 coregroup, then having jobs for slot 2 implies they'd
+	 *  also be for slot 1, meaning you'll get interference from them. Jobs
+	 *  able to run on slot 2 could also block jobs that can only run on
+	 *  slot 1 (tiler jobs)
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+
+	if (js != 2)
+		return true;
+
+	/* Only deal with js==2 now: */
+	if (kbdev->gpu_props.num_core_groups > 1) {
+		/* Only use slot 2 in the 2+ coregroup case */
+		if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+					KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
+								false) {
+			/* ...But only when we *don't* have atoms that run on
+			 * all cores */
+
+			/* No specific check for BASE_JD_REQ_COHERENT_GROUP
+			 * atoms - the policy will sort that out */
+			return true;
+		}
+	}
+
+	/* Above checks failed mean we shouldn't use slot 2 */
+	return false;
+}
+
+/*
+ * As long as it has been decided to have a deeper modification of
+ * what job scheduler, power manager and affinity manager will
+ * implement, this function is just an intermediate step that
+ * assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ *   and high cores in a core split will be evently distributed.
+ * - Odd combinations of core requirements have been filtered out
+ *   and do not get to this function (e.g. CS+T+NSS is not
+ *   supported here).
+ * - This function is frequently called and can be optimized,
+ *   (see notes in loops), but as the functionallity will likely
+ *   be modified, optimization has not been addressed.
+*/
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js)
+{
+	base_jd_core_req core_req = katom->core_req;
+	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+	u64 core_availability_mask;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+	/*
+	 * If no cores are currently available (core availability policy is
+	 * transitioning) then fail.
+	 */
+	if (0 == core_availability_mask) {
+		*affinity = 0;
+		return false;
+	}
+
+	KBASE_DEBUG_ASSERT(js >= 0);
+
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+								BASE_JD_REQ_T) {
+		 /* If the hardware supports XAFFINITY then we'll only enable
+		  * the tiler (which is the default so this is a no-op),
+		  * otherwise enable shader core 0. */
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = 1;
+		else
+			*affinity = 0;
+
+		return true;
+	}
+
+	if (1 == kbdev->gpu_props.num_cores) {
+		/* trivial case only one core, nothing to do */
+		*affinity = core_availability_mask &
+				kbdev->pm.debug_core_mask[js];
+	} else {
+		if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+			if (js == 0 || num_core_groups == 1) {
+				/* js[0] and single-core-group systems just get
+				 * the first core group */
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[0].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+			} else {
+				/* js[1], js[2] use core groups 0, 1 for
+				 * dual-core-group systems */
+				u32 core_group_idx = ((u32) js) - 1;
+
+				KBASE_DEBUG_ASSERT(core_group_idx <
+							num_core_groups);
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+
+				/* If the job is specifically targeting core
+				 * group 1 and the core availability policy is
+				 * keeping that core group off, then fail */
+				if (*affinity == 0 && core_group_idx == 1 &&
+						kbdev->pm.backend.cg1_disabled
+								== true)
+					katom->event_code =
+							BASE_JD_EVENT_PM_EVENT;
+			}
+		} else {
+			/* All cores are available when no core split is
+			 * required */
+			*affinity = core_availability_mask &
+					kbdev->pm.debug_core_mask[js];
+		}
+	}
+
+	/*
+	 * If no cores are currently available in the desired core group(s)
+	 * (core availability policy is transitioning) then fail.
+	 */
+	if (*affinity == 0)
+		return false;
+
+	/* Enable core 0 if tiler required for hardware without XAFFINITY
+	 * support (notes above) */
+	if (core_req & BASE_JD_REQ_T) {
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = *affinity | 1;
+	}
+
+	return true;
+}
+
+static inline bool kbase_js_affinity_is_violating(
+						struct kbase_device *kbdev,
+								u64 *affinities)
+{
+	/* This implementation checks whether the two slots involved in Generic
+	 * thread creation have intersecting affinity. This is due to micro-
+	 * architectural issues where a job in slot A targetting cores used by
+	 * slot B could prevent the job in slot B from making progress until the
+	 * job in slot A has completed.
+	 */
+	u64 affinity_set_left;
+	u64 affinity_set_right;
+	u64 intersection;
+
+	KBASE_DEBUG_ASSERT(affinities != NULL);
+
+	affinity_set_left = affinities[1];
+
+	affinity_set_right = affinities[2];
+
+	/* A violation occurs when any bit in the left_set is also in the
+	 * right_set */
+	intersection = affinity_set_left & affinity_set_right;
+
+	return (bool) (intersection != (u64) 0u);
+}
+
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
+			sizeof(js_devdata->runpool_irq.slot_affinities));
+
+	new_affinities[js] |= affinity;
+
+	return kbase_js_affinity_is_violating(kbdev, new_affinities);
+}
+
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
+								== false);
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		cnt =
+		++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (cnt == 1)
+			js_devdata->runpool_irq.slot_affinities[js] |= bit;
+
+		cores &= ~bit;
+	}
+}
+
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		KBASE_DEBUG_ASSERT(
+		js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
+
+		cnt =
+		--(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (0 == cnt)
+			js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
+
+		cores &= ~bit;
+	}
+}
+
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int slot_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	for (slot_nr = 0; slot_nr < 3; ++slot_nr)
+		KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
+							NULL, 0u, slot_nr,
+			(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
new file mode 100755
index 0000000..35d9781
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Affinity Manager internal APIs.
+ */
+
+#ifndef _KBASE_JS_AFFINITY_H_
+#define _KBASE_JS_AFFINITY_H_
+
+/**
+ * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
+ * submit a job to a particular job slot in the current status
+ *
+ * @kbdev: The kbase device structure of the device
+ * @js:    Job slot number to check for allowance
+ *
+ * Will check if submitting to the given job slot is allowed in the current
+ * status.  For example using job slot 2 while in soft-stoppable state and only
+ * having 1 coregroup is not allowed by the policy. This function should be
+ * called prior to submitting a job to a slot to make sure policy rules are not
+ * violated.
+ *
+ * The following locking conditions are made on the caller
+ * - it must hold hwaccess_lock
+ */
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_js_choose_affinity - Compute affinity for a given job.
+ *
+ * @affinity: Affinity bitmap computed
+ * @kbdev:    The kbase device structure of the device
+ * @katom:    Job chain of which affinity is going to be found
+ * @js:       Slot the job chain is being submitted
+ *
+ * Currently assumes an all-on/all-off power management policy.
+ * Also assumes there is at least one core with tiler available.
+ *
+ * Returns true if a valid affinity was chosen, false if
+ * no cores were available.
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					int js);
+
+/**
+ * kbase_js_affinity_would_violate - Determine whether a proposed affinity on
+ * job slot @js would cause a violation of affinity restrictions.
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot to test
+ * @affinity: The affinity mask to test
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ *
+ * Return: true if the affinity would violate the restrictions
+ */
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
+ *                                       a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot retaining the cores
+ * @affinity: The cores to retain
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
+ *                                        by a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       Job slot
+ * @affinity: Bit mask of core to be released
+ *
+ * Cores must be released as soon as a job is dequeued from a slot's 'submit
+ * slots', and before another job is submitted to those slots. Otherwise, the
+ * refcount could exceed the maximum number submittable to a slot,
+ * %BASE_JM_SUBMIT_SLOTS.
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_debug_log_current_affinities - log the current affinities
+ *
+ * @kbdev:  Kbase device structure
+ *
+ * Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else				/*  KBASE_TRACE_ENABLE  */
+static inline void
+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+}
+#endif				/*  KBASE_TRACE_ENABLE  */
+
+#endif				/* _KBASE_JS_AFFINITY_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100755
index 0000000..b09d491
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,357 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is
+ * enabled for a particular level is down to the integrator. However this is
+ * being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	s8 nr_running_ctxs;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* Timer must stop if we are suspending */
+	if (backend->suspend_timer)
+		return false;
+
+	/* nr_contexts_pullable is updated with the runpool_mutex. However, the
+	 * locking in the caller gives us a barrier that ensures
+	 * nr_contexts_pullable is up-to-date for reading */
+	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (kbdev->js_data.softstop_always) {
+		/* Debug support for allowing soft-stop on a single context */
+		return true;
+	}
+#endif				/* CONFIG_MALI_DEBUG */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+		/* Timeouts would have to be 4x longer (due to micro-
+		 * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there's:
+		 * - 2 or more CL contexts
+		 * - 1 or more GLES contexts
+		 *
+		 * NOTE: We will treat a context that has both Compute and Non-
+		 * Compute jobs will be treated as an OpenCL context (hence, we
+		 * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+		 */
+		{
+			s8 nr_compute_ctxs =
+				kbasep_js_ctx_attr_count_on_runpool(kbdev,
+						KBASEP_JS_CTX_ATTR_COMPUTE);
+			s8 nr_noncompute_ctxs = nr_running_ctxs -
+							nr_compute_ctxs;
+
+			return (bool) (nr_compute_ctxs >= 2 ||
+							nr_noncompute_ctxs > 0);
+		}
+	} else {
+		/* Run the timer callback whenever you have at least 1 context
+		 */
+		return (bool) (nr_running_ctxs > 0);
+	}
+}
+
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_backend_data *backend;
+	int s;
+	bool reset_needed = false;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	backend = container_of(timer, struct kbase_backend_data,
+							scheduling_timer);
+	kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
+	js_devdata = &kbdev->js_data;
+
+	/* Loop through the slots */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+		struct kbase_jd_atom *atom = NULL;
+
+		if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
+			atom = kbase_gpu_inspect(kbdev, s, 0);
+			KBASE_DEBUG_ASSERT(atom != NULL);
+		}
+
+		if (atom != NULL) {
+			/* The current version of the model doesn't support
+			 * Soft-Stop */
+			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+				u32 ticks = atom->sched_info.cfs.ticks++;
+
+#if !CINSTR_DUMPING_ENABLED
+				u32 soft_stop_ticks, hard_stop_ticks,
+								gpu_reset_ticks;
+				if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks_cl;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_cl;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_cl;
+				} else {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_ss;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_ss;
+				}
+
+				/* If timeouts have been changed then ensure
+				 * that atom tick count is not greater than the
+				 * new soft_stop timeout. This ensures that
+				 * atoms do not miss any of the timeouts due to
+				 * races between this worker and the thread
+				 * changing the timeouts. */
+				if (backend->timeouts_updated &&
+						ticks > soft_stop_ticks)
+					ticks = atom->sched_info.cfs.ticks =
+							soft_stop_ticks;
+
+				/* Job is Soft-Stoppable */
+				if (ticks == soft_stop_ticks) {
+					int disjoint_threshold =
+		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+					u32 softstop_flags = 0u;
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks ticks.
+					 * Soft stop the slot so we can run
+					 * other jobs.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+					/* nr_user_contexts_running is updated
+					 * with the runpool_mutex, but we can't
+					 * take that here.
+					 *
+					 * However, if it's about to be
+					 * increased then the new context can't
+					 * run any jobs until they take the
+					 * hwaccess_lock, so it's OK to observe
+					 * the older value.
+					 *
+					 * Similarly, if it's about to be
+					 * decreased, the last job from another
+					 * context has already finished, so it's
+					 * not too bad that we observe the older
+					 * value and register a disjoint event
+					 * when we try soft-stopping */
+					if (js_devdata->nr_user_contexts_running
+							>= disjoint_threshold)
+						softstop_flags |=
+						JS_COMMAND_SW_CAUSES_DISJOINT;
+
+					kbase_job_slot_softstop_swflags(kbdev,
+						s, atom, softstop_flags);
+#endif
+				} else if (ticks == hard_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_ss ticks.
+					 * It should have been soft-stopped by
+					 * now. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks == gpu_reset_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_ss ticks.
+					 * It should have left the GPU by now.
+					 * Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#else				/* !CINSTR_DUMPING_ENABLED */
+				/* NOTE: During CINSTR_DUMPING_ENABLED, we use
+				 * the alternate timeouts, which makes the hard-
+				 * stop and GPU reset timeout much longer. We
+				 * also ensure that we don't soft-stop at all.
+				 */
+				if (ticks == js_devdata->soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks. We do
+					 * not soft-stop during
+					 * CINSTR_DUMPING_ENABLED, however.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+				} else if (ticks ==
+					js_devdata->hard_stop_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_dumping
+					 * ticks. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks ==
+					js_devdata->gpu_reset_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_dumping
+					 * ticks. It should have left the GPU by
+					 * now. Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#endif				/* !CINSTR_DUMPING_ENABLED */
+			}
+		}
+	}
+#if KBASE_GPU_RESET_EN
+	if (reset_needed) {
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* the timer is re-issued if there is contexts in the run-pool */
+
+	if (backend->timer_running)
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+			HRTIMER_MODE_REL);
+
+	backend->timeouts_updated = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	unsigned long flags;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	if (!timer_callback_should_run(kbdev)) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = false;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/* From now on, return value of timer_callback_should_run() will
+		 * also cause the timer to not requeue itself. Its return value
+		 * cannot change, because it depends on variables updated with
+		 * the runpool_mutex held, which the caller of this must also
+		 * hold */
+		hrtimer_cancel(&backend->scheduling_timer);
+	}
+
+	if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = true;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+							HRTIMER_MODE_REL);
+
+		KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
+									0u);
+	}
+}
+
+int kbase_backend_timer_init(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	backend->scheduling_timer.function = timer_callback;
+
+	backend->timer_running = false;
+
+	return 0;
+}
+
+void kbase_backend_timer_term(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_cancel(&backend->scheduling_timer);
+}
+
+void kbase_backend_timer_suspend(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = true;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timer_resume(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = false;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->timeouts_updated = true;
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
new file mode 100755
index 0000000..3f53779
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#ifndef _KBASE_JS_BACKEND_H_
+#define _KBASE_JS_BACKEND_H_
+
+/**
+ * kbase_backend_timer_init() - Initialise the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_term() - Terminate the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver termination
+ */
+void kbase_backend_timer_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
+ *                               timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on suspend, after the active count has reached
+ * zero. This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ *                              scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that is is not guaranteed to
+ * re-start the timer, only evalute whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100755
index 0000000..08eea1c
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,403 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+		u32 num_pages)
+{
+	u64 region;
+
+	/* can't lock a zero sized range */
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	region = pfn << PAGE_SHIFT;
+	/*
+	 * fls returns (given the ASSERT above):
+	 * 1 .. 32
+	 *
+	 * 10 + fls(num_pages)
+	 * results in the range (11 .. 42)
+	 */
+
+	/* gracefully handle num_pages being zero */
+	if (0 == num_pages) {
+		region |= 11;
+	} else {
+		u8 region_width;
+
+		region_width = 10 + fls(num_pages);
+		if (num_pages != (1ul << (region_width - 11))) {
+			/* not pow2, so must go up to the next pow2 */
+			region_width += 1;
+		}
+		KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+		KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+		region |= region_width;
+	}
+
+	return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+		unsigned int as_nr, struct kbase_context *kctx)
+{
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending. Do not log remaining register accesses. */
+	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
+
+	if (max_loops == 0) {
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		return -1;
+	}
+
+	/* If waiting in loop was performed, log last read value. */
+	if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
+		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	return 0;
+}
+
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
+		struct kbase_context *kctx)
+{
+	int status;
+
+	/* write AS_COMMAND when MMU is ready to accept another command */
+	status = wait_ready(kbdev, as_nr, kctx);
+	if (status == 0)
+		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
+									kctx);
+
+	return status;
+}
+
+static void validate_protected_page_fault(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	/* GPUs which support (native) protected mode shall not report page
+	 * fault addresses unless it has protected debug mode and protected
+	 * debug mode is turned on */
+	u32 protected_debug_mode = 0;
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+		return;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		protected_debug_mode = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS),
+				kctx) & GPU_DBGEN;
+	}
+
+	if (!protected_debug_mode) {
+		/* fault_addr should never be reported in protected mode.
+		 * However, we just continue by printing an error message */
+		dev_err(kbdev->dev, "Fault address reported in protected mode\n");
+	}
+}
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+	const int num_as = 16;
+	const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+	const int pf_shift = 0;
+	const unsigned long as_bit_mask = (1UL << num_as) - 1;
+	unsigned long flags;
+	u32 new_mask;
+	u32 tmp;
+
+	/* bus faults */
+	u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+	/* page faults (note: Ignore ASes with both pf and bf) */
+	u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	/* remember current mask */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	/* mask interrupts for now */
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+	while (bf_bits | pf_bits) {
+		struct kbase_as *as;
+		int as_no;
+		struct kbase_context *kctx;
+
+		/*
+		 * the while logic ensures we have a bit set, no need to check
+		 * for not-found here
+		 */
+		as_no = ffs(bf_bits | pf_bits) - 1;
+		as = &kbdev->as[as_no];
+
+		/*
+		 * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+		 * Bus/Page faults _should_ only occur whilst jobs are running,
+		 * and a job causing the Bus/Page fault shouldn't complete until
+		 * the MMU is updated
+		 */
+		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+
+
+		/* find faulting address */
+		as->fault_addr = kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_HI),
+						kctx);
+		as->fault_addr <<= 32;
+		as->fault_addr |= kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_LO),
+						kctx);
+
+		/* Mark the fault protected or not */
+		as->protected_mode = kbdev->protected_mode;
+
+		if (kbdev->protected_mode && as->fault_addr)
+		{
+			/* check if address reporting is allowed */
+			validate_protected_page_fault(kbdev, kctx);
+		}
+
+		/* report the fault to debugfs */
+		kbase_as_fault_debugfs_new(kbdev, as_no);
+
+		/* record the fault status */
+		as->fault_status = kbase_reg_read(kbdev,
+						  MMU_AS_REG(as_no,
+							AS_FAULTSTATUS),
+						  kctx);
+
+		/* find the fault type */
+		as->fault_type = (bf_bits & (1 << as_no)) ?
+				KBASE_MMU_FAULT_TYPE_BUS :
+				KBASE_MMU_FAULT_TYPE_PAGE;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		as->fault_extra_addr = kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
+				kctx);
+		as->fault_extra_addr <<= 32;
+		as->fault_extra_addr |= kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
+				kctx);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+		if (kbase_as_has_bus_fault(as)) {
+			/* Mark bus fault as handled.
+			 * Note that a bus fault is processed first in case
+			 * where both a bus fault and page fault occur.
+			 */
+			bf_bits &= ~(1UL << as_no);
+
+			/* remove the queued BF (and PF) from the mask */
+			new_mask &= ~(MMU_BUS_ERROR(as_no) |
+					MMU_PAGE_FAULT(as_no));
+		} else {
+			/* Mark page fault as handled */
+			pf_bits &= ~(1UL << as_no);
+
+			/* remove the queued PF from the mask */
+			new_mask &= ~MMU_PAGE_FAULT(as_no);
+		}
+
+		/* Process the interrupt for this address space */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_interrupt_process(kbdev, kctx, as);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+
+	/* reenable interrupts */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	new_mask |= tmp;
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx)
+{
+	struct kbase_mmu_setup *current_setup = &as->current_setup;
+	u32 transcfg = 0;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+
+	/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
+	/* Clear PTW_MEMATTR bits */
+	transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+	/* Enable correct PTW_MEMATTR bits */
+	transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
+		/* Clear PTW_SH bits */
+		transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+		/* Enable correct PTW_SH bits */
+		transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
+	}
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+			transcfg, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+			(current_setup->transcfg >> 32) & 0xFFFFFFFFUL, kctx);
+
+#else /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+			current_setup->transtab & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+			(current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+			current_setup->memattr & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+			(current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_tlstream_tl_attrib_as_config(as,
+			current_setup->transtab,
+			current_setup->memattr,
+			transcfg);
+
+	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+}
+
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+		unsigned int handling_irq)
+{
+	int ret;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	if (op == AS_COMMAND_UNLOCK) {
+		/* Unlock doesn't require a lock first */
+		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+	} else {
+		u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+		/* Lock the region that needs to be updated */
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+				lock_addr & 0xFFFFFFFFUL, kctx);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+				(lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
+		write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+
+		/* Run the MMU operation */
+		write_cmd(kbdev, as->number, op, kctx);
+
+		/* Wait for the flush to complete */
+		ret = wait_ready(kbdev, as->number, kctx);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+			/* Issue an UNLOCK command to ensure that valid page
+			   tables are re-read by the GPU after an update.
+			   Note that, the FLUSH command should perform all the
+			   actions necessary, however the bus logs show that if
+			   multiple page faults occur within an 8 page region
+			   the MMU does not always re-read the updated page
+			   table entries for later faults or is only partially
+			   read, it subsequently raises the page fault IRQ for
+			   the same addresses, the unlock ensures that the MMU
+			   cache is flushed, so updates can be re-read.  As the
+			   region is now unlocked we need to issue 2 UNLOCK
+			   commands in order to flush the MMU/uTLB,
+			   see PRLAM-8812.
+			 */
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+		}
+	}
+
+	return ret;
+}
+
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 pf_bf_mask;
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	/* Clear the page (and bus fault IRQ as well in case one occurred) */
+	pf_bf_mask = MMU_PAGE_FAULT(as->number);
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		pf_bf_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 irq_mask;
+
+	/* Enable the page fault IRQ (and bus fault IRQ as well in case one
+	 * occurred) */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
+			MMU_PAGE_FAULT(as->number);
+
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		irq_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
new file mode 100755
index 0000000..c02253c
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Interface file for the direct implementation for MMU hardware access
+ *
+ * Direct MMU hardware interface
+ *
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
+#define _MALI_KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * kbase_mmu_interrupt - Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the &kbase_device.
+ *
+ * @kbdev:          kbase context to clear the fault from.
+ * @irq_stat:       Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+#endif	/* _MALI_KBASE_MMU_HW_DIRECT_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
new file mode 100755
index 0000000..0614348
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static void always_on_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void always_on_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+	"always_on",			/* name */
+	always_on_init,			/* init */
+	always_on_term,			/* term */
+	always_on_get_core_mask,	/* get_core_mask */
+	always_on_get_core_active,	/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_ALWAYS_ON,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
new file mode 100755
index 0000000..f9d244b
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -0,0 +1,77 @@
+
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * DOC:
+ * The "Always on" power management policy has the following
+ * characteristics:
+ *
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *    All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *    All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *    The Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed. The GPU itself is also kept powered, even though it is not
+ *    needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * Note:
+ *
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data
+ * @dummy: unused dummy variable
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_always_on {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
new file mode 100755
index 0000000..7690ec5
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -0,0 +1,460 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * GPU backend implementation of base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+
+#include <mali_kbase_pm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
+
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_on_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = true;
+}
+
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_off_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = false;
+}
+
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+	struct kbase_pm_callback_conf *callbacks;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_init(&kbdev->pm.lock);
+
+	kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!kbdev->pm.backend.gpu_poweroff_wait_wq)
+		return -ENOMEM;
+
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
+			kbase_pm_gpu_poweroff_wait_wq);
+
+	kbdev->pm.backend.gpu_powered = false;
+	kbdev->pm.suspending = false;
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->pm.backend.driver_ready_for_irqs = false;
+#endif /* CONFIG_MALI_DEBUG */
+	kbdev->pm.backend.gpu_in_desired_state = true;
+	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+	if (callbacks) {
+		kbdev->pm.backend.callback_power_on =
+					callbacks->power_on_callback;
+		kbdev->pm.backend.callback_power_off =
+					callbacks->power_off_callback;
+		kbdev->pm.backend.callback_power_suspend =
+					callbacks->power_suspend_callback;
+		kbdev->pm.backend.callback_power_resume =
+					callbacks->power_resume_callback;
+		kbdev->pm.callback_power_runtime_init =
+					callbacks->power_runtime_init_callback;
+		kbdev->pm.callback_power_runtime_term =
+					callbacks->power_runtime_term_callback;
+		kbdev->pm.backend.callback_power_runtime_on =
+					callbacks->power_runtime_on_callback;
+		kbdev->pm.backend.callback_power_runtime_off =
+					callbacks->power_runtime_off_callback;
+		kbdev->pm.backend.callback_power_runtime_idle =
+					callbacks->power_runtime_idle_callback;
+	} else {
+		kbdev->pm.backend.callback_power_on = NULL;
+		kbdev->pm.backend.callback_power_off = NULL;
+		kbdev->pm.backend.callback_power_suspend = NULL;
+		kbdev->pm.backend.callback_power_resume = NULL;
+		kbdev->pm.callback_power_runtime_init = NULL;
+		kbdev->pm.callback_power_runtime_term = NULL;
+		kbdev->pm.backend.callback_power_runtime_on = NULL;
+		kbdev->pm.backend.callback_power_runtime_off = NULL;
+		kbdev->pm.backend.callback_power_runtime_idle = NULL;
+	}
+
+	/* Initialise the metrics subsystem */
+	ret = kbasep_pm_metrics_init(kbdev);
+	if (ret)
+		return ret;
+
+	init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
+	kbdev->pm.backend.l2_powered = 0;
+
+	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
+	kbdev->pm.backend.reset_done = false;
+
+	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+	kbdev->pm.active_count = 0;
+
+	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+	init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
+
+	if (kbase_pm_ca_init(kbdev) != 0)
+		goto workq_fail;
+
+	if (kbase_pm_policy_init(kbdev) != 0)
+		goto pm_policy_fail;
+
+	return 0;
+
+pm_policy_fail:
+	kbase_pm_ca_term(kbdev);
+workq_fail:
+	kbasep_pm_metrics_term(kbdev);
+	return -EINVAL;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Turn clocks and interrupts on - no-op if we haven't done a previous
+	 * kbase_pm_clock_off() */
+	kbase_pm_clock_on(kbdev, is_resume);
+
+	/* Update core status as required by the policy */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+	kbase_pm_update_cores_state(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+	/* NOTE: We don't wait to reach the desired state, since running atoms
+	 * will wait for that state to be reached anyway */
+}
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+			pm.backend.gpu_poweroff_wait_work);
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	/* Wait for power transitions to complete. We do this with no locks held
+	 * so that we don't deadlock with any pending workqueues */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+	kbase_pm_check_transitions_sync(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	if (!backend->poweron_required) {
+		WARN_ON(kbdev->l2_available_bitmap ||
+				kbdev->shader_available_bitmap ||
+				kbdev->tiler_available_bitmap);
+
+		/* Consume any change-state events */
+		kbase_timeline_pm_check_handle_event(kbdev,
+					KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		/* Disable interrupts and turn the clock off */
+		if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
+			/*
+			 * Page/bus faults are pending, must drop locks to
+			 * process.  Interrupts are disabled so no more faults
+			 * should be generated at this point.
+			 */
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			kbase_flush_mmu_wqs(kbdev);
+			mutex_lock(&js_devdata->runpool_mutex);
+			mutex_lock(&kbdev->pm.lock);
+
+			/* Turn off clock now that fault have been handled. We
+			 * dropped locks so poweron_required may have changed -
+			 * power back on if this is the case.*/
+			if (backend->poweron_required)
+				kbase_pm_clock_on(kbdev, false);
+			else
+				WARN_ON(!kbase_pm_clock_off(kbdev,
+						backend->poweroff_is_suspend));
+		}
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	backend->poweroff_wait_in_progress = false;
+	if (backend->poweron_required) {
+		backend->poweron_required = false;
+		kbase_pm_update_cores_state_nolock(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	wake_up(&kbdev->pm.backend.poweroff_wait);
+}
+
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (!kbdev->pm.backend.poweroff_wait_in_progress) {
+		/* Force all cores off */
+		kbdev->pm.backend.desired_shader_state = 0;
+		kbdev->pm.backend.desired_tiler_state = 0;
+
+		/* Force all cores to be unavailable, in the situation where
+		 * transitions are in progress for some cores but not others,
+		 * and kbase_pm_check_transitions_nolock can not immediately
+		 * power off the cores */
+		kbdev->shader_available_bitmap = 0;
+		kbdev->tiler_available_bitmap = 0;
+		kbdev->l2_available_bitmap = 0;
+
+		kbdev->pm.backend.poweroff_wait_in_progress = true;
+		kbdev->pm.backend.poweroff_is_suspend = is_suspend;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/*Kick off wq here. Callers will have to wait*/
+		queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
+				&kbdev->pm.backend.gpu_poweroff_wait_work);
+	} else {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+}
+
+static bool is_poweroff_in_progress(struct kbase_device *kbdev)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
+{
+	wait_event_killable(kbdev->pm.backend.poweroff_wait,
+			is_poweroff_in_progress(kbdev));
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, don't enable IRQs as we are not ready to receive
+	 * them. */
+	ret = kbase_pm_init_hw(kbdev, flags);
+	if (ret) {
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		return ret;
+	}
+
+	kbasep_pm_read_present_cores(kbdev);
+
+	kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
+			kbdev->pm.debug_core_mask[1] =
+			kbdev->pm.debug_core_mask[2] =
+			kbdev->gpu_props.props.raw_props.shader_present;
+
+	/* Pretend the GPU is active to prevent a power policy turning the GPU
+	 * cores off */
+	kbdev->pm.active_count = 1;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+	/* Ensure cycle counter is off */
+	kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+	/* We are ready to receive IRQ's now as power policy is set up, so
+	 * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+	kbdev->pm.backend.driver_ready_for_irqs = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+#endif
+	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the GPU and any cores needed by the policy */
+	kbase_pm_do_poweron(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	kbase_pm_context_idle(kbdev);
+
+	return 0;
+}
+
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
+
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
+
+	/* Free any resources the policy allocated */
+	kbase_pm_policy_term(kbdev);
+	kbase_pm_ca_term(kbdev);
+
+	/* Shut down the metrics subsystem */
+	kbasep_pm_metrics_term(kbdev);
+
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
+}
+
+void kbase_pm_power_changed(struct kbase_device *kbdev)
+{
+	bool cores_are_available;
+	unsigned long flags;
+
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
+
+	if (cores_are_available) {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		kbase_backend_slot_update(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2)
+{
+	kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
+	kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
+	kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
+	kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
+			new_core_mask_js2;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	/* Force power off the GPU and all cores (regardless of policy), only
+	 * after the PM active count reaches zero (otherwise, we risk turning it
+	 * off prematurely) */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, true);
+
+	kbase_backend_timer_suspend(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbdev->pm.suspending = false;
+	kbase_pm_do_poweron(kbdev, true);
+
+	kbase_backend_timer_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100755
index 0000000..9cecd4e
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,180 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+	&kbase_pm_ca_fixed_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_ca_demand_policy_ops,
+	&kbase_pm_ca_random_policy_ops,
+#endif
+};
+
+/**
+ * POLICY_COUNT - The number of policies available in the system.
+ *
+ * This is derived from the number of functions listed in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.ca_current_policy = policy_list[0];
+
+	kbdev->pm.backend.ca_current_policy->init(kbdev);
+
+	return 0;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_current_policy->term(kbdev);
+}
+
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
+
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.ca_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
+
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *new_policy)
+{
+	const struct kbase_pm_ca_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
+								new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.ca_current_policy;
+	kbdev->pm.backend.ca_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.ca_current_policy = new_policy;
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+					kbdev->shader_ready_bitmap,
+					kbdev->shader_transitioning_bitmap);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
+
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* All cores must be enabled when instrumentation is in use */
+	if (kbdev->pm.backend.instr_enabled)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	if (kbdev->pm.backend.ca_current_policy == NULL)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
+						kbdev->pm.debug_core_mask_all;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
+
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+							u64 cores_transitioning)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.ca_current_policy != NULL)
+		kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+							cores_ready,
+							cores_transitioning);
+}
+
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.instr_enabled = true;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbdev->pm.backend.instr_enabled = false;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
new file mode 100755
index 0000000..ee9e751
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+/**
+ * kbase_pm_ca_init - Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the core availability framework was successfully initialized,
+ *         -errno otherwise
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_term - Terminate core availability framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the currently available shader cores.
+ * Calls into the core availability policy
+ *
+ * Return: The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_update_core_status - Update core status
+ *
+ * @kbdev:               The kbase device structure for the device (must be
+ *                       a valid pointer)
+ * @cores_ready:         The bit mask of cores ready for job submission
+ * @cores_transitioning: The bit mask of cores that are transitioning power
+ *                       state
+ *
+ * Update core availability policy with current core power status
+ *
+ * Calls into the core availability policy
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+/**
+ * kbase_pm_ca_instr_enable - Enable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This overrides the output of the core availability policy, ensuring that all
+ * cores are available
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_instr_disable - Disable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This disables any previously enabled override, and resumes normal policy
+ * functionality
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_CA_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
new file mode 100644
index 0000000..19f1f73
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
@@ -0,0 +1,246 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation.
+ *
+ * This policy periodically selects a new core mask at demand. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/printk.h>
+
+#ifndef MALI_CA_TIMER
+static struct workqueue_struct *mali_ca_wq = NULL;
+#endif
+static int num_shader_cores_to_be_enabled = 0;
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+#ifndef MALI_CA_TIMER
+	struct kbase_device *kbdev = (struct kbase_device *)priv;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+#endif
+
+    num_shader_cores_to_be_enabled = cores -1;
+    printk("pp num=%d\n", num_shader_cores_to_be_enabled);
+    if (num_shader_cores_to_be_enabled < 1)
+        num_shader_cores_to_be_enabled = 1;
+
+#ifndef MALI_CA_TIMER
+    queue_work(mali_ca_wq, &data->wq_work);
+#endif
+
+    return 0;
+}
+
+#ifdef MALI_CA_TIMER
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void demand_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+#if 0
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+#endif
+    data->cores_desired = num_shader_cores_to_be_enabled;
+
+	if (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+            dev_info(kbdev->dev, "error, ca demand policy : new core mask=%llX\n",
+				data->cores_desired);
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "ca demand policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(demand_timer_callback);
+#else
+static void do_ca_scaling(struct work_struct *work)
+{
+    //unsigned long flags;
+	struct kbase_device *kbdev = = container_of(work,
+            struct kbasep_pm_ca_policy_demand, wq_work);
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+    data->cores_desired =  num_shader_cores_to_be_enabled;
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+
+}
+#endif
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+#ifdef MALI_CA_TIMER
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = demand_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+#else
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+	mali_ca_wq = alloc_workqueue("gpu_ca_wq", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+	mali_ca_wq = create_workqueue("gpu_ca_wq");
+#endif
+	INIT_WORK(&data->wq_work, do_ca_scaling);
+    if (mali_ca_wq == NULL) printk("Unable to create gpu scaling workqueue\n");
+#endif
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+#ifdef MALI_CA_TIMER
+	del_timer(&data->core_change_timer);
+#else
+    if (mali_ca_wq) {
+        flush_workqueue(mali_ca_wq);
+        destroy_workqueue(mali_ca_wq);
+        mali_ca_wq = NULL;
+    }
+#endif
+}
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.demand.cores_enabled;
+}
+
+static void demand_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the demand core availability
+ * policy.
+ *
+ * This is the static structure that defines the demand core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_demand_policy_ops);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
new file mode 100644
index 0000000..a3dfe2a
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_DEMAND_H
+#define MALI_KBASE_PM_CA_DEMAND_H
+#include <linux/workqueue.h>
+/**
+ * struct kbasep_pm_ca_policy_demand - Private structure for demand ca policy
+ *
+ * This contains data that is private to the demand core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+#define MALI_CA_TIMER 1
+struct kbasep_pm_ca_policy_demand {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+#ifdef MALI_CA_TIMER
+	struct timer_list core_change_timer;
+#else
+	struct work_struct wq_work;
+#endif
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops;
+extern int mali_perf_set_num_pp_cores(int cores);
+
+#endif /* MALI_KBASE_PM_CA_DEMAND_H */
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
new file mode 100755
index 0000000..1db6586
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static void fixed_init(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev,
+					u64 cores_ready,
+					u64 cores_transitioning)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(cores_ready);
+	CSTD_UNUSED(cores_transitioning);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the fixed power policy.
+ *
+ * This is the static structure that defines the fixed power policy's callback
+ * and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+	"fixed",			/* name */
+	fixed_init,			/* init */
+	fixed_term,			/* term */
+	fixed_get_core_mask,		/* get_core_mask */
+	fixed_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_FIXED,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
new file mode 100755
index 0000000..a763155
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Dummy member - no state is needed
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+	int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
new file mode 100644
index 0000000..0d1b220
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation.
+ *
+ * This policy periodically selects a new core mask at random. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void random_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(random_timer_callback);
+
+static void random_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = random_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+}
+
+static void random_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	del_timer(&data->core_change_timer);
+}
+
+static u64 random_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.random.cores_enabled;
+}
+
+static void random_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the random core availability
+ * policy.
+ *
+ * This is the static structure that defines the random core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops = {
+	"random",			/* name */
+	random_init,			/* init */
+	random_term,			/* term */
+	random_get_core_mask,		/* get_core_mask */
+	random_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_RANDOM,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_random_policy_ops);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
new file mode 100644
index 0000000..a8c9b15
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_RANDOM_H
+#define MALI_KBASE_PM_CA_RANDOM_H
+
+/**
+ * struct kbasep_pm_ca_policy_random - Private structure for random ca policy
+ *
+ * This contains data that is private to the random core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+struct kbasep_pm_ca_policy_random {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+	struct timer_list core_change_timer;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_RANDOM_H */
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100755
index 0000000..f891fa2
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count == 0)
+		return 0;
+
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	"coarse_demand",			/* name */
+	coarse_demand_init,			/* init */
+	coarse_demand_term,			/* term */
+	coarse_demand_get_core_mask,		/* get_core_mask */
+	coarse_demand_get_core_active,		/* get_core_active */
+	0u,					/* flags */
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100755
index 0000000..749d305
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * DOC:
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
+ *                                         policy
+ *
+ * This contains data that is private to the coarse demand power policy.
+ *
+ * @dummy: Dummy member - no state needed
+ */
+struct kbasep_pm_policy_coarse_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100755
index 0000000..790e338
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,509 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#include "mali_kbase_pm_ca_demand.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/**
+ * enum kbase_pm_core_type - The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to
+ * - kbase_pm_get_present_cores()
+ * - kbase_pm_get_active_cores()
+ * - kbase_pm_get_trans_cores()
+ * - kbase_pm_get_ready_cores().
+ *
+ * They specify which type of core should be acted on.  These values are set in
+ * a manner that allows core_type_to_reg() function to be simpler and more
+ * efficient.
+ *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ */
+enum kbase_pm_core_type {
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO
+};
+
+/**
+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power
+ *                                 management framework.
+ *
+ *  @time_period_start: time at which busy/idle measurements started
+ *  @time_busy: number of ns the GPU was busy executing jobs since the
+ *          @time_period_start timestamp.
+ *  @time_idle: number of ns since time_period_start the GPU was not executing
+ *          jobs since the @time_period_start timestamp.
+ *  @prev_busy: busy time in ns of previous time period.
+ *           Updated when metrics are reset.
+ *  @prev_idle: idle time in ns of previous time period
+ *           Updated when metrics are reset.
+ *  @gpu_active: true when the GPU is executing jobs. false when
+ *           not. Updated when the job scheduler informs us a job in submitted
+ *           or removed from a GPU slot.
+ *  @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
+ *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
+ *           GL jobs never run on slot 2 this slot is not recorded.
+ *  @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ *  @timer: timer to regularly make DVFS decisions based on the power
+ *           management metrics.
+ *  @timer_active: boolean indicating @timer is running
+ *  @platform_data: pointer to data controlled by platform specific code
+ *  @kbdev: pointer to kbase device for which metrics are collected
+ *
+ */
+struct kbasep_pm_metrics_data {
+	ktime_t time_period_start;
+	u32 time_busy;
+	u32 time_idle;
+	u32 prev_busy;
+	u32 prev_idle;
+	bool gpu_active;
+	u32 busy_cl[2];
+	u32 busy_gl;
+	u32 active_cl_ctx[2];
+	u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
+	spinlock_t lock;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	struct hrtimer timer;
+	bool timer_active;
+#endif
+
+	void *platform_data;
+	struct kbase_device *kbdev;
+};
+
+union kbase_pm_policy_data {
+	struct kbasep_pm_policy_always_on always_on;
+	struct kbasep_pm_policy_coarse_demand coarse_demand;
+	struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+	struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+union kbase_pm_ca_policy_data {
+	struct kbasep_pm_ca_policy_fixed fixed;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_ca_policy_random random;
+	struct kbasep_pm_ca_policy_demand demand;
+#endif
+};
+
+/**
+ * struct kbase_pm_backend_data - Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ *
+ * @ca_current_policy: The policy that is currently actively controlling core
+ *                     availability.
+ * @pm_current_policy: The policy that is currently actively controlling the
+ *                     power state.
+ * @ca_policy_data:    Private data for current CA policy
+ * @pm_policy_data:    Private data for current PM policy
+ * @ca_in_transition:  Flag indicating when core availability policy is
+ *                     transitioning cores. The core availability policy must
+ *                     set this when a change in core availability is occurring.
+ *                     power_change_lock must be held when accessing this.
+ * @reset_done:        Flag when a reset is complete
+ * @reset_done_wait:   Wait queue to wait for changes to @reset_done
+ * @l2_powered_wait:   Wait queue for whether the l2 cache has been powered as
+ *                     requested
+ * @l2_powered:        State indicating whether all the l2 caches are powered.
+ *                     Non-zero indicates they're *all* powered
+ *                     Zero indicates that some (or all) are not powered
+ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
+ *                              users
+ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
+ * @desired_shader_state: A bit mask identifying the shader cores that the
+ *                        power policy would like to be on. The current state
+ *                        of the cores may be different, but there should be
+ *                        transitions in progress that will eventually achieve
+ *                        this state (assuming that the policy doesn't change
+ *                        its mind in the mean time).
+ * @powering_on_shader_state: A bit mask indicating which shader cores are
+ *                            currently in a power-on transition
+ * @desired_tiler_state: A bit mask identifying the tiler cores that the power
+ *                       policy would like to be on. See @desired_shader_state
+ * @powering_on_tiler_state: A bit mask indicating which tiler core are
+ *                           currently in a power-on transition
+ * @powering_on_l2_state: A bit mask indicating which l2-caches are currently
+ *                        in a power-on transition
+ * @gpu_in_desired_state: This flag is set if the GPU is powered as requested
+ *                        by the desired_xxx_state variables
+ * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
+ * @gpu_powered:       Set to true when the GPU is powered and register
+ *                     accesses are possible, false otherwise
+ * @instr_enabled:     Set to true when instrumentation is enabled,
+ *                     false otherwise
+ * @cg1_disabled:      Set if the policy wants to keep the second core group
+ *                     powered off
+ * @driver_ready_for_irqs: Debug state indicating whether sufficient
+ *                         initialization of the driver has occurred to handle
+ *                         IRQs
+ * @gpu_powered_lock:  Spinlock that must be held when writing @gpu_powered or
+ *                     accessing @driver_ready_for_irqs
+ * @metrics:           Structure to hold metrics for the GPU
+ * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
+ *                        powered off
+ * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
+ *                        and/or timers are powered off
+ * @gpu_poweroff_timer: Timer for powering off GPU
+ * @gpu_poweroff_wq:   Workqueue to power off GPU on when timer fires
+ * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
+ * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
+ *                           timer callback
+ * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
+ *                          callback
+ * @poweroff_timer_needed: true if the poweroff timer is currently required,
+ *                         false otherwise
+ * @poweroff_timer_running: true if the poweroff timer is currently running,
+ *                          false otherwise
+ *                          power_change_lock should be held when accessing,
+ *                          unless there is no way the timer can be running (eg
+ *                          hrtimer_cancel() was called immediately before)
+ * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
+ *                             hwaccess_lock must be held when accessing
+ * @poweron_required: true if a GPU power on is required. Should only be set
+ *                    when poweroff_wait_in_progress is true, and therefore the
+ *                    GPU can not immediately be powered on. pm.lock must be
+ *                    held when accessing
+ * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend
+ *                       request. pm.lock must be held when accessing
+ * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off
+ * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq
+ * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete
+ * @callback_power_on: Callback when the GPU needs to be turned on. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_off: Callback when the GPU may be turned off. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to
+ *                          be turned off. See &struct kbase_pm_callback_conf
+ * @callback_power_resume: Callback when a resume occurs and the GPU needs to
+ *                          be turned on. See &struct kbase_pm_callback_conf
+ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See
+ *                             &struct kbase_pm_callback_conf
+ * @callback_power_runtime_off: Callback when the GPU may be turned off. See
+ *                              &struct kbase_pm_callback_conf
+ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
+ *                              &struct kbase_pm_callback_conf
+ *
+ * Note:
+ * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
+ * policy is being changed with kbase_pm_ca_set_policy() or
+ * kbase_pm_set_policy(). The change is protected under
+ * kbase_device.pm.power_change_lock. Direct access to this
+ * from IRQ context must therefore check for NULL. If NULL, then
+ * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
+ * functions that would have been done under IRQ.
+ */
+struct kbase_pm_backend_data {
+	const struct kbase_pm_ca_policy *ca_current_policy;
+	const struct kbase_pm_policy *pm_current_policy;
+	union kbase_pm_ca_policy_data ca_policy_data;
+	union kbase_pm_policy_data pm_policy_data;
+	bool ca_in_transition;
+	bool reset_done;
+	wait_queue_head_t reset_done_wait;
+	wait_queue_head_t l2_powered_wait;
+	int l2_powered;
+	int gpu_cycle_counter_requests;
+	spinlock_t gpu_cycle_counter_requests_lock;
+
+	u64 desired_shader_state;
+	u64 powering_on_shader_state;
+	u64 desired_tiler_state;
+	u64 powering_on_tiler_state;
+	u64 powering_on_l2_state;
+
+	bool gpu_in_desired_state;
+	wait_queue_head_t gpu_in_desired_state_wait;
+
+	bool gpu_powered;
+
+	bool instr_enabled;
+
+	bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+	bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+	spinlock_t gpu_powered_lock;
+
+
+	struct kbasep_pm_metrics_data metrics;
+
+	int gpu_poweroff_pending;
+	int shader_poweroff_pending_time;
+
+	struct hrtimer gpu_poweroff_timer;
+	struct workqueue_struct *gpu_poweroff_wq;
+	struct work_struct gpu_poweroff_work;
+
+	u64 shader_poweroff_pending;
+	u64 tiler_poweroff_pending;
+
+	bool poweroff_timer_needed;
+	bool poweroff_timer_running;
+
+	bool poweroff_wait_in_progress;
+	bool poweron_required;
+	bool poweroff_is_suspend;
+
+	struct workqueue_struct *gpu_poweroff_wait_wq;
+	struct work_struct gpu_poweroff_wait_work;
+
+	wait_queue_head_t poweroff_wait;
+
+	int (*callback_power_on)(struct kbase_device *kbdev);
+	void (*callback_power_off)(struct kbase_device *kbdev);
+	void (*callback_power_suspend)(struct kbase_device *kbdev);
+	void (*callback_power_resume)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
+};
+
+
+/* List of policy IDs */
+enum kbase_pm_policy_id {
+	KBASE_PM_POLICY_ID_DEMAND = 1,
+	KBASE_PM_POLICY_ID_ALWAYS_ON,
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
+	KBASE_PM_POLICY_ID_FAST_START
+#endif
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+/**
+ * struct kbase_pm_policy - Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core mask
+ * @get_core_active:    Function called to get the current overall GPU power
+ *                      state
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.pm_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core mask
+	 *
+	 * The returned mask should meet or exceed (kbdev->shader_needed_bitmap
+	 * | kbdev->shader_inuse_bitmap).
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: The mask of shader cores to be powered
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current overall GPU power state
+	 *
+	 * This function should consider the state of kbdev->pm.active_count. If
+	 * this count is greater than 0 then there is at least one active
+	 * context on the device and the GPU should be powered. If it is equal
+	 * to 0 then there are no active contexts and the GPU could be powered
+	 * off if desired.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: true if the GPU should be powered, false otherwise
+	 */
+	bool (*get_core_active)(struct kbase_device *kbdev);
+
+	kbase_pm_policy_flags flags;
+	enum kbase_pm_policy_id id;
+};
+
+
+enum kbase_pm_ca_policy_id {
+	KBASE_PM_CA_POLICY_ID_FIXED = 1,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_CA_POLICY_ID_RANDOM,
+	KBASE_PM_CA_POLICY_ID_DEMAND,
+#endif
+};
+
+typedef u32 kbase_pm_ca_policy_flags;
+
+/**
+ * struct kbase_pm_ca_policy - Core availability policy structure.
+ *
+ * Each core availability policy exposes a (static) instance of this structure
+ * which contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core
+ *                      availability mask
+ * @update_core_status: Function called to update the current core status
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_ca_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.ca_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core availability mask
+	 *
+	 * When a change in core availability is occurring, the policy must set
+	 * kbdev->pm.ca_in_transition to true. This is to indicate that
+	 * reporting changes in power state cannot be optimized out, even if
+	 * kbdev->pm.desired_shader_state remains unchanged. This must be done
+	 * by any functions internal to the Core Availability Policy that change
+	 * the return value of kbase_pm_ca_policy::get_core_mask.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *              valid pointer)
+	 *
+	 * Return: The current core availability mask
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to update the current core status
+	 *
+	 * If none of the cores in core group 0 are ready or transitioning, then
+	 * the policy must ensure that the next call to get_core_mask does not
+	 * return 0 for all cores in core group 0. It is an error to disable
+	 * core group 0 through the core availability policy.
+	 *
+	 * When a change in core availability has finished, the policy must set
+	 * kbdev->pm.ca_in_transition to false. This is to indicate that
+	 * changes in power state can once again be optimized out when
+	 * kbdev->pm.desired_shader_state is unchanged.
+	 *
+	 * @kbdev:               The kbase device structure for the device
+	 *                       (must be a valid pointer)
+	 * @cores_ready:         The mask of cores currently powered and
+	 *                       ready to run jobs
+	 * @cores_transitioning: The mask of cores currently transitioning
+	 *                       power state
+	 */
+	void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+	kbase_pm_ca_policy_flags flags;
+
+	/**
+	 * Field indicating an ID for this policy. This is not necessarily the
+	 * same as its index in the list returned by kbase_pm_list_policies().
+	 * It is used purely for debugging.
+	 */
+	enum kbase_pm_ca_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
new file mode 100755
index 0000000..81322fd
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
+
+	if (0 == kbdev->pm.active_count)
+		return 0;
+
+	return desired;
+}
+
+static bool demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_get_core_active,		/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
new file mode 100755
index 0000000..c0c84b6
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#ifndef MALI_KBASE_PM_DEMAND_H
+#define MALI_KBASE_PM_DEMAND_H
+
+/**
+ * DOC: Demand power management policy
+ *
+ * The demand power management policy has the following characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - The Shader Cores are not powered up
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - Only those Shader Cores are powered up
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * Note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_demand - Private structure for policy instance data
+ *
+ * @dummy: No state is needed, a dummy variable
+ *
+ * This contains data that is private to the demand power policy.
+ */
+struct kbasep_pm_policy_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_DEMAND_H */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
new file mode 100755
index 0000000..c774ee9
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -0,0 +1,1603 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel Power Management hardware control
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_smc.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+
+#if MALI_MOCK_TEST
+#define MOCKABLE(function) function##_original
+#else
+#define MOCKABLE(function) function
+#endif				/* MALI_MOCK_TEST */
+
+/* Special value to indicate that the JM_CONFIG reg isn't currently used. */
+#define KBASE_JM_CONFIG_UNUSED (1<<31)
+
+static u64 kbase_pm_get_state(
+		struct kbase_device *kbdev,
+		enum kbase_pm_core_type core_type,
+		enum kbasep_pm_action action);
+
+/**
+ * core_type_to_reg - Decode a core type and action to a register.
+ *
+ * Given a core type (defined by kbase_pm_core_type) and an action (defined
+ * by kbasep_pm_action) this function will return the register offset that
+ * will perform the action on the core type. The register returned is the _LO
+ * register and an offset must be applied to use the _HI register.
+ *
+ * @core_type: The type of core
+ * @action:    The type of action
+ *
+ * Return: The register offset of the _LO register that performs an action of
+ * type @action on a core of type @core_type.
+ */
+static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
+						enum kbasep_pm_action action)
+{
+	return (u32)core_type + (u32)action;
+}
+
+#ifdef CONFIG_ARM64
+static void mali_cci_flush_l2(struct kbase_device *kbdev)
+{
+	const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED;
+	u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+	u32 raw;
+
+	/*
+	 * Note that we don't take the cache flush mutex here since
+	 * we expect to be the last user of the L2, all other L2 users
+	 * would have dropped their references, to initiate L2 power
+	 * down, L2 power down being the only valid place for this
+	 * to be called from.
+	 */
+
+	kbase_reg_write(kbdev,
+			GPU_CONTROL_REG(GPU_COMMAND),
+			GPU_COMMAND_CLEAN_INV_CACHES,
+			NULL);
+
+	raw = kbase_reg_read(kbdev,
+		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+		NULL);
+
+	/* Wait for cache flush to complete before continuing, exit on
+	 * gpu resets or loop expiry. */
+	while (((raw & mask) == 0) && --loops) {
+		raw = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+					NULL);
+	}
+}
+#endif
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo = cores & 0xFFFFFFFF;
+	u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	if (cores) {
+		if (action == ACTION_PWRON)
+			kbase_trace_mali_pm_power_on(core_type, cores);
+		else if (action == ACTION_PWROFF)
+			kbase_trace_mali_pm_power_off(core_type, cores);
+	}
+#endif
+
+	if (cores) {
+		u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
+
+		if (action == ACTION_PWRON)
+			state |= cores;
+		else if (action == ACTION_PWROFF)
+			state &= ~cores;
+		kbase_tlstream_aux_pm_state(core_type, state);
+	}
+
+	/* Tracing */
+	if (cores) {
+		if (action == ACTION_PWRON)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u,
+									lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL,
+									0u, lo);
+				break;
+			default:
+				break;
+			}
+		else if (action == ACTION_PWROFF)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL,
+									0u, lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
+									0u, lo);
+				/* disable snoops before L2 is turned off */
+				kbase_pm_cache_snoop_disable(kbdev);
+				break;
+			default:
+				break;
+			}
+	}
+
+	if (lo != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+	if (hi != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_invoke);
+
+/**
+ * kbase_pm_get_state - Get information about a core set
+ *
+ * This function gets information (chosen by @action) about a set of cores of
+ * a type given by @core_type. It is a static function used by
+ * kbase_pm_get_present_cores(), kbase_pm_get_active_cores(),
+ * kbase_pm_get_trans_cores() and kbase_pm_get_ready_cores().
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the should be queried
+ * @action:    The property of the cores to query
+ *
+ * Return: A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo, hi;
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+
+	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+	return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev)
+{
+	kbdev->shader_inuse_bitmap = 0;
+	kbdev->shader_needed_bitmap = 0;
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_users_count = 0;
+	kbdev->l2_available_bitmap = 0;
+	kbdev->tiler_needed_cnt = 0;
+	kbdev->tiler_inuse_cnt = 0;
+
+	memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_read_present_cores);
+
+/**
+ * kbase_pm_get_present_cores - Get the cores that are present
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (type) {
+	case KBASE_PM_CORE_L2:
+		return kbdev->gpu_props.props.raw_props.l2_present;
+	case KBASE_PM_CORE_SHADER:
+		return kbdev->gpu_props.props.raw_props.shader_present;
+	case KBASE_PM_CORE_TILER:
+		return kbdev->gpu_props.props.raw_props.tiler_present;
+	}
+	KBASE_DEBUG_ASSERT(0);
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * kbase_pm_get_active_cores - Get the cores that are "active"
+ *                             (busy processing work)
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are active
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between
+ *                            power states
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are transitioning
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * kbase_pm_get_ready_cores - Get the cores that are powered on
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are ready (powered on)
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	u64 result;
+
+	result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+	switch (type) {
+	case KBASE_PM_CORE_SHADER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_TILER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_L2:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * kbase_pm_transition_core_type - Perform power transitions for a particular
+ *                                 core type.
+ *
+ * This function will perform any available power transitions to make the actual
+ * hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that call cannot be made
+ * until the transition has finished. Cores which are not present in the
+ * hardware are ignored if they are specified in the desired_state bitmask,
+ * however the return value will always be 0 in this case.
+ *
+ * @kbdev:             The kbase device
+ * @type:              The core type to perform transitions for
+ * @desired_state:     A bit mask of the desired state of the cores
+ * @in_use:            A bit mask of the cores that are currently running
+ *                     jobs. These cores have to be kept powered up because
+ *                     there are jobs running (or about to run) on them.
+ * @available:         Receives a bit mask of the cores that the job
+ *                     scheduler can use to submit jobs to. May be NULL if
+ *                     this is not needed.
+ * @powering_on:       Bit mask to update with cores that are
+ *                    transitioning to a power-on state.
+ *
+ * Return: true if the desired state has been reached, false otherwise
+ */
+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type,
+						u64 desired_state,
+						u64 in_use,
+						u64 * const available,
+						u64 *powering_on)
+{
+	u64 present;
+	u64 ready;
+	u64 trans;
+	u64 powerup;
+	u64 powerdown;
+	u64 powering_on_trans;
+	u64 desired_state_in_use;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Get current state */
+	present = kbase_pm_get_present_cores(kbdev, type);
+	trans = kbase_pm_get_trans_cores(kbdev, type);
+	ready = kbase_pm_get_ready_cores(kbdev, type);
+	/* mask off ready from trans in case transitions finished between the
+	 * register reads */
+	trans &= ~ready;
+
+	powering_on_trans = trans & *powering_on;
+	*powering_on = powering_on_trans;
+
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	/* Update desired state to include the in-use cores. These have to be
+	 * kept powered up because there are jobs running or about to run on
+	 * these cores
+	 */
+	desired_state_in_use = desired_state | in_use;
+
+	/* Update state of whether l2 caches are powered */
+	if (type == KBASE_PM_CORE_L2) {
+		if ((ready == present) && (desired_state_in_use == ready) &&
+								(trans == 0)) {
+			/* All are ready, none will be turned off, and none are
+			 * transitioning */
+			kbdev->pm.backend.l2_powered = 1;
+			/*
+			 * Ensure snoops are enabled after L2 is powered up,
+			 * note that kbase keeps track of the snoop state, so
+			 * safe to repeatedly call.
+			 */
+			kbase_pm_cache_snoop_enable(kbdev);
+			if (kbdev->l2_users_count > 0) {
+				/* Notify any registered l2 cache users
+				 * (optimized out when no users waiting) */
+				wake_up(&kbdev->pm.backend.l2_powered_wait);
+			}
+		} else
+			kbdev->pm.backend.l2_powered = 0;
+	}
+
+	if (desired_state == ready && (trans == 0))
+		return true;
+
+	/* Restrict the cores to those that are actually present */
+	powerup = desired_state_in_use & present;
+	powerdown = (~desired_state_in_use) & present;
+
+	/* Restrict to cores that are not already in the desired state */
+	powerup &= ~ready;
+	powerdown &= ready;
+
+	/* Don't transition any cores that are already transitioning, except for
+	 * Mali cores that support the following case:
+	 *
+	 * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
+	 * a core that is currently transitioning to power off, then this is
+	 * remembered and the shader core is automatically powered up again once
+	 * the original transition completes. Once the automatic power on is
+	 * complete any job scheduled on the shader core should start.
+	 */
+	powerdown &= ~trans;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
+		if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
+			trans = powering_on_trans; /* for exception cases, only
+						    * mask off cores in power on
+						    * transitions */
+
+	powerup &= ~trans;
+
+	/* Perform transitions if any */
+	kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
+	//kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
+
+	/* Recalculate cores transitioning on, and re-evaluate our state */
+	powering_on_trans |= powerup;
+	*powering_on = powering_on_trans;
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	return false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
+
+/**
+ * get_desired_cache_status - Determine which caches should be on for a
+ *                            particular core state
+ *
+ * This function takes a bit mask of the present caches and the cores (or
+ * caches) that are attached to the caches that will be powered. It then
+ * computes which caches should be turned on to allow the cores requested to be
+ * powered up.
+ *
+ * @present:       The bit mask of present caches
+ * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
+ *                 be powered
+ * @tilers_powered: The bit mask of tilers that are desired to be powered
+ *
+ * Return: A bit mask of the caches that should be turned on
+ */
+static u64 get_desired_cache_status(u64 present, u64 cores_powered,
+		u64 tilers_powered)
+{
+	u64 desired = 0;
+
+	while (present) {
+		/* Find out which is the highest set bit */
+		u64 bit = fls64(present) - 1;
+		u64 bit_mask = 1ull << bit;
+		/* Create a mask which has all bits from 'bit' upwards set */
+
+		u64 mask = ~(bit_mask - 1);
+
+		/* If there are any cores powered at this bit or above (that
+		 * haven't previously been processed) then we need this core on
+		 */
+		if (cores_powered & mask)
+			desired |= bit_mask;
+
+		/* Remove bits from cores_powered and present */
+		cores_powered &= ~mask;
+		present &= ~bit_mask;
+	}
+
+	/* Power up the required L2(s) for the tiler */
+	if (tilers_powered)
+		desired |= 1;
+
+	return desired;
+}
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status);
+
+bool
+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+	bool cores_are_available = false;
+	bool in_desired_state = true;
+	u64 desired_l2_state;
+	u64 cores_powered;
+	u64 tilers_powered;
+	u64 tiler_available_bitmap;
+	u64 shader_available_bitmap;
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+	u64 l2_available_bitmap;
+	u64 prev_l2_available_bitmap;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+	if (kbdev->pm.backend.gpu_powered == false) {
+		spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+		if (kbdev->pm.backend.desired_shader_state == 0 &&
+				kbdev->pm.backend.desired_tiler_state == 0)
+			return true;
+		return false;
+	}
+
+	/* Trace that a change-state is being requested, and that it took
+	 * (effectively) no time to start it. This is useful for counting how
+	 * many state changes occurred, in a way that's backwards-compatible
+	 * with processing the trace data */
+	kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+	kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+
+	/* If any cores are already powered then, we must keep the caches on */
+	cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+
+	cores_powered |= kbdev->pm.backend.desired_shader_state;
+
+	/* Work out which tilers want to be powered */
+	tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+	tilers_powered |= kbdev->pm.backend.desired_tiler_state;
+
+	/* If there are l2 cache users registered, keep all l2s powered even if
+	 * all other cores are off. */
+	if (kbdev->l2_users_count > 0)
+		cores_powered |= kbdev->gpu_props.props.raw_props.l2_present;
+
+	desired_l2_state = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered, tilers_powered);
+
+	/* If any l2 cache is on, then enable l2 #0, for use by job manager */
+	if (0 != desired_l2_state)
+		desired_l2_state |= 1;
+
+	prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+	in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_L2, desired_l2_state, 0,
+				&l2_available_bitmap,
+				&kbdev->pm.backend.powering_on_l2_state);
+
+	if (kbdev->l2_available_bitmap != l2_available_bitmap)
+		KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
+
+	kbdev->l2_available_bitmap = l2_available_bitmap;
+
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_TILER,
+				kbdev->pm.backend.desired_tiler_state,
+				0, &tiler_available_bitmap,
+				&kbdev->pm.backend.powering_on_tiler_state);
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_SHADER,
+				kbdev->pm.backend.desired_shader_state,
+				kbdev->shader_inuse_bitmap,
+				&shader_available_bitmap,
+				&kbdev->pm.backend.powering_on_shader_state);
+
+		if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u,
+						(u32) shader_available_bitmap);
+			KBASE_TIMELINE_POWER_SHADER(kbdev,
+						shader_available_bitmap);
+		}
+
+		kbdev->shader_available_bitmap = shader_available_bitmap;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u,
+						(u32) tiler_available_bitmap);
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+		}
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+	} else if ((l2_available_bitmap &
+			kbdev->gpu_props.props.raw_props.tiler_present) !=
+			kbdev->gpu_props.props.raw_props.tiler_present) {
+		tiler_available_bitmap = 0;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+	}
+
+	/* State updated for slow-path waiters */
+	kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+
+	shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* Determine whether the cores are now available (even if the set of
+	 * available cores is empty). Note that they can be available even if
+	 * we've not finished transitioning to the desired state */
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state) {
+		cores_are_available = true;
+
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+				(u32)(kbdev->shader_available_bitmap &
+				kbdev->pm.backend.desired_shader_state));
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+				(u32)(kbdev->tiler_available_bitmap &
+				kbdev->pm.backend.desired_tiler_state));
+
+		/* Log timelining information about handling events that power
+		 * up cores, to match up either with immediate submission either
+		 * because cores already available, or from PM IRQ */
+		if (!in_desired_state)
+			kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+
+	if (in_desired_state) {
+		KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_L2));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_TILER));
+#endif
+
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_L2,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_L2));
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_SHADER,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_SHADER));
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_TILER,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_TILER));
+
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+				kbdev->pm.backend.gpu_in_desired_state,
+				(u32)kbdev->pm.backend.desired_shader_state);
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+				(u32)kbdev->pm.backend.desired_tiler_state);
+
+		/* Log timelining information for synchronous waiters */
+		kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+		/* Wake slow-path waiters. Job scheduler does not use this. */
+		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+
+		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
+	}
+
+	spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+
+	/* kbase_pm_ca_update_core_status can cause one-level recursion into
+	 * this function, so it must only be called once all changes to kbdev
+	 * have been committed, and after the gpu_powered_lock has been
+	 * dropped. */
+	if (kbdev->shader_ready_bitmap != shader_ready_bitmap ||
+	    kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) {
+		kbdev->shader_ready_bitmap = shader_ready_bitmap;
+		kbdev->shader_transitioning_bitmap =
+						shader_transitioning_bitmap;
+
+		kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap,
+						shader_transitioning_bitmap);
+	}
+
+	/* The core availability policy is not allowed to keep core group 0
+	 * turned off (unless it was changing the l2 power state) */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
+		(prev_l2_available_bitmap == desired_l2_state) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+		BUG();
+
+	/* The core availability policy is allowed to keep core group 1 off,
+	 * but all jobs specifically targeting CG1 must fail */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask))
+		kbdev->pm.backend.cg1_disabled = true;
+	else
+		kbdev->pm.backend.cg1_disabled = false;
+
+	return cores_are_available;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock);
+
+/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has
+ * aborted due to a fatal signal. If the time spent waiting has exceeded this
+ * threshold then there is most likely a hardware issue. */
+#define PM_TIMEOUT (5*HZ) /* 5s */
+
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long timeout;
+	bool cores_are_available;
+	int ret;
+
+	/* Force the transition to be checked and reported - the cores may be
+	 * 'available' (for job submission) but not fully powered up. */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	timeout = jiffies + PM_TIMEOUT;
+
+	/* Wait for cores */
+	ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+			kbdev->pm.backend.gpu_in_desired_state);
+
+	if (ret < 0 && time_after(jiffies, timeout)) {
+		dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+		dev_err(kbdev->dev, "Desired state :\n");
+		dev_err(kbdev->dev, "\tShader=%016llx\n",
+				kbdev->pm.backend.desired_shader_state);
+		dev_err(kbdev->dev, "\tTiler =%016llx\n",
+				kbdev->pm.backend.desired_tiler_state);
+		dev_err(kbdev->dev, "Current state :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_LO),
+					NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_LO), NULL));
+		dev_err(kbdev->dev, "Cores transitioning :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_LO), NULL));
+#if KBASE_GPU_RESET_EN
+		dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+		if (kbase_prepare_to_reset_gpu(kbdev))
+			kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+	} else {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
+
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Clear all interrupts,
+	 * and unmask them all.
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
+									NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
+
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Mask all interrupts,
+	 * and clear them all.
+	 */
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+}
+
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_disable_interrupts_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
+
+
+/*
+ * pmu layout:
+ * 0x0000: PMU TAG (RO) (0xCAFECAFE)
+ * 0x0004: PMU VERSION ID (RO) (0x00000000)
+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE)
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
+{
+	bool reset_required = is_resume;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_powered) {
+		/* Already turned on */
+		if (kbdev->poweroff_pending)
+			kbase_pm_enable_interrupts(kbdev);
+		kbdev->poweroff_pending = false;
+		KBASE_DEBUG_ASSERT(!is_resume);
+		return;
+	}
+
+	kbdev->poweroff_pending = false;
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u);
+
+	if (is_resume && kbdev->pm.backend.callback_power_resume) {
+		kbdev->pm.backend.callback_power_resume(kbdev);
+		return;
+	} else if (kbdev->pm.backend.callback_power_on) {
+		kbdev->pm.backend.callback_power_on(kbdev);
+		/* If your platform properly keeps the GPU state you may use the
+		 * return value of the callback_power_on function to
+		 * conditionally reset the GPU on power up. Currently we are
+		 * conservative and always reset the GPU. */
+		reset_required = true;
+	}
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	kbdev->pm.backend.gpu_powered = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (reset_required) {
+		/* GPU state was lost, reset GPU to ensure it is in a
+		 * consistent state */
+		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
+	}
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	/* Reprogram the GPU's MMU */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		if (js_devdata->runpool_irq.per_as_data[i].kctx)
+			kbase_mmu_update(
+				js_devdata->runpool_irq.per_as_data[i].kctx);
+		else
+			kbase_mmu_disable_as(kbdev, i);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Lastly, enable the interrupts */
+	kbase_pm_enable_interrupts(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
+
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* ASSERT that the cores should now be unavailable. No lock needed. */
+	KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+
+	kbdev->poweroff_pending = true;
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* Already turned off */
+		if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+			kbdev->pm.backend.callback_power_suspend(kbdev);
+		return true;
+	}
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u);
+
+	/* Disable interrupts. This also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure that any IRQ handlers have finished */
+	kbase_synchronize_irqs(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (atomic_read(&kbdev->faults_pending)) {
+		/* Page/bus faults are still being processed. The GPU can not
+		 * be powered off until they have completed */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return false;
+	}
+
+	kbase_pm_cache_snoop_disable(kbdev);
+
+	/* The GPU power may be turned off from this point */
+	kbdev->pm.backend.gpu_powered = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+		kbdev->pm.backend.callback_power_suspend(kbdev);
+	else if (kbdev->pm.backend.callback_power_off)
+		kbdev->pm.backend.callback_power_off(kbdev);
+	return true;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_off);
+
+struct kbasep_reset_timeout_data {
+	struct hrtimer timer;
+	bool timed_out;
+	struct kbase_device *kbdev;
+};
+
+void kbase_pm_reset_done(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	kbdev->pm.backend.reset_done = true;
+	wake_up(&kbdev->pm.backend.reset_done_wait);
+}
+
+/**
+ * kbase_pm_wait_for_reset - Wait for a reset to happen
+ *
+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
+ *
+ * @kbdev: Kbase device
+ */
+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	wait_event(kbdev->pm.backend.reset_done_wait,
+						(kbdev->pm.backend.reset_done));
+	kbdev->pm.backend.reset_done = false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done);
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+	struct kbasep_reset_timeout_data *rtdata =
+		container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+	rtdata->timed_out = 1;
+
+	/* Set the wait queue to wake up kbase_pm_init_hw even though the reset
+	 * hasn't completed */
+	kbase_pm_reset_done(rtdata->kbdev);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+	u32 jm_values[4];
+	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >>
+		GPU_ID_VERSION_MAJOR_SHIFT;
+
+	kbdev->hw_quirks_sc = 0;
+
+	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443.
+	 * and needed due to MIDGLES-3539. See PRLAM-11035 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+		kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
+
+	/* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+		kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	/* Enable alternative hardware counter selection if configured. */
+	if (!GPU_ID_IS_NEW_FORMAT(prod_id))
+		kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+#endif
+
+	/* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+		kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
+
+	if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
+		if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */
+			kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
+		else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */
+			kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
+	}
+
+	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(TILER_CONFIG), NULL);
+
+	/* Set tiler clock gate override if required */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+		kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+
+	/* Limit the GPU bus bandwidth if the platform needs this. */
+	kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(L2_MMU_CONFIG), NULL);
+
+	/* Limit read ID width for AXI */
+	kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS);
+	kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT;
+
+	/* Limit write ID width for AXI */
+	kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+	kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Allow memory configuration disparity to be ignored, we
+		 * optimize the use of shared memory and thus we expect
+		 * some disparity in the memory configuration */
+		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
+	}
+
+	/* Only for T86x/T88x-based products after r2p0 */
+	if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) {
+		/* The JM_CONFIG register is specified as follows in the
+		 T86x/T88x Engineering Specification Supplement:
+		 The values are read from device tree in order.
+		*/
+#define TIMESTAMP_OVERRIDE  1
+#define CLOCK_GATE_OVERRIDE (1<<1)
+#define JOB_THROTTLE_ENABLE (1<<2)
+#define JOB_THROTTLE_LIMIT_SHIFT 3
+
+		/* 6 bits in the register */
+		const u32 jm_max_limit = 0x3F;
+
+		if (of_property_read_u32_array(np,
+					"jm_config",
+					&jm_values[0],
+					ARRAY_SIZE(jm_values))) {
+			/* Entry not in device tree, use defaults  */
+			jm_values[0] = 0;
+			jm_values[1] = 0;
+			jm_values[2] = 0;
+			jm_values[3] = jm_max_limit; /* Max value */
+		}
+
+		/* Limit throttle limit to 6 bits*/
+		if (jm_values[3] > jm_max_limit) {
+			dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63).");
+			jm_values[3] = jm_max_limit;
+		}
+
+		/* Aggregate to one integer. */
+		kbdev->hw_quirks_jm = (jm_values[0] ? TIMESTAMP_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[1] ? CLOCK_GATE_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[2] ? JOB_THROTTLE_ENABLE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[3] <<
+				JOB_THROTTLE_LIMIT_SHIFT);
+	} else {
+		kbdev->hw_quirks_jm = KBASE_JM_CONFIG_UNUSED;
+	}
+
+
+}
+
+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
+{
+	if (kbdev->hw_quirks_sc)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
+				kbdev->hw_quirks_sc, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
+			kbdev->hw_quirks_tiler, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
+			kbdev->hw_quirks_mmu, NULL);
+
+
+	if (kbdev->hw_quirks_jm != KBASE_JM_CONFIG_UNUSED)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
+				kbdev->hw_quirks_jm, NULL);
+
+}
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
+{
+	if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) &&
+		!kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_enable_smc != 0)
+			kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0);
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n");
+		kbdev->cci_snoop_enabled = true;
+	}
+}
+
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
+{
+	if (kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_disable_smc != 0) {
+			mali_cci_flush_l2(kbdev);
+			kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0);
+		}
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n");
+		kbdev->cci_snoop_enabled = false;
+	}
+}
+
+static int kbase_pm_reset_do_normal(struct kbase_device *kbdev)
+{
+	struct kbasep_reset_timeout_data rtdata;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+	kbase_tlstream_jd_gpu_soft_reset(kbdev);
+#endif
+#if 0
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_SOFT_RESET, NULL);
+#else
+  //dev_err(kbdev->dev, "%s,  %d \n", __FILE__, __LINE__);
+  core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+  l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+  tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+
+  //printk("core_ready=%ld, l2_ready=%ld, tiler_ready=%ld\n", core_ready, l2_ready, tiler_ready);
+  if (core_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, core_ready, ACTION_PWROFF);
+
+  if (l2_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_ready, ACTION_PWROFF);
+
+  if (tiler_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_ready, ACTION_PWROFF);
+
+  /* do external reset */
+
+//JOHNT
+#if 0
+    // Level reset mail
+    Wr(P_RESET0_MASK, ~(0x1<<20));
+    Wr(P_RESET0_LEVEL, ~(0x1<<20));
+
+    // Level reset mail
+    Wr(P_RESET2_MASK, ~(0x1<<14));
+    Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    Wr(P_RESET2_LEVEL, 0xffffffff);
+    Wr(P_RESET0_LEVEL, 0xffffffff);
+#else
+    if (reg_base_hiubus) {
+        value = Rd(RESET0_MASK);
+        value = value & (~(0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_MASK, value);
+
+        value = Rd(RESET0_LEVEL);
+        value = value & (~(0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_LEVEL, value);
+    ///////////////
+        value = Rd(RESET2_MASK);
+        value = value & (~(0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_MASK, value);
+
+        value = Rd(RESET2_LEVEL);
+        value = value & (~(0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_LEVEL, value);
+
+        value = Rd(RESET0_LEVEL);
+        value = value | ((0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_LEVEL, value);
+
+        value = Rd(RESET2_LEVEL);
+        value = value | ((0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_LEVEL, value);
+    } else {
+		dev_err(kbdev->dev, "reg_base_hiubus is null !!!\n");
+    }
+#endif
+  //writel(..../*e.g. PWR_RESET, EXTERNAL_PWR_REGISTER*/);
+
+  /* any other action necessary, like a simple delay */
+  kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+  kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+#endif
+
+	/* Unmask the reset complete interrupt only */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED,
+									NULL);
+
+	/* Initialize a structure for tracking the status of the reset */
+	rtdata.kbdev = kbdev;
+	rtdata.timed_out = 0;
+
+	/* Create a timer to use as a timeout on the reset */
+	hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rtdata.timer.function = kbasep_reset_timeout;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	/* No interrupt has been received - check if the RAWSTAT register says
+	 * the reset has completed */
+	if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+							RESET_COMPLETED) {
+		/* The interrupt is set in the RAWSTAT; this suggests that the
+		 * interrupts are not getting to the CPU */
+		dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+		/* If interrupts aren't working we can't continue. */
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return -EINVAL;
+	}
+
+	/* The GPU doesn't seem to be responding to the reset so try a hard
+	 * reset */
+	dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+								RESET_TIMEOUT);
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET, NULL);
+
+	/* Restart the timer to wait for the hard reset to complete */
+	rtdata.timed_out = 0;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	destroy_hrtimer_on_stack(&rtdata.timer);
+
+	dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+								RESET_TIMEOUT);
+
+	return -EINVAL;
+}
+
+static int kbase_pm_reset_do_protected(struct kbase_device *kbdev)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+	kbase_tlstream_jd_gpu_soft_reset(kbdev);
+
+	return kbdev->protected_ops->protected_mode_reset(kbdev);
+}
+
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+{
+	unsigned long irq_flags;
+	int err;
+	bool resume_vinstr = false;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Ensure the clock is on before attempting to access the hardware */
+	if (!kbdev->pm.backend.gpu_powered) {
+		if (kbdev->pm.backend.callback_power_on)
+			kbdev->pm.backend.callback_power_on(kbdev);
+
+		spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+		kbdev->pm.backend.gpu_powered = true;
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+	}
+
+	/* Ensure interrupts are off to begin with, this also clears any
+	 * outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure cache snoops are disabled before reset. */
+	kbase_pm_cache_snoop_disable(kbdev);
+	/* Prepare for the soft-reset */
+	kbdev->pm.backend.reset_done = false;
+
+	/* The cores should be made unavailable due to the reset */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->shader_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u, (u32)0u);
+	if (kbdev->tiler_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u, (u32)0u);
+	kbdev->shader_available_bitmap = 0u;
+	kbdev->tiler_available_bitmap = 0u;
+	kbdev->l2_available_bitmap = 0u;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	/* Soft reset the GPU */
+	if (kbdev->protected_mode_support &&
+			kbdev->protected_ops->protected_mode_reset)
+		err = kbase_pm_reset_do_protected(kbdev);
+	else
+		err = kbase_pm_reset_do_normal(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->protected_mode)
+		resume_vinstr = true;
+	kbdev->protected_mode = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	if (err)
+		goto exit;
+
+	if (flags & PM_HW_ISSUES_DETECT)
+		kbase_pm_hw_issues_detect(kbdev);
+
+	kbase_pm_hw_issues_apply(kbdev);
+	kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
+
+	/* Sanity check protected mode was left after reset */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		u32 gpu_status = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS), NULL);
+
+		WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE);
+	}
+
+	/* If cycle counter was in use re-enable it, enable_irqs will only be
+	 * false when called from kbase_pm_powerup */
+	if (kbdev->pm.backend.gpu_cycle_counter_requests &&
+						(flags & PM_ENABLE_IRQS)) {
+		/* enable interrupts as the L2 may have to be powered on */
+		kbase_pm_enable_interrupts(kbdev);
+		kbase_pm_request_l2_caches(kbdev);
+
+		/* Re-enable the counters if we need to */
+		spin_lock_irqsave(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+		if (kbdev->pm.backend.gpu_cycle_counter_requests)
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+		spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+		kbase_pm_release_l2_caches(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+		kbase_pm_disable_interrupts(kbdev);
+	}
+
+	if (flags & PM_ENABLE_IRQS)
+		kbase_pm_enable_interrupts(kbdev);
+
+exit:
+	/* If GPU is leaving protected mode resume vinstr operation. */
+	if (kbdev->vinstr_ctx && resume_vinstr)
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	return err;
+}
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ *
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * kbase_pm_request_gpu_cycle_counter() or
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only
+ *
+ * When this function is called the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to (
+ * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() )
+ *
+ * @kbdev:     The kbase device structure of the device
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+	--kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	kbase_pm_release_l2_caches(kbdev);
+}
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100755
index 0000000..d0e2d56
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,593 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * enum kbasep_pm_action - Actions that can be performed on a core.
+ *
+ * This enumeration is private to the file. Its values are set to allow
+ * core_type_to_reg() function, which decodes this enumeration, to be simpler
+ * and more efficient.
+ *
+ * @ACTION_PRESENT: The cores that are present
+ * @ACTION_READY: The cores that are ready
+ * @ACTION_PWRON: Power on the cores specified
+ * @ACTION_PWROFF: Power off the cores specified
+ * @ACTION_PWRTRANS: The cores that are transitioning
+ * @ACTION_PWRACTIVE: The cores that are active
+ */
+enum kbasep_pm_action {
+	ACTION_PRESENT = 0,
+	ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO),
+	ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO),
+	ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO),
+	ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO),
+	ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO)
+};
+
+
+
+/**
+ * kbase_pm_dev_idle - The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_dev_activate - The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_get_present_cores - Get details of the cores that are present in
+ *                              the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid
+ *         pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_active_cores - Get details of the cores that are currently
+ *                             active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_trans_cores - Get details of the cores that are currently
+ *                            transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_ready_cores - Get details of the cores that are currently
+ *                            powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are powered and ready for jobs (they may
+ * or may not be currently executing jobs).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_clock_on - Turn the clock for the device on, and enable device
+ *                     interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if clock on due to resume after suspend, false otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
+ *                      device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU
+ * off. It should be modified during integration to perform the necessary
+ * actions to turn the clock off (if this is possible in the integration).
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if clock off due to suspend, false otherwise
+ *
+ * Return: true  if clock was turned off, or
+ *         false if clock can not be turned off due to pending page/bus fault
+ *               workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
+
+/**
+ * kbase_pm_enable_interrupts - Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts - Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
+ *                                      that does not take the hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * kbase_pm_reset_done - The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_pm_check_transitions_nolock - Check if there are any power transitions
+ *                                     to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state has no
+ * cores. That is: of the no cores desired, none were *un*available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation)
+ *
+ * The caller must hold kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return:      non-zero when all desired cores are available. That is,
+ *              it's worthwhile for the caller to submit a job.
+ *              false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_check_transitions_sync - Synchronous and locking variant of
+ *                                   kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device.pm.power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ *                                      where the caller must hold
+ *                                      kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ *                               the Power Policy, and begin any power
+ *                               transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware acheive the desired shader
+ * core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
+ *                                     the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+
+void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action);
+
+/**
+ * kbasep_pm_read_present_cores - Read the bitmasks of present cores.
+ *
+ * This information is cached to avoid having to perform register reads whenever
+ * the information is required.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init()) after calling this function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to
+ *                         update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of @buffer_updated differs from a previous call.
+ *
+ * @kbdev:          The kbase device structure for the device (must be a
+ *                  valid pointer)
+ * @buffer_updated: True if the buffer has been updated on this VSync,
+ *                  false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change
+ *                            the clock speed of the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating.
+ */
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
+ *                                      needed
+ *
+ * If the caller is the first caller then the GPU cycle counters will be enabled
+ * along with the l2 cache
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
+ *                                               needed (l2 cache already on)
+ *
+ * This is a version of the above function
+ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
+ * l2 cache is known to be on and assured to be on until the subsequent call of
+ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does
+ * not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
+ *                                      longer in use
+ *
+ * If the caller is the last caller then the GPU cycle counters will be
+ * disabled. A request must have been made before a call to this.
+ *
+ * Caller must not hold the hwaccess_lock, as it will be taken in this function.
+ * If the caller is already holding this lock then
+ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
+ *                                             that does not take hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
+ *                                       complete
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * Access to registers should be done using kbase_os_reg_read()/write() at this
+ * stage, not kbase_reg_read()/write().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ *                              collection is active.
+ *
+ * Note that this returns if the power management metrics collection was
+ * active at the time of calling, it is possible that after the call the metrics
+ * collection enable may have changed state.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if power on due to resume after suspend,
+ *             false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been
+ *                        requested.
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if power off due to suspend,
+ *              false otherwise
+ */
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+
+#ifdef CONFIG_PM_DEVFREQ
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total, unsigned long *busy);
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @kbdev:         The kbase device structure for the device (must be a
+ *                 valid pointer)
+ * @utilisation:   The current calculated utilisation by the metrics system.
+ * @util_gl_share: The current calculated gl share of utilisation.
+ * @util_cl_share: The current calculated cl share of utilisation per core
+ *                 group.
+ * Return:         Returns 0 on failure and non zero on success.
+ */
+
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2]);
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_metrics_update - Inform the metrics system that an atom is either
+ *                           about to be run or has just completed.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @now:   Pointer to the timestamp of the change, or NULL to use current time
+ *
+ * Caller must hold hwaccess_lock
+ */
+void kbase_pm_metrics_update(struct kbase_device *kbdev,
+				ktime_t *now);
+
+/**
+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called after L2 power up.
+ */
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called before L2 power off.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
new file mode 100755
index 0000000..7613e1d
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -0,0 +1,401 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION          70
+#define KBASE_PM_VSYNC_MAX_UTILISATION          90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION       10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION       40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
+ * under 11s. Exceeding this will cause overflow */
+#define KBASE_PM_TIME_SHIFT			8
+
+/* Maximum time between sampling of utilization data, without resetting the
+ * counters. */
+#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbasep_pm_metrics_data *metrics;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	metrics = container_of(timer, struct kbasep_pm_metrics_data, timer);
+	kbase_pm_get_dvfs_action(metrics->kbdev);
+
+	spin_lock_irqsave(&metrics->lock, flags);
+
+	if (metrics->timer_active)
+		hrtimer_start(timer,
+			HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&metrics->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+int kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.metrics.kbdev = kbdev;
+
+	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.prev_busy = 0;
+	kbdev->pm.backend.metrics.prev_idle = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+
+	spin_lock_init(&kbdev->pm.backend.metrics.lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbdev->pm.backend.metrics.timer_active = true;
+	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
+
+	hrtimer_start(&kbdev->pm.backend.metrics.timer,
+			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
+
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbdev->pm.backend.metrics.timer_active = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	ktime_t diff;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+	if (ktime_to_ns(diff) < 0)
+		return;
+
+	if (kbdev->pm.backend.metrics.gpu_active) {
+		u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+
+		kbdev->pm.backend.metrics.time_busy += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[0])
+			kbdev->pm.backend.metrics.busy_cl[0] += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[1])
+			kbdev->pm.backend.metrics.busy_cl[1] += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[0])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+	} else {
+		kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff)
+							>> KBASE_PM_TIME_SHIFT);
+	}
+
+	kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+#if defined(CONFIG_PM_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function.
+ */
+static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	/* Store previous value */
+	kbdev->pm.backend.metrics.prev_idle =
+					kbdev->pm.backend.metrics.time_idle;
+	kbdev->pm.backend.metrics.prev_busy =
+					kbdev->pm.backend.metrics.time_busy;
+
+	/* Reset current values */
+	kbdev->pm.backend.metrics.time_period_start = now;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+}
+
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get());
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total_out, unsigned long *busy_out)
+{
+	ktime_t now = ktime_get();
+	unsigned long flags, busy, total;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	busy = kbdev->pm.backend.metrics.time_busy;
+	total = busy + kbdev->pm.backend.metrics.time_idle;
+
+	/* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
+	 * 100ms) */
+	if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+		kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+	} else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+		total += kbdev->pm.backend.metrics.prev_idle +
+				kbdev->pm.backend.metrics.prev_busy;
+		busy += kbdev->pm.backend.metrics.prev_busy;
+	}
+
+	*total_out = total;
+	*busy_out = busy;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev,
+					int *util_gl_share,
+					int util_cl_share[2],
+					ktime_t now)
+{
+	int utilisation;
+	int busy;
+
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	if (kbdev->pm.backend.metrics.time_idle +
+				kbdev->pm.backend.metrics.time_busy == 0) {
+		/* No data - so we return NOP */
+		utilisation = -1;
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+		goto out;
+	}
+
+	utilisation = (100 * kbdev->pm.backend.metrics.time_busy) /
+			(kbdev->pm.backend.metrics.time_idle +
+			 kbdev->pm.backend.metrics.time_busy);
+
+	busy = kbdev->pm.backend.metrics.busy_gl +
+		kbdev->pm.backend.metrics.busy_cl[0] +
+		kbdev->pm.backend.metrics.busy_cl[1];
+
+	if (busy != 0) {
+		if (util_gl_share)
+			*util_gl_share =
+				(100 * kbdev->pm.backend.metrics.busy_gl) /
+									busy;
+		if (util_cl_share) {
+			util_cl_share[0] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[0]) /
+									busy;
+			util_cl_share[1] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[1]) /
+									busy;
+		}
+	} else {
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+	}
+
+out:
+	return utilisation;
+}
+
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	int utilisation, util_gl_share;
+	int util_cl_share[2];
+	ktime_t now;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	now = ktime_get();
+
+	utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share,
+			util_cl_share, now);
+
+	if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 ||
+							util_cl_share[1] < 0) {
+		utilisation = 0;
+		util_gl_share = 0;
+		util_cl_share[0] = 0;
+		util_cl_share[1] = 0;
+		goto out;
+	}
+
+out:
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
+								util_cl_share);
+#endif				/*CONFIG_MALI_MIDGARD_DVFS */
+
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+	bool isactive;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	isactive = kbdev->pm.backend.metrics.timer_active;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	return isactive;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/**
+ * kbase_pm_metrics_active_calc - Update PM active counts based on currently
+ *                                running atoms
+ * @kbdev: Device pointer
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ */
+static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		/* Head atom may have just completed, so if it isn't running
+		 * then try the next atom */
+		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
+			katom = kbase_gpu_inspect(kbdev, js, 1);
+
+		if (katom && katom->gpu_rb_state ==
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+			if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+				int device_nr = (katom->core_req &
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+						? katom->device_nr : 0;
+				if (!WARN_ON(device_nr >= 2))
+					kbdev->pm.backend.metrics.
+						active_cl_ctx[device_nr] = 1;
+			} else {
+				/* Slot 2 should not be running non-compute
+				 * atoms */
+				if (!WARN_ON(js >= 2))
+					kbdev->pm.backend.metrics.
+						active_gl_ctx[js] = 1;
+			}
+			kbdev->pm.backend.metrics.gpu_active = true;
+		}
+	}
+}
+
+/* called when job is submitted to or removed from a GPU slot */
+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
+{
+	unsigned long flags;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	if (!timestamp) {
+		now = ktime_get();
+		timestamp = &now;
+	}
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
+
+	kbase_pm_metrics_active_calc(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100755
index 0000000..bc3bd18
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,998 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#else				/* CONFIG_MALI_NO_MALI */
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/* The number of policies available in the system.
+ * This is derived from the number of functions listed in policy_get_functions.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+	 * expect to hit it nor tend to hit it very much anyway. We can detect
+	 * whether we need more instrumentation by a difference between
+	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
+
+	/* Must be the last */
+	KBASE_PM_FUNC_ID_COUNT
+};
+
+
+/* State changes during request/unrequest/release-ing cores */
+enum {
+	KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
+	KBASE_PM_CHANGE_STATE_TILER  = (1u << 1),
+
+	/* These two must be last */
+	KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
+						KBASE_PM_CHANGE_STATE_SHADER),
+	KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
+};
+typedef u32 kbase_pm_change_state;
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+/* Timeline Trace code lookups for each function */
+static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
+					[KBASE_PM_CHANGE_STATE_COUNT] = {
+	/* kbase_pm_request_cores */
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
+
+	/* kbase_pm_release_cores */
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
+};
+
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id,
+		kbase_pm_change_state state)
+{
+	int trace_code;
+
+	KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
+	KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
+									state);
+
+	trace_code = kbase_pm_change_state_trace_code[func_id][state];
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
+}
+
+#else /* CONFIG_MALI_TRACE_TIMELINE */
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id, kbase_pm_change_state state)
+{
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+/**
+ * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
+ *                               requested shader cores
+ * @kbdev: Device pointer
+ */
+static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
+{
+	u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
+	u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->pm.backend.desired_shader_state &=
+			~kbdev->pm.backend.shader_poweroff_pending;
+	kbdev->pm.backend.desired_tiler_state &=
+			~kbdev->pm.backend.tiler_poweroff_pending;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+
+	if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
+			prev_tiler_state !=
+				kbdev->pm.backend.desired_tiler_state ||
+			kbdev->pm.backend.ca_in_transition) {
+		bool cores_are_available;
+
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+		/* Don't need 'cores_are_available',
+		 * because we don't return anything */
+		CSTD_UNUSED(cores_are_available);
+	}
+}
+
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(timer, struct kbase_device,
+						pm.backend.gpu_poweroff_timer);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* It is safe for this call to do nothing if the work item is already
+	 * queued. The worker function will read the must up-to-date state of
+	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
+	 *
+	 * If a state change occurs while the worker function is processing,
+	 * this call will succeed as a work item can be requeued once it has
+	 * started processing.
+	 */
+	if (kbdev->pm.backend.gpu_poweroff_pending)
+		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+					&kbdev->pm.backend.gpu_poweroff_work);
+
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending_time--;
+
+		KBASE_DEBUG_ASSERT(
+				kbdev->pm.backend.shader_poweroff_pending_time
+									>= 0);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending_time)
+			kbasep_pm_do_poweroff_cores(kbdev);
+	}
+
+	if (kbdev->pm.backend.poweroff_timer_needed) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+		return HRTIMER_RESTART;
+	}
+
+	kbdev->pm.backend.poweroff_timer_running = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	bool do_poweroff = false;
+
+	kbdev = container_of(data, struct kbase_device,
+						pm.backend.gpu_poweroff_work);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	kbdev->pm.backend.gpu_poweroff_pending--;
+
+	if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Only power off the GPU if a request is still pending */
+	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+		do_poweroff = true;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (do_poweroff) {
+		kbdev->pm.backend.poweroff_timer_needed = false;
+		hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+		kbdev->pm.backend.poweroff_timer_running = false;
+
+		/* Power off the GPU */
+		kbase_pm_do_poweroff(kbdev, false);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("kbase_pm_do_poweroff",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!wq)
+		return -ENOMEM;
+
+	kbdev->pm.backend.gpu_poweroff_wq = wq;
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
+			kbasep_pm_do_gpu_poweroff_wq);
+	hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	kbdev->pm.backend.gpu_poweroff_timer.function =
+			kbasep_pm_do_gpu_poweroff_callback;
+	kbdev->pm.backend.pm_current_policy = policy_list[0];
+	kbdev->pm.backend.pm_current_policy->init(kbdev);
+	kbdev->pm.gpu_poweroff_time =
+			HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+	kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+	kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
+
+	return 0;
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.pm_current_policy->term(kbdev);
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
+}
+
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	kbdev->pm.backend.poweroff_timer_needed = false;
+	hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.poweroff_timer_running = false;
+
+	/* If wq is already running but is held off by pm.lock, make sure it has
+	 * no effect */
+	kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+	kbdev->pm.backend.shader_poweroff_pending_time = 0;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+	bool active;
+
+	lockdep_assert_held(&pm->lock);
+
+	/* pm_current_policy will never be NULL while pm.lock is held */
+	KBASE_DEBUG_ASSERT(backend->pm_current_policy);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	active = backend->pm_current_policy->get_core_active(kbdev);
+
+	if (active) {
+		if (backend->gpu_poweroff_pending) {
+			/* Cancel any pending power off request */
+			backend->gpu_poweroff_pending = 0;
+
+			/* If a request was pending then the GPU was still
+			 * powered, so no need to continue */
+			if (!kbdev->poweroff_pending) {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				return;
+			}
+		}
+
+		if (!backend->poweroff_timer_running && !backend->gpu_powered &&
+				(pm->poweroff_gpu_ticks ||
+				pm->poweroff_shader_ticks)) {
+			backend->poweroff_timer_needed = true;
+			backend->poweroff_timer_running = true;
+			hrtimer_start(&backend->gpu_poweroff_timer,
+					pm->gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+
+		/* Power on the GPU and any cores requested by the policy */
+		if (pm->backend.poweroff_wait_in_progress) {
+			pm->backend.poweron_required = true;
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			kbase_pm_do_poweron(kbdev, false);
+		}
+	} else {
+		/* It is an error for the power policy to power off the GPU
+		 * when there are contexts active */
+		KBASE_DEBUG_ASSERT(pm->active_count == 0);
+
+		if (backend->shader_poweroff_pending ||
+				backend->tiler_poweroff_pending) {
+			backend->shader_poweroff_pending = 0;
+			backend->tiler_poweroff_pending = 0;
+			backend->shader_poweroff_pending_time = 0;
+		}
+
+		/* Request power off */
+		if (pm->backend.gpu_powered) {
+			if (pm->poweroff_gpu_ticks) {
+				backend->gpu_poweroff_pending =
+						pm->poweroff_gpu_ticks;
+				backend->poweroff_timer_needed = true;
+				if (!backend->poweroff_timer_running) {
+					/* Start timer if not running (eg if
+					 * power policy has been changed from
+					 * always_on to something else). This
+					 * will ensure the GPU is actually
+					 * powered off */
+					backend->poweroff_timer_running
+							= true;
+					hrtimer_start(
+						&backend->gpu_poweroff_timer,
+						pm->gpu_poweroff_time,
+						HRTIMER_MODE_REL);
+				}
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+			} else {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+
+				/* Power off the GPU immediately */
+				kbase_pm_do_poweroff(kbdev, false);
+			}
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+	}
+}
+
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+	u64 desired_bitmap;
+	u64 desired_tiler_bitmap;
+	bool cores_are_available;
+	bool do_poweroff = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.pm_current_policy == NULL)
+		return;
+	if (kbdev->pm.backend.poweroff_wait_in_progress)
+		return;
+
+	if (kbdev->protected_mode_transition &&	!kbdev->shader_needed_bitmap &&
+			!kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt) {
+		/* We are trying to change in/out of protected mode - force all
+		 * cores off so that the L2 powers down */
+		desired_bitmap = 0;
+		desired_tiler_bitmap = 0;
+	} else {
+		desired_bitmap =
+		kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
+		desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+
+		if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+			desired_tiler_bitmap = 1;
+		else
+			desired_tiler_bitmap = 0;
+
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
+			/* Unless XAFFINITY is supported, enable core 0 if tiler
+			 * required, regardless of core availability */
+			if (kbdev->tiler_needed_cnt > 0 ||
+					kbdev->tiler_inuse_cnt > 0)
+				desired_bitmap |= 1;
+		}
+	}
+
+	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
+							(u32)desired_bitmap);
+	/* Are any cores being powered on? */
+	if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
+	    ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
+	    kbdev->pm.backend.ca_in_transition) {
+		/* Check if we are powering off any cores before updating shader
+		 * state */
+		if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+			/* Start timer to power off cores */
+			kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+			kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+
+			if (kbdev->pm.poweroff_shader_ticks &&
+					!kbdev->protected_mode_transition)
+				kbdev->pm.backend.shader_poweroff_pending_time =
+						kbdev->pm.poweroff_shader_ticks;
+			else
+				do_poweroff = true;
+		}
+
+		kbdev->pm.backend.desired_shader_state = desired_bitmap;
+		kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
+
+		/* If any cores are being powered on, transition immediately */
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+		/* Start timer to power off cores */
+		kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+		if (kbdev->pm.poweroff_shader_ticks &&
+				!kbdev->protected_mode_transition)
+			kbdev->pm.backend.shader_poweroff_pending_time =
+					kbdev->pm.poweroff_shader_ticks;
+		else
+			kbasep_pm_do_poweroff_cores(kbdev);
+	} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
+			desired_tiler_bitmap != 0 &&
+			kbdev->pm.backend.poweroff_timer_needed) {
+		/* If power policy is keeping cores on despite there being no
+		 * active contexts then disable poweroff timer as it isn't
+		 * required.
+		 * Only reset poweroff_timer_needed if we're not in the middle
+		 * of the power off callback */
+		kbdev->pm.backend.poweroff_timer_needed = false;
+	}
+
+	/* Ensure timer does not power off wanted cores and make sure to power
+	 * off unwanted cores */
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending &=
+				~(kbdev->pm.backend.desired_shader_state &
+								desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending &=
+				~(kbdev->pm.backend.desired_tiler_state &
+				desired_tiler_bitmap);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending &&
+				!kbdev->pm.backend.tiler_poweroff_pending)
+			kbdev->pm.backend.shader_poweroff_pending_time = 0;
+	}
+
+	/* Shader poweroff is deferred to the end of the function, to eliminate
+	 * issues caused by the core availability policy recursing into this
+	 * function */
+	if (do_poweroff)
+		kbasep_pm_do_poweroff_cores(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+}
+
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
+
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
+
+int set_policy_by_name(struct kbase_device *kbdev, const char *name)
+{
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, name)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		printk("power_policy: policy not found\n");
+		return -EINVAL;
+	}
+	trace_printk("policy name=%s\n", name);
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(set_policy_by_name);
+
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_policy *new_policy)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	const struct kbase_pm_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.pm_current_policy;
+	kbdev->pm.backend.pm_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
+								old_policy->id);
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
+								new_policy->id);
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.pm_current_policy = new_policy;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_active(kbdev);
+	kbase_pm_update_cores_state(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
+
+/* Check whether a state change has finished, and trace it as completed */
+static void
+kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
+{
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state)
+		kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 cores;
+
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	cores = shader_cores;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+
+		/* It should be almost impossible for this to overflow. It would
+		 * require 2^32 atoms to request a particular core, which would
+		 * require 2^24 contexts to submit. This would require an amount
+		 * of memory that is impossible on a 32-bit system and extremely
+		 * unlikely on a 64-bit system. */
+		int cnt = ++kbdev->shader_needed_cnt[bitnum];
+
+		if (1 == cnt) {
+			kbdev->shader_needed_bitmap |= bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt = ++kbdev->tiler_needed_cnt;
+
+		if (1 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+							change_gpu_state);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
+
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_needed_bitmap &= ~bit;
+
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		cnt = --kbdev->tiler_needed_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* Trace that any state change effectively completes immediately
+		 * - no-one will wait on the state change */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
+
+enum kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 prev_shader_needed;	/* Just for tracing */
+	u64 prev_shader_inuse;	/* Just for tracing */
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	prev_shader_needed = kbdev->shader_needed_bitmap;
+	prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+	/* If desired_shader_state does not contain the requested cores, then
+	 * power management is not attempting to powering those cores (most
+	 * likely due to core availability policy) and a new job affinity must
+	 * be chosen */
+	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+							shader_cores) {
+		return (kbdev->pm.backend.poweroff_wait_in_progress ||
+				kbdev->pm.backend.pm_current_policy == NULL) ?
+				KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
+	}
+
+	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+	    (tiler_required && !kbdev->tiler_available_bitmap)) {
+		/* Trace ongoing core transition */
+		kbase_timeline_pm_l2_transition_start(kbdev);
+		return KBASE_CORES_NOT_READY;
+	}
+
+	/* If we started to trace a state change, then trace it has being
+	 * finished by now, at the very latest */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+	/* Trace core transition done */
+	kbase_timeline_pm_l2_transition_done(kbdev);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt)
+			kbdev->shader_needed_bitmap &= ~bit;
+
+		/* shader_inuse_cnt should not overflow because there can only
+		 * be a very limited number of jobs on the h/w at one time */
+
+		kbdev->shader_inuse_cnt[bitnum]++;
+		kbdev->shader_inuse_bitmap |= bit;
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		--kbdev->tiler_needed_cnt;
+
+		kbdev->tiler_inuse_cnt++;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+	}
+
+	if (prev_shader_needed != kbdev->shader_needed_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+	return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_inuse_bitmap &= ~bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+		cnt = --kbdev->tiler_inuse_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+							change_gpu_state);
+
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
+
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+					bool tiler_required,
+					u64 shader_cores)
+{
+	unsigned long flags;
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_check_transitions_sync(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
+
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 prior_l2_users_count;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	prior_l2_users_count = kbdev->l2_users_count++;
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
+
+	/* if the GPU is reset while the l2 is on, l2 will be off but
+	 * prior_l2_users_count will be > 0. l2_available_bitmap will have been
+	 * set to 0 though by kbase_pm_init_hw */
+	if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
+		kbase_pm_check_transitions_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	wait_event(kbdev->pm.backend.l2_powered_wait,
+					kbdev->pm.backend.l2_powered == 1);
+
+	/* Trace that any state change completed immediately */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
+
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->l2_users_count++;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
+
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
+
+	--kbdev->l2_users_count;
+
+	if (!kbdev->l2_users_count) {
+		kbase_pm_check_transitions_nolock(kbdev);
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
new file mode 100755
index 0000000..611a90e
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -0,0 +1,227 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ *         initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+	KBASE_CORES_NOT_READY = 0,
+	KBASE_NEW_AFFINITY = 1,
+	KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ *                          for jobs to be submitted
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete/the cores might not be available
+ * until a Power Management IRQ.
+ *
+ * Return: 0 if the cores were successfully requested, or -errno otherwise.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ *                            jobs to be submitted.
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job is
+ * cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and if so update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ *
+ *         %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
+ *
+ *         %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+						struct kbase_device *kbdev,
+						bool tiler_required,
+						u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_register_inuse_cores() )
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
+ * may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups, power up, wait and prevent
+ * the power manager from powering down the l2 caches.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2
+ *
+ * It is the callers responsibility to ensure that the l2 is already powered up
+ * and to eventually call kbase_pm_release_l2_caches()
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power manager
+ * to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c
new file mode 100644
index 0000000..d965033
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c
@@ -0,0 +1,169 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+#include <linux/of.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <backend/gpu/mali_kbase_power_model_simple.h>
+
+/*
+ * This model is primarily designed for the Juno platform. It may not be
+ * suitable for other platforms.
+ */
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static u32 dynamic_coefficient;
+static u32 static_coefficient;
+static s32 ts[4];
+static struct thermal_zone_device *gpu_tz;
+
+static unsigned long model_static_power(unsigned long voltage)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	unsigned long temperature;
+#else
+	int temperature;
+#endif
+	unsigned long temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	if (gpu_tz) {
+		int ret;
+
+		ret = gpu_tz->ops->get_temp(gpu_tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	} else {
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(ts[3] * temp_cubed)
+			+ (ts[2] * temp_squared)
+			+ (ts[1] * temp)
+			+ ts[0];
+
+	return (((static_coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+}
+
+static unsigned long model_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+
+	return (dynamic_coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+struct devfreq_cooling_ops power_model_simple_ops = {
+#else
+struct devfreq_cooling_power power_model_simple_ops = {
+#endif
+	.get_static_power = model_static_power,
+	.get_dynamic_power = model_dynamic_power,
+};
+
+int kbase_power_model_simple_init(struct kbase_device *kbdev)
+{
+	struct device_node *power_model_node;
+	const char *tz_name;
+	u32 static_power, dynamic_power;
+	u32 voltage, voltage_squared, voltage_cubed, frequency;
+
+	power_model_node = of_get_child_by_name(kbdev->dev->of_node,
+			"power_model");
+	if (!power_model_node) {
+		dev_err(kbdev->dev, "could not find power_model node\n");
+		return -ENODEV;
+	}
+	if (!of_device_is_compatible(power_model_node,
+			"arm,mali-simple-power-model")) {
+		dev_err(kbdev->dev, "power_model incompatible with simple power model\n");
+		return -ENODEV;
+	}
+
+	if (of_property_read_string(power_model_node, "thermal-zone",
+			&tz_name)) {
+		dev_err(kbdev->dev, "ts in power_model not available\n");
+		return -EINVAL;
+	}
+
+	gpu_tz = thermal_zone_get_zone_by_name(tz_name);
+	if (IS_ERR(gpu_tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(gpu_tz));
+		gpu_tz = NULL;
+
+		return -EPROBE_DEFER;
+	}
+
+	if (of_property_read_u32(power_model_node, "static-power",
+			&static_power)) {
+		dev_err(kbdev->dev, "static-power in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "dynamic-power",
+			&dynamic_power)) {
+		dev_err(kbdev->dev, "dynamic-power in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "voltage",
+			&voltage)) {
+		dev_err(kbdev->dev, "voltage in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "frequency",
+			&frequency)) {
+		dev_err(kbdev->dev, "frequency in power_model not available\n");
+		return -EINVAL;
+	}
+	voltage_squared = (voltage * voltage) / 1000;
+	voltage_cubed = voltage * voltage * voltage;
+	static_coefficient = (static_power << 20) / (voltage_cubed >> 10);
+	dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared)
+			* 1000) / frequency;
+
+	if (of_property_read_u32_array(power_model_node, "ts", (u32 *)ts, 4)) {
+		dev_err(kbdev->dev, "ts in power_model not available\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h
new file mode 100644
index 0000000..9b5e69a
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_POWER_MODEL_SIMPLE_H_
+#define _BASE_POWER_MODEL_SIMPLE_H_
+
+/**
+ * kbase_power_model_simple_init - Initialise the simple power model
+ * @kbdev: Device pointer
+ *
+ * The simple power model estimates power based on current voltage, temperature,
+ * and coefficients read from device tree. It does not take utilization into
+ * account.
+ *
+ * The power model requires coefficients from the power_model node in device
+ * tree. The absence of this node will prevent the model from functioning, but
+ * should not prevent the rest of the driver from running.
+ *
+ * Return: 0 on success
+ *         -ENOSYS if the power_model node is not present in device tree
+ *         -EPROBE_DEFER if the thermal zone specified in device tree is not
+ *         currently available
+ *         Any other negative value on failure
+ */
+int kbase_power_model_simple_init(struct kbase_device *kbdev);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+extern struct devfreq_cooling_ops power_model_simple_ops;
+#else
+extern struct devfreq_cooling_power power_model_simple_ops;
+#endif
+
+#endif /* _BASE_POWER_MODEL_SIMPLE_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100755
index 0000000..d992989
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,103 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush -  Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367
+ *
+ * Note : If GPU resets occur then the counters are reset to zero, the delay may
+ * not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+	u32 base_count = 0;
+
+	/*
+	 * The caller must be holding onto the kctx or the call is from
+	 * userspace.
+	 */
+	kbase_pm_context_active(kctx->kbdev);
+	kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+	while (true) {
+		u32 new_count;
+
+		new_count = kbase_reg_read(kctx->kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		/* First time around, just store the count. */
+		if (base_count == 0) {
+			base_count = new_count;
+			continue;
+		}
+
+		/* No need to handle wrapping, unsigned maths works for this. */
+		if ((new_count - base_count) > 1000)
+			break;
+	}
+
+	kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+	kbase_pm_context_idle(kctx->kbdev);
+}
+#endif				/* CONFIG_MALI_NO_MALI */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
new file mode 100755
index 0000000..35088ab
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
new file mode 100755
index 0000000..35ff2f1
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -0,0 +1,126 @@
+#
+# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/drivers/gpu/arm/midgard/ 
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE                += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           += ../../kernel/drivers/gpu/arm/midgard/docs
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
new file mode 100755
index 0000000..7ae05c2
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR;
+	size="12,8";
+	compound=true;
+
+	node [ shape = box ];
+
+	subgraph cluster_policy_queues {
+		low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+		queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+		rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+		label = "Policy's Queue(s)";
+	}
+
+	call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+	{
+		rank=same;
+		ordering=out;
+		call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+		call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+		call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+	}
+
+	subgraph cluster_runpool {
+
+		as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+		as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+		as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+		as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+		label = "Policy's Run Pool";
+	}
+
+	{
+		rank=same;
+		call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+		sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+	}
+
+	{
+		rank=same;
+		ordering=out;
+		sstop [ shape=ellipse label="SS-Timer expires" ]
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		irq [ label="IRQ" shape=ellipse ];
+
+		job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+	}
+
+	hstop [ shape=ellipse label="HS-Timer expires" ]
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+	low_queue:qr -> call_dequeue:w;
+	rt_queue:qr -> call_dequeue:w;
+
+	call_dequeue -> as1 [lhead=cluster_runpool];
+
+	as1->call_jdequeue         [ltail=cluster_runpool];
+	call_jdequeue->jobslots:0;
+	call_jdequeue->sstop_dotfixup [ arrowhead=none];
+	sstop_dotfixup->sstop      [label="Spawn SS-Timer"];
+	sstop->jobslots            [label="SoftStop"];
+	sstop->hstop               [label="Spawn HS-Timer"];
+	hstop->jobslots:ne            [label="HardStop"];
+
+
+	as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+	call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+	call_ctxfinish->call_ctxdone [constraint=false];
+
+	call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+	{
+	jobslots->irq   [constraint=false];
+
+	irq->job_finish [constraint=false];
+	}
+
+	irq->as2  [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
new file mode 100755
index 0000000..159b993
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR
+	size="6,6"
+	compound=true;
+
+	node [ shape = box ];
+
+	call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+	policy_queue [ label="Policy's Queue" ];
+
+	{
+		rank=same;
+		runpool [ label="Policy's Run Pool" ];
+
+		ctx_finish [ label="ctx finished" ];
+	}
+
+	{
+		rank=same;
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		job_finish [ label="Job finished" ];
+	}
+
+
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> policy_queue;
+
+	policy_queue->runpool [label="dequeue ctx" weight=0.1];
+	runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+	runpool->ctx_finish [ style=dotted ];
+
+	runpool->jobslots  [label="dequeue job" weight=0.1];
+	jobslots->runpool  [label="requeue job" weight=0.1];
+
+	jobslots->job_finish [ style=dotted ];
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
new file mode 100755
index 0000000..8b07cbc
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -0,0 +1,223 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t83x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t82x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tMIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tHEx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
new file mode 100755
index 0000000..4d95b4f
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -0,0 +1,1014 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_ISSUES_H_
+#define _BASE_HWCONFIG_ISSUES_H_
+
+enum base_hw_issue {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8879,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_generic[] = {
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t72x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t76x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t60x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t62x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t86x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_T76X_3982,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t83x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t82x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8456,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+
+
+#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
new file mode 100755
index 0000000..bcb05e4
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,1819 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _BASE_KERNEL_H_
+#define _BASE_KERNEL_H_
+
+#ifndef __user
+#define __user
+#endif
+
+/* Support UK6 IOCTLS */
+#define BASE_LEGACY_UK6_SUPPORT 1
+
+/* Support UK7 IOCTLS */
+/* NB: To support UK6 we also need to support UK7 */
+#define BASE_LEGACY_UK7_SUPPORT 1
+
+/* Support UK8 IOCTLS */
+#define BASE_LEGACY_UK8_SUPPORT 1
+
+/* Support UK9 IOCTLS */
+#define BASE_LEGACY_UK9_SUPPORT 1
+
+/* Support UK10_2 IOCTLS */
+#define BASE_LEGACY_UK10_2_SUPPORT 1
+
+/* Support UK10_4 IOCTLS */
+#define BASE_LEGACY_UK10_4_SUPPORT 1
+
+typedef struct base_mem_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_mem_handle;
+
+#include "mali_base_mem_priv.h"
+#include "mali_kbase_profiling_gator_api.h"
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT              256
+
+#define BASEP_JD_SEM_PER_WORD_LOG2      5
+#define BASEP_JD_SEM_PER_WORD           (1 << BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_WORD_NR(x)         ((x) >> BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_MASK_IN_WORD(x)    (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1)))
+#define BASEP_JD_SEM_ARRAY_SIZE         BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT)
+
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET             ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET           ((unsigned char)0)
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined CDBG_ASSERT
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined KBASE_DEBUG_ASSERT
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#error assert macro not defined!
+#endif
+
+#if defined PAGE_MASK
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union kbase_pointer {
+	void __user *value;	  /**< client should store their pointers here */
+	u32 compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	u64 sizer;	  /**< Force 64-bit storage for all clients regardless */
+} kbase_pointer;
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (eg @c MEM_PROT_CPU_WR | @c MEM_HINT_CPU_RD),
+ * which defines a @a write-only region on the CPU side, which is
+ * heavily read by the CPU...
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see ::BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * See ::base_mem_alloc_flags.
+ *
+ */
+enum {
+/* IN */
+	BASE_MEM_PROT_CPU_RD = (1U << 0),      /**< Read access CPU side */
+	BASE_MEM_PROT_CPU_WR = (1U << 1),      /**< Write access CPU side */
+	BASE_MEM_PROT_GPU_RD = (1U << 2),      /**< Read access GPU side */
+	BASE_MEM_PROT_GPU_WR = (1U << 3),      /**< Write access GPU side */
+	BASE_MEM_PROT_GPU_EX = (1U << 4),      /**< Execute allowed on the GPU
+						    side */
+
+	/* BASE_MEM_HINT flags have been removed, but their values are reserved
+	 * for backwards compatibility with older user-space drivers. The values
+	 * can be re-used once support for r5p0 user-space drivers is removed,
+	 * presumably in r7p0.
+	 *
+	 * RESERVED: (1U << 5)
+	 * RESERVED: (1U << 6)
+	 * RESERVED: (1U << 7)
+	 * RESERVED: (1U << 8)
+	 */
+
+	BASE_MEM_GROW_ON_GPF = (1U << 9),      /**< Grow backing store on GPU
+						    Page Fault */
+
+	BASE_MEM_COHERENT_SYSTEM = (1U << 10), /**< Page coherence Outer
+						    shareable, if available */
+	BASE_MEM_COHERENT_LOCAL = (1U << 11),  /**< Page coherence Inner
+						    shareable */
+	BASE_MEM_CACHED_CPU = (1U << 12),      /**< Should be cached on the
+						    CPU */
+
+/* IN/OUT */
+	BASE_MEM_SAME_VA = (1U << 13), /**< Must have same VA on both the GPU
+					    and the CPU */
+/* OUT */
+	BASE_MEM_NEED_MMAP = (1U << 14), /**< Must call mmap to aquire a GPU
+					     address for the alloc */
+/* IN */
+	BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence
+					     Outer shareable, required. */
+	BASE_MEM_SECURE = (1U << 16),          /**< Secure memory */
+	BASE_MEM_DONT_NEED = (1U << 17),       /**< Not needed physical
+						    memory */
+	BASE_MEM_IMPORT_SHARED = (1U << 18),   /**< Must use shared CPU/GPU zone
+						    (SAME_VA zone) but doesn't
+						    require the addresses to
+						    be the same */
+};
+
+/**
+ * @brief Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the ::base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 19
+
+/**
+  * A mask for all output bits, excluding IN/OUT bits.
+  */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/**
+  * A mask for all input bits, including IN/OUT bits.
+  */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/**
+ * A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+	 BASE_MEM_COHERENT_LOCAL)
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMP: UMP import. Handle type is ump_secure_id.
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	BASE_MEM_IMPORT_TYPE_UMP = 1,
+	BASE_MEM_IMPORT_TYPE_UMM = 2,
+	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+} base_mem_import_type;
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:	kbase_pointer to imported user buffer
+ * @length:	length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+	kbase_pointer ptr;
+	u64 length;
+};
+
+/**
+ * @brief Invalid memory handle.
+ *
+ * Return value from functions returning @ref base_mem_handle on error.
+ *
+ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} })
+
+/**
+ * @brief Special write-alloc memory handle.
+ *
+ * A special handle is used to represent a region where a special page is mapped
+ * with a write-alloc cache setup, typically used when the write result of the
+ * GPU isn't needed, but the GPU must write anyway.
+ *
+ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} })
+
+#define BASEP_MEM_INVALID_HANDLE               (0ull  << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE               (1ull  << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
+/* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
+						BASE_MEM_COOKIE_BASE)
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB  0xfffff000UL
+
+
+/* Bit mask of cookies used for for memory allocation setup */
+#define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
+
+
+/**
+ * @brief Result codes of changing the size of the backing store allocated to a tmem region
+ */
+typedef enum base_backing_threshold_status {
+	BASE_BACKING_THRESHOLD_OK = 0,			    /**< Resize successful */
+	BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1,	    /**< Not a growable tmem object */
+	BASE_BACKING_THRESHOLD_ERROR_OOM = -2,		    /**< Increase failed due to an out-of-memory condition */
+	BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
+} base_backing_threshold_status;
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief a basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+	struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent imported memory object.
+ * Simple opague handle to imported memory, can't be used
+ * with anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+	struct {
+		int fd;
+	} basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
+ */
+typedef struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completly unused
+ * by the Base driver. It can be used to store things such as callback
+ * function pointer, data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+	u64 blob[2];	 /**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ *         Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *         specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+	base_mem_handle handle;
+	u64 offset;
+	u64 length;
+};
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extent:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ */
+struct base_jit_alloc_info {
+	u64 gpu_alloc_addr;
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	u8 id;
+};
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
+ * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without
+ * changing the structure size).
+ * When the flag is set for a particular dependency to signal that it is an ordering only dependency then
+ * errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID  (0)       /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA     (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER    (1U << 1) /**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly.  By not specifying the
+ * correct settings can/will cause an early job termination.  Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u32 base_jd_core_req;
+
+/* Requirements that come from the HW */
+
+/**
+ * No requirement, dependency only
+ */
+#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
+
+/**
+ * Requires fragment shaders
+ */
+#define BASE_JD_REQ_FS  ((base_jd_core_req)1 << 0)
+
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS  ((base_jd_core_req)1 << 1)
+#define BASE_JD_REQ_T   ((base_jd_core_req)1 << 2)   /**< Requires tiling */
+#define BASE_JD_REQ_CF  ((base_jd_core_req)1 << 3)   /**< Requires cache flushes */
+#define BASE_JD_REQ_V   ((base_jd_core_req)1 << 4)   /**< Requires value writeback */
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC  ((base_jd_core_req)1 << 13)
+
+/**
+ * SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP  ((base_jd_core_req)1 << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON               ((base_jd_core_req)1 << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced no syncsets can be bundled with the atom
+ * but should instead be part of a NULL jobs inserted into the dependency tree.
+ * The first pre_dep object must be configured for the external resouces to use,
+ * the second pre_dep object can be used to create other dependencies.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES   ((base_jd_core_req)1 << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB        ((base_jd_core_req)1 << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME      (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER          (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/**
+ * SW Only requirement : Replay job.
+ *
+ * If the preceeding job fails, the replay job will cause the jobs specified in
+ * the list of base_jd_replay_payload pointed to by the jc pointer to be
+ * replayed.
+ *
+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
+ * job is failed, as well as any following dependencies.
+ *
+ * The replayed jobs will require a number of atom IDs. If there are not enough
+ * free atom IDs then the replay job will fail.
+ *
+ * If the preceeding job does not fail, then the replay job is returned as
+ * completed.
+ *
+ * The replayed jobs will never be returned to userspace. The preceeding failed
+ * job will be returned to userspace as failed; the status of this job should
+ * be ignored. Completion should be determined by the status of the replay soft
+ * job.
+ *
+ * In order for the jobs to be replayed, the job headers will have to be
+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type
+ * field indicates a Vertex Shader Job then it will be changed to Null Job.
+ *
+ * The replayed jobs have the following assumptions :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - Pre-dependencies are created based on job order.
+ * - Atom numbers are automatically assigned.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ */
+#define BASE_JD_REQ_SOFT_REPLAY                 (BASE_JD_REQ_SOFT_JOB | 0x4)
+/**
+ * SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ *   other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ *   possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET              (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET            (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY             (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/**
+ * SW only requirement: Just In Time allocation
+ *
+ * This job requests a JIT allocation based on the request in the
+ * @base_jit_alloc_info structure which is passed via the jc element of
+ * the atom.
+ *
+ * It should be noted that the id entry in @base_jit_alloc_info must not
+ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job to free the JIT allocation is still made.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC              (BASE_JD_REQ_SOFT_JOB | 0x9)
+/**
+ * SW only requirement: Just In Time free
+ *
+ * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC
+ * to be freed. The ID of the JIT allocation is passed via the jc element of
+ * the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE               (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/**
+ * SW only requirement: Map external resource
+ *
+ * This job requests external resource(s) are mapped once the dependencies
+ * of the job have been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP            (BASE_JD_REQ_SOFT_JOB | 0xb)
+/**
+ * SW only requirement: Unmap external resource
+ *
+ * This job requests external resource(s) are unmapped once the dependencies
+ * of the job has been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP          (BASE_JD_REQ_SOFT_JOB | 0xc)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE    ((base_jd_core_req)1 << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE   ((base_jd_core_req)1 << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job starts which does not have this bit set or a job completes
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if
+ * the CPU may have written to memory addressed by the job since the last job
+ * without this bit set was submitted.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job completes which does not have this bit set or a job starts
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if
+ * the CPU may read from or partially overwrite memory addressed by the job
+ * before the next job without this bit set completes.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
+
+/**
+ * These requirement bits are currently unused in base_jd_core_req
+ */
+#define BASEP_JD_REQ_RESERVED \
+	(~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
+	BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
+	BASE_JD_REQ_EVENT_COALESCE | \
+	BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
+	BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
+	BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END))
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASE_JD_REQ_ATOM_TYPE \
+	(BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
+	BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of a soft job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
+
+/**
+ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
+ * handles retaining cores for power management and affinity management.
+ *
+ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
+ * where lots of atoms could be submitted before powerup, and each has an
+ * affinity chosen that causes other atoms to have an affinity
+ * violation. Whilst the affinity was not causing violations at the time it
+ * was chosen, it could cause violations thereafter. For example, 1000 jobs
+ * could have had their affinity chosen during the powerup time, so any of
+ * those 1000 jobs could cause an affinity violation later on.
+ *
+ * The attack would otherwise occur because other atoms/contexts have to wait for:
+ * -# the currently running atoms (which are causing the violation) to
+ * finish
+ * -# and, the atoms that had their affinity chosen during powerup to
+ * finish. These are run preferrentially because they don't cause a
+ * violation, but instead continue to cause the violation in others.
+ * -# or, the attacker is scheduled out (which might not happen for just 2
+ * contexts)
+ *
+ * By re-choosing the affinity (which is designed to avoid violations at the
+ * time it's chosen), we break condition (2) of the wait, which minimizes the
+ * problem to just waiting for current jobs to finish (which can be bounded if
+ * the Job Scheduling Policy has a timer).
+ */
+enum kbase_atom_coreref_state {
+	/** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
+	KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
+	/** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
+	KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
+	/** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
+	KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
+	/** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
+	KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
+	/** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
+	KBASE_ATOM_COREREF_STATE_READY
+};
+
+/*
+ * Base Atom priority
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling between atoms of the same type within
+ * a base context, and only after the atoms have had dependencies resolved.
+ * Fragment atoms does not affect non-frament atoms with lower priorities, and
+ * the other way around. For example, a low priority atom that has had its
+ * dependencies resolved might run before a higher priority atom that has not
+ * had its dependencies resolved.
+ *
+ * The scheduling between base contexts/processes and between atoms from
+ * different base contexts/processes is unaffected by atom priority.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
+ *   resolved, and atom 'X' has a higher priority than atom 'Y'
+ * - If atom 'Y' is currently running on the HW, then it is interrupted to
+ *   allow atom 'X' to run soon after
+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ *   the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * - Any two atoms that have the same priority could run in any order with
+ *   respect to each other. That is, there is no ordering constraint between
+ *   atoms of the same priority.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM  ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_HIGH    ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW     ((base_jd_prio)2)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+enum kbase_jd_atom_state {
+	/** Atom is not used */
+	KBASE_JD_ATOM_STATE_UNUSED,
+	/** Atom is queued in JD */
+	KBASE_JD_ATOM_STATE_QUEUED,
+	/** Atom has been given to JS (is runnable/running) */
+	KBASE_JD_ATOM_STATE_IN_JS,
+	/** Atom has been completed, but not yet handed back to job dispatcher
+	 *  for dependency resolution */
+	KBASE_JD_ATOM_STATE_HW_COMPLETED,
+	/** Atom has been completed, but not yet handed back to userspace */
+	KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+	base_atom_id  atom_id;               /**< An atom number */
+	base_jd_dep_type dependency_type;    /**< Dependency type */
+};
+
+/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value.
+ * In order to keep the size of the structure same, padding field has been adjusted
+ * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines)
+ * is added at the end of the structure. Place in the structure previously occupied by u16 core_req
+ * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission
+ * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left
+ * for possible future use. */
+typedef struct base_jd_atom_v2 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	kbase_pointer extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	u16 compat_core_req;	            /**< core requirements which correspond to the legacy support for UK 10.2 */
+	struct base_dependency pre_dep[2];  /**< pre-dependencies, one need to use SETTER function to assign this field,
+	this is done in order to reduce possibility of improper assigment of a dependency field */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;                  /**< Atom priority. Refer to @ref base_jd_prio for more details */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[1];
+	base_jd_core_req core_req;          /**< core requirements */
+} base_jd_atom_v2;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+struct base_jd_atom_v2_uk6 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	kbase_pointer extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	u16 core_req;                       /**< core requirements */
+	base_atom_id pre_dep[2]; /**< pre-dependencies */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;		    /**< priority - smaller is higher priority */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[7];
+};
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+typedef enum base_external_resource_access {
+	BASE_EXT_RES_ACCESS_SHARED,
+	BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+	u64 ext_resource;
+} base_external_resource;
+
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+	u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	u64 address;
+	u64 size;
+	struct base_external_resource extres;
+};
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] dep          The kbase jd atom dependency to be initialized.
+ * @param     id           The atom_id to be assigned.
+ * @param     dep_type     The dep_type to be assigned.
+ *
+ */
+static inline void base_jd_atom_dep_set(struct base_dependency *dep,
+		base_atom_id id, base_jd_dep_type dep_type)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	/*
+	 * make sure we don't set not allowed combinations
+	 * of atom_id/dependency_type.
+	 */
+	LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+			(id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
+
+	dep->atom_id = id;
+	dep->dependency_type = dep_type;
+}
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] dep          The kbase jd atom dependency to be written.
+ * @param[in]     from         The dependency to make a copy from.
+ *
+ */
+static inline void base_jd_atom_dep_copy(struct base_dependency *dep,
+		const struct base_dependency *from)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type);
+}
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ */
+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+	LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init or @a base_fence_import.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ */
+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up an external resource object to reference
+ * a memory allocation and the type of access requested.
+ *
+ * @param[in] res     The resource object to initialize
+ * @param     handle  The handle to the imported memory object, must be
+ *                    obtained by calling @ref base_mem_as_import_handle().
+ * @param     access  The type of access requested
+ */
+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
+{
+	u64 address;
+
+	address = handle.basep.handle;
+
+	LOCAL_ASSERT(res != NULL);
+	LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+	LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+	res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+	BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
+	BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
+	BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */
+	BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
+	BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
+	BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
+	BASE_JD_SW_EVENT_RESERVED = (3u << 11),	/**< Reserved event type */
+	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)	    /**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0  - subtype
+ * @li 12:11 - type
+ * @li 13    - SW success (only valid if the SW bit is set)
+ * @li 14    - SW event (HW event if not set)
+ * @li 15    - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+typedef enum base_jd_event_code {
+	/* HW defined exceptions */
+
+	/** Start of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+	/* non-fatal exceptions */
+	BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
+	BASE_JD_EVENT_DONE = 0x01,
+	BASE_JD_EVENT_STOPPED = 0x03,	  /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
+	BASE_JD_EVENT_TERMINATED = 0x04,  /**< This is actually a fault status code - the job was hard stopped */
+	BASE_JD_EVENT_ACTIVE = 0x08,	  /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
+
+	/** End of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+
+	/** Start of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+	/* job exceptions */
+	BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+	BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+	BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+	BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+	BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+	BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+	BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+	BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+	BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+	BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+	BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+	BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+	BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+	BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+	BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+	BASE_JD_EVENT_STATE_FAULT = 0x5A,
+	BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+	BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+	/* GPU exceptions */
+	BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+	BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+	/* MMU exceptions */
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+	BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+	BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+	/* SW defined exceptions */
+	BASE_JD_EVENT_MEM_GROWTH_FAILED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_TIMED_OUT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+	BASE_JD_EVENT_JOB_CANCELLED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+	BASE_JD_EVENT_JOB_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+	BASE_JD_EVENT_PM_EVENT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+	BASE_JD_EVENT_FORCE_REPLAY	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
+
+	BASE_JD_EVENT_BAG_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+	/** End of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+	BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
+	BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+	/** End of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of Kernel-only status codes. Such codes are never returned to user-space */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+	BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+	/** End of Kernel-only status codes. */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. The can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (ie all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+	base_jd_event_code event_code;  /**< event code */
+	base_atom_id atom_number;       /**< the atom number that has completed */
+	struct base_jd_udata udata;     /**< user data */
+} base_jd_event_v2;
+
+/**
+ * Padding required to ensure that the @ref struct base_dump_cpu_gpu_counters structure fills
+ * a full cache line.
+ */
+
+#define BASE_CPU_GPU_CACHE_LINE_PADDING (36)
+
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field of @ref base_jd_atom.
+ *
+ * This structure must be padded to ensure that it will occupy whole cache lines. This is to avoid
+ * cases where access to pages containing the structure is shared between cached and un-cached
+ * memory regions, which would cause memory corruption.  Here we set the structure size to be 64 bytes
+ * which is the cache line for ARM A15 processors.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+	u64 system_time;
+	u64 cycle_counter;
+	u64 sec;
+	u32 usec;
+	u8 padding[BASE_CPU_GPU_CACHE_LINE_PADDING];
+} base_dump_cpu_gpu_counters;
+
+
+
+/** @} end group base_user_api_job_dispatch */
+
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistant guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ *  - Part of a critical inner-loop
+ *  - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU  registers themselves.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * _mali_base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to _mali_base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties
+ *
+ * The Platform Config File sets up gpu properties that are specific to a
+ * certain platform. Properties that are 'Implementation Defined' in the
+ * Midgard Architecture spec are placed here.
+ *
+ * @note Reference configurations are provided for Midgard Implementations, such as
+ * the Mali-T600 family. The customer need not repeat this information, and can select one of
+ * these reference configurations. For example, VA_BITS, PA_BITS and the
+ * maximum number of samples per pixel might vary between Midgard Implementations, but
+ * \b not for platforms using the Mali-T604. This information is placed in
+ * the reference configuration files.
+ *
+ * The System Integrator creates the following structure:
+ * - platform_XYZ
+ * - platform_XYZ/plat
+ * - platform_XYZ/plat/plat_config.h
+ *
+ * They then edit plat_config.h, using the example plat_config.h files as a
+ * guide.
+ *
+ * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will
+ * receive a helpful \#error message if they do not do this correctly. This
+ * selects the Reference Configuration for the Midgard Implementation. The rationale
+ * behind this decision (against asking the customer to write \#include
+ * <gpus/mali_t600.h> in their plat_config.h) is as follows:
+ * - This mechanism 'looks' like a regular config file (such as Linux's
+ * .config)
+ * - It is difficult to get wrong in a way that will produce strange build
+ * errors:
+ *  - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and
+ *  so they won't accidentally pick another file with 'mali_t600' in its name
+ *  - When the build doesn't work, the System Integrator may think the DDK is
+ *  doesn't work, and attempt to fix it themselves:
+ *   - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the
+ *   error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells
+ *   you.
+ *   - For a \#include mechanism, checks must still be made elsewhere, which the
+ *   System Integrator may try working around by setting \#defines (such as
+ *   VA_BITS) themselves in their plat_config.h. In the  worst case, they may
+ *   set the prevention-mechanism \#define of
+ *   "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN".
+ *   - In this case, they would believe they are on the right track, because
+ *   the build progresses with their fix, but with errors elsewhere.
+ *
+ * However, there is nothing to prevent the customer using \#include to organize
+ * their own configurations files hierarchically.
+ *
+ * The mechanism for the header file processing is as follows:
+ *
+ * @dot
+   digraph plat_config_mechanism {
+	   rankdir=BT
+	   size="6,6"
+
+       "mali_base.h";
+	   "gpu/mali_gpu.h";
+
+	   node [ shape=box ];
+	   {
+	       rank = same; ordering = out;
+
+		   "gpu/mali_gpu_props.h";
+		   "base/midg_gpus/mali_t600.h";
+		   "base/midg_gpus/other_midg_gpu.h";
+	   }
+	   { rank = same; "plat/plat_config.h"; }
+	   {
+	       rank = same;
+		   "gpu/mali_gpu.h" [ shape=box ];
+		   gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ];
+		   select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ;
+	   }
+	   node [ shape=box ];
+	   { rank = same; "plat/plat_config.h"; }
+	   { rank = same; "mali_base.h"; }
+
+	   "mali_base.h" -> "gpu/mali_gpu.h" -> "gpu/mali_gpu_props.h";
+	   "mali_base.h" -> "plat/plat_config.h" ;
+	   "mali_base.h" -> select_gpu ;
+
+	   "plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ;
+	   gpu_chooser -> select_gpu [style="dotted,bold"] ;
+
+	   select_gpu -> "base/midg_gpus/mali_t600.h" ;
+	   select_gpu -> "base/midg_gpus/other_midg_gpu.h" ;
+   }
+   @enddot
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided the entire processed @ref base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algoirthm can be determined either by a u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * requried for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+struct mali_base_gpu_core_props {
+	/**
+	 * Product specific value.
+	 */
+	u32 product_id;
+
+	/**
+	 * Status of the GPU release.
+     * No defined values, but starts at 0 and increases by one for each release
+     * status (alpha, beta, EAC, etc.).
+     * 4 bit values (0-15).
+	 */
+	u16 version_status;
+
+	/**
+	 * Minor release number of the GPU. "P" part of an "RnPn" release number.
+     * 8 bit values (0-255).
+	 */
+	u16 minor_revision;
+
+	/**
+	 * Major release number of the GPU. "R" part of an "RnPn" release number.
+     * 4 bit values (0-15).
+	 */
+	u16 major_revision;
+
+	u16 padding;
+
+	/**
+	 * @usecase GPU clock speed is not specified in the Midgard Architecture, but is
+	 * <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+	 */
+	u32 gpu_speed_mhz;
+
+	/**
+	 * @usecase GPU clock max/min speed is required for computing best/worst case
+	 * in tasks as job scheduling ant irq_throttling. (It is not specified in the
+	 *  Midgard Architecture).
+	 */
+	u32 gpu_freq_khz_max;
+	u32 gpu_freq_khz_min;
+
+	/**
+	 * Size of the shader program counter, in bits.
+	 */
+	u32 log2_program_counter_size;
+
+	/**
+	 * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+	 * bitpattern where a set bit indicates that the format is supported.
+	 *
+	 * Before using a texture format, it is recommended that the corresponding
+	 * bit be checked.
+	 */
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	/**
+	 * Theoretical maximum memory available to the GPU. It is unlikely that a
+	 * client will be able to allocate all of this memory for their own
+	 * purposes, but this at least provides an upper bound on the memory
+	 * available to the GPU.
+	 *
+	 * This is required for OpenCL's clGetDeviceInfo() call when
+	 * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+	 * client will not be expecting to allocate anywhere near this value.
+	 */
+	u64 gpu_available_memory_size;
+};
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+	u8 log2_line_size;
+	u8 log2_cache_size;
+	u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+	u32 bin_size_bytes;	/* Max is 4*2^15 */
+	u32 max_active_levels;	/* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+	u32 max_threads;            /* Max. number of threads per core */
+	u32 max_workgroup_size;     /* Max. number of threads per workgroup */
+	u32 max_barrier_size;       /* Max. number of threads that can synchronize on a simple barrier */
+	u16 max_registers;          /* Total size [1..65535] of the register file available per core. */
+	u8  max_task_queue;         /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+	u8  max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+	u8  impl_tech;              /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+	u8  padding[7];
+};
+
+/**
+ * @brief descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+	u64 core_mask;	       /**< Core restriction mask required for the group */
+	u16 num_cores;	       /**< Number of cores in the group */
+	u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+	u32 num_groups;
+
+	/**
+	 * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+	 *
+	 * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+	 * whether the core groups are coherent or not. Hence this member is needed
+	 * to calculate how much memory is required for dumping.
+	 *
+	 * @note Do not use it to work out how many valid elements are in the
+	 * group[] member. Use num_groups instead.
+	 */
+	u32 num_core_groups;
+
+	/**
+	 * Coherency features of the memory, accessed by @ref gpu_mem_features
+	 * methods
+	 */
+	u32 coherency;
+
+	u32 padding;
+
+	/**
+	 * Descriptors of coherent groups
+	 */
+	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+	u64 shader_present;
+	u64 tiler_present;
+	u64 l2_present;
+	u64 unused_1; /* keep for backward compatibility */
+
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 mem_features;
+	u32 mmu_features;
+
+	u32 as_present;
+
+	u32 js_present;
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 tiler_features;
+	u32 texture_features[3];
+
+	u32 gpu_id;
+
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+
+	/*
+	 * Note: This is the _selected_ coherency mode rather than the
+	 * available modes as exposed in the coherency_features register.
+	 */
+	u32 coherency_mode;
+};
+
+/**
+ * Return structure for _mali_base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this datastructure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct mali_base_gpu_props {
+	struct mali_base_gpu_core_props core_props;
+	struct mali_base_gpu_l2_cache_props l2_props;
+	u64 unused_1; /* keep for backwards compatibility */
+	struct mali_base_gpu_tiler_props tiler_props;
+	struct mali_base_gpu_thread_props thread_props;
+
+	/** This member is large, likely to be 128 bytes */
+	struct gpu_raw_gpu_props raw_props;
+
+	/** This must be last member of the structure */
+	struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * \enum base_context_create_flags
+ *
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+enum base_context_create_flags {
+	/** No flags set */
+	BASE_CONTEXT_CREATE_FLAG_NONE = 0,
+
+	/** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */
+	BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0),
+
+	/** Base context is a 'System Monitor' context for Hardware counters.
+	 *
+	 * One important side effect of this is that job submission is disabled. */
+	BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1)
+};
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init()
+ */
+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
+	(((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
+	  ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
+ */
+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
+	((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
+
+/*
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+/** Private flag tracking whether job descriptor dumping is disabled */
+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31))
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C Pre-processor macros are exposed here to do with Platform
+ * Config.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revison.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/**
+ * @brief The payload for a replay job. This must be in GPU memory.
+ */
+typedef struct base_jd_replay_payload {
+	/**
+	 * Pointer to the first entry in the base_jd_replay_jc list.  These
+	 * will be replayed in @b reverse order (so that extra ones can be added
+	 * to the head in future soft jobs without affecting this soft job)
+	 */
+	u64 tiler_jc_list;
+
+	/**
+	 * Pointer to the fragment job chain.
+	 */
+	u64 fragment_jc;
+
+	/**
+	 * Pointer to the tiler heap free FBD field to be modified.
+	 */
+	u64 tiler_heap_free;
+
+	/**
+	 * Hierarchy mask for the replayed fragment jobs. May be zero.
+	 */
+	u16 fragment_hierarchy_mask;
+
+	/**
+	 * Hierarchy mask for the replayed tiler jobs. May be zero.
+	 */
+	u16 tiler_hierarchy_mask;
+
+	/**
+	 * Default weight to be used for hierarchy levels not in the original
+	 * mask.
+	 */
+	u32 hierarchy_default_weight;
+
+	/**
+	 * Core requirements for the tiler job chain
+	 */
+	base_jd_core_req tiler_core_req;
+
+	/**
+	 * Core requirements for the fragment job chain
+	 */
+	base_jd_core_req fragment_core_req;
+} base_jd_replay_payload;
+
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+typedef struct base_jd_replay_payload_uk10_2 {
+	u64 tiler_jc_list;
+	u64 fragment_jc;
+	u64 tiler_heap_free;
+	u16 fragment_hierarchy_mask;
+	u16 tiler_hierarchy_mask;
+	u32 hierarchy_default_weight;
+	u16 tiler_core_req;
+	u16 fragment_core_req;
+	u8 padding[4];
+} base_jd_replay_payload_uk10_2;
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
+
+/**
+ * @brief An entry in the linked list of job chains to be replayed. This must
+ *        be in GPU memory.
+ */
+typedef struct base_jd_replay_jc {
+	/**
+	 * Pointer to next entry in the list. A setting of NULL indicates the
+	 * end of the list.
+	 */
+	u64 next;
+
+	/**
+	 * Pointer to the job chain.
+	 */
+	u64 jc;
+
+} base_jd_replay_jc;
+
+/* Maximum number of jobs allowed in a fragment chain in the payload of a
+ * replay job */
+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
+
+/** @} end group base_api */
+
+typedef struct base_profiling_controls {
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+} base_profiling_controls;
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
+
+#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)
+
+#endif				/* _BASE_KERNEL_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel_sync.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
new file mode 100755
index 0000000..a24791f
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base cross-proccess sync API.
+ */
+
+#ifndef _BASE_KERNEL_SYNC_H_
+#define _BASE_KERNEL_SYNC_H_
+
+#include <linux/ioctl.h>
+
+#define STREAM_IOC_MAGIC '~'
+
+/* Fence insert.
+ *
+ * Inserts a fence on the stream operated on.
+ * Fence can be waited via a base fence wait soft-job
+ * or triggered via a base fence trigger soft-job.
+ *
+ * Fences must be cleaned up with close when no longer needed.
+ *
+ * No input/output arguments.
+ * Returns
+ * >=0 fd
+ * <0  error code
+ */
+#define STREAM_IOC_FENCE_INSERT _IO(STREAM_IOC_MAGIC, 0)
+
+#endif				/* _BASE_KERNEL_SYNC_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100755
index 0000000..4a98a72
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC	(1U << 0)
+#define BASE_SYNCSET_OP_CSYNC	(1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	base_mem_handle mem_handle;
+	u64 user_addr;
+	u64 size;
+	u8 type;
+	u8 padding[7];
+};
+
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
new file mode 100755
index 0000000..be454a2
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif	/*_BASE_VENDOR_SPEC_FUNC_H_*/
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100755
index 0000000..d261a3f
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,628 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <asm/page.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_uku.h>
+#include <mali_kbase_linux.h>
+
+/*
+ * Include mali_kbase_defs.h first as this provides types needed by other local
+ * header files.
+ */
+#include "mali_kbase_defs.h"
+
+#include "mali_kbase_context.h"
+#include "mali_kbase_strings.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_trace_timeline.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_debug_job_fault.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_vinstr.h"
+#include "mali_kbase_ipa.h"
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+/**
+ * @page page_base_kernel_main Kernel-side Base (KBase) APIs
+ *
+ * The Kernel-side Base (KBase) APIs are divided up as follows:
+ * - @subpage page_kbase_js_policy
+ */
+
+/**
+ * @defgroup base_kbase_api Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: configuration attributes member of kbdev needs to have
+* been setup before calling kbase_device_init
+*/
+
+/*
+* API to acquire device list semaphore and return pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control);
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+void kbase_destroy_context(struct kbase_context *kctx);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data,
+		int uk6_atom);
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data);
+#endif
+
+/**
+ * kbase_jd_done_worker - Handle a job completion
+ * @data: a &struct work_struct
+ *
+ * This function requeues the job from the runpool (if it was soft-stopped or
+ * removed from NEXT registers).
+ *
+ * Removes it from the system if it finished/failed/was cancelled.
+ *
+ * Resolves dependencies to add dependent jobs to the context, potentially
+ * starting them if necessary (which may add more references to the context)
+ *
+ * Releases the reference to the context from the no-longer-running job.
+ *
+ * Handles retrying submission outside of IRQ context if it failed from within
+ * IRQ context.
+ */
+void kbase_jd_done_worker(struct work_struct *data);
+
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+		kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+bool jd_submit_atom(struct kbase_context *kctx,
+			 const struct base_jd_atom_v2 *user_atom,
+			 struct kbase_jd_atom *katom);
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom);
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ *                                               and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The hwaccess_lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom);
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom);
+int kbase_soft_event_update(struct kbase_context *kctx,
+			    u64 event,
+			    unsigned char new_status);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+void kbasep_soft_job_timeout_worker(unsigned long data);
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
+
+/* api used internally for register access. Contains validation and tracing */
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size);
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
+
+/* api to be ported per OS, only need to do the raw register access */
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value);
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset);
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev     The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code  exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+		u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or has already been suspended
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+	return kbdev->pm.suspending;
+}
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int result;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+	result = katom - &kctx->jctx.atoms[0];
+	KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
+	return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id:   ID of atom to retrieve
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+		struct kbase_context *kctx, int id)
+{
+	return &kctx->jctx.atoms[id];
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just increment
+ * the counter during disjoint events we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down
+ * should be called
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if !defined(UINT64_MAX)
+	#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#endif
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)     \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+	kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+	kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+	trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(subcode);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_DEBUG
+/**
+ * kbase_set_driver_inactive - Force driver to go inactive
+ * @kbdev:    Device pointer
+ * @inactive: true if driver should go inactive, false otherwise
+ *
+ * Forcing the driver inactive will cause all future IOCTLs to wait until the
+ * driver is made active again. This is intended solely for the use of tests
+ * which require that no jobs are running while the test executes.
+ */
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifndef RESET0_LEVEL
+extern u32  override_value_aml;
+extern void *reg_base_hiubus;
+#define RESET0_MASK    0x01
+#define RESET1_MASK    0x02
+#define RESET2_MASK    0x03
+
+#define RESET0_LEVEL    0x10
+#define RESET1_LEVEL    0x11
+#define RESET2_LEVEL    0x12
+#define Rd(r)                           readl((reg_base_hiubus) + ((r)<<2))
+#define Wr(r, v)                        writel((v), ((reg_base_hiubus) + ((r)<<2)))
+#define Mali_WrReg(unit, core, regnum, value)   kbase_reg_write(kbdev, (regnum), (value), NULL)
+#define Mali_RdReg(unit, core, regnum)          kbase_reg_read(kbdev, (regnum), NULL)
+#define stimulus_print   printk
+#define stimulus_display printk
+#define Mali_pwr_on(x)   Mali_pwr_on_with_kdev(kbdev, (x))
+#define Mali_pwr_off(x)  Mali_pwr_off_with_kdev(kbdev, (x))
+#endif
+
+
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/* kbase_io_history_init - initialize data struct for register access history
+ *
+ * @kbdev The register history to initialize
+ * @n The number of register accesses that the buffer could hold
+ *
+ * @return 0 if successfully initialized, failure otherwise
+ */
+int kbase_io_history_init(struct kbase_io_history *h, u16 n);
+
+/* kbase_io_history_term - uninit all resources for the register access history
+ *
+ * @h The register history to terminate
+ */
+void kbase_io_history_term(struct kbase_io_history *h);
+
+/* kbase_io_history_dump - print the register history to the kernel ring buffer
+ *
+ * @kbdev Pointer to kbase_device containing the register history to dump
+ */
+void kbase_io_history_dump(struct kbase_device *kbdev);
+
+/**
+ * kbase_io_history_resize - resize the register access history buffer.
+ *
+ * @h: Pointer to a valid register history to resize
+ * @new_size: Number of accesses the buffer could hold
+ *
+ * A successful resize will clear all recent register accesses.
+ * If resizing fails for any reason (e.g., could not allocate memory, invalid
+ * buffer size) then the original buffer will be kept intact.
+ *
+ * @return 0 if the buffer was resized, failure otherwise
+ */
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbase_io_history_init(...) ((int)0)
+
+#define kbase_io_history_term CSTD_NOP
+
+#define kbase_io_history_dump CSTD_NOP
+
+#define kbase_io_history_resize CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+#endif
+
+
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100755
index 0000000..933aa28
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,209 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* This function is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the
+ * restart index is out of bounds and the rerun causes a tile range fault. If this happens
+ * we try to clamp the restart index to a correct value and rerun the job.
+ */
+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+	struct device *dev = katom->kctx->kbdev->dev;
+	u32   clamped = 0;
+	struct kbase_va_region *region;
+	phys_addr_t *page_array;
+	u64 page_index;
+	u32 offset = katom->jc & (~PAGE_MASK);
+	u32 *page_1 = NULL;
+	u32 *page_2 = NULL;
+	u32   job_header[JOB_HEADER_SIZE_IN_WORDS];
+	void *dst = job_header;
+	u32 minX, minY, maxX, maxY;
+	u32 restartX, restartY;
+	struct page *p;
+	u32 copy_size;
+
+	dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+	if (!(katom->core_req & BASE_JD_REQ_FS))
+		return 0;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+			katom->jc);
+	if (!region || (region->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(region);
+	if (!page_array)
+		goto out_unlock;
+
+	page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+	p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+	/* we need the first 10 words of the fragment shader job descriptor.
+	 * We need to check that the offset + 10 words is less that the page
+	 * size otherwise we need to load the next page.
+	 * page_size_overflow will be equal to 0 in case the whole descriptor
+	 * is within the page > 0 otherwise.
+	 */
+	copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+	page_1 = kmap_atomic(p);
+
+	/* page_1 is a u32 pointer, offset is expressed in bytes */
+	page_1 += offset>>2;
+
+	kbase_sync_single_for_cpu(katom->kctx->kbdev,
+			kbase_dma_addr(p) + offset,
+			copy_size, DMA_BIDIRECTIONAL);
+
+	memcpy(dst, page_1, copy_size);
+
+	/* The data needed overflows page the dimension,
+	 * need to map the subsequent page */
+	if (copy_size < JOB_HEADER_SIZE) {
+		p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+		page_2 = kmap_atomic(p);
+
+		kbase_sync_single_for_cpu(katom->kctx->kbdev,
+				kbase_dma_addr(p),
+				JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+		memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+	}
+
+	/* We managed to correctly map one or two pages (in case of overflow) */
+	/* Get Bounding Box data and restart index from fault address low word */
+	minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+	restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+	dev_warn(dev, "Before Clamping:\n"
+			"Jobstatus: %08x\n"
+			"restartIdx: %08x\n"
+			"Fault_addr_low: %08x\n"
+			"minCoordsX: %08x minCoordsY: %08x\n"
+			"maxCoordsX: %08x maxCoordsY: %08x\n",
+			job_header[JOB_DESC_STATUS_WORD],
+			job_header[JOB_DESC_RESTART_INDEX_WORD],
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+			minX, minY,
+			maxX, maxY);
+
+	/* Set the restart index to the one which generated the fault*/
+	job_header[JOB_DESC_RESTART_INDEX_WORD] =
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+	if (restartX < minX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to minimum. %08x clamped to %08x\n",
+			restartX, minX);
+		clamped =  1;
+	}
+	if (restartY < minY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to minimum. %08x clamped to %08x\n",
+			restartY, minY);
+		clamped =  1;
+	}
+	if (restartX > maxX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to maximum. %08x clamped to %08x\n",
+			restartX, maxX);
+		clamped =  1;
+	}
+	if (restartY > maxY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to maximum. %08x clamped to %08x\n",
+			restartY, maxY);
+		clamped =  1;
+	}
+
+	if (clamped) {
+		/* Reset the fault address low word
+		 * and set the job status to STOPPED */
+		job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+		job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+		dev_warn(dev, "After Clamping:\n"
+				"Jobstatus: %08x\n"
+				"restartIdx: %08x\n"
+				"Fault_addr_low: %08x\n"
+				"minCoordsX: %08x minCoordsY: %08x\n"
+				"maxCoordsX: %08x maxCoordsY: %08x\n",
+				job_header[JOB_DESC_STATUS_WORD],
+				job_header[JOB_DESC_RESTART_INDEX_WORD],
+				job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+				minX, minY,
+				maxX, maxY);
+
+		/* Flush CPU cache to update memory for future GPU reads*/
+		memcpy(page_1, dst, copy_size);
+		p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+		kbase_sync_single_for_device(katom->kctx->kbdev,
+				kbase_dma_addr(p) + offset,
+				copy_size, DMA_TO_DEVICE);
+
+		if (copy_size < JOB_HEADER_SIZE) {
+			memcpy(page_2, dst + copy_size,
+					JOB_HEADER_SIZE - copy_size);
+			p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+
+			kbase_sync_single_for_device(katom->kctx->kbdev,
+					kbase_dma_addr(p),
+					JOB_HEADER_SIZE - copy_size,
+					DMA_TO_DEVICE);
+		}
+	}
+	if (copy_size < JOB_HEADER_SIZE)
+		kunmap_atomic(page_2);
+
+	kunmap_atomic(page_1);
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+	return clamped;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100755
index 0000000..099a298
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
new file mode 100644
index 0000000..f910fe9
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+
+static int kbase_as_fault_read(struct seq_file *sfile, void *data)
+{
+	uintptr_t as_no = (uintptr_t) sfile->private;
+
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+	struct kbase_device *kbdev = NULL;
+
+	kbdev_list = kbase_dev_list_get();
+
+	list_for_each(entry, kbdev_list) {
+		kbdev = list_entry(entry, struct kbase_device, entry);
+
+		if(kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) {
+
+			/* don't show this one again until another fault occors */
+			kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no);
+
+			/* output the last page fault addr */
+			seq_printf(sfile, "%llu\n", (u64) kbdev->as[as_no].fault_addr);
+		}
+
+	}
+
+	kbase_dev_list_put(kbdev_list);
+
+	return 0;
+}
+
+static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbase_as_fault_read , in->i_private);
+}
+
+static const struct file_operations as_fault_fops = {
+	.open = kbase_as_fault_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ *  Initialize debugfs entry for each address space
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	uint i;
+	char as_name[64];
+	struct dentry *debugfs_directory;
+
+	kbdev->debugfs_as_read_bitmap = 0ULL;
+
+	KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces);
+	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));
+
+	debugfs_directory = debugfs_create_dir("address_spaces",
+		kbdev->mali_debugfs_directory);
+
+	if(debugfs_directory) {
+		for(i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+			snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
+			debugfs_create_file(as_name, S_IRUGO,
+				debugfs_directory, (void*) ((uintptr_t) i), &as_fault_fops);
+		}
+	}
+	else
+		dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory");
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+	return;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
new file mode 100644
index 0000000..3ed2248
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_AS_FAULT_DEBUG_FS_H
+#define _KBASE_AS_FAULT_DEBUG_FS_H
+
+/**
+ * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults
+ *
+ * @kbdev: Pointer to kbase_device
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_as_fault_debugfs_new() - make the last fault available on debugfs
+ *
+ * @kbdev: Pointer to kbase_device
+ * @as_no: The address space the fault occurred on
+ */
+static inline void
+kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
+#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_MALI_DEBUG */
+	return;
+}
+
+#endif  /*_KBASE_AS_FAULT_DEBUG_FS_H*/
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100755
index 0000000..c67b3e9
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+	u32 cache_flags = 0;
+
+	CSTD_UNUSED(nr_pages);
+
+	if (flags & BASE_MEM_CACHED_CPU)
+		cache_flags |= KBASE_REG_CPU_CACHED;
+
+	return cache_flags;
+}
+
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		return;
+#endif /* CONFIG_MALI_COH_KERN */
+	dma_sync_single_for_device(kbdev->dev, handle, size, dir);
+}
+
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		return;
+#endif /* CONFIG_MALI_COH_KERN */
+	dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
new file mode 100755
index 0000000..0c18bdb
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * kbase_cache_enabled - Choose the cache policy for a specific region
+ * @flags:    flags describing attributes of the region
+ * @nr_pages: total number of pages (backed or not) for the region
+ *
+ * Tells whether the CPU and GPU caches should be enabled or not for a specific
+ * region.
+ * This function can be modified to customize the cache policy depending on the
+ * flags and size of the region.
+ *
+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED
+ *         depending on the cache policy
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
new file mode 100755
index 0000000..fb615ae
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
@@ -0,0 +1,51 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+int kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_init_func)
+		return platform_funcs_p->platform_init_func(kbdev);
+
+	return 0;
+}
+
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_term_func)
+		platform_funcs_p->platform_term_func(kbdev);
+}
+
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed)
+{
+	KBASE_DEBUG_ASSERT(NULL != clock_speed);
+
+	*clock_speed = 100;
+	return 0;
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
new file mode 100755
index 0000000..356d52b
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
@@ -0,0 +1,345 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <asm/page.h>
+
+#include <mali_malisw.h>
+#include <mali_kbase_backend_config.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+#include <linux/rbtree.h>
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/**
+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ *
+ * Specifies the functions pointers for platform specific initialization and
+ * termination. By default no functions are required. No additional platform
+ * specific control is necessary.
+ */
+struct kbase_platform_funcs_conf {
+	/**
+	 * platform_init_func - platform specific init function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Returns 0 on success, negative error code otherwise.
+	 *
+	 * Function pointer for platform specific initialization or NULL if no
+	 * initialization function is required. At the point this the GPU is
+	 * not active and its power and clocks are in unknown (platform specific
+	 * state) as kbase doesn't yet have control of power and clocks.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly initialized) in here.
+	 */
+	int (*platform_init_func)(struct kbase_device *kbdev);
+	/**
+	 * platform_term_func - platform specific termination function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Function pointer for platform specific termination or NULL if no
+	 * termination function is required. At the point this the GPU will be
+	 * idle but still powered and clocked.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly terminated) in here.
+	 */
+	void (*platform_term_func)(struct kbase_device *kbdev);
+};
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+	/** Callback for when the GPU is idle and the power to it can be switched off.
+	 *
+	 * The system integrator can decide whether to either do nothing, just switch off
+	 * the clocks to the GPU, or to completely power down the GPU.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the GPU is about to become active and power must be supplied.
+	 *
+	 * This function must not return until the GPU is powered and clocked sufficiently for register access to
+	 * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
+	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 *
+	 * The return value of the first call to this function is ignored.
+	 *
+	 * @return 1 if the GPU state may have been lost, 0 otherwise.
+	 */
+	int (*power_on_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is requesting a suspend and GPU power
+	 * must be switched off.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a preceding call to power_off_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_off_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is resuming from a suspend and GPU
+	 * power must be switched on.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a following call to power_on_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_on_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_resume_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management initialization.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * will become active from calls made to the OS from within this function.
+	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else int error code.
+	 */
+	 int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management termination.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * should no longer be called by the OS on completion of this function.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-off power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_suspend callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
+	 */
+	void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-on power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_resume callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+
+	/*
+	 * Optional callback for checking if GPU can be suspended when idle
+	 *
+	 * This callback will be called by the runtime power management core
+	 * when the reference count goes to 0 to provide notification that the
+	 * GPU now seems idle.
+	 *
+	 * If this callback finds that the GPU can't be powered off, or handles
+	 * suspend by powering off directly or queueing up a power off, a
+	 * non-zero value must be returned to prevent the runtime PM core from
+	 * also triggering a suspend.
+	 *
+	 * Returning 0 will cause the runtime PM core to conduct a regular
+	 * autosuspend.
+	 *
+	 * This callback is optional and if not provided regular autosuspend
+	 * will be triggered.
+	 *
+	 * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+	 * this feature.
+	 *
+	 * Return 0 if GPU can be suspended, positive value if it can not be
+	 * suspeneded by runtime PM, else OS error code
+	 */
+	int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
+};
+
+/**
+ * kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC
+ * @clock_speed - see  kbase_cpu_clk_speed_func for details on the parameters
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * Default implementation of CPU_SPEED_FUNC. This function sets clock_speed
+ * to 100, so will be an underestimate for any real system.
+ */
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed);
+
+/**
+ * kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current CPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ *
+ * This is mainly used to implement OpenCL's clGetDeviceInfo().
+ */
+typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed);
+
+/**
+ * kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current GPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ * When an error is returned the caller assumes maximum GPU speed stored in
+ * gpu_freq_khz_max.
+ *
+ * If the system timer is not available then this function is required
+ * for the OpenCL queue profiling to return correct timing information.
+ *
+ */
+typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed);
+
+#ifdef CONFIG_OF
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+	u64 start;
+	u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ */
+struct kbase_io_resources {
+	u32                      job_irq_number;
+	u32                      mmu_irq_number;
+	u32                      gpu_irq_number;
+	struct kbase_io_memory_region io_memory_region;
+};
+
+struct kbase_platform_config {
+	const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init: - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes.  The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ *         Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that are outside of the Power Management
+ * callbacks.
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_platform_early_init - Early initialisation of the platform code
+ *
+ * This function will be called when the module is loaded to perform any
+ * early initialisation required by the platform code. Such as reading
+ * platform specific device tree entries for the GPU.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_early_init(void);
+
+#ifndef CONFIG_OF
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+/**
+ * kbase_platform_fake_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_fake_register()
+ */
+void kbase_platform_fake_unregister(void);
+#endif
+#endif
+
+	  /** @} *//* end group kbase_config */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_CONFIG_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100755
index 0000000..0f11388
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,260 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+ * Irq throttle. It is the minimum desired time in between two
+ * consecutive gpu interrupts (given in 'us'). The irq throttle
+ * gpu register will be configured after this, taking into
+ * account the configured max frequency.
+ *
+ * Attached value: number in micro seconds
+ */
+#define DEFAULT_IRQ_THROTTLE_TIME_US 20
+
+/**
+ *  Default Job Scheduler initial runtime of a context for the CFS Policy,
+ *  in time-slices.
+ *
+ * This value is relative to that of the least-run context, and defines
+ * where in the CFS queue a new context is added. A value of 1 means 'after
+ * the least-run context has used its timeslice'. Therefore, when all
+ * contexts consistently use the same amount of time, a value of 1 models a
+ * FIFO. A value of 0 would model a LIFO.
+ *
+ * The value is represented in "numbers of time slices". Multiply this
+ * value by that defined in @ref DEFAULT_JS_CTX_TIMESLICE_NS to get
+ * the time value for this in nanoseconds.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES 1
+
+/**
+ * Default Job Scheduler minimum runtime value of a context for CFS, in
+ * time_slices relative to that of the least-run context.
+ *
+ * This is a measure of how much preferrential treatment is given to a
+ * context that is not run very often.
+ *
+ * Specficially, this value defines how many timeslices such a context is
+ * (initially) allowed to use at once. Such contexts (e.g. 'interactive'
+ * processes) will appear near the front of the CFS queue, and can initially
+ * use more time than contexts that run continuously (e.g. 'batch'
+ * processes).
+ *
+ * This limit \b prevents a "stored-up timeslices" DoS attack, where a ctx
+ * not run for a long time attacks the system by using a very large initial
+ * number of timeslices when it finally does run.
+ *
+ * @note A value of zero allows not-run-often contexts to get scheduled in
+ * quickly, but to only use a single timeslice when they get scheduled in.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES 2
+
+/**
+* Boolean indicating whether the driver is configured to be secure at
+* a potential loss of performance.
+*
+* This currently affects only r0p0-15dev0 HW and earlier.
+*
+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+* performance:
+*
+* - When this is set to true, the driver remains fully secure,
+* but potentially loses performance compared with setting this to
+* false.
+* - When set to false, the driver is open to certain security
+* attacks.
+*
+* From r0p0-00rel0 and onwards, there is no security loss by setting
+* this to false, and no performance loss by setting it to
+* true.
+*/
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 */
+	KBASE_AID_32 = 0x0,
+
+	/**
+	 * Restrict GPU to a half of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_16 = 0x3,
+
+	/**
+	 * Restrict GPU to a quarter of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_8  = 0x2,
+
+	/**
+	 * Restrict GPU to an eighth of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_4  = 0x1
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
+ * defines which UMP device this GPU should be mapped to.
+ */
+#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
+
+/*
+ * Default period for DVFS sampling
+ */
+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+
+/*
+ * Power Management poweroff tick granuality. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/*
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/*
+ * Power Manager number of ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
+
+/*
+ * Default scheduling tick granuality
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS    (100) /* 10s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL    (100) /* 10s */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
+
+/*
+ * Default timeout for some software jobs, after which the software event wait
+ * jobs will be cancelled.
+ */
+#define DEFAULT_JS_SOFT_JOB_TIMEOUT ((u32)3000) /* 3s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS           (105) /* 10.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL        (105) /* 10.5s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
+ */
+#define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */
+
+/*
+ * Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
+/*
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
new file mode 100755
index 0000000..55c5ef6
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -0,0 +1,321 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_mem_linux.h>
+
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context.
+ *
+ * Return: new kbase context
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+{
+	struct kbase_context *kctx;
+	int err;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* zero-inited as lot of code assume it's zero'ed out on create */
+	kctx = vzalloc(sizeof(*kctx));
+
+	if (!kctx)
+		goto out;
+
+	/* creating a context is considered a disjoint event */
+	kbase_disjoint_event(kbdev);
+
+	kctx->kbdev = kbdev;
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+	if (is_compat)
+		kbase_ctx_flag_set(kctx, KCTX_COMPAT);
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kctx->timeline.owner_tgid = task_tgid_nr(current);
+#endif
+	atomic_set(&kctx->setup_complete, 0);
+	atomic_set(&kctx->setup_in_progress, 0);
+	spin_lock_init(&kctx->mm_update_lock);
+	kctx->process_mm = NULL;
+	atomic_set(&kctx->nonmapped_pages, 0);
+	kctx->slots_pullable = 0;
+	kctx->tgid = current->tgid;
+	kctx->pid = current->pid;
+
+	err = kbase_mem_pool_init(&kctx->mem_pool,
+			kbdev->mem_pool_max_size_default,
+			kctx->kbdev, &kbdev->mem_pool);
+	if (err)
+		goto free_kctx;
+
+	err = kbase_mem_evictable_init(kctx);
+	if (err)
+		goto free_pool;
+
+	atomic_set(&kctx->used_pages, 0);
+
+	err = kbase_jd_init(kctx);
+	if (err)
+		goto deinit_evictable;
+
+	err = kbasep_js_kctx_init(kctx);
+	if (err)
+		goto free_jd;	/* safe to call kbasep_js_kctx_term  in this case */
+
+	err = kbase_event_init(kctx);
+	if (err)
+		goto free_jd;
+
+	atomic_set(&kctx->drain_pending, 0);
+
+	mutex_init(&kctx->reg_lock);
+
+	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+	spin_lock_init(&kctx->waiting_soft_jobs_lock);
+#ifdef CONFIG_KDS
+	INIT_LIST_HEAD(&kctx->waiting_kds_resource);
+#endif
+	err = kbase_dma_fence_init(kctx);
+	if (err)
+		goto free_event;
+
+	err = kbase_mmu_init(kctx);
+	if (err)
+		goto term_dma_fence;
+
+	do {
+		err = kbase_mem_pool_grow(&kctx->mem_pool,
+				MIDGARD_MMU_BOTTOMLEVEL);
+		if (err)
+			goto pgd_no_mem;
+		kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+	} while (!kctx->pgd);
+
+	kctx->aliasing_sink_page = kbase_mem_alloc_page(kctx->kbdev);
+	if (!kctx->aliasing_sink_page)
+		goto no_sink_page;
+
+	init_waitqueue_head(&kctx->event_queue);
+
+	kctx->cookies = KBASE_COOKIE_MASK;
+
+	/* Make sure page 0 is not used... */
+	err = kbase_region_tracker_init(kctx);
+	if (err)
+		goto no_region_tracker;
+
+	err = kbase_sticky_resource_init(kctx);
+	if (err)
+		goto no_sticky;
+
+	err = kbase_jit_init(kctx);
+	if (err)
+		goto no_jit;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_set(&kctx->jctx.work_id, 0);
+#endif
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
+#endif
+
+	kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+	mutex_init(&kctx->vinstr_cli_lock);
+
+	setup_timer(&kctx->soft_job_timeout,
+		    kbasep_soft_job_timeout_worker,
+		    (uintptr_t)kctx);
+
+	return kctx;
+
+no_jit:
+	kbase_gpu_vm_lock(kctx);
+	kbase_sticky_resource_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+no_sticky:
+	kbase_region_tracker_term(kctx);
+no_region_tracker:
+	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
+no_sink_page:
+	/* VM lock needed for the call to kbase_mmu_free_pgd */
+	kbase_gpu_vm_lock(kctx);
+	kbase_mmu_free_pgd(kctx);
+	kbase_gpu_vm_unlock(kctx);
+pgd_no_mem:
+	kbase_mmu_term(kctx);
+term_dma_fence:
+	kbase_dma_fence_term(kctx);
+free_event:
+	kbase_event_cleanup(kctx);
+free_jd:
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+	kbase_jd_exit(kctx);
+deinit_evictable:
+	kbase_mem_evictable_deinit(kctx);
+free_pool:
+	kbase_mem_pool_term(&kctx->mem_pool);
+free_kctx:
+	vfree(kctx);
+out:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context);
+
+static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+{
+	dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+}
+
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Calls kbase_destroy_os_context() to free OS specific structures.
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	int pages;
+	unsigned long pending_regions_to_clean;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+	/* Ensure the core is powered up for the destroy process */
+	/* A suspend won't happen here, because we're in a syscall from a userspace
+	 * thread. */
+	kbase_pm_context_active(kbdev);
+
+	kbase_jd_zap_context(kctx);
+	kbase_event_cleanup(kctx);
+
+	/*
+	 * JIT must be terminated before the code below as it must be called
+	 * without the region lock being held.
+	 * The code above ensures no new JIT allocations can be made by
+	 * by the time we get to this point of context tear down.
+	 */
+	kbase_jit_term(kctx);
+
+	kbase_gpu_vm_lock(kctx);
+
+	kbase_sticky_resource_term(kctx);
+
+	/* MMU is disabled as part of scheduling out the context */
+	kbase_mmu_free_pgd(kctx);
+
+	/* drop the aliasing sink page now that it can't be mapped anymore */
+	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
+
+	/* free pending region setups */
+	pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+	while (pending_regions_to_clean) {
+		unsigned int cookie = __ffs(pending_regions_to_clean);
+
+		BUG_ON(!kctx->pending_regions[cookie]);
+
+		kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+
+		kctx->pending_regions[cookie] = NULL;
+		pending_regions_to_clean &= ~(1UL << cookie);
+	}
+
+	kbase_region_tracker_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+
+	kbase_jd_exit(kctx);
+
+	kbase_pm_context_idle(kbdev);
+
+	kbase_dma_fence_term(kctx);
+
+	kbase_mmu_term(kctx);
+
+	pages = atomic_read(&kctx->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_evictable_deinit(kctx);
+	kbase_mem_pool_term(&kctx->mem_pool);
+	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+	vfree(kctx);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context);
+
+/**
+ * kbase_context_set_create_flags - Set creation flags on a context
+ * @kctx: Kbase context
+ * @flags: Flags to set
+ *
+ * Return: 0 on success
+ */
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
+{
+	int err = 0;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long irq_flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Validate flags */
+	if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+
+	/* Translate the flags */
+	if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+		kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* Latch the initial attributes into the Job Scheduler */
+	kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
+
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ out:
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h
new file mode 100644
index 0000000..a3f5bb0
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h
@@ -0,0 +1,90 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_CONTEXT_H_
+#define _KBASE_CONTEXT_H_
+
+#include <linux/atomic.h>
+
+
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+/**
+ * kbase_ctx_flag - Check if @flag is set on @kctx
+ * @kctx: Pointer to kbase context to check
+ * @flag: Flag to check
+ *
+ * Return: true if @flag is set on @kctx, false if not.
+ */
+static inline bool kbase_ctx_flag(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	return atomic_read(&kctx->flags) & flag;
+}
+
+/**
+ * kbase_ctx_flag_clear - Clear @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Clear the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
+					enum kbase_context_flags flag)
+{
+#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
+	/*
+	 * Earlier kernel versions doesn't have atomic_andnot() or
+	 * atomic_and(). atomic_clear_mask() was only available on some
+	 * architectures and removed on arm in v3.13 on arm and arm64.
+	 *
+	 * Use a compare-exchange loop to clear the flag on pre 4.3 kernels,
+	 * when atomic_andnot() becomes available.
+	 */
+	int old, new;
+
+	do {
+		old = atomic_read(&kctx->flags);
+		new = old & ~flag;
+
+	} while (atomic_cmpxchg(&kctx->flags, old, new) != old);
+#else
+	atomic_andnot(flag, &kctx->flags);
+#endif
+}
+
+/**
+ * kbase_ctx_flag_set - Set @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Set the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	atomic_or(flag, &kctx->flags);
+}
+#endif /* _KBASE_CONTEXT_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
new file mode 100755
index 0000000..d30837f
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -0,0 +1,4096 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_uku.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_mem_linux.h>
+#ifdef CONFIG_MALI_DEVFREQ
+#include <backend/gpu/mali_kbase_devfreq.h>
+#endif /* CONFIG_MALI_DEVFREQ */
+#ifdef CONFIG_MALI_NO_MALI
+#include "mali_kbase_model_linux.h"
+#endif /* CONFIG_MALI_NO_MALI */
+#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_debugfs.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_regs_dump_debugfs.h"
+#endif /* !MALI_CUSTOMER_RELEASE */
+#include "mali_kbase_regs_history_debugfs.h"
+#include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#include <linux/anon_inodes.h>
+#include <linux/syscalls.h>
+#endif /* CONFIG_KDS */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/compat.h>	/* is_compat_task */
+#include <linux/mman.h>
+#include <linux/version.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+#include <mali_kbase_hw.h>
+#include <platform/mali_kbase_platform_common.h>
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+#include <platform/mali_kbase_platform_fake.h>
+#endif /*CONFIG_MALI_PLATFORM_FAKE */
+#ifdef CONFIG_SYNC
+#include <mali_kbase_sync.h>
+#endif /* CONFIG_SYNC */
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_PM_DEVFREQ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+
+#include <mali_kbase_config.h>
+
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif
+
+#include <mali_kbase_tlstream.h>
+
+#include <mali_kbase_as_fault_debugfs.h>
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+#if MALI_UNIT_TEST
+static struct kbase_exported_test_data shared_kernel_test_data;
+EXPORT_SYMBOL(shared_kernel_test_data);
+#endif /* MALI_UNIT_TEST */
+
+static int kbase_dev_nr;
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+extern int mali_pm_statue;
+#endif
+
+static DEFINE_MUTEX(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+
+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
+static inline void __compile_time_asserts(void)
+{
+	CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE);
+}
+
+static void kbase_create_timeline_objects(struct kbase_context *kctx)
+{
+	struct kbase_device             *kbdev = kctx->kbdev;
+	unsigned int                    lpu_id;
+	unsigned int                    as_nr;
+	struct kbasep_kctx_list_element *element;
+
+	/* Create LPU objects. */
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		u32 *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		kbase_tlstream_tl_summary_new_lpu(lpu, lpu_id, *lpu);
+	}
+
+	/* Create Address Space objects. */
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		kbase_tlstream_tl_summary_new_as(&kbdev->as[as_nr], as_nr);
+
+	/* Create GPU object and make it retain all LPUs and address spaces. */
+	kbase_tlstream_tl_summary_new_gpu(
+			kbdev,
+			kbdev->gpu_props.props.raw_props.gpu_id,
+			kbdev->gpu_props.num_cores);
+
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		void *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, kbdev);
+	}
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		kbase_tlstream_tl_summary_lifelink_as_gpu(
+				&kbdev->as[as_nr],
+				kbdev);
+
+	/* Create object for each known context. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry(element, &kbdev->kctx_list, link) {
+		kbase_tlstream_tl_summary_new_ctx(
+				element->kctx,
+				(u32)(element->kctx->id),
+				(u32)(element->kctx->tgid));
+	}
+	/* Before releasing the lock, reset body stream buffers.
+	 * This will prevent context creation message to be directed to both
+	 * summary and body stream. */
+	kbase_tlstream_reset_body_streams();
+	mutex_unlock(&kbdev->kctx_list_lock);
+	/* Static object are placed into summary packet that needs to be
+	 * transmitted first. Flush all streams to make it available to
+	 * user space. */
+	kbase_tlstream_flush_streams();
+}
+
+static void kbase_api_handshake(struct uku_version_check_args *version)
+{
+	switch (version->major) {
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	case 6:
+		/* We are backwards compatible with version 6,
+		 * so pretend to be the old version */
+		version->major = 6;
+		version->minor = 1;
+		break;
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+#ifdef BASE_LEGACY_UK7_SUPPORT
+	case 7:
+		/* We are backwards compatible with version 7,
+		 * so pretend to be the old version */
+		version->major = 7;
+		version->minor = 1;
+		break;
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	case 8:
+		/* We are backwards compatible with version 8,
+		 * so pretend to be the old version */
+		version->major = 8;
+		version->minor = 4;
+		break;
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+#ifdef BASE_LEGACY_UK9_SUPPORT
+	case 9:
+		/* We are backwards compatible with version 9,
+		 * so pretend to be the old version */
+		version->major = 9;
+		version->minor = 0;
+		break;
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+	case BASE_UK_VERSION_MAJOR:
+		/* set minor to be the lowest common */
+		version->minor = min_t(int, BASE_UK_VERSION_MINOR,
+				(int)version->minor);
+		break;
+	default:
+		/* We return our actual version regardless if it
+		 * matches the version returned by userspace -
+		 * userspace can bail if it can't handle this
+		 * version */
+		version->major = BASE_UK_VERSION_MAJOR;
+		version->minor = BASE_UK_VERSION_MINOR;
+		break;
+	}
+}
+
+/**
+ * enum mali_error - Mali error codes shared with userspace
+ *
+ * This is subset of those common Mali errors that can be returned to userspace.
+ * Values of matching user and kernel space enumerators MUST be the same.
+ * MALI_ERROR_NONE is guaranteed to be 0.
+ */
+enum mali_error {
+	MALI_ERROR_NONE = 0,
+	MALI_ERROR_OUT_OF_GPU_MEMORY,
+	MALI_ERROR_OUT_OF_MEMORY,
+	MALI_ERROR_FUNCTION_FAILED,
+};
+
+enum {
+	inited_mem = (1u << 0),
+	inited_js = (1u << 1),
+	inited_pm_runtime_init = (1u << 2),
+#ifdef CONFIG_MALI_DEVFREQ
+	inited_devfreq = (1u << 3),
+#endif /* CONFIG_MALI_DEVFREQ */
+	inited_tlstream = (1u << 4),
+	inited_backend_early = (1u << 5),
+	inited_backend_late = (1u << 6),
+	inited_device = (1u << 7),
+	inited_vinstr = (1u << 8),
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	inited_ipa = (1u << 9),
+#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+	inited_job_fault = (1u << 10),
+	inited_misc_register = (1u << 11),
+	inited_get_device = (1u << 12),
+	inited_sysfs_group = (1u << 13),
+	inited_dev_list = (1u << 14),
+	inited_debugfs = (1u << 15),
+	inited_gpu_device = (1u << 16),
+	inited_registers_map = (1u << 17),
+	inited_io_history = (1u << 18),
+	inited_power_control = (1u << 19),
+	inited_buslogger = (1u << 20)
+};
+
+
+#ifdef CONFIG_MALI_DEBUG
+#define INACTIVE_WAIT_MS (5000)
+
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive)
+{
+	kbdev->driver_inactive = inactive;
+	wake_up(&kbdev->driver_inactive_wait);
+
+	/* Wait for any running IOCTLs to complete */
+	if (inactive)
+		msleep(INACTIVE_WAIT_MS);
+}
+KBASE_EXPORT_TEST_API(kbase_set_driver_inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 args_size)
+{
+	struct kbase_device *kbdev;
+	union uk_header *ukh = args;
+	u32 id;
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(ukh != NULL);
+
+	kbdev = kctx->kbdev;
+	id = ukh->id;
+	ukh->ret = MALI_ERROR_NONE; /* Be optimistic */
+
+#ifdef CONFIG_MALI_DEBUG
+	wait_event(kbdev->driver_inactive_wait,
+			kbdev->driver_inactive == false);
+#endif /* CONFIG_MALI_DEBUG */
+
+	if (UKP_FUNC_ID_CHECK_VERSION == id) {
+		struct uku_version_check_args *version_check;
+
+		if (args_size != sizeof(struct uku_version_check_args)) {
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			return 0;
+		}
+		version_check = (struct uku_version_check_args *)args;
+		kbase_api_handshake(version_check);
+		/* save the proposed version number for later use */
+		kctx->api_version = KBASE_API_VERSION(version_check->major,
+				version_check->minor);
+		ukh->ret = MALI_ERROR_NONE;
+		return 0;
+	}
+
+	/* block calls until version handshake */
+	if (kctx->api_version == 0)
+		return -EINVAL;
+
+	if (!atomic_read(&kctx->setup_complete)) {
+		struct kbase_uk_set_flags *kbase_set_flags;
+
+		/* setup pending, try to signal that we'll do the setup,
+		 * if setup was already in progress, err this call
+		 */
+		if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0)
+			return -EINVAL;
+
+		/* if unexpected call, will stay stuck in setup mode
+		 * (is it the only call we accept?)
+		 */
+		if (id != KBASE_FUNC_SET_FLAGS)
+			return -EINVAL;
+
+		kbase_set_flags = (struct kbase_uk_set_flags *)args;
+
+		/* if not matching the expected call, stay in setup mode */
+		if (sizeof(*kbase_set_flags) != args_size)
+			goto bad_size;
+
+		/* if bad flags, will stay stuck in setup mode */
+		if (kbase_context_set_create_flags(kctx,
+				kbase_set_flags->create_flags) != 0)
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+		atomic_set(&kctx->setup_complete, 1);
+		return 0;
+	}
+
+	/* setup complete, perform normal operation */
+	switch (id) {
+	case KBASE_FUNC_MEM_JIT_INIT:
+		{
+			struct kbase_uk_mem_jit_init *jit_init = args;
+
+			if (sizeof(*jit_init) != args_size)
+				goto bad_size;
+
+			if (kbase_region_tracker_init_jit(kctx,
+					jit_init->va_pages))
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_MEM_ALLOC:
+		{
+			struct kbase_uk_mem_alloc *mem = args;
+			struct kbase_va_region *reg;
+
+			if (sizeof(*mem) != args_size)
+				goto bad_size;
+
+#if defined(CONFIG_64BIT)
+			if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+				/* force SAME_VA if a 64-bit client */
+				mem->flags |= BASE_MEM_SAME_VA;
+			}
+#endif
+
+			reg = kbase_mem_alloc(kctx, mem->va_pages,
+					mem->commit_pages, mem->extent,
+					&mem->flags, &mem->gpu_va,
+					&mem->va_alignment);
+			if (!reg)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_MEM_IMPORT: {
+			struct kbase_uk_mem_import *mem_import = args;
+			void __user *phandle;
+
+			if (sizeof(*mem_import) != args_size)
+				goto bad_size;
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				phandle = compat_ptr(mem_import->phandle.compat_value);
+			else
+#endif
+				phandle = mem_import->phandle.value;
+
+			if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_import(kctx,
+					(enum base_mem_import_type)
+					mem_import->type,
+					phandle,
+					&mem_import->gpu_va,
+					&mem_import->va_pages,
+					&mem_import->flags)) {
+				mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID;
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			}
+			break;
+	}
+	case KBASE_FUNC_MEM_ALIAS: {
+			struct kbase_uk_mem_alias *alias = args;
+			struct base_mem_aliasing_info __user *user_ai;
+			struct base_mem_aliasing_info *ai;
+
+			if (sizeof(*alias) != args_size)
+				goto bad_size;
+
+			if (alias->nents > 2048) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+			if (!alias->nents) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				user_ai = compat_ptr(alias->ai.compat_value);
+			else
+#endif
+				user_ai = alias->ai.value;
+
+			ai = vmalloc(sizeof(*ai) * alias->nents);
+
+			if (!ai) {
+				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+				break;
+			}
+
+			if (copy_from_user(ai, user_ai,
+					   sizeof(*ai) * alias->nents)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				goto copy_failed;
+			}
+
+			alias->gpu_va = kbase_mem_alias(kctx, &alias->flags,
+							alias->stride,
+							alias->nents, ai,
+							&alias->va_pages);
+			if (!alias->gpu_va) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				goto no_alias;
+			}
+no_alias:
+copy_failed:
+			vfree(ai);
+			break;
+		}
+	case KBASE_FUNC_MEM_COMMIT:
+		{
+			struct kbase_uk_mem_commit *commit = args;
+
+			if (sizeof(*commit) != args_size)
+				goto bad_size;
+
+			if (commit->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_COMMIT: commit->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_commit(kctx, commit->gpu_addr,
+					commit->pages,
+					(base_backing_threshold_status *)
+					&commit->result_subcode) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+
+	case KBASE_FUNC_MEM_QUERY:
+		{
+			struct kbase_uk_mem_query *query = args;
+
+			if (sizeof(*query) != args_size)
+				goto bad_size;
+
+			if (query->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+			if (query->query != KBASE_MEM_QUERY_COMMIT_SIZE &&
+			    query->query != KBASE_MEM_QUERY_VA_SIZE &&
+				query->query != KBASE_MEM_QUERY_FLAGS) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->query = %lld unknown", (unsigned long long)query->query);
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_query(kctx, query->gpu_addr,
+					query->query, &query->value) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			break;
+		}
+		break;
+
+	case KBASE_FUNC_MEM_FLAGS_CHANGE:
+		{
+			struct kbase_uk_mem_flags_change *fc = args;
+
+			if (sizeof(*fc) != args_size)
+				goto bad_size;
+
+			if ((fc->gpu_va & ~PAGE_MASK) && (fc->gpu_va >= PAGE_SIZE)) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FLAGS_CHANGE: mem->gpu_va: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_flags_change(kctx, fc->gpu_va,
+					fc->flags, fc->mask) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+	case KBASE_FUNC_MEM_FREE:
+		{
+			struct kbase_uk_mem_free *mem = args;
+
+			if (sizeof(*mem) != args_size)
+				goto bad_size;
+
+			if ((mem->gpu_addr & ~PAGE_MASK) && (mem->gpu_addr >= PAGE_SIZE)) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FREE: mem->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_free(kctx, mem->gpu_addr) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+
+	case KBASE_FUNC_JOB_SUBMIT:
+		{
+			struct kbase_uk_job_submit *job = args;
+
+			if (sizeof(*job) != args_size)
+				goto bad_size;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+			if (kbase_jd_submit(kctx, job, 0) != 0)
+#else
+			if (kbase_jd_submit(kctx, job) != 0)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	case KBASE_FUNC_JOB_SUBMIT_UK6:
+		{
+			struct kbase_uk_job_submit *job = args;
+
+			if (sizeof(*job) != args_size)
+				goto bad_size;
+
+			if (kbase_jd_submit(kctx, job, 1) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+#endif
+
+	case KBASE_FUNC_SYNC:
+		{
+			struct kbase_uk_sync_now *sn = args;
+
+			if (sizeof(*sn) != args_size)
+				goto bad_size;
+
+			if (sn->sset.basep_sset.mem_handle.basep.handle & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+#ifndef CONFIG_MALI_COH_USER
+			if (kbase_sync_now(kctx, &sn->sset) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif
+			break;
+		}
+
+	case KBASE_FUNC_DISJOINT_QUERY:
+		{
+			struct kbase_uk_disjoint_query *dquery = args;
+
+			if (sizeof(*dquery) != args_size)
+				goto bad_size;
+
+			/* Get the disjointness counter value. */
+			dquery->counter = kbase_disjoint_event_get(kctx->kbdev);
+			break;
+		}
+
+	case KBASE_FUNC_POST_TERM:
+		{
+			kbase_event_close(kctx);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_SETUP:
+		{
+			struct kbase_uk_hwcnt_setup *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_legacy_hwc_setup(kbdev->vinstr_ctx,
+					&kctx->vinstr_cli, setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_DUMP:
+		{
+			/* args ignored */
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwc_dump(kctx->vinstr_cli,
+					BASE_HWCNT_READER_EVENT_MANUAL) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_CLEAR:
+		{
+			/* args ignored */
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwc_clear(kctx->vinstr_cli) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_READER_SETUP:
+		{
+			struct kbase_uk_hwcnt_reader_setup *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwcnt_reader_setup(kbdev->vinstr_ctx,
+					setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_GPU_PROPS_REG_DUMP:
+		{
+			struct kbase_uk_gpuprops *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			if (kbase_gpuprops_uk_get_props(kctx, setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_FIND_CPU_OFFSET:
+		{
+			struct kbase_uk_find_cpu_offset *find = args;
+
+			if (sizeof(*find) != args_size)
+				goto bad_size;
+
+			if (find->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_FIND_CPU_OFFSET: find->gpu_addr: passed parameter is invalid");
+				goto out_bad;
+			}
+
+			if (find->size > SIZE_MAX || find->cpu_addr > ULONG_MAX) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else {
+				int err;
+
+				err = kbasep_find_enclosing_cpu_mapping_offset(
+						kctx,
+						find->gpu_addr,
+						(uintptr_t) find->cpu_addr,
+						(size_t) find->size,
+						&find->offset);
+
+				if (err)
+					ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			}
+			break;
+		}
+	case KBASE_FUNC_GET_VERSION:
+		{
+			struct kbase_uk_get_ddk_version *get_version = (struct kbase_uk_get_ddk_version *)args;
+
+			if (sizeof(*get_version) != args_size)
+				goto bad_size;
+
+			/* version buffer size check is made in compile time assert */
+			memcpy(get_version->version_buffer, KERNEL_SIDE_DDK_VERSION_STRING, sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+			get_version->version_string_size = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+			break;
+		}
+
+	case KBASE_FUNC_STREAM_CREATE:
+		{
+#ifdef CONFIG_SYNC
+			struct kbase_uk_stream_create *screate = (struct kbase_uk_stream_create *)args;
+
+			if (sizeof(*screate) != args_size)
+				goto bad_size;
+
+			if (strnlen(screate->name, sizeof(screate->name)) >= sizeof(screate->name)) {
+				/* not NULL terminated */
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_stream_create(screate->name, &screate->fd) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+#else /* CONFIG_SYNC */
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif /* CONFIG_SYNC */
+			break;
+		}
+	case KBASE_FUNC_FENCE_VALIDATE:
+		{
+#ifdef CONFIG_SYNC
+			struct kbase_uk_fence_validate *fence_validate = (struct kbase_uk_fence_validate *)args;
+
+			if (sizeof(*fence_validate) != args_size)
+				goto bad_size;
+
+			if (kbase_fence_validate(fence_validate->fd) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+#endif /* CONFIG_SYNC */
+			break;
+		}
+
+	case KBASE_FUNC_SET_TEST_DATA:
+		{
+#if MALI_UNIT_TEST
+			struct kbase_uk_set_test_data *set_data = args;
+
+			shared_kernel_test_data = set_data->test_data;
+			shared_kernel_test_data.kctx.value = (void __user *)kctx;
+			shared_kernel_test_data.mm.value = (void __user *)current->mm;
+			ukh->ret = MALI_ERROR_NONE;
+#endif /* MALI_UNIT_TEST */
+			break;
+		}
+
+	case KBASE_FUNC_INJECT_ERROR:
+		{
+#ifdef CONFIG_MALI_ERROR_INJECT
+			unsigned long flags;
+			struct kbase_error_params params = ((struct kbase_uk_error_params *)args)->params;
+
+			/*mutex lock */
+			spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+			if (job_atom_inject_error(&params) != 0)
+				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+			/*mutex unlock */
+#endif /* CONFIG_MALI_ERROR_INJECT */
+			break;
+		}
+
+	case KBASE_FUNC_MODEL_CONTROL:
+		{
+#ifdef CONFIG_MALI_NO_MALI
+			unsigned long flags;
+			struct kbase_model_control_params params =
+					((struct kbase_uk_model_control_params *)args)->params;
+
+			/*mutex lock */
+			spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+			if (gpu_model_control(kbdev->model, &params) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+			/*mutex unlock */
+#endif /* CONFIG_MALI_NO_MALI */
+			break;
+		}
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	case KBASE_FUNC_KEEP_GPU_POWERED:
+		{
+			dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_KEEP_GPU_POWERED: function is deprecated and disabled\n");
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+	case KBASE_FUNC_GET_PROFILING_CONTROLS:
+		{
+			struct kbase_uk_profiling_controls *controls =
+					(struct kbase_uk_profiling_controls *)args;
+			u32 i;
+
+			if (sizeof(*controls) != args_size)
+				goto bad_size;
+
+			for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+				controls->profiling_controls[i] = kbase_get_profiling_control(kbdev, i);
+
+			break;
+		}
+
+	/* used only for testing purposes; these controls are to be set by gator through gator API */
+	case KBASE_FUNC_SET_PROFILING_CONTROLS:
+		{
+			struct kbase_uk_profiling_controls *controls =
+					(struct kbase_uk_profiling_controls *)args;
+			u32 i;
+
+			if (sizeof(*controls) != args_size)
+				goto bad_size;
+
+			for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+				_mali_profiling_control(i, controls->profiling_controls[i]);
+
+			break;
+		}
+
+	case KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD:
+		{
+			struct kbase_uk_debugfs_mem_profile_add *add_data =
+					(struct kbase_uk_debugfs_mem_profile_add *)args;
+			char *buf;
+			char __user *user_buf;
+
+			if (sizeof(*add_data) != args_size)
+				goto bad_size;
+
+			if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+				dev_err(kbdev->dev, "buffer too big\n");
+				goto out_bad;
+			}
+
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				user_buf = compat_ptr(add_data->buf.compat_value);
+			else
+#endif
+				user_buf = add_data->buf.value;
+
+			buf = kmalloc(add_data->len, GFP_KERNEL);
+			if (ZERO_OR_NULL_PTR(buf))
+				goto out_bad;
+
+			if (0 != copy_from_user(buf, user_buf, add_data->len)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				kfree(buf);
+				goto out_bad;
+			}
+
+			if (kbasep_mem_profile_debugfs_insert(kctx, buf,
+							add_data->len)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				kfree(buf);
+				goto out_bad;
+			}
+
+			break;
+		}
+
+#ifdef CONFIG_MALI_NO_MALI
+	case KBASE_FUNC_SET_PRFCNT_VALUES:
+		{
+
+			struct kbase_uk_prfcnt_values *params =
+			  ((struct kbase_uk_prfcnt_values *)args);
+			gpu_model_set_dummy_prfcnt_sample(params->data,
+					params->size);
+
+			break;
+		}
+#endif /* CONFIG_MALI_NO_MALI */
+#ifdef BASE_LEGACY_UK10_4_SUPPORT
+	case KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4:
+		{
+			struct kbase_uk_tlstream_acquire_v10_4 *tlstream_acquire
+					= args;
+
+			if (sizeof(*tlstream_acquire) != args_size)
+				goto bad_size;
+
+			if (0 != kbase_tlstream_acquire(
+						kctx,
+						&tlstream_acquire->fd, 0)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else if (0 <= tlstream_acquire->fd) {
+				/* Summary stream was cleared during acquire.
+				 * Create static timeline objects that will be
+				 * read by client. */
+				kbase_create_timeline_objects(kctx);
+			}
+			break;
+		}
+#endif /* BASE_LEGACY_UK10_4_SUPPORT */
+	case KBASE_FUNC_TLSTREAM_ACQUIRE:
+		{
+			struct kbase_uk_tlstream_acquire *tlstream_acquire =
+				args;
+
+			if (sizeof(*tlstream_acquire) != args_size)
+				goto bad_size;
+
+			if (tlstream_acquire->flags & ~BASE_TLSTREAM_FLAGS_MASK)
+				goto out_bad;
+
+			if (0 != kbase_tlstream_acquire(
+						kctx,
+						&tlstream_acquire->fd,
+						tlstream_acquire->flags)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else if (0 <= tlstream_acquire->fd) {
+				/* Summary stream was cleared during acquire.
+				 * Create static timeline objects that will be
+				 * read by client. */
+				kbase_create_timeline_objects(kctx);
+			}
+			break;
+		}
+	case KBASE_FUNC_TLSTREAM_FLUSH:
+		{
+			struct kbase_uk_tlstream_flush *tlstream_flush =
+				args;
+
+			if (sizeof(*tlstream_flush) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_flush_streams();
+			break;
+		}
+#if MALI_UNIT_TEST
+	case KBASE_FUNC_TLSTREAM_TEST:
+		{
+			struct kbase_uk_tlstream_test *tlstream_test = args;
+
+			if (sizeof(*tlstream_test) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_test(
+					tlstream_test->tpw_count,
+					tlstream_test->msg_delay,
+					tlstream_test->msg_count,
+					tlstream_test->aux_msg);
+			break;
+		}
+	case KBASE_FUNC_TLSTREAM_STATS:
+		{
+			struct kbase_uk_tlstream_stats *tlstream_stats = args;
+
+			if (sizeof(*tlstream_stats) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_stats(
+					&tlstream_stats->bytes_collected,
+					&tlstream_stats->bytes_generated);
+			break;
+		}
+#endif /* MALI_UNIT_TEST */
+
+	case KBASE_FUNC_GET_CONTEXT_ID:
+		{
+			struct kbase_uk_context_id *info = args;
+
+			info->id = kctx->id;
+			break;
+		}
+
+	case KBASE_FUNC_SOFT_EVENT_UPDATE:
+		{
+			struct kbase_uk_soft_event_update *update = args;
+
+			if (sizeof(*update) != args_size)
+				goto bad_size;
+
+			if (((update->new_status != BASE_JD_SOFT_EVENT_SET) &&
+			    (update->new_status != BASE_JD_SOFT_EVENT_RESET)) ||
+			    (update->flags != 0))
+				goto out_bad;
+
+			if (kbase_soft_event_update(kctx, update->evt,
+						update->new_status))
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+
+	default:
+		dev_err(kbdev->dev, "unknown ioctl %u\n", id);
+		goto out_bad;
+	}
+
+	return ret;
+
+ bad_size:
+	dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id);
+ out_bad:
+	return -EINVAL;
+}
+
+static struct kbase_device *to_kbase_device(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
+
+static int assign_irqs(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int i;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* 3 IRQ resources */
+	for (i = 0; i < 3; i++) {
+		struct resource *irq_res;
+		int irqtag;
+
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res) {
+			dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+			return -ENOENT;
+		}
+
+#ifdef CONFIG_OF
+		if (!strcmp(irq_res->name, "JOB")) {
+			irqtag = JOB_IRQ_TAG;
+		} else if (!strcmp(irq_res->name, "MMU")) {
+			irqtag = MMU_IRQ_TAG;
+		} else if (!strcmp(irq_res->name, "GPU")) {
+			irqtag = GPU_IRQ_TAG;
+		} else {
+			dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+				irq_res->name);
+			return -EINVAL;
+		}
+#else
+		irqtag = i;
+#endif /* CONFIG_OF */
+		kbdev->irqs[irqtag].irq = irq_res->start;
+		kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK;
+	}
+
+	return 0;
+}
+
+/*
+ * API to acquire device list mutex and
+ * return pointer to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void)
+{
+	mutex_lock(&kbase_dev_list_lock);
+	return &kbase_dev_list;
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_get);
+
+/* API to release the device list mutex */
+void kbase_dev_list_put(const struct list_head *dev_list)
+{
+	mutex_unlock(&kbase_dev_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_put);
+
+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */
+struct kbase_device *kbase_find_device(int minor)
+{
+	struct kbase_device *kbdev = NULL;
+	struct list_head *entry;
+	const struct list_head *dev_list = kbase_dev_list_get();
+
+	list_for_each(entry, dev_list) {
+		struct kbase_device *tmp;
+
+		tmp = list_entry(entry, struct kbase_device, entry);
+		if (tmp->mdev.minor == minor || minor == -1) {
+			kbdev = tmp;
+			get_device(kbdev->dev);
+			break;
+		}
+	}
+	kbase_dev_list_put(dev_list);
+
+	return kbdev;
+}
+EXPORT_SYMBOL(kbase_find_device);
+
+void kbase_release_device(struct kbase_device *kbdev)
+{
+	put_device(kbdev->dev);
+}
+EXPORT_SYMBOL(kbase_release_device);
+
+#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
+/*
+ * Older versions, before v4.6, of the kernel doesn't have
+ * kstrtobool_from_user().
+ */
+static int kstrtobool_from_user(const char __user *s, size_t count, bool *res)
+{
+	char buf[32];
+
+	count = min(sizeof(buf), count);
+
+	if (copy_from_user(buf, s, count))
+		return -EFAULT;
+	buf[count] = '\0';
+
+	return strtobool(buf, res);
+}
+#endif
+
+static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	int err;
+	bool value;
+
+	err = kstrtobool_from_user(ubuf, size, &value);
+	if (err)
+		return err;
+
+	if (value)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+	else
+		kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE);
+
+	return size;
+}
+
+static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	char buf[32];
+	int count;
+	bool value;
+
+	value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE);
+
+	count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N");
+
+	return simple_read_from_buffer(ubuf, size, off, buf, count);
+}
+
+static const struct file_operations kbase_infinite_cache_fops = {
+	.open = simple_open,
+	.write = write_ctx_infinite_cache,
+	.read = read_ctx_infinite_cache,
+};
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+	struct kbase_device *kbdev = NULL;
+	struct kbase_context *kctx;
+	int ret = 0;
+#ifdef CONFIG_DEBUG_FS
+	char kctx_name[64];
+#endif
+
+	kbdev = kbase_find_device(iminor(inode));
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kctx = kbase_create_context(kbdev, is_compat_task());
+	if (!kctx) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	init_waitqueue_head(&kctx->event_queue);
+	filp->private_data = kctx;
+	kctx->filp = filp;
+
+	if (kbdev->infinite_cache_active_default)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+
+#ifdef CONFIG_DEBUG_FS
+	snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+
+	kctx->kctx_dentry = debugfs_create_dir(kctx_name,
+			kbdev->debugfs_ctx_directory);
+
+	if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+#ifdef CONFIG_MALI_COH_USER
+	 /* if cache is completely coherent at hardware level, then remove the
+	  * infinite cache control support from debugfs.
+	  */
+#else
+	debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry,
+			    kctx, &kbase_infinite_cache_fops);
+#endif /* CONFIG_MALI_COH_USER */
+
+	mutex_init(&kctx->mem_profile_lock);
+
+	kbasep_jd_debugfs_ctx_init(kctx);
+	kbase_debug_mem_view_init(filp);
+
+	kbase_debug_job_fault_context_init(kctx);
+
+	kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool);
+
+	kbase_jit_debugfs_init(kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+	dev_dbg(kbdev->dev, "created base context\n");
+
+	{
+		struct kbasep_kctx_list_element *element;
+
+		element = kzalloc(sizeof(*element), GFP_KERNEL);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			element->kctx = kctx;
+			list_add(&element->link, &kbdev->kctx_list);
+			kbase_tlstream_tl_new_ctx(
+					element->kctx,
+					(u32)(element->kctx->id),
+					(u32)(element->kctx->tgid));
+			mutex_unlock(&kbdev->kctx_list_lock);
+		} else {
+			/* we don't treat this as a fail - just warn about it */
+			dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n");
+		}
+	}
+	return 0;
+
+ out:
+	kbase_release_device(kbdev);
+	return ret;
+}
+
+static int kbase_release(struct inode *inode, struct file *filp)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_kctx_list_element *element, *tmp;
+	bool found_element = false;
+
+	kbase_tlstream_tl_del_ctx(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	debugfs_remove_recursive(kctx->kctx_dentry);
+	kbasep_mem_profile_debugfs_remove(kctx);
+	kbase_debug_job_fault_context_term(kctx);
+#endif
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found_element = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+	if (!found_element)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	filp->private_data = NULL;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	/* If this client was performing hwcnt dumping and did not explicitly
+	 * detach itself, remove it from the vinstr core now */
+	if (kctx->vinstr_cli) {
+		struct kbase_uk_hwcnt_setup setup;
+
+		setup.dump_buffer = 0llu;
+		kbase_vinstr_legacy_hwc_setup(
+				kbdev->vinstr_ctx, &kctx->vinstr_cli, &setup);
+	}
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	kbase_destroy_context(kctx);
+
+	dev_dbg(kbdev->dev, "deleted base context\n");
+	kbase_release_device(kbdev);
+	return 0;
+}
+
+#define CALL_MAX_SIZE 536
+
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	u64 msg[(CALL_MAX_SIZE + 7) >> 3] = { 0xdeadbeefdeadbeefull };	/* alignment fixup */
+	u32 size = _IOC_SIZE(cmd);
+	struct kbase_context *kctx = filp->private_data;
+
+	if (size > CALL_MAX_SIZE)
+		return -ENOTTY;
+
+	if (0 != copy_from_user(&msg, (void __user *)arg, size)) {
+		dev_err(kctx->kbdev->dev, "failed to copy ioctl argument into kernel space\n");
+		return -EFAULT;
+	}
+
+	if (kbase_dispatch(kctx, &msg, size) != 0)
+		return -EFAULT;
+
+	if (0 != copy_to_user((void __user *)arg, &msg, size)) {
+		dev_err(kctx->kbdev->dev, "failed to copy results of UK call back to user space\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct base_jd_event_v2 uevent;
+	int out_count = 0;
+
+	if (count < sizeof(uevent))
+		return -ENOBUFS;
+
+	do {
+		while (kbase_event_dequeue(kctx, &uevent)) {
+			if (out_count > 0)
+				goto out;
+
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			if (wait_event_interruptible(kctx->event_queue,
+					kbase_event_pending(kctx)) != 0)
+				return -ERESTARTSYS;
+		}
+		if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+			if (out_count == 0)
+				return -EPIPE;
+			goto out;
+		}
+
+		if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0)
+			return -EFAULT;
+
+		buf += sizeof(uevent);
+		out_count++;
+		count -= sizeof(uevent);
+	} while (count >= sizeof(uevent));
+
+ out:
+	return out_count * sizeof(uevent);
+}
+
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+	struct kbase_context *kctx = filp->private_data;
+
+	poll_wait(filp, &kctx->event_queue, wait);
+	if (kbase_event_pending(kctx))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup);
+
+static int kbase_check_flags(int flags)
+{
+	/* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
+	 * closes the file descriptor in a child process.
+	 */
+	if (0 == (flags & O_CLOEXEC))
+		return -EINVAL;
+
+	return 0;
+}
+
+#ifdef CONFIG_64BIT
+/* The following function is taken from the kernel and just
+ * renamed. As it's not exported to modules we must copy-paste it here.
+ */
+
+static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
+		*info)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+
+	/* Adjust search length to account for worst case alignment overhead */
+	length = info->length + info->align_mask;
+	if (length < info->length)
+		return -ENOMEM;
+
+	/*
+	 * Adjust search limits by the desired length.
+	 * See implementation comment at top of unmapped_area().
+	 */
+	gap_end = info->high_limit;
+	if (gap_end < length)
+		return -ENOMEM;
+	high_limit = gap_end - length;
+
+	if (info->low_limit > high_limit)
+		return -ENOMEM;
+	low_limit = info->low_limit + length;
+
+	/* Check highest gap, which does not precede any rbtree node */
+	gap_start = mm->highest_vm_end;
+	if (gap_start <= high_limit)
+		goto found_highest;
+
+	/* Check if rbtree root looks promising */
+	if (RB_EMPTY_ROOT(&mm->mm_rb))
+		return -ENOMEM;
+	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+	if (vma->rb_subtree_gap < length)
+		return -ENOMEM;
+
+	while (true) {
+		/* Visit right subtree if it looks promising */
+		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+			struct vm_area_struct *right =
+				rb_entry(vma->vm_rb.rb_right,
+					 struct vm_area_struct, vm_rb);
+			if (right->rb_subtree_gap >= length) {
+				vma = right;
+				continue;
+			}
+		}
+
+check_current:
+		/* Check if current node has a suitable gap */
+		gap_end = vma->vm_start;
+		if (gap_end < low_limit)
+			return -ENOMEM;
+		if (gap_start <= high_limit && gap_end - gap_start >= length)
+			goto found;
+
+		/* Visit left subtree if it looks promising */
+		if (vma->vm_rb.rb_left) {
+			struct vm_area_struct *left =
+				rb_entry(vma->vm_rb.rb_left,
+					 struct vm_area_struct, vm_rb);
+			if (left->rb_subtree_gap >= length) {
+				vma = left;
+				continue;
+			}
+		}
+
+		/* Go back up the rbtree to find next candidate node */
+		while (true) {
+			struct rb_node *prev = &vma->vm_rb;
+			if (!rb_parent(prev))
+				return -ENOMEM;
+			vma = rb_entry(rb_parent(prev),
+				       struct vm_area_struct, vm_rb);
+			if (prev == vma->vm_rb.rb_right) {
+				gap_start = vma->vm_prev ?
+					vma->vm_prev->vm_end : 0;
+				goto check_current;
+			}
+		}
+	}
+
+found:
+	/* We found a suitable gap. Clip it with the original high_limit. */
+	if (gap_end > info->high_limit)
+		gap_end = info->high_limit;
+
+found_highest:
+	/* Compute highest gap address at the desired alignment */
+	gap_end -= info->length;
+	gap_end -= (gap_end - info->align_offset) & info->align_mask;
+
+	VM_BUG_ON(gap_end < info->low_limit);
+	VM_BUG_ON(gap_end < gap_start);
+	return gap_end;
+}
+
+
+static unsigned long kbase_get_unmapped_area(struct file *filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	/* based on get_unmapped_area, but simplified slightly due to that some
+	 * values are known in advance */
+	struct kbase_context *kctx = filp->private_data;
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+
+	/* err on fixed address */
+	if ((flags & MAP_FIXED) || addr)
+		return -EINVAL;
+
+	/* too big? */
+	if (len > TASK_SIZE - SZ_2M)
+		return -ENOMEM;
+
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return current->mm->get_unmapped_area(filp, addr, len, pgoff,
+				flags);
+
+	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) {
+		info.high_limit = kctx->same_va_end << PAGE_SHIFT;
+		info.align_mask = 0;
+		info.align_offset = 0;
+	} else {
+		info.high_limit = min_t(unsigned long, mm->mmap_base,
+					(kctx->same_va_end << PAGE_SHIFT));
+		if (len >= SZ_2M) {
+			info.align_offset = SZ_2M;
+			info.align_mask = SZ_2M - 1;
+		} else {
+			info.align_mask = 0;
+			info.align_offset = 0;
+		}
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = SZ_2M;
+	return kbase_unmapped_area_topdown(&info);
+}
+#endif
+
+static const struct file_operations kbase_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_open,
+	.release = kbase_release,
+	.read = kbase_read,
+	.poll = kbase_poll,
+	.unlocked_ioctl = kbase_ioctl,
+	.compat_ioctl = kbase_ioctl,
+	.mmap = kbase_mmap,
+	.check_flags = kbase_check_flags,
+#ifdef CONFIG_64BIT
+	.get_unmapped_area = kbase_get_unmapped_area,
+#endif
+};
+
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value)
+{
+	writel(value, kbdev->reg + offset);
+}
+
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset)
+{
+	return readl(kbdev->reg + offset);
+}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+/** Show callback for the @c power_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *current_policy;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_get_policy(kbdev);
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c power_policy sysfs file.
+ *
+ * This function is called when the @c power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls @ref kbase_pm_set_policy to change the
+ * policy.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "power_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/** The sysfs file @c power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+
+/** Show callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c core_availability_policy
+ * sysfs file. This is a list of the available policies with the currently
+ * active one surrounded by square brackets.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *current_policy;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_ca_get_policy(kbdev);
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called when the @c core_availability_policy sysfs file is
+ * written to. It matches the requested policy against the available policies
+ * and if a matching policy is found calls @ref kbase_pm_set_policy to change
+ * the policy.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *new_policy = NULL;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "core_availability_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_ca_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/** The sysfs file @c core_availability_policy
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy);
+
+/** Show callback for the @c core_mask sysfs file.
+ *
+ * This function is called to get the contents of the @c core_mask sysfs
+ * file.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS0) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[0]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS1) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[1]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS2) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[2]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Available core mask : 0x%llX\n",
+			kbdev->gpu_props.props.raw_props.shader_present);
+
+	return ret;
+}
+
+/** Store callback for the @c core_mask sysfs file.
+ *
+ * This function is called when the @c core_mask sysfs file is written to.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	u64 new_core_mask[3];
+	int items;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llx %llx %llx",
+			&new_core_mask[0], &new_core_mask[1],
+			&new_core_mask[2]);
+
+	if (items == 1)
+		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+
+	if (items == 1 || items == 3) {
+		u64 shader_present =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		u64 group0_core_mask =
+				kbdev->gpu_props.props.coherency_info.group[0].
+				core_mask;
+
+		if ((new_core_mask[0] & shader_present) != new_core_mask[0] ||
+				!(new_core_mask[0] & group0_core_mask) ||
+			(new_core_mask[1] & shader_present) !=
+						new_core_mask[1] ||
+				!(new_core_mask[1] & group0_core_mask) ||
+			(new_core_mask[2] & shader_present) !=
+						new_core_mask[2] ||
+				!(new_core_mask[2] & group0_core_mask)) {
+			dev_err(dev, "power_policy: invalid core specification\n");
+			return -EINVAL;
+		}
+
+		if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
+				kbdev->pm.debug_core_mask[1] !=
+						new_core_mask[1] ||
+				kbdev->pm.debug_core_mask[2] !=
+						new_core_mask[2]) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+					new_core_mask[1], new_core_mask[2]);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n"
+		"Use format <core_mask>\n"
+		"or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+	return -EINVAL;
+}
+
+/** The sysfs file @c core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+
+/**
+ * set_soft_job_timeout() - Store callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This allows setting the timeout for software jobs. Waiting soft event wait
+ * jobs will be cancelled after this period expires, while soft fence wait jobs
+ * will print debug information if the fence debug feature is enabled.
+ *
+ * This is expressed in milliseconds.
+ *
+ * Return: count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_soft_job_timeout(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int soft_job_timeout_ms;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) ||
+	    (soft_job_timeout_ms <= 0))
+		return -EINVAL;
+
+	atomic_set(&kbdev->js_data.soft_job_timeout_ms,
+		   soft_job_timeout_ms);
+
+	return count;
+}
+
+/**
+ * show_soft_job_timeout() - Show callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * This will return the timeout for the software jobs.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_soft_job_timeout(struct device *dev,
+				       struct device_attribute *attr,
+				       char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n",
+			 atomic_read(&kbdev->js_data.soft_job_timeout_ms));
+}
+
+static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR,
+		   show_soft_job_timeout, set_soft_job_timeout);
+
+static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms,
+				int default_ticks, u32 old_ticks)
+{
+	if (timeout_ms > 0) {
+		u64 ticks = timeout_ms * 1000000ULL;
+		do_div(ticks, kbdev->js_data.scheduling_period_ns);
+		if (!ticks)
+			return 1;
+		return ticks;
+	} else if (timeout_ms < 0) {
+		return default_ticks;
+	} else {
+		return old_ticks;
+	}
+}
+
+/** Store callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the @c js_timeouts sysfs
+ * file. This file contains five values separated by whitespace. The values
+ * are basically the same as JS_SOFT_STOP_TICKS, JS_HARD_STOP_TICKS_SS,
+ * JS_HARD_STOP_TICKS_DUMPING, JS_RESET_TICKS_SS, JS_RESET_TICKS_DUMPING
+ * configuration values (in that order), with the difference that the js_timeout
+ * values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfile file allows the current values in
+ * use by the job scheduler to get override. Note that a value needs to
+ * be other than 0 for it to override the current job scheduler value.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	long js_soft_stop_ms;
+	long js_soft_stop_ms_cl;
+	long js_hard_stop_ms_ss;
+	long js_hard_stop_ms_cl;
+	long js_hard_stop_ms_dumping;
+	long js_reset_ms_ss;
+	long js_reset_ms_cl;
+	long js_reset_ms_dumping;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld",
+			&js_soft_stop_ms, &js_soft_stop_ms_cl,
+			&js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+			&js_hard_stop_ms_dumping, &js_reset_ms_ss,
+			&js_reset_ms_cl, &js_reset_ms_dumping);
+
+	if (items == 8) {
+		struct kbasep_js_device_data *js_data = &kbdev->js_data;
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\
+	js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \
+			default, js_data->ticks_name); \
+	dev_dbg(kbdev->dev, "Overriding " #ticks_name \
+			" with %lu ticks (%lu ms)\n", \
+			(unsigned long)js_data->ticks_name, \
+			ms_name); \
+	} while (0)
+
+		UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms,
+				DEFAULT_JS_SOFT_STOP_TICKS);
+		UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl,
+				DEFAULT_JS_SOFT_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_HARD_STOP_TICKS_SS_8408 :
+				DEFAULT_JS_HARD_STOP_TICKS_SS);
+		UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl,
+				DEFAULT_JS_HARD_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_dumping,
+				js_hard_stop_ms_dumping,
+				DEFAULT_JS_HARD_STOP_TICKS_DUMPING);
+		UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_RESET_TICKS_SS_8408 :
+				DEFAULT_JS_RESET_TICKS_SS);
+		UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl,
+				DEFAULT_JS_RESET_TICKS_CL);
+		UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping,
+				DEFAULT_JS_RESET_TICKS_DUMPING);
+
+		kbase_js_set_timeouts(kbdev);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n"
+			"Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n"
+			"Write 0 for no change, -1 to restore default timeout\n");
+	return -EINVAL;
+}
+
+static unsigned long get_js_timeout_in_ms(
+		u32 scheduling_period_ns,
+		u32 ticks)
+{
+	u64 ms = (u64)ticks * scheduling_period_ns;
+
+	do_div(ms, 1000000UL);
+	return ms;
+}
+
+/** Show callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the @c js_timeouts sysfs
+ * file. It returns the last set values written to the js_timeouts sysfs file.
+ * If the file didn't get written yet, the values will be current setting in
+ * use.
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+	unsigned long js_soft_stop_ms;
+	unsigned long js_soft_stop_ms_cl;
+	unsigned long js_hard_stop_ms_ss;
+	unsigned long js_hard_stop_ms_cl;
+	unsigned long js_hard_stop_ms_dumping;
+	unsigned long js_reset_ms_ss;
+	unsigned long js_reset_ms_cl;
+	unsigned long js_reset_ms_dumping;
+	u32 scheduling_period_ns;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+
+#define GET_TIMEOUT(name) get_js_timeout_in_ms(\
+		scheduling_period_ns, \
+		kbdev->js_data.name)
+
+	js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks);
+	js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl);
+	js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss);
+	js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl);
+	js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping);
+	js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss);
+	js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl);
+	js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef GET_TIMEOUT
+
+	ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+			js_soft_stop_ms, js_soft_stop_ms_cl,
+			js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+			js_hard_stop_ms_dumping, js_reset_ms_ss,
+			js_reset_ms_cl, js_reset_ms_dumping);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** The sysfs file @c js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * JS_STOP_STOP_TICKS_SS
+ * JS_STOP_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
+
+static u32 get_new_js_timeout(
+		u32 old_period,
+		u32 old_ticks,
+		u32 new_scheduling_period_ns)
+{
+	u64 ticks = (u64)old_period * (u64)old_ticks;
+	do_div(ticks, new_scheduling_period_ns);
+	return ticks?ticks:1;
+}
+
+/**
+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ *                            file
+ * @dev:   The device the sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the js_scheduling_period sysfs file is written
+ * to. It checks the data written, and if valid updates the js_scheduling_period
+ * value
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	unsigned int js_scheduling_period;
+	u32 new_scheduling_period_ns;
+	u32 old_period;
+	struct kbasep_js_device_data *js_data;
+	unsigned long flags;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	js_data = &kbdev->js_data;
+
+	ret = kstrtouint(buf, 0, &js_scheduling_period);
+	if (ret || !js_scheduling_period) {
+		dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
+				"Use format <js_scheduling_period_ms>\n");
+		return -EINVAL;
+	}
+
+	new_scheduling_period_ns = js_scheduling_period * 1000000;
+
+	/* Update scheduling timeouts */
+	mutex_lock(&js_data->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* If no contexts have been scheduled since js_timeouts was last written
+	 * to, the new timeouts might not have been latched yet. So check if an
+	 * update is pending and use the new values if necessary. */
+
+	/* Use previous 'new' scheduling period as a base if present. */
+	old_period = js_data->scheduling_period_ns;
+
+#define SET_TIMEOUT(name) \
+		(js_data->name = get_new_js_timeout(\
+				old_period, \
+				kbdev->js_data.name, \
+				new_scheduling_period_ns))
+
+	SET_TIMEOUT(soft_stop_ticks);
+	SET_TIMEOUT(soft_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_ss);
+	SET_TIMEOUT(hard_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_dumping);
+	SET_TIMEOUT(gpu_reset_ticks_ss);
+	SET_TIMEOUT(gpu_reset_ticks_cl);
+	SET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef SET_TIMEOUT
+
+	js_data->scheduling_period_ns = new_scheduling_period_ns;
+
+	kbase_js_set_timeouts(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&js_data->runpool_mutex);
+
+	dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
+			js_scheduling_period);
+
+	return count;
+}
+
+/**
+ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs
+ *                             entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the JS scheduling
+ * period.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	u32 period;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	period = kbdev->js_data.scheduling_period_ns;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+			period / 1000000);
+
+	return ret;
+}
+
+static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
+		show_js_scheduling_period, set_js_scheduling_period);
+
+#if !MALI_CUSTOMER_RELEASE
+/** Store callback for the @c force_replay sysfs file.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (!strncmp("limit=", buf, MIN(6, count))) {
+		int force_replay_limit;
+		int items = sscanf(buf, "limit=%u", &force_replay_limit);
+
+		if (items == 1) {
+			kbdev->force_replay_random = false;
+			kbdev->force_replay_limit = force_replay_limit;
+			kbdev->force_replay_count = 0;
+
+			return count;
+		}
+	} else if (!strncmp("random_limit", buf, MIN(12, count))) {
+		kbdev->force_replay_random = true;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("norandom_limit", buf, MIN(14, count))) {
+		kbdev->force_replay_random = false;
+		kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("core_req=", buf, MIN(9, count))) {
+		unsigned int core_req;
+		int items = sscanf(buf, "core_req=%x", &core_req);
+
+		if (items == 1) {
+			kbdev->force_replay_core_req = (base_jd_core_req)core_req;
+
+			return count;
+		}
+	}
+	dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n");
+	return -EINVAL;
+}
+
+/** Show callback for the @c force_replay sysfs file.
+ *
+ * This function is called to get the contents of the @c force_replay sysfs
+ * file. It returns the last set value written to the force_replay sysfs file.
+ * If the file didn't get written yet, the values will be 0.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_force_replay(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->force_replay_random)
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=0\nrandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_core_req);
+	else
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=%u\nnorandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_limit,
+				kbdev->force_replay_core_req);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** The sysfs file @c force_replay.
+ *
+ */
+static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay,
+		set_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG
+static ssize_t set_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int softstop_always;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &softstop_always);
+	if (ret || ((softstop_always != 0) && (softstop_always != 1))) {
+		dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n"
+				"Use format <soft_stop_always>\n");
+		return -EINVAL;
+	}
+
+	kbdev->js_data.softstop_always = (bool) softstop_always;
+	dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n",
+			(kbdev->js_data.softstop_always) ?
+			"Enabled" : "Disabled");
+	return count;
+}
+
+static ssize_t show_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * By default, soft-stops are disabled when only a single context is present. The ability to
+ * enable soft-stop when only a single context is present can be used for debug and unit-testing purposes.
+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.)
+ */
+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG
+typedef void (kbasep_debug_command_func) (struct kbase_device *);
+
+enum kbasep_debug_command_code {
+	KBASEP_DEBUG_COMMAND_DUMPTRACE,
+
+	/* This must be the last enum */
+	KBASEP_DEBUG_COMMAND_COUNT
+};
+
+struct kbasep_debug_command {
+	char *str;
+	kbasep_debug_command_func *func;
+};
+
+/** Debug commands supported by the driver */
+static const struct kbasep_debug_command debug_commands[] = {
+	{
+	 .str = "dumptrace",
+	 .func = &kbasep_trace_dump,
+	 }
+};
+
+/** Show callback for the @c debug_command sysfs file.
+ *
+ * This function is called to get the contents of the @c debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c debug_command sysfs file.
+ *
+ * This function is called when the @c debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @ref debug_commands to issue the command.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) {
+		if (sysfs_streq(debug_commands[i].str, buf)) {
+			debug_commands[i].func(kbdev);
+			return count;
+		}
+	}
+
+	/* Debug Command not found */
+	dev_err(dev, "debug_command: command not known\n");
+	return -EINVAL;
+}
+
+/** The sysfs file @c debug_command.
+ *
+ * This is used to issue general debug commands to the device driver.
+ * Reading it will produce a list of debug commands, separated by newlines.
+ * Writing to it with one of those commands will issue said command.
+ */
+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
+#endif /* CONFIG_MALI_DEBUG */
+
+/**
+ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get a description of the present Mali
+ * GPU via the gpuinfo sysfs entry.  This includes the GPU family, the
+ * number of cores, the hardware version and the raw product id.  For
+ * example:
+ *
+ *    Mali-T60x MP4 r0p0 0x6956
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t kbase_show_gpuinfo(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	static const struct gpu_product_id_name {
+		unsigned id;
+		char *name;
+	} gpu_product_id_names[] = {
+		{ .id = GPU_ID_PI_T60X, .name = "Mali-T60x" },
+		{ .id = GPU_ID_PI_T62X, .name = "Mali-T62x" },
+		{ .id = GPU_ID_PI_T72X, .name = "Mali-T72x" },
+		{ .id = GPU_ID_PI_T76X, .name = "Mali-T76x" },
+		{ .id = GPU_ID_PI_T82X, .name = "Mali-T82x" },
+		{ .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
+		{ .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
+		{ .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+		{ .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G71" },
+		{ .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-THEx" },
+	};
+	const char *product_name = "(Unknown Mali GPU)";
+	struct kbase_device *kbdev;
+	u32 gpu_id;
+	unsigned product_id, product_id_mask;
+	unsigned i;
+	bool is_new_format;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	is_new_format = GPU_ID_IS_NEW_FORMAT(product_id);
+	product_id_mask =
+		(is_new_format ?
+			GPU_ID2_PRODUCT_MODEL :
+			GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
+		const struct gpu_product_id_name *p = &gpu_product_id_names[i];
+
+		if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) &&
+		    (p->id & product_id_mask) ==
+		    (product_id & product_id_mask)) {
+			product_name = p->name;
+			break;
+		}
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%s MP%d r%dp%d 0x%04X\n",
+		product_name, kbdev->gpu_props.num_cores,
+		(gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT,
+		(gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT,
+		product_id);
+}
+static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
+
+/**
+ * set_dvfs_period - Store callback for the dvfs_period sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the dvfs_period sysfs file is written to. It
+ * checks the data written, and if valid updates the DVFS period variable,
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_dvfs_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int dvfs_period;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &dvfs_period);
+	if (ret || dvfs_period <= 0) {
+		dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n"
+				"Use format <dvfs_period_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.dvfs_period = dvfs_period;
+	dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period);
+
+	return count;
+}
+
+/**
+ * show_dvfs_period - Show callback for the dvfs_period sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_dvfs_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period);
+
+	return ret;
+}
+
+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
+		set_dvfs_period);
+
+/**
+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the pm_poweroff sysfs file is written to.
+ *
+ * This file contains three values separated by whitespace. The values
+ * are gpu_poweroff_time (the period of the poweroff timer, in ns),
+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle
+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer
+ * ticks before the GPU is powered off), in that order.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	s64 gpu_poweroff_time;
+	int poweroff_shader_ticks, poweroff_gpu_ticks;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time,
+			&poweroff_shader_ticks,
+			&poweroff_gpu_ticks);
+	if (items != 3) {
+		dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n"
+				"Use format <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+	kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
+	kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+
+	return count;
+}
+
+/**
+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
+			ktime_to_ns(kbdev->pm.gpu_poweroff_time),
+			kbdev->pm.poweroff_shader_ticks,
+			kbdev->pm.poweroff_gpu_ticks);
+
+	return ret;
+}
+
+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
+		set_pm_poweroff);
+
+/**
+ * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the reset_timeout sysfs file is written to. It
+ * checks the data written, and if valid updates the reset timeout.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_reset_timeout(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int reset_timeout;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &reset_timeout);
+	if (ret || reset_timeout <= 0) {
+		dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n"
+				"Use format <reset_timeout_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->reset_timeout_ms = reset_timeout;
+	dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout);
+
+	return count;
+}
+
+/**
+ * show_reset_timeout - Show callback for the reset_timeout sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current reset timeout.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_reset_timeout(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms);
+
+	return ret;
+}
+
+static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout,
+		set_reset_timeout);
+
+
+
+static ssize_t show_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_size);
+	if (err)
+		return err;
+
+	kbase_mem_pool_trim(&kbdev->mem_pool, new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size,
+		set_mem_pool_size);
+
+static ssize_t show_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_max_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_max_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_max_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
+		set_mem_pool_max_size);
+
+
+static int kbasep_protected_mode_enter(struct kbase_device *kbdev)
+{
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+		GPU_COMMAND_SET_PROTECTED_MODE, NULL);
+	return 0;
+}
+
+static bool kbasep_protected_mode_supported(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static struct kbase_protected_ops kbasep_protected_ops = {
+	.protected_mode_enter = kbasep_protected_mode_enter,
+	.protected_mode_reset = NULL,
+	.protected_mode_supported = kbasep_protected_mode_supported,
+};
+
+static void kbasep_protected_mode_init(struct kbase_device *kbdev)
+{
+	kbdev->protected_ops = NULL;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		/* Use native protected ops */
+		kbdev->protected_ops = &kbasep_protected_ops;
+	}
+#ifdef PROTECTED_CALLBACKS
+	else
+		kbdev->protected_ops = PROTECTED_CALLBACKS;
+#endif
+
+	if (kbdev->protected_ops)
+		kbdev->protected_mode_support =
+				kbdev->protected_ops->protected_mode_supported(kbdev);
+	else
+		kbdev->protected_mode_support = false;
+}
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	int err = -ENOMEM;
+
+	if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) {
+		dev_err(kbdev->dev, "Register window unavailable\n");
+		err = -EIO;
+		goto out_region;
+	}
+
+	kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+	if (!kbdev->reg) {
+		dev_err(kbdev->dev, "Can't remap register window\n");
+		err = -EINVAL;
+		goto out_ioremap;
+	}
+
+	return 0;
+
+ out_ioremap:
+	release_mem_region(kbdev->reg_start, kbdev->reg_size);
+ out_region:
+	return err;
+}
+
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+	if (kbdev->reg) {
+		iounmap(kbdev->reg);
+		release_mem_region(kbdev->reg_start, kbdev->reg_size);
+		kbdev->reg = NULL;
+		kbdev->reg_start = 0;
+		kbdev->reg_size = 0;
+	}
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+static int registers_map(struct kbase_device * const kbdev)
+{
+
+		/* the first memory resource is the physical address of the GPU
+		 * registers */
+		struct platform_device *pdev = to_platform_device(kbdev->dev);
+		struct resource *reg_res;
+		int err;
+
+		reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!reg_res) {
+			dev_err(kbdev->dev, "Invalid register resource\n");
+			return -ENOENT;
+		}
+
+		kbdev->reg_start = reg_res->start;
+		kbdev->reg_size = resource_size(reg_res);
+
+		err = kbase_common_reg_map(kbdev);
+		if (err) {
+			dev_err(kbdev->dev, "Failed to map registers\n");
+			return err;
+		}
+
+	return 0;
+}
+
+static void registers_unmap(struct kbase_device *kbdev)
+{
+	kbase_common_reg_unmap(kbdev);
+}
+
+static int power_control_init(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int err = 0;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
+	if (IS_ERR_OR_NULL(kbdev->regulator)) {
+		err = PTR_ERR(kbdev->regulator);
+		kbdev->regulator = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get regulator\n");
+			return err;
+		}
+		dev_info(kbdev->dev,
+			"Continuing without Mali regulator control\n");
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+	kbdev->clock = clk_get(kbdev->dev, "clk_mali");
+	if (IS_ERR_OR_NULL(kbdev->clock)) {
+		err = PTR_ERR(kbdev->clock);
+		kbdev->clock = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get clock\n");
+			goto fail;
+		}
+		dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare_enable(kbdev->clock);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Failed to prepare and enable clock (%d)\n",
+				err);
+			goto fail;
+		}
+	}
+
+#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) \
+	|| defined(LSK_OPPV2_BACKPORT)
+	err = dev_pm_opp_of_add_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	err = of_init_opp_table(kbdev->dev);
+#else
+	err = 0;
+#endif /* LINUX_VERSION_CODE */
+	if (err)
+		dev_dbg(kbdev->dev, "OPP table not found\n");
+#endif /* CONFIG_OF && CONFIG_PM_OPP */
+
+	return 0;
+
+fail:
+
+if (kbdev->clock != NULL) {
+	clk_put(kbdev->clock);
+	kbdev->clock = NULL;
+}
+
+#ifdef CONFIG_REGULATOR
+	if (NULL != kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif
+
+	return err;
+}
+
+static void power_control_term(struct kbase_device *kbdev)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	dev_pm_opp_of_remove_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+	of_free_opp_table(kbdev->dev);
+#endif
+
+	if (kbdev->clock) {
+		clk_disable_unprepare(kbdev->clock);
+		clk_put(kbdev->clock);
+		kbdev->clock = NULL;
+	}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	if (kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#if KBASE_GPU_RESET_EN
+#include <mali_kbase_hwaccess_jm.h>
+
+static void trigger_quirks_reload(struct kbase_device *kbdev)
+{
+	kbase_pm_context_active(kbdev);
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	kbase_pm_context_idle(kbdev);
+}
+
+#define MAKE_QUIRK_ACCESSORS(type) \
+static int type##_quirks_set(void *data, u64 val) \
+{ \
+	struct kbase_device *kbdev; \
+	kbdev = (struct kbase_device *)data; \
+	kbdev->hw_quirks_##type = (u32)val; \
+	trigger_quirks_reload(kbdev); \
+	return 0;\
+} \
+\
+static int type##_quirks_get(void *data, u64 *val) \
+{ \
+	struct kbase_device *kbdev;\
+	kbdev = (struct kbase_device *)data;\
+	*val = kbdev->hw_quirks_##type;\
+	return 0;\
+} \
+DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
+		type##_quirks_set, "%llu\n")
+
+MAKE_QUIRK_ACCESSORS(sc);
+MAKE_QUIRK_ACCESSORS(tiler);
+MAKE_QUIRK_ACCESSORS(mmu);
+
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read
+ * @file: File object to read is for
+ * @buf:  User buffer to populate with data
+ * @len:  Length of user buffer
+ * @ppos: Offset within file object
+ *
+ * Retrieves the current status of protected debug mode
+ * (0 = disabled, 1 = enabled)
+ *
+ * Return: Number of bytes added to user buffer
+ */
+static ssize_t debugfs_protected_debug_mode_read(struct file *file,
+				char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)file->private_data;
+	u32 gpu_status;
+	ssize_t ret_val;
+
+	kbase_pm_context_active(kbdev);
+	gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL);
+	kbase_pm_context_idle(kbdev);
+
+	if (gpu_status & GPU_DBGEN)
+		ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2);
+	else
+		ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2);
+
+	return ret_val;
+}
+
+/*
+ * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops
+ *
+ * Contains the file operations for the "protected_debug_mode" debugfs file
+ */
+static const struct file_operations fops_protected_debug_mode = {
+	.open = simple_open,
+	.read = debugfs_protected_debug_mode_read,
+	.llseek = default_llseek,
+};
+
+static int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	struct dentry *debugfs_ctx_defaults_directory;
+	int err;
+
+	kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
+			NULL);
+	if (!kbdev->mali_debugfs_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
+			kbdev->mali_debugfs_directory);
+	if (!kbdev->debugfs_ctx_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
+			kbdev->debugfs_ctx_directory);
+	if (!debugfs_ctx_defaults_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+#if !MALI_CUSTOMER_RELEASE
+	kbasep_regs_dump_debugfs_init(kbdev);
+#endif /* !MALI_CUSTOMER_RELEASE */
+	kbasep_regs_history_debugfs_init(kbdev);
+
+	kbase_debug_job_fault_debugfs_init(kbdev);
+	kbasep_gpu_memory_debugfs_init(kbdev);
+	kbase_as_fault_debugfs_init(kbdev);
+#if KBASE_GPU_RESET_EN
+	debugfs_create_file("quirks_sc", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_sc_quirks);
+	debugfs_create_file("quirks_tiler", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_tiler_quirks);
+	debugfs_create_file("quirks_mmu", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_mmu_quirks);
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_COH_USER
+	debugfs_create_bool("infinite_cache", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->infinite_cache_active_default);
+#endif /* CONFIG_MALI_COH_USER */
+
+	debugfs_create_size_t("mem_pool_max_size", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->mem_pool_max_size_default);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		debugfs_create_file("protected_debug_mode", S_IRUGO,
+				kbdev->mali_debugfs_directory, kbdev,
+				&fops_protected_debug_mode);
+	}
+
+#if KBASE_TRACE_ENABLE
+	kbasep_trace_debugfs_init(kbdev);
+#endif /* KBASE_TRACE_ENABLE */
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kbasep_trace_timeline_debugfs_init(kbdev);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+	return 0;
+
+out:
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+	return err;
+}
+
+static void kbase_device_debugfs_term(struct kbase_device *kbdev)
+{
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+}
+
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	return 0;
+}
+
+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
+#endif /* CONFIG_DEBUG_FS */
+
+static void kbase_device_coherency_init(struct kbase_device *kbdev, u32 gpu_id)
+{
+#ifdef CONFIG_OF
+	u32 supported_coherency_bitmap =
+		kbdev->gpu_props.props.raw_props.coherency_mode;
+	const void *coherency_override_dts;
+	u32 override_coherency;
+#endif /* CONFIG_OF */
+
+	kbdev->system_coherency = COHERENCY_NONE;
+
+	/* device tree may override the coherency */
+#ifdef CONFIG_OF
+	coherency_override_dts = of_get_property(kbdev->dev->of_node,
+						"system-coherency",
+						NULL);
+	if (coherency_override_dts) {
+
+		override_coherency = be32_to_cpup(coherency_override_dts);
+
+		if ((override_coherency <= COHERENCY_NONE) &&
+			(supported_coherency_bitmap &
+			 COHERENCY_FEATURE_BIT(override_coherency))) {
+
+			kbdev->system_coherency = override_coherency;
+
+			dev_info(kbdev->dev,
+				"Using coherency mode %u set from dtb",
+				override_coherency);
+		} else
+			dev_warn(kbdev->dev,
+				"Ignoring unsupported coherency mode %u set from dtb",
+				override_coherency);
+	}
+
+#endif /* CONFIG_OF */
+
+	kbdev->gpu_props.props.raw_props.coherency_mode =
+		kbdev->system_coherency;
+}
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+
+/* Callback used by the kbase bus logger client, to initiate a GPU reset
+ * when the bus log is restarted.  GPU reset is used as reference point
+ * in HW bus log analyses.
+ */
+static void kbase_logging_started_cb(void *data)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)data;
+
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	dev_info(kbdev->dev, "KBASE - Bus logger restarted\n");
+}
+#endif
+
+static struct attribute *kbase_attrs[] = {
+#ifdef CONFIG_MALI_DEBUG
+	&dev_attr_debug_command.attr,
+	&dev_attr_js_softstop_always.attr,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&dev_attr_force_replay.attr,
+#endif
+	&dev_attr_js_timeouts.attr,
+	&dev_attr_soft_job_timeout.attr,
+	&dev_attr_gpuinfo.attr,
+	&dev_attr_dvfs_period.attr,
+	&dev_attr_pm_poweroff.attr,
+	&dev_attr_reset_timeout.attr,
+	&dev_attr_js_scheduling_period.attr,
+	&dev_attr_power_policy.attr,
+	&dev_attr_core_availability_policy.attr,
+	&dev_attr_core_mask.attr,
+	&dev_attr_mem_pool_size.attr,
+	&dev_attr_mem_pool_max_size.attr,
+	NULL
+};
+
+static const struct attribute_group kbase_attr_group = {
+	.attrs = kbase_attrs,
+};
+
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	const struct list_head *dev_list;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	if (kbdev->inited_subsys & inited_buslogger) {
+		bl_core_client_unregister(kbdev->buslogger);
+		kbdev->inited_subsys &= ~inited_buslogger;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_sysfs_group) {
+		sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+		kbdev->inited_subsys &= ~inited_sysfs_group;
+	}
+
+	if (kbdev->inited_subsys & inited_dev_list) {
+		dev_list = kbase_dev_list_get();
+		list_del(&kbdev->entry);
+		kbase_dev_list_put(dev_list);
+		kbdev->inited_subsys &= ~inited_dev_list;
+	}
+
+	if (kbdev->inited_subsys & inited_misc_register) {
+		misc_deregister(&kbdev->mdev);
+		kbdev->inited_subsys &= ~inited_misc_register;
+	}
+
+	if (kbdev->inited_subsys & inited_get_device) {
+		put_device(kbdev->dev);
+		kbdev->inited_subsys &= ~inited_get_device;
+	}
+
+	if (kbdev->inited_subsys & inited_debugfs) {
+		kbase_device_debugfs_term(kbdev);
+		kbdev->inited_subsys &= ~inited_debugfs;
+	}
+
+	if (kbdev->inited_subsys & inited_job_fault) {
+		kbase_debug_job_fault_dev_term(kbdev);
+		kbdev->inited_subsys &= ~inited_job_fault;
+	}
+
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	if (kbdev->inited_subsys & inited_ipa) {
+		kbase_ipa_term(kbdev->ipa_ctx);
+		kbdev->inited_subsys &= ~inited_ipa;
+	}
+#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+
+	if (kbdev->inited_subsys & inited_vinstr) {
+		kbase_vinstr_term(kbdev->vinstr_ctx);
+		kbdev->inited_subsys &= ~inited_vinstr;
+	}
+
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_term(kbdev);
+		kbdev->inited_subsys &= ~inited_devfreq;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_backend_late) {
+		kbase_backend_late_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_late;
+	}
+
+	if (kbdev->inited_subsys & inited_tlstream) {
+		kbase_tlstream_term();
+		kbdev->inited_subsys &= ~inited_tlstream;
+	}
+
+	/* Bring job and mem sys to a halt before we continue termination */
+
+	if (kbdev->inited_subsys & inited_js)
+		kbasep_js_devdata_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_mem)
+		kbase_mem_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_js) {
+		kbasep_js_devdata_term(kbdev);
+		kbdev->inited_subsys &= ~inited_js;
+	}
+
+	if (kbdev->inited_subsys & inited_mem) {
+		kbase_mem_term(kbdev);
+		kbdev->inited_subsys &= ~inited_mem;
+	}
+
+	if (kbdev->inited_subsys & inited_pm_runtime_init) {
+		kbdev->pm.callback_power_runtime_term(kbdev);
+		kbdev->inited_subsys &= ~inited_pm_runtime_init;
+	}
+
+	if (kbdev->inited_subsys & inited_device) {
+		kbase_device_term(kbdev);
+		kbdev->inited_subsys &= ~inited_device;
+	}
+
+	if (kbdev->inited_subsys & inited_backend_early) {
+		kbase_backend_early_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_early;
+	}
+
+	if (kbdev->inited_subsys & inited_io_history) {
+		kbase_io_history_term(&kbdev->io_history);
+		kbdev->inited_subsys &= ~inited_io_history;
+	}
+
+	if (kbdev->inited_subsys & inited_power_control) {
+		power_control_term(kbdev);
+		kbdev->inited_subsys &= ~inited_power_control;
+	}
+
+	if (kbdev->inited_subsys & inited_registers_map) {
+		registers_unmap(kbdev);
+		kbdev->inited_subsys &= ~inited_registers_map;
+	}
+
+#ifdef CONFIG_MALI_NO_MALI
+	if (kbdev->inited_subsys & inited_gpu_device) {
+		gpu_device_destroy(kbdev);
+		kbdev->inited_subsys &= ~inited_gpu_device;
+	}
+#endif /* CONFIG_MALI_NO_MALI */
+
+	if (kbdev->inited_subsys != 0)
+		dev_err(kbdev->dev, "Missing sub system termination\n");
+
+	kbase_device_free(kbdev);
+
+	return 0;
+}
+
+
+/* Number of register accesses for the buffer that we allocate during
+ * initialization time. The buffer size can be changed later via debugfs. */
+#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512)
+
+static int kbase_platform_device_probe(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev;
+	struct mali_base_gpu_core_props *core_props;
+	u32 gpu_id;
+	const struct list_head *dev_list;
+	int err = 0;
+
+#ifdef CONFIG_OF
+	err = kbase_platform_early_init();
+	if (err) {
+		dev_err(&pdev->dev, "Early platform initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+#endif
+
+	kbdev = kbase_device_alloc();
+	if (!kbdev) {
+		dev_err(&pdev->dev, "Allocate device failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+
+	kbdev->dev = &pdev->dev;
+	dev_set_drvdata(kbdev->dev, kbdev);
+
+#ifdef CONFIG_MALI_NO_MALI
+	err = gpu_device_create(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Dummy model initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_gpu_device;
+#endif /* CONFIG_MALI_NO_MALI */
+
+	err = assign_irqs(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "IRQ search failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	err = registers_map(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Register map failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_registers_map;
+
+	err = power_control_init(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Power control initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_power_control;
+
+	err = kbase_io_history_init(&kbdev->io_history,
+			KBASEP_DEFAULT_REGISTER_HISTORY_SIZE);
+	if (err) {
+		dev_err(&pdev->dev, "Register access history initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+	kbdev->inited_subsys |= inited_io_history;
+
+	err = kbase_backend_early_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Early backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_early;
+
+	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+			kbase_dev_nr);
+
+	kbase_disjoint_init(kbdev);
+
+	/* obtain min/max configured gpu frequencies */
+	core_props = &(kbdev->gpu_props.props.core_props);
+	core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+	core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+
+	kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US;
+
+	err = kbase_device_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Device initialization failed (%d)\n", err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_device;
+
+	if (kbdev->pm.callback_power_runtime_init) {
+		err = kbdev->pm.callback_power_runtime_init(kbdev);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Runtime PM initialization failed\n");
+			kbase_platform_device_remove(pdev);
+			return err;
+		}
+		kbdev->inited_subsys |= inited_pm_runtime_init;
+	}
+
+	err = kbase_mem_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Memory subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_mem;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+	gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	kbase_device_coherency_init(kbdev, gpu_id);
+
+	kbasep_protected_mode_init(kbdev);
+
+	err = kbasep_js_devdata_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job JS devdata initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_js;
+
+	err = kbase_tlstream_init();
+	if (err) {
+		dev_err(kbdev->dev, "Timeline stream initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_tlstream;
+
+	err = kbase_backend_late_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Late backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_late;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	err = kbase_devfreq_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Fevfreq initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_devfreq;
+#endif /* CONFIG_MALI_DEVFREQ */
+
+	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+	if (!kbdev->vinstr_ctx) {
+		dev_err(kbdev->dev,
+			"Virtual instrumentation initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+	kbdev->inited_subsys |= inited_vinstr;
+
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	kbdev->ipa_ctx = kbase_ipa_init(kbdev);
+	if (!kbdev->ipa_ctx) {
+		dev_err(kbdev->dev, "IPA initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+
+	kbdev->inited_subsys |= inited_ipa;
+#endif  /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+
+	err = kbase_debug_job_fault_dev_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job fault debug initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_job_fault;
+
+	err = kbase_device_debugfs_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "DebugFS initialization failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_debugfs;
+
+	/* initialize the kctx list */
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
+
+	kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+	kbdev->mdev.name = kbdev->devname;
+	kbdev->mdev.fops = &kbase_fops;
+	kbdev->mdev.parent = get_device(kbdev->dev);
+	kbdev->inited_subsys |= inited_get_device;
+
+	err = misc_register(&kbdev->mdev);
+	if (err) {
+		dev_err(kbdev->dev, "Misc device registration failed for %s\n",
+			kbdev->devname);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_misc_register;
+
+	dev_list = kbase_dev_list_get();
+	list_add(&kbdev->entry, &kbase_dev_list);
+	kbase_dev_list_put(dev_list);
+	kbdev->inited_subsys |= inited_dev_list;
+
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+	if (err) {
+		dev_err(&pdev->dev, "SysFS group creation failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_sysfs_group;
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	err = bl_core_client_register(kbdev->devname,
+						kbase_logging_started_cb,
+						kbdev, &kbdev->buslogger,
+						THIS_MODULE, NULL);
+	if (err == 0) {
+		kbdev->inited_subsys |= inited_buslogger;
+		bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
+	} else {
+		dev_warn(kbdev->dev, "Bus log client registration failed\n");
+		err = 0;
+	}
+#endif
+
+	dev_info(kbdev->dev,
+			"Probed as %s\n", dev_name(kbdev->mdev.this_device));
+
+	kbase_dev_nr++;
+
+	return err;
+}
+
+#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE
+
+
+/** Suspend callback from the OS.
+ *
+ * This is called by Linux when the device should suspend.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 1;
+#endif
+
+	kbase_pm_suspend(kbdev);
+	return 0;
+}
+
+/** Resume callback from the OS.
+ *
+ * This is called by Linux when the device should resume from suspension.
+ *
+ * @param dev  The device to resume
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_resume(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kbase_pm_resume(kbdev);
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 0;
+#endif
+	return 0;
+}
+
+/** Runtime suspend callback from the OS.
+ *
+ * This is called by Linux when the device should prepare for a condition in which it will
+ * not be able to communicate with the CPU(s) and RAM due to power management.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 1;
+#endif
+
+	if (kbdev->pm.backend.callback_power_runtime_off) {
+		kbdev->pm.backend.callback_power_runtime_off(kbdev);
+		dev_dbg(dev, "runtime suspend\n");
+	}
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/** Runtime resume callback from the OS.
+ *
+ * This is called by Linux when the device should go into a fully active state.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_resume(struct device *dev)
+{
+	int ret = 0;
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->pm.backend.callback_power_runtime_on) {
+		ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
+		dev_dbg(dev, "runtime resume\n");
+	}
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 0;
+#endif
+
+	return ret;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_device_runtime_idle - Runtime idle callback from the OS.
+ * @dev: The device to suspend
+ *
+ * This is called by Linux when the device appears to be inactive and it might
+ * be placed into a low power state.
+ *
+ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend,
+ * otherwise a standard Linux error code
+ */
+static int kbase_device_runtime_idle(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* Use platform specific implementation if it exists. */
+	if (kbdev->pm.backend.callback_power_runtime_idle)
+		return kbdev->pm.backend.callback_power_runtime_idle(kbdev);
+
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+#ifndef CONFIG_MALI_DEVFREQ
+static int mali_os_freeze(struct device *device)
+{
+	mali_dev_freeze();
+	return kbase_device_suspend(device);
+}
+
+static int mali_os_restore(struct device *device)
+{
+	mali_dev_restore();
+	return kbase_device_resume(device);
+}
+#endif
+
+
+/** The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+	.suspend = kbase_device_suspend,
+	.resume = kbase_device_resume,
+#ifndef CONFIG_MALI_DEVFREQ
+	.freeze = mali_os_freeze,
+	.thaw = kbase_device_resume,
+	.restore = mali_os_restore,
+#endif
+#ifdef KBASE_PM_RUNTIME
+	.runtime_suspend = kbase_device_runtime_suspend,
+	.runtime_resume = kbase_device_runtime_resume,
+	.runtime_idle = kbase_device_runtime_idle,
+#endif /* KBASE_PM_RUNTIME */
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+	{ .compatible = "arm,malit6xx" },
+	{ .compatible = "arm,mali-midgard" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
+
+static struct platform_driver kbase_platform_driver = {
+	.probe = kbase_platform_device_probe,
+	.remove = kbase_platform_device_remove,
+	.driver = {
+		   .name = kbase_drv_name,
+		   .owner = THIS_MODULE,
+		   .pm = &kbase_pm_ops,
+		   .of_match_table = of_match_ptr(kbase_dt_ids),
+	},
+};
+
+/*
+ * The driver will not provide a shortcut to create the Mali platform device
+ * anymore when using Device Tree.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else
+
+static int __init kbase_driver_init(void)
+{
+	int ret;
+
+	ret = kbase_platform_early_init();
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	ret = kbase_platform_fake_register();
+	if (ret)
+		return ret;
+#endif
+	ret = platform_driver_register(&kbase_platform_driver);
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	if (ret)
+		kbase_platform_fake_unregister();
+#endif
+	return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+	platform_driver_unregister(&kbase_platform_driver);
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	kbase_platform_fake_unregister();
+#endif
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
+		__stringify(BASE_UK_VERSION_MAJOR) "." \
+		__stringify(BASE_UK_VERSION_MINOR) ")");
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
+#define CREATE_TRACE_POINTS
+#endif
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Create the trace points (otherwise we just get code to call a tracepoint) */
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
+
+void kbase_trace_mali_pm_status(u32 event, u64 value)
+{
+	trace_mali_pm_status(event, value);
+}
+
+void kbase_trace_mali_pm_power_off(u32 event, u64 value)
+{
+	trace_mali_pm_power_off(event, value);
+}
+
+void kbase_trace_mali_pm_power_on(u32 event, u64 value)
+{
+	trace_mali_pm_power_on(event, value);
+}
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id)
+{
+	trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id);
+}
+
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value)
+{
+	trace_mali_page_fault_insert_pages(event, value);
+}
+
+void kbase_trace_mali_mmu_as_in_use(int event)
+{
+	trace_mali_mmu_as_in_use(event);
+}
+
+void kbase_trace_mali_mmu_as_released(int event)
+{
+	trace_mali_mmu_as_released(event);
+}
+
+void kbase_trace_mali_total_alloc_pages_change(long long int event)
+{
+	trace_mali_total_alloc_pages_change(event);
+}
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_MALI_SYSTEM_TRACE
+#include "mali_linux_kbase_trace.h"
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100755
index 0000000..fb57ac2
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+	NULL,
+	NULL
+};
+
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+	kbasep_debug_assert_registered_cb.func = func;
+	kbasep_debug_assert_registered_cb.param = param;
+}
+
+void kbasep_debug_assert_call_hook(void)
+{
+	if (kbasep_debug_assert_registered_cb.func != NULL)
+		kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
new file mode 100755
index 0000000..5fff289
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If different from 0, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+struct kbasep_debug_assert_cb {
+	kbase_debug_assert_hook *func;
+	void *param;
+};
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+		"In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION __func__
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated to the @see KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+		do { \
+			pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+			pr_err(__VA_ARGS__);\
+			pr_err("\n");\
+		} while (false)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @see KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+	KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+	/**
+	 * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+	 * @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+	 *
+	 * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+	 *
+	 * @param expr Boolean expression
+	 * @param ...  Message to display when @a expr is false, as a format string followed by format arguments.
+	 */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+		do { \
+			if (!(expr)) { \
+				KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+				KBASE_CALL_ASSERT_HOOK();\
+				BUG();\
+			} \
+		} while (false)
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif				/* CONFIG_MALI_DEBUG */
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif				/* _KBASE_DEBUG_H */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
new file mode 100755
index 0000000..83c5c37
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
@@ -0,0 +1,502 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/spinlock.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+	bool             ret;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	ret = !list_empty(event_list);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return ret;
+}
+
+static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
+	struct base_job_fault_event *event;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		return true;
+	}
+	list_for_each_entry(event, event_list, head) {
+		if (event->katom->kctx == kctx) {
+			spin_unlock_irqrestore(&kbdev->job_fault_event_lock,
+					flags);
+			return false;
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+	return true;
+}
+
+/* wait until the fault happen and copy the event */
+static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
+		struct base_job_fault_event *event)
+{
+	struct list_head            *event_list = &kbdev->job_fault_event_list;
+	struct base_job_fault_event *event_in;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		if (wait_event_interruptible(kbdev->job_fault_wq,
+				 kbase_is_job_fault_event_pending(kbdev)))
+			return -ERESTARTSYS;
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+
+	event_in = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	event->event_code = event_in->event_code;
+	event->katom = event_in->katom;
+
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return 0;
+
+}
+
+/* remove the event from the queue */
+static struct base_job_fault_event *kbase_job_fault_event_dequeue(
+		struct kbase_device *kbdev, struct list_head *event_list)
+{
+	struct base_job_fault_event *event;
+
+	event = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	list_del(event_list->next);
+
+	return event;
+
+}
+
+/* Remove all the following atoms after the failed atom in the same context
+ * Call the postponed bottom half of job done.
+ * Then, this context could be rescheduled.
+ */
+static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
+{
+	struct list_head *event_list = &kctx->job_fault_resume_event_list;
+
+	while (!list_empty(event_list)) {
+		struct base_job_fault_event *event;
+
+		event = kbase_job_fault_event_dequeue(kctx->kbdev,
+				&kctx->job_fault_resume_event_list);
+		kbase_jd_done_worker(&event->katom->work);
+	}
+
+}
+
+/* Remove all the failed atoms that belong to different contexts
+ * Resume all the contexts that were suspend due to failed job
+ */
+static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	while (!list_empty(event_list)) {
+		kbase_job_fault_event_dequeue(kbdev, event_list);
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		wake_up(&kbdev->job_fault_resume_wq);
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+}
+
+static void kbase_job_fault_resume_worker(struct work_struct *data)
+{
+	struct base_job_fault_event *event = container_of(data,
+			struct base_job_fault_event, job_fault_work);
+	struct kbase_context *kctx;
+	struct kbase_jd_atom *katom;
+
+	katom = event->katom;
+	kctx = katom->kctx;
+
+	dev_info(kctx->kbdev->dev, "Job dumping wait\n");
+
+	/* When it was waked up, it need to check if queue is empty or the
+	 * failed atom belongs to different context. If yes, wake up. Both
+	 * of them mean the failed job has been dumped. Please note, it
+	 * should never happen that the job_fault_event_list has the two
+	 * atoms belong to the same context.
+	 */
+	wait_event(kctx->kbdev->job_fault_resume_wq,
+			 kbase_ctx_has_no_event_pending(kctx));
+
+	atomic_set(&kctx->job_fault_count, 0);
+	kbase_jd_done_worker(&katom->work);
+
+	/* In case the following atoms were scheduled during failed job dump
+	 * the job_done_worker was held. We need to rerun it after the dump
+	 * was finished
+	 */
+	kbase_job_fault_resume_event_cleanup(kctx);
+
+	dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
+}
+
+static struct base_job_fault_event *kbase_job_fault_event_queue(
+		struct list_head *event_list,
+		struct kbase_jd_atom *atom,
+		u32 completion_code)
+{
+	struct base_job_fault_event *event;
+
+	event = &atom->fault_event;
+
+	event->katom = atom;
+	event->event_code = completion_code;
+
+	list_add_tail(&event->head, event_list);
+
+	return event;
+
+}
+
+static void kbase_job_fault_event_post(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, u32 completion_code)
+{
+	struct base_job_fault_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
+				katom, completion_code);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	wake_up_interruptible(&kbdev->job_fault_wq);
+
+	INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
+	queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
+
+	dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
+			katom->kctx->tgid, katom->kctx->id);
+
+}
+
+/*
+ * This function will process the job fault
+ * Get the register copy
+ * Send the failed job dump event
+ * Create a Wait queue to wait until the job dump finish
+ */
+
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Check if dumping is in the process
+	 * only one atom of each context can be dumped at the same time
+	 * If the atom belongs to different context, it can be dumped
+	 */
+	if (atomic_read(&kctx->job_fault_count) > 0) {
+		kbase_job_fault_event_queue(
+				&kctx->job_fault_resume_event_list,
+				katom, completion_code);
+		dev_info(kctx->kbdev->dev, "queue:%d\n",
+				kbase_jd_atom_id(kctx, katom));
+		return true;
+	}
+
+	if (kctx->kbdev->job_fault_debug == true) {
+
+		if (completion_code != BASE_JD_EVENT_DONE) {
+
+			if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
+				dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
+				return false;
+			}
+
+			kbase_job_fault_event_post(kctx->kbdev, katom,
+					completion_code);
+			atomic_inc(&kctx->job_fault_count);
+			dev_info(kctx->kbdev->dev, "post:%d\n",
+					kbase_jd_atom_id(kctx, katom));
+			return true;
+
+		}
+	}
+	return false;
+
+}
+
+static int debug_job_fault_show(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+	struct kbase_context *kctx = event->katom->kctx;
+	int i;
+
+	dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
+			kctx->tgid, kctx->id, event->reg_offset);
+
+	if (kctx->reg_dump == NULL) {
+		dev_warn(kbdev->dev, "reg dump is NULL");
+		return -1;
+	}
+
+	if (kctx->reg_dump[event->reg_offset] ==
+			REGISTER_DUMP_TERMINATION_FLAG) {
+		/* Return the error here to stop the read. And the
+		 * following next() will not be called. The stop can
+		 * get the real event resource and release it
+		 */
+		return -1;
+	}
+
+	if (event->reg_offset == 0)
+		seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
+
+	for (i = 0; i < 50; i++) {
+		if (kctx->reg_dump[event->reg_offset] ==
+				REGISTER_DUMP_TERMINATION_FLAG) {
+			break;
+		}
+		seq_printf(m, "%08x: %08x\n",
+				kctx->reg_dump[event->reg_offset],
+				kctx->reg_dump[1+event->reg_offset]);
+		event->reg_offset += 2;
+
+	}
+
+
+	return 0;
+}
+static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+
+	dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
+			event->reg_offset, (int)*pos);
+
+	return event;
+}
+
+static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event;
+
+	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);
+
+	/* The condition is trick here. It needs make sure the
+	 * fault hasn't happened and the dumping hasn't been started,
+	 * or the dumping has finished
+	 */
+	if (*pos == 0) {
+		event = kmalloc(sizeof(*event), GFP_KERNEL);
+		if (!event)
+			return NULL;
+		event->reg_offset = 0;
+		if (kbase_job_fault_event_wait(kbdev, event)) {
+			kfree(event);
+			return NULL;
+		}
+
+		/* The cache flush workaround is called in bottom half of
+		 * job done but we delayed it. Now we should clean cache
+		 * earlier. Then the GPU memory dump should be correct.
+		 */
+		if (event->katom->need_cache_flush_cores_retained) {
+			kbase_gpu_cacheclean(kbdev, event->katom);
+			event->katom->need_cache_flush_cores_retained = 0;
+		}
+
+	} else
+		return NULL;
+
+	return event;
+}
+
+static void debug_job_fault_stop(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+
+	/* here we wake up the kbase_jd_done_worker after stop, it needs
+	 * get the memory dump before the register dump in debug daemon,
+	 * otherwise, the memory dump may be incorrect.
+	 */
+
+	if (v != NULL) {
+		kfree(v);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 1");
+
+	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+		if (!list_empty(&kbdev->job_fault_event_list)) {
+			kbase_job_fault_event_dequeue(kbdev,
+				&kbdev->job_fault_event_list);
+			wake_up(&kbdev->job_fault_resume_wq);
+		}
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
+	}
+
+}
+
+static const struct seq_operations ops = {
+	.start = debug_job_fault_start,
+	.next = debug_job_fault_next,
+	.stop = debug_job_fault_stop,
+	.show = debug_job_fault_show,
+};
+
+static int debug_job_fault_open(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_open(file, &ops);
+
+	((struct seq_file *)file->private_data)->private = kbdev;
+	dev_info(kbdev->dev, "debug job fault seq open");
+
+	kbdev->job_fault_debug = true;
+
+	return 0;
+
+}
+
+static int debug_job_fault_release(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_release(in, file);
+
+	kbdev->job_fault_debug = false;
+
+	/* Clean the unprocessed job fault. After that, all the suspended
+	 * contexts could be rescheduled.
+	 */
+	kbase_job_fault_event_cleanup(kbdev);
+
+	dev_info(kbdev->dev, "debug job fault seq close");
+
+	return 0;
+}
+
+static const struct file_operations kbasep_debug_job_fault_fops = {
+	.open = debug_job_fault_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = debug_job_fault_release,
+};
+
+/*
+ *  Initialize debugfs entry for job fault dump
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("job_fault", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_debug_job_fault_fops);
+}
+
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+
+	INIT_LIST_HEAD(&kbdev->job_fault_event_list);
+
+	init_waitqueue_head(&(kbdev->job_fault_wq));
+	init_waitqueue_head(&(kbdev->job_fault_resume_wq));
+	spin_lock_init(&kbdev->job_fault_event_lock);
+
+	kbdev->job_fault_resume_workq = alloc_workqueue(
+			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
+	if (!kbdev->job_fault_resume_workq)
+		return -ENOMEM;
+
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+/*
+ * Release the relevant resource per device
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->job_fault_resume_workq);
+}
+
+
+/*
+ *  Initialize the relevant data structure per context
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
+{
+
+	/* We need allocate double size register range
+	 * Because this memory will keep the register address and value
+	 */
+	kctx->reg_dump = vmalloc(0x4000 * 2);
+	if (kctx->reg_dump == NULL)
+		return;
+
+	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
+		vfree(kctx->reg_dump);
+		kctx->reg_dump = NULL;
+	}
+	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
+	atomic_set(&kctx->job_fault_count, 0);
+
+}
+
+/*
+ *  release the relevant resource per context
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
+{
+	vfree(kctx->reg_dump);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
new file mode 100755
index 0000000..a2bf898
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
@@ -0,0 +1,96 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_JOB_FAULT_H
+#define _KBASE_DEBUG_JOB_FAULT_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
+
+/**
+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue
+ *		per device and initialize the required lists.
+ * @kbdev:	Device pointer
+ *
+ * Return: Zero on success or a negative error code.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_dev_term - Clean up resources created in
+ *		kbase_debug_job_fault_dev_init.
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_context_init - Initialize the relevant
+ *		data structure per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_context_term - Release the relevant
+ *		resource per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_process - Process the failed job.
+ *      It will send a event and wake up the job fault waiting queue
+ *      Then create a work queue to wait for job dump finish
+ *      This function should be called in the interrupt handler and before
+ *      jd_done that make sure the jd_done_worker will be delayed until the
+ *      job dump finish
+ * @katom: The failed atom pointer
+ * @completion_code: the job status
+ * @return true if dump is going on
+ */
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code);
+
+
+/**
+ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
+ *      address during the job fault process, the relevant registers will
+ *      be saved when a job fault happen
+ * @kctx: KBase context pointer
+ * @reg_range: Maximum register address space
+ * @return true if initializing successfully
+ */
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range);
+
+/**
+ * kbase_job_fault_get_reg_snapshot - Read the interested registers for
+ *      failed job dump
+ * @kctx: KBase context pointer
+ * @return true if getting registers successfully
+ */
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
+
+#endif  /*_KBASE_DEBUG_JOB_FAULT_H*/
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
new file mode 100755
index 0000000..a98355e
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -0,0 +1,279 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+struct debug_mem_mapping {
+	struct list_head node;
+
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long flags;
+
+	u64 start_pfn;
+	size_t nr_pages;
+};
+
+struct debug_mem_data {
+	struct list_head mapping_list;
+	struct kbase_context *kctx;
+};
+
+struct debug_mem_seq_off {
+	struct list_head *lh;
+	size_t offset;
+};
+
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data;
+	struct debug_mem_mapping *map;
+	loff_t pos = *_pos;
+
+	list_for_each_entry(map, &mem_data->mapping_list, node) {
+		if (pos >= map->nr_pages) {
+			pos -= map->nr_pages;
+		} else {
+			data = kmalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				return NULL;
+			data->lh = &map->node;
+			data->offset = pos;
+			return data;
+		}
+	}
+
+	/* Beyond the end */
+	return NULL;
+}
+
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+	kfree(v);
+}
+
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	if (data->offset < map->nr_pages - 1) {
+		data->offset++;
+		++*pos;
+		return data;
+	}
+
+	if (list_is_last(data->lh, &mem_data->mapping_list))
+		return NULL;
+
+	data->lh = data->lh->next;
+	data->offset = 0;
+	++*pos;
+
+	return data;
+}
+
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+	int i, j;
+	struct page *page;
+	uint32_t *mapping;
+	pgprot_t prot = PAGE_KERNEL;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	kbase_gpu_vm_lock(mem_data->kctx);
+
+	if (data->offset >= map->alloc->nents) {
+		seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+				data->offset) << PAGE_SHIFT);
+		goto out;
+	}
+
+	if (!(map->flags & KBASE_REG_CPU_CACHED))
+		prot = pgprot_writecombine(prot);
+
+	page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset]));
+	mapping = vmap(&page, 1, VM_MAP, prot);
+	if (!mapping)
+		goto out;
+
+	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+		seq_printf(m, "%016llx:", i + ((map->start_pfn +
+				data->offset) << PAGE_SHIFT));
+
+		for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+			seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+		seq_putc(m, '\n');
+	}
+
+	vunmap(mapping);
+
+	seq_putc(m, '\n');
+
+out:
+	kbase_gpu_vm_unlock(mem_data->kctx);
+	return 0;
+}
+
+static const struct seq_operations ops = {
+	.start = debug_mem_start,
+	.next = debug_mem_next,
+	.stop = debug_mem_stop,
+	.show = debug_mem_show,
+};
+
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+	struct file *kctx_file = i->i_private;
+	struct kbase_context *kctx = kctx_file->private_data;
+	struct rb_node *p;
+	struct debug_mem_data *mem_data;
+	int ret;
+
+	ret = seq_open(file, &ops);
+	if (ret)
+		return ret;
+
+	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	if (!mem_data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mem_data->kctx = kctx;
+
+	INIT_LIST_HEAD(&mem_data->mapping_list);
+
+	get_file(kctx_file);
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (p = rb_first(&kctx->reg_rbtree); p; p = rb_next(p)) {
+		struct kbase_va_region *reg;
+		struct debug_mem_mapping *mapping;
+
+		reg = rb_entry(p, struct kbase_va_region, rblink);
+
+		if (reg->gpu_alloc == NULL)
+			/* Empty region - ignore */
+			continue;
+
+		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping) {
+			ret = -ENOMEM;
+			kbase_gpu_vm_unlock(kctx);
+			goto out;
+		}
+
+		mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		mapping->start_pfn = reg->start_pfn;
+		mapping->nr_pages = reg->nr_pages;
+		mapping->flags = reg->flags;
+		list_add_tail(&mapping->node, &mem_data->mapping_list);
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	((struct seq_file *)file->private_data)->private = mem_data;
+
+	return 0;
+
+out:
+	if (mem_data) {
+		while (!list_empty(&mem_data->mapping_list)) {
+			struct debug_mem_mapping *mapping;
+
+			mapping = list_first_entry(&mem_data->mapping_list,
+					struct debug_mem_mapping, node);
+			kbase_mem_phy_alloc_put(mapping->alloc);
+			list_del(&mapping->node);
+			kfree(mapping);
+		}
+		fput(kctx_file);
+		kfree(mem_data);
+	}
+	seq_release(i, file);
+	return ret;
+}
+
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+	struct file *kctx_file = inode->i_private;
+	struct seq_file *sfile = file->private_data;
+	struct debug_mem_data *mem_data = sfile->private;
+	struct debug_mem_mapping *mapping;
+
+	seq_release(inode, file);
+
+	while (!list_empty(&mem_data->mapping_list)) {
+		mapping = list_first_entry(&mem_data->mapping_list,
+				struct debug_mem_mapping, node);
+		kbase_mem_phy_alloc_put(mapping->alloc);
+		list_del(&mapping->node);
+		kfree(mapping);
+	}
+
+	kfree(mem_data);
+
+	fput(kctx_file);
+
+	return 0;
+}
+
+static const struct file_operations kbase_debug_mem_view_fops = {
+	.open = debug_mem_open,
+	.release = debug_mem_release,
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+/**
+ * kbase_debug_mem_view_init - Initialise the mem_view sysfs file
+ * @kctx_file: The /dev/mali0 file instance for the context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct file *kctx_file)
+{
+	struct kbase_context *kctx = kctx_file->private_data;
+
+	debugfs_create_file("mem_view", S_IRUGO, kctx->kctx_dentry, kctx_file,
+			&kbase_debug_mem_view_fops);
+}
+
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
new file mode 100755
index 0000000..20ab51a
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -0,0 +1,25 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+void kbase_debug_mem_view_init(struct file *kctx_file);
+
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100755
index 0000000..df1a6f0
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,1539 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Defintions (types, defines, etcs) common to Kbase. They are placed here to
+ * allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_mmu_mode.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_pm.h>
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#include <linux/bus_logger.h>
+#endif
+
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#endif				/* CONFIG_SYNC */
+
+#include "mali_kbase_dma_fence.h"
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif				/* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_DEVFREQ */
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#if defined(CONFIG_PM_RUNTIME) || \
+	(defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+#define KBASE_PM_RUNTIME 1
+#endif
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif				/* CONFIG_MALI_DEBUG */
+#endif				/* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT             1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT           500
+
+/**
+ * Prevent soft-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * Therefore, soft stop may still be disabled due to HW issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+
+/**
+ * Prevent hard-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS        3
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS              16
+
+/* mmu */
+#define MIDGARD_MMU_VA_BITS 48
+
+#if MIDGARD_MMU_VA_BITS > 39
+#define MIDGARD_MMU_TOPLEVEL    0
+#else
+#define MIDGARD_MMU_TOPLEVEL    1
+#endif
+
+#define MIDGARD_MMU_BOTTOMLEVEL 3
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID     (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8	/* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_hwaccess_defs.h"
+
+#define KBASEP_FORCE_REPLAY_DISABLED 0
+
+/* Maximum force replay limit when randomization is enabled */
+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+
+/** Atom has been previously soft-stoppped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has been previously retried to execute */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has fail dependency on cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom is currently in the list of atoms blocked on cross-slot dependencies */
+#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in protected mode */
+#define KBASE_KATOM_FLAG_PROTECTED (1<<11)
+/* Atom has been stored in runnable_tree */
+#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS  (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ *  entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+		(JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+#ifdef CONFIG_DEBUG_FS
+struct base_job_fault_event {
+
+	u32 event_code;
+	struct kbase_jd_atom *katom;
+	struct work_struct job_fault_work;
+	struct list_head head;
+	int reg_offset;
+};
+
+#endif
+
+struct kbase_jd_atom_dependency {
+	struct kbase_jd_atom *atom;
+	u8 dep_type;
+};
+
+/**
+ * struct kbase_io_access - holds information about 1 register access
+ *
+ * @addr: first bit indicates r/w (r=0, w=1)
+ * @value: value written or read
+ */
+struct kbase_io_access {
+	uintptr_t addr;
+	u32 value;
+};
+
+/**
+ * struct kbase_io_history - keeps track of all recent register accesses
+ *
+ * @enabled: true if register accesses are recorded, false otherwise
+ * @lock: spinlock protecting kbase_io_access array
+ * @count: number of registers read/written
+ * @size: number of elements in kbase_io_access array
+ * @buf: array of kbase_io_access
+ */
+struct kbase_io_history {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool enabled;
+#else
+	u32 enabled;
+#endif
+
+	spinlock_t lock;
+	size_t count;
+	u16 size;
+	struct kbase_io_access *buf;
+};
+
+/**
+ * @brief The function retrieves a read-only reference to the atom field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return readonly reference to dependent ATOM.
+ */
+static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return (const struct kbase_jd_atom *)(dep->atom);
+}
+
+/**
+ * @brief The function retrieves a read-only reference to the dependency type field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return A dependency type value.
+ */
+static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return dep->dep_type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency.
+ * @param[in] a      The ATOM to be set as a dependency.
+ * @param     type   The ATOM dependency type to be set.
+ *
+ */
+static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
+		struct kbase_jd_atom *a, u8 type)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = a;
+	dep->dep_type = type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency to be cleared.
+ *
+ */
+static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = NULL;
+	dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
+
+enum kbase_atom_gpu_rb_state {
+	/* Atom is not currently present in slot ringbuffer */
+	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
+	/* Atom is in slot ringbuffer but is blocked on a previous atom */
+	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+	/* Atom is in slot ringbuffer but is waiting for a previous protected
+	 * mode transition to complete */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
+	/* Atom is in slot ringbuffer but is waiting for proected mode
+	 * transition */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
+	/* Atom is in slot ringbuffer but is waiting for cores to become
+	 * available */
+	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
+	/* Atom is in slot ringbuffer but is blocked on affinity */
+	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
+	/* Atom is in slot ringbuffer and ready to run */
+	KBASE_ATOM_GPU_RB_READY,
+	/* Atom is in slot ringbuffer and has been submitted to the GPU */
+	KBASE_ATOM_GPU_RB_SUBMITTED,
+	/* Atom must be returned to JS as soon as it reaches the head of the
+	 * ringbuffer due to a previous failure */
+	KBASE_ATOM_GPU_RB_RETURN_TO_JS
+};
+
+enum kbase_atom_enter_protected_state {
+	/*
+	 * Starting state:
+	 * Check if a transition into protected mode is required.
+	 *
+	 * NOTE: The integer value of this must
+	 *       match KBASE_ATOM_EXIT_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
+	/* Wait for vinstr to suspend. */
+	KBASE_ATOM_ENTER_PROTECTED_VINSTR,
+	/* Wait for the L2 to become idle in preparation for
+	 * the coherency change. */
+	KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
+	/* End state;
+	 * Prepare coherency change. */
+	KBASE_ATOM_ENTER_PROTECTED_FINISHED,
+};
+
+enum kbase_atom_exit_protected_state {
+	/*
+	 * Starting state:
+	 * Check if a transition out of protected mode is required.
+	 *
+	 * NOTE: The integer value of this must
+	 *       match KBASE_ATOM_ENTER_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
+	/* Wait for the L2 to become idle in preparation
+	 * for the reset. */
+	KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
+	/* Issue the protected reset. */
+	KBASE_ATOM_EXIT_PROTECTED_RESET,
+	/* End state;
+	 * Wait for the reset to complete. */
+	KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
+};
+
+struct kbase_ext_res {
+	u64 gpu_address;
+	struct kbase_mem_phy_alloc *alloc;
+};
+
+struct kbase_jd_atom {
+	struct work_struct work;
+	ktime_t start_timestamp;
+	u64 time_spent_us; /**< Total time spent on the GPU in microseconds */
+
+	struct base_jd_udata udata;
+	struct kbase_context *kctx;
+
+	struct list_head dep_head[2];
+	struct list_head dep_item[2];
+	const struct kbase_jd_atom_dependency dep[2];
+	/* List head used during job dispatch job_done processing - as
+	 * dependencies may not be entirely resolved at this point, we need to
+	 * use a separate list head. */
+	struct list_head jd_item;
+	/* true if atom's jd_item is currently on a list. Prevents atom being
+	 * processed twice. */
+	bool in_jd_list;
+
+	u16 nr_extres;
+	struct kbase_ext_res *extres;
+
+	u32 device_nr;
+	u64 affinity;
+	u64 jc;
+	enum kbase_atom_coreref_state coreref_state;
+#ifdef CONFIG_KDS
+	struct list_head node;
+	struct kds_resource_set *kds_rset;
+	bool kds_dep_satisfied;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_SYNC
+	struct sync_fence *fence;
+	struct sync_fence_waiter sync_waiter;
+#endif				/* CONFIG_SYNC */
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		/* This points to the dma-buf fence for this atom. If this is
+		 * NULL then there is no fence for this atom and the other
+		 * fields related to dma_fence may have invalid data.
+		 *
+		 * The context and seqno fields contain the details for this
+		 * fence.
+		 *
+		 * This fence is signaled when the katom is completed,
+		 * regardless of the event_code of the katom (signal also on
+		 * failure).
+		 */
+		struct fence *fence;
+		/* The dma-buf fence context number for this atom. A unique
+		 * context number is allocated to each katom in the context on
+		 * context creation.
+		 */
+		unsigned int context;
+		/* The dma-buf fence sequence number for this atom. This is
+		 * increased every time this katom uses dma-buf fence.
+		 */
+		atomic_t seqno;
+		/* This contains a list of all callbacks set up to wait on
+		 * other fences.  This atom must be held back from JS until all
+		 * these callbacks have been called and dep_count have reached
+		 * 0. The initial value of dep_count must be equal to the
+		 * number of callbacks on this list.
+		 *
+		 * This list is protected by jctx.lock. Callbacks are added to
+		 * this list when the atom is built and the wait are set up.
+		 * All the callbacks then stay on the list until all callbacks
+		 * have been called and the atom is queued, or cancelled, and
+		 * then all callbacks are taken off the list and freed.
+		 */
+		struct list_head callbacks;
+		/* Atomic counter of number of outstandind dma-buf fence
+		 * dependencies for this atom. When dep_count reaches 0 the
+		 * atom may be queued.
+		 *
+		 * The special value "-1" may only be set after the count
+		 * reaches 0, while holding jctx.lock. This indicates that the
+		 * atom has been handled, either queued in JS or cancelled.
+		 *
+		 * If anyone but the dma-fence worker sets this to -1 they must
+		 * ensure that any potentially queued worker must have
+		 * completed before allowing the atom to be marked as unused.
+		 * This can be done by flushing the fence work queue:
+		 * kctx->dma_fence.wq.
+		 */
+		atomic_t dep_count;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
+	enum base_jd_event_code event_code;
+	base_jd_core_req core_req;	    /**< core requirements */
+	/** Job Slot to retry submitting to if submission from IRQ handler failed
+	 *
+	 * NOTE: see if this can be unified into the another member e.g. the event */
+	int retry_submit_on_slot;
+
+	union kbasep_js_policy_job_info sched_info;
+	/* JS atom priority with respect to other atoms on its kctx. */
+	int sched_priority;
+
+	int poking;		/* BASE_HW_ISSUE_8316 */
+
+	wait_queue_head_t completed;
+	enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	int work_id;
+#endif
+	/* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
+	int slot_nr;
+
+	u32 atom_flags;
+
+	/* Number of times this atom has been retried. Used by replay soft job.
+	 */
+	int retry_count;
+
+	enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+	u64 need_cache_flush_cores_retained;
+
+	atomic_t blocked;
+
+	/* Pointer to atom that this atom has same-slot dependency on */
+	struct kbase_jd_atom *pre_dep;
+	/* Pointer to atom that has same-slot dependency on this atom */
+	struct kbase_jd_atom *post_dep;
+
+	/* Pointer to atom that this atom has cross-slot dependency on */
+	struct kbase_jd_atom *x_pre_dep;
+	/* Pointer to atom that has cross-slot dependency on this atom */
+	struct kbase_jd_atom *x_post_dep;
+
+	/* The GPU's flush count recorded at the time of submission, used for
+	 * the cache flush optimisation */
+	u32 flush_id;
+
+	struct kbase_jd_atom_backend backend;
+#ifdef CONFIG_DEBUG_FS
+	struct base_job_fault_event fault_event;
+#endif
+
+	/* List head used for two different purposes:
+	 *  1. Overflow list for JS ring buffers. If an atom is ready to run,
+	 *     but there is no room in the JS ring buffer, then the atom is put
+	 *     on the ring buffer's overflow list using this list node.
+	 *  2. List of waiting soft jobs.
+	 */
+	struct list_head queue;
+
+	struct kbase_va_region *jit_addr_reg;
+
+	/* If non-zero, this indicates that the atom will fail with the set
+	 * event_code when the atom is processed. */
+	enum base_jd_event_code will_fail_event_code;
+
+	/* Atoms will only ever be transitioning into, or out of
+	 * protected mode so we do not need two separate fields.
+	 */
+	union {
+		enum kbase_atom_enter_protected_state enter;
+		enum kbase_atom_exit_protected_state exit;
+	} protected_state;
+
+	struct rb_node runnable_tree_node;
+
+	/* 'Age' of atom relative to other atoms in the context. */
+	u32 age;
+};
+
+static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
+{
+	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+struct kbase_jd_context {
+	struct mutex lock;
+	struct kbasep_js_kctx_info sched_info;
+	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+	/** Tracks all job-dispatch jobs.  This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	u32 job_nr;
+
+	/** Waitq that reflects whether there are no jobs (including SW-only
+	 * dependency jobs). This is set when no jobs are present on the ctx,
+	 * and clear when there are jobs.
+	 *
+	 * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
+	 * the Job Scheduler is unaware of jobs that are blocked on dependencies,
+	 * and SW-only dependency jobs.
+	 *
+	 * This waitq can be waited upon to find out when the context jobs are all
+	 * done/cancelled (including those that might've been blocked on
+	 * dependencies) - and so, whether it can be terminated. However, it should
+	 * only be terminated once it is neither present in the policy-queue (see
+	 * kbasep_js_policy_try_evict_ctx() ) nor the run-pool (see
+	 * kbasep_js_kctx_info::ctx::is_scheduled).
+	 *
+	 * Since the waitq is only set under kbase_jd_context::lock,
+	 * the waiter should also briefly obtain and drop kbase_jd_context::lock to
+	 * guarentee that the setter has completed its work on the kbase_context
+	 *
+	 * This must be updated atomically with:
+	 * - kbase_jd_context::job_nr */
+	wait_queue_head_t zero_jobs_wait;
+
+	/** Job Done workqueue. */
+	struct workqueue_struct *job_done_wq;
+
+	spinlock_t tb_lock;
+	u32 *tb;
+	size_t tb_wrap_offset;
+
+#ifdef CONFIG_KDS
+	struct kds_callback kds_cb;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_t work_id;
+#endif
+};
+
+struct kbase_device_info {
+	u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+enum {
+	KBASE_AS_POKE_STATE_IN_FLIGHT     = 1<<0,
+	KBASE_AS_POKE_STATE_KILLING_POKE  = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+	u64	transtab;
+	u64	memattr;
+	u64	transcfg;
+};
+
+/**
+ * Important: Our code makes assumptions that a struct kbase_as structure is always at
+ * kbase_device->as[number]. This is used to recover the containing
+ * struct kbase_device from a struct kbase_as structure.
+ *
+ * Therefore, struct kbase_as structures must not be allocated anywhere else.
+ */
+struct kbase_as {
+	int number;
+
+	struct workqueue_struct *pf_wq;
+	struct work_struct work_pagefault;
+	struct work_struct work_busfault;
+	enum kbase_mmu_fault_type fault_type;
+	bool protected_mode;
+	u32 fault_status;
+	u64 fault_addr;
+	u64 fault_extra_addr;
+
+	struct kbase_mmu_setup current_setup;
+
+	/* BASE_HW_ISSUE_8316  */
+	struct workqueue_struct *poke_wq;
+	struct work_struct poke_work;
+	/** Protected by hwaccess_lock */
+	int poke_refcount;
+	/** Protected by hwaccess_lock */
+	kbase_as_poke_state poke_state;
+	struct hrtimer poke_timer;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+struct kbasep_mem_device {
+	atomic_t used_pages;   /* Tracks usage of OS shared memory. Updated
+				   when OS memory is allocated/freed. */
+
+};
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+	/* Comma on its own, to extend the list */
+	,
+	/* Must be the last in the enum */
+	KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT  (((u8)1) << 1)
+
+struct kbase_trace {
+	struct timespec timestamp;
+	u32 thread_id;
+	u32 cpu;
+	void *ctx;
+	bool katom;
+	int atom_number;
+	u64 atom_udata[2];
+	u64 gpu_addr;
+	unsigned long info_val;
+	u8 code;
+	u8 jobslot;
+	u8 refcount;
+	u8 flags;
+};
+
+/** Event IDs for the power management framework.
+ *
+ * Any of these events might be missed, so they should not be relied upon to
+ * find the precise state of the GPU at a particular time in the
+ * trace. Overall, we should get a high percentage of these events for
+ * statisical purposes, and so a few missing should not be a problem */
+enum kbase_timeline_pm_event {
+	/* helper for tests */
+	KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** Event reserved for backwards compatibility with 'init' events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** The power state of the device has changed.
+	 *
+	 * Specifically, the device has reached a desired or available state.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
+
+	/** The GPU is becoming active.
+	 *
+	 * This event is sent when the first context is about to use the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
+
+	/** The GPU is becoming idle.
+	 *
+	 * This event is sent when the last context has finished using the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
+
+	/** Event reserved for backwards compatibility with 'policy_change'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_4,
+
+	/** Event reserved for backwards compatibility with 'system_suspend'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_5,
+
+	/** Event reserved for backwards compatibility with 'system_resume'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_6,
+
+	/** The job scheduler is requesting to power up/down cores.
+	 *
+	 * This event is sent when:
+	 * - powered down cores are needed to complete a job
+	 * - powered up cores are not needed anymore
+	 */
+	KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+
+	KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+};
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+struct kbase_trace_kctx_timeline {
+	atomic_t jd_atoms_in_flight;
+	u32 owner_tgid;
+};
+
+struct kbase_trace_kbdev_timeline {
+	/* Note: strictly speaking, not needed, because it's in sync with
+	 * kbase_device::jm_slots[]::submitted_nr
+	 *
+	 * But it's kept as an example of how to add global timeline tracking
+	 * information
+	 *
+	 * The caller must hold hwaccess_lock when accessing this */
+	u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
+
+	/* Last UID for each PM event */
+	atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
+	/* Counter for generating PM event UIDs */
+	atomic_t pm_event_uid_counter;
+	/*
+	 * L2 transition state - true indicates that the transition is ongoing
+	 * Expected to be protected by hwaccess_lock */
+	bool l2_transitioning;
+};
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+
+struct kbasep_kctx_list_element {
+	struct list_head link;
+	struct kbase_context *kctx;
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_device_data {
+	/**
+	 * The lock protecting Power Management structures accessed outside of
+	 * IRQ.
+	 *
+	 * This lock must also be held whenever the GPU is being powered on or
+	 * off.
+	 */
+	struct mutex lock;
+
+	/** The reference count of active contexts on this device. */
+	int active_count;
+	/** Flag indicating suspending/suspended */
+	bool suspending;
+	/* Wait queue set when active_count == 0 */
+	wait_queue_head_t zero_active_count_wait;
+
+	/**
+	 * Bit masks identifying the available shader cores that are specified
+	 * via sysfs. One mask per job slot.
+	 */
+	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
+	u64 debug_core_mask_all;
+
+	/**
+	 * Callback for initializing the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 *
+	 * @return 0 on success, else error code
+	 */
+	 int (*callback_power_runtime_init)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback for terminating the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 */
+	void (*callback_power_runtime_term)(struct kbase_device *kbdev);
+
+	/* Time in milliseconds between each dvfs sample */
+	u32 dvfs_period;
+
+	/* Period of GPU poweroff timer */
+	ktime_t gpu_poweroff_time;
+
+	/* Number of ticks of GPU poweroff timer before shader is powered off */
+	int poweroff_shader_ticks;
+
+	/* Number of ticks of GPU poweroff timer before GPU is powered off */
+	int poweroff_gpu_ticks;
+
+	struct kbase_pm_backend_data backend;
+};
+
+/**
+ * struct kbase_protected_ops - Platform specific functions for GPU protected
+ * mode operations
+ * @protected_mode_enter: Callback to enter protected mode on the GPU
+ * @protected_mode_reset: Callback to reset the GPU and exit protected mode.
+ * @protected_mode_supported: Callback to check if protected mode is supported.
+ */
+struct kbase_protected_ops {
+	/**
+	 * protected_mode_enter() - Enter protected mode on the GPU
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enter)(struct kbase_device *kbdev);
+
+	/**
+	 * protected_mode_reset() - Reset the GPU and exit protected mode
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_reset)(struct kbase_device *kbdev);
+
+	/**
+	 * protected_mode_supported() - Check if protected mode is supported
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	bool (*protected_mode_supported)(struct kbase_device *kbdev);
+};
+
+
+/**
+ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
+ * @kbdev:     Kbase device where memory is used
+ * @cur_size:  Number of free pages currently in the pool (may exceed @max_size
+ *             in some corner cases)
+ * @max_size:  Maximum number of free pages in the pool
+ * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size
+ *             and @page_list
+ * @page_list: List of free pages in the pool
+ * @reclaim:   Shrinker for kernel reclaim of free pages
+ * @next_pool: Pointer to next pool where pages can be allocated when this pool
+ *             is empty. Pages will spill over to the next pool when this pool
+ *             is full. Can be NULL if there is no next pool.
+ */
+struct kbase_mem_pool {
+	struct kbase_device *kbdev;
+	size_t              cur_size;
+	size_t              max_size;
+	spinlock_t          pool_lock;
+	struct list_head    page_list;
+	struct shrinker     reclaim;
+
+	struct kbase_mem_pool *next_pool;
+};
+
+
+#define DEVNAME_SIZE	16
+
+struct kbase_device {
+	s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];
+
+	u32 hw_quirks_sc;
+	u32 hw_quirks_tiler;
+	u32 hw_quirks_mmu;
+	u32 hw_quirks_jm;
+
+	struct list_head entry;
+	struct device *dev;
+	struct miscdevice mdev;
+	u64 reg_start;
+	size_t reg_size;
+	void __iomem *reg;
+
+	struct {
+		int irq;
+		int flags;
+	} irqs[3];
+
+	struct clk *clock;
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulator;
+#endif
+	char devname[DEVNAME_SIZE];
+
+#ifdef CONFIG_MALI_NO_MALI
+	void *model;
+	struct kmem_cache *irq_slab;
+	struct workqueue_struct *irq_workq;
+	atomic_t serving_job_irq;
+	atomic_t serving_gpu_irq;
+	atomic_t serving_mmu_irq;
+	spinlock_t reg_op_lock;
+#endif	/* CONFIG_MALI_NO_MALI */
+
+	struct kbase_pm_device_data pm;
+	struct kbasep_js_device_data js_data;
+	struct kbase_mem_pool mem_pool;
+	struct kbasep_mem_device memdev;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	struct kbase_as as[BASE_MAX_NR_AS];
+
+	spinlock_t mmu_mask_change;
+
+	struct kbase_gpu_props gpu_props;
+
+	/** List of SW workarounds for HW issues */
+	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+	/** List of features available */
+	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+
+	/* Bitmaps of cores that are currently in use (running jobs).
+	 * These should be kept up to date by the job scheduler.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 *
+	 * kbase_pm_check_transitions_nolock() should be called when bits are
+	 * cleared to update the power management system and allow transitions to
+	 * occur. */
+	u64 shader_inuse_bitmap;
+
+	/* Refcount for cores in use */
+	u32 shader_inuse_cnt[64];
+
+	/* Bitmaps of cores the JS needs for jobs ready to run */
+	u64 shader_needed_bitmap;
+
+	/* Refcount for cores needed */
+	u32 shader_needed_cnt[64];
+
+	u32 tiler_inuse_cnt;
+
+	u32 tiler_needed_cnt;
+
+	/* struct for keeping track of the disjoint information
+	 *
+	 * The state  is > 0 if the GPU is in a disjoint state. Otherwise 0
+	 * The count is the number of disjoint events that have occurred on the GPU
+	 */
+	struct {
+		atomic_t count;
+		atomic_t state;
+	} disjoint_event;
+
+	/* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
+	u32 l2_users_count;
+
+	/* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be
+	 * submitted to these cores. These are updated by the power management code. The job scheduler should avoid
+	 * submitting new jobs to any cores that are not marked as available.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 */
+	u64 shader_available_bitmap;
+	u64 tiler_available_bitmap;
+	u64 l2_available_bitmap;
+
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+
+	s8 nr_hw_address_spaces;			  /**< Number of address spaces in the GPU (constant after driver initialisation) */
+	s8 nr_user_address_spaces;			  /**< Number of address spaces available to user contexts */
+
+	/* Structure used for instrumentation and HW counters dumping */
+	struct kbase_hwcnt {
+		/* The lock should be used when accessing any of the following members */
+		spinlock_t lock;
+
+		struct kbase_context *kctx;
+		u64 addr;
+
+		struct kbase_instr_backend backend;
+	} hwcnt;
+
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	/*value to be written to the irq_throttle register each time an irq is served */
+	atomic_t irq_throttle_cycles;
+
+#if KBASE_TRACE_ENABLE
+	spinlock_t              trace_lock;
+	u16                     trace_first_out;
+	u16                     trace_next_in;
+	struct kbase_trace            *trace_rbuf;
+#endif
+
+	u32 reset_timeout_ms;
+
+	struct mutex cacheclean_lock;
+
+	/* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
+	void *platform_context;
+
+	/* List of kbase_contexts created */
+	struct list_head        kctx_list;
+	struct mutex            kctx_list_lock;
+
+#ifdef CONFIG_PM_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freq;
+	unsigned long current_voltage;
+#ifdef CONFIG_DEVFREQ_THERMAL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+	struct devfreq_cooling_device *devfreq_cooling;
+#else
+	struct thermal_cooling_device *devfreq_cooling;
+#endif
+#endif
+#endif
+
+	struct kbase_ipa_context *ipa_ctx;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kbdev_timeline timeline;
+#endif
+
+	/*
+	 * Control for enabling job dump on failure, set when control debugfs
+	 * is opened.
+	 */
+	bool job_fault_debug;
+
+#ifdef CONFIG_DEBUG_FS
+	/* directory for debugfs entries */
+	struct dentry *mali_debugfs_directory;
+	/* Root directory for per context entry */
+	struct dentry *debugfs_ctx_directory;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* bit for each as, set if there is new data to report */
+	u64 debugfs_as_read_bitmap;
+#endif /* CONFIG_MALI_DEBUG */
+
+	/* failed job dump, used for separate debug process */
+	wait_queue_head_t job_fault_wq;
+	wait_queue_head_t job_fault_resume_wq;
+	struct workqueue_struct *job_fault_resume_workq;
+	struct list_head job_fault_event_list;
+	spinlock_t job_fault_event_lock;
+	struct kbase_context *kctx_fault;
+
+#if !MALI_CUSTOMER_RELEASE
+	/* Per-device data for register dumping interface */
+	struct {
+		u16 reg_offset; /* Offset of a GPU_CONTROL register to be
+				   dumped upon request */
+	} regs_dump_debugfs_data;
+#endif /* !MALI_CUSTOMER_RELEASE */
+#endif /* CONFIG_DEBUG_FS */
+
+	/* fbdump profiling controls set by gator */
+	u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
+
+
+#if MALI_CUSTOMER_RELEASE == 0
+	/* Number of jobs that are run before a job is forced to fail and
+	 * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
+	 * failures. */
+	int force_replay_limit;
+	/* Count of jobs between forced failures. Incremented on each job. A
+	 * job is forced to fail once this is greater than or equal to
+	 * force_replay_limit. */
+	int force_replay_count;
+	/* Core requirement for jobs to be failed and replayed. May be zero. */
+	base_jd_core_req force_replay_core_req;
+	/* true if force_replay_limit should be randomized. The random
+	 * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+	 */
+	bool force_replay_random;
+#endif
+
+	/* Total number of created contexts */
+	atomic_t ctx_num;
+
+#ifdef CONFIG_DEBUG_FS
+	/* Holds the most recent register accesses */
+	struct kbase_io_history io_history;
+#endif /* CONFIG_DEBUG_FS */
+
+	struct kbase_hwaccess_data hwaccess;
+
+	/* Count of page/bus faults waiting for workqueues to process */
+	atomic_t faults_pending;
+
+	/* true if GPU is powered off or power off operation is in progress */
+	bool poweroff_pending;
+
+
+	/* defaults for new context created for this device */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active_default;
+#else
+	u32 infinite_cache_active_default;
+#endif
+	size_t mem_pool_max_size_default;
+
+	/* current gpu coherency mode */
+	u32 current_gpu_coherency_mode;
+	/* system coherency mode  */
+	u32 system_coherency;
+	/* Flag to track when cci snoops have been enabled on the interface */
+	bool cci_snoop_enabled;
+
+	/* SMC function IDs to call into Trusted firmware to enable/disable
+	 * cache snooping. Value of 0 indicates that they are not used
+	 */
+	u32 snoop_enable_smc;
+	u32 snoop_disable_smc;
+
+	/* Protected operations */
+	struct kbase_protected_ops *protected_ops;
+
+	/*
+	 * true when GPU is put into protected mode
+	 */
+	bool protected_mode;
+
+	/*
+	 * true when GPU is transitioning into or out of protected mode
+	 */
+	bool protected_mode_transition;
+
+	/*
+	 * true if protected mode is supported
+	 */
+	bool protected_mode_support;
+
+
+#ifdef CONFIG_MALI_DEBUG
+	wait_queue_head_t driver_inactive_wait;
+	bool driver_inactive;
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	/*
+	 * Bus logger integration.
+	 */
+	struct bus_logger_client *buslogger;
+#endif
+	/* Boolean indicating if an IRQ flush during reset is in progress. */
+	bool irq_reset_flush;
+
+	/* list of inited sub systems. Used during terminate/error recovery */
+	u32 inited_subsys;
+
+	spinlock_t hwaccess_lock;
+
+	/* Protects access to MMU operations */
+	struct mutex mmu_hw_mutex;
+};
+
+/**
+ * struct jsctx_queue - JS context atom queue
+ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this
+ *                 job slot.
+ * @x_dep_head:    Head item of the linked list of atoms blocked on cross-slot
+ *                 dependencies. Atoms on this list will be moved to the
+ *                 runnable_tree when the blocking atom completes.
+ *
+ * hwaccess_lock must be held when accessing this structure.
+ */
+struct jsctx_queue {
+	struct rb_root runnable_tree;
+	struct list_head x_dep_head;
+};
+
+
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
+					 (((minor) & 0xFFF) << 8) | \
+					 ((0 & 0xFF) << 0))
+
+/**
+ * enum kbase_context_flags - Flags for kbase contexts
+ *
+ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
+ * process on a 64-bit kernel.
+ *
+ * @KCTX_RUNNABLE_REF: Set when context is counted in
+ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
+ *
+ * @KCTX_ACTIVE: Set when the context is active.
+ *
+ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
+ * context.
+ *
+ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
+ * initialized.
+ *
+ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
+ * allocations. Existing allocations will not change.
+ *
+ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
+ *
+ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept
+ * scheduled in.
+ *
+ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
+ * This is only ever updated whilst the jsctx_mutex is held.
+ *
+ * @KCTX_DYING: Set when the context process is in the process of being evicted.
+ *
+ * All members need to be separate bits. This enum is intended for use in a
+ * bitmask where multiple values get OR-ed together.
+ */
+enum kbase_context_flags {
+	KCTX_COMPAT = 1U << 0,
+	KCTX_RUNNABLE_REF = 1U << 1,
+	KCTX_ACTIVE = 1U << 2,
+	KCTX_PULLED = 1U << 3,
+	KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
+	KCTX_INFINITE_CACHE = 1U << 5,
+	KCTX_SUBMIT_DISABLED = 1U << 6,
+	KCTX_PRIVILEGED = 1U << 7,
+	KCTX_SCHEDULED = 1U << 8,
+	KCTX_DYING = 1U << 9,
+};
+
+struct kbase_context {
+	struct file *filp;
+	struct kbase_device *kbdev;
+	int id; /* System wide unique id */
+	unsigned long api_version;
+	phys_addr_t pgd;
+	struct list_head event_list;
+	struct list_head event_coalesce_list;
+	struct mutex event_mutex;
+	atomic_t event_closed;
+	struct workqueue_struct *event_workq;
+	atomic_t event_count;
+	int event_coalesce_count;
+
+	atomic_t flags;
+
+	atomic_t                setup_complete;
+	atomic_t                setup_in_progress;
+
+	u64 *mmu_teardown_pages;
+
+	struct page *aliasing_sink_page;
+
+	struct mutex            mmu_lock;
+	struct mutex            reg_lock; /* To be converted to a rwlock? */
+	struct rb_root          reg_rbtree; /* Red-Black tree of GPU regions (live regions) */
+
+	unsigned long    cookies;
+	struct kbase_va_region *pending_regions[BITS_PER_LONG];
+
+	wait_queue_head_t event_queue;
+	pid_t tgid;
+	pid_t pid;
+
+	struct kbase_jd_context jctx;
+	atomic_t used_pages;
+	atomic_t         nonmapped_pages;
+
+	struct kbase_mem_pool mem_pool;
+
+	struct shrinker         reclaim;
+	struct list_head        evict_list;
+	struct mutex            evict_lock;
+
+	struct list_head waiting_soft_jobs;
+	spinlock_t waiting_soft_jobs_lock;
+#ifdef CONFIG_KDS
+	struct list_head waiting_kds_resource;
+#endif
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		struct list_head waiting_resource;
+		struct workqueue_struct *wq;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+	/** This is effectively part of the Run Pool, because it only has a valid
+	 * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
+	 *
+	 * The hwaccess_lock must be held whilst accessing this.
+	 *
+	 * If the context relating to this as_nr is required, you must use
+	 * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
+	 * whilst you're using it. Alternatively, just hold the hwaccess_lock
+	 * to ensure the context doesn't disappear (but this has restrictions on what other locks
+	 * you can take whilst doing this) */
+	int as_nr;
+
+	/* NOTE:
+	 *
+	 * Flags are in jctx.sched_info.ctx.flags
+	 * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
+	 *
+	 * All other flags must be added there */
+	spinlock_t         mm_update_lock;
+	struct mm_struct *process_mm;
+	/* End of the SAME_VA zone */
+	u64 same_va_end;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kctx_timeline timeline;
+#endif
+#ifdef CONFIG_DEBUG_FS
+	/* Content of mem_profile file */
+	char *mem_profile_data;
+	/* Size of @c mem_profile_data */
+	size_t mem_profile_size;
+	/* Mutex guarding memory profile state */
+	struct mutex mem_profile_lock;
+	/* Memory profile directory under debugfs */
+	struct dentry *kctx_dentry;
+
+	/* for job fault debug */
+	unsigned int *reg_dump;
+	atomic_t job_fault_count;
+	/* This list will keep the following atoms during the dump
+	 * in the same context
+	 */
+	struct list_head job_fault_resume_event_list;
+
+#endif /* CONFIG_DEBUG_FS */
+
+	struct jsctx_queue jsctx_queue
+		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+
+	/* Number of atoms currently pulled from this context */
+	atomic_t atoms_pulled;
+	/* Number of atoms currently pulled from this context, per slot */
+	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+	/* Bitmask of slots that can be pulled from */
+	u32 slots_pullable;
+
+	/* Backend specific data */
+	struct kbase_context_backend backend;
+
+	/* Work structure used for deferred ASID assignment */
+	struct work_struct work;
+
+	/* Only one userspace vinstr client per kbase context */
+	struct kbase_vinstr_client *vinstr_cli;
+	struct mutex vinstr_cli_lock;
+
+	/* List of completed jobs waiting for events to be posted */
+	struct list_head completed_jobs;
+	/* Number of work items currently pending on job_done_wq */
+	atomic_t work_count;
+
+	/* Waiting soft-jobs will fail when this timer expires */
+	struct timer_list soft_job_timeout;
+
+	/* JIT allocation management */
+	struct kbase_va_region *jit_alloc[256];
+	struct list_head jit_active_head;
+	struct list_head jit_pool_head;
+	struct list_head jit_destroy_head;
+	struct mutex jit_lock;
+	struct work_struct jit_work;
+
+	/* External sticky resource management */
+	struct list_head ext_res_meta_head;
+
+	/* Used to record that a drain was requested from atomic context */
+	atomic_t drain_pending;
+
+	/* Current age count, used to determine age for newly submitted atoms */
+	u32 age_count;
+};
+
+/**
+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
+ *                                 to a @kbase_context.
+ * @ext_res_node:                  List head for adding the metadata to a
+ *                                 @kbase_context.
+ * @alloc:                         The physical memory allocation structure
+ *                                 which is mapped.
+ * @gpu_addr:                      The GPU virtual address the resource is
+ *                                 mapped to.
+ *
+ * External resources can be mapped into multiple contexts as well as the same
+ * context multiple times.
+ * As kbase_va_region itself isn't refcounted we can't attach our extra
+ * information to it as it could be removed under our feet leaving external
+ * resources pinned.
+ * This metadata structure binds a single external resource to a single
+ * context, ensuring that per context mapping is tracked separately so it can
+ * be overridden when needed and abuses by the application (freeing the resource
+ * multiple times) don't effect the refcount of the physical allocation.
+ */
+struct kbase_ctx_ext_res_meta {
+	struct list_head ext_res_node;
+	struct kbase_mem_phy_alloc *alloc;
+	u64 gpu_addr;
+};
+
+enum kbase_reg_access_type {
+	REG_READ,
+	REG_WRITE
+};
+
+enum kbase_share_attr_bits {
+	/* (1ULL << 8) bit is reserved */
+	SHARE_BOTH_BITS = (2ULL << 8),	/* inner and outer shareable coherency */
+	SHARE_INNER_BITS = (3ULL << 8)	/* inner shareable coherency */
+};
+
+/**
+ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
+ * @kbdev: kbase device
+ *
+ * Return: true if the device access are coherent, false if not.
+ */
+static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
+			(kbdev->system_coherency == COHERENCY_ACE))
+		return true;
+
+	return false;
+}
+
+/* Conversion helpers for setting up high resolution timers */
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
+
+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
+#define KBASE_CLEAN_CACHE_MAX_LOOPS     100000
+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
+#define KBASE_AS_INACTIVE_MAX_LOOPS     100000
+
+/* Maximum number of times a job can be replayed */
+#define BASEP_JD_REPLAY_LIMIT 15
+
+/* JobDescriptorHeader - taken from the architecture specifications, the layout
+ * is currently identical for all GPU archs. */
+struct job_descriptor_header {
+	u32 exception_status;
+	u32 first_incomplete_task;
+	u64 fault_pointer;
+	u8 job_descriptor_size : 1;
+	u8 job_type : 7;
+	u8 job_barrier : 1;
+	u8 _reserved_01 : 1;
+	u8 _reserved_1 : 1;
+	u8 _reserved_02 : 1;
+	u8 _reserved_03 : 1;
+	u8 _reserved_2 : 1;
+	u8 _reserved_04 : 1;
+	u8 _reserved_05 : 1;
+	u16 job_index;
+	u16 job_dependency_index_1;
+	u16 job_dependency_index_2;
+	union {
+		u64 _64;
+		u32 _32;
+	} next_job;
+};
+
+#endif				/* _KBASE_DEFS_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
new file mode 100755
index 0000000..7484eec
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -0,0 +1,697 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_profiling_gator_api.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports tracing feature provided in the base module.
+ * Please keep it in sync with the value of base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if KBASE_TRACE_ENABLE
+static const char *kbasep_trace_code_string[] = {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+static int kbasep_trace_init(struct kbase_device *kbdev);
+static void kbasep_trace_term(struct kbase_device *kbdev);
+static void kbasep_trace_hook_wrapper(void *param);
+
+struct kbase_device *kbase_device_alloc(void)
+{
+	return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+	const char format[] = "mali_mmu%d";
+	char name[sizeof(format)];
+	const char poke_format[] = "mali_mmu%d_poker";
+	char poke_name[sizeof(poke_format)];
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		snprintf(poke_name, sizeof(poke_name), poke_format, i);
+
+	snprintf(name, sizeof(name), format, i);
+
+	kbdev->as[i].number = i;
+	kbdev->as[i].fault_addr = 0ULL;
+
+	kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+	if (!kbdev->as[i].pf_wq)
+		return -EINVAL;
+
+	INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+	INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+		struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
+		struct work_struct *poke_work = &kbdev->as[i].poke_work;
+
+		kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+		if (!kbdev->as[i].poke_wq) {
+			destroy_workqueue(kbdev->as[i].pf_wq);
+			return -EINVAL;
+		}
+		KBASE_DEBUG_ASSERT(!object_is_on_stack(poke_work));
+		INIT_WORK(poke_work, kbasep_as_do_poke);
+
+		hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+		poke_timer->function = kbasep_as_poke_timer_callback;
+
+		kbdev->as[i].poke_refcount = 0;
+		kbdev->as[i].poke_state = 0u;
+	}
+
+	return 0;
+}
+
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+	destroy_workqueue(kbdev->as[i].pf_wq);
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		destroy_workqueue(kbdev->as[i].poke_wq);
+}
+
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+	int i, err;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		err = kbase_device_as_init(kbdev, i);
+		if (err)
+			goto free_workqs;
+	}
+
+	return 0;
+
+free_workqs:
+	for (; i > 0; i--)
+		kbase_device_as_term(kbdev, i);
+
+	return err;
+}
+
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+		kbase_device_as_term(kbdev, i);
+}
+
+int kbase_device_init(struct kbase_device * const kbdev)
+{
+	int i, err;
+#ifdef CONFIG_ARM64
+	struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
+
+	spin_lock_init(&kbdev->mmu_mask_change);
+	mutex_init(&kbdev->mmu_hw_mutex);
+#ifdef CONFIG_ARM64
+	kbdev->cci_snoop_enabled = false;
+	np = kbdev->dev->of_node;
+	if (np != NULL) {
+		if (of_property_read_u32(np, "snoop_enable_smc",
+					&kbdev->snoop_enable_smc))
+			kbdev->snoop_enable_smc = 0;
+		if (of_property_read_u32(np, "snoop_disable_smc",
+					&kbdev->snoop_disable_smc))
+			kbdev->snoop_disable_smc = 0;
+		/* Either both or none of the calls should be provided. */
+		if (!((kbdev->snoop_disable_smc == 0
+			&& kbdev->snoop_enable_smc == 0)
+			|| (kbdev->snoop_disable_smc != 0
+			&& kbdev->snoop_enable_smc != 0))) {
+			WARN_ON(1);
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+#endif /* CONFIG_ARM64 */
+	/* Get the list of workarounds for issues on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	err = kbase_hw_set_issues_mask(kbdev);
+	if (err)
+		goto fail;
+
+	/* Set the list of features available on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	kbase_hw_set_features_mask(kbdev);
+
+	kbase_gpuprops_set_features(kbdev);
+
+	/* On Linux 4.0+, dma coherency is determined from device tree */
+#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+	set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif
+
+	/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+	 * device structure was created by device-tree
+	 */
+	if (!kbdev->dev->dma_mask)
+		kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+	err = dma_set_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	err = dma_set_coherent_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+	err = kbase_device_all_as_init(kbdev);
+	if (err)
+		goto as_init_failed;
+
+	spin_lock_init(&kbdev->hwcnt.lock);
+
+	err = kbasep_trace_init(kbdev);
+	if (err)
+		goto term_as;
+
+	mutex_init(&kbdev->cacheclean_lock);
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		kbdev->timeline.slot_atoms_submitted[i] = 0;
+
+	for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
+		atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+	/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
+	for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
+		kbdev->kbase_profiling_controls[i] = 0;
+
+	kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+	atomic_set(&kbdev->ctx_num, 0);
+
+	err = kbase_instr_backend_init(kbdev);
+	if (err)
+		goto term_trace;
+
+	kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+#else
+	kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+#ifdef CONFIG_MALI_DEBUG
+	init_waitqueue_head(&kbdev->driver_inactive_wait);
+#endif /* CONFIG_MALI_DEBUG */
+
+	return 0;
+term_trace:
+	kbasep_trace_term(kbdev);
+term_as:
+	kbase_device_all_as_term(kbdev);
+as_init_failed:
+dma_set_mask_failed:
+fail:
+	return err;
+}
+
+void kbase_device_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+#if KBASE_TRACE_ENABLE
+	kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+	kbase_instr_backend_term(kbdev);
+
+	kbasep_trace_term(kbdev);
+
+	kbase_device_all_as_term(kbdev);
+}
+
+void kbase_device_free(struct kbase_device *kbdev)
+{
+	kfree(kbdev);
+}
+
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(tb);
+
+	/* Interface uses 16-bit value to track last accessed entry. Each entry
+	 * is composed of two 32-bit words.
+	 * This limits the size that can be handled without an overflow. */
+	if (0xFFFF * (2 * sizeof(u32)) < size)
+		return -EINVAL;
+
+	/* set up the header */
+	/* magic number in the first 4 bytes */
+	tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
+	/* Store (write offset = 0, wrap counter = 0, transaction active = no)
+	 * write offset 0 means never written.
+	 * Offsets 1 to (wrap_offset - 1) used to store values when trace started
+	 */
+	tb[1] = 0;
+
+	/* install trace buffer */
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb_wrap_offset = size / 8;
+	kctx->jctx.tb = tb;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+
+	return 0;
+}
+
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb = NULL;
+	kctx->jctx.tb_wrap_offset = 0;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	if (kctx->jctx.tb) {
+		u16 wrap_count;
+		u16 write_offset;
+		u32 *tb = kctx->jctx.tb;
+		u32 header_word;
+
+		header_word = tb[1];
+		KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
+
+		wrap_count = (header_word >> 1) & 0x7FFF;
+		write_offset = (header_word >> 16) & 0xFFFF;
+
+		/* mark as transaction in progress */
+		tb[1] |= 0x1;
+		mb();
+
+		/* calculate new offset */
+		write_offset++;
+		if (write_offset == kctx->jctx.tb_wrap_offset) {
+			/* wrap */
+			write_offset = 1;
+			wrap_count++;
+			wrap_count &= 0x7FFF;	/* 15bit wrap counter */
+		}
+
+		/* store the trace entry at the selected offset */
+		tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
+		tb[write_offset * 2 + 1] = reg_value;
+		mb();
+
+		/* new header word */
+		header_word = (write_offset << 16) | (wrap_count << 1) | 0x0;	/* transaction complete */
+		tb[1] = header_word;
+	}
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	struct kbase_trace *rbuf;
+
+	rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+	if (!rbuf)
+		return -EINVAL;
+
+	kbdev->trace_rbuf = rbuf;
+	spin_lock_init(&kbdev->trace_lock);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	kfree(kbdev->trace_rbuf);
+}
+
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+	s32 written = 0;
+
+	/* Initial part of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
+
+	if (trace_msg->katom)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
+
+	/* NOTE: Could add function callbacks to handle different message types */
+	/* Jobslot present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Refcount present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Rest of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
+}
+
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	dev_dbg(kbdev->dev, "%s", buffer);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+	unsigned long irqflags;
+	struct kbase_trace *trace_msg;
+
+	spin_lock_irqsave(&kbdev->trace_lock, irqflags);
+
+	trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+	/* Fill the message */
+	trace_msg->thread_id = task_pid_nr(current);
+	trace_msg->cpu = task_cpu(current);
+
+	getnstimeofday(&trace_msg->timestamp);
+
+	trace_msg->code = code;
+	trace_msg->ctx = ctx;
+
+	if (NULL == katom) {
+		trace_msg->katom = false;
+	} else {
+		trace_msg->katom = true;
+		trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+		trace_msg->atom_udata[0] = katom->udata.blob[0];
+		trace_msg->atom_udata[1] = katom->udata.blob[1];
+	}
+
+	trace_msg->gpu_addr = gpu_addr;
+	trace_msg->jobslot = jobslot;
+	trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
+	trace_msg->info_val = info_val;
+	trace_msg->flags = flags;
+
+	/* Update the ringbuffer indices */
+	kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+	if (kbdev->trace_next_in == kbdev->trace_first_out)
+		kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+	/* Done */
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	kbdev->trace_first_out = kbdev->trace_next_in;
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 start;
+	u32 end;
+
+	dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	start = kbdev->trace_first_out;
+	end = kbdev->trace_next_in;
+
+	while (start != end) {
+		struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
+
+		kbasep_trace_dump_msg(kbdev, trace_msg);
+
+		start = (start + 1) & KBASE_TRACE_MASK;
+	}
+	dev_dbg(kbdev->dev, "TRACE_END");
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	KBASE_TRACE_CLEAR(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)param;
+
+	kbasep_trace_dump(kbdev);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct trace_seq_state {
+	struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+	u32 start;
+	u32 end;
+};
+
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	if (*pos > KBASE_TRACE_SIZE)
+		return NULL;
+	i = state->start + *pos;
+	if ((state->end >= state->start && i >= state->end) ||
+			i >= state->end + KBASE_TRACE_SIZE)
+		return NULL;
+
+	i &= KBASE_TRACE_MASK;
+
+	return &state->trace_buf[i];
+}
+
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	(*pos)++;
+
+	i = (state->start + *pos) & KBASE_TRACE_MASK;
+	if (i == state->end)
+		return NULL;
+
+	return &state->trace_buf[i];
+}
+
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace *trace_msg = data;
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	seq_printf(s, "%s\n", buffer);
+	return 0;
+}
+
+static const struct seq_operations kbasep_trace_seq_ops = {
+	.start = kbasep_trace_seq_start,
+	.next = kbasep_trace_seq_next,
+	.stop = kbasep_trace_seq_stop,
+	.show = kbasep_trace_seq_show,
+};
+
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct kbase_device *kbdev = inode->i_private;
+	unsigned long flags;
+
+	struct trace_seq_state *state;
+
+	state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
+	if (!state)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	state->start = kbdev->trace_first_out;
+	state->end = kbdev->trace_next_in;
+	memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	return 0;
+}
+
+static const struct file_operations kbasep_trace_debugfs_fops = {
+	.open = kbasep_trace_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_trace", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_trace_debugfs_fops);
+}
+
+#else
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_DEBUG_FS */
+
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	CSTD_UNUSED(param);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
+{
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		kbdev->kbase_profiling_controls[control] = value;
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+}
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control)
+{
+	u32 ret_value = 0;
+
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		ret_value = kbdev->kbase_profiling_controls[control];
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+
+	return ret_value;
+}
+
+/*
+ * Called by gator to control the production of
+ * profiling information at runtime
+ * */
+
+void _mali_profiling_control(u32 action, u32 value)
+{
+	struct kbase_device *kbdev = NULL;
+
+	/* find the first i.e. call with -1 */
+	kbdev = kbase_find_device(-1);
+
+	if (NULL != kbdev)
+		kbase_set_profiling_control(kbdev, action, value);
+}
+KBASE_EXPORT_SYMBOL(_mali_profiling_control);
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
new file mode 100755
index 0000000..f70bccc
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_set(&kbdev->disjoint_event.count, 0);
+	atomic_set(&kbdev->disjoint_event.state, 0);
+}
+
+/* increment the disjoint event count */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* increment the state and the event counter */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.state);
+
+	kbase_disjoint_event(kbdev);
+}
+
+/* decrement the state */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+	kbase_disjoint_event(kbdev);
+
+	atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* increments the count only if the state is > 0 */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	if (atomic_read(&kbdev->disjoint_event.state))
+		kbase_disjoint_event(kbdev);
+}
+
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
new file mode 100644
index 0000000..97bb6c5
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
@@ -0,0 +1,606 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as
+ * it will be set there.
+ */
+#include "mali_kbase_dma_fence.h"
+
+#include <linux/atomic.h>
+#include <linux/fence.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/reservation.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/ww_mutex.h>
+
+#include <mali_kbase.h>
+
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(kbase_dma_fence_lock);
+
+static void
+kbase_dma_fence_work(struct work_struct *pwork);
+
+static void
+kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource);
+}
+
+void
+kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom)
+{
+	list_del(&katom->queue);
+}
+
+static const char *
+kbase_dma_fence_get_driver_name(struct fence *fence)
+{
+	return kbase_drv_name;
+}
+
+static const char *
+kbase_dma_fence_get_timeline_name(struct fence *fence)
+{
+	return kbase_timeline_name;
+}
+
+static bool
+kbase_dma_fence_enable_signaling(struct fence *fence)
+{
+	/* If in the future we need to add code here remember to
+	 * to get a reference to the fence and release it when signaling
+	 * as stated in fence.h
+	 */
+	return true;
+}
+
+static void
+kbase_dma_fence_fence_value_str(struct fence *fence, char *str, int size)
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+static const struct fence_ops kbase_dma_fence_ops = {
+	.get_driver_name = kbase_dma_fence_get_driver_name,
+	.get_timeline_name = kbase_dma_fence_get_timeline_name,
+	.enable_signaling = kbase_dma_fence_enable_signaling,
+	/* Use the default wait */
+	.wait = fence_default_wait,
+	.fence_value_str = kbase_dma_fence_fence_value_str,
+};
+
+static struct fence *
+kbase_dma_fence_new(unsigned int context, unsigned int seqno)
+{
+	struct fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	fence_init(fence,
+		   &kbase_dma_fence_ops,
+		   &kbase_dma_fence_lock,
+		   context,
+		   seqno);
+
+	return fence;
+}
+
+static int
+kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
+				  struct ww_acquire_ctx *ctx)
+{
+	struct reservation_object *content_res = NULL;
+	unsigned int content_res_idx = 0;
+	unsigned int r;
+	int err = 0;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < info->dma_fence_resv_count; r++) {
+		if (info->resv_objs[r] == content_res) {
+			content_res = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	content_res_idx = r;
+
+	/* Unlock the locked one ones */
+	while (r--)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+
+	if (content_res)
+		ww_mutex_unlock(&content_res->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+		content_res = info->resv_objs[content_res_idx];
+		ww_mutex_lock_slow(&content_res->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+static void
+kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
+				    struct ww_acquire_ctx *ctx)
+{
+	unsigned int r;
+
+	for (r = 0; r < info->dma_fence_resv_count; r++)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+	ww_acquire_fini(ctx);
+}
+
+/**
+ * kbase_dma_fence_queue_work() - Queue work to handle @katom
+ * @katom: Pointer to atom for which to queue work
+ *
+ * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and
+ * submit the atom.
+ */
+static void
+kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	bool ret;
+
+	INIT_WORK(&katom->work, kbase_dma_fence_work);
+	ret = queue_work(kctx->dma_fence.wq, &katom->work);
+	/* Warn if work was already queued, that should not happen. */
+	WARN_ON(!ret);
+}
+
+/**
+ * kbase_dma_fence_free_callbacks - Free dma-fence callbacks on a katom
+ * @katom: Pointer to katom
+ * @queue_worker: Boolean indicating if fence worker is to be queued when
+ *                dep_count reaches 0.
+ *
+ * This function will free all fence callbacks on the katom's list of
+ * callbacks. Callbacks that have not yet been called, because their fence
+ * hasn't yet signaled, will first be removed from the fence.
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom, bool queue_worker)
+{
+	struct kbase_dma_fence_cb *cb, *tmp;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Clean up and free callbacks. */
+	list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) {
+		bool ret;
+
+		/* Cancel callbacks that hasn't been called yet. */
+		ret = fence_remove_callback(cb->fence, &cb->fence_cb);
+		if (ret) {
+			int ret;
+
+			/* Fence had not signaled, clean up after
+			 * canceling.
+			 */
+			ret = atomic_dec_return(&katom->dma_fence.dep_count);
+
+			if (unlikely(queue_worker && ret == 0)) {
+				/*
+				 * dep_count went to zero and queue_worker is
+				 * true. Queue the worker to handle the
+				 * completion of the katom.
+				 */
+				kbase_dma_fence_queue_work(katom);
+			}
+		}
+
+		/*
+		 * Release the reference taken in
+		 * kbase_dma_fence_add_callback().
+		 */
+		fence_put(cb->fence);
+		list_del(&cb->node);
+		kfree(cb);
+	}
+}
+
+/**
+ * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom
+ * @katom:	Katom to cancel
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Cancel callbacks and clean up. */
+	kbase_dma_fence_free_callbacks(katom, false);
+
+	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == 0);
+
+	/* Mark the atom as handled in case all fences signaled just before
+	 * canceling the callbacks and the worker was queued.
+	 */
+	atomic_set(&katom->dma_fence.dep_count, -1);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 * there is a race between job completion and cancellation.
+	 */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+
+/**
+ * kbase_dma_fence_work() - Worker thread called when a fence is signaled
+ * @pwork:	work_struct containing a pointer to a katom
+ *
+ * This function will clean and mark all dependencies as satisfied
+ */
+static void
+kbase_dma_fence_work(struct work_struct *pwork)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = container_of(pwork, struct kbase_jd_atom, work);
+	ctx = &katom->kctx->jctx;
+
+	mutex_lock(&ctx->lock);
+	if (atomic_read(&katom->dma_fence.dep_count) != 0)
+		goto out;
+
+	atomic_set(&katom->dma_fence.dep_count, -1);
+
+	/* Remove atom from list of dma-fence waiting atoms. */
+	kbase_dma_fence_waiters_remove(katom);
+	/* Cleanup callbacks. */
+	kbase_dma_fence_free_callbacks(katom, false);
+	/*
+	 * Queue atom on GPU, unless it has already completed due to a failing
+	 * dependency. Run jd_done_nolock() on the katom if it is completed.
+	 */
+	if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
+		jd_done_nolock(katom, NULL);
+	else
+		kbase_jd_dep_clear_locked(katom);
+
+out:
+	mutex_unlock(&ctx->lock);
+}
+
+/**
+ * kbase_dma_fence_add_callback() - Add callback on @fence to block @katom
+ * @katom: Pointer to katom that will be blocked by @fence
+ * @fence: Pointer to fence on which to set up the callback
+ * @callback: Pointer to function to be called when fence is signaled
+ *
+ * Caller needs to hold a reference to @fence when calling this function, and
+ * the caller is responsible for releasing that reference.  An additional
+ * reference to @fence will be taken when the callback was successfully set up
+ * and @fence needs to be kept valid until the callback has been called and
+ * cleanup have been done.
+ *
+ * Return: 0 on success: fence was either already signalled, or callback was
+ * set up. Negative error code is returned on error.
+ */
+static int
+kbase_dma_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct fence *fence,
+			     fence_func_t callback)
+{
+	int err = 0;
+	struct kbase_dma_fence_cb *kbase_fence_cb;
+
+	kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL);
+	if (!kbase_fence_cb)
+		return -ENOMEM;
+
+	kbase_fence_cb->fence = fence;
+	kbase_fence_cb->katom = katom;
+	INIT_LIST_HEAD(&kbase_fence_cb->node);
+
+	err = fence_add_callback(fence, &kbase_fence_cb->fence_cb, callback);
+	if (err == -ENOENT) {
+		/* Fence signaled, clear the error and return */
+		err = 0;
+		kbase_fence_cb->fence = NULL;
+		kfree(kbase_fence_cb);
+	} else if (err) {
+		kfree(kbase_fence_cb);
+	} else {
+		/*
+		 * Get reference to fence that will be kept until callback gets
+		 * cleaned up in kbase_dma_fence_free_callbacks().
+		 */
+		fence_get(fence);
+		atomic_inc(&katom->dma_fence.dep_count);
+		/* Add callback to katom's list of callbacks */
+		list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks);
+	}
+
+	return err;
+}
+
+static void
+kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
+{
+	struct kbase_dma_fence_cb *kcb = container_of(cb,
+				struct kbase_dma_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+
+	/* If the atom is zapped dep_count will be forced to a negative number
+	 * preventing this callback from ever scheduling work. Which in turn
+	 * would reschedule the atom.
+	 */
+	if (atomic_dec_and_test(&katom->dma_fence.dep_count))
+		kbase_dma_fence_queue_work(katom);
+}
+
+static int
+kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
+					 struct reservation_object *resv,
+					 bool exclusive)
+{
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = reservation_object_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = kbase_dma_fence_add_callback(katom,
+						   excl_fence,
+						   kbase_dma_fence_cb);
+
+		/* Release our reference, taken by reservation_object_get_fences_rcu(),
+		 * to the fence. We have set up our callback (if that was possible),
+		 * and it's the fence's owner is responsible for singling the fence
+		 * before allowing it to disappear.
+		 */
+		fence_put(excl_fence);
+
+		if (err)
+			goto out;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = kbase_dma_fence_add_callback(katom,
+							   shared_fences[i],
+							   kbase_dma_fence_cb);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Release all our references to the shared fences, taken by
+	 * reservation_object_get_fences_rcu(). We have set up our callback (if
+	 * that was possible), and it's the fence's owner is responsible for
+	 * signaling the fence before allowing it to disappear.
+	 */
+out:
+	for (i = 0; i < shared_count; i++)
+		fence_put(shared_fences[i]);
+	kfree(shared_fences);
+
+	if (err) {
+		/*
+		 * On error, cancel and clean up all callbacks that was set up
+		 * before the error.
+		 */
+		kbase_dma_fence_free_callbacks(katom, false);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive)
+{
+	unsigned int i;
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		/* Duplicate resource, ignore */
+		if (info->resv_objs[i] == resv)
+			return;
+	}
+
+	info->resv_objs[info->dma_fence_resv_count] = resv;
+	if (exclusive)
+		set_bit(info->dma_fence_resv_count,
+			info->dma_fence_excl_bitmap);
+	(info->dma_fence_resv_count)++;
+}
+
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info)
+{
+	int err, i;
+	struct fence *fence;
+	struct ww_acquire_ctx ww_ctx;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	fence = kbase_dma_fence_new(katom->dma_fence.context,
+				    atomic_inc_return(&katom->dma_fence.seqno));
+	if (!fence) {
+		err = -ENOMEM;
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d creating fence.\n", err);
+		return err;
+	}
+
+	katom->dma_fence.fence = fence;
+	atomic_set(&katom->dma_fence.dep_count, 1);
+
+	err = kbase_dma_fence_lock_reservations(info, &ww_ctx);
+	if (err) {
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d locking reservations.\n", err);
+		atomic_set(&katom->dma_fence.dep_count, -1);
+		fence_put(fence);
+		return err;
+	}
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		struct reservation_object *obj = info->resv_objs[i];
+
+		if (!test_bit(i, info->dma_fence_excl_bitmap)) {
+			err = reservation_object_reserve_shared(obj);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d reserving space for shared fence.\n", err);
+				goto end;
+			}
+
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, false);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_shared_fence(obj, katom->dma_fence.fence);
+		} else {
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, true);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_excl_fence(obj, katom->dma_fence.fence);
+		}
+	}
+
+end:
+	kbase_dma_fence_unlock_reservations(info, &ww_ctx);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (atomic_dec_and_test(&katom->dma_fence.dep_count)) {
+			atomic_set(&katom->dma_fence.dep_count, -1);
+			kbase_dma_fence_free_callbacks(katom, false);
+		} else {
+			/* Add katom to the list of dma-buf fence waiting atoms
+			 * only if it is still waiting.
+			 */
+			kbase_dma_fence_waiters_add(katom);
+		}
+	} else {
+		/* There was an error, cancel callbacks, set dep_count to -1 to
+		 * indicate that the atom has been handled (the caller will
+		 * kill it for us), signal the fence, free callbacks and the
+		 * fence.
+		 */
+		kbase_dma_fence_free_callbacks(katom, false);
+		atomic_set(&katom->dma_fence.dep_count, -1);
+		kbase_dma_fence_signal(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
+{
+	struct list_head *list = &kctx->dma_fence.waiting_resource;
+
+	while (!list_empty(list)) {
+		struct kbase_jd_atom *katom;
+
+		katom = list_first_entry(list, struct kbase_jd_atom, queue);
+		kbase_dma_fence_waiters_remove(katom);
+		kbase_dma_fence_cancel_atom(katom);
+	}
+}
+
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
+{
+	/* Cancel callbacks and clean up. */
+	kbase_dma_fence_free_callbacks(katom, true);
+}
+
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
+{
+	if (!katom->dma_fence.fence)
+		return;
+
+	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == -1);
+
+	/* Signal the atom's fence. */
+	fence_signal(katom->dma_fence.fence);
+	fence_put(katom->dma_fence.fence);
+	katom->dma_fence.fence = NULL;
+
+	kbase_dma_fence_free_callbacks(katom, false);
+}
+
+void kbase_dma_fence_term(struct kbase_context *kctx)
+{
+	destroy_workqueue(kctx->dma_fence.wq);
+	kctx->dma_fence.wq = NULL;
+}
+
+int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);
+
+	kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d",
+					     WQ_UNBOUND, 1, kctx->pid);
+	if (!kctx->dma_fence.wq)
+		return -ENOMEM;
+
+	return 0;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
new file mode 100644
index 0000000..3b0a69b
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
@@ -0,0 +1,150 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DMA_FENCE_H_
+#define _KBASE_DMA_FENCE_H_
+
+#ifdef CONFIG_MALI_DMA_FENCE
+
+#include <linux/fence.h>
+#include <linux/list.h>
+#include <linux/reservation.h>
+
+
+/* Forward declaration from mali_kbase_defs.h */
+struct kbase_jd_atom;
+struct kbase_context;
+
+/**
+ * struct kbase_dma_fence_cb - Mali dma-fence callback data struct
+ * @fence_cb: Callback function
+ * @katom:    Pointer to katom that is waiting on this callback
+ * @fence:    Pointer to the fence object on which this callback is waiting
+ * @node:     List head for linking this callback to the katom
+ */
+struct kbase_dma_fence_cb {
+	struct fence_cb fence_cb;
+	struct kbase_jd_atom *katom;
+	struct fence *fence;
+	struct list_head node;
+};
+
+/**
+ * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
+ * @resv_objs:             Array of reservation objects to attach the
+ *                         new fence to.
+ * @dma_fence_resv_count:  Number of reservation objects in the array.
+ * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive.
+ *
+ * This is used by some functions to pass around a collection of data about
+ * reservation objects.
+ */
+struct kbase_dma_fence_resv_info {
+	struct reservation_object **resv_objs;
+	unsigned int dma_fence_resv_count;
+	unsigned long *dma_fence_excl_bitmap;
+};
+
+/**
+ * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
+ * @resv:      Reservation object to add to the array.
+ * @info:      Pointer to struct with current reservation info
+ * @exclusive: Boolean indicating if exclusive access is needed
+ *
+ * The function adds a new reservation_object to an existing array of
+ * reservation_objects. At the same time keeps track of which objects require
+ * exclusive access in dma_fence_excl_bitmap.
+ */
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive);
+
+/**
+ * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
+ * @katom: Katom with the external dependency.
+ * @info:  Pointer to struct with current reservation info
+ *
+ * Return: An error code or 0 if succeeds
+ */
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info);
+
+/**
+ * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx
+ * @kctx: Pointer to kbase context
+ *
+ * This function will cancel and clean up all katoms on @kctx that is waiting
+ * on dma-buf fences.
+ *
+ * Locking: jctx.lock needs to be held when calling this function.
+ */
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
+ * @katom: Pointer to katom whose callbacks are to be canceled
+ *
+ * This function cancels all dma-buf fence callbacks on @katom, but does not
+ * cancel the katom itself.
+ *
+ * The caller is responsible for ensuring that jd_done_nolock is called on
+ * @katom.
+ *
+ * Locking: jctx.lock must be held when calling this function.
+ */
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
+ * @katom: Pointer to katom to signal and clean up
+ *
+ * This function will signal the @katom's fence, if it has one, and clean up
+ * the callback data from the katom's wait on earlier fences.
+ *
+ * Locking: jctx.lock must be held while calling this function.
+ */
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_term() - Terminate Mali dma-fence context
+ * @kctx: kbase context to terminate
+ */
+void kbase_dma_fence_term(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_init() - Initialize Mali dma-fence context
+ * @kctx: kbase context to initialize
+ */
+int kbase_dma_fence_init(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_waiters_remove()- Remove katom from dma-fence wait list
+ * @katom: Pointer to katom to remove from list
+ */
+void kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom);
+
+#else /* CONFIG_MALI_DMA_FENCE */
+/* Dummy functions for when dma-buf fence isn't enabled. */
+
+static inline int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	return 0;
+}
+
+static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
new file mode 100755
index 0000000..f07406c
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -0,0 +1,259 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+#include <mali_kbase_tlstream.h>
+
+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct base_jd_udata data;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+	data = katom->udata;
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
+
+	kbase_tlstream_tl_nret_atom_ctx(katom, kctx);
+	kbase_tlstream_tl_del_atom(katom);
+
+	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+
+	wake_up(&katom->completed);
+
+	return data;
+}
+
+int kbase_event_pending(struct kbase_context *ctx)
+{
+	KBASE_DEBUG_ASSERT(ctx);
+
+	return (atomic_read(&ctx->event_count) != 0) ||
+			(atomic_read(&ctx->event_closed) != 0);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+	struct kbase_jd_atom *atom;
+
+	KBASE_DEBUG_ASSERT(ctx);
+
+	mutex_lock(&ctx->event_mutex);
+
+	if (list_empty(&ctx->event_list)) {
+		if (!atomic_read(&ctx->event_closed)) {
+			mutex_unlock(&ctx->event_mutex);
+			return -1;
+		}
+
+		/* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+		mutex_unlock(&ctx->event_mutex);
+		uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+		memset(&uevent->udata, 0, sizeof(uevent->udata));
+		dev_dbg(ctx->kbdev->dev,
+				"event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+				BASE_JD_EVENT_DRV_TERMINATED);
+		return 0;
+	}
+
+	/* normal event processing */
+	atomic_dec(&ctx->event_count);
+	atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
+	list_del(ctx->event_list.next);
+
+	mutex_unlock(&ctx->event_mutex);
+
+	dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+	uevent->event_code = atom->event_code;
+	uevent->atom_number = (atom - ctx->jctx.atoms);
+
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(atom);
+
+	mutex_lock(&ctx->jctx.lock);
+	uevent->udata = kbase_event_process(ctx, atom);
+	mutex_unlock(&ctx->jctx.lock);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue);
+
+/**
+ * kbase_event_process_noreport_worker - Worker for processing atoms that do not
+ *                                       return an event but do have external
+ *                                       resources
+ * @data:  Work structure
+ */
+static void kbase_event_process_noreport_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(katom);
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_event_process(kctx, katom);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+/**
+ * kbase_event_process_noreport - Process atoms that do not return an event
+ * @kctx:  Context pointer
+ * @katom: Atom to be processed
+ *
+ * Atoms that do not have external resources will be processed immediately.
+ * Atoms that do have external resources will be processed on a workqueue, in
+ * order to avoid locking issues.
+ */
+static void kbase_event_process_noreport(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		INIT_WORK(&katom->work, kbase_event_process_noreport_worker);
+		queue_work(kctx->event_workq, &katom->work);
+	} else {
+		kbase_event_process(kctx, katom);
+	}
+}
+
+/**
+ * kbase_event_coalesce - Move pending events to the main event list
+ * @kctx:  Context pointer
+ *
+ * kctx->event_list and kctx->event_coalesce_count must be protected
+ * by a lock unless this is the last thread using them
+ * (and we're about to terminate the lock).
+ *
+ * Return: The number of pending events moved to the main event list
+ */
+static int kbase_event_coalesce(struct kbase_context *kctx)
+{
+	const int event_count = kctx->event_coalesce_count;
+
+	/* Join the list of pending events onto the tail of the main list
+	   and reset it */
+	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
+	kctx->event_coalesce_count = 0;
+
+	/* Return the number of events moved */
+	return event_count;
+}
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+	if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
+		if (atom->event_code == BASE_JD_EVENT_DONE) {
+			/* Don't report the event */
+			kbase_event_process_noreport(ctx, atom);
+			return;
+		}
+	}
+
+	if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
+		/* Don't report the event */
+		kbase_event_process_noreport(ctx, atom);
+		return;
+	}
+	kbase_tlstream_tl_attrib_atom_state(atom, TL_ATOM_STATE_POSTED);
+	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
+		/* Don't report the event until other event(s) have completed */
+		mutex_lock(&ctx->event_mutex);
+		list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
+		++ctx->event_coalesce_count;
+		mutex_unlock(&ctx->event_mutex);
+	} else {
+		/* Report the event and any pending events now */
+		int event_count = 1;
+
+		mutex_lock(&ctx->event_mutex);
+		event_count += kbase_event_coalesce(ctx);
+		list_add_tail(&atom->dep_item[0], &ctx->event_list);
+		atomic_add(event_count, &ctx->event_count);
+		mutex_unlock(&ctx->event_mutex);
+
+		kbase_event_wakeup(ctx);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_event_post);
+
+void kbase_event_close(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->event_mutex);
+	atomic_set(&kctx->event_closed, true);
+	mutex_unlock(&kctx->event_mutex);
+	kbase_event_wakeup(kctx);
+}
+
+int kbase_event_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	INIT_LIST_HEAD(&kctx->event_list);
+	INIT_LIST_HEAD(&kctx->event_coalesce_list);
+	mutex_init(&kctx->event_mutex);
+	atomic_set(&kctx->event_count, 0);
+	kctx->event_coalesce_count = 0;
+	atomic_set(&kctx->event_closed, false);
+	kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+	if (NULL == kctx->event_workq)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init);
+
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+	int event_count;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+	flush_workqueue(kctx->event_workq);
+	destroy_workqueue(kctx->event_workq);
+
+	/* We use kbase_event_dequeue to remove the remaining events as that
+	 * deals with all the cleanup needed for the atoms.
+	 *
+	 * Note: use of kctx->event_list without a lock is safe because this must be the last
+	 * thread using it (because we're about to terminate the lock)
+	 */
+	event_count = kbase_event_coalesce(kctx);
+	atomic_add(event_count, &kctx->event_count);
+
+	while (!list_empty(&kctx->event_list)) {
+		struct base_jd_event_v2 event;
+
+		kbase_event_dequeue(kctx, &event);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
new file mode 100755
index 0000000..ce65b55
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* NB taken from gator  */
+/*
+ * List of possible actions to be controlled by DS-5 Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping
+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
+ * they are unknown inside gator.
+ */
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP  2
+#define GATOR_JOB_SLOT_SOFT_STOPPED  3
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 event, u64 value);
+void kbase_trace_mali_pm_power_off(u32 event, u64 value);
+void kbase_trace_mali_pm_power_on(u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
+void kbase_trace_mali_mmu_as_in_use(int event);
+void kbase_trace_mali_mmu_as_released(int event);
+void kbase_trace_mali_total_alloc_pages_change(long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif  /* _KBASE_GATOR_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
new file mode 100755
index 0000000..3292fa9
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -0,0 +1,330 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_gator_api.h"
+#include "mali_kbase_gator_hwcnt_names.h"
+
+#define MALI_MAX_CORES_PER_GROUP		4
+#define MALI_MAX_NUM_BLOCKS_PER_GROUP	8
+#define MALI_COUNTERS_PER_BLOCK			64
+#define MALI_BYTES_PER_COUNTER			4
+
+struct kbase_gator_hwcnt_handles {
+	struct kbase_device *kbdev;
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct work_struct dump_work;
+	int dump_complete;
+	spinlock_t dump_lock;
+};
+
+static void dump_worker(struct work_struct *work);
+
+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
+{
+	const char * const *hardware_counters;
+	struct kbase_device *kbdev;
+	uint32_t product_id;
+	uint32_t count;
+
+	if (!total_counters)
+		return NULL;
+
+	/* Get the first device - it doesn't matter in this case */
+	kbdev = kbase_find_device(-1);
+	if (!kbdev)
+		return NULL;
+
+	product_id = kbdev->gpu_props.props.core_props.product_id;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) {
+		case GPU_ID2_PRODUCT_TMIX:
+			hardware_counters = hardware_counters_mali_tMIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tMIx);
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			hardware_counters = hardware_counters_mali_tHEx;
+			count = ARRAY_SIZE(hardware_counters_mali_tHEx);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	} else {
+		switch (product_id) {
+			/* If we are using a Mali-T60x device */
+		case GPU_ID_PI_T60X:
+			hardware_counters = hardware_counters_mali_t60x;
+			count = ARRAY_SIZE(hardware_counters_mali_t60x);
+			break;
+			/* If we are using a Mali-T62x device */
+		case GPU_ID_PI_T62X:
+			hardware_counters = hardware_counters_mali_t62x;
+			count = ARRAY_SIZE(hardware_counters_mali_t62x);
+			break;
+			/* If we are using a Mali-T72x device */
+		case GPU_ID_PI_T72X:
+			hardware_counters = hardware_counters_mali_t72x;
+			count = ARRAY_SIZE(hardware_counters_mali_t72x);
+			break;
+			/* If we are using a Mali-T76x device */
+		case GPU_ID_PI_T76X:
+			hardware_counters = hardware_counters_mali_t76x;
+			count = ARRAY_SIZE(hardware_counters_mali_t76x);
+			break;
+			/* If we are using a Mali-T82x device */
+		case GPU_ID_PI_T82X:
+			hardware_counters = hardware_counters_mali_t82x;
+			count = ARRAY_SIZE(hardware_counters_mali_t82x);
+			break;
+			/* If we are using a Mali-T83x device */
+		case GPU_ID_PI_T83X:
+			hardware_counters = hardware_counters_mali_t83x;
+			count = ARRAY_SIZE(hardware_counters_mali_t83x);
+			break;
+			/* If we are using a Mali-T86x device */
+		case GPU_ID_PI_T86X:
+			hardware_counters = hardware_counters_mali_t86x;
+			count = ARRAY_SIZE(hardware_counters_mali_t86x);
+			break;
+			/* If we are using a Mali-T88x device */
+		case GPU_ID_PI_TFRX:
+			hardware_counters = hardware_counters_mali_t88x;
+			count = ARRAY_SIZE(hardware_counters_mali_t88x);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	}
+
+	/* Release the kbdev reference. */
+	kbase_release_device(kbdev);
+
+	*total_counters = count;
+
+	/* If we return a string array take a reference on the module (or fail). */
+	if (hardware_counters && !try_module_get(THIS_MODULE))
+		return NULL;
+
+	return hardware_counters;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
+
+void kbase_gator_hwcnt_term_names(void)
+{
+	/* Release the module reference. */
+	module_put(THIS_MODULE);
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
+
+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	uint32_t dump_size = 0, i = 0;
+
+	if (!in_out_info)
+		return NULL;
+
+	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
+	if (!hand)
+		return NULL;
+
+	INIT_WORK(&hand->dump_work, dump_worker);
+	spin_lock_init(&hand->dump_lock);
+
+	/* Get the first device */
+	hand->kbdev = kbase_find_device(-1);
+	if (!hand->kbdev)
+		goto free_hand;
+
+	dump_size = kbase_vinstr_dump_size(hand->kbdev);
+	hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!hand->vinstr_buffer)
+		goto release_device;
+	in_out_info->kernel_dump_buffer = hand->vinstr_buffer;
+
+	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
+	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
+	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
+
+	/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
+	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
+		uint32_t cg, j;
+		uint64_t core_mask;
+
+		/* There are 8 hardware counters blocks per core group */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			in_out_info->nr_core_groups, GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = in_out_info->nr_core_groups *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			MALI_COUNTERS_PER_BLOCK *
+			MALI_BYTES_PER_COUNTER;
+
+		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
+			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
+
+			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
+				if (core_mask & (1u << j))
+					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+				else
+					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			}
+
+			in_out_info->hwc_layout[i++] = TILER_BLOCK;
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+
+			if (0 == cg)
+				in_out_info->hwc_layout[i++] = JM_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+		}
+	/* If we are using any other device */
+	} else {
+		uint32_t nr_l2, nr_sc_bits, j;
+		uint64_t core_mask;
+
+		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
+
+		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+		nr_sc_bits = fls64(core_mask);
+
+		/* The job manager and tiler sets of counters
+		 * are always present */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+
+		in_out_info->hwc_layout[i++] = JM_BLOCK;
+		in_out_info->hwc_layout[i++] = TILER_BLOCK;
+
+		for (j = 0; j < nr_l2; j++)
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+		while (core_mask != 0ull) {
+			if ((core_mask & 1ull) != 0ull)
+				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			core_mask >>= 1;
+		}
+	}
+
+	in_out_info->nr_hwc_blocks = i;
+	in_out_info->size = dump_size;
+
+	setup.jm_bm = in_out_info->bitmask[0];
+	setup.tiler_bm = in_out_info->bitmask[1];
+	setup.shader_bm = in_out_info->bitmask[2];
+	setup.mmu_l2_bm = in_out_info->bitmask[3];
+	hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx,
+			&setup, hand->vinstr_buffer);
+	if (!hand->vinstr_cli) {
+		dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core");
+		goto free_layout;
+	}
+
+	return hand;
+
+free_layout:
+	kfree(in_out_info->hwc_layout);
+
+free_vinstr_buffer:
+	kfree(hand->vinstr_buffer);
+
+release_device:
+	kbase_release_device(hand->kbdev);
+
+free_hand:
+	kfree(hand);
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
+
+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (in_out_info)
+		kfree(in_out_info->hwc_layout);
+
+	if (opaque_handles) {
+		cancel_work_sync(&opaque_handles->dump_work);
+		kbase_vinstr_detach_client(opaque_handles->vinstr_cli);
+		kfree(opaque_handles->vinstr_buffer);
+		kbase_release_device(opaque_handles->kbdev);
+		kfree(opaque_handles);
+	}
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
+
+static void dump_worker(struct work_struct *work)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+
+	hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work);
+	if (!kbase_vinstr_hwc_dump(hand->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL)) {
+		spin_lock_bh(&hand->dump_lock);
+		hand->dump_complete = 1;
+		spin_unlock_bh(&hand->dump_lock);
+	} else {
+		schedule_work(&hand->dump_work);
+	}
+}
+
+uint32_t kbase_gator_instr_hwcnt_dump_complete(
+		struct kbase_gator_hwcnt_handles *opaque_handles,
+		uint32_t * const success)
+{
+
+	if (opaque_handles && success) {
+		*success = opaque_handles->dump_complete;
+		opaque_handles->dump_complete = 0;
+		return *success;
+	}
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
+
+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (opaque_handles)
+		schedule_work(&opaque_handles->dump_work);
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
new file mode 100755
index 0000000..ef9ac0f
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
@@ -0,0 +1,219 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to fetch hardware counters.
+ */
+
+/* This define is used by the gator kernel module compile to select which DDK
+ * API calling convention to use. If not defined (legacy DDK) gator assumes
+ * version 1. The version to DDK release mapping is:
+ *     Version 1 API: DDK versions r1px, r2px
+ *     Version 2 API: DDK versions r3px, r4px
+ *     Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a
+ * counter dump. If this returns a non-zero value the request has been queued,
+ * otherwise the driver has been unable to do so (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the  previously
+ * requested dump has been succesful. If this returns non-zero the counter dump
+ * has resolved, but the value of *success must also be tested as the dump
+ * may have not been successful. If it returns zero the counter dump was
+ * abandoned due to the device being busy (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ *        kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ *    In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ *        hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ *        hwcnt_name # pointer to name list
+ *
+ *        u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ *        # Iterate over each 64-counter block in this GPU configuration
+ *        for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ *            hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ *            # Skip reserved type blocks - they contain no counters at all
+ *            if( type == RESERVED_BLOCK ) {
+ *                continue;
+ *            }
+ *
+ *            size_t name_offset = type * 64;
+ *            size_t data_offset = i * 64;
+ *
+ *            # Iterate over the names of the counters in this block type
+ *            for( j = 0; j < 64; j++) {
+ *                const char * name = hwcnt_name[name_offset+j];
+ *
+ *                # Skip empty name strings - there is no counter here
+ *                if( name[0] == '\0' ) {
+ *                    continue;
+ *                }
+ *
+ *                u32 data = hwcnt_data[data_offset+j];
+ *
+ *                printk( "COUNTER: %s DATA: %u\n", name, data );
+ *            }
+ *        }
+ *
+ *
+ *     Note that in most implementations you typically want to either SUM or
+ *     AVERAGE multiple instances of the same counter if, for example, you have
+ *     multiple shader cores or multiple L2 caches. The most sensible view for
+ *     analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ *     counters.
+ *
+ * 7] Goto 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling
+ *    kbase_gator_hwcnt_term_names(). This function must only be called if
+ *    init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+enum hwc_type {
+	JM_BLOCK = 0,
+	TILER_BLOCK,
+	SHADER_BLOCK,
+	MMU_L2_BLOCK,
+	RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+	/* Passed from Gator to kbase */
+
+	/* the bitmask of enabled hardware counters for each counter block */
+	uint16_t bitmask[4];
+
+	/* Passed from kbase to Gator */
+
+	/* ptr to counter dump memory */
+	void *kernel_dump_buffer;
+
+	/* size of counter dump memory */
+	uint32_t size;
+
+	/* the ID of the Mali device */
+	uint32_t gpu_id;
+
+	/* the number of shader cores in the GPU */
+	uint32_t nr_cores;
+
+	/* the number of core groups */
+	uint32_t nr_core_groups;
+
+	/* the memory layout of the performance counters */
+	enum hwc_type *hwc_layout;
+
+	/* the total number of hardware couter blocks */
+	uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info   A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ *                      specific information that will be returned to Gator. On entry Gator must have populated the
+ *                      'bitmask' field with the counters it wishes to enable for each class of counter block.
+ *                      Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ *                      enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ *                      the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ *                      kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU.
+ *
+ * @return              Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info       A pointer to a structure containing the enabled counters passed from Gator and all the
+ *                          Mali specific information that will be returned to Gator.
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ * @param[out] success      Non-zero on success, zero on failure.
+ *
+ * @return                  Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ *                          completed dump may not have dumped succesfully, so the caller must test for both
+ *                          a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ *
+ * @return                  Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_counters The total number of counters short names in the Mali devices' list.
+ *
+ * @return                    Pointer to an array of strings of length *total_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
new file mode 100755
index 0000000..7ec05c1
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -0,0 +1,2164 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_H_
+#define _KBASE_GATOR_HWCNT_NAMES_H_
+
+/*
+ * "Short names" for hardware counters used by Streamline. Counters names are
+ * stored in accordance with their memory layout in the binary counter block
+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
+ * of 64 counters, and each GPU implements the same set of "masters" although
+ * the counters each master exposes within its block of 64 may vary.
+ *
+ * Counters which are an empty string are simply "holes" in the counter memory
+ * where no counter exists.
+ */
+
+static const char * const hardware_counters_mali_t60x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MESSAGES_SENT",
+	"T60x_MESSAGES_RECEIVED",
+	"T60x_GPU_ACTIVE",
+	"T60x_IRQ_ACTIVE",
+	"T60x_JS0_JOBS",
+	"T60x_JS0_TASKS",
+	"T60x_JS0_ACTIVE",
+	"",
+	"T60x_JS0_WAIT_READ",
+	"T60x_JS0_WAIT_ISSUE",
+	"T60x_JS0_WAIT_DEPEND",
+	"T60x_JS0_WAIT_FINISH",
+	"T60x_JS1_JOBS",
+	"T60x_JS1_TASKS",
+	"T60x_JS1_ACTIVE",
+	"",
+	"T60x_JS1_WAIT_READ",
+	"T60x_JS1_WAIT_ISSUE",
+	"T60x_JS1_WAIT_DEPEND",
+	"T60x_JS1_WAIT_FINISH",
+	"T60x_JS2_JOBS",
+	"T60x_JS2_TASKS",
+	"T60x_JS2_ACTIVE",
+	"",
+	"T60x_JS2_WAIT_READ",
+	"T60x_JS2_WAIT_ISSUE",
+	"T60x_JS2_WAIT_DEPEND",
+	"T60x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T60x_TI_JOBS_PROCESSED",
+	"T60x_TI_TRIANGLES",
+	"T60x_TI_QUADS",
+	"T60x_TI_POLYGONS",
+	"T60x_TI_POINTS",
+	"T60x_TI_LINES",
+	"T60x_TI_VCACHE_HIT",
+	"T60x_TI_VCACHE_MISS",
+	"T60x_TI_FRONT_FACING",
+	"T60x_TI_BACK_FACING",
+	"T60x_TI_PRIM_VISIBLE",
+	"T60x_TI_PRIM_CULLED",
+	"T60x_TI_PRIM_CLIPPED",
+	"T60x_TI_LEVEL0",
+	"T60x_TI_LEVEL1",
+	"T60x_TI_LEVEL2",
+	"T60x_TI_LEVEL3",
+	"T60x_TI_LEVEL4",
+	"T60x_TI_LEVEL5",
+	"T60x_TI_LEVEL6",
+	"T60x_TI_LEVEL7",
+	"T60x_TI_COMMAND_1",
+	"T60x_TI_COMMAND_2",
+	"T60x_TI_COMMAND_3",
+	"T60x_TI_COMMAND_4",
+	"T60x_TI_COMMAND_4_7",
+	"T60x_TI_COMMAND_8_15",
+	"T60x_TI_COMMAND_16_63",
+	"T60x_TI_COMMAND_64",
+	"T60x_TI_COMPRESS_IN",
+	"T60x_TI_COMPRESS_OUT",
+	"T60x_TI_COMPRESS_FLUSH",
+	"T60x_TI_TIMESTAMPS",
+	"T60x_TI_PCACHE_HIT",
+	"T60x_TI_PCACHE_MISS",
+	"T60x_TI_PCACHE_LINE",
+	"T60x_TI_PCACHE_STALL",
+	"T60x_TI_WRBUF_HIT",
+	"T60x_TI_WRBUF_MISS",
+	"T60x_TI_WRBUF_LINE",
+	"T60x_TI_WRBUF_PARTIAL",
+	"T60x_TI_WRBUF_STALL",
+	"T60x_TI_ACTIVE",
+	"T60x_TI_LOADING_DESC",
+	"T60x_TI_INDEX_WAIT",
+	"T60x_TI_INDEX_RANGE_WAIT",
+	"T60x_TI_VERTEX_WAIT",
+	"T60x_TI_PCACHE_WAIT",
+	"T60x_TI_WRBUF_WAIT",
+	"T60x_TI_BUS_READ",
+	"T60x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_TI_UTLB_STALL",
+	"T60x_TI_UTLB_REPLAY_MISS",
+	"T60x_TI_UTLB_REPLAY_FULL",
+	"T60x_TI_UTLB_NEW_MISS",
+	"T60x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T60x_FRAG_ACTIVE",
+	"T60x_FRAG_PRIMITIVES",
+	"T60x_FRAG_PRIMITIVES_DROPPED",
+	"T60x_FRAG_CYCLES_DESC",
+	"T60x_FRAG_CYCLES_PLR",
+	"T60x_FRAG_CYCLES_VERT",
+	"T60x_FRAG_CYCLES_TRISETUP",
+	"T60x_FRAG_CYCLES_RAST",
+	"T60x_FRAG_THREADS",
+	"T60x_FRAG_DUMMY_THREADS",
+	"T60x_FRAG_QUADS_RAST",
+	"T60x_FRAG_QUADS_EZS_TEST",
+	"T60x_FRAG_QUADS_EZS_KILLED",
+	"T60x_FRAG_THREADS_LZS_TEST",
+	"T60x_FRAG_THREADS_LZS_KILLED",
+	"T60x_FRAG_CYCLES_NO_TILE",
+	"T60x_FRAG_NUM_TILES",
+	"T60x_FRAG_TRANS_ELIM",
+	"T60x_COMPUTE_ACTIVE",
+	"T60x_COMPUTE_TASKS",
+	"T60x_COMPUTE_THREADS",
+	"T60x_COMPUTE_CYCLES_DESC",
+	"T60x_TRIPIPE_ACTIVE",
+	"T60x_ARITH_WORDS",
+	"T60x_ARITH_CYCLES_REG",
+	"T60x_ARITH_CYCLES_L0",
+	"T60x_ARITH_FRAG_DEPEND",
+	"T60x_LS_WORDS",
+	"T60x_LS_ISSUES",
+	"T60x_LS_RESTARTS",
+	"T60x_LS_REISSUES_MISS",
+	"T60x_LS_REISSUES_VD",
+	"T60x_LS_REISSUE_ATTRIB_MISS",
+	"T60x_LS_NO_WB",
+	"T60x_TEX_WORDS",
+	"T60x_TEX_BUBBLES",
+	"T60x_TEX_WORDS_L0",
+	"T60x_TEX_WORDS_DESC",
+	"T60x_TEX_ISSUES",
+	"T60x_TEX_RECIRC_FMISS",
+	"T60x_TEX_RECIRC_DESC",
+	"T60x_TEX_RECIRC_MULTI",
+	"T60x_TEX_RECIRC_PMISS",
+	"T60x_TEX_RECIRC_CONF",
+	"T60x_LSC_READ_HITS",
+	"T60x_LSC_READ_MISSES",
+	"T60x_LSC_WRITE_HITS",
+	"T60x_LSC_WRITE_MISSES",
+	"T60x_LSC_ATOMIC_HITS",
+	"T60x_LSC_ATOMIC_MISSES",
+	"T60x_LSC_LINE_FETCHES",
+	"T60x_LSC_DIRTY_LINE",
+	"T60x_LSC_SNOOPS",
+	"T60x_AXI_TLB_STALL",
+	"T60x_AXI_TLB_MISS",
+	"T60x_AXI_TLB_TRANSACTION",
+	"T60x_LS_TLB_MISS",
+	"T60x_LS_TLB_HIT",
+	"T60x_AXI_BEATS_READ",
+	"T60x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MMU_HIT",
+	"T60x_MMU_NEW_MISS",
+	"T60x_MMU_REPLAY_FULL",
+	"T60x_MMU_REPLAY_MISS",
+	"T60x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_UTLB_HIT",
+	"T60x_UTLB_NEW_MISS",
+	"T60x_UTLB_REPLAY_FULL",
+	"T60x_UTLB_REPLAY_MISS",
+	"T60x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_L2_EXT_WRITE_BEATS",
+	"T60x_L2_EXT_READ_BEATS",
+	"T60x_L2_ANY_LOOKUP",
+	"T60x_L2_READ_LOOKUP",
+	"T60x_L2_SREAD_LOOKUP",
+	"T60x_L2_READ_REPLAY",
+	"T60x_L2_READ_SNOOP",
+	"T60x_L2_READ_HIT",
+	"T60x_L2_CLEAN_MISS",
+	"T60x_L2_WRITE_LOOKUP",
+	"T60x_L2_SWRITE_LOOKUP",
+	"T60x_L2_WRITE_REPLAY",
+	"T60x_L2_WRITE_SNOOP",
+	"T60x_L2_WRITE_HIT",
+	"T60x_L2_EXT_READ_FULL",
+	"T60x_L2_EXT_READ_HALF",
+	"T60x_L2_EXT_WRITE_FULL",
+	"T60x_L2_EXT_WRITE_HALF",
+	"T60x_L2_EXT_READ",
+	"T60x_L2_EXT_READ_LINE",
+	"T60x_L2_EXT_WRITE",
+	"T60x_L2_EXT_WRITE_LINE",
+	"T60x_L2_EXT_WRITE_SMALL",
+	"T60x_L2_EXT_BARRIER",
+	"T60x_L2_EXT_AR_STALL",
+	"T60x_L2_EXT_R_BUF_FULL",
+	"T60x_L2_EXT_RD_BUF_FULL",
+	"T60x_L2_EXT_R_RAW",
+	"T60x_L2_EXT_W_STALL",
+	"T60x_L2_EXT_W_BUF_FULL",
+	"T60x_L2_EXT_R_W_HAZARD",
+	"T60x_L2_TAG_HAZARD",
+	"T60x_L2_SNOOP_FULL",
+	"T60x_L2_REPLAY_FULL"
+};
+static const char * const hardware_counters_mali_t62x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MESSAGES_SENT",
+	"T62x_MESSAGES_RECEIVED",
+	"T62x_GPU_ACTIVE",
+	"T62x_IRQ_ACTIVE",
+	"T62x_JS0_JOBS",
+	"T62x_JS0_TASKS",
+	"T62x_JS0_ACTIVE",
+	"",
+	"T62x_JS0_WAIT_READ",
+	"T62x_JS0_WAIT_ISSUE",
+	"T62x_JS0_WAIT_DEPEND",
+	"T62x_JS0_WAIT_FINISH",
+	"T62x_JS1_JOBS",
+	"T62x_JS1_TASKS",
+	"T62x_JS1_ACTIVE",
+	"",
+	"T62x_JS1_WAIT_READ",
+	"T62x_JS1_WAIT_ISSUE",
+	"T62x_JS1_WAIT_DEPEND",
+	"T62x_JS1_WAIT_FINISH",
+	"T62x_JS2_JOBS",
+	"T62x_JS2_TASKS",
+	"T62x_JS2_ACTIVE",
+	"",
+	"T62x_JS2_WAIT_READ",
+	"T62x_JS2_WAIT_ISSUE",
+	"T62x_JS2_WAIT_DEPEND",
+	"T62x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T62x_TI_JOBS_PROCESSED",
+	"T62x_TI_TRIANGLES",
+	"T62x_TI_QUADS",
+	"T62x_TI_POLYGONS",
+	"T62x_TI_POINTS",
+	"T62x_TI_LINES",
+	"T62x_TI_VCACHE_HIT",
+	"T62x_TI_VCACHE_MISS",
+	"T62x_TI_FRONT_FACING",
+	"T62x_TI_BACK_FACING",
+	"T62x_TI_PRIM_VISIBLE",
+	"T62x_TI_PRIM_CULLED",
+	"T62x_TI_PRIM_CLIPPED",
+	"T62x_TI_LEVEL0",
+	"T62x_TI_LEVEL1",
+	"T62x_TI_LEVEL2",
+	"T62x_TI_LEVEL3",
+	"T62x_TI_LEVEL4",
+	"T62x_TI_LEVEL5",
+	"T62x_TI_LEVEL6",
+	"T62x_TI_LEVEL7",
+	"T62x_TI_COMMAND_1",
+	"T62x_TI_COMMAND_2",
+	"T62x_TI_COMMAND_3",
+	"T62x_TI_COMMAND_4",
+	"T62x_TI_COMMAND_5_7",
+	"T62x_TI_COMMAND_8_15",
+	"T62x_TI_COMMAND_16_63",
+	"T62x_TI_COMMAND_64",
+	"T62x_TI_COMPRESS_IN",
+	"T62x_TI_COMPRESS_OUT",
+	"T62x_TI_COMPRESS_FLUSH",
+	"T62x_TI_TIMESTAMPS",
+	"T62x_TI_PCACHE_HIT",
+	"T62x_TI_PCACHE_MISS",
+	"T62x_TI_PCACHE_LINE",
+	"T62x_TI_PCACHE_STALL",
+	"T62x_TI_WRBUF_HIT",
+	"T62x_TI_WRBUF_MISS",
+	"T62x_TI_WRBUF_LINE",
+	"T62x_TI_WRBUF_PARTIAL",
+	"T62x_TI_WRBUF_STALL",
+	"T62x_TI_ACTIVE",
+	"T62x_TI_LOADING_DESC",
+	"T62x_TI_INDEX_WAIT",
+	"T62x_TI_INDEX_RANGE_WAIT",
+	"T62x_TI_VERTEX_WAIT",
+	"T62x_TI_PCACHE_WAIT",
+	"T62x_TI_WRBUF_WAIT",
+	"T62x_TI_BUS_READ",
+	"T62x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_TI_UTLB_STALL",
+	"T62x_TI_UTLB_REPLAY_MISS",
+	"T62x_TI_UTLB_REPLAY_FULL",
+	"T62x_TI_UTLB_NEW_MISS",
+	"T62x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"T62x_SHADER_CORE_ACTIVE",
+	"T62x_FRAG_ACTIVE",
+	"T62x_FRAG_PRIMITIVES",
+	"T62x_FRAG_PRIMITIVES_DROPPED",
+	"T62x_FRAG_CYCLES_DESC",
+	"T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T62x_FRAG_CYCLES_VERT",
+	"T62x_FRAG_CYCLES_TRISETUP",
+	"T62x_FRAG_CYCLES_EZS_ACTIVE",
+	"T62x_FRAG_THREADS",
+	"T62x_FRAG_DUMMY_THREADS",
+	"T62x_FRAG_QUADS_RAST",
+	"T62x_FRAG_QUADS_EZS_TEST",
+	"T62x_FRAG_QUADS_EZS_KILLED",
+	"T62x_FRAG_THREADS_LZS_TEST",
+	"T62x_FRAG_THREADS_LZS_KILLED",
+	"T62x_FRAG_CYCLES_NO_TILE",
+	"T62x_FRAG_NUM_TILES",
+	"T62x_FRAG_TRANS_ELIM",
+	"T62x_COMPUTE_ACTIVE",
+	"T62x_COMPUTE_TASKS",
+	"T62x_COMPUTE_THREADS",
+	"T62x_COMPUTE_CYCLES_DESC",
+	"T62x_TRIPIPE_ACTIVE",
+	"T62x_ARITH_WORDS",
+	"T62x_ARITH_CYCLES_REG",
+	"T62x_ARITH_CYCLES_L0",
+	"T62x_ARITH_FRAG_DEPEND",
+	"T62x_LS_WORDS",
+	"T62x_LS_ISSUES",
+	"T62x_LS_RESTARTS",
+	"T62x_LS_REISSUES_MISS",
+	"T62x_LS_REISSUES_VD",
+	"T62x_LS_REISSUE_ATTRIB_MISS",
+	"T62x_LS_NO_WB",
+	"T62x_TEX_WORDS",
+	"T62x_TEX_BUBBLES",
+	"T62x_TEX_WORDS_L0",
+	"T62x_TEX_WORDS_DESC",
+	"T62x_TEX_ISSUES",
+	"T62x_TEX_RECIRC_FMISS",
+	"T62x_TEX_RECIRC_DESC",
+	"T62x_TEX_RECIRC_MULTI",
+	"T62x_TEX_RECIRC_PMISS",
+	"T62x_TEX_RECIRC_CONF",
+	"T62x_LSC_READ_HITS",
+	"T62x_LSC_READ_MISSES",
+	"T62x_LSC_WRITE_HITS",
+	"T62x_LSC_WRITE_MISSES",
+	"T62x_LSC_ATOMIC_HITS",
+	"T62x_LSC_ATOMIC_MISSES",
+	"T62x_LSC_LINE_FETCHES",
+	"T62x_LSC_DIRTY_LINE",
+	"T62x_LSC_SNOOPS",
+	"T62x_AXI_TLB_STALL",
+	"T62x_AXI_TLB_MISS",
+	"T62x_AXI_TLB_TRANSACTION",
+	"T62x_LS_TLB_MISS",
+	"T62x_LS_TLB_HIT",
+	"T62x_AXI_BEATS_READ",
+	"T62x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MMU_HIT",
+	"T62x_MMU_NEW_MISS",
+	"T62x_MMU_REPLAY_FULL",
+	"T62x_MMU_REPLAY_MISS",
+	"T62x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_UTLB_HIT",
+	"T62x_UTLB_NEW_MISS",
+	"T62x_UTLB_REPLAY_FULL",
+	"T62x_UTLB_REPLAY_MISS",
+	"T62x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_L2_EXT_WRITE_BEATS",
+	"T62x_L2_EXT_READ_BEATS",
+	"T62x_L2_ANY_LOOKUP",
+	"T62x_L2_READ_LOOKUP",
+	"T62x_L2_SREAD_LOOKUP",
+	"T62x_L2_READ_REPLAY",
+	"T62x_L2_READ_SNOOP",
+	"T62x_L2_READ_HIT",
+	"T62x_L2_CLEAN_MISS",
+	"T62x_L2_WRITE_LOOKUP",
+	"T62x_L2_SWRITE_LOOKUP",
+	"T62x_L2_WRITE_REPLAY",
+	"T62x_L2_WRITE_SNOOP",
+	"T62x_L2_WRITE_HIT",
+	"T62x_L2_EXT_READ_FULL",
+	"T62x_L2_EXT_READ_HALF",
+	"T62x_L2_EXT_WRITE_FULL",
+	"T62x_L2_EXT_WRITE_HALF",
+	"T62x_L2_EXT_READ",
+	"T62x_L2_EXT_READ_LINE",
+	"T62x_L2_EXT_WRITE",
+	"T62x_L2_EXT_WRITE_LINE",
+	"T62x_L2_EXT_WRITE_SMALL",
+	"T62x_L2_EXT_BARRIER",
+	"T62x_L2_EXT_AR_STALL",
+	"T62x_L2_EXT_R_BUF_FULL",
+	"T62x_L2_EXT_RD_BUF_FULL",
+	"T62x_L2_EXT_R_RAW",
+	"T62x_L2_EXT_W_STALL",
+	"T62x_L2_EXT_W_BUF_FULL",
+	"T62x_L2_EXT_R_W_HAZARD",
+	"T62x_L2_TAG_HAZARD",
+	"T62x_L2_SNOOP_FULL",
+	"T62x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t72x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T72x_GPU_ACTIVE",
+	"T72x_IRQ_ACTIVE",
+	"T72x_JS0_JOBS",
+	"T72x_JS0_TASKS",
+	"T72x_JS0_ACTIVE",
+	"T72x_JS1_JOBS",
+	"T72x_JS1_TASKS",
+	"T72x_JS1_ACTIVE",
+	"T72x_JS2_JOBS",
+	"T72x_JS2_TASKS",
+	"T72x_JS2_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T72x_TI_JOBS_PROCESSED",
+	"T72x_TI_TRIANGLES",
+	"T72x_TI_QUADS",
+	"T72x_TI_POLYGONS",
+	"T72x_TI_POINTS",
+	"T72x_TI_LINES",
+	"T72x_TI_FRONT_FACING",
+	"T72x_TI_BACK_FACING",
+	"T72x_TI_PRIM_VISIBLE",
+	"T72x_TI_PRIM_CULLED",
+	"T72x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T72x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T72x_FRAG_ACTIVE",
+	"T72x_FRAG_PRIMITIVES",
+	"T72x_FRAG_PRIMITIVES_DROPPED",
+	"T72x_FRAG_THREADS",
+	"T72x_FRAG_DUMMY_THREADS",
+	"T72x_FRAG_QUADS_RAST",
+	"T72x_FRAG_QUADS_EZS_TEST",
+	"T72x_FRAG_QUADS_EZS_KILLED",
+	"T72x_FRAG_THREADS_LZS_TEST",
+	"T72x_FRAG_THREADS_LZS_KILLED",
+	"T72x_FRAG_CYCLES_NO_TILE",
+	"T72x_FRAG_NUM_TILES",
+	"T72x_FRAG_TRANS_ELIM",
+	"T72x_COMPUTE_ACTIVE",
+	"T72x_COMPUTE_TASKS",
+	"T72x_COMPUTE_THREADS",
+	"T72x_TRIPIPE_ACTIVE",
+	"T72x_ARITH_WORDS",
+	"T72x_ARITH_CYCLES_REG",
+	"T72x_LS_WORDS",
+	"T72x_LS_ISSUES",
+	"T72x_LS_RESTARTS",
+	"T72x_LS_REISSUES_MISS",
+	"T72x_TEX_WORDS",
+	"T72x_TEX_BUBBLES",
+	"T72x_TEX_ISSUES",
+	"T72x_LSC_READ_HITS",
+	"T72x_LSC_READ_MISSES",
+	"T72x_LSC_WRITE_HITS",
+	"T72x_LSC_WRITE_MISSES",
+	"T72x_LSC_ATOMIC_HITS",
+	"T72x_LSC_ATOMIC_MISSES",
+	"T72x_LSC_LINE_FETCHES",
+	"T72x_LSC_DIRTY_LINE",
+	"T72x_LSC_SNOOPS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T72x_L2_EXT_WRITE_BEAT",
+	"T72x_L2_EXT_READ_BEAT",
+	"T72x_L2_READ_SNOOP",
+	"T72x_L2_READ_HIT",
+	"T72x_L2_WRITE_SNOOP",
+	"T72x_L2_WRITE_HIT",
+	"T72x_L2_EXT_WRITE_SMALL",
+	"T72x_L2_EXT_BARRIER",
+	"T72x_L2_EXT_AR_STALL",
+	"T72x_L2_EXT_W_STALL",
+	"T72x_L2_SNOOP_FULL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	""
+};
+
+static const char * const hardware_counters_mali_t76x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MESSAGES_SENT",
+	"T76x_MESSAGES_RECEIVED",
+	"T76x_GPU_ACTIVE",
+	"T76x_IRQ_ACTIVE",
+	"T76x_JS0_JOBS",
+	"T76x_JS0_TASKS",
+	"T76x_JS0_ACTIVE",
+	"",
+	"T76x_JS0_WAIT_READ",
+	"T76x_JS0_WAIT_ISSUE",
+	"T76x_JS0_WAIT_DEPEND",
+	"T76x_JS0_WAIT_FINISH",
+	"T76x_JS1_JOBS",
+	"T76x_JS1_TASKS",
+	"T76x_JS1_ACTIVE",
+	"",
+	"T76x_JS1_WAIT_READ",
+	"T76x_JS1_WAIT_ISSUE",
+	"T76x_JS1_WAIT_DEPEND",
+	"T76x_JS1_WAIT_FINISH",
+	"T76x_JS2_JOBS",
+	"T76x_JS2_TASKS",
+	"T76x_JS2_ACTIVE",
+	"",
+	"T76x_JS2_WAIT_READ",
+	"T76x_JS2_WAIT_ISSUE",
+	"T76x_JS2_WAIT_DEPEND",
+	"T76x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T76x_TI_JOBS_PROCESSED",
+	"T76x_TI_TRIANGLES",
+	"T76x_TI_QUADS",
+	"T76x_TI_POLYGONS",
+	"T76x_TI_POINTS",
+	"T76x_TI_LINES",
+	"T76x_TI_VCACHE_HIT",
+	"T76x_TI_VCACHE_MISS",
+	"T76x_TI_FRONT_FACING",
+	"T76x_TI_BACK_FACING",
+	"T76x_TI_PRIM_VISIBLE",
+	"T76x_TI_PRIM_CULLED",
+	"T76x_TI_PRIM_CLIPPED",
+	"T76x_TI_LEVEL0",
+	"T76x_TI_LEVEL1",
+	"T76x_TI_LEVEL2",
+	"T76x_TI_LEVEL3",
+	"T76x_TI_LEVEL4",
+	"T76x_TI_LEVEL5",
+	"T76x_TI_LEVEL6",
+	"T76x_TI_LEVEL7",
+	"T76x_TI_COMMAND_1",
+	"T76x_TI_COMMAND_2",
+	"T76x_TI_COMMAND_3",
+	"T76x_TI_COMMAND_4",
+	"T76x_TI_COMMAND_5_7",
+	"T76x_TI_COMMAND_8_15",
+	"T76x_TI_COMMAND_16_63",
+	"T76x_TI_COMMAND_64",
+	"T76x_TI_COMPRESS_IN",
+	"T76x_TI_COMPRESS_OUT",
+	"T76x_TI_COMPRESS_FLUSH",
+	"T76x_TI_TIMESTAMPS",
+	"T76x_TI_PCACHE_HIT",
+	"T76x_TI_PCACHE_MISS",
+	"T76x_TI_PCACHE_LINE",
+	"T76x_TI_PCACHE_STALL",
+	"T76x_TI_WRBUF_HIT",
+	"T76x_TI_WRBUF_MISS",
+	"T76x_TI_WRBUF_LINE",
+	"T76x_TI_WRBUF_PARTIAL",
+	"T76x_TI_WRBUF_STALL",
+	"T76x_TI_ACTIVE",
+	"T76x_TI_LOADING_DESC",
+	"T76x_TI_INDEX_WAIT",
+	"T76x_TI_INDEX_RANGE_WAIT",
+	"T76x_TI_VERTEX_WAIT",
+	"T76x_TI_PCACHE_WAIT",
+	"T76x_TI_WRBUF_WAIT",
+	"T76x_TI_BUS_READ",
+	"T76x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_TI_UTLB_HIT",
+	"T76x_TI_UTLB_NEW_MISS",
+	"T76x_TI_UTLB_REPLAY_FULL",
+	"T76x_TI_UTLB_REPLAY_MISS",
+	"T76x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T76x_FRAG_ACTIVE",
+	"T76x_FRAG_PRIMITIVES",
+	"T76x_FRAG_PRIMITIVES_DROPPED",
+	"T76x_FRAG_CYCLES_DESC",
+	"T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T76x_FRAG_CYCLES_VERT",
+	"T76x_FRAG_CYCLES_TRISETUP",
+	"T76x_FRAG_CYCLES_EZS_ACTIVE",
+	"T76x_FRAG_THREADS",
+	"T76x_FRAG_DUMMY_THREADS",
+	"T76x_FRAG_QUADS_RAST",
+	"T76x_FRAG_QUADS_EZS_TEST",
+	"T76x_FRAG_QUADS_EZS_KILLED",
+	"T76x_FRAG_THREADS_LZS_TEST",
+	"T76x_FRAG_THREADS_LZS_KILLED",
+	"T76x_FRAG_CYCLES_NO_TILE",
+	"T76x_FRAG_NUM_TILES",
+	"T76x_FRAG_TRANS_ELIM",
+	"T76x_COMPUTE_ACTIVE",
+	"T76x_COMPUTE_TASKS",
+	"T76x_COMPUTE_THREADS",
+	"T76x_COMPUTE_CYCLES_DESC",
+	"T76x_TRIPIPE_ACTIVE",
+	"T76x_ARITH_WORDS",
+	"T76x_ARITH_CYCLES_REG",
+	"T76x_ARITH_CYCLES_L0",
+	"T76x_ARITH_FRAG_DEPEND",
+	"T76x_LS_WORDS",
+	"T76x_LS_ISSUES",
+	"T76x_LS_REISSUE_ATTR",
+	"T76x_LS_REISSUES_VARY",
+	"T76x_LS_VARY_RV_MISS",
+	"T76x_LS_VARY_RV_HIT",
+	"T76x_LS_NO_UNPARK",
+	"T76x_TEX_WORDS",
+	"T76x_TEX_BUBBLES",
+	"T76x_TEX_WORDS_L0",
+	"T76x_TEX_WORDS_DESC",
+	"T76x_TEX_ISSUES",
+	"T76x_TEX_RECIRC_FMISS",
+	"T76x_TEX_RECIRC_DESC",
+	"T76x_TEX_RECIRC_MULTI",
+	"T76x_TEX_RECIRC_PMISS",
+	"T76x_TEX_RECIRC_CONF",
+	"T76x_LSC_READ_HITS",
+	"T76x_LSC_READ_OP",
+	"T76x_LSC_WRITE_HITS",
+	"T76x_LSC_WRITE_OP",
+	"T76x_LSC_ATOMIC_HITS",
+	"T76x_LSC_ATOMIC_OP",
+	"T76x_LSC_LINE_FETCHES",
+	"T76x_LSC_DIRTY_LINE",
+	"T76x_LSC_SNOOPS",
+	"T76x_AXI_TLB_STALL",
+	"T76x_AXI_TLB_MISS",
+	"T76x_AXI_TLB_TRANSACTION",
+	"T76x_LS_TLB_MISS",
+	"T76x_LS_TLB_HIT",
+	"T76x_AXI_BEATS_READ",
+	"T76x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MMU_HIT",
+	"T76x_MMU_NEW_MISS",
+	"T76x_MMU_REPLAY_FULL",
+	"T76x_MMU_REPLAY_MISS",
+	"T76x_MMU_TABLE_WALK",
+	"T76x_MMU_REQUESTS",
+	"",
+	"",
+	"T76x_UTLB_HIT",
+	"T76x_UTLB_NEW_MISS",
+	"T76x_UTLB_REPLAY_FULL",
+	"T76x_UTLB_REPLAY_MISS",
+	"T76x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_L2_EXT_WRITE_BEATS",
+	"T76x_L2_EXT_READ_BEATS",
+	"T76x_L2_ANY_LOOKUP",
+	"T76x_L2_READ_LOOKUP",
+	"T76x_L2_SREAD_LOOKUP",
+	"T76x_L2_READ_REPLAY",
+	"T76x_L2_READ_SNOOP",
+	"T76x_L2_READ_HIT",
+	"T76x_L2_CLEAN_MISS",
+	"T76x_L2_WRITE_LOOKUP",
+	"T76x_L2_SWRITE_LOOKUP",
+	"T76x_L2_WRITE_REPLAY",
+	"T76x_L2_WRITE_SNOOP",
+	"T76x_L2_WRITE_HIT",
+	"T76x_L2_EXT_READ_FULL",
+	"",
+	"T76x_L2_EXT_WRITE_FULL",
+	"T76x_L2_EXT_R_W_HAZARD",
+	"T76x_L2_EXT_READ",
+	"T76x_L2_EXT_READ_LINE",
+	"T76x_L2_EXT_WRITE",
+	"T76x_L2_EXT_WRITE_LINE",
+	"T76x_L2_EXT_WRITE_SMALL",
+	"T76x_L2_EXT_BARRIER",
+	"T76x_L2_EXT_AR_STALL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_EXT_RD_BUF_FULL",
+	"T76x_L2_EXT_R_RAW",
+	"T76x_L2_EXT_W_STALL",
+	"T76x_L2_EXT_W_BUF_FULL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_TAG_HAZARD",
+	"T76x_L2_SNOOP_FULL",
+	"T76x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t82x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MESSAGES_SENT",
+	"T82x_MESSAGES_RECEIVED",
+	"T82x_GPU_ACTIVE",
+	"T82x_IRQ_ACTIVE",
+	"T82x_JS0_JOBS",
+	"T82x_JS0_TASKS",
+	"T82x_JS0_ACTIVE",
+	"",
+	"T82x_JS0_WAIT_READ",
+	"T82x_JS0_WAIT_ISSUE",
+	"T82x_JS0_WAIT_DEPEND",
+	"T82x_JS0_WAIT_FINISH",
+	"T82x_JS1_JOBS",
+	"T82x_JS1_TASKS",
+	"T82x_JS1_ACTIVE",
+	"",
+	"T82x_JS1_WAIT_READ",
+	"T82x_JS1_WAIT_ISSUE",
+	"T82x_JS1_WAIT_DEPEND",
+	"T82x_JS1_WAIT_FINISH",
+	"T82x_JS2_JOBS",
+	"T82x_JS2_TASKS",
+	"T82x_JS2_ACTIVE",
+	"",
+	"T82x_JS2_WAIT_READ",
+	"T82x_JS2_WAIT_ISSUE",
+	"T82x_JS2_WAIT_DEPEND",
+	"T82x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T82x_TI_JOBS_PROCESSED",
+	"T82x_TI_TRIANGLES",
+	"T82x_TI_QUADS",
+	"T82x_TI_POLYGONS",
+	"T82x_TI_POINTS",
+	"T82x_TI_LINES",
+	"T82x_TI_FRONT_FACING",
+	"T82x_TI_BACK_FACING",
+	"T82x_TI_PRIM_VISIBLE",
+	"T82x_TI_PRIM_CULLED",
+	"T82x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T82x_FRAG_ACTIVE",
+	"T82x_FRAG_PRIMITIVES",
+	"T82x_FRAG_PRIMITIVES_DROPPED",
+	"T82x_FRAG_CYCLES_DESC",
+	"T82x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T82x_FRAG_CYCLES_VERT",
+	"T82x_FRAG_CYCLES_TRISETUP",
+	"T82x_FRAG_CYCLES_EZS_ACTIVE",
+	"T82x_FRAG_THREADS",
+	"T82x_FRAG_DUMMY_THREADS",
+	"T82x_FRAG_QUADS_RAST",
+	"T82x_FRAG_QUADS_EZS_TEST",
+	"T82x_FRAG_QUADS_EZS_KILLED",
+	"T82x_FRAG_THREADS_LZS_TEST",
+	"T82x_FRAG_THREADS_LZS_KILLED",
+	"T82x_FRAG_CYCLES_NO_TILE",
+	"T82x_FRAG_NUM_TILES",
+	"T82x_FRAG_TRANS_ELIM",
+	"T82x_COMPUTE_ACTIVE",
+	"T82x_COMPUTE_TASKS",
+	"T82x_COMPUTE_THREADS",
+	"T82x_COMPUTE_CYCLES_DESC",
+	"T82x_TRIPIPE_ACTIVE",
+	"T82x_ARITH_WORDS",
+	"T82x_ARITH_CYCLES_REG",
+	"T82x_ARITH_CYCLES_L0",
+	"T82x_ARITH_FRAG_DEPEND",
+	"T82x_LS_WORDS",
+	"T82x_LS_ISSUES",
+	"T82x_LS_REISSUE_ATTR",
+	"T82x_LS_REISSUES_VARY",
+	"T82x_LS_VARY_RV_MISS",
+	"T82x_LS_VARY_RV_HIT",
+	"T82x_LS_NO_UNPARK",
+	"T82x_TEX_WORDS",
+	"T82x_TEX_BUBBLES",
+	"T82x_TEX_WORDS_L0",
+	"T82x_TEX_WORDS_DESC",
+	"T82x_TEX_ISSUES",
+	"T82x_TEX_RECIRC_FMISS",
+	"T82x_TEX_RECIRC_DESC",
+	"T82x_TEX_RECIRC_MULTI",
+	"T82x_TEX_RECIRC_PMISS",
+	"T82x_TEX_RECIRC_CONF",
+	"T82x_LSC_READ_HITS",
+	"T82x_LSC_READ_OP",
+	"T82x_LSC_WRITE_HITS",
+	"T82x_LSC_WRITE_OP",
+	"T82x_LSC_ATOMIC_HITS",
+	"T82x_LSC_ATOMIC_OP",
+	"T82x_LSC_LINE_FETCHES",
+	"T82x_LSC_DIRTY_LINE",
+	"T82x_LSC_SNOOPS",
+	"T82x_AXI_TLB_STALL",
+	"T82x_AXI_TLB_MISS",
+	"T82x_AXI_TLB_TRANSACTION",
+	"T82x_LS_TLB_MISS",
+	"T82x_LS_TLB_HIT",
+	"T82x_AXI_BEATS_READ",
+	"T82x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MMU_HIT",
+	"T82x_MMU_NEW_MISS",
+	"T82x_MMU_REPLAY_FULL",
+	"T82x_MMU_REPLAY_MISS",
+	"T82x_MMU_TABLE_WALK",
+	"T82x_MMU_REQUESTS",
+	"",
+	"",
+	"T82x_UTLB_HIT",
+	"T82x_UTLB_NEW_MISS",
+	"T82x_UTLB_REPLAY_FULL",
+	"T82x_UTLB_REPLAY_MISS",
+	"T82x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_L2_EXT_WRITE_BEATS",
+	"T82x_L2_EXT_READ_BEATS",
+	"T82x_L2_ANY_LOOKUP",
+	"T82x_L2_READ_LOOKUP",
+	"T82x_L2_SREAD_LOOKUP",
+	"T82x_L2_READ_REPLAY",
+	"T82x_L2_READ_SNOOP",
+	"T82x_L2_READ_HIT",
+	"T82x_L2_CLEAN_MISS",
+	"T82x_L2_WRITE_LOOKUP",
+	"T82x_L2_SWRITE_LOOKUP",
+	"T82x_L2_WRITE_REPLAY",
+	"T82x_L2_WRITE_SNOOP",
+	"T82x_L2_WRITE_HIT",
+	"T82x_L2_EXT_READ_FULL",
+	"",
+	"T82x_L2_EXT_WRITE_FULL",
+	"T82x_L2_EXT_R_W_HAZARD",
+	"T82x_L2_EXT_READ",
+	"T82x_L2_EXT_READ_LINE",
+	"T82x_L2_EXT_WRITE",
+	"T82x_L2_EXT_WRITE_LINE",
+	"T82x_L2_EXT_WRITE_SMALL",
+	"T82x_L2_EXT_BARRIER",
+	"T82x_L2_EXT_AR_STALL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_EXT_RD_BUF_FULL",
+	"T82x_L2_EXT_R_RAW",
+	"T82x_L2_EXT_W_STALL",
+	"T82x_L2_EXT_W_BUF_FULL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_TAG_HAZARD",
+	"T82x_L2_SNOOP_FULL",
+	"T82x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t83x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MESSAGES_SENT",
+	"T83x_MESSAGES_RECEIVED",
+	"T83x_GPU_ACTIVE",
+	"T83x_IRQ_ACTIVE",
+	"T83x_JS0_JOBS",
+	"T83x_JS0_TASKS",
+	"T83x_JS0_ACTIVE",
+	"",
+	"T83x_JS0_WAIT_READ",
+	"T83x_JS0_WAIT_ISSUE",
+	"T83x_JS0_WAIT_DEPEND",
+	"T83x_JS0_WAIT_FINISH",
+	"T83x_JS1_JOBS",
+	"T83x_JS1_TASKS",
+	"T83x_JS1_ACTIVE",
+	"",
+	"T83x_JS1_WAIT_READ",
+	"T83x_JS1_WAIT_ISSUE",
+	"T83x_JS1_WAIT_DEPEND",
+	"T83x_JS1_WAIT_FINISH",
+	"T83x_JS2_JOBS",
+	"T83x_JS2_TASKS",
+	"T83x_JS2_ACTIVE",
+	"",
+	"T83x_JS2_WAIT_READ",
+	"T83x_JS2_WAIT_ISSUE",
+	"T83x_JS2_WAIT_DEPEND",
+	"T83x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T83x_TI_JOBS_PROCESSED",
+	"T83x_TI_TRIANGLES",
+	"T83x_TI_QUADS",
+	"T83x_TI_POLYGONS",
+	"T83x_TI_POINTS",
+	"T83x_TI_LINES",
+	"T83x_TI_FRONT_FACING",
+	"T83x_TI_BACK_FACING",
+	"T83x_TI_PRIM_VISIBLE",
+	"T83x_TI_PRIM_CULLED",
+	"T83x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T83x_FRAG_ACTIVE",
+	"T83x_FRAG_PRIMITIVES",
+	"T83x_FRAG_PRIMITIVES_DROPPED",
+	"T83x_FRAG_CYCLES_DESC",
+	"T83x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T83x_FRAG_CYCLES_VERT",
+	"T83x_FRAG_CYCLES_TRISETUP",
+	"T83x_FRAG_CYCLES_EZS_ACTIVE",
+	"T83x_FRAG_THREADS",
+	"T83x_FRAG_DUMMY_THREADS",
+	"T83x_FRAG_QUADS_RAST",
+	"T83x_FRAG_QUADS_EZS_TEST",
+	"T83x_FRAG_QUADS_EZS_KILLED",
+	"T83x_FRAG_THREADS_LZS_TEST",
+	"T83x_FRAG_THREADS_LZS_KILLED",
+	"T83x_FRAG_CYCLES_NO_TILE",
+	"T83x_FRAG_NUM_TILES",
+	"T83x_FRAG_TRANS_ELIM",
+	"T83x_COMPUTE_ACTIVE",
+	"T83x_COMPUTE_TASKS",
+	"T83x_COMPUTE_THREADS",
+	"T83x_COMPUTE_CYCLES_DESC",
+	"T83x_TRIPIPE_ACTIVE",
+	"T83x_ARITH_WORDS",
+	"T83x_ARITH_CYCLES_REG",
+	"T83x_ARITH_CYCLES_L0",
+	"T83x_ARITH_FRAG_DEPEND",
+	"T83x_LS_WORDS",
+	"T83x_LS_ISSUES",
+	"T83x_LS_REISSUE_ATTR",
+	"T83x_LS_REISSUES_VARY",
+	"T83x_LS_VARY_RV_MISS",
+	"T83x_LS_VARY_RV_HIT",
+	"T83x_LS_NO_UNPARK",
+	"T83x_TEX_WORDS",
+	"T83x_TEX_BUBBLES",
+	"T83x_TEX_WORDS_L0",
+	"T83x_TEX_WORDS_DESC",
+	"T83x_TEX_ISSUES",
+	"T83x_TEX_RECIRC_FMISS",
+	"T83x_TEX_RECIRC_DESC",
+	"T83x_TEX_RECIRC_MULTI",
+	"T83x_TEX_RECIRC_PMISS",
+	"T83x_TEX_RECIRC_CONF",
+	"T83x_LSC_READ_HITS",
+	"T83x_LSC_READ_OP",
+	"T83x_LSC_WRITE_HITS",
+	"T83x_LSC_WRITE_OP",
+	"T83x_LSC_ATOMIC_HITS",
+	"T83x_LSC_ATOMIC_OP",
+	"T83x_LSC_LINE_FETCHES",
+	"T83x_LSC_DIRTY_LINE",
+	"T83x_LSC_SNOOPS",
+	"T83x_AXI_TLB_STALL",
+	"T83x_AXI_TLB_MISS",
+	"T83x_AXI_TLB_TRANSACTION",
+	"T83x_LS_TLB_MISS",
+	"T83x_LS_TLB_HIT",
+	"T83x_AXI_BEATS_READ",
+	"T83x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MMU_HIT",
+	"T83x_MMU_NEW_MISS",
+	"T83x_MMU_REPLAY_FULL",
+	"T83x_MMU_REPLAY_MISS",
+	"T83x_MMU_TABLE_WALK",
+	"T83x_MMU_REQUESTS",
+	"",
+	"",
+	"T83x_UTLB_HIT",
+	"T83x_UTLB_NEW_MISS",
+	"T83x_UTLB_REPLAY_FULL",
+	"T83x_UTLB_REPLAY_MISS",
+	"T83x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_L2_EXT_WRITE_BEATS",
+	"T83x_L2_EXT_READ_BEATS",
+	"T83x_L2_ANY_LOOKUP",
+	"T83x_L2_READ_LOOKUP",
+	"T83x_L2_SREAD_LOOKUP",
+	"T83x_L2_READ_REPLAY",
+	"T83x_L2_READ_SNOOP",
+	"T83x_L2_READ_HIT",
+	"T83x_L2_CLEAN_MISS",
+	"T83x_L2_WRITE_LOOKUP",
+	"T83x_L2_SWRITE_LOOKUP",
+	"T83x_L2_WRITE_REPLAY",
+	"T83x_L2_WRITE_SNOOP",
+	"T83x_L2_WRITE_HIT",
+	"T83x_L2_EXT_READ_FULL",
+	"",
+	"T83x_L2_EXT_WRITE_FULL",
+	"T83x_L2_EXT_R_W_HAZARD",
+	"T83x_L2_EXT_READ",
+	"T83x_L2_EXT_READ_LINE",
+	"T83x_L2_EXT_WRITE",
+	"T83x_L2_EXT_WRITE_LINE",
+	"T83x_L2_EXT_WRITE_SMALL",
+	"T83x_L2_EXT_BARRIER",
+	"T83x_L2_EXT_AR_STALL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_EXT_RD_BUF_FULL",
+	"T83x_L2_EXT_R_RAW",
+	"T83x_L2_EXT_W_STALL",
+	"T83x_L2_EXT_W_BUF_FULL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_TAG_HAZARD",
+	"T83x_L2_SNOOP_FULL",
+	"T83x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t86x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MESSAGES_SENT",
+	"T86x_MESSAGES_RECEIVED",
+	"T86x_GPU_ACTIVE",
+	"T86x_IRQ_ACTIVE",
+	"T86x_JS0_JOBS",
+	"T86x_JS0_TASKS",
+	"T86x_JS0_ACTIVE",
+	"",
+	"T86x_JS0_WAIT_READ",
+	"T86x_JS0_WAIT_ISSUE",
+	"T86x_JS0_WAIT_DEPEND",
+	"T86x_JS0_WAIT_FINISH",
+	"T86x_JS1_JOBS",
+	"T86x_JS1_TASKS",
+	"T86x_JS1_ACTIVE",
+	"",
+	"T86x_JS1_WAIT_READ",
+	"T86x_JS1_WAIT_ISSUE",
+	"T86x_JS1_WAIT_DEPEND",
+	"T86x_JS1_WAIT_FINISH",
+	"T86x_JS2_JOBS",
+	"T86x_JS2_TASKS",
+	"T86x_JS2_ACTIVE",
+	"",
+	"T86x_JS2_WAIT_READ",
+	"T86x_JS2_WAIT_ISSUE",
+	"T86x_JS2_WAIT_DEPEND",
+	"T86x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T86x_TI_JOBS_PROCESSED",
+	"T86x_TI_TRIANGLES",
+	"T86x_TI_QUADS",
+	"T86x_TI_POLYGONS",
+	"T86x_TI_POINTS",
+	"T86x_TI_LINES",
+	"T86x_TI_VCACHE_HIT",
+	"T86x_TI_VCACHE_MISS",
+	"T86x_TI_FRONT_FACING",
+	"T86x_TI_BACK_FACING",
+	"T86x_TI_PRIM_VISIBLE",
+	"T86x_TI_PRIM_CULLED",
+	"T86x_TI_PRIM_CLIPPED",
+	"T86x_TI_LEVEL0",
+	"T86x_TI_LEVEL1",
+	"T86x_TI_LEVEL2",
+	"T86x_TI_LEVEL3",
+	"T86x_TI_LEVEL4",
+	"T86x_TI_LEVEL5",
+	"T86x_TI_LEVEL6",
+	"T86x_TI_LEVEL7",
+	"T86x_TI_COMMAND_1",
+	"T86x_TI_COMMAND_2",
+	"T86x_TI_COMMAND_3",
+	"T86x_TI_COMMAND_4",
+	"T86x_TI_COMMAND_5_7",
+	"T86x_TI_COMMAND_8_15",
+	"T86x_TI_COMMAND_16_63",
+	"T86x_TI_COMMAND_64",
+	"T86x_TI_COMPRESS_IN",
+	"T86x_TI_COMPRESS_OUT",
+	"T86x_TI_COMPRESS_FLUSH",
+	"T86x_TI_TIMESTAMPS",
+	"T86x_TI_PCACHE_HIT",
+	"T86x_TI_PCACHE_MISS",
+	"T86x_TI_PCACHE_LINE",
+	"T86x_TI_PCACHE_STALL",
+	"T86x_TI_WRBUF_HIT",
+	"T86x_TI_WRBUF_MISS",
+	"T86x_TI_WRBUF_LINE",
+	"T86x_TI_WRBUF_PARTIAL",
+	"T86x_TI_WRBUF_STALL",
+	"T86x_TI_ACTIVE",
+	"T86x_TI_LOADING_DESC",
+	"T86x_TI_INDEX_WAIT",
+	"T86x_TI_INDEX_RANGE_WAIT",
+	"T86x_TI_VERTEX_WAIT",
+	"T86x_TI_PCACHE_WAIT",
+	"T86x_TI_WRBUF_WAIT",
+	"T86x_TI_BUS_READ",
+	"T86x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_TI_UTLB_HIT",
+	"T86x_TI_UTLB_NEW_MISS",
+	"T86x_TI_UTLB_REPLAY_FULL",
+	"T86x_TI_UTLB_REPLAY_MISS",
+	"T86x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T86x_FRAG_ACTIVE",
+	"T86x_FRAG_PRIMITIVES",
+	"T86x_FRAG_PRIMITIVES_DROPPED",
+	"T86x_FRAG_CYCLES_DESC",
+	"T86x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T86x_FRAG_CYCLES_VERT",
+	"T86x_FRAG_CYCLES_TRISETUP",
+	"T86x_FRAG_CYCLES_EZS_ACTIVE",
+	"T86x_FRAG_THREADS",
+	"T86x_FRAG_DUMMY_THREADS",
+	"T86x_FRAG_QUADS_RAST",
+	"T86x_FRAG_QUADS_EZS_TEST",
+	"T86x_FRAG_QUADS_EZS_KILLED",
+	"T86x_FRAG_THREADS_LZS_TEST",
+	"T86x_FRAG_THREADS_LZS_KILLED",
+	"T86x_FRAG_CYCLES_NO_TILE",
+	"T86x_FRAG_NUM_TILES",
+	"T86x_FRAG_TRANS_ELIM",
+	"T86x_COMPUTE_ACTIVE",
+	"T86x_COMPUTE_TASKS",
+	"T86x_COMPUTE_THREADS",
+	"T86x_COMPUTE_CYCLES_DESC",
+	"T86x_TRIPIPE_ACTIVE",
+	"T86x_ARITH_WORDS",
+	"T86x_ARITH_CYCLES_REG",
+	"T86x_ARITH_CYCLES_L0",
+	"T86x_ARITH_FRAG_DEPEND",
+	"T86x_LS_WORDS",
+	"T86x_LS_ISSUES",
+	"T86x_LS_REISSUE_ATTR",
+	"T86x_LS_REISSUES_VARY",
+	"T86x_LS_VARY_RV_MISS",
+	"T86x_LS_VARY_RV_HIT",
+	"T86x_LS_NO_UNPARK",
+	"T86x_TEX_WORDS",
+	"T86x_TEX_BUBBLES",
+	"T86x_TEX_WORDS_L0",
+	"T86x_TEX_WORDS_DESC",
+	"T86x_TEX_ISSUES",
+	"T86x_TEX_RECIRC_FMISS",
+	"T86x_TEX_RECIRC_DESC",
+	"T86x_TEX_RECIRC_MULTI",
+	"T86x_TEX_RECIRC_PMISS",
+	"T86x_TEX_RECIRC_CONF",
+	"T86x_LSC_READ_HITS",
+	"T86x_LSC_READ_OP",
+	"T86x_LSC_WRITE_HITS",
+	"T86x_LSC_WRITE_OP",
+	"T86x_LSC_ATOMIC_HITS",
+	"T86x_LSC_ATOMIC_OP",
+	"T86x_LSC_LINE_FETCHES",
+	"T86x_LSC_DIRTY_LINE",
+	"T86x_LSC_SNOOPS",
+	"T86x_AXI_TLB_STALL",
+	"T86x_AXI_TLB_MISS",
+	"T86x_AXI_TLB_TRANSACTION",
+	"T86x_LS_TLB_MISS",
+	"T86x_LS_TLB_HIT",
+	"T86x_AXI_BEATS_READ",
+	"T86x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MMU_HIT",
+	"T86x_MMU_NEW_MISS",
+	"T86x_MMU_REPLAY_FULL",
+	"T86x_MMU_REPLAY_MISS",
+	"T86x_MMU_TABLE_WALK",
+	"T86x_MMU_REQUESTS",
+	"",
+	"",
+	"T86x_UTLB_HIT",
+	"T86x_UTLB_NEW_MISS",
+	"T86x_UTLB_REPLAY_FULL",
+	"T86x_UTLB_REPLAY_MISS",
+	"T86x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_L2_EXT_WRITE_BEATS",
+	"T86x_L2_EXT_READ_BEATS",
+	"T86x_L2_ANY_LOOKUP",
+	"T86x_L2_READ_LOOKUP",
+	"T86x_L2_SREAD_LOOKUP",
+	"T86x_L2_READ_REPLAY",
+	"T86x_L2_READ_SNOOP",
+	"T86x_L2_READ_HIT",
+	"T86x_L2_CLEAN_MISS",
+	"T86x_L2_WRITE_LOOKUP",
+	"T86x_L2_SWRITE_LOOKUP",
+	"T86x_L2_WRITE_REPLAY",
+	"T86x_L2_WRITE_SNOOP",
+	"T86x_L2_WRITE_HIT",
+	"T86x_L2_EXT_READ_FULL",
+	"",
+	"T86x_L2_EXT_WRITE_FULL",
+	"T86x_L2_EXT_R_W_HAZARD",
+	"T86x_L2_EXT_READ",
+	"T86x_L2_EXT_READ_LINE",
+	"T86x_L2_EXT_WRITE",
+	"T86x_L2_EXT_WRITE_LINE",
+	"T86x_L2_EXT_WRITE_SMALL",
+	"T86x_L2_EXT_BARRIER",
+	"T86x_L2_EXT_AR_STALL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_EXT_RD_BUF_FULL",
+	"T86x_L2_EXT_R_RAW",
+	"T86x_L2_EXT_W_STALL",
+	"T86x_L2_EXT_W_BUF_FULL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_TAG_HAZARD",
+	"T86x_L2_SNOOP_FULL",
+	"T86x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t88x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MESSAGES_SENT",
+	"T88x_MESSAGES_RECEIVED",
+	"T88x_GPU_ACTIVE",
+	"T88x_IRQ_ACTIVE",
+	"T88x_JS0_JOBS",
+	"T88x_JS0_TASKS",
+	"T88x_JS0_ACTIVE",
+	"",
+	"T88x_JS0_WAIT_READ",
+	"T88x_JS0_WAIT_ISSUE",
+	"T88x_JS0_WAIT_DEPEND",
+	"T88x_JS0_WAIT_FINISH",
+	"T88x_JS1_JOBS",
+	"T88x_JS1_TASKS",
+	"T88x_JS1_ACTIVE",
+	"",
+	"T88x_JS1_WAIT_READ",
+	"T88x_JS1_WAIT_ISSUE",
+	"T88x_JS1_WAIT_DEPEND",
+	"T88x_JS1_WAIT_FINISH",
+	"T88x_JS2_JOBS",
+	"T88x_JS2_TASKS",
+	"T88x_JS2_ACTIVE",
+	"",
+	"T88x_JS2_WAIT_READ",
+	"T88x_JS2_WAIT_ISSUE",
+	"T88x_JS2_WAIT_DEPEND",
+	"T88x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T88x_TI_JOBS_PROCESSED",
+	"T88x_TI_TRIANGLES",
+	"T88x_TI_QUADS",
+	"T88x_TI_POLYGONS",
+	"T88x_TI_POINTS",
+	"T88x_TI_LINES",
+	"T88x_TI_VCACHE_HIT",
+	"T88x_TI_VCACHE_MISS",
+	"T88x_TI_FRONT_FACING",
+	"T88x_TI_BACK_FACING",
+	"T88x_TI_PRIM_VISIBLE",
+	"T88x_TI_PRIM_CULLED",
+	"T88x_TI_PRIM_CLIPPED",
+	"T88x_TI_LEVEL0",
+	"T88x_TI_LEVEL1",
+	"T88x_TI_LEVEL2",
+	"T88x_TI_LEVEL3",
+	"T88x_TI_LEVEL4",
+	"T88x_TI_LEVEL5",
+	"T88x_TI_LEVEL6",
+	"T88x_TI_LEVEL7",
+	"T88x_TI_COMMAND_1",
+	"T88x_TI_COMMAND_2",
+	"T88x_TI_COMMAND_3",
+	"T88x_TI_COMMAND_4",
+	"T88x_TI_COMMAND_5_7",
+	"T88x_TI_COMMAND_8_15",
+	"T88x_TI_COMMAND_16_63",
+	"T88x_TI_COMMAND_64",
+	"T88x_TI_COMPRESS_IN",
+	"T88x_TI_COMPRESS_OUT",
+	"T88x_TI_COMPRESS_FLUSH",
+	"T88x_TI_TIMESTAMPS",
+	"T88x_TI_PCACHE_HIT",
+	"T88x_TI_PCACHE_MISS",
+	"T88x_TI_PCACHE_LINE",
+	"T88x_TI_PCACHE_STALL",
+	"T88x_TI_WRBUF_HIT",
+	"T88x_TI_WRBUF_MISS",
+	"T88x_TI_WRBUF_LINE",
+	"T88x_TI_WRBUF_PARTIAL",
+	"T88x_TI_WRBUF_STALL",
+	"T88x_TI_ACTIVE",
+	"T88x_TI_LOADING_DESC",
+	"T88x_TI_INDEX_WAIT",
+	"T88x_TI_INDEX_RANGE_WAIT",
+	"T88x_TI_VERTEX_WAIT",
+	"T88x_TI_PCACHE_WAIT",
+	"T88x_TI_WRBUF_WAIT",
+	"T88x_TI_BUS_READ",
+	"T88x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_TI_UTLB_HIT",
+	"T88x_TI_UTLB_NEW_MISS",
+	"T88x_TI_UTLB_REPLAY_FULL",
+	"T88x_TI_UTLB_REPLAY_MISS",
+	"T88x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T88x_FRAG_ACTIVE",
+	"T88x_FRAG_PRIMITIVES",
+	"T88x_FRAG_PRIMITIVES_DROPPED",
+	"T88x_FRAG_CYCLES_DESC",
+	"T88x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T88x_FRAG_CYCLES_VERT",
+	"T88x_FRAG_CYCLES_TRISETUP",
+	"T88x_FRAG_CYCLES_EZS_ACTIVE",
+	"T88x_FRAG_THREADS",
+	"T88x_FRAG_DUMMY_THREADS",
+	"T88x_FRAG_QUADS_RAST",
+	"T88x_FRAG_QUADS_EZS_TEST",
+	"T88x_FRAG_QUADS_EZS_KILLED",
+	"T88x_FRAG_THREADS_LZS_TEST",
+	"T88x_FRAG_THREADS_LZS_KILLED",
+	"T88x_FRAG_CYCLES_NO_TILE",
+	"T88x_FRAG_NUM_TILES",
+	"T88x_FRAG_TRANS_ELIM",
+	"T88x_COMPUTE_ACTIVE",
+	"T88x_COMPUTE_TASKS",
+	"T88x_COMPUTE_THREADS",
+	"T88x_COMPUTE_CYCLES_DESC",
+	"T88x_TRIPIPE_ACTIVE",
+	"T88x_ARITH_WORDS",
+	"T88x_ARITH_CYCLES_REG",
+	"T88x_ARITH_CYCLES_L0",
+	"T88x_ARITH_FRAG_DEPEND",
+	"T88x_LS_WORDS",
+	"T88x_LS_ISSUES",
+	"T88x_LS_REISSUE_ATTR",
+	"T88x_LS_REISSUES_VARY",
+	"T88x_LS_VARY_RV_MISS",
+	"T88x_LS_VARY_RV_HIT",
+	"T88x_LS_NO_UNPARK",
+	"T88x_TEX_WORDS",
+	"T88x_TEX_BUBBLES",
+	"T88x_TEX_WORDS_L0",
+	"T88x_TEX_WORDS_DESC",
+	"T88x_TEX_ISSUES",
+	"T88x_TEX_RECIRC_FMISS",
+	"T88x_TEX_RECIRC_DESC",
+	"T88x_TEX_RECIRC_MULTI",
+	"T88x_TEX_RECIRC_PMISS",
+	"T88x_TEX_RECIRC_CONF",
+	"T88x_LSC_READ_HITS",
+	"T88x_LSC_READ_OP",
+	"T88x_LSC_WRITE_HITS",
+	"T88x_LSC_WRITE_OP",
+	"T88x_LSC_ATOMIC_HITS",
+	"T88x_LSC_ATOMIC_OP",
+	"T88x_LSC_LINE_FETCHES",
+	"T88x_LSC_DIRTY_LINE",
+	"T88x_LSC_SNOOPS",
+	"T88x_AXI_TLB_STALL",
+	"T88x_AXI_TLB_MISS",
+	"T88x_AXI_TLB_TRANSACTION",
+	"T88x_LS_TLB_MISS",
+	"T88x_LS_TLB_HIT",
+	"T88x_AXI_BEATS_READ",
+	"T88x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MMU_HIT",
+	"T88x_MMU_NEW_MISS",
+	"T88x_MMU_REPLAY_FULL",
+	"T88x_MMU_REPLAY_MISS",
+	"T88x_MMU_TABLE_WALK",
+	"T88x_MMU_REQUESTS",
+	"",
+	"",
+	"T88x_UTLB_HIT",
+	"T88x_UTLB_NEW_MISS",
+	"T88x_UTLB_REPLAY_FULL",
+	"T88x_UTLB_REPLAY_MISS",
+	"T88x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_L2_EXT_WRITE_BEATS",
+	"T88x_L2_EXT_READ_BEATS",
+	"T88x_L2_ANY_LOOKUP",
+	"T88x_L2_READ_LOOKUP",
+	"T88x_L2_SREAD_LOOKUP",
+	"T88x_L2_READ_REPLAY",
+	"T88x_L2_READ_SNOOP",
+	"T88x_L2_READ_HIT",
+	"T88x_L2_CLEAN_MISS",
+	"T88x_L2_WRITE_LOOKUP",
+	"T88x_L2_SWRITE_LOOKUP",
+	"T88x_L2_WRITE_REPLAY",
+	"T88x_L2_WRITE_SNOOP",
+	"T88x_L2_WRITE_HIT",
+	"T88x_L2_EXT_READ_FULL",
+	"",
+	"T88x_L2_EXT_WRITE_FULL",
+	"T88x_L2_EXT_R_W_HAZARD",
+	"T88x_L2_EXT_READ",
+	"T88x_L2_EXT_READ_LINE",
+	"T88x_L2_EXT_WRITE",
+	"T88x_L2_EXT_WRITE_LINE",
+	"T88x_L2_EXT_WRITE_SMALL",
+	"T88x_L2_EXT_BARRIER",
+	"T88x_L2_EXT_AR_STALL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_EXT_RD_BUF_FULL",
+	"T88x_L2_EXT_R_RAW",
+	"T88x_L2_EXT_W_STALL",
+	"T88x_L2_EXT_W_BUF_FULL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_TAG_HAZARD",
+	"T88x_L2_SNOOP_FULL",
+	"T88x_L2_REPLAY_FULL"
+};
+
+#include "mali_kbase_gator_hwcnt_names_tmix.h"
+
+#include "mali_kbase_gator_hwcnt_names_thex.h"
+
+
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
new file mode 100644
index 0000000..bcceef4
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+
+static const char * const hardware_counters_mali_tHEx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MESSAGES_SENT",
+	"THEx_MESSAGES_RECEIVED",
+	"THEx_GPU_ACTIVE",
+	"THEx_IRQ_ACTIVE",
+	"THEx_JS0_JOBS",
+	"THEx_JS0_TASKS",
+	"THEx_JS0_ACTIVE",
+	"",
+	"THEx_JS0_WAIT_READ",
+	"THEx_JS0_WAIT_ISSUE",
+	"THEx_JS0_WAIT_DEPEND",
+	"THEx_JS0_WAIT_FINISH",
+	"THEx_JS1_JOBS",
+	"THEx_JS1_TASKS",
+	"THEx_JS1_ACTIVE",
+	"",
+	"THEx_JS1_WAIT_READ",
+	"THEx_JS1_WAIT_ISSUE",
+	"THEx_JS1_WAIT_DEPEND",
+	"THEx_JS1_WAIT_FINISH",
+	"THEx_JS2_JOBS",
+	"THEx_JS2_TASKS",
+	"THEx_JS2_ACTIVE",
+	"",
+	"THEx_JS2_WAIT_READ",
+	"THEx_JS2_WAIT_ISSUE",
+	"THEx_JS2_WAIT_DEPEND",
+	"THEx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"THEx_TILER_ACTIVE",
+	"THEx_JOBS_PROCESSED",
+	"THEx_TRIANGLES",
+	"THEx_LINES",
+	"THEx_POINTS",
+	"THEx_FRONT_FACING",
+	"THEx_BACK_FACING",
+	"THEx_PRIM_VISIBLE",
+	"THEx_PRIM_CULLED",
+	"THEx_PRIM_CLIPPED",
+	"THEx_PRIM_SAT_CULLED",
+	"",
+	"",
+	"THEx_BUS_READ",
+	"",
+	"THEx_BUS_WRITE",
+	"THEx_LOADING_DESC",
+	"THEx_IDVS_POS_SHAD_REQ",
+	"THEx_IDVS_POS_SHAD_WAIT",
+	"THEx_IDVS_POS_SHAD_STALL",
+	"THEx_IDVS_POS_FIFO_FULL",
+	"THEx_PREFETCH_STALL",
+	"THEx_VCACHE_HIT",
+	"THEx_VCACHE_MISS",
+	"THEx_VCACHE_LINE_WAIT",
+	"THEx_VFETCH_POS_READ_WAIT",
+	"THEx_VFETCH_VERTEX_WAIT",
+	"THEx_VFETCH_STALL",
+	"THEx_PRIMASSY_STALL",
+	"THEx_BBOX_GEN_STALL",
+	"THEx_IDVS_VBU_HIT",
+	"THEx_IDVS_VBU_MISS",
+	"THEx_IDVS_VBU_LINE_DEALLOCATE",
+	"THEx_IDVS_VAR_SHAD_REQ",
+	"THEx_IDVS_VAR_SHAD_STALL",
+	"THEx_BINNER_STALL",
+	"THEx_ITER_STALL",
+	"THEx_COMPRESS_MISS",
+	"THEx_COMPRESS_STALL",
+	"THEx_PCACHE_HIT",
+	"THEx_PCACHE_MISS",
+	"THEx_PCACHE_MISS_STALL",
+	"THEx_PCACHE_EVICT_STALL",
+	"THEx_PMGR_PTR_WR_STALL",
+	"THEx_PMGR_PTR_RD_STALL",
+	"THEx_PMGR_CMD_WR_STALL",
+	"THEx_WRBUF_ACTIVE",
+	"THEx_WRBUF_HIT",
+	"THEx_WRBUF_MISS",
+	"THEx_WRBUF_NO_FREE_LINE_STALL",
+	"THEx_WRBUF_NO_AXI_ID_STALL",
+	"THEx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"THEx_UTLB_TRANS",
+	"THEx_UTLB_TRANS_HIT",
+	"THEx_UTLB_TRANS_STALL",
+	"THEx_UTLB_TRANS_MISS_DELAY",
+	"THEx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"THEx_FRAG_ACTIVE",
+	"THEx_FRAG_PRIMITIVES",
+	"THEx_FRAG_PRIM_RAST",
+	"THEx_FRAG_FPK_ACTIVE",
+	"THEx_FRAG_STARVING",
+	"THEx_FRAG_WARPS",
+	"THEx_FRAG_PARTIAL_WARPS",
+	"THEx_FRAG_QUADS_RAST",
+	"THEx_FRAG_QUADS_EZS_TEST",
+	"THEx_FRAG_QUADS_EZS_UPDATE",
+	"THEx_FRAG_QUADS_EZS_KILL",
+	"THEx_FRAG_LZS_TEST",
+	"THEx_FRAG_LZS_KILL",
+	"",
+	"THEx_FRAG_PTILES",
+	"THEx_FRAG_TRANS_ELIM",
+	"THEx_QUAD_FPK_KILLER",
+	"",
+	"THEx_COMPUTE_ACTIVE",
+	"THEx_COMPUTE_TASKS",
+	"THEx_COMPUTE_WARPS",
+	"THEx_COMPUTE_STARVING",
+	"THEx_EXEC_CORE_ACTIVE",
+	"THEx_EXEC_ACTIVE",
+	"THEx_EXEC_INSTR_COUNT",
+	"THEx_EXEC_INSTR_DIVERGED",
+	"THEx_EXEC_INSTR_STARVING",
+	"THEx_ARITH_INSTR_SINGLE_FMA",
+	"THEx_ARITH_INSTR_DOUBLE",
+	"THEx_ARITH_INSTR_MSG",
+	"THEx_ARITH_INSTR_MSG_ONLY",
+	"THEx_TEX_INSTR",
+	"THEx_TEX_INSTR_MIPMAP",
+	"THEx_TEX_INSTR_COMPRESSED",
+	"THEx_TEX_INSTR_3D",
+	"THEx_TEX_INSTR_TRILINEAR",
+	"THEx_TEX_COORD_ISSUE",
+	"THEx_TEX_COORD_STALL",
+	"THEx_TEX_STARVE_CACHE",
+	"THEx_TEX_STARVE_FILTER",
+	"THEx_LS_MEM_READ_FULL",
+	"THEx_LS_MEM_READ_SHORT",
+	"THEx_LS_MEM_WRITE_FULL",
+	"THEx_LS_MEM_WRITE_SHORT",
+	"THEx_LS_MEM_ATOMIC",
+	"THEx_VARY_INSTR",
+	"THEx_VARY_SLOT_32",
+	"THEx_VARY_SLOT_16",
+	"THEx_ATTR_INSTR",
+	"THEx_ARITH_INSTR_FP_MUL",
+	"THEx_BEATS_RD_FTC",
+	"THEx_BEATS_RD_FTC_EXT",
+	"THEx_BEATS_RD_LSC",
+	"THEx_BEATS_RD_LSC_EXT",
+	"THEx_BEATS_RD_TEX",
+	"THEx_BEATS_RD_TEX_EXT",
+	"THEx_BEATS_RD_OTHER",
+	"THEx_BEATS_WR_LSC",
+	"THEx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"THEx_L2_RD_MSG_IN",
+	"THEx_L2_RD_MSG_IN_STALL",
+	"THEx_L2_WR_MSG_IN",
+	"THEx_L2_WR_MSG_IN_STALL",
+	"THEx_L2_SNP_MSG_IN",
+	"THEx_L2_SNP_MSG_IN_STALL",
+	"THEx_L2_RD_MSG_OUT",
+	"THEx_L2_RD_MSG_OUT_STALL",
+	"THEx_L2_WR_MSG_OUT",
+	"THEx_L2_ANY_LOOKUP",
+	"THEx_L2_READ_LOOKUP",
+	"THEx_L2_WRITE_LOOKUP",
+	"THEx_L2_EXT_SNOOP_LOOKUP",
+	"THEx_L2_EXT_READ",
+	"THEx_L2_EXT_READ_NOSNP",
+	"THEx_L2_EXT_READ_UNIQUE",
+	"THEx_L2_EXT_READ_BEATS",
+	"THEx_L2_EXT_AR_STALL",
+	"THEx_L2_EXT_AR_CNT_Q1",
+	"THEx_L2_EXT_AR_CNT_Q2",
+	"THEx_L2_EXT_AR_CNT_Q3",
+	"THEx_L2_EXT_RRESP_0_127",
+	"THEx_L2_EXT_RRESP_128_191",
+	"THEx_L2_EXT_RRESP_192_255",
+	"THEx_L2_EXT_RRESP_256_319",
+	"THEx_L2_EXT_RRESP_320_383",
+	"THEx_L2_EXT_WRITE",
+	"THEx_L2_EXT_WRITE_NOSNP_FULL",
+	"THEx_L2_EXT_WRITE_NOSNP_PTL",
+	"THEx_L2_EXT_WRITE_SNP_FULL",
+	"THEx_L2_EXT_WRITE_SNP_PTL",
+	"THEx_L2_EXT_WRITE_BEATS",
+	"THEx_L2_EXT_W_STALL",
+	"THEx_L2_EXT_AW_CNT_Q1",
+	"THEx_L2_EXT_AW_CNT_Q2",
+	"THEx_L2_EXT_AW_CNT_Q3",
+	"THEx_L2_EXT_SNOOP",
+	"THEx_L2_EXT_SNOOP_STALL",
+	"THEx_L2_EXT_SNOOP_RESP_CLEAN",
+	"THEx_L2_EXT_SNOOP_RESP_DATA",
+	"THEx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
new file mode 100644
index 0000000..5ea0677
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+
+static const char * const hardware_counters_mali_tMIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MESSAGES_SENT",
+	"TMIx_MESSAGES_RECEIVED",
+	"TMIx_GPU_ACTIVE",
+	"TMIx_IRQ_ACTIVE",
+	"TMIx_JS0_JOBS",
+	"TMIx_JS0_TASKS",
+	"TMIx_JS0_ACTIVE",
+	"",
+	"TMIx_JS0_WAIT_READ",
+	"TMIx_JS0_WAIT_ISSUE",
+	"TMIx_JS0_WAIT_DEPEND",
+	"TMIx_JS0_WAIT_FINISH",
+	"TMIx_JS1_JOBS",
+	"TMIx_JS1_TASKS",
+	"TMIx_JS1_ACTIVE",
+	"",
+	"TMIx_JS1_WAIT_READ",
+	"TMIx_JS1_WAIT_ISSUE",
+	"TMIx_JS1_WAIT_DEPEND",
+	"TMIx_JS1_WAIT_FINISH",
+	"TMIx_JS2_JOBS",
+	"TMIx_JS2_TASKS",
+	"TMIx_JS2_ACTIVE",
+	"",
+	"TMIx_JS2_WAIT_READ",
+	"TMIx_JS2_WAIT_ISSUE",
+	"TMIx_JS2_WAIT_DEPEND",
+	"TMIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_TILER_ACTIVE",
+	"TMIx_JOBS_PROCESSED",
+	"TMIx_TRIANGLES",
+	"TMIx_LINES",
+	"TMIx_POINTS",
+	"TMIx_FRONT_FACING",
+	"TMIx_BACK_FACING",
+	"TMIx_PRIM_VISIBLE",
+	"TMIx_PRIM_CULLED",
+	"TMIx_PRIM_CLIPPED",
+	"TMIx_PRIM_SAT_CULLED",
+	"",
+	"",
+	"TMIx_BUS_READ",
+	"",
+	"TMIx_BUS_WRITE",
+	"TMIx_LOADING_DESC",
+	"TMIx_IDVS_POS_SHAD_REQ",
+	"TMIx_IDVS_POS_SHAD_WAIT",
+	"TMIx_IDVS_POS_SHAD_STALL",
+	"TMIx_IDVS_POS_FIFO_FULL",
+	"TMIx_PREFETCH_STALL",
+	"TMIx_VCACHE_HIT",
+	"TMIx_VCACHE_MISS",
+	"TMIx_VCACHE_LINE_WAIT",
+	"TMIx_VFETCH_POS_READ_WAIT",
+	"TMIx_VFETCH_VERTEX_WAIT",
+	"TMIx_VFETCH_STALL",
+	"TMIx_PRIMASSY_STALL",
+	"TMIx_BBOX_GEN_STALL",
+	"TMIx_IDVS_VBU_HIT",
+	"TMIx_IDVS_VBU_MISS",
+	"TMIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TMIx_IDVS_VAR_SHAD_REQ",
+	"TMIx_IDVS_VAR_SHAD_STALL",
+	"TMIx_BINNER_STALL",
+	"TMIx_ITER_STALL",
+	"TMIx_COMPRESS_MISS",
+	"TMIx_COMPRESS_STALL",
+	"TMIx_PCACHE_HIT",
+	"TMIx_PCACHE_MISS",
+	"TMIx_PCACHE_MISS_STALL",
+	"TMIx_PCACHE_EVICT_STALL",
+	"TMIx_PMGR_PTR_WR_STALL",
+	"TMIx_PMGR_PTR_RD_STALL",
+	"TMIx_PMGR_CMD_WR_STALL",
+	"TMIx_WRBUF_ACTIVE",
+	"TMIx_WRBUF_HIT",
+	"TMIx_WRBUF_MISS",
+	"TMIx_WRBUF_NO_FREE_LINE_STALL",
+	"TMIx_WRBUF_NO_AXI_ID_STALL",
+	"TMIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TMIx_UTLB_TRANS",
+	"TMIx_UTLB_TRANS_HIT",
+	"TMIx_UTLB_TRANS_STALL",
+	"TMIx_UTLB_TRANS_MISS_DELAY",
+	"TMIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_FRAG_ACTIVE",
+	"TMIx_FRAG_PRIMITIVES",
+	"TMIx_FRAG_PRIM_RAST",
+	"TMIx_FRAG_FPK_ACTIVE",
+	"TMIx_FRAG_STARVING",
+	"TMIx_FRAG_WARPS",
+	"TMIx_FRAG_PARTIAL_WARPS",
+	"TMIx_FRAG_QUADS_RAST",
+	"TMIx_FRAG_QUADS_EZS_TEST",
+	"TMIx_FRAG_QUADS_EZS_UPDATE",
+	"TMIx_FRAG_QUADS_EZS_KILL",
+	"TMIx_FRAG_LZS_TEST",
+	"TMIx_FRAG_LZS_KILL",
+	"",
+	"TMIx_FRAG_PTILES",
+	"TMIx_FRAG_TRANS_ELIM",
+	"TMIx_QUAD_FPK_KILLER",
+	"",
+	"TMIx_COMPUTE_ACTIVE",
+	"TMIx_COMPUTE_TASKS",
+	"TMIx_COMPUTE_WARPS",
+	"TMIx_COMPUTE_STARVING",
+	"TMIx_EXEC_CORE_ACTIVE",
+	"TMIx_EXEC_ACTIVE",
+	"TMIx_EXEC_INSTR_COUNT",
+	"TMIx_EXEC_INSTR_DIVERGED",
+	"TMIx_EXEC_INSTR_STARVING",
+	"TMIx_ARITH_INSTR_SINGLE_FMA",
+	"TMIx_ARITH_INSTR_DOUBLE",
+	"TMIx_ARITH_INSTR_MSG",
+	"TMIx_ARITH_INSTR_MSG_ONLY",
+	"TMIx_TEX_INSTR",
+	"TMIx_TEX_INSTR_MIPMAP",
+	"TMIx_TEX_INSTR_COMPRESSED",
+	"TMIx_TEX_INSTR_3D",
+	"TMIx_TEX_INSTR_TRILINEAR",
+	"TMIx_TEX_COORD_ISSUE",
+	"TMIx_TEX_COORD_STALL",
+	"TMIx_TEX_STARVE_CACHE",
+	"TMIx_TEX_STARVE_FILTER",
+	"TMIx_LS_MEM_READ_FULL",
+	"TMIx_LS_MEM_READ_SHORT",
+	"TMIx_LS_MEM_WRITE_FULL",
+	"TMIx_LS_MEM_WRITE_SHORT",
+	"TMIx_LS_MEM_ATOMIC",
+	"TMIx_VARY_INSTR",
+	"TMIx_VARY_SLOT_32",
+	"TMIx_VARY_SLOT_16",
+	"TMIx_ATTR_INSTR",
+	"TMIx_ARITH_INSTR_FP_MUL",
+	"TMIx_BEATS_RD_FTC",
+	"TMIx_BEATS_RD_FTC_EXT",
+	"TMIx_BEATS_RD_LSC",
+	"TMIx_BEATS_RD_LSC_EXT",
+	"TMIx_BEATS_RD_TEX",
+	"TMIx_BEATS_RD_TEX_EXT",
+	"TMIx_BEATS_RD_OTHER",
+	"TMIx_BEATS_WR_LSC",
+	"TMIx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TMIx_L2_RD_MSG_IN",
+	"TMIx_L2_RD_MSG_IN_STALL",
+	"TMIx_L2_WR_MSG_IN",
+	"TMIx_L2_WR_MSG_IN_STALL",
+	"TMIx_L2_SNP_MSG_IN",
+	"TMIx_L2_SNP_MSG_IN_STALL",
+	"TMIx_L2_RD_MSG_OUT",
+	"TMIx_L2_RD_MSG_OUT_STALL",
+	"TMIx_L2_WR_MSG_OUT",
+	"TMIx_L2_ANY_LOOKUP",
+	"TMIx_L2_READ_LOOKUP",
+	"TMIx_L2_WRITE_LOOKUP",
+	"TMIx_L2_EXT_SNOOP_LOOKUP",
+	"TMIx_L2_EXT_READ",
+	"TMIx_L2_EXT_READ_NOSNP",
+	"TMIx_L2_EXT_READ_UNIQUE",
+	"TMIx_L2_EXT_READ_BEATS",
+	"TMIx_L2_EXT_AR_STALL",
+	"TMIx_L2_EXT_AR_CNT_Q1",
+	"TMIx_L2_EXT_AR_CNT_Q2",
+	"TMIx_L2_EXT_AR_CNT_Q3",
+	"TMIx_L2_EXT_RRESP_0_127",
+	"TMIx_L2_EXT_RRESP_128_191",
+	"TMIx_L2_EXT_RRESP_192_255",
+	"TMIx_L2_EXT_RRESP_256_319",
+	"TMIx_L2_EXT_RRESP_320_383",
+	"TMIx_L2_EXT_WRITE",
+	"TMIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TMIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TMIx_L2_EXT_WRITE_SNP_FULL",
+	"TMIx_L2_EXT_WRITE_SNP_PTL",
+	"TMIx_L2_EXT_WRITE_BEATS",
+	"TMIx_L2_EXT_W_STALL",
+	"TMIx_L2_EXT_AW_CNT_Q1",
+	"TMIx_L2_EXT_AW_CNT_Q2",
+	"TMIx_L2_EXT_AW_CNT_Q3",
+	"TMIx_L2_EXT_SNOOP",
+	"TMIx_L2_EXT_SNOOP_STALL",
+	"TMIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TMIx_L2_EXT_SNOOP_RESP_DATA",
+	"TMIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
new file mode 100644
index 0000000..a3377b2
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
@@ -0,0 +1,113 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#ifndef _KBASE_GPU_ID_H_
+#define _KBASE_GPU_ID_H_
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT       0
+#define GPU_ID_VERSION_MINOR_SHIFT        4
+#define GPU_ID_VERSION_MAJOR_SHIFT        12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
+#define GPU_ID_VERSION_STATUS             (0xF  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xF  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X                    0x6956
+#define GPU_ID_PI_T62X                    0x0620
+#define GPU_ID_PI_T76X                    0x0750
+#define GPU_ID_PI_T72X                    0x0720
+#define GPU_ID_PI_TFRX                    0x0880
+#define GPU_ID_PI_T86X                    0x0860
+#define GPU_ID_PI_T82X                    0x0820
+#define GPU_ID_PI_T83X                    0x0830
+
+/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
+#define GPU_ID_PI_NEW_FORMAT_START        0x1000
+#define GPU_ID_IS_NEW_FORMAT(product_id)  ((product_id) != GPU_ID_PI_T60X && \
+						(product_id) >= \
+						GPU_ID_PI_NEW_FORMAT_START)
+
+#define GPU_ID2_VERSION_STATUS_SHIFT      0
+#define GPU_ID2_VERSION_MINOR_SHIFT       4
+#define GPU_ID2_VERSION_MAJOR_SHIFT       12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT       16
+#define GPU_ID2_ARCH_REV_SHIFT            20
+#define GPU_ID2_ARCH_MINOR_SHIFT          24
+#define GPU_ID2_ARCH_MAJOR_SHIFT          28
+#define GPU_ID2_VERSION_STATUS            (0xF << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFF << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xF << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xF << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xF << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xF << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+   a product ignoring its version. */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+   revision (major, minor, status) of a product */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+		(((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
+	version_major, version_minor, version_status) \
+		(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+			product_major) | \
+		 GPU_ID2_VERSION_MAKE(version_major, version_minor,     \
+			version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+   a particular GPU model by its arch_major and product_major. */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+   for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+   model. */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+		(((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		    GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
+#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6, 1)
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0                   0x1
+#define GPU_ID_S_EAC                      0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major,
+   minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+		(((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		((major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		((minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		((status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+#endif /* _KBASE_GPU_ID_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
new file mode 100755
index 0000000..6df0a1c
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+/** Show callback for the @c gpu_memory debugfs file.
+ *
+ * This function is called to get the contents of the @c gpu_memory debugfs
+ * file. This is a report of current gpu memory usage.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if successfully prints data in debugfs entry file
+ *         -1 if it encountered an error
+ */
+
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+
+	kbdev_list = kbase_dev_list_get();
+	list_for_each(entry, kbdev_list) {
+		struct kbase_device *kbdev = NULL;
+		struct kbasep_kctx_list_element *element;
+
+		kbdev = list_entry(entry, struct kbase_device, entry);
+		/* output the total memory usage and cap for this device */
+		seq_printf(sfile, "%-16s  %10u\n",
+				kbdev->devname,
+				atomic_read(&(kbdev->memdev.used_pages)));
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_for_each_entry(element, &kbdev->kctx_list, link) {
+			/* output the memory usage and cap for each kctx
+			* opened on this device */
+			seq_printf(sfile, "  %s-0x%p %10u\n",
+				"kctx",
+				element->kctx,
+				atomic_read(&(element->kctx->used_pages)));
+		}
+		mutex_unlock(&kbdev->kctx_list_lock);
+	}
+	kbase_dev_list_put(kbdev_list);
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for gpu_memory
+ */
+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_gpu_memory_seq_show , NULL);
+}
+
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+	.open = kbasep_gpu_memory_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ *  Initialize debugfs entry for gpu_memory
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("gpu_memory", S_IRUGO,
+			kbdev->mali_debugfs_directory, NULL,
+			&kbasep_gpu_memory_debugfs_fops);
+	return;
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	return;
+}
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
new file mode 100755
index 0000000..7045693
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H
+#define _KBASE_GPU_MEMORY_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+#endif  /*_KBASE_GPU_MEMORY_DEBUGFS_H*/
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
new file mode 100755
index 0000000..7f77dba
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -0,0 +1,314 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include <linux/clk.h>
+
+/**
+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
+ * @value:  The value from which to extract bits.
+ * @offset: The first bit to extract (0 being the LSB).
+ * @size:   The number of bits to extract.
+ *
+ * Context: @offset + @size <= 32.
+ *
+ * Return: Bits [@offset, @offset + @size) from @value.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+	(((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
+
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props)
+{
+	kbase_gpu_clk_speed_func get_gpu_speed_mhz;
+	u32 gpu_speed_mhz;
+	int rc = 1;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kbase_props);
+
+	/* Current GPU speed is requested from the system integrator via the GPU_SPEED_FUNC function.
+	 * If that function fails, or the function is not provided by the system integrator, we report the maximum
+	 * GPU speed as specified by GPU_FREQ_KHZ_MAX.
+	 */
+	get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC;
+	if (get_gpu_speed_mhz != NULL) {
+		rc = get_gpu_speed_mhz(&gpu_speed_mhz);
+#ifdef CONFIG_MALI_DEBUG
+		/* Issue a warning message when the reported GPU speed falls outside the min/max range */
+		if (rc == 0) {
+			u32 gpu_speed_khz = gpu_speed_mhz * 1000;
+
+			if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min ||
+					gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
+				dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n",
+						(unsigned long)gpu_speed_khz,
+						(unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min,
+						(unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
+		}
+#endif				/* CONFIG_MALI_DEBUG */
+	}
+	if (kctx->kbdev->clock) {
+		gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000;
+		rc = 0;
+	}
+	if (rc != 0)
+		gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
+
+	kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz;
+
+	memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
+
+	/* Before API 8.2 they expect L3 cache info here, which was always 0 */
+	if (kctx->api_version < KBASE_API_VERSION(8, 2))
+		kbase_props->props.raw_props.suspend_size = 0;
+
+	return 0;
+}
+
+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+	struct mali_base_gpu_coherent_group *current_group;
+	u64 group_present;
+	u64 group_mask;
+	u64 first_set, first_set_prev;
+	u32 num_groups = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != props);
+
+	props->coherency_info.coherency = props->raw_props.mem_features;
+	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+	if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+		/* Group is l2 coherent */
+		group_present = props->raw_props.l2_present;
+	} else {
+		/* Group is l1 coherent */
+		group_present = props->raw_props.shader_present;
+	}
+
+	/*
+	 * The coherent group mask can be computed from the l2 present
+	 * register.
+	 *
+	 * For the coherent group n:
+	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
+	 * where first_set is group_present with only its nth set-bit kept
+	 * (i.e. the position from where a new group starts).
+	 *
+	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
+	 * The first mask is:
+	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
+	 *               = (0x0..010     - 1) & ~(0x0..01      - 1)
+	 *               =  0x0..00f
+	 * The second mask is:
+	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
+	 *               = (0x0..100     - 1) & ~(0x0..010     - 1)
+	 *               =  0x0..0f0
+	 * And so on until all the bits from group_present have been cleared
+	 * (i.e. there is no group left).
+	 */
+
+	current_group = props->coherency_info.group;
+	first_set = group_present & ~(group_present - 1);
+
+	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
+		group_present -= first_set;	/* Clear the current group bit */
+		first_set_prev = first_set;
+
+		first_set = group_present & ~(group_present - 1);
+		group_mask = (first_set - 1) & ~(first_set_prev - 1);
+
+		/* Populate the coherent_group structure for each group */
+		current_group->core_mask = group_mask & props->raw_props.shader_present;
+		current_group->num_cores = hweight64(current_group->core_mask);
+
+		num_groups++;
+		current_group++;
+	}
+
+	if (group_present != 0)
+		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+	props->coherency_info.num_groups = num_groups;
+}
+
+/**
+ * kbase_gpuprops_get_props - Get the GPU configuration
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values from the GPU configuration
+ * registers. Only the raw properties are filled in this function
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	struct kbase_gpuprops_regdump regdump;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get(kbdev, &regdump);
+
+	gpu_props->raw_props.gpu_id = regdump.gpu_id;
+	gpu_props->raw_props.tiler_features = regdump.tiler_features;
+	gpu_props->raw_props.mem_features = regdump.mem_features;
+	gpu_props->raw_props.mmu_features = regdump.mmu_features;
+	gpu_props->raw_props.l2_features = regdump.l2_features;
+	gpu_props->raw_props.suspend_size = regdump.suspend_size;
+
+	gpu_props->raw_props.as_present = regdump.as_present;
+	gpu_props->raw_props.js_present = regdump.js_present;
+	gpu_props->raw_props.shader_present = ((u64) regdump.shader_present_hi << 32) + regdump.shader_present_lo;
+	gpu_props->raw_props.tiler_present = ((u64) regdump.tiler_present_hi << 32) + regdump.tiler_present_lo;
+	gpu_props->raw_props.l2_present = ((u64) regdump.l2_present_hi << 32) + regdump.l2_present_lo;
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		gpu_props->raw_props.js_features[i] = regdump.js_features[i];
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
+
+	gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
+	gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
+	gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
+	gpu_props->raw_props.thread_features = regdump.thread_features;
+}
+
+/**
+ * kbase_gpuprops_calculate_props - Calculate the derived properties
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev:     The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values derived from the GPU
+ * configuration registers
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	int i;
+
+	/* Populate the base_gpu_props structure */
+	gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+	gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+	gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+	gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+
+	/* Field with number of l2 slices is added to MEM_FEATURES register
+	 * since t76x. Below code assumes that for older GPU reserved bits will
+	 * be read as zero. */
+	gpu_props->l2_props.num_l2_slices =
+		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+
+	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+	if (gpu_props->raw_props.thread_max_threads == 0)
+		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+	else
+		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+	else
+		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+	if (gpu_props->raw_props.thread_max_barrier_size == 0)
+		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+	else
+		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+	gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+
+	/* If values are not specified, then use defaults */
+	if (gpu_props->thread_props.max_registers == 0) {
+		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+	}
+	/* Initialize the coherent_group structure for each group */
+	kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *gpu_props;
+	struct gpu_raw_gpu_props *raw;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	gpu_props = &kbdev->gpu_props;
+	raw = &gpu_props->props.raw_props;
+
+	/* Initialize the base_gpu_props structure from the hardware */
+	kbase_gpuprops_get_props(&gpu_props->props, kbdev);
+
+	/* Populate the derived properties */
+	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
+
+	/* Populate kbase-only fields */
+	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
+	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
+
+	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
+
+	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
+	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
+
+	gpu_props->num_cores = hweight64(raw->shader_present);
+	gpu_props->num_core_groups = hweight64(raw->l2_present);
+	gpu_props->num_address_spaces = hweight32(raw->as_present);
+	gpu_props->num_job_slots = hweight32(raw->js_present);
+}
+
+void kbase_gpuprops_set_features(struct kbase_device *kbdev)
+{
+	base_gpu_props *gpu_props;
+	struct kbase_gpuprops_regdump regdump;
+
+	gpu_props = &kbdev->gpu_props.props;
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get_features(kbdev, &regdump);
+
+	/*
+	 * Copy the raw value from the register, later this will get turned
+	 * into the selected coherency mode.
+	 */
+	gpu_props->raw_props.coherency_mode = regdump.coherency_features;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
new file mode 100755
index 0000000..f3c95cc
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev		The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_set_features - Set up Kbase GPU properties
+ * @kbdev:   Device pointer
+ *
+ * This function sets up GPU properties that are dependent on the hardware
+ * features bitmask. This function must be preceeded by a call to
+ * kbase_hw_set_features_mask().
+ */
+void kbase_gpuprops_set_features(struct kbase_device *kbdev);
+
+/**
+ * @brief Provide GPU properties to userside through UKU call.
+ *
+ * Fill the struct kbase_uk_gpuprops with values from GPU configuration registers.
+ *
+ * @param kctx		The struct kbase_context structure
+ * @param kbase_props	A copy of the struct kbase_uk_gpuprops structure from userspace
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props);
+
+#endif				/* _KBASE_GPUPROPS_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
new file mode 100755
index 0000000..f42e91b
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops_types.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_TYPES_H_
+#define _KBASE_GPUPROPS_TYPES_H_
+
+#include "mali_base_kernel.h"
+
+#define KBASE_GPU_SPEED_MHZ    123
+#define KBASE_GPU_PC_SIZE_LOG2 24U
+
+struct kbase_gpuprops_regdump {
+	u32 gpu_id;
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 tiler_features;
+	u32 mem_features;
+	u32 mmu_features;
+	u32 as_present;
+	u32 js_present;
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 shader_present_lo;
+	u32 shader_present_hi;
+	u32 tiler_present_lo;
+	u32 tiler_present_hi;
+	u32 l2_present_lo;
+	u32 l2_present_hi;
+	u32 coherency_features;
+};
+
+struct kbase_gpu_cache_props {
+	u8 associativity;
+	u8 external_bus_width;
+};
+
+struct kbase_gpu_mem_props {
+	u8 core_group;
+};
+
+struct kbase_gpu_mmu_props {
+	u8 va_bits;
+	u8 pa_bits;
+};
+
+struct kbase_gpu_props {
+	/* kernel-only properties */
+	u8 num_cores;
+	u8 num_core_groups;
+	u8 num_address_spaces;
+	u8 num_job_slots;
+
+	struct kbase_gpu_cache_props l2_props;
+
+	struct kbase_gpu_mem_props mem;
+	struct kbase_gpu_mmu_props mmu;
+
+	/**
+	 * Implementation specific irq throttle value (us), should be adjusted during integration.
+	 */
+	int irq_throttle_time_us;
+
+	/* Properties shared with userspace */
+	base_gpu_props props;
+};
+
+#endif				/* _KBASE_GPUPROPS_TYPES_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
new file mode 100755
index 0000000..1d7e5e9
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -0,0 +1,272 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Run-time work-arounds helpers
+ */
+
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_midg_regmap.h>
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+
+void kbase_hw_set_features_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_feature *features;
+	u32 gpu_id;
+	u32 product_id;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			features = base_hw_features_tMIx;
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			features = base_hw_features_tHEx;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	} else {
+		switch (product_id) {
+		case GPU_ID_PI_TFRX:
+			/* FALLTHROUGH */
+		case GPU_ID_PI_T86X:
+			features = base_hw_features_tFxx;
+			break;
+		case GPU_ID_PI_T83X:
+			features = base_hw_features_t83x;
+			break;
+		case GPU_ID_PI_T82X:
+			features = base_hw_features_t82x;
+			break;
+		case GPU_ID_PI_T76X:
+			features = base_hw_features_t76x;
+			break;
+		case GPU_ID_PI_T72X:
+			features = base_hw_features_t72x;
+			break;
+		case GPU_ID_PI_T62X:
+			features = base_hw_features_t62x;
+			break;
+		case GPU_ID_PI_T60X:
+			features = base_hw_features_t60x;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	}
+
+	for (; *features != BASE_HW_FEATURE_END; features++)
+		set_bit(*features, &kbdev->hw_features_mask[0]);
+}
+
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues;
+	u32 gpu_id;
+	u32 product_id;
+	u32 impl_tech;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
+
+	if (impl_tech != IMPLEMENTATION_MODEL) {
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id) {
+			case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 1):
+				issues = base_hw_issues_tMIx_r0p0_05dev0;
+				break;
+			case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 2):
+				issues = base_hw_issues_tMIx_r0p0;
+				break;
+			default:
+				if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+							GPU_ID2_PRODUCT_TMIX) {
+					issues = base_hw_issues_tMIx_r0p0;
+				} else if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+							GPU_ID2_PRODUCT_THEX) {
+					issues = base_hw_issues_tHEx_r0p0;
+				} else {
+					dev_err(kbdev->dev,
+						"Unknown GPU ID %x", gpu_id);
+					return -EINVAL;
+				}
+			}
+		} else {
+			switch (gpu_id) {
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+				issues = base_hw_issues_t60x_r0p0_15dev0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+				issues = base_hw_issues_t60x_r0p0_eac;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+				issues = base_hw_issues_t60x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+				issues = base_hw_issues_t62x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+				issues = base_hw_issues_t62x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+				issues = base_hw_issues_t62x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+				issues = base_hw_issues_t76x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+				issues = base_hw_issues_t76x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+				issues = base_hw_issues_t76x_r0p1_50rel0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+				issues = base_hw_issues_t76x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+				issues = base_hw_issues_t76x_r0p3;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+				issues = base_hw_issues_t76x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+				issues = base_hw_issues_t72x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+				issues = base_hw_issues_t72x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+				issues = base_hw_issues_t72x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+				issues = base_hw_issues_tFRx_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+				issues = base_hw_issues_tFRx_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8):
+				issues = base_hw_issues_tFRx_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
+				issues = base_hw_issues_tFRx_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+				issues = base_hw_issues_t86x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8):
+				issues = base_hw_issues_t86x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
+				issues = base_hw_issues_t86x_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
+				issues = base_hw_issues_t83x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8):
+				issues = base_hw_issues_t83x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
+				issues = base_hw_issues_t82x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
+				issues = base_hw_issues_t82x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8):
+				issues = base_hw_issues_t82x_r1p0;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		}
+	} else {
+		/* Software model */
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+			case GPU_ID2_PRODUCT_TMIX:
+				issues = base_hw_issues_model_tMIx;
+				break;
+			case GPU_ID2_PRODUCT_THEX:
+				issues = base_hw_issues_model_tHEx;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		} else {
+			switch (product_id) {
+			case GPU_ID_PI_T60X:
+				issues = base_hw_issues_model_t60x;
+				break;
+			case GPU_ID_PI_T62X:
+				issues = base_hw_issues_model_t62x;
+				break;
+			case GPU_ID_PI_T72X:
+				issues = base_hw_issues_model_t72x;
+				break;
+			case GPU_ID_PI_T76X:
+				issues = base_hw_issues_model_t76x;
+				break;
+			case GPU_ID_PI_TFRX:
+				issues = base_hw_issues_model_tFRx;
+				break;
+			case GPU_ID_PI_T86X:
+				issues = base_hw_issues_model_t86x;
+				break;
+			case GPU_ID_PI_T83X:
+				issues = base_hw_issues_model_t83x;
+				break;
+			case GPU_ID_PI_T82X:
+				issues = base_hw_issues_model_t82x;
+				break;
+			default:
+				dev_err(kbdev->dev, "Unknown GPU ID %x",
+					gpu_id);
+				return -EINVAL;
+			}
+		}
+	}
+
+	dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT);
+
+	for (; *issues != BASE_HW_ISSUE_END; issues++)
+		set_bit(*issues, &kbdev->hw_issues_mask[0]);
+
+	return 0;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
new file mode 100755
index 0000000..fce7d29
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Run-time work-arounds helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+	test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+	test_bit(feature, &(kbdev)->hw_features_mask[0])
+
+/**
+ * @brief Set the HW issues mask depending on the GPU ID
+ */
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif				/* _KBASE_HW_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
new file mode 100755
index 0000000..b09be99
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access backend common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_BACKEND_H_
+#define _KBASE_HWACCESS_BACKEND_H_
+
+/**
+ * kbase_backend_early_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_early_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
new file mode 100755
index 0000000..0acf297
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_gpu_defs.h
+ * HW access common definitions
+ */
+
+#ifndef _KBASE_HWACCESS_DEFS_H_
+#define _KBASE_HWACCESS_DEFS_H_
+
+#include <mali_kbase_jm_defs.h>
+
+/* The hwaccess_lock (a spinlock) must be held when accessing this structure */
+struct kbase_hwaccess_data {
+	struct kbase_context *active_kctx;
+
+	struct kbase_backend_data backend;
+};
+
+#endif /* _KBASE_HWACCESS_DEFS_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
new file mode 100755
index 0000000..cf8a813
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ *				  GPU
+ * @kbdev:	Device pointer
+ * @regdump:	Pointer to struct kbase_gpuprops_regdump structure
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+/**
+ * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU
+ * @kbdev:   Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads GPU properties that are dependent on the hardware
+ * features bitmask
+ */
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
new file mode 100755
index 0000000..5de2b75
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include <mali_kbase_instr_defs.h>
+
+/**
+ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * @kbdev:	Kbase device
+ * @kctx:	Kbase context
+ * @setup:	HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx:	Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true,
+ * of call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ *				       completed.
+ * @kctx:	Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ *				     completed
+ * @kctx:	Kbase context
+ * @success:	Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx:	Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_init() - Terminate the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
new file mode 100755
index 0000000..c2c3909
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -0,0 +1,377 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_JM_H_
+#define _KBASE_HWACCESS_JM_H_
+
+/**
+ * kbase_backend_run_atom() - Run an atom on the GPU
+ * @kbdev:	Device pointer
+ * @atom:	Atom to run
+ *
+ * Caller must hold the HW access lock
+ */
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_slot_update - Update state based on slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ *
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ */
+void kbase_backend_slot_update(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_find_free_address_space() - Find a free address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * If no address spaces are currently free, then this function can evict an
+ * idle context from the runpool, freeing up the address space it was using.
+ *
+ * The address space is marked as in use. The caller must either assign a
+ * context using kbase_gpu_use_ctx(), or release it using
+ * kbase_gpu_release_free_address_space()
+ *
+ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none
+ *	   available
+ */
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_free_address_space() - Release an address space.
+ * @kbdev:	Device pointer
+ * @as_nr:	Address space to release
+ *
+ * The address space must have been returned by
+ * kbase_gpu_find_free_address_space().
+ */
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+						int as_nr);
+
+/**
+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the
+ *			     provided address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @as_nr:	Free address space to use
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * Return: true if successful, false if ASID not assigned.
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr);
+
+/**
+ * kbase_backend_use_ctx_sched() - Activate a context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * The context must already be scheduled and assigned to an address space. If
+ * the context is not scheduled, then kbase_gpu_use_ctx() should be used
+ * instead.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if context is now active, false otherwise (ie if context does
+ *	   not have an address space assigned)
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+					struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
+ *                                 de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+				struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will
+ *                                   de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex
+ *
+ * This function must perform any operations that could not be performed in IRQ
+ * context by kbase_backend_release_ctx_irq().
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_complete_wq() - Perform backend-specific actions required on
+ *				 completing an atom.
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the atom to complete
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ *
+ * Return: true if atom has completed, false if atom should be re-submitted
+ */
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions
+ *                                        required on completing an atom, after
+ *                                        any scheduling has taken place.
+ * @kbdev:         Device pointer
+ * @core_req:      Core requirements of atom
+ * @affinity:      Affinity of atom
+ * @coreref_state: Coreref state of atom
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ */
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state);
+
+/**
+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
+ *			   and remove any others from the ringbuffers.
+ * @kbdev:		Device pointer
+ * @end_timestamp:	Timestamp of reset
+ */
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
+
+/**
+ * kbase_backend_inspect_head() - Return the atom currently at the head of slot
+ *				  @js
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
+ *                              @js
+ * @kbdev: Device pointer
+ * @js:    Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
+ *				      slot.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot
+ */
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
+ *					that are currently on the GPU.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot @js that are currently on the GPU.
+ */
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
+ *				       has changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg starting/stopping
+ * scheduling timers).
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg updating timeouts of
+ * currently running atoms).
+ */
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_slot_free() - Return the number of jobs that can be currently
+ *			       submitted to slot @js.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of jobs that can be submitted.
+ */
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
+ *                                        running from a context
+ * @kctx: Context pointer
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting
+ * context from the hardware.
+ */
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+
+/**
+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and
+ *                               to be descheduled.
+ * @kctx: Context pointer
+ *
+ * This should be called following kbase_js_zap_context(), to ensure the context
+ * can be safely destroyed.
+ */
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
+
+/**
+ * kbase_backend_get_current_flush_id - Return the current flush ID
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: the current flush ID to be recorded for each job chain
+ */
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it returns
+ * true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_locked - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_silent - Reset the GPU silently
+ * @kbdev: Device pointer
+ *
+ * Reset the GPU without trying to cancel jobs and don't emit messages into
+ * the kernel log while doing the reset.
+ *
+ * This function should be used in cases where we are doing a controlled reset
+ * of the GPU as part of normal processing (e.g. exiting protected mode) where
+ * the driver will have ensured the scheduler has been idled and all other
+ * users of the GPU (e.g. instrumentation) have been suspended.
+ */
+void kbase_reset_gpu_silent(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_active - Reports if the GPU is being reset
+ * @kbdev: Device pointer
+ *
+ * Return: True if the GPU is in the process of being reset.
+ */
+bool kbase_reset_gpu_active(struct kbase_device *kbdev);
+#endif
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom);
+
+#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
new file mode 100755
index 0000000..bae7c0d
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_pm.h
+ * HW access power manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_PM_H_
+#define _KBASE_HWACCESS_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+#include <mali_kbase_pm_defs.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/* Functions common to all HW access backends */
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return 0 if the power management framework was successfully
+ *         initialized.
+ */
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_hwaccess_pm_powerup - Power up the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
+ *
+ * Power up GPU after all modules have been initialized and interrupt handlers
+ * installed.
+ *
+ * Return: 0 if powerup was successful.
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ *
+ * Should ensure that no new interrupts are generated, but allow any currently
+ * running interrupt handlers to complete successfully. The GPU is forced off by
+ * the time this function returns, regardless of whether or not the active power
+ * policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to suspend the GPU
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to resume the GPU from a suspend
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for activating the GPU. Called when the first
+ * context goes active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for idling the GPU. Called when the last
+ * context goes idle.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev         The kbase device structure for the device (must be a
+ *                      valid pointer)
+ * @param new_core_mask_js0 The core mask to use for job slot 0
+ * @param new_core_mask_js0 The core mask to use for job slot 1
+ * @param new_core_mask_js0 The core mask to use for job slot 2
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+int set_policy_by_name(struct kbase_device *kbdev, const char *name);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+					const struct kbase_pm_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
+
+#endif /* _KBASE_HWACCESS_PM_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
new file mode 100755
index 0000000..89d26ea
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
new file mode 100755
index 0000000..cf7bf1b
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,66 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_HWCNT_READER_H_
+#define _KBASE_HWCNT_READER_H_
+
+/* The ids of ioctl commands. */
+#define KBASE_HWCNT_READER 0xBE
+#define KBASE_HWCNT_READER_GET_HWVER       _IOR(KBASE_HWCNT_READER, 0x00, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32)
+#define KBASE_HWCNT_READER_DUMP            _IOW(KBASE_HWCNT_READER, 0x10, u32)
+#define KBASE_HWCNT_READER_CLEAR           _IOW(KBASE_HWCNT_READER, 0x11, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER      _IOR(KBASE_HWCNT_READER, 0x20,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_PUT_BUFFER      _IOW(KBASE_HWCNT_READER, 0x21,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_SET_INTERVAL    _IOW(KBASE_HWCNT_READER, 0x30, u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT    _IOW(KBASE_HWCNT_READER, 0x40, u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT   _IOW(KBASE_HWCNT_READER, 0x41, u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32)
+
+/**
+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
+ * @timestamp:  time when sample was collected
+ * @event_id:   id of an event that triggered sample collection
+ * @buffer_idx: position in sampling area where sample buffer was stored
+ */
+struct kbase_hwcnt_reader_metadata {
+	u64 timestamp;
+	u32 event_id;
+	u32 buffer_idx;
+};
+
+/**
+ * enum base_hwcnt_reader_event - hwcnt dumping events
+ * @BASE_HWCNT_READER_EVENT_MANUAL:   manual request for dump
+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
+ * @BASE_HWCNT_READER_EVENT_PREJOB:   prejob dump request
+ * @BASE_HWCNT_READER_EVENT_POSTJOB:  postjob dump request
+ * @BASE_HWCNT_READER_EVENT_COUNT:    number of supported events
+ */
+enum base_hwcnt_reader_event {
+	BASE_HWCNT_READER_EVENT_MANUAL,
+	BASE_HWCNT_READER_EVENT_PERIODIC,
+	BASE_HWCNT_READER_EVENT_PREJOB,
+	BASE_HWCNT_READER_EVENT_POSTJOB,
+
+	BASE_HWCNT_READER_EVENT_COUNT
+};
+
+#endif /* _KBASE_HWCNT_READER_H_ */
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c
new file mode 100755
index 0000000..c579d0a
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c
@@ -0,0 +1,431 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/of.h>
+#include <linux/sysfs.h>
+
+#include <mali_kbase.h>
+
+#define NR_IPA_GROUPS 8
+
+struct kbase_ipa_context;
+
+/**
+ * struct ipa_group - represents a single IPA group
+ * @name:               name of the IPA group
+ * @capacitance:        capacitance constant for IPA group
+ * @calc_power:         function to calculate power for IPA group
+ */
+struct ipa_group {
+	const char *name;
+	u32 capacitance;
+	u32 (*calc_power)(struct kbase_ipa_context *,
+			struct ipa_group *);
+};
+
+#include <mali_kbase_ipa_tables.h>
+
+/**
+ * struct kbase_ipa_context - IPA context per device
+ * @kbdev:              pointer to kbase device
+ * @groups:             array of IPA groups for this context
+ * @vinstr_cli:         vinstr client handle
+ * @vinstr_buffer:      buffer to dump hardware counters onto
+ * @ipa_lock:           protects the entire IPA context
+ */
+struct kbase_ipa_context {
+	struct kbase_device *kbdev;
+	struct ipa_group groups[NR_IPA_GROUPS];
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct mutex ipa_lock;
+};
+
+static ssize_t show_ipa_group(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbase_ipa_context *ctx = kbdev->ipa_ctx;
+	ssize_t count = -EINVAL;
+	size_t i;
+
+	mutex_lock(&ctx->ipa_lock);
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		if (!strcmp(ctx->groups[i].name, attr->attr.name)) {
+			count = snprintf(buf, PAGE_SIZE, "%lu\n",
+				(unsigned long)ctx->groups[i].capacitance);
+			break;
+		}
+	}
+	mutex_unlock(&ctx->ipa_lock);
+	return count;
+}
+
+static ssize_t set_ipa_group(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbase_ipa_context *ctx = kbdev->ipa_ctx;
+	unsigned long capacitance;
+	size_t i;
+	int err;
+
+	err = kstrtoul(buf, 0, &capacitance);
+	if (err < 0)
+		return err;
+	if (capacitance > U32_MAX)
+		return -ERANGE;
+
+	mutex_lock(&ctx->ipa_lock);
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		if (!strcmp(ctx->groups[i].name, attr->attr.name)) {
+			ctx->groups[i].capacitance = capacitance;
+			mutex_unlock(&ctx->ipa_lock);
+			return count;
+		}
+	}
+	mutex_unlock(&ctx->ipa_lock);
+	return -EINVAL;
+}
+
+static DEVICE_ATTR(group0, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group1, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group2, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group3, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group4, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group5, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group6, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group7, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+
+static struct attribute *kbase_ipa_attrs[] = {
+	&dev_attr_group0.attr,
+	&dev_attr_group1.attr,
+	&dev_attr_group2.attr,
+	&dev_attr_group3.attr,
+	&dev_attr_group4.attr,
+	&dev_attr_group5.attr,
+	&dev_attr_group6.attr,
+	&dev_attr_group7.attr,
+	NULL,
+};
+
+static struct attribute_group kbase_ipa_attr_group = {
+	.name = "ipa",
+	.attrs = kbase_ipa_attrs,
+};
+
+static void init_ipa_groups(struct kbase_ipa_context *ctx)
+{
+	memcpy(ctx->groups, ipa_groups_def, sizeof(ctx->groups));
+}
+
+#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	struct device_node *np, *child;
+	struct ipa_group *group;
+	size_t nr_groups;
+	size_t i;
+	int err;
+
+	np = of_get_child_by_name(kbdev->dev->of_node, "ipa-groups");
+	if (!np)
+		return 0;
+
+	nr_groups = 0;
+	for_each_available_child_of_node(np, child)
+		nr_groups++;
+	if (!nr_groups || nr_groups > ARRAY_SIZE(ctx->groups)) {
+		dev_err(kbdev->dev, "invalid number of IPA groups: %zu", nr_groups);
+		err = -EINVAL;
+		goto err0;
+	}
+
+	for_each_available_child_of_node(np, child) {
+		const char *name;
+		u32 capacitance;
+
+		name = of_get_property(child, "label", NULL);
+		if (!name) {
+			dev_err(kbdev->dev, "label missing for IPA group");
+			err = -EINVAL;
+			goto err0;
+		}
+		err = of_property_read_u32(child, "capacitance",
+				&capacitance);
+		if (err < 0) {
+			dev_err(kbdev->dev, "capacitance missing for IPA group");
+			goto err0;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+			group = &ctx->groups[i];
+			if (!strcmp(group->name, name)) {
+				group->capacitance = capacitance;
+				break;
+			}
+		}
+	}
+
+	of_node_put(np);
+	return 0;
+err0:
+	of_node_put(np);
+	return err;
+}
+#else
+static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx)
+{
+	return 0;
+}
+#endif
+
+static int reset_ipa_groups(struct kbase_ipa_context *ctx)
+{
+	init_ipa_groups(ctx);
+	return update_ipa_groups_from_dt(ctx);
+}
+
+static inline u32 read_hwcnt(struct kbase_ipa_context *ctx,
+	u32 offset)
+{
+	u8 *p = ctx->vinstr_buffer;
+
+	return *(u32 *)&p[offset];
+}
+
+static inline u32 add_saturate(u32 a, u32 b)
+{
+	if (U32_MAX - a < b)
+		return U32_MAX;
+	return a + b;
+}
+
+/*
+ * Calculate power estimation based on hardware counter `c'
+ * across all shader cores.
+ */
+static u32 calc_power_sc_single(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	u64 core_mask;
+	u32 base = 0, r = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			u64 n = read_hwcnt(ctx, base + c);
+			u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+			u32 s = group->capacitance;
+
+			r = add_saturate(r, div_u64(n * s, d));
+		}
+		base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+		core_mask >>= 1;
+	}
+	return r;
+}
+
+/*
+ * Calculate power estimation based on hardware counter `c1'
+ * and `c2' across all shader cores.
+ */
+static u32 calc_power_sc_double(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c1, u32 c2)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	u64 core_mask;
+	u32 base = 0, r = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			u64 n = read_hwcnt(ctx, base + c1);
+			u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+			u32 s = group->capacitance;
+
+			r = add_saturate(r, div_u64(n * s, d));
+			n = read_hwcnt(ctx, base + c2);
+			r = add_saturate(r, div_u64(n * s, d));
+		}
+		base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+		core_mask >>= 1;
+	}
+	return r;
+}
+
+static u32 calc_power_single(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c)
+{
+	u64 n = read_hwcnt(ctx, c);
+	u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+	u32 s = group->capacitance;
+
+	return div_u64(n * s, d);
+}
+
+static u32 calc_power_group0(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_single(ctx, group, L2_ANY_LOOKUP);
+}
+
+static u32 calc_power_group1(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_single(ctx, group, TILER_ACTIVE);
+}
+
+static u32 calc_power_group2(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, FRAG_ACTIVE);
+}
+
+static u32 calc_power_group3(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_double(ctx, group, VARY_SLOT_32,
+			VARY_SLOT_16);
+}
+
+static u32 calc_power_group4(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, TEX_COORD_ISSUE);
+}
+
+static u32 calc_power_group5(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, EXEC_INSTR_COUNT);
+}
+
+static u32 calc_power_group6(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_double(ctx, group, BEATS_RD_LSC,
+			BEATS_WR_LSC);
+}
+
+static u32 calc_power_group7(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, EXEC_CORE_ACTIVE);
+}
+
+static int attach_vinstr(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	size_t dump_size;
+
+	dump_size = kbase_vinstr_dump_size(kbdev);
+	ctx->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!ctx->vinstr_buffer) {
+		dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
+		return -1;
+	}
+
+	setup.jm_bm = ~0u;
+	setup.shader_bm = ~0u;
+	setup.tiler_bm = ~0u;
+	setup.mmu_l2_bm = ~0u;
+	ctx->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx,
+			&setup, ctx->vinstr_buffer);
+	if (!ctx->vinstr_cli) {
+		dev_err(kbdev->dev, "Failed to register IPA with vinstr core");
+		kfree(ctx->vinstr_buffer);
+		ctx->vinstr_buffer = NULL;
+		return -1;
+	}
+	return 0;
+}
+
+static void detach_vinstr(struct kbase_ipa_context *ctx)
+{
+	if (ctx->vinstr_cli)
+		kbase_vinstr_detach_client(ctx->vinstr_cli);
+	ctx->vinstr_cli = NULL;
+	kfree(ctx->vinstr_buffer);
+	ctx->vinstr_buffer = NULL;
+}
+
+struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_context *ctx;
+	int err;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return NULL;
+
+	mutex_init(&ctx->ipa_lock);
+	ctx->kbdev = kbdev;
+
+	err = reset_ipa_groups(ctx);
+	if (err < 0)
+		goto err0;
+
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_ipa_attr_group);
+	if (err < 0)
+		goto err0;
+
+	return ctx;
+err0:
+	kfree(ctx);
+	return NULL;
+}
+
+void kbase_ipa_term(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+
+	detach_vinstr(ctx);
+	sysfs_remove_group(&kbdev->dev->kobj, &kbase_ipa_attr_group);
+	kfree(ctx);
+}
+
+u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err)
+{
+	struct ipa_group *group;
+	u32 power = 0;
+	size_t i;
+
+	mutex_lock(&ctx->ipa_lock);
+	if (!ctx->vinstr_cli) {
+		*err = attach_vinstr(ctx);
+		if (*err < 0)
+			goto err0;
+	}
+	*err = kbase_vinstr_hwc_dump(ctx->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL);
+	if (*err)
+		goto err0;
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		group = &ctx->groups[i];
+		power = add_saturate(power, group->calc_power(ctx, group));
+	}
+err0:
+	mutex_unlock(&ctx->ipa_lock);
+	return power;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_dynamic_power);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h
new file mode 100755
index 0000000..e2234d1
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h
@@ -0,0 +1,41 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+struct kbase_ipa_context;
+
+/**
+ * kbase_ipa_init - initialize the kbase ipa core
+ * @kbdev:      kbase device
+ *
+ * Return:      pointer to the IPA context or NULL on failure
+ */
+struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_term - terminate the kbase ipa core
+ * @ctx:        pointer to the IPA context
+ */
+void kbase_ipa_term(struct kbase_ipa_context *ctx);
+
+/**
+ * kbase_ipa_dynamic_power - calculate power
+ * @ctx:        pointer to the IPA context
+ * @err:        0 on success, negative on failure
+ *
+ * Return:      returns power consumption as mw @ 1GHz @ 1V
+ */
+u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h
new file mode 100644
index 0000000..101abfe
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h
@@ -0,0 +1,104 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#define NR_BYTES_PER_CNT  4
+#define NR_CNT_PER_BLOCK 64
+
+#define JM_BASE    (0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define TILER_BASE (1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define MMU_BASE   (2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define SC0_BASE   (3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+
+#define GPU_ACTIVE       (JM_BASE    + NR_BYTES_PER_CNT *  6)
+#define TILER_ACTIVE     (TILER_BASE + NR_BYTES_PER_CNT * 45)
+#define L2_ANY_LOOKUP    (MMU_BASE   + NR_BYTES_PER_CNT * 25)
+#define FRAG_ACTIVE      (SC0_BASE   + NR_BYTES_PER_CNT *  4)
+#define EXEC_CORE_ACTIVE (SC0_BASE   + NR_BYTES_PER_CNT * 26)
+#define EXEC_INSTR_COUNT (SC0_BASE   + NR_BYTES_PER_CNT * 28)
+#define TEX_COORD_ISSUE  (SC0_BASE   + NR_BYTES_PER_CNT * 40)
+#define VARY_SLOT_32     (SC0_BASE   + NR_BYTES_PER_CNT * 50)
+#define VARY_SLOT_16     (SC0_BASE   + NR_BYTES_PER_CNT * 51)
+#define BEATS_RD_LSC     (SC0_BASE   + NR_BYTES_PER_CNT * 56)
+#define BEATS_WR_LSC     (SC0_BASE   + NR_BYTES_PER_CNT * 61)
+
+static u32 calc_power_group0(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group1(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group2(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group3(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group4(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group5(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group6(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group7(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+
+static struct ipa_group ipa_groups_def[] = {
+	/* L2 */
+	{
+		.name = "group0",
+		.capacitance = 687,
+		.calc_power = calc_power_group0,
+	},
+	/* TILER */
+	{
+		.name = "group1",
+		.capacitance = 0,
+		.calc_power = calc_power_group1,
+	},
+	/* FRAG */
+	{
+		.name = "group2",
+		.capacitance = 23,
+		.calc_power = calc_power_group2,
+	},
+	/* VARY */
+	{
+		.name = "group3",
+		.capacitance = 108,
+		.calc_power = calc_power_group3,
+	},
+	/* TEX */
+	{
+		.name = "group4",
+		.capacitance = 128,
+		.calc_power = calc_power_group4,
+	},
+	/* EXEC INSTR */
+	{
+		.name = "group5",
+		.capacitance = 249,
+		.calc_power = calc_power_group5,
+	},
+	/* LSC */
+	{
+		.name = "group6",
+		.capacitance = 0,
+		.calc_power = calc_power_group6,
+	},
+	/* EXEC OVERHEAD */
+	{
+		.name = "group7",
+		.capacitance = 29,
+		.calc_power = calc_power_group7,
+	},
+};
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100755
index 0000000..81952e2
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1880 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <mali_kbase.h>
+#include <mali_kbase_uku.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/ratelimit.h>
+
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_tlstream.h>
+
+#include "mali_kbase_dma_fence.h"
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/* Return whether katom will run on the GPU or not. Currently only soft jobs and
+ * dependency-only atoms do not run on the GPU */
+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) ||  \
+			((katom->core_req & BASE_JD_REQ_ATOM_TYPE) ==    \
+							BASE_JD_REQ_DEP)))
+/*
+ * This is the kernel side of the API. Only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ *   completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p)
+{
+#ifdef CONFIG_COMPAT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return compat_ptr(p->compat_value);
+#endif
+	return p->value;
+}
+
+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
+ */
+static int jd_run_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+	if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+		/* Dependency only atom */
+		katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		return 0;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		/* Soft-job */
+		if (katom->will_fail_event_code) {
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			return 0;
+		}
+		if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+			if (!kbase_replay_process(katom))
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		} else if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		}
+		return 0;
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+	/* Queue an action about whether we should try scheduling a context */
+	return kbasep_js_add_job(kctx, katom);
+}
+
+#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE)
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kbdev = katom->kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Check whether the atom's other dependencies were already met. If
+	 * katom is a GPU atom then the job scheduler may be able to represent
+	 * the dependencies, hence we may attempt to submit it before they are
+	 * met. Other atoms must have had both dependencies resolved.
+	 */
+	if (IS_GPU_ATOM(katom) ||
+			(!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+			!kbase_jd_katom_dep_atom(&katom->dep[1]))) {
+		/* katom dep complete, attempt to run it */
+		bool resched = false;
+
+		resched = jd_run_atom(katom);
+
+		if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+			/* The atom has already finished */
+			resched |= jd_done_nolock(katom, NULL);
+		}
+
+		if (resched)
+			kbase_js_sched_all(kbdev);
+	}
+}
+#endif
+
+#ifdef CONFIG_KDS
+
+/* Add the katom to the kds waiting list.
+ * Atoms must be added to the waiting list after a successful call to kds_async_waitall.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	list_add_tail(&katom->node, &kctx->waiting_kds_resource);
+}
+
+/* Remove the katom from the kds waiting list.
+ * Atoms must be removed from the waiting list before a call to kds_resource_set_release_sync.
+ * The supplied katom must first have been added to the list with a call to kbase_jd_kds_waiters_add.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_remove(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	list_del(&katom->node);
+}
+
+static void kds_dep_clear(void *callback_parameter, void *callback_extra_parameter)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = (struct kbase_jd_atom *)callback_parameter;
+	KBASE_DEBUG_ASSERT(katom);
+
+	ctx = &katom->kctx->jctx;
+
+	/* If KDS resource has already been satisfied (e.g. due to zapping)
+	 * do nothing.
+	 */
+	mutex_lock(&ctx->lock);
+	if (!katom->kds_dep_satisfied) {
+		katom->kds_dep_satisfied = true;
+		kbase_jd_dep_clear_locked(katom);
+	}
+	mutex_unlock(&ctx->lock);
+}
+
+static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 *  there is a race between job completion and cancellation */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+#endif				/* CONFIG_KDS */
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_KDS
+	if (katom->kds_rset) {
+		struct kbase_jd_context *jctx = &katom->kctx->jctx;
+
+		/*
+		 * As the atom is no longer waiting, remove it from
+		 * the waiting list.
+		 */
+
+		mutex_lock(&jctx->lock);
+		kbase_jd_kds_waiters_remove(katom);
+		mutex_unlock(&jctx->lock);
+
+		/* Release the kds resource or cancel if zapping */
+		kds_resource_set_release_sync(&katom->kds_rset);
+	}
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 * Any successfully completed atom would have had all it's callbacks
+	 * completed before the atom was run, so only flush for failed atoms.
+	 */
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		flush_workqueue(katom->kctx->dma_fence.wq);
+#endif /* CONFIG_MALI_DMA_FENCE */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_KDS
+	/* Prevent the KDS resource from triggering the atom in case of zapping */
+	if (katom->kds_rset)
+		katom->kds_dep_satisfied = true;
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_signal(katom);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	kbase_gpu_vm_lock(katom->kctx);
+	/* only roll back if extres is non-NULL */
+	if (katom->extres) {
+		u32 res_no;
+
+		res_no = katom->nr_extres;
+		while (res_no-- > 0) {
+			struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+			struct kbase_va_region *reg;
+
+			reg = kbase_region_tracker_find_region_base_address(
+					katom->kctx,
+					katom->extres[res_no].gpu_address);
+			kbase_unmap_external_resource(katom->kctx, reg, alloc);
+		}
+		kfree(katom->extres);
+		katom->extres = NULL;
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+}
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ */
+
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+	int err_ret_val = -EINVAL;
+	u32 res_no;
+#ifdef CONFIG_KDS
+	u32 kds_res_count = 0;
+	struct kds_resource **kds_resources = NULL;
+	unsigned long *kds_access_bitmap = NULL;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct kbase_dma_fence_resv_info info = {
+		.dma_fence_resv_count = 0,
+	};
+#endif
+	struct base_external_resource *input_extres;
+
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+	/* no resources encoded, early out */
+	if (!katom->nr_extres)
+		return -EINVAL;
+
+	katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+	if (NULL == katom->extres) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	/* copy user buffer to the end of our real buffer.
+	 * Make sure the struct sizes haven't changed in a way
+	 * we don't support */
+	BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+	input_extres = (struct base_external_resource *)
+			(((unsigned char *)katom->extres) +
+			(sizeof(*katom->extres) - sizeof(*input_extres)) *
+			katom->nr_extres);
+
+	if (copy_from_user(input_extres,
+			get_compat_pointer(katom->kctx, &user_atom->extres_list),
+			sizeof(*input_extres) * katom->nr_extres) != 0) {
+		err_ret_val = -EINVAL;
+		goto early_err_out;
+	}
+#ifdef CONFIG_KDS
+	/* assume we have to wait for all */
+	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+	kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL);
+
+	if (!kds_resources) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+	kds_access_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres),
+				    sizeof(unsigned long),
+				    GFP_KERNEL);
+	if (!kds_access_bitmap) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	info.resv_objs = kmalloc_array(katom->nr_extres,
+				       sizeof(struct reservation_object *),
+				       GFP_KERNEL);
+	if (!info.resv_objs) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	info.dma_fence_excl_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres),
+					     sizeof(unsigned long),
+					     GFP_KERNEL);
+	if (!info.dma_fence_excl_bitmap) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Take the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* need to keep the GPU VM locked while we set up UMM buffers */
+	kbase_gpu_vm_lock(katom->kctx);
+	for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+		struct base_external_resource *res;
+		struct kbase_va_region *reg;
+		struct kbase_mem_phy_alloc *alloc;
+		bool exclusive;
+
+		res = &input_extres[res_no];
+		exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
+				? true : false;
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx,
+				res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+		/* did we find a matching region object? */
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE)) {
+			/* roll back */
+			goto failed_loop;
+		}
+
+		if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
+				(reg->flags & KBASE_REG_SECURE)) {
+			katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED;
+		}
+
+		alloc = kbase_map_external_resource(katom->kctx, reg,
+				current->mm
+#ifdef CONFIG_KDS
+				, &kds_res_count, kds_resources,
+				kds_access_bitmap, exclusive
+#endif
+				);
+		if (!alloc) {
+			err_ret_val = -EINVAL;
+			goto failed_loop;
+		}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+			struct reservation_object *resv;
+
+			resv = reg->gpu_alloc->imported.umm.dma_buf->resv;
+			if (resv)
+				kbase_dma_fence_add_reservation(resv, &info,
+								exclusive);
+		}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+		/* finish with updating out array with the data we found */
+		/* NOTE: It is important that this is the last thing we do (or
+		 * at least not before the first write) as we overwrite elements
+		 * as we loop and could be overwriting ourself, so no writes
+		 * until the last read for an element.
+		 * */
+		katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+		katom->extres[res_no].alloc = alloc;
+	}
+	/* successfully parsed the extres array */
+	/* drop the vm lock before we call into kds */
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+#ifdef CONFIG_KDS
+	if (kds_res_count) {
+		int wait_failed;
+
+		/* We have resources to wait for with kds */
+		katom->kds_dep_satisfied = false;
+
+		wait_failed = kds_async_waitall(&katom->kds_rset,
+				&katom->kctx->jctx.kds_cb, katom, NULL,
+				kds_res_count, kds_access_bitmap,
+				kds_resources);
+
+		if (wait_failed)
+			goto failed_kds_setup;
+		else
+			kbase_jd_kds_waiters_add(katom);
+	} else {
+		/* Nothing to wait for, so kds dep met */
+		katom->kds_dep_satisfied = true;
+	}
+	kfree(kds_resources);
+	kfree(kds_access_bitmap);
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (info.dma_fence_resv_count) {
+		int ret;
+
+		ret = kbase_dma_fence_wait(katom, &info);
+		if (ret < 0)
+			goto failed_dma_fence_setup;
+	}
+
+	kfree(info.resv_objs);
+	kfree(info.dma_fence_excl_bitmap);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* all done OK */
+	return 0;
+
+/* error handling section */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+failed_dma_fence_setup:
+#ifdef CONFIG_KDS
+	/* If we are here, dma_fence setup failed but KDS didn't.
+	 * Revert KDS setup if any.
+	 */
+	if (kds_res_count) {
+		mutex_unlock(&katom->kctx->jctx.lock);
+		kds_resource_set_release_sync(&katom->kds_rset);
+		mutex_lock(&katom->kctx->jctx.lock);
+
+		kbase_jd_kds_waiters_remove(katom);
+		katom->kds_dep_satisfied = true;
+	}
+#endif /* CONFIG_KDS */
+#endif /* CONFIG_MALI_DMA_FENCE */
+#ifdef CONFIG_KDS
+failed_kds_setup:
+#endif
+#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE)
+	/* Lock the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* lock before we unmap */
+	kbase_gpu_vm_lock(katom->kctx);
+#endif
+
+ failed_loop:
+	/* undo the loop work */
+	while (res_no-- > 0) {
+		struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+
+		kbase_unmap_external_resource(katom->kctx, NULL, alloc);
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+ early_err_out:
+	kfree(katom->extres);
+	katom->extres = NULL;
+#ifdef CONFIG_KDS
+	kfree(kds_resources);
+	kfree(kds_access_bitmap);
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	kfree(info.resv_objs);
+	kfree(info.dma_fence_excl_bitmap);
+#endif
+	return err_ret_val;
+}
+
+static inline void jd_resolve_dep(struct list_head *out_list,
+					struct kbase_jd_atom *katom,
+					u8 d, bool ctx_is_dying)
+{
+	u8 other_d = !d;
+
+	while (!list_empty(&katom->dep_head[d])) {
+		struct kbase_jd_atom *dep_atom;
+		struct kbase_jd_atom *other_dep_atom;
+		u8 dep_type;
+
+		dep_atom = list_entry(katom->dep_head[d].next,
+				struct kbase_jd_atom, dep_item[d]);
+		list_del(katom->dep_head[d].next);
+
+		dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+		kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+		if (katom->event_code != BASE_JD_EVENT_DONE &&
+			(dep_type != BASE_JD_DEP_TYPE_ORDER)) {
+#ifdef CONFIG_KDS
+			if (!dep_atom->kds_dep_satisfied) {
+				/* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and
+				 * do nothing. If the callback doesn't happen then kbase_jd_post_external_resources will clean up
+				 */
+				dep_atom->kds_dep_satisfied = true;
+			}
+#endif
+
+#ifdef CONFIG_MALI_DMA_FENCE
+			kbase_dma_fence_cancel_callbacks(dep_atom);
+#endif
+
+			dep_atom->event_code = katom->event_code;
+			KBASE_DEBUG_ASSERT(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED);
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY)
+					!= BASE_JD_REQ_SOFT_REPLAY) {
+				dep_atom->will_fail_event_code =
+					dep_atom->event_code;
+			} else {
+				dep_atom->status =
+					KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+		}
+		other_dep_atom = (struct kbase_jd_atom *)
+			kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]);
+
+		if (!dep_atom->in_jd_list && (!other_dep_atom ||
+				(IS_GPU_ATOM(dep_atom) && !ctx_is_dying &&
+				!dep_atom->will_fail_event_code &&
+				!other_dep_atom->will_fail_event_code))) {
+			bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+			int dep_count;
+
+			dep_count = atomic_read(&dep_atom->dma_fence.dep_count);
+			if (likely(dep_count == -1)) {
+				dep_satisfied = true;
+			} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+				dep_satisfied = false;
+			}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+#ifdef CONFIG_KDS
+			dep_satisfied = dep_satisfied && dep_atom->kds_dep_satisfied;
+#endif
+
+			if (dep_satisfied) {
+				dep_atom->in_jd_list = true;
+				list_add_tail(&dep_atom->jd_item, out_list);
+			}
+		}
+	}
+}
+
+KBASE_EXPORT_TEST_API(jd_resolve_dep);
+
+#if MALI_CUSTOMER_RELEASE == 0
+static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	kbdev->force_replay_count++;
+
+	if (kbdev->force_replay_count >= kbdev->force_replay_limit) {
+		kbdev->force_replay_count = 0;
+		katom->event_code = BASE_JD_EVENT_FORCE_REPLAY;
+
+		if (kbdev->force_replay_random)
+			kbdev->force_replay_limit =
+			   (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1;
+
+		dev_info(kbdev->dev, "force_replay : promoting to error\n");
+	}
+}
+
+/** Test to see if atom should be forced to fail.
+ *
+ * This function will check if an atom has a replay job as a dependent. If so
+ * then it will be considered for forced failure. */
+static void jd_check_force_failure(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int i;
+
+	if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) ||
+	    (katom->core_req & BASEP_JD_REQ_EVENT_NEVER))
+		return;
+
+	for (i = 1; i < BASE_JD_ATOM_COUNT; i++) {
+		if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom ||
+		    kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
+			struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
+						     BASE_JD_REQ_SOFT_REPLAY &&
+			    (dep_atom->core_req & kbdev->force_replay_core_req)
+					     == kbdev->force_replay_core_req) {
+				jd_force_failure(kbdev, katom);
+				return;
+			}
+		}
+	}
+}
+#endif
+
+/**
+ * is_dep_valid - Validate that a dependency is valid for early dependency
+ *                submission
+ * @katom: Dependency atom to validate
+ *
+ * A dependency is valid if any of the following are true :
+ * - It does not exist (a non-existent dependency does not block submission)
+ * - It is in the job scheduler
+ * - It has completed, does not have a failure event code, and has not been
+ *   marked to fail in the future
+ *
+ * Return: true if valid, false otherwise
+ */
+static bool is_dep_valid(struct kbase_jd_atom *katom)
+{
+	/* If there's no dependency then this is 'valid' from the perspective of
+	 * early dependency submission */
+	if (!katom)
+		return true;
+
+	/* Dependency must have reached the job scheduler */
+	if (katom->status < KBASE_JD_ATOM_STATE_IN_JS)
+		return false;
+
+	/* If dependency has completed and has failed or will fail then it is
+	 * not valid */
+	if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED &&
+			(katom->event_code != BASE_JD_EVENT_DONE ||
+			katom->will_fail_event_code))
+		return false;
+
+	return true;
+}
+
+static void jd_try_submitting_deps(struct list_head *out_list,
+		struct kbase_jd_atom *node)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct list_head *pos;
+
+		list_for_each(pos, &node->dep_head[i]) {
+			struct kbase_jd_atom *dep_atom = list_entry(pos,
+					struct kbase_jd_atom, dep_item[i]);
+
+			if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) {
+				/*Check if atom deps look sane*/
+				bool dep0_valid = is_dep_valid(
+						dep_atom->dep[0].atom);
+				bool dep1_valid = is_dep_valid(
+						dep_atom->dep[1].atom);
+				bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+				int dep_count;
+
+				dep_count = atomic_read(
+						&dep_atom->dma_fence.dep_count);
+				if (likely(dep_count == -1)) {
+					dep_satisfied = true;
+				} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+					dep_satisfied = false;
+				}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#ifdef CONFIG_KDS
+				dep_satisfied = dep_satisfied &&
+						dep_atom->kds_dep_satisfied;
+#endif
+
+				if (dep0_valid && dep1_valid && dep_satisfied) {
+					dep_atom->in_jd_list = true;
+					list_add(&dep_atom->jd_item, out_list);
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head completed_jobs;
+	struct list_head runnable_jobs;
+	bool need_to_try_schedule_context = false;
+	int i;
+
+	INIT_LIST_HEAD(&completed_jobs);
+	INIT_LIST_HEAD(&runnable_jobs);
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+#if MALI_CUSTOMER_RELEASE == 0
+	jd_check_force_failure(katom);
+#endif
+
+	/* This is needed in case an atom is failed due to being invalid, this
+	 * can happen *before* the jobs that the atom depends on have completed */
+	for (i = 0; i < 2; i++) {
+		if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
+			list_del(&katom->dep_item[i]);
+			kbase_jd_katom_dep_clear(&katom->dep[i]);
+		}
+	}
+
+	/* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with
+	 * BASE_JD_EVENT_TILE_RANGE_FAULT.
+	 *
+	 * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the
+	 * error code to BASE_JD_EVENT_DONE
+	 */
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) &&
+		  katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) {
+		if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) {
+			/* Promote the failure to job done */
+			katom->event_code = BASE_JD_EVENT_DONE;
+			katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED);
+		}
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+	list_add_tail(&katom->jd_item, &completed_jobs);
+
+	while (!list_empty(&completed_jobs)) {
+		katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item);
+		list_del(completed_jobs.prev);
+		KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+		for (i = 0; i < 2; i++)
+			jd_resolve_dep(&runnable_jobs, katom, i,
+					kbase_ctx_flag(kctx, KCTX_DYING));
+
+		if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+			kbase_jd_post_external_resources(katom);
+
+		while (!list_empty(&runnable_jobs)) {
+			struct kbase_jd_atom *node;
+
+			node = list_entry(runnable_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(runnable_jobs.next);
+			node->in_jd_list = false;
+
+			KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+			if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+				need_to_try_schedule_context |= jd_run_atom(node);
+			} else {
+				node->event_code = katom->event_code;
+
+				if ((node->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE) ==
+					BASE_JD_REQ_SOFT_REPLAY) {
+					if (kbase_replay_process(node))
+						/* Don't complete this atom */
+						continue;
+				} else if (node->core_req &
+							BASE_JD_REQ_SOFT_JOB) {
+					/* If this is a fence wait soft job
+					 * then remove it from the list of sync
+					 * waiters.
+					 */
+					if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
+						kbasep_remove_waiting_soft_job(node);
+
+					kbase_finish_soft_job(node);
+				}
+				node->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+
+			if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+				list_add_tail(&node->jd_item, &completed_jobs);
+			} else if (node->status == KBASE_JD_ATOM_STATE_IN_JS &&
+					!node->will_fail_event_code) {
+				/* Node successfully submitted, try submitting
+				 * dependencies as they may now be representable
+				 * in JS */
+				jd_try_submitting_deps(&runnable_jobs, node);
+			}
+		}
+
+		/* Register a completed job as a disjoint event when the GPU
+		 * is in a disjoint state (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kctx->kbdev);
+		if (completed_jobs_ctx)
+			list_add_tail(&katom->jd_item, completed_jobs_ctx);
+		else
+			kbase_event_post(kctx, katom);
+
+		/* Decrement and check the TOTAL number of jobs. This includes
+		 * those not tracked by the scheduler: 'not ready to run' and
+		 * 'dependency-only' jobs. */
+		if (--kctx->jctx.job_nr == 0)
+			wake_up(&kctx->jctx.zero_jobs_wait);	/* All events are safely queued now, and we can signal any waiter
+								 * that we've got no more jobs (so we can be safely terminated) */
+	}
+
+	return need_to_try_schedule_context;
+}
+
+KBASE_EXPORT_TEST_API(jd_done_nolock);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+enum {
+	CORE_REQ_DEP_ONLY,
+	CORE_REQ_SOFT,
+	CORE_REQ_COMPUTE,
+	CORE_REQ_FRAGMENT,
+	CORE_REQ_VERTEX,
+	CORE_REQ_TILER,
+	CORE_REQ_FRAGMENT_VERTEX,
+	CORE_REQ_FRAGMENT_VERTEX_TILER,
+	CORE_REQ_FRAGMENT_TILER,
+	CORE_REQ_VERTEX_TILER,
+	CORE_REQ_UNKNOWN
+};
+static const char * const core_req_strings[] = {
+	"Dependency Only Job",
+	"Soft Job",
+	"Compute Shader Job",
+	"Fragment Shader Job",
+	"Vertex/Geometry Shader Job",
+	"Tiler Job",
+	"Fragment Shader + Vertex/Geometry Shader Job",
+	"Fragment Shader + Vertex/Geometry Shader Job + Tiler Job",
+	"Fragment Shader + Tiler Job",
+	"Vertex/Geometry Shader Job + Tiler Job",
+	"Unknown Job"
+};
+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
+{
+	if (core_req & BASE_JD_REQ_SOFT_JOB)
+		return core_req_strings[CORE_REQ_SOFT];
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		return core_req_strings[CORE_REQ_COMPUTE];
+	switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) {
+	case BASE_JD_REQ_DEP:
+		return core_req_strings[CORE_REQ_DEP_ONLY];
+	case BASE_JD_REQ_FS:
+		return core_req_strings[CORE_REQ_FRAGMENT];
+	case BASE_JD_REQ_CS:
+		return core_req_strings[CORE_REQ_VERTEX];
+	case BASE_JD_REQ_T:
+		return core_req_strings[CORE_REQ_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_TILER];
+	case (BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_VERTEX_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER];
+	}
+	return core_req_strings[CORE_REQ_UNKNOWN];
+}
+#endif
+
+bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int queued = 0;
+	int i;
+	int sched_prio;
+	bool ret;
+	bool will_fail = false;
+
+	/* Update the TOTAL number of jobs. This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	jctx->job_nr++;
+
+	katom->start_timestamp.tv64 = 0;
+	katom->time_spent_us = 0;
+	katom->udata = user_atom->udata;
+	katom->kctx = kctx;
+	katom->nr_extres = user_atom->nr_extres;
+	katom->extres = NULL;
+	katom->device_nr = user_atom->device_nr;
+	katom->affinity = 0;
+	katom->jc = user_atom->jc;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->core_req = user_atom->core_req;
+	katom->atom_flags = 0;
+	katom->retry_count = 0;
+	katom->need_cache_flush_cores_retained = 0;
+	katom->pre_dep = NULL;
+	katom->post_dep = NULL;
+	katom->x_pre_dep = NULL;
+	katom->x_post_dep = NULL;
+	katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED;
+
+	/* Implicitly sets katom->protected_state.enter as well. */
+	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+	katom->age = kctx->age_count++;
+
+	INIT_LIST_HEAD(&katom->jd_item);
+#ifdef CONFIG_KDS
+	/* Start by assuming that the KDS dependencies are satisfied,
+	 * kbase_jd_pre_external_resources will correct this if there are dependencies */
+	katom->kds_dep_satisfied = true;
+	katom->kds_rset = NULL;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	atomic_set(&katom->dma_fence.dep_count, -1);
+#endif
+
+	kbase_tlstream_tl_attrib_atom_state(katom, TL_ATOM_STATE_IDLE);
+
+	/* Don't do anything if there is a mess up with dependencies.
+	   This is done in a separate cycle to check both the dependencies at ones, otherwise
+	   it will be extra complexity to deal with 1st dependency ( just added to the list )
+	   if only the 2nd one has invalid config.
+	 */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+		if (dep_atom_number) {
+			if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+				katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+
+				/* Wrong dependency setup. Atom will be sent
+				 * back to user space. Do not record any
+				 * dependencies. */
+				kbase_tlstream_tl_new_atom(
+						katom,
+						kbase_jd_atom_id(kctx, katom));
+				kbase_tlstream_tl_ret_atom_ctx(
+						katom, kctx);
+
+				ret = jd_done_nolock(katom, NULL);
+				goto out;
+			}
+		}
+	}
+
+	/* Add dependencies */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type;
+		struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number];
+
+		dep_atom_type = user_atom->pre_dep[i].dependency_type;
+		kbase_jd_katom_dep_clear(&katom->dep[i]);
+
+		if (!dep_atom_number)
+			continue;
+
+		if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED ||
+				dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+
+			if (dep_atom->event_code == BASE_JD_EVENT_DONE)
+				continue;
+			/* don't stop this atom if it has an order dependency
+			 * only to the failed one, try to submit it through
+			 * the normal path
+			 */
+			if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom->event_code > BASE_JD_EVENT_ACTIVE) {
+				continue;
+			}
+
+			/* Atom has completed, propagate the error code if any */
+			katom->event_code = dep_atom->event_code;
+			katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+			/* This atom is going through soft replay or
+			 * will be sent back to user space. Do not record any
+			 * dependencies. */
+			kbase_tlstream_tl_new_atom(
+					katom,
+					kbase_jd_atom_id(kctx, katom));
+			kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
+
+			if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					 == BASE_JD_REQ_SOFT_REPLAY) {
+				if (kbase_replay_process(katom)) {
+					ret = false;
+					goto out;
+				}
+			}
+			will_fail = true;
+
+		} else {
+			/* Atom is in progress, add this atom to the list */
+			list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
+			kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type);
+			queued = 1;
+		}
+	}
+
+	if (will_fail) {
+		if (!queued) {
+			ret = jd_done_nolock(katom, NULL);
+
+			goto out;
+		} else {
+			katom->will_fail_event_code = katom->event_code;
+			ret = false;
+
+			goto out;
+		}
+	} else {
+		/* These must occur after the above loop to ensure that an atom
+		 * that depends on a previous atom with the same number behaves
+		 * as expected */
+		katom->event_code = BASE_JD_EVENT_DONE;
+		katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	}
+
+	/* For invalid priority, be most lenient and choose the default */
+	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+	katom->sched_priority = sched_prio;
+
+	/* Create a new atom recording all dependencies it was set up with. */
+	kbase_tlstream_tl_new_atom(
+			katom,
+			kbase_jd_atom_id(kctx, katom));
+	kbase_tlstream_tl_attrib_atom_priority(katom, katom->sched_priority);
+	kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
+	for (i = 0; i < 2; i++)
+		if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
+					&katom->dep[i])) {
+			kbase_tlstream_tl_dep_atom_atom(
+					(void *)kbase_jd_katom_dep_atom(
+						&katom->dep[i]),
+					(void *)katom);
+		} else if (BASE_JD_DEP_TYPE_INVALID !=
+				user_atom->pre_dep[i].dependency_type) {
+			/* Resolved dependency. */
+			int dep_atom_number =
+				user_atom->pre_dep[i].atom_id;
+			struct kbase_jd_atom *dep_atom =
+				&jctx->atoms[dep_atom_number];
+
+			kbase_tlstream_tl_rdep_atom_atom(
+					(void *)dep_atom,
+					(void *)katom);
+		}
+
+	/* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
+	if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with an invalid device_nr */
+	if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
+	    (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid device_nr %d",
+				katom->device_nr);
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with invalid core requirements */
+	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
+			(katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid core requirements");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		/* handle what we need to do to access the external resources */
+		if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
+			/* setup failed (no access, bad resource, unknown resource types, etc.) */
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+	/* Validate the atom. Function will return error if the atom is
+	 * malformed.
+	 *
+	 * Soft-jobs never enter the job scheduler but have their own initialize method.
+	 *
+	 * If either fail then we immediately complete the atom with an error.
+	 */
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
+		if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	} else {
+		/* Soft-job */
+		if (kbase_prepare_soft_job(katom) != 0) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	katom->work_id = atomic_inc_return(&jctx->work_id);
+	trace_gpu_job_enqueue((u32)kctx->id, katom->work_id,
+			kbasep_map_core_reqs_to_string(katom->core_req));
+#endif
+
+	if (queued && !IS_GPU_ATOM(katom)) {
+		ret = false;
+		goto out;
+	}
+#ifdef CONFIG_KDS
+	if (!katom->kds_dep_satisfied) {
+		/* Queue atom due to KDS dependency */
+		ret = false;
+		goto out;
+	}
+#endif				/* CONFIG_KDS */
+
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (atomic_read(&katom->dma_fence.dep_count) != -1) {
+		ret = false;
+		goto out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+		if (kbase_replay_process(katom))
+			ret = false;
+		else
+			ret = jd_done_nolock(katom, NULL);
+
+		goto out;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+
+		ret = false;
+	} else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		ret = kbasep_js_add_job(kctx, katom);
+		/* If job was cancelled then resolve immediately */
+		if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED)
+			ret = jd_done_nolock(katom, NULL);
+	} else {
+		/* This is a pure dependency. Resolve it immediately */
+		ret = jd_done_nolock(katom, NULL);
+	}
+
+ out:
+	return ret;
+}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data,
+		int uk6_atom)
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int err = 0;
+	int i;
+	bool need_to_try_schedule_context = false;
+	struct kbase_device *kbdev;
+	void __user *user_addr;
+	u32 latest_flush;
+
+	/*
+	 * kbase_jd_submit isn't expected to fail and so all errors with the jobs
+	 * are reported by immediately falling them (through event system)
+	 */
+	kbdev = kctx->kbdev;
+
+	beenthere(kctx, "%s", "Enter");
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+		return -EINVAL;
+	}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	if ((uk6_atom && submit_data->stride !=
+			sizeof(struct base_jd_atom_v2_uk6)) ||
+			submit_data->stride != sizeof(base_jd_atom_v2)) {
+#else
+	if (submit_data->stride != sizeof(base_jd_atom_v2)) {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+		dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+		return -EINVAL;
+	}
+
+	user_addr = get_compat_pointer(kctx, &submit_data->addr);
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight));
+
+	/* All atoms submitted in this call have the same flush ID */
+	latest_flush = kbase_backend_get_current_flush_id(kbdev);
+
+	for (i = 0; i < submit_data->nr_atoms; i++) {
+		struct base_jd_atom_v2 user_atom;
+		struct kbase_jd_atom *katom;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+		if (uk6_atom) {
+			struct base_jd_atom_v2_uk6 user_atom_v6;
+			base_jd_dep_type dep_types[2] = {BASE_JD_DEP_TYPE_DATA, BASE_JD_DEP_TYPE_DATA};
+
+			if (copy_from_user(&user_atom_v6, user_addr,
+					sizeof(user_atom_v6))) {
+				err = -EINVAL;
+				KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx,
+					atomic_sub_return(
+					submit_data->nr_atoms - i,
+					&kctx->timeline.jd_atoms_in_flight));
+				break;
+			}
+			/* Convert from UK6 atom format to UK7 format */
+			user_atom.jc = user_atom_v6.jc;
+			user_atom.udata = user_atom_v6.udata;
+			user_atom.extres_list = user_atom_v6.extres_list;
+			user_atom.nr_extres = user_atom_v6.nr_extres;
+			user_atom.core_req = (u32)(user_atom_v6.core_req & 0x7fff);
+
+			/* atom number 0 is used for no dependency atoms */
+			if (!user_atom_v6.pre_dep[0])
+				dep_types[0] = BASE_JD_DEP_TYPE_INVALID;
+
+			base_jd_atom_dep_set(&user_atom.pre_dep[0],
+					user_atom_v6.pre_dep[0],
+					dep_types[0]);
+
+			/* atom number 0 is used for no dependency atoms */
+			if (!user_atom_v6.pre_dep[1])
+				dep_types[1] = BASE_JD_DEP_TYPE_INVALID;
+
+			base_jd_atom_dep_set(&user_atom.pre_dep[1],
+					user_atom_v6.pre_dep[1],
+					dep_types[1]);
+
+			user_atom.atom_number = user_atom_v6.atom_number;
+			user_atom.prio = user_atom_v6.prio;
+			user_atom.device_nr = user_atom_v6.device_nr;
+		} else {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+		if (copy_from_user(&user_atom, user_addr, sizeof(user_atom)) != 0) {
+			err = -EINVAL;
+			KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(submit_data->nr_atoms - i, &kctx->timeline.jd_atoms_in_flight));
+			break;
+		}
+#ifdef BASE_LEGACY_UK6_SUPPORT
+		}
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+		if (KBASE_API_VERSION(10, 3) > kctx->api_version)
+			user_atom.core_req = (u32)(user_atom.compat_core_req
+					      & 0x7fff);
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
+
+		user_addr = (void __user *)((uintptr_t) user_addr + submit_data->stride);
+
+		mutex_lock(&jctx->lock);
+#ifndef compiletime_assert
+#define compiletime_assert_defined
+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \
+while (false)
+#endif
+		compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) ==
+					BASE_JD_ATOM_COUNT,
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+		compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) ==
+					sizeof(user_atom.atom_number),
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+#ifdef compiletime_assert_defined
+#undef compiletime_assert
+#undef compiletime_assert_defined
+#endif
+		katom = &jctx->atoms[user_atom.atom_number];
+
+		/* Record the flush ID for the cache flush optimisation */
+		katom->flush_id = latest_flush;
+
+		while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
+			/* Atom number is already in use, wait for the atom to
+			 * complete
+			 */
+			mutex_unlock(&jctx->lock);
+
+			/* This thread will wait for the atom to complete. Due
+			 * to thread scheduling we are not sure that the other
+			 * thread that owns the atom will also schedule the
+			 * context, so we force the scheduler to be active and
+			 * hence eventually schedule this context at some point
+			 * later.
+			 */
+			kbase_js_sched_all(kbdev);
+
+			if (wait_event_killable(katom->completed,
+					katom->status ==
+					KBASE_JD_ATOM_STATE_UNUSED) != 0) {
+				/* We're being killed so the result code
+				 * doesn't really matter
+				 */
+				return 0;
+			}
+			mutex_lock(&jctx->lock);
+		}
+
+		need_to_try_schedule_context |=
+				       jd_submit_atom(kctx, &user_atom, katom);
+
+		/* Register a completed job as a disjoint event when the GPU is in a disjoint state
+		 * (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kbdev);
+
+		mutex_unlock(&jctx->lock);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kbdev);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_submit);
+
+void kbase_jd_done_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	u64 cache_jc = katom->jc;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool context_idle;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+	kbdev = kctx->kbdev;
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	/*
+	 * Begin transaction on JD context and JS context
+	 */
+	mutex_lock(&jctx->lock);
+	kbase_tlstream_tl_attrib_atom_state(katom, TL_ATOM_STATE_DONE);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* This worker only gets called on contexts that are scheduled *in*. This is
+	 * because it only happens in response to an IRQ from a job that was
+	 * running.
+	 */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (katom->event_code == BASE_JD_EVENT_STOPPED) {
+		/* Atom has been promoted to stopped */
+		unsigned long flags;
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		kbase_js_unpull(kctx, katom);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&jctx->lock);
+
+		return;
+	}
+
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		dev_err(kbdev->dev,
+			"t6xx: GPU fault 0x%02lx from job slot %d\n",
+					(unsigned long)katom->event_code,
+								katom->slot_nr);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	/* Retain state before the katom disappears */
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+
+	context_idle = kbase_js_complete_atom_wq(kctx, katom);
+
+	KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
+
+	kbasep_js_remove_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+	jd_done_nolock(katom, &kctx->completed_jobs);
+
+	/* katom may have been freed now, do not use! */
+
+	if (context_idle) {
+		unsigned long flags;
+
+		context_idle = false;
+		mutex_lock(&js_devdata->queue_mutex);
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* If kbase_sched() has scheduled this context back in then
+		 * KCTX_ACTIVE will have been set after we marked it as
+		 * inactive, and another pm reference will have been taken, so
+		 * drop our reference. But do not call kbase_jm_idle_ctx(), as
+		 * the context is active and fast-starting is allowed.
+		 *
+		 * If an atom has been fast-started then kctx->atoms_pulled will
+		 * be non-zero but KCTX_ACTIVE will still be false (as the
+		 * previous pm reference has been inherited). Do NOT drop our
+		 * reference, as it has been re-used, and leave the context as
+		 * active.
+		 *
+		 * If no new atoms have been started then KCTX_ACTIVE will still
+		 * be false and atoms_pulled will be zero, so drop the reference
+		 * and call kbase_jm_idle_ctx().
+		 *
+		 * As the checks are done under both the queue_mutex and
+		 * hwaccess_lock is should be impossible for this to race
+		 * with the scheduler code.
+		 */
+		if (kbase_ctx_flag(kctx, KCTX_ACTIVE) ||
+		    !atomic_read(&kctx->atoms_pulled)) {
+			/* Calling kbase_jm_idle_ctx() here will ensure that
+			 * atoms are not fast-started when we drop the
+			 * hwaccess_lock. This is not performed if
+			 * KCTX_ACTIVE is set as in that case another pm
+			 * reference has been taken and a fast-start would be
+			 * valid.
+			 */
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE))
+				kbase_jm_idle_ctx(kbdev, kctx);
+			context_idle = true;
+		} else {
+			kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+		}
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+
+	/*
+	 * Transaction complete
+	 */
+	mutex_unlock(&jctx->lock);
+
+	/* Job is now no longer running, so can now safely release the context
+	 * reference, and handle any actions that were logged against the atom's retained state */
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	if (!atomic_dec_return(&kctx->work_count)) {
+		/* If worker now idle then post all events that jd_done_nolock()
+		 * has queued */
+		mutex_lock(&jctx->lock);
+		while (!list_empty(&kctx->completed_jobs)) {
+			struct kbase_jd_atom *atom = list_entry(
+					kctx->completed_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(kctx->completed_jobs.next);
+
+			kbase_event_post(kctx, atom);
+		}
+		mutex_unlock(&jctx->lock);
+	}
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+
+	if (context_idle)
+		kbase_pm_context_idle(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+}
+
+/**
+ * jd_cancel_worker - Work queue job cancel function.
+ * @data: a &struct work_struct
+ *
+ * Only called as part of 'Zapping' a context (which occurs on termination).
+ * Operates serially with the kbase_jd_done_worker() on the work queue.
+ *
+ * This can only be called on contexts that aren't scheduled.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be
+ * running (by virtue of only being called on contexts that aren't
+ * scheduled).
+ */
+static void jd_cancel_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool need_to_try_schedule_context;
+	bool attr_state_changed;
+	struct kbase_device *kbdev;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	kbdev = kctx->kbdev;
+	jctx = &kctx->jctx;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+	/* This only gets called on contexts that are scheduled out. Hence, we must
+	 * make sure we don't de-ref the number of running jobs (there aren't
+	 * any), nor must we try to schedule out the context (it's already
+	 * scheduled out).
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	/* Scheduler: Remove the job from the system */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&jctx->lock);
+
+	need_to_try_schedule_context = jd_done_nolock(katom, NULL);
+	/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
+	 * schedule the context. There's also no need for the jsctx_mutex to have been taken
+	 * around this too. */
+	KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
+
+	/* katom may have been freed now, do not use! */
+	mutex_unlock(&jctx->lock);
+
+	if (attr_state_changed)
+		kbase_js_sched_all(kbdev);
+}
+
+/**
+ * kbase_jd_done - Complete a job that has been removed from the Hardware
+ * @katom: atom which has been completed
+ * @slot_nr: slot the atom was on
+ * @end_timestamp: completion time
+ * @done_code: completion code
+ *
+ * This must be used whenever a job has been removed from the Hardware, e.g.:
+ * An IRQ indicates that the job finished (for both error and 'done' codes), or
+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop.
+ *
+ * Some work is carried out immediately, and the rest is deferred onto a
+ * workqueue
+ *
+ * Context:
+ *   This can be called safely from atomic context.
+ *   The caller must hold kbdev->hwaccess_lock
+ */
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
+		ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
+{
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(kctx);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
+		katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0);
+
+	kbase_job_check_leave_disjoint(kbdev, katom);
+
+	katom->slot_nr = slot_nr;
+
+	atomic_inc(&kctx->work_count);
+
+#ifdef CONFIG_DEBUG_FS
+	/* a failed job happened and is waiting for dumping*/
+	if (!katom->will_fail_event_code &&
+			kbase_debug_job_fault_process(katom, katom->event_code))
+		return;
+#endif
+
+	WARN_ON(work_pending(&katom->work));
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, kbase_jd_done_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_done);
+
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+	/* This should only be done from a context that is not scheduled */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	WARN_ON(work_pending(&katom->work));
+
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, jd_cancel_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+
+void kbase_jd_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_jd_atom *katom;
+	struct list_head *entry, *tmp;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u);
+
+	kbase_js_zap_context(kctx);
+
+	mutex_lock(&kctx->jctx.lock);
+
+	/*
+	 * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are
+	 * queued outside the job scheduler.
+	 */
+
+	del_timer_sync(&kctx->soft_job_timeout);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		katom = list_entry(entry, struct kbase_jd_atom, queue);
+		kbase_cancel_soft_job(katom);
+	}
+
+
+#ifdef CONFIG_KDS
+
+	/* For each job waiting on a kds resource, cancel the wait and force the job to
+	 * complete early, this is done so that we don't leave jobs outstanding waiting
+	 * on kds resources which may never be released when contexts are zapped, resulting
+	 * in a hang.
+	 *
+	 * Note that we can safely iterate over the list as the struct kbase_jd_context lock is held,
+	 * this prevents items being removed when calling job_done_nolock in kbase_cancel_kds_wait_job.
+	 */
+
+	list_for_each(entry, &kctx->waiting_kds_resource) {
+		katom = list_entry(entry, struct kbase_jd_atom, node);
+
+		kbase_cancel_kds_wait_job(katom);
+	}
+#endif
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_cancel_all_atoms(kctx);
+#endif
+
+	mutex_unlock(&kctx->jctx.lock);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 */
+	flush_workqueue(kctx->dma_fence.wq);
+#endif
+
+	kbase_jm_wait_for_zero_jobs(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_zap_context);
+
+int kbase_jd_init(struct kbase_context *kctx)
+{
+	int i;
+	int mali_err = 0;
+#ifdef CONFIG_KDS
+	int err;
+#endif				/* CONFIG_KDS */
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kctx->jctx.job_done_wq = alloc_workqueue("mali_jd",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (NULL == kctx->jctx.job_done_wq) {
+		mali_err = -ENOMEM;
+		goto out1;
+	}
+
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		init_waitqueue_head(&kctx->jctx.atoms[i].completed);
+
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]);
+
+		/* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
+		kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
+		kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		kctx->jctx.atoms[i].dma_fence.context = fence_context_alloc(1);
+		atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks);
+#endif
+	}
+
+	mutex_init(&kctx->jctx.lock);
+
+	init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
+
+	spin_lock_init(&kctx->jctx.tb_lock);
+
+#ifdef CONFIG_KDS
+	err = kds_callback_init(&kctx->jctx.kds_cb, 0, kds_dep_clear);
+	if (0 != err) {
+		mali_err = -EINVAL;
+		goto out2;
+	}
+#endif				/* CONFIG_KDS */
+
+	kctx->jctx.job_nr = 0;
+	INIT_LIST_HEAD(&kctx->completed_jobs);
+	atomic_set(&kctx->work_count, 0);
+
+	return 0;
+
+#ifdef CONFIG_KDS
+ out2:
+	destroy_workqueue(kctx->jctx.job_done_wq);
+#endif				/* CONFIG_KDS */
+ out1:
+	return mali_err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_init);
+
+void kbase_jd_exit(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+#ifdef CONFIG_KDS
+	kds_callback_term(&kctx->jctx.kds_cb);
+#endif				/* CONFIG_KDS */
+	/* Work queue is emptied by this */
+	destroy_workqueue(kctx->jctx.job_done_wq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_exit);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
new file mode 100755
index 0000000..6437e42
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/seq_file.h>
+
+#include <mali_kbase.h>
+
+#include <mali_kbase_jd_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/**
+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file.
+ * @sfile: The debugfs entry
+ * @data:  Data associated with the entry
+ *
+ * This function is called to get the contents of the JD atoms debugfs file.
+ * This is a report of all atoms managed by kbase_jd_context.atoms
+ *
+ * Return: 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+	struct kbase_jd_atom *atoms;
+	unsigned long irq_flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Print version */
+	seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION);
+
+	/* Print U/K API version */
+	seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR,
+			BASE_UK_VERSION_MINOR);
+
+	/* Print table heading */
+	seq_puts(sfile, "atom id,core reqs,status,coreref status,predeps,start time,time on gpu\n");
+
+	atoms = kctx->jctx.atoms;
+	/* General atom states */
+	mutex_lock(&kctx->jctx.lock);
+	/* JS-related states */
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+	for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
+		struct kbase_jd_atom *atom = &atoms[i];
+		s64 start_timestamp = 0;
+
+		if (atom->status == KBASE_JD_ATOM_STATE_UNUSED)
+			continue;
+
+		/* start_timestamp is cleared as soon as the atom leaves UNUSED state
+		 * and set before a job is submitted to the h/w, a non-zero value means
+		 * it is valid */
+		if (ktime_to_ns(atom->start_timestamp))
+			start_timestamp = ktime_to_ns(
+					ktime_sub(ktime_get(), atom->start_timestamp));
+
+		seq_printf(sfile,
+				"%i,%u,%u,%u,%u %u,%lli,%llu\n",
+				i, atom->core_req, atom->status, atom->coreref_state,
+				(unsigned)(atom->dep[0].atom ?
+						atom->dep[0].atom - atoms : 0),
+				(unsigned)(atom->dep[1].atom ?
+						atom->dep[1].atom - atoms : 0),
+				(signed long long)start_timestamp,
+				(unsigned long long)(atom->time_spent_us ?
+					atom->time_spent_us * 1000 : start_timestamp)
+				);
+	}
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&kctx->jctx.lock);
+
+	return 0;
+}
+
+
+/**
+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * Return: file descriptor
+ */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private);
+}
+
+static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+	.open = kbasep_jd_debugfs_atoms_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Expose all atoms */
+	debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx,
+			&kbasep_jd_debugfs_atoms_fops);
+
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
new file mode 100755
index 0000000..090f816
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_jd_debugfs.h
+ * Header file for job dispatcher-related entries in debugfs
+ */
+
+#ifndef _KBASE_JD_DEBUGFS_H
+#define _KBASE_JD_DEBUGFS_H
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+
+#define MALI_JD_DEBUGFS_VERSION 1
+
+/**
+ * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system
+ *
+ * @kctx Pointer to kbase_context
+ */
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx);
+
+#endif  /*_KBASE_JD_DEBUGFS_H*/
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
new file mode 100755
index 0000000..0c5c6a6
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_hwaccess_jm.h"
+#include "mali_kbase_jm.h"
+
+/**
+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot
+ *			 @js on the active context.
+ * @kbdev:		Device pointer
+ * @js:			Job slot to run on
+ * @nr_jobs_to_submit:	Number of jobs to attempt to submit
+ *
+ * Return: true if slot can still be submitted on, false if slot is now full.
+ */
+static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
+				int nr_jobs_to_submit)
+{
+	struct kbase_context *kctx;
+	int i;
+
+	kctx = kbdev->hwaccess.active_kctx;
+
+	if (!kctx)
+		return true;
+
+	for (i = 0; i < nr_jobs_to_submit; i++) {
+		struct kbase_jd_atom *katom = kbase_js_pull(kctx, js);
+
+		if (!katom)
+			return true; /* Context has no jobs on this slot */
+
+		kbase_backend_run_atom(kbdev, katom);
+	}
+
+	return false; /* Slot ringbuffer should now be full */
+}
+
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	u32 ret_mask = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (js_mask) {
+		int js = ffs(js_mask) - 1;
+		int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
+
+		if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
+			ret_mask |= (1 << js);
+
+		js_mask &= ~(1 << js);
+	}
+
+	return ret_mask;
+}
+
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick(kbdev, js_mask);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_try_kick_all(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick_all(kbdev);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->hwaccess.active_kctx == kctx)
+		kbdev->hwaccess.active_kctx = NULL;
+}
+
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->event_code != BASE_JD_EVENT_STOPPED &&
+			katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
+		return kbase_js_complete_atom(katom, NULL);
+	} else {
+		kbase_js_unpull(katom->kctx, katom);
+		return NULL;
+	}
+}
+
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return kbase_js_complete_atom(katom, end_timestamp);
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
new file mode 100755
index 0000000..a74ee24
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -0,0 +1,110 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Job manager common APIs
+ */
+
+#ifndef _KBASE_JM_H_
+#define _KBASE_JM_H_
+
+/**
+ * kbase_jm_kick() - Indicate that there are jobs ready to run.
+ * @kbdev:	Device pointer
+ * @js_mask:	Mask of the job slots that can be pulled from.
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job
+ *			 slots.
+ * @kbdev:	Device pointer
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
+{
+	return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+/**
+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick
+ * @kbdev:   Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
+ * @kbdev:  Device pointer
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick_all() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick_all(struct kbase_device *kbdev);
+
+/**
+ * kbase_jm_idle_ctx() - Mark a context as idle.
+ * @kbdev:	Device pointer
+ * @kctx:	Context to mark as idle
+ *
+ * No more atoms will be pulled from this context until it is marked as active
+ * by kbase_js_use_ctx().
+ *
+ * The context should have no atoms currently pulled from it
+ * (kctx->atoms_pulled == 0).
+ *
+ * Caller must hold the hwaccess_lock
+ */
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
+ *				  been soft-stopped or will fail due to a
+ *				  dependency
+ * @kbdev:	Device pointer
+ * @katom:	Atom that has been stopped or will be failed
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+			struct kbase_jd_atom *katom);
+
+/**
+ * kbase_jm_complete() - Complete an atom
+ * @kbdev:		Device pointer
+ * @katom:		Atom that has completed
+ * @end_timestamp:	Timestamp of atom completion
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp);
+
+#endif /* _KBASE_JM_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
new file mode 100755
index 0000000..7aed324
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -0,0 +1,2947 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_hw.h>
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_jm.h"
+#include "mali_kbase_hwaccess_jm.h"
+
+/*
+ * Private types
+ */
+
+/* Bitpattern indicating the result of releasing a context */
+enum {
+	/* The context was descheduled - caller should try scheduling in a new
+	 * one to keep the runpool full */
+	KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+	/* Ctx attributes were changed - caller should try scheduling all
+	 * contexts */
+	KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
+};
+
+typedef u32 kbasep_js_release_result;
+
+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
+	KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
+	KBASE_JS_ATOM_SCHED_PRIO_LOW  /* BASE_JD_PRIO_LOW */
+};
+
+const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
+	BASE_JD_PRIO_HIGH,   /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+	BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+	BASE_JD_PRIO_LOW     /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+};
+
+
+/*
+ * Private function prototypes
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state);
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_policy_ctx_job_cb callback);
+
+/* Helper for trace subcodes */
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return refcnt;
+}
+
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+/*
+ * Private types
+ */
+enum {
+	JS_DEVDATA_INIT_NONE = 0,
+	JS_DEVDATA_INIT_CONSTANTS = (1 << 0),
+	JS_DEVDATA_INIT_POLICY = (1 << 1),
+	JS_DEVDATA_INIT_ALL = ((1 << 2) - 1)
+};
+
+enum {
+	JS_KCTX_INIT_NONE = 0,
+	JS_KCTX_INIT_CONSTANTS = (1 << 0),
+	JS_KCTX_INIT_POLICY = (1 << 1),
+	JS_KCTX_INIT_ALL = ((1 << 2) - 1)
+};
+
+/*
+ * Private functions
+ */
+
+/**
+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
+ * @features: JSn_FEATURE register value
+ *
+ * Given a JSn_FEATURE register value returns the core requirements that match
+ *
+ * Return: Core requirement bit mask
+ */
+static base_jd_core_req core_reqs_from_jsn_features(u16 features)
+{
+	base_jd_core_req core_req = 0u;
+
+	if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+		core_req |= BASE_JD_REQ_V;
+
+	if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+		core_req |= BASE_JD_REQ_CF;
+
+	if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+		core_req |= BASE_JD_REQ_CS;
+
+	if ((features & JS_FEATURE_TILER_JOB) != 0)
+		core_req |= BASE_JD_REQ_T;
+
+	if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+		core_req |= BASE_JD_REQ_FS;
+
+	return core_req;
+}
+
+static void kbase_js_sync_timers(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+	kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
+
+/* Hold the hwaccess_lock for this */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_per_as_data *js_per_as_data;
+	bool result = false;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		int new_refcnt;
+
+		KBASE_DEBUG_ASSERT(as_nr >= 0);
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		KBASE_DEBUG_ASSERT(js_per_as_data->kctx != NULL);
+
+		new_refcnt = ++(js_per_as_data->as_busy_refcount);
+
+		KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
+				NULL, 0u, new_refcnt);
+		result = true;
+	}
+
+	return result;
+}
+
+/**
+ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority to check.
+ *
+ * Return true if there are no atoms to pull. There may be running atoms in the
+ * ring buffer even if there are no atoms to pull. It is also possible for the
+ * ring buffer to be full (with running atoms) when this functions returns
+ * true.
+ *
+ * Return: true if there are no atoms to pull, false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	return RB_EMPTY_ROOT(&rb->runnable_tree);
+}
+
+/**
+ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no
+ * pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if the ring buffers for all priorities have no pullable atoms,
+ *	   false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue.
+ * @kctx:     Pointer to kbase context with the queue.
+ * @js:       Job slot id to iterate.
+ * @prio:     Priority id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over a queue and invoke @callback for each entry in the queue, and
+ * remove the entry from the queue.
+ *
+ * If entries are added to the queue while this is running those entries may, or
+ * may not be covered. To ensure that all entries in the buffer have been
+ * enumerated when this function returns jsctx->lock must be held when calling
+ * this function.
+ *
+ * The HW access lock must always be held when calling this function.
+ */
+static void
+jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
+		kbasep_js_policy_ctx_job_cb callback)
+{
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (!RB_EMPTY_ROOT(&queue->runnable_tree)) {
+		struct rb_node *node = rb_first(&queue->runnable_tree);
+		struct kbase_jd_atom *entry = rb_entry(node,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		rb_erase(node, &queue->runnable_tree);
+		callback(kctx->kbdev, entry);
+	}
+
+	while (!list_empty(&queue->x_dep_head)) {
+		struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next,
+				struct kbase_jd_atom, queue);
+
+		list_del(queue->x_dep_head.next);
+
+		callback(kctx->kbdev, entry);
+	}
+}
+
+/**
+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue
+ * @kctx:     Pointer to kbase context with queue.
+ * @js:       Job slot id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over all the different priorities, and for each call
+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
+ * for each entry, and remove the entry from the queue.
+ */
+static inline void
+jsctx_queue_foreach(struct kbase_context *kctx, int js,
+		kbasep_js_policy_ctx_job_cb callback)
+{
+	int prio;
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+		jsctx_queue_foreach_prio(kctx, js, prio, callback);
+}
+
+/**
+ * jsctx_rb_peek_prio(): - Check buffer and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a pointer to
+ * the next atom, unless the ring buffer is empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	struct rb_node *node;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	node = rb_first(&rb->runnable_tree);
+	if (!node)
+		return NULL;
+
+	return rb_entry(node, struct kbase_jd_atom, runnable_tree_node);
+}
+
+/**
+ * jsctx_rb_peek(): - Check all priority buffers and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Check the ring buffers for all priorities, starting from
+ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
+ * pointer to the next atom, unless all the priority's ring buffers are empty.
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		struct kbase_jd_atom *katom;
+
+		katom = jsctx_rb_peek_prio(kctx, js, prio);
+		if (katom)
+			return katom;
+	}
+
+	return NULL;
+}
+
+/**
+ * jsctx_rb_pull(): - Mark atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to pull.
+ *
+ * Mark an atom previously obtained from jsctx_rb_peek() as running.
+ *
+ * @katom must currently be at the head of the ring buffer.
+ */
+static inline void
+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/* Atoms must be pulled in the correct order. */
+	WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
+
+	rb_erase(&katom->runnable_tree_node, &rb->runnable_tree);
+}
+
+#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0)
+
+static void
+jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+	struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (*new) {
+		struct kbase_jd_atom *entry = container_of(*new,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		parent = *new;
+		if (LESS_THAN_WRAP(katom->age, entry->age))
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&katom->runnable_tree_node, parent, new);
+	rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree);
+}
+
+/**
+ * jsctx_rb_unpull(): - Undo marking of atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to unpull.
+ *
+ * Undo jsctx_rb_pull() and put @katom back in the queue.
+ *
+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
+ * pulled.
+ */
+static inline void
+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_tree_add(kctx, katom);
+}
+
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
+					int js,
+					bool is_scheduled);
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+
+/*
+ * Functions private to KBase ('Protected' functions)
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev)
+{
+	struct kbasep_js_device_data *jsdd;
+	int err;
+	int i;
+	u16 as_present;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	jsdd = &kbdev->js_data;
+
+	KBASE_DEBUG_ASSERT(jsdd->init_status == JS_DEVDATA_INIT_NONE);
+
+	/* These two must be recalculated if nr_hw_address_spaces changes
+	 * (e.g. for HW workarounds) */
+	as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
+	kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+		bool use_workaround;
+
+		use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
+		if (use_workaround) {
+			dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
+			kbdev->nr_user_address_spaces = 1;
+		}
+	}
+#ifdef CONFIG_MALI_DEBUG
+	/* Soft-stop will be disabled on a single context by default unless
+	 * softstop_always is set */
+	jsdd->softstop_always = false;
+#endif				/* CONFIG_MALI_DEBUG */
+	jsdd->nr_all_contexts_running = 0;
+	jsdd->nr_user_contexts_running = 0;
+	jsdd->nr_contexts_pullable = 0;
+	atomic_set(&jsdd->nr_contexts_runnable, 0);
+	/* All ASs initially free */
+	jsdd->as_free = as_present;
+	/* No ctx allowed to submit */
+	jsdd->runpool_irq.submit_allowed = 0u;
+	memset(jsdd->runpool_irq.ctx_attr_ref_count, 0,
+			sizeof(jsdd->runpool_irq.ctx_attr_ref_count));
+	memset(jsdd->runpool_irq.slot_affinities, 0,
+			sizeof(jsdd->runpool_irq.slot_affinities));
+	memset(jsdd->runpool_irq.slot_affinity_refcount, 0,
+			sizeof(jsdd->runpool_irq.slot_affinity_refcount));
+	INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list);
+
+	/* Config attributes */
+	jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+	jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+	jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+	else
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS;
+	jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+	jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408;
+	else
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+	jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+	jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
+	jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
+	jsdd->cfs_ctx_runtime_init_slices =
+		DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES;
+	jsdd->cfs_ctx_runtime_min_slices =
+		DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES;
+	atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
+
+	dev_dbg(kbdev->dev, "JS Config Attribs: ");
+	dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
+			jsdd->scheduling_period_ns);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u",
+			jsdd->soft_stop_ticks);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u",
+			jsdd->soft_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u",
+			jsdd->hard_stop_ticks_ss);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u",
+			jsdd->hard_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u",
+			jsdd->hard_stop_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u",
+			jsdd->gpu_reset_ticks_ss);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u",
+			jsdd->gpu_reset_ticks_cl);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u",
+			jsdd->gpu_reset_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u",
+			jsdd->ctx_timeslice_ns);
+	dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_init_slices:%u",
+			jsdd->cfs_ctx_runtime_init_slices);
+	dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u",
+			jsdd->cfs_ctx_runtime_min_slices);
+	dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i",
+		atomic_read(&jsdd->soft_job_timeout_ms));
+
+	if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
+			jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
+			jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping &&
+			jsdd->hard_stop_ticks_dumping <
+			jsdd->gpu_reset_ticks_dumping)) {
+		dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n");
+		return -EINVAL;
+	}
+
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Policy Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.",
+			jsdd->soft_stop_ticks,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Policy Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.",
+			jsdd->hard_stop_ticks_ss,
+			jsdd->hard_stop_ticks_dumping,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Note: The JS policy's tick timer (if coded) will still be run, but do nothing.");
+#endif
+
+	/* setup the number of irq throttle cycles base on given time */
+	{
+		int time_us = kbdev->gpu_props.irq_throttle_time_us;
+		int cycles = kbasep_js_convert_us_to_gpu_ticks_max_freq(kbdev,
+				time_us);
+
+		atomic_set(&kbdev->irq_throttle_cycles, cycles);
+	}
+
+	/* Clear the AS data, including setting NULL pointers */
+	memset(&jsdd->runpool_irq.per_as_data[0], 0,
+			sizeof(jsdd->runpool_irq.per_as_data));
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i)
+		jsdd->js_reqs[i] = core_reqs_from_jsn_features(
+			kbdev->gpu_props.props.raw_props.js_features[i]);
+
+	jsdd->init_status |= JS_DEVDATA_INIT_CONSTANTS;
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+
+	mutex_init(&jsdd->runpool_mutex);
+	mutex_init(&jsdd->queue_mutex);
+	spin_lock_init(&kbdev->hwaccess_lock);
+	sema_init(&jsdd->schedule_sem, 1);
+
+	err = kbasep_js_policy_init(kbdev);
+	if (!err)
+		jsdd->init_status |= JS_DEVDATA_INIT_POLICY;
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
+		INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]);
+		INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]);
+	}
+
+	/* On error, do no cleanup; this will be handled by the caller(s), since
+	 * we've designed this resource to be safe to terminate on init-fail */
+	if (jsdd->init_status != JS_DEVDATA_INIT_ALL)
+		return -EINVAL;
+
+	return 0;
+}
+
+void kbasep_js_devdata_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbasep_js_devdata_term(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	if ((js_devdata->init_status & JS_DEVDATA_INIT_CONSTANTS)) {
+		s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+		/* The caller must de-register all contexts before calling this
+		 */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
+		KBASE_DEBUG_ASSERT(memcmp(
+				js_devdata->runpool_irq.ctx_attr_ref_count,
+				zero_ctx_attr_ref_count,
+				sizeof(zero_ctx_attr_ref_count)) == 0);
+		CSTD_UNUSED(zero_ctx_attr_ref_count);
+	}
+	if ((js_devdata->init_status & JS_DEVDATA_INIT_POLICY))
+		kbasep_js_policy_term(&js_devdata->policy);
+
+	js_devdata->init_status = JS_DEVDATA_INIT_NONE;
+}
+
+int kbasep_js_kctx_init(struct kbase_context * const kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int err;
+	int i, j;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	KBASE_DEBUG_ASSERT(js_kctx_info->init_status == JS_KCTX_INIT_NONE);
+
+	js_kctx_info->ctx.nr_jobs = 0;
+	kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+	kbase_ctx_flag_clear(kctx, KCTX_DYING);
+	memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
+			sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
+
+	/* Initially, the context is disabled from submission until the create
+	 * flags are set */
+	kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED);
+
+	js_kctx_info->init_status |= JS_KCTX_INIT_CONSTANTS;
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+	mutex_init(&js_kctx_info->ctx.jsctx_mutex);
+
+	init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
+
+	err = kbasep_js_policy_init_ctx(kbdev, kctx);
+	if (!err)
+		js_kctx_info->init_status |= JS_KCTX_INIT_POLICY;
+
+	/* On error, do no cleanup; this will be handled by the caller(s), since
+	 * we've designed this resource to be safe to terminate on init-fail */
+	if (js_kctx_info->init_status != JS_KCTX_INIT_ALL)
+		return -EINVAL;
+
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
+			INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head);
+			kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT;
+		}
+	}
+
+	return 0;
+}
+
+void kbasep_js_kctx_term(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	union kbasep_js_policy *js_policy;
+	int js;
+	bool update_ctx_count = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if ((js_kctx_info->init_status & JS_KCTX_INIT_CONSTANTS)) {
+		/* The caller must de-register all jobs before calling this */
+		KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+		KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
+	}
+
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) {
+		WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
+		atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		update_ctx_count = true;
+		kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+	}
+
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if ((js_kctx_info->init_status & JS_KCTX_INIT_POLICY))
+		kbasep_js_policy_term_ctx(js_policy, kctx);
+
+	js_kctx_info->init_status = JS_KCTX_INIT_NONE;
+
+	if (update_ctx_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		kbase_backend_ctx_count_changed(kbdev);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_nolock - Variant of
+ *                                         kbase_jd_ctx_list_add_pullable()
+ *                                         where the caller must hold
+ *                                         hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head_nolock - Variant of
+ *                                              kbase_js_ctx_list_add_pullable_head()
+ *                                              where the caller must hold
+ *                                              hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head_nolock(
+		struct kbase_device *kbdev, struct kbase_context *kctx, int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
+ *                                       per-slot pullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the head.
+ *
+ * This function should be used when a context has been scheduled, but no jobs
+ * can currently be pulled from it.
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the
+ *                                           per-slot unpullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on the per-slot pullable queue. It will be
+ * removed from the pullable queue before being added to the unpullable queue.
+ *
+ * This function should be used when a context has been pulled from, and there
+ * are no jobs remaining on the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+				&kbdev->js_data.ctx_list_unpullable[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable
+ *                                   or unpullable context queues
+ * @kbdev:  Device pointer
+ * @kctx:   Context to remove from queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on one of the queues.
+ *
+ * This function should be used when a context has no jobs on the GPU, and no
+ * jobs remaining for the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head()
+ *                                     where the caller must hold
+ *                                     hwaccess_lock
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
+						struct kbase_device *kbdev,
+						int js)
+{
+	struct kbase_context *kctx;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (list_empty(&kbdev->js_data.ctx_list_pullable[js]))
+		return NULL;
+
+	kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next,
+					struct kbase_context,
+					jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
+ *                              queue.
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head(
+		struct kbase_device *kbdev, int js)
+{
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the
+ *                         specified slot
+ * @kctx:          Context pointer
+ * @js:            Job slot to use
+ * @is_scheduled:  true if the context is currently scheduled
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:         true if context can be pulled from on specified slot
+ *                 false otherwise
+ */
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
+					bool is_scheduled)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_jd_atom *katom;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	js_devdata = &kctx->kbdev->js_data;
+
+	if (is_scheduled) {
+		if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+			return false;
+	}
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return false; /* No pullable atoms */
+	if (atomic_read(&katom->blocked))
+		return false; /* next atom blocked */
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return false;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return false;
+	}
+
+	return true;
+}
+
+static bool kbase_js_dep_validate(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool ret = true;
+	bool has_dep = false, has_x_dep = false;
+	int js = kbase_js_get_slot(kbdev, katom);
+	int prio = katom->sched_priority;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+		if (dep_atom) {
+			int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+			int dep_prio = dep_atom->sched_priority;
+
+			/* Dependent atom must already have been submitted */
+			if (!(dep_atom->atom_flags &
+					KBASE_KATOM_FLAG_JSCTX_IN_TREE)) {
+				ret = false;
+				break;
+			}
+
+			/* Dependencies with different priorities can't
+			  be represented in the ringbuffer */
+			if (prio != dep_prio) {
+				ret = false;
+				break;
+			}
+
+			if (js == dep_js) {
+				/* Only one same-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * same-slot dependency */
+				if (dep_atom->post_dep) {
+					ret = false;
+					break;
+				}
+				has_dep = true;
+			} else {
+				/* Only one cross-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_x_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * cross-slot dependency */
+				if (dep_atom->x_post_dep) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already be in the
+				 * HW access ringbuffer */
+				if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already have
+				 * completed */
+				if (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_IN_JS) {
+					ret = false;
+					break;
+				}
+				/* Cross-slot dependencies must not violate
+				 * PRLAM-8987 affinity restrictions */
+				if (kbase_hw_has_issue(kbdev,
+							BASE_HW_ISSUE_8987) &&
+						(js == 2 || dep_js == 2)) {
+					ret = false;
+					break;
+				}
+				has_x_dep = true;
+			}
+
+			/* Dependency can be represented in ringbuffers */
+		}
+	}
+
+	/* If dependencies can be represented by ringbuffer then clear them from
+	 * atom structure */
+	if (ret) {
+		for (i = 0; i < 2; i++) {
+			struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+			if (dep_atom) {
+				int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+
+				if ((js != dep_js) &&
+					(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_COMPLETED)
+					&& (dep_atom->status !=
+					KBASE_JD_ATOM_STATE_HW_COMPLETED)
+					&& (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED)) {
+
+					katom->atom_flags |=
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+					katom->x_pre_dep = dep_atom;
+					dep_atom->x_post_dep = katom;
+					if (kbase_jd_katom_dep_type(
+							&katom->dep[i]) ==
+							BASE_JD_DEP_TYPE_DATA)
+						katom->atom_flags |=
+						KBASE_KATOM_FLAG_FAIL_BLOCKER;
+				}
+				if ((kbase_jd_katom_dep_type(&katom->dep[i])
+						== BASE_JD_DEP_TYPE_DATA) &&
+						(js == dep_js)) {
+					katom->pre_dep = dep_atom;
+					dep_atom->post_dep = katom;
+				}
+
+				list_del(&katom->dep_item[i]);
+				kbase_jd_katom_dep_clear(&katom->dep[i]);
+			}
+		}
+	}
+
+	return ret;
+}
+
+bool kbasep_js_add_job(struct kbase_context *kctx,
+		struct kbase_jd_atom *atom)
+{
+	unsigned long flags;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	union kbasep_js_policy *js_policy;
+
+	bool enqueue_required = false;
+	bool timer_sync = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/*
+	 * Begin Runpool transaction
+	 */
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
+	++(js_kctx_info->ctx.nr_jobs);
+
+	/* Setup any scheduling information */
+	kbasep_js_clear_job_retry_submit(atom);
+
+	/* Lock for state available during IRQ */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (!kbase_js_dep_validate(kctx, atom)) {
+		/* Dependencies could not be represented */
+		--(js_kctx_info->ctx.nr_jobs);
+
+		/* Setting atom status back to queued as it still has unresolved
+		 * dependencies */
+		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		goto out_unlock;
+	}
+
+	kbase_tlstream_tl_attrib_atom_state(atom, TL_ATOM_STATE_READY);
+	KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
+
+	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
+				kbasep_js_trace_get_refcnt_nolock(kbdev, kctx));
+
+	/* Context Attribute Refcounting */
+	kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom);
+
+	if (enqueue_required) {
+		if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false))
+			timer_sync = kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+		else
+			timer_sync = kbase_js_ctx_list_add_unpullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+	}
+	/* If this context is active and the atom is the first on its slot,
+	 * kick the job manager to attempt to fast-start the atom */
+	if (enqueue_required && kctx == kbdev->hwaccess.active_kctx)
+		kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+	/* End runpool transaction */
+
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+			/* A job got added while/after kbase_job_zap_context()
+			 * was called on a non-scheduled context (e.g. KDS
+			 * dependency resolved). Kill that job by killing the
+			 * context. */
+			kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx,
+					false);
+		} else if (js_kctx_info->ctx.nr_jobs == 1) {
+			/* Handle Refcount going from 0 to 1: schedule the
+			 * context on the Policy Queue */
+			KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+			dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+
+			/* Policy Queue was updated - caller must try to
+			 * schedule the head context */
+			WARN_ON(!enqueue_required);
+		}
+	}
+out_unlock:
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return enqueue_required;
+}
+
+void kbasep_js_remove_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	union kbasep_js_policy *js_policy;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc,
+			kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* De-refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
+	--(js_kctx_info->ctx.nr_jobs);
+}
+
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	unsigned long flags;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	struct kbasep_js_device_data *js_devdata;
+	bool attr_state_changed;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+	kbasep_js_remove_job(kbdev, kctx, katom);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* The atom has 'finished' (will not be re-run), so no need to call
+	 * kbasep_js_has_atom_finished().
+	 *
+	 * This is because it returns false for soft-stopped atoms, but we
+	 * want to override that, because we're cancelling an atom regardless of
+	 * whether it was soft-stopped or not */
+	attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
+			&katom_retained_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return attr_state_changed;
+}
+
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	bool result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	/* KBASE_TRACE_ADD_REFCOUNT( kbdev, JS_RETAIN_CTX, kctx, NULL, 0,
+	   kbasep_js_trace_get_refcnt(kbdev, kctx)); */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return result;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+		int as_nr)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx = NULL;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	found_kctx = js_per_as_data->kctx;
+
+	if (found_kctx != NULL)
+		++(js_per_as_data->as_busy_refcount);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return found_kctx;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+		struct kbase_device *kbdev, int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx = NULL;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	found_kctx = js_per_as_data->kctx;
+
+	if (found_kctx != NULL)
+		++(js_per_as_data->as_busy_refcount);
+
+	return found_kctx;
+}
+
+/**
+ * kbasep_js_release_result - Try running more jobs after releasing a context
+ *                            and/or atom
+ *
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed
+ *
+ * This collates a set of actions that must happen whilst hwaccess_lock is held.
+ *
+ * This includes running more jobs when:
+ * - The previously released kctx caused a ctx attribute change,
+ * - The released atom caused a ctx attribute change,
+ * - Slots were previously blocked due to affinity restrictions,
+ * - Submission during IRQ handling failed.
+ *
+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were
+ *         changed. The caller should try scheduling all contexts
+ */
+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state,
+		bool runpool_ctx_attr_change)
+{
+	struct kbasep_js_device_data *js_devdata;
+	kbasep_js_release_result result = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom_retained_state != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (js_devdata->nr_user_contexts_running != 0) {
+		bool retry_submit = false;
+		int retry_jobslot = 0;
+
+		if (katom_retained_state)
+			retry_submit = kbasep_js_get_atom_retry_submit_slot(
+					katom_retained_state, &retry_jobslot);
+
+		if (runpool_ctx_attr_change || retry_submit) {
+			/* A change in runpool ctx attributes might mean we can
+			 * run more jobs than before  */
+			result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+
+			KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB,
+						kctx, NULL, 0u, retry_jobslot);
+		}
+	}
+	return result;
+}
+
+/*
+ * Internal function to release the reference on a ctx and an atom's "retained
+ * state", only taking the runpool and as transaction mutexes
+ *
+ * This also starts more jobs running in the case of an ctx-attribute state
+ * change
+ *
+ * This does none of the followup actions for scheduling:
+ * - It does not schedule in a new context
+ * - It does not requeue or handle dying contexts
+ *
+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead
+ *
+ * Requires:
+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
+ * - Context has a non-zero refcount
+ * - Caller holds js_kctx_info->ctx.jsctx_mutex
+ * - Caller holds js_devdata->runpool_mutex
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	union kbasep_js_policy *js_policy;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	kbasep_js_release_result release_result = 0u;
+	bool runpool_ctx_attr_change = false;
+	int kctx_as_nr;
+	struct kbase_as *current_as;
+	int new_ref_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	/* kctx->as_nr and js_per_as_data are only read from here. The caller's
+	 * js_ctx_mutex provides a barrier that ensures they are up-to-date.
+	 *
+	 * They will not change whilst we're reading them, because the refcount
+	 * is non-zero (and we ASSERT on that last fact).
+	 */
+	kctx_as_nr = kctx->as_nr;
+	KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID);
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[kctx_as_nr];
+	KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+	/*
+	 * Transaction begins on AS and runpool_irq
+	 *
+	 * Assert about out calling contract
+	 */
+	current_as = &kbdev->as[kctx_as_nr];
+	mutex_lock(&kbdev->pm.lock);
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
+	KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+	/* Update refcount */
+	new_ref_count = --(js_per_as_data->as_busy_refcount);
+
+	/* Release the atom if it finished (i.e. wasn't soft-stopped) */
+	if (kbasep_js_has_atom_finished(katom_retained_state))
+		runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(
+				kbdev, kctx, katom_retained_state);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u,
+			new_ref_count);
+
+	if (new_ref_count == 1 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) &&
+			!kbase_pm_is_suspending(kbdev)) {
+		/* Context is kept scheduled into an address space even when
+		 * there are no jobs, in this case we have to handle the
+		 * situation where all jobs have been evicted from the GPU and
+		 * submission is disabled.
+		 *
+		 * At this point we re-enable submission to allow further jobs
+		 * to be executed
+		 */
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+	}
+
+	/* Make a set of checks to see if the context should be scheduled out */
+	if (new_ref_count == 0 &&
+		(!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
+							kbdev->pm.suspending)) {
+		/* Last reference, and we've been told to remove this context
+		 * from the Run Pool */
+		dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because as_busy_refcount=%d, jobs=%d, allowed=%d",
+				kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
+				kbasep_js_is_submit_allowed(js_devdata, kctx));
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_mmu_as_released(kctx->as_nr);
+#endif
+		kbase_tlstream_tl_nret_as_ctx(&kbdev->as[kctx->as_nr], kctx);
+
+		kbase_backend_release_ctx_irq(kbdev, kctx);
+
+		if (kbdev->hwaccess.active_kctx == kctx)
+			kbdev->hwaccess.active_kctx = NULL;
+
+		/* Ctx Attribute handling
+		 *
+		 * Releasing atoms attributes must either happen before this, or
+		 * after the KCTX_SHEDULED flag is changed, otherwise we
+		 * double-decount the attributes
+		 */
+		runpool_ctx_attr_change |=
+			kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
+
+		/* Releasing the context and katom retained state can allow
+		 * more jobs to run */
+		release_result |=
+			kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev,
+						kctx, katom_retained_state,
+						runpool_ctx_attr_change);
+
+		/*
+		 * Transaction ends on AS and runpool_irq:
+		 *
+		 * By this point, the AS-related data is now clear and ready
+		 * for re-use.
+		 *
+		 * Since releases only occur once for each previous successful
+		 * retain, and no more retains are allowed on this context, no
+		 * other thread will be operating in this
+		 * code whilst we are
+		 */
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		kbase_backend_release_ctx_noirq(kbdev, kctx);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* Note: Don't reuse kctx_as_nr now */
+
+		/* Synchronize with any policy timers */
+		kbase_backend_ctx_count_changed(kbdev);
+
+		/* update book-keeping info */
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+		/* Signal any waiter that the context is not scheduled, so is
+		 * safe for termination - once the jsctx_mutex is also dropped,
+		 * and jobs have finished. */
+		wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+		/* Queue an action to occur after we've dropped the lock */
+		release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED |
+			KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+	} else {
+		kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
+				katom_retained_state, runpool_ctx_attr_change);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return release_result;
+}
+
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	/* Setup a dummy katom_retained_state */
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+							&katom_retained_state);
+}
+
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx, bool has_pm_ref)
+{
+	struct kbasep_js_device_data *js_devdata;
+	union kbasep_js_policy *js_policy;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_policy = &kbdev->js_data.policy;
+	js_devdata = &kbdev->js_data;
+
+	/* This is called if and only if you've you've detached the context from
+	 * the Runpool or the Policy Queue, and not added it back to the Runpool
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Dying: don't requeue, but kill all jobs on the context. This
+		 * happens asynchronously */
+		dev_dbg(kbdev->dev,
+			"JS: ** Killing Context %p on RunPool Remove **", kctx);
+		kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
+	}
+}
+
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL)
+		kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+			&katom_retained_state);
+}
+
+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into
+ * kbase_js_sched_all() */
+static void kbasep_js_runpool_release_ctx_no_schedule(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+	struct kbasep_js_atom_retained_state katom_retained_state_struct;
+	struct kbasep_js_atom_retained_state *katom_retained_state =
+		&katom_retained_state_struct;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	kbasep_js_atom_retained_state_init_invalid(katom_retained_state);
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* NOTE: could return release_result if the caller would like to know
+	 * whether it should schedule a new context, but currently no callers do
+	 */
+}
+
+void kbase_js_set_timeouts(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_backend_timeouts_changed(kbdev);
+}
+
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	union kbasep_js_policy *js_policy;
+	struct kbase_as *new_address_space = NULL;
+	unsigned long flags;
+	bool kctx_suspended = false;
+	int as_nr;
+
+	js_devdata = &kbdev->js_data;
+	js_policy = &kbdev->js_data.policy;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Pick available address space for this context */
+	as_nr = kbase_backend_find_free_address_space(kbdev, kctx);
+
+	if (as_nr == KBASEP_AS_NR_INVALID)
+		return false; /* No address spaces currently available */
+
+	new_address_space = &kbdev->as[as_nr];
+
+	/*
+	 * Atomic transaction on the Context and Run Pool begins
+	 */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Check to see if context is dying due to kbase_job_zap_context() */
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Roll back the transaction so far and return */
+		kbase_backend_release_free_address_space(kbdev, as_nr);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL,
+				0u,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	kbase_ctx_flag_set(kctx, KCTX_SCHEDULED);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Assign context to previously chosen address space */
+	if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* Roll back the transaction so far and return */
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+
+		kbase_backend_release_free_address_space(kbdev, as_nr);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		return false;
+	}
+
+	kbdev->hwaccess.active_kctx = kctx;
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
+#endif
+	kbase_tlstream_tl_ret_as_ctx(&kbdev->as[kctx->as_nr], kctx);
+
+	/* Cause any future waiter-on-termination to wait until the context is
+	 * descheduled */
+	wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+	/* Re-check for suspending: a suspend could've occurred, and all the
+	 * contexts could've been removed from the runpool before we took this
+	 * lock. In this case, we don't want to allow this context to run jobs,
+	 * we just want it out immediately.
+	 *
+	 * The DMB required to read the suspend flag was issued recently as part
+	 * of the hwaccess_lock locking. If a suspend occurs *after* that lock
+	 * was taken (i.e. this condition doesn't execute), then the
+	 * kbasep_js_suspend() code will cleanup this context instead (by virtue
+	 * of it being called strictly after the suspend flag is set, and will
+	 * wait for this lock to drop) */
+	if (kbase_pm_is_suspending(kbdev)) {
+		/* Cause it to leave at some later point */
+		bool retained;
+
+		retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+		KBASE_DEBUG_ASSERT(retained);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+		kctx_suspended = true;
+	}
+
+	/* Transaction complete */
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Synchronize with any policy timers */
+	kbase_backend_ctx_count_changed(kbdev);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	/* Note: after this point, the context could potentially get scheduled
+	 * out immediately */
+
+	if (kctx_suspended) {
+		/* Finishing forcing out the context due to a suspend. Use a
+		 * variant of kbasep_js_runpool_release_ctx() that doesn't
+		 * schedule a new context, to prevent a risk of recursion back
+		 * into this function */
+		kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx);
+		return false;
+	}
+	return true;
+}
+
+static bool kbase_js_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (kbase_backend_use_ctx_sched(kbdev, kctx)) {
+		/* Context already has ASID - mark as active */
+		kbdev->hwaccess.active_kctx = kctx;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return true; /* Context already scheduled */
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return kbasep_js_schedule_ctx(kbdev, kctx);
+}
+
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	bool is_scheduled;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* This must never be attempted whilst suspending - i.e. it should only
+	 * happen in response to a syscall from a user-space thread */
+	BUG_ON(kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Mark the context as privileged */
+	kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED);
+
+	is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED);
+	if (!is_scheduled) {
+		/* Add the context to the pullable list */
+		if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0))
+			kbase_js_sync_timers(kbdev);
+
+		/* Fast-starting requires the jsctx_mutex to be dropped,
+		 * because it works on multiple ctxs */
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		/* Try to schedule the context in */
+		kbase_js_sched_all(kbdev);
+
+		/* Wait for the context to be scheduled in */
+		wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			   kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	} else {
+		/* Already scheduled in - We need to retain it to keep the
+		 * corresponding address space */
+		kbasep_js_runpool_retain_ctx(kbdev, kctx);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+}
+KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx);
+
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* We don't need to use the address space anymore */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Release the context - it will be scheduled out */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	kbase_js_sched_all(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx);
+
+void kbasep_js_suspend(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+	u16 retained = 0u;
+	int nr_privileged_ctx = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Prevent all contexts from submitting */
+	js_devdata->runpool_irq.submit_allowed = 0;
+
+	/* Retain each of the contexts, so we can cause it to leave even if it
+	 * had no refcount to begin with */
+	for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+			&js_devdata->runpool_irq.per_as_data[i];
+		struct kbase_context *kctx = js_per_as_data->kctx;
+
+		retained = retained << 1;
+
+		if (kctx) {
+			++(js_per_as_data->as_busy_refcount);
+			retained |= 1u;
+			/* We can only cope with up to 1 privileged context -
+			 * the instrumented context. It'll be suspended by
+			 * disabling instrumentation */
+			if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+				++nr_privileged_ctx;
+				WARN_ON(nr_privileged_ctx != 1);
+			}
+		}
+	}
+	CSTD_UNUSED(nr_privileged_ctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* De-ref the previous retain to ensure each context gets pulled out
+	 * sometime later. */
+	for (i = 0;
+		 i < BASE_MAX_NR_AS;
+		 ++i, retained = retained >> 1) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+			&js_devdata->runpool_irq.per_as_data[i];
+		struct kbase_context *kctx = js_per_as_data->kctx;
+
+		if (retained & 1u)
+			kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	/* Caller must wait for all Power Manager active references to be
+	 * dropped */
+}
+
+void kbasep_js_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int js;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_context *kctx, *n;
+
+		list_for_each_entry_safe(kctx, n,
+				&kbdev->js_data.ctx_list_unpullable[js],
+				jctx.sched_info.ctx.ctx_list_entry[js]) {
+			struct kbasep_js_kctx_info *js_kctx_info;
+			unsigned long flags;
+			bool timer_sync = false;
+
+			js_kctx_info = &kctx->jctx.sched_info;
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+				kbase_js_ctx_pullable(kctx, js, false))
+				timer_sync =
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			if (timer_sync)
+				kbase_backend_ctx_count_changed(kbdev);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		}
+	}
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	/* Restart atom processing */
+	kbase_js_sched_all(kbdev);
+
+	/* JS Resume complete */
+}
+
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if ((katom->core_req & BASE_JD_REQ_FS) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE |
+								BASE_JD_REQ_T)))
+		return false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) &&
+	    (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T)))
+		return false;
+
+	return true;
+}
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_FS)
+		return 0;
+
+	if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+		if (katom->device_nr == 1 &&
+				kbdev->gpu_props.num_core_groups == 2)
+			return 2;
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+			return 2;
+	}
+
+	return 1;
+}
+
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom)
+{
+	bool enqueue_required;
+
+	katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	/* If slot will transition from unpullable to pullable then add to
+	 * pullable list */
+	if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
+		enqueue_required = true;
+	} else {
+		enqueue_required = false;
+	}
+	if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
+			(katom->pre_dep && (katom->pre_dep->atom_flags &
+			KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		int prio = katom->sched_priority;
+		int js = katom->slot_nr;
+		struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+		list_add_tail(&katom->queue, &queue->x_dep_head);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+		enqueue_required = false;
+	} else {
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		/* Add atom to ring buffer. */
+		jsctx_tree_add(kctx, katom);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+	}
+
+	return enqueue_required;
+}
+
+/**
+ * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the
+ *                         runnable_tree, ready for execution
+ * @katom: Atom to submit
+ *
+ * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set,
+ * but is still present in the x_dep list. If @katom has a same-slot dependent
+ * atom then that atom (and any dependents) will also be moved.
+ */
+static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock);
+
+	while (katom) {
+		WARN_ON(!(katom->atom_flags &
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
+
+		if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+			list_del(&katom->queue);
+			katom->atom_flags &=
+					~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+			jsctx_tree_add(katom->kctx, katom);
+			katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+		} else {
+			break;
+		}
+
+		katom = katom->post_dep;
+	}
+}
+
+
+/**
+ * kbase_js_evict_deps - Evict dependencies of a failed atom.
+ * @kctx:       Context pointer
+ * @katom:      Pointer to the atom that has failed.
+ * @js:         The job slot the katom was run on.
+ * @prio:       Priority of the katom.
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propogated to all dependent atoms.
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom, int js, int prio)
+{
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+	struct kbase_jd_atom *next_katom = katom->post_dep;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (next_katom) {
+		KBASE_DEBUG_ASSERT(next_katom->status !=
+				KBASE_JD_ATOM_STATE_HW_COMPLETED);
+		next_katom->will_fail_event_code = katom->event_code;
+
+	}
+
+	/* Has cross slot depenency. */
+	if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE |
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		/* Remove dependency.*/
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+		/* Fail if it had a data dependency. */
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
+			x_dep->will_fail_event_code = katom->event_code;
+		}
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)
+			kbase_js_move_to_tree(x_dep);
+	}
+}
+
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbasep_js_device_data *js_devdata;
+	int pulled;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	js_devdata = &kctx->kbdev->js_data;
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+		return NULL;
+	if (kbase_pm_is_suspending(kctx->kbdev))
+		return NULL;
+
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return NULL;
+
+	if (atomic_read(&katom->blocked))
+		return NULL;
+
+	/* Due to ordering restrictions when unpulling atoms on failure, we do
+	 * not allow multiple runs of fail-dep atoms from the same context to be
+	 * present on the same slot */
+	if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) {
+		struct kbase_jd_atom *prev_atom =
+				kbase_backend_inspect_tail(kctx->kbdev, js);
+
+		if (prev_atom && prev_atom->kctx != kctx)
+			return NULL;
+	}
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return NULL;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return NULL;
+	}
+
+	kbase_ctx_flag_set(kctx, KCTX_PULLED);
+
+	pulled = atomic_inc_return(&kctx->atoms_pulled);
+	if (pulled == 1 && !kctx->slots_pullable) {
+		WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+		kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+		atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable);
+	}
+	atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
+	jsctx_rb_pull(kctx, katom);
+
+	kbasep_js_runpool_retain_ctx_nolock(kctx->kbdev, kctx);
+	katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+
+	katom->sched_info.cfs.ticks = 0;
+
+	return katom;
+}
+
+
+static void js_return_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+									work);
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	struct kbasep_js_atom_retained_state retained_state;
+	int js = katom->slot_nr;
+	bool timer_sync = false;
+	bool context_idle = false;
+	unsigned long flags;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	kbase_tlstream_tl_event_atom_softstop_ex(katom);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	kbasep_js_atom_retained_state_copy(&retained_state, katom);
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	atomic_dec(&kctx->atoms_pulled);
+	atomic_dec(&kctx->atoms_pulled_slot[js]);
+
+	atomic_dec(&katom->blocked);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
+			jsctx_rb_none_to_pull(kctx, js))
+		timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js);
+
+	if (!atomic_read(&kctx->atoms_pulled)) {
+		if (!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
+				!kbase_ctx_flag(kctx, KCTX_DYING)) {
+			int num_slots = kbdev->gpu_props.num_job_slots;
+			int slot;
+
+			if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+				kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+			for (slot = 0; slot < num_slots; slot++) {
+				if (kbase_js_ctx_pullable(kctx, slot, true))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, slot);
+			}
+		}
+
+		kbase_jm_idle_ctx(kbdev, kctx);
+
+		context_idle = true;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (context_idle) {
+		WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+		kbase_pm_context_idle(kbdev);
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+							&retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+}
+
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_rb_unpull(kctx, katom);
+
+	WARN_ON(work_pending(&katom->work));
+
+	/* Block re-submission until workqueue has run */
+	atomic_inc(&katom->blocked);
+
+	kbase_job_check_leave_disjoint(kctx->kbdev, katom);
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, js_return_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+						struct kbase_jd_atom *katom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	bool timer_sync = false;
+	int atom_slot;
+	bool context_idle = false;
+
+	kbdev = kctx->kbdev;
+	atom_slot = katom->slot_nr;
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
+		context_idle = !atomic_dec_return(&kctx->atoms_pulled);
+		atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
+
+		if (!atomic_read(&kctx->atoms_pulled) &&
+				!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+	}
+	WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE));
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
+			jsctx_rb_none_to_pull(kctx, atom_slot)) {
+		if (!list_empty(
+			&kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot]))
+			timer_sync |= kbase_js_ctx_list_remove_nolock(
+					kctx->kbdev, kctx, atom_slot);
+	}
+
+	/*
+	 * If submission is disabled on this context (most likely due to an
+	 * atom failure) and there are now no atoms left in the system then
+	 * re-enable submission so that context can be scheduled again.
+	 */
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
+					!atomic_read(&kctx->atoms_pulled) &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+		int js;
+
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	} else if (katom->x_post_dep &&
+			kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+		int js;
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	}
+
+	/* Mark context as inactive. The pm reference will be dropped later in
+	 * jd_done_worker().
+	 */
+	if (context_idle)
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return context_idle;
+}
+
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp)
+{
+	u64 microseconds_spent = 0;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx = katom->kctx;
+	union kbasep_js_policy *js_policy;
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+
+	kbdev = kctx->kbdev;
+
+	js_policy = &kbdev->js_data.policy;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (katom->will_fail_event_code)
+		katom->event_code = katom->will_fail_event_code;
+
+	katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
+
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_js_evict_deps(kctx, katom, katom->slot_nr,
+				katom->sched_priority);
+	}
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
+				katom->slot_nr), NULL, 0);
+#endif
+
+	/* Calculate the job's time used */
+	if (end_timestamp != NULL) {
+		/* Only calculating it for jobs that really run on the HW (e.g.
+		 * removed from next jobs never actually ran, so really did take
+		 * zero time) */
+		ktime_t tick_diff = ktime_sub(*end_timestamp,
+							katom->start_timestamp);
+
+		microseconds_spent = ktime_to_ns(tick_diff);
+
+		do_div(microseconds_spent, 1000);
+
+		/* Round up time spent to the minimum timer resolution */
+		if (microseconds_spent < KBASEP_JS_TICK_RESOLUTION_US)
+			microseconds_spent = KBASEP_JS_TICK_RESOLUTION_US;
+	}
+
+	/* Log the result of the job (completion status, and time spent). */
+	kbasep_js_policy_log_job_result(js_policy, katom, microseconds_spent);
+
+	kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
+
+	/* Unblock cross dependency if present */
+	if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
+			!(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) &&
+			(x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+		bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false);
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+		kbase_js_move_to_tree(x_dep);
+		if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false))
+			kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx,
+					x_dep->slot_nr);
+
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)
+			return x_dep;
+	}
+
+	return NULL;
+}
+
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+{
+	struct kbasep_js_device_data *js_devdata;
+	bool timer_sync = false;
+
+	js_devdata = &kbdev->js_data;
+
+	down(&js_devdata->schedule_sem);
+	mutex_lock(&js_devdata->queue_mutex);
+
+	while (js_mask) {
+		int js;
+
+		js = ffs(js_mask) - 1;
+
+		while (1) {
+			struct kbase_context *kctx;
+			unsigned long flags;
+			bool context_idle = false;
+
+			kctx = kbase_js_ctx_list_pop_head(kbdev, js);
+
+			if (!kctx) {
+				js_mask &= ~(1 << js);
+				break; /* No contexts on pullable list */
+			}
+
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) {
+				context_idle = true;
+
+				if (kbase_pm_context_active_handle_suspend(
+									kbdev,
+				      KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+					/* Suspend pending - return context to
+					 * queue and stop scheduling */
+					mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					if (kbase_js_ctx_list_add_pullable_head(
+						kctx->kbdev, kctx, js))
+						kbase_js_sync_timers(kbdev);
+					mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					mutex_unlock(&js_devdata->queue_mutex);
+					up(&js_devdata->schedule_sem);
+					return;
+				}
+				kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+			}
+
+			if (!kbase_js_use_ctx(kbdev, kctx)) {
+				mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				/* Context can not be used at this time */
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+				if (kbase_js_ctx_pullable(kctx, js, false)
+				    || kbase_ctx_flag(kctx, KCTX_PRIVILEGED))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev, kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				if (context_idle) {
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				}
+
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+				break;
+			}
+			mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_ctx_flag_clear(kctx, KCTX_PULLED);
+
+			if (!kbase_jm_kick(kbdev, 1 << js))
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+
+			if (!kbase_ctx_flag(kctx, KCTX_PULLED)) {
+				/* Failed to pull jobs - push to head of list */
+				if (kbase_js_ctx_pullable(kctx, js, true))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+								kctx->kbdev,
+								kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+								kctx->kbdev,
+								kctx, js);
+
+				if (context_idle) {
+					kbase_jm_idle_ctx(kbdev, kctx);
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				} else {
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+				}
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+				js_mask &= ~(1 << js);
+				break; /* Could not run atoms on this slot */
+			}
+
+			/* Push to back of list */
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev, kctx, js);
+			else
+				timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		}
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+	up(&js_devdata->schedule_sem);
+}
+
+void kbase_js_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	int js;
+
+	/*
+	 * Critical assumption: No more submission is possible outside of the
+	 * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs)
+	 * whilst the struct kbase_context is terminating.
+	 */
+
+	/* First, atomically do the following:
+	 * - mark the context as dying
+	 * - try to evict it from the policy queue */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_set(kctx, KCTX_DYING);
+
+	dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+
+	/*
+	 * At this point we know:
+	 * - If eviction succeeded, it was in the policy queue, but now no
+	 *   longer is
+	 *  - We must cancel the jobs here. No Power Manager active reference to
+	 *    release.
+	 *  - This happens asynchronously - kbase_jd_zap_context() will wait for
+	 *    those jobs to be killed.
+	 * - If eviction failed, then it wasn't in the policy queue. It is one
+	 *   of the following:
+	 *  - a. it didn't have any jobs, and so is not in the Policy Queue or
+	 *       the Run Pool (not scheduled)
+	 *   - Hence, no more work required to cancel jobs. No Power Manager
+	 *     active reference to release.
+	 *  - b. it was in the middle of a scheduling transaction (and thus must
+	 *       have at least 1 job). This can happen from a syscall or a
+	 *       kernel thread. We still hold the jsctx_mutex, and so the thread
+	 *       must be waiting inside kbasep_js_try_schedule_head_ctx(),
+	 *       before checking whether the runpool is full. That thread will
+	 *       continue after we drop the mutex, and will notice the context
+	 *       is dying. It will rollback the transaction, killing all jobs at
+	 *       the same time. kbase_jd_zap_context() will wait for those jobs
+	 *       to be killed.
+	 *   - Hence, no more work required to cancel jobs, or to release the
+	 *     Power Manager active reference.
+	 *  - c. it is scheduled, and may or may not be running jobs
+	 * - We must cause it to leave the runpool by stopping it from
+	 * submitting any more jobs. When it finally does leave,
+	 * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs
+	 * (because it is dying), release the Power Manager active reference,
+	 * and will not requeue the context in the policy queue.
+	 * kbase_jd_zap_context() will wait for those jobs to be killed.
+	 *  - Hence, work required just to make it leave the runpool. Cancelling
+	 *    jobs and releasing the Power manager active reference will be
+	 *    handled when it leaves the runpool.
+	 */
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (!list_empty(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+				list_del_init(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+		}
+
+		/* The following events require us to kill off remaining jobs
+		 * and update PM book-keeping:
+		 * - we evicted it correctly (it must have jobs to be in the
+		 *   Policy Queue)
+		 *
+		 * These events need no action, but take this path anyway:
+		 * - Case a: it didn't have any jobs, and was never in the Queue
+		 * - Case b: scheduling transaction will be partially rolled-
+		 *           back (this already cancels the jobs)
+		 */
+
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+
+		/* Only cancel jobs when we evicted from the policy
+		 * queue. No Power Manager active reference was held.
+		 *
+		 * Having is_dying set ensures that this kills, and
+		 * doesn't requeue */
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+	} else {
+		unsigned long flags;
+		bool was_retained;
+
+		/* Case c: didn't evict, but it is scheduled - it's in the Run
+		 * Pool */
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+		dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+
+		/* Disable the ctx from submitting any more jobs */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		/* Retain and (later) release the context whilst it is is now
+		 * disallowed from submitting jobs - ensures that someone
+		 * somewhere will be removing the context later on */
+		was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+		/* Since it's scheduled and we have the jsctx_mutex, it must be
+		 * retained successfully */
+		KBASE_DEBUG_ASSERT(was_retained);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+
+		/* Cancel any remaining running jobs for this kctx - if any.
+		 * Submit is disallowed which takes effect immediately, so no
+		 * more new jobs will appear after we do this. */
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+			kbase_job_slot_hardstop(kctx, js, NULL);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+									kctx);
+
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u);
+
+	/* After this, you must wait on both the
+	 * kbase_jd_context::zero_jobs_wait and the
+	 * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs
+	 * to be destroyed, and the context to be de-scheduled (if it was on the
+	 * runpool).
+	 *
+	 * kbase_jd_zap_context() will do this. */
+}
+
+static inline int trace_get_refcnt(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+
+/**
+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context
+ * @kctx:     Pointer to context.
+ * @callback: Pointer to function to call for each job.
+ *
+ * Call a function on all jobs belonging to a non-queued, non-running
+ * context, and detach the jobs from the context as it goes.
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * Atoms will be removed from the queue, so this must only be called when
+ * cancelling jobs (which occurs as part of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_policy_ctx_job_cb callback)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	unsigned long flags;
+	u32 js;
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL,
+					0u, trace_get_refcnt(kbdev, kctx));
+
+	/* Invoke callback on jobs on each slot in turn */
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		jsctx_queue_foreach(kctx, js, callback);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
new file mode 100755
index 0000000..8969222
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -0,0 +1,1058 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler APIs.
+ */
+
+#ifndef _KBASE_JS_H_
+#define _KBASE_JS_H_
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_js_policy.h"
+#include "mali_kbase_context.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_debug.h"
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js Job Scheduler Internal APIs
+ * @{
+ *
+ * These APIs are Internal to KBase and are available for use by the
+ * @ref kbase_js_policy "Job Scheduler Policy APIs"
+ */
+
+/**
+ * @brief Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * @brief Halt the Job Scheduler.
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero intitialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the struct kbase_context must be zero intitialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the context is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guarenteed not to update the Policy Queue. And so, the caller is
+ * guarenteed to not need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is false.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold hwaccess_lock (as this will be obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return true indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return false indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
+ *   the atom
+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ *   remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions.
+ *
+ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it has not been removed with kbasep_js_policy_dequeue_job()
+ *  - or, it has not been removed with kbasep_js_policy_dequeue_job_irq()
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Completely remove a job chain from the Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it is not being killed with kbasep_jd_cancel()
+ *  - or, it has not been removed with kbasep_js_policy_dequeue_job()
+ *  - or, it has not been removed with kbasep_js_policy_dequeue_job_irq()
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold the hwaccess_lock, (as this will be obtained
+ *   internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * @return true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs
+ * @return false otherwise
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_jd_atom *katom);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locks must be held by the caller:
+ * - hwaccess_lock
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that is stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *   If the hwaccess_lock is already held, then the caller should use
+ *   kbasep_js_runpool_lookup_ctx_nolock() instead.
+ *
+ * @return a valid struct kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
+
+/**
+ * kbasep_js_runpool_lookup_ctx_nolock - Lookup a context in the Run Pool based
+ *         upon its current address space and ensure that is stays scheduled in.
+ * @kbdev: Device pointer
+ * @as_nr: Address space to lookup
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * Note: This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must the hold the hwaccess_lock
+ *
+ * Return: a valid struct kbase_context on success, which has been refcounted as
+ *         being busy.
+ *         NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+		struct kbase_device *kbdev, int as_nr);
+
+/**
+ * @brief Handling the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.  \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero and the context \em might be scheduled out
+ * (depending on whether the Scheudling Policy has deemed it so, or if it has run
+ * out of jobs).
+ *
+ * If the context does get scheduled out, then The following actions will be
+ * taken as part of deschduling a context:
+ * - For the context being descheduled:
+ *  - If the context is in the processing of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ *  - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ *  - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ *  - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * - Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * - Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * - Attempt scheduling in a new context (if one is available), and if necessary,
+ * running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function \b cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a \a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * - Releasing the atom's context attributes
+ * - Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * @brief Variant of kbase_js_runpool_release_ctx() that assumes that
+ * kbasep_js_device_data::runpool_mutex and
+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
+ * attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * @brief Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the function will wait
+ * for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space reserved) until
+ * kbasep_js_release_privileged_ctx is called).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Release a privileged context, allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - hwaccess_lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Mangement active refcount to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduling Policy's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time whenever the Job
+ * Scheduling Policy implements Job Timeouts (such as those done by CFS).
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * @brief Resume the Job Scheduler after a Power Management Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * - Schedules contexts back into the runpool
+ * - Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * @brief Submit an atom to the job scheduler.
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to submit
+ *
+ * @return Whether the context requires to be enqueued. */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom);
+
+/**
+  * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
+  * @kctx:  Context Pointer
+  * @prio:  Priority (specifies the queue together with js).
+  * @js:    Job slot (specifies the queue together with prio).
+  *
+  * Pushes all possible atoms from the linked list to the ringbuffer.
+  * Number of atoms are limited to free space in the ringbuffer and
+  * number of available atoms in the linked list.
+  *
+  */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
+/**
+ * @brief Pull an atom from a context in the job scheduler for execution.
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context to pull from
+ * @param[in] js    Job slot to pull from
+ * @return          Pointer to an atom, or NULL if there are no atoms for this
+ *                  slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * @brief Return an atom to the job scheduler ringbuffer.
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to unpull
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom from jd_done_worker(), removing it from the job
+ * scheduler ringbuffer.
+ *
+ * If the atom failed then all dependee atoms marked for failure propagation
+ * will also fail.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] katom Pointer to the atom to complete
+ * @return true if the context is now idle (no jobs pulled)
+ *         false otherwise
+ */
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom.
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] katom         Pointer to the atom to complete
+ * @param[in] end_timestamp The time that the atom completed (may be NULL)
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp);
+
+/**
+ * @brief Submit atoms from all available contexts.
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ *
+ * @param[in] kbdev    Device pointer
+ * @param[in] js_mask  Mask of job slots to submit to
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_jd_zap_context - Attempt to deschedule a context that is being
+ *                        destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove a context from any internal job scheduler queues
+ * and perform any other actions to ensure a context will not be submitted
+ * from.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * @brief Validate an atom
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * @param[in] kbdev  Device pointer
+ * @param[in] katom  Atom to validate
+ * @return           true if atom is valid
+ *                   false otherwise
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user specified data
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used, if a timeout
+ * is negative then the default is set.
+ */
+void kbase_js_set_timeouts(struct kbase_device *kbdev);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * @brief Check that a context is allowed to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * As with any bool, never test the return value with true.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 test_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	test_bit = (u16) (1u << kctx->as_nr);
+
+	return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+}
+
+/**
+ * @brief Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 set_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	set_bit = (u16) (1u << kctx->as_nr);
+
+	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * @brief Prevent a context from submitting more jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 clear_bit;
+	u16 clear_mask;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	clear_bit = (u16) (1u << kctx->as_nr);
+	clear_mask = ~clear_bit;
+
+	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
+
+/**
+ * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom
+ */
+static inline void kbasep_js_clear_job_retry_submit(struct kbase_jd_atom *atom)
+{
+	atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Mark a slot as requiring resubmission by carrying that information on a
+ * completing atom.
+ *
+ * @note This can ASSERT in debug builds if the submit slot has been set to
+ * something other than the current value for @a js. This is because you might
+ * be unintentionally stopping more jobs being submitted on the old submit
+ * slot, and that might cause a scheduling-hang.
+ *
+ * @note If you can guarantee that the atoms for the original slot will be
+ * submitted on some other slot, then call kbasep_js_clear_job_retry_submit()
+ * first to silence the ASSERT.
+ */
+static inline void kbasep_js_set_job_retry_submit_slot(struct kbase_jd_atom *atom, int js)
+{
+	KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS);
+	KBASE_DEBUG_ASSERT((atom->retry_submit_on_slot ==
+					KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID)
+				|| (atom->retry_submit_on_slot == js));
+
+	atom->retry_submit_on_slot = js;
+}
+
+/**
+ * Create an initial 'invalid' atom retained state, that requires no
+ * atom-related work to be done on releasing with
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
+{
+	retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+	retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+	retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Copy atom state that can be made available after jd_done_nolock() is called
+ * on that atom.
+ */
+static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
+{
+	retained_state->event_code = katom->event_code;
+	retained_state->core_req = katom->core_req;
+	retained_state->retry_submit_on_slot = katom->retry_submit_on_slot;
+	retained_state->sched_priority = katom->sched_priority;
+	retained_state->device_nr = katom->device_nr;
+}
+
+/**
+ * @brief Determine whether an atom has finished (given its retained state),
+ * and so should be given back to userspace/removed from the system.
+ *
+ * Reasons for an atom not finishing include:
+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ *
+ * @param[in] katom_retained_state the retained state of the atom to check
+ * @return    false if the atom has not finished
+ * @return    !=false if the atom has finished
+ */
+static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
+}
+
+/**
+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
+ * code should just ignore it.
+ *
+ * @param[in] katom_retained_state the atom's retained state to check
+ * @return    false if the retained state is invalid, and can be ignored
+ * @return    !=false if the retained state is valid
+ */
+static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_atom_retained_state *katom_retained_state, int *res)
+{
+	int js = katom_retained_state->retry_submit_on_slot;
+
+	*res = js;
+	return (bool) (js >= 0);
+}
+
+#if KBASE_DEBUG_DISABLE_ASSERTS == 0
+/**
+ * Debug Check the refcount of a context. Only use within ASSERTs
+ *
+ * Obtains hwaccess_lock
+ *
+ * @return negative value if the context is not scheduled in
+ * @return current refcount of the context if it is scheduled in. The refcount
+ * is not guarenteed to be kept constant.
+ */
+static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int result = -1;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID)
+		result = js_devdata->runpool_irq.per_as_data[as_nr].as_busy_refcount;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return result;
+}
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS == 0 */
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guarenteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guarenteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	found_kctx = js_per_as_data->kctx;
+	KBASE_DEBUG_ASSERT(found_kctx == NULL || js_per_as_data->as_busy_refcount > 0);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return found_kctx;
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: when you need the number of cycles to guarantee you won't wait for
+ * longer than 'us' time (you might have a shorter wait).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_min_freq(struct kbase_device *kbdev, u32 us)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return us * (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need the number of cycles to guarantee you'll wait at least
+ * 'us' amount of time (but you might wait longer).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_max_freq(struct kbase_device *kbdev, u32 us)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return us * (u32) (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the worst-case wait that 'ticks' cycles will
+ * take (you guarantee that you won't wait any longer than this, but it may
+ * be shorter).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_min_freq(struct kbase_device *kbdev, u32 ticks)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return ticks / gpu_freq * 1000;
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the best-case wait for 'tick' cycles (you
+ * guarantee to be waiting for at least this long, but it may be longer).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_max_freq(struct kbase_device *kbdev, u32 ticks)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return ticks / gpu_freq * 1000;
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+	++(js_devdata->nr_all_contexts_running);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+									S8_MAX);
+		++(js_devdata->nr_user_contexts_running);
+	}
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	--(js_devdata->nr_all_contexts_running);
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		--(js_devdata->nr_user_contexts_running);
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+	}
+}
+
+
+/**
+ * @brief Submit atoms from all available contexts to all job slots.
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ *
+ * @param[in] kbdev    Device pointer
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+	kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
+ *                                        to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treates priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ *         0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ *         KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+	if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+		return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+	return kbasep_js_atom_priority_to_relative[atom_prio];
+}
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+	unsigned int prio_idx;
+
+	KBASE_DEBUG_ASSERT(0 <= sched_prio
+			&& sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+	prio_idx = (unsigned int)sched_prio;
+
+	return kbasep_js_relative_priority_to_atom[prio_idx];
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100755
index 0000000..455b661
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,303 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+
+/*
+ * Private functions follow
+ */
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
+		++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) {
+			/* First refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+		--(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+			/* Last de-refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+	++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		/* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+	}
+
+	return runpool_state_changed;
+}
+
+/*
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		lockdep_assert_held(&kbdev->hwaccess_lock);
+		/* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+	}
+
+	/* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */
+	--(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* This context never submits, so don't track any scheduling attributes */
+		return;
+	}
+
+	/* Transfer attributes held in the context flags for contexts that have submit enabled */
+
+	/* ... More attributes can be added here ... */
+
+	/* The context should not have been scheduled yet, so ASSERT if this caused
+	 * runpool state changes (note that other threads *can't* affect the value
+	 * of runpool_state_changed, due to how it's calculated) */
+	KBASE_DEBUG_ASSERT(runpool_state_changed == false);
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed;
+	int i;
+
+	/* Retain any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled in, so update the runpool with the new attributes */
+			runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+
+			/* We don't need to know about state changed, because retaining a
+			 * context occurs on scheduling it, and that itself will also try
+			 * to run new atoms */
+			CSTD_UNUSED(runpool_state_changed);
+		}
+	}
+}
+
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+	int i;
+
+	/* Release any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled out, so update the runpool on the removed attributes */
+			runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom);
+	core_req = katom->core_req;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	/* We don't need to know about state changed, because retaining an
+	 * atom occurs on adding it, and that itself will also try to run
+	 * new atoms */
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom_retained_state);
+	core_req = katom_retained_state->core_req;
+
+	/* No-op for invalid atoms */
+	if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false)
+		return false;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	return runpool_state_changed;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
new file mode 100755
index 0000000..ce91833
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -0,0 +1,158 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_ctx_attr.h
+ * Job Scheduler Context Attribute APIs
+ */
+
+#ifndef _KBASE_JS_CTX_ATTR_H_
+#define _KBASE_JS_CTX_ATTR_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+
+/**
+ * Set the initial attributes of a context (when context create flags are set)
+ *
+ * Requires:
+ * - Hold the jsctx_mutex
+ */
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of a context
+ *
+ * This occurs on scheduling in the context on the runpool (but after
+ * is_scheduled is set)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ */
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Release all attributes of a context
+ *
+ * This occurs on scheduling out the context from the runpool (but before
+ * is_scheduled is cleared)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of an atom
+ *
+ * This occurs on adding an atom to a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ */
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * Release all attributes of an atom, given its retained state.
+ *
+ * This occurs after (permanently) removing an atom from a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * This is a no-op when \a katom_retained_state is invalid.
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+
+	return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	/* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+	return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+	return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100755
index 0000000..e6a9d41
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,466 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+	base_jd_core_req core_req;
+	kbase_context_flags ctx_req;
+	u32 device_nr;
+};
+
+#include "mali_kbase_js_policy_cfs.h"
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy {
+	struct kbasep_js_policy_cfs cfs;
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_ctx_info {
+	struct kbasep_js_policy_cfs_ctx cfs;
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_job_info {
+	struct kbasep_js_policy_cfs_job cfs;
+};
+
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_policy_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
+
+/**
+ * @brief the IRQ_THROTTLE time in microseconds
+ *
+ * This will be converted via the GPU's clock frequency into a cycle-count.
+ *
+ * @note we can make an estimate of the GPU's frequency by periodically
+ * sampling its CYCLE_COUNT register
+ */
+#define KBASE_JS_IRQ_THROTTLE_TIME_US 20
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affect scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accomodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ */
+enum kbasep_js_ctx_attr {
+	/** Attribute indicating a context that contains Compute jobs. That is,
+	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE,
+
+	/** Attribute indicating a context that contains Non-Compute jobs. That is,
+	 * the context has some jobs that are \b not of type @ref
+	 * BASE_JD_REQ_ONLY_COMPUTE. The context usually has
+	 * BASE_CONTEXT_HINT_COMPUTE \b clear, but this depends on the HW
+	 * workarounds in use in the Job Scheduling Policy.
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+	/** Attribute indicating that a context contains compute-job atoms that
+	 * aren't restricted to a coherent group, and can run on all cores.
+	 *
+	 * Specifically, this is when the atom's \a core_req satisfy:
+	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
+	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+	 *
+	 * Such atoms could be blocked from running if one of the coherent groups
+	 * is being used by another job slot, so tracking this context attribute
+	 * allows us to prevent such situations.
+	 *
+	 * @note This doesn't take into account the 1-coregroup case, where all
+	 * compute atoms would effectively be able to run on 'all cores', but
+	 * contexts will still not always get marked with this attribute. Instead,
+	 * it is the caller's responsibility to take into account the number of
+	 * coregroups when interpreting this attribute.
+	 *
+	 * @note Whilst Tiler atoms are normally combined with
+	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+	 * enough to handle anyway.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+	/** Must be the last in the enum */
+	KBASEP_JS_CTX_ATTR_COUNT
+};
+
+enum {
+	/** Bit indicating that new atom should be started because this atom completed */
+	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
+	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
+
+/**
+ * Data used by the scheduler that is unique for each Address Space.
+ *
+ * This is used in IRQ context and hwaccess_lock must be held whilst accessing
+ * this data (inculding reads and atomic decisions based on the read).
+ */
+struct kbasep_js_per_as_data {
+	/**
+	 * Ref count of whether this AS is busy, and must not be scheduled out
+	 *
+	 * When jobs are running this is always positive. However, it can still be
+	 * positive when no jobs are running. If all you need is a heuristic to
+	 * tell you whether jobs might be running, this should be sufficient.
+	 */
+	int as_busy_refcount;
+
+	/** Pointer to the current context on this address space, or NULL for no context */
+	struct kbase_context *kctx;
+};
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not rollover (which would be undefined behavior), and so under
+ * the Total License model, it is free to make optimizations based on that (i.e.
+ * to remove masking).
+ */
+struct kbasep_js_device_data {
+	/* Sub-structure to collect together Job Scheduling data used in IRQ
+	 * context. The hwaccess_lock must be held when accessing. */
+	struct runpool_irq {
+		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
+		 * When bit 'N' is set in this, it indicates whether the context bound to address space
+		 * 'N' (per_as_data[N].kctx) is allowed to submit jobs.
+		 *
+		 * It is placed here because it's much more memory efficient than having a u8 in
+		 * struct kbasep_js_per_as_data to store this flag  */
+		u16 submit_allowed;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of contexts
+		 * that can fit into the runpool. This is currently BASE_MAX_NR_AS
+		 *
+		 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+		 * the refcount. Hence, it's not worthwhile reducing this to
+		 * bit-manipulation on u32s to save space (where in contrast, 4 bit
+		 * sub-fields would be easy to do and would save space).
+		 *
+		 * Whilst this must not become negative, the sign bit is used for:
+		 * - error detection in debug builds
+		 * - Optimization: it is undefined for a signed int to overflow, and so
+		 * the compiler can optimize for that never happening (thus, no masking
+		 * is required on updating the variable) */
+		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/** Data that is unique for each AS */
+		struct kbasep_js_per_as_data per_as_data[BASE_MAX_NR_AS];
+
+		/*
+		 * Affinity management and tracking
+		 */
+		/** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
+		 * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
+		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
+		/** Refcount for each core owned by each slot. Used to generate the
+		 * slot_affinities array of bitvectors
+		 *
+		 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+		 * because it is refcounted only when a job is definitely about to be
+		 * submitted to a slot, and is de-refcounted immediately after a job
+		 * finishes */
+		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
+	} runpool_irq;
+
+	/**
+	 * Run Pool mutex, for managing contexts within the runpool.
+	 * Unless otherwise specified, you must hold this lock whilst accessing any
+	 * members that follow
+	 *
+	 * In addition, this is used to access:
+	 * - the kbasep_js_kctx_info::runpool substructure
+	 */
+	struct mutex runpool_mutex;
+
+	/**
+	 * Queue Lock, used to access the Policy's queue of contexts independently
+	 * of the Run Pool.
+	 *
+	 * Of course, you don't need the Run Pool lock to access this.
+	 */
+	struct mutex queue_mutex;
+
+	/**
+	 * Scheduling semaphore. This must be held when calling
+	 * kbase_jm_kick()
+	 */
+	struct semaphore schedule_sem;
+
+	/**
+	 * List of contexts that can currently be pulled from
+	 */
+	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
+	/**
+	 * List of contexts that can not currently be pulled from, but have
+	 * jobs currently running.
+	 */
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
+
+	u16 as_free;				/**< Bitpattern of free Address Spaces */
+
+	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+	s8 nr_user_contexts_running;
+	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+	s8 nr_all_contexts_running;
+
+	/**
+	 * Policy-specific information.
+	 *
+	 * Refer to the structure defined by the current policy to determine which
+	 * locks must be held when accessing this.
+	 */
+	union kbasep_js_policy policy;
+
+	/** Core Requirements to match up with base_js_atom's core_req memeber
+	 * @note This is a write-once member, and so no locking is required to read */
+	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+	u32 scheduling_period_ns;    /*< Value for JS_SCHEDULING_PERIOD_NS */
+	u32 soft_stop_ticks;	     /*< Value for JS_SOFT_STOP_TICKS */
+	u32 soft_stop_ticks_cl;	     /*< Value for JS_SOFT_STOP_TICKS_CL */
+	u32 hard_stop_ticks_ss;	     /*< Value for JS_HARD_STOP_TICKS_SS */
+	u32 hard_stop_ticks_cl;	     /*< Value for JS_HARD_STOP_TICKS_CL */
+	u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */
+	u32 gpu_reset_ticks_ss;	     /*< Value for JS_RESET_TICKS_SS */
+	u32 gpu_reset_ticks_cl;	     /*< Value for JS_RESET_TICKS_CL */
+	u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
+	u32 ctx_timeslice_ns;		 /**< Value for JS_CTX_TIMESLICE_NS */
+	u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */
+	u32 cfs_ctx_runtime_min_slices;	 /**< Value for  DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */
+
+	/**< Value for JS_SOFT_JOB_TIMEOUT */
+	atomic_t soft_job_timeout_ms;
+
+	/** List of suspended soft jobs */
+	struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Support soft-stop on a single context */
+	bool softstop_always;
+#endif				/* CONFIG_MALI_DEBUG */
+
+	/** The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths).
+	 * @note This is a write-once member, and so no locking is required to read */
+	int init_status;
+
+	/* Number of contexts that can currently be pulled from */
+	u32 nr_contexts_pullable;
+
+	/* Number of contexts that can either be pulled from or are currently
+	 * running */
+	atomic_t nr_contexts_runnable;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+	/**
+	 * Runpool substructure. This must only be accessed whilst the Run Pool
+	 * mutex ( kbasep_js_device_data::runpool_mutex ) is held.
+	 *
+	 * In addition, the hwaccess_lock may need to be held for certain
+	 * sub-members.
+	 *
+	 * @note some of the members could be moved into struct kbasep_js_device_data for
+	 * improved d-cache/tlb efficiency.
+	 */
+	struct {
+		union kbasep_js_policy_ctx_info policy_ctx;	/**< Policy-specific context */
+	} runpool;
+
+	/**
+	 * Job Scheduler Context information sub-structure. These members are
+	 * accessed regardless of whether the context is:
+	 * - In the Policy's Run Pool
+	 * - In the Policy's Queue
+	 * - Not queued nor in the Run Pool.
+	 *
+	 * You must obtain the jsctx_mutex before accessing any other members of
+	 * this substructure.
+	 *
+	 * You may not access any of these members from IRQ context.
+	 */
+	struct kbase_jsctx {
+		struct mutex jsctx_mutex;		    /**< Job Scheduler Context lock */
+
+		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+		 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
+		 * for such jobs*/
+		u32 nr_jobs;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of atoms on
+		 * the context. **/
+		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/**
+		 * Wait queue to wait for KCTX_SHEDULED flag state changes.
+		 * */
+		wait_queue_head_t is_scheduled_wait;
+
+		/** Link implementing JS queues. Context can be present on one
+		 * list per job slot
+		 */
+		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
+	} ctx;
+
+	/* The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths) */
+	int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+	/** Event code - to determine whether the atom has finished */
+	enum base_jd_event_code event_code;
+	/** core requirements */
+	base_jd_core_req core_req;
+	/* priority */
+	int sched_priority;
+	/** Job Slot to retry submitting to if submission from IRQ handler failed */
+	int retry_submit_on_slot;
+	/* Core group atom was executed on */
+	u32 device_nr;
+
+};
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbase_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
+ *
+ * @see kbase_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_MED,
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,
+	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
new file mode 100755
index 0000000..d1f3a0a
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
@@ -0,0 +1,763 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_policy.h
+ * Job Scheduler Policy APIs.
+ */
+
+#ifndef _KBASE_JS_POLICY_H_
+#define _KBASE_JS_POLICY_H_
+
+/**
+ * @page page_kbase_js_policy Job Scheduling Policies
+ * The Job Scheduling system is described in the following:
+ * - @subpage page_kbase_js_policy_overview
+ * - @subpage page_kbase_js_policy_operation
+ *
+ * The API details are as follows:
+ * - @ref kbase_jm
+ * - @ref kbase_js
+ * - @ref kbase_js_policy
+ */
+
+/**
+ * @page page_kbase_js_policy_overview Overview of the Policy System
+ *
+ * The Job Scheduler Policy manages:
+ * - The assigning of KBase Contexts to GPU Address Spaces (\em ASs)
+ * - The choosing of Job Chains (\em Jobs) from a KBase context, to run on the
+ * GPU's Job Slots (\em JSs).
+ * - The amount of \em time a context is assigned to (<em>scheduled on</em>) an
+ * Address Space
+ * - The amount of \em time a Job spends running on the GPU
+ *
+ * The Policy implements this management via 2 components:
+ * - A Policy Queue, which manages a set of contexts that are ready to run,
+ * but not currently running.
+ * - A Policy Run Pool, which manages the currently running contexts (one per Address
+ * Space) and the jobs to run on the Job Slots.
+ *
+ * Each Graphics Process in the system has at least one KBase Context. Therefore,
+ * the Policy Queue can be seen as a queue of Processes waiting to run Jobs on
+ * the GPU.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_overview.dot "Diagram showing a very simplified overview of the Policy System. IRQ handling, soft/hard-stopping, contexts re-entering the system and Policy details are omitted"
+ *
+ * The main operations on the queue are:
+ * - Enqueuing a Context to it
+ * - Dequeuing a Context from it, to run it.
+ * - Note: requeuing a context is much the same as enqueuing a context, but
+ * occurs when a context is scheduled out of the system to allow other contexts
+ * to run.
+ *
+ * These operations have much the same meaning for the Run Pool - Jobs are
+ * dequeued to run on a Jobslot, and requeued when they are scheduled out of
+ * the GPU.
+ *
+ * @note This is an over-simplification of the Policy APIs - there are more
+ * operations than 'Enqueue'/'Dequeue', and a Dequeue from the Policy Queue
+ * takes at least two function calls: one to Dequeue from the Queue, one to add
+ * to the Run Pool.
+ *
+ * As indicated on the diagram, Jobs permanently leave the scheduling system
+ * when they are completed, otherwise they get dequeued/requeued until this
+ * happens. Similarly, Contexts leave the scheduling system when their jobs
+ * have all completed. However, Contexts may later return to the scheduling
+ * system (not shown on the diagram) if more Bags of Jobs are submitted to
+ * them.
+ */
+
+/**
+ * @page page_kbase_js_policy_operation Policy Operation
+ *
+ * We describe the actions that the Job Scheduler Core takes on the Policy in
+ * the following cases:
+ * - The IRQ Path
+ * - The Job Submission Path
+ * - The High Priority Job Submission Path
+ *
+ * This shows how the Policy APIs will be used by the Job Scheduler core.
+ *
+ * The following diagram shows an example Policy that contains a Low Priority
+ * queue, and a Real-time (High Priority) Queue. The RT queue is examined
+ * before the LowP one on dequeuing from the head. The Low Priority Queue is
+ * ordered by time, and the RT queue is ordered by time weighted by
+ * RT-priority. In addition, it shows that the Job Scheduler Core will start a
+ * Soft-Stop Timer (SS-Timer) when it dequeue's and submits a job. The
+ * Soft-Stop time is set by a global configuration value, and must be a value
+ * appropriate for the policy. For example, this could include "don't run a
+ * soft-stop timer" for a First-Come-First-Served (FCFS) policy.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_operation_diagram.dot "Diagram showing the objects managed by an Example Policy, and the operations made upon these objects by the Job Scheduler Core."
+ *
+ * @section sec_kbase_js_policy_operation_prio Dealing with Priority
+ *
+ * Priority applies separately to a context as a whole, and to the jobs within
+ * a context. The jobs specify a priority in the base_jd_atom::prio member, but
+ * it is independent of the context priority. That is, it only affects
+ * scheduling of atoms within a context. Refer to @ref base_jd_prio for more
+ * details. The meaning of the context's priority value is up to the policy
+ * itself, and could be a logarithmic scale instead of a linear scale (e.g. the
+ * policy could implement an increase/decrease in priority by 1 results in an
+ * increase/decrease in \em proportion of time spent scheduled in by 25%, an
+ * effective change in timeslice by 11%).
+ *
+ * It is up to the policy whether a boost in priority boosts the priority of
+ * the entire context (e.g. to such an extent where it may pre-empt other
+ * running contexts). If it chooses to do this, the Policy must make sure that
+ * only jobs from high-priority contexts are run, and that the context is
+ * scheduled out once only jobs from low priority contexts remain. This ensures
+ * that the low priority contexts do not gain from the priority boost, yet they
+ * still get scheduled correctly with respect to other low priority contexts.
+ *
+ *
+ * @section sec_kbase_js_policy_operation_irq IRQ Path
+ *
+ * The following happens on the IRQ path from the Job Scheduler Core:
+ * - Note the slot that completed (for later)
+ * - Log the time spent by the job (and implicitly, the time spent by the
+ * context)
+ *  - call kbasep_js_policy_log_job_result() <em>in the context of the irq
+ * handler.</em>
+ *  - This must happen regardless of whether the job completed successfully or
+ * not (otherwise the context gets away with DoS'ing the system with faulty jobs)
+ * - What was the result of the job?
+ *  - If Completed: job is just removed from the system
+ *  - If Hard-stop or failure: job is removed from the system
+ *  - If Soft-stop: queue the book-keeping work onto a work-queue: have a
+ * work-queue call kbasep_js_policy_enqueue_job()
+ * - Check the timeslice used by the owning context
+ *  - call kbasep_js_policy_should_remove_ctx() <em>in the context of the irq
+ * handler.</em>
+ *  - If this returns true, clear the "allowed" flag.
+ * - Check the ctx's flags for "allowed", "has jobs to run" and "is running
+ * jobs"
+ * - And so, should the context stay scheduled in?
+ *  - If No, push onto a work-queue the work of scheduling out the old context,
+ * and getting a new one. That is:
+ *   - kbasep_js_policy_runpool_remove_ctx() on old_ctx
+ *   - kbasep_js_policy_enqueue_ctx() on old_ctx
+ *   - kbasep_js_policy_dequeue_head_ctx() to get new_ctx
+ *   - kbasep_js_policy_runpool_add_ctx() on new_ctx
+ *   - (all of this work is deferred on a work-queue to keep the IRQ handler quick)
+ * - If there is space in the completed job slots' HEAD/NEXT registers, run the next job:
+ *  - kbasep_js_policy_dequeue_job() <em>in the context of the irq
+ * handler</em> with core_req set to that of the completing slot
+ *  - if this returned true, submit the job to the completed slot.
+ *  - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * false, or the job slot has a job queued on both the HEAD and NEXT registers.
+ *  - If kbasep_js_policy_dequeue_job() returned false, submit some work to
+ * the work-queue to retry from outside of IRQ context (calling
+ * kbasep_js_policy_dequeue_job() from a work-queue).
+ *
+ * Since the IRQ handler submits new jobs \em and re-checks the IRQ_RAWSTAT,
+ * this sequence could loop a large number of times: this could happen if
+ * the jobs submitted completed on the GPU very quickly (in a few cycles), such
+ * as GPU NULL jobs. Then, the HEAD/NEXT registers will always be free to take
+ * more jobs, causing us to loop until we run out of jobs.
+ *
+ * To mitigate this, we must limit the number of jobs submitted per slot during
+ * the IRQ handler - for example, no more than 2 jobs per slot per IRQ should
+ * be sufficient (to fill up the HEAD + NEXT registers in normal cases). For
+ * Mali-T600 with 3 job slots, this means that up to 6 jobs could be submitted per
+ * slot. Note that IRQ Throttling can make this situation commonplace: 6 jobs
+ * could complete but the IRQ for each of them is delayed by the throttling. By
+ * the time you get the IRQ, all 6 jobs could've completed, meaning you can
+ * submit jobs to fill all 6 HEAD+NEXT registers again.
+ *
+ * @note As much work is deferred as possible, which includes the scheduling
+ * out of a context and scheduling in a new context. However, we can still make
+ * starting a single high-priorty context quick despite this:
+ * - On Mali-T600 family, there is one more AS than JSs.
+ * - This means we can very quickly schedule out one AS, no matter what the
+ * situation (because there will always be one AS that's not currently running
+ * on the job slot - it can only have a job in the NEXT register).
+ *  - Even with this scheduling out, fair-share can still be guaranteed e.g. by
+ * a timeline-based Completely Fair Scheduler.
+ * - When our high-priority context comes in, we can do this quick-scheduling
+ * out immediately, and then schedule in the high-priority context without having to block.
+ * - This all assumes that the context to schedule out is of lower
+ * priority. Otherwise, we will have to block waiting for some other low
+ * priority context to finish its jobs. Note that it's likely (but not
+ * impossible) that the high-priority context \b is running jobs, by virtue of
+ * it being high priority.
+ * - Therefore, we can give a high liklihood that on Mali-T600 at least one
+ * high-priority context can be started very quickly. For the general case, we
+ * can guarantee starting (no. ASs) - (no. JSs) high priority contexts
+ * quickly. In any case, there is a high likelihood that we're able to start
+ * more than one high priority context quickly.
+ *
+ * In terms of the functions used in the IRQ handler directly, these are the
+ * perfomance considerations:
+ * - kbase_js_policy_log_job_result():
+ *  - This is just adding to a 64-bit value (possibly even a 32-bit value if we
+ * only store the time the job's recently spent - see below on 'priority weighting')
+ *  - For priority weighting, a divide operation ('div') could happen, but
+ * this can happen in a deferred context (outside of IRQ) when scheduling out
+ * the ctx; as per our Engineering Specification, the contexts of different
+ * priority still stay scheduled in for the same timeslice, but higher priority
+ * ones scheduled back in more often.
+ *  - That is, the weighted and unweighted times must be stored separately, and
+ * the weighted time is only updated \em outside of IRQ context.
+ *  - Of course, this divide is more likely to be a 'multiply by inverse of the
+ * weight', assuming that the weight (priority) doesn't change.
+ * - kbasep_js_policy_should_remove_ctx():
+ *  - This is usually just a comparison of the stored time value against some
+ * maximum value.
+ *
+ * @note all deferred work can be wrapped up into one call - we usually need to
+ * indicate that a job/bag is done outside of IRQ context anyway.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit Submission path
+ *
+ * Start with a Context with no jobs present, and assume equal priority of all
+ * contexts in the system. The following work all happens outside of IRQ
+ * Context :
+ * - As soon as job is made 'ready to 'run', then is must be registerd with the Job
+ * Scheduler Policy:
+ *  - 'Ready to run' means they've satisified their dependencies in the
+ * Kernel-side Job Dispatch system.
+ *  - Call kbasep_js_policy_enqueue_job()
+ *  - This indicates that the job should be scheduled (it is ready to run).
+ * - As soon as a ctx changes from having 0 jobs 'ready to run' to >0 jobs
+ * 'ready to run', we enqueue the context on the policy queue:
+ *  - Call kbasep_js_policy_enqueue_ctx()
+ *  - This indicates that the \em ctx should be scheduled (it is ready to run)
+ *
+ * Next, we need to handle adding a context to the Run Pool - if it's sensible
+ * to do so. This can happen due to two reasons:
+ * -# A context is enqueued as above, and there are ASs free for it to run on
+ * (e.g. it is the first context to be run, in which case it can be added to
+ * the Run Pool immediately after enqueuing on the Policy Queue)
+ * -# A previous IRQ caused another ctx to be scheduled out, requiring that the
+ * context at the head of the queue be scheduled in. Such steps would happen in
+ * a work queue (work deferred from the IRQ context).
+ *
+ * In both cases, we'd handle it as follows:
+ * - Get the context at the Head of the Policy Queue:
+ *  - Call kbasep_js_policy_dequeue_head_ctx()
+ * - Assign the Context an Address Space (Assert that there will be one free,
+ * given the above two reasons)
+ * - Add this context to the Run Pool:
+ *  - Call kbasep_js_policy_runpool_add_ctx()
+ * - Now see if a job should be run:
+ *  - Mostly, this will be done in the IRQ handler at the completion of a
+ * previous job.
+ *  - However, there are two cases where this cannot be done: a) The first job
+ * enqueued to the system (there is no previous IRQ to act upon) b) When jobs
+ * are submitted at a low enough rate to not fill up all Job Slots (or, not to
+ * fill both the 'HEAD' and 'NEXT' registers in the job-slots)
+ *  - Hence, on each ctx <b>and job</b> submission we should try to see if we
+ * can run a job:
+ *  - For each job slot that has free space (in NEXT or HEAD+NEXT registers):
+ *   - Call kbasep_js_policy_dequeue_job() with core_req set to that of the
+ * slot
+ *   - if we got one, submit it to the job slot.
+ *   - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * false, or the job slot has a job queued on both the HEAD and NEXT registers.
+ *
+ * The above case shows that we should attempt to run jobs in cases where a) a ctx
+ * has been added to the Run Pool, and b) new jobs have been added to a context
+ * in the Run Pool:
+ * - In the latter case, the context is in the runpool because it's got a job
+ * ready to run, or is already running a job
+ * - We could just wait until the IRQ handler fires, but for certain types of
+ * jobs this can take comparatively a long time to complete, e.g. GLES FS jobs
+ * generally take much longer to run that GLES CS jobs, which are vertex shader
+ * jobs.
+ * - Therefore, when a new job appears in the ctx, we must check the job-slots
+ * to see if they're free, and run the jobs as before.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit_hipri Submission path for High Priority Contexts
+ *
+ * For High Priority Contexts on Mali-T600, we can make sure that at least 1 of
+ * them can be scheduled in immediately to start high prioriy jobs. In general,
+ * (no. ASs) - (no JSs) high priority contexts may be started immediately. The
+ * following describes how this happens:
+ *
+ * Similar to the previous section, consider what happens with a high-priority
+ * context (a context with a priority higher than that of any in the Run Pool)
+ * that starts out with no jobs:
+ * - A job becomes ready to run on the context, and so we enqueue the context
+ * on the Policy's Queue.
+ * - However, we'd like to schedule in this context immediately, instead of
+ * waiting for one of the Run Pool contexts' timeslice to expire
+ * - The policy's Enqueue function must detect this (because it is the policy
+ * that embodies the concept of priority), and take appropriate action
+ *  - That is, kbasep_js_policy_enqueue_ctx() should check the Policy's Run
+ * Pool to see if a lower priority context should be scheduled out, and then
+ * schedule in the High Priority context.
+ *  - For Mali-T600, we can always pick a context to schedule out immediately
+ * (because there are more ASs than JSs), and so scheduling out a victim context
+ * and scheduling in the high priority context can happen immediately.
+ *   - If a policy implements fair-sharing, then this can still ensure the
+ * victim later on gets a fair share of the GPU.
+ *   - As a note, consider whether the victim can be of equal/higher priority
+ * than the incoming context:
+ *   - Usually, higher priority contexts will be the ones currently running
+ * jobs, and so the context with the lowest priority is usually not running
+ * jobs.
+ *   - This makes it likely that the victim context is low priority, but
+ * it's not impossible for it to be a high priority one:
+ *    - Suppose 3 high priority contexts are submitting only FS jobs, and one low
+ * priority context submitting CS jobs. Then, the context not running jobs will
+ * be one of the hi priority contexts (because only 2 FS jobs can be
+ * queued/running on the GPU HW for Mali-T600).
+ *   - The problem can be mitigated by extra action, but it's questionable
+ * whether we need to: we already have a high likelihood that there's at least
+ * one high priority context - that should be good enough.
+ *   - And so, this method makes sure that at least one high priority context
+ * can be started very quickly, but more than one high priority contexts could be
+ * delayed (up to one timeslice).
+ *   - To improve this, use a GPU with a higher number of Address Spaces vs Job
+ * Slots.
+ * - At this point, let's assume this high priority context has been scheduled
+ * in immediately. The next step is to ensure it can start some jobs quickly.
+ *  - It must do this by Soft-Stopping jobs on any of the Job Slots that it can
+ * submit to.
+ *  - The rest of the logic for starting the jobs is taken care of by the IRQ
+ * handler. All the policy needs to do is ensure that
+ * kbasep_js_policy_dequeue_job() will return the jobs from the high priority
+ * context.
+ *
+ * @note in SS state, we currently only use 2 job-slots (even for T608, but
+ * this might change in future). In this case, it's always possible to schedule
+ * out 2 ASs quickly (their jobs won't be in the HEAD registers). At the same
+ * time, this maximizes usage of the job-slots (only 2 are in use), because you
+ * can guarantee starting of the jobs from the High Priority contexts immediately too.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_notes Notes
+ *
+ * - In this design, a separate 'init' is needed from dequeue/requeue, so that
+ * information can be retained between the dequeue/requeue calls. For example,
+ * the total time spent for a context/job could be logged between
+ * dequeue/requeuing, to implement Fair Sharing. In this case, 'init' just
+ * initializes that information to some known state.
+ *
+ *
+ *
+ */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js_policy Job Scheduler Policy APIs
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy structure
+ */
+union kbasep_js_policy;
+
+/**
+ * @brief Initialize the Job Scheduler Policy
+ */
+int kbasep_js_policy_init(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler Policy
+ */
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy);
+
+/**
+ * @addtogroup kbase_js_policy_ctx Job Scheduler Policy, Context Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Ctx Info structure
+ *
+ * This structure is embedded in the struct kbase_context structure. It is used to:
+ * - track information needed for the policy to schedule the context (e.g. time
+ * used, OS priority etc.)
+ * - link together kbase_contexts into a queue, so that a struct kbase_context can be
+ * obtained as the container of the policy ctx info. This allows the API to
+ * return what "the next context" should be.
+ * - obtain other information already stored in the struct kbase_context for
+ * scheduling purposes (e.g process ID to get the priority of the originating
+ * process)
+ */
+union kbasep_js_policy_ctx_info;
+
+/**
+ * @brief Initialize a ctx for use with the Job Scheduler Policy
+ *
+ * This effectively initializes the union kbasep_js_policy_ctx_info structure within
+ * the struct kbase_context (itself located within the kctx->jctx.sched_info structure).
+ */
+int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Terminate resources associated with using a ctx in the Job Scheduler
+ * Policy.
+ */
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Enqueue a context onto the Job Scheduler Policy Queue
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out a low
+ * priority context from the Run Pool to allow the high priority context to be
+ * scheduled in.
+ *
+ * If the context has the privileged flag set, it will always be kept at the
+ * head of the queue.
+ *
+ * The caller will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ */
+void kbasep_js_policy_enqueue_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Dequeue a context from the Head of the Job Scheduler Policy Queue
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return true if a context was available, and *kctx_ptr points to
+ * the kctx dequeued.
+ * @return false if no contexts were available.
+ */
+bool kbasep_js_policy_dequeue_head_ctx(union kbasep_js_policy *js_policy, struct kbase_context ** const kctx_ptr);
+
+/**
+ * @brief Evict a context from the Job Scheduler Policy Queue
+ *
+ * This is only called as part of destroying a kbase_context.
+ *
+ * There are many reasons why this might fail during the lifetime of a
+ * context. For example, the context is in the process of being scheduled. In
+ * that case a thread doing the scheduling might have a pointer to it, but the
+ * context is neither in the Policy Queue, nor is it in the Run
+ * Pool. Crucially, neither the Policy Queue, Run Pool, or the Context itself
+ * are locked.
+ *
+ * Hence to find out where in the system the context is, it is important to do
+ * more than just check the kbasep_js_kctx_info::ctx::is_scheduled member.
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return true if the context was evicted from the Policy Queue
+ * @return false if the context was not found in the Policy Queue
+ */
+bool kbasep_js_policy_try_evict_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Call a function on all jobs belonging to a non-queued, non-running
+ * context, optionally detaching the jobs from the context as it goes.
+ *
+ * At the time of the call, the context is guarenteed to be not-currently
+ * scheduled on the Run Pool (is_scheduled == false), and not present in
+ * the Policy Queue. This is because one of the following functions was used
+ * recently on the context:
+ * - kbasep_js_policy_evict_ctx()
+ * - kbasep_js_policy_runpool_remove_ctx()
+ *
+ * In both cases, no subsequent call was made on the context to any of:
+ * - kbasep_js_policy_runpool_add_ctx()
+ * - kbasep_js_policy_enqueue_ctx()
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * \a detach_jobs must only be set when cancelling jobs (which occurs as part
+ * of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+void kbasep_js_policy_foreach_ctx_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx,
+	kbasep_js_policy_ctx_job_cb callback, bool detach_jobs);
+
+/**
+ * @brief Add a context to the Job Scheduler Policy's Run Pool
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out low
+ * priority jobs that are currently running on the GPU.
+ *
+ * The number of contexts present in the Run Pool will never be more than the
+ * number of Address Spaces.
+ *
+ * The following guarentees are made about the state of the system when this
+ * is called:
+ * - kctx->as_nr member is valid
+ * - the context has its submit_allowed flag set
+ * - kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is valid
+ * - The refcount of the context is guarenteed to be zero.
+ * - kbasep_js_kctx_info::ctx::is_scheduled will be true.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding hwaccess_lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_add_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Remove a context from the Job Scheduler Policy's Run Pool
+ *
+ * The kctx->as_nr member is valid and the context has its submit_allowed flag
+ * set when this is called. The state of
+ * kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is also
+ * valid. The refcount of the context is guarenteed to be zero.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding hwaccess_lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Indicate whether a context should be removed from the Run Pool
+ * (should be scheduled out).
+ *
+ * The hwaccess_lock will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ */
+bool kbasep_js_policy_should_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Synchronize with any timers acting upon the runpool
+ *
+ * The policy should check whether any timers it owns should be running. If
+ * they should not, the policy must cancel such timers and ensure they are not
+ * re-run by the time this function finishes.
+ *
+ * In particular, the timers must not be running when there are no more contexts
+ * on the runpool, because the GPU could be powered off soon after this call.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ */
+void kbasep_js_policy_runpool_timers_sync(union kbasep_js_policy *js_policy);
+
+
+/**
+ * @brief Indicate whether a new context has an higher priority than the current context.
+ *
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held for \a new_ctx
+ *
+ * This function must not sleep, because an IRQ spinlock might be held whilst
+ * this is called.
+ *
+ * @note There is nothing to stop the priority of \a current_ctx changing
+ * during or immediately after this function is called (because its jsctx_mutex
+ * cannot be held). Therefore, this function should only be seen as a heuristic
+ * guide as to whether \a new_ctx is higher priority than \a current_ctx
+ */
+bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx);
+
+	  /** @} *//* end group kbase_js_policy_ctx */
+
+/**
+ * @addtogroup kbase_js_policy_job Job Scheduler Policy, Job Chain Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Job Info structure
+ *
+ * This structure is embedded in the struct kbase_jd_atom structure. It is used to:
+ * - track information needed for the policy to schedule the job (e.g. time
+ * used, etc.)
+ * - link together jobs into a queue/buffer, so that a struct kbase_jd_atom can be
+ * obtained as the container of the policy job info. This allows the API to
+ * return what "the next job" should be.
+ */
+union kbasep_js_policy_job_info;
+
+/**
+ * @brief Initialize a job for use with the Job Scheduler Policy
+ *
+ * This function initializes the union kbasep_js_policy_job_info structure within the
+ * kbase_jd_atom. It will only initialize/allocate resources that are specific
+ * to the job.
+ *
+ * That is, this function makes \b no attempt to:
+ * - initialize any context/policy-wide information
+ * - enqueue the job on the policy.
+ *
+ * At some later point, the following functions must be called on the job, in this order:
+ * - kbasep_js_policy_register_job() to register the job and initialize policy/context wide data.
+ * - kbasep_js_policy_enqueue_job() to enqueue the job
+ *
+ * A job must only ever be initialized on the Policy once, and must be
+ * terminated on the Policy before the job is freed.
+ *
+ * The caller will not be holding any locks, and so this function will not
+ * modify any information in \a kctx or \a js_policy.
+ *
+ * @return 0 if initialization was correct.
+ */
+int kbasep_js_policy_init_job(const union kbasep_js_policy *js_policy, const struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Register context/policy-wide information for a job on the Job Scheduler Policy.
+ *
+ * Registers the job with the policy. This is used to track the job before it
+ * has been enqueued/requeued by kbasep_js_policy_enqueue_job(). Specifically,
+ * it is used to update information under a lock that could not be updated at
+ * kbasep_js_policy_init_job() time (such as context/policy-wide data).
+ *
+ * @note This function will not fail, and hence does not allocate any
+ * resources. Any failures that could occur on registration will be caught
+ * during kbasep_js_policy_init_job() instead.
+ *
+ * A job must only ever be registerd on the Policy once, and must be
+ * deregistered on the Policy on completion (whether or not that completion was
+ * success/failure).
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_register_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief De-register context/policy-wide information for a on the Job Scheduler Policy.
+ *
+ * This must be used before terminating the resources associated with using a
+ * job in the Job Scheduler Policy. This function does not itself terminate any
+ * resources, at most it just updates information in the policy and context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_deregister_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Dequeue a Job for a job slot from the Job Scheduler Policy Run Pool
+ *
+ * The job returned by the policy will match at least one of the bits in the
+ * job slot's core requirements (but it may match more than one, or all @ref
+ * base_jd_core_req bits supported by the job slot).
+ *
+ * In addition, the requirements of the job returned will be a subset of those
+ * requested - the job returned will not have requirements that \a job_slot_idx
+ * cannot satisfy.
+ *
+ * The caller will submit the job to the GPU as soon as the GPU's NEXT register
+ * for the corresponding slot is empty. Of course, the GPU will then only run
+ * this new job when the currently executing job (in the jobslot's HEAD
+ * register) has completed.
+ *
+ * @return true if a job was available, and *kctx_ptr points to
+ * the kctx dequeued.
+ * @return false if no jobs were available among all ctxs in the Run Pool.
+ *
+ * @note base_jd_core_req is currently a u8 - beware of type conversion.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_lock::irq will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex. will be held
+ */
+bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, struct kbase_jd_atom ** const katom_ptr);
+
+/**
+ * @brief Requeue a Job back into the Job Scheduler Policy Run Pool
+ *
+ * This will be used to enqueue a job after its creation and also to requeue
+ * a job into the Run Pool that was previously dequeued (running). It notifies
+ * the policy that the job should be run again at some point later.
+ *
+ * The caller has the following conditions on locking:
+ * - hwaccess_lock (a spinlock) will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_enqueue_job(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Log the result of a job: the time spent on a job/context, and whether
+ * the job failed or not.
+ *
+ * Since a struct kbase_jd_atom contains a pointer to the struct kbase_context owning it,
+ * then this can also be used to log time on either/both the job and the
+ * containing context.
+ *
+ * The completion state of the job can be found by examining \a katom->event.event_code
+ *
+ * If the Job failed and the policy is implementing fair-sharing, then the
+ * policy must penalize the failing job/context:
+ * - At the very least, it should penalize the time taken by the amount of
+ * time spent processing the IRQ in SW. This because a job in the NEXT slot
+ * waiting to run will be delayed until the failing job has had the IRQ
+ * cleared.
+ * - \b Optionally, the policy could apply other penalties. For example, based
+ * on a threshold of a number of failing jobs, after which a large penalty is
+ * applied.
+ *
+ * The kbasep_js_device_data::runpool_mutex will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ *
+ * The caller has the following conditions on locking:
+ * - hwaccess_lock will be held.
+ *
+ * @param js_policy     job scheduler policy
+ * @param katom         job dispatch atom
+ * @param time_spent_us the time spent by the job, in microseconds (10^-6 seconds).
+ */
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us);
+
+	  /** @} *//* end group kbase_js_policy_job */
+
+	  /** @} *//* end group kbase_js_policy */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_POLICY_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
new file mode 100755
index 0000000..1ac0569
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
@@ -0,0 +1,292 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler: Completely Fair Policy Implementation
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_js_policy_cfs.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
+#include <linux/sched/rt.h>
+#endif
+
+/**
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is enabled for a particular level is down to the integrator.
+ * However this is being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/* Fixed point constants used for runtime weight calculations */
+#define WEIGHT_FIXEDPOINT_SHIFT 10
+#define WEIGHT_TABLE_SIZE       40
+#define WEIGHT_0_NICE           (WEIGHT_TABLE_SIZE/2)
+#define WEIGHT_0_VAL            (1 << WEIGHT_FIXEDPOINT_SHIFT)
+
+#define PROCESS_PRIORITY_MIN (-20)
+#define PROCESS_PRIORITY_MAX  (19)
+
+/* Defines for easy asserts 'is scheduled'/'is queued'/'is neither queued norscheduled' */
+#define KBASEP_JS_CHECKFLAG_QUEUED       (1u << 0) /**< Check the queued state */
+#define KBASEP_JS_CHECKFLAG_SCHEDULED    (1u << 1) /**< Check the scheduled state */
+#define KBASEP_JS_CHECKFLAG_IS_QUEUED    (1u << 2) /**< Expect queued state to be set */
+#define KBASEP_JS_CHECKFLAG_IS_SCHEDULED (1u << 3) /**< Expect scheduled state to be set */
+
+enum {
+	KBASEP_JS_CHECK_NOTQUEUED = KBASEP_JS_CHECKFLAG_QUEUED,
+	KBASEP_JS_CHECK_NOTSCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED,
+	KBASEP_JS_CHECK_QUEUED = KBASEP_JS_CHECKFLAG_QUEUED | KBASEP_JS_CHECKFLAG_IS_QUEUED,
+	KBASEP_JS_CHECK_SCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED | KBASEP_JS_CHECKFLAG_IS_SCHEDULED
+};
+
+typedef u32 kbasep_js_check;
+
+/*
+ * Private Functions
+ */
+
+/* Table autogenerated using util built from: base/tools/gen_cfs_weight_of_prio/ */
+
+/* weight = 1.25 */
+static const int weight_of_priority[] = {
+	/*  -20 */ 11, 14, 18, 23,
+	/*  -16 */ 29, 36, 45, 56,
+	/*  -12 */ 70, 88, 110, 137,
+	/*   -8 */ 171, 214, 268, 335,
+	/*   -4 */ 419, 524, 655, 819,
+	/*    0 */ 1024, 1280, 1600, 2000,
+	/*    4 */ 2500, 3125, 3906, 4883,
+	/*    8 */ 6104, 7630, 9538, 11923,
+	/*   12 */ 14904, 18630, 23288, 29110,
+	/*   16 */ 36388, 45485, 56856, 71070
+};
+
+/*
+ * Note: There is nothing to stop the priority of the ctx containing
+ * ctx_info changing during or immediately after this function is called
+ * (because its jsctx_mutex cannot be held during IRQ). Therefore, this
+ * function should only be seen as a heuristic guide as to the priority weight
+ * of the context.
+ */
+static u64 priority_weight(struct kbasep_js_policy_cfs_ctx *ctx_info, u64 time_us)
+{
+	u64 time_delta_us;
+	int priority;
+
+	priority = ctx_info->process_priority;
+
+	/* Adjust runtime_us using priority weight if required */
+	if (priority != 0 && time_us != 0) {
+		int clamped_priority;
+
+		/* Clamp values to min..max weights */
+		if (priority > PROCESS_PRIORITY_MAX)
+			clamped_priority = PROCESS_PRIORITY_MAX;
+		else if (priority < PROCESS_PRIORITY_MIN)
+			clamped_priority = PROCESS_PRIORITY_MIN;
+		else
+			clamped_priority = priority;
+
+		/* Fixed point multiplication */
+		time_delta_us = (time_us * weight_of_priority[WEIGHT_0_NICE + clamped_priority]);
+		/* Remove fraction */
+		time_delta_us = time_delta_us >> WEIGHT_FIXEDPOINT_SHIFT;
+		/* Make sure the time always increases */
+		if (0 == time_delta_us)
+			time_delta_us++;
+	} else {
+		time_delta_us = time_us;
+	}
+
+	return time_delta_us;
+}
+
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_policy_trace_get_refcnt_nolock(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+
+static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	refcnt = kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return refcnt;
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+
+/*
+ * Non-private functions
+ */
+
+int kbasep_js_policy_init(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_policy_cfs *policy_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+	policy_info = &js_devdata->policy.cfs;
+
+	atomic64_set(&policy_info->least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+	atomic64_set(&policy_info->rt_least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+
+	policy_info->head_runtime_us = 0;
+
+	return 0;
+}
+
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy)
+{
+	CSTD_UNUSED(js_policy);
+}
+
+int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_policy_cfs_ctx *ctx_info;
+	struct kbasep_js_policy_cfs *policy_info;
+	int policy;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	policy_info = &kbdev->js_data.policy.cfs;
+	ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_INIT_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+	policy = current->policy;
+	if (policy == SCHED_FIFO || policy == SCHED_RR) {
+		ctx_info->process_rt_policy = true;
+		ctx_info->process_priority = (((MAX_RT_PRIO - 1) - current->rt_priority) / 5) - 20;
+	} else {
+		ctx_info->process_rt_policy = false;
+		ctx_info->process_priority = (current->static_prio - MAX_RT_PRIO) - 20;
+	}
+
+	/* Initial runtime (relative to least-run context runtime)
+	 *
+	 * This uses the Policy Queue's most up-to-date head_runtime_us by using the
+	 * queue mutex to issue memory barriers - also ensure future updates to
+	 * head_runtime_us occur strictly after this context is initialized */
+	mutex_lock(&js_devdata->queue_mutex);
+
+	/* No need to hold the the hwaccess_lock here, because we're initializing
+	 * the value, and the context is definitely not being updated in the
+	 * runpool at this point. The queue_mutex ensures the memory barrier. */
+	ctx_info->runtime_us = policy_info->head_runtime_us + priority_weight(ctx_info, (u64) js_devdata->cfs_ctx_runtime_init_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u));
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return 0;
+}
+
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(js_policy != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_TERM_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+	/* No work to do */
+}
+
+/*
+ * Job Chain Management
+ */
+
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us)
+{
+	struct kbasep_js_policy_cfs_ctx *ctx_info;
+	struct kbase_context *parent_ctx;
+
+	KBASE_DEBUG_ASSERT(js_policy != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	CSTD_UNUSED(js_policy);
+
+	parent_ctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(parent_ctx != NULL);
+
+	ctx_info = &parent_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	ctx_info->runtime_us += priority_weight(ctx_info, time_spent_us);
+
+	katom->time_spent_us += time_spent_us;
+}
+
+bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx)
+{
+	struct kbasep_js_policy_cfs_ctx *current_ctx_info;
+	struct kbasep_js_policy_cfs_ctx *new_ctx_info;
+
+	KBASE_DEBUG_ASSERT(current_ctx != NULL);
+	KBASE_DEBUG_ASSERT(new_ctx != NULL);
+	CSTD_UNUSED(js_policy);
+
+	current_ctx_info = &current_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+	new_ctx_info = &new_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	if (!current_ctx_info->process_rt_policy && new_ctx_info->process_rt_policy)
+		return true;
+
+	if (current_ctx_info->process_rt_policy ==
+			new_ctx_info->process_rt_policy)
+		return true;
+
+	return false;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
new file mode 100755
index 0000000..0a8454c
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Completely Fair Job Scheduler Policy structure definitions
+ */
+
+#ifndef _KBASE_JS_POLICY_CFS_H_
+#define _KBASE_JS_POLICY_CFS_H_
+
+#define KBASEP_JS_RUNTIME_EMPTY ((u64)-1)
+
+/**
+ * struct kbasep_js_policy_cfs - Data for the CFS policy
+ * @head_runtime_us:  Number of microseconds the least-run context has been
+ *                    running for. The kbasep_js_device_data.queue_mutex must
+ *                    be held whilst updating this.
+ *                    Reads are possible without this mutex, but an older value
+ *                    might be read if no memory barries are issued beforehand.
+ * @least_runtime_us: Number of microseconds the least-run context in the
+ *                    context queue has been running for.
+ *                    -1 if context queue is empty.
+ * @rt_least_runtime_us: Number of microseconds the least-run context in the
+ *                       realtime (priority) context queue has been running for.
+ *                       -1 if realtime context queue is empty
+ */
+struct kbasep_js_policy_cfs {
+	u64 head_runtime_us;
+	atomic64_t least_runtime_us;
+	atomic64_t rt_least_runtime_us;
+};
+
+/**
+ * struct kbasep_js_policy_cfs_ctx - a single linked list of all contexts
+ * @runtime_us:        microseconds this context has been running for
+ * @process_rt_policy: set if calling process policy scheme is a realtime
+ *                     scheduler and will use the priority queue. Non-mutable
+ *                     after ctx init
+ * @process_priority:  calling process NICE priority, in the range -20..19
+ *
+ * hwaccess_lock must be held when updating @runtime_us. Initializing will occur
+ * on context init and context enqueue (which can only occur in one thread at a
+ * time), but multi-thread access only occurs while the context is in the
+ * runpool.
+ *
+ * Reads are possible without the spinlock, but an older value might be read if
+ * no memory barries are issued beforehand.
+ */
+struct kbasep_js_policy_cfs_ctx {
+	u64 runtime_us;
+	bool process_rt_policy;
+	int process_priority;
+};
+
+/**
+ * struct kbasep_js_policy_cfs_job - per job information for CFS
+ * @ticks: number of ticks that this job has been executing for
+ *
+ * hwaccess_lock must be held when accessing @ticks.
+ */
+struct kbasep_js_policy_cfs_job {
+	u32 ticks;
+};
+
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100755
index 0000000..6d1e61f
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port. */
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+
+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+	#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+#else
+	#define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100755
index 0000000..60d31ac
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,2490 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/compat.h>
+#include <linux/version.h>
+
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_tlstream.h>
+
+/**
+ * @brief Check the zone compatibility of two regions.
+ */
+static int kbase_region_tracker_match_zone(struct kbase_va_region *reg1,
+		struct kbase_va_region *reg2)
+{
+	return ((reg1->flags & KBASE_REG_ZONE_MASK) ==
+		(reg2->flags & KBASE_REG_ZONE_MASK));
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_match_zone);
+
+/* This function inserts a region into the tree. */
+static void kbase_region_tracker_insert(struct kbase_context *kctx, struct kbase_va_region *new_reg)
+{
+	u64 start_pfn = new_reg->start_pfn;
+	struct rb_node **link = &(kctx->reg_rbtree.rb_node);
+	struct rb_node *parent = NULL;
+
+	/* Find the right place in the tree using tree search */
+	while (*link) {
+		struct kbase_va_region *old_reg;
+
+		parent = *link;
+		old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+		/* RBTree requires no duplicate entries. */
+		KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+		if (old_reg->start_pfn > start_pfn)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Put the new node there, and rebalance tree */
+	rb_link_node(&(new_reg->rblink), parent, link);
+	rb_insert_color(&(new_reg->rblink), &(kctx->reg_rbtree));
+}
+
+/* Find allocated region enclosing free range. */
+static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
+		struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	u64 end_pfn = start_pfn + nr_pages;
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (start_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (end_pfn > tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (gpu_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (gpu_pfn >= tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->start_pfn > gpu_pfn)
+			rbnode = rbnode->rb_left;
+		else if (reg->start_pfn < gpu_pfn)
+			rbnode = rbnode->rb_right;
+		else
+			return reg;
+
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	/* Note that this search is a linear search, as we do not have a target
+	   address in mind, so does not benefit from the rbtree search */
+	rbnode = rb_first(&(kctx->reg_rbtree));
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if ((reg->nr_pages >= nr_pages) &&
+				(reg->flags & KBASE_REG_FREE) &&
+				kbase_region_tracker_match_zone(reg, reg_reqs)) {
+			/* Check alignment */
+			u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1);
+
+			if ((start_pfn >= reg->start_pfn) &&
+					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1)))
+				return reg;
+		}
+		rbnode = rb_next(rbnode);
+	}
+
+	return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions.  It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ */
+static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	struct rb_node *rbprev;
+	struct kbase_va_region *prev = NULL;
+	struct rb_node *rbnext;
+	struct kbase_va_region *next = NULL;
+
+	int merged_front = 0;
+	int merged_back = 0;
+	int err = 0;
+
+	/* Try to merge with the previous block first */
+	rbprev = rb_prev(&(reg->rblink));
+	if (rbprev) {
+		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+		if ((prev->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(prev, reg)) {
+			/* We're compatible with the previous VMA, merge with it */
+			prev->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+			reg = prev;
+			merged_front = 1;
+		}
+	}
+
+	/* Try to merge with the next block second */
+	/* Note we do the lookup here as the tree may have been rebalanced. */
+	rbnext = rb_next(&(reg->rblink));
+	if (rbnext) {
+		/* We're compatible with the next VMA, merge with it */
+		next = rb_entry(rbnext, struct kbase_va_region, rblink);
+		if ((next->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(next, reg)) {
+			next->start_pfn = reg->start_pfn;
+			next->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+			merged_back = 1;
+			if (merged_front) {
+				/* We already merged with prev, free it */
+				kbase_free_alloced_region(reg);
+			}
+		}
+	}
+
+	/* If we failed to merge then we need to add a new block */
+	if (!(merged_front || merged_back)) {
+		/*
+		 * We didn't merge anything. Add a new free
+		 * placeholder and remove the original one.
+		 */
+		struct kbase_va_region *free_reg;
+
+		free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+		if (!free_reg) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		rb_replace_node(&(reg->rblink), &(free_reg->rblink), &(kctx->reg_rbtree));
+	}
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * @brief Insert a VA region to the list, replacing the current at_reg.
+ */
+static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+	int err = 0;
+
+	/* Must be a free region */
+	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+	/* start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+	/* at least nr_pages from start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	/* Regions are a whole use, so swap and delete old one. */
+	if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+		rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), &(kctx->reg_rbtree));
+		kbase_free_alloced_region(at_reg);
+	}
+	/* New region replaces the start of the old one, so insert before. */
+	else if (at_reg->start_pfn == start_pfn) {
+		at_reg->start_pfn += nr_pages;
+		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region replaces the end of the old one, so insert after. */
+	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region splits the old one, so insert and create new */
+	else {
+		struct kbase_va_region *new_front_reg;
+
+		new_front_reg = kbase_alloc_free_region(kctx,
+				at_reg->start_pfn,
+				start_pfn - at_reg->start_pfn,
+				at_reg->flags & KBASE_REG_ZONE_MASK);
+
+		if (new_front_reg) {
+			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+			at_reg->start_pfn = start_pfn + nr_pages;
+
+			kbase_region_tracker_insert(kctx, new_front_reg);
+			kbase_region_tracker_insert(kctx, new_reg);
+		} else {
+			err = -ENOMEM;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * @brief Add a VA region to the list.
+ */
+int kbase_add_va_region(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 addr,
+		size_t nr_pages, size_t align)
+{
+	struct kbase_va_region *tmp;
+	u64 gpu_pfn = addr >> PAGE_SHIFT;
+	int err = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!align)
+		align = 1;
+
+	/* must be a power of 2 */
+	KBASE_DEBUG_ASSERT((align & (align - 1)) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+	/* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
+	if (gpu_pfn) {
+		struct device *dev = kctx->kbdev->dev;
+
+		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+		tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
+		if (!tmp) {
+			dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		if ((!kbase_region_tracker_match_zone(tmp, reg)) ||
+				(!(tmp->flags & KBASE_REG_FREE))) {
+			dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
+			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
+			dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
+		if (err) {
+			dev_warn(dev, "Failed to insert va region");
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		goto exit;
+	}
+
+	/* Path 2: Map any free address which meets the requirements.  */
+	{
+		u64 start_pfn;
+
+		/*
+		 * Depending on the zone the allocation request is for
+		 * we might need to retry it.
+		 */
+		do {
+			tmp = kbase_region_tracker_find_region_meeting_reqs(
+					kctx, reg, nr_pages, align);
+			if (tmp) {
+				start_pfn = (tmp->start_pfn + align - 1) &
+						~(align - 1);
+				err = kbase_insert_va_region_nolock(kctx, reg,
+						tmp, start_pfn, nr_pages);
+				break;
+			}
+
+			/*
+			 * If the allocation is not from the same zone as JIT
+			 * then don't retry, we're out of VA and there is
+			 * nothing which can be done about it.
+			 */
+			if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+					KBASE_REG_ZONE_CUSTOM_VA)
+				break;
+		} while (kbase_jit_evict(kctx));
+
+		if (!tmp)
+			err = -ENOMEM;
+	}
+
+ exit:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+		struct kbase_va_region *same_va_reg,
+		struct kbase_va_region *exec_reg,
+		struct kbase_va_region *custom_va_reg)
+{
+	kctx->reg_rbtree = RB_ROOT;
+	kbase_region_tracker_insert(kctx, same_va_reg);
+
+	/* exec and custom_va_reg doesn't always exist */
+	if (exec_reg && custom_va_reg) {
+		kbase_region_tracker_insert(kctx, exec_reg);
+		kbase_region_tracker_insert(kctx, custom_va_reg);
+	}
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	do {
+		rbnode = rb_first(&(kctx->reg_rbtree));
+		if (rbnode) {
+			rb_erase(rbnode, &(kctx->reg_rbtree));
+			reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+			kbase_free_alloced_region(reg);
+		}
+	} while (rbnode);
+}
+
+/**
+ * Initialize the region tracker data structure.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+	struct kbase_va_region *same_va_reg;
+	struct kbase_va_region *exec_reg = NULL;
+	struct kbase_va_region *custom_va_reg = NULL;
+	size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+	u64 same_va_pages;
+	int err;
+
+	/* Take the lock as kbase_free_alloced_region requires it */
+	kbase_gpu_vm_lock(kctx);
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		same_va_bits = 32;
+	else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+#endif
+
+	if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) {
+		err = -EINVAL;
+		goto fail_unlock;
+	}
+
+	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+	/* all have SAME_VA */
+	same_va_reg = kbase_alloc_free_region(kctx, 1,
+			same_va_pages,
+			KBASE_REG_ZONE_SAME_VA);
+
+	if (!same_va_reg) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+#ifdef CONFIG_64BIT
+	/* 32-bit clients have exec and custom VA zones */
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif
+		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+			err = -EINVAL;
+			goto fail_free_same_va;
+		}
+		/* If the current size of TMEM is out of range of the
+		 * virtual address space addressable by the MMU then
+		 * we should shrink it to fit
+		 */
+		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+		exec_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_EXEC_BASE,
+				KBASE_REG_ZONE_EXEC_SIZE,
+				KBASE_REG_ZONE_EXEC);
+
+		if (!exec_reg) {
+			err = -ENOMEM;
+			goto fail_free_same_va;
+		}
+
+		custom_va_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_CUSTOM_VA_BASE,
+				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+		if (!custom_va_reg) {
+			err = -ENOMEM;
+			goto fail_free_exec;
+		}
+#ifdef CONFIG_64BIT
+	}
+#endif
+
+	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+
+	kctx->same_va_end = same_va_pages + 1;
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_free_exec:
+	kbase_free_alloced_region(exec_reg);
+fail_free_same_va:
+	kbase_free_alloced_region(same_va_reg);
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
+{
+#ifdef CONFIG_64BIT
+	struct kbase_va_region *same_va;
+	struct kbase_va_region *custom_va_reg;
+	u64 same_va_bits;
+	u64 total_va_size;
+	int err;
+
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return 0;
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+
+	total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/*
+	 * Modify the same VA free region after creation. Be careful to ensure
+	 * that allocations haven't been made as they could cause an overlap
+	 * to happen with existing same VA allocations and the custom VA zone.
+	 */
+	same_va = kbase_region_tracker_find_region_base_address(kctx,
+			PAGE_SIZE);
+	if (!same_va) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* The region flag or region size has changed since creation so bail. */
+	if ((!(same_va->flags & KBASE_REG_FREE)) ||
+			(same_va->nr_pages != total_va_size)) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	if (same_va->nr_pages < jit_va_pages ||
+			kctx->same_va_end < jit_va_pages) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* It's safe to adjust the same VA zone now */
+	same_va->nr_pages -= jit_va_pages;
+	kctx->same_va_end -= jit_va_pages;
+
+	/*
+	 * Create a custom VA zone at the end of the VA for allocations which
+	 * JIT can use so it doesn't have to allocate VA from the kernel.
+	 */
+	custom_va_reg = kbase_alloc_free_region(kctx,
+				kctx->same_va_end,
+				jit_va_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+	if (!custom_va_reg) {
+		/*
+		 * The context will be destroyed if we fail here so no point
+		 * reverting the change we made to same_va.
+		 */
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	kbase_region_tracker_insert(kctx, custom_va_reg);
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+#else
+	return 0;
+#endif
+}
+
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+	kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX;
+
+	/* Initialize memory usage */
+	atomic_set(&memdev->used_pages, 0);
+
+	return kbase_mem_pool_init(&kbdev->mem_pool,
+			KBASE_MEM_POOL_MAX_SIZE_KBDEV, kbdev, NULL);
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int pages;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	pages = atomic_read(&memdev->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_pool_term(&kbdev->mem_pool);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_term);
+
+
+
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+{
+	struct kbase_va_region *new_reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* zone argument should only contain zone related region flags */
+	KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
+
+	new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+	if (!new_reg)
+		return NULL;
+
+	new_reg->cpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->gpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->kctx = kctx;
+	new_reg->flags = zone | KBASE_REG_FREE;
+
+	new_reg->flags |= KBASE_REG_GROWABLE;
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions.
+ *
+ */
+void kbase_free_alloced_region(struct kbase_va_region *reg)
+{
+	if (!(reg->flags & KBASE_REG_FREE)) {
+		/*
+		 * The physical allocation should have been removed from the
+		 * eviction list before this function is called. However, in the
+		 * case of abnormal process termination or the app leaking the
+		 * memory kbase_mem_free_region is not called so it can still be
+		 * on the list at termination time of the region tracker.
+		 */
+		if (!list_empty(&reg->gpu_alloc->evict_node)) {
+			/*
+			 * Unlink the physical allocation before unmaking it
+			 * evictable so that the allocation isn't grown back to
+			 * its last backed size as we're going to unmap it
+			 * anyway.
+			 */
+			reg->cpu_alloc->reg = NULL;
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				reg->gpu_alloc->reg = NULL;
+
+			/*
+			 * If a region has been made evictable then we must
+			 * unmake it before trying to free it.
+			 * If the memory hasn't been reclaimed it will be
+			 * unmapped and freed below, if it has been reclaimed
+			 * then the operations below are no-ops.
+			 */
+			if (reg->flags & KBASE_REG_DONT_NEED) {
+				KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+						   KBASE_MEM_TYPE_NATIVE);
+				kbase_mem_evictable_unmake(reg->gpu_alloc);
+			}
+		}
+
+		/*
+		 * Remove the region from the sticky resource metadata
+		 * list should it be there.
+		 */
+		kbase_sticky_resource_release(reg->kctx, NULL,
+				reg->start_pfn << PAGE_SHIFT);
+
+		kbase_mem_phy_alloc_put(reg->cpu_alloc);
+		kbase_mem_phy_alloc_put(reg->gpu_alloc);
+		/* To detect use-after-free in debug builds */
+		KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
+	}
+	kfree(reg);
+}
+
+KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+{
+	int err;
+	size_t i = 0;
+	unsigned long attr;
+	unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+
+	if ((kctx->kbdev->system_coherency == COHERENCY_ACE) &&
+		(reg->flags & KBASE_REG_SHARE_BOTH))
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA);
+	else
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
+	if (err)
+		return err;
+
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+		struct kbase_mem_phy_alloc *alloc;
+
+		alloc = reg->gpu_alloc;
+		stride = alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
+		for (i = 0; i < alloc->imported.alias.nents; i++) {
+			if (alloc->imported.alias.aliased[i].alloc) {
+				err = kbase_mmu_insert_pages(kctx,
+						reg->start_pfn + (i * stride),
+						alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
+						alloc->imported.alias.aliased[i].length,
+						reg->flags);
+				if (err)
+					goto bad_insert;
+
+				kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc);
+			} else {
+				err = kbase_mmu_insert_single_page(kctx,
+					reg->start_pfn + i * stride,
+					page_to_phys(kctx->aliasing_sink_page),
+					alloc->imported.alias.aliased[i].length,
+					(reg->flags & mask) | attr);
+
+				if (err)
+					goto bad_insert;
+			}
+		}
+	} else {
+		err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+				kbase_get_gpu_phy_pages(reg),
+				kbase_reg_current_backed_size(reg),
+				reg->flags);
+		if (err)
+			goto bad_insert;
+		kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
+	}
+
+	return err;
+
+bad_insert:
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+
+		stride = reg->gpu_alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		while (i--)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
+				kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length);
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+			}
+	}
+
+	kbase_remove_va_region(kctx, reg);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
+
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	if (reg->start_pfn == 0)
+		return 0;
+
+	if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		size_t i;
+
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+	} else {
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
+	}
+
+	if (err)
+		return err;
+
+	err = kbase_remove_va_region(kctx, reg);
+	return err;
+}
+
+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping_of_region(const struct kbase_va_region *reg, unsigned long uaddr, size_t size)
+{
+	struct kbase_cpu_mapping *map;
+	struct list_head *pos;
+
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+
+	if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
+		return NULL;
+
+	list_for_each(pos, &reg->cpu_alloc->mappings) {
+		map = list_entry(pos, struct kbase_cpu_mapping, mappings_list);
+		if (map->vm_start <= uaddr && map->vm_end >= uaddr + size)
+			return map;
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_of_region);
+
+int kbasep_find_enclosing_cpu_mapping_offset(
+	struct kbase_context *kctx, u64 gpu_addr,
+	unsigned long uaddr, size_t size, u64 * offset)
+{
+	struct kbase_cpu_mapping *map = NULL;
+	const struct kbase_va_region *reg;
+	int err = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+	if (reg && !(reg->flags & KBASE_REG_FREE)) {
+		map = kbasep_find_enclosing_cpu_mapping_of_region(reg, uaddr,
+				size);
+		if (map) {
+			*offset = (uaddr - PTR_TO_U64(map->vm_start)) +
+						 (map->page_off << PAGE_SHIFT);
+			err = 0;
+		}
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
+
+void kbase_sync_single(struct kbase_context *kctx,
+		phys_addr_t cpu_pa, phys_addr_t gpu_pa,
+		off_t offset, size_t size, enum kbase_sync_type sync_fn)
+{
+	struct page *cpu_page;
+
+	cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
+
+	if (likely(cpu_pa == gpu_pa)) {
+		dma_addr_t dma_addr;
+
+		BUG_ON(!cpu_page);
+		BUG_ON(offset + size > PAGE_SIZE);
+
+		dma_addr = kbase_dma_addr(cpu_page) + offset;
+		if (sync_fn == KBASE_SYNC_TO_CPU)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+		else if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+	} else {
+		void *src = NULL;
+		void *dst = NULL;
+		struct page *gpu_page;
+
+		if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
+			return;
+
+		gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+
+		if (sync_fn == KBASE_SYNC_TO_DEVICE) {
+			src = ((unsigned char *)kmap(cpu_page)) + offset;
+			dst = ((unsigned char *)kmap(gpu_page)) + offset;
+		} else if (sync_fn == KBASE_SYNC_TO_CPU) {
+			dma_sync_single_for_cpu(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+			src = ((unsigned char *)kmap(gpu_page)) + offset;
+			dst = ((unsigned char *)kmap(cpu_page)) + offset;
+		}
+		memcpy(dst, src, size);
+		kunmap(gpu_page);
+		kunmap(cpu_page);
+		if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+	}
+}
+
+static int kbase_do_syncset(struct kbase_context *kctx,
+		struct base_syncset *set, enum kbase_sync_type sync_fn)
+{
+	int err = 0;
+	struct basep_syncset *sset = &set->basep_sset;
+	struct kbase_va_region *reg;
+	struct kbase_cpu_mapping *map;
+	unsigned long start;
+	size_t size;
+	phys_addr_t *cpu_pa;
+	phys_addr_t *gpu_pa;
+	u64 page_off, page_count;
+	u64 i;
+	off_t offset;
+
+	kbase_os_mem_map_lock(kctx);
+	kbase_gpu_vm_lock(kctx);
+
+	/* find the region where the virtual address is contained */
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			sset->mem_handle.basep.handle);
+	if (!reg) {
+		dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX",
+				sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED))
+		goto out_unlock;
+
+	start = (uintptr_t)sset->user_addr;
+	size = (size_t)sset->size;
+
+	map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size);
+	if (!map) {
+		dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
+				start, sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	offset = start & (PAGE_SIZE - 1);
+	page_off = map->page_off + ((start - map->vm_start) >> PAGE_SHIFT);
+	page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	cpu_pa = kbase_get_cpu_phy_pages(reg);
+	gpu_pa = kbase_get_gpu_phy_pages(reg);
+
+	/* Sync first page */
+	if (cpu_pa[page_off]) {
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+
+		kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
+				offset, sz, sync_fn);
+	}
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		/* we grow upwards, so bail on first non-present page */
+		if (!cpu_pa[page_off + i])
+			break;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + i],
+				gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1 && cpu_pa[page_off + page_count - 1]) {
+		size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
+				gpu_pa[page_off + page_count - 1], 0, sz,
+				sync_fn);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_os_mem_map_unlock(kctx);
+	return err;
+}
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset)
+{
+	int err = -EINVAL;
+	struct basep_syncset *sset;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != syncset);
+
+	sset = &syncset->basep_sset;
+
+	switch (sset->type) {
+	case BASE_SYNCSET_OP_MSYNC:
+		err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_DEVICE);
+		break;
+
+	case BASE_SYNCSET_OP_CSYNC:
+		err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_CPU);
+		break;
+
+	default:
+		dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
+		break;
+	}
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_sync_now);
+
+/* vm lock must be held */
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Unlink the physical allocation before unmaking it evictable so
+	 * that the allocation isn't grown back to its last backed size
+	 * as we're going to unmap it anyway.
+	 */
+	reg->cpu_alloc->reg = NULL;
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		reg->gpu_alloc->reg = NULL;
+
+	/*
+	 * If a region has been made evictable then we must unmake it
+	 * before trying to free it.
+	 * If the memory hasn't been reclaimed it will be unmapped and freed
+	 * below, if it has been reclaimed then the operations below are no-ops.
+	 */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+				   KBASE_MEM_TYPE_NATIVE);
+		kbase_mem_evictable_unmake(reg->gpu_alloc);
+	}
+
+	err = kbase_gpu_munmap(kctx, reg);
+	if (err) {
+		dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+		goto out;
+	}
+
+	/* This will also free the physical pages */
+	kbase_free_alloced_region(reg);
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free_region);
+
+/**
+ * @brief Free the region from the GPU and unregister it.
+ *
+ * This function implements the free operation on a memory segment.
+ * It will loudly fail if called with outstanding mappings.
+ */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if (0 == gpu_addr) {
+		dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+		return -EINVAL;
+	}
+	kbase_gpu_vm_lock(kctx);
+
+	if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
+	    gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
+		int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
+
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* ask to unlink the cookie as we'll free it */
+
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		kbase_free_alloced_region(reg);
+	} else {
+		/* A real GPU va */
+		/* Validate the region */
+		reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+		if (!reg || (reg->flags & KBASE_REG_FREE)) {
+			dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+			/* SAME_VA must be freed through munmap */
+			dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		err = kbase_mem_free_region(kctx, reg);
+	}
+
+ out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free);
+
+void kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
+
+	reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
+	/* all memory is now growable */
+	reg->flags |= KBASE_REG_GROWABLE;
+
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		reg->flags |= KBASE_REG_PF_GROW;
+
+	if (flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (0 == (flags & BASE_MEM_PROT_GPU_EX))
+		reg->flags |= KBASE_REG_GPU_NX;
+
+	if (flags & BASE_MEM_COHERENT_SYSTEM ||
+			flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED)
+		reg->flags |= KBASE_REG_SHARE_BOTH;
+	else if (flags & BASE_MEM_COHERENT_LOCAL)
+		reg->flags |= KBASE_REG_SHARE_IN;
+
+	/* Set up default MEMATTR usage */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
+		(reg->flags & KBASE_REG_SHARE_BOTH)) {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
+	} else {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_update_region_flags);
+
+int kbase_alloc_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_requested)
+{
+	int new_page_count __maybe_unused;
+	size_t old_page_count = alloc->nents;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = kbase_atomic_add_pages(
+			nr_pages_requested, &alloc->imported.kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer */
+	kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested);
+
+	if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool,
+			nr_pages_requested, alloc->pages + old_page_count) != 0)
+		goto no_alloc;
+
+	/*
+	 * Request a zone cache update, this scans only the new pages an
+	 * appends their information to the zone cache. if the update
+	 * fails then clear the cache so we fall-back to doing things
+	 * page by page.
+	 */
+	if (kbase_zone_cache_update(alloc, old_page_count) != 0)
+		kbase_zone_cache_clear(alloc);
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)alloc->imported.kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return 0;
+
+no_alloc:
+	kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+	return -ENOMEM;
+}
+
+int kbase_free_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	phys_addr_t *start_free;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	/* early out if nothing to do */
+	if (0 == nr_pages_to_free)
+		return 0;
+
+	start_free = alloc->pages + alloc->nents - nr_pages_to_free;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/*
+	 * Clear the zone cache, we don't expect JIT allocations to be
+	 * shrunk in parts so there is no point trying to optimize for that
+	 * by scanning for the changes caused by freeing this memory and
+	 * updating the existing cache entries.
+	 */
+	kbase_zone_cache_clear(alloc);
+
+	kbase_mem_pool_free_pages(&kctx->mem_pool,
+				  nr_pages_to_free,
+				  start_free,
+				  syncback,
+				  reclaimed);
+
+	alloc->nents -= nr_pages_to_free;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		kbase_process_page_usage_dec(kctx, nr_pages_to_free);
+		new_page_count = kbase_atomic_sub_pages(nr_pages_to_free,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(nr_pages_to_free,
+				       &kctx->kbdev->memdev.used_pages);
+
+		kbase_tlstream_aux_pagesalloc(
+				(u32)kctx->id,
+				(u64)new_page_count);
+	}
+
+	return 0;
+}
+
+void kbase_mem_kref_free(struct kref *kref)
+{
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
+
+	switch (alloc->type) {
+	case KBASE_MEM_TYPE_NATIVE: {
+		WARN_ON(!alloc->imported.kctx);
+		/*
+		 * The physical allocation must have been removed from the
+		 * eviction list before trying to free it.
+		 */
+		WARN_ON(!list_empty(&alloc->evict_node));
+		kbase_free_phy_pages_helper(alloc, alloc->nents);
+		break;
+	}
+	case KBASE_MEM_TYPE_ALIAS: {
+		/* just call put on the underlying phy allocs */
+		size_t i;
+		struct kbase_aliased *aliased;
+
+		aliased = alloc->imported.alias.aliased;
+		if (aliased) {
+			for (i = 0; i < alloc->imported.alias.nents; i++)
+				if (aliased[i].alloc)
+					kbase_mem_phy_alloc_put(aliased[i].alloc);
+			vfree(aliased);
+		}
+		break;
+	}
+	case KBASE_MEM_TYPE_RAW:
+		/* raw pages, external cleanup */
+		break;
+ #ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ump_dd_release(alloc->imported.ump_handle);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		dma_buf_detach(alloc->imported.umm.dma_buf,
+			       alloc->imported.umm.dma_attachment);
+		dma_buf_put(alloc->imported.umm.dma_buf);
+		break;
+#endif
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		if (alloc->imported.user_buf.mm)
+			mmdrop(alloc->imported.user_buf.mm);
+		kfree(alloc->imported.user_buf.pages);
+		break;
+	case KBASE_MEM_TYPE_TB:{
+		void *tb;
+
+		tb = alloc->imported.kctx->jctx.tb;
+		kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
+		vfree(tb);
+		break;
+	}
+	default:
+		WARN(1, "Unexecpted free of type %d\n", alloc->type);
+		break;
+	}
+
+	/* Free based on allocation type */
+	if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+		vfree(alloc);
+	else
+		kfree(alloc);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(vsize > 0);
+
+	/* validate user provided arguments */
+	if (size > vsize || vsize > reg->nr_pages)
+		goto out_term;
+
+	/* Prevent vsize*sizeof from wrapping around.
+	 * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail.
+	 */
+	if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
+		goto out_term;
+
+	KBASE_DEBUG_ASSERT(0 != vsize);
+
+	if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
+		goto out_term;
+
+	reg->cpu_alloc->reg = reg;
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
+			goto out_rollback;
+		reg->gpu_alloc->reg = reg;
+	}
+
+	return 0;
+
+out_rollback:
+	kbase_free_phy_pages_helper(reg->cpu_alloc, size);
+out_term:
+	return -1;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+
+bool kbase_check_alloc_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Either the GPU or CPU must be reading from the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
+		return false;
+
+	/* Either the GPU or CPU must be writing to the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for allocating. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* BASE_MEM_IMPORT_SHARED is only valid for imported memory */
+	if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
+		return false;
+
+	return true;
+}
+
+bool kbase_check_import_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Imported memory cannot be GPU executable */
+	if (flags & BASE_MEM_PROT_GPU_EX)
+		return false;
+
+	/* Imported memory cannot grow on page fault */
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for importing. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* Secure memory cannot be read by the CPU */
+	if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD))
+		return false;
+
+	return true;
+}
+
+/**
+ * @brief Acquire the per-context region list lock
+ */
+void kbase_gpu_vm_lock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_lock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
+
+/**
+ * @brief Release the per-context region list lock
+ */
+void kbase_gpu_vm_unlock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_unlock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
+
+#ifdef CONFIG_DEBUG_FS
+struct kbase_jit_debugfs_data {
+	int (*func)(struct kbase_jit_debugfs_data *);
+	struct mutex lock;
+	struct kbase_context *kctx;
+	u64 active_value;
+	u64 pool_value;
+	u64 destroy_value;
+	char buffer[50];
+};
+
+static int kbase_jit_debugfs_common_open(struct inode *inode,
+		struct file *file, int (*func)(struct kbase_jit_debugfs_data *))
+{
+	struct kbase_jit_debugfs_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->func = func;
+	mutex_init(&data->lock);
+	data->kctx = (struct kbase_context *) inode->i_private;
+
+	file->private_data = data;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t kbase_jit_debugfs_common_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_jit_debugfs_data *data;
+	size_t size;
+	int ret;
+
+	data = (struct kbase_jit_debugfs_data *) file->private_data;
+	mutex_lock(&data->lock);
+
+	if (*ppos) {
+		size = strnlen(data->buffer, sizeof(data->buffer));
+	} else {
+		if (!data->func) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		if (data->func(data)) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		size = scnprintf(data->buffer, sizeof(data->buffer),
+				"%llu,%llu,%llu", data->active_value,
+				data->pool_value, data->destroy_value);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size);
+
+out_unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+static int kbase_jit_debugfs_common_release(struct inode *inode,
+		struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+	return kbase_jit_debugfs_common_open(inode, file, __func); \
+} \
+static const struct file_operations __fops = { \
+	.owner = THIS_MODULE, \
+	.open = __fops ## _open, \
+	.release = kbase_jit_debugfs_common_release, \
+	.read = kbase_jit_debugfs_common_read, \
+	.write = NULL, \
+	.llseek = generic_file_llseek, \
+}
+
+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct list_head *tmp;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each(tmp, &kctx->jit_active_head) {
+		data->active_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_pool_head) {
+		data->pool_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_destroy_head) {
+		data->destroy_value++;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
+		kbase_jit_debugfs_count_get);
+
+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->nr_pages;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops,
+		kbase_jit_debugfs_vm_get);
+
+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->gpu_alloc->nents;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
+		kbase_jit_debugfs_phys_get);
+
+void kbase_jit_debugfs_init(struct kbase_context *kctx)
+{
+	/* Debugfs entry for getting the number of JIT allocations. */
+	debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_count_fops);
+
+	/*
+	 * Debugfs entry for getting the total number of virtual pages
+	 * used by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_vm_fops);
+
+	/*
+	 * Debugfs entry for getting the number of physical pages used
+	 * by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_phys_fops);
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations
+ * @work: Work item
+ *
+ * This function does the work of freeing JIT allocations whose physical
+ * backing has been released.
+ */
+static void kbase_jit_destroy_worker(struct work_struct *work)
+{
+	struct kbase_context *kctx;
+	struct kbase_va_region *reg;
+
+	kctx = container_of(work, struct kbase_context, jit_work);
+	do {
+		mutex_lock(&kctx->jit_lock);
+		if (list_empty(&kctx->jit_destroy_head))
+			reg = NULL;
+		else
+			reg = list_first_entry(&kctx->jit_destroy_head,
+				struct kbase_va_region, jit_node);
+
+		if (reg) {
+			list_del(&reg->jit_node);
+			mutex_unlock(&kctx->jit_lock);
+
+			kbase_gpu_vm_lock(kctx);
+			kbase_mem_free_region(kctx, reg);
+			kbase_gpu_vm_unlock(kctx);
+		} else
+			mutex_unlock(&kctx->jit_lock);
+	} while (reg);
+}
+
+int kbase_jit_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->jit_active_head);
+	INIT_LIST_HEAD(&kctx->jit_pool_head);
+	INIT_LIST_HEAD(&kctx->jit_destroy_head);
+	mutex_init(&kctx->jit_lock);
+	INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
+
+	return 0;
+}
+
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info)
+{
+	struct kbase_va_region *reg = NULL;
+	struct kbase_va_region *walker;
+	struct kbase_va_region *temp;
+	size_t current_diff = SIZE_MAX;
+
+	int ret;
+
+	mutex_lock(&kctx->jit_lock);
+	/*
+	 * Scan the pool for an existing allocation which meets our
+	 * requirements and remove it.
+	 */
+	list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) {
+
+		if (walker->nr_pages >= info->va_pages) {
+			size_t min_size, max_size, diff;
+
+			/*
+			 * The JIT allocations VA requirements have been
+			 * meet, it's suitable but other allocations
+			 * might be a better fit.
+			 */
+			min_size = min_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			max_size = max_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			diff = max_size - min_size;
+
+			if (current_diff > diff) {
+				current_diff = diff;
+				reg = walker;
+			}
+
+			/* The allocation is an exact match, stop looking */
+			if (current_diff == 0)
+				break;
+		}
+	}
+
+	if (reg) {
+		/*
+		 * Remove the found region from the pool and add it to the
+		 * active list.
+		 */
+		list_del_init(&reg->jit_node);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+
+		/* Release the jit lock before modifying the allocation */
+		mutex_unlock(&kctx->jit_lock);
+
+		kbase_gpu_vm_lock(kctx);
+
+		/* Make the physical backing no longer reclaimable */
+		if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+			goto update_failed;
+
+		/* Grow the backing if required */
+		if (reg->gpu_alloc->nents < info->commit_pages) {
+			size_t delta;
+			size_t old_size = reg->gpu_alloc->nents;
+
+			/* Allocate some more pages */
+			delta = info->commit_pages - reg->gpu_alloc->nents;
+			if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta)
+					!= 0)
+				goto update_failed;
+
+			if (reg->cpu_alloc != reg->gpu_alloc) {
+				if (kbase_alloc_phy_pages_helper(
+						reg->cpu_alloc, delta) != 0) {
+					kbase_free_phy_pages_helper(
+							reg->gpu_alloc, delta);
+					goto update_failed;
+				}
+			}
+
+			ret = kbase_mem_grow_gpu_mapping(kctx, reg,
+					info->commit_pages, old_size);
+			/*
+			 * The grow failed so put the allocation back in the
+			 * pool and return failure.
+			 */
+			if (ret)
+				goto update_failed;
+		}
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* No suitable JIT allocation was found so create a new one */
+		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+				BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+				BASE_MEM_COHERENT_LOCAL;
+		u64 gpu_addr;
+		u16 alignment;
+
+		mutex_unlock(&kctx->jit_lock);
+
+		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
+				info->extent, &flags, &gpu_addr, &alignment);
+		if (!reg)
+			goto out_unlocked;
+
+		mutex_lock(&kctx->jit_lock);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+		mutex_unlock(&kctx->jit_lock);
+	}
+
+	return reg;
+
+update_failed:
+	/*
+	 * An update to an allocation from the pool failed, chances
+	 * are slim a new allocation would fair any better so return
+	 * the allocation to the pool and return the function with failure.
+	 */
+	kbase_gpu_vm_unlock(kctx);
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_lock);
+out_unlocked:
+	return NULL;
+}
+
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	/* The physical backing of memory in the pool is always reclaimable */
+	down_read(&kctx->process_mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+	kbase_mem_evictable_make(reg->gpu_alloc);
+	kbase_gpu_vm_unlock(kctx);
+	up_read(&kctx->process_mm->mmap_sem);
+
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_lock);
+}
+
+void kbase_jit_backing_lost(struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = reg->kctx;
+
+	/*
+	 * JIT allocations will always be on a list, if the region
+	 * is not on a list then it's not a JIT allocation.
+	 */
+	if (list_empty(&reg->jit_node))
+		return;
+
+	/*
+	 * Freeing the allocation requires locks we might not be able
+	 * to take now, so move the allocation to the free list and kick
+	 * the worker which will do the freeing.
+	 */
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_destroy_head);
+	mutex_unlock(&kctx->jit_lock);
+
+	schedule_work(&kctx->jit_work);
+}
+
+bool kbase_jit_evict(struct kbase_context *kctx)
+{
+	struct kbase_va_region *reg = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Free the oldest allocation from the pool */
+	mutex_lock(&kctx->jit_lock);
+	if (!list_empty(&kctx->jit_pool_head)) {
+		reg = list_entry(kctx->jit_pool_head.prev,
+				struct kbase_va_region, jit_node);
+		list_del(&reg->jit_node);
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	if (reg)
+		kbase_mem_free_region(kctx, reg);
+
+	return (reg != NULL);
+}
+
+void kbase_jit_term(struct kbase_context *kctx)
+{
+	struct kbase_va_region *walker;
+
+	/* Free all allocations for this context */
+
+	/*
+	 * Flush the freeing of allocations whose backing has been freed
+	 * (i.e. everything in jit_destroy_head).
+	 */
+	cancel_work_sync(&kctx->jit_work);
+
+	kbase_gpu_vm_lock(kctx);
+	/* Free all allocations from the pool */
+	while (!list_empty(&kctx->jit_pool_head)) {
+		walker = list_first_entry(&kctx->jit_pool_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		kbase_mem_free_region(kctx, walker);
+	}
+
+	/* Free all allocations from active list */
+	while (!list_empty(&kctx->jit_active_head)) {
+		walker = list_first_entry(&kctx->jit_active_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		kbase_mem_free_region(kctx, walker);
+	}
+	kbase_gpu_vm_unlock(kctx);
+}
+
+static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	long pinned_pages;
+	struct kbase_mem_phy_alloc *alloc;
+	struct page **pages;
+	phys_addr_t *pa;
+	long i;
+	int err = -ENOMEM;
+	unsigned long address;
+	struct mm_struct *mm;
+	struct device *dev;
+	unsigned long offset;
+	unsigned long local_size;
+
+	alloc = reg->gpu_alloc;
+	pa = kbase_get_gpu_phy_pages(reg);
+	address = alloc->imported.user_buf.address;
+	mm = alloc->imported.user_buf.mm;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+
+	pages = alloc->imported.user_buf.pages;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	pinned_pages = get_user_pages(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#else
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#endif
+
+	if (pinned_pages <= 0)
+		return pinned_pages;
+
+	if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+		for (i = 0; i < pinned_pages; i++)
+			put_page(pages[i]);
+		return -ENOMEM;
+	}
+
+	dev = kctx->kbdev->dev;
+	offset = address & ~PAGE_MASK;
+	local_size = alloc->imported.user_buf.size;
+
+	for (i = 0; i < pinned_pages; i++) {
+		dma_addr_t dma_addr;
+		unsigned long min;
+
+		min = MIN(PAGE_SIZE - offset, local_size);
+		dma_addr = dma_map_page(dev, pages[i],
+				offset, min,
+				DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, dma_addr))
+			goto unwind;
+
+		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
+		pa[i] = page_to_phys(pages[i]);
+
+		local_size -= min;
+		offset = 0;
+	}
+
+	alloc->nents = pinned_pages;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
+			kbase_reg_current_backed_size(reg),
+			reg->flags);
+	if (err == 0)
+		return 0;
+
+	alloc->nents = 0;
+	/* fall down */
+unwind:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				alloc->imported.user_buf.dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		put_page(pages[i]);
+		pages[i] = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable)
+{
+	long i;
+	struct page **pages;
+	unsigned long size = alloc->imported.user_buf.size;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	pages = alloc->imported.user_buf.pages;
+	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
+		unsigned long local_size;
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
+		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+				DMA_BIDIRECTIONAL);
+		if (writeable)
+			set_page_dirty_lock(pages[i]);
+		put_page(pages[i]);
+		pages[i] = NULL;
+
+		size -= local_size;
+	}
+	alloc->nents = 0;
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kbase_jd_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	struct sg_table *sgt;
+	struct scatterlist *s;
+	int i;
+	phys_addr_t *pa;
+	int err;
+	size_t count = 0;
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = reg->gpu_alloc;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+	KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
+	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
+			DMA_BIDIRECTIONAL);
+
+	if (IS_ERR_OR_NULL(sgt))
+		return -EINVAL;
+
+	/* save for later */
+	alloc->imported.umm.sgt = sgt;
+
+	pa = kbase_get_gpu_phy_pages(reg);
+	KBASE_DEBUG_ASSERT(pa);
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		int j;
+		size_t pages = PFN_UP(sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+		"sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+		sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+		(unsigned long long) sg_dma_address(s));
+
+		for (j = 0; (j < pages) && (count < reg->nr_pages); j++,
+				count++)
+			*pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
+		WARN_ONCE(j < pages,
+		"sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+		alloc->imported.umm.dma_buf->size);
+	}
+
+	if (WARN_ONCE(count < reg->nr_pages,
+			"sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+			alloc->imported.umm.dma_buf->size)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Update nents as we now have pages to map */
+	alloc->nents = count;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+			kbase_get_gpu_phy_pages(reg),
+			kbase_reg_current_backed_size(reg),
+			reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+
+out:
+	if (err) {
+		dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+				alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+		alloc->imported.umm.sgt = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_umm_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(alloc);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+	    alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+	alloc->nents = 0;
+}
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+
+#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) \
+		|| defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS)
+static void add_kds_resource(struct kds_resource *kds_res,
+		struct kds_resource **kds_resources, u32 *kds_res_count,
+		unsigned long *kds_access_bitmap, bool exclusive)
+{
+	u32 i;
+
+	for (i = 0; i < *kds_res_count; i++) {
+		/* Duplicate resource, ignore */
+		if (kds_resources[i] == kds_res)
+			return;
+	}
+
+	kds_resources[*kds_res_count] = kds_res;
+	if (exclusive)
+		set_bit(*kds_res_count, kds_access_bitmap);
+	(*kds_res_count)++;
+}
+#endif
+
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+		, u32 *kds_res_count, struct kds_resource **kds_resources,
+		unsigned long *kds_access_bitmap, bool exclusive
+#endif
+		)
+{
+	int err;
+
+	/* decide what needs to happen for this resource */
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		if (reg->gpu_alloc->imported.user_buf.mm != locked_mm)
+			goto exit;
+
+		reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
+			err = kbase_jd_user_buf_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+	}
+	break;
+	case KBASE_MEM_TYPE_IMPORTED_UMP: {
+#if defined(CONFIG_KDS) && defined(CONFIG_UMP)
+		if (kds_res_count) {
+			struct kds_resource *kds_res;
+
+			kds_res = ump_dd_kds_resource_get(
+					reg->gpu_alloc->imported.ump_handle);
+			if (kds_res)
+				add_kds_resource(kds_res, kds_resources,
+						kds_res_count,
+						kds_access_bitmap, exclusive);
+		}
+#endif				/*defined(CONFIG_KDS) && defined(CONFIG_UMP) */
+		break;
+	}
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+		if (kds_res_count) {
+			struct kds_resource *kds_res;
+
+			kds_res = get_dma_buf_kds_resource(
+					reg->gpu_alloc->imported.umm.dma_buf);
+			if (kds_res)
+				add_kds_resource(kds_res, kds_resources,
+						kds_res_count,
+						kds_access_bitmap, exclusive);
+		}
+#endif
+		reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+			err = kbase_jd_umm_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+		break;
+	}
+#endif
+	default:
+		goto exit;
+	}
+
+	return kbase_mem_phy_alloc_get(reg->gpu_alloc);
+exit:
+	return NULL;
+}
+
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+	switch (alloc->type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		alloc->imported.umm.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.umm.current_mapping_usage_count) {
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			kbase_jd_umm_unmap(kctx, alloc);
+		}
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		alloc->imported.user_buf.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
+			bool writeable = true;
+
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+				writeable = false;
+
+			kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+		}
+	}
+	break;
+	default:
+	break;
+	}
+	kbase_mem_phy_alloc_put(alloc);
+}
+
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *meta = NULL;
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Walk the per context external resource metadata list for the
+	 * metadata which matches the region which is being acquired.
+	 */
+	list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
+		if (walker->gpu_addr == gpu_addr) {
+			meta = walker;
+			break;
+		}
+	}
+
+	/* No metadata exists so create one. */
+	if (!meta) {
+		struct kbase_va_region *reg;
+
+		/* Find the region */
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				kctx, gpu_addr);
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+			goto failed;
+
+		/* Allocate the metadata object */
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			goto failed;
+
+		/*
+		 * Fill in the metadata object and acquire a reference
+		 * for the physical resource.
+		 */
+		meta->alloc = kbase_map_external_resource(kctx, reg, NULL
+#ifdef CONFIG_KDS
+				, NULL, NULL,
+				NULL, false
+#endif
+				);
+
+		if (!meta->alloc)
+			goto fail_map;
+
+		meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+
+		list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
+	}
+
+	return meta;
+
+fail_map:
+	kfree(meta);
+failed:
+	return NULL;
+}
+
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+	struct kbase_va_region *reg;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Search of the metadata if one isn't provided. */
+	if (!meta) {
+		/*
+		 * Walk the per context external resource metadata list for the
+		 * metadata which matches the region which is being released.
+		 */
+		list_for_each_entry(walker, &kctx->ext_res_meta_head,
+				ext_res_node) {
+			if (walker->gpu_addr == gpu_addr) {
+				meta = walker;
+				break;
+			}
+		}
+	}
+
+	/* No metadata so just return. */
+	if (!meta)
+		return false;
+
+	/* Drop the physical memory reference and free the metadata. */
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx,
+			meta->gpu_addr);
+
+	kbase_unmap_external_resource(kctx, reg, meta->alloc);
+	list_del(&meta->ext_res_node);
+	kfree(meta);
+
+	return true;
+}
+
+int kbase_sticky_resource_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->ext_res_meta_head);
+
+	return 0;
+}
+
+void kbase_sticky_resource_term(struct kbase_context *kctx)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Free any sticky resources which haven't been unmapped.
+	 *
+	 * Note:
+	 * We don't care about refcounts at this point as no future
+	 * references to the meta data will be made.
+	 * Region termination would find these if we didn't free them
+	 * here, but it's more efficient if we do the clean up here.
+	 */
+	while (!list_empty(&kctx->ext_res_meta_head)) {
+		walker = list_first_entry(&kctx->ext_res_meta_head,
+				struct kbase_ctx_ext_res_meta, ext_res_node);
+
+		kbase_sticky_resource_release(kctx, walker, 0);
+	}
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
new file mode 100755
index 0000000..8953c85
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -0,0 +1,1078 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/kref.h>
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include "mali_kbase_gator.h"
+#endif
+/* Required for kbase_mem_evictable_unmake */
+#include "mali_kbase_mem_linux.h"
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)	/* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
+The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and
+page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table
+updates and generates duplicate page faults as the page table information used by the MMU is not valid.   */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3)	/* round to 8 pages */
+
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0)	/* round to 1 page */
+
+/* This must always be a power of 2 */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/**
+ * A CPU mapping
+ */
+struct kbase_cpu_mapping {
+	struct   list_head mappings_list;
+	struct   kbase_mem_phy_alloc *alloc;
+	struct   kbase_context *kctx;
+	struct   kbase_va_region *region;
+	pgoff_t  page_off;
+	int      count;
+	unsigned long vm_start;
+	unsigned long vm_end;
+};
+
+enum kbase_memory_type {
+	KBASE_MEM_TYPE_NATIVE,
+	KBASE_MEM_TYPE_IMPORTED_UMP,
+	KBASE_MEM_TYPE_IMPORTED_UMM,
+	KBASE_MEM_TYPE_IMPORTED_USER_BUF,
+	KBASE_MEM_TYPE_ALIAS,
+	KBASE_MEM_TYPE_TB,
+	KBASE_MEM_TYPE_RAW
+};
+
+/* internal structure, mirroring base_mem_aliasing_info,
+ * but with alloc instead of a gpu va (handle) */
+struct kbase_aliased {
+	struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
+	u64 offset; /* in pages */
+	u64 length; /* in pages */
+};
+
+/**
+ * @brief Physical pages tracking object properties
+  */
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED  (1ul << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE            (1ul << 1)
+
+/* physical pages tracking object.
+ * Set up to track N pages.
+ * N not stored here, the creator holds that info.
+ * This object only tracks how many elements are actually valid (present).
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not
+ * shared with another region or client. CPU mappings are OK to exist when changing, as
+ * long as the tracked mappings objects are updated as part of the change.
+ */
+struct kbase_mem_phy_alloc {
+	struct kref           kref; /* number of users of this alloc */
+	atomic_t              gpu_mappings;
+	size_t                nents; /* 0..N */
+	phys_addr_t           *pages; /* N elements, only 0..nents are valid */
+
+	/* kbase_cpu_mappings */
+	struct list_head      mappings;
+
+	/* Node used to store this allocation on the eviction list */
+	struct list_head      evict_node;
+	/* Physical backing size when the pages where evicted */
+	size_t                evicted;
+	/*
+	 * Back reference to the region structure which created this
+	 * allocation, or NULL if it has been freed.
+	 */
+	struct kbase_va_region *reg;
+
+	/* type of buffer */
+	enum kbase_memory_type type;
+
+	unsigned long properties;
+
+	struct list_head       zone_cache;
+
+	/* member in union valid based on @a type */
+	union {
+#ifdef CONFIG_UMP
+		ump_dd_handle ump_handle;
+#endif /* CONFIG_UMP */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		struct {
+			struct dma_buf *dma_buf;
+			struct dma_buf_attachment *dma_attachment;
+			unsigned int current_mapping_usage_count;
+			struct sg_table *sgt;
+		} umm;
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+		struct {
+			u64 stride;
+			size_t nents;
+			struct kbase_aliased *aliased;
+		} alias;
+		/* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
+		struct kbase_context *kctx;
+		struct {
+			unsigned long address;
+			unsigned long size;
+			unsigned long nr_pages;
+			struct page **pages;
+			unsigned int current_mapping_usage_count;
+			struct mm_struct *mm;
+			dma_addr_t *dma_addrs;
+		} user_buf;
+	} imported;
+};
+
+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		atomic_inc(&alloc->gpu_mappings);
+}
+
+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+			pr_err("Mismatched %s:\n", __func__);
+			dump_stack();
+		}
+}
+
+void kbase_mem_kref_free(struct kref *kref);
+
+int kbase_mem_init(struct kbase_device *kbdev);
+void kbase_mem_halt(struct kbase_device *kbdev);
+void kbase_mem_term(struct kbase_device *kbdev);
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_get(&alloc->kref);
+	return alloc;
+}
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_put(&alloc->kref, kbase_mem_kref_free);
+	return NULL;
+}
+
+/**
+ * A GPU memory region, and attributes for CPU mappings.
+ */
+struct kbase_va_region {
+	struct rb_node rblink;
+	struct list_head link;
+
+	struct kbase_context *kctx;	/* Backlink to base context */
+
+	u64 start_pfn;		/* The PFN in GPU space */
+	size_t nr_pages;
+
+/* Free region */
+#define KBASE_REG_FREE              (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR            (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR            (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX            (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED        (1ul << 4)
+/* Is GPU cached? */
+#define KBASE_REG_GPU_CACHED        (1ul << 5)
+
+#define KBASE_REG_GROWABLE          (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW           (1ul << 7)
+
+/* VA managed by us */
+#define KBASE_REG_CUSTOM_VA         (1ul << 8)
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN          (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH        (1ul << 10)
+
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_MASK         (3ul << 11)
+#define KBASE_REG_ZONE(x)           (((x) & 3) << 11)
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD            (1ul<<13)
+/* CPU read access */
+#define KBASE_REG_CPU_RD            (1ul<<14)
+
+/* Aligned for GPU EX in SAME_VA */
+#define KBASE_REG_ALIGNED           (1ul<<15)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK      (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x)  (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x)  (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_SECURE            (1ul << 19)
+
+#define KBASE_REG_DONT_NEED         (1ul << 20)
+
+#define KBASE_REG_ZONE_SAME_VA      KBASE_REG_ZONE(0)
+
+/* only used with 32-bit clients */
+/*
+ * On a 32bit platform, custom VA should be wired from (4GB + shader region)
+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink
+ * it down, if required by the GPU, during initialization.
+ */
+
+/*
+ * Dedicated 16MB region for shader code:
+ * VA range 0x101000000-0x102000000
+ */
+#define KBASE_REG_ZONE_EXEC         KBASE_REG_ZONE(1)
+#define KBASE_REG_ZONE_EXEC_BASE    (0x101000000ULL >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_SIZE    ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
+
+#define KBASE_REG_ZONE_CUSTOM_VA         KBASE_REG_ZONE(2)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE    (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE    (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
+
+	unsigned long flags;
+
+	size_t extent; /* nr of pages alloc'd on PF */
+
+	struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */
+	struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */
+
+	/* non-NULL if this memory object is a kds_resource */
+	struct kds_resource *kds_res;
+
+	/* List head used to store the region in the JIT allocation pool */
+	struct list_head jit_node;
+};
+
+/* Common functions */
+static inline phys_addr_t *kbase_get_cpu_phy_pages(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->pages;
+}
+
+static inline phys_addr_t *kbase_get_gpu_phy_pages(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->gpu_alloc->pages;
+}
+
+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	/* if no alloc object the backed size naturally is 0 */
+	if (!reg->cpu_alloc)
+		return 0;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->nents;
+}
+
+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
+
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
+{
+	struct kbase_mem_phy_alloc *alloc;
+	size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+	size_t per_page_size = sizeof(*alloc->pages);
+
+	/* Imported pages may have page private data already in use */
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		alloc_size += nr_pages *
+				sizeof(*alloc->imported.user_buf.dma_addrs);
+		per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
+	}
+
+	/*
+	 * Prevent nr_pages*per_page_size + sizeof(*alloc) from
+	 * wrapping around.
+	 */
+	if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+			/ per_page_size))
+		return ERR_PTR(-ENOMEM);
+
+	/* Allocate based on the size to reduce internal fragmentation of vmem */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc = vzalloc(alloc_size);
+	else
+		alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+	if (!alloc)
+		return ERR_PTR(-ENOMEM);
+
+	/* Store allocation method */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+	kref_init(&alloc->kref);
+	atomic_set(&alloc->gpu_mappings, 0);
+	alloc->nents = 0;
+	alloc->pages = (void *)(alloc + 1);
+	INIT_LIST_HEAD(&alloc->mappings);
+	alloc->type = type;
+	INIT_LIST_HEAD(&alloc->zone_cache);
+
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
+		alloc->imported.user_buf.dma_addrs =
+				(void *) (alloc->pages + nr_pages);
+
+	return alloc;
+}
+
+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
+		struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+	reg->cpu_alloc = kbase_alloc_create(reg->nr_pages,
+			KBASE_MEM_TYPE_NATIVE);
+	if (IS_ERR(reg->cpu_alloc))
+		return PTR_ERR(reg->cpu_alloc);
+	else if (!reg->cpu_alloc)
+		return -ENOMEM;
+	reg->cpu_alloc->imported.kctx = kctx;
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+	if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE)
+	    && (reg->flags & KBASE_REG_CPU_CACHED)) {
+		reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
+				KBASE_MEM_TYPE_NATIVE);
+		reg->gpu_alloc->imported.kctx = kctx;
+		INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+	} else {
+		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	}
+
+	INIT_LIST_HEAD(&reg->jit_node);
+	reg->flags &= ~KBASE_REG_FREE;
+	return 0;
+}
+
+static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_add_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_sub_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+/*
+ * Max size for kbdev memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * Max size for kctx memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KCTX  (SZ_64M >> PAGE_SHIFT)
+
+/**
+ * kbase_mem_pool_init - Create a memory pool for a kbase device
+ * @pool:      Memory pool to initialize
+ * @max_size:  Maximum number of free pages the pool can hold
+ * @kbdev:     Kbase device where memory is used
+ * @next_pool: Pointer to the next pool or NULL.
+ *
+ * Allocations from @pool are in whole pages. Each @pool has a free list where
+ * pages can be quickly allocated from. The free list is initially empty and
+ * filled whenever pages are freed back to the pool. The number of free pages
+ * in the pool will in general not exceed @max_size, but the pool may in
+ * certain corner cases grow above @max_size.
+ *
+ * If @next_pool is not NULL, we will allocate from @next_pool before going to
+ * the kernel allocator. Similarily pages can spill over to @next_pool when
+ * @pool is full. Pages are zeroed before they spill over to another pool, to
+ * prevent leaking information between applications.
+ *
+ * A shrinker is registered so that Linux mm can reclaim pages from the pool as
+ * needed.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool);
+
+/**
+ * kbase_mem_pool_term - Destroy a memory pool
+ * @pool:  Memory pool to destroy
+ *
+ * Pages in the pool will spill over to @next_pool (if available) or freed to
+ * the kernel.
+ */
+void kbase_mem_pool_term(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_alloc - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * Allocations from the pool are made as follows:
+ * 1. If there are free pages in the pool, allocate a page from @pool.
+ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page
+ *    from @next_pool.
+ * 3. Return NULL if no memory in the pool
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ */
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_free - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @page:  Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * Pages are freed to the pool as follows:
+ * 1. If @pool is not full, add @page to @pool.
+ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to
+ *    @next_pool.
+ * 3. Finally, free @page to the kernel.
+ */
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page,
+		bool dirty);
+
+/**
+ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool
+ * @pool:     Memory pool to allocate from
+ * @nr_pages: Number of pages to allocate
+ * @pages:    Pointer to array where the physical address of the allocated
+ *            pages will be stored.
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages);
+
+/**
+ * kbase_mem_pool_free_pages - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages, bool dirty, bool reclaimed);
+
+/**
+ * kbase_mem_pool_size - Get number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Note: the size of the pool may in certain corner cases exceed @max_size!
+ *
+ * Return: Number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool)
+{
+	return ACCESS_ONCE(pool->cur_size);
+}
+
+/**
+ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Return: Maximum number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool)
+{
+	return pool->max_size;
+}
+
+
+/**
+ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool
+ * @pool:     Memory pool to inspect
+ * @max_size: Maximum number of free pages the pool can hold
+ *
+ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit.
+ * For details see kbase_mem_pool_shrink().
+ */
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
+
+/**
+ * kbase_mem_pool_grow - Grow the pool
+ * @pool:       Memory pool to grow
+ * @nr_to_grow: Number of pages to add to the pool
+ *
+ * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
+ * become larger than the maximum size specified.
+ *
+ * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages
+ */
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+
+/**
+ * kbase_mem_pool_trim - Grow or shrink the pool to a new size
+ * @pool:     Memory pool to trim
+ * @new_size: New number of pages in the pool
+ *
+ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but
+ * not above the max_size for the pool.
+ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel.
+ */
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
+
+/*
+ * kbase_mem_alloc_page - Allocate a new page for a device
+ * @kbdev: The kbase device
+ *
+ * Most uses should use kbase_mem_pool_alloc to allocate a page. However that
+ * function can fail in the event the pool is empty.
+ *
+ * Return: A new page or NULL if no memory
+ */
+struct page *kbase_mem_alloc_page(struct kbase_device *kbdev);
+
+int kbase_region_tracker_init(struct kbase_context *kctx);
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages);
+void kbase_region_tracker_term(struct kbase_context *kctx);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * @brief Check that a pointer is actually a valid region.
+ *
+ * Must be called with context lock held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr);
+
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone);
+void kbase_free_alloced_region(struct kbase_va_region *reg);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+bool kbase_check_alloc_flags(unsigned long flags);
+bool kbase_check_import_flags(unsigned long flags);
+void kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags);
+
+void kbase_gpu_vm_lock(struct kbase_context *kctx);
+void kbase_gpu_vm_unlock(struct kbase_context *kctx);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
+
+int kbase_mmu_init(struct kbase_context *kctx);
+void kbase_mmu_term(struct kbase_context *kctx);
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
+void kbase_mmu_free_pgd(struct kbase_context *kctx);
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					phys_addr_t phys, size_t nr,
+					unsigned long flags);
+
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr);
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags);
+
+/**
+ * @brief Register region and map it on the GPU.
+ *
+ * Call kbase_add_va_region() and map the region on the GPU.
+ */
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+/**
+ * @brief Remove the region from the GPU and unregister it.
+ *
+ * Must be called with context lock held.
+ */
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ */
+void kbase_mmu_update(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable() - Disable the MMU for a previously active kbase context.
+ * @kctx:	Kbase context
+ *
+ * Disable and perform the required cache maintenance to remove the all
+ * data from provided kbase context from the GPU caches.
+ *
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ */
+void kbase_mmu_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified
+ * address space.
+ * @kbdev:	Kbase device
+ * @as_nr:	The address space number to set to unmapped.
+ *
+ * This function must only be called during reset/power-up and it used to
+ * ensure the registers are in a known state.
+ *
+ * The caller must hold kbdev->mmu_hw_mutex.
+ */
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr);
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** Dump the MMU tables to a buffer
+ *
+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the
+ * buffer is too small then the return value will be NULL.
+ *
+ * The GPU vm lock must be held when calling this function.
+ *
+ * The buffer returned should be freed with @ref vfree when it is no longer required.
+ *
+ * @param[in]   kctx        The kbase context to dump
+ * @param[in]   nr_pages    The number of pages to allocate for the buffer.
+ *
+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too
+ * small)
+ */
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages);
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset);
+void kbase_sync_single(struct kbase_context *kctx, phys_addr_t cpu_pa,
+		phys_addr_t gpu_pa, off_t offset, size_t size,
+		enum kbase_sync_type sync_fn);
+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+
+/* OS specific functions */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr);
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg);
+void kbase_os_mem_map_lock(struct kbase_context *kctx);
+void kbase_os_mem_map_unlock(struct kbase_context *kctx);
+
+/**
+ * @brief Update the memory allocation counters for the current process
+ *
+ * OS specific call to updates the current memory allocation counters for the current process with
+ * the supplied delta.
+ *
+ * @param[in] kctx  The kbase context
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
+
+/**
+ * @brief Add to the memory allocation counters for the current process
+ *
+ * OS specific call to add to the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, pages);
+}
+
+/**
+ * @brief Subtract from the memory allocation counters for the current process
+ *
+ * OS specific call to subtract from the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, 0 - pages);
+}
+
+/**
+ * @brief Find the offset of the CPU mapping of a memory allocation containing
+ *        a given address range
+ *
+ * Searches for a CPU mapping of any part of the region starting at @p gpu_addr
+ * that fully encloses the CPU virtual address range specified by @p uaddr and
+ * @p size. Returns a failure indication if only part of the address range lies
+ * within a CPU mapping, or the address range lies within a CPU mapping of a
+ * different region.
+ *
+ * @param[in,out] kctx      The kernel base context used for the allocation.
+ * @param[in]     gpu_addr  GPU address of the start of the allocated region
+ *                          within which to search.
+ * @param[in]     uaddr     Start of the CPU virtual address range.
+ * @param[in]     size      Size of the CPU virtual address range (in bytes).
+ * @param[out]    offset    The offset from the start of the allocation to the
+ *                          specified CPU virtual address.
+ *
+ * @return 0 if offset was obtained successfully. Error code
+ *         otherwise.
+ */
+int kbasep_find_enclosing_cpu_mapping_offset(struct kbase_context *kctx,
+							u64 gpu_addr,
+							unsigned long uaddr,
+							size_t size,
+							u64 *offset);
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer);
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+* @brief Allocates physical pages.
+*
+* Allocates \a nr_pages_requested and updates the alloc object.
+*
+* @param[in] alloc allocation object to add pages to
+* @param[in] nr_pages_requested number of physical pages to allocate
+*
+* @return 0 if all pages have been successfully allocated. Error code otherwise
+*/
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested);
+
+/**
+* @brief Free physical pages.
+*
+* Frees \a nr_pages and updates the alloc object.
+*
+* @param[in] alloc allocation object to free pages from
+* @param[in] nr_pages_to_free number of physical pages to free
+*/
+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
+
+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+{
+	SetPagePrivate(p);
+	if (sizeof(dma_addr_t) > sizeof(p->private)) {
+		/* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
+		 * private field stays the same. So we have to be clever and
+		 * use the fact that we only store DMA addresses of whole pages,
+		 * so the low bits should be zero */
+		KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
+		set_page_private(p, dma_addr >> PAGE_SHIFT);
+	} else {
+		set_page_private(p, dma_addr);
+	}
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+	if (sizeof(dma_addr_t) > sizeof(p->private))
+		return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
+
+	return (dma_addr_t)page_private(p);
+}
+
+static inline void kbase_clear_dma_addr(struct page *p)
+{
+	ClearPagePrivate(p);
+}
+
+/**
+* @brief Process a bus or page fault.
+*
+* This function will process a fault on a specific address space
+*
+* @param[in] kbdev   The @ref kbase_device the fault happened on
+* @param[in] kctx    The @ref kbase_context for the faulting address space if
+*                    one was found.
+* @param[in] as      The address space that has the fault
+*/
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_as *as);
+
+/**
+ * @brief Process a page fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void page_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Process a bus fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void bus_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Flush MMU workqueues.
+ *
+ * This function will cause any outstanding page or bus faults to be processed.
+ * It should be called prior to powering off the GPU.
+ *
+ * @param[in] kbdev   Device pointer
+ */
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev);
+
+/**
+ * kbase_sync_single_for_device - update physical memory and give GPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * kbase_jit_debugfs_init - Add per context debugfs entry for JIT.
+ * @kctx: kbase context
+ */
+void kbase_jit_debugfs_init(struct kbase_context *kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_init - Initialize the JIT memory pool management
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_jit_init(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_allocate - Allocate JIT memory
+ * @kctx: kbase context
+ * @info: JIT allocation information
+ *
+ * Return: JIT allocation on success or NULL on failure.
+ */
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info);
+
+/**
+ * kbase_jit_free - Free a JIT allocation
+ * @kctx: kbase context
+ * @reg: JIT allocation
+ *
+ * Frees a JIT allocation and places it into the free pool for later reuse.
+ */
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing
+ * @reg: JIT allocation
+ */
+void kbase_jit_backing_lost(struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_evict - Evict a JIT allocation from the pool
+ * @kctx: kbase context
+ *
+ * Evict the least recently used JIT allocation from the pool. This can be
+ * required if normal VA allocations are failing due to VA exhaustion.
+ *
+ * Return: True if a JIT allocation was freed, false otherwise.
+ */
+bool kbase_jit_evict(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_term - Terminate the JIT memory pool management
+ * @kctx: kbase context
+ */
+void kbase_jit_term(struct kbase_context *kctx);
+
+/**
+ * kbase_map_external_resource - Map an external resource to the GPU.
+ * @kctx:              kbase context.
+ * @reg:               The region to map.
+ * @locked_mm:         The mm_struct which has been locked for this operation.
+ * @kds_res_count:     The number of KDS resources.
+ * @kds_resources:     Array of KDS resources.
+ * @kds_access_bitmap: Access bitmap for KDS.
+ * @exclusive:         If the KDS resource requires exclusive access.
+ *
+ * Return: The physical allocation which backs the region on success or NULL
+ * on failure.
+ */
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+		, u32 *kds_res_count, struct kds_resource **kds_resources,
+		unsigned long *kds_access_bitmap, bool exclusive
+#endif
+		);
+
+/**
+ * kbase_unmap_external_resource - Unmap an external resource from the GPU.
+ * @kctx:  kbase context.
+ * @reg:   The region to unmap or NULL if it has already been released.
+ * @alloc: The physical allocation being unmapped.
+ */
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_sticky_resource_init - Initialize sticky resource management.
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_sticky_resource_init(struct kbase_context *kctx);
+
+/**
+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @gpu_addr: The GPU address of the external resource.
+ *
+ * Return: The metadata object which represents the binding between the
+ * external resource and the kbase context on success or NULL on failure.
+ */
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_release - Release a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @meta:     Binding metadata.
+ * @gpu_addr: GPU address of the external resource.
+ *
+ * If meta is NULL then gpu_addr will be used to scan the metadata list and
+ * find the matching metadata (if any), otherwise the provided meta will be
+ * used and gpu_addr will be ignored.
+ *
+ * Return: True if the release found the metadata and the reference was dropped.
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_term - Terminate sticky resource management.
+ * @kctx: kbase context
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx);
+
+/**
+ * kbase_zone_cache_update - Update the memory zone cache after new pages have
+ * been added.
+ * @alloc:        The physical memory allocation to build the cache for.
+ * @start_offset: Offset to where the new pages start.
+ *
+ * Updates an existing memory zone cache, updating the counters for the
+ * various zones.
+ * If the memory allocation doesn't already have a zone cache assume that
+ * one isn't created and thus don't do anything.
+ *
+ * Return: Zero cache was updated, negative error code on error.
+ */
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset);
+
+/**
+ * kbase_zone_cache_build - Build the memory zone cache.
+ * @alloc:        The physical memory allocation to build the cache for.
+ *
+ * Create a new zone cache for the provided physical memory allocation if
+ * one doesn't already exist, if one does exist then just return.
+ *
+ * Return: Zero if the zone cache was created, negative error code on error.
+ */
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_zone_cache_clear - Clear the memory zone cache.
+ * @alloc:        The physical memory allocation to clear the cache on.
+ */
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc);
+
+#endif				/* _KBASE_MEM_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
new file mode 100755
index 0000000..b6dac55
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -0,0 +1,2819 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.c
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/shrinker.h>
+#include <linux/cache.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_tlstream.h>
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+static const struct vm_operations_struct kbase_vm_ops;
+
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ *
+ * Note: Caller must be holding the processes mmap_sem lock.
+ */
+static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment)
+{
+	int zone;
+	int gpu_pc_bits;
+	int cpu_va_bits;
+	struct kbase_va_region *reg;
+	struct device *dev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_alignment);
+
+	dev = kctx->kbdev->dev;
+	*va_alignment = 0; /* no alignment by default */
+	*gpu_va = 0; /* return 0 on failure */
+
+	gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	cpu_va_bits = BITS_PER_LONG;
+
+	if (0 == va_pages) {
+		dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!");
+		goto bad_size;
+	}
+
+	if (va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+#if defined(CONFIG_64BIT)
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		cpu_va_bits = 32;
+#endif
+
+	if (!kbase_check_alloc_flags(*flags)) {
+		dev_warn(dev,
+				"kbase_mem_alloc called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	/* Limit GPU executable allocs to GPU PC size */
+	if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+	    (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT)))
+		goto bad_ex_size;
+
+	/* find out which VA zone to use */
+	if (*flags & BASE_MEM_SAME_VA)
+		zone = KBASE_REG_ZONE_SAME_VA;
+	else if (*flags & BASE_MEM_PROT_GPU_EX)
+		zone = KBASE_REG_ZONE_EXEC;
+	else
+		zone = KBASE_REG_ZONE_CUSTOM_VA;
+
+	reg = kbase_alloc_free_region(kctx, 0, va_pages, zone);
+	if (!reg) {
+		dev_err(dev, "Failed to allocate free region");
+		goto no_region;
+	}
+
+	kbase_update_region_flags(kctx, reg, *flags);
+
+	if (kbase_reg_prepare_native(reg, kctx) != 0) {
+		dev_err(dev, "Failed to prepare region");
+		goto prepare_failed;
+	}
+
+	if (*flags & BASE_MEM_GROW_ON_GPF)
+		reg->extent = extent;
+	else
+		reg->extent = 0;
+
+	if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) {
+		dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)",
+				(unsigned long long)commit_pages,
+				(unsigned long long)va_pages);
+		goto no_mem;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & BASE_MEM_SAME_VA) {
+		unsigned long prot = PROT_NONE;
+		unsigned long va_size = va_pages << PAGE_SHIFT;
+		unsigned long va_map = va_size;
+		unsigned long cookie, cookie_nr;
+		unsigned long cpu_addr;
+
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(dev, "No cookies available for allocation!");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_cookie;
+		}
+		/* return a cookie */
+		cookie_nr = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << cookie_nr);
+		BUG_ON(kctx->pending_regions[cookie_nr]);
+		kctx->pending_regions[cookie_nr] = reg;
+
+		kbase_gpu_vm_unlock(kctx);
+
+		/* relocate to correct base */
+		cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		cookie <<= PAGE_SHIFT;
+
+		/* See if we must align memory due to GPU PC bits vs CPU VA */
+		if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+		    (cpu_va_bits > gpu_pc_bits)) {
+			*va_alignment = gpu_pc_bits;
+			reg->flags |= KBASE_REG_ALIGNED;
+		}
+
+		/*
+		 * 10.1-10.4 UKU userland relies on the kernel to call mmap.
+		 * For all other versions we can just return the cookie
+		 */
+		if (kctx->api_version < KBASE_API_VERSION(10, 1) ||
+		    kctx->api_version > KBASE_API_VERSION(10, 4)) {
+			*gpu_va = (u64) cookie;
+			return reg;
+		}
+
+		/*
+		 * To achieve alignment and avoid allocating on large alignment
+		 * (to work around a GPU hardware issue) we must allocate 3
+		 * times the required size.
+		 */
+		if (*va_alignment)
+			va_map += 3 * (1UL << *va_alignment);
+
+		if (*flags & BASE_MEM_PROT_CPU_RD)
+			prot |= PROT_READ;
+		if (*flags & BASE_MEM_PROT_CPU_WR)
+			prot |= PROT_WRITE;
+
+		cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot,
+				MAP_SHARED, cookie);
+
+		if (IS_ERR_VALUE(cpu_addr)) {
+			kbase_gpu_vm_lock(kctx);
+			kctx->pending_regions[cookie_nr] = NULL;
+			kctx->cookies |= (1UL << cookie_nr);
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+
+		/*
+		 * If we had to allocate extra VA space to force the
+		 * alignment release it.
+		 */
+		if (*va_alignment) {
+			unsigned long alignment = 1UL << *va_alignment;
+			unsigned long align_mask = alignment - 1;
+			unsigned long addr;
+			unsigned long addr_end;
+			unsigned long aligned_addr;
+			unsigned long aligned_addr_end;
+
+			addr = cpu_addr;
+			addr_end = addr + va_map;
+
+			aligned_addr = (addr + align_mask) &
+					~((u64) align_mask);
+			aligned_addr_end = aligned_addr + va_size;
+
+			if ((aligned_addr_end & BASE_MEM_MASK_4GB) == 0) {
+				/*
+				 * Can't end at 4GB boundary on some GPUs as
+				 * it will halt the shader.
+				 */
+				aligned_addr += 2 * alignment;
+				aligned_addr_end += 2 * alignment;
+			} else if ((aligned_addr & BASE_MEM_MASK_4GB) == 0) {
+				/*
+				 * Can't start at 4GB boundary on some GPUs as
+				 * it will halt the shader.
+				 */
+				aligned_addr += alignment;
+				aligned_addr_end += alignment;
+			}
+
+			/* anything to chop off at the start? */
+			if (addr != aligned_addr)
+				vm_munmap(addr, aligned_addr - addr);
+
+			/* anything at the end? */
+			if (addr_end != aligned_addr_end)
+				vm_munmap(aligned_addr_end,
+						addr_end - aligned_addr_end);
+
+			*gpu_va = (u64) aligned_addr;
+		} else
+			*gpu_va = (u64) cpu_addr;
+	} else /* we control the VA */ {
+		if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
+			dev_warn(dev, "Failed to map memory on GPU");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+
+		kbase_gpu_vm_unlock(kctx);
+	}
+
+	return reg;
+
+no_mmap:
+no_cookie:
+no_mem:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+prepare_failed:
+	kfree(reg);
+no_region:
+bad_ex_size:
+bad_flags:
+bad_size:
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_alloc);
+
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(out);
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	switch (query) {
+	case KBASE_MEM_QUERY_COMMIT_SIZE:
+		if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) {
+			*out = kbase_reg_current_backed_size(reg);
+		} else {
+			size_t i;
+			struct kbase_aliased *aliased;
+			*out = 0;
+			aliased = reg->cpu_alloc->imported.alias.aliased;
+			for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++)
+				*out += aliased[i].length;
+		}
+		break;
+	case KBASE_MEM_QUERY_VA_SIZE:
+		*out = reg->nr_pages;
+		break;
+	case KBASE_MEM_QUERY_FLAGS:
+	{
+		*out = 0;
+		if (KBASE_REG_CPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_WR;
+		if (KBASE_REG_CPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_RD;
+		if (KBASE_REG_CPU_CACHED & reg->flags)
+			*out |= BASE_MEM_CACHED_CPU;
+		if (KBASE_REG_GPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_WR;
+		if (KBASE_REG_GPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_RD;
+		if (!(KBASE_REG_GPU_NX & reg->flags))
+			*out |= BASE_MEM_PROT_GPU_EX;
+		if (KBASE_REG_SHARE_BOTH & reg->flags)
+			*out |= BASE_MEM_COHERENT_SYSTEM;
+		if (KBASE_REG_SHARE_IN & reg->flags)
+			*out |= BASE_MEM_COHERENT_LOCAL;
+		break;
+	}
+	default:
+		*out = 0;
+		goto out_unlock;
+	}
+
+	ret = 0;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the
+ * Ephemeral memory eviction list.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages which can be freed.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long pages = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+
+	mutex_lock(&kctx->evict_lock);
+
+	list_for_each_entry(alloc, &kctx->evict_list, evict_node)
+		pages += alloc->nents;
+
+	mutex_unlock(&kctx->evict_lock);
+	return pages;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction
+ * list for pages and try to reclaim them.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages freed (can be less then requested) or -1 if the
+ * shrinker failed to free pages in its pool.
+ *
+ * Note:
+ * This function accesses region structures without taking the region lock,
+ * this is required as the OOM killer can call the shrinker after the region
+ * lock has already been held.
+ * This is safe as we can guarantee that a region on the eviction list will
+ * not be freed (kbase_mem_free_region removes the allocation from the list
+ * before destroying it), or modified by other parts of the driver.
+ * The eviction list itself is guarded by the eviction lock and the MMU updates
+ * are protected by their own lock.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	struct kbase_mem_phy_alloc *tmp;
+	unsigned long freed = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+	mutex_lock(&kctx->evict_lock);
+
+	list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
+		int err;
+
+		err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
+				0, alloc->nents);
+		if (err != 0) {
+			/*
+			 * Failed to remove GPU mapping, tell the shrinker
+			 * to stop trying to shrink our slab even though we
+			 * have pages in it.
+			 */
+			freed = -1;
+			goto out_unlock;
+		}
+
+		/*
+		 * Update alloc->evicted before freeing the backing so the
+		 * helper can determine that it needs to bypass the accounting
+		 * and memory pool.
+		 */
+		alloc->evicted = alloc->nents;
+
+		kbase_free_phy_pages_helper(alloc, alloc->evicted);
+		freed += alloc->evicted;
+		list_del_init(&alloc->evict_node);
+
+		/*
+		 * Inform the JIT allocator this region has lost backing
+		 * as it might need to free the allocation.
+		 */
+		kbase_jit_backing_lost(alloc->reg);
+
+		/* Enough pages have been freed so stop now */
+		if (freed > sc->nr_to_scan)
+			break;
+	}
+out_unlock:
+	mutex_unlock(&kctx->evict_lock);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_evictable_reclaim_count_objects(s, sc);
+
+	return kbase_mem_evictable_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_evictable_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->evict_list);
+	mutex_init(&kctx->evict_lock);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink;
+#else
+	kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
+	kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+#endif
+	kctx->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	kctx->reclaim.batch = 0;
+#endif
+	register_shrinker(&kctx->reclaim);
+	return 0;
+}
+
+void kbase_mem_evictable_deinit(struct kbase_context *kctx)
+{
+	unregister_shrinker(&kctx->reclaim);
+}
+
+struct kbase_mem_zone_cache_entry {
+	/* List head used to link the cache entry to the memory allocation. */
+	struct list_head zone_node;
+	/* The zone the cacheline is for. */
+	struct zone *zone;
+	/* The number of pages in the allocation which belong to this zone. */
+	u64 count;
+};
+
+static bool kbase_zone_cache_builder(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	struct kbase_mem_zone_cache_entry *cache = NULL;
+	size_t i;
+	int ret = 0;
+
+	for (i = start_offset; i < alloc->nents; i++) {
+		struct page *p = phys_to_page(alloc->pages[i]);
+		struct zone *zone = page_zone(p);
+		bool create = true;
+
+		if (cache && (cache->zone == zone)) {
+			/*
+			 * Fast path check as most of the time adjacent
+			 * pages come from the same zone.
+			 */
+			create = false;
+		} else {
+			/*
+			 * Slow path check, walk all the cache entries to see
+			 * if we already know about this zone.
+			 */
+			list_for_each_entry(cache, &alloc->zone_cache, zone_node) {
+				if (cache->zone == zone) {
+					create = false;
+					break;
+				}
+			}
+		}
+
+		/* This zone wasn't found in the cache, create an entry for it */
+		if (create) {
+			cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+			if (!cache) {
+				ret = -ENOMEM;
+				goto bail;
+			}
+			cache->zone = zone;
+			cache->count = 0;
+			list_add(&cache->zone_node, &alloc->zone_cache);
+		}
+
+		cache->count++;
+	}
+	return 0;
+
+bail:
+	return ret;
+}
+
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	/*
+	 * Bail if the zone cache is empty, only update the cache if it
+	 * existed in the first place.
+	 */
+	if (list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, start_offset);
+}
+
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc)
+{
+	/* Bail if the zone cache already exists */
+	if (!list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, 0);
+}
+
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_mem_zone_cache_entry *walker;
+
+	while(!list_empty(&alloc->zone_cache)){
+		walker = list_first_entry(&alloc->zone_cache,
+				struct kbase_mem_zone_cache_entry, zone_node);
+		list_del(&walker->zone_node);
+		kfree(walker);
+	}
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p = phys_to_page(alloc->pages[i]);
+			struct zone *zone = page_zone(p);
+
+			zone_page_state_add(1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	kbase_process_page_usage_dec(kctx, alloc->nents);
+	new_page_count = kbase_atomic_sub_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+/**
+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable.
+ * @alloc: The physical allocation
+ */
+static
+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	new_page_count = kbase_atomic_add_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters so that the allocation is accounted for
+	 * against the process and thus is visible to the OOM killer,
+	 * then remove it from the reclaimable accounting. */
+	kbase_process_page_usage_inc(kctx, alloc->nents);
+
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(-zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p = phys_to_page(alloc->pages[i]);
+			struct zone *zone = page_zone(p);
+
+			zone_page_state_add(-1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* This alloction can't already be on a list. */
+	WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
+	/*
+	 * Try to shrink the CPU mappings as required, if we fail then
+	 * fail the process of making this allocation evictable.
+	 */
+	err = kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
+			0, gpu_alloc->nents);
+	if (err)
+		return -EINVAL;
+
+	/*
+	 * Add the allocation to the eviction list, after this point the shrink
+	 * can reclaim it.
+	 */
+	mutex_lock(&kctx->evict_lock);
+	list_add(&gpu_alloc->evict_node, &kctx->evict_list);
+	mutex_unlock(&kctx->evict_lock);
+	kbase_mem_evictable_mark_reclaim(gpu_alloc);
+
+	gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED;
+	return 0;
+}
+
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * First remove the allocation from the eviction list as it's no
+	 * longer eligible for eviction.
+	 */
+	mutex_lock(&kctx->evict_lock);
+	list_del_init(&gpu_alloc->evict_node);
+	mutex_unlock(&kctx->evict_lock);
+
+	if (gpu_alloc->evicted == 0) {
+		/*
+		 * The backing is still present, update the VM stats as it's
+		 * in use again.
+		 */
+		kbase_mem_evictable_unmark_reclaim(gpu_alloc);
+	} else {
+		/* If the region is still alive ... */
+		if (gpu_alloc->reg) {
+			/* ... allocate replacement backing ... */
+			err = kbase_alloc_phy_pages_helper(gpu_alloc,
+					gpu_alloc->evicted);
+
+			/*
+			 * ... and grow the mapping back to its
+			 * pre-eviction size.
+			 */
+			if (!err)
+				err = kbase_mem_grow_gpu_mapping(kctx,
+						gpu_alloc->reg,
+						gpu_alloc->evicted, 0);
+
+			gpu_alloc->evicted = 0;
+		}
+	}
+
+	/* If the region is still alive remove the DONT_NEED attribute. */
+	if (gpu_alloc->reg)
+		gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED;
+
+	return (err == 0);
+}
+
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+	unsigned int real_flags = 0;
+	unsigned int prev_flags = 0;
+	bool prev_needed, new_needed;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (!gpu_addr)
+		return -EINVAL;
+
+	/* nuke other bits */
+	flags &= mask;
+
+	/* check for only supported flags */
+	if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* mask covers bits we don't support? */
+	if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* convert flags */
+	if (BASE_MEM_COHERENT_SYSTEM & flags)
+		real_flags |= KBASE_REG_SHARE_BOTH;
+	else if (BASE_MEM_COHERENT_LOCAL & flags)
+		real_flags |= KBASE_REG_SHARE_IN;
+
+	/* now we can lock down the context, and find the region */
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	/* Is the region being transitioning between not needed and needed? */
+	prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
+	new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
+	if (prev_needed != new_needed) {
+		/* Aliased allocations can't be made ephemeral */
+		if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1)
+			goto out_unlock;
+
+		if (new_needed) {
+			/* Only native allocations can be marked not needed */
+			if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			ret = kbase_mem_evictable_make(reg->gpu_alloc);
+			if (ret)
+				goto out_unlock;
+		} else {
+			kbase_mem_evictable_unmake(reg->gpu_alloc);
+		}
+	}
+
+	/* limit to imported memory */
+	if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) &&
+	     (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
+		goto out_unlock;
+
+	/* no change? */
+	if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/* save for roll back */
+	prev_flags = reg->flags;
+	reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+	reg->flags |= real_flags;
+
+	/* Currently supporting only imported memory */
+	switch (reg->gpu_alloc->type) {
+#ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_cpu_phy_pages(reg), reg->gpu_alloc->nents, reg->flags);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		/* Future use will use the new flags, existing mapping will NOT be updated
+		 * as memory should not be in use by the GPU when updating the flags.
+		 */
+		ret = 0;
+		WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+		break;
+#endif
+	default:
+		break;
+	}
+
+	/* roll back on error, i.e. not UMP */
+	if (ret)
+		reg->flags = prev_flags;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	up_write(&current->mm->mmap_sem);
+out:
+	return ret;
+}
+
+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
+
+#ifdef CONFIG_UMP
+static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	ump_dd_handle umph;
+	u64 block_count;
+	const ump_dd_physical_block_64 *block_array;
+	u64 i, j;
+	int page = 0;
+	ump_alloc_flags ump_flags;
+	ump_alloc_flags cpu_flags;
+	ump_alloc_flags gpu_flags;
+
+	if (*flags & BASE_MEM_SECURE)
+		goto bad_flags;
+
+	umph = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE == umph)
+		goto bad_id;
+
+	ump_flags = ump_dd_allocation_flags_get(umph);
+	cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK;
+	gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) &
+			UMP_DEVICE_MASK;
+
+	*va_pages = ump_dd_size_get_64(umph);
+	*va_pages >>= PAGE_SHIFT;
+
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	if (*flags & BASE_MEM_SAME_VA)
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	else
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+
+	if (!reg)
+		goto no_region;
+
+	/* we've got pages to map now, and support SAME_VA */
+	*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	reg->gpu_alloc->imported.ump_handle = umph;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMP is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMP cannot be grown */
+
+	/* Override import flags based on UMP flags */
+	*flags &= ~(BASE_MEM_CACHED_CPU);
+	*flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR);
+	*flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR);
+
+	if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) {
+		reg->flags |= KBASE_REG_CPU_CACHED;
+		*flags |= BASE_MEM_CACHED_CPU;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_WR) {
+		reg->flags |= KBASE_REG_CPU_WR;
+		*flags |= BASE_MEM_PROT_CPU_WR;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_RD) {
+		reg->flags |= KBASE_REG_CPU_RD;
+		*flags |= BASE_MEM_PROT_CPU_RD;
+	}
+
+	if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR))
+		reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (gpu_flags & UMP_PROT_DEVICE_WR) {
+		reg->flags |= KBASE_REG_GPU_WR;
+		*flags |= BASE_MEM_PROT_GPU_WR;
+	}
+
+	if (gpu_flags & UMP_PROT_DEVICE_RD) {
+		reg->flags |= KBASE_REG_GPU_RD;
+		*flags |= BASE_MEM_PROT_GPU_RD;
+	}
+
+	/* ump phys block query */
+	ump_dd_phys_blocks_get_64(umph, &block_count, &block_array);
+
+	for (i = 0; i < block_count; i++) {
+		for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) {
+			reg->gpu_alloc->pages[page] = block_array[i].addr + (j << PAGE_SHIFT);
+			page++;
+		}
+	}
+	reg->gpu_alloc->nents = *va_pages;
+	reg->extent = 0;
+
+	return reg;
+
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	ump_dd_release(umph);
+bad_id:
+bad_flags:
+	return NULL;
+}
+#endif				/* CONFIG_UMP */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, int fd, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	struct dma_buf *dma_buf;
+	struct dma_buf_attachment *dma_attachment;
+	bool shared_zone = false;
+
+	dma_buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(dma_buf))
+		goto no_buf;
+
+	dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+	if (!dma_attachment)
+		goto no_attachment;
+
+	*va_pages = PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT;
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* ignore SAME_VA */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	} else {
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	/* No pages to map yet */
+	reg->gpu_alloc->nents = 0;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMM is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMM cannot be grown */
+	reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (*flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (*flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (*flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (*flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (*flags & BASE_MEM_SECURE)
+		reg->flags |= KBASE_REG_SECURE;
+
+	/* no read or write permission given on import, only on run do we give the right permissions */
+
+	reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM;
+	reg->gpu_alloc->imported.umm.sgt = NULL;
+	reg->gpu_alloc->imported.umm.dma_buf = dma_buf;
+	reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
+	reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
+	reg->extent = 0;
+
+	return reg;
+
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	dma_buf_detach(dma_buf, dma_attachment);
+no_attachment:
+	dma_buf_put(dma_buf);
+no_buf:
+	return NULL;
+}
+#endif  /* CONFIG_DMA_SHARED_BUFFER */
+
+
+static struct kbase_va_region *kbase_mem_from_user_buffer(
+		struct kbase_context *kctx, unsigned long address,
+		unsigned long size, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	long faulted_pages;
+	int zone = KBASE_REG_ZONE_CUSTOM_VA;
+	bool shared_zone = false;
+
+	*va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) -
+		PFN_DOWN(address);
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (UINT64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* SAME_VA generally not supported with imported memory (no known use cases) */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		zone = KBASE_REG_ZONE_SAME_VA;
+	}
+
+	reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone);
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages,
+			KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */
+
+	if (*flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (*flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (*flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (*flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	down_read(&current->mm->mmap_sem);
+
+	/* A sanity check that get_user_pages will work on the memory */
+	/* (so the initial import fails on weird memory regions rather than */
+	/* the job failing when we try to handle the external resources). */
+	/* It doesn't take a reference to the pages (because the page list is NULL). */
+	/* We can't really store the page list because that would involve */
+	/* keeping the pages pinned - instead we pin/unpin around the job */
+	/* (as part of the external resources handling code) */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL);
+#else
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL);
+#endif
+	up_read(&current->mm->mmap_sem);
+
+	if (faulted_pages != *va_pages)
+		goto fault_mismatch;
+
+	reg->gpu_alloc->imported.user_buf.size = size;
+	reg->gpu_alloc->imported.user_buf.address = address;
+	reg->gpu_alloc->imported.user_buf.nr_pages = faulted_pages;
+	reg->gpu_alloc->imported.user_buf.pages = kmalloc_array(faulted_pages,
+			sizeof(struct page *), GFP_KERNEL);
+	reg->gpu_alloc->imported.user_buf.mm = current->mm;
+	atomic_inc(&current->mm->mm_count);
+
+	if (!reg->gpu_alloc->imported.user_buf.pages)
+		goto no_page_array;
+
+	reg->gpu_alloc->nents = 0;
+	reg->extent = 0;
+
+	return reg;
+
+no_page_array:
+fault_mismatch:
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	return NULL;
+
+}
+
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+		    u64 nents, struct base_mem_aliasing_info *ai,
+		    u64 *num_pages)
+{
+	struct kbase_va_region *reg;
+	u64 gpu_va;
+	size_t i;
+	bool coherent;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(ai);
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	/* mask to only allowed flags */
+	*flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+		   BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
+		   BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+
+	if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_alias called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+	coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 ||
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0;
+
+	if (!stride)
+		goto bad_stride;
+
+	if (!nents)
+		goto bad_nents;
+
+	if ((nents * stride) > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* calculate the number of pages this alias will cover */
+	*num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* 64-bit tasks must MMAP anyway, but not expose this address to
+		 * clients */
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_SAME_VA);
+	} else {
+#else
+	if (1) {
+#endif
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	/* zero-sized page array, as we don't need one/can support one */
+	reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	kbase_update_region_flags(kctx, reg, *flags);
+
+	reg->gpu_alloc->imported.alias.nents = nents;
+	reg->gpu_alloc->imported.alias.stride = stride;
+	reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents);
+	if (!reg->gpu_alloc->imported.alias.aliased)
+		goto no_aliased_array;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* validate and add src handles */
+	for (i = 0; i < nents; i++) {
+		if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+			if (ai[i].handle.basep.handle !=
+			    BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE)
+				goto bad_handle; /* unsupported magic handle */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+		} else {
+			struct kbase_va_region *aliasing_reg;
+			struct kbase_mem_phy_alloc *alloc;
+
+			aliasing_reg = kbase_region_tracker_find_region_base_address(
+				kctx,
+				(ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT);
+
+			/* validate found region */
+			if (!aliasing_reg)
+				goto bad_handle; /* Not found */
+			if (aliasing_reg->flags & KBASE_REG_FREE)
+				goto bad_handle; /* Free region */
+			if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+				goto bad_handle; /* Ephemeral region */
+			if (!aliasing_reg->gpu_alloc)
+				goto bad_handle; /* No alloc */
+			if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+				goto bad_handle; /* Not a native alloc */
+			if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0))
+				goto bad_handle;
+				/* Non-coherent memory cannot alias
+				   coherent memory, and vice versa.*/
+
+			/* check size against stride */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+
+			alloc = aliasing_reg->gpu_alloc;
+
+			/* check against the alloc's size */
+			if (ai[i].offset > alloc->nents)
+				goto bad_handle; /* beyond end */
+			if (ai[i].offset + ai[i].length > alloc->nents)
+				goto bad_handle; /* beyond end */
+
+			reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+			reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset;
+		}
+	}
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
+			goto no_cookie;
+		}
+		/* return a cookie */
+		gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << gpu_va);
+		BUG_ON(kctx->pending_regions[gpu_va]);
+		kctx->pending_regions[gpu_va] = reg;
+
+		/* relocate to correct base */
+		gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		gpu_va <<= PAGE_SHIFT;
+	} else /* we control the VA */ {
+#else
+	if (1) {
+#endif
+		if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+			dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags &= ~KBASE_REG_GROWABLE;
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return gpu_va;
+
+#ifdef CONFIG_64BIT
+no_cookie:
+#endif
+no_mmap:
+bad_handle:
+	kbase_gpu_vm_unlock(kctx);
+no_aliased_array:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_reg:
+bad_size:
+bad_nents:
+bad_stride:
+bad_flags:
+	return 0;
+}
+
+static u32 kbase_get_cache_line_alignment(struct kbase_context *kctx)
+{
+	u32 cpu_cache_line_size = cache_line_size();
+	u32 gpu_cache_line_size =
+		(1UL << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
+
+	return ((cpu_cache_line_size > gpu_cache_line_size) ?
+				cpu_cache_line_size :
+				gpu_cache_line_size);
+}
+
+static int kbase_check_buffer_size(struct kbase_context *kctx, u64 size)
+{
+	u32 cache_line_align = kbase_get_cache_line_alignment(kctx);
+
+	return (size & (cache_line_align - 1)) == 0 ? 0 : -EINVAL;
+}
+
+static int kbase_check_buffer_cache_alignment(struct kbase_context *kctx,
+					void __user *ptr)
+{
+	u32 cache_line_align = kbase_get_cache_line_alignment(kctx);
+
+	return ((uintptr_t)ptr & (cache_line_align - 1)) == 0 ? 0 : -EINVAL;
+}
+
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u64 *gpu_va, u64 *va_pages,
+		u64 *flags)
+{
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_pages);
+	KBASE_DEBUG_ASSERT(flags);
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+		*flags |= BASE_MEM_SAME_VA;
+#endif
+
+	if (!kbase_check_import_flags(*flags)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	switch (type) {
+#ifdef CONFIG_UMP
+	case BASE_MEM_IMPORT_TYPE_UMP: {
+		ump_secure_id id;
+
+		if (get_user(id, (ump_secure_id __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_ump(kctx, id, va_pages, flags);
+	}
+	break;
+#endif /* CONFIG_UMP */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case BASE_MEM_IMPORT_TYPE_UMM: {
+		int fd;
+
+		if (get_user(fd, (int __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_umm(kctx, fd, va_pages, flags);
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
+		struct base_mem_import_user_buffer user_buffer;
+		void __user *uptr;
+
+		if (copy_from_user(&user_buffer, phandle,
+				sizeof(user_buffer))) {
+			reg = NULL;
+		} else {
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				uptr = compat_ptr(user_buffer.ptr.compat_value);
+			else
+#endif
+				uptr = user_buffer.ptr.value;
+
+			if (0 != kbase_check_buffer_cache_alignment(kctx,
+									uptr)) {
+				dev_warn(kctx->kbdev->dev,
+				"User buffer is not cache line aligned!\n");
+				goto no_reg;
+			}
+
+			if (0 != kbase_check_buffer_size(kctx,
+					user_buffer.length)) {
+				dev_warn(kctx->kbdev->dev,
+				"User buffer size is not multiple of cache line size!\n");
+				goto no_reg;
+			}
+
+			reg = kbase_mem_from_user_buffer(kctx,
+					(unsigned long)uptr, user_buffer.length,
+					va_pages, flags);
+		}
+		break;
+	}
+	default: {
+		reg = NULL;
+		break;
+	}
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies)
+			goto no_cookie;
+		/* return a cookie */
+		*gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << *gpu_va);
+		BUG_ON(kctx->pending_regions[*gpu_va]);
+		kctx->pending_regions[*gpu_va] = reg;
+
+		/* relocate to correct base */
+		*gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		*gpu_va <<= PAGE_SHIFT;
+
+	} else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES)  {
+		/* we control the VA, mmap now to the GPU */
+		if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	} else {
+		/* we control the VA, but nothing to mmap yet */
+		if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	/* clear out private flags */
+	*flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+
+no_gpu_va:
+no_cookie:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+no_reg:
+bad_flags:
+	*gpu_va = 0;
+	*va_pages = 0;
+	*flags = 0;
+	return -ENOMEM;
+}
+
+
+static int zap_range_nolock(struct mm_struct *mm,
+		const struct vm_operations_struct *vm_ops,
+		unsigned long start, unsigned long end)
+{
+	struct vm_area_struct *vma;
+	int err = -EINVAL; /* in case end < start */
+
+	while (start < end) {
+		unsigned long local_start;
+		unsigned long local_end;
+
+		vma = find_vma_intersection(mm, start, end);
+		if (!vma)
+			break;
+
+		/* is it ours? */
+		if (vma->vm_ops != vm_ops)
+			goto try_next;
+
+		local_start = vma->vm_start;
+
+		if (start > local_start)
+			local_start = start;
+
+		local_end = vma->vm_end;
+
+		if (end < local_end)
+			local_end = end;
+
+		err = zap_vma_ptes(vma, local_start, local_end - local_start);
+		if (unlikely(err))
+			break;
+
+try_next:
+		/* go to next vma, if any */
+		start = vma->vm_end;
+	}
+
+	return err;
+}
+
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	phys_addr_t *phy_pages;
+	u64 delta = new_pages - old_pages;
+	int ret = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Map the new pages into the GPU */
+	phy_pages = kbase_get_gpu_phy_pages(reg);
+	ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
+			phy_pages + old_pages, delta, reg->flags);
+
+	return ret;
+}
+
+static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	struct kbase_mem_phy_alloc *cpu_alloc = reg->cpu_alloc;
+	struct kbase_cpu_mapping *mapping;
+	int err;
+
+	lockdep_assert_held(&kctx->process_mm->mmap_sem);
+
+	list_for_each_entry(mapping, &cpu_alloc->mappings, mappings_list) {
+		unsigned long mapping_size;
+
+		mapping_size = (mapping->vm_end - mapping->vm_start)
+				>> PAGE_SHIFT;
+
+		/* is this mapping affected ?*/
+		if ((mapping->page_off + mapping_size) > new_pages) {
+			unsigned long first_bad = 0;
+
+			if (new_pages > mapping->page_off)
+				first_bad = new_pages - mapping->page_off;
+
+			err = zap_range_nolock(current->mm,
+					&kbase_vm_ops,
+					mapping->vm_start +
+					(first_bad << PAGE_SHIFT),
+					mapping->vm_end);
+
+			WARN(err,
+			     "Failed to zap VA range (0x%lx - 0x%lx);\n",
+			     mapping->vm_start +
+			     (first_bad << PAGE_SHIFT),
+			     mapping->vm_end
+			     );
+
+			/* The zap failed, give up and exit */
+			if (err)
+				goto failed;
+		}
+	}
+
+	return 0;
+
+failed:
+	return err;
+}
+
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 delta = old_pages - new_pages;
+	int ret = 0;
+
+	ret = kbase_mmu_teardown_pages(kctx,
+			reg->start_pfn + new_pages, delta);
+
+	return ret;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason)
+{
+	u64 old_pages;
+	u64 delta;
+	int res = -EINVAL;
+	struct kbase_va_region *reg;
+	bool read_locked = false;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(failure_reason);
+	KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE)) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+		goto out_unlock;
+	}
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (0 == (reg->flags & KBASE_REG_GROWABLE)) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (new_pages > reg->nr_pages) {
+		/* Would overflow the VA region */
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+		goto out_unlock;
+	}
+
+	/* can't be mapped more than once on the GPU */
+	if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+	/* can't grow regions which are ephemeral */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (new_pages == reg->gpu_alloc->nents) {
+		/* no change */
+		res = 0;
+		goto out_unlock;
+	}
+
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (new_pages > old_pages) {
+		delta = new_pages - old_pages;
+
+		/*
+		 * No update to the mm so downgrade the writer lock to a read
+		 * lock so other readers aren't blocked after this point.
+		 */
+		downgrade_write(&current->mm->mmap_sem);
+		read_locked = true;
+
+		/* Allocate some more pages */
+		if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+		if (reg->cpu_alloc != reg->gpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					reg->gpu_alloc, delta) != 0) {
+				*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+				kbase_free_phy_pages_helper(reg->cpu_alloc,
+						delta);
+				goto out_unlock;
+			}
+		}
+
+		/* No update required for CPU mappings, that's done on fault. */
+
+		/* Update GPU mapping. */
+		res = kbase_mem_grow_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* On error free the new pages */
+		if (res) {
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+	} else {
+		delta = old_pages - new_pages;
+
+		/* Update all CPU mapping(s) */
+		res = kbase_mem_shrink_cpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+
+		/* Update the GPU mapping */
+		res = kbase_mem_shrink_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+
+		kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+		if (reg->cpu_alloc != reg->gpu_alloc)
+			kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	if (read_locked)
+		up_read(&current->mm->mmap_sem);
+	else
+		up_write(&current->mm->mmap_sem);
+
+	return res;
+}
+
+static void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* non-atomic as we're under Linux' mm lock */
+	map->count++;
+}
+
+static void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* non-atomic as we're under Linux' mm lock */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	kbase_gpu_vm_lock(map->kctx);
+
+	if (map->region) {
+		KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
+				KBASE_REG_ZONE_SAME_VA);
+		/* Avoid freeing memory on the process death which results in
+		 * GPU Page Fault. Memory will be freed in kbase_destroy_context
+		 */
+		if (!(current->flags & PF_EXITING))
+			kbase_mem_free_region(map->kctx, map->region);
+	}
+
+	list_del(&map->mappings_list);
+
+	kbase_gpu_vm_unlock(map->kctx);
+
+	kbase_mem_phy_alloc_put(map->alloc);
+	kfree(map);
+}
+
+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
+
+
+static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+	pgoff_t rel_pgoff;
+	size_t i;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	/* we don't use vmf->pgoff as it's affected by our mmap with
+	 * offset being a GPU VA or a cookie */
+	rel_pgoff = ((unsigned long)vmf->virtual_address - map->vm_start)
+			>> PAGE_SHIFT;
+
+	kbase_gpu_vm_lock(map->kctx);
+	if (map->page_off + rel_pgoff >= map->alloc->nents)
+		goto locked_bad_fault;
+
+	/* Fault on access to DONT_NEED regions */
+	if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED))
+		goto locked_bad_fault;
+
+	/* insert all valid pages from the fault location */
+	for (i = rel_pgoff;
+	     i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT,
+	     map->alloc->nents - map->page_off); i++) {
+		int ret = vm_insert_pfn(vma, map->vm_start + (i << PAGE_SHIFT),
+		    PFN_DOWN(map->alloc->pages[map->page_off + i]));
+		if (ret < 0 && ret != -EBUSY)
+			goto locked_bad_fault;
+	}
+
+	kbase_gpu_vm_unlock(map->kctx);
+	/* we resolved it, nothing for VM to do */
+	return VM_FAULT_NOPAGE;
+
+locked_bad_fault:
+	kbase_gpu_vm_unlock(map->kctx);
+	return VM_FAULT_SIGBUS;
+}
+
+static const struct vm_operations_struct kbase_vm_ops = {
+	.open  = kbase_cpu_vm_open,
+	.close = kbase_cpu_vm_close,
+	.fault = kbase_cpu_vm_fault
+};
+
+static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close)
+{
+	struct kbase_cpu_mapping *map;
+	u64 start_off = vma->vm_pgoff - reg->start_pfn;
+	phys_addr_t *page_array;
+	int err = 0;
+	int i;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+	if (!map) {
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+	 * VM_DONTEXPAND - disable mremap on this region
+	 * VM_IO - disables paging
+	 * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+	 * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+	 *               This is needed to support using the dedicated and
+	 *               the OS based memory backends together.
+	 */
+	/*
+	 * This will need updating to propagate coherency flags
+	 * See MIDBASE-1057
+	 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_ops;
+	vma->vm_private_data = map;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+	    (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+		/* We can't map vmalloc'd memory uncached.
+		 * Other memory will have been returned from
+		 * kbase_mem_pool which would be
+		 * suitable for mapping uncached.
+		 */
+		BUG_ON(kaddr);
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	if (!kaddr) {
+		unsigned long addr = vma->vm_start + aligned_offset;
+
+		vma->vm_flags |= VM_PFNMAP;
+		for (i = 0; i < nr_pages; i++) {
+			unsigned long pfn = PFN_DOWN(page_array[i + start_off]);
+
+			err = vm_insert_pfn(vma, addr, pfn);
+			if (WARN_ON(err))
+				break;
+
+			addr += PAGE_SIZE;
+		}
+	} else {
+		WARN_ON(aligned_offset);
+		/* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+		vma->vm_flags |= VM_MIXEDMAP;
+		/* vmalloc remaping is easy... */
+		err = remap_vmalloc_range(vma, kaddr, 0);
+		WARN_ON(err);
+	}
+
+	if (err) {
+		kfree(map);
+		goto out;
+	}
+
+	map->page_off = start_off;
+	map->region = free_on_close ? reg : NULL;
+	map->kctx = reg->kctx;
+	map->vm_start = vma->vm_start + aligned_offset;
+	if (aligned_offset) {
+		KBASE_DEBUG_ASSERT(!start_off);
+		map->vm_end = map->vm_start + (reg->nr_pages << PAGE_SHIFT);
+	} else {
+		map->vm_end = vma->vm_end;
+	}
+	map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->count = 1; /* start with one ref */
+
+	if (reg->flags & KBASE_REG_CPU_CACHED)
+		map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+	return err;
+}
+
+static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
+{
+	struct kbase_va_region *new_reg;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+	u32 *tb;
+	int owns_tb = 1;
+
+	dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	if (!kctx->jctx.tb) {
+		KBASE_DEBUG_ASSERT(0 != size);
+		tb = vmalloc_user(size);
+
+		if (NULL == tb) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = kbase_device_trace_buffer_install(kctx, tb, size);
+		if (err) {
+			vfree(tb);
+			goto out;
+		}
+	} else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	*kaddr = kctx->jctx.tb;
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_region;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->cpu_alloc->imported.kctx = kctx;
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+
+	/* alloc now owns the tb */
+	owns_tb = 0;
+
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_va_region;
+	}
+
+	*reg = new_reg;
+
+	/* map read only, noexec */
+	vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+	/* the rest of the flags is added by the cpu_mmap handler */
+
+	dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
+	return 0;
+
+out_no_va_region:
+out_no_alloc:
+	kbase_free_alloced_region(new_reg);
+out_no_region:
+	if (owns_tb) {
+		kbase_device_trace_buffer_uninstall(kctx);
+		vfree(tb);
+	}
+out:
+	return err;
+}
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
+{
+	struct kbase_va_region *new_reg;
+	void *kaddr;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	kaddr = kbase_mmu_dump(kctx, nr_pages);
+
+	if (!kaddr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_va_region;
+	}
+
+	*kmap_addr = kaddr;
+	*reg = new_reg;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+	return 0;
+
+out_no_alloc:
+out_va_region:
+	kbase_free_alloced_region(new_reg);
+out:
+	return err;
+}
+
+
+void kbase_os_mem_map_lock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	down_read(&mm->mmap_sem);
+}
+
+void kbase_os_mem_map_unlock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	up_read(&mm->mmap_sem);
+}
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx = file->private_data;
+	struct kbase_va_region *reg;
+	void *kaddr = NULL;
+	size_t nr_pages;
+	int err = 0;
+	int free_on_close = 0;
+	struct device *dev = kctx->kbdev->dev;
+	size_t aligned_offset = 0;
+
+	dev_dbg(dev, "kbase_mmap\n");
+	nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+	/* strip away corresponding VM_MAY% flags to the VM_% flags requested */
+	vma->vm_flags &= ~((vma->vm_flags & (VM_READ | VM_WRITE)) << 4);
+
+	if (0 == nr_pages) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
+		/* The non-mapped tracking helper page */
+		err = kbase_tracking_page_setup(kctx, vma);
+		goto out_unlock;
+	}
+
+	/* if not the MTP, verify that the MTP has been mapped */
+	rcu_read_lock();
+	/* catches both when the special page isn't present or
+	 * when we've forked */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		err = -EINVAL;
+		rcu_read_unlock();
+		goto out_unlock;
+	}
+	rcu_read_unlock();
+
+	switch (vma->vm_pgoff) {
+	case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
+	case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE):
+		/* Illegal handle for direct map */
+		err = -EINVAL;
+		goto out_unlock;
+	case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE):
+		err = kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
+		/* free the region on munmap */
+		free_on_close = 1;
+		goto map;
+	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+		/* MMU dump */
+		err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		goto map;
+	case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+	     PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+		/* SAME_VA stuff, fetch the right region */
+		int gpu_pc_bits;
+		int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+
+		gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		if (reg->flags & KBASE_REG_ALIGNED) {
+			/* nr_pages must be able to hold alignment pages
+			 * plus actual pages */
+			unsigned long align = 1ULL << gpu_pc_bits;
+			unsigned long extra_pages = 3 * PFN_DOWN(align);
+			unsigned long aligned_addr;
+			unsigned long aligned_addr_end;
+			unsigned long nr_bytes = reg->nr_pages << PAGE_SHIFT;
+
+			if (kctx->api_version < KBASE_API_VERSION(8, 5))
+				/* Maintain compatibility with old userspace */
+				extra_pages = PFN_DOWN(align);
+
+			if (nr_pages != reg->nr_pages + extra_pages) {
+				/* incorrect mmap size */
+				/* leave the cookie for a potential
+				 * later mapping, or to be reclaimed
+				 * later when the context is freed */
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
+			aligned_addr = ALIGN(vma->vm_start, align);
+			aligned_addr_end = aligned_addr + nr_bytes;
+
+			if (kctx->api_version >= KBASE_API_VERSION(8, 5)) {
+				if ((aligned_addr_end & BASE_MEM_MASK_4GB) == 0) {
+					/* Can't end at 4GB boundary */
+					aligned_addr += 2 * align;
+				} else if ((aligned_addr & BASE_MEM_MASK_4GB) == 0) {
+					/* Can't start at 4GB boundary */
+					aligned_addr += align;
+				}
+			}
+
+			aligned_offset = aligned_addr - vma->vm_start;
+		} else if (reg->nr_pages != nr_pages) {
+			/* incorrect mmap size */
+			/* leave the cookie for a potential later
+			 * mapping, or to be reclaimed later when the
+			 * context is freed */
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		if ((vma->vm_flags & VM_READ &&
+					!(reg->flags & KBASE_REG_CPU_RD)) ||
+				(vma->vm_flags & VM_WRITE &&
+				 !(reg->flags & KBASE_REG_CPU_WR))) {
+			/* VM flags inconsistent with region flags */
+			err = -EPERM;
+			dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+			goto out_unlock;
+		}
+
+		/* adjust down nr_pages to what we have physically */
+		nr_pages = kbase_reg_current_backed_size(reg);
+
+		if (kbase_gpu_mmap(kctx, reg,
+					vma->vm_start + aligned_offset,
+					reg->nr_pages, 1) != 0) {
+			dev_err(dev, "%s:%d\n", __FILE__, __LINE__);
+			/* Unable to map in GPU space. */
+			WARN_ON(1);
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		/* no need for the cookie anymore */
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		/*
+		 * Overwrite the offset with the
+		 * region start_pfn, so we effectively
+		 * map from offset 0 in the region.
+		 */
+		vma->vm_pgoff = reg->start_pfn;
+
+		/* free the region on munmap */
+		free_on_close = 1;
+		goto map;
+	}
+	default: {
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx, (u64)vma->vm_pgoff << PAGE_SHIFT);
+
+		if (reg && !(reg->flags & KBASE_REG_FREE)) {
+			/* will this mapping overflow the size of the region? */
+			if (nr_pages > (reg->nr_pages - (vma->vm_pgoff - reg->start_pfn)))
+				goto overflow;
+
+			if ((vma->vm_flags & VM_READ &&
+			     !(reg->flags & KBASE_REG_CPU_RD)) ||
+			    (vma->vm_flags & VM_WRITE &&
+			     !(reg->flags & KBASE_REG_CPU_WR))) {
+				/* VM flags inconsistent with region flags */
+				err = -EPERM;
+				dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+				goto out_unlock;
+			}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+			if (reg->cpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM)
+				goto dma_map;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+			/* limit what we map to the amount currently backed */
+			if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+				if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
+					nr_pages = 0;
+				else
+					nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn);
+			}
+
+			goto map;
+		}
+
+overflow:
+		err = -ENOMEM;
+		goto out_unlock;
+	} /* default */
+	} /* switch */
+map:
+	err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
+		/* MMU dump - userspace should now have a reference on
+		 * the pages, so we can now free the kernel mapping */
+		vfree(kaddr);
+	}
+	goto out_unlock;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+dma_map:
+	err = dma_buf_mmap(reg->cpu_alloc->imported.umm.dma_buf, vma, vma->vm_pgoff - reg->start_pfn);
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+out:
+	if (err)
+		dev_err(dev, "mmap failed %d\n", err);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmap);
+
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map)
+{
+	struct kbase_va_region *reg;
+	unsigned long page_index;
+	unsigned int offset = gpu_addr & ~PAGE_MASK;
+	size_t page_count = PFN_UP(offset + size);
+	phys_addr_t *page_array;
+	struct page **pages;
+	void *cpu_addr = NULL;
+	pgprot_t prot;
+	size_t i;
+	bool sync_needed;
+
+	if (!size || !map)
+		return NULL;
+
+	/* check if page_count calculation will wrap */
+	if (size > ((size_t)-1 / PAGE_SIZE))
+		return NULL;
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn;
+
+	/* check if page_index + page_count will wrap */
+	if (-1UL - page_count < page_index)
+		goto out_unlock;
+
+	if (page_index + page_count > kbase_reg_current_backed_size(reg))
+		goto out_unlock;
+
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+	/* check access permissions can be satisfied
+	 * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */
+	if ((reg->flags & prot_request) != prot_request)
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+	if (!page_array)
+		goto out_unlock;
+
+	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out_unlock;
+
+	for (i = 0; i < page_count; i++)
+		pages[i] = pfn_to_page(PFN_DOWN(page_array[page_index + i]));
+
+	prot = PAGE_KERNEL;
+	if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
+		/* Map uncached */
+		prot = pgprot_writecombine(prot);
+	}
+	/* Note: enforcing a RO prot_request onto prot is not done, since:
+	 * - CPU-arch-specific integration required
+	 * - kbase_vmap() requires no access checks to be made/enforced */
+
+	cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+
+	kfree(pages);
+
+	if (!cpu_addr)
+		goto out_unlock;
+
+	map->gpu_addr = gpu_addr;
+	map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
+	map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+	map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
+	map->addr = (void *)((uintptr_t)cpu_addr + offset);
+	map->size = size;
+	map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0;
+	sync_needed = map->is_cached;
+
+#ifdef CONFIG_MALI_COH_KERN
+	/* kernel can use coherent memory if supported */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+		sync_needed = false;
+#endif
+
+	if (sync_needed) {
+		/* Sync first page */
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+		phys_addr_t cpu_pa = map->cpu_pages[0];
+		phys_addr_t gpu_pa = map->gpu_pages[0];
+
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+				KBASE_SYNC_TO_CPU);
+
+		/* Sync middle pages (if any) */
+		for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+			cpu_pa = map->cpu_pages[i];
+			gpu_pa = map->gpu_pages[i];
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+					KBASE_SYNC_TO_CPU);
+		}
+
+		/* Sync last page (if any) */
+		if (page_count > 1) {
+			cpu_pa = map->cpu_pages[page_count - 1];
+			gpu_pa = map->gpu_pages[page_count - 1];
+			sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+					KBASE_SYNC_TO_CPU);
+		}
+	}
+	kbase_gpu_vm_unlock(kctx);
+
+	return map->addr;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return NULL;
+}
+
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map)
+{
+	/* 0 is specified for prot_request to indicate no access checks should
+	 * be made.
+	 *
+	 * As mentioned in kbase_vmap_prot() this means that a kernel-side
+	 * CPU-RO mapping is not enforced to allow this to work */
+	return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map);
+}
+KBASE_EXPORT_TEST_API(kbase_vmap);
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+	void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+	bool sync_needed = map->is_cached;
+	vunmap(addr);
+#ifdef CONFIG_MALI_COH_KERN
+	/* kernel can use coherent memory if supported */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+		sync_needed = false;
+#endif
+	if (sync_needed) {
+		off_t offset = (uintptr_t)map->addr & ~PAGE_MASK;
+		size_t size = map->size;
+		size_t page_count = PFN_UP(offset + size);
+		size_t i;
+
+		/* Sync first page */
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+		phys_addr_t cpu_pa = map->cpu_pages[0];
+		phys_addr_t gpu_pa = map->gpu_pages[0];
+
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+				KBASE_SYNC_TO_DEVICE);
+
+		/* Sync middle pages (if any) */
+		for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+			cpu_pa = map->cpu_pages[i];
+			gpu_pa = map->gpu_pages[i];
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+					KBASE_SYNC_TO_DEVICE);
+		}
+
+		/* Sync last page (if any) */
+		if (page_count > 1) {
+			cpu_pa = map->cpu_pages[page_count - 1];
+			gpu_pa = map->gpu_pages[page_count - 1];
+			sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+					KBASE_SYNC_TO_DEVICE);
+		}
+	}
+	map->gpu_addr = 0;
+	map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+	map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+	map->cpu_pages = NULL;
+	map->gpu_pages = NULL;
+	map->addr = NULL;
+	map->size = 0;
+	map->is_cached = false;
+}
+KBASE_EXPORT_TEST_API(kbase_vunmap);
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
+{
+	struct mm_struct *mm;
+
+	rcu_read_lock();
+	mm = rcu_dereference(kctx->process_mm);
+	if (mm) {
+		atomic_add(pages, &kctx->nonmapped_pages);
+#ifdef SPLIT_RSS_COUNTING
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+#else
+		spin_lock(&mm->page_table_lock);
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+		spin_unlock(&mm->page_table_lock);
+#endif
+	}
+	rcu_read_unlock();
+}
+
+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
+{
+	int pages;
+	struct mm_struct *mm;
+
+	spin_lock(&kctx->mm_update_lock);
+	mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
+	if (!mm) {
+		spin_unlock(&kctx->mm_update_lock);
+		return;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, NULL);
+	spin_unlock(&kctx->mm_update_lock);
+	synchronize_rcu();
+
+	pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+#ifdef SPLIT_RSS_COUNTING
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+#else
+	spin_lock(&mm->page_table_lock);
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+	spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx;
+
+	kctx = vma->vm_private_data;
+	kbasep_os_process_page_usage_drain(kctx);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+	.close = kbase_special_vm_close,
+};
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
+{
+	/* check that this is the only tracking page */
+	spin_lock(&kctx->mm_update_lock);
+	if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
+		spin_unlock(&kctx->mm_update_lock);
+		return -EFAULT;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, current->mm);
+
+	spin_unlock(&kctx->mm_update_lock);
+
+	/* no real access */
+	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_special_ops;
+	vma->vm_private_data = kctx;
+
+	return 0;
+}
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
+{
+	int i;
+	int res;
+	void *va;
+	dma_addr_t  dma_pa;
+	struct kbase_va_region *reg;
+	phys_addr_t *page_array;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	unsigned long attrs = DMA_ATTR_WRITE_COMBINE;
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
+	u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+		    BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(0 != size);
+	KBASE_DEBUG_ASSERT(0 != pages);
+
+	if (size == 0)
+		goto err;
+
+	/* All the alloc calls return zeroed memory */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+			     attrs);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+			     &attrs);
+#else
+	va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
+#endif
+	if (!va)
+		goto err;
+
+	/* Store the state so we can free it later. */
+	handle->cpu_va = va;
+	handle->dma_pa = dma_pa;
+	handle->size   = size;
+
+
+	reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA);
+	if (!reg)
+		goto no_reg;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	kbase_update_region_flags(kctx, reg, flags);
+
+	reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(reg->cpu_alloc))
+		goto no_alloc;
+
+	reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	for (i = 0; i < pages; i++)
+		page_array[i] = dma_pa + (i << PAGE_SHIFT);
+
+	reg->cpu_alloc->nents = pages;
+
+	kbase_gpu_vm_lock(kctx);
+	res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
+	kbase_gpu_vm_unlock(kctx);
+	if (res)
+		goto no_mmap;
+
+	return va;
+
+no_mmap:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc:
+	kfree(reg);
+no_reg:
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
+#endif
+err:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_va_alloc);
+
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
+{
+	struct kbase_va_region *reg;
+	int err;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
+	KBASE_DEBUG_ASSERT(reg);
+	err = kbase_gpu_munmap(kctx, reg);
+	kbase_gpu_vm_unlock(kctx);
+	KBASE_DEBUG_ASSERT(!err);
+
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+		       handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+			handle->cpu_va, handle->dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, handle->size,
+				handle->cpu_va, handle->dma_pa);
+#endif
+}
+KBASE_EXPORT_SYMBOL(kbase_va_free);
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
new file mode 100755
index 0000000..6471747
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -0,0 +1,216 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.h
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_MEM_LINUX_H_
+#define _KBASE_MEM_LINUX_H_
+
+/** A HWC dump mapping */
+struct kbase_hwc_dma_mapping {
+	void       *cpu_va;
+	dma_addr_t  dma_pa;
+	size_t      size;
+};
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment);
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages);
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u64 *gpu_va, u64 *va_pages,
+		u64 *flags);
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason);
+int kbase_mmap(struct file *file, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction
+ * mechanism.
+ * @kctx: The kbase context to initialize.
+ *
+ * Return: Zero on success or -errno on failure.
+ */
+int kbase_mem_evictable_init(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to de-initialize.
+ */
+void kbase_mem_evictable_deinit(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the grow
+ * @old_pages: The number of pages before the grow
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Expand the GPU mapping to encompass the new psychical pages which have
+ * been added to the allocation.
+ *
+ * Note: Caller must be holding the region lock.
+ */
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction
+ * @gpu_alloc: The physical allocation to make evictable
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Take the provided region and make all the physical pages within it
+ * reclaimable by the kernel, updating the per-process VM stats as well.
+ * Remove any CPU mappings (as these can't be removed in the shrinker callback
+ * as mmap_sem might already be taken) but leave the GPU mapping intact as
+ * and until the shrinker reclaims the allocation.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc);
+
+/**
+ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for
+ * eviction.
+ * @alloc: The physical allocation to remove eviction eligibility from.
+ *
+ * Return: True if the allocation had its backing restored and false if
+ * it hasn't.
+ *
+ * Make the physical pages in the region no longer reclaimable and update the
+ * per-process stats, if the shrinker has already evicted the memory then
+ * re-allocate it if the region is still alive.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
+
+struct kbase_vmap_struct {
+	u64 gpu_addr;
+	struct kbase_mem_phy_alloc *cpu_alloc;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+	phys_addr_t *cpu_pages;
+	phys_addr_t *gpu_pages;
+	void *addr;
+	size_t size;
+	bool is_cached;
+};
+
+
+/**
+ * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the
+ * requested access permissions are supported
+ * @kctx:         Context the VA range belongs to
+ * @gpu_addr:     Start address of VA range
+ * @size:         Size of VA range
+ * @prot_request: Flags indicating how the caller will then access the memory
+ * @map:          Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check
+ * whether the region should allow the intended access, and return an error if
+ * disallowed. This is essential for security of imported memory, particularly
+ * a user buf from SHM mapped into the process as RO. In that case, write
+ * access must be checked if the intention is for kernel to write to the
+ * memory.
+ *
+ * The checks are also there to help catch access errors on memory where
+ * security is not a concern: imported memory that is always RW, and memory
+ * that was allocated and owned by the process attached to @kctx. In this case,
+ * it helps to identify memory that was was mapped with the wrong access type.
+ *
+ * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases
+ * where either the security of memory is solely dependent on those flags, or
+ * when userspace code was expecting only the GPU to access the memory (e.g. HW
+ * workarounds).
+ *
+ */
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vmap - Map a GPU VA range into the kernel safely
+ * @kctx:     Context the VA range belongs to
+ * @gpu_addr: Start address of VA range
+ * @size:     Size of VA range
+ * @map:      Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no
+ * checks to ensure the security of e.g. imported user bufs from RO SHM.
+ */
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vunmap - Unmap a GPU VA range from the kernel
+ * @kctx: Context the VA range belongs to
+ * @map:  Structure describing the mapping from the corresponding kbase_vmap()
+ *        call
+ *
+ * Unmaps a GPU VA range from the kernel, given its @map structure obtained
+ * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * The reference taken on pages during kbase_vmap() is released.
+ */
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
+
+/** @brief Allocate memory from kernel space and map it onto the GPU
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param size   The size of the allocation in bytes
+ * @param handle An opaque structure used to contain the state needed to free the memory
+ * @return the VA for kernel space and GPU MMU
+ */
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
+
+/** @brief Free/unmap memory allocated by kbase_va_alloc
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param handle An opaque structure returned by the kbase_va_alloc function.
+ */
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
+
+#endif				/* _KBASE_MEM_LINUX_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
new file mode 100755
index 0000000..441180b
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_MEM_LOWLEVEL_H
+#define _KBASE_MEM_LOWLEVEL_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/dma-mapping.h>
+
+/**
+ * @brief Flags for kbase_phy_allocator_pages_alloc
+ */
+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0)	/** Default allocation flag */
+#define KBASE_PHY_PAGES_FLAG_CLEAR   (1 << 0)	/** Clear the pages after allocation */
+#define KBASE_PHY_PAGES_FLAG_POISON  (1 << 1)	/** Fill the memory with a poison value */
+
+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON)
+
+#define KBASE_PHY_PAGES_POISON_VALUE  0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */
+
+enum kbase_sync_type {
+	KBASE_SYNC_TO_CPU,
+	KBASE_SYNC_TO_DEVICE
+};
+
+#endif				/* _KBASE_LOWLEVEL_H */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
new file mode 100755
index 0000000..9a3f9b5
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -0,0 +1,594 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/shrinker.h>
+#include <linux/atomic.h>
+#include <linux/version.h>
+
+/* This function is only provided for backwards compatibility with kernels
+ * which use the old carveout allocator.
+ *
+ * The forward declaration is to keep sparse happy.
+ */
+int __init kbase_carveout_mem_reserve(
+		phys_addr_t size);
+int __init kbase_carveout_mem_reserve(phys_addr_t size)
+{
+	return 0;
+}
+
+#define pool_dbg(pool, format, ...) \
+	dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format,	\
+		(pool->next_pool) ? "kctx" : "kbdev",	\
+		kbase_mem_pool_size(pool),	\
+		kbase_mem_pool_max_size(pool),	\
+		##__VA_ARGS__)
+
+#define NOT_DIRTY false
+#define NOT_RECLAIMED false
+
+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
+{
+	spin_lock(&pool->pool_lock);
+}
+
+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
+{
+	spin_unlock(&pool->pool_lock);
+}
+
+static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
+{
+	ssize_t max_size = kbase_mem_pool_max_size(pool);
+	ssize_t cur_size = kbase_mem_pool_size(pool);
+
+	return max(max_size - cur_size, (ssize_t)0);
+}
+
+static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool);
+}
+
+static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) == 0;
+}
+
+static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_add(&p->lru, &pool->page_list);
+	pool->cur_size++;
+
+	zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "added page\n");
+}
+
+static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_locked(pool, p);
+	kbase_mem_pool_unlock(pool);
+}
+
+static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_for_each_entry(p, page_list, lru) {
+		zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+	}
+
+	list_splice(page_list, &pool->page_list);
+	pool->cur_size += nr_pages;
+
+	pool_dbg(pool, "added %zu pages\n", nr_pages);
+}
+
+static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_list_locked(pool, page_list, nr_pages);
+	kbase_mem_pool_unlock(pool);
+}
+
+static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (kbase_mem_pool_is_empty(pool))
+		return NULL;
+
+	p = list_first_entry(&pool->page_list, struct page, lru);
+	list_del_init(&p->lru);
+	pool->cur_size--;
+
+	zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "removed page\n");
+
+	return p;
+}
+
+static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	kbase_mem_pool_lock(pool);
+	p = kbase_mem_pool_remove_locked(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return p;
+}
+
+static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+
+	dma_sync_single_for_device(dev, kbase_dma_addr(p),
+			PAGE_SIZE, DMA_BIDIRECTIONAL);
+}
+
+static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	clear_highpage(p);
+	kbase_mem_pool_sync_page(pool, p);
+}
+
+static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool,
+		struct page *p)
+{
+	/* Zero page before spilling */
+	kbase_mem_pool_zero_page(next_pool, p);
+
+	kbase_mem_pool_add(next_pool, p);
+}
+
+struct page *kbase_mem_alloc_page(struct kbase_device *kbdev)
+{
+	struct page *p;
+	gfp_t gfp;
+	struct device *dev = kbdev->dev;
+	dma_addr_t dma_addr;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \
+	LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+	/* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+	gfp = GFP_USER | __GFP_ZERO;
+#else
+	gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+	if (current->flags & PF_KTHREAD) {
+		/* Don't trigger OOM killer from kernel threads, e.g. when
+		 * growing memory on GPU page fault */
+		gfp |= __GFP_NORETRY;
+	}
+
+	p = alloc_page(gfp);
+	if (!p)
+		return NULL;
+
+	dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, dma_addr)) {
+		__free_page(p);
+		return NULL;
+	}
+
+	WARN_ON(dma_addr != page_to_phys(p));
+
+	kbase_set_dma_addr(p, dma_addr);
+
+	return p;
+}
+
+static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr = kbase_dma_addr(p);
+
+	dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	kbase_clear_dma_addr(p);
+	__free_page(p);
+
+	pool_dbg(pool, "freed page to kernel\n");
+}
+
+static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	struct page *p;
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	return i;
+}
+
+static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	size_t nr_freed;
+
+	kbase_mem_pool_lock(pool);
+	nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	kbase_mem_pool_unlock(pool);
+
+	return nr_freed;
+}
+
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
+		size_t nr_to_grow)
+{
+	struct page *p;
+	size_t i;
+
+	for (i = 0; i < nr_to_grow; i++) {
+		p = kbase_mem_alloc_page(pool->kbdev);
+		if (!p)
+			return -ENOMEM;
+		kbase_mem_pool_add(pool, p);
+	}
+
+	return 0;
+}
+
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
+{
+	size_t cur_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	if (new_size > pool->max_size)
+		new_size = pool->max_size;
+
+	if (new_size < cur_size)
+		kbase_mem_pool_shrink(pool, cur_size - new_size);
+	else if (new_size > cur_size)
+		kbase_mem_pool_grow(pool, new_size - cur_size);
+}
+
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size)
+{
+	size_t cur_size;
+	size_t nr_to_shrink;
+
+	kbase_mem_pool_lock(pool);
+
+	pool->max_size = max_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+	if (max_size < cur_size) {
+		nr_to_shrink = cur_size - max_size;
+		kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	}
+
+	kbase_mem_pool_unlock(pool);
+}
+
+
+static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+	pool_dbg(pool, "reclaim count: %zu\n", kbase_mem_pool_size(pool));
+	return kbase_mem_pool_size(pool);
+}
+
+static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+	unsigned long freed;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+
+	pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan);
+
+	freed = kbase_mem_pool_shrink(pool, sc->nr_to_scan);
+
+	pool_dbg(pool, "reclaim freed %ld pages\n", freed);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_pool_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_pool_reclaim_count_objects(s, sc);
+
+	return kbase_mem_pool_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool)
+{
+	pool->cur_size = 0;
+	pool->max_size = max_size;
+	pool->kbdev = kbdev;
+	pool->next_pool = next_pool;
+
+	spin_lock_init(&pool->pool_lock);
+	INIT_LIST_HEAD(&pool->page_list);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink;
+#else
+	pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects;
+	pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects;
+#endif
+	pool->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	pool->reclaim.batch = 0;
+#endif
+	register_shrinker(&pool->reclaim);
+
+	pool_dbg(pool, "initialized\n");
+
+	return 0;
+}
+
+void kbase_mem_pool_term(struct kbase_mem_pool *pool)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_spill = 0;
+	LIST_HEAD(spill_list);
+	int i;
+
+	pool_dbg(pool, "terminate()\n");
+
+	unregister_shrinker(&pool->reclaim);
+
+	kbase_mem_pool_lock(pool);
+	pool->max_size = 0;
+
+	if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool (may overspill) */
+		nr_to_spill = kbase_mem_pool_capacity(next_pool);
+		nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill);
+
+		/* Zero pages first without holding the next_pool lock */
+		for (i = 0; i < nr_to_spill; i++) {
+			p = kbase_mem_pool_remove_locked(pool);
+			kbase_mem_pool_zero_page(pool, p);
+			list_add(&p->lru, &spill_list);
+		}
+	}
+
+	while (!kbase_mem_pool_is_empty(pool)) {
+		/* Free remaining pages to kernel */
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	kbase_mem_pool_unlock(pool);
+
+	if (next_pool && nr_to_spill) {
+		/* Add new page list to next_pool */
+		kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill);
+
+		pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill);
+	}
+
+	pool_dbg(pool, "terminated\n");
+}
+
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	do {
+		pool_dbg(pool, "alloc()\n");
+		p = kbase_mem_pool_remove(pool);
+
+		if (p)
+			return p;
+
+		pool = pool->next_pool;
+	} while (pool);
+
+	return NULL;
+}
+
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+
+	pool_dbg(pool, "free()\n");
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add(pool, p);
+	} else if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool */
+		kbase_mem_pool_spill(next_pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages)
+{
+	struct page *p;
+	size_t nr_from_pool;
+	size_t i;
+	int err = -ENOMEM;
+
+	pool_dbg(pool, "alloc_pages(%zu):\n", nr_pages);
+
+	/* Get pages from this pool */
+	kbase_mem_pool_lock(pool);
+	nr_from_pool = min(nr_pages, kbase_mem_pool_size(pool));
+	for (i = 0; i < nr_from_pool; i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		pages[i] = page_to_phys(p);
+	}
+	kbase_mem_pool_unlock(pool);
+
+	if (i != nr_pages && pool->next_pool) {
+		/* Allocate via next pool */
+		err = kbase_mem_pool_alloc_pages(pool->next_pool,
+				nr_pages - i, pages + i);
+
+		if (err)
+			goto err_rollback;
+
+		i += nr_pages - i;
+	}
+
+	/* Get any remaining pages from kernel */
+	for (; i < nr_pages; i++) {
+		p = kbase_mem_alloc_page(pool->kbdev);
+		if (!p)
+			goto err_rollback;
+		pages[i] = page_to_phys(p);
+	}
+
+	pool_dbg(pool, "alloc_pages(%zu) done\n", nr_pages);
+
+	return 0;
+
+err_rollback:
+	kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED);
+	return err;
+}
+
+static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
+		size_t nr_pages, phys_addr_t *pages, bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first without holding the pool lock */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!pages[i]))
+			continue;
+
+		p = phys_to_page(pages[i]);
+
+		if (zero)
+			kbase_mem_pool_zero_page(pool, p);
+		else if (sync)
+			kbase_mem_pool_sync_page(pool, p);
+
+		list_add(&p->lru, &new_page_list);
+		nr_to_pool++;
+		pages[i] = 0;
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages, bool dirty, bool reclaimed)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	pool_dbg(pool, "free_pages(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
+
+		i += nr_to_pool;
+
+		if (i != nr_pages && next_pool) {
+			/* Spill to next pool (may overspill) */
+			nr_to_pool = kbase_mem_pool_capacity(next_pool);
+			nr_to_pool = min(nr_pages - i, nr_to_pool);
+
+			kbase_mem_pool_add_array(next_pool, nr_to_pool,
+					pages + i, true, dirty);
+			i += nr_to_pool;
+		}
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!pages[i]))
+			continue;
+
+		p = phys_to_page(pages[i]);
+		if (reclaimed)
+			zone_page_state_add(-1, page_zone(p),
+					NR_SLAB_RECLAIMABLE);
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = 0;
+	}
+
+	pool_dbg(pool, "free_pages(%zu) done\n", nr_pages);
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
new file mode 100755
index 0000000..585fba0
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <mali_kbase_mem_pool_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_trim(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops,
+		kbase_mem_pool_debugfs_size_get,
+		kbase_mem_pool_debugfs_size_set,
+		"%llu\n");
+
+static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_max_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_set_max_size(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops,
+		kbase_mem_pool_debugfs_max_size_get,
+		kbase_mem_pool_debugfs_max_size_set,
+		"%llu\n");
+
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_mem_pool *pool)
+{
+	debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_size_fops);
+
+	debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_max_size_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
new file mode 100755
index 0000000..1442854
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_MEM_POOL_DEBUGFS_H
+#define _KBASE_MEM_POOL_DEBUGFS_H
+
+#include <mali_kbase.h>
+
+/**
+ * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool
+ * @parent: Parent debugfs dentry
+ * @pool:   Memory pool to control
+ *
+ * Adds two debugfs files under @parent:
+ * - mem_pool_size: get/set the current size of @pool
+ * - mem_pool_max_size: get/set the max size of @pool
+ */
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_mem_pool *pool);
+
+#endif  /*_KBASE_MEM_POOL_DEBUGFS_H*/
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
new file mode 100755
index 0000000..092da9a
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,119 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Show callback for the @c mem_profile debugfs file.
+ *
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise
+ */
+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+
+	seq_putc(sfile, '\n');
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for mem_profile
+ */
+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_mem_profile_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+	.open = kbasep_mem_profile_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+		kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		if (!debugfs_create_file("mem_profile", S_IRUGO,
+					kctx->kctx_dentry, kctx,
+					&kbasep_mem_profile_debugfs_fops)) {
+			err = -EAGAIN;
+		} else {
+			kbase_ctx_flag_set(kctx,
+					   KCTX_MEM_PROFILE_INITIALIZED);
+		}
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		kfree(kctx->mem_profile_data);
+		kctx->mem_profile_data = data;
+		kctx->mem_profile_size = size;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d",
+		err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return err;
+}
+
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+				kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	kfree(kctx->mem_profile_data);
+	kctx->mem_profile_data = NULL;
+	kctx->mem_profile_size = 0;
+
+	mutex_unlock(&kctx->mem_profile_lock);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	kfree(data);
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
new file mode 100755
index 0000000..a1dc2e0
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
@@ -0,0 +1,59 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs.h
+ * Header file for mem profiles entries in debugfs
+ *
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
+#define _KBASE_MEM_PROFILE_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Remove entry from Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
+
+/**
+ * @brief Insert @p data to the debugfs file so it can be read by userspace
+ *
+ * The function takes ownership of @p data and frees it later when new data
+ * is inserted.
+ *
+ * If the debugfs entry corresponding to the @p kctx doesn't exist,
+ * an attempt will be made to create it.
+ *
+ * @param kctx The context whose debugfs file @p data should be inserted to
+ * @param data A NULL-terminated string to be inserted to the debugfs file,
+ *             without the trailing new line character
+ * @param size The length of the @p data string
+ * @return 0 if @p data inserted correctly
+ *         -EAGAIN in case of error
+ * @post @ref mem_profile_initialized will be set to @c true
+ *       the first time this function succeeds.
+ */
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size);
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
new file mode 100755
index 0000000..82f0702
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs_buf_size.h
+ * Header file for the size of the buffer to accumulate the histogram report text in
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * The size of the buffer to accumulate the histogram report text in
+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56))
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
new file mode 100755
index 0000000..da1689c
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -0,0 +1,2059 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mmu.c
+ * Base kernel MMU management.
+ */
+
+/* #define DEBUG    1 */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_debug.h>
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_time.h>
+#include <mali_kbase_mem.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+/**
+ * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs.
+ *
+ * If sync is not set then transactions still in flight when the flush is issued
+ * may use the old page tables and the data they write will not be written out
+ * to memory, this function returns after the flush has been issued but
+ * before all accesses which might effect the flushed region have completed.
+ *
+ * If sync is set then accesses in the flushed region will be drained
+ * before data is flush and invalidated through L1, L2 and into memory,
+ * after which point this function will return.
+ */
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync);
+
+/**
+ * kbase_mmu_sync_pgd - sync page directory to memory
+ * @kbdev:	Device pointer.
+ * @handle:	Address of DMA region.
+ * @size:       Size of the region to sync.
+ *
+ * This should be called after each page directory update.
+ */
+
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
+		dma_addr_t handle, size_t size)
+{
+	/* If page table is not coherent then ensure the gpu can read
+	 * the pages from memory
+	 */
+	if (kbdev->system_coherency != COHERENCY_ACE)
+		dma_sync_single_for_device(kbdev->dev, handle, size,
+				DMA_TO_DEVICE);
+}
+
+/*
+ * Definitions:
+ * - PGD: Page Directory.
+ * - PTE: Page Table Entry. A 64bit value pointing to the next
+ *        level of translation
+ * - ATE: Address Transation Entry. A 64bit value pointing to
+ *        a 4kB physical page.
+ */
+
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str);
+
+
+static size_t make_multiple(size_t minimum, size_t multiple)
+{
+	size_t remainder = minimum % multiple;
+
+	if (remainder == 0)
+		return minimum;
+
+	return minimum + multiple - remainder;
+}
+
+void page_fault_worker(struct work_struct *data)
+{
+	u64 fault_pfn;
+	u32 fault_status;
+	size_t new_pages;
+	size_t fault_rel_pfn;
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_va_region *region;
+	int err;
+	bool grown = false;
+
+	faulting_as = container_of(data, struct kbase_as, work_pagefault);
+	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+
+	if (unlikely(faulting_as->protected_mode))
+	{
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Protected mode fault");
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+
+		goto fault_done;
+	}
+
+	fault_status = faulting_as->fault_status;
+	switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
+		/* need to check against the region to handle this one */
+		break;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Translation table bus fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
+		/* nothing to do, but we don't expect this fault currently */
+		dev_warn(kbdev->dev, "Access flag unexpectedly set");
+		goto fault_done;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
+
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Address size fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Memory attributes fault");
+		goto fault_done;
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	default:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Unknown fault code");
+		goto fault_done;
+	}
+
+	/* so we have a translation fault, let's see if it is for growable
+	 * memory */
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx,
+			faulting_as->fault_addr);
+	if (!region || region->flags & KBASE_REG_FREE) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if ((region->flags & GROWABLE_FLAGS_REQUIRED)
+			!= GROWABLE_FLAGS_REQUIRED) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not growable");
+		goto fault_done;
+	}
+
+	if ((region->flags & KBASE_REG_DONT_NEED)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Don't need memory can't be grown");
+		goto fault_done;
+	}
+
+	/* find the size we need to grow it by */
+	/* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address
+	 * validating the fault_adress to be within a size_t from the start_pfn */
+	fault_rel_pfn = fault_pfn - region->start_pfn;
+
+	if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
+		dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+				faulting_as->fault_addr, region->start_pfn,
+				region->start_pfn +
+				kbase_reg_current_backed_size(region));
+
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* [1] in case another page fault occurred while we were
+		 * handling the (duplicate) page fault we need to ensure we
+		 * don't loose the other page fault as result of us clearing
+		 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
+		 * an UNLOCK command that will retry any stalled memory
+		 * transaction (which should cause the other page fault to be
+		 * raised again).
+		 */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+
+		goto fault_done;
+	}
+
+	new_pages = make_multiple(fault_rel_pfn -
+			kbase_reg_current_backed_size(region) + 1,
+			region->extent);
+
+	/* cap to max vsize */
+	if (new_pages + kbase_reg_current_backed_size(region) >
+			region->nr_pages)
+		new_pages = region->nr_pages -
+				kbase_reg_current_backed_size(region);
+
+	if (0 == new_pages) {
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Duplicate of a fault we've already handled, nothing to do */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* See comment [1] about UNLOCK usage */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+		goto fault_done;
+	}
+
+	if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) {
+		if (region->gpu_alloc != region->cpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					region->cpu_alloc, new_pages) == 0) {
+				grown = true;
+			} else {
+				kbase_free_phy_pages_helper(region->gpu_alloc,
+						new_pages);
+			}
+		} else {
+			grown = true;
+		}
+	}
+
+
+	if (grown) {
+		u64 pfn_offset;
+		u32 op;
+
+		/* alloc success */
+		KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
+
+		/* set up the new pages */
+		pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
+		/*
+		 * Note:
+		 * Issuing an MMU operation will unlock the MMU and cause the
+		 * translation to be replayed. If the page insertion fails then
+		 * rather then trying to continue the context should be killed
+		 * so the no_flush version of insert_pages is used which allows
+		 * us to unlock the MMU as we see fit.
+		 */
+		err = kbase_mmu_insert_pages_no_flush(kctx,
+				region->start_pfn + pfn_offset,
+				&kbase_get_gpu_phy_pages(region)[pfn_offset],
+				new_pages, region->flags);
+		if (err) {
+			kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
+			if (region->gpu_alloc != region->cpu_alloc)
+				kbase_free_phy_pages_helper(region->cpu_alloc,
+						new_pages);
+			kbase_gpu_vm_unlock(kctx);
+			/* The locked VA region will be unlocked and the cache invalidated in here */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page table update failure");
+			goto fault_done;
+		}
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
+#endif
+		kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages);
+
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* flush L2 and unlock the VA (resumes the MMU) */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+			op = AS_COMMAND_FLUSH;
+		else
+			op = AS_COMMAND_FLUSH_PT;
+
+		/* clear MMU interrupt - this needs to be done after updating
+		 * the page tables but before issuing a FLUSH command. The
+		 * FLUSH cmd has a side effect that it restarts stalled memory
+		 * transactions in other address spaces which may cause
+		 * another fault to occur. If we didn't clear the interrupt at
+		 * this stage a new IRQ might not be raised when the GPU finds
+		 * a MMU IRQ is already pending.
+		 */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
+					  faulting_as->fault_addr >> PAGE_SHIFT,
+					  new_pages,
+					  op, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		/* reenable this in the mask */
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* failed to extend, handle as a normal PF */
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Page allocation failure");
+	}
+
+fault_done:
+	/*
+	 * By this point, the fault was handled in some way,
+	 * so release the ctx refcount
+	 */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
+{
+	u64 *page;
+	int i;
+	struct page *p;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
+	kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	p = kbase_mem_pool_alloc(&kctx->mem_pool);
+	if (!p)
+		goto sub_pages;
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+
+	page = kmap(p);
+	if (NULL == page)
+		goto alloc_free;
+
+	kbase_process_page_usage_inc(kctx, 1);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
+		kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
+
+	kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+	kunmap(p);
+	return page_to_phys(p);
+
+alloc_free:
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+sub_pages:
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
+
+/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
+ * new table from the pool if needed and possible
+ */
+static int mmu_get_next_pgd(struct kbase_context *kctx,
+		phys_addr_t *pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(*pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	p = pfn_to_page(PFN_DOWN(*pgd));
+	page = kmap(p);
+	if (NULL == page) {
+		dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
+		return -EINVAL;
+	}
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+
+	if (!target_pgd) {
+		target_pgd = kbase_mmu_alloc_pgd(kctx);
+		if (!target_pgd) {
+			dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+			kunmap(p);
+			return -ENOMEM;
+		}
+
+		kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+		/* Rely on the caller to update the address space flags. */
+	}
+
+	kunmap(p);
+	*pgd = target_pgd;
+
+	return 0;
+}
+
+static int mmu_get_bottom_pgd(struct kbase_context *kctx,
+		u64 vpfn, phys_addr_t *out_pgd)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd = kctx->pgd;
+	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
+		int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l);
+		/* Handle failure condition */
+		if (err) {
+			dev_dbg(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n");
+			return err;
+		}
+	}
+
+	*out_pgd = pgd;
+
+	return 0;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+
+	KBASE_DEBUG_ASSERT(pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail */
+	KBASE_DEBUG_ASSERT(NULL != page);
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+	/* As we are recovering from what has already been set up, we should have a target_pgd */
+	KBASE_DEBUG_ASSERT(0 != target_pgd);
+	kunmap_atomic(page);
+	return target_pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd = kctx->pgd;
+
+	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
+		pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
+		/* Should never fail */
+		KBASE_DEBUG_ASSERT(0 != pgd);
+	}
+
+	return pgd;
+}
+
+static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vpfn,
+					      size_t nr)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn);
+		KBASE_DEBUG_ASSERT(0 != pgd);
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+
+		pgd_page = kmap_atomic(p);
+		KBASE_DEBUG_ASSERT(NULL != pgd_page);
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+		kunmap_atomic(pgd_page);
+	}
+}
+
+/*
+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					phys_addr_t phys, size_t nr,
+					unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_single_page only partially completes we need to be
+	 * able to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > remain)
+			count = remain;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+			kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+					phys, flags);
+		}
+
+		vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table.
+		 * If further pages need inserting and fail we need to undo what
+		 * has already taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_pages only partially completes we need to be able
+	 * to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > remain)
+			count = remain;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+			kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+					phys[i], flags);
+		}
+
+		phys += count;
+		vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table. If further pages
+		 * need inserting and fail we need to undo what has already
+		 * taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags)
+{
+	int err;
+
+	err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+
+/**
+ * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
+ * without retaining the kbase context.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
+ * other locking.
+ */
+static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+	u32 op;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	if (sync)
+		op = AS_COMMAND_FLUSH_MEM;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	err = kbase_mmu_hw_do_operation(kbdev,
+				&kbdev->as[kctx->as_nr],
+				kctx, vpfn, nr, op, 0);
+#if KBASE_GPU_RESET_EN
+	if (err) {
+		/* Flush failed to complete, assume the
+		 * GPU has hung and perform a reset to
+		 * recover */
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_NO_MALI
+	/*
+	 * As this function could be called in interrupt context the sync
+	 * request can't block. Instead log the request and the next flush
+	 * request will pick it up.
+	 */
+	if ((!err) && sync &&
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367))
+		atomic_set(&kctx->drain_pending, 1);
+#endif /* !CONFIG_MALI_NO_MALI */
+}
+
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev;
+	bool ctx_is_in_runpool;
+#ifndef CONFIG_MALI_NO_MALI
+	bool drain_pending = false;
+
+	if (atomic_xchg(&kctx->drain_pending, 0))
+		drain_pending = true;
+#endif /* !CONFIG_MALI_NO_MALI */
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	kbdev = kctx->kbdev;
+	ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
+
+	if (ctx_is_in_runpool) {
+		KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+		if (!kbase_pm_context_active_handle_suspend(kbdev,
+			KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+			int err;
+			u32 op;
+
+			/* AS transaction begin */
+			mutex_lock(&kbdev->mmu_hw_mutex);
+
+			if (sync)
+				op = AS_COMMAND_FLUSH_MEM;
+			else
+				op = AS_COMMAND_FLUSH_PT;
+
+			err = kbase_mmu_hw_do_operation(kbdev,
+						&kbdev->as[kctx->as_nr],
+						kctx, vpfn, nr, op, 0);
+
+#if KBASE_GPU_RESET_EN
+			if (err) {
+				/* Flush failed to complete, assume the
+				 * GPU has hung and perform a reset to
+				 * recover */
+				dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
+
+				if (kbase_prepare_to_reset_gpu(kbdev))
+					kbase_reset_gpu(kbdev);
+			}
+#endif /* KBASE_GPU_RESET_EN */
+
+			mutex_unlock(&kbdev->mmu_hw_mutex);
+			/* AS transaction end */
+
+#ifndef CONFIG_MALI_NO_MALI
+			/*
+			 * The transaction lock must be dropped before here
+			 * as kbase_wait_write_flush could take it if
+			 * the GPU was powered down (static analysis doesn't
+			 * know this can't happen).
+			 */
+			drain_pending |= (!err) && sync &&
+					kbase_hw_has_issue(kctx->kbdev,
+							BASE_HW_ISSUE_6367);
+			if (drain_pending) {
+				/* Wait for GPU to flush write buffer */
+				kbase_wait_write_flush(kctx);
+			}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+			kbase_pm_context_idle(kbdev);
+		}
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+}
+
+void kbase_mmu_update(struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the hwaccess_lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	kctx->kbdev->mmu_mode->update(kctx);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_update);
+
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	kbdev->mmu_mode->disable_as(kbdev, as_nr);
+}
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the hwaccess_lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/*
+	 * The address space is being disabled, drain all knowledge of it out
+	 * from the caches as pages and page tables might be freed after this.
+	 *
+	 * The job scheduler code will already be holding the locks and context
+	 * so just do the flush.
+	 */
+	kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
+
+	kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+
+/*
+ * We actually only discard the ATE, and not the page table
+ * pages. There is a potential DoS here, as we'll leak memory by
+ * having PTEs that are potentially unused.  Will require physical
+ * page accounting, so MMU pages are part of the process allocation.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	struct kbase_device *kbdev;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
+
+	if (0 == nr) {
+		/* early out if nothing to do */
+		return 0;
+	}
+
+	mutex_lock(&kctx->mmu_lock);
+
+	kbdev = kctx->kbdev;
+	mmu_mode = kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+		if (err) {
+			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+/**
+ * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
+ * This call is being triggered as a response to the changes of the mem attributes
+ *
+ * @pre : The caller is responsible for validating the memory attributes
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags on GPU PFN 0x%llx from phys %p, %zu pages",
+			vpfn, phys, nr);
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		size_t count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i],
+					flags);
+
+		phys += count;
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(pfn_to_page(PFN_DOWN(pgd)));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
+}
+
+/* This is a debug feature only */
+static void mmu_check_unused(struct kbase_context *kctx, phys_addr_t pgd)
+{
+	u64 *page;
+	int i;
+
+	page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != page);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		if (kctx->kbdev->mmu_mode->ate_is_valid(page[i]))
+			beenthere(kctx, "live pte %016lx", (unsigned long)page[i]);
+	}
+	kunmap_atomic(page);
+}
+
+static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != pgd_page);
+	/* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */
+	memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+	kunmap_atomic(pgd_page);
+	pgd_page = pgd_page_buffer;
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+		if (target_pgd) {
+			if (level < (MIDGARD_MMU_BOTTOMLEVEL - 1)) {
+				mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
+			} else {
+				/*
+				 * So target_pte is a level-3 page.
+				 * As a leaf, it is safe to free it.
+				 * Unless we have live pages attached to it!
+				 */
+				mmu_check_unused(kctx, target_pgd);
+			}
+
+			beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1);
+			if (zap) {
+				struct page *p = phys_to_page(target_pgd);
+
+				kbase_mem_pool_free(&kctx->mem_pool, p, true);
+				kbase_process_page_usage_dec(kctx, 1);
+				kbase_atomic_sub_pages(1, &kctx->used_pages);
+				kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+			}
+		}
+	}
+}
+
+int kbase_mmu_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
+
+	mutex_init(&kctx->mmu_lock);
+
+	/* Preallocate MMU depth of four pages for mmu_teardown_level to use */
+	kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+
+	if (NULL == kctx->mmu_teardown_pages)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void kbase_mmu_term(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	kfree(kctx->mmu_teardown_pages);
+	kctx->mmu_teardown_pages = NULL;
+}
+
+void kbase_mmu_free_pgd(struct kbase_context *kctx)
+{
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	mutex_lock(&kctx->mmu_lock);
+	mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages);
+	mutex_unlock(&kctx->mmu_lock);
+
+	beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
+	kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true);
+	kbase_process_page_usage_dec(kctx, 1);
+	new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
+
+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
+	size_t dump_size;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+	if (!pgd_page) {
+		dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
+		return 0;
+	}
+
+	if (*size_left >= size) {
+		/* A modified physical address that contains the page table level */
+		u64 m_pgd = pgd | level;
+
+		/* Put the modified physical address in the output buffer */
+		memcpy(*buffer, &m_pgd, sizeof(m_pgd));
+		*buffer += sizeof(m_pgd);
+
+		/* Followed by the page table itself */
+		memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
+		*buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
+
+		*size_left -= size;
+	}
+
+	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
+		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+			if (mmu_mode->pte_is_valid(pgd_page[i])) {
+				target_pgd = mmu_mode->pte_to_phy_addr(
+						pgd_page[i]);
+
+				dump_size = kbasep_mmu_dump_level(kctx,
+						target_pgd, level + 1,
+						buffer, size_left);
+				if (!dump_size) {
+					kunmap(pfn_to_page(PFN_DOWN(pgd)));
+					return 0;
+				}
+				size += dump_size;
+			}
+		}
+	}
+
+	kunmap(pfn_to_page(PFN_DOWN(pgd)));
+
+	return size;
+}
+
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
+{
+	void *kaddr;
+	size_t size_left;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (0 == nr_pages) {
+		/* can't dump in a 0 sized buffer, early out */
+		return NULL;
+	}
+
+	size_left = nr_pages * PAGE_SIZE;
+
+	KBASE_DEBUG_ASSERT(0 != size_left);
+	kaddr = vmalloc_user(size_left);
+
+	mutex_lock(&kctx->mmu_lock);
+
+	if (kaddr) {
+		u64 end_marker = 0xFFULL;
+		char *buffer;
+		char *mmu_dump_buffer;
+		u64 config[3];
+		size_t size;
+
+		buffer = (char *)kaddr;
+		mmu_dump_buffer = buffer;
+
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
+			struct kbase_mmu_setup as_setup;
+
+			kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup);
+			config[0] = as_setup.transtab;
+			config[1] = as_setup.memattr;
+			config[2] = as_setup.transcfg;
+			memcpy(buffer, &config, sizeof(config));
+			mmu_dump_buffer += sizeof(config);
+			size_left -= sizeof(config);
+		}
+
+
+
+		size = kbasep_mmu_dump_level(kctx,
+				kctx->pgd,
+				MIDGARD_MMU_TOPLEVEL,
+				&mmu_dump_buffer,
+				&size_left);
+
+		if (!size)
+			goto fail_free;
+
+		/* Add on the size for the end marker */
+		size += sizeof(u64);
+		/* Add on the size for the config */
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4))
+			size += sizeof(config);
+
+
+		if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) {
+			/* The buffer isn't big enough - free the memory and return failure */
+			goto fail_free;
+		}
+
+		/* Add the end marker */
+		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return kaddr;
+
+fail_free:
+	vfree(kaddr);
+	mutex_unlock(&kctx->mmu_lock);
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_dump);
+
+void bus_fault_worker(struct work_struct *data)
+{
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif /* KBASE_GPU_RESET_EN */
+
+	faulting_as = container_of(data, struct kbase_as, work_busfault);
+
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	if (unlikely(faulting_as->protected_mode))
+	{
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+		atomic_dec(&kbdev->faults_pending);
+		return;
+
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
+	if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		unsigned long flags;
+
+		/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Set the MMU into unmapped mode */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_disable(kctx);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+
+		kbase_pm_context_idle(kbdev);
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
+{
+	const char *e;
+
+	switch (exception_code) {
+		/* Non-Fault Status code */
+	case 0x00:
+		e = "NOT_STARTED/IDLE/OK";
+		break;
+	case 0x01:
+		e = "DONE";
+		break;
+	case 0x02:
+		e = "INTERRUPTED";
+		break;
+	case 0x03:
+		e = "STOPPED";
+		break;
+	case 0x04:
+		e = "TERMINATED";
+		break;
+	case 0x08:
+		e = "ACTIVE";
+		break;
+		/* Job exceptions */
+	case 0x40:
+		e = "JOB_CONFIG_FAULT";
+		break;
+	case 0x41:
+		e = "JOB_POWER_FAULT";
+		break;
+	case 0x42:
+		e = "JOB_READ_FAULT";
+		break;
+	case 0x43:
+		e = "JOB_WRITE_FAULT";
+		break;
+	case 0x44:
+		e = "JOB_AFFINITY_FAULT";
+		break;
+	case 0x48:
+		e = "JOB_BUS_FAULT";
+		break;
+	case 0x50:
+		e = "INSTR_INVALID_PC";
+		break;
+	case 0x51:
+		e = "INSTR_INVALID_ENC";
+		break;
+	case 0x52:
+		e = "INSTR_TYPE_MISMATCH";
+		break;
+	case 0x53:
+		e = "INSTR_OPERAND_FAULT";
+		break;
+	case 0x54:
+		e = "INSTR_TLS_FAULT";
+		break;
+	case 0x55:
+		e = "INSTR_BARRIER_FAULT";
+		break;
+	case 0x56:
+		e = "INSTR_ALIGN_FAULT";
+		break;
+	case 0x58:
+		e = "DATA_INVALID_FAULT";
+		break;
+	case 0x59:
+		e = "TILE_RANGE_FAULT";
+		break;
+	case 0x5A:
+		e = "ADDR_RANGE_FAULT";
+		break;
+	case 0x60:
+		e = "OUT_OF_MEMORY";
+		break;
+		/* GPU exceptions */
+	case 0x80:
+		e = "DELAYED_BUS_FAULT";
+		break;
+	case 0x88:
+		e = "SHAREABILITY_FAULT";
+		break;
+		/* MMU exceptions */
+	case 0xC0:
+	case 0xC1:
+	case 0xC2:
+	case 0xC3:
+	case 0xC4:
+	case 0xC5:
+	case 0xC6:
+	case 0xC7:
+		e = "TRANSLATION_FAULT";
+		break;
+	case 0xC8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xC9:
+	case 0xCA:
+	case 0xCB:
+	case 0xCC:
+	case 0xCD:
+	case 0xCE:
+	case 0xCF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		e = "PERMISSION_FAULT";
+		break;
+	case 0xD0:
+	case 0xD1:
+	case 0xD2:
+	case 0xD3:
+	case 0xD4:
+	case 0xD5:
+	case 0xD6:
+	case 0xD7:
+		e = "TRANSTAB_BUS_FAULT";
+		break;
+	case 0xD8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xD9:
+	case 0xDA:
+	case 0xDB:
+	case 0xDC:
+	case 0xDD:
+	case 0xDE:
+	case 0xDF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		e = "ACCESS_FLAG";
+		break;
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xE0:
+	case 0xE1:
+	case 0xE2:
+	case 0xE3:
+	case 0xE4:
+	case 0xE5:
+	case 0xE6:
+	case 0xE7:
+		e = "ADDRESS_SIZE_FAULT";
+		break;
+	case 0xE8:
+	case 0xE9:
+	case 0xEA:
+	case 0xEB:
+	case 0xEC:
+	case 0xED:
+	case 0xEE:
+	case 0xEF:
+		e = "MEMORY_ATTRIBUTES_FAULT";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		break;
+	default:
+		e = "UNKNOWN";
+		break;
+	};
+
+	return e;
+}
+
+static const char *access_type_name(struct kbase_device *kbdev,
+		u32 fault_status)
+{
+	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		return "ATOMIC";
+#else
+		return "UNKNOWN";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		return "READ";
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		return "WRITE";
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		return "EXECUTE";
+	default:
+		WARN_ON(1);
+		return NULL;
+	}
+}
+
+/**
+ * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on.
+ */
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str)
+{
+	unsigned long flags;
+	int exception_type;
+	int access_type;
+	int source_id;
+	int as_no;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif
+
+	as_no = as->number;
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	/* ASSERT that the context won't leave the runpool */
+	KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0);
+
+	/* decode the fault status */
+	exception_type = as->fault_status & 0xFF;
+	access_type = (as->fault_status >> 8) & 0x3;
+	source_id = (as->fault_status >> 16);
+
+	/* terminal fault, print info about the fault */
+	dev_err(kbdev->dev,
+		"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+		"Reason: %s\n"
+		"raw fault status: 0x%X\n"
+		"decoded fault status: %s\n"
+		"exception type 0x%X: %s\n"
+		"access type 0x%X: %s\n"
+		"source id 0x%X\n"
+		"pid: %d\n",
+		as_no, as->fault_addr,
+		reason_str,
+		as->fault_status,
+		(as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+		exception_type, kbase_exception_name(kbdev, exception_type),
+		access_type, access_type_name(kbdev, as->fault_status),
+		source_id,
+		kctx->pid);
+
+	/* hardware counters dump fault handling */
+	if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
+			(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_DUMPING)) {
+		unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+
+		if ((as->fault_addr >= kbdev->hwcnt.addr) &&
+				(as->fault_addr < (kbdev->hwcnt.addr +
+						(num_core_groups * 2048))))
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+	}
+
+	/* Stop the kctx from submitting more jobs and cause it to be scheduled
+	 * out/rescheduled - this will occur on releasing the context's refcount */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_clear_submit_allowed(js_devdata, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
+	 * context can appear in the job slots from this point on */
+	kbase_backend_jm_kill_jobs_from_kctx(kctx);
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_mmu_disable(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+	/* Clear down the fault */
+	kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+	kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+}
+
+void kbasep_as_do_poke(struct work_struct *work)
+{
+	struct kbase_as *as;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(work);
+	as = container_of(work, struct kbase_as, poke_work);
+	kbdev = container_of(as, struct kbase_device, as[as->number]);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	/* GPU power will already be active by virtue of the caller holding a JS
+	 * reference on the address space, and will not release it until this worker
+	 * has finished */
+
+	/* Further to the comment above, we know that while this function is running
+	 * the AS will not be released as before the atom is released this workqueue
+	 * is flushed (in kbase_as_poking_timer_release_atom)
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
+
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	/* Force a uTLB invalidate */
+	kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
+				  AS_COMMAND_UNLOCK, 0);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (as->poke_refcount &&
+		!(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
+		/* Only queue up the timer if we need it, and we're not trying to kill it */
+		hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_as *as;
+	int queue_work_ret;
+
+	KBASE_DEBUG_ASSERT(NULL != timer);
+	as = container_of(timer, struct kbase_as, poke_timer);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+	KBASE_DEBUG_ASSERT(queue_work_ret);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * Retain the poking timer on an atom's context (if the atom hasn't already
+ * done so), and start the timer (if it's not already started).
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that's running on the GPU.
+ *
+ * The caller must hold hwaccess_lock
+ *
+ * This can be called safely from atomic context
+ */
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->poking)
+		return;
+
+	katom->poking = 1;
+
+	/* It's safe to work on the as/as_nr without an explicit reference,
+	 * because the caller holds the hwaccess_lock, and the atom itself
+	 * was also running and had already taken a reference  */
+	as = &kbdev->as[kctx->as_nr];
+
+	if (++(as->poke_refcount) == 1) {
+		/* First refcount for poke needed: check if not already in flight */
+		if (!as->poke_state) {
+			/* need to start poking */
+			as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
+			queue_work(as->poke_wq, &as->poke_work);
+		}
+	}
+}
+
+/**
+ * If an atom holds a poking timer, release it and wait for it to finish
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that still has a JS reference on the context
+ *
+ * This must \b not be called from atomic context, since it can sleep.
+ */
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	if (!katom->poking)
+		return;
+
+	as = &kbdev->as[kctx->as_nr];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	if (--(as->poke_refcount) == 0) {
+		as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_cancel(&as->poke_timer);
+		flush_workqueue(as->poke_wq);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* Re-check whether it's still needed */
+		if (as->poke_refcount) {
+			int queue_work_ret;
+			/* Poking still needed:
+			 * - Another retain will not be starting the timer or queueing work,
+			 * because it's still marked as in-flight
+			 * - The hrtimer has finished, and has not started a new timer or
+			 * queued work because it's been marked as killing
+			 *
+			 * So whatever happens now, just queue the work again */
+			as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
+			queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+			KBASE_DEBUG_ASSERT(queue_work_ret);
+		} else {
+			/* It isn't - so mark it as not in flight, and not killing */
+			as->poke_state = 0u;
+
+			/* The poke associated with the atom has now finished. If this is
+			 * also the last atom on the context, then we can guarentee no more
+			 * pokes (and thus no more poking register accesses) will occur on
+			 * the context until new atoms are run */
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	katom->poking = 0;
+}
+
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kctx) {
+		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n",
+				 kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
+				 as->number, as->fault_addr);
+
+		/* Since no ctx was found, the MMU must be disabled. */
+		WARN_ON(as->current_setup.transtab);
+
+		if (kbase_as_has_bus_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		} else if (kbase_as_has_page_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+		}
+
+#if KBASE_GPU_RESET_EN
+		if (kbase_as_has_bus_fault(as) &&
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+			bool reset_status;
+			/*
+			 * Reset the GPU, like in bus_fault_worker, in case an
+			 * earlier error hasn't been properly cleared by this
+			 * point.
+			 */
+			dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+			reset_status = kbase_prepare_to_reset_gpu_locked(kbdev);
+			if (reset_status)
+				kbase_reset_gpu_locked(kbdev);
+		}
+#endif /* KBASE_GPU_RESET_EN */
+
+		return;
+	}
+
+	if (kbase_as_has_bus_fault(as)) {
+		/*
+		 * hw counters dumping in progress, signal the
+		 * other thread that it failed
+		 */
+		if ((kbdev->hwcnt.kctx == kctx) &&
+		    (kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_DUMPING))
+			kbdev->hwcnt.backend.state =
+						KBASE_INSTR_STATE_FAULT;
+
+		/*
+		 * Stop the kctx from submitting more jobs and cause it
+		 * to be scheduled out/rescheduled when all references
+		 * to it are released
+		 */
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		dev_warn(kbdev->dev,
+				"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+				as->number, as->fault_addr,
+				as->fault_extra_addr);
+#else
+		dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+				as->number, as->fault_addr);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+		/*
+		 * We need to switch to UNMAPPED mode - but we do this in a
+		 * worker so that we can sleep
+		 */
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault));
+		WARN_ON(work_pending(&as->work_busfault));
+		queue_work(as->pf_wq, &as->work_busfault);
+		atomic_inc(&kbdev->faults_pending);
+	} else {
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault));
+		WARN_ON(work_pending(&as->work_pagefault));
+		queue_work(as->pf_wq, &as->work_pagefault);
+		atomic_inc(&kbdev->faults_pending);
+	}
+}
+
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		flush_workqueue(as->pf_wq);
+	}
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
new file mode 100755
index 0000000..986e959
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Interface file for accessing MMU hardware functionality
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_page MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_intro_sec Introduction
+ * This module provides an abstraction for accessing the functionality provided
+ * by the midgard MMU and thus allows all MMU HW access to be contained within
+ * one common place and allows for different backends (implementations) to
+ * be provided.
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_H_
+#define _MALI_KBASE_MMU_HW_H_
+
+/* Forward declarations */
+struct kbase_device;
+struct kbase_as;
+struct kbase_context;
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw  MMU access APIs
+ * @{
+ */
+
+/** @brief MMU fault type descriptor.
+ */
+enum kbase_mmu_fault_type {
+	KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
+	KBASE_MMU_FAULT_TYPE_PAGE,
+	KBASE_MMU_FAULT_TYPE_BUS,
+	KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED,
+	KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED
+};
+
+/** @brief Configure an address space for use.
+ *
+ * Configure the MMU using the address space details setup in the
+ * @ref kbase_context structure.
+ *
+ * @param[in]  kbdev          kbase device to configure.
+ * @param[in]  as             address space to configure.
+ * @param[in]  kctx           kbase context to configure.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev,
+		struct kbase_as *as, struct kbase_context *kctx);
+
+/** @brief Issue an operation to the MMU.
+ *
+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
+ * is associated with the provided @ref kbase_context over the specified range
+ *
+ * @param[in]  kbdev         kbase device to issue the MMU operation on.
+ * @param[in]  as            address space to issue the MMU operation on.
+ * @param[in]  kctx          kbase context to issue the MMU operation on.
+ * @param[in]  vpfn          MMU Virtual Page Frame Number to start the
+ *                           operation on.
+ * @param[in]  nr            Number of pages to work on.
+ * @param[in]  type          Operation type (written to ASn_COMMAND).
+ * @param[in]  handling_irq  Is this operation being called during the handling
+ *                           of an interrupt?
+ *
+ * @return Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type,
+		unsigned int handling_irq);
+
+/** @brief Clear a fault that has been previously reported by the MMU.
+ *
+ * Clear a bus error or page fault that has been reported by the MMU.
+ *
+ * @param[in]  kbdev         kbase device to  clear the fault from.
+ * @param[in]  as            address space to  clear the fault from.
+ * @param[in]  kctx          kbase context to clear the fault from or NULL.
+ * @param[in]  type          The type of fault that needs to be cleared.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @brief Enable fault that has been previously reported by the MMU.
+ *
+ * After a page fault or bus error has been reported by the MMU these
+ * will be disabled. After these are handled this function needs to be
+ * called to enable the page fault or bus error fault again.
+ *
+ * @param[in]  kbdev         kbase device to again enable the fault from.
+ * @param[in]  as            address space to again enable the fault from.
+ * @param[in]  kctx          kbase context to again enable the fault from.
+ * @param[in]  type          The type of fault that needs to be enabled again.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @} *//* end group mali_kbase_mmu_hw */
+/** @} *//* end group base_kbase_api */
+
+#endif	/* _MALI_KBASE_MMU_HW_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
new file mode 100755
index 0000000..2449c60
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _MALI_KBASE_MMU_MODE_
+#define _MALI_KBASE_MMU_MODE_
+
+#include <linux/types.h>
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_device;
+struct kbase_as;
+struct kbase_mmu_setup;
+
+struct kbase_mmu_mode {
+	void (*update)(struct kbase_context *kctx);
+	void (*get_as_setup)(struct kbase_context *kctx,
+			struct kbase_mmu_setup * const setup);
+	void (*disable_as)(struct kbase_device *kbdev, int as_nr);
+	phys_addr_t (*pte_to_phy_addr)(u64 entry);
+	int (*ate_is_valid)(u64 ate);
+	int (*pte_is_valid)(u64 pte);
+	void (*entry_set_ate)(u64 *entry, phys_addr_t phy, unsigned long flags);
+	void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+	void (*entry_invalidate)(u64 *entry);
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
+
+#endif /* _MALI_KBASE_MMU_MODE_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
new file mode 100644
index 0000000..791f3ed
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
@@ -0,0 +1,200 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase_mmu_mode.h"
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+/* For valid ATEs bit 1 = (level == 3) ? 1 : 0.
+ * The MMU is only ever configured by the driver so that ATEs
+ * are at level 3, so bit 1 should always be set
+ */
+#define ENTRY_IS_ATE        3ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_ACCESS_RW (1ULL << 6)     /* bits 6:7 */
+#define ENTRY_ACCESS_RO (3ULL << 6)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+	*pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler keeping cached copies of
+	 * memory, we have to explicitly say that we have updated memory.
+	 *
+	 * Note: We could manually move the data ourselves into R0 and
+	 * R1 by specifying register variables that are explicitly
+	 * given registers assignments, the down side of this is that
+	 * we have to assume cpu endianness.  To avoid this we can use
+	 * the ldrd to read the data from memory into R0 and R1 which
+	 * will respect the cpu endianness, we then use strd to make
+	 * the 64 bit assignment to the page table entry.
+	 */
+	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+			"strd r0, r1, [%[pte]]\n\t"
+			: "=m" (*pte)
+			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+			: "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register.
+	 */
+	setup->memattr =
+		(AS_MEMATTR_IMPL_DEF_CACHE_POLICY <<
+			(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_FORCE_TO_CACHE_ALL    <<
+			(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+		(AS_MEMATTR_WRITE_ALLOC           <<
+			(AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_IMPL_DEF   <<
+			(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_WA         <<
+			(AS_MEMATTR_INDEX_OUTER_WA * 8));
+
+	setup->transtab = (u64)kctx->pgd & AS_TRANSTAB_BASE_MASK;
+	setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K;
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = 0ULL;
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED;
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* Set access flags - note that AArch64 stage 1 does not support
+	 * write-only access, so we use read/write instead
+	 */
+	if (flags & KBASE_REG_GPU_WR)
+		mmu_flags |= ENTRY_ACCESS_RW;
+	else if (flags & KBASE_REG_GPU_RD)
+		mmu_flags |= ENTRY_ACCESS_RO;
+
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) |
+			get_mmu_flags(flags) |
+			ENTRY_ACCESS_BIT | ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) |
+			ENTRY_ACCESS_BIT | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const aarch64_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void)
+{
+	return &aarch64_mode;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
new file mode 100755
index 0000000..683cabb
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -0,0 +1,206 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase_mmu_mode.h"
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+#define ENTRY_IS_ATE        1ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_RD_BIT (1ULL << 6)
+#define ENTRY_WR_BIT (1ULL << 7)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
+		ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+	*pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler keeping cached copies of
+	 * memory, we have to explicitly say that we have updated
+	 * memory.
+	 *
+	 * Note: We could manually move the data ourselves into R0 and
+	 * R1 by specifying register variables that are explicitly
+	 * given registers assignments, the down side of this is that
+	 * we have to assume cpu endianness.  To avoid this we can use
+	 * the ldrd to read the data from memory into R0 and R1 which
+	 * will respect the cpu endianness, we then use strd to make
+	 * the 64 bit assignment to the page table entry.
+	 */
+	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+			"strd r0, r1, [%[pte]]\n\t"
+			: "=m" (*pte)
+			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+			: "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register. */
+	setup->memattr =
+		(AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
+		(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    <<
+		(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8))    |
+		(AS_MEMATTR_LPAE_WRITE_ALLOC           <<
+		(AS_MEMATTR_INDEX_WRITE_ALLOC * 8))           |
+		(AS_MEMATTR_LPAE_OUTER_IMPL_DEF        <<
+		(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8))        |
+		(AS_MEMATTR_LPAE_OUTER_WA              <<
+		(AS_MEMATTR_INDEX_OUTER_WA * 8))              |
+		0; /* The other indices are unused for now */
+
+	setup->transtab = ((u64)kctx->pgd &
+		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
+		AS_TRANSTAB_LPAE_ADRMODE_TABLE |
+		AS_TRANSTAB_LPAE_READ_INNER;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#else
+	setup->transcfg = 0;
+#endif
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#endif
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* write perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
+	/* read perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) |
+		get_mmu_flags(flags) |
+		ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const lpae_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
+{
+	return &lpae_mode;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
new file mode 100755
index 0000000..1a44957
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+
+/*
+ * This file is included only for type definitions and functions belonging to
+ * specific platform folders. Do not add dependencies with symbols that are
+ * defined somewhere else.
+ */
+#include <mali_kbase_config.h>
+
+#define PLATFORM_CONFIG_RESOURCE_COUNT 4
+#define PLATFORM_CONFIG_IRQ_RES_COUNT  3
+
+static struct platform_device *mali_device;
+
+#ifndef CONFIG_OF
+/**
+ * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources
+ *
+ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function
+ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT.
+ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
+ *
+ * @param[in]  io_resource      Input IO resource data
+ * @param[out] linux_resources  Pointer to output array of Linux resource structures
+ */
+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
+{
+	if (!io_resources || !linux_resources) {
+		pr_err("%s: couldn't find proper resources\n", __func__);
+		return;
+	}
+
+	memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource));
+
+	linux_resources[0].start = io_resources->io_memory_region.start;
+	linux_resources[0].end   = io_resources->io_memory_region.end;
+	linux_resources[0].flags = IORESOURCE_MEM;
+
+	linux_resources[1].start = io_resources->job_irq_number;
+	linux_resources[1].end   = io_resources->job_irq_number;
+	linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[2].start = io_resources->mmu_irq_number;
+	linux_resources[2].end   = io_resources->mmu_irq_number;
+	linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[3].start = io_resources->gpu_irq_number;
+	linux_resources[3].end   = io_resources->gpu_irq_number;
+	linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+}
+#endif /* CONFIG_OF */
+
+int kbase_platform_fake_register(void)
+{
+	struct kbase_platform_config *config;
+#ifndef CONFIG_OF
+	struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
+#endif
+	int err;
+
+	config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
+	if (config == NULL) {
+		pr_err("%s: couldn't get platform config\n", __func__);
+		return -ENODEV;
+	}
+
+	mali_device = platform_device_alloc("mali", 0);
+	if (mali_device == NULL)
+		return -ENOMEM;
+
+#ifndef CONFIG_OF
+	kbasep_config_parse_io_resources(config->io_resources, resources);
+	err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
+	if (err) {
+		platform_device_put(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+#endif /* CONFIG_OF */
+
+	err = platform_device_add(mali_device);
+	if (err) {
+		platform_device_unregister(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kbase_platform_fake_register);
+
+void kbase_platform_fake_unregister(void)
+{
+	if (mali_device)
+		platform_device_unregister(mali_device);
+}
+EXPORT_SYMBOL(kbase_platform_fake_unregister);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
new file mode 100755
index 0000000..97d5434
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -0,0 +1,205 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.c
+ * Base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_vinstr.h>
+
+#include <mali_kbase_pm.h>
+
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
+{
+	return kbase_hwaccess_pm_powerup(kbdev, flags);
+}
+
+void kbase_pm_halt(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
+void kbase_pm_context_active(struct kbase_device *kbdev)
+{
+	(void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+}
+
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerup. Sometimes the event might be missed due to reading the count
+	 * outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+	if (kbase_pm_is_suspending(kbdev)) {
+		switch (suspend_handler) {
+		case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+			if (kbdev->pm.active_count != 0)
+				break;
+			/* FALLTHROUGH */
+		case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			if (old_count == 0)
+				kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+			return 1;
+
+		case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
+			/* FALLTHROUGH */
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false, "unreachable");
+			break;
+		}
+	}
+	c = ++kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	if (c == 1)
+		/* First context active: Power on the GPU and any cores requested by
+		 * the policy */
+		kbase_hwaccess_pm_gpu_active(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_active);
+
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerdown. Sometimes the event might be missed due to reading the
+	 * count outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	c = --kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
+
+	KBASE_DEBUG_ASSERT(c >= 0);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	if (c == 0) {
+		/* Last context has gone idle */
+		kbase_hwaccess_pm_gpu_idle(kbdev);
+
+		/* Wake up anyone waiting for this to become 0 (e.g. suspend). The
+		 * waiters must synchronize with us by locking the pm.lock after
+		 * waiting */
+		wake_up(&kbdev->pm.zero_active_count_wait);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
+
+void kbase_pm_suspend(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	mutex_lock(&kbdev->pm.lock);
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+	kbdev->pm.suspending = true;
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* From now on, the active count will drop towards zero. Sometimes, it'll
+	 * go up briefly before going down again. However, once it reaches zero it
+	 * will stay there - guaranteeing that we've idled all pm references */
+
+	/* Suspend job scheduler and associated components, so that it releases all
+	 * the PM active count references */
+	kbasep_js_suspend(kbdev);
+
+	/* Wait for the active count to reach zero. This is not the same as
+	 * waiting for a power down, since not all policies power down when this
+	 * reaches zero. */
+	wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0);
+
+	/* NOTE: We synchronize with anything that was just finishing a
+	 * kbase_pm_context_idle() call by locking the pm.lock below */
+
+	kbase_hwaccess_pm_suspend(kbdev);
+}
+
+void kbase_pm_resume(struct kbase_device *kbdev)
+{
+	/* MUST happen before any pm_context_active calls occur */
+	kbase_hwaccess_pm_resume(kbdev);
+
+	/* Initial active call, to power on the GPU/cores if needed */
+	kbase_pm_context_active(kbdev);
+
+	/* Resume any blocked atoms (which may cause contexts to be scheduled in
+	 * and dependent atoms to run) */
+	kbase_resume_suspended_soft_jobs(kbdev);
+
+	/* Resume the Job Scheduler and associated components, and start running
+	 * atoms */
+	kbasep_js_resume(kbdev);
+
+	/* Matching idle call, to power off the GPU/cores if we didn't actually
+	 * need it and the policy doesn't want it on */
+	kbase_pm_context_idle(kbdev);
+
+	/* Resume vinstr operation */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
new file mode 100755
index 0000000..37fa247
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -0,0 +1,171 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include "mali_kbase_hwaccess_pm.h"
+
+#define PM_ENABLE_IRQS       0x01
+#define PM_HW_ISSUES_DETECT  0x02
+
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return 0 if the power management framework was successfully initialized.
+ */
+int kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @param flags     Flags to pass on to kbase_pm_init_hw
+ *
+ * @return 0 if powerup was successful.
+ */
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ * Should ensure that no new interrupts are generated,
+ * but allow any currently running interrupt handlers to complete successfully.
+ * The GPU is forced off by the time this function returns, regardless of
+ * whether or not the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init)
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It informs the active power policy that the
+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occuring/has occurred whilst this is
+ * in use. Use kbase_pm_contect_active_unless_suspending() instead.
+ *
+ * @note a Suspend is only visible to Kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+	/** A suspend is not expected/not possible - this is the same as
+	 * kbase_pm_context_active() */
+	KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+	/** If we're suspending, fail and don't increase the active count */
+	KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+	/** If we're suspending, succeed and allow the active count to increase iff
+	 * it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+	 *
+	 * This should only be used when there is a bounded time on the activation
+	 * (e.g. guarantee it's going to be idled very soon after) */
+	KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * We returns a status code indicating whether we're allowed to keep the GPU
+ * active during the suspend, depending on the handler code. If the status code
+ * indicates a failure, the caller must abort whatever operation it was
+ * attempting, and potentially queue it up for after the OS has resumed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero     Indicates success
+ * @return non-zero Indicates failure due to the system being suspending/suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power
+ * policy so the calling code should ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from Kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note the mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU e.g. via atom submission.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_vsync_callback - vsync callback
+ *
+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise
+ * @data: Pointer to the kbase device as returned by kbase_find_device()
+ *
+ * Callback function used to notify the power management code that a vsync has
+ * occurred on the display.
+ */
+void kbase_pm_vsync_callback(int buffer_updated, void *data);
+
+#endif				/* _KBASE_PM_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
new file mode 100755
index 0000000..7fb674e
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_profiling_gator_api.h
+ * Model interface
+ */
+
+#ifndef _KBASE_PROFILING_GATOR_API_H_
+#define _KBASE_PROFILING_GATOR_API_H_
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control
+ * the frame buffer dumping and s/w counter reporting.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define FBDUMP_CONTROL_MAX (5)
+#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE
+
+void _mali_profiling_control(u32 action, u32 value);
+
+#endif				/* _KBASE_PROFILING_GATOR_API */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
new file mode 100644
index 0000000..c970650
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+
+#include "mali_kbase_regs_history_debugfs.h"
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+#include <linux/debugfs.h>
+
+
+static int regs_history_size_get(void *data, u64 *val)
+{
+	struct kbase_io_history *const h = data;
+
+	*val = h->size;
+
+	return 0;
+}
+
+static int regs_history_size_set(void *data, u64 val)
+{
+	struct kbase_io_history *const h = data;
+
+	return kbase_io_history_resize(h, (u16)val);
+}
+
+
+DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
+		regs_history_size_get,
+		regs_history_size_set,
+		"%llu\n");
+
+
+/**
+ * regs_history_show - show callback for the register access history file.
+ *
+ * @sfile: The debugfs entry
+ * @data: Data associated with the entry
+ *
+ * This function is called to dump all recent accesses to the GPU registers.
+ *
+ * @return 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int regs_history_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_io_history *const h = sfile->private;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!h->enabled) {
+		seq_puts(sfile, "The register access history is disabled\n");
+		goto out;
+	}
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	iters = (h->size > h->count) ? h->count : h->size;
+	seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+out:
+	return 0;
+}
+
+
+/**
+ * regs_history_open - open operation for regs_history debugfs file
+ *
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * @return file descriptor
+ */
+static int regs_history_open(struct inode *in, struct file *file)
+{
+	return single_open(file, &regs_history_show, in->i_private);
+}
+
+
+static const struct file_operations regs_history_fops = {
+	.open = &regs_history_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history.enabled);
+	debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history, &regs_history_size_fops);
+	debugfs_create_file("regs_history", S_IRUGO,
+			kbdev->mali_debugfs_directory, &kbdev->io_history,
+			&regs_history_fops);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
new file mode 100644
index 0000000..f108370
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Header file for register access history support via debugfs
+ *
+ * This interface is made available via /sys/kernel/debug/mali#/regs_history*.
+ *
+ * Usage:
+ * - regs_history_enabled: whether recording of register accesses is enabled.
+ *   Write 'y' to enable, 'n' to disable.
+ * - regs_history_size: size of the register history buffer, must be > 0
+ * - regs_history: return the information about last accesses to the registers.
+ */
+
+#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H
+#define _KBASE_REGS_HISTORY_DEBUGFS_H
+
+struct kbase_device;
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/**
+ * kbasep_regs_history_debugfs_init - add debugfs entries for register history
+ *
+ * @kbdev: Pointer to kbase_device containing the register history
+ */
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_regs_history_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+#endif  /*_KBASE_REGS_HISTORY_DEBUGFS_H*/
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
new file mode 100755
index 0000000..84aa331
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -0,0 +1,1166 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_replay.c
+ * Replay soft job handlers
+ */
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mem_linux.h>
+
+#define JOB_NOT_STARTED 0
+#define JOB_TYPE_NULL      (1)
+#define JOB_TYPE_VERTEX    (5)
+#define JOB_TYPE_TILER     (7)
+#define JOB_TYPE_FUSED     (8)
+#define JOB_TYPE_FRAGMENT  (9)
+
+#define JOB_HEADER_32_FBD_OFFSET (31*4)
+#define JOB_HEADER_64_FBD_OFFSET (44*4)
+
+#define FBD_POINTER_MASK (~0x3f)
+
+#define SFBD_TILER_OFFSET (48*4)
+
+#define MFBD_TILER_OFFSET       (14*4)
+
+#define FBD_HIERARCHY_WEIGHTS 8
+#define FBD_HIERARCHY_MASK_MASK 0x1fff
+
+#define FBD_TYPE 1
+
+#define HIERARCHY_WEIGHTS 13
+
+#define JOB_HEADER_ID_MAX                 0xffff
+
+#define JOB_SOURCE_ID(status)		(((status) >> 16) & 0xFFFF)
+#define JOB_POLYGON_LIST		(0x03)
+
+struct fragment_job {
+	struct job_descriptor_header header;
+
+	u32 x[2];
+	union {
+		u64 _64;
+		u32 _32;
+	} fragment_fbd;
+};
+
+static void dump_job_head(struct kbase_context *kctx, char *head_str,
+		struct job_descriptor_header *job)
+{
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
+	dev_dbg(kctx->kbdev->dev,
+			"addr                  = %p\n"
+			"exception_status      = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n"
+			"first_incomplete_task = %x\n"
+			"fault_pointer         = %llx\n"
+			"job_descriptor_size   = %x\n"
+			"job_type              = %x\n"
+			"job_barrier           = %x\n"
+			"_reserved_01          = %x\n"
+			"_reserved_02          = %x\n"
+			"_reserved_03          = %x\n"
+			"_reserved_04/05       = %x,%x\n"
+			"job_index             = %x\n"
+			"dependencies          = %x,%x\n",
+			job, job->exception_status,
+			JOB_SOURCE_ID(job->exception_status),
+			(job->exception_status >> 8) & 0x3,
+			job->exception_status  & 0xFF,
+			job->first_incomplete_task,
+			job->fault_pointer, job->job_descriptor_size,
+			job->job_type, job->job_barrier, job->_reserved_01,
+			job->_reserved_02, job->_reserved_03,
+			job->_reserved_04, job->_reserved_05,
+			job->job_index,
+			job->job_dependency_index_1,
+			job->job_dependency_index_2);
+
+	if (job->job_descriptor_size)
+		dev_dbg(kctx->kbdev->dev, "next               = %llx\n",
+				job->next_job._64);
+	else
+		dev_dbg(kctx->kbdev->dev, "next               = %x\n",
+				job->next_job._32);
+#endif
+}
+
+static int kbasep_replay_reset_sfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct {
+		u32 padding_1[1];
+		u32 flags;
+		u64 padding_2[2];
+		u64 heap_free_address;
+		u32 padding[8];
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev,
+		"FBD tiler:\n"
+		"flags = %x\n"
+		"heap_free_address = %llx\n",
+		fbd_tiler->flags, fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = fbd_tiler->flags &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n",
+						i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n",
+			fbd_tiler->heap_free_address, fbd_tiler->flags);
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_replay_reset_mfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct kbase_vmap_struct map;
+	struct {
+		u32 padding_0;
+		u32 flags;
+		u64 padding_1[2];
+		u64 heap_free_address;
+		u64 padding_2;
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev,
+			       "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "FBD tiler:\n"
+			"flags = %x\n"
+			"heap_free_address = %llx\n",
+			fbd_tiler->flags,
+			fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = (fbd_tiler->flags) &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev,
+				" Writing hierarchy level %02d (%08x) to %d\n",
+							     i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of an FBD pointed to by a tiler job
+ *
+ * This performs two functions :
+ * - Set the hierarchy mask
+ * - Reset the tiler free heap address
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] job_header        Address of job header to reset.
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] job_64            true if this job is using 64-bit
+ *                              descriptors
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx,
+		u64 job_header,	u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight,	bool job_64)
+{
+	struct kbase_vmap_struct map;
+	u64 fbd_address;
+
+	if (job_64) {
+		u64 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_64_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	} else {
+		u32 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_32_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	if (fbd_address & FBD_TYPE) {
+		return kbasep_replay_reset_mfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	} else {
+		return kbasep_replay_reset_sfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	}
+}
+
+/**
+ * @brief Reset the status of a job
+ *
+ * This performs the following functions :
+ *
+ * - Reset the Job Status field of each job to NOT_STARTED.
+ * - Set the Job Type field of any Vertex Jobs to Null Job.
+ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of
+ *   the tiler_heap_free parameter, and set the hierarchy level mask to the
+ *   hier_mask parameter.
+ * - Offset HW dependencies by the hw_job_id_offset parameter
+ * - Set the Perform Job Barrier flag if this job is the first in the chain
+ * - Read the address of the next job header
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in,out] job_header    Address of job header to reset. Set to address
+ *                              of next job header on exit.
+ * @param[in] prev_jc           Previous job chain to link to, if this job is
+ *                              the last in the chain.
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] first_in_chain    true if this job is the first in the chain
+ * @param[in] fragment_chain    true if this job is in the fragment chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_job(struct kbase_context *kctx,
+		u64 *job_header, u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool first_in_chain, bool fragment_chain)
+{
+	struct fragment_job *frag_job;
+	struct job_descriptor_header *job;
+	u64 new_job_header;
+	struct kbase_vmap_struct map;
+
+	frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map);
+	if (!frag_job) {
+		dev_err(kctx->kbdev->dev,
+				 "kbasep_replay_parse_jc: failed to map jc\n");
+		return -EINVAL;
+	}
+	job = &frag_job->header;
+
+	dump_job_head(kctx, "Job header:", job);
+
+	if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) {
+		dev_err(kctx->kbdev->dev, "Job already not started\n");
+		goto out_unmap;
+	}
+	job->exception_status = JOB_NOT_STARTED;
+
+	if (job->job_type == JOB_TYPE_VERTEX)
+		job->job_type = JOB_TYPE_NULL;
+
+	if (job->job_type == JOB_TYPE_FUSED) {
+		dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
+		goto out_unmap;
+	}
+
+	if (first_in_chain)
+		job->job_barrier = 1;
+
+	if ((job->job_dependency_index_1 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_dependency_index_2 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+		dev_err(kctx->kbdev->dev,
+			     "Job indicies/dependencies out of valid range\n");
+		goto out_unmap;
+	}
+
+	if (job->job_dependency_index_1)
+		job->job_dependency_index_1 += hw_job_id_offset;
+	if (job->job_dependency_index_2)
+		job->job_dependency_index_2 += hw_job_id_offset;
+
+	job->job_index += hw_job_id_offset;
+
+	if (job->job_descriptor_size) {
+		new_job_header = job->next_job._64;
+		if (!job->next_job._64)
+			job->next_job._64 = prev_jc;
+	} else {
+		new_job_header = job->next_job._32;
+		if (!job->next_job._32)
+			job->next_job._32 = prev_jc;
+	}
+	dump_job_head(kctx, "Updated to:", job);
+
+	if (job->job_type == JOB_TYPE_TILER) {
+		bool job_64 = job->job_descriptor_size != 0;
+
+		if (kbasep_replay_reset_tiler_job(kctx, *job_header,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, job_64) != 0)
+			goto out_unmap;
+
+	} else if (job->job_type == JOB_TYPE_FRAGMENT) {
+		u64 fbd_address;
+
+		if (job->job_descriptor_size)
+			fbd_address = frag_job->fragment_fbd._64;
+		else
+			fbd_address = (u64)frag_job->fragment_fbd._32;
+
+		if (fbd_address & FBD_TYPE) {
+			if (kbasep_replay_reset_mfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		} else {
+			if (kbasep_replay_reset_sfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		}
+	}
+
+	kbase_vunmap(kctx, &map);
+
+	*job_header = new_job_header;
+
+	return 0;
+
+out_unmap:
+	kbase_vunmap(kctx, &map);
+	return -EINVAL;
+}
+
+/**
+ * @brief Find the highest job ID in a job chain
+ *
+ * @param[in] kctx        Context pointer
+ * @param[in] jc          Job chain start address
+ * @param[out] hw_job_id  Highest job ID in chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
+		u64 jc,	u16 *hw_job_id)
+{
+	while (jc) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		dev_dbg(kctx->kbdev->dev,
+			"kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc);
+
+		job = kbase_vmap(kctx, jc, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev, "failed to map jc\n");
+
+			return -EINVAL;
+		}
+
+		if (job->job_index > *hw_job_id)
+			*hw_job_id = job->job_index;
+
+		if (job->job_descriptor_size)
+			jc = job->next_job._64;
+		else
+			jc = job->next_job._32;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a number of jobs
+ *
+ * This function walks the provided job chain, and calls
+ * kbasep_replay_reset_job for each job. It also links the job chain to the
+ * provided previous job chain.
+ *
+ * The function will fail if any of the jobs passed already have status of
+ * NOT_STARTED.
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] jc                Job chain to be processed
+ * @param[in] prev_jc           Job chain to be added to. May be NULL
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] fragment_chain    true if this chain is the fragment chain
+ *
+ * @return 0 on success, error code otherwise
+ */
+static int kbasep_replay_parse_jc(struct kbase_context *kctx,
+		u64 jc,	u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool fragment_chain)
+{
+	bool first_in_chain = true;
+	int nr_jobs = 0;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n",
+			jc, hw_job_id_offset);
+
+	while (jc) {
+		dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc);
+
+		if (kbasep_replay_reset_job(kctx, &jc, prev_jc,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, hw_job_id_offset,
+				first_in_chain, fragment_chain) != 0)
+			return -EINVAL;
+
+		first_in_chain = false;
+
+		nr_jobs++;
+		if (fragment_chain &&
+		    nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) {
+			dev_err(kctx->kbdev->dev,
+				"Exceeded maximum number of jobs in fragment chain\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a replay job, and set up dependencies
+ *
+ * This performs the actions to allow the replay job to be re-run following
+ * completion of the passed dependency.
+ *
+ * @param[in] katom     The atom to be reset
+ * @param[in] dep_atom  The dependency to be attached to the atom
+ */
+static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom,
+		struct kbase_jd_atom *dep_atom)
+{
+	katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+	list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]);
+}
+
+/**
+ * @brief Allocate an unused katom
+ *
+ * This will search the provided context for an unused katom, and will mark it
+ * as KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * If no atoms are available then the function will fail.
+ *
+ * @param[in] kctx      Context pointer
+ * @return An atom ID, or -1 on failure
+ */
+static int kbasep_allocate_katom(struct kbase_context *kctx)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int i;
+
+	for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) {
+		if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) {
+			jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED;
+			dev_dbg(kctx->kbdev->dev,
+				  "kbasep_allocate_katom: Allocated atom %d\n",
+									    i);
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * @brief Release a katom
+ *
+ * This will mark the provided atom as available, and remove any dependencies.
+ *
+ * For use on error path.
+ *
+ * @param[in] kctx      Context pointer
+ * @param[in] atom_id   ID of atom to release
+ */
+static void kbasep_release_katom(struct kbase_context *kctx, int atom_id)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n",
+			atom_id);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[0]))
+		list_del(jctx->atoms[atom_id].dep_head[0].next);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[1]))
+		list_del(jctx->atoms[atom_id].dep_head[1].next);
+
+	jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED;
+}
+
+static void kbasep_replay_create_atom(struct kbase_context *kctx,
+				      struct base_jd_atom_v2 *atom,
+				      int atom_nr,
+				      base_jd_prio prio)
+{
+	atom->nr_extres = 0;
+	atom->extres_list.value = NULL;
+	atom->device_nr = 0;
+	atom->prio = prio;
+	atom->atom_number = atom_nr;
+
+	base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID);
+
+	atom->udata.blob[0] = 0;
+	atom->udata.blob[1] = 0;
+}
+
+/**
+ * @brief Create two atoms for the purpose of replaying jobs
+ *
+ * Two atoms are allocated and created. The jc pointer is not set at this
+ * stage. The second atom has a dependency on the first. The remaining fields
+ * are set up as follows :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ *
+ * @param[out] t_atom      Atom to use for tiler jobs
+ * @param[out] f_atom      Atom to use for fragment jobs
+ * @param[in]  prio        Priority of new atom (inherited from replay soft
+ *                         job)
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_create_atoms(struct kbase_context *kctx,
+		struct base_jd_atom_v2 *t_atom,
+		struct base_jd_atom_v2 *f_atom,
+		base_jd_prio prio)
+{
+	int t_atom_nr, f_atom_nr;
+
+	t_atom_nr = kbasep_allocate_katom(kctx);
+	if (t_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		return -EINVAL;
+	}
+
+	f_atom_nr = kbasep_allocate_katom(kctx);
+	if (f_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		kbasep_release_katom(kctx, t_atom_nr);
+		return -EINVAL;
+	}
+
+	kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
+	kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
+
+	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA);
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_DEBUG
+static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload)
+{
+	u64 next;
+
+	dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n");
+	next = payload->tiler_jc_list;
+
+	while (next) {
+		struct kbase_vmap_struct map;
+		base_jd_replay_jc *jc_struct;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map);
+
+		if (!jc_struct)
+			return;
+
+		dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n",
+				jc_struct, jc_struct->jc, jc_struct->next);
+
+		next = jc_struct->next;
+
+		kbase_vunmap(kctx, &map);
+	}
+}
+#endif
+
+/**
+ * @brief Parse a base_jd_replay_payload provided by userspace
+ *
+ * This will read the payload from userspace, and parse the job chains.
+ *
+ * @param[in] kctx         Context pointer
+ * @param[in] replay_atom  Replay soft job atom
+ * @param[in] t_atom       Atom to use for tiler jobs
+ * @param[in] f_atom       Atom to use for fragment jobs
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_parse_payload(struct kbase_context *kctx,
+					      struct kbase_jd_atom *replay_atom,
+					      struct base_jd_atom_v2 *t_atom,
+					      struct base_jd_atom_v2 *f_atom)
+{
+	base_jd_replay_payload *payload = NULL;
+	u64 next;
+	u64 prev_jc = 0;
+	u16 hw_job_id_offset = 0;
+	int ret = -EINVAL;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n",
+			replay_atom->jc, sizeof(payload));
+
+	payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n");
+		return -EINVAL;
+	}
+
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+	if (KBASE_API_VERSION(10, 3) > replay_atom->kctx->api_version) {
+		base_jd_replay_payload_uk10_2 *payload_uk10_2;
+		u16 tiler_core_req;
+		u16 fragment_core_req;
+
+		payload_uk10_2 = (base_jd_replay_payload_uk10_2 *) payload;
+		memcpy(&tiler_core_req, &payload_uk10_2->tiler_core_req,
+				sizeof(tiler_core_req));
+		memcpy(&fragment_core_req, &payload_uk10_2->fragment_core_req,
+				sizeof(fragment_core_req));
+		payload->tiler_core_req = (u32)(tiler_core_req & 0x7fff);
+		payload->fragment_core_req = (u32)(fragment_core_req & 0x7fff);
+	}
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload);
+	dev_dbg(kctx->kbdev->dev, "Payload structure:\n"
+				  "tiler_jc_list            = %llx\n"
+				  "fragment_jc              = %llx\n"
+				  "tiler_heap_free          = %llx\n"
+				  "fragment_hierarchy_mask  = %x\n"
+				  "tiler_hierarchy_mask     = %x\n"
+				  "hierarchy_default_weight = %x\n"
+				  "tiler_core_req           = %x\n"
+				  "fragment_core_req        = %x\n",
+							payload->tiler_jc_list,
+							  payload->fragment_jc,
+						      payload->tiler_heap_free,
+					      payload->fragment_hierarchy_mask,
+						 payload->tiler_hierarchy_mask,
+					     payload->hierarchy_default_weight,
+						       payload->tiler_core_req,
+						   payload->fragment_core_req);
+	payload_dump(kctx, payload);
+#endif
+	t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER;
+	f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER;
+
+	/* Sanity check core requirements*/
+	if ((t_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T ||
+	    (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS ||
+	     t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES ||
+	     f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+
+		int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP;
+		int f_atom_type = f_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC;
+		int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+		int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+
+		if (t_atom_type != BASE_JD_REQ_T) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. Was: 0x%x\n Expected: 0x%x",
+			    t_atom_type, BASE_JD_REQ_T);
+		}
+		if (f_atom_type != BASE_JD_REQ_FS) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. Was 0x%x Expected: 0x%x\n",
+			    f_atom_type, BASE_JD_REQ_FS);
+		}
+		if (t_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n");
+		}
+		if (f_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n");
+		}
+
+		goto out;
+	}
+
+	/* Process tiler job chains */
+	next = payload->tiler_jc_list;
+	if (!next) {
+		dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n");
+		goto out;
+	}
+
+	while (next) {
+		base_jd_replay_jc *jc_struct;
+		struct kbase_vmap_struct jc_map;
+		u64 jc;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map);
+
+		if (!jc_struct) {
+			dev_err(kctx->kbdev->dev, "Failed to map jc struct\n");
+			goto out;
+		}
+
+		jc = jc_struct->jc;
+		next = jc_struct->next;
+		if (next)
+			jc_struct->jc = 0;
+
+		kbase_vunmap(kctx, &jc_map);
+
+		if (jc) {
+			u16 max_hw_job_id = 0;
+
+			if (kbasep_replay_find_hw_job_id(kctx, jc,
+					&max_hw_job_id) != 0)
+				goto out;
+
+			if (kbasep_replay_parse_jc(kctx, jc, prev_jc,
+					payload->tiler_heap_free,
+					payload->tiler_hierarchy_mask,
+					payload->hierarchy_default_weight,
+					hw_job_id_offset, false) != 0) {
+				goto out;
+			}
+
+			hw_job_id_offset += max_hw_job_id;
+
+			prev_jc = jc;
+		}
+	}
+	t_atom->jc = prev_jc;
+
+	/* Process fragment job chain */
+	f_atom->jc = payload->fragment_jc;
+	if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0,
+			payload->tiler_heap_free,
+			payload->fragment_hierarchy_mask,
+			payload->hierarchy_default_weight, 0,
+			true) != 0) {
+		goto out;
+	}
+
+	if (!t_atom->jc || !f_atom->jc) {
+		dev_err(kctx->kbdev->dev, "Invalid payload\n");
+		goto out;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n",
+			t_atom->jc, f_atom->jc);
+	ret = 0;
+
+out:
+	kbase_vunmap(kctx, &map);
+
+	return ret;
+}
+
+static void kbase_replay_process_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+	struct kbase_jd_context *jctx;
+	bool need_to_try_schedule_context = false;
+
+	struct base_jd_atom_v2 t_atom, f_atom;
+	struct kbase_jd_atom *t_katom, *f_katom;
+	base_jd_prio atom_prio;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+
+	mutex_lock(&jctx->lock);
+
+	atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority);
+
+	if (kbasep_replay_create_atoms(
+			kctx, &t_atom, &f_atom, atom_prio) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	t_katom = &jctx->atoms[t_atom.atom_number];
+	f_katom = &jctx->atoms[f_atom.atom_number];
+
+	if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) {
+		kbasep_release_katom(kctx, t_atom.atom_number);
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	kbasep_replay_reset_softjob(katom, f_katom);
+
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom);
+	if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom);
+	if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+out:
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_disjoint_state_down(kctx->kbdev);
+
+		need_to_try_schedule_context |= jd_done_nolock(katom, NULL);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kctx->kbdev);
+
+	mutex_unlock(&jctx->lock);
+}
+
+/**
+ * @brief Check job replay fault
+ *
+ * This will read the job payload, checks fault type and source, then decides
+ * whether replay is required.
+ *
+ * @param[in] katom       The atom to be processed
+ * @return  true (success) if replay required or false on failure.
+ */
+static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	base_jd_replay_payload *payload;
+	u64 job_header;
+	u64 job_loop_detect;
+	struct job_descriptor_header *job;
+	struct kbase_vmap_struct job_map;
+	struct kbase_vmap_struct map;
+	bool err = false;
+
+	/* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or
+	 * if force_replay is enabled.
+	 */
+	if (BASE_JD_EVENT_TERMINATED == katom->event_code) {
+		return false;
+	} else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) {
+		return true;
+	} else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) {
+		katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT;
+		return true;
+	} else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) {
+		/* No replay for faults of type other than
+		 * BASE_JD_EVENT_DATA_INVALID_FAULT.
+		 */
+		return false;
+	}
+
+	/* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc
+	 * to find out whether the source of exception is POLYGON_LIST. Replay
+	 * is required if the source of fault is POLYGON_LIST.
+	 */
+	payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n");
+		return false;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload);
+	dev_dbg(dev, "\nPayload structure:\n"
+		     "fragment_jc              = 0x%llx\n"
+		     "fragment_hierarchy_mask  = 0x%x\n"
+		     "fragment_core_req        = 0x%x\n",
+		     payload->fragment_jc,
+		     payload->fragment_hierarchy_mask,
+		     payload->fragment_core_req);
+#endif
+	/* Process fragment job chain */
+	job_header      = (u64) payload->fragment_jc;
+	job_loop_detect = job_header;
+	while (job_header) {
+		job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map);
+		if (!job) {
+			dev_err(dev, "failed to map jc\n");
+			/* unmap payload*/
+			kbase_vunmap(kctx, &map);
+			return false;
+		}
+
+
+		dump_job_head(kctx, "\njob_head structure:\n", job);
+
+		/* Replay only when the polygon list reader caused the
+		 * DATA_INVALID_FAULT */
+		if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
+		   (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) {
+			err = true;
+			kbase_vunmap(kctx, &job_map);
+			break;
+		}
+
+		/* Move on to next fragment job in the list */
+		if (job->job_descriptor_size)
+			job_header = job->next_job._64;
+		else
+			job_header = job->next_job._32;
+
+		kbase_vunmap(kctx, &job_map);
+
+		/* Job chain loop detected */
+		if (job_header == job_loop_detect)
+			break;
+	}
+
+	/* unmap payload*/
+	kbase_vunmap(kctx, &map);
+
+	return err;
+}
+
+
+/**
+ * @brief Process a replay job
+ *
+ * Called from kbase_process_soft_job.
+ *
+ * On exit, if the job has completed, katom->event_code will have been updated.
+ * If the job has not completed, and is replaying jobs, then the atom status
+ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * @param[in] katom  The atom to be processed
+ * @return           false if the atom has completed
+ *                   true if the atom is replaying jobs
+ */
+bool kbase_replay_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	/* Don't replay this atom if these issues are not present in the
+	 * hardware */
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11020) &&
+			!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11024)) {
+		dev_dbg(kbdev->dev, "Hardware does not need replay workaround");
+
+		/* Signal failure to userspace */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+
+		return false;
+	}
+
+	if (katom->event_code == BASE_JD_EVENT_DONE) {
+		dev_dbg(kbdev->dev, "Previous job succeeded - not replaying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		dev_dbg(kbdev->dev, "Not replaying; context is dying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	/* Check job exception type and source before replaying. */
+	if (!kbase_replay_fault_check(katom)) {
+		dev_dbg(kbdev->dev,
+			"Replay cancelled on event %x\n", katom->event_code);
+		/* katom->event_code is already set to the failure code of the
+		 * previous job.
+		 */
+		return false;
+	}
+
+	dev_warn(kbdev->dev, "Replaying jobs retry=%d\n",
+			katom->retry_count);
+
+	katom->retry_count++;
+
+	if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) {
+		dev_err(kbdev->dev, "Replay exceeded limit - failing jobs\n");
+
+		kbase_disjoint_state_down(kbdev);
+
+		/* katom->event_code is already set to the failure code of the
+		   previous job */
+		return false;
+	}
+
+	/* only enter the disjoint state once for the whole time while the replay is ongoing */
+	if (katom->retry_count == 1)
+		kbase_disjoint_state_up(kbdev);
+
+	INIT_WORK(&katom->work, kbase_replay_process_worker);
+	queue_work(kctx->event_workq, &katom->work);
+
+	return true;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
new file mode 100755
index 0000000..43175c8
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
@@ -0,0 +1,74 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+#include <mali_kbase_smc.h>
+
+#include <linux/compiler.h>
+
+static noinline u64 invoke_smc_fid(u64 function_id,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	register u64 x0 asm("x0") = function_id;
+	register u64 x1 asm("x1") = arg0;
+	register u64 x2 asm("x2") = arg1;
+	register u64 x3 asm("x3") = arg2;
+
+	asm volatile(
+			__asmeq("%0", "x0")
+			__asmeq("%1", "x1")
+			__asmeq("%2", "x2")
+			__asmeq("%3", "x3")
+			"smc    #0\n"
+			: "+r" (x0)
+			: "r" (x1), "r" (x2), "r" (x3));
+
+	return x0;
+}
+
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2)
+{
+	/* Is fast call (bit 31 set) */
+	KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL);
+	/* bits 16-23 must be zero for fast calls */
+	KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0);
+
+	return invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	/* Only the six bits allowed should be used. */
+	KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0);
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return kbase_invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+#endif /* CONFIG_ARM64 */
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
new file mode 100755
index 0000000..9bff3d2
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_SMC_H_
+#define _KBASE_SMC_H_
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+
+#define SMC_FAST_CALL (1 << 31)
+#define SMC_64 (1 << 30)
+
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */
+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET)
+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET)
+
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @fid: The SMC function to call, see SMC Calling convention.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC.
+  */
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2);
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @oen: Owning Entity number (SIP, STD etc).
+  * @function_number: The function number within the OEN.
+  * @smc64: use SMC64 calling convention instead of SMC32.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC call.
+  */
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2);
+
+#endif /* CONFIG_ARM64 */
+
+#endif /* _KBASE_SMC_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
new file mode 100755
index 0000000..4f05bc0
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -0,0 +1,1504 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#include <asm/cacheflush.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/dma-mapping.h>
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#include <linux/syscalls.h>
+#include "mali_kbase_sync.h"
+#endif
+#include <mali_base_kernel.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_mem_linux.h>
+#include <linux/version.h>
+#include <linux/ktime.h>
+#include <linux/pfn.h>
+#include <linux/sched.h>
+
+/* Mask to check cache alignment of data structures */
+#define KBASE_CACHE_ALIGNMENT_MASK		((1<<L1_CACHE_SHIFT)-1)
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_add_tail(&katom->queue, &kctx->waiting_soft_jobs);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_del(&katom->queue);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Record the start time of this atom so we could cancel it at
+	 * the right time.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* Add the atom to the waiting list before the timer is
+	 * (re)started to make sure that it gets processed.
+	 */
+	kbasep_add_waiting_soft_job(katom);
+
+	/* Schedule timeout of this atom after a period if it is not active */
+	if (!timer_pending(&kctx->soft_job_timeout)) {
+		int timeout_ms = atomic_read(
+				&kctx->kbdev->js_data.soft_job_timeout_ms);
+		mod_timer(&kctx->soft_job_timeout,
+			  jiffies + msecs_to_jiffies(timeout_ms));
+	}
+}
+
+static int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*status = *mapped_evt;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	if ((new_status != BASE_JD_SOFT_EVENT_SET) &&
+	    (new_status != BASE_JD_SOFT_EVENT_RESET))
+		return -EINVAL;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*mapped_evt = new_status;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+	struct kbase_vmap_struct map;
+	void *user_result;
+	struct timespec ts;
+	struct base_dump_cpu_gpu_counters data;
+	u64 system_time;
+	u64 cycle_counter;
+	u64 jc = katom->jc;
+	struct kbase_context *kctx = katom->kctx;
+	int pm_active_err;
+
+	memset(&data, 0, sizeof(data));
+
+	/* Take the PM active reference as late as possible - otherwise, it could
+	 * delay suspend until we process the atom (which may be at the end of a
+	 * long chain of dependencies */
+	pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+	if (pm_active_err) {
+		struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+		/* We're suspended - queue this on the list of suspended jobs
+		 * Use dep_item[1], because dep_item[0] was previously in use
+		 * for 'waiting_soft_jobs'.
+		 */
+		mutex_lock(&js_devdata->runpool_mutex);
+		list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		/* Also adding this to the list of waiting soft job */
+		kbasep_add_waiting_soft_job(katom);
+
+		return pm_active_err;
+	}
+
+	kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
+									&ts);
+
+	kbase_pm_context_idle(kctx->kbdev);
+
+	data.sec = ts.tv_sec;
+	data.usec = ts.tv_nsec / 1000;
+	data.system_time = system_time;
+	data.cycle_counter = cycle_counter;
+
+	/* Assume this atom will be cancelled until we know otherwise */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* GPU_WR access is checked on the range for returning the result to
+	 * userspace for the following reasons:
+	 * - security, this is currently how imported user bufs are checked.
+	 * - userspace ddk guaranteed to assume region was mapped as GPU_WR */
+	user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map);
+	if (!user_result)
+		return 0;
+
+	memcpy(user_result, &data, sizeof(data));
+
+	kbase_vunmap(kctx, &map);
+
+	/* Atom was fine - mark it as done */
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	return 0;
+}
+
+#ifdef CONFIG_SYNC
+
+static enum base_jd_event_code kbase_fence_trigger(struct kbase_jd_atom *katom, int result)
+{
+	struct sync_pt *pt;
+	struct sync_timeline *timeline;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	if (!list_is_singular(&katom->fence->pt_list_head)) {
+#else
+	if (katom->fence->num_fences != 1) {
+#endif
+		/* Not exactly one item in the list - so it didn't (directly) come from us */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	pt = list_first_entry(&katom->fence->pt_list_head, struct sync_pt, pt_list);
+#else
+	pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
+#endif
+	timeline = sync_pt_parent(pt);
+
+	if (!kbase_sync_timeline_is_ours(timeline)) {
+		/* Fence has a sync_pt which isn't ours! */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	kbase_sync_signal_pt(pt, result);
+
+	sync_timeline_signal(timeline);
+
+	return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+static void kbase_fence_wait_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(kctx->kbdev);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter)
+{
+	struct kbase_jd_atom *katom = container_of(waiter, struct kbase_jd_atom, sync_waiter);
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != katom);
+
+	kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	/* Propagate the fence status to the atom.
+	 * If negative then cancel this atom and its dependencies.
+	 */
+	if (kbase_fence_get_status(fence) < 0)
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue
+	 *
+	 * The issue is that we may signal the timeline while holding kctx->jctx.lock and
+	 * the callbacks are run synchronously from sync_timeline_signal. So we simply defer the work.
+	 */
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, kbase_fence_wait_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+static int kbase_fence_wait(struct kbase_jd_atom *katom)
+{
+	int ret;
+
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	KBASE_DEBUG_ASSERT(NULL != katom->kctx);
+
+	sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+	ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+	if (ret == 1) {
+		/* Already signalled */
+		return 0;
+	}
+
+	if (ret < 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+		INIT_WORK(&katom->work, kbase_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+	/* The timeout code will add this job to the list of waiting soft jobs.
+	 */
+	kbasep_add_waiting_with_timeout(katom);
+#else
+	kbasep_add_waiting_soft_job(katom);
+#endif
+
+	return 1;
+}
+
+static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+		/* The wait wasn't cancelled - leave the cleanup for kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+#endif /* CONFIG_SYNC */
+
+static void kbasep_soft_event_complete_job(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt)
+{
+	int cancel_timer = 1;
+	struct list_head *entry, *tmp;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, queue);
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			if (katom->jc == evt) {
+				list_del(&katom->queue);
+
+				katom->event_code = BASE_JD_EVENT_DONE;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				/* There are still other waiting jobs, we cannot
+				 * cancel the timer yet.
+				 */
+				cancel_timer = 0;
+			}
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			/* Keep the timer running if fence debug is enabled and
+			 * there are waiting fence jobs.
+			 */
+			cancel_timer = 0;
+			break;
+#endif
+		}
+	}
+
+	if (cancel_timer)
+		del_timer(&kctx->soft_job_timeout);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+static char *kbase_fence_debug_status_string(int status)
+{
+	if (status == 0)
+		return "signaled";
+	else if (status > 0)
+		return "active";
+	else
+		return "error";
+}
+
+static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep;
+
+		list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) {
+			if (dep->status == KBASE_JD_ATOM_STATE_UNUSED ||
+			    dep->status == KBASE_JD_ATOM_STATE_COMPLETED)
+				continue;
+
+			if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					== BASE_JD_REQ_SOFT_FENCE_TRIGGER) {
+				struct sync_fence *fence = dep->fence;
+				int status = kbase_fence_get_status(fence);
+
+				/* Found blocked trigger fence. */
+				dev_warn(dev,
+					 "\tVictim trigger atom %d fence [%p] %s: %s\n",
+					 kbase_jd_atom_id(kctx, dep),
+					 fence, fence->name,
+					 kbase_fence_debug_status_string(status));
+			}
+
+			kbase_fence_debug_check_atom(dep);
+		}
+	}
+}
+
+static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = katom->kctx->kbdev->dev;
+	struct sync_fence *fence = katom->fence;
+	int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms);
+	int status = kbase_fence_get_status(fence);
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+
+	dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n",
+		 kctx->tgid, kctx->id,
+		 kbase_jd_atom_id(kctx, katom),
+		 fence, timeout_ms);
+	dev_warn(dev, "\tGuilty fence [%p] %s: %s\n",
+		 fence, fence->name,
+		 kbase_fence_debug_status_string(status));
+
+	/* Search for blocked trigger atoms */
+	kbase_fence_debug_check_atom(katom);
+
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+
+	/* Dump out the full state of all the Android sync fences.
+	 * The function sync_dump() isn't exported to modules, so force
+	 * sync_fence_wait() to time out to trigger sync_dump().
+	 */
+	sync_fence_wait(fence, 1);
+}
+
+struct kbase_fence_debug_work {
+	struct kbase_jd_atom *katom;
+	struct work_struct work;
+};
+
+static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work)
+{
+	struct kbase_fence_debug_work *w = container_of(work,
+			struct kbase_fence_debug_work, work);
+	struct kbase_jd_atom *katom = w->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_fence_debug_wait_timeout(katom);
+	mutex_unlock(&kctx->jctx.lock);
+
+	kfree(w);
+}
+
+static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_debug_work *work;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Enqueue fence debug worker. Use job_done_wq to get
+	 * debug print ordered with job completion.
+	 */
+	work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC);
+	/* Ignore allocation failure. */
+	if (work) {
+		work->katom = katom;
+		INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker);
+		queue_work(kctx->jctx.job_done_wq, &work->work);
+	}
+}
+#endif /* CONFIG_MALI_FENCE_DEBUG */
+
+void kbasep_soft_job_timeout_worker(unsigned long data)
+{
+	struct kbase_context *kctx = (struct kbase_context *)data;
+	u32 timeout_ms = (u32)atomic_read(
+			&kctx->kbdev->js_data.soft_job_timeout_ms);
+	struct timer_list *timer = &kctx->soft_job_timeout;
+	ktime_t cur_time = ktime_get();
+	bool restarting = false;
+	unsigned long lflags;
+	struct list_head *entry, *tmp;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(entry,
+				struct kbase_jd_atom, queue);
+		s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time,
+					katom->start_timestamp));
+
+		if (elapsed_time < (s64)timeout_ms) {
+			restarting = true;
+			continue;
+		}
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			/* Take it out of the list to ensure that it
+			 * will be cancelled in all cases
+			 */
+			list_del(&katom->queue);
+
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			INIT_WORK(&katom->work, kbasep_soft_event_complete_job);
+			queue_work(kctx->jctx.job_done_wq, &katom->work);
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			kbase_fence_debug_timeout(katom);
+			break;
+#endif
+		}
+	}
+
+	if (restarting)
+		mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms));
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned char status;
+
+	/* The status of this soft-job is stored in jc */
+	if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return 0;
+	}
+
+	if (status == BASE_JD_SOFT_EVENT_SET)
+		return 0; /* Event already set, nothing to do */
+
+	kbasep_add_waiting_with_timeout(katom);
+
+	return 1;
+}
+
+static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom,
+				     unsigned char new_status)
+{
+	/* Complete jobs waiting on the same event */
+	struct kbase_context *kctx = katom->kctx;
+
+	if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, katom->jc);
+}
+
+/**
+ * kbase_soft_event_update() - Update soft event state
+ * @kctx: Pointer to context
+ * @event: Event to update
+ * @new_status: New status value of event
+ *
+ * Update the event, and wake up any atoms waiting for the event.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+int kbase_soft_event_update(struct kbase_context *kctx,
+			     u64 event,
+			     unsigned char new_status)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->jctx.lock);
+
+	if (kbasep_write_soft_event_status(kctx, event, new_status)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, event);
+
+out:
+	mutex_unlock(&kctx->jctx.lock);
+
+	return err;
+}
+
+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
+{
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+struct kbase_debug_copy_buffer {
+	size_t size;
+	struct page **pages;
+	int nr_pages;
+	size_t offset;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+
+	struct page **extres_pages;
+	int nr_extres_pages;
+};
+
+static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer)
+{
+	struct page **pages = buffer->extres_pages;
+	int nr_pages = buffer->nr_extres_pages;
+
+	if (pages) {
+		int i;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *pg = pages[i];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(pages);
+	}
+}
+
+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+
+	if (!buffers)
+		return;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	for (i = 0; i < nr; i++) {
+		int p;
+		struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc;
+
+		if (!buffers[i].pages)
+			break;
+		for (p = 0; p < buffers[i].nr_pages; p++) {
+			struct page *pg = buffers[i].pages[p];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(buffers[i].pages);
+		if (gpu_alloc) {
+			switch (gpu_alloc->type) {
+			case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+			{
+				free_user_buffer(&buffers[i]);
+				break;
+			}
+			default:
+				/* Nothing to be done. */
+				break;
+			}
+			kbase_mem_phy_alloc_put(gpu_alloc);
+		}
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+	kfree(buffers);
+
+	katom->jc = 0;
+}
+
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers;
+	struct base_jd_debug_copy_buffer *user_buffers = NULL;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+	int ret = 0;
+	void __user *user_structs = (void __user *)(uintptr_t)katom->jc;
+
+	if (!user_structs)
+		return -EINVAL;
+
+	buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers) {
+		ret = -ENOMEM;
+		katom->jc = 0;
+		goto out_cleanup;
+	}
+	katom->jc = (u64)(uintptr_t)buffers;
+
+	user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL);
+
+	if (!user_buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+
+	ret = copy_from_user(user_buffers, user_structs,
+			sizeof(*user_buffers)*nr);
+	if (ret)
+		goto out_cleanup;
+
+	for (i = 0; i < nr; i++) {
+		u64 addr = user_buffers[i].address;
+		u64 page_addr = addr & PAGE_MASK;
+		u64 end_page_addr = addr + user_buffers[i].size - 1;
+		u64 last_page_addr = end_page_addr & PAGE_MASK;
+		int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1;
+		int pinned_pages;
+		struct kbase_va_region *reg;
+		struct base_external_resource user_extres;
+
+		if (!addr)
+			continue;
+
+		buffers[i].nr_pages = nr_pages;
+		buffers[i].offset = addr & ~PAGE_MASK;
+		if (buffers[i].offset >= PAGE_SIZE) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+		buffers[i].size = user_buffers[i].size;
+
+		buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *),
+				GFP_KERNEL);
+		if (!buffers[i].pages) {
+			ret = -ENOMEM;
+			goto out_cleanup;
+		}
+
+		pinned_pages = get_user_pages_fast(page_addr,
+					nr_pages,
+					1, /* Write */
+					buffers[i].pages);
+		if (pinned_pages < 0) {
+			ret = pinned_pages;
+			goto out_cleanup;
+		}
+		if (pinned_pages != nr_pages) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		user_extres = user_buffers[i].extres;
+		if (user_extres.ext_resource == 0ULL) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		kbase_gpu_vm_lock(katom->kctx);
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, user_extres.ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+		if (NULL == reg || NULL == reg->gpu_alloc ||
+				(reg->flags & KBASE_REG_FREE)) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		buffers[i].nr_extres_pages = reg->nr_pages;
+
+		if (reg->nr_pages*PAGE_SIZE != buffers[i].size)
+			dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n");
+
+		switch (reg->gpu_alloc->type) {
+		case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		{
+			struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+			unsigned long nr_pages =
+				alloc->imported.user_buf.nr_pages;
+
+			if (alloc->imported.user_buf.mm != current->mm) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			buffers[i].extres_pages = kcalloc(nr_pages,
+					sizeof(struct page *), GFP_KERNEL);
+			if (!buffers[i].extres_pages) {
+				ret = -ENOMEM;
+				goto out_unlock;
+			}
+
+			ret = get_user_pages_fast(
+					alloc->imported.user_buf.address,
+					nr_pages, 0,
+					buffers[i].extres_pages);
+			if (ret != nr_pages)
+				goto out_unlock;
+			ret = 0;
+			break;
+		}
+		case KBASE_MEM_TYPE_IMPORTED_UMP:
+		{
+			dev_warn(katom->kctx->kbdev->dev,
+					"UMP is not supported for debug_copy jobs\n");
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		default:
+			/* Nothing to be done. */
+			break;
+		}
+		kbase_gpu_vm_unlock(katom->kctx);
+	}
+	kfree(user_buffers);
+
+	return ret;
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+
+out_cleanup:
+	kfree(buffers);
+	kfree(user_buffers);
+
+	/* Frees allocated memory for kbase_debug_copy_job struct, including
+	 * members, and sets jc to 0 */
+	kbase_debug_copy_finish(katom);
+	return ret;
+}
+
+static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+		void *extres_page, struct page **pages, unsigned int nr_pages,
+		unsigned int *target_page_nr, size_t offset, size_t *to_copy)
+{
+	void *target_page = kmap(pages[*target_page_nr]);
+	size_t chunk = PAGE_SIZE-offset;
+
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	chunk = min(chunk, *to_copy);
+
+	memcpy(target_page + offset, extres_page, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+
+	*target_page_nr += 1;
+	if (*target_page_nr >= nr_pages)
+		return;
+
+	target_page = kmap(pages[*target_page_nr]);
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(target_page);
+
+	chunk = min(offset, *to_copy);
+	memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+}
+
+static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data)
+{
+	unsigned int i;
+	unsigned int target_page_nr = 0;
+	struct page **pages = buf_data->pages;
+	u64 offset = buf_data->offset;
+	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
+	size_t to_copy = min(extres_size, buf_data->size);
+	struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc;
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(pages != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	if (!gpu_alloc) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	switch (gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+	{
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+			struct page *pg = buf_data->extres_pages[i];
+			void *extres_page = kmap(pg);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			kunmap(pg);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		break;
+	}
+	break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf;
+
+		KBASE_DEBUG_ASSERT(dma_buf != NULL);
+		KBASE_DEBUG_ASSERT(dma_buf->size ==
+				   buf_data->nr_extres_pages * PAGE_SIZE);
+
+		ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, buf_data->nr_extres_pages*PAGE_SIZE,
+#endif
+				DMA_FROM_DEVICE);
+		if (ret)
+			goto out_unlock;
+
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+
+			void *extres_page = dma_buf_kmap(dma_buf, i);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			dma_buf_kunmap(dma_buf, i, extres_page);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, buf_data->nr_extres_pages*PAGE_SIZE,
+#endif
+				DMA_FROM_DEVICE);
+		break;
+	}
+#endif
+	default:
+		ret = -EINVAL;
+	}
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+
+}
+
+static int kbase_debug_copy(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+
+	for (i = 0; i < katom->nr_extres; i++) {
+		int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]);
+
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	struct base_jit_alloc_info *info;
+	int ret;
+
+	/* Fail the job if there is no info structure */
+	if (!data) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(info, data, sizeof(*info)) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* If the ID is zero then fail the job */
+	if (info->id == 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Sanity check that the PA fits within the VA */
+	if (info->va_pages < info->commit_pages) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Ensure the GPU address is correctly aligned */
+	if ((info->gpu_alloc_addr & 0x7) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Replace the user pointer with our kernel allocated info structure */
+	katom->jc = (u64)(uintptr_t) info;
+
+	/*
+	 * Note:
+	 * The provided info->gpu_alloc_addr isn't validated here as
+	 * userland can cache allocations which means that even
+	 * though the region is valid it doesn't represent the
+	 * same thing it used to.
+	 *
+	 * Complete validation of va_pages, commit_pages and extent
+	 * isn't done here as it will be done during the call to
+	 * kbase_mem_alloc.
+	 */
+	return 0;
+
+free_info:
+	kfree(info);
+fail:
+	katom->jc = 0;
+	return ret;
+}
+
+static void kbase_jit_allocate_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct base_jit_alloc_info *info;
+	struct kbase_va_region *reg;
+	struct kbase_vmap_struct mapping;
+	u64 *ptr;
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+
+	/* The JIT ID is still in use so fail the allocation */
+	if (kctx->jit_alloc[info->id]) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return;
+	}
+
+	/*
+	 * Mark the allocation so we know it's in use even if the
+	 * allocation itself fails.
+	 */
+	kctx->jit_alloc[info->id] = (struct kbase_va_region *) -1;
+
+	/* Create a JIT allocation */
+	reg = kbase_jit_allocate(kctx, info);
+	if (!reg) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return;
+	}
+
+	/*
+	 * Write the address of the JIT allocation to the user provided
+	 * GPU allocation.
+	 */
+	ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+			&mapping);
+	if (!ptr) {
+		/*
+		 * Leave the allocation "live" as the JIT free jit will be
+		 * submitted anyway.
+		 */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	*ptr = reg->start_pfn << PAGE_SHIFT;
+	kbase_vunmap(kctx, &mapping);
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	/*
+	 * Bind it to the user provided ID. Do this last so we can check for
+	 * the JIT free racing this JIT alloc job.
+	 */
+	kctx->jit_alloc[info->id] = reg;
+}
+
+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom)
+{
+	struct base_jit_alloc_info *info;
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(info);
+}
+
+static void kbase_jit_free_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u8 id = (u8) katom->jc;
+
+	/*
+	 * If the ID is zero or it is not in use yet then fail the job.
+	 */
+	if ((id == 0) || (kctx->jit_alloc[id] == NULL)) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	/*
+	 * If the ID is valid but the allocation request failed still succeed
+	 * this soft job but don't try and free the allocation.
+	 */
+	if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1)
+		kbase_jit_free(kctx, kctx->jit_alloc[id]);
+
+	kctx->jit_alloc[id] = NULL;
+}
+
+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
+{
+	__user struct base_external_resource_list *user_ext_res;
+	struct base_external_resource_list *ext_res;
+	u64 count = 0;
+	size_t copy_size;
+	int ret;
+
+	user_ext_res = (__user struct base_external_resource_list *)
+			(uintptr_t) katom->jc;
+
+	/* Fail the job if there is no info structure */
+	if (!user_ext_res) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Is the number of external resources in range? */
+	if (!count || count > BASE_EXT_RES_COUNT_MAX) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	copy_size = sizeof(*ext_res);
+	copy_size += sizeof(struct base_external_resource) * (count - 1);
+	ext_res = kzalloc(copy_size, GFP_KERNEL);
+	if (!ext_res) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/*
+	 * Overwrite the count with the first value incase it was changed
+	 * after the fact.
+	 */
+	ext_res->count = count;
+
+	/*
+	 * Replace the user pointer with our kernel allocated
+	 * ext_res structure.
+	 */
+	katom->jc = (u64)(uintptr_t) ext_res;
+
+	return 0;
+
+free_info:
+	kfree(ext_res);
+fail:
+	return ret;
+}
+
+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
+{
+	struct base_external_resource_list *ext_res;
+	int i;
+	bool failed = false;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	if (!ext_res)
+		goto failed_jc;
+
+	kbase_gpu_vm_lock(katom->kctx);
+
+	for (i = 0; i < ext_res->count; i++) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		if (map) {
+			if (!kbase_sticky_resource_acquire(katom->kctx,
+					gpu_addr))
+				goto failed_loop;
+		} else
+			if (!kbase_sticky_resource_release(katom->kctx, NULL,
+					gpu_addr))
+				failed = true;
+	}
+
+	/*
+	 * In the case of unmap we continue unmapping other resources in the
+	 * case of failure but will always report failure if _any_ unmap
+	 * request fails.
+	 */
+	if (failed)
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	else
+		katom->event_code = BASE_JD_EVENT_DONE;
+
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	return;
+
+failed_loop:
+	while (--i > 0) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+
+		kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr);
+	}
+
+	katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	kbase_gpu_vm_unlock(katom->kctx);
+
+failed_jc:
+	return;
+}
+
+static void kbase_ext_res_finish(struct kbase_jd_atom *katom)
+{
+	struct base_external_resource_list *ext_res;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(ext_res);
+}
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		return kbase_dump_cpu_gpu_time(katom);
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		KBASE_DEBUG_ASSERT(katom->fence != NULL);
+		katom->event_code = kbase_fence_trigger(katom, katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+		/* Release the reference as we don't need it any more */
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		return kbase_fence_wait(katom);
+#endif				/* CONFIG_SYNC */
+	case BASE_JD_REQ_SOFT_REPLAY:
+		return kbase_replay_process(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		return kbasep_soft_event_wait(katom);
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET);
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+	{
+		int res = kbase_debug_copy(katom);
+
+		if (res)
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		break;
+	}
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return -EINVAL; /* Temporarily disabled */
+		kbase_jit_allocate_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		return -EINVAL; /* Temporarily disabled */
+		kbase_jit_free_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_process(katom, true);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_process(katom, false);
+		break;
+	}
+
+	/* Atom is complete */
+	return 0;
+}
+
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		kbase_fence_cancel_wait(katom);
+		break;
+#endif
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		kbasep_soft_event_cancel_job(katom);
+		break;
+	default:
+		/* This soft-job doesn't support cancellation! */
+		KBASE_DEBUG_ASSERT(0);
+	}
+}
+
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		{
+			if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK))
+				return -EINVAL;
+		}
+		break;
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		{
+			struct base_fence fence;
+			int fd;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			fd = kbase_stream_create_fence(fence.basep.stream_fd);
+			if (fd < 0)
+				return -EINVAL;
+
+			katom->fence = sync_fence_fdget(fd);
+
+			if (katom->fence == NULL) {
+				/* The only way the fence can be NULL is if userspace closed it for us.
+				 * So we don't need to clear it up */
+				return -EINVAL;
+			}
+			fence.basep.fd = fd;
+			if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+				katom->fence = NULL;
+				sys_close(fd);
+				return -EINVAL;
+			}
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		{
+			struct base_fence fence;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			/* Get a reference to the fence object */
+			katom->fence = sync_fence_fdget(fence.basep.fd);
+			if (katom->fence == NULL)
+				return -EINVAL;
+		}
+		break;
+#endif				/* CONFIG_SYNC */
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_prepare(katom);
+	case BASE_JD_REQ_SOFT_REPLAY:
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		if (katom->jc == 0)
+			return -EINVAL;
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		return kbase_debug_copy_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		return kbase_ext_res_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		return kbase_ext_res_prepare(katom);
+	default:
+		/* Unsupported soft-job */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void kbase_finish_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		/* Nothing to do */
+		break;
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		/* If fence has not yet been signalled, do it now */
+		if (katom->fence) {
+			kbase_fence_trigger(katom, katom->event_code ==
+					BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+			sync_fence_put(katom->fence);
+			katom->fence = NULL;
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		/* Release the reference to the fence object */
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+		break;
+#endif				/* CONFIG_SYNC */
+
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		kbase_debug_copy_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_finish(katom);
+		break;
+	}
+}
+
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
+{
+	LIST_HEAD(local_suspended_soft_jobs);
+	struct kbase_jd_atom *tmp_iter;
+	struct kbase_jd_atom *katom_iter;
+	struct kbasep_js_device_data *js_devdata;
+	bool resched = false;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	js_devdata = &kbdev->js_data;
+
+	/* Move out the entire list */
+	mutex_lock(&js_devdata->runpool_mutex);
+	list_splice_init(&js_devdata->suspended_soft_jobs_list,
+			&local_suspended_soft_jobs);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/*
+	 * Each atom must be detached from the list and ran separately -
+	 * it could be re-added to the old list, but this is unlikely
+	 */
+	list_for_each_entry_safe(katom_iter, tmp_iter,
+			&local_suspended_soft_jobs, dep_item[1]) {
+		struct kbase_context *kctx = katom_iter->kctx;
+
+		mutex_lock(&kctx->jctx.lock);
+
+		/* Remove from the global list */
+		list_del(&katom_iter->dep_item[1]);
+		/* Remove from the context's list of waiting soft jobs */
+		kbasep_remove_waiting_soft_job(katom_iter);
+
+		if (kbase_process_soft_job(katom_iter) == 0) {
+			kbase_finish_soft_job(katom_iter);
+			resched |= jd_done_nolock(katom_iter, NULL);
+		} else {
+			KBASE_DEBUG_ASSERT((katom_iter->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE)
+					!= BASE_JD_REQ_SOFT_REPLAY);
+		}
+
+		mutex_unlock(&kctx->jctx.lock);
+	}
+
+	if (resched)
+		kbase_js_sched_all(kbdev);
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
new file mode 100644
index 0000000..c98762c
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
@@ -0,0 +1,23 @@
+ /*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include "mali_kbase_strings.h"
+
+#define KBASE_DRV_NAME "mali"
+#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline"
+
+const char kbase_drv_name[] = KBASE_DRV_NAME;
+const char kbase_timeline_name[] = KBASE_TIMELINE_NAME;
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
new file mode 100644
index 0000000..41b8fdb
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
@@ -0,0 +1,19 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+extern const char kbase_drv_name[];
+extern const char kbase_timeline_name[];
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c
new file mode 100755
index 0000000..c5fb981
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c
@@ -0,0 +1,182 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_SYNC
+
+#include <linux/seq_file.h>
+#include "sync.h"
+#include <mali_kbase.h>
+#include <mali_kbase_sync.h>
+
+struct mali_sync_timeline {
+	struct sync_timeline timeline;
+	atomic_t counter;
+	atomic_t signalled;
+};
+
+struct mali_sync_pt {
+	struct sync_pt pt;
+	int order;
+	int result;
+};
+
+static struct mali_sync_timeline *to_mali_sync_timeline(struct sync_timeline *timeline)
+{
+	return container_of(timeline, struct mali_sync_timeline, timeline);
+}
+
+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, pt);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_pt *new_mpt;
+	struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), sizeof(struct mali_sync_pt));
+
+	if (!new_pt)
+		return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+	new_mpt->order = mpt->order;
+	new_mpt->result = mpt->result;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+	int result = mpt->result;
+
+	int diff = atomic_read(&mtl->signalled) - mpt->order;
+
+	if (diff >= 0)
+		return (result < 0) ? result : 1;
+
+	return 0;
+}
+
+static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
+	struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
+
+	int diff = ma->order - mb->order;
+
+	if (diff == 0)
+		return 0;
+
+	return (diff < 0) ? -1 : 1;
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str,
+			       int size)
+{
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
+
+	snprintf(str, size, "%d", atomic_read(&mtl->signalled));
+}
+
+static void pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+
+	snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
+}
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name = "Mali",
+	.dup = timeline_dup,
+	.has_signaled = timeline_has_signaled,
+	.compare = timeline_compare,
+	.timeline_value_str = timeline_value_str,
+	.pt_value_str       = pt_value_str,
+};
+
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
+{
+	return timeline->ops == &mali_timeline_ops;
+}
+
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name)
+{
+	struct sync_timeline *tl;
+	struct mali_sync_timeline *mtl;
+
+	tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline), name);
+	if (!tl)
+		return NULL;
+
+	/* Set the counter in our private struct */
+	mtl = to_mali_sync_timeline(tl);
+	atomic_set(&mtl->counter, 0);
+	atomic_set(&mtl->signalled, 0);
+
+	return tl;
+}
+
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+	struct sync_pt *pt = sync_pt_create(parent, sizeof(struct mali_sync_pt));
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+	struct mali_sync_pt *mpt;
+
+	if (!pt)
+		return NULL;
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->order = atomic_inc_return(&mtl->counter);
+	mpt->result = 0;
+
+	return pt;
+}
+
+void kbase_sync_signal_pt(struct sync_pt *pt, int result)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+	int signalled;
+	int diff;
+
+	mpt->result = result;
+
+	do {
+		signalled = atomic_read(&mtl->signalled);
+
+		diff = signalled - mpt->order;
+
+		if (diff > 0) {
+			/* The timeline is already at or ahead of this point.
+			 * This should not happen unless userspace has been
+			 * signalling fences out of order, so warn but don't
+			 * violate the sync_pt API.
+			 * The warning is only in debug builds to prevent
+			 * a malicious user being able to spam dmesg.
+			 */
+#ifdef CONFIG_MALI_DEBUG
+			pr_err("Fences were triggered in a different order to allocation!");
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+	} while (atomic_cmpxchg(&mtl->signalled, signalled, mpt->order) != signalled);
+}
+
+#endif				/* CONFIG_SYNC */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
new file mode 100755
index 0000000..820bddc
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -0,0 +1,100 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync.h
+ *
+ */
+
+#ifndef MALI_KBASE_SYNC_H
+#define MALI_KBASE_SYNC_H
+
+#include "sync.h"
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+/* For backwards compatiblility with kernels before 3.17. After 3.17
+ * sync_pt_parent is included in the kernel. */
+static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
+{
+	return pt->parent;
+}
+#endif
+
+static inline int kbase_fence_get_status(struct sync_fence *fence)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	return fence->status;
+#else
+	return atomic_read(&fence->status);
+#endif
+}
+
+/*
+ * Create a stream object.
+ * Built on top of timeline object.
+ * Exposed as a file descriptor.
+ * Life-time controlled via the file descriptor:
+ * - dup to add a ref
+ * - close to remove a ref
+ */
+int kbase_stream_create(const char *name, int *const out_fd);
+
+/*
+ * Create a fence in a stream object
+ */
+int kbase_stream_create_fence(int tl_fd);
+
+/*
+ * Validate a fd to be a valid fence
+ * No reference is taken.
+ *
+ * This function is only usable to catch unintentional user errors early,
+ * it does not stop malicious code changing the fd after this function returns.
+ */
+int kbase_fence_validate(int fd);
+
+/* Returns true if the specified timeline is allocated by Mali */
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline);
+
+/* Allocates a timeline for Mali
+ *
+ * One timeline should be allocated per API context.
+ */
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name);
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by kbase_sync_timeline_alloc
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ */
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent);
+
+/* Signals a particular sync point
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ *
+ * If they are signalled in the wrong order then a message will be printed in debug
+ * builds and otherwise attempts to signal order sync_pts will be ignored.
+ *
+ * result can be negative to indicate error, any other value is interpreted as success.
+ */
+void kbase_sync_signal_pt(struct sync_pt *pt, int result);
+
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
new file mode 100755
index 0000000..b9baa91
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync_user.c
+ *
+ */
+
+#ifdef CONFIG_SYNC
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <mali_kbase_sync.h>
+
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+	struct sync_timeline *tl;
+
+	tl = (struct sync_timeline *)file->private_data;
+	BUG_ON(!tl);
+	sync_timeline_destroy(tl);
+	return 0;
+}
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE,
+	.release = kbase_stream_close,
+};
+
+int kbase_stream_create(const char *name, int *const out_fd)
+{
+	struct sync_timeline *tl;
+
+	BUG_ON(!out_fd);
+
+	tl = kbase_sync_timeline_alloc(name);
+	if (!tl)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY | O_CLOEXEC);
+
+	if (*out_fd < 0) {
+		sync_timeline_destroy(tl);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int kbase_stream_create_fence(int tl_fd)
+{
+	struct sync_timeline *tl;
+	struct sync_pt *pt;
+	struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+	struct files_struct *files;
+	struct fdtable *fdt;
+#endif
+	int fd;
+	struct file *tl_file;
+
+	tl_file = fget(tl_fd);
+	if (tl_file == NULL)
+		return -EBADF;
+
+	if (tl_file->f_op != &stream_fops) {
+		fd = -EBADF;
+		goto out;
+	}
+
+	tl = tl_file->private_data;
+
+	pt = kbase_sync_pt_alloc(tl);
+	if (!pt) {
+		fd = -EFAULT;
+		goto out;
+	}
+
+	fence = sync_fence_create("mali_fence", pt);
+	if (!fence) {
+		sync_pt_free(pt);
+		fd = -EFAULT;
+		goto out;
+	}
+
+	/* from here the fence owns the sync_pt */
+
+	/* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+#else
+	fd = get_unused_fd();
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+
+	files = current->files;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	__set_close_on_exec(fd, fdt);
+#else
+	FD_SET(fd, fdt->close_on_exec);
+#endif
+	spin_unlock(&files->file_lock);
+#endif  /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+	/* bind fence to the new fd */
+	sync_fence_install(fence, fd);
+
+ out:
+	fput(tl_file);
+
+	return fd;
+}
+
+int kbase_fence_validate(int fd)
+{
+	struct sync_fence *fence;
+
+	fence = sync_fence_fdget(fd);
+	if (!fence)
+		return -EINVAL;
+
+	sync_fence_put(fence);
+	return 0;
+}
+
+#endif				/* CONFIG_SYNC */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
new file mode 100755
index 0000000..4c1535f
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -0,0 +1,2342 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* The version of swtrace protocol used in timeline stream. */
+#define SWTRACE_VERSION    3
+
+/* The maximum expected length of string in tracepoint descriptor. */
+#define STRLEN_MAX         64 /* bytes */
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC       1000000000ull /* ns */
+
+/* The period of autoflush checker execution in milliseconds. */
+#define AUTOFLUSH_INTERVAL 1000 /* ms */
+
+/* The maximum size of a single packet used by timeline. */
+#define PACKET_SIZE        4096 /* bytes */
+
+/* The number of packets used by one timeline stream. */
+#define PACKET_COUNT       16
+
+/* The number of bytes reserved for packet header.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_HEADER_SIZE 8 /* bytes */
+
+/* The number of bytes reserved for packet sequence number.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_NUMBER_SIZE 4 /* bytes */
+
+/* Packet header - first word.
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_STREAMID_POS  0
+#define PACKET_STREAMID_LEN  8
+#define PACKET_RSVD1_POS     (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
+#define PACKET_RSVD1_LEN     8
+#define PACKET_TYPE_POS      (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
+#define PACKET_TYPE_LEN      3
+#define PACKET_CLASS_POS     (PACKET_TYPE_POS + PACKET_TYPE_LEN)
+#define PACKET_CLASS_LEN     7
+#define PACKET_FAMILY_POS    (PACKET_CLASS_POS + PACKET_CLASS_LEN)
+#define PACKET_FAMILY_LEN    6
+
+/* Packet header - second word
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_LENGTH_POS    0
+#define PACKET_LENGTH_LEN    24
+#define PACKET_SEQBIT_POS    (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
+#define PACKET_SEQBIT_LEN    1
+#define PACKET_RSVD2_POS     (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
+#define PACKET_RSVD2_LEN     7
+
+/* Types of streams generated by timeline.
+ * Order is significant! Header streams must precede respective body streams. */
+enum tl_stream_type {
+	TL_STREAM_TYPE_OBJ_HEADER,
+	TL_STREAM_TYPE_OBJ_SUMMARY,
+	TL_STREAM_TYPE_OBJ,
+	TL_STREAM_TYPE_AUX_HEADER,
+	TL_STREAM_TYPE_AUX,
+
+	TL_STREAM_TYPE_COUNT
+};
+
+/* Timeline packet family ids.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_family {
+	TL_PACKET_FAMILY_CTRL = 0, /* control packets */
+	TL_PACKET_FAMILY_TL   = 1, /* timeline packets */
+
+	TL_PACKET_FAMILY_COUNT
+};
+
+/* Packet classes used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_class {
+	TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
+	TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
+};
+
+/* Packet types used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_type {
+	TL_PACKET_TYPE_HEADER  = 0, /* stream's header/directory */
+	TL_PACKET_TYPE_BODY    = 1, /* stream's body */
+	TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
+};
+
+/* Message ids of trace events that are recorded in the timeline stream. */
+enum tl_msg_id_obj {
+	/* Timeline object events. */
+	KBASE_TL_NEW_CTX,
+	KBASE_TL_NEW_GPU,
+	KBASE_TL_NEW_LPU,
+	KBASE_TL_NEW_ATOM,
+	KBASE_TL_NEW_AS,
+	KBASE_TL_DEL_CTX,
+	KBASE_TL_DEL_ATOM,
+	KBASE_TL_LIFELINK_LPU_GPU,
+	KBASE_TL_LIFELINK_AS_GPU,
+	KBASE_TL_RET_CTX_LPU,
+	KBASE_TL_RET_ATOM_CTX,
+	KBASE_TL_RET_ATOM_LPU,
+	KBASE_TL_NRET_CTX_LPU,
+	KBASE_TL_NRET_ATOM_CTX,
+	KBASE_TL_NRET_ATOM_LPU,
+	KBASE_TL_RET_AS_CTX,
+	KBASE_TL_NRET_AS_CTX,
+	KBASE_TL_RET_ATOM_AS,
+	KBASE_TL_NRET_ATOM_AS,
+	KBASE_TL_DEP_ATOM_ATOM,
+	KBASE_TL_NDEP_ATOM_ATOM,
+	KBASE_TL_RDEP_ATOM_ATOM,
+	KBASE_TL_ATTRIB_ATOM_CONFIG,
+	KBASE_TL_ATTRIB_ATOM_PRIORITY,
+	KBASE_TL_ATTRIB_ATOM_STATE,
+	KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE,
+	KBASE_TL_ATTRIB_AS_CONFIG,
+	KBASE_TL_EVENT_LPU_SOFTSTOP,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+
+	/* Job dump specific events. */
+	KBASE_JD_GPU_SOFT_RESET
+};
+
+/* Message ids of trace events that are recorded in the auxiliary stream. */
+enum tl_msg_id_aux {
+	KBASE_AUX_PM_STATE,
+	KBASE_AUX_PAGEFAULT,
+	KBASE_AUX_PAGESALLOC,
+	KBASE_AUX_DEVFREQ_TARGET
+};
+
+/*****************************************************************************/
+
+/**
+ * struct tl_stream - timeline stream structure
+ * @lock: message order lock
+ * @buffer: array of buffers
+ * @wbi: write buffer index
+ * @rbi: read buffer index
+ * @numbered: if non-zero stream's packets are sequentially numbered
+ * @autoflush_counter: counter tracking stream's autoflush state
+ *
+ * This structure holds information needed to construct proper packets in the
+ * timeline stream. Each message in sequence must bear timestamp that is greater
+ * to one in previous message in the same stream. For this reason lock is held
+ * throughout the process of message creation. Each stream contains set of
+ * buffers. Each buffer will hold one MIPE packet. In case there is no free
+ * space required to store incoming message the oldest buffer is discarded.
+ * Each packet in timeline body stream has sequence number embedded (this value
+ * must increment monotonically and is used by packets receiver to discover
+ * buffer overflows.
+ * Autoflush counter is set to negative number when there is no data pending
+ * for flush and it is set to zero on every update of the buffer. Autoflush
+ * timer will increment the counter by one on every expiry. In case there will
+ * be no activity on the buffer during two consecutive timer expiries, stream
+ * buffer will be flushed.
+ */
+struct tl_stream {
+	spinlock_t lock;
+
+	struct {
+		atomic_t size;              /* number of bytes in buffer */
+		char     data[PACKET_SIZE]; /* buffer's data */
+	} buffer[PACKET_COUNT];
+
+	atomic_t wbi;
+	atomic_t rbi;
+
+	int      numbered;
+	atomic_t autoflush_counter;
+};
+
+/**
+ * struct tp_desc - tracepoint message descriptor structure
+ * @id:        tracepoint ID identifying message in stream
+ * @id_str:    human readable version of tracepoint ID
+ * @name:      tracepoint description
+ * @arg_types: tracepoint's arguments types declaration
+ * @arg_names: comma separated list of tracepoint's arguments names
+ */
+struct tp_desc {
+	u32        id;
+	const char *id_str;
+	const char *name;
+	const char *arg_types;
+	const char *arg_names;
+};
+
+/*****************************************************************************/
+
+/* Configuration of timeline streams generated by kernel.
+ * Kernel emit only streams containing either timeline object events or
+ * auxiliary events. All streams have stream id value of 1 (as opposed to user
+ * space streams that have value of 0). */
+static const struct {
+	enum tl_packet_family pkt_family;
+	enum tl_packet_class  pkt_class;
+	enum tl_packet_type   pkt_type;
+	unsigned int          stream_id;
+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY,    1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY,    1}
+};
+
+/* The timeline streams generated by kernel. */
+static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT];
+
+/* Autoflush timer. */
+static struct timer_list autoflush_timer;
+
+/* If non-zero autoflush timer is active. */
+static atomic_t autoflush_timer_active;
+
+/* Reader lock. Only one reader is allowed to have access to the timeline
+ * streams at any given time. */
+static DEFINE_MUTEX(tl_reader_lock);
+
+/* Timeline stream event queue. */
+static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
+
+/* The timeline stream file operations functions. */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos);
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait);
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations kbasep_tlstream_fops = {
+	.release = kbasep_tlstream_release,
+	.read    = kbasep_tlstream_read,
+	.poll    = kbasep_tlstream_poll,
+};
+
+/* Descriptors of timeline messages transmitted in object events stream. */
+static const struct tp_desc tp_desc_obj[] = {
+	{
+		KBASE_TL_NEW_CTX,
+		__stringify(KBASE_TL_NEW_CTX),
+		"object ctx is created",
+		"@pII",
+		"ctx,ctx_nr,tgid"
+	},
+	{
+		KBASE_TL_NEW_GPU,
+		__stringify(KBASE_TL_NEW_GPU),
+		"object gpu is created",
+		"@pII",
+		"gpu,gpu_id,core_count"
+	},
+	{
+		KBASE_TL_NEW_LPU,
+		__stringify(KBASE_TL_NEW_LPU),
+		"object lpu is created",
+		"@pII",
+		"lpu,lpu_nr,lpu_fn"
+	},
+	{
+		KBASE_TL_NEW_ATOM,
+		__stringify(KBASE_TL_NEW_ATOM),
+		"object atom is created",
+		"@pI",
+		"atom,atom_nr"
+	},
+	{
+		KBASE_TL_NEW_AS,
+		__stringify(KBASE_TL_NEW_AS),
+		"address space object is created",
+		"@pI",
+		"address_space,as_nr"
+	},
+	{
+		KBASE_TL_DEL_CTX,
+		__stringify(KBASE_TL_DEL_CTX),
+		"context is destroyed",
+		"@p",
+		"ctx"
+	},
+	{
+		KBASE_TL_DEL_ATOM,
+		__stringify(KBASE_TL_DEL_ATOM),
+		"atom is destroyed",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_LIFELINK_LPU_GPU,
+		__stringify(KBASE_TL_LIFELINK_LPU_GPU),
+		"lpu is deleted with gpu",
+		"@pp",
+		"lpu,gpu"
+	},
+	{
+		KBASE_TL_LIFELINK_AS_GPU,
+		__stringify(KBASE_TL_LIFELINK_AS_GPU),
+		"address space is deleted with gpu",
+		"@pp",
+		"address_space,gpu"
+	},
+	{
+		KBASE_TL_RET_CTX_LPU,
+		__stringify(KBASE_TL_RET_CTX_LPU),
+		"context is retained by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_RET_ATOM_CTX,
+		__stringify(KBASE_TL_RET_ATOM_CTX),
+		"atom is retained by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_LPU,
+		__stringify(KBASE_TL_RET_ATOM_LPU),
+		"atom is retained by lpu",
+		"@pps",
+		"atom,lpu,attrib_match_list"
+	},
+	{
+		KBASE_TL_NRET_CTX_LPU,
+		__stringify(KBASE_TL_NRET_CTX_LPU),
+		"context is released by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_NRET_ATOM_CTX,
+		__stringify(KBASE_TL_NRET_ATOM_CTX),
+		"atom is released by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_NRET_ATOM_LPU,
+		__stringify(KBASE_TL_NRET_ATOM_LPU),
+		"atom is released by lpu",
+		"@pp",
+		"atom,lpu"
+	},
+	{
+		KBASE_TL_RET_AS_CTX,
+		__stringify(KBASE_TL_RET_AS_CTX),
+		"address space is retained by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_NRET_AS_CTX,
+		__stringify(KBASE_TL_NRET_AS_CTX),
+		"address space is released by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_AS,
+		__stringify(KBASE_TL_RET_ATOM_AS),
+		"atom is retained by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_NRET_ATOM_AS,
+		__stringify(KBASE_TL_NRET_ATOM_AS),
+		"atom is released by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_DEP_ATOM_ATOM,
+		__stringify(KBASE_TL_DEP_ATOM_ATOM),
+		"atom2 depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_NDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_NDEP_ATOM_ATOM),
+		"atom2 no longer depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_RDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_RDEP_ATOM_ATOM),
+		"resolved dependecy of atom2 depending on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_ATOM_CONFIG),
+		"atom job slot attributes",
+		"@pLLI",
+		"atom,descriptor,affinity,config"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_PRIORITY,
+		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY),
+		"atom priority",
+		"@pI",
+		"atom,prio"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_STATE,
+		__stringify(KBASE_TL_ATTRIB_ATOM_STATE),
+		"atom state",
+		"@pI",
+		"atom,state"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE,
+		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE),
+		"atom caused priority change",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_ATTRIB_AS_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_AS_CONFIG),
+		"address space attributes",
+		"@pLLL",
+		"address_space,transtab,memattr,transcfg"
+	},
+	{
+		KBASE_TL_EVENT_LPU_SOFTSTOP,
+		__stringify(KBASE_TL_EVENT_LPU_SOFTSTOP),
+		"softstop event on given lpu",
+		"@p",
+		"lpu"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX),
+		"atom softstopped",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+		__stringify(KBASE_TL_EVENT_SOFTSTOP_ISSUE),
+		"atom softstop issued",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_JD_GPU_SOFT_RESET,
+		__stringify(KBASE_JD_GPU_SOFT_RESET),
+		"gpu soft reset",
+		"@p",
+		"gpu"
+	},
+};
+
+/* Descriptors of timeline messages transmitted in auxiliary events stream. */
+static const struct tp_desc tp_desc_aux[] = {
+	{
+		KBASE_AUX_PM_STATE,
+		__stringify(KBASE_AUX_PM_STATE),
+		"PM state",
+		"@IL",
+		"core_type,core_state_bitset"
+	},
+	{
+		KBASE_AUX_PAGEFAULT,
+		__stringify(KBASE_AUX_PAGEFAULT),
+		"Page fault",
+		"@IL",
+		"ctx_nr,page_cnt_change"
+	},
+	{
+		KBASE_AUX_PAGESALLOC,
+		__stringify(KBASE_AUX_PAGESALLOC),
+		"Total alloc pages change",
+		"@IL",
+		"ctx_nr,page_cnt"
+	},
+	{
+		KBASE_AUX_DEVFREQ_TARGET,
+		__stringify(KBASE_AUX_DEVFREQ_TARGET),
+		"New device frequency target",
+		"@L",
+		"target_freq"
+	}
+};
+
+#if MALI_UNIT_TEST
+/* Number of bytes read by user. */
+static atomic_t tlstream_bytes_collected = {0};
+
+/* Number of bytes generated by tracepoint messages. */
+static atomic_t tlstream_bytes_generated = {0};
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+/* Indicator of whether the timeline stream file descriptor is used. */
+atomic_t kbase_tlstream_enabled = {0};
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ * Return: timestamp value
+ */
+static u64 kbasep_tlstream_get_timestamp(void)
+{
+	struct timespec ts;
+	u64             timestamp;
+
+	getrawmonotonic(&ts);
+	timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+	return timestamp;
+}
+
+/**
+ * kbasep_tlstream_write_bytes - write data to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ * @bytes:  pointer to buffer holding data
+ * @len:    length of data to be written
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_bytes(
+		char       *buffer,
+		size_t     pos,
+		const void *bytes,
+		size_t     len)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(bytes);
+
+	memcpy(&buffer[pos], bytes, len);
+
+	return pos + len;
+}
+
+/**
+ * kbasep_tlstream_write_string - write string to message buffer
+ * @buffer:         buffer where data will be written
+ * @pos:            position in the buffer where to place data
+ * @string:         pointer to buffer holding the source string
+ * @max_write_size: number of bytes that can be stored in buffer
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_string(
+		char       *buffer,
+		size_t     pos,
+		const char *string,
+		size_t     max_write_size)
+{
+	u32 string_len;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(string);
+	/* Timeline string consists of at least string length and nul
+	 * terminator. */
+	KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
+	max_write_size -= sizeof(string_len);
+
+	string_len = strlcpy(
+			&buffer[pos + sizeof(string_len)],
+			string,
+			max_write_size);
+	string_len += sizeof(char);
+
+	/* Make sure that the source string fit into the buffer. */
+	KBASE_DEBUG_ASSERT(string_len <= max_write_size);
+
+	/* Update string length. */
+	memcpy(&buffer[pos], &string_len, sizeof(string_len));
+
+	return pos + sizeof(string_len) + string_len;
+}
+
+/**
+ * kbasep_tlstream_write_timestamp - write timestamp to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos)
+{
+	u64 timestamp = kbasep_tlstream_get_timestamp();
+
+	return kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&timestamp, sizeof(timestamp));
+}
+
+/**
+ * kbasep_tlstream_put_bits - put bits in a word
+ * @word:   pointer to the words being modified
+ * @value:  value that shall be written to given position
+ * @bitpos: position where value shall be written (in bits)
+ * @bitlen: length of value (in bits)
+ */
+static void kbasep_tlstream_put_bits(
+		u32          *word,
+		u32          value,
+		unsigned int bitpos,
+		unsigned int bitlen)
+{
+	const u32 mask = ((1 << bitlen) - 1) << bitpos;
+
+	KBASE_DEBUG_ASSERT(word);
+	KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen));
+	KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32);
+
+	*word &= ~mask;
+	*word |= ((value << bitpos) & mask);
+}
+
+/**
+ * kbasep_tlstream_packet_header_setup - setup the packet header
+ * @buffer:     pointer to the buffer
+ * @pkt_family: packet's family
+ * @pkt_type:   packet's type
+ * @pkt_class:  packet's class
+ * @stream_id:  stream id
+ * @numbered:   non-zero if this stream is numbered
+ *
+ * Function sets up immutable part of packet header in the given buffer.
+ */
+static void kbasep_tlstream_packet_header_setup(
+		char                  *buffer,
+		enum tl_packet_family pkt_family,
+		enum tl_packet_class  pkt_class,
+		enum tl_packet_type   pkt_type,
+		unsigned int          stream_id,
+		int                   numbered)
+{
+	u32 word0 = 0;
+	u32 word1 = 0;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL);
+	KBASE_DEBUG_ASSERT(
+			(pkt_type == TL_PACKET_TYPE_HEADER)  ||
+			(pkt_type == TL_PACKET_TYPE_SUMMARY) ||
+			(pkt_type == TL_PACKET_TYPE_BODY));
+	KBASE_DEBUG_ASSERT(
+			(pkt_class == TL_PACKET_CLASS_OBJ) ||
+			(pkt_class == TL_PACKET_CLASS_AUX));
+
+	kbasep_tlstream_put_bits(
+			&word0, pkt_family,
+			PACKET_FAMILY_POS, PACKET_FAMILY_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_class,
+			PACKET_CLASS_POS, PACKET_CLASS_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_type,
+			PACKET_TYPE_POS, PACKET_TYPE_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, stream_id,
+			PACKET_STREAMID_POS, PACKET_STREAMID_LEN);
+
+	if (numbered)
+		kbasep_tlstream_put_bits(
+				&word1, 1,
+				PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN);
+
+	memcpy(&buffer[0],             &word0, sizeof(word0));
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_header_update - update the packet header
+ * @buffer:    pointer to the buffer
+ * @data_size: amount of data carried in this packet
+ *
+ * Function updates mutable part of packet header in the given buffer.
+ * Note that value of data_size must not including size of the header.
+ */
+static void kbasep_tlstream_packet_header_update(
+		char   *buffer,
+		size_t data_size)
+{
+	u32 word0;
+	u32 word1;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	CSTD_UNUSED(word0);
+
+	memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1));
+
+	kbasep_tlstream_put_bits(
+			&word1, data_size,
+			PACKET_LENGTH_POS, PACKET_LENGTH_LEN);
+
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_number_update - update the packet number
+ * @buffer:  pointer to the buffer
+ * @counter: value of packet counter for this packet's stream
+ *
+ * Function updates packet number embedded within the packet placed in the
+ * given buffer.
+ */
+static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+
+	memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter));
+}
+
+/**
+ * kbasep_timeline_stream_reset - reset stream
+ * @stream:  pointer to the stream structure
+ *
+ * Function discards all pending messages and resets packet counters.
+ */
+static void kbasep_timeline_stream_reset(struct tl_stream *stream)
+{
+	unsigned int i;
+
+	for (i = 0; i < PACKET_COUNT; i++) {
+		if (stream->numbered)
+			atomic_set(
+					&stream->buffer[i].size,
+					PACKET_HEADER_SIZE +
+					PACKET_NUMBER_SIZE);
+		else
+			atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE);
+	}
+
+	atomic_set(&stream->wbi, 0);
+	atomic_set(&stream->rbi, 0);
+}
+
+/**
+ * kbasep_timeline_stream_init - initialize timeline stream
+ * @stream:      pointer to the stream structure
+ * @stream_type: stream type
+ */
+static void kbasep_timeline_stream_init(
+		struct tl_stream    *stream,
+		enum tl_stream_type stream_type)
+{
+	unsigned int i;
+
+	KBASE_DEBUG_ASSERT(stream);
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	spin_lock_init(&stream->lock);
+
+	/* All packets carrying tracepoints shall be numbered. */
+	if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type)
+		stream->numbered = 1;
+	else
+		stream->numbered = 0;
+
+	for (i = 0; i < PACKET_COUNT; i++)
+		kbasep_tlstream_packet_header_setup(
+				stream->buffer[i].data,
+				tl_stream_cfg[stream_type].pkt_family,
+				tl_stream_cfg[stream_type].pkt_class,
+				tl_stream_cfg[stream_type].pkt_type,
+				tl_stream_cfg[stream_type].stream_id,
+				stream->numbered);
+
+	kbasep_timeline_stream_reset(tl_stream[stream_type]);
+}
+
+/**
+ * kbasep_timeline_stream_term - terminate timeline stream
+ * @stream: pointer to the stream structure
+ */
+static void kbasep_timeline_stream_term(struct tl_stream *stream)
+{
+	KBASE_DEBUG_ASSERT(stream);
+}
+
+/**
+ * kbasep_tlstream_msgbuf_submit - submit packet to the user space
+ * @stream:     pointer to the stream structure
+ * @wb_idx_raw: write buffer index
+ * @wb_size:    length of data stored in current buffer
+ *
+ * Function updates currently written buffer with packet header. Then write
+ * index is incremented and buffer is handled to user space. Parameters
+ * of new buffer are returned using provided arguments.
+ *
+ * Return: length of data in new buffer
+ *
+ * Warning:  User must update the stream structure with returned value.
+ */
+static size_t kbasep_tlstream_msgbuf_submit(
+		struct tl_stream *stream,
+		unsigned int      wb_idx_raw,
+		unsigned int      wb_size)
+{
+	unsigned int rb_idx_raw = atomic_read(&stream->rbi);
+	unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
+
+	/* Set stream as flushed. */
+	atomic_set(&stream->autoflush_counter, -1);
+
+	kbasep_tlstream_packet_header_update(
+			stream->buffer[wb_idx].data,
+			wb_size - PACKET_HEADER_SIZE);
+
+	if (stream->numbered)
+		kbasep_tlstream_packet_number_update(
+				stream->buffer[wb_idx].data,
+				wb_idx_raw);
+
+	/* Increasing write buffer index will expose this packet to the reader.
+	 * As stream->lock is not taken on reader side we must make sure memory
+	 * is updated correctly before this will happen. */
+	smp_wmb();
+	wb_idx_raw++;
+	atomic_set(&stream->wbi, wb_idx_raw);
+
+	/* Inform user that packets are ready for reading. */
+	wake_up_interruptible(&tl_event_queue);
+
+	/* Detect and mark overflow in this stream. */
+	if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) {
+		/* Reader side depends on this increment to correctly handle
+		 * overflows. The value shall be updated only if it was not
+		 * modified by the reader. The data holding buffer will not be
+		 * updated before stream->lock is released, however size of the
+		 * buffer will. Make sure this increment is globally visible
+		 * before information about selected write buffer size. */
+		atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1);
+	}
+
+	wb_size = PACKET_HEADER_SIZE;
+	if (stream->numbered)
+		wb_size += PACKET_NUMBER_SIZE;
+
+	return wb_size;
+}
+
+/**
+ * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer
+ * @stream_type: type of the stream that shall be locked
+ * @msg_size:    message size
+ * @flags:       pointer to store flags passed back on stream release
+ *
+ * Function will lock the stream and reserve the number of bytes requested
+ * in msg_size for the user.
+ *
+ * Return: pointer to the buffer where message can be stored
+ *
+ * Warning: Stream must be released with kbasep_tlstream_msgbuf_release().
+ *          Only atomic operations are allowed while stream is locked
+ *          (i.e. do not use any operation that may sleep).
+ */
+static char *kbasep_tlstream_msgbuf_acquire(
+		enum tl_stream_type stream_type,
+		size_t              msg_size,
+		unsigned long       *flags) __acquires(&stream->lock)
+{
+	struct tl_stream *stream;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(
+			PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
+			msg_size);
+
+	stream = tl_stream[stream_type];
+
+	spin_lock_irqsave(&stream->lock, *flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	/* Select next buffer if data will not fit into current one. */
+	if (PACKET_SIZE < wb_size + msg_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx  = (wb_idx_raw + 1) % PACKET_COUNT;
+	}
+
+	/* Reserve space in selected buffer. */
+	atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size);
+
+#if MALI_UNIT_TEST
+	atomic_add(msg_size, &tlstream_bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+	return &stream->buffer[wb_idx].data[wb_size];
+}
+
+/**
+ * kbasep_tlstream_msgbuf_release - unlock selected stream
+ * @stream_type:  type of the stream that shall be locked
+ * @flags:        value obtained during stream acquire
+ *
+ * Function releases stream that has been previously locked with a call to
+ * kbasep_tlstream_msgbuf_acquire().
+ */
+static void kbasep_tlstream_msgbuf_release(
+		enum tl_stream_type stream_type,
+		unsigned long       flags) __releases(&stream->lock)
+{
+	struct tl_stream *stream;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	stream = tl_stream[stream_type];
+
+	/* Mark stream as containing unflushed data. */
+	atomic_set(&stream->autoflush_counter, 0);
+
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_flush_stream - flush stream
+ * @stype:  type of stream to be flushed
+ *
+ * Flush pending data in timeline stream.
+ */
+static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
+{
+	struct tl_stream *stream = tl_stream[stype];
+	unsigned long    flags;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+	size_t           min_size = PACKET_HEADER_SIZE;
+
+	if (stream->numbered)
+		min_size += PACKET_NUMBER_SIZE;
+
+	spin_lock_irqsave(&stream->lock, flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	if (wb_size > min_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+		atomic_set(&stream->buffer[wb_idx].size, wb_size);
+	}
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/**
+ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
+ * @data:  unused
+ *
+ * Timer is executed periodically to check if any of the stream contains
+ * buffer ready to be submitted to user space.
+ */
+static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
+{
+	enum tl_stream_type stype;
+	int                 rcode;
+
+	CSTD_UNUSED(data);
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
+		struct tl_stream *stream = tl_stream[stype];
+		unsigned long    flags;
+		unsigned int     wb_idx_raw;
+		unsigned int     wb_idx;
+		size_t           wb_size;
+		size_t           min_size = PACKET_HEADER_SIZE;
+
+		int af_cnt = atomic_read(&stream->autoflush_counter);
+
+		/* Check if stream contain unflushed data. */
+		if (0 > af_cnt)
+			continue;
+
+		/* Check if stream should be flushed now. */
+		if (af_cnt != atomic_cmpxchg(
+					&stream->autoflush_counter,
+					af_cnt,
+					af_cnt + 1))
+			continue;
+		if (!af_cnt)
+			continue;
+
+		/* Autoflush this stream. */
+		if (stream->numbered)
+			min_size += PACKET_NUMBER_SIZE;
+
+		spin_lock_irqsave(&stream->lock, flags);
+
+		wb_idx_raw = atomic_read(&stream->wbi);
+		wb_idx     = wb_idx_raw % PACKET_COUNT;
+		wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+		if (wb_size > min_size) {
+			wb_size = kbasep_tlstream_msgbuf_submit(
+					stream, wb_idx_raw, wb_size);
+			wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+			atomic_set(&stream->buffer[wb_idx].size,
+					wb_size);
+		}
+		spin_unlock_irqrestore(&stream->lock, flags);
+	}
+
+	if (atomic_read(&autoflush_timer_active))
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+	CSTD_UNUSED(rcode);
+}
+
+/**
+ * kbasep_tlstream_packet_pending - check timeline streams for pending packets
+ * @stype:      pointer to variable where stream type will be placed
+ * @rb_idx_raw: pointer to variable where read buffer index will be placed
+ *
+ * Function checks all streams for pending packets. It will stop as soon as
+ * packet ready to be submitted to user space is detected. Variables under
+ * pointers, passed as the parameters to this function will be updated with
+ * values pointing to right stream and buffer.
+ *
+ * Return: non-zero if any of timeline streams has at last one packet ready
+ */
+static int kbasep_tlstream_packet_pending(
+		enum tl_stream_type *stype,
+		unsigned int        *rb_idx_raw)
+{
+	int pending = 0;
+
+	KBASE_DEBUG_ASSERT(stype);
+	KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+	for (
+			*stype = 0;
+			(*stype < TL_STREAM_TYPE_COUNT) && !pending;
+			(*stype)++) {
+		if (NULL != tl_stream[*stype]) {
+			*rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi);
+			/* Read buffer index may be updated by writer in case of
+			 * overflow. Read and write buffer indexes must be
+			 * loaded in correct order. */
+			smp_rmb();
+			if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw)
+				pending = 1;
+		}
+	}
+	(*stype)--;
+
+	return pending;
+}
+
+/**
+ * kbasep_tlstream_read - copy data from streams to buffer provided by user
+ * @filp:   pointer to file structure (unused)
+ * @buffer: pointer to the buffer provided by user
+ * @size:   maximum amount of data that can be stored in the buffer
+ * @f_pos:  pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer
+ */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos)
+{
+	ssize_t copy_len = 0;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(f_pos);
+
+	if (!buffer)
+		return -EINVAL;
+
+	if ((0 > *f_pos) || (PACKET_SIZE > size))
+		return -EINVAL;
+
+	mutex_lock(&tl_reader_lock);
+
+	while (copy_len < size) {
+		enum tl_stream_type stype;
+		unsigned int        rb_idx_raw = 0;
+		unsigned int        rb_idx;
+		size_t              rb_size;
+
+		/* If we don't have any data yet, wait for packet to be
+		 * submitted. If we already read some packets and there is no
+		 * packet pending return back to user. */
+		if (0 < copy_len) {
+			if (!kbasep_tlstream_packet_pending(
+						&stype,
+						&rb_idx_raw))
+				break;
+		} else {
+			if (wait_event_interruptible(
+						tl_event_queue,
+						kbasep_tlstream_packet_pending(
+							&stype,
+							&rb_idx_raw))) {
+				copy_len = -ERESTARTSYS;
+				break;
+			}
+		}
+
+		/* Check if this packet fits into the user buffer.
+		 * If so copy its content. */
+		rb_idx = rb_idx_raw % PACKET_COUNT;
+		rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size);
+		if (rb_size > size - copy_len)
+			break;
+		if (copy_to_user(
+					&buffer[copy_len],
+					tl_stream[stype]->buffer[rb_idx].data,
+					rb_size)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* If the rbi still points to the packet we just processed
+		 * then there was no overflow so we add the copied size to
+		 * copy_len and move rbi on to the next packet
+		 */
+		smp_rmb();
+		if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
+			copy_len += rb_size;
+			atomic_inc(&tl_stream[stype]->rbi);
+
+#if MALI_UNIT_TEST
+			atomic_add(rb_size, &tlstream_bytes_collected);
+#endif /* MALI_UNIT_TEST */
+		}
+	}
+
+	mutex_unlock(&tl_reader_lock);
+
+	return copy_len;
+}
+
+/**
+ * kbasep_tlstream_poll - poll timeline stream for packets
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait)
+{
+	enum tl_stream_type stream_type;
+	unsigned int        rb_idx;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	poll_wait(filp, &tl_event_queue, wait);
+	if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_release - release timeline stream descriptor
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ *
+ * Return always return zero
+ */
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
+{
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+	CSTD_UNUSED(inode);
+	CSTD_UNUSED(filp);
+
+	/* Stop autoflush timer before releasing access to streams. */
+	atomic_set(&autoflush_timer_active, 0);
+	del_timer_sync(&autoflush_timer);
+
+	atomic_set(&kbase_tlstream_enabled, 0);
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_timeline_header - prepare timeline header stream packet
+ * @stream_type: type of the stream that will carry header data
+ * @tp_desc:     pointer to array with tracepoint descriptors
+ * @tp_count:    number of descriptors in the given array
+ *
+ * Functions fills in information about tracepoints stored in body stream
+ * associated with this header stream.
+ */
+static void kbasep_tlstream_timeline_header(
+		enum tl_stream_type  stream_type,
+		const struct tp_desc *tp_desc,
+		u32                  tp_count)
+{
+	const u8      tv = SWTRACE_VERSION; /* protocol version */
+	const u8      ps = sizeof(void *); /* pointer size */
+	size_t        msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
+	char          *buffer;
+	size_t        pos = 0;
+	unsigned long flags;
+	unsigned int  i;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(tp_desc);
+
+	/* Calculate the size of the timeline message. */
+	for (i = 0; i < tp_count; i++) {
+		msg_size += sizeof(tp_desc[i].id);
+		msg_size +=
+			strnlen(tp_desc[i].id_str,    STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].name,      STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_types, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_names, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+	}
+
+	KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size);
+
+	buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tp_count, sizeof(tp_count));
+
+	for (i = 0; i < tp_count; i++) {
+		pos = kbasep_tlstream_write_bytes(
+				buffer, pos,
+				&tp_desc[i].id, sizeof(tp_desc[i].id));
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].id_str, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].name, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_types, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_names, msg_size - pos);
+	}
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(stream_type, flags);
+
+	/* We don't expect any more data to be read in this stream.
+	 * As header stream must be read before its associated body stream,
+	 * make this packet visible to the user straightaway. */
+	kbasep_tlstream_flush_stream(stream_type);
+}
+
+/*****************************************************************************/
+
+int kbase_tlstream_init(void)
+{
+	enum tl_stream_type i;
+
+	/* Prepare stream structures. */
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL);
+		if (!tl_stream[i])
+			break;
+		kbasep_timeline_stream_init(tl_stream[i], i);
+	}
+	if (TL_STREAM_TYPE_COUNT > i) {
+		for (; i > 0; i--) {
+			kbasep_timeline_stream_term(tl_stream[i - 1]);
+			kfree(tl_stream[i - 1]);
+		}
+		return -ENOMEM;
+	}
+
+	/* Initialize autoflush timer. */
+	atomic_set(&autoflush_timer_active, 0);
+	setup_timer(&autoflush_timer,
+			kbasep_tlstream_autoflush_timer_callback,
+			0);
+
+	return 0;
+}
+
+void kbase_tlstream_term(void)
+{
+	enum tl_stream_type i;
+
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		kbasep_timeline_stream_term(tl_stream[i]);
+		kfree(tl_stream[i]);
+	}
+}
+
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd, u32 flags)
+{
+	u32 tlstream_enabled = TLSTREAM_ENABLED | flags;
+
+	if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) {
+		int rcode;
+
+		*fd = anon_inode_getfd(
+				"[mali_tlstream]",
+				&kbasep_tlstream_fops,
+				kctx,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd) {
+			atomic_set(&kbase_tlstream_enabled, 0);
+			return *fd;
+		}
+
+		/* Reset and initialize header streams. */
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_HEADER]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_AUX_HEADER]);
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_OBJ_HEADER,
+				tp_desc_obj,
+				ARRAY_SIZE(tp_desc_obj));
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_AUX_HEADER,
+				tp_desc_aux,
+				ARRAY_SIZE(tp_desc_aux));
+
+		/* Start autoflush timer. */
+		atomic_set(&autoflush_timer_active, 1);
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+		CSTD_UNUSED(rcode);
+
+	} else {
+		*fd = -EBUSY;
+	}
+
+	return 0;
+}
+
+void kbase_tlstream_flush_streams(void)
+{
+	enum tl_stream_type stype;
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
+		kbasep_tlstream_flush_stream(stype);
+}
+
+void kbase_tlstream_reset_body_streams(void)
+{
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_OBJ]);
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_AUX]);
+}
+
+#if MALI_UNIT_TEST
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
+{
+	KBASE_DEBUG_ASSERT(bytes_collected);
+	KBASE_DEBUG_ASSERT(bytes_generated);
+	*bytes_collected = atomic_read(&tlstream_bytes_collected);
+	*bytes_generated = atomic_read(&tlstream_bytes_generated);
+}
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
+{
+	const u32     msg_id = KBASE_TL_NEW_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) +
+		sizeof(core_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &id, sizeof(id));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_count, sizeof(core_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
+{
+	const u32     msg_id = KBASE_TL_NEW_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) +
+		sizeof(fn);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &fn, sizeof(fn));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_LPU_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_AS_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_ATOM;
+	const size_t  msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+			sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_ctx(void *context)
+{
+	const u32     msg_id = KBASE_TL_DEL_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_atom(void *atom)
+{
+	const u32     msg_id = KBASE_TL_DEL_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_RET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_LPU;
+	const size_t  msg_s0 = sizeof(u32) + sizeof(char) +
+			strnlen(attrib_match_list, STRLEN_MAX);
+	const size_t  msg_size =
+			sizeof(msg_id) + sizeof(u64) +
+			sizeof(atom) + sizeof(lpu) + msg_s0;
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_string(
+			buffer, pos, attrib_match_list, msg_s0);
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_DEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_NDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_RDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_RET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_NRET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+		sizeof(jd) + sizeof(affinity) + sizeof(config);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &jd, sizeof(jd));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &affinity, sizeof(affinity));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &config, sizeof(config));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(prio);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &prio, sizeof(prio));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) +
+		sizeof(transtab) + sizeof(memattr) + sizeof(transcfg);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transtab, sizeof(transtab));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &memattr, sizeof(memattr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transcfg, sizeof(transcfg));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu)
+{
+	const u32     msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
+{
+	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+{
+	const u32     msg_id = KBASE_AUX_PM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
+		sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_type, sizeof(core_type));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
+{
+	const u32     msg_id = KBASE_AUX_PAGEFAULT;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count_change);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&page_count_change, sizeof(page_count_change));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
+{
+	const u32     msg_id = KBASE_AUX_PAGESALLOC;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &page_count, sizeof(page_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_devfreq_target(u64 target_freq)
+{
+	const u32       msg_id = KBASE_AUX_DEVFREQ_TARGET;
+	const size_t    msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(target_freq);
+	unsigned long   flags;
+	char            *buffer;
+	size_t          pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &target_freq, sizeof(target_freq));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
new file mode 100755
index 0000000..e29be71
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -0,0 +1,558 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_KBASE_TLSTREAM_H)
+#define _KBASE_TLSTREAM_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_init - initialize timeline infrastructure in kernel
+ * Return: zero on success, negative number on error
+ */
+int kbase_tlstream_init(void);
+
+/**
+ * kbase_tlstream_term - terminate timeline infrastructure in kernel
+ *
+ * Timeline need have to been previously enabled with kbase_tlstream_init().
+ */
+void kbase_tlstream_term(void);
+
+/**
+ * kbase_tlstream_acquire - acquire timeline stream file descriptor
+ * @kctx:  kernel common context
+ * @fd:    timeline stream file descriptor
+ * @flags: timeline stream flags
+ *
+ * This descriptor is meant to be used by userspace timeline to gain access to
+ * kernel timeline stream. This stream is later broadcasted by user space to the
+ * timeline client.
+ * Only one entity can own the descriptor at any given time. Descriptor shall be
+ * closed if unused. If descriptor cannot be obtained (i.e. when it is already
+ * being used) argument fd will contain negative value.
+ *
+ * Return: zero on success (this does not necessarily mean that stream
+ *         descriptor could be returned), negative number on error
+ */
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd, u32 flags);
+
+/**
+ * kbase_tlstream_flush_streams - flush timeline streams.
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_tlstream_flush_streams(void);
+
+/**
+ * kbase_tlstream_reset_body_streams - reset timeline body streams.
+ *
+ * Function will discard pending data in all timeline body streams.
+ */
+void kbase_tlstream_reset_body_streams(void);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_tlstream_test - start timeline stream data generator
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay in milliseconds between trace points written by one
+ *             writer
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This test starts a requested number of asynchronous writers in both IRQ and
+ * thread context. Each writer will generate required number of test
+ * tracepoints (tracepoints with embedded information about writer that
+ * should be verified by user space reader). Tracepoints will be emitted in
+ * all timeline body streams. If aux_msg is non-zero writer will also
+ * generate not testable tracepoints (tracepoints without information about
+ * writer). These tracepoints are used to check correctness of remaining
+ * timeline message generating functions. Writer will wait requested time
+ * between generating another set of messages. This call blocks until all
+ * writers finish.
+ */
+void kbase_tlstream_test(
+		unsigned int tpw_count,
+		unsigned int msg_delay,
+		unsigned int msg_count,
+		int          aux_msg);
+
+/**
+ * kbase_tlstream_stats - read timeline stream statistics
+ * @bytes_collected: will hold number of bytes read by the user
+ * @bytes_generated: will hold number of bytes generated by trace points
+ */
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+#define TL_ATOM_STATE_IDLE 0
+#define TL_ATOM_STATE_READY 1
+#define TL_ATOM_STATE_DONE 2
+#define TL_ATOM_STATE_POSTED 3
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+void __kbase_tlstream_tl_del_ctx(void *context);
+void __kbase_tlstream_tl_del_atom(void *atom);
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list);
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config);
+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio);
+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state);
+void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom);
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg);
+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom);
+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu);
+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom);
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
+void __kbase_tlstream_aux_devfreq_target(u64 target_freq);
+
+#define TLSTREAM_ENABLED (1 << 31)
+
+extern atomic_t kbase_tlstream_enabled;
+
+#define __TRACE_IF_ENABLED(trace_name, ...)                         \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled & TLSTREAM_ENABLED)                     \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...)                     \
+	do {                                                            \
+		int enabled = atomic_read(&kbase_tlstream_enabled);     \
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
+			__kbase_tlstream_##trace_name(__VA_ARGS__);     \
+	} while (0)
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_tl_summary_new_ctx - create context object in timeline
+ *                                     summary
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_ctx(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid)
+
+/**
+ * kbase_tlstream_tl_summary_new_gpu - create GPU object in timeline summary
+ * @gpu:        name of the GPU object
+ * @id:         id value of this GPU
+ * @core_count: number of cores this GPU hosts
+ *
+ * Function emits a timeline message informing about GPU creation. GPU is
+ * created with two attributes: id and core count.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_gpu(gpu, id, core_count) \
+	__TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count)
+
+/**
+ * kbase_tlstream_tl_summary_new_lpu - create LPU object in timeline summary
+ * @lpu: name of the Logical Processing Unit object
+ * @nr:  sequential number assigned to this LPU
+ * @fn:  property describing this LPU's functional abilities
+ *
+ * Function emits a timeline message informing about LPU creation. LPU is
+ * created with two attributes: number linking this LPU with GPU's job slot
+ * and function bearing information about this LPU abilities.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_lpu(lpu, nr, fn) \
+	__TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn)
+
+/**
+ * kbase_tlstream_tl_summary_lifelink_lpu_gpu - lifelink LPU object to GPU
+ * @lpu: name of the Logical Processing Unit object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that LPU object shall be deleted
+ * along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu)
+
+/**
+ * kbase_tlstream_tl_summary_new_as - create address space object in timeline summary
+ * @as: name of the address space object
+ * @nr: sequential number assigned to this address space
+ *
+ * Function emits a timeline message informing about address space creation.
+ * Address space is created with one attribute: number identifying this
+ * address space.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_new_as(as, nr) \
+	__TRACE_IF_ENABLED(tl_summary_new_as, as, nr)
+
+/**
+ * kbase_tlstream_tl_summary_lifelink_as_gpu - lifelink address space object to GPU
+ * @as:  name of the address space object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that address space object
+ * shall be deleted along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define kbase_tlstream_tl_summary_lifelink_as_gpu(as, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu)
+
+/**
+ * kbase_tlstream_tl_new_ctx - create context object in timeline
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ */
+#define kbase_tlstream_tl_new_ctx(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid)
+
+/**
+ * kbase_tlstream_tl_new_atom - create atom object in timeline
+ * @atom: name of the atom object
+ * @nr:   sequential number assigned to this atom
+ *
+ * Function emits a timeline message informing about atom creation. Atom is
+ * created with atom number (its attribute) that links it with actual work
+ * bucket id understood by hardware.
+ */
+#define kbase_tlstream_tl_new_atom(atom, nr) \
+	__TRACE_IF_ENABLED(tl_new_atom, atom, nr)
+
+/**
+ * kbase_tlstream_tl_del_ctx - destroy context object in timeline
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that context object ceased to
+ * exist.
+ */
+#define kbase_tlstream_tl_del_ctx(context) \
+	__TRACE_IF_ENABLED(tl_del_ctx, context)
+
+/**
+ * kbase_tlstream_tl_del_atom - destroy atom object in timeline
+ * @atom: name of the atom object
+ *
+ * Function emits a timeline message informing that atom object ceased to
+ * exist.
+ */
+#define kbase_tlstream_tl_del_atom(atom) \
+	__TRACE_IF_ENABLED(tl_del_atom, atom)
+
+/**
+ * kbase_tlstream_tl_ret_ctx_lpu - retain context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_ctx_lpu(context, lpu) \
+	__TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu)
+
+/**
+ * kbase_tlstream_tl_ret_atom_ctx - retain atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by context and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_atom_ctx(atom, context) \
+	__TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context)
+
+/**
+ * kbase_tlstream_tl_ret_atom_lpu - retain atom by LPU
+ * @atom:              name of the atom object
+ * @lpu:               name of the Logical Processing Unit object
+ * @attrib_match_list: list containing match operator attributes
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_atom_lpu(atom, lpu, attrib_match_list) \
+	__TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list)
+
+/**
+ * kbase_tlstream_tl_nret_ctx_lpu - release context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being released
+ * by LPU object.
+ */
+#define kbase_tlstream_tl_nret_ctx_lpu(context, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu)
+
+/**
+ * kbase_tlstream_tl_nret_atom_ctx - release atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by context.
+ */
+#define kbase_tlstream_tl_nret_atom_ctx(atom, context) \
+	__TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context)
+
+/**
+ * kbase_tlstream_tl_nret_atom_lpu - release atom by LPU
+ * @atom: name of the atom object
+ * @lpu:  name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by LPU.
+ */
+#define kbase_tlstream_tl_nret_atom_lpu(atom, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu)
+
+/**
+ * kbase_tlstream_tl_ret_as_ctx - lifelink address space object to context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being held by the context object.
+ */
+#define kbase_tlstream_tl_ret_as_ctx(as, ctx) \
+	__TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx)
+
+/**
+ * kbase_tlstream_tl_nret_as_ctx - release address space by context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being released by atom.
+ */
+#define kbase_tlstream_tl_nret_as_ctx(as, ctx) \
+	__TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx)
+
+/**
+ * kbase_tlstream_tl_ret_atom_as - retain atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by address space and must not be deleted unless it is released.
+ */
+#define kbase_tlstream_tl_ret_atom_as(atom, as) \
+	__TRACE_IF_ENABLED(tl_ret_atom_as, atom, as)
+
+/**
+ * kbase_tlstream_tl_nret_atom_as - release atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by address space.
+ */
+#define kbase_tlstream_tl_nret_atom_as(atom, as) \
+	__TRACE_IF_ENABLED(tl_nret_atom_as, atom, as)
+
+/**
+ * kbase_tlstream_tl_dep_atom_atom - parent atom depends on child atom
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depends on child atom
+ *
+ * Function emits a timeline message informing that parent atom waits for
+ * child atom object to be completed before start its execution.
+ */
+#define kbase_tlstream_tl_dep_atom_atom(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2)
+
+/**
+ * kbase_tlstream_tl_ndep_atom_atom - dependency between atoms resolved
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define kbase_tlstream_tl_ndep_atom_atom(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2)
+
+/**
+ * kbase_tlstream_tl_rdep_atom_atom - information about already resolved dependency between atoms
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define kbase_tlstream_tl_rdep_atom_atom(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2)
+
+/**
+ * kbase_tlstream_tl_attrib_atom_config - atom job slot attributes
+ * @atom:     name of the atom object
+ * @jd:       job descriptor address
+ * @affinity: job affinity
+ * @config:   job config
+ *
+ * Function emits a timeline message containing atom attributes.
+ */
+#define kbase_tlstream_tl_attrib_atom_config(atom, jd, affinity, config) \
+	__TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config)
+
+/**
+ * kbase_tlstream_tl_attrib_atom_priority - atom priority
+ * @atom: name of the atom object
+ * @prio: atom priority
+ *
+ * Function emits a timeline message containing atom priority.
+ */
+#define kbase_tlstream_tl_attrib_atom_priority(atom, prio) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio)
+
+/**
+ * kbase_tlstream_tl_attrib_atom_state - atom state
+ * @atom:  name of the atom object
+ * @state: atom state
+ *
+ * Function emits a timeline message containing atom state.
+ */
+#define kbase_tlstream_tl_attrib_atom_state(atom, state) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state)
+
+/**
+ * kbase_tlstream_tl_attrib_atom_priority_change - atom caused priority change
+ * @atom:  name of the atom object
+ *
+ * Function emits a timeline message signalling priority change
+ */
+#define kbase_tlstream_tl_attrib_atom_priority_change(atom) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority_change, atom)
+
+/**
+ * kbase_tlstream_tl_attrib_as_config - address space attributes
+ * @as:       assigned address space
+ * @transtab: configuration of the TRANSTAB register
+ * @memattr:  configuration of the MEMATTR register
+ * @transcfg: configuration of the TRANSCFG register (or zero if not present)
+ *
+ * Function emits a timeline message containing address space attributes.
+ */
+#define kbase_tlstream_tl_attrib_as_config(as, transtab, memattr, transcfg) \
+	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
+
+/**
+ * kbase_tlstream_tl_event_atom_softstop_ex
+ * @atom:       atom identifier
+ */
+#define kbase_tlstream_tl_event_atom_softstop_ex(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom)
+
+/**
+ * kbase_tlstream_tl_event_lpu_softstop
+ * @lpu:        name of the LPU object
+ */
+#define kbase_tlstream_tl_event_lpu_softstop(lpu) \
+	__TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu)
+
+/**
+ * kbase_tlstream_tl_event_atom_softstop_issue
+ * @atom:       atom identifier
+ */
+#define kbase_tlstream_tl_event_atom_softstop_issue(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom)
+
+/**
+ * kbase_tlstream_jd_gpu_soft_reset - The GPU is being soft reset
+ * @gpu:        name of the GPU object
+ *
+ * This imperative tracepoint is specific to job dumping.
+ * Function emits a timeline message indicating GPU soft reset.
+ */
+#define kbase_tlstream_jd_gpu_soft_reset(gpu) \
+	__TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu)
+
+/**
+ * kbase_tlstream_aux_pm_state - timeline message: power management state
+ * @core_type: core type (shader, tiler, l2 cache, l3 cache)
+ * @state:     64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
+ */
+#define kbase_tlstream_aux_pm_state(core_type, state) \
+	__TRACE_IF_ENABLED(aux_pm_state, core_type, state)
+
+/**
+ * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event
+ *                                resulting in new pages being mapped
+ * @ctx_nr:            kernel context number
+ * @page_count_change: number of pages to be added
+ */
+#define kbase_tlstream_aux_pagefault(ctx_nr, page_count_change) \
+	__TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change)
+
+/**
+ * kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated
+ *                                 pages is changed
+ * @ctx_nr:     kernel context number
+ * @page_count: number of pages used by the context
+ */
+#define kbase_tlstream_aux_pagesalloc(ctx_nr, page_count) \
+	__TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count)
+
+/**
+ * kbase_tlstream_aux_devfreq_target - timeline message: new target DVFS
+ *                                     frequency
+ * @target_freq: new target frequency
+ */
+#define kbase_tlstream_aux_devfreq_target(target_freq) \
+	__TRACE_IF_ENABLED(aux_devfreq_target, target_freq)
+
+#endif /* _KBASE_TLSTREAM_H */
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100755
index 0000000..e2e0544
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,264 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code idenitifers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE.
+ *
+ * e.g.:
+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X
+ * typedef enum
+ * {
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X )
+ * #include "mali_kbase_trace_defs.h"
+ * #undef  KBASE_TRACE_CODE_MAKE_CODE
+ * } kbase_trace_code;
+ *
+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE
+ *
+ *
+ * The use of the macro here is:
+ * - KBASE_TRACE_CODE_MAKE_CODE( X )
+ *
+ * Which produces:
+ * - For an enum, KBASE_TRACE_CODE_X
+ * - For a string, "X"
+ *
+ *
+ * For example:
+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to:
+ *  - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum
+ *  - "JM_JOB_COMPLETE" for the string
+ * - To use it to trace an event, do:
+ *  - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val );
+ */
+
+#if 0 /* Dummy section to avoid breaking formatting */
+int dummy_array[] = {
+#endif
+
+/*
+ * Core events
+ */
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ),
+	/* info_val == bits cleared */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR),
+	/* GPU addr==dump address */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES),
+/*
+ * Job Slot management events
+ */
+	/* info_val==irq rawstat at start */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ),
+	/* info_val==jobs processed */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END),
+/* In the following:
+ *
+ * - ctx is set if a corresponding job found (NULL otherwise, e.g. some soft-stop cases)
+ * - uatom==kernel-side mapped uatom address (for correlation with user-side)
+ */
+	/* info_val==exit code; gpu_addr==chain gpuaddr */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+	/* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT),
+	/* gpu_addr is as follows:
+	 * - If JS_STATUS active after soft-stop, val==gpu addr written to
+	 *   JS_HEAD on submit
+	 * - otherwise gpu_addr==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1),
+	/* gpu_addr==JS_TAIL read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD),
+/* gpu_addr is as follows:
+ * - If JS_STATUS active before soft-stop, val==JS_HEAD
+ * - otherwise gpu_addr==0
+ */
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE),
+	/* info_val == nr jobs submitted */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP),
+	/* gpu_addr==JS_HEAD_NEXT last written */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER),
+/*
+ * Job dispatch events
+ */
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==0, info_val==0, uatom==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER),
+/*
+ * Scheduler Core events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB),
+	/* gpu_addr==last value written/would be written to JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
+	/* kctx is the one being evicted, info_val == kctx to put in  */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE),
+	/* info_val == the ctx attribute now on ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX),
+	/* info_val == the ctx attribute now on runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL),
+	/* info_val == the ctx attribute now off ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
+	/* info_val == the ctx attribute now off runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+/*
+ * Scheduler Policy events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX),
+	/* info_val == whether it was evicted */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ),
+	/* gpu_addr==JS_HEAD to write if the job were run */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END),
+/*
+ * Power Management Events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
+	/* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF),
+	/* info_val == policy number, or -1 for "Already changing" */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+/* Unused code just to make it easier to not have a comma at the end.
+ * All other codes MUST come before this */
+	KBASE_TRACE_CODE_MAKE_CODE(DUMMY)
+
+#if 0 /* Dummy section to avoid breaking formatting */
+};
+#endif
+
+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
new file mode 100755
index 0000000..5830e87
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
@@ -0,0 +1,236 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define CREATE_TRACE_POINTS
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+#include "mali_timeline.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active);
+
+struct kbase_trace_timeline_desc {
+	char *enum_str;
+	char *desc;
+	char *format;
+	char *format_desc;
+};
+
+static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc }
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table)
+
+static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos)
+{
+	if (*pos >= KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	(*pos)++;
+
+	if (*pos == KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace_timeline_desc *trace_desc = data;
+
+	seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc);
+	return 0;
+}
+
+
+static const struct seq_operations kbasep_trace_timeline_seq_ops = {
+	.start = kbasep_trace_timeline_seq_start,
+	.next = kbasep_trace_timeline_seq_next,
+	.stop = kbasep_trace_timeline_seq_stop,
+	.show = kbasep_trace_timeline_seq_show,
+};
+
+static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &kbasep_trace_timeline_seq_ops);
+}
+
+static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
+	.open = kbasep_trace_timeline_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_timeline_defs",
+			S_IRUGO, kbdev->mali_debugfs_directory, NULL,
+			&kbasep_trace_timeline_debugfs_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
+	} else {
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1);
+		KBASE_TIMELINE_JOB_START(kctx, js, atom_number);
+	}
+	++kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
+	} else {
+		/* Job finished in JS_HEAD */
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0);
+		KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number);
+
+		/* see if we need to trace the job in JS_NEXT moving to JS_HEAD */
+		if (kbase_backend_nr_atoms_submitted(kbdev, js)) {
+			struct kbase_jd_atom *next_katom;
+			struct kbase_context *next_kctx;
+
+			/* Peek the next atom - note that the atom in JS_HEAD will already
+			 * have been dequeued */
+			next_katom = kbase_backend_inspect_head(kbdev, js);
+			WARN_ON(!next_katom);
+			next_kctx = next_katom->kctx;
+			KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0);
+			KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1);
+			KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom));
+		}
+	}
+
+	--kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+	int uid = 0;
+	int old_uid;
+
+	/* If a producer already exists for the event, try to use their UID (multiple-producers) */
+	uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]);
+	old_uid = uid;
+
+	/* Get a new non-zero UID if we don't have one yet */
+	while (!uid)
+		uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter);
+
+	/* Try to use this UID */
+	if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid))
+		/* If it changed, raced with another producer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid);
+}
+
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != 0) {
+		if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+			/* If it changed, raced with another consumer: we've lost this UID */
+			uid = 0;
+
+		KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+	}
+}
+
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+		/* If it changed, raced with another consumer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+}
+
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	/* Simply log the start of the transition */
+	kbdev->timeline.l2_transitioning = true;
+	KBASE_TIMELINE_POWERING_L2(kbdev);
+}
+
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	/* Simply log the end of the transition */
+	if (kbdev->timeline.l2_transitioning) {
+		kbdev->timeline.l2_transitioning = false;
+		KBASE_TIMELINE_POWERED_L2(kbdev);
+	}
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
new file mode 100755
index 0000000..619072f
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
@@ -0,0 +1,363 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_KBASE_TRACE_TIMELINE_H)
+#define _KBASE_TRACE_TIMELINE_H
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+
+enum kbase_trace_timeline_code {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Initialize Timeline DebugFS entries */
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_trace_timeline_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
+ * functions.
+ * Output is timestamped by either sched_clock() (default), local_clock(), or
+ * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */
+#include "mali_timeline.h"
+
+/* Trace number of atoms in flight for kctx (atoms either not completed, or in
+   process of being returned to user */
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec,   \
+				(int)kctx->timeline.owner_tgid,              \
+				count);                                      \
+	} while (0)
+
+/* Trace atom_id being Ready to Run */
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id)                             \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec,              \
+				CTX_FLOW_ATOM_READY,                         \
+				(int)kctx->timeline.owner_tgid,              \
+				atom_id);                                    \
+	} while (0)
+
+/* Trace number of atoms submitted to job slot js
+ *
+ * NOTE: This uses a different tracepoint to the head/next/soft-stop actions,
+ * so that those actions can be filtered out separately from this
+ *
+ * This is because this is more useful, as we can use it to calculate general
+ * utilization easily and accurately */
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_ACTIVE,                      \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+
+/* Trace atoms present in JS_NEXT */
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_NEXT,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace atoms present in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_HEAD,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace that a soft stop/evict from next is being attempted on a slot */
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_STOPPING,                    \
+				(kctx) ? (int)kctx->timeline.owner_tgid : 0, \
+				js, count);                                  \
+	} while (0)
+
+
+
+/* Trace state of overall GPU power */
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active)                              \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_ACTIVE, active);            \
+	} while (0)
+
+/* Trace state of tiler power */
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap)                            \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_TILER_ACTIVE,               \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace number of shaders currently powered */
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap)                           \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_SHADER_ACTIVE,              \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 power */
+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap)                               \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_L2_ACTIVE,                  \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 cache*/
+#define KBASE_TIMELINE_POWERING_L2(kbdev)                                    \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_POWERING,               \
+				1);                                          \
+	} while (0)
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)                                     \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_ACTIVE,                 \
+				1);                                          \
+	} while (0)
+
+/* Trace kbase_pm_send_event message send */
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id)         \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_SEND_EVENT,                       \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+/* Trace kbase_pm_worker message receive */
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id)       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_HANDLE_EVENT,                     \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+
+/* Trace atom_id starting in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number)          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_START_GPU_JOB_CHAIN_SW_APPROX,            \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _consumerof_atom_number);                \
+	} while (0)
+
+/* Trace atom_id stopping on JS_HEAD */
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_STOP_GPU_JOB_CHAIN_SW_APPROX,             \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _producerof_atom_number_completed);      \
+	} while (0)
+
+/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
+ * certin caller */
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec,     \
+				trace_code, 1);                              \
+	} while (0)
+
+/* Trace number of contexts active */
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec,    \
+				count);                                      \
+	} while (0)
+
+/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
+
+/**
+ * Trace that an atom is starting on a job slot
+ *
+ * The caller must be holding hwaccess_lock
+ */
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js);
+
+/**
+ * Trace that an atom has done on a job slot
+ *
+ * 'Done' in this sense can occur either because:
+ * - the atom in JS_HEAD finished
+ * - the atom in JS_NEXT was evicted
+ *
+ * Whether the atom finished or was evicted is passed in @a done_code
+ *
+ * It is assumed that the atom has already been removed from the submit slot,
+ * with either:
+ * - kbasep_jm_dequeue_submit_slot()
+ * - kbasep_jm_dequeue_tail_submit_slot()
+ *
+ * The caller must be holding hwaccess_lock
+ */
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code);
+
+
+/** Trace a pm event starting */
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
+		enum kbase_timeline_pm_event event_sent);
+
+/** Trace a pm event finishing */
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Check whether a pm event was present, and if so trace finishing it */
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Trace L2 power-up start */
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
+
+/** Trace L2 power-up done */
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
+
+#else
+
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)  CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
+
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
+
+static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+}
+
+static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+}
+
+static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+}
+
+static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_MALI_TRACE_TIMELINE */
+
+#endif				/* _KBASE_TRACE_TIMELINE_H */
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
new file mode 100755
index 0000000..156a95a
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
@@ -0,0 +1,140 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * Conventions on Event Names:
+ *
+ * - The prefix determines something about how the timeline should be
+ *   displayed, and is split up into various parts, separated by underscores:
+ *  - 'SW' and 'HW' as the first part will be used to determine whether a
+ *     timeline is to do with Software or Hardware - effectively, separate
+ *     'channels' for Software and Hardware
+ *  - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
+ *    signify related pairs of events - these are optional.
+ *  - 'FLOW' indicates a generic event, which can use dependencies
+ * - This gives events such as:
+ *  - 'SW_ENTER_FOO'
+ *  - 'SW_LEAVE_FOO'
+ *  - 'SW_FLOW_BAR_1'
+ *  - 'SW_FLOW_BAR_2'
+ *  - 'HW_START_BAZ'
+ *  - 'HW_STOP_BAZ'
+ * - And an unadorned HW event:
+ *  - 'HW_BAZ_FROZBOZ'
+ */
+
+/*
+ * Conventions on parameter names:
+ * - anything with 'instance' in the name will have a separate timeline based
+ *   on that instances.
+ * - underscored-prefixed parameters will by hidden by default on timelines
+ *
+ * Hence:
+ * - Different job slots have their own 'instance', based on the instance value
+ * - Per-context info (e.g. atoms on a context) have their own 'instance'
+ *   (i.e. each context should be on a different timeline)
+ *
+ * Note that globally-shared resources can be tagged with a tgid, but we don't
+ * want an instance per context:
+ * - There's no point having separate Job Slot timelines for each context, that
+ *   would be confusing - there's only really 3 job slots!
+ * - There's no point having separate Shader-powered timelines for each
+ *   context, that would be confusing - all shader cores (whether it be 4, 8,
+ *   etc) are shared in the system.
+ */
+
+	/*
+	 * CTX events
+	 */
+	/* Separate timelines for each context 'instance'*/
+	KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT,     "CTX: Atoms in flight",            "%d,%d",    "_instance_tgid,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY,            "CTX: Atoms Ready to Run",         "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
+
+	/*
+	 * SW Events
+	 */
+	/* Separate timelines for each slot 'instance' */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE,         "SW: GPU slot active",             "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT,           "SW: GPU atom in NEXT",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD,           "SW: GPU atom in HEAD",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING,       "SW: Try Soft-Stop on GPU slot",   "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
+	/* Shader and overall power is shared - can't have separate instances of
+	 * it, just tagging with the context */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE,        "SW: GPU power active",            "%d,%d",    "_tgid,_value_is_power_active"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE,  "SW: GPU tiler powered",           "%d,%d",    "_tgid,_value_number_of_tilers"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered",         "%d,%d",    "_tgid,_value_number_of_shaders"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE,     "SW: GPU L2 powered",              "%d,%d",    "_tgid,_value_number_of_l2"),
+
+	/* SW Power event messaging. _event_type is one from the kbase_pm_event enum  */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT,          "SW: PM Send Event",               "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT,        "SW: PM Handle Event",             "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
+	/* SW L2 power events */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING,  "SW: GPU L2 powering",             "%d,%d", "_tgid,_writerof_l2_transitioning"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE,	  "SW: GPU L2 powering done",        "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
+
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE,          "SW: Context Active",              "%d,%d",    "_tgid,_value_active"),
+
+	/*
+	 * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END,   "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END,   "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END,   "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
+
+	/*
+	 * Significant Indirect callers of kbase_pm_check_transitions_nolock()
+	 */
+	/* kbase_pm_request_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	/* kbase_pm_release_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END,   "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	/*
+	 * END: SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+
+	/*
+	 * HW Events
+	 */
+	KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT,
+"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"),
+	KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain start (SW approximated)", "%d,%d,%d",
+"_tgid,job_slot,_consumerof_atom_number_ready"),
+	KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain stop (SW approximated)",  "%d,%d,%d",
+"_tgid,job_slot,_producerof_atom_number_completed")
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h
new file mode 100755
index 0000000..0d3e45d
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h
@@ -0,0 +1,551 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UKU_H_
+#define _KBASE_UKU_H_
+
+#include "mali_uk.h"
+#include "mali_base_kernel.h"
+
+/* This file needs to support being included from kernel and userside (which use different defines) */
+#if defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON
+#define SUPPORT_MALI_ERROR_INJECT
+#endif /* defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON */
+#if defined(CONFIG_MALI_NO_MALI)
+#define SUPPORT_MALI_NO_MALI
+#elif defined(MALI_NO_MALI)
+#if MALI_NO_MALI
+#define SUPPORT_MALI_NO_MALI
+#endif
+#endif
+
+#if defined(SUPPORT_MALI_NO_MALI) || defined(SUPPORT_MALI_ERROR_INJECT)
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+
+#include "mali_kbase_gpuprops_types.h"
+
+/*
+ * 10.1:
+ * - Do mmap in kernel for SAME_VA memory allocations rather then
+ *   calling back into the kernel as a 2nd stage of the allocation request.
+ *
+ * 10.2:
+ * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA
+ *   region for use with JIT (ignored on 32-bit platforms)
+ *
+ * 10.3:
+ * - base_jd_core_req typedef-ed to u32 (instead of to u16)
+ * - two flags added: BASE_JD_REQ_SKIP_CACHE_STAT / _END
+ *
+ * 10.4:
+ * - Removed KBASE_FUNC_EXT_BUFFER_LOCK used only in internal tests
+ *
+ * 10.5:
+ * - Reverted to performing mmap in user space so that tools like valgrind work.
+ *
+ * 10.6:
+ * - Add flags input variable to KBASE_FUNC_TLSTREAM_ACQUIRE
+ */
+#define BASE_UK_VERSION_MAJOR 10
+#define BASE_UK_VERSION_MINOR 6
+
+struct kbase_uk_mem_alloc {
+	union uk_header header;
+	/* IN */
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	/* IN/OUT */
+	u64 flags;
+	/* OUT */
+	u64 gpu_va;
+	u16 va_alignment;
+	u8  padding[6];
+};
+
+struct kbase_uk_mem_free {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	/* OUT */
+};
+
+struct kbase_uk_mem_alias {
+	union uk_header header;
+	/* IN/OUT */
+	u64 flags;
+	/* IN */
+	u64 stride;
+	u64 nents;
+	union kbase_pointer ai;
+	/* OUT */
+	u64         gpu_va;
+	u64         va_pages;
+};
+
+struct kbase_uk_mem_import {
+	union uk_header header;
+	/* IN */
+	union kbase_pointer phandle;
+	u32 type;
+	u32 padding;
+	/* IN/OUT */
+	u64         flags;
+	/* OUT */
+	u64 gpu_va;
+	u64         va_pages;
+};
+
+struct kbase_uk_mem_flags_change {
+	union uk_header header;
+	/* IN */
+	u64 gpu_va;
+	u64 flags;
+	u64 mask;
+};
+
+struct kbase_uk_job_submit {
+	union uk_header header;
+	/* IN */
+	union kbase_pointer addr;
+	u32 nr_atoms;
+	u32 stride;		/* bytes between atoms, i.e. sizeof(base_jd_atom_v2) */
+	/* OUT */
+};
+
+struct kbase_uk_post_term {
+	union uk_header header;
+};
+
+struct kbase_uk_sync_now {
+	union uk_header header;
+
+	/* IN */
+	struct base_syncset sset;
+
+	/* OUT */
+};
+
+struct kbase_uk_hwcnt_setup {
+	union uk_header header;
+
+	/* IN */
+	u64 dump_buffer;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 unused_1; /* keep for backwards compatibility */
+	u32 mmu_l2_bm;
+	u32 padding;
+	/* OUT */
+};
+
+/**
+ * struct kbase_uk_hwcnt_reader_setup - User/Kernel space data exchange structure
+ * @header:       UK structure header
+ * @buffer_count: requested number of dumping buffers
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ * @fd:           dumping notification file descriptor
+ *
+ * This structure sets up HWC dumper/reader for this context.
+ * Multiple instances can be created for single context.
+ */
+struct kbase_uk_hwcnt_reader_setup {
+	union uk_header header;
+
+	/* IN */
+	u32 buffer_count;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 mmu_l2_bm;
+
+	/* OUT */
+	s32 fd;
+};
+
+struct kbase_uk_hwcnt_dump {
+	union uk_header header;
+};
+
+struct kbase_uk_hwcnt_clear {
+	union uk_header header;
+};
+
+struct kbase_uk_fence_validate {
+	union uk_header header;
+	/* IN */
+	s32 fd;
+	u32 padding;
+	/* OUT */
+};
+
+struct kbase_uk_stream_create {
+	union uk_header header;
+	/* IN */
+	char name[32];
+	/* OUT */
+	s32 fd;
+	u32 padding;
+};
+
+struct kbase_uk_gpuprops {
+	union uk_header header;
+
+	/* IN */
+	struct mali_base_gpu_props props;
+	/* OUT */
+};
+
+struct kbase_uk_mem_query {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+#define KBASE_MEM_QUERY_COMMIT_SIZE  1
+#define KBASE_MEM_QUERY_VA_SIZE      2
+#define KBASE_MEM_QUERY_FLAGS        3
+	u64         query;
+	/* OUT */
+	u64         value;
+};
+
+struct kbase_uk_mem_commit {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	u64         pages;
+	/* OUT */
+	u32 result_subcode;
+	u32 padding;
+};
+
+struct kbase_uk_find_cpu_offset {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	u64 cpu_addr;
+	u64 size;
+	/* OUT */
+	u64 offset;
+};
+
+#define KBASE_GET_VERSION_BUFFER_SIZE 64
+struct kbase_uk_get_ddk_version {
+	union uk_header header;
+	/* OUT */
+	char version_buffer[KBASE_GET_VERSION_BUFFER_SIZE];
+	u32 version_string_size;
+	u32 padding;
+};
+
+struct kbase_uk_disjoint_query {
+	union uk_header header;
+	/* OUT */
+	u32 counter;
+	u32 padding;
+};
+
+struct kbase_uk_set_flags {
+	union uk_header header;
+	/* IN */
+	u32 create_flags;
+	u32 padding;
+};
+
+#if MALI_UNIT_TEST
+#define TEST_ADDR_COUNT 4
+#define KBASE_TEST_BUFFER_SIZE 128
+struct kbase_exported_test_data {
+	u64 test_addr[TEST_ADDR_COUNT];		/**< memory address */
+	u32 test_addr_pages[TEST_ADDR_COUNT];		/**<  memory size in pages */
+	union kbase_pointer kctx;				/**<  base context created by process */
+	union kbase_pointer mm;				/**< pointer to process address space */
+	u8 buffer1[KBASE_TEST_BUFFER_SIZE];   /**<  unit test defined parameter */
+	u8 buffer2[KBASE_TEST_BUFFER_SIZE];   /**<  unit test defined parameter */
+};
+
+struct kbase_uk_set_test_data {
+	union uk_header header;
+	/* IN */
+	struct kbase_exported_test_data test_data;
+};
+
+#endif				/* MALI_UNIT_TEST */
+
+#ifdef SUPPORT_MALI_ERROR_INJECT
+struct kbase_uk_error_params {
+	union uk_header header;
+	/* IN */
+	struct kbase_error_params params;
+};
+#endif				/* SUPPORT_MALI_ERROR_INJECT */
+
+#ifdef SUPPORT_MALI_NO_MALI
+struct kbase_uk_model_control_params {
+	union uk_header header;
+	/* IN */
+	struct kbase_model_control_params params;
+};
+#endif				/* SUPPORT_MALI_NO_MALI */
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+struct kbase_uk_keep_gpu_powered {
+	union uk_header header;
+	u32       enabled;
+	u32       padding;
+};
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+struct kbase_uk_profiling_controls {
+	union uk_header header;
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+};
+
+struct kbase_uk_debugfs_mem_profile_add {
+	union uk_header header;
+	u32 len;
+	u32 padding;
+	union kbase_pointer buf;
+};
+
+struct kbase_uk_context_id {
+	union uk_header header;
+	/* OUT */
+	int id;
+};
+
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+	!defined(MALI_MIPE_ENABLED)
+/**
+ * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @flags:  timeline stream flags
+ * @fd:     timeline stream file descriptor
+ *
+ * This structure is used when performing a call to acquire kernel side timeline
+ * stream file descriptor.
+ */
+struct kbase_uk_tlstream_acquire {
+	union uk_header header;
+	/* IN */
+	u32 flags;
+	/* OUT */
+	s32  fd;
+};
+
+/**
+ * struct kbase_uk_tlstream_acquire_v10_4 - User/Kernel space data exchange
+ *                                          structure
+ * @header: UK structure header
+ * @fd:     timeline stream file descriptor
+ *
+ * This structure is used when performing a call to acquire kernel side timeline
+ * stream file descriptor.
+ */
+struct kbase_uk_tlstream_acquire_v10_4 {
+	union uk_header header;
+	/* IN */
+	/* OUT */
+	s32  fd;
+};
+
+/**
+ * struct kbase_uk_tlstream_flush - User/Kernel space data exchange structure
+ * @header: UK structure header
+ *
+ * This structure is used when performing a call to flush kernel side
+ * timeline streams.
+ */
+struct kbase_uk_tlstream_flush {
+	union uk_header header;
+	/* IN */
+	/* OUT */
+};
+
+#if MALI_UNIT_TEST
+/**
+ * struct kbase_uk_tlstream_test - User/Kernel space data exchange structure
+ * @header:    UK structure header
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay between tracepoints from one writer in milliseconds
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This structure is used when performing a call to start timeline stream test
+ * embedded in kernel.
+ */
+struct kbase_uk_tlstream_test {
+	union uk_header header;
+	/* IN */
+	u32 tpw_count;
+	u32 msg_delay;
+	u32 msg_count;
+	u32 aux_msg;
+	/* OUT */
+};
+
+/**
+ * struct kbase_uk_tlstream_stats - User/Kernel space data exchange structure
+ * @header:          UK structure header
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ *
+ * This structure is used when performing a call to obtain timeline stream
+ * statistics.
+ */
+struct kbase_uk_tlstream_stats {
+	union uk_header header; /**< UK structure header. */
+	/* IN */
+	/* OUT */
+	u32 bytes_collected;
+	u32 bytes_generated;
+};
+#endif /* MALI_UNIT_TEST */
+#endif /* MALI_MIPE_ENABLED */
+
+/**
+ * struct struct kbase_uk_prfcnt_value for the KBASE_FUNC_SET_PRFCNT_VALUES ioctl
+ * @header:          UK structure header
+ * @data:            Counter samples for the dummy model
+ * @size:............Size of the counter sample data
+ */
+struct kbase_uk_prfcnt_values {
+	union uk_header header;
+	/* IN */
+	u32 *data;
+	u32 size;
+};
+
+/**
+ * struct kbase_uk_soft_event_update - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @evt:        the GPU address containing the event
+ * @new_status: the new event status, must be either BASE_JD_SOFT_EVENT_SET or
+ *              BASE_JD_SOFT_EVENT_RESET
+ * @flags:      reserved for future uses, must be set to 0
+ *
+ * This structure is used to update the status of a software event. If the
+ * event's status is set to BASE_JD_SOFT_EVENT_SET, any job currently waiting
+ * on this event will complete.
+ */
+struct kbase_uk_soft_event_update {
+	union uk_header header;
+	/* IN */
+	u64 evt;
+	u32 new_status;
+	u32 flags;
+};
+
+/**
+ * struct kbase_uk_mem_jit_init - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @va_pages:   Number of virtual pages required for JIT
+ *
+ * This structure is used when requesting initialization of JIT.
+ */
+struct kbase_uk_mem_jit_init {
+	union uk_header header;
+	/* IN */
+	u64 va_pages;
+};
+
+enum kbase_uk_function_id {
+	KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0),
+	KBASE_FUNC_MEM_IMPORT = (UK_FUNC_ID + 1),
+	KBASE_FUNC_MEM_COMMIT = (UK_FUNC_ID + 2),
+	KBASE_FUNC_MEM_QUERY = (UK_FUNC_ID + 3),
+	KBASE_FUNC_MEM_FREE = (UK_FUNC_ID + 4),
+	KBASE_FUNC_MEM_FLAGS_CHANGE = (UK_FUNC_ID + 5),
+	KBASE_FUNC_MEM_ALIAS = (UK_FUNC_ID + 6),
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	KBASE_FUNC_JOB_SUBMIT_UK6 = (UK_FUNC_ID + 7),
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+	KBASE_FUNC_SYNC  = (UK_FUNC_ID + 8),
+
+	KBASE_FUNC_POST_TERM = (UK_FUNC_ID + 9),
+
+	KBASE_FUNC_HWCNT_SETUP = (UK_FUNC_ID + 10),
+	KBASE_FUNC_HWCNT_DUMP = (UK_FUNC_ID + 11),
+	KBASE_FUNC_HWCNT_CLEAR = (UK_FUNC_ID + 12),
+
+	KBASE_FUNC_GPU_PROPS_REG_DUMP = (UK_FUNC_ID + 14),
+
+	KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15),
+
+	KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16),
+	KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18),
+
+	KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19),
+	KBASE_FUNC_INJECT_ERROR = (UK_FUNC_ID + 20),
+	KBASE_FUNC_MODEL_CONTROL = (UK_FUNC_ID + 21),
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	KBASE_FUNC_KEEP_GPU_POWERED = (UK_FUNC_ID + 22),
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+	KBASE_FUNC_FENCE_VALIDATE = (UK_FUNC_ID + 23),
+	KBASE_FUNC_STREAM_CREATE = (UK_FUNC_ID + 24),
+	KBASE_FUNC_GET_PROFILING_CONTROLS = (UK_FUNC_ID + 25),
+	KBASE_FUNC_SET_PROFILING_CONTROLS = (UK_FUNC_ID + 26),
+					    /* to be used only for testing
+					    * purposes, otherwise these controls
+					    * are set through gator API */
+
+	KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD = (UK_FUNC_ID + 27),
+	KBASE_FUNC_JOB_SUBMIT = (UK_FUNC_ID + 28),
+	KBASE_FUNC_DISJOINT_QUERY = (UK_FUNC_ID + 29),
+
+	KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31),
+
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+	!defined(MALI_MIPE_ENABLED)
+	KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4 = (UK_FUNC_ID + 32),
+#if MALI_UNIT_TEST
+	KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33),
+	KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34),
+#endif /* MALI_UNIT_TEST */
+	KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35),
+#endif /* MALI_MIPE_ENABLED */
+
+	KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36),
+
+#ifdef SUPPORT_MALI_NO_MALI
+	KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37),
+#endif
+
+	KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38),
+
+	KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39),
+
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+	!defined(MALI_MIPE_ENABLED)
+	KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 40),
+#endif /* MALI_MIPE_ENABLED */
+
+	KBASE_FUNC_MAX
+};
+
+#endif				/* _KBASE_UKU_H_ */
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
new file mode 100755
index 0000000..be474ff
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
+{
+	struct list_head *pos = base->next;
+
+	while (pos != base) {
+		if (pos == entry)
+			return true;
+
+		pos = pos->next;
+	}
+	return false;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100755
index 0000000..fd7252d
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+/** Test whether the given list entry is a member of the given list.
+ *
+ * @param base      The head of the list to be tested
+ * @param entry     The list entry to be tested
+ *
+ * @return          true if entry is a member of base
+ *                  false otherwise
+ */
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
+
+#endif				/* _KBASE_UTILITY_H */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
new file mode 100755
index 0000000..3adb06d
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -0,0 +1,2040 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/hrtimer.h>
+#include <linux/jiffies.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwcnt_reader.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* Hwcnt reader API version */
+#define HWCNT_READER_API        1
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC            1000000000ull /* ns */
+
+/* The time resolution of dumping service. */
+#define DUMPING_RESOLUTION      500000ull /* ns */
+
+/* The maximal supported number of dumping buffers. */
+#define MAX_BUFFER_COUNT        32
+
+/* Size and number of hw counters blocks. */
+#define NR_CNT_BLOCKS_PER_GROUP 8
+#define NR_CNT_PER_BLOCK        64
+#define NR_BYTES_PER_CNT        4
+#define NR_BYTES_PER_HDR        16
+#define PRFCNT_EN_MASK_OFFSET   0x8
+
+/*****************************************************************************/
+
+enum {
+	SHADER_HWCNT_BM,
+	TILER_HWCNT_BM,
+	MMU_L2_HWCNT_BM,
+	JM_HWCNT_BM
+};
+
+enum vinstr_state {
+	VINSTR_IDLE,
+	VINSTR_DUMPING,
+	VINSTR_SUSPENDING,
+	VINSTR_SUSPENDED,
+	VINSTR_RESUMING
+};
+
+/**
+ * struct kbase_vinstr_context - vinstr context per device
+ * @lock:              protects the entire vinstr context
+ * @kbdev:             pointer to kbase device
+ * @kctx:              pointer to kbase context
+ * @vmap:              vinstr vmap for mapping hwcnt dump buffer
+ * @gpu_va:            GPU hwcnt dump buffer address
+ * @cpu_va:            the CPU side mapping of the hwcnt dump buffer
+ * @dump_size:         size of the dump buffer in bytes
+ * @bitmap:            current set of counters monitored, not always in sync
+ *                     with hardware
+ * @reprogram:         when true, reprogram hwcnt block with the new set of
+ *                     counters
+ * @state:             vinstr state
+ * @state_lock:        protects information about vinstr state
+ * @suspend_waitq:     notification queue to trigger state re-validation
+ * @suspend_cnt:       reference counter of vinstr's suspend state
+ * @suspend_work:      worker to execute on entering suspended state
+ * @resume_work:       worker to execute on leaving suspended state
+ * @nclients:          number of attached clients, pending or otherwise
+ * @waiting_clients:   head of list of clients being periodically sampled
+ * @idle_clients:      head of list of clients being idle
+ * @suspended_clients: head of list of clients being suspended
+ * @thread:            periodic sampling thread
+ * @waitq:             notification queue of sampling thread
+ * @request_pending:   request for action for sampling thread
+ */
+struct kbase_vinstr_context {
+	struct mutex             lock;
+	struct kbase_device      *kbdev;
+	struct kbase_context     *kctx;
+
+	struct kbase_vmap_struct vmap;
+	u64                      gpu_va;
+	void                     *cpu_va;
+	size_t                   dump_size;
+	u32                      bitmap[4];
+	bool                     reprogram;
+
+	enum vinstr_state        state;
+	struct spinlock          state_lock;
+	wait_queue_head_t        suspend_waitq;
+	unsigned int             suspend_cnt;
+	struct work_struct       suspend_work;
+	struct work_struct       resume_work;
+
+	u32                      nclients;
+	struct list_head         waiting_clients;
+	struct list_head         idle_clients;
+	struct list_head         suspended_clients;
+
+	struct task_struct       *thread;
+	wait_queue_head_t        waitq;
+	atomic_t                 request_pending;
+};
+
+/**
+ * struct kbase_vinstr_client - a vinstr client attached to a vinstr context
+ * @vinstr_ctx:    vinstr context client is attached to
+ * @list:          node used to attach this client to list in vinstr context
+ * @buffer_count:  number of buffers this client is using
+ * @event_mask:    events this client reacts to
+ * @dump_size:     size of one dump buffer in bytes
+ * @bitmap:        bitmap request for JM, TILER, SHADER and MMU counters
+ * @legacy_buffer: userspace hwcnt dump buffer (legacy interface)
+ * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface)
+ * @accum_buffer:  temporary accumulation buffer for preserving counters
+ * @dump_time:     next time this clients shall request hwcnt dump
+ * @dump_interval: interval between periodic hwcnt dumps
+ * @dump_buffers:  kernel hwcnt dump buffers allocated by this client
+ * @dump_buffers_meta: metadata of dump buffers
+ * @meta_idx:      index of metadata being accessed by userspace
+ * @read_idx:      index of buffer read by userspace
+ * @write_idx:     index of buffer being written by dumping service
+ * @waitq:         client's notification queue
+ * @pending:       when true, client has attached but hwcnt not yet updated
+ */
+struct kbase_vinstr_client {
+	struct kbase_vinstr_context        *vinstr_ctx;
+	struct list_head                   list;
+	unsigned int                       buffer_count;
+	u32                                event_mask;
+	size_t                             dump_size;
+	u32                                bitmap[4];
+	void __user                        *legacy_buffer;
+	void                               *kernel_buffer;
+	void                               *accum_buffer;
+	u64                                dump_time;
+	u32                                dump_interval;
+	char                               *dump_buffers;
+	struct kbase_hwcnt_reader_metadata *dump_buffers_meta;
+	atomic_t                           meta_idx;
+	atomic_t                           read_idx;
+	atomic_t                           write_idx;
+	wait_queue_head_t                  waitq;
+	bool                               pending;
+};
+
+/**
+ * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer
+ * @hrtimer:    high resolution timer
+ * @vinstr_ctx: vinstr context
+ */
+struct kbasep_vinstr_wake_up_timer {
+	struct hrtimer              hrtimer;
+	struct kbase_vinstr_context *vinstr_ctx;
+};
+
+/*****************************************************************************/
+
+static int kbasep_vinstr_service_task(void *data);
+
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+		struct file *filp,
+		poll_table  *wait);
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+		struct file   *filp,
+		unsigned int  cmd,
+		unsigned long arg);
+static int kbasep_vinstr_hwcnt_reader_mmap(
+		struct file           *filp,
+		struct vm_area_struct *vma);
+static int kbasep_vinstr_hwcnt_reader_release(
+		struct inode *inode,
+		struct file  *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations vinstr_client_fops = {
+	.poll           = kbasep_vinstr_hwcnt_reader_poll,
+	.unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,
+	.compat_ioctl   = kbasep_vinstr_hwcnt_reader_ioctl,
+	.mmap           = kbasep_vinstr_hwcnt_reader_mmap,
+	.release        = kbasep_vinstr_hwcnt_reader_release,
+};
+
+/*****************************************************************************/
+
+static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_uk_hwcnt_setup setup;
+	int err;
+
+	setup.dump_buffer = vinstr_ctx->gpu_va;
+	setup.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
+	setup.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
+	setup.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
+	setup.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
+
+	/* Mark the context as active so the GPU is kept turned on */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread. */
+	kbase_pm_context_active(kbdev);
+
+	/* Schedule the context in */
+	kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup);
+	if (err) {
+		/* Release the context. This had its own Power Manager Active
+		 * reference */
+		kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+		/* Also release our Power Manager Active reference */
+		kbase_pm_context_idle(kbdev);
+	}
+
+	return err;
+}
+
+static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+
+	err = kbase_instr_hwcnt_disable_internal(kctx);
+	if (err) {
+		dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)",
+				kctx);
+		return;
+	}
+
+	/* Release the context. This had its own Power Manager Active reference. */
+	kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+	/* Also release our Power Manager Active reference. */
+	kbase_pm_context_idle(kbdev);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx);
+}
+
+static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	disable_hwcnt(vinstr_ctx);
+	return enable_hwcnt(vinstr_ctx);
+}
+
+static void hwcnt_bitmap_set(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     = src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  = src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM];
+}
+
+static void hwcnt_bitmap_union(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     |= src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  |= src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM];
+}
+
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev)
+{
+	size_t dump_size;
+
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+		u32 nr_cg;
+
+		nr_cg = kbdev->gpu_props.num_core_groups;
+		dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	} else
+#endif /* CONFIG_MALI_NO_MALI */
+	{
+		/* assume v5 for now */
+		base_gpu_props *props = &kbdev->gpu_props.props;
+		u32 nr_l2 = props->l2_props.num_l2_slices;
+		u64 core_mask = props->coherency_info.group[0].core_mask;
+		u32 nr_blocks = fls64(core_mask);
+
+		/* JM and tiler counter blocks are always present */
+		dump_size = (2 + nr_l2 + nr_blocks) *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	}
+	return dump_size;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size);
+
+static size_t kbasep_vinstr_dump_size_ctx(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev);
+}
+
+static int kbasep_vinstr_map_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_va_region *reg;
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	u64 flags, nr_pages;
+	u16 va_align = 0;
+
+	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+	vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	nr_pages = PFN_UP(vinstr_ctx->dump_size);
+
+	reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+			&vinstr_ctx->gpu_va, &va_align);
+	if (!reg)
+		return -ENOMEM;
+
+	vinstr_ctx->cpu_va = kbase_vmap(
+			kctx,
+			vinstr_ctx->gpu_va,
+			vinstr_ctx->dump_size,
+			&vinstr_ctx->vmap);
+	if (!vinstr_ctx->cpu_va) {
+		kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void kbasep_vinstr_unmap_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+
+	kbase_vunmap(kctx, &vinstr_ctx->vmap);
+	kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+}
+
+/**
+ * kbasep_vinstr_create_kctx - create kernel context for vinstr
+ * @vinstr_ctx: vinstr context
+ * Return: zero on success
+ */
+static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	unsigned long flags;
+	bool enable_backend = false;
+	int err;
+
+	vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true);
+	if (!vinstr_ctx->kctx)
+		return -ENOMEM;
+
+	/* Map the master kernel dump buffer.  The HW dumps the counters
+	 * into this memory region. */
+	err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx);
+	if (err) {
+		kbase_destroy_context(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return err;
+	}
+
+	/* Add kernel context to list of contexts associated with device. */
+	element = kzalloc(sizeof(*element), GFP_KERNEL);
+	if (element) {
+		element->kctx = vinstr_ctx->kctx;
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_add(&element->link, &kbdev->kctx_list);
+
+		/* Inform timeline client about new context.
+		 * Do this while holding the lock to avoid tracepoint
+		 * being created in both body and summary stream. */
+		kbase_tlstream_tl_new_ctx(
+				vinstr_ctx->kctx,
+				(u32)(vinstr_ctx->kctx->id),
+				(u32)(vinstr_ctx->kctx->tgid));
+
+		mutex_unlock(&kbdev->kctx_list_lock);
+	} else {
+		/* Don't treat this as a fail - just warn about it. */
+		dev_warn(kbdev->dev,
+				"couldn't add kctx to kctx_list\n");
+	}
+
+	/* Don't enable hardware counters if vinstr is suspended.
+	 * Note that vinstr resume code is run under vinstr context lock,
+	 * lower layer will be enabled as needed on resume. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE == vinstr_ctx->state)
+		enable_backend = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (enable_backend)
+		err = enable_hwcnt(vinstr_ctx);
+
+	if (err) {
+		kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+		kbase_destroy_context(vinstr_ctx->kctx);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			list_del(&element->link);
+			kfree(element);
+			mutex_unlock(&kbdev->kctx_list_lock);
+		}
+		kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return err;
+	}
+
+	vinstr_ctx->thread = kthread_run(
+			kbasep_vinstr_service_task,
+			vinstr_ctx,
+			"mali_vinstr_service");
+	if (!vinstr_ctx->thread) {
+		disable_hwcnt(vinstr_ctx);
+		kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+		kbase_destroy_context(vinstr_ctx->kctx);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			list_del(&element->link);
+			kfree(element);
+			mutex_unlock(&kbdev->kctx_list_lock);
+		}
+		kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device             *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	struct kbasep_kctx_list_element *tmp;
+	bool                            found = false;
+
+	/* Release hw counters dumping resources. */
+	vinstr_ctx->thread = NULL;
+	disable_hwcnt(vinstr_ctx);
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+	kbase_destroy_context(vinstr_ctx->kctx);
+
+	/* Remove kernel context from the device's contexts list. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == vinstr_ctx->kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	if (!found)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	/* Inform timeline client about context destruction. */
+	kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
+
+	vinstr_ctx->kctx = NULL;
+}
+
+/**
+ * kbasep_vinstr_attach_client - Attach a client to the vinstr core
+ * @vinstr_ctx:    vinstr context
+ * @buffer_count:  requested number of dump buffers
+ * @bitmap:        bitmaps describing which counters should be enabled
+ * @argp:          pointer where notification descriptor shall be stored
+ * @kernel_buffer: pointer to kernel side buffer
+ *
+ * Return: vinstr opaque client handle or NULL on failure
+ */
+static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
+		struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count,
+		u32 bitmap[4], void *argp, void *kernel_buffer)
+{
+	struct task_struct         *thread = NULL;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	if (buffer_count > MAX_BUFFER_COUNT
+	    || (buffer_count & (buffer_count - 1)))
+		return NULL;
+
+	cli = kzalloc(sizeof(*cli), GFP_KERNEL);
+	if (!cli)
+		return NULL;
+
+	cli->vinstr_ctx   = vinstr_ctx;
+	cli->buffer_count = buffer_count;
+	cli->event_mask   =
+		(1 << BASE_HWCNT_READER_EVENT_MANUAL) |
+		(1 << BASE_HWCNT_READER_EVENT_PERIODIC);
+	cli->pending      = true;
+
+	hwcnt_bitmap_set(cli->bitmap, bitmap);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap);
+	vinstr_ctx->reprogram = true;
+
+	/* If this is the first client, create the vinstr kbase
+	 * context. This context is permanently resident until the
+	 * last client exits. */
+	if (!vinstr_ctx->nclients) {
+		hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap);
+		if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0)
+			goto error;
+
+		vinstr_ctx->reprogram = false;
+		cli->pending = false;
+	}
+
+	/* The GPU resets the counter block every time there is a request
+	 * to dump it. We need a per client kernel buffer for accumulating
+	 * the counters. */
+	cli->dump_size    = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
+	if (!cli->accum_buffer)
+		goto error;
+
+	/* Prepare buffers. */
+	if (cli->buffer_count) {
+		int *fd = (int *)argp;
+		size_t tmp;
+
+		/* Allocate area for buffers metadata storage. */
+		tmp = sizeof(struct kbase_hwcnt_reader_metadata) *
+			cli->buffer_count;
+		cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL);
+		if (!cli->dump_buffers_meta)
+			goto error;
+
+		/* Allocate required number of dumping buffers. */
+		cli->dump_buffers = (char *)__get_free_pages(
+				GFP_KERNEL | __GFP_ZERO,
+				get_order(cli->dump_size * cli->buffer_count));
+		if (!cli->dump_buffers)
+			goto error;
+
+		/* Create descriptor for user-kernel data exchange. */
+		*fd = anon_inode_getfd(
+				"[mali_vinstr_desc]",
+				&vinstr_client_fops,
+				cli,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd)
+			goto error;
+	} else if (kernel_buffer) {
+		cli->kernel_buffer = kernel_buffer;
+	} else {
+		cli->legacy_buffer = (void __user *)argp;
+	}
+
+	atomic_set(&cli->read_idx, 0);
+	atomic_set(&cli->meta_idx, 0);
+	atomic_set(&cli->write_idx, 0);
+	init_waitqueue_head(&cli->waitq);
+
+	vinstr_ctx->nclients++;
+	list_add(&cli->list, &vinstr_ctx->idle_clients);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return cli;
+
+error:
+	kfree(cli->dump_buffers_meta);
+	if (cli->dump_buffers)
+		free_pages(
+				(unsigned long)cli->dump_buffers,
+				get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	if (!vinstr_ctx->nclients && vinstr_ctx->kctx) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+	kfree(cli);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+
+	return NULL;
+}
+
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	struct kbase_vinstr_client  *iter, *tmp;
+	struct task_struct          *thread = NULL;
+	u32 zerobitmap[4] = { 0 };
+	int cli_found = 0;
+
+	KBASE_DEBUG_ASSERT(cli);
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) {
+		if (iter == cli) {
+			vinstr_ctx->reprogram = true;
+			cli_found = 1;
+			list_del(&iter->list);
+			break;
+		}
+	}
+	if (!cli_found) {
+		list_for_each_entry_safe(
+				iter, tmp, &vinstr_ctx->waiting_clients, list) {
+			if (iter == cli) {
+				vinstr_ctx->reprogram = true;
+				cli_found = 1;
+				list_del(&iter->list);
+				break;
+			}
+		}
+	}
+	KBASE_DEBUG_ASSERT(cli_found);
+
+	kfree(cli->dump_buffers_meta);
+	free_pages(
+			(unsigned long)cli->dump_buffers,
+			get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	kfree(cli);
+
+	vinstr_ctx->nclients--;
+	if (!vinstr_ctx->nclients) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+
+	/* Rebuild context bitmap now that the client has detached */
+	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client);
+
+/* Accumulate counters in the dump buffer */
+static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
+{
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+	u32 *d = dst;
+	u32 *s = src;
+	size_t i, j;
+
+	for (i = 0; i < dump_size; i += block_size) {
+		/* skip over the header block */
+		d += NR_BYTES_PER_HDR / sizeof(u32);
+		s += NR_BYTES_PER_HDR / sizeof(u32);
+		for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
+			/* saturate result if addition would result in wraparound */
+			if (U32_MAX - *d < *s)
+				*d = U32_MAX;
+			else
+				*d += *s;
+			d++;
+			s++;
+		}
+	}
+}
+
+/* This is the Midgard v4 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v4(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups;
+	size_t i, group_size, group;
+	enum {
+		SC0_BASE    = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC1_BASE    = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC2_BASE    = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC3_BASE    = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		TILER_BASE  = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		JM_BASE     = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
+	};
+
+	group_size = NR_CNT_BLOCKS_PER_GROUP *
+			NR_CNT_PER_BLOCK *
+			NR_BYTES_PER_CNT;
+	for (i = 0; i < nr_cg; i++) {
+		group = i * group_size;
+		/* copy shader core headers */
+		memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE],
+		      NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy tiler header */
+		memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy mmu header */
+		memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy job manager header */
+		memcpy(&dst[group + JM_BASE], &src[group + JM_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* patch the shader core enable mask */
+		mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+
+		/* patch the tiler core enable mask */
+		mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[TILER_HWCNT_BM];
+
+		/* patch the mmu core enable mask */
+		mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+
+		/* patch the job manager enable mask */
+		mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[JM_HWCNT_BM];
+	}
+}
+
+/* This is the Midgard v5 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v5(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev;
+	u32 i, nr_l2;
+	u64 core_mask;
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+
+	/* copy and patch job manager header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[JM_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch tiler header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[TILER_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch MMU/L2C headers */
+	nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+	for (i = 0; i < nr_l2; i++) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+		dst += block_size;
+		src += block_size;
+	}
+
+	/* copy and patch shader core headers */
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (0ull != core_mask) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		if (0ull != (core_mask & 1ull)) {
+			/* if block is not reserved update header */
+			mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+			*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		}
+		dst += block_size;
+		src += block_size;
+
+		core_mask >>= 1;
+	}
+}
+
+/**
+ * accum_clients - accumulate dumped hw counters for all known clients
+ * @vinstr_ctx: vinstr context
+ */
+static void accum_clients(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *iter;
+	int v4 = 0;
+
+#ifndef CONFIG_MALI_NO_MALI
+	v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4);
+#endif
+
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ *
+ * Return: timestamp value
+ */
+static u64 kbasep_vinstr_get_timestamp(void)
+{
+	struct timespec ts;
+
+	getrawmonotonic(&ts);
+	return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+}
+
+/**
+ * kbasep_vinstr_add_dump_request - register client's dumping request
+ * @cli:             requesting client
+ * @waiting_clients: list of pending dumping requests
+ */
+static void kbasep_vinstr_add_dump_request(
+		struct kbase_vinstr_client *cli,
+		struct list_head *waiting_clients)
+{
+	struct kbase_vinstr_client *tmp;
+
+	if (list_empty(waiting_clients)) {
+		list_add(&cli->list, waiting_clients);
+		return;
+	}
+	list_for_each_entry(tmp, waiting_clients, list) {
+		if (tmp->dump_time > cli->dump_time) {
+			list_add_tail(&cli->list, &tmp->list);
+			return;
+		}
+	}
+	list_add_tail(&cli->list, waiting_clients);
+}
+
+/**
+ * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level
+ *                                        dump and accumulate them for known
+ *                                        clients
+ * @vinstr_ctx: vinstr context
+ * @timestamp: pointer where collection timestamp will be recorded
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_collect_and_accumulate(
+		struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp)
+{
+	unsigned long flags;
+	int rcode;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* The dummy model needs the CPU mapping. */
+	gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va);
+#endif
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state) {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		return -EAGAIN;
+	} else {
+		vinstr_ctx->state = VINSTR_DUMPING;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Request HW counters dump.
+	 * Disable preemption to make dump timestamp more accurate. */
+	preempt_disable();
+	*timestamp = kbasep_vinstr_get_timestamp();
+	rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx);
+	preempt_enable();
+
+	if (!rcode)
+		rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
+	WARN_ON(rcode);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state)
+	{
+	case VINSTR_SUSPENDING:
+		schedule_work(&vinstr_ctx->suspend_work);
+		break;
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_IDLE;
+		wake_up_all(&vinstr_ctx->suspend_waitq);
+		break;
+	default:
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Accumulate values of collected counters. */
+	if (!rcode)
+		accum_clients(vinstr_ctx);
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel
+ *                                  buffer
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_fill_dump_buffer(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	unsigned int write_idx = atomic_read(&cli->write_idx);
+	unsigned int read_idx  = atomic_read(&cli->read_idx);
+
+	struct kbase_hwcnt_reader_metadata *meta;
+	void                               *buffer;
+
+	/* Check if there is a place to copy HWC block into. */
+	if (write_idx - read_idx == cli->buffer_count)
+		return -1;
+	write_idx %= cli->buffer_count;
+
+	/* Fill in dump buffer and its metadata. */
+	buffer = &cli->dump_buffers[write_idx * cli->dump_size];
+	meta   = &cli->dump_buffers_meta[write_idx];
+	meta->timestamp  = timestamp;
+	meta->event_id   = event_id;
+	meta->buffer_idx = write_idx;
+	memcpy(buffer, cli->accum_buffer, cli->dump_size);
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer
+ *                                         allocated in userspace
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of legacy ioctl interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_legacy(
+		struct kbase_vinstr_client *cli)
+{
+	void __user  *buffer = cli->legacy_buffer;
+	int          rcode;
+
+	/* Copy data to user buffer. */
+	rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size);
+	if (rcode)
+		pr_warn("error while copying buffer to user\n");
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer
+ *                                         allocated in kernel space
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of the kernel client interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_kernel(
+		struct kbase_vinstr_client *cli)
+{
+	memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_reprogram(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	bool suspended = false;
+
+	/* Don't enable hardware counters if vinstr is suspended. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state)
+		suspended = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (suspended)
+		return;
+
+	/* Change to suspended state is done while holding vinstr context
+	 * lock. Below code will then no re-enable the instrumentation. */
+
+	if (vinstr_ctx->reprogram) {
+		struct kbase_vinstr_client *iter;
+
+		if (!reprogram_hwcnt(vinstr_ctx)) {
+			vinstr_ctx->reprogram = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->idle_clients,
+					list)
+				iter->pending = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->waiting_clients,
+					list)
+				iter->pending = false;
+		}
+	}
+}
+
+/**
+ * kbasep_vinstr_update_client - copy accumulated counters to user readable
+ *                               buffer and notify the user
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_update_client(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	int rcode = 0;
+
+	/* Copy collected counters to user readable buffer. */
+	if (cli->buffer_count)
+		rcode = kbasep_vinstr_fill_dump_buffer(
+				cli, timestamp, event_id);
+	else if (cli->kernel_buffer)
+		rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli);
+	else
+		rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
+
+	if (rcode)
+		goto exit;
+
+
+	/* Notify client. Make sure all changes to memory are visible. */
+	wmb();
+	atomic_inc(&cli->write_idx);
+	wake_up_interruptible(&cli->waitq);
+
+	/* Prepare for next request. */
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+exit:
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function
+ *
+ * @hrtimer: high resolution timer
+ *
+ * Return: High resolution timer restart enum.
+ */
+static enum hrtimer_restart kbasep_vinstr_wake_up_callback(
+		struct hrtimer *hrtimer)
+{
+	struct kbasep_vinstr_wake_up_timer *timer =
+		container_of(
+			hrtimer,
+			struct kbasep_vinstr_wake_up_timer,
+			hrtimer);
+
+	KBASE_DEBUG_ASSERT(timer);
+
+	atomic_set(&timer->vinstr_ctx->request_pending, 1);
+	wake_up_all(&timer->vinstr_ctx->waitq);
+
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * kbasep_vinstr_service_task - HWC dumping service thread
+ *
+ * @data: Pointer to vinstr context structure.
+ *
+ * Return: Always returns zero.
+ */
+static int kbasep_vinstr_service_task(void *data)
+{
+	struct kbase_vinstr_context        *vinstr_ctx = data;
+	struct kbasep_vinstr_wake_up_timer timer;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	hrtimer_init(&timer.hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	timer.hrtimer.function = kbasep_vinstr_wake_up_callback;
+	timer.vinstr_ctx       = vinstr_ctx;
+
+	while (!kthread_should_stop()) {
+		struct kbase_vinstr_client *cli = NULL;
+		struct kbase_vinstr_client *tmp;
+		int                        rcode;
+
+		u64              timestamp = kbasep_vinstr_get_timestamp();
+		u64              dump_time = 0;
+		struct list_head expired_requests;
+
+		/* Hold lock while performing operations on lists of clients. */
+		mutex_lock(&vinstr_ctx->lock);
+
+		/* Closing thread must not interact with client requests. */
+		if (current == vinstr_ctx->thread) {
+			atomic_set(&vinstr_ctx->request_pending, 0);
+
+			if (!list_empty(&vinstr_ctx->waiting_clients)) {
+				cli = list_first_entry(
+						&vinstr_ctx->waiting_clients,
+						struct kbase_vinstr_client,
+						list);
+				dump_time = cli->dump_time;
+			}
+		}
+
+		if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) {
+			mutex_unlock(&vinstr_ctx->lock);
+
+			/* Sleep until next dumping event or service request. */
+			if (cli) {
+				u64 diff = dump_time - timestamp;
+
+				hrtimer_start(
+						&timer.hrtimer,
+						ns_to_ktime(diff),
+						HRTIMER_MODE_REL);
+			}
+			wait_event(
+					vinstr_ctx->waitq,
+					atomic_read(
+						&vinstr_ctx->request_pending) ||
+					kthread_should_stop());
+			hrtimer_cancel(&timer.hrtimer);
+			continue;
+		}
+
+		rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx,
+				&timestamp);
+
+		INIT_LIST_HEAD(&expired_requests);
+
+		/* Find all expired requests. */
+		list_for_each_entry_safe(
+				cli,
+				tmp,
+				&vinstr_ctx->waiting_clients,
+				list) {
+			s64 tdiff =
+				(s64)(timestamp + DUMPING_RESOLUTION) -
+				(s64)cli->dump_time;
+			if (tdiff >= 0ll) {
+				list_del(&cli->list);
+				list_add(&cli->list, &expired_requests);
+			} else {
+				break;
+			}
+		}
+
+		/* Fill data for each request found. */
+		list_for_each_entry_safe(cli, tmp, &expired_requests, list) {
+			/* Ensure that legacy buffer will not be used from
+			 * this kthread context. */
+			BUG_ON(0 == cli->buffer_count);
+			/* Expect only periodically sampled clients. */
+			BUG_ON(0 == cli->dump_interval);
+
+			if (!rcode)
+				kbasep_vinstr_update_client(
+						cli,
+						timestamp,
+						BASE_HWCNT_READER_EVENT_PERIODIC);
+
+			/* Set new dumping time. Drop missed probing times. */
+			do {
+				cli->dump_time += cli->dump_interval;
+			} while (cli->dump_time < timestamp);
+
+			list_del(&cli->list);
+			kbasep_vinstr_add_dump_request(
+					cli,
+					&vinstr_ctx->waiting_clients);
+		}
+
+		/* Reprogram counters set if required. */
+		kbasep_vinstr_reprogram(vinstr_ctx);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	}
+
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers
+ * @cli: pointer to vinstr client structure
+ *
+ * Return: non-zero if client has at least one dumping buffer filled that was
+ *         not notified to user yet
+ */
+static int kbasep_vinstr_hwcnt_reader_buffer_ready(
+		struct kbase_vinstr_client *cli)
+{
+	KBASE_DEBUG_ASSERT(cli);
+	return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int meta_idx = atomic_read(&cli->meta_idx);
+	unsigned int idx = meta_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx];
+
+	/* Metadata sanity check. */
+	KBASE_DEBUG_ASSERT(idx == meta->buffer_idx);
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if there is any buffer available. */
+	if (atomic_read(&cli->write_idx) == meta_idx)
+		return -EAGAIN;
+
+	/* Check if previously taken buffer was put back. */
+	if (atomic_read(&cli->read_idx) != meta_idx)
+		return -EBUSY;
+
+	/* Copy next available buffer's metadata to user. */
+	if (copy_to_user(buffer, meta, size))
+		return -EFAULT;
+
+	atomic_inc(&cli->meta_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int read_idx = atomic_read(&cli->read_idx);
+	unsigned int idx = read_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata meta;
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if any buffer was taken. */
+	if (atomic_read(&cli->meta_idx) == read_idx)
+		return -EPERM;
+
+	/* Check if correct buffer is put back. */
+	if (copy_from_user(&meta, buffer, size))
+		return -EFAULT;
+	if (idx != meta.buffer_idx)
+		return -EINVAL;
+
+	atomic_inc(&cli->read_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @interval: periodic dumping interval (disable periodic dumping if zero)
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+		struct kbase_vinstr_client *cli, u32 interval)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	list_del(&cli->list);
+
+	cli->dump_interval = interval;
+
+	/* If interval is non-zero, enable periodic dumping for this client. */
+	if (cli->dump_interval) {
+		if (DUMPING_RESOLUTION > cli->dump_interval)
+			cli->dump_interval = DUMPING_RESOLUTION;
+		cli->dump_time =
+			kbasep_vinstr_get_timestamp() + cli->dump_interval;
+
+		kbasep_vinstr_add_dump_request(
+				cli, &vinstr_ctx->waiting_clients);
+
+		atomic_set(&vinstr_ctx->request_pending, 1);
+		wake_up_all(&vinstr_ctx->waitq);
+	} else {
+		list_add(&cli->list, &vinstr_ctx->idle_clients);
+	}
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id
+ * @event_id: id of event
+ * Return: event_mask or zero if event is not supported or maskable
+ */
+static u32 kbasep_vinstr_hwcnt_reader_event_mask(
+		enum base_hwcnt_reader_event event_id)
+{
+	u32 event_mask = 0;
+
+	switch (event_id) {
+	case BASE_HWCNT_READER_EVENT_PREJOB:
+	case BASE_HWCNT_READER_EVENT_POSTJOB:
+		/* These event are maskable. */
+		event_mask = (1 << event_id);
+		break;
+
+	case BASE_HWCNT_READER_EVENT_MANUAL:
+	case BASE_HWCNT_READER_EVENT_PERIODIC:
+		/* These event are non-maskable. */
+	default:
+		/* These event are not supported. */
+		break;
+	}
+
+	return event_mask;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to enable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask |= event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to disable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask &= ~event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command
+ * @cli:   pointer to vinstr client structure
+ * @hwver: pointer to user buffer where hw version will be stored
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+		struct kbase_vinstr_client *cli, u32 __user *hwver)
+{
+#ifndef CONFIG_MALI_NO_MALI
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+#endif
+
+	u32                         ver = 5;
+
+#ifndef CONFIG_MALI_NO_MALI
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4))
+		ver = 4;
+#endif
+
+	return put_user(ver, hwver);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl
+ * @filp:   pointer to file structure
+ * @cmd:    user command
+ * @arg:    command's argument
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	long                       rcode = 0;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd)))
+		return -EINVAL;
+
+	switch (cmd) {
+	case KBASE_HWCNT_READER_GET_API_VERSION:
+		rcode = put_user(HWCNT_READER_API, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_HWVER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+				cli, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER_SIZE:
+		KBASE_DEBUG_ASSERT(cli->vinstr_ctx);
+		rcode = put_user(
+				(u32)cli->vinstr_ctx->dump_size,
+				(u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_DUMP:
+		rcode = kbase_vinstr_hwc_dump(
+				cli, BASE_HWCNT_READER_EVENT_MANUAL);
+		break;
+	case KBASE_HWCNT_READER_CLEAR:
+		rcode = kbase_vinstr_hwc_clear(cli);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_PUT_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_SET_INTERVAL:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+				cli, (u32)arg);
+		break;
+	case KBASE_HWCNT_READER_ENABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	case KBASE_HWCNT_READER_DISABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	default:
+		rcode = -EINVAL;
+		break;
+	}
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp,
+		poll_table *wait)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	poll_wait(filp, &cli->waitq, wait);
+	if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap
+ * @filp: pointer to file structure
+ * @vma:  pointer to vma structure
+ * Return: zero on success
+ */
+static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
+		struct vm_area_struct *vma)
+{
+	struct kbase_vinstr_client *cli;
+	unsigned long size, addr, pfn, offset;
+	unsigned long vm_size = vma->vm_end - vma->vm_start;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(vma);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	size = cli->buffer_count * cli->dump_size;
+
+	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
+		return -EINVAL;
+
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	if (vm_size > size - offset)
+		return -EINVAL;
+
+	addr = __pa((unsigned long)cli->dump_buffers + offset);
+	pfn = addr >> PAGE_SHIFT;
+
+	return remap_pfn_range(
+			vma,
+			vma->vm_start,
+			pfn,
+			vm_size,
+			vma->vm_page_prot);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ * Return always return zero
+ */
+static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
+		struct file *filp)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	kbase_vinstr_detach_client(cli);
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_kick_scheduler - trigger scheduler cycle
+ * @kbdev: pointer to kbase device structure
+ */
+static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	down(&js_devdata->schedule_sem);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	up(&js_devdata->schedule_sem);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker suspending vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_suspend_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			suspend_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		disable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_SUSPENDED;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Kick GPU scheduler to allow entering protected mode.
+	 * This must happen after vinstr was suspended. */
+	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker resuming vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_resume_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			resume_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		enable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_IDLE;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Kick GPU scheduler to allow entering protected mode.
+	 * Note that scheduler state machine might requested re-entry to
+	 * protected mode before vinstr was resumed.
+	 * This must happen after vinstr was release. */
+	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+}
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL);
+	if (!vinstr_ctx)
+		return NULL;
+
+	INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
+	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
+	mutex_init(&vinstr_ctx->lock);
+	spin_lock_init(&vinstr_ctx->state_lock);
+	vinstr_ctx->kbdev = kbdev;
+	vinstr_ctx->thread = NULL;
+	vinstr_ctx->state = VINSTR_IDLE;
+	vinstr_ctx->suspend_cnt = 0;
+	INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker);
+	INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker);
+	init_waitqueue_head(&vinstr_ctx->suspend_waitq);
+
+	atomic_set(&vinstr_ctx->request_pending, 0);
+	init_waitqueue_head(&vinstr_ctx->waitq);
+
+	return vinstr_ctx;
+}
+
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *cli;
+
+	/* Stop service thread first. */
+	if (vinstr_ctx->thread)
+		kthread_stop(vinstr_ctx->thread);
+
+	/* Wait for workers. */
+	flush_work(&vinstr_ctx->suspend_work);
+	flush_work(&vinstr_ctx->resume_work);
+
+	while (1) {
+		struct list_head *list = &vinstr_ctx->idle_clients;
+
+		if (list_empty(list)) {
+			list = &vinstr_ctx->waiting_clients;
+			if (list_empty(list))
+				break;
+		}
+
+		cli = list_first_entry(list, struct kbase_vinstr_client, list);
+		list_del(&cli->list);
+		kfree(cli->accum_buffer);
+		kfree(cli);
+		vinstr_ctx->nclients--;
+	}
+	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients);
+	if (vinstr_ctx->kctx)
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	kfree(vinstr_ctx);
+}
+
+int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup)
+{
+	struct kbase_vinstr_client  *cli;
+	u32                         bitmap[4];
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(setup->buffer_count);
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	cli = kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			setup->buffer_count,
+			bitmap,
+			&setup->fd,
+			NULL);
+
+	if (!cli)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup)
+{
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (setup->dump_buffer) {
+		u32 bitmap[4];
+
+		bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+		bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+		bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+		bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+		if (*cli)
+			return -EBUSY;
+
+		*cli = kbasep_vinstr_attach_client(
+				vinstr_ctx,
+				0,
+				bitmap,
+				(void *)(long)setup->dump_buffer,
+				NULL);
+
+		if (!(*cli))
+			return -ENOMEM;
+	} else {
+		if (!*cli)
+			return -EINVAL;
+
+		kbase_vinstr_detach_client(*cli);
+		*cli = NULL;
+	}
+
+	return 0;
+}
+
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer)
+{
+	u32 bitmap[4];
+
+	if (!vinstr_ctx || !setup || !kernel_buffer)
+		return NULL;
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	return kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			0,
+			bitmap,
+			NULL,
+			kernel_buffer);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
+
+int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	int                         rcode = 0;
+	struct kbase_vinstr_context *vinstr_ctx;
+	u64                         timestamp;
+	u32                         event_mask;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT);
+	event_mask = 1 << event_id;
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (event_mask & cli->event_mask) {
+		rcode = kbasep_vinstr_collect_and_accumulate(
+				vinstr_ctx,
+				&timestamp);
+		if (rcode)
+			goto exit;
+
+		rcode = kbasep_vinstr_update_client(cli, timestamp, event_id);
+		if (rcode)
+			goto exit;
+
+		kbasep_vinstr_reprogram(vinstr_ctx);
+	}
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump);
+
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	int                         rcode;
+	u64                         unused;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
+	if (rcode)
+		goto exit;
+	rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx);
+	if (rcode)
+		goto exit;
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+	kbasep_vinstr_reprogram(vinstr_ctx);
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	int ret = -EAGAIN;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+		vinstr_ctx->suspend_cnt++;
+		/* overflow shall not happen */
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		ret = 0;
+		break;
+
+	case VINSTR_IDLE:
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		schedule_work(&vinstr_ctx->suspend_work);
+		break;
+
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		break;
+
+	case VINSTR_SUSPENDING:
+		/* fall through */
+	case VINSTR_RESUMING:
+		break;
+
+	default:
+		BUG();
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	return ret;
+}
+
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	wait_event(vinstr_ctx->suspend_waitq,
+			(0 == kbase_vinstr_try_suspend(vinstr_ctx)));
+}
+
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state);
+	if (VINSTR_SUSPENDED == vinstr_ctx->state) {
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		vinstr_ctx->suspend_cnt--;
+		if (0 == vinstr_ctx->suspend_cnt) {
+			vinstr_ctx->state = VINSTR_RESUMING;
+			schedule_work(&vinstr_ctx->resume_work);
+		}
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
new file mode 100755
index 0000000..6207d25
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -0,0 +1,155 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwcnt_reader.h>
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context;
+struct kbase_vinstr_client;
+
+/*****************************************************************************/
+
+/**
+ * kbase_vinstr_init() - initialize the vinstr core
+ * @kbdev: kbase device
+ *
+ * Return: pointer to the vinstr context on success or NULL on failure
+ */
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_term() - terminate the vinstr core
+ * @vinstr_ctx: vinstr context
+ */
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader
+ * @vinstr_ctx: vinstr context
+ * @setup:      reader's configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwcnt_reader_setup(
+		struct kbase_vinstr_context        *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup);
+
+/**
+ * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping
+ * @vinstr_ctx: vinstr context
+ * @cli:        pointer where to store pointer to new vinstr client structure
+ * @setup:      hwc configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
+ *                                   client
+ * @vinstr_ctx:    vinstr context
+ * @setup:         reader's configuration
+ * @kernel_buffer: pointer to dump buffer
+ *
+ * setup->buffer_count and setup->fd are not used for kernel side clients.
+ *
+ * Return: pointer to client structure, or NULL on failure
+ */
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer);
+
+/**
+ * kbase_vinstr_hwc_dump - issue counter dump for vinstr client
+ * @cli:      pointer to vinstr client
+ * @event_id: id of event that triggered hwcnt dump
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_dump(
+		struct kbase_vinstr_client   *cli,
+		enum base_hwcnt_reader_event event_id);
+
+/**
+ * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for
+ *                          a given kbase context
+ * @cli: pointer to vinstr client
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Return: 0 on success, or negative if state change is in progress
+ *
+ * Warning: This API call is non-generic. It is meant to be used only by
+ *          job scheduler state machine.
+ *
+ * Function initiates vinstr switch to suspended state. Once it was called
+ * vinstr enters suspending state. If function return non-zero value, it
+ * indicates that state switch is not complete and function must be called
+ * again. On state switch vinstr will trigger job scheduler state machine
+ * cycle.
+ */
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_suspend - suspends operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function initiates vinstr switch to suspended state. Then it blocks until
+ * operation is completed.
+ */
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_resume - resumes operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function can be called only if it was preceded by a successful call
+ * to kbase_vinstr_suspend.
+ */
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_dump_size - Return required size of dump buffer
+ * @kbdev: device pointer
+ *
+ * Return : buffer size in bytes
+ */
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_detach_client - Detach a client from the vinstr core
+ * @cli: pointer to vinstr client
+ */
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
+
+#endif /* _KBASE_VINSTR_H_ */
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100755
index 0000000..5d6b402
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_KBASE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mali_slot_template,
+	TP_PROTO(int jobslot, unsigned int info_val),
+	TP_ARGS(jobslot, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, jobslot)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->jobslot = jobslot;
+		__entry->info_val = info_val;
+	),
+	TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val)
+);
+
+#define DEFINE_MALI_SLOT_EVENT(name) \
+DEFINE_EVENT(mali_slot_template, mali_##name, \
+	TP_PROTO(int jobslot, unsigned int info_val), \
+	TP_ARGS(jobslot, info_val))
+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT);
+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE);
+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT);
+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT);
+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+#undef DEFINE_MALI_SLOT_EVENT
+
+DECLARE_EVENT_CLASS(mali_refcount_template,
+	TP_PROTO(int refcount, unsigned int info_val),
+	TP_ARGS(refcount, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, refcount)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->refcount = refcount;
+		__entry->info_val = info_val;
+	),
+	TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val)
+);
+
+#define DEFINE_MALI_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(mali_refcount_template, mali_##name, \
+	TP_PROTO(int refcount, unsigned int info_val), \
+	TP_ARGS(refcount, info_val))
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK);
+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE);
+#undef DEFINE_MALI_REFCOUNT_EVENT
+
+DECLARE_EVENT_CLASS(mali_add_template,
+	TP_PROTO(int gpu_addr, unsigned int info_val),
+	TP_ARGS(gpu_addr, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, gpu_addr)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->gpu_addr = gpu_addr;
+		__entry->info_val = info_val;
+	),
+	TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val)
+);
+
+#define DEFINE_MALI_ADD_EVENT(name) \
+DEFINE_EVENT(mali_add_template, mali_##name, \
+	TP_PROTO(int gpu_addr, unsigned int info_val), \
+	TP_ARGS(gpu_addr, info_val))
+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY);
+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL);
+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT);
+DEFINE_MALI_ADD_EVENT(JM_IRQ);
+DEFINE_MALI_ADD_EVENT(JM_IRQ_END);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP);
+DEFINE_MALI_ADD_EVENT(PM_PWRON);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM);
+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+#undef DEFINE_MALI_ADD_EVENT
+
+#endif /* _TRACE_MALI_KBASE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100755
index 0000000..2be06a5
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,189 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#include <linux/tracepoint.h>
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - called from mali_kbase_core_linux.c
+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event,
+	TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid,
+			unsigned char job_id),
+	TP_ARGS(event_id, tgid, pid, job_id),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned int, tgid)
+		__field(unsigned int, pid)
+		__field(unsigned char, job_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->tgid = tgid;
+		__entry->pid = pid;
+		__entry->job_id = job_id;
+	),
+	TP_printk("event=%u tgid=%u pid=%u job_id=%u",
+		__entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+);
+
+/**
+ * mali_pm_status - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF)
+ */
+TRACE_EVENT(mali_pm_status,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power up
+ */
+TRACE_EVENT(mali_pm_power_on,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power down
+ */
+TRACE_EVENT(mali_pm_power_off,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_page_fault_insert_pages - Called by page_fault_worker()
+ * it reports an MMU page fault resulting in new pages being mapped.
+ * @event_id: MMU address space number.
+ * @value: number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages,
+	TP_PROTO(int event_id, unsigned long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+		__field(unsigned long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space()
+ * it reports that a certain MMU address space is in use now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_in_use,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal()
+ * it reports that a certain MMU address space has been released now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_released,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
+ *                                 and by kbase_atomic_sub_pages()
+ * it reports that the total number of allocated pages is changed.
+ * @event_id: number of pages to be added or subtracted (according to the sign).
+ */
+TRACE_EVENT(mali_total_alloc_pages_change,
+	TP_PROTO(long long int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(long long int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%lld", __entry->event_id)
+);
+
+#endif				/*  _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
new file mode 100755
index 0000000..9945293
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Kernel-wide include for common macros and types.
+ */
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define U8_MAX          ((u8)~0U)
+#define S8_MAX          ((s8)(U8_MAX>>1))
+#define S8_MIN          ((s8)(-S8_MAX - 1))
+#define U16_MAX         ((u16)~0U)
+#define S16_MAX         ((s16)(U16_MAX>>1))
+#define S16_MIN         ((s16)(-S16_MAX - 1))
+#define U32_MAX         ((u32)~0U)
+#define S32_MAX         ((s32)(U32_MAX>>1))
+#define S32_MIN         ((s32)(-S32_MAX - 1))
+#define U64_MAX         ((u64)~0ULL)
+#define S64_MAX         ((s64)(U64_MAX>>1))
+#define S64_MIN         ((s64)(-S64_MAX - 1))
+#endif /* LINUX_VERSION_CODE */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#define SIZE_MAX        (~(size_t)0)
+#endif /* LINUX_VERSION_CODE */
+
+/**
+ * MIN - Return the lesser of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * Refer to MAX macro for more details
+ */
+#define MIN(x, y)	((x) < (y) ? (x) : (y))
+
+/**
+ * MAX -  Return the greater of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as MIN it is guaranteed to return
+ * the one that MIN didn't return. This is significant for types where not
+ * all values are comparable e.g. NaNs in floating-point types. But if you want
+ * to retrieve the min and max of two values, consider using a conditional swap
+ * instead.
+ */
+#define MAX(x, y)	((x) < (y) ? (y) : (x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * much support API backwards compatibility.
+ */
+#define CSTD_UNUSED(x)	((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile time macros turn a function-like macro in to a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP(...)	((void)#__VA_ARGS__)
+
+/**
+ * Function-like macro for converting a pointer in to a u64 for storing into
+ * an external data structure. This is commonly used when pairing a 32-bit
+ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
+ * is complex and a straight cast does not work reliably as pointers are
+ * often considered as signed.
+ */
+#define PTR_TO_U64(x)	((uint64_t)((uintptr_t)(x)))
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1(x)	#x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be used
+ * if the macro is defined in a way which may have no value; use the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2(x)	CSTD_STR1(x)
+
+/**
+ * Specify an assertion value which is evaluated at compile time. Recommended
+ * usage is specification of a @c static @c INLINE function containing all of
+ * the assertions thus:
+ *
+ * @code
+ * static INLINE [module]_compile_time_assertions( void )
+ * {
+ *     COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
+ * }
+ * @endcode
+ *
+ * @note Use @c static not @c STATIC. We never want to turn off this @c static
+ * specification for testing purposes.
+ */
+#define CSTD_COMPILE_TIME_ASSERT(expr) \
+	do { switch (0) { case 0: case (expr):; } } while (false)
+
+#endif /* _MALISW_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
new file mode 100644
index 0000000..a509cbd
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDG_COHERENCY_H_
+#define _MIDG_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE      1
+#define COHERENCY_NONE     31
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+
+#endif /* _MIDG_COHERENCY_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100755
index 0000000..de3053b
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,580 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Begin Register Offsets
+ */
+
+#define GPU_CONTROL_BASE        0x0000
+#define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
+#define GPU_ID                  0x000	/* (RO) GPU and revision identifier */
+#define L2_FEATURES             0x004	/* (RO) Level 2 cache features */
+#define SUSPEND_SIZE            0x008   /* (RO) Fixed-function suspend buffer
+						size */
+#define TILER_FEATURES          0x00C	/* (RO) Tiler Features */
+#define MEM_FEATURES            0x010	/* (RO) Memory system features */
+#define MMU_FEATURES            0x014	/* (RO) MMU features */
+#define AS_PRESENT              0x018	/* (RO) Address space slots present */
+#define JS_PRESENT              0x01C	/* (RO) Job slots present */
+#define GPU_IRQ_RAWSTAT         0x020	/* (RW) */
+#define GPU_IRQ_CLEAR           0x024	/* (WO) */
+#define GPU_IRQ_MASK            0x028	/* (RW) */
+#define GPU_IRQ_STATUS          0x02C	/* (RO) */
+
+/* IRQ flags */
+#define GPU_FAULT               (1 << 0)	/* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS     (1 << 7)	/* More than one GPU Fault occurred. */
+#define RESET_COMPLETED         (1 << 8)	/* Set when a reset has completed. Intended to use with SOFT_RESET
+						   commands which may take time. */
+#define POWER_CHANGED_SINGLE    (1 << 9)	/* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL       (1 << 10)	/* Set when all cores have finished powering up or down
+						   and the power manager is idle. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16)	/* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED  (1 << 17)	/* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+			| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#define GPU_COMMAND             0x030	/* (WO) */
+#define GPU_STATUS              0x034	/* (RO) */
+#define LATEST_FLUSH            0x038	/* (RO) */
+
+#define GROUPS_L2_COHERENT      (1 << 0)	/* Cores groups are l2 coherent */
+#define GPU_DBGEN               (1 << 8)	/* DBGEN wire status */
+
+#define GPU_FAULTSTATUS         0x03C	/* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO     0x040	/* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI     0x044	/* (RO) GPU exception fault address, high word */
+
+#define PWR_KEY                 0x050	/* (WO) Power manager key register */
+#define PWR_OVERRIDE0           0x054	/* (RW) Power manager override settings */
+#define PWR_OVERRIDE1           0x058	/* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO          0x060	/* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI          0x064	/* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG           0x068	/* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN            0x06C	/* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN        0x070	/* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN         0x074	/* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN        0x07C	/* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO          0x090	/* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI          0x094	/* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO            0x098	/* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI            0x09C	/* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS		0x0A0	/* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4	/* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8	/* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES         0x0AC	/* (RO) Thread features */
+
+#define TEXTURE_FEATURES_0      0x0B0	/* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1      0x0B4	/* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2      0x0B8	/* (RO) Support flags for indexed texture formats 64..95 */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define JS0_FEATURES            0x0C0	/* (RO) Features of job slot 0 */
+#define JS1_FEATURES            0x0C4	/* (RO) Features of job slot 1 */
+#define JS2_FEATURES            0x0C8	/* (RO) Features of job slot 2 */
+#define JS3_FEATURES            0x0CC	/* (RO) Features of job slot 3 */
+#define JS4_FEATURES            0x0D0	/* (RO) Features of job slot 4 */
+#define JS5_FEATURES            0x0D4	/* (RO) Features of job slot 5 */
+#define JS6_FEATURES            0x0D8	/* (RO) Features of job slot 6 */
+#define JS7_FEATURES            0x0DC	/* (RO) Features of job slot 7 */
+#define JS8_FEATURES            0x0E0	/* (RO) Features of job slot 8 */
+#define JS9_FEATURES            0x0E4	/* (RO) Features of job slot 9 */
+#define JS10_FEATURES           0x0E8	/* (RO) Features of job slot 10 */
+#define JS11_FEATURES           0x0EC	/* (RO) Features of job slot 11 */
+#define JS12_FEATURES           0x0F0	/* (RO) Features of job slot 12 */
+#define JS13_FEATURES           0x0F4	/* (RO) Features of job slot 13 */
+#define JS14_FEATURES           0x0F8	/* (RO) Features of job slot 14 */
+#define JS15_FEATURES           0x0FC	/* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n)      GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define SHADER_PRESENT_LO       0x100	/* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI       0x104	/* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO        0x110	/* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI        0x114	/* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO           0x120	/* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI           0x124	/* (RO) Level 2 cache present bitmap, high word */
+
+
+#define SHADER_READY_LO         0x140	/* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI         0x144	/* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO          0x150	/* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI          0x154	/* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO             0x160	/* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI             0x164	/* (RO) Level 2 cache ready bitmap, high word */
+
+
+#define SHADER_PWRON_LO         0x180	/* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI         0x184	/* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO          0x190	/* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI          0x194	/* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO             0x1A0	/* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI             0x1A4	/* (WO) Level 2 cache power on bitmap, high word */
+
+#define SHADER_PWROFF_LO        0x1C0	/* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI        0x1C4	/* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO         0x1D0	/* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI         0x1D4	/* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO            0x1E0	/* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI            0x1E4	/* (WO) Level 2 cache power off bitmap, high word */
+
+#define SHADER_PWRTRANS_LO      0x200	/* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI      0x204	/* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO       0x210	/* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI       0x214	/* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO          0x220	/* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI          0x224	/* (RO) Level 2 cache power transition bitmap, high word */
+
+#define SHADER_PWRACTIVE_LO     0x240	/* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI     0x244	/* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO      0x250	/* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI      0x254	/* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO         0x260	/* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI         0x264	/* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES      0x300	/* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304	/* (RW) Coherency enable */
+
+#define JM_CONFIG               0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
+#define SHADER_CONFIG           0xF04	/* (RW) Shader core configuration settings (Implementation specific register) */
+#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define L2_MMU_CONFIG           0xF0C	/* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+#define JOB_CONTROL_BASE        0x1000
+
+#define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT         0x000	/* Raw interrupt status register */
+#define JOB_IRQ_CLEAR           0x004	/* Interrupt clear register */
+#define JOB_IRQ_MASK            0x008	/* Interrupt mask register */
+#define JOB_IRQ_STATUS          0x00C	/* Interrupt status register */
+#define JOB_IRQ_JS_STATE        0x010	/* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE        0x014	/* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
+
+#define JOB_SLOT0               0x800	/* Configuration registers for job slot 0 */
+#define JOB_SLOT1               0x880	/* Configuration registers for job slot 1 */
+#define JOB_SLOT2               0x900	/* Configuration registers for job slot 2 */
+#define JOB_SLOT3               0x980	/* Configuration registers for job slot 3 */
+#define JOB_SLOT4               0xA00	/* Configuration registers for job slot 4 */
+#define JOB_SLOT5               0xA80	/* Configuration registers for job slot 5 */
+#define JOB_SLOT6               0xB00	/* Configuration registers for job slot 6 */
+#define JOB_SLOT7               0xB80	/* Configuration registers for job slot 7 */
+#define JOB_SLOT8               0xC00	/* Configuration registers for job slot 8 */
+#define JOB_SLOT9               0xC80	/* Configuration registers for job slot 9 */
+#define JOB_SLOT10              0xD00	/* Configuration registers for job slot 10 */
+#define JOB_SLOT11              0xD80	/* Configuration registers for job slot 11 */
+#define JOB_SLOT12              0xE00	/* Configuration registers for job slot 12 */
+#define JOB_SLOT13              0xE80	/* Configuration registers for job slot 13 */
+#define JOB_SLOT14              0xF00	/* Configuration registers for job slot 14 */
+#define JOB_SLOT15              0xF80	/* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r)      (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO             0x00	/* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI             0x04	/* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO             0x08	/* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI             0x0C	/* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
+#define JS_XAFFINITY           0x1C	/* (RO) Extended affinity mask for job
+					   slot n */
+
+#define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
+#define JS_STATUS              0x24	/* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO        0x40	/* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI        0x44	/* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
+#define JS_XAFFINITY_NEXT      0x5C	/* (RW) Next extended affinity mask for
+					   job slot n */
+
+#define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
+
+#define MEMORY_MANAGEMENT_BASE  0x2000
+#define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT         0x000	/* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR           0x004	/* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK            0x008	/* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS          0x00C	/* (RO) Interrupt status register */
+
+#define MMU_AS0                 0x400	/* Configuration registers for address space 0 */
+#define MMU_AS1                 0x440	/* Configuration registers for address space 1 */
+#define MMU_AS2                 0x480	/* Configuration registers for address space 2 */
+#define MMU_AS3                 0x4C0	/* Configuration registers for address space 3 */
+#define MMU_AS4                 0x500	/* Configuration registers for address space 4 */
+#define MMU_AS5                 0x540	/* Configuration registers for address space 5 */
+#define MMU_AS6                 0x580	/* Configuration registers for address space 6 */
+#define MMU_AS7                 0x5C0	/* Configuration registers for address space 7 */
+#define MMU_AS8                 0x600	/* Configuration registers for address space 8 */
+#define MMU_AS9                 0x640	/* Configuration registers for address space 9 */
+#define MMU_AS10                0x680	/* Configuration registers for address space 10 */
+#define MMU_AS11                0x6C0	/* Configuration registers for address space 11 */
+#define MMU_AS12                0x700	/* Configuration registers for address space 12 */
+#define MMU_AS13                0x740	/* Configuration registers for address space 13 */
+#define MMU_AS14                0x780	/* Configuration registers for address space 14 */
+#define MMU_AS15                0x7C0	/* Configuration registers for address space 15 */
+
+#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO         0x00	/* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI         0x04	/* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO          0x08	/* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI          0x0C	/* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO         0x10	/* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI         0x14	/* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND             0x18	/* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS         0x1C	/* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO     0x20	/* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
+#define AS_STATUS              0x28	/* (RO) Status flags for address space n */
+
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
+
+/* End Register Offsets */
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+   MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS   16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n)      (1UL << (n))
+#define MMU_BUS_ERROR(n)       (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK   0xfffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED  (0u << 0)
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY  (1u << 1)
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE     (3u << 0)
+#define AS_TRANSTAB_LPAE_READ_INNER        (1u << 2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER       (1u << 4)
+
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK      0x00000003
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                    (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT       (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT        (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT      (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG             (0x3<<3)
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT      (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK                  (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC                (0x0<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX                    (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ                  (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE                 (0x3<<8)
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
+
+/*
+ * Begin Command Values
+ */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP         0x00	/* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START       0x01	/* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP   0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK        0x07    /* Mask of bits currently in use by the HW */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP         0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK        0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK      0x03	/* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH       0x04	/* Flush all L2 caches then issue a flush region command to all MMUs
+					   (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT    0x04	/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM   0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
+					   flush all L2 caches then issue a flush region command to all MMUs */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION        (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN            (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU                    (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG               (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
+
+/* JS_XAFFINITY register values */
+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
+#define JS_XAFFINITY_TILER_ENABLE     (1u << 8)
+#define JS_XAFFINITY_CACHE_ENABLE     (1u << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
+ */
+
+/* Group of values representing the job status insead a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE   0x00
+#define JS_STATUS_INTERRUPTED         (JS_STATUS_NO_EXCEPTION_BASE + 0x02)	/* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED             (JS_STATUS_NO_EXCEPTION_BASE + 0x03)	/* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED          (JS_STATUS_NO_EXCEPTION_BASE + 0x04)	/* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE          0x40
+#define JS_STATUS_CONFIG_FAULT        (JS_STATUS_FAULT_BASE)	/* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT         (JS_STATUS_FAULT_BASE + 0x01)	/* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT          (JS_STATUS_FAULT_BASE + 0x02)	/* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT         (JS_STATUS_FAULT_BASE + 0x03)	/* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT      (JS_STATUS_FAULT_BASE + 0x04)	/* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT           (JS_STATUS_FAULT_BASE + 0x08)	/* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE  0x50
+#define JS_STATUS_INSTR_INVALID_PC        (JS_STATUS_INSTRUCTION_FAULT_BASE)	/* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01)	/* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02)	/* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03)	/* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT         (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04)	/* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05)	/* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06)	/* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT      (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08)	/* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT        (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09)	/* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A)	/* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE   0x60
+#define JS_STATUS_OUT_OF_MEMORY       (JS_STATUS_MEMORY_FAULT_BASE)	/* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN             0x7F	/* 0x7F means UNKNOWN */
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP                0x00	/* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET         0x01	/* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET         0x02	/* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR       0x03	/* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE      0x04	/* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START  0x05	/* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP   0x06	/* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES       0x07	/* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES   0x08	/* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09	/* Places the GPU in protected mode */
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE           (1 << 2)	/* Set if the performance counters are active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE   (1 << 7)	/* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT      0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT        4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+
+#define PRFCNT_CONFIG_MODE_OFF    0	/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_MANUAL 1	/* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
+#define PRFCNT_CONFIG_MODE_TILE   2	/* The performance counters are enabled, and are written out each time a tile finishes rendering. */
+
+/* AS<n>_MEMATTR values: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
+
+/* Use GPU implementation-defined  caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC           0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
+
+/* Symbol for default MEMATTR to use */
+
+/* Default is - HW implementation defined caching */
+#define AS_MEMATTR_INDEX_DEFAULT               0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE           3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL    1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC           2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF        3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA              4
+
+/* JS<n>_FEATURES register */
+
+#define JS_FEATURE_NULL_JOB              (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB         (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB       (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB           (1u << 4)
+#define JS_FEATURE_VERTEX_JOB            (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB          (1u << 6)
+#define JS_FEATURE_TILER_JOB             (1u << 7)
+#define JS_FEATURE_FUSED_JOB             (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB          (1u << 9)
+
+/* End JS<n>_FEATURES register */
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT        (24)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS              (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT       (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER      (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF         (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT       (26)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES             (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT      (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER     (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF        (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED  0
+#define IMPLEMENTATION_SILICON      1
+#define IMPLEMENTATION_FPGA         2
+#define IMPLEMENTATION_MODEL        3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT     256
+#define THREAD_MWS_DEFAULT    256
+#define THREAD_MBS_DEFAULT    256
+#define THREAD_MR_DEFAULT     1024
+#define THREAD_MTQ_DEFAULT    4
+#define THREAD_MTGS_DEFAULT   10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+
+#define SC_ALT_COUNTERS             (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL  (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD   (1ul << 6)
+#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
+#define SC_LS_PAUSEBUFFER_DISABLE   (1ul << 16)
+#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
+#define SC_ENABLE_TEXGRD_FLAGS      (1ul << 25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+
+#define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
+
+/* End TILER_CONFIG register */
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
new file mode 100755
index 0000000..bd5f661
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
@@ -0,0 +1,396 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali_timeline
+
+#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _MALI_TIMELINE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mali_timeline_atoms_in_flight,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int tgid,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		tgid,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, tgid)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->tgid = tgid;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT,
+				(int)__entry->ts_sec,
+				(int)__entry->ts_nsec,
+				__entry->tgid,
+				__entry->count)
+);
+
+
+TRACE_EVENT(mali_timeline_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->atom_id,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_action,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int active),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		active),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, active)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->active = active;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->active)
+
+);
+
+TRACE_EVENT(mali_timeline_l2_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int state),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		state),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, state)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->state = state;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->state)
+
+);
+TRACE_EVENT(mali_timeline_pm_event,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int pm_event_type,
+		unsigned int pm_event_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		pm_event_type,
+		pm_event_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, pm_event_type)
+			__field(unsigned int, pm_event_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->pm_event_type = pm_event_type;
+		__entry->pm_event_id = pm_event_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->pm_event_type, __entry->pm_event_id)
+
+);
+
+TRACE_EVENT(mali_timeline_slot_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_pm_checktrans,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int trans_code,
+		int trans_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		trans_code,
+		trans_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, trans_code)
+			__field(int, trans_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->trans_code = trans_code;
+		__entry->trans_id = trans_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->trans_id)
+
+);
+
+TRACE_EVENT(mali_timeline_context_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->count)
+);
+
+#endif /* _MALI_TIMELINE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_uk.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
new file mode 100755
index 0000000..841d03f
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,141 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif				/* __cplusplus */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
+ * identifier to select a UKK client to the uku_open() function.
+ *
+ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
+ * enumeration and the uku_open() implemenation for the various OS ports need to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+	/**
+	 * Value used to identify the Base driver UK client.
+	 */
+	UK_CLIENT_MALI_T600_BASE,
+
+	/** The number of uk clients supported. This must be the last member of the enum */
+	UK_CLIENT_COUNT
+};
+
+/**
+ * Each function callable through the UK interface has a unique number.
+ * Functions provided by UK clients start from number UK_FUNC_ID.
+ * Numbers below UK_FUNC_ID are used for internal UK functions.
+ */
+enum uk_func {
+	UKP_FUNC_ID_CHECK_VERSION,   /**< UKK Core internal function */
+	/**
+	 * Each UK client numbers the functions they provide starting from
+	 * number UK_FUNC_ID. This number is then eventually assigned to the
+	 * id field of the union uk_header structure when preparing to make a
+	 * UK call. See your UK client for a list of their function numbers.
+	 */
+	UK_FUNC_ID = 512
+};
+
+/**
+ * Arguments for a UK call are stored in a structure. This structure consists
+ * of a fixed size header and a payload. The header carries a 32-bit number
+ * identifying the UK function to be called (see uk_func). When the UKK client
+ * receives this header and executed the requested UK function, it will use
+ * the same header to store the result of the function in the form of a
+ * int return code. The size of this structure is such that the
+ * first member of the payload following the header can be accessed efficiently
+ * on a 32 and 64-bit kernel and the structure has the same size regardless
+ * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
+ * accordingly in the OS specific mali_uk_os.h header file.
+ */
+union uk_header {
+	/**
+	 * 32-bit number identifying the UK function to be called.
+	 * Also see uk_func.
+	 */
+	u32 id;
+	/**
+	 * The int return code returned by the called UK function.
+	 * See the specification of the particular UK function you are
+	 * calling for the meaning of the error codes returned. All
+	 * UK functions return 0 on success.
+	 */
+	u32 ret;
+	/*
+	 * Used to ensure 64-bit alignment of this union. Do not remove.
+	 * This field is used for padding and does not need to be initialized.
+	 */
+	u64 sizer;
+};
+
+/**
+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
+ */
+struct uku_version_check_args {
+	union uk_header header;
+		  /**< UK call header */
+	u16 major;
+	   /**< This field carries the user-side major version on input and the kernel-side major version on output */
+	u16 minor;
+	   /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
+	u8 padding[4];
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif				/* __cplusplus */
+#endif				/* _UK_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild
new file mode 100755
index 0000000..558657b
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+	platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+	obj-y += $(platform_name)/
+endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100755
index 0000000..8fb4e91
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name is the name set in MALI_PLATFORM_THIRDPARTY_NAME
+#
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
new file mode 100755
index 0000000..de8849e
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+USE_GPPLL?=0
+ifdef CONFIG_AM_VIDEO
+    USE_GPPLL:=1
+endif
+
+ccflags-y += -DAMLOGIC_GPU_USE_GPPLL=$(USE_GPPLL)
+
+ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-y += platform/devicetree/mali_clock.c
+obj-y += platform/devicetree/mpgpu.c
+obj-y += platform/devicetree/meson_main2.c
+obj-y += platform/devicetree/platform_gx.c
+obj-y += platform/devicetree/scaling.c
+obj-y += mali_kbase_runtime_pm.c
+obj-y += mali_kbase_config_devicetree.c
+else ifeq ($(CONFIG_MALI_MIDGARD),m)
+SRC += platform/devicetree/mali_clock.c
+SRC += platform/devicetree/mpgpu.c
+SRC += platform/devicetree/meson_main2.c
+SRC += platform/devicetree/platform_gx.c
+SRC += platform/devicetree/scaling.c
+SRC += platform/devicetree/mali_kbase_runtime_pm.c
+SRC += platform/devicetree/mali_kbase_config_devicetree.c
+endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
new file mode 100644
index 0000000..35249a5
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
@@ -0,0 +1,653 @@
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+#ifndef AML_CLK_LOCK_ERROR
+#define AML_CLK_LOCK_ERROR 1
+#endif
+
+static unsigned gpu_dbg_level = 0;
+module_param(gpu_dbg_level, uint, 0644);
+MODULE_PARM_DESC(gpu_dbg_level, "gpu debug level");
+
+#define gpu_dbg(level, fmt, arg...)			\
+	do {			\
+		if (gpu_dbg_level >= (level))		\
+		printk("gpu_debug"fmt , ## arg); 	\
+	} while (0)
+
+#define GPU_CLK_DBG(fmt, arg...)
+
+//disable print
+#define _dev_info(...)
+
+//static DEFINE_SPINLOCK(lock);
+static mali_plat_info_t* pmali_plat = NULL;
+//static u32 mali_extr_backup = 0;
+//static u32 mali_extr_sample_backup = 0;
+struct timeval start;
+struct timeval end;
+int mali_pm_statue = 0;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali_0_parent = dvfs_tbl->clkp_handle;
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+#ifdef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+#endif
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_set_parent(clk_mali_0, clk_mali_0_parent);
+
+	clk_prepare_enable(clk_mali_0);
+
+	clk_set_parent(clk_mali, clk_mali_0);
+
+#ifdef AML_CLK_LOCK_ERROR
+	clk_set_parent(clk_mali_1, clk_mali_0_parent);
+	clk_prepare_enable(clk_mali_1);
+#endif
+
+	GPU_CLK_DBG("%s:enable(%d), %s:enable(%d)\n",
+	  clk_mali_0->name,  clk_mali_0->enable_count,
+	  clk_mali_0_parent->name, clk_mali_0_parent->enable_count);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+	struct clk *clk_mali_x   = NULL;
+	struct clk *clk_mali_x_parent = NULL;
+	struct clk *clk_mali_x_old = NULL;
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+	clk_mali_x_old  = clk_get_parent(clk_mali);
+
+	if (!clk_mali_x_old) {
+		printk("gpu: could not get clk_mali_x_old or clk_mali_x_old\n");
+		return 0;
+	}
+	if (clk_mali_x_old == clk_mali_0) {
+		clk_mali_x = clk_mali_1;
+	} else if (clk_mali_x_old == clk_mali_1) {
+		clk_mali_x = clk_mali_0;
+	} else {
+		printk("gpu: unmatched clk_mali_x_old\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG("idx=%d, clk_freq=%d\n", idx, dvfs_tbl->clk_freq);
+	clk_mali_x_parent = dvfs_tbl->clkp_handle;
+	if (!clk_mali_x_parent) {
+		printk("gpu: could not get clk_mali_x_parent\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x_parent, dvfs_tbl->clkp_freq);
+	GPU_CLK_DBG();
+	ret = clk_set_parent(clk_mali_x, clk_mali_x_parent);
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x, dvfs_tbl->clk_freq);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	ret = clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG("new %s:enable(%d)\n", clk_mali_x->name,  clk_mali_x->enable_count);
+	do_gettimeofday(&start);
+	udelay(1);// delay 10ns
+	do_gettimeofday(&end);
+	ret = clk_set_parent(clk_mali, clk_mali_x);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	GPU_CLK_DBG("old %s:enable(%d)\n", clk_mali_x_old->name,  clk_mali_x_old->enable_count);
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "ao io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	mpdata->dvfs_table_size = 0;
+
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+	dev_notice(&pdev->dev, "dvfs table is %d\n", mpdata->dvfs_table_size);
+	dev_notice(&pdev->dev, "dvfs table addr %p, ele size=%zd\n",
+			mpdata->dvfs_table,
+			sizeof(mpdata->dvfs_table[0]));
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "clk_gpu");
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_gpu_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_gpu_1");
+	if (IS_ERR(mpdata->clk_mali) || IS_ERR(mpdata->clk_mali_0) || IS_ERR(mpdata->clk_mali_1)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+#else
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	//mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+
+	GPU_CLK_DBG();
+	do_gettimeofday(&start);
+	ret = clk_set_rate(clk_mali, dvfs_tbl->clk_freq);
+	do_gettimeofday(&end);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	GPU_CLK_DBG();
+	clk_disable_unprepare(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+#if 0
+#ifdef MESON_CPU_VERSION_OPS
+		if (is_meson_gxbbm_cpu()) {
+			if (dvfs_tbl->clk_freq >= GXBBM_MAX_GPU_FREQ)
+				continue;
+		}
+#endif
+#endif
+#if  0
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+#endif
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+	_dev_info(&pdev->dev, "clock dvfs table size is %d\n", mpdata->dvfs_table_size);
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "gpu_mux");
+#if 0
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_mali_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_mali_1");
+#endif
+	if (IS_ERR(mpdata->clk_mali)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
new file mode 100644
index 0000000..9b8b392
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
@@ -0,0 +1,37 @@
+#ifndef __MALI_CLOCK_H__
+#define __MALI_CLOCK_H__
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <linux/clk.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 29))
+#include <linux/amlogic/iomap.h>
+#endif
+
+#ifndef HHI_MALI_CLK_CNTL
+#define HHI_MALI_CLK_CNTL   0x6C
+#define mplt_read(r)        readl((pmali_plat->reg_base_hiubus) + ((r)<<2))
+#define mplt_write(r, v)    writel((v), ((pmali_plat->reg_base_hiubus) + ((r)<<2)))
+#define mplt_setbits(r, m)  mplt_write((r), (mplt_read(r) | (m)));
+#define mplt_clrbits(r, m)  mplt_write((r), (mplt_read(r) & (~(m))));
+#endif
+
+//extern int mali_clock_init(struct platform_device *dev);
+int mali_clock_init_clk_tree(struct platform_device *pdev);
+
+typedef int (*critical_t)(size_t param);
+int mali_clock_critical(critical_t critical, size_t param);
+
+int mali_clock_init(mali_plat_info_t*);
+int mali_clock_set(unsigned int index);
+void disable_clock(void);
+void enable_clock(void);
+u32 get_mali_freq(u32 idx);
+void set_str_src(u32 data);
+int mali_dt_info(struct platform_device *pdev,
+        struct mali_plat_info_t *mpdata);
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
new file mode 100755
index 0000000..1267b6f
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
@@ -0,0 +1,109 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/version.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static unsigned long t83x_static_power(unsigned long voltage)
+{
+#if 0
+	struct thermal_zone_device *tz;
+	unsigned long temperature, temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10);
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	tz = thermal_zone_get_zone_by_name("gpu");
+	if (IS_ERR(tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(tz));
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	} else {
+		int ret;
+
+		ret = tz->ops->get_temp(tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(2 * temp_cubed)
+			- (80 * temp_squared)
+			+ (4700 * temp)
+			+ 32000;
+
+	return (((coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+#else
+	return 0;
+#endif
+}
+
+static unsigned long t83x_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+	const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */
+
+	return (coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+struct devfreq_cooling_ops t83x_model_ops = {
+#else
+struct devfreq_cooling_power t83x_model_ops = {
+#endif
+	.get_static_power = t83x_static_power,
+	.get_dynamic_power = t83x_dynamic_power,
+};
+
+#endif
+
+#include <mali_kbase_config.h>
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &dummy_platform_config;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
new file mode 100755
index 0000000..13af899
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (750000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (100000)
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (NULL)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+extern struct kbase_platform_funcs_conf dt_funcs_conf;
+#define PLATFORM_FUNCS (&dt_funcs_conf)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#ifdef CONFIG_DEVFREQ_THERMAL
+#define POWER_MODEL_CALLBACKS (&t83x_model_ops)
+extern struct devfreq_cooling_ops t83x_model_ops;
+#else
+#define POWER_MODEL_CALLBACKS (NULL)
+#endif
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
new file mode 100755
index 0000000..3b3f095
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
@@ -0,0 +1,244 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+
+void *reg_base_hiubus = NULL;
+u32  override_value_aml = 0;
+static int first = 1;
+
+static void  Mali_pwr_on_with_kdev ( struct kbase_device *kbdev, uint32_t  mask)
+{
+    uint32_t    part1_done;
+    part1_done = 0;
+    Mali_WrReg(0x0, 0x0, 0x0000024, 0xffffffff); // clear interrupts
+
+    if ((mask & 0x1) != 0 ) {
+        Mali_WrReg(0x0, 0x0, 0x00000190, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x00000194, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x000001a0, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x000001a4, 0xffffffff);    // Power on all cores
+    }
+
+    if ( (mask >> 1) != 0 ) {
+        Mali_WrReg(0x0, 0x0, 0x00000180, mask >> 1);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x00000184, 0x0);    // Power on all cores
+    }
+
+    if ( mask != 0 ) {
+        udelay(10);
+        part1_done = Mali_RdReg(0x0, 0x0, 0x0000020);
+        //printk("%s, %d\n", __func__, __LINE__);
+        while((part1_done ==0)) { part1_done = Mali_RdReg(0x0, 0x0, 0x00000020); udelay(10); }
+        //printk("Mali_pwr_on:gpu_irq : %x\n", part1_done);
+        Mali_WrReg(0x0, 0x0, 0x0000024, 0xffffffff); // clear interrupts
+    }
+}
+
+static void gpu_power_main( struct kbase_device *kbdev) {
+
+    Mali_pwr_on ( 0x7);
+
+    printk("%s, %d\n", __func__, __LINE__);
+}
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	int ret;
+	struct platform_device *pdev = to_platform_device(kbdev->dev);
+	struct resource *reg_res;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+    //printk("20151013, %s, %d\n", __FILE__, __LINE__);
+    if (first == 0) goto ret;
+
+    reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+    if (!reg_res) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) 
+        reg_base_hiubus = ioremap(reg_res->start, resource_size(reg_res));
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+
+//JOHNT
+    // Level reset mail
+
+    // Level reset mail
+    //Wr(P_RESET2_MASK, ~(0x1<<14));
+    //Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    //Wr(P_RESET2_LEVEL, 0xffffffff);
+    //Wr(P_RESET0_LEVEL, 0xffffffff);
+
+    value = Rd(RESET0_MASK);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_MASK, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+///////////////
+    value = Rd(RESET2_MASK);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_MASK, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value | ((0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value | ((0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    udelay(10); // OR POLL for reset done
+
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+
+    gpu_power_main(kbdev);
+    //printk("set PWR_ORRIDE, reg=%p, reg_start=%llx, reg_size=%zx, reg_mapped=%p\n",
+    //        kbdev->reg, kbdev->reg_start, kbdev->reg_size, reg_base_hiubus);
+	dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+			(void *)kbdev->dev->pm_domain);
+
+    first = 0;
+    //printk("%s, %d\n", __FILE__, __LINE__);
+ret:
+	ret = pm_runtime_get_sync(kbdev->dev);
+    udelay(100);
+#if 1
+
+    core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+    l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+    tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+    //printk("core_ready=%llx, l2_ready=%llx, tiler_ready=%llx\n", core_ready, l2_ready, tiler_ready);
+#endif
+	dev_dbg(kbdev->dev, "pm_runtime_get returned %d\n", ret);
+
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+    //printk("%s, %d\n", __FILE__, __LINE__);
+#if 0
+    iounmap(reg_base_hiubus);
+    reg_base_hiubus = NULL;
+#endif
+	u64 core_ready  = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+	u64 l2_ready    = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+	u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+
+	dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+	if (core_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, core_ready, ACTION_PWROFF);
+
+	if (l2_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_ready, ACTION_PWROFF);
+
+	if (tiler_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_ready, ACTION_PWROFF);
+
+	pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+	pm_runtime_enable(kbdev->dev);
+
+	return 0;
+}
+
+void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+	pm_runtime_disable(kbdev->dev);
+}
+
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+	return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+	int ret = pm_callback_runtime_on(kbdev);
+
+	WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+	pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback = pm_callback_suspend,
+	.power_resume_callback = pm_callback_resume,
+#ifdef KBASE_PM_RUNTIME
+	.power_runtime_init_callback = kbase_device_runtime_init,
+	.power_runtime_term_callback = kbase_device_runtime_disable,
+	.power_runtime_on_callback = pm_callback_runtime_on,
+	.power_runtime_off_callback = pm_callback_runtime_off,
+#else				/* KBASE_PM_RUNTIME */
+	.power_runtime_init_callback = NULL,
+	.power_runtime_term_callback = NULL,
+	.power_runtime_on_callback = NULL,
+	.power_runtime_off_callback = NULL,
+#endif				/* KBASE_PM_RUNTIME */
+};
+
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
new file mode 100644
index 0000000..41185d0
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
@@ -0,0 +1,15 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/kernel.h>
+#ifndef MALI_PLATFORM_H_
+#define MALI_PLATFORM_H_
+
+extern u32 mali_gp_reset_fail;
+extern u32 mali_core_timeout;
+
+#endif /* MALI_PLATFORM_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
new file mode 100644
index 0000000..b541090
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.h
+ * Example core scaling policy.
+ */
+
+#ifndef __ARM_CORE_SCALING_H__
+#define __ARM_CORE_SCALING_H__
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+
+enum mali_scale_mode_t {
+	MALI_PP_SCALING = 0,
+	MALI_PP_FS_SCALING,
+	MALI_SCALING_DISABLE,
+	MALI_TURBO_MODE,
+	MALI_SCALING_MODE_MAX
+};
+
+typedef struct mali_dvfs_threshold_table {
+	uint32_t    freq_index;
+	uint32_t    voltage;
+	uint32_t    keep_count;
+	uint32_t    downthreshold;
+	uint32_t    upthreshold;
+    uint32_t    clk_freq;
+    const char  *clk_parent;
+    struct clk  *clkp_handle;
+    uint32_t    clkp_freq;
+} mali_dvfs_threshold_table;
+
+/**
+ *  restrictions on frequency and number of pp.
+ */
+typedef struct mali_scale_info_t {
+	u32 minpp;
+	u32 maxpp;
+	u32 minclk;
+	u32 maxclk;
+} mali_scale_info_t;
+
+/**
+ * Platform spesific data for meson chips.
+ */
+typedef struct mali_plat_info_t {
+	u32 cfg_pp; /* number of pp. */
+	u32 cfg_min_pp;
+	u32 turbo_clock; /* reserved clock src. */
+	u32 def_clock; /* gpu clock used most of time.*/
+	u32 cfg_clock; /* max clock could be used.*/
+	u32 cfg_clock_bkup; /* same as cfg_clock, for backup. */
+	u32 cfg_min_clock;
+
+	u32  sc_mpp; /* number of pp used most of time.*/
+	u32  bst_gpu; /* threshold for boosting gpu. */
+	u32  bst_pp; /* threshold for boosting PP. */
+
+	u32 *clk;
+	u32 *clk_sample;
+	u32 clk_len;
+	u32 have_switch; /* have clock gate switch or not. */
+
+	mali_dvfs_threshold_table *dvfs_table;
+	u32 dvfs_table_size;
+
+	mali_scale_info_t scale_info;
+	u32 maxclk_sysfs;
+	u32 maxpp_sysfs;
+
+	/* set upper limit of pp or frequency, for THERMAL thermal or band width saving.*/
+	u32 limit_on;
+
+	/* for boost up gpu by user. */
+	void (*plat_preheat)(void);
+
+    struct platform_device *pdev;
+    void __iomem *reg_base_hiubus;
+    void __iomem *reg_base_aobus;
+	struct work_struct wq_work;
+    struct clk *clk_mali;
+    struct clk *clk_mali_0;
+    struct clk *clk_mali_1;
+} mali_plat_info_t;
+mali_plat_info_t* get_mali_plat_data(void);
+
+/**
+ * Initialize core scaling policy.
+ *
+ * @note The core scaling policy will assume that all PP cores are on initially.
+ *
+ * @param num_pp_cores Total number of PP cores.
+ */
+int mali_core_scaling_init(mali_plat_info_t*);
+
+/**
+ * Terminate core scaling policy.
+ */
+void mali_core_scaling_term(void);
+
+/**
+ * cancel and flush scaling job queue.
+ */
+void flush_scaling_job(void);
+
+/* get current state(pp, clk). */
+void get_mali_rt_clkpp(u32* clk, u32* pp);
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush);
+void revise_mali_rt(void);
+/* get max gpu clk level of this chip*/
+int get_gpu_max_clk_level(void);
+
+/* get or set the scale mode. */
+u32 get_mali_schel_mode(void);
+void set_mali_schel_mode(u32 mode);
+
+/* for frequency reporter in DS-5 streamline. */
+u32 get_current_frequency(void);
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+#endif /* __ARM_CORE_SCALING_H__ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
new file mode 100644
index 0000000..7687f92
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2010, 2012-2014 Amlogic Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ */
+
+/**
+ * @file mali_platform.c
+ * Platform specific Mali driver functions for:
+ * meson8m2 and the newer chip
+ */
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#include <asm/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/moduleparam.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_defs.h>
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+extern void mali_post_init(void);
+struct kbase_device;
+//static int gpu_dvfs_probe(struct platform_device *pdev)
+int platform_dt_init_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	int err = -1;
+
+	err = mali_meson_init_start(pdev);
+    mali_meson_init_finish(pdev);
+    mpgpu_class_init();
+    mali_post_init();
+    return err;
+}
+
+//static int gpu_dvfs_remove(struct platform_device *pdev)
+void platform_dt_term_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	printk("%s, %d\n", __FILE__, __LINE__);
+
+    mpgpu_class_exit();
+	mali_meson_uninit(pdev);
+
+}
+
+inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2])
+{
+    mali_gpu_utilization_callback(utilisation*255/100);
+    return 1;
+}
+
+struct kbase_platform_funcs_conf dt_funcs_conf = {
+    .platform_init_func = platform_dt_init_func,
+    .platform_term_func = platform_dt_term_func,
+};
+#if 0
+static const struct of_device_id gpu_dvfs_ids[] = {
+	{ .compatible = "meson, gpu-dvfs-1.00.a" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, gpu_dvfs_ids);
+
+static struct platform_driver gpu_dvfs_driver = {
+	.driver = {
+		.name = "meson-gpu-dvfs",
+		.owner = THIS_MODULE,
+		.of_match_table = gpu_dvfs_ids,
+	},
+	.probe = gpu_dvfs_probe,
+	.remove = gpu_dvfs_remove,
+};
+module_platform_driver(gpu_dvfs_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Amlogic SH, MM");
+MODULE_DESCRIPTION("Driver for the Meson GPU dvfs");
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
new file mode 100644
index 0000000..ca79752
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
@@ -0,0 +1,33 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#ifndef MESON_MAIN_H_
+#define MESON_MAIN_H_
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+u32 set_max_mali_freq(u32 idx);
+u32 get_max_mali_freq(void);
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev);
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev);
+int mali_meson_uninit(struct platform_device* ptr_plt_dev);
+int mpgpu_class_init(void);
+void mpgpu_class_exit(void);
+void mali_gpu_utilization_callback(int utilization_pp);
+
+#endif /* MESON_MAIN_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
new file mode 100644
index 0000000..0cc5035
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
@@ -0,0 +1,359 @@
+/*******************************************************************
+ *
+ *  Copyright C 2013 by Amlogic, Inc. All Rights Reserved.
+ *
+ *  Description:
+ *
+ *  Author: Amlogic Software
+ *  Created: 2010/4/1   19:46
+ *
+ *******************************************************************/
+/* Standard Linux headers */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/io.h>
+#include <plat/io.h>
+#include <asm/io.h>
+#endif
+
+#include "meson_main2.h"
+
+static ssize_t domain_stat_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	unsigned int val;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+	return sprintf(buf, "%x\n", val>>4);
+	return 0;
+}
+
+#define PREHEAT_CMD "preheat"
+#define PLL2_CMD "mpl2"  /* mpl2 [11] or [0xxxxxxx] */
+#define SCMPP_CMD "scmpp"  /* scmpp [number of pp your want in most of time]. */
+#define BSTGPU_CMD "bstgpu"  /* bstgpu [0-256] */
+#define BSTPP_CMD "bstpp"  /* bstpp [0-256] */
+#define LIMIT_CMD "lmt"  /* lmt [0 or 1] */
+#define MAX_TOKEN 20
+#define FULL_UTILIZATION 256
+
+static ssize_t mpgpu_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	char *pstart, *cprt = NULL;
+	u32 val = 0;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	cprt = skip_spaces(buf);
+	pstart = strsep(&cprt," ");
+	if (strlen(pstart) < 1)
+		goto quit;
+
+	if (!strncmp(pstart, PREHEAT_CMD, MAX_TOKEN)) {
+		if (pmali_plat->plat_preheat) {
+			pmali_plat->plat_preheat();
+		}
+	} else if (!strncmp(pstart, PLL2_CMD, MAX_TOKEN)) {
+		int base = 10;
+		if ((strlen(cprt) > 2) && (cprt[0] == '0') &&
+				(cprt[1] == 'x' || cprt[1] == 'X'))
+			base = 16;
+		if (kstrtouint(cprt, base, &val) <0)
+			goto quit;
+		if (val < 11)
+			pmali_plat->cfg_clock = pmali_plat->cfg_clock_bkup;
+		else
+			pmali_plat->cfg_clock = pmali_plat->turbo_clock;
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		set_str_src(val);
+	} else if (!strncmp(pstart, SCMPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < pmali_plat->cfg_pp)) {
+			pmali_plat->sc_mpp = val;
+		}
+	} else if (!strncmp(pstart, BSTGPU_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_gpu = val;
+		}
+	} else if (!strncmp(pstart, BSTPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_pp = val;
+		}
+	} else if (!strncmp(pstart, LIMIT_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+
+		if (val < 2) {
+			pmali_plat->limit_on = val;
+			if (val == 0) {
+				pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+				pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+				revise_mali_rt();
+			}
+		}
+	}
+quit:
+	return count;
+}
+
+static ssize_t scale_mode_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_mali_schel_mode());
+}
+
+static ssize_t scale_mode_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	set_mali_schel_mode(val);
+
+	return count;
+}
+
+#if 0
+static ssize_t max_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxpp:%d, maxpp_sysfs:%d, total=%d\n",
+			pmali_plat->scale_info.maxpp, pmali_plat->maxpp_sysfs,
+			pmali_plat->cfg_pp);
+	return sprintf(buf, "%d\n", pmali_plat->cfg_pp);
+}
+
+static ssize_t max_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_pp) || (val < pinfo->minpp))
+		return -EINVAL;
+
+	pmali_plat->maxpp_sysfs = val;
+	pinfo->maxpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minpp);
+}
+
+static ssize_t min_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxpp) || (val < 1))
+		return -EINVAL;
+
+	pinfo->minpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+#endif
+
+static ssize_t max_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxclk:%d, maxclk_sys:%d, max gpu level=%d\n",
+			pmali_plat->scale_info.maxclk, pmali_plat->maxclk_sysfs, get_gpu_max_clk_level());
+	return sprintf(buf, "%d\n", get_gpu_max_clk_level());
+}
+
+static ssize_t max_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_clock) || (val < pinfo->minclk))
+		return -EINVAL;
+
+	pmali_plat->maxclk_sysfs = val;
+	pinfo->maxclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minclk);
+}
+
+static ssize_t min_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxclk))
+		return -EINVAL;
+
+	pinfo->minclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_current_frequency());
+}
+
+static ssize_t freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(val, pp, 1);
+
+	return count;
+}
+
+#if 0
+static ssize_t current_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+	return sprintf(buf, "%d\n", pp);
+}
+
+static ssize_t current_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+
+	get_mali_rt_clkpp(&clk, &pp);
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(clk, val, 1);
+
+	return count;
+}
+#endif
+
+static struct class_attribute mali_class_attrs[] = {
+	__ATTR(domain_stat,	0644, domain_stat_read, NULL),
+	__ATTR(mpgpucmd,	0644, NULL,		mpgpu_write),
+	__ATTR(scale_mode,	0644, scale_mode_read,  scale_mode_write),
+	__ATTR(min_freq,	0644, min_freq_read,  	min_freq_write),
+	__ATTR(max_freq,	0644, max_freq_read,	max_freq_write),
+#if 0
+	__ATTR(min_pp,		0644, min_pp_read,	min_pp_write),
+	__ATTR(max_pp,		0644, max_pp_read,	max_pp_write),
+#endif
+	__ATTR(cur_freq,	0644, freq_read,	freq_write),
+#if 0
+	__ATTR(cur_pp,		0644, current_pp_read,	current_pp_write),
+#endif
+};
+
+static struct class mpgpu_class = {
+	.name = "mpgpu",
+};
+
+int mpgpu_class_init(void)
+{
+	int ret = 0;
+	int i;
+	int attr_num =  ARRAY_SIZE(mali_class_attrs);
+
+	ret = class_register(&mpgpu_class);
+	if (ret) {
+		printk(KERN_ERR "%s: class_register failed\n", __func__);
+		return ret;
+	}
+	for (i = 0; i< attr_num; i++) {
+		ret = class_create_file(&mpgpu_class, &mali_class_attrs[i]);
+		if (ret) {
+			printk(KERN_ERR "%d ST: class item failed to register\n", i);
+		}
+	}
+	return ret;
+}
+
+void  mpgpu_class_exit(void)
+{
+	class_unregister(&mpgpu_class);
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
new file mode 100644
index 0000000..d16675e
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
@@ -0,0 +1,246 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include <asm/io.h>
+#ifdef CONFIG_GPU_THERMAL
+#include <linux/gpu_cooling.h>
+#include <linux/gpucore_cooling.h>
+#include <linux/amlogic/aml_thermal_hw.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+/*
+ *    For Meson 8 M2.
+ *
+ */
+static void mali_plat_preheat(void);
+static mali_plat_info_t mali_plat_data = {
+    .bst_gpu = 210, /* threshold for boosting gpu. */
+    .bst_pp = 160, /* threshold for boosting PP. */
+    .have_switch = 1,
+    .limit_on = 1,
+    .plat_preheat = mali_plat_preheat,
+};
+
+static void mali_plat_preheat(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    u32 pre_fs;
+    u32 clk, pp;
+
+    if (get_mali_schel_mode() != MALI_PP_FS_SCALING)
+        return;
+
+    get_mali_rt_clkpp(&clk, &pp);
+    pre_fs = mali_plat_data.def_clock + 1;
+    if (clk < pre_fs)
+        clk = pre_fs;
+    if (pp < mali_plat_data.sc_mpp)
+        pp = mali_plat_data.sc_mpp;
+    set_mali_rt_clkpp(clk, pp, 1);
+#endif
+}
+
+mali_plat_info_t* get_mali_plat_data(void) {
+    return &mali_plat_data;
+}
+
+int get_mali_freq_level(int freq)
+{
+    int i = 0, level = -1;
+    int mali_freq_num;
+
+    if (freq < 0)
+        return level;
+
+    mali_freq_num = mali_plat_data.dvfs_table_size - 1;
+    if (freq < mali_plat_data.clk_sample[0])
+        level = mali_freq_num-1;
+    else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1])
+        level = 0;
+    else {
+        for (i=0; i<mali_freq_num - 1 ;i++) {
+            if (freq >= mali_plat_data.clk_sample[i] && freq < mali_plat_data.clk_sample[i + 1]) {
+                level = i;
+                level = mali_freq_num-level - 1;
+                break;
+            }
+        }
+    }
+    return level;
+}
+
+unsigned int get_mali_max_level(void)
+{
+    return mali_plat_data.dvfs_table_size - 1;
+}
+
+int get_gpu_max_clk_level(void)
+{
+    return mali_plat_data.cfg_clock;
+}
+
+#ifdef CONFIG_GPU_THERMAL
+static void set_limit_mali_freq(u32 idx)
+{
+    if (mali_plat_data.limit_on == 0)
+        return;
+    if (idx > mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk)
+        return;
+    if (idx > mali_plat_data.maxclk_sysfs) {
+        printk("idx > max freq\n");
+        return;
+    }
+    mali_plat_data.scale_info.maxclk= idx;
+    revise_mali_rt();
+}
+
+static u32 get_limit_mali_freq(void)
+{
+    return mali_plat_data.scale_info.maxclk;
+}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 get_mali_utilization(void)
+{
+#ifndef MESON_DRV_BRING
+    return 55;
+#else
+    return (_mali_ukk_utilization_pp() * 100) / 256;
+#endif
+}
+#endif
+#endif
+
+#ifdef CONFIG_GPU_THERMAL
+static u32 set_limit_pp_num(u32 num)
+{
+    u32 ret = -1;
+    if (mali_plat_data.limit_on == 0)
+        goto quit;
+    if (num > mali_plat_data.cfg_pp ||
+            num < mali_plat_data.scale_info.minpp)
+        goto quit;
+
+    if (num > mali_plat_data.maxpp_sysfs) {
+        printk("pp > sysfs set pp\n");
+        goto quit;
+    }
+
+    mali_plat_data.scale_info.maxpp = num;
+    revise_mali_rt();
+    ret = 0;
+quit:
+    return ret;
+}
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 mali_get_online_pp(void)
+{
+    unsigned int val;
+    mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+    val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+    if (val == 0x07)    /* No pp is working */
+        return 0;
+
+#ifndef MESON_DRV_BRING
+    return 2;
+#else
+    return mali_executor_get_num_cores_enabled();
+#endif
+}
+#endif
+#endif
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+    //dev_set_drvdata(&ptr_plt_dev->dev, &mali_plat_data);
+    mali_dt_info(ptr_plt_dev, &mali_plat_data);
+    mali_clock_init_clk_tree(ptr_plt_dev);
+    return 0;
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+    if (mali_core_scaling_init(&mali_plat_data) < 0)
+        return -1;
+    return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+    mali_core_scaling_term();
+    return 0;
+}
+
+void mali_post_init(void)
+{
+#ifdef CONFIG_GPU_THERMAL
+    int err;
+    struct gpufreq_cooling_device *gcdev = NULL;
+    struct gpucore_cooling_device *gccdev = NULL;
+
+    gcdev = gpufreq_cooling_alloc();
+    register_gpu_freq_info(get_current_frequency);
+    if (IS_ERR(gcdev))
+        printk("malloc gpu cooling buffer error!!\n");
+    else if (!gcdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gcdev->get_gpu_freq_level = get_mali_freq_level;
+        gcdev->get_gpu_max_level = get_mali_max_level;
+        gcdev->set_gpu_freq_idx = set_limit_mali_freq;
+        gcdev->get_gpu_current_max_level = get_limit_mali_freq;
+#ifdef CONFIG_DEVFREQ_THERMAL
+        gcdev->get_gpu_freq = get_mali_freq;
+        gcdev->get_gpu_loading = get_mali_utilization;
+        gcdev->get_online_pp = mali_get_online_pp;
+#endif
+        err = gpufreq_cooling_register(gcdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gcdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu cooling register okay with err=%d\n",err);
+    }
+
+    gccdev = gpucore_cooling_alloc();
+    if (IS_ERR(gccdev))
+        printk("malloc gpu core cooling buffer error!!\n");
+    else if (!gccdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gccdev->max_gpu_core_num=mali_plat_data.cfg_pp;
+        gccdev->set_max_pp_num=set_limit_pp_num;
+        err = (int)gpucore_cooling_register(gccdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gccdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu core cooling register okay with err=%d\n",err);
+    }
+#endif
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
new file mode 100644
index 0000000..aa8a77a
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.c
+ * Example core scaling policy.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+
+#if AMLOGIC_GPU_USE_GPPLL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16)
+#include <linux/amlogic/amports/gp_pll.h>
+#else
+#include <linux/amlogic/media/clk/gp_pll.h>
+#endif
+#endif
+
+#define LOG_MALI_SCALING 1
+#include "meson_main2.h"
+#include "mali_clock.h"
+
+static int currentStep;
+#ifndef CONFIG_MALI_DVFS
+static int num_cores_enabled;
+static int lastStep;
+static struct work_struct wq_work;
+static mali_plat_info_t* pmali_plat = NULL;
+#endif
+static int  scaling_mode = MALI_PP_FS_SCALING;
+extern int  mali_pm_statue;
+//static int  scaling_mode = MALI_SCALING_DISABLE;
+//static int  scaling_mode = MALI_PP_SCALING;
+
+#if AMLOGIC_GPU_USE_GPPLL
+static struct gp_pll_user_handle_s *gp_pll_user_gpu;
+static int is_gp_pll_get;
+static int is_gp_pll_put;
+#endif
+static unsigned scaling_dbg_level = 0;
+module_param(scaling_dbg_level, uint, 0644);
+MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level");
+
+#define scalingdbg(level, fmt, arg...)				   \
+	do {											   \
+		if (scaling_dbg_level >= (level))			   \
+		printk(fmt , ## arg);						   \
+	} while (0)
+
+#ifndef CONFIG_MALI_DVFS
+static inline void mali_clk_exected(void)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	uint32_t execStep = currentStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[currentStep];
+#endif
+
+	//if (pdvfs[currentStep].freq_index == pdvfs[lastStep].freq_index) return;
+	if ((pdvfs[execStep].freq_index == pdvfs[lastStep].freq_index) ||
+		(pdvfs[execStep].clk_freq == pdvfs[lastStep].clk_freq)){
+		return;
+	}
+
+#if AMLOGIC_GPU_USE_GPPLL
+	if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+		gp_pll_request(gp_pll_user_gpu);
+		if (!is_gp_pll_get) {
+			//printk("not get pll\n");
+			execStep = currentStep - 1;
+		}
+	} else {
+		//not get the gp pll, do need put
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+		gp_pll_release(gp_pll_user_gpu);
+	}
+#endif
+	//mali_dev_pause();
+	mali_clock_set(pdvfs[execStep].freq_index);
+	//mali_dev_resume();
+	lastStep = execStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	if (is_gp_pll_put) {
+		//printk("release gp0 pll\n");
+		gp_pll_release(gp_pll_user_gpu);
+		gp_pll_request(gp_pll_user_gpu);
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+	}
+#endif
+
+}
+#if AMLOGIC_GPU_USE_GPPLL
+static int gp_pll_user_cb_gpu(struct gp_pll_user_handle_s *user,
+		int event)
+{
+	if (event == GP_PLL_USER_EVENT_GRANT) {
+		//printk("granted\n");
+		is_gp_pll_get = 1;
+		is_gp_pll_put = 0;
+		schedule_work(&wq_work);
+	} else if (event == GP_PLL_USER_EVENT_YIELD) {
+		//printk("ask for yield\n");
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 1;
+		schedule_work(&wq_work);
+	}
+
+	return 0;
+}
+#endif
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+    cores = cores;
+    return 0;
+}
+
+static void do_scaling(struct work_struct *work)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	int err = mali_perf_set_num_pp_cores(num_cores_enabled);
+    if (err < 0) scalingdbg(1, "set pp failed");
+
+	scalingdbg(1, "set pp cores to %d\n", num_cores_enabled);
+	scalingdbg(1, "pdvfs[%d].freq_index=%d, pdvfs[%d].freq_index=%d\n",
+			currentStep, pdvfs[currentStep].freq_index,
+			lastStep, pdvfs[lastStep].freq_index);
+	mali_clk_exected();
+#ifdef CONFIG_MALI400_PROFILING
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+			MALI_PROFILING_EVENT_CHANNEL_GPU |
+			MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+			get_current_frequency(),
+			0,	0,	0,	0);
+#endif
+}
+#endif
+
+u32 revise_set_clk(u32 val, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+
+	pinfo = &pmali_plat->scale_info;
+
+	if (val < pinfo->minclk)
+		val = pinfo->minclk;
+	else if (val >  pinfo->maxclk)
+		val =  pinfo->maxclk;
+
+	if (val != currentStep) {
+		currentStep = val;
+		if (flush)
+			schedule_work(&wq_work);
+		else
+			ret = 1;
+	}
+#endif
+	return ret;
+}
+
+void get_mali_rt_clkpp(u32* clk, u32* pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	*clk = currentStep;
+	*pp = num_cores_enabled;
+#endif
+}
+
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+	u32 flush_work = 0;
+
+	pinfo = &pmali_plat->scale_info;
+	if (clk < pinfo->minclk)
+		clk = pinfo->minclk;
+	else if (clk >  pinfo->maxclk)
+		clk =  pinfo->maxclk;
+
+	if (clk != currentStep) {
+		currentStep = clk;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+	if (pp < pinfo->minpp)
+		pp = pinfo->minpp;
+	else if (pp > pinfo->maxpp)
+		pp = pinfo->maxpp;
+
+	if (pp != num_cores_enabled) {
+		num_cores_enabled = pp;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+
+	if (flush_work)
+		schedule_work(&wq_work);
+#endif
+	return ret;
+}
+
+void revise_mali_rt(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	set_mali_rt_clkpp(currentStep, num_cores_enabled, 1);
+#endif
+}
+
+void flush_scaling_job(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	cancel_work_sync(&wq_work);
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 enable_one_core(void)
+{
+	scalingdbg(2, "meson:	 one more pp, curent has %d pp cores\n",  num_cores_enabled + 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled + 1, 0);
+}
+
+static u32 disable_one_core(void)
+{
+	scalingdbg(2, "meson: disable one pp, current has %d pp cores\n",  num_cores_enabled - 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled - 1, 0);
+}
+
+static u32 enable_max_num_cores(void)
+{
+	return set_mali_rt_clkpp(currentStep, pmali_plat->scale_info.maxpp, 0);
+}
+
+static u32 enable_pp_cores(u32 val)
+{
+	scalingdbg(2, "meson: enable %d pp cores\n", val);
+	return set_mali_rt_clkpp(currentStep, val, 0);
+}
+#endif
+
+int mali_core_scaling_init(mali_plat_info_t *mali_plat)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_plat == NULL) {
+		scalingdbg(2, " Mali platform data is NULL!!!\n");
+		return -1;
+	}
+
+	pmali_plat = mali_plat;
+    printk("mali_plat=%p\n", mali_plat);
+	num_cores_enabled = pmali_plat->sc_mpp;
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_gpu = gp_pll_user_register("gpu", 1,
+		gp_pll_user_cb_gpu);
+	//not get the gp pll, do need put
+	is_gp_pll_get = 0;
+	is_gp_pll_put = 0;
+	if (gp_pll_user_gpu == NULL) printk("register gp pll user for gpu failed\n");
+#endif
+
+	currentStep = pmali_plat->def_clock;
+	lastStep = currentStep;
+	INIT_WORK(&wq_work, do_scaling);
+#endif
+	return 0;
+	/* NOTE: Mali is not fully initialized at this point. */
+}
+
+void mali_core_scaling_term(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	flush_scheduled_work();
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_unregister(gp_pll_user_gpu);
+#endif
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 mali_threshold [] = {
+	40, /* 40% */
+	50, /* 50% */
+	90, /* 90% */
+};
+#endif
+
+void mali_pp_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+
+	if (mali_threshold[2] < utilization_pp)
+		ret = enable_max_num_cores();
+	else if (mali_threshold[1]< utilization_pp)
+		ret = enable_one_core();
+	else if (0 < utilization_pp)
+		ret = disable_one_core();
+	if (ret == 1)
+		schedule_work(&wq_work);
+#endif
+}
+
+#if LOG_MALI_SCALING
+void trace_utilization(int utilization_gpu, u32 current_idx, u32 next,
+		u32 current_pp, u32 next_pp)
+{
+	char direction;
+	if (next > current_idx)
+		direction = '>';
+	else if ((current_idx > pmali_plat->scale_info.minpp) && (next < current_idx))
+		direction = '<';
+	else
+		direction = '~';
+
+	scalingdbg(2, "[SCALING]%c (%3d-->%3d)@%3d{%3d - %3d}. pp:(%d-->%d)\n",
+			direction,
+			get_mali_freq(current_idx),
+			get_mali_freq(next),
+			utilization_gpu,
+			pmali_plat->dvfs_table[current_idx].downthreshold,
+			pmali_plat->dvfs_table[current_idx].upthreshold,
+			current_pp, next_pp);
+}
+#endif
+
+#ifndef CONFIG_MALI_DVFS
+static int mali_stay_count = 0;
+static void mali_decide_next_status(int utilization_pp, int* next_fs_idx,
+		int* pp_change_flag)
+{
+	u32 mali_up_limit, decided_fs_idx;
+	u32 ld_left, ld_right;
+	u32 ld_up, ld_down;
+	u32 change_mode;
+
+	*pp_change_flag = 0;
+	change_mode = 0;
+
+	scalingdbg(5, "line(%d), scaling_mode=%d, MALI_TURBO_MODE=%d, turbo=%d, maxclk=%d\n",
+			__LINE__,  scaling_mode, MALI_TURBO_MODE,
+		    pmali_plat->turbo_clock, pmali_plat->scale_info.maxclk);
+
+	mali_up_limit = (scaling_mode ==  MALI_TURBO_MODE) ?
+		pmali_plat->turbo_clock : pmali_plat->scale_info.maxclk;
+	decided_fs_idx = currentStep;
+
+	ld_up = pmali_plat->dvfs_table[currentStep].upthreshold;
+	ld_down = pmali_plat->dvfs_table[currentStep].downthreshold;
+
+	scalingdbg(2, "utilization=%d,  ld_up=%d\n ", utilization_pp,  ld_up);
+	if (utilization_pp >= ld_up) { /* go up */
+
+		scalingdbg(2, "currentStep=%d,  mali_up_limit=%d\n ", currentStep, mali_up_limit);
+		if (currentStep < mali_up_limit) {
+			change_mode = 1;
+			if ((currentStep < pmali_plat->def_clock) && (utilization_pp > pmali_plat->bst_gpu))
+				decided_fs_idx = pmali_plat->def_clock;
+			else
+				decided_fs_idx++;
+		}
+		if ((utilization_pp >= ld_up) &&
+				(num_cores_enabled < pmali_plat->scale_info.maxpp)) {
+			if ((num_cores_enabled < pmali_plat->sc_mpp) && (utilization_pp >= pmali_plat->bst_pp)) {
+				*pp_change_flag = 1;
+				change_mode = 1;
+			} else if (change_mode == 0) {
+				*pp_change_flag = 2;
+				change_mode = 1;
+			}
+		}
+#if LOG_MALI_SCALING
+		scalingdbg(2, "[nexting..] [LD:%d]-> FS[CRNT:%d LMT:%d NEXT:%d] PP[NUM:%d LMT:%d MD:%d][F:%d]\n",
+				utilization_pp, currentStep, mali_up_limit, decided_fs_idx,
+				num_cores_enabled, pmali_plat->scale_info.maxpp, *pp_change_flag, change_mode);
+#endif
+	} else if (utilization_pp <= ld_down) { /* go down */
+		if (mali_stay_count > 0) {
+			*next_fs_idx = decided_fs_idx;
+			mali_stay_count--;
+			return;
+		}
+
+		if (num_cores_enabled > pmali_plat->sc_mpp) {
+			change_mode = 1;
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		} else if (currentStep > pmali_plat->scale_info.minclk) {
+			change_mode = 1;
+		} else if (num_cores_enabled > 1) { /* decrease PPS */
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				scalingdbg(2, "ld_left=%d, ld_right=%d\n", ld_left, ld_right);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		}
+
+		if (change_mode == 1) {
+			decided_fs_idx--;
+		} else if (change_mode == 2) { /* decrease PPS */
+			*pp_change_flag = -1;
+		}
+	}
+
+	if (decided_fs_idx < 0 ) {
+		printk("gpu debug, next index below 0\n");
+		decided_fs_idx = 0;
+	}
+	if (decided_fs_idx > pmali_plat->scale_info.maxclk) {
+		decided_fs_idx = pmali_plat->scale_info.maxclk;
+		printk("gpu debug, next index above max-1, set to %d\n", decided_fs_idx);
+	}
+
+	if (change_mode)
+		mali_stay_count = pmali_plat->dvfs_table[decided_fs_idx].keep_count;
+
+	*next_fs_idx = decided_fs_idx;
+}
+#endif
+
+void mali_pp_fs_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+	int pp_change_flag = 0;
+	u32 next_idx = 0;
+
+#if LOG_MALI_SCALING
+	u32 last_pp = num_cores_enabled;
+#endif
+	mali_decide_next_status(utilization_pp, &next_idx, &pp_change_flag);
+
+	if (pp_change_flag == 1)
+		ret = enable_pp_cores(pmali_plat->sc_mpp);
+	else if (pp_change_flag == 2)
+		ret = enable_one_core();
+	else if (pp_change_flag == -1) {
+		ret = disable_one_core();
+	}
+
+#if LOG_MALI_SCALING
+	if (pp_change_flag || (next_idx != currentStep))
+		trace_utilization(utilization_pp, currentStep, next_idx, last_pp, num_cores_enabled);
+#endif
+
+	if (next_idx != currentStep) {
+		ret = 1;
+		currentStep = next_idx;
+	}
+
+	if (ret == 1)
+		schedule_work(&wq_work);
+#ifdef CONFIG_MALI400_PROFILING
+	else
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				MALI_PROFILING_EVENT_CHANNEL_GPU |
+				MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				get_current_frequency(),
+				0,	0,	0,	0);
+#endif
+#endif
+}
+
+u32 get_mali_schel_mode(void)
+{
+	return scaling_mode;
+}
+
+void set_mali_schel_mode(u32 mode)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mode >= MALI_SCALING_MODE_MAX)
+		return;
+	scaling_mode = mode;
+
+	//disable thermal in turbo mode
+	if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->limit_on = 0;
+	} else {
+		pmali_plat->limit_on = 1;
+	}
+	/* set default performance range. */
+	pmali_plat->scale_info.minclk = pmali_plat->cfg_min_clock;
+	pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+	pmali_plat->scale_info.minpp = pmali_plat->cfg_min_pp;
+	pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+
+	/* set current status and tune max freq */
+	if (scaling_mode == MALI_PP_FS_SCALING) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_pp_cores(pmali_plat->sc_mpp);
+	} else if (scaling_mode == MALI_SCALING_DISABLE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_max_num_cores();
+	} else if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->turbo_clock;
+		enable_max_num_cores();
+	}
+	currentStep = pmali_plat->scale_info.maxclk;
+	schedule_work(&wq_work);
+#endif
+}
+
+u32 get_current_frequency(void)
+{
+	return get_mali_freq(currentStep);
+}
+
+void mali_gpu_utilization_callback(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_pm_statue)
+		return;
+
+	switch (scaling_mode) {
+		case MALI_PP_FS_SCALING:
+			mali_pp_fs_scaling_update(utilization_pp);
+			break;
+		case MALI_PP_SCALING:
+			mali_pp_scaling_update(utilization_pp);
+			break;
+		default:
+			break;
+	}
+#endif
+}
+static u32 clk_cntl_save = 0;
+void mali_dev_freeze(void)
+{
+	clk_cntl_save = mplt_read(HHI_MALI_CLK_CNTL);
+}
+
+void mali_dev_restore(void)
+{
+
+	mplt_write(HHI_MALI_CLK_CNTL, clk_cntl_save);
+	if (pmali_plat && pmali_plat->pdev) {
+		mali_clock_init_clk_tree(pmali_plat->pdev);
+	} else {
+		printk("error: init clock failed, pmali_plat=%p, pmali_plat->pdev=%p\n",
+				pmali_plat, pmali_plat == NULL ? NULL: pmali_plat->pdev);
+	}
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
new file mode 100755
index 0000000..5b6ef37
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_juno_soc.o
+
+
+obj-m += juno_mali_opp.o
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
new file mode 100755
index 0000000..ccfd8cc
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/scpi_protocol.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp_add opp_add
+#endif /* Linux >= 3.13 */
+
+
+static int init_juno_opps_from_scpi(struct device *dev)
+{
+	struct scpi_opp *sopp;
+	int i;
+
+	/* Hard coded for Juno. 2 is GPU domain */
+	sopp = scpi_dvfs_get_opps(2);
+	if (IS_ERR_OR_NULL(sopp))
+		return PTR_ERR(sopp);
+
+	for (i = 0; i < sopp->count; i++) {
+		struct scpi_opp_entry *e = &sopp->opp[i];
+
+		dev_info(dev, "Mali OPP from SCPI: %u Hz @ %u mV\n",
+				e->freq_hz, e->volt_mv);
+
+		dev_pm_opp_add(dev, e->freq_hz, e->volt_mv * 1000);
+	}
+
+	return 0;
+}
+
+static int juno_setup_opps(void)
+{
+	struct device_node *np;
+	struct platform_device *pdev;
+	int err;
+
+	np = of_find_node_by_name(NULL, "gpu");
+	if (!np) {
+		pr_err("Failed to find DT entry for Mali\n");
+		return -EFAULT;
+	}
+
+	pdev = of_find_device_by_node(np);
+	if (!pdev) {
+		pr_err("Failed to find device for Mali\n");
+		of_node_put(np);
+		return -EFAULT;
+	}
+
+	err = init_juno_opps_from_scpi(&pdev->dev);
+
+	of_node_put(np);
+
+	return err;
+}
+
+module_init(juno_setup_opps);
+MODULE_LICENSE("GPL");
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
new file mode 100755
index 0000000..c654818
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <linux/thermal.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_smc.h>
+
+/* Versatile Express (VE) Juno Development Platform */
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 65,
+	.mmu_irq_number = 66,
+	.gpu_irq_number = 64,
+	.io_memory_region = {
+			     .start = 0x2D000000,
+			     .end = 0x2D000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+/*
+ * Juno Protected Mode integration
+ */
+
+/* SMC Function Numbers */
+#define JUNO_SMC_PROTECTED_ENTER_FUNC  0xff06
+#define JUNO_SMC_PROTECTED_RESET_FUNC 0xff07
+
+static int juno_protected_mode_enter(struct kbase_device *kbdev)
+{
+	/* T62X in SoC detected */
+	u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+		JUNO_SMC_PROTECTED_ENTER_FUNC, false,
+		0, 0, 0);
+	return ret;
+}
+
+/* TODO: Remove these externs, reset should should be done by the firmware */
+extern void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+extern u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+static int juno_protected_mode_reset(struct kbase_device *kbdev)
+{
+
+	/* T62X in SoC detected */
+	u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+		JUNO_SMC_PROTECTED_RESET_FUNC, false,
+		0, 0, 0);
+
+	/* TODO: Remove this reset, it should be done by the firmware */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET, NULL);
+
+	while ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL)
+			& RESET_COMPLETED) != RESET_COMPLETED)
+		;
+
+	return ret;
+}
+
+static bool juno_protected_mode_supported(struct kbase_device *kbdev)
+{
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+
+	/*
+	 * Protected mode is only supported for the built in GPU
+	 * _and_ only if the right firmware is running.
+	 *
+	 * Given that at init time the GPU is not powered up the
+	 * juno_protected_mode_reset function can't be used as
+	 * is needs to access GPU registers.
+	 * However, although we don't want the GPU to boot into
+	 * protected mode we know a GPU reset will be done after
+	 * this function is called so although we set the GPU to
+	 * protected mode it will exit protected mode before the
+	 * driver is ready to run work.
+	 */
+	if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) &&
+			(kbdev->reg_start == 0x2d000000))
+		return juno_protected_mode_enter(kbdev) == 0;
+
+	return false;
+}
+
+struct kbase_protected_ops juno_protected_ops = {
+	.protected_mode_enter = juno_protected_mode_enter,
+	.protected_mode_reset = juno_protected_mode_reset,
+	.protected_mode_supported = juno_protected_mode_supported,
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
new file mode 100755
index 0000000..ab29e9d
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 600000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 600000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (&juno_protected_ops)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
+#ifdef CONFIG_DEVFREQ_THERMAL
+extern struct devfreq_cooling_ops juno_model_ops;
+#endif
+extern struct kbase_protected_ops juno_protected_ops;
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
new file mode 100755
index 0000000..7cb3be7
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @brief Entry point to transfer control to a platform for early initialization
+ *
+ * This function is called early on in the initialization during execution of
+ * @ref kbase_driver_init.
+ *
+ * @return Zero to indicate success non-zero for failure.
+ */
+int kbase_platform_early_init(void);
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
new file mode 100755
index 0000000..01f9dfc
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+/**
+ * kbase_platform_fake_register - Entry point for fake platform registration
+ *
+ * This function is called early on in the initialization during execution of
+ * kbase_driver_init.
+ *
+ * Return: 0 to indicate success, non-zero for failure.
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Entry point for fake platform unregistration
+ *
+ * This function is called in the termination during execution of
+ * kbase_driver_exit.
+ */
+void kbase_platform_fake_unregister(void);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
new file mode 100755
index 0000000..084a156
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
new file mode 100755
index 0000000..dc4471b
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -0,0 +1,82 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq()
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq()
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..15ce2bc
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -0,0 +1,85 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+#include "mali_kbase_config_platform.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+	.start = 0xFC010000,
+	.end = 0xFC010000 + (4096 * 4) - 1
+	}
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
new file mode 100755
index 0000000..4665f98
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,279 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START	(0x10000000)
+#define SYS_CFGDATA_OFFSET		(0x000000A0)
+#define SYS_CFGCTRL_OFFSET		(0x000000A4)
+#define SYS_CFGSTAT_OFFSET		(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		(1 << 31)
+#define READ_REG_BIT_VALUE			(0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			(0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		(1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			(1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE	(0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE		(2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		(1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			(1 <<  1)
+
+#define FEED_REG_BIT_MASK			(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+/* the following three values used for reading
+ * HBI value of the LogicTile daughterboard */
+#define VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 (0x10000000)
+#define VE_SYS_PROC_ID1_OFFSET (0x00000088)
+#define VE_LOGIC_TILE_HBI_MASK (0x00000FFF)
+
+#define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos))
+
+/**
+ * Values used for determining the GPU frequency based on the LogicTile type
+ * Used by the function kbase_get_platform_logic_tile_type
+ */
+#define VE_VIRTEX6_GPU_FREQ_MIN 5000
+#define VE_VIRTEX6_GPU_FREQ_MAX 5000
+#define VE_VIRTEX7_GPU_FREQ_MIN 40000
+#define VE_VIRTEX7_GPU_FREQ_MAX 40000
+#define VE_DEFAULT_GPU_FREQ_MIN 5000
+#define VE_DEFAULT_GPU_FREQ_MAX 5000
+
+
+#define CPU_CLOCK_SPEED_UNDEFINED (0)
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed -Retrieves the CPU clock speed
+ * @cpu_clock - the value of CPU clock speed in MHz
+ *
+ * Returns 0 on success, error code otherwise.
+ *
+ * The implementation is platform specific.
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	int err = 0;
+	u32 reg_val = 0;
+	u32 osc2_value = 0;
+	u32 pa_divide = 0;
+	u32 pb_divide = 0;
+	u32 pc_divide = 0;
+	void __iomem *syscfg_reg = NULL;
+	void __iomem *scc_reg = NULL;
+
+	if (CPU_CLOCK_SPEED_UNDEFINED != cpu_clock_speed) {
+		*cpu_clock = cpu_clock_speed;
+		return 0;
+	}
+
+	/* Init the value in case something goes wrong */
+	*cpu_clock = 0;
+
+	/* Map CPU register into virtual memory */
+	syscfg_reg = ioremap(MOTHERBOARD_SYS_CFG_START, 0x1000);
+	if (syscfg_reg == NULL) {
+		err = -EIO;
+		goto syscfg_reg_map_failed;
+	}
+
+	scc_reg = ioremap(CORETILE_EXPRESS_A9X4_SCC_START, 0x1000);
+	if (scc_reg == NULL) {
+		err = -EIO;
+		goto scc_reg_map_failed;
+	}
+
+	raw_spin_lock(&syscfg_lock);
+
+	/* Read SYS regs - OSC2 */
+	reg_val = readl(syscfg_reg + SYS_CFGCTRL_OFFSET);
+
+	/* Check if there is any other undergoing request */
+	if (reg_val & SYS_CFGCTRL_START_BIT_VALUE) {
+		err = -EBUSY;
+		goto ongoing_request;
+	}
+	/* Reset the CGFGSTAT reg */
+	writel(0, (syscfg_reg + SYS_CFGSTAT_OFFSET));
+
+	writel(SYS_CFGCTRL_START_BIT_VALUE | READ_REG_BIT_VALUE |
+			DCC_DEFAULT_BIT_VALUE |
+			SYS_CFG_OSC_FUNC_BIT_VALUE |
+			SITE_DEFAULT_BIT_VALUE |
+			BOARD_STACK_POS_DEFAULT_BIT_VALUE |
+			DEVICE_DEFAULT_BIT_VALUE,
+			(syscfg_reg + SYS_CFGCTRL_OFFSET));
+	/* Wait for the transaction to complete */
+	while (!(readl(syscfg_reg + SYS_CFGSTAT_OFFSET) &
+			SYS_CFG_COMPLETE_BIT_VALUE))
+		;
+	/* Read SYS_CFGSTAT Register to get the status of submitted
+	 * transaction */
+	reg_val = readl(syscfg_reg + SYS_CFGSTAT_OFFSET);
+
+	if (reg_val & SYS_CFG_ERROR_BIT_VALUE) {
+		/* Error while setting register */
+		err = -EIO;
+		goto set_reg_error;
+	}
+
+	osc2_value = readl(syscfg_reg + SYS_CFGDATA_OFFSET);
+	/* Read the SCC CFGRW0 register */
+	reg_val = readl(scc_reg);
+
+	/*
+	 * Select the appropriate feed:
+	 * CFGRW0[0] - CLKOB
+	 * CFGRW0[1] - CLKOC
+	 * CFGRW0[2] - FACLK (CLK)B FROM AXICLK PLL)
+	 */
+	/* Calculate the  FCLK */
+	if (IS_SINGLE_BIT_SET(reg_val, 0)) {
+		/* CFGRW0[0] - CLKOB */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[10:7] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PB_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 1)) {
+		/* CFGRW0[1] - CLKOC */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[14:11] */
+		pc_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PC_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PC_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 2)) {
+		/* CFGRW0[2] - FACLK */
+		/* CFGRW0[18:15] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PA_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[22:19] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PB_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else {
+		err = -EIO;
+	}
+
+set_reg_error:
+ongoing_request:
+	raw_spin_unlock(&syscfg_lock);
+	*cpu_clock /= HZ_IN_MHZ;
+
+	if (!err)
+		cpu_clock_speed = *cpu_clock;
+
+	iounmap(scc_reg);
+
+scc_reg_map_failed:
+	iounmap(syscfg_reg);
+
+syscfg_reg_map_failed:
+
+	return err;
+}
+
+/**
+ * kbase_get_platform_logic_tile_type -  determines which LogicTile type
+ * is used by Versatile Express
+ *
+ * When platform_config build parameter is specified as vexpress, i.e.,
+ * platform_config=vexpress, GPU frequency may vary dependent on the
+ * particular platform. The GPU frequency depends on the LogicTile type.
+ *
+ * This function determines which LogicTile type is used by the platform by
+ * reading the HBI value of the daughterboard which holds the LogicTile:
+ *
+ * 0x217 HBI0217 Virtex-6
+ * 0x192 HBI0192 Virtex-5
+ * 0x247 HBI0247 Virtex-7
+ *
+ * Return: HBI value of the logic tile daughterboard, zero if not accessible
+ */
+static u32 kbase_get_platform_logic_tile_type(void)
+{
+	void __iomem *syscfg_reg = NULL;
+	u32 sys_procid1 = 0;
+
+	syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 + VE_SYS_PROC_ID1_OFFSET, 4);
+	if (NULL != syscfg_reg) {
+		sys_procid1 = readl(syscfg_reg);
+		iounmap(syscfg_reg);
+	}
+
+	return sys_procid1 & VE_LOGIC_TILE_HBI_MASK;
+}
+
+u32 kbase_get_platform_min_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MIN;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MIN;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MIN;
+	}
+}
+
+u32 kbase_get_platform_max_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MAX;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MAX;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MAX;
+	}
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
new file mode 100755
index 0000000..da86569
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+/**
+ * Get the minimum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_min_freq(void);
+
+/**
+ * Get the maximum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_max_freq(void);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
new file mode 100755
index 0000000..d9bfabc
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
@@ -0,0 +1,16 @@
+#
+# (C) COPYRIGHT 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_vexpress.o
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
new file mode 100755
index 0000000..b0490ca
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 5000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 5000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..3ff0930
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -0,0 +1,79 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+			     .start = 0x2f010000,
+			     .end = 0x2f010000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
new file mode 100755
index 0000000..0cb41ce
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
new file mode 100755
index 0000000..22ffccb
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -0,0 +1,82 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 10000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 10000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..76ffe4a
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 75,
+	.mmu_irq_number = 76,
+	.gpu_irq_number = 77,
+	.io_memory_region = {
+			     .start = 0x2F000000,
+			     .end = 0x2F000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
new file mode 100755
index 0000000..816dff4
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ					    (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START		(0x10000000)
+#define SYS_CFGDATA_OFFSET				(0x000000A0)
+#define SYS_CFGCTRL_OFFSET				(0x000000A4)
+#define SYS_CFGSTAT_OFFSET				(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		  (1 << 31)
+#define READ_REG_BIT_VALUE				  (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			  (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		  (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			  (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE	      (2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		  (1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			  (1 <<  1)
+
+#define FEED_REG_BIT_MASK				(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+#define IS_SINGLE_BIT_SET(val, pos)		(val&(1<<pos))
+
+#define CPU_CLOCK_SPEED_UNDEFINED 0
+
+#define CPU_CLOCK_SPEED_6XV7 50
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed
+ * @brief  Retrieves the CPU clock speed.
+ *         The implementation is platform specific.
+ * @param[out]    cpu_clock - the value of CPU clock speed in MHz
+ * @return        0 on success, 1 otherwise
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	/* TODO: MIDBASE-2873 - Provide runtime detection of CPU clock freq for 6XV7 board */
+	*cpu_clock = CPU_CLOCK_SPEED_6XV7;
+
+	return 0;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
new file mode 100755
index 0000000..23647cc
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
new file mode 100755
index 0000000..5fa9b39
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_ukk_os.h
+ * Types and definitions that are common for Linux OSs for the kernel side of the
+ * User-Kernel interface.
+ */
+
+#ifndef _UKK_OS_H_ /* Linux version */
+#define _UKK_OS_H_
+
+#include <linux/fs.h>
+
+/**
+ * @addtogroup uk_api User-Kernel Interface API
+ * @{
+ */
+
+/**
+ * @addtogroup uk_api_kernel UKK (Kernel side)
+ * @{
+ */
+
+/**
+ * Internal OS specific data structure associated with each UKK session. Part
+ * of a ukk_session object.
+ */
+typedef struct ukkp_session {
+	int dummy;     /**< No internal OS specific data at this time */
+} ukkp_session;
+
+/** @} end group uk_api_kernel */
+
+/** @} end group uk_api */
+
+#endif /* _UKK_OS_H__ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/midgard/sconscript b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/sconscript
new file mode 100755
index 0000000..7b7ec77
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/midgard/sconscript
@@ -0,0 +1,85 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import sys
+Import('env')
+
+if Glob('tests/sconscript'):
+	SConscript( 'tests/sconscript' )
+
+mock_test = 0
+
+# Fake platform is a transient solution for GPL drivers running in kernel that does not provide configuration via platform data.
+# For such kernels fake_platform_device should be set to 1. For kernels providing platform data fake_platform_device should be set to 0.
+if env['platform_config']=='devicetree':
+	fake_platform_device = 0
+else:
+	fake_platform_device = 1
+
+# Source files required for kbase.
+kbase_src = [
+	Glob('*.c'),
+	Glob('backend/*/*.c'),
+	Glob('internal/*/*.c'),
+	Glob('platform/%s/*.c' % env['platform_config']),
+]
+
+if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1':
+	kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')]
+	mock_test = 1
+
+# we need platform config for GPL version using fake platform
+if fake_platform_device==1:
+	# Check if we are compiling for PBX
+	if env.KernelConfigEnabled("CONFIG_MACH_REALVIEW_PBX") and \
+	   env["platform_config"] in {"vexpress", "vexpress_6xvirtex7_10mhz"}:
+		sys.stderr.write("WARNING: Building for a PBX kernel but with platform_config=vexpress*\n")
+	# if the file platform config file is in the tpip directory then use that, otherwise use the default config directory
+	if Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])):
+		kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config']))
+	else:
+		kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/*%s.c' % (env['platform_config']))
+
+make_args = env.kernel_get_config_defines(ret_list = True,
+                                          fake = fake_platform_device) + [
+	'PLATFORM=%s' % env['platform'],
+	'MALI_ERROR_INJECT_ON=%s' % env['error_inject'],
+	'MALI_KERNEL_TEST_API=%s' % env['unit'],
+	'MALI_UNIT_TEST=%s' % env['unit'],
+	'MALI_RELEASE_NAME=%s' % env['mali_release_name'],
+	'MALI_MOCK_TEST=%s' % mock_test,
+	'MALI_CUSTOMER_RELEASE=%s' % env['release'],
+	'MALI_INSTRUMENTATION_LEVEL=%s' % env['instr'],
+	'MALI_COVERAGE=%s' % env['coverage'],
+	'MALI_BUS_LOG=%s' % env['buslog'],
+]
+
+kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src,
+                              make_args = make_args)
+
+# Add a dependency on kds.ko.
+# Only necessary when KDS is not built into the kernel.
+#
+if env['os'] != 'android':
+	if not env.KernelConfigEnabled("CONFIG_KDS"):
+		env.Depends(kbase, '$STATIC_LIB_PATH/kds.ko')
+
+# need Module.symvers from ump.ko build
+if int(env['ump']) == 1:
+	env.Depends(kbase, '$STATIC_LIB_PATH/ump.ko')
+
+env.KernelObjTarget('kbase', kbase)
+
+env.AppendUnique(BASE=['cutils_linked_list'])
diff --git a/midgard/r14p0/kernel/drivers/gpu/arm/sconscript b/midgard/r14p0/kernel/drivers/gpu/arm/sconscript
new file mode 100755
index 0000000..924511b
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/arm/sconscript
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import glob
+
+
+SConscript('midgard/sconscript')
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/Kbuild b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/Kbuild
new file mode 100755
index 0000000..f10d58c
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/Kbuild
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+pl111_drm-y +=	pl111_drm_device.o \
+		pl111_drm_connector.o \
+		pl111_drm_crtc.o \
+		pl111_drm_cursor.o \
+		pl111_drm_dma_buf.o \
+		pl111_drm_encoder.o \
+		pl111_drm_fb.o \
+		pl111_drm_gem.o \
+		pl111_drm_pl111.o \
+		pl111_drm_platform.o \
+		pl111_drm_suspend.o \
+		pl111_drm_vma.o
+
+obj-$(CONFIG_DRM_PL111) += pl111_drm.o
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/Kconfig b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/Kconfig
new file mode 100755
index 0000000..60b465c
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/Kconfig
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+config DRM_PL111
+	tristate "DRM Support for PL111 CLCD Controller"
+	depends on DRM
+	select DRM_KMS_HELPER
+	select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+	help
+	  Choose this option for DRM support for the PL111 CLCD controller.
+	  If M is selected the module will be called pl111_drm.
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/Makefile b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/Makefile
new file mode 100755
index 0000000..2869f58
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: pl111_drm
+
+pl111_drm:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_USES_KDS=y CONFIG_DRM_PL111=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h
new file mode 100755
index 0000000..d3e0086
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h
@@ -0,0 +1,95 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+/**
+ * pl111_clcd_ext.h
+ * Extended CLCD register definitions
+ */
+
+#ifndef PL111_CLCD_EXT_H_
+#define PL111_CLCD_EXT_H_
+
+/* 
+ * PL111 cursor register definitions not defined in the kernel's clcd header.
+ * 
+ * TODO MIDEGL-1718: move to include/linux/amba/clcd.h
+ */
+
+#define CLCD_CRSR_IMAGE				0x00000800
+#define CLCD_CRSR_IMAGE_MAX_WORDS		256
+#define CLCD_CRSR_IMAGE_WORDS_PER_LINE		4
+#define CLCD_CRSR_IMAGE_PIXELS_PER_WORD	16
+
+#define CLCD_CRSR_LBBP_COLOR_MASK	0x00000003
+#define CLCD_CRSR_LBBP_BACKGROUND	0x0
+#define CLCD_CRSR_LBBP_FOREGROUND	0x1
+#define CLCD_CRSR_LBBP_TRANSPARENT	0x2
+#define CLCD_CRSR_LBBP_INVERSE		0x3
+
+
+#define CLCD_CRSR_CTRL			0x00000c00
+#define CLCD_CRSR_CONFIG		0x00000c04
+#define CLCD_CRSR_PALETTE_0		0x00000c08
+#define CLCD_CRSR_PALETTE_1		0x00000c0c
+#define CLCD_CRSR_XY			0x00000c10
+#define CLCD_CRSR_CLIP			0x00000c14
+#define CLCD_CRSR_IMSC			0x00000c20
+#define CLCD_CRSR_ICR			0x00000c24
+#define CLCD_CRSR_RIS			0x00000c28
+#define CLCD_MIS				0x00000c2c
+
+#define CRSR_CTRL_CRSR_ON		(1 << 0)
+#define CRSR_CTRL_CRSR_MAX		3
+#define CRSR_CTRL_CRSR_NUM_SHIFT	4
+#define CRSR_CTRL_CRSR_NUM_MASK		\
+	(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT)
+#define CRSR_CTRL_CURSOR_0		0
+#define CRSR_CTRL_CURSOR_1		1
+#define CRSR_CTRL_CURSOR_2		2
+#define CRSR_CTRL_CURSOR_3		3
+
+#define CRSR_CONFIG_CRSR_SIZE		(1 << 0)
+#define CRSR_CONFIG_CRSR_FRAME_SYNC	(1 << 1)
+
+#define CRSR_PALETTE_RED_SHIFT		0
+#define CRSR_PALETTE_GREEN_SHIFT	8
+#define CRSR_PALETTE_BLUE_SHIFT		16
+
+#define CRSR_PALETTE_RED_MASK		0x000000ff
+#define CRSR_PALETTE_GREEN_MASK		0x0000ff00
+#define CRSR_PALETTE_BLUE_MASK		0x00ff0000
+#define CRSR_PALETTE_MASK		(~0xff000000)
+
+#define CRSR_XY_MASK			0x000003ff
+#define CRSR_XY_X_SHIFT			0
+#define CRSR_XY_Y_SHIFT			16
+
+#define CRSR_XY_X_MASK			CRSR_XY_MASK
+#define CRSR_XY_Y_MASK			(CRSR_XY_MASK << CRSR_XY_Y_SHIFT)
+
+#define CRSR_CLIP_MASK			0x3f
+#define CRSR_CLIP_X_SHIFT		0
+#define CRSR_CLIP_Y_SHIFT		8
+
+#define CRSR_CLIP_X_MASK		CRSR_CLIP_MASK
+#define CRSR_CLIP_Y_MASK		(CRSR_CLIP_MASK << CRSR_CLIP_Y_SHIFT)
+
+#define CRSR_IMSC_CRSR_IM		(1<<0)
+#define CRSR_ICR_CRSR_IC		(1<<0)
+#define CRSR_RIS_CRSR_RIS		(1<<0)
+#define CRSR_MIS_CRSR_MIS		(1<<0)
+
+#endif /* PL111_CLCD_EXT_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h
new file mode 100755
index 0000000..64d87b6
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h
@@ -0,0 +1,270 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _PL111_DRM_H_
+#define _PL111_DRM_H_
+
+#define DRIVER_AUTHOR    "ARM Ltd."
+#define DRIVER_NAME      "pl111_drm"
+#define DRIVER_DESC      "DRM module for PL111"
+#define DRIVER_LICENCE   "GPL"
+#define DRIVER_ALIAS     "platform:pl111_drm"
+#define DRIVER_DATE      "20101111"
+#define DRIVER_VERSION   "0.2"
+#define DRIVER_MAJOR      2
+#define DRIVER_MINOR      1
+#define DRIVER_PATCHLEVEL 1
+
+/*
+ * Number of flips allowed in flight at any one time. Any more flips requested
+ * beyond this value will cause the caller to block until earlier flips have
+ * completed.
+ *
+ * For performance reasons, this must be greater than the number of buffers
+ * used in the rendering pipeline. Note that the rendering pipeline can contain
+ * different types of buffer, e.g.:
+ * - 2 final framebuffers
+ * - >2 geometry buffers for GPU use-cases
+ * - >2 vertex buffers for GPU use-cases
+ *
+ * For example, a system using 5 geometry buffers could have 5 flips in flight,
+ * and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 5 or greater.
+ *
+ * Whilst there may be more intermediate buffers (such as vertex/geometry) than
+ * final framebuffers, KDS is used to ensure that GPU rendering waits for the
+ * next off-screen buffer, so it doesn't overwrite an on-screen buffer and
+ * produce tearing.
+ */
+
+/*
+ * Here, we choose a conservative value. A lower value is most likely
+ * suitable for GPU use-cases.
+ */
+#define NR_FLIPS_IN_FLIGHT_THRESHOLD 16
+
+#define CLCD_IRQ_NEXTBASE_UPDATE (1u<<2)
+
+struct pl111_drm_flip_resource;
+
+struct pl111_gem_bo_dma {
+	dma_addr_t fb_dev_addr;
+	void *fb_cpu_addr;
+};
+
+struct pl111_gem_bo_shm {
+	struct page **pages;
+	dma_addr_t *dma_addrs;
+};
+
+struct pl111_gem_bo {
+	struct drm_gem_object gem_object;
+	u32 type;
+	union {
+		struct pl111_gem_bo_dma dma;
+		struct pl111_gem_bo_shm shm;
+	} backing_data;
+	struct sg_table *sgt;
+};
+
+extern struct pl111_drm_dev_private priv;
+
+struct pl111_drm_framebuffer {
+	struct drm_framebuffer fb;
+	struct pl111_gem_bo *bo;
+};
+
+struct pl111_drm_flip_resource {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* This is the kds set associated to the dma_buf we want to flip */
+	struct kds_resource_set *kds_res_set;
+#endif
+	struct drm_framebuffer *fb;
+	struct drm_crtc *crtc;
+	struct list_head link;
+	bool page_flip;
+	struct drm_pending_vblank_event *event;
+};
+
+struct pl111_drm_crtc {
+	struct drm_crtc crtc;
+	int crtc_index;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* This protects "old_kds_res_set" and "displaying_fb" */
+	spinlock_t current_displaying_lock;
+	/*
+	 * When a buffer is displayed its associated kds resource
+	 * will be obtained and stored here. Every time a buffer
+	 * flip is completed this old kds set is released and assigned
+	 * the kds set of the new buffer.
+	 */
+	struct kds_resource_set *old_kds_res_set;
+	/*
+	 * Stores which frame buffer is currently being displayed by
+	 * this CRTC or NULL if nothing is being displayed. It is used
+	 * to tell whether we need to obtain a set of kds resources for
+	 * exported buffer objects.
+	 */
+	struct drm_framebuffer *displaying_fb;
+#endif
+	struct drm_display_mode *new_mode;
+	struct drm_display_mode *current_mode;
+	int last_bpp;
+
+	/*
+	 * This spinlock protects "update_queue", "current_update_res"
+	 * and calls to do_flip_to_res() which updates the CLCD base
+	 * registers.
+	 */
+	spinlock_t base_update_lock;
+	/*
+	 * The resource that caused a base address update. Only one can be
+	 * pending, hence it's != NULL if there's a pending update
+	 */
+	struct pl111_drm_flip_resource *current_update_res;
+	/* Queue of things waiting to update the base address */
+	struct list_head update_queue;
+
+	void (*show_framebuffer_cb)(struct pl111_drm_flip_resource *flip_res,
+				struct drm_framebuffer *fb);
+};
+
+struct pl111_drm_connector {
+	struct drm_connector connector;
+};
+
+struct pl111_drm_encoder {
+	struct drm_encoder encoder;
+};
+
+struct pl111_drm_dev_private {
+	struct pl111_drm_crtc *pl111_crtc;
+
+	struct amba_device *amba_dev;
+	unsigned long mmio_start;
+	__u32 mmio_len;
+	void *regs;
+	struct clk *clk;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct kds_callback kds_cb;
+	struct kds_callback kds_obtain_current_cb;
+#endif
+	/*
+	 * Number of flips that were started in show_framebuffer_on_crtc(),
+	 * but haven't completed yet - because we do deferred flipping
+	 */
+	atomic_t nr_flips_in_flight;
+	wait_queue_head_t wait_for_flips;
+
+	/*
+	 * Used to prevent race between pl111_dma_buf_release and
+	 * drm_gem_prime_handle_to_fd
+	 */
+	struct mutex export_dma_buf_lock;
+
+	uint32_t number_crtcs;
+
+	/* Cache for flip resources used to avoid kmalloc on each page flip */
+	struct kmem_cache *page_flip_slab;
+};
+
+enum pl111_cursor_size {
+	CURSOR_32X32,
+	CURSOR_64X64
+};
+
+enum pl111_cursor_sync {
+	CURSOR_SYNC_NONE,
+	CURSOR_SYNC_VSYNC
+};
+
+
+/**
+ * Buffer allocation function which is more flexible than dumb_create(),
+ * it allows passing driver specific flags to control the kind of buffer
+ * to be allocated.
+ */
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file_priv);
+ 
+/****** TODO MIDEGL-1718: this should be moved to uapi/include/drm/pl111_drm.h ********/
+
+/* 
+ * Parameters for different buffer objects:
+ * bit [0]: backing storage
+ *	(0 -> SHM)
+ *	(1 -> DMA)
+ * bit [2:1]: kind of mapping
+ *	(0x0 -> uncached)
+ *	(0x1 -> write combine)
+ *	(0x2 -> cached)
+ */
+#define PL111_BOT_MASK		(0x7)
+#define PL111_BOT_SHM		(0x0 << 0)
+#define PL111_BOT_DMA		(0x1 << 0)
+#define PL111_BOT_UNCACHED	(0x0 << 1)
+#define PL111_BOT_WC		(0x1 << 1)
+#define PL111_BOT_CACHED	(0x2 << 1)
+
+/**
+ * User-desired buffer creation information structure.
+ *
+ * @size: user-desired memory allocation size.
+ *      - this size value would be page-aligned internally.
+ * @flags: user request for setting memory type or cache attributes as a bit op
+ *	- PL111_BOT_DMA / PL111_BOT_SHM
+ *	- PL111_BOT_UNCACHED / PL111_BOT_WC / PL111_BOT_CACHED
+ * @handle: returned a handle to created gem object.
+ *      - this handle will be set by gem module of kernel side.
+ */
+struct drm_pl111_gem_create {
+	uint32_t height;
+	uint32_t width;
+	uint32_t bpp;
+	uint32_t flags;
+	/* handle, pitch, size will be returned */
+	uint32_t handle;
+	uint32_t pitch;
+	uint64_t size;
+};
+
+#define DRM_PL111_GEM_CREATE		0x00
+
+#define DRM_IOCTL_PL111_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + \
+			DRM_PL111_GEM_CREATE, struct drm_pl111_gem_create)
+/****************************************************************************/
+
+#define PL111_FB_FROM_FRAMEBUFFER(drm_fb) \
+	(container_of(drm_fb, struct pl111_drm_framebuffer, fb))
+
+#define PL111_BO_FROM_FRAMEBUFFER(drm_fb) \
+	(container_of(drm_fb, struct pl111_drm_framebuffer, fb)->bo)
+
+#define PL111_BO_FROM_GEM(gem_obj) \
+	container_of(gem_obj, struct pl111_gem_bo, gem_object)
+
+#define to_pl111_crtc(x) container_of(x, struct pl111_drm_crtc, crtc)
+
+#define PL111_ENCODER_FROM_ENCODER(x) \
+	container_of(x, struct pl111_drm_encoder, encoder)
+
+#define PL111_CONNECTOR_FROM_CONNECTOR(x) \
+	container_of(x, struct pl111_drm_connector, connector)
+
+#include "pl111_drm_funcs.h"
+
+#endif /* _PL111_DRM_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c
new file mode 100755
index 0000000..c7c3a22
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c
@@ -0,0 +1,170 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_connector.c
+ * Implementation of the connector functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+
+static struct {
+	int w, h, type;
+} pl111_drm_modes[] = {
+	{ 640, 480,  DRM_MODE_TYPE_PREFERRED},
+	{ 800, 600,  0},
+	{1024, 768,  0},
+	{  -1,  -1, -1}
+};
+
+void pl111_connector_destroy(struct drm_connector *connector)
+{
+	struct pl111_drm_connector *pl111_connector =
+				PL111_CONNECTOR_FROM_CONNECTOR(connector);
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	kfree(pl111_connector);
+}
+
+enum drm_connector_status pl111_connector_detect(struct drm_connector
+							*connector, bool force)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return connector_status_connected;
+}
+
+void pl111_connector_dpms(struct drm_connector *connector, int mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+}
+
+struct drm_encoder *
+pl111_connector_helper_best_encoder(struct drm_connector *connector)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	if (connector->encoder != NULL) {
+		return connector->encoder; /* Return attached encoder */
+	} else {
+		/*
+		 * If there is no attached encoder we choose the best candidate
+		 * from the list.
+		 * For PL111 there is only one encoder so we return the first
+		 * one we find.
+		 * Other h/w would require a suitable criterion below.
+		 */
+		struct drm_encoder *encoder = NULL;
+		struct drm_device *dev = connector->dev;
+
+		list_for_each_entry(encoder, &dev->mode_config.encoder_list,
+					head) {
+			if (1) { /* criterion ? */
+				break;
+			}
+		}
+		return encoder; /* return best candidate encoder */
+	}
+}
+
+int pl111_connector_helper_get_modes(struct drm_connector *connector)
+{
+	int i = 0;
+	int count = 0;
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	while (pl111_drm_modes[i].w != -1) {
+		struct drm_display_mode *mode =
+				drm_mode_find_dmt(connector->dev,
+						pl111_drm_modes[i].w,
+						pl111_drm_modes[i].h,
+						60
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+						, false
+#endif
+						);
+
+		if (mode != NULL) {
+			mode->type |= pl111_drm_modes[i].type;
+			drm_mode_probed_add(connector, mode);
+			count++;
+		}
+
+		i++;
+	}
+
+	DRM_DEBUG_KMS("found %d modes\n", count);
+
+	return count;
+}
+
+int pl111_connector_helper_mode_valid(struct drm_connector *connector,
+					struct drm_display_mode *mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return MODE_OK;
+}
+
+const struct drm_connector_funcs connector_funcs = {
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = pl111_connector_destroy,
+	.detect = pl111_connector_detect,
+	.dpms = pl111_connector_dpms,
+};
+
+const struct drm_connector_helper_funcs connector_helper_funcs = {
+	.get_modes = pl111_connector_helper_get_modes,
+	.mode_valid = pl111_connector_helper_mode_valid,
+	.best_encoder = pl111_connector_helper_best_encoder,
+};
+
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev)
+{
+	struct pl111_drm_connector *pl111_connector;
+
+	pl111_connector = kzalloc(sizeof(struct pl111_drm_connector),
+					GFP_KERNEL);
+
+	if (pl111_connector == NULL) {
+		pr_err("Failed to allocated pl111_drm_connector\n");
+		return NULL;
+	}
+
+	drm_connector_init(dev, &pl111_connector->connector, &connector_funcs,
+				DRM_MODE_CONNECTOR_DVII);
+
+	drm_connector_helper_add(&pl111_connector->connector,
+					&connector_helper_funcs);
+
+	drm_sysfs_connector_add(&pl111_connector->connector);
+
+	return pl111_connector;
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c
new file mode 100755
index 0000000..ede07ff
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c
@@ -0,0 +1,449 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_crtc.c
+ * Implementation of the CRTC functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+static int pl111_crtc_num;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc)
+{
+	struct drm_device *dev = pl111_crtc->crtc.dev;
+	struct pl111_drm_flip_resource *old_flip_res;
+	struct pl111_gem_bo *bo;
+	unsigned long irq_flags;
+	int flips_in_flight;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	unsigned long flags;
+#endif
+
+	spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+
+	/*
+	 * Cache the flip resource that caused the IRQ since it will be
+	 * dispatched later. Early return if the IRQ isn't associated to
+	 * a base register update.
+	 * 
+	 * TODO MIDBASE-2790: disable IRQs when a flip is not pending.
+	 */
+	old_flip_res = pl111_crtc->current_update_res;
+	if (!old_flip_res) {
+		spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+		return;
+	}
+	pl111_crtc->current_update_res = NULL;
+
+	/* Prepare the next flip (if any) of the queue as soon as possible. */
+	if (!list_empty(&pl111_crtc->update_queue)) {
+		struct pl111_drm_flip_resource *flip_res;
+		/* Remove the head of the list */
+		flip_res = list_first_entry(&pl111_crtc->update_queue,
+			struct pl111_drm_flip_resource, link);
+		list_del(&flip_res->link);
+		do_flip_to_res(flip_res);
+		/*
+		 * current_update_res will be set, so guarentees that
+		 * another flip_res coming in gets queued instead of
+		 * handled immediately
+		 */
+	}
+	spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+	/* Finalize properly the flip that caused the IRQ */
+	DRM_DEBUG_KMS("DRM Finalizing old_flip_res=%p\n", old_flip_res);
+
+	bo = PL111_BO_FROM_FRAMEBUFFER(old_flip_res->fb);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+	release_kds_resource_and_display(old_flip_res);
+	spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+	/* Release DMA buffer on this flip */
+
+	if (bo->gem_object.export_dma_buf != NULL)
+		dma_buf_put(bo->gem_object.export_dma_buf);
+
+	drm_handle_vblank(dev, pl111_crtc->crtc_index);
+
+	/* Wake up any processes waiting for page flip event */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (old_flip_res->event) {
+		spin_lock_bh(&dev->event_lock);
+		drm_send_vblank_event(dev, pl111_crtc->crtc_index,
+					old_flip_res->event);
+		spin_unlock_bh(&dev->event_lock);
+	}
+#else
+	if (old_flip_res->event) {
+		struct drm_pending_vblank_event *e = old_flip_res->event;
+		struct timeval now;
+		unsigned int seq;
+
+		DRM_DEBUG_KMS("%s: wake up page flip event (%p)\n", __func__,
+				old_flip_res->event);
+
+		spin_lock_bh(&dev->event_lock);
+		seq = drm_vblank_count_and_time(dev, pl111_crtc->crtc_index,
+							&now);
+		e->pipe = pl111_crtc->crtc_index;
+		e->event.sequence = seq;
+		e->event.tv_sec = now.tv_sec;
+		e->event.tv_usec = now.tv_usec;
+
+		list_add_tail(&e->base.link,
+			&e->base.file_priv->event_list);
+
+		wake_up_interruptible(&e->base.file_priv->event_wait);
+		spin_unlock_bh(&dev->event_lock);
+	}
+#endif
+
+	drm_vblank_put(dev, pl111_crtc->crtc_index);
+
+	/*
+	 * workqueue.c:process_one_work():
+	 * "It is permissible to free the struct work_struct from
+	 *  inside the function that is called from it"
+	 */
+	kmem_cache_free(priv.page_flip_slab, old_flip_res);
+
+	flips_in_flight = atomic_dec_return(&priv.nr_flips_in_flight);
+	if (flips_in_flight == 0 ||
+			flips_in_flight == (NR_FLIPS_IN_FLIGHT_THRESHOLD - 1))
+		wake_up(&priv.wait_for_flips);
+
+	DRM_DEBUG_KMS("DRM release flip_res=%p\n", old_flip_res);
+}
+
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2)
+{
+	struct pl111_drm_flip_resource *flip_res = cb1;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+	pl111_crtc->show_framebuffer_cb(cb1, cb2);
+}
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+				struct drm_framebuffer *fb, bool page_flip,
+				struct drm_pending_vblank_event *event)
+{
+	struct pl111_gem_bo *bo;
+	struct pl111_drm_flip_resource *flip_res;
+	int flips_in_flight;
+	int old_flips_in_flight;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+	crtc->fb = fb;
+#else
+	crtc->primary->fb = fb;
+#endif
+
+	bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	if (bo == NULL) {
+		DRM_DEBUG_KMS("Failed to get pl111_gem_bo object\n");
+		return -EINVAL;
+	}
+
+	/* If this is a full modeset, wait for all outstanding flips to complete
+	 * before continuing. This avoids unnecessary complication from being
+	 * able to queue up multiple modesets and queues of mixed modesets and
+	 * page flips.
+	 *
+	 * Modesets should be uncommon and will not be performant anyway, so
+	 * making them synchronous should have negligible performance impact.
+	 */
+	if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+				atomic_read(&priv.nr_flips_in_flight) == 0);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * There can be more 'early display' flips in flight than there are
+	 * buffers, and there is (currently) no explicit bound on the number of
+	 * flips. Hence, we need a new allocation for each one.
+	 *
+	 * Note: this could be optimized down if we knew a bound on the flips,
+	 * since an application can only have so many buffers in flight to be
+	 * useful/not hog all the memory
+	 */
+	flip_res = kmem_cache_alloc(priv.page_flip_slab, GFP_KERNEL);
+	if (flip_res == NULL) {
+		pr_err("kmem_cache_alloc failed to alloc - flip ignored\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * increment flips in flight, whilst blocking when we reach
+	 * NR_FLIPS_IN_FLIGHT_THRESHOLD
+	 */
+	do {
+		/*
+		 * Note: use of assign-and-then-compare in the condition to set
+		 * flips_in_flight
+		 */
+		int ret = wait_event_killable(priv.wait_for_flips,
+				(flips_in_flight =
+					atomic_read(&priv.nr_flips_in_flight))
+				< NR_FLIPS_IN_FLIGHT_THRESHOLD);
+		if (ret != 0) {
+			kmem_cache_free(priv.page_flip_slab, flip_res);
+			return ret;
+		}
+
+		old_flips_in_flight = atomic_cmpxchg(&priv.nr_flips_in_flight,
+					flips_in_flight, flips_in_flight + 1);
+	} while (old_flips_in_flight != flips_in_flight);
+
+	flip_res->fb = fb;
+	flip_res->crtc = crtc;
+	flip_res->page_flip = page_flip;
+	flip_res->event = event;
+	INIT_LIST_HEAD(&flip_res->link);
+	DRM_DEBUG_KMS("DRM alloc flip_res=%p\n", flip_res);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	if (bo->gem_object.export_dma_buf != NULL) {
+		struct dma_buf *buf = bo->gem_object.export_dma_buf;
+		unsigned long shared[1] = { 0 };
+		struct kds_resource *resource_list[1] = {
+				get_dma_buf_kds_resource(buf) };
+		int err;
+
+		get_dma_buf(buf);
+		DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+
+		/* Wait for the KDS resource associated with this buffer */
+		err = kds_async_waitall(&flip_res->kds_res_set,
+					&priv.kds_cb, flip_res, fb, 1, shared,
+					resource_list);
+		BUG_ON(err);
+	} else {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+		DRM_DEBUG_KMS("No dma_buf for this flip\n");
+
+		/* No dma-buf attached so just call the callback directly */
+		flip_res->kds_res_set = NULL;
+		pl111_crtc->show_framebuffer_cb(flip_res, fb);
+	}
+#else
+	if (bo->gem_object.export_dma_buf != NULL) {
+		struct dma_buf *buf = bo->gem_object.export_dma_buf;
+
+		get_dma_buf(buf);
+		DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+	} else {
+		DRM_DEBUG_KMS("No dma_buf for this flip\n");
+	}
+
+	/* No dma-buf attached to this so just call the callback directly */
+	{
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		pl111_crtc->show_framebuffer_cb(flip_res, fb);
+	}
+#endif
+
+	/* For the same reasons as the wait at the start of this function,
+	 * wait for the modeset to complete before continuing.
+	 */
+	if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+				flips_in_flight == 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int pl111_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+			struct drm_pending_vblank_event *event)
+#else
+			struct drm_pending_vblank_event *event,
+			uint32_t flags)
+#endif
+{
+	DRM_DEBUG_KMS("%s: crtc=%p, fb=%p, event=%p\n",
+			__func__, crtc, fb, event);
+	return show_framebuffer_on_crtc(crtc, fb, true, event);
+}
+
+int pl111_crtc_helper_mode_set(struct drm_crtc *crtc,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode,
+				int x, int y, struct drm_framebuffer *old_fb)
+{
+	int ret;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+	struct drm_display_mode *duplicated_mode;
+
+	DRM_DEBUG_KMS("DRM crtc_helper_mode_set, x=%d y=%d bpp=%d\n",
+			adjusted_mode->hdisplay, adjusted_mode->vdisplay,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+			crtc->fb->bits_per_pixel);
+#else
+			crtc->primary->fb->bits_per_pixel);
+#endif
+
+	duplicated_mode = drm_mode_duplicate(crtc->dev, adjusted_mode);
+	if (!duplicated_mode)
+		return -ENOMEM;
+
+	pl111_crtc->new_mode = duplicated_mode;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+	ret = show_framebuffer_on_crtc(crtc, crtc->fb, false, NULL);
+#else
+	ret = show_framebuffer_on_crtc(crtc, crtc->primary->fb, false, NULL);
+#endif
+	if (ret != 0) {
+		pl111_crtc->new_mode = pl111_crtc->current_mode;
+		drm_mode_destroy(crtc->dev, duplicated_mode);
+	}
+
+	return ret;
+}
+
+void pl111_crtc_helper_prepare(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+void pl111_crtc_helper_commit(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+bool pl111_crtc_helper_mode_fixup(struct drm_crtc *crtc,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0))
+				const struct drm_display_mode *mode,
+#else
+				struct drm_display_mode *mode,
+#endif
+				struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+#ifdef CONFIG_ARCH_VEXPRESS
+	/*
+	 * 1024x768 with more than 16 bits per pixel may not work 
+	 * correctly on Versatile Express due to bandwidth issues
+	 */
+	if (mode->hdisplay == 1024 && mode->vdisplay == 768 &&
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+			crtc->fb->bits_per_pixel > 16) {
+#else
+			crtc->primary->fb->bits_per_pixel > 16) {
+#endif
+		DRM_INFO("*WARNING* 1024x768 at > 16 bpp may suffer corruption\n");
+	}
+#endif
+
+	return true;
+}
+
+void pl111_crtc_helper_disable(struct drm_crtc *crtc)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+	/* don't disable crtc until no flips in flight as irq will be disabled */
+	ret = wait_event_killable(priv.wait_for_flips, atomic_read(&priv.nr_flips_in_flight) == 0);
+	if(ret) {
+		pr_err("pl111_crtc_helper_disable failed\n");
+		return;
+	}
+	clcd_disable(crtc);
+}
+
+void pl111_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+	drm_crtc_cleanup(crtc);
+	kfree(pl111_crtc);
+}
+
+const struct drm_crtc_funcs crtc_funcs = {
+	.cursor_set = pl111_crtc_cursor_set,
+	.cursor_move = pl111_crtc_cursor_move,
+	.set_config = drm_crtc_helper_set_config,
+	.page_flip = pl111_crtc_page_flip,
+	.destroy = pl111_crtc_destroy
+};
+
+const struct drm_crtc_helper_funcs crtc_helper_funcs = {
+	.mode_set = pl111_crtc_helper_mode_set,
+	.prepare = pl111_crtc_helper_prepare,
+	.commit = pl111_crtc_helper_commit,
+	.mode_fixup = pl111_crtc_helper_mode_fixup,
+	.disable = pl111_crtc_helper_disable,
+};
+
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev)
+{
+	struct pl111_drm_crtc *pl111_crtc;
+
+	pl111_crtc = kzalloc(sizeof(struct pl111_drm_crtc), GFP_KERNEL);
+	if (pl111_crtc == NULL) {
+		pr_err("Failed to allocated pl111_drm_crtc\n");
+		return NULL;
+	}
+
+	drm_crtc_init(dev, &pl111_crtc->crtc, &crtc_funcs);
+	drm_crtc_helper_add(&pl111_crtc->crtc, &crtc_helper_funcs);
+
+	pl111_crtc->crtc_index = pl111_crtc_num;
+	pl111_crtc_num++;
+	pl111_crtc->crtc.enabled = 0;
+	pl111_crtc->last_bpp = 0;
+	pl111_crtc->current_update_res = NULL;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	pl111_crtc->displaying_fb = NULL;
+	pl111_crtc->old_kds_res_set = NULL;
+	spin_lock_init(&pl111_crtc->current_displaying_lock);
+#endif
+	pl111_crtc->show_framebuffer_cb = show_framebuffer_on_crtc_cb_internal;
+	INIT_LIST_HEAD(&pl111_crtc->update_queue);
+	spin_lock_init(&pl111_crtc->base_update_lock);
+
+	return pl111_crtc;
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c
new file mode 100755
index 0000000..4bf20fe
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c
@@ -0,0 +1,331 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_cursor.c
+ * Implementation of cursor functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_clcd_ext.h"
+#include "pl111_drm.h"
+
+#define PL111_MAX_CURSOR_WIDTH (64)
+#define PL111_MAX_CURSOR_HEIGHT (64)
+
+#define ARGB_2_LBBP_BINARY_THRESHOLD	(1 << 7)
+#define ARGB_ALPHA_SHIFT		24
+#define ARGB_ALPHA_MASK			(0xff << ARGB_ALPHA_SHIFT)
+#define ARGB_RED_SHIFT			16
+#define ARGB_RED_MASK			(0xff << ARGB_RED_SHIFT)
+#define ARGB_GREEN_SHIFT		8
+#define ARGB_GREEN_MASK			(0xff << ARGB_GREEN_SHIFT)
+#define ARGB_BLUE_SHIFT			0
+#define ARGB_BLUE_MASK			(0xff << ARGB_BLUE_SHIFT)
+
+
+void pl111_set_cursor_size(enum pl111_cursor_size size)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+	if (size == CURSOR_64X64)
+		reg_data |= CRSR_CONFIG_CRSR_SIZE;
+	else
+		reg_data &= ~CRSR_CONFIG_CRSR_SIZE;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor_sync(enum pl111_cursor_sync sync)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+	if (sync == CURSOR_SYNC_VSYNC)
+		reg_data |= CRSR_CONFIG_CRSR_FRAME_SYNC;
+	else
+		reg_data &= ~CRSR_CONFIG_CRSR_FRAME_SYNC;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor(u32 cursor)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+	reg_data &= ~(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT);
+	reg_data |= (cursor & CRSR_CTRL_CRSR_MAX) << CRSR_CTRL_CRSR_NUM_SHIFT;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_enable(bool enable)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+	if (enable)
+		reg_data |= CRSR_CTRL_CRSR_ON;
+	else
+		reg_data &= ~CRSR_CTRL_CRSR_ON;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_position(u32 x, u32 y)
+{
+	u32 reg_data = (x & CRSR_XY_MASK) |
+			((y & CRSR_XY_MASK) << CRSR_XY_Y_SHIFT);
+
+	writel(reg_data, priv.regs + CLCD_CRSR_XY);
+}
+
+void pl111_set_cursor_clipping(u32 x, u32 y)
+{
+	u32 reg_data;
+
+	/* 
+	 * Do not allow setting clipping values larger than
+	 * the cursor size since the cursor is already fully hidden
+	 * when x,y = PL111_MAX_CURSOR_WIDTH.
+	 */
+	if (x > PL111_MAX_CURSOR_WIDTH)
+		x = PL111_MAX_CURSOR_WIDTH;
+	if (y > PL111_MAX_CURSOR_WIDTH)
+		y = PL111_MAX_CURSOR_WIDTH;
+
+	reg_data = (x & CRSR_CLIP_MASK) |
+			((y & CRSR_CLIP_MASK) << CRSR_CLIP_Y_SHIFT);
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CLIP);
+}
+
+void pl111_set_cursor_palette(u32 color0, u32 color1)
+{
+	writel(color0 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_0);
+	writel(color1 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_1);
+}
+
+void pl111_cursor_enable(void)
+{
+	pl111_set_cursor_sync(CURSOR_SYNC_VSYNC);
+	pl111_set_cursor_size(CURSOR_64X64);
+	pl111_set_cursor_palette(0x0, 0x00ffffff);
+	pl111_set_cursor_enable(true);
+}
+
+void pl111_cursor_disable(void)
+{
+	pl111_set_cursor_enable(false);
+}
+
+/* shift required to locate pixel into the correct position in
+ * a cursor LBBP word, indexed by x mod 16.
+ */
+static const unsigned char
+x_mod_16_to_value_shift[CLCD_CRSR_IMAGE_PIXELS_PER_WORD] = {
+	6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24
+};
+
+/* Pack the pixel value into its correct position in the buffer as specified
+ * for LBBP */
+static inline void
+set_lbbp_pixel(uint32_t *buffer, unsigned int x, unsigned int y,
+				uint32_t value)
+{
+	u32 *cursor_ram = priv.regs + CLCD_CRSR_IMAGE;
+	uint32_t shift;
+	uint32_t data;
+
+	shift = x_mod_16_to_value_shift[x % CLCD_CRSR_IMAGE_PIXELS_PER_WORD];
+
+	/* Get the word containing this pixel */
+	cursor_ram = cursor_ram + (x >> CLCD_CRSR_IMAGE_WORDS_PER_LINE) + (y << 2);
+
+	/* Update pixel in cursor RAM */
+	data = readl(cursor_ram);
+	data &= ~(CLCD_CRSR_LBBP_COLOR_MASK << shift);
+	data |= value << shift;
+	writel(data, cursor_ram);
+}
+
+static u32 pl111_argb_to_lbbp(u32 argb_pix)
+{
+	u32 lbbp_pix = CLCD_CRSR_LBBP_TRANSPARENT;
+	u32 alpha = (argb_pix & ARGB_ALPHA_MASK) >> ARGB_ALPHA_SHIFT;
+	u32 red = (argb_pix & ARGB_RED_MASK) >> ARGB_RED_SHIFT;
+	u32 green = (argb_pix & ARGB_GREEN_MASK) >> ARGB_GREEN_SHIFT;
+	u32 blue =  (argb_pix & ARGB_BLUE_MASK) >> ARGB_BLUE_SHIFT;
+
+	/*
+	 * Converting from 8 pixel transparency to binary transparency
+	 * it's the best we can achieve.
+	 */
+	if (alpha & ARGB_2_LBBP_BINARY_THRESHOLD) {
+		u32 gray, max, min;
+
+		/*
+		 * Convert to gray using the lightness method:
+		 * gray = [max(R,G,B) + min(R,G,B)]/2
+		 */
+		min = min(red, green);
+		min = min(min, blue);
+		max = max(red, green);
+		max = max(max, blue);
+		gray = (min + max) >> 1; /* divide by 2 */
+		/* Apply binary threshold to the gray value calculated */
+		if (gray & ARGB_2_LBBP_BINARY_THRESHOLD)
+			lbbp_pix = CLCD_CRSR_LBBP_FOREGROUND;
+		else
+			lbbp_pix = CLCD_CRSR_LBBP_BACKGROUND;
+	}
+
+	return lbbp_pix;
+}
+
+/*
+ * The PL111 hardware cursor supports only LBBP which is a 2bpp format but
+ * the cursor format from userspace is ARGB8888 so we need to convert
+ *  to LBBP here.
+ */
+static void pl111_set_cursor_image(u32 *data)
+{
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+	/* Add 1 on width to insert trailing NULL */
+	char string_cursor[PL111_MAX_CURSOR_WIDTH + 1];
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+	unsigned int x;
+	unsigned int y;
+
+	for (y = 0; y < PL111_MAX_CURSOR_HEIGHT; y++) {
+		for (x = 0; x < PL111_MAX_CURSOR_WIDTH; x++) {
+			u32 value = pl111_argb_to_lbbp(*data);
+
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+			if (value == CLCD_CRSR_LBBP_TRANSPARENT)
+				string_cursor[x] = 'T';
+			else if (value == CLCD_CRSR_LBBP_FOREGROUND)
+				string_cursor[x] = 'F';
+			else if (value == CLCD_CRSR_LBBP_INVERSE)
+				string_cursor[x] = 'I';
+			else
+				string_cursor[x] = 'B';
+
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+			set_lbbp_pixel(data, x, y, value);
+			++data;
+		}
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+		string_cursor[PL111_MAX_CURSOR_WIDTH] = '\0';
+		DRM_INFO("%s\n", string_cursor);
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+	}
+}
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+			   struct drm_file *file_priv,
+			   uint32_t handle,
+			   uint32_t width,
+			   uint32_t height)
+{
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+
+	DRM_DEBUG_KMS("handle = %u, width = %u, height = %u\n",
+		handle, width, height);
+
+	if (!handle) {
+		pl111_cursor_disable();
+		return 0;
+	}
+
+	if ((width != PL111_MAX_CURSOR_WIDTH) ||
+	    (height != PL111_MAX_CURSOR_HEIGHT))
+		return -EINVAL;
+
+	obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
+	if (!obj) {
+		DRM_ERROR("Cannot find cursor object for handle = %d\n",
+			  handle);
+		return -ENOENT;
+	}
+
+	/*
+	 * We expect a PL111_MAX_CURSOR_WIDTH x PL111_MAX_CURSOR_HEIGHT
+	 * ARGB888 buffer object in the input.
+	 *
+	 */
+	if (obj->size < (PL111_MAX_CURSOR_WIDTH * PL111_MAX_CURSOR_HEIGHT * 4)) {
+		DRM_ERROR("Cannot set cursor with an obj size = %d\n",
+			  obj->size);
+		drm_gem_object_unreference_unlocked(obj);
+		return -EINVAL;
+	}
+
+	bo = PL111_BO_FROM_GEM(obj);
+	if (!(bo->type & PL111_BOT_DMA)) {
+		DRM_ERROR("Tried to set cursor with non DMA backed obj = %p\n",
+			  obj);
+		drm_gem_object_unreference_unlocked(obj);
+		return -EINVAL;
+	}
+
+	pl111_set_cursor_image(bo->backing_data.dma.fb_cpu_addr);
+
+	/*
+	 * Since we copy the contents of the buffer to the HW cursor internal
+	 * memory this GEM object is not needed anymore.
+	 */
+	drm_gem_object_unreference_unlocked(obj);
+
+	pl111_cursor_enable();
+
+	return 0;
+}
+
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+			    int x, int y)
+{
+	int x_clip = 0;
+	int y_clip = 0;
+
+	DRM_DEBUG("x %d y %d\n", x, y);
+
+	/*
+	 * The cursor image is clipped automatically at the screen limits when
+	 * it extends beyond the screen image to the right or bottom but
+	 * we must clip it using pl111 HW features for negative values.
+	 */
+	if (x < 0) {
+		x_clip = -x;
+		x = 0;
+	}
+	if (y < 0) {
+		y_clip = -y;
+		y = 0;
+	}
+
+	pl111_set_cursor_clipping(x_clip, y_clip);
+	pl111_set_cursor_position(x, y);
+
+	return 0;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c
new file mode 100755
index 0000000..6619c07
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c
@@ -0,0 +1,338 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_device.c
+ * Implementation of the Linux device driver entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+struct pl111_drm_dev_private priv;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void initial_kds_obtained(void *cb1, void *cb2)
+{
+	wait_queue_head_t *wait = (wait_queue_head_t *) cb1;
+	bool *cb_has_called = (bool *) cb2;
+
+	*cb_has_called = true;
+	wake_up(wait);
+}
+
+/* Must be called from within current_displaying_lock spinlock */
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	pl111_crtc->displaying_fb = flip_res->fb;
+
+	/* Release the previous buffer */
+	if (pl111_crtc->old_kds_res_set != NULL) {
+		/*
+		 * Can flip to the same buffer, but must not release the current
+		 * resource set
+		 */
+		BUG_ON(pl111_crtc->old_kds_res_set == flip_res->kds_res_set);
+		kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+	}
+	/* Record the current buffer, to release on the next buffer flip */
+	pl111_crtc->old_kds_res_set = flip_res->kds_res_set;
+}
+#endif
+
+void pl111_drm_preclose(struct drm_device *dev, struct drm_file *file_priv)
+{
+	DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+void pl111_drm_lastclose(struct drm_device *dev)
+{
+	DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+/*
+ * pl111 does not have a proper HW counter for vblank IRQs so enable_vblank
+ * and disable_vblank are just no op callbacks.
+ */
+static int pl111_enable_vblank(struct drm_device *dev, int crtc)
+{
+	DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+	return 0;
+}
+
+static void pl111_disable_vblank(struct drm_device *dev, int crtc)
+{
+	DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+}
+
+struct drm_mode_config_funcs mode_config_funcs = {
+	.fb_create = pl111_fb_create,
+};
+
+static int pl111_modeset_init(struct drm_device *dev)
+{
+	struct drm_mode_config *mode_config;
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	struct pl111_drm_connector *pl111_connector;
+	struct pl111_drm_encoder *pl111_encoder;
+	int ret = 0;
+
+	if (priv == NULL)
+		return -EINVAL;
+
+	drm_mode_config_init(dev);
+	mode_config = &dev->mode_config;
+	mode_config->funcs = &mode_config_funcs;
+	mode_config->min_width = 1;
+	mode_config->max_width = 1024;
+	mode_config->min_height = 1;
+	mode_config->max_height = 768;
+
+	priv->pl111_crtc = pl111_crtc_create(dev);
+	if (priv->pl111_crtc == NULL) {
+		pr_err("Failed to create pl111_drm_crtc\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	priv->number_crtcs = 1;
+
+	pl111_connector = pl111_connector_create(dev);
+	if (pl111_connector == NULL) {
+		pr_err("Failed to create pl111_drm_connector\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	pl111_encoder = pl111_encoder_create(dev, 1);
+	if (pl111_encoder == NULL) {
+		pr_err("Failed to create pl111_drm_encoder\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	ret = drm_mode_connector_attach_encoder(&pl111_connector->connector,
+						&pl111_encoder->encoder);
+	if (ret != 0) {
+		DRM_ERROR("Failed to attach encoder\n");
+		goto out_config;
+	}
+
+	pl111_connector->connector.encoder = &pl111_encoder->encoder;
+
+	pl111_encoder->encoder.crtc = &priv->pl111_crtc->crtc;
+
+	goto finish;
+
+out_config:
+	drm_mode_config_cleanup(dev);
+finish:
+	DRM_DEBUG("%s returned %d\n", __func__, ret);
+	return ret;
+}
+
+static void pl111_modeset_fini(struct drm_device *dev)
+{
+	drm_mode_config_cleanup(dev);
+}
+
+static int pl111_drm_load(struct drm_device *dev, unsigned long chipset)
+{
+	int ret = 0;
+
+	pr_info("DRM %s\n", __func__);
+
+	mutex_init(&priv.export_dma_buf_lock);
+	atomic_set(&priv.nr_flips_in_flight, 0);
+	init_waitqueue_head(&priv.wait_for_flips);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	ret = kds_callback_init(&priv.kds_cb, 1, show_framebuffer_on_crtc_cb);
+	if (ret != 0) {
+		pr_err("Failed to initialise KDS callback\n");
+		goto finish;
+	}
+
+	ret = kds_callback_init(&priv.kds_obtain_current_cb, 1,
+				initial_kds_obtained);
+	if (ret != 0) {
+		pr_err("Failed to init KDS obtain callback\n");
+		kds_callback_term(&priv.kds_cb);
+		goto finish;
+	}
+#endif
+
+	/* Create a cache for page flips */
+	priv.page_flip_slab = kmem_cache_create("page flip slab",
+			sizeof(struct pl111_drm_flip_resource), 0, 0, NULL);
+	if (priv.page_flip_slab == NULL) {
+		DRM_ERROR("Failed to create slab\n");
+		ret = -ENOMEM;
+		goto out_kds_callbacks;
+	}
+
+	dev->dev_private = &priv;
+
+	ret = pl111_modeset_init(dev);
+	if (ret != 0) {
+		pr_err("Failed to init modeset\n");
+		goto out_slab;
+	}
+
+	ret = pl111_device_init(dev);
+	if (ret != 0) {
+		DRM_ERROR("Failed to init MMIO and IRQ\n");
+		goto out_modeset;
+	}
+
+	ret = drm_vblank_init(dev, 1);
+	if (ret != 0) {
+		DRM_ERROR("Failed to init vblank\n");
+		goto out_vblank;
+	}
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(3, 13, 0))
+	platform_set_drvdata(dev->platformdev, dev);
+#endif
+
+	goto finish;
+
+out_vblank:
+	pl111_device_fini(dev);
+out_modeset:
+	pl111_modeset_fini(dev);
+out_slab:
+	kmem_cache_destroy(priv.page_flip_slab);
+out_kds_callbacks:
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	kds_callback_term(&priv.kds_obtain_current_cb);
+	kds_callback_term(&priv.kds_cb);
+#endif
+finish:
+	DRM_DEBUG_KMS("pl111_drm_load returned %d\n", ret);
+	return ret;
+}
+
+static int pl111_drm_unload(struct drm_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+
+	kmem_cache_destroy(priv.page_flip_slab);
+
+	drm_vblank_cleanup(dev);
+	pl111_modeset_fini(dev);
+	pl111_device_fini(dev);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	kds_callback_term(&priv.kds_obtain_current_cb);
+	kds_callback_term(&priv.kds_cb);
+#endif
+	return 0;
+}
+
+static struct vm_operations_struct pl111_gem_vm_ops = {
+	.fault = pl111_gem_fault,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+#else
+	.open = pl111_gem_vm_open,
+	.close = pl111_gem_vm_close,
+#endif
+};
+
+static const struct file_operations drm_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = pl111_gem_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+};
+
+static struct drm_ioctl_desc pl111_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(PL111_GEM_CREATE, pl111_drm_gem_create_ioctl,
+		DRM_CONTROL_ALLOW | DRM_UNLOCKED),
+};
+
+static struct drm_driver driver = {
+	.driver_features =
+		DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME,
+	.load = pl111_drm_load,
+	.unload = pl111_drm_unload,
+	.context_dtor = NULL,
+	.preclose = pl111_drm_preclose,
+	.lastclose = pl111_drm_lastclose,
+	.suspend = pl111_drm_suspend,
+	.resume = pl111_drm_resume,
+	.get_vblank_counter = drm_vblank_count,
+	.enable_vblank = pl111_enable_vblank,
+	.disable_vblank = pl111_disable_vblank,
+	.ioctls = pl111_ioctls,
+	.fops = &drm_fops,
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+	.dumb_create = pl111_dumb_create,
+	.dumb_destroy = pl111_dumb_destroy,
+	.dumb_map_offset = pl111_dumb_map_offset,
+	.gem_free_object = pl111_gem_free_object,
+	.gem_vm_ops = &pl111_gem_vm_ops,
+	.prime_handle_to_fd = &pl111_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_export = &pl111_gem_prime_export,
+	.gem_prime_import = &pl111_gem_prime_import,
+};
+
+int pl111_drm_init(struct platform_device *dev)
+{
+	int ret;
+	pr_info("DRM %s\n", __func__);
+	pr_info("PL111 DRM initialize, driver name: %s, version %d.%d\n",
+		DRIVER_NAME, DRIVER_MAJOR, DRIVER_MINOR);
+	driver.num_ioctls = ARRAY_SIZE(pl111_ioctls);
+	ret = 0;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 15, 0))
+	driver.kdriver.platform_device = dev;
+#endif
+	return drm_platform_init(&driver, dev);
+
+}
+
+void pl111_drm_exit(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	drm_platform_exit(&driver, dev);
+#else
+	drm_put_dev(platform_get_drvdata(dev));
+#endif
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
new file mode 100755
index 0000000..1131f46
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
@@ -0,0 +1,625 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_dma_buf.c
+ * Implementation of the dma_buf functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void obtain_kds_if_currently_displayed(struct drm_device *dev,
+						struct pl111_gem_bo *bo,
+						struct dma_buf *dma_buf)
+{
+	unsigned long shared[1] = { 0 };
+	struct kds_resource *resource_list[1];
+	struct kds_resource_set *kds_res_set;
+	struct drm_crtc *crtc;
+	bool cb_has_called = false;
+	unsigned long flags;
+	int err;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	DRM_DEBUG_KMS("Obtaining initial KDS res for bo:%p dma_buf:%p\n",
+			bo, dma_buf);
+
+	resource_list[0] = get_dma_buf_kds_resource(dma_buf);
+	get_dma_buf(dma_buf);
+
+	/*
+	 * Can't use kds_waitall(), because kbase will be let through due to
+	 * locked ignore'
+	 */
+	err = kds_async_waitall(&kds_res_set,
+				&priv.kds_obtain_current_cb, &wake,
+				&cb_has_called, 1, shared, resource_list);
+	BUG_ON(err);
+	wait_event(wake, cb_has_called == true);
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb) {
+			struct pl111_drm_framebuffer *pl111_fb;
+			struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+			pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+
+			if (pl111_fb->bo == bo) {
+				DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+				DRM_DEBUG_KMS(" is being displayed, keeping\n");
+				/* There shouldn't be a previous buffer to release */
+				BUG_ON(pl111_crtc->old_kds_res_set);
+
+				if (kds_res_set == NULL) {
+					err = kds_async_waitall(&kds_res_set,
+							&priv.kds_obtain_current_cb,
+							&wake, &cb_has_called,
+							1, shared, resource_list);
+					BUG_ON(err);
+					wait_event(wake, cb_has_called == true);
+				}
+
+				/* Current buffer will need releasing on next flip */
+				pl111_crtc->old_kds_res_set = kds_res_set;
+
+				/*
+				* Clear kds_res_set, so a new kds_res_set is allocated
+				* for additional CRTCs
+				*/
+				kds_res_set = NULL;
+			}
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+	}
+
+	/* kds_res_set will be NULL here if any CRTCs are displaying fb */
+	if (kds_res_set != NULL) {
+		DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+		DRM_DEBUG_KMS(" not being displayed, discarding\n");
+		/* They're not being displayed, release them */
+		kds_resource_set_release(&kds_res_set);
+	}
+
+	dma_buf_put(dma_buf);
+}
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0))
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+			struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = buffer->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+	mutex_lock(&dev->struct_mutex);
+	ret = drm_gem_mmap_obj(obj, obj->size, vma);
+	mutex_unlock(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#else
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+			struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = buffer->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+
+	DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+	mutex_lock(&dev->struct_mutex);
+
+	/* Check for valid size. */
+	if (obj->size < vma->vm_end - vma->vm_start)
+		return -EINVAL;
+
+	BUG_ON(!dev->driver->gem_vm_ops);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+#endif
+
+	vma->vm_ops = dev->driver->gem_vm_ops;
+	vma->vm_private_data = obj;
+
+	/* Take a ref for this mapping of the object, so that the fault
+	* handler can dereference the mmap offset's pointer to the object.
+	* This reference is cleaned up by the corresponding vm_close
+	* (which should happen whether the vma was created by this call, or
+	* by a vm_open due to mremap or partial unmap or whatever).
+	*/
+	drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+	pl111_drm_vm_open_locked(dev, vma);
+#else
+	drm_vm_open_locked(dev, vma);
+#endif
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#endif /* KERNEL_VERSION */
+
+static void pl111_dma_buf_release(struct dma_buf *buf)
+{
+	/*
+	 * Need to release the dma_buf's reference on the gem object it was
+	 * exported from, and also clear the gem object's export_dma_buf
+	 * pointer to this dma_buf as it no longer exists
+	 */
+	struct drm_gem_object *obj = (struct drm_gem_object *)buf->priv;
+	struct pl111_gem_bo *bo;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct drm_crtc *crtc;
+	unsigned long flags;
+#endif
+	bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("Releasing dma_buf %p, drm_gem_obj=%p\n", buf, obj);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	list_for_each_entry(crtc, &bo->gem_object.dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb) {
+			struct pl111_drm_framebuffer *pl111_fb;
+			struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+			pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+			if (pl111_fb->bo == bo) {
+				kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+				pl111_crtc->old_kds_res_set = NULL;
+			}
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+	}
+#endif
+	mutex_lock(&priv.export_dma_buf_lock);
+
+	obj->export_dma_buf = NULL;
+	drm_gem_object_unreference_unlocked(obj);
+
+	mutex_unlock(&priv.export_dma_buf_lock);
+}
+
+static int pl111_dma_buf_attach(struct dma_buf *buf, struct device *dev,
+				struct dma_buf_attachment *attach)
+{
+	DRM_DEBUG_KMS("Attaching dma_buf %p to device %p attach=%p\n", buf,
+			dev, attach);
+
+	attach->priv = dev;
+
+	return 0;
+}
+
+static void pl111_dma_buf_detach(struct dma_buf *buf,
+				struct dma_buf_attachment *attach)
+{
+	DRM_DEBUG_KMS("Detaching dma_buf %p attach=%p\n", attach->dmabuf,
+			attach);
+}
+
+/* Heavily from exynos_drm_dmabuf.c */
+static struct sg_table *pl111_dma_buf_map_dma_buf(struct dma_buf_attachment
+						*attach,
+						enum dma_data_direction
+						direction)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int size, n_pages, nents;
+	struct scatterlist *s, *sg;
+	struct sg_table *sgt;
+	int ret, i;
+
+	DRM_DEBUG_KMS("Mapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+		      attach, bo);
+
+	/*
+	 * Nothing to do, if we are trying to map a dmabuf that has been imported.
+	 * Just return the existing sgt.
+	 */
+	if (obj->import_attach) {
+		BUG_ON(!bo->sgt);
+		return bo->sgt;
+	}
+
+	size = obj->size;
+	n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	if (bo->type & PL111_BOT_DMA) {
+		sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+		if (!sgt) {
+			DRM_ERROR("Failed to allocate sg_table\n");
+			return ERR_PTR(-ENOMEM);
+		}
+
+		ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+		if (ret < 0) {
+			DRM_ERROR("Failed to allocate page table\n");
+			return ERR_PTR(-ENOMEM);
+		}
+		sg_dma_len(sgt->sgl) = size;
+		/* We use DMA coherent mappings for PL111_BOT_DMA so we must
+		 * use the virtual address returned at buffer allocation */
+		sg_set_buf(sgt->sgl, bo->backing_data.dma.fb_cpu_addr, size);
+		sg_dma_address(sgt->sgl) = bo->backing_data.dma.fb_dev_addr;
+	} else { /* PL111_BOT_SHM */
+		struct page **pages;
+		int pg = 0;
+
+		mutex_lock(&dev->struct_mutex);
+		pages = get_pages(obj);
+		if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev, "could not get pages: %ld\n",
+				PTR_ERR(pages));
+			return ERR_CAST(pages);
+		}
+		sgt = drm_prime_pages_to_sg(pages, n_pages);
+		if (sgt == NULL)
+			return ERR_PTR(-ENOMEM);
+
+		pl111_gem_sync_to_dma(bo);
+
+		/*
+		 * At this point the pages have been dma-mapped by either
+		 * get_pages() for non cached maps or pl111_gem_sync_to_dma()
+		 * for cached. So the physical addresses can be assigned
+		 * to the sg entries.
+		 * drm_prime_pages_to_sg() may have combined contiguous pages
+		 * into chunks so we assign the physical address of the first
+		 * page of a chunk to the chunk and check that the physical
+		 * addresses of the rest of the pages in that chunk are also
+		 * contiguous.
+		 */
+		sg = sgt->sgl;
+		nents = sgt->nents;
+
+		for_each_sg(sg, s, nents, i) {
+			int j, n_pages_in_chunk = sg_dma_len(s) >> PAGE_SHIFT;
+
+			sg_dma_address(s) = bo->backing_data.shm.dma_addrs[pg];
+
+			for (j = pg+1; j < pg+n_pages_in_chunk; j++) {
+				BUG_ON(bo->backing_data.shm.dma_addrs[j] !=
+						bo->backing_data.shm.dma_addrs[j-1]+PAGE_SIZE);
+			}
+
+			pg += n_pages_in_chunk;
+		}
+
+		mutex_unlock(&dev->struct_mutex);
+	}
+	bo->sgt = sgt;
+	return sgt;
+}
+
+static void pl111_dma_buf_unmap_dma_buf(struct dma_buf_attachment *attach,
+					struct sg_table *sgt,
+					enum dma_data_direction direction)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("Unmapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+			attach, bo);
+
+	sg_free_table(sgt);
+	kfree(sgt);
+	bo->sgt = NULL;
+}
+
+/*
+ * There isn't any operation here that can sleep or fail so this callback can
+ * be used for both kmap and kmap_atomic implementations.
+ */
+static void *pl111_dma_buf_kmap(struct dma_buf *dma_buf, unsigned long pageno)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	void *vaddr = NULL;
+
+	/* Make sure we cannot access outside the memory range */
+	if (((pageno + 1) << PAGE_SHIFT) > bo->gem_object.size)
+		return NULL;
+
+	if (bo->type & PL111_BOT_DMA) {
+		vaddr = (bo->backing_data.dma.fb_cpu_addr +
+						(pageno << PAGE_SHIFT));
+	} else {
+		vaddr = page_address(bo->backing_data.shm.pages[pageno]);
+	}
+
+	return vaddr;
+}
+
+/*
+ * Find a scatterlist that starts in "start" and has "len"
+ * or return a NULL dma_handle.
+ */
+static dma_addr_t pl111_find_matching_sg(struct sg_table *sgt, size_t start,
+					size_t len)
+{
+	struct scatterlist *sg;
+	unsigned int count;
+	size_t size = 0;
+	dma_addr_t dma_handle = 0;
+
+	/* Find a scatterlist that starts in "start" and has "len"
+	* or return error */
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		if ((size == start) && (len == sg_dma_len(sg))) {
+			dma_handle = sg_dma_address(sg);
+			break;
+		}
+		size += sg_dma_len(sg);
+	}
+	return dma_handle;
+}
+
+static int pl111_dma_buf_begin_cpu(struct dma_buf *dma_buf,
+					size_t start, size_t len,
+					enum dma_data_direction dir)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	struct sg_table *sgt = bo->sgt;
+	dma_addr_t dma_handle;
+
+	if ((start + len) > bo->gem_object.size)
+		return -EINVAL;
+
+	if (!(bo->type & PL111_BOT_SHM)) {
+		struct device *dev = bo->gem_object.dev->dev;
+
+		dma_handle = pl111_find_matching_sg(sgt, start, len);
+		if (!dma_handle)
+			return -EINVAL;
+
+		dma_sync_single_range_for_cpu(dev, dma_handle, 0, len, dir);
+	}
+	/* PL111_BOT_DMA uses coherents mappings, no need to sync */
+	return 0;
+}
+
+static void pl111_dma_buf_end_cpu(struct dma_buf *dma_buf,
+					size_t start, size_t len,
+					enum dma_data_direction dir)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	struct sg_table *sgt = bo->sgt;
+	dma_addr_t dma_handle;
+
+	if ((start + len) > bo->gem_object.size)
+		return;
+
+	if (!(bo->type & PL111_BOT_DMA)) {
+		struct device *dev = bo->gem_object.dev->dev;
+
+		dma_handle = pl111_find_matching_sg(sgt, start, len);
+		if (!dma_handle)
+			return;
+
+		dma_sync_single_range_for_device(dev, dma_handle, 0, len, dir);
+	}
+	/* PL111_BOT_DMA uses coherents mappings, no need to sync */
+}
+
+static struct dma_buf_ops pl111_dma_buf_ops = {
+	.release = &pl111_dma_buf_release,
+	.attach = &pl111_dma_buf_attach,
+	.detach = &pl111_dma_buf_detach,
+	.map_dma_buf = &pl111_dma_buf_map_dma_buf,
+	.unmap_dma_buf = &pl111_dma_buf_unmap_dma_buf,
+	.kmap_atomic = &pl111_dma_buf_kmap,
+	.kmap = &pl111_dma_buf_kmap,
+	.begin_cpu_access = &pl111_dma_buf_begin_cpu,
+	.end_cpu_access = &pl111_dma_buf_end_cpu,
+	.mmap = &pl111_dma_buf_mmap,
+};
+
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+				       struct dma_buf *dma_buf)
+{
+	struct dma_buf_attachment *attachment;
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+	struct scatterlist *sgl;
+	struct sg_table *sgt;
+	dma_addr_t cont_phys;
+	int ret = 0;
+	int i;
+
+	DRM_DEBUG_KMS("DRM %s on dev=%p dma_buf=%p\n", __func__, dev, dma_buf);
+
+	/* is this one of own objects? */
+	if (dma_buf->ops == &pl111_dma_buf_ops) {
+		obj = dma_buf->priv;
+		/* is it from our device? */
+		if (obj->dev == dev) {
+			/*
+			* Importing dmabuf exported from our own gem increases
+			* refcount on gem itself instead of f_count of dmabuf.
+			*/
+			drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+			/* before v3.10.0 we assume the caller has taken a ref on the dma_buf
+			 * we don't want it for self-imported buffers so drop it here */
+			dma_buf_put(dma_buf);
+#endif
+
+			return obj;
+		}
+	}
+
+	attachment = dma_buf_attach(dma_buf, dev->dev);
+	if (IS_ERR(attachment))
+		return ERR_CAST(attachment);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+	/* from 3.10.0 we assume the caller has not taken a ref so we take one here */
+	get_dma_buf(dma_buf);
+#endif
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto err_buf_detach;
+	}
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo) {
+		DRM_ERROR("%s: failed to allocate buffer object.\n", __func__);
+		ret = -ENOMEM;
+		goto err_unmap_attach;
+	}
+
+	/* Find out whether the buffer is contiguous or not */
+	sgl = sgt->sgl;
+	cont_phys = sg_phys(sgl);
+	bo->type |= PL111_BOT_DMA;
+	for_each_sg(sgt->sgl, sgl, sgt->nents, i) {
+		dma_addr_t real_phys = sg_phys(sgl);
+		if (real_phys != cont_phys) {
+			bo->type &= ~PL111_BOT_DMA;
+			break;
+		}
+		cont_phys += (PAGE_SIZE - sgl->offset);
+	}
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	ret = drm_gem_private_object_init(dev, &bo->gem_object,
+					  dma_buf->size);
+	if (ret != 0) {
+		DRM_ERROR("DRM could not import DMA GEM obj\n");
+		goto err_free_buffer;
+	}
+#else
+	drm_gem_private_object_init(dev, &bo->gem_object, dma_buf->size);
+#endif
+
+	if (bo->type & PL111_BOT_DMA) {
+		bo->backing_data.dma.fb_cpu_addr = sg_virt(sgt->sgl);
+		bo->backing_data.dma.fb_dev_addr = sg_phys(sgt->sgl);
+		DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, contiguous import\n", __func__, bo);
+	} else { /* PL111_BOT_SHM */
+		DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, non contiguous import\n", __func__, bo);
+	}
+
+	bo->gem_object.import_attach = attachment;
+	bo->sgt = sgt;
+
+	return &bo->gem_object;
+
+err_free_buffer:
+	kfree(bo);
+	bo = NULL;
+err_unmap_attach:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+err_buf_detach:
+	dma_buf_detach(dma_buf, attachment);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+	/* from 3.10.0 we will have taken a ref so drop it here */
+	dma_buf_put(dma_buf);
+#endif
+	return ERR_PTR(ret);
+}
+
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+				      struct drm_gem_object *obj, int flags)
+{
+	struct dma_buf *new_buf;
+	struct pl111_gem_bo *bo;
+	size_t size;
+
+	DRM_DEBUG("DRM %s on dev=%p drm_gem_obj=%p\n", __func__, dev, obj);
+	size = obj->size;
+
+	new_buf = dma_buf_export(obj /*priv */ , &pl111_dma_buf_ops, size,
+					flags | O_RDWR);
+	bo = PL111_BO_FROM_GEM(new_buf->priv);
+
+	/*
+	 * bo->gem_object.export_dma_buf not setup until after gem_prime_export
+	 * finishes
+	 */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Ensure that we hold the kds resource if it's the currently
+	 * displayed buffer.
+	 */
+	obtain_kds_if_currently_displayed(dev, bo, new_buf);
+#endif
+
+	DRM_DEBUG("Created dma_buf %p\n", new_buf);
+
+	return new_buf;
+}
+
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+				uint32_t handle, uint32_t flags, int *prime_fd)
+{
+	int result;
+	/*
+	 * This will re-use any existing exports, and calls
+	 * driver->gem_prime_export to do the first export when needed
+	 */
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p, handle=0x%.8x\n", __func__,
+			file_priv, handle);
+
+	mutex_lock(&priv.export_dma_buf_lock);
+	result = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags,
+						prime_fd);
+	mutex_unlock(&priv.export_dma_buf_lock);
+
+	return result;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c
new file mode 100755
index 0000000..78c91c0
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_encoder.c
+ * Implementation of the encoder functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+bool pl111_encoder_helper_mode_fixup(struct drm_encoder *encoder,
+					struct drm_display_mode *mode,
+					struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+	return true;
+}
+
+void pl111_encoder_helper_prepare(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_commit(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_mode_set(struct drm_encoder *encoder,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_disable(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct pl111_drm_encoder *pl111_encoder =
+					PL111_ENCODER_FROM_ENCODER(encoder);
+
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+
+	drm_encoder_cleanup(encoder);
+	kfree(pl111_encoder);
+}
+
+const struct drm_encoder_funcs encoder_funcs = {
+	.destroy = pl111_encoder_destroy,
+};
+
+const struct drm_encoder_helper_funcs encoder_helper_funcs = {
+	.mode_fixup = pl111_encoder_helper_mode_fixup,
+	.prepare = pl111_encoder_helper_prepare,
+	.commit = pl111_encoder_helper_commit,
+	.mode_set = pl111_encoder_helper_mode_set,
+	.disable = pl111_encoder_helper_disable,
+};
+
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+						int possible_crtcs)
+{
+	struct pl111_drm_encoder *pl111_encoder;
+
+	pl111_encoder = kzalloc(sizeof(struct pl111_drm_encoder), GFP_KERNEL);
+	if (pl111_encoder == NULL) {
+		pr_err("Failed to allocated pl111_drm_encoder\n");
+		return NULL;
+	}
+
+	drm_encoder_init(dev, &pl111_encoder->encoder, &encoder_funcs,
+				DRM_MODE_ENCODER_DAC);
+
+	drm_encoder_helper_add(&pl111_encoder->encoder, &encoder_helper_funcs);
+
+	pl111_encoder->encoder.possible_crtcs = possible_crtcs;
+
+	return pl111_encoder;
+}
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c
new file mode 100755
index 0000000..f575c9e
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c
@@ -0,0 +1,202 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_fb.c
+ * Implementation of the framebuffer functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_crtc.h>
+#include "pl111_drm.h"
+
+static void pl111_fb_destroy(struct drm_framebuffer *framebuffer)
+{
+	struct pl111_drm_framebuffer *pl111_fb;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct drm_crtc *crtc;
+	unsigned long flags;
+#endif
+	DRM_DEBUG_KMS("Destroying framebuffer 0x%p...\n", framebuffer);
+
+	pl111_fb = PL111_FB_FROM_FRAMEBUFFER(framebuffer);
+
+	/*
+	 * Because flips are deferred, wait for all previous flips to complete
+	 */
+	wait_event(priv.wait_for_flips,
+			atomic_read(&priv.nr_flips_in_flight) == 0);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Release KDS resources if it's currently being displayed. Only occurs
+	 * when the last framebuffer is destroyed.
+	 */
+	list_for_each_entry(crtc, &framebuffer->dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb == framebuffer) {
+			/* Release the current buffers */
+			if (pl111_crtc->old_kds_res_set != NULL) {
+				DRM_DEBUG_KMS("Releasing KDS resources for ");
+				DRM_DEBUG_KMS("displayed 0x%p\n", framebuffer);
+				kds_resource_set_release(
+					&pl111_crtc->old_kds_res_set);
+			}
+			pl111_crtc->old_kds_res_set = NULL;
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock,
+								flags);
+	}
+#endif
+	drm_framebuffer_cleanup(framebuffer);
+
+	if ((pl111_fb->bo != NULL) && (&pl111_fb->bo->gem_object != NULL))
+		drm_gem_object_unreference_unlocked(&pl111_fb->bo->gem_object);
+
+	kfree(pl111_fb);
+
+	DRM_DEBUG_KMS("Destroyed framebuffer 0x%p\n", framebuffer);
+}
+
+static int pl111_fb_create_handle(struct drm_framebuffer *fb,
+				struct drm_file *file_priv,
+				unsigned int *handle)
+{
+	struct pl111_gem_bo *bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	DRM_DEBUG_KMS("DRM %s on fb=%p\n", __func__, fb);
+
+	if (bo == NULL)
+		return -EINVAL;
+
+	return drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+}
+
+const struct drm_framebuffer_funcs fb_funcs = {
+	.destroy = pl111_fb_destroy,
+	.create_handle = pl111_fb_create_handle,
+};
+
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct pl111_drm_framebuffer *pl111_fb = NULL;
+	struct drm_framebuffer *fb = NULL;
+	struct drm_gem_object *gem_obj;
+	struct pl111_gem_bo *bo;
+	int err = 0;
+	size_t min_size;
+	int bpp;
+	int depth;
+
+	pr_info("DRM %s\n", __func__);
+	gem_obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
+	if (gem_obj == NULL) {
+		DRM_ERROR("Could not get gem obj from handle to create fb\n");
+		err = -ENOENT;
+		goto error;
+	}
+
+	bo = PL111_BO_FROM_GEM(gem_obj);
+	drm_fb_get_bpp_depth(mode_cmd->pixel_format, &depth, &bpp);
+
+	if (mode_cmd->pitches[0] < mode_cmd->width * (bpp >> 3)) {
+		DRM_ERROR("bad pitch %u for plane 0\n", mode_cmd->pitches[0]);
+		err = -EINVAL;
+		goto error;
+	}
+
+	min_size = (mode_cmd->height - 1) * mode_cmd->pitches[0]
+				+ mode_cmd->width * (bpp >> 3);
+
+	if (bo->gem_object.size < min_size) {
+		DRM_ERROR("gem obj size < min size\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* We can't scan out SHM so we can't create an fb for it */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		DRM_ERROR("Can't create FB for non-scanout buffer\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	switch ((char)(mode_cmd->pixel_format & 0xFF)) {
+	case 'Y':
+	case 'U':
+	case 'V':
+	case 'N':
+	case 'T':
+		DRM_ERROR("YUV formats not supported\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	pl111_fb = kzalloc(sizeof(struct pl111_drm_framebuffer), GFP_KERNEL);
+	if (pl111_fb == NULL) {
+		DRM_ERROR("Could not allocate pl111_drm_framebuffer\n");
+		err = -ENOMEM;
+		goto error;
+	}
+	fb = &pl111_fb->fb;
+
+	err = drm_framebuffer_init(dev, fb, &fb_funcs);
+	if (err) {
+		DRM_ERROR("drm_framebuffer_init failed\n");
+		kfree(fb);
+		fb = NULL;
+		goto error;
+	}
+
+	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+	/* The only framebuffer formats supported by pl111
+	 * are 16 bpp or 32 bpp with 24 bit depth.
+	 * See clcd_enable()
+	 */
+	if (!((fb->bits_per_pixel == 16) ||
+		  (fb->bits_per_pixel == 32 && fb->depth == 24))) {
+		DRM_DEBUG_KMS("unsupported pixel format bpp=%d, depth=%d\n", fb->bits_per_pixel, fb->depth);
+		drm_framebuffer_cleanup(fb);
+		kfree(fb);
+		fb = NULL;
+		err = -EINVAL;
+		goto error;
+	}
+
+	pl111_fb->bo = bo;
+
+	DRM_DEBUG_KMS("Created fb 0x%p with gem_obj 0x%p physaddr=0x%.8x\n",
+			fb, gem_obj, bo->backing_data.dma.fb_dev_addr);
+
+	return fb;
+
+error:
+	if (gem_obj != NULL)
+		drm_gem_object_unreference_unlocked(gem_obj);
+
+	return ERR_PTR(err);
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h
new file mode 100755
index 0000000..494baa0
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_funcs.h
+ * Function prototypes for PL111 DRM
+ */
+
+#ifndef PL111_DRM_FUNCS_H_
+#define PL111_DRM_FUNCS_H_
+
+/* Platform Initialisation */
+int pl111_drm_init(struct platform_device *dev);
+void pl111_drm_exit(struct platform_device *dev);
+
+/* KDS Callbacks */
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2);
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res);
+
+/* CRTC Functions */
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev);
+struct pl111_drm_crtc *pl111_crtc_dummy_create(struct drm_device *dev);
+void pl111_crtc_destroy(struct drm_crtc *crtc);
+
+bool pl111_crtc_is_fb_currently_displayed(struct drm_device *dev,
+					struct drm_framebuffer *fb);
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+			struct drm_framebuffer *fb, bool page_flip,
+			struct drm_pending_vblank_event *event);
+
+/* Common IRQ handler */
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc);
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+			   struct drm_file *file_priv,
+			   uint32_t handle,
+			   uint32_t width,
+			   uint32_t height);
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+			    int x, int y);
+
+/* Connector Functions */
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev);
+void pl111_connector_destroy(struct drm_connector *connector);
+struct pl111_drm_connector *pl111_connector_dummy_create(struct drm_device
+								*dev);
+
+/* Encoder Functions */
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+						int possible_crtcs);
+struct pl111_drm_encoder *pl111_encoder_dummy_create(struct drm_device *dev,
+							int possible_crtcs);
+void pl111_encoder_destroy(struct drm_encoder *encoder);
+
+/* Frame Buffer Functions */
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd);
+
+/* VMA Functions */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma);
+struct page **get_pages(struct drm_gem_object *obj);
+void put_pages(struct drm_gem_object *obj, struct page **pages);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+                struct vm_area_struct *vma);
+void pl111_gem_vm_open(struct vm_area_struct *vma);
+void pl111_gem_vm_close(struct vm_area_struct *vma);
+#endif
+
+/* Suspend Functions */
+int pl111_drm_resume(struct drm_device *dev);
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state);
+
+/* GEM Functions */
+int pl111_dumb_create(struct drm_file *file_priv,
+			struct drm_device *dev,
+			struct drm_mode_create_dumb *args);
+int pl111_dumb_destroy(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle);
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle,
+			uint64_t *offset);
+void pl111_gem_free_object(struct drm_gem_object *obj);
+
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+			struct vm_area_struct *vma, size_t size);
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff);
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo);
+
+/* DMA BUF Functions */
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+				       struct dma_buf *dma_buf);
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+			uint32_t handle, uint32_t flags, int *prime_fd);
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+				struct drm_gem_object *obj, int flags);
+
+/* Pl111 Functions */
+void show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource
+					*flip_res, struct drm_framebuffer *fb);
+int clcd_disable(struct drm_crtc *crtc);
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res);
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id);
+int pl111_amba_remove(struct amba_device *dev);
+
+int pl111_device_init(struct drm_device *dev);
+void pl111_device_fini(struct drm_device *dev);
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+					struct clcd_regs *timing);
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+					struct drm_display_mode *mode);
+#endif /* PL111_DRM_FUNCS_H_ */
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c
new file mode 100755
index 0000000..13fb256
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c
@@ -0,0 +1,476 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_gem.c
+ * Implementation of the GEM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <asm/cacheflush.h>
+#include <asm/outercache.h>
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#include <linux/dma-attrs.h>
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0))
+#include <drm/drm_vma_manager.h>
+#endif
+
+void pl111_gem_free_object(struct drm_gem_object *obj)
+{
+	struct pl111_gem_bo *bo;
+	struct drm_device *dev = obj->dev;
+	DRM_DEBUG_KMS("DRM %s on drm_gem_object=%p\n", __func__, obj);
+
+	bo = PL111_BO_FROM_GEM(obj);
+
+	if (obj->import_attach)
+		drm_prime_gem_destroy(obj, bo->sgt);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	if (obj->map_list.map != NULL)
+		drm_gem_free_mmap_offset(obj);
+#else
+	drm_gem_free_mmap_offset(obj);
+#endif
+	/*
+	 * Only free the backing memory if the object has not been imported.
+	 * If it has been imported, the exporter is in charge to free that
+	 * once dmabuf's refcount becomes 0.
+	 */
+	if (obj->import_attach)
+		goto imported_out;
+
+	if (bo->type & PL111_BOT_DMA) {
+		dma_free_writecombine(dev->dev, obj->size,
+				bo->backing_data.dma.fb_cpu_addr,
+				bo->backing_data.dma.fb_dev_addr);
+	} else if (bo->backing_data.shm.pages != NULL) {
+		put_pages(obj, bo->backing_data.shm.pages);
+	}
+
+imported_out:
+	drm_gem_object_release(obj);
+
+	kfree(bo);
+
+	DRM_DEBUG_KMS("Destroyed dumb_bo handle 0x%p\n", bo);
+}
+
+static int pl111_gem_object_create(struct drm_device *dev, u64 size,
+				   u32 flags, struct drm_file *file_priv,
+				   u32 *handle)
+{
+	int ret = 0;
+	struct pl111_gem_bo *bo = NULL;
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (bo == NULL) {
+		ret = -ENOMEM;
+		goto finish;
+	}
+
+	bo->type = flags;
+
+#ifndef ARCH_HAS_SG_CHAIN
+	/*
+	 * If the ARCH can't chain we can't have non-contiguous allocs larger
+	 * than a single sg can hold.
+	 * In this case we fall back to using contiguous memory
+	 */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		long unsigned int n_pages =
+				PAGE_ALIGN(size) >> PAGE_SHIFT;
+		if (n_pages > SG_MAX_SINGLE_ALLOC) {
+			bo->type |= PL111_BOT_DMA;
+			/*
+			 * Non-contiguous allocation request changed to
+			 * contigous
+			 */
+			DRM_INFO("non-contig alloc to contig %lu > %lu pages.",
+					n_pages, SG_MAX_SINGLE_ALLOC);
+		}
+	}
+#endif
+	if (bo->type & PL111_BOT_DMA) {
+		/* scanout compatible - use physically contiguous buffer */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_attrs(dev->dev, size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL,
+					&attrs);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_attrs failed\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+#else
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_writecombine(dev->dev, size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_writecombine failed\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+		ret = drm_gem_private_object_init(dev, &bo->gem_object,
+							size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not initialise GEM object\n");
+			goto free_dma;
+		}
+#else
+		drm_gem_private_object_init(dev, &bo->gem_object, size);
+#endif
+
+	} else { /* PL111_BOT_SHM */
+		/* not scanout compatible - use SHM backed object */
+		ret = drm_gem_object_init(dev, &bo->gem_object, size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not init SHM backed GEM obj\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+		DRM_DEBUG_KMS("Num bytes: %d\n", bo->gem_object.size);
+	}
+
+	DRM_DEBUG("s=%llu, flags=0x%x, %s 0x%.8lx, type=%d\n",
+		size, flags,
+		(bo->type & PL111_BOT_DMA) ? "physaddr" : "shared page array",
+		(bo->type & PL111_BOT_DMA) ?
+			(unsigned long)bo->backing_data.dma.fb_dev_addr:
+			(unsigned long)bo->backing_data.shm.pages,
+			bo->type);
+
+	ret = drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+	if (ret != 0) {
+		DRM_ERROR("DRM failed to create GEM handle\n");
+		goto obj_release;
+	}
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&bo->gem_object);
+	
+	return 0;
+
+obj_release:
+	drm_gem_object_release(&bo->gem_object);
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+free_dma:
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	if (bo->type & PL111_BOT_DMA) {
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(dev->dev, size,
+			bo->backing_data.dma.fb_cpu_addr,
+			bo->backing_data.dma.fb_dev_addr,
+			&attrs);
+		}
+#else
+	if (bo->type & PL111_BOT_DMA)
+		dma_free_writecombine(dev->dev, size,
+			bo->backing_data.dma.fb_cpu_addr,
+			bo->backing_data.dma.fb_dev_addr);
+#endif
+free_bo:
+	kfree(bo);
+finish:
+	return ret;
+}
+
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv)
+{
+	struct drm_pl111_gem_create *args = data;
+	uint32_t bytes_pp;
+
+	/* Round bpp up, to allow for case where bpp<8 */
+	bytes_pp = args->bpp >> 3;
+	if (args->bpp & ((1 << 3) - 1))
+		bytes_pp++;
+
+	if (args->flags & ~PL111_BOT_MASK) {
+		DRM_ERROR("wrong flags: 0x%x\n", args->flags);
+		return -EINVAL;
+	}
+
+	args->pitch = ALIGN(args->width * bytes_pp, 64);
+	args->size = PAGE_ALIGN(args->pitch * args->height);
+
+	DRM_DEBUG_KMS("gem_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+			args->width, args->height, args->pitch, args->bpp,
+			bytes_pp, args->size, args->flags);
+
+	return pl111_gem_object_create(dev, args->size, args->flags, file_priv,
+					&args->handle);
+}
+
+int pl111_dumb_create(struct drm_file *file_priv,
+		struct drm_device *dev, struct drm_mode_create_dumb *args)
+{
+	uint32_t bytes_pp;
+
+	/* Round bpp up, to allow for case where bpp<8 */
+	bytes_pp = args->bpp >> 3;
+	if (args->bpp & ((1 << 3) - 1))
+		bytes_pp++;
+
+	if (args->flags) {
+		DRM_ERROR("flags must be zero: 0x%x\n", args->flags);
+		return -EINVAL;
+	}
+
+	args->pitch = ALIGN(args->width * bytes_pp, 64);
+	args->size = PAGE_ALIGN(args->pitch * args->height);
+
+	DRM_DEBUG_KMS("dumb_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+			args->width, args->height, args->pitch, args->bpp,
+			bytes_pp, args->size, args->flags);
+
+	return pl111_gem_object_create(dev, args->size,
+				       PL111_BOT_DMA | PL111_BOT_UNCACHED,
+				       file_priv, &args->handle);
+}
+
+int pl111_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
+		uint32_t handle)
+{
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+			file_priv, handle);
+	return drm_gem_handle_delete(file_priv, handle);
+}
+
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle,
+			uint64_t *offset)
+{
+	struct drm_gem_object *obj;
+	int ret = 0;
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+			file_priv, handle);
+
+	/* GEM does all our handle to object mapping */
+	obj = drm_gem_object_lookup(dev, file_priv, handle);
+	if (obj == NULL) {
+		ret = -ENOENT;
+		goto fail;
+	}
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	if (obj->map_list.map == NULL) {
+		ret = drm_gem_create_mmap_offset(obj);
+		if (ret != 0) {
+			drm_gem_object_unreference_unlocked(obj);
+			goto fail;
+		}
+	}
+#else
+	ret = drm_gem_create_mmap_offset(obj);
+	if (ret != 0) {
+		drm_gem_object_unreference_unlocked(obj);
+		goto fail;
+	}
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	*offset = (uint64_t) obj->map_list.hash.key << PAGE_SHIFT;
+#else
+	*offset = drm_vma_node_offset_addr(&obj->vma_node);
+	DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
+#endif
+
+	drm_gem_object_unreference_unlocked(obj);
+fail:
+	return ret;
+}
+
+/* sync the buffer for DMA access */
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo)
+{
+	struct drm_device *dev = bo->gem_object.dev;
+
+	if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED)) {
+		int i, npages = bo->gem_object.size >> PAGE_SHIFT;
+		struct page **pages = bo->backing_data.shm.pages;
+		bool dirty = false;
+
+		for (i = 0; i < npages; i++) {
+			if (!bo->backing_data.shm.dma_addrs[i]) {
+				DRM_DEBUG("%s: dma map page=%d bo=%p\n", __func__, i, bo);
+				 bo->backing_data.shm.dma_addrs[i] =
+					dma_map_page(dev->dev, pages[i], 0,
+					PAGE_SIZE, DMA_BIDIRECTIONAL);
+				dirty = true;
+			}
+		}
+
+		if (dirty) {
+			DRM_DEBUG("%s: zap ptes (and flush cache) bo=%p\n", __func__, bo);
+			/*
+			 * TODO MIDEGL-1813
+			 * 
+			 * Use flush_cache_page() and outer_flush_range() to
+			 * flush only the user space mappings of the dirty pages
+			 */
+			flush_cache_all();
+			outer_flush_all();
+			unmap_mapping_range(bo->gem_object.filp->f_mapping, 0,
+					bo->gem_object.size, 1);
+		}
+	}
+}
+
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff)
+{
+	struct drm_device *dev = bo->gem_object.dev;
+
+	/*
+	 * TODO MIDEGL-1808
+	 * 
+	 * The following check was meant to detect if the CPU is trying to access
+	 * a buffer that is currently mapped for DMA accesses, which is illegal
+	 * as described by the DMA-API.
+	 * 
+	 * However, some of our tests are trying to do that, which triggers the message
+	 * below and avoids dma-unmapping the pages not to annoy the DMA device but that
+	 * leads to the test failing because of caches not being properly flushed.
+	 */
+
+	/*
+	if (bo->sgt) {
+		DRM_ERROR("%s: the CPU is trying to access a dma-mapped buffer\n",  __func__);
+		return;
+	}
+	*/
+
+	if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED) &&
+	     bo->backing_data.shm.dma_addrs[pgoff]) {
+		DRM_DEBUG("%s: unmap bo=%p (s=%d), paddr=%08x\n",
+			  __func__, bo,  bo->gem_object.size,
+			  bo->backing_data.shm.dma_addrs[pgoff]);
+		dma_unmap_page(dev->dev, bo->backing_data.shm.dma_addrs[pgoff],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		bo->backing_data.shm.dma_addrs[pgoff] = 0;
+	}
+}
+
+/* Based on omapdrm driver */
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+		 struct vm_area_struct *vma, size_t size)
+{
+	DRM_DEBUG("DRM %s on drm_gem_object=%p, pl111_gem_bo=%p type=%08x\n",
+			__func__, obj, bo, bo->type);
+
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_flags |= VM_MIXEDMAP;
+
+	if (bo->type & PL111_BOT_WC) {
+		vma->vm_page_prot =
+			pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	} else if (bo->type & PL111_BOT_CACHED) {
+		/*
+		 * Objects that do not have a filp (DMA backed) can't be
+		 * mapped as cached now. Write-combine should be enough.
+		 */
+		if (WARN_ON(!obj->filp))
+			return -EINVAL;
+
+		/*
+		 * As explained in Documentation/dma-buf-sharing.txt
+		 * we need this trick so that we can manually zap ptes
+		 * in order to fake coherency.
+		 */
+		fput(vma->vm_file);
+		get_file(obj->filp);
+		vma->vm_file = obj->filp;
+
+		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	} else { /* PL111_BOT_UNCACHED */
+		vma->vm_page_prot =
+			pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+	}
+	return 0;
+}
+
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma)
+{
+	int ret;
+	struct drm_file *priv = file_priv->private_data;
+	struct drm_device *dev = priv->minor->dev;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_hash_item *hash;
+	struct drm_local_map *map = NULL;
+#else
+	struct drm_vma_offset_node *node;
+#endif
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+
+	DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash);
+	map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+	obj = map->handle;
+#else
+	node = drm_vma_offset_exact_lookup(dev->vma_offset_manager,
+			vma->vm_pgoff,
+			vma_pages(vma));
+	obj = container_of(node, struct drm_gem_object, vma_node);
+#endif
+	bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("DRM %s on pl111_gem_bo %p bo->type 0x%08x\n", __func__, bo, bo->type);
+
+	/* for an imported buffer we let the exporter handle the mmap */
+	if (obj->import_attach)
+		return dma_buf_mmap(obj->import_attach->dmabuf, vma, 0);
+
+	ret = drm_gem_mmap(file_priv, vma);
+	if (ret < 0) {
+		DRM_ERROR("failed to mmap\n");
+		return ret;
+	}
+
+	/* Our page fault handler uses the page offset calculated from the vma,
+	 * so we need to remove the gem cookie offset specified in the call.
+	 */
+	vma->vm_pgoff = 0;
+
+	return pl111_bo_mmap(obj, bo, vma, vma->vm_end - vma->vm_start);
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c
new file mode 100755
index 0000000..1d613d0
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c
@@ -0,0 +1,417 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_pl111.c
+ * PL111 specific functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+/* This can't be called from IRQ context, due to clk_get() and board->enable */
+static int clcd_enable(struct drm_framebuffer *fb)
+{
+	__u32 cntl;
+	struct clcd_board *board;
+
+	pr_info("DRM %s\n", __func__);
+
+	clk_prepare_enable(priv.clk);
+
+	/* Enable and Power Up */
+	cntl = CNTL_LCDEN | CNTL_LCDTFT | CNTL_LCDPWR | CNTL_LCDVCOMP(1);
+	DRM_DEBUG_KMS("fb->bits_per_pixel = %d\n", fb->bits_per_pixel);
+	if (fb->bits_per_pixel == 16)
+		cntl |= CNTL_LCDBPP16_565;
+	else if (fb->bits_per_pixel == 32 && fb->depth == 24)
+		cntl |= CNTL_LCDBPP24;
+	else
+		BUG_ON(1);
+
+	cntl |= CNTL_BGR;
+
+	writel(cntl, priv.regs + CLCD_PL111_CNTL);
+
+	if (priv.amba_dev->dev.platform_data) {
+		board = priv.amba_dev->dev.platform_data;
+
+		if (board->enable)
+			board->enable(NULL);
+	}
+
+	/* Enable Interrupts */
+	writel(CLCD_IRQ_NEXTBASE_UPDATE, priv.regs + CLCD_PL111_IENB);
+
+	return 0;
+}
+
+int clcd_disable(struct drm_crtc *crtc)
+{
+	struct clcd_board *board;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	unsigned long flags;
+#endif
+
+	pr_info("DRM %s\n", __func__);
+
+	/* Disable Interrupts */
+	writel(0x00000000, priv.regs + CLCD_PL111_IENB);
+
+	if (priv.amba_dev->dev.platform_data) {
+		board = priv.amba_dev->dev.platform_data;
+
+		if (board->disable)
+			board->disable(NULL);
+	}
+
+	/* Disable and Power Down */
+	writel(0, priv.regs + CLCD_PL111_CNTL);
+
+	/* Disable clock */
+	clk_disable_unprepare(priv.clk);
+
+	pl111_crtc->last_bpp = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+	/* Release the previous buffers */
+	if (pl111_crtc->old_kds_res_set != NULL)
+		kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+
+	pl111_crtc->old_kds_res_set = NULL;
+	spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+	return 0;
+}
+
+/*
+ * To avoid a possible race where "pl111_crtc->current_update_res" has
+ * been updated (non NULL) but the corresponding scanout buffer has not been
+ * written to the base registers we must always call this function holding
+ * the "base_update_lock" spinlock with IRQs disabled (spin_lock_irqsave()).
+ */
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	struct drm_framebuffer *fb;
+	struct pl111_gem_bo *bo;
+	size_t min_size;
+	fb = flip_res->fb;
+	bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+
+
+
+	min_size = (fb->height - 1) * fb->pitches[0]
+	         + fb->width * (fb->bits_per_pixel >> 3);
+
+	BUG_ON(bo->gem_object.size < min_size);
+
+	/* Don't even attempt PL111_BOT_SHM, it's not contiguous */
+	BUG_ON(bo->type != PL111_BOT_DMA);
+
+	/*
+	 * Note the buffer for releasing after IRQ, and don't allow any more
+	 * updates until then.
+	 *
+	 * This clcd controller latches the new address on next vsync. Address
+	 * latching is indicated by CLCD_IRQ_NEXTBASE_UPDATE, and so we must
+	 * wait for that before releasing the previous buffer's kds
+	 * resources. Otherwise, we'll allow writers to write to the old buffer
+	 * whilst it is still being displayed
+	 */
+	pl111_crtc->current_update_res = flip_res;
+
+	DRM_DEBUG_KMS("Displaying fb 0x%p, dumb_bo 0x%p, physaddr %.8x\n",
+			fb, bo, bo->backing_data.dma.fb_dev_addr);
+	
+	if (drm_vblank_get(pl111_crtc->crtc.dev, pl111_crtc->crtc_index) < 0)
+		DRM_ERROR("Could not get vblank reference for crtc %d\n",
+				pl111_crtc->crtc_index);
+
+	/* Set the scanout buffer */
+	writel(bo->backing_data.dma.fb_dev_addr, priv.regs + CLCD_UBAS);
+	writel(bo->backing_data.dma.fb_dev_addr +
+		((fb->height - 1) * fb->pitches[0]), priv.regs + CLCD_LBAS);
+}
+
+void
+show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource *flip_res,
+					struct drm_framebuffer *fb)
+{
+	unsigned long irq_flags;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+	spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+	if (list_empty(&pl111_crtc->update_queue) &&
+			!pl111_crtc->current_update_res) {
+		do_flip_to_res(flip_res);
+	} else {
+		/*
+		 * Enqueue the update to occur on a future IRQ
+		 * This only happens on triple-or-greater buffering
+		 */
+		DRM_DEBUG_KMS("Deferring 3+ buffered flip to fb %p to IRQ\n",
+				fb);
+		list_add_tail(&flip_res->link, &pl111_crtc->update_queue);
+	}
+	spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+	if (!flip_res->page_flip && (pl111_crtc->last_bpp == 0 ||
+			pl111_crtc->last_bpp != fb->bits_per_pixel ||
+			!drm_mode_equal(pl111_crtc->new_mode,
+					pl111_crtc->current_mode))) {
+		struct clcd_regs timing;
+
+		pl111_convert_drm_mode_to_timing(pl111_crtc->new_mode, &timing);
+
+		DRM_DEBUG_KMS("Set timing: %08X:%08X:%08X:%08X clk=%ldHz\n",
+				timing.tim0, timing.tim1, timing.tim2,
+				timing.tim3, timing.pixclock);
+
+		/* This is the actual mode setting part */
+		clk_set_rate(priv.clk, timing.pixclock);
+
+		writel(timing.tim0, priv.regs + CLCD_TIM0);
+		writel(timing.tim1, priv.regs + CLCD_TIM1);
+		writel(timing.tim2, priv.regs + CLCD_TIM2);
+		writel(timing.tim3, priv.regs + CLCD_TIM3);
+
+		clcd_enable(fb);
+		pl111_crtc->last_bpp = fb->bits_per_pixel;
+	}
+
+	if (!flip_res->page_flip) {
+		drm_mode_destroy(flip_res->crtc->dev, pl111_crtc->current_mode);
+		pl111_crtc->current_mode = pl111_crtc->new_mode;
+		pl111_crtc->new_mode = NULL;
+	}
+
+	BUG_ON(!pl111_crtc->current_mode);
+
+	/*
+	 * If IRQs weren't enabled before, they are now. This will eventually
+	 * cause flip_res to be released via pl111_common_irq, which updates
+	 * every time the Base Address is latched (i.e. every frame, regardless
+	 * of whether we update the base address or not)
+	 */
+}
+
+irqreturn_t pl111_irq(int irq, void *data)
+{
+	u32 irq_stat;
+	struct pl111_drm_crtc *pl111_crtc = priv.pl111_crtc;
+
+	irq_stat = readl(priv.regs + CLCD_PL111_MIS);
+
+	if (!irq_stat)
+		return IRQ_NONE;
+
+	if (irq_stat & CLCD_IRQ_NEXTBASE_UPDATE)
+		pl111_common_irq(pl111_crtc);
+
+	/* Clear the interrupt once done */
+	writel(irq_stat, priv.regs + CLCD_PL111_ICR);
+
+	return IRQ_HANDLED;
+}
+
+int pl111_device_init(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	int ret;
+
+	if (priv == NULL) {
+		pr_err("%s no private data\n", __func__);
+		return -EINVAL;
+	}
+
+	if (priv->amba_dev == NULL) {
+		pr_err("%s no amba device found\n", __func__);
+		return -EINVAL;
+	}
+
+	/* set up MMIO for register access */
+	priv->mmio_start = priv->amba_dev->res.start;
+	priv->mmio_len = resource_size(&priv->amba_dev->res);
+
+	DRM_DEBUG_KMS("mmio_start=%lu, mmio_len=%u\n", priv->mmio_start,
+			priv->mmio_len);
+
+	priv->regs = ioremap(priv->mmio_start, priv->mmio_len);
+	if (priv->regs == NULL) {
+		pr_err("%s failed mmio\n", __func__);
+		return -EINVAL;
+	}
+
+	/* turn off interrupts */
+	writel(0, priv->regs + CLCD_PL111_IENB);
+
+	ret = request_irq(priv->amba_dev->irq[0], pl111_irq, 0,
+				"pl111_irq_handler", NULL);
+	if (ret != 0) {
+		pr_err("%s failed %d\n", __func__, ret);
+		goto out_mmio;
+	}
+
+	goto finish;
+
+out_mmio:
+	iounmap(priv->regs);
+finish:
+	DRM_DEBUG_KMS("pl111_device_init returned %d\n", ret);
+	return ret;
+}
+
+void pl111_device_fini(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	u32 cntl;
+
+	if (priv == NULL || priv->regs == NULL)
+		return;
+
+	free_irq(priv->amba_dev->irq[0], NULL);
+
+	cntl = readl(priv->regs + CLCD_PL111_CNTL);
+
+	cntl &= ~CNTL_LCDEN;
+	writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+	cntl &= ~CNTL_LCDPWR;
+	writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+	iounmap(priv->regs);
+}
+
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id)
+{
+	struct clcd_board *board = dev->dev.platform_data;
+	int ret;
+	pr_info("DRM %s\n", __func__);
+
+	if (!board)
+		dev_warn(&dev->dev, "board data not available\n");
+
+	ret = amba_request_regions(dev, NULL);
+	if (ret != 0) {
+		DRM_ERROR("CLCD: unable to reserve regs region\n");
+		goto out;
+	}
+
+	priv.amba_dev = dev;
+
+	priv.clk = clk_get(&priv.amba_dev->dev, NULL);
+	if (IS_ERR(priv.clk)) {
+		DRM_ERROR("CLCD: unable to get clk.\n");
+		ret = PTR_ERR(priv.clk);
+		goto clk_err;
+	}
+
+	return 0;
+
+clk_err:
+	amba_release_regions(dev);
+out:
+	return ret;
+}
+
+int pl111_amba_remove(struct amba_device *dev)
+{
+	DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+	clk_put(priv.clk);
+
+	amba_release_regions(dev);
+
+	priv.amba_dev = NULL;
+
+	return 0;
+}
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+					struct clcd_regs *timing)
+{
+	unsigned int ppl, hsw, hfp, hbp;
+	unsigned int lpp, vsw, vfp, vbp;
+	unsigned int cpl;
+
+	memset(timing, 0, sizeof(struct clcd_regs));
+
+	ppl = (mode->hdisplay / 16) - 1;
+	hsw = mode->hsync_end - mode->hsync_start - 1;
+	hfp = mode->hsync_start - mode->hdisplay - 1;
+	hbp = mode->htotal - mode->hsync_end - 1;
+
+	lpp = mode->vdisplay - 1;
+	vsw = mode->vsync_end - mode->vsync_start - 1;
+	vfp = mode->vsync_start - mode->vdisplay;
+	vbp = mode->vtotal - mode->vsync_end;
+
+	cpl = mode->hdisplay - 1;
+
+	timing->tim0 = (ppl << 2) | (hsw << 8) | (hfp << 16) | (hbp << 24);
+	timing->tim1 = lpp | (vsw << 10) | (vfp << 16) | (vbp << 24);
+	timing->tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC | TIM2_BCD | (cpl << 16);
+	timing->tim3 = 0;
+
+	timing->pixclock = mode->clock * 1000;
+}
+
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+					struct drm_display_mode *mode)
+{
+	unsigned int ppl, hsw, hfp, hbp;
+	unsigned int lpp, vsw, vfp, vbp;
+
+	ppl = (timing->tim0 >> 2) & 0x3f;
+	hsw = (timing->tim0 >> 8) & 0xff;
+	hfp = (timing->tim0 >> 16) & 0xff;
+	hbp = (timing->tim0 >> 24) & 0xff;
+
+	lpp = timing->tim1 & 0x3ff;
+	vsw = (timing->tim1 >> 10) & 0x3f;
+	vfp = (timing->tim1 >> 16) & 0xff;
+	vbp = (timing->tim1 >> 24) & 0xff;
+
+	mode->hdisplay    = (ppl + 1) * 16;
+	mode->hsync_start = ((ppl + 1) * 16) + hfp + 1;
+	mode->hsync_end   = ((ppl + 1) * 16) + hfp + hsw + 2;
+	mode->htotal      = ((ppl + 1) * 16) + hfp + hsw + hbp + 3;
+	mode->hskew       = 0;
+
+	mode->vdisplay    = lpp + 1;
+	mode->vsync_start = lpp + vfp + 1;
+	mode->vsync_end   = lpp + vfp + vsw + 2;
+	mode->vtotal      = lpp + vfp + vsw + vbp + 2;
+
+	mode->flags = 0;
+
+	mode->width_mm = 0;
+	mode->height_mm = 0;
+
+	mode->clock = timing->pixclock / 1000;
+	mode->hsync = timing->pixclock / mode->htotal;
+	mode->vrefresh = mode->hsync / mode->vtotal;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c
new file mode 100755
index 0000000..9d5ec0c
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c
@@ -0,0 +1,151 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_platform.c
+ * Implementation of the Linux platform device entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+static int pl111_platform_drm_suspend(struct platform_device *dev,
+					pm_message_t state)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+static int pl111_platform_drm_resume(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+int pl111_platform_drm_probe(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return pl111_drm_init(dev);
+}
+
+static int pl111_platform_drm_remove(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	pl111_drm_exit(dev);
+
+	return 0;
+}
+
+static struct amba_id pl111_id_table[] = {
+	{
+	.id = 0x00041110,
+	.mask = 0x000ffffe,
+	},
+	{0, 0},
+};
+
+static struct amba_driver pl111_amba_driver = {
+	.drv = {
+		.name = "clcd-pl11x",
+		},
+	.probe = pl111_amba_probe,
+	.remove = pl111_amba_remove,
+	.id_table = pl111_id_table,
+};
+
+static struct platform_driver platform_drm_driver = {
+	.probe = pl111_platform_drm_probe,
+	.remove = pl111_platform_drm_remove,
+	.suspend = pl111_platform_drm_suspend,
+	.resume = pl111_platform_drm_resume,
+	.driver = {
+			.owner = THIS_MODULE,
+			.name = DRIVER_NAME,
+		},
+};
+
+static const struct platform_device_info pl111_drm_pdevinfo = {
+	.name = DRIVER_NAME,
+	.id = -1,
+	.dma_mask = ~0UL
+};
+
+static struct platform_device *pl111_drm_device;
+
+static int __init pl111_platform_drm_init(void)
+{
+	int ret;
+
+	pr_info("DRM %s\n", __func__);
+
+	pl111_drm_device = platform_device_register_full(&pl111_drm_pdevinfo);
+	if (pl111_drm_device == NULL) {
+		pr_err("DRM platform_device_register_full() failed\n");
+		return -ENOMEM;
+	}
+
+	ret = amba_driver_register(&pl111_amba_driver);
+	if (ret != 0) {
+		pr_err("DRM amba_driver_register() failed %d\n", ret);
+		goto err_amba_reg;
+	}
+
+	ret = platform_driver_register(&platform_drm_driver);
+	if (ret != 0) {
+		pr_err("DRM platform_driver_register() failed %d\n", ret);
+		goto err_pdrv_reg;
+	}
+
+	return 0;
+
+err_pdrv_reg:
+	amba_driver_unregister(&pl111_amba_driver);
+err_amba_reg:
+	platform_device_unregister(pl111_drm_device);
+
+	return ret;
+}
+
+static void __exit pl111_platform_drm_exit(void)
+{
+	pr_info("DRM %s\n", __func__);
+
+	platform_device_unregister(pl111_drm_device);
+	amba_driver_unregister(&pl111_amba_driver);
+	platform_driver_unregister(&platform_drm_driver);
+}
+
+#ifdef MODULE
+module_init(pl111_platform_drm_init);
+#else
+late_initcall(pl111_platform_drm_init);
+#endif
+module_exit(pl111_platform_drm_exit);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_LICENSE(DRIVER_LICENCE);
+MODULE_ALIAS(DRIVER_ALIAS);
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c
new file mode 100755
index 0000000..d033566
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_suspend.c
+ * Implementation of the suspend/resume functions for PL111 DRM
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+int pl111_drm_resume(struct drm_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c
new file mode 100755
index 0000000..ff602ef
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c
@@ -0,0 +1,308 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_vma.c
+ * Implementation of the VM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+/* BEGIN drivers/staging/omapdrm/omap_gem_helpers.c */
+/**
+ * drm_gem_put_pages - helper to free backing pages for a GEM object
+ * @obj: obj in question
+ * @pages: pages to free
+ */
+static void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
+				bool dirty, bool accessed)
+{
+	int i, npages;
+	struct pl111_gem_bo *bo;
+	npages = obj->size >> PAGE_SHIFT;
+	bo = PL111_BO_FROM_GEM(obj);
+	for (i = 0; i < npages; i++) {
+		if (dirty)
+			set_page_dirty(pages[i]);
+		if (accessed)
+			mark_page_accessed(pages[i]);
+		/* Undo the reference we took when populating the table */
+		page_cache_release(pages[i]);
+	}
+	drm_free_large(pages);
+}
+
+void put_pages(struct drm_gem_object *obj, struct page **pages)
+{
+	int i, npages;
+	struct pl111_gem_bo *bo;
+	npages = obj->size >> PAGE_SHIFT;
+	bo = PL111_BO_FROM_GEM(obj);
+	_drm_gem_put_pages(obj, pages, true, true);
+	if (bo->backing_data.shm.dma_addrs) {
+		for (i = 0; i < npages; i++) {
+			/* Filter pages unmapped because of CPU accesses */
+			if (!bo->backing_data.shm.dma_addrs[i])
+				continue;
+			if (!dma_mapping_error(obj->dev->dev,
+					bo->backing_data.shm.dma_addrs[i])) {
+				dma_unmap_page(obj->dev->dev,
+					bo->backing_data.shm.dma_addrs[i],
+					PAGE_SIZE,
+					DMA_BIDIRECTIONAL);
+			}
+		}
+		kfree(bo->backing_data.shm.dma_addrs);
+		bo->backing_data.shm.dma_addrs = NULL;
+	}
+}
+
+/**
+ * drm_gem_get_pages - helper to allocate backing pages for a GEM object
+ * @obj: obj in question
+ * @gfpmask: gfp mask of requested pages
+ */
+static struct page **_drm_gem_get_pages(struct drm_gem_object *obj,
+					gfp_t gfpmask)
+{
+	struct inode *inode;
+	struct address_space *mapping;
+	struct page *p, **pages;
+	int i, npages;
+
+	/* This is the shared memory object that backs the GEM resource */
+	inode = obj->filp->f_path.dentry->d_inode;
+	mapping = inode->i_mapping;
+
+	npages = obj->size >> PAGE_SHIFT;
+
+	pages = drm_malloc_ab(npages, sizeof(struct page *));
+	if (pages == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	gfpmask |= mapping_gfp_mask(mapping);
+
+	for (i = 0; i < npages; i++) {
+		p = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
+		if (IS_ERR(p))
+			goto fail;
+		pages[i] = p;
+
+		/*
+		 * There is a hypothetical issue w/ drivers that require
+		 * buffer memory in the low 4GB.. if the pages are un-
+		 * pinned, and swapped out, they can end up swapped back
+		 * in above 4GB.  If pages are already in memory, then
+		 * shmem_read_mapping_page_gfp will ignore the gfpmask,
+		 * even if the already in-memory page disobeys the mask.
+		 *
+		 * It is only a theoretical issue today, because none of
+		 * the devices with this limitation can be populated with
+		 * enough memory to trigger the issue.  But this BUG_ON()
+		 * is here as a reminder in case the problem with
+		 * shmem_read_mapping_page_gfp() isn't solved by the time
+		 * it does become a real issue.
+		 *
+		 * See this thread: http://lkml.org/lkml/2011/7/11/238
+		 */
+		BUG_ON((gfpmask & __GFP_DMA32) &&
+			(page_to_pfn(p) >= 0x00100000UL));
+	}
+
+	return pages;
+
+fail:
+	while (i--)
+		page_cache_release(pages[i]);
+
+	drm_free_large(pages);
+	return ERR_PTR(PTR_ERR(p));
+}
+
+struct page **get_pages(struct drm_gem_object *obj)
+{
+	struct pl111_gem_bo *bo;
+	bo = PL111_BO_FROM_GEM(obj);
+
+	if (bo->backing_data.shm.pages == NULL) {
+		struct page **p;
+		int npages = obj->size >> PAGE_SHIFT;
+		int i;
+
+		p = _drm_gem_get_pages(obj, GFP_KERNEL);
+		if (IS_ERR(p))
+			return ERR_PTR(-ENOMEM);
+
+		bo->backing_data.shm.pages = p;
+
+		if (bo->backing_data.shm.dma_addrs == NULL) {
+			bo->backing_data.shm.dma_addrs =
+				kzalloc(npages * sizeof(dma_addr_t),
+					GFP_KERNEL);
+			if (bo->backing_data.shm.dma_addrs == NULL)
+				goto error_out;
+		}
+
+		if (!(bo->type & PL111_BOT_CACHED)) {
+			for (i = 0; i < npages; ++i) {
+				bo->backing_data.shm.dma_addrs[i] =
+					dma_map_page(obj->dev->dev, p[i], 0, PAGE_SIZE,
+						DMA_BIDIRECTIONAL);
+				if (dma_mapping_error(obj->dev->dev,
+						bo->backing_data.shm.dma_addrs[i]))
+					goto error_out;
+			}
+		}
+	}
+
+	return bo->backing_data.shm.pages;
+
+error_out:
+	put_pages(obj, bo->backing_data.shm.pages);
+	bo->backing_data.shm.pages = NULL;
+	return ERR_PTR(-ENOMEM);
+}
+
+/* END drivers/staging/omapdrm/omap_gem_helpers.c */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page **pages;
+	unsigned long pfn;
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	mutex_lock(&dev->struct_mutex);
+
+	/*
+	 * Our mmap calls setup a valid vma->vm_pgoff
+	 * so we can use vmf->pgoff
+	 */
+
+	if (bo->type & PL111_BOT_DMA) {
+		pfn = (bo->backing_data.dma.fb_dev_addr >> PAGE_SHIFT) +
+				vmf->pgoff;
+	} else { /* PL111_BOT_SHM */
+		pages = get_pages(obj);
+		if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev,
+				"could not get pages: %ld\n", PTR_ERR(pages));
+			ret = PTR_ERR(pages);
+			goto error;
+		}
+		pfn = page_to_pfn(pages[vmf->pgoff]);
+		pl111_gem_sync_to_cpu(bo, vmf->pgoff);
+	}
+
+	DRM_DEBUG("bo=%p physaddr=0x%.8x for offset 0x%x\n",
+				bo, PFN_PHYS(pfn), PFN_PHYS(vmf->pgoff));
+
+	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
+
+error:
+	mutex_unlock(&dev->struct_mutex);
+
+	switch (ret) {
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+/*
+ * The core drm_vm_ functions in kernel 3.4 are not ready
+ * to handle dma_buf cases where vma->vm_file->private_data
+ * cannot be accessed to get the device.
+ * 
+ * We use these functions from 3.5 instead where the device
+ * pointer is passed explicitly.
+ *
+ * However they aren't exported from the kernel until 3.10
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+                struct vm_area_struct *vma)
+{
+	struct drm_vma_entry *vma_entry;
+
+	DRM_DEBUG("0x%08lx,0x%08lx\n",
+		vma->vm_start, vma->vm_end - vma->vm_start);
+	atomic_inc(&dev->vma_count);
+
+	vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL);
+	if (vma_entry) {
+		vma_entry->vma = vma;
+		vma_entry->pid = current->pid;
+		list_add(&vma_entry->head, &dev->vmalist);
+	}
+}
+
+void pl111_drm_vm_close_locked(struct drm_device *dev,
+                struct vm_area_struct *vma)
+{
+	struct drm_vma_entry *pt, *temp;
+
+	DRM_DEBUG("0x%08lx,0x%08lx\n",
+		vma->vm_start, vma->vm_end - vma->vm_start);
+	atomic_dec(&dev->vma_count);
+
+	list_for_each_entry_safe(pt, temp, &dev->vmalist, head) {
+		if (pt->vma == vma) {
+			list_del(&pt->head);
+			kfree(pt);
+			break;
+		}
+	}
+}
+
+void pl111_gem_vm_open(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+
+	drm_gem_object_reference(obj);
+
+	mutex_lock(&obj->dev->struct_mutex);
+	pl111_drm_vm_open_locked(obj->dev, vma);
+	mutex_unlock(&obj->dev->struct_mutex);
+}
+
+void pl111_gem_vm_close(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct drm_device *dev = obj->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	pl111_drm_vm_close_locked(obj->dev, vma);
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&dev->struct_mutex);
+}
+#endif
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/pl111/sconscript b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/sconscript
new file mode 100755
index 0000000..5c47de7
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/pl111/sconscript
@@ -0,0 +1,52 @@
+#
+# (C) COPYRIGHT 2010-2013, 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import shutil
+Import('env')
+
+# Generate a build environment for the integration tests, taking a copy of the top-level build environment 
+# (env) as a start.
+drm_env = env.Clone()
+
+# Xorg uses C++ style comments and 'inline' keyword
+if '-std=c89' in drm_env['CFLAGS']:
+	drm_env['CFLAGS'].remove('-std=c89')
+
+# X11 generates a lot of warnings
+if '-Werror' in drm_env['CCFLAGS']:
+	drm_env['CCFLAGS'].remove('-Werror')
+
+#remove the 'lib'prefix
+drm_env['LIBPREFIX'] = ''
+
+src = Glob('*.c')
+
+if drm_env.GetOption('clean') :
+	drm_env.Execute(Action("make clean", 'clean [pl111]'))
+	cmd = drm_env.Command('$STATIC_LIB_PATH/mali_drm.ko', src, [])
+else:
+	# The target is mali_drm.ko, built from the source in pl111_drm/pl111, via the action makeAction
+	# mali_drm.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+	# kernel build system, after which it can be installed to the directory specified if
+	# "libs_install" is set; this is done by LibTarget.
+	makeAction=Action("cd ${SOURCE.dir} && make MALI_DEBUG=${debug} && cp pl111_drm.ko $STATIC_LIB_PATH/mali_drm.ko", '$MAKECOMSTR')
+	cmd = drm_env.Command('$STATIC_LIB_PATH/mali_drm.ko', src, [makeAction])
+
+# need Module.symvers from drm.ko
+#drm_env.Depends('$STATIC_LIB_PATH/pl111_drm.ko', '$STATIC_LIB_PATH/drm.ko')
+
+drm_env.KernelObjTarget('x11', cmd)
+
diff --git a/midgard/r14p0/kernel/drivers/gpu/drm/sconscript b/midgard/r14p0/kernel/drivers/gpu/drm/sconscript
new file mode 100755
index 0000000..a90fa89
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/drm/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+if 'x11' in env['winsys'] or 'gbm' in env['winsys']:
+	# pl111_drm isn't released so only try to build it if it's there
+	if Glob('pl111/sconscript') and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'):
+		SConscript('pl111/sconscript')
+
+#SConscript('dummy_drm/sconscript')
diff --git a/midgard/r14p0/kernel/drivers/gpu/sconscript b/midgard/r14p0/kernel/drivers/gpu/sconscript
new file mode 100755
index 0000000..729dbda
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/gpu/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+SConscript('arm/sconscript')
+
+if Glob('drm/sconscript'):
+	SConscript('drm/sconscript')
diff --git a/midgard/r14p0/kernel/drivers/sconscript b/midgard/r14p0/kernel/drivers/sconscript
new file mode 100755
index 0000000..71b194e
--- /dev/null
+++ b/midgard/r14p0/kernel/drivers/sconscript
@@ -0,0 +1,116 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import glob
+import os
+import shutil
+import subprocess
+
+from SCons.Script import SCons
+
+Import("env")
+
+# -----------------------------------------------------------------------------
+g_kernel_config = dict()
+
+def kernel_config_enabled(sEnv, option):
+    """Return true if a given kernel config option is enabled"""
+    global g_kernel_config
+    if not g_kernel_config:
+        config_file = os.path.join(sEnv["ENV"]["KDIR"], "include/config/auto.conf")
+        print "Parsing kernel config '%s'." % config_file
+        with open(config_file, "rt") as fp:
+            for line in fp.readlines():
+                try:
+                    (key, val) = line.split("=")
+                    g_kernel_config[key.strip()] = val.strip()
+                except ValueError:
+                    pass
+    if option not in g_kernel_config:
+        return False
+    val = g_kernel_config[option]
+    if val == "y":
+        return True
+    return False
+
+env.AddMethod(kernel_config_enabled, "KernelConfigEnabled")
+
+def kernel_module_builder(env = None, target = None, source = None):
+    assert (len(target) == 1 and 'M' in env and 'make_args' in env and
+            'built_module' in env)
+
+    module_dir = str(env['M'])
+    cmd = ['make', '-j%d' % GetOption('num_jobs')] + env['make_args']
+    status = subprocess.call(cmd, cwd = module_dir, env = env['ENV'])
+    if status:
+        return status
+
+    shutil.copy2(env['built_module'], target[0].abspath)
+    return 0 # Success (SCons will catch any exceptions thrown by copy2).
+
+action = SCons.Action.Action(kernel_module_builder, "[KBUILD] $TARGET")
+builder = env.Builder(action = action)
+env.Append(BUILDERS = {'KernelModuleBuilder': builder})
+
+def build_kernel_module(env, target, sources, M = None, make_args = [],
+                        built_module = None):
+    if not M:
+        M = Dir('.').srcnode().abspath
+    if not built_module:
+        built_module = os.path.basename(str(target))
+    built_module = os.path.join(M, built_module)
+
+    env_kern = env.Clone()
+    kdir = env_kern['ENV']['KDIR']
+    env_kern.Append(CPPPATH = [M, '#kernel/include', os.path.join(kdir, 'include')])
+    mod = env_kern.KernelModuleBuilder(target, sources, M = M,
+                                       make_args = make_args,
+                                       built_module = built_module)
+
+    # Kbuild implicitly #includes kconfig.h (which includes autoconf.h) when
+    # building kernel module source files.
+    env_kern.Depends(mod, os.path.join(kdir, 'include/generated/autoconf.h'))
+    # Built files may change depending on CONFIG_* values in auto.conf.
+    env_kern.Depends(mod, os.path.join(kdir, 'include/config/auto.conf'))
+
+    for f in ['Kconfig', 'Kbuild', 'Makefile*']:
+        env_kern.Depends(mod, glob.glob(os.path.join(M, f)))
+
+    scanner = SCons.Scanner.C.CScanner()
+    all_headers = set()
+    for src in Flatten(sources):
+        src = env_kern.File(src)
+        (base, ext) = os.path.splitext(src.abspath)
+        if ext == '.c':
+            env_kern.Clean(mod, base + '.o')
+            # Scan for header dependencies explicitly.
+            headers = scanner(src, env_kern, scanner.path_function(env_kern))
+            env_kern.Depends(mod, headers)
+            all_headers.update(headers)
+
+    env_kern.Clean(mod, os.path.join(M, 'Module.symvers'))
+    env_kern.Clean(mod, os.path.join(M, 'module.order'))
+
+    return mod
+
+env.AddMethod(build_kernel_module, 'BuildKernelModule')
+
+if Glob('base/sconscript'):
+	SConscript('base/sconscript')
+
+if Glob('video/sconscript'):
+	SConscript('video/sconscript')
+
+SConscript('gpu/sconscript')
diff --git a/midgard/r14p0/kernel/include/linux/dma-buf-test-exporter.h b/midgard/r14p0/kernel/include/linux/dma-buf-test-exporter.h
new file mode 100755
index 0000000..98984ba
--- /dev/null
+++ b/midgard/r14p0/kernel/include/linux/dma-buf-test-exporter.h
@@ -0,0 +1,78 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_
+#define _LINUX_DMA_BUF_TEST_EXPORTER_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+#define DMA_BUF_TE_VER_MAJOR 1
+#define DMA_BUF_TE_VER_MINOR 0
+#define DMA_BUF_TE_ENQ 0x642d7465
+#define DMA_BUF_TE_ACK 0x68692100
+
+struct dma_buf_te_ioctl_version
+{
+	int op;    /**< Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */
+	int major; /**< Major version */
+	int minor; /**< Minor version */
+};
+
+struct dma_buf_te_ioctl_alloc
+{
+	__u64 size; /* size of buffer to allocate, in pages */
+};
+
+struct dma_buf_te_ioctl_status
+{
+	/* in */
+	int fd; /* the dma_buf to query, only dma_buf objects exported by this driver is supported */
+	/* out */
+	int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */
+	int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */
+	int cpu_mappings;    /* number of cpu mappings (active 'mmap's) */
+};
+
+struct dma_buf_te_ioctl_set_failing
+{
+	/* in */
+	int fd; /* the dma_buf to set failure mode for, only dma_buf objects exported by this driver is supported */
+
+	/* zero = no fail injection, non-zero = inject failure */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+};
+
+struct dma_buf_te_ioctl_fill
+{
+	int fd;
+	unsigned int value;
+};
+
+#define DMA_BUF_TE_IOCTL_BASE 'E'
+/* Below all returning 0 if successful or -errcode except DMA_BUF_TE_ALLOC which will return fd or -errcode */
+#define DMA_BUF_TE_VERSION         _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version)
+#define DMA_BUF_TE_ALLOC           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_QUERY           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status)
+#define DMA_BUF_TE_SET_FAILING     _IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing)
+#define DMA_BUF_TE_ALLOC_CONT      _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_FILL            _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill)
+
+#endif /* _LINUX_DMA_BUF_TEST_EXPORTER_H_ */
diff --git a/midgard/r14p0/kernel/include/linux/kds.h b/midgard/r14p0/kernel/include/linux/kds.h
new file mode 100755
index 0000000..1346eda
--- /dev/null
+++ b/midgard/r14p0/kernel/include/linux/kds.h
@@ -0,0 +1,173 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KDS_H_
+#define _KDS_H_
+
+#include <linux/list.h>
+#include <linux/workqueue.h>
+
+#define KDS_WAIT_BLOCKING (ULONG_MAX)
+
+struct kds_resource_set;
+
+typedef void (*kds_callback_fn) (void *callback_parameter, void *callback_extra_parameter);
+
+struct kds_callback
+{
+	kds_callback_fn  user_cb; /* real cb */
+	int direct;               /* do direct or queued call? */
+	struct workqueue_struct *wq;
+};
+
+struct kds_link
+{
+	struct kds_resource_set *parent;
+	struct list_head         link;
+	unsigned long            state;
+};
+
+struct kds_resource
+{
+	struct kds_link waiters;
+};
+
+/* callback API */
+
+/* Initialize a callback object.
+ *
+ * Typically created per context or per hw resource.
+ *
+ * Callbacks can be performed directly if no nested locking can
+ * happen in the client.
+ *
+ * Nested locking can occur when a lock is held during the kds_async_waitall or
+ * kds_resource_set_release call. If the callback needs to take the same lock
+ * nested locking will happen.
+ *
+ * If nested locking could happen non-direct callbacks can be requested.
+ * Callbacks will then be called asynchronous to the triggering call.
+ */
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb);
+
+/* Terminate the use of a callback object.
+ *
+ * If the callback object was set up as non-direct
+ * any pending callbacks will be flushed first.
+ * Note that to avoid a deadlock the lock callbacks needs
+ * can't be held when a callback object is terminated.
+ */
+void kds_callback_term(struct kds_callback *cb);
+
+
+/* resource object API */
+
+/* initialize a resource handle for a shared resource */
+void kds_resource_init(struct kds_resource * const resource);
+
+/*
+ * Will return 0 on success.
+ * If the resource is being used or waited -EBUSY is returned.
+ * The caller should NOT try to terminate a resource that could still have clients.
+ * After the function returns the resource is no longer known by kds.
+ */
+int kds_resource_term(struct kds_resource *resource);
+
+/* Asynchronous wait for a set of resources.
+ * Callback will be called when all resources are available.
+ * If all the resources was available the callback will be called before kds_async_waitall returns.
+ * So one must not hold any locks the callback code-flow can take when calling kds_async_waitall.
+ * Caller considered to own/use the resources until \a kds_rset_release is called.
+ * exclusive_access_bitmap is a bitmap where a high bit means exclusive access while a low bit means shared access.
+ * Use the Linux __set_bit API, where the index of the buffer to control is used as the bit index.
+ *
+ * Standard Linux error return value.
+ */
+int kds_async_waitall(
+		struct kds_resource_set ** const pprset,
+		struct kds_callback      *cb,
+		void                     *callback_parameter,
+		void                     *callback_extra_parameter,
+		int                       number_resources,
+		unsigned long            *exclusive_access_bitmap,
+		struct kds_resource     **resource_list);
+
+/* Synchronous wait for a set of resources.
+ * Function will return when one of these have happened:
+ * - all resources have been obtained
+ * - timeout lapsed while waiting
+ * - a signal was received while waiting
+ *
+ * To wait without a timeout, specify KDS_WAIT_BLOCKING for \a jifies_timeout, otherwise
+ * the timeout in jiffies. A zero timeout attempts to obtain all resources and returns
+ * immediately with a timeout if all resources could not be obtained.
+ *
+ * Caller considered to own/use the resources when the function returns.
+ * Caller must release the resources using \a kds_rset_release.
+ *
+ * Calling this function while holding already locked resources or other locking primitives is dangerous.
+ * One must if this is needed decide on a lock order of the resources and/or the other locking primitives
+ * and always take the resources/locking primitives in the specific order.
+ *
+ * Use the ERR_PTR framework to decode the return value.
+ * NULL = time out
+ * If IS_ERR then PTR_ERR gives:
+ *  ERESTARTSYS = signal received, retry call after signal
+ *  all other values = internal error, lock failed
+ * Other values  = successful wait, now the owner, must call kds_resource_set_release
+ */
+struct kds_resource_set *kds_waitall(
+		int                   number_resources,
+		unsigned long        *exclusive_access_bitmap,
+		struct kds_resource **resource_list,
+		unsigned long         jifies_timeout);
+
+/* Release resources after use.
+ * Caller must handle that other async callbacks will trigger,
+ * so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your poiner to handle a race
+ * between a cancelation and a completion.
+ *
+ * If the caller can't guarantee that a race can't occur then
+ * the passed in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ */
+void kds_resource_set_release(struct kds_resource_set **pprset);
+
+/* Release resources after use and wait callbacks to complete.
+ * Caller must handle that other async callbacks will trigger,
+ * so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your poiner to handle a race
+ * between a cancelation and a completion.
+ *
+ * If the caller can't guarantee that a race can't occur then
+ * the passed in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ *
+ * This should be used to cancel waits which are pending on a kds
+ * resource.
+ *
+ * It is a bug to call this from atomic contexts and from within
+ * a kds callback that now owns the kds_rseource.
+ */
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset);
+#endif /* _KDS_H_ */
diff --git a/midgard/r14p0/kernel/include/linux/ump-common.h b/midgard/r14p0/kernel/include/linux/ump-common.h
new file mode 100755
index 0000000..0015bda
--- /dev/null
+++ b/midgard/r14p0/kernel/include/linux/ump-common.h
@@ -0,0 +1,252 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump-common.h
+ *
+ * This file contains some common enum values used both in both the user and kernel space side of UMP.
+ */
+
+#ifndef _UMP_COMMON_H_
+#define _UMP_COMMON_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#define UMP_UINT32_MAX (4294967295U)
+#define UMP_UINT64_MAX (18446744073709551615ULL)
+
+#ifdef __GNUC__
+	#define CHECK_RESULT        __attribute__((__warn_unused_result__))
+	#define INLINE	__inline__
+#else
+	#define CHECK_RESULT
+	#define INLINE __inline
+#endif
+
+#ifndef STATIC
+	#define STATIC static
+#endif
+
+/**
+ * Values to identify major and minor version of UMP
+ */
+#define UMP_VERSION_MAJOR 2
+#define UMP_VERSION_MINOR 0
+
+/**
+ * Typedef for a secure ID, a system wide identifier for UMP memory buffers.
+ */
+typedef int32_t ump_secure_id;
+
+/**
+ * Value to indicate an invalid secure Id.
+ */
+#define UMP_INVALID_SECURE_ID  ((ump_secure_id)0)
+
+/**
+ * UMP error codes.
+ */
+typedef enum
+{
+	UMP_OK    = 0, /**< indicates success */
+	UMP_ERROR = 1  /**< indicates failure */
+} ump_result;
+
+/**
+ * Allocation flag bits.
+ *
+ * ump_allocate accepts zero or more flags to specify the type of memory to allocate and how to expose it to devices.
+ *
+ * For each supported device there are 4 flags to control access permissions and give usage characteristic hints to optimize the allocation/mapping.
+ * They are;
+ * @li @a UMP_PROT_<device>_RD   read permission
+ * @li @a UMP_PROT_<device>_WR   write permission
+ * @li @a UMP_HINT_<device>_RD   read often
+ * @li @a UMP_HINT_<device>_WR   written often
+ *
+ * 5 devices are currently supported, with a device being the CPU itself.
+ * The other 4 devices will be mapped to real devices per SoC design.
+ * They are just named W,X,Y,Z by UMP as it has no knowledge of their real names/identifiers.
+ * As an example device W could be a camera device while device Z could be an ARM GPU device, leaving X and Y unused.
+ *
+ * 2 additional flags control the allocation;
+ * @li @a UMP_CONSTRAINT_PHYSICALLY_LINEAR   the allocation must be physical linear. Typical for devices without an MMU and no IOMMU to help it.
+ * @li @a UMP_PROT_SHAREABLE                 the allocation can be shared with other processes on the system. Without this flag the returned allocation won't be resolvable in other processes.
+ *
+ * All UMP allocation are growable unless they're @a UMP_PROT_SHAREABLE.
+ * The hint bits should be used to indicate the access pattern so the driver can select the most optimal memory type and cache settings based on the what the system supports.
+ */
+typedef enum
+{
+	/* Generic helpers */
+	UMP_PROT_DEVICE_RD = (1u << 0),
+	UMP_PROT_DEVICE_WR = (1u << 1),
+	UMP_HINT_DEVICE_RD = (1u << 2),
+	UMP_HINT_DEVICE_WR = (1u << 3),
+	UMP_DEVICE_MASK = 0xF,
+	UMP_DEVICE_CPU_SHIFT = 0,
+	UMP_DEVICE_W_SHIFT = 4,
+	UMP_DEVICE_X_SHIFT = 8,
+	UMP_DEVICE_Y_SHIFT = 12,
+	UMP_DEVICE_Z_SHIFT = 16,
+
+	/* CPU protection and hints. */
+	UMP_PROT_CPU_RD = (1u <<  0),
+	UMP_PROT_CPU_WR = (1u <<  1),
+	UMP_HINT_CPU_RD = (1u <<  2),
+	UMP_HINT_CPU_WR = (1u <<  3),
+
+	/* device W */
+	UMP_PROT_W_RD = (1u <<  4),
+	UMP_PROT_W_WR = (1u <<  5),
+	UMP_HINT_W_RD = (1u <<  6),
+	UMP_HINT_W_WR = (1u <<  7),
+
+	/* device X */
+	UMP_PROT_X_RD = (1u <<  8),
+	UMP_PROT_X_WR = (1u <<  9),
+	UMP_HINT_X_RD = (1u << 10),
+	UMP_HINT_X_WR = (1u << 11),
+	
+	/* device Y */
+	UMP_PROT_Y_RD = (1u << 12),
+	UMP_PROT_Y_WR = (1u << 13),
+	UMP_HINT_Y_RD = (1u << 14),
+	UMP_HINT_Y_WR = (1u << 15),
+
+	/* device Z */
+	UMP_PROT_Z_RD = (1u << 16),
+	UMP_PROT_Z_WR = (1u << 17),
+	UMP_HINT_Z_RD = (1u << 18),
+	UMP_HINT_Z_WR = (1u << 19),
+
+	/* 20-26 reserved for future use */
+	UMPP_ALLOCBITS_UNUSED = (0x7Fu << 20),
+	/** Allocations marked as @ UMP_CONSTRAINT_UNCACHED won't be mapped as cached by the cpu  */
+	UMP_CONSTRAINT_UNCACHED = (1u << 27),
+	/** Require 32-bit physically addressable memory */
+	UMP_CONSTRAINT_32BIT_ADDRESSABLE = (1u << 28),
+	/** For devices without an MMU and with no IOMMU assistance. */
+	UMP_CONSTRAINT_PHYSICALLY_LINEAR = (1u << 29),
+	/** Shareable must be set to allow the allocation to be used by other processes, the default is non-shared */
+	UMP_PROT_SHAREABLE = (1u << 30)
+	/* (1u << 31) should not be used to ensure compiler portability */
+} ump_allocation_bits;
+
+/**
+ * ump_allocation_bits combination argument type.
+ *
+ * Type used to pass zero or more bits from the @ref ump_allocation_bits enum
+ */
+typedef uint32_t ump_alloc_flags;
+
+
+/**
+ *  Default allocation flags for UMP v1 compatible allocations.
+ */
+#define UMP_V1_API_DEFAULT_ALLOCATION_FLAGS		UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | \
+												UMP_PROT_W_RD | UMP_PROT_W_WR |	\
+												UMP_PROT_X_RD | UMP_PROT_X_WR |	\
+												UMP_PROT_Y_RD | UMP_PROT_Y_WR |	\
+												UMP_PROT_Z_RD | UMP_PROT_Z_WR |	\
+												UMP_PROT_SHAREABLE |	\
+												UMP_CONSTRAINT_32BIT_ADDRESSABLE
+
+/**
+ * CPU cache sync operations.
+ *
+ * Cache synchronization operations to pass to @ref ump_cpu_msync_now
+ */
+enum
+{
+	/**
+	 * Cleans any dirty cache lines to main memory, but the data will still be available in the cache.
+	 * After a clean the contents of memory is considered to be "owned" by the device.
+	 * */
+	UMP_MSYNC_CLEAN = 1,
+
+	/** Cleans any dirty cache lines to main memory and Ejects all lines from the cache.
+	 * After an clean&invalidate the contents of memory is considered to be "owned" by the CPU.
+	 * Any subsequent access will fetch data from main memory.
+	 *
+	 * @note Due to CPUs doing speculative prefetching a UMP_MSYNC_CLEAN_AND_INVALIDATE must be done before and after interacting with hardware.
+	 * */
+	UMP_MSYNC_CLEAN_AND_INVALIDATE
+
+};
+
+typedef uint32_t ump_cpu_msync_op;
+
+/**
+ * Memory import types supported.
+ * If new import types are added they will appear here.
+ * They must be added before UMPP_EXTERNAL_MEM_COUNT and
+ * must be assigned an explicit sequantial number.
+ *
+ * @li UMP_EXTERNAL_MEM_TYPE_ION - Import an ION allocation
+ *                                 Takes a int* (pointer to a file descriptor)
+ *                                 Another ION reference is taken which is released on the final ump_release
+ */
+enum ump_external_memory_type
+{
+	UMPP_EXTERNAL_MEM_TYPE_UNUSED = 0, /* reserve type 0 */
+	UMP_EXTERNAL_MEM_TYPE_ION = 1,
+	UMPP_EXTERNAL_MEM_COUNT
+};
+
+/** @name UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Allocation constraints.
+ *
+ * Allocation flags to pass @ref ump_ref_drv_allocate
+ *
+ * UMP v1 API only.
+ */
+typedef enum
+{
+	/** the allocation is mapped as noncached. */
+	UMP_REF_DRV_CONSTRAINT_NONE = 0,
+	/** not supported. */
+	UMP_REF_DRV_CONSTRAINT_PHYSICALLY_LINEAR = 1,
+	/** the allocation is mapped as cached by the cpu. */
+	UMP_REF_DRV_CONSTRAINT_USE_CACHE = 4
+} ump_alloc_constraints;
+
+/* @} */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _UMP_COMMON_H_ */
diff --git a/midgard/r14p0/kernel/include/linux/ump-import.h b/midgard/r14p0/kernel/include/linux/ump-import.h
new file mode 100755
index 0000000..89ce727
--- /dev/null
+++ b/midgard/r14p0/kernel/include/linux/ump-import.h
@@ -0,0 +1,99 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IMPORT_H_
+#define _UMP_IMPORT_H_
+
+#include <linux/ump.h>
+#include <linux/module.h>
+
+/**
+ * UMP import module info.
+ * Contains information about the Linux module providing the import module,
+ * used to block unloading of the Linux module while imported memory exists.
+ * Lists the functions implementing the UMP import functions.
+ */
+struct ump_import_handler
+{
+	/**
+	 * Linux module of the import handler
+	 */
+	struct module * linux_module;
+
+	/**
+	 * UMP session usage begin.
+	 *
+	 * Called when a UMP session first is bound to the handler.
+	 * Typically used to set up any import module specific per-session data.
+	 * The function returns a pointer to this data in the output pointer custom_session_data
+	 * which will be passed to \a session_end and \a import.
+	 *
+	 * Note: custom_session_data must be set to non-NULL if successful.
+	 * If no pointer is needed set it a magic value to validate instead.
+	 *
+	 * @param[out] custom_session_data Pointer to a generic pointer where any data can be stored
+	 * @return 0 on success, error code if the session could not be initiated.
+	 */
+	int (*session_begin)(void ** custom_session_data);
+
+	/**
+	 * UMP session usage end.
+	 *
+	 * Called when a UMP session is no longer using the handler.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 */
+	void (*session_end)(void * custom_session_data);
+
+	/**
+	 * Import request.
+	 *
+	 * Called when a client has asked to import a resource of the type the import module was installed for.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * The requested flags must be verified to be valid to apply to the imported memory.
+	 * If not valid return UMP_DD_INVALID_MEMORY_HANDLE.
+	 * If the flags are found to be valid call \a ump_dd_create_from_phys_blocks_64 to create a handle.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 * @param[in] phandle Pointer to the handle to import
+	 * @param     flags The requested UMPv2 flags to assign to the imported handle
+	 * @return UMP_DD_INVALID_MEMORY_HANDLE if the import failed, a valid ump handle on success
+	 */
+	ump_dd_handle (*import)(void * custom_session_data, void * phandle, ump_alloc_flags flags);
+};
+
+/**
+ * Import module registration.
+ * Registers a ump_import_handler structure for a memory type.
+ * @param     type    Type of the memory to register a handler for
+ * @param[in] handler Handler strcture to install
+ * @return 0 on success, a Linux error code on failure
+ */
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler);
+
+/**
+ * Import module deregistration.
+ * Uninstalls the handler for the given memory type.
+ * @param type Type of the memory to unregister the handler for
+ */
+void ump_import_module_unregister(enum ump_external_memory_type type);
+
+#endif /* _UMP_IMPORT_H_ */
diff --git a/midgard/r14p0/kernel/include/linux/ump-ioctl.h b/midgard/r14p0/kernel/include/linux/ump-ioctl.h
new file mode 100755
index 0000000..451403e
--- /dev/null
+++ b/midgard/r14p0/kernel/include/linux/ump-ioctl.h
@@ -0,0 +1,152 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IOCTL_H_
+#define _UMP_IOCTL_H
+
+#include <linux/ump-common.h>
+
+/*
+ * The order and size of the members of these have been chosen so the structures look the same in 32-bit and 64-bit builds.
+ * If any changes are done build the ump_struct_size_checker test for 32-bit and 64-bit targets. Both must compile successfully to commit.
+ */
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union ump_pointer
+{
+	void * value; /**< client should store their pointers here */
+	uint32_t compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	uint64_t sizer; /**< Force 64-bit storage for all clients regardless */
+} ump_pointer;
+
+/**
+ * UMP allocation request.
+ * Used when performing ump_allocate
+ */
+typedef struct ump_k_allocate
+{
+	uint64_t size; /**< [in] Size in bytes to allocate */
+	ump_secure_id secure_id; /**< [out] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [in] Flags to use when allocating */
+} ump_k_allocate;
+
+/**
+ * UMP size query request.
+ * Used when performing ump_size_get
+ */
+typedef struct ump_k_sizequery
+{
+	uint64_t size; /**< [out] Size of allocation */
+	ump_secure_id secure_id; /**< [in] ID of allocation to query the size of */
+	uint32_t padding; /* don't remove */
+} ump_k_sizequery;
+
+/**
+ * UMP cache synchronization request.
+ * Used when performing ump_cpu_msync_now
+ */
+typedef struct ump_k_msync
+{
+	ump_pointer mapped_ptr; /**< [in] CPU VA to perform cache operation on */
+	ump_secure_id secure_id; /**< [in] ID of allocation to perform cache operation on */
+	ump_cpu_msync_op cache_operation; /**< [in] Cache operation to perform */
+	uint64_t size; /**< [in] Size in bytes of the range to synchronize */
+} ump_k_msync;
+
+/**
+ * UMP memory retain request.
+ * Used when performing ump_retain
+ */
+typedef struct ump_k_retain
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to retain a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_retain;
+
+/**
+ * UMP memory release request.
+ * Used when performing ump_release
+ */
+typedef struct ump_k_release
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to release a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_release;
+
+typedef struct ump_k_import
+{
+	ump_pointer phandle;                /**< [in]  Pointer to handle to import */
+	uint32_t type;                           /**< [in]  Type of handle to import */
+	ump_alloc_flags alloc_flags;        /**< [in]  Flags to assign to the imported memory */
+	ump_secure_id secure_id;            /**< [out] UMP ID representing the imported memory */
+	uint32_t padding;                        /* don't remove */
+} ump_k_import;
+
+/**
+ * UMP allocation flags request.
+ * Used when performing umpp_get_allocation_flags
+ *
+ * used only by v1 API
+ */
+typedef struct ump_k_allocation_flags
+{
+	ump_secure_id secure_id; /**< [in] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [out] Flags to use when allocating */
+} ump_k_allocation_flags;
+
+#define UMP_CALL_MAX_SIZE 512
+/*
+ * Ioctl definitions
+ */
+
+/* Use '~' as magic number */
+
+#define UMP_IOC_MAGIC  '~'
+
+#define UMP_FUNC_ALLOCATE _IOWR(UMP_IOC_MAGIC,  1, ump_k_allocate)
+#define UMP_FUNC_SIZEQUERY _IOWR(UMP_IOC_MAGIC,  2, ump_k_sizequery)
+#define UMP_FUNC_MSYNC _IOWR(UMP_IOC_MAGIC,  3, ump_k_msync)
+#define UMP_FUNC_RETAIN _IOW(UMP_IOC_MAGIC,  4, ump_k_retain)
+#define UMP_FUNC_RELEASE _IOW(UMP_IOC_MAGIC,  5, ump_k_release)
+#define UMP_FUNC_ALLOCATION_FLAGS_GET _IOWR(UMP_IOC_MAGIC,  6, ump_k_allocation_flags)
+#define UMP_FUNC_IMPORT _IOWR(UMP_IOC_MAGIC, 7, ump_k_import)
+
+/*max ioctl sequential number*/
+#define UMP_IOC_MAXNR 7
+
+/* 15 bits for the UMP ID (allowing 32768 IDs) */
+#define UMP_LINUX_ID_BITS 15
+#define UMP_LINUX_ID_MASK ((1ULL << UMP_LINUX_ID_BITS) - 1ULL)
+
+/* 64-bit (really 52 bits) encoding: 15 bits for the ID, 37 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_64 37
+#define UMP_LINUX_OFFSET_MASK_64 ((1ULL << UMP_LINUX_OFFSET_BITS_64)-1)
+/* 32-bit encoding: 15 bits for the ID, 17 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_32 17
+#define UMP_LINUX_OFFSET_MASK_32 ((1ULL << UMP_LINUX_OFFSET_BITS_32)-1)
+
+#if __SIZEOF_LONG__ == 8
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_64
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_64
+#else
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_32
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_32
+#endif
+
+#endif /* _UMP_IOCTL_H_ */
diff --git a/midgard/r14p0/kernel/include/linux/ump.h b/midgard/r14p0/kernel/include/linux/ump.h
new file mode 100755
index 0000000..16f9c39
--- /dev/null
+++ b/midgard/r14p0/kernel/include/linux/ump.h
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ *
+ * This file contains the kernel space part of the UMP API.
+ *
+ */
+
+#ifndef _UMP_KERNEL_INTERFACE_H_
+#define _UMP_KERNEL_INTERFACE_H_
+
+/**
+ * @addtogroup ump_api
+ * @{
+ */
+
+/** @defgroup ump_kernel_space_api UMP Kernel Space API
+ * @{ */
+
+/**
+ * External representation of a UMP handle in kernel space.
+ */
+typedef void * ump_dd_handle;
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+
+#include <linux/ump-common.h>
+
+#define UMP_KERNEL_API_EXPORT
+
+#if defined(__KERNEL__)
+#include <linux/ump-import.h>
+#else
+#include <ump/src/library/common/ump_user.h>
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_INVALID_MEMORY_HANDLE ((ump_dd_handle)0)
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block_64
+{
+	uint64_t addr; /**< The physical address of the block */
+	uint64_t size; /**< The length of the block, in bytes, typically page aligned */
+} ump_dd_physical_block_64;
+
+/**
+ * Security filter hook.
+ *
+ * Each allocation can have a security filter attached to it.@n
+ * The hook receives
+ * @li the secure ID
+ * @li a handle to the allocation
+ * @li  the callback_data argument provided to @ref ump_dd_allocate_64 or @ref ump_dd_create_from_phys_blocks_64
+ *
+ * The hook must return @a true to indicate that access to the handle is allowed or @n
+ * @a false to state that no access is permitted.@n
+ * This hook is guaranteed to be called in the context of the requesting process/address space.
+ *
+ * The arguments provided to the hook are;
+ * @li the secure ID
+ * @li handle to the allocation
+ * @li the callback_data set when registering the hook
+ *
+ * Return value;
+ * @li @a true to permit access
+ * @li @a false to deny access
+ */
+typedef bool (*ump_dd_security_filter)(ump_secure_id, ump_dd_handle, void *);
+
+/**
+ * Final release notify hook.
+ *
+ * Allocations can have a hook attached to them which is called when the last reference to the allocation is released.
+ * No reference count manipulation is allowed on the provided handle, just property querying (ID get, size get, phys block get).
+ * This is similar to finalizers in OO languages.
+ *
+ * The arguments provided to the hook are;
+ * * handle to the allocation
+ * * the callback_data set when registering the hook
+ */
+typedef void (*ump_dd_final_release_callback)(const ump_dd_handle, void *);
+
+/**
+ * Allocate a buffer.
+ * The lifetime of the allocation is controlled by a reference count.
+ * The reference count of the returned buffer is set to 1.
+ * The memory will be freed once the reference count reaches 0.
+ * Use @ref ump_dd_retain and @ref ump_dd_release to control the reference count.
+ * @param size Number of bytes to allocate. Will be padded up to a multiple of the page size.
+ * @param flags Bit-wise OR of zero or more of the allocation flag bits.
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a
+ * secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed,
+ * just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the new allocation, or @a UMP_DD_INVALID_MEMORY_HANDLE on allocation failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/**
+ * Allocation bits getter.
+ * Retrieves the allocation flags used when instantiating the given handle.
+ * Just a copy of the flag given to @ref ump_dd_allocate_64 and @ref ump_dd_create_from_phys_blocks_64
+ * @param mem The handle retrieve the bits for
+ * @return The allocation bits used to instantiate the allocation
+ */
+UMP_KERNEL_API_EXPORT ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem);
+
+
+/**
+ * Retrieves the secure ID for the specified UMP memory.
+ *
+ * This identifier is unique across the entire system, and uniquely identifies
+ * the specified UMP memory allocation. This identifier can later be used through the
+ * @ref ump_dd_from_secure_id or
+ * @ref ump_from_secure_id
+ * functions in order to access this UMP memory, for instance from another process (if shared of course).
+ * Unless the allocation was marked as shared the returned ID will only be resolvable in the same process as did the allocation.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE will result in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_secure_id_get
+ *
+ * @see ump_dd_from_secure_id
+ * @see ump_from_secure_id
+ * @see ump_secure_id_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the secure ID for the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem);
+
+#ifdef CONFIG_KDS
+/**
+ * Retrieve the KDS resource for the specified UMP memory.
+ *
+ * The KDS resource should be used to synchronize access to the UMP allocation.
+ * See the KDS API for how to do that.
+ *
+ * @param mem Handle to the UMP memory to query.
+ * @return Pointer to the KDS resource controlling access to the UMP memory.
+ */
+UMP_KERNEL_API_EXPORT struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem);
+#endif
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_release when there is no longer any
+ * use for the retrieved handle.
+ *
+ * If called on an non-shared allocation and this is a different process @a UMP_DD_INVALID_MEMORY_HANDLE will be returned.
+ *
+ * Calling on an @a UMP_INVALID_SECURE_ID will return @a UMP_DD_INVALID_MEMORY_HANDLE
+ *
+ * @note There is a user space equivalent function called @ref ump_from_secure_id
+ *
+ * @see ump_dd_release
+ * @see ump_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get function.
+ *
+ * @return @a UMP_DD_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ * This function will return a pointer to an array of @ref ump_dd_physical_block_64 in @a pArray and the number of array elements in @a pCount
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @param mem Handle to UMP memory.
+ * @param[out] pCount Pointer to where to store the number of items in the returned array
+ * @param[out] pArray Pointer to where to store a pointer to the physical blocks array
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray);
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT uint64_t ump_dd_size_get_64(const ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory allocation.
+ *
+ * The function @ref ump_dd_release must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note You are not required to call @ref ump_dd_retain
+ * for UMP handles returned from
+ * @ref ump_dd_from_secure_id,
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_retain
+ *
+ * @see ump_retain
+ *
+ * @param mem Handle to UMP memory.
+ * @return 0 indicates success, any other value indicates failure.
+ */
+UMP_KERNEL_API_EXPORT int ump_dd_retain(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function must be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * One can only call ump_release when matched with a successful ump_dd_retain, ump_dd_allocate_64 or ump_dd_from_secure_id
+ * If called on an @a UMP_DD_INVALID_MEMORY_HANDLE the function will early out.
+ *
+ * @note There is a user space equivalent function called @ref ump_release
+ *
+ * @see ump_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_release(ump_dd_handle mem);
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ * Once wrapped the memory acts just like a normal allocation coming from @ref ump_dd_allocate_64.
+ * The only exception is that the freed physical memory is not put into the pool of free memory, but instead considered returned to the caller once @a final_release_func returns.
+ * The blocks array will be copied, so no need to hold on to it after this function returns.
+ * @param[in] blocks Array of @ref ump_dd_physical_block_64
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ * @param flags Allocation flags to mark the handle with
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed, just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/** @name UMP v1 API
+ * Functions provided to support compatibility with UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_HANDLE_INVALID UMP_DD_INVALID_MEMORY_HANDLE
+
+/**
+ * UMP error codes for kernel space.
+ */
+typedef enum
+{
+	UMP_DD_SUCCESS, /**< indicates success */
+	UMP_DD_INVALID /**< indicates failure */
+} ump_dd_status_code;
+
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block
+{
+	unsigned long addr; /**< The physical address of the block */
+	unsigned long size; /**< The length of the block, typically page aligned */
+} ump_dd_physical_block;
+
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_reference_release "ump_dd_reference_release" when there is no longer any
+ * use for the retrieved handle.
+ *
+ * @note There is a user space equivalent function called @ref ump_handle_create_from_secure_id "ump_handle_create_from_secure_id"
+ *
+ * @see ump_dd_reference_release
+ * @see ump_handle_create_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get "ump_secure_id_get " function.
+ *
+ * @return UMP_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id);
+
+
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ *
+ * @param[in] blocks Array of @ref ump_dd_physical_block
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ *
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the number of physical blocks used by the specified UMP memory.
+ *
+ * This function retrieves the number of @ref ump_dd_physical_block "ump_dd_physical_block" structs needed
+ * to describe the physical memory layout of the given UMP memory. This can later be used when calling
+ * the functions @ref ump_dd_phys_blocks_get "ump_dd_phys_blocks_get" and
+ * @ref ump_dd_phys_block_get "ump_dd_phys_block_get".
+ *
+ * @see ump_dd_phys_blocks_get
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return The number of ump_dd_physical_block structs required to describe the physical memory layout of the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will fail if the num_blocks parameter is either to large or to small.
+ *
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param blocks An array of @ref ump_dd_physical_block "ump_dd_physical_block" structs that will receive the physical description.
+ * @param num_blocks The number of blocks to return in the blocks array. Use the function
+ *                   @ref ump_dd_phys_block_count_get "ump_dd_phys_block_count_get" first to determine the number of blocks required.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the physical memory block information for specified block for the specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will return UMP_DD_INVALID if the specified index is out of range.
+ *
+ * @see ump_dd_phys_blocks_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param index Which physical info block to retrieve.
+ * @param block Pointer to a @ref ump_dd_physical_block "ump_dd_physical_block" struct which will receive the requested information.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block);
+
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get "ump_size_get"
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory.
+ *
+ * This function adds an extra reference to the specified UMP memory. This function should
+ * be used every time a UMP memory handle is duplicated, that is, assigned to another ump_dd_handle
+ * variable. The function @ref ump_dd_reference_release "ump_dd_reference_release" must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * @note You are not required to call @ref ump_dd_reference_add "ump_dd_reference_add"
+ * for UMP handles returned from
+ * @ref ump_dd_handle_create_from_secure_id "ump_dd_handle_create_from_secure_id",
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_add "ump_reference_add"
+ *
+ * @see ump_reference_add
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function should be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_release "ump_reference_release"
+ *
+ * @see ump_reference_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle mem);
+
+/* @} */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/** @} */ /* end group ump_kernel_space_api */
+
+/** @} */ /* end group ump_api */
+
+#endif /* _UMP_KERNEL_INTERFACE_H_ */
diff --git a/midgard/r14p0/kernel/license.txt b/midgard/r14p0/kernel/license.txt
new file mode 100755
index 0000000..77c14bd
--- /dev/null
+++ b/midgard/r14p0/kernel/license.txt
@@ -0,0 +1,198 @@
+GPLV2 LICENCE AGREEMENT FOR MALI GPUS LINUX KERNEL DEVICE DRIVERS SOURCE CODE
+
+THE USE OF THE SOFTWARE ACCOMPANYING THIS DOCUMENT IS EXPRESSLY SUBJECT TO THE TERMS OF THE GNU GENERAL PUBLIC LICENSE VERSION 2 AS PUBLISHED BY THE FREE SOFTWARE FOUNDATION AND SET OUT BELOW FOR REFERENCE (?GPL LICENCE?). ARM IS ONLY WILLING TO DISTRIBUTE THE SOFTWARE TO YOU ON CONDITION THAT YOU ACCEPT ALL OF THE TERMS IN THE GPL LICENCE PRIOR TO MODIFYING OR DISTRIBUTING THE SOFTWARE.
+
+
+
+Further for the period of three (3) years, ARM hereby offers to make available the source code of any part of the software program that is supplied as object code or in executable form.
+
+
+
+GPL Licence
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+Version 2, June 1991
+
+
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+
+
+
+Preamble
+
+
+
+The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too.
+
+
+
+When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things.
+
+
+
+To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it.
+
+
+
+For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
+
+
+
+We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
+
+
+
+Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations.
+
+
+
+Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all.
+
+
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+
+
+0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you".
+
+
+
+Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program).
+
+Whether that is true depends on what the Program does.
+
+
+
+1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty;
+
+and give any other recipients of the Program a copy of this License along with the Program.
+
+
+
+You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee.
+
+
+
+2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
+
+
+
+a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change.
+
+
+
+b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License.
+
+
+
+c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.)
+
+
+
+These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.
+
+
+
+Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program.
+
+
+
+In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.
+
+
+
+3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following:
+
+
+
+a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.)
+
+
+
+The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.
+
+
+
+If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code.
+
+
+
+4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.
+
+
+
+5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the
+
+Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it.
+
+
+
+6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein.
+
+You are not responsible for enforcing compliance by third parties to this License.
+
+
+
+7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program.
+
+
+
+If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances.
+
+
+
+It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.
+
+
+
+This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
+
+
+
+8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.
+
+
+
+9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+
+
+
+Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation.
+
+
+
+10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.
+
+
+
+NO WARRANTY
+
+
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+/end
+
diff --git a/midgard/r14p0/kernel/patches/integrate_kds_with_dma_buf.patch b/midgard/r14p0/kernel/patches/integrate_kds_with_dma_buf.patch
new file mode 100755
index 0000000..37458cf
--- /dev/null
+++ b/midgard/r14p0/kernel/patches/integrate_kds_with_dma_buf.patch
@@ -0,0 +1,188 @@
+diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
+index 9aa618a..30e07ea 100644
+--- a/drivers/base/Kconfig
++++ b/drivers/base/Kconfig
+@@ -192,4 +192,12 @@ config DMA_SHARED_BUFFER
+ 	  APIs extension; the file's descriptor can then be passed on to other
+ 	  driver.
+ 
++config DMA_SHARED_BUFFER_USES_KDS
++	bool "Synchronize access to dma_buf with KDS"
++	default n
++	select KDS
++	depends on DMA_SHARED_BUFFER
++	help
++	  This option adds a KDS resource within every dma_buf allocation.
++
+ endmenu
+diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
+index 20258e1..e4083fd 100644
+--- a/drivers/base/dma-buf.c
++++ b/drivers/base/dma-buf.c
+@@ -27,6 +27,8 @@
+ #include <linux/dma-buf.h>
+ #include <linux/anon_inodes.h>
+ #include <linux/export.h>
++#include <linux/poll.h>
++#include <linux/sched.h>
+ 
+ static inline int is_dma_buf_file(struct file *);
+ 
+@@ -40,6 +42,8 @@ static int dma_buf_release(struct inode *inode, struct file *file)
+ 	dmabuf = file->private_data;
+ 
+ 	dmabuf->ops->release(dmabuf);
++	kds_callback_term(&dmabuf->kds_cb);
++	kds_resource_term(&dmabuf->kds);
+ 	kfree(dmabuf);
+ 	return 0;
+ }
+@@ -61,9 +65,90 @@ static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
+ 	return dmabuf->ops->mmap(dmabuf, vma);
+ }
+ 
++
++static void dma_buf_kds_cb_fn (void *param1, void *param2)
++{
++	struct kds_resource_set **rset_ptr = param1;
++	struct kds_resource_set *rset = *rset_ptr;
++	wait_queue_head_t *wait_queue = param2;
++
++	kfree(rset_ptr);
++	kds_resource_set_release(&rset);
++	wake_up(wait_queue);
++}
++
++static int dma_buf_kds_check(struct kds_resource *kds,
++                             long unsigned int exclusive, int *poll_ret)
++{
++	/* Synchronous wait with 0 timeout - poll availability */
++	struct kds_resource_set *rset = kds_waitall(1,&exclusive,&kds,0);
++
++	if (IS_ERR(rset))
++		return POLLERR;
++
++	if (rset){
++		kds_resource_set_release(&rset);
++		*poll_ret = POLLIN | POLLRDNORM;
++		if (exclusive)
++			*poll_ret |=  POLLOUT | POLLWRNORM;
++		return 1;
++	}else{
++		return 0;
++	}
++}
++
++static unsigned int dma_buf_poll(struct file *file,
++                                 struct poll_table_struct *wait)
++{
++	struct dma_buf *dmabuf;
++	struct kds_resource *kds;
++	unsigned int ret = 0;
++
++	if (!is_dma_buf_file(file))
++		return POLLERR;
++
++	dmabuf = file->private_data;
++	kds    = &dmabuf->kds;
++
++	if (poll_does_not_wait(wait)){
++		/* Check for exclusive access (superset of shared) first */
++		if(!dma_buf_kds_check(kds, 1ul, &ret))
++			dma_buf_kds_check(kds, 0ul, &ret);
++	}else{
++		int events = poll_requested_events(wait);
++		unsigned long exclusive;
++		wait_queue_head_t *wq;
++		struct kds_resource_set **rset_ptr = kmalloc(sizeof(*rset_ptr), GFP_KERNEL);
++
++		if (!rset_ptr)
++			return POLL_ERR;
++
++		if (events & POLLOUT){
++			wq = &dmabuf->wq_exclusive;
++			exclusive = 1;
++		}else{
++			wq = &dmabuf->wq_shared;
++			exclusive = 0;
++		}
++		poll_wait(file, wq, wait);
++		ret = kds_async_waitall(rset_ptr, KDS_FLAG_LOCKED_WAIT, &dmabuf->kds_cb,
++		                        rset_ptr, wq, 1, &exclusive, &kds);
++
++		if (IS_ERR_VALUE(ret)){
++			ret = POLL_ERR;
++			kfree(rset_ptr);
++		}else{
++			/* Can't allow access until callback */
++			ret = 0;
++		}
++	}
++	return ret;
++}
++
+ static const struct file_operations dma_buf_fops = {
+ 	.release	= dma_buf_release,
+ 	.mmap		= dma_buf_mmap_internal,
++	.poll		= dma_buf_poll,
+ };
+ 
+ /*
+@@ -120,6 +205,11 @@ struct dma_buf *dma_buf_export(void *priv, const struct dma_buf_ops *ops,
+ 	mutex_init(&dmabuf->lock);
+ 	INIT_LIST_HEAD(&dmabuf->attachments);
+ 
++	init_waitqueue_head(&dmabuf->wq_exclusive);
++	init_waitqueue_head(&dmabuf->wq_shared);
++	kds_resource_init(&dmabuf->kds);
++	kds_callback_init(&dmabuf->kds_cb, 1, dma_buf_kds_cb_fn);
++
+ 	return dmabuf;
+ }
+ EXPORT_SYMBOL_GPL(dma_buf_export);
+diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
+index eb48f38..6e824d2 100644
+--- a/include/linux/dma-buf.h
++++ b/include/linux/dma-buf.h
+@@ -30,6 +30,8 @@
+ #include <linux/list.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/fs.h>
++#include <linux/kds.h>
++#include <linux/wait.h>
+ 
+ struct device;
+ struct dma_buf;
+@@ -121,6 +123,10 @@ struct dma_buf {
+ 	const struct dma_buf_ops *ops;
+ 	/* mutex to serialize list manipulation and attach/detach */
+ 	struct mutex lock;
++	struct kds_resource kds;
++	wait_queue_head_t wq_exclusive;
++	wait_queue_head_t wq_shared;
++	struct kds_callback kds_cb;
+ 	void *priv;
+ };
+ 
+@@ -156,6 +162,21 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
+ 	get_file(dmabuf->file);
+ }
+ 
++/**
++ * get_dma_buf_kds_resource - get a KDS resource for this dma-buf
++ * @dmabuf:	[in]	pointer to dma_buf
++ *
++ * Returns a KDS resource that represents the dma-buf. This should be used by
++ * drivers to synchronize access to the buffer. Note that the caller should
++ * ensure that a reference to the dma-buf exists from the call to
++ * kds_async_wait until kds_resource_set_release is called.
++ */
++static inline struct kds_resource *
++	get_dma_buf_kds_resource(struct dma_buf *dmabuf)
++{
++	return &dmabuf->kds;
++}
++
+ #ifdef CONFIG_DMA_SHARED_BUFFER
+ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+ 							struct device *dev);
diff --git a/midgard/r14p0/kernel/sconscript b/midgard/r14p0/kernel/sconscript
new file mode 100755
index 0000000..e6be64c
--- /dev/null
+++ b/midgard/r14p0/kernel/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+if env['backend'] == 'kernel' and int(env['kernel_modules']) == 1 and env['gpu'] != 'none':
+	SConscript('drivers/sconscript')
+
+	if Glob('tests/sconscript'):
+		SConscript('tests/sconscript')
diff --git a/midgard/r15p0/android/patches/K/android-k.sh b/midgard/r15p0/android/patches/K/android-k.sh
new file mode 100755
index 0000000..4e44d5e
--- /dev/null
+++ b/midgard/r15p0/android/patches/K/android-k.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+
+#
+# Copyright (C) 2014-2015 ARM Limited. All rights reserved.
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Android K script for LLVM 3.5 and above support
+#
+# This script applies two minor patches and checks-out libcxx and libcxxabi libraries
+# adding C++11 support in Android K required for LLVM3.5 and above.
+
+
+if [ -z "$ANDROID_BUILD_TOP" ]; then
+    echo "[ANDROID-PATCH] This script must be executed in an Android build environment"
+    echo "                ANDROID_BUILD_TOP is undefined"
+    echo "                please do \"source build/envseup.sh; lunch\"."
+    exit 1
+fi
+
+ANDROID_REPOSITORY_URL="https://android.googlesource.com/"
+
+REFERENCE_ARGUMENT=""
+
+# Proccess arguments. Valid arguments are:
+#        -u ARG      Override the Android repository url
+#        -r ARG      Provide a path to a reference git repository
+while getopts "u:r:" opt; do
+  case $opt in
+    u)
+      ANDROID_REPOSITORY_URL=$OPTARG
+      ;;
+
+    r)
+      REFERENCE_ARGUMENT="--reference "$OPTARG
+      ;;
+
+    \?)
+      echo "Invalid option: -$OPTARG" >&2
+      exit 1
+
+      ;;
+    :)
+      echo "Option -$OPTARG requires an argument." >&2
+      exit 1
+      ;;
+
+  esac
+done
+
+MIDGARD_DDK_WD=$(pwd)
+
+cd $ANDROID_BUILD_TOP/external
+
+#Checkout libcxx from Android Master required by LLVM due to c++11
+git clone $ANDROID_REPOSITORY_URL/platform/external/libcxx $REFERENCE_ARGUMENT
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] git clone of libcxx failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+cd libcxx
+git checkout 27ae7cb782821a4f2d3813522ee411cd978bcd85 -q
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] checkout of libcxx@27ae7cb782821a4f2d3813522ee411cd978bcd85 revision failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/libcxx.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Patching libcxx failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Checkout libcxxabi from Android Master required by LLVM due to c++11
+cd $ANDROID_BUILD_TOP/external
+git clone $ANDROID_REPOSITORY_URL/platform/external/libcxxabi $REFERENCE_ARGUMENT
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] git clone of libcxxabi failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+cd libcxxabi
+git checkout 285d67f35f6044cf733091e36248405ca967c62c -q
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] checkout of libcxxabi@285d67f35f6044cf733091e36248405ca967c62c revision failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+
+#Apply patch to bioncs
+cd $ANDROID_BUILD_TOP/bionic/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/bionics.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Patching bionics failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Add locale support to bioncs
+cd $ANDROID_BUILD_TOP/bionic/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/locale.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Adding locale support failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Add LLVM backend ARM fix to support -O0 compilation
+cd $ANDROID_BUILD_TOP/external/llvm/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/llvm.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Fixing ARM LLVM backend failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+echo "[ANDROID-PATCH] Patching completed successfully"
+cd $MIDGARD_DDK_WD
diff --git a/midgard/r15p0/android/patches/K/bionics.diff b/midgard/r15p0/android/patches/K/bionics.diff
new file mode 100755
index 0000000..b86cedf
--- /dev/null
+++ b/midgard/r15p0/android/patches/K/bionics.diff
@@ -0,0 +1,202 @@
+diff --git a/libc/Android.mk b/libc/Android.mk
+index 9610c14..d3dbe81 100644
+--- a/libc/Android.mk
++++ b/libc/Android.mk
+@@ -214,6 +215,7 @@ libc_bionic_src_:= \
+     bionic/libc_init_common.cpp \
+     bionic/libc_logging.cpp \
+     bionic/libgen.cpp \
++    bionic/locale.cpp \
+     bionic/mmap.cpp \
+     bionic/pthread_attr.cpp \
+     bionic/pthread_detach.cpp \
+@@ -232,7 +232,6 @@ libc_bionic_src_files := \
+     bionic/scandir.cpp \
+     bionic/sched_getaffinity.cpp \
+     bionic/__set_errno.cpp \
+-    bionic/setlocale.cpp \
+     bionic/signalfd.cpp \
+     bionic/sigwait.cpp \
+     bionic/statvfs.cpp \
+diff --git a/libc/bionic/wchar.cpp b/libc/bionic/wchar.cpp
+index 50a3875..cc830b7 100644
+--- a/libc/bionic/wchar.cpp
++++ b/libc/bionic/wchar.cpp
+@@ -96,6 +96,7 @@ int swscanf(const wchar_t* /*s*/, const wchar_t* /*format*/, ... ) {
+
+ int iswalnum(wint_t wc) { return isalnum(wc); }
+ int iswalpha(wint_t wc) { return isalpha(wc); }
++int iswblank(wint_t wc) { return isblank(wc); }
+ int iswcntrl(wint_t wc) { return iscntrl(wc); }
+ int iswdigit(wint_t wc) { return isdigit(wc); }
+ int iswgraph(wint_t wc) { return isgraph(wc); }
+diff --git a/libc/include/assert.h b/libc/include/assert.h
+index 62470f5..361a5ff 100644
+--- a/libc/include/assert.h
++++ b/libc/include/assert.h
+@@ -60,6 +60,6 @@
+ #endif
+
+ __BEGIN_DECLS
+-__dead void __assert(const char *, int, const char *);
+-__dead void __assert2(const char *, int, const char *, const char *);
++__dead void __assert(const char *, int, const char *) __noreturn;
++__dead void __assert2(const char *, int, const char *, const char *) __noreturn;
+ __END_DECLS
+diff --git a/libc/include/ctype.h b/libc/include/ctype.h
+index 5557e31..a66df12 100644
+--- a/libc/include/ctype.h
++++ b/libc/include/ctype.h
+@@ -45,11 +45,14 @@
+ #define	_CTYPE_U	0x01
+ #define	_CTYPE_L	0x02
+ #define	_CTYPE_N	0x04
++#define	_CTYPE_D	0x04
+ #define	_CTYPE_S	0x08
+ #define	_CTYPE_P	0x10
+ #define	_CTYPE_C	0x20
+ #define	_CTYPE_X	0x40
+ #define	_CTYPE_B	0x80
++#define	_CTYPE_R	(_CTYPE_P|_CTYPE_U|_CTYPE_L|_CTYPE_D|_CTYPE_B)
++#define	_CTYPE_A	(_CTYPE_L|_CTYPE_U)
+
+ __BEGIN_DECLS
+
+diff --git a/libc/include/locale.h b/libc/include/locale.h
+index 65b5c7d..6989851 100644
+--- a/libc/include/locale.h
++++ b/libc/include/locale.h
+@@ -29,6 +29,7 @@
+ #define _LOCALE_H_
+
+ #include <sys/cdefs.h>
++#include <xlocale.h>
+
+ __BEGIN_DECLS
+
+@@ -43,17 +44,65 @@ enum {
+     LC_PAPER     = 7,
+     LC_NAME      = 8,
+     LC_ADDRESS   = 9,
+-
+     LC_TELEPHONE      = 10,
+     LC_MEASUREMENT    = 11,
+     LC_IDENTIFICATION = 12
+ };
+
+-extern char *setlocale(int category, const char *locale);
++#define LC_CTYPE_MASK          (1 << LC_CTYPE)
++#define LC_NUMERIC_MASK        (1 << LC_NUMERIC)
++#define LC_TIME_MASK           (1 << LC_TIME)
++#define LC_COLLATE_MASK        (1 << LC_COLLATE)
++#define LC_MONETARY_MASK       (1 << LC_MONETARY)
++#define LC_MESSAGES_MASK       (1 << LC_MESSAGES)
++#define LC_PAPER_MASK          (1 << LC_PAPER)
++#define LC_NAME_MASK           (1 << LC_NAME)
++#define LC_ADDRESS_MASK        (1 << LC_ADDRESS)
++#define LC_TELEPHONE_MASK      (1 << LC_TELEPHONE)
++#define LC_MEASUREMENT_MASK    (1 << LC_MEASUREMENT)
++#define LC_IDENTIFICATION_MASK (1 << LC_IDENTIFICATION)
++
++#define LC_ALL_MASK (LC_CTYPE_MASK | LC_NUMERIC_MASK | LC_TIME_MASK | LC_COLLATE_MASK | \
++                     LC_MONETARY_MASK | LC_MESSAGES_MASK | LC_PAPER_MASK | LC_NAME_MASK | \
++                     LC_ADDRESS_MASK | LC_TELEPHONE_MASK | LC_MEASUREMENT_MASK | \
++                     LC_IDENTIFICATION_MASK)
++
++struct lconv {
++    char* decimal_point;
++    char* thousands_sep;
++    char* grouping;
++    char* int_curr_symbol;
++    char* currency_symbol;
++    char* mon_decimal_point;
++    char* mon_thousands_sep;
++    char* mon_grouping;
++    char* positive_sign;
++    char* negative_sign;
++    char  int_frac_digits;
++    char  frac_digits;
++    char  p_cs_precedes;
++    char  p_sep_by_space;
++    char  n_cs_precedes;
++    char  n_sep_by_space;
++    char  p_sign_posn;
++    char  n_sign_posn;
++    char  int_p_cs_precedes;
++    char  int_p_sep_by_space;
++    char  int_n_cs_precedes;
++    char  int_n_sep_by_space;
++    char  int_p_sign_posn;
++    char  int_n_sign_posn;
++};
++
++struct lconv* localeconv(void);
++
++locale_t duplocale(locale_t);
++void freelocale(locale_t);
++locale_t newlocale(int, const char*, locale_t);
++char* setlocale(int, const char*);
++locale_t uselocale(locale_t);
+
+-/* Make libstdc++-v3 happy.  */
+-struct lconv { };
+-struct lconv *localeconv(void);
++#define LC_GLOBAL_LOCALE ((locale_t) -1L)
+
+ __END_DECLS
+
+diff --git a/libc/include/stdio.h b/libc/include/stdio.h
+index 23fc944..28685c7 100644
+--- a/libc/include/stdio.h
++++ b/libc/include/stdio.h
+@@ -469,7 +469,10 @@ int vsprintf(char *dest, const char *format, __va_list ap)
+ }
+
+ #if defined(__clang__)
+-#define snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
++  #if !defined(snprintf)
++    #define __wrap_snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
++    #define snprintf(...) __wrap_snprintf(__VA_ARGS__)
++  #endif
+ #else
+ __BIONIC_FORTIFY_INLINE
+ __printflike(3, 4)
+@@ -481,7 +484,10 @@ int snprintf(char *dest, size_t size, const char *format, ...)
+ #endif
+
+ #if defined(__clang__)
+-#define sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
++  #if !defined(sprintf)
++    #define __wrap_sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
++    #define sprintf(...) __wrap_sprintf(__VA_ARGS__)
++  #endif
+ #else
+ __BIONIC_FORTIFY_INLINE
+ __printflike(2, 3)
+diff --git a/libc/include/stdlib.h b/libc/include/stdlib.h
+index 9fa84c1..188da69 100644
+--- a/libc/include/stdlib.h
++++ b/libc/include/stdlib.h
+@@ -68,6 +68,10 @@ static __inline__ float strtof(const char *nptr, char **endptr)
+     return (float)strtod(nptr, endptr);
+ }
+
++static __inline__ long double strtold(const char* s, char** end_ptr) {
++  return (long double)strtod(s, end_ptr);
++}
++
+ extern int atoi(const char *) __purefunc;
+ extern long atol(const char *) __purefunc;
+ extern long long atoll(const char *) __purefunc;
+diff --git a/libc/include/wchar.h b/libc/include/wchar.h
+index 76ac02c..c413806 100644
+--- a/libc/include/wchar.h
++++ b/libc/include/wchar.h
+@@ -77,6 +77,7 @@ extern int               fwprintf(FILE *, const wchar_t *, ...);
+ extern int               fwscanf(FILE *, const wchar_t *, ...);
+ extern int               iswalnum(wint_t);
+ extern int               iswalpha(wint_t);
++extern int               iswblank(wint_t);
+ extern int               iswcntrl(wint_t);
+ extern int               iswdigit(wint_t);
+ extern int               iswgraph(wint_t);
diff --git a/midgard/r15p0/android/patches/K/libcxx.diff b/midgard/r15p0/android/patches/K/libcxx.diff
new file mode 100755
index 0000000..a10d75c
--- /dev/null
+++ b/midgard/r15p0/android/patches/K/libcxx.diff
@@ -0,0 +1,13 @@
+diff --git a/include/cstdlib b/include/cstdlib
+index 152b891..e45a8fd 100644
+--- a/include/cstdlib
++++ b/include/cstdlib
+@@ -126,7 +126,7 @@ using ::realloc;
+ using ::abort;
+ using ::atexit;
+ using ::exit;
+-using ::_Exit;
++
+ using ::getenv;
+ using ::system;
+ using ::bsearch;
diff --git a/midgard/r15p0/android/patches/K/license.txt b/midgard/r15p0/android/patches/K/license.txt
new file mode 100755
index 0000000..d645695
--- /dev/null
+++ b/midgard/r15p0/android/patches/K/license.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/midgard/r15p0/android/patches/K/llvm.diff b/midgard/r15p0/android/patches/K/llvm.diff
new file mode 100755
index 0000000..d41d279
--- /dev/null
+++ b/midgard/r15p0/android/patches/K/llvm.diff
@@ -0,0 +1,14 @@
+diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
+index 8cdb853..868dd47 100644
+--- a/lib/Target/ARM/ARMInstrInfo.cpp
++++ b/lib/Target/ARM/ARMInstrInfo.cpp
+@@ -130,6 +130,10 @@ namespace {
+         MIB.addImm(0);
+       AddDefaultPred(MIB);
+
++      // Fix the GOT address by adding pc.
++      BuildMI(FirstMBB, MBBI, DL, TII.get(ARM::tPICADD), GlobalBaseReg)
++          .addReg(GlobalBaseReg).addImm(ARMPCLabelIndex);
++
+       return true;
+     }
diff --git a/midgard/r15p0/android/patches/K/locale.diff b/midgard/r15p0/android/patches/K/locale.diff
new file mode 100755
index 0000000..9c4c140
--- /dev/null
+++ b/midgard/r15p0/android/patches/K/locale.diff
@@ -0,0 +1,204 @@
+diff --git a/libc/bionic/locale.cpp b/libc/bionic/locale.cpp
+new file mode 100644
+index 0000000..4fade84
+--- /dev/null
++++ b/libc/bionic/locale.cpp
+@@ -0,0 +1,156 @@
++/*
++ * Copyright (C) 2008 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++#include <locale.h>
++#include <pthread.h>
++#include <stdlib.h>
++
++// We currently support a single locale, the "C" locale (also known as "POSIX").
++
++struct __locale_t {
++  // Because we only support one locale, these are just tokens with no data.
++};
++
++static pthread_once_t gLocaleOnce = PTHREAD_ONCE_INIT;
++static lconv gLocale;
++
++static pthread_once_t gUselocaleKeyOnce = PTHREAD_ONCE_INIT;
++static pthread_key_t gUselocaleKey;
++
++static void __locale_init() {
++  gLocale.decimal_point = const_cast<char*>(".");
++
++  char* not_available = const_cast<char*>("");
++  gLocale.thousands_sep = not_available;
++  gLocale.grouping = not_available;
++  gLocale.int_curr_symbol = not_available;
++  gLocale.currency_symbol = not_available;
++  gLocale.mon_decimal_point = not_available;
++  gLocale.mon_thousands_sep = not_available;
++  gLocale.mon_grouping = not_available;
++  gLocale.positive_sign = not_available;
++  gLocale.negative_sign = not_available;
++
++  gLocale.int_frac_digits = CHAR_MAX;
++  gLocale.frac_digits = CHAR_MAX;
++  gLocale.p_cs_precedes = CHAR_MAX;
++  gLocale.p_sep_by_space = CHAR_MAX;
++  gLocale.n_cs_precedes = CHAR_MAX;
++  gLocale.n_sep_by_space = CHAR_MAX;
++  gLocale.p_sign_posn = CHAR_MAX;
++  gLocale.n_sign_posn = CHAR_MAX;
++  gLocale.int_p_cs_precedes = CHAR_MAX;
++  gLocale.int_p_sep_by_space = CHAR_MAX;
++  gLocale.int_n_cs_precedes = CHAR_MAX;
++  gLocale.int_n_sep_by_space = CHAR_MAX;
++  gLocale.int_p_sign_posn = CHAR_MAX;
++  gLocale.int_n_sign_posn = CHAR_MAX;
++}
++
++static void __uselocale_key_init() {
++  pthread_key_create(&gUselocaleKey, NULL);
++}
++
++static bool __is_supported_locale(const char* locale) {
++  return (strcmp(locale, "") == 0 || strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0);
++}
++
++static locale_t __new_locale() {
++  return reinterpret_cast<locale_t>(malloc(sizeof(__locale_t)));
++}
++
++lconv* localeconv() {
++  pthread_once(&gLocaleOnce, __locale_init);
++  return &gLocale;
++}
++
++locale_t duplocale(locale_t l) {
++  locale_t clone = __new_locale();
++  if (clone != NULL && l != LC_GLOBAL_LOCALE) {
++    *clone = *l;
++  }
++  return clone;
++}
++
++void freelocale(locale_t l) {
++  free(l);
++}
++
++locale_t newlocale(int category_mask, const char* locale_name, locale_t /*base*/) {
++  // Is 'category_mask' valid?
++  if ((category_mask & ~LC_ALL_MASK) != 0) {
++    errno = EINVAL;
++    return NULL;
++  }
++
++  if (!__is_supported_locale(locale_name)) {
++    errno = ENOENT;
++    return NULL;
++  }
++
++  return __new_locale();
++}
++
++char* setlocale(int category, char const* locale_name) {
++  // Is 'category' valid?
++  if (category < LC_CTYPE || category > LC_IDENTIFICATION) {
++    errno = EINVAL;
++    return NULL;
++  }
++
++  // Caller just wants to query the current locale?
++  if (locale_name == NULL) {
++    return const_cast<char*>("C");
++  }
++
++  // Caller wants one of the mandatory POSIX locales?
++  if (__is_supported_locale(locale_name)) {
++    return const_cast<char*>("C");
++  }
++
++  // We don't support any other locales.
++  errno = ENOENT;
++  return NULL;
++}
++
++locale_t uselocale(locale_t new_locale) {
++  pthread_once(&gUselocaleKeyOnce, __uselocale_key_init);
++
++  locale_t old_locale = static_cast<locale_t>(pthread_getspecific(gUselocaleKey));
++
++  // If this is the first call to uselocale(3) on this thread, we return LC_GLOBAL_LOCALE.
++  if (old_locale == NULL) {
++    old_locale = LC_GLOBAL_LOCALE;
++  }
++
++  if (new_locale != NULL) {
++    pthread_setspecific(gUselocaleKey, new_locale);
++  }
++
++  return old_locale;
++}
+diff --git a/libc/include/xlocale.h b/libc/include/xlocale.h
+new file mode 100644
+index 0000000..f7eb8f4
+--- /dev/null
++++ b/libc/include/xlocale.h
+@@ -0,0 +1,36 @@
++/*
++ * Copyright (C) 2014 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++#ifndef _XLOCALE_H_
++#define _XLOCALE_H_
++
++/* If we just use void* here, GCC exposes that in error messages. */
++struct __locale_t;
++typedef struct __locale_t* locale_t;
++
++#endif /* _XLOCALE_H_ */
diff --git a/midgard/r15p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt b/midgard/r15p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
new file mode 100755
index 0000000..46b704b
--- /dev/null
+++ b/midgard/r15p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
@@ -0,0 +1,92 @@
+#
+# (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+* ARM Mali Midgard devices
+
+
+Required properties:
+
+- compatible : Should be mali<chip>, replacing digits with x from the back,
+until malit<Major>xx, ending with arm,mali-midgard, the latter not optional.
+- reg : Physical base address of the device and length of the register area.
+- interrupts : Contains the three IRQ lines required by T-6xx devices
+- interrupt-names : Contains the names of IRQ resources in the order they were
+provided in the interrupts property. Must contain: "JOB, "MMU", "GPU".
+
+Optional:
+
+- clocks : Phandle to clock for the Mali T-6xx device.
+- clock-names : Shall be "clk_mali".
+- mali-supply : Phandle to regulator for the Mali device. Refer to
+Documentation/devicetree/bindings/regulator/regulator.txt for details.
+- operating-points : Refer to Documentation/devicetree/bindings/power/opp.txt
+for details.
+- snoop_enable_smc : SMC function ID to enable CCI snooping on the GPU port(s).
+- snoop_disable_smc : SMC function ID to disable CCI snooping on the GPU port(s).
+- jm_config : For T860/T880. Sets job manager configuration. An array containing:
+	- 1 to override the TIMESTAMP value, 0 otherwise.
+	- 1 to override clock gate, forcing them to be always on, 0 otherwise.
+	- 1 to enable job throttle, limiting the number of cores that can be started
+	  simultaneously, 0 otherwise.
+	- Value between 0 and 63 (including). If job throttle is enabled, this is one
+	  less than the number of cores that can be started simultaneously.
+- power_model : Sets power model parameters. Note that this model was designed for the Juno
+	        platform, and may not be suitable for other platforms. A structure containing :
+	- compatible: Should be arm,mali-simple-power-model
+	- voltage: Voltage at reference point. Specified in mV.
+	- frequency: Frequency at reference point. Specified in MHz.
+	- dynamic-power: Dynamic power at reference frequency and voltage. Specified in mW.
+	- static-power: Static power at reference frequency. Specified in mW.
+	- ts: An array containing coefficients for the temperature scaling factor.
+	  Used as : tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0], where T = temperature
+	- thermal-zone: A string identifying the thermal zone used for the GPU
+- system-coherency : Sets the coherency protocol to be used for coherent
+		     accesses made from the GPU.
+		     If not set then no coherency is used.
+	- 0  : ACE-Lite
+	- 1  : ACE
+	- 31 : No coherency
+
+Example for a Mali-T602:
+
+gpu@0xfc010000 {
+	compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
+	reg = <0xfc010000 0x4000>;
+	interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+	interrupt-names = "JOB", "MMU", "GPU";
+
+	clocks = <&pclk_mali>;
+	clock-names = "clk_mali";
+	mali-supply = <&vdd_mali>;
+	operating-points = <
+		/* KHz   uV */
+		533000 1250000,
+		450000 1150000,
+		400000 1125000,
+		350000 1075000,
+		266000 1025000,
+		160000  925000,
+		100000  912500,
+	>;
+	power_model {
+		compatible = "arm,mali-simple-power-model";
+		voltage = <800>;
+		frequency = <500>;
+		static-power = <500>;
+		dynamic-power = <1500>;
+		ts = <20000 2000 (-20) 2>;
+		thermal-zone = "gpu";
+	};
+};
diff --git a/midgard/r15p0/kernel/Documentation/dma-buf-test-exporter.txt b/midgard/r15p0/kernel/Documentation/dma-buf-test-exporter.txt
new file mode 100755
index 0000000..4208010
--- /dev/null
+++ b/midgard/r15p0/kernel/Documentation/dma-buf-test-exporter.txt
@@ -0,0 +1,40 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+=====================
+dma-buf-test-exporter
+=====================
+
+Overview
+--------
+
+The dma-buf-test-exporter is a simple exporter of dma_buf objects.
+It has a private API to allocate and manipulate the buffers which are represented as dma_buf fds.
+The private API allows:
+* simple allocation of physically non-contiguous buffers
+* simple allocation of physically contiguous buffers
+* query kernel side API usage stats (number of attachments, number of mappings, mmaps)
+* failure mode configuration (fail attach, mapping, mmap)
+* kernel side memset of buffers
+
+The buffers support all of the dma_buf API, including mmap.
+
+It supports being compiled as a module both in-tree and out-of-tree.
+
+See include/linux/dma-buf-test-exporter.h for the ioctl interface.
+See Documentation/dma-buf-sharing.txt for details on dma_buf.
+
+
diff --git a/midgard/r15p0/kernel/Documentation/kds.txt b/midgard/r15p0/kernel/Documentation/kds.txt
new file mode 100755
index 0000000..639288c
--- /dev/null
+++ b/midgard/r15p0/kernel/Documentation/kds.txt
@@ -0,0 +1,268 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+==============================
+kds - Kernel Dependency System
+==============================
+
+Introduction
+------------
+kds provides a mechanism for clients to atomically lock down multiple abstract resources.
+This can be done either synchronously or asynchronously.
+Abstract resources is used to allow a set of clients to use kds to control access to any
+resource, an example is structured memory buffers.
+
+kds supports that buffer is locked for exclusive access and sharing of buffers.
+
+kds can be built as either a integrated feature of the kernel or as a module.
+It supports being compiled as a module both in-tree and out-of-tree.
+
+
+Concepts
+--------
+A core concept in kds is abstract resources.
+A kds resource is just an abstraction for some client object, kds doesn't care what it is.
+Typically EGL will consider UMP buffers as being a resource, thus each UMP buffer has
+a kds resource for synchronization to the buffer.
+
+kds allows a client to create and destroy the abstract resource objects.
+A new resource object is made available asap (it is just a simple malloc with some initializations),
+while destroy it requires some external synchronization.
+
+The other core concept in kds is consumer of resources.
+kds is requested to allow a client to consume a set of resources and the client will be notified when it can consume the resources.
+
+Exclusive access allows only one client to consume a resource.
+Shared access permits multiple consumers to acceess a resource concurrently.
+
+
+APIs
+----
+kds provides simple resource allocate and destroy functions.
+Clients use this to instantiate and control the lifetime of the resources kds manages.
+
+kds provides two ways to wait for resources:
+- Asynchronous wait: the client specifies a function pointer to be called when wait is over
+- Synchronous wait: Function blocks until access is gained.
+
+The synchronous API has a timeout for the wait.
+The call can early out if a signal is delivered.
+
+After a client is done consuming the resource kds must be notified to release the resources and let some other client take ownership.
+This is done via resource set release call.
+
+A Windows comparison:
+kds implements WaitForMultipleObjectsEx(..., bWaitAll = TRUE, ...) but also has an asynchronous version in addition.
+kds resources can be seen as being the same as NT object manager resources.
+
+Internals
+---------
+kds guarantees atomicity when a set of resources is operated on.
+This is implemented via a global resource lock which is taken by kds when it updates resource objects.
+
+Internally a resource in kds is a linked list head with some flags.
+
+When a consumer requests access to a set of resources it is queued on each of the resources.
+The link from the consumer to the resources can be triggered. Once all links are triggered
+the registered callback is called or the blocking function returns.
+A link is considered triggered if it is the first on the list of consumers of a resource,
+or if all the links ahead of it is marked as shared and itself is of the type shared.
+
+When the client is done consuming the consumer object is removed from the linked lists of
+the resources and a potential new consumer becomes the head of the resources.
+As we add and remove consumers atomically across all resources we can guarantee that
+we never introduces a A->B + B->A type of loops/deadlocks.
+
+
+kbase/base implementation
+-------------------------
+A HW job needs access to a set of shared resources.
+EGL tracks this and encodes the set along with the atom in the ringbuffer.
+EGL allocates a (k)base dep object to represent the dependency to the set of resources and encodes that along with the list of resources.
+This dep object is use to create a dependency from a job chain(atom) to the resources it needs to run.
+When kbase decodes the atom in the ringbuffer it finds the set of resources and calls kds to request all the needed resources.
+As EGL needs to know when the kds request is delivered a new base event object is needed: atom enqueued. This event is only delivered for atoms which uses kds.
+The callback kbase registers trigger the dependency object described which would trigger the existing JD system to release the job chain.
+When the atom is done kds resource set release is call to release the resources.
+
+EGL will typically use exclusive access to the render target, while all buffers used as input can be marked as shared.
+
+
+Buffer publish/vsync
+--------------------
+EGL will use a separate ioctl or DRM flip to request the flip.
+If the LCD driver is integrated with kds EGL can do these operations early.
+The LCD driver must then implement the ioctl or DRM flip to be asynchronous with kds async call.
+The LCD driver binds a kds resource to each virtual buffer (2 buffers in case of double-buffering).
+EGL will make a dependency to the target kds resource in the kbase atom.
+After EGL receives a atom enqueued event it can ask the LCD driver to pan to the target kds resource.
+When the atom is completed it'll release the resource and the LCD driver will get its callback.
+In the callback it'll load the target buffer into the DMA unit of the LCD hardware.
+The LCD driver will be the consumer of both buffers for a short period.
+The LCD driver will call kds resource set release on the previous on-screen buffer when the next vsync/dma read end is handled.
+
+===============================================
+Kernel driver kds client design considerations
+=============================================== 
+
+Number of resources
+--------------------
+
+The kds api allows a client to wait for ownership of a number of resources, where by the client does not take on ownership of any of the resources in the resource set 
+until all of the resources in the set are released.  Consideration must be made with respect to performance, as waiting on large number of resources will incur
+a greater overhead and may increase system latency.  It may be worth considering how independent each of the resources are, for example if the same set of resources 
+are waited upon by each of the clients, then it may be possible to aggregate these into one resource that each client waits upon.
+
+Clients with shared access
+---------------------------
+
+The kds api allows a number of clients to gain shared access to a resource simultaneously, consideration must be made with respect to performance, large numbers of clients
+wanting shared access can incur a performance penalty and may increase system latency, specifically when the clients are granted access. Having an excessively high 
+number of clients with shared access should be avoided, consideration should be made to the call back configuration being used. See Callbacks and Scenario 1 below.
+
+Callbacks
+----------
+
+Careful consideration must be made as to which callback type is most appropriate for kds clients, direct callbacks are called immediately from the context in which the
+ownership of the resource is passed to the next waiter in the list.  Where as when using deferred callbacks the callback is deferred and called from outside the context
+that is relinquishing ownership, while this reduces the latency in the releasing clients context it does incur a cost as there is more latency between a resource 
+becoming free and the new client owning the resource callback being executed.  
+
+Obviously direct callbacks have a performance advantage, as the call back is immediate and does not have to wait for the kernel to context switch to schedule in the 
+execution of the callback.  
+
+However as the callback is immediate and within the context that is granting ownership it is important that the callback perform the MINIMUM amount of work necessary, 
+long call backs could cause poor system latency.  Special care and attention must be taken if the direct callbacks can be called from IRQ handlers, such as when 
+kds_resource_set_release is called from an IRQ handler, in this case you have to avoid any calls that may sleep.  
+
+Deferred contexts have the advantage that the call backs are deferred until they are scheduled by the kernel, therefore they are allowed to call functions that may sleep
+and if scheduled from IRQ context not incur as much system latency as would be seen with direct callbacks from within the IRQ.
+
+Once the clients callback has been called, the client is considered to be owning the resource.  Within the callback the client may only need to perform a small amount of work
+before the client need to give up owner ship.  The kds_resource_release function may be called from with in the call back, but consideration must be made when using direct 
+callbacks, with both respect to execution time and stack usage. Consider the example in Scenario 2 with direct callbacks:
+
+Scenario 1 - Shared client access - direct callbacks:
+
+Resources: X
+Clients: A(S), B(S), C(S), D(S), E(E)
+where: (S) = shared user, (E) = exclusive
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+}
+
+Where <client> is either A,B,C,D
+                                        Queue   |Owner 
+1.  E Requests X exclusive                      |
+2.  E Owns     X exclusive                      |E
+3.  A Requests X shared                        A|E
+4.  B Requests X shared                       BA|E
+5.  C Requests X shared                      CBA|E
+6.  D Requests X shared                     DCBA|E
+7.  E Releases X                                |DCBA
+8.  A Owns X shared                             |DCBA
+9.  B Owns X shared                             |DCBA
+10. C Owns X shared                             |DCBA
+11. D Owns X shared                             |DCBA
+
+At point 7 it is important to note that when E releases X; A,B,C and D become the new shared owners of X and the call back for each of the client(A,B,C,D) triggered, so consideration
+must be made as to whether a direct or deferred callback is suitable, using direct callbacks would result in the call graph.
+
+Call graph when E releases X:
+    kds_resource_set_release( .. )
+        +->client_A_cb( .. )
+        +->client_B_cb( .. )
+        +->client_C_cb( .. )
+        +->client_D_cb( .. )
+
+
+Scenario 2 - Immediate resource release - direct callbacks:
+
+Resource: X
+Clients: A, B, C, D
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+    kds_resource_set_release( .. );
+}
+
+Where <client> is either A,B,C,D
+
+1. A Owns     X exclusive
+2. B Requests X exclusive	(direct callback)
+3. C Requests X exclusive	(direct callback)
+4. D Requests X exclusive	(direct callback)
+5. A Releases X
+
+Call graph when A releases X:
+    kds_resource_set_release( .. )
+        +->client_B_cb( .. )
+            +->kds_resource_set_release( .. )
+                +->client_C_cb( .. )
+                    +->kds_resource_set_release( .. )
+                        +->client_D_cb( .. )
+
+As can be seen when a client releases the resource, with direct call backs it is possible to create nested calls
+
+IRQ Considerations
+-------------------
+
+Usage of kds_resource_release in IRQ handlers should be carefully considered.
+
+Things to keep in mind:
+
+1.) Are you using direct or deferred callbacks?
+2.) How many resources are you releasing?
+3.) How many shared waiters are pending on the resource?
+
+Releasing ownership and wait cancellation
+------------------------------------------
+
+Client Wait Cancellation
+-------------------------
+
+It may be necessary in certain circumstances for the client to cancel the wait for kds resources for error handling, process termination etc.  Cancellation is 
+performed using kds_resource_set_release or kds_resource_set_release_sync using the rset that was received from kds_async_waitall, kds_resource_set_release_sync 
+being used for waits which are using deferred callbacks.
+
+It is possible that while the request to cancel the wait is being issued by the client, the client is granted access to the resources.  Normally after the client
+has taken ownership and finishes with that resource, it will release ownership to signal other waiters which are pending, this causes a race with the cancellation.
+To prevent KDS trying to remove a wait twice from the internal list and accessing memory that is potentially freed, it is very important that all releasers use the
+same rset pointer.  Here is a simplified example of bad usage that must be avoided in any client implementation:
+
+Senario 3 - Bad release from multiple contexts:
+
+	This scenaro is highlighting bad usage of the kds API
+
+	kds_resource_set * rset;
+	kds_resource_set * rset_copy;
+
+	kds_async_waitall( &rset, ... ... ... );
+
+	/* Don't do this */
+	rset_copy = rset;
+
+Context A:
+	kds_resource_set_release( &rset )
+
+Context B:
+	kds_resource_set_release( &rset_copy )
+
diff --git a/midgard/r15p0/kernel/drivers/base/dma_buf_lock/sconscript b/midgard/r15p0/kernel/drivers/base/dma_buf_lock/sconscript
new file mode 100755
index 0000000..99293c3
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/dma_buf_lock/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2012, 2014, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+if env.KernelConfigEnabled("CONFIG_DMA_SHARED_BUFFER_USES_KDS"):
+    SConscript("src/sconscript")
+    if env["tests"] and Glob("tests/sconscript"):
+        SConscript("tests/sconscript")
diff --git a/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/Kbuild b/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
new file mode 100755
index 0000000..68dad07
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-m := dma_buf_lock.o
+endif
diff --git a/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/Makefile b/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/Makefile
new file mode 100755
index 0000000..cf76132
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: dma_buf_lock
+
+dma_buf_lock:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include"
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c b/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
new file mode 100755
index 0000000..9613ffc
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/dma-buf.h>
+#include <linux/kds.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+
+#include "dma_buf_lock.h"
+
+/* Maximum number of buffers that a single handle can address */
+#define DMA_BUF_LOCK_BUF_MAX 32
+
+#define DMA_BUF_LOCK_DEBUG 1
+
+static dev_t dma_buf_lock_dev;
+static struct cdev dma_buf_lock_cdev;
+static struct class *dma_buf_lock_class;
+static char dma_buf_lock_dev_name[] = "dma_buf_lock";
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static struct file_operations dma_buf_lock_fops =
+{
+	.owner   = THIS_MODULE,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = dma_buf_lock_ioctl,
+#else
+	.ioctl   = dma_buf_lock_ioctl,
+#endif
+	.compat_ioctl   = dma_buf_lock_ioctl,
+};
+
+typedef struct dma_buf_lock_resource
+{
+	int *list_of_dma_buf_fds;               /* List of buffers copied from userspace */
+	atomic_t locked;                        /* Status of lock */
+	struct dma_buf **dma_bufs;
+	struct kds_resource **kds_resources;    /* List of KDS resources associated with buffers */
+	struct kds_resource_set *resource_set;
+	unsigned long exclusive;                /* Exclusive access bitmap */
+	wait_queue_head_t wait;
+	struct kds_callback cb;
+	struct kref refcount;
+	struct list_head link;
+	int count;
+} dma_buf_lock_resource;
+
+static LIST_HEAD(dma_buf_lock_resource_list);
+static DEFINE_MUTEX(dma_buf_lock_mutex);
+
+static inline int is_dma_buf_lock_file(struct file *);
+static void dma_buf_lock_dounlock(struct kref *ref);
+
+static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
+{
+	dma_buf_lock_resource *resource;
+
+	if (!is_dma_buf_lock_file(file))
+		return -EINVAL;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_release\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return 0;
+}
+
+static void dma_buf_lock_kds_callback(void *param1, void *param2)
+{
+	dma_buf_lock_resource *resource = param1;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_kds_callback\n");
+#endif
+	atomic_set(&resource->locked, 1);
+
+	wake_up(&resource->wait);
+}
+
+static unsigned int dma_buf_lock_handle_poll(struct file *file,
+                                             struct poll_table_struct *wait)
+{
+	dma_buf_lock_resource *resource;
+	unsigned int ret = 0;
+
+	if (!is_dma_buf_lock_file(file))
+		return POLLERR;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll\n");
+#endif
+	if (1 == atomic_read(&resource->locked))
+	{
+		/* Resources have been locked */
+		ret = POLLIN | POLLRDNORM;
+		if (resource->exclusive)
+		{
+			ret |=  POLLOUT | POLLWRNORM;
+		}
+	}
+	else
+	{
+		if (!poll_does_not_wait(wait)) 
+		{
+			poll_wait(file, &resource->wait, wait);
+		}
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll : return %i\n", ret);
+#endif
+	return ret;
+}
+
+static const struct file_operations dma_buf_lock_handle_fops = {
+	.release	= dma_buf_lock_handle_release,
+	.poll		= dma_buf_lock_handle_poll,
+};
+
+/*
+ * is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock
+ */
+static inline int is_dma_buf_lock_file(struct file *file)
+{
+	return file->f_op == &dma_buf_lock_handle_fops;
+}
+
+
+
+/*
+ * Start requested lock.
+ *
+ * Allocates required memory, copies dma_buf_fd list from userspace,
+ * acquires related KDS resources, and starts the lock.
+ */
+static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
+{
+	dma_buf_lock_resource *resource;
+	int size;
+	int fd;
+	int i;
+	int ret;
+
+	if (NULL == request->list_of_dma_buf_fds)
+	{
+		return -EINVAL;
+	}
+	if (request->count <= 0)
+	{
+		return -EINVAL;
+	}
+	if (request->count > DMA_BUF_LOCK_BUF_MAX)
+	{
+		return -EINVAL;
+	}
+	if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE &&
+	    request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
+	{
+		return -EINVAL;
+	}
+
+	resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
+	if (NULL == resource)
+	{
+		return -ENOMEM;
+	}
+
+	atomic_set(&resource->locked, 0);
+	kref_init(&resource->refcount);
+	INIT_LIST_HEAD(&resource->link);
+	resource->count = request->count;
+
+	/* Allocate space to store dma_buf_fds received from user space */
+	size = request->count * sizeof(int);
+	resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->list_of_dma_buf_fds)
+	{
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Allocate space to store dma_buf pointers associated with dma_buf_fds */
+	size = sizeof(struct dma_buf *) * request->count;
+	resource->dma_bufs = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->dma_bufs)
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+	/* Allocate space to store kds_resources associated with dma_buf_fds */
+	size = sizeof(struct kds_resource *) * request->count;
+	resource->kds_resources = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->kds_resources)
+	{
+		kfree(resource->dma_bufs);
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Copy requested list of dma_buf_fds from user space */
+	size = request->count * sizeof(int);
+	if (0 != copy_from_user(resource->list_of_dma_buf_fds, (void __user *)request->list_of_dma_buf_fds, size))
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource->dma_bufs);
+		kfree(resource->kds_resources);
+		kfree(resource);
+		return -ENOMEM;
+	}
+#if DMA_BUF_LOCK_DEBUG
+	for (i = 0; i < request->count; i++)
+	{
+		printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
+	}
+#endif
+
+	/* Add resource to global list */
+	mutex_lock(&dma_buf_lock_mutex);
+
+	list_add(&resource->link, &dma_buf_lock_resource_list);
+
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	for (i = 0; i < request->count; i++)
+	{
+		/* Convert fd into dma_buf structure */
+		resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
+
+		if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i])))
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+
+		/*Get kds_resource associated with dma_buf */
+		resource->kds_resources[i] = get_dma_buf_kds_resource(resource->dma_bufs[i]);
+
+		if (NULL == resource->kds_resources[i])
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+#if DMA_BUF_LOCK_DEBUG
+		printk("dma_buf_lock_dolock : dma_buf_fd %i dma_buf %X kds_resource %X\n", resource->list_of_dma_buf_fds[i],
+		       (unsigned int)resource->dma_bufs[i], (unsigned int)resource->kds_resources[i]);
+#endif	
+	}
+
+	kds_callback_init(&resource->cb, 1, dma_buf_lock_kds_callback);
+	init_waitqueue_head(&resource->wait);
+
+	kref_get(&resource->refcount);
+
+	/* Create file descriptor associated with lock request */
+	fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops, 
+	                      (void *)resource, 0);
+	if (fd < 0)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+		return fd;
+	}
+
+	resource->exclusive = request->exclusive;
+
+	/* Start locking process */
+	ret = kds_async_waitall(&resource->resource_set,
+	                        &resource->cb, resource, NULL,
+	                        request->count,  &resource->exclusive,
+	                        resource->kds_resources);
+
+	if (IS_ERR_VALUE(ret))
+	{
+		put_unused_fd(fd);
+
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+
+		return ret;
+	}
+
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_dolock : complete\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return fd;
+}
+
+static void dma_buf_lock_dounlock(struct kref *ref)
+{
+	int i;
+	dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
+
+	atomic_set(&resource->locked, 0);
+
+	kds_callback_term(&resource->cb);
+
+	kds_resource_set_release(&resource->resource_set);
+
+	list_del(&resource->link);
+
+	for (i = 0; i < resource->count; i++)
+	{
+		dma_buf_put(resource->dma_bufs[i]);
+	}
+
+	kfree(resource->kds_resources);
+	kfree(resource->dma_bufs);
+	kfree(resource->list_of_dma_buf_fds);
+	kfree(resource);
+}
+
+static int __init dma_buf_lock_init(void)
+{
+	int err;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init\n");
+#endif
+	err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
+
+	if (0 == err)
+	{
+		cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops);
+
+		err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1);
+
+		if (0 == err)
+		{
+			dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
+			if (IS_ERR(dma_buf_lock_class))
+			{
+				err = PTR_ERR(dma_buf_lock_class);
+			}
+			else
+			{
+				struct device *mdev;
+				mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev, NULL, dma_buf_lock_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return 0;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(dma_buf_lock_class);
+			}
+			cdev_del(&dma_buf_lock_cdev);
+		}
+
+		unregister_chrdev_region(dma_buf_lock_dev, 1);
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init failed\n");
+#endif
+	return err;
+}
+
+static void __exit dma_buf_lock_exit(void)
+{
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_exit\n");
+#endif
+
+	/* Unlock all outstanding references */
+	while (1)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		if (list_empty(&dma_buf_lock_resource_list))
+		{
+			mutex_unlock(&dma_buf_lock_mutex);
+			break;
+		}
+		else
+		{
+			dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next, 
+			                                             dma_buf_lock_resource, link);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+		}
+	}
+
+	device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
+
+	class_destroy(dma_buf_lock_class);
+
+	cdev_del(&dma_buf_lock_cdev);
+
+	unregister_chrdev_region(dma_buf_lock_dev, 1);
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	dma_buf_lock_k_request request;
+	int size = _IOC_SIZE(cmd);
+
+	if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
+	{
+		return -ENOTTY;
+	}
+
+	switch (cmd)
+	{
+		case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
+			if (size != sizeof(dma_buf_lock_k_request))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&request, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+#if DMA_BUF_LOCK_DEBUG
+			printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
+#endif
+			return dma_buf_lock_dolock(&request);
+	}
+
+	return -ENOTTY;
+}
+
+module_init(dma_buf_lock_init);
+module_exit(dma_buf_lock_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h b/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
new file mode 100755
index 0000000..e1b9348
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _DMA_BUF_LOCK_H
+#define _DMA_BUF_LOCK_H
+
+typedef enum dma_buf_lock_exclusive
+{
+	DMA_BUF_LOCK_NONEXCLUSIVE = 0,
+	DMA_BUF_LOCK_EXCLUSIVE = -1
+} dma_buf_lock_exclusive;
+
+typedef struct dma_buf_lock_k_request
+{
+	int count;
+	int *list_of_dma_buf_fds;
+	int timeout;
+	dma_buf_lock_exclusive exclusive;
+} dma_buf_lock_k_request;
+
+#define DMA_BUF_LOCK_IOC_MAGIC '~'
+
+#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC       _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
+
+#define DMA_BUF_LOCK_IOC_MINNR 11
+#define DMA_BUF_LOCK_IOC_MAXNR 11
+
+#endif /* _DMA_BUF_LOCK_H */
diff --git a/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/sconscript b/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/sconscript
new file mode 100755
index 0000000..b8724f1
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/dma_buf_lock/src/sconscript
@@ -0,0 +1,33 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+src = [Glob('#kernel/drivers/base/dma_buf_lock/src/*.c'), Glob('#kernel/drivers/base/dma_buf_lock/src/*.h'), Glob('#kernel/drivers/base/dma_buf_lock/src/K*')]
+
+if env.GetOption('clean') :
+	# Clean module
+	env.Execute(Action("make clean", '[CLEAN] dma_buf_lock'))
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
+else:
+	# Build module
+	makeAction=Action("cd ${SOURCE.dir} && make dma_buf_lock && cp dma_buf_lock.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [makeAction])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
diff --git a/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild b/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
new file mode 100755
index 0000000..56b9f86
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma-buf-test-exporter.o
+endif
diff --git a/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig b/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
new file mode 100755
index 0000000..974f0c2
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+config DMA_SHARED_BUFFER_TEST_EXPORTER
+	tristate "Test exporter for the dma-buf framework"
+	depends on DMA_SHARED_BUFFER
+	help
+	  This option enables the test exporter usable to help test importerts.
diff --git a/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/Makefile b/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
new file mode 100755
index 0000000..06e3d5c
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
new file mode 100755
index 0000000..08ab49a
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
@@ -0,0 +1,703 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/dma-buf-test-exporter.h>
+#include <linux/dma-buf.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/atomic.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif
+#include <linux/dma-mapping.h>
+#endif
+
+struct dma_buf_te_alloc {
+	/* the real alloc */
+	int nr_pages;
+	struct page **pages;
+
+	/* the debug usage tracking */
+	int nr_attached_devices;
+	int nr_device_mappings;
+	int nr_cpu_mappings;
+
+	/* failure simulation */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+
+	bool contiguous;
+	dma_addr_t contig_dma_addr;
+	void *contig_cpu_addr;
+};
+
+static struct miscdevice te_device;
+
+static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc	*alloc;
+	alloc = buf->priv;
+
+	if (alloc->fail_attach)
+		return -EFAULT;
+
+	/* dma_buf is externally locked during call */
+	alloc->nr_attached_devices++;
+	return 0;
+}
+
+static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* dma_buf is externally locked during call */
+
+	alloc->nr_attached_devices--;
+}
+
+static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction)
+{
+	struct sg_table *sg;
+	struct scatterlist *iter;
+	struct dma_buf_te_alloc	*alloc;
+	int i;
+	int ret;
+
+	alloc = attachment->dmabuf->priv;
+
+	if (alloc->fail_map)
+		return ERR_PTR(-ENOMEM);
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* if the ARCH can't chain we can't have allocs larger than a single sg can hold */
+	if (alloc->nr_pages > SG_MAX_SINGLE_ALLOC)
+		return ERR_PTR(-EINVAL);
+#endif
+
+	sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (!sg)
+		return ERR_PTR(-ENOMEM);
+
+	/* from here we access the allocation object, so lock the dmabuf pointing to it */
+	mutex_lock(&attachment->dmabuf->lock);
+
+	if (alloc->contiguous)
+		ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+	else
+		ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL);
+	if (ret) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		kfree(sg);
+		return ERR_PTR(ret);
+	}
+
+	if (alloc->contiguous) {
+		sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE;
+		sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0);
+		sg_dma_address(sg->sgl) = alloc->contig_dma_addr;
+	} else {
+		for_each_sg(sg->sgl, iter, alloc->nr_pages, i)
+			sg_set_page(iter, alloc->pages[i], PAGE_SIZE, 0);
+	}
+
+	if (!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		sg_free_table(sg);
+		kfree(sg);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	alloc->nr_device_mappings++;
+	mutex_unlock(&attachment->dmabuf->lock);
+	return sg;
+}
+
+static void dma_buf_te_unmap(struct dma_buf_attachment *attachment,
+							 struct sg_table *sg, enum dma_data_direction direction)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = attachment->dmabuf->priv;
+
+	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction);
+	sg_free_table(sg);
+	kfree(sg);
+
+	mutex_lock(&attachment->dmabuf->lock);
+	alloc->nr_device_mappings--;
+	mutex_unlock(&attachment->dmabuf->lock);
+}
+
+static void dma_buf_te_release(struct dma_buf *buf)
+{
+	int i;
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* no need for locking */
+
+	if (alloc->contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr,
+						alloc->contig_dma_addr,
+						DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++)
+			__free_page(alloc->pages[i]);
+	}
+	kfree(alloc->pages);
+	kfree(alloc);
+}
+
+
+static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings++;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	BUG_ON(alloc->nr_cpu_mappings <= 0);
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings--;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static int dma_buf_te_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dmabuf;
+	struct page *pageptr;
+
+	dmabuf = vma->vm_private_data;
+	alloc = dmabuf->priv;
+
+	if (vmf->pgoff > alloc->nr_pages)
+		return VM_FAULT_SIGBUS;
+
+	pageptr = alloc->pages[vmf->pgoff];
+
+	BUG_ON(!pageptr);
+
+	get_page(pageptr);
+	vmf->page = pageptr;
+
+	return 0;
+}
+
+struct vm_operations_struct dma_buf_te_vm_ops = {
+	.open = dma_buf_te_mmap_open,
+	.close = dma_buf_te_mmap_close,
+	.fault = dma_buf_te_mmap_fault
+};
+
+static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = dmabuf->priv;
+
+	if (alloc->fail_mmap)
+		return -ENOMEM;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTEXPAND;
+#endif
+	vma->vm_ops = &dma_buf_te_vm_ops;
+	vma->vm_private_data = dmabuf;
+
+	/*  we fault in the pages on access */
+
+	/* call open to do the ref-counting */
+	dma_buf_te_vm_ops.open(vma);
+
+	return 0;
+}
+
+static void *dma_buf_te_kmap_atomic(struct dma_buf *buf, unsigned long page_num)
+{
+	/* IGNORE */
+	return NULL;
+}
+
+static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long page_num)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return NULL;
+
+	return kmap(alloc->pages[page_num]);
+}
+static void dma_buf_te_kunmap(struct dma_buf *buf,
+		unsigned long page_num, void *addr)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return;
+
+	kunmap(alloc->pages[page_num]);
+	return;
+}
+
+static struct dma_buf_ops dma_buf_te_ops = {
+	/* real handlers */
+	.attach = dma_buf_te_attach,
+	.detach = dma_buf_te_detach,
+	.map_dma_buf = dma_buf_te_map,
+	.unmap_dma_buf = dma_buf_te_unmap,
+	.release = dma_buf_te_release,
+	.mmap = dma_buf_te_mmap,
+	.kmap = dma_buf_te_kmap,
+	.kunmap = dma_buf_te_kunmap,
+
+	/* nop handlers for mandatory functions we ignore */
+	.kmap_atomic = dma_buf_te_kmap_atomic
+};
+
+static int do_dma_buf_te_ioctl_version(struct dma_buf_te_ioctl_version __user *buf)
+{
+	struct dma_buf_te_ioctl_version v;
+
+	if (copy_from_user(&v, buf, sizeof(v)))
+		return -EFAULT;
+
+	if (v.op != DMA_BUF_TE_ENQ)
+		return -EFAULT;
+
+	v.op = DMA_BUF_TE_ACK;
+	v.major = DMA_BUF_TE_VER_MAJOR;
+	v.minor = DMA_BUF_TE_VER_MINOR;
+
+	if (copy_to_user(buf, &v, sizeof(v)))
+		return -EFAULT;
+	else
+		return 0;
+}
+
+static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, bool contiguous)
+{
+	struct dma_buf_te_ioctl_alloc alloc_req;
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dma_buf;
+	int i = 0;
+	int fd;
+
+	if (copy_from_user(&alloc_req, buf, sizeof(alloc_req))) {
+		dev_err(te_device.this_device, "%s: couldn't get user data", __func__);
+		goto no_input;
+	}
+
+	if (!alloc_req.size) {
+		dev_err(te_device.this_device, "%s: no size specified", __func__);
+		goto invalid_size;
+	}
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* Whilst it is possible to allocate larger buffer, we won't be able to
+	 * map it during actual usage (mmap() still succeeds). We fail here so
+	 * userspace code can deal with it early than having driver failure
+	 * later on. */
+	if (alloc_req.size > SG_MAX_SINGLE_ALLOC) {
+		dev_err(te_device.this_device, "%s: buffer size of %llu pages exceeded the mapping limit of %lu pages",
+				__func__, alloc_req.size, SG_MAX_SINGLE_ALLOC);
+		goto invalid_size;
+	}
+#endif
+
+	alloc = kzalloc(sizeof(struct dma_buf_te_alloc), GFP_KERNEL);
+	if (NULL == alloc) {
+		dev_err(te_device.this_device, "%s: couldn't alloc object", __func__);
+		goto no_alloc_object;
+	}
+
+	alloc->nr_pages = alloc_req.size;
+	alloc->contiguous = contiguous;
+
+	alloc->pages = kzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL);
+	if (!alloc->pages) {
+		dev_err(te_device.this_device,
+				"%s: couldn't alloc %d page structures", __func__,
+				alloc->nr_pages);
+		goto free_alloc_object;
+	}
+
+	if (contiguous) {
+		dma_addr_t dma_aux;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO,
+				DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO, &attrs);
+#else
+		alloc->contig_cpu_addr = dma_alloc_writecombine(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO);
+#endif
+		if (!alloc->contig_cpu_addr) {
+			dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %d pages", __func__, alloc->nr_pages);
+			goto free_page_struct;
+		}
+		dma_aux = alloc->contig_dma_addr;
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = pfn_to_page(PFN_DOWN(dma_aux));
+			dma_aux += PAGE_SIZE;
+		}
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+			if (NULL == alloc->pages[i]) {
+				dev_err(te_device.this_device, "%s: couldn't alloc page", __func__);
+				goto no_page;
+			}
+		}
+	}
+
+	/* alloc ready, let's export it */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0))
+	{
+		struct dma_buf_export_info export_info = {
+			.exp_name = "dma_buf_te",
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0))
+			.owner = THIS_MODULE,
+#endif
+			.ops = &dma_buf_te_ops,
+			.size = alloc->nr_pages << PAGE_SHIFT,
+			.flags = O_CLOEXEC | O_RDWR,
+			.priv = alloc,
+		};
+
+		dma_buf = dma_buf_export(&export_info);
+	}
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0))
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR, NULL);
+#else
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR);
+#endif
+
+	if (IS_ERR_OR_NULL(dma_buf)) {
+		dev_err(te_device.this_device, "%s: couldn't export dma_buf", __func__);
+		goto no_export;
+	}
+
+	/* get fd for buf */
+	fd = dma_buf_fd(dma_buf, O_CLOEXEC);
+
+	if (fd < 0) {
+		dev_err(te_device.this_device, "%s: couldn't get fd from dma_buf", __func__);
+		goto no_fd;
+	}
+
+	return fd;
+
+no_fd:
+	dma_buf_put(dma_buf);
+no_export:
+	/* i still valid */
+no_page:
+	if (contiguous) {
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr,
+						alloc->contig_dma_addr,
+						DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		while (i-- > 0)
+			__free_page(alloc->pages[i]);
+	}
+free_page_struct:
+	kfree(alloc->pages);
+free_alloc_object:
+	kfree(alloc);
+no_alloc_object:
+invalid_size:
+no_input:
+	return -EFAULT;
+}
+
+static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg)
+{
+	struct dma_buf_te_ioctl_status status;
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&status, arg, sizeof(status)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(status.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, get the current status */
+	alloc = dmabuf->priv;
+
+	/* lock while reading status to take a snapshot */
+	mutex_lock(&dmabuf->lock);
+	status.attached_devices = alloc->nr_attached_devices;
+	status.device_mappings = alloc->nr_device_mappings;
+	status.cpu_mappings = alloc->nr_cpu_mappings;
+	mutex_unlock(&dmabuf->lock);
+
+	if (copy_to_user(arg, &status, sizeof(status)))
+		goto err_have_dmabuf;
+
+	/* All OK */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing __user *arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_set_failing f;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, set the fail modes */
+	alloc = dmabuf->priv;
+	/* lock to set the fail modes atomically */
+	mutex_lock(&dmabuf->lock);
+	alloc->fail_attach = f.fail_attach;
+	alloc->fail_map    = f.fail_map;
+	alloc->fail_mmap   = f.fail_mmap;
+	mutex_unlock(&dmabuf->lock);
+
+	/* success */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
+{
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	unsigned int count;
+	unsigned int offset = 0;
+	int ret = 0;
+	int i;
+
+	attachment = dma_buf_attach(dma_buf, te_device.this_device);
+	if (IS_ERR_OR_NULL(attachment))
+		return -EBUSY;
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto no_import;
+	}
+
+	ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+	if (ret)
+		goto no_cpu_access;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		for (i = 0; i < sg_dma_len(sg); i = i + PAGE_SIZE) {
+			void *addr;
+
+			addr = dma_buf_kmap(dma_buf, i >> PAGE_SHIFT);
+			if (!addr) {
+				/* dma_buf_kmap is unimplemented in exynos and returns NULL */
+				ret = -EPERM;
+				goto no_kmap;
+			}
+			memset(addr, value, PAGE_SIZE);
+			dma_buf_kunmap(dma_buf, i >> PAGE_SHIFT, addr);
+		}
+		offset += sg_dma_len(sg);
+	}
+
+no_kmap:
+	dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+no_cpu_access:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+no_import:
+	dma_buf_detach(dma_buf, attachment);
+	return ret;
+}
+
+static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
+{
+
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_fill f;
+	int ret;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	ret = dma_te_buf_fill(dmabuf, f.value);
+	dma_buf_put(dmabuf);
+
+	return ret;
+}
+
+static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case DMA_BUF_TE_VERSION:
+		return do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg);
+	case DMA_BUF_TE_ALLOC:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false);
+	case DMA_BUF_TE_ALLOC_CONT:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true);
+	case DMA_BUF_TE_QUERY:
+		return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg);
+	case DMA_BUF_TE_SET_FAILING:
+		return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg);
+	case DMA_BUF_TE_FILL:
+		return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations dma_buf_te_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = dma_buf_te_ioctl,
+	.compat_ioctl = dma_buf_te_ioctl,
+};
+
+static int __init dma_buf_te_init(void)
+{
+	int res;
+	te_device.minor = MISC_DYNAMIC_MINOR;
+	te_device.name = "dma_buf_te";
+	te_device.fops = &dma_buf_te_fops;
+
+	res = misc_register(&te_device);
+	if (res) {
+		printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n");
+		return res;
+	}
+	te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	dev_info(te_device.this_device, "dma_buf_te ready\n");
+	return 0;
+
+}
+
+static void __exit dma_buf_te_exit(void)
+{
+	misc_deregister(&te_device);
+}
+
+module_init(dma_buf_te_init);
+module_exit(dma_buf_te_exit);
+MODULE_LICENSE("GPL");
+
diff --git a/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/sconscript b/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
new file mode 100755
index 0000000..5f42141
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+mod = env.BuildKernelModule('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', Glob('*.c'))
+env.KernelObjTarget('dma-buf-test-exporter', mod)
diff --git a/midgard/r15p0/kernel/drivers/base/kds/Kbuild b/midgard/r15p0/kernel/drivers/base/kds/Kbuild
new file mode 100755
index 0000000..a88acd8
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/kds/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_KDS) += kds.o
+obj-$(CONFIG_KDS_TEST) += kds_test.o
diff --git a/midgard/r15p0/kernel/drivers/base/kds/Kconfig b/midgard/r15p0/kernel/drivers/base/kds/Kconfig
new file mode 100755
index 0000000..5f96165
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/kds/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config KDS
+	tristate "Kernel dependency system"
+	help
+	  This option enables the generic kernel dependency system
diff --git a/midgard/r15p0/kernel/drivers/base/kds/Makefile b/midgard/r15p0/kernel/drivers/base/kds/Makefile
new file mode 100755
index 0000000..364d151
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/kds/Makefile
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+CONFIG_KDS_TEST ?= n
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: kds
+
+kds:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS=m
+
+kds_test:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS_TEST=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r15p0/kernel/drivers/base/kds/kds.c b/midgard/r15p0/kernel/drivers/base/kds/kds.c
new file mode 100755
index 0000000..62612d4
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/kds/kds.c
@@ -0,0 +1,559 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/kds.h>
+#include <linux/kref.h>
+
+#include <asm/atomic.h>
+
+#define KDS_LINK_TRIGGERED (1u << 0)
+#define KDS_LINK_EXCLUSIVE (1u << 1)
+
+#define KDS_INVALID (void *)-2
+#define KDS_RESOURCE (void *)-1
+
+struct kds_resource_set
+{
+	unsigned long         num_resources;
+	unsigned long         pending;
+	struct kds_callback  *cb;
+	void                 *callback_parameter;
+	void                 *callback_extra_parameter;
+	struct list_head      callback_link;
+	struct work_struct    callback_work;
+	atomic_t              cb_queued;
+	/* This resource set will be freed when there are no pending
+	 * callbacks */
+	struct kref           refcount;
+
+	/* This is only initted when kds_waitall() is called. */
+	wait_queue_head_t     wake;
+
+	struct kds_link       resources[0];
+
+};
+
+static DEFINE_SPINLOCK(kds_lock);
+
+static void __resource_set_release(struct kref *ref)
+{
+	struct kds_resource_set *rset = container_of(ref,
+			struct kds_resource_set, refcount);
+
+	kfree(rset);
+}
+
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb)
+{
+	int ret = 0;
+
+	cb->direct = direct;
+	cb->user_cb = user_cb;
+
+	if (!direct)
+	{
+		cb->wq = alloc_workqueue("kds", WQ_UNBOUND | WQ_HIGHPRI, WQ_UNBOUND_MAX_ACTIVE);
+		if (!cb->wq)
+			ret = -ENOMEM;
+	}
+	else
+	{
+		cb->wq = NULL;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(kds_callback_init);
+
+void kds_callback_term(struct kds_callback *cb)
+{
+	if (!cb->direct)
+	{
+		BUG_ON(!cb->wq);
+		destroy_workqueue(cb->wq);
+	}
+	else
+	{
+		BUG_ON(cb->wq);
+	}
+}
+
+EXPORT_SYMBOL(kds_callback_term);
+
+static void kds_do_user_callback(struct kds_resource_set *rset)
+{
+	rset->cb->user_cb(rset->callback_parameter, rset->callback_extra_parameter);
+}
+
+static void kds_queued_callback(struct work_struct *work)
+{
+	struct kds_resource_set *rset;
+	rset = container_of(work, struct kds_resource_set, callback_work);
+
+	atomic_dec(&rset->cb_queued);
+
+	kds_do_user_callback(rset);
+}
+
+static void kds_callback_perform(struct kds_resource_set *rset)
+{
+	if (rset->cb->direct)
+		kds_do_user_callback(rset);
+	else
+	{
+		int result;
+
+		atomic_inc(&rset->cb_queued);
+
+		result = queue_work(rset->cb->wq, &rset->callback_work);
+		/* if we got a 0 return it means we've triggered the same rset twice! */
+		WARN_ON(!result);
+	}
+}
+
+void kds_resource_init(struct kds_resource * const res)
+{
+	BUG_ON(!res);
+	INIT_LIST_HEAD(&res->waiters.link);
+	res->waiters.parent = KDS_RESOURCE;
+}
+EXPORT_SYMBOL(kds_resource_init);
+
+int kds_resource_term(struct kds_resource *res)
+{
+	unsigned long lflags;
+	BUG_ON(!res);
+	spin_lock_irqsave(&kds_lock, lflags);
+	if (!list_empty(&res->waiters.link))
+	{
+		spin_unlock_irqrestore(&kds_lock, lflags);
+		printk(KERN_ERR "ERROR: KDS resource is still in use\n");
+		return -EBUSY;
+	}
+	res->waiters.parent = KDS_INVALID;
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	return 0;
+}
+EXPORT_SYMBOL(kds_resource_term);
+
+int kds_async_waitall(
+		struct kds_resource_set ** const pprset,
+		struct kds_callback      *cb,
+		void                     *callback_parameter,
+		void                     *callback_extra_parameter,
+		int                       number_resources,
+		unsigned long            *exclusive_access_bitmap,
+		struct kds_resource     **resource_list)
+{
+	struct kds_resource_set *rset = NULL;
+	unsigned long lflags;
+	int i;
+	int triggered;
+	int err = -EFAULT;
+
+	BUG_ON(!pprset);
+	BUG_ON(!resource_list);
+	BUG_ON(!cb);
+
+	WARN_ONCE(number_resources > 10, "Waiting on a high numbers of resources may increase latency, see documentation.");
+
+	rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+	if (!rset)
+	{
+		return -ENOMEM;
+	}
+
+	rset->num_resources = number_resources;
+	rset->pending = number_resources;
+	rset->cb = cb;
+	rset->callback_parameter = callback_parameter;
+	rset->callback_extra_parameter = callback_extra_parameter;
+	INIT_LIST_HEAD(&rset->callback_link);
+	INIT_WORK(&rset->callback_work, kds_queued_callback);
+	atomic_set(&rset->cb_queued, 0);
+	kref_init(&rset->refcount);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		INIT_LIST_HEAD(&rset->resources[i].link);
+		rset->resources[i].parent = rset;
+	}
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		unsigned long link_state = 0;
+
+		if (test_bit(i, exclusive_access_bitmap))
+		{
+			link_state |= KDS_LINK_EXCLUSIVE;
+		}
+
+		/* no-one else waiting? */
+		if (list_empty(&resource_list[i]->waiters.link))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		/* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+		else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+				(((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		rset->resources[i].state = link_state;
+
+		/* avoid double wait (hang) */
+		if (!list_empty(&resource_list[i]->waiters.link))
+		{
+			/* adding same rset again? */
+			if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+			{
+				goto roll_back;
+			}
+		}
+		list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+	}
+
+	triggered = (rset->pending == 0);
+
+	/* set the pointer before the callback is called so it sees it */
+	*pprset = rset;
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	if (triggered)
+	{
+		/* all resources obtained, trigger callback */
+		kds_callback_perform(rset);
+	}
+
+	return 0;
+
+roll_back:
+	/* roll back */
+	while (i-- > 0)
+	{
+		list_del(&rset->resources[i].link);
+	}
+	err = -EINVAL;
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	kfree(rset);
+	return err;
+}
+EXPORT_SYMBOL(kds_async_waitall);
+
+static void wake_up_sync_call(void *callback_parameter, void *callback_extra_parameter)
+{
+	wait_queue_head_t *wait = (wait_queue_head_t *)callback_parameter;
+	wake_up(wait);
+}
+
+static struct kds_callback sync_cb =
+{
+	wake_up_sync_call,
+	1,
+	NULL,
+};
+
+struct kds_resource_set *kds_waitall(
+		int                   number_resources,
+		unsigned long        *exclusive_access_bitmap,
+		struct kds_resource **resource_list,
+		unsigned long         jiffies_timeout)
+{
+	struct kds_resource_set *rset;
+	unsigned long lflags;
+	int i;
+	int triggered = 0;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+	if (!rset)
+		return rset;
+
+	rset->num_resources = number_resources;
+	rset->pending = number_resources;
+	init_waitqueue_head(&rset->wake);
+	INIT_LIST_HEAD(&rset->callback_link);
+	INIT_WORK(&rset->callback_work, kds_queued_callback);
+	atomic_set(&rset->cb_queued, 0);
+	kref_init(&rset->refcount);
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		unsigned long link_state = 0;
+
+		if (test_bit(i, exclusive_access_bitmap))
+		{
+			link_state |= KDS_LINK_EXCLUSIVE;
+		}
+
+		if (list_empty(&resource_list[i]->waiters.link))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		/* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+		else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+				(((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+
+		INIT_LIST_HEAD(&rset->resources[i].link);
+		rset->resources[i].parent = rset;
+		rset->resources[i].state = link_state;
+
+		/* avoid double wait (hang) */
+		if (!list_empty(&resource_list[i]->waiters.link))
+		{
+			/* adding same rset again? */
+			if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+			{
+				goto roll_back;
+			}
+		}
+
+		list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+	}
+
+	if (rset->pending == 0)
+		triggered = 1;
+	else
+	{
+		rset->cb = &sync_cb;
+		rset->callback_parameter = &rset->wake;
+		rset->callback_extra_parameter = NULL;
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	if (!triggered)
+	{
+		long wait_res = 0;
+		long timeout = (jiffies_timeout == KDS_WAIT_BLOCKING) ?
+				MAX_SCHEDULE_TIMEOUT : jiffies_timeout;
+
+		if (timeout)
+		{
+			wait_res = wait_event_interruptible_timeout(rset->wake,
+					rset->pending == 0, timeout);
+		}
+
+		if ((wait_res == -ERESTARTSYS) || (wait_res == 0))
+		{
+			/* use \a kds_resource_set_release to roll back */
+			kds_resource_set_release(&rset);
+			return ERR_PTR(wait_res);
+		}
+	}
+	return rset;
+
+roll_back:
+	/* roll back */
+	while (i-- > 0)
+	{
+		list_del(&rset->resources[i].link);
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	kfree(rset);
+	return ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL(kds_waitall);
+
+static void trigger_new_rset_owner(struct kds_resource_set *rset,
+		struct list_head *triggered)
+{
+	if (0 == --rset->pending) {
+		/* new owner now triggered, track for callback later */
+		kref_get(&rset->refcount);
+		list_add(&rset->callback_link, triggered);
+	}
+}
+
+static void __kds_resource_set_release_common(struct kds_resource_set *rset)
+{
+	struct list_head triggered = LIST_HEAD_INIT(triggered);
+	struct kds_resource_set *it;
+	unsigned long lflags;
+	int i;
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < rset->num_resources; i++)
+	{
+		struct kds_resource *resource;
+		struct kds_link *it = NULL;
+
+		/* fetch the previous entry on the linked list */
+		it = list_entry(rset->resources[i].link.prev, struct kds_link, link);
+		/* unlink ourself */
+		list_del(&rset->resources[i].link);
+
+		/* any waiters? */
+		if (list_empty(&it->link))
+			continue;
+
+		/* were we the head of the list? (head if prev is a resource) */
+		if (it->parent != KDS_RESOURCE)
+		{
+			if ((it->state & KDS_LINK_TRIGGERED) && !(it->state & KDS_LINK_EXCLUSIVE))
+			{
+				/*
+				 * previous was triggered and not exclusive, so we
+				 * trigger non-exclusive until end-of-list or first
+				 * exclusive
+				 */
+
+				struct kds_link *it_waiting = it;
+
+				list_for_each_entry(it, &it_waiting->link, link)
+				{
+					/* exclusive found, stop triggering */
+					if (it->state & KDS_LINK_EXCLUSIVE)
+						break;
+
+					it->state |= KDS_LINK_TRIGGERED;
+					/* a parent to update? */
+					if (it->parent != KDS_RESOURCE)
+						trigger_new_rset_owner(
+								it->parent,
+								&triggered);
+				}
+			}
+			continue;
+		}
+
+		/* we were the head, find the kds_resource */
+		resource = container_of(it, struct kds_resource, waiters);
+
+		/* we know there is someone waiting from the any-waiters test above */
+
+		/* find the head of the waiting list */
+		it = list_first_entry(&resource->waiters.link, struct kds_link, link);
+
+		/* new exclusive owner? */
+		if (it->state & KDS_LINK_EXCLUSIVE)
+		{
+			/* link now triggered */
+			it->state |= KDS_LINK_TRIGGERED;
+			/* a parent to update? */
+			trigger_new_rset_owner(it->parent, &triggered);
+		}
+		/* exclusive releasing ? */
+		else if (rset->resources[i].state & KDS_LINK_EXCLUSIVE)
+		{
+			/* trigger non-exclusive until end-of-list or first exclusive */
+			list_for_each_entry(it, &resource->waiters.link, link)
+			{
+				/* exclusive found, stop triggering */
+				if (it->state & KDS_LINK_EXCLUSIVE)
+					break;
+
+				it->state |= KDS_LINK_TRIGGERED;
+				/* a parent to update? */
+				trigger_new_rset_owner(it->parent, &triggered);
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	while (!list_empty(&triggered))
+	{
+		it = list_first_entry(&triggered, struct kds_resource_set, callback_link);
+		list_del(&it->callback_link);
+		kds_callback_perform(it);
+
+		/* Free the resource set if no callbacks pending */
+		kref_put(&it->refcount, &__resource_set_release);
+	}
+}
+
+void kds_resource_set_release(struct kds_resource_set **pprset)
+{
+	struct kds_resource_set *rset;
+	int queued;
+
+	rset = cmpxchg(pprset, *pprset, NULL);
+
+	if (!rset)
+	{
+		/* caught a race between a cancelation
+		 * and a completion, nothing to do */
+		return;
+	}
+
+	__kds_resource_set_release_common(rset);
+
+	/*
+	 * Caller is responsible for guaranteeing that callback work is not
+	 * pending (i.e. its running or completed) prior to calling release.
+	 */
+	queued = atomic_read(&rset->cb_queued);
+	BUG_ON(queued);
+
+	kref_put(&rset->refcount, &__resource_set_release);
+}
+EXPORT_SYMBOL(kds_resource_set_release);
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset)
+{
+	struct kds_resource_set *rset;
+
+	rset = cmpxchg(pprset, *pprset, NULL);
+	if (!rset)
+	{
+		/* caught a race between a cancelation
+		 * and a completion, nothing to do */
+		return;
+	}
+
+	__kds_resource_set_release_common(rset);
+
+	/*
+	 * In the case of a kds async wait cancellation ensure the deferred
+	 * call back does not get scheduled if a trigger fired at the same time
+	 * to release the wait.
+	 */
+	cancel_work_sync(&rset->callback_work);
+
+	kref_put(&rset->refcount, &__resource_set_release);
+}
+EXPORT_SYMBOL(kds_resource_set_release_sync);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/midgard/r15p0/kernel/drivers/base/kds/sconscript b/midgard/r15p0/kernel/drivers/base/kds/sconscript
new file mode 100755
index 0000000..75add37
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/kds/sconscript
@@ -0,0 +1,42 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+#Android uses sync_pt to accomplish KDS functionality.
+#Midgard KDS is not used by Android
+if env['os'] == 'android':
+    Return()
+
+if Glob('tests/sconscript'):
+    SConscript('tests/sconscript')
+
+# If KDS is built into the kernel already we skip building the module ourselves
+build_kds = not env.KernelConfigEnabled("CONFIG_KDS")
+
+# Build KDS module
+if build_kds:
+    kds_mod = env.BuildKernelModule('$STATIC_LIB_PATH/kds.ko', ['kds.c'],
+                                    make_args = ['kds'])
+    env.KernelObjTarget('kds', kds_mod)
+
+# Build KDS test module
+if int(env['unit']) == 1:
+    kds_test_mod = env.BuildKernelModule('$STATIC_LIB_PATH/kds_test.ko',
+                                         ['kds_test.c'],
+                                         make_args = ['kds_test'])
+    env.KernelObjTarget('kds', kds_test_mod)
+    if build_kds:
+        env.Depends(kds_test_mod, kds_mod)
diff --git a/midgard/r15p0/kernel/drivers/base/sconscript b/midgard/r15p0/kernel/drivers/base/sconscript
new file mode 100755
index 0000000..84de8c4
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/sconscript
@@ -0,0 +1,36 @@
+#
+# (C) COPYRIGHT 2010-2014, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import( 'env' )
+
+if Glob('bus_logger/sconscript'):
+	if env['buslog'] == '1':
+		SConscript('bus_logger/sconscript')
+
+if Glob('mali_fpga_sysctl/sconscript'):
+	SConscript('mali_fpga_sysctl/sconscript')
+
+if Glob('dma_buf_lock/sconscript'):
+	SConscript('dma_buf_lock/sconscript')
+
+if Glob('kds/sconscript'):
+	SConscript('kds/sconscript')
+
+if Glob('ump/sconscript'):
+	SConscript('ump/sconscript')
+
+if Glob('dma_buf_test_exporter/sconscript'):
+	SConscript('dma_buf_test_exporter/sconscript')
+
diff --git a/midgard/r15p0/kernel/drivers/base/ump/Kbuild b/midgard/r15p0/kernel/drivers/base/ump/Kbuild
new file mode 100755
index 0000000..2bbdba2
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += src/
+
diff --git a/midgard/r15p0/kernel/drivers/base/ump/Kconfig b/midgard/r15p0/kernel/drivers/base/ump/Kconfig
new file mode 100755
index 0000000..f7451e6
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/Kconfig
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config UMP
+	tristate "Enable Unified Memory Provider (UMP) support"
+	default n
+    help
+	  Enable this option to build support for the ARM UMP module.
+	  UMP can be used by the Mali T6xx module to improve performance
+	  by reducing the copying of data by sharing memory.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate one module, called ump.
diff --git a/midgard/r15p0/kernel/drivers/base/ump/docs/Doxyfile b/midgard/r15p0/kernel/drivers/base/ump/docs/Doxyfile
new file mode 100755
index 0000000..fbec8eb
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/docs/Doxyfile
@@ -0,0 +1,125 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/include/linux/ump-common.h ../../kernel/include/linux/ump.h
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           += ../../kernel/drivers/base/ump
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           +=
diff --git a/midgard/r15p0/kernel/drivers/base/ump/docs/sconscript b/midgard/r15p0/kernel/drivers/base/ump/docs/sconscript
new file mode 100755
index 0000000..521278d
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/docs/sconscript
@@ -0,0 +1,31 @@
+#
+# (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+doxygen_sources = [
+	'Doxyfile',
+	Glob('*.png'),
+	Glob('../*.h'),
+	Glob('../src/*.h') ]
+
+if env['doc'] == '1':
+        doxygen_target = env.Command('doxygen/html/index.html', doxygen_sources,
+                                     ['cd ${SOURCE.dir} && doxygen'])
+        env.Clean(doxygen_target, './doxygen')
+
+        Alias('doxygen', doxygen_target)
+
diff --git a/midgard/r15p0/kernel/drivers/base/ump/example_kernel_api.c b/midgard/r15p0/kernel/drivers/base/ump/example_kernel_api.c
new file mode 100755
index 0000000..858dd8b
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/example_kernel_api.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Example routine to display information about an UMP allocation
+ * The routine takes an secure_id which can come from a different kernel module
+ * or from a client application (i.e. an ioctl).
+ * It creates a ump handle from the secure id (which validates the secure id)
+ * and if successful dumps the physical memory information.
+ * It follows the API and pins the memory while "using" the physical memory.
+ * Finally it calls the release function to indicate it's finished with the handle.
+ *
+ * If the function can't look up the handle it fails with return value -1.
+ * If the testy succeeds then it return 0.
+ * */
+
+static int display_ump_memory_information(ump_secure_id secure_id)
+{
+	const ump_dd_physical_block_64 * ump_blocks = NULL;
+	ump_dd_handle ump_mem;
+	uint64_t nr_blocks;
+	int i;
+	ump_alloc_flags flags;
+
+	/* get a handle from the secure id */
+	ump_mem = ump_dd_from_secure_id(secure_id);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE == ump_mem)
+	{
+		/* invalid ID received */
+		return -1;
+	}
+
+	/* at this point we know we've added a reference to the ump allocation, so we must release it with ump_dd_release */
+
+	ump_dd_phys_blocks_get_64(ump_mem, &nr_blocks, &ump_blocks);
+	flags = ump_dd_allocation_flags_get(ump_mem);
+
+	printf("UMP allocation with secure ID %u consists of %zd physical block(s):\n", secure_id, nr_blocks);
+
+	for(i=0; i<nr_blocks; ++i)
+	{
+		printf("\tBlock %d: 0x%08zX size 0x%08zX\n", i, ump_blocks[i].addr, ump_blocks[i].size);
+	}
+
+	printf("and was allocated using the following bitflag combo:  0x%lX\n", flags);
+
+	ump_dd_release(ump_mem);
+
+	return 0;
+}
+
diff --git a/midgard/r15p0/kernel/drivers/base/ump/example_user_api.c b/midgard/r15p0/kernel/drivers/base/ump/example_user_api.c
new file mode 100755
index 0000000..d143a64
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/example_user_api.c
@@ -0,0 +1,153 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <ump/ump.h>
+#include <memory.h>
+#include <stdio.h>
+
+/*
+ * Example routine to exercise the user space UMP api.
+ * This routine initializes the UMP api and allocates some CPU+device X memory.
+ * No usage hints are given, so the driver will use the default cacheability policy.
+ * With the allocation it creates a duplicate handle and plays with the reference count.
+ * Then it simulates interacting with a device and contains pseudo code for the device.
+ *
+ * If any error is detected correct cleanup will be performed and -1 will be returned.
+ * If successful then 0 will be returned.
+ */
+
+static int test_ump_user_api(void)
+{
+	/* This is the size we try to allocate*/
+	const size_t alloc_size = 4096;
+
+	ump_handle h = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_copy = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+	void * mapping = NULL;
+
+	ump_result ump_api_res;
+	int result = -1;
+
+	ump_secure_id id;
+
+	size_t size_returned;
+
+	ump_api_res = ump_open();
+	if (UMP_OK != ump_api_res)
+	{
+		/* failed to open an ump session */
+		/* early out */
+		return -1;
+	}
+
+	h = ump_allocate_64(alloc_size, UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | UMP_PROT_X_RD | UMP_PROT_X_WR);
+	/* the refcount is now 1 */
+	if (UMP_INVALID_MEMORY_HANDLE == h)
+	{
+		/* allocation failure */
+		goto cleanup;
+	}
+
+	/* this is how we could share this allocation with another process */
+
+	/* in process A: */
+	id = ump_secure_id_get(h);
+	/* still ref count 1 */
+	/* send the id to process B */
+
+	/* in process B: */
+	/* receive the id from A */
+	h_clone = ump_from_secure_id(id);
+	/* the ref count of the allocation is now 2 (one from each handle to it) */
+	/* do something ... */
+	/* release our clone */
+	ump_release(h_clone); /* safe to call even if ump_from_secure_id failed */
+	h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+
+	/* a simple save-for-future-use logic inside the driver would just copy the handle (but add a ref manually too!) */
+	/*
+	 * void assign_memory_to_job(h)
+	 * {
+	  */
+	h_copy = h;
+	ump_retain(h_copy); /* manual retain needed as we just assigned the handle, now 2 */
+	/*
+	 * }
+	 *
+	 * void job_completed(void)
+	 * {
+	 */
+	 ump_release(h_copy); /* normal handle release as if we got via an ump_allocate */
+	 h_copy = UMP_INVALID_MEMORY_HANDLE;
+	 /*
+	 * }
+	 */
+	
+	/* we're now back at ref count 1, and only h is a valid handle */
+	/* enough handle duplication show-off, let's play with the contents instead */
+
+	mapping = ump_map(h, 0, alloc_size);
+	if (NULL == mapping)
+	{
+		/* mapping failure, either out of address space or some other error */
+		goto cleanup;
+	}
+
+	memset(mapping, 0, alloc_size);
+
+	/* let's pretend we're going to start some hw device on this buffer and read the result afterwards */
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN, mapping, alloc_size);
+ 	/*
+		device cache invalidate
+
+		memory barrier
+
+		start device
+
+		memory barrier
+
+		wait for device
+
+		memory barrier
+
+		device cache clean
+
+		memory barrier
+	*/
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN_AND_INVALIDATE, mapping, alloc_size);
+
+	/* we could now peek at the result produced by the hw device, which is now accessible via our mapping */
+
+	/* unmap the buffer when we're done with it */
+	ump_unmap(h, mapping, alloc_size);
+
+	result = 0;
+
+cleanup:
+	ump_release(h);
+	h = UMP_INVALID_MEMORY_HANDLE;
+
+	ump_close();
+
+	return result;
+}
+
diff --git a/midgard/r15p0/kernel/drivers/base/ump/sconscript b/midgard/r15p0/kernel/drivers/base/ump/sconscript
new file mode 100755
index 0000000..b4aa8b6
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+if Glob('src/sconscript') and int(env['ump']) == 1:
+	SConscript( 'src/sconscript' )
+	SConscript( 'docs/sconscript' )
+
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/Kbuild b/midgard/r15p0/kernel/drivers/base/ump/src/Kbuild
new file mode 100755
index 0000000..de6d307
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/Kbuild
@@ -0,0 +1,50 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Paths required for build
+UMP_PATH = $(src)/../..
+UMP_DEVICEDRV_PATH = $(src)/.
+
+# Set up defaults if not defined by the user
+MALI_UNIT_TEST ?= 0
+
+SRC :=\
+	common/ump_kernel_core.c \
+	common/ump_kernel_descriptor_mapping.c \
+	linux/ump_kernel_linux.c \
+	linux/ump_kernel_linux_mem.c
+
+UNIT_TEST_DEFINES=
+ifeq ($(MALI_UNIT_TEST), 1)
+	MALI_DEBUG ?= 1
+
+	UNIT_TEST_DEFINES = -DMALI_UNIT_TEST=1 \
+	                    -DMALI_DEBUG=$(MALI_DEBUG)
+endif
+
+# Use our defines when compiling
+ccflags-y += -I$(UMP_PATH) -I$(UMP_DEVICEDRV_PATH) $(UNIT_TEST_DEFINES)
+
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_UMP) += ump.o
+ifeq ($(CONFIG_ION),y)
+ccflags-y += -I$(srctree)/drivers/staging/android/ion -I$(srctree)/include/linux
+obj-$(CONFIG_UMP) += imports/ion/ump_kernel_import_ion.o
+endif
+
+# Tell the Linux build system to enable building of our .c files
+ump-y := $(SRC:.c=.o)
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/Makefile b/midgard/r15p0/kernel/drivers/base/ump/src/Makefile
new file mode 100755
index 0000000..45428ad
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/Makefile
@@ -0,0 +1,81 @@
+#
+# (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+ifneq ($(KBUILD_EXTMOD),)
+include $(KBUILD_EXTMOD)/Makefile.common
+else
+include ./Makefile.common
+endif
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+RELATIVE_ROOT=../../../../..
+ROOT = $(CURDIR)/$(RELATIVE_ROOT)
+
+EXTRA_CFLAGS=-I$(CURDIR)/../../../../include
+
+ifeq ($(MALI_UNIT_TEST),1)
+	EXTRA_CFLAGS += -DMALI_UNIT_TEST=$(MALI_UNIT_TEST)
+endif
+
+# Get any user defined KDIR-<names> or maybe even a hardcoded KDIR
+-include KDIR_CONFIGURATION
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+CONFIG ?= $(ARCH)
+
+# default cpu to select
+CPU ?= $(shell uname -m)
+
+# look up KDIR based om CPU selection
+KDIR ?= $(KDIR-$(CPU))
+
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(CPU))
+endif
+
+# Validate selected config
+ifneq ($(shell [ -d arch-$(CONFIG) ] && [ -f arch-$(CONFIG)/config.h ] && echo "OK"), OK)
+$(warning Current directory is $(shell pwd))
+$(error No configuration found for config $(CONFIG). Check that arch-$(CONFIG)/config.h exists)
+else
+# Link arch to the selected arch-config directory
+$(shell [ -L arch ] && rm arch)
+$(shell ln -sf arch-$(CONFIG) arch)
+$(shell touch arch/config.h)
+endif
+
+EXTRA_SYMBOLS=
+
+ifeq ($(MALI_UNIT_TEST),1)
+	KBASE_PATH=$(ROOT)/kernel/drivers/gpu/arm/midgard
+	EXTRA_SYMBOLS+=$(KBASE_PATH)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+KDS_PATH=$(ROOT)/kernel/drivers/base/kds
+EXTRA_SYMBOLS+=$(KDS_PATH)/Module.symvers
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="$(EXTRA_CFLAGS) $(SCONS_CFLAGS)" CONFIG_UMP=m KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+kernelrelease:
+	$(MAKE) -C $(KDIR) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" kernelrelease
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/Makefile.common b/midgard/r15p0/kernel/drivers/base/ump/src/Makefile.common
new file mode 100755
index 0000000..f29a4c1
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/Makefile.common
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2008-2010, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+SRC = $(UMP_FILE_PREFIX)/common/ump_kernel_core.c \
+      $(UMP_FILE_PREFIX)/common/ump_kernel_descriptor_mapping.c
+
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/arch-arm/config.h b/midgard/r15p0/kernel/drivers/base/ump/src/arch-arm/config.h
new file mode 100755
index 0000000..152d98f
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/arch-arm/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/arch-arm64/config.h b/midgard/r15p0/kernel/drivers/base/ump/src/arch-arm64/config.h
new file mode 100755
index 0000000..cfb14ca
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/arch-arm64/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c b/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c
new file mode 100755
index 0000000..07aa077
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c
@@ -0,0 +1,756 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* module headers */
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+/* local headers */
+#include <common/ump_kernel_core.h>
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+#define UMP_FLAGS_RANGE ((UMP_PROT_SHAREABLE<<1) - 1u)
+
+static umpp_device device;
+
+ump_result umpp_core_constructor(void)
+{
+	mutex_init(&device.secure_id_map_lock);
+	device.secure_id_map = umpp_descriptor_mapping_create(UMP_EXPECTED_IDS, UMP_MAX_IDS);
+	if (NULL != device.secure_id_map)
+	{
+		if (UMP_OK == umpp_device_initialize())
+		{
+			return UMP_OK;
+		}
+		umpp_descriptor_mapping_destroy(device.secure_id_map);
+	}
+	mutex_destroy(&device.secure_id_map_lock);
+
+	return UMP_ERROR;
+}
+
+void umpp_core_destructor(void)
+{
+	umpp_device_terminate();
+	umpp_descriptor_mapping_destroy(device.secure_id_map);
+	mutex_destroy(&device.secure_id_map_lock);
+}
+
+umpp_session *umpp_core_session_start(void)
+{
+	umpp_session * session;
+
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
+	if (NULL != session)
+	{
+		mutex_init(&session->session_lock);
+
+		INIT_LIST_HEAD(&session->memory_usage);
+
+		/* try to create import client session, not a failure if they fail to initialize */
+		umpp_import_handlers_init(session);
+	}
+
+	return session;
+}
+
+void umpp_core_session_end(umpp_session *session)
+{
+	umpp_session_memory_usage * usage, *_usage;
+	UMP_ASSERT(session);
+
+	list_for_each_entry_safe(usage, _usage, &session->memory_usage, link)
+	{
+		printk(KERN_WARNING "UMP: Memory usage cleanup, releasing secure ID %d\n", ump_dd_secure_id_get(usage->mem));
+		ump_dd_release(usage->mem);
+		kfree(usage);
+
+	}
+
+	/* we should now not hold any imported memory objects,
+	 * detatch all import handlers */
+	umpp_import_handlers_term(session);
+
+	mutex_destroy(&session->session_lock);
+	kfree(session);
+}
+
+ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	umpp_allocation * alloc;
+	int i;
+
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD ) ) == 0 ||
+	    ( flags & (UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read and one write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL | __GFP_HARDWALL);
+
+	if (NULL == alloc)
+		goto out1;
+
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+	alloc->size = size;
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+#ifdef CONFIG_KDS
+	kds_resource_init(&alloc->kds_res);
+#endif
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	if (0 != umpp_phys_commit(alloc))
+	{
+		goto out2;
+	}
+
+	/* all set up, allocate an ID for it */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	return alloc;
+
+out3:
+	umpp_phys_free(alloc);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+uint64_t ump_dd_size_get_64(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->size;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_size_get(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->size <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->size;
+}
+
+ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->id;
+}
+
+#ifdef CONFIG_KDS
+struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return &alloc->kds_res;
+}
+#endif
+
+ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	return alloc->flags;
+}
+
+ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id)
+{
+	umpp_allocation * alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+
+	mutex_lock(&device.secure_id_map_lock);
+
+	if (0 == umpp_descriptor_mapping_lookup(device.secure_id_map, secure_id, (void**)&alloc))
+	{
+		if (NULL != alloc->filter_func)
+		{
+			if (!alloc->filter_func(secure_id, alloc, alloc->callback_data))
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /* the filter denied access */
+			}
+		}
+
+		/* check permission to access it */
+		if ((UMP_DD_INVALID_MEMORY_HANDLE != alloc) && !(alloc->flags & UMP_PROT_SHAREABLE))
+		{
+			if (alloc->owner != get_current()->pid)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /*no rights for the current process*/
+			}
+		}
+
+		if (UMP_DD_INVALID_MEMORY_HANDLE != alloc)
+		{
+			if( ump_dd_retain(alloc) != UMP_DD_SUCCESS)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+			}
+		}
+	}
+	mutex_unlock(&device.secure_id_map_lock);
+
+	return alloc;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id)
+{
+	return ump_dd_from_secure_id(secure_id);
+}
+
+int ump_dd_retain(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* check for overflow */
+	while(1)
+	{
+		int refcnt = atomic_read(&alloc->refcount);
+		if (refcnt + 1 > 0)
+		{
+			if(atomic_cmpxchg(&alloc->refcount, refcnt, refcnt + 1) == refcnt)
+			{
+				return 0;
+			}
+		}
+		else
+		{
+			return -EBUSY;
+		}
+	}
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_add(ump_dd_handle mem)
+{
+	ump_dd_retain(mem);
+}
+
+
+void ump_dd_release(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+	uint32_t new_cnt;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* secure the id for lookup while releasing */
+	mutex_lock(&device.secure_id_map_lock);
+
+	/* do the actual release */
+	new_cnt = atomic_sub_return(1, &alloc->refcount);
+	if (0 == new_cnt)
+	{
+		/* remove from the table as this was the last ref */
+		umpp_descriptor_mapping_remove(device.secure_id_map, alloc->id);
+	}
+
+	/* release the lock as early as possible */
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if (0 != new_cnt)
+	{
+		/* exit if still have refs */
+		return;
+	}
+
+	UMP_ASSERT(list_empty(&alloc->map_list));
+
+#ifdef CONFIG_KDS
+	if (kds_resource_term(&alloc->kds_res))
+	{
+		printk(KERN_ERR "ump_dd_release: kds_resource_term failed,"
+				"unable to release UMP allocation\n");
+		return;
+	}
+#endif
+	/* cleanup */
+	if (NULL != alloc->final_release_func)
+	{
+		alloc->final_release_func(alloc, alloc->callback_data);
+	}
+
+	if (0 == (alloc->management_flags & UMP_MGMT_EXTERNAL))
+	{
+		umpp_phys_free(alloc);
+	}
+	else
+	{
+		kfree(alloc->block_array);
+	}
+
+	mutex_destroy(&alloc->map_list_lock);
+
+	kfree(alloc);
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_release(ump_dd_handle mem)
+{
+	ump_dd_release(mem);
+}
+
+void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(pCount);
+	UMP_ASSERT(pArray);
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+	*pCount = alloc->blocksCount;
+	*pArray = alloc->block_array;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks)
+{
+	const umpp_allocation * alloc;
+	unsigned long i;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	if((uint64_t)num_blocks != alloc->blocksCount)
+	{
+		return UMP_DD_INVALID;
+	}
+
+	for( i = 0; i < num_blocks; i++)
+	{
+		UMP_ASSERT(alloc->block_array[i].addr <= UMP_UINT32_MAX);
+		UMP_ASSERT(alloc->block_array[i].size <= UMP_UINT32_MAX);
+
+		blocks[i].addr = (unsigned long)alloc->block_array[i].addr;
+		blocks[i].size = (unsigned long)alloc->block_array[i].size;
+	}
+
+	return UMP_DD_SUCCESS;
+}
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(block);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	UMP_ASSERT(alloc->block_array[index].addr <= UMP_UINT32_MAX);
+	UMP_ASSERT(alloc->block_array[index].size <= UMP_UINT32_MAX);
+
+	block->addr = (unsigned long)alloc->block_array[index].addr;
+	block->size = (unsigned long)alloc->block_array[index].size;
+
+	return UMP_DD_SUCCESS;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->blocksCount <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->blocksCount;
+}
+
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void *uaddr, size_t size)
+{
+	umpp_cpu_mapping *map;
+
+	void *target_first = uaddr;
+	void *target_last = (void*)((uintptr_t)uaddr - 1 + size);
+
+	if (target_last < target_first) /* wrapped */
+	{
+		return NULL;
+	}
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if ( map->vaddr_start <= target_first &&
+		   (void*)((uintptr_t)map->vaddr_start + (map->nr_pages << PAGE_SHIFT) - 1) >= target_last)
+		{
+			goto out;
+		}
+	}
+	map = NULL;
+out:
+	mutex_unlock(&alloc->map_list_lock);
+
+	return map;
+}
+
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map)
+{
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(map);
+	mutex_lock(&alloc->map_list_lock);
+	list_add(&map->link, &alloc->map_list);
+	mutex_unlock(&alloc->map_list_lock);
+}
+
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target)
+{
+	umpp_cpu_mapping * map;
+
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(target);
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if (map == target)
+		{
+			list_del(&target->link);
+			kfree(target);
+			mutex_unlock(&alloc->map_list_lock);
+			return;
+		}
+	}
+
+	/* not found, error */
+	UMP_ASSERT(0);
+}
+
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const  block_index, uint64_t * const block_internal_offset)
+{
+	uint64_t i;
+
+	for (i = 0 ; i < alloc->blocksCount; i++)
+	{
+		if (offset < alloc->block_array[i].size)
+		{
+			/* found the block_array element containing this offset */
+			*block_index = i;
+			*block_internal_offset = offset;
+			return 0;
+		}
+		offset -= alloc->block_array[i].size;
+	}
+
+	return -ENXIO;
+}
+
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size)
+{
+	umpp_allocation * alloc;
+	void *vaddr;
+	umpp_cpu_mapping * mapping;
+	uint64_t virt_page_off; /* offset of given address from beginning of the virtual mapping */
+	uint64_t phys_page_off; /* offset of the virtual mapping from the beginning of the physical buffer */
+	uint64_t page_count; /* number of pages to sync */
+	uint64_t i;
+	uint64_t block_idx;
+	uint64_t block_offset;
+	uint64_t paddr;
+
+	UMP_ASSERT((UMP_MSYNC_CLEAN == op) || (UMP_MSYNC_CLEAN_AND_INVALIDATE == op));
+
+	alloc = (umpp_allocation*)mem;
+	vaddr = (void*)(uintptr_t)address;
+
+	if((alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+	{
+		/* mapping is not cached */
+		return;
+	}
+
+	mapping = umpp_dd_find_enclosing_mapping(alloc, vaddr, size);
+	if (NULL == mapping)
+	{
+		printk(KERN_WARNING "UMP: Illegal cache sync address %lx\n", (uintptr_t)vaddr);
+		return; /* invalid pointer or size causes out-of-bounds */
+	}
+
+	/* we already know that address + size don't wrap around as umpp_dd_find_enclosing_mapping didn't fail */
+	page_count = ((((((uintptr_t)address + size - 1) & PAGE_MASK) - ((uintptr_t)address & PAGE_MASK))) >> PAGE_SHIFT) + 1;
+	virt_page_off = (vaddr - mapping->vaddr_start) >> PAGE_SHIFT;
+	phys_page_off = mapping->page_off;
+
+	if (umpp_dd_find_start_block(alloc, (virt_page_off + phys_page_off) << PAGE_SHIFT, &block_idx, &block_offset))
+	{
+		/* should not fail as a valid mapping was found, so the phys mem must exists */
+		printk(KERN_WARNING "UMP: Unable to find physical start block with offset %llx\n", virt_page_off + phys_page_off);
+		UMP_ASSERT(0);
+		return;
+	}
+
+	paddr = alloc->block_array[block_idx].addr + block_offset + (((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1));
+
+	for (i = 0; i < page_count; i++)
+	{
+		size_t offset = ((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1);
+		size_t sz = min((size_t)PAGE_SIZE - offset, size);
+
+		/* check if we've overrrun the current block, if so move to the next block */
+		if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+		{
+			block_idx++;
+			UMP_ASSERT(block_idx < alloc->blocksCount);
+			paddr = alloc->block_array[block_idx].addr;
+		}
+
+		if (UMP_MSYNC_CLEAN == op)
+		{
+			ump_sync_to_memory(paddr, vaddr, sz);
+		}
+		else /* (UMP_MSYNC_CLEAN_AND_INVALIDATE == op) already validated on entry */
+		{
+			ump_sync_to_cpu(paddr, vaddr, sz);
+		}
+
+		/* advance to next page  */
+		vaddr = (void*)((uintptr_t)vaddr + sz);
+		size -= sz;
+		paddr += sz;
+	}
+}
+
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	uint64_t size = 0;
+	uint64_t i;
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	for (i = 0; i < num_blocks; i++)
+	{
+		size += blocks[i].size;
+	}
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD
+	    | UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read or write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc),__GFP_HARDWALL | GFP_KERNEL);
+
+	if (NULL == alloc)
+	{
+		goto out1;
+	}
+
+	alloc->block_array = kzalloc(sizeof(ump_dd_physical_block_64) * num_blocks,__GFP_HARDWALL | GFP_KERNEL);
+	if (NULL == alloc->block_array)
+	{
+		goto out2;
+	}
+
+	memcpy(alloc->block_array, blocks, sizeof(ump_dd_physical_block_64) * num_blocks);
+
+#ifdef CONFIG_KDS
+	kds_resource_init(&alloc->kds_res);
+#endif
+	alloc->size = size;
+	alloc->blocksCount = num_blocks;
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+	/* all set up, allocate an ID */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	alloc->management_flags |= UMP_MGMT_EXTERNAL;
+
+	return alloc;
+
+out3:
+	kfree(alloc->block_array);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+
+/*
+ * UMP v1 API
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks)
+{
+	ump_dd_handle mem;
+	ump_dd_physical_block_64 *block_64_array;
+	ump_alloc_flags flags = UMP_V1_API_DEFAULT_ALLOCATION_FLAGS;
+	unsigned long i;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	block_64_array = kzalloc(num_blocks * sizeof(*block_64_array), __GFP_HARDWALL | GFP_KERNEL);
+
+	if(block_64_array == NULL)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/* copy physical blocks */
+	for( i = 0; i < num_blocks; i++)
+	{
+		block_64_array[i].addr = blocks[i].addr;
+		block_64_array[i].size = blocks[i].size;
+	}
+
+	mem = ump_dd_create_from_phys_blocks_64(block_64_array, num_blocks, flags, NULL, NULL, NULL);
+
+	kfree(block_64_array);
+
+	return mem;
+
+}
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h b/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h
new file mode 100755
index 0000000..8cb424d
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h
@@ -0,0 +1,228 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_CORE_H_
+#define _UMP_KERNEL_CORE_H_
+
+
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <asm/atomic.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/cred.h>
+#include <asm/mmu_context.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+#include <linux/ump-common.h>
+#include <ump/src/common/ump_kernel_descriptor_mapping.h>
+
+/* forward decl */
+struct umpp_session;
+
+/**
+ * UMP handle metadata.
+ * Tracks various data about a handle not of any use to user space
+ */
+typedef enum
+{
+	UMP_MGMT_EXTERNAL = (1ul << 0) /**< Handle created via the ump_dd_create_from_phys_blocks interface */
+	/* (1ul << 31) not to be used */
+} umpp_management_flags;
+
+/**
+ * Structure tracking the single global UMP device.
+ * Holds global data like the ID map
+ */
+typedef struct umpp_device
+{
+	struct mutex secure_id_map_lock; /**< Lock protecting access to the map */
+	umpp_descriptor_mapping * secure_id_map; /**< Map of all known secure IDs on the system */
+} umpp_device;
+
+/**
+ * Structure tracking all memory allocations of a UMP allocation.
+ * Tracks info about an mapping so we can verify cache maintenace
+ * operations and help in the unmap cleanup.
+ */
+typedef struct umpp_cpu_mapping
+{
+	struct list_head        link; /**< link to list of mappings for an allocation */
+	void                  *vaddr_start; /**< CPU VA start of the mapping */
+	size_t                nr_pages; /**< Size (in pages) of the mapping */
+	uint64_t              page_off; /**< Offset (in pages) from start of the allocation where the mapping starts */
+	ump_dd_handle         handle; /**< Which handle this mapping is linked to */
+	struct umpp_session * session; /**< Which session created the mapping */
+} umpp_cpu_mapping;
+
+/**
+ * Structure tracking UMP allocation.
+ * Represent a memory allocation with its ID.
+ * Tracks all needed meta-data about an allocation.
+ * */
+typedef struct umpp_allocation
+{
+	ump_secure_id id; /**< Secure ID of the allocation */
+	atomic_t refcount; /**< Usage count */
+
+	ump_alloc_flags flags; /**< Flags for all supported devices */
+	uint32_t management_flags; /**< Managment flags tracking */
+
+	pid_t owner; /**< The process ID owning the memory if not sharable */
+
+	ump_dd_security_filter filter_func; /**< Hook to verify use, called during retains from new clients */
+	ump_dd_final_release_callback final_release_func; /**< Hook called when the last reference is removed */
+	void* callback_data; /**< Additional data given to release hook */
+
+	uint64_t size; /**< Size (in bytes) of the allocation */
+	uint64_t blocksCount; /**< Number of physsical blocks the allocation is built up of */
+	ump_dd_physical_block_64 * block_array; /**< Array, one entry per block, describing block start and length */
+
+	struct mutex     map_list_lock; /**< Lock protecting the map_list */
+	struct list_head map_list; /**< Tracks all CPU VA mappings of this allocation */
+
+#ifdef CONFIG_KDS
+	struct kds_resource kds_res; /**< The KDS resource controlling access to this allocation */
+#endif
+
+	void * backendData; /**< Physical memory backend meta-data */
+} umpp_allocation;
+
+/**
+ * Structure tracking use of UMP memory by a session.
+ * Tracks the use of an allocation by a session so session termination can clean up any outstanding references.
+ * Also protects agains non-matched release calls from user space.
+ */
+typedef struct umpp_session_memory_usage
+{
+	ump_secure_id id; /**< ID being used. For quick look-up */
+	ump_dd_handle mem; /**< Handle being used. */
+
+	/**
+	 * Track how many times has the process retained this handle in the kernel.
+	 * This should usually just be 1(allocated or resolved) or 2(mapped),
+	 * but could be more if someone is playing with the low-level API
+	 * */
+	atomic_t process_usage_count;
+
+	struct list_head link; /**< link to other usage trackers for a session */
+} umpp_session_memory_usage;
+
+/**
+ * Structure representing a session/client.
+ * Tracks the UMP allocations being used by this client.
+ */
+typedef struct umpp_session
+{
+	struct mutex session_lock; /**< Lock for memory usage manipulation */
+	struct list_head memory_usage; /**< list of memory currently being used by the this session */
+	void*  import_handler_data[UMPP_EXTERNAL_MEM_COUNT]; /**< Import modules per-session data pointer */
+} umpp_session;
+
+/**
+ * UMP core setup.
+ * Called by any OS specific startup function to initialize the common part.
+ * @return UMP_OK if core initialized correctly, any other value for errors
+ */
+ump_result umpp_core_constructor(void);
+
+/**
+ * UMP core teardown.
+ * Called by any OS specific unload function to clean up the common part.
+ */
+void umpp_core_destructor(void);
+
+/**
+ * UMP session start.
+ * Called by any OS specific session handler when a new session is detected
+ * @return Non-NULL if a matching core session could be set up. NULL on failure
+ */
+umpp_session *umpp_core_session_start(void);
+
+/**
+ * UMP session end.
+ * Called by any OS specific session handler when a session is ended/terminated.
+ * @param session The core session object returned by ump_core_session_start
+ */
+void umpp_core_session_end(umpp_session *session);
+
+/**
+ * Find a mapping object (if any) for this allocation.
+ * Called by any function needing to identify a mapping from a user virtual address.
+ * Verifies that the whole range to be within a mapping object.
+ * @param alloc The UMP allocation to find a matching mapping object of
+ * @param uaddr User mapping address to find the mapping object for
+ * @param size Length of the mapping
+ * @return NULL on error (no match found), pointer to mapping object if match found
+ */
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void* uaddr, size_t size);
+
+/**
+ * Register a new mapping of an allocation.
+ * Called by functions creating a new mapping of an allocation, typically OS specific handlers.
+ * @param alloc The allocation object which has been mapped
+ * @param map Info about the mapping
+ */
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map);
+
+/**
+ * Remove and free mapping object from an allocation.
+ * @param alloc The allocation object to remove the mapping info from
+ * @param target The mapping object to remove
+ */
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target);
+
+/**
+ * Helper to find a block in the blockArray which holds a given byte offset.
+ * @param alloc The allocation object to find the block in
+ * @param offset Offset (in bytes) from allocation start to find the block of
+ * @param[out] block_index Pointer to the index of the block matching
+ * @param[out] block_internal_offset Offset within the returned block of the searched offset
+ * @return 0 if a matching block was found, any other value for error
+ */
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const block_index, uint64_t * const block_internal_offset);
+
+/**
+ * Cache maintenance helper.
+ * Performs the requested cache operation on the given handle.
+ * @param mem Allocation handle
+ * @param op Cache maintenance operation to perform
+ * @param address User mapping at which to do the operation
+ * @param size Length (in bytes) of the range to do the operation on
+ */
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size);
+
+/**
+ * Import module session early init.
+ * Calls session_begin on all installed import modules.
+ * @param session The core session object to initialized the import handler for
+ * */
+void umpp_import_handlers_init(umpp_session * session);
+
+/**
+ * Import module session cleanup.
+ * Calls session_end on all import modules bound to the session.
+ * @param session The core session object to initialized the import handler for
+ */
+void umpp_import_handlers_term(umpp_session * session);
+
+#endif /* _UMP_KERNEL_CORE_H_ */
+
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c b/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
new file mode 100755
index 0000000..c5b0d74
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
@@ -0,0 +1,162 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <common/ump_kernel_priv.h>
+
+#define MALI_PAD_INT(x) (((x) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1))
+
+/**
+ * Allocate a descriptor table capable of holding 'count' mappings
+ * @param count Number of mappings in the table
+ * @return Pointer to a new table, NULL on error
+ */
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count);
+
+/**
+ * Free a descriptor table
+ * @param table The table to free
+ */
+static void descriptor_table_free(umpp_descriptor_table * table);
+
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries)
+{
+	umpp_descriptor_mapping * map = kzalloc(sizeof(umpp_descriptor_mapping), GFP_KERNEL);
+
+	init_entries = MALI_PAD_INT(init_entries);
+	max_entries = MALI_PAD_INT(max_entries);
+
+	if (NULL != map)
+	{
+		map->table = descriptor_table_alloc(init_entries);
+		if (NULL != map->table)
+		{
+			init_rwsem( &map->lock);
+			set_bit(0, map->table->usage);
+			map->max_nr_mappings_allowed = max_entries;
+			map->current_nr_mappings = init_entries;
+			return map;
+
+			descriptor_table_free(map->table);
+		}
+		kfree(map);
+	}
+	return NULL;
+}
+
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map)
+{
+	UMP_ASSERT(NULL != map);
+	descriptor_table_free(map->table);
+	kfree(map);
+}
+
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target)
+{
+ 	int descriptor = 0;
+	UMP_ASSERT(NULL != map);
+	down_write( &map->lock);
+	descriptor = find_first_zero_bit(map->table->usage, map->current_nr_mappings);
+	if (descriptor == map->current_nr_mappings)
+	{
+		/* no free descriptor, try to expand the table */
+		umpp_descriptor_table * new_table;
+		umpp_descriptor_table * old_table = map->table;
+		int nr_mappings_new = map->current_nr_mappings + BITS_PER_LONG;
+
+		if (map->current_nr_mappings >= map->max_nr_mappings_allowed)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+		new_table = descriptor_table_alloc(nr_mappings_new);
+		if (NULL == new_table)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+ 		memcpy(new_table->usage, old_table->usage, (sizeof(unsigned long)*map->current_nr_mappings) / BITS_PER_LONG);
+ 		memcpy(new_table->mappings, old_table->mappings, map->current_nr_mappings * sizeof(void*));
+
+ 		map->table = new_table;
+		map->current_nr_mappings = nr_mappings_new;
+		descriptor_table_free(old_table);
+	}
+
+	/* we have found a valid descriptor, set the value and usage bit */
+	set_bit(descriptor, map->table->usage);
+	map->table->mappings[descriptor] = target;
+
+unlock_and_exit:
+	up_write(&map->lock);
+	return descriptor;
+}
+
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target)
+{
+	int result = -EINVAL;
+ 	UMP_ASSERT(map);
+	UMP_ASSERT(target);
+ 	down_read(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		*target = map->table->mappings[descriptor];
+		result = 0;
+	}
+	/* keep target untouched if the descriptor was not found */
+	up_read(&map->lock);
+	return result;
+}
+
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor)
+{
+	UMP_ASSERT(map);
+ 	down_write(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		map->table->mappings[descriptor] = NULL;
+		clear_bit(descriptor, map->table->usage);
+	}
+	up_write(&map->lock);
+}
+
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count)
+{
+	umpp_descriptor_table * table;
+
+	table = kzalloc(sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG) + (sizeof(void*) * count), __GFP_HARDWALL | GFP_KERNEL );
+
+	if (NULL != table)
+	{
+		table->usage = (unsigned long*)((u8*)table + sizeof(umpp_descriptor_table));
+		table->mappings = (void**)((u8*)table + sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG));
+	}
+
+	return table;
+}
+
+static void descriptor_table_free(umpp_descriptor_table * table)
+{
+ 	UMP_ASSERT(table);
+	kfree(table);
+}
+
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h b/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
new file mode 100755
index 0000000..d06c145
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_kernel_descriptor_mapping.h
+ */
+
+#ifndef _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+#define _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+/**
+ * The actual descriptor mapping table, never directly accessed by clients
+ */
+typedef struct umpp_descriptor_table
+{
+	/* keep as a unsigned long to rely on the OS's bitops support */
+	unsigned long * usage; /**< Pointer to bitpattern indicating if a descriptor is valid/used(1) or not(0) */
+	void** mappings; /**< Array of the pointers the descriptors map to */
+} umpp_descriptor_table;
+
+/**
+ * The descriptor mapping object
+ * Provides a separate namespace where we can map an integer to a pointer
+ */
+typedef struct umpp_descriptor_mapping
+{
+	struct rw_semaphore lock; /**< Lock protecting access to the mapping object */
+	unsigned int max_nr_mappings_allowed; /**< Max number of mappings to support in this namespace */
+	unsigned int current_nr_mappings; /**< Current number of possible mappings */
+	umpp_descriptor_table * table; /**< Pointer to the current mapping table */
+} umpp_descriptor_mapping;
+
+/**
+ * Create a descriptor mapping object.
+ * Create a descriptor mapping capable of holding init_entries growable to max_entries.
+ * ID 0 is reserved so the number of available entries will be max - 1.
+ * @param init_entries Number of entries to preallocate memory for
+ * @param max_entries Number of entries to max support
+ * @return Pointer to a descriptor mapping object, NULL on failure
+ */
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries);
+
+/**
+ * Destroy a descriptor mapping object
+ * @param[in] map The map to free
+ */
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map);
+
+/**
+ * Allocate a new mapping entry (descriptor ID)
+ * Allocates a new entry in the map.
+ * @param[in] map The map to allocate a new entry in
+ * @param[in] target The value to map to
+ * @return The descriptor allocated, ID 0 on failure.
+ */
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target);
+
+/**
+ * Get the value mapped to by a descriptor ID
+ * @param[in] map The map to lookup the descriptor id in
+ * @param[in] descriptor The descriptor ID to lookup
+ * @param[out] target Pointer to a pointer which will receive the stored value
+ *
+ * @return 0 on success lookup, -EINVAL on lookup failure.
+ */
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target);
+
+/**
+ * Free the descriptor ID
+ * For the descriptor to be reused it has to be freed
+ * @param[in] map The map to free the descriptor from
+ * @param descriptor The descriptor ID to free
+ */
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor);
+
+#endif /* _UMP_KERNEL_DESCRIPTOR_MAPPING_H_ */
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h b/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
new file mode 100755
index 0000000..38b6f1b
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_PRIV_H_
+#define _UMP_KERNEL_PRIV_H_
+
+#ifdef __KERNEL__
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <asm/cacheflush.h>
+#endif
+
+
+#define UMP_EXPECTED_IDS 64
+#define UMP_MAX_IDS 32768
+
+#ifdef __KERNEL__
+#define UMP_ASSERT(expr) \
+		if (!(expr)) { \
+			printk(KERN_ERR "UMP: Assertion failed! %s,%s,%s,line=%d\n",\
+					#expr,__FILE__,__func__,__LINE__); \
+					BUG(); \
+		}
+
+static inline void ump_sync_to_memory(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/*TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_TO_DEVICE);
+	mb(); /* for outer_sync (if needed) */
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+
+static inline void ump_sync_to_cpu(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/* TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_FROM_DEVICE);
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+#endif /* __KERNEL__*/
+#endif /* _UMP_KERNEL_PRIV_H_ */
+
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/imports/ion/Makefile b/midgard/r15p0/kernel/drivers/base/ump/src/imports/ion/Makefile
new file mode 100755
index 0000000..ef74b27
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/imports/ion/Makefile
@@ -0,0 +1,53 @@
+#
+# (C) COPYRIGHT 2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+
+ifneq ($(KERNELRELEASE),)
+# Inside the kernel build system
+
+EXTRA_CFLAGS += -I$(KBUILD_EXTMOD) -I$(KBUILD_EXTMOD)/../../../../..
+KBUILD_EXTRA_SYMBOLS += "$(KBUILD_EXTMOD)/../../Module.symvers"
+
+SRC += ump_kernel_import_ion.c
+
+MODULE:=ump_ion_import.ko
+
+obj-m := $(MODULE:.ko=.o)
+$(MODULE:.ko=-y) := $(SRC:.c=.o)
+$(MODULE:.ko=-objs) := $(SRC:.c=.o)
+
+else
+# Outside the kernel build system
+#
+#
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR)
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+endif
+
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/imports/ion/sconscript b/midgard/r15p0/kernel/drivers/base/ump/src/imports/ion/sconscript
new file mode 100755
index 0000000..cff24c8
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/imports/ion/sconscript
@@ -0,0 +1,49 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ion = env.Clone()
+
+if env_ion['ump_ion'] != '1':
+	Return()
+
+# Source files required for UMP.
+ion_src = [Glob('#kernel/drivers/base/ump/src/imports/ion/*.c')]
+
+# Note: cleaning via the Linux kernel build system does not yet work
+if env_ion.GetOption('clean') :
+	makeAction=Action("cd ${SOURCE.dir} && make clean", '$MAKECOMSTR')
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make PLATFORM=${platform} && cp ump_ion_import.ko $STATIC_LIB_PATH/ump_ion_import.ko", '$MAKECOMSTR')
+# The target is ump_import_ion.ko, built from the source in ion_src, via the action makeAction
+# ump_import_ion.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+# kernel build system, after which it can be installed to the directory specified if
+# "libs_install" is set; this is done by LibTarget.
+cmd = env_ion.Command('$STATIC_LIB_PATH/ump_ion_import.ko', ion_src, [makeAction])
+
+# Until we fathom out how the invoke the Linux build system to clean, we can use Clean
+# to remove generated files.
+
+patterns = ['*.mod.c', '*.o', '*.ko', '*.a', '.*.cmd', 'modules.order', '.tmp_versions', 'Module.symvers']
+
+for p in patterns:
+	Clean(cmd, Glob('#kernel/drivers/base/ump/src/imports/ion/%s' % p))
+
+env_ion.Depends('$STATIC_LIB_PATH/ump_ion_import.ko', '$STATIC_LIB_PATH/ump.ko')
+env_ion.KernelObjTarget('ump', cmd)
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c b/midgard/r15p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
new file mode 100755
index 0000000..12b2e32
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/dma-mapping.h>
+#include "ion.h"
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+struct ion_wrapping_info
+{
+	struct ion_client *   ion_client;
+	struct ion_handle *   ion_handle;
+	int                   num_phys_blocks;
+	struct scatterlist *  sglist;
+};
+
+static struct ion_device * ion_device_get(void)
+{
+	/* < Customer to provide implementation >
+	 * Return a pointer to the global ion_device on the system
+	 */
+	return NULL;
+}
+
+static int import_ion_client_create(void** const custom_session_data)
+{
+	struct ion_client ** ion_client;
+
+	ion_client = (struct ion_client**)custom_session_data;
+
+	*ion_client = ion_client_create(ion_device_get(), "ump");
+
+	return PTR_RET(*ion_client);
+}
+
+
+static void import_ion_client_destroy(void* custom_session_data)
+{
+	struct ion_client * ion_client;
+
+	ion_client = (struct ion_client*)custom_session_data;
+	BUG_ON(!ion_client);
+
+	ion_client_destroy(ion_client);
+}
+
+
+static void import_ion_final_release_callback(const ump_dd_handle handle, void * info)
+{
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!info);
+
+	(void)handle;
+	ion_info = (struct ion_wrapping_info*)info;
+
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+	kfree(ion_info);
+	module_put(THIS_MODULE);
+}
+
+static ump_dd_handle import_ion_import(void * custom_session_data, void * pfd, ump_alloc_flags flags)
+{
+	int fd;
+	ump_dd_handle ump_handle;
+	struct scatterlist * sg;
+	int num_dma_blocks;
+	ump_dd_physical_block_64 * phys_blocks;
+	unsigned long i;
+	struct sg_table * sgt;
+
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!custom_session_data);
+	BUG_ON(!pfd);
+
+	ion_info = kzalloc(GFP_KERNEL, sizeof(*ion_info));
+	if (NULL == ion_info)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	ion_info->ion_client = (struct ion_client*)custom_session_data;
+
+	if (get_user(fd, (int*)pfd))
+	{
+		goto out;
+	}
+
+	ion_info->ion_handle = ion_import_dma_buf(ion_info->ion_client, fd);
+
+	if (IS_ERR_OR_NULL(ion_info->ion_handle))
+	{
+		goto out;
+	}
+
+	sgt = ion_sg_table(ion_info->ion_client, ion_info->ion_handle);
+	if (IS_ERR_OR_NULL(sgt))
+	{
+		goto ion_dma_map_failed;
+	}
+
+	ion_info->sglist = sgt->sgl;
+
+	sg = ion_info->sglist;
+	while (sg)
+	{
+		ion_info->num_phys_blocks++;
+		sg = sg_next(sg);
+	}
+
+	num_dma_blocks = dma_map_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	if (0 == num_dma_blocks)
+	{
+		goto linux_dma_map_failed;
+	}
+
+	phys_blocks = vmalloc(num_dma_blocks * sizeof(*phys_blocks));
+	if (NULL == phys_blocks)
+	{
+		goto vmalloc_failed;
+	}
+
+	for_each_sg(ion_info->sglist, sg, num_dma_blocks, i)
+	{
+		phys_blocks[i].addr = sg_phys(sg);
+		phys_blocks[i].size = sg_dma_len(sg);
+	}
+
+	ump_handle = ump_dd_create_from_phys_blocks_64(phys_blocks, num_dma_blocks, flags, NULL, import_ion_final_release_callback, ion_info);
+
+	vfree(phys_blocks);
+
+	if (ump_handle != UMP_DD_INVALID_MEMORY_HANDLE)
+	{
+		/*
+		 * As we have a final release callback installed
+		 * we must keep the module locked until
+		 * the callback has been triggered
+		 * */
+		__module_get(THIS_MODULE);
+		return ump_handle;
+	}
+
+	/* failed*/
+vmalloc_failed:
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+linux_dma_map_failed:
+ion_dma_map_failed:
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+out:
+	kfree(ion_info);
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+struct ump_import_handler import_handler_ion =
+{
+	.linux_module =  THIS_MODULE,
+	.session_begin = import_ion_client_create,
+	.session_end =   import_ion_client_destroy,
+	.import =        import_ion_import
+};
+
+static int __init import_ion_initialize_module(void)
+{
+	/* register with UMP */
+	return ump_import_module_register(UMP_EXTERNAL_MEM_TYPE_ION, &import_handler_ion);
+}
+
+static void __exit import_ion_cleanup_module(void)
+{
+	/* unregister import handler */
+	ump_import_module_unregister(UMP_EXTERNAL_MEM_TYPE_ION);
+}
+
+/* Setup init and exit functions for this module */
+module_init(import_ion_initialize_module);
+module_exit(import_ion_cleanup_module);
+
+/* And some module information */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/imports/sconscript b/midgard/r15p0/kernel/drivers/base/ump/src/imports/sconscript
new file mode 100755
index 0000000..8f50683
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/imports/sconscript
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os, sys
+Import('env')
+
+import_modules = [ os.path.join( path, 'sconscript' ) for path in sorted(os.listdir( os.getcwd() )) ]
+
+for m in import_modules:
+	if os.path.exists(m):
+		SConscript( m, variant_dir=os.path.join( env['BUILD_DIR_PATH'], os.path.dirname(m) ), duplicate=0 )
+
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c b/midgard/r15p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
new file mode 100755
index 0000000..d6c3c53
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
@@ -0,0 +1,831 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump-ioctl.h>
+#include <linux/ump.h>
+
+#include <asm/uaccess.h>	         /* copy_*_user */
+#include <linux/compat.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/device.h>            /* class registration support */
+
+#include <common/ump_kernel_core.h>
+
+#include "ump_kernel_linux_mem.h"
+#include <ump_arch.h>
+
+
+struct ump_linux_device
+{
+	struct cdev cdev;
+	struct class * ump_class;
+};
+
+/* Name of the UMP device driver */
+static char ump_dev_name[] = "ump"; /* should be const, but the functions we call requires non-cost */
+
+/* Module parameter to control log level */
+int ump_debug_level = 2;
+module_param(ump_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(ump_debug_level, "Higher number, more dmesg output");
+
+/* By default the module uses any available major, but it's possible to set it at load time to a specific number */
+int ump_major = 0;
+module_param(ump_major, int, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_major, "Device major number");
+
+#define UMP_REV_STRING "1.0"
+
+char * ump_revision = UMP_REV_STRING;
+module_param(ump_revision, charp, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_revision, "Revision info");
+
+static int umpp_linux_open(struct inode *inode, struct file *filp);
+static int umpp_linux_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+/* This variable defines the file operations this UMP device driver offers */
+static struct file_operations ump_fops =
+{
+	.owner   = THIS_MODULE,
+	.open    = umpp_linux_open,
+	.release = umpp_linux_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = umpp_linux_ioctl,
+#else
+	.ioctl   = umpp_linux_ioctl,
+#endif
+	.compat_ioctl = umpp_linux_ioctl,
+	.mmap = umpp_linux_mmap
+};
+
+/* import module handling */
+DEFINE_MUTEX(import_list_lock);
+struct ump_import_handler *  import_handlers[UMPP_EXTERNAL_MEM_COUNT];
+
+/* The global variable containing the global device data */
+static struct ump_linux_device ump_linux_device;
+
+#define DBG_MSG(level, ...) do { \
+if ((level) <=  ump_debug_level)\
+{\
+printk(KERN_DEBUG "UMP<" #level ">:\n" __VA_ARGS__);\
+} \
+} while (0)
+
+#define MSG_ERR(...) do{ \
+printk(KERN_ERR "UMP: ERR: %s\n           %s()%4d\n", __FILE__, __func__  , __LINE__) ; \
+printk(KERN_ERR __VA_ARGS__); \
+printk(KERN_ERR "\n"); \
+} while(0)
+
+#define MSG(...) do{ \
+printk(KERN_INFO "UMP: " __VA_ARGS__);\
+} while (0)
+
+/*
+ * This function is called by Linux to initialize this module.
+ * All we do is initialize the UMP device driver.
+ */
+static int __init umpp_linux_initialize_module(void)
+{
+	ump_result err;
+
+	err = umpp_core_constructor();
+	if (UMP_OK != err)
+	{
+		MSG_ERR("UMP device driver init failed\n");
+		return -ENOTTY;
+	}
+
+	MSG("UMP device driver %s loaded\n", UMP_REV_STRING);
+	return 0;
+}
+
+
+
+/*
+ * This function is called by Linux to unload/terminate/exit/cleanup this module.
+ * All we do is terminate the UMP device driver.
+ */
+static void __exit umpp_linux_cleanup_module(void)
+{
+	DBG_MSG(2, "Unloading UMP device driver\n");
+	umpp_core_destructor();
+	DBG_MSG(2, "Module unloaded\n");
+}
+
+
+
+/*
+ * Initialize the UMP device driver.
+ */
+ump_result umpp_device_initialize(void)
+{
+	int err;
+	dev_t dev = 0;
+
+	if (0 == ump_major)
+	{
+		/* auto select a major */
+		err = alloc_chrdev_region(&dev, 0, 1, ump_dev_name);
+		ump_major = MAJOR(dev);
+	}
+	else
+	{
+		/* use load time defined major number */
+		dev = MKDEV(ump_major, 0);
+		err = register_chrdev_region(dev, 1, ump_dev_name);
+	}
+
+	if (0 == err)
+	{
+		memset(&ump_linux_device, 0, sizeof(ump_linux_device));
+
+		/* initialize our char dev data */
+		cdev_init(&ump_linux_device.cdev, &ump_fops);
+		ump_linux_device.cdev.owner = THIS_MODULE;
+		ump_linux_device.cdev.ops = &ump_fops;
+
+		/* register char dev with the kernel */
+		err = cdev_add(&ump_linux_device.cdev, dev, 1/*count*/);
+		if (0 == err)
+		{
+
+			ump_linux_device.ump_class = class_create(THIS_MODULE, ump_dev_name);
+			if (IS_ERR(ump_linux_device.ump_class))
+			{
+				err = PTR_ERR(ump_linux_device.ump_class);
+			}
+			else
+			{
+				struct device * mdev;
+				mdev = device_create(ump_linux_device.ump_class, NULL, dev, NULL, ump_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return UMP_OK;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(ump_linux_device.ump_class);
+			}
+			cdev_del(&ump_linux_device.cdev);
+		}
+
+		unregister_chrdev_region(dev, 1);
+	}
+
+	return UMP_ERROR;
+}
+
+
+
+/*
+ * Terminate the UMP device driver
+ */
+void umpp_device_terminate(void)
+{
+	dev_t dev = MKDEV(ump_major, 0);
+
+	device_destroy(ump_linux_device.ump_class, dev);
+	class_destroy(ump_linux_device.ump_class);
+
+	/* unregister char device */
+	cdev_del(&ump_linux_device.cdev);
+
+	/* free major */
+	unregister_chrdev_region(dev, 1);
+}
+
+
+static int umpp_linux_open(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = umpp_core_session_start();
+	if (NULL == session)
+	{
+		return -EFAULT;
+	}
+	
+	filp->private_data = session;
+
+	return 0;
+}
+
+static int umpp_linux_release(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = filp->private_data;
+
+	umpp_core_session_end(session);
+
+	filp->private_data = NULL;
+
+	return 0;
+}
+
+/**************************/
+/*ioctl specific functions*/
+/**************************/
+static int do_ump_dd_allocate(umpp_session * session, ump_k_allocate * params)
+{
+	ump_dd_handle new_allocation;
+	new_allocation = ump_dd_allocate_64(params->size, params->alloc_flags, NULL, NULL, NULL);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	printk(KERN_WARNING "UMP: Allocation FAILED\n");
+	return -ENOMEM;
+}
+
+static int do_ump_dd_retain(umpp_session * session, ump_k_retain * params)
+{
+	umpp_session_memory_usage * it;
+
+	mutex_lock(&session->session_lock);
+
+	/* try to find it on the session usage list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found to already be in use */
+			/* check for overflow */
+			while(1)
+			{
+				int refcnt = atomic_read(&it->process_usage_count);
+				if (refcnt + 1 > 0)
+				{
+					/* add a process local ref */
+					if(atomic_cmpxchg(&it->process_usage_count, refcnt, refcnt + 1) == refcnt)
+					{
+						mutex_unlock(&session->session_lock);
+						return 0;
+					}
+				}
+				else
+				{
+					/* maximum usage cap reached */
+					mutex_unlock(&session->session_lock);
+					return -EBUSY;
+				}
+			}
+		}
+	}
+	/* try to look it up globally */
+
+	it = kmalloc(sizeof(*it), GFP_KERNEL);
+
+	if (NULL != it)
+	{
+		it->mem = ump_dd_from_secure_id(params->secure_id);
+		if (UMP_DD_INVALID_MEMORY_HANDLE != it->mem)
+		{
+			/* found, add it to the session usage list */
+			it->id = params->secure_id;
+			atomic_set(&it->process_usage_count, 1);
+			list_add(&it->link, &session->memory_usage);
+		}
+		else
+		{
+			/* not found */
+			kfree(it);
+			it = NULL;
+		}
+	}
+
+	mutex_unlock(&session->session_lock);
+
+	return (NULL != it) ? 0 : -ENODEV;
+}
+
+
+static int do_ump_dd_release(umpp_session * session, ump_k_release * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only do a release if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			result = 0;
+
+			if (0 == atomic_sub_return(1, &it->process_usage_count))
+			{
+				/* last ref in this process remove from the usage list and remove the underlying ref */
+				list_del(&it->link);
+				ump_dd_release(it->mem);
+				kfree(it);
+			}
+
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_sizequery(umpp_session * session, ump_k_sizequery * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->size = ump_dd_size_get_64(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_allocation_flags_get(umpp_session * session, ump_k_allocation_flags * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->alloc_flags = ump_dd_allocation_flags_get(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_msync_now(umpp_session * session, ump_k_msync * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, do the cache op */
+#ifdef CONFIG_COMPAT
+			if (is_compat_task())
+			{
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, compat_ptr(params->mapped_ptr.compat_value), params->size);
+				result = 0;
+			}
+			else
+			{
+#endif
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, params->mapped_ptr.value, params->size);
+				result = 0;
+#ifdef CONFIG_COMPAT
+			}
+#endif
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+
+void umpp_import_handlers_init(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		if (import_handlers[i])
+		{
+			import_handlers[i]->session_begin(&session->import_handler_data[i]);
+			/* It is OK if session_begin returned an error.
+			 * We won't do any import calls if so */
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+void umpp_import_handlers_term(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		/* only call if session_begin succeeded */
+		if (session->import_handler_data[i] != NULL)
+		{
+			/* if session_beging succeeded the handler
+			 * should not have unregistered with us */
+			BUG_ON(!import_handlers[i]);
+			import_handlers[i]->session_end(session->import_handler_data[i]);
+			session->import_handler_data[i] = NULL;
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler)
+{
+	int res = -EEXIST;
+
+	/* validate input */
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+	BUG_ON(!handler);
+	BUG_ON(!handler->linux_module);
+	BUG_ON(!handler->session_begin);
+	BUG_ON(!handler->session_end);
+	BUG_ON(!handler->import);
+
+	mutex_lock(&import_list_lock);
+
+	if (!import_handlers[type])
+	{
+		import_handlers[type] = handler;
+		res = 0;
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return res;
+}
+
+void ump_import_module_unregister(enum ump_external_memory_type type)
+{
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+
+	mutex_lock(&import_list_lock);
+	/* an error to call this if ump_import_module_register didn't succeed */
+	BUG_ON(!import_handlers[type]);
+	import_handlers[type] = NULL;
+	mutex_unlock(&import_list_lock);
+}
+
+static struct ump_import_handler * import_handler_get(unsigned int type_id)
+{
+	enum ump_external_memory_type type;
+	struct ump_import_handler * handler;
+
+	/* validate and convert input */
+	/* handle bad data here, not just BUG_ON */
+	if (type_id == 0 || type_id >= UMPP_EXTERNAL_MEM_COUNT)
+		return NULL;
+
+	type = (enum ump_external_memory_type)type_id;
+
+	/* find the handler */
+	mutex_lock(&import_list_lock);
+
+	handler = import_handlers[type];
+
+	if (handler)
+	{
+		if (!try_module_get(handler->linux_module))
+		{
+			handler = NULL;
+		}
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return handler;
+}
+
+static void import_handler_put(struct ump_import_handler * handler)
+{
+	module_put(handler->linux_module);
+}
+
+static int do_ump_dd_import(umpp_session * session, ump_k_import * params)
+{
+	ump_dd_handle new_allocation = UMP_DD_INVALID_MEMORY_HANDLE;
+	struct ump_import_handler * handler;
+
+	handler = import_handler_get(params->type);
+
+	if (handler)
+	{
+		/* try late binding if not already bound */
+		if (!session->import_handler_data[params->type])
+		{
+			handler->session_begin(&session->import_handler_data[params->type]);
+		}
+
+		/* do we have a bound session? */
+		if (session->import_handler_data[params->type])
+		{
+			new_allocation = handler->import( session->import_handler_data[params->type],
+		                                      params->phandle.value,
+		                                      params->alloc_flags);
+		}
+
+		/* done with the handler */
+		import_handler_put(handler);
+	}
+
+	/* did the import succeed? */
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	return -ENOMEM;
+
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	int ret;
+	uint64_t msg[(UMP_CALL_MAX_SIZE+7)>>3]; /* alignment fixup */
+	uint32_t size = _IOC_SIZE(cmd);
+	struct umpp_session *session = filp->private_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+	(void)inode; /* unused arg */
+#endif
+
+	/*
+	 * extract the type and number bitfields, and don't decode
+	 * wrong cmds: return ENOTTY (inappropriate ioctl) before access_ok()
+	 */
+	if (_IOC_TYPE(cmd) != UMP_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if (_IOC_NR(cmd) > UMP_IOC_MAXNR)
+	{
+		return -ENOTTY;
+	}
+
+	switch(cmd)
+	{
+		case UMP_FUNC_ALLOCATE:
+			if (size != sizeof(ump_k_allocate))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocate(session, (ump_k_allocate *)&msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_SIZEQUERY:
+			if (size != sizeof(ump_k_sizequery))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_sizequery(session,(ump_k_sizequery*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_MSYNC:
+			if (size != sizeof(ump_k_msync))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_msync_now(session,(ump_k_msync*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_IMPORT:
+			if (size != sizeof(ump_k_import))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user*)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_import(session, (ump_k_import*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		/* used only by v1 API */
+		case UMP_FUNC_ALLOCATION_FLAGS_GET:
+			if (size != sizeof(ump_k_allocation_flags))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocation_flags_get(session,(ump_k_allocation_flags*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_RETAIN:
+			if (size != sizeof(ump_k_retain))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_retain(session,(ump_k_retain*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		case UMP_FUNC_RELEASE:
+			if (size != sizeof(ump_k_release))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_release(session,(ump_k_release*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		default:
+			/* not ours */
+			return -ENOTTY;
+	}
+	/*redundant below*/
+	return -ENOTTY;
+}
+
+
+/* Export UMP kernel space API functions */
+EXPORT_SYMBOL(ump_dd_allocate_64);
+EXPORT_SYMBOL(ump_dd_allocation_flags_get);
+EXPORT_SYMBOL(ump_dd_secure_id_get);
+EXPORT_SYMBOL(ump_dd_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get_64);
+EXPORT_SYMBOL(ump_dd_size_get_64);
+EXPORT_SYMBOL(ump_dd_retain);
+EXPORT_SYMBOL(ump_dd_release);
+EXPORT_SYMBOL(ump_dd_create_from_phys_blocks_64);
+#ifdef CONFIG_KDS
+EXPORT_SYMBOL(ump_dd_kds_resource_get);
+#endif
+
+/* import API */
+EXPORT_SYMBOL(ump_import_module_register);
+EXPORT_SYMBOL(ump_import_module_unregister);
+
+
+
+/* V1 API */
+EXPORT_SYMBOL(ump_dd_handle_create_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_block_count_get);
+EXPORT_SYMBOL(ump_dd_phys_block_get);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get);
+EXPORT_SYMBOL(ump_dd_size_get);
+EXPORT_SYMBOL(ump_dd_reference_add);
+EXPORT_SYMBOL(ump_dd_reference_release);
+EXPORT_SYMBOL(ump_dd_handle_create_from_phys_blocks);
+
+
+/* Setup init and exit functions for this module */
+module_init(umpp_linux_initialize_module);
+module_exit(umpp_linux_cleanup_module);
+
+/* And some module informatio */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(UMP_REV_STRING);
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c b/midgard/r15p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
new file mode 100755
index 0000000..9186dd0
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
@@ -0,0 +1,250 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+#include <linux/version.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/mm.h> /* memory mananger definitions */
+#include <linux/pfn.h>
+#include <linux/highmem.h> /*kmap*/
+
+#include <linux/compat.h> /* is_compat_task */
+
+#include <common/ump_kernel_core.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+static void umpp_vm_close(struct vm_area_struct *vma)
+{
+	umpp_cpu_mapping * mapping;
+	umpp_session * session;
+	ump_dd_handle handle;
+
+	mapping = (umpp_cpu_mapping*)vma->vm_private_data;
+	UMP_ASSERT(mapping);
+	
+	session = mapping->session;
+	handle = mapping->handle;
+
+	umpp_dd_remove_cpu_mapping(mapping->handle, mapping); /* will free the mapping object */
+	ump_dd_release(handle);
+}
+
+
+static const struct vm_operations_struct umpp_vm_ops = {
+	.close = umpp_vm_close
+};
+
+int umpp_phys_commit(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	/* round up to a page boundary */
+	alloc->size = (alloc->size + PAGE_SIZE - 1) & ~((uint64_t)PAGE_SIZE-1) ;
+	/* calculate number of pages */
+	alloc->blocksCount = alloc->size >> PAGE_SHIFT;
+
+	if( (sizeof(ump_dd_physical_block_64) * alloc->blocksCount) > ((size_t)-1))
+	{
+		printk(KERN_WARNING "UMP: umpp_phys_commit - trying to allocate more than possible\n");
+		return -ENOMEM;
+	}
+
+	alloc->block_array = kmalloc(sizeof(ump_dd_physical_block_64) * alloc->blocksCount, __GFP_HARDWALL | GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (NULL == alloc->block_array)
+	{
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		void * mp;
+		struct page * page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | __GFP_NOWARN | __GFP_COLD);
+		if (NULL == page)
+		{
+			break;
+		}
+
+		alloc->block_array[i].addr = PFN_PHYS(page_to_pfn(page));
+		alloc->block_array[i].size = PAGE_SIZE;
+
+		mp = kmap(page);
+		if (NULL == mp)
+		{
+			__free_page(page);
+			break;
+		}
+
+		memset(mp, 0x00, PAGE_SIZE); /* instead of __GFP_ZERO, so we can do cache maintenance */
+		ump_sync_to_memory(PFN_PHYS(page_to_pfn(page)), mp, PAGE_SIZE);
+		kunmap(page);
+	}
+
+	if (i == alloc->blocksCount)
+	{
+		return 0;
+	}
+	else
+	{
+		uint64_t j;
+		for (j = 0; j < i; j++)
+		{
+			struct page * page;
+			page = pfn_to_page(alloc->block_array[j].addr >> PAGE_SHIFT);
+			__free_page(page);
+		}
+		
+		kfree(alloc->block_array);
+
+		return -ENOMEM;
+	}
+}
+
+void umpp_phys_free(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		__free_page(pfn_to_page(alloc->block_array[i].addr >> PAGE_SHIFT));
+	}
+
+	kfree(alloc->block_array);
+}
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma)
+{
+	ump_secure_id id;
+	ump_dd_handle h;
+	size_t offset;
+	int err = -EINVAL;
+	size_t length = vma->vm_end - vma->vm_start;
+
+	umpp_cpu_mapping * map = NULL;
+	umpp_session *session = filp->private_data;
+
+	if ( 0 == length )
+	{
+		return -EINVAL;
+	}
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (NULL == map)
+	{
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* unpack our arg */
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	if (is_compat_task())
+	{
+#endif
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_32;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_32;
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	}
+	else
+	{
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_64;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_64;
+	}
+#endif
+
+	h = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE != h)
+	{
+		uint64_t i;
+		uint64_t block_idx;
+		uint64_t block_offset;
+		uint64_t paddr;
+		umpp_allocation * alloc;
+		uint64_t last_byte;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0))
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO | VM_MIXEDMAP | VM_DONTDUMP;
+#else
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO | VM_MIXEDMAP;
+#endif
+		vma->vm_ops = &umpp_vm_ops;
+		vma->vm_private_data = map;
+
+		alloc = (umpp_allocation*)h;
+
+		if( (alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+		{
+			/* cache disabled flag set, disable caching for cpu mappings */
+			vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		}
+
+		last_byte = length + (offset << PAGE_SHIFT) - 1;
+		if (last_byte >= alloc->size || last_byte < (offset << PAGE_SHIFT))
+		{
+			goto err_out;
+		}
+
+		if (umpp_dd_find_start_block(alloc, offset << PAGE_SHIFT, &block_idx, &block_offset))
+		{
+			goto err_out;
+		}
+
+		paddr = alloc->block_array[block_idx].addr + block_offset;
+
+		for (i = 0; i < (length >> PAGE_SHIFT); i++)
+		{
+			/* check if we've overrrun the current block, if so move to the next block */
+			if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+			{
+				block_idx++;
+				UMP_ASSERT(block_idx < alloc->blocksCount);
+				paddr = alloc->block_array[block_idx].addr;
+			}
+
+			err = vm_insert_mixed(vma, vma->vm_start + (i << PAGE_SHIFT), paddr >> PAGE_SHIFT);
+			paddr += PAGE_SIZE;
+		}
+
+		map->vaddr_start = (void*)vma->vm_start;
+		map->nr_pages = length >> PAGE_SHIFT;
+		map->page_off = offset;
+		map->handle = h;
+		map->session = session;
+
+		umpp_dd_add_cpu_mapping(h, map);
+
+		return 0;
+
+		err_out:
+
+		ump_dd_release(h);
+	}
+
+	kfree(map);
+
+out:
+
+	return err;
+}
+
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h b/midgard/r15p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
new file mode 100755
index 0000000..4fbf3b2
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_LINUX_MEM_H_
+#define _UMP_KERNEL_LINUX_MEM_H_
+
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma);
+
+#endif /* _UMP_KERNEL_LINUX_MEM_H_ */
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/sconscript b/midgard/r15p0/kernel/drivers/base/ump/src/sconscript
new file mode 100755
index 0000000..b34d499
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/sconscript
@@ -0,0 +1,46 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ump = env.Clone()
+
+# Source files required for UMP.
+ump_src = [
+    Glob('common/*.c'),
+    Glob('imports/*/*.c'),
+    Glob('linux/*.c'),
+]
+
+make_args = env_ump.kernel_get_config_defines(ret_list = True) + [
+    'PLATFORM=%s' % env_ump['platform'],
+    'MALI_UNIT_TEST=%s' % env_ump['unit'],
+]
+
+mod = env_ump.BuildKernelModule('$STATIC_LIB_PATH/ump.ko', ump_src,
+                                make_args = make_args)
+env_ump.KernelObjTarget('ump', mod)
+
+# Add a dependency on kds.ko only when the build is not Android
+# Android uses sync_pt instead of Midgard KDS to achieve KDS functionality
+# Only necessary when KDS is not built into the kernel.
+#
+if env_ump['os'] != 'android':
+    if not env_ump.KernelConfigEnabled('CONFIG_KDS'):
+        env_ump.Depends(mod, '$STATIC_LIB_PATH/kds.ko')
+
+SConscript( 'imports/sconscript' )
+
diff --git a/midgard/r15p0/kernel/drivers/base/ump/src/ump_arch.h b/midgard/r15p0/kernel/drivers/base/ump/src/ump_arch.h
new file mode 100755
index 0000000..2303d56
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/src/ump_arch.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_ARCH_H_
+#define _UMP_ARCH_H_
+
+#include <common/ump_kernel_core.h>
+
+/**
+ * Device specific setup.
+ * Called by the UMP core code to to host OS/device specific setup.
+ * Typical use case is device node creation for talking to user space.
+ * @return UMP_OK on success, any other value on failure
+ */
+extern ump_result umpp_device_initialize(void);
+
+/**
+ * Device specific teardown.
+ * Undo any things done by ump_device_initialize.
+ */
+extern void umpp_device_terminate(void);
+
+extern int umpp_phys_commit(umpp_allocation * alloc);
+extern void umpp_phys_free(umpp_allocation * alloc);
+
+#endif /* _UMP_ARCH_H_ */
diff --git a/midgard/r15p0/kernel/drivers/base/ump/ump_ref_drv.h b/midgard/r15p0/kernel/drivers/base/ump/ump_ref_drv.h
new file mode 100755
index 0000000..9a265fe
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/base/ump/ump_ref_drv.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_ref_drv.h
+ *
+ * This file contains the link to user space part of the UMP API for usage by MALI 400 gralloc.
+ *
+ */
+
+#ifndef _UMP_REF_DRV_H_
+#define _UMP_REF_DRV_H_
+
+#include <ump/ump.h>
+
+
+#endif /* _UMP_REF_DRV_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/Kbuild b/midgard/r15p0/kernel/drivers/gpu/arm/Kbuild
new file mode 100755
index 0000000..19c7e9a
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/Kbuild
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/Kconfig b/midgard/r15p0/kernel/drivers/gpu/arm/Kconfig
new file mode 100755
index 0000000..1f30eb5
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/Kconfig
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menu "ARM GPU Configuration"
+source "drivers/gpu/arm/midgard/Kconfig"
+endmenu
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/Kbuild b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/Kbuild
new file mode 100755
index 0000000..c14d82c
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/Kbuild
@@ -0,0 +1,230 @@
+#
+# (C) COPYRIGHT 2012,2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r15p0-00rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+ifeq ($(CONFIG_MALI_ERROR_INJECTION),y)
+MALI_ERROR_INJECT_ON = 1
+endif
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_ERROR_INJECT_ON ?= 0
+MALI_MOCK_TEST ?= 0
+MALI_COVERAGE ?= 0
+MALI_INSTRUMENTATION_LEVEL ?= 0
+# This workaround is for what seems to be a compiler bug we observed in
+# GCC 4.7 on AOSP 4.3.  The bug caused an intermittent failure compiling
+# the "_Pragma" syntax, where an error message is returned:
+#
+# "internal compiler error: unspellable token PRAGMA"
+#
+# This regression has thus far only been seen on the GCC 4.7 compiler bundled
+# with AOSP 4.3.0.  So this makefile, intended for in-tree kernel builds
+# which are not known to be used with AOSP, is hardcoded to disable the
+# workaround, i.e. set the define to 0.
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
+	-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
+	-DMALI_COVERAGE=$(MALI_COVERAGE) \
+	-DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \
+	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+	-DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+
+# Use our defines when compiling
+ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+	mali_kbase_device.c \
+	mali_kbase_cache_policy.c \
+	mali_kbase_mem.c \
+	mali_kbase_mmu.c \
+	mali_kbase_ipa.c \
+	mali_kbase_jd.c \
+	mali_kbase_jd_debugfs.c \
+	mali_kbase_jm.c \
+	mali_kbase_gpuprops.c \
+	mali_kbase_js.c \
+	mali_kbase_js_ctx_attr.c \
+	mali_kbase_event.c \
+	mali_kbase_context.c \
+	mali_kbase_pm.c \
+	mali_kbase_config.c \
+	mali_kbase_vinstr.c \
+	mali_kbase_softjobs.c \
+	mali_kbase_10969_workaround.c \
+	mali_kbase_hw.c \
+	mali_kbase_utility.c \
+	mali_kbase_debug.c \
+	mali_kbase_trace_timeline.c \
+	mali_kbase_gpu_memory_debugfs.c \
+	mali_kbase_mem_linux.c \
+	mali_kbase_core_linux.c \
+	mali_kbase_sync.c \
+	mali_kbase_sync_user.c \
+	mali_kbase_replay.c \
+	mali_kbase_mem_profile_debugfs.c \
+	mali_kbase_mmu_mode_lpae.c \
+	mali_kbase_mmu_mode_aarch64.c \
+	mali_kbase_disjoint_events.c \
+	mali_kbase_gator_api.c \
+	mali_kbase_debug_mem_view.c \
+	mali_kbase_debug_job_fault.c \
+	mali_kbase_smc.c \
+	mali_kbase_mem_pool.c \
+	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_tlstream.c \
+	mali_kbase_strings.c \
+	mali_kbase_as_fault_debugfs.c \
+	mali_kbase_regs_history_debugfs.c
+
+ifeq ($(MALI_UNIT_TEST),1)
+	SRC += mali_kbase_tlstream_test.c
+endif
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+	SRC += mali_kbase_regs_dump_debugfs.c
+endif
+
+
+ccflags-y += -I$(KBASE_PATH)
+
+ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y)
+	SRC += mali_kbase_platform_fake.c
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y)
+		SRC += platform/vexpress/mali_kbase_config_vexpress.c \
+		platform/vexpress/mali_kbase_cpu_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y)
+		SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/rtsm_ve
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_JUNO),y)
+		SRC += platform/juno/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/juno
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_JUNO_SOC),y)
+		SRC += platform/juno_soc/mali_kbase_config_juno_soc.c
+		ccflags-y += -I$(src)/platform/juno_soc
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_1XV7_A57),y)
+		SRC += platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress_1xv7_a57
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y)
+		SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \
+		platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress_6xvirtex7_10mhz
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_A7_KIPLING),y)
+		SRC += platform/a7_kipling/mali_kbase_config_a7_kipling.c \
+		platform/a7_kipling/mali_kbase_cpu_a7_kipling.c
+		ccflags-y += -I$(src)/platform/a7_kipling
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+	# remove begin and end quotes from the Kconfig string type
+	platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+	MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+	ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+	ifeq ($(CONFIG_MALI_MIDGARD),m)
+	include  $(src)/platform/$(platform_name)/Kbuild
+	else ifeq ($(CONFIG_MALI_MIDGARD),y)
+	obj-$(CONFIG_MALI_MIDGARD) += platform/
+	endif
+	endif
+endif # CONFIG_MALI_PLATFORM_FAKE=y
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+ifeq ($(CONFIG_MALI_MIDGARD),m)
+include  $(src)/platform/$(platform_name)/Kbuild
+else ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-$(CONFIG_MALI_MIDGARD) += platform/
+endif
+endif
+
+ifeq ($(CONFIG_MALI_PLATFORM_DEVICETREE),y)
+	ccflags-y += -I$(src)/platform/devicetree
+    include $(src)/platform/devicetree/Kbuild
+endif
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+mali_kbase-y := $(SRC:.c=.o)
+
+mali_kbase-$(CONFIG_MALI_DMA_FENCE) += mali_kbase_dma_fence.o
+
+MALI_BACKEND_PATH ?= backend
+CONFIG_MALI_BACKEND ?= gpu
+CONFIG_MALI_BACKEND_REAL ?= $(CONFIG_MALI_BACKEND)
+
+ifeq ($(MALI_MOCK_TEST),1)
+ifeq ($(CONFIG_MALI_BACKEND_REAL),gpu)
+# Test functionality
+mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
+endif
+endif
+
+include  $(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)/Kbuild
+mali_kbase-y += $(BACKEND:.c=.o)
+
+ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+
+# Default to devicetree platform if neither a fake platform or a thirdparty
+# platform is configured.
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY)$(CONFIG_MALI_PLATFORM_FAKE),)
+CONFIG_MALI_PLATFORM_DEVICETREE := y
+endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/Kconfig b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/Kconfig
new file mode 100755
index 0000000..f3bb58f
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/Kconfig
@@ -0,0 +1,238 @@
+#
+# (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menuconfig MALI_MIDGARD
+	tristate "Mali Midgard series support"
+	select GPU_TRACEPOINTS if ANDROID
+	default n
+	help
+	  Enable this option to build support for a ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+	  You will need the Gator device driver already loaded before loading this driver when enabling
+	  Streamline debug support.
+	  This is a legacy interface required by older versions of Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ && !MALI_PLATFORM_DEVICETREE
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD && PM_DEVFREQ
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali"
+	depends on MALI_MIDGARD && !KDS
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if KDS is not present and
+	  the Linux Kernel has built in support for DMA_BUF fences.
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default n
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_CORESTACK
+	bool "Support controlling power to the GPU core stack"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enabling this feature on supported GPUs will let the driver powering
+	  on/off the GPU core stack independently without involving the Power
+	  Domain Controller. This should only be enabled on platforms which
+	  integration of the PDC to the Mali GPU is known to be problematic.
+	  This feature is currently only supported on t-Six and t-HEx GPUs.
+
+	  If unsure, say N.
+
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
+
+config MALI_PLATFORM_FAKE
+	bool "Enable fake platform device support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  When you start to work with the Mali Midgard series device driver the platform-specific code of
+	  the Linux kernel for your platform may not be complete. In this situation the kernel device driver
+	  supports creating the platform device outside of the Linux platform-specific code.
+	  Enable this option if would like to use a platform device configuration from within the device driver.
+
+choice
+	prompt "Platform configuration"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default MALI_PLATFORM_DEVICETREE
+	help
+	  Select the SOC platform that contains a Mali Midgard GPU
+
+config MALI_PLATFORM_DEVICETREE
+	bool "Device Tree platform"
+	depends on OF
+	help
+	  Select this option to use Device Tree with the Mali driver.
+
+	  When using this option the Mali driver will get the details of the
+	  GPU hardware from the Device Tree. This means that the same driver
+	  binary can run on multiple platforms as long as all the GPU hardware
+	  details are described in the device tree.
+
+	  Device Tree is the recommended method for the Mali driver platform
+	  integration.
+
+config MALI_PLATFORM_VEXPRESS
+	depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+	bool "Versatile Express"
+config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ
+	depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+	bool "Versatile Express w/Virtex7 @ 40Mhz"
+config MALI_PLATFORM_GOLDFISH
+	depends on ARCH_GOLDFISH
+	bool "Android Goldfish virtual CPU"
+config MALI_PLATFORM_PBX
+	depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX
+	bool "Realview PBX-A9"
+config MALI_PLATFORM_THIRDPARTY
+	bool "Third Party Platform"
+endchoice
+
+config MALI_PLATFORM_THIRDPARTY_NAME
+	depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT
+	string "Third party platform name"
+	help
+	  Enter the name of a third party platform that is supported. The third part configuration
+	  file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name
+	  specified here.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_FENCE_DEBUG
+	bool "Debug sync fence usage"
+	depends on MALI_MIDGARD && MALI_EXPERT && SYNC
+	default y if MALI_DEBUG
+	help
+	  Select this option to enable additional checking and reporting on the
+	  use of sync fences in the Mali driver.
+
+	  This will add a 3s timeout to all sync fence waits in the Mali
+	  driver, so that when work for Mali has been waiting on a sync fence
+	  for a long time a debug message will be printed, detailing what fence
+	  is causing the block, and which dependent Mali atoms are blocked as a
+	  result of this.
+
+	  The timeout can be changed at runtime through the js_soft_timeout
+	  device attribute, where the timeout is specified in milliseconds.
+
+config MALI_NO_MALI
+	bool "No Mali"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This can be used to test the driver in a simulated environment
+	  whereby the hardware is not physically present. If the hardware is physically
+	  present it will not be used. This can be used to test the majority of the
+	  driver without needing actual hardware or for software benchmarking.
+	  All calls to the simulated hardware will complete immediately as if the hardware
+	  completed the task.
+
+config MALI_ERROR_INJECT
+	bool "Error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+	default n
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_TRACE_TIMELINE
+	bool "Timeline tracing"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enables timeline tracing through the kernel tracepoint system.
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event.	This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_GPU_MMU_AARCH64
+	bool "Use AArch64 page tables"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Use AArch64 format page tables for the GPU instead of LPAE-style.
+	  The two formats have the same functionality and performance but a
+	  future GPU may deprecate or remove the legacy LPAE-style format.
+
+	  The LPAE-style format is supported on all Midgard and current Bifrost
+	  GPUs. Enabling AArch64 format restricts the driver to only supporting
+	  Bifrost GPUs.
+
+	  If in doubt, say N.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/Makefile b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/Makefile
new file mode 100755
index 0000000..e1625e6
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/Makefile
@@ -0,0 +1,42 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
+UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump
+KBASE_PATH_RELATIVE = $(CURDIR)
+KDS_PATH_RELATIVE = $(CURDIR)/../../../..
+EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers
+
+ifeq ($(MALI_UNIT_TEST), 1)
+	EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+
+ifeq ($(MALI_BUS_LOG), 1)
+#Add bus logger symbols
+EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
+endif
+
+# GPL driver supports KDS
+EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
new file mode 100755
index 0000000..2bef9c2
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
new file mode 100755
index 0000000..e38120a
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -0,0 +1,63 @@
+#
+# (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+BACKEND += \
+	backend/gpu/mali_kbase_cache_policy_backend.c \
+	backend/gpu/mali_kbase_device_hw.c \
+	backend/gpu/mali_kbase_gpu.c \
+	backend/gpu/mali_kbase_gpuprops_backend.c \
+	backend/gpu/mali_kbase_debug_job_fault_backend.c \
+	backend/gpu/mali_kbase_irq_linux.c \
+	backend/gpu/mali_kbase_instr_backend.c \
+	backend/gpu/mali_kbase_jm_as.c \
+	backend/gpu/mali_kbase_jm_hw.c \
+	backend/gpu/mali_kbase_jm_rb.c \
+	backend/gpu/mali_kbase_js_affinity.c \
+	backend/gpu/mali_kbase_js_backend.c \
+	backend/gpu/mali_kbase_mmu_hw_direct.c \
+	backend/gpu/mali_kbase_pm_backend.c \
+	backend/gpu/mali_kbase_pm_driver.c \
+	backend/gpu/mali_kbase_pm_metrics.c \
+	backend/gpu/mali_kbase_pm_ca.c \
+	backend/gpu/mali_kbase_pm_ca_fixed.c \
+	backend/gpu/mali_kbase_pm_always_on.c \
+	backend/gpu/mali_kbase_pm_coarse_demand.c \
+	backend/gpu/mali_kbase_pm_demand.c \
+	backend/gpu/mali_kbase_pm_policy.c \
+	backend/gpu/mali_kbase_time.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+BACKEND += \
+	backend/gpu/mali_kbase_pm_ca_random.c \
+	backend/gpu/mali_kbase_pm_ca_demand.c \
+	backend/gpu/mali_kbase_pm_demand_always_powered.c \
+	backend/gpu/mali_kbase_pm_fast_start.c
+endif
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+BACKEND += backend/gpu/mali_kbase_devfreq.c
+endif
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+	# Dummy model
+	BACKEND += backend/gpu/mali_kbase_model_dummy.c
+	BACKEND += backend/gpu/mali_kbase_model_linux.c
+	# HW error simulation
+	BACKEND += backend/gpu/mali_kbase_model_error_generator.c
+endif
+
+ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
+	BACKEND += backend/gpu/mali_kbase_power_model_simple.c
+endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
new file mode 100755
index 0000000..c8ae87e
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100755
index 0000000..fef9a2c
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode)
+{
+	kbdev->current_gpu_coherency_mode = mode;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100755
index 0000000..fe98691
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+  * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+  *			in the GPU.
+  * @kbdev:	Device pointer
+  * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+  */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
new file mode 100755
index 0000000..7851ea6
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_debug_job_fault.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+/*GPU_CONTROL_REG(r)*/
+static int gpu_control_reg_snapshot[] = {
+	GPU_ID,
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI
+};
+
+/* JOB_CONTROL_REG(r) */
+static int job_control_reg_snapshot[] = {
+	JOB_IRQ_MASK,
+	JOB_IRQ_STATUS
+};
+
+/* JOB_SLOT_REG(n,r) */
+static int job_slot_reg_snapshot[] = {
+	JS_HEAD_LO,
+	JS_HEAD_HI,
+	JS_TAIL_LO,
+	JS_TAIL_HI,
+	JS_AFFINITY_LO,
+	JS_AFFINITY_HI,
+	JS_CONFIG,
+	JS_STATUS,
+	JS_HEAD_NEXT_LO,
+	JS_HEAD_NEXT_HI,
+	JS_AFFINITY_NEXT_LO,
+	JS_AFFINITY_NEXT_HI,
+	JS_CONFIG_NEXT
+};
+
+/*MMU_REG(r)*/
+static int mmu_reg_snapshot[] = {
+	MMU_IRQ_MASK,
+	MMU_IRQ_STATUS
+};
+
+/* MMU_AS_REG(n,r) */
+static int as_reg_snapshot[] = {
+	AS_TRANSTAB_LO,
+	AS_TRANSTAB_HI,
+	AS_MEMATTR_LO,
+	AS_MEMATTR_HI,
+	AS_FAULTSTATUS,
+	AS_FAULTADDRESS_LO,
+	AS_FAULTADDRESS_HI,
+	AS_STATUS
+};
+
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range)
+{
+	int i, j;
+	int offset = 0;
+	int slot_number;
+	int as_number;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	slot_number = kctx->kbdev->gpu_props.num_job_slots;
+	as_number = kctx->kbdev->gpu_props.num_address_spaces;
+
+	/* get the GPU control registers*/
+	for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job control registers*/
+	for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				JOB_CONTROL_REG(job_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job Slot registers*/
+	for (j = 0; j < slot_number; j++)	{
+		for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+			JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	/* get the MMU registers*/
+	for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Address space registers*/
+	for (j = 0; j < as_number; j++) {
+		for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+					MMU_AS_REG(j, as_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	WARN_ON(offset >= (reg_range*2/4));
+
+	/* set the termination flag*/
+	kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
+	kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
+			offset);
+
+	return true;
+}
+
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
+{
+	int offset = 0;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
+		kctx->reg_dump[offset+1] =
+				kbase_reg_read(kctx->kbdev,
+						kctx->reg_dump[offset], NULL);
+		offset += 2;
+	}
+	return true;
+}
+
+
+#endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
new file mode 100755
index 0000000..4a91d9e
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -0,0 +1,396 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <backend/gpu/mali_kbase_power_model_simple.h>
+#endif
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#endif /* Linux >= 3.13 */
+
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
+static struct devfreq_simple_ondemand_data ondemand_data = {
+		.upthreshold = 90,
+		.downdifferential = 5,
+};
+
+#define DEV_FREQ_GOV_DATA		(&ondemand_data)
+
+static ssize_t upthreshold_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ondemand_data.upthreshold);
+}
+
+static ssize_t upthreshold_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	unsigned int t;
+	int ret;
+
+	ret = sscanf(buf, "%u", &t);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (t >= 100 || t == 0) {
+		dev_err(dev, "invalid input:%s\n", buf);
+		return -EINVAL;
+	}
+	ondemand_data.upthreshold = t;
+	return count;
+
+}
+static DEVICE_ATTR_RW(upthreshold);
+
+static ssize_t downdifferential_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ondemand_data.downdifferential);
+}
+
+static ssize_t downdifferential_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	unsigned int t;
+	int ret;
+
+	ret = sscanf(buf, "%u", &t);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (t >= 100 || t == 0) {
+		dev_err(dev, "invalid input:%s\n", buf);
+		return -EINVAL;
+	}
+	ondemand_data.downdifferential = t;
+	return count;
+
+}
+static DEVICE_ATTR_RW(downdifferential);
+#else
+#define DEV_FREQ_GOV_DATA		(NULL)
+#endif /* CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND */
+
+static ssize_t utilisation_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev->parent);
+	unsigned long total_time = 0, busy_time = 0;
+	unsigned long utilise;
+
+	kbase_pm_get_dvfs_utilisation(kbdev, &total_time, &busy_time);
+
+	if (total_time == 0) {
+		utilise = 0;
+	} else {
+		/* round up */
+		utilise = (busy_time * 100 + (total_time >> 1)) / total_time;
+	}
+	return sprintf(buf, "%ld\n", utilise);
+}
+static DEVICE_ATTR_RO(utilisation);
+
+static struct device_attribute *kbase_dev_attr[] = {
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
+	&dev_attr_downdifferential,
+	&dev_attr_upthreshold,
+#endif
+	&dev_attr_utilisation
+};
+
+static int kbase_dev_add_attr(struct device *dev)
+{
+	int i, ret;
+	for (i = 0; i <  ARRAY_SIZE(kbase_dev_attr); i++) {
+		if (!kbase_dev_attr[i])
+			continue;
+		ret = device_create_file(dev, kbase_dev_attr[i]);
+		if (ret)
+			return ret;
+	}
+	return ret;
+}
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+	if (IS_ERR_OR_NULL(opp)) {
+		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (kbdev->current_freq == freq) {
+		*target_freq = freq;
+		return 0;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq < freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to increase voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(kbdev->clock, freq);
+	if (err) {
+		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+				freq, *target_freq);
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq > freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	*target_freq = freq;
+	kbdev->current_voltage = voltage;
+	kbdev->current_freq = freq;
+
+	KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)freq);
+
+	kbase_pm_reset_dvfs_utilisation(kbdev);
+
+	return err;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	*freq = kbdev->current_freq;
+
+	return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	stat->current_frequency = kbdev->current_freq;
+
+	kbase_pm_get_dvfs_utilisation(kbdev,
+			&stat->total_time, &stat->busy_time);
+
+	stat->private_data = NULL;
+
+	return 0;
+}
+
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+		struct devfreq_dev_profile *dp)
+{
+	int count;
+	int i = 0;
+	unsigned long freq = 0;
+	struct dev_pm_opp *opp;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	if (count < 0) {
+		rcu_read_unlock();
+		return count;
+	}
+	rcu_read_unlock();
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	for (i = 0; i < count; i++, freq++) {
+		opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
+				count, i);
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	kbase_devfreq_term_freq_table(kbdev);
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	int err;
+
+	if (!kbdev->clock)
+		return -ENODEV;
+
+	kbdev->current_freq = clk_get_rate(kbdev->clock);
+
+	dp = &kbdev->devfreq_profile;
+
+	dp->initial_freq = kbdev->current_freq;
+	dp->polling_ms = 100;
+	dp->target = kbase_devfreq_target;
+	dp->get_dev_status = kbase_devfreq_status;
+	dp->get_cur_freq = kbase_devfreq_cur_freq;
+	dp->exit = kbase_devfreq_exit;
+
+	if (kbase_devfreq_init_freq_table(kbdev, dp))
+		return -EFAULT;
+
+	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+				"simple_ondemand", DEV_FREQ_GOV_DATA);
+	if (IS_ERR(kbdev->devfreq)) {
+		kbase_devfreq_term_freq_table(kbdev);
+		return PTR_ERR(kbdev->devfreq);
+	}
+
+	kbase_dev_add_attr(&kbdev->devfreq->dev);
+	err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to register OPP notifier (%d)\n", err);
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	err = kbase_power_model_simple_init(kbdev);
+	if (err && err != -ENODEV && err != -EPROBE_DEFER) {
+		dev_err(kbdev->dev,
+			"Failed to initialize simple power model (%d)\n",
+			err);
+		goto cooling_failed;
+	}
+	if (err == -EPROBE_DEFER)
+		goto cooling_failed;
+	if (err != -ENODEV) {
+		kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+				kbdev->dev->of_node,
+				kbdev->devfreq,
+				&power_model_simple_ops);
+		if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+			err = PTR_ERR(kbdev->devfreq_cooling);
+			dev_err(kbdev->dev,
+				"Failed to register cooling device (%d)\n",
+				err);
+			goto cooling_failed;
+		}
+	} else {
+		err = 0;
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	if (devfreq_remove_device(kbdev->devfreq))
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	return err;
+}
+
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+	int err;
+
+	dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->devfreq_cooling)
+		devfreq_cooling_unregister(kbdev->devfreq_cooling);
+#endif
+
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+	err = devfreq_remove_device(kbdev->devfreq);
+	if (err)
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
new file mode 100755
index 0000000..c0bf8b1
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+int kbase_devfreq_init(struct kbase_device *kbdev);
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _BASE_DEVFREQ_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
new file mode 100755
index 0000000..dcdf15c
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -0,0 +1,255 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ *
+ */
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+
+#ifdef CONFIG_DEBUG_FS
+
+
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
+{
+	struct kbase_io_access *old_buf;
+	struct kbase_io_access *new_buf;
+	unsigned long flags;
+
+	if (!new_size)
+		goto out_err; /* The new size must not be 0 */
+
+	new_buf = vmalloc(new_size * sizeof(*h->buf));
+	if (!new_buf)
+		goto out_err;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	old_buf = h->buf;
+
+	/* Note: we won't bother with copying the old data over. The dumping
+	 * logic wouldn't work properly as it relies on 'count' both as a
+	 * counter and as an index to the buffer which would have changed with
+	 * the new array. This is a corner case that we don't need to support.
+	 */
+	h->count = 0;
+	h->size = new_size;
+	h->buf = new_buf;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+	vfree(old_buf);
+
+	return 0;
+
+out_err:
+	return -1;
+}
+
+
+int kbase_io_history_init(struct kbase_io_history *h, u16 n)
+{
+	h->enabled = false;
+	spin_lock_init(&h->lock);
+	h->count = 0;
+	h->size = 0;
+	h->buf = NULL;
+	if (kbase_io_history_resize(h, n))
+		return -1;
+
+	return 0;
+}
+
+
+void kbase_io_history_term(struct kbase_io_history *h)
+{
+	vfree(h->buf);
+	h->buf = NULL;
+}
+
+
+/* kbase_io_history_add - add new entry to the register access history
+ *
+ * @h: Pointer to the history data structure
+ * @addr: Register address
+ * @value: The value that is either read from or written to the register
+ * @write: 1 if it's a register write, 0 if it's a read
+ */
+static void kbase_io_history_add(struct kbase_io_history *h,
+		void __iomem const *addr, u32 value, u8 write)
+{
+	struct kbase_io_access *io;
+	unsigned long flags;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	io = &h->buf[h->count % h->size];
+	io->addr = (uintptr_t)addr | write;
+	io->value = value;
+	++h->count;
+	/* If count overflows, move the index by the buffer size so the entire
+	 * buffer will still be dumped later */
+	if (unlikely(!h->count))
+		h->count = h->size;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+void kbase_io_history_dump(struct kbase_device *kbdev)
+{
+	struct kbase_io_history *const h = &kbdev->io_history;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!unlikely(h->enabled))
+		return;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	dev_err(kbdev->dev, "Register IO History:");
+	iters = (h->size > h->count) ? h->count : h->size;
+	dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	writel(value, kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				value, 1);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_WRITE, offset,
+									value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx)
+{
+	u32 val;
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	val = readl(kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				val, 0);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_READ, offset, val);
+	return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
+
+/**
+ * kbase_report_gpu_fault - Report a GPU fault.
+ * @kbdev:    Kbase device pointer
+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
+ *            was also set
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using dev_warn().
+ */
+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+	u32 status;
+	u64 address;
+
+	status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+	address = (u64) kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
+	address |= kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+
+	dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+			status & 0xFF,
+			kbase_exception_name(kbdev, status),
+			address);
+	if (multiple)
+		dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+	if (val & GPU_FAULT)
+		kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+	if (val & RESET_COMPLETED)
+		kbase_pm_reset_done(kbdev);
+
+	if (val & PRFCNT_SAMPLE_COMPLETED)
+		kbase_instr_hwcnt_sample_done(kbdev);
+
+	if (val & CLEAN_CACHES_COMPLETED)
+		kbase_clean_caches_done(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+
+	/* kbase_pm_check_transitions must be called after the IRQ has been
+	 * cleared. This is because it might trigger further power transitions
+	 * and we don't want to miss the interrupt raised to notify us that
+	 * these further transitions have finished.
+	 */
+	if (val & POWER_CHANGED_ALL)
+		kbase_pm_power_changed(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
new file mode 100755
index 0000000..5b20445
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access device APIs
+ */
+
+#ifndef _KBASE_DEVICE_INTERNAL_H_
+#define _KBASE_DEVICE_INTERNAL_H_
+
+/**
+ * kbase_reg_write - write to GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @value:  Value to write
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_reg_read - read from GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ *
+ * Return: Value in desired register
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+
+/**
+ * kbase_gpu_interrupt - GPU interrupt handler
+ * @kbdev: Kbase device pointer
+ * @val:   The value of the GPU IRQ status register which triggered the call
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be
+ * handled.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+#endif /* _KBASE_DEVICE_INTERNAL_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
new file mode 100755
index 0000000..d578fd7
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbasep_platform_device_init(kbdev);
+	if (err)
+		return err;
+
+	/* Ensure we can access the GPU registers */
+	kbase_pm_register_access_enable(kbdev);
+
+	/* Find out GPU properties based on the GPU feature registers */
+	kbase_gpuprops_set(kbdev);
+
+	/* We're done accessing the GPU registers for now. */
+	kbase_pm_register_access_disable(kbdev);
+
+	err = kbase_hwaccess_pm_init(kbdev);
+	if (err)
+		goto fail_pm;
+
+	err = kbase_install_interrupts(kbdev);
+	if (err)
+		goto fail_interrupts;
+
+	return 0;
+
+fail_interrupts:
+	kbase_hwaccess_pm_term(kbdev);
+fail_pm:
+	kbasep_platform_device_term(kbdev);
+
+	return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+	kbase_release_interrupts(kbdev);
+	kbase_hwaccess_pm_term(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		return err;
+
+	err = kbase_backend_timer_init(kbdev);
+	if (err)
+		goto fail_timer;
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+		dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+		err = -EINVAL;
+		goto fail_interrupt_test;
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = kbase_job_slot_init(kbdev);
+	if (err)
+		goto fail_job_slot;
+
+	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+	return 0;
+
+fail_job_slot:
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	kbase_backend_timer_term(kbdev);
+fail_timer:
+	kbase_hwaccess_pm_halt(kbdev);
+
+	return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+	kbase_job_slot_halt(kbdev);
+	kbase_job_slot_term(kbdev);
+	kbase_backend_timer_term(kbdev);
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100755
index 0000000..b395325
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,110 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query backend APIs
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	int i;
+
+	/* Fill regdump with the content of the relevant registers */
+	regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
+
+	regdump->l2_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_FEATURES), NULL);
+	regdump->suspend_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SUSPEND_SIZE), NULL);
+	regdump->tiler_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_FEATURES), NULL);
+	regdump->mem_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MEM_FEATURES), NULL);
+	regdump->mmu_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MMU_FEATURES), NULL);
+	regdump->as_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(AS_PRESENT), NULL);
+	regdump->js_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_PRESENT), NULL);
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		regdump->js_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		regdump->texture_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
+
+	regdump->thread_max_threads = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
+	regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
+									NULL);
+	regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
+	regdump->thread_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_FEATURES), NULL);
+
+	regdump->shader_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
+	regdump->shader_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
+
+	regdump->tiler_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
+	regdump->tiler_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
+
+	regdump->l2_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
+	regdump->l2_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
+
+	regdump->stack_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(STACK_PRESENT_LO), NULL);
+	regdump->stack_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(STACK_PRESENT_HI), NULL);
+}
+
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
+		/* Ensure we can access the GPU registers */
+		kbase_pm_register_access_enable(kbdev);
+
+		regdump->coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+		/* We're done accessing the GPU registers for now. */
+		kbase_pm_register_access_disable(kbdev);
+	} else {
+		/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
+		regdump->coherency_features =
+				COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+	}
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
new file mode 100755
index 0000000..7ad309e
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -0,0 +1,492 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * GPU backend instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+
+/**
+ * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
+ * hardware
+ *
+ * @kbdev: Kbase device
+ */
+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long pm_flags;
+	u32 irq_mask;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask | CLEAN_CACHES_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	int ret;
+	u64 shader_cores_needed;
+	u32 prfcnt_config;
+
+	shader_cores_needed = kbase_pm_get_present_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* alignment failure */
+	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
+		goto out_err;
+
+	/* Override core availability policy to ensure all cores are available
+	 */
+	kbase_pm_ca_instr_enable(kbdev);
+
+	/* Request the cores early on synchronously - we'll release them on any
+	 * errors (e.g. instrumentation already active) */
+	kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		/* Instrumentation is already enabled */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		goto out_unrequest_cores;
+	}
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
+						PRFCNT_SAMPLE_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* In use, this context is the owner */
+	kbdev->hwcnt.kctx = kctx;
+	/* Remember the dump address so we can reprogram it later */
+	kbdev->hwcnt.addr = setup->dump_buffer;
+
+	/* Request the clean */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+	kbdev->hwcnt.backend.triggered = 0;
+	/* Clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+	KBASE_DEBUG_ASSERT(ret);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Wait for cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	/* Configure */
+	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	{
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+	}
+#endif
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					setup->dump_buffer & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					setup->dump_buffer >> 32,        kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+					setup->jm_bm,                    kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+					setup->shader_bm,                kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+					setup->mmu_l2_bm,                kctx);
+	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
+	 * HW counter dump. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
+									kctx);
+	else
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	err = 0;
+
+	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+	return err;
+ out_unrequest_cores:
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ out_err:
+	return err;
+}
+
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	while (1) {
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
+			/* Instrumentation is not enabled */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.kctx != kctx) {
+			/* Instrumentation has been setup for another context */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
+			break;
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+		/* Ongoing dump/setup - wait for its completion */
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Disable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+
+	/* Disable the counters */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+
+	kbdev->hwcnt.kctx = NULL;
+	kbdev->hwcnt.addr = 0ULL;
+
+	kbase_pm_ca_instr_disable(kbdev);
+
+	kbase_pm_unrequest_cores(kbdev, true,
+		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+
+	kbase_pm_release_l2_caches(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+									kctx);
+
+	err = 0;
+
+ out:
+	return err;
+}
+
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.kctx != kctx) {
+		/* The instrumentation has been setup for another context */
+		goto unlock;
+	}
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
+		/* HW counters are disabled or another dump is ongoing, or we're
+		 * resetting */
+		goto unlock;
+	}
+
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Mark that we're dumping - the PF handler can signal that we faulted
+	 */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+	/* Reconfigure the dump address */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					kbdev->hwcnt.addr >> 32, NULL);
+
+	/* Start dumping */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
+					kbdev->hwcnt.addr, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+
+	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+
+	err = 0;
+
+ unlock:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
+
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success)
+{
+	unsigned long flags;
+	bool complete = false;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+		*success = true;
+		complete = true;
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		*success = false;
+		complete = true;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
+
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(data, struct kbase_device,
+						hwcnt.backend.cache_clean_work);
+
+	mutex_lock(&kbdev->cacheclean_lock);
+	kbasep_instr_hwcnt_cacheclean(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	/* Wait for our condition, and any reset to complete */
+	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+				kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_CLEANED);
+
+	/* All finished and idle */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
+		int ret;
+		/* Always clean and invalidate the cache after a successful dump
+		 */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+		ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+		KBASE_DEBUG_ASSERT(ret);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		unsigned long flags;
+		unsigned long pm_flags;
+
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+		/* Disable interrupt */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+									NULL);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+		/* Wakeup... */
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+			/* Only wake if we weren't resetting */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+			wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+		}
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	}
+}
+
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long flags;
+	int err;
+
+	/* Wait for dump & cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		err = -EINVAL;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	} else {
+		/* Dump done */
+		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+		err = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return err;
+}
+
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* Check it's the context previously set up and we're not already
+	 * dumping */
+	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
+							KBASE_INSTR_STATE_IDLE)
+		goto out;
+
+	/* Clear the counters */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_PRFCNT_CLEAR, kctx);
+
+	err = 0;
+
+out:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+int kbase_instr_backend_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
+	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
+	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
+						kbasep_cache_clean_worker);
+	kbdev->hwcnt.backend.triggered = 0;
+
+	kbdev->hwcnt.backend.cache_clean_wq =
+			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
+	if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+void kbase_instr_backend_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
new file mode 100755
index 0000000..4794672
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+	/* State where instrumentation is not active */
+	KBASE_INSTR_STATE_DISABLED = 0,
+	/* State machine is active and ready for a command. */
+	KBASE_INSTR_STATE_IDLE,
+	/* Hardware is currently dumping a frame. */
+	KBASE_INSTR_STATE_DUMPING,
+	/* We've requested a clean to occur on a workqueue */
+	KBASE_INSTR_STATE_REQUEST_CLEAN,
+	/* Hardware is currently cleaning and invalidating caches. */
+	KBASE_INSTR_STATE_CLEANING,
+	/* Cache clean completed, and either a) a dump is complete, or
+	 * b) instrumentation can now be setup. */
+	KBASE_INSTR_STATE_CLEANED,
+	/* An error has occured during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+	wait_queue_head_t wait;
+	int triggered;
+
+	enum kbase_instr_state state;
+	wait_queue_head_t cache_clean_wait;
+	struct workqueue_struct *cache_clean_wq;
+	struct work_struct  cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100755
index 0000000..e96aeae
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100755
index 0000000..8781561
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific IRQ APIs
+ */
+
+#ifndef _KBASE_IRQ_INTERNAL_H_
+#define _KBASE_IRQ_INTERNAL_H_
+
+int kbase_install_interrupts(struct kbase_device *kbdev);
+
+void kbase_release_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
+ *                          execution
+ * @kbdev: The kbase device
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev);
+
+#endif /* _KBASE_IRQ_INTERNAL_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
new file mode 100755
index 0000000..8416b80
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -0,0 +1,469 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+
+#include <linux/interrupt.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static void *kbase_tag(void *ptr, u32 tag)
+{
+	return (void *)(((uintptr_t) ptr) | tag);
+}
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_job_done(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	atomic_inc(&kbdev->faults_pending);
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val) {
+		atomic_dec(&kbdev->faults_pending);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_mmu_interrupt(kbdev, val);
+
+	atomic_dec(&kbdev->faults_pending);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_gpu_interrupt(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler);
+
+static irq_handler_t kbase_handler_table[] = {
+	[JOB_IRQ_TAG] = kbase_job_irq_handler,
+	[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+	[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
+
+#ifdef CONFIG_MALI_DEBUG
+#define  JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define  MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define  GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * kbase_set_custom_irq_handler - Set a custom IRQ handler
+ * @kbdev: Device for which the handler is to be registered
+ * @custom_handler: Handler to be registered
+ * @irq_type: Interrupt type
+ *
+ * Registers given interrupt handler for requested interrupt type
+ * In the case where irq handler is not specified, the default handler shall be
+ * registered
+ *
+ * Return: 0 case success, error code otherwise
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+					irq_handler_t custom_handler,
+					int irq_type)
+{
+	int result = 0;
+	irq_handler_t requested_irq_handler = NULL;
+
+	KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
+						(GPU_IRQ_HANDLER >= irq_type));
+
+	/* Release previous handler */
+	if (kbdev->irqs[irq_type].irq)
+		free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
+
+	requested_irq_handler = (NULL != custom_handler) ? custom_handler :
+						kbase_handler_table[irq_type];
+
+	if (0 != request_irq(kbdev->irqs[irq_type].irq,
+			requested_irq_handler,
+			kbdev->irqs[irq_type].flags | IRQF_SHARED,
+			dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+		result = -EINVAL;
+		dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+					kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+		dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/* test correct interrupt assigment and reception by cpu */
+struct kbasep_irq_test {
+	struct hrtimer timer;
+	wait_queue_head_t wait;
+	int triggered;
+	u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT    500
+
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+	struct kbasep_irq_test *test_data = container_of(timer,
+						struct kbasep_irq_test, timer);
+
+	test_data->timeout = 1;
+	test_data->triggered = 1;
+	wake_up(&test_data->wait);
+	return HRTIMER_NORESTART;
+}
+
+static int kbasep_common_test_interrupt(
+				struct kbase_device * const kbdev, u32 tag)
+{
+	int err = 0;
+	irq_handler_t test_handler;
+
+	u32 old_mask_val;
+	u16 mask_offset;
+	u16 rawstat_offset;
+
+	switch (tag) {
+	case JOB_IRQ_TAG:
+		test_handler = kbase_job_irq_test_handler;
+		rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+		mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+		break;
+	case MMU_IRQ_TAG:
+		test_handler = kbase_mmu_irq_test_handler;
+		rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+		mask_offset = MMU_REG(MMU_IRQ_MASK);
+		break;
+	case GPU_IRQ_TAG:
+		/* already tested by pm_driver - bail out */
+	default:
+		return 0;
+	}
+
+	/* store old mask */
+	old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+	/* mask interrupts */
+	kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+	if (kbdev->irqs[tag].irq) {
+		/* release original handler and install test handler */
+		if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
+			err = -EINVAL;
+		} else {
+			kbasep_irq_test_data.timeout = 0;
+			hrtimer_init(&kbasep_irq_test_data.timer,
+					CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+			kbasep_irq_test_data.timer.function =
+						kbasep_test_interrupt_timeout;
+
+			/* trigger interrupt */
+			kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
+			kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+
+			hrtimer_start(&kbasep_irq_test_data.timer,
+					HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
+					HRTIMER_MODE_REL);
+
+			wait_event(kbasep_irq_test_data.wait,
+					kbasep_irq_test_data.triggered != 0);
+
+			if (kbasep_irq_test_data.timeout != 0) {
+				dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+				err = -EINVAL;
+			} else {
+				dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+			}
+
+			hrtimer_cancel(&kbasep_irq_test_data.timer);
+			kbasep_irq_test_data.triggered = 0;
+
+			/* mask interrupts */
+			kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+			/* release test handler */
+			free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+		}
+
+		/* restore original interrupt */
+		if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
+				kbdev->irqs[tag].flags | IRQF_SHARED,
+				dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+			dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
+						kbdev->irqs[tag].irq, tag);
+			err = -EINVAL;
+		}
+	}
+	/* restore old mask */
+	kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+
+	return err;
+}
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev)
+{
+	int err;
+
+	init_waitqueue_head(&kbasep_irq_test_data.wait);
+	kbasep_irq_test_data.triggered = 0;
+
+	/* A suspend won't happen during startup/insmod */
+	kbase_pm_context_active(kbdev);
+
+	err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+
+ out:
+	kbase_pm_context_idle(kbdev);
+
+	return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	int err;
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
+				kbdev->irqs[i].flags | IRQF_SHARED,
+				dev_name(kbdev->dev),
+				kbase_tag(kbdev, i));
+		if (err) {
+			dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+							kbdev->irqs[i].irq, i);
+#ifdef CONFIG_SPARSE_IRQ
+			dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+			goto release;
+		}
+	}
+
+	return 0;
+
+ release:
+	while (i-- > 0)
+		free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+
+	return err;
+}
+
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+	}
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			synchronize_irq(kbdev->irqs[i].irq);
+	}
+}
+
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
new file mode 100755
index 0000000..202dcfa
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -0,0 +1,378 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register backend context / address space management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+/**
+ * assign_and_activate_kctx_addr_space - Assign an AS to a context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @current_as: Address Space to assign
+ *
+ * Assign an Address Space (AS) to a context, and add the context to the Policy.
+ *
+ * This includes
+ *   setting up the global runpool_irq structure and the context on the AS,
+ *   Activating the MMU on the AS,
+ *   Allowing jobs to be submitted on the AS.
+ *
+ * Context:
+ *   kbasep_js_kctx_info.jsctx_mutex held,
+ *   kbasep_js_device_data.runpool_mutex held,
+ *   AS transaction mutex held,
+ *   Runpool IRQ lock held
+ */
+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_as *current_as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_per_as_data *js_per_as_data;
+	int as_nr = current_as->number;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	/* Attribute handling */
+	kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+	/* Assign addr space */
+	kctx->as_nr = as_nr;
+
+	/* If the GPU is currently powered, activate this address space on the
+	 * MMU */
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_mmu_update(kctx);
+	/* If the GPU was not powered then the MMU will be reprogrammed on the
+	 * next pm_context_active() */
+
+	/* Allow it to run jobs */
+	kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+	/* Book-keeping */
+	js_per_as_data->kctx = kctx;
+	js_per_as_data->as_busy_refcount = 0;
+
+	kbase_js_runpool_inc_context_count(kbdev, kctx);
+}
+
+/**
+ * release_addr_space - Release an address space
+ * @kbdev: Kbase device
+ * @kctx_as_nr: Address space of context to release
+ * @kctx: Context being released
+ *
+ * Context: kbasep_js_device_data.runpool_mutex must be held
+ *
+ * Release an address space, making it available for being picked again.
+ */
+static void release_addr_space(struct kbase_device *kbdev, int kctx_as_nr,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u16 as_bit = (1u << kctx_as_nr);
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* The address space must not already be free */
+	KBASE_DEBUG_ASSERT(!(js_devdata->as_free & as_bit));
+
+	js_devdata->as_free |= as_bit;
+
+	kbase_js_runpool_dec_context_count(kbdev, kctx);
+}
+
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	int i;
+
+	if (kbdev->hwaccess.active_kctx == kctx) {
+		/* Context is already active */
+		return true;
+	}
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+				&kbdev->js_data.runpool_irq.per_as_data[i];
+
+		if (js_per_as_data->kctx == kctx) {
+			/* Context already has ASID - mark as active */
+			return true;
+		}
+	}
+
+	/* Context does not have address space assigned */
+	return false;
+}
+
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_per_as_data *js_per_as_data;
+	int as_nr = kctx->as_nr;
+
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Attempting to release context without ASID\n");
+		return;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr];
+	if (js_per_as_data->as_busy_refcount != 0) {
+		WARN(1, "Attempting to release active ASID\n");
+		return;
+	}
+
+	/* Release context from address space */
+	js_per_as_data->kctx = NULL;
+
+	kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
+	/* If the GPU is currently powered, de-activate this address space on
+	 * the MMU */
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_mmu_disable(kctx);
+	/* If the GPU was not powered then the MMU will be reprogrammed on the
+	 * next pm_context_active() */
+
+	release_addr_space(kbdev, as_nr, kctx);
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+}
+
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+}
+
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+								int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	js_devdata->as_free |= (1 << as_nr);
+}
+
+/**
+ * check_is_runpool_full - check whether the runpool is full for a specified
+ * context
+ * @kbdev: Kbase device
+ * @kctx:  Kbase context
+ *
+ * If kctx == NULL, then this makes the least restrictive check on the
+ * runpool. A specific context that is supplied immediately after could fail
+ * the check, even under the same conditions.
+ *
+ * Therefore, once a context is obtained you \b must re-check it with this
+ * function, since the return value could change to false.
+ *
+ * Context:
+ *   In all cases, the caller must hold kbasep_js_device_data.runpool_mutex.
+ *   When kctx != NULL the caller must hold the
+ *   kbasep_js_kctx_info.ctx.jsctx_mutex.
+ *   When kctx == NULL, then the caller need not hold any jsctx_mutex locks (but
+ *   it doesn't do any harm to do so).
+ *
+ * Return: true if the runpool is full
+ */
+static bool check_is_runpool_full(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	bool is_runpool_full;
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Regardless of whether a context is submitting or not, can't have more
+	 * than there are HW address spaces */
+	is_runpool_full = (bool) (js_devdata->nr_all_contexts_running >=
+						kbdev->nr_hw_address_spaces);
+
+	if (kctx && !kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		/* Contexts that submit might use less of the address spaces
+		 * available, due to HW workarounds.  In which case, the runpool
+		 * is also full when the number of submitting contexts exceeds
+		 * the number of submittable address spaces.
+		 *
+		 * Both checks must be made: can have nr_user_address_spaces ==
+		 * nr_hw_address spaces, and at the same time can have
+		 * nr_user_contexts_running < nr_all_contexts_running. */
+		is_runpool_full |= (bool)
+					(js_devdata->nr_user_contexts_running >=
+						kbdev->nr_user_address_spaces);
+	}
+
+	return is_runpool_full;
+}
+
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	int i;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* First try to find a free address space */
+	if (check_is_runpool_full(kbdev, kctx))
+		i = -1;
+	else
+		i = ffs(js_devdata->as_free) - 1;
+
+	if (i >= 0 && i < kbdev->nr_hw_address_spaces) {
+		js_devdata->as_free &= ~(1 << i);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return i;
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* No address space currently free, see if we can release one */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+		struct kbasep_js_kctx_info *as_js_kctx_info;
+		struct kbase_context *as_kctx;
+
+		js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[i];
+		as_kctx = js_per_as_data->kctx;
+		as_js_kctx_info = &as_kctx->jctx.sched_info;
+
+		/* Don't release privileged or active contexts, or contexts with
+		 * jobs running */
+		if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) &&
+			js_per_as_data->as_busy_refcount == 0) {
+			if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
+								as_kctx)) {
+				WARN(1, "Failed to retain active context\n");
+
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				return KBASEP_AS_NR_INVALID;
+			}
+
+			kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
+
+			/* Drop and retake locks to take the jsctx_mutex on the
+			 * context we're about to release without violating lock
+			 * ordering
+			 */
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+
+			/* Release context from address space */
+			mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+
+			kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
+
+			if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) {
+				kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
+								as_kctx,
+								true);
+
+				js_devdata->as_free &= ~(1 << i);
+
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+				return i;
+			}
+
+			/* Context was retained while locks were dropped,
+			 * continue looking for free AS */
+
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_as *new_address_space = NULL;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if (kbdev->hwaccess.active_kctx == kctx ||
+	    kctx->as_nr != KBASEP_AS_NR_INVALID ||
+	    as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Invalid parameters to use_ctx()\n");
+		return false;
+	}
+
+	new_address_space = &kbdev->as[as_nr];
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
+
+	if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+		/* We need to retain it to keep the corresponding address space
+		 */
+		kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	}
+
+	return true;
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
new file mode 100755
index 0000000..08a7400
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific definitions
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
+#define _KBASE_HWACCESS_GPU_DEFS_H_
+
+/* SLOT_RB_SIZE must be < 256 */
+#define SLOT_RB_SIZE 2
+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
+
+/**
+ * struct rb_entry - Ringbuffer entry
+ * @katom:	Atom associated with this entry
+ */
+struct rb_entry {
+	struct kbase_jd_atom *katom;
+};
+
+/**
+ * struct slot_rb - Slot ringbuffer
+ * @entries:		Ringbuffer entries
+ * @last_context:	The last context to submit a job on this slot
+ * @read_idx:		Current read index of buffer
+ * @write_idx:		Current write index of buffer
+ * @job_chain_flag:	Flag used to implement jobchain disambiguation
+ */
+struct slot_rb {
+	struct rb_entry entries[SLOT_RB_SIZE];
+
+	struct kbase_context *last_context;
+
+	u8 read_idx;
+	u8 write_idx;
+
+	u8 job_chain_flag;
+};
+
+/**
+ * struct kbase_backend_data - GPU backend specific data for HW access layer
+ * @slot_rb:			Slot ringbuffers
+ * @rmu_workaround_flag:	When PRLAM-8987 is present, this flag determines
+ *				whether slots 0/1 or slot 2 are currently being
+ *				pulled from
+ * @scheduling_timer:		The timer tick used for rescheduling jobs
+ * @timer_running:		Is the timer running? The runpool_mutex must be
+ *				held whilst modifying this.
+ * @suspend_timer:              Is the timer suspended? Set when a suspend
+ *                              occurs and cleared on resume. The runpool_mutex
+ *                              must be held whilst modifying this.
+ * @reset_gpu:			Set to a KBASE_RESET_xxx value (see comments)
+ * @reset_workq:		Work queue for performing the reset
+ * @reset_work:			Work item for performing the reset
+ * @reset_wait:			Wait event signalled when the reset is complete
+ * @reset_timer:		Timeout for soft-stops before the reset
+ * @timeouts_updated:           Have timeout values just been updated?
+ *
+ * The hwaccess_lock (a spinlock) must be held when accessing this structure
+ */
+struct kbase_backend_data {
+	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
+
+	bool rmu_workaround_flag;
+
+	struct hrtimer scheduling_timer;
+
+	bool timer_running;
+	bool suspend_timer;
+
+	atomic_t reset_gpu;
+
+/* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_NOT_PENDING     0
+/* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_PREPARED        1
+/* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_COMMITTED       2
+/* The GPU reset process is currently occuring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+#define KBASE_RESET_GPU_HAPPENING       3
+/* Reset the GPU silently, used when resetting the GPU as part of normal
+ * behavior (e.g. when exiting protected mode). */
+#define KBASE_RESET_GPU_SILENT          4
+	struct workqueue_struct *reset_workq;
+	struct work_struct reset_work;
+	wait_queue_head_t reset_wait;
+	struct hrtimer reset_timer;
+
+	bool timeouts_updated;
+};
+
+/**
+ * struct kbase_jd_atom_backend - GPU backend specific katom data
+ */
+struct kbase_jd_atom_backend {
+};
+
+/**
+ * struct kbase_context_backend - GPU backend specific context data
+ */
+struct kbase_context_backend {
+};
+
+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
new file mode 100755
index 0000000..5fbd152
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -0,0 +1,1560 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel job manager APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_vinstr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+#define beenthere(kctx, f, a...) \
+			dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if KBASE_GPU_RESET_EN
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
+static void kbasep_reset_timeout_worker(struct work_struct *data);
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
+#endif /* KBASE_GPU_RESET_EN */
+
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
+						struct kbase_context *kctx)
+{
+	return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
+}
+
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js)
+{
+	struct kbase_context *kctx;
+	u32 cfg;
+	u64 jc_head = katom->jc;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	/* Command register must be available */
+	KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+	/* Affinity is not violating */
+	kbase_js_debug_log_current_affinities(kbdev);
+	KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
+							katom->affinity));
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
+						jc_head & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
+						jc_head >> 32, kctx);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+					katom->affinity & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+					katom->affinity >> 32, kctx);
+
+	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
+	 * start */
+	cfg = kctx->as_nr;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
+
+#ifndef CONFIG_MALI_COH_GPU
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START))
+		cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END))
+		cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+#endif /* CONFIG_MALI_COH_GPU */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649))
+		cfg |= JS_CONFIG_START_MMU;
+
+	cfg |= JS_CONFIG_THREAD_PRI(8);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) &&
+		(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED))
+		cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+		if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+			cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								true;
+		} else {
+			katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								false;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
+				katom->flush_id, kctx);
+
+	/* Write an approximate start timestamp.
+	 * It's approximate because there might be a job in the HEAD register.
+	 * In such cases, we'll try to make a better approximation in the IRQ
+	 * handler (up to the KBASE_JS_IRQ_THROTTLE_TIME_US). */
+	katom->start_timestamp = ktime_get();
+
+	/* GO ! */
+	dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx",
+				katom, kctx, js, jc_head, katom->affinity);
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
+							(u32) katom->affinity);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(
+				GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
+				kctx, kbase_jd_atom_id(kctx, katom));
+#endif
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head,
+			katom->affinity, cfg);
+	KBASE_TLSTREAM_TL_RET_CTX_LPU(
+		kctx,
+		&kbdev->gpu_props.props.raw_props.js_features[
+			katom->slot_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_LPU(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[js],
+			"ctx_nr,atom_nr");
+#ifdef CONFIG_GPU_TRACEPOINTS
+	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
+		/* If this is the only job on the slot, trace it as starting */
+		char js_string[16];
+
+		trace_gpu_sched_switch(
+				kbasep_make_job_slot_string(js, js_string,
+						sizeof(js_string)),
+				ktime_to_ns(katom->start_timestamp),
+				(u32)katom->kctx->id, 0, katom->work_id);
+		kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
+	}
+#endif
+	kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+						JS_COMMAND_START, katom->kctx);
+}
+
+/**
+ * kbasep_job_slot_update_head_start_timestamp - Update timestamp
+ * @kbdev: kbase device
+ * @js: job slot
+ * @end_timestamp: timestamp
+ *
+ * Update the start_timestamp of the job currently in the HEAD, based on the
+ * fact that we got an IRQ for the previous set of completed jobs.
+ *
+ * The estimate also takes into account the %KBASE_JS_IRQ_THROTTLE_TIME_US and
+ * the time the job was submitted, to work out the best estimate (which might
+ * still result in an over-estimate to the calculated time spent)
+ */
+static void kbasep_job_slot_update_head_start_timestamp(
+						struct kbase_device *kbdev,
+						int js,
+						ktime_t end_timestamp)
+{
+	if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) {
+		struct kbase_jd_atom *katom;
+		ktime_t new_timestamp;
+		ktime_t timestamp_diff;
+		/* The atom in the HEAD */
+		katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		KBASE_DEBUG_ASSERT(katom != NULL);
+
+		/* Account for any IRQ Throttle time - makes an overestimate of
+		 * the time spent by the job */
+		new_timestamp = ktime_sub_ns(end_timestamp,
+					KBASE_JS_IRQ_THROTTLE_TIME_US * 1000);
+		timestamp_diff = ktime_sub(new_timestamp,
+							katom->start_timestamp);
+		if (ktime_to_ns(timestamp_diff) >= 0) {
+			/* Only update the timestamp if it's a better estimate
+			 * than what's currently stored. This is because our
+			 * estimate that accounts for the throttle time may be
+			 * too much of an overestimate */
+			katom->start_timestamp = new_timestamp;
+		}
+	}
+}
+
+/**
+ * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint
+ * @kbdev: kbase device
+ * @js: job slot
+ *
+ * Get kbase atom by calling kbase_gpu_inspect for given job slot.
+ * Then use obtained katom and name of slot associated with the given
+ * job slot number in tracepoint call to the instrumentation module
+ * informing that given atom is no longer executed on given lpu (job slot).
+ */
+static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int js)
+{
+	int i;
+	for (i = 0;
+	     i < kbase_backend_nr_atoms_submitted(kbdev, js);
+	     i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
+			&kbdev->gpu_props.props.raw_props.js_features[js]);
+	}
+}
+
+/**
+ * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline
+ * tracepoint
+ * @kbdev: kbase device
+ * @js: job slot
+ *
+ * Make a tracepoint call to the instrumentation module informing that
+ * softstop happened on given lpu (job slot).
+ */
+static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
+					int js)
+{
+	KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(
+		&kbdev->gpu_props.props.raw_props.js_features[js]);
+}
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done)
+{
+	unsigned long flags;
+	int i;
+	u32 count = 0;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
+
+	memset(&kbdev->slot_submit_count_irq[0], 0,
+					sizeof(kbdev->slot_submit_count_irq));
+
+	/* write irq throttle register, this will prevent irqs from occurring
+	 * until the given number of gpu clock cycles have passed */
+	{
+		int irq_throttle_cycles =
+				atomic_read(&kbdev->irq_throttle_cycles);
+
+		kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE),
+						irq_throttle_cycles, NULL);
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	while (done) {
+		u32 failed = done >> 16;
+
+		/* treat failed slots as finished slots */
+		u32 finished = (done & 0xFFFF) | failed;
+
+		/* Note: This is inherently unfair, as we always check
+		 * for lower numbered interrupts before the higher
+		 * numbered ones.*/
+		i = ffs(finished) - 1;
+		KBASE_DEBUG_ASSERT(i >= 0);
+
+		do {
+			int nr_done;
+			u32 active;
+			u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */
+			u64 job_tail = 0;
+
+			if (failed & (1u << i)) {
+				/* read out the job slot status code if the job
+				 * slot reported failure */
+				completion_code = kbase_reg_read(kbdev,
+					JOB_SLOT_REG(i, JS_STATUS), NULL);
+
+				switch (completion_code) {
+				case BASE_JD_EVENT_STOPPED:
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+					kbase_trace_mali_job_slots_event(
+						GATOR_MAKE_EVENT(
+						GATOR_JOB_SLOT_SOFT_STOPPED, i),
+								NULL, 0);
+#endif
+
+					kbasep_trace_tl_event_lpu_softstop(
+						kbdev, i);
+
+					kbasep_trace_tl_nret_atom_lpu(
+						kbdev, i);
+
+					/* Soft-stopped job - read the value of
+					 * JS<n>_TAIL so that the job chain can
+					 * be resumed */
+					job_tail = (u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_LO),
+									NULL) |
+						((u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_HI),
+								NULL) << 32);
+					break;
+				case BASE_JD_EVENT_NOT_STARTED:
+					/* PRLAM-10673 can cause a TERMINATED
+					 * job to come back as NOT_STARTED, but
+					 * the error interrupt helps us detect
+					 * it */
+					completion_code =
+						BASE_JD_EVENT_TERMINATED;
+					/* fall through */
+				default:
+					dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+							i, completion_code,
+							kbase_exception_name
+							(kbdev,
+							completion_code));
+				}
+
+				kbase_gpu_irq_evict(kbdev, i);
+			}
+
+			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+					done & ((1 << i) | (1 << (i + 16))),
+					NULL);
+			active = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_JS_STATE),
+					NULL);
+
+			if (((active >> i) & 1) == 0 &&
+					(((done >> (i + 16)) & 1) == 0)) {
+				/* There is a potential race we must work
+				 * around:
+				 *
+				 *  1. A job slot has a job in both current and
+				 *     next registers
+				 *  2. The job in current completes
+				 *     successfully, the IRQ handler reads
+				 *     RAWSTAT and calls this function with the
+				 *     relevant bit set in "done"
+				 *  3. The job in the next registers becomes the
+				 *     current job on the GPU
+				 *  4. Sometime before the JOB_IRQ_CLEAR line
+				 *     above the job on the GPU _fails_
+				 *  5. The IRQ_CLEAR clears the done bit but not
+				 *     the failed bit. This atomically sets
+				 *     JOB_IRQ_JS_STATE. However since both jobs
+				 *     have now completed the relevant bits for
+				 *     the slot are set to 0.
+				 *
+				 * If we now did nothing then we'd incorrectly
+				 * assume that _both_ jobs had completed
+				 * successfully (since we haven't yet observed
+				 * the fail bit being set in RAWSTAT).
+				 *
+				 * So at this point if there are no active jobs
+				 * left we check to see if RAWSTAT has a failure
+				 * bit set for the job slot. If it does we know
+				 * that there has been a new failure that we
+				 * didn't previously know about, so we make sure
+				 * that we record this in active (but we wait
+				 * for the next loop to deal with it).
+				 *
+				 * If we were handling a job failure (i.e. done
+				 * has the relevant high bit set) then we know
+				 * that the value read back from
+				 * JOB_IRQ_JS_STATE is the correct number of
+				 * remaining jobs because the failed job will
+				 * have prevented any futher jobs from starting
+				 * execution.
+				 */
+				u32 rawstat = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+				if ((rawstat >> (i + 16)) & 1) {
+					/* There is a failed job that we've
+					 * missed - add it back to active */
+					active |= (1u << i);
+				}
+			}
+
+			dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+							completion_code);
+
+			nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+			nr_done -= (active >> i) & 1;
+			nr_done -= (active >> (i + 16)) & 1;
+
+			if (nr_done <= 0) {
+				dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+									i);
+
+				goto spurious;
+			}
+
+			count += nr_done;
+
+			while (nr_done) {
+				if (nr_done == 1) {
+					kbase_gpu_complete_hw(kbdev, i,
+								completion_code,
+								job_tail,
+								&end_timestamp);
+					kbase_jm_try_kick_all(kbdev);
+				} else {
+					/* More than one job has completed.
+					 * Since this is not the last job being
+					 * reported this time it must have
+					 * passed. This is because the hardware
+					 * will not allow further jobs in a job
+					 * slot to complete until the failed job
+					 * is cleared from the IRQ status.
+					 */
+					kbase_gpu_complete_hw(kbdev, i,
+							BASE_JD_EVENT_DONE,
+							0,
+							&end_timestamp);
+				}
+				nr_done--;
+			}
+ spurious:
+			done = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
+				/* Workaround for missing interrupt caused by
+				 * PRLAM-10883 */
+				if (((active >> i) & 1) && (0 ==
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(i,
+							JS_STATUS), NULL))) {
+					/* Force job slot to be processed again
+					 */
+					done |= (1u << i);
+				}
+			}
+
+			failed = done >> 16;
+			finished = (done & 0xFFFF) | failed;
+			if (done)
+				end_timestamp = ktime_get();
+		} while (finished & (1 << i));
+
+		kbasep_job_slot_update_head_start_timestamp(kbdev, i,
+								end_timestamp);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#if KBASE_GPU_RESET_EN
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_COMMITTED) {
+		/* If we're trying to reset the GPU then we might be able to do
+		 * it early (without waiting for a timeout) because some jobs
+		 * have completed
+		 */
+		kbasep_try_reset_gpu_early(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
+}
+KBASE_EXPORT_TEST_API(kbase_job_done);
+
+static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	bool soft_stops_allowed = true;
+
+	if (kbase_jd_katom_is_protected(katom)) {
+		soft_stops_allowed = false;
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+		if ((katom->core_req & BASE_JD_REQ_T) != 0)
+			soft_stops_allowed = false;
+	}
+	return soft_stops_allowed;
+}
+
+static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
+						base_jd_core_req core_reqs)
+{
+	bool hard_stops_allowed = true;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
+		if ((core_reqs & BASE_JD_REQ_T) != 0)
+			hard_stops_allowed = false;
+	}
+	return hard_stops_allowed;
+}
+
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom)
+{
+	struct kbase_context *kctx = target_katom->kctx;
+#if KBASE_TRACE_ENABLE
+	u32 status_reg_before;
+	u64 job_in_head_before;
+	u32 status_reg_after;
+
+	KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+
+	/* Check the head pointer */
+	job_in_head_before = ((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
+			| (((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_HI), NULL))
+									<< 32);
+	status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+#endif
+
+	if (action == JS_COMMAND_SOFT_STOP) {
+		bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
+								target_katom);
+
+		if (!soft_stop_allowed) {
+#ifdef CONFIG_MALI_DEBUG
+			dev_dbg(kbdev->dev,
+					"Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+
+		/* We are about to issue a soft stop, so mark the atom as having
+		 * been soft stopped */
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+
+		/* Mark the point where we issue the soft-stop command */
+		KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(target_katom);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+			int i;
+
+			for (i = 0;
+			     i < kbase_backend_nr_atoms_submitted(kbdev, js);
+			     i++) {
+				struct kbase_jd_atom *katom;
+
+				katom = kbase_gpu_inspect(kbdev, js, i);
+
+				KBASE_DEBUG_ASSERT(katom);
+
+				/* For HW_ISSUE_8316, only 'bad' jobs attacking
+				 * the system can cause this issue: normally,
+				 * all memory should be allocated in multiples
+				 * of 4 pages, and growable memory should be
+				 * changed size in multiples of 4 pages.
+				 *
+				 * Whilst such 'bad' jobs can be cleared by a
+				 * GPU reset, the locking up of a uTLB entry
+				 * caused by the bad job could also stall other
+				 * ASs, meaning that other ASs' jobs don't
+				 * complete in the 'grace' period before the
+				 * reset. We don't want to lose other ASs' jobs
+				 * when they would normally complete fine, so we
+				 * must 'poke' the MMU regularly to help other
+				 * ASs complete */
+				kbase_as_poking_timer_retain_atom(
+						kbdev, katom->kctx, katom);
+			}
+		}
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_SOFT_STOP_1 :
+				JS_COMMAND_SOFT_STOP_0;
+		}
+	} else if (action == JS_COMMAND_HARD_STOP) {
+		bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
+								core_reqs);
+
+		if (!hard_stop_allowed) {
+			/* Jobs can be hard-stopped for the following reasons:
+			 *  * CFS decides the job has been running too long (and
+			 *    soft-stop has not occurred). In this case the GPU
+			 *    will be reset by CFS if the job remains on the
+			 *    GPU.
+			 *
+			 *  * The context is destroyed, kbase_jd_zap_context
+			 *    will attempt to hard-stop the job. However it also
+			 *    has a watchdog which will cause the GPU to be
+			 *    reset if the job remains on the GPU.
+			 *
+			 *  * An (unhandled) MMU fault occurred. As long as
+			 *    BASE_HW_ISSUE_8245 is defined then the GPU will be
+			 *    reset.
+			 *
+			 * All three cases result in the GPU being reset if the
+			 * hard-stop fails, so it is safe to just return and
+			 * ignore the hard-stop request.
+			 */
+			dev_warn(kbdev->dev,
+					"Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+			return;
+		}
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_HARD_STOP_1 :
+				JS_COMMAND_HARD_STOP_0;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
+
+#if KBASE_TRACE_ENABLE
+	status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+	if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
+		struct kbase_jd_atom *head;
+		struct kbase_context *head_kctx;
+
+		head = kbase_gpu_inspect(kbdev, js, 0);
+		head_kctx = head->kctx;
+
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx,
+						head, job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+						0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	} else {
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+#endif
+}
+
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	/* Cancel any remaining running jobs for this kctx  */
+	mutex_lock(&kctx->jctx.lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Invalidate all jobs in context, to prevent re-submitting */
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		if (!work_pending(&kctx->jctx.atoms[i].work))
+			kctx->jctx.atoms[i].event_code =
+						BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_hardstop(kctx, i, NULL);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev;
+	int js = target_katom->slot_nr;
+	int priority = target_katom->sched_priority;
+	int i;
+	bool stop_sent = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		if (katom->kctx != kctx)
+			continue;
+
+		if (katom->sched_priority > priority) {
+			if (!stop_sent)
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE(
+						target_katom);
+
+			kbase_job_slot_softstop(kbdev, js, katom);
+			stop_sent = true;
+		}
+	}
+}
+
+struct zap_reset_data {
+	/* The stages are:
+	 * 1. The timer has never been called
+	 * 2. The zap has timed out, all slots are soft-stopped - the GPU reset
+	 *    will happen. The GPU has been reset when
+	 *    kbdev->hwaccess.backend.reset_waitq is signalled
+	 *
+	 * (-1 - The timer has been cancelled)
+	 */
+	int stage;
+	struct kbase_device *kbdev;
+	struct hrtimer timer;
+	spinlock_t lock; /* protects updates to stage member */
+};
+
+static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer)
+{
+	struct zap_reset_data *reset_data = container_of(timer,
+						struct zap_reset_data, timer);
+	struct kbase_device *kbdev = reset_data->kbdev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&reset_data->lock, flags);
+
+	if (reset_data->stage == -1)
+		goto out;
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_prepare_to_reset_gpu(kbdev)) {
+		dev_err(kbdev->dev, "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+								ZAP_TIMEOUT);
+		kbase_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	reset_data->stage = 2;
+
+ out:
+	spin_unlock_irqrestore(&reset_data->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct zap_reset_data reset_data;
+	unsigned long flags;
+
+	hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	reset_data.timer.function = zap_timeout_callback;
+
+	spin_lock_init(&reset_data.lock);
+
+	reset_data.kbdev = kbdev;
+	reset_data.stage = 1;
+
+	hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for all jobs to finish, and for the context to be not-scheduled
+	 * (due to kbase_job_zap_context(), we also guarentee it's not in the JS
+	 * policy queue either */
+	wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
+	wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+		   !kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	spin_lock_irqsave(&reset_data.lock, flags);
+	if (reset_data.stage == 1) {
+		/* The timer hasn't run yet - so cancel it */
+		reset_data.stage = -1;
+	}
+	spin_unlock_irqrestore(&reset_data.lock, flags);
+
+	hrtimer_cancel(&reset_data.timer);
+
+	if (reset_data.stage == 2) {
+		/* The reset has already started.
+		 * Wait for the reset to complete
+		 */
+		wait_event(kbdev->hwaccess.backend.reset_wait,
+				atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+						== KBASE_RESET_GPU_NOT_PENDING);
+	}
+	destroy_hrtimer_on_stack(&reset_data.timer);
+
+	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+	/* Ensure that the signallers of the waitqs have finished */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
+{
+	u32 flush_id = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.backend.gpu_powered)
+			flush_id = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return flush_id;
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (NULL == kbdev->hwaccess.backend.reset_workq)
+		return -EINVAL;
+
+	KBASE_DEBUG_ASSERT(0 ==
+		object_is_on_stack(&kbdev->hwaccess.backend.reset_work));
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+#endif
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
+ * @kbdev: kbase device pointer
+ * @kctx:  context to check against
+ * @js:	   slot to check
+ * @target_katom: An atom to check, or NULL if all atoms from @kctx on
+ *                slot @js should be checked
+ *
+ * This checks are based upon parameters that would normally be passed to
+ * kbase_job_slot_hardstop().
+ *
+ * In the event of @target_katom being NULL, this will check the last jobs that
+ * are likely to be running on the slot to see if a) they belong to kctx, and
+ * so would be stopped, and b) whether they have AFBC
+ *
+ * In that case, It's guaranteed that a job currently executing on the HW with
+ * AFBC will be detected. However, this is a conservative check because it also
+ * detects jobs that have just completed too.
+ *
+ * Return: true when hard-stop _might_ stop an afbc atom, else false.
+ */
+static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
+		struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom)
+{
+	bool ret = false;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* When we have an atom the decision can be made straight away. */
+	if (target_katom)
+		return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC);
+
+	/* Otherwise, we must chweck the hardware to see if it has atoms from
+	 * this context with AFBC. */
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		/* Ignore atoms from other contexts, they won't be stopped when
+		 * we use this for checking if we should hard-stop them */
+		if (katom->kctx != kctx)
+			continue;
+
+		/* An atom on this slot and this context: check for AFBC */
+		if (katom->core_req & BASE_JD_REQ_FS_AFBC) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
+}
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * @sw_flags:      Flags to pass in about the soft-stop
+ *
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+			struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+	KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+	kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
+			JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool stopped;
+#if KBASE_GPU_RESET_EN
+	/* We make the check for AFBC before evicting/stopping atoms.  Note
+	 * that no other thread can modify the slots whilst we have the
+	 * hwaccess_lock. */
+	int needs_workaround_for_afbc =
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
+			&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
+					 target_katom);
+#endif
+
+	stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
+							target_katom,
+							JS_COMMAND_HARD_STOP);
+#if KBASE_GPU_RESET_EN
+	if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+			needs_workaround_for_afbc)) {
+		/* MIDBASE-2916 if a fragment job with AFBC encoding is
+		 * hardstopped, ensure to do a soft reset also in order to
+		 * clear the GPU status.
+		 * Workaround for HW issue 8401 has an issue,so after
+		 * hard-stopping just reset the GPU. This will ensure that the
+		 * jobs leave the GPU.*/
+		if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+			dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue");
+			kbase_reset_gpu_locked(kbdev);
+		}
+	}
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	/* For hard-stop, don't enter if hard-stop not allowed */
+	if (hw_action == JS_COMMAND_HARD_STOP &&
+			!kbasep_hard_stop_allowed(kbdev, core_reqs))
+		return;
+
+	/* For soft-stop, don't enter if soft-stop not allowed, or isn't
+	 * causing disjoint */
+	if (hw_action == JS_COMMAND_SOFT_STOP &&
+			!(kbasep_soft_stop_allowed(kbdev, target_katom) &&
+			  (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+		return;
+
+	/* Nothing to do if already logged disjoint state on this atom */
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+		return;
+
+	target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+	kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom)
+{
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+		target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+		kbase_disjoint_state_down(kbdev);
+	}
+}
+
+
+#if KBASE_GPU_RESET_EN
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+	int i;
+
+	kbase_io_history_dump(kbdev);
+
+	dev_err(kbdev->dev, "Register state:");
+	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+	dev_err(kbdev->dev, "  JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x JOB_IRQ_THROTTLE=0x%08x",
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), NULL));
+	for (i = 0; i < 3; i++) {
+		dev_err(kbdev->dev, "  JS%d_STATUS=0x%08x      JS%d_HEAD_LO=0x%08x",
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
+					NULL),
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
+					NULL));
+	}
+	dev_err(kbdev->dev, "  MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+	dev_err(kbdev->dev, "  GPU_IRQ_MASK=0x%08x    JOB_IRQ_MASK=0x%08x     MMU_IRQ_MASK=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+	dev_err(kbdev->dev, "  PWR_OVERRIDE0=0x%08x   PWR_OVERRIDE1=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+	dev_err(kbdev->dev, "  SHADER_CONFIG=0x%08x   L2_MMU_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+}
+
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	int i;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+	bool try_schedule = false;
+	bool silent = false;
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	KBASE_DEBUG_ASSERT(data);
+
+	kbdev = container_of(data, struct kbase_device,
+						hwaccess.backend.reset_work);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_SILENT)
+		silent = true;
+
+	KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	/* Make sure the timer has completed - this cannot be done from
+	 * interrupt context, so this cannot be done within
+	 * kbasep_try_reset_gpu_early. */
+	hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* This would re-activate the GPU. Since it's already idle,
+		 * there's no need to reset it */
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+		kbase_disjoint_state_down(kbdev);
+		wake_up(&kbdev->hwaccess.backend.reset_wait);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	spin_lock(&kbdev->hwaccess_lock);
+	spin_lock(&kbdev->mmu_mask_change);
+	/* We're about to flush out the IRQs and their bottom half's */
+	kbdev->irq_reset_flush = true;
+
+	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
+	 * spinlock; this also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts_nolock(kbdev);
+
+	spin_unlock(&kbdev->mmu_mask_change);
+	spin_unlock(&kbdev->hwaccess_lock);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Ensure that any IRQ handlers have finished
+	 * Must be done without any locks IRQ handlers will take */
+	kbase_synchronize_irqs(kbdev);
+
+	/* Flush out any in-flight work items */
+	kbase_flush_mmu_wqs(kbdev);
+
+	/* The flush has completed so reset the active indicator */
+	kbdev->irq_reset_flush = false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
+		/* Ensure that L2 is not transitioning when we send the reset
+		 * command */
+		while (--max_loops && kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_L2))
+			;
+
+		WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
+	}
+
+	mutex_lock(&kbdev->pm.lock);
+	/* We hold the pm lock, so there ought to be a current policy */
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+	/* All slot have been soft-stopped and we've waited
+	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
+	 * assume that anything that is still left on the GPU is stuck there and
+	 * we'll kill it when we reset the GPU */
+
+	if (!silent)
+		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+								RESET_TIMEOUT);
+
+	/* Output the state of some interesting registers to help in the
+	 * debugging of GPU resets */
+	if (!silent)
+		kbase_debug_dump_registers(kbdev);
+
+	/* Reset the GPU */
+	kbase_pm_init_hw(kbdev, 0);
+
+	/* Complete any jobs that were still on the GPU */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_reset(kbdev, &end_timestamp);
+	kbase_pm_metrics_update(kbdev, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	/* Reprogram the GPU's MMU */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		if (js_devdata->runpool_irq.per_as_data[i].kctx)
+			kbase_mmu_update(
+				js_devdata->runpool_irq.per_as_data[i].kctx);
+		else
+			kbase_mmu_disable_as(kbdev, i);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_pm_enable_interrupts(kbdev);
+
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+
+	kbase_disjoint_state_down(kbdev);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	if (!silent)
+		dev_err(kbdev->dev, "Reset complete");
+
+	if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
+		try_schedule = true;
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Find out what cores are required now */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* Synchronously request and wait for those cores, because if
+	 * instrumentation is enabled it would need them immediately. */
+	kbase_pm_check_transitions_sync(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Try submitting some jobs to restart processing */
+	if (try_schedule) {
+		KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
+									0);
+		kbase_js_sched_all(kbdev);
+	}
+
+	kbase_pm_context_idle(kbdev);
+
+	/* Release vinstr */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+						hwaccess.backend.reset_timer);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Reset still pending? */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+						KBASE_RESET_GPU_COMMITTED)
+		queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+	int i;
+	int pending_jobs = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Count the number of jobs */
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+	if (pending_jobs > 0) {
+		/* There are still jobs on the GPU - wait */
+		return;
+	}
+
+	/* To prevent getting incorrect registers when dumping failed job,
+	 * skip early reset.
+	 */
+	if (kbdev->job_fault_debug != false)
+		return;
+
+	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+	 * been called), and that no other thread beat this thread to starting
+	 * the reset */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+						KBASE_RESET_GPU_COMMITTED) {
+		/* Reset has already occurred */
+		return;
+	}
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_try_reset_gpu_early_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ *   The function returns a boolean which should be interpreted as follows:
+ *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
+ *   false - Another thread is performing a reset, kbase_reset_gpu should
+ *   not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_PREPARED) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return false;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_softstop(kbdev, i, NULL);
+
+	return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	bool ret;
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early_locked(kbdev);
+}
+
+void kbase_reset_gpu_silent(struct kbase_device *kbdev)
+{
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_SILENT) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+			&kbdev->hwaccess.backend.reset_work);
+}
+
+bool kbase_reset_gpu_active(struct kbase_device *kbdev)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_NOT_PENDING)
+		return false;
+
+	return true;
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100755
index 0000000..101a07b
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev:		Device pointer
+ * @s:			Job slot
+ * @completion_code:	Completion code of job reported by GPU
+ * @job_tail:		Job tail address reported by GPU
+ * @end_timestamp:	Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+					u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static inline char *kbasep_make_job_slot_string(int js, char *js_string,
+						size_t js_size)
+{
+	snprintf(js_string, js_size, "job_slot_%i", js);
+	return js_string;
+}
+#endif
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ *						   on the specified atom
+ * @kbdev:		Device pointer
+ * @js:			Job slot to stop on
+ * @action:		The action to perform, either JSn_COMMAND_HARD_STOP or
+ *			JSn_COMMAND_SOFT_STOP
+ * @core_reqs:		Core requirements of atom to stop
+ * @target_katom:	Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ *					 slot belonging to a given context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @katom:	Specific atom to stop. May be NULL
+ * @js:		Job slot to hard stop
+ * @action:	The action to perform, either JSn_COMMAND_HARD_STOP or
+ *		JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
new file mode 100755
index 0000000..caf0e2f
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -0,0 +1,1884 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_10969_workaround.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Return whether the specified ringbuffer is empty. HW access lock must be
+ * held */
+#define SLOT_RB_EMPTY(rb)   (rb->write_idx == rb->read_idx)
+/* Return number of atoms currently in the specified ringbuffer. HW access lock
+ * must be held */
+#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer
+ * @kbdev: Device pointer
+ * @katom: Atom to enqueue
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr];
+
+	WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
+	rb->write_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+/**
+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once
+ * it has been completed
+ * @kbdev:         Device pointer
+ * @js:            Job slot to remove atom from
+ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in
+ *                 which case current time will be used.
+ *
+ * Context: Caller must hold the HW access lock
+ *
+ * Return: Atom removed from ringbuffer
+ */
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
+						int js,
+						ktime_t *end_timestamp)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+	struct kbase_jd_atom *katom;
+
+	if (SLOT_RB_EMPTY(rb)) {
+		WARN(1, "GPU ringbuffer unexpectedly empty\n");
+		return NULL;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
+
+	kbase_gpu_release_atom(kbdev, katom, end_timestamp);
+
+	rb->read_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
+
+	kbase_js_debug_log_current_affinities(kbdev);
+
+	return katom;
+}
+
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
+		return NULL; /* idx out of range */
+
+	return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js)
+{
+	return kbase_gpu_inspect(kbdev, js, 0);
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	if (SLOT_RB_EMPTY(rb))
+		return NULL;
+
+	return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom;
+}
+
+/**
+ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently
+ * on the GPU
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ *
+ * Return: true if there are atoms on the GPU for slot js,
+ *         false otherwise
+ */
+static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (!katom)
+			return false;
+		if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED ||
+				katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
+ * currently on the GPU
+ * @kbdev:  Device pointer
+ *
+ * Return: true if there are any atoms on the GPU, false otherwise
+ */
+static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
+{
+	int js;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+			if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+				return true;
+		}
+	}
+	return false;
+}
+
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED))
+			nr++;
+	}
+
+	return nr;
+}
+
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		if (kbase_gpu_inspect(kbdev, js, i))
+			nr++;
+	}
+
+	return nr;
+}
+
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
+				enum kbase_atom_gpu_rb_state min_rb_state)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state >= min_rb_state))
+			nr++;
+	}
+
+	return nr;
+}
+
+/**
+ * check_secure_atom - Check if the given atom is in the given secure state and
+ *                     has a ringbuffer state of at least
+ *                     KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @katom:  Atom pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if atom is in the given state, false otherwise
+ */
+static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure)
+{
+	if (katom->gpu_rb_state >=
+			KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION &&
+			((kbase_jd_katom_is_protected(katom) && secure) ||
+			(!kbase_jd_katom_is_protected(katom) && !secure)))
+		return true;
+
+	return false;
+}
+
+/**
+ * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given
+ *                                secure state in the ringbuffers of at least
+ *                                state
+ *                                KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE
+ * @kbdev:  Device pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if any atoms are in the given state, false otherwise
+ */
+static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
+		bool secure)
+{
+	int js, i;
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, i);
+
+			if (katom) {
+				if (check_secure_atom(katom, secure))
+					return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* The GPU is being reset - so prevent submission */
+		return 0;
+	}
+
+	return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
+}
+
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom);
+
+static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
+						int js,
+						struct kbase_jd_atom *katom)
+{
+	/* The most recently checked affinity. Having this at this scope allows
+	 * us to guarantee that we've checked the affinity in this function
+	 * call.
+	 */
+	u64 recently_chosen_affinity = 0;
+	bool chosen_affinity = false;
+	bool retry;
+
+	do {
+		retry = false;
+
+		/* NOTE: The following uses a number of FALLTHROUGHs to optimize
+		 * the calls to this function. Ending of the function is
+		 * indicated by BREAK OUT */
+		switch (katom->coreref_state) {
+			/* State when job is first attempted to be run */
+		case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+			KBASE_DEBUG_ASSERT(katom->affinity == 0);
+
+			/* Compute affinity */
+			if (false == kbase_js_choose_affinity(
+					&recently_chosen_affinity, kbdev, katom,
+									js)) {
+				/* No cores are currently available */
+				/* *** BREAK OUT: No state transition *** */
+				break;
+			}
+
+			chosen_affinity = true;
+
+			/* Request the cores */
+			kbase_pm_request_cores(kbdev,
+					katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+			katom->affinity = recently_chosen_affinity;
+
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+			{
+				enum kbase_pm_cores_ready cores_ready;
+
+				KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+				cores_ready = kbase_pm_register_inuse_cores(
+						kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						katom->affinity);
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Stay in this state and return, to
+					 * retry at this state later */
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: No state transition
+					 * *** */
+					break;
+				}
+				/* Proceed to next state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+			}
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+			/* Optimize out choosing the affinity twice in the same
+			 * function call */
+			if (chosen_affinity == false) {
+				/* See if the affinity changed since a previous
+				 * call. */
+				if (false == kbase_js_choose_affinity(
+						&recently_chosen_affinity,
+							kbdev, katom, js)) {
+					/* No cores are currently available */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REQUEST_ON_RECHECK_FAILED,
+						katom->kctx, katom,
+						katom->jc, js,
+						(u32) recently_chosen_affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+				chosen_affinity = true;
+			}
+
+			/* Now see if this requires a different set of cores */
+			if (recently_chosen_affinity != katom->affinity) {
+				enum kbase_pm_cores_ready cores_ready;
+
+				kbase_pm_request_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+				/* Register new cores whilst we still hold the
+				 * old ones, to minimize power transitions */
+				cores_ready =
+					kbase_pm_register_inuse_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+
+				/* Fixup the state that was reduced by
+				 * deref_cores: */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				katom->affinity = recently_chosen_affinity;
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				/* Now might be waiting for powerup again, with
+				 * a new affinity */
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Return to previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_ON_RECHECK_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+			}
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+		case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+			KBASE_DEBUG_ASSERT(katom->affinity ==
+						recently_chosen_affinity);
+
+			/* Note: this is where the caller must've taken the
+			 * hwaccess_lock */
+
+			/* Check for affinity violations - if there are any,
+			 * then we just ask the caller to requeue and try again
+			 * later */
+			if (kbase_js_affinity_would_violate(kbdev, js,
+					katom->affinity) != false) {
+				/* Return to previous state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				/* *** BREAK OUT: Transition to lower state ***
+				 */
+				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_AFFINITY_WOULD_VIOLATE,
+					katom->kctx, katom, katom->jc, js,
+					(u32) katom->affinity);
+				break;
+			}
+
+			/* No affinity violations would result, so the cores are
+			 * ready */
+			katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
+			/* *** BREAK OUT: Cores Ready *** */
+			break;
+
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false,
+					"Unhandled kbase_atom_coreref_state %d",
+							katom->coreref_state);
+			break;
+		}
+	} while (retry != false);
+
+	return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
+}
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	switch (katom->coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(katom->affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							katom->coreref_state);
+		break;
+	}
+
+	katom->affinity = 0;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+}
+
+static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							coreref_state);
+		break;
+	}
+}
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp)
+{
+	switch (katom->gpu_rb_state) {
+	case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+		/* Should be impossible */
+		WARN(1, "Attempting to release atom not in ringbuffer\n");
+		break;
+
+	case KBASE_ATOM_GPU_RB_SUBMITTED:
+		/* Inform power management at start/finish of atom so it can
+		 * update its GPU utilisation metrics. Mark atom as not
+		 * submitted beforehand. */
+		katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		kbase_pm_metrics_update(kbdev, end_timestamp);
+
+		if (katom->core_req & BASE_JD_REQ_PERMON)
+			kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_READY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+		kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
+							katom->affinity);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+		break;
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+		if (katom->protected_state.enter !=
+				KBASE_ATOM_ENTER_PROTECTED_CHECK ||
+				katom->protected_state.exit !=
+				KBASE_ATOM_EXIT_PROTECTED_CHECK)
+			kbdev->protected_mode_transition = false;
+
+		if (kbase_jd_katom_is_protected(katom) &&
+				(katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_IDLE_L2))
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+		break;
+	}
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	kbase_gpu_release_atom(kbdev, katom, NULL);
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+}
+
+static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	bool slot_busy[3];
+
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+	slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+
+	if ((js == 2 && !(slot_busy[0] || slot_busy[1])) ||
+		(js != 2 && !slot_busy[2]))
+		return true;
+
+	/* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */
+	if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1) ||
+			backend->rmu_workaround_flag))
+		return false;
+
+	/* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */
+	if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) ||
+			!backend->rmu_workaround_flag))
+		return false;
+
+	backend->rmu_workaround_flag = !backend->rmu_workaround_flag;
+
+	return true;
+}
+
+/**
+ * other_slots_busy - Determine if any job slots other than @js are currently
+ *                    running atoms
+ * @kbdev: Device pointer
+ * @js:    Job slot
+ *
+ * Return: true if any slots other than @js are busy, false otherwise
+ */
+static inline bool other_slots_busy(struct kbase_device *kbdev, int js)
+{
+	int slot;
+
+	for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) {
+		if (slot == js)
+			continue;
+
+		if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot,
+				KBASE_ATOM_GPU_RB_SUBMITTED))
+			return true;
+	}
+
+	return false;
+}
+
+static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev)
+{
+	return kbdev->protected_mode;
+}
+
+static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot enter protected mode: protected callbacks not specified.\n");
+
+	/*
+	 * When entering into protected mode, we must ensure that the
+	 * GPU is not operating in coherent mode as well. This is to
+	 * ensure that no protected memory can be leaked.
+	 */
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE);
+
+	if (kbdev->protected_ops) {
+		/* Switch GPU to protected mode */
+		err = kbdev->protected_ops->protected_mode_enter(kbdev);
+
+		if (err)
+			dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n",
+					err);
+		else
+			kbdev->protected_mode = true;
+	}
+
+	return err;
+}
+
+static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot exit protected mode: protected callbacks not specified.\n");
+
+	if (!kbdev->protected_ops)
+		return -EINVAL;
+
+	kbase_reset_gpu_silent(kbdev);
+
+	return 0;
+}
+
+static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	switch (katom[idx]->protected_state.enter) {
+	case KBASE_ATOM_ENTER_PROTECTED_CHECK:
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		kbdev->protected_mode_transition = true;
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_VINSTR;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_VINSTR:
+		if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
+			/*
+			 * We can't switch now because
+			 * the vinstr core state switch
+			 * is not done yet.
+			 */
+			return -EAGAIN;
+		}
+
+		/* Once reaching this point GPU must be
+		 * switched to protected mode or vinstr
+		 * re-enabled. */
+
+		/*
+		 * Not in correct mode, begin protected mode switch.
+		 * Entering protected mode requires us to power down the L2,
+		 * and drop out of fully coherent mode.
+		 */
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
+		/* Avoid unnecessary waiting on non-ACE platforms. */
+		if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) {
+			if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+				/*
+				* The L2 is still powered, wait for all the users to
+				* finish with it before doing the actual reset.
+				*/
+				return -EAGAIN;
+			}
+		}
+
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_FINISHED;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
+
+		/* No jobs running, so we can switch GPU mode right now. */
+		err = kbase_gpu_protected_mode_enter(kbdev);
+
+		/*
+		 * Regardless of result, we are no longer transitioning
+		 * the GPU.
+		 */
+		kbdev->protected_mode_transition = false;
+
+		if (err) {
+			/*
+			 * Failed to switch into protected mode, resume
+			 * vinstr core and fail atom.
+			 */
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order. */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+			return -EINVAL;
+		}
+
+		/* Protected mode sanity checks. */
+		KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) ==
+			kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]),
+			kbase_gpu_in_protected_mode(kbdev));
+		katom[idx]->gpu_rb_state =
+			KBASE_ATOM_GPU_RB_READY;
+	}
+
+	return 0;
+}
+
+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+
+	switch (katom[idx]->protected_state.exit) {
+	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		/*
+		 * Exiting protected mode requires a reset, but first the L2
+		 * needs to be powered down to ensure it's not active when the
+		 * reset is issued.
+		 */
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
+
+		kbdev->protected_mode_transition = true;
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+	case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/*
+			 * The L2 is still powered, wait for all the users to
+			 * finish with it before doing the actual reset.
+			 */
+			return -EAGAIN;
+		}
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET:
+		/* Issue the reset to the GPU */
+		err = kbase_gpu_protected_mode_reset(kbdev);
+
+		if (err) {
+			kbdev->protected_mode_transition = false;
+
+			/* Failed to exit protected mode, fail atom */
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+			return -EINVAL;
+		}
+
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT:
+		if (kbase_reset_gpu_active(kbdev))
+			return -EAGAIN;
+
+		kbdev->protected_mode_transition = false;
+		kbdev->protected_mode = false;
+
+		/* protected mode sanity checks */
+		KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev));
+		KBASE_DEBUG_ASSERT_MSG(
+			(kbase_jd_katom_is_protected(katom[idx]) && js == 0) ||
+			!kbase_jd_katom_is_protected(katom[idx]),
+			"Protected atom on JS%d not supported", js);
+	}
+
+	return 0;
+}
+
+void kbase_backend_slot_update(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_jd_atom *katom[2];
+		int idx;
+
+		katom[0] = kbase_gpu_inspect(kbdev, js, 0);
+		katom[1] = kbase_gpu_inspect(kbdev, js, 1);
+		WARN_ON(katom[1] && !katom[0]);
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			bool cores_ready;
+			int ret;
+
+			if (!katom[idx])
+				continue;
+
+			switch (katom[idx]->gpu_rb_state) {
+			case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+				/* Should be impossible */
+				WARN(1, "Attempting to update atom not in ringbuffer\n");
+				break;
+
+			case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+				if (katom[idx]->atom_flags &
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED)
+					break;
+
+				katom[idx]->gpu_rb_state =
+				KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+				if (kbase_gpu_check_secure_atoms(kbdev,
+						!kbase_jd_katom_is_protected(
+						katom[idx])))
+					break;
+
+				if ((idx == 1) && (kbase_jd_katom_is_protected(
+								katom[0]) !=
+						kbase_jd_katom_is_protected(
+								katom[1])))
+					break;
+
+				if (kbdev->protected_mode_transition)
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+
+				/*
+				 * Exiting protected mode must be done before
+				 * the references on the cores are taken as
+				 * a power down the L2 is required which
+				 * can't happen after the references for this
+				 * atom are taken.
+				 */
+
+				if (!kbase_gpu_in_protected_mode(kbdev) &&
+					kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition into protected mode. */
+					ret = kbase_jm_enter_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				} else if (kbase_gpu_in_protected_mode(kbdev) &&
+					!kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition out of protected mode. */
+					ret = kbase_jm_exit_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				}
+				katom[idx]->protected_state.exit =
+						KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+				/* Atom needs no protected mode transition. */
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+				if (katom[idx]->will_fail_event_code) {
+					kbase_gpu_mark_atom_for_return(kbdev,
+							katom[idx]);
+					/* Set EVENT_DONE so this atom will be
+					   completed, not unpulled. */
+					katom[idx]->event_code =
+						BASE_JD_EVENT_DONE;
+					/* Only return if head atom or previous
+					 * atom already removed - as atoms must
+					 * be returned in order. */
+					if (idx == 0 ||	katom[0]->gpu_rb_state ==
+							KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+						kbase_gpu_dequeue_atom(kbdev, js, NULL);
+						kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+					}
+					break;
+				}
+
+				cores_ready =
+					kbasep_js_job_check_ref_cores(kbdev, js,
+								katom[idx]);
+
+				if (katom[idx]->event_code ==
+						BASE_JD_EVENT_PM_EVENT) {
+					katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+					break;
+				}
+
+				if (!cores_ready)
+					break;
+
+				kbase_js_affinity_retain_slot_cores(kbdev, js,
+							katom[idx]->affinity);
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+				if (!kbase_gpu_rmu_workaround(kbdev, js))
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_READY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_READY:
+
+				if (idx == 1) {
+					/* Only submit if head atom or previous
+					 * atom already submitted */
+					if ((katom[0]->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED &&
+						katom[0]->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+						break;
+
+					/* If intra-slot serialization in use
+					 * then don't submit atom to NEXT slot
+					 */
+					if (kbdev->serialize_jobs &
+						KBASE_SERIALIZE_INTRA_SLOT)
+						break;
+				}
+
+				/* If inter-slot serialization in use then don't
+				 * submit atom if any other slots are in use */
+				if ((kbdev->serialize_jobs &
+						KBASE_SERIALIZE_INTER_SLOT) &&
+						other_slots_busy(kbdev, js))
+					break;
+
+				if ((kbdev->serialize_jobs &
+						KBASE_SERIALIZE_RESET) &&
+						kbase_reset_gpu_active(kbdev))
+					break;
+
+				/* Check if this job needs the cycle counter
+				 * enabled before submission */
+				if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+					kbase_pm_request_gpu_cycle_counter_l2_is_on(
+									kbdev);
+
+				kbase_job_hw_submit(kbdev, katom[idx], js);
+				katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_SUBMITTED;
+
+				/* Inform power management at start/finish of
+				 * atom so it can update its GPU utilisation
+				 * metrics. */
+				kbase_pm_metrics_update(kbdev,
+						&katom[idx]->start_timestamp);
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_SUBMITTED:
+				/* Atom submitted to HW, nothing else to do */
+				break;
+
+			case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+				/* Only return if head atom or previous atom
+				 * already removed - as atoms must be returned
+				 * in order */
+				if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					kbase_gpu_dequeue_atom(kbdev, js, NULL);
+					kbase_jm_return_atom_to_js(kbdev,
+								katom[idx]);
+				}
+				break;
+			}
+		}
+	}
+
+	/* Warn if PRLAM-8987 affinity restrictions are violated */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1)) &&
+			kbase_gpu_atoms_submitted(kbdev, 2));
+}
+
+
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbase_gpu_enqueue_atom(kbdev, katom);
+	kbase_backend_slot_update(kbdev);
+}
+
+#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \
+	(KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER))
+
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_atom *next_katom;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = kbase_gpu_inspect(kbdev, js, 0);
+	next_katom = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (next_katom && katom->kctx == next_katom->kctx &&
+		next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+		HAS_DEP(next_katom) &&
+		(kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+		kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL)
+									!= 0)) {
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+				JS_COMMAND_NOP, NULL);
+		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		return true;
+	}
+
+	return false;
+}
+
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) &&
+			completion_code != BASE_JD_EVENT_DONE &&
+			!(completion_code & BASE_JD_SW_EVENT)) {
+		katom->need_cache_flush_cores_retained = katom->affinity;
+		kbase_pm_request_cores(kbdev, false, katom->affinity);
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
+		if (kbdev->gpu_props.num_core_groups > 1 &&
+			!(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[0].core_mask
+									) &&
+			(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[1].core_mask
+									)) {
+			dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
+			katom->need_cache_flush_cores_retained =
+								katom->affinity;
+			kbase_pm_request_cores(kbdev, false,
+							katom->affinity);
+		}
+	}
+
+	katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+	kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);
+	KBASE_TLSTREAM_TL_NRET_ATOM_LPU(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[
+				katom->slot_nr]);
+	KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
+	KBASE_TLSTREAM_TL_NRET_CTX_LPU(
+			kctx,
+			&kbdev->gpu_props.props.raw_props.js_features[
+				katom->slot_nr]);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED) {
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		/*
+		 * Dequeue next atom from ringbuffers on same slot if required.
+		 * This atom will already have been removed from the NEXT
+		 * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
+		 * the atoms on this slot are returned in the correct order.
+		 */
+		if (next_katom && katom->kctx == next_katom->kctx &&
+				next_katom->sched_priority ==
+				katom->sched_priority) {
+			kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+			kbase_jm_return_atom_to_js(kbdev, next_katom);
+		}
+	} else if (completion_code != BASE_JD_EVENT_DONE) {
+		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+		int i;
+
+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
+		KBASE_TRACE_DUMP(kbdev);
+#endif
+		kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+		/*
+		 * Remove all atoms on the same context from ringbuffers. This
+		 * will not remove atoms that are already on the GPU, as these
+		 * are guaranteed not to have fail dependencies on the failed
+		 * atom.
+		 */
+		for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+			struct kbase_jd_atom *katom_idx0 =
+						kbase_gpu_inspect(kbdev, i, 0);
+			struct kbase_jd_atom *katom_idx1 =
+						kbase_gpu_inspect(kbdev, i, 1);
+
+			if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
+					HAS_DEP(katom_idx0) &&
+					katom_idx0->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Dequeue katom_idx0 from ringbuffer */
+				kbase_gpu_dequeue_atom(kbdev, i, end_timestamp);
+
+				if (katom_idx1 &&
+						katom_idx1->kctx == katom->kctx
+						&& HAS_DEP(katom_idx1) &&
+						katom_idx0->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+					/* Dequeue katom_idx1 from ringbuffer */
+					kbase_gpu_dequeue_atom(kbdev, i,
+							end_timestamp);
+
+					katom_idx1->event_code =
+							BASE_JD_EVENT_STOPPED;
+					kbase_jm_return_atom_to_js(kbdev,
+								katom_idx1);
+				}
+				katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+
+			} else if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					HAS_DEP(katom_idx1) &&
+					katom_idx1->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Can not dequeue this atom yet - will be
+				 * dequeued when atom at idx0 completes */
+				katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_gpu_mark_atom_for_return(kbdev,
+								katom_idx1);
+			}
+		}
+	}
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
+					js, completion_code);
+
+	if (job_tail != 0 && job_tail != katom->jc) {
+		bool was_updated = (job_tail != katom->jc);
+
+		/* Some of the job has been executed, so we update the job chain
+		 * address to where we should resume from */
+		katom->jc = job_tail;
+		if (was_updated)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+						katom, job_tail, js);
+	}
+
+	/* Only update the event code for jobs that weren't cancelled */
+	if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+		katom->event_code = (base_jd_event_code)completion_code;
+
+	kbase_device_trace_register_access(kctx, REG_WRITE,
+						JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+						1 << js);
+
+	/* Complete the job, and start new ones
+	 *
+	 * Also defer remaining work onto the workqueue:
+	 * - Re-queue Soft-stopped jobs
+	 * - For any other jobs, queue the job back into the dependency system
+	 * - Schedule out the parent context if necessary, and schedule a new
+	 *   one in.
+	 */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	{
+		/* The atom in the HEAD */
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		if (next_katom && next_katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+							js_string,
+							sizeof(js_string)),
+						ktime_to_ns(*end_timestamp),
+						(u32)next_katom->kctx->id, 0,
+						next_katom->work_id);
+			kbdev->hwaccess.backend.slot_rb[js].last_context =
+							next_katom->kctx;
+		} else {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+							js_string,
+							sizeof(js_string)),
+						ktime_to_ns(ktime_get()), 0, 0,
+						0);
+			kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
+		}
+	}
+#endif
+
+	if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)
+		kbase_reset_gpu_silent(kbdev);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED)
+		katom = kbase_jm_return_atom_to_js(kbdev, katom);
+	else
+		katom = kbase_jm_complete(kbdev, katom, end_timestamp);
+
+	if (katom) {
+		/* Cross-slot dependency has now become runnable. Try to submit
+		 * it. */
+
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		kbase_jm_try_kick(kbdev, 1 << katom->slot_nr);
+	}
+
+	/* Job completion may have unblocked other atoms. Try to update all job
+	 * slots */
+	kbase_backend_slot_update(kbdev);
+}
+
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int atom_idx = 0;
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, atom_idx);
+			bool keep_in_jm_rb = false;
+
+			if (!katom)
+				break;
+
+			if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED)
+				keep_in_jm_rb = true;
+
+			kbase_gpu_release_atom(kbdev, katom, NULL);
+
+			/*
+			 * If the atom wasn't on HW when the reset was issued
+			 * then leave it in the RB and next time we're kicked
+			 * it will be processed again from the starting state.
+			 */
+			if (keep_in_jm_rb) {
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+				katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+				katom->affinity = 0;
+				katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+				/* As the atom was not removed, increment the
+				 * index so that we read the correct atom in the
+				 * next iteration. */
+				atom_idx++;
+				continue;
+			}
+
+			/*
+			 * The atom was on the HW when the reset was issued
+			 * all we can do is fail the atom.
+			 */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			kbase_jm_complete(kbdev, katom, end_timestamp);
+		}
+	}
+
+	kbdev->protected_mode_transition = false;
+	kbdev->protected_mode = false;
+}
+
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
+	kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
+							katom->core_req, katom);
+	katom->kctx->blocked_js[js][katom->sched_priority] = true;
+}
+
+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom,
+						u32 action,
+						bool disjoint)
+{
+	katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+	kbase_gpu_mark_atom_for_return(kbdev, katom);
+	katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true;
+
+	if (disjoint)
+		kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
+									katom);
+}
+
+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
+{
+	if (katom->x_post_dep) {
+		struct kbase_jd_atom *dep_atom = katom->x_post_dep;
+
+		if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB &&
+			dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_RETURN_TO_JS)
+			return dep_atom->slot_nr;
+	}
+	return -1;
+}
+
+static void kbase_job_evicted(struct kbase_jd_atom *katom)
+{
+	kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom,
+			katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
+}
+
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbase_jd_atom *katom_idx0;
+	struct kbase_jd_atom *katom_idx1;
+
+	bool katom_idx0_valid, katom_idx1_valid;
+
+	bool ret = false;
+
+	int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
+	int prio_idx0 = 0, prio_idx1 = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
+	katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (katom_idx0)
+		prio_idx0 = katom_idx0->sched_priority;
+	if (katom_idx1)
+		prio_idx1 = katom_idx1->sched_priority;
+
+	if (katom) {
+		katom_idx0_valid = (katom_idx0 == katom);
+		/* If idx0 is to be removed and idx1 is on the same context,
+		 * then idx1 must also be removed otherwise the atoms might be
+		 * returned out of order */
+		if (katom_idx1)
+			katom_idx1_valid = (katom_idx1 == katom) ||
+						(katom_idx0_valid &&
+							(katom_idx0->kctx ==
+							katom_idx1->kctx));
+		else
+			katom_idx1_valid = false;
+	} else {
+		katom_idx0_valid = (katom_idx0 &&
+				(!kctx || katom_idx0->kctx == kctx));
+		katom_idx1_valid = (katom_idx1 &&
+				(!kctx || katom_idx1->kctx == kctx) &&
+				prio_idx0 == prio_idx1);
+	}
+
+	if (katom_idx0_valid)
+		stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
+	if (katom_idx1_valid)
+		stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1);
+
+	if (katom_idx0_valid) {
+		if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Simple case - just dequeue and return */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			if (katom_idx1_valid) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				katom_idx1->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx1);
+				katom_idx1->kctx->blocked_js[js][prio_idx1] =
+						true;
+			}
+
+			katom_idx0->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+			kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+			katom_idx0->kctx->blocked_js[js][prio_idx0] = true;
+		} else {
+			/* katom_idx0 is on GPU */
+			if (katom_idx1 && katom_idx1->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* katom_idx0 and katom_idx1 are on GPU */
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+					/* idx0 has already completed - stop
+					 * idx1 if needed*/
+					if (katom_idx1_valid) {
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				} else {
+					/* idx1 is in NEXT registers - attempt
+					 * to remove */
+					kbase_reg_write(kbdev,
+							JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+					if (kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_HI), NULL)
+									!= 0) {
+						/* idx1 removed successfully,
+						 * will be handled in IRQ */
+						kbase_job_evicted(katom_idx1);
+						kbase_gpu_remove_atom(kbdev,
+								katom_idx1,
+								action, true);
+						stop_x_dep_idx1 =
+					should_stop_x_dep_slot(katom_idx1);
+
+						/* stop idx0 if still on GPU */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx0,
+								action);
+						ret = true;
+					} else if (katom_idx1_valid) {
+						/* idx0 has already completed,
+						 * stop idx1 if needed */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				}
+			} else if (katom_idx1_valid) {
+				/* idx1 not on GPU but must be dequeued*/
+
+				/* idx1 will be handled in IRQ */
+				kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+				/* stop idx0 */
+				/* This will be repeated for anything removed
+				 * from the next registers, since their normal
+				 * flow was also interrupted, and this function
+				 * might not enter disjoint state e.g. if we
+				 * don't actually do a hard stop on the head
+				 * atom */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			} else {
+				/* no atom in idx1 */
+				/* just stop idx0 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			}
+		}
+	} else if (katom_idx1_valid) {
+		if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Mark for return */
+			/* idx1 will be returned once idx0 completes */
+			kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+		} else {
+			/* idx1 is on GPU */
+			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+				/* idx0 has already completed - stop idx1 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx1,
+									action);
+				ret = true;
+			} else {
+				/* idx1 is in NEXT registers - attempt to
+				 * remove */
+				kbase_reg_write(kbdev, JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_LO), NULL) != 0 ||
+				    kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_HI), NULL) != 0) {
+					/* idx1 removed successfully, will be
+					 * handled in IRQ once idx0 completes */
+					kbase_job_evicted(katom_idx1);
+					kbase_gpu_remove_atom(kbdev, katom_idx1,
+									action,
+									false);
+				} else {
+					/* idx0 has already completed - stop
+					 * idx1 */
+					kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+					ret = true;
+				}
+			}
+		}
+	}
+
+
+	if (stop_x_dep_idx0 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0,
+								NULL, action);
+
+	if (stop_x_dep_idx1 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1,
+								NULL, action);
+
+	return ret;
+}
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	/* Limit the number of loops to avoid a hang if the interrupt is missed
+	 */
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+	unsigned long flags;
+
+	mutex_lock(&kbdev->cacheclean_lock);
+
+	/* use GPU_COMMAND completion solution */
+	/* clean & invalidate the caches */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+	/* wait for cache flush to complete before continuing */
+	while (--max_loops &&
+		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+						CLEAN_CACHES_COMPLETED) == 0)
+		;
+
+	/* clear the CLEAN_CACHES_COMPLETED irq */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
+							CLEAN_CACHES_COMPLETED);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+						CLEAN_CACHES_COMPLETED, NULL);
+	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING,
+	    "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+	mutex_unlock(&kbdev->cacheclean_lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_unrequest_cores(kbdev, false,
+					katom->need_cache_flush_cores_retained);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	/*
+	 * If cache flush required due to HW workaround then perform the flush
+	 * now
+	 */
+	if (katom->need_cache_flush_cores_retained) {
+		kbase_gpu_cacheclean(kbdev, katom);
+		katom->need_cache_flush_cores_retained = 0;
+	}
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969)            &&
+	    (katom->core_req & BASE_JD_REQ_FS)                        &&
+	    katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT       &&
+	    (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+	    !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+		dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+		if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after was soft-stopped
+			 * Due to an HW issue we try to execute the job again.
+			 */
+			dev_dbg(kbdev->dev,
+				"Clamping has been executed, try to rerun the job\n"
+			);
+			katom->event_code = BASE_JD_EVENT_STOPPED;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+		}
+	}
+
+	/* Clear the coreref_state now - while check_deref_cores() may not have
+	 * been called yet, the caller will have taken a copy of this field. If
+	 * this is not done, then if the atom is re-scheduled (following a soft
+	 * stop) then the core reference would not be retaken. */
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->affinity = 0;
+}
+
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity,
+			coreref_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!kbdev->pm.active_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		mutex_lock(&kbdev->pm.lock);
+		kbase_pm_update_active(kbdev);
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+void kbase_gpu_dump_slots(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	unsigned long flags;
+	int js;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+									js,
+									idx);
+
+			if (katom)
+				dev_info(kbdev->dev,
+				"  js%d idx%d : katom=%p gpu_rb_state=%d\n",
+				js, idx, katom, katom->gpu_rb_state);
+			else
+				dev_info(kbdev->dev, "  js%d idx%d : empty\n",
+								js, idx);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
new file mode 100755
index 0000000..1e0e05a
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_H_
+#define _KBASE_HWACCESS_GPU_H_
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/**
+ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot
+ *
+ * @kbdev:         Device pointer
+ * @js:            Job slot to evict from
+ *
+ * Evict the atom in the NEXT slot for the specified job slot. This function is
+ * called from the job complete IRQ handler when the previous job has failed.
+ *
+ * Return: true if job evicted from NEXT registers, false otherwise
+ */
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_gpu_complete_hw - Complete an atom on job slot js
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot that has completed
+ * @completion_code: Event code from job that has completed
+ * @job_tail:        The tail address from the hardware if the job has partially
+ *                   completed
+ * @end_timestamp:   Time of completion
+ */
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
+ *
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ * @idx:    Index into ringbuffer. 0 is the job currently running on
+ *          the slot, 1 is the job waiting, all other values are invalid.
+ * Return:  The atom at that position in the ringbuffer
+ *          or NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx);
+
+/**
+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
new file mode 100755
index 0000000..54d8ddd
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
@@ -0,0 +1,303 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+#include "mali_kbase_hw.h"
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+									int js)
+{
+	/*
+	 * Here are the reasons for using job slot 2:
+	 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+	 * - In absence of the above, then:
+	 *  - Atoms with BASE_JD_REQ_COHERENT_GROUP
+	 *  - But, only when there aren't contexts with
+	 *  KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+	 *  all cores on slot 1 could be blocked by those using a coherent group
+	 *  on slot 2
+	 *  - And, only when you actually have 2 or more coregroups - if you
+	 *  only have 1 coregroup, then having jobs for slot 2 implies they'd
+	 *  also be for slot 1, meaning you'll get interference from them. Jobs
+	 *  able to run on slot 2 could also block jobs that can only run on
+	 *  slot 1 (tiler jobs)
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+
+	if (js != 2)
+		return true;
+
+	/* Only deal with js==2 now: */
+	if (kbdev->gpu_props.num_core_groups > 1) {
+		/* Only use slot 2 in the 2+ coregroup case */
+		if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+					KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
+								false) {
+			/* ...But only when we *don't* have atoms that run on
+			 * all cores */
+
+			/* No specific check for BASE_JD_REQ_COHERENT_GROUP
+			 * atoms - the policy will sort that out */
+			return true;
+		}
+	}
+
+	/* Above checks failed mean we shouldn't use slot 2 */
+	return false;
+}
+
+/*
+ * As long as it has been decided to have a deeper modification of
+ * what job scheduler, power manager and affinity manager will
+ * implement, this function is just an intermediate step that
+ * assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ *   and high cores in a core split will be evently distributed.
+ * - Odd combinations of core requirements have been filtered out
+ *   and do not get to this function (e.g. CS+T+NSS is not
+ *   supported here).
+ * - This function is frequently called and can be optimized,
+ *   (see notes in loops), but as the functionallity will likely
+ *   be modified, optimization has not been addressed.
+*/
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js)
+{
+	base_jd_core_req core_req = katom->core_req;
+	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+	u64 core_availability_mask;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+	/*
+	 * If no cores are currently available (core availability policy is
+	 * transitioning) then fail.
+	 */
+	if (0 == core_availability_mask) {
+		*affinity = 0;
+		return false;
+	}
+
+	KBASE_DEBUG_ASSERT(js >= 0);
+
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+								BASE_JD_REQ_T) {
+		 /* If the hardware supports XAFFINITY then we'll only enable
+		  * the tiler (which is the default so this is a no-op),
+		  * otherwise enable shader core 0. */
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = 1;
+		else
+			*affinity = 0;
+
+		return true;
+	}
+
+	if (1 == kbdev->gpu_props.num_cores) {
+		/* trivial case only one core, nothing to do */
+		*affinity = core_availability_mask &
+				kbdev->pm.debug_core_mask[js];
+	} else {
+		if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+			if (js == 0 || num_core_groups == 1) {
+				/* js[0] and single-core-group systems just get
+				 * the first core group */
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[0].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+			} else {
+				/* js[1], js[2] use core groups 0, 1 for
+				 * dual-core-group systems */
+				u32 core_group_idx = ((u32) js) - 1;
+
+				KBASE_DEBUG_ASSERT(core_group_idx <
+							num_core_groups);
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+
+				/* If the job is specifically targeting core
+				 * group 1 and the core availability policy is
+				 * keeping that core group off, then fail */
+				if (*affinity == 0 && core_group_idx == 1 &&
+						kbdev->pm.backend.cg1_disabled
+								== true)
+					katom->event_code =
+							BASE_JD_EVENT_PM_EVENT;
+			}
+		} else {
+			/* All cores are available when no core split is
+			 * required */
+			*affinity = core_availability_mask &
+					kbdev->pm.debug_core_mask[js];
+		}
+	}
+
+	/*
+	 * If no cores are currently available in the desired core group(s)
+	 * (core availability policy is transitioning) then fail.
+	 */
+	if (*affinity == 0)
+		return false;
+
+	/* Enable core 0 if tiler required for hardware without XAFFINITY
+	 * support (notes above) */
+	if (core_req & BASE_JD_REQ_T) {
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = *affinity | 1;
+	}
+
+	return true;
+}
+
+static inline bool kbase_js_affinity_is_violating(
+						struct kbase_device *kbdev,
+								u64 *affinities)
+{
+	/* This implementation checks whether the two slots involved in Generic
+	 * thread creation have intersecting affinity. This is due to micro-
+	 * architectural issues where a job in slot A targetting cores used by
+	 * slot B could prevent the job in slot B from making progress until the
+	 * job in slot A has completed.
+	 */
+	u64 affinity_set_left;
+	u64 affinity_set_right;
+	u64 intersection;
+
+	KBASE_DEBUG_ASSERT(affinities != NULL);
+
+	affinity_set_left = affinities[1];
+
+	affinity_set_right = affinities[2];
+
+	/* A violation occurs when any bit in the left_set is also in the
+	 * right_set */
+	intersection = affinity_set_left & affinity_set_right;
+
+	return (bool) (intersection != (u64) 0u);
+}
+
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
+			sizeof(js_devdata->runpool_irq.slot_affinities));
+
+	new_affinities[js] |= affinity;
+
+	return kbase_js_affinity_is_violating(kbdev, new_affinities);
+}
+
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
+								== false);
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		cnt =
+		++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (cnt == 1)
+			js_devdata->runpool_irq.slot_affinities[js] |= bit;
+
+		cores &= ~bit;
+	}
+}
+
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		KBASE_DEBUG_ASSERT(
+		js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
+
+		cnt =
+		--(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (0 == cnt)
+			js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
+
+		cores &= ~bit;
+	}
+}
+
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int slot_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	for (slot_nr = 0; slot_nr < 3; ++slot_nr)
+		KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
+							NULL, 0u, slot_nr,
+			(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
new file mode 100755
index 0000000..35d9781
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Affinity Manager internal APIs.
+ */
+
+#ifndef _KBASE_JS_AFFINITY_H_
+#define _KBASE_JS_AFFINITY_H_
+
+/**
+ * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
+ * submit a job to a particular job slot in the current status
+ *
+ * @kbdev: The kbase device structure of the device
+ * @js:    Job slot number to check for allowance
+ *
+ * Will check if submitting to the given job slot is allowed in the current
+ * status.  For example using job slot 2 while in soft-stoppable state and only
+ * having 1 coregroup is not allowed by the policy. This function should be
+ * called prior to submitting a job to a slot to make sure policy rules are not
+ * violated.
+ *
+ * The following locking conditions are made on the caller
+ * - it must hold hwaccess_lock
+ */
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_js_choose_affinity - Compute affinity for a given job.
+ *
+ * @affinity: Affinity bitmap computed
+ * @kbdev:    The kbase device structure of the device
+ * @katom:    Job chain of which affinity is going to be found
+ * @js:       Slot the job chain is being submitted
+ *
+ * Currently assumes an all-on/all-off power management policy.
+ * Also assumes there is at least one core with tiler available.
+ *
+ * Returns true if a valid affinity was chosen, false if
+ * no cores were available.
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					int js);
+
+/**
+ * kbase_js_affinity_would_violate - Determine whether a proposed affinity on
+ * job slot @js would cause a violation of affinity restrictions.
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot to test
+ * @affinity: The affinity mask to test
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ *
+ * Return: true if the affinity would violate the restrictions
+ */
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
+ *                                       a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot retaining the cores
+ * @affinity: The cores to retain
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
+ *                                        by a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       Job slot
+ * @affinity: Bit mask of core to be released
+ *
+ * Cores must be released as soon as a job is dequeued from a slot's 'submit
+ * slots', and before another job is submitted to those slots. Otherwise, the
+ * refcount could exceed the maximum number submittable to a slot,
+ * %BASE_JM_SUBMIT_SLOTS.
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_debug_log_current_affinities - log the current affinities
+ *
+ * @kbdev:  Kbase device structure
+ *
+ * Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else				/*  KBASE_TRACE_ENABLE  */
+static inline void
+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+}
+#endif				/*  KBASE_TRACE_ENABLE  */
+
+#endif				/* _KBASE_JS_AFFINITY_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100755
index 0000000..a8c1af2
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,356 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is
+ * enabled for a particular level is down to the integrator. However this is
+ * being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	s8 nr_running_ctxs;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* Timer must stop if we are suspending */
+	if (backend->suspend_timer)
+		return false;
+
+	/* nr_contexts_pullable is updated with the runpool_mutex. However, the
+	 * locking in the caller gives us a barrier that ensures
+	 * nr_contexts_pullable is up-to-date for reading */
+	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (kbdev->js_data.softstop_always) {
+		/* Debug support for allowing soft-stop on a single context */
+		return true;
+	}
+#endif				/* CONFIG_MALI_DEBUG */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+		/* Timeouts would have to be 4x longer (due to micro-
+		 * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there's:
+		 * - 2 or more CL contexts
+		 * - 1 or more GLES contexts
+		 *
+		 * NOTE: We will treat a context that has both Compute and Non-
+		 * Compute jobs will be treated as an OpenCL context (hence, we
+		 * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+		 */
+		{
+			s8 nr_compute_ctxs =
+				kbasep_js_ctx_attr_count_on_runpool(kbdev,
+						KBASEP_JS_CTX_ATTR_COMPUTE);
+			s8 nr_noncompute_ctxs = nr_running_ctxs -
+							nr_compute_ctxs;
+
+			return (bool) (nr_compute_ctxs >= 2 ||
+							nr_noncompute_ctxs > 0);
+		}
+	} else {
+		/* Run the timer callback whenever you have at least 1 context
+		 */
+		return (bool) (nr_running_ctxs > 0);
+	}
+}
+
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_backend_data *backend;
+	int s;
+	bool reset_needed = false;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	backend = container_of(timer, struct kbase_backend_data,
+							scheduling_timer);
+	kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
+	js_devdata = &kbdev->js_data;
+
+	/* Loop through the slots */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+		struct kbase_jd_atom *atom = NULL;
+
+		if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
+			atom = kbase_gpu_inspect(kbdev, s, 0);
+			KBASE_DEBUG_ASSERT(atom != NULL);
+		}
+
+		if (atom != NULL) {
+			/* The current version of the model doesn't support
+			 * Soft-Stop */
+			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+				u32 ticks = atom->ticks++;
+
+#if !CINSTR_DUMPING_ENABLED
+				u32 soft_stop_ticks, hard_stop_ticks,
+								gpu_reset_ticks;
+				if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks_cl;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_cl;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_cl;
+				} else {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_ss;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_ss;
+				}
+
+				/* If timeouts have been changed then ensure
+				 * that atom tick count is not greater than the
+				 * new soft_stop timeout. This ensures that
+				 * atoms do not miss any of the timeouts due to
+				 * races between this worker and the thread
+				 * changing the timeouts. */
+				if (backend->timeouts_updated &&
+						ticks > soft_stop_ticks)
+					ticks = atom->ticks = soft_stop_ticks;
+
+				/* Job is Soft-Stoppable */
+				if (ticks == soft_stop_ticks) {
+					int disjoint_threshold =
+		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+					u32 softstop_flags = 0u;
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks ticks.
+					 * Soft stop the slot so we can run
+					 * other jobs.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+					/* nr_user_contexts_running is updated
+					 * with the runpool_mutex, but we can't
+					 * take that here.
+					 *
+					 * However, if it's about to be
+					 * increased then the new context can't
+					 * run any jobs until they take the
+					 * hwaccess_lock, so it's OK to observe
+					 * the older value.
+					 *
+					 * Similarly, if it's about to be
+					 * decreased, the last job from another
+					 * context has already finished, so it's
+					 * not too bad that we observe the older
+					 * value and register a disjoint event
+					 * when we try soft-stopping */
+					if (js_devdata->nr_user_contexts_running
+							>= disjoint_threshold)
+						softstop_flags |=
+						JS_COMMAND_SW_CAUSES_DISJOINT;
+
+					kbase_job_slot_softstop_swflags(kbdev,
+						s, atom, softstop_flags);
+#endif
+				} else if (ticks == hard_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_ss ticks.
+					 * It should have been soft-stopped by
+					 * now. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks == gpu_reset_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_ss ticks.
+					 * It should have left the GPU by now.
+					 * Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#else				/* !CINSTR_DUMPING_ENABLED */
+				/* NOTE: During CINSTR_DUMPING_ENABLED, we use
+				 * the alternate timeouts, which makes the hard-
+				 * stop and GPU reset timeout much longer. We
+				 * also ensure that we don't soft-stop at all.
+				 */
+				if (ticks == js_devdata->soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks. We do
+					 * not soft-stop during
+					 * CINSTR_DUMPING_ENABLED, however.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+				} else if (ticks ==
+					js_devdata->hard_stop_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_dumping
+					 * ticks. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks ==
+					js_devdata->gpu_reset_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_dumping
+					 * ticks. It should have left the GPU by
+					 * now. Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#endif				/* !CINSTR_DUMPING_ENABLED */
+			}
+		}
+	}
+#if KBASE_GPU_RESET_EN
+	if (reset_needed) {
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* the timer is re-issued if there is contexts in the run-pool */
+
+	if (backend->timer_running)
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+			HRTIMER_MODE_REL);
+
+	backend->timeouts_updated = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	unsigned long flags;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	if (!timer_callback_should_run(kbdev)) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = false;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/* From now on, return value of timer_callback_should_run() will
+		 * also cause the timer to not requeue itself. Its return value
+		 * cannot change, because it depends on variables updated with
+		 * the runpool_mutex held, which the caller of this must also
+		 * hold */
+		hrtimer_cancel(&backend->scheduling_timer);
+	}
+
+	if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = true;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+							HRTIMER_MODE_REL);
+
+		KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
+									0u);
+	}
+}
+
+int kbase_backend_timer_init(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	backend->scheduling_timer.function = timer_callback;
+
+	backend->timer_running = false;
+
+	return 0;
+}
+
+void kbase_backend_timer_term(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_cancel(&backend->scheduling_timer);
+}
+
+void kbase_backend_timer_suspend(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = true;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timer_resume(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = false;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->timeouts_updated = true;
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
new file mode 100755
index 0000000..3f53779
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#ifndef _KBASE_JS_BACKEND_H_
+#define _KBASE_JS_BACKEND_H_
+
+/**
+ * kbase_backend_timer_init() - Initialise the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_term() - Terminate the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver termination
+ */
+void kbase_backend_timer_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
+ *                               timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on suspend, after the active count has reached
+ * zero. This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ *                              scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that is is not guaranteed to
+ * re-start the timer, only evalute whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100755
index 0000000..4e5a74f
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,403 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+		u32 num_pages)
+{
+	u64 region;
+
+	/* can't lock a zero sized range */
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	region = pfn << PAGE_SHIFT;
+	/*
+	 * fls returns (given the ASSERT above):
+	 * 1 .. 32
+	 *
+	 * 10 + fls(num_pages)
+	 * results in the range (11 .. 42)
+	 */
+
+	/* gracefully handle num_pages being zero */
+	if (0 == num_pages) {
+		region |= 11;
+	} else {
+		u8 region_width;
+
+		region_width = 10 + fls(num_pages);
+		if (num_pages != (1ul << (region_width - 11))) {
+			/* not pow2, so must go up to the next pow2 */
+			region_width += 1;
+		}
+		KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+		KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+		region |= region_width;
+	}
+
+	return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+		unsigned int as_nr, struct kbase_context *kctx)
+{
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending. Do not log remaining register accesses. */
+	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
+
+	if (max_loops == 0) {
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		return -1;
+	}
+
+	/* If waiting in loop was performed, log last read value. */
+	if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
+		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	return 0;
+}
+
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
+		struct kbase_context *kctx)
+{
+	int status;
+
+	/* write AS_COMMAND when MMU is ready to accept another command */
+	status = wait_ready(kbdev, as_nr, kctx);
+	if (status == 0)
+		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
+									kctx);
+
+	return status;
+}
+
+static void validate_protected_page_fault(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	/* GPUs which support (native) protected mode shall not report page
+	 * fault addresses unless it has protected debug mode and protected
+	 * debug mode is turned on */
+	u32 protected_debug_mode = 0;
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+		return;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		protected_debug_mode = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS),
+				kctx) & GPU_DBGEN;
+	}
+
+	if (!protected_debug_mode) {
+		/* fault_addr should never be reported in protected mode.
+		 * However, we just continue by printing an error message */
+		dev_err(kbdev->dev, "Fault address reported in protected mode\n");
+	}
+}
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+	const int num_as = 16;
+	const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+	const int pf_shift = 0;
+	const unsigned long as_bit_mask = (1UL << num_as) - 1;
+	unsigned long flags;
+	u32 new_mask;
+	u32 tmp;
+
+	/* bus faults */
+	u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+	/* page faults (note: Ignore ASes with both pf and bf) */
+	u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	/* remember current mask */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	/* mask interrupts for now */
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+	while (bf_bits | pf_bits) {
+		struct kbase_as *as;
+		int as_no;
+		struct kbase_context *kctx;
+
+		/*
+		 * the while logic ensures we have a bit set, no need to check
+		 * for not-found here
+		 */
+		as_no = ffs(bf_bits | pf_bits) - 1;
+		as = &kbdev->as[as_no];
+
+		/*
+		 * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+		 * Bus/Page faults _should_ only occur whilst jobs are running,
+		 * and a job causing the Bus/Page fault shouldn't complete until
+		 * the MMU is updated
+		 */
+		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+
+
+		/* find faulting address */
+		as->fault_addr = kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_HI),
+						kctx);
+		as->fault_addr <<= 32;
+		as->fault_addr |= kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_LO),
+						kctx);
+
+		/* Mark the fault protected or not */
+		as->protected_mode = kbdev->protected_mode;
+
+		if (kbdev->protected_mode && as->fault_addr)
+		{
+			/* check if address reporting is allowed */
+			validate_protected_page_fault(kbdev, kctx);
+		}
+
+		/* report the fault to debugfs */
+		kbase_as_fault_debugfs_new(kbdev, as_no);
+
+		/* record the fault status */
+		as->fault_status = kbase_reg_read(kbdev,
+						  MMU_AS_REG(as_no,
+							AS_FAULTSTATUS),
+						  kctx);
+
+		/* find the fault type */
+		as->fault_type = (bf_bits & (1 << as_no)) ?
+				KBASE_MMU_FAULT_TYPE_BUS :
+				KBASE_MMU_FAULT_TYPE_PAGE;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		as->fault_extra_addr = kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
+				kctx);
+		as->fault_extra_addr <<= 32;
+		as->fault_extra_addr |= kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
+				kctx);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+		if (kbase_as_has_bus_fault(as)) {
+			/* Mark bus fault as handled.
+			 * Note that a bus fault is processed first in case
+			 * where both a bus fault and page fault occur.
+			 */
+			bf_bits &= ~(1UL << as_no);
+
+			/* remove the queued BF (and PF) from the mask */
+			new_mask &= ~(MMU_BUS_ERROR(as_no) |
+					MMU_PAGE_FAULT(as_no));
+		} else {
+			/* Mark page fault as handled */
+			pf_bits &= ~(1UL << as_no);
+
+			/* remove the queued PF from the mask */
+			new_mask &= ~MMU_PAGE_FAULT(as_no);
+		}
+
+		/* Process the interrupt for this address space */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_interrupt_process(kbdev, kctx, as);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+
+	/* reenable interrupts */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	new_mask |= tmp;
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx)
+{
+	struct kbase_mmu_setup *current_setup = &as->current_setup;
+	u32 transcfg = 0;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+
+	/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
+	/* Clear PTW_MEMATTR bits */
+	transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+	/* Enable correct PTW_MEMATTR bits */
+	transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
+		/* Clear PTW_SH bits */
+		transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+		/* Enable correct PTW_SH bits */
+		transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
+	}
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+			transcfg, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+			(current_setup->transcfg >> 32) & 0xFFFFFFFFUL, kctx);
+
+#else /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+			current_setup->transtab & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+			(current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+			current_setup->memattr & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+			(current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+
+	KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as,
+			current_setup->transtab,
+			current_setup->memattr,
+			transcfg);
+
+	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+}
+
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+		unsigned int handling_irq)
+{
+	int ret;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	if (op == AS_COMMAND_UNLOCK) {
+		/* Unlock doesn't require a lock first */
+		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+	} else {
+		u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+		/* Lock the region that needs to be updated */
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+				lock_addr & 0xFFFFFFFFUL, kctx);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+				(lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
+		write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+
+		/* Run the MMU operation */
+		write_cmd(kbdev, as->number, op, kctx);
+
+		/* Wait for the flush to complete */
+		ret = wait_ready(kbdev, as->number, kctx);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+			/* Issue an UNLOCK command to ensure that valid page
+			   tables are re-read by the GPU after an update.
+			   Note that, the FLUSH command should perform all the
+			   actions necessary, however the bus logs show that if
+			   multiple page faults occur within an 8 page region
+			   the MMU does not always re-read the updated page
+			   table entries for later faults or is only partially
+			   read, it subsequently raises the page fault IRQ for
+			   the same addresses, the unlock ensures that the MMU
+			   cache is flushed, so updates can be re-read.  As the
+			   region is now unlocked we need to issue 2 UNLOCK
+			   commands in order to flush the MMU/uTLB,
+			   see PRLAM-8812.
+			 */
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+		}
+	}
+
+	return ret;
+}
+
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 pf_bf_mask;
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	/* Clear the page (and bus fault IRQ as well in case one occurred) */
+	pf_bf_mask = MMU_PAGE_FAULT(as->number);
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		pf_bf_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 irq_mask;
+
+	/* Enable the page fault IRQ (and bus fault IRQ as well in case one
+	 * occurred) */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
+			MMU_PAGE_FAULT(as->number);
+
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		irq_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
new file mode 100755
index 0000000..c02253c
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Interface file for the direct implementation for MMU hardware access
+ *
+ * Direct MMU hardware interface
+ *
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
+#define _MALI_KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * kbase_mmu_interrupt - Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the &kbase_device.
+ *
+ * @kbdev:          kbase context to clear the fault from.
+ * @irq_stat:       Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+#endif	/* _MALI_KBASE_MMU_HW_DIRECT_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
new file mode 100755
index 0000000..0614348
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static void always_on_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void always_on_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+	"always_on",			/* name */
+	always_on_init,			/* init */
+	always_on_term,			/* term */
+	always_on_get_core_mask,	/* get_core_mask */
+	always_on_get_core_active,	/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_ALWAYS_ON,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
new file mode 100755
index 0000000..f9d244b
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -0,0 +1,77 @@
+
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * DOC:
+ * The "Always on" power management policy has the following
+ * characteristics:
+ *
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *    All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *    All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *    The Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed. The GPU itself is also kept powered, even though it is not
+ *    needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * Note:
+ *
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data
+ * @dummy: unused dummy variable
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_always_on {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
new file mode 100755
index 0000000..7690ec5
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -0,0 +1,460 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * GPU backend implementation of base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+
+#include <mali_kbase_pm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
+
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_on_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = true;
+}
+
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_off_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = false;
+}
+
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+	struct kbase_pm_callback_conf *callbacks;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_init(&kbdev->pm.lock);
+
+	kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!kbdev->pm.backend.gpu_poweroff_wait_wq)
+		return -ENOMEM;
+
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
+			kbase_pm_gpu_poweroff_wait_wq);
+
+	kbdev->pm.backend.gpu_powered = false;
+	kbdev->pm.suspending = false;
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->pm.backend.driver_ready_for_irqs = false;
+#endif /* CONFIG_MALI_DEBUG */
+	kbdev->pm.backend.gpu_in_desired_state = true;
+	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+	if (callbacks) {
+		kbdev->pm.backend.callback_power_on =
+					callbacks->power_on_callback;
+		kbdev->pm.backend.callback_power_off =
+					callbacks->power_off_callback;
+		kbdev->pm.backend.callback_power_suspend =
+					callbacks->power_suspend_callback;
+		kbdev->pm.backend.callback_power_resume =
+					callbacks->power_resume_callback;
+		kbdev->pm.callback_power_runtime_init =
+					callbacks->power_runtime_init_callback;
+		kbdev->pm.callback_power_runtime_term =
+					callbacks->power_runtime_term_callback;
+		kbdev->pm.backend.callback_power_runtime_on =
+					callbacks->power_runtime_on_callback;
+		kbdev->pm.backend.callback_power_runtime_off =
+					callbacks->power_runtime_off_callback;
+		kbdev->pm.backend.callback_power_runtime_idle =
+					callbacks->power_runtime_idle_callback;
+	} else {
+		kbdev->pm.backend.callback_power_on = NULL;
+		kbdev->pm.backend.callback_power_off = NULL;
+		kbdev->pm.backend.callback_power_suspend = NULL;
+		kbdev->pm.backend.callback_power_resume = NULL;
+		kbdev->pm.callback_power_runtime_init = NULL;
+		kbdev->pm.callback_power_runtime_term = NULL;
+		kbdev->pm.backend.callback_power_runtime_on = NULL;
+		kbdev->pm.backend.callback_power_runtime_off = NULL;
+		kbdev->pm.backend.callback_power_runtime_idle = NULL;
+	}
+
+	/* Initialise the metrics subsystem */
+	ret = kbasep_pm_metrics_init(kbdev);
+	if (ret)
+		return ret;
+
+	init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
+	kbdev->pm.backend.l2_powered = 0;
+
+	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
+	kbdev->pm.backend.reset_done = false;
+
+	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+	kbdev->pm.active_count = 0;
+
+	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+	init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
+
+	if (kbase_pm_ca_init(kbdev) != 0)
+		goto workq_fail;
+
+	if (kbase_pm_policy_init(kbdev) != 0)
+		goto pm_policy_fail;
+
+	return 0;
+
+pm_policy_fail:
+	kbase_pm_ca_term(kbdev);
+workq_fail:
+	kbasep_pm_metrics_term(kbdev);
+	return -EINVAL;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Turn clocks and interrupts on - no-op if we haven't done a previous
+	 * kbase_pm_clock_off() */
+	kbase_pm_clock_on(kbdev, is_resume);
+
+	/* Update core status as required by the policy */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+	kbase_pm_update_cores_state(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+	/* NOTE: We don't wait to reach the desired state, since running atoms
+	 * will wait for that state to be reached anyway */
+}
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+			pm.backend.gpu_poweroff_wait_work);
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	/* Wait for power transitions to complete. We do this with no locks held
+	 * so that we don't deadlock with any pending workqueues */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+	kbase_pm_check_transitions_sync(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	if (!backend->poweron_required) {
+		WARN_ON(kbdev->l2_available_bitmap ||
+				kbdev->shader_available_bitmap ||
+				kbdev->tiler_available_bitmap);
+
+		/* Consume any change-state events */
+		kbase_timeline_pm_check_handle_event(kbdev,
+					KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		/* Disable interrupts and turn the clock off */
+		if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
+			/*
+			 * Page/bus faults are pending, must drop locks to
+			 * process.  Interrupts are disabled so no more faults
+			 * should be generated at this point.
+			 */
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			kbase_flush_mmu_wqs(kbdev);
+			mutex_lock(&js_devdata->runpool_mutex);
+			mutex_lock(&kbdev->pm.lock);
+
+			/* Turn off clock now that fault have been handled. We
+			 * dropped locks so poweron_required may have changed -
+			 * power back on if this is the case.*/
+			if (backend->poweron_required)
+				kbase_pm_clock_on(kbdev, false);
+			else
+				WARN_ON(!kbase_pm_clock_off(kbdev,
+						backend->poweroff_is_suspend));
+		}
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	backend->poweroff_wait_in_progress = false;
+	if (backend->poweron_required) {
+		backend->poweron_required = false;
+		kbase_pm_update_cores_state_nolock(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	wake_up(&kbdev->pm.backend.poweroff_wait);
+}
+
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (!kbdev->pm.backend.poweroff_wait_in_progress) {
+		/* Force all cores off */
+		kbdev->pm.backend.desired_shader_state = 0;
+		kbdev->pm.backend.desired_tiler_state = 0;
+
+		/* Force all cores to be unavailable, in the situation where
+		 * transitions are in progress for some cores but not others,
+		 * and kbase_pm_check_transitions_nolock can not immediately
+		 * power off the cores */
+		kbdev->shader_available_bitmap = 0;
+		kbdev->tiler_available_bitmap = 0;
+		kbdev->l2_available_bitmap = 0;
+
+		kbdev->pm.backend.poweroff_wait_in_progress = true;
+		kbdev->pm.backend.poweroff_is_suspend = is_suspend;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/*Kick off wq here. Callers will have to wait*/
+		queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
+				&kbdev->pm.backend.gpu_poweroff_wait_work);
+	} else {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+}
+
+static bool is_poweroff_in_progress(struct kbase_device *kbdev)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
+{
+	wait_event_killable(kbdev->pm.backend.poweroff_wait,
+			is_poweroff_in_progress(kbdev));
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, don't enable IRQs as we are not ready to receive
+	 * them. */
+	ret = kbase_pm_init_hw(kbdev, flags);
+	if (ret) {
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		return ret;
+	}
+
+	kbasep_pm_read_present_cores(kbdev);
+
+	kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
+			kbdev->pm.debug_core_mask[1] =
+			kbdev->pm.debug_core_mask[2] =
+			kbdev->gpu_props.props.raw_props.shader_present;
+
+	/* Pretend the GPU is active to prevent a power policy turning the GPU
+	 * cores off */
+	kbdev->pm.active_count = 1;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+	/* Ensure cycle counter is off */
+	kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+	/* We are ready to receive IRQ's now as power policy is set up, so
+	 * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+	kbdev->pm.backend.driver_ready_for_irqs = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+#endif
+	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the GPU and any cores needed by the policy */
+	kbase_pm_do_poweron(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	kbase_pm_context_idle(kbdev);
+
+	return 0;
+}
+
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
+
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
+
+	/* Free any resources the policy allocated */
+	kbase_pm_policy_term(kbdev);
+	kbase_pm_ca_term(kbdev);
+
+	/* Shut down the metrics subsystem */
+	kbasep_pm_metrics_term(kbdev);
+
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
+}
+
+void kbase_pm_power_changed(struct kbase_device *kbdev)
+{
+	bool cores_are_available;
+	unsigned long flags;
+
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
+
+	if (cores_are_available) {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		kbase_backend_slot_update(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2)
+{
+	kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
+	kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
+	kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
+	kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
+			new_core_mask_js2;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	/* Force power off the GPU and all cores (regardless of policy), only
+	 * after the PM active count reaches zero (otherwise, we risk turning it
+	 * off prematurely) */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, true);
+
+	kbase_backend_timer_suspend(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbdev->pm.suspending = false;
+	kbase_pm_do_poweron(kbdev, true);
+
+	kbase_backend_timer_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100755
index 0000000..9cecd4e
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,180 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+	&kbase_pm_ca_fixed_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_ca_demand_policy_ops,
+	&kbase_pm_ca_random_policy_ops,
+#endif
+};
+
+/**
+ * POLICY_COUNT - The number of policies available in the system.
+ *
+ * This is derived from the number of functions listed in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.ca_current_policy = policy_list[0];
+
+	kbdev->pm.backend.ca_current_policy->init(kbdev);
+
+	return 0;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_current_policy->term(kbdev);
+}
+
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
+
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.ca_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
+
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *new_policy)
+{
+	const struct kbase_pm_ca_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
+								new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.ca_current_policy;
+	kbdev->pm.backend.ca_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.ca_current_policy = new_policy;
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+					kbdev->shader_ready_bitmap,
+					kbdev->shader_transitioning_bitmap);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
+
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* All cores must be enabled when instrumentation is in use */
+	if (kbdev->pm.backend.instr_enabled)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	if (kbdev->pm.backend.ca_current_policy == NULL)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
+						kbdev->pm.debug_core_mask_all;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
+
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+							u64 cores_transitioning)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.ca_current_policy != NULL)
+		kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+							cores_ready,
+							cores_transitioning);
+}
+
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.instr_enabled = true;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbdev->pm.backend.instr_enabled = false;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
new file mode 100755
index 0000000..ee9e751
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+/**
+ * kbase_pm_ca_init - Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the core availability framework was successfully initialized,
+ *         -errno otherwise
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_term - Terminate core availability framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the currently available shader cores.
+ * Calls into the core availability policy
+ *
+ * Return: The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_update_core_status - Update core status
+ *
+ * @kbdev:               The kbase device structure for the device (must be
+ *                       a valid pointer)
+ * @cores_ready:         The bit mask of cores ready for job submission
+ * @cores_transitioning: The bit mask of cores that are transitioning power
+ *                       state
+ *
+ * Update core availability policy with current core power status
+ *
+ * Calls into the core availability policy
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+/**
+ * kbase_pm_ca_instr_enable - Enable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This overrides the output of the core availability policy, ensuring that all
+ * cores are available
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_instr_disable - Disable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This disables any previously enabled override, and resumes normal policy
+ * functionality
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_CA_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
new file mode 100644
index 0000000..19f1f73
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
@@ -0,0 +1,246 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation.
+ *
+ * This policy periodically selects a new core mask at demand. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/printk.h>
+
+#ifndef MALI_CA_TIMER
+static struct workqueue_struct *mali_ca_wq = NULL;
+#endif
+static int num_shader_cores_to_be_enabled = 0;
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+#ifndef MALI_CA_TIMER
+	struct kbase_device *kbdev = (struct kbase_device *)priv;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+#endif
+
+    num_shader_cores_to_be_enabled = cores -1;
+    printk("pp num=%d\n", num_shader_cores_to_be_enabled);
+    if (num_shader_cores_to_be_enabled < 1)
+        num_shader_cores_to_be_enabled = 1;
+
+#ifndef MALI_CA_TIMER
+    queue_work(mali_ca_wq, &data->wq_work);
+#endif
+
+    return 0;
+}
+
+#ifdef MALI_CA_TIMER
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void demand_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+#if 0
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+#endif
+    data->cores_desired = num_shader_cores_to_be_enabled;
+
+	if (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+            dev_info(kbdev->dev, "error, ca demand policy : new core mask=%llX\n",
+				data->cores_desired);
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "ca demand policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(demand_timer_callback);
+#else
+static void do_ca_scaling(struct work_struct *work)
+{
+    //unsigned long flags;
+	struct kbase_device *kbdev = = container_of(work,
+            struct kbasep_pm_ca_policy_demand, wq_work);
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+    data->cores_desired =  num_shader_cores_to_be_enabled;
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+
+}
+#endif
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+#ifdef MALI_CA_TIMER
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = demand_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+#else
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+	mali_ca_wq = alloc_workqueue("gpu_ca_wq", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+	mali_ca_wq = create_workqueue("gpu_ca_wq");
+#endif
+	INIT_WORK(&data->wq_work, do_ca_scaling);
+    if (mali_ca_wq == NULL) printk("Unable to create gpu scaling workqueue\n");
+#endif
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+#ifdef MALI_CA_TIMER
+	del_timer(&data->core_change_timer);
+#else
+    if (mali_ca_wq) {
+        flush_workqueue(mali_ca_wq);
+        destroy_workqueue(mali_ca_wq);
+        mali_ca_wq = NULL;
+    }
+#endif
+}
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.demand.cores_enabled;
+}
+
+static void demand_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the demand core availability
+ * policy.
+ *
+ * This is the static structure that defines the demand core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_demand_policy_ops);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
new file mode 100644
index 0000000..a3dfe2a
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_DEMAND_H
+#define MALI_KBASE_PM_CA_DEMAND_H
+#include <linux/workqueue.h>
+/**
+ * struct kbasep_pm_ca_policy_demand - Private structure for demand ca policy
+ *
+ * This contains data that is private to the demand core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+#define MALI_CA_TIMER 1
+struct kbasep_pm_ca_policy_demand {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+#ifdef MALI_CA_TIMER
+	struct timer_list core_change_timer;
+#else
+	struct work_struct wq_work;
+#endif
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops;
+extern int mali_perf_set_num_pp_cores(int cores);
+
+#endif /* MALI_KBASE_PM_CA_DEMAND_H */
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
new file mode 100755
index 0000000..1db6586
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static void fixed_init(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev,
+					u64 cores_ready,
+					u64 cores_transitioning)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(cores_ready);
+	CSTD_UNUSED(cores_transitioning);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the fixed power policy.
+ *
+ * This is the static structure that defines the fixed power policy's callback
+ * and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+	"fixed",			/* name */
+	fixed_init,			/* init */
+	fixed_term,			/* term */
+	fixed_get_core_mask,		/* get_core_mask */
+	fixed_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_FIXED,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
new file mode 100755
index 0000000..a763155
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Dummy member - no state is needed
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+	int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
new file mode 100644
index 0000000..0d1b220
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation.
+ *
+ * This policy periodically selects a new core mask at random. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void random_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(random_timer_callback);
+
+static void random_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = random_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+}
+
+static void random_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	del_timer(&data->core_change_timer);
+}
+
+static u64 random_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.random.cores_enabled;
+}
+
+static void random_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the random core availability
+ * policy.
+ *
+ * This is the static structure that defines the random core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops = {
+	"random",			/* name */
+	random_init,			/* init */
+	random_term,			/* term */
+	random_get_core_mask,		/* get_core_mask */
+	random_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_RANDOM,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_random_policy_ops);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
new file mode 100644
index 0000000..a8c9b15
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_RANDOM_H
+#define MALI_KBASE_PM_CA_RANDOM_H
+
+/**
+ * struct kbasep_pm_ca_policy_random - Private structure for random ca policy
+ *
+ * This contains data that is private to the random core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+struct kbasep_pm_ca_policy_random {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+	struct timer_list core_change_timer;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_RANDOM_H */
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100755
index 0000000..f891fa2
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count == 0)
+		return 0;
+
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	"coarse_demand",			/* name */
+	coarse_demand_init,			/* init */
+	coarse_demand_term,			/* term */
+	coarse_demand_get_core_mask,		/* get_core_mask */
+	coarse_demand_get_core_active,		/* get_core_active */
+	0u,					/* flags */
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100755
index 0000000..749d305
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * DOC:
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
+ *                                         policy
+ *
+ * This contains data that is private to the coarse demand power policy.
+ *
+ * @dummy: Dummy member - no state needed
+ */
+struct kbasep_pm_policy_coarse_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100755
index 0000000..0ddf7b7
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,521 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#include "mali_kbase_pm_ca_demand.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/**
+ * enum kbase_pm_core_type - The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to
+ * - kbase_pm_get_present_cores()
+ * - kbase_pm_get_active_cores()
+ * - kbase_pm_get_trans_cores()
+ * - kbase_pm_get_ready_cores().
+ *
+ * They specify which type of core should be acted on.  These values are set in
+ * a manner that allows core_type_to_reg() function to be simpler and more
+ * efficient.
+ *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ * @KBASE_PM_CORE_STACK: Core stacks
+ */
+enum kbase_pm_core_type {
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO,
+	KBASE_PM_CORE_STACK = STACK_PRESENT_LO
+};
+
+/**
+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power
+ *                                 management framework.
+ *
+ *  @time_period_start: time at which busy/idle measurements started
+ *  @time_busy: number of ns the GPU was busy executing jobs since the
+ *          @time_period_start timestamp.
+ *  @time_idle: number of ns since time_period_start the GPU was not executing
+ *          jobs since the @time_period_start timestamp.
+ *  @prev_busy: busy time in ns of previous time period.
+ *           Updated when metrics are reset.
+ *  @prev_idle: idle time in ns of previous time period
+ *           Updated when metrics are reset.
+ *  @gpu_active: true when the GPU is executing jobs. false when
+ *           not. Updated when the job scheduler informs us a job in submitted
+ *           or removed from a GPU slot.
+ *  @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
+ *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
+ *           GL jobs never run on slot 2 this slot is not recorded.
+ *  @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ *  @timer: timer to regularly make DVFS decisions based on the power
+ *           management metrics.
+ *  @timer_active: boolean indicating @timer is running
+ *  @platform_data: pointer to data controlled by platform specific code
+ *  @kbdev: pointer to kbase device for which metrics are collected
+ *
+ */
+struct kbasep_pm_metrics_data {
+	ktime_t time_period_start;
+	u32 time_busy;
+	u32 time_idle;
+	u32 prev_busy;
+	u32 prev_idle;
+	bool gpu_active;
+	u32 busy_cl[2];
+	u32 busy_gl;
+	u32 active_cl_ctx[2];
+	u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
+	spinlock_t lock;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	struct hrtimer timer;
+	bool timer_active;
+#endif
+
+	void *platform_data;
+	struct kbase_device *kbdev;
+};
+
+union kbase_pm_policy_data {
+	struct kbasep_pm_policy_always_on always_on;
+	struct kbasep_pm_policy_coarse_demand coarse_demand;
+	struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+	struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+union kbase_pm_ca_policy_data {
+	struct kbasep_pm_ca_policy_fixed fixed;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_ca_policy_random random;
+	struct kbasep_pm_ca_policy_demand demand;
+#endif
+};
+
+/**
+ * struct kbase_pm_backend_data - Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ *
+ * @ca_current_policy: The policy that is currently actively controlling core
+ *                     availability.
+ * @pm_current_policy: The policy that is currently actively controlling the
+ *                     power state.
+ * @ca_policy_data:    Private data for current CA policy
+ * @pm_policy_data:    Private data for current PM policy
+ * @ca_in_transition:  Flag indicating when core availability policy is
+ *                     transitioning cores. The core availability policy must
+ *                     set this when a change in core availability is occurring.
+ *                     power_change_lock must be held when accessing this.
+ * @reset_done:        Flag when a reset is complete
+ * @reset_done_wait:   Wait queue to wait for changes to @reset_done
+ * @l2_powered_wait:   Wait queue for whether the l2 cache has been powered as
+ *                     requested
+ * @l2_powered:        State indicating whether all the l2 caches are powered.
+ *                     Non-zero indicates they're *all* powered
+ *                     Zero indicates that some (or all) are not powered
+ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
+ *                              users
+ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
+ * @desired_shader_state: A bit mask identifying the shader cores that the
+ *                        power policy would like to be on. The current state
+ *                        of the cores may be different, but there should be
+ *                        transitions in progress that will eventually achieve
+ *                        this state (assuming that the policy doesn't change
+ *                        its mind in the mean time).
+ * @powering_on_shader_state: A bit mask indicating which shader cores are
+ *                            currently in a power-on transition
+ * @desired_tiler_state: A bit mask identifying the tiler cores that the power
+ *                       policy would like to be on. See @desired_shader_state
+ * @powering_on_tiler_state: A bit mask indicating which tiler core are
+ *                           currently in a power-on transition
+ * @powering_on_l2_state: A bit mask indicating which l2-caches are currently
+ *                        in a power-on transition
+ * @powering_on_stack_state: A bit mask indicating which core stacks are
+ *                           currently in a power-on transition
+ * @gpu_in_desired_state: This flag is set if the GPU is powered as requested
+ *                        by the desired_xxx_state variables
+ * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
+ * @gpu_powered:       Set to true when the GPU is powered and register
+ *                     accesses are possible, false otherwise
+ * @instr_enabled:     Set to true when instrumentation is enabled,
+ *                     false otherwise
+ * @cg1_disabled:      Set if the policy wants to keep the second core group
+ *                     powered off
+ * @driver_ready_for_irqs: Debug state indicating whether sufficient
+ *                         initialization of the driver has occurred to handle
+ *                         IRQs
+ * @gpu_powered_lock:  Spinlock that must be held when writing @gpu_powered or
+ *                     accessing @driver_ready_for_irqs
+ * @metrics:           Structure to hold metrics for the GPU
+ * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
+ *                        powered off
+ * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
+ *                        and/or timers are powered off
+ * @gpu_poweroff_timer: Timer for powering off GPU
+ * @gpu_poweroff_wq:   Workqueue to power off GPU on when timer fires
+ * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
+ * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
+ *                           timer callback
+ * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
+ *                          callback
+ * @poweroff_timer_needed: true if the poweroff timer is currently required,
+ *                         false otherwise
+ * @poweroff_timer_running: true if the poweroff timer is currently running,
+ *                          false otherwise
+ *                          power_change_lock should be held when accessing,
+ *                          unless there is no way the timer can be running (eg
+ *                          hrtimer_cancel() was called immediately before)
+ * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
+ *                             hwaccess_lock must be held when accessing
+ * @poweron_required: true if a GPU power on is required. Should only be set
+ *                    when poweroff_wait_in_progress is true, and therefore the
+ *                    GPU can not immediately be powered on. pm.lock must be
+ *                    held when accessing
+ * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend
+ *                       request. pm.lock must be held when accessing
+ * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off
+ * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq
+ * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete
+ * @callback_power_on: Callback when the GPU needs to be turned on. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_off: Callback when the GPU may be turned off. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to
+ *                          be turned off. See &struct kbase_pm_callback_conf
+ * @callback_power_resume: Callback when a resume occurs and the GPU needs to
+ *                          be turned on. See &struct kbase_pm_callback_conf
+ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See
+ *                             &struct kbase_pm_callback_conf
+ * @callback_power_runtime_off: Callback when the GPU may be turned off. See
+ *                              &struct kbase_pm_callback_conf
+ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
+ *                              &struct kbase_pm_callback_conf
+ *
+ * Note:
+ * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
+ * policy is being changed with kbase_pm_ca_set_policy() or
+ * kbase_pm_set_policy(). The change is protected under
+ * kbase_device.pm.power_change_lock. Direct access to this
+ * from IRQ context must therefore check for NULL. If NULL, then
+ * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
+ * functions that would have been done under IRQ.
+ */
+struct kbase_pm_backend_data {
+	const struct kbase_pm_ca_policy *ca_current_policy;
+	const struct kbase_pm_policy *pm_current_policy;
+	union kbase_pm_ca_policy_data ca_policy_data;
+	union kbase_pm_policy_data pm_policy_data;
+	bool ca_in_transition;
+	bool reset_done;
+	wait_queue_head_t reset_done_wait;
+	wait_queue_head_t l2_powered_wait;
+	int l2_powered;
+	int gpu_cycle_counter_requests;
+	spinlock_t gpu_cycle_counter_requests_lock;
+
+	u64 desired_shader_state;
+	u64 powering_on_shader_state;
+	u64 desired_tiler_state;
+	u64 powering_on_tiler_state;
+	u64 powering_on_l2_state;
+#ifdef CONFIG_MALI_CORESTACK
+	u64 powering_on_stack_state;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	bool gpu_in_desired_state;
+	wait_queue_head_t gpu_in_desired_state_wait;
+
+	bool gpu_powered;
+
+	bool instr_enabled;
+
+	bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+	bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+	spinlock_t gpu_powered_lock;
+
+
+	struct kbasep_pm_metrics_data metrics;
+
+	int gpu_poweroff_pending;
+	int shader_poweroff_pending_time;
+
+	struct hrtimer gpu_poweroff_timer;
+	struct workqueue_struct *gpu_poweroff_wq;
+	struct work_struct gpu_poweroff_work;
+
+	u64 shader_poweroff_pending;
+	u64 tiler_poweroff_pending;
+
+	bool poweroff_timer_needed;
+	bool poweroff_timer_running;
+
+	bool poweroff_wait_in_progress;
+	bool poweron_required;
+	bool poweroff_is_suspend;
+
+	struct workqueue_struct *gpu_poweroff_wait_wq;
+	struct work_struct gpu_poweroff_wait_work;
+
+	wait_queue_head_t poweroff_wait;
+
+	int (*callback_power_on)(struct kbase_device *kbdev);
+	void (*callback_power_off)(struct kbase_device *kbdev);
+	void (*callback_power_suspend)(struct kbase_device *kbdev);
+	void (*callback_power_resume)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
+};
+
+
+/* List of policy IDs */
+enum kbase_pm_policy_id {
+	KBASE_PM_POLICY_ID_DEMAND = 1,
+	KBASE_PM_POLICY_ID_ALWAYS_ON,
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
+	KBASE_PM_POLICY_ID_FAST_START
+#endif
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+/**
+ * struct kbase_pm_policy - Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core mask
+ * @get_core_active:    Function called to get the current overall GPU power
+ *                      state
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.pm_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core mask
+	 *
+	 * The returned mask should meet or exceed (kbdev->shader_needed_bitmap
+	 * | kbdev->shader_inuse_bitmap).
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: The mask of shader cores to be powered
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current overall GPU power state
+	 *
+	 * This function should consider the state of kbdev->pm.active_count. If
+	 * this count is greater than 0 then there is at least one active
+	 * context on the device and the GPU should be powered. If it is equal
+	 * to 0 then there are no active contexts and the GPU could be powered
+	 * off if desired.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: true if the GPU should be powered, false otherwise
+	 */
+	bool (*get_core_active)(struct kbase_device *kbdev);
+
+	kbase_pm_policy_flags flags;
+	enum kbase_pm_policy_id id;
+};
+
+
+enum kbase_pm_ca_policy_id {
+	KBASE_PM_CA_POLICY_ID_FIXED = 1,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_CA_POLICY_ID_RANDOM,
+	KBASE_PM_CA_POLICY_ID_DEMAND,
+#endif
+};
+
+typedef u32 kbase_pm_ca_policy_flags;
+
+/**
+ * Maximum length of a CA policy names
+ */
+#define KBASE_PM_CA_MAX_POLICY_NAME_LEN 15
+
+/**
+ * struct kbase_pm_ca_policy - Core availability policy structure.
+ *
+ * Each core availability policy exposes a (static) instance of this structure
+ * which contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core
+ *                      availability mask
+ * @update_core_status: Function called to update the current core status
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_ca_policy {
+	char name[KBASE_PM_CA_MAX_POLICY_NAME_LEN + 1];
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.ca_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core availability mask
+	 *
+	 * When a change in core availability is occurring, the policy must set
+	 * kbdev->pm.ca_in_transition to true. This is to indicate that
+	 * reporting changes in power state cannot be optimized out, even if
+	 * kbdev->pm.desired_shader_state remains unchanged. This must be done
+	 * by any functions internal to the Core Availability Policy that change
+	 * the return value of kbase_pm_ca_policy::get_core_mask.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *              valid pointer)
+	 *
+	 * Return: The current core availability mask
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to update the current core status
+	 *
+	 * If none of the cores in core group 0 are ready or transitioning, then
+	 * the policy must ensure that the next call to get_core_mask does not
+	 * return 0 for all cores in core group 0. It is an error to disable
+	 * core group 0 through the core availability policy.
+	 *
+	 * When a change in core availability has finished, the policy must set
+	 * kbdev->pm.ca_in_transition to false. This is to indicate that
+	 * changes in power state can once again be optimized out when
+	 * kbdev->pm.desired_shader_state is unchanged.
+	 *
+	 * @kbdev:               The kbase device structure for the device
+	 *                       (must be a valid pointer)
+	 * @cores_ready:         The mask of cores currently powered and
+	 *                       ready to run jobs
+	 * @cores_transitioning: The mask of cores currently transitioning
+	 *                       power state
+	 */
+	void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+	kbase_pm_ca_policy_flags flags;
+
+	/**
+	 * Field indicating an ID for this policy. This is not necessarily the
+	 * same as its index in the list returned by kbase_pm_list_policies().
+	 * It is used purely for debugging.
+	 */
+	enum kbase_pm_ca_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
new file mode 100755
index 0000000..81322fd
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
+
+	if (0 == kbdev->pm.active_count)
+		return 0;
+
+	return desired;
+}
+
+static bool demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_get_core_active,		/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
new file mode 100755
index 0000000..c0c84b6
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#ifndef MALI_KBASE_PM_DEMAND_H
+#define MALI_KBASE_PM_DEMAND_H
+
+/**
+ * DOC: Demand power management policy
+ *
+ * The demand power management policy has the following characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - The Shader Cores are not powered up
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - Only those Shader Cores are powered up
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * Note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_demand - Private structure for policy instance data
+ *
+ * @dummy: No state is needed, a dummy variable
+ *
+ * This contains data that is private to the demand power policy.
+ */
+struct kbasep_pm_policy_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_DEMAND_H */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
new file mode 100755
index 0000000..5a0f42c
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -0,0 +1,1704 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel Power Management hardware control
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_smc.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+
+#if MALI_MOCK_TEST
+#define MOCKABLE(function) function##_original
+#else
+#define MOCKABLE(function) function
+#endif				/* MALI_MOCK_TEST */
+
+/* Special value to indicate that the JM_CONFIG reg isn't currently used. */
+#define KBASE_JM_CONFIG_UNUSED (1<<31)
+
+static u64 kbase_pm_get_state(
+		struct kbase_device *kbdev,
+		enum kbase_pm_core_type core_type,
+		enum kbasep_pm_action action);
+
+/**
+ * core_type_to_reg - Decode a core type and action to a register.
+ *
+ * Given a core type (defined by kbase_pm_core_type) and an action (defined
+ * by kbasep_pm_action) this function will return the register offset that
+ * will perform the action on the core type. The register returned is the _LO
+ * register and an offset must be applied to use the _HI register.
+ *
+ * @core_type: The type of core
+ * @action:    The type of action
+ *
+ * Return: The register offset of the _LO register that performs an action of
+ * type @action on a core of type @core_type.
+ */
+static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
+						enum kbasep_pm_action action)
+{
+#ifdef CONFIG_MALI_CORESTACK
+	if (core_type == KBASE_PM_CORE_STACK) {
+		switch (action) {
+		case ACTION_PRESENT:
+			return STACK_PRESENT_LO;
+		case ACTION_READY:
+			return STACK_READY_LO;
+		case ACTION_PWRON:
+			return STACK_PWRON_LO;
+		case ACTION_PWROFF:
+			return STACK_PWROFF_LO;
+		case ACTION_PWRTRANS:
+			return STACK_PWRTRANS_LO;
+		default:
+			BUG();
+		}
+	}
+#endif /* CONFIG_MALI_CORESTACK */
+
+	return (u32)core_type + (u32)action;
+}
+
+#ifdef CONFIG_ARM64
+static void mali_cci_flush_l2(struct kbase_device *kbdev)
+{
+	const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED;
+	u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+	u32 raw;
+
+	/*
+	 * Note that we don't take the cache flush mutex here since
+	 * we expect to be the last user of the L2, all other L2 users
+	 * would have dropped their references, to initiate L2 power
+	 * down, L2 power down being the only valid place for this
+	 * to be called from.
+	 */
+
+	kbase_reg_write(kbdev,
+			GPU_CONTROL_REG(GPU_COMMAND),
+			GPU_COMMAND_CLEAN_INV_CACHES,
+			NULL);
+
+	raw = kbase_reg_read(kbdev,
+		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+		NULL);
+
+	/* Wait for cache flush to complete before continuing, exit on
+	 * gpu resets or loop expiry. */
+	while (((raw & mask) == 0) && --loops) {
+		raw = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+					NULL);
+	}
+}
+#endif
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo = cores & 0xFFFFFFFF;
+	u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	if (cores) {
+		if (action == ACTION_PWRON)
+			kbase_trace_mali_pm_power_on(core_type, cores);
+		else if (action == ACTION_PWROFF)
+			kbase_trace_mali_pm_power_off(core_type, cores);
+	}
+#endif
+
+	if (cores) {
+		u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
+
+		if (action == ACTION_PWRON)
+			state |= cores;
+		else if (action == ACTION_PWROFF)
+			state &= ~cores;
+		KBASE_TLSTREAM_AUX_PM_STATE(core_type, state);
+	}
+
+	/* Tracing */
+	if (cores) {
+		if (action == ACTION_PWRON)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u,
+									lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL,
+									0u, lo);
+				break;
+			default:
+				break;
+			}
+		else if (action == ACTION_PWROFF)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL,
+									0u, lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
+									0u, lo);
+				/* disable snoops before L2 is turned off */
+				kbase_pm_cache_snoop_disable(kbdev);
+				break;
+			default:
+				break;
+			}
+	}
+
+	if (lo != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+	if (hi != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_invoke);
+
+/**
+ * kbase_pm_get_state - Get information about a core set
+ *
+ * This function gets information (chosen by @action) about a set of cores of
+ * a type given by @core_type. It is a static function used by
+ * kbase_pm_get_present_cores(), kbase_pm_get_active_cores(),
+ * kbase_pm_get_trans_cores() and kbase_pm_get_ready_cores().
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the should be queried
+ * @action:    The property of the cores to query
+ *
+ * Return: A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo, hi;
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+
+	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+	return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev)
+{
+	kbdev->shader_inuse_bitmap = 0;
+	kbdev->shader_needed_bitmap = 0;
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_users_count = 0;
+	kbdev->l2_available_bitmap = 0;
+	kbdev->tiler_needed_cnt = 0;
+	kbdev->tiler_inuse_cnt = 0;
+
+	memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_read_present_cores);
+
+/**
+ * kbase_pm_get_present_cores - Get the cores that are present
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (type) {
+	case KBASE_PM_CORE_L2:
+		return kbdev->gpu_props.props.raw_props.l2_present;
+	case KBASE_PM_CORE_SHADER:
+		return kbdev->gpu_props.props.raw_props.shader_present;
+	case KBASE_PM_CORE_TILER:
+		return kbdev->gpu_props.props.raw_props.tiler_present;
+#ifdef CONFIG_MALI_CORESTACK
+	case KBASE_PM_CORE_STACK:
+		return kbdev->gpu_props.props.raw_props.stack_present;
+#endif /* CONFIG_MALI_CORESTACK */
+	default:
+		break;
+	}
+	KBASE_DEBUG_ASSERT(0);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * kbase_pm_get_active_cores - Get the cores that are "active"
+ *                             (busy processing work)
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are active
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between
+ *                            power states
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are transitioning
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * kbase_pm_get_ready_cores - Get the cores that are powered on
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are ready (powered on)
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	u64 result;
+
+	result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+	switch (type) {
+	case KBASE_PM_CORE_SHADER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_TILER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_L2:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * kbase_pm_transition_core_type - Perform power transitions for a particular
+ *                                 core type.
+ *
+ * This function will perform any available power transitions to make the actual
+ * hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that call cannot be made
+ * until the transition has finished. Cores which are not present in the
+ * hardware are ignored if they are specified in the desired_state bitmask,
+ * however the return value will always be 0 in this case.
+ *
+ * @kbdev:             The kbase device
+ * @type:              The core type to perform transitions for
+ * @desired_state:     A bit mask of the desired state of the cores
+ * @in_use:            A bit mask of the cores that are currently running
+ *                     jobs. These cores have to be kept powered up because
+ *                     there are jobs running (or about to run) on them.
+ * @available:         Receives a bit mask of the cores that the job
+ *                     scheduler can use to submit jobs to. May be NULL if
+ *                     this is not needed.
+ * @powering_on:       Bit mask to update with cores that are
+ *                    transitioning to a power-on state.
+ *
+ * Return: true if the desired state has been reached, false otherwise
+ */
+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type,
+						u64 desired_state,
+						u64 in_use,
+						u64 * const available,
+						u64 *powering_on)
+{
+	u64 present;
+	u64 ready;
+	u64 trans;
+	u64 powerup;
+	u64 powerdown;
+	u64 powering_on_trans;
+	u64 desired_state_in_use;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Get current state */
+	present = kbase_pm_get_present_cores(kbdev, type);
+	trans = kbase_pm_get_trans_cores(kbdev, type);
+	ready = kbase_pm_get_ready_cores(kbdev, type);
+	/* mask off ready from trans in case transitions finished between the
+	 * register reads */
+	trans &= ~ready;
+
+	powering_on_trans = trans & *powering_on;
+	*powering_on = powering_on_trans;
+
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	/* Update desired state to include the in-use cores. These have to be
+	 * kept powered up because there are jobs running or about to run on
+	 * these cores
+	 */
+	desired_state_in_use = desired_state | in_use;
+
+	/* Update state of whether l2 caches are powered */
+	if (type == KBASE_PM_CORE_L2) {
+		if ((ready == present) && (desired_state_in_use == ready) &&
+								(trans == 0)) {
+			/* All are ready, none will be turned off, and none are
+			 * transitioning */
+			kbdev->pm.backend.l2_powered = 1;
+			/*
+			 * Ensure snoops are enabled after L2 is powered up,
+			 * note that kbase keeps track of the snoop state, so
+			 * safe to repeatedly call.
+			 */
+			kbase_pm_cache_snoop_enable(kbdev);
+			if (kbdev->l2_users_count > 0) {
+				/* Notify any registered l2 cache users
+				 * (optimized out when no users waiting) */
+				wake_up(&kbdev->pm.backend.l2_powered_wait);
+			}
+		} else
+			kbdev->pm.backend.l2_powered = 0;
+	}
+
+	if (desired_state == ready && (trans == 0))
+		return true;
+
+	/* Restrict the cores to those that are actually present */
+	powerup = desired_state_in_use & present;
+	powerdown = (~desired_state_in_use) & present;
+
+	/* Restrict to cores that are not already in the desired state */
+	powerup &= ~ready;
+	powerdown &= ready;
+
+	/* Don't transition any cores that are already transitioning, except for
+	 * Mali cores that support the following case:
+	 *
+	 * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
+	 * a core that is currently transitioning to power off, then this is
+	 * remembered and the shader core is automatically powered up again once
+	 * the original transition completes. Once the automatic power on is
+	 * complete any job scheduled on the shader core should start.
+	 */
+	powerdown &= ~trans;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
+		if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
+			trans = powering_on_trans; /* for exception cases, only
+						    * mask off cores in power on
+						    * transitions */
+
+	powerup &= ~trans;
+
+	/* Perform transitions if any */
+	kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
+	//kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
+
+	/* Recalculate cores transitioning on, and re-evaluate our state */
+	powering_on_trans |= powerup;
+	*powering_on = powering_on_trans;
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	return false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
+
+/**
+ * get_desired_cache_status - Determine which caches should be on for a
+ *                            particular core state
+ *
+ * This function takes a bit mask of the present caches and the cores (or
+ * caches) that are attached to the caches that will be powered. It then
+ * computes which caches should be turned on to allow the cores requested to be
+ * powered up.
+ *
+ * @present:       The bit mask of present caches
+ * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
+ *                 be powered
+ * @tilers_powered: The bit mask of tilers that are desired to be powered
+ *
+ * Return: A bit mask of the caches that should be turned on
+ */
+static u64 get_desired_cache_status(u64 present, u64 cores_powered,
+		u64 tilers_powered)
+{
+	u64 desired = 0;
+
+	while (present) {
+		/* Find out which is the highest set bit */
+		u64 bit = fls64(present) - 1;
+		u64 bit_mask = 1ull << bit;
+		/* Create a mask which has all bits from 'bit' upwards set */
+
+		u64 mask = ~(bit_mask - 1);
+
+		/* If there are any cores powered at this bit or above (that
+		 * haven't previously been processed) then we need this core on
+		 */
+		if (cores_powered & mask)
+			desired |= bit_mask;
+
+		/* Remove bits from cores_powered and present */
+		cores_powered &= ~mask;
+		present &= ~bit_mask;
+	}
+
+	/* Power up the required L2(s) for the tiler */
+	if (tilers_powered)
+		desired |= 1;
+
+	return desired;
+}
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status);
+
+#ifdef CONFIG_MALI_CORESTACK
+u64 kbase_pm_core_stack_mask(u64 cores)
+{
+	u64 stack_mask = 0;
+	size_t const MAX_CORE_ID = 31;
+	size_t const NUM_CORES_PER_STACK = 4;
+	size_t i;
+
+	for (i = 0; i <= MAX_CORE_ID; ++i) {
+		if (test_bit(i, (unsigned long *)&cores)) {
+			/* Every core which ID >= 16 is filled to stacks 4-7
+			 * instead of 0-3 */
+			size_t const stack_num = (i > 16) ?
+				(i % NUM_CORES_PER_STACK) + 4 :
+				(i % NUM_CORES_PER_STACK);
+			set_bit(stack_num, (unsigned long *)&stack_mask);
+		}
+	}
+
+	return stack_mask;
+}
+#endif /* CONFIG_MALI_CORESTACK */
+
+bool
+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+	bool cores_are_available = false;
+	bool in_desired_state = true;
+	u64 desired_l2_state;
+#ifdef CONFIG_MALI_CORESTACK
+	u64 desired_stack_state;
+	u64 stacks_powered;
+#endif /* CONFIG_MALI_CORESTACK */
+	u64 cores_powered;
+	u64 tilers_powered;
+	u64 tiler_available_bitmap;
+	u64 tiler_transitioning_bitmap;
+	u64 shader_available_bitmap;
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+	u64 l2_available_bitmap;
+	u64 prev_l2_available_bitmap;
+	u64 l2_inuse_bitmap;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+	if (kbdev->pm.backend.gpu_powered == false) {
+		spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+		if (kbdev->pm.backend.desired_shader_state == 0 &&
+				kbdev->pm.backend.desired_tiler_state == 0)
+			return true;
+		return false;
+	}
+
+	/* Trace that a change-state is being requested, and that it took
+	 * (effectively) no time to start it. This is useful for counting how
+	 * many state changes occurred, in a way that's backwards-compatible
+	 * with processing the trace data */
+	kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+	kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+
+	/* If any cores are already powered then, we must keep the caches on */
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+
+	cores_powered |= kbdev->pm.backend.desired_shader_state;
+
+#ifdef CONFIG_MALI_CORESTACK
+	/* Work out which core stacks want to be powered */
+	desired_stack_state = kbase_pm_core_stack_mask(cores_powered);
+	stacks_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK) |
+		desired_stack_state;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	/* Work out which tilers want to be powered */
+	tiler_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_TILER);
+	tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+	tilers_powered |= kbdev->pm.backend.desired_tiler_state;
+
+	/* If there are l2 cache users registered, keep all l2s powered even if
+	 * all other cores are off. */
+	if (kbdev->l2_users_count > 0)
+		cores_powered |= kbdev->gpu_props.props.raw_props.l2_present;
+
+	desired_l2_state = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered, tilers_powered);
+
+	l2_inuse_bitmap = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered | shader_transitioning_bitmap,
+			tilers_powered | tiler_transitioning_bitmap);
+
+#ifdef CONFIG_MALI_CORESTACK
+	if (stacks_powered)
+		desired_l2_state |= 1;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	/* If any l2 cache is on, then enable l2 #0, for use by job manager */
+	if (0 != desired_l2_state)
+		desired_l2_state |= 1;
+
+	prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+	in_desired_state &= kbase_pm_transition_core_type(kbdev,
+			KBASE_PM_CORE_L2, desired_l2_state, l2_inuse_bitmap,
+			&l2_available_bitmap,
+			&kbdev->pm.backend.powering_on_l2_state);
+
+	if (kbdev->l2_available_bitmap != l2_available_bitmap)
+		KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
+
+	kbdev->l2_available_bitmap = l2_available_bitmap;
+
+
+#ifdef CONFIG_MALI_CORESTACK
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_STACK, desired_stack_state, 0,
+				&kbdev->stack_available_bitmap,
+				&kbdev->pm.backend.powering_on_stack_state);
+	}
+#endif /* CONFIG_MALI_CORESTACK */
+
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_TILER,
+				kbdev->pm.backend.desired_tiler_state,
+				0, &tiler_available_bitmap,
+				&kbdev->pm.backend.powering_on_tiler_state);
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_SHADER,
+				kbdev->pm.backend.desired_shader_state,
+				kbdev->shader_inuse_bitmap,
+				&shader_available_bitmap,
+				&kbdev->pm.backend.powering_on_shader_state);
+
+		if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u,
+						(u32) shader_available_bitmap);
+			KBASE_TIMELINE_POWER_SHADER(kbdev,
+						shader_available_bitmap);
+		}
+
+		kbdev->shader_available_bitmap = shader_available_bitmap;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u,
+						(u32) tiler_available_bitmap);
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+		}
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+	} else if ((l2_available_bitmap &
+			kbdev->gpu_props.props.raw_props.tiler_present) !=
+			kbdev->gpu_props.props.raw_props.tiler_present) {
+		tiler_available_bitmap = 0;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+	}
+
+	/* State updated for slow-path waiters */
+	kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+
+	shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* Determine whether the cores are now available (even if the set of
+	 * available cores is empty). Note that they can be available even if
+	 * we've not finished transitioning to the desired state */
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state) {
+		cores_are_available = true;
+
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+				(u32)(kbdev->shader_available_bitmap &
+				kbdev->pm.backend.desired_shader_state));
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+				(u32)(kbdev->tiler_available_bitmap &
+				kbdev->pm.backend.desired_tiler_state));
+
+		/* Log timelining information about handling events that power
+		 * up cores, to match up either with immediate submission either
+		 * because cores already available, or from PM IRQ */
+		if (!in_desired_state)
+			kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+
+	if (in_desired_state) {
+		KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_L2));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+#endif
+
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_L2,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_L2));
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_SHADER,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_SHADER));
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_TILER,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_STACK,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+				kbdev->pm.backend.gpu_in_desired_state,
+				(u32)kbdev->pm.backend.desired_shader_state);
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+				(u32)kbdev->pm.backend.desired_tiler_state);
+
+		/* Log timelining information for synchronous waiters */
+		kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+		/* Wake slow-path waiters. Job scheduler does not use this. */
+		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+
+		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
+	}
+
+	spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+
+	/* kbase_pm_ca_update_core_status can cause one-level recursion into
+	 * this function, so it must only be called once all changes to kbdev
+	 * have been committed, and after the gpu_powered_lock has been
+	 * dropped. */
+	if (kbdev->shader_ready_bitmap != shader_ready_bitmap ||
+	    kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) {
+		kbdev->shader_ready_bitmap = shader_ready_bitmap;
+		kbdev->shader_transitioning_bitmap =
+						shader_transitioning_bitmap;
+
+		kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap,
+						shader_transitioning_bitmap);
+	}
+
+	/* The core availability policy is not allowed to keep core group 0
+	 * turned off (unless it was changing the l2 power state) */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
+		(prev_l2_available_bitmap == desired_l2_state) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+		BUG();
+
+	/* The core availability policy is allowed to keep core group 1 off,
+	 * but all jobs specifically targeting CG1 must fail */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask))
+		kbdev->pm.backend.cg1_disabled = true;
+	else
+		kbdev->pm.backend.cg1_disabled = false;
+
+	return cores_are_available;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock);
+
+/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has
+ * aborted due to a fatal signal. If the time spent waiting has exceeded this
+ * threshold then there is most likely a hardware issue. */
+#define PM_TIMEOUT (5*HZ) /* 5s */
+
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long timeout;
+	bool cores_are_available;
+	int ret;
+
+	/* Force the transition to be checked and reported - the cores may be
+	 * 'available' (for job submission) but not fully powered up. */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	timeout = jiffies + PM_TIMEOUT;
+
+	/* Wait for cores */
+	ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+			kbdev->pm.backend.gpu_in_desired_state);
+
+	if (ret < 0 && time_after(jiffies, timeout)) {
+		dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+		dev_err(kbdev->dev, "Desired state :\n");
+		dev_err(kbdev->dev, "\tShader=%016llx\n",
+				kbdev->pm.backend.desired_shader_state);
+		dev_err(kbdev->dev, "\tTiler =%016llx\n",
+				kbdev->pm.backend.desired_tiler_state);
+		dev_err(kbdev->dev, "Current state :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_LO),
+					NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_LO), NULL));
+		dev_err(kbdev->dev, "Cores transitioning :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_LO), NULL));
+#if KBASE_GPU_RESET_EN
+		dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+		if (kbase_prepare_to_reset_gpu(kbdev))
+			kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+	} else {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
+
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Clear all interrupts,
+	 * and unmask them all.
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
+									NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
+
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Mask all interrupts,
+	 * and clear them all.
+	 */
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+}
+
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_disable_interrupts_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
+
+
+/*
+ * pmu layout:
+ * 0x0000: PMU TAG (RO) (0xCAFECAFE)
+ * 0x0004: PMU VERSION ID (RO) (0x00000000)
+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE)
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
+{
+	bool reset_required = is_resume;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_powered) {
+		/* Already turned on */
+		if (kbdev->poweroff_pending)
+			kbase_pm_enable_interrupts(kbdev);
+		kbdev->poweroff_pending = false;
+		KBASE_DEBUG_ASSERT(!is_resume);
+		return;
+	}
+
+	kbdev->poweroff_pending = false;
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u);
+
+	if (is_resume && kbdev->pm.backend.callback_power_resume) {
+		kbdev->pm.backend.callback_power_resume(kbdev);
+		return;
+	} else if (kbdev->pm.backend.callback_power_on) {
+		kbdev->pm.backend.callback_power_on(kbdev);
+		/* If your platform properly keeps the GPU state you may use the
+		 * return value of the callback_power_on function to
+		 * conditionally reset the GPU on power up. Currently we are
+		 * conservative and always reset the GPU. */
+		reset_required = true;
+	}
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	kbdev->pm.backend.gpu_powered = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (reset_required) {
+		/* GPU state was lost, reset GPU to ensure it is in a
+		 * consistent state */
+		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
+	}
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	/* Reprogram the GPU's MMU */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		if (js_devdata->runpool_irq.per_as_data[i].kctx)
+			kbase_mmu_update(
+				js_devdata->runpool_irq.per_as_data[i].kctx);
+		else
+			kbase_mmu_disable_as(kbdev, i);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Lastly, enable the interrupts */
+	kbase_pm_enable_interrupts(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
+
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* ASSERT that the cores should now be unavailable. No lock needed. */
+	KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+
+	kbdev->poweroff_pending = true;
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* Already turned off */
+		if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+			kbdev->pm.backend.callback_power_suspend(kbdev);
+		return true;
+	}
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u);
+
+	/* Disable interrupts. This also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure that any IRQ handlers have finished */
+	kbase_synchronize_irqs(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (atomic_read(&kbdev->faults_pending)) {
+		/* Page/bus faults are still being processed. The GPU can not
+		 * be powered off until they have completed */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return false;
+	}
+
+	kbase_pm_cache_snoop_disable(kbdev);
+
+	/* The GPU power may be turned off from this point */
+	kbdev->pm.backend.gpu_powered = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+		kbdev->pm.backend.callback_power_suspend(kbdev);
+	else if (kbdev->pm.backend.callback_power_off)
+		kbdev->pm.backend.callback_power_off(kbdev);
+	return true;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_off);
+
+struct kbasep_reset_timeout_data {
+	struct hrtimer timer;
+	bool timed_out;
+	struct kbase_device *kbdev;
+};
+
+void kbase_pm_reset_done(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	kbdev->pm.backend.reset_done = true;
+	wake_up(&kbdev->pm.backend.reset_done_wait);
+}
+
+/**
+ * kbase_pm_wait_for_reset - Wait for a reset to happen
+ *
+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
+ *
+ * @kbdev: Kbase device
+ */
+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	wait_event(kbdev->pm.backend.reset_done_wait,
+						(kbdev->pm.backend.reset_done));
+	kbdev->pm.backend.reset_done = false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done);
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+	struct kbasep_reset_timeout_data *rtdata =
+		container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+	rtdata->timed_out = 1;
+
+	/* Set the wait queue to wake up kbase_pm_init_hw even though the reset
+	 * hasn't completed */
+	kbase_pm_reset_done(rtdata->kbdev);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+	u32 jm_values[4];
+	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >>
+		GPU_ID_VERSION_MAJOR_SHIFT;
+
+	kbdev->hw_quirks_sc = 0;
+
+	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443.
+	 * and needed due to MIDGLES-3539. See PRLAM-11035 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+		kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
+
+	/* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+		kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	/* Enable alternative hardware counter selection if configured. */
+	if (!GPU_ID_IS_NEW_FORMAT(prod_id))
+		kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+#endif
+
+	/* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+		kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
+
+	if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
+		if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */
+			kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
+		else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */
+			kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
+	}
+
+	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(TILER_CONFIG), NULL);
+
+	/* Set tiler clock gate override if required */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+		kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+
+	/* Limit the GPU bus bandwidth if the platform needs this. */
+	kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(L2_MMU_CONFIG), NULL);
+
+	/* Limit read ID width for AXI */
+	kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS);
+	kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT;
+
+	/* Limit write ID width for AXI */
+	kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+	kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Allow memory configuration disparity to be ignored, we
+		 * optimize the use of shared memory and thus we expect
+		 * some disparity in the memory configuration */
+		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
+	}
+
+	/* Only for T86x/T88x-based products after r2p0 */
+	if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) {
+		/* The JM_CONFIG register is specified as follows in the
+		 T86x/T88x Engineering Specification Supplement:
+		 The values are read from device tree in order.
+		*/
+#define TIMESTAMP_OVERRIDE  1
+#define CLOCK_GATE_OVERRIDE (1<<1)
+#define JOB_THROTTLE_ENABLE (1<<2)
+#define JOB_THROTTLE_LIMIT_SHIFT 3
+
+		/* 6 bits in the register */
+		const u32 jm_max_limit = 0x3F;
+
+		if (of_property_read_u32_array(np,
+					"jm_config",
+					&jm_values[0],
+					ARRAY_SIZE(jm_values))) {
+			/* Entry not in device tree, use defaults  */
+			jm_values[0] = 0;
+			jm_values[1] = 0;
+			jm_values[2] = 0;
+			jm_values[3] = jm_max_limit; /* Max value */
+		}
+
+		/* Limit throttle limit to 6 bits*/
+		if (jm_values[3] > jm_max_limit) {
+			dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63).");
+			jm_values[3] = jm_max_limit;
+		}
+
+		/* Aggregate to one integer. */
+		kbdev->hw_quirks_jm = (jm_values[0] ? TIMESTAMP_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[1] ? CLOCK_GATE_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[2] ? JOB_THROTTLE_ENABLE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[3] <<
+				JOB_THROTTLE_LIMIT_SHIFT);
+	} else {
+		kbdev->hw_quirks_jm = KBASE_JM_CONFIG_UNUSED;
+	}
+
+#ifdef CONFIG_MALI_CORESTACK
+#define MANUAL_POWER_CONTROL ((u32)(1 << 8))
+	kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL;
+#endif /* CONFIG_MALI_CORESTACK */
+}
+
+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
+{
+	if (kbdev->hw_quirks_sc)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
+				kbdev->hw_quirks_sc, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
+			kbdev->hw_quirks_tiler, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
+			kbdev->hw_quirks_mmu, NULL);
+
+
+	if (kbdev->hw_quirks_jm != KBASE_JM_CONFIG_UNUSED)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
+				kbdev->hw_quirks_jm, NULL);
+
+}
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
+{
+	if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) &&
+		!kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_enable_smc != 0)
+			kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0);
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n");
+		kbdev->cci_snoop_enabled = true;
+	}
+}
+
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
+{
+	if (kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_disable_smc != 0) {
+			mali_cci_flush_l2(kbdev);
+			kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0);
+		}
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n");
+		kbdev->cci_snoop_enabled = false;
+	}
+}
+
+static int kbase_pm_reset_do_normal(struct kbase_device *kbdev)
+{
+	struct kbasep_reset_timeout_data rtdata;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+	KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev);
+#endif
+#if 0
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_SOFT_RESET, NULL);
+#else
+  //dev_err(kbdev->dev, "%s,  %d \n", __FILE__, __LINE__);
+  core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+  l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+  tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+
+  //printk("core_ready=%ld, l2_ready=%ld, tiler_ready=%ld\n", core_ready, l2_ready, tiler_ready);
+  if (core_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, core_ready, ACTION_PWROFF);
+
+  if (l2_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_ready, ACTION_PWROFF);
+
+  if (tiler_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_ready, ACTION_PWROFF);
+
+  /* do external reset */
+
+//JOHNT
+#if 0
+    // Level reset mail
+    Wr(P_RESET0_MASK, ~(0x1<<20));
+    Wr(P_RESET0_LEVEL, ~(0x1<<20));
+
+    // Level reset mail
+    Wr(P_RESET2_MASK, ~(0x1<<14));
+    Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    Wr(P_RESET2_LEVEL, 0xffffffff);
+    Wr(P_RESET0_LEVEL, 0xffffffff);
+#else
+    if (reg_base_hiubus) {
+        value = Rd(RESET0_MASK);
+        value = value & (~(0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_MASK, value);
+
+        value = Rd(RESET0_LEVEL);
+        value = value & (~(0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_LEVEL, value);
+    ///////////////
+        value = Rd(RESET2_MASK);
+        value = value & (~(0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_MASK, value);
+
+        value = Rd(RESET2_LEVEL);
+        value = value & (~(0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_LEVEL, value);
+
+        value = Rd(RESET0_LEVEL);
+        value = value | ((0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_LEVEL, value);
+
+        value = Rd(RESET2_LEVEL);
+        value = value | ((0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_LEVEL, value);
+    } else {
+		dev_err(kbdev->dev, "reg_base_hiubus is null !!!\n");
+    }
+#endif
+  //writel(..../*e.g. PWR_RESET, EXTERNAL_PWR_REGISTER*/);
+
+  /* any other action necessary, like a simple delay */
+  kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+  kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+#endif
+
+	/* Unmask the reset complete interrupt only */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED,
+									NULL);
+
+	/* Initialize a structure for tracking the status of the reset */
+	rtdata.kbdev = kbdev;
+	rtdata.timed_out = 0;
+
+	/* Create a timer to use as a timeout on the reset */
+	hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rtdata.timer.function = kbasep_reset_timeout;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	/* No interrupt has been received - check if the RAWSTAT register says
+	 * the reset has completed */
+	if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+							RESET_COMPLETED) {
+		/* The interrupt is set in the RAWSTAT; this suggests that the
+		 * interrupts are not getting to the CPU */
+		dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+		/* If interrupts aren't working we can't continue. */
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return -EINVAL;
+	}
+
+	/* The GPU doesn't seem to be responding to the reset so try a hard
+	 * reset */
+	dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+								RESET_TIMEOUT);
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET, NULL);
+
+	/* Restart the timer to wait for the hard reset to complete */
+	rtdata.timed_out = 0;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	destroy_hrtimer_on_stack(&rtdata.timer);
+
+	dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+								RESET_TIMEOUT);
+
+	return -EINVAL;
+}
+
+static int kbase_pm_reset_do_protected(struct kbase_device *kbdev)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+	KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev);
+
+	return kbdev->protected_ops->protected_mode_reset(kbdev);
+}
+
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+{
+	unsigned long irq_flags;
+	int err;
+	bool resume_vinstr = false;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Ensure the clock is on before attempting to access the hardware */
+	if (!kbdev->pm.backend.gpu_powered) {
+		if (kbdev->pm.backend.callback_power_on)
+			kbdev->pm.backend.callback_power_on(kbdev);
+
+		spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+		kbdev->pm.backend.gpu_powered = true;
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+	}
+
+	/* Ensure interrupts are off to begin with, this also clears any
+	 * outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure cache snoops are disabled before reset. */
+	kbase_pm_cache_snoop_disable(kbdev);
+	/* Prepare for the soft-reset */
+	kbdev->pm.backend.reset_done = false;
+
+	/* The cores should be made unavailable due to the reset */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->shader_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u, (u32)0u);
+	if (kbdev->tiler_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u, (u32)0u);
+	kbdev->shader_available_bitmap = 0u;
+	kbdev->tiler_available_bitmap = 0u;
+	kbdev->l2_available_bitmap = 0u;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	/* Soft reset the GPU */
+	if (kbdev->protected_mode_support &&
+			kbdev->protected_ops->protected_mode_reset)
+		err = kbase_pm_reset_do_protected(kbdev);
+	else
+		err = kbase_pm_reset_do_normal(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->protected_mode)
+		resume_vinstr = true;
+	kbdev->protected_mode = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	if (err)
+		goto exit;
+
+	if (flags & PM_HW_ISSUES_DETECT)
+		kbase_pm_hw_issues_detect(kbdev);
+
+	kbase_pm_hw_issues_apply(kbdev);
+	kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
+
+	/* Sanity check protected mode was left after reset */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		u32 gpu_status = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS), NULL);
+
+		WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE);
+	}
+
+	/* If cycle counter was in use re-enable it, enable_irqs will only be
+	 * false when called from kbase_pm_powerup */
+	if (kbdev->pm.backend.gpu_cycle_counter_requests &&
+						(flags & PM_ENABLE_IRQS)) {
+		/* enable interrupts as the L2 may have to be powered on */
+		kbase_pm_enable_interrupts(kbdev);
+		kbase_pm_request_l2_caches(kbdev);
+
+		/* Re-enable the counters if we need to */
+		spin_lock_irqsave(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+		if (kbdev->pm.backend.gpu_cycle_counter_requests)
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+		spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+		kbase_pm_release_l2_caches(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+		kbase_pm_disable_interrupts(kbdev);
+	}
+
+	if (flags & PM_ENABLE_IRQS)
+		kbase_pm_enable_interrupts(kbdev);
+
+exit:
+	/* If GPU is leaving protected mode resume vinstr operation. */
+	if (kbdev->vinstr_ctx && resume_vinstr)
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	return err;
+}
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ *
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * kbase_pm_request_gpu_cycle_counter() or
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only
+ *
+ * When this function is called the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to (
+ * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() )
+ *
+ * @kbdev:     The kbase device structure of the device
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+	--kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	kbase_pm_release_l2_caches(kbdev);
+}
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100755
index 0000000..d0e2d56
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,593 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * enum kbasep_pm_action - Actions that can be performed on a core.
+ *
+ * This enumeration is private to the file. Its values are set to allow
+ * core_type_to_reg() function, which decodes this enumeration, to be simpler
+ * and more efficient.
+ *
+ * @ACTION_PRESENT: The cores that are present
+ * @ACTION_READY: The cores that are ready
+ * @ACTION_PWRON: Power on the cores specified
+ * @ACTION_PWROFF: Power off the cores specified
+ * @ACTION_PWRTRANS: The cores that are transitioning
+ * @ACTION_PWRACTIVE: The cores that are active
+ */
+enum kbasep_pm_action {
+	ACTION_PRESENT = 0,
+	ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO),
+	ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO),
+	ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO),
+	ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO),
+	ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO)
+};
+
+
+
+/**
+ * kbase_pm_dev_idle - The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_dev_activate - The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_get_present_cores - Get details of the cores that are present in
+ *                              the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid
+ *         pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_active_cores - Get details of the cores that are currently
+ *                             active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_trans_cores - Get details of the cores that are currently
+ *                            transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_ready_cores - Get details of the cores that are currently
+ *                            powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are powered and ready for jobs (they may
+ * or may not be currently executing jobs).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_clock_on - Turn the clock for the device on, and enable device
+ *                     interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if clock on due to resume after suspend, false otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
+ *                      device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU
+ * off. It should be modified during integration to perform the necessary
+ * actions to turn the clock off (if this is possible in the integration).
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if clock off due to suspend, false otherwise
+ *
+ * Return: true  if clock was turned off, or
+ *         false if clock can not be turned off due to pending page/bus fault
+ *               workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
+
+/**
+ * kbase_pm_enable_interrupts - Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts - Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
+ *                                      that does not take the hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * kbase_pm_reset_done - The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_pm_check_transitions_nolock - Check if there are any power transitions
+ *                                     to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state has no
+ * cores. That is: of the no cores desired, none were *un*available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation)
+ *
+ * The caller must hold kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return:      non-zero when all desired cores are available. That is,
+ *              it's worthwhile for the caller to submit a job.
+ *              false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_check_transitions_sync - Synchronous and locking variant of
+ *                                   kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device.pm.power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ *                                      where the caller must hold
+ *                                      kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ *                               the Power Policy, and begin any power
+ *                               transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware acheive the desired shader
+ * core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
+ *                                     the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+
+void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action);
+
+/**
+ * kbasep_pm_read_present_cores - Read the bitmasks of present cores.
+ *
+ * This information is cached to avoid having to perform register reads whenever
+ * the information is required.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init()) after calling this function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to
+ *                         update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of @buffer_updated differs from a previous call.
+ *
+ * @kbdev:          The kbase device structure for the device (must be a
+ *                  valid pointer)
+ * @buffer_updated: True if the buffer has been updated on this VSync,
+ *                  false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change
+ *                            the clock speed of the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating.
+ */
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
+ *                                      needed
+ *
+ * If the caller is the first caller then the GPU cycle counters will be enabled
+ * along with the l2 cache
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
+ *                                               needed (l2 cache already on)
+ *
+ * This is a version of the above function
+ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
+ * l2 cache is known to be on and assured to be on until the subsequent call of
+ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does
+ * not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
+ *                                      longer in use
+ *
+ * If the caller is the last caller then the GPU cycle counters will be
+ * disabled. A request must have been made before a call to this.
+ *
+ * Caller must not hold the hwaccess_lock, as it will be taken in this function.
+ * If the caller is already holding this lock then
+ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
+ *                                             that does not take hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
+ *                                       complete
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * Access to registers should be done using kbase_os_reg_read()/write() at this
+ * stage, not kbase_reg_read()/write().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ *                              collection is active.
+ *
+ * Note that this returns if the power management metrics collection was
+ * active at the time of calling, it is possible that after the call the metrics
+ * collection enable may have changed state.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if power on due to resume after suspend,
+ *             false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been
+ *                        requested.
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if power off due to suspend,
+ *              false otherwise
+ */
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+
+#ifdef CONFIG_PM_DEVFREQ
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total, unsigned long *busy);
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @kbdev:         The kbase device structure for the device (must be a
+ *                 valid pointer)
+ * @utilisation:   The current calculated utilisation by the metrics system.
+ * @util_gl_share: The current calculated gl share of utilisation.
+ * @util_cl_share: The current calculated cl share of utilisation per core
+ *                 group.
+ * Return:         Returns 0 on failure and non zero on success.
+ */
+
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2]);
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_metrics_update - Inform the metrics system that an atom is either
+ *                           about to be run or has just completed.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @now:   Pointer to the timestamp of the change, or NULL to use current time
+ *
+ * Caller must hold hwaccess_lock
+ */
+void kbase_pm_metrics_update(struct kbase_device *kbdev,
+				ktime_t *now);
+
+/**
+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called after L2 power up.
+ */
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called before L2 power off.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
new file mode 100755
index 0000000..7613e1d
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -0,0 +1,401 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION          70
+#define KBASE_PM_VSYNC_MAX_UTILISATION          90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION       10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION       40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
+ * under 11s. Exceeding this will cause overflow */
+#define KBASE_PM_TIME_SHIFT			8
+
+/* Maximum time between sampling of utilization data, without resetting the
+ * counters. */
+#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbasep_pm_metrics_data *metrics;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	metrics = container_of(timer, struct kbasep_pm_metrics_data, timer);
+	kbase_pm_get_dvfs_action(metrics->kbdev);
+
+	spin_lock_irqsave(&metrics->lock, flags);
+
+	if (metrics->timer_active)
+		hrtimer_start(timer,
+			HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&metrics->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+int kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.metrics.kbdev = kbdev;
+
+	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.prev_busy = 0;
+	kbdev->pm.backend.metrics.prev_idle = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+
+	spin_lock_init(&kbdev->pm.backend.metrics.lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbdev->pm.backend.metrics.timer_active = true;
+	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
+
+	hrtimer_start(&kbdev->pm.backend.metrics.timer,
+			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
+
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbdev->pm.backend.metrics.timer_active = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	ktime_t diff;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+	if (ktime_to_ns(diff) < 0)
+		return;
+
+	if (kbdev->pm.backend.metrics.gpu_active) {
+		u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+
+		kbdev->pm.backend.metrics.time_busy += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[0])
+			kbdev->pm.backend.metrics.busy_cl[0] += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[1])
+			kbdev->pm.backend.metrics.busy_cl[1] += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[0])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+	} else {
+		kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff)
+							>> KBASE_PM_TIME_SHIFT);
+	}
+
+	kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+#if defined(CONFIG_PM_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function.
+ */
+static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	/* Store previous value */
+	kbdev->pm.backend.metrics.prev_idle =
+					kbdev->pm.backend.metrics.time_idle;
+	kbdev->pm.backend.metrics.prev_busy =
+					kbdev->pm.backend.metrics.time_busy;
+
+	/* Reset current values */
+	kbdev->pm.backend.metrics.time_period_start = now;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+}
+
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get());
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total_out, unsigned long *busy_out)
+{
+	ktime_t now = ktime_get();
+	unsigned long flags, busy, total;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	busy = kbdev->pm.backend.metrics.time_busy;
+	total = busy + kbdev->pm.backend.metrics.time_idle;
+
+	/* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
+	 * 100ms) */
+	if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+		kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+	} else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+		total += kbdev->pm.backend.metrics.prev_idle +
+				kbdev->pm.backend.metrics.prev_busy;
+		busy += kbdev->pm.backend.metrics.prev_busy;
+	}
+
+	*total_out = total;
+	*busy_out = busy;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev,
+					int *util_gl_share,
+					int util_cl_share[2],
+					ktime_t now)
+{
+	int utilisation;
+	int busy;
+
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	if (kbdev->pm.backend.metrics.time_idle +
+				kbdev->pm.backend.metrics.time_busy == 0) {
+		/* No data - so we return NOP */
+		utilisation = -1;
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+		goto out;
+	}
+
+	utilisation = (100 * kbdev->pm.backend.metrics.time_busy) /
+			(kbdev->pm.backend.metrics.time_idle +
+			 kbdev->pm.backend.metrics.time_busy);
+
+	busy = kbdev->pm.backend.metrics.busy_gl +
+		kbdev->pm.backend.metrics.busy_cl[0] +
+		kbdev->pm.backend.metrics.busy_cl[1];
+
+	if (busy != 0) {
+		if (util_gl_share)
+			*util_gl_share =
+				(100 * kbdev->pm.backend.metrics.busy_gl) /
+									busy;
+		if (util_cl_share) {
+			util_cl_share[0] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[0]) /
+									busy;
+			util_cl_share[1] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[1]) /
+									busy;
+		}
+	} else {
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+	}
+
+out:
+	return utilisation;
+}
+
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	int utilisation, util_gl_share;
+	int util_cl_share[2];
+	ktime_t now;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	now = ktime_get();
+
+	utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share,
+			util_cl_share, now);
+
+	if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 ||
+							util_cl_share[1] < 0) {
+		utilisation = 0;
+		util_gl_share = 0;
+		util_cl_share[0] = 0;
+		util_cl_share[1] = 0;
+		goto out;
+	}
+
+out:
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
+								util_cl_share);
+#endif				/*CONFIG_MALI_MIDGARD_DVFS */
+
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+	bool isactive;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	isactive = kbdev->pm.backend.metrics.timer_active;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	return isactive;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/**
+ * kbase_pm_metrics_active_calc - Update PM active counts based on currently
+ *                                running atoms
+ * @kbdev: Device pointer
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ */
+static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		/* Head atom may have just completed, so if it isn't running
+		 * then try the next atom */
+		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
+			katom = kbase_gpu_inspect(kbdev, js, 1);
+
+		if (katom && katom->gpu_rb_state ==
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+			if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+				int device_nr = (katom->core_req &
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+						? katom->device_nr : 0;
+				if (!WARN_ON(device_nr >= 2))
+					kbdev->pm.backend.metrics.
+						active_cl_ctx[device_nr] = 1;
+			} else {
+				/* Slot 2 should not be running non-compute
+				 * atoms */
+				if (!WARN_ON(js >= 2))
+					kbdev->pm.backend.metrics.
+						active_gl_ctx[js] = 1;
+			}
+			kbdev->pm.backend.metrics.gpu_active = true;
+		}
+	}
+}
+
+/* called when job is submitted to or removed from a GPU slot */
+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
+{
+	unsigned long flags;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	if (!timestamp) {
+		now = ktime_get();
+		timestamp = &now;
+	}
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
+
+	kbase_pm_metrics_active_calc(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100755
index 0000000..bc3bd18
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,998 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#else				/* CONFIG_MALI_NO_MALI */
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/* The number of policies available in the system.
+ * This is derived from the number of functions listed in policy_get_functions.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+	 * expect to hit it nor tend to hit it very much anyway. We can detect
+	 * whether we need more instrumentation by a difference between
+	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
+
+	/* Must be the last */
+	KBASE_PM_FUNC_ID_COUNT
+};
+
+
+/* State changes during request/unrequest/release-ing cores */
+enum {
+	KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
+	KBASE_PM_CHANGE_STATE_TILER  = (1u << 1),
+
+	/* These two must be last */
+	KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
+						KBASE_PM_CHANGE_STATE_SHADER),
+	KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
+};
+typedef u32 kbase_pm_change_state;
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+/* Timeline Trace code lookups for each function */
+static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
+					[KBASE_PM_CHANGE_STATE_COUNT] = {
+	/* kbase_pm_request_cores */
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
+
+	/* kbase_pm_release_cores */
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
+};
+
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id,
+		kbase_pm_change_state state)
+{
+	int trace_code;
+
+	KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
+	KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
+									state);
+
+	trace_code = kbase_pm_change_state_trace_code[func_id][state];
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
+}
+
+#else /* CONFIG_MALI_TRACE_TIMELINE */
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id, kbase_pm_change_state state)
+{
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+/**
+ * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
+ *                               requested shader cores
+ * @kbdev: Device pointer
+ */
+static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
+{
+	u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
+	u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->pm.backend.desired_shader_state &=
+			~kbdev->pm.backend.shader_poweroff_pending;
+	kbdev->pm.backend.desired_tiler_state &=
+			~kbdev->pm.backend.tiler_poweroff_pending;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+
+	if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
+			prev_tiler_state !=
+				kbdev->pm.backend.desired_tiler_state ||
+			kbdev->pm.backend.ca_in_transition) {
+		bool cores_are_available;
+
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+		/* Don't need 'cores_are_available',
+		 * because we don't return anything */
+		CSTD_UNUSED(cores_are_available);
+	}
+}
+
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(timer, struct kbase_device,
+						pm.backend.gpu_poweroff_timer);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* It is safe for this call to do nothing if the work item is already
+	 * queued. The worker function will read the must up-to-date state of
+	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
+	 *
+	 * If a state change occurs while the worker function is processing,
+	 * this call will succeed as a work item can be requeued once it has
+	 * started processing.
+	 */
+	if (kbdev->pm.backend.gpu_poweroff_pending)
+		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+					&kbdev->pm.backend.gpu_poweroff_work);
+
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending_time--;
+
+		KBASE_DEBUG_ASSERT(
+				kbdev->pm.backend.shader_poweroff_pending_time
+									>= 0);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending_time)
+			kbasep_pm_do_poweroff_cores(kbdev);
+	}
+
+	if (kbdev->pm.backend.poweroff_timer_needed) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+		return HRTIMER_RESTART;
+	}
+
+	kbdev->pm.backend.poweroff_timer_running = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	bool do_poweroff = false;
+
+	kbdev = container_of(data, struct kbase_device,
+						pm.backend.gpu_poweroff_work);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	kbdev->pm.backend.gpu_poweroff_pending--;
+
+	if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Only power off the GPU if a request is still pending */
+	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+		do_poweroff = true;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (do_poweroff) {
+		kbdev->pm.backend.poweroff_timer_needed = false;
+		hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+		kbdev->pm.backend.poweroff_timer_running = false;
+
+		/* Power off the GPU */
+		kbase_pm_do_poweroff(kbdev, false);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("kbase_pm_do_poweroff",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!wq)
+		return -ENOMEM;
+
+	kbdev->pm.backend.gpu_poweroff_wq = wq;
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
+			kbasep_pm_do_gpu_poweroff_wq);
+	hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	kbdev->pm.backend.gpu_poweroff_timer.function =
+			kbasep_pm_do_gpu_poweroff_callback;
+	kbdev->pm.backend.pm_current_policy = policy_list[0];
+	kbdev->pm.backend.pm_current_policy->init(kbdev);
+	kbdev->pm.gpu_poweroff_time =
+			HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+	kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+	kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
+
+	return 0;
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.pm_current_policy->term(kbdev);
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
+}
+
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	kbdev->pm.backend.poweroff_timer_needed = false;
+	hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.poweroff_timer_running = false;
+
+	/* If wq is already running but is held off by pm.lock, make sure it has
+	 * no effect */
+	kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+	kbdev->pm.backend.shader_poweroff_pending_time = 0;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+	bool active;
+
+	lockdep_assert_held(&pm->lock);
+
+	/* pm_current_policy will never be NULL while pm.lock is held */
+	KBASE_DEBUG_ASSERT(backend->pm_current_policy);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	active = backend->pm_current_policy->get_core_active(kbdev);
+
+	if (active) {
+		if (backend->gpu_poweroff_pending) {
+			/* Cancel any pending power off request */
+			backend->gpu_poweroff_pending = 0;
+
+			/* If a request was pending then the GPU was still
+			 * powered, so no need to continue */
+			if (!kbdev->poweroff_pending) {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				return;
+			}
+		}
+
+		if (!backend->poweroff_timer_running && !backend->gpu_powered &&
+				(pm->poweroff_gpu_ticks ||
+				pm->poweroff_shader_ticks)) {
+			backend->poweroff_timer_needed = true;
+			backend->poweroff_timer_running = true;
+			hrtimer_start(&backend->gpu_poweroff_timer,
+					pm->gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+
+		/* Power on the GPU and any cores requested by the policy */
+		if (pm->backend.poweroff_wait_in_progress) {
+			pm->backend.poweron_required = true;
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			kbase_pm_do_poweron(kbdev, false);
+		}
+	} else {
+		/* It is an error for the power policy to power off the GPU
+		 * when there are contexts active */
+		KBASE_DEBUG_ASSERT(pm->active_count == 0);
+
+		if (backend->shader_poweroff_pending ||
+				backend->tiler_poweroff_pending) {
+			backend->shader_poweroff_pending = 0;
+			backend->tiler_poweroff_pending = 0;
+			backend->shader_poweroff_pending_time = 0;
+		}
+
+		/* Request power off */
+		if (pm->backend.gpu_powered) {
+			if (pm->poweroff_gpu_ticks) {
+				backend->gpu_poweroff_pending =
+						pm->poweroff_gpu_ticks;
+				backend->poweroff_timer_needed = true;
+				if (!backend->poweroff_timer_running) {
+					/* Start timer if not running (eg if
+					 * power policy has been changed from
+					 * always_on to something else). This
+					 * will ensure the GPU is actually
+					 * powered off */
+					backend->poweroff_timer_running
+							= true;
+					hrtimer_start(
+						&backend->gpu_poweroff_timer,
+						pm->gpu_poweroff_time,
+						HRTIMER_MODE_REL);
+				}
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+			} else {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+
+				/* Power off the GPU immediately */
+				kbase_pm_do_poweroff(kbdev, false);
+			}
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+	}
+}
+
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+	u64 desired_bitmap;
+	u64 desired_tiler_bitmap;
+	bool cores_are_available;
+	bool do_poweroff = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.pm_current_policy == NULL)
+		return;
+	if (kbdev->pm.backend.poweroff_wait_in_progress)
+		return;
+
+	if (kbdev->protected_mode_transition &&	!kbdev->shader_needed_bitmap &&
+			!kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt) {
+		/* We are trying to change in/out of protected mode - force all
+		 * cores off so that the L2 powers down */
+		desired_bitmap = 0;
+		desired_tiler_bitmap = 0;
+	} else {
+		desired_bitmap =
+		kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
+		desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+
+		if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+			desired_tiler_bitmap = 1;
+		else
+			desired_tiler_bitmap = 0;
+
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
+			/* Unless XAFFINITY is supported, enable core 0 if tiler
+			 * required, regardless of core availability */
+			if (kbdev->tiler_needed_cnt > 0 ||
+					kbdev->tiler_inuse_cnt > 0)
+				desired_bitmap |= 1;
+		}
+	}
+
+	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
+							(u32)desired_bitmap);
+	/* Are any cores being powered on? */
+	if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
+	    ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
+	    kbdev->pm.backend.ca_in_transition) {
+		/* Check if we are powering off any cores before updating shader
+		 * state */
+		if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+			/* Start timer to power off cores */
+			kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+			kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+
+			if (kbdev->pm.poweroff_shader_ticks &&
+					!kbdev->protected_mode_transition)
+				kbdev->pm.backend.shader_poweroff_pending_time =
+						kbdev->pm.poweroff_shader_ticks;
+			else
+				do_poweroff = true;
+		}
+
+		kbdev->pm.backend.desired_shader_state = desired_bitmap;
+		kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
+
+		/* If any cores are being powered on, transition immediately */
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+		/* Start timer to power off cores */
+		kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+		if (kbdev->pm.poweroff_shader_ticks &&
+				!kbdev->protected_mode_transition)
+			kbdev->pm.backend.shader_poweroff_pending_time =
+					kbdev->pm.poweroff_shader_ticks;
+		else
+			kbasep_pm_do_poweroff_cores(kbdev);
+	} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
+			desired_tiler_bitmap != 0 &&
+			kbdev->pm.backend.poweroff_timer_needed) {
+		/* If power policy is keeping cores on despite there being no
+		 * active contexts then disable poweroff timer as it isn't
+		 * required.
+		 * Only reset poweroff_timer_needed if we're not in the middle
+		 * of the power off callback */
+		kbdev->pm.backend.poweroff_timer_needed = false;
+	}
+
+	/* Ensure timer does not power off wanted cores and make sure to power
+	 * off unwanted cores */
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending &=
+				~(kbdev->pm.backend.desired_shader_state &
+								desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending &=
+				~(kbdev->pm.backend.desired_tiler_state &
+				desired_tiler_bitmap);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending &&
+				!kbdev->pm.backend.tiler_poweroff_pending)
+			kbdev->pm.backend.shader_poweroff_pending_time = 0;
+	}
+
+	/* Shader poweroff is deferred to the end of the function, to eliminate
+	 * issues caused by the core availability policy recursing into this
+	 * function */
+	if (do_poweroff)
+		kbasep_pm_do_poweroff_cores(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+}
+
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
+
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
+
+int set_policy_by_name(struct kbase_device *kbdev, const char *name)
+{
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, name)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		printk("power_policy: policy not found\n");
+		return -EINVAL;
+	}
+	trace_printk("policy name=%s\n", name);
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(set_policy_by_name);
+
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_policy *new_policy)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	const struct kbase_pm_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.pm_current_policy;
+	kbdev->pm.backend.pm_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
+								old_policy->id);
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
+								new_policy->id);
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.pm_current_policy = new_policy;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_active(kbdev);
+	kbase_pm_update_cores_state(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
+
+/* Check whether a state change has finished, and trace it as completed */
+static void
+kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
+{
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state)
+		kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 cores;
+
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	cores = shader_cores;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+
+		/* It should be almost impossible for this to overflow. It would
+		 * require 2^32 atoms to request a particular core, which would
+		 * require 2^24 contexts to submit. This would require an amount
+		 * of memory that is impossible on a 32-bit system and extremely
+		 * unlikely on a 64-bit system. */
+		int cnt = ++kbdev->shader_needed_cnt[bitnum];
+
+		if (1 == cnt) {
+			kbdev->shader_needed_bitmap |= bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt = ++kbdev->tiler_needed_cnt;
+
+		if (1 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+							change_gpu_state);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
+
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_needed_bitmap &= ~bit;
+
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		cnt = --kbdev->tiler_needed_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* Trace that any state change effectively completes immediately
+		 * - no-one will wait on the state change */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
+
+enum kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 prev_shader_needed;	/* Just for tracing */
+	u64 prev_shader_inuse;	/* Just for tracing */
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	prev_shader_needed = kbdev->shader_needed_bitmap;
+	prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+	/* If desired_shader_state does not contain the requested cores, then
+	 * power management is not attempting to powering those cores (most
+	 * likely due to core availability policy) and a new job affinity must
+	 * be chosen */
+	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+							shader_cores) {
+		return (kbdev->pm.backend.poweroff_wait_in_progress ||
+				kbdev->pm.backend.pm_current_policy == NULL) ?
+				KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
+	}
+
+	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+	    (tiler_required && !kbdev->tiler_available_bitmap)) {
+		/* Trace ongoing core transition */
+		kbase_timeline_pm_l2_transition_start(kbdev);
+		return KBASE_CORES_NOT_READY;
+	}
+
+	/* If we started to trace a state change, then trace it has being
+	 * finished by now, at the very latest */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+	/* Trace core transition done */
+	kbase_timeline_pm_l2_transition_done(kbdev);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt)
+			kbdev->shader_needed_bitmap &= ~bit;
+
+		/* shader_inuse_cnt should not overflow because there can only
+		 * be a very limited number of jobs on the h/w at one time */
+
+		kbdev->shader_inuse_cnt[bitnum]++;
+		kbdev->shader_inuse_bitmap |= bit;
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		--kbdev->tiler_needed_cnt;
+
+		kbdev->tiler_inuse_cnt++;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+	}
+
+	if (prev_shader_needed != kbdev->shader_needed_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+	return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_inuse_bitmap &= ~bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+		cnt = --kbdev->tiler_inuse_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+							change_gpu_state);
+
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
+
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+					bool tiler_required,
+					u64 shader_cores)
+{
+	unsigned long flags;
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_check_transitions_sync(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
+
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 prior_l2_users_count;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	prior_l2_users_count = kbdev->l2_users_count++;
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
+
+	/* if the GPU is reset while the l2 is on, l2 will be off but
+	 * prior_l2_users_count will be > 0. l2_available_bitmap will have been
+	 * set to 0 though by kbase_pm_init_hw */
+	if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
+		kbase_pm_check_transitions_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	wait_event(kbdev->pm.backend.l2_powered_wait,
+					kbdev->pm.backend.l2_powered == 1);
+
+	/* Trace that any state change completed immediately */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
+
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->l2_users_count++;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
+
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
+
+	--kbdev->l2_users_count;
+
+	if (!kbdev->l2_users_count) {
+		kbase_pm_check_transitions_nolock(kbdev);
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
new file mode 100755
index 0000000..611a90e
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -0,0 +1,227 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ *         initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+	KBASE_CORES_NOT_READY = 0,
+	KBASE_NEW_AFFINITY = 1,
+	KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ *                          for jobs to be submitted
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete/the cores might not be available
+ * until a Power Management IRQ.
+ *
+ * Return: 0 if the cores were successfully requested, or -errno otherwise.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ *                            jobs to be submitted.
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job is
+ * cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and if so update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ *
+ *         %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
+ *
+ *         %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+						struct kbase_device *kbdev,
+						bool tiler_required,
+						u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_register_inuse_cores() )
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
+ * may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups, power up, wait and prevent
+ * the power manager from powering down the l2 caches.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2
+ *
+ * It is the callers responsibility to ensure that the l2 is already powered up
+ * and to eventually call kbase_pm_release_l2_caches()
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power manager
+ * to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c
new file mode 100644
index 0000000..d965033
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c
@@ -0,0 +1,169 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+#include <linux/of.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <backend/gpu/mali_kbase_power_model_simple.h>
+
+/*
+ * This model is primarily designed for the Juno platform. It may not be
+ * suitable for other platforms.
+ */
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static u32 dynamic_coefficient;
+static u32 static_coefficient;
+static s32 ts[4];
+static struct thermal_zone_device *gpu_tz;
+
+static unsigned long model_static_power(unsigned long voltage)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	unsigned long temperature;
+#else
+	int temperature;
+#endif
+	unsigned long temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	if (gpu_tz) {
+		int ret;
+
+		ret = gpu_tz->ops->get_temp(gpu_tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	} else {
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(ts[3] * temp_cubed)
+			+ (ts[2] * temp_squared)
+			+ (ts[1] * temp)
+			+ ts[0];
+
+	return (((static_coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+}
+
+static unsigned long model_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+
+	return (dynamic_coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+struct devfreq_cooling_ops power_model_simple_ops = {
+#else
+struct devfreq_cooling_power power_model_simple_ops = {
+#endif
+	.get_static_power = model_static_power,
+	.get_dynamic_power = model_dynamic_power,
+};
+
+int kbase_power_model_simple_init(struct kbase_device *kbdev)
+{
+	struct device_node *power_model_node;
+	const char *tz_name;
+	u32 static_power, dynamic_power;
+	u32 voltage, voltage_squared, voltage_cubed, frequency;
+
+	power_model_node = of_get_child_by_name(kbdev->dev->of_node,
+			"power_model");
+	if (!power_model_node) {
+		dev_err(kbdev->dev, "could not find power_model node\n");
+		return -ENODEV;
+	}
+	if (!of_device_is_compatible(power_model_node,
+			"arm,mali-simple-power-model")) {
+		dev_err(kbdev->dev, "power_model incompatible with simple power model\n");
+		return -ENODEV;
+	}
+
+	if (of_property_read_string(power_model_node, "thermal-zone",
+			&tz_name)) {
+		dev_err(kbdev->dev, "ts in power_model not available\n");
+		return -EINVAL;
+	}
+
+	gpu_tz = thermal_zone_get_zone_by_name(tz_name);
+	if (IS_ERR(gpu_tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(gpu_tz));
+		gpu_tz = NULL;
+
+		return -EPROBE_DEFER;
+	}
+
+	if (of_property_read_u32(power_model_node, "static-power",
+			&static_power)) {
+		dev_err(kbdev->dev, "static-power in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "dynamic-power",
+			&dynamic_power)) {
+		dev_err(kbdev->dev, "dynamic-power in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "voltage",
+			&voltage)) {
+		dev_err(kbdev->dev, "voltage in power_model not available\n");
+		return -EINVAL;
+	}
+	if (of_property_read_u32(power_model_node, "frequency",
+			&frequency)) {
+		dev_err(kbdev->dev, "frequency in power_model not available\n");
+		return -EINVAL;
+	}
+	voltage_squared = (voltage * voltage) / 1000;
+	voltage_cubed = voltage * voltage * voltage;
+	static_coefficient = (static_power << 20) / (voltage_cubed >> 10);
+	dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared)
+			* 1000) / frequency;
+
+	if (of_property_read_u32_array(power_model_node, "ts", (u32 *)ts, 4)) {
+		dev_err(kbdev->dev, "ts in power_model not available\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h
new file mode 100644
index 0000000..9b5e69a
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_POWER_MODEL_SIMPLE_H_
+#define _BASE_POWER_MODEL_SIMPLE_H_
+
+/**
+ * kbase_power_model_simple_init - Initialise the simple power model
+ * @kbdev: Device pointer
+ *
+ * The simple power model estimates power based on current voltage, temperature,
+ * and coefficients read from device tree. It does not take utilization into
+ * account.
+ *
+ * The power model requires coefficients from the power_model node in device
+ * tree. The absence of this node will prevent the model from functioning, but
+ * should not prevent the rest of the driver from running.
+ *
+ * Return: 0 on success
+ *         -ENOSYS if the power_model node is not present in device tree
+ *         -EPROBE_DEFER if the thermal zone specified in device tree is not
+ *         currently available
+ *         Any other negative value on failure
+ */
+int kbase_power_model_simple_init(struct kbase_device *kbdev);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+extern struct devfreq_cooling_ops power_model_simple_ops;
+#else
+extern struct devfreq_cooling_power power_model_simple_ops;
+#endif
+
+#endif /* _BASE_POWER_MODEL_SIMPLE_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100755
index 0000000..d992989
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,103 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush -  Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367
+ *
+ * Note : If GPU resets occur then the counters are reset to zero, the delay may
+ * not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+	u32 base_count = 0;
+
+	/*
+	 * The caller must be holding onto the kctx or the call is from
+	 * userspace.
+	 */
+	kbase_pm_context_active(kctx->kbdev);
+	kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+	while (true) {
+		u32 new_count;
+
+		new_count = kbase_reg_read(kctx->kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		/* First time around, just store the count. */
+		if (base_count == 0) {
+			base_count = new_count;
+			continue;
+		}
+
+		/* No need to handle wrapping, unsigned maths works for this. */
+		if ((new_count - base_count) > 1000)
+			break;
+	}
+
+	kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+	kbase_pm_context_idle(kctx->kbdev);
+}
+#endif				/* CONFIG_MALI_NO_MALI */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
new file mode 100755
index 0000000..35088ab
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
new file mode 100755
index 0000000..35ff2f1
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -0,0 +1,126 @@
+#
+# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/drivers/gpu/arm/midgard/ 
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE                += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           += ../../kernel/drivers/gpu/arm/midgard/docs
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
new file mode 100755
index 0000000..7ae05c2
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR;
+	size="12,8";
+	compound=true;
+
+	node [ shape = box ];
+
+	subgraph cluster_policy_queues {
+		low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+		queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+		rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+		label = "Policy's Queue(s)";
+	}
+
+	call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+	{
+		rank=same;
+		ordering=out;
+		call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+		call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+		call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+	}
+
+	subgraph cluster_runpool {
+
+		as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+		as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+		as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+		as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+		label = "Policy's Run Pool";
+	}
+
+	{
+		rank=same;
+		call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+		sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+	}
+
+	{
+		rank=same;
+		ordering=out;
+		sstop [ shape=ellipse label="SS-Timer expires" ]
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		irq [ label="IRQ" shape=ellipse ];
+
+		job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+	}
+
+	hstop [ shape=ellipse label="HS-Timer expires" ]
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+	low_queue:qr -> call_dequeue:w;
+	rt_queue:qr -> call_dequeue:w;
+
+	call_dequeue -> as1 [lhead=cluster_runpool];
+
+	as1->call_jdequeue         [ltail=cluster_runpool];
+	call_jdequeue->jobslots:0;
+	call_jdequeue->sstop_dotfixup [ arrowhead=none];
+	sstop_dotfixup->sstop      [label="Spawn SS-Timer"];
+	sstop->jobslots            [label="SoftStop"];
+	sstop->hstop               [label="Spawn HS-Timer"];
+	hstop->jobslots:ne            [label="HardStop"];
+
+
+	as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+	call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+	call_ctxfinish->call_ctxdone [constraint=false];
+
+	call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+	{
+	jobslots->irq   [constraint=false];
+
+	irq->job_finish [constraint=false];
+	}
+
+	irq->as2  [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
new file mode 100755
index 0000000..159b993
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR
+	size="6,6"
+	compound=true;
+
+	node [ shape = box ];
+
+	call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+	policy_queue [ label="Policy's Queue" ];
+
+	{
+		rank=same;
+		runpool [ label="Policy's Run Pool" ];
+
+		ctx_finish [ label="ctx finished" ];
+	}
+
+	{
+		rank=same;
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		job_finish [ label="Job finished" ];
+	}
+
+
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> policy_queue;
+
+	policy_queue->runpool [label="dequeue ctx" weight=0.1];
+	runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+	runpool->ctx_finish [ style=dotted ];
+
+	runpool->jobslots  [label="dequeue job" weight=0.1];
+	jobslots->runpool  [label="requeue job" weight=0.1];
+
+	jobslots->job_finish [ style=dotted ];
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
new file mode 100755
index 0000000..c62f0d3
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -0,0 +1,249 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t83x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t82x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tMIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tHEx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tSIx[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
new file mode 100755
index 0000000..8666e82
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -0,0 +1,988 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_ISSUES_H_
+#define _BASE_HWCONFIG_ISSUES_H_
+
+enum base_hw_issue {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8879,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_generic[] = {
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t72x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t76x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t60x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t62x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t86x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t83x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t82x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
new file mode 100755
index 0000000..d615800
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,1819 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _BASE_KERNEL_H_
+#define _BASE_KERNEL_H_
+
+#ifndef __user
+#define __user
+#endif
+
+/* Support UK6 IOCTLS */
+#define BASE_LEGACY_UK6_SUPPORT 1
+
+/* Support UK7 IOCTLS */
+/* NB: To support UK6 we also need to support UK7 */
+#define BASE_LEGACY_UK7_SUPPORT 1
+
+/* Support UK8 IOCTLS */
+#define BASE_LEGACY_UK8_SUPPORT 1
+
+/* Support UK9 IOCTLS */
+#define BASE_LEGACY_UK9_SUPPORT 1
+
+/* Support UK10_2 IOCTLS */
+#define BASE_LEGACY_UK10_2_SUPPORT 1
+
+/* Support UK10_4 IOCTLS */
+#define BASE_LEGACY_UK10_4_SUPPORT 1
+
+typedef struct base_mem_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_mem_handle;
+
+#include "mali_base_mem_priv.h"
+#include "mali_kbase_profiling_gator_api.h"
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT              256
+
+#define BASEP_JD_SEM_PER_WORD_LOG2      5
+#define BASEP_JD_SEM_PER_WORD           (1 << BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_WORD_NR(x)         ((x) >> BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_MASK_IN_WORD(x)    (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1)))
+#define BASEP_JD_SEM_ARRAY_SIZE         BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT)
+
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET             ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET           ((unsigned char)0)
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined CDBG_ASSERT
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined KBASE_DEBUG_ASSERT
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#error assert macro not defined!
+#endif
+
+#if defined PAGE_MASK
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union kbase_pointer {
+	void __user *value;	  /**< client should store their pointers here */
+	u32 compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	u64 sizer;	  /**< Force 64-bit storage for all clients regardless */
+} kbase_pointer;
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (eg @c MEM_PROT_CPU_WR | @c MEM_HINT_CPU_RD),
+ * which defines a @a write-only region on the CPU side, which is
+ * heavily read by the CPU...
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see ::BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * See ::base_mem_alloc_flags.
+ *
+ */
+enum {
+/* IN */
+	BASE_MEM_PROT_CPU_RD = (1U << 0),      /**< Read access CPU side */
+	BASE_MEM_PROT_CPU_WR = (1U << 1),      /**< Write access CPU side */
+	BASE_MEM_PROT_GPU_RD = (1U << 2),      /**< Read access GPU side */
+	BASE_MEM_PROT_GPU_WR = (1U << 3),      /**< Write access GPU side */
+	BASE_MEM_PROT_GPU_EX = (1U << 4),      /**< Execute allowed on the GPU
+						    side */
+
+	/* BASE_MEM_HINT flags have been removed, but their values are reserved
+	 * for backwards compatibility with older user-space drivers. The values
+	 * can be re-used once support for r5p0 user-space drivers is removed,
+	 * presumably in r7p0.
+	 *
+	 * RESERVED: (1U << 5)
+	 * RESERVED: (1U << 6)
+	 * RESERVED: (1U << 7)
+	 * RESERVED: (1U << 8)
+	 */
+
+	BASE_MEM_GROW_ON_GPF = (1U << 9),      /**< Grow backing store on GPU
+						    Page Fault */
+
+	BASE_MEM_COHERENT_SYSTEM = (1U << 10), /**< Page coherence Outer
+						    shareable, if available */
+	BASE_MEM_COHERENT_LOCAL = (1U << 11),  /**< Page coherence Inner
+						    shareable */
+	BASE_MEM_CACHED_CPU = (1U << 12),      /**< Should be cached on the
+						    CPU */
+
+/* IN/OUT */
+	BASE_MEM_SAME_VA = (1U << 13), /**< Must have same VA on both the GPU
+					    and the CPU */
+/* OUT */
+	BASE_MEM_NEED_MMAP = (1U << 14), /**< Must call mmap to aquire a GPU
+					     address for the alloc */
+/* IN */
+	BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence
+					     Outer shareable, required. */
+	BASE_MEM_SECURE = (1U << 16),          /**< Secure memory */
+	BASE_MEM_DONT_NEED = (1U << 17),       /**< Not needed physical
+						    memory */
+	BASE_MEM_IMPORT_SHARED = (1U << 18),   /**< Must use shared CPU/GPU zone
+						    (SAME_VA zone) but doesn't
+						    require the addresses to
+						    be the same */
+};
+
+/**
+ * @brief Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the ::base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 19
+
+/**
+  * A mask for all output bits, excluding IN/OUT bits.
+  */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/**
+  * A mask for all input bits, including IN/OUT bits.
+  */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/**
+ * A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+	 BASE_MEM_COHERENT_LOCAL)
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMP: UMP import. Handle type is ump_secure_id.
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	BASE_MEM_IMPORT_TYPE_UMP = 1,
+	BASE_MEM_IMPORT_TYPE_UMM = 2,
+	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+} base_mem_import_type;
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:	kbase_pointer to imported user buffer
+ * @length:	length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+	kbase_pointer ptr;
+	u64 length;
+};
+
+/**
+ * @brief Invalid memory handle.
+ *
+ * Return value from functions returning @ref base_mem_handle on error.
+ *
+ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} })
+
+/**
+ * @brief Special write-alloc memory handle.
+ *
+ * A special handle is used to represent a region where a special page is mapped
+ * with a write-alloc cache setup, typically used when the write result of the
+ * GPU isn't needed, but the GPU must write anyway.
+ *
+ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} })
+
+#define BASEP_MEM_INVALID_HANDLE               (0ull  << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE               (1ull  << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
+/* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
+						BASE_MEM_COOKIE_BASE)
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB  0xfffff000UL
+
+
+/* Bit mask of cookies used for for memory allocation setup */
+#define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
+
+
+/**
+ * @brief Result codes of changing the size of the backing store allocated to a tmem region
+ */
+typedef enum base_backing_threshold_status {
+	BASE_BACKING_THRESHOLD_OK = 0,			    /**< Resize successful */
+	BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1,	    /**< Not a growable tmem object */
+	BASE_BACKING_THRESHOLD_ERROR_OOM = -2,		    /**< Increase failed due to an out-of-memory condition */
+	BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
+} base_backing_threshold_status;
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief a basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+	struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent imported memory object.
+ * Simple opague handle to imported memory, can't be used
+ * with anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+	struct {
+		int fd;
+	} basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
+ */
+typedef struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completly unused
+ * by the Base driver. It can be used to store things such as callback
+ * function pointer, data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+	u64 blob[2];	 /**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ *         Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *         specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+	base_mem_handle handle;
+	u64 offset;
+	u64 length;
+};
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extent:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ */
+struct base_jit_alloc_info {
+	u64 gpu_alloc_addr;
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	u8 id;
+};
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
+ * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without
+ * changing the structure size).
+ * When the flag is set for a particular dependency to signal that it is an ordering only dependency then
+ * errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID  (0)       /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA     (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER    (1U << 1) /**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly.  By not specifying the
+ * correct settings can/will cause an early job termination.  Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u32 base_jd_core_req;
+
+/* Requirements that come from the HW */
+
+/**
+ * No requirement, dependency only
+ */
+#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
+
+/**
+ * Requires fragment shaders
+ */
+#define BASE_JD_REQ_FS  ((base_jd_core_req)1 << 0)
+
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS  ((base_jd_core_req)1 << 1)
+#define BASE_JD_REQ_T   ((base_jd_core_req)1 << 2)   /**< Requires tiling */
+#define BASE_JD_REQ_CF  ((base_jd_core_req)1 << 3)   /**< Requires cache flushes */
+#define BASE_JD_REQ_V   ((base_jd_core_req)1 << 4)   /**< Requires value writeback */
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC  ((base_jd_core_req)1 << 13)
+
+/**
+ * SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP  ((base_jd_core_req)1 << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON               ((base_jd_core_req)1 << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced no syncsets can be bundled with the atom
+ * but should instead be part of a NULL jobs inserted into the dependency tree.
+ * The first pre_dep object must be configured for the external resouces to use,
+ * the second pre_dep object can be used to create other dependencies.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES   ((base_jd_core_req)1 << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB        ((base_jd_core_req)1 << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME      (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER          (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/**
+ * SW Only requirement : Replay job.
+ *
+ * If the preceeding job fails, the replay job will cause the jobs specified in
+ * the list of base_jd_replay_payload pointed to by the jc pointer to be
+ * replayed.
+ *
+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
+ * job is failed, as well as any following dependencies.
+ *
+ * The replayed jobs will require a number of atom IDs. If there are not enough
+ * free atom IDs then the replay job will fail.
+ *
+ * If the preceeding job does not fail, then the replay job is returned as
+ * completed.
+ *
+ * The replayed jobs will never be returned to userspace. The preceeding failed
+ * job will be returned to userspace as failed; the status of this job should
+ * be ignored. Completion should be determined by the status of the replay soft
+ * job.
+ *
+ * In order for the jobs to be replayed, the job headers will have to be
+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type
+ * field indicates a Vertex Shader Job then it will be changed to Null Job.
+ *
+ * The replayed jobs have the following assumptions :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - Pre-dependencies are created based on job order.
+ * - Atom numbers are automatically assigned.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ */
+#define BASE_JD_REQ_SOFT_REPLAY                 (BASE_JD_REQ_SOFT_JOB | 0x4)
+/**
+ * SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ *   other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ *   possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET              (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET            (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY             (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/**
+ * SW only requirement: Just In Time allocation
+ *
+ * This job requests a JIT allocation based on the request in the
+ * @base_jit_alloc_info structure which is passed via the jc element of
+ * the atom.
+ *
+ * It should be noted that the id entry in @base_jit_alloc_info must not
+ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job to free the JIT allocation is still made.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC              (BASE_JD_REQ_SOFT_JOB | 0x9)
+/**
+ * SW only requirement: Just In Time free
+ *
+ * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC
+ * to be freed. The ID of the JIT allocation is passed via the jc element of
+ * the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE               (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/**
+ * SW only requirement: Map external resource
+ *
+ * This job requests external resource(s) are mapped once the dependencies
+ * of the job have been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP            (BASE_JD_REQ_SOFT_JOB | 0xb)
+/**
+ * SW only requirement: Unmap external resource
+ *
+ * This job requests external resource(s) are unmapped once the dependencies
+ * of the job has been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP          (BASE_JD_REQ_SOFT_JOB | 0xc)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE    ((base_jd_core_req)1 << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE   ((base_jd_core_req)1 << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job starts which does not have this bit set or a job completes
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if
+ * the CPU may have written to memory addressed by the job since the last job
+ * without this bit set was submitted.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job completes which does not have this bit set or a job starts
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if
+ * the CPU may read from or partially overwrite memory addressed by the job
+ * before the next job without this bit set completes.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
+
+/**
+ * These requirement bits are currently unused in base_jd_core_req
+ */
+#define BASEP_JD_REQ_RESERVED \
+	(~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
+	BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
+	BASE_JD_REQ_EVENT_COALESCE | \
+	BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
+	BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
+	BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END))
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASE_JD_REQ_ATOM_TYPE \
+	(BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
+	BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of a soft job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
+
+/**
+ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
+ * handles retaining cores for power management and affinity management.
+ *
+ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
+ * where lots of atoms could be submitted before powerup, and each has an
+ * affinity chosen that causes other atoms to have an affinity
+ * violation. Whilst the affinity was not causing violations at the time it
+ * was chosen, it could cause violations thereafter. For example, 1000 jobs
+ * could have had their affinity chosen during the powerup time, so any of
+ * those 1000 jobs could cause an affinity violation later on.
+ *
+ * The attack would otherwise occur because other atoms/contexts have to wait for:
+ * -# the currently running atoms (which are causing the violation) to
+ * finish
+ * -# and, the atoms that had their affinity chosen during powerup to
+ * finish. These are run preferrentially because they don't cause a
+ * violation, but instead continue to cause the violation in others.
+ * -# or, the attacker is scheduled out (which might not happen for just 2
+ * contexts)
+ *
+ * By re-choosing the affinity (which is designed to avoid violations at the
+ * time it's chosen), we break condition (2) of the wait, which minimizes the
+ * problem to just waiting for current jobs to finish (which can be bounded if
+ * the Job Scheduling Policy has a timer).
+ */
+enum kbase_atom_coreref_state {
+	/** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
+	KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
+	/** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
+	KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
+	/** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
+	KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
+	/** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
+	KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
+	/** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
+	KBASE_ATOM_COREREF_STATE_READY
+};
+
+/*
+ * Base Atom priority
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling between atoms of the same type within
+ * a base context, and only after the atoms have had dependencies resolved.
+ * Fragment atoms does not affect non-frament atoms with lower priorities, and
+ * the other way around. For example, a low priority atom that has had its
+ * dependencies resolved might run before a higher priority atom that has not
+ * had its dependencies resolved.
+ *
+ * The scheduling between base contexts/processes and between atoms from
+ * different base contexts/processes is unaffected by atom priority.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
+ *   resolved, and atom 'X' has a higher priority than atom 'Y'
+ * - If atom 'Y' is currently running on the HW, then it is interrupted to
+ *   allow atom 'X' to run soon after
+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ *   the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * - Any two atoms that have the same priority could run in any order with
+ *   respect to each other. That is, there is no ordering constraint between
+ *   atoms of the same priority.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM  ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_HIGH    ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW     ((base_jd_prio)2)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+enum kbase_jd_atom_state {
+	/** Atom is not used */
+	KBASE_JD_ATOM_STATE_UNUSED,
+	/** Atom is queued in JD */
+	KBASE_JD_ATOM_STATE_QUEUED,
+	/** Atom has been given to JS (is runnable/running) */
+	KBASE_JD_ATOM_STATE_IN_JS,
+	/** Atom has been completed, but not yet handed back to job dispatcher
+	 *  for dependency resolution */
+	KBASE_JD_ATOM_STATE_HW_COMPLETED,
+	/** Atom has been completed, but not yet handed back to userspace */
+	KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+	base_atom_id  atom_id;               /**< An atom number */
+	base_jd_dep_type dependency_type;    /**< Dependency type */
+};
+
+/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value.
+ * In order to keep the size of the structure same, padding field has been adjusted
+ * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines)
+ * is added at the end of the structure. Place in the structure previously occupied by u16 core_req
+ * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission
+ * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left
+ * for possible future use. */
+typedef struct base_jd_atom_v2 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	kbase_pointer extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	u16 compat_core_req;	            /**< core requirements which correspond to the legacy support for UK 10.2 */
+	struct base_dependency pre_dep[2];  /**< pre-dependencies, one need to use SETTER function to assign this field,
+	this is done in order to reduce possibility of improper assigment of a dependency field */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;                  /**< Atom priority. Refer to @ref base_jd_prio for more details */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[1];
+	base_jd_core_req core_req;          /**< core requirements */
+} base_jd_atom_v2;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+struct base_jd_atom_v2_uk6 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	kbase_pointer extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	u16 core_req;                       /**< core requirements */
+	base_atom_id pre_dep[2]; /**< pre-dependencies */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;		    /**< priority - smaller is higher priority */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[7];
+};
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+typedef enum base_external_resource_access {
+	BASE_EXT_RES_ACCESS_SHARED,
+	BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+	u64 ext_resource;
+} base_external_resource;
+
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+	u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	u64 address;
+	u64 size;
+	struct base_external_resource extres;
+};
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] dep          The kbase jd atom dependency to be initialized.
+ * @param     id           The atom_id to be assigned.
+ * @param     dep_type     The dep_type to be assigned.
+ *
+ */
+static inline void base_jd_atom_dep_set(struct base_dependency *dep,
+		base_atom_id id, base_jd_dep_type dep_type)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	/*
+	 * make sure we don't set not allowed combinations
+	 * of atom_id/dependency_type.
+	 */
+	LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+			(id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
+
+	dep->atom_id = id;
+	dep->dependency_type = dep_type;
+}
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] dep          The kbase jd atom dependency to be written.
+ * @param[in]     from         The dependency to make a copy from.
+ *
+ */
+static inline void base_jd_atom_dep_copy(struct base_dependency *dep,
+		const struct base_dependency *from)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type);
+}
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit and @a base_jd_submit has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ */
+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+	LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit and @a base_jd_submit has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init or @a base_fence_import.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ */
+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up an external resource object to reference
+ * a memory allocation and the type of access requested.
+ *
+ * @param[in] res     The resource object to initialize
+ * @param     handle  The handle to the imported memory object, must be
+ *                    obtained by calling @ref base_mem_as_import_handle().
+ * @param     access  The type of access requested
+ */
+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
+{
+	u64 address;
+
+	address = handle.basep.handle;
+
+	LOCAL_ASSERT(res != NULL);
+	LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+	LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+	res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+	BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
+	BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
+	BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */
+	BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
+	BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
+	BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
+	BASE_JD_SW_EVENT_RESERVED = (3u << 11),	/**< Reserved event type */
+	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)	    /**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0  - subtype
+ * @li 12:11 - type
+ * @li 13    - SW success (only valid if the SW bit is set)
+ * @li 14    - SW event (HW event if not set)
+ * @li 15    - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+typedef enum base_jd_event_code {
+	/* HW defined exceptions */
+
+	/** Start of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+	/* non-fatal exceptions */
+	BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
+	BASE_JD_EVENT_DONE = 0x01,
+	BASE_JD_EVENT_STOPPED = 0x03,	  /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
+	BASE_JD_EVENT_TERMINATED = 0x04,  /**< This is actually a fault status code - the job was hard stopped */
+	BASE_JD_EVENT_ACTIVE = 0x08,	  /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
+
+	/** End of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+
+	/** Start of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+	/* job exceptions */
+	BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+	BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+	BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+	BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+	BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+	BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+	BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+	BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+	BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+	BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+	BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+	BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+	BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+	BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+	BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+	BASE_JD_EVENT_STATE_FAULT = 0x5A,
+	BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+	BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+	/* GPU exceptions */
+	BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+	BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+	/* MMU exceptions */
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+	BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+	BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+	/* SW defined exceptions */
+	BASE_JD_EVENT_MEM_GROWTH_FAILED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_TIMED_OUT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+	BASE_JD_EVENT_JOB_CANCELLED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+	BASE_JD_EVENT_JOB_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+	BASE_JD_EVENT_PM_EVENT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+	BASE_JD_EVENT_FORCE_REPLAY	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
+
+	BASE_JD_EVENT_BAG_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+	/** End of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+	BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
+	BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+	/** End of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of Kernel-only status codes. Such codes are never returned to user-space */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+	BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+	/** End of Kernel-only status codes. */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. The can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (ie all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+	base_jd_event_code event_code;  /**< event code */
+	base_atom_id atom_number;       /**< the atom number that has completed */
+	struct base_jd_udata udata;     /**< user data */
+} base_jd_event_v2;
+
+/**
+ * Padding required to ensure that the @ref struct base_dump_cpu_gpu_counters structure fills
+ * a full cache line.
+ */
+
+#define BASE_CPU_GPU_CACHE_LINE_PADDING (36)
+
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field of @ref base_jd_atom.
+ *
+ * This structure must be padded to ensure that it will occupy whole cache lines. This is to avoid
+ * cases where access to pages containing the structure is shared between cached and un-cached
+ * memory regions, which would cause memory corruption.  Here we set the structure size to be 64 bytes
+ * which is the cache line for ARM A15 processors.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+	u64 system_time;
+	u64 cycle_counter;
+	u64 sec;
+	u32 usec;
+	u8 padding[BASE_CPU_GPU_CACHE_LINE_PADDING];
+} base_dump_cpu_gpu_counters;
+
+
+
+/** @} end group base_user_api_job_dispatch */
+
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistant guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ *  - Part of a critical inner-loop
+ *  - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU  registers themselves.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * _mali_base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to _mali_base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties
+ *
+ * The Platform Config File sets up gpu properties that are specific to a
+ * certain platform. Properties that are 'Implementation Defined' in the
+ * Midgard Architecture spec are placed here.
+ *
+ * @note Reference configurations are provided for Midgard Implementations, such as
+ * the Mali-T600 family. The customer need not repeat this information, and can select one of
+ * these reference configurations. For example, VA_BITS, PA_BITS and the
+ * maximum number of samples per pixel might vary between Midgard Implementations, but
+ * \b not for platforms using the Mali-T604. This information is placed in
+ * the reference configuration files.
+ *
+ * The System Integrator creates the following structure:
+ * - platform_XYZ
+ * - platform_XYZ/plat
+ * - platform_XYZ/plat/plat_config.h
+ *
+ * They then edit plat_config.h, using the example plat_config.h files as a
+ * guide.
+ *
+ * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will
+ * receive a helpful \#error message if they do not do this correctly. This
+ * selects the Reference Configuration for the Midgard Implementation. The rationale
+ * behind this decision (against asking the customer to write \#include
+ * <gpus/mali_t600.h> in their plat_config.h) is as follows:
+ * - This mechanism 'looks' like a regular config file (such as Linux's
+ * .config)
+ * - It is difficult to get wrong in a way that will produce strange build
+ * errors:
+ *  - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and
+ *  so they won't accidentally pick another file with 'mali_t600' in its name
+ *  - When the build doesn't work, the System Integrator may think the DDK is
+ *  doesn't work, and attempt to fix it themselves:
+ *   - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the
+ *   error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells
+ *   you.
+ *   - For a \#include mechanism, checks must still be made elsewhere, which the
+ *   System Integrator may try working around by setting \#defines (such as
+ *   VA_BITS) themselves in their plat_config.h. In the  worst case, they may
+ *   set the prevention-mechanism \#define of
+ *   "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN".
+ *   - In this case, they would believe they are on the right track, because
+ *   the build progresses with their fix, but with errors elsewhere.
+ *
+ * However, there is nothing to prevent the customer using \#include to organize
+ * their own configurations files hierarchically.
+ *
+ * The mechanism for the header file processing is as follows:
+ *
+ * @dot
+   digraph plat_config_mechanism {
+	   rankdir=BT
+	   size="6,6"
+
+       "mali_base.h";
+	   "gpu/mali_gpu.h";
+
+	   node [ shape=box ];
+	   {
+	       rank = same; ordering = out;
+
+		   "gpu/mali_gpu_props.h";
+		   "base/midg_gpus/mali_t600.h";
+		   "base/midg_gpus/other_midg_gpu.h";
+	   }
+	   { rank = same; "plat/plat_config.h"; }
+	   {
+	       rank = same;
+		   "gpu/mali_gpu.h" [ shape=box ];
+		   gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ];
+		   select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ;
+	   }
+	   node [ shape=box ];
+	   { rank = same; "plat/plat_config.h"; }
+	   { rank = same; "mali_base.h"; }
+
+	   "mali_base.h" -> "gpu/mali_gpu.h" -> "gpu/mali_gpu_props.h";
+	   "mali_base.h" -> "plat/plat_config.h" ;
+	   "mali_base.h" -> select_gpu ;
+
+	   "plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ;
+	   gpu_chooser -> select_gpu [style="dotted,bold"] ;
+
+	   select_gpu -> "base/midg_gpus/mali_t600.h" ;
+	   select_gpu -> "base/midg_gpus/other_midg_gpu.h" ;
+   }
+   @enddot
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided the entire processed @ref base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algoirthm can be determined either by a u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * requried for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+struct mali_base_gpu_core_props {
+	/**
+	 * Product specific value.
+	 */
+	u32 product_id;
+
+	/**
+	 * Status of the GPU release.
+     * No defined values, but starts at 0 and increases by one for each release
+     * status (alpha, beta, EAC, etc.).
+     * 4 bit values (0-15).
+	 */
+	u16 version_status;
+
+	/**
+	 * Minor release number of the GPU. "P" part of an "RnPn" release number.
+     * 8 bit values (0-255).
+	 */
+	u16 minor_revision;
+
+	/**
+	 * Major release number of the GPU. "R" part of an "RnPn" release number.
+     * 4 bit values (0-15).
+	 */
+	u16 major_revision;
+
+	u16 padding;
+
+	/**
+	 * @usecase GPU clock speed is not specified in the Midgard Architecture, but is
+	 * <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+	 */
+	u32 gpu_speed_mhz;
+
+	/**
+	 * @usecase GPU clock max/min speed is required for computing best/worst case
+	 * in tasks as job scheduling ant irq_throttling. (It is not specified in the
+	 *  Midgard Architecture).
+	 */
+	u32 gpu_freq_khz_max;
+	u32 gpu_freq_khz_min;
+
+	/**
+	 * Size of the shader program counter, in bits.
+	 */
+	u32 log2_program_counter_size;
+
+	/**
+	 * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+	 * bitpattern where a set bit indicates that the format is supported.
+	 *
+	 * Before using a texture format, it is recommended that the corresponding
+	 * bit be checked.
+	 */
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	/**
+	 * Theoretical maximum memory available to the GPU. It is unlikely that a
+	 * client will be able to allocate all of this memory for their own
+	 * purposes, but this at least provides an upper bound on the memory
+	 * available to the GPU.
+	 *
+	 * This is required for OpenCL's clGetDeviceInfo() call when
+	 * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+	 * client will not be expecting to allocate anywhere near this value.
+	 */
+	u64 gpu_available_memory_size;
+};
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+	u8 log2_line_size;
+	u8 log2_cache_size;
+	u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+	u32 bin_size_bytes;	/* Max is 4*2^15 */
+	u32 max_active_levels;	/* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+	u32 max_threads;            /* Max. number of threads per core */
+	u32 max_workgroup_size;     /* Max. number of threads per workgroup */
+	u32 max_barrier_size;       /* Max. number of threads that can synchronize on a simple barrier */
+	u16 max_registers;          /* Total size [1..65535] of the register file available per core. */
+	u8  max_task_queue;         /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+	u8  max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+	u8  impl_tech;              /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+	u8  padding[7];
+};
+
+/**
+ * @brief descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+	u64 core_mask;	       /**< Core restriction mask required for the group */
+	u16 num_cores;	       /**< Number of cores in the group */
+	u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+	u32 num_groups;
+
+	/**
+	 * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+	 *
+	 * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+	 * whether the core groups are coherent or not. Hence this member is needed
+	 * to calculate how much memory is required for dumping.
+	 *
+	 * @note Do not use it to work out how many valid elements are in the
+	 * group[] member. Use num_groups instead.
+	 */
+	u32 num_core_groups;
+
+	/**
+	 * Coherency features of the memory, accessed by @ref gpu_mem_features
+	 * methods
+	 */
+	u32 coherency;
+
+	u32 padding;
+
+	/**
+	 * Descriptors of coherent groups
+	 */
+	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+	u64 shader_present;
+	u64 tiler_present;
+	u64 l2_present;
+	u64 stack_present;
+
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 mem_features;
+	u32 mmu_features;
+
+	u32 as_present;
+
+	u32 js_present;
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 tiler_features;
+	u32 texture_features[3];
+
+	u32 gpu_id;
+
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+
+	/*
+	 * Note: This is the _selected_ coherency mode rather than the
+	 * available modes as exposed in the coherency_features register.
+	 */
+	u32 coherency_mode;
+};
+
+/**
+ * Return structure for _mali_base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this datastructure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct mali_base_gpu_props {
+	struct mali_base_gpu_core_props core_props;
+	struct mali_base_gpu_l2_cache_props l2_props;
+	u64 unused_1; /* keep for backwards compatibility */
+	struct mali_base_gpu_tiler_props tiler_props;
+	struct mali_base_gpu_thread_props thread_props;
+
+	/** This member is large, likely to be 128 bytes */
+	struct gpu_raw_gpu_props raw_props;
+
+	/** This must be last member of the structure */
+	struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * \enum base_context_create_flags
+ *
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+enum base_context_create_flags {
+	/** No flags set */
+	BASE_CONTEXT_CREATE_FLAG_NONE = 0,
+
+	/** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */
+	BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0),
+
+	/** Base context is a 'System Monitor' context for Hardware counters.
+	 *
+	 * One important side effect of this is that job submission is disabled. */
+	BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1)
+};
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init()
+ */
+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
+	(((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
+	  ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
+ */
+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
+	((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
+
+/*
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+/** Private flag tracking whether job descriptor dumping is disabled */
+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31))
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C Pre-processor macros are exposed here to do with Platform
+ * Config.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revison.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/**
+ * @brief The payload for a replay job. This must be in GPU memory.
+ */
+typedef struct base_jd_replay_payload {
+	/**
+	 * Pointer to the first entry in the base_jd_replay_jc list.  These
+	 * will be replayed in @b reverse order (so that extra ones can be added
+	 * to the head in future soft jobs without affecting this soft job)
+	 */
+	u64 tiler_jc_list;
+
+	/**
+	 * Pointer to the fragment job chain.
+	 */
+	u64 fragment_jc;
+
+	/**
+	 * Pointer to the tiler heap free FBD field to be modified.
+	 */
+	u64 tiler_heap_free;
+
+	/**
+	 * Hierarchy mask for the replayed fragment jobs. May be zero.
+	 */
+	u16 fragment_hierarchy_mask;
+
+	/**
+	 * Hierarchy mask for the replayed tiler jobs. May be zero.
+	 */
+	u16 tiler_hierarchy_mask;
+
+	/**
+	 * Default weight to be used for hierarchy levels not in the original
+	 * mask.
+	 */
+	u32 hierarchy_default_weight;
+
+	/**
+	 * Core requirements for the tiler job chain
+	 */
+	base_jd_core_req tiler_core_req;
+
+	/**
+	 * Core requirements for the fragment job chain
+	 */
+	base_jd_core_req fragment_core_req;
+} base_jd_replay_payload;
+
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+typedef struct base_jd_replay_payload_uk10_2 {
+	u64 tiler_jc_list;
+	u64 fragment_jc;
+	u64 tiler_heap_free;
+	u16 fragment_hierarchy_mask;
+	u16 tiler_hierarchy_mask;
+	u32 hierarchy_default_weight;
+	u16 tiler_core_req;
+	u16 fragment_core_req;
+	u8 padding[4];
+} base_jd_replay_payload_uk10_2;
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
+
+/**
+ * @brief An entry in the linked list of job chains to be replayed. This must
+ *        be in GPU memory.
+ */
+typedef struct base_jd_replay_jc {
+	/**
+	 * Pointer to next entry in the list. A setting of NULL indicates the
+	 * end of the list.
+	 */
+	u64 next;
+
+	/**
+	 * Pointer to the job chain.
+	 */
+	u64 jc;
+
+} base_jd_replay_jc;
+
+/* Maximum number of jobs allowed in a fragment chain in the payload of a
+ * replay job */
+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
+
+/** @} end group base_api */
+
+typedef struct base_profiling_controls {
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+} base_profiling_controls;
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
+
+#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)
+
+#endif				/* _BASE_KERNEL_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100755
index 0000000..4a98a72
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC	(1U << 0)
+#define BASE_SYNCSET_OP_CSYNC	(1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	base_mem_handle mem_handle;
+	u64 user_addr;
+	u64 size;
+	u8 type;
+	u8 padding[7];
+};
+
+#endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
new file mode 100755
index 0000000..be454a2
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif	/*_BASE_VENDOR_SPEC_FUNC_H_*/
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100755
index 0000000..9ae188c
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,625 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <asm/page.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_uku.h>
+#include <mali_kbase_linux.h>
+
+/*
+ * Include mali_kbase_defs.h first as this provides types needed by other local
+ * header files.
+ */
+#include "mali_kbase_defs.h"
+
+#include "mali_kbase_context.h"
+#include "mali_kbase_strings.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_trace_timeline.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_debug_job_fault.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_vinstr.h"
+#include "mali_kbase_ipa.h"
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+/**
+ * @page page_base_kernel_main Kernel-side Base (KBase) APIs
+ */
+
+/**
+ * @defgroup base_kbase_api Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: configuration attributes member of kbdev needs to have
+* been setup before calling kbase_device_init
+*/
+
+/*
+* API to acquire device list semaphore and return pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control);
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+void kbase_destroy_context(struct kbase_context *kctx);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data,
+		int uk6_atom);
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data);
+#endif
+
+/**
+ * kbase_jd_done_worker - Handle a job completion
+ * @data: a &struct work_struct
+ *
+ * This function requeues the job from the runpool (if it was soft-stopped or
+ * removed from NEXT registers).
+ *
+ * Removes it from the system if it finished/failed/was cancelled.
+ *
+ * Resolves dependencies to add dependent jobs to the context, potentially
+ * starting them if necessary (which may add more references to the context)
+ *
+ * Releases the reference to the context from the no-longer-running job.
+ *
+ * Handles retrying submission outside of IRQ context if it failed from within
+ * IRQ context.
+ */
+void kbase_jd_done_worker(struct work_struct *data);
+
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+		kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+bool jd_submit_atom(struct kbase_context *kctx,
+			 const struct base_jd_atom_v2 *user_atom,
+			 struct kbase_jd_atom *katom);
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom);
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ *                                               and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The hwaccess_lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom);
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom);
+int kbase_soft_event_update(struct kbase_context *kctx,
+			    u64 event,
+			    unsigned char new_status);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+void kbasep_soft_job_timeout_worker(unsigned long data);
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
+
+/* api used internally for register access. Contains validation and tracing */
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size);
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
+
+/* api to be ported per OS, only need to do the raw register access */
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value);
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset);
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev     The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code  exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+		u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or has already been suspended
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+	return kbdev->pm.suspending;
+}
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int result;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+	result = katom - &kctx->jctx.atoms[0];
+	KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
+	return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id:   ID of atom to retrieve
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+		struct kbase_context *kctx, int id)
+{
+	return &kctx->jctx.atoms[id];
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just increment
+ * the counter during disjoint events we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down
+ * should be called
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if !defined(UINT64_MAX)
+	#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#endif
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)     \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+	kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+	kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+	trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(subcode);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_DEBUG
+/**
+ * kbase_set_driver_inactive - Force driver to go inactive
+ * @kbdev:    Device pointer
+ * @inactive: true if driver should go inactive, false otherwise
+ *
+ * Forcing the driver inactive will cause all future IOCTLs to wait until the
+ * driver is made active again. This is intended solely for the use of tests
+ * which require that no jobs are running while the test executes.
+ */
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifndef RESET0_LEVEL
+extern u32  override_value_aml;
+extern void *reg_base_hiubus;
+#define RESET0_MASK    0x01
+#define RESET1_MASK    0x02
+#define RESET2_MASK    0x03
+
+#define RESET0_LEVEL    0x10
+#define RESET1_LEVEL    0x11
+#define RESET2_LEVEL    0x12
+#define Rd(r)                           readl((reg_base_hiubus) + ((r)<<2))
+#define Wr(r, v)                        writel((v), ((reg_base_hiubus) + ((r)<<2)))
+#define Mali_WrReg(unit, core, regnum, value)   kbase_reg_write(kbdev, (regnum), (value), NULL)
+#define Mali_RdReg(unit, core, regnum)          kbase_reg_read(kbdev, (regnum), NULL)
+#define stimulus_print   printk
+#define stimulus_display printk
+#define Mali_pwr_on(x)   Mali_pwr_on_with_kdev(kbdev, (x))
+#define Mali_pwr_off(x)  Mali_pwr_off_with_kdev(kbdev, (x))
+#endif
+
+
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/* kbase_io_history_init - initialize data struct for register access history
+ *
+ * @kbdev The register history to initialize
+ * @n The number of register accesses that the buffer could hold
+ *
+ * @return 0 if successfully initialized, failure otherwise
+ */
+int kbase_io_history_init(struct kbase_io_history *h, u16 n);
+
+/* kbase_io_history_term - uninit all resources for the register access history
+ *
+ * @h The register history to terminate
+ */
+void kbase_io_history_term(struct kbase_io_history *h);
+
+/* kbase_io_history_dump - print the register history to the kernel ring buffer
+ *
+ * @kbdev Pointer to kbase_device containing the register history to dump
+ */
+void kbase_io_history_dump(struct kbase_device *kbdev);
+
+/**
+ * kbase_io_history_resize - resize the register access history buffer.
+ *
+ * @h: Pointer to a valid register history to resize
+ * @new_size: Number of accesses the buffer could hold
+ *
+ * A successful resize will clear all recent register accesses.
+ * If resizing fails for any reason (e.g., could not allocate memory, invalid
+ * buffer size) then the original buffer will be kept intact.
+ *
+ * @return 0 if the buffer was resized, failure otherwise
+ */
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbase_io_history_init(...) ((int)0)
+
+#define kbase_io_history_term CSTD_NOP
+
+#define kbase_io_history_dump CSTD_NOP
+
+#define kbase_io_history_resize CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+#endif
+
+
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100755
index 0000000..933aa28
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,209 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* This function is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the
+ * restart index is out of bounds and the rerun causes a tile range fault. If this happens
+ * we try to clamp the restart index to a correct value and rerun the job.
+ */
+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+	struct device *dev = katom->kctx->kbdev->dev;
+	u32   clamped = 0;
+	struct kbase_va_region *region;
+	phys_addr_t *page_array;
+	u64 page_index;
+	u32 offset = katom->jc & (~PAGE_MASK);
+	u32 *page_1 = NULL;
+	u32 *page_2 = NULL;
+	u32   job_header[JOB_HEADER_SIZE_IN_WORDS];
+	void *dst = job_header;
+	u32 minX, minY, maxX, maxY;
+	u32 restartX, restartY;
+	struct page *p;
+	u32 copy_size;
+
+	dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+	if (!(katom->core_req & BASE_JD_REQ_FS))
+		return 0;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+			katom->jc);
+	if (!region || (region->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(region);
+	if (!page_array)
+		goto out_unlock;
+
+	page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+	p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+	/* we need the first 10 words of the fragment shader job descriptor.
+	 * We need to check that the offset + 10 words is less that the page
+	 * size otherwise we need to load the next page.
+	 * page_size_overflow will be equal to 0 in case the whole descriptor
+	 * is within the page > 0 otherwise.
+	 */
+	copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+	page_1 = kmap_atomic(p);
+
+	/* page_1 is a u32 pointer, offset is expressed in bytes */
+	page_1 += offset>>2;
+
+	kbase_sync_single_for_cpu(katom->kctx->kbdev,
+			kbase_dma_addr(p) + offset,
+			copy_size, DMA_BIDIRECTIONAL);
+
+	memcpy(dst, page_1, copy_size);
+
+	/* The data needed overflows page the dimension,
+	 * need to map the subsequent page */
+	if (copy_size < JOB_HEADER_SIZE) {
+		p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+		page_2 = kmap_atomic(p);
+
+		kbase_sync_single_for_cpu(katom->kctx->kbdev,
+				kbase_dma_addr(p),
+				JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+		memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+	}
+
+	/* We managed to correctly map one or two pages (in case of overflow) */
+	/* Get Bounding Box data and restart index from fault address low word */
+	minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+	restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+	dev_warn(dev, "Before Clamping:\n"
+			"Jobstatus: %08x\n"
+			"restartIdx: %08x\n"
+			"Fault_addr_low: %08x\n"
+			"minCoordsX: %08x minCoordsY: %08x\n"
+			"maxCoordsX: %08x maxCoordsY: %08x\n",
+			job_header[JOB_DESC_STATUS_WORD],
+			job_header[JOB_DESC_RESTART_INDEX_WORD],
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+			minX, minY,
+			maxX, maxY);
+
+	/* Set the restart index to the one which generated the fault*/
+	job_header[JOB_DESC_RESTART_INDEX_WORD] =
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+	if (restartX < minX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to minimum. %08x clamped to %08x\n",
+			restartX, minX);
+		clamped =  1;
+	}
+	if (restartY < minY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to minimum. %08x clamped to %08x\n",
+			restartY, minY);
+		clamped =  1;
+	}
+	if (restartX > maxX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to maximum. %08x clamped to %08x\n",
+			restartX, maxX);
+		clamped =  1;
+	}
+	if (restartY > maxY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to maximum. %08x clamped to %08x\n",
+			restartY, maxY);
+		clamped =  1;
+	}
+
+	if (clamped) {
+		/* Reset the fault address low word
+		 * and set the job status to STOPPED */
+		job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+		job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+		dev_warn(dev, "After Clamping:\n"
+				"Jobstatus: %08x\n"
+				"restartIdx: %08x\n"
+				"Fault_addr_low: %08x\n"
+				"minCoordsX: %08x minCoordsY: %08x\n"
+				"maxCoordsX: %08x maxCoordsY: %08x\n",
+				job_header[JOB_DESC_STATUS_WORD],
+				job_header[JOB_DESC_RESTART_INDEX_WORD],
+				job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+				minX, minY,
+				maxX, maxY);
+
+		/* Flush CPU cache to update memory for future GPU reads*/
+		memcpy(page_1, dst, copy_size);
+		p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+		kbase_sync_single_for_device(katom->kctx->kbdev,
+				kbase_dma_addr(p) + offset,
+				copy_size, DMA_TO_DEVICE);
+
+		if (copy_size < JOB_HEADER_SIZE) {
+			memcpy(page_2, dst + copy_size,
+					JOB_HEADER_SIZE - copy_size);
+			p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+
+			kbase_sync_single_for_device(katom->kctx->kbdev,
+					kbase_dma_addr(p),
+					JOB_HEADER_SIZE - copy_size,
+					DMA_TO_DEVICE);
+		}
+	}
+	if (copy_size < JOB_HEADER_SIZE)
+		kunmap_atomic(page_2);
+
+	kunmap_atomic(page_1);
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+	return clamped;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100755
index 0000000..099a298
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
new file mode 100644
index 0000000..f910fe9
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+
+static int kbase_as_fault_read(struct seq_file *sfile, void *data)
+{
+	uintptr_t as_no = (uintptr_t) sfile->private;
+
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+	struct kbase_device *kbdev = NULL;
+
+	kbdev_list = kbase_dev_list_get();
+
+	list_for_each(entry, kbdev_list) {
+		kbdev = list_entry(entry, struct kbase_device, entry);
+
+		if(kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) {
+
+			/* don't show this one again until another fault occors */
+			kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no);
+
+			/* output the last page fault addr */
+			seq_printf(sfile, "%llu\n", (u64) kbdev->as[as_no].fault_addr);
+		}
+
+	}
+
+	kbase_dev_list_put(kbdev_list);
+
+	return 0;
+}
+
+static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbase_as_fault_read , in->i_private);
+}
+
+static const struct file_operations as_fault_fops = {
+	.open = kbase_as_fault_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ *  Initialize debugfs entry for each address space
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	uint i;
+	char as_name[64];
+	struct dentry *debugfs_directory;
+
+	kbdev->debugfs_as_read_bitmap = 0ULL;
+
+	KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces);
+	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));
+
+	debugfs_directory = debugfs_create_dir("address_spaces",
+		kbdev->mali_debugfs_directory);
+
+	if(debugfs_directory) {
+		for(i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+			snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
+			debugfs_create_file(as_name, S_IRUGO,
+				debugfs_directory, (void*) ((uintptr_t) i), &as_fault_fops);
+		}
+	}
+	else
+		dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory");
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+	return;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
new file mode 100644
index 0000000..3ed2248
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_AS_FAULT_DEBUG_FS_H
+#define _KBASE_AS_FAULT_DEBUG_FS_H
+
+/**
+ * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults
+ *
+ * @kbdev: Pointer to kbase_device
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_as_fault_debugfs_new() - make the last fault available on debugfs
+ *
+ * @kbdev: Pointer to kbase_device
+ * @as_no: The address space the fault occurred on
+ */
+static inline void
+kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
+#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_MALI_DEBUG */
+	return;
+}
+
+#endif  /*_KBASE_AS_FAULT_DEBUG_FS_H*/
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100755
index 0000000..c67b3e9
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+	u32 cache_flags = 0;
+
+	CSTD_UNUSED(nr_pages);
+
+	if (flags & BASE_MEM_CACHED_CPU)
+		cache_flags |= KBASE_REG_CPU_CACHED;
+
+	return cache_flags;
+}
+
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		return;
+#endif /* CONFIG_MALI_COH_KERN */
+	dma_sync_single_for_device(kbdev->dev, handle, size, dir);
+}
+
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		return;
+#endif /* CONFIG_MALI_COH_KERN */
+	dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
new file mode 100755
index 0000000..0c18bdb
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * kbase_cache_enabled - Choose the cache policy for a specific region
+ * @flags:    flags describing attributes of the region
+ * @nr_pages: total number of pages (backed or not) for the region
+ *
+ * Tells whether the CPU and GPU caches should be enabled or not for a specific
+ * region.
+ * This function can be modified to customize the cache policy depending on the
+ * flags and size of the region.
+ *
+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED
+ *         depending on the cache policy
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
new file mode 100755
index 0000000..fb615ae
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
@@ -0,0 +1,51 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+int kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_init_func)
+		return platform_funcs_p->platform_init_func(kbdev);
+
+	return 0;
+}
+
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_term_func)
+		platform_funcs_p->platform_term_func(kbdev);
+}
+
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed)
+{
+	KBASE_DEBUG_ASSERT(NULL != clock_speed);
+
+	*clock_speed = 100;
+	return 0;
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
new file mode 100755
index 0000000..356d52b
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
@@ -0,0 +1,345 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <asm/page.h>
+
+#include <mali_malisw.h>
+#include <mali_kbase_backend_config.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+#include <linux/rbtree.h>
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/**
+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ *
+ * Specifies the functions pointers for platform specific initialization and
+ * termination. By default no functions are required. No additional platform
+ * specific control is necessary.
+ */
+struct kbase_platform_funcs_conf {
+	/**
+	 * platform_init_func - platform specific init function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Returns 0 on success, negative error code otherwise.
+	 *
+	 * Function pointer for platform specific initialization or NULL if no
+	 * initialization function is required. At the point this the GPU is
+	 * not active and its power and clocks are in unknown (platform specific
+	 * state) as kbase doesn't yet have control of power and clocks.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly initialized) in here.
+	 */
+	int (*platform_init_func)(struct kbase_device *kbdev);
+	/**
+	 * platform_term_func - platform specific termination function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Function pointer for platform specific termination or NULL if no
+	 * termination function is required. At the point this the GPU will be
+	 * idle but still powered and clocked.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly terminated) in here.
+	 */
+	void (*platform_term_func)(struct kbase_device *kbdev);
+};
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+	/** Callback for when the GPU is idle and the power to it can be switched off.
+	 *
+	 * The system integrator can decide whether to either do nothing, just switch off
+	 * the clocks to the GPU, or to completely power down the GPU.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the GPU is about to become active and power must be supplied.
+	 *
+	 * This function must not return until the GPU is powered and clocked sufficiently for register access to
+	 * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
+	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 *
+	 * The return value of the first call to this function is ignored.
+	 *
+	 * @return 1 if the GPU state may have been lost, 0 otherwise.
+	 */
+	int (*power_on_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is requesting a suspend and GPU power
+	 * must be switched off.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a preceding call to power_off_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_off_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is resuming from a suspend and GPU
+	 * power must be switched on.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a following call to power_on_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_on_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_resume_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management initialization.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * will become active from calls made to the OS from within this function.
+	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else int error code.
+	 */
+	 int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management termination.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * should no longer be called by the OS on completion of this function.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-off power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_suspend callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
+	 */
+	void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-on power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_resume callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+
+	/*
+	 * Optional callback for checking if GPU can be suspended when idle
+	 *
+	 * This callback will be called by the runtime power management core
+	 * when the reference count goes to 0 to provide notification that the
+	 * GPU now seems idle.
+	 *
+	 * If this callback finds that the GPU can't be powered off, or handles
+	 * suspend by powering off directly or queueing up a power off, a
+	 * non-zero value must be returned to prevent the runtime PM core from
+	 * also triggering a suspend.
+	 *
+	 * Returning 0 will cause the runtime PM core to conduct a regular
+	 * autosuspend.
+	 *
+	 * This callback is optional and if not provided regular autosuspend
+	 * will be triggered.
+	 *
+	 * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+	 * this feature.
+	 *
+	 * Return 0 if GPU can be suspended, positive value if it can not be
+	 * suspeneded by runtime PM, else OS error code
+	 */
+	int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
+};
+
+/**
+ * kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC
+ * @clock_speed - see  kbase_cpu_clk_speed_func for details on the parameters
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * Default implementation of CPU_SPEED_FUNC. This function sets clock_speed
+ * to 100, so will be an underestimate for any real system.
+ */
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed);
+
+/**
+ * kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current CPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ *
+ * This is mainly used to implement OpenCL's clGetDeviceInfo().
+ */
+typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed);
+
+/**
+ * kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current GPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ * When an error is returned the caller assumes maximum GPU speed stored in
+ * gpu_freq_khz_max.
+ *
+ * If the system timer is not available then this function is required
+ * for the OpenCL queue profiling to return correct timing information.
+ *
+ */
+typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed);
+
+#ifdef CONFIG_OF
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+	u64 start;
+	u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ */
+struct kbase_io_resources {
+	u32                      job_irq_number;
+	u32                      mmu_irq_number;
+	u32                      gpu_irq_number;
+	struct kbase_io_memory_region io_memory_region;
+};
+
+struct kbase_platform_config {
+	const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init: - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes.  The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ *         Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that are outside of the Power Management
+ * callbacks.
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_platform_early_init - Early initialisation of the platform code
+ *
+ * This function will be called when the module is loaded to perform any
+ * early initialisation required by the platform code. Such as reading
+ * platform specific device tree entries for the GPU.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_early_init(void);
+
+#ifndef CONFIG_OF
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+/**
+ * kbase_platform_fake_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_fake_register()
+ */
+void kbase_platform_fake_unregister(void);
+#endif
+#endif
+
+	  /** @} *//* end group kbase_config */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_CONFIG_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100755
index 0000000..c78f394
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,222 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+ * Irq throttle. It is the minimum desired time in between two
+ * consecutive gpu interrupts (given in 'us'). The irq throttle
+ * gpu register will be configured after this, taking into
+ * account the configured max frequency.
+ *
+ * Attached value: number in micro seconds
+ */
+#define DEFAULT_IRQ_THROTTLE_TIME_US 20
+
+/**
+* Boolean indicating whether the driver is configured to be secure at
+* a potential loss of performance.
+*
+* This currently affects only r0p0-15dev0 HW and earlier.
+*
+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+* performance:
+*
+* - When this is set to true, the driver remains fully secure,
+* but potentially loses performance compared with setting this to
+* false.
+* - When set to false, the driver is open to certain security
+* attacks.
+*
+* From r0p0-00rel0 and onwards, there is no security loss by setting
+* this to false, and no performance loss by setting it to
+* true.
+*/
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 */
+	KBASE_AID_32 = 0x0,
+
+	/**
+	 * Restrict GPU to a half of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_16 = 0x3,
+
+	/**
+	 * Restrict GPU to a quarter of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_8  = 0x2,
+
+	/**
+	 * Restrict GPU to an eighth of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_4  = 0x1
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
+ * defines which UMP device this GPU should be mapped to.
+ */
+#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
+
+/*
+ * Default period for DVFS sampling
+ */
+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+
+/*
+ * Power Management poweroff tick granuality. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/*
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/*
+ * Power Manager number of ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
+
+/*
+ * Default scheduling tick granuality
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
+
+/*
+ * Default timeout for some software jobs, after which the software event wait
+ * jobs will be cancelled.
+ */
+#define DEFAULT_JS_SOFT_JOB_TIMEOUT ((u32)3000) /* 3s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
+ */
+#define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */
+
+/*
+ * Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
+/*
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
new file mode 100755
index 0000000..44939a1
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -0,0 +1,324 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_mem_linux.h>
+
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context.
+ *
+ * Return: new kbase context
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+{
+	struct kbase_context *kctx;
+	int err;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* zero-inited as lot of code assume it's zero'ed out on create */
+	kctx = vzalloc(sizeof(*kctx));
+
+	if (!kctx)
+		goto out;
+
+	/* creating a context is considered a disjoint event */
+	kbase_disjoint_event(kbdev);
+
+	kctx->kbdev = kbdev;
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+	if (is_compat)
+		kbase_ctx_flag_set(kctx, KCTX_COMPAT);
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kctx->timeline.owner_tgid = task_tgid_nr(current);
+#endif
+	atomic_set(&kctx->setup_complete, 0);
+	atomic_set(&kctx->setup_in_progress, 0);
+	spin_lock_init(&kctx->mm_update_lock);
+	kctx->process_mm = NULL;
+	atomic_set(&kctx->nonmapped_pages, 0);
+	kctx->slots_pullable = 0;
+	kctx->tgid = current->tgid;
+	kctx->pid = current->pid;
+
+	err = kbase_mem_pool_init(&kctx->mem_pool,
+			kbdev->mem_pool_max_size_default,
+			kctx->kbdev, &kbdev->mem_pool);
+	if (err)
+		goto free_kctx;
+
+	err = kbase_mem_evictable_init(kctx);
+	if (err)
+		goto free_pool;
+
+	atomic_set(&kctx->used_pages, 0);
+
+	err = kbase_jd_init(kctx);
+	if (err)
+		goto deinit_evictable;
+
+	err = kbasep_js_kctx_init(kctx);
+	if (err)
+		goto free_jd;	/* safe to call kbasep_js_kctx_term  in this case */
+
+	err = kbase_event_init(kctx);
+	if (err)
+		goto free_jd;
+
+	atomic_set(&kctx->drain_pending, 0);
+
+	mutex_init(&kctx->reg_lock);
+
+	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+	spin_lock_init(&kctx->waiting_soft_jobs_lock);
+#ifdef CONFIG_KDS
+	INIT_LIST_HEAD(&kctx->waiting_kds_resource);
+#endif
+	err = kbase_dma_fence_init(kctx);
+	if (err)
+		goto free_event;
+
+	err = kbase_mmu_init(kctx);
+	if (err)
+		goto term_dma_fence;
+
+	do {
+		err = kbase_mem_pool_grow(&kctx->mem_pool,
+				MIDGARD_MMU_BOTTOMLEVEL);
+		if (err)
+			goto pgd_no_mem;
+
+		mutex_lock(&kctx->mmu_lock);
+		kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+		mutex_unlock(&kctx->mmu_lock);
+	} while (!kctx->pgd);
+
+	kctx->aliasing_sink_page = kbase_mem_alloc_page(kctx->kbdev);
+	if (!kctx->aliasing_sink_page)
+		goto no_sink_page;
+
+	init_waitqueue_head(&kctx->event_queue);
+
+	kctx->cookies = KBASE_COOKIE_MASK;
+
+	/* Make sure page 0 is not used... */
+	err = kbase_region_tracker_init(kctx);
+	if (err)
+		goto no_region_tracker;
+
+	err = kbase_sticky_resource_init(kctx);
+	if (err)
+		goto no_sticky;
+
+	err = kbase_jit_init(kctx);
+	if (err)
+		goto no_jit;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_set(&kctx->jctx.work_id, 0);
+#endif
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
+#endif
+
+	kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+	mutex_init(&kctx->vinstr_cli_lock);
+
+	setup_timer(&kctx->soft_job_timeout,
+		    kbasep_soft_job_timeout_worker,
+		    (uintptr_t)kctx);
+
+	return kctx;
+
+no_jit:
+	kbase_gpu_vm_lock(kctx);
+	kbase_sticky_resource_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+no_sticky:
+	kbase_region_tracker_term(kctx);
+no_region_tracker:
+	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
+no_sink_page:
+	/* VM lock needed for the call to kbase_mmu_free_pgd */
+	kbase_gpu_vm_lock(kctx);
+	kbase_mmu_free_pgd(kctx);
+	kbase_gpu_vm_unlock(kctx);
+pgd_no_mem:
+	kbase_mmu_term(kctx);
+term_dma_fence:
+	kbase_dma_fence_term(kctx);
+free_event:
+	kbase_event_cleanup(kctx);
+free_jd:
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+	kbase_jd_exit(kctx);
+deinit_evictable:
+	kbase_mem_evictable_deinit(kctx);
+free_pool:
+	kbase_mem_pool_term(&kctx->mem_pool);
+free_kctx:
+	vfree(kctx);
+out:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context);
+
+static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+{
+	dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+}
+
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Calls kbase_destroy_os_context() to free OS specific structures.
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	int pages;
+	unsigned long pending_regions_to_clean;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+	/* Ensure the core is powered up for the destroy process */
+	/* A suspend won't happen here, because we're in a syscall from a userspace
+	 * thread. */
+	kbase_pm_context_active(kbdev);
+
+	kbase_jd_zap_context(kctx);
+	kbase_event_cleanup(kctx);
+
+	/*
+	 * JIT must be terminated before the code below as it must be called
+	 * without the region lock being held.
+	 * The code above ensures no new JIT allocations can be made by
+	 * by the time we get to this point of context tear down.
+	 */
+	kbase_jit_term(kctx);
+
+	kbase_gpu_vm_lock(kctx);
+
+	kbase_sticky_resource_term(kctx);
+
+	/* MMU is disabled as part of scheduling out the context */
+	kbase_mmu_free_pgd(kctx);
+
+	/* drop the aliasing sink page now that it can't be mapped anymore */
+	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
+
+	/* free pending region setups */
+	pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+	while (pending_regions_to_clean) {
+		unsigned int cookie = __ffs(pending_regions_to_clean);
+
+		BUG_ON(!kctx->pending_regions[cookie]);
+
+		kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+
+		kctx->pending_regions[cookie] = NULL;
+		pending_regions_to_clean &= ~(1UL << cookie);
+	}
+
+	kbase_region_tracker_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+
+	kbase_jd_exit(kctx);
+
+	kbase_pm_context_idle(kbdev);
+
+	kbase_dma_fence_term(kctx);
+
+	kbase_mmu_term(kctx);
+
+	pages = atomic_read(&kctx->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_evictable_deinit(kctx);
+	kbase_mem_pool_term(&kctx->mem_pool);
+	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+	vfree(kctx);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context);
+
+/**
+ * kbase_context_set_create_flags - Set creation flags on a context
+ * @kctx: Kbase context
+ * @flags: Flags to set
+ *
+ * Return: 0 on success
+ */
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
+{
+	int err = 0;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long irq_flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Validate flags */
+	if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+
+	/* Translate the flags */
+	if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+		kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* Latch the initial attributes into the Job Scheduler */
+	kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
+
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ out:
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h
new file mode 100644
index 0000000..a3f5bb0
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h
@@ -0,0 +1,90 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_CONTEXT_H_
+#define _KBASE_CONTEXT_H_
+
+#include <linux/atomic.h>
+
+
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+/**
+ * kbase_ctx_flag - Check if @flag is set on @kctx
+ * @kctx: Pointer to kbase context to check
+ * @flag: Flag to check
+ *
+ * Return: true if @flag is set on @kctx, false if not.
+ */
+static inline bool kbase_ctx_flag(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	return atomic_read(&kctx->flags) & flag;
+}
+
+/**
+ * kbase_ctx_flag_clear - Clear @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Clear the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
+					enum kbase_context_flags flag)
+{
+#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
+	/*
+	 * Earlier kernel versions doesn't have atomic_andnot() or
+	 * atomic_and(). atomic_clear_mask() was only available on some
+	 * architectures and removed on arm in v3.13 on arm and arm64.
+	 *
+	 * Use a compare-exchange loop to clear the flag on pre 4.3 kernels,
+	 * when atomic_andnot() becomes available.
+	 */
+	int old, new;
+
+	do {
+		old = atomic_read(&kctx->flags);
+		new = old & ~flag;
+
+	} while (atomic_cmpxchg(&kctx->flags, old, new) != old);
+#else
+	atomic_andnot(flag, &kctx->flags);
+#endif
+}
+
+/**
+ * kbase_ctx_flag_set - Set @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Set the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	atomic_or(flag, &kctx->flags);
+}
+#endif /* _KBASE_CONTEXT_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
new file mode 100755
index 0000000..aaf6ca1
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -0,0 +1,4299 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_uku.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_mem_linux.h>
+#ifdef CONFIG_MALI_DEVFREQ
+#include <backend/gpu/mali_kbase_devfreq.h>
+#endif /* CONFIG_MALI_DEVFREQ */
+#ifdef CONFIG_MALI_NO_MALI
+#include "mali_kbase_model_linux.h"
+#endif /* CONFIG_MALI_NO_MALI */
+#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_debugfs.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_regs_dump_debugfs.h"
+#endif /* !MALI_CUSTOMER_RELEASE */
+#include "mali_kbase_regs_history_debugfs.h"
+#include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#include <linux/anon_inodes.h>
+#include <linux/syscalls.h>
+#endif /* CONFIG_KDS */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/compat.h>	/* is_compat_task */
+#include <linux/mman.h>
+#include <linux/version.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+#include <mali_kbase_hw.h>
+#include <platform/mali_kbase_platform_common.h>
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+#include <platform/mali_kbase_platform_fake.h>
+#endif /*CONFIG_MALI_PLATFORM_FAKE */
+#ifdef CONFIG_SYNC
+#include <mali_kbase_sync.h>
+#endif /* CONFIG_SYNC */
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_PM_DEVFREQ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+
+#include <mali_kbase_config.h>
+
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif
+
+#include <mali_kbase_tlstream.h>
+
+#include <mali_kbase_as_fault_debugfs.h>
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+#if MALI_UNIT_TEST
+static struct kbase_exported_test_data shared_kernel_test_data;
+EXPORT_SYMBOL(shared_kernel_test_data);
+#endif /* MALI_UNIT_TEST */
+
+static int kbase_dev_nr;
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+extern int mali_pm_statue;
+#endif
+
+static DEFINE_MUTEX(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+
+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
+static inline void __compile_time_asserts(void)
+{
+	CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE);
+}
+
+static void kbase_create_timeline_objects(struct kbase_context *kctx)
+{
+	struct kbase_device             *kbdev = kctx->kbdev;
+	unsigned int                    lpu_id;
+	unsigned int                    as_nr;
+	struct kbasep_kctx_list_element *element;
+
+	/* Create LPU objects. */
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		u32 *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, lpu_id, *lpu);
+	}
+
+	/* Create Address Space objects. */
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(&kbdev->as[as_nr], as_nr);
+
+	/* Create GPU object and make it retain all LPUs and address spaces. */
+	KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(
+			kbdev,
+			kbdev->gpu_props.props.raw_props.gpu_id,
+			kbdev->gpu_props.num_cores);
+
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		void *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, kbdev);
+	}
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(
+				&kbdev->as[as_nr],
+				kbdev);
+
+	/* Create object for each known context. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry(element, &kbdev->kctx_list, link) {
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(
+				element->kctx,
+				(u32)(element->kctx->id),
+				(u32)(element->kctx->tgid));
+	}
+	/* Before releasing the lock, reset body stream buffers.
+	 * This will prevent context creation message to be directed to both
+	 * summary and body stream. */
+	kbase_tlstream_reset_body_streams();
+	mutex_unlock(&kbdev->kctx_list_lock);
+	/* Static object are placed into summary packet that needs to be
+	 * transmitted first. Flush all streams to make it available to
+	 * user space. */
+	kbase_tlstream_flush_streams();
+}
+
+static void kbase_api_handshake(struct uku_version_check_args *version)
+{
+	switch (version->major) {
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	case 6:
+		/* We are backwards compatible with version 6,
+		 * so pretend to be the old version */
+		version->major = 6;
+		version->minor = 1;
+		break;
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+#ifdef BASE_LEGACY_UK7_SUPPORT
+	case 7:
+		/* We are backwards compatible with version 7,
+		 * so pretend to be the old version */
+		version->major = 7;
+		version->minor = 1;
+		break;
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	case 8:
+		/* We are backwards compatible with version 8,
+		 * so pretend to be the old version */
+		version->major = 8;
+		version->minor = 4;
+		break;
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+#ifdef BASE_LEGACY_UK9_SUPPORT
+	case 9:
+		/* We are backwards compatible with version 9,
+		 * so pretend to be the old version */
+		version->major = 9;
+		version->minor = 0;
+		break;
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+	case BASE_UK_VERSION_MAJOR:
+		/* set minor to be the lowest common */
+		version->minor = min_t(int, BASE_UK_VERSION_MINOR,
+				(int)version->minor);
+		break;
+	default:
+		/* We return our actual version regardless if it
+		 * matches the version returned by userspace -
+		 * userspace can bail if it can't handle this
+		 * version */
+		version->major = BASE_UK_VERSION_MAJOR;
+		version->minor = BASE_UK_VERSION_MINOR;
+		break;
+	}
+}
+
+/**
+ * enum mali_error - Mali error codes shared with userspace
+ *
+ * This is subset of those common Mali errors that can be returned to userspace.
+ * Values of matching user and kernel space enumerators MUST be the same.
+ * MALI_ERROR_NONE is guaranteed to be 0.
+ */
+enum mali_error {
+	MALI_ERROR_NONE = 0,
+	MALI_ERROR_OUT_OF_GPU_MEMORY,
+	MALI_ERROR_OUT_OF_MEMORY,
+	MALI_ERROR_FUNCTION_FAILED,
+};
+
+enum {
+	inited_mem = (1u << 0),
+	inited_js = (1u << 1),
+	inited_pm_runtime_init = (1u << 2),
+#ifdef CONFIG_MALI_DEVFREQ
+	inited_devfreq = (1u << 3),
+#endif /* CONFIG_MALI_DEVFREQ */
+	inited_tlstream = (1u << 4),
+	inited_backend_early = (1u << 5),
+	inited_backend_late = (1u << 6),
+	inited_device = (1u << 7),
+	inited_vinstr = (1u << 8),
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	inited_ipa = (1u << 9),
+#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+	inited_job_fault = (1u << 10),
+	inited_misc_register = (1u << 11),
+	inited_get_device = (1u << 12),
+	inited_sysfs_group = (1u << 13),
+	inited_dev_list = (1u << 14),
+	inited_debugfs = (1u << 15),
+	inited_gpu_device = (1u << 16),
+	inited_registers_map = (1u << 17),
+	inited_io_history = (1u << 18),
+	inited_power_control = (1u << 19),
+	inited_buslogger = (1u << 20)
+};
+
+
+#ifdef CONFIG_MALI_DEBUG
+#define INACTIVE_WAIT_MS (5000)
+
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive)
+{
+	kbdev->driver_inactive = inactive;
+	wake_up(&kbdev->driver_inactive_wait);
+
+	/* Wait for any running IOCTLs to complete */
+	if (inactive)
+		msleep(INACTIVE_WAIT_MS);
+}
+KBASE_EXPORT_TEST_API(kbase_set_driver_inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 args_size)
+{
+	struct kbase_device *kbdev;
+	union uk_header *ukh = args;
+	u32 id;
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(ukh != NULL);
+
+	kbdev = kctx->kbdev;
+	id = ukh->id;
+	ukh->ret = MALI_ERROR_NONE; /* Be optimistic */
+
+#ifdef CONFIG_MALI_DEBUG
+	wait_event(kbdev->driver_inactive_wait,
+			kbdev->driver_inactive == false);
+#endif /* CONFIG_MALI_DEBUG */
+
+	if (UKP_FUNC_ID_CHECK_VERSION == id) {
+		struct uku_version_check_args *version_check;
+
+		if (args_size != sizeof(struct uku_version_check_args)) {
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			return 0;
+		}
+		version_check = (struct uku_version_check_args *)args;
+		kbase_api_handshake(version_check);
+		/* save the proposed version number for later use */
+		kctx->api_version = KBASE_API_VERSION(version_check->major,
+				version_check->minor);
+		ukh->ret = MALI_ERROR_NONE;
+		return 0;
+	}
+
+	/* block calls until version handshake */
+	if (kctx->api_version == 0)
+		return -EINVAL;
+
+	if (!atomic_read(&kctx->setup_complete)) {
+		struct kbase_uk_set_flags *kbase_set_flags;
+
+		/* setup pending, try to signal that we'll do the setup,
+		 * if setup was already in progress, err this call
+		 */
+		if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0)
+			return -EINVAL;
+
+		/* if unexpected call, will stay stuck in setup mode
+		 * (is it the only call we accept?)
+		 */
+		if (id != KBASE_FUNC_SET_FLAGS)
+			return -EINVAL;
+
+		kbase_set_flags = (struct kbase_uk_set_flags *)args;
+
+		/* if not matching the expected call, stay in setup mode */
+		if (sizeof(*kbase_set_flags) != args_size)
+			goto bad_size;
+
+		/* if bad flags, will stay stuck in setup mode */
+		if (kbase_context_set_create_flags(kctx,
+				kbase_set_flags->create_flags) != 0)
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+		atomic_set(&kctx->setup_complete, 1);
+		return 0;
+	}
+
+	/* setup complete, perform normal operation */
+	switch (id) {
+	case KBASE_FUNC_MEM_JIT_INIT:
+		{
+			struct kbase_uk_mem_jit_init *jit_init = args;
+
+			if (sizeof(*jit_init) != args_size)
+				goto bad_size;
+
+			if (kbase_region_tracker_init_jit(kctx,
+					jit_init->va_pages))
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_MEM_ALLOC:
+		{
+			struct kbase_uk_mem_alloc *mem = args;
+			struct kbase_va_region *reg;
+
+			if (sizeof(*mem) != args_size)
+				goto bad_size;
+
+#if defined(CONFIG_64BIT)
+			if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+				/* force SAME_VA if a 64-bit client */
+				mem->flags |= BASE_MEM_SAME_VA;
+			}
+#endif
+
+			reg = kbase_mem_alloc(kctx, mem->va_pages,
+					mem->commit_pages, mem->extent,
+					&mem->flags, &mem->gpu_va);
+			mem->va_alignment = 0;
+
+			if (!reg)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_MEM_IMPORT: {
+			struct kbase_uk_mem_import *mem_import = args;
+			void __user *phandle;
+
+			if (sizeof(*mem_import) != args_size)
+				goto bad_size;
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				phandle = compat_ptr(mem_import->phandle.compat_value);
+			else
+#endif
+				phandle = mem_import->phandle.value;
+
+			if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_import(kctx,
+					(enum base_mem_import_type)
+					mem_import->type,
+					phandle,
+					&mem_import->gpu_va,
+					&mem_import->va_pages,
+					&mem_import->flags)) {
+				mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID;
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			}
+			break;
+	}
+	case KBASE_FUNC_MEM_ALIAS: {
+			struct kbase_uk_mem_alias *alias = args;
+			struct base_mem_aliasing_info __user *user_ai;
+			struct base_mem_aliasing_info *ai;
+
+			if (sizeof(*alias) != args_size)
+				goto bad_size;
+
+			if (alias->nents > 2048) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+			if (!alias->nents) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				user_ai = compat_ptr(alias->ai.compat_value);
+			else
+#endif
+				user_ai = alias->ai.value;
+
+			ai = vmalloc(sizeof(*ai) * alias->nents);
+
+			if (!ai) {
+				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+				break;
+			}
+
+			if (copy_from_user(ai, user_ai,
+					   sizeof(*ai) * alias->nents)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				goto copy_failed;
+			}
+
+			alias->gpu_va = kbase_mem_alias(kctx, &alias->flags,
+							alias->stride,
+							alias->nents, ai,
+							&alias->va_pages);
+			if (!alias->gpu_va) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				goto no_alias;
+			}
+no_alias:
+copy_failed:
+			vfree(ai);
+			break;
+		}
+	case KBASE_FUNC_MEM_COMMIT:
+		{
+			struct kbase_uk_mem_commit *commit = args;
+
+			if (sizeof(*commit) != args_size)
+				goto bad_size;
+
+			if (commit->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_COMMIT: commit->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_commit(kctx, commit->gpu_addr,
+					commit->pages,
+					(base_backing_threshold_status *)
+					&commit->result_subcode) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+
+	case KBASE_FUNC_MEM_QUERY:
+		{
+			struct kbase_uk_mem_query *query = args;
+
+			if (sizeof(*query) != args_size)
+				goto bad_size;
+
+			if (query->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+			if (query->query != KBASE_MEM_QUERY_COMMIT_SIZE &&
+			    query->query != KBASE_MEM_QUERY_VA_SIZE &&
+				query->query != KBASE_MEM_QUERY_FLAGS) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->query = %lld unknown", (unsigned long long)query->query);
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_query(kctx, query->gpu_addr,
+					query->query, &query->value) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			break;
+		}
+		break;
+
+	case KBASE_FUNC_MEM_FLAGS_CHANGE:
+		{
+			struct kbase_uk_mem_flags_change *fc = args;
+
+			if (sizeof(*fc) != args_size)
+				goto bad_size;
+
+			if ((fc->gpu_va & ~PAGE_MASK) && (fc->gpu_va >= PAGE_SIZE)) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FLAGS_CHANGE: mem->gpu_va: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_flags_change(kctx, fc->gpu_va,
+					fc->flags, fc->mask) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+	case KBASE_FUNC_MEM_FREE:
+		{
+			struct kbase_uk_mem_free *mem = args;
+
+			if (sizeof(*mem) != args_size)
+				goto bad_size;
+
+			if ((mem->gpu_addr & ~PAGE_MASK) && (mem->gpu_addr >= PAGE_SIZE)) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FREE: mem->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_free(kctx, mem->gpu_addr) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+
+	case KBASE_FUNC_JOB_SUBMIT:
+		{
+			struct kbase_uk_job_submit *job = args;
+
+			if (sizeof(*job) != args_size)
+				goto bad_size;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+			if (kbase_jd_submit(kctx, job, 0) != 0)
+#else
+			if (kbase_jd_submit(kctx, job) != 0)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	case KBASE_FUNC_JOB_SUBMIT_UK6:
+		{
+			struct kbase_uk_job_submit *job = args;
+
+			if (sizeof(*job) != args_size)
+				goto bad_size;
+
+			if (kbase_jd_submit(kctx, job, 1) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+#endif
+
+	case KBASE_FUNC_SYNC:
+		{
+			struct kbase_uk_sync_now *sn = args;
+
+			if (sizeof(*sn) != args_size)
+				goto bad_size;
+
+			if (sn->sset.basep_sset.mem_handle.basep.handle & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+#ifndef CONFIG_MALI_COH_USER
+			if (kbase_sync_now(kctx, &sn->sset) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif
+			break;
+		}
+
+	case KBASE_FUNC_DISJOINT_QUERY:
+		{
+			struct kbase_uk_disjoint_query *dquery = args;
+
+			if (sizeof(*dquery) != args_size)
+				goto bad_size;
+
+			/* Get the disjointness counter value. */
+			dquery->counter = kbase_disjoint_event_get(kctx->kbdev);
+			break;
+		}
+
+	case KBASE_FUNC_POST_TERM:
+		{
+			kbase_event_close(kctx);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_SETUP:
+		{
+			struct kbase_uk_hwcnt_setup *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_legacy_hwc_setup(kbdev->vinstr_ctx,
+					&kctx->vinstr_cli, setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_DUMP:
+		{
+			/* args ignored */
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwc_dump(kctx->vinstr_cli,
+					BASE_HWCNT_READER_EVENT_MANUAL) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_CLEAR:
+		{
+			/* args ignored */
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwc_clear(kctx->vinstr_cli) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_READER_SETUP:
+		{
+			struct kbase_uk_hwcnt_reader_setup *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwcnt_reader_setup(kbdev->vinstr_ctx,
+					setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_GPU_PROPS_REG_DUMP:
+		{
+			struct kbase_uk_gpuprops *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			if (kbase_gpuprops_uk_get_props(kctx, setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_FIND_CPU_OFFSET:
+		{
+			struct kbase_uk_find_cpu_offset *find = args;
+
+			if (sizeof(*find) != args_size)
+				goto bad_size;
+
+			if (find->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_FIND_CPU_OFFSET: find->gpu_addr: passed parameter is invalid");
+				goto out_bad;
+			}
+
+			if (find->size > SIZE_MAX || find->cpu_addr > ULONG_MAX) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else {
+				int err;
+
+				err = kbasep_find_enclosing_cpu_mapping_offset(
+						kctx,
+						find->cpu_addr,
+						find->size,
+						&find->offset);
+
+				if (err)
+					ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			}
+			break;
+		}
+	case KBASE_FUNC_GET_VERSION:
+		{
+			struct kbase_uk_get_ddk_version *get_version = (struct kbase_uk_get_ddk_version *)args;
+
+			if (sizeof(*get_version) != args_size)
+				goto bad_size;
+
+			/* version buffer size check is made in compile time assert */
+			memcpy(get_version->version_buffer, KERNEL_SIDE_DDK_VERSION_STRING, sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+			get_version->version_string_size = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+			break;
+		}
+
+	case KBASE_FUNC_STREAM_CREATE:
+		{
+#ifdef CONFIG_SYNC
+			struct kbase_uk_stream_create *screate = (struct kbase_uk_stream_create *)args;
+
+			if (sizeof(*screate) != args_size)
+				goto bad_size;
+
+			if (strnlen(screate->name, sizeof(screate->name)) >= sizeof(screate->name)) {
+				/* not NULL terminated */
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_stream_create(screate->name, &screate->fd) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+#else /* CONFIG_SYNC */
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif /* CONFIG_SYNC */
+			break;
+		}
+	case KBASE_FUNC_FENCE_VALIDATE:
+		{
+#ifdef CONFIG_SYNC
+			struct kbase_uk_fence_validate *fence_validate = (struct kbase_uk_fence_validate *)args;
+
+			if (sizeof(*fence_validate) != args_size)
+				goto bad_size;
+
+			if (kbase_fence_validate(fence_validate->fd) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+#endif /* CONFIG_SYNC */
+			break;
+		}
+
+	case KBASE_FUNC_SET_TEST_DATA:
+		{
+#if MALI_UNIT_TEST
+			struct kbase_uk_set_test_data *set_data = args;
+
+			shared_kernel_test_data = set_data->test_data;
+			shared_kernel_test_data.kctx.value = (void __user *)kctx;
+			shared_kernel_test_data.mm.value = (void __user *)current->mm;
+			ukh->ret = MALI_ERROR_NONE;
+#endif /* MALI_UNIT_TEST */
+			break;
+		}
+
+	case KBASE_FUNC_INJECT_ERROR:
+		{
+#ifdef CONFIG_MALI_ERROR_INJECT
+			unsigned long flags;
+			struct kbase_error_params params = ((struct kbase_uk_error_params *)args)->params;
+
+			/*mutex lock */
+			spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+			if (job_atom_inject_error(&params) != 0)
+				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+			/*mutex unlock */
+#endif /* CONFIG_MALI_ERROR_INJECT */
+			break;
+		}
+
+	case KBASE_FUNC_MODEL_CONTROL:
+		{
+#ifdef CONFIG_MALI_NO_MALI
+			unsigned long flags;
+			struct kbase_model_control_params params =
+					((struct kbase_uk_model_control_params *)args)->params;
+
+			/*mutex lock */
+			spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+			if (gpu_model_control(kbdev->model, &params) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+			/*mutex unlock */
+#endif /* CONFIG_MALI_NO_MALI */
+			break;
+		}
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	case KBASE_FUNC_KEEP_GPU_POWERED:
+		{
+			dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_KEEP_GPU_POWERED: function is deprecated and disabled\n");
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+	case KBASE_FUNC_GET_PROFILING_CONTROLS:
+		{
+			struct kbase_uk_profiling_controls *controls =
+					(struct kbase_uk_profiling_controls *)args;
+			u32 i;
+
+			if (sizeof(*controls) != args_size)
+				goto bad_size;
+
+			for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+				controls->profiling_controls[i] = kbase_get_profiling_control(kbdev, i);
+
+			break;
+		}
+
+	/* used only for testing purposes; these controls are to be set by gator through gator API */
+	case KBASE_FUNC_SET_PROFILING_CONTROLS:
+		{
+			struct kbase_uk_profiling_controls *controls =
+					(struct kbase_uk_profiling_controls *)args;
+			u32 i;
+
+			if (sizeof(*controls) != args_size)
+				goto bad_size;
+
+			for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+				_mali_profiling_control(i, controls->profiling_controls[i]);
+
+			break;
+		}
+
+	case KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD:
+		{
+			struct kbase_uk_debugfs_mem_profile_add *add_data =
+					(struct kbase_uk_debugfs_mem_profile_add *)args;
+			char *buf;
+			char __user *user_buf;
+
+			if (sizeof(*add_data) != args_size)
+				goto bad_size;
+
+			if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+				dev_err(kbdev->dev, "buffer too big\n");
+				goto out_bad;
+			}
+
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				user_buf = compat_ptr(add_data->buf.compat_value);
+			else
+#endif
+				user_buf = add_data->buf.value;
+
+			buf = kmalloc(add_data->len, GFP_KERNEL);
+			if (ZERO_OR_NULL_PTR(buf))
+				goto out_bad;
+
+			if (0 != copy_from_user(buf, user_buf, add_data->len)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				kfree(buf);
+				goto out_bad;
+			}
+
+			if (kbasep_mem_profile_debugfs_insert(kctx, buf,
+							add_data->len)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				kfree(buf);
+				goto out_bad;
+			}
+
+			break;
+		}
+
+#ifdef CONFIG_MALI_NO_MALI
+	case KBASE_FUNC_SET_PRFCNT_VALUES:
+		{
+
+			struct kbase_uk_prfcnt_values *params =
+			  ((struct kbase_uk_prfcnt_values *)args);
+			gpu_model_set_dummy_prfcnt_sample(params->data,
+					params->size);
+
+			break;
+		}
+#endif /* CONFIG_MALI_NO_MALI */
+#ifdef BASE_LEGACY_UK10_4_SUPPORT
+	case KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4:
+		{
+			struct kbase_uk_tlstream_acquire_v10_4 *tlstream_acquire
+					= args;
+
+			if (sizeof(*tlstream_acquire) != args_size)
+				goto bad_size;
+
+			if (0 != kbase_tlstream_acquire(
+						kctx,
+						&tlstream_acquire->fd, 0)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else if (0 <= tlstream_acquire->fd) {
+				/* Summary stream was cleared during acquire.
+				 * Create static timeline objects that will be
+				 * read by client. */
+				kbase_create_timeline_objects(kctx);
+			}
+			break;
+		}
+#endif /* BASE_LEGACY_UK10_4_SUPPORT */
+	case KBASE_FUNC_TLSTREAM_ACQUIRE:
+		{
+			struct kbase_uk_tlstream_acquire *tlstream_acquire =
+				args;
+
+			if (sizeof(*tlstream_acquire) != args_size)
+				goto bad_size;
+
+			if (tlstream_acquire->flags & ~BASE_TLSTREAM_FLAGS_MASK)
+				goto out_bad;
+
+			if (0 != kbase_tlstream_acquire(
+						kctx,
+						&tlstream_acquire->fd,
+						tlstream_acquire->flags)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else if (0 <= tlstream_acquire->fd) {
+				/* Summary stream was cleared during acquire.
+				 * Create static timeline objects that will be
+				 * read by client. */
+				kbase_create_timeline_objects(kctx);
+			}
+			break;
+		}
+	case KBASE_FUNC_TLSTREAM_FLUSH:
+		{
+			struct kbase_uk_tlstream_flush *tlstream_flush =
+				args;
+
+			if (sizeof(*tlstream_flush) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_flush_streams();
+			break;
+		}
+#if MALI_UNIT_TEST
+	case KBASE_FUNC_TLSTREAM_TEST:
+		{
+			struct kbase_uk_tlstream_test *tlstream_test = args;
+
+			if (sizeof(*tlstream_test) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_test(
+					tlstream_test->tpw_count,
+					tlstream_test->msg_delay,
+					tlstream_test->msg_count,
+					tlstream_test->aux_msg);
+			break;
+		}
+	case KBASE_FUNC_TLSTREAM_STATS:
+		{
+			struct kbase_uk_tlstream_stats *tlstream_stats = args;
+
+			if (sizeof(*tlstream_stats) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_stats(
+					&tlstream_stats->bytes_collected,
+					&tlstream_stats->bytes_generated);
+			break;
+		}
+#endif /* MALI_UNIT_TEST */
+
+	case KBASE_FUNC_GET_CONTEXT_ID:
+		{
+			struct kbase_uk_context_id *info = args;
+
+			info->id = kctx->id;
+			break;
+		}
+
+	case KBASE_FUNC_SOFT_EVENT_UPDATE:
+		{
+			struct kbase_uk_soft_event_update *update = args;
+
+			if (sizeof(*update) != args_size)
+				goto bad_size;
+
+			if (((update->new_status != BASE_JD_SOFT_EVENT_SET) &&
+			    (update->new_status != BASE_JD_SOFT_EVENT_RESET)) ||
+			    (update->flags != 0))
+				goto out_bad;
+
+			if (kbase_soft_event_update(kctx, update->evt,
+						update->new_status))
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+
+	default:
+		dev_err(kbdev->dev, "unknown ioctl %u\n", id);
+		goto out_bad;
+	}
+
+	return ret;
+
+ bad_size:
+	dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id);
+ out_bad:
+	return -EINVAL;
+}
+
+static struct kbase_device *to_kbase_device(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
+
+static int assign_irqs(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int i;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* 3 IRQ resources */
+	for (i = 0; i < 3; i++) {
+		struct resource *irq_res;
+		int irqtag;
+
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res) {
+			dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+			return -ENOENT;
+		}
+
+#ifdef CONFIG_OF
+		if (!strncmp(irq_res->name, "JOB", 4)) {
+			irqtag = JOB_IRQ_TAG;
+		} else if (!strncmp(irq_res->name, "MMU", 4)) {
+			irqtag = MMU_IRQ_TAG;
+		} else if (!strncmp(irq_res->name, "GPU", 4)) {
+			irqtag = GPU_IRQ_TAG;
+		} else {
+			dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+				irq_res->name);
+			return -EINVAL;
+		}
+#else
+		irqtag = i;
+#endif /* CONFIG_OF */
+		kbdev->irqs[irqtag].irq = irq_res->start;
+		kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK;
+	}
+
+	return 0;
+}
+
+/*
+ * API to acquire device list mutex and
+ * return pointer to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void)
+{
+	mutex_lock(&kbase_dev_list_lock);
+	return &kbase_dev_list;
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_get);
+
+/* API to release the device list mutex */
+void kbase_dev_list_put(const struct list_head *dev_list)
+{
+	mutex_unlock(&kbase_dev_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_put);
+
+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */
+struct kbase_device *kbase_find_device(int minor)
+{
+	struct kbase_device *kbdev = NULL;
+	struct list_head *entry;
+	const struct list_head *dev_list = kbase_dev_list_get();
+
+	list_for_each(entry, dev_list) {
+		struct kbase_device *tmp;
+
+		tmp = list_entry(entry, struct kbase_device, entry);
+		if (tmp->mdev.minor == minor || minor == -1) {
+			kbdev = tmp;
+			get_device(kbdev->dev);
+			break;
+		}
+	}
+	kbase_dev_list_put(dev_list);
+
+	return kbdev;
+}
+EXPORT_SYMBOL(kbase_find_device);
+
+void kbase_release_device(struct kbase_device *kbdev)
+{
+	put_device(kbdev->dev);
+}
+EXPORT_SYMBOL(kbase_release_device);
+
+#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
+/*
+ * Older versions, before v4.6, of the kernel doesn't have
+ * kstrtobool_from_user().
+ */
+static int kstrtobool_from_user(const char __user *s, size_t count, bool *res)
+{
+	char buf[32];
+
+	count = min(sizeof(buf), count);
+
+	if (copy_from_user(buf, s, count))
+		return -EFAULT;
+	buf[count] = '\0';
+
+	return strtobool(buf, res);
+}
+#endif
+
+static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	int err;
+	bool value;
+
+	err = kstrtobool_from_user(ubuf, size, &value);
+	if (err)
+		return err;
+
+	if (value)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+	else
+		kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE);
+
+	return size;
+}
+
+static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	char buf[32];
+	int count;
+	bool value;
+
+	value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE);
+
+	count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N");
+
+	return simple_read_from_buffer(ubuf, size, off, buf, count);
+}
+
+static const struct file_operations kbase_infinite_cache_fops = {
+	.open = simple_open,
+	.write = write_ctx_infinite_cache,
+	.read = read_ctx_infinite_cache,
+};
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+	struct kbase_device *kbdev = NULL;
+	struct kbase_context *kctx;
+	int ret = 0;
+#ifdef CONFIG_DEBUG_FS
+	char kctx_name[64];
+#endif
+
+	kbdev = kbase_find_device(iminor(inode));
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kctx = kbase_create_context(kbdev, is_compat_task());
+	if (!kctx) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	init_waitqueue_head(&kctx->event_queue);
+	filp->private_data = kctx;
+	kctx->filp = filp;
+
+	if (kbdev->infinite_cache_active_default)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+
+#ifdef CONFIG_DEBUG_FS
+	snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+
+	kctx->kctx_dentry = debugfs_create_dir(kctx_name,
+			kbdev->debugfs_ctx_directory);
+
+	if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+#ifdef CONFIG_MALI_COH_USER
+	 /* if cache is completely coherent at hardware level, then remove the
+	  * infinite cache control support from debugfs.
+	  */
+#else
+	debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry,
+			    kctx, &kbase_infinite_cache_fops);
+#endif /* CONFIG_MALI_COH_USER */
+
+	mutex_init(&kctx->mem_profile_lock);
+
+	kbasep_jd_debugfs_ctx_init(kctx);
+	kbase_debug_mem_view_init(filp);
+
+	kbase_debug_job_fault_context_init(kctx);
+
+	kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool);
+
+	kbase_jit_debugfs_init(kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+	dev_dbg(kbdev->dev, "created base context\n");
+
+	{
+		struct kbasep_kctx_list_element *element;
+
+		element = kzalloc(sizeof(*element), GFP_KERNEL);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			element->kctx = kctx;
+			list_add(&element->link, &kbdev->kctx_list);
+			KBASE_TLSTREAM_TL_NEW_CTX(
+					element->kctx,
+					(u32)(element->kctx->id),
+					(u32)(element->kctx->tgid));
+			mutex_unlock(&kbdev->kctx_list_lock);
+		} else {
+			/* we don't treat this as a fail - just warn about it */
+			dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n");
+		}
+	}
+	return 0;
+
+ out:
+	kbase_release_device(kbdev);
+	return ret;
+}
+
+static int kbase_release(struct inode *inode, struct file *filp)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_kctx_list_element *element, *tmp;
+	bool found_element = false;
+
+	KBASE_TLSTREAM_TL_DEL_CTX(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	debugfs_remove_recursive(kctx->kctx_dentry);
+	kbasep_mem_profile_debugfs_remove(kctx);
+	kbase_debug_job_fault_context_term(kctx);
+#endif
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found_element = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+	if (!found_element)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	filp->private_data = NULL;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	/* If this client was performing hwcnt dumping and did not explicitly
+	 * detach itself, remove it from the vinstr core now */
+	if (kctx->vinstr_cli) {
+		struct kbase_uk_hwcnt_setup setup;
+
+		setup.dump_buffer = 0llu;
+		kbase_vinstr_legacy_hwc_setup(
+				kbdev->vinstr_ctx, &kctx->vinstr_cli, &setup);
+	}
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	kbase_destroy_context(kctx);
+
+	dev_dbg(kbdev->dev, "deleted base context\n");
+	kbase_release_device(kbdev);
+	return 0;
+}
+
+#define CALL_MAX_SIZE 536
+
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	u64 msg[(CALL_MAX_SIZE + 7) >> 3] = { 0xdeadbeefdeadbeefull };	/* alignment fixup */
+	u32 size = _IOC_SIZE(cmd);
+	struct kbase_context *kctx = filp->private_data;
+
+	if (size > CALL_MAX_SIZE)
+		return -ENOTTY;
+
+	if (0 != copy_from_user(&msg, (void __user *)arg, size)) {
+		dev_err(kctx->kbdev->dev, "failed to copy ioctl argument into kernel space\n");
+		return -EFAULT;
+	}
+
+	if (kbase_dispatch(kctx, &msg, size) != 0)
+		return -EFAULT;
+
+	if (0 != copy_to_user((void __user *)arg, &msg, size)) {
+		dev_err(kctx->kbdev->dev, "failed to copy results of UK call back to user space\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct base_jd_event_v2 uevent;
+	int out_count = 0;
+
+	if (count < sizeof(uevent))
+		return -ENOBUFS;
+
+	do {
+		while (kbase_event_dequeue(kctx, &uevent)) {
+			if (out_count > 0)
+				goto out;
+
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			if (wait_event_interruptible(kctx->event_queue,
+					kbase_event_pending(kctx)) != 0)
+				return -ERESTARTSYS;
+		}
+		if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+			if (out_count == 0)
+				return -EPIPE;
+			goto out;
+		}
+
+		if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0)
+			return -EFAULT;
+
+		buf += sizeof(uevent);
+		out_count++;
+		count -= sizeof(uevent);
+	} while (count >= sizeof(uevent));
+
+ out:
+	return out_count * sizeof(uevent);
+}
+
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+	struct kbase_context *kctx = filp->private_data;
+
+	poll_wait(filp, &kctx->event_queue, wait);
+	if (kbase_event_pending(kctx))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup);
+
+static int kbase_check_flags(int flags)
+{
+	/* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
+	 * closes the file descriptor in a child process.
+	 */
+	if (0 == (flags & O_CLOEXEC))
+		return -EINVAL;
+
+	return 0;
+}
+
+
+/**
+ * align_and_check - Align the specified pointer to the provided alignment and
+ *                   check that it is still in range.
+ * @gap_end:        Highest possible start address for allocation (end of gap in
+ *                  address space)
+ * @gap_start:      Start address of current memory area / gap in address space
+ * @info:           vm_unmapped_area_info structure passed to caller, containing
+ *                  alignment, length and limits for the allocation
+ * @is_shader_code: True if the allocation is for shader code (which has
+ *                  additional alignment requirements)
+ *
+ * Return: true if gap_end is now aligned correctly and is still in range,
+ *         false otherwise
+ */
+static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
+		struct vm_unmapped_area_info *info, bool is_shader_code)
+{
+	/* Compute highest gap address at the desired alignment */
+	(*gap_end) -= info->length;
+	(*gap_end) -= (*gap_end - info->align_offset) & info->align_mask;
+
+	if (is_shader_code) {
+		/* Check for 4GB boundary */
+		if (0 == (*gap_end & BASE_MEM_MASK_4GB))
+			(*gap_end) -= (info->align_offset ? info->align_offset :
+					info->length);
+		if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB))
+			(*gap_end) -= (info->align_offset ? info->align_offset :
+					info->length);
+
+		if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end +
+				info->length) & BASE_MEM_MASK_4GB))
+			return false;
+	}
+
+
+	if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
+		return false;
+
+
+	return true;
+}
+
+/* The following function is taken from the kernel and just
+ * renamed. As it's not exported to modules we must copy-paste it here.
+ */
+
+static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
+		*info, bool is_shader_code)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+
+	/* Adjust search length to account for worst case alignment overhead */
+	length = info->length + info->align_mask;
+	if (length < info->length)
+		return -ENOMEM;
+
+	/*
+	 * Adjust search limits by the desired length.
+	 * See implementation comment at top of unmapped_area().
+	 */
+	gap_end = info->high_limit;
+	if (gap_end < length)
+		return -ENOMEM;
+	high_limit = gap_end - length;
+
+	if (info->low_limit > high_limit)
+		return -ENOMEM;
+	low_limit = info->low_limit + length;
+
+	/* Check highest gap, which does not precede any rbtree node */
+	gap_start = mm->highest_vm_end;
+	if (gap_start <= high_limit) {
+		if (align_and_check(&gap_end, gap_start, info, is_shader_code))
+			return gap_end;
+	}
+
+	/* Check if rbtree root looks promising */
+	if (RB_EMPTY_ROOT(&mm->mm_rb))
+		return -ENOMEM;
+	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+	if (vma->rb_subtree_gap < length)
+		return -ENOMEM;
+
+	while (true) {
+		/* Visit right subtree if it looks promising */
+		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+			struct vm_area_struct *right =
+				rb_entry(vma->vm_rb.rb_right,
+					 struct vm_area_struct, vm_rb);
+			if (right->rb_subtree_gap >= length) {
+				vma = right;
+				continue;
+			}
+		}
+
+check_current:
+		/* Check if current node has a suitable gap */
+		gap_end = vma->vm_start;
+		if (gap_end < low_limit)
+			return -ENOMEM;
+		if (gap_start <= high_limit && gap_end - gap_start >= length) {
+			/* We found a suitable gap. Clip it with the original
+			 * high_limit. */
+			if (gap_end > info->high_limit)
+				gap_end = info->high_limit;
+
+			if (align_and_check(&gap_end, gap_start, info,
+					is_shader_code))
+				return gap_end;
+		}
+
+		/* Visit left subtree if it looks promising */
+		if (vma->vm_rb.rb_left) {
+			struct vm_area_struct *left =
+				rb_entry(vma->vm_rb.rb_left,
+					 struct vm_area_struct, vm_rb);
+			if (left->rb_subtree_gap >= length) {
+				vma = left;
+				continue;
+			}
+		}
+
+		/* Go back up the rbtree to find next candidate node */
+		while (true) {
+			struct rb_node *prev = &vma->vm_rb;
+			if (!rb_parent(prev))
+				return -ENOMEM;
+			vma = rb_entry(rb_parent(prev),
+				       struct vm_area_struct, vm_rb);
+			if (prev == vma->vm_rb.rb_right) {
+				gap_start = vma->vm_prev ?
+					vma->vm_prev->vm_end : 0;
+				goto check_current;
+			}
+		}
+	}
+
+	return -ENOMEM;
+}
+
+static unsigned long kbase_get_unmapped_area(struct file *filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	/* based on get_unmapped_area, but simplified slightly due to that some
+	 * values are known in advance */
+	struct kbase_context *kctx = filp->private_data;
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+	unsigned long align_offset = 0;
+	unsigned long align_mask = 0;
+	unsigned long high_limit = mm->mmap_base;
+	unsigned long low_limit = PAGE_SIZE;
+	int cpu_va_bits = BITS_PER_LONG;
+	int gpu_pc_bits =
+	      kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	bool is_shader_code = false;
+
+	/* err on fixed address */
+	if ((flags & MAP_FIXED) || addr)
+		return -EINVAL;
+
+#ifdef CONFIG_64BIT
+	/* too big? */
+	if (len > TASK_SIZE - SZ_2M)
+		return -ENOMEM;
+
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+
+		if (kbase_hw_has_feature(kctx->kbdev,
+						BASE_HW_FEATURE_33BIT_VA)) {
+			high_limit = kctx->same_va_end << PAGE_SHIFT;
+		} else {
+			high_limit = min_t(unsigned long, mm->mmap_base,
+					(kctx->same_va_end << PAGE_SHIFT));
+			if (len >= SZ_2M) {
+				align_offset = SZ_2M;
+				align_mask = SZ_2M - 1;
+			}
+		}
+
+		low_limit = SZ_2M;
+	} else {
+		cpu_va_bits = 32;
+	}
+#endif /* CONFIG_64BIT */
+	if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) &&
+		(PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) {
+			int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+
+			if (!kctx->pending_regions[cookie])
+				return -EINVAL;
+
+			if (!(kctx->pending_regions[cookie]->flags &
+							KBASE_REG_GPU_NX)) {
+				if (cpu_va_bits > gpu_pc_bits) {
+					align_offset = 1ULL << gpu_pc_bits;
+					align_mask = align_offset - 1;
+					is_shader_code = true;
+				}
+			}
+#ifndef CONFIG_64BIT
+	} else {
+		return current->mm->get_unmapped_area(filp, addr, len, pgoff,
+						      flags);
+#endif
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = low_limit;
+	info.high_limit = high_limit;
+	info.align_offset = align_offset;
+	info.align_mask = align_mask;
+
+	return kbase_unmapped_area_topdown(&info, is_shader_code);
+}
+
+static const struct file_operations kbase_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_open,
+	.release = kbase_release,
+	.read = kbase_read,
+	.poll = kbase_poll,
+	.unlocked_ioctl = kbase_ioctl,
+	.compat_ioctl = kbase_ioctl,
+	.mmap = kbase_mmap,
+	.check_flags = kbase_check_flags,
+	.get_unmapped_area = kbase_get_unmapped_area,
+};
+
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value)
+{
+	writel(value, kbdev->reg + offset);
+}
+
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset)
+{
+	return readl(kbdev->reg + offset);
+}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+/** Show callback for the @c power_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *current_policy;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_get_policy(kbdev);
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c power_policy sysfs file.
+ *
+ * This function is called when the @c power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls @ref kbase_pm_set_policy to change the
+ * policy.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "power_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/** The sysfs file @c power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+
+/** Show callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c core_availability_policy
+ * sysfs file. This is a list of the available policies with the currently
+ * active one surrounded by square brackets.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *current_policy;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_ca_get_policy(kbdev);
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called when the @c core_availability_policy sysfs file is
+ * written to. It matches the requested policy against the available policies
+ * and if a matching policy is found calls @ref kbase_pm_set_policy to change
+ * the policy.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *new_policy = NULL;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "core_availability_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_ca_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/** The sysfs file @c core_availability_policy
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy);
+
+/** Show callback for the @c core_mask sysfs file.
+ *
+ * This function is called to get the contents of the @c core_mask sysfs
+ * file.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS0) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[0]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS1) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[1]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS2) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[2]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Available core mask : 0x%llX\n",
+			kbdev->gpu_props.props.raw_props.shader_present);
+
+	return ret;
+}
+
+/** Store callback for the @c core_mask sysfs file.
+ *
+ * This function is called when the @c core_mask sysfs file is written to.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	u64 new_core_mask[3];
+	int items;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llx %llx %llx",
+			&new_core_mask[0], &new_core_mask[1],
+			&new_core_mask[2]);
+
+	if (items == 1)
+		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+
+	if (items == 1 || items == 3) {
+		u64 shader_present =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		u64 group0_core_mask =
+				kbdev->gpu_props.props.coherency_info.group[0].
+				core_mask;
+
+		if ((new_core_mask[0] & shader_present) != new_core_mask[0] ||
+				!(new_core_mask[0] & group0_core_mask) ||
+			(new_core_mask[1] & shader_present) !=
+						new_core_mask[1] ||
+				!(new_core_mask[1] & group0_core_mask) ||
+			(new_core_mask[2] & shader_present) !=
+						new_core_mask[2] ||
+				!(new_core_mask[2] & group0_core_mask)) {
+			dev_err(dev, "power_policy: invalid core specification\n");
+			return -EINVAL;
+		}
+
+		if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
+				kbdev->pm.debug_core_mask[1] !=
+						new_core_mask[1] ||
+				kbdev->pm.debug_core_mask[2] !=
+						new_core_mask[2]) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+					new_core_mask[1], new_core_mask[2]);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n"
+		"Use format <core_mask>\n"
+		"or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+	return -EINVAL;
+}
+
+/** The sysfs file @c core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+
+/**
+ * set_soft_job_timeout() - Store callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This allows setting the timeout for software jobs. Waiting soft event wait
+ * jobs will be cancelled after this period expires, while soft fence wait jobs
+ * will print debug information if the fence debug feature is enabled.
+ *
+ * This is expressed in milliseconds.
+ *
+ * Return: count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_soft_job_timeout(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int soft_job_timeout_ms;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) ||
+	    (soft_job_timeout_ms <= 0))
+		return -EINVAL;
+
+	atomic_set(&kbdev->js_data.soft_job_timeout_ms,
+		   soft_job_timeout_ms);
+
+	return count;
+}
+
+/**
+ * show_soft_job_timeout() - Show callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * This will return the timeout for the software jobs.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_soft_job_timeout(struct device *dev,
+				       struct device_attribute *attr,
+				       char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n",
+			 atomic_read(&kbdev->js_data.soft_job_timeout_ms));
+}
+
+static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR,
+		   show_soft_job_timeout, set_soft_job_timeout);
+
+static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms,
+				int default_ticks, u32 old_ticks)
+{
+	if (timeout_ms > 0) {
+		u64 ticks = timeout_ms * 1000000ULL;
+		do_div(ticks, kbdev->js_data.scheduling_period_ns);
+		if (!ticks)
+			return 1;
+		return ticks;
+	} else if (timeout_ms < 0) {
+		return default_ticks;
+	} else {
+		return old_ticks;
+	}
+}
+
+/** Store callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the @c js_timeouts sysfs
+ * file. This file contains five values separated by whitespace. The values
+ * are basically the same as JS_SOFT_STOP_TICKS, JS_HARD_STOP_TICKS_SS,
+ * JS_HARD_STOP_TICKS_DUMPING, JS_RESET_TICKS_SS, JS_RESET_TICKS_DUMPING
+ * configuration values (in that order), with the difference that the js_timeout
+ * values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfile file allows the current values in
+ * use by the job scheduler to get override. Note that a value needs to
+ * be other than 0 for it to override the current job scheduler value.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	long js_soft_stop_ms;
+	long js_soft_stop_ms_cl;
+	long js_hard_stop_ms_ss;
+	long js_hard_stop_ms_cl;
+	long js_hard_stop_ms_dumping;
+	long js_reset_ms_ss;
+	long js_reset_ms_cl;
+	long js_reset_ms_dumping;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld",
+			&js_soft_stop_ms, &js_soft_stop_ms_cl,
+			&js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+			&js_hard_stop_ms_dumping, &js_reset_ms_ss,
+			&js_reset_ms_cl, &js_reset_ms_dumping);
+
+	if (items == 8) {
+		struct kbasep_js_device_data *js_data = &kbdev->js_data;
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\
+	js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \
+			default, js_data->ticks_name); \
+	dev_dbg(kbdev->dev, "Overriding " #ticks_name \
+			" with %lu ticks (%lu ms)\n", \
+			(unsigned long)js_data->ticks_name, \
+			ms_name); \
+	} while (0)
+
+		UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms,
+				DEFAULT_JS_SOFT_STOP_TICKS);
+		UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl,
+				DEFAULT_JS_SOFT_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_HARD_STOP_TICKS_SS_8408 :
+				DEFAULT_JS_HARD_STOP_TICKS_SS);
+		UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl,
+				DEFAULT_JS_HARD_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_dumping,
+				js_hard_stop_ms_dumping,
+				DEFAULT_JS_HARD_STOP_TICKS_DUMPING);
+		UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_RESET_TICKS_SS_8408 :
+				DEFAULT_JS_RESET_TICKS_SS);
+		UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl,
+				DEFAULT_JS_RESET_TICKS_CL);
+		UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping,
+				DEFAULT_JS_RESET_TICKS_DUMPING);
+
+		kbase_js_set_timeouts(kbdev);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n"
+			"Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n"
+			"Write 0 for no change, -1 to restore default timeout\n");
+	return -EINVAL;
+}
+
+static unsigned long get_js_timeout_in_ms(
+		u32 scheduling_period_ns,
+		u32 ticks)
+{
+	u64 ms = (u64)ticks * scheduling_period_ns;
+
+	do_div(ms, 1000000UL);
+	return ms;
+}
+
+/** Show callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the @c js_timeouts sysfs
+ * file. It returns the last set values written to the js_timeouts sysfs file.
+ * If the file didn't get written yet, the values will be current setting in
+ * use.
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+	unsigned long js_soft_stop_ms;
+	unsigned long js_soft_stop_ms_cl;
+	unsigned long js_hard_stop_ms_ss;
+	unsigned long js_hard_stop_ms_cl;
+	unsigned long js_hard_stop_ms_dumping;
+	unsigned long js_reset_ms_ss;
+	unsigned long js_reset_ms_cl;
+	unsigned long js_reset_ms_dumping;
+	u32 scheduling_period_ns;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+
+#define GET_TIMEOUT(name) get_js_timeout_in_ms(\
+		scheduling_period_ns, \
+		kbdev->js_data.name)
+
+	js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks);
+	js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl);
+	js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss);
+	js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl);
+	js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping);
+	js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss);
+	js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl);
+	js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef GET_TIMEOUT
+
+	ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+			js_soft_stop_ms, js_soft_stop_ms_cl,
+			js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+			js_hard_stop_ms_dumping, js_reset_ms_ss,
+			js_reset_ms_cl, js_reset_ms_dumping);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** The sysfs file @c js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * JS_STOP_STOP_TICKS_SS
+ * JS_STOP_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
+
+static u32 get_new_js_timeout(
+		u32 old_period,
+		u32 old_ticks,
+		u32 new_scheduling_period_ns)
+{
+	u64 ticks = (u64)old_period * (u64)old_ticks;
+	do_div(ticks, new_scheduling_period_ns);
+	return ticks?ticks:1;
+}
+
+/**
+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ *                            file
+ * @dev:   The device the sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the js_scheduling_period sysfs file is written
+ * to. It checks the data written, and if valid updates the js_scheduling_period
+ * value
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	unsigned int js_scheduling_period;
+	u32 new_scheduling_period_ns;
+	u32 old_period;
+	struct kbasep_js_device_data *js_data;
+	unsigned long flags;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	js_data = &kbdev->js_data;
+
+	ret = kstrtouint(buf, 0, &js_scheduling_period);
+	if (ret || !js_scheduling_period) {
+		dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
+				"Use format <js_scheduling_period_ms>\n");
+		return -EINVAL;
+	}
+
+	new_scheduling_period_ns = js_scheduling_period * 1000000;
+
+	/* Update scheduling timeouts */
+	mutex_lock(&js_data->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* If no contexts have been scheduled since js_timeouts was last written
+	 * to, the new timeouts might not have been latched yet. So check if an
+	 * update is pending and use the new values if necessary. */
+
+	/* Use previous 'new' scheduling period as a base if present. */
+	old_period = js_data->scheduling_period_ns;
+
+#define SET_TIMEOUT(name) \
+		(js_data->name = get_new_js_timeout(\
+				old_period, \
+				kbdev->js_data.name, \
+				new_scheduling_period_ns))
+
+	SET_TIMEOUT(soft_stop_ticks);
+	SET_TIMEOUT(soft_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_ss);
+	SET_TIMEOUT(hard_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_dumping);
+	SET_TIMEOUT(gpu_reset_ticks_ss);
+	SET_TIMEOUT(gpu_reset_ticks_cl);
+	SET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef SET_TIMEOUT
+
+	js_data->scheduling_period_ns = new_scheduling_period_ns;
+
+	kbase_js_set_timeouts(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&js_data->runpool_mutex);
+
+	dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
+			js_scheduling_period);
+
+	return count;
+}
+
+/**
+ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs
+ *                             entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the JS scheduling
+ * period.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	u32 period;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	period = kbdev->js_data.scheduling_period_ns;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+			period / 1000000);
+
+	return ret;
+}
+
+static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
+		show_js_scheduling_period, set_js_scheduling_period);
+
+#if !MALI_CUSTOMER_RELEASE
+/** Store callback for the @c force_replay sysfs file.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (!strncmp("limit=", buf, MIN(6, count))) {
+		int force_replay_limit;
+		int items = sscanf(buf, "limit=%u", &force_replay_limit);
+
+		if (items == 1) {
+			kbdev->force_replay_random = false;
+			kbdev->force_replay_limit = force_replay_limit;
+			kbdev->force_replay_count = 0;
+
+			return count;
+		}
+	} else if (!strncmp("random_limit", buf, MIN(12, count))) {
+		kbdev->force_replay_random = true;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("norandom_limit", buf, MIN(14, count))) {
+		kbdev->force_replay_random = false;
+		kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("core_req=", buf, MIN(9, count))) {
+		unsigned int core_req;
+		int items = sscanf(buf, "core_req=%x", &core_req);
+
+		if (items == 1) {
+			kbdev->force_replay_core_req = (base_jd_core_req)core_req;
+
+			return count;
+		}
+	}
+	dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n");
+	return -EINVAL;
+}
+
+/** Show callback for the @c force_replay sysfs file.
+ *
+ * This function is called to get the contents of the @c force_replay sysfs
+ * file. It returns the last set value written to the force_replay sysfs file.
+ * If the file didn't get written yet, the values will be 0.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_force_replay(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->force_replay_random)
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=0\nrandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_core_req);
+	else
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=%u\nnorandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_limit,
+				kbdev->force_replay_core_req);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** The sysfs file @c force_replay.
+ *
+ */
+static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay,
+		set_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG
+static ssize_t set_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int softstop_always;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &softstop_always);
+	if (ret || ((softstop_always != 0) && (softstop_always != 1))) {
+		dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n"
+				"Use format <soft_stop_always>\n");
+		return -EINVAL;
+	}
+
+	kbdev->js_data.softstop_always = (bool) softstop_always;
+	dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n",
+			(kbdev->js_data.softstop_always) ?
+			"Enabled" : "Disabled");
+	return count;
+}
+
+static ssize_t show_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * By default, soft-stops are disabled when only a single context is present. The ability to
+ * enable soft-stop when only a single context is present can be used for debug and unit-testing purposes.
+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.)
+ */
+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG
+typedef void (kbasep_debug_command_func) (struct kbase_device *);
+
+enum kbasep_debug_command_code {
+	KBASEP_DEBUG_COMMAND_DUMPTRACE,
+
+	/* This must be the last enum */
+	KBASEP_DEBUG_COMMAND_COUNT
+};
+
+struct kbasep_debug_command {
+	char *str;
+	kbasep_debug_command_func *func;
+};
+
+/** Debug commands supported by the driver */
+static const struct kbasep_debug_command debug_commands[] = {
+	{
+	 .str = "dumptrace",
+	 .func = &kbasep_trace_dump,
+	 }
+};
+
+/** Show callback for the @c debug_command sysfs file.
+ *
+ * This function is called to get the contents of the @c debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** Store callback for the @c debug_command sysfs file.
+ *
+ * This function is called when the @c debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @ref debug_commands to issue the command.
+ *
+ * @param dev	The device with sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) {
+		if (sysfs_streq(debug_commands[i].str, buf)) {
+			debug_commands[i].func(kbdev);
+			return count;
+		}
+	}
+
+	/* Debug Command not found */
+	dev_err(dev, "debug_command: command not known\n");
+	return -EINVAL;
+}
+
+/** The sysfs file @c debug_command.
+ *
+ * This is used to issue general debug commands to the device driver.
+ * Reading it will produce a list of debug commands, separated by newlines.
+ * Writing to it with one of those commands will issue said command.
+ */
+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
+#endif /* CONFIG_MALI_DEBUG */
+
+/**
+ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get a description of the present Mali
+ * GPU via the gpuinfo sysfs entry.  This includes the GPU family, the
+ * number of cores, the hardware version and the raw product id.  For
+ * example:
+ *
+ *    Mali-T60x MP4 r0p0 0x6956
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t kbase_show_gpuinfo(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	static const struct gpu_product_id_name {
+		unsigned id;
+		char *name;
+	} gpu_product_id_names[] = {
+		{ .id = GPU_ID_PI_T60X, .name = "Mali-T60x" },
+		{ .id = GPU_ID_PI_T62X, .name = "Mali-T62x" },
+		{ .id = GPU_ID_PI_T72X, .name = "Mali-T72x" },
+		{ .id = GPU_ID_PI_T76X, .name = "Mali-T76x" },
+		{ .id = GPU_ID_PI_T82X, .name = "Mali-T82x" },
+		{ .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
+		{ .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
+		{ .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+		{ .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G71" },
+		{ .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-THEx" },
+		{ .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-TSIx" },
+	};
+	const char *product_name = "(Unknown Mali GPU)";
+	struct kbase_device *kbdev;
+	u32 gpu_id;
+	unsigned product_id, product_id_mask;
+	unsigned i;
+	bool is_new_format;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	is_new_format = GPU_ID_IS_NEW_FORMAT(product_id);
+	product_id_mask =
+		(is_new_format ?
+			GPU_ID2_PRODUCT_MODEL :
+			GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
+		const struct gpu_product_id_name *p = &gpu_product_id_names[i];
+
+		if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) &&
+		    (p->id & product_id_mask) ==
+		    (product_id & product_id_mask)) {
+			product_name = p->name;
+			break;
+		}
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%s MP%d r%dp%d 0x%04X\n",
+		product_name, kbdev->gpu_props.num_cores,
+		(gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT,
+		(gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT,
+		product_id);
+}
+static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
+
+/**
+ * set_dvfs_period - Store callback for the dvfs_period sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the dvfs_period sysfs file is written to. It
+ * checks the data written, and if valid updates the DVFS period variable,
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_dvfs_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int dvfs_period;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &dvfs_period);
+	if (ret || dvfs_period <= 0) {
+		dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n"
+				"Use format <dvfs_period_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.dvfs_period = dvfs_period;
+	dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period);
+
+	return count;
+}
+
+/**
+ * show_dvfs_period - Show callback for the dvfs_period sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_dvfs_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period);
+
+	return ret;
+}
+
+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
+		set_dvfs_period);
+
+/**
+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the pm_poweroff sysfs file is written to.
+ *
+ * This file contains three values separated by whitespace. The values
+ * are gpu_poweroff_time (the period of the poweroff timer, in ns),
+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle
+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer
+ * ticks before the GPU is powered off), in that order.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	s64 gpu_poweroff_time;
+	int poweroff_shader_ticks, poweroff_gpu_ticks;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time,
+			&poweroff_shader_ticks,
+			&poweroff_gpu_ticks);
+	if (items != 3) {
+		dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n"
+				"Use format <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+	kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
+	kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+
+	return count;
+}
+
+/**
+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
+			ktime_to_ns(kbdev->pm.gpu_poweroff_time),
+			kbdev->pm.poweroff_shader_ticks,
+			kbdev->pm.poweroff_gpu_ticks);
+
+	return ret;
+}
+
+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
+		set_pm_poweroff);
+
+/**
+ * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the reset_timeout sysfs file is written to. It
+ * checks the data written, and if valid updates the reset timeout.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_reset_timeout(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int reset_timeout;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &reset_timeout);
+	if (ret || reset_timeout <= 0) {
+		dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n"
+				"Use format <reset_timeout_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->reset_timeout_ms = reset_timeout;
+	dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout);
+
+	return count;
+}
+
+/**
+ * show_reset_timeout - Show callback for the reset_timeout sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current reset timeout.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_reset_timeout(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms);
+
+	return ret;
+}
+
+static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout,
+		set_reset_timeout);
+
+
+
+static ssize_t show_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_size);
+	if (err)
+		return err;
+
+	kbase_mem_pool_trim(&kbdev->mem_pool, new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size,
+		set_mem_pool_size);
+
+static ssize_t show_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_max_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_max_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_max_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
+		set_mem_pool_max_size);
+
+#ifdef CONFIG_DEBUG_FS
+
+/* Number of entries in serialize_jobs_settings[] */
+#define NR_SERIALIZE_JOBS_SETTINGS 5
+/* Maximum string length in serialize_jobs_settings[].name */
+#define MAX_SERIALIZE_JOBS_NAME_LEN 16
+
+static struct
+{
+	char *name;
+	u8 setting;
+} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = {
+	{"none", 0},
+	{"intra-slot", KBASE_SERIALIZE_INTRA_SLOT},
+	{"inter-slot", KBASE_SERIALIZE_INTER_SLOT},
+	{"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT},
+	{"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT |
+			KBASE_SERIALIZE_RESET}
+};
+
+/**
+ * kbasep_serialize_jobs_seq_show: Show callback for the serialize_jobs debugfs
+ *                                 file
+ * @sfile: seq_file pointer
+ * @data:  Private callback data
+ *
+ * This function is called to get the contents of the serialize_jobs debugfs
+ * file. This is a list of the available settings with the currently active one
+ * surrounded by square brackets.
+ *
+ * Return: 0 on success, or an error code on error
+ */
+static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_device *kbdev = sfile->private;
+	int i;
+
+	CSTD_UNUSED(data);
+
+	for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+		if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting)
+			seq_printf(sfile, "[%s] ",
+					serialize_jobs_settings[i].name);
+		else
+			seq_printf(sfile, "%s ",
+					serialize_jobs_settings[i].name);
+	}
+
+	seq_puts(sfile, "\n");
+
+	return 0;
+}
+
+/**
+ * kbasep_serialize_jobs_debugfs_write: Store callback for the serialize_jobs
+ *                                      debugfs file.
+ * @file:  File pointer
+ * @ubuf:  User buffer containing data to store
+ * @count: Number of bytes in user buffer
+ * @ppos:  File position
+ *
+ * This function is called when the serialize_jobs debugfs file is written to.
+ * It matches the requested setting against the available settings and if a
+ * matching setting is found updates kbdev->serialize_jobs.
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	struct seq_file *s = file->private_data;
+	struct kbase_device *kbdev = s->private;
+	char buf[MAX_SERIALIZE_JOBS_NAME_LEN];
+	int i;
+	bool valid = false;
+
+	CSTD_UNUSED(ppos);
+
+	count = min_t(size_t, sizeof(buf) - 1, count);
+	if (copy_from_user(buf, ubuf, count))
+		return -EFAULT;
+
+	buf[count] = 0;
+
+	for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+		if (sysfs_streq(serialize_jobs_settings[i].name, buf)) {
+			kbdev->serialize_jobs =
+					serialize_jobs_settings[i].setting;
+			valid = true;
+			break;
+		}
+	}
+
+	if (!valid) {
+		dev_err(kbdev->dev, "serialize_jobs: invalid setting\n");
+		return -EINVAL;
+	}
+
+	return count;
+}
+
+/**
+ * kbasep_serialize_jobs_debugfs_open: Open callback for the serialize_jobs
+ *                                     debugfs file
+ * @in:   inode pointer
+ * @file: file pointer
+ *
+ * Return: Zero on success, error code on failure
+ */
+static int kbasep_serialize_jobs_debugfs_open(struct inode *in,
+		struct file *file)
+{
+	return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_serialize_jobs_debugfs_fops = {
+	.open = kbasep_serialize_jobs_debugfs_open,
+	.read = seq_read,
+	.write = kbasep_serialize_jobs_debugfs_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_DEBUG_FS */
+
+static int kbasep_protected_mode_enter(struct kbase_device *kbdev)
+{
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+		GPU_COMMAND_SET_PROTECTED_MODE, NULL);
+	return 0;
+}
+
+static bool kbasep_protected_mode_supported(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static struct kbase_protected_ops kbasep_protected_ops = {
+	.protected_mode_enter = kbasep_protected_mode_enter,
+	.protected_mode_reset = NULL,
+	.protected_mode_supported = kbasep_protected_mode_supported,
+};
+
+static void kbasep_protected_mode_init(struct kbase_device *kbdev)
+{
+	kbdev->protected_ops = NULL;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		/* Use native protected ops */
+		kbdev->protected_ops = &kbasep_protected_ops;
+	}
+#ifdef PROTECTED_CALLBACKS
+	else
+		kbdev->protected_ops = PROTECTED_CALLBACKS;
+#endif
+
+	if (kbdev->protected_ops)
+		kbdev->protected_mode_support =
+				kbdev->protected_ops->protected_mode_supported(kbdev);
+	else
+		kbdev->protected_mode_support = false;
+}
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	int err = -ENOMEM;
+
+	if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) {
+		dev_err(kbdev->dev, "Register window unavailable\n");
+		err = -EIO;
+		goto out_region;
+	}
+
+	kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+	if (!kbdev->reg) {
+		dev_err(kbdev->dev, "Can't remap register window\n");
+		err = -EINVAL;
+		goto out_ioremap;
+	}
+
+	return 0;
+
+ out_ioremap:
+	release_mem_region(kbdev->reg_start, kbdev->reg_size);
+ out_region:
+	return err;
+}
+
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+	if (kbdev->reg) {
+		iounmap(kbdev->reg);
+		release_mem_region(kbdev->reg_start, kbdev->reg_size);
+		kbdev->reg = NULL;
+		kbdev->reg_start = 0;
+		kbdev->reg_size = 0;
+	}
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+static int registers_map(struct kbase_device * const kbdev)
+{
+
+		/* the first memory resource is the physical address of the GPU
+		 * registers */
+		struct platform_device *pdev = to_platform_device(kbdev->dev);
+		struct resource *reg_res;
+		int err;
+
+		reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!reg_res) {
+			dev_err(kbdev->dev, "Invalid register resource\n");
+			return -ENOENT;
+		}
+
+		kbdev->reg_start = reg_res->start;
+		kbdev->reg_size = resource_size(reg_res);
+
+		err = kbase_common_reg_map(kbdev);
+		if (err) {
+			dev_err(kbdev->dev, "Failed to map registers\n");
+			return err;
+		}
+
+	return 0;
+}
+
+static void registers_unmap(struct kbase_device *kbdev)
+{
+	kbase_common_reg_unmap(kbdev);
+}
+
+static int power_control_init(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int err = 0;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
+	if (IS_ERR_OR_NULL(kbdev->regulator)) {
+		err = PTR_ERR(kbdev->regulator);
+		kbdev->regulator = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get regulator\n");
+			return err;
+		}
+		dev_info(kbdev->dev,
+			"Continuing without Mali regulator control\n");
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+	kbdev->clock = clk_get(kbdev->dev, "clk_mali");
+	if (IS_ERR_OR_NULL(kbdev->clock)) {
+		err = PTR_ERR(kbdev->clock);
+		kbdev->clock = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get clock\n");
+			goto fail;
+		}
+		dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare_enable(kbdev->clock);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Failed to prepare and enable clock (%d)\n",
+				err);
+			goto fail;
+		}
+	}
+
+#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) \
+	|| defined(LSK_OPPV2_BACKPORT)
+	err = dev_pm_opp_of_add_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	err = of_init_opp_table(kbdev->dev);
+#else
+	err = 0;
+#endif /* LINUX_VERSION_CODE */
+	if (err)
+		dev_dbg(kbdev->dev, "OPP table not found\n");
+#endif /* CONFIG_OF && CONFIG_PM_OPP */
+
+	return 0;
+
+fail:
+
+if (kbdev->clock != NULL) {
+	clk_put(kbdev->clock);
+	kbdev->clock = NULL;
+}
+
+#ifdef CONFIG_REGULATOR
+	if (NULL != kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif
+
+	return err;
+}
+
+static void power_control_term(struct kbase_device *kbdev)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	dev_pm_opp_of_remove_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+	of_free_opp_table(kbdev->dev);
+#endif
+
+	if (kbdev->clock) {
+		clk_disable_unprepare(kbdev->clock);
+		clk_put(kbdev->clock);
+		kbdev->clock = NULL;
+	}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	if (kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#if KBASE_GPU_RESET_EN
+#include <mali_kbase_hwaccess_jm.h>
+
+static void trigger_quirks_reload(struct kbase_device *kbdev)
+{
+	kbase_pm_context_active(kbdev);
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	kbase_pm_context_idle(kbdev);
+}
+
+#define MAKE_QUIRK_ACCESSORS(type) \
+static int type##_quirks_set(void *data, u64 val) \
+{ \
+	struct kbase_device *kbdev; \
+	kbdev = (struct kbase_device *)data; \
+	kbdev->hw_quirks_##type = (u32)val; \
+	trigger_quirks_reload(kbdev); \
+	return 0;\
+} \
+\
+static int type##_quirks_get(void *data, u64 *val) \
+{ \
+	struct kbase_device *kbdev;\
+	kbdev = (struct kbase_device *)data;\
+	*val = kbdev->hw_quirks_##type;\
+	return 0;\
+} \
+DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
+		type##_quirks_set, "%llu\n")
+
+MAKE_QUIRK_ACCESSORS(sc);
+MAKE_QUIRK_ACCESSORS(tiler);
+MAKE_QUIRK_ACCESSORS(mmu);
+
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read
+ * @file: File object to read is for
+ * @buf:  User buffer to populate with data
+ * @len:  Length of user buffer
+ * @ppos: Offset within file object
+ *
+ * Retrieves the current status of protected debug mode
+ * (0 = disabled, 1 = enabled)
+ *
+ * Return: Number of bytes added to user buffer
+ */
+static ssize_t debugfs_protected_debug_mode_read(struct file *file,
+				char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)file->private_data;
+	u32 gpu_status;
+	ssize_t ret_val;
+
+	kbase_pm_context_active(kbdev);
+	gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL);
+	kbase_pm_context_idle(kbdev);
+
+	if (gpu_status & GPU_DBGEN)
+		ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2);
+	else
+		ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2);
+
+	return ret_val;
+}
+
+/*
+ * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops
+ *
+ * Contains the file operations for the "protected_debug_mode" debugfs file
+ */
+static const struct file_operations fops_protected_debug_mode = {
+	.open = simple_open,
+	.read = debugfs_protected_debug_mode_read,
+	.llseek = default_llseek,
+};
+
+static int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	struct dentry *debugfs_ctx_defaults_directory;
+	int err;
+
+	kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
+			NULL);
+	if (!kbdev->mali_debugfs_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
+			kbdev->mali_debugfs_directory);
+	if (!kbdev->debugfs_ctx_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
+			kbdev->debugfs_ctx_directory);
+	if (!debugfs_ctx_defaults_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+#if !MALI_CUSTOMER_RELEASE
+	kbasep_regs_dump_debugfs_init(kbdev);
+#endif /* !MALI_CUSTOMER_RELEASE */
+	kbasep_regs_history_debugfs_init(kbdev);
+
+	kbase_debug_job_fault_debugfs_init(kbdev);
+	kbasep_gpu_memory_debugfs_init(kbdev);
+	kbase_as_fault_debugfs_init(kbdev);
+#if KBASE_GPU_RESET_EN
+	debugfs_create_file("quirks_sc", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_sc_quirks);
+	debugfs_create_file("quirks_tiler", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_tiler_quirks);
+	debugfs_create_file("quirks_mmu", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_mmu_quirks);
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_COH_USER
+	debugfs_create_bool("infinite_cache", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->infinite_cache_active_default);
+#endif /* CONFIG_MALI_COH_USER */
+
+	debugfs_create_size_t("mem_pool_max_size", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->mem_pool_max_size_default);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		debugfs_create_file("protected_debug_mode", S_IRUGO,
+				kbdev->mali_debugfs_directory, kbdev,
+				&fops_protected_debug_mode);
+	}
+
+#if KBASE_TRACE_ENABLE
+	kbasep_trace_debugfs_init(kbdev);
+#endif /* KBASE_TRACE_ENABLE */
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kbasep_trace_timeline_debugfs_init(kbdev);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+#ifdef CONFIG_DEBUG_FS
+	debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_serialize_jobs_debugfs_fops);
+#endif /* CONFIG_DEBUG_FS */
+
+	return 0;
+
+out:
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+	return err;
+}
+
+static void kbase_device_debugfs_term(struct kbase_device *kbdev)
+{
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+}
+
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	return 0;
+}
+
+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
+#endif /* CONFIG_DEBUG_FS */
+
+static void kbase_device_coherency_init(struct kbase_device *kbdev, u32 gpu_id)
+{
+#ifdef CONFIG_OF
+	u32 supported_coherency_bitmap =
+		kbdev->gpu_props.props.raw_props.coherency_mode;
+	const void *coherency_override_dts;
+	u32 override_coherency;
+#endif /* CONFIG_OF */
+
+	kbdev->system_coherency = COHERENCY_NONE;
+
+	/* device tree may override the coherency */
+#ifdef CONFIG_OF
+	coherency_override_dts = of_get_property(kbdev->dev->of_node,
+						"system-coherency",
+						NULL);
+	if (coherency_override_dts) {
+
+		override_coherency = be32_to_cpup(coherency_override_dts);
+
+		if ((override_coherency <= COHERENCY_NONE) &&
+			(supported_coherency_bitmap &
+			 COHERENCY_FEATURE_BIT(override_coherency))) {
+
+			kbdev->system_coherency = override_coherency;
+
+			dev_info(kbdev->dev,
+				"Using coherency mode %u set from dtb",
+				override_coherency);
+		} else
+			dev_warn(kbdev->dev,
+				"Ignoring unsupported coherency mode %u set from dtb",
+				override_coherency);
+	}
+
+#endif /* CONFIG_OF */
+
+	kbdev->gpu_props.props.raw_props.coherency_mode =
+		kbdev->system_coherency;
+}
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+
+/* Callback used by the kbase bus logger client, to initiate a GPU reset
+ * when the bus log is restarted.  GPU reset is used as reference point
+ * in HW bus log analyses.
+ */
+static void kbase_logging_started_cb(void *data)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)data;
+
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	dev_info(kbdev->dev, "KBASE - Bus logger restarted\n");
+}
+#endif
+
+static struct attribute *kbase_attrs[] = {
+#ifdef CONFIG_MALI_DEBUG
+	&dev_attr_debug_command.attr,
+	&dev_attr_js_softstop_always.attr,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&dev_attr_force_replay.attr,
+#endif
+	&dev_attr_js_timeouts.attr,
+	&dev_attr_soft_job_timeout.attr,
+	&dev_attr_gpuinfo.attr,
+	&dev_attr_dvfs_period.attr,
+	&dev_attr_pm_poweroff.attr,
+	&dev_attr_reset_timeout.attr,
+	&dev_attr_js_scheduling_period.attr,
+	&dev_attr_power_policy.attr,
+	&dev_attr_core_availability_policy.attr,
+	&dev_attr_core_mask.attr,
+	&dev_attr_mem_pool_size.attr,
+	&dev_attr_mem_pool_max_size.attr,
+	NULL
+};
+
+static const struct attribute_group kbase_attr_group = {
+	.attrs = kbase_attrs,
+};
+
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	const struct list_head *dev_list;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	if (kbdev->inited_subsys & inited_buslogger) {
+		bl_core_client_unregister(kbdev->buslogger);
+		kbdev->inited_subsys &= ~inited_buslogger;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_sysfs_group) {
+		sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+		kbdev->inited_subsys &= ~inited_sysfs_group;
+	}
+
+	if (kbdev->inited_subsys & inited_dev_list) {
+		dev_list = kbase_dev_list_get();
+		list_del(&kbdev->entry);
+		kbase_dev_list_put(dev_list);
+		kbdev->inited_subsys &= ~inited_dev_list;
+	}
+
+	if (kbdev->inited_subsys & inited_misc_register) {
+		misc_deregister(&kbdev->mdev);
+		kbdev->inited_subsys &= ~inited_misc_register;
+	}
+
+	if (kbdev->inited_subsys & inited_get_device) {
+		put_device(kbdev->dev);
+		kbdev->inited_subsys &= ~inited_get_device;
+	}
+
+	if (kbdev->inited_subsys & inited_debugfs) {
+		kbase_device_debugfs_term(kbdev);
+		kbdev->inited_subsys &= ~inited_debugfs;
+	}
+
+	if (kbdev->inited_subsys & inited_job_fault) {
+		kbase_debug_job_fault_dev_term(kbdev);
+		kbdev->inited_subsys &= ~inited_job_fault;
+	}
+
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	if (kbdev->inited_subsys & inited_ipa) {
+		kbase_ipa_term(kbdev->ipa_ctx);
+		kbdev->inited_subsys &= ~inited_ipa;
+	}
+#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+
+	if (kbdev->inited_subsys & inited_vinstr) {
+		kbase_vinstr_term(kbdev->vinstr_ctx);
+		kbdev->inited_subsys &= ~inited_vinstr;
+	}
+
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_term(kbdev);
+		kbdev->inited_subsys &= ~inited_devfreq;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_backend_late) {
+		kbase_backend_late_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_late;
+	}
+
+	if (kbdev->inited_subsys & inited_tlstream) {
+		kbase_tlstream_term();
+		kbdev->inited_subsys &= ~inited_tlstream;
+	}
+
+	/* Bring job and mem sys to a halt before we continue termination */
+
+	if (kbdev->inited_subsys & inited_js)
+		kbasep_js_devdata_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_mem)
+		kbase_mem_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_js) {
+		kbasep_js_devdata_term(kbdev);
+		kbdev->inited_subsys &= ~inited_js;
+	}
+
+	if (kbdev->inited_subsys & inited_mem) {
+		kbase_mem_term(kbdev);
+		kbdev->inited_subsys &= ~inited_mem;
+	}
+
+	if (kbdev->inited_subsys & inited_pm_runtime_init) {
+		kbdev->pm.callback_power_runtime_term(kbdev);
+		kbdev->inited_subsys &= ~inited_pm_runtime_init;
+	}
+
+	if (kbdev->inited_subsys & inited_device) {
+		kbase_device_term(kbdev);
+		kbdev->inited_subsys &= ~inited_device;
+	}
+
+	if (kbdev->inited_subsys & inited_backend_early) {
+		kbase_backend_early_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_early;
+	}
+
+	if (kbdev->inited_subsys & inited_io_history) {
+		kbase_io_history_term(&kbdev->io_history);
+		kbdev->inited_subsys &= ~inited_io_history;
+	}
+
+	if (kbdev->inited_subsys & inited_power_control) {
+		power_control_term(kbdev);
+		kbdev->inited_subsys &= ~inited_power_control;
+	}
+
+	if (kbdev->inited_subsys & inited_registers_map) {
+		registers_unmap(kbdev);
+		kbdev->inited_subsys &= ~inited_registers_map;
+	}
+
+#ifdef CONFIG_MALI_NO_MALI
+	if (kbdev->inited_subsys & inited_gpu_device) {
+		gpu_device_destroy(kbdev);
+		kbdev->inited_subsys &= ~inited_gpu_device;
+	}
+#endif /* CONFIG_MALI_NO_MALI */
+
+	if (kbdev->inited_subsys != 0)
+		dev_err(kbdev->dev, "Missing sub system termination\n");
+
+	kbase_device_free(kbdev);
+
+	return 0;
+}
+
+
+/* Number of register accesses for the buffer that we allocate during
+ * initialization time. The buffer size can be changed later via debugfs. */
+#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512)
+
+static int kbase_platform_device_probe(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev;
+	struct mali_base_gpu_core_props *core_props;
+	u32 gpu_id;
+	const struct list_head *dev_list;
+	int err = 0;
+
+#ifdef CONFIG_OF
+	err = kbase_platform_early_init();
+	if (err) {
+		dev_err(&pdev->dev, "Early platform initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+#endif
+
+	kbdev = kbase_device_alloc();
+	if (!kbdev) {
+		dev_err(&pdev->dev, "Allocate device failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+
+	kbdev->dev = &pdev->dev;
+	dev_set_drvdata(kbdev->dev, kbdev);
+
+#ifdef CONFIG_MALI_NO_MALI
+	err = gpu_device_create(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Dummy model initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_gpu_device;
+#endif /* CONFIG_MALI_NO_MALI */
+
+	err = assign_irqs(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "IRQ search failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	err = registers_map(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Register map failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_registers_map;
+
+	err = power_control_init(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Power control initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_power_control;
+
+	err = kbase_io_history_init(&kbdev->io_history,
+			KBASEP_DEFAULT_REGISTER_HISTORY_SIZE);
+	if (err) {
+		dev_err(&pdev->dev, "Register access history initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+	kbdev->inited_subsys |= inited_io_history;
+
+	err = kbase_backend_early_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Early backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_early;
+
+	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+			kbase_dev_nr);
+
+	kbase_disjoint_init(kbdev);
+
+	/* obtain min/max configured gpu frequencies */
+	core_props = &(kbdev->gpu_props.props.core_props);
+	core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+	core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+
+	kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US;
+
+	err = kbase_device_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Device initialization failed (%d)\n", err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_device;
+
+	if (kbdev->pm.callback_power_runtime_init) {
+		err = kbdev->pm.callback_power_runtime_init(kbdev);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Runtime PM initialization failed\n");
+			kbase_platform_device_remove(pdev);
+			return err;
+		}
+		kbdev->inited_subsys |= inited_pm_runtime_init;
+	}
+
+	err = kbase_mem_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Memory subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_mem;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+	gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	kbase_device_coherency_init(kbdev, gpu_id);
+
+	kbasep_protected_mode_init(kbdev);
+
+	err = kbasep_js_devdata_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job JS devdata initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_js;
+
+	err = kbase_tlstream_init();
+	if (err) {
+		dev_err(kbdev->dev, "Timeline stream initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_tlstream;
+
+	err = kbase_backend_late_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Late backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_late;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	err = kbase_devfreq_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Fevfreq initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_devfreq;
+#endif /* CONFIG_MALI_DEVFREQ */
+
+	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+	if (!kbdev->vinstr_ctx) {
+		dev_err(kbdev->dev,
+			"Virtual instrumentation initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+	kbdev->inited_subsys |= inited_vinstr;
+
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	kbdev->ipa_ctx = kbase_ipa_init(kbdev);
+	if (!kbdev->ipa_ctx) {
+		dev_err(kbdev->dev, "IPA initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+
+	kbdev->inited_subsys |= inited_ipa;
+#endif  /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+
+	err = kbase_debug_job_fault_dev_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job fault debug initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_job_fault;
+
+	err = kbase_device_debugfs_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "DebugFS initialization failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_debugfs;
+
+	/* initialize the kctx list */
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
+
+	kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+	kbdev->mdev.name = kbdev->devname;
+	kbdev->mdev.fops = &kbase_fops;
+	kbdev->mdev.parent = get_device(kbdev->dev);
+	kbdev->inited_subsys |= inited_get_device;
+
+	err = misc_register(&kbdev->mdev);
+	if (err) {
+		dev_err(kbdev->dev, "Misc device registration failed for %s\n",
+			kbdev->devname);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_misc_register;
+
+	dev_list = kbase_dev_list_get();
+	list_add(&kbdev->entry, &kbase_dev_list);
+	kbase_dev_list_put(dev_list);
+	kbdev->inited_subsys |= inited_dev_list;
+
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+	if (err) {
+		dev_err(&pdev->dev, "SysFS group creation failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_sysfs_group;
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	err = bl_core_client_register(kbdev->devname,
+						kbase_logging_started_cb,
+						kbdev, &kbdev->buslogger,
+						THIS_MODULE, NULL);
+	if (err == 0) {
+		kbdev->inited_subsys |= inited_buslogger;
+		bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
+	} else {
+		dev_warn(kbdev->dev, "Bus log client registration failed\n");
+		err = 0;
+	}
+#endif
+
+	dev_info(kbdev->dev,
+			"Probed as %s\n", dev_name(kbdev->mdev.this_device));
+
+	kbase_dev_nr++;
+
+	return err;
+}
+
+#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE
+
+
+/** Suspend callback from the OS.
+ *
+ * This is called by Linux when the device should suspend.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 1;
+#endif
+
+	kbase_pm_suspend(kbdev);
+	return 0;
+}
+
+/** Resume callback from the OS.
+ *
+ * This is called by Linux when the device should resume from suspension.
+ *
+ * @param dev  The device to resume
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_resume(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kbase_pm_resume(kbdev);
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 0;
+#endif
+	return 0;
+}
+
+/** Runtime suspend callback from the OS.
+ *
+ * This is called by Linux when the device should prepare for a condition in which it will
+ * not be able to communicate with the CPU(s) and RAM due to power management.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 1;
+#endif
+
+	if (kbdev->pm.backend.callback_power_runtime_off) {
+		kbdev->pm.backend.callback_power_runtime_off(kbdev);
+		dev_dbg(dev, "runtime suspend\n");
+	}
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/** Runtime resume callback from the OS.
+ *
+ * This is called by Linux when the device should go into a fully active state.
+ *
+ * @param dev  The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_resume(struct device *dev)
+{
+	int ret = 0;
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->pm.backend.callback_power_runtime_on) {
+		ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
+		dev_dbg(dev, "runtime resume\n");
+	}
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 0;
+#endif
+
+	return ret;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_device_runtime_idle - Runtime idle callback from the OS.
+ * @dev: The device to suspend
+ *
+ * This is called by Linux when the device appears to be inactive and it might
+ * be placed into a low power state.
+ *
+ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend,
+ * otherwise a standard Linux error code
+ */
+static int kbase_device_runtime_idle(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* Use platform specific implementation if it exists. */
+	if (kbdev->pm.backend.callback_power_runtime_idle)
+		return kbdev->pm.backend.callback_power_runtime_idle(kbdev);
+
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+#ifndef CONFIG_MALI_DEVFREQ
+static int mali_os_freeze(struct device *device)
+{
+	mali_dev_freeze();
+	return kbase_device_suspend(device);
+}
+
+static int mali_os_restore(struct device *device)
+{
+	mali_dev_restore();
+	return kbase_device_resume(device);
+}
+#endif
+
+
+/** The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+	.suspend = kbase_device_suspend,
+	.resume = kbase_device_resume,
+#ifndef CONFIG_MALI_DEVFREQ
+	.freeze = mali_os_freeze,
+	.thaw = kbase_device_resume,
+	.restore = mali_os_restore,
+#endif
+#ifdef KBASE_PM_RUNTIME
+	.runtime_suspend = kbase_device_runtime_suspend,
+	.runtime_resume = kbase_device_runtime_resume,
+	.runtime_idle = kbase_device_runtime_idle,
+#endif /* KBASE_PM_RUNTIME */
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+	{ .compatible = "arm,malit6xx" },
+	{ .compatible = "arm,mali-midgard" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
+
+static struct platform_driver kbase_platform_driver = {
+	.probe = kbase_platform_device_probe,
+	.remove = kbase_platform_device_remove,
+	.driver = {
+		   .name = kbase_drv_name,
+		   .owner = THIS_MODULE,
+		   .pm = &kbase_pm_ops,
+		   .of_match_table = of_match_ptr(kbase_dt_ids),
+	},
+};
+
+/*
+ * The driver will not provide a shortcut to create the Mali platform device
+ * anymore when using Device Tree.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else
+
+static int __init kbase_driver_init(void)
+{
+	int ret;
+
+	ret = kbase_platform_early_init();
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	ret = kbase_platform_fake_register();
+	if (ret)
+		return ret;
+#endif
+	ret = platform_driver_register(&kbase_platform_driver);
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	if (ret)
+		kbase_platform_fake_unregister();
+#endif
+	return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+	platform_driver_unregister(&kbase_platform_driver);
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	kbase_platform_fake_unregister();
+#endif
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
+		__stringify(BASE_UK_VERSION_MAJOR) "." \
+		__stringify(BASE_UK_VERSION_MINOR) ")");
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
+#define CREATE_TRACE_POINTS
+#endif
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Create the trace points (otherwise we just get code to call a tracepoint) */
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
+
+void kbase_trace_mali_pm_status(u32 event, u64 value)
+{
+	trace_mali_pm_status(event, value);
+}
+
+void kbase_trace_mali_pm_power_off(u32 event, u64 value)
+{
+	trace_mali_pm_power_off(event, value);
+}
+
+void kbase_trace_mali_pm_power_on(u32 event, u64 value)
+{
+	trace_mali_pm_power_on(event, value);
+}
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id)
+{
+	trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id);
+}
+
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value)
+{
+	trace_mali_page_fault_insert_pages(event, value);
+}
+
+void kbase_trace_mali_mmu_as_in_use(int event)
+{
+	trace_mali_mmu_as_in_use(event);
+}
+
+void kbase_trace_mali_mmu_as_released(int event)
+{
+	trace_mali_mmu_as_released(event);
+}
+
+void kbase_trace_mali_total_alloc_pages_change(long long int event)
+{
+	trace_mali_total_alloc_pages_change(event);
+}
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_MALI_SYSTEM_TRACE
+#include "mali_linux_kbase_trace.h"
+#endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100755
index 0000000..fb57ac2
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+	NULL,
+	NULL
+};
+
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+	kbasep_debug_assert_registered_cb.func = func;
+	kbasep_debug_assert_registered_cb.param = param;
+}
+
+void kbasep_debug_assert_call_hook(void)
+{
+	if (kbasep_debug_assert_registered_cb.func != NULL)
+		kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
new file mode 100755
index 0000000..5fff289
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If different from 0, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+struct kbasep_debug_assert_cb {
+	kbase_debug_assert_hook *func;
+	void *param;
+};
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+		"In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION __func__
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated to the @see KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+		do { \
+			pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+			pr_err(__VA_ARGS__);\
+			pr_err("\n");\
+		} while (false)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @see KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+	KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+	/**
+	 * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+	 * @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+	 *
+	 * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+	 *
+	 * @param expr Boolean expression
+	 * @param ...  Message to display when @a expr is false, as a format string followed by format arguments.
+	 */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+		do { \
+			if (!(expr)) { \
+				KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+				KBASE_CALL_ASSERT_HOOK();\
+				BUG();\
+			} \
+		} while (false)
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif				/* CONFIG_MALI_DEBUG */
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif				/* _KBASE_DEBUG_H */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
new file mode 100755
index 0000000..83c5c37
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
@@ -0,0 +1,502 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/spinlock.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+	bool             ret;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	ret = !list_empty(event_list);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return ret;
+}
+
+static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
+	struct base_job_fault_event *event;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		return true;
+	}
+	list_for_each_entry(event, event_list, head) {
+		if (event->katom->kctx == kctx) {
+			spin_unlock_irqrestore(&kbdev->job_fault_event_lock,
+					flags);
+			return false;
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+	return true;
+}
+
+/* wait until the fault happen and copy the event */
+static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
+		struct base_job_fault_event *event)
+{
+	struct list_head            *event_list = &kbdev->job_fault_event_list;
+	struct base_job_fault_event *event_in;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		if (wait_event_interruptible(kbdev->job_fault_wq,
+				 kbase_is_job_fault_event_pending(kbdev)))
+			return -ERESTARTSYS;
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+
+	event_in = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	event->event_code = event_in->event_code;
+	event->katom = event_in->katom;
+
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return 0;
+
+}
+
+/* remove the event from the queue */
+static struct base_job_fault_event *kbase_job_fault_event_dequeue(
+		struct kbase_device *kbdev, struct list_head *event_list)
+{
+	struct base_job_fault_event *event;
+
+	event = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	list_del(event_list->next);
+
+	return event;
+
+}
+
+/* Remove all the following atoms after the failed atom in the same context
+ * Call the postponed bottom half of job done.
+ * Then, this context could be rescheduled.
+ */
+static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
+{
+	struct list_head *event_list = &kctx->job_fault_resume_event_list;
+
+	while (!list_empty(event_list)) {
+		struct base_job_fault_event *event;
+
+		event = kbase_job_fault_event_dequeue(kctx->kbdev,
+				&kctx->job_fault_resume_event_list);
+		kbase_jd_done_worker(&event->katom->work);
+	}
+
+}
+
+/* Remove all the failed atoms that belong to different contexts
+ * Resume all the contexts that were suspend due to failed job
+ */
+static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	while (!list_empty(event_list)) {
+		kbase_job_fault_event_dequeue(kbdev, event_list);
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		wake_up(&kbdev->job_fault_resume_wq);
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+}
+
+static void kbase_job_fault_resume_worker(struct work_struct *data)
+{
+	struct base_job_fault_event *event = container_of(data,
+			struct base_job_fault_event, job_fault_work);
+	struct kbase_context *kctx;
+	struct kbase_jd_atom *katom;
+
+	katom = event->katom;
+	kctx = katom->kctx;
+
+	dev_info(kctx->kbdev->dev, "Job dumping wait\n");
+
+	/* When it was waked up, it need to check if queue is empty or the
+	 * failed atom belongs to different context. If yes, wake up. Both
+	 * of them mean the failed job has been dumped. Please note, it
+	 * should never happen that the job_fault_event_list has the two
+	 * atoms belong to the same context.
+	 */
+	wait_event(kctx->kbdev->job_fault_resume_wq,
+			 kbase_ctx_has_no_event_pending(kctx));
+
+	atomic_set(&kctx->job_fault_count, 0);
+	kbase_jd_done_worker(&katom->work);
+
+	/* In case the following atoms were scheduled during failed job dump
+	 * the job_done_worker was held. We need to rerun it after the dump
+	 * was finished
+	 */
+	kbase_job_fault_resume_event_cleanup(kctx);
+
+	dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
+}
+
+static struct base_job_fault_event *kbase_job_fault_event_queue(
+		struct list_head *event_list,
+		struct kbase_jd_atom *atom,
+		u32 completion_code)
+{
+	struct base_job_fault_event *event;
+
+	event = &atom->fault_event;
+
+	event->katom = atom;
+	event->event_code = completion_code;
+
+	list_add_tail(&event->head, event_list);
+
+	return event;
+
+}
+
+static void kbase_job_fault_event_post(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, u32 completion_code)
+{
+	struct base_job_fault_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
+				katom, completion_code);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	wake_up_interruptible(&kbdev->job_fault_wq);
+
+	INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
+	queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
+
+	dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
+			katom->kctx->tgid, katom->kctx->id);
+
+}
+
+/*
+ * This function will process the job fault
+ * Get the register copy
+ * Send the failed job dump event
+ * Create a Wait queue to wait until the job dump finish
+ */
+
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Check if dumping is in the process
+	 * only one atom of each context can be dumped at the same time
+	 * If the atom belongs to different context, it can be dumped
+	 */
+	if (atomic_read(&kctx->job_fault_count) > 0) {
+		kbase_job_fault_event_queue(
+				&kctx->job_fault_resume_event_list,
+				katom, completion_code);
+		dev_info(kctx->kbdev->dev, "queue:%d\n",
+				kbase_jd_atom_id(kctx, katom));
+		return true;
+	}
+
+	if (kctx->kbdev->job_fault_debug == true) {
+
+		if (completion_code != BASE_JD_EVENT_DONE) {
+
+			if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
+				dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
+				return false;
+			}
+
+			kbase_job_fault_event_post(kctx->kbdev, katom,
+					completion_code);
+			atomic_inc(&kctx->job_fault_count);
+			dev_info(kctx->kbdev->dev, "post:%d\n",
+					kbase_jd_atom_id(kctx, katom));
+			return true;
+
+		}
+	}
+	return false;
+
+}
+
+static int debug_job_fault_show(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+	struct kbase_context *kctx = event->katom->kctx;
+	int i;
+
+	dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
+			kctx->tgid, kctx->id, event->reg_offset);
+
+	if (kctx->reg_dump == NULL) {
+		dev_warn(kbdev->dev, "reg dump is NULL");
+		return -1;
+	}
+
+	if (kctx->reg_dump[event->reg_offset] ==
+			REGISTER_DUMP_TERMINATION_FLAG) {
+		/* Return the error here to stop the read. And the
+		 * following next() will not be called. The stop can
+		 * get the real event resource and release it
+		 */
+		return -1;
+	}
+
+	if (event->reg_offset == 0)
+		seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
+
+	for (i = 0; i < 50; i++) {
+		if (kctx->reg_dump[event->reg_offset] ==
+				REGISTER_DUMP_TERMINATION_FLAG) {
+			break;
+		}
+		seq_printf(m, "%08x: %08x\n",
+				kctx->reg_dump[event->reg_offset],
+				kctx->reg_dump[1+event->reg_offset]);
+		event->reg_offset += 2;
+
+	}
+
+
+	return 0;
+}
+static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+
+	dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
+			event->reg_offset, (int)*pos);
+
+	return event;
+}
+
+static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event;
+
+	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);
+
+	/* The condition is trick here. It needs make sure the
+	 * fault hasn't happened and the dumping hasn't been started,
+	 * or the dumping has finished
+	 */
+	if (*pos == 0) {
+		event = kmalloc(sizeof(*event), GFP_KERNEL);
+		if (!event)
+			return NULL;
+		event->reg_offset = 0;
+		if (kbase_job_fault_event_wait(kbdev, event)) {
+			kfree(event);
+			return NULL;
+		}
+
+		/* The cache flush workaround is called in bottom half of
+		 * job done but we delayed it. Now we should clean cache
+		 * earlier. Then the GPU memory dump should be correct.
+		 */
+		if (event->katom->need_cache_flush_cores_retained) {
+			kbase_gpu_cacheclean(kbdev, event->katom);
+			event->katom->need_cache_flush_cores_retained = 0;
+		}
+
+	} else
+		return NULL;
+
+	return event;
+}
+
+static void debug_job_fault_stop(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+
+	/* here we wake up the kbase_jd_done_worker after stop, it needs
+	 * get the memory dump before the register dump in debug daemon,
+	 * otherwise, the memory dump may be incorrect.
+	 */
+
+	if (v != NULL) {
+		kfree(v);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 1");
+
+	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+		if (!list_empty(&kbdev->job_fault_event_list)) {
+			kbase_job_fault_event_dequeue(kbdev,
+				&kbdev->job_fault_event_list);
+			wake_up(&kbdev->job_fault_resume_wq);
+		}
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
+	}
+
+}
+
+static const struct seq_operations ops = {
+	.start = debug_job_fault_start,
+	.next = debug_job_fault_next,
+	.stop = debug_job_fault_stop,
+	.show = debug_job_fault_show,
+};
+
+static int debug_job_fault_open(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_open(file, &ops);
+
+	((struct seq_file *)file->private_data)->private = kbdev;
+	dev_info(kbdev->dev, "debug job fault seq open");
+
+	kbdev->job_fault_debug = true;
+
+	return 0;
+
+}
+
+static int debug_job_fault_release(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_release(in, file);
+
+	kbdev->job_fault_debug = false;
+
+	/* Clean the unprocessed job fault. After that, all the suspended
+	 * contexts could be rescheduled.
+	 */
+	kbase_job_fault_event_cleanup(kbdev);
+
+	dev_info(kbdev->dev, "debug job fault seq close");
+
+	return 0;
+}
+
+static const struct file_operations kbasep_debug_job_fault_fops = {
+	.open = debug_job_fault_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = debug_job_fault_release,
+};
+
+/*
+ *  Initialize debugfs entry for job fault dump
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("job_fault", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_debug_job_fault_fops);
+}
+
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+
+	INIT_LIST_HEAD(&kbdev->job_fault_event_list);
+
+	init_waitqueue_head(&(kbdev->job_fault_wq));
+	init_waitqueue_head(&(kbdev->job_fault_resume_wq));
+	spin_lock_init(&kbdev->job_fault_event_lock);
+
+	kbdev->job_fault_resume_workq = alloc_workqueue(
+			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
+	if (!kbdev->job_fault_resume_workq)
+		return -ENOMEM;
+
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+/*
+ * Release the relevant resource per device
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->job_fault_resume_workq);
+}
+
+
+/*
+ *  Initialize the relevant data structure per context
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
+{
+
+	/* We need allocate double size register range
+	 * Because this memory will keep the register address and value
+	 */
+	kctx->reg_dump = vmalloc(0x4000 * 2);
+	if (kctx->reg_dump == NULL)
+		return;
+
+	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
+		vfree(kctx->reg_dump);
+		kctx->reg_dump = NULL;
+	}
+	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
+	atomic_set(&kctx->job_fault_count, 0);
+
+}
+
+/*
+ *  release the relevant resource per context
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
+{
+	vfree(kctx->reg_dump);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
new file mode 100755
index 0000000..a2bf898
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
@@ -0,0 +1,96 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_JOB_FAULT_H
+#define _KBASE_DEBUG_JOB_FAULT_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
+
+/**
+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue
+ *		per device and initialize the required lists.
+ * @kbdev:	Device pointer
+ *
+ * Return: Zero on success or a negative error code.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_dev_term - Clean up resources created in
+ *		kbase_debug_job_fault_dev_init.
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_context_init - Initialize the relevant
+ *		data structure per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_context_term - Release the relevant
+ *		resource per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_process - Process the failed job.
+ *      It will send a event and wake up the job fault waiting queue
+ *      Then create a work queue to wait for job dump finish
+ *      This function should be called in the interrupt handler and before
+ *      jd_done that make sure the jd_done_worker will be delayed until the
+ *      job dump finish
+ * @katom: The failed atom pointer
+ * @completion_code: the job status
+ * @return true if dump is going on
+ */
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code);
+
+
+/**
+ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
+ *      address during the job fault process, the relevant registers will
+ *      be saved when a job fault happen
+ * @kctx: KBase context pointer
+ * @reg_range: Maximum register address space
+ * @return true if initializing successfully
+ */
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range);
+
+/**
+ * kbase_job_fault_get_reg_snapshot - Read the interested registers for
+ *      failed job dump
+ * @kctx: KBase context pointer
+ * @return true if getting registers successfully
+ */
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
+
+#endif  /*_KBASE_DEBUG_JOB_FAULT_H*/
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
new file mode 100755
index 0000000..05b6a05
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -0,0 +1,281 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+struct debug_mem_mapping {
+	struct list_head node;
+
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long flags;
+
+	u64 start_pfn;
+	size_t nr_pages;
+};
+
+struct debug_mem_data {
+	struct list_head mapping_list;
+	struct kbase_context *kctx;
+};
+
+struct debug_mem_seq_off {
+	struct list_head *lh;
+	size_t offset;
+};
+
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data;
+	struct debug_mem_mapping *map;
+	loff_t pos = *_pos;
+
+	list_for_each_entry(map, &mem_data->mapping_list, node) {
+		if (pos >= map->nr_pages) {
+			pos -= map->nr_pages;
+		} else {
+			data = kmalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				return NULL;
+			data->lh = &map->node;
+			data->offset = pos;
+			return data;
+		}
+	}
+
+	/* Beyond the end */
+	return NULL;
+}
+
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+	kfree(v);
+}
+
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	if (data->offset < map->nr_pages - 1) {
+		data->offset++;
+		++*pos;
+		return data;
+	}
+
+	if (list_is_last(data->lh, &mem_data->mapping_list)) {
+		kfree(data);
+		return NULL;
+	}
+
+	data->lh = data->lh->next;
+	data->offset = 0;
+	++*pos;
+
+	return data;
+}
+
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+	int i, j;
+	struct page *page;
+	uint32_t *mapping;
+	pgprot_t prot = PAGE_KERNEL;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	kbase_gpu_vm_lock(mem_data->kctx);
+
+	if (data->offset >= map->alloc->nents) {
+		seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+				data->offset) << PAGE_SHIFT);
+		goto out;
+	}
+
+	if (!(map->flags & KBASE_REG_CPU_CACHED))
+		prot = pgprot_writecombine(prot);
+
+	page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset]));
+	mapping = vmap(&page, 1, VM_MAP, prot);
+	if (!mapping)
+		goto out;
+
+	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+		seq_printf(m, "%016llx:", i + ((map->start_pfn +
+				data->offset) << PAGE_SHIFT));
+
+		for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+			seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+		seq_putc(m, '\n');
+	}
+
+	vunmap(mapping);
+
+	seq_putc(m, '\n');
+
+out:
+	kbase_gpu_vm_unlock(mem_data->kctx);
+	return 0;
+}
+
+static const struct seq_operations ops = {
+	.start = debug_mem_start,
+	.next = debug_mem_next,
+	.stop = debug_mem_stop,
+	.show = debug_mem_show,
+};
+
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+	struct file *kctx_file = i->i_private;
+	struct kbase_context *kctx = kctx_file->private_data;
+	struct rb_node *p;
+	struct debug_mem_data *mem_data;
+	int ret;
+
+	ret = seq_open(file, &ops);
+	if (ret)
+		return ret;
+
+	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	if (!mem_data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mem_data->kctx = kctx;
+
+	INIT_LIST_HEAD(&mem_data->mapping_list);
+
+	get_file(kctx_file);
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (p = rb_first(&kctx->reg_rbtree); p; p = rb_next(p)) {
+		struct kbase_va_region *reg;
+		struct debug_mem_mapping *mapping;
+
+		reg = rb_entry(p, struct kbase_va_region, rblink);
+
+		if (reg->gpu_alloc == NULL)
+			/* Empty region - ignore */
+			continue;
+
+		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping) {
+			ret = -ENOMEM;
+			kbase_gpu_vm_unlock(kctx);
+			goto out;
+		}
+
+		mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		mapping->start_pfn = reg->start_pfn;
+		mapping->nr_pages = reg->nr_pages;
+		mapping->flags = reg->flags;
+		list_add_tail(&mapping->node, &mem_data->mapping_list);
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	((struct seq_file *)file->private_data)->private = mem_data;
+
+	return 0;
+
+out:
+	if (mem_data) {
+		while (!list_empty(&mem_data->mapping_list)) {
+			struct debug_mem_mapping *mapping;
+
+			mapping = list_first_entry(&mem_data->mapping_list,
+					struct debug_mem_mapping, node);
+			kbase_mem_phy_alloc_put(mapping->alloc);
+			list_del(&mapping->node);
+			kfree(mapping);
+		}
+		fput(kctx_file);
+		kfree(mem_data);
+	}
+	seq_release(i, file);
+	return ret;
+}
+
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+	struct file *kctx_file = inode->i_private;
+	struct seq_file *sfile = file->private_data;
+	struct debug_mem_data *mem_data = sfile->private;
+	struct debug_mem_mapping *mapping;
+
+	seq_release(inode, file);
+
+	while (!list_empty(&mem_data->mapping_list)) {
+		mapping = list_first_entry(&mem_data->mapping_list,
+				struct debug_mem_mapping, node);
+		kbase_mem_phy_alloc_put(mapping->alloc);
+		list_del(&mapping->node);
+		kfree(mapping);
+	}
+
+	kfree(mem_data);
+
+	fput(kctx_file);
+
+	return 0;
+}
+
+static const struct file_operations kbase_debug_mem_view_fops = {
+	.open = debug_mem_open,
+	.release = debug_mem_release,
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+/**
+ * kbase_debug_mem_view_init - Initialise the mem_view sysfs file
+ * @kctx_file: The /dev/mali0 file instance for the context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct file *kctx_file)
+{
+	struct kbase_context *kctx = kctx_file->private_data;
+
+	debugfs_create_file("mem_view", S_IRUGO, kctx->kctx_dentry, kctx_file,
+			&kbase_debug_mem_view_fops);
+}
+
+#endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
new file mode 100755
index 0000000..20ab51a
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -0,0 +1,25 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+void kbase_debug_mem_view_init(struct file *kctx_file);
+
+#endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100755
index 0000000..4fa7c9b
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,1562 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Defintions (types, defines, etcs) common to Kbase. They are placed here to
+ * allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_mmu_mode.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_pm.h>
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#include <linux/bus_logger.h>
+#endif
+
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#endif				/* CONFIG_SYNC */
+
+#include "mali_kbase_dma_fence.h"
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif				/* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_DEVFREQ */
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#if defined(CONFIG_PM_RUNTIME) || \
+	(defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+#define KBASE_PM_RUNTIME 1
+#endif
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif				/* CONFIG_MALI_DEBUG */
+#endif				/* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT             1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT           500
+
+/**
+ * Prevent soft-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * Therefore, soft stop may still be disabled due to HW issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+
+/**
+ * Prevent hard-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS        3
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS              16
+
+/* mmu */
+#define MIDGARD_MMU_VA_BITS 48
+
+#if MIDGARD_MMU_VA_BITS > 39
+#define MIDGARD_MMU_TOPLEVEL    0
+#else
+#define MIDGARD_MMU_TOPLEVEL    1
+#endif
+
+#define MIDGARD_MMU_BOTTOMLEVEL 3
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID     (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8	/* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_hwaccess_defs.h"
+
+#define KBASEP_FORCE_REPLAY_DISABLED 0
+
+/* Maximum force replay limit when randomization is enabled */
+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+
+/** Atom has been previously soft-stoppped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has been previously retried to execute */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has fail dependency on cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom is currently in the list of atoms blocked on cross-slot dependencies */
+#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in protected mode */
+#define KBASE_KATOM_FLAG_PROTECTED (1<<11)
+/* Atom has been stored in runnable_tree */
+#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS  (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ *  entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+		(JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+/* Serialize atoms within a slot (ie only one atom per job slot) */
+#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0)
+/* Serialize atoms between slots (ie only one job slot running at any time) */
+#define KBASE_SERIALIZE_INTER_SLOT (1 << 1)
+/* Reset the GPU after each atom completion */
+#define KBASE_SERIALIZE_RESET (1 << 2)
+
+#ifdef CONFIG_DEBUG_FS
+struct base_job_fault_event {
+
+	u32 event_code;
+	struct kbase_jd_atom *katom;
+	struct work_struct job_fault_work;
+	struct list_head head;
+	int reg_offset;
+};
+
+#endif
+
+struct kbase_jd_atom_dependency {
+	struct kbase_jd_atom *atom;
+	u8 dep_type;
+};
+
+/**
+ * struct kbase_io_access - holds information about 1 register access
+ *
+ * @addr: first bit indicates r/w (r=0, w=1)
+ * @value: value written or read
+ */
+struct kbase_io_access {
+	uintptr_t addr;
+	u32 value;
+};
+
+/**
+ * struct kbase_io_history - keeps track of all recent register accesses
+ *
+ * @enabled: true if register accesses are recorded, false otherwise
+ * @lock: spinlock protecting kbase_io_access array
+ * @count: number of registers read/written
+ * @size: number of elements in kbase_io_access array
+ * @buf: array of kbase_io_access
+ */
+struct kbase_io_history {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool enabled;
+#else
+	u32 enabled;
+#endif
+
+	spinlock_t lock;
+	size_t count;
+	u16 size;
+	struct kbase_io_access *buf;
+};
+
+/**
+ * @brief The function retrieves a read-only reference to the atom field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return readonly reference to dependent ATOM.
+ */
+static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return (const struct kbase_jd_atom *)(dep->atom);
+}
+
+/**
+ * @brief The function retrieves a read-only reference to the dependency type field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return A dependency type value.
+ */
+static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return dep->dep_type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency.
+ * @param[in] a      The ATOM to be set as a dependency.
+ * @param     type   The ATOM dependency type to be set.
+ *
+ */
+static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
+		struct kbase_jd_atom *a, u8 type)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = a;
+	dep->dep_type = type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency to be cleared.
+ *
+ */
+static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = NULL;
+	dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
+
+enum kbase_atom_gpu_rb_state {
+	/* Atom is not currently present in slot ringbuffer */
+	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
+	/* Atom is in slot ringbuffer but is blocked on a previous atom */
+	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+	/* Atom is in slot ringbuffer but is waiting for a previous protected
+	 * mode transition to complete */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
+	/* Atom is in slot ringbuffer but is waiting for proected mode
+	 * transition */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
+	/* Atom is in slot ringbuffer but is waiting for cores to become
+	 * available */
+	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
+	/* Atom is in slot ringbuffer but is blocked on affinity */
+	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
+	/* Atom is in slot ringbuffer and ready to run */
+	KBASE_ATOM_GPU_RB_READY,
+	/* Atom is in slot ringbuffer and has been submitted to the GPU */
+	KBASE_ATOM_GPU_RB_SUBMITTED,
+	/* Atom must be returned to JS as soon as it reaches the head of the
+	 * ringbuffer due to a previous failure */
+	KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1
+};
+
+enum kbase_atom_enter_protected_state {
+	/*
+	 * Starting state:
+	 * Check if a transition into protected mode is required.
+	 *
+	 * NOTE: The integer value of this must
+	 *       match KBASE_ATOM_EXIT_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
+	/* Wait for vinstr to suspend. */
+	KBASE_ATOM_ENTER_PROTECTED_VINSTR,
+	/* Wait for the L2 to become idle in preparation for
+	 * the coherency change. */
+	KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
+	/* End state;
+	 * Prepare coherency change. */
+	KBASE_ATOM_ENTER_PROTECTED_FINISHED,
+};
+
+enum kbase_atom_exit_protected_state {
+	/*
+	 * Starting state:
+	 * Check if a transition out of protected mode is required.
+	 *
+	 * NOTE: The integer value of this must
+	 *       match KBASE_ATOM_ENTER_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
+	/* Wait for the L2 to become idle in preparation
+	 * for the reset. */
+	KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
+	/* Issue the protected reset. */
+	KBASE_ATOM_EXIT_PROTECTED_RESET,
+	/* End state;
+	 * Wait for the reset to complete. */
+	KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
+};
+
+struct kbase_ext_res {
+	u64 gpu_address;
+	struct kbase_mem_phy_alloc *alloc;
+};
+
+struct kbase_jd_atom {
+	struct work_struct work;
+	ktime_t start_timestamp;
+	u64 time_spent_us; /**< Total time spent on the GPU in microseconds */
+
+	struct base_jd_udata udata;
+	struct kbase_context *kctx;
+
+	struct list_head dep_head[2];
+	struct list_head dep_item[2];
+	const struct kbase_jd_atom_dependency dep[2];
+	/* List head used during job dispatch job_done processing - as
+	 * dependencies may not be entirely resolved at this point, we need to
+	 * use a separate list head. */
+	struct list_head jd_item;
+	/* true if atom's jd_item is currently on a list. Prevents atom being
+	 * processed twice. */
+	bool in_jd_list;
+
+	u16 nr_extres;
+	struct kbase_ext_res *extres;
+
+	u32 device_nr;
+	u64 affinity;
+	u64 jc;
+	enum kbase_atom_coreref_state coreref_state;
+#ifdef CONFIG_KDS
+	struct list_head node;
+	struct kds_resource_set *kds_rset;
+	bool kds_dep_satisfied;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_SYNC
+	struct sync_fence *fence;
+	struct sync_fence_waiter sync_waiter;
+#endif				/* CONFIG_SYNC */
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		/* This points to the dma-buf fence for this atom. If this is
+		 * NULL then there is no fence for this atom and the other
+		 * fields related to dma_fence may have invalid data.
+		 *
+		 * The context and seqno fields contain the details for this
+		 * fence.
+		 *
+		 * This fence is signaled when the katom is completed,
+		 * regardless of the event_code of the katom (signal also on
+		 * failure).
+		 */
+		struct fence *fence;
+		/* The dma-buf fence context number for this atom. A unique
+		 * context number is allocated to each katom in the context on
+		 * context creation.
+		 */
+		unsigned int context;
+		/* The dma-buf fence sequence number for this atom. This is
+		 * increased every time this katom uses dma-buf fence.
+		 */
+		atomic_t seqno;
+		/* This contains a list of all callbacks set up to wait on
+		 * other fences.  This atom must be held back from JS until all
+		 * these callbacks have been called and dep_count have reached
+		 * 0. The initial value of dep_count must be equal to the
+		 * number of callbacks on this list.
+		 *
+		 * This list is protected by jctx.lock. Callbacks are added to
+		 * this list when the atom is built and the wait are set up.
+		 * All the callbacks then stay on the list until all callbacks
+		 * have been called and the atom is queued, or cancelled, and
+		 * then all callbacks are taken off the list and freed.
+		 */
+		struct list_head callbacks;
+		/* Atomic counter of number of outstandind dma-buf fence
+		 * dependencies for this atom. When dep_count reaches 0 the
+		 * atom may be queued.
+		 *
+		 * The special value "-1" may only be set after the count
+		 * reaches 0, while holding jctx.lock. This indicates that the
+		 * atom has been handled, either queued in JS or cancelled.
+		 *
+		 * If anyone but the dma-fence worker sets this to -1 they must
+		 * ensure that any potentially queued worker must have
+		 * completed before allowing the atom to be marked as unused.
+		 * This can be done by flushing the fence work queue:
+		 * kctx->dma_fence.wq.
+		 */
+		atomic_t dep_count;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
+	enum base_jd_event_code event_code;
+	base_jd_core_req core_req;	    /**< core requirements */
+	/** Job Slot to retry submitting to if submission from IRQ handler failed
+	 *
+	 * NOTE: see if this can be unified into the another member e.g. the event */
+	int retry_submit_on_slot;
+
+	u32 ticks;
+	/* JS atom priority with respect to other atoms on its kctx. */
+	int sched_priority;
+
+	int poking;		/* BASE_HW_ISSUE_8316 */
+
+	wait_queue_head_t completed;
+	enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	int work_id;
+#endif
+	/* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
+	int slot_nr;
+
+	u32 atom_flags;
+
+	/* Number of times this atom has been retried. Used by replay soft job.
+	 */
+	int retry_count;
+
+	enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+	u64 need_cache_flush_cores_retained;
+
+	atomic_t blocked;
+
+	/* Pointer to atom that this atom has same-slot dependency on */
+	struct kbase_jd_atom *pre_dep;
+	/* Pointer to atom that has same-slot dependency on this atom */
+	struct kbase_jd_atom *post_dep;
+
+	/* Pointer to atom that this atom has cross-slot dependency on */
+	struct kbase_jd_atom *x_pre_dep;
+	/* Pointer to atom that has cross-slot dependency on this atom */
+	struct kbase_jd_atom *x_post_dep;
+
+	/* The GPU's flush count recorded at the time of submission, used for
+	 * the cache flush optimisation */
+	u32 flush_id;
+
+	struct kbase_jd_atom_backend backend;
+#ifdef CONFIG_DEBUG_FS
+	struct base_job_fault_event fault_event;
+#endif
+
+	/* List head used for two different purposes:
+	 *  1. Overflow list for JS ring buffers. If an atom is ready to run,
+	 *     but there is no room in the JS ring buffer, then the atom is put
+	 *     on the ring buffer's overflow list using this list node.
+	 *  2. List of waiting soft jobs.
+	 */
+	struct list_head queue;
+
+	struct kbase_va_region *jit_addr_reg;
+
+	/* If non-zero, this indicates that the atom will fail with the set
+	 * event_code when the atom is processed. */
+	enum base_jd_event_code will_fail_event_code;
+
+	/* Atoms will only ever be transitioning into, or out of
+	 * protected mode so we do not need two separate fields.
+	 */
+	union {
+		enum kbase_atom_enter_protected_state enter;
+		enum kbase_atom_exit_protected_state exit;
+	} protected_state;
+
+	struct rb_node runnable_tree_node;
+
+	/* 'Age' of atom relative to other atoms in the context. */
+	u32 age;
+};
+
+static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
+{
+	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+struct kbase_jd_context {
+	struct mutex lock;
+	struct kbasep_js_kctx_info sched_info;
+	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+	/** Tracks all job-dispatch jobs.  This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	u32 job_nr;
+
+	/** Waitq that reflects whether there are no jobs (including SW-only
+	 * dependency jobs). This is set when no jobs are present on the ctx,
+	 * and clear when there are jobs.
+	 *
+	 * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
+	 * the Job Scheduler is unaware of jobs that are blocked on dependencies,
+	 * and SW-only dependency jobs.
+	 *
+	 * This waitq can be waited upon to find out when the context jobs are all
+	 * done/cancelled (including those that might've been blocked on
+	 * dependencies) - and so, whether it can be terminated. However, it should
+	 * only be terminated once it is not present in the run-pool (see
+	 * kbasep_js_kctx_info::ctx::is_scheduled).
+	 *
+	 * Since the waitq is only set under kbase_jd_context::lock,
+	 * the waiter should also briefly obtain and drop kbase_jd_context::lock to
+	 * guarentee that the setter has completed its work on the kbase_context
+	 *
+	 * This must be updated atomically with:
+	 * - kbase_jd_context::job_nr */
+	wait_queue_head_t zero_jobs_wait;
+
+	/** Job Done workqueue. */
+	struct workqueue_struct *job_done_wq;
+
+	spinlock_t tb_lock;
+	u32 *tb;
+	size_t tb_wrap_offset;
+
+#ifdef CONFIG_KDS
+	struct kds_callback kds_cb;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_t work_id;
+#endif
+};
+
+struct kbase_device_info {
+	u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+enum {
+	KBASE_AS_POKE_STATE_IN_FLIGHT     = 1<<0,
+	KBASE_AS_POKE_STATE_KILLING_POKE  = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+	u64	transtab;
+	u64	memattr;
+	u64	transcfg;
+};
+
+/**
+ * Important: Our code makes assumptions that a struct kbase_as structure is always at
+ * kbase_device->as[number]. This is used to recover the containing
+ * struct kbase_device from a struct kbase_as structure.
+ *
+ * Therefore, struct kbase_as structures must not be allocated anywhere else.
+ */
+struct kbase_as {
+	int number;
+
+	struct workqueue_struct *pf_wq;
+	struct work_struct work_pagefault;
+	struct work_struct work_busfault;
+	enum kbase_mmu_fault_type fault_type;
+	bool protected_mode;
+	u32 fault_status;
+	u64 fault_addr;
+	u64 fault_extra_addr;
+
+	struct kbase_mmu_setup current_setup;
+
+	/* BASE_HW_ISSUE_8316  */
+	struct workqueue_struct *poke_wq;
+	struct work_struct poke_work;
+	/** Protected by hwaccess_lock */
+	int poke_refcount;
+	/** Protected by hwaccess_lock */
+	kbase_as_poke_state poke_state;
+	struct hrtimer poke_timer;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+struct kbasep_mem_device {
+	atomic_t used_pages;   /* Tracks usage of OS shared memory. Updated
+				   when OS memory is allocated/freed. */
+
+};
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+	/* Comma on its own, to extend the list */
+	,
+	/* Must be the last in the enum */
+	KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT  (((u8)1) << 1)
+
+struct kbase_trace {
+	struct timespec timestamp;
+	u32 thread_id;
+	u32 cpu;
+	void *ctx;
+	bool katom;
+	int atom_number;
+	u64 atom_udata[2];
+	u64 gpu_addr;
+	unsigned long info_val;
+	u8 code;
+	u8 jobslot;
+	u8 refcount;
+	u8 flags;
+};
+
+/** Event IDs for the power management framework.
+ *
+ * Any of these events might be missed, so they should not be relied upon to
+ * find the precise state of the GPU at a particular time in the
+ * trace. Overall, we should get a high percentage of these events for
+ * statisical purposes, and so a few missing should not be a problem */
+enum kbase_timeline_pm_event {
+	/* helper for tests */
+	KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** Event reserved for backwards compatibility with 'init' events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** The power state of the device has changed.
+	 *
+	 * Specifically, the device has reached a desired or available state.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
+
+	/** The GPU is becoming active.
+	 *
+	 * This event is sent when the first context is about to use the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
+
+	/** The GPU is becoming idle.
+	 *
+	 * This event is sent when the last context has finished using the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
+
+	/** Event reserved for backwards compatibility with 'policy_change'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_4,
+
+	/** Event reserved for backwards compatibility with 'system_suspend'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_5,
+
+	/** Event reserved for backwards compatibility with 'system_resume'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_6,
+
+	/** The job scheduler is requesting to power up/down cores.
+	 *
+	 * This event is sent when:
+	 * - powered down cores are needed to complete a job
+	 * - powered up cores are not needed anymore
+	 */
+	KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+
+	KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+};
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+struct kbase_trace_kctx_timeline {
+	atomic_t jd_atoms_in_flight;
+	u32 owner_tgid;
+};
+
+struct kbase_trace_kbdev_timeline {
+	/* Note: strictly speaking, not needed, because it's in sync with
+	 * kbase_device::jm_slots[]::submitted_nr
+	 *
+	 * But it's kept as an example of how to add global timeline tracking
+	 * information
+	 *
+	 * The caller must hold hwaccess_lock when accessing this */
+	u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
+
+	/* Last UID for each PM event */
+	atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
+	/* Counter for generating PM event UIDs */
+	atomic_t pm_event_uid_counter;
+	/*
+	 * L2 transition state - true indicates that the transition is ongoing
+	 * Expected to be protected by hwaccess_lock */
+	bool l2_transitioning;
+};
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+
+struct kbasep_kctx_list_element {
+	struct list_head link;
+	struct kbase_context *kctx;
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_device_data {
+	/**
+	 * The lock protecting Power Management structures accessed outside of
+	 * IRQ.
+	 *
+	 * This lock must also be held whenever the GPU is being powered on or
+	 * off.
+	 */
+	struct mutex lock;
+
+	/** The reference count of active contexts on this device. */
+	int active_count;
+	/** Flag indicating suspending/suspended */
+	bool suspending;
+	/* Wait queue set when active_count == 0 */
+	wait_queue_head_t zero_active_count_wait;
+
+	/**
+	 * Bit masks identifying the available shader cores that are specified
+	 * via sysfs. One mask per job slot.
+	 */
+	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
+	u64 debug_core_mask_all;
+
+	/**
+	 * Callback for initializing the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 *
+	 * @return 0 on success, else error code
+	 */
+	 int (*callback_power_runtime_init)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback for terminating the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 */
+	void (*callback_power_runtime_term)(struct kbase_device *kbdev);
+
+	/* Time in milliseconds between each dvfs sample */
+	u32 dvfs_period;
+
+	/* Period of GPU poweroff timer */
+	ktime_t gpu_poweroff_time;
+
+	/* Number of ticks of GPU poweroff timer before shader is powered off */
+	int poweroff_shader_ticks;
+
+	/* Number of ticks of GPU poweroff timer before GPU is powered off */
+	int poweroff_gpu_ticks;
+
+	struct kbase_pm_backend_data backend;
+};
+
+/**
+ * struct kbase_protected_ops - Platform specific functions for GPU protected
+ * mode operations
+ * @protected_mode_enter: Callback to enter protected mode on the GPU
+ * @protected_mode_reset: Callback to reset the GPU and exit protected mode.
+ * @protected_mode_supported: Callback to check if protected mode is supported.
+ */
+struct kbase_protected_ops {
+	/**
+	 * protected_mode_enter() - Enter protected mode on the GPU
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enter)(struct kbase_device *kbdev);
+
+	/**
+	 * protected_mode_reset() - Reset the GPU and exit protected mode
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_reset)(struct kbase_device *kbdev);
+
+	/**
+	 * protected_mode_supported() - Check if protected mode is supported
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	bool (*protected_mode_supported)(struct kbase_device *kbdev);
+};
+
+
+/**
+ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
+ * @kbdev:     Kbase device where memory is used
+ * @cur_size:  Number of free pages currently in the pool (may exceed @max_size
+ *             in some corner cases)
+ * @max_size:  Maximum number of free pages in the pool
+ * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size
+ *             and @page_list
+ * @page_list: List of free pages in the pool
+ * @reclaim:   Shrinker for kernel reclaim of free pages
+ * @next_pool: Pointer to next pool where pages can be allocated when this pool
+ *             is empty. Pages will spill over to the next pool when this pool
+ *             is full. Can be NULL if there is no next pool.
+ */
+struct kbase_mem_pool {
+	struct kbase_device *kbdev;
+	size_t              cur_size;
+	size_t              max_size;
+	spinlock_t          pool_lock;
+	struct list_head    page_list;
+	struct shrinker     reclaim;
+
+	struct kbase_mem_pool *next_pool;
+};
+
+
+#define DEVNAME_SIZE	16
+
+struct kbase_device {
+	s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];
+
+	u32 hw_quirks_sc;
+	u32 hw_quirks_tiler;
+	u32 hw_quirks_mmu;
+	u32 hw_quirks_jm;
+
+	struct list_head entry;
+	struct device *dev;
+	struct miscdevice mdev;
+	u64 reg_start;
+	size_t reg_size;
+	void __iomem *reg;
+
+	struct {
+		int irq;
+		int flags;
+	} irqs[3];
+
+	struct clk *clock;
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulator;
+#endif
+	char devname[DEVNAME_SIZE];
+
+#ifdef CONFIG_MALI_NO_MALI
+	void *model;
+	struct kmem_cache *irq_slab;
+	struct workqueue_struct *irq_workq;
+	atomic_t serving_job_irq;
+	atomic_t serving_gpu_irq;
+	atomic_t serving_mmu_irq;
+	spinlock_t reg_op_lock;
+#endif	/* CONFIG_MALI_NO_MALI */
+
+	struct kbase_pm_device_data pm;
+	struct kbasep_js_device_data js_data;
+	struct kbase_mem_pool mem_pool;
+	struct kbasep_mem_device memdev;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	struct kbase_as as[BASE_MAX_NR_AS];
+
+	spinlock_t mmu_mask_change;
+
+	struct kbase_gpu_props gpu_props;
+
+	/** List of SW workarounds for HW issues */
+	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+	/** List of features available */
+	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+
+	/* Bitmaps of cores that are currently in use (running jobs).
+	 * These should be kept up to date by the job scheduler.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 *
+	 * kbase_pm_check_transitions_nolock() should be called when bits are
+	 * cleared to update the power management system and allow transitions to
+	 * occur. */
+	u64 shader_inuse_bitmap;
+
+	/* Refcount for cores in use */
+	u32 shader_inuse_cnt[64];
+
+	/* Bitmaps of cores the JS needs for jobs ready to run */
+	u64 shader_needed_bitmap;
+
+	/* Refcount for cores needed */
+	u32 shader_needed_cnt[64];
+
+	u32 tiler_inuse_cnt;
+
+	u32 tiler_needed_cnt;
+
+	/* struct for keeping track of the disjoint information
+	 *
+	 * The state  is > 0 if the GPU is in a disjoint state. Otherwise 0
+	 * The count is the number of disjoint events that have occurred on the GPU
+	 */
+	struct {
+		atomic_t count;
+		atomic_t state;
+	} disjoint_event;
+
+	/* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
+	u32 l2_users_count;
+
+	/* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be
+	 * submitted to these cores. These are updated by the power management code. The job scheduler should avoid
+	 * submitting new jobs to any cores that are not marked as available.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 */
+	u64 shader_available_bitmap;
+	u64 tiler_available_bitmap;
+	u64 l2_available_bitmap;
+	u64 stack_available_bitmap;
+
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+
+	s8 nr_hw_address_spaces;			  /**< Number of address spaces in the GPU (constant after driver initialisation) */
+	s8 nr_user_address_spaces;			  /**< Number of address spaces available to user contexts */
+
+	/* Structure used for instrumentation and HW counters dumping */
+	struct kbase_hwcnt {
+		/* The lock should be used when accessing any of the following members */
+		spinlock_t lock;
+
+		struct kbase_context *kctx;
+		u64 addr;
+
+		struct kbase_instr_backend backend;
+	} hwcnt;
+
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	/*value to be written to the irq_throttle register each time an irq is served */
+	atomic_t irq_throttle_cycles;
+
+#if KBASE_TRACE_ENABLE
+	spinlock_t              trace_lock;
+	u16                     trace_first_out;
+	u16                     trace_next_in;
+	struct kbase_trace            *trace_rbuf;
+#endif
+
+	u32 reset_timeout_ms;
+
+	struct mutex cacheclean_lock;
+
+	/* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
+	void *platform_context;
+
+	/* List of kbase_contexts created */
+	struct list_head        kctx_list;
+	struct mutex            kctx_list_lock;
+
+#ifdef CONFIG_PM_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freq;
+	unsigned long current_voltage;
+#ifdef CONFIG_DEVFREQ_THERMAL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+	struct devfreq_cooling_device *devfreq_cooling;
+#else
+	struct thermal_cooling_device *devfreq_cooling;
+#endif
+#endif
+#endif
+
+	struct kbase_ipa_context *ipa_ctx;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kbdev_timeline timeline;
+#endif
+
+	/*
+	 * Control for enabling job dump on failure, set when control debugfs
+	 * is opened.
+	 */
+	bool job_fault_debug;
+
+#ifdef CONFIG_DEBUG_FS
+	/* directory for debugfs entries */
+	struct dentry *mali_debugfs_directory;
+	/* Root directory for per context entry */
+	struct dentry *debugfs_ctx_directory;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* bit for each as, set if there is new data to report */
+	u64 debugfs_as_read_bitmap;
+#endif /* CONFIG_MALI_DEBUG */
+
+	/* failed job dump, used for separate debug process */
+	wait_queue_head_t job_fault_wq;
+	wait_queue_head_t job_fault_resume_wq;
+	struct workqueue_struct *job_fault_resume_workq;
+	struct list_head job_fault_event_list;
+	spinlock_t job_fault_event_lock;
+	struct kbase_context *kctx_fault;
+
+#if !MALI_CUSTOMER_RELEASE
+	/* Per-device data for register dumping interface */
+	struct {
+		u16 reg_offset; /* Offset of a GPU_CONTROL register to be
+				   dumped upon request */
+	} regs_dump_debugfs_data;
+#endif /* !MALI_CUSTOMER_RELEASE */
+#endif /* CONFIG_DEBUG_FS */
+
+	/* fbdump profiling controls set by gator */
+	u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
+
+
+#if MALI_CUSTOMER_RELEASE == 0
+	/* Number of jobs that are run before a job is forced to fail and
+	 * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
+	 * failures. */
+	int force_replay_limit;
+	/* Count of jobs between forced failures. Incremented on each job. A
+	 * job is forced to fail once this is greater than or equal to
+	 * force_replay_limit. */
+	int force_replay_count;
+	/* Core requirement for jobs to be failed and replayed. May be zero. */
+	base_jd_core_req force_replay_core_req;
+	/* true if force_replay_limit should be randomized. The random
+	 * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+	 */
+	bool force_replay_random;
+#endif
+
+	/* Total number of created contexts */
+	atomic_t ctx_num;
+
+#ifdef CONFIG_DEBUG_FS
+	/* Holds the most recent register accesses */
+	struct kbase_io_history io_history;
+#endif /* CONFIG_DEBUG_FS */
+
+	struct kbase_hwaccess_data hwaccess;
+
+	/* Count of page/bus faults waiting for workqueues to process */
+	atomic_t faults_pending;
+
+	/* true if GPU is powered off or power off operation is in progress */
+	bool poweroff_pending;
+
+
+	/* defaults for new context created for this device */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active_default;
+#else
+	u32 infinite_cache_active_default;
+#endif
+	size_t mem_pool_max_size_default;
+
+	/* current gpu coherency mode */
+	u32 current_gpu_coherency_mode;
+	/* system coherency mode  */
+	u32 system_coherency;
+	/* Flag to track when cci snoops have been enabled on the interface */
+	bool cci_snoop_enabled;
+
+	/* SMC function IDs to call into Trusted firmware to enable/disable
+	 * cache snooping. Value of 0 indicates that they are not used
+	 */
+	u32 snoop_enable_smc;
+	u32 snoop_disable_smc;
+
+	/* Protected operations */
+	struct kbase_protected_ops *protected_ops;
+
+	/*
+	 * true when GPU is put into protected mode
+	 */
+	bool protected_mode;
+
+	/*
+	 * true when GPU is transitioning into or out of protected mode
+	 */
+	bool protected_mode_transition;
+
+	/*
+	 * true if protected mode is supported
+	 */
+	bool protected_mode_support;
+
+
+#ifdef CONFIG_MALI_DEBUG
+	wait_queue_head_t driver_inactive_wait;
+	bool driver_inactive;
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	/*
+	 * Bus logger integration.
+	 */
+	struct bus_logger_client *buslogger;
+#endif
+	/* Boolean indicating if an IRQ flush during reset is in progress. */
+	bool irq_reset_flush;
+
+	/* list of inited sub systems. Used during terminate/error recovery */
+	u32 inited_subsys;
+
+	spinlock_t hwaccess_lock;
+
+	/* Protects access to MMU operations */
+	struct mutex mmu_hw_mutex;
+
+	/* Current serialization mode. See KBASE_SERIALIZE_* for details */
+	u8 serialize_jobs;
+};
+
+/**
+ * struct jsctx_queue - JS context atom queue
+ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this
+ *                 job slot.
+ * @x_dep_head:    Head item of the linked list of atoms blocked on cross-slot
+ *                 dependencies. Atoms on this list will be moved to the
+ *                 runnable_tree when the blocking atom completes.
+ *
+ * hwaccess_lock must be held when accessing this structure.
+ */
+struct jsctx_queue {
+	struct rb_root runnable_tree;
+	struct list_head x_dep_head;
+};
+
+
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
+					 (((minor) & 0xFFF) << 8) | \
+					 ((0 & 0xFF) << 0))
+
+/**
+ * enum kbase_context_flags - Flags for kbase contexts
+ *
+ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
+ * process on a 64-bit kernel.
+ *
+ * @KCTX_RUNNABLE_REF: Set when context is counted in
+ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
+ *
+ * @KCTX_ACTIVE: Set when the context is active.
+ *
+ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
+ * context.
+ *
+ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
+ * initialized.
+ *
+ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
+ * allocations. Existing allocations will not change.
+ *
+ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
+ *
+ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept
+ * scheduled in.
+ *
+ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
+ * This is only ever updated whilst the jsctx_mutex is held.
+ *
+ * @KCTX_DYING: Set when the context process is in the process of being evicted.
+ *
+ * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this
+ * context, to disable use of implicit dma-buf fences. This is used to avoid
+ * potential synchronization deadlocks.
+ *
+ * All members need to be separate bits. This enum is intended for use in a
+ * bitmask where multiple values get OR-ed together.
+ */
+enum kbase_context_flags {
+	KCTX_COMPAT = 1U << 0,
+	KCTX_RUNNABLE_REF = 1U << 1,
+	KCTX_ACTIVE = 1U << 2,
+	KCTX_PULLED = 1U << 3,
+	KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
+	KCTX_INFINITE_CACHE = 1U << 5,
+	KCTX_SUBMIT_DISABLED = 1U << 6,
+	KCTX_PRIVILEGED = 1U << 7,
+	KCTX_SCHEDULED = 1U << 8,
+	KCTX_DYING = 1U << 9,
+	KCTX_NO_IMPLICIT_SYNC = 1U << 10,
+};
+
+struct kbase_context {
+	struct file *filp;
+	struct kbase_device *kbdev;
+	int id; /* System wide unique id */
+	unsigned long api_version;
+	phys_addr_t pgd;
+	struct list_head event_list;
+	struct list_head event_coalesce_list;
+	struct mutex event_mutex;
+	atomic_t event_closed;
+	struct workqueue_struct *event_workq;
+	atomic_t event_count;
+	int event_coalesce_count;
+
+	atomic_t flags;
+
+	atomic_t                setup_complete;
+	atomic_t                setup_in_progress;
+
+	u64 *mmu_teardown_pages;
+
+	struct page *aliasing_sink_page;
+
+	struct mutex            mmu_lock;
+	struct mutex            reg_lock; /* To be converted to a rwlock? */
+	struct rb_root          reg_rbtree; /* Red-Black tree of GPU regions (live regions) */
+
+	unsigned long    cookies;
+	struct kbase_va_region *pending_regions[BITS_PER_LONG];
+
+	wait_queue_head_t event_queue;
+	pid_t tgid;
+	pid_t pid;
+
+	struct kbase_jd_context jctx;
+	atomic_t used_pages;
+	atomic_t         nonmapped_pages;
+
+	struct kbase_mem_pool mem_pool;
+
+	struct shrinker         reclaim;
+	struct list_head        evict_list;
+
+	struct list_head waiting_soft_jobs;
+	spinlock_t waiting_soft_jobs_lock;
+#ifdef CONFIG_KDS
+	struct list_head waiting_kds_resource;
+#endif
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		struct list_head waiting_resource;
+		struct workqueue_struct *wq;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+	/** This is effectively part of the Run Pool, because it only has a valid
+	 * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
+	 *
+	 * The hwaccess_lock must be held whilst accessing this.
+	 *
+	 * If the context relating to this as_nr is required, you must use
+	 * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
+	 * whilst you're using it. Alternatively, just hold the hwaccess_lock
+	 * to ensure the context doesn't disappear (but this has restrictions on what other locks
+	 * you can take whilst doing this) */
+	int as_nr;
+
+	/* NOTE:
+	 *
+	 * Flags are in jctx.sched_info.ctx.flags
+	 * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
+	 *
+	 * All other flags must be added there */
+	spinlock_t         mm_update_lock;
+	struct mm_struct *process_mm;
+	/* End of the SAME_VA zone */
+	u64 same_va_end;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kctx_timeline timeline;
+#endif
+#ifdef CONFIG_DEBUG_FS
+	/* Content of mem_profile file */
+	char *mem_profile_data;
+	/* Size of @c mem_profile_data */
+	size_t mem_profile_size;
+	/* Mutex guarding memory profile state */
+	struct mutex mem_profile_lock;
+	/* Memory profile directory under debugfs */
+	struct dentry *kctx_dentry;
+
+	/* for job fault debug */
+	unsigned int *reg_dump;
+	atomic_t job_fault_count;
+	/* This list will keep the following atoms during the dump
+	 * in the same context
+	 */
+	struct list_head job_fault_resume_event_list;
+
+#endif /* CONFIG_DEBUG_FS */
+
+	struct jsctx_queue jsctx_queue
+		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+
+	/* Number of atoms currently pulled from this context */
+	atomic_t atoms_pulled;
+	/* Number of atoms currently pulled from this context, per slot */
+	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+	/* Number of atoms currently pulled from this context, per slot and
+	 * priority. Hold hwaccess_lock when accessing */
+	int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][
+			KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	/* true if slot is blocked on the given priority. This will be set on a
+	 * soft-stop */
+	bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	/* Bitmask of slots that can be pulled from */
+	u32 slots_pullable;
+
+	/* Backend specific data */
+	struct kbase_context_backend backend;
+
+	/* Work structure used for deferred ASID assignment */
+	struct work_struct work;
+
+	/* Only one userspace vinstr client per kbase context */
+	struct kbase_vinstr_client *vinstr_cli;
+	struct mutex vinstr_cli_lock;
+
+	/* List of completed jobs waiting for events to be posted */
+	struct list_head completed_jobs;
+	/* Number of work items currently pending on job_done_wq */
+	atomic_t work_count;
+
+	/* Waiting soft-jobs will fail when this timer expires */
+	struct timer_list soft_job_timeout;
+
+	/* JIT allocation management */
+	struct kbase_va_region *jit_alloc[256];
+	struct list_head jit_active_head;
+	struct list_head jit_pool_head;
+	struct list_head jit_destroy_head;
+	struct mutex jit_evict_lock;
+	struct work_struct jit_work;
+
+	/* External sticky resource management */
+	struct list_head ext_res_meta_head;
+
+	/* Used to record that a drain was requested from atomic context */
+	atomic_t drain_pending;
+
+	/* Current age count, used to determine age for newly submitted atoms */
+	u32 age_count;
+};
+
+/**
+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
+ *                                 to a @kbase_context.
+ * @ext_res_node:                  List head for adding the metadata to a
+ *                                 @kbase_context.
+ * @alloc:                         The physical memory allocation structure
+ *                                 which is mapped.
+ * @gpu_addr:                      The GPU virtual address the resource is
+ *                                 mapped to.
+ *
+ * External resources can be mapped into multiple contexts as well as the same
+ * context multiple times.
+ * As kbase_va_region itself isn't refcounted we can't attach our extra
+ * information to it as it could be removed under our feet leaving external
+ * resources pinned.
+ * This metadata structure binds a single external resource to a single
+ * context, ensuring that per context mapping is tracked separately so it can
+ * be overridden when needed and abuses by the application (freeing the resource
+ * multiple times) don't effect the refcount of the physical allocation.
+ */
+struct kbase_ctx_ext_res_meta {
+	struct list_head ext_res_node;
+	struct kbase_mem_phy_alloc *alloc;
+	u64 gpu_addr;
+};
+
+enum kbase_reg_access_type {
+	REG_READ,
+	REG_WRITE
+};
+
+enum kbase_share_attr_bits {
+	/* (1ULL << 8) bit is reserved */
+	SHARE_BOTH_BITS = (2ULL << 8),	/* inner and outer shareable coherency */
+	SHARE_INNER_BITS = (3ULL << 8)	/* inner shareable coherency */
+};
+
+/**
+ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
+ * @kbdev: kbase device
+ *
+ * Return: true if the device access are coherent, false if not.
+ */
+static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
+			(kbdev->system_coherency == COHERENCY_ACE))
+		return true;
+
+	return false;
+}
+
+/* Conversion helpers for setting up high resolution timers */
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
+
+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
+#define KBASE_CLEAN_CACHE_MAX_LOOPS     100000
+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
+#define KBASE_AS_INACTIVE_MAX_LOOPS     100000
+
+/* Maximum number of times a job can be replayed */
+#define BASEP_JD_REPLAY_LIMIT 15
+
+/* JobDescriptorHeader - taken from the architecture specifications, the layout
+ * is currently identical for all GPU archs. */
+struct job_descriptor_header {
+	u32 exception_status;
+	u32 first_incomplete_task;
+	u64 fault_pointer;
+	u8 job_descriptor_size : 1;
+	u8 job_type : 7;
+	u8 job_barrier : 1;
+	u8 _reserved_01 : 1;
+	u8 _reserved_1 : 1;
+	u8 _reserved_02 : 1;
+	u8 _reserved_03 : 1;
+	u8 _reserved_2 : 1;
+	u8 _reserved_04 : 1;
+	u8 _reserved_05 : 1;
+	u16 job_index;
+	u16 job_dependency_index_1;
+	u16 job_dependency_index_2;
+	union {
+		u64 _64;
+		u32 _32;
+	} next_job;
+};
+
+#endif				/* _KBASE_DEFS_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
new file mode 100755
index 0000000..7484eec
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -0,0 +1,697 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_profiling_gator_api.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports tracing feature provided in the base module.
+ * Please keep it in sync with the value of base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if KBASE_TRACE_ENABLE
+static const char *kbasep_trace_code_string[] = {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+static int kbasep_trace_init(struct kbase_device *kbdev);
+static void kbasep_trace_term(struct kbase_device *kbdev);
+static void kbasep_trace_hook_wrapper(void *param);
+
+struct kbase_device *kbase_device_alloc(void)
+{
+	return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+	const char format[] = "mali_mmu%d";
+	char name[sizeof(format)];
+	const char poke_format[] = "mali_mmu%d_poker";
+	char poke_name[sizeof(poke_format)];
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		snprintf(poke_name, sizeof(poke_name), poke_format, i);
+
+	snprintf(name, sizeof(name), format, i);
+
+	kbdev->as[i].number = i;
+	kbdev->as[i].fault_addr = 0ULL;
+
+	kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+	if (!kbdev->as[i].pf_wq)
+		return -EINVAL;
+
+	INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+	INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+		struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
+		struct work_struct *poke_work = &kbdev->as[i].poke_work;
+
+		kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+		if (!kbdev->as[i].poke_wq) {
+			destroy_workqueue(kbdev->as[i].pf_wq);
+			return -EINVAL;
+		}
+		KBASE_DEBUG_ASSERT(!object_is_on_stack(poke_work));
+		INIT_WORK(poke_work, kbasep_as_do_poke);
+
+		hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+		poke_timer->function = kbasep_as_poke_timer_callback;
+
+		kbdev->as[i].poke_refcount = 0;
+		kbdev->as[i].poke_state = 0u;
+	}
+
+	return 0;
+}
+
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+	destroy_workqueue(kbdev->as[i].pf_wq);
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		destroy_workqueue(kbdev->as[i].poke_wq);
+}
+
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+	int i, err;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		err = kbase_device_as_init(kbdev, i);
+		if (err)
+			goto free_workqs;
+	}
+
+	return 0;
+
+free_workqs:
+	for (; i > 0; i--)
+		kbase_device_as_term(kbdev, i);
+
+	return err;
+}
+
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+		kbase_device_as_term(kbdev, i);
+}
+
+int kbase_device_init(struct kbase_device * const kbdev)
+{
+	int i, err;
+#ifdef CONFIG_ARM64
+	struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
+
+	spin_lock_init(&kbdev->mmu_mask_change);
+	mutex_init(&kbdev->mmu_hw_mutex);
+#ifdef CONFIG_ARM64
+	kbdev->cci_snoop_enabled = false;
+	np = kbdev->dev->of_node;
+	if (np != NULL) {
+		if (of_property_read_u32(np, "snoop_enable_smc",
+					&kbdev->snoop_enable_smc))
+			kbdev->snoop_enable_smc = 0;
+		if (of_property_read_u32(np, "snoop_disable_smc",
+					&kbdev->snoop_disable_smc))
+			kbdev->snoop_disable_smc = 0;
+		/* Either both or none of the calls should be provided. */
+		if (!((kbdev->snoop_disable_smc == 0
+			&& kbdev->snoop_enable_smc == 0)
+			|| (kbdev->snoop_disable_smc != 0
+			&& kbdev->snoop_enable_smc != 0))) {
+			WARN_ON(1);
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+#endif /* CONFIG_ARM64 */
+	/* Get the list of workarounds for issues on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	err = kbase_hw_set_issues_mask(kbdev);
+	if (err)
+		goto fail;
+
+	/* Set the list of features available on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	kbase_hw_set_features_mask(kbdev);
+
+	kbase_gpuprops_set_features(kbdev);
+
+	/* On Linux 4.0+, dma coherency is determined from device tree */
+#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+	set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif
+
+	/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+	 * device structure was created by device-tree
+	 */
+	if (!kbdev->dev->dma_mask)
+		kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+	err = dma_set_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	err = dma_set_coherent_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+	err = kbase_device_all_as_init(kbdev);
+	if (err)
+		goto as_init_failed;
+
+	spin_lock_init(&kbdev->hwcnt.lock);
+
+	err = kbasep_trace_init(kbdev);
+	if (err)
+		goto term_as;
+
+	mutex_init(&kbdev->cacheclean_lock);
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		kbdev->timeline.slot_atoms_submitted[i] = 0;
+
+	for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
+		atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+	/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
+	for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
+		kbdev->kbase_profiling_controls[i] = 0;
+
+	kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+	atomic_set(&kbdev->ctx_num, 0);
+
+	err = kbase_instr_backend_init(kbdev);
+	if (err)
+		goto term_trace;
+
+	kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+#else
+	kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+#ifdef CONFIG_MALI_DEBUG
+	init_waitqueue_head(&kbdev->driver_inactive_wait);
+#endif /* CONFIG_MALI_DEBUG */
+
+	return 0;
+term_trace:
+	kbasep_trace_term(kbdev);
+term_as:
+	kbase_device_all_as_term(kbdev);
+as_init_failed:
+dma_set_mask_failed:
+fail:
+	return err;
+}
+
+void kbase_device_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+#if KBASE_TRACE_ENABLE
+	kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+	kbase_instr_backend_term(kbdev);
+
+	kbasep_trace_term(kbdev);
+
+	kbase_device_all_as_term(kbdev);
+}
+
+void kbase_device_free(struct kbase_device *kbdev)
+{
+	kfree(kbdev);
+}
+
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(tb);
+
+	/* Interface uses 16-bit value to track last accessed entry. Each entry
+	 * is composed of two 32-bit words.
+	 * This limits the size that can be handled without an overflow. */
+	if (0xFFFF * (2 * sizeof(u32)) < size)
+		return -EINVAL;
+
+	/* set up the header */
+	/* magic number in the first 4 bytes */
+	tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
+	/* Store (write offset = 0, wrap counter = 0, transaction active = no)
+	 * write offset 0 means never written.
+	 * Offsets 1 to (wrap_offset - 1) used to store values when trace started
+	 */
+	tb[1] = 0;
+
+	/* install trace buffer */
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb_wrap_offset = size / 8;
+	kctx->jctx.tb = tb;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+
+	return 0;
+}
+
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb = NULL;
+	kctx->jctx.tb_wrap_offset = 0;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	if (kctx->jctx.tb) {
+		u16 wrap_count;
+		u16 write_offset;
+		u32 *tb = kctx->jctx.tb;
+		u32 header_word;
+
+		header_word = tb[1];
+		KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
+
+		wrap_count = (header_word >> 1) & 0x7FFF;
+		write_offset = (header_word >> 16) & 0xFFFF;
+
+		/* mark as transaction in progress */
+		tb[1] |= 0x1;
+		mb();
+
+		/* calculate new offset */
+		write_offset++;
+		if (write_offset == kctx->jctx.tb_wrap_offset) {
+			/* wrap */
+			write_offset = 1;
+			wrap_count++;
+			wrap_count &= 0x7FFF;	/* 15bit wrap counter */
+		}
+
+		/* store the trace entry at the selected offset */
+		tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
+		tb[write_offset * 2 + 1] = reg_value;
+		mb();
+
+		/* new header word */
+		header_word = (write_offset << 16) | (wrap_count << 1) | 0x0;	/* transaction complete */
+		tb[1] = header_word;
+	}
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	struct kbase_trace *rbuf;
+
+	rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+	if (!rbuf)
+		return -EINVAL;
+
+	kbdev->trace_rbuf = rbuf;
+	spin_lock_init(&kbdev->trace_lock);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	kfree(kbdev->trace_rbuf);
+}
+
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+	s32 written = 0;
+
+	/* Initial part of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
+
+	if (trace_msg->katom)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
+
+	/* NOTE: Could add function callbacks to handle different message types */
+	/* Jobslot present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Refcount present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Rest of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
+}
+
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	dev_dbg(kbdev->dev, "%s", buffer);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+	unsigned long irqflags;
+	struct kbase_trace *trace_msg;
+
+	spin_lock_irqsave(&kbdev->trace_lock, irqflags);
+
+	trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+	/* Fill the message */
+	trace_msg->thread_id = task_pid_nr(current);
+	trace_msg->cpu = task_cpu(current);
+
+	getnstimeofday(&trace_msg->timestamp);
+
+	trace_msg->code = code;
+	trace_msg->ctx = ctx;
+
+	if (NULL == katom) {
+		trace_msg->katom = false;
+	} else {
+		trace_msg->katom = true;
+		trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+		trace_msg->atom_udata[0] = katom->udata.blob[0];
+		trace_msg->atom_udata[1] = katom->udata.blob[1];
+	}
+
+	trace_msg->gpu_addr = gpu_addr;
+	trace_msg->jobslot = jobslot;
+	trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
+	trace_msg->info_val = info_val;
+	trace_msg->flags = flags;
+
+	/* Update the ringbuffer indices */
+	kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+	if (kbdev->trace_next_in == kbdev->trace_first_out)
+		kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+	/* Done */
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	kbdev->trace_first_out = kbdev->trace_next_in;
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 start;
+	u32 end;
+
+	dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	start = kbdev->trace_first_out;
+	end = kbdev->trace_next_in;
+
+	while (start != end) {
+		struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
+
+		kbasep_trace_dump_msg(kbdev, trace_msg);
+
+		start = (start + 1) & KBASE_TRACE_MASK;
+	}
+	dev_dbg(kbdev->dev, "TRACE_END");
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	KBASE_TRACE_CLEAR(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)param;
+
+	kbasep_trace_dump(kbdev);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct trace_seq_state {
+	struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+	u32 start;
+	u32 end;
+};
+
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	if (*pos > KBASE_TRACE_SIZE)
+		return NULL;
+	i = state->start + *pos;
+	if ((state->end >= state->start && i >= state->end) ||
+			i >= state->end + KBASE_TRACE_SIZE)
+		return NULL;
+
+	i &= KBASE_TRACE_MASK;
+
+	return &state->trace_buf[i];
+}
+
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	(*pos)++;
+
+	i = (state->start + *pos) & KBASE_TRACE_MASK;
+	if (i == state->end)
+		return NULL;
+
+	return &state->trace_buf[i];
+}
+
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace *trace_msg = data;
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	seq_printf(s, "%s\n", buffer);
+	return 0;
+}
+
+static const struct seq_operations kbasep_trace_seq_ops = {
+	.start = kbasep_trace_seq_start,
+	.next = kbasep_trace_seq_next,
+	.stop = kbasep_trace_seq_stop,
+	.show = kbasep_trace_seq_show,
+};
+
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct kbase_device *kbdev = inode->i_private;
+	unsigned long flags;
+
+	struct trace_seq_state *state;
+
+	state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
+	if (!state)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	state->start = kbdev->trace_first_out;
+	state->end = kbdev->trace_next_in;
+	memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	return 0;
+}
+
+static const struct file_operations kbasep_trace_debugfs_fops = {
+	.open = kbasep_trace_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_trace", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_trace_debugfs_fops);
+}
+
+#else
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_DEBUG_FS */
+
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	CSTD_UNUSED(param);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
+{
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		kbdev->kbase_profiling_controls[control] = value;
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+}
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control)
+{
+	u32 ret_value = 0;
+
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		ret_value = kbdev->kbase_profiling_controls[control];
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+
+	return ret_value;
+}
+
+/*
+ * Called by gator to control the production of
+ * profiling information at runtime
+ * */
+
+void _mali_profiling_control(u32 action, u32 value)
+{
+	struct kbase_device *kbdev = NULL;
+
+	/* find the first i.e. call with -1 */
+	kbdev = kbase_find_device(-1);
+
+	if (NULL != kbdev)
+		kbase_set_profiling_control(kbdev, action, value);
+}
+KBASE_EXPORT_SYMBOL(_mali_profiling_control);
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
new file mode 100755
index 0000000..f70bccc
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_set(&kbdev->disjoint_event.count, 0);
+	atomic_set(&kbdev->disjoint_event.state, 0);
+}
+
+/* increment the disjoint event count */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* increment the state and the event counter */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.state);
+
+	kbase_disjoint_event(kbdev);
+}
+
+/* decrement the state */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+	kbase_disjoint_event(kbdev);
+
+	atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* increments the count only if the state is > 0 */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	if (atomic_read(&kbdev->disjoint_event.state))
+		kbase_disjoint_event(kbdev);
+}
+
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
new file mode 100644
index 0000000..97bb6c5
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
@@ -0,0 +1,606 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as
+ * it will be set there.
+ */
+#include "mali_kbase_dma_fence.h"
+
+#include <linux/atomic.h>
+#include <linux/fence.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/reservation.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/ww_mutex.h>
+
+#include <mali_kbase.h>
+
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(kbase_dma_fence_lock);
+
+static void
+kbase_dma_fence_work(struct work_struct *pwork);
+
+static void
+kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource);
+}
+
+void
+kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom)
+{
+	list_del(&katom->queue);
+}
+
+static const char *
+kbase_dma_fence_get_driver_name(struct fence *fence)
+{
+	return kbase_drv_name;
+}
+
+static const char *
+kbase_dma_fence_get_timeline_name(struct fence *fence)
+{
+	return kbase_timeline_name;
+}
+
+static bool
+kbase_dma_fence_enable_signaling(struct fence *fence)
+{
+	/* If in the future we need to add code here remember to
+	 * to get a reference to the fence and release it when signaling
+	 * as stated in fence.h
+	 */
+	return true;
+}
+
+static void
+kbase_dma_fence_fence_value_str(struct fence *fence, char *str, int size)
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+static const struct fence_ops kbase_dma_fence_ops = {
+	.get_driver_name = kbase_dma_fence_get_driver_name,
+	.get_timeline_name = kbase_dma_fence_get_timeline_name,
+	.enable_signaling = kbase_dma_fence_enable_signaling,
+	/* Use the default wait */
+	.wait = fence_default_wait,
+	.fence_value_str = kbase_dma_fence_fence_value_str,
+};
+
+static struct fence *
+kbase_dma_fence_new(unsigned int context, unsigned int seqno)
+{
+	struct fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	fence_init(fence,
+		   &kbase_dma_fence_ops,
+		   &kbase_dma_fence_lock,
+		   context,
+		   seqno);
+
+	return fence;
+}
+
+static int
+kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
+				  struct ww_acquire_ctx *ctx)
+{
+	struct reservation_object *content_res = NULL;
+	unsigned int content_res_idx = 0;
+	unsigned int r;
+	int err = 0;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < info->dma_fence_resv_count; r++) {
+		if (info->resv_objs[r] == content_res) {
+			content_res = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	content_res_idx = r;
+
+	/* Unlock the locked one ones */
+	while (r--)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+
+	if (content_res)
+		ww_mutex_unlock(&content_res->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+		content_res = info->resv_objs[content_res_idx];
+		ww_mutex_lock_slow(&content_res->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+static void
+kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
+				    struct ww_acquire_ctx *ctx)
+{
+	unsigned int r;
+
+	for (r = 0; r < info->dma_fence_resv_count; r++)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+	ww_acquire_fini(ctx);
+}
+
+/**
+ * kbase_dma_fence_queue_work() - Queue work to handle @katom
+ * @katom: Pointer to atom for which to queue work
+ *
+ * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and
+ * submit the atom.
+ */
+static void
+kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	bool ret;
+
+	INIT_WORK(&katom->work, kbase_dma_fence_work);
+	ret = queue_work(kctx->dma_fence.wq, &katom->work);
+	/* Warn if work was already queued, that should not happen. */
+	WARN_ON(!ret);
+}
+
+/**
+ * kbase_dma_fence_free_callbacks - Free dma-fence callbacks on a katom
+ * @katom: Pointer to katom
+ * @queue_worker: Boolean indicating if fence worker is to be queued when
+ *                dep_count reaches 0.
+ *
+ * This function will free all fence callbacks on the katom's list of
+ * callbacks. Callbacks that have not yet been called, because their fence
+ * hasn't yet signaled, will first be removed from the fence.
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom, bool queue_worker)
+{
+	struct kbase_dma_fence_cb *cb, *tmp;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Clean up and free callbacks. */
+	list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) {
+		bool ret;
+
+		/* Cancel callbacks that hasn't been called yet. */
+		ret = fence_remove_callback(cb->fence, &cb->fence_cb);
+		if (ret) {
+			int ret;
+
+			/* Fence had not signaled, clean up after
+			 * canceling.
+			 */
+			ret = atomic_dec_return(&katom->dma_fence.dep_count);
+
+			if (unlikely(queue_worker && ret == 0)) {
+				/*
+				 * dep_count went to zero and queue_worker is
+				 * true. Queue the worker to handle the
+				 * completion of the katom.
+				 */
+				kbase_dma_fence_queue_work(katom);
+			}
+		}
+
+		/*
+		 * Release the reference taken in
+		 * kbase_dma_fence_add_callback().
+		 */
+		fence_put(cb->fence);
+		list_del(&cb->node);
+		kfree(cb);
+	}
+}
+
+/**
+ * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom
+ * @katom:	Katom to cancel
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Cancel callbacks and clean up. */
+	kbase_dma_fence_free_callbacks(katom, false);
+
+	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == 0);
+
+	/* Mark the atom as handled in case all fences signaled just before
+	 * canceling the callbacks and the worker was queued.
+	 */
+	atomic_set(&katom->dma_fence.dep_count, -1);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 * there is a race between job completion and cancellation.
+	 */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+
+/**
+ * kbase_dma_fence_work() - Worker thread called when a fence is signaled
+ * @pwork:	work_struct containing a pointer to a katom
+ *
+ * This function will clean and mark all dependencies as satisfied
+ */
+static void
+kbase_dma_fence_work(struct work_struct *pwork)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = container_of(pwork, struct kbase_jd_atom, work);
+	ctx = &katom->kctx->jctx;
+
+	mutex_lock(&ctx->lock);
+	if (atomic_read(&katom->dma_fence.dep_count) != 0)
+		goto out;
+
+	atomic_set(&katom->dma_fence.dep_count, -1);
+
+	/* Remove atom from list of dma-fence waiting atoms. */
+	kbase_dma_fence_waiters_remove(katom);
+	/* Cleanup callbacks. */
+	kbase_dma_fence_free_callbacks(katom, false);
+	/*
+	 * Queue atom on GPU, unless it has already completed due to a failing
+	 * dependency. Run jd_done_nolock() on the katom if it is completed.
+	 */
+	if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
+		jd_done_nolock(katom, NULL);
+	else
+		kbase_jd_dep_clear_locked(katom);
+
+out:
+	mutex_unlock(&ctx->lock);
+}
+
+/**
+ * kbase_dma_fence_add_callback() - Add callback on @fence to block @katom
+ * @katom: Pointer to katom that will be blocked by @fence
+ * @fence: Pointer to fence on which to set up the callback
+ * @callback: Pointer to function to be called when fence is signaled
+ *
+ * Caller needs to hold a reference to @fence when calling this function, and
+ * the caller is responsible for releasing that reference.  An additional
+ * reference to @fence will be taken when the callback was successfully set up
+ * and @fence needs to be kept valid until the callback has been called and
+ * cleanup have been done.
+ *
+ * Return: 0 on success: fence was either already signalled, or callback was
+ * set up. Negative error code is returned on error.
+ */
+static int
+kbase_dma_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct fence *fence,
+			     fence_func_t callback)
+{
+	int err = 0;
+	struct kbase_dma_fence_cb *kbase_fence_cb;
+
+	kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL);
+	if (!kbase_fence_cb)
+		return -ENOMEM;
+
+	kbase_fence_cb->fence = fence;
+	kbase_fence_cb->katom = katom;
+	INIT_LIST_HEAD(&kbase_fence_cb->node);
+
+	err = fence_add_callback(fence, &kbase_fence_cb->fence_cb, callback);
+	if (err == -ENOENT) {
+		/* Fence signaled, clear the error and return */
+		err = 0;
+		kbase_fence_cb->fence = NULL;
+		kfree(kbase_fence_cb);
+	} else if (err) {
+		kfree(kbase_fence_cb);
+	} else {
+		/*
+		 * Get reference to fence that will be kept until callback gets
+		 * cleaned up in kbase_dma_fence_free_callbacks().
+		 */
+		fence_get(fence);
+		atomic_inc(&katom->dma_fence.dep_count);
+		/* Add callback to katom's list of callbacks */
+		list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks);
+	}
+
+	return err;
+}
+
+static void
+kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
+{
+	struct kbase_dma_fence_cb *kcb = container_of(cb,
+				struct kbase_dma_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+
+	/* If the atom is zapped dep_count will be forced to a negative number
+	 * preventing this callback from ever scheduling work. Which in turn
+	 * would reschedule the atom.
+	 */
+	if (atomic_dec_and_test(&katom->dma_fence.dep_count))
+		kbase_dma_fence_queue_work(katom);
+}
+
+static int
+kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
+					 struct reservation_object *resv,
+					 bool exclusive)
+{
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = reservation_object_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = kbase_dma_fence_add_callback(katom,
+						   excl_fence,
+						   kbase_dma_fence_cb);
+
+		/* Release our reference, taken by reservation_object_get_fences_rcu(),
+		 * to the fence. We have set up our callback (if that was possible),
+		 * and it's the fence's owner is responsible for singling the fence
+		 * before allowing it to disappear.
+		 */
+		fence_put(excl_fence);
+
+		if (err)
+			goto out;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = kbase_dma_fence_add_callback(katom,
+							   shared_fences[i],
+							   kbase_dma_fence_cb);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Release all our references to the shared fences, taken by
+	 * reservation_object_get_fences_rcu(). We have set up our callback (if
+	 * that was possible), and it's the fence's owner is responsible for
+	 * signaling the fence before allowing it to disappear.
+	 */
+out:
+	for (i = 0; i < shared_count; i++)
+		fence_put(shared_fences[i]);
+	kfree(shared_fences);
+
+	if (err) {
+		/*
+		 * On error, cancel and clean up all callbacks that was set up
+		 * before the error.
+		 */
+		kbase_dma_fence_free_callbacks(katom, false);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive)
+{
+	unsigned int i;
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		/* Duplicate resource, ignore */
+		if (info->resv_objs[i] == resv)
+			return;
+	}
+
+	info->resv_objs[info->dma_fence_resv_count] = resv;
+	if (exclusive)
+		set_bit(info->dma_fence_resv_count,
+			info->dma_fence_excl_bitmap);
+	(info->dma_fence_resv_count)++;
+}
+
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info)
+{
+	int err, i;
+	struct fence *fence;
+	struct ww_acquire_ctx ww_ctx;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	fence = kbase_dma_fence_new(katom->dma_fence.context,
+				    atomic_inc_return(&katom->dma_fence.seqno));
+	if (!fence) {
+		err = -ENOMEM;
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d creating fence.\n", err);
+		return err;
+	}
+
+	katom->dma_fence.fence = fence;
+	atomic_set(&katom->dma_fence.dep_count, 1);
+
+	err = kbase_dma_fence_lock_reservations(info, &ww_ctx);
+	if (err) {
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d locking reservations.\n", err);
+		atomic_set(&katom->dma_fence.dep_count, -1);
+		fence_put(fence);
+		return err;
+	}
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		struct reservation_object *obj = info->resv_objs[i];
+
+		if (!test_bit(i, info->dma_fence_excl_bitmap)) {
+			err = reservation_object_reserve_shared(obj);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d reserving space for shared fence.\n", err);
+				goto end;
+			}
+
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, false);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_shared_fence(obj, katom->dma_fence.fence);
+		} else {
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, true);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_excl_fence(obj, katom->dma_fence.fence);
+		}
+	}
+
+end:
+	kbase_dma_fence_unlock_reservations(info, &ww_ctx);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (atomic_dec_and_test(&katom->dma_fence.dep_count)) {
+			atomic_set(&katom->dma_fence.dep_count, -1);
+			kbase_dma_fence_free_callbacks(katom, false);
+		} else {
+			/* Add katom to the list of dma-buf fence waiting atoms
+			 * only if it is still waiting.
+			 */
+			kbase_dma_fence_waiters_add(katom);
+		}
+	} else {
+		/* There was an error, cancel callbacks, set dep_count to -1 to
+		 * indicate that the atom has been handled (the caller will
+		 * kill it for us), signal the fence, free callbacks and the
+		 * fence.
+		 */
+		kbase_dma_fence_free_callbacks(katom, false);
+		atomic_set(&katom->dma_fence.dep_count, -1);
+		kbase_dma_fence_signal(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
+{
+	struct list_head *list = &kctx->dma_fence.waiting_resource;
+
+	while (!list_empty(list)) {
+		struct kbase_jd_atom *katom;
+
+		katom = list_first_entry(list, struct kbase_jd_atom, queue);
+		kbase_dma_fence_waiters_remove(katom);
+		kbase_dma_fence_cancel_atom(katom);
+	}
+}
+
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
+{
+	/* Cancel callbacks and clean up. */
+	kbase_dma_fence_free_callbacks(katom, true);
+}
+
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
+{
+	if (!katom->dma_fence.fence)
+		return;
+
+	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == -1);
+
+	/* Signal the atom's fence. */
+	fence_signal(katom->dma_fence.fence);
+	fence_put(katom->dma_fence.fence);
+	katom->dma_fence.fence = NULL;
+
+	kbase_dma_fence_free_callbacks(katom, false);
+}
+
+void kbase_dma_fence_term(struct kbase_context *kctx)
+{
+	destroy_workqueue(kctx->dma_fence.wq);
+	kctx->dma_fence.wq = NULL;
+}
+
+int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);
+
+	kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d",
+					     WQ_UNBOUND, 1, kctx->pid);
+	if (!kctx->dma_fence.wq)
+		return -ENOMEM;
+
+	return 0;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
new file mode 100644
index 0000000..3b0a69b
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
@@ -0,0 +1,150 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DMA_FENCE_H_
+#define _KBASE_DMA_FENCE_H_
+
+#ifdef CONFIG_MALI_DMA_FENCE
+
+#include <linux/fence.h>
+#include <linux/list.h>
+#include <linux/reservation.h>
+
+
+/* Forward declaration from mali_kbase_defs.h */
+struct kbase_jd_atom;
+struct kbase_context;
+
+/**
+ * struct kbase_dma_fence_cb - Mali dma-fence callback data struct
+ * @fence_cb: Callback function
+ * @katom:    Pointer to katom that is waiting on this callback
+ * @fence:    Pointer to the fence object on which this callback is waiting
+ * @node:     List head for linking this callback to the katom
+ */
+struct kbase_dma_fence_cb {
+	struct fence_cb fence_cb;
+	struct kbase_jd_atom *katom;
+	struct fence *fence;
+	struct list_head node;
+};
+
+/**
+ * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
+ * @resv_objs:             Array of reservation objects to attach the
+ *                         new fence to.
+ * @dma_fence_resv_count:  Number of reservation objects in the array.
+ * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive.
+ *
+ * This is used by some functions to pass around a collection of data about
+ * reservation objects.
+ */
+struct kbase_dma_fence_resv_info {
+	struct reservation_object **resv_objs;
+	unsigned int dma_fence_resv_count;
+	unsigned long *dma_fence_excl_bitmap;
+};
+
+/**
+ * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
+ * @resv:      Reservation object to add to the array.
+ * @info:      Pointer to struct with current reservation info
+ * @exclusive: Boolean indicating if exclusive access is needed
+ *
+ * The function adds a new reservation_object to an existing array of
+ * reservation_objects. At the same time keeps track of which objects require
+ * exclusive access in dma_fence_excl_bitmap.
+ */
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive);
+
+/**
+ * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
+ * @katom: Katom with the external dependency.
+ * @info:  Pointer to struct with current reservation info
+ *
+ * Return: An error code or 0 if succeeds
+ */
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info);
+
+/**
+ * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx
+ * @kctx: Pointer to kbase context
+ *
+ * This function will cancel and clean up all katoms on @kctx that is waiting
+ * on dma-buf fences.
+ *
+ * Locking: jctx.lock needs to be held when calling this function.
+ */
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
+ * @katom: Pointer to katom whose callbacks are to be canceled
+ *
+ * This function cancels all dma-buf fence callbacks on @katom, but does not
+ * cancel the katom itself.
+ *
+ * The caller is responsible for ensuring that jd_done_nolock is called on
+ * @katom.
+ *
+ * Locking: jctx.lock must be held when calling this function.
+ */
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
+ * @katom: Pointer to katom to signal and clean up
+ *
+ * This function will signal the @katom's fence, if it has one, and clean up
+ * the callback data from the katom's wait on earlier fences.
+ *
+ * Locking: jctx.lock must be held while calling this function.
+ */
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_term() - Terminate Mali dma-fence context
+ * @kctx: kbase context to terminate
+ */
+void kbase_dma_fence_term(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_init() - Initialize Mali dma-fence context
+ * @kctx: kbase context to initialize
+ */
+int kbase_dma_fence_init(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_waiters_remove()- Remove katom from dma-fence wait list
+ * @katom: Pointer to katom to remove from list
+ */
+void kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom);
+
+#else /* CONFIG_MALI_DMA_FENCE */
+/* Dummy functions for when dma-buf fence isn't enabled. */
+
+static inline int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	return 0;
+}
+
+static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
new file mode 100755
index 0000000..1881486
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -0,0 +1,259 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+#include <mali_kbase_tlstream.h>
+
+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct base_jd_udata data;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+	data = katom->udata;
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
+
+	KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx);
+	KBASE_TLSTREAM_TL_DEL_ATOM(katom);
+
+	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+
+	wake_up(&katom->completed);
+
+	return data;
+}
+
+int kbase_event_pending(struct kbase_context *ctx)
+{
+	KBASE_DEBUG_ASSERT(ctx);
+
+	return (atomic_read(&ctx->event_count) != 0) ||
+			(atomic_read(&ctx->event_closed) != 0);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+	struct kbase_jd_atom *atom;
+
+	KBASE_DEBUG_ASSERT(ctx);
+
+	mutex_lock(&ctx->event_mutex);
+
+	if (list_empty(&ctx->event_list)) {
+		if (!atomic_read(&ctx->event_closed)) {
+			mutex_unlock(&ctx->event_mutex);
+			return -1;
+		}
+
+		/* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+		mutex_unlock(&ctx->event_mutex);
+		uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+		memset(&uevent->udata, 0, sizeof(uevent->udata));
+		dev_dbg(ctx->kbdev->dev,
+				"event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+				BASE_JD_EVENT_DRV_TERMINATED);
+		return 0;
+	}
+
+	/* normal event processing */
+	atomic_dec(&ctx->event_count);
+	atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
+	list_del(ctx->event_list.next);
+
+	mutex_unlock(&ctx->event_mutex);
+
+	dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+	uevent->event_code = atom->event_code;
+	uevent->atom_number = (atom - ctx->jctx.atoms);
+
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(atom);
+
+	mutex_lock(&ctx->jctx.lock);
+	uevent->udata = kbase_event_process(ctx, atom);
+	mutex_unlock(&ctx->jctx.lock);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue);
+
+/**
+ * kbase_event_process_noreport_worker - Worker for processing atoms that do not
+ *                                       return an event but do have external
+ *                                       resources
+ * @data:  Work structure
+ */
+static void kbase_event_process_noreport_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(katom);
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_event_process(kctx, katom);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+/**
+ * kbase_event_process_noreport - Process atoms that do not return an event
+ * @kctx:  Context pointer
+ * @katom: Atom to be processed
+ *
+ * Atoms that do not have external resources will be processed immediately.
+ * Atoms that do have external resources will be processed on a workqueue, in
+ * order to avoid locking issues.
+ */
+static void kbase_event_process_noreport(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		INIT_WORK(&katom->work, kbase_event_process_noreport_worker);
+		queue_work(kctx->event_workq, &katom->work);
+	} else {
+		kbase_event_process(kctx, katom);
+	}
+}
+
+/**
+ * kbase_event_coalesce - Move pending events to the main event list
+ * @kctx:  Context pointer
+ *
+ * kctx->event_list and kctx->event_coalesce_count must be protected
+ * by a lock unless this is the last thread using them
+ * (and we're about to terminate the lock).
+ *
+ * Return: The number of pending events moved to the main event list
+ */
+static int kbase_event_coalesce(struct kbase_context *kctx)
+{
+	const int event_count = kctx->event_coalesce_count;
+
+	/* Join the list of pending events onto the tail of the main list
+	   and reset it */
+	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
+	kctx->event_coalesce_count = 0;
+
+	/* Return the number of events moved */
+	return event_count;
+}
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+	if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
+		if (atom->event_code == BASE_JD_EVENT_DONE) {
+			/* Don't report the event */
+			kbase_event_process_noreport(ctx, atom);
+			return;
+		}
+	}
+
+	if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
+		/* Don't report the event */
+		kbase_event_process_noreport(ctx, atom);
+		return;
+	}
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_POSTED);
+	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
+		/* Don't report the event until other event(s) have completed */
+		mutex_lock(&ctx->event_mutex);
+		list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
+		++ctx->event_coalesce_count;
+		mutex_unlock(&ctx->event_mutex);
+	} else {
+		/* Report the event and any pending events now */
+		int event_count = 1;
+
+		mutex_lock(&ctx->event_mutex);
+		event_count += kbase_event_coalesce(ctx);
+		list_add_tail(&atom->dep_item[0], &ctx->event_list);
+		atomic_add(event_count, &ctx->event_count);
+		mutex_unlock(&ctx->event_mutex);
+
+		kbase_event_wakeup(ctx);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_event_post);
+
+void kbase_event_close(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->event_mutex);
+	atomic_set(&kctx->event_closed, true);
+	mutex_unlock(&kctx->event_mutex);
+	kbase_event_wakeup(kctx);
+}
+
+int kbase_event_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	INIT_LIST_HEAD(&kctx->event_list);
+	INIT_LIST_HEAD(&kctx->event_coalesce_list);
+	mutex_init(&kctx->event_mutex);
+	atomic_set(&kctx->event_count, 0);
+	kctx->event_coalesce_count = 0;
+	atomic_set(&kctx->event_closed, false);
+	kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+	if (NULL == kctx->event_workq)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init);
+
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+	int event_count;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+	flush_workqueue(kctx->event_workq);
+	destroy_workqueue(kctx->event_workq);
+
+	/* We use kbase_event_dequeue to remove the remaining events as that
+	 * deals with all the cleanup needed for the atoms.
+	 *
+	 * Note: use of kctx->event_list without a lock is safe because this must be the last
+	 * thread using it (because we're about to terminate the lock)
+	 */
+	event_count = kbase_event_coalesce(kctx);
+	atomic_add(event_count, &kctx->event_count);
+
+	while (!list_empty(&kctx->event_list)) {
+		struct base_jd_event_v2 event;
+
+		kbase_event_dequeue(kctx, &event);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
new file mode 100755
index 0000000..ce65b55
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* NB taken from gator  */
+/*
+ * List of possible actions to be controlled by DS-5 Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping
+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
+ * they are unknown inside gator.
+ */
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP  2
+#define GATOR_JOB_SLOT_SOFT_STOPPED  3
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 event, u64 value);
+void kbase_trace_mali_pm_power_off(u32 event, u64 value);
+void kbase_trace_mali_pm_power_on(u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
+void kbase_trace_mali_mmu_as_in_use(int event);
+void kbase_trace_mali_mmu_as_released(int event);
+void kbase_trace_mali_total_alloc_pages_change(long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif  /* _KBASE_GATOR_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
new file mode 100755
index 0000000..7c3c5f3
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -0,0 +1,334 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_gator_api.h"
+#include "mali_kbase_gator_hwcnt_names.h"
+
+#define MALI_MAX_CORES_PER_GROUP		4
+#define MALI_MAX_NUM_BLOCKS_PER_GROUP	8
+#define MALI_COUNTERS_PER_BLOCK			64
+#define MALI_BYTES_PER_COUNTER			4
+
+struct kbase_gator_hwcnt_handles {
+	struct kbase_device *kbdev;
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct work_struct dump_work;
+	int dump_complete;
+	spinlock_t dump_lock;
+};
+
+static void dump_worker(struct work_struct *work);
+
+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
+{
+	const char * const *hardware_counters;
+	struct kbase_device *kbdev;
+	uint32_t product_id;
+	uint32_t count;
+
+	if (!total_counters)
+		return NULL;
+
+	/* Get the first device - it doesn't matter in this case */
+	kbdev = kbase_find_device(-1);
+	if (!kbdev)
+		return NULL;
+
+	product_id = kbdev->gpu_props.props.core_props.product_id;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) {
+		case GPU_ID2_PRODUCT_TMIX:
+			hardware_counters = hardware_counters_mali_tMIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tMIx);
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			hardware_counters = hardware_counters_mali_tHEx;
+			count = ARRAY_SIZE(hardware_counters_mali_tHEx);
+			break;
+		case GPU_ID2_PRODUCT_TSIX:
+			hardware_counters = hardware_counters_mali_tSIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tSIx);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	} else {
+		switch (product_id) {
+			/* If we are using a Mali-T60x device */
+		case GPU_ID_PI_T60X:
+			hardware_counters = hardware_counters_mali_t60x;
+			count = ARRAY_SIZE(hardware_counters_mali_t60x);
+			break;
+			/* If we are using a Mali-T62x device */
+		case GPU_ID_PI_T62X:
+			hardware_counters = hardware_counters_mali_t62x;
+			count = ARRAY_SIZE(hardware_counters_mali_t62x);
+			break;
+			/* If we are using a Mali-T72x device */
+		case GPU_ID_PI_T72X:
+			hardware_counters = hardware_counters_mali_t72x;
+			count = ARRAY_SIZE(hardware_counters_mali_t72x);
+			break;
+			/* If we are using a Mali-T76x device */
+		case GPU_ID_PI_T76X:
+			hardware_counters = hardware_counters_mali_t76x;
+			count = ARRAY_SIZE(hardware_counters_mali_t76x);
+			break;
+			/* If we are using a Mali-T82x device */
+		case GPU_ID_PI_T82X:
+			hardware_counters = hardware_counters_mali_t82x;
+			count = ARRAY_SIZE(hardware_counters_mali_t82x);
+			break;
+			/* If we are using a Mali-T83x device */
+		case GPU_ID_PI_T83X:
+			hardware_counters = hardware_counters_mali_t83x;
+			count = ARRAY_SIZE(hardware_counters_mali_t83x);
+			break;
+			/* If we are using a Mali-T86x device */
+		case GPU_ID_PI_T86X:
+			hardware_counters = hardware_counters_mali_t86x;
+			count = ARRAY_SIZE(hardware_counters_mali_t86x);
+			break;
+			/* If we are using a Mali-T88x device */
+		case GPU_ID_PI_TFRX:
+			hardware_counters = hardware_counters_mali_t88x;
+			count = ARRAY_SIZE(hardware_counters_mali_t88x);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	}
+
+	/* Release the kbdev reference. */
+	kbase_release_device(kbdev);
+
+	*total_counters = count;
+
+	/* If we return a string array take a reference on the module (or fail). */
+	if (hardware_counters && !try_module_get(THIS_MODULE))
+		return NULL;
+
+	return hardware_counters;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
+
+void kbase_gator_hwcnt_term_names(void)
+{
+	/* Release the module reference. */
+	module_put(THIS_MODULE);
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
+
+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	uint32_t dump_size = 0, i = 0;
+
+	if (!in_out_info)
+		return NULL;
+
+	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
+	if (!hand)
+		return NULL;
+
+	INIT_WORK(&hand->dump_work, dump_worker);
+	spin_lock_init(&hand->dump_lock);
+
+	/* Get the first device */
+	hand->kbdev = kbase_find_device(-1);
+	if (!hand->kbdev)
+		goto free_hand;
+
+	dump_size = kbase_vinstr_dump_size(hand->kbdev);
+	hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!hand->vinstr_buffer)
+		goto release_device;
+	in_out_info->kernel_dump_buffer = hand->vinstr_buffer;
+
+	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
+	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
+	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
+
+	/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
+	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
+		uint32_t cg, j;
+		uint64_t core_mask;
+
+		/* There are 8 hardware counters blocks per core group */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			in_out_info->nr_core_groups, GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = in_out_info->nr_core_groups *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			MALI_COUNTERS_PER_BLOCK *
+			MALI_BYTES_PER_COUNTER;
+
+		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
+			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
+
+			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
+				if (core_mask & (1u << j))
+					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+				else
+					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			}
+
+			in_out_info->hwc_layout[i++] = TILER_BLOCK;
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+
+			if (0 == cg)
+				in_out_info->hwc_layout[i++] = JM_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+		}
+	/* If we are using any other device */
+	} else {
+		uint32_t nr_l2, nr_sc_bits, j;
+		uint64_t core_mask;
+
+		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
+
+		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+		nr_sc_bits = fls64(core_mask);
+
+		/* The job manager and tiler sets of counters
+		 * are always present */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+
+		in_out_info->hwc_layout[i++] = JM_BLOCK;
+		in_out_info->hwc_layout[i++] = TILER_BLOCK;
+
+		for (j = 0; j < nr_l2; j++)
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+		while (core_mask != 0ull) {
+			if ((core_mask & 1ull) != 0ull)
+				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			core_mask >>= 1;
+		}
+	}
+
+	in_out_info->nr_hwc_blocks = i;
+	in_out_info->size = dump_size;
+
+	setup.jm_bm = in_out_info->bitmask[0];
+	setup.tiler_bm = in_out_info->bitmask[1];
+	setup.shader_bm = in_out_info->bitmask[2];
+	setup.mmu_l2_bm = in_out_info->bitmask[3];
+	hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx,
+			&setup, hand->vinstr_buffer);
+	if (!hand->vinstr_cli) {
+		dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core");
+		goto free_layout;
+	}
+
+	return hand;
+
+free_layout:
+	kfree(in_out_info->hwc_layout);
+
+free_vinstr_buffer:
+	kfree(hand->vinstr_buffer);
+
+release_device:
+	kbase_release_device(hand->kbdev);
+
+free_hand:
+	kfree(hand);
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
+
+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (in_out_info)
+		kfree(in_out_info->hwc_layout);
+
+	if (opaque_handles) {
+		cancel_work_sync(&opaque_handles->dump_work);
+		kbase_vinstr_detach_client(opaque_handles->vinstr_cli);
+		kfree(opaque_handles->vinstr_buffer);
+		kbase_release_device(opaque_handles->kbdev);
+		kfree(opaque_handles);
+	}
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
+
+static void dump_worker(struct work_struct *work)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+
+	hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work);
+	if (!kbase_vinstr_hwc_dump(hand->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL)) {
+		spin_lock_bh(&hand->dump_lock);
+		hand->dump_complete = 1;
+		spin_unlock_bh(&hand->dump_lock);
+	} else {
+		schedule_work(&hand->dump_work);
+	}
+}
+
+uint32_t kbase_gator_instr_hwcnt_dump_complete(
+		struct kbase_gator_hwcnt_handles *opaque_handles,
+		uint32_t * const success)
+{
+
+	if (opaque_handles && success) {
+		*success = opaque_handles->dump_complete;
+		opaque_handles->dump_complete = 0;
+		return *success;
+	}
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
+
+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (opaque_handles)
+		schedule_work(&opaque_handles->dump_work);
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
new file mode 100755
index 0000000..ef9ac0f
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
@@ -0,0 +1,219 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to fetch hardware counters.
+ */
+
+/* This define is used by the gator kernel module compile to select which DDK
+ * API calling convention to use. If not defined (legacy DDK) gator assumes
+ * version 1. The version to DDK release mapping is:
+ *     Version 1 API: DDK versions r1px, r2px
+ *     Version 2 API: DDK versions r3px, r4px
+ *     Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a
+ * counter dump. If this returns a non-zero value the request has been queued,
+ * otherwise the driver has been unable to do so (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the  previously
+ * requested dump has been succesful. If this returns non-zero the counter dump
+ * has resolved, but the value of *success must also be tested as the dump
+ * may have not been successful. If it returns zero the counter dump was
+ * abandoned due to the device being busy (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ *        kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ *    In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ *        hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ *        hwcnt_name # pointer to name list
+ *
+ *        u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ *        # Iterate over each 64-counter block in this GPU configuration
+ *        for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ *            hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ *            # Skip reserved type blocks - they contain no counters at all
+ *            if( type == RESERVED_BLOCK ) {
+ *                continue;
+ *            }
+ *
+ *            size_t name_offset = type * 64;
+ *            size_t data_offset = i * 64;
+ *
+ *            # Iterate over the names of the counters in this block type
+ *            for( j = 0; j < 64; j++) {
+ *                const char * name = hwcnt_name[name_offset+j];
+ *
+ *                # Skip empty name strings - there is no counter here
+ *                if( name[0] == '\0' ) {
+ *                    continue;
+ *                }
+ *
+ *                u32 data = hwcnt_data[data_offset+j];
+ *
+ *                printk( "COUNTER: %s DATA: %u\n", name, data );
+ *            }
+ *        }
+ *
+ *
+ *     Note that in most implementations you typically want to either SUM or
+ *     AVERAGE multiple instances of the same counter if, for example, you have
+ *     multiple shader cores or multiple L2 caches. The most sensible view for
+ *     analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ *     counters.
+ *
+ * 7] Goto 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling
+ *    kbase_gator_hwcnt_term_names(). This function must only be called if
+ *    init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+enum hwc_type {
+	JM_BLOCK = 0,
+	TILER_BLOCK,
+	SHADER_BLOCK,
+	MMU_L2_BLOCK,
+	RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+	/* Passed from Gator to kbase */
+
+	/* the bitmask of enabled hardware counters for each counter block */
+	uint16_t bitmask[4];
+
+	/* Passed from kbase to Gator */
+
+	/* ptr to counter dump memory */
+	void *kernel_dump_buffer;
+
+	/* size of counter dump memory */
+	uint32_t size;
+
+	/* the ID of the Mali device */
+	uint32_t gpu_id;
+
+	/* the number of shader cores in the GPU */
+	uint32_t nr_cores;
+
+	/* the number of core groups */
+	uint32_t nr_core_groups;
+
+	/* the memory layout of the performance counters */
+	enum hwc_type *hwc_layout;
+
+	/* the total number of hardware couter blocks */
+	uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info   A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ *                      specific information that will be returned to Gator. On entry Gator must have populated the
+ *                      'bitmask' field with the counters it wishes to enable for each class of counter block.
+ *                      Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ *                      enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ *                      the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ *                      kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU.
+ *
+ * @return              Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info       A pointer to a structure containing the enabled counters passed from Gator and all the
+ *                          Mali specific information that will be returned to Gator.
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ * @param[out] success      Non-zero on success, zero on failure.
+ *
+ * @return                  Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ *                          completed dump may not have dumped succesfully, so the caller must test for both
+ *                          a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ *
+ * @return                  Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_counters The total number of counters short names in the Mali devices' list.
+ *
+ * @return                    Pointer to an array of strings of length *total_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
new file mode 100755
index 0000000..ce8e424
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -0,0 +1,2165 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_H_
+#define _KBASE_GATOR_HWCNT_NAMES_H_
+
+/*
+ * "Short names" for hardware counters used by Streamline. Counters names are
+ * stored in accordance with their memory layout in the binary counter block
+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
+ * of 64 counters, and each GPU implements the same set of "masters" although
+ * the counters each master exposes within its block of 64 may vary.
+ *
+ * Counters which are an empty string are simply "holes" in the counter memory
+ * where no counter exists.
+ */
+
+static const char * const hardware_counters_mali_t60x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MESSAGES_SENT",
+	"T60x_MESSAGES_RECEIVED",
+	"T60x_GPU_ACTIVE",
+	"T60x_IRQ_ACTIVE",
+	"T60x_JS0_JOBS",
+	"T60x_JS0_TASKS",
+	"T60x_JS0_ACTIVE",
+	"",
+	"T60x_JS0_WAIT_READ",
+	"T60x_JS0_WAIT_ISSUE",
+	"T60x_JS0_WAIT_DEPEND",
+	"T60x_JS0_WAIT_FINISH",
+	"T60x_JS1_JOBS",
+	"T60x_JS1_TASKS",
+	"T60x_JS1_ACTIVE",
+	"",
+	"T60x_JS1_WAIT_READ",
+	"T60x_JS1_WAIT_ISSUE",
+	"T60x_JS1_WAIT_DEPEND",
+	"T60x_JS1_WAIT_FINISH",
+	"T60x_JS2_JOBS",
+	"T60x_JS2_TASKS",
+	"T60x_JS2_ACTIVE",
+	"",
+	"T60x_JS2_WAIT_READ",
+	"T60x_JS2_WAIT_ISSUE",
+	"T60x_JS2_WAIT_DEPEND",
+	"T60x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T60x_TI_JOBS_PROCESSED",
+	"T60x_TI_TRIANGLES",
+	"T60x_TI_QUADS",
+	"T60x_TI_POLYGONS",
+	"T60x_TI_POINTS",
+	"T60x_TI_LINES",
+	"T60x_TI_VCACHE_HIT",
+	"T60x_TI_VCACHE_MISS",
+	"T60x_TI_FRONT_FACING",
+	"T60x_TI_BACK_FACING",
+	"T60x_TI_PRIM_VISIBLE",
+	"T60x_TI_PRIM_CULLED",
+	"T60x_TI_PRIM_CLIPPED",
+	"T60x_TI_LEVEL0",
+	"T60x_TI_LEVEL1",
+	"T60x_TI_LEVEL2",
+	"T60x_TI_LEVEL3",
+	"T60x_TI_LEVEL4",
+	"T60x_TI_LEVEL5",
+	"T60x_TI_LEVEL6",
+	"T60x_TI_LEVEL7",
+	"T60x_TI_COMMAND_1",
+	"T60x_TI_COMMAND_2",
+	"T60x_TI_COMMAND_3",
+	"T60x_TI_COMMAND_4",
+	"T60x_TI_COMMAND_4_7",
+	"T60x_TI_COMMAND_8_15",
+	"T60x_TI_COMMAND_16_63",
+	"T60x_TI_COMMAND_64",
+	"T60x_TI_COMPRESS_IN",
+	"T60x_TI_COMPRESS_OUT",
+	"T60x_TI_COMPRESS_FLUSH",
+	"T60x_TI_TIMESTAMPS",
+	"T60x_TI_PCACHE_HIT",
+	"T60x_TI_PCACHE_MISS",
+	"T60x_TI_PCACHE_LINE",
+	"T60x_TI_PCACHE_STALL",
+	"T60x_TI_WRBUF_HIT",
+	"T60x_TI_WRBUF_MISS",
+	"T60x_TI_WRBUF_LINE",
+	"T60x_TI_WRBUF_PARTIAL",
+	"T60x_TI_WRBUF_STALL",
+	"T60x_TI_ACTIVE",
+	"T60x_TI_LOADING_DESC",
+	"T60x_TI_INDEX_WAIT",
+	"T60x_TI_INDEX_RANGE_WAIT",
+	"T60x_TI_VERTEX_WAIT",
+	"T60x_TI_PCACHE_WAIT",
+	"T60x_TI_WRBUF_WAIT",
+	"T60x_TI_BUS_READ",
+	"T60x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_TI_UTLB_STALL",
+	"T60x_TI_UTLB_REPLAY_MISS",
+	"T60x_TI_UTLB_REPLAY_FULL",
+	"T60x_TI_UTLB_NEW_MISS",
+	"T60x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T60x_FRAG_ACTIVE",
+	"T60x_FRAG_PRIMITIVES",
+	"T60x_FRAG_PRIMITIVES_DROPPED",
+	"T60x_FRAG_CYCLES_DESC",
+	"T60x_FRAG_CYCLES_PLR",
+	"T60x_FRAG_CYCLES_VERT",
+	"T60x_FRAG_CYCLES_TRISETUP",
+	"T60x_FRAG_CYCLES_RAST",
+	"T60x_FRAG_THREADS",
+	"T60x_FRAG_DUMMY_THREADS",
+	"T60x_FRAG_QUADS_RAST",
+	"T60x_FRAG_QUADS_EZS_TEST",
+	"T60x_FRAG_QUADS_EZS_KILLED",
+	"T60x_FRAG_THREADS_LZS_TEST",
+	"T60x_FRAG_THREADS_LZS_KILLED",
+	"T60x_FRAG_CYCLES_NO_TILE",
+	"T60x_FRAG_NUM_TILES",
+	"T60x_FRAG_TRANS_ELIM",
+	"T60x_COMPUTE_ACTIVE",
+	"T60x_COMPUTE_TASKS",
+	"T60x_COMPUTE_THREADS",
+	"T60x_COMPUTE_CYCLES_DESC",
+	"T60x_TRIPIPE_ACTIVE",
+	"T60x_ARITH_WORDS",
+	"T60x_ARITH_CYCLES_REG",
+	"T60x_ARITH_CYCLES_L0",
+	"T60x_ARITH_FRAG_DEPEND",
+	"T60x_LS_WORDS",
+	"T60x_LS_ISSUES",
+	"T60x_LS_RESTARTS",
+	"T60x_LS_REISSUES_MISS",
+	"T60x_LS_REISSUES_VD",
+	"T60x_LS_REISSUE_ATTRIB_MISS",
+	"T60x_LS_NO_WB",
+	"T60x_TEX_WORDS",
+	"T60x_TEX_BUBBLES",
+	"T60x_TEX_WORDS_L0",
+	"T60x_TEX_WORDS_DESC",
+	"T60x_TEX_ISSUES",
+	"T60x_TEX_RECIRC_FMISS",
+	"T60x_TEX_RECIRC_DESC",
+	"T60x_TEX_RECIRC_MULTI",
+	"T60x_TEX_RECIRC_PMISS",
+	"T60x_TEX_RECIRC_CONF",
+	"T60x_LSC_READ_HITS",
+	"T60x_LSC_READ_MISSES",
+	"T60x_LSC_WRITE_HITS",
+	"T60x_LSC_WRITE_MISSES",
+	"T60x_LSC_ATOMIC_HITS",
+	"T60x_LSC_ATOMIC_MISSES",
+	"T60x_LSC_LINE_FETCHES",
+	"T60x_LSC_DIRTY_LINE",
+	"T60x_LSC_SNOOPS",
+	"T60x_AXI_TLB_STALL",
+	"T60x_AXI_TLB_MISS",
+	"T60x_AXI_TLB_TRANSACTION",
+	"T60x_LS_TLB_MISS",
+	"T60x_LS_TLB_HIT",
+	"T60x_AXI_BEATS_READ",
+	"T60x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MMU_HIT",
+	"T60x_MMU_NEW_MISS",
+	"T60x_MMU_REPLAY_FULL",
+	"T60x_MMU_REPLAY_MISS",
+	"T60x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_UTLB_HIT",
+	"T60x_UTLB_NEW_MISS",
+	"T60x_UTLB_REPLAY_FULL",
+	"T60x_UTLB_REPLAY_MISS",
+	"T60x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_L2_EXT_WRITE_BEATS",
+	"T60x_L2_EXT_READ_BEATS",
+	"T60x_L2_ANY_LOOKUP",
+	"T60x_L2_READ_LOOKUP",
+	"T60x_L2_SREAD_LOOKUP",
+	"T60x_L2_READ_REPLAY",
+	"T60x_L2_READ_SNOOP",
+	"T60x_L2_READ_HIT",
+	"T60x_L2_CLEAN_MISS",
+	"T60x_L2_WRITE_LOOKUP",
+	"T60x_L2_SWRITE_LOOKUP",
+	"T60x_L2_WRITE_REPLAY",
+	"T60x_L2_WRITE_SNOOP",
+	"T60x_L2_WRITE_HIT",
+	"T60x_L2_EXT_READ_FULL",
+	"T60x_L2_EXT_READ_HALF",
+	"T60x_L2_EXT_WRITE_FULL",
+	"T60x_L2_EXT_WRITE_HALF",
+	"T60x_L2_EXT_READ",
+	"T60x_L2_EXT_READ_LINE",
+	"T60x_L2_EXT_WRITE",
+	"T60x_L2_EXT_WRITE_LINE",
+	"T60x_L2_EXT_WRITE_SMALL",
+	"T60x_L2_EXT_BARRIER",
+	"T60x_L2_EXT_AR_STALL",
+	"T60x_L2_EXT_R_BUF_FULL",
+	"T60x_L2_EXT_RD_BUF_FULL",
+	"T60x_L2_EXT_R_RAW",
+	"T60x_L2_EXT_W_STALL",
+	"T60x_L2_EXT_W_BUF_FULL",
+	"T60x_L2_EXT_R_W_HAZARD",
+	"T60x_L2_TAG_HAZARD",
+	"T60x_L2_SNOOP_FULL",
+	"T60x_L2_REPLAY_FULL"
+};
+static const char * const hardware_counters_mali_t62x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MESSAGES_SENT",
+	"T62x_MESSAGES_RECEIVED",
+	"T62x_GPU_ACTIVE",
+	"T62x_IRQ_ACTIVE",
+	"T62x_JS0_JOBS",
+	"T62x_JS0_TASKS",
+	"T62x_JS0_ACTIVE",
+	"",
+	"T62x_JS0_WAIT_READ",
+	"T62x_JS0_WAIT_ISSUE",
+	"T62x_JS0_WAIT_DEPEND",
+	"T62x_JS0_WAIT_FINISH",
+	"T62x_JS1_JOBS",
+	"T62x_JS1_TASKS",
+	"T62x_JS1_ACTIVE",
+	"",
+	"T62x_JS1_WAIT_READ",
+	"T62x_JS1_WAIT_ISSUE",
+	"T62x_JS1_WAIT_DEPEND",
+	"T62x_JS1_WAIT_FINISH",
+	"T62x_JS2_JOBS",
+	"T62x_JS2_TASKS",
+	"T62x_JS2_ACTIVE",
+	"",
+	"T62x_JS2_WAIT_READ",
+	"T62x_JS2_WAIT_ISSUE",
+	"T62x_JS2_WAIT_DEPEND",
+	"T62x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T62x_TI_JOBS_PROCESSED",
+	"T62x_TI_TRIANGLES",
+	"T62x_TI_QUADS",
+	"T62x_TI_POLYGONS",
+	"T62x_TI_POINTS",
+	"T62x_TI_LINES",
+	"T62x_TI_VCACHE_HIT",
+	"T62x_TI_VCACHE_MISS",
+	"T62x_TI_FRONT_FACING",
+	"T62x_TI_BACK_FACING",
+	"T62x_TI_PRIM_VISIBLE",
+	"T62x_TI_PRIM_CULLED",
+	"T62x_TI_PRIM_CLIPPED",
+	"T62x_TI_LEVEL0",
+	"T62x_TI_LEVEL1",
+	"T62x_TI_LEVEL2",
+	"T62x_TI_LEVEL3",
+	"T62x_TI_LEVEL4",
+	"T62x_TI_LEVEL5",
+	"T62x_TI_LEVEL6",
+	"T62x_TI_LEVEL7",
+	"T62x_TI_COMMAND_1",
+	"T62x_TI_COMMAND_2",
+	"T62x_TI_COMMAND_3",
+	"T62x_TI_COMMAND_4",
+	"T62x_TI_COMMAND_5_7",
+	"T62x_TI_COMMAND_8_15",
+	"T62x_TI_COMMAND_16_63",
+	"T62x_TI_COMMAND_64",
+	"T62x_TI_COMPRESS_IN",
+	"T62x_TI_COMPRESS_OUT",
+	"T62x_TI_COMPRESS_FLUSH",
+	"T62x_TI_TIMESTAMPS",
+	"T62x_TI_PCACHE_HIT",
+	"T62x_TI_PCACHE_MISS",
+	"T62x_TI_PCACHE_LINE",
+	"T62x_TI_PCACHE_STALL",
+	"T62x_TI_WRBUF_HIT",
+	"T62x_TI_WRBUF_MISS",
+	"T62x_TI_WRBUF_LINE",
+	"T62x_TI_WRBUF_PARTIAL",
+	"T62x_TI_WRBUF_STALL",
+	"T62x_TI_ACTIVE",
+	"T62x_TI_LOADING_DESC",
+	"T62x_TI_INDEX_WAIT",
+	"T62x_TI_INDEX_RANGE_WAIT",
+	"T62x_TI_VERTEX_WAIT",
+	"T62x_TI_PCACHE_WAIT",
+	"T62x_TI_WRBUF_WAIT",
+	"T62x_TI_BUS_READ",
+	"T62x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_TI_UTLB_STALL",
+	"T62x_TI_UTLB_REPLAY_MISS",
+	"T62x_TI_UTLB_REPLAY_FULL",
+	"T62x_TI_UTLB_NEW_MISS",
+	"T62x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"T62x_SHADER_CORE_ACTIVE",
+	"T62x_FRAG_ACTIVE",
+	"T62x_FRAG_PRIMITIVES",
+	"T62x_FRAG_PRIMITIVES_DROPPED",
+	"T62x_FRAG_CYCLES_DESC",
+	"T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T62x_FRAG_CYCLES_VERT",
+	"T62x_FRAG_CYCLES_TRISETUP",
+	"T62x_FRAG_CYCLES_EZS_ACTIVE",
+	"T62x_FRAG_THREADS",
+	"T62x_FRAG_DUMMY_THREADS",
+	"T62x_FRAG_QUADS_RAST",
+	"T62x_FRAG_QUADS_EZS_TEST",
+	"T62x_FRAG_QUADS_EZS_KILLED",
+	"T62x_FRAG_THREADS_LZS_TEST",
+	"T62x_FRAG_THREADS_LZS_KILLED",
+	"T62x_FRAG_CYCLES_NO_TILE",
+	"T62x_FRAG_NUM_TILES",
+	"T62x_FRAG_TRANS_ELIM",
+	"T62x_COMPUTE_ACTIVE",
+	"T62x_COMPUTE_TASKS",
+	"T62x_COMPUTE_THREADS",
+	"T62x_COMPUTE_CYCLES_DESC",
+	"T62x_TRIPIPE_ACTIVE",
+	"T62x_ARITH_WORDS",
+	"T62x_ARITH_CYCLES_REG",
+	"T62x_ARITH_CYCLES_L0",
+	"T62x_ARITH_FRAG_DEPEND",
+	"T62x_LS_WORDS",
+	"T62x_LS_ISSUES",
+	"T62x_LS_RESTARTS",
+	"T62x_LS_REISSUES_MISS",
+	"T62x_LS_REISSUES_VD",
+	"T62x_LS_REISSUE_ATTRIB_MISS",
+	"T62x_LS_NO_WB",
+	"T62x_TEX_WORDS",
+	"T62x_TEX_BUBBLES",
+	"T62x_TEX_WORDS_L0",
+	"T62x_TEX_WORDS_DESC",
+	"T62x_TEX_ISSUES",
+	"T62x_TEX_RECIRC_FMISS",
+	"T62x_TEX_RECIRC_DESC",
+	"T62x_TEX_RECIRC_MULTI",
+	"T62x_TEX_RECIRC_PMISS",
+	"T62x_TEX_RECIRC_CONF",
+	"T62x_LSC_READ_HITS",
+	"T62x_LSC_READ_MISSES",
+	"T62x_LSC_WRITE_HITS",
+	"T62x_LSC_WRITE_MISSES",
+	"T62x_LSC_ATOMIC_HITS",
+	"T62x_LSC_ATOMIC_MISSES",
+	"T62x_LSC_LINE_FETCHES",
+	"T62x_LSC_DIRTY_LINE",
+	"T62x_LSC_SNOOPS",
+	"T62x_AXI_TLB_STALL",
+	"T62x_AXI_TLB_MISS",
+	"T62x_AXI_TLB_TRANSACTION",
+	"T62x_LS_TLB_MISS",
+	"T62x_LS_TLB_HIT",
+	"T62x_AXI_BEATS_READ",
+	"T62x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MMU_HIT",
+	"T62x_MMU_NEW_MISS",
+	"T62x_MMU_REPLAY_FULL",
+	"T62x_MMU_REPLAY_MISS",
+	"T62x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_UTLB_HIT",
+	"T62x_UTLB_NEW_MISS",
+	"T62x_UTLB_REPLAY_FULL",
+	"T62x_UTLB_REPLAY_MISS",
+	"T62x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_L2_EXT_WRITE_BEATS",
+	"T62x_L2_EXT_READ_BEATS",
+	"T62x_L2_ANY_LOOKUP",
+	"T62x_L2_READ_LOOKUP",
+	"T62x_L2_SREAD_LOOKUP",
+	"T62x_L2_READ_REPLAY",
+	"T62x_L2_READ_SNOOP",
+	"T62x_L2_READ_HIT",
+	"T62x_L2_CLEAN_MISS",
+	"T62x_L2_WRITE_LOOKUP",
+	"T62x_L2_SWRITE_LOOKUP",
+	"T62x_L2_WRITE_REPLAY",
+	"T62x_L2_WRITE_SNOOP",
+	"T62x_L2_WRITE_HIT",
+	"T62x_L2_EXT_READ_FULL",
+	"T62x_L2_EXT_READ_HALF",
+	"T62x_L2_EXT_WRITE_FULL",
+	"T62x_L2_EXT_WRITE_HALF",
+	"T62x_L2_EXT_READ",
+	"T62x_L2_EXT_READ_LINE",
+	"T62x_L2_EXT_WRITE",
+	"T62x_L2_EXT_WRITE_LINE",
+	"T62x_L2_EXT_WRITE_SMALL",
+	"T62x_L2_EXT_BARRIER",
+	"T62x_L2_EXT_AR_STALL",
+	"T62x_L2_EXT_R_BUF_FULL",
+	"T62x_L2_EXT_RD_BUF_FULL",
+	"T62x_L2_EXT_R_RAW",
+	"T62x_L2_EXT_W_STALL",
+	"T62x_L2_EXT_W_BUF_FULL",
+	"T62x_L2_EXT_R_W_HAZARD",
+	"T62x_L2_TAG_HAZARD",
+	"T62x_L2_SNOOP_FULL",
+	"T62x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t72x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T72x_GPU_ACTIVE",
+	"T72x_IRQ_ACTIVE",
+	"T72x_JS0_JOBS",
+	"T72x_JS0_TASKS",
+	"T72x_JS0_ACTIVE",
+	"T72x_JS1_JOBS",
+	"T72x_JS1_TASKS",
+	"T72x_JS1_ACTIVE",
+	"T72x_JS2_JOBS",
+	"T72x_JS2_TASKS",
+	"T72x_JS2_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T72x_TI_JOBS_PROCESSED",
+	"T72x_TI_TRIANGLES",
+	"T72x_TI_QUADS",
+	"T72x_TI_POLYGONS",
+	"T72x_TI_POINTS",
+	"T72x_TI_LINES",
+	"T72x_TI_FRONT_FACING",
+	"T72x_TI_BACK_FACING",
+	"T72x_TI_PRIM_VISIBLE",
+	"T72x_TI_PRIM_CULLED",
+	"T72x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T72x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T72x_FRAG_ACTIVE",
+	"T72x_FRAG_PRIMITIVES",
+	"T72x_FRAG_PRIMITIVES_DROPPED",
+	"T72x_FRAG_THREADS",
+	"T72x_FRAG_DUMMY_THREADS",
+	"T72x_FRAG_QUADS_RAST",
+	"T72x_FRAG_QUADS_EZS_TEST",
+	"T72x_FRAG_QUADS_EZS_KILLED",
+	"T72x_FRAG_THREADS_LZS_TEST",
+	"T72x_FRAG_THREADS_LZS_KILLED",
+	"T72x_FRAG_CYCLES_NO_TILE",
+	"T72x_FRAG_NUM_TILES",
+	"T72x_FRAG_TRANS_ELIM",
+	"T72x_COMPUTE_ACTIVE",
+	"T72x_COMPUTE_TASKS",
+	"T72x_COMPUTE_THREADS",
+	"T72x_TRIPIPE_ACTIVE",
+	"T72x_ARITH_WORDS",
+	"T72x_ARITH_CYCLES_REG",
+	"T72x_LS_WORDS",
+	"T72x_LS_ISSUES",
+	"T72x_LS_RESTARTS",
+	"T72x_LS_REISSUES_MISS",
+	"T72x_TEX_WORDS",
+	"T72x_TEX_BUBBLES",
+	"T72x_TEX_ISSUES",
+	"T72x_LSC_READ_HITS",
+	"T72x_LSC_READ_MISSES",
+	"T72x_LSC_WRITE_HITS",
+	"T72x_LSC_WRITE_MISSES",
+	"T72x_LSC_ATOMIC_HITS",
+	"T72x_LSC_ATOMIC_MISSES",
+	"T72x_LSC_LINE_FETCHES",
+	"T72x_LSC_DIRTY_LINE",
+	"T72x_LSC_SNOOPS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T72x_L2_EXT_WRITE_BEAT",
+	"T72x_L2_EXT_READ_BEAT",
+	"T72x_L2_READ_SNOOP",
+	"T72x_L2_READ_HIT",
+	"T72x_L2_WRITE_SNOOP",
+	"T72x_L2_WRITE_HIT",
+	"T72x_L2_EXT_WRITE_SMALL",
+	"T72x_L2_EXT_BARRIER",
+	"T72x_L2_EXT_AR_STALL",
+	"T72x_L2_EXT_W_STALL",
+	"T72x_L2_SNOOP_FULL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	""
+};
+
+static const char * const hardware_counters_mali_t76x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MESSAGES_SENT",
+	"T76x_MESSAGES_RECEIVED",
+	"T76x_GPU_ACTIVE",
+	"T76x_IRQ_ACTIVE",
+	"T76x_JS0_JOBS",
+	"T76x_JS0_TASKS",
+	"T76x_JS0_ACTIVE",
+	"",
+	"T76x_JS0_WAIT_READ",
+	"T76x_JS0_WAIT_ISSUE",
+	"T76x_JS0_WAIT_DEPEND",
+	"T76x_JS0_WAIT_FINISH",
+	"T76x_JS1_JOBS",
+	"T76x_JS1_TASKS",
+	"T76x_JS1_ACTIVE",
+	"",
+	"T76x_JS1_WAIT_READ",
+	"T76x_JS1_WAIT_ISSUE",
+	"T76x_JS1_WAIT_DEPEND",
+	"T76x_JS1_WAIT_FINISH",
+	"T76x_JS2_JOBS",
+	"T76x_JS2_TASKS",
+	"T76x_JS2_ACTIVE",
+	"",
+	"T76x_JS2_WAIT_READ",
+	"T76x_JS2_WAIT_ISSUE",
+	"T76x_JS2_WAIT_DEPEND",
+	"T76x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T76x_TI_JOBS_PROCESSED",
+	"T76x_TI_TRIANGLES",
+	"T76x_TI_QUADS",
+	"T76x_TI_POLYGONS",
+	"T76x_TI_POINTS",
+	"T76x_TI_LINES",
+	"T76x_TI_VCACHE_HIT",
+	"T76x_TI_VCACHE_MISS",
+	"T76x_TI_FRONT_FACING",
+	"T76x_TI_BACK_FACING",
+	"T76x_TI_PRIM_VISIBLE",
+	"T76x_TI_PRIM_CULLED",
+	"T76x_TI_PRIM_CLIPPED",
+	"T76x_TI_LEVEL0",
+	"T76x_TI_LEVEL1",
+	"T76x_TI_LEVEL2",
+	"T76x_TI_LEVEL3",
+	"T76x_TI_LEVEL4",
+	"T76x_TI_LEVEL5",
+	"T76x_TI_LEVEL6",
+	"T76x_TI_LEVEL7",
+	"T76x_TI_COMMAND_1",
+	"T76x_TI_COMMAND_2",
+	"T76x_TI_COMMAND_3",
+	"T76x_TI_COMMAND_4",
+	"T76x_TI_COMMAND_5_7",
+	"T76x_TI_COMMAND_8_15",
+	"T76x_TI_COMMAND_16_63",
+	"T76x_TI_COMMAND_64",
+	"T76x_TI_COMPRESS_IN",
+	"T76x_TI_COMPRESS_OUT",
+	"T76x_TI_COMPRESS_FLUSH",
+	"T76x_TI_TIMESTAMPS",
+	"T76x_TI_PCACHE_HIT",
+	"T76x_TI_PCACHE_MISS",
+	"T76x_TI_PCACHE_LINE",
+	"T76x_TI_PCACHE_STALL",
+	"T76x_TI_WRBUF_HIT",
+	"T76x_TI_WRBUF_MISS",
+	"T76x_TI_WRBUF_LINE",
+	"T76x_TI_WRBUF_PARTIAL",
+	"T76x_TI_WRBUF_STALL",
+	"T76x_TI_ACTIVE",
+	"T76x_TI_LOADING_DESC",
+	"T76x_TI_INDEX_WAIT",
+	"T76x_TI_INDEX_RANGE_WAIT",
+	"T76x_TI_VERTEX_WAIT",
+	"T76x_TI_PCACHE_WAIT",
+	"T76x_TI_WRBUF_WAIT",
+	"T76x_TI_BUS_READ",
+	"T76x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_TI_UTLB_HIT",
+	"T76x_TI_UTLB_NEW_MISS",
+	"T76x_TI_UTLB_REPLAY_FULL",
+	"T76x_TI_UTLB_REPLAY_MISS",
+	"T76x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T76x_FRAG_ACTIVE",
+	"T76x_FRAG_PRIMITIVES",
+	"T76x_FRAG_PRIMITIVES_DROPPED",
+	"T76x_FRAG_CYCLES_DESC",
+	"T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T76x_FRAG_CYCLES_VERT",
+	"T76x_FRAG_CYCLES_TRISETUP",
+	"T76x_FRAG_CYCLES_EZS_ACTIVE",
+	"T76x_FRAG_THREADS",
+	"T76x_FRAG_DUMMY_THREADS",
+	"T76x_FRAG_QUADS_RAST",
+	"T76x_FRAG_QUADS_EZS_TEST",
+	"T76x_FRAG_QUADS_EZS_KILLED",
+	"T76x_FRAG_THREADS_LZS_TEST",
+	"T76x_FRAG_THREADS_LZS_KILLED",
+	"T76x_FRAG_CYCLES_NO_TILE",
+	"T76x_FRAG_NUM_TILES",
+	"T76x_FRAG_TRANS_ELIM",
+	"T76x_COMPUTE_ACTIVE",
+	"T76x_COMPUTE_TASKS",
+	"T76x_COMPUTE_THREADS",
+	"T76x_COMPUTE_CYCLES_DESC",
+	"T76x_TRIPIPE_ACTIVE",
+	"T76x_ARITH_WORDS",
+	"T76x_ARITH_CYCLES_REG",
+	"T76x_ARITH_CYCLES_L0",
+	"T76x_ARITH_FRAG_DEPEND",
+	"T76x_LS_WORDS",
+	"T76x_LS_ISSUES",
+	"T76x_LS_REISSUE_ATTR",
+	"T76x_LS_REISSUES_VARY",
+	"T76x_LS_VARY_RV_MISS",
+	"T76x_LS_VARY_RV_HIT",
+	"T76x_LS_NO_UNPARK",
+	"T76x_TEX_WORDS",
+	"T76x_TEX_BUBBLES",
+	"T76x_TEX_WORDS_L0",
+	"T76x_TEX_WORDS_DESC",
+	"T76x_TEX_ISSUES",
+	"T76x_TEX_RECIRC_FMISS",
+	"T76x_TEX_RECIRC_DESC",
+	"T76x_TEX_RECIRC_MULTI",
+	"T76x_TEX_RECIRC_PMISS",
+	"T76x_TEX_RECIRC_CONF",
+	"T76x_LSC_READ_HITS",
+	"T76x_LSC_READ_OP",
+	"T76x_LSC_WRITE_HITS",
+	"T76x_LSC_WRITE_OP",
+	"T76x_LSC_ATOMIC_HITS",
+	"T76x_LSC_ATOMIC_OP",
+	"T76x_LSC_LINE_FETCHES",
+	"T76x_LSC_DIRTY_LINE",
+	"T76x_LSC_SNOOPS",
+	"T76x_AXI_TLB_STALL",
+	"T76x_AXI_TLB_MISS",
+	"T76x_AXI_TLB_TRANSACTION",
+	"T76x_LS_TLB_MISS",
+	"T76x_LS_TLB_HIT",
+	"T76x_AXI_BEATS_READ",
+	"T76x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MMU_HIT",
+	"T76x_MMU_NEW_MISS",
+	"T76x_MMU_REPLAY_FULL",
+	"T76x_MMU_REPLAY_MISS",
+	"T76x_MMU_TABLE_WALK",
+	"T76x_MMU_REQUESTS",
+	"",
+	"",
+	"T76x_UTLB_HIT",
+	"T76x_UTLB_NEW_MISS",
+	"T76x_UTLB_REPLAY_FULL",
+	"T76x_UTLB_REPLAY_MISS",
+	"T76x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_L2_EXT_WRITE_BEATS",
+	"T76x_L2_EXT_READ_BEATS",
+	"T76x_L2_ANY_LOOKUP",
+	"T76x_L2_READ_LOOKUP",
+	"T76x_L2_SREAD_LOOKUP",
+	"T76x_L2_READ_REPLAY",
+	"T76x_L2_READ_SNOOP",
+	"T76x_L2_READ_HIT",
+	"T76x_L2_CLEAN_MISS",
+	"T76x_L2_WRITE_LOOKUP",
+	"T76x_L2_SWRITE_LOOKUP",
+	"T76x_L2_WRITE_REPLAY",
+	"T76x_L2_WRITE_SNOOP",
+	"T76x_L2_WRITE_HIT",
+	"T76x_L2_EXT_READ_FULL",
+	"",
+	"T76x_L2_EXT_WRITE_FULL",
+	"T76x_L2_EXT_R_W_HAZARD",
+	"T76x_L2_EXT_READ",
+	"T76x_L2_EXT_READ_LINE",
+	"T76x_L2_EXT_WRITE",
+	"T76x_L2_EXT_WRITE_LINE",
+	"T76x_L2_EXT_WRITE_SMALL",
+	"T76x_L2_EXT_BARRIER",
+	"T76x_L2_EXT_AR_STALL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_EXT_RD_BUF_FULL",
+	"T76x_L2_EXT_R_RAW",
+	"T76x_L2_EXT_W_STALL",
+	"T76x_L2_EXT_W_BUF_FULL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_TAG_HAZARD",
+	"T76x_L2_SNOOP_FULL",
+	"T76x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t82x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MESSAGES_SENT",
+	"T82x_MESSAGES_RECEIVED",
+	"T82x_GPU_ACTIVE",
+	"T82x_IRQ_ACTIVE",
+	"T82x_JS0_JOBS",
+	"T82x_JS0_TASKS",
+	"T82x_JS0_ACTIVE",
+	"",
+	"T82x_JS0_WAIT_READ",
+	"T82x_JS0_WAIT_ISSUE",
+	"T82x_JS0_WAIT_DEPEND",
+	"T82x_JS0_WAIT_FINISH",
+	"T82x_JS1_JOBS",
+	"T82x_JS1_TASKS",
+	"T82x_JS1_ACTIVE",
+	"",
+	"T82x_JS1_WAIT_READ",
+	"T82x_JS1_WAIT_ISSUE",
+	"T82x_JS1_WAIT_DEPEND",
+	"T82x_JS1_WAIT_FINISH",
+	"T82x_JS2_JOBS",
+	"T82x_JS2_TASKS",
+	"T82x_JS2_ACTIVE",
+	"",
+	"T82x_JS2_WAIT_READ",
+	"T82x_JS2_WAIT_ISSUE",
+	"T82x_JS2_WAIT_DEPEND",
+	"T82x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T82x_TI_JOBS_PROCESSED",
+	"T82x_TI_TRIANGLES",
+	"T82x_TI_QUADS",
+	"T82x_TI_POLYGONS",
+	"T82x_TI_POINTS",
+	"T82x_TI_LINES",
+	"T82x_TI_FRONT_FACING",
+	"T82x_TI_BACK_FACING",
+	"T82x_TI_PRIM_VISIBLE",
+	"T82x_TI_PRIM_CULLED",
+	"T82x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T82x_FRAG_ACTIVE",
+	"T82x_FRAG_PRIMITIVES",
+	"T82x_FRAG_PRIMITIVES_DROPPED",
+	"T82x_FRAG_CYCLES_DESC",
+	"T82x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T82x_FRAG_CYCLES_VERT",
+	"T82x_FRAG_CYCLES_TRISETUP",
+	"T82x_FRAG_CYCLES_EZS_ACTIVE",
+	"T82x_FRAG_THREADS",
+	"T82x_FRAG_DUMMY_THREADS",
+	"T82x_FRAG_QUADS_RAST",
+	"T82x_FRAG_QUADS_EZS_TEST",
+	"T82x_FRAG_QUADS_EZS_KILLED",
+	"T82x_FRAG_THREADS_LZS_TEST",
+	"T82x_FRAG_THREADS_LZS_KILLED",
+	"T82x_FRAG_CYCLES_NO_TILE",
+	"T82x_FRAG_NUM_TILES",
+	"T82x_FRAG_TRANS_ELIM",
+	"T82x_COMPUTE_ACTIVE",
+	"T82x_COMPUTE_TASKS",
+	"T82x_COMPUTE_THREADS",
+	"T82x_COMPUTE_CYCLES_DESC",
+	"T82x_TRIPIPE_ACTIVE",
+	"T82x_ARITH_WORDS",
+	"T82x_ARITH_CYCLES_REG",
+	"T82x_ARITH_CYCLES_L0",
+	"T82x_ARITH_FRAG_DEPEND",
+	"T82x_LS_WORDS",
+	"T82x_LS_ISSUES",
+	"T82x_LS_REISSUE_ATTR",
+	"T82x_LS_REISSUES_VARY",
+	"T82x_LS_VARY_RV_MISS",
+	"T82x_LS_VARY_RV_HIT",
+	"T82x_LS_NO_UNPARK",
+	"T82x_TEX_WORDS",
+	"T82x_TEX_BUBBLES",
+	"T82x_TEX_WORDS_L0",
+	"T82x_TEX_WORDS_DESC",
+	"T82x_TEX_ISSUES",
+	"T82x_TEX_RECIRC_FMISS",
+	"T82x_TEX_RECIRC_DESC",
+	"T82x_TEX_RECIRC_MULTI",
+	"T82x_TEX_RECIRC_PMISS",
+	"T82x_TEX_RECIRC_CONF",
+	"T82x_LSC_READ_HITS",
+	"T82x_LSC_READ_OP",
+	"T82x_LSC_WRITE_HITS",
+	"T82x_LSC_WRITE_OP",
+	"T82x_LSC_ATOMIC_HITS",
+	"T82x_LSC_ATOMIC_OP",
+	"T82x_LSC_LINE_FETCHES",
+	"T82x_LSC_DIRTY_LINE",
+	"T82x_LSC_SNOOPS",
+	"T82x_AXI_TLB_STALL",
+	"T82x_AXI_TLB_MISS",
+	"T82x_AXI_TLB_TRANSACTION",
+	"T82x_LS_TLB_MISS",
+	"T82x_LS_TLB_HIT",
+	"T82x_AXI_BEATS_READ",
+	"T82x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MMU_HIT",
+	"T82x_MMU_NEW_MISS",
+	"T82x_MMU_REPLAY_FULL",
+	"T82x_MMU_REPLAY_MISS",
+	"T82x_MMU_TABLE_WALK",
+	"T82x_MMU_REQUESTS",
+	"",
+	"",
+	"T82x_UTLB_HIT",
+	"T82x_UTLB_NEW_MISS",
+	"T82x_UTLB_REPLAY_FULL",
+	"T82x_UTLB_REPLAY_MISS",
+	"T82x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_L2_EXT_WRITE_BEATS",
+	"T82x_L2_EXT_READ_BEATS",
+	"T82x_L2_ANY_LOOKUP",
+	"T82x_L2_READ_LOOKUP",
+	"T82x_L2_SREAD_LOOKUP",
+	"T82x_L2_READ_REPLAY",
+	"T82x_L2_READ_SNOOP",
+	"T82x_L2_READ_HIT",
+	"T82x_L2_CLEAN_MISS",
+	"T82x_L2_WRITE_LOOKUP",
+	"T82x_L2_SWRITE_LOOKUP",
+	"T82x_L2_WRITE_REPLAY",
+	"T82x_L2_WRITE_SNOOP",
+	"T82x_L2_WRITE_HIT",
+	"T82x_L2_EXT_READ_FULL",
+	"",
+	"T82x_L2_EXT_WRITE_FULL",
+	"T82x_L2_EXT_R_W_HAZARD",
+	"T82x_L2_EXT_READ",
+	"T82x_L2_EXT_READ_LINE",
+	"T82x_L2_EXT_WRITE",
+	"T82x_L2_EXT_WRITE_LINE",
+	"T82x_L2_EXT_WRITE_SMALL",
+	"T82x_L2_EXT_BARRIER",
+	"T82x_L2_EXT_AR_STALL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_EXT_RD_BUF_FULL",
+	"T82x_L2_EXT_R_RAW",
+	"T82x_L2_EXT_W_STALL",
+	"T82x_L2_EXT_W_BUF_FULL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_TAG_HAZARD",
+	"T82x_L2_SNOOP_FULL",
+	"T82x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t83x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MESSAGES_SENT",
+	"T83x_MESSAGES_RECEIVED",
+	"T83x_GPU_ACTIVE",
+	"T83x_IRQ_ACTIVE",
+	"T83x_JS0_JOBS",
+	"T83x_JS0_TASKS",
+	"T83x_JS0_ACTIVE",
+	"",
+	"T83x_JS0_WAIT_READ",
+	"T83x_JS0_WAIT_ISSUE",
+	"T83x_JS0_WAIT_DEPEND",
+	"T83x_JS0_WAIT_FINISH",
+	"T83x_JS1_JOBS",
+	"T83x_JS1_TASKS",
+	"T83x_JS1_ACTIVE",
+	"",
+	"T83x_JS1_WAIT_READ",
+	"T83x_JS1_WAIT_ISSUE",
+	"T83x_JS1_WAIT_DEPEND",
+	"T83x_JS1_WAIT_FINISH",
+	"T83x_JS2_JOBS",
+	"T83x_JS2_TASKS",
+	"T83x_JS2_ACTIVE",
+	"",
+	"T83x_JS2_WAIT_READ",
+	"T83x_JS2_WAIT_ISSUE",
+	"T83x_JS2_WAIT_DEPEND",
+	"T83x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T83x_TI_JOBS_PROCESSED",
+	"T83x_TI_TRIANGLES",
+	"T83x_TI_QUADS",
+	"T83x_TI_POLYGONS",
+	"T83x_TI_POINTS",
+	"T83x_TI_LINES",
+	"T83x_TI_FRONT_FACING",
+	"T83x_TI_BACK_FACING",
+	"T83x_TI_PRIM_VISIBLE",
+	"T83x_TI_PRIM_CULLED",
+	"T83x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T83x_FRAG_ACTIVE",
+	"T83x_FRAG_PRIMITIVES",
+	"T83x_FRAG_PRIMITIVES_DROPPED",
+	"T83x_FRAG_CYCLES_DESC",
+	"T83x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T83x_FRAG_CYCLES_VERT",
+	"T83x_FRAG_CYCLES_TRISETUP",
+	"T83x_FRAG_CYCLES_EZS_ACTIVE",
+	"T83x_FRAG_THREADS",
+	"T83x_FRAG_DUMMY_THREADS",
+	"T83x_FRAG_QUADS_RAST",
+	"T83x_FRAG_QUADS_EZS_TEST",
+	"T83x_FRAG_QUADS_EZS_KILLED",
+	"T83x_FRAG_THREADS_LZS_TEST",
+	"T83x_FRAG_THREADS_LZS_KILLED",
+	"T83x_FRAG_CYCLES_NO_TILE",
+	"T83x_FRAG_NUM_TILES",
+	"T83x_FRAG_TRANS_ELIM",
+	"T83x_COMPUTE_ACTIVE",
+	"T83x_COMPUTE_TASKS",
+	"T83x_COMPUTE_THREADS",
+	"T83x_COMPUTE_CYCLES_DESC",
+	"T83x_TRIPIPE_ACTIVE",
+	"T83x_ARITH_WORDS",
+	"T83x_ARITH_CYCLES_REG",
+	"T83x_ARITH_CYCLES_L0",
+	"T83x_ARITH_FRAG_DEPEND",
+	"T83x_LS_WORDS",
+	"T83x_LS_ISSUES",
+	"T83x_LS_REISSUE_ATTR",
+	"T83x_LS_REISSUES_VARY",
+	"T83x_LS_VARY_RV_MISS",
+	"T83x_LS_VARY_RV_HIT",
+	"T83x_LS_NO_UNPARK",
+	"T83x_TEX_WORDS",
+	"T83x_TEX_BUBBLES",
+	"T83x_TEX_WORDS_L0",
+	"T83x_TEX_WORDS_DESC",
+	"T83x_TEX_ISSUES",
+	"T83x_TEX_RECIRC_FMISS",
+	"T83x_TEX_RECIRC_DESC",
+	"T83x_TEX_RECIRC_MULTI",
+	"T83x_TEX_RECIRC_PMISS",
+	"T83x_TEX_RECIRC_CONF",
+	"T83x_LSC_READ_HITS",
+	"T83x_LSC_READ_OP",
+	"T83x_LSC_WRITE_HITS",
+	"T83x_LSC_WRITE_OP",
+	"T83x_LSC_ATOMIC_HITS",
+	"T83x_LSC_ATOMIC_OP",
+	"T83x_LSC_LINE_FETCHES",
+	"T83x_LSC_DIRTY_LINE",
+	"T83x_LSC_SNOOPS",
+	"T83x_AXI_TLB_STALL",
+	"T83x_AXI_TLB_MISS",
+	"T83x_AXI_TLB_TRANSACTION",
+	"T83x_LS_TLB_MISS",
+	"T83x_LS_TLB_HIT",
+	"T83x_AXI_BEATS_READ",
+	"T83x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MMU_HIT",
+	"T83x_MMU_NEW_MISS",
+	"T83x_MMU_REPLAY_FULL",
+	"T83x_MMU_REPLAY_MISS",
+	"T83x_MMU_TABLE_WALK",
+	"T83x_MMU_REQUESTS",
+	"",
+	"",
+	"T83x_UTLB_HIT",
+	"T83x_UTLB_NEW_MISS",
+	"T83x_UTLB_REPLAY_FULL",
+	"T83x_UTLB_REPLAY_MISS",
+	"T83x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_L2_EXT_WRITE_BEATS",
+	"T83x_L2_EXT_READ_BEATS",
+	"T83x_L2_ANY_LOOKUP",
+	"T83x_L2_READ_LOOKUP",
+	"T83x_L2_SREAD_LOOKUP",
+	"T83x_L2_READ_REPLAY",
+	"T83x_L2_READ_SNOOP",
+	"T83x_L2_READ_HIT",
+	"T83x_L2_CLEAN_MISS",
+	"T83x_L2_WRITE_LOOKUP",
+	"T83x_L2_SWRITE_LOOKUP",
+	"T83x_L2_WRITE_REPLAY",
+	"T83x_L2_WRITE_SNOOP",
+	"T83x_L2_WRITE_HIT",
+	"T83x_L2_EXT_READ_FULL",
+	"",
+	"T83x_L2_EXT_WRITE_FULL",
+	"T83x_L2_EXT_R_W_HAZARD",
+	"T83x_L2_EXT_READ",
+	"T83x_L2_EXT_READ_LINE",
+	"T83x_L2_EXT_WRITE",
+	"T83x_L2_EXT_WRITE_LINE",
+	"T83x_L2_EXT_WRITE_SMALL",
+	"T83x_L2_EXT_BARRIER",
+	"T83x_L2_EXT_AR_STALL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_EXT_RD_BUF_FULL",
+	"T83x_L2_EXT_R_RAW",
+	"T83x_L2_EXT_W_STALL",
+	"T83x_L2_EXT_W_BUF_FULL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_TAG_HAZARD",
+	"T83x_L2_SNOOP_FULL",
+	"T83x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t86x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MESSAGES_SENT",
+	"T86x_MESSAGES_RECEIVED",
+	"T86x_GPU_ACTIVE",
+	"T86x_IRQ_ACTIVE",
+	"T86x_JS0_JOBS",
+	"T86x_JS0_TASKS",
+	"T86x_JS0_ACTIVE",
+	"",
+	"T86x_JS0_WAIT_READ",
+	"T86x_JS0_WAIT_ISSUE",
+	"T86x_JS0_WAIT_DEPEND",
+	"T86x_JS0_WAIT_FINISH",
+	"T86x_JS1_JOBS",
+	"T86x_JS1_TASKS",
+	"T86x_JS1_ACTIVE",
+	"",
+	"T86x_JS1_WAIT_READ",
+	"T86x_JS1_WAIT_ISSUE",
+	"T86x_JS1_WAIT_DEPEND",
+	"T86x_JS1_WAIT_FINISH",
+	"T86x_JS2_JOBS",
+	"T86x_JS2_TASKS",
+	"T86x_JS2_ACTIVE",
+	"",
+	"T86x_JS2_WAIT_READ",
+	"T86x_JS2_WAIT_ISSUE",
+	"T86x_JS2_WAIT_DEPEND",
+	"T86x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T86x_TI_JOBS_PROCESSED",
+	"T86x_TI_TRIANGLES",
+	"T86x_TI_QUADS",
+	"T86x_TI_POLYGONS",
+	"T86x_TI_POINTS",
+	"T86x_TI_LINES",
+	"T86x_TI_VCACHE_HIT",
+	"T86x_TI_VCACHE_MISS",
+	"T86x_TI_FRONT_FACING",
+	"T86x_TI_BACK_FACING",
+	"T86x_TI_PRIM_VISIBLE",
+	"T86x_TI_PRIM_CULLED",
+	"T86x_TI_PRIM_CLIPPED",
+	"T86x_TI_LEVEL0",
+	"T86x_TI_LEVEL1",
+	"T86x_TI_LEVEL2",
+	"T86x_TI_LEVEL3",
+	"T86x_TI_LEVEL4",
+	"T86x_TI_LEVEL5",
+	"T86x_TI_LEVEL6",
+	"T86x_TI_LEVEL7",
+	"T86x_TI_COMMAND_1",
+	"T86x_TI_COMMAND_2",
+	"T86x_TI_COMMAND_3",
+	"T86x_TI_COMMAND_4",
+	"T86x_TI_COMMAND_5_7",
+	"T86x_TI_COMMAND_8_15",
+	"T86x_TI_COMMAND_16_63",
+	"T86x_TI_COMMAND_64",
+	"T86x_TI_COMPRESS_IN",
+	"T86x_TI_COMPRESS_OUT",
+	"T86x_TI_COMPRESS_FLUSH",
+	"T86x_TI_TIMESTAMPS",
+	"T86x_TI_PCACHE_HIT",
+	"T86x_TI_PCACHE_MISS",
+	"T86x_TI_PCACHE_LINE",
+	"T86x_TI_PCACHE_STALL",
+	"T86x_TI_WRBUF_HIT",
+	"T86x_TI_WRBUF_MISS",
+	"T86x_TI_WRBUF_LINE",
+	"T86x_TI_WRBUF_PARTIAL",
+	"T86x_TI_WRBUF_STALL",
+	"T86x_TI_ACTIVE",
+	"T86x_TI_LOADING_DESC",
+	"T86x_TI_INDEX_WAIT",
+	"T86x_TI_INDEX_RANGE_WAIT",
+	"T86x_TI_VERTEX_WAIT",
+	"T86x_TI_PCACHE_WAIT",
+	"T86x_TI_WRBUF_WAIT",
+	"T86x_TI_BUS_READ",
+	"T86x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_TI_UTLB_HIT",
+	"T86x_TI_UTLB_NEW_MISS",
+	"T86x_TI_UTLB_REPLAY_FULL",
+	"T86x_TI_UTLB_REPLAY_MISS",
+	"T86x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T86x_FRAG_ACTIVE",
+	"T86x_FRAG_PRIMITIVES",
+	"T86x_FRAG_PRIMITIVES_DROPPED",
+	"T86x_FRAG_CYCLES_DESC",
+	"T86x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T86x_FRAG_CYCLES_VERT",
+	"T86x_FRAG_CYCLES_TRISETUP",
+	"T86x_FRAG_CYCLES_EZS_ACTIVE",
+	"T86x_FRAG_THREADS",
+	"T86x_FRAG_DUMMY_THREADS",
+	"T86x_FRAG_QUADS_RAST",
+	"T86x_FRAG_QUADS_EZS_TEST",
+	"T86x_FRAG_QUADS_EZS_KILLED",
+	"T86x_FRAG_THREADS_LZS_TEST",
+	"T86x_FRAG_THREADS_LZS_KILLED",
+	"T86x_FRAG_CYCLES_NO_TILE",
+	"T86x_FRAG_NUM_TILES",
+	"T86x_FRAG_TRANS_ELIM",
+	"T86x_COMPUTE_ACTIVE",
+	"T86x_COMPUTE_TASKS",
+	"T86x_COMPUTE_THREADS",
+	"T86x_COMPUTE_CYCLES_DESC",
+	"T86x_TRIPIPE_ACTIVE",
+	"T86x_ARITH_WORDS",
+	"T86x_ARITH_CYCLES_REG",
+	"T86x_ARITH_CYCLES_L0",
+	"T86x_ARITH_FRAG_DEPEND",
+	"T86x_LS_WORDS",
+	"T86x_LS_ISSUES",
+	"T86x_LS_REISSUE_ATTR",
+	"T86x_LS_REISSUES_VARY",
+	"T86x_LS_VARY_RV_MISS",
+	"T86x_LS_VARY_RV_HIT",
+	"T86x_LS_NO_UNPARK",
+	"T86x_TEX_WORDS",
+	"T86x_TEX_BUBBLES",
+	"T86x_TEX_WORDS_L0",
+	"T86x_TEX_WORDS_DESC",
+	"T86x_TEX_ISSUES",
+	"T86x_TEX_RECIRC_FMISS",
+	"T86x_TEX_RECIRC_DESC",
+	"T86x_TEX_RECIRC_MULTI",
+	"T86x_TEX_RECIRC_PMISS",
+	"T86x_TEX_RECIRC_CONF",
+	"T86x_LSC_READ_HITS",
+	"T86x_LSC_READ_OP",
+	"T86x_LSC_WRITE_HITS",
+	"T86x_LSC_WRITE_OP",
+	"T86x_LSC_ATOMIC_HITS",
+	"T86x_LSC_ATOMIC_OP",
+	"T86x_LSC_LINE_FETCHES",
+	"T86x_LSC_DIRTY_LINE",
+	"T86x_LSC_SNOOPS",
+	"T86x_AXI_TLB_STALL",
+	"T86x_AXI_TLB_MISS",
+	"T86x_AXI_TLB_TRANSACTION",
+	"T86x_LS_TLB_MISS",
+	"T86x_LS_TLB_HIT",
+	"T86x_AXI_BEATS_READ",
+	"T86x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MMU_HIT",
+	"T86x_MMU_NEW_MISS",
+	"T86x_MMU_REPLAY_FULL",
+	"T86x_MMU_REPLAY_MISS",
+	"T86x_MMU_TABLE_WALK",
+	"T86x_MMU_REQUESTS",
+	"",
+	"",
+	"T86x_UTLB_HIT",
+	"T86x_UTLB_NEW_MISS",
+	"T86x_UTLB_REPLAY_FULL",
+	"T86x_UTLB_REPLAY_MISS",
+	"T86x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_L2_EXT_WRITE_BEATS",
+	"T86x_L2_EXT_READ_BEATS",
+	"T86x_L2_ANY_LOOKUP",
+	"T86x_L2_READ_LOOKUP",
+	"T86x_L2_SREAD_LOOKUP",
+	"T86x_L2_READ_REPLAY",
+	"T86x_L2_READ_SNOOP",
+	"T86x_L2_READ_HIT",
+	"T86x_L2_CLEAN_MISS",
+	"T86x_L2_WRITE_LOOKUP",
+	"T86x_L2_SWRITE_LOOKUP",
+	"T86x_L2_WRITE_REPLAY",
+	"T86x_L2_WRITE_SNOOP",
+	"T86x_L2_WRITE_HIT",
+	"T86x_L2_EXT_READ_FULL",
+	"",
+	"T86x_L2_EXT_WRITE_FULL",
+	"T86x_L2_EXT_R_W_HAZARD",
+	"T86x_L2_EXT_READ",
+	"T86x_L2_EXT_READ_LINE",
+	"T86x_L2_EXT_WRITE",
+	"T86x_L2_EXT_WRITE_LINE",
+	"T86x_L2_EXT_WRITE_SMALL",
+	"T86x_L2_EXT_BARRIER",
+	"T86x_L2_EXT_AR_STALL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_EXT_RD_BUF_FULL",
+	"T86x_L2_EXT_R_RAW",
+	"T86x_L2_EXT_W_STALL",
+	"T86x_L2_EXT_W_BUF_FULL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_TAG_HAZARD",
+	"T86x_L2_SNOOP_FULL",
+	"T86x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t88x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MESSAGES_SENT",
+	"T88x_MESSAGES_RECEIVED",
+	"T88x_GPU_ACTIVE",
+	"T88x_IRQ_ACTIVE",
+	"T88x_JS0_JOBS",
+	"T88x_JS0_TASKS",
+	"T88x_JS0_ACTIVE",
+	"",
+	"T88x_JS0_WAIT_READ",
+	"T88x_JS0_WAIT_ISSUE",
+	"T88x_JS0_WAIT_DEPEND",
+	"T88x_JS0_WAIT_FINISH",
+	"T88x_JS1_JOBS",
+	"T88x_JS1_TASKS",
+	"T88x_JS1_ACTIVE",
+	"",
+	"T88x_JS1_WAIT_READ",
+	"T88x_JS1_WAIT_ISSUE",
+	"T88x_JS1_WAIT_DEPEND",
+	"T88x_JS1_WAIT_FINISH",
+	"T88x_JS2_JOBS",
+	"T88x_JS2_TASKS",
+	"T88x_JS2_ACTIVE",
+	"",
+	"T88x_JS2_WAIT_READ",
+	"T88x_JS2_WAIT_ISSUE",
+	"T88x_JS2_WAIT_DEPEND",
+	"T88x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T88x_TI_JOBS_PROCESSED",
+	"T88x_TI_TRIANGLES",
+	"T88x_TI_QUADS",
+	"T88x_TI_POLYGONS",
+	"T88x_TI_POINTS",
+	"T88x_TI_LINES",
+	"T88x_TI_VCACHE_HIT",
+	"T88x_TI_VCACHE_MISS",
+	"T88x_TI_FRONT_FACING",
+	"T88x_TI_BACK_FACING",
+	"T88x_TI_PRIM_VISIBLE",
+	"T88x_TI_PRIM_CULLED",
+	"T88x_TI_PRIM_CLIPPED",
+	"T88x_TI_LEVEL0",
+	"T88x_TI_LEVEL1",
+	"T88x_TI_LEVEL2",
+	"T88x_TI_LEVEL3",
+	"T88x_TI_LEVEL4",
+	"T88x_TI_LEVEL5",
+	"T88x_TI_LEVEL6",
+	"T88x_TI_LEVEL7",
+	"T88x_TI_COMMAND_1",
+	"T88x_TI_COMMAND_2",
+	"T88x_TI_COMMAND_3",
+	"T88x_TI_COMMAND_4",
+	"T88x_TI_COMMAND_5_7",
+	"T88x_TI_COMMAND_8_15",
+	"T88x_TI_COMMAND_16_63",
+	"T88x_TI_COMMAND_64",
+	"T88x_TI_COMPRESS_IN",
+	"T88x_TI_COMPRESS_OUT",
+	"T88x_TI_COMPRESS_FLUSH",
+	"T88x_TI_TIMESTAMPS",
+	"T88x_TI_PCACHE_HIT",
+	"T88x_TI_PCACHE_MISS",
+	"T88x_TI_PCACHE_LINE",
+	"T88x_TI_PCACHE_STALL",
+	"T88x_TI_WRBUF_HIT",
+	"T88x_TI_WRBUF_MISS",
+	"T88x_TI_WRBUF_LINE",
+	"T88x_TI_WRBUF_PARTIAL",
+	"T88x_TI_WRBUF_STALL",
+	"T88x_TI_ACTIVE",
+	"T88x_TI_LOADING_DESC",
+	"T88x_TI_INDEX_WAIT",
+	"T88x_TI_INDEX_RANGE_WAIT",
+	"T88x_TI_VERTEX_WAIT",
+	"T88x_TI_PCACHE_WAIT",
+	"T88x_TI_WRBUF_WAIT",
+	"T88x_TI_BUS_READ",
+	"T88x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_TI_UTLB_HIT",
+	"T88x_TI_UTLB_NEW_MISS",
+	"T88x_TI_UTLB_REPLAY_FULL",
+	"T88x_TI_UTLB_REPLAY_MISS",
+	"T88x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T88x_FRAG_ACTIVE",
+	"T88x_FRAG_PRIMITIVES",
+	"T88x_FRAG_PRIMITIVES_DROPPED",
+	"T88x_FRAG_CYCLES_DESC",
+	"T88x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T88x_FRAG_CYCLES_VERT",
+	"T88x_FRAG_CYCLES_TRISETUP",
+	"T88x_FRAG_CYCLES_EZS_ACTIVE",
+	"T88x_FRAG_THREADS",
+	"T88x_FRAG_DUMMY_THREADS",
+	"T88x_FRAG_QUADS_RAST",
+	"T88x_FRAG_QUADS_EZS_TEST",
+	"T88x_FRAG_QUADS_EZS_KILLED",
+	"T88x_FRAG_THREADS_LZS_TEST",
+	"T88x_FRAG_THREADS_LZS_KILLED",
+	"T88x_FRAG_CYCLES_NO_TILE",
+	"T88x_FRAG_NUM_TILES",
+	"T88x_FRAG_TRANS_ELIM",
+	"T88x_COMPUTE_ACTIVE",
+	"T88x_COMPUTE_TASKS",
+	"T88x_COMPUTE_THREADS",
+	"T88x_COMPUTE_CYCLES_DESC",
+	"T88x_TRIPIPE_ACTIVE",
+	"T88x_ARITH_WORDS",
+	"T88x_ARITH_CYCLES_REG",
+	"T88x_ARITH_CYCLES_L0",
+	"T88x_ARITH_FRAG_DEPEND",
+	"T88x_LS_WORDS",
+	"T88x_LS_ISSUES",
+	"T88x_LS_REISSUE_ATTR",
+	"T88x_LS_REISSUES_VARY",
+	"T88x_LS_VARY_RV_MISS",
+	"T88x_LS_VARY_RV_HIT",
+	"T88x_LS_NO_UNPARK",
+	"T88x_TEX_WORDS",
+	"T88x_TEX_BUBBLES",
+	"T88x_TEX_WORDS_L0",
+	"T88x_TEX_WORDS_DESC",
+	"T88x_TEX_ISSUES",
+	"T88x_TEX_RECIRC_FMISS",
+	"T88x_TEX_RECIRC_DESC",
+	"T88x_TEX_RECIRC_MULTI",
+	"T88x_TEX_RECIRC_PMISS",
+	"T88x_TEX_RECIRC_CONF",
+	"T88x_LSC_READ_HITS",
+	"T88x_LSC_READ_OP",
+	"T88x_LSC_WRITE_HITS",
+	"T88x_LSC_WRITE_OP",
+	"T88x_LSC_ATOMIC_HITS",
+	"T88x_LSC_ATOMIC_OP",
+	"T88x_LSC_LINE_FETCHES",
+	"T88x_LSC_DIRTY_LINE",
+	"T88x_LSC_SNOOPS",
+	"T88x_AXI_TLB_STALL",
+	"T88x_AXI_TLB_MISS",
+	"T88x_AXI_TLB_TRANSACTION",
+	"T88x_LS_TLB_MISS",
+	"T88x_LS_TLB_HIT",
+	"T88x_AXI_BEATS_READ",
+	"T88x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MMU_HIT",
+	"T88x_MMU_NEW_MISS",
+	"T88x_MMU_REPLAY_FULL",
+	"T88x_MMU_REPLAY_MISS",
+	"T88x_MMU_TABLE_WALK",
+	"T88x_MMU_REQUESTS",
+	"",
+	"",
+	"T88x_UTLB_HIT",
+	"T88x_UTLB_NEW_MISS",
+	"T88x_UTLB_REPLAY_FULL",
+	"T88x_UTLB_REPLAY_MISS",
+	"T88x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_L2_EXT_WRITE_BEATS",
+	"T88x_L2_EXT_READ_BEATS",
+	"T88x_L2_ANY_LOOKUP",
+	"T88x_L2_READ_LOOKUP",
+	"T88x_L2_SREAD_LOOKUP",
+	"T88x_L2_READ_REPLAY",
+	"T88x_L2_READ_SNOOP",
+	"T88x_L2_READ_HIT",
+	"T88x_L2_CLEAN_MISS",
+	"T88x_L2_WRITE_LOOKUP",
+	"T88x_L2_SWRITE_LOOKUP",
+	"T88x_L2_WRITE_REPLAY",
+	"T88x_L2_WRITE_SNOOP",
+	"T88x_L2_WRITE_HIT",
+	"T88x_L2_EXT_READ_FULL",
+	"",
+	"T88x_L2_EXT_WRITE_FULL",
+	"T88x_L2_EXT_R_W_HAZARD",
+	"T88x_L2_EXT_READ",
+	"T88x_L2_EXT_READ_LINE",
+	"T88x_L2_EXT_WRITE",
+	"T88x_L2_EXT_WRITE_LINE",
+	"T88x_L2_EXT_WRITE_SMALL",
+	"T88x_L2_EXT_BARRIER",
+	"T88x_L2_EXT_AR_STALL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_EXT_RD_BUF_FULL",
+	"T88x_L2_EXT_R_RAW",
+	"T88x_L2_EXT_W_STALL",
+	"T88x_L2_EXT_W_BUF_FULL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_TAG_HAZARD",
+	"T88x_L2_SNOOP_FULL",
+	"T88x_L2_REPLAY_FULL"
+};
+
+#include "mali_kbase_gator_hwcnt_names_tmix.h"
+
+#include "mali_kbase_gator_hwcnt_names_thex.h"
+
+#include "mali_kbase_gator_hwcnt_names_tsix.h"
+
+#endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
new file mode 100644
index 0000000..bcceef4
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+
+static const char * const hardware_counters_mali_tHEx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MESSAGES_SENT",
+	"THEx_MESSAGES_RECEIVED",
+	"THEx_GPU_ACTIVE",
+	"THEx_IRQ_ACTIVE",
+	"THEx_JS0_JOBS",
+	"THEx_JS0_TASKS",
+	"THEx_JS0_ACTIVE",
+	"",
+	"THEx_JS0_WAIT_READ",
+	"THEx_JS0_WAIT_ISSUE",
+	"THEx_JS0_WAIT_DEPEND",
+	"THEx_JS0_WAIT_FINISH",
+	"THEx_JS1_JOBS",
+	"THEx_JS1_TASKS",
+	"THEx_JS1_ACTIVE",
+	"",
+	"THEx_JS1_WAIT_READ",
+	"THEx_JS1_WAIT_ISSUE",
+	"THEx_JS1_WAIT_DEPEND",
+	"THEx_JS1_WAIT_FINISH",
+	"THEx_JS2_JOBS",
+	"THEx_JS2_TASKS",
+	"THEx_JS2_ACTIVE",
+	"",
+	"THEx_JS2_WAIT_READ",
+	"THEx_JS2_WAIT_ISSUE",
+	"THEx_JS2_WAIT_DEPEND",
+	"THEx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"THEx_TILER_ACTIVE",
+	"THEx_JOBS_PROCESSED",
+	"THEx_TRIANGLES",
+	"THEx_LINES",
+	"THEx_POINTS",
+	"THEx_FRONT_FACING",
+	"THEx_BACK_FACING",
+	"THEx_PRIM_VISIBLE",
+	"THEx_PRIM_CULLED",
+	"THEx_PRIM_CLIPPED",
+	"THEx_PRIM_SAT_CULLED",
+	"",
+	"",
+	"THEx_BUS_READ",
+	"",
+	"THEx_BUS_WRITE",
+	"THEx_LOADING_DESC",
+	"THEx_IDVS_POS_SHAD_REQ",
+	"THEx_IDVS_POS_SHAD_WAIT",
+	"THEx_IDVS_POS_SHAD_STALL",
+	"THEx_IDVS_POS_FIFO_FULL",
+	"THEx_PREFETCH_STALL",
+	"THEx_VCACHE_HIT",
+	"THEx_VCACHE_MISS",
+	"THEx_VCACHE_LINE_WAIT",
+	"THEx_VFETCH_POS_READ_WAIT",
+	"THEx_VFETCH_VERTEX_WAIT",
+	"THEx_VFETCH_STALL",
+	"THEx_PRIMASSY_STALL",
+	"THEx_BBOX_GEN_STALL",
+	"THEx_IDVS_VBU_HIT",
+	"THEx_IDVS_VBU_MISS",
+	"THEx_IDVS_VBU_LINE_DEALLOCATE",
+	"THEx_IDVS_VAR_SHAD_REQ",
+	"THEx_IDVS_VAR_SHAD_STALL",
+	"THEx_BINNER_STALL",
+	"THEx_ITER_STALL",
+	"THEx_COMPRESS_MISS",
+	"THEx_COMPRESS_STALL",
+	"THEx_PCACHE_HIT",
+	"THEx_PCACHE_MISS",
+	"THEx_PCACHE_MISS_STALL",
+	"THEx_PCACHE_EVICT_STALL",
+	"THEx_PMGR_PTR_WR_STALL",
+	"THEx_PMGR_PTR_RD_STALL",
+	"THEx_PMGR_CMD_WR_STALL",
+	"THEx_WRBUF_ACTIVE",
+	"THEx_WRBUF_HIT",
+	"THEx_WRBUF_MISS",
+	"THEx_WRBUF_NO_FREE_LINE_STALL",
+	"THEx_WRBUF_NO_AXI_ID_STALL",
+	"THEx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"THEx_UTLB_TRANS",
+	"THEx_UTLB_TRANS_HIT",
+	"THEx_UTLB_TRANS_STALL",
+	"THEx_UTLB_TRANS_MISS_DELAY",
+	"THEx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"THEx_FRAG_ACTIVE",
+	"THEx_FRAG_PRIMITIVES",
+	"THEx_FRAG_PRIM_RAST",
+	"THEx_FRAG_FPK_ACTIVE",
+	"THEx_FRAG_STARVING",
+	"THEx_FRAG_WARPS",
+	"THEx_FRAG_PARTIAL_WARPS",
+	"THEx_FRAG_QUADS_RAST",
+	"THEx_FRAG_QUADS_EZS_TEST",
+	"THEx_FRAG_QUADS_EZS_UPDATE",
+	"THEx_FRAG_QUADS_EZS_KILL",
+	"THEx_FRAG_LZS_TEST",
+	"THEx_FRAG_LZS_KILL",
+	"",
+	"THEx_FRAG_PTILES",
+	"THEx_FRAG_TRANS_ELIM",
+	"THEx_QUAD_FPK_KILLER",
+	"",
+	"THEx_COMPUTE_ACTIVE",
+	"THEx_COMPUTE_TASKS",
+	"THEx_COMPUTE_WARPS",
+	"THEx_COMPUTE_STARVING",
+	"THEx_EXEC_CORE_ACTIVE",
+	"THEx_EXEC_ACTIVE",
+	"THEx_EXEC_INSTR_COUNT",
+	"THEx_EXEC_INSTR_DIVERGED",
+	"THEx_EXEC_INSTR_STARVING",
+	"THEx_ARITH_INSTR_SINGLE_FMA",
+	"THEx_ARITH_INSTR_DOUBLE",
+	"THEx_ARITH_INSTR_MSG",
+	"THEx_ARITH_INSTR_MSG_ONLY",
+	"THEx_TEX_INSTR",
+	"THEx_TEX_INSTR_MIPMAP",
+	"THEx_TEX_INSTR_COMPRESSED",
+	"THEx_TEX_INSTR_3D",
+	"THEx_TEX_INSTR_TRILINEAR",
+	"THEx_TEX_COORD_ISSUE",
+	"THEx_TEX_COORD_STALL",
+	"THEx_TEX_STARVE_CACHE",
+	"THEx_TEX_STARVE_FILTER",
+	"THEx_LS_MEM_READ_FULL",
+	"THEx_LS_MEM_READ_SHORT",
+	"THEx_LS_MEM_WRITE_FULL",
+	"THEx_LS_MEM_WRITE_SHORT",
+	"THEx_LS_MEM_ATOMIC",
+	"THEx_VARY_INSTR",
+	"THEx_VARY_SLOT_32",
+	"THEx_VARY_SLOT_16",
+	"THEx_ATTR_INSTR",
+	"THEx_ARITH_INSTR_FP_MUL",
+	"THEx_BEATS_RD_FTC",
+	"THEx_BEATS_RD_FTC_EXT",
+	"THEx_BEATS_RD_LSC",
+	"THEx_BEATS_RD_LSC_EXT",
+	"THEx_BEATS_RD_TEX",
+	"THEx_BEATS_RD_TEX_EXT",
+	"THEx_BEATS_RD_OTHER",
+	"THEx_BEATS_WR_LSC",
+	"THEx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"THEx_L2_RD_MSG_IN",
+	"THEx_L2_RD_MSG_IN_STALL",
+	"THEx_L2_WR_MSG_IN",
+	"THEx_L2_WR_MSG_IN_STALL",
+	"THEx_L2_SNP_MSG_IN",
+	"THEx_L2_SNP_MSG_IN_STALL",
+	"THEx_L2_RD_MSG_OUT",
+	"THEx_L2_RD_MSG_OUT_STALL",
+	"THEx_L2_WR_MSG_OUT",
+	"THEx_L2_ANY_LOOKUP",
+	"THEx_L2_READ_LOOKUP",
+	"THEx_L2_WRITE_LOOKUP",
+	"THEx_L2_EXT_SNOOP_LOOKUP",
+	"THEx_L2_EXT_READ",
+	"THEx_L2_EXT_READ_NOSNP",
+	"THEx_L2_EXT_READ_UNIQUE",
+	"THEx_L2_EXT_READ_BEATS",
+	"THEx_L2_EXT_AR_STALL",
+	"THEx_L2_EXT_AR_CNT_Q1",
+	"THEx_L2_EXT_AR_CNT_Q2",
+	"THEx_L2_EXT_AR_CNT_Q3",
+	"THEx_L2_EXT_RRESP_0_127",
+	"THEx_L2_EXT_RRESP_128_191",
+	"THEx_L2_EXT_RRESP_192_255",
+	"THEx_L2_EXT_RRESP_256_319",
+	"THEx_L2_EXT_RRESP_320_383",
+	"THEx_L2_EXT_WRITE",
+	"THEx_L2_EXT_WRITE_NOSNP_FULL",
+	"THEx_L2_EXT_WRITE_NOSNP_PTL",
+	"THEx_L2_EXT_WRITE_SNP_FULL",
+	"THEx_L2_EXT_WRITE_SNP_PTL",
+	"THEx_L2_EXT_WRITE_BEATS",
+	"THEx_L2_EXT_W_STALL",
+	"THEx_L2_EXT_AW_CNT_Q1",
+	"THEx_L2_EXT_AW_CNT_Q2",
+	"THEx_L2_EXT_AW_CNT_Q3",
+	"THEx_L2_EXT_SNOOP",
+	"THEx_L2_EXT_SNOOP_STALL",
+	"THEx_L2_EXT_SNOOP_RESP_CLEAN",
+	"THEx_L2_EXT_SNOOP_RESP_DATA",
+	"THEx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
new file mode 100644
index 0000000..5ea0677
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+
+static const char * const hardware_counters_mali_tMIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MESSAGES_SENT",
+	"TMIx_MESSAGES_RECEIVED",
+	"TMIx_GPU_ACTIVE",
+	"TMIx_IRQ_ACTIVE",
+	"TMIx_JS0_JOBS",
+	"TMIx_JS0_TASKS",
+	"TMIx_JS0_ACTIVE",
+	"",
+	"TMIx_JS0_WAIT_READ",
+	"TMIx_JS0_WAIT_ISSUE",
+	"TMIx_JS0_WAIT_DEPEND",
+	"TMIx_JS0_WAIT_FINISH",
+	"TMIx_JS1_JOBS",
+	"TMIx_JS1_TASKS",
+	"TMIx_JS1_ACTIVE",
+	"",
+	"TMIx_JS1_WAIT_READ",
+	"TMIx_JS1_WAIT_ISSUE",
+	"TMIx_JS1_WAIT_DEPEND",
+	"TMIx_JS1_WAIT_FINISH",
+	"TMIx_JS2_JOBS",
+	"TMIx_JS2_TASKS",
+	"TMIx_JS2_ACTIVE",
+	"",
+	"TMIx_JS2_WAIT_READ",
+	"TMIx_JS2_WAIT_ISSUE",
+	"TMIx_JS2_WAIT_DEPEND",
+	"TMIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_TILER_ACTIVE",
+	"TMIx_JOBS_PROCESSED",
+	"TMIx_TRIANGLES",
+	"TMIx_LINES",
+	"TMIx_POINTS",
+	"TMIx_FRONT_FACING",
+	"TMIx_BACK_FACING",
+	"TMIx_PRIM_VISIBLE",
+	"TMIx_PRIM_CULLED",
+	"TMIx_PRIM_CLIPPED",
+	"TMIx_PRIM_SAT_CULLED",
+	"",
+	"",
+	"TMIx_BUS_READ",
+	"",
+	"TMIx_BUS_WRITE",
+	"TMIx_LOADING_DESC",
+	"TMIx_IDVS_POS_SHAD_REQ",
+	"TMIx_IDVS_POS_SHAD_WAIT",
+	"TMIx_IDVS_POS_SHAD_STALL",
+	"TMIx_IDVS_POS_FIFO_FULL",
+	"TMIx_PREFETCH_STALL",
+	"TMIx_VCACHE_HIT",
+	"TMIx_VCACHE_MISS",
+	"TMIx_VCACHE_LINE_WAIT",
+	"TMIx_VFETCH_POS_READ_WAIT",
+	"TMIx_VFETCH_VERTEX_WAIT",
+	"TMIx_VFETCH_STALL",
+	"TMIx_PRIMASSY_STALL",
+	"TMIx_BBOX_GEN_STALL",
+	"TMIx_IDVS_VBU_HIT",
+	"TMIx_IDVS_VBU_MISS",
+	"TMIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TMIx_IDVS_VAR_SHAD_REQ",
+	"TMIx_IDVS_VAR_SHAD_STALL",
+	"TMIx_BINNER_STALL",
+	"TMIx_ITER_STALL",
+	"TMIx_COMPRESS_MISS",
+	"TMIx_COMPRESS_STALL",
+	"TMIx_PCACHE_HIT",
+	"TMIx_PCACHE_MISS",
+	"TMIx_PCACHE_MISS_STALL",
+	"TMIx_PCACHE_EVICT_STALL",
+	"TMIx_PMGR_PTR_WR_STALL",
+	"TMIx_PMGR_PTR_RD_STALL",
+	"TMIx_PMGR_CMD_WR_STALL",
+	"TMIx_WRBUF_ACTIVE",
+	"TMIx_WRBUF_HIT",
+	"TMIx_WRBUF_MISS",
+	"TMIx_WRBUF_NO_FREE_LINE_STALL",
+	"TMIx_WRBUF_NO_AXI_ID_STALL",
+	"TMIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TMIx_UTLB_TRANS",
+	"TMIx_UTLB_TRANS_HIT",
+	"TMIx_UTLB_TRANS_STALL",
+	"TMIx_UTLB_TRANS_MISS_DELAY",
+	"TMIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_FRAG_ACTIVE",
+	"TMIx_FRAG_PRIMITIVES",
+	"TMIx_FRAG_PRIM_RAST",
+	"TMIx_FRAG_FPK_ACTIVE",
+	"TMIx_FRAG_STARVING",
+	"TMIx_FRAG_WARPS",
+	"TMIx_FRAG_PARTIAL_WARPS",
+	"TMIx_FRAG_QUADS_RAST",
+	"TMIx_FRAG_QUADS_EZS_TEST",
+	"TMIx_FRAG_QUADS_EZS_UPDATE",
+	"TMIx_FRAG_QUADS_EZS_KILL",
+	"TMIx_FRAG_LZS_TEST",
+	"TMIx_FRAG_LZS_KILL",
+	"",
+	"TMIx_FRAG_PTILES",
+	"TMIx_FRAG_TRANS_ELIM",
+	"TMIx_QUAD_FPK_KILLER",
+	"",
+	"TMIx_COMPUTE_ACTIVE",
+	"TMIx_COMPUTE_TASKS",
+	"TMIx_COMPUTE_WARPS",
+	"TMIx_COMPUTE_STARVING",
+	"TMIx_EXEC_CORE_ACTIVE",
+	"TMIx_EXEC_ACTIVE",
+	"TMIx_EXEC_INSTR_COUNT",
+	"TMIx_EXEC_INSTR_DIVERGED",
+	"TMIx_EXEC_INSTR_STARVING",
+	"TMIx_ARITH_INSTR_SINGLE_FMA",
+	"TMIx_ARITH_INSTR_DOUBLE",
+	"TMIx_ARITH_INSTR_MSG",
+	"TMIx_ARITH_INSTR_MSG_ONLY",
+	"TMIx_TEX_INSTR",
+	"TMIx_TEX_INSTR_MIPMAP",
+	"TMIx_TEX_INSTR_COMPRESSED",
+	"TMIx_TEX_INSTR_3D",
+	"TMIx_TEX_INSTR_TRILINEAR",
+	"TMIx_TEX_COORD_ISSUE",
+	"TMIx_TEX_COORD_STALL",
+	"TMIx_TEX_STARVE_CACHE",
+	"TMIx_TEX_STARVE_FILTER",
+	"TMIx_LS_MEM_READ_FULL",
+	"TMIx_LS_MEM_READ_SHORT",
+	"TMIx_LS_MEM_WRITE_FULL",
+	"TMIx_LS_MEM_WRITE_SHORT",
+	"TMIx_LS_MEM_ATOMIC",
+	"TMIx_VARY_INSTR",
+	"TMIx_VARY_SLOT_32",
+	"TMIx_VARY_SLOT_16",
+	"TMIx_ATTR_INSTR",
+	"TMIx_ARITH_INSTR_FP_MUL",
+	"TMIx_BEATS_RD_FTC",
+	"TMIx_BEATS_RD_FTC_EXT",
+	"TMIx_BEATS_RD_LSC",
+	"TMIx_BEATS_RD_LSC_EXT",
+	"TMIx_BEATS_RD_TEX",
+	"TMIx_BEATS_RD_TEX_EXT",
+	"TMIx_BEATS_RD_OTHER",
+	"TMIx_BEATS_WR_LSC",
+	"TMIx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TMIx_L2_RD_MSG_IN",
+	"TMIx_L2_RD_MSG_IN_STALL",
+	"TMIx_L2_WR_MSG_IN",
+	"TMIx_L2_WR_MSG_IN_STALL",
+	"TMIx_L2_SNP_MSG_IN",
+	"TMIx_L2_SNP_MSG_IN_STALL",
+	"TMIx_L2_RD_MSG_OUT",
+	"TMIx_L2_RD_MSG_OUT_STALL",
+	"TMIx_L2_WR_MSG_OUT",
+	"TMIx_L2_ANY_LOOKUP",
+	"TMIx_L2_READ_LOOKUP",
+	"TMIx_L2_WRITE_LOOKUP",
+	"TMIx_L2_EXT_SNOOP_LOOKUP",
+	"TMIx_L2_EXT_READ",
+	"TMIx_L2_EXT_READ_NOSNP",
+	"TMIx_L2_EXT_READ_UNIQUE",
+	"TMIx_L2_EXT_READ_BEATS",
+	"TMIx_L2_EXT_AR_STALL",
+	"TMIx_L2_EXT_AR_CNT_Q1",
+	"TMIx_L2_EXT_AR_CNT_Q2",
+	"TMIx_L2_EXT_AR_CNT_Q3",
+	"TMIx_L2_EXT_RRESP_0_127",
+	"TMIx_L2_EXT_RRESP_128_191",
+	"TMIx_L2_EXT_RRESP_192_255",
+	"TMIx_L2_EXT_RRESP_256_319",
+	"TMIx_L2_EXT_RRESP_320_383",
+	"TMIx_L2_EXT_WRITE",
+	"TMIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TMIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TMIx_L2_EXT_WRITE_SNP_FULL",
+	"TMIx_L2_EXT_WRITE_SNP_PTL",
+	"TMIx_L2_EXT_WRITE_BEATS",
+	"TMIx_L2_EXT_W_STALL",
+	"TMIx_L2_EXT_AW_CNT_Q1",
+	"TMIx_L2_EXT_AW_CNT_Q2",
+	"TMIx_L2_EXT_AW_CNT_Q3",
+	"TMIx_L2_EXT_SNOOP",
+	"TMIx_L2_EXT_SNOOP_STALL",
+	"TMIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TMIx_L2_EXT_SNOOP_RESP_DATA",
+	"TMIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
new file mode 100644
index 0000000..7ff7009
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TSIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TSIX_H_
+
+static const char * const hardware_counters_mali_tSIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_MESSAGES_SENT",
+	"TSIx_MESSAGES_RECEIVED",
+	"TSIx_GPU_ACTIVE",
+	"TSIx_IRQ_ACTIVE",
+	"TSIx_JS0_JOBS",
+	"TSIx_JS0_TASKS",
+	"TSIx_JS0_ACTIVE",
+	"",
+	"TSIx_JS0_WAIT_READ",
+	"TSIx_JS0_WAIT_ISSUE",
+	"TSIx_JS0_WAIT_DEPEND",
+	"TSIx_JS0_WAIT_FINISH",
+	"TSIx_JS1_JOBS",
+	"TSIx_JS1_TASKS",
+	"TSIx_JS1_ACTIVE",
+	"",
+	"TSIx_JS1_WAIT_READ",
+	"TSIx_JS1_WAIT_ISSUE",
+	"TSIx_JS1_WAIT_DEPEND",
+	"TSIx_JS1_WAIT_FINISH",
+	"TSIx_JS2_JOBS",
+	"TSIx_JS2_TASKS",
+	"TSIx_JS2_ACTIVE",
+	"",
+	"TSIx_JS2_WAIT_READ",
+	"TSIx_JS2_WAIT_ISSUE",
+	"TSIx_JS2_WAIT_DEPEND",
+	"TSIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_TILER_ACTIVE",
+	"TSIx_JOBS_PROCESSED",
+	"TSIx_TRIANGLES",
+	"TSIx_LINES",
+	"TSIx_POINTS",
+	"TSIx_FRONT_FACING",
+	"TSIx_BACK_FACING",
+	"TSIx_PRIM_VISIBLE",
+	"TSIx_PRIM_CULLED",
+	"TSIx_PRIM_CLIPPED",
+	"TSIx_PRIM_SAT_CULLED",
+	"",
+	"",
+	"TSIx_BUS_READ",
+	"",
+	"TSIx_BUS_WRITE",
+	"TSIx_LOADING_DESC",
+	"TSIx_IDVS_POS_SHAD_REQ",
+	"TSIx_IDVS_POS_SHAD_WAIT",
+	"TSIx_IDVS_POS_SHAD_STALL",
+	"TSIx_IDVS_POS_FIFO_FULL",
+	"TSIx_PREFETCH_STALL",
+	"TSIx_VCACHE_HIT",
+	"TSIx_VCACHE_MISS",
+	"TSIx_VCACHE_LINE_WAIT",
+	"TSIx_VFETCH_POS_READ_WAIT",
+	"TSIx_VFETCH_VERTEX_WAIT",
+	"TSIx_VFETCH_STALL",
+	"TSIx_PRIMASSY_STALL",
+	"TSIx_BBOX_GEN_STALL",
+	"TSIx_IDVS_VBU_HIT",
+	"TSIx_IDVS_VBU_MISS",
+	"TSIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TSIx_IDVS_VAR_SHAD_REQ",
+	"TSIx_IDVS_VAR_SHAD_STALL",
+	"TSIx_BINNER_STALL",
+	"TSIx_ITER_STALL",
+	"TSIx_COMPRESS_MISS",
+	"TSIx_COMPRESS_STALL",
+	"TSIx_PCACHE_HIT",
+	"TSIx_PCACHE_MISS",
+	"TSIx_PCACHE_MISS_STALL",
+	"TSIx_PCACHE_EVICT_STALL",
+	"TSIx_PMGR_PTR_WR_STALL",
+	"TSIx_PMGR_PTR_RD_STALL",
+	"TSIx_PMGR_CMD_WR_STALL",
+	"TSIx_WRBUF_ACTIVE",
+	"TSIx_WRBUF_HIT",
+	"TSIx_WRBUF_MISS",
+	"TSIx_WRBUF_NO_FREE_LINE_STALL",
+	"TSIx_WRBUF_NO_AXI_ID_STALL",
+	"TSIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TSIx_UTLB_TRANS",
+	"TSIx_UTLB_TRANS_HIT",
+	"TSIx_UTLB_TRANS_STALL",
+	"TSIx_UTLB_TRANS_MISS_DELAY",
+	"TSIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_FRAG_ACTIVE",
+	"TSIx_FRAG_PRIMITIVES",
+	"TSIx_FRAG_PRIM_RAST",
+	"TSIx_FRAG_FPK_ACTIVE",
+	"TSIx_FRAG_STARVING",
+	"TSIx_FRAG_WARPS",
+	"TSIx_FRAG_PARTIAL_WARPS",
+	"TSIx_FRAG_QUADS_RAST",
+	"TSIx_FRAG_QUADS_EZS_TEST",
+	"TSIx_FRAG_QUADS_EZS_UPDATE",
+	"TSIx_FRAG_QUADS_EZS_KILL",
+	"TSIx_FRAG_LZS_TEST",
+	"TSIx_FRAG_LZS_KILL",
+	"",
+	"TSIx_FRAG_PTILES",
+	"TSIx_FRAG_TRANS_ELIM",
+	"TSIx_QUAD_FPK_KILLER",
+	"",
+	"TSIx_COMPUTE_ACTIVE",
+	"TSIx_COMPUTE_TASKS",
+	"TSIx_COMPUTE_WARPS",
+	"TSIx_COMPUTE_STARVING",
+	"TSIx_EXEC_CORE_ACTIVE",
+	"TSIx_EXEC_ACTIVE",
+	"TSIx_EXEC_INSTR_COUNT",
+	"TSIx_EXEC_INSTR_DIVERGED",
+	"TSIx_EXEC_INSTR_STARVING",
+	"TSIx_ARITH_INSTR_SINGLE_FMA",
+	"TSIx_ARITH_INSTR_DOUBLE",
+	"TSIx_ARITH_INSTR_MSG",
+	"TSIx_ARITH_INSTR_MSG_ONLY",
+	"TSIx_TEX_INSTR",
+	"TSIx_TEX_INSTR_MIPMAP",
+	"TSIx_TEX_INSTR_COMPRESSED",
+	"TSIx_TEX_INSTR_3D",
+	"TSIx_TEX_INSTR_TRILINEAR",
+	"TSIx_TEX_COORD_ISSUE",
+	"TSIx_TEX_COORD_STALL",
+	"TSIx_TEX_STARVE_CACHE",
+	"TSIx_TEX_STARVE_FILTER",
+	"TSIx_LS_MEM_READ_FULL",
+	"TSIx_LS_MEM_READ_SHORT",
+	"TSIx_LS_MEM_WRITE_FULL",
+	"TSIx_LS_MEM_WRITE_SHORT",
+	"TSIx_LS_MEM_ATOMIC",
+	"TSIx_VARY_INSTR",
+	"TSIx_VARY_SLOT_32",
+	"TSIx_VARY_SLOT_16",
+	"TSIx_ATTR_INSTR",
+	"TSIx_ARITH_INSTR_FP_MUL",
+	"TSIx_BEATS_RD_FTC",
+	"TSIx_BEATS_RD_FTC_EXT",
+	"TSIx_BEATS_RD_LSC",
+	"TSIx_BEATS_RD_LSC_EXT",
+	"TSIx_BEATS_RD_TEX",
+	"TSIx_BEATS_RD_TEX_EXT",
+	"TSIx_BEATS_RD_OTHER",
+	"TSIx_BEATS_WR_LSC",
+	"TSIx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TSIx_L2_RD_MSG_IN",
+	"TSIx_L2_RD_MSG_IN_STALL",
+	"TSIx_L2_WR_MSG_IN",
+	"TSIx_L2_WR_MSG_IN_STALL",
+	"TSIx_L2_SNP_MSG_IN",
+	"TSIx_L2_SNP_MSG_IN_STALL",
+	"TSIx_L2_RD_MSG_OUT",
+	"TSIx_L2_RD_MSG_OUT_STALL",
+	"TSIx_L2_WR_MSG_OUT",
+	"TSIx_L2_ANY_LOOKUP",
+	"TSIx_L2_READ_LOOKUP",
+	"TSIx_L2_WRITE_LOOKUP",
+	"TSIx_L2_EXT_SNOOP_LOOKUP",
+	"TSIx_L2_EXT_READ",
+	"TSIx_L2_EXT_READ_NOSNP",
+	"TSIx_L2_EXT_READ_UNIQUE",
+	"TSIx_L2_EXT_READ_BEATS",
+	"TSIx_L2_EXT_AR_STALL",
+	"TSIx_L2_EXT_AR_CNT_Q1",
+	"TSIx_L2_EXT_AR_CNT_Q2",
+	"TSIx_L2_EXT_AR_CNT_Q3",
+	"TSIx_L2_EXT_RRESP_0_127",
+	"TSIx_L2_EXT_RRESP_128_191",
+	"TSIx_L2_EXT_RRESP_192_255",
+	"TSIx_L2_EXT_RRESP_256_319",
+	"TSIx_L2_EXT_RRESP_320_383",
+	"TSIx_L2_EXT_WRITE",
+	"TSIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TSIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TSIx_L2_EXT_WRITE_SNP_FULL",
+	"TSIx_L2_EXT_WRITE_SNP_PTL",
+	"TSIx_L2_EXT_WRITE_BEATS",
+	"TSIx_L2_EXT_W_STALL",
+	"TSIx_L2_EXT_AW_CNT_Q1",
+	"TSIx_L2_EXT_AW_CNT_Q2",
+	"TSIx_L2_EXT_AW_CNT_Q3",
+	"TSIx_L2_EXT_SNOOP",
+	"TSIx_L2_EXT_SNOOP_STALL",
+	"TSIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TSIx_L2_EXT_SNOOP_RESP_DATA",
+	"TSIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
new file mode 100644
index 0000000..5c2367f
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
@@ -0,0 +1,114 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#ifndef _KBASE_GPU_ID_H_
+#define _KBASE_GPU_ID_H_
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT       0
+#define GPU_ID_VERSION_MINOR_SHIFT        4
+#define GPU_ID_VERSION_MAJOR_SHIFT        12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
+#define GPU_ID_VERSION_STATUS             (0xF  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xF  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X                    0x6956
+#define GPU_ID_PI_T62X                    0x0620
+#define GPU_ID_PI_T76X                    0x0750
+#define GPU_ID_PI_T72X                    0x0720
+#define GPU_ID_PI_TFRX                    0x0880
+#define GPU_ID_PI_T86X                    0x0860
+#define GPU_ID_PI_T82X                    0x0820
+#define GPU_ID_PI_T83X                    0x0830
+
+/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
+#define GPU_ID_PI_NEW_FORMAT_START        0x1000
+#define GPU_ID_IS_NEW_FORMAT(product_id)  ((product_id) != GPU_ID_PI_T60X && \
+						(product_id) >= \
+						GPU_ID_PI_NEW_FORMAT_START)
+
+#define GPU_ID2_VERSION_STATUS_SHIFT      0
+#define GPU_ID2_VERSION_MINOR_SHIFT       4
+#define GPU_ID2_VERSION_MAJOR_SHIFT       12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT       16
+#define GPU_ID2_ARCH_REV_SHIFT            20
+#define GPU_ID2_ARCH_MINOR_SHIFT          24
+#define GPU_ID2_ARCH_MAJOR_SHIFT          28
+#define GPU_ID2_VERSION_STATUS            (0xF << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFF << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xF << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xF << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xF << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xF << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+   a product ignoring its version. */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+   revision (major, minor, status) of a product */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+		(((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
+	version_major, version_minor, version_status) \
+		(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+			product_major) | \
+		 GPU_ID2_VERSION_MAKE(version_major, version_minor,     \
+			version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+   a particular GPU model by its arch_major and product_major. */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+   for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+   model. */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+		(((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		    GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
+#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6, 1)
+#define GPU_ID2_PRODUCT_TSIX              GPU_ID2_MODEL_MAKE(7, 0)
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0                   0x1
+#define GPU_ID_S_EAC                      0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major,
+   minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+		(((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		((major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		((minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		((status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+#endif /* _KBASE_GPU_ID_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
new file mode 100755
index 0000000..6df0a1c
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+/** Show callback for the @c gpu_memory debugfs file.
+ *
+ * This function is called to get the contents of the @c gpu_memory debugfs
+ * file. This is a report of current gpu memory usage.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if successfully prints data in debugfs entry file
+ *         -1 if it encountered an error
+ */
+
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+
+	kbdev_list = kbase_dev_list_get();
+	list_for_each(entry, kbdev_list) {
+		struct kbase_device *kbdev = NULL;
+		struct kbasep_kctx_list_element *element;
+
+		kbdev = list_entry(entry, struct kbase_device, entry);
+		/* output the total memory usage and cap for this device */
+		seq_printf(sfile, "%-16s  %10u\n",
+				kbdev->devname,
+				atomic_read(&(kbdev->memdev.used_pages)));
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_for_each_entry(element, &kbdev->kctx_list, link) {
+			/* output the memory usage and cap for each kctx
+			* opened on this device */
+			seq_printf(sfile, "  %s-0x%p %10u\n",
+				"kctx",
+				element->kctx,
+				atomic_read(&(element->kctx->used_pages)));
+		}
+		mutex_unlock(&kbdev->kctx_list_lock);
+	}
+	kbase_dev_list_put(kbdev_list);
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for gpu_memory
+ */
+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_gpu_memory_seq_show , NULL);
+}
+
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+	.open = kbasep_gpu_memory_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ *  Initialize debugfs entry for gpu_memory
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("gpu_memory", S_IRUGO,
+			kbdev->mali_debugfs_directory, NULL,
+			&kbasep_gpu_memory_debugfs_fops);
+	return;
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	return;
+}
+#endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
new file mode 100755
index 0000000..7045693
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H
+#define _KBASE_GPU_MEMORY_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+#endif  /*_KBASE_GPU_MEMORY_DEBUGFS_H*/
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
new file mode 100755
index 0000000..0fb26d9
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -0,0 +1,327 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include <linux/clk.h>
+
+/**
+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
+ * @value:  The value from which to extract bits.
+ * @offset: The first bit to extract (0 being the LSB).
+ * @size:   The number of bits to extract.
+ *
+ * Context: @offset + @size <= 32.
+ *
+ * Return: Bits [@offset, @offset + @size) from @value.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+	(((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
+
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props)
+{
+	kbase_gpu_clk_speed_func get_gpu_speed_mhz;
+	u32 gpu_speed_mhz;
+	int rc = 1;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kbase_props);
+
+	/* Current GPU speed is requested from the system integrator via the GPU_SPEED_FUNC function.
+	 * If that function fails, or the function is not provided by the system integrator, we report the maximum
+	 * GPU speed as specified by GPU_FREQ_KHZ_MAX.
+	 */
+	get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC;
+	if (get_gpu_speed_mhz != NULL) {
+		rc = get_gpu_speed_mhz(&gpu_speed_mhz);
+#ifdef CONFIG_MALI_DEBUG
+		/* Issue a warning message when the reported GPU speed falls outside the min/max range */
+		if (rc == 0) {
+			u32 gpu_speed_khz = gpu_speed_mhz * 1000;
+
+			if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min ||
+					gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
+				dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n",
+						(unsigned long)gpu_speed_khz,
+						(unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min,
+						(unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
+		}
+#endif				/* CONFIG_MALI_DEBUG */
+	}
+	if (kctx->kbdev->clock) {
+		gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000;
+		rc = 0;
+	}
+	if (rc != 0)
+		gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
+
+	kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz;
+
+	memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
+
+	/* Before API 8.2 they expect L3 cache info here, which was always 0 */
+	if (kctx->api_version < KBASE_API_VERSION(8, 2))
+		kbase_props->props.raw_props.suspend_size = 0;
+
+	return 0;
+}
+
+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+	struct mali_base_gpu_coherent_group *current_group;
+	u64 group_present;
+	u64 group_mask;
+	u64 first_set, first_set_prev;
+	u32 num_groups = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != props);
+
+	props->coherency_info.coherency = props->raw_props.mem_features;
+	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+	if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+		/* Group is l2 coherent */
+		group_present = props->raw_props.l2_present;
+	} else {
+		/* Group is l1 coherent */
+		group_present = props->raw_props.shader_present;
+	}
+
+	/*
+	 * The coherent group mask can be computed from the l2 present
+	 * register.
+	 *
+	 * For the coherent group n:
+	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
+	 * where first_set is group_present with only its nth set-bit kept
+	 * (i.e. the position from where a new group starts).
+	 *
+	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
+	 * The first mask is:
+	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
+	 *               = (0x0..010     - 1) & ~(0x0..01      - 1)
+	 *               =  0x0..00f
+	 * The second mask is:
+	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
+	 *               = (0x0..100     - 1) & ~(0x0..010     - 1)
+	 *               =  0x0..0f0
+	 * And so on until all the bits from group_present have been cleared
+	 * (i.e. there is no group left).
+	 */
+
+	current_group = props->coherency_info.group;
+	first_set = group_present & ~(group_present - 1);
+
+	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
+		group_present -= first_set;	/* Clear the current group bit */
+		first_set_prev = first_set;
+
+		first_set = group_present & ~(group_present - 1);
+		group_mask = (first_set - 1) & ~(first_set_prev - 1);
+
+		/* Populate the coherent_group structure for each group */
+		current_group->core_mask = group_mask & props->raw_props.shader_present;
+		current_group->num_cores = hweight64(current_group->core_mask);
+
+		num_groups++;
+		current_group++;
+	}
+
+	if (group_present != 0)
+		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+	props->coherency_info.num_groups = num_groups;
+}
+
+/**
+ * kbase_gpuprops_get_props - Get the GPU configuration
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values from the GPU configuration
+ * registers. Only the raw properties are filled in this function
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	struct kbase_gpuprops_regdump regdump;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get(kbdev, &regdump);
+
+	gpu_props->raw_props.gpu_id = regdump.gpu_id;
+	gpu_props->raw_props.tiler_features = regdump.tiler_features;
+	gpu_props->raw_props.mem_features = regdump.mem_features;
+	gpu_props->raw_props.mmu_features = regdump.mmu_features;
+	gpu_props->raw_props.l2_features = regdump.l2_features;
+	gpu_props->raw_props.suspend_size = regdump.suspend_size;
+
+	gpu_props->raw_props.as_present = regdump.as_present;
+	gpu_props->raw_props.js_present = regdump.js_present;
+	gpu_props->raw_props.shader_present =
+		((u64) regdump.shader_present_hi << 32) +
+		regdump.shader_present_lo;
+	gpu_props->raw_props.tiler_present =
+		((u64) regdump.tiler_present_hi << 32) +
+		regdump.tiler_present_lo;
+	gpu_props->raw_props.l2_present =
+		((u64) regdump.l2_present_hi << 32) +
+		regdump.l2_present_lo;
+#ifdef CONFIG_MALI_CORESTACK
+	gpu_props->raw_props.stack_present =
+		((u64) regdump.stack_present_hi << 32) +
+		regdump.stack_present_lo;
+#else /* CONFIG_MALI_CORESTACK */
+	gpu_props->raw_props.stack_present = 0;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		gpu_props->raw_props.js_features[i] = regdump.js_features[i];
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
+
+	gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
+	gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
+	gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
+	gpu_props->raw_props.thread_features = regdump.thread_features;
+}
+
+/**
+ * kbase_gpuprops_calculate_props - Calculate the derived properties
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev:     The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values derived from the GPU
+ * configuration registers
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	int i;
+
+	/* Populate the base_gpu_props structure */
+	gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+	gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+	gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+	gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+
+	/* Field with number of l2 slices is added to MEM_FEATURES register
+	 * since t76x. Below code assumes that for older GPU reserved bits will
+	 * be read as zero. */
+	gpu_props->l2_props.num_l2_slices =
+		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+
+	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+	if (gpu_props->raw_props.thread_max_threads == 0)
+		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+	else
+		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+	else
+		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+	if (gpu_props->raw_props.thread_max_barrier_size == 0)
+		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+	else
+		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+	gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+
+	/* If values are not specified, then use defaults */
+	if (gpu_props->thread_props.max_registers == 0) {
+		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+	}
+	/* Initialize the coherent_group structure for each group */
+	kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *gpu_props;
+	struct gpu_raw_gpu_props *raw;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	gpu_props = &kbdev->gpu_props;
+	raw = &gpu_props->props.raw_props;
+
+	/* Initialize the base_gpu_props structure from the hardware */
+	kbase_gpuprops_get_props(&gpu_props->props, kbdev);
+
+	/* Populate the derived properties */
+	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
+
+	/* Populate kbase-only fields */
+	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
+	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
+
+	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
+
+	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
+	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
+
+	gpu_props->num_cores = hweight64(raw->shader_present);
+	gpu_props->num_core_groups = hweight64(raw->l2_present);
+	gpu_props->num_address_spaces = hweight32(raw->as_present);
+	gpu_props->num_job_slots = hweight32(raw->js_present);
+}
+
+void kbase_gpuprops_set_features(struct kbase_device *kbdev)
+{
+	base_gpu_props *gpu_props;
+	struct kbase_gpuprops_regdump regdump;
+
+	gpu_props = &kbdev->gpu_props.props;
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get_features(kbdev, &regdump);
+
+	/*
+	 * Copy the raw value from the register, later this will get turned
+	 * into the selected coherency mode.
+	 */
+	gpu_props->raw_props.coherency_mode = regdump.coherency_features;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
new file mode 100755
index 0000000..f3c95cc
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev		The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_set_features - Set up Kbase GPU properties
+ * @kbdev:   Device pointer
+ *
+ * This function sets up GPU properties that are dependent on the hardware
+ * features bitmask. This function must be preceeded by a call to
+ * kbase_hw_set_features_mask().
+ */
+void kbase_gpuprops_set_features(struct kbase_device *kbdev);
+
+/**
+ * @brief Provide GPU properties to userside through UKU call.
+ *
+ * Fill the struct kbase_uk_gpuprops with values from GPU configuration registers.
+ *
+ * @param kctx		The struct kbase_context structure
+ * @param kbase_props	A copy of the struct kbase_uk_gpuprops structure from userspace
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props);
+
+#endif				/* _KBASE_GPUPROPS_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
new file mode 100755
index 0000000..920cf77
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops_types.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_TYPES_H_
+#define _KBASE_GPUPROPS_TYPES_H_
+
+#include "mali_base_kernel.h"
+
+#define KBASE_GPU_SPEED_MHZ    123
+#define KBASE_GPU_PC_SIZE_LOG2 24U
+
+struct kbase_gpuprops_regdump {
+	u32 gpu_id;
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 tiler_features;
+	u32 mem_features;
+	u32 mmu_features;
+	u32 as_present;
+	u32 js_present;
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 shader_present_lo;
+	u32 shader_present_hi;
+	u32 tiler_present_lo;
+	u32 tiler_present_hi;
+	u32 l2_present_lo;
+	u32 l2_present_hi;
+	u32 stack_present_lo;
+	u32 stack_present_hi;
+	u32 coherency_features;
+};
+
+struct kbase_gpu_cache_props {
+	u8 associativity;
+	u8 external_bus_width;
+};
+
+struct kbase_gpu_mem_props {
+	u8 core_group;
+};
+
+struct kbase_gpu_mmu_props {
+	u8 va_bits;
+	u8 pa_bits;
+};
+
+struct kbase_gpu_props {
+	/* kernel-only properties */
+	u8 num_cores;
+	u8 num_core_groups;
+	u8 num_address_spaces;
+	u8 num_job_slots;
+
+	struct kbase_gpu_cache_props l2_props;
+
+	struct kbase_gpu_mem_props mem;
+	struct kbase_gpu_mmu_props mmu;
+
+	/**
+	 * Implementation specific irq throttle value (us), should be adjusted during integration.
+	 */
+	int irq_throttle_time_us;
+
+	/* Properties shared with userspace */
+	base_gpu_props props;
+};
+
+#endif				/* _KBASE_GPUPROPS_TYPES_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
new file mode 100755
index 0000000..51a31fb
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -0,0 +1,281 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Run-time work-arounds helpers
+ */
+
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_midg_regmap.h>
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+
+void kbase_hw_set_features_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_feature *features;
+	u32 gpu_id;
+	u32 product_id;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			features = base_hw_features_tMIx;
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			features = base_hw_features_tHEx;
+			break;
+		case GPU_ID2_PRODUCT_TSIX:
+			features = base_hw_features_tSIx;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	} else {
+		switch (product_id) {
+		case GPU_ID_PI_TFRX:
+			/* FALLTHROUGH */
+		case GPU_ID_PI_T86X:
+			features = base_hw_features_tFxx;
+			break;
+		case GPU_ID_PI_T83X:
+			features = base_hw_features_t83x;
+			break;
+		case GPU_ID_PI_T82X:
+			features = base_hw_features_t82x;
+			break;
+		case GPU_ID_PI_T76X:
+			features = base_hw_features_t76x;
+			break;
+		case GPU_ID_PI_T72X:
+			features = base_hw_features_t72x;
+			break;
+		case GPU_ID_PI_T62X:
+			features = base_hw_features_t62x;
+			break;
+		case GPU_ID_PI_T60X:
+			features = base_hw_features_t60x;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	}
+
+	for (; *features != BASE_HW_FEATURE_END; features++)
+		set_bit(*features, &kbdev->hw_features_mask[0]);
+}
+
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues;
+	u32 gpu_id;
+	u32 product_id;
+	u32 impl_tech;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
+
+	if (impl_tech != IMPLEMENTATION_MODEL) {
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id) {
+			case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 1):
+				issues = base_hw_issues_tMIx_r0p0_05dev0;
+				break;
+			case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 2):
+				issues = base_hw_issues_tMIx_r0p0;
+				break;
+			default:
+				if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+							GPU_ID2_PRODUCT_TMIX) {
+					issues = base_hw_issues_tMIx_r0p0;
+				} else if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+							GPU_ID2_PRODUCT_THEX) {
+					issues = base_hw_issues_tHEx_r0p0;
+				} else if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+							GPU_ID2_PRODUCT_TSIX) {
+					issues = base_hw_issues_tSIx_r0p0;
+				} else {
+					dev_err(kbdev->dev,
+						"Unknown GPU ID %x", gpu_id);
+					return -EINVAL;
+				}
+			}
+		} else {
+			switch (gpu_id) {
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+				issues = base_hw_issues_t60x_r0p0_15dev0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+				issues = base_hw_issues_t60x_r0p0_eac;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+				issues = base_hw_issues_t60x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+				issues = base_hw_issues_t62x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+				issues = base_hw_issues_t62x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+				issues = base_hw_issues_t62x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+				issues = base_hw_issues_t76x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+				issues = base_hw_issues_t76x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+				issues = base_hw_issues_t76x_r0p1_50rel0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+				issues = base_hw_issues_t76x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+				issues = base_hw_issues_t76x_r0p3;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+				issues = base_hw_issues_t76x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+				issues = base_hw_issues_t72x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+				issues = base_hw_issues_t72x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+				issues = base_hw_issues_t72x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+				issues = base_hw_issues_tFRx_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+				issues = base_hw_issues_tFRx_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8):
+				issues = base_hw_issues_tFRx_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
+				issues = base_hw_issues_tFRx_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+				issues = base_hw_issues_t86x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8):
+				issues = base_hw_issues_t86x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
+				issues = base_hw_issues_t86x_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
+				issues = base_hw_issues_t83x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8):
+				issues = base_hw_issues_t83x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
+				issues = base_hw_issues_t82x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
+				issues = base_hw_issues_t82x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8):
+				issues = base_hw_issues_t82x_r1p0;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		}
+	} else {
+		/* Software model */
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+			case GPU_ID2_PRODUCT_TMIX:
+				issues = base_hw_issues_model_tMIx;
+				break;
+			case GPU_ID2_PRODUCT_THEX:
+				issues = base_hw_issues_model_tHEx;
+				break;
+			case GPU_ID2_PRODUCT_TSIX:
+				issues = base_hw_issues_model_tSIx;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		} else {
+			switch (product_id) {
+			case GPU_ID_PI_T60X:
+				issues = base_hw_issues_model_t60x;
+				break;
+			case GPU_ID_PI_T62X:
+				issues = base_hw_issues_model_t62x;
+				break;
+			case GPU_ID_PI_T72X:
+				issues = base_hw_issues_model_t72x;
+				break;
+			case GPU_ID_PI_T76X:
+				issues = base_hw_issues_model_t76x;
+				break;
+			case GPU_ID_PI_TFRX:
+				issues = base_hw_issues_model_tFRx;
+				break;
+			case GPU_ID_PI_T86X:
+				issues = base_hw_issues_model_t86x;
+				break;
+			case GPU_ID_PI_T83X:
+				issues = base_hw_issues_model_t83x;
+				break;
+			case GPU_ID_PI_T82X:
+				issues = base_hw_issues_model_t82x;
+				break;
+			default:
+				dev_err(kbdev->dev, "Unknown GPU ID %x",
+					gpu_id);
+				return -EINVAL;
+			}
+		}
+	}
+
+	dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT);
+
+	for (; *issues != BASE_HW_ISSUE_END; issues++)
+		set_bit(*issues, &kbdev->hw_issues_mask[0]);
+
+	return 0;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
new file mode 100755
index 0000000..fce7d29
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Run-time work-arounds helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+	test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+	test_bit(feature, &(kbdev)->hw_features_mask[0])
+
+/**
+ * @brief Set the HW issues mask depending on the GPU ID
+ */
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif				/* _KBASE_HW_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
new file mode 100755
index 0000000..b09be99
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access backend common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_BACKEND_H_
+#define _KBASE_HWACCESS_BACKEND_H_
+
+/**
+ * kbase_backend_early_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_early_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
new file mode 100755
index 0000000..0acf297
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_gpu_defs.h
+ * HW access common definitions
+ */
+
+#ifndef _KBASE_HWACCESS_DEFS_H_
+#define _KBASE_HWACCESS_DEFS_H_
+
+#include <mali_kbase_jm_defs.h>
+
+/* The hwaccess_lock (a spinlock) must be held when accessing this structure */
+struct kbase_hwaccess_data {
+	struct kbase_context *active_kctx;
+
+	struct kbase_backend_data backend;
+};
+
+#endif /* _KBASE_HWACCESS_DEFS_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
new file mode 100755
index 0000000..cf8a813
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ *				  GPU
+ * @kbdev:	Device pointer
+ * @regdump:	Pointer to struct kbase_gpuprops_regdump structure
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+/**
+ * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU
+ * @kbdev:   Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads GPU properties that are dependent on the hardware
+ * features bitmask
+ */
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
new file mode 100755
index 0000000..5de2b75
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include <mali_kbase_instr_defs.h>
+
+/**
+ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * @kbdev:	Kbase device
+ * @kctx:	Kbase context
+ * @setup:	HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx:	Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true,
+ * of call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ *				       completed.
+ * @kctx:	Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ *				     completed
+ * @kctx:	Kbase context
+ * @success:	Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx:	Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_init() - Terminate the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
new file mode 100755
index 0000000..c2c3909
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -0,0 +1,377 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_JM_H_
+#define _KBASE_HWACCESS_JM_H_
+
+/**
+ * kbase_backend_run_atom() - Run an atom on the GPU
+ * @kbdev:	Device pointer
+ * @atom:	Atom to run
+ *
+ * Caller must hold the HW access lock
+ */
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_slot_update - Update state based on slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ *
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ */
+void kbase_backend_slot_update(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_find_free_address_space() - Find a free address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * If no address spaces are currently free, then this function can evict an
+ * idle context from the runpool, freeing up the address space it was using.
+ *
+ * The address space is marked as in use. The caller must either assign a
+ * context using kbase_gpu_use_ctx(), or release it using
+ * kbase_gpu_release_free_address_space()
+ *
+ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none
+ *	   available
+ */
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_free_address_space() - Release an address space.
+ * @kbdev:	Device pointer
+ * @as_nr:	Address space to release
+ *
+ * The address space must have been returned by
+ * kbase_gpu_find_free_address_space().
+ */
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+						int as_nr);
+
+/**
+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the
+ *			     provided address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @as_nr:	Free address space to use
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * Return: true if successful, false if ASID not assigned.
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr);
+
+/**
+ * kbase_backend_use_ctx_sched() - Activate a context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * The context must already be scheduled and assigned to an address space. If
+ * the context is not scheduled, then kbase_gpu_use_ctx() should be used
+ * instead.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if context is now active, false otherwise (ie if context does
+ *	   not have an address space assigned)
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+					struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
+ *                                 de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+				struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will
+ *                                   de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex
+ *
+ * This function must perform any operations that could not be performed in IRQ
+ * context by kbase_backend_release_ctx_irq().
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_complete_wq() - Perform backend-specific actions required on
+ *				 completing an atom.
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the atom to complete
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ *
+ * Return: true if atom has completed, false if atom should be re-submitted
+ */
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions
+ *                                        required on completing an atom, after
+ *                                        any scheduling has taken place.
+ * @kbdev:         Device pointer
+ * @core_req:      Core requirements of atom
+ * @affinity:      Affinity of atom
+ * @coreref_state: Coreref state of atom
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ */
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state);
+
+/**
+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
+ *			   and remove any others from the ringbuffers.
+ * @kbdev:		Device pointer
+ * @end_timestamp:	Timestamp of reset
+ */
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
+
+/**
+ * kbase_backend_inspect_head() - Return the atom currently at the head of slot
+ *				  @js
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
+ *                              @js
+ * @kbdev: Device pointer
+ * @js:    Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
+ *				      slot.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot
+ */
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
+ *					that are currently on the GPU.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot @js that are currently on the GPU.
+ */
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
+ *				       has changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg starting/stopping
+ * scheduling timers).
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg updating timeouts of
+ * currently running atoms).
+ */
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_slot_free() - Return the number of jobs that can be currently
+ *			       submitted to slot @js.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of jobs that can be submitted.
+ */
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
+ *                                        running from a context
+ * @kctx: Context pointer
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting
+ * context from the hardware.
+ */
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+
+/**
+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and
+ *                               to be descheduled.
+ * @kctx: Context pointer
+ *
+ * This should be called following kbase_js_zap_context(), to ensure the context
+ * can be safely destroyed.
+ */
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
+
+/**
+ * kbase_backend_get_current_flush_id - Return the current flush ID
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: the current flush ID to be recorded for each job chain
+ */
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it returns
+ * true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_locked - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_silent - Reset the GPU silently
+ * @kbdev: Device pointer
+ *
+ * Reset the GPU without trying to cancel jobs and don't emit messages into
+ * the kernel log while doing the reset.
+ *
+ * This function should be used in cases where we are doing a controlled reset
+ * of the GPU as part of normal processing (e.g. exiting protected mode) where
+ * the driver will have ensured the scheduler has been idled and all other
+ * users of the GPU (e.g. instrumentation) have been suspended.
+ */
+void kbase_reset_gpu_silent(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_active - Reports if the GPU is being reset
+ * @kbdev: Device pointer
+ *
+ * Return: True if the GPU is in the process of being reset.
+ */
+bool kbase_reset_gpu_active(struct kbase_device *kbdev);
+#endif
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom);
+
+#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
new file mode 100755
index 0000000..bae7c0d
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_pm.h
+ * HW access power manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_PM_H_
+#define _KBASE_HWACCESS_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+#include <mali_kbase_pm_defs.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/* Functions common to all HW access backends */
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return 0 if the power management framework was successfully
+ *         initialized.
+ */
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_hwaccess_pm_powerup - Power up the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
+ *
+ * Power up GPU after all modules have been initialized and interrupt handlers
+ * installed.
+ *
+ * Return: 0 if powerup was successful.
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ *
+ * Should ensure that no new interrupts are generated, but allow any currently
+ * running interrupt handlers to complete successfully. The GPU is forced off by
+ * the time this function returns, regardless of whether or not the active power
+ * policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to suspend the GPU
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to resume the GPU from a suspend
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for activating the GPU. Called when the first
+ * context goes active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for idling the GPU. Called when the last
+ * context goes idle.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev         The kbase device structure for the device (must be a
+ *                      valid pointer)
+ * @param new_core_mask_js0 The core mask to use for job slot 0
+ * @param new_core_mask_js0 The core mask to use for job slot 1
+ * @param new_core_mask_js0 The core mask to use for job slot 2
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+int set_policy_by_name(struct kbase_device *kbdev, const char *name);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+					const struct kbase_pm_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
+
+#endif /* _KBASE_HWACCESS_PM_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
new file mode 100755
index 0000000..89d26ea
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
new file mode 100755
index 0000000..cf7bf1b
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,66 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_HWCNT_READER_H_
+#define _KBASE_HWCNT_READER_H_
+
+/* The ids of ioctl commands. */
+#define KBASE_HWCNT_READER 0xBE
+#define KBASE_HWCNT_READER_GET_HWVER       _IOR(KBASE_HWCNT_READER, 0x00, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32)
+#define KBASE_HWCNT_READER_DUMP            _IOW(KBASE_HWCNT_READER, 0x10, u32)
+#define KBASE_HWCNT_READER_CLEAR           _IOW(KBASE_HWCNT_READER, 0x11, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER      _IOR(KBASE_HWCNT_READER, 0x20,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_PUT_BUFFER      _IOW(KBASE_HWCNT_READER, 0x21,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_SET_INTERVAL    _IOW(KBASE_HWCNT_READER, 0x30, u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT    _IOW(KBASE_HWCNT_READER, 0x40, u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT   _IOW(KBASE_HWCNT_READER, 0x41, u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32)
+
+/**
+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
+ * @timestamp:  time when sample was collected
+ * @event_id:   id of an event that triggered sample collection
+ * @buffer_idx: position in sampling area where sample buffer was stored
+ */
+struct kbase_hwcnt_reader_metadata {
+	u64 timestamp;
+	u32 event_id;
+	u32 buffer_idx;
+};
+
+/**
+ * enum base_hwcnt_reader_event - hwcnt dumping events
+ * @BASE_HWCNT_READER_EVENT_MANUAL:   manual request for dump
+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
+ * @BASE_HWCNT_READER_EVENT_PREJOB:   prejob dump request
+ * @BASE_HWCNT_READER_EVENT_POSTJOB:  postjob dump request
+ * @BASE_HWCNT_READER_EVENT_COUNT:    number of supported events
+ */
+enum base_hwcnt_reader_event {
+	BASE_HWCNT_READER_EVENT_MANUAL,
+	BASE_HWCNT_READER_EVENT_PERIODIC,
+	BASE_HWCNT_READER_EVENT_PREJOB,
+	BASE_HWCNT_READER_EVENT_POSTJOB,
+
+	BASE_HWCNT_READER_EVENT_COUNT
+};
+
+#endif /* _KBASE_HWCNT_READER_H_ */
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c
new file mode 100755
index 0000000..e657a60
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c
@@ -0,0 +1,435 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/of.h>
+#include <linux/sysfs.h>
+
+#include <mali_kbase.h>
+
+#define NR_IPA_GROUPS 8
+#define MAX_GROUP_NAME_LEN 15
+
+struct kbase_ipa_context;
+
+/**
+ * struct ipa_group - represents a single IPA group
+ * @name:               name of the IPA group
+ * @capacitance:        capacitance constant for IPA group
+ * @calc_power:         function to calculate power for IPA group
+ */
+struct ipa_group {
+	const char name[MAX_GROUP_NAME_LEN + 1];
+	u32 capacitance;
+	u32 (*calc_power)(struct kbase_ipa_context *,
+			struct ipa_group *);
+};
+
+#include <mali_kbase_ipa_tables.h>
+
+/**
+ * struct kbase_ipa_context - IPA context per device
+ * @kbdev:              pointer to kbase device
+ * @groups:             array of IPA groups for this context
+ * @vinstr_cli:         vinstr client handle
+ * @vinstr_buffer:      buffer to dump hardware counters onto
+ * @ipa_lock:           protects the entire IPA context
+ */
+struct kbase_ipa_context {
+	struct kbase_device *kbdev;
+	struct ipa_group groups[NR_IPA_GROUPS];
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct mutex ipa_lock;
+};
+
+static ssize_t show_ipa_group(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbase_ipa_context *ctx = kbdev->ipa_ctx;
+	ssize_t count = -EINVAL;
+	size_t i;
+
+	mutex_lock(&ctx->ipa_lock);
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		if (!strncmp(ctx->groups[i].name, attr->attr.name,
+				MAX_GROUP_NAME_LEN + 1)) {
+			count = snprintf(buf, PAGE_SIZE, "%lu\n",
+				(unsigned long)ctx->groups[i].capacitance);
+			break;
+		}
+	}
+	mutex_unlock(&ctx->ipa_lock);
+	return count;
+}
+
+static ssize_t set_ipa_group(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbase_ipa_context *ctx = kbdev->ipa_ctx;
+	unsigned long capacitance;
+	size_t i;
+	int err;
+
+	err = kstrtoul(buf, 0, &capacitance);
+	if (err < 0)
+		return err;
+	if (capacitance > U32_MAX)
+		return -ERANGE;
+
+	mutex_lock(&ctx->ipa_lock);
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		if (!strncmp(ctx->groups[i].name, attr->attr.name,
+				MAX_GROUP_NAME_LEN + 1)) {
+			ctx->groups[i].capacitance = capacitance;
+			mutex_unlock(&ctx->ipa_lock);
+			return count;
+		}
+	}
+	mutex_unlock(&ctx->ipa_lock);
+	return -EINVAL;
+}
+
+static DEVICE_ATTR(group0, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group1, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group2, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group3, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group4, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group5, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group6, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+static DEVICE_ATTR(group7, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group);
+
+static struct attribute *kbase_ipa_attrs[] = {
+	&dev_attr_group0.attr,
+	&dev_attr_group1.attr,
+	&dev_attr_group2.attr,
+	&dev_attr_group3.attr,
+	&dev_attr_group4.attr,
+	&dev_attr_group5.attr,
+	&dev_attr_group6.attr,
+	&dev_attr_group7.attr,
+	NULL,
+};
+
+static struct attribute_group kbase_ipa_attr_group = {
+	.name = "ipa",
+	.attrs = kbase_ipa_attrs,
+};
+
+static void init_ipa_groups(struct kbase_ipa_context *ctx)
+{
+	memcpy(ctx->groups, ipa_groups_def, sizeof(ctx->groups));
+}
+
+#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	struct device_node *np, *child;
+	struct ipa_group *group;
+	size_t nr_groups;
+	size_t i;
+	int err;
+
+	np = of_get_child_by_name(kbdev->dev->of_node, "ipa-groups");
+	if (!np)
+		return 0;
+
+	nr_groups = 0;
+	for_each_available_child_of_node(np, child)
+		nr_groups++;
+	if (!nr_groups || nr_groups > ARRAY_SIZE(ctx->groups)) {
+		dev_err(kbdev->dev, "invalid number of IPA groups: %zu", nr_groups);
+		err = -EINVAL;
+		goto err0;
+	}
+
+	for_each_available_child_of_node(np, child) {
+		const char *name;
+		u32 capacitance;
+
+		name = of_get_property(child, "label", NULL);
+		if (!name) {
+			dev_err(kbdev->dev, "label missing for IPA group");
+			err = -EINVAL;
+			goto err0;
+		}
+		err = of_property_read_u32(child, "capacitance",
+				&capacitance);
+		if (err < 0) {
+			dev_err(kbdev->dev, "capacitance missing for IPA group");
+			goto err0;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+			group = &ctx->groups[i];
+			if (!strncmp(group->name, name,
+					MAX_GROUP_NAME_LEN + 1)) {
+				group->capacitance = capacitance;
+				break;
+			}
+		}
+	}
+
+	of_node_put(np);
+	return 0;
+err0:
+	of_node_put(np);
+	return err;
+}
+#else
+static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx)
+{
+	return 0;
+}
+#endif
+
+static int reset_ipa_groups(struct kbase_ipa_context *ctx)
+{
+	init_ipa_groups(ctx);
+	return update_ipa_groups_from_dt(ctx);
+}
+
+static inline u32 read_hwcnt(struct kbase_ipa_context *ctx,
+	u32 offset)
+{
+	u8 *p = ctx->vinstr_buffer;
+
+	return *(u32 *)&p[offset];
+}
+
+static inline u32 add_saturate(u32 a, u32 b)
+{
+	if (U32_MAX - a < b)
+		return U32_MAX;
+	return a + b;
+}
+
+/*
+ * Calculate power estimation based on hardware counter `c'
+ * across all shader cores.
+ */
+static u32 calc_power_sc_single(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	u64 core_mask;
+	u32 base = 0, r = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			u64 n = read_hwcnt(ctx, base + c);
+			u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+			u32 s = group->capacitance;
+
+			r = add_saturate(r, div_u64(n * s, d));
+		}
+		base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+		core_mask >>= 1;
+	}
+	return r;
+}
+
+/*
+ * Calculate power estimation based on hardware counter `c1'
+ * and `c2' across all shader cores.
+ */
+static u32 calc_power_sc_double(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c1, u32 c2)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	u64 core_mask;
+	u32 base = 0, r = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			u64 n = read_hwcnt(ctx, base + c1);
+			u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+			u32 s = group->capacitance;
+
+			r = add_saturate(r, div_u64(n * s, d));
+			n = read_hwcnt(ctx, base + c2);
+			r = add_saturate(r, div_u64(n * s, d));
+		}
+		base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+		core_mask >>= 1;
+	}
+	return r;
+}
+
+static u32 calc_power_single(struct kbase_ipa_context *ctx,
+	struct ipa_group *group, u32 c)
+{
+	u64 n = read_hwcnt(ctx, c);
+	u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+	u32 s = group->capacitance;
+
+	return div_u64(n * s, d);
+}
+
+static u32 calc_power_group0(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_single(ctx, group, L2_ANY_LOOKUP);
+}
+
+static u32 calc_power_group1(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_single(ctx, group, TILER_ACTIVE);
+}
+
+static u32 calc_power_group2(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, FRAG_ACTIVE);
+}
+
+static u32 calc_power_group3(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_double(ctx, group, VARY_SLOT_32,
+			VARY_SLOT_16);
+}
+
+static u32 calc_power_group4(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, TEX_COORD_ISSUE);
+}
+
+static u32 calc_power_group5(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, EXEC_INSTR_COUNT);
+}
+
+static u32 calc_power_group6(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_double(ctx, group, BEATS_RD_LSC,
+			BEATS_WR_LSC);
+}
+
+static u32 calc_power_group7(struct kbase_ipa_context *ctx,
+		struct ipa_group *group)
+{
+	return calc_power_sc_single(ctx, group, EXEC_CORE_ACTIVE);
+}
+
+static int attach_vinstr(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	size_t dump_size;
+
+	dump_size = kbase_vinstr_dump_size(kbdev);
+	ctx->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!ctx->vinstr_buffer) {
+		dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
+		return -1;
+	}
+
+	setup.jm_bm = ~0u;
+	setup.shader_bm = ~0u;
+	setup.tiler_bm = ~0u;
+	setup.mmu_l2_bm = ~0u;
+	ctx->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx,
+			&setup, ctx->vinstr_buffer);
+	if (!ctx->vinstr_cli) {
+		dev_err(kbdev->dev, "Failed to register IPA with vinstr core");
+		kfree(ctx->vinstr_buffer);
+		ctx->vinstr_buffer = NULL;
+		return -1;
+	}
+	return 0;
+}
+
+static void detach_vinstr(struct kbase_ipa_context *ctx)
+{
+	if (ctx->vinstr_cli)
+		kbase_vinstr_detach_client(ctx->vinstr_cli);
+	ctx->vinstr_cli = NULL;
+	kfree(ctx->vinstr_buffer);
+	ctx->vinstr_buffer = NULL;
+}
+
+struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_context *ctx;
+	int err;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return NULL;
+
+	mutex_init(&ctx->ipa_lock);
+	ctx->kbdev = kbdev;
+
+	err = reset_ipa_groups(ctx);
+	if (err < 0)
+		goto err0;
+
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_ipa_attr_group);
+	if (err < 0)
+		goto err0;
+
+	return ctx;
+err0:
+	kfree(ctx);
+	return NULL;
+}
+
+void kbase_ipa_term(struct kbase_ipa_context *ctx)
+{
+	struct kbase_device *kbdev = ctx->kbdev;
+
+	detach_vinstr(ctx);
+	sysfs_remove_group(&kbdev->dev->kobj, &kbase_ipa_attr_group);
+	kfree(ctx);
+}
+
+u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err)
+{
+	struct ipa_group *group;
+	u32 power = 0;
+	size_t i;
+
+	mutex_lock(&ctx->ipa_lock);
+	if (!ctx->vinstr_cli) {
+		*err = attach_vinstr(ctx);
+		if (*err < 0)
+			goto err0;
+	}
+	*err = kbase_vinstr_hwc_dump(ctx->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL);
+	if (*err)
+		goto err0;
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		group = &ctx->groups[i];
+		power = add_saturate(power, group->calc_power(ctx, group));
+	}
+err0:
+	mutex_unlock(&ctx->ipa_lock);
+	return power;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_dynamic_power);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h
new file mode 100755
index 0000000..e2234d1
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h
@@ -0,0 +1,41 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+struct kbase_ipa_context;
+
+/**
+ * kbase_ipa_init - initialize the kbase ipa core
+ * @kbdev:      kbase device
+ *
+ * Return:      pointer to the IPA context or NULL on failure
+ */
+struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_term - terminate the kbase ipa core
+ * @ctx:        pointer to the IPA context
+ */
+void kbase_ipa_term(struct kbase_ipa_context *ctx);
+
+/**
+ * kbase_ipa_dynamic_power - calculate power
+ * @ctx:        pointer to the IPA context
+ * @err:        0 on success, negative on failure
+ *
+ * Return:      returns power consumption as mw @ 1GHz @ 1V
+ */
+u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h
new file mode 100644
index 0000000..101abfe
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h
@@ -0,0 +1,104 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#define NR_BYTES_PER_CNT  4
+#define NR_CNT_PER_BLOCK 64
+
+#define JM_BASE    (0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define TILER_BASE (1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define MMU_BASE   (2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define SC0_BASE   (3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+
+#define GPU_ACTIVE       (JM_BASE    + NR_BYTES_PER_CNT *  6)
+#define TILER_ACTIVE     (TILER_BASE + NR_BYTES_PER_CNT * 45)
+#define L2_ANY_LOOKUP    (MMU_BASE   + NR_BYTES_PER_CNT * 25)
+#define FRAG_ACTIVE      (SC0_BASE   + NR_BYTES_PER_CNT *  4)
+#define EXEC_CORE_ACTIVE (SC0_BASE   + NR_BYTES_PER_CNT * 26)
+#define EXEC_INSTR_COUNT (SC0_BASE   + NR_BYTES_PER_CNT * 28)
+#define TEX_COORD_ISSUE  (SC0_BASE   + NR_BYTES_PER_CNT * 40)
+#define VARY_SLOT_32     (SC0_BASE   + NR_BYTES_PER_CNT * 50)
+#define VARY_SLOT_16     (SC0_BASE   + NR_BYTES_PER_CNT * 51)
+#define BEATS_RD_LSC     (SC0_BASE   + NR_BYTES_PER_CNT * 56)
+#define BEATS_WR_LSC     (SC0_BASE   + NR_BYTES_PER_CNT * 61)
+
+static u32 calc_power_group0(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group1(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group2(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group3(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group4(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group5(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group6(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+static u32 calc_power_group7(struct kbase_ipa_context *ctx,
+		struct ipa_group *group);
+
+static struct ipa_group ipa_groups_def[] = {
+	/* L2 */
+	{
+		.name = "group0",
+		.capacitance = 687,
+		.calc_power = calc_power_group0,
+	},
+	/* TILER */
+	{
+		.name = "group1",
+		.capacitance = 0,
+		.calc_power = calc_power_group1,
+	},
+	/* FRAG */
+	{
+		.name = "group2",
+		.capacitance = 23,
+		.calc_power = calc_power_group2,
+	},
+	/* VARY */
+	{
+		.name = "group3",
+		.capacitance = 108,
+		.calc_power = calc_power_group3,
+	},
+	/* TEX */
+	{
+		.name = "group4",
+		.capacitance = 128,
+		.calc_power = calc_power_group4,
+	},
+	/* EXEC INSTR */
+	{
+		.name = "group5",
+		.capacitance = 249,
+		.calc_power = calc_power_group5,
+	},
+	/* LSC */
+	{
+		.name = "group6",
+		.capacitance = 0,
+		.calc_power = calc_power_group6,
+	},
+	/* EXEC OVERHEAD */
+	{
+		.name = "group7",
+		.capacitance = 29,
+		.calc_power = calc_power_group7,
+	},
+};
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100755
index 0000000..3518609
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1898 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <mali_kbase.h>
+#include <mali_kbase_uku.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/ratelimit.h>
+
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_tlstream.h>
+
+#include "mali_kbase_dma_fence.h"
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/* Return whether katom will run on the GPU or not. Currently only soft jobs and
+ * dependency-only atoms do not run on the GPU */
+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) ||  \
+			((katom->core_req & BASE_JD_REQ_ATOM_TYPE) ==    \
+							BASE_JD_REQ_DEP)))
+/*
+ * This is the kernel side of the API. Only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ *   completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p)
+{
+#ifdef CONFIG_COMPAT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return compat_ptr(p->compat_value);
+#endif
+	return p->value;
+}
+
+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
+ */
+static int jd_run_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+	if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+		/* Dependency only atom */
+		katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		return 0;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		/* Soft-job */
+		if (katom->will_fail_event_code) {
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			return 0;
+		}
+		if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+			if (!kbase_replay_process(katom))
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		} else if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		}
+		return 0;
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+	/* Queue an action about whether we should try scheduling a context */
+	return kbasep_js_add_job(kctx, katom);
+}
+
+#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE)
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kbdev = katom->kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Check whether the atom's other dependencies were already met. If
+	 * katom is a GPU atom then the job scheduler may be able to represent
+	 * the dependencies, hence we may attempt to submit it before they are
+	 * met. Other atoms must have had both dependencies resolved.
+	 */
+	if (IS_GPU_ATOM(katom) ||
+			(!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+			!kbase_jd_katom_dep_atom(&katom->dep[1]))) {
+		/* katom dep complete, attempt to run it */
+		bool resched = false;
+
+		resched = jd_run_atom(katom);
+
+		if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+			/* The atom has already finished */
+			resched |= jd_done_nolock(katom, NULL);
+		}
+
+		if (resched)
+			kbase_js_sched_all(kbdev);
+	}
+}
+#endif
+
+#ifdef CONFIG_KDS
+
+/* Add the katom to the kds waiting list.
+ * Atoms must be added to the waiting list after a successful call to kds_async_waitall.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	list_add_tail(&katom->node, &kctx->waiting_kds_resource);
+}
+
+/* Remove the katom from the kds waiting list.
+ * Atoms must be removed from the waiting list before a call to kds_resource_set_release_sync.
+ * The supplied katom must first have been added to the list with a call to kbase_jd_kds_waiters_add.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_remove(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	list_del(&katom->node);
+}
+
+static void kds_dep_clear(void *callback_parameter, void *callback_extra_parameter)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = (struct kbase_jd_atom *)callback_parameter;
+	KBASE_DEBUG_ASSERT(katom);
+
+	ctx = &katom->kctx->jctx;
+
+	/* If KDS resource has already been satisfied (e.g. due to zapping)
+	 * do nothing.
+	 */
+	mutex_lock(&ctx->lock);
+	if (!katom->kds_dep_satisfied) {
+		katom->kds_dep_satisfied = true;
+		kbase_jd_dep_clear_locked(katom);
+	}
+	mutex_unlock(&ctx->lock);
+}
+
+static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 *  there is a race between job completion and cancellation */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+#endif				/* CONFIG_KDS */
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_KDS
+	if (katom->kds_rset) {
+		struct kbase_jd_context *jctx = &katom->kctx->jctx;
+
+		/*
+		 * As the atom is no longer waiting, remove it from
+		 * the waiting list.
+		 */
+
+		mutex_lock(&jctx->lock);
+		kbase_jd_kds_waiters_remove(katom);
+		mutex_unlock(&jctx->lock);
+
+		/* Release the kds resource or cancel if zapping */
+		kds_resource_set_release_sync(&katom->kds_rset);
+	}
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 * Any successfully completed atom would have had all it's callbacks
+	 * completed before the atom was run, so only flush for failed atoms.
+	 */
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		flush_workqueue(katom->kctx->dma_fence.wq);
+#endif /* CONFIG_MALI_DMA_FENCE */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_KDS
+	/* Prevent the KDS resource from triggering the atom in case of zapping */
+	if (katom->kds_rset)
+		katom->kds_dep_satisfied = true;
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_signal(katom);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	kbase_gpu_vm_lock(katom->kctx);
+	/* only roll back if extres is non-NULL */
+	if (katom->extres) {
+		u32 res_no;
+
+		res_no = katom->nr_extres;
+		while (res_no-- > 0) {
+			struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+			struct kbase_va_region *reg;
+
+			reg = kbase_region_tracker_find_region_base_address(
+					katom->kctx,
+					katom->extres[res_no].gpu_address);
+			kbase_unmap_external_resource(katom->kctx, reg, alloc);
+		}
+		kfree(katom->extres);
+		katom->extres = NULL;
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+}
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ */
+
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+	int err_ret_val = -EINVAL;
+	u32 res_no;
+#ifdef CONFIG_KDS
+	u32 kds_res_count = 0;
+	struct kds_resource **kds_resources = NULL;
+	unsigned long *kds_access_bitmap = NULL;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct kbase_dma_fence_resv_info info = {
+		.dma_fence_resv_count = 0,
+	};
+#ifdef CONFIG_SYNC
+	/*
+	 * When both dma-buf fence and Android native sync is enabled, we
+	 * disable dma-buf fence for contexts that are using Android native
+	 * fences.
+	 */
+	const bool implicit_sync = !kbase_ctx_flag(katom->kctx,
+						   KCTX_NO_IMPLICIT_SYNC);
+#else /* CONFIG_SYNC */
+	const bool implicit_sync = true;
+#endif /* CONFIG_SYNC */
+#endif /* CONFIG_MALI_DMA_FENCE */
+	struct base_external_resource *input_extres;
+
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+	/* no resources encoded, early out */
+	if (!katom->nr_extres)
+		return -EINVAL;
+
+	katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+	if (NULL == katom->extres) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	/* copy user buffer to the end of our real buffer.
+	 * Make sure the struct sizes haven't changed in a way
+	 * we don't support */
+	BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+	input_extres = (struct base_external_resource *)
+			(((unsigned char *)katom->extres) +
+			(sizeof(*katom->extres) - sizeof(*input_extres)) *
+			katom->nr_extres);
+
+	if (copy_from_user(input_extres,
+			get_compat_pointer(katom->kctx, &user_atom->extres_list),
+			sizeof(*input_extres) * katom->nr_extres) != 0) {
+		err_ret_val = -EINVAL;
+		goto early_err_out;
+	}
+#ifdef CONFIG_KDS
+	/* assume we have to wait for all */
+	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+	kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL);
+
+	if (!kds_resources) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+	kds_access_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres),
+				    sizeof(unsigned long),
+				    GFP_KERNEL);
+	if (!kds_access_bitmap) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		info.resv_objs = kmalloc_array(katom->nr_extres,
+					sizeof(struct reservation_object *),
+					GFP_KERNEL);
+		if (!info.resv_objs) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+
+		info.dma_fence_excl_bitmap =
+				kcalloc(BITS_TO_LONGS(katom->nr_extres),
+					sizeof(unsigned long), GFP_KERNEL);
+		if (!info.dma_fence_excl_bitmap) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Take the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* need to keep the GPU VM locked while we set up UMM buffers */
+	kbase_gpu_vm_lock(katom->kctx);
+	for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+		struct base_external_resource *res;
+		struct kbase_va_region *reg;
+		struct kbase_mem_phy_alloc *alloc;
+		bool exclusive;
+
+		res = &input_extres[res_no];
+		exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
+				? true : false;
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx,
+				res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+		/* did we find a matching region object? */
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE)) {
+			/* roll back */
+			goto failed_loop;
+		}
+
+		if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
+				(reg->flags & KBASE_REG_SECURE)) {
+			katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED;
+		}
+
+		alloc = kbase_map_external_resource(katom->kctx, reg,
+				current->mm
+#ifdef CONFIG_KDS
+				, &kds_res_count, kds_resources,
+				kds_access_bitmap, exclusive
+#endif
+				);
+		if (!alloc) {
+			err_ret_val = -EINVAL;
+			goto failed_loop;
+		}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		if (implicit_sync &&
+		    reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+			struct reservation_object *resv;
+
+			resv = reg->gpu_alloc->imported.umm.dma_buf->resv;
+			if (resv)
+				kbase_dma_fence_add_reservation(resv, &info,
+								exclusive);
+		}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+		/* finish with updating out array with the data we found */
+		/* NOTE: It is important that this is the last thing we do (or
+		 * at least not before the first write) as we overwrite elements
+		 * as we loop and could be overwriting ourself, so no writes
+		 * until the last read for an element.
+		 * */
+		katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+		katom->extres[res_no].alloc = alloc;
+	}
+	/* successfully parsed the extres array */
+	/* drop the vm lock before we call into kds */
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+#ifdef CONFIG_KDS
+	if (kds_res_count) {
+		int wait_failed;
+
+		/* We have resources to wait for with kds */
+		katom->kds_dep_satisfied = false;
+
+		wait_failed = kds_async_waitall(&katom->kds_rset,
+				&katom->kctx->jctx.kds_cb, katom, NULL,
+				kds_res_count, kds_access_bitmap,
+				kds_resources);
+
+		if (wait_failed)
+			goto failed_kds_setup;
+		else
+			kbase_jd_kds_waiters_add(katom);
+	} else {
+		/* Nothing to wait for, so kds dep met */
+		katom->kds_dep_satisfied = true;
+	}
+	kfree(kds_resources);
+	kfree(kds_access_bitmap);
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		if (info.dma_fence_resv_count) {
+			int ret;
+
+			ret = kbase_dma_fence_wait(katom, &info);
+			if (ret < 0)
+				goto failed_dma_fence_setup;
+		}
+
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* all done OK */
+	return 0;
+
+/* error handling section */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+failed_dma_fence_setup:
+#ifdef CONFIG_KDS
+	/* If we are here, dma_fence setup failed but KDS didn't.
+	 * Revert KDS setup if any.
+	 */
+	if (kds_res_count) {
+		mutex_unlock(&katom->kctx->jctx.lock);
+		kds_resource_set_release_sync(&katom->kds_rset);
+		mutex_lock(&katom->kctx->jctx.lock);
+
+		kbase_jd_kds_waiters_remove(katom);
+		katom->kds_dep_satisfied = true;
+	}
+#endif /* CONFIG_KDS */
+#endif /* CONFIG_MALI_DMA_FENCE */
+#ifdef CONFIG_KDS
+failed_kds_setup:
+#endif
+#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE)
+	/* Lock the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* lock before we unmap */
+	kbase_gpu_vm_lock(katom->kctx);
+#endif
+
+ failed_loop:
+	/* undo the loop work */
+	while (res_no-- > 0) {
+		struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+
+		kbase_unmap_external_resource(katom->kctx, NULL, alloc);
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+ early_err_out:
+	kfree(katom->extres);
+	katom->extres = NULL;
+#ifdef CONFIG_KDS
+	kfree(kds_resources);
+	kfree(kds_access_bitmap);
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif
+	return err_ret_val;
+}
+
+static inline void jd_resolve_dep(struct list_head *out_list,
+					struct kbase_jd_atom *katom,
+					u8 d, bool ctx_is_dying)
+{
+	u8 other_d = !d;
+
+	while (!list_empty(&katom->dep_head[d])) {
+		struct kbase_jd_atom *dep_atom;
+		struct kbase_jd_atom *other_dep_atom;
+		u8 dep_type;
+
+		dep_atom = list_entry(katom->dep_head[d].next,
+				struct kbase_jd_atom, dep_item[d]);
+		list_del(katom->dep_head[d].next);
+
+		dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+		kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+		if (katom->event_code != BASE_JD_EVENT_DONE &&
+			(dep_type != BASE_JD_DEP_TYPE_ORDER)) {
+#ifdef CONFIG_KDS
+			if (!dep_atom->kds_dep_satisfied) {
+				/* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and
+				 * do nothing. If the callback doesn't happen then kbase_jd_post_external_resources will clean up
+				 */
+				dep_atom->kds_dep_satisfied = true;
+			}
+#endif
+
+#ifdef CONFIG_MALI_DMA_FENCE
+			kbase_dma_fence_cancel_callbacks(dep_atom);
+#endif
+
+			dep_atom->event_code = katom->event_code;
+			KBASE_DEBUG_ASSERT(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED);
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY)
+					!= BASE_JD_REQ_SOFT_REPLAY) {
+				dep_atom->will_fail_event_code =
+					dep_atom->event_code;
+			} else {
+				dep_atom->status =
+					KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+		}
+		other_dep_atom = (struct kbase_jd_atom *)
+			kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]);
+
+		if (!dep_atom->in_jd_list && (!other_dep_atom ||
+				(IS_GPU_ATOM(dep_atom) && !ctx_is_dying &&
+				!dep_atom->will_fail_event_code &&
+				!other_dep_atom->will_fail_event_code))) {
+			bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+			int dep_count;
+
+			dep_count = atomic_read(&dep_atom->dma_fence.dep_count);
+			if (likely(dep_count == -1)) {
+				dep_satisfied = true;
+			} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+				dep_satisfied = false;
+			}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+#ifdef CONFIG_KDS
+			dep_satisfied = dep_satisfied && dep_atom->kds_dep_satisfied;
+#endif
+
+			if (dep_satisfied) {
+				dep_atom->in_jd_list = true;
+				list_add_tail(&dep_atom->jd_item, out_list);
+			}
+		}
+	}
+}
+
+KBASE_EXPORT_TEST_API(jd_resolve_dep);
+
+#if MALI_CUSTOMER_RELEASE == 0
+static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	kbdev->force_replay_count++;
+
+	if (kbdev->force_replay_count >= kbdev->force_replay_limit) {
+		kbdev->force_replay_count = 0;
+		katom->event_code = BASE_JD_EVENT_FORCE_REPLAY;
+
+		if (kbdev->force_replay_random)
+			kbdev->force_replay_limit =
+			   (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1;
+
+		dev_info(kbdev->dev, "force_replay : promoting to error\n");
+	}
+}
+
+/** Test to see if atom should be forced to fail.
+ *
+ * This function will check if an atom has a replay job as a dependent. If so
+ * then it will be considered for forced failure. */
+static void jd_check_force_failure(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int i;
+
+	if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) ||
+	    (katom->core_req & BASEP_JD_REQ_EVENT_NEVER))
+		return;
+
+	for (i = 1; i < BASE_JD_ATOM_COUNT; i++) {
+		if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom ||
+		    kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
+			struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
+						     BASE_JD_REQ_SOFT_REPLAY &&
+			    (dep_atom->core_req & kbdev->force_replay_core_req)
+					     == kbdev->force_replay_core_req) {
+				jd_force_failure(kbdev, katom);
+				return;
+			}
+		}
+	}
+}
+#endif
+
+/**
+ * is_dep_valid - Validate that a dependency is valid for early dependency
+ *                submission
+ * @katom: Dependency atom to validate
+ *
+ * A dependency is valid if any of the following are true :
+ * - It does not exist (a non-existent dependency does not block submission)
+ * - It is in the job scheduler
+ * - It has completed, does not have a failure event code, and has not been
+ *   marked to fail in the future
+ *
+ * Return: true if valid, false otherwise
+ */
+static bool is_dep_valid(struct kbase_jd_atom *katom)
+{
+	/* If there's no dependency then this is 'valid' from the perspective of
+	 * early dependency submission */
+	if (!katom)
+		return true;
+
+	/* Dependency must have reached the job scheduler */
+	if (katom->status < KBASE_JD_ATOM_STATE_IN_JS)
+		return false;
+
+	/* If dependency has completed and has failed or will fail then it is
+	 * not valid */
+	if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED &&
+			(katom->event_code != BASE_JD_EVENT_DONE ||
+			katom->will_fail_event_code))
+		return false;
+
+	return true;
+}
+
+static void jd_try_submitting_deps(struct list_head *out_list,
+		struct kbase_jd_atom *node)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct list_head *pos;
+
+		list_for_each(pos, &node->dep_head[i]) {
+			struct kbase_jd_atom *dep_atom = list_entry(pos,
+					struct kbase_jd_atom, dep_item[i]);
+
+			if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) {
+				/*Check if atom deps look sane*/
+				bool dep0_valid = is_dep_valid(
+						dep_atom->dep[0].atom);
+				bool dep1_valid = is_dep_valid(
+						dep_atom->dep[1].atom);
+				bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+				int dep_count;
+
+				dep_count = atomic_read(
+						&dep_atom->dma_fence.dep_count);
+				if (likely(dep_count == -1)) {
+					dep_satisfied = true;
+				} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+					dep_satisfied = false;
+				}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#ifdef CONFIG_KDS
+				dep_satisfied = dep_satisfied &&
+						dep_atom->kds_dep_satisfied;
+#endif
+
+				if (dep0_valid && dep1_valid && dep_satisfied) {
+					dep_atom->in_jd_list = true;
+					list_add(&dep_atom->jd_item, out_list);
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head completed_jobs;
+	struct list_head runnable_jobs;
+	bool need_to_try_schedule_context = false;
+	int i;
+
+	INIT_LIST_HEAD(&completed_jobs);
+	INIT_LIST_HEAD(&runnable_jobs);
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+#if MALI_CUSTOMER_RELEASE == 0
+	jd_check_force_failure(katom);
+#endif
+
+	/* This is needed in case an atom is failed due to being invalid, this
+	 * can happen *before* the jobs that the atom depends on have completed */
+	for (i = 0; i < 2; i++) {
+		if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
+			list_del(&katom->dep_item[i]);
+			kbase_jd_katom_dep_clear(&katom->dep[i]);
+		}
+	}
+
+	/* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with
+	 * BASE_JD_EVENT_TILE_RANGE_FAULT.
+	 *
+	 * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the
+	 * error code to BASE_JD_EVENT_DONE
+	 */
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) &&
+		  katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) {
+		if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) {
+			/* Promote the failure to job done */
+			katom->event_code = BASE_JD_EVENT_DONE;
+			katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED);
+		}
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+	list_add_tail(&katom->jd_item, &completed_jobs);
+
+	while (!list_empty(&completed_jobs)) {
+		katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item);
+		list_del(completed_jobs.prev);
+		KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+		for (i = 0; i < 2; i++)
+			jd_resolve_dep(&runnable_jobs, katom, i,
+					kbase_ctx_flag(kctx, KCTX_DYING));
+
+		if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+			kbase_jd_post_external_resources(katom);
+
+		while (!list_empty(&runnable_jobs)) {
+			struct kbase_jd_atom *node;
+
+			node = list_entry(runnable_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(runnable_jobs.next);
+			node->in_jd_list = false;
+
+			KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+			if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+				need_to_try_schedule_context |= jd_run_atom(node);
+			} else {
+				node->event_code = katom->event_code;
+
+				if ((node->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE) ==
+					BASE_JD_REQ_SOFT_REPLAY) {
+					if (kbase_replay_process(node))
+						/* Don't complete this atom */
+						continue;
+				} else if (node->core_req &
+							BASE_JD_REQ_SOFT_JOB) {
+					/* If this is a fence wait soft job
+					 * then remove it from the list of sync
+					 * waiters.
+					 */
+					if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
+						kbasep_remove_waiting_soft_job(node);
+
+					kbase_finish_soft_job(node);
+				}
+				node->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+
+			if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+				list_add_tail(&node->jd_item, &completed_jobs);
+			} else if (node->status == KBASE_JD_ATOM_STATE_IN_JS &&
+					!node->will_fail_event_code) {
+				/* Node successfully submitted, try submitting
+				 * dependencies as they may now be representable
+				 * in JS */
+				jd_try_submitting_deps(&runnable_jobs, node);
+			}
+		}
+
+		/* Register a completed job as a disjoint event when the GPU
+		 * is in a disjoint state (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kctx->kbdev);
+		if (completed_jobs_ctx)
+			list_add_tail(&katom->jd_item, completed_jobs_ctx);
+		else
+			kbase_event_post(kctx, katom);
+
+		/* Decrement and check the TOTAL number of jobs. This includes
+		 * those not tracked by the scheduler: 'not ready to run' and
+		 * 'dependency-only' jobs. */
+		if (--kctx->jctx.job_nr == 0)
+			wake_up(&kctx->jctx.zero_jobs_wait);	/* All events are safely queued now, and we can signal any waiter
+								 * that we've got no more jobs (so we can be safely terminated) */
+	}
+
+	return need_to_try_schedule_context;
+}
+
+KBASE_EXPORT_TEST_API(jd_done_nolock);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+enum {
+	CORE_REQ_DEP_ONLY,
+	CORE_REQ_SOFT,
+	CORE_REQ_COMPUTE,
+	CORE_REQ_FRAGMENT,
+	CORE_REQ_VERTEX,
+	CORE_REQ_TILER,
+	CORE_REQ_FRAGMENT_VERTEX,
+	CORE_REQ_FRAGMENT_VERTEX_TILER,
+	CORE_REQ_FRAGMENT_TILER,
+	CORE_REQ_VERTEX_TILER,
+	CORE_REQ_UNKNOWN
+};
+static const char * const core_req_strings[] = {
+	"Dependency Only Job",
+	"Soft Job",
+	"Compute Shader Job",
+	"Fragment Shader Job",
+	"Vertex/Geometry Shader Job",
+	"Tiler Job",
+	"Fragment Shader + Vertex/Geometry Shader Job",
+	"Fragment Shader + Vertex/Geometry Shader Job + Tiler Job",
+	"Fragment Shader + Tiler Job",
+	"Vertex/Geometry Shader Job + Tiler Job",
+	"Unknown Job"
+};
+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
+{
+	if (core_req & BASE_JD_REQ_SOFT_JOB)
+		return core_req_strings[CORE_REQ_SOFT];
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		return core_req_strings[CORE_REQ_COMPUTE];
+	switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) {
+	case BASE_JD_REQ_DEP:
+		return core_req_strings[CORE_REQ_DEP_ONLY];
+	case BASE_JD_REQ_FS:
+		return core_req_strings[CORE_REQ_FRAGMENT];
+	case BASE_JD_REQ_CS:
+		return core_req_strings[CORE_REQ_VERTEX];
+	case BASE_JD_REQ_T:
+		return core_req_strings[CORE_REQ_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_TILER];
+	case (BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_VERTEX_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER];
+	}
+	return core_req_strings[CORE_REQ_UNKNOWN];
+}
+#endif
+
+bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int queued = 0;
+	int i;
+	int sched_prio;
+	bool ret;
+	bool will_fail = false;
+
+	/* Update the TOTAL number of jobs. This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	jctx->job_nr++;
+
+	katom->start_timestamp.tv64 = 0;
+	katom->time_spent_us = 0;
+	katom->udata = user_atom->udata;
+	katom->kctx = kctx;
+	katom->nr_extres = user_atom->nr_extres;
+	katom->extres = NULL;
+	katom->device_nr = user_atom->device_nr;
+	katom->affinity = 0;
+	katom->jc = user_atom->jc;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->core_req = user_atom->core_req;
+	katom->atom_flags = 0;
+	katom->retry_count = 0;
+	katom->need_cache_flush_cores_retained = 0;
+	katom->pre_dep = NULL;
+	katom->post_dep = NULL;
+	katom->x_pre_dep = NULL;
+	katom->x_post_dep = NULL;
+	katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED;
+
+	/* Implicitly sets katom->protected_state.enter as well. */
+	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+	katom->age = kctx->age_count++;
+
+	INIT_LIST_HEAD(&katom->jd_item);
+#ifdef CONFIG_KDS
+	/* Start by assuming that the KDS dependencies are satisfied,
+	 * kbase_jd_pre_external_resources will correct this if there are dependencies */
+	katom->kds_dep_satisfied = true;
+	katom->kds_rset = NULL;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	atomic_set(&katom->dma_fence.dep_count, -1);
+#endif
+
+	/* Don't do anything if there is a mess up with dependencies.
+	   This is done in a separate cycle to check both the dependencies at ones, otherwise
+	   it will be extra complexity to deal with 1st dependency ( just added to the list )
+	   if only the 2nd one has invalid config.
+	 */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+		if (dep_atom_number) {
+			if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+				katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+
+				/* Wrong dependency setup. Atom will be sent
+				 * back to user space. Do not record any
+				 * dependencies. */
+				KBASE_TLSTREAM_TL_NEW_ATOM(
+						katom,
+						kbase_jd_atom_id(kctx, katom));
+				KBASE_TLSTREAM_TL_RET_ATOM_CTX(
+						katom, kctx);
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom,
+						TL_ATOM_STATE_IDLE);
+
+				ret = jd_done_nolock(katom, NULL);
+				goto out;
+			}
+		}
+	}
+
+	/* Add dependencies */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type;
+		struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number];
+
+		dep_atom_type = user_atom->pre_dep[i].dependency_type;
+		kbase_jd_katom_dep_clear(&katom->dep[i]);
+
+		if (!dep_atom_number)
+			continue;
+
+		if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED ||
+				dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+
+			if (dep_atom->event_code == BASE_JD_EVENT_DONE)
+				continue;
+			/* don't stop this atom if it has an order dependency
+			 * only to the failed one, try to submit it through
+			 * the normal path
+			 */
+			if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom->event_code > BASE_JD_EVENT_ACTIVE) {
+				continue;
+			}
+
+			/* Atom has completed, propagate the error code if any */
+			katom->event_code = dep_atom->event_code;
+			katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+			/* This atom is going through soft replay or
+			 * will be sent back to user space. Do not record any
+			 * dependencies. */
+			KBASE_TLSTREAM_TL_NEW_ATOM(
+					katom,
+					kbase_jd_atom_id(kctx, katom));
+			KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx);
+			KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom,
+					TL_ATOM_STATE_IDLE);
+
+			if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					 == BASE_JD_REQ_SOFT_REPLAY) {
+				if (kbase_replay_process(katom)) {
+					ret = false;
+					goto out;
+				}
+			}
+			will_fail = true;
+
+		} else {
+			/* Atom is in progress, add this atom to the list */
+			list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
+			kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type);
+			queued = 1;
+		}
+	}
+
+	if (will_fail) {
+		if (!queued) {
+			ret = jd_done_nolock(katom, NULL);
+
+			goto out;
+		} else {
+			katom->will_fail_event_code = katom->event_code;
+			ret = false;
+
+			goto out;
+		}
+	} else {
+		/* These must occur after the above loop to ensure that an atom
+		 * that depends on a previous atom with the same number behaves
+		 * as expected */
+		katom->event_code = BASE_JD_EVENT_DONE;
+		katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	}
+
+	/* For invalid priority, be most lenient and choose the default */
+	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+	katom->sched_priority = sched_prio;
+
+	/* Create a new atom recording all dependencies it was set up with. */
+	KBASE_TLSTREAM_TL_NEW_ATOM(
+			katom,
+			kbase_jd_atom_id(kctx, katom));
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_IDLE);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(katom, katom->sched_priority);
+	KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx);
+	for (i = 0; i < 2; i++)
+		if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
+					&katom->dep[i])) {
+			KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(
+					(void *)kbase_jd_katom_dep_atom(
+						&katom->dep[i]),
+					(void *)katom);
+		} else if (BASE_JD_DEP_TYPE_INVALID !=
+				user_atom->pre_dep[i].dependency_type) {
+			/* Resolved dependency. */
+			int dep_atom_number =
+				user_atom->pre_dep[i].atom_id;
+			struct kbase_jd_atom *dep_atom =
+				&jctx->atoms[dep_atom_number];
+
+			KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(
+					(void *)dep_atom,
+					(void *)katom);
+		}
+
+	/* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
+	if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with an invalid device_nr */
+	if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
+	    (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid device_nr %d",
+				katom->device_nr);
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with invalid core requirements */
+	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
+			(katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid core requirements");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		/* handle what we need to do to access the external resources */
+		if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
+			/* setup failed (no access, bad resource, unknown resource types, etc.) */
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+	/* Validate the atom. Function will return error if the atom is
+	 * malformed.
+	 *
+	 * Soft-jobs never enter the job scheduler but have their own initialize method.
+	 *
+	 * If either fail then we immediately complete the atom with an error.
+	 */
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
+		if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	} else {
+		/* Soft-job */
+		if (kbase_prepare_soft_job(katom) != 0) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	katom->work_id = atomic_inc_return(&jctx->work_id);
+	trace_gpu_job_enqueue((u32)kctx->id, katom->work_id,
+			kbasep_map_core_reqs_to_string(katom->core_req));
+#endif
+
+	if (queued && !IS_GPU_ATOM(katom)) {
+		ret = false;
+		goto out;
+	}
+#ifdef CONFIG_KDS
+	if (!katom->kds_dep_satisfied) {
+		/* Queue atom due to KDS dependency */
+		ret = false;
+		goto out;
+	}
+#endif				/* CONFIG_KDS */
+
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (atomic_read(&katom->dma_fence.dep_count) != -1) {
+		ret = false;
+		goto out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+		if (kbase_replay_process(katom))
+			ret = false;
+		else
+			ret = jd_done_nolock(katom, NULL);
+
+		goto out;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+
+		ret = false;
+	} else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		ret = kbasep_js_add_job(kctx, katom);
+		/* If job was cancelled then resolve immediately */
+		if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED)
+			ret = jd_done_nolock(katom, NULL);
+	} else {
+		/* This is a pure dependency. Resolve it immediately */
+		ret = jd_done_nolock(katom, NULL);
+	}
+
+ out:
+	return ret;
+}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data,
+		int uk6_atom)
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int err = 0;
+	int i;
+	bool need_to_try_schedule_context = false;
+	struct kbase_device *kbdev;
+	void __user *user_addr;
+	u32 latest_flush;
+
+	/*
+	 * kbase_jd_submit isn't expected to fail and so all errors with the jobs
+	 * are reported by immediately falling them (through event system)
+	 */
+	kbdev = kctx->kbdev;
+
+	beenthere(kctx, "%s", "Enter");
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+		return -EINVAL;
+	}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	if ((uk6_atom && submit_data->stride !=
+			sizeof(struct base_jd_atom_v2_uk6)) ||
+			submit_data->stride != sizeof(base_jd_atom_v2)) {
+#else
+	if (submit_data->stride != sizeof(base_jd_atom_v2)) {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+		dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+		return -EINVAL;
+	}
+
+	user_addr = get_compat_pointer(kctx, &submit_data->addr);
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight));
+
+	/* All atoms submitted in this call have the same flush ID */
+	latest_flush = kbase_backend_get_current_flush_id(kbdev);
+
+	for (i = 0; i < submit_data->nr_atoms; i++) {
+		struct base_jd_atom_v2 user_atom;
+		struct kbase_jd_atom *katom;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+		if (uk6_atom) {
+			struct base_jd_atom_v2_uk6 user_atom_v6;
+			base_jd_dep_type dep_types[2] = {BASE_JD_DEP_TYPE_DATA, BASE_JD_DEP_TYPE_DATA};
+
+			if (copy_from_user(&user_atom_v6, user_addr,
+					sizeof(user_atom_v6))) {
+				err = -EINVAL;
+				KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx,
+					atomic_sub_return(
+					submit_data->nr_atoms - i,
+					&kctx->timeline.jd_atoms_in_flight));
+				break;
+			}
+			/* Convert from UK6 atom format to UK7 format */
+			user_atom.jc = user_atom_v6.jc;
+			user_atom.udata = user_atom_v6.udata;
+			user_atom.extres_list = user_atom_v6.extres_list;
+			user_atom.nr_extres = user_atom_v6.nr_extres;
+			user_atom.core_req = (u32)(user_atom_v6.core_req & 0x7fff);
+
+			/* atom number 0 is used for no dependency atoms */
+			if (!user_atom_v6.pre_dep[0])
+				dep_types[0] = BASE_JD_DEP_TYPE_INVALID;
+
+			base_jd_atom_dep_set(&user_atom.pre_dep[0],
+					user_atom_v6.pre_dep[0],
+					dep_types[0]);
+
+			/* atom number 0 is used for no dependency atoms */
+			if (!user_atom_v6.pre_dep[1])
+				dep_types[1] = BASE_JD_DEP_TYPE_INVALID;
+
+			base_jd_atom_dep_set(&user_atom.pre_dep[1],
+					user_atom_v6.pre_dep[1],
+					dep_types[1]);
+
+			user_atom.atom_number = user_atom_v6.atom_number;
+			user_atom.prio = user_atom_v6.prio;
+			user_atom.device_nr = user_atom_v6.device_nr;
+		} else {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+		if (copy_from_user(&user_atom, user_addr, sizeof(user_atom)) != 0) {
+			err = -EINVAL;
+			KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(submit_data->nr_atoms - i, &kctx->timeline.jd_atoms_in_flight));
+			break;
+		}
+#ifdef BASE_LEGACY_UK6_SUPPORT
+		}
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+		if (KBASE_API_VERSION(10, 3) > kctx->api_version)
+			user_atom.core_req = (u32)(user_atom.compat_core_req
+					      & 0x7fff);
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
+
+		user_addr = (void __user *)((uintptr_t) user_addr + submit_data->stride);
+
+		mutex_lock(&jctx->lock);
+#ifndef compiletime_assert
+#define compiletime_assert_defined
+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \
+while (false)
+#endif
+		compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) ==
+					BASE_JD_ATOM_COUNT,
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+		compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) ==
+					sizeof(user_atom.atom_number),
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+#ifdef compiletime_assert_defined
+#undef compiletime_assert
+#undef compiletime_assert_defined
+#endif
+		katom = &jctx->atoms[user_atom.atom_number];
+
+		/* Record the flush ID for the cache flush optimisation */
+		katom->flush_id = latest_flush;
+
+		while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
+			/* Atom number is already in use, wait for the atom to
+			 * complete
+			 */
+			mutex_unlock(&jctx->lock);
+
+			/* This thread will wait for the atom to complete. Due
+			 * to thread scheduling we are not sure that the other
+			 * thread that owns the atom will also schedule the
+			 * context, so we force the scheduler to be active and
+			 * hence eventually schedule this context at some point
+			 * later.
+			 */
+			kbase_js_sched_all(kbdev);
+
+			if (wait_event_killable(katom->completed,
+					katom->status ==
+					KBASE_JD_ATOM_STATE_UNUSED) != 0) {
+				/* We're being killed so the result code
+				 * doesn't really matter
+				 */
+				return 0;
+			}
+			mutex_lock(&jctx->lock);
+		}
+
+		need_to_try_schedule_context |=
+				       jd_submit_atom(kctx, &user_atom, katom);
+
+		/* Register a completed job as a disjoint event when the GPU is in a disjoint state
+		 * (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kbdev);
+
+		mutex_unlock(&jctx->lock);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kbdev);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_submit);
+
+void kbase_jd_done_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	u64 cache_jc = katom->jc;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool context_idle;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+	kbdev = kctx->kbdev;
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	/*
+	 * Begin transaction on JD context and JS context
+	 */
+	mutex_lock(&jctx->lock);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_DONE);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* This worker only gets called on contexts that are scheduled *in*. This is
+	 * because it only happens in response to an IRQ from a job that was
+	 * running.
+	 */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (katom->event_code == BASE_JD_EVENT_STOPPED) {
+		/* Atom has been promoted to stopped */
+		unsigned long flags;
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		kbase_js_unpull(kctx, katom);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&jctx->lock);
+
+		return;
+	}
+
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		dev_err(kbdev->dev,
+			"t6xx: GPU fault 0x%02lx from job slot %d\n",
+					(unsigned long)katom->event_code,
+								katom->slot_nr);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	/* Retain state before the katom disappears */
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+
+	context_idle = kbase_js_complete_atom_wq(kctx, katom);
+
+	KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
+
+	kbasep_js_remove_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+	jd_done_nolock(katom, &kctx->completed_jobs);
+
+	/* katom may have been freed now, do not use! */
+
+	if (context_idle) {
+		unsigned long flags;
+
+		context_idle = false;
+		mutex_lock(&js_devdata->queue_mutex);
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* If kbase_sched() has scheduled this context back in then
+		 * KCTX_ACTIVE will have been set after we marked it as
+		 * inactive, and another pm reference will have been taken, so
+		 * drop our reference. But do not call kbase_jm_idle_ctx(), as
+		 * the context is active and fast-starting is allowed.
+		 *
+		 * If an atom has been fast-started then kctx->atoms_pulled will
+		 * be non-zero but KCTX_ACTIVE will still be false (as the
+		 * previous pm reference has been inherited). Do NOT drop our
+		 * reference, as it has been re-used, and leave the context as
+		 * active.
+		 *
+		 * If no new atoms have been started then KCTX_ACTIVE will still
+		 * be false and atoms_pulled will be zero, so drop the reference
+		 * and call kbase_jm_idle_ctx().
+		 *
+		 * As the checks are done under both the queue_mutex and
+		 * hwaccess_lock is should be impossible for this to race
+		 * with the scheduler code.
+		 */
+		if (kbase_ctx_flag(kctx, KCTX_ACTIVE) ||
+		    !atomic_read(&kctx->atoms_pulled)) {
+			/* Calling kbase_jm_idle_ctx() here will ensure that
+			 * atoms are not fast-started when we drop the
+			 * hwaccess_lock. This is not performed if
+			 * KCTX_ACTIVE is set as in that case another pm
+			 * reference has been taken and a fast-start would be
+			 * valid.
+			 */
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE))
+				kbase_jm_idle_ctx(kbdev, kctx);
+			context_idle = true;
+		} else {
+			kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+		}
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+
+	/*
+	 * Transaction complete
+	 */
+	mutex_unlock(&jctx->lock);
+
+	/* Job is now no longer running, so can now safely release the context
+	 * reference, and handle any actions that were logged against the atom's retained state */
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	if (!atomic_dec_return(&kctx->work_count)) {
+		/* If worker now idle then post all events that jd_done_nolock()
+		 * has queued */
+		mutex_lock(&jctx->lock);
+		while (!list_empty(&kctx->completed_jobs)) {
+			struct kbase_jd_atom *atom = list_entry(
+					kctx->completed_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(kctx->completed_jobs.next);
+
+			kbase_event_post(kctx, atom);
+		}
+		mutex_unlock(&jctx->lock);
+	}
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+
+	if (context_idle)
+		kbase_pm_context_idle(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+}
+
+/**
+ * jd_cancel_worker - Work queue job cancel function.
+ * @data: a &struct work_struct
+ *
+ * Only called as part of 'Zapping' a context (which occurs on termination).
+ * Operates serially with the kbase_jd_done_worker() on the work queue.
+ *
+ * This can only be called on contexts that aren't scheduled.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be
+ * running (by virtue of only being called on contexts that aren't
+ * scheduled).
+ */
+static void jd_cancel_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool need_to_try_schedule_context;
+	bool attr_state_changed;
+	struct kbase_device *kbdev;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	kbdev = kctx->kbdev;
+	jctx = &kctx->jctx;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+	/* This only gets called on contexts that are scheduled out. Hence, we must
+	 * make sure we don't de-ref the number of running jobs (there aren't
+	 * any), nor must we try to schedule out the context (it's already
+	 * scheduled out).
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	/* Scheduler: Remove the job from the system */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&jctx->lock);
+
+	need_to_try_schedule_context = jd_done_nolock(katom, NULL);
+	/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
+	 * schedule the context. There's also no need for the jsctx_mutex to have been taken
+	 * around this too. */
+	KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
+
+	/* katom may have been freed now, do not use! */
+	mutex_unlock(&jctx->lock);
+
+	if (attr_state_changed)
+		kbase_js_sched_all(kbdev);
+}
+
+/**
+ * kbase_jd_done - Complete a job that has been removed from the Hardware
+ * @katom: atom which has been completed
+ * @slot_nr: slot the atom was on
+ * @end_timestamp: completion time
+ * @done_code: completion code
+ *
+ * This must be used whenever a job has been removed from the Hardware, e.g.:
+ * An IRQ indicates that the job finished (for both error and 'done' codes), or
+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop.
+ *
+ * Some work is carried out immediately, and the rest is deferred onto a
+ * workqueue
+ *
+ * Context:
+ *   This can be called safely from atomic context.
+ *   The caller must hold kbdev->hwaccess_lock
+ */
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
+		ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
+{
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(kctx);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
+		katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0);
+
+	kbase_job_check_leave_disjoint(kbdev, katom);
+
+	katom->slot_nr = slot_nr;
+
+	atomic_inc(&kctx->work_count);
+
+#ifdef CONFIG_DEBUG_FS
+	/* a failed job happened and is waiting for dumping*/
+	if (!katom->will_fail_event_code &&
+			kbase_debug_job_fault_process(katom, katom->event_code))
+		return;
+#endif
+
+	WARN_ON(work_pending(&katom->work));
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, kbase_jd_done_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_done);
+
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+	/* This should only be done from a context that is not scheduled */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	WARN_ON(work_pending(&katom->work));
+
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, jd_cancel_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+
+void kbase_jd_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_jd_atom *katom;
+	struct list_head *entry, *tmp;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u);
+
+	kbase_js_zap_context(kctx);
+
+	mutex_lock(&kctx->jctx.lock);
+
+	/*
+	 * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are
+	 * queued outside the job scheduler.
+	 */
+
+	del_timer_sync(&kctx->soft_job_timeout);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		katom = list_entry(entry, struct kbase_jd_atom, queue);
+		kbase_cancel_soft_job(katom);
+	}
+
+
+#ifdef CONFIG_KDS
+
+	/* For each job waiting on a kds resource, cancel the wait and force the job to
+	 * complete early, this is done so that we don't leave jobs outstanding waiting
+	 * on kds resources which may never be released when contexts are zapped, resulting
+	 * in a hang.
+	 *
+	 * Note that we can safely iterate over the list as the struct kbase_jd_context lock is held,
+	 * this prevents items being removed when calling job_done_nolock in kbase_cancel_kds_wait_job.
+	 */
+
+	list_for_each(entry, &kctx->waiting_kds_resource) {
+		katom = list_entry(entry, struct kbase_jd_atom, node);
+
+		kbase_cancel_kds_wait_job(katom);
+	}
+#endif
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_cancel_all_atoms(kctx);
+#endif
+
+	mutex_unlock(&kctx->jctx.lock);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 */
+	flush_workqueue(kctx->dma_fence.wq);
+#endif
+
+	kbase_jm_wait_for_zero_jobs(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_zap_context);
+
+int kbase_jd_init(struct kbase_context *kctx)
+{
+	int i;
+	int mali_err = 0;
+#ifdef CONFIG_KDS
+	int err;
+#endif				/* CONFIG_KDS */
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kctx->jctx.job_done_wq = alloc_workqueue("mali_jd",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (NULL == kctx->jctx.job_done_wq) {
+		mali_err = -ENOMEM;
+		goto out1;
+	}
+
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		init_waitqueue_head(&kctx->jctx.atoms[i].completed);
+
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]);
+
+		/* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
+		kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
+		kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		kctx->jctx.atoms[i].dma_fence.context = fence_context_alloc(1);
+		atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks);
+#endif
+	}
+
+	mutex_init(&kctx->jctx.lock);
+
+	init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
+
+	spin_lock_init(&kctx->jctx.tb_lock);
+
+#ifdef CONFIG_KDS
+	err = kds_callback_init(&kctx->jctx.kds_cb, 0, kds_dep_clear);
+	if (0 != err) {
+		mali_err = -EINVAL;
+		goto out2;
+	}
+#endif				/* CONFIG_KDS */
+
+	kctx->jctx.job_nr = 0;
+	INIT_LIST_HEAD(&kctx->completed_jobs);
+	atomic_set(&kctx->work_count, 0);
+
+	return 0;
+
+#ifdef CONFIG_KDS
+ out2:
+	destroy_workqueue(kctx->jctx.job_done_wq);
+#endif				/* CONFIG_KDS */
+ out1:
+	return mali_err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_init);
+
+void kbase_jd_exit(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+#ifdef CONFIG_KDS
+	kds_callback_term(&kctx->jctx.kds_cb);
+#endif				/* CONFIG_KDS */
+	/* Work queue is emptied by this */
+	destroy_workqueue(kctx->jctx.job_done_wq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_exit);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
new file mode 100755
index 0000000..6437e42
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/seq_file.h>
+
+#include <mali_kbase.h>
+
+#include <mali_kbase_jd_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/**
+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file.
+ * @sfile: The debugfs entry
+ * @data:  Data associated with the entry
+ *
+ * This function is called to get the contents of the JD atoms debugfs file.
+ * This is a report of all atoms managed by kbase_jd_context.atoms
+ *
+ * Return: 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+	struct kbase_jd_atom *atoms;
+	unsigned long irq_flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Print version */
+	seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION);
+
+	/* Print U/K API version */
+	seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR,
+			BASE_UK_VERSION_MINOR);
+
+	/* Print table heading */
+	seq_puts(sfile, "atom id,core reqs,status,coreref status,predeps,start time,time on gpu\n");
+
+	atoms = kctx->jctx.atoms;
+	/* General atom states */
+	mutex_lock(&kctx->jctx.lock);
+	/* JS-related states */
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+	for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
+		struct kbase_jd_atom *atom = &atoms[i];
+		s64 start_timestamp = 0;
+
+		if (atom->status == KBASE_JD_ATOM_STATE_UNUSED)
+			continue;
+
+		/* start_timestamp is cleared as soon as the atom leaves UNUSED state
+		 * and set before a job is submitted to the h/w, a non-zero value means
+		 * it is valid */
+		if (ktime_to_ns(atom->start_timestamp))
+			start_timestamp = ktime_to_ns(
+					ktime_sub(ktime_get(), atom->start_timestamp));
+
+		seq_printf(sfile,
+				"%i,%u,%u,%u,%u %u,%lli,%llu\n",
+				i, atom->core_req, atom->status, atom->coreref_state,
+				(unsigned)(atom->dep[0].atom ?
+						atom->dep[0].atom - atoms : 0),
+				(unsigned)(atom->dep[1].atom ?
+						atom->dep[1].atom - atoms : 0),
+				(signed long long)start_timestamp,
+				(unsigned long long)(atom->time_spent_us ?
+					atom->time_spent_us * 1000 : start_timestamp)
+				);
+	}
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&kctx->jctx.lock);
+
+	return 0;
+}
+
+
+/**
+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * Return: file descriptor
+ */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private);
+}
+
+static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+	.open = kbasep_jd_debugfs_atoms_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Expose all atoms */
+	debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx,
+			&kbasep_jd_debugfs_atoms_fops);
+
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
new file mode 100755
index 0000000..090f816
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_jd_debugfs.h
+ * Header file for job dispatcher-related entries in debugfs
+ */
+
+#ifndef _KBASE_JD_DEBUGFS_H
+#define _KBASE_JD_DEBUGFS_H
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+
+#define MALI_JD_DEBUGFS_VERSION 1
+
+/**
+ * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system
+ *
+ * @kctx Pointer to kbase_context
+ */
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx);
+
+#endif  /*_KBASE_JD_DEBUGFS_H*/
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
new file mode 100755
index 0000000..0c5c6a6
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_hwaccess_jm.h"
+#include "mali_kbase_jm.h"
+
+/**
+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot
+ *			 @js on the active context.
+ * @kbdev:		Device pointer
+ * @js:			Job slot to run on
+ * @nr_jobs_to_submit:	Number of jobs to attempt to submit
+ *
+ * Return: true if slot can still be submitted on, false if slot is now full.
+ */
+static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
+				int nr_jobs_to_submit)
+{
+	struct kbase_context *kctx;
+	int i;
+
+	kctx = kbdev->hwaccess.active_kctx;
+
+	if (!kctx)
+		return true;
+
+	for (i = 0; i < nr_jobs_to_submit; i++) {
+		struct kbase_jd_atom *katom = kbase_js_pull(kctx, js);
+
+		if (!katom)
+			return true; /* Context has no jobs on this slot */
+
+		kbase_backend_run_atom(kbdev, katom);
+	}
+
+	return false; /* Slot ringbuffer should now be full */
+}
+
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	u32 ret_mask = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (js_mask) {
+		int js = ffs(js_mask) - 1;
+		int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
+
+		if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
+			ret_mask |= (1 << js);
+
+		js_mask &= ~(1 << js);
+	}
+
+	return ret_mask;
+}
+
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick(kbdev, js_mask);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_try_kick_all(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick_all(kbdev);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->hwaccess.active_kctx == kctx)
+		kbdev->hwaccess.active_kctx = NULL;
+}
+
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->event_code != BASE_JD_EVENT_STOPPED &&
+			katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
+		return kbase_js_complete_atom(katom, NULL);
+	} else {
+		kbase_js_unpull(katom->kctx, katom);
+		return NULL;
+	}
+}
+
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return kbase_js_complete_atom(katom, end_timestamp);
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
new file mode 100755
index 0000000..a74ee24
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -0,0 +1,110 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Job manager common APIs
+ */
+
+#ifndef _KBASE_JM_H_
+#define _KBASE_JM_H_
+
+/**
+ * kbase_jm_kick() - Indicate that there are jobs ready to run.
+ * @kbdev:	Device pointer
+ * @js_mask:	Mask of the job slots that can be pulled from.
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job
+ *			 slots.
+ * @kbdev:	Device pointer
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
+{
+	return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+/**
+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick
+ * @kbdev:   Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
+ * @kbdev:  Device pointer
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick_all() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick_all(struct kbase_device *kbdev);
+
+/**
+ * kbase_jm_idle_ctx() - Mark a context as idle.
+ * @kbdev:	Device pointer
+ * @kctx:	Context to mark as idle
+ *
+ * No more atoms will be pulled from this context until it is marked as active
+ * by kbase_js_use_ctx().
+ *
+ * The context should have no atoms currently pulled from it
+ * (kctx->atoms_pulled == 0).
+ *
+ * Caller must hold the hwaccess_lock
+ */
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
+ *				  been soft-stopped or will fail due to a
+ *				  dependency
+ * @kbdev:	Device pointer
+ * @katom:	Atom that has been stopped or will be failed
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+			struct kbase_jd_atom *katom);
+
+/**
+ * kbase_jm_complete() - Complete an atom
+ * @kbdev:		Device pointer
+ * @katom:		Atom that has completed
+ * @end_timestamp:	Timestamp of atom completion
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp);
+
+#endif /* _KBASE_JM_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
new file mode 100755
index 0000000..c25aaa6
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -0,0 +1,2898 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_hw.h>
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_jm.h"
+#include "mali_kbase_hwaccess_jm.h"
+
+/*
+ * Private types
+ */
+
+/* Bitpattern indicating the result of releasing a context */
+enum {
+	/* The context was descheduled - caller should try scheduling in a new
+	 * one to keep the runpool full */
+	KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+	/* Ctx attributes were changed - caller should try scheduling all
+	 * contexts */
+	KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
+};
+
+typedef u32 kbasep_js_release_result;
+
+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
+	KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
+	KBASE_JS_ATOM_SCHED_PRIO_LOW  /* BASE_JD_PRIO_LOW */
+};
+
+const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
+	BASE_JD_PRIO_HIGH,   /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+	BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+	BASE_JD_PRIO_LOW     /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+};
+
+
+/*
+ * Private function prototypes
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state);
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_ctx_job_cb callback);
+
+/* Helper for trace subcodes */
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return refcnt;
+}
+
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+/*
+ * Private functions
+ */
+
+/**
+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
+ * @features: JSn_FEATURE register value
+ *
+ * Given a JSn_FEATURE register value returns the core requirements that match
+ *
+ * Return: Core requirement bit mask
+ */
+static base_jd_core_req core_reqs_from_jsn_features(u16 features)
+{
+	base_jd_core_req core_req = 0u;
+
+	if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+		core_req |= BASE_JD_REQ_V;
+
+	if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+		core_req |= BASE_JD_REQ_CF;
+
+	if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+		core_req |= BASE_JD_REQ_CS;
+
+	if ((features & JS_FEATURE_TILER_JOB) != 0)
+		core_req |= BASE_JD_REQ_T;
+
+	if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+		core_req |= BASE_JD_REQ_FS;
+
+	return core_req;
+}
+
+static void kbase_js_sync_timers(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+	kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
+
+/* Hold the hwaccess_lock for this */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_per_as_data *js_per_as_data;
+	bool result = false;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		int new_refcnt;
+
+		KBASE_DEBUG_ASSERT(as_nr >= 0);
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		KBASE_DEBUG_ASSERT(js_per_as_data->kctx != NULL);
+
+		new_refcnt = ++(js_per_as_data->as_busy_refcount);
+
+		KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
+				NULL, 0u, new_refcnt);
+		result = true;
+	}
+
+	return result;
+}
+
+/**
+ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority to check.
+ *
+ * Return true if there are no atoms to pull. There may be running atoms in the
+ * ring buffer even if there are no atoms to pull. It is also possible for the
+ * ring buffer to be full (with running atoms) when this functions returns
+ * true.
+ *
+ * Return: true if there are no atoms to pull, false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	return RB_EMPTY_ROOT(&rb->runnable_tree);
+}
+
+/**
+ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no
+ * pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if the ring buffers for all priorities have no pullable atoms,
+ *	   false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue.
+ * @kctx:     Pointer to kbase context with the queue.
+ * @js:       Job slot id to iterate.
+ * @prio:     Priority id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over a queue and invoke @callback for each entry in the queue, and
+ * remove the entry from the queue.
+ *
+ * If entries are added to the queue while this is running those entries may, or
+ * may not be covered. To ensure that all entries in the buffer have been
+ * enumerated when this function returns jsctx->lock must be held when calling
+ * this function.
+ *
+ * The HW access lock must always be held when calling this function.
+ */
+static void
+jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
+		kbasep_js_ctx_job_cb callback)
+{
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (!RB_EMPTY_ROOT(&queue->runnable_tree)) {
+		struct rb_node *node = rb_first(&queue->runnable_tree);
+		struct kbase_jd_atom *entry = rb_entry(node,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		rb_erase(node, &queue->runnable_tree);
+		callback(kctx->kbdev, entry);
+	}
+
+	while (!list_empty(&queue->x_dep_head)) {
+		struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next,
+				struct kbase_jd_atom, queue);
+
+		list_del(queue->x_dep_head.next);
+
+		callback(kctx->kbdev, entry);
+	}
+}
+
+/**
+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue
+ * @kctx:     Pointer to kbase context with queue.
+ * @js:       Job slot id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over all the different priorities, and for each call
+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
+ * for each entry, and remove the entry from the queue.
+ */
+static inline void
+jsctx_queue_foreach(struct kbase_context *kctx, int js,
+		kbasep_js_ctx_job_cb callback)
+{
+	int prio;
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+		jsctx_queue_foreach_prio(kctx, js, prio, callback);
+}
+
+/**
+ * jsctx_rb_peek_prio(): - Check buffer and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a pointer to
+ * the next atom, unless the ring buffer is empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	struct rb_node *node;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	node = rb_first(&rb->runnable_tree);
+	if (!node)
+		return NULL;
+
+	return rb_entry(node, struct kbase_jd_atom, runnable_tree_node);
+}
+
+/**
+ * jsctx_rb_peek(): - Check all priority buffers and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Check the ring buffers for all priorities, starting from
+ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
+ * pointer to the next atom, unless all the priority's ring buffers are empty.
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		struct kbase_jd_atom *katom;
+
+		katom = jsctx_rb_peek_prio(kctx, js, prio);
+		if (katom)
+			return katom;
+	}
+
+	return NULL;
+}
+
+/**
+ * jsctx_rb_pull(): - Mark atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to pull.
+ *
+ * Mark an atom previously obtained from jsctx_rb_peek() as running.
+ *
+ * @katom must currently be at the head of the ring buffer.
+ */
+static inline void
+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/* Atoms must be pulled in the correct order. */
+	WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
+
+	rb_erase(&katom->runnable_tree_node, &rb->runnable_tree);
+}
+
+#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0)
+
+static void
+jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+	struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (*new) {
+		struct kbase_jd_atom *entry = container_of(*new,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		parent = *new;
+		if (LESS_THAN_WRAP(katom->age, entry->age))
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&katom->runnable_tree_node, parent, new);
+	rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree);
+}
+
+/**
+ * jsctx_rb_unpull(): - Undo marking of atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to unpull.
+ *
+ * Undo jsctx_rb_pull() and put @katom back in the queue.
+ *
+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
+ * pulled.
+ */
+static inline void
+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_tree_add(kctx, katom);
+}
+
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
+					int js,
+					bool is_scheduled);
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+
+/*
+ * Functions private to KBase ('Protected' functions)
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev)
+{
+	struct kbasep_js_device_data *jsdd;
+	int i;
+	u16 as_present;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	jsdd = &kbdev->js_data;
+
+	/* These two must be recalculated if nr_hw_address_spaces changes
+	 * (e.g. for HW workarounds) */
+	as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
+	kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+		bool use_workaround;
+
+		use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
+		if (use_workaround) {
+			dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
+			kbdev->nr_user_address_spaces = 1;
+		}
+	}
+#ifdef CONFIG_MALI_DEBUG
+	/* Soft-stop will be disabled on a single context by default unless
+	 * softstop_always is set */
+	jsdd->softstop_always = false;
+#endif				/* CONFIG_MALI_DEBUG */
+	jsdd->nr_all_contexts_running = 0;
+	jsdd->nr_user_contexts_running = 0;
+	jsdd->nr_contexts_pullable = 0;
+	atomic_set(&jsdd->nr_contexts_runnable, 0);
+	/* All ASs initially free */
+	jsdd->as_free = as_present;
+	/* No ctx allowed to submit */
+	jsdd->runpool_irq.submit_allowed = 0u;
+	memset(jsdd->runpool_irq.ctx_attr_ref_count, 0,
+			sizeof(jsdd->runpool_irq.ctx_attr_ref_count));
+	memset(jsdd->runpool_irq.slot_affinities, 0,
+			sizeof(jsdd->runpool_irq.slot_affinities));
+	memset(jsdd->runpool_irq.slot_affinity_refcount, 0,
+			sizeof(jsdd->runpool_irq.slot_affinity_refcount));
+	INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list);
+
+	/* Config attributes */
+	jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+	jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+	jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+	else
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS;
+	jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+	jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408;
+	else
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+	jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+	jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
+	jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
+	atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
+
+	dev_dbg(kbdev->dev, "JS Config Attribs: ");
+	dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
+			jsdd->scheduling_period_ns);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u",
+			jsdd->soft_stop_ticks);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u",
+			jsdd->soft_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u",
+			jsdd->hard_stop_ticks_ss);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u",
+			jsdd->hard_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u",
+			jsdd->hard_stop_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u",
+			jsdd->gpu_reset_ticks_ss);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u",
+			jsdd->gpu_reset_ticks_cl);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u",
+			jsdd->gpu_reset_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u",
+			jsdd->ctx_timeslice_ns);
+	dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i",
+		atomic_read(&jsdd->soft_job_timeout_ms));
+
+	if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
+			jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
+			jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping &&
+			jsdd->hard_stop_ticks_dumping <
+			jsdd->gpu_reset_ticks_dumping)) {
+		dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n");
+		return -EINVAL;
+	}
+
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.",
+			jsdd->soft_stop_ticks,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.",
+			jsdd->hard_stop_ticks_ss,
+			jsdd->hard_stop_ticks_dumping,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing.");
+#endif
+
+	/* setup the number of irq throttle cycles base on given time */
+	{
+		int time_us = kbdev->gpu_props.irq_throttle_time_us;
+		int cycles = kbasep_js_convert_us_to_gpu_ticks_max_freq(kbdev,
+				time_us);
+
+		atomic_set(&kbdev->irq_throttle_cycles, cycles);
+	}
+
+	/* Clear the AS data, including setting NULL pointers */
+	memset(&jsdd->runpool_irq.per_as_data[0], 0,
+			sizeof(jsdd->runpool_irq.per_as_data));
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i)
+		jsdd->js_reqs[i] = core_reqs_from_jsn_features(
+			kbdev->gpu_props.props.raw_props.js_features[i]);
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+
+	mutex_init(&jsdd->runpool_mutex);
+	mutex_init(&jsdd->queue_mutex);
+	spin_lock_init(&kbdev->hwaccess_lock);
+	sema_init(&jsdd->schedule_sem, 1);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
+		INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]);
+		INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]);
+	}
+
+	return 0;
+}
+
+void kbasep_js_devdata_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbasep_js_devdata_term(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	/* The caller must de-register all contexts before calling this
+	 */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
+	KBASE_DEBUG_ASSERT(memcmp(
+	        js_devdata->runpool_irq.ctx_attr_ref_count,
+	        zero_ctx_attr_ref_count,
+	        sizeof(zero_ctx_attr_ref_count)) == 0);
+	CSTD_UNUSED(zero_ctx_attr_ref_count);
+}
+
+int kbasep_js_kctx_init(struct kbase_context * const kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int i, j;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	js_kctx_info->ctx.nr_jobs = 0;
+	kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+	kbase_ctx_flag_clear(kctx, KCTX_DYING);
+	memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
+			sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
+
+	/* Initially, the context is disabled from submission until the create
+	 * flags are set */
+	kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+	mutex_init(&js_kctx_info->ctx.jsctx_mutex);
+
+	init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
+
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
+			INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head);
+			kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT;
+		}
+	}
+
+	return 0;
+}
+
+void kbasep_js_kctx_term(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int js;
+	bool update_ctx_count = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* The caller must de-register all jobs before calling this */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
+
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) {
+		WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
+		atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		update_ctx_count = true;
+		kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+	}
+
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if (update_ctx_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		kbase_backend_ctx_count_changed(kbdev);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_nolock - Variant of
+ *                                         kbase_jd_ctx_list_add_pullable()
+ *                                         where the caller must hold
+ *                                         hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head_nolock - Variant of
+ *                                              kbase_js_ctx_list_add_pullable_head()
+ *                                              where the caller must hold
+ *                                              hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head_nolock(
+		struct kbase_device *kbdev, struct kbase_context *kctx, int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
+ *                                       per-slot pullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the head.
+ *
+ * This function should be used when a context has been scheduled, but no jobs
+ * can currently be pulled from it.
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the
+ *                                           per-slot unpullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on the per-slot pullable queue. It will be
+ * removed from the pullable queue before being added to the unpullable queue.
+ *
+ * This function should be used when a context has been pulled from, and there
+ * are no jobs remaining on the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+				&kbdev->js_data.ctx_list_unpullable[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable
+ *                                   or unpullable context queues
+ * @kbdev:  Device pointer
+ * @kctx:   Context to remove from queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on one of the queues.
+ *
+ * This function should be used when a context has no jobs on the GPU, and no
+ * jobs remaining for the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head()
+ *                                     where the caller must hold
+ *                                     hwaccess_lock
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
+						struct kbase_device *kbdev,
+						int js)
+{
+	struct kbase_context *kctx;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (list_empty(&kbdev->js_data.ctx_list_pullable[js]))
+		return NULL;
+
+	kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next,
+					struct kbase_context,
+					jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
+ *                              queue.
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head(
+		struct kbase_device *kbdev, int js)
+{
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the
+ *                         specified slot
+ * @kctx:          Context pointer
+ * @js:            Job slot to use
+ * @is_scheduled:  true if the context is currently scheduled
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:         true if context can be pulled from on specified slot
+ *                 false otherwise
+ */
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
+					bool is_scheduled)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_jd_atom *katom;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	js_devdata = &kctx->kbdev->js_data;
+
+	if (is_scheduled) {
+		if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+			return false;
+	}
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return false; /* No pullable atoms */
+	if (kctx->blocked_js[js][katom->sched_priority])
+		return false;
+	if (atomic_read(&katom->blocked))
+		return false; /* next atom blocked */
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return false;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return false;
+	}
+
+	return true;
+}
+
+static bool kbase_js_dep_validate(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool ret = true;
+	bool has_dep = false, has_x_dep = false;
+	int js = kbase_js_get_slot(kbdev, katom);
+	int prio = katom->sched_priority;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+		if (dep_atom) {
+			int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+			int dep_prio = dep_atom->sched_priority;
+
+			/* Dependent atom must already have been submitted */
+			if (!(dep_atom->atom_flags &
+					KBASE_KATOM_FLAG_JSCTX_IN_TREE)) {
+				ret = false;
+				break;
+			}
+
+			/* Dependencies with different priorities can't
+			  be represented in the ringbuffer */
+			if (prio != dep_prio) {
+				ret = false;
+				break;
+			}
+
+			if (js == dep_js) {
+				/* Only one same-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * same-slot dependency */
+				if (dep_atom->post_dep) {
+					ret = false;
+					break;
+				}
+				has_dep = true;
+			} else {
+				/* Only one cross-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_x_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * cross-slot dependency */
+				if (dep_atom->x_post_dep) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already be in the
+				 * HW access ringbuffer */
+				if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already have
+				 * completed */
+				if (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_IN_JS) {
+					ret = false;
+					break;
+				}
+				/* Cross-slot dependencies must not violate
+				 * PRLAM-8987 affinity restrictions */
+				if (kbase_hw_has_issue(kbdev,
+							BASE_HW_ISSUE_8987) &&
+						(js == 2 || dep_js == 2)) {
+					ret = false;
+					break;
+				}
+				has_x_dep = true;
+			}
+
+			/* Dependency can be represented in ringbuffers */
+		}
+	}
+
+	/* If dependencies can be represented by ringbuffer then clear them from
+	 * atom structure */
+	if (ret) {
+		for (i = 0; i < 2; i++) {
+			struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+			if (dep_atom) {
+				int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+
+				if ((js != dep_js) &&
+					(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_COMPLETED)
+					&& (dep_atom->status !=
+					KBASE_JD_ATOM_STATE_HW_COMPLETED)
+					&& (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED)) {
+
+					katom->atom_flags |=
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+					katom->x_pre_dep = dep_atom;
+					dep_atom->x_post_dep = katom;
+					if (kbase_jd_katom_dep_type(
+							&katom->dep[i]) ==
+							BASE_JD_DEP_TYPE_DATA)
+						katom->atom_flags |=
+						KBASE_KATOM_FLAG_FAIL_BLOCKER;
+				}
+				if ((kbase_jd_katom_dep_type(&katom->dep[i])
+						== BASE_JD_DEP_TYPE_DATA) &&
+						(js == dep_js)) {
+					katom->pre_dep = dep_atom;
+					dep_atom->post_dep = katom;
+				}
+
+				list_del(&katom->dep_item[i]);
+				kbase_jd_katom_dep_clear(&katom->dep[i]);
+			}
+		}
+	}
+
+	return ret;
+}
+
+bool kbasep_js_add_job(struct kbase_context *kctx,
+		struct kbase_jd_atom *atom)
+{
+	unsigned long flags;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+	bool enqueue_required = false;
+	bool timer_sync = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/*
+	 * Begin Runpool transaction
+	 */
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
+	++(js_kctx_info->ctx.nr_jobs);
+
+	/* Setup any scheduling information */
+	kbasep_js_clear_job_retry_submit(atom);
+
+	/* Lock for state available during IRQ */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (!kbase_js_dep_validate(kctx, atom)) {
+		/* Dependencies could not be represented */
+		--(js_kctx_info->ctx.nr_jobs);
+
+		/* Setting atom status back to queued as it still has unresolved
+		 * dependencies */
+		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		goto out_unlock;
+	}
+
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_READY);
+	KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
+
+	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
+				kbasep_js_trace_get_refcnt_nolock(kbdev, kctx));
+
+	/* Context Attribute Refcounting */
+	kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom);
+
+	if (enqueue_required) {
+		if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false))
+			timer_sync = kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+		else
+			timer_sync = kbase_js_ctx_list_add_unpullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+	}
+	/* If this context is active and the atom is the first on its slot,
+	 * kick the job manager to attempt to fast-start the atom */
+	if (enqueue_required && kctx == kbdev->hwaccess.active_kctx)
+		kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+	/* End runpool transaction */
+
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+			/* A job got added while/after kbase_job_zap_context()
+			 * was called on a non-scheduled context (e.g. KDS
+			 * dependency resolved). Kill that job by killing the
+			 * context. */
+			kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx,
+					false);
+		} else if (js_kctx_info->ctx.nr_jobs == 1) {
+			/* Handle Refcount going from 0 to 1: schedule the
+			 * context on the Queue */
+			KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+			dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+
+			/* Queue was updated - caller must try to
+			 * schedule the head context */
+			WARN_ON(!enqueue_required);
+		}
+	}
+out_unlock:
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return enqueue_required;
+}
+
+void kbasep_js_remove_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc,
+			kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* De-refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
+	--(js_kctx_info->ctx.nr_jobs);
+}
+
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	unsigned long flags;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	struct kbasep_js_device_data *js_devdata;
+	bool attr_state_changed;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+	kbasep_js_remove_job(kbdev, kctx, katom);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* The atom has 'finished' (will not be re-run), so no need to call
+	 * kbasep_js_has_atom_finished().
+	 *
+	 * This is because it returns false for soft-stopped atoms, but we
+	 * want to override that, because we're cancelling an atom regardless of
+	 * whether it was soft-stopped or not */
+	attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
+			&katom_retained_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return attr_state_changed;
+}
+
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	bool result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	/* KBASE_TRACE_ADD_REFCOUNT( kbdev, JS_RETAIN_CTX, kctx, NULL, 0,
+	   kbasep_js_trace_get_refcnt(kbdev, kctx)); */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return result;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+		int as_nr)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx = NULL;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	found_kctx = js_per_as_data->kctx;
+
+	if (found_kctx != NULL)
+		++(js_per_as_data->as_busy_refcount);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return found_kctx;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+		struct kbase_device *kbdev, int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx = NULL;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	found_kctx = js_per_as_data->kctx;
+
+	if (found_kctx != NULL)
+		++(js_per_as_data->as_busy_refcount);
+
+	return found_kctx;
+}
+
+/**
+ * kbasep_js_release_result - Try running more jobs after releasing a context
+ *                            and/or atom
+ *
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed
+ *
+ * This collates a set of actions that must happen whilst hwaccess_lock is held.
+ *
+ * This includes running more jobs when:
+ * - The previously released kctx caused a ctx attribute change,
+ * - The released atom caused a ctx attribute change,
+ * - Slots were previously blocked due to affinity restrictions,
+ * - Submission during IRQ handling failed.
+ *
+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were
+ *         changed. The caller should try scheduling all contexts
+ */
+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state,
+		bool runpool_ctx_attr_change)
+{
+	struct kbasep_js_device_data *js_devdata;
+	kbasep_js_release_result result = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom_retained_state != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (js_devdata->nr_user_contexts_running != 0) {
+		bool retry_submit = false;
+		int retry_jobslot = 0;
+
+		if (katom_retained_state)
+			retry_submit = kbasep_js_get_atom_retry_submit_slot(
+					katom_retained_state, &retry_jobslot);
+
+		if (runpool_ctx_attr_change || retry_submit) {
+			/* A change in runpool ctx attributes might mean we can
+			 * run more jobs than before  */
+			result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+
+			KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB,
+						kctx, NULL, 0u, retry_jobslot);
+		}
+	}
+	return result;
+}
+
+/*
+ * Internal function to release the reference on a ctx and an atom's "retained
+ * state", only taking the runpool and as transaction mutexes
+ *
+ * This also starts more jobs running in the case of an ctx-attribute state
+ * change
+ *
+ * This does none of the followup actions for scheduling:
+ * - It does not schedule in a new context
+ * - It does not requeue or handle dying contexts
+ *
+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead
+ *
+ * Requires:
+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
+ * - Context has a non-zero refcount
+ * - Caller holds js_kctx_info->ctx.jsctx_mutex
+ * - Caller holds js_devdata->runpool_mutex
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	kbasep_js_release_result release_result = 0u;
+	bool runpool_ctx_attr_change = false;
+	int kctx_as_nr;
+	struct kbase_as *current_as;
+	int new_ref_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	/* kctx->as_nr and js_per_as_data are only read from here. The caller's
+	 * js_ctx_mutex provides a barrier that ensures they are up-to-date.
+	 *
+	 * They will not change whilst we're reading them, because the refcount
+	 * is non-zero (and we ASSERT on that last fact).
+	 */
+	kctx_as_nr = kctx->as_nr;
+	KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID);
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[kctx_as_nr];
+	KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+	/*
+	 * Transaction begins on AS and runpool_irq
+	 *
+	 * Assert about out calling contract
+	 */
+	current_as = &kbdev->as[kctx_as_nr];
+	mutex_lock(&kbdev->pm.lock);
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
+	KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+	/* Update refcount */
+	new_ref_count = --(js_per_as_data->as_busy_refcount);
+
+	/* Release the atom if it finished (i.e. wasn't soft-stopped) */
+	if (kbasep_js_has_atom_finished(katom_retained_state))
+		runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(
+				kbdev, kctx, katom_retained_state);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u,
+			new_ref_count);
+
+	if (new_ref_count == 1 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) &&
+			!kbase_pm_is_suspending(kbdev)) {
+		/* Context is kept scheduled into an address space even when
+		 * there are no jobs, in this case we have to handle the
+		 * situation where all jobs have been evicted from the GPU and
+		 * submission is disabled.
+		 *
+		 * At this point we re-enable submission to allow further jobs
+		 * to be executed
+		 */
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+	}
+
+	/* Make a set of checks to see if the context should be scheduled out */
+	if (new_ref_count == 0 &&
+		(!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
+							kbdev->pm.suspending)) {
+		/* Last reference, and we've been told to remove this context
+		 * from the Run Pool */
+		dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because as_busy_refcount=%d, jobs=%d, allowed=%d",
+				kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
+				kbasep_js_is_submit_allowed(js_devdata, kctx));
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_mmu_as_released(kctx->as_nr);
+#endif
+		KBASE_TLSTREAM_TL_NRET_AS_CTX(&kbdev->as[kctx->as_nr], kctx);
+
+		kbase_backend_release_ctx_irq(kbdev, kctx);
+
+		if (kbdev->hwaccess.active_kctx == kctx)
+			kbdev->hwaccess.active_kctx = NULL;
+
+		/* Ctx Attribute handling
+		 *
+		 * Releasing atoms attributes must either happen before this, or
+		 * after the KCTX_SHEDULED flag is changed, otherwise we
+		 * double-decount the attributes
+		 */
+		runpool_ctx_attr_change |=
+			kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
+
+		/* Releasing the context and katom retained state can allow
+		 * more jobs to run */
+		release_result |=
+			kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev,
+						kctx, katom_retained_state,
+						runpool_ctx_attr_change);
+
+		/*
+		 * Transaction ends on AS and runpool_irq:
+		 *
+		 * By this point, the AS-related data is now clear and ready
+		 * for re-use.
+		 *
+		 * Since releases only occur once for each previous successful
+		 * retain, and no more retains are allowed on this context, no
+		 * other thread will be operating in this
+		 * code whilst we are
+		 */
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		kbase_backend_release_ctx_noirq(kbdev, kctx);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* Note: Don't reuse kctx_as_nr now */
+
+		/* Synchronize with any timers */
+		kbase_backend_ctx_count_changed(kbdev);
+
+		/* update book-keeping info */
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+		/* Signal any waiter that the context is not scheduled, so is
+		 * safe for termination - once the jsctx_mutex is also dropped,
+		 * and jobs have finished. */
+		wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+		/* Queue an action to occur after we've dropped the lock */
+		release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED |
+			KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+	} else {
+		kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
+				katom_retained_state, runpool_ctx_attr_change);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return release_result;
+}
+
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	/* Setup a dummy katom_retained_state */
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+							&katom_retained_state);
+}
+
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx, bool has_pm_ref)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	/* This is called if and only if you've you've detached the context from
+	 * the Runpool Queue, and not added it back to the Runpool
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Dying: don't requeue, but kill all jobs on the context. This
+		 * happens asynchronously */
+		dev_dbg(kbdev->dev,
+			"JS: ** Killing Context %p on RunPool Remove **", kctx);
+		kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
+	}
+}
+
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL)
+		kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+			&katom_retained_state);
+}
+
+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into
+ * kbase_js_sched_all() */
+static void kbasep_js_runpool_release_ctx_no_schedule(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+	struct kbasep_js_atom_retained_state katom_retained_state_struct;
+	struct kbasep_js_atom_retained_state *katom_retained_state =
+		&katom_retained_state_struct;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	kbasep_js_atom_retained_state_init_invalid(katom_retained_state);
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* NOTE: could return release_result if the caller would like to know
+	 * whether it should schedule a new context, but currently no callers do
+	 */
+}
+
+void kbase_js_set_timeouts(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_backend_timeouts_changed(kbdev);
+}
+
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_as *new_address_space = NULL;
+	unsigned long flags;
+	bool kctx_suspended = false;
+	int as_nr;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Pick available address space for this context */
+	as_nr = kbase_backend_find_free_address_space(kbdev, kctx);
+
+	if (as_nr == KBASEP_AS_NR_INVALID)
+		return false; /* No address spaces currently available */
+
+	new_address_space = &kbdev->as[as_nr];
+
+	/*
+	 * Atomic transaction on the Context and Run Pool begins
+	 */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Check to see if context is dying due to kbase_job_zap_context() */
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Roll back the transaction so far and return */
+		kbase_backend_release_free_address_space(kbdev, as_nr);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL,
+				0u,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	kbase_ctx_flag_set(kctx, KCTX_SCHEDULED);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Assign context to previously chosen address space */
+	if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* Roll back the transaction so far and return */
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+
+		kbase_backend_release_free_address_space(kbdev, as_nr);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		return false;
+	}
+
+	kbdev->hwaccess.active_kctx = kctx;
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
+#endif
+	KBASE_TLSTREAM_TL_RET_AS_CTX(&kbdev->as[kctx->as_nr], kctx);
+
+	/* Cause any future waiter-on-termination to wait until the context is
+	 * descheduled */
+	wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+	/* Re-check for suspending: a suspend could've occurred, and all the
+	 * contexts could've been removed from the runpool before we took this
+	 * lock. In this case, we don't want to allow this context to run jobs,
+	 * we just want it out immediately.
+	 *
+	 * The DMB required to read the suspend flag was issued recently as part
+	 * of the hwaccess_lock locking. If a suspend occurs *after* that lock
+	 * was taken (i.e. this condition doesn't execute), then the
+	 * kbasep_js_suspend() code will cleanup this context instead (by virtue
+	 * of it being called strictly after the suspend flag is set, and will
+	 * wait for this lock to drop) */
+	if (kbase_pm_is_suspending(kbdev)) {
+		/* Cause it to leave at some later point */
+		bool retained;
+
+		retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+		KBASE_DEBUG_ASSERT(retained);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+		kctx_suspended = true;
+	}
+
+	/* Transaction complete */
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Synchronize with any timers */
+	kbase_backend_ctx_count_changed(kbdev);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	/* Note: after this point, the context could potentially get scheduled
+	 * out immediately */
+
+	if (kctx_suspended) {
+		/* Finishing forcing out the context due to a suspend. Use a
+		 * variant of kbasep_js_runpool_release_ctx() that doesn't
+		 * schedule a new context, to prevent a risk of recursion back
+		 * into this function */
+		kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx);
+		return false;
+	}
+	return true;
+}
+
+static bool kbase_js_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (kbase_backend_use_ctx_sched(kbdev, kctx)) {
+		/* Context already has ASID - mark as active */
+		kbdev->hwaccess.active_kctx = kctx;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return true; /* Context already scheduled */
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return kbasep_js_schedule_ctx(kbdev, kctx);
+}
+
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	bool is_scheduled;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* This must never be attempted whilst suspending - i.e. it should only
+	 * happen in response to a syscall from a user-space thread */
+	BUG_ON(kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Mark the context as privileged */
+	kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED);
+
+	is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED);
+	if (!is_scheduled) {
+		/* Add the context to the pullable list */
+		if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0))
+			kbase_js_sync_timers(kbdev);
+
+		/* Fast-starting requires the jsctx_mutex to be dropped,
+		 * because it works on multiple ctxs */
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		/* Try to schedule the context in */
+		kbase_js_sched_all(kbdev);
+
+		/* Wait for the context to be scheduled in */
+		wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			   kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	} else {
+		/* Already scheduled in - We need to retain it to keep the
+		 * corresponding address space */
+		kbasep_js_runpool_retain_ctx(kbdev, kctx);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+}
+KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx);
+
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* We don't need to use the address space anymore */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Release the context - it will be scheduled out */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	kbase_js_sched_all(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx);
+
+void kbasep_js_suspend(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+	u16 retained = 0u;
+	int nr_privileged_ctx = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Prevent all contexts from submitting */
+	js_devdata->runpool_irq.submit_allowed = 0;
+
+	/* Retain each of the contexts, so we can cause it to leave even if it
+	 * had no refcount to begin with */
+	for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+			&js_devdata->runpool_irq.per_as_data[i];
+		struct kbase_context *kctx = js_per_as_data->kctx;
+
+		retained = retained << 1;
+
+		if (kctx) {
+			++(js_per_as_data->as_busy_refcount);
+			retained |= 1u;
+			/* We can only cope with up to 1 privileged context -
+			 * the instrumented context. It'll be suspended by
+			 * disabling instrumentation */
+			if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+				++nr_privileged_ctx;
+				WARN_ON(nr_privileged_ctx != 1);
+			}
+		}
+	}
+	CSTD_UNUSED(nr_privileged_ctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* De-ref the previous retain to ensure each context gets pulled out
+	 * sometime later. */
+	for (i = 0;
+		 i < BASE_MAX_NR_AS;
+		 ++i, retained = retained >> 1) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+			&js_devdata->runpool_irq.per_as_data[i];
+		struct kbase_context *kctx = js_per_as_data->kctx;
+
+		if (retained & 1u)
+			kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	/* Caller must wait for all Power Manager active references to be
+	 * dropped */
+}
+
+void kbasep_js_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int js;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_context *kctx, *n;
+
+		list_for_each_entry_safe(kctx, n,
+				&kbdev->js_data.ctx_list_unpullable[js],
+				jctx.sched_info.ctx.ctx_list_entry[js]) {
+			struct kbasep_js_kctx_info *js_kctx_info;
+			unsigned long flags;
+			bool timer_sync = false;
+
+			js_kctx_info = &kctx->jctx.sched_info;
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+				kbase_js_ctx_pullable(kctx, js, false))
+				timer_sync =
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			if (timer_sync)
+				kbase_backend_ctx_count_changed(kbdev);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		}
+	}
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	/* Restart atom processing */
+	kbase_js_sched_all(kbdev);
+
+	/* JS Resume complete */
+}
+
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if ((katom->core_req & BASE_JD_REQ_FS) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE |
+								BASE_JD_REQ_T)))
+		return false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) &&
+	    (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T)))
+		return false;
+
+	return true;
+}
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_FS)
+		return 0;
+
+	if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+		if (katom->device_nr == 1 &&
+				kbdev->gpu_props.num_core_groups == 2)
+			return 2;
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+			return 2;
+	}
+
+	return 1;
+}
+
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom)
+{
+	bool enqueue_required;
+
+	katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	/* If slot will transition from unpullable to pullable then add to
+	 * pullable list */
+	if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
+		enqueue_required = true;
+	} else {
+		enqueue_required = false;
+	}
+	if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
+			(katom->pre_dep && (katom->pre_dep->atom_flags &
+			KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		int prio = katom->sched_priority;
+		int js = katom->slot_nr;
+		struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+		list_add_tail(&katom->queue, &queue->x_dep_head);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+		enqueue_required = false;
+	} else {
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		/* Add atom to ring buffer. */
+		jsctx_tree_add(kctx, katom);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+	}
+
+	return enqueue_required;
+}
+
+/**
+ * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the
+ *                         runnable_tree, ready for execution
+ * @katom: Atom to submit
+ *
+ * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set,
+ * but is still present in the x_dep list. If @katom has a same-slot dependent
+ * atom then that atom (and any dependents) will also be moved.
+ */
+static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock);
+
+	while (katom) {
+		WARN_ON(!(katom->atom_flags &
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
+
+		if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+			list_del(&katom->queue);
+			katom->atom_flags &=
+					~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+			jsctx_tree_add(katom->kctx, katom);
+			katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+		} else {
+			break;
+		}
+
+		katom = katom->post_dep;
+	}
+}
+
+
+/**
+ * kbase_js_evict_deps - Evict dependencies of a failed atom.
+ * @kctx:       Context pointer
+ * @katom:      Pointer to the atom that has failed.
+ * @js:         The job slot the katom was run on.
+ * @prio:       Priority of the katom.
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propogated to all dependent atoms.
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom, int js, int prio)
+{
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+	struct kbase_jd_atom *next_katom = katom->post_dep;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (next_katom) {
+		KBASE_DEBUG_ASSERT(next_katom->status !=
+				KBASE_JD_ATOM_STATE_HW_COMPLETED);
+		next_katom->will_fail_event_code = katom->event_code;
+
+	}
+
+	/* Has cross slot depenency. */
+	if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE |
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		/* Remove dependency.*/
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+		/* Fail if it had a data dependency. */
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
+			x_dep->will_fail_event_code = katom->event_code;
+		}
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)
+			kbase_js_move_to_tree(x_dep);
+	}
+}
+
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbasep_js_device_data *js_devdata;
+	int pulled;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	js_devdata = &kctx->kbdev->js_data;
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+		return NULL;
+	if (kbase_pm_is_suspending(kctx->kbdev))
+		return NULL;
+
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return NULL;
+	if (kctx->blocked_js[js][katom->sched_priority])
+		return NULL;
+	if (atomic_read(&katom->blocked))
+		return NULL;
+
+	/* Due to ordering restrictions when unpulling atoms on failure, we do
+	 * not allow multiple runs of fail-dep atoms from the same context to be
+	 * present on the same slot */
+	if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) {
+		struct kbase_jd_atom *prev_atom =
+				kbase_backend_inspect_tail(kctx->kbdev, js);
+
+		if (prev_atom && prev_atom->kctx != kctx)
+			return NULL;
+	}
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return NULL;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return NULL;
+	}
+
+	kbase_ctx_flag_set(kctx, KCTX_PULLED);
+
+	pulled = atomic_inc_return(&kctx->atoms_pulled);
+	if (pulled == 1 && !kctx->slots_pullable) {
+		WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+		kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+		atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable);
+	}
+	atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
+	kctx->atoms_pulled_slot_pri[katom->slot_nr][katom->sched_priority]++;
+	jsctx_rb_pull(kctx, katom);
+
+	kbasep_js_runpool_retain_ctx_nolock(kctx->kbdev, kctx);
+	katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+
+	katom->ticks = 0;
+
+	return katom;
+}
+
+
+static void js_return_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+									work);
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	struct kbasep_js_atom_retained_state retained_state;
+	int js = katom->slot_nr;
+	int prio = katom->sched_priority;
+	bool timer_sync = false;
+	bool context_idle = false;
+	unsigned long flags;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	kbasep_js_atom_retained_state_copy(&retained_state, katom);
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	atomic_dec(&kctx->atoms_pulled);
+	atomic_dec(&kctx->atoms_pulled_slot[js]);
+
+	atomic_dec(&katom->blocked);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kctx->atoms_pulled_slot_pri[js][katom->sched_priority]--;
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
+			jsctx_rb_none_to_pull(kctx, js))
+		timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js);
+
+	/* If this slot has been blocked due to soft-stopped atoms, and all
+	 * atoms have now been processed, then unblock the slot */
+	if (!kctx->atoms_pulled_slot_pri[js][prio] &&
+			kctx->blocked_js[js][prio]) {
+		kctx->blocked_js[js][prio] = false;
+
+		/* Only mark the slot as pullable if the context is not idle -
+		 * that case is handled below */
+		if (atomic_read(&kctx->atoms_pulled) &&
+				kbase_js_ctx_pullable(kctx, js, true))
+			timer_sync |= kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, js);
+	}
+
+	if (!atomic_read(&kctx->atoms_pulled)) {
+		if (!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
+				!kbase_ctx_flag(kctx, KCTX_DYING)) {
+			int num_slots = kbdev->gpu_props.num_job_slots;
+			int slot;
+
+			if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+				kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+			for (slot = 0; slot < num_slots; slot++) {
+				if (kbase_js_ctx_pullable(kctx, slot, true))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, slot);
+			}
+		}
+
+		kbase_jm_idle_ctx(kbdev, kctx);
+
+		context_idle = true;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (context_idle) {
+		WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+		kbase_pm_context_idle(kbdev);
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+							&retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+}
+
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_rb_unpull(kctx, katom);
+
+	WARN_ON(work_pending(&katom->work));
+
+	/* Block re-submission until workqueue has run */
+	atomic_inc(&katom->blocked);
+
+	kbase_job_check_leave_disjoint(kctx->kbdev, katom);
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, js_return_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+						struct kbase_jd_atom *katom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	bool timer_sync = false;
+	int atom_slot;
+	bool context_idle = false;
+	int prio = katom->sched_priority;
+
+	kbdev = kctx->kbdev;
+	atom_slot = katom->slot_nr;
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
+		context_idle = !atomic_dec_return(&kctx->atoms_pulled);
+		atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
+		kctx->atoms_pulled_slot_pri[atom_slot][prio]--;
+
+		if (!atomic_read(&kctx->atoms_pulled) &&
+				!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		/* If this slot has been blocked due to soft-stopped atoms, and
+		 * all atoms have now been processed, then unblock the slot */
+		if (!kctx->atoms_pulled_slot_pri[atom_slot][prio]
+				&& kctx->blocked_js[atom_slot][prio]) {
+			kctx->blocked_js[atom_slot][prio] = false;
+			if (kbase_js_ctx_pullable(kctx, atom_slot, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+						kbdev, kctx, atom_slot);
+		}
+	}
+	WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE));
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
+			jsctx_rb_none_to_pull(kctx, atom_slot)) {
+		if (!list_empty(
+			&kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot]))
+			timer_sync |= kbase_js_ctx_list_remove_nolock(
+					kctx->kbdev, kctx, atom_slot);
+	}
+
+	/*
+	 * If submission is disabled on this context (most likely due to an
+	 * atom failure) and there are now no atoms left in the system then
+	 * re-enable submission so that context can be scheduled again.
+	 */
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
+					!atomic_read(&kctx->atoms_pulled) &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+		int js;
+
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	} else if (katom->x_post_dep &&
+			kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+		int js;
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	}
+
+	/* Mark context as inactive. The pm reference will be dropped later in
+	 * jd_done_worker().
+	 */
+	if (context_idle)
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return context_idle;
+}
+
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp)
+{
+	u64 microseconds_spent = 0;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+
+	kbdev = kctx->kbdev;
+
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (katom->will_fail_event_code)
+		katom->event_code = katom->will_fail_event_code;
+
+	katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
+
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_js_evict_deps(kctx, katom, katom->slot_nr,
+				katom->sched_priority);
+	}
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
+				katom->slot_nr), NULL, 0);
+#endif
+
+	/* Calculate the job's time used */
+	if (end_timestamp != NULL) {
+		/* Only calculating it for jobs that really run on the HW (e.g.
+		 * removed from next jobs never actually ran, so really did take
+		 * zero time) */
+		ktime_t tick_diff = ktime_sub(*end_timestamp,
+							katom->start_timestamp);
+
+		microseconds_spent = ktime_to_ns(tick_diff);
+
+		do_div(microseconds_spent, 1000);
+
+		/* Round up time spent to the minimum timer resolution */
+		if (microseconds_spent < KBASEP_JS_TICK_RESOLUTION_US)
+			microseconds_spent = KBASEP_JS_TICK_RESOLUTION_US;
+	}
+
+
+	kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
+
+	/* Unblock cross dependency if present */
+	if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
+			!(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) &&
+			(x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+		bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false);
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+		kbase_js_move_to_tree(x_dep);
+		if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false))
+			kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx,
+					x_dep->slot_nr);
+
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)
+			return x_dep;
+	}
+
+	return NULL;
+}
+
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+{
+	struct kbasep_js_device_data *js_devdata;
+	bool timer_sync = false;
+
+	js_devdata = &kbdev->js_data;
+
+	down(&js_devdata->schedule_sem);
+	mutex_lock(&js_devdata->queue_mutex);
+
+	while (js_mask) {
+		int js;
+
+		js = ffs(js_mask) - 1;
+
+		while (1) {
+			struct kbase_context *kctx;
+			unsigned long flags;
+			bool context_idle = false;
+
+			kctx = kbase_js_ctx_list_pop_head(kbdev, js);
+
+			if (!kctx) {
+				js_mask &= ~(1 << js);
+				break; /* No contexts on pullable list */
+			}
+
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) {
+				context_idle = true;
+
+				if (kbase_pm_context_active_handle_suspend(
+									kbdev,
+				      KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+					/* Suspend pending - return context to
+					 * queue and stop scheduling */
+					mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					if (kbase_js_ctx_list_add_pullable_head(
+						kctx->kbdev, kctx, js))
+						kbase_js_sync_timers(kbdev);
+					mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					mutex_unlock(&js_devdata->queue_mutex);
+					up(&js_devdata->schedule_sem);
+					return;
+				}
+				kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+			}
+
+			if (!kbase_js_use_ctx(kbdev, kctx)) {
+				mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				/* Context can not be used at this time */
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+				if (kbase_js_ctx_pullable(kctx, js, false)
+				    || kbase_ctx_flag(kctx, KCTX_PRIVILEGED))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev, kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				if (context_idle) {
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				}
+
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+				break;
+			}
+			mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_ctx_flag_clear(kctx, KCTX_PULLED);
+
+			if (!kbase_jm_kick(kbdev, 1 << js))
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+
+			if (!kbase_ctx_flag(kctx, KCTX_PULLED)) {
+				/* Failed to pull jobs - push to head of list */
+				if (kbase_js_ctx_pullable(kctx, js, true))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+								kctx->kbdev,
+								kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+								kctx->kbdev,
+								kctx, js);
+
+				if (context_idle) {
+					kbase_jm_idle_ctx(kbdev, kctx);
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				} else {
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+				}
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+				js_mask &= ~(1 << js);
+				break; /* Could not run atoms on this slot */
+			}
+
+			/* Push to back of list */
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev, kctx, js);
+			else
+				timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		}
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+	up(&js_devdata->schedule_sem);
+}
+
+void kbase_js_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	int js;
+
+	/*
+	 * Critical assumption: No more submission is possible outside of the
+	 * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs)
+	 * whilst the struct kbase_context is terminating.
+	 */
+
+	/* First, atomically do the following:
+	 * - mark the context as dying
+	 * - try to evict it from the queue */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_set(kctx, KCTX_DYING);
+
+	dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+
+	/*
+	 * At this point we know:
+	 * - If eviction succeeded, it was in the queue, but now no
+	 *   longer is
+	 *  - We must cancel the jobs here. No Power Manager active reference to
+	 *    release.
+	 *  - This happens asynchronously - kbase_jd_zap_context() will wait for
+	 *    those jobs to be killed.
+	 * - If eviction failed, then it wasn't in the queue. It is one
+	 *   of the following:
+	 *  - a. it didn't have any jobs, and so is not in the Queue or
+	 *       the Run Pool (not scheduled)
+	 *   - Hence, no more work required to cancel jobs. No Power Manager
+	 *     active reference to release.
+	 *  - b. it was in the middle of a scheduling transaction (and thus must
+	 *       have at least 1 job). This can happen from a syscall or a
+	 *       kernel thread. We still hold the jsctx_mutex, and so the thread
+	 *       must be waiting inside kbasep_js_try_schedule_head_ctx(),
+	 *       before checking whether the runpool is full. That thread will
+	 *       continue after we drop the mutex, and will notice the context
+	 *       is dying. It will rollback the transaction, killing all jobs at
+	 *       the same time. kbase_jd_zap_context() will wait for those jobs
+	 *       to be killed.
+	 *   - Hence, no more work required to cancel jobs, or to release the
+	 *     Power Manager active reference.
+	 *  - c. it is scheduled, and may or may not be running jobs
+	 * - We must cause it to leave the runpool by stopping it from
+	 * submitting any more jobs. When it finally does leave,
+	 * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs
+	 * (because it is dying), release the Power Manager active reference,
+	 * and will not requeue the context in the queue.
+	 * kbase_jd_zap_context() will wait for those jobs to be killed.
+	 *  - Hence, work required just to make it leave the runpool. Cancelling
+	 *    jobs and releasing the Power manager active reference will be
+	 *    handled when it leaves the runpool.
+	 */
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (!list_empty(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+				list_del_init(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+		}
+
+		/* The following events require us to kill off remaining jobs
+		 * and update PM book-keeping:
+		 * - we evicted it correctly (it must have jobs to be in the
+		 *   Queue)
+		 *
+		 * These events need no action, but take this path anyway:
+		 * - Case a: it didn't have any jobs, and was never in the Queue
+		 * - Case b: scheduling transaction will be partially rolled-
+		 *           back (this already cancels the jobs)
+		 */
+
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+
+		/* Only cancel jobs when we evicted from the
+		 * queue. No Power Manager active reference was held.
+		 *
+		 * Having is_dying set ensures that this kills, and
+		 * doesn't requeue */
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+	} else {
+		unsigned long flags;
+		bool was_retained;
+
+		/* Case c: didn't evict, but it is scheduled - it's in the Run
+		 * Pool */
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+		dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+
+		/* Disable the ctx from submitting any more jobs */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		/* Retain and (later) release the context whilst it is is now
+		 * disallowed from submitting jobs - ensures that someone
+		 * somewhere will be removing the context later on */
+		was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+		/* Since it's scheduled and we have the jsctx_mutex, it must be
+		 * retained successfully */
+		KBASE_DEBUG_ASSERT(was_retained);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+
+		/* Cancel any remaining running jobs for this kctx - if any.
+		 * Submit is disallowed which takes effect immediately, so no
+		 * more new jobs will appear after we do this. */
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+			kbase_job_slot_hardstop(kctx, js, NULL);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+									kctx);
+
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u);
+
+	/* After this, you must wait on both the
+	 * kbase_jd_context::zero_jobs_wait and the
+	 * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs
+	 * to be destroyed, and the context to be de-scheduled (if it was on the
+	 * runpool).
+	 *
+	 * kbase_jd_zap_context() will do this. */
+}
+
+static inline int trace_get_refcnt(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+
+/**
+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context
+ * @kctx:     Pointer to context.
+ * @callback: Pointer to function to call for each job.
+ *
+ * Call a function on all jobs belonging to a non-queued, non-running
+ * context, and detach the jobs from the context as it goes.
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * Atoms will be removed from the queue, so this must only be called when
+ * cancelling jobs (which occurs as part of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_ctx_job_cb callback)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	unsigned long flags;
+	u32 js;
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL,
+					0u, trace_get_refcnt(kbdev, kctx));
+
+	/* Invoke callback on jobs on each slot in turn */
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		jsctx_queue_foreach(kctx, js, callback);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
new file mode 100755
index 0000000..76172f7
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -0,0 +1,1051 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler APIs.
+ */
+
+#ifndef _KBASE_JS_H_
+#define _KBASE_JS_H_
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_context.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_debug.h"
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js Job Scheduler Internal APIs
+ * @{
+ *
+ * These APIs are Internal to KBase.
+ */
+
+/**
+ * @brief Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * @brief Halt the Job Scheduler.
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero intitialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the struct kbase_context must be zero intitialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the context is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guarenteed not to update the Policy Queue. And so, the caller is
+ * guarenteed to not need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is false.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold hwaccess_lock (as this will be obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return true indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return false indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
+ *   the atom
+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ *   remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions.
+ *
+ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Completely remove a job chain from the Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it is not being killed with kbasep_jd_cancel()
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold the hwaccess_lock, (as this will be obtained
+ *   internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * @return true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs
+ * @return false otherwise
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_jd_atom *katom);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locks must be held by the caller:
+ * - hwaccess_lock
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that is stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *   If the hwaccess_lock is already held, then the caller should use
+ *   kbasep_js_runpool_lookup_ctx_nolock() instead.
+ *
+ * @return a valid struct kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
+
+/**
+ * kbasep_js_runpool_lookup_ctx_nolock - Lookup a context in the Run Pool based
+ *         upon its current address space and ensure that is stays scheduled in.
+ * @kbdev: Device pointer
+ * @as_nr: Address space to lookup
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * Note: This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must the hold the hwaccess_lock
+ *
+ * Return: a valid struct kbase_context on success, which has been refcounted as
+ *         being busy.
+ *         NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+		struct kbase_device *kbdev, int as_nr);
+
+/**
+ * @brief Handling the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.  \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero and the context \em might be scheduled out
+ * (depending on whether the Scheudling Policy has deemed it so, or if it has run
+ * out of jobs).
+ *
+ * If the context does get scheduled out, then The following actions will be
+ * taken as part of deschduling a context:
+ * - For the context being descheduled:
+ *  - If the context is in the processing of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ *  - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ *  - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ *  - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * - Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * - Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * - Attempt scheduling in a new context (if one is available), and if necessary,
+ * running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function \b cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a \a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * - Releasing the atom's context attributes
+ * - Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * @brief Variant of kbase_js_runpool_release_ctx() that assumes that
+ * kbasep_js_device_data::runpool_mutex and
+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
+ * attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * @brief Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the function will wait
+ * for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space reserved) until
+ * kbasep_js_release_privileged_ctx is called).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Release a privileged context, allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - hwaccess_lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Mangement active refcount to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduler's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time.
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * @brief Resume the Job Scheduler after a Power Management Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * - Schedules contexts back into the runpool
+ * - Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * @brief Submit an atom to the job scheduler.
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to submit
+ *
+ * @return Whether the context requires to be enqueued. */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom);
+
+/**
+  * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
+  * @kctx:  Context Pointer
+  * @prio:  Priority (specifies the queue together with js).
+  * @js:    Job slot (specifies the queue together with prio).
+  *
+  * Pushes all possible atoms from the linked list to the ringbuffer.
+  * Number of atoms are limited to free space in the ringbuffer and
+  * number of available atoms in the linked list.
+  *
+  */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
+/**
+ * @brief Pull an atom from a context in the job scheduler for execution.
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context to pull from
+ * @param[in] js    Job slot to pull from
+ * @return          Pointer to an atom, or NULL if there are no atoms for this
+ *                  slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * @brief Return an atom to the job scheduler ringbuffer.
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to unpull
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom from jd_done_worker(), removing it from the job
+ * scheduler ringbuffer.
+ *
+ * If the atom failed then all dependee atoms marked for failure propagation
+ * will also fail.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] katom Pointer to the atom to complete
+ * @return true if the context is now idle (no jobs pulled)
+ *         false otherwise
+ */
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom.
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] katom         Pointer to the atom to complete
+ * @param[in] end_timestamp The time that the atom completed (may be NULL)
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp);
+
+/**
+ * @brief Submit atoms from all available contexts.
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ *
+ * @param[in] kbdev    Device pointer
+ * @param[in] js_mask  Mask of job slots to submit to
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_jd_zap_context - Attempt to deschedule a context that is being
+ *                        destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove a context from any internal job scheduler queues
+ * and perform any other actions to ensure a context will not be submitted
+ * from.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * @brief Validate an atom
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * @param[in] kbdev  Device pointer
+ * @param[in] katom  Atom to validate
+ * @return           true if atom is valid
+ *                   false otherwise
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user specified data
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used, if a timeout
+ * is negative then the default is set.
+ */
+void kbase_js_set_timeouts(struct kbase_device *kbdev);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * @brief Check that a context is allowed to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * As with any bool, never test the return value with true.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 test_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	test_bit = (u16) (1u << kctx->as_nr);
+
+	return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+}
+
+/**
+ * @brief Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 set_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	set_bit = (u16) (1u << kctx->as_nr);
+
+	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * @brief Prevent a context from submitting more jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 clear_bit;
+	u16 clear_mask;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	clear_bit = (u16) (1u << kctx->as_nr);
+	clear_mask = ~clear_bit;
+
+	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
+
+/**
+ * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom
+ */
+static inline void kbasep_js_clear_job_retry_submit(struct kbase_jd_atom *atom)
+{
+	atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Mark a slot as requiring resubmission by carrying that information on a
+ * completing atom.
+ *
+ * @note This can ASSERT in debug builds if the submit slot has been set to
+ * something other than the current value for @a js. This is because you might
+ * be unintentionally stopping more jobs being submitted on the old submit
+ * slot, and that might cause a scheduling-hang.
+ *
+ * @note If you can guarantee that the atoms for the original slot will be
+ * submitted on some other slot, then call kbasep_js_clear_job_retry_submit()
+ * first to silence the ASSERT.
+ */
+static inline void kbasep_js_set_job_retry_submit_slot(struct kbase_jd_atom *atom, int js)
+{
+	KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS);
+	KBASE_DEBUG_ASSERT((atom->retry_submit_on_slot ==
+					KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID)
+				|| (atom->retry_submit_on_slot == js));
+
+	atom->retry_submit_on_slot = js;
+}
+
+/**
+ * Create an initial 'invalid' atom retained state, that requires no
+ * atom-related work to be done on releasing with
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
+{
+	retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+	retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+	retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Copy atom state that can be made available after jd_done_nolock() is called
+ * on that atom.
+ */
+static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
+{
+	retained_state->event_code = katom->event_code;
+	retained_state->core_req = katom->core_req;
+	retained_state->retry_submit_on_slot = katom->retry_submit_on_slot;
+	retained_state->sched_priority = katom->sched_priority;
+	retained_state->device_nr = katom->device_nr;
+}
+
+/**
+ * @brief Determine whether an atom has finished (given its retained state),
+ * and so should be given back to userspace/removed from the system.
+ *
+ * Reasons for an atom not finishing include:
+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ *
+ * @param[in] katom_retained_state the retained state of the atom to check
+ * @return    false if the atom has not finished
+ * @return    !=false if the atom has finished
+ */
+static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
+}
+
+/**
+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
+ * code should just ignore it.
+ *
+ * @param[in] katom_retained_state the atom's retained state to check
+ * @return    false if the retained state is invalid, and can be ignored
+ * @return    !=false if the retained state is valid
+ */
+static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_atom_retained_state *katom_retained_state, int *res)
+{
+	int js = katom_retained_state->retry_submit_on_slot;
+
+	*res = js;
+	return (bool) (js >= 0);
+}
+
+#if KBASE_DEBUG_DISABLE_ASSERTS == 0
+/**
+ * Debug Check the refcount of a context. Only use within ASSERTs
+ *
+ * Obtains hwaccess_lock
+ *
+ * @return negative value if the context is not scheduled in
+ * @return current refcount of the context if it is scheduled in. The refcount
+ * is not guarenteed to be kept constant.
+ */
+static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int result = -1;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID)
+		result = js_devdata->runpool_irq.per_as_data[as_nr].as_busy_refcount;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return result;
+}
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS == 0 */
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guarenteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guarenteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	found_kctx = js_per_as_data->kctx;
+	KBASE_DEBUG_ASSERT(found_kctx == NULL || js_per_as_data->as_busy_refcount > 0);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return found_kctx;
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: when you need the number of cycles to guarantee you won't wait for
+ * longer than 'us' time (you might have a shorter wait).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_min_freq(struct kbase_device *kbdev, u32 us)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return us * (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need the number of cycles to guarantee you'll wait at least
+ * 'us' amount of time (but you might wait longer).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_max_freq(struct kbase_device *kbdev, u32 us)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return us * (u32) (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the worst-case wait that 'ticks' cycles will
+ * take (you guarantee that you won't wait any longer than this, but it may
+ * be shorter).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_min_freq(struct kbase_device *kbdev, u32 ticks)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return ticks / gpu_freq * 1000;
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the best-case wait for 'tick' cycles (you
+ * guarantee to be waiting for at least this long, but it may be longer).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_max_freq(struct kbase_device *kbdev, u32 ticks)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return ticks / gpu_freq * 1000;
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+	++(js_devdata->nr_all_contexts_running);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+									S8_MAX);
+		++(js_devdata->nr_user_contexts_running);
+	}
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	--(js_devdata->nr_all_contexts_running);
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		--(js_devdata->nr_user_contexts_running);
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+	}
+}
+
+
+/**
+ * @brief Submit atoms from all available contexts to all job slots.
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ *
+ * @param[in] kbdev    Device pointer
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+	kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
+ *                                        to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treates priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ *         0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ *         KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+	if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+		return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+	return kbasep_js_atom_priority_to_relative[atom_prio];
+}
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+	unsigned int prio_idx;
+
+	KBASE_DEBUG_ASSERT(0 <= sched_prio
+			&& sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+	prio_idx = (unsigned int)sched_prio;
+
+	return kbasep_js_relative_priority_to_atom[prio_idx];
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100755
index 0000000..321506a
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,301 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+
+/*
+ * Private functions follow
+ */
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
+		++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) {
+			/* First refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+		--(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+			/* Last de-refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+	++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		/* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+	}
+
+	return runpool_state_changed;
+}
+
+/*
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		lockdep_assert_held(&kbdev->hwaccess_lock);
+		/* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+	}
+
+	/* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */
+	--(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* This context never submits, so don't track any scheduling attributes */
+		return;
+	}
+
+	/* Transfer attributes held in the context flags for contexts that have submit enabled */
+
+	/* ... More attributes can be added here ... */
+
+	/* The context should not have been scheduled yet, so ASSERT if this caused
+	 * runpool state changes (note that other threads *can't* affect the value
+	 * of runpool_state_changed, due to how it's calculated) */
+	KBASE_DEBUG_ASSERT(runpool_state_changed == false);
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed;
+	int i;
+
+	/* Retain any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled in, so update the runpool with the new attributes */
+			runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+
+			/* We don't need to know about state changed, because retaining a
+			 * context occurs on scheduling it, and that itself will also try
+			 * to run new atoms */
+			CSTD_UNUSED(runpool_state_changed);
+		}
+	}
+}
+
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+	int i;
+
+	/* Release any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled out, so update the runpool on the removed attributes */
+			runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom);
+	core_req = katom->core_req;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	/* We don't need to know about state changed, because retaining an
+	 * atom occurs on adding it, and that itself will also try to run
+	 * new atoms */
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom_retained_state);
+	core_req = katom_retained_state->core_req;
+
+	/* No-op for invalid atoms */
+	if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false)
+		return false;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	return runpool_state_changed;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
new file mode 100755
index 0000000..ce91833
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -0,0 +1,158 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_ctx_attr.h
+ * Job Scheduler Context Attribute APIs
+ */
+
+#ifndef _KBASE_JS_CTX_ATTR_H_
+#define _KBASE_JS_CTX_ATTR_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+
+/**
+ * Set the initial attributes of a context (when context create flags are set)
+ *
+ * Requires:
+ * - Hold the jsctx_mutex
+ */
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of a context
+ *
+ * This occurs on scheduling in the context on the runpool (but after
+ * is_scheduled is set)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ */
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Release all attributes of a context
+ *
+ * This occurs on scheduling out the context from the runpool (but before
+ * is_scheduled is cleared)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of an atom
+ *
+ * This occurs on adding an atom to a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ */
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * Release all attributes of an atom, given its retained state.
+ *
+ * This occurs after (permanently) removing an atom from a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * This is a no-op when \a katom_retained_state is invalid.
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+
+	return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	/* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+	return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+	return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100755
index 0000000..119344c
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,423 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+	base_jd_core_req core_req;
+	kbase_context_flags ctx_req;
+	u32 device_nr;
+};
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
+
+/**
+ * @brief the IRQ_THROTTLE time in microseconds
+ *
+ * This will be converted via the GPU's clock frequency into a cycle-count.
+ *
+ * @note we can make an estimate of the GPU's frequency by periodically
+ * sampling its CYCLE_COUNT register
+ */
+#define KBASE_JS_IRQ_THROTTLE_TIME_US 20
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affect scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accomodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ */
+enum kbasep_js_ctx_attr {
+	/** Attribute indicating a context that contains Compute jobs. That is,
+	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE,
+
+	/** Attribute indicating a context that contains Non-Compute jobs. That is,
+	 * the context has some jobs that are \b not of type @ref
+	 * BASE_JD_REQ_ONLY_COMPUTE.
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+	/** Attribute indicating that a context contains compute-job atoms that
+	 * aren't restricted to a coherent group, and can run on all cores.
+	 *
+	 * Specifically, this is when the atom's \a core_req satisfy:
+	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
+	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+	 *
+	 * Such atoms could be blocked from running if one of the coherent groups
+	 * is being used by another job slot, so tracking this context attribute
+	 * allows us to prevent such situations.
+	 *
+	 * @note This doesn't take into account the 1-coregroup case, where all
+	 * compute atoms would effectively be able to run on 'all cores', but
+	 * contexts will still not always get marked with this attribute. Instead,
+	 * it is the caller's responsibility to take into account the number of
+	 * coregroups when interpreting this attribute.
+	 *
+	 * @note Whilst Tiler atoms are normally combined with
+	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+	 * enough to handle anyway.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+	/** Must be the last in the enum */
+	KBASEP_JS_CTX_ATTR_COUNT
+};
+
+enum {
+	/** Bit indicating that new atom should be started because this atom completed */
+	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
+	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
+
+/**
+ * Data used by the scheduler that is unique for each Address Space.
+ *
+ * This is used in IRQ context and hwaccess_lock must be held whilst accessing
+ * this data (inculding reads and atomic decisions based on the read).
+ */
+struct kbasep_js_per_as_data {
+	/**
+	 * Ref count of whether this AS is busy, and must not be scheduled out
+	 *
+	 * When jobs are running this is always positive. However, it can still be
+	 * positive when no jobs are running. If all you need is a heuristic to
+	 * tell you whether jobs might be running, this should be sufficient.
+	 */
+	int as_busy_refcount;
+
+	/** Pointer to the current context on this address space, or NULL for no context */
+	struct kbase_context *kctx;
+};
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not rollover (which would be undefined behavior), and so under
+ * the Total License model, it is free to make optimizations based on that (i.e.
+ * to remove masking).
+ */
+struct kbasep_js_device_data {
+	/* Sub-structure to collect together Job Scheduling data used in IRQ
+	 * context. The hwaccess_lock must be held when accessing. */
+	struct runpool_irq {
+		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
+		 * When bit 'N' is set in this, it indicates whether the context bound to address space
+		 * 'N' (per_as_data[N].kctx) is allowed to submit jobs.
+		 *
+		 * It is placed here because it's much more memory efficient than having a u8 in
+		 * struct kbasep_js_per_as_data to store this flag  */
+		u16 submit_allowed;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of contexts
+		 * that can fit into the runpool. This is currently BASE_MAX_NR_AS
+		 *
+		 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+		 * the refcount. Hence, it's not worthwhile reducing this to
+		 * bit-manipulation on u32s to save space (where in contrast, 4 bit
+		 * sub-fields would be easy to do and would save space).
+		 *
+		 * Whilst this must not become negative, the sign bit is used for:
+		 * - error detection in debug builds
+		 * - Optimization: it is undefined for a signed int to overflow, and so
+		 * the compiler can optimize for that never happening (thus, no masking
+		 * is required on updating the variable) */
+		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/** Data that is unique for each AS */
+		struct kbasep_js_per_as_data per_as_data[BASE_MAX_NR_AS];
+
+		/*
+		 * Affinity management and tracking
+		 */
+		/** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
+		 * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
+		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
+		/** Refcount for each core owned by each slot. Used to generate the
+		 * slot_affinities array of bitvectors
+		 *
+		 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+		 * because it is refcounted only when a job is definitely about to be
+		 * submitted to a slot, and is de-refcounted immediately after a job
+		 * finishes */
+		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
+	} runpool_irq;
+
+	/**
+	 * Run Pool mutex, for managing contexts within the runpool.
+	 * Unless otherwise specified, you must hold this lock whilst accessing any
+	 * members that follow
+	 *
+	 * In addition, this is used to access:
+	 * - the kbasep_js_kctx_info::runpool substructure
+	 */
+	struct mutex runpool_mutex;
+
+	/**
+	 * Queue Lock, used to access the Policy's queue of contexts independently
+	 * of the Run Pool.
+	 *
+	 * Of course, you don't need the Run Pool lock to access this.
+	 */
+	struct mutex queue_mutex;
+
+	/**
+	 * Scheduling semaphore. This must be held when calling
+	 * kbase_jm_kick()
+	 */
+	struct semaphore schedule_sem;
+
+	/**
+	 * List of contexts that can currently be pulled from
+	 */
+	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
+	/**
+	 * List of contexts that can not currently be pulled from, but have
+	 * jobs currently running.
+	 */
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
+
+	u16 as_free;				/**< Bitpattern of free Address Spaces */
+
+	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+	s8 nr_user_contexts_running;
+	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+	s8 nr_all_contexts_running;
+
+	/** Core Requirements to match up with base_js_atom's core_req memeber
+	 * @note This is a write-once member, and so no locking is required to read */
+	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+	u32 scheduling_period_ns;    /*< Value for JS_SCHEDULING_PERIOD_NS */
+	u32 soft_stop_ticks;	     /*< Value for JS_SOFT_STOP_TICKS */
+	u32 soft_stop_ticks_cl;	     /*< Value for JS_SOFT_STOP_TICKS_CL */
+	u32 hard_stop_ticks_ss;	     /*< Value for JS_HARD_STOP_TICKS_SS */
+	u32 hard_stop_ticks_cl;	     /*< Value for JS_HARD_STOP_TICKS_CL */
+	u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */
+	u32 gpu_reset_ticks_ss;	     /*< Value for JS_RESET_TICKS_SS */
+	u32 gpu_reset_ticks_cl;	     /*< Value for JS_RESET_TICKS_CL */
+	u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
+	u32 ctx_timeslice_ns;		 /**< Value for JS_CTX_TIMESLICE_NS */
+
+	/**< Value for JS_SOFT_JOB_TIMEOUT */
+	atomic_t soft_job_timeout_ms;
+
+	/** List of suspended soft jobs */
+	struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Support soft-stop on a single context */
+	bool softstop_always;
+#endif				/* CONFIG_MALI_DEBUG */
+
+	/** The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths).
+	 * @note This is a write-once member, and so no locking is required to read */
+	int init_status;
+
+	/* Number of contexts that can currently be pulled from */
+	u32 nr_contexts_pullable;
+
+	/* Number of contexts that can either be pulled from or are currently
+	 * running */
+	atomic_t nr_contexts_runnable;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+
+	/**
+	 * Job Scheduler Context information sub-structure. These members are
+	 * accessed regardless of whether the context is:
+	 * - In the Policy's Run Pool
+	 * - In the Policy's Queue
+	 * - Not queued nor in the Run Pool.
+	 *
+	 * You must obtain the jsctx_mutex before accessing any other members of
+	 * this substructure.
+	 *
+	 * You may not access any of these members from IRQ context.
+	 */
+	struct kbase_jsctx {
+		struct mutex jsctx_mutex;		    /**< Job Scheduler Context lock */
+
+		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+		 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
+		 * for such jobs*/
+		u32 nr_jobs;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of atoms on
+		 * the context. **/
+		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/**
+		 * Wait queue to wait for KCTX_SHEDULED flag state changes.
+		 * */
+		wait_queue_head_t is_scheduled_wait;
+
+		/** Link implementing JS queues. Context can be present on one
+		 * list per job slot
+		 */
+		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
+	} ctx;
+
+	/* The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths) */
+	int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+	/** Event code - to determine whether the atom has finished */
+	enum base_jd_event_code event_code;
+	/** core requirements */
+	base_jd_core_req core_req;
+	/* priority */
+	int sched_priority;
+	/** Job Slot to retry submitting to if submission from IRQ handler failed */
+	int retry_submit_on_slot;
+	/* Core group atom was executed on */
+	u32 device_nr;
+
+};
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbase_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
+ *
+ * @see kbase_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_MED,
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,
+	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100755
index 0000000..6d1e61f
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port. */
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+
+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+	#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+#else
+	#define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100755
index 0000000..d76d16a
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,2501 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/compat.h>
+#include <linux/version.h>
+
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_tlstream.h>
+
+/**
+ * @brief Check the zone compatibility of two regions.
+ */
+static int kbase_region_tracker_match_zone(struct kbase_va_region *reg1,
+		struct kbase_va_region *reg2)
+{
+	return ((reg1->flags & KBASE_REG_ZONE_MASK) ==
+		(reg2->flags & KBASE_REG_ZONE_MASK));
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_match_zone);
+
+/* This function inserts a region into the tree. */
+static void kbase_region_tracker_insert(struct kbase_context *kctx, struct kbase_va_region *new_reg)
+{
+	u64 start_pfn = new_reg->start_pfn;
+	struct rb_node **link = &(kctx->reg_rbtree.rb_node);
+	struct rb_node *parent = NULL;
+
+	/* Find the right place in the tree using tree search */
+	while (*link) {
+		struct kbase_va_region *old_reg;
+
+		parent = *link;
+		old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+		/* RBTree requires no duplicate entries. */
+		KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+		if (old_reg->start_pfn > start_pfn)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Put the new node there, and rebalance tree */
+	rb_link_node(&(new_reg->rblink), parent, link);
+	rb_insert_color(&(new_reg->rblink), &(kctx->reg_rbtree));
+}
+
+/* Find allocated region enclosing free range. */
+static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
+		struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	u64 end_pfn = start_pfn + nr_pages;
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (start_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (end_pfn > tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (gpu_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (gpu_pfn >= tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->start_pfn > gpu_pfn)
+			rbnode = rbnode->rb_left;
+		else if (reg->start_pfn < gpu_pfn)
+			rbnode = rbnode->rb_right;
+		else
+			return reg;
+
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	/* Note that this search is a linear search, as we do not have a target
+	   address in mind, so does not benefit from the rbtree search */
+	rbnode = rb_first(&(kctx->reg_rbtree));
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if ((reg->nr_pages >= nr_pages) &&
+				(reg->flags & KBASE_REG_FREE) &&
+				kbase_region_tracker_match_zone(reg, reg_reqs)) {
+			/* Check alignment */
+			u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1);
+
+			if ((start_pfn >= reg->start_pfn) &&
+					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1)))
+				return reg;
+		}
+		rbnode = rb_next(rbnode);
+	}
+
+	return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions.  It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ */
+static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	struct rb_node *rbprev;
+	struct kbase_va_region *prev = NULL;
+	struct rb_node *rbnext;
+	struct kbase_va_region *next = NULL;
+
+	int merged_front = 0;
+	int merged_back = 0;
+	int err = 0;
+
+	/* Try to merge with the previous block first */
+	rbprev = rb_prev(&(reg->rblink));
+	if (rbprev) {
+		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+		if ((prev->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(prev, reg)) {
+			/* We're compatible with the previous VMA, merge with it */
+			prev->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+			reg = prev;
+			merged_front = 1;
+		}
+	}
+
+	/* Try to merge with the next block second */
+	/* Note we do the lookup here as the tree may have been rebalanced. */
+	rbnext = rb_next(&(reg->rblink));
+	if (rbnext) {
+		/* We're compatible with the next VMA, merge with it */
+		next = rb_entry(rbnext, struct kbase_va_region, rblink);
+		if ((next->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(next, reg)) {
+			next->start_pfn = reg->start_pfn;
+			next->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+			merged_back = 1;
+			if (merged_front) {
+				/* We already merged with prev, free it */
+				kbase_free_alloced_region(reg);
+			}
+		}
+	}
+
+	/* If we failed to merge then we need to add a new block */
+	if (!(merged_front || merged_back)) {
+		/*
+		 * We didn't merge anything. Add a new free
+		 * placeholder and remove the original one.
+		 */
+		struct kbase_va_region *free_reg;
+
+		free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+		if (!free_reg) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		rb_replace_node(&(reg->rblink), &(free_reg->rblink), &(kctx->reg_rbtree));
+	}
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * @brief Insert a VA region to the list, replacing the current at_reg.
+ */
+static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+	int err = 0;
+
+	/* Must be a free region */
+	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+	/* start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+	/* at least nr_pages from start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	/* Regions are a whole use, so swap and delete old one. */
+	if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+		rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), &(kctx->reg_rbtree));
+		kbase_free_alloced_region(at_reg);
+	}
+	/* New region replaces the start of the old one, so insert before. */
+	else if (at_reg->start_pfn == start_pfn) {
+		at_reg->start_pfn += nr_pages;
+		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region replaces the end of the old one, so insert after. */
+	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region splits the old one, so insert and create new */
+	else {
+		struct kbase_va_region *new_front_reg;
+
+		new_front_reg = kbase_alloc_free_region(kctx,
+				at_reg->start_pfn,
+				start_pfn - at_reg->start_pfn,
+				at_reg->flags & KBASE_REG_ZONE_MASK);
+
+		if (new_front_reg) {
+			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+			at_reg->start_pfn = start_pfn + nr_pages;
+
+			kbase_region_tracker_insert(kctx, new_front_reg);
+			kbase_region_tracker_insert(kctx, new_reg);
+		} else {
+			err = -ENOMEM;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * @brief Add a VA region to the list.
+ */
+int kbase_add_va_region(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 addr,
+		size_t nr_pages, size_t align)
+{
+	struct kbase_va_region *tmp;
+	u64 gpu_pfn = addr >> PAGE_SHIFT;
+	int err = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!align)
+		align = 1;
+
+	/* must be a power of 2 */
+	KBASE_DEBUG_ASSERT((align & (align - 1)) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+	/* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
+	if (gpu_pfn) {
+		struct device *dev = kctx->kbdev->dev;
+
+		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+		tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
+		if (!tmp) {
+			dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		if ((!kbase_region_tracker_match_zone(tmp, reg)) ||
+				(!(tmp->flags & KBASE_REG_FREE))) {
+			dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
+			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
+			dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
+		if (err) {
+			dev_warn(dev, "Failed to insert va region");
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		goto exit;
+	}
+
+	/* Path 2: Map any free address which meets the requirements.  */
+	{
+		u64 start_pfn;
+
+		/*
+		 * Depending on the zone the allocation request is for
+		 * we might need to retry it.
+		 */
+		do {
+			tmp = kbase_region_tracker_find_region_meeting_reqs(
+					kctx, reg, nr_pages, align);
+			if (tmp) {
+				start_pfn = (tmp->start_pfn + align - 1) &
+						~(align - 1);
+				err = kbase_insert_va_region_nolock(kctx, reg,
+						tmp, start_pfn, nr_pages);
+				break;
+			}
+
+			/*
+			 * If the allocation is not from the same zone as JIT
+			 * then don't retry, we're out of VA and there is
+			 * nothing which can be done about it.
+			 */
+			if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+					KBASE_REG_ZONE_CUSTOM_VA)
+				break;
+		} while (kbase_jit_evict(kctx));
+
+		if (!tmp)
+			err = -ENOMEM;
+	}
+
+ exit:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+		struct kbase_va_region *same_va_reg,
+		struct kbase_va_region *exec_reg,
+		struct kbase_va_region *custom_va_reg)
+{
+	kctx->reg_rbtree = RB_ROOT;
+	kbase_region_tracker_insert(kctx, same_va_reg);
+
+	/* exec and custom_va_reg doesn't always exist */
+	if (exec_reg && custom_va_reg) {
+		kbase_region_tracker_insert(kctx, exec_reg);
+		kbase_region_tracker_insert(kctx, custom_va_reg);
+	}
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	do {
+		rbnode = rb_first(&(kctx->reg_rbtree));
+		if (rbnode) {
+			rb_erase(rbnode, &(kctx->reg_rbtree));
+			reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+			kbase_free_alloced_region(reg);
+		}
+	} while (rbnode);
+}
+
+/**
+ * Initialize the region tracker data structure.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+	struct kbase_va_region *same_va_reg;
+	struct kbase_va_region *exec_reg = NULL;
+	struct kbase_va_region *custom_va_reg = NULL;
+	size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+	u64 same_va_pages;
+	int err;
+
+	/* Take the lock as kbase_free_alloced_region requires it */
+	kbase_gpu_vm_lock(kctx);
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		same_va_bits = 32;
+	else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+#endif
+
+	if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) {
+		err = -EINVAL;
+		goto fail_unlock;
+	}
+
+	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+	/* all have SAME_VA */
+	same_va_reg = kbase_alloc_free_region(kctx, 1,
+			same_va_pages,
+			KBASE_REG_ZONE_SAME_VA);
+
+	if (!same_va_reg) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+#ifdef CONFIG_64BIT
+	/* 32-bit clients have exec and custom VA zones */
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif
+		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+			err = -EINVAL;
+			goto fail_free_same_va;
+		}
+		/* If the current size of TMEM is out of range of the
+		 * virtual address space addressable by the MMU then
+		 * we should shrink it to fit
+		 */
+		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+		exec_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_EXEC_BASE,
+				KBASE_REG_ZONE_EXEC_SIZE,
+				KBASE_REG_ZONE_EXEC);
+
+		if (!exec_reg) {
+			err = -ENOMEM;
+			goto fail_free_same_va;
+		}
+
+		custom_va_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_CUSTOM_VA_BASE,
+				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+		if (!custom_va_reg) {
+			err = -ENOMEM;
+			goto fail_free_exec;
+		}
+#ifdef CONFIG_64BIT
+	}
+#endif
+
+	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+
+	kctx->same_va_end = same_va_pages + 1;
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_free_exec:
+	kbase_free_alloced_region(exec_reg);
+fail_free_same_va:
+	kbase_free_alloced_region(same_va_reg);
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
+{
+#ifdef CONFIG_64BIT
+	struct kbase_va_region *same_va;
+	struct kbase_va_region *custom_va_reg;
+	u64 same_va_bits;
+	u64 total_va_size;
+	int err;
+
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return 0;
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+
+	total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/*
+	 * Modify the same VA free region after creation. Be careful to ensure
+	 * that allocations haven't been made as they could cause an overlap
+	 * to happen with existing same VA allocations and the custom VA zone.
+	 */
+	same_va = kbase_region_tracker_find_region_base_address(kctx,
+			PAGE_SIZE);
+	if (!same_va) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* The region flag or region size has changed since creation so bail. */
+	if ((!(same_va->flags & KBASE_REG_FREE)) ||
+			(same_va->nr_pages != total_va_size)) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	if (same_va->nr_pages < jit_va_pages ||
+			kctx->same_va_end < jit_va_pages) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* It's safe to adjust the same VA zone now */
+	same_va->nr_pages -= jit_va_pages;
+	kctx->same_va_end -= jit_va_pages;
+
+	/*
+	 * Create a custom VA zone at the end of the VA for allocations which
+	 * JIT can use so it doesn't have to allocate VA from the kernel.
+	 */
+	custom_va_reg = kbase_alloc_free_region(kctx,
+				kctx->same_va_end,
+				jit_va_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+	if (!custom_va_reg) {
+		/*
+		 * The context will be destroyed if we fail here so no point
+		 * reverting the change we made to same_va.
+		 */
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	kbase_region_tracker_insert(kctx, custom_va_reg);
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+#else
+	return 0;
+#endif
+}
+
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+	kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX;
+
+	/* Initialize memory usage */
+	atomic_set(&memdev->used_pages, 0);
+
+	return kbase_mem_pool_init(&kbdev->mem_pool,
+			KBASE_MEM_POOL_MAX_SIZE_KBDEV, kbdev, NULL);
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int pages;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	pages = atomic_read(&memdev->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_pool_term(&kbdev->mem_pool);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_term);
+
+
+
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+{
+	struct kbase_va_region *new_reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* zone argument should only contain zone related region flags */
+	KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
+
+	new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+	if (!new_reg)
+		return NULL;
+
+	new_reg->cpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->gpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->kctx = kctx;
+	new_reg->flags = zone | KBASE_REG_FREE;
+
+	new_reg->flags |= KBASE_REG_GROWABLE;
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions.
+ *
+ */
+void kbase_free_alloced_region(struct kbase_va_region *reg)
+{
+	if (!(reg->flags & KBASE_REG_FREE)) {
+		/*
+		 * The physical allocation should have been removed from the
+		 * eviction list before this function is called. However, in the
+		 * case of abnormal process termination or the app leaking the
+		 * memory kbase_mem_free_region is not called so it can still be
+		 * on the list at termination time of the region tracker.
+		 */
+		if (!list_empty(&reg->gpu_alloc->evict_node)) {
+			/*
+			 * Unlink the physical allocation before unmaking it
+			 * evictable so that the allocation isn't grown back to
+			 * its last backed size as we're going to unmap it
+			 * anyway.
+			 */
+			reg->cpu_alloc->reg = NULL;
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				reg->gpu_alloc->reg = NULL;
+
+			/*
+			 * If a region has been made evictable then we must
+			 * unmake it before trying to free it.
+			 * If the memory hasn't been reclaimed it will be
+			 * unmapped and freed below, if it has been reclaimed
+			 * then the operations below are no-ops.
+			 */
+			if (reg->flags & KBASE_REG_DONT_NEED) {
+				KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+						   KBASE_MEM_TYPE_NATIVE);
+				kbase_mem_evictable_unmake(reg->gpu_alloc);
+			}
+		}
+
+		/*
+		 * Remove the region from the sticky resource metadata
+		 * list should it be there.
+		 */
+		kbase_sticky_resource_release(reg->kctx, NULL,
+				reg->start_pfn << PAGE_SHIFT);
+
+		kbase_mem_phy_alloc_put(reg->cpu_alloc);
+		kbase_mem_phy_alloc_put(reg->gpu_alloc);
+		/* To detect use-after-free in debug builds */
+		KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
+	}
+	kfree(reg);
+}
+
+KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+{
+	int err;
+	size_t i = 0;
+	unsigned long attr;
+	unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+
+	if ((kctx->kbdev->system_coherency == COHERENCY_ACE) &&
+		(reg->flags & KBASE_REG_SHARE_BOTH))
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA);
+	else
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
+	if (err)
+		return err;
+
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+		struct kbase_mem_phy_alloc *alloc;
+
+		alloc = reg->gpu_alloc;
+		stride = alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
+		for (i = 0; i < alloc->imported.alias.nents; i++) {
+			if (alloc->imported.alias.aliased[i].alloc) {
+				err = kbase_mmu_insert_pages(kctx,
+						reg->start_pfn + (i * stride),
+						alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
+						alloc->imported.alias.aliased[i].length,
+						reg->flags);
+				if (err)
+					goto bad_insert;
+
+				kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc);
+			} else {
+				err = kbase_mmu_insert_single_page(kctx,
+					reg->start_pfn + i * stride,
+					page_to_phys(kctx->aliasing_sink_page),
+					alloc->imported.alias.aliased[i].length,
+					(reg->flags & mask) | attr);
+
+				if (err)
+					goto bad_insert;
+			}
+		}
+	} else {
+		err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+				kbase_get_gpu_phy_pages(reg),
+				kbase_reg_current_backed_size(reg),
+				reg->flags);
+		if (err)
+			goto bad_insert;
+		kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
+	}
+
+	return err;
+
+bad_insert:
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+
+		stride = reg->gpu_alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		while (i--)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
+				kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length);
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+			}
+	}
+
+	kbase_remove_va_region(kctx, reg);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
+
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	if (reg->start_pfn == 0)
+		return 0;
+
+	if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		size_t i;
+
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+	} else {
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
+	}
+
+	if (err)
+		return err;
+
+	err = kbase_remove_va_region(kctx, reg);
+	return err;
+}
+
+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset)
+{
+	struct vm_area_struct *vma;
+	struct kbase_cpu_mapping *map;
+
+	lockdep_assert_held(&current->mm->mmap_sem);
+
+	if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
+		return NULL;
+
+	vma = find_vma_intersection(current->mm, uaddr, uaddr+size);
+
+	if (!vma || vma->vm_start > uaddr)
+		return NULL;
+	if (vma->vm_ops != &kbase_vm_ops)
+		/* Not ours! */
+		return NULL;
+
+	map = vma->vm_private_data;
+
+	if (map->kctx != kctx)
+		/* Not from this context! */
+		return NULL;
+
+	*offset = (uaddr - vma->vm_start) +
+		((vma->vm_pgoff - map->region->start_pfn)<<PAGE_SHIFT);
+
+	return map;
+}
+
+int kbasep_find_enclosing_cpu_mapping_offset(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset)
+{
+	struct kbase_cpu_mapping *map;
+
+	kbase_os_mem_map_lock(kctx);
+
+	map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset);
+
+	kbase_os_mem_map_unlock(kctx);
+
+	if (!map)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
+
+void kbase_sync_single(struct kbase_context *kctx,
+		phys_addr_t cpu_pa, phys_addr_t gpu_pa,
+		off_t offset, size_t size, enum kbase_sync_type sync_fn)
+{
+	struct page *cpu_page;
+
+	cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
+
+	if (likely(cpu_pa == gpu_pa)) {
+		dma_addr_t dma_addr;
+
+		BUG_ON(!cpu_page);
+		BUG_ON(offset + size > PAGE_SIZE);
+
+		dma_addr = kbase_dma_addr(cpu_page) + offset;
+		if (sync_fn == KBASE_SYNC_TO_CPU)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+		else if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+	} else {
+		void *src = NULL;
+		void *dst = NULL;
+		struct page *gpu_page;
+
+		if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
+			return;
+
+		gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+
+		if (sync_fn == KBASE_SYNC_TO_DEVICE) {
+			src = ((unsigned char *)kmap(cpu_page)) + offset;
+			dst = ((unsigned char *)kmap(gpu_page)) + offset;
+		} else if (sync_fn == KBASE_SYNC_TO_CPU) {
+			dma_sync_single_for_cpu(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+			src = ((unsigned char *)kmap(gpu_page)) + offset;
+			dst = ((unsigned char *)kmap(cpu_page)) + offset;
+		}
+		memcpy(dst, src, size);
+		kunmap(gpu_page);
+		kunmap(cpu_page);
+		if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+	}
+}
+
+static int kbase_do_syncset(struct kbase_context *kctx,
+		struct base_syncset *set, enum kbase_sync_type sync_fn)
+{
+	int err = 0;
+	struct basep_syncset *sset = &set->basep_sset;
+	struct kbase_va_region *reg;
+	struct kbase_cpu_mapping *map;
+	unsigned long start;
+	size_t size;
+	phys_addr_t *cpu_pa;
+	phys_addr_t *gpu_pa;
+	u64 page_off, page_count;
+	u64 i;
+	u64 offset;
+
+	kbase_os_mem_map_lock(kctx);
+	kbase_gpu_vm_lock(kctx);
+
+	/* find the region where the virtual address is contained */
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			sset->mem_handle.basep.handle);
+	if (!reg) {
+		dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX",
+				sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED))
+		goto out_unlock;
+
+	start = (uintptr_t)sset->user_addr;
+	size = (size_t)sset->size;
+
+	map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset);
+	if (!map) {
+		dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
+				start, sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	page_off = offset >> PAGE_SHIFT;
+	offset &= ~PAGE_MASK;
+	page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	cpu_pa = kbase_get_cpu_phy_pages(reg);
+	gpu_pa = kbase_get_gpu_phy_pages(reg);
+
+	if (page_off > reg->nr_pages ||
+			page_off + page_count > reg->nr_pages) {
+		/* Sync overflows the region */
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* Sync first page */
+	if (cpu_pa[page_off]) {
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+
+		kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
+				offset, sz, sync_fn);
+	}
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		/* we grow upwards, so bail on first non-present page */
+		if (!cpu_pa[page_off + i])
+			break;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + i],
+				gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1 && cpu_pa[page_off + page_count - 1]) {
+		size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
+				gpu_pa[page_off + page_count - 1], 0, sz,
+				sync_fn);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_os_mem_map_unlock(kctx);
+	return err;
+}
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset)
+{
+	int err = -EINVAL;
+	struct basep_syncset *sset;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != syncset);
+
+	sset = &syncset->basep_sset;
+
+	switch (sset->type) {
+	case BASE_SYNCSET_OP_MSYNC:
+		err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_DEVICE);
+		break;
+
+	case BASE_SYNCSET_OP_CSYNC:
+		err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_CPU);
+		break;
+
+	default:
+		dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
+		break;
+	}
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_sync_now);
+
+/* vm lock must be held */
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Unlink the physical allocation before unmaking it evictable so
+	 * that the allocation isn't grown back to its last backed size
+	 * as we're going to unmap it anyway.
+	 */
+	reg->cpu_alloc->reg = NULL;
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		reg->gpu_alloc->reg = NULL;
+
+	/*
+	 * If a region has been made evictable then we must unmake it
+	 * before trying to free it.
+	 * If the memory hasn't been reclaimed it will be unmapped and freed
+	 * below, if it has been reclaimed then the operations below are no-ops.
+	 */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+				   KBASE_MEM_TYPE_NATIVE);
+		kbase_mem_evictable_unmake(reg->gpu_alloc);
+	}
+
+	err = kbase_gpu_munmap(kctx, reg);
+	if (err) {
+		dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+		goto out;
+	}
+
+	/* This will also free the physical pages */
+	kbase_free_alloced_region(reg);
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free_region);
+
+/**
+ * @brief Free the region from the GPU and unregister it.
+ *
+ * This function implements the free operation on a memory segment.
+ * It will loudly fail if called with outstanding mappings.
+ */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if (0 == gpu_addr) {
+		dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+		return -EINVAL;
+	}
+	kbase_gpu_vm_lock(kctx);
+
+	if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
+	    gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
+		int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
+
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* ask to unlink the cookie as we'll free it */
+
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		kbase_free_alloced_region(reg);
+	} else {
+		/* A real GPU va */
+		/* Validate the region */
+		reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+		if (!reg || (reg->flags & KBASE_REG_FREE)) {
+			dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+			/* SAME_VA must be freed through munmap */
+			dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		err = kbase_mem_free_region(kctx, reg);
+	}
+
+ out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free);
+
+void kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
+
+	reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
+	/* all memory is now growable */
+	reg->flags |= KBASE_REG_GROWABLE;
+
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		reg->flags |= KBASE_REG_PF_GROW;
+
+	if (flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (0 == (flags & BASE_MEM_PROT_GPU_EX))
+		reg->flags |= KBASE_REG_GPU_NX;
+
+	if (flags & BASE_MEM_COHERENT_SYSTEM ||
+			flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED)
+		reg->flags |= KBASE_REG_SHARE_BOTH;
+	else if (flags & BASE_MEM_COHERENT_LOCAL)
+		reg->flags |= KBASE_REG_SHARE_IN;
+
+	/* Set up default MEMATTR usage */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
+		(reg->flags & KBASE_REG_SHARE_BOTH)) {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
+	} else {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_update_region_flags);
+
+int kbase_alloc_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_requested)
+{
+	int new_page_count __maybe_unused;
+	size_t old_page_count = alloc->nents;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = kbase_atomic_add_pages(
+			nr_pages_requested, &alloc->imported.kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer */
+	kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested);
+
+	if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool,
+			nr_pages_requested, alloc->pages + old_page_count) != 0)
+		goto no_alloc;
+
+	/*
+	 * Request a zone cache update, this scans only the new pages an
+	 * appends their information to the zone cache. if the update
+	 * fails then clear the cache so we fall-back to doing things
+	 * page by page.
+	 */
+	if (kbase_zone_cache_update(alloc, old_page_count) != 0)
+		kbase_zone_cache_clear(alloc);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			(u32)alloc->imported.kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return 0;
+
+no_alloc:
+	kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+	return -ENOMEM;
+}
+
+int kbase_free_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	phys_addr_t *start_free;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	/* early out if nothing to do */
+	if (0 == nr_pages_to_free)
+		return 0;
+
+	start_free = alloc->pages + alloc->nents - nr_pages_to_free;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/*
+	 * Clear the zone cache, we don't expect JIT allocations to be
+	 * shrunk in parts so there is no point trying to optimize for that
+	 * by scanning for the changes caused by freeing this memory and
+	 * updating the existing cache entries.
+	 */
+	kbase_zone_cache_clear(alloc);
+
+	kbase_mem_pool_free_pages(&kctx->mem_pool,
+				  nr_pages_to_free,
+				  start_free,
+				  syncback,
+				  reclaimed);
+
+	alloc->nents -= nr_pages_to_free;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		kbase_process_page_usage_dec(kctx, nr_pages_to_free);
+		new_page_count = kbase_atomic_sub_pages(nr_pages_to_free,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(nr_pages_to_free,
+				       &kctx->kbdev->memdev.used_pages);
+
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+				(u32)kctx->id,
+				(u64)new_page_count);
+	}
+
+	return 0;
+}
+
+void kbase_mem_kref_free(struct kref *kref)
+{
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
+
+	switch (alloc->type) {
+	case KBASE_MEM_TYPE_NATIVE: {
+		WARN_ON(!alloc->imported.kctx);
+		/*
+		 * The physical allocation must have been removed from the
+		 * eviction list before trying to free it.
+		 */
+		WARN_ON(!list_empty(&alloc->evict_node));
+		kbase_free_phy_pages_helper(alloc, alloc->nents);
+		break;
+	}
+	case KBASE_MEM_TYPE_ALIAS: {
+		/* just call put on the underlying phy allocs */
+		size_t i;
+		struct kbase_aliased *aliased;
+
+		aliased = alloc->imported.alias.aliased;
+		if (aliased) {
+			for (i = 0; i < alloc->imported.alias.nents; i++)
+				if (aliased[i].alloc)
+					kbase_mem_phy_alloc_put(aliased[i].alloc);
+			vfree(aliased);
+		}
+		break;
+	}
+	case KBASE_MEM_TYPE_RAW:
+		/* raw pages, external cleanup */
+		break;
+ #ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ump_dd_release(alloc->imported.ump_handle);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		dma_buf_detach(alloc->imported.umm.dma_buf,
+			       alloc->imported.umm.dma_attachment);
+		dma_buf_put(alloc->imported.umm.dma_buf);
+		break;
+#endif
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		if (alloc->imported.user_buf.mm)
+			mmdrop(alloc->imported.user_buf.mm);
+		kfree(alloc->imported.user_buf.pages);
+		break;
+	case KBASE_MEM_TYPE_TB:{
+		void *tb;
+
+		tb = alloc->imported.kctx->jctx.tb;
+		kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
+		vfree(tb);
+		break;
+	}
+	default:
+		WARN(1, "Unexecpted free of type %d\n", alloc->type);
+		break;
+	}
+
+	/* Free based on allocation type */
+	if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+		vfree(alloc);
+	else
+		kfree(alloc);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(vsize > 0);
+
+	/* validate user provided arguments */
+	if (size > vsize || vsize > reg->nr_pages)
+		goto out_term;
+
+	/* Prevent vsize*sizeof from wrapping around.
+	 * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail.
+	 */
+	if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
+		goto out_term;
+
+	KBASE_DEBUG_ASSERT(0 != vsize);
+
+	if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
+		goto out_term;
+
+	reg->cpu_alloc->reg = reg;
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
+			goto out_rollback;
+		reg->gpu_alloc->reg = reg;
+	}
+
+	return 0;
+
+out_rollback:
+	kbase_free_phy_pages_helper(reg->cpu_alloc, size);
+out_term:
+	return -1;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+
+bool kbase_check_alloc_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Either the GPU or CPU must be reading from the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
+		return false;
+
+	/* Either the GPU or CPU must be writing to the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for allocating. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* BASE_MEM_IMPORT_SHARED is only valid for imported memory */
+	if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
+		return false;
+
+	return true;
+}
+
+bool kbase_check_import_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Imported memory cannot be GPU executable */
+	if (flags & BASE_MEM_PROT_GPU_EX)
+		return false;
+
+	/* Imported memory cannot grow on page fault */
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for importing. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* Secure memory cannot be read by the CPU */
+	if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD))
+		return false;
+
+	return true;
+}
+
+/**
+ * @brief Acquire the per-context region list lock
+ */
+void kbase_gpu_vm_lock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_lock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
+
+/**
+ * @brief Release the per-context region list lock
+ */
+void kbase_gpu_vm_unlock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_unlock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
+
+#ifdef CONFIG_DEBUG_FS
+struct kbase_jit_debugfs_data {
+	int (*func)(struct kbase_jit_debugfs_data *);
+	struct mutex lock;
+	struct kbase_context *kctx;
+	u64 active_value;
+	u64 pool_value;
+	u64 destroy_value;
+	char buffer[50];
+};
+
+static int kbase_jit_debugfs_common_open(struct inode *inode,
+		struct file *file, int (*func)(struct kbase_jit_debugfs_data *))
+{
+	struct kbase_jit_debugfs_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->func = func;
+	mutex_init(&data->lock);
+	data->kctx = (struct kbase_context *) inode->i_private;
+
+	file->private_data = data;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t kbase_jit_debugfs_common_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_jit_debugfs_data *data;
+	size_t size;
+	int ret;
+
+	data = (struct kbase_jit_debugfs_data *) file->private_data;
+	mutex_lock(&data->lock);
+
+	if (*ppos) {
+		size = strnlen(data->buffer, sizeof(data->buffer));
+	} else {
+		if (!data->func) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		if (data->func(data)) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		size = scnprintf(data->buffer, sizeof(data->buffer),
+				"%llu,%llu,%llu", data->active_value,
+				data->pool_value, data->destroy_value);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size);
+
+out_unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+static int kbase_jit_debugfs_common_release(struct inode *inode,
+		struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+	return kbase_jit_debugfs_common_open(inode, file, __func); \
+} \
+static const struct file_operations __fops = { \
+	.owner = THIS_MODULE, \
+	.open = __fops ## _open, \
+	.release = kbase_jit_debugfs_common_release, \
+	.read = kbase_jit_debugfs_common_read, \
+	.write = NULL, \
+	.llseek = generic_file_llseek, \
+}
+
+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct list_head *tmp;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each(tmp, &kctx->jit_active_head) {
+		data->active_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_pool_head) {
+		data->pool_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_destroy_head) {
+		data->destroy_value++;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
+		kbase_jit_debugfs_count_get);
+
+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->nr_pages;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops,
+		kbase_jit_debugfs_vm_get);
+
+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->gpu_alloc->nents;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
+		kbase_jit_debugfs_phys_get);
+
+void kbase_jit_debugfs_init(struct kbase_context *kctx)
+{
+	/* Debugfs entry for getting the number of JIT allocations. */
+	debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_count_fops);
+
+	/*
+	 * Debugfs entry for getting the total number of virtual pages
+	 * used by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_vm_fops);
+
+	/*
+	 * Debugfs entry for getting the number of physical pages used
+	 * by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_phys_fops);
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations
+ * @work: Work item
+ *
+ * This function does the work of freeing JIT allocations whose physical
+ * backing has been released.
+ */
+static void kbase_jit_destroy_worker(struct work_struct *work)
+{
+	struct kbase_context *kctx;
+	struct kbase_va_region *reg;
+
+	kctx = container_of(work, struct kbase_context, jit_work);
+	do {
+		mutex_lock(&kctx->jit_evict_lock);
+		if (list_empty(&kctx->jit_destroy_head)) {
+			mutex_unlock(&kctx->jit_evict_lock);
+			break;
+		}
+
+		reg = list_first_entry(&kctx->jit_destroy_head,
+				struct kbase_va_region, jit_node);
+
+		list_del(&reg->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		kbase_gpu_vm_lock(kctx);
+		kbase_mem_free_region(kctx, reg);
+		kbase_gpu_vm_unlock(kctx);
+	} while (1);
+}
+
+int kbase_jit_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->jit_active_head);
+	INIT_LIST_HEAD(&kctx->jit_pool_head);
+	INIT_LIST_HEAD(&kctx->jit_destroy_head);
+	INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
+
+	return 0;
+}
+
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info)
+{
+	struct kbase_va_region *reg = NULL;
+	struct kbase_va_region *walker;
+	struct kbase_va_region *temp;
+	size_t current_diff = SIZE_MAX;
+
+	int ret;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	/*
+	 * Scan the pool for an existing allocation which meets our
+	 * requirements and remove it.
+	 */
+	list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) {
+
+		if (walker->nr_pages >= info->va_pages) {
+			size_t min_size, max_size, diff;
+
+			/*
+			 * The JIT allocations VA requirements have been
+			 * meet, it's suitable but other allocations
+			 * might be a better fit.
+			 */
+			min_size = min_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			max_size = max_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			diff = max_size - min_size;
+
+			if (current_diff > diff) {
+				current_diff = diff;
+				reg = walker;
+			}
+
+			/* The allocation is an exact match, stop looking */
+			if (current_diff == 0)
+				break;
+		}
+	}
+
+	if (reg) {
+		/*
+		 * Remove the found region from the pool and add it to the
+		 * active list.
+		 */
+		list_move(&reg->jit_node, &kctx->jit_active_head);
+
+		/*
+		 * Remove the allocation from the eviction list as it's no
+		 * longer eligible for eviction. This must be done before
+		 * dropping the jit_evict_lock
+		 */
+		list_del_init(&reg->gpu_alloc->evict_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		kbase_gpu_vm_lock(kctx);
+
+		/* Make the physical backing no longer reclaimable */
+		if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+			goto update_failed;
+
+		/* Grow the backing if required */
+		if (reg->gpu_alloc->nents < info->commit_pages) {
+			size_t delta;
+			size_t old_size = reg->gpu_alloc->nents;
+
+			/* Allocate some more pages */
+			delta = info->commit_pages - reg->gpu_alloc->nents;
+			if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta)
+					!= 0)
+				goto update_failed;
+
+			if (reg->cpu_alloc != reg->gpu_alloc) {
+				if (kbase_alloc_phy_pages_helper(
+						reg->cpu_alloc, delta) != 0) {
+					kbase_free_phy_pages_helper(
+							reg->gpu_alloc, delta);
+					goto update_failed;
+				}
+			}
+
+			ret = kbase_mem_grow_gpu_mapping(kctx, reg,
+					info->commit_pages, old_size);
+			/*
+			 * The grow failed so put the allocation back in the
+			 * pool and return failure.
+			 */
+			if (ret)
+				goto update_failed;
+		}
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* No suitable JIT allocation was found so create a new one */
+		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+				BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+				BASE_MEM_COHERENT_LOCAL;
+		u64 gpu_addr;
+
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
+				info->extent, &flags, &gpu_addr);
+		if (!reg)
+			goto out_unlocked;
+
+		mutex_lock(&kctx->jit_evict_lock);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+		mutex_unlock(&kctx->jit_evict_lock);
+	}
+
+	return reg;
+
+update_failed:
+	/*
+	 * An update to an allocation from the pool failed, chances
+	 * are slim a new allocation would fair any better so return
+	 * the allocation to the pool and return the function with failure.
+	 */
+	kbase_gpu_vm_unlock(kctx);
+	mutex_lock(&kctx->jit_evict_lock);
+	list_move(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+out_unlocked:
+	return NULL;
+}
+
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	/* The physical backing of memory in the pool is always reclaimable */
+	down_read(&kctx->process_mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+	kbase_mem_evictable_make(reg->gpu_alloc);
+	kbase_gpu_vm_unlock(kctx);
+	up_read(&kctx->process_mm->mmap_sem);
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_move(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+}
+
+void kbase_jit_backing_lost(struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = reg->kctx;
+
+	lockdep_assert_held(&kctx->jit_evict_lock);
+
+	/*
+	 * JIT allocations will always be on a list, if the region
+	 * is not on a list then it's not a JIT allocation.
+	 */
+	if (list_empty(&reg->jit_node))
+		return;
+
+	/*
+	 * Freeing the allocation requires locks we might not be able
+	 * to take now, so move the allocation to the free list and kick
+	 * the worker which will do the freeing.
+	 */
+	list_move(&reg->jit_node, &kctx->jit_destroy_head);
+
+	schedule_work(&kctx->jit_work);
+}
+
+bool kbase_jit_evict(struct kbase_context *kctx)
+{
+	struct kbase_va_region *reg = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Free the oldest allocation from the pool */
+	mutex_lock(&kctx->jit_evict_lock);
+	if (!list_empty(&kctx->jit_pool_head)) {
+		reg = list_entry(kctx->jit_pool_head.prev,
+				struct kbase_va_region, jit_node);
+		list_del(&reg->jit_node);
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	if (reg)
+		kbase_mem_free_region(kctx, reg);
+
+	return (reg != NULL);
+}
+
+void kbase_jit_term(struct kbase_context *kctx)
+{
+	struct kbase_va_region *walker;
+
+	/* Free all allocations for this context */
+
+	/*
+	 * Flush the freeing of allocations whose backing has been freed
+	 * (i.e. everything in jit_destroy_head).
+	 */
+	cancel_work_sync(&kctx->jit_work);
+
+	kbase_gpu_vm_lock(kctx);
+	mutex_lock(&kctx->jit_evict_lock);
+	/* Free all allocations from the pool */
+	while (!list_empty(&kctx->jit_pool_head)) {
+		walker = list_first_entry(&kctx->jit_pool_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		kbase_mem_free_region(kctx, walker);
+		mutex_lock(&kctx->jit_evict_lock);
+	}
+
+	/* Free all allocations from active list */
+	while (!list_empty(&kctx->jit_active_head)) {
+		walker = list_first_entry(&kctx->jit_active_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		kbase_mem_free_region(kctx, walker);
+		mutex_lock(&kctx->jit_evict_lock);
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+	kbase_gpu_vm_unlock(kctx);
+}
+
+static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	long pinned_pages;
+	struct kbase_mem_phy_alloc *alloc;
+	struct page **pages;
+	phys_addr_t *pa;
+	long i;
+	int err = -ENOMEM;
+	unsigned long address;
+	struct mm_struct *mm;
+	struct device *dev;
+	unsigned long offset;
+	unsigned long local_size;
+
+	alloc = reg->gpu_alloc;
+	pa = kbase_get_gpu_phy_pages(reg);
+	address = alloc->imported.user_buf.address;
+	mm = alloc->imported.user_buf.mm;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+
+	pages = alloc->imported.user_buf.pages;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	pinned_pages = get_user_pages(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#else
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#endif
+
+	if (pinned_pages <= 0)
+		return pinned_pages;
+
+	if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+		for (i = 0; i < pinned_pages; i++)
+			put_page(pages[i]);
+		return -ENOMEM;
+	}
+
+	dev = kctx->kbdev->dev;
+	offset = address & ~PAGE_MASK;
+	local_size = alloc->imported.user_buf.size;
+
+	for (i = 0; i < pinned_pages; i++) {
+		dma_addr_t dma_addr;
+		unsigned long min;
+
+		min = MIN(PAGE_SIZE - offset, local_size);
+		dma_addr = dma_map_page(dev, pages[i],
+				offset, min,
+				DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, dma_addr))
+			goto unwind;
+
+		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
+		pa[i] = page_to_phys(pages[i]);
+
+		local_size -= min;
+		offset = 0;
+	}
+
+	alloc->nents = pinned_pages;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
+			kbase_reg_current_backed_size(reg),
+			reg->flags);
+	if (err == 0)
+		return 0;
+
+	alloc->nents = 0;
+	/* fall down */
+unwind:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				alloc->imported.user_buf.dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		put_page(pages[i]);
+		pages[i] = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable)
+{
+	long i;
+	struct page **pages;
+	unsigned long size = alloc->imported.user_buf.size;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	pages = alloc->imported.user_buf.pages;
+	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
+		unsigned long local_size;
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
+		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+				DMA_BIDIRECTIONAL);
+		if (writeable)
+			set_page_dirty_lock(pages[i]);
+		put_page(pages[i]);
+		pages[i] = NULL;
+
+		size -= local_size;
+	}
+	alloc->nents = 0;
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kbase_jd_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	struct sg_table *sgt;
+	struct scatterlist *s;
+	int i;
+	phys_addr_t *pa;
+	int err;
+	size_t count = 0;
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = reg->gpu_alloc;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+	KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
+	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
+			DMA_BIDIRECTIONAL);
+
+	if (IS_ERR_OR_NULL(sgt))
+		return -EINVAL;
+
+	/* save for later */
+	alloc->imported.umm.sgt = sgt;
+
+	pa = kbase_get_gpu_phy_pages(reg);
+	KBASE_DEBUG_ASSERT(pa);
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		int j;
+		size_t pages = PFN_UP(sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+		"sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+		sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+		(unsigned long long) sg_dma_address(s));
+
+		for (j = 0; (j < pages) && (count < reg->nr_pages); j++,
+				count++)
+			*pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
+		WARN_ONCE(j < pages,
+		"sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+		alloc->imported.umm.dma_buf->size);
+	}
+
+	if (WARN_ONCE(count < reg->nr_pages,
+			"sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+			alloc->imported.umm.dma_buf->size)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Update nents as we now have pages to map */
+	alloc->nents = count;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+			kbase_get_gpu_phy_pages(reg),
+			kbase_reg_current_backed_size(reg),
+			reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+
+out:
+	if (err) {
+		dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+				alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+		alloc->imported.umm.sgt = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_umm_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(alloc);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+	    alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+	alloc->nents = 0;
+}
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+
+#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) \
+		|| defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS)
+static void add_kds_resource(struct kds_resource *kds_res,
+		struct kds_resource **kds_resources, u32 *kds_res_count,
+		unsigned long *kds_access_bitmap, bool exclusive)
+{
+	u32 i;
+
+	for (i = 0; i < *kds_res_count; i++) {
+		/* Duplicate resource, ignore */
+		if (kds_resources[i] == kds_res)
+			return;
+	}
+
+	kds_resources[*kds_res_count] = kds_res;
+	if (exclusive)
+		set_bit(*kds_res_count, kds_access_bitmap);
+	(*kds_res_count)++;
+}
+#endif
+
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+		, u32 *kds_res_count, struct kds_resource **kds_resources,
+		unsigned long *kds_access_bitmap, bool exclusive
+#endif
+		)
+{
+	int err;
+
+	/* decide what needs to happen for this resource */
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		if (reg->gpu_alloc->imported.user_buf.mm != locked_mm)
+			goto exit;
+
+		reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
+			err = kbase_jd_user_buf_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+	}
+	break;
+	case KBASE_MEM_TYPE_IMPORTED_UMP: {
+#if defined(CONFIG_KDS) && defined(CONFIG_UMP)
+		if (kds_res_count) {
+			struct kds_resource *kds_res;
+
+			kds_res = ump_dd_kds_resource_get(
+					reg->gpu_alloc->imported.ump_handle);
+			if (kds_res)
+				add_kds_resource(kds_res, kds_resources,
+						kds_res_count,
+						kds_access_bitmap, exclusive);
+		}
+#endif				/*defined(CONFIG_KDS) && defined(CONFIG_UMP) */
+		break;
+	}
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+		if (kds_res_count) {
+			struct kds_resource *kds_res;
+
+			kds_res = get_dma_buf_kds_resource(
+					reg->gpu_alloc->imported.umm.dma_buf);
+			if (kds_res)
+				add_kds_resource(kds_res, kds_resources,
+						kds_res_count,
+						kds_access_bitmap, exclusive);
+		}
+#endif
+		reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+			err = kbase_jd_umm_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+		break;
+	}
+#endif
+	default:
+		goto exit;
+	}
+
+	return kbase_mem_phy_alloc_get(reg->gpu_alloc);
+exit:
+	return NULL;
+}
+
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+	switch (alloc->type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		alloc->imported.umm.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.umm.current_mapping_usage_count) {
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			kbase_jd_umm_unmap(kctx, alloc);
+		}
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		alloc->imported.user_buf.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
+			bool writeable = true;
+
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+				writeable = false;
+
+			kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+		}
+	}
+	break;
+	default:
+	break;
+	}
+	kbase_mem_phy_alloc_put(alloc);
+}
+
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *meta = NULL;
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Walk the per context external resource metadata list for the
+	 * metadata which matches the region which is being acquired.
+	 */
+	list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
+		if (walker->gpu_addr == gpu_addr) {
+			meta = walker;
+			break;
+		}
+	}
+
+	/* No metadata exists so create one. */
+	if (!meta) {
+		struct kbase_va_region *reg;
+
+		/* Find the region */
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				kctx, gpu_addr);
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+			goto failed;
+
+		/* Allocate the metadata object */
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			goto failed;
+
+		/*
+		 * Fill in the metadata object and acquire a reference
+		 * for the physical resource.
+		 */
+		meta->alloc = kbase_map_external_resource(kctx, reg, NULL
+#ifdef CONFIG_KDS
+				, NULL, NULL,
+				NULL, false
+#endif
+				);
+
+		if (!meta->alloc)
+			goto fail_map;
+
+		meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+
+		list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
+	}
+
+	return meta;
+
+fail_map:
+	kfree(meta);
+failed:
+	return NULL;
+}
+
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+	struct kbase_va_region *reg;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Search of the metadata if one isn't provided. */
+	if (!meta) {
+		/*
+		 * Walk the per context external resource metadata list for the
+		 * metadata which matches the region which is being released.
+		 */
+		list_for_each_entry(walker, &kctx->ext_res_meta_head,
+				ext_res_node) {
+			if (walker->gpu_addr == gpu_addr) {
+				meta = walker;
+				break;
+			}
+		}
+	}
+
+	/* No metadata so just return. */
+	if (!meta)
+		return false;
+
+	/* Drop the physical memory reference and free the metadata. */
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx,
+			meta->gpu_addr);
+
+	kbase_unmap_external_resource(kctx, reg, meta->alloc);
+	list_del(&meta->ext_res_node);
+	kfree(meta);
+
+	return true;
+}
+
+int kbase_sticky_resource_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->ext_res_meta_head);
+
+	return 0;
+}
+
+void kbase_sticky_resource_term(struct kbase_context *kctx)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Free any sticky resources which haven't been unmapped.
+	 *
+	 * Note:
+	 * We don't care about refcounts at this point as no future
+	 * references to the meta data will be made.
+	 * Region termination would find these if we didn't free them
+	 * here, but it's more efficient if we do the clean up here.
+	 */
+	while (!list_empty(&kctx->ext_res_meta_head)) {
+		walker = list_first_entry(&kctx->ext_res_meta_head,
+				struct kbase_ctx_ext_res_meta, ext_res_node);
+
+		kbase_sticky_resource_release(kctx, walker, 0);
+	}
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
new file mode 100755
index 0000000..5137f0c
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -0,0 +1,1066 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/kref.h>
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include "mali_kbase_gator.h"
+#endif
+/* Required for kbase_mem_evictable_unmake */
+#include "mali_kbase_mem_linux.h"
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)	/* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
+The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and
+page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table
+updates and generates duplicate page faults as the page table information used by the MMU is not valid.   */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3)	/* round to 8 pages */
+
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0)	/* round to 1 page */
+
+/* This must always be a power of 2 */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/**
+ * A CPU mapping
+ */
+struct kbase_cpu_mapping {
+	struct   list_head mappings_list;
+	struct   kbase_mem_phy_alloc *alloc;
+	struct   kbase_context *kctx;
+	struct   kbase_va_region *region;
+	int      count;
+	int      free_on_close;
+};
+
+enum kbase_memory_type {
+	KBASE_MEM_TYPE_NATIVE,
+	KBASE_MEM_TYPE_IMPORTED_UMP,
+	KBASE_MEM_TYPE_IMPORTED_UMM,
+	KBASE_MEM_TYPE_IMPORTED_USER_BUF,
+	KBASE_MEM_TYPE_ALIAS,
+	KBASE_MEM_TYPE_TB,
+	KBASE_MEM_TYPE_RAW
+};
+
+/* internal structure, mirroring base_mem_aliasing_info,
+ * but with alloc instead of a gpu va (handle) */
+struct kbase_aliased {
+	struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
+	u64 offset; /* in pages */
+	u64 length; /* in pages */
+};
+
+/**
+ * @brief Physical pages tracking object properties
+  */
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED  (1ul << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE            (1ul << 1)
+
+/* physical pages tracking object.
+ * Set up to track N pages.
+ * N not stored here, the creator holds that info.
+ * This object only tracks how many elements are actually valid (present).
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not
+ * shared with another region or client. CPU mappings are OK to exist when changing, as
+ * long as the tracked mappings objects are updated as part of the change.
+ */
+struct kbase_mem_phy_alloc {
+	struct kref           kref; /* number of users of this alloc */
+	atomic_t              gpu_mappings;
+	size_t                nents; /* 0..N */
+	phys_addr_t           *pages; /* N elements, only 0..nents are valid */
+
+	/* kbase_cpu_mappings */
+	struct list_head      mappings;
+
+	/* Node used to store this allocation on the eviction list */
+	struct list_head      evict_node;
+	/* Physical backing size when the pages where evicted */
+	size_t                evicted;
+	/*
+	 * Back reference to the region structure which created this
+	 * allocation, or NULL if it has been freed.
+	 */
+	struct kbase_va_region *reg;
+
+	/* type of buffer */
+	enum kbase_memory_type type;
+
+	unsigned long properties;
+
+	struct list_head       zone_cache;
+
+	/* member in union valid based on @a type */
+	union {
+#ifdef CONFIG_UMP
+		ump_dd_handle ump_handle;
+#endif /* CONFIG_UMP */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		struct {
+			struct dma_buf *dma_buf;
+			struct dma_buf_attachment *dma_attachment;
+			unsigned int current_mapping_usage_count;
+			struct sg_table *sgt;
+		} umm;
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+		struct {
+			u64 stride;
+			size_t nents;
+			struct kbase_aliased *aliased;
+		} alias;
+		/* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
+		struct kbase_context *kctx;
+		struct {
+			unsigned long address;
+			unsigned long size;
+			unsigned long nr_pages;
+			struct page **pages;
+			unsigned int current_mapping_usage_count;
+			struct mm_struct *mm;
+			dma_addr_t *dma_addrs;
+		} user_buf;
+	} imported;
+};
+
+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		atomic_inc(&alloc->gpu_mappings);
+}
+
+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+			pr_err("Mismatched %s:\n", __func__);
+			dump_stack();
+		}
+}
+
+void kbase_mem_kref_free(struct kref *kref);
+
+int kbase_mem_init(struct kbase_device *kbdev);
+void kbase_mem_halt(struct kbase_device *kbdev);
+void kbase_mem_term(struct kbase_device *kbdev);
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_get(&alloc->kref);
+	return alloc;
+}
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_put(&alloc->kref, kbase_mem_kref_free);
+	return NULL;
+}
+
+/**
+ * A GPU memory region, and attributes for CPU mappings.
+ */
+struct kbase_va_region {
+	struct rb_node rblink;
+	struct list_head link;
+
+	struct kbase_context *kctx;	/* Backlink to base context */
+
+	u64 start_pfn;		/* The PFN in GPU space */
+	size_t nr_pages;
+
+/* Free region */
+#define KBASE_REG_FREE              (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR            (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR            (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX            (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED        (1ul << 4)
+/* Is GPU cached? */
+#define KBASE_REG_GPU_CACHED        (1ul << 5)
+
+#define KBASE_REG_GROWABLE          (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW           (1ul << 7)
+
+/* VA managed by us */
+#define KBASE_REG_CUSTOM_VA         (1ul << 8)
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN          (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH        (1ul << 10)
+
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_MASK         (3ul << 11)
+#define KBASE_REG_ZONE(x)           (((x) & 3) << 11)
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD            (1ul<<13)
+/* CPU read access */
+#define KBASE_REG_CPU_RD            (1ul<<14)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK      (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x)  (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x)  (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_SECURE            (1ul << 19)
+
+#define KBASE_REG_DONT_NEED         (1ul << 20)
+
+#define KBASE_REG_ZONE_SAME_VA      KBASE_REG_ZONE(0)
+
+/* only used with 32-bit clients */
+/*
+ * On a 32bit platform, custom VA should be wired from (4GB + shader region)
+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink
+ * it down, if required by the GPU, during initialization.
+ */
+
+/*
+ * Dedicated 16MB region for shader code:
+ * VA range 0x101000000-0x102000000
+ */
+#define KBASE_REG_ZONE_EXEC         KBASE_REG_ZONE(1)
+#define KBASE_REG_ZONE_EXEC_BASE    (0x101000000ULL >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_SIZE    ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
+
+#define KBASE_REG_ZONE_CUSTOM_VA         KBASE_REG_ZONE(2)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE    (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE    (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
+
+	unsigned long flags;
+
+	size_t extent; /* nr of pages alloc'd on PF */
+
+	struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */
+	struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */
+
+	/* non-NULL if this memory object is a kds_resource */
+	struct kds_resource *kds_res;
+
+	/* List head used to store the region in the JIT allocation pool */
+	struct list_head jit_node;
+};
+
+/* Common functions */
+static inline phys_addr_t *kbase_get_cpu_phy_pages(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->pages;
+}
+
+static inline phys_addr_t *kbase_get_gpu_phy_pages(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->gpu_alloc->pages;
+}
+
+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	/* if no alloc object the backed size naturally is 0 */
+	if (!reg->cpu_alloc)
+		return 0;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->nents;
+}
+
+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
+
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
+{
+	struct kbase_mem_phy_alloc *alloc;
+	size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+	size_t per_page_size = sizeof(*alloc->pages);
+
+	/* Imported pages may have page private data already in use */
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		alloc_size += nr_pages *
+				sizeof(*alloc->imported.user_buf.dma_addrs);
+		per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
+	}
+
+	/*
+	 * Prevent nr_pages*per_page_size + sizeof(*alloc) from
+	 * wrapping around.
+	 */
+	if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+			/ per_page_size))
+		return ERR_PTR(-ENOMEM);
+
+	/* Allocate based on the size to reduce internal fragmentation of vmem */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc = vzalloc(alloc_size);
+	else
+		alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+	if (!alloc)
+		return ERR_PTR(-ENOMEM);
+
+	/* Store allocation method */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+	kref_init(&alloc->kref);
+	atomic_set(&alloc->gpu_mappings, 0);
+	alloc->nents = 0;
+	alloc->pages = (void *)(alloc + 1);
+	INIT_LIST_HEAD(&alloc->mappings);
+	alloc->type = type;
+	INIT_LIST_HEAD(&alloc->zone_cache);
+
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
+		alloc->imported.user_buf.dma_addrs =
+				(void *) (alloc->pages + nr_pages);
+
+	return alloc;
+}
+
+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
+		struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+	reg->cpu_alloc = kbase_alloc_create(reg->nr_pages,
+			KBASE_MEM_TYPE_NATIVE);
+	if (IS_ERR(reg->cpu_alloc))
+		return PTR_ERR(reg->cpu_alloc);
+	else if (!reg->cpu_alloc)
+		return -ENOMEM;
+	reg->cpu_alloc->imported.kctx = kctx;
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+	if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE)
+	    && (reg->flags & KBASE_REG_CPU_CACHED)) {
+		reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
+				KBASE_MEM_TYPE_NATIVE);
+		reg->gpu_alloc->imported.kctx = kctx;
+		INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+	} else {
+		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	}
+
+	INIT_LIST_HEAD(&reg->jit_node);
+	reg->flags &= ~KBASE_REG_FREE;
+	return 0;
+}
+
+static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_add_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_sub_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+/*
+ * Max size for kbdev memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * Max size for kctx memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KCTX  (SZ_64M >> PAGE_SHIFT)
+
+/**
+ * kbase_mem_pool_init - Create a memory pool for a kbase device
+ * @pool:      Memory pool to initialize
+ * @max_size:  Maximum number of free pages the pool can hold
+ * @kbdev:     Kbase device where memory is used
+ * @next_pool: Pointer to the next pool or NULL.
+ *
+ * Allocations from @pool are in whole pages. Each @pool has a free list where
+ * pages can be quickly allocated from. The free list is initially empty and
+ * filled whenever pages are freed back to the pool. The number of free pages
+ * in the pool will in general not exceed @max_size, but the pool may in
+ * certain corner cases grow above @max_size.
+ *
+ * If @next_pool is not NULL, we will allocate from @next_pool before going to
+ * the kernel allocator. Similarily pages can spill over to @next_pool when
+ * @pool is full. Pages are zeroed before they spill over to another pool, to
+ * prevent leaking information between applications.
+ *
+ * A shrinker is registered so that Linux mm can reclaim pages from the pool as
+ * needed.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool);
+
+/**
+ * kbase_mem_pool_term - Destroy a memory pool
+ * @pool:  Memory pool to destroy
+ *
+ * Pages in the pool will spill over to @next_pool (if available) or freed to
+ * the kernel.
+ */
+void kbase_mem_pool_term(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_alloc - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * Allocations from the pool are made as follows:
+ * 1. If there are free pages in the pool, allocate a page from @pool.
+ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page
+ *    from @next_pool.
+ * 3. Return NULL if no memory in the pool
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ */
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_free - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @page:  Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * Pages are freed to the pool as follows:
+ * 1. If @pool is not full, add @page to @pool.
+ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to
+ *    @next_pool.
+ * 3. Finally, free @page to the kernel.
+ */
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page,
+		bool dirty);
+
+/**
+ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool
+ * @pool:     Memory pool to allocate from
+ * @nr_pages: Number of pages to allocate
+ * @pages:    Pointer to array where the physical address of the allocated
+ *            pages will be stored.
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages);
+
+/**
+ * kbase_mem_pool_free_pages - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages, bool dirty, bool reclaimed);
+
+/**
+ * kbase_mem_pool_size - Get number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Note: the size of the pool may in certain corner cases exceed @max_size!
+ *
+ * Return: Number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool)
+{
+	return ACCESS_ONCE(pool->cur_size);
+}
+
+/**
+ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Return: Maximum number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool)
+{
+	return pool->max_size;
+}
+
+
+/**
+ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool
+ * @pool:     Memory pool to inspect
+ * @max_size: Maximum number of free pages the pool can hold
+ *
+ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit.
+ * For details see kbase_mem_pool_shrink().
+ */
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
+
+/**
+ * kbase_mem_pool_grow - Grow the pool
+ * @pool:       Memory pool to grow
+ * @nr_to_grow: Number of pages to add to the pool
+ *
+ * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
+ * become larger than the maximum size specified.
+ *
+ * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages
+ */
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+
+/**
+ * kbase_mem_pool_trim - Grow or shrink the pool to a new size
+ * @pool:     Memory pool to trim
+ * @new_size: New number of pages in the pool
+ *
+ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but
+ * not above the max_size for the pool.
+ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel.
+ */
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
+
+/*
+ * kbase_mem_alloc_page - Allocate a new page for a device
+ * @kbdev: The kbase device
+ *
+ * Most uses should use kbase_mem_pool_alloc to allocate a page. However that
+ * function can fail in the event the pool is empty.
+ *
+ * Return: A new page or NULL if no memory
+ */
+struct page *kbase_mem_alloc_page(struct kbase_device *kbdev);
+
+int kbase_region_tracker_init(struct kbase_context *kctx);
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages);
+void kbase_region_tracker_term(struct kbase_context *kctx);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * @brief Check that a pointer is actually a valid region.
+ *
+ * Must be called with context lock held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr);
+
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone);
+void kbase_free_alloced_region(struct kbase_va_region *reg);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+bool kbase_check_alloc_flags(unsigned long flags);
+bool kbase_check_import_flags(unsigned long flags);
+void kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags);
+
+void kbase_gpu_vm_lock(struct kbase_context *kctx);
+void kbase_gpu_vm_unlock(struct kbase_context *kctx);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
+
+int kbase_mmu_init(struct kbase_context *kctx);
+void kbase_mmu_term(struct kbase_context *kctx);
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
+void kbase_mmu_free_pgd(struct kbase_context *kctx);
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					phys_addr_t phys, size_t nr,
+					unsigned long flags);
+
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr);
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags);
+
+/**
+ * @brief Register region and map it on the GPU.
+ *
+ * Call kbase_add_va_region() and map the region on the GPU.
+ */
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+/**
+ * @brief Remove the region from the GPU and unregister it.
+ *
+ * Must be called with context lock held.
+ */
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ */
+void kbase_mmu_update(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable() - Disable the MMU for a previously active kbase context.
+ * @kctx:	Kbase context
+ *
+ * Disable and perform the required cache maintenance to remove the all
+ * data from provided kbase context from the GPU caches.
+ *
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ */
+void kbase_mmu_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified
+ * address space.
+ * @kbdev:	Kbase device
+ * @as_nr:	The address space number to set to unmapped.
+ *
+ * This function must only be called during reset/power-up and it used to
+ * ensure the registers are in a known state.
+ *
+ * The caller must hold kbdev->mmu_hw_mutex.
+ */
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr);
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** Dump the MMU tables to a buffer
+ *
+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the
+ * buffer is too small then the return value will be NULL.
+ *
+ * The GPU vm lock must be held when calling this function.
+ *
+ * The buffer returned should be freed with @ref vfree when it is no longer required.
+ *
+ * @param[in]   kctx        The kbase context to dump
+ * @param[in]   nr_pages    The number of pages to allocate for the buffer.
+ *
+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too
+ * small)
+ */
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages);
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset);
+void kbase_sync_single(struct kbase_context *kctx, phys_addr_t cpu_pa,
+		phys_addr_t gpu_pa, off_t offset, size_t size,
+		enum kbase_sync_type sync_fn);
+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+
+/* OS specific functions */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr);
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg);
+void kbase_os_mem_map_lock(struct kbase_context *kctx);
+void kbase_os_mem_map_unlock(struct kbase_context *kctx);
+
+/**
+ * @brief Update the memory allocation counters for the current process
+ *
+ * OS specific call to updates the current memory allocation counters for the current process with
+ * the supplied delta.
+ *
+ * @param[in] kctx  The kbase context
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
+
+/**
+ * @brief Add to the memory allocation counters for the current process
+ *
+ * OS specific call to add to the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, pages);
+}
+
+/**
+ * @brief Subtract from the memory allocation counters for the current process
+ *
+ * OS specific call to subtract from the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, 0 - pages);
+}
+
+/**
+ * kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU
+ * mapping of a memory allocation containing a given address range
+ *
+ * Searches for a CPU mapping of any part of any region that fully encloses the
+ * CPU virtual address range specified by @uaddr and @size. Returns a failure
+ * indication if only part of the address range lies within a CPU mapping.
+ *
+ * @kctx:      The kernel base context used for the allocation.
+ * @uaddr:     Start of the CPU virtual address range.
+ * @size:      Size of the CPU virtual address range (in bytes).
+ * @offset:    The offset from the start of the allocation to the specified CPU
+ *             virtual address.
+ *
+ * Return: 0 if offset was obtained successfully. Error code otherwise.
+ */
+int kbasep_find_enclosing_cpu_mapping_offset(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset);
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer);
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+* @brief Allocates physical pages.
+*
+* Allocates \a nr_pages_requested and updates the alloc object.
+*
+* @param[in] alloc allocation object to add pages to
+* @param[in] nr_pages_requested number of physical pages to allocate
+*
+* @return 0 if all pages have been successfully allocated. Error code otherwise
+*/
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested);
+
+/**
+* @brief Free physical pages.
+*
+* Frees \a nr_pages and updates the alloc object.
+*
+* @param[in] alloc allocation object to free pages from
+* @param[in] nr_pages_to_free number of physical pages to free
+*/
+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
+
+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+{
+	SetPagePrivate(p);
+	if (sizeof(dma_addr_t) > sizeof(p->private)) {
+		/* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
+		 * private field stays the same. So we have to be clever and
+		 * use the fact that we only store DMA addresses of whole pages,
+		 * so the low bits should be zero */
+		KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
+		set_page_private(p, dma_addr >> PAGE_SHIFT);
+	} else {
+		set_page_private(p, dma_addr);
+	}
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+	if (sizeof(dma_addr_t) > sizeof(p->private))
+		return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
+
+	return (dma_addr_t)page_private(p);
+}
+
+static inline void kbase_clear_dma_addr(struct page *p)
+{
+	ClearPagePrivate(p);
+}
+
+/**
+* @brief Process a bus or page fault.
+*
+* This function will process a fault on a specific address space
+*
+* @param[in] kbdev   The @ref kbase_device the fault happened on
+* @param[in] kctx    The @ref kbase_context for the faulting address space if
+*                    one was found.
+* @param[in] as      The address space that has the fault
+*/
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_as *as);
+
+/**
+ * @brief Process a page fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void page_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Process a bus fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void bus_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Flush MMU workqueues.
+ *
+ * This function will cause any outstanding page or bus faults to be processed.
+ * It should be called prior to powering off the GPU.
+ *
+ * @param[in] kbdev   Device pointer
+ */
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev);
+
+/**
+ * kbase_sync_single_for_device - update physical memory and give GPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * kbase_jit_debugfs_init - Add per context debugfs entry for JIT.
+ * @kctx: kbase context
+ */
+void kbase_jit_debugfs_init(struct kbase_context *kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_init - Initialize the JIT memory pool management
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_jit_init(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_allocate - Allocate JIT memory
+ * @kctx: kbase context
+ * @info: JIT allocation information
+ *
+ * Return: JIT allocation on success or NULL on failure.
+ */
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info);
+
+/**
+ * kbase_jit_free - Free a JIT allocation
+ * @kctx: kbase context
+ * @reg: JIT allocation
+ *
+ * Frees a JIT allocation and places it into the free pool for later reuse.
+ */
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing
+ * @reg: JIT allocation
+ */
+void kbase_jit_backing_lost(struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_evict - Evict a JIT allocation from the pool
+ * @kctx: kbase context
+ *
+ * Evict the least recently used JIT allocation from the pool. This can be
+ * required if normal VA allocations are failing due to VA exhaustion.
+ *
+ * Return: True if a JIT allocation was freed, false otherwise.
+ */
+bool kbase_jit_evict(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_term - Terminate the JIT memory pool management
+ * @kctx: kbase context
+ */
+void kbase_jit_term(struct kbase_context *kctx);
+
+/**
+ * kbase_map_external_resource - Map an external resource to the GPU.
+ * @kctx:              kbase context.
+ * @reg:               The region to map.
+ * @locked_mm:         The mm_struct which has been locked for this operation.
+ * @kds_res_count:     The number of KDS resources.
+ * @kds_resources:     Array of KDS resources.
+ * @kds_access_bitmap: Access bitmap for KDS.
+ * @exclusive:         If the KDS resource requires exclusive access.
+ *
+ * Return: The physical allocation which backs the region on success or NULL
+ * on failure.
+ */
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+		, u32 *kds_res_count, struct kds_resource **kds_resources,
+		unsigned long *kds_access_bitmap, bool exclusive
+#endif
+		);
+
+/**
+ * kbase_unmap_external_resource - Unmap an external resource from the GPU.
+ * @kctx:  kbase context.
+ * @reg:   The region to unmap or NULL if it has already been released.
+ * @alloc: The physical allocation being unmapped.
+ */
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_sticky_resource_init - Initialize sticky resource management.
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_sticky_resource_init(struct kbase_context *kctx);
+
+/**
+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @gpu_addr: The GPU address of the external resource.
+ *
+ * Return: The metadata object which represents the binding between the
+ * external resource and the kbase context on success or NULL on failure.
+ */
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_release - Release a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @meta:     Binding metadata.
+ * @gpu_addr: GPU address of the external resource.
+ *
+ * If meta is NULL then gpu_addr will be used to scan the metadata list and
+ * find the matching metadata (if any), otherwise the provided meta will be
+ * used and gpu_addr will be ignored.
+ *
+ * Return: True if the release found the metadata and the reference was dropped.
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_term - Terminate sticky resource management.
+ * @kctx: kbase context
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx);
+
+/**
+ * kbase_zone_cache_update - Update the memory zone cache after new pages have
+ * been added.
+ * @alloc:        The physical memory allocation to build the cache for.
+ * @start_offset: Offset to where the new pages start.
+ *
+ * Updates an existing memory zone cache, updating the counters for the
+ * various zones.
+ * If the memory allocation doesn't already have a zone cache assume that
+ * one isn't created and thus don't do anything.
+ *
+ * Return: Zero cache was updated, negative error code on error.
+ */
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset);
+
+/**
+ * kbase_zone_cache_build - Build the memory zone cache.
+ * @alloc:        The physical memory allocation to build the cache for.
+ *
+ * Create a new zone cache for the provided physical memory allocation if
+ * one doesn't already exist, if one does exist then just return.
+ *
+ * Return: Zero if the zone cache was created, negative error code on error.
+ */
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_zone_cache_clear - Clear the memory zone cache.
+ * @alloc:        The physical memory allocation to clear the cache on.
+ */
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc);
+
+#endif				/* _KBASE_MEM_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
new file mode 100755
index 0000000..e3787c7
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -0,0 +1,2643 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.c
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/shrinker.h>
+#include <linux/cache.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_tlstream.h>
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ *
+ * Note: Caller must be holding the processes mmap_sem lock.
+ */
+static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
+		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
+		u64 *gpu_va)
+{
+	int zone;
+	int gpu_pc_bits;
+	int cpu_va_bits;
+	struct kbase_va_region *reg;
+	struct device *dev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(gpu_va);
+
+	dev = kctx->kbdev->dev;
+	*gpu_va = 0; /* return 0 on failure */
+
+	gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	cpu_va_bits = BITS_PER_LONG;
+
+	if (0 == va_pages) {
+		dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!");
+		goto bad_size;
+	}
+
+	if (va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+#if defined(CONFIG_64BIT)
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		cpu_va_bits = 32;
+#endif
+
+	if (!kbase_check_alloc_flags(*flags)) {
+		dev_warn(dev,
+				"kbase_mem_alloc called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	/* Limit GPU executable allocs to GPU PC size */
+	if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+	    (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT)))
+		goto bad_ex_size;
+
+	/* find out which VA zone to use */
+	if (*flags & BASE_MEM_SAME_VA)
+		zone = KBASE_REG_ZONE_SAME_VA;
+	else if (*flags & BASE_MEM_PROT_GPU_EX)
+		zone = KBASE_REG_ZONE_EXEC;
+	else
+		zone = KBASE_REG_ZONE_CUSTOM_VA;
+
+	reg = kbase_alloc_free_region(kctx, 0, va_pages, zone);
+	if (!reg) {
+		dev_err(dev, "Failed to allocate free region");
+		goto no_region;
+	}
+
+	kbase_update_region_flags(kctx, reg, *flags);
+
+	if (kbase_reg_prepare_native(reg, kctx) != 0) {
+		dev_err(dev, "Failed to prepare region");
+		goto prepare_failed;
+	}
+
+	if (*flags & BASE_MEM_GROW_ON_GPF)
+		reg->extent = extent;
+	else
+		reg->extent = 0;
+
+	if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) {
+		dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)",
+				(unsigned long long)commit_pages,
+				(unsigned long long)va_pages);
+		goto no_mem;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & BASE_MEM_SAME_VA) {
+		unsigned long prot = PROT_NONE;
+		unsigned long va_size = va_pages << PAGE_SHIFT;
+		unsigned long va_map = va_size;
+		unsigned long cookie, cookie_nr;
+		unsigned long cpu_addr;
+
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(dev, "No cookies available for allocation!");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_cookie;
+		}
+		/* return a cookie */
+		cookie_nr = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << cookie_nr);
+		BUG_ON(kctx->pending_regions[cookie_nr]);
+		kctx->pending_regions[cookie_nr] = reg;
+
+		kbase_gpu_vm_unlock(kctx);
+
+		/* relocate to correct base */
+		cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		cookie <<= PAGE_SHIFT;
+
+		/*
+		 * 10.1-10.4 UKU userland relies on the kernel to call mmap.
+		 * For all other versions we can just return the cookie
+		 */
+		if (kctx->api_version < KBASE_API_VERSION(10, 1) ||
+		    kctx->api_version > KBASE_API_VERSION(10, 4)) {
+			*gpu_va = (u64) cookie;
+			return reg;
+		}
+		if (*flags & BASE_MEM_PROT_CPU_RD)
+			prot |= PROT_READ;
+		if (*flags & BASE_MEM_PROT_CPU_WR)
+			prot |= PROT_WRITE;
+
+		cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot,
+				MAP_SHARED, cookie);
+
+		if (IS_ERR_VALUE(cpu_addr)) {
+			kbase_gpu_vm_lock(kctx);
+			kctx->pending_regions[cookie_nr] = NULL;
+			kctx->cookies |= (1UL << cookie_nr);
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+
+		*gpu_va = (u64) cpu_addr;
+	} else /* we control the VA */ {
+		if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
+			dev_warn(dev, "Failed to map memory on GPU");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+
+		kbase_gpu_vm_unlock(kctx);
+	}
+
+	return reg;
+
+no_mmap:
+no_cookie:
+no_mem:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+prepare_failed:
+	kfree(reg);
+no_region:
+bad_ex_size:
+bad_flags:
+bad_size:
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_alloc);
+
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(out);
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	switch (query) {
+	case KBASE_MEM_QUERY_COMMIT_SIZE:
+		if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) {
+			*out = kbase_reg_current_backed_size(reg);
+		} else {
+			size_t i;
+			struct kbase_aliased *aliased;
+			*out = 0;
+			aliased = reg->cpu_alloc->imported.alias.aliased;
+			for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++)
+				*out += aliased[i].length;
+		}
+		break;
+	case KBASE_MEM_QUERY_VA_SIZE:
+		*out = reg->nr_pages;
+		break;
+	case KBASE_MEM_QUERY_FLAGS:
+	{
+		*out = 0;
+		if (KBASE_REG_CPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_WR;
+		if (KBASE_REG_CPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_RD;
+		if (KBASE_REG_CPU_CACHED & reg->flags)
+			*out |= BASE_MEM_CACHED_CPU;
+		if (KBASE_REG_GPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_WR;
+		if (KBASE_REG_GPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_RD;
+		if (!(KBASE_REG_GPU_NX & reg->flags))
+			*out |= BASE_MEM_PROT_GPU_EX;
+		if (KBASE_REG_SHARE_BOTH & reg->flags)
+			*out |= BASE_MEM_COHERENT_SYSTEM;
+		if (KBASE_REG_SHARE_IN & reg->flags)
+			*out |= BASE_MEM_COHERENT_LOCAL;
+		break;
+	}
+	default:
+		*out = 0;
+		goto out_unlock;
+	}
+
+	ret = 0;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the
+ * Ephemeral memory eviction list.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages which can be freed.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long pages = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+
+	mutex_lock(&kctx->jit_evict_lock);
+
+	list_for_each_entry(alloc, &kctx->evict_list, evict_node)
+		pages += alloc->nents;
+
+	mutex_unlock(&kctx->jit_evict_lock);
+	return pages;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction
+ * list for pages and try to reclaim them.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages freed (can be less then requested) or -1 if the
+ * shrinker failed to free pages in its pool.
+ *
+ * Note:
+ * This function accesses region structures without taking the region lock,
+ * this is required as the OOM killer can call the shrinker after the region
+ * lock has already been held.
+ * This is safe as we can guarantee that a region on the eviction list will
+ * not be freed (kbase_mem_free_region removes the allocation from the list
+ * before destroying it), or modified by other parts of the driver.
+ * The eviction list itself is guarded by the eviction lock and the MMU updates
+ * are protected by their own lock.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	struct kbase_mem_phy_alloc *tmp;
+	unsigned long freed = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+	mutex_lock(&kctx->jit_evict_lock);
+
+	list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
+		int err;
+
+		err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
+				0, alloc->nents);
+		if (err != 0) {
+			/*
+			 * Failed to remove GPU mapping, tell the shrinker
+			 * to stop trying to shrink our slab even though we
+			 * have pages in it.
+			 */
+			freed = -1;
+			goto out_unlock;
+		}
+
+		/*
+		 * Update alloc->evicted before freeing the backing so the
+		 * helper can determine that it needs to bypass the accounting
+		 * and memory pool.
+		 */
+		alloc->evicted = alloc->nents;
+
+		kbase_free_phy_pages_helper(alloc, alloc->evicted);
+		freed += alloc->evicted;
+		list_del_init(&alloc->evict_node);
+
+		/*
+		 * Inform the JIT allocator this region has lost backing
+		 * as it might need to free the allocation.
+		 */
+		kbase_jit_backing_lost(alloc->reg);
+
+		/* Enough pages have been freed so stop now */
+		if (freed > sc->nr_to_scan)
+			break;
+	}
+out_unlock:
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_evictable_reclaim_count_objects(s, sc);
+
+	return kbase_mem_evictable_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_evictable_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->evict_list);
+	mutex_init(&kctx->jit_evict_lock);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink;
+#else
+	kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
+	kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+#endif
+	kctx->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	kctx->reclaim.batch = 0;
+#endif
+	register_shrinker(&kctx->reclaim);
+	return 0;
+}
+
+void kbase_mem_evictable_deinit(struct kbase_context *kctx)
+{
+	unregister_shrinker(&kctx->reclaim);
+}
+
+struct kbase_mem_zone_cache_entry {
+	/* List head used to link the cache entry to the memory allocation. */
+	struct list_head zone_node;
+	/* The zone the cacheline is for. */
+	struct zone *zone;
+	/* The number of pages in the allocation which belong to this zone. */
+	u64 count;
+};
+
+static bool kbase_zone_cache_builder(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	struct kbase_mem_zone_cache_entry *cache = NULL;
+	size_t i;
+	int ret = 0;
+
+	for (i = start_offset; i < alloc->nents; i++) {
+		struct page *p = phys_to_page(alloc->pages[i]);
+		struct zone *zone = page_zone(p);
+		bool create = true;
+
+		if (cache && (cache->zone == zone)) {
+			/*
+			 * Fast path check as most of the time adjacent
+			 * pages come from the same zone.
+			 */
+			create = false;
+		} else {
+			/*
+			 * Slow path check, walk all the cache entries to see
+			 * if we already know about this zone.
+			 */
+			list_for_each_entry(cache, &alloc->zone_cache, zone_node) {
+				if (cache->zone == zone) {
+					create = false;
+					break;
+				}
+			}
+		}
+
+		/* This zone wasn't found in the cache, create an entry for it */
+		if (create) {
+			cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+			if (!cache) {
+				ret = -ENOMEM;
+				goto bail;
+			}
+			cache->zone = zone;
+			cache->count = 0;
+			list_add(&cache->zone_node, &alloc->zone_cache);
+		}
+
+		cache->count++;
+	}
+	return 0;
+
+bail:
+	return ret;
+}
+
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	/*
+	 * Bail if the zone cache is empty, only update the cache if it
+	 * existed in the first place.
+	 */
+	if (list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, start_offset);
+}
+
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc)
+{
+	/* Bail if the zone cache already exists */
+	if (!list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, 0);
+}
+
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_mem_zone_cache_entry *walker;
+
+	while(!list_empty(&alloc->zone_cache)){
+		walker = list_first_entry(&alloc->zone_cache,
+				struct kbase_mem_zone_cache_entry, zone_node);
+		list_del(&walker->zone_node);
+		kfree(walker);
+	}
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p = phys_to_page(alloc->pages[i]);
+			struct zone *zone = page_zone(p);
+
+			zone_page_state_add(1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	kbase_process_page_usage_dec(kctx, alloc->nents);
+	new_page_count = kbase_atomic_sub_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+/**
+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable.
+ * @alloc: The physical allocation
+ */
+static
+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	new_page_count = kbase_atomic_add_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters so that the allocation is accounted for
+	 * against the process and thus is visible to the OOM killer,
+	 * then remove it from the reclaimable accounting. */
+	kbase_process_page_usage_inc(kctx, alloc->nents);
+
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(-zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p = phys_to_page(alloc->pages[i]);
+			struct zone *zone = page_zone(p);
+
+			zone_page_state_add(-1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* This alloction can't already be on a list. */
+	WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
+	kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
+			0, gpu_alloc->nents);
+
+	/*
+	 * Add the allocation to the eviction list, after this point the shrink
+	 * can reclaim it.
+	 */
+	mutex_lock(&kctx->jit_evict_lock);
+	list_add(&gpu_alloc->evict_node, &kctx->evict_list);
+	mutex_unlock(&kctx->jit_evict_lock);
+	kbase_mem_evictable_mark_reclaim(gpu_alloc);
+
+	gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED;
+	return 0;
+}
+
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * First remove the allocation from the eviction list as it's no
+	 * longer eligible for eviction.
+	 */
+	list_del_init(&gpu_alloc->evict_node);
+
+	if (gpu_alloc->evicted == 0) {
+		/*
+		 * The backing is still present, update the VM stats as it's
+		 * in use again.
+		 */
+		kbase_mem_evictable_unmark_reclaim(gpu_alloc);
+	} else {
+		/* If the region is still alive ... */
+		if (gpu_alloc->reg) {
+			/* ... allocate replacement backing ... */
+			err = kbase_alloc_phy_pages_helper(gpu_alloc,
+					gpu_alloc->evicted);
+
+			/*
+			 * ... and grow the mapping back to its
+			 * pre-eviction size.
+			 */
+			if (!err)
+				err = kbase_mem_grow_gpu_mapping(kctx,
+						gpu_alloc->reg,
+						gpu_alloc->evicted, 0);
+
+			gpu_alloc->evicted = 0;
+		}
+	}
+
+	/* If the region is still alive remove the DONT_NEED attribute. */
+	if (gpu_alloc->reg)
+		gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED;
+
+	return (err == 0);
+}
+
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+	unsigned int real_flags = 0;
+	unsigned int prev_flags = 0;
+	bool prev_needed, new_needed;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (!gpu_addr)
+		return -EINVAL;
+
+	/* nuke other bits */
+	flags &= mask;
+
+	/* check for only supported flags */
+	if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* mask covers bits we don't support? */
+	if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* convert flags */
+	if (BASE_MEM_COHERENT_SYSTEM & flags)
+		real_flags |= KBASE_REG_SHARE_BOTH;
+	else if (BASE_MEM_COHERENT_LOCAL & flags)
+		real_flags |= KBASE_REG_SHARE_IN;
+
+	/* now we can lock down the context, and find the region */
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	/* Is the region being transitioning between not needed and needed? */
+	prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
+	new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
+	if (prev_needed != new_needed) {
+		/* Aliased allocations can't be made ephemeral */
+		if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1)
+			goto out_unlock;
+
+		if (new_needed) {
+			/* Only native allocations can be marked not needed */
+			if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			ret = kbase_mem_evictable_make(reg->gpu_alloc);
+			if (ret)
+				goto out_unlock;
+		} else {
+			kbase_mem_evictable_unmake(reg->gpu_alloc);
+		}
+	}
+
+	/* limit to imported memory */
+	if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) &&
+	     (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
+		goto out_unlock;
+
+	/* no change? */
+	if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/* save for roll back */
+	prev_flags = reg->flags;
+	reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+	reg->flags |= real_flags;
+
+	/* Currently supporting only imported memory */
+	switch (reg->gpu_alloc->type) {
+#ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_cpu_phy_pages(reg), reg->gpu_alloc->nents, reg->flags);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		/* Future use will use the new flags, existing mapping will NOT be updated
+		 * as memory should not be in use by the GPU when updating the flags.
+		 */
+		ret = 0;
+		WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+		break;
+#endif
+	default:
+		break;
+	}
+
+	/* roll back on error, i.e. not UMP */
+	if (ret)
+		reg->flags = prev_flags;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	up_write(&current->mm->mmap_sem);
+out:
+	return ret;
+}
+
+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
+
+#ifdef CONFIG_UMP
+static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	ump_dd_handle umph;
+	u64 block_count;
+	const ump_dd_physical_block_64 *block_array;
+	u64 i, j;
+	int page = 0;
+	ump_alloc_flags ump_flags;
+	ump_alloc_flags cpu_flags;
+	ump_alloc_flags gpu_flags;
+
+	if (*flags & BASE_MEM_SECURE)
+		goto bad_flags;
+
+	umph = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE == umph)
+		goto bad_id;
+
+	ump_flags = ump_dd_allocation_flags_get(umph);
+	cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK;
+	gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) &
+			UMP_DEVICE_MASK;
+
+	*va_pages = ump_dd_size_get_64(umph);
+	*va_pages >>= PAGE_SHIFT;
+
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	if (*flags & BASE_MEM_SAME_VA)
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	else
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+
+	if (!reg)
+		goto no_region;
+
+	/* we've got pages to map now, and support SAME_VA */
+	*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	reg->gpu_alloc->imported.ump_handle = umph;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMP is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMP cannot be grown */
+
+	/* Override import flags based on UMP flags */
+	*flags &= ~(BASE_MEM_CACHED_CPU);
+	*flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR);
+	*flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR);
+
+	if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) {
+		reg->flags |= KBASE_REG_CPU_CACHED;
+		*flags |= BASE_MEM_CACHED_CPU;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_WR) {
+		reg->flags |= KBASE_REG_CPU_WR;
+		*flags |= BASE_MEM_PROT_CPU_WR;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_RD) {
+		reg->flags |= KBASE_REG_CPU_RD;
+		*flags |= BASE_MEM_PROT_CPU_RD;
+	}
+
+	if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR))
+		reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (gpu_flags & UMP_PROT_DEVICE_WR) {
+		reg->flags |= KBASE_REG_GPU_WR;
+		*flags |= BASE_MEM_PROT_GPU_WR;
+	}
+
+	if (gpu_flags & UMP_PROT_DEVICE_RD) {
+		reg->flags |= KBASE_REG_GPU_RD;
+		*flags |= BASE_MEM_PROT_GPU_RD;
+	}
+
+	/* ump phys block query */
+	ump_dd_phys_blocks_get_64(umph, &block_count, &block_array);
+
+	for (i = 0; i < block_count; i++) {
+		for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) {
+			reg->gpu_alloc->pages[page] = block_array[i].addr + (j << PAGE_SHIFT);
+			page++;
+		}
+	}
+	reg->gpu_alloc->nents = *va_pages;
+	reg->extent = 0;
+
+	return reg;
+
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	ump_dd_release(umph);
+bad_id:
+bad_flags:
+	return NULL;
+}
+#endif				/* CONFIG_UMP */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, int fd, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	struct dma_buf *dma_buf;
+	struct dma_buf_attachment *dma_attachment;
+	bool shared_zone = false;
+
+	dma_buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(dma_buf))
+		goto no_buf;
+
+	dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+	if (!dma_attachment)
+		goto no_attachment;
+
+	*va_pages = PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT;
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* ignore SAME_VA */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	} else {
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	/* No pages to map yet */
+	reg->gpu_alloc->nents = 0;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMM is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMM cannot be grown */
+	reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (*flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (*flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (*flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (*flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (*flags & BASE_MEM_SECURE)
+		reg->flags |= KBASE_REG_SECURE;
+
+	/* no read or write permission given on import, only on run do we give the right permissions */
+
+	reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM;
+	reg->gpu_alloc->imported.umm.sgt = NULL;
+	reg->gpu_alloc->imported.umm.dma_buf = dma_buf;
+	reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
+	reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
+	reg->extent = 0;
+
+	return reg;
+
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	dma_buf_detach(dma_buf, dma_attachment);
+no_attachment:
+	dma_buf_put(dma_buf);
+no_buf:
+	return NULL;
+}
+#endif  /* CONFIG_DMA_SHARED_BUFFER */
+
+
+static struct kbase_va_region *kbase_mem_from_user_buffer(
+		struct kbase_context *kctx, unsigned long address,
+		unsigned long size, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	long faulted_pages;
+	int zone = KBASE_REG_ZONE_CUSTOM_VA;
+	bool shared_zone = false;
+
+	*va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) -
+		PFN_DOWN(address);
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (UINT64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* SAME_VA generally not supported with imported memory (no known use cases) */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		zone = KBASE_REG_ZONE_SAME_VA;
+	}
+
+	reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone);
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages,
+			KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */
+
+	if (*flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (*flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (*flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (*flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	down_read(&current->mm->mmap_sem);
+
+	/* A sanity check that get_user_pages will work on the memory */
+	/* (so the initial import fails on weird memory regions rather than */
+	/* the job failing when we try to handle the external resources). */
+	/* It doesn't take a reference to the pages (because the page list is NULL). */
+	/* We can't really store the page list because that would involve */
+	/* keeping the pages pinned - instead we pin/unpin around the job */
+	/* (as part of the external resources handling code) */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL);
+#else
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL);
+#endif
+	up_read(&current->mm->mmap_sem);
+
+	if (faulted_pages != *va_pages)
+		goto fault_mismatch;
+
+	reg->gpu_alloc->imported.user_buf.size = size;
+	reg->gpu_alloc->imported.user_buf.address = address;
+	reg->gpu_alloc->imported.user_buf.nr_pages = faulted_pages;
+	reg->gpu_alloc->imported.user_buf.pages = kmalloc_array(faulted_pages,
+			sizeof(struct page *), GFP_KERNEL);
+	reg->gpu_alloc->imported.user_buf.mm = current->mm;
+	atomic_inc(&current->mm->mm_count);
+
+	if (!reg->gpu_alloc->imported.user_buf.pages)
+		goto no_page_array;
+
+	reg->gpu_alloc->nents = 0;
+	reg->extent = 0;
+
+	return reg;
+
+no_page_array:
+fault_mismatch:
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	return NULL;
+
+}
+
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+		    u64 nents, struct base_mem_aliasing_info *ai,
+		    u64 *num_pages)
+{
+	struct kbase_va_region *reg;
+	u64 gpu_va;
+	size_t i;
+	bool coherent;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(ai);
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	/* mask to only allowed flags */
+	*flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+		   BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
+		   BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+
+	if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_alias called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+	coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 ||
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0;
+
+	if (!stride)
+		goto bad_stride;
+
+	if (!nents)
+		goto bad_nents;
+
+	if ((nents * stride) > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* calculate the number of pages this alias will cover */
+	*num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* 64-bit tasks must MMAP anyway, but not expose this address to
+		 * clients */
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_SAME_VA);
+	} else {
+#else
+	if (1) {
+#endif
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	/* zero-sized page array, as we don't need one/can support one */
+	reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	kbase_update_region_flags(kctx, reg, *flags);
+
+	reg->gpu_alloc->imported.alias.nents = nents;
+	reg->gpu_alloc->imported.alias.stride = stride;
+	reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents);
+	if (!reg->gpu_alloc->imported.alias.aliased)
+		goto no_aliased_array;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* validate and add src handles */
+	for (i = 0; i < nents; i++) {
+		if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+			if (ai[i].handle.basep.handle !=
+			    BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE)
+				goto bad_handle; /* unsupported magic handle */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+		} else {
+			struct kbase_va_region *aliasing_reg;
+			struct kbase_mem_phy_alloc *alloc;
+
+			aliasing_reg = kbase_region_tracker_find_region_base_address(
+				kctx,
+				(ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT);
+
+			/* validate found region */
+			if (!aliasing_reg)
+				goto bad_handle; /* Not found */
+			if (aliasing_reg->flags & KBASE_REG_FREE)
+				goto bad_handle; /* Free region */
+			if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+				goto bad_handle; /* Ephemeral region */
+			if (!aliasing_reg->gpu_alloc)
+				goto bad_handle; /* No alloc */
+			if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+				goto bad_handle; /* Not a native alloc */
+			if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0))
+				goto bad_handle;
+				/* Non-coherent memory cannot alias
+				   coherent memory, and vice versa.*/
+
+			/* check size against stride */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+
+			alloc = aliasing_reg->gpu_alloc;
+
+			/* check against the alloc's size */
+			if (ai[i].offset > alloc->nents)
+				goto bad_handle; /* beyond end */
+			if (ai[i].offset + ai[i].length > alloc->nents)
+				goto bad_handle; /* beyond end */
+
+			reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+			reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset;
+		}
+	}
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
+			goto no_cookie;
+		}
+		/* return a cookie */
+		gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << gpu_va);
+		BUG_ON(kctx->pending_regions[gpu_va]);
+		kctx->pending_regions[gpu_va] = reg;
+
+		/* relocate to correct base */
+		gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		gpu_va <<= PAGE_SHIFT;
+	} else /* we control the VA */ {
+#else
+	if (1) {
+#endif
+		if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+			dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags &= ~KBASE_REG_GROWABLE;
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return gpu_va;
+
+#ifdef CONFIG_64BIT
+no_cookie:
+#endif
+no_mmap:
+bad_handle:
+	kbase_gpu_vm_unlock(kctx);
+no_aliased_array:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_reg:
+bad_size:
+bad_nents:
+bad_stride:
+bad_flags:
+	return 0;
+}
+
+static u32 kbase_get_cache_line_alignment(struct kbase_context *kctx)
+{
+	u32 cpu_cache_line_size = cache_line_size();
+	u32 gpu_cache_line_size =
+		(1UL << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
+
+	return ((cpu_cache_line_size > gpu_cache_line_size) ?
+				cpu_cache_line_size :
+				gpu_cache_line_size);
+}
+
+static int kbase_check_buffer_size(struct kbase_context *kctx, u64 size)
+{
+	u32 cache_line_align = kbase_get_cache_line_alignment(kctx);
+
+	return (size & (cache_line_align - 1)) == 0 ? 0 : -EINVAL;
+}
+
+static int kbase_check_buffer_cache_alignment(struct kbase_context *kctx,
+					void __user *ptr)
+{
+	u32 cache_line_align = kbase_get_cache_line_alignment(kctx);
+
+	return ((uintptr_t)ptr & (cache_line_align - 1)) == 0 ? 0 : -EINVAL;
+}
+
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u64 *gpu_va, u64 *va_pages,
+		u64 *flags)
+{
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_pages);
+	KBASE_DEBUG_ASSERT(flags);
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+		*flags |= BASE_MEM_SAME_VA;
+#endif
+
+	if (!kbase_check_import_flags(*flags)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	switch (type) {
+#ifdef CONFIG_UMP
+	case BASE_MEM_IMPORT_TYPE_UMP: {
+		ump_secure_id id;
+
+		if (get_user(id, (ump_secure_id __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_ump(kctx, id, va_pages, flags);
+	}
+	break;
+#endif /* CONFIG_UMP */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case BASE_MEM_IMPORT_TYPE_UMM: {
+		int fd;
+
+		if (get_user(fd, (int __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_umm(kctx, fd, va_pages, flags);
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
+		struct base_mem_import_user_buffer user_buffer;
+		void __user *uptr;
+
+		if (copy_from_user(&user_buffer, phandle,
+				sizeof(user_buffer))) {
+			reg = NULL;
+		} else {
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				uptr = compat_ptr(user_buffer.ptr.compat_value);
+			else
+#endif
+				uptr = user_buffer.ptr.value;
+
+			if (0 != kbase_check_buffer_cache_alignment(kctx,
+									uptr)) {
+				dev_warn(kctx->kbdev->dev,
+				"User buffer is not cache line aligned!\n");
+				goto no_reg;
+			}
+
+			if (0 != kbase_check_buffer_size(kctx,
+					user_buffer.length)) {
+				dev_warn(kctx->kbdev->dev,
+				"User buffer size is not multiple of cache line size!\n");
+				goto no_reg;
+			}
+
+			reg = kbase_mem_from_user_buffer(kctx,
+					(unsigned long)uptr, user_buffer.length,
+					va_pages, flags);
+		}
+		break;
+	}
+	default: {
+		reg = NULL;
+		break;
+	}
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies)
+			goto no_cookie;
+		/* return a cookie */
+		*gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << *gpu_va);
+		BUG_ON(kctx->pending_regions[*gpu_va]);
+		kctx->pending_regions[*gpu_va] = reg;
+
+		/* relocate to correct base */
+		*gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		*gpu_va <<= PAGE_SHIFT;
+
+	} else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES)  {
+		/* we control the VA, mmap now to the GPU */
+		if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	} else {
+		/* we control the VA, but nothing to mmap yet */
+		if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	/* clear out private flags */
+	*flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+
+no_gpu_va:
+no_cookie:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+no_reg:
+bad_flags:
+	*gpu_va = 0;
+	*va_pages = 0;
+	*flags = 0;
+	return -ENOMEM;
+}
+
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	phys_addr_t *phy_pages;
+	u64 delta = new_pages - old_pages;
+	int ret = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Map the new pages into the GPU */
+	phy_pages = kbase_get_gpu_phy_pages(reg);
+	ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
+			phy_pages + old_pages, delta, reg->flags);
+
+	return ret;
+}
+
+static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 gpu_va_start = reg->start_pfn;
+
+	lockdep_assert_held(&kctx->process_mm->mmap_sem);
+
+	if (new_pages == old_pages)
+		/* Nothing to do */
+		return;
+
+	unmap_mapping_range(kctx->filp->f_inode->i_mapping,
+			(gpu_va_start + new_pages)<<PAGE_SHIFT,
+			(old_pages - new_pages)<<PAGE_SHIFT, 1);
+}
+
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 delta = old_pages - new_pages;
+	int ret = 0;
+
+	ret = kbase_mmu_teardown_pages(kctx,
+			reg->start_pfn + new_pages, delta);
+
+	return ret;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason)
+{
+	u64 old_pages;
+	u64 delta;
+	int res = -EINVAL;
+	struct kbase_va_region *reg;
+	bool read_locked = false;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(failure_reason);
+	KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE)) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+		goto out_unlock;
+	}
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (0 == (reg->flags & KBASE_REG_GROWABLE)) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (new_pages > reg->nr_pages) {
+		/* Would overflow the VA region */
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+		goto out_unlock;
+	}
+
+	/* can't be mapped more than once on the GPU */
+	if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+	/* can't grow regions which are ephemeral */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (new_pages == reg->gpu_alloc->nents) {
+		/* no change */
+		res = 0;
+		goto out_unlock;
+	}
+
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (new_pages > old_pages) {
+		delta = new_pages - old_pages;
+
+		/*
+		 * No update to the mm so downgrade the writer lock to a read
+		 * lock so other readers aren't blocked after this point.
+		 */
+		downgrade_write(&current->mm->mmap_sem);
+		read_locked = true;
+
+		/* Allocate some more pages */
+		if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+		if (reg->cpu_alloc != reg->gpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					reg->gpu_alloc, delta) != 0) {
+				*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+				kbase_free_phy_pages_helper(reg->cpu_alloc,
+						delta);
+				goto out_unlock;
+			}
+		}
+
+		/* No update required for CPU mappings, that's done on fault. */
+
+		/* Update GPU mapping. */
+		res = kbase_mem_grow_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* On error free the new pages */
+		if (res) {
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+	} else {
+		delta = old_pages - new_pages;
+
+		/* Update all CPU mapping(s) */
+		kbase_mem_shrink_cpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* Update the GPU mapping */
+		res = kbase_mem_shrink_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+
+		kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+		if (reg->cpu_alloc != reg->gpu_alloc)
+			kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	if (read_locked)
+		up_read(&current->mm->mmap_sem);
+	else
+		up_write(&current->mm->mmap_sem);
+
+	return res;
+}
+
+static void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* non-atomic as we're under Linux' mm lock */
+	map->count++;
+}
+
+static void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* non-atomic as we're under Linux' mm lock */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	kbase_gpu_vm_lock(map->kctx);
+
+	if (map->free_on_close) {
+		KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
+				KBASE_REG_ZONE_SAME_VA);
+		/* Avoid freeing memory on the process death which results in
+		 * GPU Page Fault. Memory will be freed in kbase_destroy_context
+		 */
+		if (!(current->flags & PF_EXITING))
+			kbase_mem_free_region(map->kctx, map->region);
+	}
+
+	list_del(&map->mappings_list);
+
+	kbase_gpu_vm_unlock(map->kctx);
+
+	kbase_mem_phy_alloc_put(map->alloc);
+	kfree(map);
+}
+
+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
+
+
+static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+	pgoff_t rel_pgoff;
+	size_t i;
+	pgoff_t addr;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	rel_pgoff = vmf->pgoff - map->region->start_pfn;
+
+	kbase_gpu_vm_lock(map->kctx);
+	if (rel_pgoff >= map->alloc->nents)
+		goto locked_bad_fault;
+
+	/* Fault on access to DONT_NEED regions */
+	if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED))
+		goto locked_bad_fault;
+
+	/* insert all valid pages from the fault location */
+	i = rel_pgoff;
+	addr = (pgoff_t)((uintptr_t)vmf->virtual_address >> PAGE_SHIFT);
+	while (i < map->alloc->nents && (addr < vma->vm_end >> PAGE_SHIFT)) {
+		int ret = vm_insert_pfn(vma, addr << PAGE_SHIFT,
+		    PFN_DOWN(map->alloc->pages[i]));
+		if (ret < 0 && ret != -EBUSY)
+			goto locked_bad_fault;
+
+		i++; addr++;
+	}
+
+	kbase_gpu_vm_unlock(map->kctx);
+	/* we resolved it, nothing for VM to do */
+	return VM_FAULT_NOPAGE;
+
+locked_bad_fault:
+	kbase_gpu_vm_unlock(map->kctx);
+	return VM_FAULT_SIGBUS;
+}
+
+const struct vm_operations_struct kbase_vm_ops = {
+	.open  = kbase_cpu_vm_open,
+	.close = kbase_cpu_vm_close,
+	.fault = kbase_cpu_vm_fault
+};
+
+static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close)
+{
+	struct kbase_cpu_mapping *map;
+	phys_addr_t *page_array;
+	int err = 0;
+	int i;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+	if (!map) {
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+	 * VM_DONTEXPAND - disable mremap on this region
+	 * VM_IO - disables paging
+	 * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+	 * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+	 *               This is needed to support using the dedicated and
+	 *               the OS based memory backends together.
+	 */
+	/*
+	 * This will need updating to propagate coherency flags
+	 * See MIDBASE-1057
+	 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_ops;
+	vma->vm_private_data = map;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+	    (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+		/* We can't map vmalloc'd memory uncached.
+		 * Other memory will have been returned from
+		 * kbase_mem_pool which would be
+		 * suitable for mapping uncached.
+		 */
+		BUG_ON(kaddr);
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	if (!kaddr) {
+		unsigned long addr = vma->vm_start + aligned_offset;
+		u64 start_off = vma->vm_pgoff - reg->start_pfn +
+			(aligned_offset>>PAGE_SHIFT);
+
+		vma->vm_flags |= VM_PFNMAP;
+		for (i = 0; i < nr_pages; i++) {
+			unsigned long pfn = PFN_DOWN(page_array[i + start_off]);
+
+			err = vm_insert_pfn(vma, addr, pfn);
+			if (WARN_ON(err))
+				break;
+
+			addr += PAGE_SIZE;
+		}
+	} else {
+		WARN_ON(aligned_offset);
+		/* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+		vma->vm_flags |= VM_MIXEDMAP;
+		/* vmalloc remaping is easy... */
+		err = remap_vmalloc_range(vma, kaddr, 0);
+		WARN_ON(err);
+	}
+
+	if (err) {
+		kfree(map);
+		goto out;
+	}
+
+	map->region = reg;
+	map->free_on_close = free_on_close;
+	map->kctx = reg->kctx;
+	map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->count = 1; /* start with one ref */
+
+	if (reg->flags & KBASE_REG_CPU_CACHED)
+		map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+	return err;
+}
+
+static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
+{
+	struct kbase_va_region *new_reg;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+	u32 *tb;
+	int owns_tb = 1;
+
+	dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	if (!kctx->jctx.tb) {
+		KBASE_DEBUG_ASSERT(0 != size);
+		tb = vmalloc_user(size);
+
+		if (NULL == tb) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = kbase_device_trace_buffer_install(kctx, tb, size);
+		if (err) {
+			vfree(tb);
+			goto out;
+		}
+	} else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	*kaddr = kctx->jctx.tb;
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_region;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->cpu_alloc->imported.kctx = kctx;
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+
+	/* alloc now owns the tb */
+	owns_tb = 0;
+
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_va_region;
+	}
+
+	*reg = new_reg;
+
+	/* map read only, noexec */
+	vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+	/* the rest of the flags is added by the cpu_mmap handler */
+
+	dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
+	return 0;
+
+out_no_va_region:
+out_no_alloc:
+	kbase_free_alloced_region(new_reg);
+out_no_region:
+	if (owns_tb) {
+		kbase_device_trace_buffer_uninstall(kctx);
+		vfree(tb);
+	}
+out:
+	return err;
+}
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
+{
+	struct kbase_va_region *new_reg;
+	void *kaddr;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	kaddr = kbase_mmu_dump(kctx, nr_pages);
+
+	if (!kaddr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_va_region;
+	}
+
+	*kmap_addr = kaddr;
+	*reg = new_reg;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+	return 0;
+
+out_no_alloc:
+out_va_region:
+	kbase_free_alloced_region(new_reg);
+out:
+	return err;
+}
+
+
+void kbase_os_mem_map_lock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	down_read(&mm->mmap_sem);
+}
+
+void kbase_os_mem_map_unlock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	up_read(&mm->mmap_sem);
+}
+
+static int kbasep_reg_mmap(struct kbase_context *kctx,
+			   struct vm_area_struct *vma,
+			   struct kbase_va_region **regm,
+			   size_t *nr_pages, size_t *aligned_offset)
+
+{
+	int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+	struct kbase_va_region *reg;
+	int err = 0;
+
+	*aligned_offset = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n");
+
+	/* SAME_VA stuff, fetch the right region */
+	reg = kctx->pending_regions[cookie];
+	if (!reg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if ((reg->flags & KBASE_REG_GPU_NX) && (reg->nr_pages != *nr_pages)) {
+		/* incorrect mmap size */
+		/* leave the cookie for a potential later
+		 * mapping, or to be reclaimed later when the
+		 * context is freed */
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) ||
+	    (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) {
+		/* VM flags inconsistent with region flags */
+		err = -EPERM;
+		dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n",
+							__FILE__, __LINE__);
+		goto out;
+	}
+
+	/* adjust down nr_pages to what we have physically */
+	*nr_pages = kbase_reg_current_backed_size(reg);
+
+	if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset,
+						reg->nr_pages, 1) != 0) {
+		dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__);
+		/* Unable to map in GPU space. */
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+	/* no need for the cookie anymore */
+	kctx->pending_regions[cookie] = NULL;
+	kctx->cookies |= (1UL << cookie);
+
+	/*
+	 * Overwrite the offset with the region start_pfn, so we effectively
+	 * map from offset 0 in the region. However subtract the aligned
+	 * offset so that when user space trims the mapping the beginning of
+	 * the trimmed VMA has the correct vm_pgoff;
+	 */
+	vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT);
+out:
+	*regm = reg;
+	dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n");
+
+	return err;
+}
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx = file->private_data;
+	struct kbase_va_region *reg = NULL;
+	void *kaddr = NULL;
+	size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int err = 0;
+	int free_on_close = 0;
+	struct device *dev = kctx->kbdev->dev;
+	size_t aligned_offset = 0;
+
+	dev_dbg(dev, "kbase_mmap\n");
+
+	/* strip away corresponding VM_MAY% flags to the VM_% flags requested */
+	vma->vm_flags &= ~((vma->vm_flags & (VM_READ | VM_WRITE)) << 4);
+
+	if (0 == nr_pages) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
+		/* The non-mapped tracking helper page */
+		err = kbase_tracking_page_setup(kctx, vma);
+		goto out_unlock;
+	}
+
+	/* if not the MTP, verify that the MTP has been mapped */
+	rcu_read_lock();
+	/* catches both when the special page isn't present or
+	 * when we've forked */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		err = -EINVAL;
+		rcu_read_unlock();
+		goto out_unlock;
+	}
+	rcu_read_unlock();
+
+	switch (vma->vm_pgoff) {
+	case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
+	case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE):
+		/* Illegal handle for direct map */
+		err = -EINVAL;
+		goto out_unlock;
+	case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE):
+		err = kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+		/* MMU dump */
+		err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+	     PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+		err = kbasep_reg_mmap(kctx, vma, &reg, &nr_pages,
+							&aligned_offset);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	}
+	default: {
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+					(u64)vma->vm_pgoff << PAGE_SHIFT);
+
+		if (reg && !(reg->flags & KBASE_REG_FREE)) {
+			/* will this mapping overflow the size of the region? */
+			if (nr_pages > (reg->nr_pages -
+					(vma->vm_pgoff - reg->start_pfn))) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
+			if ((vma->vm_flags & VM_READ &&
+			     !(reg->flags & KBASE_REG_CPU_RD)) ||
+			    (vma->vm_flags & VM_WRITE &&
+			     !(reg->flags & KBASE_REG_CPU_WR))) {
+				/* VM flags inconsistent with region flags */
+				err = -EPERM;
+				dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+				goto out_unlock;
+			}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+			if (KBASE_MEM_TYPE_IMPORTED_UMM ==
+							reg->cpu_alloc->type) {
+				err = dma_buf_mmap(
+					reg->cpu_alloc->imported.umm.dma_buf,
+					vma, vma->vm_pgoff - reg->start_pfn);
+				goto out_unlock;
+			}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+			/* limit what we map to the amount currently backed */
+			if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+				if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
+					nr_pages = 0;
+				else
+					nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn);
+			}
+		} else {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+	} /* default */
+	} /* switch */
+
+	err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
+		/* MMU dump - userspace should now have a reference on
+		 * the pages, so we can now free the kernel mapping */
+		vfree(kaddr);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+out:
+	if (err)
+		dev_err(dev, "mmap failed %d\n", err);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmap);
+
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map)
+{
+	struct kbase_va_region *reg;
+	unsigned long page_index;
+	unsigned int offset = gpu_addr & ~PAGE_MASK;
+	size_t page_count = PFN_UP(offset + size);
+	phys_addr_t *page_array;
+	struct page **pages;
+	void *cpu_addr = NULL;
+	pgprot_t prot;
+	size_t i;
+	bool sync_needed;
+
+	if (!size || !map)
+		return NULL;
+
+	/* check if page_count calculation will wrap */
+	if (size > ((size_t)-1 / PAGE_SIZE))
+		return NULL;
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn;
+
+	/* check if page_index + page_count will wrap */
+	if (-1UL - page_count < page_index)
+		goto out_unlock;
+
+	if (page_index + page_count > kbase_reg_current_backed_size(reg))
+		goto out_unlock;
+
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+	/* check access permissions can be satisfied
+	 * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */
+	if ((reg->flags & prot_request) != prot_request)
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+	if (!page_array)
+		goto out_unlock;
+
+	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out_unlock;
+
+	for (i = 0; i < page_count; i++)
+		pages[i] = pfn_to_page(PFN_DOWN(page_array[page_index + i]));
+
+	prot = PAGE_KERNEL;
+	if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
+		/* Map uncached */
+		prot = pgprot_writecombine(prot);
+	}
+	/* Note: enforcing a RO prot_request onto prot is not done, since:
+	 * - CPU-arch-specific integration required
+	 * - kbase_vmap() requires no access checks to be made/enforced */
+
+	cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+
+	kfree(pages);
+
+	if (!cpu_addr)
+		goto out_unlock;
+
+	map->gpu_addr = gpu_addr;
+	map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
+	map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+	map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
+	map->addr = (void *)((uintptr_t)cpu_addr + offset);
+	map->size = size;
+	map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0;
+	sync_needed = map->is_cached;
+
+#ifdef CONFIG_MALI_COH_KERN
+	/* kernel can use coherent memory if supported */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+		sync_needed = false;
+#endif
+
+	if (sync_needed) {
+		/* Sync first page */
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+		phys_addr_t cpu_pa = map->cpu_pages[0];
+		phys_addr_t gpu_pa = map->gpu_pages[0];
+
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+				KBASE_SYNC_TO_CPU);
+
+		/* Sync middle pages (if any) */
+		for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+			cpu_pa = map->cpu_pages[i];
+			gpu_pa = map->gpu_pages[i];
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+					KBASE_SYNC_TO_CPU);
+		}
+
+		/* Sync last page (if any) */
+		if (page_count > 1) {
+			cpu_pa = map->cpu_pages[page_count - 1];
+			gpu_pa = map->gpu_pages[page_count - 1];
+			sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+					KBASE_SYNC_TO_CPU);
+		}
+	}
+	kbase_gpu_vm_unlock(kctx);
+
+	return map->addr;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return NULL;
+}
+
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map)
+{
+	/* 0 is specified for prot_request to indicate no access checks should
+	 * be made.
+	 *
+	 * As mentioned in kbase_vmap_prot() this means that a kernel-side
+	 * CPU-RO mapping is not enforced to allow this to work */
+	return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map);
+}
+KBASE_EXPORT_TEST_API(kbase_vmap);
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+	void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+	bool sync_needed = map->is_cached;
+	vunmap(addr);
+#ifdef CONFIG_MALI_COH_KERN
+	/* kernel can use coherent memory if supported */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+		sync_needed = false;
+#endif
+	if (sync_needed) {
+		off_t offset = (uintptr_t)map->addr & ~PAGE_MASK;
+		size_t size = map->size;
+		size_t page_count = PFN_UP(offset + size);
+		size_t i;
+
+		/* Sync first page */
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+		phys_addr_t cpu_pa = map->cpu_pages[0];
+		phys_addr_t gpu_pa = map->gpu_pages[0];
+
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+				KBASE_SYNC_TO_DEVICE);
+
+		/* Sync middle pages (if any) */
+		for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+			cpu_pa = map->cpu_pages[i];
+			gpu_pa = map->gpu_pages[i];
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+					KBASE_SYNC_TO_DEVICE);
+		}
+
+		/* Sync last page (if any) */
+		if (page_count > 1) {
+			cpu_pa = map->cpu_pages[page_count - 1];
+			gpu_pa = map->gpu_pages[page_count - 1];
+			sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+					KBASE_SYNC_TO_DEVICE);
+		}
+	}
+	map->gpu_addr = 0;
+	map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+	map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+	map->cpu_pages = NULL;
+	map->gpu_pages = NULL;
+	map->addr = NULL;
+	map->size = 0;
+	map->is_cached = false;
+}
+KBASE_EXPORT_TEST_API(kbase_vunmap);
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
+{
+	struct mm_struct *mm;
+
+	rcu_read_lock();
+	mm = rcu_dereference(kctx->process_mm);
+	if (mm) {
+		atomic_add(pages, &kctx->nonmapped_pages);
+#ifdef SPLIT_RSS_COUNTING
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+#else
+		spin_lock(&mm->page_table_lock);
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+		spin_unlock(&mm->page_table_lock);
+#endif
+	}
+	rcu_read_unlock();
+}
+
+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
+{
+	int pages;
+	struct mm_struct *mm;
+
+	spin_lock(&kctx->mm_update_lock);
+	mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
+	if (!mm) {
+		spin_unlock(&kctx->mm_update_lock);
+		return;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, NULL);
+	spin_unlock(&kctx->mm_update_lock);
+	synchronize_rcu();
+
+	pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+#ifdef SPLIT_RSS_COUNTING
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+#else
+	spin_lock(&mm->page_table_lock);
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+	spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx;
+
+	kctx = vma->vm_private_data;
+	kbasep_os_process_page_usage_drain(kctx);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+	.close = kbase_special_vm_close,
+};
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
+{
+	/* check that this is the only tracking page */
+	spin_lock(&kctx->mm_update_lock);
+	if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
+		spin_unlock(&kctx->mm_update_lock);
+		return -EFAULT;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, current->mm);
+
+	spin_unlock(&kctx->mm_update_lock);
+
+	/* no real access */
+	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_special_ops;
+	vma->vm_private_data = kctx;
+
+	return 0;
+}
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
+{
+	int i;
+	int res;
+	void *va;
+	dma_addr_t  dma_pa;
+	struct kbase_va_region *reg;
+	phys_addr_t *page_array;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	unsigned long attrs = DMA_ATTR_WRITE_COMBINE;
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
+	u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+		    BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(0 != size);
+	KBASE_DEBUG_ASSERT(0 != pages);
+
+	if (size == 0)
+		goto err;
+
+	/* All the alloc calls return zeroed memory */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+			     attrs);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+			     &attrs);
+#else
+	va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
+#endif
+	if (!va)
+		goto err;
+
+	/* Store the state so we can free it later. */
+	handle->cpu_va = va;
+	handle->dma_pa = dma_pa;
+	handle->size   = size;
+
+
+	reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA);
+	if (!reg)
+		goto no_reg;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	kbase_update_region_flags(kctx, reg, flags);
+
+	reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(reg->cpu_alloc))
+		goto no_alloc;
+
+	reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	for (i = 0; i < pages; i++)
+		page_array[i] = dma_pa + (i << PAGE_SHIFT);
+
+	reg->cpu_alloc->nents = pages;
+
+	kbase_gpu_vm_lock(kctx);
+	res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
+	kbase_gpu_vm_unlock(kctx);
+	if (res)
+		goto no_mmap;
+
+	return va;
+
+no_mmap:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc:
+	kfree(reg);
+no_reg:
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
+#endif
+err:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_va_alloc);
+
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
+{
+	struct kbase_va_region *reg;
+	int err;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
+	KBASE_DEBUG_ASSERT(reg);
+	err = kbase_gpu_munmap(kctx, reg);
+	kbase_gpu_vm_unlock(kctx);
+	KBASE_DEBUG_ASSERT(!err);
+
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+		       handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+			handle->cpu_va, handle->dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, handle->size,
+				handle->cpu_va, handle->dma_pa);
+#endif
+}
+KBASE_EXPORT_SYMBOL(kbase_va_free);
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
new file mode 100755
index 0000000..2053a47
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -0,0 +1,220 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.h
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_MEM_LINUX_H_
+#define _KBASE_MEM_LINUX_H_
+
+/** A HWC dump mapping */
+struct kbase_hwc_dma_mapping {
+	void       *cpu_va;
+	dma_addr_t  dma_pa;
+	size_t      size;
+};
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
+		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
+		u64 *gpu_va);
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages);
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u64 *gpu_va, u64 *va_pages,
+		u64 *flags);
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason);
+int kbase_mmap(struct file *file, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction
+ * mechanism.
+ * @kctx: The kbase context to initialize.
+ *
+ * Return: Zero on success or -errno on failure.
+ */
+int kbase_mem_evictable_init(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to de-initialize.
+ */
+void kbase_mem_evictable_deinit(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the grow
+ * @old_pages: The number of pages before the grow
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Expand the GPU mapping to encompass the new psychical pages which have
+ * been added to the allocation.
+ *
+ * Note: Caller must be holding the region lock.
+ */
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction
+ * @gpu_alloc: The physical allocation to make evictable
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Take the provided region and make all the physical pages within it
+ * reclaimable by the kernel, updating the per-process VM stats as well.
+ * Remove any CPU mappings (as these can't be removed in the shrinker callback
+ * as mmap_sem might already be taken) but leave the GPU mapping intact as
+ * and until the shrinker reclaims the allocation.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc);
+
+/**
+ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for
+ * eviction.
+ * @alloc: The physical allocation to remove eviction eligibility from.
+ *
+ * Return: True if the allocation had its backing restored and false if
+ * it hasn't.
+ *
+ * Make the physical pages in the region no longer reclaimable and update the
+ * per-process stats, if the shrinker has already evicted the memory then
+ * re-allocate it if the region is still alive.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
+
+struct kbase_vmap_struct {
+	u64 gpu_addr;
+	struct kbase_mem_phy_alloc *cpu_alloc;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+	phys_addr_t *cpu_pages;
+	phys_addr_t *gpu_pages;
+	void *addr;
+	size_t size;
+	bool is_cached;
+};
+
+
+/**
+ * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the
+ * requested access permissions are supported
+ * @kctx:         Context the VA range belongs to
+ * @gpu_addr:     Start address of VA range
+ * @size:         Size of VA range
+ * @prot_request: Flags indicating how the caller will then access the memory
+ * @map:          Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check
+ * whether the region should allow the intended access, and return an error if
+ * disallowed. This is essential for security of imported memory, particularly
+ * a user buf from SHM mapped into the process as RO. In that case, write
+ * access must be checked if the intention is for kernel to write to the
+ * memory.
+ *
+ * The checks are also there to help catch access errors on memory where
+ * security is not a concern: imported memory that is always RW, and memory
+ * that was allocated and owned by the process attached to @kctx. In this case,
+ * it helps to identify memory that was was mapped with the wrong access type.
+ *
+ * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases
+ * where either the security of memory is solely dependent on those flags, or
+ * when userspace code was expecting only the GPU to access the memory (e.g. HW
+ * workarounds).
+ *
+ */
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vmap - Map a GPU VA range into the kernel safely
+ * @kctx:     Context the VA range belongs to
+ * @gpu_addr: Start address of VA range
+ * @size:     Size of VA range
+ * @map:      Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no
+ * checks to ensure the security of e.g. imported user bufs from RO SHM.
+ */
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vunmap - Unmap a GPU VA range from the kernel
+ * @kctx: Context the VA range belongs to
+ * @map:  Structure describing the mapping from the corresponding kbase_vmap()
+ *        call
+ *
+ * Unmaps a GPU VA range from the kernel, given its @map structure obtained
+ * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * The reference taken on pages during kbase_vmap() is released.
+ */
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
+
+/** @brief Allocate memory from kernel space and map it onto the GPU
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param size   The size of the allocation in bytes
+ * @param handle An opaque structure used to contain the state needed to free the memory
+ * @return the VA for kernel space and GPU MMU
+ */
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
+
+/** @brief Free/unmap memory allocated by kbase_va_alloc
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param handle An opaque structure returned by the kbase_va_alloc function.
+ */
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
+
+extern const struct vm_operations_struct kbase_vm_ops;
+
+#endif				/* _KBASE_MEM_LINUX_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
new file mode 100755
index 0000000..441180b
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_MEM_LOWLEVEL_H
+#define _KBASE_MEM_LOWLEVEL_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/dma-mapping.h>
+
+/**
+ * @brief Flags for kbase_phy_allocator_pages_alloc
+ */
+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0)	/** Default allocation flag */
+#define KBASE_PHY_PAGES_FLAG_CLEAR   (1 << 0)	/** Clear the pages after allocation */
+#define KBASE_PHY_PAGES_FLAG_POISON  (1 << 1)	/** Fill the memory with a poison value */
+
+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON)
+
+#define KBASE_PHY_PAGES_POISON_VALUE  0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */
+
+enum kbase_sync_type {
+	KBASE_SYNC_TO_CPU,
+	KBASE_SYNC_TO_DEVICE
+};
+
+#endif				/* _KBASE_LOWLEVEL_H */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
new file mode 100755
index 0000000..9a3f9b5
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -0,0 +1,594 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/shrinker.h>
+#include <linux/atomic.h>
+#include <linux/version.h>
+
+/* This function is only provided for backwards compatibility with kernels
+ * which use the old carveout allocator.
+ *
+ * The forward declaration is to keep sparse happy.
+ */
+int __init kbase_carveout_mem_reserve(
+		phys_addr_t size);
+int __init kbase_carveout_mem_reserve(phys_addr_t size)
+{
+	return 0;
+}
+
+#define pool_dbg(pool, format, ...) \
+	dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format,	\
+		(pool->next_pool) ? "kctx" : "kbdev",	\
+		kbase_mem_pool_size(pool),	\
+		kbase_mem_pool_max_size(pool),	\
+		##__VA_ARGS__)
+
+#define NOT_DIRTY false
+#define NOT_RECLAIMED false
+
+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
+{
+	spin_lock(&pool->pool_lock);
+}
+
+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
+{
+	spin_unlock(&pool->pool_lock);
+}
+
+static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
+{
+	ssize_t max_size = kbase_mem_pool_max_size(pool);
+	ssize_t cur_size = kbase_mem_pool_size(pool);
+
+	return max(max_size - cur_size, (ssize_t)0);
+}
+
+static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool);
+}
+
+static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) == 0;
+}
+
+static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_add(&p->lru, &pool->page_list);
+	pool->cur_size++;
+
+	zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "added page\n");
+}
+
+static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_locked(pool, p);
+	kbase_mem_pool_unlock(pool);
+}
+
+static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_for_each_entry(p, page_list, lru) {
+		zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+	}
+
+	list_splice(page_list, &pool->page_list);
+	pool->cur_size += nr_pages;
+
+	pool_dbg(pool, "added %zu pages\n", nr_pages);
+}
+
+static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_list_locked(pool, page_list, nr_pages);
+	kbase_mem_pool_unlock(pool);
+}
+
+static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (kbase_mem_pool_is_empty(pool))
+		return NULL;
+
+	p = list_first_entry(&pool->page_list, struct page, lru);
+	list_del_init(&p->lru);
+	pool->cur_size--;
+
+	zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "removed page\n");
+
+	return p;
+}
+
+static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	kbase_mem_pool_lock(pool);
+	p = kbase_mem_pool_remove_locked(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return p;
+}
+
+static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+
+	dma_sync_single_for_device(dev, kbase_dma_addr(p),
+			PAGE_SIZE, DMA_BIDIRECTIONAL);
+}
+
+static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	clear_highpage(p);
+	kbase_mem_pool_sync_page(pool, p);
+}
+
+static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool,
+		struct page *p)
+{
+	/* Zero page before spilling */
+	kbase_mem_pool_zero_page(next_pool, p);
+
+	kbase_mem_pool_add(next_pool, p);
+}
+
+struct page *kbase_mem_alloc_page(struct kbase_device *kbdev)
+{
+	struct page *p;
+	gfp_t gfp;
+	struct device *dev = kbdev->dev;
+	dma_addr_t dma_addr;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \
+	LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+	/* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+	gfp = GFP_USER | __GFP_ZERO;
+#else
+	gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+	if (current->flags & PF_KTHREAD) {
+		/* Don't trigger OOM killer from kernel threads, e.g. when
+		 * growing memory on GPU page fault */
+		gfp |= __GFP_NORETRY;
+	}
+
+	p = alloc_page(gfp);
+	if (!p)
+		return NULL;
+
+	dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, dma_addr)) {
+		__free_page(p);
+		return NULL;
+	}
+
+	WARN_ON(dma_addr != page_to_phys(p));
+
+	kbase_set_dma_addr(p, dma_addr);
+
+	return p;
+}
+
+static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr = kbase_dma_addr(p);
+
+	dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	kbase_clear_dma_addr(p);
+	__free_page(p);
+
+	pool_dbg(pool, "freed page to kernel\n");
+}
+
+static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	struct page *p;
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	return i;
+}
+
+static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	size_t nr_freed;
+
+	kbase_mem_pool_lock(pool);
+	nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	kbase_mem_pool_unlock(pool);
+
+	return nr_freed;
+}
+
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
+		size_t nr_to_grow)
+{
+	struct page *p;
+	size_t i;
+
+	for (i = 0; i < nr_to_grow; i++) {
+		p = kbase_mem_alloc_page(pool->kbdev);
+		if (!p)
+			return -ENOMEM;
+		kbase_mem_pool_add(pool, p);
+	}
+
+	return 0;
+}
+
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
+{
+	size_t cur_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	if (new_size > pool->max_size)
+		new_size = pool->max_size;
+
+	if (new_size < cur_size)
+		kbase_mem_pool_shrink(pool, cur_size - new_size);
+	else if (new_size > cur_size)
+		kbase_mem_pool_grow(pool, new_size - cur_size);
+}
+
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size)
+{
+	size_t cur_size;
+	size_t nr_to_shrink;
+
+	kbase_mem_pool_lock(pool);
+
+	pool->max_size = max_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+	if (max_size < cur_size) {
+		nr_to_shrink = cur_size - max_size;
+		kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	}
+
+	kbase_mem_pool_unlock(pool);
+}
+
+
+static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+	pool_dbg(pool, "reclaim count: %zu\n", kbase_mem_pool_size(pool));
+	return kbase_mem_pool_size(pool);
+}
+
+static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+	unsigned long freed;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+
+	pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan);
+
+	freed = kbase_mem_pool_shrink(pool, sc->nr_to_scan);
+
+	pool_dbg(pool, "reclaim freed %ld pages\n", freed);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_pool_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_pool_reclaim_count_objects(s, sc);
+
+	return kbase_mem_pool_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool)
+{
+	pool->cur_size = 0;
+	pool->max_size = max_size;
+	pool->kbdev = kbdev;
+	pool->next_pool = next_pool;
+
+	spin_lock_init(&pool->pool_lock);
+	INIT_LIST_HEAD(&pool->page_list);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink;
+#else
+	pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects;
+	pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects;
+#endif
+	pool->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	pool->reclaim.batch = 0;
+#endif
+	register_shrinker(&pool->reclaim);
+
+	pool_dbg(pool, "initialized\n");
+
+	return 0;
+}
+
+void kbase_mem_pool_term(struct kbase_mem_pool *pool)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_spill = 0;
+	LIST_HEAD(spill_list);
+	int i;
+
+	pool_dbg(pool, "terminate()\n");
+
+	unregister_shrinker(&pool->reclaim);
+
+	kbase_mem_pool_lock(pool);
+	pool->max_size = 0;
+
+	if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool (may overspill) */
+		nr_to_spill = kbase_mem_pool_capacity(next_pool);
+		nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill);
+
+		/* Zero pages first without holding the next_pool lock */
+		for (i = 0; i < nr_to_spill; i++) {
+			p = kbase_mem_pool_remove_locked(pool);
+			kbase_mem_pool_zero_page(pool, p);
+			list_add(&p->lru, &spill_list);
+		}
+	}
+
+	while (!kbase_mem_pool_is_empty(pool)) {
+		/* Free remaining pages to kernel */
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	kbase_mem_pool_unlock(pool);
+
+	if (next_pool && nr_to_spill) {
+		/* Add new page list to next_pool */
+		kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill);
+
+		pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill);
+	}
+
+	pool_dbg(pool, "terminated\n");
+}
+
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	do {
+		pool_dbg(pool, "alloc()\n");
+		p = kbase_mem_pool_remove(pool);
+
+		if (p)
+			return p;
+
+		pool = pool->next_pool;
+	} while (pool);
+
+	return NULL;
+}
+
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+
+	pool_dbg(pool, "free()\n");
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add(pool, p);
+	} else if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool */
+		kbase_mem_pool_spill(next_pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages)
+{
+	struct page *p;
+	size_t nr_from_pool;
+	size_t i;
+	int err = -ENOMEM;
+
+	pool_dbg(pool, "alloc_pages(%zu):\n", nr_pages);
+
+	/* Get pages from this pool */
+	kbase_mem_pool_lock(pool);
+	nr_from_pool = min(nr_pages, kbase_mem_pool_size(pool));
+	for (i = 0; i < nr_from_pool; i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		pages[i] = page_to_phys(p);
+	}
+	kbase_mem_pool_unlock(pool);
+
+	if (i != nr_pages && pool->next_pool) {
+		/* Allocate via next pool */
+		err = kbase_mem_pool_alloc_pages(pool->next_pool,
+				nr_pages - i, pages + i);
+
+		if (err)
+			goto err_rollback;
+
+		i += nr_pages - i;
+	}
+
+	/* Get any remaining pages from kernel */
+	for (; i < nr_pages; i++) {
+		p = kbase_mem_alloc_page(pool->kbdev);
+		if (!p)
+			goto err_rollback;
+		pages[i] = page_to_phys(p);
+	}
+
+	pool_dbg(pool, "alloc_pages(%zu) done\n", nr_pages);
+
+	return 0;
+
+err_rollback:
+	kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED);
+	return err;
+}
+
+static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
+		size_t nr_pages, phys_addr_t *pages, bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first without holding the pool lock */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!pages[i]))
+			continue;
+
+		p = phys_to_page(pages[i]);
+
+		if (zero)
+			kbase_mem_pool_zero_page(pool, p);
+		else if (sync)
+			kbase_mem_pool_sync_page(pool, p);
+
+		list_add(&p->lru, &new_page_list);
+		nr_to_pool++;
+		pages[i] = 0;
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages, bool dirty, bool reclaimed)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	pool_dbg(pool, "free_pages(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
+
+		i += nr_to_pool;
+
+		if (i != nr_pages && next_pool) {
+			/* Spill to next pool (may overspill) */
+			nr_to_pool = kbase_mem_pool_capacity(next_pool);
+			nr_to_pool = min(nr_pages - i, nr_to_pool);
+
+			kbase_mem_pool_add_array(next_pool, nr_to_pool,
+					pages + i, true, dirty);
+			i += nr_to_pool;
+		}
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!pages[i]))
+			continue;
+
+		p = phys_to_page(pages[i]);
+		if (reclaimed)
+			zone_page_state_add(-1, page_zone(p),
+					NR_SLAB_RECLAIMABLE);
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = 0;
+	}
+
+	pool_dbg(pool, "free_pages(%zu) done\n", nr_pages);
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
new file mode 100755
index 0000000..585fba0
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <mali_kbase_mem_pool_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_trim(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops,
+		kbase_mem_pool_debugfs_size_get,
+		kbase_mem_pool_debugfs_size_set,
+		"%llu\n");
+
+static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_max_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_set_max_size(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops,
+		kbase_mem_pool_debugfs_max_size_get,
+		kbase_mem_pool_debugfs_max_size_set,
+		"%llu\n");
+
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_mem_pool *pool)
+{
+	debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_size_fops);
+
+	debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_max_size_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
new file mode 100755
index 0000000..1442854
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_MEM_POOL_DEBUGFS_H
+#define _KBASE_MEM_POOL_DEBUGFS_H
+
+#include <mali_kbase.h>
+
+/**
+ * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool
+ * @parent: Parent debugfs dentry
+ * @pool:   Memory pool to control
+ *
+ * Adds two debugfs files under @parent:
+ * - mem_pool_size: get/set the current size of @pool
+ * - mem_pool_max_size: get/set the max size of @pool
+ */
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_mem_pool *pool);
+
+#endif  /*_KBASE_MEM_POOL_DEBUGFS_H*/
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
new file mode 100755
index 0000000..092da9a
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,119 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Show callback for the @c mem_profile debugfs file.
+ *
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise
+ */
+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+
+	seq_putc(sfile, '\n');
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for mem_profile
+ */
+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_mem_profile_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+	.open = kbasep_mem_profile_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+		kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		if (!debugfs_create_file("mem_profile", S_IRUGO,
+					kctx->kctx_dentry, kctx,
+					&kbasep_mem_profile_debugfs_fops)) {
+			err = -EAGAIN;
+		} else {
+			kbase_ctx_flag_set(kctx,
+					   KCTX_MEM_PROFILE_INITIALIZED);
+		}
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		kfree(kctx->mem_profile_data);
+		kctx->mem_profile_data = data;
+		kctx->mem_profile_size = size;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d",
+		err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return err;
+}
+
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+				kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	kfree(kctx->mem_profile_data);
+	kctx->mem_profile_data = NULL;
+	kctx->mem_profile_size = 0;
+
+	mutex_unlock(&kctx->mem_profile_lock);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	kfree(data);
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
new file mode 100755
index 0000000..a1dc2e0
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
@@ -0,0 +1,59 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs.h
+ * Header file for mem profiles entries in debugfs
+ *
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
+#define _KBASE_MEM_PROFILE_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Remove entry from Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
+
+/**
+ * @brief Insert @p data to the debugfs file so it can be read by userspace
+ *
+ * The function takes ownership of @p data and frees it later when new data
+ * is inserted.
+ *
+ * If the debugfs entry corresponding to the @p kctx doesn't exist,
+ * an attempt will be made to create it.
+ *
+ * @param kctx The context whose debugfs file @p data should be inserted to
+ * @param data A NULL-terminated string to be inserted to the debugfs file,
+ *             without the trailing new line character
+ * @param size The length of the @p data string
+ * @return 0 if @p data inserted correctly
+ *         -EAGAIN in case of error
+ * @post @ref mem_profile_initialized will be set to @c true
+ *       the first time this function succeeds.
+ */
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size);
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
new file mode 100755
index 0000000..82f0702
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs_buf_size.h
+ * Header file for the size of the buffer to accumulate the histogram report text in
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * The size of the buffer to accumulate the histogram report text in
+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56))
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
new file mode 100755
index 0000000..40b1ab5
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -0,0 +1,2064 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mmu.c
+ * Base kernel MMU management.
+ */
+
+/* #define DEBUG    1 */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_debug.h>
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_time.h>
+#include <mali_kbase_mem.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+/**
+ * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs.
+ *
+ * If sync is not set then transactions still in flight when the flush is issued
+ * may use the old page tables and the data they write will not be written out
+ * to memory, this function returns after the flush has been issued but
+ * before all accesses which might effect the flushed region have completed.
+ *
+ * If sync is set then accesses in the flushed region will be drained
+ * before data is flush and invalidated through L1, L2 and into memory,
+ * after which point this function will return.
+ */
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync);
+
+/**
+ * kbase_mmu_sync_pgd - sync page directory to memory
+ * @kbdev:	Device pointer.
+ * @handle:	Address of DMA region.
+ * @size:       Size of the region to sync.
+ *
+ * This should be called after each page directory update.
+ */
+
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
+		dma_addr_t handle, size_t size)
+{
+	/* If page table is not coherent then ensure the gpu can read
+	 * the pages from memory
+	 */
+	if (kbdev->system_coherency != COHERENCY_ACE)
+		dma_sync_single_for_device(kbdev->dev, handle, size,
+				DMA_TO_DEVICE);
+}
+
+/*
+ * Definitions:
+ * - PGD: Page Directory.
+ * - PTE: Page Table Entry. A 64bit value pointing to the next
+ *        level of translation
+ * - ATE: Address Transation Entry. A 64bit value pointing to
+ *        a 4kB physical page.
+ */
+
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str);
+
+
+static size_t make_multiple(size_t minimum, size_t multiple)
+{
+	size_t remainder = minimum % multiple;
+
+	if (remainder == 0)
+		return minimum;
+
+	return minimum + multiple - remainder;
+}
+
+void page_fault_worker(struct work_struct *data)
+{
+	u64 fault_pfn;
+	u32 fault_status;
+	size_t new_pages;
+	size_t fault_rel_pfn;
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_va_region *region;
+	int err;
+	bool grown = false;
+
+	faulting_as = container_of(data, struct kbase_as, work_pagefault);
+	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+
+	if (unlikely(faulting_as->protected_mode))
+	{
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Protected mode fault");
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+
+		goto fault_done;
+	}
+
+	fault_status = faulting_as->fault_status;
+	switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
+		/* need to check against the region to handle this one */
+		break;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Translation table bus fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
+		/* nothing to do, but we don't expect this fault currently */
+		dev_warn(kbdev->dev, "Access flag unexpectedly set");
+		goto fault_done;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
+
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Address size fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Memory attributes fault");
+		goto fault_done;
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	default:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Unknown fault code");
+		goto fault_done;
+	}
+
+	/* so we have a translation fault, let's see if it is for growable
+	 * memory */
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx,
+			faulting_as->fault_addr);
+	if (!region || region->flags & KBASE_REG_FREE) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if ((region->flags & GROWABLE_FLAGS_REQUIRED)
+			!= GROWABLE_FLAGS_REQUIRED) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not growable");
+		goto fault_done;
+	}
+
+	if ((region->flags & KBASE_REG_DONT_NEED)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Don't need memory can't be grown");
+		goto fault_done;
+	}
+
+	/* find the size we need to grow it by */
+	/* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address
+	 * validating the fault_adress to be within a size_t from the start_pfn */
+	fault_rel_pfn = fault_pfn - region->start_pfn;
+
+	if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
+		dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+				faulting_as->fault_addr, region->start_pfn,
+				region->start_pfn +
+				kbase_reg_current_backed_size(region));
+
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* [1] in case another page fault occurred while we were
+		 * handling the (duplicate) page fault we need to ensure we
+		 * don't loose the other page fault as result of us clearing
+		 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
+		 * an UNLOCK command that will retry any stalled memory
+		 * transaction (which should cause the other page fault to be
+		 * raised again).
+		 */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+
+		goto fault_done;
+	}
+
+	new_pages = make_multiple(fault_rel_pfn -
+			kbase_reg_current_backed_size(region) + 1,
+			region->extent);
+
+	/* cap to max vsize */
+	if (new_pages + kbase_reg_current_backed_size(region) >
+			region->nr_pages)
+		new_pages = region->nr_pages -
+				kbase_reg_current_backed_size(region);
+
+	if (0 == new_pages) {
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Duplicate of a fault we've already handled, nothing to do */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* See comment [1] about UNLOCK usage */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+		goto fault_done;
+	}
+
+	if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) {
+		if (region->gpu_alloc != region->cpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					region->cpu_alloc, new_pages) == 0) {
+				grown = true;
+			} else {
+				kbase_free_phy_pages_helper(region->gpu_alloc,
+						new_pages);
+			}
+		} else {
+			grown = true;
+		}
+	}
+
+
+	if (grown) {
+		u64 pfn_offset;
+		u32 op;
+
+		/* alloc success */
+		KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
+
+		/* set up the new pages */
+		pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
+		/*
+		 * Note:
+		 * Issuing an MMU operation will unlock the MMU and cause the
+		 * translation to be replayed. If the page insertion fails then
+		 * rather then trying to continue the context should be killed
+		 * so the no_flush version of insert_pages is used which allows
+		 * us to unlock the MMU as we see fit.
+		 */
+		err = kbase_mmu_insert_pages_no_flush(kctx,
+				region->start_pfn + pfn_offset,
+				&kbase_get_gpu_phy_pages(region)[pfn_offset],
+				new_pages, region->flags);
+		if (err) {
+			kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
+			if (region->gpu_alloc != region->cpu_alloc)
+				kbase_free_phy_pages_helper(region->cpu_alloc,
+						new_pages);
+			kbase_gpu_vm_unlock(kctx);
+			/* The locked VA region will be unlocked and the cache invalidated in here */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page table update failure");
+			goto fault_done;
+		}
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
+#endif
+		KBASE_TLSTREAM_AUX_PAGEFAULT(kctx->id, (u64)new_pages);
+
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* flush L2 and unlock the VA (resumes the MMU) */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+			op = AS_COMMAND_FLUSH;
+		else
+			op = AS_COMMAND_FLUSH_PT;
+
+		/* clear MMU interrupt - this needs to be done after updating
+		 * the page tables but before issuing a FLUSH command. The
+		 * FLUSH cmd has a side effect that it restarts stalled memory
+		 * transactions in other address spaces which may cause
+		 * another fault to occur. If we didn't clear the interrupt at
+		 * this stage a new IRQ might not be raised when the GPU finds
+		 * a MMU IRQ is already pending.
+		 */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
+					  faulting_as->fault_addr >> PAGE_SHIFT,
+					  new_pages,
+					  op, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		/* reenable this in the mask */
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* failed to extend, handle as a normal PF */
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Page allocation failure");
+	}
+
+fault_done:
+	/*
+	 * By this point, the fault was handled in some way,
+	 * so release the ctx refcount
+	 */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
+{
+	u64 *page;
+	int i;
+	struct page *p;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
+	kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	p = kbase_mem_pool_alloc(&kctx->mem_pool);
+	if (!p)
+		goto sub_pages;
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			(u32)kctx->id,
+			(u64)new_page_count);
+
+	page = kmap(p);
+	if (NULL == page)
+		goto alloc_free;
+
+	kbase_process_page_usage_inc(kctx, 1);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
+		kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
+
+	kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+	kunmap(p);
+	return page_to_phys(p);
+
+alloc_free:
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+sub_pages:
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
+
+/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
+ * new table from the pool if needed and possible
+ */
+static int mmu_get_next_pgd(struct kbase_context *kctx,
+		phys_addr_t *pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(*pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	p = pfn_to_page(PFN_DOWN(*pgd));
+	page = kmap(p);
+	if (NULL == page) {
+		dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
+		return -EINVAL;
+	}
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+
+	if (!target_pgd) {
+		target_pgd = kbase_mmu_alloc_pgd(kctx);
+		if (!target_pgd) {
+			dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+			kunmap(p);
+			return -ENOMEM;
+		}
+
+		kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+		/* Rely on the caller to update the address space flags. */
+	}
+
+	kunmap(p);
+	*pgd = target_pgd;
+
+	return 0;
+}
+
+static int mmu_get_bottom_pgd(struct kbase_context *kctx,
+		u64 vpfn, phys_addr_t *out_pgd)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd = kctx->pgd;
+	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
+		int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l);
+		/* Handle failure condition */
+		if (err) {
+			dev_dbg(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n");
+			return err;
+		}
+	}
+
+	*out_pgd = pgd;
+
+	return 0;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+
+	KBASE_DEBUG_ASSERT(pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail */
+	KBASE_DEBUG_ASSERT(NULL != page);
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+	/* As we are recovering from what has already been set up, we should have a target_pgd */
+	KBASE_DEBUG_ASSERT(0 != target_pgd);
+	kunmap_atomic(page);
+	return target_pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd = kctx->pgd;
+
+	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
+		pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
+		/* Should never fail */
+		KBASE_DEBUG_ASSERT(0 != pgd);
+	}
+
+	return pgd;
+}
+
+static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vpfn,
+					      size_t nr)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	lockdep_assert_held(&kctx->mmu_lock);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn);
+		KBASE_DEBUG_ASSERT(0 != pgd);
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+
+		pgd_page = kmap_atomic(p);
+		KBASE_DEBUG_ASSERT(NULL != pgd_page);
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+		kunmap_atomic(pgd_page);
+	}
+}
+
+/*
+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					phys_addr_t phys, size_t nr,
+					unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_single_page only partially completes we need to be
+	 * able to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > remain)
+			count = remain;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+			kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+					phys, flags);
+		}
+
+		vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table.
+		 * If further pages need inserting and fail we need to undo what
+		 * has already taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_pages only partially completes we need to be able
+	 * to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > remain)
+			count = remain;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+			kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+					phys[i], flags);
+		}
+
+		phys += count;
+		vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table. If further pages
+		 * need inserting and fail we need to undo what has already
+		 * taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags)
+{
+	int err;
+
+	err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+
+/**
+ * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
+ * without retaining the kbase context.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
+ * other locking.
+ */
+static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+	u32 op;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	if (sync)
+		op = AS_COMMAND_FLUSH_MEM;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	err = kbase_mmu_hw_do_operation(kbdev,
+				&kbdev->as[kctx->as_nr],
+				kctx, vpfn, nr, op, 0);
+#if KBASE_GPU_RESET_EN
+	if (err) {
+		/* Flush failed to complete, assume the
+		 * GPU has hung and perform a reset to
+		 * recover */
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_NO_MALI
+	/*
+	 * As this function could be called in interrupt context the sync
+	 * request can't block. Instead log the request and the next flush
+	 * request will pick it up.
+	 */
+	if ((!err) && sync &&
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367))
+		atomic_set(&kctx->drain_pending, 1);
+#endif /* !CONFIG_MALI_NO_MALI */
+}
+
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev;
+	bool ctx_is_in_runpool;
+#ifndef CONFIG_MALI_NO_MALI
+	bool drain_pending = false;
+
+	if (atomic_xchg(&kctx->drain_pending, 0))
+		drain_pending = true;
+#endif /* !CONFIG_MALI_NO_MALI */
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	kbdev = kctx->kbdev;
+	ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
+
+	if (ctx_is_in_runpool) {
+		KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+		if (!kbase_pm_context_active_handle_suspend(kbdev,
+			KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+			int err;
+			u32 op;
+
+			/* AS transaction begin */
+			mutex_lock(&kbdev->mmu_hw_mutex);
+
+			if (sync)
+				op = AS_COMMAND_FLUSH_MEM;
+			else
+				op = AS_COMMAND_FLUSH_PT;
+
+			err = kbase_mmu_hw_do_operation(kbdev,
+						&kbdev->as[kctx->as_nr],
+						kctx, vpfn, nr, op, 0);
+
+#if KBASE_GPU_RESET_EN
+			if (err) {
+				/* Flush failed to complete, assume the
+				 * GPU has hung and perform a reset to
+				 * recover */
+				dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
+
+				if (kbase_prepare_to_reset_gpu(kbdev))
+					kbase_reset_gpu(kbdev);
+			}
+#endif /* KBASE_GPU_RESET_EN */
+
+			mutex_unlock(&kbdev->mmu_hw_mutex);
+			/* AS transaction end */
+
+#ifndef CONFIG_MALI_NO_MALI
+			/*
+			 * The transaction lock must be dropped before here
+			 * as kbase_wait_write_flush could take it if
+			 * the GPU was powered down (static analysis doesn't
+			 * know this can't happen).
+			 */
+			drain_pending |= (!err) && sync &&
+					kbase_hw_has_issue(kctx->kbdev,
+							BASE_HW_ISSUE_6367);
+			if (drain_pending) {
+				/* Wait for GPU to flush write buffer */
+				kbase_wait_write_flush(kctx);
+			}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+			kbase_pm_context_idle(kbdev);
+		}
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+}
+
+void kbase_mmu_update(struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the hwaccess_lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	kctx->kbdev->mmu_mode->update(kctx);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_update);
+
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	kbdev->mmu_mode->disable_as(kbdev, as_nr);
+}
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the hwaccess_lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/*
+	 * The address space is being disabled, drain all knowledge of it out
+	 * from the caches as pages and page tables might be freed after this.
+	 *
+	 * The job scheduler code will already be holding the locks and context
+	 * so just do the flush.
+	 */
+	kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
+
+	kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+
+/*
+ * We actually only discard the ATE, and not the page table
+ * pages. There is a potential DoS here, as we'll leak memory by
+ * having PTEs that are potentially unused.  Will require physical
+ * page accounting, so MMU pages are part of the process allocation.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	struct kbase_device *kbdev;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
+
+	if (0 == nr) {
+		/* early out if nothing to do */
+		return 0;
+	}
+
+	mutex_lock(&kctx->mmu_lock);
+
+	kbdev = kctx->kbdev;
+	mmu_mode = kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+		if (err) {
+			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+/**
+ * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
+ * This call is being triggered as a response to the changes of the mem attributes
+ *
+ * @pre : The caller is responsible for validating the memory attributes
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags on GPU PFN 0x%llx from phys %p, %zu pages",
+			vpfn, phys, nr);
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		size_t count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i],
+					flags);
+
+		phys += count;
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(pfn_to_page(PFN_DOWN(pgd)));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
+}
+
+/* This is a debug feature only */
+static void mmu_check_unused(struct kbase_context *kctx, phys_addr_t pgd)
+{
+	u64 *page;
+	int i;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != page);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		if (kctx->kbdev->mmu_mode->ate_is_valid(page[i]))
+			beenthere(kctx, "live pte %016lx", (unsigned long)page[i]);
+	}
+	kunmap_atomic(page);
+}
+
+static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != pgd_page);
+	/* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */
+	memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+	kunmap_atomic(pgd_page);
+	pgd_page = pgd_page_buffer;
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+		if (target_pgd) {
+			if (level < (MIDGARD_MMU_BOTTOMLEVEL - 1)) {
+				mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
+			} else {
+				/*
+				 * So target_pte is a level-3 page.
+				 * As a leaf, it is safe to free it.
+				 * Unless we have live pages attached to it!
+				 */
+				mmu_check_unused(kctx, target_pgd);
+			}
+
+			beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1);
+			if (zap) {
+				struct page *p = phys_to_page(target_pgd);
+
+				kbase_mem_pool_free(&kctx->mem_pool, p, true);
+				kbase_process_page_usage_dec(kctx, 1);
+				kbase_atomic_sub_pages(1, &kctx->used_pages);
+				kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+			}
+		}
+	}
+}
+
+int kbase_mmu_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
+
+	mutex_init(&kctx->mmu_lock);
+
+	/* Preallocate MMU depth of four pages for mmu_teardown_level to use */
+	kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+
+	if (NULL == kctx->mmu_teardown_pages)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void kbase_mmu_term(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	kfree(kctx->mmu_teardown_pages);
+	kctx->mmu_teardown_pages = NULL;
+}
+
+void kbase_mmu_free_pgd(struct kbase_context *kctx)
+{
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	mutex_lock(&kctx->mmu_lock);
+	mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages);
+	mutex_unlock(&kctx->mmu_lock);
+
+	beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
+	kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true);
+	kbase_process_page_usage_dec(kctx, 1);
+	new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
+
+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
+	size_t dump_size;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+	if (!pgd_page) {
+		dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
+		return 0;
+	}
+
+	if (*size_left >= size) {
+		/* A modified physical address that contains the page table level */
+		u64 m_pgd = pgd | level;
+
+		/* Put the modified physical address in the output buffer */
+		memcpy(*buffer, &m_pgd, sizeof(m_pgd));
+		*buffer += sizeof(m_pgd);
+
+		/* Followed by the page table itself */
+		memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
+		*buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
+
+		*size_left -= size;
+	}
+
+	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
+		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+			if (mmu_mode->pte_is_valid(pgd_page[i])) {
+				target_pgd = mmu_mode->pte_to_phy_addr(
+						pgd_page[i]);
+
+				dump_size = kbasep_mmu_dump_level(kctx,
+						target_pgd, level + 1,
+						buffer, size_left);
+				if (!dump_size) {
+					kunmap(pfn_to_page(PFN_DOWN(pgd)));
+					return 0;
+				}
+				size += dump_size;
+			}
+		}
+	}
+
+	kunmap(pfn_to_page(PFN_DOWN(pgd)));
+
+	return size;
+}
+
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
+{
+	void *kaddr;
+	size_t size_left;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (0 == nr_pages) {
+		/* can't dump in a 0 sized buffer, early out */
+		return NULL;
+	}
+
+	size_left = nr_pages * PAGE_SIZE;
+
+	KBASE_DEBUG_ASSERT(0 != size_left);
+	kaddr = vmalloc_user(size_left);
+
+	mutex_lock(&kctx->mmu_lock);
+
+	if (kaddr) {
+		u64 end_marker = 0xFFULL;
+		char *buffer;
+		char *mmu_dump_buffer;
+		u64 config[3];
+		size_t size;
+
+		buffer = (char *)kaddr;
+		mmu_dump_buffer = buffer;
+
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
+			struct kbase_mmu_setup as_setup;
+
+			kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup);
+			config[0] = as_setup.transtab;
+			config[1] = as_setup.memattr;
+			config[2] = as_setup.transcfg;
+			memcpy(buffer, &config, sizeof(config));
+			mmu_dump_buffer += sizeof(config);
+			size_left -= sizeof(config);
+		}
+
+
+
+		size = kbasep_mmu_dump_level(kctx,
+				kctx->pgd,
+				MIDGARD_MMU_TOPLEVEL,
+				&mmu_dump_buffer,
+				&size_left);
+
+		if (!size)
+			goto fail_free;
+
+		/* Add on the size for the end marker */
+		size += sizeof(u64);
+		/* Add on the size for the config */
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4))
+			size += sizeof(config);
+
+
+		if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) {
+			/* The buffer isn't big enough - free the memory and return failure */
+			goto fail_free;
+		}
+
+		/* Add the end marker */
+		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return kaddr;
+
+fail_free:
+	vfree(kaddr);
+	mutex_unlock(&kctx->mmu_lock);
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_dump);
+
+void bus_fault_worker(struct work_struct *data)
+{
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif /* KBASE_GPU_RESET_EN */
+
+	faulting_as = container_of(data, struct kbase_as, work_busfault);
+
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	if (unlikely(faulting_as->protected_mode))
+	{
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+		atomic_dec(&kbdev->faults_pending);
+		return;
+
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
+	if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		unsigned long flags;
+
+		/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Set the MMU into unmapped mode */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_disable(kctx);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+
+		kbase_pm_context_idle(kbdev);
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
+{
+	const char *e;
+
+	switch (exception_code) {
+		/* Non-Fault Status code */
+	case 0x00:
+		e = "NOT_STARTED/IDLE/OK";
+		break;
+	case 0x01:
+		e = "DONE";
+		break;
+	case 0x02:
+		e = "INTERRUPTED";
+		break;
+	case 0x03:
+		e = "STOPPED";
+		break;
+	case 0x04:
+		e = "TERMINATED";
+		break;
+	case 0x08:
+		e = "ACTIVE";
+		break;
+		/* Job exceptions */
+	case 0x40:
+		e = "JOB_CONFIG_FAULT";
+		break;
+	case 0x41:
+		e = "JOB_POWER_FAULT";
+		break;
+	case 0x42:
+		e = "JOB_READ_FAULT";
+		break;
+	case 0x43:
+		e = "JOB_WRITE_FAULT";
+		break;
+	case 0x44:
+		e = "JOB_AFFINITY_FAULT";
+		break;
+	case 0x48:
+		e = "JOB_BUS_FAULT";
+		break;
+	case 0x50:
+		e = "INSTR_INVALID_PC";
+		break;
+	case 0x51:
+		e = "INSTR_INVALID_ENC";
+		break;
+	case 0x52:
+		e = "INSTR_TYPE_MISMATCH";
+		break;
+	case 0x53:
+		e = "INSTR_OPERAND_FAULT";
+		break;
+	case 0x54:
+		e = "INSTR_TLS_FAULT";
+		break;
+	case 0x55:
+		e = "INSTR_BARRIER_FAULT";
+		break;
+	case 0x56:
+		e = "INSTR_ALIGN_FAULT";
+		break;
+	case 0x58:
+		e = "DATA_INVALID_FAULT";
+		break;
+	case 0x59:
+		e = "TILE_RANGE_FAULT";
+		break;
+	case 0x5A:
+		e = "ADDR_RANGE_FAULT";
+		break;
+	case 0x60:
+		e = "OUT_OF_MEMORY";
+		break;
+		/* GPU exceptions */
+	case 0x80:
+		e = "DELAYED_BUS_FAULT";
+		break;
+	case 0x88:
+		e = "SHAREABILITY_FAULT";
+		break;
+		/* MMU exceptions */
+	case 0xC0:
+	case 0xC1:
+	case 0xC2:
+	case 0xC3:
+	case 0xC4:
+	case 0xC5:
+	case 0xC6:
+	case 0xC7:
+		e = "TRANSLATION_FAULT";
+		break;
+	case 0xC8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xC9:
+	case 0xCA:
+	case 0xCB:
+	case 0xCC:
+	case 0xCD:
+	case 0xCE:
+	case 0xCF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		e = "PERMISSION_FAULT";
+		break;
+	case 0xD0:
+	case 0xD1:
+	case 0xD2:
+	case 0xD3:
+	case 0xD4:
+	case 0xD5:
+	case 0xD6:
+	case 0xD7:
+		e = "TRANSTAB_BUS_FAULT";
+		break;
+	case 0xD8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xD9:
+	case 0xDA:
+	case 0xDB:
+	case 0xDC:
+	case 0xDD:
+	case 0xDE:
+	case 0xDF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		e = "ACCESS_FLAG";
+		break;
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xE0:
+	case 0xE1:
+	case 0xE2:
+	case 0xE3:
+	case 0xE4:
+	case 0xE5:
+	case 0xE6:
+	case 0xE7:
+		e = "ADDRESS_SIZE_FAULT";
+		break;
+	case 0xE8:
+	case 0xE9:
+	case 0xEA:
+	case 0xEB:
+	case 0xEC:
+	case 0xED:
+	case 0xEE:
+	case 0xEF:
+		e = "MEMORY_ATTRIBUTES_FAULT";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		break;
+	default:
+		e = "UNKNOWN";
+		break;
+	};
+
+	return e;
+}
+
+static const char *access_type_name(struct kbase_device *kbdev,
+		u32 fault_status)
+{
+	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		return "ATOMIC";
+#else
+		return "UNKNOWN";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		return "READ";
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		return "WRITE";
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		return "EXECUTE";
+	default:
+		WARN_ON(1);
+		return NULL;
+	}
+}
+
+/**
+ * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on.
+ */
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str)
+{
+	unsigned long flags;
+	int exception_type;
+	int access_type;
+	int source_id;
+	int as_no;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif
+
+	as_no = as->number;
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	/* ASSERT that the context won't leave the runpool */
+	KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0);
+
+	/* decode the fault status */
+	exception_type = as->fault_status & 0xFF;
+	access_type = (as->fault_status >> 8) & 0x3;
+	source_id = (as->fault_status >> 16);
+
+	/* terminal fault, print info about the fault */
+	dev_err(kbdev->dev,
+		"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+		"Reason: %s\n"
+		"raw fault status: 0x%X\n"
+		"decoded fault status: %s\n"
+		"exception type 0x%X: %s\n"
+		"access type 0x%X: %s\n"
+		"source id 0x%X\n"
+		"pid: %d\n",
+		as_no, as->fault_addr,
+		reason_str,
+		as->fault_status,
+		(as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+		exception_type, kbase_exception_name(kbdev, exception_type),
+		access_type, access_type_name(kbdev, as->fault_status),
+		source_id,
+		kctx->pid);
+
+	/* hardware counters dump fault handling */
+	if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
+			(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_DUMPING)) {
+		unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+
+		if ((as->fault_addr >= kbdev->hwcnt.addr) &&
+				(as->fault_addr < (kbdev->hwcnt.addr +
+						(num_core_groups * 2048))))
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+	}
+
+	/* Stop the kctx from submitting more jobs and cause it to be scheduled
+	 * out/rescheduled - this will occur on releasing the context's refcount */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_clear_submit_allowed(js_devdata, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
+	 * context can appear in the job slots from this point on */
+	kbase_backend_jm_kill_jobs_from_kctx(kctx);
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_mmu_disable(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+	/* Clear down the fault */
+	kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+	kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+}
+
+void kbasep_as_do_poke(struct work_struct *work)
+{
+	struct kbase_as *as;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(work);
+	as = container_of(work, struct kbase_as, poke_work);
+	kbdev = container_of(as, struct kbase_device, as[as->number]);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	/* GPU power will already be active by virtue of the caller holding a JS
+	 * reference on the address space, and will not release it until this worker
+	 * has finished */
+
+	/* Further to the comment above, we know that while this function is running
+	 * the AS will not be released as before the atom is released this workqueue
+	 * is flushed (in kbase_as_poking_timer_release_atom)
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
+
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	/* Force a uTLB invalidate */
+	kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
+				  AS_COMMAND_UNLOCK, 0);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (as->poke_refcount &&
+		!(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
+		/* Only queue up the timer if we need it, and we're not trying to kill it */
+		hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_as *as;
+	int queue_work_ret;
+
+	KBASE_DEBUG_ASSERT(NULL != timer);
+	as = container_of(timer, struct kbase_as, poke_timer);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+	KBASE_DEBUG_ASSERT(queue_work_ret);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * Retain the poking timer on an atom's context (if the atom hasn't already
+ * done so), and start the timer (if it's not already started).
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that's running on the GPU.
+ *
+ * The caller must hold hwaccess_lock
+ *
+ * This can be called safely from atomic context
+ */
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->poking)
+		return;
+
+	katom->poking = 1;
+
+	/* It's safe to work on the as/as_nr without an explicit reference,
+	 * because the caller holds the hwaccess_lock, and the atom itself
+	 * was also running and had already taken a reference  */
+	as = &kbdev->as[kctx->as_nr];
+
+	if (++(as->poke_refcount) == 1) {
+		/* First refcount for poke needed: check if not already in flight */
+		if (!as->poke_state) {
+			/* need to start poking */
+			as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
+			queue_work(as->poke_wq, &as->poke_work);
+		}
+	}
+}
+
+/**
+ * If an atom holds a poking timer, release it and wait for it to finish
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that still has a JS reference on the context
+ *
+ * This must \b not be called from atomic context, since it can sleep.
+ */
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	if (!katom->poking)
+		return;
+
+	as = &kbdev->as[kctx->as_nr];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	if (--(as->poke_refcount) == 0) {
+		as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_cancel(&as->poke_timer);
+		flush_workqueue(as->poke_wq);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* Re-check whether it's still needed */
+		if (as->poke_refcount) {
+			int queue_work_ret;
+			/* Poking still needed:
+			 * - Another retain will not be starting the timer or queueing work,
+			 * because it's still marked as in-flight
+			 * - The hrtimer has finished, and has not started a new timer or
+			 * queued work because it's been marked as killing
+			 *
+			 * So whatever happens now, just queue the work again */
+			as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
+			queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+			KBASE_DEBUG_ASSERT(queue_work_ret);
+		} else {
+			/* It isn't - so mark it as not in flight, and not killing */
+			as->poke_state = 0u;
+
+			/* The poke associated with the atom has now finished. If this is
+			 * also the last atom on the context, then we can guarentee no more
+			 * pokes (and thus no more poking register accesses) will occur on
+			 * the context until new atoms are run */
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	katom->poking = 0;
+}
+
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kctx) {
+		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n",
+				 kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
+				 as->number, as->fault_addr);
+
+		/* Since no ctx was found, the MMU must be disabled. */
+		WARN_ON(as->current_setup.transtab);
+
+		if (kbase_as_has_bus_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		} else if (kbase_as_has_page_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+		}
+
+#if KBASE_GPU_RESET_EN
+		if (kbase_as_has_bus_fault(as) &&
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+			bool reset_status;
+			/*
+			 * Reset the GPU, like in bus_fault_worker, in case an
+			 * earlier error hasn't been properly cleared by this
+			 * point.
+			 */
+			dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+			reset_status = kbase_prepare_to_reset_gpu_locked(kbdev);
+			if (reset_status)
+				kbase_reset_gpu_locked(kbdev);
+		}
+#endif /* KBASE_GPU_RESET_EN */
+
+		return;
+	}
+
+	if (kbase_as_has_bus_fault(as)) {
+		/*
+		 * hw counters dumping in progress, signal the
+		 * other thread that it failed
+		 */
+		if ((kbdev->hwcnt.kctx == kctx) &&
+		    (kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_DUMPING))
+			kbdev->hwcnt.backend.state =
+						KBASE_INSTR_STATE_FAULT;
+
+		/*
+		 * Stop the kctx from submitting more jobs and cause it
+		 * to be scheduled out/rescheduled when all references
+		 * to it are released
+		 */
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		dev_warn(kbdev->dev,
+				"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+				as->number, as->fault_addr,
+				as->fault_extra_addr);
+#else
+		dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+				as->number, as->fault_addr);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+		/*
+		 * We need to switch to UNMAPPED mode - but we do this in a
+		 * worker so that we can sleep
+		 */
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault));
+		WARN_ON(work_pending(&as->work_busfault));
+		queue_work(as->pf_wq, &as->work_busfault);
+		atomic_inc(&kbdev->faults_pending);
+	} else {
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault));
+		WARN_ON(work_pending(&as->work_pagefault));
+		queue_work(as->pf_wq, &as->work_pagefault);
+		atomic_inc(&kbdev->faults_pending);
+	}
+}
+
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		flush_workqueue(as->pf_wq);
+	}
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
new file mode 100755
index 0000000..986e959
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Interface file for accessing MMU hardware functionality
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_page MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_intro_sec Introduction
+ * This module provides an abstraction for accessing the functionality provided
+ * by the midgard MMU and thus allows all MMU HW access to be contained within
+ * one common place and allows for different backends (implementations) to
+ * be provided.
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_H_
+#define _MALI_KBASE_MMU_HW_H_
+
+/* Forward declarations */
+struct kbase_device;
+struct kbase_as;
+struct kbase_context;
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw  MMU access APIs
+ * @{
+ */
+
+/** @brief MMU fault type descriptor.
+ */
+enum kbase_mmu_fault_type {
+	KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
+	KBASE_MMU_FAULT_TYPE_PAGE,
+	KBASE_MMU_FAULT_TYPE_BUS,
+	KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED,
+	KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED
+};
+
+/** @brief Configure an address space for use.
+ *
+ * Configure the MMU using the address space details setup in the
+ * @ref kbase_context structure.
+ *
+ * @param[in]  kbdev          kbase device to configure.
+ * @param[in]  as             address space to configure.
+ * @param[in]  kctx           kbase context to configure.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev,
+		struct kbase_as *as, struct kbase_context *kctx);
+
+/** @brief Issue an operation to the MMU.
+ *
+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
+ * is associated with the provided @ref kbase_context over the specified range
+ *
+ * @param[in]  kbdev         kbase device to issue the MMU operation on.
+ * @param[in]  as            address space to issue the MMU operation on.
+ * @param[in]  kctx          kbase context to issue the MMU operation on.
+ * @param[in]  vpfn          MMU Virtual Page Frame Number to start the
+ *                           operation on.
+ * @param[in]  nr            Number of pages to work on.
+ * @param[in]  type          Operation type (written to ASn_COMMAND).
+ * @param[in]  handling_irq  Is this operation being called during the handling
+ *                           of an interrupt?
+ *
+ * @return Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type,
+		unsigned int handling_irq);
+
+/** @brief Clear a fault that has been previously reported by the MMU.
+ *
+ * Clear a bus error or page fault that has been reported by the MMU.
+ *
+ * @param[in]  kbdev         kbase device to  clear the fault from.
+ * @param[in]  as            address space to  clear the fault from.
+ * @param[in]  kctx          kbase context to clear the fault from or NULL.
+ * @param[in]  type          The type of fault that needs to be cleared.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @brief Enable fault that has been previously reported by the MMU.
+ *
+ * After a page fault or bus error has been reported by the MMU these
+ * will be disabled. After these are handled this function needs to be
+ * called to enable the page fault or bus error fault again.
+ *
+ * @param[in]  kbdev         kbase device to again enable the fault from.
+ * @param[in]  as            address space to again enable the fault from.
+ * @param[in]  kctx          kbase context to again enable the fault from.
+ * @param[in]  type          The type of fault that needs to be enabled again.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @} *//* end group mali_kbase_mmu_hw */
+/** @} *//* end group base_kbase_api */
+
+#endif	/* _MALI_KBASE_MMU_HW_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
new file mode 100755
index 0000000..2449c60
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _MALI_KBASE_MMU_MODE_
+#define _MALI_KBASE_MMU_MODE_
+
+#include <linux/types.h>
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_device;
+struct kbase_as;
+struct kbase_mmu_setup;
+
+struct kbase_mmu_mode {
+	void (*update)(struct kbase_context *kctx);
+	void (*get_as_setup)(struct kbase_context *kctx,
+			struct kbase_mmu_setup * const setup);
+	void (*disable_as)(struct kbase_device *kbdev, int as_nr);
+	phys_addr_t (*pte_to_phy_addr)(u64 entry);
+	int (*ate_is_valid)(u64 ate);
+	int (*pte_is_valid)(u64 pte);
+	void (*entry_set_ate)(u64 *entry, phys_addr_t phy, unsigned long flags);
+	void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+	void (*entry_invalidate)(u64 *entry);
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
+
+#endif /* _MALI_KBASE_MMU_MODE_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
new file mode 100644
index 0000000..791f3ed
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
@@ -0,0 +1,200 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase_mmu_mode.h"
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+/* For valid ATEs bit 1 = (level == 3) ? 1 : 0.
+ * The MMU is only ever configured by the driver so that ATEs
+ * are at level 3, so bit 1 should always be set
+ */
+#define ENTRY_IS_ATE        3ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_ACCESS_RW (1ULL << 6)     /* bits 6:7 */
+#define ENTRY_ACCESS_RO (3ULL << 6)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+	*pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler keeping cached copies of
+	 * memory, we have to explicitly say that we have updated memory.
+	 *
+	 * Note: We could manually move the data ourselves into R0 and
+	 * R1 by specifying register variables that are explicitly
+	 * given registers assignments, the down side of this is that
+	 * we have to assume cpu endianness.  To avoid this we can use
+	 * the ldrd to read the data from memory into R0 and R1 which
+	 * will respect the cpu endianness, we then use strd to make
+	 * the 64 bit assignment to the page table entry.
+	 */
+	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+			"strd r0, r1, [%[pte]]\n\t"
+			: "=m" (*pte)
+			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+			: "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register.
+	 */
+	setup->memattr =
+		(AS_MEMATTR_IMPL_DEF_CACHE_POLICY <<
+			(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_FORCE_TO_CACHE_ALL    <<
+			(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+		(AS_MEMATTR_WRITE_ALLOC           <<
+			(AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_IMPL_DEF   <<
+			(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_WA         <<
+			(AS_MEMATTR_INDEX_OUTER_WA * 8));
+
+	setup->transtab = (u64)kctx->pgd & AS_TRANSTAB_BASE_MASK;
+	setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K;
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = 0ULL;
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED;
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* Set access flags - note that AArch64 stage 1 does not support
+	 * write-only access, so we use read/write instead
+	 */
+	if (flags & KBASE_REG_GPU_WR)
+		mmu_flags |= ENTRY_ACCESS_RW;
+	else if (flags & KBASE_REG_GPU_RD)
+		mmu_flags |= ENTRY_ACCESS_RO;
+
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) |
+			get_mmu_flags(flags) |
+			ENTRY_ACCESS_BIT | ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) |
+			ENTRY_ACCESS_BIT | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const aarch64_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void)
+{
+	return &aarch64_mode;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
new file mode 100755
index 0000000..683cabb
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -0,0 +1,206 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase_mmu_mode.h"
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+#define ENTRY_IS_ATE        1ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_RD_BIT (1ULL << 6)
+#define ENTRY_WR_BIT (1ULL << 7)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
+		ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+	*pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler keeping cached copies of
+	 * memory, we have to explicitly say that we have updated
+	 * memory.
+	 *
+	 * Note: We could manually move the data ourselves into R0 and
+	 * R1 by specifying register variables that are explicitly
+	 * given registers assignments, the down side of this is that
+	 * we have to assume cpu endianness.  To avoid this we can use
+	 * the ldrd to read the data from memory into R0 and R1 which
+	 * will respect the cpu endianness, we then use strd to make
+	 * the 64 bit assignment to the page table entry.
+	 */
+	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+			"strd r0, r1, [%[pte]]\n\t"
+			: "=m" (*pte)
+			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+			: "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register. */
+	setup->memattr =
+		(AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
+		(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    <<
+		(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8))    |
+		(AS_MEMATTR_LPAE_WRITE_ALLOC           <<
+		(AS_MEMATTR_INDEX_WRITE_ALLOC * 8))           |
+		(AS_MEMATTR_LPAE_OUTER_IMPL_DEF        <<
+		(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8))        |
+		(AS_MEMATTR_LPAE_OUTER_WA              <<
+		(AS_MEMATTR_INDEX_OUTER_WA * 8))              |
+		0; /* The other indices are unused for now */
+
+	setup->transtab = ((u64)kctx->pgd &
+		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
+		AS_TRANSTAB_LPAE_ADRMODE_TABLE |
+		AS_TRANSTAB_LPAE_READ_INNER;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#else
+	setup->transcfg = 0;
+#endif
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#endif
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* write perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
+	/* read perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) |
+		get_mmu_flags(flags) |
+		ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const lpae_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
+{
+	return &lpae_mode;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
new file mode 100755
index 0000000..1a44957
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+
+/*
+ * This file is included only for type definitions and functions belonging to
+ * specific platform folders. Do not add dependencies with symbols that are
+ * defined somewhere else.
+ */
+#include <mali_kbase_config.h>
+
+#define PLATFORM_CONFIG_RESOURCE_COUNT 4
+#define PLATFORM_CONFIG_IRQ_RES_COUNT  3
+
+static struct platform_device *mali_device;
+
+#ifndef CONFIG_OF
+/**
+ * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources
+ *
+ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function
+ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT.
+ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
+ *
+ * @param[in]  io_resource      Input IO resource data
+ * @param[out] linux_resources  Pointer to output array of Linux resource structures
+ */
+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
+{
+	if (!io_resources || !linux_resources) {
+		pr_err("%s: couldn't find proper resources\n", __func__);
+		return;
+	}
+
+	memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource));
+
+	linux_resources[0].start = io_resources->io_memory_region.start;
+	linux_resources[0].end   = io_resources->io_memory_region.end;
+	linux_resources[0].flags = IORESOURCE_MEM;
+
+	linux_resources[1].start = io_resources->job_irq_number;
+	linux_resources[1].end   = io_resources->job_irq_number;
+	linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[2].start = io_resources->mmu_irq_number;
+	linux_resources[2].end   = io_resources->mmu_irq_number;
+	linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[3].start = io_resources->gpu_irq_number;
+	linux_resources[3].end   = io_resources->gpu_irq_number;
+	linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+}
+#endif /* CONFIG_OF */
+
+int kbase_platform_fake_register(void)
+{
+	struct kbase_platform_config *config;
+#ifndef CONFIG_OF
+	struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
+#endif
+	int err;
+
+	config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
+	if (config == NULL) {
+		pr_err("%s: couldn't get platform config\n", __func__);
+		return -ENODEV;
+	}
+
+	mali_device = platform_device_alloc("mali", 0);
+	if (mali_device == NULL)
+		return -ENOMEM;
+
+#ifndef CONFIG_OF
+	kbasep_config_parse_io_resources(config->io_resources, resources);
+	err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
+	if (err) {
+		platform_device_put(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+#endif /* CONFIG_OF */
+
+	err = platform_device_add(mali_device);
+	if (err) {
+		platform_device_unregister(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kbase_platform_fake_register);
+
+void kbase_platform_fake_unregister(void)
+{
+	if (mali_device)
+		platform_device_unregister(mali_device);
+}
+EXPORT_SYMBOL(kbase_platform_fake_unregister);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
new file mode 100755
index 0000000..97d5434
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -0,0 +1,205 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.c
+ * Base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_vinstr.h>
+
+#include <mali_kbase_pm.h>
+
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
+{
+	return kbase_hwaccess_pm_powerup(kbdev, flags);
+}
+
+void kbase_pm_halt(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
+void kbase_pm_context_active(struct kbase_device *kbdev)
+{
+	(void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+}
+
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerup. Sometimes the event might be missed due to reading the count
+	 * outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+	if (kbase_pm_is_suspending(kbdev)) {
+		switch (suspend_handler) {
+		case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+			if (kbdev->pm.active_count != 0)
+				break;
+			/* FALLTHROUGH */
+		case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			if (old_count == 0)
+				kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+			return 1;
+
+		case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
+			/* FALLTHROUGH */
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false, "unreachable");
+			break;
+		}
+	}
+	c = ++kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	if (c == 1)
+		/* First context active: Power on the GPU and any cores requested by
+		 * the policy */
+		kbase_hwaccess_pm_gpu_active(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_active);
+
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerdown. Sometimes the event might be missed due to reading the
+	 * count outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	c = --kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
+
+	KBASE_DEBUG_ASSERT(c >= 0);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	if (c == 0) {
+		/* Last context has gone idle */
+		kbase_hwaccess_pm_gpu_idle(kbdev);
+
+		/* Wake up anyone waiting for this to become 0 (e.g. suspend). The
+		 * waiters must synchronize with us by locking the pm.lock after
+		 * waiting */
+		wake_up(&kbdev->pm.zero_active_count_wait);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
+
+void kbase_pm_suspend(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	mutex_lock(&kbdev->pm.lock);
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+	kbdev->pm.suspending = true;
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* From now on, the active count will drop towards zero. Sometimes, it'll
+	 * go up briefly before going down again. However, once it reaches zero it
+	 * will stay there - guaranteeing that we've idled all pm references */
+
+	/* Suspend job scheduler and associated components, so that it releases all
+	 * the PM active count references */
+	kbasep_js_suspend(kbdev);
+
+	/* Wait for the active count to reach zero. This is not the same as
+	 * waiting for a power down, since not all policies power down when this
+	 * reaches zero. */
+	wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0);
+
+	/* NOTE: We synchronize with anything that was just finishing a
+	 * kbase_pm_context_idle() call by locking the pm.lock below */
+
+	kbase_hwaccess_pm_suspend(kbdev);
+}
+
+void kbase_pm_resume(struct kbase_device *kbdev)
+{
+	/* MUST happen before any pm_context_active calls occur */
+	kbase_hwaccess_pm_resume(kbdev);
+
+	/* Initial active call, to power on the GPU/cores if needed */
+	kbase_pm_context_active(kbdev);
+
+	/* Resume any blocked atoms (which may cause contexts to be scheduled in
+	 * and dependent atoms to run) */
+	kbase_resume_suspended_soft_jobs(kbdev);
+
+	/* Resume the Job Scheduler and associated components, and start running
+	 * atoms */
+	kbasep_js_resume(kbdev);
+
+	/* Matching idle call, to power off the GPU/cores if we didn't actually
+	 * need it and the policy doesn't want it on */
+	kbase_pm_context_idle(kbdev);
+
+	/* Resume vinstr operation */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
new file mode 100755
index 0000000..37fa247
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -0,0 +1,171 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include "mali_kbase_hwaccess_pm.h"
+
+#define PM_ENABLE_IRQS       0x01
+#define PM_HW_ISSUES_DETECT  0x02
+
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return 0 if the power management framework was successfully initialized.
+ */
+int kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @param flags     Flags to pass on to kbase_pm_init_hw
+ *
+ * @return 0 if powerup was successful.
+ */
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ * Should ensure that no new interrupts are generated,
+ * but allow any currently running interrupt handlers to complete successfully.
+ * The GPU is forced off by the time this function returns, regardless of
+ * whether or not the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init)
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It informs the active power policy that the
+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occuring/has occurred whilst this is
+ * in use. Use kbase_pm_contect_active_unless_suspending() instead.
+ *
+ * @note a Suspend is only visible to Kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+	/** A suspend is not expected/not possible - this is the same as
+	 * kbase_pm_context_active() */
+	KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+	/** If we're suspending, fail and don't increase the active count */
+	KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+	/** If we're suspending, succeed and allow the active count to increase iff
+	 * it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+	 *
+	 * This should only be used when there is a bounded time on the activation
+	 * (e.g. guarantee it's going to be idled very soon after) */
+	KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * We returns a status code indicating whether we're allowed to keep the GPU
+ * active during the suspend, depending on the handler code. If the status code
+ * indicates a failure, the caller must abort whatever operation it was
+ * attempting, and potentially queue it up for after the OS has resumed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero     Indicates success
+ * @return non-zero Indicates failure due to the system being suspending/suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power
+ * policy so the calling code should ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from Kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note the mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU e.g. via atom submission.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_vsync_callback - vsync callback
+ *
+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise
+ * @data: Pointer to the kbase device as returned by kbase_find_device()
+ *
+ * Callback function used to notify the power management code that a vsync has
+ * occurred on the display.
+ */
+void kbase_pm_vsync_callback(int buffer_updated, void *data);
+
+#endif				/* _KBASE_PM_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
new file mode 100755
index 0000000..7fb674e
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_profiling_gator_api.h
+ * Model interface
+ */
+
+#ifndef _KBASE_PROFILING_GATOR_API_H_
+#define _KBASE_PROFILING_GATOR_API_H_
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control
+ * the frame buffer dumping and s/w counter reporting.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define FBDUMP_CONTROL_MAX (5)
+#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE
+
+void _mali_profiling_control(u32 action, u32 value);
+
+#endif				/* _KBASE_PROFILING_GATOR_API */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
new file mode 100644
index 0000000..c970650
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+
+#include "mali_kbase_regs_history_debugfs.h"
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+#include <linux/debugfs.h>
+
+
+static int regs_history_size_get(void *data, u64 *val)
+{
+	struct kbase_io_history *const h = data;
+
+	*val = h->size;
+
+	return 0;
+}
+
+static int regs_history_size_set(void *data, u64 val)
+{
+	struct kbase_io_history *const h = data;
+
+	return kbase_io_history_resize(h, (u16)val);
+}
+
+
+DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
+		regs_history_size_get,
+		regs_history_size_set,
+		"%llu\n");
+
+
+/**
+ * regs_history_show - show callback for the register access history file.
+ *
+ * @sfile: The debugfs entry
+ * @data: Data associated with the entry
+ *
+ * This function is called to dump all recent accesses to the GPU registers.
+ *
+ * @return 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int regs_history_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_io_history *const h = sfile->private;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!h->enabled) {
+		seq_puts(sfile, "The register access history is disabled\n");
+		goto out;
+	}
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	iters = (h->size > h->count) ? h->count : h->size;
+	seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+out:
+	return 0;
+}
+
+
+/**
+ * regs_history_open - open operation for regs_history debugfs file
+ *
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * @return file descriptor
+ */
+static int regs_history_open(struct inode *in, struct file *file)
+{
+	return single_open(file, &regs_history_show, in->i_private);
+}
+
+
+static const struct file_operations regs_history_fops = {
+	.open = &regs_history_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history.enabled);
+	debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history, &regs_history_size_fops);
+	debugfs_create_file("regs_history", S_IRUGO,
+			kbdev->mali_debugfs_directory, &kbdev->io_history,
+			&regs_history_fops);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
new file mode 100644
index 0000000..f108370
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Header file for register access history support via debugfs
+ *
+ * This interface is made available via /sys/kernel/debug/mali#/regs_history*.
+ *
+ * Usage:
+ * - regs_history_enabled: whether recording of register accesses is enabled.
+ *   Write 'y' to enable, 'n' to disable.
+ * - regs_history_size: size of the register history buffer, must be > 0
+ * - regs_history: return the information about last accesses to the registers.
+ */
+
+#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H
+#define _KBASE_REGS_HISTORY_DEBUGFS_H
+
+struct kbase_device;
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/**
+ * kbasep_regs_history_debugfs_init - add debugfs entries for register history
+ *
+ * @kbdev: Pointer to kbase_device containing the register history
+ */
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_regs_history_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+#endif  /*_KBASE_REGS_HISTORY_DEBUGFS_H*/
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
new file mode 100755
index 0000000..84aa331
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -0,0 +1,1166 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_replay.c
+ * Replay soft job handlers
+ */
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mem_linux.h>
+
+#define JOB_NOT_STARTED 0
+#define JOB_TYPE_NULL      (1)
+#define JOB_TYPE_VERTEX    (5)
+#define JOB_TYPE_TILER     (7)
+#define JOB_TYPE_FUSED     (8)
+#define JOB_TYPE_FRAGMENT  (9)
+
+#define JOB_HEADER_32_FBD_OFFSET (31*4)
+#define JOB_HEADER_64_FBD_OFFSET (44*4)
+
+#define FBD_POINTER_MASK (~0x3f)
+
+#define SFBD_TILER_OFFSET (48*4)
+
+#define MFBD_TILER_OFFSET       (14*4)
+
+#define FBD_HIERARCHY_WEIGHTS 8
+#define FBD_HIERARCHY_MASK_MASK 0x1fff
+
+#define FBD_TYPE 1
+
+#define HIERARCHY_WEIGHTS 13
+
+#define JOB_HEADER_ID_MAX                 0xffff
+
+#define JOB_SOURCE_ID(status)		(((status) >> 16) & 0xFFFF)
+#define JOB_POLYGON_LIST		(0x03)
+
+struct fragment_job {
+	struct job_descriptor_header header;
+
+	u32 x[2];
+	union {
+		u64 _64;
+		u32 _32;
+	} fragment_fbd;
+};
+
+static void dump_job_head(struct kbase_context *kctx, char *head_str,
+		struct job_descriptor_header *job)
+{
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
+	dev_dbg(kctx->kbdev->dev,
+			"addr                  = %p\n"
+			"exception_status      = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n"
+			"first_incomplete_task = %x\n"
+			"fault_pointer         = %llx\n"
+			"job_descriptor_size   = %x\n"
+			"job_type              = %x\n"
+			"job_barrier           = %x\n"
+			"_reserved_01          = %x\n"
+			"_reserved_02          = %x\n"
+			"_reserved_03          = %x\n"
+			"_reserved_04/05       = %x,%x\n"
+			"job_index             = %x\n"
+			"dependencies          = %x,%x\n",
+			job, job->exception_status,
+			JOB_SOURCE_ID(job->exception_status),
+			(job->exception_status >> 8) & 0x3,
+			job->exception_status  & 0xFF,
+			job->first_incomplete_task,
+			job->fault_pointer, job->job_descriptor_size,
+			job->job_type, job->job_barrier, job->_reserved_01,
+			job->_reserved_02, job->_reserved_03,
+			job->_reserved_04, job->_reserved_05,
+			job->job_index,
+			job->job_dependency_index_1,
+			job->job_dependency_index_2);
+
+	if (job->job_descriptor_size)
+		dev_dbg(kctx->kbdev->dev, "next               = %llx\n",
+				job->next_job._64);
+	else
+		dev_dbg(kctx->kbdev->dev, "next               = %x\n",
+				job->next_job._32);
+#endif
+}
+
+static int kbasep_replay_reset_sfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct {
+		u32 padding_1[1];
+		u32 flags;
+		u64 padding_2[2];
+		u64 heap_free_address;
+		u32 padding[8];
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev,
+		"FBD tiler:\n"
+		"flags = %x\n"
+		"heap_free_address = %llx\n",
+		fbd_tiler->flags, fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = fbd_tiler->flags &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n",
+						i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n",
+			fbd_tiler->heap_free_address, fbd_tiler->flags);
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_replay_reset_mfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct kbase_vmap_struct map;
+	struct {
+		u32 padding_0;
+		u32 flags;
+		u64 padding_1[2];
+		u64 heap_free_address;
+		u64 padding_2;
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev,
+			       "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "FBD tiler:\n"
+			"flags = %x\n"
+			"heap_free_address = %llx\n",
+			fbd_tiler->flags,
+			fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = (fbd_tiler->flags) &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev,
+				" Writing hierarchy level %02d (%08x) to %d\n",
+							     i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of an FBD pointed to by a tiler job
+ *
+ * This performs two functions :
+ * - Set the hierarchy mask
+ * - Reset the tiler free heap address
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] job_header        Address of job header to reset.
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] job_64            true if this job is using 64-bit
+ *                              descriptors
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx,
+		u64 job_header,	u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight,	bool job_64)
+{
+	struct kbase_vmap_struct map;
+	u64 fbd_address;
+
+	if (job_64) {
+		u64 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_64_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	} else {
+		u32 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_32_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	if (fbd_address & FBD_TYPE) {
+		return kbasep_replay_reset_mfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	} else {
+		return kbasep_replay_reset_sfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	}
+}
+
+/**
+ * @brief Reset the status of a job
+ *
+ * This performs the following functions :
+ *
+ * - Reset the Job Status field of each job to NOT_STARTED.
+ * - Set the Job Type field of any Vertex Jobs to Null Job.
+ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of
+ *   the tiler_heap_free parameter, and set the hierarchy level mask to the
+ *   hier_mask parameter.
+ * - Offset HW dependencies by the hw_job_id_offset parameter
+ * - Set the Perform Job Barrier flag if this job is the first in the chain
+ * - Read the address of the next job header
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in,out] job_header    Address of job header to reset. Set to address
+ *                              of next job header on exit.
+ * @param[in] prev_jc           Previous job chain to link to, if this job is
+ *                              the last in the chain.
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] first_in_chain    true if this job is the first in the chain
+ * @param[in] fragment_chain    true if this job is in the fragment chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_job(struct kbase_context *kctx,
+		u64 *job_header, u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool first_in_chain, bool fragment_chain)
+{
+	struct fragment_job *frag_job;
+	struct job_descriptor_header *job;
+	u64 new_job_header;
+	struct kbase_vmap_struct map;
+
+	frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map);
+	if (!frag_job) {
+		dev_err(kctx->kbdev->dev,
+				 "kbasep_replay_parse_jc: failed to map jc\n");
+		return -EINVAL;
+	}
+	job = &frag_job->header;
+
+	dump_job_head(kctx, "Job header:", job);
+
+	if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) {
+		dev_err(kctx->kbdev->dev, "Job already not started\n");
+		goto out_unmap;
+	}
+	job->exception_status = JOB_NOT_STARTED;
+
+	if (job->job_type == JOB_TYPE_VERTEX)
+		job->job_type = JOB_TYPE_NULL;
+
+	if (job->job_type == JOB_TYPE_FUSED) {
+		dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
+		goto out_unmap;
+	}
+
+	if (first_in_chain)
+		job->job_barrier = 1;
+
+	if ((job->job_dependency_index_1 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_dependency_index_2 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+		dev_err(kctx->kbdev->dev,
+			     "Job indicies/dependencies out of valid range\n");
+		goto out_unmap;
+	}
+
+	if (job->job_dependency_index_1)
+		job->job_dependency_index_1 += hw_job_id_offset;
+	if (job->job_dependency_index_2)
+		job->job_dependency_index_2 += hw_job_id_offset;
+
+	job->job_index += hw_job_id_offset;
+
+	if (job->job_descriptor_size) {
+		new_job_header = job->next_job._64;
+		if (!job->next_job._64)
+			job->next_job._64 = prev_jc;
+	} else {
+		new_job_header = job->next_job._32;
+		if (!job->next_job._32)
+			job->next_job._32 = prev_jc;
+	}
+	dump_job_head(kctx, "Updated to:", job);
+
+	if (job->job_type == JOB_TYPE_TILER) {
+		bool job_64 = job->job_descriptor_size != 0;
+
+		if (kbasep_replay_reset_tiler_job(kctx, *job_header,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, job_64) != 0)
+			goto out_unmap;
+
+	} else if (job->job_type == JOB_TYPE_FRAGMENT) {
+		u64 fbd_address;
+
+		if (job->job_descriptor_size)
+			fbd_address = frag_job->fragment_fbd._64;
+		else
+			fbd_address = (u64)frag_job->fragment_fbd._32;
+
+		if (fbd_address & FBD_TYPE) {
+			if (kbasep_replay_reset_mfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		} else {
+			if (kbasep_replay_reset_sfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		}
+	}
+
+	kbase_vunmap(kctx, &map);
+
+	*job_header = new_job_header;
+
+	return 0;
+
+out_unmap:
+	kbase_vunmap(kctx, &map);
+	return -EINVAL;
+}
+
+/**
+ * @brief Find the highest job ID in a job chain
+ *
+ * @param[in] kctx        Context pointer
+ * @param[in] jc          Job chain start address
+ * @param[out] hw_job_id  Highest job ID in chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
+		u64 jc,	u16 *hw_job_id)
+{
+	while (jc) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		dev_dbg(kctx->kbdev->dev,
+			"kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc);
+
+		job = kbase_vmap(kctx, jc, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev, "failed to map jc\n");
+
+			return -EINVAL;
+		}
+
+		if (job->job_index > *hw_job_id)
+			*hw_job_id = job->job_index;
+
+		if (job->job_descriptor_size)
+			jc = job->next_job._64;
+		else
+			jc = job->next_job._32;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a number of jobs
+ *
+ * This function walks the provided job chain, and calls
+ * kbasep_replay_reset_job for each job. It also links the job chain to the
+ * provided previous job chain.
+ *
+ * The function will fail if any of the jobs passed already have status of
+ * NOT_STARTED.
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] jc                Job chain to be processed
+ * @param[in] prev_jc           Job chain to be added to. May be NULL
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] fragment_chain    true if this chain is the fragment chain
+ *
+ * @return 0 on success, error code otherwise
+ */
+static int kbasep_replay_parse_jc(struct kbase_context *kctx,
+		u64 jc,	u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool fragment_chain)
+{
+	bool first_in_chain = true;
+	int nr_jobs = 0;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n",
+			jc, hw_job_id_offset);
+
+	while (jc) {
+		dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc);
+
+		if (kbasep_replay_reset_job(kctx, &jc, prev_jc,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, hw_job_id_offset,
+				first_in_chain, fragment_chain) != 0)
+			return -EINVAL;
+
+		first_in_chain = false;
+
+		nr_jobs++;
+		if (fragment_chain &&
+		    nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) {
+			dev_err(kctx->kbdev->dev,
+				"Exceeded maximum number of jobs in fragment chain\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a replay job, and set up dependencies
+ *
+ * This performs the actions to allow the replay job to be re-run following
+ * completion of the passed dependency.
+ *
+ * @param[in] katom     The atom to be reset
+ * @param[in] dep_atom  The dependency to be attached to the atom
+ */
+static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom,
+		struct kbase_jd_atom *dep_atom)
+{
+	katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+	list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]);
+}
+
+/**
+ * @brief Allocate an unused katom
+ *
+ * This will search the provided context for an unused katom, and will mark it
+ * as KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * If no atoms are available then the function will fail.
+ *
+ * @param[in] kctx      Context pointer
+ * @return An atom ID, or -1 on failure
+ */
+static int kbasep_allocate_katom(struct kbase_context *kctx)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int i;
+
+	for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) {
+		if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) {
+			jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED;
+			dev_dbg(kctx->kbdev->dev,
+				  "kbasep_allocate_katom: Allocated atom %d\n",
+									    i);
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * @brief Release a katom
+ *
+ * This will mark the provided atom as available, and remove any dependencies.
+ *
+ * For use on error path.
+ *
+ * @param[in] kctx      Context pointer
+ * @param[in] atom_id   ID of atom to release
+ */
+static void kbasep_release_katom(struct kbase_context *kctx, int atom_id)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n",
+			atom_id);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[0]))
+		list_del(jctx->atoms[atom_id].dep_head[0].next);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[1]))
+		list_del(jctx->atoms[atom_id].dep_head[1].next);
+
+	jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED;
+}
+
+static void kbasep_replay_create_atom(struct kbase_context *kctx,
+				      struct base_jd_atom_v2 *atom,
+				      int atom_nr,
+				      base_jd_prio prio)
+{
+	atom->nr_extres = 0;
+	atom->extres_list.value = NULL;
+	atom->device_nr = 0;
+	atom->prio = prio;
+	atom->atom_number = atom_nr;
+
+	base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID);
+
+	atom->udata.blob[0] = 0;
+	atom->udata.blob[1] = 0;
+}
+
+/**
+ * @brief Create two atoms for the purpose of replaying jobs
+ *
+ * Two atoms are allocated and created. The jc pointer is not set at this
+ * stage. The second atom has a dependency on the first. The remaining fields
+ * are set up as follows :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ *
+ * @param[out] t_atom      Atom to use for tiler jobs
+ * @param[out] f_atom      Atom to use for fragment jobs
+ * @param[in]  prio        Priority of new atom (inherited from replay soft
+ *                         job)
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_create_atoms(struct kbase_context *kctx,
+		struct base_jd_atom_v2 *t_atom,
+		struct base_jd_atom_v2 *f_atom,
+		base_jd_prio prio)
+{
+	int t_atom_nr, f_atom_nr;
+
+	t_atom_nr = kbasep_allocate_katom(kctx);
+	if (t_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		return -EINVAL;
+	}
+
+	f_atom_nr = kbasep_allocate_katom(kctx);
+	if (f_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		kbasep_release_katom(kctx, t_atom_nr);
+		return -EINVAL;
+	}
+
+	kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
+	kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
+
+	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA);
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_DEBUG
+static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload)
+{
+	u64 next;
+
+	dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n");
+	next = payload->tiler_jc_list;
+
+	while (next) {
+		struct kbase_vmap_struct map;
+		base_jd_replay_jc *jc_struct;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map);
+
+		if (!jc_struct)
+			return;
+
+		dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n",
+				jc_struct, jc_struct->jc, jc_struct->next);
+
+		next = jc_struct->next;
+
+		kbase_vunmap(kctx, &map);
+	}
+}
+#endif
+
+/**
+ * @brief Parse a base_jd_replay_payload provided by userspace
+ *
+ * This will read the payload from userspace, and parse the job chains.
+ *
+ * @param[in] kctx         Context pointer
+ * @param[in] replay_atom  Replay soft job atom
+ * @param[in] t_atom       Atom to use for tiler jobs
+ * @param[in] f_atom       Atom to use for fragment jobs
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_parse_payload(struct kbase_context *kctx,
+					      struct kbase_jd_atom *replay_atom,
+					      struct base_jd_atom_v2 *t_atom,
+					      struct base_jd_atom_v2 *f_atom)
+{
+	base_jd_replay_payload *payload = NULL;
+	u64 next;
+	u64 prev_jc = 0;
+	u16 hw_job_id_offset = 0;
+	int ret = -EINVAL;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n",
+			replay_atom->jc, sizeof(payload));
+
+	payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n");
+		return -EINVAL;
+	}
+
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+	if (KBASE_API_VERSION(10, 3) > replay_atom->kctx->api_version) {
+		base_jd_replay_payload_uk10_2 *payload_uk10_2;
+		u16 tiler_core_req;
+		u16 fragment_core_req;
+
+		payload_uk10_2 = (base_jd_replay_payload_uk10_2 *) payload;
+		memcpy(&tiler_core_req, &payload_uk10_2->tiler_core_req,
+				sizeof(tiler_core_req));
+		memcpy(&fragment_core_req, &payload_uk10_2->fragment_core_req,
+				sizeof(fragment_core_req));
+		payload->tiler_core_req = (u32)(tiler_core_req & 0x7fff);
+		payload->fragment_core_req = (u32)(fragment_core_req & 0x7fff);
+	}
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload);
+	dev_dbg(kctx->kbdev->dev, "Payload structure:\n"
+				  "tiler_jc_list            = %llx\n"
+				  "fragment_jc              = %llx\n"
+				  "tiler_heap_free          = %llx\n"
+				  "fragment_hierarchy_mask  = %x\n"
+				  "tiler_hierarchy_mask     = %x\n"
+				  "hierarchy_default_weight = %x\n"
+				  "tiler_core_req           = %x\n"
+				  "fragment_core_req        = %x\n",
+							payload->tiler_jc_list,
+							  payload->fragment_jc,
+						      payload->tiler_heap_free,
+					      payload->fragment_hierarchy_mask,
+						 payload->tiler_hierarchy_mask,
+					     payload->hierarchy_default_weight,
+						       payload->tiler_core_req,
+						   payload->fragment_core_req);
+	payload_dump(kctx, payload);
+#endif
+	t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER;
+	f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER;
+
+	/* Sanity check core requirements*/
+	if ((t_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T ||
+	    (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS ||
+	     t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES ||
+	     f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+
+		int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP;
+		int f_atom_type = f_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC;
+		int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+		int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+
+		if (t_atom_type != BASE_JD_REQ_T) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. Was: 0x%x\n Expected: 0x%x",
+			    t_atom_type, BASE_JD_REQ_T);
+		}
+		if (f_atom_type != BASE_JD_REQ_FS) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. Was 0x%x Expected: 0x%x\n",
+			    f_atom_type, BASE_JD_REQ_FS);
+		}
+		if (t_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n");
+		}
+		if (f_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n");
+		}
+
+		goto out;
+	}
+
+	/* Process tiler job chains */
+	next = payload->tiler_jc_list;
+	if (!next) {
+		dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n");
+		goto out;
+	}
+
+	while (next) {
+		base_jd_replay_jc *jc_struct;
+		struct kbase_vmap_struct jc_map;
+		u64 jc;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map);
+
+		if (!jc_struct) {
+			dev_err(kctx->kbdev->dev, "Failed to map jc struct\n");
+			goto out;
+		}
+
+		jc = jc_struct->jc;
+		next = jc_struct->next;
+		if (next)
+			jc_struct->jc = 0;
+
+		kbase_vunmap(kctx, &jc_map);
+
+		if (jc) {
+			u16 max_hw_job_id = 0;
+
+			if (kbasep_replay_find_hw_job_id(kctx, jc,
+					&max_hw_job_id) != 0)
+				goto out;
+
+			if (kbasep_replay_parse_jc(kctx, jc, prev_jc,
+					payload->tiler_heap_free,
+					payload->tiler_hierarchy_mask,
+					payload->hierarchy_default_weight,
+					hw_job_id_offset, false) != 0) {
+				goto out;
+			}
+
+			hw_job_id_offset += max_hw_job_id;
+
+			prev_jc = jc;
+		}
+	}
+	t_atom->jc = prev_jc;
+
+	/* Process fragment job chain */
+	f_atom->jc = payload->fragment_jc;
+	if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0,
+			payload->tiler_heap_free,
+			payload->fragment_hierarchy_mask,
+			payload->hierarchy_default_weight, 0,
+			true) != 0) {
+		goto out;
+	}
+
+	if (!t_atom->jc || !f_atom->jc) {
+		dev_err(kctx->kbdev->dev, "Invalid payload\n");
+		goto out;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n",
+			t_atom->jc, f_atom->jc);
+	ret = 0;
+
+out:
+	kbase_vunmap(kctx, &map);
+
+	return ret;
+}
+
+static void kbase_replay_process_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+	struct kbase_jd_context *jctx;
+	bool need_to_try_schedule_context = false;
+
+	struct base_jd_atom_v2 t_atom, f_atom;
+	struct kbase_jd_atom *t_katom, *f_katom;
+	base_jd_prio atom_prio;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+
+	mutex_lock(&jctx->lock);
+
+	atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority);
+
+	if (kbasep_replay_create_atoms(
+			kctx, &t_atom, &f_atom, atom_prio) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	t_katom = &jctx->atoms[t_atom.atom_number];
+	f_katom = &jctx->atoms[f_atom.atom_number];
+
+	if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) {
+		kbasep_release_katom(kctx, t_atom.atom_number);
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	kbasep_replay_reset_softjob(katom, f_katom);
+
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom);
+	if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom);
+	if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+out:
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_disjoint_state_down(kctx->kbdev);
+
+		need_to_try_schedule_context |= jd_done_nolock(katom, NULL);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kctx->kbdev);
+
+	mutex_unlock(&jctx->lock);
+}
+
+/**
+ * @brief Check job replay fault
+ *
+ * This will read the job payload, checks fault type and source, then decides
+ * whether replay is required.
+ *
+ * @param[in] katom       The atom to be processed
+ * @return  true (success) if replay required or false on failure.
+ */
+static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	base_jd_replay_payload *payload;
+	u64 job_header;
+	u64 job_loop_detect;
+	struct job_descriptor_header *job;
+	struct kbase_vmap_struct job_map;
+	struct kbase_vmap_struct map;
+	bool err = false;
+
+	/* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or
+	 * if force_replay is enabled.
+	 */
+	if (BASE_JD_EVENT_TERMINATED == katom->event_code) {
+		return false;
+	} else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) {
+		return true;
+	} else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) {
+		katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT;
+		return true;
+	} else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) {
+		/* No replay for faults of type other than
+		 * BASE_JD_EVENT_DATA_INVALID_FAULT.
+		 */
+		return false;
+	}
+
+	/* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc
+	 * to find out whether the source of exception is POLYGON_LIST. Replay
+	 * is required if the source of fault is POLYGON_LIST.
+	 */
+	payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n");
+		return false;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload);
+	dev_dbg(dev, "\nPayload structure:\n"
+		     "fragment_jc              = 0x%llx\n"
+		     "fragment_hierarchy_mask  = 0x%x\n"
+		     "fragment_core_req        = 0x%x\n",
+		     payload->fragment_jc,
+		     payload->fragment_hierarchy_mask,
+		     payload->fragment_core_req);
+#endif
+	/* Process fragment job chain */
+	job_header      = (u64) payload->fragment_jc;
+	job_loop_detect = job_header;
+	while (job_header) {
+		job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map);
+		if (!job) {
+			dev_err(dev, "failed to map jc\n");
+			/* unmap payload*/
+			kbase_vunmap(kctx, &map);
+			return false;
+		}
+
+
+		dump_job_head(kctx, "\njob_head structure:\n", job);
+
+		/* Replay only when the polygon list reader caused the
+		 * DATA_INVALID_FAULT */
+		if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
+		   (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) {
+			err = true;
+			kbase_vunmap(kctx, &job_map);
+			break;
+		}
+
+		/* Move on to next fragment job in the list */
+		if (job->job_descriptor_size)
+			job_header = job->next_job._64;
+		else
+			job_header = job->next_job._32;
+
+		kbase_vunmap(kctx, &job_map);
+
+		/* Job chain loop detected */
+		if (job_header == job_loop_detect)
+			break;
+	}
+
+	/* unmap payload*/
+	kbase_vunmap(kctx, &map);
+
+	return err;
+}
+
+
+/**
+ * @brief Process a replay job
+ *
+ * Called from kbase_process_soft_job.
+ *
+ * On exit, if the job has completed, katom->event_code will have been updated.
+ * If the job has not completed, and is replaying jobs, then the atom status
+ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * @param[in] katom  The atom to be processed
+ * @return           false if the atom has completed
+ *                   true if the atom is replaying jobs
+ */
+bool kbase_replay_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	/* Don't replay this atom if these issues are not present in the
+	 * hardware */
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11020) &&
+			!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11024)) {
+		dev_dbg(kbdev->dev, "Hardware does not need replay workaround");
+
+		/* Signal failure to userspace */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+
+		return false;
+	}
+
+	if (katom->event_code == BASE_JD_EVENT_DONE) {
+		dev_dbg(kbdev->dev, "Previous job succeeded - not replaying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		dev_dbg(kbdev->dev, "Not replaying; context is dying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	/* Check job exception type and source before replaying. */
+	if (!kbase_replay_fault_check(katom)) {
+		dev_dbg(kbdev->dev,
+			"Replay cancelled on event %x\n", katom->event_code);
+		/* katom->event_code is already set to the failure code of the
+		 * previous job.
+		 */
+		return false;
+	}
+
+	dev_warn(kbdev->dev, "Replaying jobs retry=%d\n",
+			katom->retry_count);
+
+	katom->retry_count++;
+
+	if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) {
+		dev_err(kbdev->dev, "Replay exceeded limit - failing jobs\n");
+
+		kbase_disjoint_state_down(kbdev);
+
+		/* katom->event_code is already set to the failure code of the
+		   previous job */
+		return false;
+	}
+
+	/* only enter the disjoint state once for the whole time while the replay is ongoing */
+	if (katom->retry_count == 1)
+		kbase_disjoint_state_up(kbdev);
+
+	INIT_WORK(&katom->work, kbase_replay_process_worker);
+	queue_work(kctx->event_workq, &katom->work);
+
+	return true;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
new file mode 100755
index 0000000..43175c8
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
@@ -0,0 +1,74 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+#include <mali_kbase_smc.h>
+
+#include <linux/compiler.h>
+
+static noinline u64 invoke_smc_fid(u64 function_id,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	register u64 x0 asm("x0") = function_id;
+	register u64 x1 asm("x1") = arg0;
+	register u64 x2 asm("x2") = arg1;
+	register u64 x3 asm("x3") = arg2;
+
+	asm volatile(
+			__asmeq("%0", "x0")
+			__asmeq("%1", "x1")
+			__asmeq("%2", "x2")
+			__asmeq("%3", "x3")
+			"smc    #0\n"
+			: "+r" (x0)
+			: "r" (x1), "r" (x2), "r" (x3));
+
+	return x0;
+}
+
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2)
+{
+	/* Is fast call (bit 31 set) */
+	KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL);
+	/* bits 16-23 must be zero for fast calls */
+	KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0);
+
+	return invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	/* Only the six bits allowed should be used. */
+	KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0);
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return kbase_invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+#endif /* CONFIG_ARM64 */
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
new file mode 100755
index 0000000..9bff3d2
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_SMC_H_
+#define _KBASE_SMC_H_
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+
+#define SMC_FAST_CALL (1 << 31)
+#define SMC_64 (1 << 30)
+
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */
+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET)
+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET)
+
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @fid: The SMC function to call, see SMC Calling convention.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC.
+  */
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2);
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @oen: Owning Entity number (SIP, STD etc).
+  * @function_number: The function number within the OEN.
+  * @smc64: use SMC64 calling convention instead of SMC32.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC call.
+  */
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2);
+
+#endif /* CONFIG_ARM64 */
+
+#endif /* _KBASE_SMC_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
new file mode 100755
index 0000000..a025434
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -0,0 +1,1517 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#include <asm/cacheflush.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/dma-mapping.h>
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#include <linux/syscalls.h>
+#include "mali_kbase_sync.h"
+#endif
+#include <mali_base_kernel.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_mem_linux.h>
+#include <linux/version.h>
+#include <linux/ktime.h>
+#include <linux/pfn.h>
+#include <linux/sched.h>
+
+/* Mask to check cache alignment of data structures */
+#define KBASE_CACHE_ALIGNMENT_MASK		((1<<L1_CACHE_SHIFT)-1)
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_add_tail(&katom->queue, &kctx->waiting_soft_jobs);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_del(&katom->queue);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Record the start time of this atom so we could cancel it at
+	 * the right time.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* Add the atom to the waiting list before the timer is
+	 * (re)started to make sure that it gets processed.
+	 */
+	kbasep_add_waiting_soft_job(katom);
+
+	/* Schedule timeout of this atom after a period if it is not active */
+	if (!timer_pending(&kctx->soft_job_timeout)) {
+		int timeout_ms = atomic_read(
+				&kctx->kbdev->js_data.soft_job_timeout_ms);
+		mod_timer(&kctx->soft_job_timeout,
+			  jiffies + msecs_to_jiffies(timeout_ms));
+	}
+}
+
+static int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*status = *mapped_evt;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	if ((new_status != BASE_JD_SOFT_EVENT_SET) &&
+	    (new_status != BASE_JD_SOFT_EVENT_RESET))
+		return -EINVAL;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*mapped_evt = new_status;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+	struct kbase_vmap_struct map;
+	void *user_result;
+	struct timespec ts;
+	struct base_dump_cpu_gpu_counters data;
+	u64 system_time;
+	u64 cycle_counter;
+	u64 jc = katom->jc;
+	struct kbase_context *kctx = katom->kctx;
+	int pm_active_err;
+
+	memset(&data, 0, sizeof(data));
+
+	/* Take the PM active reference as late as possible - otherwise, it could
+	 * delay suspend until we process the atom (which may be at the end of a
+	 * long chain of dependencies */
+	pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+	if (pm_active_err) {
+		struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+		/* We're suspended - queue this on the list of suspended jobs
+		 * Use dep_item[1], because dep_item[0] was previously in use
+		 * for 'waiting_soft_jobs'.
+		 */
+		mutex_lock(&js_devdata->runpool_mutex);
+		list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		/* Also adding this to the list of waiting soft job */
+		kbasep_add_waiting_soft_job(katom);
+
+		return pm_active_err;
+	}
+
+	kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
+									&ts);
+
+	kbase_pm_context_idle(kctx->kbdev);
+
+	data.sec = ts.tv_sec;
+	data.usec = ts.tv_nsec / 1000;
+	data.system_time = system_time;
+	data.cycle_counter = cycle_counter;
+
+	/* Assume this atom will be cancelled until we know otherwise */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* GPU_WR access is checked on the range for returning the result to
+	 * userspace for the following reasons:
+	 * - security, this is currently how imported user bufs are checked.
+	 * - userspace ddk guaranteed to assume region was mapped as GPU_WR */
+	user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map);
+	if (!user_result)
+		return 0;
+
+	memcpy(user_result, &data, sizeof(data));
+
+	kbase_vunmap(kctx, &map);
+
+	/* Atom was fine - mark it as done */
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	return 0;
+}
+
+#ifdef CONFIG_SYNC
+
+static enum base_jd_event_code kbase_fence_trigger(struct kbase_jd_atom *katom, int result)
+{
+	struct sync_pt *pt;
+	struct sync_timeline *timeline;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	if (!list_is_singular(&katom->fence->pt_list_head)) {
+#else
+	if (katom->fence->num_fences != 1) {
+#endif
+		/* Not exactly one item in the list - so it didn't (directly) come from us */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	pt = list_first_entry(&katom->fence->pt_list_head, struct sync_pt, pt_list);
+#else
+	pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
+#endif
+	timeline = sync_pt_parent(pt);
+
+	if (!kbase_sync_timeline_is_ours(timeline)) {
+		/* Fence has a sync_pt which isn't ours! */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	kbase_sync_signal_pt(pt, result);
+
+	sync_timeline_signal(timeline);
+
+	return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+static void kbase_fence_wait_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(kctx->kbdev);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter)
+{
+	struct kbase_jd_atom *katom = container_of(waiter, struct kbase_jd_atom, sync_waiter);
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != katom);
+
+	kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	/* Propagate the fence status to the atom.
+	 * If negative then cancel this atom and its dependencies.
+	 */
+	if (kbase_fence_get_status(fence) < 0)
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue
+	 *
+	 * The issue is that we may signal the timeline while holding kctx->jctx.lock and
+	 * the callbacks are run synchronously from sync_timeline_signal. So we simply defer the work.
+	 */
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, kbase_fence_wait_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+static int kbase_fence_wait(struct kbase_jd_atom *katom)
+{
+	int ret;
+
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	KBASE_DEBUG_ASSERT(NULL != katom->kctx);
+
+	sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+	ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+	if (ret == 1) {
+		/* Already signalled */
+		return 0;
+	}
+
+	if (ret < 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+		INIT_WORK(&katom->work, kbase_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+	/* The timeout code will add this job to the list of waiting soft jobs.
+	 */
+	kbasep_add_waiting_with_timeout(katom);
+#else
+	kbasep_add_waiting_soft_job(katom);
+#endif
+
+	return 1;
+}
+
+static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+		/* The wait wasn't cancelled - leave the cleanup for kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+#endif /* CONFIG_SYNC */
+
+static void kbasep_soft_event_complete_job(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt)
+{
+	int cancel_timer = 1;
+	struct list_head *entry, *tmp;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, queue);
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			if (katom->jc == evt) {
+				list_del(&katom->queue);
+
+				katom->event_code = BASE_JD_EVENT_DONE;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				/* There are still other waiting jobs, we cannot
+				 * cancel the timer yet.
+				 */
+				cancel_timer = 0;
+			}
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			/* Keep the timer running if fence debug is enabled and
+			 * there are waiting fence jobs.
+			 */
+			cancel_timer = 0;
+			break;
+#endif
+		}
+	}
+
+	if (cancel_timer)
+		del_timer(&kctx->soft_job_timeout);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+static char *kbase_fence_debug_status_string(int status)
+{
+	if (status == 0)
+		return "signaled";
+	else if (status > 0)
+		return "active";
+	else
+		return "error";
+}
+
+static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep;
+
+		list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) {
+			if (dep->status == KBASE_JD_ATOM_STATE_UNUSED ||
+			    dep->status == KBASE_JD_ATOM_STATE_COMPLETED)
+				continue;
+
+			if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					== BASE_JD_REQ_SOFT_FENCE_TRIGGER) {
+				struct sync_fence *fence = dep->fence;
+				int status = kbase_fence_get_status(fence);
+
+				/* Found blocked trigger fence. */
+				dev_warn(dev,
+					 "\tVictim trigger atom %d fence [%p] %s: %s\n",
+					 kbase_jd_atom_id(kctx, dep),
+					 fence, fence->name,
+					 kbase_fence_debug_status_string(status));
+			}
+
+			kbase_fence_debug_check_atom(dep);
+		}
+	}
+}
+
+static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = katom->kctx->kbdev->dev;
+	struct sync_fence *fence = katom->fence;
+	int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms);
+	int status = kbase_fence_get_status(fence);
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+
+	dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n",
+		 kctx->tgid, kctx->id,
+		 kbase_jd_atom_id(kctx, katom),
+		 fence, timeout_ms);
+	dev_warn(dev, "\tGuilty fence [%p] %s: %s\n",
+		 fence, fence->name,
+		 kbase_fence_debug_status_string(status));
+
+	/* Search for blocked trigger atoms */
+	kbase_fence_debug_check_atom(katom);
+
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+
+	/* Dump out the full state of all the Android sync fences.
+	 * The function sync_dump() isn't exported to modules, so force
+	 * sync_fence_wait() to time out to trigger sync_dump().
+	 */
+	sync_fence_wait(fence, 1);
+}
+
+struct kbase_fence_debug_work {
+	struct kbase_jd_atom *katom;
+	struct work_struct work;
+};
+
+static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work)
+{
+	struct kbase_fence_debug_work *w = container_of(work,
+			struct kbase_fence_debug_work, work);
+	struct kbase_jd_atom *katom = w->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_fence_debug_wait_timeout(katom);
+	mutex_unlock(&kctx->jctx.lock);
+
+	kfree(w);
+}
+
+static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_debug_work *work;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Enqueue fence debug worker. Use job_done_wq to get
+	 * debug print ordered with job completion.
+	 */
+	work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC);
+	/* Ignore allocation failure. */
+	if (work) {
+		work->katom = katom;
+		INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker);
+		queue_work(kctx->jctx.job_done_wq, &work->work);
+	}
+}
+#endif /* CONFIG_MALI_FENCE_DEBUG */
+
+void kbasep_soft_job_timeout_worker(unsigned long data)
+{
+	struct kbase_context *kctx = (struct kbase_context *)data;
+	u32 timeout_ms = (u32)atomic_read(
+			&kctx->kbdev->js_data.soft_job_timeout_ms);
+	struct timer_list *timer = &kctx->soft_job_timeout;
+	ktime_t cur_time = ktime_get();
+	bool restarting = false;
+	unsigned long lflags;
+	struct list_head *entry, *tmp;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(entry,
+				struct kbase_jd_atom, queue);
+		s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time,
+					katom->start_timestamp));
+
+		if (elapsed_time < (s64)timeout_ms) {
+			restarting = true;
+			continue;
+		}
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			/* Take it out of the list to ensure that it
+			 * will be cancelled in all cases
+			 */
+			list_del(&katom->queue);
+
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			INIT_WORK(&katom->work, kbasep_soft_event_complete_job);
+			queue_work(kctx->jctx.job_done_wq, &katom->work);
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			kbase_fence_debug_timeout(katom);
+			break;
+#endif
+		}
+	}
+
+	if (restarting)
+		mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms));
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned char status;
+
+	/* The status of this soft-job is stored in jc */
+	if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return 0;
+	}
+
+	if (status == BASE_JD_SOFT_EVENT_SET)
+		return 0; /* Event already set, nothing to do */
+
+	kbasep_add_waiting_with_timeout(katom);
+
+	return 1;
+}
+
+static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom,
+				     unsigned char new_status)
+{
+	/* Complete jobs waiting on the same event */
+	struct kbase_context *kctx = katom->kctx;
+
+	if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, katom->jc);
+}
+
+/**
+ * kbase_soft_event_update() - Update soft event state
+ * @kctx: Pointer to context
+ * @event: Event to update
+ * @new_status: New status value of event
+ *
+ * Update the event, and wake up any atoms waiting for the event.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+int kbase_soft_event_update(struct kbase_context *kctx,
+			     u64 event,
+			     unsigned char new_status)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->jctx.lock);
+
+	if (kbasep_write_soft_event_status(kctx, event, new_status)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, event);
+
+out:
+	mutex_unlock(&kctx->jctx.lock);
+
+	return err;
+}
+
+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
+{
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+struct kbase_debug_copy_buffer {
+	size_t size;
+	struct page **pages;
+	int nr_pages;
+	size_t offset;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+
+	struct page **extres_pages;
+	int nr_extres_pages;
+};
+
+static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer)
+{
+	struct page **pages = buffer->extres_pages;
+	int nr_pages = buffer->nr_extres_pages;
+
+	if (pages) {
+		int i;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *pg = pages[i];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(pages);
+	}
+}
+
+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+
+	if (!buffers)
+		return;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	for (i = 0; i < nr; i++) {
+		int p;
+		struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc;
+
+		if (!buffers[i].pages)
+			break;
+		for (p = 0; p < buffers[i].nr_pages; p++) {
+			struct page *pg = buffers[i].pages[p];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(buffers[i].pages);
+		if (gpu_alloc) {
+			switch (gpu_alloc->type) {
+			case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+			{
+				free_user_buffer(&buffers[i]);
+				break;
+			}
+			default:
+				/* Nothing to be done. */
+				break;
+			}
+			kbase_mem_phy_alloc_put(gpu_alloc);
+		}
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+	kfree(buffers);
+
+	katom->jc = 0;
+}
+
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers;
+	struct base_jd_debug_copy_buffer *user_buffers = NULL;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+	int ret = 0;
+	void __user *user_structs = (void __user *)(uintptr_t)katom->jc;
+
+	if (!user_structs)
+		return -EINVAL;
+
+	buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers) {
+		ret = -ENOMEM;
+		katom->jc = 0;
+		goto out_cleanup;
+	}
+	katom->jc = (u64)(uintptr_t)buffers;
+
+	user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL);
+
+	if (!user_buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+
+	ret = copy_from_user(user_buffers, user_structs,
+			sizeof(*user_buffers)*nr);
+	if (ret)
+		goto out_cleanup;
+
+	for (i = 0; i < nr; i++) {
+		u64 addr = user_buffers[i].address;
+		u64 page_addr = addr & PAGE_MASK;
+		u64 end_page_addr = addr + user_buffers[i].size - 1;
+		u64 last_page_addr = end_page_addr & PAGE_MASK;
+		int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1;
+		int pinned_pages;
+		struct kbase_va_region *reg;
+		struct base_external_resource user_extres;
+
+		if (!addr)
+			continue;
+
+		buffers[i].nr_pages = nr_pages;
+		buffers[i].offset = addr & ~PAGE_MASK;
+		if (buffers[i].offset >= PAGE_SIZE) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+		buffers[i].size = user_buffers[i].size;
+
+		buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *),
+				GFP_KERNEL);
+		if (!buffers[i].pages) {
+			ret = -ENOMEM;
+			goto out_cleanup;
+		}
+
+		pinned_pages = get_user_pages_fast(page_addr,
+					nr_pages,
+					1, /* Write */
+					buffers[i].pages);
+		if (pinned_pages < 0) {
+			ret = pinned_pages;
+			goto out_cleanup;
+		}
+		if (pinned_pages != nr_pages) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		user_extres = user_buffers[i].extres;
+		if (user_extres.ext_resource == 0ULL) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		kbase_gpu_vm_lock(katom->kctx);
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, user_extres.ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+		if (NULL == reg || NULL == reg->gpu_alloc ||
+				(reg->flags & KBASE_REG_FREE)) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		buffers[i].nr_extres_pages = reg->nr_pages;
+
+		if (reg->nr_pages*PAGE_SIZE != buffers[i].size)
+			dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n");
+
+		switch (reg->gpu_alloc->type) {
+		case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		{
+			struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+			unsigned long nr_pages =
+				alloc->imported.user_buf.nr_pages;
+
+			if (alloc->imported.user_buf.mm != current->mm) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			buffers[i].extres_pages = kcalloc(nr_pages,
+					sizeof(struct page *), GFP_KERNEL);
+			if (!buffers[i].extres_pages) {
+				ret = -ENOMEM;
+				goto out_unlock;
+			}
+
+			ret = get_user_pages_fast(
+					alloc->imported.user_buf.address,
+					nr_pages, 0,
+					buffers[i].extres_pages);
+			if (ret != nr_pages)
+				goto out_unlock;
+			ret = 0;
+			break;
+		}
+		case KBASE_MEM_TYPE_IMPORTED_UMP:
+		{
+			dev_warn(katom->kctx->kbdev->dev,
+					"UMP is not supported for debug_copy jobs\n");
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		default:
+			/* Nothing to be done. */
+			break;
+		}
+		kbase_gpu_vm_unlock(katom->kctx);
+	}
+	kfree(user_buffers);
+
+	return ret;
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+
+out_cleanup:
+	kfree(buffers);
+	kfree(user_buffers);
+
+	/* Frees allocated memory for kbase_debug_copy_job struct, including
+	 * members, and sets jc to 0 */
+	kbase_debug_copy_finish(katom);
+	return ret;
+}
+
+static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+		void *extres_page, struct page **pages, unsigned int nr_pages,
+		unsigned int *target_page_nr, size_t offset, size_t *to_copy)
+{
+	void *target_page = kmap(pages[*target_page_nr]);
+	size_t chunk = PAGE_SIZE-offset;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	chunk = min(chunk, *to_copy);
+
+	memcpy(target_page + offset, extres_page, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+
+	*target_page_nr += 1;
+	if (*target_page_nr >= nr_pages)
+		return;
+
+	target_page = kmap(pages[*target_page_nr]);
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(target_page);
+
+	chunk = min(offset, *to_copy);
+	memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+}
+
+static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data)
+{
+	unsigned int i;
+	unsigned int target_page_nr = 0;
+	struct page **pages = buf_data->pages;
+	u64 offset = buf_data->offset;
+	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
+	size_t to_copy = min(extres_size, buf_data->size);
+	struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc;
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(pages != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	if (!gpu_alloc) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	switch (gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+	{
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+			struct page *pg = buf_data->extres_pages[i];
+			void *extres_page = kmap(pg);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			kunmap(pg);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		break;
+	}
+	break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf;
+
+		KBASE_DEBUG_ASSERT(dma_buf != NULL);
+		KBASE_DEBUG_ASSERT(dma_buf->size ==
+				   buf_data->nr_extres_pages * PAGE_SIZE);
+
+		ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, buf_data->nr_extres_pages*PAGE_SIZE,
+#endif
+				DMA_FROM_DEVICE);
+		if (ret)
+			goto out_unlock;
+
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+
+			void *extres_page = dma_buf_kmap(dma_buf, i);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			dma_buf_kunmap(dma_buf, i, extres_page);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, buf_data->nr_extres_pages*PAGE_SIZE,
+#endif
+				DMA_FROM_DEVICE);
+		break;
+	}
+#endif
+	default:
+		ret = -EINVAL;
+	}
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+
+}
+
+static int kbase_debug_copy(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+
+	for (i = 0; i < katom->nr_extres; i++) {
+		int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]);
+
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	struct base_jit_alloc_info *info;
+	int ret;
+
+	/* Fail the job if there is no info structure */
+	if (!data) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(info, data, sizeof(*info)) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* If the ID is zero then fail the job */
+	if (info->id == 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Sanity check that the PA fits within the VA */
+	if (info->va_pages < info->commit_pages) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Ensure the GPU address is correctly aligned */
+	if ((info->gpu_alloc_addr & 0x7) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Replace the user pointer with our kernel allocated info structure */
+	katom->jc = (u64)(uintptr_t) info;
+
+	/*
+	 * Note:
+	 * The provided info->gpu_alloc_addr isn't validated here as
+	 * userland can cache allocations which means that even
+	 * though the region is valid it doesn't represent the
+	 * same thing it used to.
+	 *
+	 * Complete validation of va_pages, commit_pages and extent
+	 * isn't done here as it will be done during the call to
+	 * kbase_mem_alloc.
+	 */
+	return 0;
+
+free_info:
+	kfree(info);
+fail:
+	katom->jc = 0;
+	return ret;
+}
+
+static void kbase_jit_allocate_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct base_jit_alloc_info *info;
+	struct kbase_va_region *reg;
+	struct kbase_vmap_struct mapping;
+	u64 *ptr;
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+
+	/* The JIT ID is still in use so fail the allocation */
+	if (kctx->jit_alloc[info->id]) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return;
+	}
+
+	/*
+	 * Mark the allocation so we know it's in use even if the
+	 * allocation itself fails.
+	 */
+	kctx->jit_alloc[info->id] = (struct kbase_va_region *) -1;
+
+	/* Create a JIT allocation */
+	reg = kbase_jit_allocate(kctx, info);
+	if (!reg) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return;
+	}
+
+	/*
+	 * Write the address of the JIT allocation to the user provided
+	 * GPU allocation.
+	 */
+	ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+			&mapping);
+	if (!ptr) {
+		/*
+		 * Leave the allocation "live" as the JIT free jit will be
+		 * submitted anyway.
+		 */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	*ptr = reg->start_pfn << PAGE_SHIFT;
+	kbase_vunmap(kctx, &mapping);
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	/*
+	 * Bind it to the user provided ID. Do this last so we can check for
+	 * the JIT free racing this JIT alloc job.
+	 */
+	kctx->jit_alloc[info->id] = reg;
+}
+
+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom)
+{
+	struct base_jit_alloc_info *info;
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(info);
+}
+
+static void kbase_jit_free_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u8 id = (u8) katom->jc;
+
+	/*
+	 * If the ID is zero or it is not in use yet then fail the job.
+	 */
+	if ((id == 0) || (kctx->jit_alloc[id] == NULL)) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	/*
+	 * If the ID is valid but the allocation request failed still succeed
+	 * this soft job but don't try and free the allocation.
+	 */
+	if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1)
+		kbase_jit_free(kctx, kctx->jit_alloc[id]);
+
+	kctx->jit_alloc[id] = NULL;
+}
+
+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
+{
+	__user struct base_external_resource_list *user_ext_res;
+	struct base_external_resource_list *ext_res;
+	u64 count = 0;
+	size_t copy_size;
+	int ret;
+
+	user_ext_res = (__user struct base_external_resource_list *)
+			(uintptr_t) katom->jc;
+
+	/* Fail the job if there is no info structure */
+	if (!user_ext_res) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Is the number of external resources in range? */
+	if (!count || count > BASE_EXT_RES_COUNT_MAX) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	copy_size = sizeof(*ext_res);
+	copy_size += sizeof(struct base_external_resource) * (count - 1);
+	ext_res = kzalloc(copy_size, GFP_KERNEL);
+	if (!ext_res) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/*
+	 * Overwrite the count with the first value incase it was changed
+	 * after the fact.
+	 */
+	ext_res->count = count;
+
+	/*
+	 * Replace the user pointer with our kernel allocated
+	 * ext_res structure.
+	 */
+	katom->jc = (u64)(uintptr_t) ext_res;
+
+	return 0;
+
+free_info:
+	kfree(ext_res);
+fail:
+	return ret;
+}
+
+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
+{
+	struct base_external_resource_list *ext_res;
+	int i;
+	bool failed = false;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	if (!ext_res)
+		goto failed_jc;
+
+	kbase_gpu_vm_lock(katom->kctx);
+
+	for (i = 0; i < ext_res->count; i++) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		if (map) {
+			if (!kbase_sticky_resource_acquire(katom->kctx,
+					gpu_addr))
+				goto failed_loop;
+		} else
+			if (!kbase_sticky_resource_release(katom->kctx, NULL,
+					gpu_addr))
+				failed = true;
+	}
+
+	/*
+	 * In the case of unmap we continue unmapping other resources in the
+	 * case of failure but will always report failure if _any_ unmap
+	 * request fails.
+	 */
+	if (failed)
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	else
+		katom->event_code = BASE_JD_EVENT_DONE;
+
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	return;
+
+failed_loop:
+	while (--i > 0) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+
+		kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr);
+	}
+
+	katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	kbase_gpu_vm_unlock(katom->kctx);
+
+failed_jc:
+	return;
+}
+
+static void kbase_ext_res_finish(struct kbase_jd_atom *katom)
+{
+	struct base_external_resource_list *ext_res;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(ext_res);
+}
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		return kbase_dump_cpu_gpu_time(katom);
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		KBASE_DEBUG_ASSERT(katom->fence != NULL);
+		katom->event_code = kbase_fence_trigger(katom, katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+		/* Release the reference as we don't need it any more */
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		return kbase_fence_wait(katom);
+#endif				/* CONFIG_SYNC */
+	case BASE_JD_REQ_SOFT_REPLAY:
+		return kbase_replay_process(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		return kbasep_soft_event_wait(katom);
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET);
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+	{
+		int res = kbase_debug_copy(katom);
+
+		if (res)
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		break;
+	}
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return -EINVAL; /* Temporarily disabled */
+		kbase_jit_allocate_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		return -EINVAL; /* Temporarily disabled */
+		kbase_jit_free_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_process(katom, true);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_process(katom, false);
+		break;
+	}
+
+	/* Atom is complete */
+	return 0;
+}
+
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		kbase_fence_cancel_wait(katom);
+		break;
+#endif
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		kbasep_soft_event_cancel_job(katom);
+		break;
+	default:
+		/* This soft-job doesn't support cancellation! */
+		KBASE_DEBUG_ASSERT(0);
+	}
+}
+
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		{
+			if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK))
+				return -EINVAL;
+		}
+		break;
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		{
+			struct base_fence fence;
+			int fd;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			fd = kbase_stream_create_fence(fence.basep.stream_fd);
+			if (fd < 0)
+				return -EINVAL;
+
+			katom->fence = sync_fence_fdget(fd);
+
+			if (katom->fence == NULL) {
+				/* The only way the fence can be NULL is if userspace closed it for us.
+				 * So we don't need to clear it up */
+				return -EINVAL;
+			}
+			fence.basep.fd = fd;
+			if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+				katom->fence = NULL;
+				sys_close(fd);
+				return -EINVAL;
+			}
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		{
+			struct base_fence fence;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			/* Get a reference to the fence object */
+			katom->fence = sync_fence_fdget(fence.basep.fd);
+			if (katom->fence == NULL)
+				return -EINVAL;
+
+#ifdef CONFIG_MALI_DMA_FENCE
+			/*
+			 * Set KCTX_NO_IMPLICIT_FENCE in the context the first
+			 * time a soft fence wait job is observed. This will
+			 * prevent the implicit dma-buf fence to conflict with
+			 * the Android native sync fences.
+			 */
+			if (!kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC))
+				kbase_ctx_flag_set(katom->kctx, KCTX_NO_IMPLICIT_SYNC);
+#endif /* CONFIG_MALI_DMA_FENCE */
+		}
+		break;
+#endif				/* CONFIG_SYNC */
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_prepare(katom);
+	case BASE_JD_REQ_SOFT_REPLAY:
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		if (katom->jc == 0)
+			return -EINVAL;
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		return kbase_debug_copy_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		return kbase_ext_res_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		return kbase_ext_res_prepare(katom);
+	default:
+		/* Unsupported soft-job */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void kbase_finish_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		/* Nothing to do */
+		break;
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		/* If fence has not yet been signalled, do it now */
+		if (katom->fence) {
+			kbase_fence_trigger(katom, katom->event_code ==
+					BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+			sync_fence_put(katom->fence);
+			katom->fence = NULL;
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		/* Release the reference to the fence object */
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+		break;
+#endif				/* CONFIG_SYNC */
+
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		kbase_debug_copy_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_finish(katom);
+		break;
+	}
+}
+
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
+{
+	LIST_HEAD(local_suspended_soft_jobs);
+	struct kbase_jd_atom *tmp_iter;
+	struct kbase_jd_atom *katom_iter;
+	struct kbasep_js_device_data *js_devdata;
+	bool resched = false;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	js_devdata = &kbdev->js_data;
+
+	/* Move out the entire list */
+	mutex_lock(&js_devdata->runpool_mutex);
+	list_splice_init(&js_devdata->suspended_soft_jobs_list,
+			&local_suspended_soft_jobs);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/*
+	 * Each atom must be detached from the list and ran separately -
+	 * it could be re-added to the old list, but this is unlikely
+	 */
+	list_for_each_entry_safe(katom_iter, tmp_iter,
+			&local_suspended_soft_jobs, dep_item[1]) {
+		struct kbase_context *kctx = katom_iter->kctx;
+
+		mutex_lock(&kctx->jctx.lock);
+
+		/* Remove from the global list */
+		list_del(&katom_iter->dep_item[1]);
+		/* Remove from the context's list of waiting soft jobs */
+		kbasep_remove_waiting_soft_job(katom_iter);
+
+		if (kbase_process_soft_job(katom_iter) == 0) {
+			kbase_finish_soft_job(katom_iter);
+			resched |= jd_done_nolock(katom_iter, NULL);
+		} else {
+			KBASE_DEBUG_ASSERT((katom_iter->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE)
+					!= BASE_JD_REQ_SOFT_REPLAY);
+		}
+
+		mutex_unlock(&kctx->jctx.lock);
+	}
+
+	if (resched)
+		kbase_js_sched_all(kbdev);
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
new file mode 100644
index 0000000..c98762c
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
@@ -0,0 +1,23 @@
+ /*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include "mali_kbase_strings.h"
+
+#define KBASE_DRV_NAME "mali"
+#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline"
+
+const char kbase_drv_name[] = KBASE_DRV_NAME;
+const char kbase_timeline_name[] = KBASE_TIMELINE_NAME;
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
new file mode 100644
index 0000000..41b8fdb
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
@@ -0,0 +1,19 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+extern const char kbase_drv_name[];
+extern const char kbase_timeline_name[];
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c
new file mode 100755
index 0000000..c5fb981
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c
@@ -0,0 +1,182 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_SYNC
+
+#include <linux/seq_file.h>
+#include "sync.h"
+#include <mali_kbase.h>
+#include <mali_kbase_sync.h>
+
+struct mali_sync_timeline {
+	struct sync_timeline timeline;
+	atomic_t counter;
+	atomic_t signalled;
+};
+
+struct mali_sync_pt {
+	struct sync_pt pt;
+	int order;
+	int result;
+};
+
+static struct mali_sync_timeline *to_mali_sync_timeline(struct sync_timeline *timeline)
+{
+	return container_of(timeline, struct mali_sync_timeline, timeline);
+}
+
+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, pt);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_pt *new_mpt;
+	struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), sizeof(struct mali_sync_pt));
+
+	if (!new_pt)
+		return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+	new_mpt->order = mpt->order;
+	new_mpt->result = mpt->result;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+	int result = mpt->result;
+
+	int diff = atomic_read(&mtl->signalled) - mpt->order;
+
+	if (diff >= 0)
+		return (result < 0) ? result : 1;
+
+	return 0;
+}
+
+static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
+	struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
+
+	int diff = ma->order - mb->order;
+
+	if (diff == 0)
+		return 0;
+
+	return (diff < 0) ? -1 : 1;
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str,
+			       int size)
+{
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
+
+	snprintf(str, size, "%d", atomic_read(&mtl->signalled));
+}
+
+static void pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+
+	snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
+}
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name = "Mali",
+	.dup = timeline_dup,
+	.has_signaled = timeline_has_signaled,
+	.compare = timeline_compare,
+	.timeline_value_str = timeline_value_str,
+	.pt_value_str       = pt_value_str,
+};
+
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
+{
+	return timeline->ops == &mali_timeline_ops;
+}
+
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name)
+{
+	struct sync_timeline *tl;
+	struct mali_sync_timeline *mtl;
+
+	tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline), name);
+	if (!tl)
+		return NULL;
+
+	/* Set the counter in our private struct */
+	mtl = to_mali_sync_timeline(tl);
+	atomic_set(&mtl->counter, 0);
+	atomic_set(&mtl->signalled, 0);
+
+	return tl;
+}
+
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+	struct sync_pt *pt = sync_pt_create(parent, sizeof(struct mali_sync_pt));
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+	struct mali_sync_pt *mpt;
+
+	if (!pt)
+		return NULL;
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->order = atomic_inc_return(&mtl->counter);
+	mpt->result = 0;
+
+	return pt;
+}
+
+void kbase_sync_signal_pt(struct sync_pt *pt, int result)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+	int signalled;
+	int diff;
+
+	mpt->result = result;
+
+	do {
+		signalled = atomic_read(&mtl->signalled);
+
+		diff = signalled - mpt->order;
+
+		if (diff > 0) {
+			/* The timeline is already at or ahead of this point.
+			 * This should not happen unless userspace has been
+			 * signalling fences out of order, so warn but don't
+			 * violate the sync_pt API.
+			 * The warning is only in debug builds to prevent
+			 * a malicious user being able to spam dmesg.
+			 */
+#ifdef CONFIG_MALI_DEBUG
+			pr_err("Fences were triggered in a different order to allocation!");
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+	} while (atomic_cmpxchg(&mtl->signalled, signalled, mpt->order) != signalled);
+}
+
+#endif				/* CONFIG_SYNC */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
new file mode 100755
index 0000000..820bddc
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -0,0 +1,100 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync.h
+ *
+ */
+
+#ifndef MALI_KBASE_SYNC_H
+#define MALI_KBASE_SYNC_H
+
+#include "sync.h"
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+/* For backwards compatiblility with kernels before 3.17. After 3.17
+ * sync_pt_parent is included in the kernel. */
+static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
+{
+	return pt->parent;
+}
+#endif
+
+static inline int kbase_fence_get_status(struct sync_fence *fence)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	return fence->status;
+#else
+	return atomic_read(&fence->status);
+#endif
+}
+
+/*
+ * Create a stream object.
+ * Built on top of timeline object.
+ * Exposed as a file descriptor.
+ * Life-time controlled via the file descriptor:
+ * - dup to add a ref
+ * - close to remove a ref
+ */
+int kbase_stream_create(const char *name, int *const out_fd);
+
+/*
+ * Create a fence in a stream object
+ */
+int kbase_stream_create_fence(int tl_fd);
+
+/*
+ * Validate a fd to be a valid fence
+ * No reference is taken.
+ *
+ * This function is only usable to catch unintentional user errors early,
+ * it does not stop malicious code changing the fd after this function returns.
+ */
+int kbase_fence_validate(int fd);
+
+/* Returns true if the specified timeline is allocated by Mali */
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline);
+
+/* Allocates a timeline for Mali
+ *
+ * One timeline should be allocated per API context.
+ */
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name);
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by kbase_sync_timeline_alloc
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ */
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent);
+
+/* Signals a particular sync point
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ *
+ * If they are signalled in the wrong order then a message will be printed in debug
+ * builds and otherwise attempts to signal order sync_pts will be ignored.
+ *
+ * result can be negative to indicate error, any other value is interpreted as success.
+ */
+void kbase_sync_signal_pt(struct sync_pt *pt, int result);
+
+#endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
new file mode 100755
index 0000000..b9baa91
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync_user.c
+ *
+ */
+
+#ifdef CONFIG_SYNC
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <mali_kbase_sync.h>
+
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+	struct sync_timeline *tl;
+
+	tl = (struct sync_timeline *)file->private_data;
+	BUG_ON(!tl);
+	sync_timeline_destroy(tl);
+	return 0;
+}
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE,
+	.release = kbase_stream_close,
+};
+
+int kbase_stream_create(const char *name, int *const out_fd)
+{
+	struct sync_timeline *tl;
+
+	BUG_ON(!out_fd);
+
+	tl = kbase_sync_timeline_alloc(name);
+	if (!tl)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY | O_CLOEXEC);
+
+	if (*out_fd < 0) {
+		sync_timeline_destroy(tl);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int kbase_stream_create_fence(int tl_fd)
+{
+	struct sync_timeline *tl;
+	struct sync_pt *pt;
+	struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+	struct files_struct *files;
+	struct fdtable *fdt;
+#endif
+	int fd;
+	struct file *tl_file;
+
+	tl_file = fget(tl_fd);
+	if (tl_file == NULL)
+		return -EBADF;
+
+	if (tl_file->f_op != &stream_fops) {
+		fd = -EBADF;
+		goto out;
+	}
+
+	tl = tl_file->private_data;
+
+	pt = kbase_sync_pt_alloc(tl);
+	if (!pt) {
+		fd = -EFAULT;
+		goto out;
+	}
+
+	fence = sync_fence_create("mali_fence", pt);
+	if (!fence) {
+		sync_pt_free(pt);
+		fd = -EFAULT;
+		goto out;
+	}
+
+	/* from here the fence owns the sync_pt */
+
+	/* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+#else
+	fd = get_unused_fd();
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+
+	files = current->files;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	__set_close_on_exec(fd, fdt);
+#else
+	FD_SET(fd, fdt->close_on_exec);
+#endif
+	spin_unlock(&files->file_lock);
+#endif  /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+	/* bind fence to the new fd */
+	sync_fence_install(fence, fd);
+
+ out:
+	fput(tl_file);
+
+	return fd;
+}
+
+int kbase_fence_validate(int fd)
+{
+	struct sync_fence *fence;
+
+	fence = sync_fence_fdget(fd);
+	if (!fence)
+		return -EINVAL;
+
+	sync_fence_put(fence);
+	return 0;
+}
+
+#endif				/* CONFIG_SYNC */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
new file mode 100755
index 0000000..4c1535f
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -0,0 +1,2342 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* The version of swtrace protocol used in timeline stream. */
+#define SWTRACE_VERSION    3
+
+/* The maximum expected length of string in tracepoint descriptor. */
+#define STRLEN_MAX         64 /* bytes */
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC       1000000000ull /* ns */
+
+/* The period of autoflush checker execution in milliseconds. */
+#define AUTOFLUSH_INTERVAL 1000 /* ms */
+
+/* The maximum size of a single packet used by timeline. */
+#define PACKET_SIZE        4096 /* bytes */
+
+/* The number of packets used by one timeline stream. */
+#define PACKET_COUNT       16
+
+/* The number of bytes reserved for packet header.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_HEADER_SIZE 8 /* bytes */
+
+/* The number of bytes reserved for packet sequence number.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_NUMBER_SIZE 4 /* bytes */
+
+/* Packet header - first word.
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_STREAMID_POS  0
+#define PACKET_STREAMID_LEN  8
+#define PACKET_RSVD1_POS     (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
+#define PACKET_RSVD1_LEN     8
+#define PACKET_TYPE_POS      (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
+#define PACKET_TYPE_LEN      3
+#define PACKET_CLASS_POS     (PACKET_TYPE_POS + PACKET_TYPE_LEN)
+#define PACKET_CLASS_LEN     7
+#define PACKET_FAMILY_POS    (PACKET_CLASS_POS + PACKET_CLASS_LEN)
+#define PACKET_FAMILY_LEN    6
+
+/* Packet header - second word
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_LENGTH_POS    0
+#define PACKET_LENGTH_LEN    24
+#define PACKET_SEQBIT_POS    (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
+#define PACKET_SEQBIT_LEN    1
+#define PACKET_RSVD2_POS     (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
+#define PACKET_RSVD2_LEN     7
+
+/* Types of streams generated by timeline.
+ * Order is significant! Header streams must precede respective body streams. */
+enum tl_stream_type {
+	TL_STREAM_TYPE_OBJ_HEADER,
+	TL_STREAM_TYPE_OBJ_SUMMARY,
+	TL_STREAM_TYPE_OBJ,
+	TL_STREAM_TYPE_AUX_HEADER,
+	TL_STREAM_TYPE_AUX,
+
+	TL_STREAM_TYPE_COUNT
+};
+
+/* Timeline packet family ids.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_family {
+	TL_PACKET_FAMILY_CTRL = 0, /* control packets */
+	TL_PACKET_FAMILY_TL   = 1, /* timeline packets */
+
+	TL_PACKET_FAMILY_COUNT
+};
+
+/* Packet classes used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_class {
+	TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
+	TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
+};
+
+/* Packet types used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_type {
+	TL_PACKET_TYPE_HEADER  = 0, /* stream's header/directory */
+	TL_PACKET_TYPE_BODY    = 1, /* stream's body */
+	TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
+};
+
+/* Message ids of trace events that are recorded in the timeline stream. */
+enum tl_msg_id_obj {
+	/* Timeline object events. */
+	KBASE_TL_NEW_CTX,
+	KBASE_TL_NEW_GPU,
+	KBASE_TL_NEW_LPU,
+	KBASE_TL_NEW_ATOM,
+	KBASE_TL_NEW_AS,
+	KBASE_TL_DEL_CTX,
+	KBASE_TL_DEL_ATOM,
+	KBASE_TL_LIFELINK_LPU_GPU,
+	KBASE_TL_LIFELINK_AS_GPU,
+	KBASE_TL_RET_CTX_LPU,
+	KBASE_TL_RET_ATOM_CTX,
+	KBASE_TL_RET_ATOM_LPU,
+	KBASE_TL_NRET_CTX_LPU,
+	KBASE_TL_NRET_ATOM_CTX,
+	KBASE_TL_NRET_ATOM_LPU,
+	KBASE_TL_RET_AS_CTX,
+	KBASE_TL_NRET_AS_CTX,
+	KBASE_TL_RET_ATOM_AS,
+	KBASE_TL_NRET_ATOM_AS,
+	KBASE_TL_DEP_ATOM_ATOM,
+	KBASE_TL_NDEP_ATOM_ATOM,
+	KBASE_TL_RDEP_ATOM_ATOM,
+	KBASE_TL_ATTRIB_ATOM_CONFIG,
+	KBASE_TL_ATTRIB_ATOM_PRIORITY,
+	KBASE_TL_ATTRIB_ATOM_STATE,
+	KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE,
+	KBASE_TL_ATTRIB_AS_CONFIG,
+	KBASE_TL_EVENT_LPU_SOFTSTOP,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+
+	/* Job dump specific events. */
+	KBASE_JD_GPU_SOFT_RESET
+};
+
+/* Message ids of trace events that are recorded in the auxiliary stream. */
+enum tl_msg_id_aux {
+	KBASE_AUX_PM_STATE,
+	KBASE_AUX_PAGEFAULT,
+	KBASE_AUX_PAGESALLOC,
+	KBASE_AUX_DEVFREQ_TARGET
+};
+
+/*****************************************************************************/
+
+/**
+ * struct tl_stream - timeline stream structure
+ * @lock: message order lock
+ * @buffer: array of buffers
+ * @wbi: write buffer index
+ * @rbi: read buffer index
+ * @numbered: if non-zero stream's packets are sequentially numbered
+ * @autoflush_counter: counter tracking stream's autoflush state
+ *
+ * This structure holds information needed to construct proper packets in the
+ * timeline stream. Each message in sequence must bear timestamp that is greater
+ * to one in previous message in the same stream. For this reason lock is held
+ * throughout the process of message creation. Each stream contains set of
+ * buffers. Each buffer will hold one MIPE packet. In case there is no free
+ * space required to store incoming message the oldest buffer is discarded.
+ * Each packet in timeline body stream has sequence number embedded (this value
+ * must increment monotonically and is used by packets receiver to discover
+ * buffer overflows.
+ * Autoflush counter is set to negative number when there is no data pending
+ * for flush and it is set to zero on every update of the buffer. Autoflush
+ * timer will increment the counter by one on every expiry. In case there will
+ * be no activity on the buffer during two consecutive timer expiries, stream
+ * buffer will be flushed.
+ */
+struct tl_stream {
+	spinlock_t lock;
+
+	struct {
+		atomic_t size;              /* number of bytes in buffer */
+		char     data[PACKET_SIZE]; /* buffer's data */
+	} buffer[PACKET_COUNT];
+
+	atomic_t wbi;
+	atomic_t rbi;
+
+	int      numbered;
+	atomic_t autoflush_counter;
+};
+
+/**
+ * struct tp_desc - tracepoint message descriptor structure
+ * @id:        tracepoint ID identifying message in stream
+ * @id_str:    human readable version of tracepoint ID
+ * @name:      tracepoint description
+ * @arg_types: tracepoint's arguments types declaration
+ * @arg_names: comma separated list of tracepoint's arguments names
+ */
+struct tp_desc {
+	u32        id;
+	const char *id_str;
+	const char *name;
+	const char *arg_types;
+	const char *arg_names;
+};
+
+/*****************************************************************************/
+
+/* Configuration of timeline streams generated by kernel.
+ * Kernel emit only streams containing either timeline object events or
+ * auxiliary events. All streams have stream id value of 1 (as opposed to user
+ * space streams that have value of 0). */
+static const struct {
+	enum tl_packet_family pkt_family;
+	enum tl_packet_class  pkt_class;
+	enum tl_packet_type   pkt_type;
+	unsigned int          stream_id;
+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY,    1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY,    1}
+};
+
+/* The timeline streams generated by kernel. */
+static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT];
+
+/* Autoflush timer. */
+static struct timer_list autoflush_timer;
+
+/* If non-zero autoflush timer is active. */
+static atomic_t autoflush_timer_active;
+
+/* Reader lock. Only one reader is allowed to have access to the timeline
+ * streams at any given time. */
+static DEFINE_MUTEX(tl_reader_lock);
+
+/* Timeline stream event queue. */
+static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
+
+/* The timeline stream file operations functions. */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos);
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait);
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations kbasep_tlstream_fops = {
+	.release = kbasep_tlstream_release,
+	.read    = kbasep_tlstream_read,
+	.poll    = kbasep_tlstream_poll,
+};
+
+/* Descriptors of timeline messages transmitted in object events stream. */
+static const struct tp_desc tp_desc_obj[] = {
+	{
+		KBASE_TL_NEW_CTX,
+		__stringify(KBASE_TL_NEW_CTX),
+		"object ctx is created",
+		"@pII",
+		"ctx,ctx_nr,tgid"
+	},
+	{
+		KBASE_TL_NEW_GPU,
+		__stringify(KBASE_TL_NEW_GPU),
+		"object gpu is created",
+		"@pII",
+		"gpu,gpu_id,core_count"
+	},
+	{
+		KBASE_TL_NEW_LPU,
+		__stringify(KBASE_TL_NEW_LPU),
+		"object lpu is created",
+		"@pII",
+		"lpu,lpu_nr,lpu_fn"
+	},
+	{
+		KBASE_TL_NEW_ATOM,
+		__stringify(KBASE_TL_NEW_ATOM),
+		"object atom is created",
+		"@pI",
+		"atom,atom_nr"
+	},
+	{
+		KBASE_TL_NEW_AS,
+		__stringify(KBASE_TL_NEW_AS),
+		"address space object is created",
+		"@pI",
+		"address_space,as_nr"
+	},
+	{
+		KBASE_TL_DEL_CTX,
+		__stringify(KBASE_TL_DEL_CTX),
+		"context is destroyed",
+		"@p",
+		"ctx"
+	},
+	{
+		KBASE_TL_DEL_ATOM,
+		__stringify(KBASE_TL_DEL_ATOM),
+		"atom is destroyed",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_LIFELINK_LPU_GPU,
+		__stringify(KBASE_TL_LIFELINK_LPU_GPU),
+		"lpu is deleted with gpu",
+		"@pp",
+		"lpu,gpu"
+	},
+	{
+		KBASE_TL_LIFELINK_AS_GPU,
+		__stringify(KBASE_TL_LIFELINK_AS_GPU),
+		"address space is deleted with gpu",
+		"@pp",
+		"address_space,gpu"
+	},
+	{
+		KBASE_TL_RET_CTX_LPU,
+		__stringify(KBASE_TL_RET_CTX_LPU),
+		"context is retained by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_RET_ATOM_CTX,
+		__stringify(KBASE_TL_RET_ATOM_CTX),
+		"atom is retained by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_LPU,
+		__stringify(KBASE_TL_RET_ATOM_LPU),
+		"atom is retained by lpu",
+		"@pps",
+		"atom,lpu,attrib_match_list"
+	},
+	{
+		KBASE_TL_NRET_CTX_LPU,
+		__stringify(KBASE_TL_NRET_CTX_LPU),
+		"context is released by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_NRET_ATOM_CTX,
+		__stringify(KBASE_TL_NRET_ATOM_CTX),
+		"atom is released by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_NRET_ATOM_LPU,
+		__stringify(KBASE_TL_NRET_ATOM_LPU),
+		"atom is released by lpu",
+		"@pp",
+		"atom,lpu"
+	},
+	{
+		KBASE_TL_RET_AS_CTX,
+		__stringify(KBASE_TL_RET_AS_CTX),
+		"address space is retained by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_NRET_AS_CTX,
+		__stringify(KBASE_TL_NRET_AS_CTX),
+		"address space is released by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_AS,
+		__stringify(KBASE_TL_RET_ATOM_AS),
+		"atom is retained by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_NRET_ATOM_AS,
+		__stringify(KBASE_TL_NRET_ATOM_AS),
+		"atom is released by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_DEP_ATOM_ATOM,
+		__stringify(KBASE_TL_DEP_ATOM_ATOM),
+		"atom2 depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_NDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_NDEP_ATOM_ATOM),
+		"atom2 no longer depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_RDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_RDEP_ATOM_ATOM),
+		"resolved dependecy of atom2 depending on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_ATOM_CONFIG),
+		"atom job slot attributes",
+		"@pLLI",
+		"atom,descriptor,affinity,config"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_PRIORITY,
+		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY),
+		"atom priority",
+		"@pI",
+		"atom,prio"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_STATE,
+		__stringify(KBASE_TL_ATTRIB_ATOM_STATE),
+		"atom state",
+		"@pI",
+		"atom,state"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE,
+		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE),
+		"atom caused priority change",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_ATTRIB_AS_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_AS_CONFIG),
+		"address space attributes",
+		"@pLLL",
+		"address_space,transtab,memattr,transcfg"
+	},
+	{
+		KBASE_TL_EVENT_LPU_SOFTSTOP,
+		__stringify(KBASE_TL_EVENT_LPU_SOFTSTOP),
+		"softstop event on given lpu",
+		"@p",
+		"lpu"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX),
+		"atom softstopped",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+		__stringify(KBASE_TL_EVENT_SOFTSTOP_ISSUE),
+		"atom softstop issued",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_JD_GPU_SOFT_RESET,
+		__stringify(KBASE_JD_GPU_SOFT_RESET),
+		"gpu soft reset",
+		"@p",
+		"gpu"
+	},
+};
+
+/* Descriptors of timeline messages transmitted in auxiliary events stream. */
+static const struct tp_desc tp_desc_aux[] = {
+	{
+		KBASE_AUX_PM_STATE,
+		__stringify(KBASE_AUX_PM_STATE),
+		"PM state",
+		"@IL",
+		"core_type,core_state_bitset"
+	},
+	{
+		KBASE_AUX_PAGEFAULT,
+		__stringify(KBASE_AUX_PAGEFAULT),
+		"Page fault",
+		"@IL",
+		"ctx_nr,page_cnt_change"
+	},
+	{
+		KBASE_AUX_PAGESALLOC,
+		__stringify(KBASE_AUX_PAGESALLOC),
+		"Total alloc pages change",
+		"@IL",
+		"ctx_nr,page_cnt"
+	},
+	{
+		KBASE_AUX_DEVFREQ_TARGET,
+		__stringify(KBASE_AUX_DEVFREQ_TARGET),
+		"New device frequency target",
+		"@L",
+		"target_freq"
+	}
+};
+
+#if MALI_UNIT_TEST
+/* Number of bytes read by user. */
+static atomic_t tlstream_bytes_collected = {0};
+
+/* Number of bytes generated by tracepoint messages. */
+static atomic_t tlstream_bytes_generated = {0};
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+/* Indicator of whether the timeline stream file descriptor is used. */
+atomic_t kbase_tlstream_enabled = {0};
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ * Return: timestamp value
+ */
+static u64 kbasep_tlstream_get_timestamp(void)
+{
+	struct timespec ts;
+	u64             timestamp;
+
+	getrawmonotonic(&ts);
+	timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+	return timestamp;
+}
+
+/**
+ * kbasep_tlstream_write_bytes - write data to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ * @bytes:  pointer to buffer holding data
+ * @len:    length of data to be written
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_bytes(
+		char       *buffer,
+		size_t     pos,
+		const void *bytes,
+		size_t     len)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(bytes);
+
+	memcpy(&buffer[pos], bytes, len);
+
+	return pos + len;
+}
+
+/**
+ * kbasep_tlstream_write_string - write string to message buffer
+ * @buffer:         buffer where data will be written
+ * @pos:            position in the buffer where to place data
+ * @string:         pointer to buffer holding the source string
+ * @max_write_size: number of bytes that can be stored in buffer
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_string(
+		char       *buffer,
+		size_t     pos,
+		const char *string,
+		size_t     max_write_size)
+{
+	u32 string_len;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(string);
+	/* Timeline string consists of at least string length and nul
+	 * terminator. */
+	KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
+	max_write_size -= sizeof(string_len);
+
+	string_len = strlcpy(
+			&buffer[pos + sizeof(string_len)],
+			string,
+			max_write_size);
+	string_len += sizeof(char);
+
+	/* Make sure that the source string fit into the buffer. */
+	KBASE_DEBUG_ASSERT(string_len <= max_write_size);
+
+	/* Update string length. */
+	memcpy(&buffer[pos], &string_len, sizeof(string_len));
+
+	return pos + sizeof(string_len) + string_len;
+}
+
+/**
+ * kbasep_tlstream_write_timestamp - write timestamp to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos)
+{
+	u64 timestamp = kbasep_tlstream_get_timestamp();
+
+	return kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&timestamp, sizeof(timestamp));
+}
+
+/**
+ * kbasep_tlstream_put_bits - put bits in a word
+ * @word:   pointer to the words being modified
+ * @value:  value that shall be written to given position
+ * @bitpos: position where value shall be written (in bits)
+ * @bitlen: length of value (in bits)
+ */
+static void kbasep_tlstream_put_bits(
+		u32          *word,
+		u32          value,
+		unsigned int bitpos,
+		unsigned int bitlen)
+{
+	const u32 mask = ((1 << bitlen) - 1) << bitpos;
+
+	KBASE_DEBUG_ASSERT(word);
+	KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen));
+	KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32);
+
+	*word &= ~mask;
+	*word |= ((value << bitpos) & mask);
+}
+
+/**
+ * kbasep_tlstream_packet_header_setup - setup the packet header
+ * @buffer:     pointer to the buffer
+ * @pkt_family: packet's family
+ * @pkt_type:   packet's type
+ * @pkt_class:  packet's class
+ * @stream_id:  stream id
+ * @numbered:   non-zero if this stream is numbered
+ *
+ * Function sets up immutable part of packet header in the given buffer.
+ */
+static void kbasep_tlstream_packet_header_setup(
+		char                  *buffer,
+		enum tl_packet_family pkt_family,
+		enum tl_packet_class  pkt_class,
+		enum tl_packet_type   pkt_type,
+		unsigned int          stream_id,
+		int                   numbered)
+{
+	u32 word0 = 0;
+	u32 word1 = 0;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL);
+	KBASE_DEBUG_ASSERT(
+			(pkt_type == TL_PACKET_TYPE_HEADER)  ||
+			(pkt_type == TL_PACKET_TYPE_SUMMARY) ||
+			(pkt_type == TL_PACKET_TYPE_BODY));
+	KBASE_DEBUG_ASSERT(
+			(pkt_class == TL_PACKET_CLASS_OBJ) ||
+			(pkt_class == TL_PACKET_CLASS_AUX));
+
+	kbasep_tlstream_put_bits(
+			&word0, pkt_family,
+			PACKET_FAMILY_POS, PACKET_FAMILY_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_class,
+			PACKET_CLASS_POS, PACKET_CLASS_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_type,
+			PACKET_TYPE_POS, PACKET_TYPE_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, stream_id,
+			PACKET_STREAMID_POS, PACKET_STREAMID_LEN);
+
+	if (numbered)
+		kbasep_tlstream_put_bits(
+				&word1, 1,
+				PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN);
+
+	memcpy(&buffer[0],             &word0, sizeof(word0));
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_header_update - update the packet header
+ * @buffer:    pointer to the buffer
+ * @data_size: amount of data carried in this packet
+ *
+ * Function updates mutable part of packet header in the given buffer.
+ * Note that value of data_size must not including size of the header.
+ */
+static void kbasep_tlstream_packet_header_update(
+		char   *buffer,
+		size_t data_size)
+{
+	u32 word0;
+	u32 word1;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	CSTD_UNUSED(word0);
+
+	memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1));
+
+	kbasep_tlstream_put_bits(
+			&word1, data_size,
+			PACKET_LENGTH_POS, PACKET_LENGTH_LEN);
+
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_number_update - update the packet number
+ * @buffer:  pointer to the buffer
+ * @counter: value of packet counter for this packet's stream
+ *
+ * Function updates packet number embedded within the packet placed in the
+ * given buffer.
+ */
+static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+
+	memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter));
+}
+
+/**
+ * kbasep_timeline_stream_reset - reset stream
+ * @stream:  pointer to the stream structure
+ *
+ * Function discards all pending messages and resets packet counters.
+ */
+static void kbasep_timeline_stream_reset(struct tl_stream *stream)
+{
+	unsigned int i;
+
+	for (i = 0; i < PACKET_COUNT; i++) {
+		if (stream->numbered)
+			atomic_set(
+					&stream->buffer[i].size,
+					PACKET_HEADER_SIZE +
+					PACKET_NUMBER_SIZE);
+		else
+			atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE);
+	}
+
+	atomic_set(&stream->wbi, 0);
+	atomic_set(&stream->rbi, 0);
+}
+
+/**
+ * kbasep_timeline_stream_init - initialize timeline stream
+ * @stream:      pointer to the stream structure
+ * @stream_type: stream type
+ */
+static void kbasep_timeline_stream_init(
+		struct tl_stream    *stream,
+		enum tl_stream_type stream_type)
+{
+	unsigned int i;
+
+	KBASE_DEBUG_ASSERT(stream);
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	spin_lock_init(&stream->lock);
+
+	/* All packets carrying tracepoints shall be numbered. */
+	if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type)
+		stream->numbered = 1;
+	else
+		stream->numbered = 0;
+
+	for (i = 0; i < PACKET_COUNT; i++)
+		kbasep_tlstream_packet_header_setup(
+				stream->buffer[i].data,
+				tl_stream_cfg[stream_type].pkt_family,
+				tl_stream_cfg[stream_type].pkt_class,
+				tl_stream_cfg[stream_type].pkt_type,
+				tl_stream_cfg[stream_type].stream_id,
+				stream->numbered);
+
+	kbasep_timeline_stream_reset(tl_stream[stream_type]);
+}
+
+/**
+ * kbasep_timeline_stream_term - terminate timeline stream
+ * @stream: pointer to the stream structure
+ */
+static void kbasep_timeline_stream_term(struct tl_stream *stream)
+{
+	KBASE_DEBUG_ASSERT(stream);
+}
+
+/**
+ * kbasep_tlstream_msgbuf_submit - submit packet to the user space
+ * @stream:     pointer to the stream structure
+ * @wb_idx_raw: write buffer index
+ * @wb_size:    length of data stored in current buffer
+ *
+ * Function updates currently written buffer with packet header. Then write
+ * index is incremented and buffer is handled to user space. Parameters
+ * of new buffer are returned using provided arguments.
+ *
+ * Return: length of data in new buffer
+ *
+ * Warning:  User must update the stream structure with returned value.
+ */
+static size_t kbasep_tlstream_msgbuf_submit(
+		struct tl_stream *stream,
+		unsigned int      wb_idx_raw,
+		unsigned int      wb_size)
+{
+	unsigned int rb_idx_raw = atomic_read(&stream->rbi);
+	unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
+
+	/* Set stream as flushed. */
+	atomic_set(&stream->autoflush_counter, -1);
+
+	kbasep_tlstream_packet_header_update(
+			stream->buffer[wb_idx].data,
+			wb_size - PACKET_HEADER_SIZE);
+
+	if (stream->numbered)
+		kbasep_tlstream_packet_number_update(
+				stream->buffer[wb_idx].data,
+				wb_idx_raw);
+
+	/* Increasing write buffer index will expose this packet to the reader.
+	 * As stream->lock is not taken on reader side we must make sure memory
+	 * is updated correctly before this will happen. */
+	smp_wmb();
+	wb_idx_raw++;
+	atomic_set(&stream->wbi, wb_idx_raw);
+
+	/* Inform user that packets are ready for reading. */
+	wake_up_interruptible(&tl_event_queue);
+
+	/* Detect and mark overflow in this stream. */
+	if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) {
+		/* Reader side depends on this increment to correctly handle
+		 * overflows. The value shall be updated only if it was not
+		 * modified by the reader. The data holding buffer will not be
+		 * updated before stream->lock is released, however size of the
+		 * buffer will. Make sure this increment is globally visible
+		 * before information about selected write buffer size. */
+		atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1);
+	}
+
+	wb_size = PACKET_HEADER_SIZE;
+	if (stream->numbered)
+		wb_size += PACKET_NUMBER_SIZE;
+
+	return wb_size;
+}
+
+/**
+ * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer
+ * @stream_type: type of the stream that shall be locked
+ * @msg_size:    message size
+ * @flags:       pointer to store flags passed back on stream release
+ *
+ * Function will lock the stream and reserve the number of bytes requested
+ * in msg_size for the user.
+ *
+ * Return: pointer to the buffer where message can be stored
+ *
+ * Warning: Stream must be released with kbasep_tlstream_msgbuf_release().
+ *          Only atomic operations are allowed while stream is locked
+ *          (i.e. do not use any operation that may sleep).
+ */
+static char *kbasep_tlstream_msgbuf_acquire(
+		enum tl_stream_type stream_type,
+		size_t              msg_size,
+		unsigned long       *flags) __acquires(&stream->lock)
+{
+	struct tl_stream *stream;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(
+			PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
+			msg_size);
+
+	stream = tl_stream[stream_type];
+
+	spin_lock_irqsave(&stream->lock, *flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	/* Select next buffer if data will not fit into current one. */
+	if (PACKET_SIZE < wb_size + msg_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx  = (wb_idx_raw + 1) % PACKET_COUNT;
+	}
+
+	/* Reserve space in selected buffer. */
+	atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size);
+
+#if MALI_UNIT_TEST
+	atomic_add(msg_size, &tlstream_bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+	return &stream->buffer[wb_idx].data[wb_size];
+}
+
+/**
+ * kbasep_tlstream_msgbuf_release - unlock selected stream
+ * @stream_type:  type of the stream that shall be locked
+ * @flags:        value obtained during stream acquire
+ *
+ * Function releases stream that has been previously locked with a call to
+ * kbasep_tlstream_msgbuf_acquire().
+ */
+static void kbasep_tlstream_msgbuf_release(
+		enum tl_stream_type stream_type,
+		unsigned long       flags) __releases(&stream->lock)
+{
+	struct tl_stream *stream;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	stream = tl_stream[stream_type];
+
+	/* Mark stream as containing unflushed data. */
+	atomic_set(&stream->autoflush_counter, 0);
+
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_flush_stream - flush stream
+ * @stype:  type of stream to be flushed
+ *
+ * Flush pending data in timeline stream.
+ */
+static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
+{
+	struct tl_stream *stream = tl_stream[stype];
+	unsigned long    flags;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+	size_t           min_size = PACKET_HEADER_SIZE;
+
+	if (stream->numbered)
+		min_size += PACKET_NUMBER_SIZE;
+
+	spin_lock_irqsave(&stream->lock, flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	if (wb_size > min_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+		atomic_set(&stream->buffer[wb_idx].size, wb_size);
+	}
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/**
+ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
+ * @data:  unused
+ *
+ * Timer is executed periodically to check if any of the stream contains
+ * buffer ready to be submitted to user space.
+ */
+static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
+{
+	enum tl_stream_type stype;
+	int                 rcode;
+
+	CSTD_UNUSED(data);
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
+		struct tl_stream *stream = tl_stream[stype];
+		unsigned long    flags;
+		unsigned int     wb_idx_raw;
+		unsigned int     wb_idx;
+		size_t           wb_size;
+		size_t           min_size = PACKET_HEADER_SIZE;
+
+		int af_cnt = atomic_read(&stream->autoflush_counter);
+
+		/* Check if stream contain unflushed data. */
+		if (0 > af_cnt)
+			continue;
+
+		/* Check if stream should be flushed now. */
+		if (af_cnt != atomic_cmpxchg(
+					&stream->autoflush_counter,
+					af_cnt,
+					af_cnt + 1))
+			continue;
+		if (!af_cnt)
+			continue;
+
+		/* Autoflush this stream. */
+		if (stream->numbered)
+			min_size += PACKET_NUMBER_SIZE;
+
+		spin_lock_irqsave(&stream->lock, flags);
+
+		wb_idx_raw = atomic_read(&stream->wbi);
+		wb_idx     = wb_idx_raw % PACKET_COUNT;
+		wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+		if (wb_size > min_size) {
+			wb_size = kbasep_tlstream_msgbuf_submit(
+					stream, wb_idx_raw, wb_size);
+			wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+			atomic_set(&stream->buffer[wb_idx].size,
+					wb_size);
+		}
+		spin_unlock_irqrestore(&stream->lock, flags);
+	}
+
+	if (atomic_read(&autoflush_timer_active))
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+	CSTD_UNUSED(rcode);
+}
+
+/**
+ * kbasep_tlstream_packet_pending - check timeline streams for pending packets
+ * @stype:      pointer to variable where stream type will be placed
+ * @rb_idx_raw: pointer to variable where read buffer index will be placed
+ *
+ * Function checks all streams for pending packets. It will stop as soon as
+ * packet ready to be submitted to user space is detected. Variables under
+ * pointers, passed as the parameters to this function will be updated with
+ * values pointing to right stream and buffer.
+ *
+ * Return: non-zero if any of timeline streams has at last one packet ready
+ */
+static int kbasep_tlstream_packet_pending(
+		enum tl_stream_type *stype,
+		unsigned int        *rb_idx_raw)
+{
+	int pending = 0;
+
+	KBASE_DEBUG_ASSERT(stype);
+	KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+	for (
+			*stype = 0;
+			(*stype < TL_STREAM_TYPE_COUNT) && !pending;
+			(*stype)++) {
+		if (NULL != tl_stream[*stype]) {
+			*rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi);
+			/* Read buffer index may be updated by writer in case of
+			 * overflow. Read and write buffer indexes must be
+			 * loaded in correct order. */
+			smp_rmb();
+			if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw)
+				pending = 1;
+		}
+	}
+	(*stype)--;
+
+	return pending;
+}
+
+/**
+ * kbasep_tlstream_read - copy data from streams to buffer provided by user
+ * @filp:   pointer to file structure (unused)
+ * @buffer: pointer to the buffer provided by user
+ * @size:   maximum amount of data that can be stored in the buffer
+ * @f_pos:  pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer
+ */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos)
+{
+	ssize_t copy_len = 0;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(f_pos);
+
+	if (!buffer)
+		return -EINVAL;
+
+	if ((0 > *f_pos) || (PACKET_SIZE > size))
+		return -EINVAL;
+
+	mutex_lock(&tl_reader_lock);
+
+	while (copy_len < size) {
+		enum tl_stream_type stype;
+		unsigned int        rb_idx_raw = 0;
+		unsigned int        rb_idx;
+		size_t              rb_size;
+
+		/* If we don't have any data yet, wait for packet to be
+		 * submitted. If we already read some packets and there is no
+		 * packet pending return back to user. */
+		if (0 < copy_len) {
+			if (!kbasep_tlstream_packet_pending(
+						&stype,
+						&rb_idx_raw))
+				break;
+		} else {
+			if (wait_event_interruptible(
+						tl_event_queue,
+						kbasep_tlstream_packet_pending(
+							&stype,
+							&rb_idx_raw))) {
+				copy_len = -ERESTARTSYS;
+				break;
+			}
+		}
+
+		/* Check if this packet fits into the user buffer.
+		 * If so copy its content. */
+		rb_idx = rb_idx_raw % PACKET_COUNT;
+		rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size);
+		if (rb_size > size - copy_len)
+			break;
+		if (copy_to_user(
+					&buffer[copy_len],
+					tl_stream[stype]->buffer[rb_idx].data,
+					rb_size)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* If the rbi still points to the packet we just processed
+		 * then there was no overflow so we add the copied size to
+		 * copy_len and move rbi on to the next packet
+		 */
+		smp_rmb();
+		if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
+			copy_len += rb_size;
+			atomic_inc(&tl_stream[stype]->rbi);
+
+#if MALI_UNIT_TEST
+			atomic_add(rb_size, &tlstream_bytes_collected);
+#endif /* MALI_UNIT_TEST */
+		}
+	}
+
+	mutex_unlock(&tl_reader_lock);
+
+	return copy_len;
+}
+
+/**
+ * kbasep_tlstream_poll - poll timeline stream for packets
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait)
+{
+	enum tl_stream_type stream_type;
+	unsigned int        rb_idx;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	poll_wait(filp, &tl_event_queue, wait);
+	if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_release - release timeline stream descriptor
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ *
+ * Return always return zero
+ */
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
+{
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+	CSTD_UNUSED(inode);
+	CSTD_UNUSED(filp);
+
+	/* Stop autoflush timer before releasing access to streams. */
+	atomic_set(&autoflush_timer_active, 0);
+	del_timer_sync(&autoflush_timer);
+
+	atomic_set(&kbase_tlstream_enabled, 0);
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_timeline_header - prepare timeline header stream packet
+ * @stream_type: type of the stream that will carry header data
+ * @tp_desc:     pointer to array with tracepoint descriptors
+ * @tp_count:    number of descriptors in the given array
+ *
+ * Functions fills in information about tracepoints stored in body stream
+ * associated with this header stream.
+ */
+static void kbasep_tlstream_timeline_header(
+		enum tl_stream_type  stream_type,
+		const struct tp_desc *tp_desc,
+		u32                  tp_count)
+{
+	const u8      tv = SWTRACE_VERSION; /* protocol version */
+	const u8      ps = sizeof(void *); /* pointer size */
+	size_t        msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
+	char          *buffer;
+	size_t        pos = 0;
+	unsigned long flags;
+	unsigned int  i;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(tp_desc);
+
+	/* Calculate the size of the timeline message. */
+	for (i = 0; i < tp_count; i++) {
+		msg_size += sizeof(tp_desc[i].id);
+		msg_size +=
+			strnlen(tp_desc[i].id_str,    STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].name,      STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_types, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_names, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+	}
+
+	KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size);
+
+	buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tp_count, sizeof(tp_count));
+
+	for (i = 0; i < tp_count; i++) {
+		pos = kbasep_tlstream_write_bytes(
+				buffer, pos,
+				&tp_desc[i].id, sizeof(tp_desc[i].id));
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].id_str, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].name, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_types, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_names, msg_size - pos);
+	}
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(stream_type, flags);
+
+	/* We don't expect any more data to be read in this stream.
+	 * As header stream must be read before its associated body stream,
+	 * make this packet visible to the user straightaway. */
+	kbasep_tlstream_flush_stream(stream_type);
+}
+
+/*****************************************************************************/
+
+int kbase_tlstream_init(void)
+{
+	enum tl_stream_type i;
+
+	/* Prepare stream structures. */
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL);
+		if (!tl_stream[i])
+			break;
+		kbasep_timeline_stream_init(tl_stream[i], i);
+	}
+	if (TL_STREAM_TYPE_COUNT > i) {
+		for (; i > 0; i--) {
+			kbasep_timeline_stream_term(tl_stream[i - 1]);
+			kfree(tl_stream[i - 1]);
+		}
+		return -ENOMEM;
+	}
+
+	/* Initialize autoflush timer. */
+	atomic_set(&autoflush_timer_active, 0);
+	setup_timer(&autoflush_timer,
+			kbasep_tlstream_autoflush_timer_callback,
+			0);
+
+	return 0;
+}
+
+void kbase_tlstream_term(void)
+{
+	enum tl_stream_type i;
+
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		kbasep_timeline_stream_term(tl_stream[i]);
+		kfree(tl_stream[i]);
+	}
+}
+
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd, u32 flags)
+{
+	u32 tlstream_enabled = TLSTREAM_ENABLED | flags;
+
+	if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) {
+		int rcode;
+
+		*fd = anon_inode_getfd(
+				"[mali_tlstream]",
+				&kbasep_tlstream_fops,
+				kctx,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd) {
+			atomic_set(&kbase_tlstream_enabled, 0);
+			return *fd;
+		}
+
+		/* Reset and initialize header streams. */
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_HEADER]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_AUX_HEADER]);
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_OBJ_HEADER,
+				tp_desc_obj,
+				ARRAY_SIZE(tp_desc_obj));
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_AUX_HEADER,
+				tp_desc_aux,
+				ARRAY_SIZE(tp_desc_aux));
+
+		/* Start autoflush timer. */
+		atomic_set(&autoflush_timer_active, 1);
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+		CSTD_UNUSED(rcode);
+
+	} else {
+		*fd = -EBUSY;
+	}
+
+	return 0;
+}
+
+void kbase_tlstream_flush_streams(void)
+{
+	enum tl_stream_type stype;
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
+		kbasep_tlstream_flush_stream(stype);
+}
+
+void kbase_tlstream_reset_body_streams(void)
+{
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_OBJ]);
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_AUX]);
+}
+
+#if MALI_UNIT_TEST
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
+{
+	KBASE_DEBUG_ASSERT(bytes_collected);
+	KBASE_DEBUG_ASSERT(bytes_generated);
+	*bytes_collected = atomic_read(&tlstream_bytes_collected);
+	*bytes_generated = atomic_read(&tlstream_bytes_generated);
+}
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
+{
+	const u32     msg_id = KBASE_TL_NEW_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) +
+		sizeof(core_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &id, sizeof(id));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_count, sizeof(core_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
+{
+	const u32     msg_id = KBASE_TL_NEW_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) +
+		sizeof(fn);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &fn, sizeof(fn));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_LPU_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_AS_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_ATOM;
+	const size_t  msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+			sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_ctx(void *context)
+{
+	const u32     msg_id = KBASE_TL_DEL_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_atom(void *atom)
+{
+	const u32     msg_id = KBASE_TL_DEL_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_RET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_LPU;
+	const size_t  msg_s0 = sizeof(u32) + sizeof(char) +
+			strnlen(attrib_match_list, STRLEN_MAX);
+	const size_t  msg_size =
+			sizeof(msg_id) + sizeof(u64) +
+			sizeof(atom) + sizeof(lpu) + msg_s0;
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_string(
+			buffer, pos, attrib_match_list, msg_s0);
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_DEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_NDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_RDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_RET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_NRET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+		sizeof(jd) + sizeof(affinity) + sizeof(config);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &jd, sizeof(jd));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &affinity, sizeof(affinity));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &config, sizeof(config));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(prio);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &prio, sizeof(prio));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) +
+		sizeof(transtab) + sizeof(memattr) + sizeof(transcfg);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transtab, sizeof(transtab));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &memattr, sizeof(memattr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transcfg, sizeof(transcfg));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu)
+{
+	const u32     msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
+{
+	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+{
+	const u32     msg_id = KBASE_AUX_PM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
+		sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_type, sizeof(core_type));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
+{
+	const u32     msg_id = KBASE_AUX_PAGEFAULT;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count_change);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&page_count_change, sizeof(page_count_change));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
+{
+	const u32     msg_id = KBASE_AUX_PAGESALLOC;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &page_count, sizeof(page_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_devfreq_target(u64 target_freq)
+{
+	const u32       msg_id = KBASE_AUX_DEVFREQ_TARGET;
+	const size_t    msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(target_freq);
+	unsigned long   flags;
+	char            *buffer;
+	size_t          pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &target_freq, sizeof(target_freq));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
new file mode 100755
index 0000000..34def27
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -0,0 +1,558 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_KBASE_TLSTREAM_H)
+#define _KBASE_TLSTREAM_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_init - initialize timeline infrastructure in kernel
+ * Return: zero on success, negative number on error
+ */
+int kbase_tlstream_init(void);
+
+/**
+ * kbase_tlstream_term - terminate timeline infrastructure in kernel
+ *
+ * Timeline need have to been previously enabled with kbase_tlstream_init().
+ */
+void kbase_tlstream_term(void);
+
+/**
+ * kbase_tlstream_acquire - acquire timeline stream file descriptor
+ * @kctx:  kernel common context
+ * @fd:    timeline stream file descriptor
+ * @flags: timeline stream flags
+ *
+ * This descriptor is meant to be used by userspace timeline to gain access to
+ * kernel timeline stream. This stream is later broadcasted by user space to the
+ * timeline client.
+ * Only one entity can own the descriptor at any given time. Descriptor shall be
+ * closed if unused. If descriptor cannot be obtained (i.e. when it is already
+ * being used) argument fd will contain negative value.
+ *
+ * Return: zero on success (this does not necessarily mean that stream
+ *         descriptor could be returned), negative number on error
+ */
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd, u32 flags);
+
+/**
+ * kbase_tlstream_flush_streams - flush timeline streams.
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_tlstream_flush_streams(void);
+
+/**
+ * kbase_tlstream_reset_body_streams - reset timeline body streams.
+ *
+ * Function will discard pending data in all timeline body streams.
+ */
+void kbase_tlstream_reset_body_streams(void);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_tlstream_test - start timeline stream data generator
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay in milliseconds between trace points written by one
+ *             writer
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This test starts a requested number of asynchronous writers in both IRQ and
+ * thread context. Each writer will generate required number of test
+ * tracepoints (tracepoints with embedded information about writer that
+ * should be verified by user space reader). Tracepoints will be emitted in
+ * all timeline body streams. If aux_msg is non-zero writer will also
+ * generate not testable tracepoints (tracepoints without information about
+ * writer). These tracepoints are used to check correctness of remaining
+ * timeline message generating functions. Writer will wait requested time
+ * between generating another set of messages. This call blocks until all
+ * writers finish.
+ */
+void kbase_tlstream_test(
+		unsigned int tpw_count,
+		unsigned int msg_delay,
+		unsigned int msg_count,
+		int          aux_msg);
+
+/**
+ * kbase_tlstream_stats - read timeline stream statistics
+ * @bytes_collected: will hold number of bytes read by the user
+ * @bytes_generated: will hold number of bytes generated by trace points
+ */
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+#define TL_ATOM_STATE_IDLE 0
+#define TL_ATOM_STATE_READY 1
+#define TL_ATOM_STATE_DONE 2
+#define TL_ATOM_STATE_POSTED 3
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+void __kbase_tlstream_tl_del_ctx(void *context);
+void __kbase_tlstream_tl_del_atom(void *atom);
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list);
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config);
+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio);
+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state);
+void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom);
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg);
+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom);
+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu);
+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom);
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
+void __kbase_tlstream_aux_devfreq_target(u64 target_freq);
+
+#define TLSTREAM_ENABLED (1 << 31)
+
+extern atomic_t kbase_tlstream_enabled;
+
+#define __TRACE_IF_ENABLED(trace_name, ...)                         \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled & TLSTREAM_ENABLED)                     \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...)                     \
+	do {                                                            \
+		int enabled = atomic_read(&kbase_tlstream_enabled);     \
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
+			__kbase_tlstream_##trace_name(__VA_ARGS__);     \
+	} while (0)
+
+/*****************************************************************************/
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX - create context object in timeline
+ *                                     summary
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU - create GPU object in timeline summary
+ * @gpu:        name of the GPU object
+ * @id:         id value of this GPU
+ * @core_count: number of cores this GPU hosts
+ *
+ * Function emits a timeline message informing about GPU creation. GPU is
+ * created with two attributes: id and core count.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(gpu, id, core_count) \
+	__TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU - create LPU object in timeline summary
+ * @lpu: name of the Logical Processing Unit object
+ * @nr:  sequential number assigned to this LPU
+ * @fn:  property describing this LPU's functional abilities
+ *
+ * Function emits a timeline message informing about LPU creation. LPU is
+ * created with two attributes: number linking this LPU with GPU's job slot
+ * and function bearing information about this LPU abilities.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, nr, fn) \
+	__TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU - lifelink LPU object to GPU
+ * @lpu: name of the Logical Processing Unit object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that LPU object shall be deleted
+ * along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_AS - create address space object in timeline summary
+ * @as: name of the address space object
+ * @nr: sequential number assigned to this address space
+ *
+ * Function emits a timeline message informing about address space creation.
+ * Address space is created with one attribute: number identifying this
+ * address space.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(as, nr) \
+	__TRACE_IF_ENABLED(tl_summary_new_as, as, nr)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU - lifelink address space object to GPU
+ * @as:  name of the address space object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that address space object
+ * shall be deleted along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(as, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_CTX - create context object in timeline
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ */
+#define KBASE_TLSTREAM_TL_NEW_CTX(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_ATOM - create atom object in timeline
+ * @atom: name of the atom object
+ * @nr:   sequential number assigned to this atom
+ *
+ * Function emits a timeline message informing about atom creation. Atom is
+ * created with atom number (its attribute) that links it with actual work
+ * bucket id understood by hardware.
+ */
+#define KBASE_TLSTREAM_TL_NEW_ATOM(atom, nr) \
+	__TRACE_IF_ENABLED(tl_new_atom, atom, nr)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_CTX - destroy context object in timeline
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that context object ceased to
+ * exist.
+ */
+#define KBASE_TLSTREAM_TL_DEL_CTX(context) \
+	__TRACE_IF_ENABLED(tl_del_ctx, context)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_ATOM - destroy atom object in timeline
+ * @atom: name of the atom object
+ *
+ * Function emits a timeline message informing that atom object ceased to
+ * exist.
+ */
+#define KBASE_TLSTREAM_TL_DEL_ATOM(atom) \
+	__TRACE_IF_ENABLED(tl_del_atom, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_CTX_LPU - retain context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_CTX_LPU(context, lpu) \
+	__TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_CTX - retain atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by context and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_CTX(atom, context) \
+	__TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_LPU - retain atom by LPU
+ * @atom:              name of the atom object
+ * @lpu:               name of the Logical Processing Unit object
+ * @attrib_match_list: list containing match operator attributes
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_LPU(atom, lpu, attrib_match_list) \
+	__TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_CTX_LPU - release context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being released
+ * by LPU object.
+ */
+#define KBASE_TLSTREAM_TL_NRET_CTX_LPU(context, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - release atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by context.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX(atom, context) \
+	__TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - release atom by LPU
+ * @atom: name of the atom object
+ * @lpu:  name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by LPU.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU(atom, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_AS_CTX - lifelink address space object to context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being held by the context object.
+ */
+#define KBASE_TLSTREAM_TL_RET_AS_CTX(as, ctx) \
+	__TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_AS_CTX - release address space by context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being released by atom.
+ */
+#define KBASE_TLSTREAM_TL_NRET_AS_CTX(as, ctx) \
+	__TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_AS - retain atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by address space and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_AS(atom, as) \
+	__TRACE_IF_ENABLED(tl_ret_atom_as, atom, as)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_AS - release atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by address space.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_AS(atom, as) \
+	__TRACE_IF_ENABLED(tl_nret_atom_as, atom, as)
+
+/**
+ * KBASE_TLSTREAM_TL_DEP_ATOM_ATOM - parent atom depends on child atom
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depends on child atom
+ *
+ * Function emits a timeline message informing that parent atom waits for
+ * child atom object to be completed before start its execution.
+ */
+#define KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM - dependency between atoms resolved
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM - information about already resolved dependency between atoms
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes
+ * @atom:     name of the atom object
+ * @jd:       job descriptor address
+ * @affinity: job affinity
+ * @config:   job config
+ *
+ * Function emits a timeline message containing atom attributes.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(atom, jd, affinity, config) \
+	__TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - atom priority
+ * @atom: name of the atom object
+ * @prio: atom priority
+ *
+ * Function emits a timeline message containing atom priority.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(atom, prio) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - atom state
+ * @atom:  name of the atom object
+ * @state: atom state
+ *
+ * Function emits a timeline message containing atom state.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, state) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE - atom caused priority change
+ * @atom:  name of the atom object
+ *
+ * Function emits a timeline message signalling priority change
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE(atom) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority_change, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes
+ * @as:       assigned address space
+ * @transtab: configuration of the TRANSTAB register
+ * @memattr:  configuration of the MEMATTR register
+ * @transcfg: configuration of the TRANSCFG register (or zero if not present)
+ *
+ * Function emits a timeline message containing address space attributes.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, transtab, memattr, transcfg) \
+	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ex
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_LPU_softstop
+ * @lpu:        name of the LPU object
+ */
+#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(lpu) \
+	__TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_issue
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom)
+
+/**
+ * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - The GPU is being soft reset
+ * @gpu:        name of the GPU object
+ *
+ * This imperative tracepoint is specific to job dumping.
+ * Function emits a timeline message indicating GPU soft reset.
+ */
+#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET(gpu) \
+	__TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PM_STATE - timeline message: power management state
+ * @core_type: core type (shader, tiler, l2 cache, l3 cache)
+ * @state:     64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
+ */
+#define KBASE_TLSTREAM_AUX_PM_STATE(core_type, state) \
+	__TRACE_IF_ENABLED(aux_pm_state, core_type, state)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGEFAULT - timeline message: MMU page fault event
+ *                                resulting in new pages being mapped
+ * @ctx_nr:            kernel context number
+ * @page_count_change: number of pages to be added
+ */
+#define KBASE_TLSTREAM_AUX_PAGEFAULT(ctx_nr, page_count_change) \
+	__TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGESALLOC - timeline message: total number of allocated
+ *                                 pages is changed
+ * @ctx_nr:     kernel context number
+ * @page_count: number of pages used by the context
+ */
+#define KBASE_TLSTREAM_AUX_PAGESALLOC(ctx_nr, page_count) \
+	__TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count)
+
+/**
+ * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - timeline message: new target DVFS
+ *                                     frequency
+ * @target_freq: new target frequency
+ */
+#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(target_freq) \
+	__TRACE_IF_ENABLED(aux_devfreq_target, target_freq)
+
+#endif /* _KBASE_TLSTREAM_H */
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100755
index 0000000..e2e0544
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,264 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code idenitifers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE.
+ *
+ * e.g.:
+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X
+ * typedef enum
+ * {
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X )
+ * #include "mali_kbase_trace_defs.h"
+ * #undef  KBASE_TRACE_CODE_MAKE_CODE
+ * } kbase_trace_code;
+ *
+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE
+ *
+ *
+ * The use of the macro here is:
+ * - KBASE_TRACE_CODE_MAKE_CODE( X )
+ *
+ * Which produces:
+ * - For an enum, KBASE_TRACE_CODE_X
+ * - For a string, "X"
+ *
+ *
+ * For example:
+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to:
+ *  - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum
+ *  - "JM_JOB_COMPLETE" for the string
+ * - To use it to trace an event, do:
+ *  - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val );
+ */
+
+#if 0 /* Dummy section to avoid breaking formatting */
+int dummy_array[] = {
+#endif
+
+/*
+ * Core events
+ */
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ),
+	/* info_val == bits cleared */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR),
+	/* GPU addr==dump address */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES),
+/*
+ * Job Slot management events
+ */
+	/* info_val==irq rawstat at start */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ),
+	/* info_val==jobs processed */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END),
+/* In the following:
+ *
+ * - ctx is set if a corresponding job found (NULL otherwise, e.g. some soft-stop cases)
+ * - uatom==kernel-side mapped uatom address (for correlation with user-side)
+ */
+	/* info_val==exit code; gpu_addr==chain gpuaddr */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+	/* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT),
+	/* gpu_addr is as follows:
+	 * - If JS_STATUS active after soft-stop, val==gpu addr written to
+	 *   JS_HEAD on submit
+	 * - otherwise gpu_addr==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1),
+	/* gpu_addr==JS_TAIL read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD),
+/* gpu_addr is as follows:
+ * - If JS_STATUS active before soft-stop, val==JS_HEAD
+ * - otherwise gpu_addr==0
+ */
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE),
+	/* info_val == nr jobs submitted */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP),
+	/* gpu_addr==JS_HEAD_NEXT last written */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER),
+/*
+ * Job dispatch events
+ */
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==0, info_val==0, uatom==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER),
+/*
+ * Scheduler Core events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB),
+	/* gpu_addr==last value written/would be written to JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
+	/* kctx is the one being evicted, info_val == kctx to put in  */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE),
+	/* info_val == the ctx attribute now on ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX),
+	/* info_val == the ctx attribute now on runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL),
+	/* info_val == the ctx attribute now off ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
+	/* info_val == the ctx attribute now off runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+/*
+ * Scheduler Policy events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX),
+	/* info_val == whether it was evicted */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ),
+	/* gpu_addr==JS_HEAD to write if the job were run */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END),
+/*
+ * Power Management Events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
+	/* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF),
+	/* info_val == policy number, or -1 for "Already changing" */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+/* Unused code just to make it easier to not have a comma at the end.
+ * All other codes MUST come before this */
+	KBASE_TRACE_CODE_MAKE_CODE(DUMMY)
+
+#if 0 /* Dummy section to avoid breaking formatting */
+};
+#endif
+
+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
new file mode 100755
index 0000000..5830e87
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
@@ -0,0 +1,236 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define CREATE_TRACE_POINTS
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+#include "mali_timeline.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active);
+
+struct kbase_trace_timeline_desc {
+	char *enum_str;
+	char *desc;
+	char *format;
+	char *format_desc;
+};
+
+static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc }
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table)
+
+static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos)
+{
+	if (*pos >= KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	(*pos)++;
+
+	if (*pos == KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace_timeline_desc *trace_desc = data;
+
+	seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc);
+	return 0;
+}
+
+
+static const struct seq_operations kbasep_trace_timeline_seq_ops = {
+	.start = kbasep_trace_timeline_seq_start,
+	.next = kbasep_trace_timeline_seq_next,
+	.stop = kbasep_trace_timeline_seq_stop,
+	.show = kbasep_trace_timeline_seq_show,
+};
+
+static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &kbasep_trace_timeline_seq_ops);
+}
+
+static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
+	.open = kbasep_trace_timeline_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_timeline_defs",
+			S_IRUGO, kbdev->mali_debugfs_directory, NULL,
+			&kbasep_trace_timeline_debugfs_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
+	} else {
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1);
+		KBASE_TIMELINE_JOB_START(kctx, js, atom_number);
+	}
+	++kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
+	} else {
+		/* Job finished in JS_HEAD */
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0);
+		KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number);
+
+		/* see if we need to trace the job in JS_NEXT moving to JS_HEAD */
+		if (kbase_backend_nr_atoms_submitted(kbdev, js)) {
+			struct kbase_jd_atom *next_katom;
+			struct kbase_context *next_kctx;
+
+			/* Peek the next atom - note that the atom in JS_HEAD will already
+			 * have been dequeued */
+			next_katom = kbase_backend_inspect_head(kbdev, js);
+			WARN_ON(!next_katom);
+			next_kctx = next_katom->kctx;
+			KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0);
+			KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1);
+			KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom));
+		}
+	}
+
+	--kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+	int uid = 0;
+	int old_uid;
+
+	/* If a producer already exists for the event, try to use their UID (multiple-producers) */
+	uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]);
+	old_uid = uid;
+
+	/* Get a new non-zero UID if we don't have one yet */
+	while (!uid)
+		uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter);
+
+	/* Try to use this UID */
+	if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid))
+		/* If it changed, raced with another producer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid);
+}
+
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != 0) {
+		if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+			/* If it changed, raced with another consumer: we've lost this UID */
+			uid = 0;
+
+		KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+	}
+}
+
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+		/* If it changed, raced with another consumer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+}
+
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	/* Simply log the start of the transition */
+	kbdev->timeline.l2_transitioning = true;
+	KBASE_TIMELINE_POWERING_L2(kbdev);
+}
+
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	/* Simply log the end of the transition */
+	if (kbdev->timeline.l2_transitioning) {
+		kbdev->timeline.l2_transitioning = false;
+		KBASE_TIMELINE_POWERED_L2(kbdev);
+	}
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
new file mode 100755
index 0000000..619072f
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
@@ -0,0 +1,363 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_KBASE_TRACE_TIMELINE_H)
+#define _KBASE_TRACE_TIMELINE_H
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+
+enum kbase_trace_timeline_code {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Initialize Timeline DebugFS entries */
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_trace_timeline_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
+ * functions.
+ * Output is timestamped by either sched_clock() (default), local_clock(), or
+ * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */
+#include "mali_timeline.h"
+
+/* Trace number of atoms in flight for kctx (atoms either not completed, or in
+   process of being returned to user */
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec,   \
+				(int)kctx->timeline.owner_tgid,              \
+				count);                                      \
+	} while (0)
+
+/* Trace atom_id being Ready to Run */
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id)                             \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec,              \
+				CTX_FLOW_ATOM_READY,                         \
+				(int)kctx->timeline.owner_tgid,              \
+				atom_id);                                    \
+	} while (0)
+
+/* Trace number of atoms submitted to job slot js
+ *
+ * NOTE: This uses a different tracepoint to the head/next/soft-stop actions,
+ * so that those actions can be filtered out separately from this
+ *
+ * This is because this is more useful, as we can use it to calculate general
+ * utilization easily and accurately */
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_ACTIVE,                      \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+
+/* Trace atoms present in JS_NEXT */
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_NEXT,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace atoms present in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_HEAD,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace that a soft stop/evict from next is being attempted on a slot */
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_STOPPING,                    \
+				(kctx) ? (int)kctx->timeline.owner_tgid : 0, \
+				js, count);                                  \
+	} while (0)
+
+
+
+/* Trace state of overall GPU power */
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active)                              \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_ACTIVE, active);            \
+	} while (0)
+
+/* Trace state of tiler power */
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap)                            \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_TILER_ACTIVE,               \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace number of shaders currently powered */
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap)                           \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_SHADER_ACTIVE,              \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 power */
+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap)                               \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_L2_ACTIVE,                  \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 cache*/
+#define KBASE_TIMELINE_POWERING_L2(kbdev)                                    \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_POWERING,               \
+				1);                                          \
+	} while (0)
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)                                     \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_ACTIVE,                 \
+				1);                                          \
+	} while (0)
+
+/* Trace kbase_pm_send_event message send */
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id)         \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_SEND_EVENT,                       \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+/* Trace kbase_pm_worker message receive */
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id)       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_HANDLE_EVENT,                     \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+
+/* Trace atom_id starting in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number)          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_START_GPU_JOB_CHAIN_SW_APPROX,            \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _consumerof_atom_number);                \
+	} while (0)
+
+/* Trace atom_id stopping on JS_HEAD */
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_STOP_GPU_JOB_CHAIN_SW_APPROX,             \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _producerof_atom_number_completed);      \
+	} while (0)
+
+/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
+ * certin caller */
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec,     \
+				trace_code, 1);                              \
+	} while (0)
+
+/* Trace number of contexts active */
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec,    \
+				count);                                      \
+	} while (0)
+
+/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
+
+/**
+ * Trace that an atom is starting on a job slot
+ *
+ * The caller must be holding hwaccess_lock
+ */
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js);
+
+/**
+ * Trace that an atom has done on a job slot
+ *
+ * 'Done' in this sense can occur either because:
+ * - the atom in JS_HEAD finished
+ * - the atom in JS_NEXT was evicted
+ *
+ * Whether the atom finished or was evicted is passed in @a done_code
+ *
+ * It is assumed that the atom has already been removed from the submit slot,
+ * with either:
+ * - kbasep_jm_dequeue_submit_slot()
+ * - kbasep_jm_dequeue_tail_submit_slot()
+ *
+ * The caller must be holding hwaccess_lock
+ */
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code);
+
+
+/** Trace a pm event starting */
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
+		enum kbase_timeline_pm_event event_sent);
+
+/** Trace a pm event finishing */
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Check whether a pm event was present, and if so trace finishing it */
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Trace L2 power-up start */
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
+
+/** Trace L2 power-up done */
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
+
+#else
+
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)  CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
+
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
+
+static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+}
+
+static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+}
+
+static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+}
+
+static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_MALI_TRACE_TIMELINE */
+
+#endif				/* _KBASE_TRACE_TIMELINE_H */
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
new file mode 100755
index 0000000..156a95a
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
@@ -0,0 +1,140 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * Conventions on Event Names:
+ *
+ * - The prefix determines something about how the timeline should be
+ *   displayed, and is split up into various parts, separated by underscores:
+ *  - 'SW' and 'HW' as the first part will be used to determine whether a
+ *     timeline is to do with Software or Hardware - effectively, separate
+ *     'channels' for Software and Hardware
+ *  - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
+ *    signify related pairs of events - these are optional.
+ *  - 'FLOW' indicates a generic event, which can use dependencies
+ * - This gives events such as:
+ *  - 'SW_ENTER_FOO'
+ *  - 'SW_LEAVE_FOO'
+ *  - 'SW_FLOW_BAR_1'
+ *  - 'SW_FLOW_BAR_2'
+ *  - 'HW_START_BAZ'
+ *  - 'HW_STOP_BAZ'
+ * - And an unadorned HW event:
+ *  - 'HW_BAZ_FROZBOZ'
+ */
+
+/*
+ * Conventions on parameter names:
+ * - anything with 'instance' in the name will have a separate timeline based
+ *   on that instances.
+ * - underscored-prefixed parameters will by hidden by default on timelines
+ *
+ * Hence:
+ * - Different job slots have their own 'instance', based on the instance value
+ * - Per-context info (e.g. atoms on a context) have their own 'instance'
+ *   (i.e. each context should be on a different timeline)
+ *
+ * Note that globally-shared resources can be tagged with a tgid, but we don't
+ * want an instance per context:
+ * - There's no point having separate Job Slot timelines for each context, that
+ *   would be confusing - there's only really 3 job slots!
+ * - There's no point having separate Shader-powered timelines for each
+ *   context, that would be confusing - all shader cores (whether it be 4, 8,
+ *   etc) are shared in the system.
+ */
+
+	/*
+	 * CTX events
+	 */
+	/* Separate timelines for each context 'instance'*/
+	KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT,     "CTX: Atoms in flight",            "%d,%d",    "_instance_tgid,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY,            "CTX: Atoms Ready to Run",         "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
+
+	/*
+	 * SW Events
+	 */
+	/* Separate timelines for each slot 'instance' */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE,         "SW: GPU slot active",             "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT,           "SW: GPU atom in NEXT",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD,           "SW: GPU atom in HEAD",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING,       "SW: Try Soft-Stop on GPU slot",   "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
+	/* Shader and overall power is shared - can't have separate instances of
+	 * it, just tagging with the context */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE,        "SW: GPU power active",            "%d,%d",    "_tgid,_value_is_power_active"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE,  "SW: GPU tiler powered",           "%d,%d",    "_tgid,_value_number_of_tilers"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered",         "%d,%d",    "_tgid,_value_number_of_shaders"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE,     "SW: GPU L2 powered",              "%d,%d",    "_tgid,_value_number_of_l2"),
+
+	/* SW Power event messaging. _event_type is one from the kbase_pm_event enum  */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT,          "SW: PM Send Event",               "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT,        "SW: PM Handle Event",             "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
+	/* SW L2 power events */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING,  "SW: GPU L2 powering",             "%d,%d", "_tgid,_writerof_l2_transitioning"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE,	  "SW: GPU L2 powering done",        "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
+
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE,          "SW: Context Active",              "%d,%d",    "_tgid,_value_active"),
+
+	/*
+	 * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END,   "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END,   "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END,   "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
+
+	/*
+	 * Significant Indirect callers of kbase_pm_check_transitions_nolock()
+	 */
+	/* kbase_pm_request_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	/* kbase_pm_release_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END,   "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	/*
+	 * END: SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+
+	/*
+	 * HW Events
+	 */
+	KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT,
+"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"),
+	KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain start (SW approximated)", "%d,%d,%d",
+"_tgid,job_slot,_consumerof_atom_number_ready"),
+	KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain stop (SW approximated)",  "%d,%d,%d",
+"_tgid,job_slot,_producerof_atom_number_completed")
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h
new file mode 100755
index 0000000..baa9296
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h
@@ -0,0 +1,542 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UKU_H_
+#define _KBASE_UKU_H_
+
+#include "mali_uk.h"
+#include "mali_base_kernel.h"
+
+/* This file needs to support being included from kernel and userside (which use different defines) */
+#if defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON
+#define SUPPORT_MALI_ERROR_INJECT
+#endif /* defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON */
+#if defined(CONFIG_MALI_NO_MALI)
+#define SUPPORT_MALI_NO_MALI
+#elif defined(MALI_NO_MALI)
+#if MALI_NO_MALI
+#define SUPPORT_MALI_NO_MALI
+#endif
+#endif
+
+#if defined(SUPPORT_MALI_NO_MALI) || defined(SUPPORT_MALI_ERROR_INJECT)
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+
+#include "mali_kbase_gpuprops_types.h"
+
+/*
+ * 10.1:
+ * - Do mmap in kernel for SAME_VA memory allocations rather then
+ *   calling back into the kernel as a 2nd stage of the allocation request.
+ *
+ * 10.2:
+ * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA
+ *   region for use with JIT (ignored on 32-bit platforms)
+ *
+ * 10.3:
+ * - base_jd_core_req typedef-ed to u32 (instead of to u16)
+ * - two flags added: BASE_JD_REQ_SKIP_CACHE_STAT / _END
+ *
+ * 10.4:
+ * - Removed KBASE_FUNC_EXT_BUFFER_LOCK used only in internal tests
+ *
+ * 10.5:
+ * - Reverted to performing mmap in user space so that tools like valgrind work.
+ *
+ * 10.6:
+ * - Add flags input variable to KBASE_FUNC_TLSTREAM_ACQUIRE
+ */
+#define BASE_UK_VERSION_MAJOR 10
+#define BASE_UK_VERSION_MINOR 6
+
+struct kbase_uk_mem_alloc {
+	union uk_header header;
+	/* IN */
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	/* IN/OUT */
+	u64 flags;
+	/* OUT */
+	u64 gpu_va;
+	u16 va_alignment;
+	u8  padding[6];
+};
+
+struct kbase_uk_mem_free {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	/* OUT */
+};
+
+struct kbase_uk_mem_alias {
+	union uk_header header;
+	/* IN/OUT */
+	u64 flags;
+	/* IN */
+	u64 stride;
+	u64 nents;
+	union kbase_pointer ai;
+	/* OUT */
+	u64         gpu_va;
+	u64         va_pages;
+};
+
+struct kbase_uk_mem_import {
+	union uk_header header;
+	/* IN */
+	union kbase_pointer phandle;
+	u32 type;
+	u32 padding;
+	/* IN/OUT */
+	u64         flags;
+	/* OUT */
+	u64 gpu_va;
+	u64         va_pages;
+};
+
+struct kbase_uk_mem_flags_change {
+	union uk_header header;
+	/* IN */
+	u64 gpu_va;
+	u64 flags;
+	u64 mask;
+};
+
+struct kbase_uk_job_submit {
+	union uk_header header;
+	/* IN */
+	union kbase_pointer addr;
+	u32 nr_atoms;
+	u32 stride;		/* bytes between atoms, i.e. sizeof(base_jd_atom_v2) */
+	/* OUT */
+};
+
+struct kbase_uk_post_term {
+	union uk_header header;
+};
+
+struct kbase_uk_sync_now {
+	union uk_header header;
+
+	/* IN */
+	struct base_syncset sset;
+
+	/* OUT */
+};
+
+struct kbase_uk_hwcnt_setup {
+	union uk_header header;
+
+	/* IN */
+	u64 dump_buffer;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 unused_1; /* keep for backwards compatibility */
+	u32 mmu_l2_bm;
+	u32 padding;
+	/* OUT */
+};
+
+/**
+ * struct kbase_uk_hwcnt_reader_setup - User/Kernel space data exchange structure
+ * @header:       UK structure header
+ * @buffer_count: requested number of dumping buffers
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ * @fd:           dumping notification file descriptor
+ *
+ * This structure sets up HWC dumper/reader for this context.
+ * Multiple instances can be created for single context.
+ */
+struct kbase_uk_hwcnt_reader_setup {
+	union uk_header header;
+
+	/* IN */
+	u32 buffer_count;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 mmu_l2_bm;
+
+	/* OUT */
+	s32 fd;
+};
+
+struct kbase_uk_hwcnt_dump {
+	union uk_header header;
+};
+
+struct kbase_uk_hwcnt_clear {
+	union uk_header header;
+};
+
+struct kbase_uk_fence_validate {
+	union uk_header header;
+	/* IN */
+	s32 fd;
+	u32 padding;
+	/* OUT */
+};
+
+struct kbase_uk_stream_create {
+	union uk_header header;
+	/* IN */
+	char name[32];
+	/* OUT */
+	s32 fd;
+	u32 padding;
+};
+
+struct kbase_uk_gpuprops {
+	union uk_header header;
+
+	/* IN */
+	struct mali_base_gpu_props props;
+	/* OUT */
+};
+
+struct kbase_uk_mem_query {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+#define KBASE_MEM_QUERY_COMMIT_SIZE  1
+#define KBASE_MEM_QUERY_VA_SIZE      2
+#define KBASE_MEM_QUERY_FLAGS        3
+	u64         query;
+	/* OUT */
+	u64         value;
+};
+
+struct kbase_uk_mem_commit {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	u64         pages;
+	/* OUT */
+	u32 result_subcode;
+	u32 padding;
+};
+
+struct kbase_uk_find_cpu_offset {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	u64 cpu_addr;
+	u64 size;
+	/* OUT */
+	u64 offset;
+};
+
+#define KBASE_GET_VERSION_BUFFER_SIZE 64
+struct kbase_uk_get_ddk_version {
+	union uk_header header;
+	/* OUT */
+	char version_buffer[KBASE_GET_VERSION_BUFFER_SIZE];
+	u32 version_string_size;
+	u32 padding;
+};
+
+struct kbase_uk_disjoint_query {
+	union uk_header header;
+	/* OUT */
+	u32 counter;
+	u32 padding;
+};
+
+struct kbase_uk_set_flags {
+	union uk_header header;
+	/* IN */
+	u32 create_flags;
+	u32 padding;
+};
+
+#if MALI_UNIT_TEST
+#define TEST_ADDR_COUNT 4
+#define KBASE_TEST_BUFFER_SIZE 128
+struct kbase_exported_test_data {
+	u64 test_addr[TEST_ADDR_COUNT];		/**< memory address */
+	u32 test_addr_pages[TEST_ADDR_COUNT];		/**<  memory size in pages */
+	union kbase_pointer kctx;				/**<  base context created by process */
+	union kbase_pointer mm;				/**< pointer to process address space */
+	u8 buffer1[KBASE_TEST_BUFFER_SIZE];   /**<  unit test defined parameter */
+	u8 buffer2[KBASE_TEST_BUFFER_SIZE];   /**<  unit test defined parameter */
+};
+
+struct kbase_uk_set_test_data {
+	union uk_header header;
+	/* IN */
+	struct kbase_exported_test_data test_data;
+};
+
+#endif				/* MALI_UNIT_TEST */
+
+#ifdef SUPPORT_MALI_ERROR_INJECT
+struct kbase_uk_error_params {
+	union uk_header header;
+	/* IN */
+	struct kbase_error_params params;
+};
+#endif				/* SUPPORT_MALI_ERROR_INJECT */
+
+#ifdef SUPPORT_MALI_NO_MALI
+struct kbase_uk_model_control_params {
+	union uk_header header;
+	/* IN */
+	struct kbase_model_control_params params;
+};
+#endif				/* SUPPORT_MALI_NO_MALI */
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+struct kbase_uk_keep_gpu_powered {
+	union uk_header header;
+	u32       enabled;
+	u32       padding;
+};
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+struct kbase_uk_profiling_controls {
+	union uk_header header;
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+};
+
+struct kbase_uk_debugfs_mem_profile_add {
+	union uk_header header;
+	u32 len;
+	u32 padding;
+	union kbase_pointer buf;
+};
+
+struct kbase_uk_context_id {
+	union uk_header header;
+	/* OUT */
+	int id;
+};
+
+/**
+ * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @flags:  timeline stream flags
+ * @fd:     timeline stream file descriptor
+ *
+ * This structure is used when performing a call to acquire kernel side timeline
+ * stream file descriptor.
+ */
+struct kbase_uk_tlstream_acquire {
+	union uk_header header;
+	/* IN */
+	u32 flags;
+	/* OUT */
+	s32  fd;
+};
+
+/**
+ * struct kbase_uk_tlstream_acquire_v10_4 - User/Kernel space data exchange
+ *                                          structure
+ * @header: UK structure header
+ * @fd:     timeline stream file descriptor
+ *
+ * This structure is used when performing a call to acquire kernel side timeline
+ * stream file descriptor.
+ */
+struct kbase_uk_tlstream_acquire_v10_4 {
+	union uk_header header;
+	/* IN */
+	/* OUT */
+	s32  fd;
+};
+
+/**
+ * struct kbase_uk_tlstream_flush - User/Kernel space data exchange structure
+ * @header: UK structure header
+ *
+ * This structure is used when performing a call to flush kernel side
+ * timeline streams.
+ */
+struct kbase_uk_tlstream_flush {
+	union uk_header header;
+	/* IN */
+	/* OUT */
+};
+
+#if MALI_UNIT_TEST
+/**
+ * struct kbase_uk_tlstream_test - User/Kernel space data exchange structure
+ * @header:    UK structure header
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay between tracepoints from one writer in milliseconds
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This structure is used when performing a call to start timeline stream test
+ * embedded in kernel.
+ */
+struct kbase_uk_tlstream_test {
+	union uk_header header;
+	/* IN */
+	u32 tpw_count;
+	u32 msg_delay;
+	u32 msg_count;
+	u32 aux_msg;
+	/* OUT */
+};
+
+/**
+ * struct kbase_uk_tlstream_stats - User/Kernel space data exchange structure
+ * @header:          UK structure header
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ *
+ * This structure is used when performing a call to obtain timeline stream
+ * statistics.
+ */
+struct kbase_uk_tlstream_stats {
+	union uk_header header; /**< UK structure header. */
+	/* IN */
+	/* OUT */
+	u32 bytes_collected;
+	u32 bytes_generated;
+};
+#endif /* MALI_UNIT_TEST */
+
+/**
+ * struct struct kbase_uk_prfcnt_value for the KBASE_FUNC_SET_PRFCNT_VALUES ioctl
+ * @header:          UK structure header
+ * @data:            Counter samples for the dummy model
+ * @size:............Size of the counter sample data
+ */
+struct kbase_uk_prfcnt_values {
+	union uk_header header;
+	/* IN */
+	u32 *data;
+	u32 size;
+};
+
+/**
+ * struct kbase_uk_soft_event_update - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @evt:        the GPU address containing the event
+ * @new_status: the new event status, must be either BASE_JD_SOFT_EVENT_SET or
+ *              BASE_JD_SOFT_EVENT_RESET
+ * @flags:      reserved for future uses, must be set to 0
+ *
+ * This structure is used to update the status of a software event. If the
+ * event's status is set to BASE_JD_SOFT_EVENT_SET, any job currently waiting
+ * on this event will complete.
+ */
+struct kbase_uk_soft_event_update {
+	union uk_header header;
+	/* IN */
+	u64 evt;
+	u32 new_status;
+	u32 flags;
+};
+
+/**
+ * struct kbase_uk_mem_jit_init - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @va_pages:   Number of virtual pages required for JIT
+ *
+ * This structure is used when requesting initialization of JIT.
+ */
+struct kbase_uk_mem_jit_init {
+	union uk_header header;
+	/* IN */
+	u64 va_pages;
+};
+
+enum kbase_uk_function_id {
+	KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0),
+	KBASE_FUNC_MEM_IMPORT = (UK_FUNC_ID + 1),
+	KBASE_FUNC_MEM_COMMIT = (UK_FUNC_ID + 2),
+	KBASE_FUNC_MEM_QUERY = (UK_FUNC_ID + 3),
+	KBASE_FUNC_MEM_FREE = (UK_FUNC_ID + 4),
+	KBASE_FUNC_MEM_FLAGS_CHANGE = (UK_FUNC_ID + 5),
+	KBASE_FUNC_MEM_ALIAS = (UK_FUNC_ID + 6),
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	KBASE_FUNC_JOB_SUBMIT_UK6 = (UK_FUNC_ID + 7),
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+	KBASE_FUNC_SYNC  = (UK_FUNC_ID + 8),
+
+	KBASE_FUNC_POST_TERM = (UK_FUNC_ID + 9),
+
+	KBASE_FUNC_HWCNT_SETUP = (UK_FUNC_ID + 10),
+	KBASE_FUNC_HWCNT_DUMP = (UK_FUNC_ID + 11),
+	KBASE_FUNC_HWCNT_CLEAR = (UK_FUNC_ID + 12),
+
+	KBASE_FUNC_GPU_PROPS_REG_DUMP = (UK_FUNC_ID + 14),
+
+	KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15),
+
+	KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16),
+	KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18),
+
+	KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19),
+	KBASE_FUNC_INJECT_ERROR = (UK_FUNC_ID + 20),
+	KBASE_FUNC_MODEL_CONTROL = (UK_FUNC_ID + 21),
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	KBASE_FUNC_KEEP_GPU_POWERED = (UK_FUNC_ID + 22),
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+	KBASE_FUNC_FENCE_VALIDATE = (UK_FUNC_ID + 23),
+	KBASE_FUNC_STREAM_CREATE = (UK_FUNC_ID + 24),
+	KBASE_FUNC_GET_PROFILING_CONTROLS = (UK_FUNC_ID + 25),
+	KBASE_FUNC_SET_PROFILING_CONTROLS = (UK_FUNC_ID + 26),
+					    /* to be used only for testing
+					    * purposes, otherwise these controls
+					    * are set through gator API */
+
+	KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD = (UK_FUNC_ID + 27),
+	KBASE_FUNC_JOB_SUBMIT = (UK_FUNC_ID + 28),
+	KBASE_FUNC_DISJOINT_QUERY = (UK_FUNC_ID + 29),
+
+	KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31),
+
+	KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4 = (UK_FUNC_ID + 32),
+#if MALI_UNIT_TEST
+	KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33),
+	KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34),
+#endif /* MALI_UNIT_TEST */
+	KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35),
+
+	KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36),
+
+#ifdef SUPPORT_MALI_NO_MALI
+	KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37),
+#endif
+
+	KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38),
+
+	KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39),
+
+	KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 40),
+
+	KBASE_FUNC_MAX
+};
+
+#endif				/* _KBASE_UKU_H_ */
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
new file mode 100755
index 0000000..be474ff
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
+{
+	struct list_head *pos = base->next;
+
+	while (pos != base) {
+		if (pos == entry)
+			return true;
+
+		pos = pos->next;
+	}
+	return false;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100755
index 0000000..fd7252d
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+/** Test whether the given list entry is a member of the given list.
+ *
+ * @param base      The head of the list to be tested
+ * @param entry     The list entry to be tested
+ *
+ * @return          true if entry is a member of base
+ *                  false otherwise
+ */
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
+
+#endif				/* _KBASE_UTILITY_H */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
new file mode 100755
index 0000000..7f7efc8
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -0,0 +1,2039 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/hrtimer.h>
+#include <linux/jiffies.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwcnt_reader.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* Hwcnt reader API version */
+#define HWCNT_READER_API        1
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC            1000000000ull /* ns */
+
+/* The time resolution of dumping service. */
+#define DUMPING_RESOLUTION      500000ull /* ns */
+
+/* The maximal supported number of dumping buffers. */
+#define MAX_BUFFER_COUNT        32
+
+/* Size and number of hw counters blocks. */
+#define NR_CNT_BLOCKS_PER_GROUP 8
+#define NR_CNT_PER_BLOCK        64
+#define NR_BYTES_PER_CNT        4
+#define NR_BYTES_PER_HDR        16
+#define PRFCNT_EN_MASK_OFFSET   0x8
+
+/*****************************************************************************/
+
+enum {
+	SHADER_HWCNT_BM,
+	TILER_HWCNT_BM,
+	MMU_L2_HWCNT_BM,
+	JM_HWCNT_BM
+};
+
+enum vinstr_state {
+	VINSTR_IDLE,
+	VINSTR_DUMPING,
+	VINSTR_SUSPENDING,
+	VINSTR_SUSPENDED,
+	VINSTR_RESUMING
+};
+
+/**
+ * struct kbase_vinstr_context - vinstr context per device
+ * @lock:              protects the entire vinstr context
+ * @kbdev:             pointer to kbase device
+ * @kctx:              pointer to kbase context
+ * @vmap:              vinstr vmap for mapping hwcnt dump buffer
+ * @gpu_va:            GPU hwcnt dump buffer address
+ * @cpu_va:            the CPU side mapping of the hwcnt dump buffer
+ * @dump_size:         size of the dump buffer in bytes
+ * @bitmap:            current set of counters monitored, not always in sync
+ *                     with hardware
+ * @reprogram:         when true, reprogram hwcnt block with the new set of
+ *                     counters
+ * @state:             vinstr state
+ * @state_lock:        protects information about vinstr state
+ * @suspend_waitq:     notification queue to trigger state re-validation
+ * @suspend_cnt:       reference counter of vinstr's suspend state
+ * @suspend_work:      worker to execute on entering suspended state
+ * @resume_work:       worker to execute on leaving suspended state
+ * @nclients:          number of attached clients, pending or otherwise
+ * @waiting_clients:   head of list of clients being periodically sampled
+ * @idle_clients:      head of list of clients being idle
+ * @suspended_clients: head of list of clients being suspended
+ * @thread:            periodic sampling thread
+ * @waitq:             notification queue of sampling thread
+ * @request_pending:   request for action for sampling thread
+ */
+struct kbase_vinstr_context {
+	struct mutex             lock;
+	struct kbase_device      *kbdev;
+	struct kbase_context     *kctx;
+
+	struct kbase_vmap_struct vmap;
+	u64                      gpu_va;
+	void                     *cpu_va;
+	size_t                   dump_size;
+	u32                      bitmap[4];
+	bool                     reprogram;
+
+	enum vinstr_state        state;
+	struct spinlock          state_lock;
+	wait_queue_head_t        suspend_waitq;
+	unsigned int             suspend_cnt;
+	struct work_struct       suspend_work;
+	struct work_struct       resume_work;
+
+	u32                      nclients;
+	struct list_head         waiting_clients;
+	struct list_head         idle_clients;
+	struct list_head         suspended_clients;
+
+	struct task_struct       *thread;
+	wait_queue_head_t        waitq;
+	atomic_t                 request_pending;
+};
+
+/**
+ * struct kbase_vinstr_client - a vinstr client attached to a vinstr context
+ * @vinstr_ctx:    vinstr context client is attached to
+ * @list:          node used to attach this client to list in vinstr context
+ * @buffer_count:  number of buffers this client is using
+ * @event_mask:    events this client reacts to
+ * @dump_size:     size of one dump buffer in bytes
+ * @bitmap:        bitmap request for JM, TILER, SHADER and MMU counters
+ * @legacy_buffer: userspace hwcnt dump buffer (legacy interface)
+ * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface)
+ * @accum_buffer:  temporary accumulation buffer for preserving counters
+ * @dump_time:     next time this clients shall request hwcnt dump
+ * @dump_interval: interval between periodic hwcnt dumps
+ * @dump_buffers:  kernel hwcnt dump buffers allocated by this client
+ * @dump_buffers_meta: metadata of dump buffers
+ * @meta_idx:      index of metadata being accessed by userspace
+ * @read_idx:      index of buffer read by userspace
+ * @write_idx:     index of buffer being written by dumping service
+ * @waitq:         client's notification queue
+ * @pending:       when true, client has attached but hwcnt not yet updated
+ */
+struct kbase_vinstr_client {
+	struct kbase_vinstr_context        *vinstr_ctx;
+	struct list_head                   list;
+	unsigned int                       buffer_count;
+	u32                                event_mask;
+	size_t                             dump_size;
+	u32                                bitmap[4];
+	void __user                        *legacy_buffer;
+	void                               *kernel_buffer;
+	void                               *accum_buffer;
+	u64                                dump_time;
+	u32                                dump_interval;
+	char                               *dump_buffers;
+	struct kbase_hwcnt_reader_metadata *dump_buffers_meta;
+	atomic_t                           meta_idx;
+	atomic_t                           read_idx;
+	atomic_t                           write_idx;
+	wait_queue_head_t                  waitq;
+	bool                               pending;
+};
+
+/**
+ * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer
+ * @hrtimer:    high resolution timer
+ * @vinstr_ctx: vinstr context
+ */
+struct kbasep_vinstr_wake_up_timer {
+	struct hrtimer              hrtimer;
+	struct kbase_vinstr_context *vinstr_ctx;
+};
+
+/*****************************************************************************/
+
+static int kbasep_vinstr_service_task(void *data);
+
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+		struct file *filp,
+		poll_table  *wait);
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+		struct file   *filp,
+		unsigned int  cmd,
+		unsigned long arg);
+static int kbasep_vinstr_hwcnt_reader_mmap(
+		struct file           *filp,
+		struct vm_area_struct *vma);
+static int kbasep_vinstr_hwcnt_reader_release(
+		struct inode *inode,
+		struct file  *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations vinstr_client_fops = {
+	.poll           = kbasep_vinstr_hwcnt_reader_poll,
+	.unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,
+	.compat_ioctl   = kbasep_vinstr_hwcnt_reader_ioctl,
+	.mmap           = kbasep_vinstr_hwcnt_reader_mmap,
+	.release        = kbasep_vinstr_hwcnt_reader_release,
+};
+
+/*****************************************************************************/
+
+static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_uk_hwcnt_setup setup;
+	int err;
+
+	setup.dump_buffer = vinstr_ctx->gpu_va;
+	setup.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
+	setup.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
+	setup.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
+	setup.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
+
+	/* Mark the context as active so the GPU is kept turned on */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread. */
+	kbase_pm_context_active(kbdev);
+
+	/* Schedule the context in */
+	kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup);
+	if (err) {
+		/* Release the context. This had its own Power Manager Active
+		 * reference */
+		kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+		/* Also release our Power Manager Active reference */
+		kbase_pm_context_idle(kbdev);
+	}
+
+	return err;
+}
+
+static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+
+	err = kbase_instr_hwcnt_disable_internal(kctx);
+	if (err) {
+		dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)",
+				kctx);
+		return;
+	}
+
+	/* Release the context. This had its own Power Manager Active reference. */
+	kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+	/* Also release our Power Manager Active reference. */
+	kbase_pm_context_idle(kbdev);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx);
+}
+
+static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	disable_hwcnt(vinstr_ctx);
+	return enable_hwcnt(vinstr_ctx);
+}
+
+static void hwcnt_bitmap_set(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     = src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  = src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM];
+}
+
+static void hwcnt_bitmap_union(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     |= src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  |= src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM];
+}
+
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev)
+{
+	size_t dump_size;
+
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+		u32 nr_cg;
+
+		nr_cg = kbdev->gpu_props.num_core_groups;
+		dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	} else
+#endif /* CONFIG_MALI_NO_MALI */
+	{
+		/* assume v5 for now */
+		base_gpu_props *props = &kbdev->gpu_props.props;
+		u32 nr_l2 = props->l2_props.num_l2_slices;
+		u64 core_mask = props->coherency_info.group[0].core_mask;
+		u32 nr_blocks = fls64(core_mask);
+
+		/* JM and tiler counter blocks are always present */
+		dump_size = (2 + nr_l2 + nr_blocks) *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	}
+	return dump_size;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size);
+
+static size_t kbasep_vinstr_dump_size_ctx(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev);
+}
+
+static int kbasep_vinstr_map_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_va_region *reg;
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	u64 flags, nr_pages;
+
+	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+	vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	nr_pages = PFN_UP(vinstr_ctx->dump_size);
+
+	reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+			&vinstr_ctx->gpu_va);
+	if (!reg)
+		return -ENOMEM;
+
+	vinstr_ctx->cpu_va = kbase_vmap(
+			kctx,
+			vinstr_ctx->gpu_va,
+			vinstr_ctx->dump_size,
+			&vinstr_ctx->vmap);
+	if (!vinstr_ctx->cpu_va) {
+		kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void kbasep_vinstr_unmap_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+
+	kbase_vunmap(kctx, &vinstr_ctx->vmap);
+	kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+}
+
+/**
+ * kbasep_vinstr_create_kctx - create kernel context for vinstr
+ * @vinstr_ctx: vinstr context
+ * Return: zero on success
+ */
+static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	unsigned long flags;
+	bool enable_backend = false;
+	int err;
+
+	vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true);
+	if (!vinstr_ctx->kctx)
+		return -ENOMEM;
+
+	/* Map the master kernel dump buffer.  The HW dumps the counters
+	 * into this memory region. */
+	err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx);
+	if (err) {
+		kbase_destroy_context(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return err;
+	}
+
+	/* Add kernel context to list of contexts associated with device. */
+	element = kzalloc(sizeof(*element), GFP_KERNEL);
+	if (element) {
+		element->kctx = vinstr_ctx->kctx;
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_add(&element->link, &kbdev->kctx_list);
+
+		/* Inform timeline client about new context.
+		 * Do this while holding the lock to avoid tracepoint
+		 * being created in both body and summary stream. */
+		KBASE_TLSTREAM_TL_NEW_CTX(
+				vinstr_ctx->kctx,
+				(u32)(vinstr_ctx->kctx->id),
+				(u32)(vinstr_ctx->kctx->tgid));
+
+		mutex_unlock(&kbdev->kctx_list_lock);
+	} else {
+		/* Don't treat this as a fail - just warn about it. */
+		dev_warn(kbdev->dev,
+				"couldn't add kctx to kctx_list\n");
+	}
+
+	/* Don't enable hardware counters if vinstr is suspended.
+	 * Note that vinstr resume code is run under vinstr context lock,
+	 * lower layer will be enabled as needed on resume. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE == vinstr_ctx->state)
+		enable_backend = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (enable_backend)
+		err = enable_hwcnt(vinstr_ctx);
+
+	if (err) {
+		kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+		kbase_destroy_context(vinstr_ctx->kctx);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			list_del(&element->link);
+			kfree(element);
+			mutex_unlock(&kbdev->kctx_list_lock);
+		}
+		KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return err;
+	}
+
+	vinstr_ctx->thread = kthread_run(
+			kbasep_vinstr_service_task,
+			vinstr_ctx,
+			"mali_vinstr_service");
+	if (!vinstr_ctx->thread) {
+		disable_hwcnt(vinstr_ctx);
+		kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+		kbase_destroy_context(vinstr_ctx->kctx);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			list_del(&element->link);
+			kfree(element);
+			mutex_unlock(&kbdev->kctx_list_lock);
+		}
+		KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device             *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	struct kbasep_kctx_list_element *tmp;
+	bool                            found = false;
+
+	/* Release hw counters dumping resources. */
+	vinstr_ctx->thread = NULL;
+	disable_hwcnt(vinstr_ctx);
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+	kbase_destroy_context(vinstr_ctx->kctx);
+
+	/* Remove kernel context from the device's contexts list. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == vinstr_ctx->kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	if (!found)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	/* Inform timeline client about context destruction. */
+	KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+
+	vinstr_ctx->kctx = NULL;
+}
+
+/**
+ * kbasep_vinstr_attach_client - Attach a client to the vinstr core
+ * @vinstr_ctx:    vinstr context
+ * @buffer_count:  requested number of dump buffers
+ * @bitmap:        bitmaps describing which counters should be enabled
+ * @argp:          pointer where notification descriptor shall be stored
+ * @kernel_buffer: pointer to kernel side buffer
+ *
+ * Return: vinstr opaque client handle or NULL on failure
+ */
+static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
+		struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count,
+		u32 bitmap[4], void *argp, void *kernel_buffer)
+{
+	struct task_struct         *thread = NULL;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	if (buffer_count > MAX_BUFFER_COUNT
+	    || (buffer_count & (buffer_count - 1)))
+		return NULL;
+
+	cli = kzalloc(sizeof(*cli), GFP_KERNEL);
+	if (!cli)
+		return NULL;
+
+	cli->vinstr_ctx   = vinstr_ctx;
+	cli->buffer_count = buffer_count;
+	cli->event_mask   =
+		(1 << BASE_HWCNT_READER_EVENT_MANUAL) |
+		(1 << BASE_HWCNT_READER_EVENT_PERIODIC);
+	cli->pending      = true;
+
+	hwcnt_bitmap_set(cli->bitmap, bitmap);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap);
+	vinstr_ctx->reprogram = true;
+
+	/* If this is the first client, create the vinstr kbase
+	 * context. This context is permanently resident until the
+	 * last client exits. */
+	if (!vinstr_ctx->nclients) {
+		hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap);
+		if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0)
+			goto error;
+
+		vinstr_ctx->reprogram = false;
+		cli->pending = false;
+	}
+
+	/* The GPU resets the counter block every time there is a request
+	 * to dump it. We need a per client kernel buffer for accumulating
+	 * the counters. */
+	cli->dump_size    = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
+	if (!cli->accum_buffer)
+		goto error;
+
+	/* Prepare buffers. */
+	if (cli->buffer_count) {
+		int *fd = (int *)argp;
+		size_t tmp;
+
+		/* Allocate area for buffers metadata storage. */
+		tmp = sizeof(struct kbase_hwcnt_reader_metadata) *
+			cli->buffer_count;
+		cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL);
+		if (!cli->dump_buffers_meta)
+			goto error;
+
+		/* Allocate required number of dumping buffers. */
+		cli->dump_buffers = (char *)__get_free_pages(
+				GFP_KERNEL | __GFP_ZERO,
+				get_order(cli->dump_size * cli->buffer_count));
+		if (!cli->dump_buffers)
+			goto error;
+
+		/* Create descriptor for user-kernel data exchange. */
+		*fd = anon_inode_getfd(
+				"[mali_vinstr_desc]",
+				&vinstr_client_fops,
+				cli,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd)
+			goto error;
+	} else if (kernel_buffer) {
+		cli->kernel_buffer = kernel_buffer;
+	} else {
+		cli->legacy_buffer = (void __user *)argp;
+	}
+
+	atomic_set(&cli->read_idx, 0);
+	atomic_set(&cli->meta_idx, 0);
+	atomic_set(&cli->write_idx, 0);
+	init_waitqueue_head(&cli->waitq);
+
+	vinstr_ctx->nclients++;
+	list_add(&cli->list, &vinstr_ctx->idle_clients);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return cli;
+
+error:
+	kfree(cli->dump_buffers_meta);
+	if (cli->dump_buffers)
+		free_pages(
+				(unsigned long)cli->dump_buffers,
+				get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	if (!vinstr_ctx->nclients && vinstr_ctx->kctx) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+	kfree(cli);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+
+	return NULL;
+}
+
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	struct kbase_vinstr_client  *iter, *tmp;
+	struct task_struct          *thread = NULL;
+	u32 zerobitmap[4] = { 0 };
+	int cli_found = 0;
+
+	KBASE_DEBUG_ASSERT(cli);
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) {
+		if (iter == cli) {
+			vinstr_ctx->reprogram = true;
+			cli_found = 1;
+			list_del(&iter->list);
+			break;
+		}
+	}
+	if (!cli_found) {
+		list_for_each_entry_safe(
+				iter, tmp, &vinstr_ctx->waiting_clients, list) {
+			if (iter == cli) {
+				vinstr_ctx->reprogram = true;
+				cli_found = 1;
+				list_del(&iter->list);
+				break;
+			}
+		}
+	}
+	KBASE_DEBUG_ASSERT(cli_found);
+
+	kfree(cli->dump_buffers_meta);
+	free_pages(
+			(unsigned long)cli->dump_buffers,
+			get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	kfree(cli);
+
+	vinstr_ctx->nclients--;
+	if (!vinstr_ctx->nclients) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+
+	/* Rebuild context bitmap now that the client has detached */
+	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client);
+
+/* Accumulate counters in the dump buffer */
+static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
+{
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+	u32 *d = dst;
+	u32 *s = src;
+	size_t i, j;
+
+	for (i = 0; i < dump_size; i += block_size) {
+		/* skip over the header block */
+		d += NR_BYTES_PER_HDR / sizeof(u32);
+		s += NR_BYTES_PER_HDR / sizeof(u32);
+		for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
+			/* saturate result if addition would result in wraparound */
+			if (U32_MAX - *d < *s)
+				*d = U32_MAX;
+			else
+				*d += *s;
+			d++;
+			s++;
+		}
+	}
+}
+
+/* This is the Midgard v4 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v4(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups;
+	size_t i, group_size, group;
+	enum {
+		SC0_BASE    = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC1_BASE    = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC2_BASE    = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC3_BASE    = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		TILER_BASE  = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		JM_BASE     = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
+	};
+
+	group_size = NR_CNT_BLOCKS_PER_GROUP *
+			NR_CNT_PER_BLOCK *
+			NR_BYTES_PER_CNT;
+	for (i = 0; i < nr_cg; i++) {
+		group = i * group_size;
+		/* copy shader core headers */
+		memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE],
+		      NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy tiler header */
+		memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy mmu header */
+		memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy job manager header */
+		memcpy(&dst[group + JM_BASE], &src[group + JM_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* patch the shader core enable mask */
+		mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+
+		/* patch the tiler core enable mask */
+		mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[TILER_HWCNT_BM];
+
+		/* patch the mmu core enable mask */
+		mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+
+		/* patch the job manager enable mask */
+		mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[JM_HWCNT_BM];
+	}
+}
+
+/* This is the Midgard v5 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v5(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev;
+	u32 i, nr_l2;
+	u64 core_mask;
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+
+	/* copy and patch job manager header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[JM_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch tiler header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[TILER_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch MMU/L2C headers */
+	nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+	for (i = 0; i < nr_l2; i++) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+		dst += block_size;
+		src += block_size;
+	}
+
+	/* copy and patch shader core headers */
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (0ull != core_mask) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		if (0ull != (core_mask & 1ull)) {
+			/* if block is not reserved update header */
+			mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+			*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		}
+		dst += block_size;
+		src += block_size;
+
+		core_mask >>= 1;
+	}
+}
+
+/**
+ * accum_clients - accumulate dumped hw counters for all known clients
+ * @vinstr_ctx: vinstr context
+ */
+static void accum_clients(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *iter;
+	int v4 = 0;
+
+#ifndef CONFIG_MALI_NO_MALI
+	v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4);
+#endif
+
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ *
+ * Return: timestamp value
+ */
+static u64 kbasep_vinstr_get_timestamp(void)
+{
+	struct timespec ts;
+
+	getrawmonotonic(&ts);
+	return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+}
+
+/**
+ * kbasep_vinstr_add_dump_request - register client's dumping request
+ * @cli:             requesting client
+ * @waiting_clients: list of pending dumping requests
+ */
+static void kbasep_vinstr_add_dump_request(
+		struct kbase_vinstr_client *cli,
+		struct list_head *waiting_clients)
+{
+	struct kbase_vinstr_client *tmp;
+
+	if (list_empty(waiting_clients)) {
+		list_add(&cli->list, waiting_clients);
+		return;
+	}
+	list_for_each_entry(tmp, waiting_clients, list) {
+		if (tmp->dump_time > cli->dump_time) {
+			list_add_tail(&cli->list, &tmp->list);
+			return;
+		}
+	}
+	list_add_tail(&cli->list, waiting_clients);
+}
+
+/**
+ * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level
+ *                                        dump and accumulate them for known
+ *                                        clients
+ * @vinstr_ctx: vinstr context
+ * @timestamp: pointer where collection timestamp will be recorded
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_collect_and_accumulate(
+		struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp)
+{
+	unsigned long flags;
+	int rcode;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* The dummy model needs the CPU mapping. */
+	gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va);
+#endif
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state) {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		return -EAGAIN;
+	} else {
+		vinstr_ctx->state = VINSTR_DUMPING;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Request HW counters dump.
+	 * Disable preemption to make dump timestamp more accurate. */
+	preempt_disable();
+	*timestamp = kbasep_vinstr_get_timestamp();
+	rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx);
+	preempt_enable();
+
+	if (!rcode)
+		rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
+	WARN_ON(rcode);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state)
+	{
+	case VINSTR_SUSPENDING:
+		schedule_work(&vinstr_ctx->suspend_work);
+		break;
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_IDLE;
+		wake_up_all(&vinstr_ctx->suspend_waitq);
+		break;
+	default:
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Accumulate values of collected counters. */
+	if (!rcode)
+		accum_clients(vinstr_ctx);
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel
+ *                                  buffer
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_fill_dump_buffer(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	unsigned int write_idx = atomic_read(&cli->write_idx);
+	unsigned int read_idx  = atomic_read(&cli->read_idx);
+
+	struct kbase_hwcnt_reader_metadata *meta;
+	void                               *buffer;
+
+	/* Check if there is a place to copy HWC block into. */
+	if (write_idx - read_idx == cli->buffer_count)
+		return -1;
+	write_idx %= cli->buffer_count;
+
+	/* Fill in dump buffer and its metadata. */
+	buffer = &cli->dump_buffers[write_idx * cli->dump_size];
+	meta   = &cli->dump_buffers_meta[write_idx];
+	meta->timestamp  = timestamp;
+	meta->event_id   = event_id;
+	meta->buffer_idx = write_idx;
+	memcpy(buffer, cli->accum_buffer, cli->dump_size);
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer
+ *                                         allocated in userspace
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of legacy ioctl interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_legacy(
+		struct kbase_vinstr_client *cli)
+{
+	void __user  *buffer = cli->legacy_buffer;
+	int          rcode;
+
+	/* Copy data to user buffer. */
+	rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size);
+	if (rcode)
+		pr_warn("error while copying buffer to user\n");
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer
+ *                                         allocated in kernel space
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of the kernel client interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_kernel(
+		struct kbase_vinstr_client *cli)
+{
+	memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_reprogram(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	bool suspended = false;
+
+	/* Don't enable hardware counters if vinstr is suspended. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state)
+		suspended = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (suspended)
+		return;
+
+	/* Change to suspended state is done while holding vinstr context
+	 * lock. Below code will then no re-enable the instrumentation. */
+
+	if (vinstr_ctx->reprogram) {
+		struct kbase_vinstr_client *iter;
+
+		if (!reprogram_hwcnt(vinstr_ctx)) {
+			vinstr_ctx->reprogram = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->idle_clients,
+					list)
+				iter->pending = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->waiting_clients,
+					list)
+				iter->pending = false;
+		}
+	}
+}
+
+/**
+ * kbasep_vinstr_update_client - copy accumulated counters to user readable
+ *                               buffer and notify the user
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_update_client(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	int rcode = 0;
+
+	/* Copy collected counters to user readable buffer. */
+	if (cli->buffer_count)
+		rcode = kbasep_vinstr_fill_dump_buffer(
+				cli, timestamp, event_id);
+	else if (cli->kernel_buffer)
+		rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli);
+	else
+		rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
+
+	if (rcode)
+		goto exit;
+
+
+	/* Notify client. Make sure all changes to memory are visible. */
+	wmb();
+	atomic_inc(&cli->write_idx);
+	wake_up_interruptible(&cli->waitq);
+
+	/* Prepare for next request. */
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+exit:
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function
+ *
+ * @hrtimer: high resolution timer
+ *
+ * Return: High resolution timer restart enum.
+ */
+static enum hrtimer_restart kbasep_vinstr_wake_up_callback(
+		struct hrtimer *hrtimer)
+{
+	struct kbasep_vinstr_wake_up_timer *timer =
+		container_of(
+			hrtimer,
+			struct kbasep_vinstr_wake_up_timer,
+			hrtimer);
+
+	KBASE_DEBUG_ASSERT(timer);
+
+	atomic_set(&timer->vinstr_ctx->request_pending, 1);
+	wake_up_all(&timer->vinstr_ctx->waitq);
+
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * kbasep_vinstr_service_task - HWC dumping service thread
+ *
+ * @data: Pointer to vinstr context structure.
+ *
+ * Return: Always returns zero.
+ */
+static int kbasep_vinstr_service_task(void *data)
+{
+	struct kbase_vinstr_context        *vinstr_ctx = data;
+	struct kbasep_vinstr_wake_up_timer timer;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	hrtimer_init(&timer.hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	timer.hrtimer.function = kbasep_vinstr_wake_up_callback;
+	timer.vinstr_ctx       = vinstr_ctx;
+
+	while (!kthread_should_stop()) {
+		struct kbase_vinstr_client *cli = NULL;
+		struct kbase_vinstr_client *tmp;
+		int                        rcode;
+
+		u64              timestamp = kbasep_vinstr_get_timestamp();
+		u64              dump_time = 0;
+		struct list_head expired_requests;
+
+		/* Hold lock while performing operations on lists of clients. */
+		mutex_lock(&vinstr_ctx->lock);
+
+		/* Closing thread must not interact with client requests. */
+		if (current == vinstr_ctx->thread) {
+			atomic_set(&vinstr_ctx->request_pending, 0);
+
+			if (!list_empty(&vinstr_ctx->waiting_clients)) {
+				cli = list_first_entry(
+						&vinstr_ctx->waiting_clients,
+						struct kbase_vinstr_client,
+						list);
+				dump_time = cli->dump_time;
+			}
+		}
+
+		if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) {
+			mutex_unlock(&vinstr_ctx->lock);
+
+			/* Sleep until next dumping event or service request. */
+			if (cli) {
+				u64 diff = dump_time - timestamp;
+
+				hrtimer_start(
+						&timer.hrtimer,
+						ns_to_ktime(diff),
+						HRTIMER_MODE_REL);
+			}
+			wait_event(
+					vinstr_ctx->waitq,
+					atomic_read(
+						&vinstr_ctx->request_pending) ||
+					kthread_should_stop());
+			hrtimer_cancel(&timer.hrtimer);
+			continue;
+		}
+
+		rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx,
+				&timestamp);
+
+		INIT_LIST_HEAD(&expired_requests);
+
+		/* Find all expired requests. */
+		list_for_each_entry_safe(
+				cli,
+				tmp,
+				&vinstr_ctx->waiting_clients,
+				list) {
+			s64 tdiff =
+				(s64)(timestamp + DUMPING_RESOLUTION) -
+				(s64)cli->dump_time;
+			if (tdiff >= 0ll) {
+				list_del(&cli->list);
+				list_add(&cli->list, &expired_requests);
+			} else {
+				break;
+			}
+		}
+
+		/* Fill data for each request found. */
+		list_for_each_entry_safe(cli, tmp, &expired_requests, list) {
+			/* Ensure that legacy buffer will not be used from
+			 * this kthread context. */
+			BUG_ON(0 == cli->buffer_count);
+			/* Expect only periodically sampled clients. */
+			BUG_ON(0 == cli->dump_interval);
+
+			if (!rcode)
+				kbasep_vinstr_update_client(
+						cli,
+						timestamp,
+						BASE_HWCNT_READER_EVENT_PERIODIC);
+
+			/* Set new dumping time. Drop missed probing times. */
+			do {
+				cli->dump_time += cli->dump_interval;
+			} while (cli->dump_time < timestamp);
+
+			list_del(&cli->list);
+			kbasep_vinstr_add_dump_request(
+					cli,
+					&vinstr_ctx->waiting_clients);
+		}
+
+		/* Reprogram counters set if required. */
+		kbasep_vinstr_reprogram(vinstr_ctx);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	}
+
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers
+ * @cli: pointer to vinstr client structure
+ *
+ * Return: non-zero if client has at least one dumping buffer filled that was
+ *         not notified to user yet
+ */
+static int kbasep_vinstr_hwcnt_reader_buffer_ready(
+		struct kbase_vinstr_client *cli)
+{
+	KBASE_DEBUG_ASSERT(cli);
+	return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int meta_idx = atomic_read(&cli->meta_idx);
+	unsigned int idx = meta_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx];
+
+	/* Metadata sanity check. */
+	KBASE_DEBUG_ASSERT(idx == meta->buffer_idx);
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if there is any buffer available. */
+	if (atomic_read(&cli->write_idx) == meta_idx)
+		return -EAGAIN;
+
+	/* Check if previously taken buffer was put back. */
+	if (atomic_read(&cli->read_idx) != meta_idx)
+		return -EBUSY;
+
+	/* Copy next available buffer's metadata to user. */
+	if (copy_to_user(buffer, meta, size))
+		return -EFAULT;
+
+	atomic_inc(&cli->meta_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int read_idx = atomic_read(&cli->read_idx);
+	unsigned int idx = read_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata meta;
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if any buffer was taken. */
+	if (atomic_read(&cli->meta_idx) == read_idx)
+		return -EPERM;
+
+	/* Check if correct buffer is put back. */
+	if (copy_from_user(&meta, buffer, size))
+		return -EFAULT;
+	if (idx != meta.buffer_idx)
+		return -EINVAL;
+
+	atomic_inc(&cli->read_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @interval: periodic dumping interval (disable periodic dumping if zero)
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+		struct kbase_vinstr_client *cli, u32 interval)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	list_del(&cli->list);
+
+	cli->dump_interval = interval;
+
+	/* If interval is non-zero, enable periodic dumping for this client. */
+	if (cli->dump_interval) {
+		if (DUMPING_RESOLUTION > cli->dump_interval)
+			cli->dump_interval = DUMPING_RESOLUTION;
+		cli->dump_time =
+			kbasep_vinstr_get_timestamp() + cli->dump_interval;
+
+		kbasep_vinstr_add_dump_request(
+				cli, &vinstr_ctx->waiting_clients);
+
+		atomic_set(&vinstr_ctx->request_pending, 1);
+		wake_up_all(&vinstr_ctx->waitq);
+	} else {
+		list_add(&cli->list, &vinstr_ctx->idle_clients);
+	}
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id
+ * @event_id: id of event
+ * Return: event_mask or zero if event is not supported or maskable
+ */
+static u32 kbasep_vinstr_hwcnt_reader_event_mask(
+		enum base_hwcnt_reader_event event_id)
+{
+	u32 event_mask = 0;
+
+	switch (event_id) {
+	case BASE_HWCNT_READER_EVENT_PREJOB:
+	case BASE_HWCNT_READER_EVENT_POSTJOB:
+		/* These event are maskable. */
+		event_mask = (1 << event_id);
+		break;
+
+	case BASE_HWCNT_READER_EVENT_MANUAL:
+	case BASE_HWCNT_READER_EVENT_PERIODIC:
+		/* These event are non-maskable. */
+	default:
+		/* These event are not supported. */
+		break;
+	}
+
+	return event_mask;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to enable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask |= event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to disable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask &= ~event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command
+ * @cli:   pointer to vinstr client structure
+ * @hwver: pointer to user buffer where hw version will be stored
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+		struct kbase_vinstr_client *cli, u32 __user *hwver)
+{
+#ifndef CONFIG_MALI_NO_MALI
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+#endif
+
+	u32                         ver = 5;
+
+#ifndef CONFIG_MALI_NO_MALI
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4))
+		ver = 4;
+#endif
+
+	return put_user(ver, hwver);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl
+ * @filp:   pointer to file structure
+ * @cmd:    user command
+ * @arg:    command's argument
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	long                       rcode = 0;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd)))
+		return -EINVAL;
+
+	switch (cmd) {
+	case KBASE_HWCNT_READER_GET_API_VERSION:
+		rcode = put_user(HWCNT_READER_API, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_HWVER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+				cli, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER_SIZE:
+		KBASE_DEBUG_ASSERT(cli->vinstr_ctx);
+		rcode = put_user(
+				(u32)cli->vinstr_ctx->dump_size,
+				(u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_DUMP:
+		rcode = kbase_vinstr_hwc_dump(
+				cli, BASE_HWCNT_READER_EVENT_MANUAL);
+		break;
+	case KBASE_HWCNT_READER_CLEAR:
+		rcode = kbase_vinstr_hwc_clear(cli);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_PUT_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_SET_INTERVAL:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+				cli, (u32)arg);
+		break;
+	case KBASE_HWCNT_READER_ENABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	case KBASE_HWCNT_READER_DISABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	default:
+		rcode = -EINVAL;
+		break;
+	}
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp,
+		poll_table *wait)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	poll_wait(filp, &cli->waitq, wait);
+	if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap
+ * @filp: pointer to file structure
+ * @vma:  pointer to vma structure
+ * Return: zero on success
+ */
+static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
+		struct vm_area_struct *vma)
+{
+	struct kbase_vinstr_client *cli;
+	unsigned long size, addr, pfn, offset;
+	unsigned long vm_size = vma->vm_end - vma->vm_start;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(vma);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	size = cli->buffer_count * cli->dump_size;
+
+	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
+		return -EINVAL;
+
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	if (vm_size > size - offset)
+		return -EINVAL;
+
+	addr = __pa((unsigned long)cli->dump_buffers + offset);
+	pfn = addr >> PAGE_SHIFT;
+
+	return remap_pfn_range(
+			vma,
+			vma->vm_start,
+			pfn,
+			vm_size,
+			vma->vm_page_prot);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ * Return always return zero
+ */
+static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
+		struct file *filp)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	kbase_vinstr_detach_client(cli);
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_kick_scheduler - trigger scheduler cycle
+ * @kbdev: pointer to kbase device structure
+ */
+static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	down(&js_devdata->schedule_sem);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	up(&js_devdata->schedule_sem);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker suspending vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_suspend_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			suspend_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		disable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_SUSPENDED;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Kick GPU scheduler to allow entering protected mode.
+	 * This must happen after vinstr was suspended. */
+	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker resuming vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_resume_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			resume_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		enable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_IDLE;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Kick GPU scheduler to allow entering protected mode.
+	 * Note that scheduler state machine might requested re-entry to
+	 * protected mode before vinstr was resumed.
+	 * This must happen after vinstr was release. */
+	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+}
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL);
+	if (!vinstr_ctx)
+		return NULL;
+
+	INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
+	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
+	mutex_init(&vinstr_ctx->lock);
+	spin_lock_init(&vinstr_ctx->state_lock);
+	vinstr_ctx->kbdev = kbdev;
+	vinstr_ctx->thread = NULL;
+	vinstr_ctx->state = VINSTR_IDLE;
+	vinstr_ctx->suspend_cnt = 0;
+	INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker);
+	INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker);
+	init_waitqueue_head(&vinstr_ctx->suspend_waitq);
+
+	atomic_set(&vinstr_ctx->request_pending, 0);
+	init_waitqueue_head(&vinstr_ctx->waitq);
+
+	return vinstr_ctx;
+}
+
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *cli;
+
+	/* Stop service thread first. */
+	if (vinstr_ctx->thread)
+		kthread_stop(vinstr_ctx->thread);
+
+	/* Wait for workers. */
+	flush_work(&vinstr_ctx->suspend_work);
+	flush_work(&vinstr_ctx->resume_work);
+
+	while (1) {
+		struct list_head *list = &vinstr_ctx->idle_clients;
+
+		if (list_empty(list)) {
+			list = &vinstr_ctx->waiting_clients;
+			if (list_empty(list))
+				break;
+		}
+
+		cli = list_first_entry(list, struct kbase_vinstr_client, list);
+		list_del(&cli->list);
+		kfree(cli->accum_buffer);
+		kfree(cli);
+		vinstr_ctx->nclients--;
+	}
+	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients);
+	if (vinstr_ctx->kctx)
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	kfree(vinstr_ctx);
+}
+
+int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup)
+{
+	struct kbase_vinstr_client  *cli;
+	u32                         bitmap[4];
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(setup->buffer_count);
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	cli = kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			setup->buffer_count,
+			bitmap,
+			&setup->fd,
+			NULL);
+
+	if (!cli)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup)
+{
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (setup->dump_buffer) {
+		u32 bitmap[4];
+
+		bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+		bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+		bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+		bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+		if (*cli)
+			return -EBUSY;
+
+		*cli = kbasep_vinstr_attach_client(
+				vinstr_ctx,
+				0,
+				bitmap,
+				(void *)(long)setup->dump_buffer,
+				NULL);
+
+		if (!(*cli))
+			return -ENOMEM;
+	} else {
+		if (!*cli)
+			return -EINVAL;
+
+		kbase_vinstr_detach_client(*cli);
+		*cli = NULL;
+	}
+
+	return 0;
+}
+
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer)
+{
+	u32 bitmap[4];
+
+	if (!vinstr_ctx || !setup || !kernel_buffer)
+		return NULL;
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	return kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			0,
+			bitmap,
+			NULL,
+			kernel_buffer);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
+
+int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	int                         rcode = 0;
+	struct kbase_vinstr_context *vinstr_ctx;
+	u64                         timestamp;
+	u32                         event_mask;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT);
+	event_mask = 1 << event_id;
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (event_mask & cli->event_mask) {
+		rcode = kbasep_vinstr_collect_and_accumulate(
+				vinstr_ctx,
+				&timestamp);
+		if (rcode)
+			goto exit;
+
+		rcode = kbasep_vinstr_update_client(cli, timestamp, event_id);
+		if (rcode)
+			goto exit;
+
+		kbasep_vinstr_reprogram(vinstr_ctx);
+	}
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump);
+
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	int                         rcode;
+	u64                         unused;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
+	if (rcode)
+		goto exit;
+	rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx);
+	if (rcode)
+		goto exit;
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+	kbasep_vinstr_reprogram(vinstr_ctx);
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	int ret = -EAGAIN;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+		vinstr_ctx->suspend_cnt++;
+		/* overflow shall not happen */
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		ret = 0;
+		break;
+
+	case VINSTR_IDLE:
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		schedule_work(&vinstr_ctx->suspend_work);
+		break;
+
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		break;
+
+	case VINSTR_SUSPENDING:
+		/* fall through */
+	case VINSTR_RESUMING:
+		break;
+
+	default:
+		BUG();
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	return ret;
+}
+
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	wait_event(vinstr_ctx->suspend_waitq,
+			(0 == kbase_vinstr_try_suspend(vinstr_ctx)));
+}
+
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state);
+	if (VINSTR_SUSPENDED == vinstr_ctx->state) {
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		vinstr_ctx->suspend_cnt--;
+		if (0 == vinstr_ctx->suspend_cnt) {
+			vinstr_ctx->state = VINSTR_RESUMING;
+			schedule_work(&vinstr_ctx->resume_work);
+		}
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
new file mode 100755
index 0000000..6207d25
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -0,0 +1,155 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwcnt_reader.h>
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context;
+struct kbase_vinstr_client;
+
+/*****************************************************************************/
+
+/**
+ * kbase_vinstr_init() - initialize the vinstr core
+ * @kbdev: kbase device
+ *
+ * Return: pointer to the vinstr context on success or NULL on failure
+ */
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_term() - terminate the vinstr core
+ * @vinstr_ctx: vinstr context
+ */
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader
+ * @vinstr_ctx: vinstr context
+ * @setup:      reader's configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwcnt_reader_setup(
+		struct kbase_vinstr_context        *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup);
+
+/**
+ * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping
+ * @vinstr_ctx: vinstr context
+ * @cli:        pointer where to store pointer to new vinstr client structure
+ * @setup:      hwc configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
+ *                                   client
+ * @vinstr_ctx:    vinstr context
+ * @setup:         reader's configuration
+ * @kernel_buffer: pointer to dump buffer
+ *
+ * setup->buffer_count and setup->fd are not used for kernel side clients.
+ *
+ * Return: pointer to client structure, or NULL on failure
+ */
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer);
+
+/**
+ * kbase_vinstr_hwc_dump - issue counter dump for vinstr client
+ * @cli:      pointer to vinstr client
+ * @event_id: id of event that triggered hwcnt dump
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_dump(
+		struct kbase_vinstr_client   *cli,
+		enum base_hwcnt_reader_event event_id);
+
+/**
+ * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for
+ *                          a given kbase context
+ * @cli: pointer to vinstr client
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Return: 0 on success, or negative if state change is in progress
+ *
+ * Warning: This API call is non-generic. It is meant to be used only by
+ *          job scheduler state machine.
+ *
+ * Function initiates vinstr switch to suspended state. Once it was called
+ * vinstr enters suspending state. If function return non-zero value, it
+ * indicates that state switch is not complete and function must be called
+ * again. On state switch vinstr will trigger job scheduler state machine
+ * cycle.
+ */
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_suspend - suspends operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function initiates vinstr switch to suspended state. Then it blocks until
+ * operation is completed.
+ */
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_resume - resumes operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function can be called only if it was preceded by a successful call
+ * to kbase_vinstr_suspend.
+ */
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_dump_size - Return required size of dump buffer
+ * @kbdev: device pointer
+ *
+ * Return : buffer size in bytes
+ */
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_detach_client - Detach a client from the vinstr core
+ * @cli: pointer to vinstr client
+ */
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
+
+#endif /* _KBASE_VINSTR_H_ */
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100755
index 0000000..5d6b402
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_KBASE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mali_slot_template,
+	TP_PROTO(int jobslot, unsigned int info_val),
+	TP_ARGS(jobslot, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, jobslot)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->jobslot = jobslot;
+		__entry->info_val = info_val;
+	),
+	TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val)
+);
+
+#define DEFINE_MALI_SLOT_EVENT(name) \
+DEFINE_EVENT(mali_slot_template, mali_##name, \
+	TP_PROTO(int jobslot, unsigned int info_val), \
+	TP_ARGS(jobslot, info_val))
+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT);
+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE);
+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT);
+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT);
+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+#undef DEFINE_MALI_SLOT_EVENT
+
+DECLARE_EVENT_CLASS(mali_refcount_template,
+	TP_PROTO(int refcount, unsigned int info_val),
+	TP_ARGS(refcount, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, refcount)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->refcount = refcount;
+		__entry->info_val = info_val;
+	),
+	TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val)
+);
+
+#define DEFINE_MALI_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(mali_refcount_template, mali_##name, \
+	TP_PROTO(int refcount, unsigned int info_val), \
+	TP_ARGS(refcount, info_val))
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK);
+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE);
+#undef DEFINE_MALI_REFCOUNT_EVENT
+
+DECLARE_EVENT_CLASS(mali_add_template,
+	TP_PROTO(int gpu_addr, unsigned int info_val),
+	TP_ARGS(gpu_addr, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, gpu_addr)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->gpu_addr = gpu_addr;
+		__entry->info_val = info_val;
+	),
+	TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val)
+);
+
+#define DEFINE_MALI_ADD_EVENT(name) \
+DEFINE_EVENT(mali_add_template, mali_##name, \
+	TP_PROTO(int gpu_addr, unsigned int info_val), \
+	TP_ARGS(gpu_addr, info_val))
+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY);
+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL);
+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT);
+DEFINE_MALI_ADD_EVENT(JM_IRQ);
+DEFINE_MALI_ADD_EVENT(JM_IRQ_END);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP);
+DEFINE_MALI_ADD_EVENT(PM_PWRON);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM);
+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+#undef DEFINE_MALI_ADD_EVENT
+
+#endif /* _TRACE_MALI_KBASE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100755
index 0000000..2be06a5
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,189 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#include <linux/tracepoint.h>
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - called from mali_kbase_core_linux.c
+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event,
+	TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid,
+			unsigned char job_id),
+	TP_ARGS(event_id, tgid, pid, job_id),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned int, tgid)
+		__field(unsigned int, pid)
+		__field(unsigned char, job_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->tgid = tgid;
+		__entry->pid = pid;
+		__entry->job_id = job_id;
+	),
+	TP_printk("event=%u tgid=%u pid=%u job_id=%u",
+		__entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+);
+
+/**
+ * mali_pm_status - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF)
+ */
+TRACE_EVENT(mali_pm_status,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power up
+ */
+TRACE_EVENT(mali_pm_power_on,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power down
+ */
+TRACE_EVENT(mali_pm_power_off,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_page_fault_insert_pages - Called by page_fault_worker()
+ * it reports an MMU page fault resulting in new pages being mapped.
+ * @event_id: MMU address space number.
+ * @value: number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages,
+	TP_PROTO(int event_id, unsigned long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+		__field(unsigned long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space()
+ * it reports that a certain MMU address space is in use now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_in_use,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal()
+ * it reports that a certain MMU address space has been released now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_released,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
+ *                                 and by kbase_atomic_sub_pages()
+ * it reports that the total number of allocated pages is changed.
+ * @event_id: number of pages to be added or subtracted (according to the sign).
+ */
+TRACE_EVENT(mali_total_alloc_pages_change,
+	TP_PROTO(long long int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(long long int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%lld", __entry->event_id)
+);
+
+#endif				/*  _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
new file mode 100755
index 0000000..9945293
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Kernel-wide include for common macros and types.
+ */
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define U8_MAX          ((u8)~0U)
+#define S8_MAX          ((s8)(U8_MAX>>1))
+#define S8_MIN          ((s8)(-S8_MAX - 1))
+#define U16_MAX         ((u16)~0U)
+#define S16_MAX         ((s16)(U16_MAX>>1))
+#define S16_MIN         ((s16)(-S16_MAX - 1))
+#define U32_MAX         ((u32)~0U)
+#define S32_MAX         ((s32)(U32_MAX>>1))
+#define S32_MIN         ((s32)(-S32_MAX - 1))
+#define U64_MAX         ((u64)~0ULL)
+#define S64_MAX         ((s64)(U64_MAX>>1))
+#define S64_MIN         ((s64)(-S64_MAX - 1))
+#endif /* LINUX_VERSION_CODE */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#define SIZE_MAX        (~(size_t)0)
+#endif /* LINUX_VERSION_CODE */
+
+/**
+ * MIN - Return the lesser of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * Refer to MAX macro for more details
+ */
+#define MIN(x, y)	((x) < (y) ? (x) : (y))
+
+/**
+ * MAX -  Return the greater of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as MIN it is guaranteed to return
+ * the one that MIN didn't return. This is significant for types where not
+ * all values are comparable e.g. NaNs in floating-point types. But if you want
+ * to retrieve the min and max of two values, consider using a conditional swap
+ * instead.
+ */
+#define MAX(x, y)	((x) < (y) ? (y) : (x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * much support API backwards compatibility.
+ */
+#define CSTD_UNUSED(x)	((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile time macros turn a function-like macro in to a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP(...)	((void)#__VA_ARGS__)
+
+/**
+ * Function-like macro for converting a pointer in to a u64 for storing into
+ * an external data structure. This is commonly used when pairing a 32-bit
+ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
+ * is complex and a straight cast does not work reliably as pointers are
+ * often considered as signed.
+ */
+#define PTR_TO_U64(x)	((uint64_t)((uintptr_t)(x)))
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1(x)	#x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be used
+ * if the macro is defined in a way which may have no value; use the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2(x)	CSTD_STR1(x)
+
+/**
+ * Specify an assertion value which is evaluated at compile time. Recommended
+ * usage is specification of a @c static @c INLINE function containing all of
+ * the assertions thus:
+ *
+ * @code
+ * static INLINE [module]_compile_time_assertions( void )
+ * {
+ *     COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
+ * }
+ * @endcode
+ *
+ * @note Use @c static not @c STATIC. We never want to turn off this @c static
+ * specification for testing purposes.
+ */
+#define CSTD_COMPILE_TIME_ASSERT(expr) \
+	do { switch (0) { case 0: case (expr):; } } while (false)
+
+#endif /* _MALISW_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
new file mode 100644
index 0000000..a509cbd
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDG_COHERENCY_H_
+#define _MIDG_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE      1
+#define COHERENCY_NONE     31
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+
+#endif /* _MIDG_COHERENCY_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100755
index 0000000..c53b33f
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,598 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Begin Register Offsets
+ */
+
+#define GPU_CONTROL_BASE        0x0000
+#define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
+#define GPU_ID                  0x000	/* (RO) GPU and revision identifier */
+#define L2_FEATURES             0x004	/* (RO) Level 2 cache features */
+#define SUSPEND_SIZE            0x008   /* (RO) Fixed-function suspend buffer
+						size */
+#define TILER_FEATURES          0x00C	/* (RO) Tiler Features */
+#define MEM_FEATURES            0x010	/* (RO) Memory system features */
+#define MMU_FEATURES            0x014	/* (RO) MMU features */
+#define AS_PRESENT              0x018	/* (RO) Address space slots present */
+#define JS_PRESENT              0x01C	/* (RO) Job slots present */
+#define GPU_IRQ_RAWSTAT         0x020	/* (RW) */
+#define GPU_IRQ_CLEAR           0x024	/* (WO) */
+#define GPU_IRQ_MASK            0x028	/* (RW) */
+#define GPU_IRQ_STATUS          0x02C	/* (RO) */
+
+/* IRQ flags */
+#define GPU_FAULT               (1 << 0)	/* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS     (1 << 7)	/* More than one GPU Fault occurred. */
+#define RESET_COMPLETED         (1 << 8)	/* Set when a reset has completed. Intended to use with SOFT_RESET
+						   commands which may take time. */
+#define POWER_CHANGED_SINGLE    (1 << 9)	/* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL       (1 << 10)	/* Set when all cores have finished powering up or down
+						   and the power manager is idle. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16)	/* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED  (1 << 17)	/* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+			| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#define GPU_COMMAND             0x030	/* (WO) */
+#define GPU_STATUS              0x034	/* (RO) */
+#define LATEST_FLUSH            0x038	/* (RO) */
+
+#define GROUPS_L2_COHERENT      (1 << 0)	/* Cores groups are l2 coherent */
+#define GPU_DBGEN               (1 << 8)	/* DBGEN wire status */
+
+#define GPU_FAULTSTATUS         0x03C	/* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO     0x040	/* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI     0x044	/* (RO) GPU exception fault address, high word */
+
+#define PWR_KEY                 0x050	/* (WO) Power manager key register */
+#define PWR_OVERRIDE0           0x054	/* (RW) Power manager override settings */
+#define PWR_OVERRIDE1           0x058	/* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO          0x060	/* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI          0x064	/* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG           0x068	/* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN            0x06C	/* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN        0x070	/* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN         0x074	/* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN        0x07C	/* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO          0x090	/* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI          0x094	/* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO            0x098	/* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI            0x09C	/* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS		0x0A0	/* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4	/* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8	/* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES         0x0AC	/* (RO) Thread features */
+
+#define TEXTURE_FEATURES_0      0x0B0	/* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1      0x0B4	/* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2      0x0B8	/* (RO) Support flags for indexed texture formats 64..95 */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define JS0_FEATURES            0x0C0	/* (RO) Features of job slot 0 */
+#define JS1_FEATURES            0x0C4	/* (RO) Features of job slot 1 */
+#define JS2_FEATURES            0x0C8	/* (RO) Features of job slot 2 */
+#define JS3_FEATURES            0x0CC	/* (RO) Features of job slot 3 */
+#define JS4_FEATURES            0x0D0	/* (RO) Features of job slot 4 */
+#define JS5_FEATURES            0x0D4	/* (RO) Features of job slot 5 */
+#define JS6_FEATURES            0x0D8	/* (RO) Features of job slot 6 */
+#define JS7_FEATURES            0x0DC	/* (RO) Features of job slot 7 */
+#define JS8_FEATURES            0x0E0	/* (RO) Features of job slot 8 */
+#define JS9_FEATURES            0x0E4	/* (RO) Features of job slot 9 */
+#define JS10_FEATURES           0x0E8	/* (RO) Features of job slot 10 */
+#define JS11_FEATURES           0x0EC	/* (RO) Features of job slot 11 */
+#define JS12_FEATURES           0x0F0	/* (RO) Features of job slot 12 */
+#define JS13_FEATURES           0x0F4	/* (RO) Features of job slot 13 */
+#define JS14_FEATURES           0x0F8	/* (RO) Features of job slot 14 */
+#define JS15_FEATURES           0x0FC	/* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n)      GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define SHADER_PRESENT_LO       0x100	/* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI       0x104	/* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO        0x110	/* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI        0x114	/* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO           0x120	/* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI           0x124	/* (RO) Level 2 cache present bitmap, high word */
+
+#define STACK_PRESENT_LO        0xE00   /* (RO) Core stack present bitmap, low word */
+#define STACK_PRESENT_HI        0xE04   /* (RO) Core stack present bitmap, high word */
+
+
+#define SHADER_READY_LO         0x140	/* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI         0x144	/* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO          0x150	/* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI          0x154	/* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO             0x160	/* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI             0x164	/* (RO) Level 2 cache ready bitmap, high word */
+
+#define STACK_READY_LO          0xE10   /* (RO) Core stack ready bitmap, low word */
+#define STACK_READY_HI          0xE14   /* (RO) Core stack ready bitmap, high word */
+
+
+#define SHADER_PWRON_LO         0x180	/* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI         0x184	/* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO          0x190	/* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI          0x194	/* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO             0x1A0	/* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI             0x1A4	/* (WO) Level 2 cache power on bitmap, high word */
+
+#define STACK_PWRON_LO          0xE20   /* (RO) Core stack power on bitmap, low word */
+#define STACK_PWRON_HI          0xE24   /* (RO) Core stack power on bitmap, high word */
+
+
+#define SHADER_PWROFF_LO        0x1C0	/* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI        0x1C4	/* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO         0x1D0	/* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI         0x1D4	/* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO            0x1E0	/* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI            0x1E4	/* (WO) Level 2 cache power off bitmap, high word */
+
+#define STACK_PWROFF_LO         0xE30   /* (RO) Core stack power off bitmap, low word */
+#define STACK_PRWOFF_HI         0xE34   /* (RO) Core stack power off bitmap, high word */
+
+
+#define SHADER_PWRTRANS_LO      0x200	/* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI      0x204	/* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO       0x210	/* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI       0x214	/* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO          0x220	/* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI          0x224	/* (RO) Level 2 cache power transition bitmap, high word */
+
+#define STACK_PWRTRANS_LO       0xE40   /* (RO) Core stack power transition bitmap, low word */
+#define STACK_PRWTRANS_HI       0xE44   /* (RO) Core stack power transition bitmap, high word */
+
+
+#define SHADER_PWRACTIVE_LO     0x240	/* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI     0x244	/* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO      0x250	/* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI      0x254	/* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO         0x260	/* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI         0x264	/* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES      0x300	/* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304	/* (RW) Coherency enable */
+
+#define JM_CONFIG               0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
+#define SHADER_CONFIG           0xF04	/* (RW) Shader core configuration settings (Implementation specific register) */
+#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define L2_MMU_CONFIG           0xF0C	/* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+#define JOB_CONTROL_BASE        0x1000
+
+#define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT         0x000	/* Raw interrupt status register */
+#define JOB_IRQ_CLEAR           0x004	/* Interrupt clear register */
+#define JOB_IRQ_MASK            0x008	/* Interrupt mask register */
+#define JOB_IRQ_STATUS          0x00C	/* Interrupt status register */
+#define JOB_IRQ_JS_STATE        0x010	/* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE        0x014	/* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
+
+#define JOB_SLOT0               0x800	/* Configuration registers for job slot 0 */
+#define JOB_SLOT1               0x880	/* Configuration registers for job slot 1 */
+#define JOB_SLOT2               0x900	/* Configuration registers for job slot 2 */
+#define JOB_SLOT3               0x980	/* Configuration registers for job slot 3 */
+#define JOB_SLOT4               0xA00	/* Configuration registers for job slot 4 */
+#define JOB_SLOT5               0xA80	/* Configuration registers for job slot 5 */
+#define JOB_SLOT6               0xB00	/* Configuration registers for job slot 6 */
+#define JOB_SLOT7               0xB80	/* Configuration registers for job slot 7 */
+#define JOB_SLOT8               0xC00	/* Configuration registers for job slot 8 */
+#define JOB_SLOT9               0xC80	/* Configuration registers for job slot 9 */
+#define JOB_SLOT10              0xD00	/* Configuration registers for job slot 10 */
+#define JOB_SLOT11              0xD80	/* Configuration registers for job slot 11 */
+#define JOB_SLOT12              0xE00	/* Configuration registers for job slot 12 */
+#define JOB_SLOT13              0xE80	/* Configuration registers for job slot 13 */
+#define JOB_SLOT14              0xF00	/* Configuration registers for job slot 14 */
+#define JOB_SLOT15              0xF80	/* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r)      (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO             0x00	/* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI             0x04	/* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO             0x08	/* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI             0x0C	/* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
+#define JS_XAFFINITY           0x1C	/* (RO) Extended affinity mask for job
+					   slot n */
+
+#define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
+#define JS_STATUS              0x24	/* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO        0x40	/* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI        0x44	/* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
+#define JS_XAFFINITY_NEXT      0x5C	/* (RW) Next extended affinity mask for
+					   job slot n */
+
+#define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
+
+#define MEMORY_MANAGEMENT_BASE  0x2000
+#define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT         0x000	/* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR           0x004	/* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK            0x008	/* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS          0x00C	/* (RO) Interrupt status register */
+
+#define MMU_AS0                 0x400	/* Configuration registers for address space 0 */
+#define MMU_AS1                 0x440	/* Configuration registers for address space 1 */
+#define MMU_AS2                 0x480	/* Configuration registers for address space 2 */
+#define MMU_AS3                 0x4C0	/* Configuration registers for address space 3 */
+#define MMU_AS4                 0x500	/* Configuration registers for address space 4 */
+#define MMU_AS5                 0x540	/* Configuration registers for address space 5 */
+#define MMU_AS6                 0x580	/* Configuration registers for address space 6 */
+#define MMU_AS7                 0x5C0	/* Configuration registers for address space 7 */
+#define MMU_AS8                 0x600	/* Configuration registers for address space 8 */
+#define MMU_AS9                 0x640	/* Configuration registers for address space 9 */
+#define MMU_AS10                0x680	/* Configuration registers for address space 10 */
+#define MMU_AS11                0x6C0	/* Configuration registers for address space 11 */
+#define MMU_AS12                0x700	/* Configuration registers for address space 12 */
+#define MMU_AS13                0x740	/* Configuration registers for address space 13 */
+#define MMU_AS14                0x780	/* Configuration registers for address space 14 */
+#define MMU_AS15                0x7C0	/* Configuration registers for address space 15 */
+
+#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO         0x00	/* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI         0x04	/* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO          0x08	/* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI          0x0C	/* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO         0x10	/* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI         0x14	/* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND             0x18	/* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS         0x1C	/* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO     0x20	/* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
+#define AS_STATUS              0x28	/* (RO) Status flags for address space n */
+
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
+
+/* End Register Offsets */
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+   MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS   16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n)      (1UL << (n))
+#define MMU_BUS_ERROR(n)       (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK   0xfffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED  (0u << 0)
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY  (1u << 1)
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE     (3u << 0)
+#define AS_TRANSTAB_LPAE_READ_INNER        (1u << 2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER       (1u << 4)
+
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK      0x00000003
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                    (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT       (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT        (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT      (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG             (0x3<<3)
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT      (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK                  (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC                (0x0<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX                    (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ                  (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE                 (0x3<<8)
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
+
+/*
+ * Begin Command Values
+ */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP         0x00	/* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START       0x01	/* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP   0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK        0x07    /* Mask of bits currently in use by the HW */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP         0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK        0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK      0x03	/* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH       0x04	/* Flush all L2 caches then issue a flush region command to all MMUs
+					   (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT    0x04	/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM   0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
+					   flush all L2 caches then issue a flush region command to all MMUs */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION        (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN            (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU                    (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG               (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
+
+/* JS_XAFFINITY register values */
+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
+#define JS_XAFFINITY_TILER_ENABLE     (1u << 8)
+#define JS_XAFFINITY_CACHE_ENABLE     (1u << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
+ */
+
+/* Group of values representing the job status insead a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE   0x00
+#define JS_STATUS_INTERRUPTED         (JS_STATUS_NO_EXCEPTION_BASE + 0x02)	/* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED             (JS_STATUS_NO_EXCEPTION_BASE + 0x03)	/* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED          (JS_STATUS_NO_EXCEPTION_BASE + 0x04)	/* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE          0x40
+#define JS_STATUS_CONFIG_FAULT        (JS_STATUS_FAULT_BASE)	/* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT         (JS_STATUS_FAULT_BASE + 0x01)	/* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT          (JS_STATUS_FAULT_BASE + 0x02)	/* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT         (JS_STATUS_FAULT_BASE + 0x03)	/* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT      (JS_STATUS_FAULT_BASE + 0x04)	/* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT           (JS_STATUS_FAULT_BASE + 0x08)	/* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE  0x50
+#define JS_STATUS_INSTR_INVALID_PC        (JS_STATUS_INSTRUCTION_FAULT_BASE)	/* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01)	/* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02)	/* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03)	/* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT         (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04)	/* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05)	/* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06)	/* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT      (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08)	/* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT        (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09)	/* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A)	/* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE   0x60
+#define JS_STATUS_OUT_OF_MEMORY       (JS_STATUS_MEMORY_FAULT_BASE)	/* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN             0x7F	/* 0x7F means UNKNOWN */
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP                0x00	/* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET         0x01	/* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET         0x02	/* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR       0x03	/* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE      0x04	/* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START  0x05	/* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP   0x06	/* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES       0x07	/* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES   0x08	/* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09	/* Places the GPU in protected mode */
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE           (1 << 2)	/* Set if the performance counters are active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE   (1 << 7)	/* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT      0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT        4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+
+#define PRFCNT_CONFIG_MODE_OFF    0	/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_MANUAL 1	/* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
+#define PRFCNT_CONFIG_MODE_TILE   2	/* The performance counters are enabled, and are written out each time a tile finishes rendering. */
+
+/* AS<n>_MEMATTR values: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
+
+/* Use GPU implementation-defined  caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC           0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
+
+/* Symbol for default MEMATTR to use */
+
+/* Default is - HW implementation defined caching */
+#define AS_MEMATTR_INDEX_DEFAULT               0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE           3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL    1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC           2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF        3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA              4
+
+/* JS<n>_FEATURES register */
+
+#define JS_FEATURE_NULL_JOB              (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB         (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB       (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB           (1u << 4)
+#define JS_FEATURE_VERTEX_JOB            (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB          (1u << 6)
+#define JS_FEATURE_TILER_JOB             (1u << 7)
+#define JS_FEATURE_FUSED_JOB             (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB          (1u << 9)
+
+/* End JS<n>_FEATURES register */
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT        (24)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS              (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT       (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER      (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF         (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT       (26)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES             (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT      (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER     (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF        (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED  0
+#define IMPLEMENTATION_SILICON      1
+#define IMPLEMENTATION_FPGA         2
+#define IMPLEMENTATION_MODEL        3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT     256
+#define THREAD_MWS_DEFAULT    256
+#define THREAD_MBS_DEFAULT    256
+#define THREAD_MR_DEFAULT     1024
+#define THREAD_MTQ_DEFAULT    4
+#define THREAD_MTGS_DEFAULT   10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+
+#define SC_ALT_COUNTERS             (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL  (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD   (1ul << 6)
+#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
+#define SC_LS_PAUSEBUFFER_DISABLE   (1ul << 16)
+#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
+#define SC_ENABLE_TEXGRD_FLAGS      (1ul << 25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+
+#define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
+
+/* End TILER_CONFIG register */
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
new file mode 100755
index 0000000..bd5f661
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
@@ -0,0 +1,396 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali_timeline
+
+#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _MALI_TIMELINE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mali_timeline_atoms_in_flight,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int tgid,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		tgid,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, tgid)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->tgid = tgid;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT,
+				(int)__entry->ts_sec,
+				(int)__entry->ts_nsec,
+				__entry->tgid,
+				__entry->count)
+);
+
+
+TRACE_EVENT(mali_timeline_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->atom_id,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_action,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int active),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		active),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, active)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->active = active;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->active)
+
+);
+
+TRACE_EVENT(mali_timeline_l2_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int state),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		state),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, state)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->state = state;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->state)
+
+);
+TRACE_EVENT(mali_timeline_pm_event,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int pm_event_type,
+		unsigned int pm_event_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		pm_event_type,
+		pm_event_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, pm_event_type)
+			__field(unsigned int, pm_event_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->pm_event_type = pm_event_type;
+		__entry->pm_event_id = pm_event_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->pm_event_type, __entry->pm_event_id)
+
+);
+
+TRACE_EVENT(mali_timeline_slot_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_pm_checktrans,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int trans_code,
+		int trans_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		trans_code,
+		trans_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, trans_code)
+			__field(int, trans_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->trans_code = trans_code;
+		__entry->trans_id = trans_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->trans_id)
+
+);
+
+TRACE_EVENT(mali_timeline_context_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->count)
+);
+
+#endif /* _MALI_TIMELINE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_uk.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
new file mode 100755
index 0000000..841d03f
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,141 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif				/* __cplusplus */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
+ * identifier to select a UKK client to the uku_open() function.
+ *
+ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
+ * enumeration and the uku_open() implemenation for the various OS ports need to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+	/**
+	 * Value used to identify the Base driver UK client.
+	 */
+	UK_CLIENT_MALI_T600_BASE,
+
+	/** The number of uk clients supported. This must be the last member of the enum */
+	UK_CLIENT_COUNT
+};
+
+/**
+ * Each function callable through the UK interface has a unique number.
+ * Functions provided by UK clients start from number UK_FUNC_ID.
+ * Numbers below UK_FUNC_ID are used for internal UK functions.
+ */
+enum uk_func {
+	UKP_FUNC_ID_CHECK_VERSION,   /**< UKK Core internal function */
+	/**
+	 * Each UK client numbers the functions they provide starting from
+	 * number UK_FUNC_ID. This number is then eventually assigned to the
+	 * id field of the union uk_header structure when preparing to make a
+	 * UK call. See your UK client for a list of their function numbers.
+	 */
+	UK_FUNC_ID = 512
+};
+
+/**
+ * Arguments for a UK call are stored in a structure. This structure consists
+ * of a fixed size header and a payload. The header carries a 32-bit number
+ * identifying the UK function to be called (see uk_func). When the UKK client
+ * receives this header and executed the requested UK function, it will use
+ * the same header to store the result of the function in the form of a
+ * int return code. The size of this structure is such that the
+ * first member of the payload following the header can be accessed efficiently
+ * on a 32 and 64-bit kernel and the structure has the same size regardless
+ * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
+ * accordingly in the OS specific mali_uk_os.h header file.
+ */
+union uk_header {
+	/**
+	 * 32-bit number identifying the UK function to be called.
+	 * Also see uk_func.
+	 */
+	u32 id;
+	/**
+	 * The int return code returned by the called UK function.
+	 * See the specification of the particular UK function you are
+	 * calling for the meaning of the error codes returned. All
+	 * UK functions return 0 on success.
+	 */
+	u32 ret;
+	/*
+	 * Used to ensure 64-bit alignment of this union. Do not remove.
+	 * This field is used for padding and does not need to be initialized.
+	 */
+	u64 sizer;
+};
+
+/**
+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
+ */
+struct uku_version_check_args {
+	union uk_header header;
+		  /**< UK call header */
+	u16 major;
+	   /**< This field carries the user-side major version on input and the kernel-side major version on output */
+	u16 minor;
+	   /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
+	u8 padding[4];
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif				/* __cplusplus */
+#endif				/* _UK_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild
new file mode 100755
index 0000000..558657b
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+	platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+	obj-y += $(platform_name)/
+endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100755
index 0000000..8fb4e91
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name is the name set in MALI_PLATFORM_THIRDPARTY_NAME
+#
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
new file mode 100755
index 0000000..de8849e
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+USE_GPPLL?=0
+ifdef CONFIG_AM_VIDEO
+    USE_GPPLL:=1
+endif
+
+ccflags-y += -DAMLOGIC_GPU_USE_GPPLL=$(USE_GPPLL)
+
+ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-y += platform/devicetree/mali_clock.c
+obj-y += platform/devicetree/mpgpu.c
+obj-y += platform/devicetree/meson_main2.c
+obj-y += platform/devicetree/platform_gx.c
+obj-y += platform/devicetree/scaling.c
+obj-y += mali_kbase_runtime_pm.c
+obj-y += mali_kbase_config_devicetree.c
+else ifeq ($(CONFIG_MALI_MIDGARD),m)
+SRC += platform/devicetree/mali_clock.c
+SRC += platform/devicetree/mpgpu.c
+SRC += platform/devicetree/meson_main2.c
+SRC += platform/devicetree/platform_gx.c
+SRC += platform/devicetree/scaling.c
+SRC += platform/devicetree/mali_kbase_runtime_pm.c
+SRC += platform/devicetree/mali_kbase_config_devicetree.c
+endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
new file mode 100644
index 0000000..35249a5
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
@@ -0,0 +1,653 @@
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+#ifndef AML_CLK_LOCK_ERROR
+#define AML_CLK_LOCK_ERROR 1
+#endif
+
+static unsigned gpu_dbg_level = 0;
+module_param(gpu_dbg_level, uint, 0644);
+MODULE_PARM_DESC(gpu_dbg_level, "gpu debug level");
+
+#define gpu_dbg(level, fmt, arg...)			\
+	do {			\
+		if (gpu_dbg_level >= (level))		\
+		printk("gpu_debug"fmt , ## arg); 	\
+	} while (0)
+
+#define GPU_CLK_DBG(fmt, arg...)
+
+//disable print
+#define _dev_info(...)
+
+//static DEFINE_SPINLOCK(lock);
+static mali_plat_info_t* pmali_plat = NULL;
+//static u32 mali_extr_backup = 0;
+//static u32 mali_extr_sample_backup = 0;
+struct timeval start;
+struct timeval end;
+int mali_pm_statue = 0;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali_0_parent = dvfs_tbl->clkp_handle;
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+#ifdef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+#endif
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_set_parent(clk_mali_0, clk_mali_0_parent);
+
+	clk_prepare_enable(clk_mali_0);
+
+	clk_set_parent(clk_mali, clk_mali_0);
+
+#ifdef AML_CLK_LOCK_ERROR
+	clk_set_parent(clk_mali_1, clk_mali_0_parent);
+	clk_prepare_enable(clk_mali_1);
+#endif
+
+	GPU_CLK_DBG("%s:enable(%d), %s:enable(%d)\n",
+	  clk_mali_0->name,  clk_mali_0->enable_count,
+	  clk_mali_0_parent->name, clk_mali_0_parent->enable_count);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+	struct clk *clk_mali_x   = NULL;
+	struct clk *clk_mali_x_parent = NULL;
+	struct clk *clk_mali_x_old = NULL;
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+	clk_mali_x_old  = clk_get_parent(clk_mali);
+
+	if (!clk_mali_x_old) {
+		printk("gpu: could not get clk_mali_x_old or clk_mali_x_old\n");
+		return 0;
+	}
+	if (clk_mali_x_old == clk_mali_0) {
+		clk_mali_x = clk_mali_1;
+	} else if (clk_mali_x_old == clk_mali_1) {
+		clk_mali_x = clk_mali_0;
+	} else {
+		printk("gpu: unmatched clk_mali_x_old\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG("idx=%d, clk_freq=%d\n", idx, dvfs_tbl->clk_freq);
+	clk_mali_x_parent = dvfs_tbl->clkp_handle;
+	if (!clk_mali_x_parent) {
+		printk("gpu: could not get clk_mali_x_parent\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x_parent, dvfs_tbl->clkp_freq);
+	GPU_CLK_DBG();
+	ret = clk_set_parent(clk_mali_x, clk_mali_x_parent);
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x, dvfs_tbl->clk_freq);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	ret = clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG("new %s:enable(%d)\n", clk_mali_x->name,  clk_mali_x->enable_count);
+	do_gettimeofday(&start);
+	udelay(1);// delay 10ns
+	do_gettimeofday(&end);
+	ret = clk_set_parent(clk_mali, clk_mali_x);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	GPU_CLK_DBG("old %s:enable(%d)\n", clk_mali_x_old->name,  clk_mali_x_old->enable_count);
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "ao io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	mpdata->dvfs_table_size = 0;
+
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+	dev_notice(&pdev->dev, "dvfs table is %d\n", mpdata->dvfs_table_size);
+	dev_notice(&pdev->dev, "dvfs table addr %p, ele size=%zd\n",
+			mpdata->dvfs_table,
+			sizeof(mpdata->dvfs_table[0]));
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "clk_gpu");
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_gpu_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_gpu_1");
+	if (IS_ERR(mpdata->clk_mali) || IS_ERR(mpdata->clk_mali_0) || IS_ERR(mpdata->clk_mali_1)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+#else
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	//mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+
+	GPU_CLK_DBG();
+	do_gettimeofday(&start);
+	ret = clk_set_rate(clk_mali, dvfs_tbl->clk_freq);
+	do_gettimeofday(&end);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	GPU_CLK_DBG();
+	clk_disable_unprepare(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+#if 0
+#ifdef MESON_CPU_VERSION_OPS
+		if (is_meson_gxbbm_cpu()) {
+			if (dvfs_tbl->clk_freq >= GXBBM_MAX_GPU_FREQ)
+				continue;
+		}
+#endif
+#endif
+#if  0
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+#endif
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+	_dev_info(&pdev->dev, "clock dvfs table size is %d\n", mpdata->dvfs_table_size);
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "gpu_mux");
+#if 0
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_mali_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_mali_1");
+#endif
+	if (IS_ERR(mpdata->clk_mali)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+
+#endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
new file mode 100644
index 0000000..9b8b392
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
@@ -0,0 +1,37 @@
+#ifndef __MALI_CLOCK_H__
+#define __MALI_CLOCK_H__
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <linux/clk.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 29))
+#include <linux/amlogic/iomap.h>
+#endif
+
+#ifndef HHI_MALI_CLK_CNTL
+#define HHI_MALI_CLK_CNTL   0x6C
+#define mplt_read(r)        readl((pmali_plat->reg_base_hiubus) + ((r)<<2))
+#define mplt_write(r, v)    writel((v), ((pmali_plat->reg_base_hiubus) + ((r)<<2)))
+#define mplt_setbits(r, m)  mplt_write((r), (mplt_read(r) | (m)));
+#define mplt_clrbits(r, m)  mplt_write((r), (mplt_read(r) & (~(m))));
+#endif
+
+//extern int mali_clock_init(struct platform_device *dev);
+int mali_clock_init_clk_tree(struct platform_device *pdev);
+
+typedef int (*critical_t)(size_t param);
+int mali_clock_critical(critical_t critical, size_t param);
+
+int mali_clock_init(mali_plat_info_t*);
+int mali_clock_set(unsigned int index);
+void disable_clock(void);
+void enable_clock(void);
+u32 get_mali_freq(u32 idx);
+void set_str_src(u32 data);
+int mali_dt_info(struct platform_device *pdev,
+        struct mali_plat_info_t *mpdata);
+#endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
new file mode 100755
index 0000000..1267b6f
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
@@ -0,0 +1,109 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/version.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static unsigned long t83x_static_power(unsigned long voltage)
+{
+#if 0
+	struct thermal_zone_device *tz;
+	unsigned long temperature, temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10);
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	tz = thermal_zone_get_zone_by_name("gpu");
+	if (IS_ERR(tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(tz));
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	} else {
+		int ret;
+
+		ret = tz->ops->get_temp(tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(2 * temp_cubed)
+			- (80 * temp_squared)
+			+ (4700 * temp)
+			+ 32000;
+
+	return (((coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+#else
+	return 0;
+#endif
+}
+
+static unsigned long t83x_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+	const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */
+
+	return (coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+struct devfreq_cooling_ops t83x_model_ops = {
+#else
+struct devfreq_cooling_power t83x_model_ops = {
+#endif
+	.get_static_power = t83x_static_power,
+	.get_dynamic_power = t83x_dynamic_power,
+};
+
+#endif
+
+#include <mali_kbase_config.h>
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &dummy_platform_config;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
new file mode 100755
index 0000000..13af899
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (750000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (100000)
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (NULL)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+extern struct kbase_platform_funcs_conf dt_funcs_conf;
+#define PLATFORM_FUNCS (&dt_funcs_conf)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#ifdef CONFIG_DEVFREQ_THERMAL
+#define POWER_MODEL_CALLBACKS (&t83x_model_ops)
+extern struct devfreq_cooling_ops t83x_model_ops;
+#else
+#define POWER_MODEL_CALLBACKS (NULL)
+#endif
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
new file mode 100755
index 0000000..3b3f095
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
@@ -0,0 +1,244 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+
+void *reg_base_hiubus = NULL;
+u32  override_value_aml = 0;
+static int first = 1;
+
+static void  Mali_pwr_on_with_kdev ( struct kbase_device *kbdev, uint32_t  mask)
+{
+    uint32_t    part1_done;
+    part1_done = 0;
+    Mali_WrReg(0x0, 0x0, 0x0000024, 0xffffffff); // clear interrupts
+
+    if ((mask & 0x1) != 0 ) {
+        Mali_WrReg(0x0, 0x0, 0x00000190, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x00000194, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x000001a0, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x000001a4, 0xffffffff);    // Power on all cores
+    }
+
+    if ( (mask >> 1) != 0 ) {
+        Mali_WrReg(0x0, 0x0, 0x00000180, mask >> 1);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x00000184, 0x0);    // Power on all cores
+    }
+
+    if ( mask != 0 ) {
+        udelay(10);
+        part1_done = Mali_RdReg(0x0, 0x0, 0x0000020);
+        //printk("%s, %d\n", __func__, __LINE__);
+        while((part1_done ==0)) { part1_done = Mali_RdReg(0x0, 0x0, 0x00000020); udelay(10); }
+        //printk("Mali_pwr_on:gpu_irq : %x\n", part1_done);
+        Mali_WrReg(0x0, 0x0, 0x0000024, 0xffffffff); // clear interrupts
+    }
+}
+
+static void gpu_power_main( struct kbase_device *kbdev) {
+
+    Mali_pwr_on ( 0x7);
+
+    printk("%s, %d\n", __func__, __LINE__);
+}
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	int ret;
+	struct platform_device *pdev = to_platform_device(kbdev->dev);
+	struct resource *reg_res;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+    //printk("20151013, %s, %d\n", __FILE__, __LINE__);
+    if (first == 0) goto ret;
+
+    reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+    if (!reg_res) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) 
+        reg_base_hiubus = ioremap(reg_res->start, resource_size(reg_res));
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+
+//JOHNT
+    // Level reset mail
+
+    // Level reset mail
+    //Wr(P_RESET2_MASK, ~(0x1<<14));
+    //Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    //Wr(P_RESET2_LEVEL, 0xffffffff);
+    //Wr(P_RESET0_LEVEL, 0xffffffff);
+
+    value = Rd(RESET0_MASK);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_MASK, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+///////////////
+    value = Rd(RESET2_MASK);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_MASK, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value | ((0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value | ((0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    udelay(10); // OR POLL for reset done
+
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+
+    gpu_power_main(kbdev);
+    //printk("set PWR_ORRIDE, reg=%p, reg_start=%llx, reg_size=%zx, reg_mapped=%p\n",
+    //        kbdev->reg, kbdev->reg_start, kbdev->reg_size, reg_base_hiubus);
+	dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+			(void *)kbdev->dev->pm_domain);
+
+    first = 0;
+    //printk("%s, %d\n", __FILE__, __LINE__);
+ret:
+	ret = pm_runtime_get_sync(kbdev->dev);
+    udelay(100);
+#if 1
+
+    core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+    l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+    tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+    //printk("core_ready=%llx, l2_ready=%llx, tiler_ready=%llx\n", core_ready, l2_ready, tiler_ready);
+#endif
+	dev_dbg(kbdev->dev, "pm_runtime_get returned %d\n", ret);
+
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+    //printk("%s, %d\n", __FILE__, __LINE__);
+#if 0
+    iounmap(reg_base_hiubus);
+    reg_base_hiubus = NULL;
+#endif
+	u64 core_ready  = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+	u64 l2_ready    = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+	u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+
+	dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+	if (core_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, core_ready, ACTION_PWROFF);
+
+	if (l2_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_ready, ACTION_PWROFF);
+
+	if (tiler_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_ready, ACTION_PWROFF);
+
+	pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+	pm_runtime_enable(kbdev->dev);
+
+	return 0;
+}
+
+void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+	pm_runtime_disable(kbdev->dev);
+}
+
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+	return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+	int ret = pm_callback_runtime_on(kbdev);
+
+	WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+	pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback = pm_callback_suspend,
+	.power_resume_callback = pm_callback_resume,
+#ifdef KBASE_PM_RUNTIME
+	.power_runtime_init_callback = kbase_device_runtime_init,
+	.power_runtime_term_callback = kbase_device_runtime_disable,
+	.power_runtime_on_callback = pm_callback_runtime_on,
+	.power_runtime_off_callback = pm_callback_runtime_off,
+#else				/* KBASE_PM_RUNTIME */
+	.power_runtime_init_callback = NULL,
+	.power_runtime_term_callback = NULL,
+	.power_runtime_on_callback = NULL,
+	.power_runtime_off_callback = NULL,
+#endif				/* KBASE_PM_RUNTIME */
+};
+
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
new file mode 100644
index 0000000..41185d0
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
@@ -0,0 +1,15 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/kernel.h>
+#ifndef MALI_PLATFORM_H_
+#define MALI_PLATFORM_H_
+
+extern u32 mali_gp_reset_fail;
+extern u32 mali_core_timeout;
+
+#endif /* MALI_PLATFORM_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
new file mode 100644
index 0000000..b541090
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.h
+ * Example core scaling policy.
+ */
+
+#ifndef __ARM_CORE_SCALING_H__
+#define __ARM_CORE_SCALING_H__
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+
+enum mali_scale_mode_t {
+	MALI_PP_SCALING = 0,
+	MALI_PP_FS_SCALING,
+	MALI_SCALING_DISABLE,
+	MALI_TURBO_MODE,
+	MALI_SCALING_MODE_MAX
+};
+
+typedef struct mali_dvfs_threshold_table {
+	uint32_t    freq_index;
+	uint32_t    voltage;
+	uint32_t    keep_count;
+	uint32_t    downthreshold;
+	uint32_t    upthreshold;
+    uint32_t    clk_freq;
+    const char  *clk_parent;
+    struct clk  *clkp_handle;
+    uint32_t    clkp_freq;
+} mali_dvfs_threshold_table;
+
+/**
+ *  restrictions on frequency and number of pp.
+ */
+typedef struct mali_scale_info_t {
+	u32 minpp;
+	u32 maxpp;
+	u32 minclk;
+	u32 maxclk;
+} mali_scale_info_t;
+
+/**
+ * Platform spesific data for meson chips.
+ */
+typedef struct mali_plat_info_t {
+	u32 cfg_pp; /* number of pp. */
+	u32 cfg_min_pp;
+	u32 turbo_clock; /* reserved clock src. */
+	u32 def_clock; /* gpu clock used most of time.*/
+	u32 cfg_clock; /* max clock could be used.*/
+	u32 cfg_clock_bkup; /* same as cfg_clock, for backup. */
+	u32 cfg_min_clock;
+
+	u32  sc_mpp; /* number of pp used most of time.*/
+	u32  bst_gpu; /* threshold for boosting gpu. */
+	u32  bst_pp; /* threshold for boosting PP. */
+
+	u32 *clk;
+	u32 *clk_sample;
+	u32 clk_len;
+	u32 have_switch; /* have clock gate switch or not. */
+
+	mali_dvfs_threshold_table *dvfs_table;
+	u32 dvfs_table_size;
+
+	mali_scale_info_t scale_info;
+	u32 maxclk_sysfs;
+	u32 maxpp_sysfs;
+
+	/* set upper limit of pp or frequency, for THERMAL thermal or band width saving.*/
+	u32 limit_on;
+
+	/* for boost up gpu by user. */
+	void (*plat_preheat)(void);
+
+    struct platform_device *pdev;
+    void __iomem *reg_base_hiubus;
+    void __iomem *reg_base_aobus;
+	struct work_struct wq_work;
+    struct clk *clk_mali;
+    struct clk *clk_mali_0;
+    struct clk *clk_mali_1;
+} mali_plat_info_t;
+mali_plat_info_t* get_mali_plat_data(void);
+
+/**
+ * Initialize core scaling policy.
+ *
+ * @note The core scaling policy will assume that all PP cores are on initially.
+ *
+ * @param num_pp_cores Total number of PP cores.
+ */
+int mali_core_scaling_init(mali_plat_info_t*);
+
+/**
+ * Terminate core scaling policy.
+ */
+void mali_core_scaling_term(void);
+
+/**
+ * cancel and flush scaling job queue.
+ */
+void flush_scaling_job(void);
+
+/* get current state(pp, clk). */
+void get_mali_rt_clkpp(u32* clk, u32* pp);
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush);
+void revise_mali_rt(void);
+/* get max gpu clk level of this chip*/
+int get_gpu_max_clk_level(void);
+
+/* get or set the scale mode. */
+u32 get_mali_schel_mode(void);
+void set_mali_schel_mode(u32 mode);
+
+/* for frequency reporter in DS-5 streamline. */
+u32 get_current_frequency(void);
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+#endif /* __ARM_CORE_SCALING_H__ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
new file mode 100644
index 0000000..7687f92
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2010, 2012-2014 Amlogic Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ */
+
+/**
+ * @file mali_platform.c
+ * Platform specific Mali driver functions for:
+ * meson8m2 and the newer chip
+ */
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#include <asm/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/moduleparam.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_defs.h>
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+extern void mali_post_init(void);
+struct kbase_device;
+//static int gpu_dvfs_probe(struct platform_device *pdev)
+int platform_dt_init_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	int err = -1;
+
+	err = mali_meson_init_start(pdev);
+    mali_meson_init_finish(pdev);
+    mpgpu_class_init();
+    mali_post_init();
+    return err;
+}
+
+//static int gpu_dvfs_remove(struct platform_device *pdev)
+void platform_dt_term_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	printk("%s, %d\n", __FILE__, __LINE__);
+
+    mpgpu_class_exit();
+	mali_meson_uninit(pdev);
+
+}
+
+inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2])
+{
+    mali_gpu_utilization_callback(utilisation*255/100);
+    return 1;
+}
+
+struct kbase_platform_funcs_conf dt_funcs_conf = {
+    .platform_init_func = platform_dt_init_func,
+    .platform_term_func = platform_dt_term_func,
+};
+#if 0
+static const struct of_device_id gpu_dvfs_ids[] = {
+	{ .compatible = "meson, gpu-dvfs-1.00.a" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, gpu_dvfs_ids);
+
+static struct platform_driver gpu_dvfs_driver = {
+	.driver = {
+		.name = "meson-gpu-dvfs",
+		.owner = THIS_MODULE,
+		.of_match_table = gpu_dvfs_ids,
+	},
+	.probe = gpu_dvfs_probe,
+	.remove = gpu_dvfs_remove,
+};
+module_platform_driver(gpu_dvfs_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Amlogic SH, MM");
+MODULE_DESCRIPTION("Driver for the Meson GPU dvfs");
+#endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
new file mode 100644
index 0000000..ca79752
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
@@ -0,0 +1,33 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#ifndef MESON_MAIN_H_
+#define MESON_MAIN_H_
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+u32 set_max_mali_freq(u32 idx);
+u32 get_max_mali_freq(void);
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev);
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev);
+int mali_meson_uninit(struct platform_device* ptr_plt_dev);
+int mpgpu_class_init(void);
+void mpgpu_class_exit(void);
+void mali_gpu_utilization_callback(int utilization_pp);
+
+#endif /* MESON_MAIN_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
new file mode 100644
index 0000000..0cc5035
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
@@ -0,0 +1,359 @@
+/*******************************************************************
+ *
+ *  Copyright C 2013 by Amlogic, Inc. All Rights Reserved.
+ *
+ *  Description:
+ *
+ *  Author: Amlogic Software
+ *  Created: 2010/4/1   19:46
+ *
+ *******************************************************************/
+/* Standard Linux headers */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/io.h>
+#include <plat/io.h>
+#include <asm/io.h>
+#endif
+
+#include "meson_main2.h"
+
+static ssize_t domain_stat_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	unsigned int val;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+	return sprintf(buf, "%x\n", val>>4);
+	return 0;
+}
+
+#define PREHEAT_CMD "preheat"
+#define PLL2_CMD "mpl2"  /* mpl2 [11] or [0xxxxxxx] */
+#define SCMPP_CMD "scmpp"  /* scmpp [number of pp your want in most of time]. */
+#define BSTGPU_CMD "bstgpu"  /* bstgpu [0-256] */
+#define BSTPP_CMD "bstpp"  /* bstpp [0-256] */
+#define LIMIT_CMD "lmt"  /* lmt [0 or 1] */
+#define MAX_TOKEN 20
+#define FULL_UTILIZATION 256
+
+static ssize_t mpgpu_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	char *pstart, *cprt = NULL;
+	u32 val = 0;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	cprt = skip_spaces(buf);
+	pstart = strsep(&cprt," ");
+	if (strlen(pstart) < 1)
+		goto quit;
+
+	if (!strncmp(pstart, PREHEAT_CMD, MAX_TOKEN)) {
+		if (pmali_plat->plat_preheat) {
+			pmali_plat->plat_preheat();
+		}
+	} else if (!strncmp(pstart, PLL2_CMD, MAX_TOKEN)) {
+		int base = 10;
+		if ((strlen(cprt) > 2) && (cprt[0] == '0') &&
+				(cprt[1] == 'x' || cprt[1] == 'X'))
+			base = 16;
+		if (kstrtouint(cprt, base, &val) <0)
+			goto quit;
+		if (val < 11)
+			pmali_plat->cfg_clock = pmali_plat->cfg_clock_bkup;
+		else
+			pmali_plat->cfg_clock = pmali_plat->turbo_clock;
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		set_str_src(val);
+	} else if (!strncmp(pstart, SCMPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < pmali_plat->cfg_pp)) {
+			pmali_plat->sc_mpp = val;
+		}
+	} else if (!strncmp(pstart, BSTGPU_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_gpu = val;
+		}
+	} else if (!strncmp(pstart, BSTPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_pp = val;
+		}
+	} else if (!strncmp(pstart, LIMIT_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+
+		if (val < 2) {
+			pmali_plat->limit_on = val;
+			if (val == 0) {
+				pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+				pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+				revise_mali_rt();
+			}
+		}
+	}
+quit:
+	return count;
+}
+
+static ssize_t scale_mode_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_mali_schel_mode());
+}
+
+static ssize_t scale_mode_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	set_mali_schel_mode(val);
+
+	return count;
+}
+
+#if 0
+static ssize_t max_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxpp:%d, maxpp_sysfs:%d, total=%d\n",
+			pmali_plat->scale_info.maxpp, pmali_plat->maxpp_sysfs,
+			pmali_plat->cfg_pp);
+	return sprintf(buf, "%d\n", pmali_plat->cfg_pp);
+}
+
+static ssize_t max_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_pp) || (val < pinfo->minpp))
+		return -EINVAL;
+
+	pmali_plat->maxpp_sysfs = val;
+	pinfo->maxpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minpp);
+}
+
+static ssize_t min_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxpp) || (val < 1))
+		return -EINVAL;
+
+	pinfo->minpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+#endif
+
+static ssize_t max_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxclk:%d, maxclk_sys:%d, max gpu level=%d\n",
+			pmali_plat->scale_info.maxclk, pmali_plat->maxclk_sysfs, get_gpu_max_clk_level());
+	return sprintf(buf, "%d\n", get_gpu_max_clk_level());
+}
+
+static ssize_t max_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_clock) || (val < pinfo->minclk))
+		return -EINVAL;
+
+	pmali_plat->maxclk_sysfs = val;
+	pinfo->maxclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minclk);
+}
+
+static ssize_t min_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxclk))
+		return -EINVAL;
+
+	pinfo->minclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_current_frequency());
+}
+
+static ssize_t freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(val, pp, 1);
+
+	return count;
+}
+
+#if 0
+static ssize_t current_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+	return sprintf(buf, "%d\n", pp);
+}
+
+static ssize_t current_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+
+	get_mali_rt_clkpp(&clk, &pp);
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(clk, val, 1);
+
+	return count;
+}
+#endif
+
+static struct class_attribute mali_class_attrs[] = {
+	__ATTR(domain_stat,	0644, domain_stat_read, NULL),
+	__ATTR(mpgpucmd,	0644, NULL,		mpgpu_write),
+	__ATTR(scale_mode,	0644, scale_mode_read,  scale_mode_write),
+	__ATTR(min_freq,	0644, min_freq_read,  	min_freq_write),
+	__ATTR(max_freq,	0644, max_freq_read,	max_freq_write),
+#if 0
+	__ATTR(min_pp,		0644, min_pp_read,	min_pp_write),
+	__ATTR(max_pp,		0644, max_pp_read,	max_pp_write),
+#endif
+	__ATTR(cur_freq,	0644, freq_read,	freq_write),
+#if 0
+	__ATTR(cur_pp,		0644, current_pp_read,	current_pp_write),
+#endif
+};
+
+static struct class mpgpu_class = {
+	.name = "mpgpu",
+};
+
+int mpgpu_class_init(void)
+{
+	int ret = 0;
+	int i;
+	int attr_num =  ARRAY_SIZE(mali_class_attrs);
+
+	ret = class_register(&mpgpu_class);
+	if (ret) {
+		printk(KERN_ERR "%s: class_register failed\n", __func__);
+		return ret;
+	}
+	for (i = 0; i< attr_num; i++) {
+		ret = class_create_file(&mpgpu_class, &mali_class_attrs[i]);
+		if (ret) {
+			printk(KERN_ERR "%d ST: class item failed to register\n", i);
+		}
+	}
+	return ret;
+}
+
+void  mpgpu_class_exit(void)
+{
+	class_unregister(&mpgpu_class);
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
new file mode 100644
index 0000000..d16675e
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
@@ -0,0 +1,246 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include <asm/io.h>
+#ifdef CONFIG_GPU_THERMAL
+#include <linux/gpu_cooling.h>
+#include <linux/gpucore_cooling.h>
+#include <linux/amlogic/aml_thermal_hw.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+/*
+ *    For Meson 8 M2.
+ *
+ */
+static void mali_plat_preheat(void);
+static mali_plat_info_t mali_plat_data = {
+    .bst_gpu = 210, /* threshold for boosting gpu. */
+    .bst_pp = 160, /* threshold for boosting PP. */
+    .have_switch = 1,
+    .limit_on = 1,
+    .plat_preheat = mali_plat_preheat,
+};
+
+static void mali_plat_preheat(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    u32 pre_fs;
+    u32 clk, pp;
+
+    if (get_mali_schel_mode() != MALI_PP_FS_SCALING)
+        return;
+
+    get_mali_rt_clkpp(&clk, &pp);
+    pre_fs = mali_plat_data.def_clock + 1;
+    if (clk < pre_fs)
+        clk = pre_fs;
+    if (pp < mali_plat_data.sc_mpp)
+        pp = mali_plat_data.sc_mpp;
+    set_mali_rt_clkpp(clk, pp, 1);
+#endif
+}
+
+mali_plat_info_t* get_mali_plat_data(void) {
+    return &mali_plat_data;
+}
+
+int get_mali_freq_level(int freq)
+{
+    int i = 0, level = -1;
+    int mali_freq_num;
+
+    if (freq < 0)
+        return level;
+
+    mali_freq_num = mali_plat_data.dvfs_table_size - 1;
+    if (freq < mali_plat_data.clk_sample[0])
+        level = mali_freq_num-1;
+    else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1])
+        level = 0;
+    else {
+        for (i=0; i<mali_freq_num - 1 ;i++) {
+            if (freq >= mali_plat_data.clk_sample[i] && freq < mali_plat_data.clk_sample[i + 1]) {
+                level = i;
+                level = mali_freq_num-level - 1;
+                break;
+            }
+        }
+    }
+    return level;
+}
+
+unsigned int get_mali_max_level(void)
+{
+    return mali_plat_data.dvfs_table_size - 1;
+}
+
+int get_gpu_max_clk_level(void)
+{
+    return mali_plat_data.cfg_clock;
+}
+
+#ifdef CONFIG_GPU_THERMAL
+static void set_limit_mali_freq(u32 idx)
+{
+    if (mali_plat_data.limit_on == 0)
+        return;
+    if (idx > mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk)
+        return;
+    if (idx > mali_plat_data.maxclk_sysfs) {
+        printk("idx > max freq\n");
+        return;
+    }
+    mali_plat_data.scale_info.maxclk= idx;
+    revise_mali_rt();
+}
+
+static u32 get_limit_mali_freq(void)
+{
+    return mali_plat_data.scale_info.maxclk;
+}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 get_mali_utilization(void)
+{
+#ifndef MESON_DRV_BRING
+    return 55;
+#else
+    return (_mali_ukk_utilization_pp() * 100) / 256;
+#endif
+}
+#endif
+#endif
+
+#ifdef CONFIG_GPU_THERMAL
+static u32 set_limit_pp_num(u32 num)
+{
+    u32 ret = -1;
+    if (mali_plat_data.limit_on == 0)
+        goto quit;
+    if (num > mali_plat_data.cfg_pp ||
+            num < mali_plat_data.scale_info.minpp)
+        goto quit;
+
+    if (num > mali_plat_data.maxpp_sysfs) {
+        printk("pp > sysfs set pp\n");
+        goto quit;
+    }
+
+    mali_plat_data.scale_info.maxpp = num;
+    revise_mali_rt();
+    ret = 0;
+quit:
+    return ret;
+}
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 mali_get_online_pp(void)
+{
+    unsigned int val;
+    mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+    val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+    if (val == 0x07)    /* No pp is working */
+        return 0;
+
+#ifndef MESON_DRV_BRING
+    return 2;
+#else
+    return mali_executor_get_num_cores_enabled();
+#endif
+}
+#endif
+#endif
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+    //dev_set_drvdata(&ptr_plt_dev->dev, &mali_plat_data);
+    mali_dt_info(ptr_plt_dev, &mali_plat_data);
+    mali_clock_init_clk_tree(ptr_plt_dev);
+    return 0;
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+    if (mali_core_scaling_init(&mali_plat_data) < 0)
+        return -1;
+    return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+    mali_core_scaling_term();
+    return 0;
+}
+
+void mali_post_init(void)
+{
+#ifdef CONFIG_GPU_THERMAL
+    int err;
+    struct gpufreq_cooling_device *gcdev = NULL;
+    struct gpucore_cooling_device *gccdev = NULL;
+
+    gcdev = gpufreq_cooling_alloc();
+    register_gpu_freq_info(get_current_frequency);
+    if (IS_ERR(gcdev))
+        printk("malloc gpu cooling buffer error!!\n");
+    else if (!gcdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gcdev->get_gpu_freq_level = get_mali_freq_level;
+        gcdev->get_gpu_max_level = get_mali_max_level;
+        gcdev->set_gpu_freq_idx = set_limit_mali_freq;
+        gcdev->get_gpu_current_max_level = get_limit_mali_freq;
+#ifdef CONFIG_DEVFREQ_THERMAL
+        gcdev->get_gpu_freq = get_mali_freq;
+        gcdev->get_gpu_loading = get_mali_utilization;
+        gcdev->get_online_pp = mali_get_online_pp;
+#endif
+        err = gpufreq_cooling_register(gcdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gcdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu cooling register okay with err=%d\n",err);
+    }
+
+    gccdev = gpucore_cooling_alloc();
+    if (IS_ERR(gccdev))
+        printk("malloc gpu core cooling buffer error!!\n");
+    else if (!gccdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gccdev->max_gpu_core_num=mali_plat_data.cfg_pp;
+        gccdev->set_max_pp_num=set_limit_pp_num;
+        err = (int)gpucore_cooling_register(gccdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gccdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu core cooling register okay with err=%d\n",err);
+    }
+#endif
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
new file mode 100644
index 0000000..aa8a77a
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.c
+ * Example core scaling policy.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+
+#if AMLOGIC_GPU_USE_GPPLL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16)
+#include <linux/amlogic/amports/gp_pll.h>
+#else
+#include <linux/amlogic/media/clk/gp_pll.h>
+#endif
+#endif
+
+#define LOG_MALI_SCALING 1
+#include "meson_main2.h"
+#include "mali_clock.h"
+
+static int currentStep;
+#ifndef CONFIG_MALI_DVFS
+static int num_cores_enabled;
+static int lastStep;
+static struct work_struct wq_work;
+static mali_plat_info_t* pmali_plat = NULL;
+#endif
+static int  scaling_mode = MALI_PP_FS_SCALING;
+extern int  mali_pm_statue;
+//static int  scaling_mode = MALI_SCALING_DISABLE;
+//static int  scaling_mode = MALI_PP_SCALING;
+
+#if AMLOGIC_GPU_USE_GPPLL
+static struct gp_pll_user_handle_s *gp_pll_user_gpu;
+static int is_gp_pll_get;
+static int is_gp_pll_put;
+#endif
+static unsigned scaling_dbg_level = 0;
+module_param(scaling_dbg_level, uint, 0644);
+MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level");
+
+#define scalingdbg(level, fmt, arg...)				   \
+	do {											   \
+		if (scaling_dbg_level >= (level))			   \
+		printk(fmt , ## arg);						   \
+	} while (0)
+
+#ifndef CONFIG_MALI_DVFS
+static inline void mali_clk_exected(void)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	uint32_t execStep = currentStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[currentStep];
+#endif
+
+	//if (pdvfs[currentStep].freq_index == pdvfs[lastStep].freq_index) return;
+	if ((pdvfs[execStep].freq_index == pdvfs[lastStep].freq_index) ||
+		(pdvfs[execStep].clk_freq == pdvfs[lastStep].clk_freq)){
+		return;
+	}
+
+#if AMLOGIC_GPU_USE_GPPLL
+	if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+		gp_pll_request(gp_pll_user_gpu);
+		if (!is_gp_pll_get) {
+			//printk("not get pll\n");
+			execStep = currentStep - 1;
+		}
+	} else {
+		//not get the gp pll, do need put
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+		gp_pll_release(gp_pll_user_gpu);
+	}
+#endif
+	//mali_dev_pause();
+	mali_clock_set(pdvfs[execStep].freq_index);
+	//mali_dev_resume();
+	lastStep = execStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	if (is_gp_pll_put) {
+		//printk("release gp0 pll\n");
+		gp_pll_release(gp_pll_user_gpu);
+		gp_pll_request(gp_pll_user_gpu);
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+	}
+#endif
+
+}
+#if AMLOGIC_GPU_USE_GPPLL
+static int gp_pll_user_cb_gpu(struct gp_pll_user_handle_s *user,
+		int event)
+{
+	if (event == GP_PLL_USER_EVENT_GRANT) {
+		//printk("granted\n");
+		is_gp_pll_get = 1;
+		is_gp_pll_put = 0;
+		schedule_work(&wq_work);
+	} else if (event == GP_PLL_USER_EVENT_YIELD) {
+		//printk("ask for yield\n");
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 1;
+		schedule_work(&wq_work);
+	}
+
+	return 0;
+}
+#endif
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+    cores = cores;
+    return 0;
+}
+
+static void do_scaling(struct work_struct *work)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	int err = mali_perf_set_num_pp_cores(num_cores_enabled);
+    if (err < 0) scalingdbg(1, "set pp failed");
+
+	scalingdbg(1, "set pp cores to %d\n", num_cores_enabled);
+	scalingdbg(1, "pdvfs[%d].freq_index=%d, pdvfs[%d].freq_index=%d\n",
+			currentStep, pdvfs[currentStep].freq_index,
+			lastStep, pdvfs[lastStep].freq_index);
+	mali_clk_exected();
+#ifdef CONFIG_MALI400_PROFILING
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+			MALI_PROFILING_EVENT_CHANNEL_GPU |
+			MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+			get_current_frequency(),
+			0,	0,	0,	0);
+#endif
+}
+#endif
+
+u32 revise_set_clk(u32 val, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+
+	pinfo = &pmali_plat->scale_info;
+
+	if (val < pinfo->minclk)
+		val = pinfo->minclk;
+	else if (val >  pinfo->maxclk)
+		val =  pinfo->maxclk;
+
+	if (val != currentStep) {
+		currentStep = val;
+		if (flush)
+			schedule_work(&wq_work);
+		else
+			ret = 1;
+	}
+#endif
+	return ret;
+}
+
+void get_mali_rt_clkpp(u32* clk, u32* pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	*clk = currentStep;
+	*pp = num_cores_enabled;
+#endif
+}
+
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+	u32 flush_work = 0;
+
+	pinfo = &pmali_plat->scale_info;
+	if (clk < pinfo->minclk)
+		clk = pinfo->minclk;
+	else if (clk >  pinfo->maxclk)
+		clk =  pinfo->maxclk;
+
+	if (clk != currentStep) {
+		currentStep = clk;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+	if (pp < pinfo->minpp)
+		pp = pinfo->minpp;
+	else if (pp > pinfo->maxpp)
+		pp = pinfo->maxpp;
+
+	if (pp != num_cores_enabled) {
+		num_cores_enabled = pp;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+
+	if (flush_work)
+		schedule_work(&wq_work);
+#endif
+	return ret;
+}
+
+void revise_mali_rt(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	set_mali_rt_clkpp(currentStep, num_cores_enabled, 1);
+#endif
+}
+
+void flush_scaling_job(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	cancel_work_sync(&wq_work);
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 enable_one_core(void)
+{
+	scalingdbg(2, "meson:	 one more pp, curent has %d pp cores\n",  num_cores_enabled + 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled + 1, 0);
+}
+
+static u32 disable_one_core(void)
+{
+	scalingdbg(2, "meson: disable one pp, current has %d pp cores\n",  num_cores_enabled - 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled - 1, 0);
+}
+
+static u32 enable_max_num_cores(void)
+{
+	return set_mali_rt_clkpp(currentStep, pmali_plat->scale_info.maxpp, 0);
+}
+
+static u32 enable_pp_cores(u32 val)
+{
+	scalingdbg(2, "meson: enable %d pp cores\n", val);
+	return set_mali_rt_clkpp(currentStep, val, 0);
+}
+#endif
+
+int mali_core_scaling_init(mali_plat_info_t *mali_plat)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_plat == NULL) {
+		scalingdbg(2, " Mali platform data is NULL!!!\n");
+		return -1;
+	}
+
+	pmali_plat = mali_plat;
+    printk("mali_plat=%p\n", mali_plat);
+	num_cores_enabled = pmali_plat->sc_mpp;
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_gpu = gp_pll_user_register("gpu", 1,
+		gp_pll_user_cb_gpu);
+	//not get the gp pll, do need put
+	is_gp_pll_get = 0;
+	is_gp_pll_put = 0;
+	if (gp_pll_user_gpu == NULL) printk("register gp pll user for gpu failed\n");
+#endif
+
+	currentStep = pmali_plat->def_clock;
+	lastStep = currentStep;
+	INIT_WORK(&wq_work, do_scaling);
+#endif
+	return 0;
+	/* NOTE: Mali is not fully initialized at this point. */
+}
+
+void mali_core_scaling_term(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	flush_scheduled_work();
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_unregister(gp_pll_user_gpu);
+#endif
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 mali_threshold [] = {
+	40, /* 40% */
+	50, /* 50% */
+	90, /* 90% */
+};
+#endif
+
+void mali_pp_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+
+	if (mali_threshold[2] < utilization_pp)
+		ret = enable_max_num_cores();
+	else if (mali_threshold[1]< utilization_pp)
+		ret = enable_one_core();
+	else if (0 < utilization_pp)
+		ret = disable_one_core();
+	if (ret == 1)
+		schedule_work(&wq_work);
+#endif
+}
+
+#if LOG_MALI_SCALING
+void trace_utilization(int utilization_gpu, u32 current_idx, u32 next,
+		u32 current_pp, u32 next_pp)
+{
+	char direction;
+	if (next > current_idx)
+		direction = '>';
+	else if ((current_idx > pmali_plat->scale_info.minpp) && (next < current_idx))
+		direction = '<';
+	else
+		direction = '~';
+
+	scalingdbg(2, "[SCALING]%c (%3d-->%3d)@%3d{%3d - %3d}. pp:(%d-->%d)\n",
+			direction,
+			get_mali_freq(current_idx),
+			get_mali_freq(next),
+			utilization_gpu,
+			pmali_plat->dvfs_table[current_idx].downthreshold,
+			pmali_plat->dvfs_table[current_idx].upthreshold,
+			current_pp, next_pp);
+}
+#endif
+
+#ifndef CONFIG_MALI_DVFS
+static int mali_stay_count = 0;
+static void mali_decide_next_status(int utilization_pp, int* next_fs_idx,
+		int* pp_change_flag)
+{
+	u32 mali_up_limit, decided_fs_idx;
+	u32 ld_left, ld_right;
+	u32 ld_up, ld_down;
+	u32 change_mode;
+
+	*pp_change_flag = 0;
+	change_mode = 0;
+
+	scalingdbg(5, "line(%d), scaling_mode=%d, MALI_TURBO_MODE=%d, turbo=%d, maxclk=%d\n",
+			__LINE__,  scaling_mode, MALI_TURBO_MODE,
+		    pmali_plat->turbo_clock, pmali_plat->scale_info.maxclk);
+
+	mali_up_limit = (scaling_mode ==  MALI_TURBO_MODE) ?
+		pmali_plat->turbo_clock : pmali_plat->scale_info.maxclk;
+	decided_fs_idx = currentStep;
+
+	ld_up = pmali_plat->dvfs_table[currentStep].upthreshold;
+	ld_down = pmali_plat->dvfs_table[currentStep].downthreshold;
+
+	scalingdbg(2, "utilization=%d,  ld_up=%d\n ", utilization_pp,  ld_up);
+	if (utilization_pp >= ld_up) { /* go up */
+
+		scalingdbg(2, "currentStep=%d,  mali_up_limit=%d\n ", currentStep, mali_up_limit);
+		if (currentStep < mali_up_limit) {
+			change_mode = 1;
+			if ((currentStep < pmali_plat->def_clock) && (utilization_pp > pmali_plat->bst_gpu))
+				decided_fs_idx = pmali_plat->def_clock;
+			else
+				decided_fs_idx++;
+		}
+		if ((utilization_pp >= ld_up) &&
+				(num_cores_enabled < pmali_plat->scale_info.maxpp)) {
+			if ((num_cores_enabled < pmali_plat->sc_mpp) && (utilization_pp >= pmali_plat->bst_pp)) {
+				*pp_change_flag = 1;
+				change_mode = 1;
+			} else if (change_mode == 0) {
+				*pp_change_flag = 2;
+				change_mode = 1;
+			}
+		}
+#if LOG_MALI_SCALING
+		scalingdbg(2, "[nexting..] [LD:%d]-> FS[CRNT:%d LMT:%d NEXT:%d] PP[NUM:%d LMT:%d MD:%d][F:%d]\n",
+				utilization_pp, currentStep, mali_up_limit, decided_fs_idx,
+				num_cores_enabled, pmali_plat->scale_info.maxpp, *pp_change_flag, change_mode);
+#endif
+	} else if (utilization_pp <= ld_down) { /* go down */
+		if (mali_stay_count > 0) {
+			*next_fs_idx = decided_fs_idx;
+			mali_stay_count--;
+			return;
+		}
+
+		if (num_cores_enabled > pmali_plat->sc_mpp) {
+			change_mode = 1;
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		} else if (currentStep > pmali_plat->scale_info.minclk) {
+			change_mode = 1;
+		} else if (num_cores_enabled > 1) { /* decrease PPS */
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				scalingdbg(2, "ld_left=%d, ld_right=%d\n", ld_left, ld_right);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		}
+
+		if (change_mode == 1) {
+			decided_fs_idx--;
+		} else if (change_mode == 2) { /* decrease PPS */
+			*pp_change_flag = -1;
+		}
+	}
+
+	if (decided_fs_idx < 0 ) {
+		printk("gpu debug, next index below 0\n");
+		decided_fs_idx = 0;
+	}
+	if (decided_fs_idx > pmali_plat->scale_info.maxclk) {
+		decided_fs_idx = pmali_plat->scale_info.maxclk;
+		printk("gpu debug, next index above max-1, set to %d\n", decided_fs_idx);
+	}
+
+	if (change_mode)
+		mali_stay_count = pmali_plat->dvfs_table[decided_fs_idx].keep_count;
+
+	*next_fs_idx = decided_fs_idx;
+}
+#endif
+
+void mali_pp_fs_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+	int pp_change_flag = 0;
+	u32 next_idx = 0;
+
+#if LOG_MALI_SCALING
+	u32 last_pp = num_cores_enabled;
+#endif
+	mali_decide_next_status(utilization_pp, &next_idx, &pp_change_flag);
+
+	if (pp_change_flag == 1)
+		ret = enable_pp_cores(pmali_plat->sc_mpp);
+	else if (pp_change_flag == 2)
+		ret = enable_one_core();
+	else if (pp_change_flag == -1) {
+		ret = disable_one_core();
+	}
+
+#if LOG_MALI_SCALING
+	if (pp_change_flag || (next_idx != currentStep))
+		trace_utilization(utilization_pp, currentStep, next_idx, last_pp, num_cores_enabled);
+#endif
+
+	if (next_idx != currentStep) {
+		ret = 1;
+		currentStep = next_idx;
+	}
+
+	if (ret == 1)
+		schedule_work(&wq_work);
+#ifdef CONFIG_MALI400_PROFILING
+	else
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				MALI_PROFILING_EVENT_CHANNEL_GPU |
+				MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				get_current_frequency(),
+				0,	0,	0,	0);
+#endif
+#endif
+}
+
+u32 get_mali_schel_mode(void)
+{
+	return scaling_mode;
+}
+
+void set_mali_schel_mode(u32 mode)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mode >= MALI_SCALING_MODE_MAX)
+		return;
+	scaling_mode = mode;
+
+	//disable thermal in turbo mode
+	if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->limit_on = 0;
+	} else {
+		pmali_plat->limit_on = 1;
+	}
+	/* set default performance range. */
+	pmali_plat->scale_info.minclk = pmali_plat->cfg_min_clock;
+	pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+	pmali_plat->scale_info.minpp = pmali_plat->cfg_min_pp;
+	pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+
+	/* set current status and tune max freq */
+	if (scaling_mode == MALI_PP_FS_SCALING) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_pp_cores(pmali_plat->sc_mpp);
+	} else if (scaling_mode == MALI_SCALING_DISABLE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_max_num_cores();
+	} else if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->turbo_clock;
+		enable_max_num_cores();
+	}
+	currentStep = pmali_plat->scale_info.maxclk;
+	schedule_work(&wq_work);
+#endif
+}
+
+u32 get_current_frequency(void)
+{
+	return get_mali_freq(currentStep);
+}
+
+void mali_gpu_utilization_callback(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_pm_statue)
+		return;
+
+	switch (scaling_mode) {
+		case MALI_PP_FS_SCALING:
+			mali_pp_fs_scaling_update(utilization_pp);
+			break;
+		case MALI_PP_SCALING:
+			mali_pp_scaling_update(utilization_pp);
+			break;
+		default:
+			break;
+	}
+#endif
+}
+static u32 clk_cntl_save = 0;
+void mali_dev_freeze(void)
+{
+	clk_cntl_save = mplt_read(HHI_MALI_CLK_CNTL);
+}
+
+void mali_dev_restore(void)
+{
+
+	mplt_write(HHI_MALI_CLK_CNTL, clk_cntl_save);
+	if (pmali_plat && pmali_plat->pdev) {
+		mali_clock_init_clk_tree(pmali_plat->pdev);
+	} else {
+		printk("error: init clock failed, pmali_plat=%p, pmali_plat->pdev=%p\n",
+				pmali_plat, pmali_plat == NULL ? NULL: pmali_plat->pdev);
+	}
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
new file mode 100755
index 0000000..5b6ef37
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_juno_soc.o
+
+
+obj-m += juno_mali_opp.o
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
new file mode 100755
index 0000000..ccfd8cc
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/scpi_protocol.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp_add opp_add
+#endif /* Linux >= 3.13 */
+
+
+static int init_juno_opps_from_scpi(struct device *dev)
+{
+	struct scpi_opp *sopp;
+	int i;
+
+	/* Hard coded for Juno. 2 is GPU domain */
+	sopp = scpi_dvfs_get_opps(2);
+	if (IS_ERR_OR_NULL(sopp))
+		return PTR_ERR(sopp);
+
+	for (i = 0; i < sopp->count; i++) {
+		struct scpi_opp_entry *e = &sopp->opp[i];
+
+		dev_info(dev, "Mali OPP from SCPI: %u Hz @ %u mV\n",
+				e->freq_hz, e->volt_mv);
+
+		dev_pm_opp_add(dev, e->freq_hz, e->volt_mv * 1000);
+	}
+
+	return 0;
+}
+
+static int juno_setup_opps(void)
+{
+	struct device_node *np;
+	struct platform_device *pdev;
+	int err;
+
+	np = of_find_node_by_name(NULL, "gpu");
+	if (!np) {
+		pr_err("Failed to find DT entry for Mali\n");
+		return -EFAULT;
+	}
+
+	pdev = of_find_device_by_node(np);
+	if (!pdev) {
+		pr_err("Failed to find device for Mali\n");
+		of_node_put(np);
+		return -EFAULT;
+	}
+
+	err = init_juno_opps_from_scpi(&pdev->dev);
+
+	of_node_put(np);
+
+	return err;
+}
+
+module_init(juno_setup_opps);
+MODULE_LICENSE("GPL");
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
new file mode 100755
index 0000000..c654818
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <linux/thermal.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_smc.h>
+
+/* Versatile Express (VE) Juno Development Platform */
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 65,
+	.mmu_irq_number = 66,
+	.gpu_irq_number = 64,
+	.io_memory_region = {
+			     .start = 0x2D000000,
+			     .end = 0x2D000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+/*
+ * Juno Protected Mode integration
+ */
+
+/* SMC Function Numbers */
+#define JUNO_SMC_PROTECTED_ENTER_FUNC  0xff06
+#define JUNO_SMC_PROTECTED_RESET_FUNC 0xff07
+
+static int juno_protected_mode_enter(struct kbase_device *kbdev)
+{
+	/* T62X in SoC detected */
+	u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+		JUNO_SMC_PROTECTED_ENTER_FUNC, false,
+		0, 0, 0);
+	return ret;
+}
+
+/* TODO: Remove these externs, reset should should be done by the firmware */
+extern void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+extern u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+static int juno_protected_mode_reset(struct kbase_device *kbdev)
+{
+
+	/* T62X in SoC detected */
+	u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+		JUNO_SMC_PROTECTED_RESET_FUNC, false,
+		0, 0, 0);
+
+	/* TODO: Remove this reset, it should be done by the firmware */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET, NULL);
+
+	while ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL)
+			& RESET_COMPLETED) != RESET_COMPLETED)
+		;
+
+	return ret;
+}
+
+static bool juno_protected_mode_supported(struct kbase_device *kbdev)
+{
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+
+	/*
+	 * Protected mode is only supported for the built in GPU
+	 * _and_ only if the right firmware is running.
+	 *
+	 * Given that at init time the GPU is not powered up the
+	 * juno_protected_mode_reset function can't be used as
+	 * is needs to access GPU registers.
+	 * However, although we don't want the GPU to boot into
+	 * protected mode we know a GPU reset will be done after
+	 * this function is called so although we set the GPU to
+	 * protected mode it will exit protected mode before the
+	 * driver is ready to run work.
+	 */
+	if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) &&
+			(kbdev->reg_start == 0x2d000000))
+		return juno_protected_mode_enter(kbdev) == 0;
+
+	return false;
+}
+
+struct kbase_protected_ops juno_protected_ops = {
+	.protected_mode_enter = juno_protected_mode_enter,
+	.protected_mode_reset = juno_protected_mode_reset,
+	.protected_mode_supported = juno_protected_mode_supported,
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
new file mode 100755
index 0000000..ab29e9d
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 600000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 600000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (&juno_protected_ops)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
+#ifdef CONFIG_DEVFREQ_THERMAL
+extern struct devfreq_cooling_ops juno_model_ops;
+#endif
+extern struct kbase_protected_ops juno_protected_ops;
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
new file mode 100755
index 0000000..7cb3be7
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @brief Entry point to transfer control to a platform for early initialization
+ *
+ * This function is called early on in the initialization during execution of
+ * @ref kbase_driver_init.
+ *
+ * @return Zero to indicate success non-zero for failure.
+ */
+int kbase_platform_early_init(void);
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
new file mode 100755
index 0000000..01f9dfc
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+/**
+ * kbase_platform_fake_register - Entry point for fake platform registration
+ *
+ * This function is called early on in the initialization during execution of
+ * kbase_driver_init.
+ *
+ * Return: 0 to indicate success, non-zero for failure.
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Entry point for fake platform unregistration
+ *
+ * This function is called in the termination during execution of
+ * kbase_driver_exit.
+ */
+void kbase_platform_fake_unregister(void);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
new file mode 100755
index 0000000..084a156
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
new file mode 100755
index 0000000..dc4471b
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -0,0 +1,82 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq()
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq()
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..15ce2bc
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -0,0 +1,85 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+#include "mali_kbase_config_platform.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+	.start = 0xFC010000,
+	.end = 0xFC010000 + (4096 * 4) - 1
+	}
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
new file mode 100755
index 0000000..4665f98
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,279 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START	(0x10000000)
+#define SYS_CFGDATA_OFFSET		(0x000000A0)
+#define SYS_CFGCTRL_OFFSET		(0x000000A4)
+#define SYS_CFGSTAT_OFFSET		(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		(1 << 31)
+#define READ_REG_BIT_VALUE			(0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			(0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		(1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			(1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE	(0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE		(2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		(1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			(1 <<  1)
+
+#define FEED_REG_BIT_MASK			(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+/* the following three values used for reading
+ * HBI value of the LogicTile daughterboard */
+#define VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 (0x10000000)
+#define VE_SYS_PROC_ID1_OFFSET (0x00000088)
+#define VE_LOGIC_TILE_HBI_MASK (0x00000FFF)
+
+#define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos))
+
+/**
+ * Values used for determining the GPU frequency based on the LogicTile type
+ * Used by the function kbase_get_platform_logic_tile_type
+ */
+#define VE_VIRTEX6_GPU_FREQ_MIN 5000
+#define VE_VIRTEX6_GPU_FREQ_MAX 5000
+#define VE_VIRTEX7_GPU_FREQ_MIN 40000
+#define VE_VIRTEX7_GPU_FREQ_MAX 40000
+#define VE_DEFAULT_GPU_FREQ_MIN 5000
+#define VE_DEFAULT_GPU_FREQ_MAX 5000
+
+
+#define CPU_CLOCK_SPEED_UNDEFINED (0)
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed -Retrieves the CPU clock speed
+ * @cpu_clock - the value of CPU clock speed in MHz
+ *
+ * Returns 0 on success, error code otherwise.
+ *
+ * The implementation is platform specific.
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	int err = 0;
+	u32 reg_val = 0;
+	u32 osc2_value = 0;
+	u32 pa_divide = 0;
+	u32 pb_divide = 0;
+	u32 pc_divide = 0;
+	void __iomem *syscfg_reg = NULL;
+	void __iomem *scc_reg = NULL;
+
+	if (CPU_CLOCK_SPEED_UNDEFINED != cpu_clock_speed) {
+		*cpu_clock = cpu_clock_speed;
+		return 0;
+	}
+
+	/* Init the value in case something goes wrong */
+	*cpu_clock = 0;
+
+	/* Map CPU register into virtual memory */
+	syscfg_reg = ioremap(MOTHERBOARD_SYS_CFG_START, 0x1000);
+	if (syscfg_reg == NULL) {
+		err = -EIO;
+		goto syscfg_reg_map_failed;
+	}
+
+	scc_reg = ioremap(CORETILE_EXPRESS_A9X4_SCC_START, 0x1000);
+	if (scc_reg == NULL) {
+		err = -EIO;
+		goto scc_reg_map_failed;
+	}
+
+	raw_spin_lock(&syscfg_lock);
+
+	/* Read SYS regs - OSC2 */
+	reg_val = readl(syscfg_reg + SYS_CFGCTRL_OFFSET);
+
+	/* Check if there is any other undergoing request */
+	if (reg_val & SYS_CFGCTRL_START_BIT_VALUE) {
+		err = -EBUSY;
+		goto ongoing_request;
+	}
+	/* Reset the CGFGSTAT reg */
+	writel(0, (syscfg_reg + SYS_CFGSTAT_OFFSET));
+
+	writel(SYS_CFGCTRL_START_BIT_VALUE | READ_REG_BIT_VALUE |
+			DCC_DEFAULT_BIT_VALUE |
+			SYS_CFG_OSC_FUNC_BIT_VALUE |
+			SITE_DEFAULT_BIT_VALUE |
+			BOARD_STACK_POS_DEFAULT_BIT_VALUE |
+			DEVICE_DEFAULT_BIT_VALUE,
+			(syscfg_reg + SYS_CFGCTRL_OFFSET));
+	/* Wait for the transaction to complete */
+	while (!(readl(syscfg_reg + SYS_CFGSTAT_OFFSET) &
+			SYS_CFG_COMPLETE_BIT_VALUE))
+		;
+	/* Read SYS_CFGSTAT Register to get the status of submitted
+	 * transaction */
+	reg_val = readl(syscfg_reg + SYS_CFGSTAT_OFFSET);
+
+	if (reg_val & SYS_CFG_ERROR_BIT_VALUE) {
+		/* Error while setting register */
+		err = -EIO;
+		goto set_reg_error;
+	}
+
+	osc2_value = readl(syscfg_reg + SYS_CFGDATA_OFFSET);
+	/* Read the SCC CFGRW0 register */
+	reg_val = readl(scc_reg);
+
+	/*
+	 * Select the appropriate feed:
+	 * CFGRW0[0] - CLKOB
+	 * CFGRW0[1] - CLKOC
+	 * CFGRW0[2] - FACLK (CLK)B FROM AXICLK PLL)
+	 */
+	/* Calculate the  FCLK */
+	if (IS_SINGLE_BIT_SET(reg_val, 0)) {
+		/* CFGRW0[0] - CLKOB */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[10:7] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PB_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 1)) {
+		/* CFGRW0[1] - CLKOC */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[14:11] */
+		pc_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PC_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PC_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 2)) {
+		/* CFGRW0[2] - FACLK */
+		/* CFGRW0[18:15] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PA_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[22:19] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PB_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else {
+		err = -EIO;
+	}
+
+set_reg_error:
+ongoing_request:
+	raw_spin_unlock(&syscfg_lock);
+	*cpu_clock /= HZ_IN_MHZ;
+
+	if (!err)
+		cpu_clock_speed = *cpu_clock;
+
+	iounmap(scc_reg);
+
+scc_reg_map_failed:
+	iounmap(syscfg_reg);
+
+syscfg_reg_map_failed:
+
+	return err;
+}
+
+/**
+ * kbase_get_platform_logic_tile_type -  determines which LogicTile type
+ * is used by Versatile Express
+ *
+ * When platform_config build parameter is specified as vexpress, i.e.,
+ * platform_config=vexpress, GPU frequency may vary dependent on the
+ * particular platform. The GPU frequency depends on the LogicTile type.
+ *
+ * This function determines which LogicTile type is used by the platform by
+ * reading the HBI value of the daughterboard which holds the LogicTile:
+ *
+ * 0x217 HBI0217 Virtex-6
+ * 0x192 HBI0192 Virtex-5
+ * 0x247 HBI0247 Virtex-7
+ *
+ * Return: HBI value of the logic tile daughterboard, zero if not accessible
+ */
+static u32 kbase_get_platform_logic_tile_type(void)
+{
+	void __iomem *syscfg_reg = NULL;
+	u32 sys_procid1 = 0;
+
+	syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 + VE_SYS_PROC_ID1_OFFSET, 4);
+	if (NULL != syscfg_reg) {
+		sys_procid1 = readl(syscfg_reg);
+		iounmap(syscfg_reg);
+	}
+
+	return sys_procid1 & VE_LOGIC_TILE_HBI_MASK;
+}
+
+u32 kbase_get_platform_min_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MIN;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MIN;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MIN;
+	}
+}
+
+u32 kbase_get_platform_max_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MAX;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MAX;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MAX;
+	}
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
new file mode 100755
index 0000000..da86569
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+/**
+ * Get the minimum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_min_freq(void);
+
+/**
+ * Get the maximum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_max_freq(void);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
new file mode 100755
index 0000000..d9bfabc
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
@@ -0,0 +1,16 @@
+#
+# (C) COPYRIGHT 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_vexpress.o
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
new file mode 100755
index 0000000..b0490ca
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 5000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 5000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..3ff0930
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -0,0 +1,79 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+			     .start = 0x2f010000,
+			     .end = 0x2f010000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
new file mode 100755
index 0000000..0cb41ce
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
new file mode 100755
index 0000000..22ffccb
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -0,0 +1,82 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 10000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 10000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
new file mode 100755
index 0000000..76ffe4a
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 75,
+	.mmu_irq_number = 76,
+	.gpu_irq_number = 77,
+	.io_memory_region = {
+			     .start = 0x2F000000,
+			     .end = 0x2F000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
new file mode 100755
index 0000000..816dff4
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ					    (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START		(0x10000000)
+#define SYS_CFGDATA_OFFSET				(0x000000A0)
+#define SYS_CFGCTRL_OFFSET				(0x000000A4)
+#define SYS_CFGSTAT_OFFSET				(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		  (1 << 31)
+#define READ_REG_BIT_VALUE				  (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			  (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		  (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			  (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE	      (2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		  (1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			  (1 <<  1)
+
+#define FEED_REG_BIT_MASK				(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+#define IS_SINGLE_BIT_SET(val, pos)		(val&(1<<pos))
+
+#define CPU_CLOCK_SPEED_UNDEFINED 0
+
+#define CPU_CLOCK_SPEED_6XV7 50
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed
+ * @brief  Retrieves the CPU clock speed.
+ *         The implementation is platform specific.
+ * @param[out]    cpu_clock - the value of CPU clock speed in MHz
+ * @return        0 on success, 1 otherwise
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	/* TODO: MIDBASE-2873 - Provide runtime detection of CPU clock freq for 6XV7 board */
+	*cpu_clock = CPU_CLOCK_SPEED_6XV7;
+
+	return 0;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
new file mode 100755
index 0000000..23647cc
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
new file mode 100755
index 0000000..5fa9b39
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_ukk_os.h
+ * Types and definitions that are common for Linux OSs for the kernel side of the
+ * User-Kernel interface.
+ */
+
+#ifndef _UKK_OS_H_ /* Linux version */
+#define _UKK_OS_H_
+
+#include <linux/fs.h>
+
+/**
+ * @addtogroup uk_api User-Kernel Interface API
+ * @{
+ */
+
+/**
+ * @addtogroup uk_api_kernel UKK (Kernel side)
+ * @{
+ */
+
+/**
+ * Internal OS specific data structure associated with each UKK session. Part
+ * of a ukk_session object.
+ */
+typedef struct ukkp_session {
+	int dummy;     /**< No internal OS specific data at this time */
+} ukkp_session;
+
+/** @} end group uk_api_kernel */
+
+/** @} end group uk_api */
+
+#endif /* _UKK_OS_H__ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/midgard/sconscript b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/sconscript
new file mode 100755
index 0000000..7b7ec77
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/midgard/sconscript
@@ -0,0 +1,85 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import sys
+Import('env')
+
+if Glob('tests/sconscript'):
+	SConscript( 'tests/sconscript' )
+
+mock_test = 0
+
+# Fake platform is a transient solution for GPL drivers running in kernel that does not provide configuration via platform data.
+# For such kernels fake_platform_device should be set to 1. For kernels providing platform data fake_platform_device should be set to 0.
+if env['platform_config']=='devicetree':
+	fake_platform_device = 0
+else:
+	fake_platform_device = 1
+
+# Source files required for kbase.
+kbase_src = [
+	Glob('*.c'),
+	Glob('backend/*/*.c'),
+	Glob('internal/*/*.c'),
+	Glob('platform/%s/*.c' % env['platform_config']),
+]
+
+if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1':
+	kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')]
+	mock_test = 1
+
+# we need platform config for GPL version using fake platform
+if fake_platform_device==1:
+	# Check if we are compiling for PBX
+	if env.KernelConfigEnabled("CONFIG_MACH_REALVIEW_PBX") and \
+	   env["platform_config"] in {"vexpress", "vexpress_6xvirtex7_10mhz"}:
+		sys.stderr.write("WARNING: Building for a PBX kernel but with platform_config=vexpress*\n")
+	# if the file platform config file is in the tpip directory then use that, otherwise use the default config directory
+	if Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])):
+		kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config']))
+	else:
+		kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/*%s.c' % (env['platform_config']))
+
+make_args = env.kernel_get_config_defines(ret_list = True,
+                                          fake = fake_platform_device) + [
+	'PLATFORM=%s' % env['platform'],
+	'MALI_ERROR_INJECT_ON=%s' % env['error_inject'],
+	'MALI_KERNEL_TEST_API=%s' % env['unit'],
+	'MALI_UNIT_TEST=%s' % env['unit'],
+	'MALI_RELEASE_NAME=%s' % env['mali_release_name'],
+	'MALI_MOCK_TEST=%s' % mock_test,
+	'MALI_CUSTOMER_RELEASE=%s' % env['release'],
+	'MALI_INSTRUMENTATION_LEVEL=%s' % env['instr'],
+	'MALI_COVERAGE=%s' % env['coverage'],
+	'MALI_BUS_LOG=%s' % env['buslog'],
+]
+
+kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src,
+                              make_args = make_args)
+
+# Add a dependency on kds.ko.
+# Only necessary when KDS is not built into the kernel.
+#
+if env['os'] != 'android':
+	if not env.KernelConfigEnabled("CONFIG_KDS"):
+		env.Depends(kbase, '$STATIC_LIB_PATH/kds.ko')
+
+# need Module.symvers from ump.ko build
+if int(env['ump']) == 1:
+	env.Depends(kbase, '$STATIC_LIB_PATH/ump.ko')
+
+env.KernelObjTarget('kbase', kbase)
+
+env.AppendUnique(BASE=['cutils_linked_list'])
diff --git a/midgard/r15p0/kernel/drivers/gpu/arm/sconscript b/midgard/r15p0/kernel/drivers/gpu/arm/sconscript
new file mode 100755
index 0000000..924511b
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/arm/sconscript
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import glob
+
+
+SConscript('midgard/sconscript')
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/Kbuild b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/Kbuild
new file mode 100755
index 0000000..f10d58c
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/Kbuild
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+pl111_drm-y +=	pl111_drm_device.o \
+		pl111_drm_connector.o \
+		pl111_drm_crtc.o \
+		pl111_drm_cursor.o \
+		pl111_drm_dma_buf.o \
+		pl111_drm_encoder.o \
+		pl111_drm_fb.o \
+		pl111_drm_gem.o \
+		pl111_drm_pl111.o \
+		pl111_drm_platform.o \
+		pl111_drm_suspend.o \
+		pl111_drm_vma.o
+
+obj-$(CONFIG_DRM_PL111) += pl111_drm.o
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/Kconfig b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/Kconfig
new file mode 100755
index 0000000..60b465c
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/Kconfig
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+config DRM_PL111
+	tristate "DRM Support for PL111 CLCD Controller"
+	depends on DRM
+	select DRM_KMS_HELPER
+	select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+	help
+	  Choose this option for DRM support for the PL111 CLCD controller.
+	  If M is selected the module will be called pl111_drm.
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/Makefile b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/Makefile
new file mode 100755
index 0000000..2869f58
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: pl111_drm
+
+pl111_drm:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_USES_KDS=y CONFIG_DRM_PL111=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h
new file mode 100755
index 0000000..d3e0086
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h
@@ -0,0 +1,95 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+/**
+ * pl111_clcd_ext.h
+ * Extended CLCD register definitions
+ */
+
+#ifndef PL111_CLCD_EXT_H_
+#define PL111_CLCD_EXT_H_
+
+/* 
+ * PL111 cursor register definitions not defined in the kernel's clcd header.
+ * 
+ * TODO MIDEGL-1718: move to include/linux/amba/clcd.h
+ */
+
+#define CLCD_CRSR_IMAGE				0x00000800
+#define CLCD_CRSR_IMAGE_MAX_WORDS		256
+#define CLCD_CRSR_IMAGE_WORDS_PER_LINE		4
+#define CLCD_CRSR_IMAGE_PIXELS_PER_WORD	16
+
+#define CLCD_CRSR_LBBP_COLOR_MASK	0x00000003
+#define CLCD_CRSR_LBBP_BACKGROUND	0x0
+#define CLCD_CRSR_LBBP_FOREGROUND	0x1
+#define CLCD_CRSR_LBBP_TRANSPARENT	0x2
+#define CLCD_CRSR_LBBP_INVERSE		0x3
+
+
+#define CLCD_CRSR_CTRL			0x00000c00
+#define CLCD_CRSR_CONFIG		0x00000c04
+#define CLCD_CRSR_PALETTE_0		0x00000c08
+#define CLCD_CRSR_PALETTE_1		0x00000c0c
+#define CLCD_CRSR_XY			0x00000c10
+#define CLCD_CRSR_CLIP			0x00000c14
+#define CLCD_CRSR_IMSC			0x00000c20
+#define CLCD_CRSR_ICR			0x00000c24
+#define CLCD_CRSR_RIS			0x00000c28
+#define CLCD_MIS				0x00000c2c
+
+#define CRSR_CTRL_CRSR_ON		(1 << 0)
+#define CRSR_CTRL_CRSR_MAX		3
+#define CRSR_CTRL_CRSR_NUM_SHIFT	4
+#define CRSR_CTRL_CRSR_NUM_MASK		\
+	(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT)
+#define CRSR_CTRL_CURSOR_0		0
+#define CRSR_CTRL_CURSOR_1		1
+#define CRSR_CTRL_CURSOR_2		2
+#define CRSR_CTRL_CURSOR_3		3
+
+#define CRSR_CONFIG_CRSR_SIZE		(1 << 0)
+#define CRSR_CONFIG_CRSR_FRAME_SYNC	(1 << 1)
+
+#define CRSR_PALETTE_RED_SHIFT		0
+#define CRSR_PALETTE_GREEN_SHIFT	8
+#define CRSR_PALETTE_BLUE_SHIFT		16
+
+#define CRSR_PALETTE_RED_MASK		0x000000ff
+#define CRSR_PALETTE_GREEN_MASK		0x0000ff00
+#define CRSR_PALETTE_BLUE_MASK		0x00ff0000
+#define CRSR_PALETTE_MASK		(~0xff000000)
+
+#define CRSR_XY_MASK			0x000003ff
+#define CRSR_XY_X_SHIFT			0
+#define CRSR_XY_Y_SHIFT			16
+
+#define CRSR_XY_X_MASK			CRSR_XY_MASK
+#define CRSR_XY_Y_MASK			(CRSR_XY_MASK << CRSR_XY_Y_SHIFT)
+
+#define CRSR_CLIP_MASK			0x3f
+#define CRSR_CLIP_X_SHIFT		0
+#define CRSR_CLIP_Y_SHIFT		8
+
+#define CRSR_CLIP_X_MASK		CRSR_CLIP_MASK
+#define CRSR_CLIP_Y_MASK		(CRSR_CLIP_MASK << CRSR_CLIP_Y_SHIFT)
+
+#define CRSR_IMSC_CRSR_IM		(1<<0)
+#define CRSR_ICR_CRSR_IC		(1<<0)
+#define CRSR_RIS_CRSR_RIS		(1<<0)
+#define CRSR_MIS_CRSR_MIS		(1<<0)
+
+#endif /* PL111_CLCD_EXT_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h
new file mode 100755
index 0000000..64d87b6
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h
@@ -0,0 +1,270 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _PL111_DRM_H_
+#define _PL111_DRM_H_
+
+#define DRIVER_AUTHOR    "ARM Ltd."
+#define DRIVER_NAME      "pl111_drm"
+#define DRIVER_DESC      "DRM module for PL111"
+#define DRIVER_LICENCE   "GPL"
+#define DRIVER_ALIAS     "platform:pl111_drm"
+#define DRIVER_DATE      "20101111"
+#define DRIVER_VERSION   "0.2"
+#define DRIVER_MAJOR      2
+#define DRIVER_MINOR      1
+#define DRIVER_PATCHLEVEL 1
+
+/*
+ * Number of flips allowed in flight at any one time. Any more flips requested
+ * beyond this value will cause the caller to block until earlier flips have
+ * completed.
+ *
+ * For performance reasons, this must be greater than the number of buffers
+ * used in the rendering pipeline. Note that the rendering pipeline can contain
+ * different types of buffer, e.g.:
+ * - 2 final framebuffers
+ * - >2 geometry buffers for GPU use-cases
+ * - >2 vertex buffers for GPU use-cases
+ *
+ * For example, a system using 5 geometry buffers could have 5 flips in flight,
+ * and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 5 or greater.
+ *
+ * Whilst there may be more intermediate buffers (such as vertex/geometry) than
+ * final framebuffers, KDS is used to ensure that GPU rendering waits for the
+ * next off-screen buffer, so it doesn't overwrite an on-screen buffer and
+ * produce tearing.
+ */
+
+/*
+ * Here, we choose a conservative value. A lower value is most likely
+ * suitable for GPU use-cases.
+ */
+#define NR_FLIPS_IN_FLIGHT_THRESHOLD 16
+
+#define CLCD_IRQ_NEXTBASE_UPDATE (1u<<2)
+
+struct pl111_drm_flip_resource;
+
+struct pl111_gem_bo_dma {
+	dma_addr_t fb_dev_addr;
+	void *fb_cpu_addr;
+};
+
+struct pl111_gem_bo_shm {
+	struct page **pages;
+	dma_addr_t *dma_addrs;
+};
+
+struct pl111_gem_bo {
+	struct drm_gem_object gem_object;
+	u32 type;
+	union {
+		struct pl111_gem_bo_dma dma;
+		struct pl111_gem_bo_shm shm;
+	} backing_data;
+	struct sg_table *sgt;
+};
+
+extern struct pl111_drm_dev_private priv;
+
+struct pl111_drm_framebuffer {
+	struct drm_framebuffer fb;
+	struct pl111_gem_bo *bo;
+};
+
+struct pl111_drm_flip_resource {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* This is the kds set associated to the dma_buf we want to flip */
+	struct kds_resource_set *kds_res_set;
+#endif
+	struct drm_framebuffer *fb;
+	struct drm_crtc *crtc;
+	struct list_head link;
+	bool page_flip;
+	struct drm_pending_vblank_event *event;
+};
+
+struct pl111_drm_crtc {
+	struct drm_crtc crtc;
+	int crtc_index;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* This protects "old_kds_res_set" and "displaying_fb" */
+	spinlock_t current_displaying_lock;
+	/*
+	 * When a buffer is displayed its associated kds resource
+	 * will be obtained and stored here. Every time a buffer
+	 * flip is completed this old kds set is released and assigned
+	 * the kds set of the new buffer.
+	 */
+	struct kds_resource_set *old_kds_res_set;
+	/*
+	 * Stores which frame buffer is currently being displayed by
+	 * this CRTC or NULL if nothing is being displayed. It is used
+	 * to tell whether we need to obtain a set of kds resources for
+	 * exported buffer objects.
+	 */
+	struct drm_framebuffer *displaying_fb;
+#endif
+	struct drm_display_mode *new_mode;
+	struct drm_display_mode *current_mode;
+	int last_bpp;
+
+	/*
+	 * This spinlock protects "update_queue", "current_update_res"
+	 * and calls to do_flip_to_res() which updates the CLCD base
+	 * registers.
+	 */
+	spinlock_t base_update_lock;
+	/*
+	 * The resource that caused a base address update. Only one can be
+	 * pending, hence it's != NULL if there's a pending update
+	 */
+	struct pl111_drm_flip_resource *current_update_res;
+	/* Queue of things waiting to update the base address */
+	struct list_head update_queue;
+
+	void (*show_framebuffer_cb)(struct pl111_drm_flip_resource *flip_res,
+				struct drm_framebuffer *fb);
+};
+
+struct pl111_drm_connector {
+	struct drm_connector connector;
+};
+
+struct pl111_drm_encoder {
+	struct drm_encoder encoder;
+};
+
+struct pl111_drm_dev_private {
+	struct pl111_drm_crtc *pl111_crtc;
+
+	struct amba_device *amba_dev;
+	unsigned long mmio_start;
+	__u32 mmio_len;
+	void *regs;
+	struct clk *clk;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct kds_callback kds_cb;
+	struct kds_callback kds_obtain_current_cb;
+#endif
+	/*
+	 * Number of flips that were started in show_framebuffer_on_crtc(),
+	 * but haven't completed yet - because we do deferred flipping
+	 */
+	atomic_t nr_flips_in_flight;
+	wait_queue_head_t wait_for_flips;
+
+	/*
+	 * Used to prevent race between pl111_dma_buf_release and
+	 * drm_gem_prime_handle_to_fd
+	 */
+	struct mutex export_dma_buf_lock;
+
+	uint32_t number_crtcs;
+
+	/* Cache for flip resources used to avoid kmalloc on each page flip */
+	struct kmem_cache *page_flip_slab;
+};
+
+enum pl111_cursor_size {
+	CURSOR_32X32,
+	CURSOR_64X64
+};
+
+enum pl111_cursor_sync {
+	CURSOR_SYNC_NONE,
+	CURSOR_SYNC_VSYNC
+};
+
+
+/**
+ * Buffer allocation function which is more flexible than dumb_create(),
+ * it allows passing driver specific flags to control the kind of buffer
+ * to be allocated.
+ */
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file_priv);
+ 
+/****** TODO MIDEGL-1718: this should be moved to uapi/include/drm/pl111_drm.h ********/
+
+/* 
+ * Parameters for different buffer objects:
+ * bit [0]: backing storage
+ *	(0 -> SHM)
+ *	(1 -> DMA)
+ * bit [2:1]: kind of mapping
+ *	(0x0 -> uncached)
+ *	(0x1 -> write combine)
+ *	(0x2 -> cached)
+ */
+#define PL111_BOT_MASK		(0x7)
+#define PL111_BOT_SHM		(0x0 << 0)
+#define PL111_BOT_DMA		(0x1 << 0)
+#define PL111_BOT_UNCACHED	(0x0 << 1)
+#define PL111_BOT_WC		(0x1 << 1)
+#define PL111_BOT_CACHED	(0x2 << 1)
+
+/**
+ * User-desired buffer creation information structure.
+ *
+ * @size: user-desired memory allocation size.
+ *      - this size value would be page-aligned internally.
+ * @flags: user request for setting memory type or cache attributes as a bit op
+ *	- PL111_BOT_DMA / PL111_BOT_SHM
+ *	- PL111_BOT_UNCACHED / PL111_BOT_WC / PL111_BOT_CACHED
+ * @handle: returned a handle to created gem object.
+ *      - this handle will be set by gem module of kernel side.
+ */
+struct drm_pl111_gem_create {
+	uint32_t height;
+	uint32_t width;
+	uint32_t bpp;
+	uint32_t flags;
+	/* handle, pitch, size will be returned */
+	uint32_t handle;
+	uint32_t pitch;
+	uint64_t size;
+};
+
+#define DRM_PL111_GEM_CREATE		0x00
+
+#define DRM_IOCTL_PL111_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + \
+			DRM_PL111_GEM_CREATE, struct drm_pl111_gem_create)
+/****************************************************************************/
+
+#define PL111_FB_FROM_FRAMEBUFFER(drm_fb) \
+	(container_of(drm_fb, struct pl111_drm_framebuffer, fb))
+
+#define PL111_BO_FROM_FRAMEBUFFER(drm_fb) \
+	(container_of(drm_fb, struct pl111_drm_framebuffer, fb)->bo)
+
+#define PL111_BO_FROM_GEM(gem_obj) \
+	container_of(gem_obj, struct pl111_gem_bo, gem_object)
+
+#define to_pl111_crtc(x) container_of(x, struct pl111_drm_crtc, crtc)
+
+#define PL111_ENCODER_FROM_ENCODER(x) \
+	container_of(x, struct pl111_drm_encoder, encoder)
+
+#define PL111_CONNECTOR_FROM_CONNECTOR(x) \
+	container_of(x, struct pl111_drm_connector, connector)
+
+#include "pl111_drm_funcs.h"
+
+#endif /* _PL111_DRM_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c
new file mode 100755
index 0000000..c7c3a22
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c
@@ -0,0 +1,170 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_connector.c
+ * Implementation of the connector functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+
+static struct {
+	int w, h, type;
+} pl111_drm_modes[] = {
+	{ 640, 480,  DRM_MODE_TYPE_PREFERRED},
+	{ 800, 600,  0},
+	{1024, 768,  0},
+	{  -1,  -1, -1}
+};
+
+void pl111_connector_destroy(struct drm_connector *connector)
+{
+	struct pl111_drm_connector *pl111_connector =
+				PL111_CONNECTOR_FROM_CONNECTOR(connector);
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	kfree(pl111_connector);
+}
+
+enum drm_connector_status pl111_connector_detect(struct drm_connector
+							*connector, bool force)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return connector_status_connected;
+}
+
+void pl111_connector_dpms(struct drm_connector *connector, int mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+}
+
+struct drm_encoder *
+pl111_connector_helper_best_encoder(struct drm_connector *connector)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	if (connector->encoder != NULL) {
+		return connector->encoder; /* Return attached encoder */
+	} else {
+		/*
+		 * If there is no attached encoder we choose the best candidate
+		 * from the list.
+		 * For PL111 there is only one encoder so we return the first
+		 * one we find.
+		 * Other h/w would require a suitable criterion below.
+		 */
+		struct drm_encoder *encoder = NULL;
+		struct drm_device *dev = connector->dev;
+
+		list_for_each_entry(encoder, &dev->mode_config.encoder_list,
+					head) {
+			if (1) { /* criterion ? */
+				break;
+			}
+		}
+		return encoder; /* return best candidate encoder */
+	}
+}
+
+int pl111_connector_helper_get_modes(struct drm_connector *connector)
+{
+	int i = 0;
+	int count = 0;
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	while (pl111_drm_modes[i].w != -1) {
+		struct drm_display_mode *mode =
+				drm_mode_find_dmt(connector->dev,
+						pl111_drm_modes[i].w,
+						pl111_drm_modes[i].h,
+						60
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+						, false
+#endif
+						);
+
+		if (mode != NULL) {
+			mode->type |= pl111_drm_modes[i].type;
+			drm_mode_probed_add(connector, mode);
+			count++;
+		}
+
+		i++;
+	}
+
+	DRM_DEBUG_KMS("found %d modes\n", count);
+
+	return count;
+}
+
+int pl111_connector_helper_mode_valid(struct drm_connector *connector,
+					struct drm_display_mode *mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return MODE_OK;
+}
+
+const struct drm_connector_funcs connector_funcs = {
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = pl111_connector_destroy,
+	.detect = pl111_connector_detect,
+	.dpms = pl111_connector_dpms,
+};
+
+const struct drm_connector_helper_funcs connector_helper_funcs = {
+	.get_modes = pl111_connector_helper_get_modes,
+	.mode_valid = pl111_connector_helper_mode_valid,
+	.best_encoder = pl111_connector_helper_best_encoder,
+};
+
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev)
+{
+	struct pl111_drm_connector *pl111_connector;
+
+	pl111_connector = kzalloc(sizeof(struct pl111_drm_connector),
+					GFP_KERNEL);
+
+	if (pl111_connector == NULL) {
+		pr_err("Failed to allocated pl111_drm_connector\n");
+		return NULL;
+	}
+
+	drm_connector_init(dev, &pl111_connector->connector, &connector_funcs,
+				DRM_MODE_CONNECTOR_DVII);
+
+	drm_connector_helper_add(&pl111_connector->connector,
+					&connector_helper_funcs);
+
+	drm_sysfs_connector_add(&pl111_connector->connector);
+
+	return pl111_connector;
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c
new file mode 100755
index 0000000..ede07ff
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c
@@ -0,0 +1,449 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_crtc.c
+ * Implementation of the CRTC functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+static int pl111_crtc_num;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc)
+{
+	struct drm_device *dev = pl111_crtc->crtc.dev;
+	struct pl111_drm_flip_resource *old_flip_res;
+	struct pl111_gem_bo *bo;
+	unsigned long irq_flags;
+	int flips_in_flight;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	unsigned long flags;
+#endif
+
+	spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+
+	/*
+	 * Cache the flip resource that caused the IRQ since it will be
+	 * dispatched later. Early return if the IRQ isn't associated to
+	 * a base register update.
+	 * 
+	 * TODO MIDBASE-2790: disable IRQs when a flip is not pending.
+	 */
+	old_flip_res = pl111_crtc->current_update_res;
+	if (!old_flip_res) {
+		spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+		return;
+	}
+	pl111_crtc->current_update_res = NULL;
+
+	/* Prepare the next flip (if any) of the queue as soon as possible. */
+	if (!list_empty(&pl111_crtc->update_queue)) {
+		struct pl111_drm_flip_resource *flip_res;
+		/* Remove the head of the list */
+		flip_res = list_first_entry(&pl111_crtc->update_queue,
+			struct pl111_drm_flip_resource, link);
+		list_del(&flip_res->link);
+		do_flip_to_res(flip_res);
+		/*
+		 * current_update_res will be set, so guarentees that
+		 * another flip_res coming in gets queued instead of
+		 * handled immediately
+		 */
+	}
+	spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+	/* Finalize properly the flip that caused the IRQ */
+	DRM_DEBUG_KMS("DRM Finalizing old_flip_res=%p\n", old_flip_res);
+
+	bo = PL111_BO_FROM_FRAMEBUFFER(old_flip_res->fb);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+	release_kds_resource_and_display(old_flip_res);
+	spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+	/* Release DMA buffer on this flip */
+
+	if (bo->gem_object.export_dma_buf != NULL)
+		dma_buf_put(bo->gem_object.export_dma_buf);
+
+	drm_handle_vblank(dev, pl111_crtc->crtc_index);
+
+	/* Wake up any processes waiting for page flip event */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (old_flip_res->event) {
+		spin_lock_bh(&dev->event_lock);
+		drm_send_vblank_event(dev, pl111_crtc->crtc_index,
+					old_flip_res->event);
+		spin_unlock_bh(&dev->event_lock);
+	}
+#else
+	if (old_flip_res->event) {
+		struct drm_pending_vblank_event *e = old_flip_res->event;
+		struct timeval now;
+		unsigned int seq;
+
+		DRM_DEBUG_KMS("%s: wake up page flip event (%p)\n", __func__,
+				old_flip_res->event);
+
+		spin_lock_bh(&dev->event_lock);
+		seq = drm_vblank_count_and_time(dev, pl111_crtc->crtc_index,
+							&now);
+		e->pipe = pl111_crtc->crtc_index;
+		e->event.sequence = seq;
+		e->event.tv_sec = now.tv_sec;
+		e->event.tv_usec = now.tv_usec;
+
+		list_add_tail(&e->base.link,
+			&e->base.file_priv->event_list);
+
+		wake_up_interruptible(&e->base.file_priv->event_wait);
+		spin_unlock_bh(&dev->event_lock);
+	}
+#endif
+
+	drm_vblank_put(dev, pl111_crtc->crtc_index);
+
+	/*
+	 * workqueue.c:process_one_work():
+	 * "It is permissible to free the struct work_struct from
+	 *  inside the function that is called from it"
+	 */
+	kmem_cache_free(priv.page_flip_slab, old_flip_res);
+
+	flips_in_flight = atomic_dec_return(&priv.nr_flips_in_flight);
+	if (flips_in_flight == 0 ||
+			flips_in_flight == (NR_FLIPS_IN_FLIGHT_THRESHOLD - 1))
+		wake_up(&priv.wait_for_flips);
+
+	DRM_DEBUG_KMS("DRM release flip_res=%p\n", old_flip_res);
+}
+
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2)
+{
+	struct pl111_drm_flip_resource *flip_res = cb1;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+	pl111_crtc->show_framebuffer_cb(cb1, cb2);
+}
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+				struct drm_framebuffer *fb, bool page_flip,
+				struct drm_pending_vblank_event *event)
+{
+	struct pl111_gem_bo *bo;
+	struct pl111_drm_flip_resource *flip_res;
+	int flips_in_flight;
+	int old_flips_in_flight;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+	crtc->fb = fb;
+#else
+	crtc->primary->fb = fb;
+#endif
+
+	bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	if (bo == NULL) {
+		DRM_DEBUG_KMS("Failed to get pl111_gem_bo object\n");
+		return -EINVAL;
+	}
+
+	/* If this is a full modeset, wait for all outstanding flips to complete
+	 * before continuing. This avoids unnecessary complication from being
+	 * able to queue up multiple modesets and queues of mixed modesets and
+	 * page flips.
+	 *
+	 * Modesets should be uncommon and will not be performant anyway, so
+	 * making them synchronous should have negligible performance impact.
+	 */
+	if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+				atomic_read(&priv.nr_flips_in_flight) == 0);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * There can be more 'early display' flips in flight than there are
+	 * buffers, and there is (currently) no explicit bound on the number of
+	 * flips. Hence, we need a new allocation for each one.
+	 *
+	 * Note: this could be optimized down if we knew a bound on the flips,
+	 * since an application can only have so many buffers in flight to be
+	 * useful/not hog all the memory
+	 */
+	flip_res = kmem_cache_alloc(priv.page_flip_slab, GFP_KERNEL);
+	if (flip_res == NULL) {
+		pr_err("kmem_cache_alloc failed to alloc - flip ignored\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * increment flips in flight, whilst blocking when we reach
+	 * NR_FLIPS_IN_FLIGHT_THRESHOLD
+	 */
+	do {
+		/*
+		 * Note: use of assign-and-then-compare in the condition to set
+		 * flips_in_flight
+		 */
+		int ret = wait_event_killable(priv.wait_for_flips,
+				(flips_in_flight =
+					atomic_read(&priv.nr_flips_in_flight))
+				< NR_FLIPS_IN_FLIGHT_THRESHOLD);
+		if (ret != 0) {
+			kmem_cache_free(priv.page_flip_slab, flip_res);
+			return ret;
+		}
+
+		old_flips_in_flight = atomic_cmpxchg(&priv.nr_flips_in_flight,
+					flips_in_flight, flips_in_flight + 1);
+	} while (old_flips_in_flight != flips_in_flight);
+
+	flip_res->fb = fb;
+	flip_res->crtc = crtc;
+	flip_res->page_flip = page_flip;
+	flip_res->event = event;
+	INIT_LIST_HEAD(&flip_res->link);
+	DRM_DEBUG_KMS("DRM alloc flip_res=%p\n", flip_res);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	if (bo->gem_object.export_dma_buf != NULL) {
+		struct dma_buf *buf = bo->gem_object.export_dma_buf;
+		unsigned long shared[1] = { 0 };
+		struct kds_resource *resource_list[1] = {
+				get_dma_buf_kds_resource(buf) };
+		int err;
+
+		get_dma_buf(buf);
+		DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+
+		/* Wait for the KDS resource associated with this buffer */
+		err = kds_async_waitall(&flip_res->kds_res_set,
+					&priv.kds_cb, flip_res, fb, 1, shared,
+					resource_list);
+		BUG_ON(err);
+	} else {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+		DRM_DEBUG_KMS("No dma_buf for this flip\n");
+
+		/* No dma-buf attached so just call the callback directly */
+		flip_res->kds_res_set = NULL;
+		pl111_crtc->show_framebuffer_cb(flip_res, fb);
+	}
+#else
+	if (bo->gem_object.export_dma_buf != NULL) {
+		struct dma_buf *buf = bo->gem_object.export_dma_buf;
+
+		get_dma_buf(buf);
+		DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+	} else {
+		DRM_DEBUG_KMS("No dma_buf for this flip\n");
+	}
+
+	/* No dma-buf attached to this so just call the callback directly */
+	{
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		pl111_crtc->show_framebuffer_cb(flip_res, fb);
+	}
+#endif
+
+	/* For the same reasons as the wait at the start of this function,
+	 * wait for the modeset to complete before continuing.
+	 */
+	if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+				flips_in_flight == 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int pl111_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+			struct drm_pending_vblank_event *event)
+#else
+			struct drm_pending_vblank_event *event,
+			uint32_t flags)
+#endif
+{
+	DRM_DEBUG_KMS("%s: crtc=%p, fb=%p, event=%p\n",
+			__func__, crtc, fb, event);
+	return show_framebuffer_on_crtc(crtc, fb, true, event);
+}
+
+int pl111_crtc_helper_mode_set(struct drm_crtc *crtc,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode,
+				int x, int y, struct drm_framebuffer *old_fb)
+{
+	int ret;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+	struct drm_display_mode *duplicated_mode;
+
+	DRM_DEBUG_KMS("DRM crtc_helper_mode_set, x=%d y=%d bpp=%d\n",
+			adjusted_mode->hdisplay, adjusted_mode->vdisplay,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+			crtc->fb->bits_per_pixel);
+#else
+			crtc->primary->fb->bits_per_pixel);
+#endif
+
+	duplicated_mode = drm_mode_duplicate(crtc->dev, adjusted_mode);
+	if (!duplicated_mode)
+		return -ENOMEM;
+
+	pl111_crtc->new_mode = duplicated_mode;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+	ret = show_framebuffer_on_crtc(crtc, crtc->fb, false, NULL);
+#else
+	ret = show_framebuffer_on_crtc(crtc, crtc->primary->fb, false, NULL);
+#endif
+	if (ret != 0) {
+		pl111_crtc->new_mode = pl111_crtc->current_mode;
+		drm_mode_destroy(crtc->dev, duplicated_mode);
+	}
+
+	return ret;
+}
+
+void pl111_crtc_helper_prepare(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+void pl111_crtc_helper_commit(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+bool pl111_crtc_helper_mode_fixup(struct drm_crtc *crtc,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0))
+				const struct drm_display_mode *mode,
+#else
+				struct drm_display_mode *mode,
+#endif
+				struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+#ifdef CONFIG_ARCH_VEXPRESS
+	/*
+	 * 1024x768 with more than 16 bits per pixel may not work 
+	 * correctly on Versatile Express due to bandwidth issues
+	 */
+	if (mode->hdisplay == 1024 && mode->vdisplay == 768 &&
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+			crtc->fb->bits_per_pixel > 16) {
+#else
+			crtc->primary->fb->bits_per_pixel > 16) {
+#endif
+		DRM_INFO("*WARNING* 1024x768 at > 16 bpp may suffer corruption\n");
+	}
+#endif
+
+	return true;
+}
+
+void pl111_crtc_helper_disable(struct drm_crtc *crtc)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+	/* don't disable crtc until no flips in flight as irq will be disabled */
+	ret = wait_event_killable(priv.wait_for_flips, atomic_read(&priv.nr_flips_in_flight) == 0);
+	if(ret) {
+		pr_err("pl111_crtc_helper_disable failed\n");
+		return;
+	}
+	clcd_disable(crtc);
+}
+
+void pl111_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+	drm_crtc_cleanup(crtc);
+	kfree(pl111_crtc);
+}
+
+const struct drm_crtc_funcs crtc_funcs = {
+	.cursor_set = pl111_crtc_cursor_set,
+	.cursor_move = pl111_crtc_cursor_move,
+	.set_config = drm_crtc_helper_set_config,
+	.page_flip = pl111_crtc_page_flip,
+	.destroy = pl111_crtc_destroy
+};
+
+const struct drm_crtc_helper_funcs crtc_helper_funcs = {
+	.mode_set = pl111_crtc_helper_mode_set,
+	.prepare = pl111_crtc_helper_prepare,
+	.commit = pl111_crtc_helper_commit,
+	.mode_fixup = pl111_crtc_helper_mode_fixup,
+	.disable = pl111_crtc_helper_disable,
+};
+
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev)
+{
+	struct pl111_drm_crtc *pl111_crtc;
+
+	pl111_crtc = kzalloc(sizeof(struct pl111_drm_crtc), GFP_KERNEL);
+	if (pl111_crtc == NULL) {
+		pr_err("Failed to allocated pl111_drm_crtc\n");
+		return NULL;
+	}
+
+	drm_crtc_init(dev, &pl111_crtc->crtc, &crtc_funcs);
+	drm_crtc_helper_add(&pl111_crtc->crtc, &crtc_helper_funcs);
+
+	pl111_crtc->crtc_index = pl111_crtc_num;
+	pl111_crtc_num++;
+	pl111_crtc->crtc.enabled = 0;
+	pl111_crtc->last_bpp = 0;
+	pl111_crtc->current_update_res = NULL;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	pl111_crtc->displaying_fb = NULL;
+	pl111_crtc->old_kds_res_set = NULL;
+	spin_lock_init(&pl111_crtc->current_displaying_lock);
+#endif
+	pl111_crtc->show_framebuffer_cb = show_framebuffer_on_crtc_cb_internal;
+	INIT_LIST_HEAD(&pl111_crtc->update_queue);
+	spin_lock_init(&pl111_crtc->base_update_lock);
+
+	return pl111_crtc;
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c
new file mode 100755
index 0000000..4bf20fe
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c
@@ -0,0 +1,331 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_cursor.c
+ * Implementation of cursor functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_clcd_ext.h"
+#include "pl111_drm.h"
+
+#define PL111_MAX_CURSOR_WIDTH (64)
+#define PL111_MAX_CURSOR_HEIGHT (64)
+
+#define ARGB_2_LBBP_BINARY_THRESHOLD	(1 << 7)
+#define ARGB_ALPHA_SHIFT		24
+#define ARGB_ALPHA_MASK			(0xff << ARGB_ALPHA_SHIFT)
+#define ARGB_RED_SHIFT			16
+#define ARGB_RED_MASK			(0xff << ARGB_RED_SHIFT)
+#define ARGB_GREEN_SHIFT		8
+#define ARGB_GREEN_MASK			(0xff << ARGB_GREEN_SHIFT)
+#define ARGB_BLUE_SHIFT			0
+#define ARGB_BLUE_MASK			(0xff << ARGB_BLUE_SHIFT)
+
+
+void pl111_set_cursor_size(enum pl111_cursor_size size)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+	if (size == CURSOR_64X64)
+		reg_data |= CRSR_CONFIG_CRSR_SIZE;
+	else
+		reg_data &= ~CRSR_CONFIG_CRSR_SIZE;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor_sync(enum pl111_cursor_sync sync)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+	if (sync == CURSOR_SYNC_VSYNC)
+		reg_data |= CRSR_CONFIG_CRSR_FRAME_SYNC;
+	else
+		reg_data &= ~CRSR_CONFIG_CRSR_FRAME_SYNC;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor(u32 cursor)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+	reg_data &= ~(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT);
+	reg_data |= (cursor & CRSR_CTRL_CRSR_MAX) << CRSR_CTRL_CRSR_NUM_SHIFT;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_enable(bool enable)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+	if (enable)
+		reg_data |= CRSR_CTRL_CRSR_ON;
+	else
+		reg_data &= ~CRSR_CTRL_CRSR_ON;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_position(u32 x, u32 y)
+{
+	u32 reg_data = (x & CRSR_XY_MASK) |
+			((y & CRSR_XY_MASK) << CRSR_XY_Y_SHIFT);
+
+	writel(reg_data, priv.regs + CLCD_CRSR_XY);
+}
+
+void pl111_set_cursor_clipping(u32 x, u32 y)
+{
+	u32 reg_data;
+
+	/* 
+	 * Do not allow setting clipping values larger than
+	 * the cursor size since the cursor is already fully hidden
+	 * when x,y = PL111_MAX_CURSOR_WIDTH.
+	 */
+	if (x > PL111_MAX_CURSOR_WIDTH)
+		x = PL111_MAX_CURSOR_WIDTH;
+	if (y > PL111_MAX_CURSOR_WIDTH)
+		y = PL111_MAX_CURSOR_WIDTH;
+
+	reg_data = (x & CRSR_CLIP_MASK) |
+			((y & CRSR_CLIP_MASK) << CRSR_CLIP_Y_SHIFT);
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CLIP);
+}
+
+void pl111_set_cursor_palette(u32 color0, u32 color1)
+{
+	writel(color0 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_0);
+	writel(color1 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_1);
+}
+
+void pl111_cursor_enable(void)
+{
+	pl111_set_cursor_sync(CURSOR_SYNC_VSYNC);
+	pl111_set_cursor_size(CURSOR_64X64);
+	pl111_set_cursor_palette(0x0, 0x00ffffff);
+	pl111_set_cursor_enable(true);
+}
+
+void pl111_cursor_disable(void)
+{
+	pl111_set_cursor_enable(false);
+}
+
+/* shift required to locate pixel into the correct position in
+ * a cursor LBBP word, indexed by x mod 16.
+ */
+static const unsigned char
+x_mod_16_to_value_shift[CLCD_CRSR_IMAGE_PIXELS_PER_WORD] = {
+	6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24
+};
+
+/* Pack the pixel value into its correct position in the buffer as specified
+ * for LBBP */
+static inline void
+set_lbbp_pixel(uint32_t *buffer, unsigned int x, unsigned int y,
+				uint32_t value)
+{
+	u32 *cursor_ram = priv.regs + CLCD_CRSR_IMAGE;
+	uint32_t shift;
+	uint32_t data;
+
+	shift = x_mod_16_to_value_shift[x % CLCD_CRSR_IMAGE_PIXELS_PER_WORD];
+
+	/* Get the word containing this pixel */
+	cursor_ram = cursor_ram + (x >> CLCD_CRSR_IMAGE_WORDS_PER_LINE) + (y << 2);
+
+	/* Update pixel in cursor RAM */
+	data = readl(cursor_ram);
+	data &= ~(CLCD_CRSR_LBBP_COLOR_MASK << shift);
+	data |= value << shift;
+	writel(data, cursor_ram);
+}
+
+static u32 pl111_argb_to_lbbp(u32 argb_pix)
+{
+	u32 lbbp_pix = CLCD_CRSR_LBBP_TRANSPARENT;
+	u32 alpha = (argb_pix & ARGB_ALPHA_MASK) >> ARGB_ALPHA_SHIFT;
+	u32 red = (argb_pix & ARGB_RED_MASK) >> ARGB_RED_SHIFT;
+	u32 green = (argb_pix & ARGB_GREEN_MASK) >> ARGB_GREEN_SHIFT;
+	u32 blue =  (argb_pix & ARGB_BLUE_MASK) >> ARGB_BLUE_SHIFT;
+
+	/*
+	 * Converting from 8 pixel transparency to binary transparency
+	 * it's the best we can achieve.
+	 */
+	if (alpha & ARGB_2_LBBP_BINARY_THRESHOLD) {
+		u32 gray, max, min;
+
+		/*
+		 * Convert to gray using the lightness method:
+		 * gray = [max(R,G,B) + min(R,G,B)]/2
+		 */
+		min = min(red, green);
+		min = min(min, blue);
+		max = max(red, green);
+		max = max(max, blue);
+		gray = (min + max) >> 1; /* divide by 2 */
+		/* Apply binary threshold to the gray value calculated */
+		if (gray & ARGB_2_LBBP_BINARY_THRESHOLD)
+			lbbp_pix = CLCD_CRSR_LBBP_FOREGROUND;
+		else
+			lbbp_pix = CLCD_CRSR_LBBP_BACKGROUND;
+	}
+
+	return lbbp_pix;
+}
+
+/*
+ * The PL111 hardware cursor supports only LBBP which is a 2bpp format but
+ * the cursor format from userspace is ARGB8888 so we need to convert
+ *  to LBBP here.
+ */
+static void pl111_set_cursor_image(u32 *data)
+{
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+	/* Add 1 on width to insert trailing NULL */
+	char string_cursor[PL111_MAX_CURSOR_WIDTH + 1];
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+	unsigned int x;
+	unsigned int y;
+
+	for (y = 0; y < PL111_MAX_CURSOR_HEIGHT; y++) {
+		for (x = 0; x < PL111_MAX_CURSOR_WIDTH; x++) {
+			u32 value = pl111_argb_to_lbbp(*data);
+
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+			if (value == CLCD_CRSR_LBBP_TRANSPARENT)
+				string_cursor[x] = 'T';
+			else if (value == CLCD_CRSR_LBBP_FOREGROUND)
+				string_cursor[x] = 'F';
+			else if (value == CLCD_CRSR_LBBP_INVERSE)
+				string_cursor[x] = 'I';
+			else
+				string_cursor[x] = 'B';
+
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+			set_lbbp_pixel(data, x, y, value);
+			++data;
+		}
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+		string_cursor[PL111_MAX_CURSOR_WIDTH] = '\0';
+		DRM_INFO("%s\n", string_cursor);
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+	}
+}
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+			   struct drm_file *file_priv,
+			   uint32_t handle,
+			   uint32_t width,
+			   uint32_t height)
+{
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+
+	DRM_DEBUG_KMS("handle = %u, width = %u, height = %u\n",
+		handle, width, height);
+
+	if (!handle) {
+		pl111_cursor_disable();
+		return 0;
+	}
+
+	if ((width != PL111_MAX_CURSOR_WIDTH) ||
+	    (height != PL111_MAX_CURSOR_HEIGHT))
+		return -EINVAL;
+
+	obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
+	if (!obj) {
+		DRM_ERROR("Cannot find cursor object for handle = %d\n",
+			  handle);
+		return -ENOENT;
+	}
+
+	/*
+	 * We expect a PL111_MAX_CURSOR_WIDTH x PL111_MAX_CURSOR_HEIGHT
+	 * ARGB888 buffer object in the input.
+	 *
+	 */
+	if (obj->size < (PL111_MAX_CURSOR_WIDTH * PL111_MAX_CURSOR_HEIGHT * 4)) {
+		DRM_ERROR("Cannot set cursor with an obj size = %d\n",
+			  obj->size);
+		drm_gem_object_unreference_unlocked(obj);
+		return -EINVAL;
+	}
+
+	bo = PL111_BO_FROM_GEM(obj);
+	if (!(bo->type & PL111_BOT_DMA)) {
+		DRM_ERROR("Tried to set cursor with non DMA backed obj = %p\n",
+			  obj);
+		drm_gem_object_unreference_unlocked(obj);
+		return -EINVAL;
+	}
+
+	pl111_set_cursor_image(bo->backing_data.dma.fb_cpu_addr);
+
+	/*
+	 * Since we copy the contents of the buffer to the HW cursor internal
+	 * memory this GEM object is not needed anymore.
+	 */
+	drm_gem_object_unreference_unlocked(obj);
+
+	pl111_cursor_enable();
+
+	return 0;
+}
+
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+			    int x, int y)
+{
+	int x_clip = 0;
+	int y_clip = 0;
+
+	DRM_DEBUG("x %d y %d\n", x, y);
+
+	/*
+	 * The cursor image is clipped automatically at the screen limits when
+	 * it extends beyond the screen image to the right or bottom but
+	 * we must clip it using pl111 HW features for negative values.
+	 */
+	if (x < 0) {
+		x_clip = -x;
+		x = 0;
+	}
+	if (y < 0) {
+		y_clip = -y;
+		y = 0;
+	}
+
+	pl111_set_cursor_clipping(x_clip, y_clip);
+	pl111_set_cursor_position(x, y);
+
+	return 0;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c
new file mode 100755
index 0000000..6619c07
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c
@@ -0,0 +1,338 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_device.c
+ * Implementation of the Linux device driver entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+struct pl111_drm_dev_private priv;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void initial_kds_obtained(void *cb1, void *cb2)
+{
+	wait_queue_head_t *wait = (wait_queue_head_t *) cb1;
+	bool *cb_has_called = (bool *) cb2;
+
+	*cb_has_called = true;
+	wake_up(wait);
+}
+
+/* Must be called from within current_displaying_lock spinlock */
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	pl111_crtc->displaying_fb = flip_res->fb;
+
+	/* Release the previous buffer */
+	if (pl111_crtc->old_kds_res_set != NULL) {
+		/*
+		 * Can flip to the same buffer, but must not release the current
+		 * resource set
+		 */
+		BUG_ON(pl111_crtc->old_kds_res_set == flip_res->kds_res_set);
+		kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+	}
+	/* Record the current buffer, to release on the next buffer flip */
+	pl111_crtc->old_kds_res_set = flip_res->kds_res_set;
+}
+#endif
+
+void pl111_drm_preclose(struct drm_device *dev, struct drm_file *file_priv)
+{
+	DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+void pl111_drm_lastclose(struct drm_device *dev)
+{
+	DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+/*
+ * pl111 does not have a proper HW counter for vblank IRQs so enable_vblank
+ * and disable_vblank are just no op callbacks.
+ */
+static int pl111_enable_vblank(struct drm_device *dev, int crtc)
+{
+	DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+	return 0;
+}
+
+static void pl111_disable_vblank(struct drm_device *dev, int crtc)
+{
+	DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+}
+
+struct drm_mode_config_funcs mode_config_funcs = {
+	.fb_create = pl111_fb_create,
+};
+
+static int pl111_modeset_init(struct drm_device *dev)
+{
+	struct drm_mode_config *mode_config;
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	struct pl111_drm_connector *pl111_connector;
+	struct pl111_drm_encoder *pl111_encoder;
+	int ret = 0;
+
+	if (priv == NULL)
+		return -EINVAL;
+
+	drm_mode_config_init(dev);
+	mode_config = &dev->mode_config;
+	mode_config->funcs = &mode_config_funcs;
+	mode_config->min_width = 1;
+	mode_config->max_width = 1024;
+	mode_config->min_height = 1;
+	mode_config->max_height = 768;
+
+	priv->pl111_crtc = pl111_crtc_create(dev);
+	if (priv->pl111_crtc == NULL) {
+		pr_err("Failed to create pl111_drm_crtc\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	priv->number_crtcs = 1;
+
+	pl111_connector = pl111_connector_create(dev);
+	if (pl111_connector == NULL) {
+		pr_err("Failed to create pl111_drm_connector\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	pl111_encoder = pl111_encoder_create(dev, 1);
+	if (pl111_encoder == NULL) {
+		pr_err("Failed to create pl111_drm_encoder\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	ret = drm_mode_connector_attach_encoder(&pl111_connector->connector,
+						&pl111_encoder->encoder);
+	if (ret != 0) {
+		DRM_ERROR("Failed to attach encoder\n");
+		goto out_config;
+	}
+
+	pl111_connector->connector.encoder = &pl111_encoder->encoder;
+
+	pl111_encoder->encoder.crtc = &priv->pl111_crtc->crtc;
+
+	goto finish;
+
+out_config:
+	drm_mode_config_cleanup(dev);
+finish:
+	DRM_DEBUG("%s returned %d\n", __func__, ret);
+	return ret;
+}
+
+static void pl111_modeset_fini(struct drm_device *dev)
+{
+	drm_mode_config_cleanup(dev);
+}
+
+static int pl111_drm_load(struct drm_device *dev, unsigned long chipset)
+{
+	int ret = 0;
+
+	pr_info("DRM %s\n", __func__);
+
+	mutex_init(&priv.export_dma_buf_lock);
+	atomic_set(&priv.nr_flips_in_flight, 0);
+	init_waitqueue_head(&priv.wait_for_flips);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	ret = kds_callback_init(&priv.kds_cb, 1, show_framebuffer_on_crtc_cb);
+	if (ret != 0) {
+		pr_err("Failed to initialise KDS callback\n");
+		goto finish;
+	}
+
+	ret = kds_callback_init(&priv.kds_obtain_current_cb, 1,
+				initial_kds_obtained);
+	if (ret != 0) {
+		pr_err("Failed to init KDS obtain callback\n");
+		kds_callback_term(&priv.kds_cb);
+		goto finish;
+	}
+#endif
+
+	/* Create a cache for page flips */
+	priv.page_flip_slab = kmem_cache_create("page flip slab",
+			sizeof(struct pl111_drm_flip_resource), 0, 0, NULL);
+	if (priv.page_flip_slab == NULL) {
+		DRM_ERROR("Failed to create slab\n");
+		ret = -ENOMEM;
+		goto out_kds_callbacks;
+	}
+
+	dev->dev_private = &priv;
+
+	ret = pl111_modeset_init(dev);
+	if (ret != 0) {
+		pr_err("Failed to init modeset\n");
+		goto out_slab;
+	}
+
+	ret = pl111_device_init(dev);
+	if (ret != 0) {
+		DRM_ERROR("Failed to init MMIO and IRQ\n");
+		goto out_modeset;
+	}
+
+	ret = drm_vblank_init(dev, 1);
+	if (ret != 0) {
+		DRM_ERROR("Failed to init vblank\n");
+		goto out_vblank;
+	}
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(3, 13, 0))
+	platform_set_drvdata(dev->platformdev, dev);
+#endif
+
+	goto finish;
+
+out_vblank:
+	pl111_device_fini(dev);
+out_modeset:
+	pl111_modeset_fini(dev);
+out_slab:
+	kmem_cache_destroy(priv.page_flip_slab);
+out_kds_callbacks:
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	kds_callback_term(&priv.kds_obtain_current_cb);
+	kds_callback_term(&priv.kds_cb);
+#endif
+finish:
+	DRM_DEBUG_KMS("pl111_drm_load returned %d\n", ret);
+	return ret;
+}
+
+static int pl111_drm_unload(struct drm_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+
+	kmem_cache_destroy(priv.page_flip_slab);
+
+	drm_vblank_cleanup(dev);
+	pl111_modeset_fini(dev);
+	pl111_device_fini(dev);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	kds_callback_term(&priv.kds_obtain_current_cb);
+	kds_callback_term(&priv.kds_cb);
+#endif
+	return 0;
+}
+
+static struct vm_operations_struct pl111_gem_vm_ops = {
+	.fault = pl111_gem_fault,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+#else
+	.open = pl111_gem_vm_open,
+	.close = pl111_gem_vm_close,
+#endif
+};
+
+static const struct file_operations drm_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = pl111_gem_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+};
+
+static struct drm_ioctl_desc pl111_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(PL111_GEM_CREATE, pl111_drm_gem_create_ioctl,
+		DRM_CONTROL_ALLOW | DRM_UNLOCKED),
+};
+
+static struct drm_driver driver = {
+	.driver_features =
+		DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME,
+	.load = pl111_drm_load,
+	.unload = pl111_drm_unload,
+	.context_dtor = NULL,
+	.preclose = pl111_drm_preclose,
+	.lastclose = pl111_drm_lastclose,
+	.suspend = pl111_drm_suspend,
+	.resume = pl111_drm_resume,
+	.get_vblank_counter = drm_vblank_count,
+	.enable_vblank = pl111_enable_vblank,
+	.disable_vblank = pl111_disable_vblank,
+	.ioctls = pl111_ioctls,
+	.fops = &drm_fops,
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+	.dumb_create = pl111_dumb_create,
+	.dumb_destroy = pl111_dumb_destroy,
+	.dumb_map_offset = pl111_dumb_map_offset,
+	.gem_free_object = pl111_gem_free_object,
+	.gem_vm_ops = &pl111_gem_vm_ops,
+	.prime_handle_to_fd = &pl111_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_export = &pl111_gem_prime_export,
+	.gem_prime_import = &pl111_gem_prime_import,
+};
+
+int pl111_drm_init(struct platform_device *dev)
+{
+	int ret;
+	pr_info("DRM %s\n", __func__);
+	pr_info("PL111 DRM initialize, driver name: %s, version %d.%d\n",
+		DRIVER_NAME, DRIVER_MAJOR, DRIVER_MINOR);
+	driver.num_ioctls = ARRAY_SIZE(pl111_ioctls);
+	ret = 0;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 15, 0))
+	driver.kdriver.platform_device = dev;
+#endif
+	return drm_platform_init(&driver, dev);
+
+}
+
+void pl111_drm_exit(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	drm_platform_exit(&driver, dev);
+#else
+	drm_put_dev(platform_get_drvdata(dev));
+#endif
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
new file mode 100755
index 0000000..1131f46
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
@@ -0,0 +1,625 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_dma_buf.c
+ * Implementation of the dma_buf functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void obtain_kds_if_currently_displayed(struct drm_device *dev,
+						struct pl111_gem_bo *bo,
+						struct dma_buf *dma_buf)
+{
+	unsigned long shared[1] = { 0 };
+	struct kds_resource *resource_list[1];
+	struct kds_resource_set *kds_res_set;
+	struct drm_crtc *crtc;
+	bool cb_has_called = false;
+	unsigned long flags;
+	int err;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	DRM_DEBUG_KMS("Obtaining initial KDS res for bo:%p dma_buf:%p\n",
+			bo, dma_buf);
+
+	resource_list[0] = get_dma_buf_kds_resource(dma_buf);
+	get_dma_buf(dma_buf);
+
+	/*
+	 * Can't use kds_waitall(), because kbase will be let through due to
+	 * locked ignore'
+	 */
+	err = kds_async_waitall(&kds_res_set,
+				&priv.kds_obtain_current_cb, &wake,
+				&cb_has_called, 1, shared, resource_list);
+	BUG_ON(err);
+	wait_event(wake, cb_has_called == true);
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb) {
+			struct pl111_drm_framebuffer *pl111_fb;
+			struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+			pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+
+			if (pl111_fb->bo == bo) {
+				DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+				DRM_DEBUG_KMS(" is being displayed, keeping\n");
+				/* There shouldn't be a previous buffer to release */
+				BUG_ON(pl111_crtc->old_kds_res_set);
+
+				if (kds_res_set == NULL) {
+					err = kds_async_waitall(&kds_res_set,
+							&priv.kds_obtain_current_cb,
+							&wake, &cb_has_called,
+							1, shared, resource_list);
+					BUG_ON(err);
+					wait_event(wake, cb_has_called == true);
+				}
+
+				/* Current buffer will need releasing on next flip */
+				pl111_crtc->old_kds_res_set = kds_res_set;
+
+				/*
+				* Clear kds_res_set, so a new kds_res_set is allocated
+				* for additional CRTCs
+				*/
+				kds_res_set = NULL;
+			}
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+	}
+
+	/* kds_res_set will be NULL here if any CRTCs are displaying fb */
+	if (kds_res_set != NULL) {
+		DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+		DRM_DEBUG_KMS(" not being displayed, discarding\n");
+		/* They're not being displayed, release them */
+		kds_resource_set_release(&kds_res_set);
+	}
+
+	dma_buf_put(dma_buf);
+}
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0))
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+			struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = buffer->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+	mutex_lock(&dev->struct_mutex);
+	ret = drm_gem_mmap_obj(obj, obj->size, vma);
+	mutex_unlock(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#else
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+			struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = buffer->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+
+	DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+	mutex_lock(&dev->struct_mutex);
+
+	/* Check for valid size. */
+	if (obj->size < vma->vm_end - vma->vm_start)
+		return -EINVAL;
+
+	BUG_ON(!dev->driver->gem_vm_ops);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+#endif
+
+	vma->vm_ops = dev->driver->gem_vm_ops;
+	vma->vm_private_data = obj;
+
+	/* Take a ref for this mapping of the object, so that the fault
+	* handler can dereference the mmap offset's pointer to the object.
+	* This reference is cleaned up by the corresponding vm_close
+	* (which should happen whether the vma was created by this call, or
+	* by a vm_open due to mremap or partial unmap or whatever).
+	*/
+	drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+	pl111_drm_vm_open_locked(dev, vma);
+#else
+	drm_vm_open_locked(dev, vma);
+#endif
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#endif /* KERNEL_VERSION */
+
+static void pl111_dma_buf_release(struct dma_buf *buf)
+{
+	/*
+	 * Need to release the dma_buf's reference on the gem object it was
+	 * exported from, and also clear the gem object's export_dma_buf
+	 * pointer to this dma_buf as it no longer exists
+	 */
+	struct drm_gem_object *obj = (struct drm_gem_object *)buf->priv;
+	struct pl111_gem_bo *bo;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct drm_crtc *crtc;
+	unsigned long flags;
+#endif
+	bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("Releasing dma_buf %p, drm_gem_obj=%p\n", buf, obj);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	list_for_each_entry(crtc, &bo->gem_object.dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb) {
+			struct pl111_drm_framebuffer *pl111_fb;
+			struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+			pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+			if (pl111_fb->bo == bo) {
+				kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+				pl111_crtc->old_kds_res_set = NULL;
+			}
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+	}
+#endif
+	mutex_lock(&priv.export_dma_buf_lock);
+
+	obj->export_dma_buf = NULL;
+	drm_gem_object_unreference_unlocked(obj);
+
+	mutex_unlock(&priv.export_dma_buf_lock);
+}
+
+static int pl111_dma_buf_attach(struct dma_buf *buf, struct device *dev,
+				struct dma_buf_attachment *attach)
+{
+	DRM_DEBUG_KMS("Attaching dma_buf %p to device %p attach=%p\n", buf,
+			dev, attach);
+
+	attach->priv = dev;
+
+	return 0;
+}
+
+static void pl111_dma_buf_detach(struct dma_buf *buf,
+				struct dma_buf_attachment *attach)
+{
+	DRM_DEBUG_KMS("Detaching dma_buf %p attach=%p\n", attach->dmabuf,
+			attach);
+}
+
+/* Heavily from exynos_drm_dmabuf.c */
+static struct sg_table *pl111_dma_buf_map_dma_buf(struct dma_buf_attachment
+						*attach,
+						enum dma_data_direction
+						direction)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int size, n_pages, nents;
+	struct scatterlist *s, *sg;
+	struct sg_table *sgt;
+	int ret, i;
+
+	DRM_DEBUG_KMS("Mapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+		      attach, bo);
+
+	/*
+	 * Nothing to do, if we are trying to map a dmabuf that has been imported.
+	 * Just return the existing sgt.
+	 */
+	if (obj->import_attach) {
+		BUG_ON(!bo->sgt);
+		return bo->sgt;
+	}
+
+	size = obj->size;
+	n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	if (bo->type & PL111_BOT_DMA) {
+		sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+		if (!sgt) {
+			DRM_ERROR("Failed to allocate sg_table\n");
+			return ERR_PTR(-ENOMEM);
+		}
+
+		ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+		if (ret < 0) {
+			DRM_ERROR("Failed to allocate page table\n");
+			return ERR_PTR(-ENOMEM);
+		}
+		sg_dma_len(sgt->sgl) = size;
+		/* We use DMA coherent mappings for PL111_BOT_DMA so we must
+		 * use the virtual address returned at buffer allocation */
+		sg_set_buf(sgt->sgl, bo->backing_data.dma.fb_cpu_addr, size);
+		sg_dma_address(sgt->sgl) = bo->backing_data.dma.fb_dev_addr;
+	} else { /* PL111_BOT_SHM */
+		struct page **pages;
+		int pg = 0;
+
+		mutex_lock(&dev->struct_mutex);
+		pages = get_pages(obj);
+		if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev, "could not get pages: %ld\n",
+				PTR_ERR(pages));
+			return ERR_CAST(pages);
+		}
+		sgt = drm_prime_pages_to_sg(pages, n_pages);
+		if (sgt == NULL)
+			return ERR_PTR(-ENOMEM);
+
+		pl111_gem_sync_to_dma(bo);
+
+		/*
+		 * At this point the pages have been dma-mapped by either
+		 * get_pages() for non cached maps or pl111_gem_sync_to_dma()
+		 * for cached. So the physical addresses can be assigned
+		 * to the sg entries.
+		 * drm_prime_pages_to_sg() may have combined contiguous pages
+		 * into chunks so we assign the physical address of the first
+		 * page of a chunk to the chunk and check that the physical
+		 * addresses of the rest of the pages in that chunk are also
+		 * contiguous.
+		 */
+		sg = sgt->sgl;
+		nents = sgt->nents;
+
+		for_each_sg(sg, s, nents, i) {
+			int j, n_pages_in_chunk = sg_dma_len(s) >> PAGE_SHIFT;
+
+			sg_dma_address(s) = bo->backing_data.shm.dma_addrs[pg];
+
+			for (j = pg+1; j < pg+n_pages_in_chunk; j++) {
+				BUG_ON(bo->backing_data.shm.dma_addrs[j] !=
+						bo->backing_data.shm.dma_addrs[j-1]+PAGE_SIZE);
+			}
+
+			pg += n_pages_in_chunk;
+		}
+
+		mutex_unlock(&dev->struct_mutex);
+	}
+	bo->sgt = sgt;
+	return sgt;
+}
+
+static void pl111_dma_buf_unmap_dma_buf(struct dma_buf_attachment *attach,
+					struct sg_table *sgt,
+					enum dma_data_direction direction)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("Unmapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+			attach, bo);
+
+	sg_free_table(sgt);
+	kfree(sgt);
+	bo->sgt = NULL;
+}
+
+/*
+ * There isn't any operation here that can sleep or fail so this callback can
+ * be used for both kmap and kmap_atomic implementations.
+ */
+static void *pl111_dma_buf_kmap(struct dma_buf *dma_buf, unsigned long pageno)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	void *vaddr = NULL;
+
+	/* Make sure we cannot access outside the memory range */
+	if (((pageno + 1) << PAGE_SHIFT) > bo->gem_object.size)
+		return NULL;
+
+	if (bo->type & PL111_BOT_DMA) {
+		vaddr = (bo->backing_data.dma.fb_cpu_addr +
+						(pageno << PAGE_SHIFT));
+	} else {
+		vaddr = page_address(bo->backing_data.shm.pages[pageno]);
+	}
+
+	return vaddr;
+}
+
+/*
+ * Find a scatterlist that starts in "start" and has "len"
+ * or return a NULL dma_handle.
+ */
+static dma_addr_t pl111_find_matching_sg(struct sg_table *sgt, size_t start,
+					size_t len)
+{
+	struct scatterlist *sg;
+	unsigned int count;
+	size_t size = 0;
+	dma_addr_t dma_handle = 0;
+
+	/* Find a scatterlist that starts in "start" and has "len"
+	* or return error */
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		if ((size == start) && (len == sg_dma_len(sg))) {
+			dma_handle = sg_dma_address(sg);
+			break;
+		}
+		size += sg_dma_len(sg);
+	}
+	return dma_handle;
+}
+
+static int pl111_dma_buf_begin_cpu(struct dma_buf *dma_buf,
+					size_t start, size_t len,
+					enum dma_data_direction dir)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	struct sg_table *sgt = bo->sgt;
+	dma_addr_t dma_handle;
+
+	if ((start + len) > bo->gem_object.size)
+		return -EINVAL;
+
+	if (!(bo->type & PL111_BOT_SHM)) {
+		struct device *dev = bo->gem_object.dev->dev;
+
+		dma_handle = pl111_find_matching_sg(sgt, start, len);
+		if (!dma_handle)
+			return -EINVAL;
+
+		dma_sync_single_range_for_cpu(dev, dma_handle, 0, len, dir);
+	}
+	/* PL111_BOT_DMA uses coherents mappings, no need to sync */
+	return 0;
+}
+
+static void pl111_dma_buf_end_cpu(struct dma_buf *dma_buf,
+					size_t start, size_t len,
+					enum dma_data_direction dir)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	struct sg_table *sgt = bo->sgt;
+	dma_addr_t dma_handle;
+
+	if ((start + len) > bo->gem_object.size)
+		return;
+
+	if (!(bo->type & PL111_BOT_DMA)) {
+		struct device *dev = bo->gem_object.dev->dev;
+
+		dma_handle = pl111_find_matching_sg(sgt, start, len);
+		if (!dma_handle)
+			return;
+
+		dma_sync_single_range_for_device(dev, dma_handle, 0, len, dir);
+	}
+	/* PL111_BOT_DMA uses coherents mappings, no need to sync */
+}
+
+static struct dma_buf_ops pl111_dma_buf_ops = {
+	.release = &pl111_dma_buf_release,
+	.attach = &pl111_dma_buf_attach,
+	.detach = &pl111_dma_buf_detach,
+	.map_dma_buf = &pl111_dma_buf_map_dma_buf,
+	.unmap_dma_buf = &pl111_dma_buf_unmap_dma_buf,
+	.kmap_atomic = &pl111_dma_buf_kmap,
+	.kmap = &pl111_dma_buf_kmap,
+	.begin_cpu_access = &pl111_dma_buf_begin_cpu,
+	.end_cpu_access = &pl111_dma_buf_end_cpu,
+	.mmap = &pl111_dma_buf_mmap,
+};
+
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+				       struct dma_buf *dma_buf)
+{
+	struct dma_buf_attachment *attachment;
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+	struct scatterlist *sgl;
+	struct sg_table *sgt;
+	dma_addr_t cont_phys;
+	int ret = 0;
+	int i;
+
+	DRM_DEBUG_KMS("DRM %s on dev=%p dma_buf=%p\n", __func__, dev, dma_buf);
+
+	/* is this one of own objects? */
+	if (dma_buf->ops == &pl111_dma_buf_ops) {
+		obj = dma_buf->priv;
+		/* is it from our device? */
+		if (obj->dev == dev) {
+			/*
+			* Importing dmabuf exported from our own gem increases
+			* refcount on gem itself instead of f_count of dmabuf.
+			*/
+			drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+			/* before v3.10.0 we assume the caller has taken a ref on the dma_buf
+			 * we don't want it for self-imported buffers so drop it here */
+			dma_buf_put(dma_buf);
+#endif
+
+			return obj;
+		}
+	}
+
+	attachment = dma_buf_attach(dma_buf, dev->dev);
+	if (IS_ERR(attachment))
+		return ERR_CAST(attachment);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+	/* from 3.10.0 we assume the caller has not taken a ref so we take one here */
+	get_dma_buf(dma_buf);
+#endif
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto err_buf_detach;
+	}
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo) {
+		DRM_ERROR("%s: failed to allocate buffer object.\n", __func__);
+		ret = -ENOMEM;
+		goto err_unmap_attach;
+	}
+
+	/* Find out whether the buffer is contiguous or not */
+	sgl = sgt->sgl;
+	cont_phys = sg_phys(sgl);
+	bo->type |= PL111_BOT_DMA;
+	for_each_sg(sgt->sgl, sgl, sgt->nents, i) {
+		dma_addr_t real_phys = sg_phys(sgl);
+		if (real_phys != cont_phys) {
+			bo->type &= ~PL111_BOT_DMA;
+			break;
+		}
+		cont_phys += (PAGE_SIZE - sgl->offset);
+	}
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	ret = drm_gem_private_object_init(dev, &bo->gem_object,
+					  dma_buf->size);
+	if (ret != 0) {
+		DRM_ERROR("DRM could not import DMA GEM obj\n");
+		goto err_free_buffer;
+	}
+#else
+	drm_gem_private_object_init(dev, &bo->gem_object, dma_buf->size);
+#endif
+
+	if (bo->type & PL111_BOT_DMA) {
+		bo->backing_data.dma.fb_cpu_addr = sg_virt(sgt->sgl);
+		bo->backing_data.dma.fb_dev_addr = sg_phys(sgt->sgl);
+		DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, contiguous import\n", __func__, bo);
+	} else { /* PL111_BOT_SHM */
+		DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, non contiguous import\n", __func__, bo);
+	}
+
+	bo->gem_object.import_attach = attachment;
+	bo->sgt = sgt;
+
+	return &bo->gem_object;
+
+err_free_buffer:
+	kfree(bo);
+	bo = NULL;
+err_unmap_attach:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+err_buf_detach:
+	dma_buf_detach(dma_buf, attachment);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+	/* from 3.10.0 we will have taken a ref so drop it here */
+	dma_buf_put(dma_buf);
+#endif
+	return ERR_PTR(ret);
+}
+
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+				      struct drm_gem_object *obj, int flags)
+{
+	struct dma_buf *new_buf;
+	struct pl111_gem_bo *bo;
+	size_t size;
+
+	DRM_DEBUG("DRM %s on dev=%p drm_gem_obj=%p\n", __func__, dev, obj);
+	size = obj->size;
+
+	new_buf = dma_buf_export(obj /*priv */ , &pl111_dma_buf_ops, size,
+					flags | O_RDWR);
+	bo = PL111_BO_FROM_GEM(new_buf->priv);
+
+	/*
+	 * bo->gem_object.export_dma_buf not setup until after gem_prime_export
+	 * finishes
+	 */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Ensure that we hold the kds resource if it's the currently
+	 * displayed buffer.
+	 */
+	obtain_kds_if_currently_displayed(dev, bo, new_buf);
+#endif
+
+	DRM_DEBUG("Created dma_buf %p\n", new_buf);
+
+	return new_buf;
+}
+
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+				uint32_t handle, uint32_t flags, int *prime_fd)
+{
+	int result;
+	/*
+	 * This will re-use any existing exports, and calls
+	 * driver->gem_prime_export to do the first export when needed
+	 */
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p, handle=0x%.8x\n", __func__,
+			file_priv, handle);
+
+	mutex_lock(&priv.export_dma_buf_lock);
+	result = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags,
+						prime_fd);
+	mutex_unlock(&priv.export_dma_buf_lock);
+
+	return result;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c
new file mode 100755
index 0000000..78c91c0
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_encoder.c
+ * Implementation of the encoder functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+bool pl111_encoder_helper_mode_fixup(struct drm_encoder *encoder,
+					struct drm_display_mode *mode,
+					struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+	return true;
+}
+
+void pl111_encoder_helper_prepare(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_commit(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_mode_set(struct drm_encoder *encoder,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_disable(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct pl111_drm_encoder *pl111_encoder =
+					PL111_ENCODER_FROM_ENCODER(encoder);
+
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+
+	drm_encoder_cleanup(encoder);
+	kfree(pl111_encoder);
+}
+
+const struct drm_encoder_funcs encoder_funcs = {
+	.destroy = pl111_encoder_destroy,
+};
+
+const struct drm_encoder_helper_funcs encoder_helper_funcs = {
+	.mode_fixup = pl111_encoder_helper_mode_fixup,
+	.prepare = pl111_encoder_helper_prepare,
+	.commit = pl111_encoder_helper_commit,
+	.mode_set = pl111_encoder_helper_mode_set,
+	.disable = pl111_encoder_helper_disable,
+};
+
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+						int possible_crtcs)
+{
+	struct pl111_drm_encoder *pl111_encoder;
+
+	pl111_encoder = kzalloc(sizeof(struct pl111_drm_encoder), GFP_KERNEL);
+	if (pl111_encoder == NULL) {
+		pr_err("Failed to allocated pl111_drm_encoder\n");
+		return NULL;
+	}
+
+	drm_encoder_init(dev, &pl111_encoder->encoder, &encoder_funcs,
+				DRM_MODE_ENCODER_DAC);
+
+	drm_encoder_helper_add(&pl111_encoder->encoder, &encoder_helper_funcs);
+
+	pl111_encoder->encoder.possible_crtcs = possible_crtcs;
+
+	return pl111_encoder;
+}
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c
new file mode 100755
index 0000000..f575c9e
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c
@@ -0,0 +1,202 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_fb.c
+ * Implementation of the framebuffer functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_crtc.h>
+#include "pl111_drm.h"
+
+static void pl111_fb_destroy(struct drm_framebuffer *framebuffer)
+{
+	struct pl111_drm_framebuffer *pl111_fb;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct drm_crtc *crtc;
+	unsigned long flags;
+#endif
+	DRM_DEBUG_KMS("Destroying framebuffer 0x%p...\n", framebuffer);
+
+	pl111_fb = PL111_FB_FROM_FRAMEBUFFER(framebuffer);
+
+	/*
+	 * Because flips are deferred, wait for all previous flips to complete
+	 */
+	wait_event(priv.wait_for_flips,
+			atomic_read(&priv.nr_flips_in_flight) == 0);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Release KDS resources if it's currently being displayed. Only occurs
+	 * when the last framebuffer is destroyed.
+	 */
+	list_for_each_entry(crtc, &framebuffer->dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb == framebuffer) {
+			/* Release the current buffers */
+			if (pl111_crtc->old_kds_res_set != NULL) {
+				DRM_DEBUG_KMS("Releasing KDS resources for ");
+				DRM_DEBUG_KMS("displayed 0x%p\n", framebuffer);
+				kds_resource_set_release(
+					&pl111_crtc->old_kds_res_set);
+			}
+			pl111_crtc->old_kds_res_set = NULL;
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock,
+								flags);
+	}
+#endif
+	drm_framebuffer_cleanup(framebuffer);
+
+	if ((pl111_fb->bo != NULL) && (&pl111_fb->bo->gem_object != NULL))
+		drm_gem_object_unreference_unlocked(&pl111_fb->bo->gem_object);
+
+	kfree(pl111_fb);
+
+	DRM_DEBUG_KMS("Destroyed framebuffer 0x%p\n", framebuffer);
+}
+
+static int pl111_fb_create_handle(struct drm_framebuffer *fb,
+				struct drm_file *file_priv,
+				unsigned int *handle)
+{
+	struct pl111_gem_bo *bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	DRM_DEBUG_KMS("DRM %s on fb=%p\n", __func__, fb);
+
+	if (bo == NULL)
+		return -EINVAL;
+
+	return drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+}
+
+const struct drm_framebuffer_funcs fb_funcs = {
+	.destroy = pl111_fb_destroy,
+	.create_handle = pl111_fb_create_handle,
+};
+
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct pl111_drm_framebuffer *pl111_fb = NULL;
+	struct drm_framebuffer *fb = NULL;
+	struct drm_gem_object *gem_obj;
+	struct pl111_gem_bo *bo;
+	int err = 0;
+	size_t min_size;
+	int bpp;
+	int depth;
+
+	pr_info("DRM %s\n", __func__);
+	gem_obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
+	if (gem_obj == NULL) {
+		DRM_ERROR("Could not get gem obj from handle to create fb\n");
+		err = -ENOENT;
+		goto error;
+	}
+
+	bo = PL111_BO_FROM_GEM(gem_obj);
+	drm_fb_get_bpp_depth(mode_cmd->pixel_format, &depth, &bpp);
+
+	if (mode_cmd->pitches[0] < mode_cmd->width * (bpp >> 3)) {
+		DRM_ERROR("bad pitch %u for plane 0\n", mode_cmd->pitches[0]);
+		err = -EINVAL;
+		goto error;
+	}
+
+	min_size = (mode_cmd->height - 1) * mode_cmd->pitches[0]
+				+ mode_cmd->width * (bpp >> 3);
+
+	if (bo->gem_object.size < min_size) {
+		DRM_ERROR("gem obj size < min size\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* We can't scan out SHM so we can't create an fb for it */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		DRM_ERROR("Can't create FB for non-scanout buffer\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	switch ((char)(mode_cmd->pixel_format & 0xFF)) {
+	case 'Y':
+	case 'U':
+	case 'V':
+	case 'N':
+	case 'T':
+		DRM_ERROR("YUV formats not supported\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	pl111_fb = kzalloc(sizeof(struct pl111_drm_framebuffer), GFP_KERNEL);
+	if (pl111_fb == NULL) {
+		DRM_ERROR("Could not allocate pl111_drm_framebuffer\n");
+		err = -ENOMEM;
+		goto error;
+	}
+	fb = &pl111_fb->fb;
+
+	err = drm_framebuffer_init(dev, fb, &fb_funcs);
+	if (err) {
+		DRM_ERROR("drm_framebuffer_init failed\n");
+		kfree(fb);
+		fb = NULL;
+		goto error;
+	}
+
+	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+	/* The only framebuffer formats supported by pl111
+	 * are 16 bpp or 32 bpp with 24 bit depth.
+	 * See clcd_enable()
+	 */
+	if (!((fb->bits_per_pixel == 16) ||
+		  (fb->bits_per_pixel == 32 && fb->depth == 24))) {
+		DRM_DEBUG_KMS("unsupported pixel format bpp=%d, depth=%d\n", fb->bits_per_pixel, fb->depth);
+		drm_framebuffer_cleanup(fb);
+		kfree(fb);
+		fb = NULL;
+		err = -EINVAL;
+		goto error;
+	}
+
+	pl111_fb->bo = bo;
+
+	DRM_DEBUG_KMS("Created fb 0x%p with gem_obj 0x%p physaddr=0x%.8x\n",
+			fb, gem_obj, bo->backing_data.dma.fb_dev_addr);
+
+	return fb;
+
+error:
+	if (gem_obj != NULL)
+		drm_gem_object_unreference_unlocked(gem_obj);
+
+	return ERR_PTR(err);
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h
new file mode 100755
index 0000000..494baa0
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_funcs.h
+ * Function prototypes for PL111 DRM
+ */
+
+#ifndef PL111_DRM_FUNCS_H_
+#define PL111_DRM_FUNCS_H_
+
+/* Platform Initialisation */
+int pl111_drm_init(struct platform_device *dev);
+void pl111_drm_exit(struct platform_device *dev);
+
+/* KDS Callbacks */
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2);
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res);
+
+/* CRTC Functions */
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev);
+struct pl111_drm_crtc *pl111_crtc_dummy_create(struct drm_device *dev);
+void pl111_crtc_destroy(struct drm_crtc *crtc);
+
+bool pl111_crtc_is_fb_currently_displayed(struct drm_device *dev,
+					struct drm_framebuffer *fb);
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+			struct drm_framebuffer *fb, bool page_flip,
+			struct drm_pending_vblank_event *event);
+
+/* Common IRQ handler */
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc);
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+			   struct drm_file *file_priv,
+			   uint32_t handle,
+			   uint32_t width,
+			   uint32_t height);
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+			    int x, int y);
+
+/* Connector Functions */
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev);
+void pl111_connector_destroy(struct drm_connector *connector);
+struct pl111_drm_connector *pl111_connector_dummy_create(struct drm_device
+								*dev);
+
+/* Encoder Functions */
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+						int possible_crtcs);
+struct pl111_drm_encoder *pl111_encoder_dummy_create(struct drm_device *dev,
+							int possible_crtcs);
+void pl111_encoder_destroy(struct drm_encoder *encoder);
+
+/* Frame Buffer Functions */
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd);
+
+/* VMA Functions */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma);
+struct page **get_pages(struct drm_gem_object *obj);
+void put_pages(struct drm_gem_object *obj, struct page **pages);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+                struct vm_area_struct *vma);
+void pl111_gem_vm_open(struct vm_area_struct *vma);
+void pl111_gem_vm_close(struct vm_area_struct *vma);
+#endif
+
+/* Suspend Functions */
+int pl111_drm_resume(struct drm_device *dev);
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state);
+
+/* GEM Functions */
+int pl111_dumb_create(struct drm_file *file_priv,
+			struct drm_device *dev,
+			struct drm_mode_create_dumb *args);
+int pl111_dumb_destroy(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle);
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle,
+			uint64_t *offset);
+void pl111_gem_free_object(struct drm_gem_object *obj);
+
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+			struct vm_area_struct *vma, size_t size);
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff);
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo);
+
+/* DMA BUF Functions */
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+				       struct dma_buf *dma_buf);
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+			uint32_t handle, uint32_t flags, int *prime_fd);
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+				struct drm_gem_object *obj, int flags);
+
+/* Pl111 Functions */
+void show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource
+					*flip_res, struct drm_framebuffer *fb);
+int clcd_disable(struct drm_crtc *crtc);
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res);
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id);
+int pl111_amba_remove(struct amba_device *dev);
+
+int pl111_device_init(struct drm_device *dev);
+void pl111_device_fini(struct drm_device *dev);
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+					struct clcd_regs *timing);
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+					struct drm_display_mode *mode);
+#endif /* PL111_DRM_FUNCS_H_ */
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c
new file mode 100755
index 0000000..13fb256
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c
@@ -0,0 +1,476 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_gem.c
+ * Implementation of the GEM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <asm/cacheflush.h>
+#include <asm/outercache.h>
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#include <linux/dma-attrs.h>
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0))
+#include <drm/drm_vma_manager.h>
+#endif
+
+void pl111_gem_free_object(struct drm_gem_object *obj)
+{
+	struct pl111_gem_bo *bo;
+	struct drm_device *dev = obj->dev;
+	DRM_DEBUG_KMS("DRM %s on drm_gem_object=%p\n", __func__, obj);
+
+	bo = PL111_BO_FROM_GEM(obj);
+
+	if (obj->import_attach)
+		drm_prime_gem_destroy(obj, bo->sgt);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	if (obj->map_list.map != NULL)
+		drm_gem_free_mmap_offset(obj);
+#else
+	drm_gem_free_mmap_offset(obj);
+#endif
+	/*
+	 * Only free the backing memory if the object has not been imported.
+	 * If it has been imported, the exporter is in charge to free that
+	 * once dmabuf's refcount becomes 0.
+	 */
+	if (obj->import_attach)
+		goto imported_out;
+
+	if (bo->type & PL111_BOT_DMA) {
+		dma_free_writecombine(dev->dev, obj->size,
+				bo->backing_data.dma.fb_cpu_addr,
+				bo->backing_data.dma.fb_dev_addr);
+	} else if (bo->backing_data.shm.pages != NULL) {
+		put_pages(obj, bo->backing_data.shm.pages);
+	}
+
+imported_out:
+	drm_gem_object_release(obj);
+
+	kfree(bo);
+
+	DRM_DEBUG_KMS("Destroyed dumb_bo handle 0x%p\n", bo);
+}
+
+static int pl111_gem_object_create(struct drm_device *dev, u64 size,
+				   u32 flags, struct drm_file *file_priv,
+				   u32 *handle)
+{
+	int ret = 0;
+	struct pl111_gem_bo *bo = NULL;
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (bo == NULL) {
+		ret = -ENOMEM;
+		goto finish;
+	}
+
+	bo->type = flags;
+
+#ifndef ARCH_HAS_SG_CHAIN
+	/*
+	 * If the ARCH can't chain we can't have non-contiguous allocs larger
+	 * than a single sg can hold.
+	 * In this case we fall back to using contiguous memory
+	 */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		long unsigned int n_pages =
+				PAGE_ALIGN(size) >> PAGE_SHIFT;
+		if (n_pages > SG_MAX_SINGLE_ALLOC) {
+			bo->type |= PL111_BOT_DMA;
+			/*
+			 * Non-contiguous allocation request changed to
+			 * contigous
+			 */
+			DRM_INFO("non-contig alloc to contig %lu > %lu pages.",
+					n_pages, SG_MAX_SINGLE_ALLOC);
+		}
+	}
+#endif
+	if (bo->type & PL111_BOT_DMA) {
+		/* scanout compatible - use physically contiguous buffer */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_attrs(dev->dev, size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL,
+					&attrs);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_attrs failed\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+#else
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_writecombine(dev->dev, size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_writecombine failed\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+		ret = drm_gem_private_object_init(dev, &bo->gem_object,
+							size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not initialise GEM object\n");
+			goto free_dma;
+		}
+#else
+		drm_gem_private_object_init(dev, &bo->gem_object, size);
+#endif
+
+	} else { /* PL111_BOT_SHM */
+		/* not scanout compatible - use SHM backed object */
+		ret = drm_gem_object_init(dev, &bo->gem_object, size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not init SHM backed GEM obj\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+		DRM_DEBUG_KMS("Num bytes: %d\n", bo->gem_object.size);
+	}
+
+	DRM_DEBUG("s=%llu, flags=0x%x, %s 0x%.8lx, type=%d\n",
+		size, flags,
+		(bo->type & PL111_BOT_DMA) ? "physaddr" : "shared page array",
+		(bo->type & PL111_BOT_DMA) ?
+			(unsigned long)bo->backing_data.dma.fb_dev_addr:
+			(unsigned long)bo->backing_data.shm.pages,
+			bo->type);
+
+	ret = drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+	if (ret != 0) {
+		DRM_ERROR("DRM failed to create GEM handle\n");
+		goto obj_release;
+	}
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&bo->gem_object);
+	
+	return 0;
+
+obj_release:
+	drm_gem_object_release(&bo->gem_object);
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+free_dma:
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	if (bo->type & PL111_BOT_DMA) {
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(dev->dev, size,
+			bo->backing_data.dma.fb_cpu_addr,
+			bo->backing_data.dma.fb_dev_addr,
+			&attrs);
+		}
+#else
+	if (bo->type & PL111_BOT_DMA)
+		dma_free_writecombine(dev->dev, size,
+			bo->backing_data.dma.fb_cpu_addr,
+			bo->backing_data.dma.fb_dev_addr);
+#endif
+free_bo:
+	kfree(bo);
+finish:
+	return ret;
+}
+
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv)
+{
+	struct drm_pl111_gem_create *args = data;
+	uint32_t bytes_pp;
+
+	/* Round bpp up, to allow for case where bpp<8 */
+	bytes_pp = args->bpp >> 3;
+	if (args->bpp & ((1 << 3) - 1))
+		bytes_pp++;
+
+	if (args->flags & ~PL111_BOT_MASK) {
+		DRM_ERROR("wrong flags: 0x%x\n", args->flags);
+		return -EINVAL;
+	}
+
+	args->pitch = ALIGN(args->width * bytes_pp, 64);
+	args->size = PAGE_ALIGN(args->pitch * args->height);
+
+	DRM_DEBUG_KMS("gem_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+			args->width, args->height, args->pitch, args->bpp,
+			bytes_pp, args->size, args->flags);
+
+	return pl111_gem_object_create(dev, args->size, args->flags, file_priv,
+					&args->handle);
+}
+
+int pl111_dumb_create(struct drm_file *file_priv,
+		struct drm_device *dev, struct drm_mode_create_dumb *args)
+{
+	uint32_t bytes_pp;
+
+	/* Round bpp up, to allow for case where bpp<8 */
+	bytes_pp = args->bpp >> 3;
+	if (args->bpp & ((1 << 3) - 1))
+		bytes_pp++;
+
+	if (args->flags) {
+		DRM_ERROR("flags must be zero: 0x%x\n", args->flags);
+		return -EINVAL;
+	}
+
+	args->pitch = ALIGN(args->width * bytes_pp, 64);
+	args->size = PAGE_ALIGN(args->pitch * args->height);
+
+	DRM_DEBUG_KMS("dumb_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+			args->width, args->height, args->pitch, args->bpp,
+			bytes_pp, args->size, args->flags);
+
+	return pl111_gem_object_create(dev, args->size,
+				       PL111_BOT_DMA | PL111_BOT_UNCACHED,
+				       file_priv, &args->handle);
+}
+
+int pl111_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
+		uint32_t handle)
+{
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+			file_priv, handle);
+	return drm_gem_handle_delete(file_priv, handle);
+}
+
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle,
+			uint64_t *offset)
+{
+	struct drm_gem_object *obj;
+	int ret = 0;
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+			file_priv, handle);
+
+	/* GEM does all our handle to object mapping */
+	obj = drm_gem_object_lookup(dev, file_priv, handle);
+	if (obj == NULL) {
+		ret = -ENOENT;
+		goto fail;
+	}
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	if (obj->map_list.map == NULL) {
+		ret = drm_gem_create_mmap_offset(obj);
+		if (ret != 0) {
+			drm_gem_object_unreference_unlocked(obj);
+			goto fail;
+		}
+	}
+#else
+	ret = drm_gem_create_mmap_offset(obj);
+	if (ret != 0) {
+		drm_gem_object_unreference_unlocked(obj);
+		goto fail;
+	}
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	*offset = (uint64_t) obj->map_list.hash.key << PAGE_SHIFT;
+#else
+	*offset = drm_vma_node_offset_addr(&obj->vma_node);
+	DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
+#endif
+
+	drm_gem_object_unreference_unlocked(obj);
+fail:
+	return ret;
+}
+
+/* sync the buffer for DMA access */
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo)
+{
+	struct drm_device *dev = bo->gem_object.dev;
+
+	if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED)) {
+		int i, npages = bo->gem_object.size >> PAGE_SHIFT;
+		struct page **pages = bo->backing_data.shm.pages;
+		bool dirty = false;
+
+		for (i = 0; i < npages; i++) {
+			if (!bo->backing_data.shm.dma_addrs[i]) {
+				DRM_DEBUG("%s: dma map page=%d bo=%p\n", __func__, i, bo);
+				 bo->backing_data.shm.dma_addrs[i] =
+					dma_map_page(dev->dev, pages[i], 0,
+					PAGE_SIZE, DMA_BIDIRECTIONAL);
+				dirty = true;
+			}
+		}
+
+		if (dirty) {
+			DRM_DEBUG("%s: zap ptes (and flush cache) bo=%p\n", __func__, bo);
+			/*
+			 * TODO MIDEGL-1813
+			 * 
+			 * Use flush_cache_page() and outer_flush_range() to
+			 * flush only the user space mappings of the dirty pages
+			 */
+			flush_cache_all();
+			outer_flush_all();
+			unmap_mapping_range(bo->gem_object.filp->f_mapping, 0,
+					bo->gem_object.size, 1);
+		}
+	}
+}
+
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff)
+{
+	struct drm_device *dev = bo->gem_object.dev;
+
+	/*
+	 * TODO MIDEGL-1808
+	 * 
+	 * The following check was meant to detect if the CPU is trying to access
+	 * a buffer that is currently mapped for DMA accesses, which is illegal
+	 * as described by the DMA-API.
+	 * 
+	 * However, some of our tests are trying to do that, which triggers the message
+	 * below and avoids dma-unmapping the pages not to annoy the DMA device but that
+	 * leads to the test failing because of caches not being properly flushed.
+	 */
+
+	/*
+	if (bo->sgt) {
+		DRM_ERROR("%s: the CPU is trying to access a dma-mapped buffer\n",  __func__);
+		return;
+	}
+	*/
+
+	if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED) &&
+	     bo->backing_data.shm.dma_addrs[pgoff]) {
+		DRM_DEBUG("%s: unmap bo=%p (s=%d), paddr=%08x\n",
+			  __func__, bo,  bo->gem_object.size,
+			  bo->backing_data.shm.dma_addrs[pgoff]);
+		dma_unmap_page(dev->dev, bo->backing_data.shm.dma_addrs[pgoff],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		bo->backing_data.shm.dma_addrs[pgoff] = 0;
+	}
+}
+
+/* Based on omapdrm driver */
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+		 struct vm_area_struct *vma, size_t size)
+{
+	DRM_DEBUG("DRM %s on drm_gem_object=%p, pl111_gem_bo=%p type=%08x\n",
+			__func__, obj, bo, bo->type);
+
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_flags |= VM_MIXEDMAP;
+
+	if (bo->type & PL111_BOT_WC) {
+		vma->vm_page_prot =
+			pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	} else if (bo->type & PL111_BOT_CACHED) {
+		/*
+		 * Objects that do not have a filp (DMA backed) can't be
+		 * mapped as cached now. Write-combine should be enough.
+		 */
+		if (WARN_ON(!obj->filp))
+			return -EINVAL;
+
+		/*
+		 * As explained in Documentation/dma-buf-sharing.txt
+		 * we need this trick so that we can manually zap ptes
+		 * in order to fake coherency.
+		 */
+		fput(vma->vm_file);
+		get_file(obj->filp);
+		vma->vm_file = obj->filp;
+
+		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	} else { /* PL111_BOT_UNCACHED */
+		vma->vm_page_prot =
+			pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+	}
+	return 0;
+}
+
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma)
+{
+	int ret;
+	struct drm_file *priv = file_priv->private_data;
+	struct drm_device *dev = priv->minor->dev;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_hash_item *hash;
+	struct drm_local_map *map = NULL;
+#else
+	struct drm_vma_offset_node *node;
+#endif
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+
+	DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash);
+	map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+	obj = map->handle;
+#else
+	node = drm_vma_offset_exact_lookup(dev->vma_offset_manager,
+			vma->vm_pgoff,
+			vma_pages(vma));
+	obj = container_of(node, struct drm_gem_object, vma_node);
+#endif
+	bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("DRM %s on pl111_gem_bo %p bo->type 0x%08x\n", __func__, bo, bo->type);
+
+	/* for an imported buffer we let the exporter handle the mmap */
+	if (obj->import_attach)
+		return dma_buf_mmap(obj->import_attach->dmabuf, vma, 0);
+
+	ret = drm_gem_mmap(file_priv, vma);
+	if (ret < 0) {
+		DRM_ERROR("failed to mmap\n");
+		return ret;
+	}
+
+	/* Our page fault handler uses the page offset calculated from the vma,
+	 * so we need to remove the gem cookie offset specified in the call.
+	 */
+	vma->vm_pgoff = 0;
+
+	return pl111_bo_mmap(obj, bo, vma, vma->vm_end - vma->vm_start);
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c
new file mode 100755
index 0000000..1d613d0
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c
@@ -0,0 +1,417 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_pl111.c
+ * PL111 specific functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+/* This can't be called from IRQ context, due to clk_get() and board->enable */
+static int clcd_enable(struct drm_framebuffer *fb)
+{
+	__u32 cntl;
+	struct clcd_board *board;
+
+	pr_info("DRM %s\n", __func__);
+
+	clk_prepare_enable(priv.clk);
+
+	/* Enable and Power Up */
+	cntl = CNTL_LCDEN | CNTL_LCDTFT | CNTL_LCDPWR | CNTL_LCDVCOMP(1);
+	DRM_DEBUG_KMS("fb->bits_per_pixel = %d\n", fb->bits_per_pixel);
+	if (fb->bits_per_pixel == 16)
+		cntl |= CNTL_LCDBPP16_565;
+	else if (fb->bits_per_pixel == 32 && fb->depth == 24)
+		cntl |= CNTL_LCDBPP24;
+	else
+		BUG_ON(1);
+
+	cntl |= CNTL_BGR;
+
+	writel(cntl, priv.regs + CLCD_PL111_CNTL);
+
+	if (priv.amba_dev->dev.platform_data) {
+		board = priv.amba_dev->dev.platform_data;
+
+		if (board->enable)
+			board->enable(NULL);
+	}
+
+	/* Enable Interrupts */
+	writel(CLCD_IRQ_NEXTBASE_UPDATE, priv.regs + CLCD_PL111_IENB);
+
+	return 0;
+}
+
+int clcd_disable(struct drm_crtc *crtc)
+{
+	struct clcd_board *board;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	unsigned long flags;
+#endif
+
+	pr_info("DRM %s\n", __func__);
+
+	/* Disable Interrupts */
+	writel(0x00000000, priv.regs + CLCD_PL111_IENB);
+
+	if (priv.amba_dev->dev.platform_data) {
+		board = priv.amba_dev->dev.platform_data;
+
+		if (board->disable)
+			board->disable(NULL);
+	}
+
+	/* Disable and Power Down */
+	writel(0, priv.regs + CLCD_PL111_CNTL);
+
+	/* Disable clock */
+	clk_disable_unprepare(priv.clk);
+
+	pl111_crtc->last_bpp = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+	/* Release the previous buffers */
+	if (pl111_crtc->old_kds_res_set != NULL)
+		kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+
+	pl111_crtc->old_kds_res_set = NULL;
+	spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+	return 0;
+}
+
+/*
+ * To avoid a possible race where "pl111_crtc->current_update_res" has
+ * been updated (non NULL) but the corresponding scanout buffer has not been
+ * written to the base registers we must always call this function holding
+ * the "base_update_lock" spinlock with IRQs disabled (spin_lock_irqsave()).
+ */
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	struct drm_framebuffer *fb;
+	struct pl111_gem_bo *bo;
+	size_t min_size;
+	fb = flip_res->fb;
+	bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+
+
+
+	min_size = (fb->height - 1) * fb->pitches[0]
+	         + fb->width * (fb->bits_per_pixel >> 3);
+
+	BUG_ON(bo->gem_object.size < min_size);
+
+	/* Don't even attempt PL111_BOT_SHM, it's not contiguous */
+	BUG_ON(bo->type != PL111_BOT_DMA);
+
+	/*
+	 * Note the buffer for releasing after IRQ, and don't allow any more
+	 * updates until then.
+	 *
+	 * This clcd controller latches the new address on next vsync. Address
+	 * latching is indicated by CLCD_IRQ_NEXTBASE_UPDATE, and so we must
+	 * wait for that before releasing the previous buffer's kds
+	 * resources. Otherwise, we'll allow writers to write to the old buffer
+	 * whilst it is still being displayed
+	 */
+	pl111_crtc->current_update_res = flip_res;
+
+	DRM_DEBUG_KMS("Displaying fb 0x%p, dumb_bo 0x%p, physaddr %.8x\n",
+			fb, bo, bo->backing_data.dma.fb_dev_addr);
+	
+	if (drm_vblank_get(pl111_crtc->crtc.dev, pl111_crtc->crtc_index) < 0)
+		DRM_ERROR("Could not get vblank reference for crtc %d\n",
+				pl111_crtc->crtc_index);
+
+	/* Set the scanout buffer */
+	writel(bo->backing_data.dma.fb_dev_addr, priv.regs + CLCD_UBAS);
+	writel(bo->backing_data.dma.fb_dev_addr +
+		((fb->height - 1) * fb->pitches[0]), priv.regs + CLCD_LBAS);
+}
+
+void
+show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource *flip_res,
+					struct drm_framebuffer *fb)
+{
+	unsigned long irq_flags;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+	spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+	if (list_empty(&pl111_crtc->update_queue) &&
+			!pl111_crtc->current_update_res) {
+		do_flip_to_res(flip_res);
+	} else {
+		/*
+		 * Enqueue the update to occur on a future IRQ
+		 * This only happens on triple-or-greater buffering
+		 */
+		DRM_DEBUG_KMS("Deferring 3+ buffered flip to fb %p to IRQ\n",
+				fb);
+		list_add_tail(&flip_res->link, &pl111_crtc->update_queue);
+	}
+	spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+	if (!flip_res->page_flip && (pl111_crtc->last_bpp == 0 ||
+			pl111_crtc->last_bpp != fb->bits_per_pixel ||
+			!drm_mode_equal(pl111_crtc->new_mode,
+					pl111_crtc->current_mode))) {
+		struct clcd_regs timing;
+
+		pl111_convert_drm_mode_to_timing(pl111_crtc->new_mode, &timing);
+
+		DRM_DEBUG_KMS("Set timing: %08X:%08X:%08X:%08X clk=%ldHz\n",
+				timing.tim0, timing.tim1, timing.tim2,
+				timing.tim3, timing.pixclock);
+
+		/* This is the actual mode setting part */
+		clk_set_rate(priv.clk, timing.pixclock);
+
+		writel(timing.tim0, priv.regs + CLCD_TIM0);
+		writel(timing.tim1, priv.regs + CLCD_TIM1);
+		writel(timing.tim2, priv.regs + CLCD_TIM2);
+		writel(timing.tim3, priv.regs + CLCD_TIM3);
+
+		clcd_enable(fb);
+		pl111_crtc->last_bpp = fb->bits_per_pixel;
+	}
+
+	if (!flip_res->page_flip) {
+		drm_mode_destroy(flip_res->crtc->dev, pl111_crtc->current_mode);
+		pl111_crtc->current_mode = pl111_crtc->new_mode;
+		pl111_crtc->new_mode = NULL;
+	}
+
+	BUG_ON(!pl111_crtc->current_mode);
+
+	/*
+	 * If IRQs weren't enabled before, they are now. This will eventually
+	 * cause flip_res to be released via pl111_common_irq, which updates
+	 * every time the Base Address is latched (i.e. every frame, regardless
+	 * of whether we update the base address or not)
+	 */
+}
+
+irqreturn_t pl111_irq(int irq, void *data)
+{
+	u32 irq_stat;
+	struct pl111_drm_crtc *pl111_crtc = priv.pl111_crtc;
+
+	irq_stat = readl(priv.regs + CLCD_PL111_MIS);
+
+	if (!irq_stat)
+		return IRQ_NONE;
+
+	if (irq_stat & CLCD_IRQ_NEXTBASE_UPDATE)
+		pl111_common_irq(pl111_crtc);
+
+	/* Clear the interrupt once done */
+	writel(irq_stat, priv.regs + CLCD_PL111_ICR);
+
+	return IRQ_HANDLED;
+}
+
+int pl111_device_init(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	int ret;
+
+	if (priv == NULL) {
+		pr_err("%s no private data\n", __func__);
+		return -EINVAL;
+	}
+
+	if (priv->amba_dev == NULL) {
+		pr_err("%s no amba device found\n", __func__);
+		return -EINVAL;
+	}
+
+	/* set up MMIO for register access */
+	priv->mmio_start = priv->amba_dev->res.start;
+	priv->mmio_len = resource_size(&priv->amba_dev->res);
+
+	DRM_DEBUG_KMS("mmio_start=%lu, mmio_len=%u\n", priv->mmio_start,
+			priv->mmio_len);
+
+	priv->regs = ioremap(priv->mmio_start, priv->mmio_len);
+	if (priv->regs == NULL) {
+		pr_err("%s failed mmio\n", __func__);
+		return -EINVAL;
+	}
+
+	/* turn off interrupts */
+	writel(0, priv->regs + CLCD_PL111_IENB);
+
+	ret = request_irq(priv->amba_dev->irq[0], pl111_irq, 0,
+				"pl111_irq_handler", NULL);
+	if (ret != 0) {
+		pr_err("%s failed %d\n", __func__, ret);
+		goto out_mmio;
+	}
+
+	goto finish;
+
+out_mmio:
+	iounmap(priv->regs);
+finish:
+	DRM_DEBUG_KMS("pl111_device_init returned %d\n", ret);
+	return ret;
+}
+
+void pl111_device_fini(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	u32 cntl;
+
+	if (priv == NULL || priv->regs == NULL)
+		return;
+
+	free_irq(priv->amba_dev->irq[0], NULL);
+
+	cntl = readl(priv->regs + CLCD_PL111_CNTL);
+
+	cntl &= ~CNTL_LCDEN;
+	writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+	cntl &= ~CNTL_LCDPWR;
+	writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+	iounmap(priv->regs);
+}
+
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id)
+{
+	struct clcd_board *board = dev->dev.platform_data;
+	int ret;
+	pr_info("DRM %s\n", __func__);
+
+	if (!board)
+		dev_warn(&dev->dev, "board data not available\n");
+
+	ret = amba_request_regions(dev, NULL);
+	if (ret != 0) {
+		DRM_ERROR("CLCD: unable to reserve regs region\n");
+		goto out;
+	}
+
+	priv.amba_dev = dev;
+
+	priv.clk = clk_get(&priv.amba_dev->dev, NULL);
+	if (IS_ERR(priv.clk)) {
+		DRM_ERROR("CLCD: unable to get clk.\n");
+		ret = PTR_ERR(priv.clk);
+		goto clk_err;
+	}
+
+	return 0;
+
+clk_err:
+	amba_release_regions(dev);
+out:
+	return ret;
+}
+
+int pl111_amba_remove(struct amba_device *dev)
+{
+	DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+	clk_put(priv.clk);
+
+	amba_release_regions(dev);
+
+	priv.amba_dev = NULL;
+
+	return 0;
+}
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+					struct clcd_regs *timing)
+{
+	unsigned int ppl, hsw, hfp, hbp;
+	unsigned int lpp, vsw, vfp, vbp;
+	unsigned int cpl;
+
+	memset(timing, 0, sizeof(struct clcd_regs));
+
+	ppl = (mode->hdisplay / 16) - 1;
+	hsw = mode->hsync_end - mode->hsync_start - 1;
+	hfp = mode->hsync_start - mode->hdisplay - 1;
+	hbp = mode->htotal - mode->hsync_end - 1;
+
+	lpp = mode->vdisplay - 1;
+	vsw = mode->vsync_end - mode->vsync_start - 1;
+	vfp = mode->vsync_start - mode->vdisplay;
+	vbp = mode->vtotal - mode->vsync_end;
+
+	cpl = mode->hdisplay - 1;
+
+	timing->tim0 = (ppl << 2) | (hsw << 8) | (hfp << 16) | (hbp << 24);
+	timing->tim1 = lpp | (vsw << 10) | (vfp << 16) | (vbp << 24);
+	timing->tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC | TIM2_BCD | (cpl << 16);
+	timing->tim3 = 0;
+
+	timing->pixclock = mode->clock * 1000;
+}
+
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+					struct drm_display_mode *mode)
+{
+	unsigned int ppl, hsw, hfp, hbp;
+	unsigned int lpp, vsw, vfp, vbp;
+
+	ppl = (timing->tim0 >> 2) & 0x3f;
+	hsw = (timing->tim0 >> 8) & 0xff;
+	hfp = (timing->tim0 >> 16) & 0xff;
+	hbp = (timing->tim0 >> 24) & 0xff;
+
+	lpp = timing->tim1 & 0x3ff;
+	vsw = (timing->tim1 >> 10) & 0x3f;
+	vfp = (timing->tim1 >> 16) & 0xff;
+	vbp = (timing->tim1 >> 24) & 0xff;
+
+	mode->hdisplay    = (ppl + 1) * 16;
+	mode->hsync_start = ((ppl + 1) * 16) + hfp + 1;
+	mode->hsync_end   = ((ppl + 1) * 16) + hfp + hsw + 2;
+	mode->htotal      = ((ppl + 1) * 16) + hfp + hsw + hbp + 3;
+	mode->hskew       = 0;
+
+	mode->vdisplay    = lpp + 1;
+	mode->vsync_start = lpp + vfp + 1;
+	mode->vsync_end   = lpp + vfp + vsw + 2;
+	mode->vtotal      = lpp + vfp + vsw + vbp + 2;
+
+	mode->flags = 0;
+
+	mode->width_mm = 0;
+	mode->height_mm = 0;
+
+	mode->clock = timing->pixclock / 1000;
+	mode->hsync = timing->pixclock / mode->htotal;
+	mode->vrefresh = mode->hsync / mode->vtotal;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c
new file mode 100755
index 0000000..9d5ec0c
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c
@@ -0,0 +1,151 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_platform.c
+ * Implementation of the Linux platform device entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+static int pl111_platform_drm_suspend(struct platform_device *dev,
+					pm_message_t state)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+static int pl111_platform_drm_resume(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+int pl111_platform_drm_probe(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return pl111_drm_init(dev);
+}
+
+static int pl111_platform_drm_remove(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	pl111_drm_exit(dev);
+
+	return 0;
+}
+
+static struct amba_id pl111_id_table[] = {
+	{
+	.id = 0x00041110,
+	.mask = 0x000ffffe,
+	},
+	{0, 0},
+};
+
+static struct amba_driver pl111_amba_driver = {
+	.drv = {
+		.name = "clcd-pl11x",
+		},
+	.probe = pl111_amba_probe,
+	.remove = pl111_amba_remove,
+	.id_table = pl111_id_table,
+};
+
+static struct platform_driver platform_drm_driver = {
+	.probe = pl111_platform_drm_probe,
+	.remove = pl111_platform_drm_remove,
+	.suspend = pl111_platform_drm_suspend,
+	.resume = pl111_platform_drm_resume,
+	.driver = {
+			.owner = THIS_MODULE,
+			.name = DRIVER_NAME,
+		},
+};
+
+static const struct platform_device_info pl111_drm_pdevinfo = {
+	.name = DRIVER_NAME,
+	.id = -1,
+	.dma_mask = ~0UL
+};
+
+static struct platform_device *pl111_drm_device;
+
+static int __init pl111_platform_drm_init(void)
+{
+	int ret;
+
+	pr_info("DRM %s\n", __func__);
+
+	pl111_drm_device = platform_device_register_full(&pl111_drm_pdevinfo);
+	if (pl111_drm_device == NULL) {
+		pr_err("DRM platform_device_register_full() failed\n");
+		return -ENOMEM;
+	}
+
+	ret = amba_driver_register(&pl111_amba_driver);
+	if (ret != 0) {
+		pr_err("DRM amba_driver_register() failed %d\n", ret);
+		goto err_amba_reg;
+	}
+
+	ret = platform_driver_register(&platform_drm_driver);
+	if (ret != 0) {
+		pr_err("DRM platform_driver_register() failed %d\n", ret);
+		goto err_pdrv_reg;
+	}
+
+	return 0;
+
+err_pdrv_reg:
+	amba_driver_unregister(&pl111_amba_driver);
+err_amba_reg:
+	platform_device_unregister(pl111_drm_device);
+
+	return ret;
+}
+
+static void __exit pl111_platform_drm_exit(void)
+{
+	pr_info("DRM %s\n", __func__);
+
+	platform_device_unregister(pl111_drm_device);
+	amba_driver_unregister(&pl111_amba_driver);
+	platform_driver_unregister(&platform_drm_driver);
+}
+
+#ifdef MODULE
+module_init(pl111_platform_drm_init);
+#else
+late_initcall(pl111_platform_drm_init);
+#endif
+module_exit(pl111_platform_drm_exit);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_LICENSE(DRIVER_LICENCE);
+MODULE_ALIAS(DRIVER_ALIAS);
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c
new file mode 100755
index 0000000..d033566
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_suspend.c
+ * Implementation of the suspend/resume functions for PL111 DRM
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+int pl111_drm_resume(struct drm_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c
new file mode 100755
index 0000000..ff602ef
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c
@@ -0,0 +1,308 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_vma.c
+ * Implementation of the VM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+/* BEGIN drivers/staging/omapdrm/omap_gem_helpers.c */
+/**
+ * drm_gem_put_pages - helper to free backing pages for a GEM object
+ * @obj: obj in question
+ * @pages: pages to free
+ */
+static void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
+				bool dirty, bool accessed)
+{
+	int i, npages;
+	struct pl111_gem_bo *bo;
+	npages = obj->size >> PAGE_SHIFT;
+	bo = PL111_BO_FROM_GEM(obj);
+	for (i = 0; i < npages; i++) {
+		if (dirty)
+			set_page_dirty(pages[i]);
+		if (accessed)
+			mark_page_accessed(pages[i]);
+		/* Undo the reference we took when populating the table */
+		page_cache_release(pages[i]);
+	}
+	drm_free_large(pages);
+}
+
+void put_pages(struct drm_gem_object *obj, struct page **pages)
+{
+	int i, npages;
+	struct pl111_gem_bo *bo;
+	npages = obj->size >> PAGE_SHIFT;
+	bo = PL111_BO_FROM_GEM(obj);
+	_drm_gem_put_pages(obj, pages, true, true);
+	if (bo->backing_data.shm.dma_addrs) {
+		for (i = 0; i < npages; i++) {
+			/* Filter pages unmapped because of CPU accesses */
+			if (!bo->backing_data.shm.dma_addrs[i])
+				continue;
+			if (!dma_mapping_error(obj->dev->dev,
+					bo->backing_data.shm.dma_addrs[i])) {
+				dma_unmap_page(obj->dev->dev,
+					bo->backing_data.shm.dma_addrs[i],
+					PAGE_SIZE,
+					DMA_BIDIRECTIONAL);
+			}
+		}
+		kfree(bo->backing_data.shm.dma_addrs);
+		bo->backing_data.shm.dma_addrs = NULL;
+	}
+}
+
+/**
+ * drm_gem_get_pages - helper to allocate backing pages for a GEM object
+ * @obj: obj in question
+ * @gfpmask: gfp mask of requested pages
+ */
+static struct page **_drm_gem_get_pages(struct drm_gem_object *obj,
+					gfp_t gfpmask)
+{
+	struct inode *inode;
+	struct address_space *mapping;
+	struct page *p, **pages;
+	int i, npages;
+
+	/* This is the shared memory object that backs the GEM resource */
+	inode = obj->filp->f_path.dentry->d_inode;
+	mapping = inode->i_mapping;
+
+	npages = obj->size >> PAGE_SHIFT;
+
+	pages = drm_malloc_ab(npages, sizeof(struct page *));
+	if (pages == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	gfpmask |= mapping_gfp_mask(mapping);
+
+	for (i = 0; i < npages; i++) {
+		p = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
+		if (IS_ERR(p))
+			goto fail;
+		pages[i] = p;
+
+		/*
+		 * There is a hypothetical issue w/ drivers that require
+		 * buffer memory in the low 4GB.. if the pages are un-
+		 * pinned, and swapped out, they can end up swapped back
+		 * in above 4GB.  If pages are already in memory, then
+		 * shmem_read_mapping_page_gfp will ignore the gfpmask,
+		 * even if the already in-memory page disobeys the mask.
+		 *
+		 * It is only a theoretical issue today, because none of
+		 * the devices with this limitation can be populated with
+		 * enough memory to trigger the issue.  But this BUG_ON()
+		 * is here as a reminder in case the problem with
+		 * shmem_read_mapping_page_gfp() isn't solved by the time
+		 * it does become a real issue.
+		 *
+		 * See this thread: http://lkml.org/lkml/2011/7/11/238
+		 */
+		BUG_ON((gfpmask & __GFP_DMA32) &&
+			(page_to_pfn(p) >= 0x00100000UL));
+	}
+
+	return pages;
+
+fail:
+	while (i--)
+		page_cache_release(pages[i]);
+
+	drm_free_large(pages);
+	return ERR_PTR(PTR_ERR(p));
+}
+
+struct page **get_pages(struct drm_gem_object *obj)
+{
+	struct pl111_gem_bo *bo;
+	bo = PL111_BO_FROM_GEM(obj);
+
+	if (bo->backing_data.shm.pages == NULL) {
+		struct page **p;
+		int npages = obj->size >> PAGE_SHIFT;
+		int i;
+
+		p = _drm_gem_get_pages(obj, GFP_KERNEL);
+		if (IS_ERR(p))
+			return ERR_PTR(-ENOMEM);
+
+		bo->backing_data.shm.pages = p;
+
+		if (bo->backing_data.shm.dma_addrs == NULL) {
+			bo->backing_data.shm.dma_addrs =
+				kzalloc(npages * sizeof(dma_addr_t),
+					GFP_KERNEL);
+			if (bo->backing_data.shm.dma_addrs == NULL)
+				goto error_out;
+		}
+
+		if (!(bo->type & PL111_BOT_CACHED)) {
+			for (i = 0; i < npages; ++i) {
+				bo->backing_data.shm.dma_addrs[i] =
+					dma_map_page(obj->dev->dev, p[i], 0, PAGE_SIZE,
+						DMA_BIDIRECTIONAL);
+				if (dma_mapping_error(obj->dev->dev,
+						bo->backing_data.shm.dma_addrs[i]))
+					goto error_out;
+			}
+		}
+	}
+
+	return bo->backing_data.shm.pages;
+
+error_out:
+	put_pages(obj, bo->backing_data.shm.pages);
+	bo->backing_data.shm.pages = NULL;
+	return ERR_PTR(-ENOMEM);
+}
+
+/* END drivers/staging/omapdrm/omap_gem_helpers.c */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page **pages;
+	unsigned long pfn;
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	mutex_lock(&dev->struct_mutex);
+
+	/*
+	 * Our mmap calls setup a valid vma->vm_pgoff
+	 * so we can use vmf->pgoff
+	 */
+
+	if (bo->type & PL111_BOT_DMA) {
+		pfn = (bo->backing_data.dma.fb_dev_addr >> PAGE_SHIFT) +
+				vmf->pgoff;
+	} else { /* PL111_BOT_SHM */
+		pages = get_pages(obj);
+		if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev,
+				"could not get pages: %ld\n", PTR_ERR(pages));
+			ret = PTR_ERR(pages);
+			goto error;
+		}
+		pfn = page_to_pfn(pages[vmf->pgoff]);
+		pl111_gem_sync_to_cpu(bo, vmf->pgoff);
+	}
+
+	DRM_DEBUG("bo=%p physaddr=0x%.8x for offset 0x%x\n",
+				bo, PFN_PHYS(pfn), PFN_PHYS(vmf->pgoff));
+
+	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
+
+error:
+	mutex_unlock(&dev->struct_mutex);
+
+	switch (ret) {
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+/*
+ * The core drm_vm_ functions in kernel 3.4 are not ready
+ * to handle dma_buf cases where vma->vm_file->private_data
+ * cannot be accessed to get the device.
+ * 
+ * We use these functions from 3.5 instead where the device
+ * pointer is passed explicitly.
+ *
+ * However they aren't exported from the kernel until 3.10
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+                struct vm_area_struct *vma)
+{
+	struct drm_vma_entry *vma_entry;
+
+	DRM_DEBUG("0x%08lx,0x%08lx\n",
+		vma->vm_start, vma->vm_end - vma->vm_start);
+	atomic_inc(&dev->vma_count);
+
+	vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL);
+	if (vma_entry) {
+		vma_entry->vma = vma;
+		vma_entry->pid = current->pid;
+		list_add(&vma_entry->head, &dev->vmalist);
+	}
+}
+
+void pl111_drm_vm_close_locked(struct drm_device *dev,
+                struct vm_area_struct *vma)
+{
+	struct drm_vma_entry *pt, *temp;
+
+	DRM_DEBUG("0x%08lx,0x%08lx\n",
+		vma->vm_start, vma->vm_end - vma->vm_start);
+	atomic_dec(&dev->vma_count);
+
+	list_for_each_entry_safe(pt, temp, &dev->vmalist, head) {
+		if (pt->vma == vma) {
+			list_del(&pt->head);
+			kfree(pt);
+			break;
+		}
+	}
+}
+
+void pl111_gem_vm_open(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+
+	drm_gem_object_reference(obj);
+
+	mutex_lock(&obj->dev->struct_mutex);
+	pl111_drm_vm_open_locked(obj->dev, vma);
+	mutex_unlock(&obj->dev->struct_mutex);
+}
+
+void pl111_gem_vm_close(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct drm_device *dev = obj->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	pl111_drm_vm_close_locked(obj->dev, vma);
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&dev->struct_mutex);
+}
+#endif
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/pl111/sconscript b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/sconscript
new file mode 100755
index 0000000..5c47de7
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/pl111/sconscript
@@ -0,0 +1,52 @@
+#
+# (C) COPYRIGHT 2010-2013, 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import shutil
+Import('env')
+
+# Generate a build environment for the integration tests, taking a copy of the top-level build environment 
+# (env) as a start.
+drm_env = env.Clone()
+
+# Xorg uses C++ style comments and 'inline' keyword
+if '-std=c89' in drm_env['CFLAGS']:
+	drm_env['CFLAGS'].remove('-std=c89')
+
+# X11 generates a lot of warnings
+if '-Werror' in drm_env['CCFLAGS']:
+	drm_env['CCFLAGS'].remove('-Werror')
+
+#remove the 'lib'prefix
+drm_env['LIBPREFIX'] = ''
+
+src = Glob('*.c')
+
+if drm_env.GetOption('clean') :
+	drm_env.Execute(Action("make clean", 'clean [pl111]'))
+	cmd = drm_env.Command('$STATIC_LIB_PATH/mali_drm.ko', src, [])
+else:
+	# The target is mali_drm.ko, built from the source in pl111_drm/pl111, via the action makeAction
+	# mali_drm.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+	# kernel build system, after which it can be installed to the directory specified if
+	# "libs_install" is set; this is done by LibTarget.
+	makeAction=Action("cd ${SOURCE.dir} && make MALI_DEBUG=${debug} && cp pl111_drm.ko $STATIC_LIB_PATH/mali_drm.ko", '$MAKECOMSTR')
+	cmd = drm_env.Command('$STATIC_LIB_PATH/mali_drm.ko', src, [makeAction])
+
+# need Module.symvers from drm.ko
+#drm_env.Depends('$STATIC_LIB_PATH/pl111_drm.ko', '$STATIC_LIB_PATH/drm.ko')
+
+drm_env.KernelObjTarget('x11', cmd)
+
diff --git a/midgard/r15p0/kernel/drivers/gpu/drm/sconscript b/midgard/r15p0/kernel/drivers/gpu/drm/sconscript
new file mode 100755
index 0000000..a90fa89
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/drm/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+if 'x11' in env['winsys'] or 'gbm' in env['winsys']:
+	# pl111_drm isn't released so only try to build it if it's there
+	if Glob('pl111/sconscript') and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'):
+		SConscript('pl111/sconscript')
+
+#SConscript('dummy_drm/sconscript')
diff --git a/midgard/r15p0/kernel/drivers/gpu/sconscript b/midgard/r15p0/kernel/drivers/gpu/sconscript
new file mode 100755
index 0000000..729dbda
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/gpu/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+SConscript('arm/sconscript')
+
+if Glob('drm/sconscript'):
+	SConscript('drm/sconscript')
diff --git a/midgard/r15p0/kernel/drivers/sconscript b/midgard/r15p0/kernel/drivers/sconscript
new file mode 100755
index 0000000..71b194e
--- /dev/null
+++ b/midgard/r15p0/kernel/drivers/sconscript
@@ -0,0 +1,116 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import glob
+import os
+import shutil
+import subprocess
+
+from SCons.Script import SCons
+
+Import("env")
+
+# -----------------------------------------------------------------------------
+g_kernel_config = dict()
+
+def kernel_config_enabled(sEnv, option):
+    """Return true if a given kernel config option is enabled"""
+    global g_kernel_config
+    if not g_kernel_config:
+        config_file = os.path.join(sEnv["ENV"]["KDIR"], "include/config/auto.conf")
+        print "Parsing kernel config '%s'." % config_file
+        with open(config_file, "rt") as fp:
+            for line in fp.readlines():
+                try:
+                    (key, val) = line.split("=")
+                    g_kernel_config[key.strip()] = val.strip()
+                except ValueError:
+                    pass
+    if option not in g_kernel_config:
+        return False
+    val = g_kernel_config[option]
+    if val == "y":
+        return True
+    return False
+
+env.AddMethod(kernel_config_enabled, "KernelConfigEnabled")
+
+def kernel_module_builder(env = None, target = None, source = None):
+    assert (len(target) == 1 and 'M' in env and 'make_args' in env and
+            'built_module' in env)
+
+    module_dir = str(env['M'])
+    cmd = ['make', '-j%d' % GetOption('num_jobs')] + env['make_args']
+    status = subprocess.call(cmd, cwd = module_dir, env = env['ENV'])
+    if status:
+        return status
+
+    shutil.copy2(env['built_module'], target[0].abspath)
+    return 0 # Success (SCons will catch any exceptions thrown by copy2).
+
+action = SCons.Action.Action(kernel_module_builder, "[KBUILD] $TARGET")
+builder = env.Builder(action = action)
+env.Append(BUILDERS = {'KernelModuleBuilder': builder})
+
+def build_kernel_module(env, target, sources, M = None, make_args = [],
+                        built_module = None):
+    if not M:
+        M = Dir('.').srcnode().abspath
+    if not built_module:
+        built_module = os.path.basename(str(target))
+    built_module = os.path.join(M, built_module)
+
+    env_kern = env.Clone()
+    kdir = env_kern['ENV']['KDIR']
+    env_kern.Append(CPPPATH = [M, '#kernel/include', os.path.join(kdir, 'include')])
+    mod = env_kern.KernelModuleBuilder(target, sources, M = M,
+                                       make_args = make_args,
+                                       built_module = built_module)
+
+    # Kbuild implicitly #includes kconfig.h (which includes autoconf.h) when
+    # building kernel module source files.
+    env_kern.Depends(mod, os.path.join(kdir, 'include/generated/autoconf.h'))
+    # Built files may change depending on CONFIG_* values in auto.conf.
+    env_kern.Depends(mod, os.path.join(kdir, 'include/config/auto.conf'))
+
+    for f in ['Kconfig', 'Kbuild', 'Makefile*']:
+        env_kern.Depends(mod, glob.glob(os.path.join(M, f)))
+
+    scanner = SCons.Scanner.C.CScanner()
+    all_headers = set()
+    for src in Flatten(sources):
+        src = env_kern.File(src)
+        (base, ext) = os.path.splitext(src.abspath)
+        if ext == '.c':
+            env_kern.Clean(mod, base + '.o')
+            # Scan for header dependencies explicitly.
+            headers = scanner(src, env_kern, scanner.path_function(env_kern))
+            env_kern.Depends(mod, headers)
+            all_headers.update(headers)
+
+    env_kern.Clean(mod, os.path.join(M, 'Module.symvers'))
+    env_kern.Clean(mod, os.path.join(M, 'module.order'))
+
+    return mod
+
+env.AddMethod(build_kernel_module, 'BuildKernelModule')
+
+if Glob('base/sconscript'):
+	SConscript('base/sconscript')
+
+if Glob('video/sconscript'):
+	SConscript('video/sconscript')
+
+SConscript('gpu/sconscript')
diff --git a/midgard/r15p0/kernel/include/linux/dma-buf-test-exporter.h b/midgard/r15p0/kernel/include/linux/dma-buf-test-exporter.h
new file mode 100755
index 0000000..98984ba
--- /dev/null
+++ b/midgard/r15p0/kernel/include/linux/dma-buf-test-exporter.h
@@ -0,0 +1,78 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_
+#define _LINUX_DMA_BUF_TEST_EXPORTER_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+#define DMA_BUF_TE_VER_MAJOR 1
+#define DMA_BUF_TE_VER_MINOR 0
+#define DMA_BUF_TE_ENQ 0x642d7465
+#define DMA_BUF_TE_ACK 0x68692100
+
+struct dma_buf_te_ioctl_version
+{
+	int op;    /**< Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */
+	int major; /**< Major version */
+	int minor; /**< Minor version */
+};
+
+struct dma_buf_te_ioctl_alloc
+{
+	__u64 size; /* size of buffer to allocate, in pages */
+};
+
+struct dma_buf_te_ioctl_status
+{
+	/* in */
+	int fd; /* the dma_buf to query, only dma_buf objects exported by this driver is supported */
+	/* out */
+	int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */
+	int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */
+	int cpu_mappings;    /* number of cpu mappings (active 'mmap's) */
+};
+
+struct dma_buf_te_ioctl_set_failing
+{
+	/* in */
+	int fd; /* the dma_buf to set failure mode for, only dma_buf objects exported by this driver is supported */
+
+	/* zero = no fail injection, non-zero = inject failure */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+};
+
+struct dma_buf_te_ioctl_fill
+{
+	int fd;
+	unsigned int value;
+};
+
+#define DMA_BUF_TE_IOCTL_BASE 'E'
+/* Below all returning 0 if successful or -errcode except DMA_BUF_TE_ALLOC which will return fd or -errcode */
+#define DMA_BUF_TE_VERSION         _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version)
+#define DMA_BUF_TE_ALLOC           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_QUERY           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status)
+#define DMA_BUF_TE_SET_FAILING     _IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing)
+#define DMA_BUF_TE_ALLOC_CONT      _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_FILL            _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill)
+
+#endif /* _LINUX_DMA_BUF_TEST_EXPORTER_H_ */
diff --git a/midgard/r15p0/kernel/include/linux/kds.h b/midgard/r15p0/kernel/include/linux/kds.h
new file mode 100755
index 0000000..1346eda
--- /dev/null
+++ b/midgard/r15p0/kernel/include/linux/kds.h
@@ -0,0 +1,173 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KDS_H_
+#define _KDS_H_
+
+#include <linux/list.h>
+#include <linux/workqueue.h>
+
+#define KDS_WAIT_BLOCKING (ULONG_MAX)
+
+struct kds_resource_set;
+
+typedef void (*kds_callback_fn) (void *callback_parameter, void *callback_extra_parameter);
+
+struct kds_callback
+{
+	kds_callback_fn  user_cb; /* real cb */
+	int direct;               /* do direct or queued call? */
+	struct workqueue_struct *wq;
+};
+
+struct kds_link
+{
+	struct kds_resource_set *parent;
+	struct list_head         link;
+	unsigned long            state;
+};
+
+struct kds_resource
+{
+	struct kds_link waiters;
+};
+
+/* callback API */
+
+/* Initialize a callback object.
+ *
+ * Typically created per context or per hw resource.
+ *
+ * Callbacks can be performed directly if no nested locking can
+ * happen in the client.
+ *
+ * Nested locking can occur when a lock is held during the kds_async_waitall or
+ * kds_resource_set_release call. If the callback needs to take the same lock
+ * nested locking will happen.
+ *
+ * If nested locking could happen non-direct callbacks can be requested.
+ * Callbacks will then be called asynchronous to the triggering call.
+ */
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb);
+
+/* Terminate the use of a callback object.
+ *
+ * If the callback object was set up as non-direct
+ * any pending callbacks will be flushed first.
+ * Note that to avoid a deadlock the lock callbacks needs
+ * can't be held when a callback object is terminated.
+ */
+void kds_callback_term(struct kds_callback *cb);
+
+
+/* resource object API */
+
+/* initialize a resource handle for a shared resource */
+void kds_resource_init(struct kds_resource * const resource);
+
+/*
+ * Will return 0 on success.
+ * If the resource is being used or waited -EBUSY is returned.
+ * The caller should NOT try to terminate a resource that could still have clients.
+ * After the function returns the resource is no longer known by kds.
+ */
+int kds_resource_term(struct kds_resource *resource);
+
+/* Asynchronous wait for a set of resources.
+ * Callback will be called when all resources are available.
+ * If all the resources was available the callback will be called before kds_async_waitall returns.
+ * So one must not hold any locks the callback code-flow can take when calling kds_async_waitall.
+ * Caller considered to own/use the resources until \a kds_rset_release is called.
+ * exclusive_access_bitmap is a bitmap where a high bit means exclusive access while a low bit means shared access.
+ * Use the Linux __set_bit API, where the index of the buffer to control is used as the bit index.
+ *
+ * Standard Linux error return value.
+ */
+int kds_async_waitall(
+		struct kds_resource_set ** const pprset,
+		struct kds_callback      *cb,
+		void                     *callback_parameter,
+		void                     *callback_extra_parameter,
+		int                       number_resources,
+		unsigned long            *exclusive_access_bitmap,
+		struct kds_resource     **resource_list);
+
+/* Synchronous wait for a set of resources.
+ * Function will return when one of these have happened:
+ * - all resources have been obtained
+ * - timeout lapsed while waiting
+ * - a signal was received while waiting
+ *
+ * To wait without a timeout, specify KDS_WAIT_BLOCKING for \a jifies_timeout, otherwise
+ * the timeout in jiffies. A zero timeout attempts to obtain all resources and returns
+ * immediately with a timeout if all resources could not be obtained.
+ *
+ * Caller considered to own/use the resources when the function returns.
+ * Caller must release the resources using \a kds_rset_release.
+ *
+ * Calling this function while holding already locked resources or other locking primitives is dangerous.
+ * One must if this is needed decide on a lock order of the resources and/or the other locking primitives
+ * and always take the resources/locking primitives in the specific order.
+ *
+ * Use the ERR_PTR framework to decode the return value.
+ * NULL = time out
+ * If IS_ERR then PTR_ERR gives:
+ *  ERESTARTSYS = signal received, retry call after signal
+ *  all other values = internal error, lock failed
+ * Other values  = successful wait, now the owner, must call kds_resource_set_release
+ */
+struct kds_resource_set *kds_waitall(
+		int                   number_resources,
+		unsigned long        *exclusive_access_bitmap,
+		struct kds_resource **resource_list,
+		unsigned long         jifies_timeout);
+
+/* Release resources after use.
+ * Caller must handle that other async callbacks will trigger,
+ * so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your poiner to handle a race
+ * between a cancelation and a completion.
+ *
+ * If the caller can't guarantee that a race can't occur then
+ * the passed in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ */
+void kds_resource_set_release(struct kds_resource_set **pprset);
+
+/* Release resources after use and wait callbacks to complete.
+ * Caller must handle that other async callbacks will trigger,
+ * so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your poiner to handle a race
+ * between a cancelation and a completion.
+ *
+ * If the caller can't guarantee that a race can't occur then
+ * the passed in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ *
+ * This should be used to cancel waits which are pending on a kds
+ * resource.
+ *
+ * It is a bug to call this from atomic contexts and from within
+ * a kds callback that now owns the kds_rseource.
+ */
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset);
+#endif /* _KDS_H_ */
diff --git a/midgard/r15p0/kernel/include/linux/ump-common.h b/midgard/r15p0/kernel/include/linux/ump-common.h
new file mode 100755
index 0000000..0015bda
--- /dev/null
+++ b/midgard/r15p0/kernel/include/linux/ump-common.h
@@ -0,0 +1,252 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump-common.h
+ *
+ * This file contains some common enum values used both in both the user and kernel space side of UMP.
+ */
+
+#ifndef _UMP_COMMON_H_
+#define _UMP_COMMON_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#define UMP_UINT32_MAX (4294967295U)
+#define UMP_UINT64_MAX (18446744073709551615ULL)
+
+#ifdef __GNUC__
+	#define CHECK_RESULT        __attribute__((__warn_unused_result__))
+	#define INLINE	__inline__
+#else
+	#define CHECK_RESULT
+	#define INLINE __inline
+#endif
+
+#ifndef STATIC
+	#define STATIC static
+#endif
+
+/**
+ * Values to identify major and minor version of UMP
+ */
+#define UMP_VERSION_MAJOR 2
+#define UMP_VERSION_MINOR 0
+
+/**
+ * Typedef for a secure ID, a system wide identifier for UMP memory buffers.
+ */
+typedef int32_t ump_secure_id;
+
+/**
+ * Value to indicate an invalid secure Id.
+ */
+#define UMP_INVALID_SECURE_ID  ((ump_secure_id)0)
+
+/**
+ * UMP error codes.
+ */
+typedef enum
+{
+	UMP_OK    = 0, /**< indicates success */
+	UMP_ERROR = 1  /**< indicates failure */
+} ump_result;
+
+/**
+ * Allocation flag bits.
+ *
+ * ump_allocate accepts zero or more flags to specify the type of memory to allocate and how to expose it to devices.
+ *
+ * For each supported device there are 4 flags to control access permissions and give usage characteristic hints to optimize the allocation/mapping.
+ * They are;
+ * @li @a UMP_PROT_<device>_RD   read permission
+ * @li @a UMP_PROT_<device>_WR   write permission
+ * @li @a UMP_HINT_<device>_RD   read often
+ * @li @a UMP_HINT_<device>_WR   written often
+ *
+ * 5 devices are currently supported, with a device being the CPU itself.
+ * The other 4 devices will be mapped to real devices per SoC design.
+ * They are just named W,X,Y,Z by UMP as it has no knowledge of their real names/identifiers.
+ * As an example device W could be a camera device while device Z could be an ARM GPU device, leaving X and Y unused.
+ *
+ * 2 additional flags control the allocation;
+ * @li @a UMP_CONSTRAINT_PHYSICALLY_LINEAR   the allocation must be physical linear. Typical for devices without an MMU and no IOMMU to help it.
+ * @li @a UMP_PROT_SHAREABLE                 the allocation can be shared with other processes on the system. Without this flag the returned allocation won't be resolvable in other processes.
+ *
+ * All UMP allocation are growable unless they're @a UMP_PROT_SHAREABLE.
+ * The hint bits should be used to indicate the access pattern so the driver can select the most optimal memory type and cache settings based on the what the system supports.
+ */
+typedef enum
+{
+	/* Generic helpers */
+	UMP_PROT_DEVICE_RD = (1u << 0),
+	UMP_PROT_DEVICE_WR = (1u << 1),
+	UMP_HINT_DEVICE_RD = (1u << 2),
+	UMP_HINT_DEVICE_WR = (1u << 3),
+	UMP_DEVICE_MASK = 0xF,
+	UMP_DEVICE_CPU_SHIFT = 0,
+	UMP_DEVICE_W_SHIFT = 4,
+	UMP_DEVICE_X_SHIFT = 8,
+	UMP_DEVICE_Y_SHIFT = 12,
+	UMP_DEVICE_Z_SHIFT = 16,
+
+	/* CPU protection and hints. */
+	UMP_PROT_CPU_RD = (1u <<  0),
+	UMP_PROT_CPU_WR = (1u <<  1),
+	UMP_HINT_CPU_RD = (1u <<  2),
+	UMP_HINT_CPU_WR = (1u <<  3),
+
+	/* device W */
+	UMP_PROT_W_RD = (1u <<  4),
+	UMP_PROT_W_WR = (1u <<  5),
+	UMP_HINT_W_RD = (1u <<  6),
+	UMP_HINT_W_WR = (1u <<  7),
+
+	/* device X */
+	UMP_PROT_X_RD = (1u <<  8),
+	UMP_PROT_X_WR = (1u <<  9),
+	UMP_HINT_X_RD = (1u << 10),
+	UMP_HINT_X_WR = (1u << 11),
+	
+	/* device Y */
+	UMP_PROT_Y_RD = (1u << 12),
+	UMP_PROT_Y_WR = (1u << 13),
+	UMP_HINT_Y_RD = (1u << 14),
+	UMP_HINT_Y_WR = (1u << 15),
+
+	/* device Z */
+	UMP_PROT_Z_RD = (1u << 16),
+	UMP_PROT_Z_WR = (1u << 17),
+	UMP_HINT_Z_RD = (1u << 18),
+	UMP_HINT_Z_WR = (1u << 19),
+
+	/* 20-26 reserved for future use */
+	UMPP_ALLOCBITS_UNUSED = (0x7Fu << 20),
+	/** Allocations marked as @ UMP_CONSTRAINT_UNCACHED won't be mapped as cached by the cpu  */
+	UMP_CONSTRAINT_UNCACHED = (1u << 27),
+	/** Require 32-bit physically addressable memory */
+	UMP_CONSTRAINT_32BIT_ADDRESSABLE = (1u << 28),
+	/** For devices without an MMU and with no IOMMU assistance. */
+	UMP_CONSTRAINT_PHYSICALLY_LINEAR = (1u << 29),
+	/** Shareable must be set to allow the allocation to be used by other processes, the default is non-shared */
+	UMP_PROT_SHAREABLE = (1u << 30)
+	/* (1u << 31) should not be used to ensure compiler portability */
+} ump_allocation_bits;
+
+/**
+ * ump_allocation_bits combination argument type.
+ *
+ * Type used to pass zero or more bits from the @ref ump_allocation_bits enum
+ */
+typedef uint32_t ump_alloc_flags;
+
+
+/**
+ *  Default allocation flags for UMP v1 compatible allocations.
+ */
+#define UMP_V1_API_DEFAULT_ALLOCATION_FLAGS		UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | \
+												UMP_PROT_W_RD | UMP_PROT_W_WR |	\
+												UMP_PROT_X_RD | UMP_PROT_X_WR |	\
+												UMP_PROT_Y_RD | UMP_PROT_Y_WR |	\
+												UMP_PROT_Z_RD | UMP_PROT_Z_WR |	\
+												UMP_PROT_SHAREABLE |	\
+												UMP_CONSTRAINT_32BIT_ADDRESSABLE
+
+/**
+ * CPU cache sync operations.
+ *
+ * Cache synchronization operations to pass to @ref ump_cpu_msync_now
+ */
+enum
+{
+	/**
+	 * Cleans any dirty cache lines to main memory, but the data will still be available in the cache.
+	 * After a clean the contents of memory is considered to be "owned" by the device.
+	 * */
+	UMP_MSYNC_CLEAN = 1,
+
+	/** Cleans any dirty cache lines to main memory and Ejects all lines from the cache.
+	 * After an clean&invalidate the contents of memory is considered to be "owned" by the CPU.
+	 * Any subsequent access will fetch data from main memory.
+	 *
+	 * @note Due to CPUs doing speculative prefetching a UMP_MSYNC_CLEAN_AND_INVALIDATE must be done before and after interacting with hardware.
+	 * */
+	UMP_MSYNC_CLEAN_AND_INVALIDATE
+
+};
+
+typedef uint32_t ump_cpu_msync_op;
+
+/**
+ * Memory import types supported.
+ * If new import types are added they will appear here.
+ * They must be added before UMPP_EXTERNAL_MEM_COUNT and
+ * must be assigned an explicit sequantial number.
+ *
+ * @li UMP_EXTERNAL_MEM_TYPE_ION - Import an ION allocation
+ *                                 Takes a int* (pointer to a file descriptor)
+ *                                 Another ION reference is taken which is released on the final ump_release
+ */
+enum ump_external_memory_type
+{
+	UMPP_EXTERNAL_MEM_TYPE_UNUSED = 0, /* reserve type 0 */
+	UMP_EXTERNAL_MEM_TYPE_ION = 1,
+	UMPP_EXTERNAL_MEM_COUNT
+};
+
+/** @name UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Allocation constraints.
+ *
+ * Allocation flags to pass @ref ump_ref_drv_allocate
+ *
+ * UMP v1 API only.
+ */
+typedef enum
+{
+	/** the allocation is mapped as noncached. */
+	UMP_REF_DRV_CONSTRAINT_NONE = 0,
+	/** not supported. */
+	UMP_REF_DRV_CONSTRAINT_PHYSICALLY_LINEAR = 1,
+	/** the allocation is mapped as cached by the cpu. */
+	UMP_REF_DRV_CONSTRAINT_USE_CACHE = 4
+} ump_alloc_constraints;
+
+/* @} */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _UMP_COMMON_H_ */
diff --git a/midgard/r15p0/kernel/include/linux/ump-import.h b/midgard/r15p0/kernel/include/linux/ump-import.h
new file mode 100755
index 0000000..89ce727
--- /dev/null
+++ b/midgard/r15p0/kernel/include/linux/ump-import.h
@@ -0,0 +1,99 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IMPORT_H_
+#define _UMP_IMPORT_H_
+
+#include <linux/ump.h>
+#include <linux/module.h>
+
+/**
+ * UMP import module info.
+ * Contains information about the Linux module providing the import module,
+ * used to block unloading of the Linux module while imported memory exists.
+ * Lists the functions implementing the UMP import functions.
+ */
+struct ump_import_handler
+{
+	/**
+	 * Linux module of the import handler
+	 */
+	struct module * linux_module;
+
+	/**
+	 * UMP session usage begin.
+	 *
+	 * Called when a UMP session first is bound to the handler.
+	 * Typically used to set up any import module specific per-session data.
+	 * The function returns a pointer to this data in the output pointer custom_session_data
+	 * which will be passed to \a session_end and \a import.
+	 *
+	 * Note: custom_session_data must be set to non-NULL if successful.
+	 * If no pointer is needed set it a magic value to validate instead.
+	 *
+	 * @param[out] custom_session_data Pointer to a generic pointer where any data can be stored
+	 * @return 0 on success, error code if the session could not be initiated.
+	 */
+	int (*session_begin)(void ** custom_session_data);
+
+	/**
+	 * UMP session usage end.
+	 *
+	 * Called when a UMP session is no longer using the handler.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 */
+	void (*session_end)(void * custom_session_data);
+
+	/**
+	 * Import request.
+	 *
+	 * Called when a client has asked to import a resource of the type the import module was installed for.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * The requested flags must be verified to be valid to apply to the imported memory.
+	 * If not valid return UMP_DD_INVALID_MEMORY_HANDLE.
+	 * If the flags are found to be valid call \a ump_dd_create_from_phys_blocks_64 to create a handle.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 * @param[in] phandle Pointer to the handle to import
+	 * @param     flags The requested UMPv2 flags to assign to the imported handle
+	 * @return UMP_DD_INVALID_MEMORY_HANDLE if the import failed, a valid ump handle on success
+	 */
+	ump_dd_handle (*import)(void * custom_session_data, void * phandle, ump_alloc_flags flags);
+};
+
+/**
+ * Import module registration.
+ * Registers a ump_import_handler structure for a memory type.
+ * @param     type    Type of the memory to register a handler for
+ * @param[in] handler Handler strcture to install
+ * @return 0 on success, a Linux error code on failure
+ */
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler);
+
+/**
+ * Import module deregistration.
+ * Uninstalls the handler for the given memory type.
+ * @param type Type of the memory to unregister the handler for
+ */
+void ump_import_module_unregister(enum ump_external_memory_type type);
+
+#endif /* _UMP_IMPORT_H_ */
diff --git a/midgard/r15p0/kernel/include/linux/ump-ioctl.h b/midgard/r15p0/kernel/include/linux/ump-ioctl.h
new file mode 100755
index 0000000..451403e
--- /dev/null
+++ b/midgard/r15p0/kernel/include/linux/ump-ioctl.h
@@ -0,0 +1,152 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IOCTL_H_
+#define _UMP_IOCTL_H
+
+#include <linux/ump-common.h>
+
+/*
+ * The order and size of the members of these have been chosen so the structures look the same in 32-bit and 64-bit builds.
+ * If any changes are done build the ump_struct_size_checker test for 32-bit and 64-bit targets. Both must compile successfully to commit.
+ */
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union ump_pointer
+{
+	void * value; /**< client should store their pointers here */
+	uint32_t compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	uint64_t sizer; /**< Force 64-bit storage for all clients regardless */
+} ump_pointer;
+
+/**
+ * UMP allocation request.
+ * Used when performing ump_allocate
+ */
+typedef struct ump_k_allocate
+{
+	uint64_t size; /**< [in] Size in bytes to allocate */
+	ump_secure_id secure_id; /**< [out] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [in] Flags to use when allocating */
+} ump_k_allocate;
+
+/**
+ * UMP size query request.
+ * Used when performing ump_size_get
+ */
+typedef struct ump_k_sizequery
+{
+	uint64_t size; /**< [out] Size of allocation */
+	ump_secure_id secure_id; /**< [in] ID of allocation to query the size of */
+	uint32_t padding; /* don't remove */
+} ump_k_sizequery;
+
+/**
+ * UMP cache synchronization request.
+ * Used when performing ump_cpu_msync_now
+ */
+typedef struct ump_k_msync
+{
+	ump_pointer mapped_ptr; /**< [in] CPU VA to perform cache operation on */
+	ump_secure_id secure_id; /**< [in] ID of allocation to perform cache operation on */
+	ump_cpu_msync_op cache_operation; /**< [in] Cache operation to perform */
+	uint64_t size; /**< [in] Size in bytes of the range to synchronize */
+} ump_k_msync;
+
+/**
+ * UMP memory retain request.
+ * Used when performing ump_retain
+ */
+typedef struct ump_k_retain
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to retain a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_retain;
+
+/**
+ * UMP memory release request.
+ * Used when performing ump_release
+ */
+typedef struct ump_k_release
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to release a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_release;
+
+typedef struct ump_k_import
+{
+	ump_pointer phandle;                /**< [in]  Pointer to handle to import */
+	uint32_t type;                           /**< [in]  Type of handle to import */
+	ump_alloc_flags alloc_flags;        /**< [in]  Flags to assign to the imported memory */
+	ump_secure_id secure_id;            /**< [out] UMP ID representing the imported memory */
+	uint32_t padding;                        /* don't remove */
+} ump_k_import;
+
+/**
+ * UMP allocation flags request.
+ * Used when performing umpp_get_allocation_flags
+ *
+ * used only by v1 API
+ */
+typedef struct ump_k_allocation_flags
+{
+	ump_secure_id secure_id; /**< [in] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [out] Flags to use when allocating */
+} ump_k_allocation_flags;
+
+#define UMP_CALL_MAX_SIZE 512
+/*
+ * Ioctl definitions
+ */
+
+/* Use '~' as magic number */
+
+#define UMP_IOC_MAGIC  '~'
+
+#define UMP_FUNC_ALLOCATE _IOWR(UMP_IOC_MAGIC,  1, ump_k_allocate)
+#define UMP_FUNC_SIZEQUERY _IOWR(UMP_IOC_MAGIC,  2, ump_k_sizequery)
+#define UMP_FUNC_MSYNC _IOWR(UMP_IOC_MAGIC,  3, ump_k_msync)
+#define UMP_FUNC_RETAIN _IOW(UMP_IOC_MAGIC,  4, ump_k_retain)
+#define UMP_FUNC_RELEASE _IOW(UMP_IOC_MAGIC,  5, ump_k_release)
+#define UMP_FUNC_ALLOCATION_FLAGS_GET _IOWR(UMP_IOC_MAGIC,  6, ump_k_allocation_flags)
+#define UMP_FUNC_IMPORT _IOWR(UMP_IOC_MAGIC, 7, ump_k_import)
+
+/*max ioctl sequential number*/
+#define UMP_IOC_MAXNR 7
+
+/* 15 bits for the UMP ID (allowing 32768 IDs) */
+#define UMP_LINUX_ID_BITS 15
+#define UMP_LINUX_ID_MASK ((1ULL << UMP_LINUX_ID_BITS) - 1ULL)
+
+/* 64-bit (really 52 bits) encoding: 15 bits for the ID, 37 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_64 37
+#define UMP_LINUX_OFFSET_MASK_64 ((1ULL << UMP_LINUX_OFFSET_BITS_64)-1)
+/* 32-bit encoding: 15 bits for the ID, 17 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_32 17
+#define UMP_LINUX_OFFSET_MASK_32 ((1ULL << UMP_LINUX_OFFSET_BITS_32)-1)
+
+#if __SIZEOF_LONG__ == 8
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_64
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_64
+#else
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_32
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_32
+#endif
+
+#endif /* _UMP_IOCTL_H_ */
diff --git a/midgard/r15p0/kernel/include/linux/ump.h b/midgard/r15p0/kernel/include/linux/ump.h
new file mode 100755
index 0000000..16f9c39
--- /dev/null
+++ b/midgard/r15p0/kernel/include/linux/ump.h
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ *
+ * This file contains the kernel space part of the UMP API.
+ *
+ */
+
+#ifndef _UMP_KERNEL_INTERFACE_H_
+#define _UMP_KERNEL_INTERFACE_H_
+
+/**
+ * @addtogroup ump_api
+ * @{
+ */
+
+/** @defgroup ump_kernel_space_api UMP Kernel Space API
+ * @{ */
+
+/**
+ * External representation of a UMP handle in kernel space.
+ */
+typedef void * ump_dd_handle;
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+
+#include <linux/ump-common.h>
+
+#define UMP_KERNEL_API_EXPORT
+
+#if defined(__KERNEL__)
+#include <linux/ump-import.h>
+#else
+#include <ump/src/library/common/ump_user.h>
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_INVALID_MEMORY_HANDLE ((ump_dd_handle)0)
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block_64
+{
+	uint64_t addr; /**< The physical address of the block */
+	uint64_t size; /**< The length of the block, in bytes, typically page aligned */
+} ump_dd_physical_block_64;
+
+/**
+ * Security filter hook.
+ *
+ * Each allocation can have a security filter attached to it.@n
+ * The hook receives
+ * @li the secure ID
+ * @li a handle to the allocation
+ * @li  the callback_data argument provided to @ref ump_dd_allocate_64 or @ref ump_dd_create_from_phys_blocks_64
+ *
+ * The hook must return @a true to indicate that access to the handle is allowed or @n
+ * @a false to state that no access is permitted.@n
+ * This hook is guaranteed to be called in the context of the requesting process/address space.
+ *
+ * The arguments provided to the hook are;
+ * @li the secure ID
+ * @li handle to the allocation
+ * @li the callback_data set when registering the hook
+ *
+ * Return value;
+ * @li @a true to permit access
+ * @li @a false to deny access
+ */
+typedef bool (*ump_dd_security_filter)(ump_secure_id, ump_dd_handle, void *);
+
+/**
+ * Final release notify hook.
+ *
+ * Allocations can have a hook attached to them which is called when the last reference to the allocation is released.
+ * No reference count manipulation is allowed on the provided handle, just property querying (ID get, size get, phys block get).
+ * This is similar to finalizers in OO languages.
+ *
+ * The arguments provided to the hook are;
+ * * handle to the allocation
+ * * the callback_data set when registering the hook
+ */
+typedef void (*ump_dd_final_release_callback)(const ump_dd_handle, void *);
+
+/**
+ * Allocate a buffer.
+ * The lifetime of the allocation is controlled by a reference count.
+ * The reference count of the returned buffer is set to 1.
+ * The memory will be freed once the reference count reaches 0.
+ * Use @ref ump_dd_retain and @ref ump_dd_release to control the reference count.
+ * @param size Number of bytes to allocate. Will be padded up to a multiple of the page size.
+ * @param flags Bit-wise OR of zero or more of the allocation flag bits.
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a
+ * secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed,
+ * just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the new allocation, or @a UMP_DD_INVALID_MEMORY_HANDLE on allocation failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/**
+ * Allocation bits getter.
+ * Retrieves the allocation flags used when instantiating the given handle.
+ * Just a copy of the flag given to @ref ump_dd_allocate_64 and @ref ump_dd_create_from_phys_blocks_64
+ * @param mem The handle retrieve the bits for
+ * @return The allocation bits used to instantiate the allocation
+ */
+UMP_KERNEL_API_EXPORT ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem);
+
+
+/**
+ * Retrieves the secure ID for the specified UMP memory.
+ *
+ * This identifier is unique across the entire system, and uniquely identifies
+ * the specified UMP memory allocation. This identifier can later be used through the
+ * @ref ump_dd_from_secure_id or
+ * @ref ump_from_secure_id
+ * functions in order to access this UMP memory, for instance from another process (if shared of course).
+ * Unless the allocation was marked as shared the returned ID will only be resolvable in the same process as did the allocation.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE will result in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_secure_id_get
+ *
+ * @see ump_dd_from_secure_id
+ * @see ump_from_secure_id
+ * @see ump_secure_id_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the secure ID for the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem);
+
+#ifdef CONFIG_KDS
+/**
+ * Retrieve the KDS resource for the specified UMP memory.
+ *
+ * The KDS resource should be used to synchronize access to the UMP allocation.
+ * See the KDS API for how to do that.
+ *
+ * @param mem Handle to the UMP memory to query.
+ * @return Pointer to the KDS resource controlling access to the UMP memory.
+ */
+UMP_KERNEL_API_EXPORT struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem);
+#endif
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_release when there is no longer any
+ * use for the retrieved handle.
+ *
+ * If called on an non-shared allocation and this is a different process @a UMP_DD_INVALID_MEMORY_HANDLE will be returned.
+ *
+ * Calling on an @a UMP_INVALID_SECURE_ID will return @a UMP_DD_INVALID_MEMORY_HANDLE
+ *
+ * @note There is a user space equivalent function called @ref ump_from_secure_id
+ *
+ * @see ump_dd_release
+ * @see ump_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get function.
+ *
+ * @return @a UMP_DD_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ * This function will return a pointer to an array of @ref ump_dd_physical_block_64 in @a pArray and the number of array elements in @a pCount
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @param mem Handle to UMP memory.
+ * @param[out] pCount Pointer to where to store the number of items in the returned array
+ * @param[out] pArray Pointer to where to store a pointer to the physical blocks array
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray);
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT uint64_t ump_dd_size_get_64(const ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory allocation.
+ *
+ * The function @ref ump_dd_release must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note You are not required to call @ref ump_dd_retain
+ * for UMP handles returned from
+ * @ref ump_dd_from_secure_id,
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_retain
+ *
+ * @see ump_retain
+ *
+ * @param mem Handle to UMP memory.
+ * @return 0 indicates success, any other value indicates failure.
+ */
+UMP_KERNEL_API_EXPORT int ump_dd_retain(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function must be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * One can only call ump_release when matched with a successful ump_dd_retain, ump_dd_allocate_64 or ump_dd_from_secure_id
+ * If called on an @a UMP_DD_INVALID_MEMORY_HANDLE the function will early out.
+ *
+ * @note There is a user space equivalent function called @ref ump_release
+ *
+ * @see ump_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_release(ump_dd_handle mem);
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ * Once wrapped the memory acts just like a normal allocation coming from @ref ump_dd_allocate_64.
+ * The only exception is that the freed physical memory is not put into the pool of free memory, but instead considered returned to the caller once @a final_release_func returns.
+ * The blocks array will be copied, so no need to hold on to it after this function returns.
+ * @param[in] blocks Array of @ref ump_dd_physical_block_64
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ * @param flags Allocation flags to mark the handle with
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed, just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/** @name UMP v1 API
+ * Functions provided to support compatibility with UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_HANDLE_INVALID UMP_DD_INVALID_MEMORY_HANDLE
+
+/**
+ * UMP error codes for kernel space.
+ */
+typedef enum
+{
+	UMP_DD_SUCCESS, /**< indicates success */
+	UMP_DD_INVALID /**< indicates failure */
+} ump_dd_status_code;
+
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block
+{
+	unsigned long addr; /**< The physical address of the block */
+	unsigned long size; /**< The length of the block, typically page aligned */
+} ump_dd_physical_block;
+
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_reference_release "ump_dd_reference_release" when there is no longer any
+ * use for the retrieved handle.
+ *
+ * @note There is a user space equivalent function called @ref ump_handle_create_from_secure_id "ump_handle_create_from_secure_id"
+ *
+ * @see ump_dd_reference_release
+ * @see ump_handle_create_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get "ump_secure_id_get " function.
+ *
+ * @return UMP_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id);
+
+
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ *
+ * @param[in] blocks Array of @ref ump_dd_physical_block
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ *
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the number of physical blocks used by the specified UMP memory.
+ *
+ * This function retrieves the number of @ref ump_dd_physical_block "ump_dd_physical_block" structs needed
+ * to describe the physical memory layout of the given UMP memory. This can later be used when calling
+ * the functions @ref ump_dd_phys_blocks_get "ump_dd_phys_blocks_get" and
+ * @ref ump_dd_phys_block_get "ump_dd_phys_block_get".
+ *
+ * @see ump_dd_phys_blocks_get
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return The number of ump_dd_physical_block structs required to describe the physical memory layout of the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will fail if the num_blocks parameter is either to large or to small.
+ *
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param blocks An array of @ref ump_dd_physical_block "ump_dd_physical_block" structs that will receive the physical description.
+ * @param num_blocks The number of blocks to return in the blocks array. Use the function
+ *                   @ref ump_dd_phys_block_count_get "ump_dd_phys_block_count_get" first to determine the number of blocks required.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the physical memory block information for specified block for the specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will return UMP_DD_INVALID if the specified index is out of range.
+ *
+ * @see ump_dd_phys_blocks_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param index Which physical info block to retrieve.
+ * @param block Pointer to a @ref ump_dd_physical_block "ump_dd_physical_block" struct which will receive the requested information.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block);
+
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get "ump_size_get"
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory.
+ *
+ * This function adds an extra reference to the specified UMP memory. This function should
+ * be used every time a UMP memory handle is duplicated, that is, assigned to another ump_dd_handle
+ * variable. The function @ref ump_dd_reference_release "ump_dd_reference_release" must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * @note You are not required to call @ref ump_dd_reference_add "ump_dd_reference_add"
+ * for UMP handles returned from
+ * @ref ump_dd_handle_create_from_secure_id "ump_dd_handle_create_from_secure_id",
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_add "ump_reference_add"
+ *
+ * @see ump_reference_add
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function should be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_release "ump_reference_release"
+ *
+ * @see ump_reference_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle mem);
+
+/* @} */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/** @} */ /* end group ump_kernel_space_api */
+
+/** @} */ /* end group ump_api */
+
+#endif /* _UMP_KERNEL_INTERFACE_H_ */
diff --git a/midgard/r15p0/kernel/license.txt b/midgard/r15p0/kernel/license.txt
new file mode 100755
index 0000000..77c14bd
--- /dev/null
+++ b/midgard/r15p0/kernel/license.txt
@@ -0,0 +1,198 @@
+GPLV2 LICENCE AGREEMENT FOR MALI GPUS LINUX KERNEL DEVICE DRIVERS SOURCE CODE
+
+THE USE OF THE SOFTWARE ACCOMPANYING THIS DOCUMENT IS EXPRESSLY SUBJECT TO THE TERMS OF THE GNU GENERAL PUBLIC LICENSE VERSION 2 AS PUBLISHED BY THE FREE SOFTWARE FOUNDATION AND SET OUT BELOW FOR REFERENCE (?GPL LICENCE?). ARM IS ONLY WILLING TO DISTRIBUTE THE SOFTWARE TO YOU ON CONDITION THAT YOU ACCEPT ALL OF THE TERMS IN THE GPL LICENCE PRIOR TO MODIFYING OR DISTRIBUTING THE SOFTWARE.
+
+
+
+Further for the period of three (3) years, ARM hereby offers to make available the source code of any part of the software program that is supplied as object code or in executable form.
+
+
+
+GPL Licence
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+Version 2, June 1991
+
+
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+
+
+
+Preamble
+
+
+
+The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too.
+
+
+
+When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things.
+
+
+
+To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it.
+
+
+
+For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
+
+
+
+We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
+
+
+
+Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations.
+
+
+
+Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all.
+
+
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+
+
+0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you".
+
+
+
+Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program).
+
+Whether that is true depends on what the Program does.
+
+
+
+1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty;
+
+and give any other recipients of the Program a copy of this License along with the Program.
+
+
+
+You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee.
+
+
+
+2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
+
+
+
+a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change.
+
+
+
+b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License.
+
+
+
+c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.)
+
+
+
+These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.
+
+
+
+Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program.
+
+
+
+In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.
+
+
+
+3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following:
+
+
+
+a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.)
+
+
+
+The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.
+
+
+
+If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code.
+
+
+
+4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.
+
+
+
+5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the
+
+Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it.
+
+
+
+6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein.
+
+You are not responsible for enforcing compliance by third parties to this License.
+
+
+
+7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program.
+
+
+
+If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances.
+
+
+
+It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.
+
+
+
+This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
+
+
+
+8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.
+
+
+
+9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+
+
+
+Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation.
+
+
+
+10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.
+
+
+
+NO WARRANTY
+
+
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+/end
+
diff --git a/midgard/r15p0/kernel/patches/integrate_kds_with_dma_buf.patch b/midgard/r15p0/kernel/patches/integrate_kds_with_dma_buf.patch
new file mode 100755
index 0000000..37458cf
--- /dev/null
+++ b/midgard/r15p0/kernel/patches/integrate_kds_with_dma_buf.patch
@@ -0,0 +1,188 @@
+diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
+index 9aa618a..30e07ea 100644
+--- a/drivers/base/Kconfig
++++ b/drivers/base/Kconfig
+@@ -192,4 +192,12 @@ config DMA_SHARED_BUFFER
+ 	  APIs extension; the file's descriptor can then be passed on to other
+ 	  driver.
+ 
++config DMA_SHARED_BUFFER_USES_KDS
++	bool "Synchronize access to dma_buf with KDS"
++	default n
++	select KDS
++	depends on DMA_SHARED_BUFFER
++	help
++	  This option adds a KDS resource within every dma_buf allocation.
++
+ endmenu
+diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
+index 20258e1..e4083fd 100644
+--- a/drivers/base/dma-buf.c
++++ b/drivers/base/dma-buf.c
+@@ -27,6 +27,8 @@
+ #include <linux/dma-buf.h>
+ #include <linux/anon_inodes.h>
+ #include <linux/export.h>
++#include <linux/poll.h>
++#include <linux/sched.h>
+ 
+ static inline int is_dma_buf_file(struct file *);
+ 
+@@ -40,6 +42,8 @@ static int dma_buf_release(struct inode *inode, struct file *file)
+ 	dmabuf = file->private_data;
+ 
+ 	dmabuf->ops->release(dmabuf);
++	kds_callback_term(&dmabuf->kds_cb);
++	kds_resource_term(&dmabuf->kds);
+ 	kfree(dmabuf);
+ 	return 0;
+ }
+@@ -61,9 +65,90 @@ static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
+ 	return dmabuf->ops->mmap(dmabuf, vma);
+ }
+ 
++
++static void dma_buf_kds_cb_fn (void *param1, void *param2)
++{
++	struct kds_resource_set **rset_ptr = param1;
++	struct kds_resource_set *rset = *rset_ptr;
++	wait_queue_head_t *wait_queue = param2;
++
++	kfree(rset_ptr);
++	kds_resource_set_release(&rset);
++	wake_up(wait_queue);
++}
++
++static int dma_buf_kds_check(struct kds_resource *kds,
++                             long unsigned int exclusive, int *poll_ret)
++{
++	/* Synchronous wait with 0 timeout - poll availability */
++	struct kds_resource_set *rset = kds_waitall(1,&exclusive,&kds,0);
++
++	if (IS_ERR(rset))
++		return POLLERR;
++
++	if (rset){
++		kds_resource_set_release(&rset);
++		*poll_ret = POLLIN | POLLRDNORM;
++		if (exclusive)
++			*poll_ret |=  POLLOUT | POLLWRNORM;
++		return 1;
++	}else{
++		return 0;
++	}
++}
++
++static unsigned int dma_buf_poll(struct file *file,
++                                 struct poll_table_struct *wait)
++{
++	struct dma_buf *dmabuf;
++	struct kds_resource *kds;
++	unsigned int ret = 0;
++
++	if (!is_dma_buf_file(file))
++		return POLLERR;
++
++	dmabuf = file->private_data;
++	kds    = &dmabuf->kds;
++
++	if (poll_does_not_wait(wait)){
++		/* Check for exclusive access (superset of shared) first */
++		if(!dma_buf_kds_check(kds, 1ul, &ret))
++			dma_buf_kds_check(kds, 0ul, &ret);
++	}else{
++		int events = poll_requested_events(wait);
++		unsigned long exclusive;
++		wait_queue_head_t *wq;
++		struct kds_resource_set **rset_ptr = kmalloc(sizeof(*rset_ptr), GFP_KERNEL);
++
++		if (!rset_ptr)
++			return POLL_ERR;
++
++		if (events & POLLOUT){
++			wq = &dmabuf->wq_exclusive;
++			exclusive = 1;
++		}else{
++			wq = &dmabuf->wq_shared;
++			exclusive = 0;
++		}
++		poll_wait(file, wq, wait);
++		ret = kds_async_waitall(rset_ptr, KDS_FLAG_LOCKED_WAIT, &dmabuf->kds_cb,
++		                        rset_ptr, wq, 1, &exclusive, &kds);
++
++		if (IS_ERR_VALUE(ret)){
++			ret = POLL_ERR;
++			kfree(rset_ptr);
++		}else{
++			/* Can't allow access until callback */
++			ret = 0;
++		}
++	}
++	return ret;
++}
++
+ static const struct file_operations dma_buf_fops = {
+ 	.release	= dma_buf_release,
+ 	.mmap		= dma_buf_mmap_internal,
++	.poll		= dma_buf_poll,
+ };
+ 
+ /*
+@@ -120,6 +205,11 @@ struct dma_buf *dma_buf_export(void *priv, const struct dma_buf_ops *ops,
+ 	mutex_init(&dmabuf->lock);
+ 	INIT_LIST_HEAD(&dmabuf->attachments);
+ 
++	init_waitqueue_head(&dmabuf->wq_exclusive);
++	init_waitqueue_head(&dmabuf->wq_shared);
++	kds_resource_init(&dmabuf->kds);
++	kds_callback_init(&dmabuf->kds_cb, 1, dma_buf_kds_cb_fn);
++
+ 	return dmabuf;
+ }
+ EXPORT_SYMBOL_GPL(dma_buf_export);
+diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
+index eb48f38..6e824d2 100644
+--- a/include/linux/dma-buf.h
++++ b/include/linux/dma-buf.h
+@@ -30,6 +30,8 @@
+ #include <linux/list.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/fs.h>
++#include <linux/kds.h>
++#include <linux/wait.h>
+ 
+ struct device;
+ struct dma_buf;
+@@ -121,6 +123,10 @@ struct dma_buf {
+ 	const struct dma_buf_ops *ops;
+ 	/* mutex to serialize list manipulation and attach/detach */
+ 	struct mutex lock;
++	struct kds_resource kds;
++	wait_queue_head_t wq_exclusive;
++	wait_queue_head_t wq_shared;
++	struct kds_callback kds_cb;
+ 	void *priv;
+ };
+ 
+@@ -156,6 +162,21 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
+ 	get_file(dmabuf->file);
+ }
+ 
++/**
++ * get_dma_buf_kds_resource - get a KDS resource for this dma-buf
++ * @dmabuf:	[in]	pointer to dma_buf
++ *
++ * Returns a KDS resource that represents the dma-buf. This should be used by
++ * drivers to synchronize access to the buffer. Note that the caller should
++ * ensure that a reference to the dma-buf exists from the call to
++ * kds_async_wait until kds_resource_set_release is called.
++ */
++static inline struct kds_resource *
++	get_dma_buf_kds_resource(struct dma_buf *dmabuf)
++{
++	return &dmabuf->kds;
++}
++
+ #ifdef CONFIG_DMA_SHARED_BUFFER
+ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+ 							struct device *dev);
diff --git a/midgard/r15p0/kernel/sconscript b/midgard/r15p0/kernel/sconscript
new file mode 100755
index 0000000..e6be64c
--- /dev/null
+++ b/midgard/r15p0/kernel/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+if env['backend'] == 'kernel' and int(env['kernel_modules']) == 1 and env['gpu'] != 'none':
+	SConscript('drivers/sconscript')
+
+	if Glob('tests/sconscript'):
+		SConscript('tests/sconscript')
diff --git a/midgard/r16p0/android/patches/K/android-k.sh b/midgard/r16p0/android/patches/K/android-k.sh
new file mode 100644
index 0000000..4e44d5e
--- /dev/null
+++ b/midgard/r16p0/android/patches/K/android-k.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+
+#
+# Copyright (C) 2014-2015 ARM Limited. All rights reserved.
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Android K script for LLVM 3.5 and above support
+#
+# This script applies two minor patches and checks-out libcxx and libcxxabi libraries
+# adding C++11 support in Android K required for LLVM3.5 and above.
+
+
+if [ -z "$ANDROID_BUILD_TOP" ]; then
+    echo "[ANDROID-PATCH] This script must be executed in an Android build environment"
+    echo "                ANDROID_BUILD_TOP is undefined"
+    echo "                please do \"source build/envseup.sh; lunch\"."
+    exit 1
+fi
+
+ANDROID_REPOSITORY_URL="https://android.googlesource.com/"
+
+REFERENCE_ARGUMENT=""
+
+# Proccess arguments. Valid arguments are:
+#        -u ARG      Override the Android repository url
+#        -r ARG      Provide a path to a reference git repository
+while getopts "u:r:" opt; do
+  case $opt in
+    u)
+      ANDROID_REPOSITORY_URL=$OPTARG
+      ;;
+
+    r)
+      REFERENCE_ARGUMENT="--reference "$OPTARG
+      ;;
+
+    \?)
+      echo "Invalid option: -$OPTARG" >&2
+      exit 1
+
+      ;;
+    :)
+      echo "Option -$OPTARG requires an argument." >&2
+      exit 1
+      ;;
+
+  esac
+done
+
+MIDGARD_DDK_WD=$(pwd)
+
+cd $ANDROID_BUILD_TOP/external
+
+#Checkout libcxx from Android Master required by LLVM due to c++11
+git clone $ANDROID_REPOSITORY_URL/platform/external/libcxx $REFERENCE_ARGUMENT
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] git clone of libcxx failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+cd libcxx
+git checkout 27ae7cb782821a4f2d3813522ee411cd978bcd85 -q
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] checkout of libcxx@27ae7cb782821a4f2d3813522ee411cd978bcd85 revision failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/libcxx.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Patching libcxx failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Checkout libcxxabi from Android Master required by LLVM due to c++11
+cd $ANDROID_BUILD_TOP/external
+git clone $ANDROID_REPOSITORY_URL/platform/external/libcxxabi $REFERENCE_ARGUMENT
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] git clone of libcxxabi failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+cd libcxxabi
+git checkout 285d67f35f6044cf733091e36248405ca967c62c -q
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] checkout of libcxxabi@285d67f35f6044cf733091e36248405ca967c62c revision failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+
+#Apply patch to bioncs
+cd $ANDROID_BUILD_TOP/bionic/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/bionics.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Patching bionics failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Add locale support to bioncs
+cd $ANDROID_BUILD_TOP/bionic/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/locale.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Adding locale support failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Add LLVM backend ARM fix to support -O0 compilation
+cd $ANDROID_BUILD_TOP/external/llvm/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/llvm.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Fixing ARM LLVM backend failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+echo "[ANDROID-PATCH] Patching completed successfully"
+cd $MIDGARD_DDK_WD
diff --git a/midgard/r16p0/android/patches/K/bionics.diff b/midgard/r16p0/android/patches/K/bionics.diff
new file mode 100644
index 0000000..b86cedf
--- /dev/null
+++ b/midgard/r16p0/android/patches/K/bionics.diff
@@ -0,0 +1,202 @@
+diff --git a/libc/Android.mk b/libc/Android.mk
+index 9610c14..d3dbe81 100644
+--- a/libc/Android.mk
++++ b/libc/Android.mk
+@@ -214,6 +215,7 @@ libc_bionic_src_:= \
+     bionic/libc_init_common.cpp \
+     bionic/libc_logging.cpp \
+     bionic/libgen.cpp \
++    bionic/locale.cpp \
+     bionic/mmap.cpp \
+     bionic/pthread_attr.cpp \
+     bionic/pthread_detach.cpp \
+@@ -232,7 +232,6 @@ libc_bionic_src_files := \
+     bionic/scandir.cpp \
+     bionic/sched_getaffinity.cpp \
+     bionic/__set_errno.cpp \
+-    bionic/setlocale.cpp \
+     bionic/signalfd.cpp \
+     bionic/sigwait.cpp \
+     bionic/statvfs.cpp \
+diff --git a/libc/bionic/wchar.cpp b/libc/bionic/wchar.cpp
+index 50a3875..cc830b7 100644
+--- a/libc/bionic/wchar.cpp
++++ b/libc/bionic/wchar.cpp
+@@ -96,6 +96,7 @@ int swscanf(const wchar_t* /*s*/, const wchar_t* /*format*/, ... ) {
+
+ int iswalnum(wint_t wc) { return isalnum(wc); }
+ int iswalpha(wint_t wc) { return isalpha(wc); }
++int iswblank(wint_t wc) { return isblank(wc); }
+ int iswcntrl(wint_t wc) { return iscntrl(wc); }
+ int iswdigit(wint_t wc) { return isdigit(wc); }
+ int iswgraph(wint_t wc) { return isgraph(wc); }
+diff --git a/libc/include/assert.h b/libc/include/assert.h
+index 62470f5..361a5ff 100644
+--- a/libc/include/assert.h
++++ b/libc/include/assert.h
+@@ -60,6 +60,6 @@
+ #endif
+
+ __BEGIN_DECLS
+-__dead void __assert(const char *, int, const char *);
+-__dead void __assert2(const char *, int, const char *, const char *);
++__dead void __assert(const char *, int, const char *) __noreturn;
++__dead void __assert2(const char *, int, const char *, const char *) __noreturn;
+ __END_DECLS
+diff --git a/libc/include/ctype.h b/libc/include/ctype.h
+index 5557e31..a66df12 100644
+--- a/libc/include/ctype.h
++++ b/libc/include/ctype.h
+@@ -45,11 +45,14 @@
+ #define	_CTYPE_U	0x01
+ #define	_CTYPE_L	0x02
+ #define	_CTYPE_N	0x04
++#define	_CTYPE_D	0x04
+ #define	_CTYPE_S	0x08
+ #define	_CTYPE_P	0x10
+ #define	_CTYPE_C	0x20
+ #define	_CTYPE_X	0x40
+ #define	_CTYPE_B	0x80
++#define	_CTYPE_R	(_CTYPE_P|_CTYPE_U|_CTYPE_L|_CTYPE_D|_CTYPE_B)
++#define	_CTYPE_A	(_CTYPE_L|_CTYPE_U)
+
+ __BEGIN_DECLS
+
+diff --git a/libc/include/locale.h b/libc/include/locale.h
+index 65b5c7d..6989851 100644
+--- a/libc/include/locale.h
++++ b/libc/include/locale.h
+@@ -29,6 +29,7 @@
+ #define _LOCALE_H_
+
+ #include <sys/cdefs.h>
++#include <xlocale.h>
+
+ __BEGIN_DECLS
+
+@@ -43,17 +44,65 @@ enum {
+     LC_PAPER     = 7,
+     LC_NAME      = 8,
+     LC_ADDRESS   = 9,
+-
+     LC_TELEPHONE      = 10,
+     LC_MEASUREMENT    = 11,
+     LC_IDENTIFICATION = 12
+ };
+
+-extern char *setlocale(int category, const char *locale);
++#define LC_CTYPE_MASK          (1 << LC_CTYPE)
++#define LC_NUMERIC_MASK        (1 << LC_NUMERIC)
++#define LC_TIME_MASK           (1 << LC_TIME)
++#define LC_COLLATE_MASK        (1 << LC_COLLATE)
++#define LC_MONETARY_MASK       (1 << LC_MONETARY)
++#define LC_MESSAGES_MASK       (1 << LC_MESSAGES)
++#define LC_PAPER_MASK          (1 << LC_PAPER)
++#define LC_NAME_MASK           (1 << LC_NAME)
++#define LC_ADDRESS_MASK        (1 << LC_ADDRESS)
++#define LC_TELEPHONE_MASK      (1 << LC_TELEPHONE)
++#define LC_MEASUREMENT_MASK    (1 << LC_MEASUREMENT)
++#define LC_IDENTIFICATION_MASK (1 << LC_IDENTIFICATION)
++
++#define LC_ALL_MASK (LC_CTYPE_MASK | LC_NUMERIC_MASK | LC_TIME_MASK | LC_COLLATE_MASK | \
++                     LC_MONETARY_MASK | LC_MESSAGES_MASK | LC_PAPER_MASK | LC_NAME_MASK | \
++                     LC_ADDRESS_MASK | LC_TELEPHONE_MASK | LC_MEASUREMENT_MASK | \
++                     LC_IDENTIFICATION_MASK)
++
++struct lconv {
++    char* decimal_point;
++    char* thousands_sep;
++    char* grouping;
++    char* int_curr_symbol;
++    char* currency_symbol;
++    char* mon_decimal_point;
++    char* mon_thousands_sep;
++    char* mon_grouping;
++    char* positive_sign;
++    char* negative_sign;
++    char  int_frac_digits;
++    char  frac_digits;
++    char  p_cs_precedes;
++    char  p_sep_by_space;
++    char  n_cs_precedes;
++    char  n_sep_by_space;
++    char  p_sign_posn;
++    char  n_sign_posn;
++    char  int_p_cs_precedes;
++    char  int_p_sep_by_space;
++    char  int_n_cs_precedes;
++    char  int_n_sep_by_space;
++    char  int_p_sign_posn;
++    char  int_n_sign_posn;
++};
++
++struct lconv* localeconv(void);
++
++locale_t duplocale(locale_t);
++void freelocale(locale_t);
++locale_t newlocale(int, const char*, locale_t);
++char* setlocale(int, const char*);
++locale_t uselocale(locale_t);
+
+-/* Make libstdc++-v3 happy.  */
+-struct lconv { };
+-struct lconv *localeconv(void);
++#define LC_GLOBAL_LOCALE ((locale_t) -1L)
+
+ __END_DECLS
+
+diff --git a/libc/include/stdio.h b/libc/include/stdio.h
+index 23fc944..28685c7 100644
+--- a/libc/include/stdio.h
++++ b/libc/include/stdio.h
+@@ -469,7 +469,10 @@ int vsprintf(char *dest, const char *format, __va_list ap)
+ }
+
+ #if defined(__clang__)
+-#define snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
++  #if !defined(snprintf)
++    #define __wrap_snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
++    #define snprintf(...) __wrap_snprintf(__VA_ARGS__)
++  #endif
+ #else
+ __BIONIC_FORTIFY_INLINE
+ __printflike(3, 4)
+@@ -481,7 +484,10 @@ int snprintf(char *dest, size_t size, const char *format, ...)
+ #endif
+
+ #if defined(__clang__)
+-#define sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
++  #if !defined(sprintf)
++    #define __wrap_sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
++    #define sprintf(...) __wrap_sprintf(__VA_ARGS__)
++  #endif
+ #else
+ __BIONIC_FORTIFY_INLINE
+ __printflike(2, 3)
+diff --git a/libc/include/stdlib.h b/libc/include/stdlib.h
+index 9fa84c1..188da69 100644
+--- a/libc/include/stdlib.h
++++ b/libc/include/stdlib.h
+@@ -68,6 +68,10 @@ static __inline__ float strtof(const char *nptr, char **endptr)
+     return (float)strtod(nptr, endptr);
+ }
+
++static __inline__ long double strtold(const char* s, char** end_ptr) {
++  return (long double)strtod(s, end_ptr);
++}
++
+ extern int atoi(const char *) __purefunc;
+ extern long atol(const char *) __purefunc;
+ extern long long atoll(const char *) __purefunc;
+diff --git a/libc/include/wchar.h b/libc/include/wchar.h
+index 76ac02c..c413806 100644
+--- a/libc/include/wchar.h
++++ b/libc/include/wchar.h
+@@ -77,6 +77,7 @@ extern int               fwprintf(FILE *, const wchar_t *, ...);
+ extern int               fwscanf(FILE *, const wchar_t *, ...);
+ extern int               iswalnum(wint_t);
+ extern int               iswalpha(wint_t);
++extern int               iswblank(wint_t);
+ extern int               iswcntrl(wint_t);
+ extern int               iswdigit(wint_t);
+ extern int               iswgraph(wint_t);
diff --git a/midgard/r16p0/android/patches/K/libcxx.diff b/midgard/r16p0/android/patches/K/libcxx.diff
new file mode 100644
index 0000000..a10d75c
--- /dev/null
+++ b/midgard/r16p0/android/patches/K/libcxx.diff
@@ -0,0 +1,13 @@
+diff --git a/include/cstdlib b/include/cstdlib
+index 152b891..e45a8fd 100644
+--- a/include/cstdlib
++++ b/include/cstdlib
+@@ -126,7 +126,7 @@ using ::realloc;
+ using ::abort;
+ using ::atexit;
+ using ::exit;
+-using ::_Exit;
++
+ using ::getenv;
+ using ::system;
+ using ::bsearch;
diff --git a/midgard/r16p0/android/patches/K/license.txt b/midgard/r16p0/android/patches/K/license.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/midgard/r16p0/android/patches/K/license.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/midgard/r16p0/android/patches/K/llvm.diff b/midgard/r16p0/android/patches/K/llvm.diff
new file mode 100644
index 0000000..d41d279
--- /dev/null
+++ b/midgard/r16p0/android/patches/K/llvm.diff
@@ -0,0 +1,14 @@
+diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
+index 8cdb853..868dd47 100644
+--- a/lib/Target/ARM/ARMInstrInfo.cpp
++++ b/lib/Target/ARM/ARMInstrInfo.cpp
+@@ -130,6 +130,10 @@ namespace {
+         MIB.addImm(0);
+       AddDefaultPred(MIB);
+
++      // Fix the GOT address by adding pc.
++      BuildMI(FirstMBB, MBBI, DL, TII.get(ARM::tPICADD), GlobalBaseReg)
++          .addReg(GlobalBaseReg).addImm(ARMPCLabelIndex);
++
+       return true;
+     }
diff --git a/midgard/r16p0/android/patches/K/locale.diff b/midgard/r16p0/android/patches/K/locale.diff
new file mode 100644
index 0000000..9c4c140
--- /dev/null
+++ b/midgard/r16p0/android/patches/K/locale.diff
@@ -0,0 +1,204 @@
+diff --git a/libc/bionic/locale.cpp b/libc/bionic/locale.cpp
+new file mode 100644
+index 0000000..4fade84
+--- /dev/null
++++ b/libc/bionic/locale.cpp
+@@ -0,0 +1,156 @@
++/*
++ * Copyright (C) 2008 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++#include <locale.h>
++#include <pthread.h>
++#include <stdlib.h>
++
++// We currently support a single locale, the "C" locale (also known as "POSIX").
++
++struct __locale_t {
++  // Because we only support one locale, these are just tokens with no data.
++};
++
++static pthread_once_t gLocaleOnce = PTHREAD_ONCE_INIT;
++static lconv gLocale;
++
++static pthread_once_t gUselocaleKeyOnce = PTHREAD_ONCE_INIT;
++static pthread_key_t gUselocaleKey;
++
++static void __locale_init() {
++  gLocale.decimal_point = const_cast<char*>(".");
++
++  char* not_available = const_cast<char*>("");
++  gLocale.thousands_sep = not_available;
++  gLocale.grouping = not_available;
++  gLocale.int_curr_symbol = not_available;
++  gLocale.currency_symbol = not_available;
++  gLocale.mon_decimal_point = not_available;
++  gLocale.mon_thousands_sep = not_available;
++  gLocale.mon_grouping = not_available;
++  gLocale.positive_sign = not_available;
++  gLocale.negative_sign = not_available;
++
++  gLocale.int_frac_digits = CHAR_MAX;
++  gLocale.frac_digits = CHAR_MAX;
++  gLocale.p_cs_precedes = CHAR_MAX;
++  gLocale.p_sep_by_space = CHAR_MAX;
++  gLocale.n_cs_precedes = CHAR_MAX;
++  gLocale.n_sep_by_space = CHAR_MAX;
++  gLocale.p_sign_posn = CHAR_MAX;
++  gLocale.n_sign_posn = CHAR_MAX;
++  gLocale.int_p_cs_precedes = CHAR_MAX;
++  gLocale.int_p_sep_by_space = CHAR_MAX;
++  gLocale.int_n_cs_precedes = CHAR_MAX;
++  gLocale.int_n_sep_by_space = CHAR_MAX;
++  gLocale.int_p_sign_posn = CHAR_MAX;
++  gLocale.int_n_sign_posn = CHAR_MAX;
++}
++
++static void __uselocale_key_init() {
++  pthread_key_create(&gUselocaleKey, NULL);
++}
++
++static bool __is_supported_locale(const char* locale) {
++  return (strcmp(locale, "") == 0 || strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0);
++}
++
++static locale_t __new_locale() {
++  return reinterpret_cast<locale_t>(malloc(sizeof(__locale_t)));
++}
++
++lconv* localeconv() {
++  pthread_once(&gLocaleOnce, __locale_init);
++  return &gLocale;
++}
++
++locale_t duplocale(locale_t l) {
++  locale_t clone = __new_locale();
++  if (clone != NULL && l != LC_GLOBAL_LOCALE) {
++    *clone = *l;
++  }
++  return clone;
++}
++
++void freelocale(locale_t l) {
++  free(l);
++}
++
++locale_t newlocale(int category_mask, const char* locale_name, locale_t /*base*/) {
++  // Is 'category_mask' valid?
++  if ((category_mask & ~LC_ALL_MASK) != 0) {
++    errno = EINVAL;
++    return NULL;
++  }
++
++  if (!__is_supported_locale(locale_name)) {
++    errno = ENOENT;
++    return NULL;
++  }
++
++  return __new_locale();
++}
++
++char* setlocale(int category, char const* locale_name) {
++  // Is 'category' valid?
++  if (category < LC_CTYPE || category > LC_IDENTIFICATION) {
++    errno = EINVAL;
++    return NULL;
++  }
++
++  // Caller just wants to query the current locale?
++  if (locale_name == NULL) {
++    return const_cast<char*>("C");
++  }
++
++  // Caller wants one of the mandatory POSIX locales?
++  if (__is_supported_locale(locale_name)) {
++    return const_cast<char*>("C");
++  }
++
++  // We don't support any other locales.
++  errno = ENOENT;
++  return NULL;
++}
++
++locale_t uselocale(locale_t new_locale) {
++  pthread_once(&gUselocaleKeyOnce, __uselocale_key_init);
++
++  locale_t old_locale = static_cast<locale_t>(pthread_getspecific(gUselocaleKey));
++
++  // If this is the first call to uselocale(3) on this thread, we return LC_GLOBAL_LOCALE.
++  if (old_locale == NULL) {
++    old_locale = LC_GLOBAL_LOCALE;
++  }
++
++  if (new_locale != NULL) {
++    pthread_setspecific(gUselocaleKey, new_locale);
++  }
++
++  return old_locale;
++}
+diff --git a/libc/include/xlocale.h b/libc/include/xlocale.h
+new file mode 100644
+index 0000000..f7eb8f4
+--- /dev/null
++++ b/libc/include/xlocale.h
+@@ -0,0 +1,36 @@
++/*
++ * Copyright (C) 2014 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++#ifndef _XLOCALE_H_
++#define _XLOCALE_H_
++
++/* If we just use void* here, GCC exposes that in error messages. */
++struct __locale_t;
++typedef struct __locale_t* locale_t;
++
++#endif /* _XLOCALE_H_ */
diff --git a/midgard/r16p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt b/midgard/r16p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
new file mode 100644
index 0000000..687b9f2
--- /dev/null
+++ b/midgard/r16p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
@@ -0,0 +1,96 @@
+#
+# (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+* ARM Mali Midgard devices
+
+
+Required properties:
+
+- compatible : Should be mali<chip>, replacing digits with x from the back,
+until malit<Major>xx, ending with arm,mali-midgard, the latter not optional.
+- reg : Physical base address of the device and length of the register area.
+- interrupts : Contains the three IRQ lines required by T-6xx devices
+- interrupt-names : Contains the names of IRQ resources in the order they were
+provided in the interrupts property. Must contain: "JOB, "MMU", "GPU".
+
+Optional:
+
+- clocks : Phandle to clock for the Mali T-6xx device.
+- clock-names : Shall be "clk_mali".
+- mali-supply : Phandle to regulator for the Mali device. Refer to
+Documentation/devicetree/bindings/regulator/regulator.txt for details.
+- operating-points : Refer to Documentation/devicetree/bindings/power/opp.txt
+for details.
+- snoop_enable_smc : SMC function ID to enable CCI snooping on the GPU port(s).
+- snoop_disable_smc : SMC function ID to disable CCI snooping on the GPU port(s).
+- jm_config : For T860/T880. Sets job manager configuration. An array containing:
+	- 1 to override the TIMESTAMP value, 0 otherwise.
+	- 1 to override clock gate, forcing them to be always on, 0 otherwise.
+	- 1 to enable job throttle, limiting the number of cores that can be started
+	  simultaneously, 0 otherwise.
+	- Value between 0 and 63 (including). If job throttle is enabled, this is one
+	  less than the number of cores that can be started simultaneously.
+- power_model : Sets power model parameters. Note that this model was designed for the Juno
+	        platform, and may not be suitable for other platforms. A structure containing :
+	- compatible: Should be arm,mali-simple-power-model
+	- voltage: Voltage at reference point. Specified in mV.
+	- frequency: Frequency at reference point. Specified in MHz.
+	- dynamic-power: Dynamic power at reference frequency and voltage. Specified in mW.
+	- static-power: Static power at reference frequency. Specified in mW.
+	- ts: An array containing coefficients for the temperature scaling factor.
+	  Used as : tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0], where T = temperature
+	- thermal-zone: A string identifying the thermal zone used for the GPU
+- system-coherency : Sets the coherency protocol to be used for coherent
+		     accesses made from the GPU.
+		     If not set then no coherency is used.
+	- 0  : ACE-Lite
+	- 1  : ACE
+	- 31 : No coherency
+- ipa-model : Sets the IPA model to be used for power management. GPU probe will fail if the
+	      model is not found in the registered models list. If no model is specified here,
+	      a gpu-id based model is picked if available, otherwise the default model is used.
+	- generic_ipa_model : Default model used on mali
+
+Example for a Mali-T602:
+
+gpu@0xfc010000 {
+	compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
+	reg = <0xfc010000 0x4000>;
+	interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+	interrupt-names = "JOB", "MMU", "GPU";
+
+	clocks = <&pclk_mali>;
+	clock-names = "clk_mali";
+	mali-supply = <&vdd_mali>;
+	operating-points = <
+		/* KHz   uV */
+		533000 1250000,
+		450000 1150000,
+		400000 1125000,
+		350000 1075000,
+		266000 1025000,
+		160000  925000,
+		100000  912500,
+	>;
+	power_model {
+		compatible = "arm,mali-simple-power-model";
+		voltage = <800>;
+		frequency = <500>;
+		static-power = <500>;
+		dynamic-power = <1500>;
+		ts = <20000 2000 (-20) 2>;
+		thermal-zone = "gpu";
+	};
+};
diff --git a/midgard/r16p0/kernel/Documentation/dma-buf-test-exporter.txt b/midgard/r16p0/kernel/Documentation/dma-buf-test-exporter.txt
new file mode 100644
index 0000000..4208010
--- /dev/null
+++ b/midgard/r16p0/kernel/Documentation/dma-buf-test-exporter.txt
@@ -0,0 +1,40 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+=====================
+dma-buf-test-exporter
+=====================
+
+Overview
+--------
+
+The dma-buf-test-exporter is a simple exporter of dma_buf objects.
+It has a private API to allocate and manipulate the buffers which are represented as dma_buf fds.
+The private API allows:
+* simple allocation of physically non-contiguous buffers
+* simple allocation of physically contiguous buffers
+* query kernel side API usage stats (number of attachments, number of mappings, mmaps)
+* failure mode configuration (fail attach, mapping, mmap)
+* kernel side memset of buffers
+
+The buffers support all of the dma_buf API, including mmap.
+
+It supports being compiled as a module both in-tree and out-of-tree.
+
+See include/linux/dma-buf-test-exporter.h for the ioctl interface.
+See Documentation/dma-buf-sharing.txt for details on dma_buf.
+
+
diff --git a/midgard/r16p0/kernel/Documentation/kds.txt b/midgard/r16p0/kernel/Documentation/kds.txt
new file mode 100644
index 0000000..639288c
--- /dev/null
+++ b/midgard/r16p0/kernel/Documentation/kds.txt
@@ -0,0 +1,268 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+==============================
+kds - Kernel Dependency System
+==============================
+
+Introduction
+------------
+kds provides a mechanism for clients to atomically lock down multiple abstract resources.
+This can be done either synchronously or asynchronously.
+Abstract resources is used to allow a set of clients to use kds to control access to any
+resource, an example is structured memory buffers.
+
+kds supports that buffer is locked for exclusive access and sharing of buffers.
+
+kds can be built as either a integrated feature of the kernel or as a module.
+It supports being compiled as a module both in-tree and out-of-tree.
+
+
+Concepts
+--------
+A core concept in kds is abstract resources.
+A kds resource is just an abstraction for some client object, kds doesn't care what it is.
+Typically EGL will consider UMP buffers as being a resource, thus each UMP buffer has
+a kds resource for synchronization to the buffer.
+
+kds allows a client to create and destroy the abstract resource objects.
+A new resource object is made available asap (it is just a simple malloc with some initializations),
+while destroy it requires some external synchronization.
+
+The other core concept in kds is consumer of resources.
+kds is requested to allow a client to consume a set of resources and the client will be notified when it can consume the resources.
+
+Exclusive access allows only one client to consume a resource.
+Shared access permits multiple consumers to acceess a resource concurrently.
+
+
+APIs
+----
+kds provides simple resource allocate and destroy functions.
+Clients use this to instantiate and control the lifetime of the resources kds manages.
+
+kds provides two ways to wait for resources:
+- Asynchronous wait: the client specifies a function pointer to be called when wait is over
+- Synchronous wait: Function blocks until access is gained.
+
+The synchronous API has a timeout for the wait.
+The call can early out if a signal is delivered.
+
+After a client is done consuming the resource kds must be notified to release the resources and let some other client take ownership.
+This is done via resource set release call.
+
+A Windows comparison:
+kds implements WaitForMultipleObjectsEx(..., bWaitAll = TRUE, ...) but also has an asynchronous version in addition.
+kds resources can be seen as being the same as NT object manager resources.
+
+Internals
+---------
+kds guarantees atomicity when a set of resources is operated on.
+This is implemented via a global resource lock which is taken by kds when it updates resource objects.
+
+Internally a resource in kds is a linked list head with some flags.
+
+When a consumer requests access to a set of resources it is queued on each of the resources.
+The link from the consumer to the resources can be triggered. Once all links are triggered
+the registered callback is called or the blocking function returns.
+A link is considered triggered if it is the first on the list of consumers of a resource,
+or if all the links ahead of it is marked as shared and itself is of the type shared.
+
+When the client is done consuming the consumer object is removed from the linked lists of
+the resources and a potential new consumer becomes the head of the resources.
+As we add and remove consumers atomically across all resources we can guarantee that
+we never introduces a A->B + B->A type of loops/deadlocks.
+
+
+kbase/base implementation
+-------------------------
+A HW job needs access to a set of shared resources.
+EGL tracks this and encodes the set along with the atom in the ringbuffer.
+EGL allocates a (k)base dep object to represent the dependency to the set of resources and encodes that along with the list of resources.
+This dep object is use to create a dependency from a job chain(atom) to the resources it needs to run.
+When kbase decodes the atom in the ringbuffer it finds the set of resources and calls kds to request all the needed resources.
+As EGL needs to know when the kds request is delivered a new base event object is needed: atom enqueued. This event is only delivered for atoms which uses kds.
+The callback kbase registers trigger the dependency object described which would trigger the existing JD system to release the job chain.
+When the atom is done kds resource set release is call to release the resources.
+
+EGL will typically use exclusive access to the render target, while all buffers used as input can be marked as shared.
+
+
+Buffer publish/vsync
+--------------------
+EGL will use a separate ioctl or DRM flip to request the flip.
+If the LCD driver is integrated with kds EGL can do these operations early.
+The LCD driver must then implement the ioctl or DRM flip to be asynchronous with kds async call.
+The LCD driver binds a kds resource to each virtual buffer (2 buffers in case of double-buffering).
+EGL will make a dependency to the target kds resource in the kbase atom.
+After EGL receives a atom enqueued event it can ask the LCD driver to pan to the target kds resource.
+When the atom is completed it'll release the resource and the LCD driver will get its callback.
+In the callback it'll load the target buffer into the DMA unit of the LCD hardware.
+The LCD driver will be the consumer of both buffers for a short period.
+The LCD driver will call kds resource set release on the previous on-screen buffer when the next vsync/dma read end is handled.
+
+===============================================
+Kernel driver kds client design considerations
+=============================================== 
+
+Number of resources
+--------------------
+
+The kds api allows a client to wait for ownership of a number of resources, where by the client does not take on ownership of any of the resources in the resource set 
+until all of the resources in the set are released.  Consideration must be made with respect to performance, as waiting on large number of resources will incur
+a greater overhead and may increase system latency.  It may be worth considering how independent each of the resources are, for example if the same set of resources 
+are waited upon by each of the clients, then it may be possible to aggregate these into one resource that each client waits upon.
+
+Clients with shared access
+---------------------------
+
+The kds api allows a number of clients to gain shared access to a resource simultaneously, consideration must be made with respect to performance, large numbers of clients
+wanting shared access can incur a performance penalty and may increase system latency, specifically when the clients are granted access. Having an excessively high 
+number of clients with shared access should be avoided, consideration should be made to the call back configuration being used. See Callbacks and Scenario 1 below.
+
+Callbacks
+----------
+
+Careful consideration must be made as to which callback type is most appropriate for kds clients, direct callbacks are called immediately from the context in which the
+ownership of the resource is passed to the next waiter in the list.  Where as when using deferred callbacks the callback is deferred and called from outside the context
+that is relinquishing ownership, while this reduces the latency in the releasing clients context it does incur a cost as there is more latency between a resource 
+becoming free and the new client owning the resource callback being executed.  
+
+Obviously direct callbacks have a performance advantage, as the call back is immediate and does not have to wait for the kernel to context switch to schedule in the 
+execution of the callback.  
+
+However as the callback is immediate and within the context that is granting ownership it is important that the callback perform the MINIMUM amount of work necessary, 
+long call backs could cause poor system latency.  Special care and attention must be taken if the direct callbacks can be called from IRQ handlers, such as when 
+kds_resource_set_release is called from an IRQ handler, in this case you have to avoid any calls that may sleep.  
+
+Deferred contexts have the advantage that the call backs are deferred until they are scheduled by the kernel, therefore they are allowed to call functions that may sleep
+and if scheduled from IRQ context not incur as much system latency as would be seen with direct callbacks from within the IRQ.
+
+Once the clients callback has been called, the client is considered to be owning the resource.  Within the callback the client may only need to perform a small amount of work
+before the client need to give up owner ship.  The kds_resource_release function may be called from with in the call back, but consideration must be made when using direct 
+callbacks, with both respect to execution time and stack usage. Consider the example in Scenario 2 with direct callbacks:
+
+Scenario 1 - Shared client access - direct callbacks:
+
+Resources: X
+Clients: A(S), B(S), C(S), D(S), E(E)
+where: (S) = shared user, (E) = exclusive
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+}
+
+Where <client> is either A,B,C,D
+                                        Queue   |Owner 
+1.  E Requests X exclusive                      |
+2.  E Owns     X exclusive                      |E
+3.  A Requests X shared                        A|E
+4.  B Requests X shared                       BA|E
+5.  C Requests X shared                      CBA|E
+6.  D Requests X shared                     DCBA|E
+7.  E Releases X                                |DCBA
+8.  A Owns X shared                             |DCBA
+9.  B Owns X shared                             |DCBA
+10. C Owns X shared                             |DCBA
+11. D Owns X shared                             |DCBA
+
+At point 7 it is important to note that when E releases X; A,B,C and D become the new shared owners of X and the call back for each of the client(A,B,C,D) triggered, so consideration
+must be made as to whether a direct or deferred callback is suitable, using direct callbacks would result in the call graph.
+
+Call graph when E releases X:
+    kds_resource_set_release( .. )
+        +->client_A_cb( .. )
+        +->client_B_cb( .. )
+        +->client_C_cb( .. )
+        +->client_D_cb( .. )
+
+
+Scenario 2 - Immediate resource release - direct callbacks:
+
+Resource: X
+Clients: A, B, C, D
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+    kds_resource_set_release( .. );
+}
+
+Where <client> is either A,B,C,D
+
+1. A Owns     X exclusive
+2. B Requests X exclusive	(direct callback)
+3. C Requests X exclusive	(direct callback)
+4. D Requests X exclusive	(direct callback)
+5. A Releases X
+
+Call graph when A releases X:
+    kds_resource_set_release( .. )
+        +->client_B_cb( .. )
+            +->kds_resource_set_release( .. )
+                +->client_C_cb( .. )
+                    +->kds_resource_set_release( .. )
+                        +->client_D_cb( .. )
+
+As can be seen when a client releases the resource, with direct call backs it is possible to create nested calls
+
+IRQ Considerations
+-------------------
+
+Usage of kds_resource_release in IRQ handlers should be carefully considered.
+
+Things to keep in mind:
+
+1.) Are you using direct or deferred callbacks?
+2.) How many resources are you releasing?
+3.) How many shared waiters are pending on the resource?
+
+Releasing ownership and wait cancellation
+------------------------------------------
+
+Client Wait Cancellation
+-------------------------
+
+It may be necessary in certain circumstances for the client to cancel the wait for kds resources for error handling, process termination etc.  Cancellation is 
+performed using kds_resource_set_release or kds_resource_set_release_sync using the rset that was received from kds_async_waitall, kds_resource_set_release_sync 
+being used for waits which are using deferred callbacks.
+
+It is possible that while the request to cancel the wait is being issued by the client, the client is granted access to the resources.  Normally after the client
+has taken ownership and finishes with that resource, it will release ownership to signal other waiters which are pending, this causes a race with the cancellation.
+To prevent KDS trying to remove a wait twice from the internal list and accessing memory that is potentially freed, it is very important that all releasers use the
+same rset pointer.  Here is a simplified example of bad usage that must be avoided in any client implementation:
+
+Senario 3 - Bad release from multiple contexts:
+
+	This scenaro is highlighting bad usage of the kds API
+
+	kds_resource_set * rset;
+	kds_resource_set * rset_copy;
+
+	kds_async_waitall( &rset, ... ... ... );
+
+	/* Don't do this */
+	rset_copy = rset;
+
+Context A:
+	kds_resource_set_release( &rset )
+
+Context B:
+	kds_resource_set_release( &rset_copy )
+
diff --git a/midgard/r16p0/kernel/drivers/base/dma_buf_lock/sconscript b/midgard/r16p0/kernel/drivers/base/dma_buf_lock/sconscript
new file mode 100644
index 0000000..99293c3
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/dma_buf_lock/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2012, 2014, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+if env.KernelConfigEnabled("CONFIG_DMA_SHARED_BUFFER_USES_KDS"):
+    SConscript("src/sconscript")
+    if env["tests"] and Glob("tests/sconscript"):
+        SConscript("tests/sconscript")
diff --git a/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/Kbuild b/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
new file mode 100644
index 0000000..68dad07
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-m := dma_buf_lock.o
+endif
diff --git a/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/Makefile b/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/Makefile
new file mode 100644
index 0000000..cf76132
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: dma_buf_lock
+
+dma_buf_lock:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include"
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c b/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
new file mode 100644
index 0000000..a6f5ff1
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/dma-buf.h>
+#include <linux/kds.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+
+#include "dma_buf_lock.h"
+
+/* Maximum number of buffers that a single handle can address */
+#define DMA_BUF_LOCK_BUF_MAX 32
+
+#define DMA_BUF_LOCK_DEBUG 1
+
+static dev_t dma_buf_lock_dev;
+static struct cdev dma_buf_lock_cdev;
+static struct class *dma_buf_lock_class;
+static char dma_buf_lock_dev_name[] = "dma_buf_lock";
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static struct file_operations dma_buf_lock_fops =
+{
+	.owner   = THIS_MODULE,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = dma_buf_lock_ioctl,
+#else
+	.ioctl   = dma_buf_lock_ioctl,
+#endif
+	.compat_ioctl   = dma_buf_lock_ioctl,
+};
+
+typedef struct dma_buf_lock_resource
+{
+	int *list_of_dma_buf_fds;               /* List of buffers copied from userspace */
+	atomic_t locked;                        /* Status of lock */
+	struct dma_buf **dma_bufs;
+	struct kds_resource **kds_resources;    /* List of KDS resources associated with buffers */
+	struct kds_resource_set *resource_set;
+	unsigned long exclusive;                /* Exclusive access bitmap */
+	wait_queue_head_t wait;
+	struct kds_callback cb;
+	struct kref refcount;
+	struct list_head link;
+	int count;
+} dma_buf_lock_resource;
+
+static LIST_HEAD(dma_buf_lock_resource_list);
+static DEFINE_MUTEX(dma_buf_lock_mutex);
+
+static inline int is_dma_buf_lock_file(struct file *);
+static void dma_buf_lock_dounlock(struct kref *ref);
+
+static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
+{
+	dma_buf_lock_resource *resource;
+
+	if (!is_dma_buf_lock_file(file))
+		return -EINVAL;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_release\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return 0;
+}
+
+static void dma_buf_lock_kds_callback(void *param1, void *param2)
+{
+	dma_buf_lock_resource *resource = param1;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_kds_callback\n");
+#endif
+	atomic_set(&resource->locked, 1);
+
+	wake_up(&resource->wait);
+}
+
+static unsigned int dma_buf_lock_handle_poll(struct file *file,
+                                             struct poll_table_struct *wait)
+{
+	dma_buf_lock_resource *resource;
+	unsigned int ret = 0;
+
+	if (!is_dma_buf_lock_file(file))
+		return POLLERR;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll\n");
+#endif
+	if (1 == atomic_read(&resource->locked))
+	{
+		/* Resources have been locked */
+		ret = POLLIN | POLLRDNORM;
+		if (resource->exclusive)
+		{
+			ret |=  POLLOUT | POLLWRNORM;
+		}
+	}
+	else
+	{
+		if (!poll_does_not_wait(wait))
+		{
+			poll_wait(file, &resource->wait, wait);
+		}
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll : return %i\n", ret);
+#endif
+	return ret;
+}
+
+static const struct file_operations dma_buf_lock_handle_fops = {
+	.release	= dma_buf_lock_handle_release,
+	.poll		= dma_buf_lock_handle_poll,
+};
+
+/*
+ * is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock
+ */
+static inline int is_dma_buf_lock_file(struct file *file)
+{
+	return file->f_op == &dma_buf_lock_handle_fops;
+}
+
+
+
+/*
+ * Start requested lock.
+ *
+ * Allocates required memory, copies dma_buf_fd list from userspace,
+ * acquires related KDS resources, and starts the lock.
+ */
+static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
+{
+	dma_buf_lock_resource *resource;
+	int size;
+	int fd;
+	int i;
+	int ret;
+
+	if (NULL == request->list_of_dma_buf_fds)
+	{
+		return -EINVAL;
+	}
+	if (request->count <= 0)
+	{
+		return -EINVAL;
+	}
+	if (request->count > DMA_BUF_LOCK_BUF_MAX)
+	{
+		return -EINVAL;
+	}
+	if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE &&
+	    request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
+	{
+		return -EINVAL;
+	}
+
+	resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
+	if (NULL == resource)
+	{
+		return -ENOMEM;
+	}
+
+	atomic_set(&resource->locked, 0);
+	kref_init(&resource->refcount);
+	INIT_LIST_HEAD(&resource->link);
+	resource->count = request->count;
+
+	/* Allocate space to store dma_buf_fds received from user space */
+	size = request->count * sizeof(int);
+	resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->list_of_dma_buf_fds)
+	{
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Allocate space to store dma_buf pointers associated with dma_buf_fds */
+	size = sizeof(struct dma_buf *) * request->count;
+	resource->dma_bufs = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->dma_bufs)
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+	/* Allocate space to store kds_resources associated with dma_buf_fds */
+	size = sizeof(struct kds_resource *) * request->count;
+	resource->kds_resources = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->kds_resources)
+	{
+		kfree(resource->dma_bufs);
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Copy requested list of dma_buf_fds from user space */
+	size = request->count * sizeof(int);
+	if (0 != copy_from_user(resource->list_of_dma_buf_fds, (void __user *)request->list_of_dma_buf_fds, size))
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource->dma_bufs);
+		kfree(resource->kds_resources);
+		kfree(resource);
+		return -ENOMEM;
+	}
+#if DMA_BUF_LOCK_DEBUG
+	for (i = 0; i < request->count; i++)
+	{
+		printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
+	}
+#endif
+
+	/* Add resource to global list */
+	mutex_lock(&dma_buf_lock_mutex);
+
+	list_add(&resource->link, &dma_buf_lock_resource_list);
+
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	for (i = 0; i < request->count; i++)
+	{
+		/* Convert fd into dma_buf structure */
+		resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
+
+		if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i])))
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+
+		/*Get kds_resource associated with dma_buf */
+		resource->kds_resources[i] = get_dma_buf_kds_resource(resource->dma_bufs[i]);
+
+		if (NULL == resource->kds_resources[i])
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+#if DMA_BUF_LOCK_DEBUG
+		printk("dma_buf_lock_dolock : dma_buf_fd %i dma_buf %X kds_resource %X\n", resource->list_of_dma_buf_fds[i],
+		       (unsigned int)resource->dma_bufs[i], (unsigned int)resource->kds_resources[i]);
+#endif
+	}
+
+	kds_callback_init(&resource->cb, 1, dma_buf_lock_kds_callback);
+	init_waitqueue_head(&resource->wait);
+
+	kref_get(&resource->refcount);
+
+	/* Create file descriptor associated with lock request */
+	fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops,
+	                      (void *)resource, 0);
+	if (fd < 0)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+		return fd;
+	}
+
+	resource->exclusive = request->exclusive;
+
+	/* Start locking process */
+	ret = kds_async_waitall(&resource->resource_set,
+	                        &resource->cb, resource, NULL,
+	                        request->count,  &resource->exclusive,
+	                        resource->kds_resources);
+
+	if (IS_ERR_VALUE(ret))
+	{
+		put_unused_fd(fd);
+
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+
+		return ret;
+	}
+
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_dolock : complete\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return fd;
+}
+
+static void dma_buf_lock_dounlock(struct kref *ref)
+{
+	int i;
+	dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
+
+	atomic_set(&resource->locked, 0);
+
+	kds_callback_term(&resource->cb);
+
+	kds_resource_set_release(&resource->resource_set);
+
+	list_del(&resource->link);
+
+	for (i = 0; i < resource->count; i++)
+	{
+		dma_buf_put(resource->dma_bufs[i]);
+	}
+
+	kfree(resource->kds_resources);
+	kfree(resource->dma_bufs);
+	kfree(resource->list_of_dma_buf_fds);
+	kfree(resource);
+}
+
+static int __init dma_buf_lock_init(void)
+{
+	int err;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init\n");
+#endif
+	err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
+
+	if (0 == err)
+	{
+		cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops);
+
+		err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1);
+
+		if (0 == err)
+		{
+			dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
+			if (IS_ERR(dma_buf_lock_class))
+			{
+				err = PTR_ERR(dma_buf_lock_class);
+			}
+			else
+			{
+				struct device *mdev;
+				mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev, NULL, dma_buf_lock_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return 0;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(dma_buf_lock_class);
+			}
+			cdev_del(&dma_buf_lock_cdev);
+		}
+
+		unregister_chrdev_region(dma_buf_lock_dev, 1);
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init failed\n");
+#endif
+	return err;
+}
+
+static void __exit dma_buf_lock_exit(void)
+{
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_exit\n");
+#endif
+
+	/* Unlock all outstanding references */
+	while (1)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		if (list_empty(&dma_buf_lock_resource_list))
+		{
+			mutex_unlock(&dma_buf_lock_mutex);
+			break;
+		}
+		else
+		{
+			dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next,
+			                                             dma_buf_lock_resource, link);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+		}
+	}
+
+	device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
+
+	class_destroy(dma_buf_lock_class);
+
+	cdev_del(&dma_buf_lock_cdev);
+
+	unregister_chrdev_region(dma_buf_lock_dev, 1);
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	dma_buf_lock_k_request request;
+	int size = _IOC_SIZE(cmd);
+
+	if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
+	{
+		return -ENOTTY;
+	}
+
+	switch (cmd)
+	{
+		case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
+			if (size != sizeof(dma_buf_lock_k_request))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&request, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+#if DMA_BUF_LOCK_DEBUG
+			printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
+#endif
+			return dma_buf_lock_dolock(&request);
+	}
+
+	return -ENOTTY;
+}
+
+module_init(dma_buf_lock_init);
+module_exit(dma_buf_lock_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h b/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
new file mode 100644
index 0000000..e1b9348
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _DMA_BUF_LOCK_H
+#define _DMA_BUF_LOCK_H
+
+typedef enum dma_buf_lock_exclusive
+{
+	DMA_BUF_LOCK_NONEXCLUSIVE = 0,
+	DMA_BUF_LOCK_EXCLUSIVE = -1
+} dma_buf_lock_exclusive;
+
+typedef struct dma_buf_lock_k_request
+{
+	int count;
+	int *list_of_dma_buf_fds;
+	int timeout;
+	dma_buf_lock_exclusive exclusive;
+} dma_buf_lock_k_request;
+
+#define DMA_BUF_LOCK_IOC_MAGIC '~'
+
+#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC       _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
+
+#define DMA_BUF_LOCK_IOC_MINNR 11
+#define DMA_BUF_LOCK_IOC_MAXNR 11
+
+#endif /* _DMA_BUF_LOCK_H */
diff --git a/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/sconscript b/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/sconscript
new file mode 100644
index 0000000..b8724f1
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/dma_buf_lock/src/sconscript
@@ -0,0 +1,33 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+src = [Glob('#kernel/drivers/base/dma_buf_lock/src/*.c'), Glob('#kernel/drivers/base/dma_buf_lock/src/*.h'), Glob('#kernel/drivers/base/dma_buf_lock/src/K*')]
+
+if env.GetOption('clean') :
+	# Clean module
+	env.Execute(Action("make clean", '[CLEAN] dma_buf_lock'))
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
+else:
+	# Build module
+	makeAction=Action("cd ${SOURCE.dir} && make dma_buf_lock && cp dma_buf_lock.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [makeAction])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
diff --git a/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild b/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
new file mode 100644
index 0000000..56b9f86
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma-buf-test-exporter.o
+endif
diff --git a/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig b/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
new file mode 100644
index 0000000..974f0c2
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+config DMA_SHARED_BUFFER_TEST_EXPORTER
+	tristate "Test exporter for the dma-buf framework"
+	depends on DMA_SHARED_BUFFER
+	help
+	  This option enables the test exporter usable to help test importerts.
diff --git a/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/Makefile b/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
new file mode 100644
index 0000000..06e3d5c
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/Makefile
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
new file mode 100644
index 0000000..08ab49a
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
@@ -0,0 +1,703 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/dma-buf-test-exporter.h>
+#include <linux/dma-buf.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/atomic.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif
+#include <linux/dma-mapping.h>
+#endif
+
+struct dma_buf_te_alloc {
+	/* the real alloc */
+	int nr_pages;
+	struct page **pages;
+
+	/* the debug usage tracking */
+	int nr_attached_devices;
+	int nr_device_mappings;
+	int nr_cpu_mappings;
+
+	/* failure simulation */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+
+	bool contiguous;
+	dma_addr_t contig_dma_addr;
+	void *contig_cpu_addr;
+};
+
+static struct miscdevice te_device;
+
+static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc	*alloc;
+	alloc = buf->priv;
+
+	if (alloc->fail_attach)
+		return -EFAULT;
+
+	/* dma_buf is externally locked during call */
+	alloc->nr_attached_devices++;
+	return 0;
+}
+
+static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* dma_buf is externally locked during call */
+
+	alloc->nr_attached_devices--;
+}
+
+static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction)
+{
+	struct sg_table *sg;
+	struct scatterlist *iter;
+	struct dma_buf_te_alloc	*alloc;
+	int i;
+	int ret;
+
+	alloc = attachment->dmabuf->priv;
+
+	if (alloc->fail_map)
+		return ERR_PTR(-ENOMEM);
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* if the ARCH can't chain we can't have allocs larger than a single sg can hold */
+	if (alloc->nr_pages > SG_MAX_SINGLE_ALLOC)
+		return ERR_PTR(-EINVAL);
+#endif
+
+	sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (!sg)
+		return ERR_PTR(-ENOMEM);
+
+	/* from here we access the allocation object, so lock the dmabuf pointing to it */
+	mutex_lock(&attachment->dmabuf->lock);
+
+	if (alloc->contiguous)
+		ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+	else
+		ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL);
+	if (ret) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		kfree(sg);
+		return ERR_PTR(ret);
+	}
+
+	if (alloc->contiguous) {
+		sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE;
+		sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0);
+		sg_dma_address(sg->sgl) = alloc->contig_dma_addr;
+	} else {
+		for_each_sg(sg->sgl, iter, alloc->nr_pages, i)
+			sg_set_page(iter, alloc->pages[i], PAGE_SIZE, 0);
+	}
+
+	if (!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		sg_free_table(sg);
+		kfree(sg);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	alloc->nr_device_mappings++;
+	mutex_unlock(&attachment->dmabuf->lock);
+	return sg;
+}
+
+static void dma_buf_te_unmap(struct dma_buf_attachment *attachment,
+							 struct sg_table *sg, enum dma_data_direction direction)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = attachment->dmabuf->priv;
+
+	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction);
+	sg_free_table(sg);
+	kfree(sg);
+
+	mutex_lock(&attachment->dmabuf->lock);
+	alloc->nr_device_mappings--;
+	mutex_unlock(&attachment->dmabuf->lock);
+}
+
+static void dma_buf_te_release(struct dma_buf *buf)
+{
+	int i;
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* no need for locking */
+
+	if (alloc->contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr,
+						alloc->contig_dma_addr,
+						DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++)
+			__free_page(alloc->pages[i]);
+	}
+	kfree(alloc->pages);
+	kfree(alloc);
+}
+
+
+static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings++;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	BUG_ON(alloc->nr_cpu_mappings <= 0);
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings--;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static int dma_buf_te_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dmabuf;
+	struct page *pageptr;
+
+	dmabuf = vma->vm_private_data;
+	alloc = dmabuf->priv;
+
+	if (vmf->pgoff > alloc->nr_pages)
+		return VM_FAULT_SIGBUS;
+
+	pageptr = alloc->pages[vmf->pgoff];
+
+	BUG_ON(!pageptr);
+
+	get_page(pageptr);
+	vmf->page = pageptr;
+
+	return 0;
+}
+
+struct vm_operations_struct dma_buf_te_vm_ops = {
+	.open = dma_buf_te_mmap_open,
+	.close = dma_buf_te_mmap_close,
+	.fault = dma_buf_te_mmap_fault
+};
+
+static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = dmabuf->priv;
+
+	if (alloc->fail_mmap)
+		return -ENOMEM;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTEXPAND;
+#endif
+	vma->vm_ops = &dma_buf_te_vm_ops;
+	vma->vm_private_data = dmabuf;
+
+	/*  we fault in the pages on access */
+
+	/* call open to do the ref-counting */
+	dma_buf_te_vm_ops.open(vma);
+
+	return 0;
+}
+
+static void *dma_buf_te_kmap_atomic(struct dma_buf *buf, unsigned long page_num)
+{
+	/* IGNORE */
+	return NULL;
+}
+
+static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long page_num)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return NULL;
+
+	return kmap(alloc->pages[page_num]);
+}
+static void dma_buf_te_kunmap(struct dma_buf *buf,
+		unsigned long page_num, void *addr)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return;
+
+	kunmap(alloc->pages[page_num]);
+	return;
+}
+
+static struct dma_buf_ops dma_buf_te_ops = {
+	/* real handlers */
+	.attach = dma_buf_te_attach,
+	.detach = dma_buf_te_detach,
+	.map_dma_buf = dma_buf_te_map,
+	.unmap_dma_buf = dma_buf_te_unmap,
+	.release = dma_buf_te_release,
+	.mmap = dma_buf_te_mmap,
+	.kmap = dma_buf_te_kmap,
+	.kunmap = dma_buf_te_kunmap,
+
+	/* nop handlers for mandatory functions we ignore */
+	.kmap_atomic = dma_buf_te_kmap_atomic
+};
+
+static int do_dma_buf_te_ioctl_version(struct dma_buf_te_ioctl_version __user *buf)
+{
+	struct dma_buf_te_ioctl_version v;
+
+	if (copy_from_user(&v, buf, sizeof(v)))
+		return -EFAULT;
+
+	if (v.op != DMA_BUF_TE_ENQ)
+		return -EFAULT;
+
+	v.op = DMA_BUF_TE_ACK;
+	v.major = DMA_BUF_TE_VER_MAJOR;
+	v.minor = DMA_BUF_TE_VER_MINOR;
+
+	if (copy_to_user(buf, &v, sizeof(v)))
+		return -EFAULT;
+	else
+		return 0;
+}
+
+static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, bool contiguous)
+{
+	struct dma_buf_te_ioctl_alloc alloc_req;
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dma_buf;
+	int i = 0;
+	int fd;
+
+	if (copy_from_user(&alloc_req, buf, sizeof(alloc_req))) {
+		dev_err(te_device.this_device, "%s: couldn't get user data", __func__);
+		goto no_input;
+	}
+
+	if (!alloc_req.size) {
+		dev_err(te_device.this_device, "%s: no size specified", __func__);
+		goto invalid_size;
+	}
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* Whilst it is possible to allocate larger buffer, we won't be able to
+	 * map it during actual usage (mmap() still succeeds). We fail here so
+	 * userspace code can deal with it early than having driver failure
+	 * later on. */
+	if (alloc_req.size > SG_MAX_SINGLE_ALLOC) {
+		dev_err(te_device.this_device, "%s: buffer size of %llu pages exceeded the mapping limit of %lu pages",
+				__func__, alloc_req.size, SG_MAX_SINGLE_ALLOC);
+		goto invalid_size;
+	}
+#endif
+
+	alloc = kzalloc(sizeof(struct dma_buf_te_alloc), GFP_KERNEL);
+	if (NULL == alloc) {
+		dev_err(te_device.this_device, "%s: couldn't alloc object", __func__);
+		goto no_alloc_object;
+	}
+
+	alloc->nr_pages = alloc_req.size;
+	alloc->contiguous = contiguous;
+
+	alloc->pages = kzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL);
+	if (!alloc->pages) {
+		dev_err(te_device.this_device,
+				"%s: couldn't alloc %d page structures", __func__,
+				alloc->nr_pages);
+		goto free_alloc_object;
+	}
+
+	if (contiguous) {
+		dma_addr_t dma_aux;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO,
+				DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO, &attrs);
+#else
+		alloc->contig_cpu_addr = dma_alloc_writecombine(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO);
+#endif
+		if (!alloc->contig_cpu_addr) {
+			dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %d pages", __func__, alloc->nr_pages);
+			goto free_page_struct;
+		}
+		dma_aux = alloc->contig_dma_addr;
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = pfn_to_page(PFN_DOWN(dma_aux));
+			dma_aux += PAGE_SIZE;
+		}
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+			if (NULL == alloc->pages[i]) {
+				dev_err(te_device.this_device, "%s: couldn't alloc page", __func__);
+				goto no_page;
+			}
+		}
+	}
+
+	/* alloc ready, let's export it */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0))
+	{
+		struct dma_buf_export_info export_info = {
+			.exp_name = "dma_buf_te",
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0))
+			.owner = THIS_MODULE,
+#endif
+			.ops = &dma_buf_te_ops,
+			.size = alloc->nr_pages << PAGE_SHIFT,
+			.flags = O_CLOEXEC | O_RDWR,
+			.priv = alloc,
+		};
+
+		dma_buf = dma_buf_export(&export_info);
+	}
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0))
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR, NULL);
+#else
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR);
+#endif
+
+	if (IS_ERR_OR_NULL(dma_buf)) {
+		dev_err(te_device.this_device, "%s: couldn't export dma_buf", __func__);
+		goto no_export;
+	}
+
+	/* get fd for buf */
+	fd = dma_buf_fd(dma_buf, O_CLOEXEC);
+
+	if (fd < 0) {
+		dev_err(te_device.this_device, "%s: couldn't get fd from dma_buf", __func__);
+		goto no_fd;
+	}
+
+	return fd;
+
+no_fd:
+	dma_buf_put(dma_buf);
+no_export:
+	/* i still valid */
+no_page:
+	if (contiguous) {
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr,
+						alloc->contig_dma_addr,
+						DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		while (i-- > 0)
+			__free_page(alloc->pages[i]);
+	}
+free_page_struct:
+	kfree(alloc->pages);
+free_alloc_object:
+	kfree(alloc);
+no_alloc_object:
+invalid_size:
+no_input:
+	return -EFAULT;
+}
+
+static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg)
+{
+	struct dma_buf_te_ioctl_status status;
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&status, arg, sizeof(status)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(status.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, get the current status */
+	alloc = dmabuf->priv;
+
+	/* lock while reading status to take a snapshot */
+	mutex_lock(&dmabuf->lock);
+	status.attached_devices = alloc->nr_attached_devices;
+	status.device_mappings = alloc->nr_device_mappings;
+	status.cpu_mappings = alloc->nr_cpu_mappings;
+	mutex_unlock(&dmabuf->lock);
+
+	if (copy_to_user(arg, &status, sizeof(status)))
+		goto err_have_dmabuf;
+
+	/* All OK */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing __user *arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_set_failing f;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, set the fail modes */
+	alloc = dmabuf->priv;
+	/* lock to set the fail modes atomically */
+	mutex_lock(&dmabuf->lock);
+	alloc->fail_attach = f.fail_attach;
+	alloc->fail_map    = f.fail_map;
+	alloc->fail_mmap   = f.fail_mmap;
+	mutex_unlock(&dmabuf->lock);
+
+	/* success */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
+{
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	unsigned int count;
+	unsigned int offset = 0;
+	int ret = 0;
+	int i;
+
+	attachment = dma_buf_attach(dma_buf, te_device.this_device);
+	if (IS_ERR_OR_NULL(attachment))
+		return -EBUSY;
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto no_import;
+	}
+
+	ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+	if (ret)
+		goto no_cpu_access;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		for (i = 0; i < sg_dma_len(sg); i = i + PAGE_SIZE) {
+			void *addr;
+
+			addr = dma_buf_kmap(dma_buf, i >> PAGE_SHIFT);
+			if (!addr) {
+				/* dma_buf_kmap is unimplemented in exynos and returns NULL */
+				ret = -EPERM;
+				goto no_kmap;
+			}
+			memset(addr, value, PAGE_SIZE);
+			dma_buf_kunmap(dma_buf, i >> PAGE_SHIFT, addr);
+		}
+		offset += sg_dma_len(sg);
+	}
+
+no_kmap:
+	dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+no_cpu_access:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+no_import:
+	dma_buf_detach(dma_buf, attachment);
+	return ret;
+}
+
+static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
+{
+
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_fill f;
+	int ret;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	ret = dma_te_buf_fill(dmabuf, f.value);
+	dma_buf_put(dmabuf);
+
+	return ret;
+}
+
+static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case DMA_BUF_TE_VERSION:
+		return do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg);
+	case DMA_BUF_TE_ALLOC:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false);
+	case DMA_BUF_TE_ALLOC_CONT:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true);
+	case DMA_BUF_TE_QUERY:
+		return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg);
+	case DMA_BUF_TE_SET_FAILING:
+		return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg);
+	case DMA_BUF_TE_FILL:
+		return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations dma_buf_te_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = dma_buf_te_ioctl,
+	.compat_ioctl = dma_buf_te_ioctl,
+};
+
+static int __init dma_buf_te_init(void)
+{
+	int res;
+	te_device.minor = MISC_DYNAMIC_MINOR;
+	te_device.name = "dma_buf_te";
+	te_device.fops = &dma_buf_te_fops;
+
+	res = misc_register(&te_device);
+	if (res) {
+		printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n");
+		return res;
+	}
+	te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	dev_info(te_device.this_device, "dma_buf_te ready\n");
+	return 0;
+
+}
+
+static void __exit dma_buf_te_exit(void)
+{
+	misc_deregister(&te_device);
+}
+
+module_init(dma_buf_te_init);
+module_exit(dma_buf_te_exit);
+MODULE_LICENSE("GPL");
+
diff --git a/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/sconscript b/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
new file mode 100644
index 0000000..5f42141
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/dma_buf_test_exporter/sconscript
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+mod = env.BuildKernelModule('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', Glob('*.c'))
+env.KernelObjTarget('dma-buf-test-exporter', mod)
diff --git a/midgard/r16p0/kernel/drivers/base/kds/Kbuild b/midgard/r16p0/kernel/drivers/base/kds/Kbuild
new file mode 100644
index 0000000..a88acd8
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/kds/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_KDS) += kds.o
+obj-$(CONFIG_KDS_TEST) += kds_test.o
diff --git a/midgard/r16p0/kernel/drivers/base/kds/Kconfig b/midgard/r16p0/kernel/drivers/base/kds/Kconfig
new file mode 100644
index 0000000..5f96165
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/kds/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config KDS
+	tristate "Kernel dependency system"
+	help
+	  This option enables the generic kernel dependency system
diff --git a/midgard/r16p0/kernel/drivers/base/kds/Makefile b/midgard/r16p0/kernel/drivers/base/kds/Makefile
new file mode 100644
index 0000000..364d151
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/kds/Makefile
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+CONFIG_KDS_TEST ?= n
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: kds
+
+kds:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS=m
+
+kds_test:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS_TEST=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r16p0/kernel/drivers/base/kds/kds.c b/midgard/r16p0/kernel/drivers/base/kds/kds.c
new file mode 100644
index 0000000..62612d4
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/kds/kds.c
@@ -0,0 +1,559 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/kds.h>
+#include <linux/kref.h>
+
+#include <asm/atomic.h>
+
+#define KDS_LINK_TRIGGERED (1u << 0)
+#define KDS_LINK_EXCLUSIVE (1u << 1)
+
+#define KDS_INVALID (void *)-2
+#define KDS_RESOURCE (void *)-1
+
+struct kds_resource_set
+{
+	unsigned long         num_resources;
+	unsigned long         pending;
+	struct kds_callback  *cb;
+	void                 *callback_parameter;
+	void                 *callback_extra_parameter;
+	struct list_head      callback_link;
+	struct work_struct    callback_work;
+	atomic_t              cb_queued;
+	/* This resource set will be freed when there are no pending
+	 * callbacks */
+	struct kref           refcount;
+
+	/* This is only initted when kds_waitall() is called. */
+	wait_queue_head_t     wake;
+
+	struct kds_link       resources[0];
+
+};
+
+static DEFINE_SPINLOCK(kds_lock);
+
+static void __resource_set_release(struct kref *ref)
+{
+	struct kds_resource_set *rset = container_of(ref,
+			struct kds_resource_set, refcount);
+
+	kfree(rset);
+}
+
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb)
+{
+	int ret = 0;
+
+	cb->direct = direct;
+	cb->user_cb = user_cb;
+
+	if (!direct)
+	{
+		cb->wq = alloc_workqueue("kds", WQ_UNBOUND | WQ_HIGHPRI, WQ_UNBOUND_MAX_ACTIVE);
+		if (!cb->wq)
+			ret = -ENOMEM;
+	}
+	else
+	{
+		cb->wq = NULL;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(kds_callback_init);
+
+void kds_callback_term(struct kds_callback *cb)
+{
+	if (!cb->direct)
+	{
+		BUG_ON(!cb->wq);
+		destroy_workqueue(cb->wq);
+	}
+	else
+	{
+		BUG_ON(cb->wq);
+	}
+}
+
+EXPORT_SYMBOL(kds_callback_term);
+
+static void kds_do_user_callback(struct kds_resource_set *rset)
+{
+	rset->cb->user_cb(rset->callback_parameter, rset->callback_extra_parameter);
+}
+
+static void kds_queued_callback(struct work_struct *work)
+{
+	struct kds_resource_set *rset;
+	rset = container_of(work, struct kds_resource_set, callback_work);
+
+	atomic_dec(&rset->cb_queued);
+
+	kds_do_user_callback(rset);
+}
+
+static void kds_callback_perform(struct kds_resource_set *rset)
+{
+	if (rset->cb->direct)
+		kds_do_user_callback(rset);
+	else
+	{
+		int result;
+
+		atomic_inc(&rset->cb_queued);
+
+		result = queue_work(rset->cb->wq, &rset->callback_work);
+		/* if we got a 0 return it means we've triggered the same rset twice! */
+		WARN_ON(!result);
+	}
+}
+
+void kds_resource_init(struct kds_resource * const res)
+{
+	BUG_ON(!res);
+	INIT_LIST_HEAD(&res->waiters.link);
+	res->waiters.parent = KDS_RESOURCE;
+}
+EXPORT_SYMBOL(kds_resource_init);
+
+int kds_resource_term(struct kds_resource *res)
+{
+	unsigned long lflags;
+	BUG_ON(!res);
+	spin_lock_irqsave(&kds_lock, lflags);
+	if (!list_empty(&res->waiters.link))
+	{
+		spin_unlock_irqrestore(&kds_lock, lflags);
+		printk(KERN_ERR "ERROR: KDS resource is still in use\n");
+		return -EBUSY;
+	}
+	res->waiters.parent = KDS_INVALID;
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	return 0;
+}
+EXPORT_SYMBOL(kds_resource_term);
+
+int kds_async_waitall(
+		struct kds_resource_set ** const pprset,
+		struct kds_callback      *cb,
+		void                     *callback_parameter,
+		void                     *callback_extra_parameter,
+		int                       number_resources,
+		unsigned long            *exclusive_access_bitmap,
+		struct kds_resource     **resource_list)
+{
+	struct kds_resource_set *rset = NULL;
+	unsigned long lflags;
+	int i;
+	int triggered;
+	int err = -EFAULT;
+
+	BUG_ON(!pprset);
+	BUG_ON(!resource_list);
+	BUG_ON(!cb);
+
+	WARN_ONCE(number_resources > 10, "Waiting on a high numbers of resources may increase latency, see documentation.");
+
+	rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+	if (!rset)
+	{
+		return -ENOMEM;
+	}
+
+	rset->num_resources = number_resources;
+	rset->pending = number_resources;
+	rset->cb = cb;
+	rset->callback_parameter = callback_parameter;
+	rset->callback_extra_parameter = callback_extra_parameter;
+	INIT_LIST_HEAD(&rset->callback_link);
+	INIT_WORK(&rset->callback_work, kds_queued_callback);
+	atomic_set(&rset->cb_queued, 0);
+	kref_init(&rset->refcount);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		INIT_LIST_HEAD(&rset->resources[i].link);
+		rset->resources[i].parent = rset;
+	}
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		unsigned long link_state = 0;
+
+		if (test_bit(i, exclusive_access_bitmap))
+		{
+			link_state |= KDS_LINK_EXCLUSIVE;
+		}
+
+		/* no-one else waiting? */
+		if (list_empty(&resource_list[i]->waiters.link))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		/* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+		else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+				(((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		rset->resources[i].state = link_state;
+
+		/* avoid double wait (hang) */
+		if (!list_empty(&resource_list[i]->waiters.link))
+		{
+			/* adding same rset again? */
+			if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+			{
+				goto roll_back;
+			}
+		}
+		list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+	}
+
+	triggered = (rset->pending == 0);
+
+	/* set the pointer before the callback is called so it sees it */
+	*pprset = rset;
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	if (triggered)
+	{
+		/* all resources obtained, trigger callback */
+		kds_callback_perform(rset);
+	}
+
+	return 0;
+
+roll_back:
+	/* roll back */
+	while (i-- > 0)
+	{
+		list_del(&rset->resources[i].link);
+	}
+	err = -EINVAL;
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	kfree(rset);
+	return err;
+}
+EXPORT_SYMBOL(kds_async_waitall);
+
+static void wake_up_sync_call(void *callback_parameter, void *callback_extra_parameter)
+{
+	wait_queue_head_t *wait = (wait_queue_head_t *)callback_parameter;
+	wake_up(wait);
+}
+
+static struct kds_callback sync_cb =
+{
+	wake_up_sync_call,
+	1,
+	NULL,
+};
+
+struct kds_resource_set *kds_waitall(
+		int                   number_resources,
+		unsigned long        *exclusive_access_bitmap,
+		struct kds_resource **resource_list,
+		unsigned long         jiffies_timeout)
+{
+	struct kds_resource_set *rset;
+	unsigned long lflags;
+	int i;
+	int triggered = 0;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+	if (!rset)
+		return rset;
+
+	rset->num_resources = number_resources;
+	rset->pending = number_resources;
+	init_waitqueue_head(&rset->wake);
+	INIT_LIST_HEAD(&rset->callback_link);
+	INIT_WORK(&rset->callback_work, kds_queued_callback);
+	atomic_set(&rset->cb_queued, 0);
+	kref_init(&rset->refcount);
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < number_resources; i++)
+	{
+		unsigned long link_state = 0;
+
+		if (test_bit(i, exclusive_access_bitmap))
+		{
+			link_state |= KDS_LINK_EXCLUSIVE;
+		}
+
+		if (list_empty(&resource_list[i]->waiters.link))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+		/* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+		else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+				(((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+		{
+			link_state |= KDS_LINK_TRIGGERED;
+			rset->pending--;
+		}
+
+		INIT_LIST_HEAD(&rset->resources[i].link);
+		rset->resources[i].parent = rset;
+		rset->resources[i].state = link_state;
+
+		/* avoid double wait (hang) */
+		if (!list_empty(&resource_list[i]->waiters.link))
+		{
+			/* adding same rset again? */
+			if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+			{
+				goto roll_back;
+			}
+		}
+
+		list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+	}
+
+	if (rset->pending == 0)
+		triggered = 1;
+	else
+	{
+		rset->cb = &sync_cb;
+		rset->callback_parameter = &rset->wake;
+		rset->callback_extra_parameter = NULL;
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	if (!triggered)
+	{
+		long wait_res = 0;
+		long timeout = (jiffies_timeout == KDS_WAIT_BLOCKING) ?
+				MAX_SCHEDULE_TIMEOUT : jiffies_timeout;
+
+		if (timeout)
+		{
+			wait_res = wait_event_interruptible_timeout(rset->wake,
+					rset->pending == 0, timeout);
+		}
+
+		if ((wait_res == -ERESTARTSYS) || (wait_res == 0))
+		{
+			/* use \a kds_resource_set_release to roll back */
+			kds_resource_set_release(&rset);
+			return ERR_PTR(wait_res);
+		}
+	}
+	return rset;
+
+roll_back:
+	/* roll back */
+	while (i-- > 0)
+	{
+		list_del(&rset->resources[i].link);
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+	kfree(rset);
+	return ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL(kds_waitall);
+
+static void trigger_new_rset_owner(struct kds_resource_set *rset,
+		struct list_head *triggered)
+{
+	if (0 == --rset->pending) {
+		/* new owner now triggered, track for callback later */
+		kref_get(&rset->refcount);
+		list_add(&rset->callback_link, triggered);
+	}
+}
+
+static void __kds_resource_set_release_common(struct kds_resource_set *rset)
+{
+	struct list_head triggered = LIST_HEAD_INIT(triggered);
+	struct kds_resource_set *it;
+	unsigned long lflags;
+	int i;
+
+	spin_lock_irqsave(&kds_lock, lflags);
+
+	for (i = 0; i < rset->num_resources; i++)
+	{
+		struct kds_resource *resource;
+		struct kds_link *it = NULL;
+
+		/* fetch the previous entry on the linked list */
+		it = list_entry(rset->resources[i].link.prev, struct kds_link, link);
+		/* unlink ourself */
+		list_del(&rset->resources[i].link);
+
+		/* any waiters? */
+		if (list_empty(&it->link))
+			continue;
+
+		/* were we the head of the list? (head if prev is a resource) */
+		if (it->parent != KDS_RESOURCE)
+		{
+			if ((it->state & KDS_LINK_TRIGGERED) && !(it->state & KDS_LINK_EXCLUSIVE))
+			{
+				/*
+				 * previous was triggered and not exclusive, so we
+				 * trigger non-exclusive until end-of-list or first
+				 * exclusive
+				 */
+
+				struct kds_link *it_waiting = it;
+
+				list_for_each_entry(it, &it_waiting->link, link)
+				{
+					/* exclusive found, stop triggering */
+					if (it->state & KDS_LINK_EXCLUSIVE)
+						break;
+
+					it->state |= KDS_LINK_TRIGGERED;
+					/* a parent to update? */
+					if (it->parent != KDS_RESOURCE)
+						trigger_new_rset_owner(
+								it->parent,
+								&triggered);
+				}
+			}
+			continue;
+		}
+
+		/* we were the head, find the kds_resource */
+		resource = container_of(it, struct kds_resource, waiters);
+
+		/* we know there is someone waiting from the any-waiters test above */
+
+		/* find the head of the waiting list */
+		it = list_first_entry(&resource->waiters.link, struct kds_link, link);
+
+		/* new exclusive owner? */
+		if (it->state & KDS_LINK_EXCLUSIVE)
+		{
+			/* link now triggered */
+			it->state |= KDS_LINK_TRIGGERED;
+			/* a parent to update? */
+			trigger_new_rset_owner(it->parent, &triggered);
+		}
+		/* exclusive releasing ? */
+		else if (rset->resources[i].state & KDS_LINK_EXCLUSIVE)
+		{
+			/* trigger non-exclusive until end-of-list or first exclusive */
+			list_for_each_entry(it, &resource->waiters.link, link)
+			{
+				/* exclusive found, stop triggering */
+				if (it->state & KDS_LINK_EXCLUSIVE)
+					break;
+
+				it->state |= KDS_LINK_TRIGGERED;
+				/* a parent to update? */
+				trigger_new_rset_owner(it->parent, &triggered);
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&kds_lock, lflags);
+
+	while (!list_empty(&triggered))
+	{
+		it = list_first_entry(&triggered, struct kds_resource_set, callback_link);
+		list_del(&it->callback_link);
+		kds_callback_perform(it);
+
+		/* Free the resource set if no callbacks pending */
+		kref_put(&it->refcount, &__resource_set_release);
+	}
+}
+
+void kds_resource_set_release(struct kds_resource_set **pprset)
+{
+	struct kds_resource_set *rset;
+	int queued;
+
+	rset = cmpxchg(pprset, *pprset, NULL);
+
+	if (!rset)
+	{
+		/* caught a race between a cancelation
+		 * and a completion, nothing to do */
+		return;
+	}
+
+	__kds_resource_set_release_common(rset);
+
+	/*
+	 * Caller is responsible for guaranteeing that callback work is not
+	 * pending (i.e. its running or completed) prior to calling release.
+	 */
+	queued = atomic_read(&rset->cb_queued);
+	BUG_ON(queued);
+
+	kref_put(&rset->refcount, &__resource_set_release);
+}
+EXPORT_SYMBOL(kds_resource_set_release);
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset)
+{
+	struct kds_resource_set *rset;
+
+	rset = cmpxchg(pprset, *pprset, NULL);
+	if (!rset)
+	{
+		/* caught a race between a cancelation
+		 * and a completion, nothing to do */
+		return;
+	}
+
+	__kds_resource_set_release_common(rset);
+
+	/*
+	 * In the case of a kds async wait cancellation ensure the deferred
+	 * call back does not get scheduled if a trigger fired at the same time
+	 * to release the wait.
+	 */
+	cancel_work_sync(&rset->callback_work);
+
+	kref_put(&rset->refcount, &__resource_set_release);
+}
+EXPORT_SYMBOL(kds_resource_set_release_sync);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/midgard/r16p0/kernel/drivers/base/kds/sconscript b/midgard/r16p0/kernel/drivers/base/kds/sconscript
new file mode 100644
index 0000000..75add37
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/kds/sconscript
@@ -0,0 +1,42 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+#Android uses sync_pt to accomplish KDS functionality.
+#Midgard KDS is not used by Android
+if env['os'] == 'android':
+    Return()
+
+if Glob('tests/sconscript'):
+    SConscript('tests/sconscript')
+
+# If KDS is built into the kernel already we skip building the module ourselves
+build_kds = not env.KernelConfigEnabled("CONFIG_KDS")
+
+# Build KDS module
+if build_kds:
+    kds_mod = env.BuildKernelModule('$STATIC_LIB_PATH/kds.ko', ['kds.c'],
+                                    make_args = ['kds'])
+    env.KernelObjTarget('kds', kds_mod)
+
+# Build KDS test module
+if int(env['unit']) == 1:
+    kds_test_mod = env.BuildKernelModule('$STATIC_LIB_PATH/kds_test.ko',
+                                         ['kds_test.c'],
+                                         make_args = ['kds_test'])
+    env.KernelObjTarget('kds', kds_test_mod)
+    if build_kds:
+        env.Depends(kds_test_mod, kds_mod)
diff --git a/midgard/r16p0/kernel/drivers/base/sconscript b/midgard/r16p0/kernel/drivers/base/sconscript
new file mode 100644
index 0000000..84de8c4
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/sconscript
@@ -0,0 +1,36 @@
+#
+# (C) COPYRIGHT 2010-2014, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import( 'env' )
+
+if Glob('bus_logger/sconscript'):
+	if env['buslog'] == '1':
+		SConscript('bus_logger/sconscript')
+
+if Glob('mali_fpga_sysctl/sconscript'):
+	SConscript('mali_fpga_sysctl/sconscript')
+
+if Glob('dma_buf_lock/sconscript'):
+	SConscript('dma_buf_lock/sconscript')
+
+if Glob('kds/sconscript'):
+	SConscript('kds/sconscript')
+
+if Glob('ump/sconscript'):
+	SConscript('ump/sconscript')
+
+if Glob('dma_buf_test_exporter/sconscript'):
+	SConscript('dma_buf_test_exporter/sconscript')
+
diff --git a/midgard/r16p0/kernel/drivers/base/ump/Kbuild b/midgard/r16p0/kernel/drivers/base/ump/Kbuild
new file mode 100644
index 0000000..2bbdba2
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += src/
+
diff --git a/midgard/r16p0/kernel/drivers/base/ump/Kconfig b/midgard/r16p0/kernel/drivers/base/ump/Kconfig
new file mode 100644
index 0000000..f7451e6
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/Kconfig
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config UMP
+	tristate "Enable Unified Memory Provider (UMP) support"
+	default n
+    help
+	  Enable this option to build support for the ARM UMP module.
+	  UMP can be used by the Mali T6xx module to improve performance
+	  by reducing the copying of data by sharing memory.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate one module, called ump.
diff --git a/midgard/r16p0/kernel/drivers/base/ump/docs/Doxyfile b/midgard/r16p0/kernel/drivers/base/ump/docs/Doxyfile
new file mode 100644
index 0000000..fbec8eb
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/docs/Doxyfile
@@ -0,0 +1,125 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/include/linux/ump-common.h ../../kernel/include/linux/ump.h
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           += ../../kernel/drivers/base/ump
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           +=
diff --git a/midgard/r16p0/kernel/drivers/base/ump/docs/sconscript b/midgard/r16p0/kernel/drivers/base/ump/docs/sconscript
new file mode 100644
index 0000000..521278d
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/docs/sconscript
@@ -0,0 +1,31 @@
+#
+# (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+doxygen_sources = [
+	'Doxyfile',
+	Glob('*.png'),
+	Glob('../*.h'),
+	Glob('../src/*.h') ]
+
+if env['doc'] == '1':
+        doxygen_target = env.Command('doxygen/html/index.html', doxygen_sources,
+                                     ['cd ${SOURCE.dir} && doxygen'])
+        env.Clean(doxygen_target, './doxygen')
+
+        Alias('doxygen', doxygen_target)
+
diff --git a/midgard/r16p0/kernel/drivers/base/ump/example_kernel_api.c b/midgard/r16p0/kernel/drivers/base/ump/example_kernel_api.c
new file mode 100644
index 0000000..858dd8b
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/example_kernel_api.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Example routine to display information about an UMP allocation
+ * The routine takes an secure_id which can come from a different kernel module
+ * or from a client application (i.e. an ioctl).
+ * It creates a ump handle from the secure id (which validates the secure id)
+ * and if successful dumps the physical memory information.
+ * It follows the API and pins the memory while "using" the physical memory.
+ * Finally it calls the release function to indicate it's finished with the handle.
+ *
+ * If the function can't look up the handle it fails with return value -1.
+ * If the testy succeeds then it return 0.
+ * */
+
+static int display_ump_memory_information(ump_secure_id secure_id)
+{
+	const ump_dd_physical_block_64 * ump_blocks = NULL;
+	ump_dd_handle ump_mem;
+	uint64_t nr_blocks;
+	int i;
+	ump_alloc_flags flags;
+
+	/* get a handle from the secure id */
+	ump_mem = ump_dd_from_secure_id(secure_id);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE == ump_mem)
+	{
+		/* invalid ID received */
+		return -1;
+	}
+
+	/* at this point we know we've added a reference to the ump allocation, so we must release it with ump_dd_release */
+
+	ump_dd_phys_blocks_get_64(ump_mem, &nr_blocks, &ump_blocks);
+	flags = ump_dd_allocation_flags_get(ump_mem);
+
+	printf("UMP allocation with secure ID %u consists of %zd physical block(s):\n", secure_id, nr_blocks);
+
+	for(i=0; i<nr_blocks; ++i)
+	{
+		printf("\tBlock %d: 0x%08zX size 0x%08zX\n", i, ump_blocks[i].addr, ump_blocks[i].size);
+	}
+
+	printf("and was allocated using the following bitflag combo:  0x%lX\n", flags);
+
+	ump_dd_release(ump_mem);
+
+	return 0;
+}
+
diff --git a/midgard/r16p0/kernel/drivers/base/ump/example_user_api.c b/midgard/r16p0/kernel/drivers/base/ump/example_user_api.c
new file mode 100644
index 0000000..d143a64
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/example_user_api.c
@@ -0,0 +1,153 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <ump/ump.h>
+#include <memory.h>
+#include <stdio.h>
+
+/*
+ * Example routine to exercise the user space UMP api.
+ * This routine initializes the UMP api and allocates some CPU+device X memory.
+ * No usage hints are given, so the driver will use the default cacheability policy.
+ * With the allocation it creates a duplicate handle and plays with the reference count.
+ * Then it simulates interacting with a device and contains pseudo code for the device.
+ *
+ * If any error is detected correct cleanup will be performed and -1 will be returned.
+ * If successful then 0 will be returned.
+ */
+
+static int test_ump_user_api(void)
+{
+	/* This is the size we try to allocate*/
+	const size_t alloc_size = 4096;
+
+	ump_handle h = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_copy = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+	void * mapping = NULL;
+
+	ump_result ump_api_res;
+	int result = -1;
+
+	ump_secure_id id;
+
+	size_t size_returned;
+
+	ump_api_res = ump_open();
+	if (UMP_OK != ump_api_res)
+	{
+		/* failed to open an ump session */
+		/* early out */
+		return -1;
+	}
+
+	h = ump_allocate_64(alloc_size, UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | UMP_PROT_X_RD | UMP_PROT_X_WR);
+	/* the refcount is now 1 */
+	if (UMP_INVALID_MEMORY_HANDLE == h)
+	{
+		/* allocation failure */
+		goto cleanup;
+	}
+
+	/* this is how we could share this allocation with another process */
+
+	/* in process A: */
+	id = ump_secure_id_get(h);
+	/* still ref count 1 */
+	/* send the id to process B */
+
+	/* in process B: */
+	/* receive the id from A */
+	h_clone = ump_from_secure_id(id);
+	/* the ref count of the allocation is now 2 (one from each handle to it) */
+	/* do something ... */
+	/* release our clone */
+	ump_release(h_clone); /* safe to call even if ump_from_secure_id failed */
+	h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+
+	/* a simple save-for-future-use logic inside the driver would just copy the handle (but add a ref manually too!) */
+	/*
+	 * void assign_memory_to_job(h)
+	 * {
+	  */
+	h_copy = h;
+	ump_retain(h_copy); /* manual retain needed as we just assigned the handle, now 2 */
+	/*
+	 * }
+	 *
+	 * void job_completed(void)
+	 * {
+	 */
+	 ump_release(h_copy); /* normal handle release as if we got via an ump_allocate */
+	 h_copy = UMP_INVALID_MEMORY_HANDLE;
+	 /*
+	 * }
+	 */
+	
+	/* we're now back at ref count 1, and only h is a valid handle */
+	/* enough handle duplication show-off, let's play with the contents instead */
+
+	mapping = ump_map(h, 0, alloc_size);
+	if (NULL == mapping)
+	{
+		/* mapping failure, either out of address space or some other error */
+		goto cleanup;
+	}
+
+	memset(mapping, 0, alloc_size);
+
+	/* let's pretend we're going to start some hw device on this buffer and read the result afterwards */
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN, mapping, alloc_size);
+ 	/*
+		device cache invalidate
+
+		memory barrier
+
+		start device
+
+		memory barrier
+
+		wait for device
+
+		memory barrier
+
+		device cache clean
+
+		memory barrier
+	*/
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN_AND_INVALIDATE, mapping, alloc_size);
+
+	/* we could now peek at the result produced by the hw device, which is now accessible via our mapping */
+
+	/* unmap the buffer when we're done with it */
+	ump_unmap(h, mapping, alloc_size);
+
+	result = 0;
+
+cleanup:
+	ump_release(h);
+	h = UMP_INVALID_MEMORY_HANDLE;
+
+	ump_close();
+
+	return result;
+}
+
diff --git a/midgard/r16p0/kernel/drivers/base/ump/sconscript b/midgard/r16p0/kernel/drivers/base/ump/sconscript
new file mode 100644
index 0000000..b4aa8b6
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+if Glob('src/sconscript') and int(env['ump']) == 1:
+	SConscript( 'src/sconscript' )
+	SConscript( 'docs/sconscript' )
+
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/Kbuild b/midgard/r16p0/kernel/drivers/base/ump/src/Kbuild
new file mode 100644
index 0000000..de6d307
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/Kbuild
@@ -0,0 +1,50 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Paths required for build
+UMP_PATH = $(src)/../..
+UMP_DEVICEDRV_PATH = $(src)/.
+
+# Set up defaults if not defined by the user
+MALI_UNIT_TEST ?= 0
+
+SRC :=\
+	common/ump_kernel_core.c \
+	common/ump_kernel_descriptor_mapping.c \
+	linux/ump_kernel_linux.c \
+	linux/ump_kernel_linux_mem.c
+
+UNIT_TEST_DEFINES=
+ifeq ($(MALI_UNIT_TEST), 1)
+	MALI_DEBUG ?= 1
+
+	UNIT_TEST_DEFINES = -DMALI_UNIT_TEST=1 \
+	                    -DMALI_DEBUG=$(MALI_DEBUG)
+endif
+
+# Use our defines when compiling
+ccflags-y += -I$(UMP_PATH) -I$(UMP_DEVICEDRV_PATH) $(UNIT_TEST_DEFINES)
+
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_UMP) += ump.o
+ifeq ($(CONFIG_ION),y)
+ccflags-y += -I$(srctree)/drivers/staging/android/ion -I$(srctree)/include/linux
+obj-$(CONFIG_UMP) += imports/ion/ump_kernel_import_ion.o
+endif
+
+# Tell the Linux build system to enable building of our .c files
+ump-y := $(SRC:.c=.o)
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/Makefile b/midgard/r16p0/kernel/drivers/base/ump/src/Makefile
new file mode 100644
index 0000000..45428ad
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/Makefile
@@ -0,0 +1,81 @@
+#
+# (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+ifneq ($(KBUILD_EXTMOD),)
+include $(KBUILD_EXTMOD)/Makefile.common
+else
+include ./Makefile.common
+endif
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+RELATIVE_ROOT=../../../../..
+ROOT = $(CURDIR)/$(RELATIVE_ROOT)
+
+EXTRA_CFLAGS=-I$(CURDIR)/../../../../include
+
+ifeq ($(MALI_UNIT_TEST),1)
+	EXTRA_CFLAGS += -DMALI_UNIT_TEST=$(MALI_UNIT_TEST)
+endif
+
+# Get any user defined KDIR-<names> or maybe even a hardcoded KDIR
+-include KDIR_CONFIGURATION
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+CONFIG ?= $(ARCH)
+
+# default cpu to select
+CPU ?= $(shell uname -m)
+
+# look up KDIR based om CPU selection
+KDIR ?= $(KDIR-$(CPU))
+
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(CPU))
+endif
+
+# Validate selected config
+ifneq ($(shell [ -d arch-$(CONFIG) ] && [ -f arch-$(CONFIG)/config.h ] && echo "OK"), OK)
+$(warning Current directory is $(shell pwd))
+$(error No configuration found for config $(CONFIG). Check that arch-$(CONFIG)/config.h exists)
+else
+# Link arch to the selected arch-config directory
+$(shell [ -L arch ] && rm arch)
+$(shell ln -sf arch-$(CONFIG) arch)
+$(shell touch arch/config.h)
+endif
+
+EXTRA_SYMBOLS=
+
+ifeq ($(MALI_UNIT_TEST),1)
+	KBASE_PATH=$(ROOT)/kernel/drivers/gpu/arm/midgard
+	EXTRA_SYMBOLS+=$(KBASE_PATH)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+KDS_PATH=$(ROOT)/kernel/drivers/base/kds
+EXTRA_SYMBOLS+=$(KDS_PATH)/Module.symvers
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="$(EXTRA_CFLAGS) $(SCONS_CFLAGS)" CONFIG_UMP=m KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+kernelrelease:
+	$(MAKE) -C $(KDIR) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" kernelrelease
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/Makefile.common b/midgard/r16p0/kernel/drivers/base/ump/src/Makefile.common
new file mode 100644
index 0000000..f29a4c1
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/Makefile.common
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2008-2010, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+SRC = $(UMP_FILE_PREFIX)/common/ump_kernel_core.c \
+      $(UMP_FILE_PREFIX)/common/ump_kernel_descriptor_mapping.c
+
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/arch-arm/config.h b/midgard/r16p0/kernel/drivers/base/ump/src/arch-arm/config.h
new file mode 100644
index 0000000..152d98f
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/arch-arm/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/arch-arm64/config.h b/midgard/r16p0/kernel/drivers/base/ump/src/arch-arm64/config.h
new file mode 100644
index 0000000..cfb14ca
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/arch-arm64/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c b/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c
new file mode 100644
index 0000000..07aa077
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_core.c
@@ -0,0 +1,756 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* module headers */
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+/* local headers */
+#include <common/ump_kernel_core.h>
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+#define UMP_FLAGS_RANGE ((UMP_PROT_SHAREABLE<<1) - 1u)
+
+static umpp_device device;
+
+ump_result umpp_core_constructor(void)
+{
+	mutex_init(&device.secure_id_map_lock);
+	device.secure_id_map = umpp_descriptor_mapping_create(UMP_EXPECTED_IDS, UMP_MAX_IDS);
+	if (NULL != device.secure_id_map)
+	{
+		if (UMP_OK == umpp_device_initialize())
+		{
+			return UMP_OK;
+		}
+		umpp_descriptor_mapping_destroy(device.secure_id_map);
+	}
+	mutex_destroy(&device.secure_id_map_lock);
+
+	return UMP_ERROR;
+}
+
+void umpp_core_destructor(void)
+{
+	umpp_device_terminate();
+	umpp_descriptor_mapping_destroy(device.secure_id_map);
+	mutex_destroy(&device.secure_id_map_lock);
+}
+
+umpp_session *umpp_core_session_start(void)
+{
+	umpp_session * session;
+
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
+	if (NULL != session)
+	{
+		mutex_init(&session->session_lock);
+
+		INIT_LIST_HEAD(&session->memory_usage);
+
+		/* try to create import client session, not a failure if they fail to initialize */
+		umpp_import_handlers_init(session);
+	}
+
+	return session;
+}
+
+void umpp_core_session_end(umpp_session *session)
+{
+	umpp_session_memory_usage * usage, *_usage;
+	UMP_ASSERT(session);
+
+	list_for_each_entry_safe(usage, _usage, &session->memory_usage, link)
+	{
+		printk(KERN_WARNING "UMP: Memory usage cleanup, releasing secure ID %d\n", ump_dd_secure_id_get(usage->mem));
+		ump_dd_release(usage->mem);
+		kfree(usage);
+
+	}
+
+	/* we should now not hold any imported memory objects,
+	 * detatch all import handlers */
+	umpp_import_handlers_term(session);
+
+	mutex_destroy(&session->session_lock);
+	kfree(session);
+}
+
+ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	umpp_allocation * alloc;
+	int i;
+
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD ) ) == 0 ||
+	    ( flags & (UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read and one write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL | __GFP_HARDWALL);
+
+	if (NULL == alloc)
+		goto out1;
+
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+	alloc->size = size;
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+#ifdef CONFIG_KDS
+	kds_resource_init(&alloc->kds_res);
+#endif
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	if (0 != umpp_phys_commit(alloc))
+	{
+		goto out2;
+	}
+
+	/* all set up, allocate an ID for it */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	return alloc;
+
+out3:
+	umpp_phys_free(alloc);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+uint64_t ump_dd_size_get_64(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->size;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_size_get(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->size <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->size;
+}
+
+ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->id;
+}
+
+#ifdef CONFIG_KDS
+struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return &alloc->kds_res;
+}
+#endif
+
+ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	return alloc->flags;
+}
+
+ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id)
+{
+	umpp_allocation * alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+
+	mutex_lock(&device.secure_id_map_lock);
+
+	if (0 == umpp_descriptor_mapping_lookup(device.secure_id_map, secure_id, (void**)&alloc))
+	{
+		if (NULL != alloc->filter_func)
+		{
+			if (!alloc->filter_func(secure_id, alloc, alloc->callback_data))
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /* the filter denied access */
+			}
+		}
+
+		/* check permission to access it */
+		if ((UMP_DD_INVALID_MEMORY_HANDLE != alloc) && !(alloc->flags & UMP_PROT_SHAREABLE))
+		{
+			if (alloc->owner != get_current()->pid)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /*no rights for the current process*/
+			}
+		}
+
+		if (UMP_DD_INVALID_MEMORY_HANDLE != alloc)
+		{
+			if( ump_dd_retain(alloc) != UMP_DD_SUCCESS)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+			}
+		}
+	}
+	mutex_unlock(&device.secure_id_map_lock);
+
+	return alloc;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id)
+{
+	return ump_dd_from_secure_id(secure_id);
+}
+
+int ump_dd_retain(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* check for overflow */
+	while(1)
+	{
+		int refcnt = atomic_read(&alloc->refcount);
+		if (refcnt + 1 > 0)
+		{
+			if(atomic_cmpxchg(&alloc->refcount, refcnt, refcnt + 1) == refcnt)
+			{
+				return 0;
+			}
+		}
+		else
+		{
+			return -EBUSY;
+		}
+	}
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_add(ump_dd_handle mem)
+{
+	ump_dd_retain(mem);
+}
+
+
+void ump_dd_release(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+	uint32_t new_cnt;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* secure the id for lookup while releasing */
+	mutex_lock(&device.secure_id_map_lock);
+
+	/* do the actual release */
+	new_cnt = atomic_sub_return(1, &alloc->refcount);
+	if (0 == new_cnt)
+	{
+		/* remove from the table as this was the last ref */
+		umpp_descriptor_mapping_remove(device.secure_id_map, alloc->id);
+	}
+
+	/* release the lock as early as possible */
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if (0 != new_cnt)
+	{
+		/* exit if still have refs */
+		return;
+	}
+
+	UMP_ASSERT(list_empty(&alloc->map_list));
+
+#ifdef CONFIG_KDS
+	if (kds_resource_term(&alloc->kds_res))
+	{
+		printk(KERN_ERR "ump_dd_release: kds_resource_term failed,"
+				"unable to release UMP allocation\n");
+		return;
+	}
+#endif
+	/* cleanup */
+	if (NULL != alloc->final_release_func)
+	{
+		alloc->final_release_func(alloc, alloc->callback_data);
+	}
+
+	if (0 == (alloc->management_flags & UMP_MGMT_EXTERNAL))
+	{
+		umpp_phys_free(alloc);
+	}
+	else
+	{
+		kfree(alloc->block_array);
+	}
+
+	mutex_destroy(&alloc->map_list_lock);
+
+	kfree(alloc);
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_release(ump_dd_handle mem)
+{
+	ump_dd_release(mem);
+}
+
+void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(pCount);
+	UMP_ASSERT(pArray);
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+	*pCount = alloc->blocksCount;
+	*pArray = alloc->block_array;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks)
+{
+	const umpp_allocation * alloc;
+	unsigned long i;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	if((uint64_t)num_blocks != alloc->blocksCount)
+	{
+		return UMP_DD_INVALID;
+	}
+
+	for( i = 0; i < num_blocks; i++)
+	{
+		UMP_ASSERT(alloc->block_array[i].addr <= UMP_UINT32_MAX);
+		UMP_ASSERT(alloc->block_array[i].size <= UMP_UINT32_MAX);
+
+		blocks[i].addr = (unsigned long)alloc->block_array[i].addr;
+		blocks[i].size = (unsigned long)alloc->block_array[i].size;
+	}
+
+	return UMP_DD_SUCCESS;
+}
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(block);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	UMP_ASSERT(alloc->block_array[index].addr <= UMP_UINT32_MAX);
+	UMP_ASSERT(alloc->block_array[index].size <= UMP_UINT32_MAX);
+
+	block->addr = (unsigned long)alloc->block_array[index].addr;
+	block->size = (unsigned long)alloc->block_array[index].size;
+
+	return UMP_DD_SUCCESS;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->blocksCount <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->blocksCount;
+}
+
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void *uaddr, size_t size)
+{
+	umpp_cpu_mapping *map;
+
+	void *target_first = uaddr;
+	void *target_last = (void*)((uintptr_t)uaddr - 1 + size);
+
+	if (target_last < target_first) /* wrapped */
+	{
+		return NULL;
+	}
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if ( map->vaddr_start <= target_first &&
+		   (void*)((uintptr_t)map->vaddr_start + (map->nr_pages << PAGE_SHIFT) - 1) >= target_last)
+		{
+			goto out;
+		}
+	}
+	map = NULL;
+out:
+	mutex_unlock(&alloc->map_list_lock);
+
+	return map;
+}
+
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map)
+{
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(map);
+	mutex_lock(&alloc->map_list_lock);
+	list_add(&map->link, &alloc->map_list);
+	mutex_unlock(&alloc->map_list_lock);
+}
+
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target)
+{
+	umpp_cpu_mapping * map;
+
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(target);
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if (map == target)
+		{
+			list_del(&target->link);
+			kfree(target);
+			mutex_unlock(&alloc->map_list_lock);
+			return;
+		}
+	}
+
+	/* not found, error */
+	UMP_ASSERT(0);
+}
+
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const  block_index, uint64_t * const block_internal_offset)
+{
+	uint64_t i;
+
+	for (i = 0 ; i < alloc->blocksCount; i++)
+	{
+		if (offset < alloc->block_array[i].size)
+		{
+			/* found the block_array element containing this offset */
+			*block_index = i;
+			*block_internal_offset = offset;
+			return 0;
+		}
+		offset -= alloc->block_array[i].size;
+	}
+
+	return -ENXIO;
+}
+
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size)
+{
+	umpp_allocation * alloc;
+	void *vaddr;
+	umpp_cpu_mapping * mapping;
+	uint64_t virt_page_off; /* offset of given address from beginning of the virtual mapping */
+	uint64_t phys_page_off; /* offset of the virtual mapping from the beginning of the physical buffer */
+	uint64_t page_count; /* number of pages to sync */
+	uint64_t i;
+	uint64_t block_idx;
+	uint64_t block_offset;
+	uint64_t paddr;
+
+	UMP_ASSERT((UMP_MSYNC_CLEAN == op) || (UMP_MSYNC_CLEAN_AND_INVALIDATE == op));
+
+	alloc = (umpp_allocation*)mem;
+	vaddr = (void*)(uintptr_t)address;
+
+	if((alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+	{
+		/* mapping is not cached */
+		return;
+	}
+
+	mapping = umpp_dd_find_enclosing_mapping(alloc, vaddr, size);
+	if (NULL == mapping)
+	{
+		printk(KERN_WARNING "UMP: Illegal cache sync address %lx\n", (uintptr_t)vaddr);
+		return; /* invalid pointer or size causes out-of-bounds */
+	}
+
+	/* we already know that address + size don't wrap around as umpp_dd_find_enclosing_mapping didn't fail */
+	page_count = ((((((uintptr_t)address + size - 1) & PAGE_MASK) - ((uintptr_t)address & PAGE_MASK))) >> PAGE_SHIFT) + 1;
+	virt_page_off = (vaddr - mapping->vaddr_start) >> PAGE_SHIFT;
+	phys_page_off = mapping->page_off;
+
+	if (umpp_dd_find_start_block(alloc, (virt_page_off + phys_page_off) << PAGE_SHIFT, &block_idx, &block_offset))
+	{
+		/* should not fail as a valid mapping was found, so the phys mem must exists */
+		printk(KERN_WARNING "UMP: Unable to find physical start block with offset %llx\n", virt_page_off + phys_page_off);
+		UMP_ASSERT(0);
+		return;
+	}
+
+	paddr = alloc->block_array[block_idx].addr + block_offset + (((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1));
+
+	for (i = 0; i < page_count; i++)
+	{
+		size_t offset = ((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1);
+		size_t sz = min((size_t)PAGE_SIZE - offset, size);
+
+		/* check if we've overrrun the current block, if so move to the next block */
+		if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+		{
+			block_idx++;
+			UMP_ASSERT(block_idx < alloc->blocksCount);
+			paddr = alloc->block_array[block_idx].addr;
+		}
+
+		if (UMP_MSYNC_CLEAN == op)
+		{
+			ump_sync_to_memory(paddr, vaddr, sz);
+		}
+		else /* (UMP_MSYNC_CLEAN_AND_INVALIDATE == op) already validated on entry */
+		{
+			ump_sync_to_cpu(paddr, vaddr, sz);
+		}
+
+		/* advance to next page  */
+		vaddr = (void*)((uintptr_t)vaddr + sz);
+		size -= sz;
+		paddr += sz;
+	}
+}
+
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	uint64_t size = 0;
+	uint64_t i;
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	for (i = 0; i < num_blocks; i++)
+	{
+		size += blocks[i].size;
+	}
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD
+	    | UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read or write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc),__GFP_HARDWALL | GFP_KERNEL);
+
+	if (NULL == alloc)
+	{
+		goto out1;
+	}
+
+	alloc->block_array = kzalloc(sizeof(ump_dd_physical_block_64) * num_blocks,__GFP_HARDWALL | GFP_KERNEL);
+	if (NULL == alloc->block_array)
+	{
+		goto out2;
+	}
+
+	memcpy(alloc->block_array, blocks, sizeof(ump_dd_physical_block_64) * num_blocks);
+
+#ifdef CONFIG_KDS
+	kds_resource_init(&alloc->kds_res);
+#endif
+	alloc->size = size;
+	alloc->blocksCount = num_blocks;
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+	/* all set up, allocate an ID */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	alloc->management_flags |= UMP_MGMT_EXTERNAL;
+
+	return alloc;
+
+out3:
+	kfree(alloc->block_array);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+
+/*
+ * UMP v1 API
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks)
+{
+	ump_dd_handle mem;
+	ump_dd_physical_block_64 *block_64_array;
+	ump_alloc_flags flags = UMP_V1_API_DEFAULT_ALLOCATION_FLAGS;
+	unsigned long i;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	block_64_array = kzalloc(num_blocks * sizeof(*block_64_array), __GFP_HARDWALL | GFP_KERNEL);
+
+	if(block_64_array == NULL)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/* copy physical blocks */
+	for( i = 0; i < num_blocks; i++)
+	{
+		block_64_array[i].addr = blocks[i].addr;
+		block_64_array[i].size = blocks[i].size;
+	}
+
+	mem = ump_dd_create_from_phys_blocks_64(block_64_array, num_blocks, flags, NULL, NULL, NULL);
+
+	kfree(block_64_array);
+
+	return mem;
+
+}
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h b/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h
new file mode 100644
index 0000000..8cb424d
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_core.h
@@ -0,0 +1,228 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_CORE_H_
+#define _UMP_KERNEL_CORE_H_
+
+
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <asm/atomic.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/cred.h>
+#include <asm/mmu_context.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+#include <linux/ump-common.h>
+#include <ump/src/common/ump_kernel_descriptor_mapping.h>
+
+/* forward decl */
+struct umpp_session;
+
+/**
+ * UMP handle metadata.
+ * Tracks various data about a handle not of any use to user space
+ */
+typedef enum
+{
+	UMP_MGMT_EXTERNAL = (1ul << 0) /**< Handle created via the ump_dd_create_from_phys_blocks interface */
+	/* (1ul << 31) not to be used */
+} umpp_management_flags;
+
+/**
+ * Structure tracking the single global UMP device.
+ * Holds global data like the ID map
+ */
+typedef struct umpp_device
+{
+	struct mutex secure_id_map_lock; /**< Lock protecting access to the map */
+	umpp_descriptor_mapping * secure_id_map; /**< Map of all known secure IDs on the system */
+} umpp_device;
+
+/**
+ * Structure tracking all memory allocations of a UMP allocation.
+ * Tracks info about an mapping so we can verify cache maintenace
+ * operations and help in the unmap cleanup.
+ */
+typedef struct umpp_cpu_mapping
+{
+	struct list_head        link; /**< link to list of mappings for an allocation */
+	void                  *vaddr_start; /**< CPU VA start of the mapping */
+	size_t                nr_pages; /**< Size (in pages) of the mapping */
+	uint64_t              page_off; /**< Offset (in pages) from start of the allocation where the mapping starts */
+	ump_dd_handle         handle; /**< Which handle this mapping is linked to */
+	struct umpp_session * session; /**< Which session created the mapping */
+} umpp_cpu_mapping;
+
+/**
+ * Structure tracking UMP allocation.
+ * Represent a memory allocation with its ID.
+ * Tracks all needed meta-data about an allocation.
+ * */
+typedef struct umpp_allocation
+{
+	ump_secure_id id; /**< Secure ID of the allocation */
+	atomic_t refcount; /**< Usage count */
+
+	ump_alloc_flags flags; /**< Flags for all supported devices */
+	uint32_t management_flags; /**< Managment flags tracking */
+
+	pid_t owner; /**< The process ID owning the memory if not sharable */
+
+	ump_dd_security_filter filter_func; /**< Hook to verify use, called during retains from new clients */
+	ump_dd_final_release_callback final_release_func; /**< Hook called when the last reference is removed */
+	void* callback_data; /**< Additional data given to release hook */
+
+	uint64_t size; /**< Size (in bytes) of the allocation */
+	uint64_t blocksCount; /**< Number of physsical blocks the allocation is built up of */
+	ump_dd_physical_block_64 * block_array; /**< Array, one entry per block, describing block start and length */
+
+	struct mutex     map_list_lock; /**< Lock protecting the map_list */
+	struct list_head map_list; /**< Tracks all CPU VA mappings of this allocation */
+
+#ifdef CONFIG_KDS
+	struct kds_resource kds_res; /**< The KDS resource controlling access to this allocation */
+#endif
+
+	void * backendData; /**< Physical memory backend meta-data */
+} umpp_allocation;
+
+/**
+ * Structure tracking use of UMP memory by a session.
+ * Tracks the use of an allocation by a session so session termination can clean up any outstanding references.
+ * Also protects agains non-matched release calls from user space.
+ */
+typedef struct umpp_session_memory_usage
+{
+	ump_secure_id id; /**< ID being used. For quick look-up */
+	ump_dd_handle mem; /**< Handle being used. */
+
+	/**
+	 * Track how many times has the process retained this handle in the kernel.
+	 * This should usually just be 1(allocated or resolved) or 2(mapped),
+	 * but could be more if someone is playing with the low-level API
+	 * */
+	atomic_t process_usage_count;
+
+	struct list_head link; /**< link to other usage trackers for a session */
+} umpp_session_memory_usage;
+
+/**
+ * Structure representing a session/client.
+ * Tracks the UMP allocations being used by this client.
+ */
+typedef struct umpp_session
+{
+	struct mutex session_lock; /**< Lock for memory usage manipulation */
+	struct list_head memory_usage; /**< list of memory currently being used by the this session */
+	void*  import_handler_data[UMPP_EXTERNAL_MEM_COUNT]; /**< Import modules per-session data pointer */
+} umpp_session;
+
+/**
+ * UMP core setup.
+ * Called by any OS specific startup function to initialize the common part.
+ * @return UMP_OK if core initialized correctly, any other value for errors
+ */
+ump_result umpp_core_constructor(void);
+
+/**
+ * UMP core teardown.
+ * Called by any OS specific unload function to clean up the common part.
+ */
+void umpp_core_destructor(void);
+
+/**
+ * UMP session start.
+ * Called by any OS specific session handler when a new session is detected
+ * @return Non-NULL if a matching core session could be set up. NULL on failure
+ */
+umpp_session *umpp_core_session_start(void);
+
+/**
+ * UMP session end.
+ * Called by any OS specific session handler when a session is ended/terminated.
+ * @param session The core session object returned by ump_core_session_start
+ */
+void umpp_core_session_end(umpp_session *session);
+
+/**
+ * Find a mapping object (if any) for this allocation.
+ * Called by any function needing to identify a mapping from a user virtual address.
+ * Verifies that the whole range to be within a mapping object.
+ * @param alloc The UMP allocation to find a matching mapping object of
+ * @param uaddr User mapping address to find the mapping object for
+ * @param size Length of the mapping
+ * @return NULL on error (no match found), pointer to mapping object if match found
+ */
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void* uaddr, size_t size);
+
+/**
+ * Register a new mapping of an allocation.
+ * Called by functions creating a new mapping of an allocation, typically OS specific handlers.
+ * @param alloc The allocation object which has been mapped
+ * @param map Info about the mapping
+ */
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map);
+
+/**
+ * Remove and free mapping object from an allocation.
+ * @param alloc The allocation object to remove the mapping info from
+ * @param target The mapping object to remove
+ */
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target);
+
+/**
+ * Helper to find a block in the blockArray which holds a given byte offset.
+ * @param alloc The allocation object to find the block in
+ * @param offset Offset (in bytes) from allocation start to find the block of
+ * @param[out] block_index Pointer to the index of the block matching
+ * @param[out] block_internal_offset Offset within the returned block of the searched offset
+ * @return 0 if a matching block was found, any other value for error
+ */
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const block_index, uint64_t * const block_internal_offset);
+
+/**
+ * Cache maintenance helper.
+ * Performs the requested cache operation on the given handle.
+ * @param mem Allocation handle
+ * @param op Cache maintenance operation to perform
+ * @param address User mapping at which to do the operation
+ * @param size Length (in bytes) of the range to do the operation on
+ */
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size);
+
+/**
+ * Import module session early init.
+ * Calls session_begin on all installed import modules.
+ * @param session The core session object to initialized the import handler for
+ * */
+void umpp_import_handlers_init(umpp_session * session);
+
+/**
+ * Import module session cleanup.
+ * Calls session_end on all import modules bound to the session.
+ * @param session The core session object to initialized the import handler for
+ */
+void umpp_import_handlers_term(umpp_session * session);
+
+#endif /* _UMP_KERNEL_CORE_H_ */
+
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c b/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
new file mode 100644
index 0000000..c5b0d74
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
@@ -0,0 +1,162 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <common/ump_kernel_priv.h>
+
+#define MALI_PAD_INT(x) (((x) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1))
+
+/**
+ * Allocate a descriptor table capable of holding 'count' mappings
+ * @param count Number of mappings in the table
+ * @return Pointer to a new table, NULL on error
+ */
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count);
+
+/**
+ * Free a descriptor table
+ * @param table The table to free
+ */
+static void descriptor_table_free(umpp_descriptor_table * table);
+
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries)
+{
+	umpp_descriptor_mapping * map = kzalloc(sizeof(umpp_descriptor_mapping), GFP_KERNEL);
+
+	init_entries = MALI_PAD_INT(init_entries);
+	max_entries = MALI_PAD_INT(max_entries);
+
+	if (NULL != map)
+	{
+		map->table = descriptor_table_alloc(init_entries);
+		if (NULL != map->table)
+		{
+			init_rwsem( &map->lock);
+			set_bit(0, map->table->usage);
+			map->max_nr_mappings_allowed = max_entries;
+			map->current_nr_mappings = init_entries;
+			return map;
+
+			descriptor_table_free(map->table);
+		}
+		kfree(map);
+	}
+	return NULL;
+}
+
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map)
+{
+	UMP_ASSERT(NULL != map);
+	descriptor_table_free(map->table);
+	kfree(map);
+}
+
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target)
+{
+ 	int descriptor = 0;
+	UMP_ASSERT(NULL != map);
+	down_write( &map->lock);
+	descriptor = find_first_zero_bit(map->table->usage, map->current_nr_mappings);
+	if (descriptor == map->current_nr_mappings)
+	{
+		/* no free descriptor, try to expand the table */
+		umpp_descriptor_table * new_table;
+		umpp_descriptor_table * old_table = map->table;
+		int nr_mappings_new = map->current_nr_mappings + BITS_PER_LONG;
+
+		if (map->current_nr_mappings >= map->max_nr_mappings_allowed)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+		new_table = descriptor_table_alloc(nr_mappings_new);
+		if (NULL == new_table)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+ 		memcpy(new_table->usage, old_table->usage, (sizeof(unsigned long)*map->current_nr_mappings) / BITS_PER_LONG);
+ 		memcpy(new_table->mappings, old_table->mappings, map->current_nr_mappings * sizeof(void*));
+
+ 		map->table = new_table;
+		map->current_nr_mappings = nr_mappings_new;
+		descriptor_table_free(old_table);
+	}
+
+	/* we have found a valid descriptor, set the value and usage bit */
+	set_bit(descriptor, map->table->usage);
+	map->table->mappings[descriptor] = target;
+
+unlock_and_exit:
+	up_write(&map->lock);
+	return descriptor;
+}
+
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target)
+{
+	int result = -EINVAL;
+ 	UMP_ASSERT(map);
+	UMP_ASSERT(target);
+ 	down_read(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		*target = map->table->mappings[descriptor];
+		result = 0;
+	}
+	/* keep target untouched if the descriptor was not found */
+	up_read(&map->lock);
+	return result;
+}
+
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor)
+{
+	UMP_ASSERT(map);
+ 	down_write(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		map->table->mappings[descriptor] = NULL;
+		clear_bit(descriptor, map->table->usage);
+	}
+	up_write(&map->lock);
+}
+
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count)
+{
+	umpp_descriptor_table * table;
+
+	table = kzalloc(sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG) + (sizeof(void*) * count), __GFP_HARDWALL | GFP_KERNEL );
+
+	if (NULL != table)
+	{
+		table->usage = (unsigned long*)((u8*)table + sizeof(umpp_descriptor_table));
+		table->mappings = (void**)((u8*)table + sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG));
+	}
+
+	return table;
+}
+
+static void descriptor_table_free(umpp_descriptor_table * table)
+{
+ 	UMP_ASSERT(table);
+	kfree(table);
+}
+
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h b/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
new file mode 100644
index 0000000..d06c145
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_kernel_descriptor_mapping.h
+ */
+
+#ifndef _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+#define _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+/**
+ * The actual descriptor mapping table, never directly accessed by clients
+ */
+typedef struct umpp_descriptor_table
+{
+	/* keep as a unsigned long to rely on the OS's bitops support */
+	unsigned long * usage; /**< Pointer to bitpattern indicating if a descriptor is valid/used(1) or not(0) */
+	void** mappings; /**< Array of the pointers the descriptors map to */
+} umpp_descriptor_table;
+
+/**
+ * The descriptor mapping object
+ * Provides a separate namespace where we can map an integer to a pointer
+ */
+typedef struct umpp_descriptor_mapping
+{
+	struct rw_semaphore lock; /**< Lock protecting access to the mapping object */
+	unsigned int max_nr_mappings_allowed; /**< Max number of mappings to support in this namespace */
+	unsigned int current_nr_mappings; /**< Current number of possible mappings */
+	umpp_descriptor_table * table; /**< Pointer to the current mapping table */
+} umpp_descriptor_mapping;
+
+/**
+ * Create a descriptor mapping object.
+ * Create a descriptor mapping capable of holding init_entries growable to max_entries.
+ * ID 0 is reserved so the number of available entries will be max - 1.
+ * @param init_entries Number of entries to preallocate memory for
+ * @param max_entries Number of entries to max support
+ * @return Pointer to a descriptor mapping object, NULL on failure
+ */
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries);
+
+/**
+ * Destroy a descriptor mapping object
+ * @param[in] map The map to free
+ */
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map);
+
+/**
+ * Allocate a new mapping entry (descriptor ID)
+ * Allocates a new entry in the map.
+ * @param[in] map The map to allocate a new entry in
+ * @param[in] target The value to map to
+ * @return The descriptor allocated, ID 0 on failure.
+ */
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target);
+
+/**
+ * Get the value mapped to by a descriptor ID
+ * @param[in] map The map to lookup the descriptor id in
+ * @param[in] descriptor The descriptor ID to lookup
+ * @param[out] target Pointer to a pointer which will receive the stored value
+ *
+ * @return 0 on success lookup, -EINVAL on lookup failure.
+ */
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target);
+
+/**
+ * Free the descriptor ID
+ * For the descriptor to be reused it has to be freed
+ * @param[in] map The map to free the descriptor from
+ * @param descriptor The descriptor ID to free
+ */
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor);
+
+#endif /* _UMP_KERNEL_DESCRIPTOR_MAPPING_H_ */
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h b/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
new file mode 100644
index 0000000..38b6f1b
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_PRIV_H_
+#define _UMP_KERNEL_PRIV_H_
+
+#ifdef __KERNEL__
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <asm/cacheflush.h>
+#endif
+
+
+#define UMP_EXPECTED_IDS 64
+#define UMP_MAX_IDS 32768
+
+#ifdef __KERNEL__
+#define UMP_ASSERT(expr) \
+		if (!(expr)) { \
+			printk(KERN_ERR "UMP: Assertion failed! %s,%s,%s,line=%d\n",\
+					#expr,__FILE__,__func__,__LINE__); \
+					BUG(); \
+		}
+
+static inline void ump_sync_to_memory(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/*TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_TO_DEVICE);
+	mb(); /* for outer_sync (if needed) */
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+
+static inline void ump_sync_to_cpu(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/* TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_FROM_DEVICE);
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+#endif /* __KERNEL__*/
+#endif /* _UMP_KERNEL_PRIV_H_ */
+
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/imports/ion/Makefile b/midgard/r16p0/kernel/drivers/base/ump/src/imports/ion/Makefile
new file mode 100644
index 0000000..ef74b27
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/imports/ion/Makefile
@@ -0,0 +1,53 @@
+#
+# (C) COPYRIGHT 2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+
+ifneq ($(KERNELRELEASE),)
+# Inside the kernel build system
+
+EXTRA_CFLAGS += -I$(KBUILD_EXTMOD) -I$(KBUILD_EXTMOD)/../../../../..
+KBUILD_EXTRA_SYMBOLS += "$(KBUILD_EXTMOD)/../../Module.symvers"
+
+SRC += ump_kernel_import_ion.c
+
+MODULE:=ump_ion_import.ko
+
+obj-m := $(MODULE:.ko=.o)
+$(MODULE:.ko=-y) := $(SRC:.c=.o)
+$(MODULE:.ko=-objs) := $(SRC:.c=.o)
+
+else
+# Outside the kernel build system
+#
+#
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR)
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+endif
+
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/imports/ion/sconscript b/midgard/r16p0/kernel/drivers/base/ump/src/imports/ion/sconscript
new file mode 100644
index 0000000..cff24c8
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/imports/ion/sconscript
@@ -0,0 +1,49 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ion = env.Clone()
+
+if env_ion['ump_ion'] != '1':
+	Return()
+
+# Source files required for UMP.
+ion_src = [Glob('#kernel/drivers/base/ump/src/imports/ion/*.c')]
+
+# Note: cleaning via the Linux kernel build system does not yet work
+if env_ion.GetOption('clean') :
+	makeAction=Action("cd ${SOURCE.dir} && make clean", '$MAKECOMSTR')
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make PLATFORM=${platform} && cp ump_ion_import.ko $STATIC_LIB_PATH/ump_ion_import.ko", '$MAKECOMSTR')
+# The target is ump_import_ion.ko, built from the source in ion_src, via the action makeAction
+# ump_import_ion.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+# kernel build system, after which it can be installed to the directory specified if
+# "libs_install" is set; this is done by LibTarget.
+cmd = env_ion.Command('$STATIC_LIB_PATH/ump_ion_import.ko', ion_src, [makeAction])
+
+# Until we fathom out how the invoke the Linux build system to clean, we can use Clean
+# to remove generated files.
+
+patterns = ['*.mod.c', '*.o', '*.ko', '*.a', '.*.cmd', 'modules.order', '.tmp_versions', 'Module.symvers']
+
+for p in patterns:
+	Clean(cmd, Glob('#kernel/drivers/base/ump/src/imports/ion/%s' % p))
+
+env_ion.Depends('$STATIC_LIB_PATH/ump_ion_import.ko', '$STATIC_LIB_PATH/ump.ko')
+env_ion.KernelObjTarget('ump', cmd)
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c b/midgard/r16p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
new file mode 100644
index 0000000..12b2e32
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/dma-mapping.h>
+#include "ion.h"
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+struct ion_wrapping_info
+{
+	struct ion_client *   ion_client;
+	struct ion_handle *   ion_handle;
+	int                   num_phys_blocks;
+	struct scatterlist *  sglist;
+};
+
+static struct ion_device * ion_device_get(void)
+{
+	/* < Customer to provide implementation >
+	 * Return a pointer to the global ion_device on the system
+	 */
+	return NULL;
+}
+
+static int import_ion_client_create(void** const custom_session_data)
+{
+	struct ion_client ** ion_client;
+
+	ion_client = (struct ion_client**)custom_session_data;
+
+	*ion_client = ion_client_create(ion_device_get(), "ump");
+
+	return PTR_RET(*ion_client);
+}
+
+
+static void import_ion_client_destroy(void* custom_session_data)
+{
+	struct ion_client * ion_client;
+
+	ion_client = (struct ion_client*)custom_session_data;
+	BUG_ON(!ion_client);
+
+	ion_client_destroy(ion_client);
+}
+
+
+static void import_ion_final_release_callback(const ump_dd_handle handle, void * info)
+{
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!info);
+
+	(void)handle;
+	ion_info = (struct ion_wrapping_info*)info;
+
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+	kfree(ion_info);
+	module_put(THIS_MODULE);
+}
+
+static ump_dd_handle import_ion_import(void * custom_session_data, void * pfd, ump_alloc_flags flags)
+{
+	int fd;
+	ump_dd_handle ump_handle;
+	struct scatterlist * sg;
+	int num_dma_blocks;
+	ump_dd_physical_block_64 * phys_blocks;
+	unsigned long i;
+	struct sg_table * sgt;
+
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!custom_session_data);
+	BUG_ON(!pfd);
+
+	ion_info = kzalloc(GFP_KERNEL, sizeof(*ion_info));
+	if (NULL == ion_info)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	ion_info->ion_client = (struct ion_client*)custom_session_data;
+
+	if (get_user(fd, (int*)pfd))
+	{
+		goto out;
+	}
+
+	ion_info->ion_handle = ion_import_dma_buf(ion_info->ion_client, fd);
+
+	if (IS_ERR_OR_NULL(ion_info->ion_handle))
+	{
+		goto out;
+	}
+
+	sgt = ion_sg_table(ion_info->ion_client, ion_info->ion_handle);
+	if (IS_ERR_OR_NULL(sgt))
+	{
+		goto ion_dma_map_failed;
+	}
+
+	ion_info->sglist = sgt->sgl;
+
+	sg = ion_info->sglist;
+	while (sg)
+	{
+		ion_info->num_phys_blocks++;
+		sg = sg_next(sg);
+	}
+
+	num_dma_blocks = dma_map_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	if (0 == num_dma_blocks)
+	{
+		goto linux_dma_map_failed;
+	}
+
+	phys_blocks = vmalloc(num_dma_blocks * sizeof(*phys_blocks));
+	if (NULL == phys_blocks)
+	{
+		goto vmalloc_failed;
+	}
+
+	for_each_sg(ion_info->sglist, sg, num_dma_blocks, i)
+	{
+		phys_blocks[i].addr = sg_phys(sg);
+		phys_blocks[i].size = sg_dma_len(sg);
+	}
+
+	ump_handle = ump_dd_create_from_phys_blocks_64(phys_blocks, num_dma_blocks, flags, NULL, import_ion_final_release_callback, ion_info);
+
+	vfree(phys_blocks);
+
+	if (ump_handle != UMP_DD_INVALID_MEMORY_HANDLE)
+	{
+		/*
+		 * As we have a final release callback installed
+		 * we must keep the module locked until
+		 * the callback has been triggered
+		 * */
+		__module_get(THIS_MODULE);
+		return ump_handle;
+	}
+
+	/* failed*/
+vmalloc_failed:
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+linux_dma_map_failed:
+ion_dma_map_failed:
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+out:
+	kfree(ion_info);
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+struct ump_import_handler import_handler_ion =
+{
+	.linux_module =  THIS_MODULE,
+	.session_begin = import_ion_client_create,
+	.session_end =   import_ion_client_destroy,
+	.import =        import_ion_import
+};
+
+static int __init import_ion_initialize_module(void)
+{
+	/* register with UMP */
+	return ump_import_module_register(UMP_EXTERNAL_MEM_TYPE_ION, &import_handler_ion);
+}
+
+static void __exit import_ion_cleanup_module(void)
+{
+	/* unregister import handler */
+	ump_import_module_unregister(UMP_EXTERNAL_MEM_TYPE_ION);
+}
+
+/* Setup init and exit functions for this module */
+module_init(import_ion_initialize_module);
+module_exit(import_ion_cleanup_module);
+
+/* And some module information */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/imports/sconscript b/midgard/r16p0/kernel/drivers/base/ump/src/imports/sconscript
new file mode 100644
index 0000000..8f50683
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/imports/sconscript
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os, sys
+Import('env')
+
+import_modules = [ os.path.join( path, 'sconscript' ) for path in sorted(os.listdir( os.getcwd() )) ]
+
+for m in import_modules:
+	if os.path.exists(m):
+		SConscript( m, variant_dir=os.path.join( env['BUILD_DIR_PATH'], os.path.dirname(m) ), duplicate=0 )
+
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c b/midgard/r16p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
new file mode 100644
index 0000000..d6c3c53
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
@@ -0,0 +1,831 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump-ioctl.h>
+#include <linux/ump.h>
+
+#include <asm/uaccess.h>	         /* copy_*_user */
+#include <linux/compat.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/device.h>            /* class registration support */
+
+#include <common/ump_kernel_core.h>
+
+#include "ump_kernel_linux_mem.h"
+#include <ump_arch.h>
+
+
+struct ump_linux_device
+{
+	struct cdev cdev;
+	struct class * ump_class;
+};
+
+/* Name of the UMP device driver */
+static char ump_dev_name[] = "ump"; /* should be const, but the functions we call requires non-cost */
+
+/* Module parameter to control log level */
+int ump_debug_level = 2;
+module_param(ump_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(ump_debug_level, "Higher number, more dmesg output");
+
+/* By default the module uses any available major, but it's possible to set it at load time to a specific number */
+int ump_major = 0;
+module_param(ump_major, int, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_major, "Device major number");
+
+#define UMP_REV_STRING "1.0"
+
+char * ump_revision = UMP_REV_STRING;
+module_param(ump_revision, charp, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_revision, "Revision info");
+
+static int umpp_linux_open(struct inode *inode, struct file *filp);
+static int umpp_linux_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+/* This variable defines the file operations this UMP device driver offers */
+static struct file_operations ump_fops =
+{
+	.owner   = THIS_MODULE,
+	.open    = umpp_linux_open,
+	.release = umpp_linux_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = umpp_linux_ioctl,
+#else
+	.ioctl   = umpp_linux_ioctl,
+#endif
+	.compat_ioctl = umpp_linux_ioctl,
+	.mmap = umpp_linux_mmap
+};
+
+/* import module handling */
+DEFINE_MUTEX(import_list_lock);
+struct ump_import_handler *  import_handlers[UMPP_EXTERNAL_MEM_COUNT];
+
+/* The global variable containing the global device data */
+static struct ump_linux_device ump_linux_device;
+
+#define DBG_MSG(level, ...) do { \
+if ((level) <=  ump_debug_level)\
+{\
+printk(KERN_DEBUG "UMP<" #level ">:\n" __VA_ARGS__);\
+} \
+} while (0)
+
+#define MSG_ERR(...) do{ \
+printk(KERN_ERR "UMP: ERR: %s\n           %s()%4d\n", __FILE__, __func__  , __LINE__) ; \
+printk(KERN_ERR __VA_ARGS__); \
+printk(KERN_ERR "\n"); \
+} while(0)
+
+#define MSG(...) do{ \
+printk(KERN_INFO "UMP: " __VA_ARGS__);\
+} while (0)
+
+/*
+ * This function is called by Linux to initialize this module.
+ * All we do is initialize the UMP device driver.
+ */
+static int __init umpp_linux_initialize_module(void)
+{
+	ump_result err;
+
+	err = umpp_core_constructor();
+	if (UMP_OK != err)
+	{
+		MSG_ERR("UMP device driver init failed\n");
+		return -ENOTTY;
+	}
+
+	MSG("UMP device driver %s loaded\n", UMP_REV_STRING);
+	return 0;
+}
+
+
+
+/*
+ * This function is called by Linux to unload/terminate/exit/cleanup this module.
+ * All we do is terminate the UMP device driver.
+ */
+static void __exit umpp_linux_cleanup_module(void)
+{
+	DBG_MSG(2, "Unloading UMP device driver\n");
+	umpp_core_destructor();
+	DBG_MSG(2, "Module unloaded\n");
+}
+
+
+
+/*
+ * Initialize the UMP device driver.
+ */
+ump_result umpp_device_initialize(void)
+{
+	int err;
+	dev_t dev = 0;
+
+	if (0 == ump_major)
+	{
+		/* auto select a major */
+		err = alloc_chrdev_region(&dev, 0, 1, ump_dev_name);
+		ump_major = MAJOR(dev);
+	}
+	else
+	{
+		/* use load time defined major number */
+		dev = MKDEV(ump_major, 0);
+		err = register_chrdev_region(dev, 1, ump_dev_name);
+	}
+
+	if (0 == err)
+	{
+		memset(&ump_linux_device, 0, sizeof(ump_linux_device));
+
+		/* initialize our char dev data */
+		cdev_init(&ump_linux_device.cdev, &ump_fops);
+		ump_linux_device.cdev.owner = THIS_MODULE;
+		ump_linux_device.cdev.ops = &ump_fops;
+
+		/* register char dev with the kernel */
+		err = cdev_add(&ump_linux_device.cdev, dev, 1/*count*/);
+		if (0 == err)
+		{
+
+			ump_linux_device.ump_class = class_create(THIS_MODULE, ump_dev_name);
+			if (IS_ERR(ump_linux_device.ump_class))
+			{
+				err = PTR_ERR(ump_linux_device.ump_class);
+			}
+			else
+			{
+				struct device * mdev;
+				mdev = device_create(ump_linux_device.ump_class, NULL, dev, NULL, ump_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return UMP_OK;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(ump_linux_device.ump_class);
+			}
+			cdev_del(&ump_linux_device.cdev);
+		}
+
+		unregister_chrdev_region(dev, 1);
+	}
+
+	return UMP_ERROR;
+}
+
+
+
+/*
+ * Terminate the UMP device driver
+ */
+void umpp_device_terminate(void)
+{
+	dev_t dev = MKDEV(ump_major, 0);
+
+	device_destroy(ump_linux_device.ump_class, dev);
+	class_destroy(ump_linux_device.ump_class);
+
+	/* unregister char device */
+	cdev_del(&ump_linux_device.cdev);
+
+	/* free major */
+	unregister_chrdev_region(dev, 1);
+}
+
+
+static int umpp_linux_open(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = umpp_core_session_start();
+	if (NULL == session)
+	{
+		return -EFAULT;
+	}
+	
+	filp->private_data = session;
+
+	return 0;
+}
+
+static int umpp_linux_release(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = filp->private_data;
+
+	umpp_core_session_end(session);
+
+	filp->private_data = NULL;
+
+	return 0;
+}
+
+/**************************/
+/*ioctl specific functions*/
+/**************************/
+static int do_ump_dd_allocate(umpp_session * session, ump_k_allocate * params)
+{
+	ump_dd_handle new_allocation;
+	new_allocation = ump_dd_allocate_64(params->size, params->alloc_flags, NULL, NULL, NULL);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	printk(KERN_WARNING "UMP: Allocation FAILED\n");
+	return -ENOMEM;
+}
+
+static int do_ump_dd_retain(umpp_session * session, ump_k_retain * params)
+{
+	umpp_session_memory_usage * it;
+
+	mutex_lock(&session->session_lock);
+
+	/* try to find it on the session usage list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found to already be in use */
+			/* check for overflow */
+			while(1)
+			{
+				int refcnt = atomic_read(&it->process_usage_count);
+				if (refcnt + 1 > 0)
+				{
+					/* add a process local ref */
+					if(atomic_cmpxchg(&it->process_usage_count, refcnt, refcnt + 1) == refcnt)
+					{
+						mutex_unlock(&session->session_lock);
+						return 0;
+					}
+				}
+				else
+				{
+					/* maximum usage cap reached */
+					mutex_unlock(&session->session_lock);
+					return -EBUSY;
+				}
+			}
+		}
+	}
+	/* try to look it up globally */
+
+	it = kmalloc(sizeof(*it), GFP_KERNEL);
+
+	if (NULL != it)
+	{
+		it->mem = ump_dd_from_secure_id(params->secure_id);
+		if (UMP_DD_INVALID_MEMORY_HANDLE != it->mem)
+		{
+			/* found, add it to the session usage list */
+			it->id = params->secure_id;
+			atomic_set(&it->process_usage_count, 1);
+			list_add(&it->link, &session->memory_usage);
+		}
+		else
+		{
+			/* not found */
+			kfree(it);
+			it = NULL;
+		}
+	}
+
+	mutex_unlock(&session->session_lock);
+
+	return (NULL != it) ? 0 : -ENODEV;
+}
+
+
+static int do_ump_dd_release(umpp_session * session, ump_k_release * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only do a release if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			result = 0;
+
+			if (0 == atomic_sub_return(1, &it->process_usage_count))
+			{
+				/* last ref in this process remove from the usage list and remove the underlying ref */
+				list_del(&it->link);
+				ump_dd_release(it->mem);
+				kfree(it);
+			}
+
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_sizequery(umpp_session * session, ump_k_sizequery * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->size = ump_dd_size_get_64(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_allocation_flags_get(umpp_session * session, ump_k_allocation_flags * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->alloc_flags = ump_dd_allocation_flags_get(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_msync_now(umpp_session * session, ump_k_msync * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, do the cache op */
+#ifdef CONFIG_COMPAT
+			if (is_compat_task())
+			{
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, compat_ptr(params->mapped_ptr.compat_value), params->size);
+				result = 0;
+			}
+			else
+			{
+#endif
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, params->mapped_ptr.value, params->size);
+				result = 0;
+#ifdef CONFIG_COMPAT
+			}
+#endif
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+
+void umpp_import_handlers_init(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		if (import_handlers[i])
+		{
+			import_handlers[i]->session_begin(&session->import_handler_data[i]);
+			/* It is OK if session_begin returned an error.
+			 * We won't do any import calls if so */
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+void umpp_import_handlers_term(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		/* only call if session_begin succeeded */
+		if (session->import_handler_data[i] != NULL)
+		{
+			/* if session_beging succeeded the handler
+			 * should not have unregistered with us */
+			BUG_ON(!import_handlers[i]);
+			import_handlers[i]->session_end(session->import_handler_data[i]);
+			session->import_handler_data[i] = NULL;
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler)
+{
+	int res = -EEXIST;
+
+	/* validate input */
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+	BUG_ON(!handler);
+	BUG_ON(!handler->linux_module);
+	BUG_ON(!handler->session_begin);
+	BUG_ON(!handler->session_end);
+	BUG_ON(!handler->import);
+
+	mutex_lock(&import_list_lock);
+
+	if (!import_handlers[type])
+	{
+		import_handlers[type] = handler;
+		res = 0;
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return res;
+}
+
+void ump_import_module_unregister(enum ump_external_memory_type type)
+{
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+
+	mutex_lock(&import_list_lock);
+	/* an error to call this if ump_import_module_register didn't succeed */
+	BUG_ON(!import_handlers[type]);
+	import_handlers[type] = NULL;
+	mutex_unlock(&import_list_lock);
+}
+
+static struct ump_import_handler * import_handler_get(unsigned int type_id)
+{
+	enum ump_external_memory_type type;
+	struct ump_import_handler * handler;
+
+	/* validate and convert input */
+	/* handle bad data here, not just BUG_ON */
+	if (type_id == 0 || type_id >= UMPP_EXTERNAL_MEM_COUNT)
+		return NULL;
+
+	type = (enum ump_external_memory_type)type_id;
+
+	/* find the handler */
+	mutex_lock(&import_list_lock);
+
+	handler = import_handlers[type];
+
+	if (handler)
+	{
+		if (!try_module_get(handler->linux_module))
+		{
+			handler = NULL;
+		}
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return handler;
+}
+
+static void import_handler_put(struct ump_import_handler * handler)
+{
+	module_put(handler->linux_module);
+}
+
+static int do_ump_dd_import(umpp_session * session, ump_k_import * params)
+{
+	ump_dd_handle new_allocation = UMP_DD_INVALID_MEMORY_HANDLE;
+	struct ump_import_handler * handler;
+
+	handler = import_handler_get(params->type);
+
+	if (handler)
+	{
+		/* try late binding if not already bound */
+		if (!session->import_handler_data[params->type])
+		{
+			handler->session_begin(&session->import_handler_data[params->type]);
+		}
+
+		/* do we have a bound session? */
+		if (session->import_handler_data[params->type])
+		{
+			new_allocation = handler->import( session->import_handler_data[params->type],
+		                                      params->phandle.value,
+		                                      params->alloc_flags);
+		}
+
+		/* done with the handler */
+		import_handler_put(handler);
+	}
+
+	/* did the import succeed? */
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	return -ENOMEM;
+
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	int ret;
+	uint64_t msg[(UMP_CALL_MAX_SIZE+7)>>3]; /* alignment fixup */
+	uint32_t size = _IOC_SIZE(cmd);
+	struct umpp_session *session = filp->private_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+	(void)inode; /* unused arg */
+#endif
+
+	/*
+	 * extract the type and number bitfields, and don't decode
+	 * wrong cmds: return ENOTTY (inappropriate ioctl) before access_ok()
+	 */
+	if (_IOC_TYPE(cmd) != UMP_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if (_IOC_NR(cmd) > UMP_IOC_MAXNR)
+	{
+		return -ENOTTY;
+	}
+
+	switch(cmd)
+	{
+		case UMP_FUNC_ALLOCATE:
+			if (size != sizeof(ump_k_allocate))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocate(session, (ump_k_allocate *)&msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_SIZEQUERY:
+			if (size != sizeof(ump_k_sizequery))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_sizequery(session,(ump_k_sizequery*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_MSYNC:
+			if (size != sizeof(ump_k_msync))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_msync_now(session,(ump_k_msync*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_IMPORT:
+			if (size != sizeof(ump_k_import))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user*)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_import(session, (ump_k_import*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		/* used only by v1 API */
+		case UMP_FUNC_ALLOCATION_FLAGS_GET:
+			if (size != sizeof(ump_k_allocation_flags))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocation_flags_get(session,(ump_k_allocation_flags*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_RETAIN:
+			if (size != sizeof(ump_k_retain))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_retain(session,(ump_k_retain*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		case UMP_FUNC_RELEASE:
+			if (size != sizeof(ump_k_release))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_release(session,(ump_k_release*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		default:
+			/* not ours */
+			return -ENOTTY;
+	}
+	/*redundant below*/
+	return -ENOTTY;
+}
+
+
+/* Export UMP kernel space API functions */
+EXPORT_SYMBOL(ump_dd_allocate_64);
+EXPORT_SYMBOL(ump_dd_allocation_flags_get);
+EXPORT_SYMBOL(ump_dd_secure_id_get);
+EXPORT_SYMBOL(ump_dd_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get_64);
+EXPORT_SYMBOL(ump_dd_size_get_64);
+EXPORT_SYMBOL(ump_dd_retain);
+EXPORT_SYMBOL(ump_dd_release);
+EXPORT_SYMBOL(ump_dd_create_from_phys_blocks_64);
+#ifdef CONFIG_KDS
+EXPORT_SYMBOL(ump_dd_kds_resource_get);
+#endif
+
+/* import API */
+EXPORT_SYMBOL(ump_import_module_register);
+EXPORT_SYMBOL(ump_import_module_unregister);
+
+
+
+/* V1 API */
+EXPORT_SYMBOL(ump_dd_handle_create_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_block_count_get);
+EXPORT_SYMBOL(ump_dd_phys_block_get);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get);
+EXPORT_SYMBOL(ump_dd_size_get);
+EXPORT_SYMBOL(ump_dd_reference_add);
+EXPORT_SYMBOL(ump_dd_reference_release);
+EXPORT_SYMBOL(ump_dd_handle_create_from_phys_blocks);
+
+
+/* Setup init and exit functions for this module */
+module_init(umpp_linux_initialize_module);
+module_exit(umpp_linux_cleanup_module);
+
+/* And some module informatio */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(UMP_REV_STRING);
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c b/midgard/r16p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
new file mode 100644
index 0000000..9186dd0
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
@@ -0,0 +1,250 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+#include <linux/version.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/mm.h> /* memory mananger definitions */
+#include <linux/pfn.h>
+#include <linux/highmem.h> /*kmap*/
+
+#include <linux/compat.h> /* is_compat_task */
+
+#include <common/ump_kernel_core.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+static void umpp_vm_close(struct vm_area_struct *vma)
+{
+	umpp_cpu_mapping * mapping;
+	umpp_session * session;
+	ump_dd_handle handle;
+
+	mapping = (umpp_cpu_mapping*)vma->vm_private_data;
+	UMP_ASSERT(mapping);
+	
+	session = mapping->session;
+	handle = mapping->handle;
+
+	umpp_dd_remove_cpu_mapping(mapping->handle, mapping); /* will free the mapping object */
+	ump_dd_release(handle);
+}
+
+
+static const struct vm_operations_struct umpp_vm_ops = {
+	.close = umpp_vm_close
+};
+
+int umpp_phys_commit(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	/* round up to a page boundary */
+	alloc->size = (alloc->size + PAGE_SIZE - 1) & ~((uint64_t)PAGE_SIZE-1) ;
+	/* calculate number of pages */
+	alloc->blocksCount = alloc->size >> PAGE_SHIFT;
+
+	if( (sizeof(ump_dd_physical_block_64) * alloc->blocksCount) > ((size_t)-1))
+	{
+		printk(KERN_WARNING "UMP: umpp_phys_commit - trying to allocate more than possible\n");
+		return -ENOMEM;
+	}
+
+	alloc->block_array = kmalloc(sizeof(ump_dd_physical_block_64) * alloc->blocksCount, __GFP_HARDWALL | GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (NULL == alloc->block_array)
+	{
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		void * mp;
+		struct page * page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | __GFP_NOWARN | __GFP_COLD);
+		if (NULL == page)
+		{
+			break;
+		}
+
+		alloc->block_array[i].addr = PFN_PHYS(page_to_pfn(page));
+		alloc->block_array[i].size = PAGE_SIZE;
+
+		mp = kmap(page);
+		if (NULL == mp)
+		{
+			__free_page(page);
+			break;
+		}
+
+		memset(mp, 0x00, PAGE_SIZE); /* instead of __GFP_ZERO, so we can do cache maintenance */
+		ump_sync_to_memory(PFN_PHYS(page_to_pfn(page)), mp, PAGE_SIZE);
+		kunmap(page);
+	}
+
+	if (i == alloc->blocksCount)
+	{
+		return 0;
+	}
+	else
+	{
+		uint64_t j;
+		for (j = 0; j < i; j++)
+		{
+			struct page * page;
+			page = pfn_to_page(alloc->block_array[j].addr >> PAGE_SHIFT);
+			__free_page(page);
+		}
+		
+		kfree(alloc->block_array);
+
+		return -ENOMEM;
+	}
+}
+
+void umpp_phys_free(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		__free_page(pfn_to_page(alloc->block_array[i].addr >> PAGE_SHIFT));
+	}
+
+	kfree(alloc->block_array);
+}
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma)
+{
+	ump_secure_id id;
+	ump_dd_handle h;
+	size_t offset;
+	int err = -EINVAL;
+	size_t length = vma->vm_end - vma->vm_start;
+
+	umpp_cpu_mapping * map = NULL;
+	umpp_session *session = filp->private_data;
+
+	if ( 0 == length )
+	{
+		return -EINVAL;
+	}
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (NULL == map)
+	{
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* unpack our arg */
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	if (is_compat_task())
+	{
+#endif
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_32;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_32;
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	}
+	else
+	{
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_64;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_64;
+	}
+#endif
+
+	h = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE != h)
+	{
+		uint64_t i;
+		uint64_t block_idx;
+		uint64_t block_offset;
+		uint64_t paddr;
+		umpp_allocation * alloc;
+		uint64_t last_byte;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0))
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO | VM_MIXEDMAP | VM_DONTDUMP;
+#else
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO | VM_MIXEDMAP;
+#endif
+		vma->vm_ops = &umpp_vm_ops;
+		vma->vm_private_data = map;
+
+		alloc = (umpp_allocation*)h;
+
+		if( (alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+		{
+			/* cache disabled flag set, disable caching for cpu mappings */
+			vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		}
+
+		last_byte = length + (offset << PAGE_SHIFT) - 1;
+		if (last_byte >= alloc->size || last_byte < (offset << PAGE_SHIFT))
+		{
+			goto err_out;
+		}
+
+		if (umpp_dd_find_start_block(alloc, offset << PAGE_SHIFT, &block_idx, &block_offset))
+		{
+			goto err_out;
+		}
+
+		paddr = alloc->block_array[block_idx].addr + block_offset;
+
+		for (i = 0; i < (length >> PAGE_SHIFT); i++)
+		{
+			/* check if we've overrrun the current block, if so move to the next block */
+			if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+			{
+				block_idx++;
+				UMP_ASSERT(block_idx < alloc->blocksCount);
+				paddr = alloc->block_array[block_idx].addr;
+			}
+
+			err = vm_insert_mixed(vma, vma->vm_start + (i << PAGE_SHIFT), paddr >> PAGE_SHIFT);
+			paddr += PAGE_SIZE;
+		}
+
+		map->vaddr_start = (void*)vma->vm_start;
+		map->nr_pages = length >> PAGE_SHIFT;
+		map->page_off = offset;
+		map->handle = h;
+		map->session = session;
+
+		umpp_dd_add_cpu_mapping(h, map);
+
+		return 0;
+
+		err_out:
+
+		ump_dd_release(h);
+	}
+
+	kfree(map);
+
+out:
+
+	return err;
+}
+
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h b/midgard/r16p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
new file mode 100644
index 0000000..4fbf3b2
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_LINUX_MEM_H_
+#define _UMP_KERNEL_LINUX_MEM_H_
+
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma);
+
+#endif /* _UMP_KERNEL_LINUX_MEM_H_ */
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/sconscript b/midgard/r16p0/kernel/drivers/base/ump/src/sconscript
new file mode 100644
index 0000000..b34d499
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/sconscript
@@ -0,0 +1,46 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ump = env.Clone()
+
+# Source files required for UMP.
+ump_src = [
+    Glob('common/*.c'),
+    Glob('imports/*/*.c'),
+    Glob('linux/*.c'),
+]
+
+make_args = env_ump.kernel_get_config_defines(ret_list = True) + [
+    'PLATFORM=%s' % env_ump['platform'],
+    'MALI_UNIT_TEST=%s' % env_ump['unit'],
+]
+
+mod = env_ump.BuildKernelModule('$STATIC_LIB_PATH/ump.ko', ump_src,
+                                make_args = make_args)
+env_ump.KernelObjTarget('ump', mod)
+
+# Add a dependency on kds.ko only when the build is not Android
+# Android uses sync_pt instead of Midgard KDS to achieve KDS functionality
+# Only necessary when KDS is not built into the kernel.
+#
+if env_ump['os'] != 'android':
+    if not env_ump.KernelConfigEnabled('CONFIG_KDS'):
+        env_ump.Depends(mod, '$STATIC_LIB_PATH/kds.ko')
+
+SConscript( 'imports/sconscript' )
+
diff --git a/midgard/r16p0/kernel/drivers/base/ump/src/ump_arch.h b/midgard/r16p0/kernel/drivers/base/ump/src/ump_arch.h
new file mode 100644
index 0000000..2303d56
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/src/ump_arch.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_ARCH_H_
+#define _UMP_ARCH_H_
+
+#include <common/ump_kernel_core.h>
+
+/**
+ * Device specific setup.
+ * Called by the UMP core code to to host OS/device specific setup.
+ * Typical use case is device node creation for talking to user space.
+ * @return UMP_OK on success, any other value on failure
+ */
+extern ump_result umpp_device_initialize(void);
+
+/**
+ * Device specific teardown.
+ * Undo any things done by ump_device_initialize.
+ */
+extern void umpp_device_terminate(void);
+
+extern int umpp_phys_commit(umpp_allocation * alloc);
+extern void umpp_phys_free(umpp_allocation * alloc);
+
+#endif /* _UMP_ARCH_H_ */
diff --git a/midgard/r16p0/kernel/drivers/base/ump/ump_ref_drv.h b/midgard/r16p0/kernel/drivers/base/ump/ump_ref_drv.h
new file mode 100644
index 0000000..9a265fe
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/base/ump/ump_ref_drv.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_ref_drv.h
+ *
+ * This file contains the link to user space part of the UMP API for usage by MALI 400 gralloc.
+ *
+ */
+
+#ifndef _UMP_REF_DRV_H_
+#define _UMP_REF_DRV_H_
+
+#include <ump/ump.h>
+
+
+#endif /* _UMP_REF_DRV_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/Kbuild b/midgard/r16p0/kernel/drivers/gpu/arm/Kbuild
new file mode 100644
index 0000000..19c7e9a
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/Kbuild
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/Kconfig b/midgard/r16p0/kernel/drivers/gpu/arm/Kconfig
new file mode 100644
index 0000000..1f30eb5
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/Kconfig
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menu "ARM GPU Configuration"
+source "drivers/gpu/arm/midgard/Kconfig"
+endmenu
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/Kbuild b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/Kbuild
new file mode 100644
index 0000000..6c03ff9
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/Kbuild
@@ -0,0 +1,238 @@
+#
+# (C) COPYRIGHT 2012,2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r16p0-00rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+ifeq ($(CONFIG_MALI_ERROR_INJECTION),y)
+MALI_ERROR_INJECT_ON = 1
+endif
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_ERROR_INJECT_ON ?= 0
+MALI_MOCK_TEST ?= 0
+MALI_COVERAGE ?= 0
+MALI_INSTRUMENTATION_LEVEL ?= 0
+# This workaround is for what seems to be a compiler bug we observed in
+# GCC 4.7 on AOSP 4.3.  The bug caused an intermittent failure compiling
+# the "_Pragma" syntax, where an error message is returned:
+#
+# "internal compiler error: unspellable token PRAGMA"
+#
+# This regression has thus far only been seen on the GCC 4.7 compiler bundled
+# with AOSP 4.3.0.  So this makefile, intended for in-tree kernel builds
+# which are not known to be used with AOSP, is hardcoded to disable the
+# workaround, i.e. set the define to 0.
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
+	-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
+	-DMALI_COVERAGE=$(MALI_COVERAGE) \
+	-DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \
+	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+	-DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+
+# Use our defines when compiling
+ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+	mali_kbase_device.c \
+	mali_kbase_cache_policy.c \
+	mali_kbase_mem.c \
+	mali_kbase_mmu.c \
+	mali_kbase_jd.c \
+	mali_kbase_jd_debugfs.c \
+	mali_kbase_jm.c \
+	mali_kbase_gpuprops.c \
+	mali_kbase_js.c \
+	mali_kbase_js_ctx_attr.c \
+	mali_kbase_event.c \
+	mali_kbase_context.c \
+	mali_kbase_pm.c \
+	mali_kbase_config.c \
+	mali_kbase_vinstr.c \
+	mali_kbase_softjobs.c \
+	mali_kbase_10969_workaround.c \
+	mali_kbase_hw.c \
+	mali_kbase_utility.c \
+	mali_kbase_debug.c \
+	mali_kbase_trace_timeline.c \
+	mali_kbase_gpu_memory_debugfs.c \
+	mali_kbase_mem_linux.c \
+	mali_kbase_core_linux.c \
+	mali_kbase_sync.c \
+	mali_kbase_sync_user.c \
+	mali_kbase_replay.c \
+	mali_kbase_mem_profile_debugfs.c \
+	mali_kbase_mmu_mode_lpae.c \
+	mali_kbase_mmu_mode_aarch64.c \
+	mali_kbase_disjoint_events.c \
+	mali_kbase_gator_api.c \
+	mali_kbase_debug_mem_view.c \
+	mali_kbase_debug_job_fault.c \
+	mali_kbase_smc.c \
+	mali_kbase_mem_pool.c \
+	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_tlstream.c \
+	mali_kbase_strings.c \
+	mali_kbase_as_fault_debugfs.c \
+	mali_kbase_regs_history_debugfs.c
+
+
+
+
+ifeq ($(MALI_UNIT_TEST),1)
+	SRC += mali_kbase_tlstream_test.c
+endif
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+	SRC += mali_kbase_regs_dump_debugfs.c
+endif
+
+
+ccflags-y += -I$(KBASE_PATH)
+
+ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y)
+	SRC += mali_kbase_platform_fake.c
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y)
+		SRC += platform/vexpress/mali_kbase_config_vexpress.c \
+		platform/vexpress/mali_kbase_cpu_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y)
+		SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/rtsm_ve
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_JUNO),y)
+		SRC += platform/juno/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/juno
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_JUNO_SOC),y)
+		SRC += platform/juno_soc/mali_kbase_config_juno_soc.c
+		ccflags-y += -I$(src)/platform/juno_soc
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_1XV7_A57),y)
+		SRC += platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress_1xv7_a57
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y)
+		SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \
+		platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress_6xvirtex7_10mhz
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_A7_KIPLING),y)
+		SRC += platform/a7_kipling/mali_kbase_config_a7_kipling.c \
+		platform/a7_kipling/mali_kbase_cpu_a7_kipling.c
+		ccflags-y += -I$(src)/platform/a7_kipling
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+	# remove begin and end quotes from the Kconfig string type
+	platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+	MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+	ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+	ifeq ($(CONFIG_MALI_MIDGARD),m)
+	include  $(src)/platform/$(platform_name)/Kbuild
+	else ifeq ($(CONFIG_MALI_MIDGARD),y)
+	obj-$(CONFIG_MALI_MIDGARD) += platform/
+	endif
+	endif
+endif # CONFIG_MALI_PLATFORM_FAKE=y
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+ifeq ($(CONFIG_MALI_MIDGARD),m)
+include  $(src)/platform/$(platform_name)/Kbuild
+else ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-$(CONFIG_MALI_MIDGARD) += platform/
+endif
+endif
+
+ifeq ($(CONFIG_MALI_PLATFORM_DEVICETREE),y)
+	ccflags-y += -I$(src)/platform/devicetree
+    include $(src)/platform/devicetree/Kbuild
+endif
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+mali_kbase-y := $(SRC:.c=.o)
+
+ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
+include  $(src)/ipa/Kbuild
+mali_kbase-y += $(IPA:.c=.o)
+endif
+
+mali_kbase-$(CONFIG_MALI_DMA_FENCE) += mali_kbase_dma_fence.o
+
+MALI_BACKEND_PATH ?= backend
+CONFIG_MALI_BACKEND ?= gpu
+CONFIG_MALI_BACKEND_REAL ?= $(CONFIG_MALI_BACKEND)
+
+ifeq ($(MALI_MOCK_TEST),1)
+ifeq ($(CONFIG_MALI_BACKEND_REAL),gpu)
+# Test functionality
+mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
+endif
+endif
+
+include  $(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)/Kbuild
+mali_kbase-y += $(BACKEND:.c=.o)
+
+
+ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+
+# Default to devicetree platform if neither a fake platform or a thirdparty
+# platform is configured.
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY)$(CONFIG_MALI_PLATFORM_FAKE),)
+CONFIG_MALI_PLATFORM_DEVICETREE := y
+endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/Kconfig b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/Kconfig
new file mode 100644
index 0000000..c1acaf0
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/Kconfig
@@ -0,0 +1,246 @@
+#
+# (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menuconfig MALI_MIDGARD
+	tristate "Mali Midgard series support"
+	select GPU_TRACEPOINTS if ANDROID
+	default n
+	help
+	  Enable this option to build support for a ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+	  You will need the Gator device driver already loaded before loading this driver when enabling
+	  Streamline debug support.
+	  This is a legacy interface required by older versions of Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ && !MALI_PLATFORM_DEVICETREE
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD && PM_DEVFREQ
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali"
+	depends on MALI_MIDGARD && !KDS
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if KDS is not present and
+	  the Linux Kernel has built in support for DMA_BUF fences.
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default n
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_CORESTACK
+	bool "Support controlling power to the GPU core stack"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enabling this feature on supported GPUs will let the driver powering
+	  on/off the GPU core stack independently without involving the Power
+	  Domain Controller. This should only be enabled on platforms which
+	  integration of the PDC to the Mali GPU is known to be problematic.
+	  This feature is currently only supported on t-Six and t-HEx GPUs.
+
+	  If unsure, say N.
+
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
+
+config MALI_PLATFORM_FAKE
+	bool "Enable fake platform device support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  When you start to work with the Mali Midgard series device driver the platform-specific code of
+	  the Linux kernel for your platform may not be complete. In this situation the kernel device driver
+	  supports creating the platform device outside of the Linux platform-specific code.
+	  Enable this option if would like to use a platform device configuration from within the device driver.
+
+choice
+	prompt "Platform configuration"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default MALI_PLATFORM_DEVICETREE
+	help
+	  Select the SOC platform that contains a Mali Midgard GPU
+
+config MALI_PLATFORM_DEVICETREE
+	bool "Device Tree platform"
+	depends on OF
+	help
+	  Select this option to use Device Tree with the Mali driver.
+
+	  When using this option the Mali driver will get the details of the
+	  GPU hardware from the Device Tree. This means that the same driver
+	  binary can run on multiple platforms as long as all the GPU hardware
+	  details are described in the device tree.
+
+	  Device Tree is the recommended method for the Mali driver platform
+	  integration.
+
+config MALI_PLATFORM_VEXPRESS
+	depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+	bool "Versatile Express"
+config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ
+	depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+	bool "Versatile Express w/Virtex7 @ 40Mhz"
+config MALI_PLATFORM_GOLDFISH
+	depends on ARCH_GOLDFISH
+	bool "Android Goldfish virtual CPU"
+config MALI_PLATFORM_PBX
+	depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX
+	bool "Realview PBX-A9"
+config MALI_PLATFORM_THIRDPARTY
+	bool "Third Party Platform"
+endchoice
+
+config MALI_PLATFORM_THIRDPARTY_NAME
+	depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT
+	string "Third party platform name"
+	help
+	  Enter the name of a third party platform that is supported. The third part configuration
+	  file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name
+	  specified here.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_FENCE_DEBUG
+	bool "Debug sync fence usage"
+	depends on MALI_MIDGARD && MALI_EXPERT && SYNC
+	default y if MALI_DEBUG
+	help
+	  Select this option to enable additional checking and reporting on the
+	  use of sync fences in the Mali driver.
+
+	  This will add a 3s timeout to all sync fence waits in the Mali
+	  driver, so that when work for Mali has been waiting on a sync fence
+	  for a long time a debug message will be printed, detailing what fence
+	  is causing the block, and which dependent Mali atoms are blocked as a
+	  result of this.
+
+	  The timeout can be changed at runtime through the js_soft_timeout
+	  device attribute, where the timeout is specified in milliseconds.
+
+config MALI_NO_MALI
+	bool "No Mali"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This can be used to test the driver in a simulated environment
+	  whereby the hardware is not physically present. If the hardware is physically
+	  present it will not be used. This can be used to test the majority of the
+	  driver without needing actual hardware or for software benchmarking.
+	  All calls to the simulated hardware will complete immediately as if the hardware
+	  completed the task.
+
+config MALI_ERROR_INJECT
+	bool "Error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+	default n
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_TRACE_TIMELINE
+	bool "Timeline tracing"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enables timeline tracing through the kernel tracepoint system.
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event.	This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_GPU_MMU_AARCH64
+	bool "Use AArch64 page tables"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Use AArch64 format page tables for the GPU instead of LPAE-style.
+	  The two formats have the same functionality and performance but a
+	  future GPU may deprecate or remove the legacy LPAE-style format.
+
+	  The LPAE-style format is supported on all Midgard and current Bifrost
+	  GPUs. Enabling AArch64 format restricts the driver to only supporting
+	  Bifrost GPUs.
+
+	  If in doubt, say N.
+
+config MALI_PWRSOFT_765
+	bool "PWRSOFT-765 ticket"
+	default n
+	help
+	  PWRSOFT-765 fixes devfreq cooling devices issues. However, they are
+	  not merged in mainline kernel yet. So this define helps to guard those
+	  parts of the code.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/Makefile b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/Makefile
new file mode 100644
index 0000000..e1625e6
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/Makefile
@@ -0,0 +1,42 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
+UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump
+KBASE_PATH_RELATIVE = $(CURDIR)
+KDS_PATH_RELATIVE = $(CURDIR)/../../../..
+EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers
+
+ifeq ($(MALI_UNIT_TEST), 1)
+	EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+
+ifeq ($(MALI_BUS_LOG), 1)
+#Add bus logger symbols
+EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
+endif
+
+# GPL driver supports KDS
+EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
new file mode 100644
index 0000000..2bef9c2
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
new file mode 100644
index 0000000..cadd7d5
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -0,0 +1,59 @@
+#
+# (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+BACKEND += \
+	backend/gpu/mali_kbase_cache_policy_backend.c \
+	backend/gpu/mali_kbase_device_hw.c \
+	backend/gpu/mali_kbase_gpu.c \
+	backend/gpu/mali_kbase_gpuprops_backend.c \
+	backend/gpu/mali_kbase_debug_job_fault_backend.c \
+	backend/gpu/mali_kbase_irq_linux.c \
+	backend/gpu/mali_kbase_instr_backend.c \
+	backend/gpu/mali_kbase_jm_as.c \
+	backend/gpu/mali_kbase_jm_hw.c \
+	backend/gpu/mali_kbase_jm_rb.c \
+	backend/gpu/mali_kbase_js_affinity.c \
+	backend/gpu/mali_kbase_js_backend.c \
+	backend/gpu/mali_kbase_mmu_hw_direct.c \
+	backend/gpu/mali_kbase_pm_backend.c \
+	backend/gpu/mali_kbase_pm_driver.c \
+	backend/gpu/mali_kbase_pm_metrics.c \
+	backend/gpu/mali_kbase_pm_ca.c \
+	backend/gpu/mali_kbase_pm_ca_fixed.c \
+	backend/gpu/mali_kbase_pm_always_on.c \
+	backend/gpu/mali_kbase_pm_coarse_demand.c \
+	backend/gpu/mali_kbase_pm_demand.c \
+	backend/gpu/mali_kbase_pm_policy.c \
+	backend/gpu/mali_kbase_time.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+BACKEND += \
+	backend/gpu/mali_kbase_pm_ca_random.c \
+	backend/gpu/mali_kbase_pm_ca_demand.c \
+	backend/gpu/mali_kbase_pm_demand_always_powered.c \
+	backend/gpu/mali_kbase_pm_fast_start.c
+endif
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+BACKEND += backend/gpu/mali_kbase_devfreq.c
+endif
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+	# Dummy model
+	BACKEND += backend/gpu/mali_kbase_model_dummy.c
+	BACKEND += backend/gpu/mali_kbase_model_linux.c
+	# HW error simulation
+	BACKEND += backend/gpu/mali_kbase_model_error_generator.c
+endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
new file mode 100644
index 0000000..c8ae87e
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100644
index 0000000..fef9a2c
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode)
+{
+	kbdev->current_gpu_coherency_mode = mode;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+}
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100644
index 0000000..fe98691
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+  * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+  *			in the GPU.
+  * @kbdev:	Device pointer
+  * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+  */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
new file mode 100644
index 0000000..7851ea6
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_debug_job_fault.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+/*GPU_CONTROL_REG(r)*/
+static int gpu_control_reg_snapshot[] = {
+	GPU_ID,
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI
+};
+
+/* JOB_CONTROL_REG(r) */
+static int job_control_reg_snapshot[] = {
+	JOB_IRQ_MASK,
+	JOB_IRQ_STATUS
+};
+
+/* JOB_SLOT_REG(n,r) */
+static int job_slot_reg_snapshot[] = {
+	JS_HEAD_LO,
+	JS_HEAD_HI,
+	JS_TAIL_LO,
+	JS_TAIL_HI,
+	JS_AFFINITY_LO,
+	JS_AFFINITY_HI,
+	JS_CONFIG,
+	JS_STATUS,
+	JS_HEAD_NEXT_LO,
+	JS_HEAD_NEXT_HI,
+	JS_AFFINITY_NEXT_LO,
+	JS_AFFINITY_NEXT_HI,
+	JS_CONFIG_NEXT
+};
+
+/*MMU_REG(r)*/
+static int mmu_reg_snapshot[] = {
+	MMU_IRQ_MASK,
+	MMU_IRQ_STATUS
+};
+
+/* MMU_AS_REG(n,r) */
+static int as_reg_snapshot[] = {
+	AS_TRANSTAB_LO,
+	AS_TRANSTAB_HI,
+	AS_MEMATTR_LO,
+	AS_MEMATTR_HI,
+	AS_FAULTSTATUS,
+	AS_FAULTADDRESS_LO,
+	AS_FAULTADDRESS_HI,
+	AS_STATUS
+};
+
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range)
+{
+	int i, j;
+	int offset = 0;
+	int slot_number;
+	int as_number;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	slot_number = kctx->kbdev->gpu_props.num_job_slots;
+	as_number = kctx->kbdev->gpu_props.num_address_spaces;
+
+	/* get the GPU control registers*/
+	for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job control registers*/
+	for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				JOB_CONTROL_REG(job_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job Slot registers*/
+	for (j = 0; j < slot_number; j++)	{
+		for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+			JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	/* get the MMU registers*/
+	for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Address space registers*/
+	for (j = 0; j < as_number; j++) {
+		for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+					MMU_AS_REG(j, as_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	WARN_ON(offset >= (reg_range*2/4));
+
+	/* set the termination flag*/
+	kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
+	kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
+			offset);
+
+	return true;
+}
+
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
+{
+	int offset = 0;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
+		kctx->reg_dump[offset+1] =
+				kbase_reg_read(kctx->kbdev,
+						kctx->reg_dump[offset], NULL);
+		offset += 2;
+	}
+	return true;
+}
+
+
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
new file mode 100644
index 0000000..8105c74
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -0,0 +1,388 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#endif /* Linux >= 3.13 */
+
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
+static struct devfreq_simple_ondemand_data ondemand_data = {
+		.upthreshold = 90,
+		.downdifferential = 5,
+};
+
+#define DEV_FREQ_GOV_DATA		(&ondemand_data)
+
+static ssize_t upthreshold_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ondemand_data.upthreshold);
+}
+
+static ssize_t upthreshold_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	unsigned int t;
+	int ret;
+
+	ret = sscanf(buf, "%u", &t);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (t >= 100 || t == 0) {
+		dev_err(dev, "invalid input:%s\n", buf);
+		return -EINVAL;
+	}
+	ondemand_data.upthreshold = t;
+	return count;
+
+}
+static DEVICE_ATTR_RW(upthreshold);
+
+static ssize_t downdifferential_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ondemand_data.downdifferential);
+}
+
+static ssize_t downdifferential_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	unsigned int t;
+	int ret;
+
+	ret = sscanf(buf, "%u", &t);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (t >= 100 || t == 0) {
+		dev_err(dev, "invalid input:%s\n", buf);
+		return -EINVAL;
+	}
+	ondemand_data.downdifferential = t;
+	return count;
+
+}
+static DEVICE_ATTR_RW(downdifferential);
+#else
+#define DEV_FREQ_GOV_DATA		(NULL)
+#endif /* CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND */
+
+static ssize_t utilisation_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev->parent);
+	unsigned long total_time = 0, busy_time = 0;
+	unsigned long utilise;
+
+	kbase_pm_get_dvfs_utilisation(kbdev, &total_time, &busy_time);
+
+	if (total_time == 0) {
+		utilise = 0;
+	} else {
+		/* round up */
+		utilise = (busy_time * 100 + (total_time >> 1)) / total_time;
+	}
+	return sprintf(buf, "%ld\n", utilise);
+}
+static DEVICE_ATTR_RO(utilisation);
+
+static struct device_attribute *kbase_dev_attr[] = {
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
+	&dev_attr_downdifferential,
+	&dev_attr_upthreshold,
+#endif
+	&dev_attr_utilisation
+};
+
+static int kbase_dev_add_attr(struct device *dev)
+{
+	int i, ret;
+	for (i = 0; i <  ARRAY_SIZE(kbase_dev_attr); i++) {
+		if (!kbase_dev_attr[i])
+			continue;
+		ret = device_create_file(dev, kbase_dev_attr[i]);
+		if (ret)
+			return ret;
+	}
+	return ret;
+}
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+	if (IS_ERR_OR_NULL(opp)) {
+		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (kbdev->current_freq == freq) {
+		*target_freq = freq;
+		return 0;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq < freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to increase voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(kbdev->clock, freq);
+	if (err) {
+		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+				freq, *target_freq);
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq > freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	*target_freq = freq;
+	kbdev->current_voltage = voltage;
+	kbdev->current_freq = freq;
+
+	KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)freq);
+
+	kbase_pm_reset_dvfs_utilisation(kbdev);
+
+	return err;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	*freq = kbdev->current_freq;
+
+	return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	stat->current_frequency = kbdev->current_freq;
+
+	kbase_pm_get_dvfs_utilisation(kbdev,
+			&stat->total_time, &stat->busy_time);
+
+	stat->private_data = NULL;
+
+	return 0;
+}
+
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+		struct devfreq_dev_profile *dp)
+{
+	int count;
+	int i = 0;
+	unsigned long freq;
+	struct dev_pm_opp *opp;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	if (count < 0) {
+		rcu_read_unlock();
+		return count;
+	}
+	rcu_read_unlock();
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) {
+		opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
+				count, i);
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	kbase_devfreq_term_freq_table(kbdev);
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	int err;
+
+	if (!kbdev->clock)
+		return -ENODEV;
+
+	kbdev->current_freq = clk_get_rate(kbdev->clock);
+
+	dp = &kbdev->devfreq_profile;
+
+	dp->initial_freq = kbdev->current_freq;
+	dp->polling_ms = 100;
+	dp->target = kbase_devfreq_target;
+	dp->get_dev_status = kbase_devfreq_status;
+	dp->get_cur_freq = kbase_devfreq_cur_freq;
+	dp->exit = kbase_devfreq_exit;
+
+	if (kbase_devfreq_init_freq_table(kbdev, dp))
+		return -EFAULT;
+
+	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+				"simple_ondemand", DEV_FREQ_GOV_DATA);
+	if (IS_ERR(kbdev->devfreq)) {
+		kbase_devfreq_term_freq_table(kbdev);
+		return PTR_ERR(kbdev->devfreq);
+	}
+
+	kbase_dev_add_attr(&kbdev->devfreq->dev);
+	err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to register OPP notifier (%d)\n", err);
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	err = kbase_ipa_model_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "IPA initialization failed\n");
+		goto cooling_failed;
+	}
+
+	kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+			kbdev->dev->of_node,
+			kbdev->devfreq,
+			&power_model_ops);
+	if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+		err = PTR_ERR(kbdev->devfreq_cooling);
+		dev_err(kbdev->dev,
+			"Failed to register cooling device (%d)\n",
+			err);
+		goto cooling_failed;
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	if (devfreq_remove_device(kbdev->devfreq))
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	return err;
+}
+
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+	int err;
+
+	dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->devfreq_cooling)
+		devfreq_cooling_unregister(kbdev->devfreq_cooling);
+
+	kbase_ipa_model_term(kbdev);
+#endif
+
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+	err = devfreq_remove_device(kbdev->devfreq);
+	if (err)
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
new file mode 100644
index 0000000..c0bf8b1
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+int kbase_devfreq_init(struct kbase_device *kbdev);
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _BASE_DEVFREQ_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
new file mode 100644
index 0000000..dcdf15c
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -0,0 +1,255 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ *
+ */
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+
+#ifdef CONFIG_DEBUG_FS
+
+
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
+{
+	struct kbase_io_access *old_buf;
+	struct kbase_io_access *new_buf;
+	unsigned long flags;
+
+	if (!new_size)
+		goto out_err; /* The new size must not be 0 */
+
+	new_buf = vmalloc(new_size * sizeof(*h->buf));
+	if (!new_buf)
+		goto out_err;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	old_buf = h->buf;
+
+	/* Note: we won't bother with copying the old data over. The dumping
+	 * logic wouldn't work properly as it relies on 'count' both as a
+	 * counter and as an index to the buffer which would have changed with
+	 * the new array. This is a corner case that we don't need to support.
+	 */
+	h->count = 0;
+	h->size = new_size;
+	h->buf = new_buf;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+	vfree(old_buf);
+
+	return 0;
+
+out_err:
+	return -1;
+}
+
+
+int kbase_io_history_init(struct kbase_io_history *h, u16 n)
+{
+	h->enabled = false;
+	spin_lock_init(&h->lock);
+	h->count = 0;
+	h->size = 0;
+	h->buf = NULL;
+	if (kbase_io_history_resize(h, n))
+		return -1;
+
+	return 0;
+}
+
+
+void kbase_io_history_term(struct kbase_io_history *h)
+{
+	vfree(h->buf);
+	h->buf = NULL;
+}
+
+
+/* kbase_io_history_add - add new entry to the register access history
+ *
+ * @h: Pointer to the history data structure
+ * @addr: Register address
+ * @value: The value that is either read from or written to the register
+ * @write: 1 if it's a register write, 0 if it's a read
+ */
+static void kbase_io_history_add(struct kbase_io_history *h,
+		void __iomem const *addr, u32 value, u8 write)
+{
+	struct kbase_io_access *io;
+	unsigned long flags;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	io = &h->buf[h->count % h->size];
+	io->addr = (uintptr_t)addr | write;
+	io->value = value;
+	++h->count;
+	/* If count overflows, move the index by the buffer size so the entire
+	 * buffer will still be dumped later */
+	if (unlikely(!h->count))
+		h->count = h->size;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+void kbase_io_history_dump(struct kbase_device *kbdev)
+{
+	struct kbase_io_history *const h = &kbdev->io_history;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!unlikely(h->enabled))
+		return;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	dev_err(kbdev->dev, "Register IO History:");
+	iters = (h->size > h->count) ? h->count : h->size;
+	dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	writel(value, kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				value, 1);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_WRITE, offset,
+									value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx)
+{
+	u32 val;
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	val = readl(kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				val, 0);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_READ, offset, val);
+	return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
+
+/**
+ * kbase_report_gpu_fault - Report a GPU fault.
+ * @kbdev:    Kbase device pointer
+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
+ *            was also set
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using dev_warn().
+ */
+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+	u32 status;
+	u64 address;
+
+	status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+	address = (u64) kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
+	address |= kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+
+	dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+			status & 0xFF,
+			kbase_exception_name(kbdev, status),
+			address);
+	if (multiple)
+		dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+	if (val & GPU_FAULT)
+		kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+	if (val & RESET_COMPLETED)
+		kbase_pm_reset_done(kbdev);
+
+	if (val & PRFCNT_SAMPLE_COMPLETED)
+		kbase_instr_hwcnt_sample_done(kbdev);
+
+	if (val & CLEAN_CACHES_COMPLETED)
+		kbase_clean_caches_done(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+
+	/* kbase_pm_check_transitions must be called after the IRQ has been
+	 * cleared. This is because it might trigger further power transitions
+	 * and we don't want to miss the interrupt raised to notify us that
+	 * these further transitions have finished.
+	 */
+	if (val & POWER_CHANGED_ALL)
+		kbase_pm_power_changed(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
new file mode 100644
index 0000000..5b20445
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access device APIs
+ */
+
+#ifndef _KBASE_DEVICE_INTERNAL_H_
+#define _KBASE_DEVICE_INTERNAL_H_
+
+/**
+ * kbase_reg_write - write to GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @value:  Value to write
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_reg_read - read from GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ *
+ * Return: Value in desired register
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+
+/**
+ * kbase_gpu_interrupt - GPU interrupt handler
+ * @kbdev: Kbase device pointer
+ * @val:   The value of the GPU IRQ status register which triggered the call
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be
+ * handled.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+#endif /* _KBASE_DEVICE_INTERNAL_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
new file mode 100644
index 0000000..d578fd7
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbasep_platform_device_init(kbdev);
+	if (err)
+		return err;
+
+	/* Ensure we can access the GPU registers */
+	kbase_pm_register_access_enable(kbdev);
+
+	/* Find out GPU properties based on the GPU feature registers */
+	kbase_gpuprops_set(kbdev);
+
+	/* We're done accessing the GPU registers for now. */
+	kbase_pm_register_access_disable(kbdev);
+
+	err = kbase_hwaccess_pm_init(kbdev);
+	if (err)
+		goto fail_pm;
+
+	err = kbase_install_interrupts(kbdev);
+	if (err)
+		goto fail_interrupts;
+
+	return 0;
+
+fail_interrupts:
+	kbase_hwaccess_pm_term(kbdev);
+fail_pm:
+	kbasep_platform_device_term(kbdev);
+
+	return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+	kbase_release_interrupts(kbdev);
+	kbase_hwaccess_pm_term(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		return err;
+
+	err = kbase_backend_timer_init(kbdev);
+	if (err)
+		goto fail_timer;
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+		dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+		err = -EINVAL;
+		goto fail_interrupt_test;
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = kbase_job_slot_init(kbdev);
+	if (err)
+		goto fail_job_slot;
+
+	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+	return 0;
+
+fail_job_slot:
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	kbase_backend_timer_term(kbdev);
+fail_timer:
+	kbase_hwaccess_pm_halt(kbdev);
+
+	return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+	kbase_job_slot_halt(kbdev);
+	kbase_job_slot_term(kbdev);
+	kbase_backend_timer_term(kbdev);
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100644
index 0000000..b395325
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,110 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query backend APIs
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	int i;
+
+	/* Fill regdump with the content of the relevant registers */
+	regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
+
+	regdump->l2_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_FEATURES), NULL);
+	regdump->suspend_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SUSPEND_SIZE), NULL);
+	regdump->tiler_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_FEATURES), NULL);
+	regdump->mem_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MEM_FEATURES), NULL);
+	regdump->mmu_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MMU_FEATURES), NULL);
+	regdump->as_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(AS_PRESENT), NULL);
+	regdump->js_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_PRESENT), NULL);
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		regdump->js_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		regdump->texture_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
+
+	regdump->thread_max_threads = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
+	regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
+									NULL);
+	regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
+	regdump->thread_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_FEATURES), NULL);
+
+	regdump->shader_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
+	regdump->shader_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
+
+	regdump->tiler_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
+	regdump->tiler_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
+
+	regdump->l2_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
+	regdump->l2_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
+
+	regdump->stack_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(STACK_PRESENT_LO), NULL);
+	regdump->stack_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(STACK_PRESENT_HI), NULL);
+}
+
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
+		/* Ensure we can access the GPU registers */
+		kbase_pm_register_access_enable(kbdev);
+
+		regdump->coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+		/* We're done accessing the GPU registers for now. */
+		kbase_pm_register_access_disable(kbdev);
+	} else {
+		/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
+		regdump->coherency_features =
+				COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+	}
+}
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
new file mode 100644
index 0000000..7ad309e
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -0,0 +1,492 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * GPU backend instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+
+/**
+ * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
+ * hardware
+ *
+ * @kbdev: Kbase device
+ */
+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long pm_flags;
+	u32 irq_mask;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask | CLEAN_CACHES_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	int ret;
+	u64 shader_cores_needed;
+	u32 prfcnt_config;
+
+	shader_cores_needed = kbase_pm_get_present_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* alignment failure */
+	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
+		goto out_err;
+
+	/* Override core availability policy to ensure all cores are available
+	 */
+	kbase_pm_ca_instr_enable(kbdev);
+
+	/* Request the cores early on synchronously - we'll release them on any
+	 * errors (e.g. instrumentation already active) */
+	kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		/* Instrumentation is already enabled */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		goto out_unrequest_cores;
+	}
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
+						PRFCNT_SAMPLE_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* In use, this context is the owner */
+	kbdev->hwcnt.kctx = kctx;
+	/* Remember the dump address so we can reprogram it later */
+	kbdev->hwcnt.addr = setup->dump_buffer;
+
+	/* Request the clean */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+	kbdev->hwcnt.backend.triggered = 0;
+	/* Clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+	KBASE_DEBUG_ASSERT(ret);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Wait for cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	/* Configure */
+	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	{
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+	}
+#endif
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					setup->dump_buffer & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					setup->dump_buffer >> 32,        kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+					setup->jm_bm,                    kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+					setup->shader_bm,                kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+					setup->mmu_l2_bm,                kctx);
+	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
+	 * HW counter dump. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
+									kctx);
+	else
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	err = 0;
+
+	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+	return err;
+ out_unrequest_cores:
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ out_err:
+	return err;
+}
+
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	while (1) {
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
+			/* Instrumentation is not enabled */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.kctx != kctx) {
+			/* Instrumentation has been setup for another context */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
+			break;
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+		/* Ongoing dump/setup - wait for its completion */
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Disable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+
+	/* Disable the counters */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+
+	kbdev->hwcnt.kctx = NULL;
+	kbdev->hwcnt.addr = 0ULL;
+
+	kbase_pm_ca_instr_disable(kbdev);
+
+	kbase_pm_unrequest_cores(kbdev, true,
+		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+
+	kbase_pm_release_l2_caches(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+									kctx);
+
+	err = 0;
+
+ out:
+	return err;
+}
+
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.kctx != kctx) {
+		/* The instrumentation has been setup for another context */
+		goto unlock;
+	}
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
+		/* HW counters are disabled or another dump is ongoing, or we're
+		 * resetting */
+		goto unlock;
+	}
+
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Mark that we're dumping - the PF handler can signal that we faulted
+	 */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+	/* Reconfigure the dump address */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					kbdev->hwcnt.addr >> 32, NULL);
+
+	/* Start dumping */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
+					kbdev->hwcnt.addr, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+
+	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+
+	err = 0;
+
+ unlock:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
+
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success)
+{
+	unsigned long flags;
+	bool complete = false;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+		*success = true;
+		complete = true;
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		*success = false;
+		complete = true;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
+
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(data, struct kbase_device,
+						hwcnt.backend.cache_clean_work);
+
+	mutex_lock(&kbdev->cacheclean_lock);
+	kbasep_instr_hwcnt_cacheclean(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	/* Wait for our condition, and any reset to complete */
+	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+				kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_CLEANED);
+
+	/* All finished and idle */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
+		int ret;
+		/* Always clean and invalidate the cache after a successful dump
+		 */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+		ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+		KBASE_DEBUG_ASSERT(ret);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		unsigned long flags;
+		unsigned long pm_flags;
+
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+		/* Disable interrupt */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+									NULL);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+		/* Wakeup... */
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+			/* Only wake if we weren't resetting */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+			wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+		}
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	}
+}
+
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long flags;
+	int err;
+
+	/* Wait for dump & cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		err = -EINVAL;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	} else {
+		/* Dump done */
+		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+		err = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return err;
+}
+
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* Check it's the context previously set up and we're not already
+	 * dumping */
+	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
+							KBASE_INSTR_STATE_IDLE)
+		goto out;
+
+	/* Clear the counters */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_PRFCNT_CLEAR, kctx);
+
+	err = 0;
+
+out:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+int kbase_instr_backend_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
+	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
+	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
+						kbasep_cache_clean_worker);
+	kbdev->hwcnt.backend.triggered = 0;
+
+	kbdev->hwcnt.backend.cache_clean_wq =
+			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
+	if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+void kbase_instr_backend_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+}
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
new file mode 100644
index 0000000..4794672
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+	/* State where instrumentation is not active */
+	KBASE_INSTR_STATE_DISABLED = 0,
+	/* State machine is active and ready for a command. */
+	KBASE_INSTR_STATE_IDLE,
+	/* Hardware is currently dumping a frame. */
+	KBASE_INSTR_STATE_DUMPING,
+	/* We've requested a clean to occur on a workqueue */
+	KBASE_INSTR_STATE_REQUEST_CLEAN,
+	/* Hardware is currently cleaning and invalidating caches. */
+	KBASE_INSTR_STATE_CLEANING,
+	/* Cache clean completed, and either a) a dump is complete, or
+	 * b) instrumentation can now be setup. */
+	KBASE_INSTR_STATE_CLEANED,
+	/* An error has occured during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+	wait_queue_head_t wait;
+	int triggered;
+
+	enum kbase_instr_state state;
+	wait_queue_head_t cache_clean_wait;
+	struct workqueue_struct *cache_clean_wq;
+	struct work_struct  cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100644
index 0000000..e96aeae
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100644
index 0000000..8781561
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific IRQ APIs
+ */
+
+#ifndef _KBASE_IRQ_INTERNAL_H_
+#define _KBASE_IRQ_INTERNAL_H_
+
+int kbase_install_interrupts(struct kbase_device *kbdev);
+
+void kbase_release_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
+ *                          execution
+ * @kbdev: The kbase device
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev);
+
+#endif /* _KBASE_IRQ_INTERNAL_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
new file mode 100644
index 0000000..8416b80
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -0,0 +1,469 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+
+#include <linux/interrupt.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static void *kbase_tag(void *ptr, u32 tag)
+{
+	return (void *)(((uintptr_t) ptr) | tag);
+}
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_job_done(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	atomic_inc(&kbdev->faults_pending);
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val) {
+		atomic_dec(&kbdev->faults_pending);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_mmu_interrupt(kbdev, val);
+
+	atomic_dec(&kbdev->faults_pending);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_gpu_interrupt(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler);
+
+static irq_handler_t kbase_handler_table[] = {
+	[JOB_IRQ_TAG] = kbase_job_irq_handler,
+	[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+	[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
+
+#ifdef CONFIG_MALI_DEBUG
+#define  JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define  MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define  GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * kbase_set_custom_irq_handler - Set a custom IRQ handler
+ * @kbdev: Device for which the handler is to be registered
+ * @custom_handler: Handler to be registered
+ * @irq_type: Interrupt type
+ *
+ * Registers given interrupt handler for requested interrupt type
+ * In the case where irq handler is not specified, the default handler shall be
+ * registered
+ *
+ * Return: 0 case success, error code otherwise
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+					irq_handler_t custom_handler,
+					int irq_type)
+{
+	int result = 0;
+	irq_handler_t requested_irq_handler = NULL;
+
+	KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
+						(GPU_IRQ_HANDLER >= irq_type));
+
+	/* Release previous handler */
+	if (kbdev->irqs[irq_type].irq)
+		free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
+
+	requested_irq_handler = (NULL != custom_handler) ? custom_handler :
+						kbase_handler_table[irq_type];
+
+	if (0 != request_irq(kbdev->irqs[irq_type].irq,
+			requested_irq_handler,
+			kbdev->irqs[irq_type].flags | IRQF_SHARED,
+			dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+		result = -EINVAL;
+		dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+					kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+		dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/* test correct interrupt assigment and reception by cpu */
+struct kbasep_irq_test {
+	struct hrtimer timer;
+	wait_queue_head_t wait;
+	int triggered;
+	u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT    500
+
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+	struct kbasep_irq_test *test_data = container_of(timer,
+						struct kbasep_irq_test, timer);
+
+	test_data->timeout = 1;
+	test_data->triggered = 1;
+	wake_up(&test_data->wait);
+	return HRTIMER_NORESTART;
+}
+
+static int kbasep_common_test_interrupt(
+				struct kbase_device * const kbdev, u32 tag)
+{
+	int err = 0;
+	irq_handler_t test_handler;
+
+	u32 old_mask_val;
+	u16 mask_offset;
+	u16 rawstat_offset;
+
+	switch (tag) {
+	case JOB_IRQ_TAG:
+		test_handler = kbase_job_irq_test_handler;
+		rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+		mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+		break;
+	case MMU_IRQ_TAG:
+		test_handler = kbase_mmu_irq_test_handler;
+		rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+		mask_offset = MMU_REG(MMU_IRQ_MASK);
+		break;
+	case GPU_IRQ_TAG:
+		/* already tested by pm_driver - bail out */
+	default:
+		return 0;
+	}
+
+	/* store old mask */
+	old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+	/* mask interrupts */
+	kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+	if (kbdev->irqs[tag].irq) {
+		/* release original handler and install test handler */
+		if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
+			err = -EINVAL;
+		} else {
+			kbasep_irq_test_data.timeout = 0;
+			hrtimer_init(&kbasep_irq_test_data.timer,
+					CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+			kbasep_irq_test_data.timer.function =
+						kbasep_test_interrupt_timeout;
+
+			/* trigger interrupt */
+			kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
+			kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+
+			hrtimer_start(&kbasep_irq_test_data.timer,
+					HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
+					HRTIMER_MODE_REL);
+
+			wait_event(kbasep_irq_test_data.wait,
+					kbasep_irq_test_data.triggered != 0);
+
+			if (kbasep_irq_test_data.timeout != 0) {
+				dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+				err = -EINVAL;
+			} else {
+				dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+			}
+
+			hrtimer_cancel(&kbasep_irq_test_data.timer);
+			kbasep_irq_test_data.triggered = 0;
+
+			/* mask interrupts */
+			kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+			/* release test handler */
+			free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+		}
+
+		/* restore original interrupt */
+		if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
+				kbdev->irqs[tag].flags | IRQF_SHARED,
+				dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+			dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
+						kbdev->irqs[tag].irq, tag);
+			err = -EINVAL;
+		}
+	}
+	/* restore old mask */
+	kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+
+	return err;
+}
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev)
+{
+	int err;
+
+	init_waitqueue_head(&kbasep_irq_test_data.wait);
+	kbasep_irq_test_data.triggered = 0;
+
+	/* A suspend won't happen during startup/insmod */
+	kbase_pm_context_active(kbdev);
+
+	err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+
+ out:
+	kbase_pm_context_idle(kbdev);
+
+	return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	int err;
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
+				kbdev->irqs[i].flags | IRQF_SHARED,
+				dev_name(kbdev->dev),
+				kbase_tag(kbdev, i));
+		if (err) {
+			dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+							kbdev->irqs[i].irq, i);
+#ifdef CONFIG_SPARSE_IRQ
+			dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+			goto release;
+		}
+	}
+
+	return 0;
+
+ release:
+	while (i-- > 0)
+		free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+
+	return err;
+}
+
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+	}
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			synchronize_irq(kbdev->irqs[i].irq);
+	}
+}
+
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
new file mode 100644
index 0000000..202dcfa
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -0,0 +1,378 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register backend context / address space management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+/**
+ * assign_and_activate_kctx_addr_space - Assign an AS to a context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @current_as: Address Space to assign
+ *
+ * Assign an Address Space (AS) to a context, and add the context to the Policy.
+ *
+ * This includes
+ *   setting up the global runpool_irq structure and the context on the AS,
+ *   Activating the MMU on the AS,
+ *   Allowing jobs to be submitted on the AS.
+ *
+ * Context:
+ *   kbasep_js_kctx_info.jsctx_mutex held,
+ *   kbasep_js_device_data.runpool_mutex held,
+ *   AS transaction mutex held,
+ *   Runpool IRQ lock held
+ */
+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_as *current_as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_per_as_data *js_per_as_data;
+	int as_nr = current_as->number;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	/* Attribute handling */
+	kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+	/* Assign addr space */
+	kctx->as_nr = as_nr;
+
+	/* If the GPU is currently powered, activate this address space on the
+	 * MMU */
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_mmu_update(kctx);
+	/* If the GPU was not powered then the MMU will be reprogrammed on the
+	 * next pm_context_active() */
+
+	/* Allow it to run jobs */
+	kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+	/* Book-keeping */
+	js_per_as_data->kctx = kctx;
+	js_per_as_data->as_busy_refcount = 0;
+
+	kbase_js_runpool_inc_context_count(kbdev, kctx);
+}
+
+/**
+ * release_addr_space - Release an address space
+ * @kbdev: Kbase device
+ * @kctx_as_nr: Address space of context to release
+ * @kctx: Context being released
+ *
+ * Context: kbasep_js_device_data.runpool_mutex must be held
+ *
+ * Release an address space, making it available for being picked again.
+ */
+static void release_addr_space(struct kbase_device *kbdev, int kctx_as_nr,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u16 as_bit = (1u << kctx_as_nr);
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* The address space must not already be free */
+	KBASE_DEBUG_ASSERT(!(js_devdata->as_free & as_bit));
+
+	js_devdata->as_free |= as_bit;
+
+	kbase_js_runpool_dec_context_count(kbdev, kctx);
+}
+
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	int i;
+
+	if (kbdev->hwaccess.active_kctx == kctx) {
+		/* Context is already active */
+		return true;
+	}
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+				&kbdev->js_data.runpool_irq.per_as_data[i];
+
+		if (js_per_as_data->kctx == kctx) {
+			/* Context already has ASID - mark as active */
+			return true;
+		}
+	}
+
+	/* Context does not have address space assigned */
+	return false;
+}
+
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_per_as_data *js_per_as_data;
+	int as_nr = kctx->as_nr;
+
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Attempting to release context without ASID\n");
+		return;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr];
+	if (js_per_as_data->as_busy_refcount != 0) {
+		WARN(1, "Attempting to release active ASID\n");
+		return;
+	}
+
+	/* Release context from address space */
+	js_per_as_data->kctx = NULL;
+
+	kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
+	/* If the GPU is currently powered, de-activate this address space on
+	 * the MMU */
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_mmu_disable(kctx);
+	/* If the GPU was not powered then the MMU will be reprogrammed on the
+	 * next pm_context_active() */
+
+	release_addr_space(kbdev, as_nr, kctx);
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+}
+
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+}
+
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+								int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	js_devdata->as_free |= (1 << as_nr);
+}
+
+/**
+ * check_is_runpool_full - check whether the runpool is full for a specified
+ * context
+ * @kbdev: Kbase device
+ * @kctx:  Kbase context
+ *
+ * If kctx == NULL, then this makes the least restrictive check on the
+ * runpool. A specific context that is supplied immediately after could fail
+ * the check, even under the same conditions.
+ *
+ * Therefore, once a context is obtained you \b must re-check it with this
+ * function, since the return value could change to false.
+ *
+ * Context:
+ *   In all cases, the caller must hold kbasep_js_device_data.runpool_mutex.
+ *   When kctx != NULL the caller must hold the
+ *   kbasep_js_kctx_info.ctx.jsctx_mutex.
+ *   When kctx == NULL, then the caller need not hold any jsctx_mutex locks (but
+ *   it doesn't do any harm to do so).
+ *
+ * Return: true if the runpool is full
+ */
+static bool check_is_runpool_full(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	bool is_runpool_full;
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Regardless of whether a context is submitting or not, can't have more
+	 * than there are HW address spaces */
+	is_runpool_full = (bool) (js_devdata->nr_all_contexts_running >=
+						kbdev->nr_hw_address_spaces);
+
+	if (kctx && !kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		/* Contexts that submit might use less of the address spaces
+		 * available, due to HW workarounds.  In which case, the runpool
+		 * is also full when the number of submitting contexts exceeds
+		 * the number of submittable address spaces.
+		 *
+		 * Both checks must be made: can have nr_user_address_spaces ==
+		 * nr_hw_address spaces, and at the same time can have
+		 * nr_user_contexts_running < nr_all_contexts_running. */
+		is_runpool_full |= (bool)
+					(js_devdata->nr_user_contexts_running >=
+						kbdev->nr_user_address_spaces);
+	}
+
+	return is_runpool_full;
+}
+
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	int i;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* First try to find a free address space */
+	if (check_is_runpool_full(kbdev, kctx))
+		i = -1;
+	else
+		i = ffs(js_devdata->as_free) - 1;
+
+	if (i >= 0 && i < kbdev->nr_hw_address_spaces) {
+		js_devdata->as_free &= ~(1 << i);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return i;
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* No address space currently free, see if we can release one */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+		struct kbasep_js_kctx_info *as_js_kctx_info;
+		struct kbase_context *as_kctx;
+
+		js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[i];
+		as_kctx = js_per_as_data->kctx;
+		as_js_kctx_info = &as_kctx->jctx.sched_info;
+
+		/* Don't release privileged or active contexts, or contexts with
+		 * jobs running */
+		if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) &&
+			js_per_as_data->as_busy_refcount == 0) {
+			if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
+								as_kctx)) {
+				WARN(1, "Failed to retain active context\n");
+
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				return KBASEP_AS_NR_INVALID;
+			}
+
+			kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
+
+			/* Drop and retake locks to take the jsctx_mutex on the
+			 * context we're about to release without violating lock
+			 * ordering
+			 */
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+
+			/* Release context from address space */
+			mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+
+			kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
+
+			if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) {
+				kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
+								as_kctx,
+								true);
+
+				js_devdata->as_free &= ~(1 << i);
+
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+				return i;
+			}
+
+			/* Context was retained while locks were dropped,
+			 * continue looking for free AS */
+
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_as *new_address_space = NULL;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if (kbdev->hwaccess.active_kctx == kctx ||
+	    kctx->as_nr != KBASEP_AS_NR_INVALID ||
+	    as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Invalid parameters to use_ctx()\n");
+		return false;
+	}
+
+	new_address_space = &kbdev->as[as_nr];
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
+
+	if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+		/* We need to retain it to keep the corresponding address space
+		 */
+		kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	}
+
+	return true;
+}
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
new file mode 100644
index 0000000..08a7400
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific definitions
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
+#define _KBASE_HWACCESS_GPU_DEFS_H_
+
+/* SLOT_RB_SIZE must be < 256 */
+#define SLOT_RB_SIZE 2
+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
+
+/**
+ * struct rb_entry - Ringbuffer entry
+ * @katom:	Atom associated with this entry
+ */
+struct rb_entry {
+	struct kbase_jd_atom *katom;
+};
+
+/**
+ * struct slot_rb - Slot ringbuffer
+ * @entries:		Ringbuffer entries
+ * @last_context:	The last context to submit a job on this slot
+ * @read_idx:		Current read index of buffer
+ * @write_idx:		Current write index of buffer
+ * @job_chain_flag:	Flag used to implement jobchain disambiguation
+ */
+struct slot_rb {
+	struct rb_entry entries[SLOT_RB_SIZE];
+
+	struct kbase_context *last_context;
+
+	u8 read_idx;
+	u8 write_idx;
+
+	u8 job_chain_flag;
+};
+
+/**
+ * struct kbase_backend_data - GPU backend specific data for HW access layer
+ * @slot_rb:			Slot ringbuffers
+ * @rmu_workaround_flag:	When PRLAM-8987 is present, this flag determines
+ *				whether slots 0/1 or slot 2 are currently being
+ *				pulled from
+ * @scheduling_timer:		The timer tick used for rescheduling jobs
+ * @timer_running:		Is the timer running? The runpool_mutex must be
+ *				held whilst modifying this.
+ * @suspend_timer:              Is the timer suspended? Set when a suspend
+ *                              occurs and cleared on resume. The runpool_mutex
+ *                              must be held whilst modifying this.
+ * @reset_gpu:			Set to a KBASE_RESET_xxx value (see comments)
+ * @reset_workq:		Work queue for performing the reset
+ * @reset_work:			Work item for performing the reset
+ * @reset_wait:			Wait event signalled when the reset is complete
+ * @reset_timer:		Timeout for soft-stops before the reset
+ * @timeouts_updated:           Have timeout values just been updated?
+ *
+ * The hwaccess_lock (a spinlock) must be held when accessing this structure
+ */
+struct kbase_backend_data {
+	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
+
+	bool rmu_workaround_flag;
+
+	struct hrtimer scheduling_timer;
+
+	bool timer_running;
+	bool suspend_timer;
+
+	atomic_t reset_gpu;
+
+/* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_NOT_PENDING     0
+/* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_PREPARED        1
+/* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_COMMITTED       2
+/* The GPU reset process is currently occuring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+#define KBASE_RESET_GPU_HAPPENING       3
+/* Reset the GPU silently, used when resetting the GPU as part of normal
+ * behavior (e.g. when exiting protected mode). */
+#define KBASE_RESET_GPU_SILENT          4
+	struct workqueue_struct *reset_workq;
+	struct work_struct reset_work;
+	wait_queue_head_t reset_wait;
+	struct hrtimer reset_timer;
+
+	bool timeouts_updated;
+};
+
+/**
+ * struct kbase_jd_atom_backend - GPU backend specific katom data
+ */
+struct kbase_jd_atom_backend {
+};
+
+/**
+ * struct kbase_context_backend - GPU backend specific context data
+ */
+struct kbase_context_backend {
+};
+
+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
new file mode 100644
index 0000000..7856378
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -0,0 +1,1534 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel job manager APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_vinstr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+#define beenthere(kctx, f, a...) \
+			dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if KBASE_GPU_RESET_EN
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
+static void kbasep_reset_timeout_worker(struct work_struct *data);
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
+#endif /* KBASE_GPU_RESET_EN */
+
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
+						struct kbase_context *kctx)
+{
+	return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
+}
+
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js)
+{
+	struct kbase_context *kctx;
+	u32 cfg;
+	u64 jc_head = katom->jc;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	/* Command register must be available */
+	KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+	/* Affinity is not violating */
+	kbase_js_debug_log_current_affinities(kbdev);
+	KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
+							katom->affinity));
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
+						jc_head & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
+						jc_head >> 32, kctx);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+					katom->affinity & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+					katom->affinity >> 32, kctx);
+
+	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
+	 * start */
+	cfg = kctx->as_nr;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
+
+#ifndef CONFIG_MALI_COH_GPU
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START))
+		cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END))
+		cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+#endif /* CONFIG_MALI_COH_GPU */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649))
+		cfg |= JS_CONFIG_START_MMU;
+
+	cfg |= JS_CONFIG_THREAD_PRI(8);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) &&
+		(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED))
+		cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+		if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+			cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								true;
+		} else {
+			katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								false;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
+				katom->flush_id, kctx);
+
+	/* Write an approximate start timestamp.
+	 * It's approximate because there might be a job in the HEAD register.
+	 * In such cases, we'll try to make a better approximation in the IRQ
+	 * handler (up to the KBASE_JS_IRQ_THROTTLE_TIME_US). */
+	katom->start_timestamp = ktime_get();
+
+	/* GO ! */
+	dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx",
+				katom, kctx, js, jc_head, katom->affinity);
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
+							(u32) katom->affinity);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(
+				GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
+				kctx, kbase_jd_atom_id(kctx, katom));
+#endif
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head,
+			katom->affinity, cfg);
+	KBASE_TLSTREAM_TL_RET_CTX_LPU(
+		kctx,
+		&kbdev->gpu_props.props.raw_props.js_features[
+			katom->slot_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_LPU(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[js],
+			"ctx_nr,atom_nr");
+#ifdef CONFIG_GPU_TRACEPOINTS
+	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
+		/* If this is the only job on the slot, trace it as starting */
+		char js_string[16];
+
+		trace_gpu_sched_switch(
+				kbasep_make_job_slot_string(js, js_string,
+						sizeof(js_string)),
+				ktime_to_ns(katom->start_timestamp),
+				(u32)katom->kctx->id, 0, katom->work_id);
+		kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
+	}
+#endif
+	kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+						JS_COMMAND_START, katom->kctx);
+}
+
+/**
+ * kbasep_job_slot_update_head_start_timestamp - Update timestamp
+ * @kbdev: kbase device
+ * @js: job slot
+ * @end_timestamp: timestamp
+ *
+ * Update the start_timestamp of the job currently in the HEAD, based on the
+ * fact that we got an IRQ for the previous set of completed jobs.
+ *
+ * The estimate also takes into account the %KBASE_JS_IRQ_THROTTLE_TIME_US and
+ * the time the job was submitted, to work out the best estimate (which might
+ * still result in an over-estimate to the calculated time spent)
+ */
+static void kbasep_job_slot_update_head_start_timestamp(
+						struct kbase_device *kbdev,
+						int js,
+						ktime_t end_timestamp)
+{
+	if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) {
+		struct kbase_jd_atom *katom;
+		ktime_t new_timestamp;
+		ktime_t timestamp_diff;
+		/* The atom in the HEAD */
+		katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		KBASE_DEBUG_ASSERT(katom != NULL);
+
+		/* Account for any IRQ Throttle time - makes an overestimate of
+		 * the time spent by the job */
+		new_timestamp = ktime_sub_ns(end_timestamp,
+					KBASE_JS_IRQ_THROTTLE_TIME_US * 1000);
+		timestamp_diff = ktime_sub(new_timestamp,
+							katom->start_timestamp);
+		if (ktime_to_ns(timestamp_diff) >= 0) {
+			/* Only update the timestamp if it's a better estimate
+			 * than what's currently stored. This is because our
+			 * estimate that accounts for the throttle time may be
+			 * too much of an overestimate */
+			katom->start_timestamp = new_timestamp;
+		}
+	}
+}
+
+/**
+ * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline
+ * tracepoint
+ * @kbdev: kbase device
+ * @js: job slot
+ *
+ * Make a tracepoint call to the instrumentation module informing that
+ * softstop happened on given lpu (job slot).
+ */
+static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
+					int js)
+{
+	KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(
+		&kbdev->gpu_props.props.raw_props.js_features[js]);
+}
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done)
+{
+	unsigned long flags;
+	int i;
+	u32 count = 0;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
+
+	memset(&kbdev->slot_submit_count_irq[0], 0,
+					sizeof(kbdev->slot_submit_count_irq));
+
+	/* write irq throttle register, this will prevent irqs from occurring
+	 * until the given number of gpu clock cycles have passed */
+	{
+		int irq_throttle_cycles =
+				atomic_read(&kbdev->irq_throttle_cycles);
+
+		kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE),
+						irq_throttle_cycles, NULL);
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	while (done) {
+		u32 failed = done >> 16;
+
+		/* treat failed slots as finished slots */
+		u32 finished = (done & 0xFFFF) | failed;
+
+		/* Note: This is inherently unfair, as we always check
+		 * for lower numbered interrupts before the higher
+		 * numbered ones.*/
+		i = ffs(finished) - 1;
+		KBASE_DEBUG_ASSERT(i >= 0);
+
+		do {
+			int nr_done;
+			u32 active;
+			u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */
+			u64 job_tail = 0;
+
+			if (failed & (1u << i)) {
+				/* read out the job slot status code if the job
+				 * slot reported failure */
+				completion_code = kbase_reg_read(kbdev,
+					JOB_SLOT_REG(i, JS_STATUS), NULL);
+
+				switch (completion_code) {
+				case BASE_JD_EVENT_STOPPED:
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+					kbase_trace_mali_job_slots_event(
+						GATOR_MAKE_EVENT(
+						GATOR_JOB_SLOT_SOFT_STOPPED, i),
+								NULL, 0);
+#endif
+
+					kbasep_trace_tl_event_lpu_softstop(
+						kbdev, i);
+
+					/* Soft-stopped job - read the value of
+					 * JS<n>_TAIL so that the job chain can
+					 * be resumed */
+					job_tail = (u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_LO),
+									NULL) |
+						((u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_HI),
+								NULL) << 32);
+					break;
+				case BASE_JD_EVENT_NOT_STARTED:
+					/* PRLAM-10673 can cause a TERMINATED
+					 * job to come back as NOT_STARTED, but
+					 * the error interrupt helps us detect
+					 * it */
+					completion_code =
+						BASE_JD_EVENT_TERMINATED;
+					/* fall through */
+				default:
+					dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+							i, completion_code,
+							kbase_exception_name
+							(kbdev,
+							completion_code));
+				}
+
+				kbase_gpu_irq_evict(kbdev, i);
+			}
+
+			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+					done & ((1 << i) | (1 << (i + 16))),
+					NULL);
+			active = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_JS_STATE),
+					NULL);
+
+			if (((active >> i) & 1) == 0 &&
+					(((done >> (i + 16)) & 1) == 0)) {
+				/* There is a potential race we must work
+				 * around:
+				 *
+				 *  1. A job slot has a job in both current and
+				 *     next registers
+				 *  2. The job in current completes
+				 *     successfully, the IRQ handler reads
+				 *     RAWSTAT and calls this function with the
+				 *     relevant bit set in "done"
+				 *  3. The job in the next registers becomes the
+				 *     current job on the GPU
+				 *  4. Sometime before the JOB_IRQ_CLEAR line
+				 *     above the job on the GPU _fails_
+				 *  5. The IRQ_CLEAR clears the done bit but not
+				 *     the failed bit. This atomically sets
+				 *     JOB_IRQ_JS_STATE. However since both jobs
+				 *     have now completed the relevant bits for
+				 *     the slot are set to 0.
+				 *
+				 * If we now did nothing then we'd incorrectly
+				 * assume that _both_ jobs had completed
+				 * successfully (since we haven't yet observed
+				 * the fail bit being set in RAWSTAT).
+				 *
+				 * So at this point if there are no active jobs
+				 * left we check to see if RAWSTAT has a failure
+				 * bit set for the job slot. If it does we know
+				 * that there has been a new failure that we
+				 * didn't previously know about, so we make sure
+				 * that we record this in active (but we wait
+				 * for the next loop to deal with it).
+				 *
+				 * If we were handling a job failure (i.e. done
+				 * has the relevant high bit set) then we know
+				 * that the value read back from
+				 * JOB_IRQ_JS_STATE is the correct number of
+				 * remaining jobs because the failed job will
+				 * have prevented any futher jobs from starting
+				 * execution.
+				 */
+				u32 rawstat = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+				if ((rawstat >> (i + 16)) & 1) {
+					/* There is a failed job that we've
+					 * missed - add it back to active */
+					active |= (1u << i);
+				}
+			}
+
+			dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+							completion_code);
+
+			nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+			nr_done -= (active >> i) & 1;
+			nr_done -= (active >> (i + 16)) & 1;
+
+			if (nr_done <= 0) {
+				dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+									i);
+
+				goto spurious;
+			}
+
+			count += nr_done;
+
+			while (nr_done) {
+				if (nr_done == 1) {
+					kbase_gpu_complete_hw(kbdev, i,
+								completion_code,
+								job_tail,
+								&end_timestamp);
+					kbase_jm_try_kick_all(kbdev);
+				} else {
+					/* More than one job has completed.
+					 * Since this is not the last job being
+					 * reported this time it must have
+					 * passed. This is because the hardware
+					 * will not allow further jobs in a job
+					 * slot to complete until the failed job
+					 * is cleared from the IRQ status.
+					 */
+					kbase_gpu_complete_hw(kbdev, i,
+							BASE_JD_EVENT_DONE,
+							0,
+							&end_timestamp);
+				}
+				nr_done--;
+			}
+ spurious:
+			done = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
+				/* Workaround for missing interrupt caused by
+				 * PRLAM-10883 */
+				if (((active >> i) & 1) && (0 ==
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(i,
+							JS_STATUS), NULL))) {
+					/* Force job slot to be processed again
+					 */
+					done |= (1u << i);
+				}
+			}
+
+			failed = done >> 16;
+			finished = (done & 0xFFFF) | failed;
+			if (done)
+				end_timestamp = ktime_get();
+		} while (finished & (1 << i));
+
+		kbasep_job_slot_update_head_start_timestamp(kbdev, i,
+								end_timestamp);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#if KBASE_GPU_RESET_EN
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_COMMITTED) {
+		/* If we're trying to reset the GPU then we might be able to do
+		 * it early (without waiting for a timeout) because some jobs
+		 * have completed
+		 */
+		kbasep_try_reset_gpu_early(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
+}
+KBASE_EXPORT_TEST_API(kbase_job_done);
+
+static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	bool soft_stops_allowed = true;
+
+	if (kbase_jd_katom_is_protected(katom)) {
+		soft_stops_allowed = false;
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+		if ((katom->core_req & BASE_JD_REQ_T) != 0)
+			soft_stops_allowed = false;
+	}
+	return soft_stops_allowed;
+}
+
+static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
+						base_jd_core_req core_reqs)
+{
+	bool hard_stops_allowed = true;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
+		if ((core_reqs & BASE_JD_REQ_T) != 0)
+			hard_stops_allowed = false;
+	}
+	return hard_stops_allowed;
+}
+
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom)
+{
+	struct kbase_context *kctx = target_katom->kctx;
+#if KBASE_TRACE_ENABLE
+	u32 status_reg_before;
+	u64 job_in_head_before;
+	u32 status_reg_after;
+
+	KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+
+	/* Check the head pointer */
+	job_in_head_before = ((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
+			| (((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_HI), NULL))
+									<< 32);
+	status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+#endif
+
+	if (action == JS_COMMAND_SOFT_STOP) {
+		bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
+								target_katom);
+
+		if (!soft_stop_allowed) {
+#ifdef CONFIG_MALI_DEBUG
+			dev_dbg(kbdev->dev,
+					"Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+
+		/* We are about to issue a soft stop, so mark the atom as having
+		 * been soft stopped */
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+
+		/* Mark the point where we issue the soft-stop command */
+		KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(target_katom);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+			int i;
+
+			for (i = 0;
+			     i < kbase_backend_nr_atoms_submitted(kbdev, js);
+			     i++) {
+				struct kbase_jd_atom *katom;
+
+				katom = kbase_gpu_inspect(kbdev, js, i);
+
+				KBASE_DEBUG_ASSERT(katom);
+
+				/* For HW_ISSUE_8316, only 'bad' jobs attacking
+				 * the system can cause this issue: normally,
+				 * all memory should be allocated in multiples
+				 * of 4 pages, and growable memory should be
+				 * changed size in multiples of 4 pages.
+				 *
+				 * Whilst such 'bad' jobs can be cleared by a
+				 * GPU reset, the locking up of a uTLB entry
+				 * caused by the bad job could also stall other
+				 * ASs, meaning that other ASs' jobs don't
+				 * complete in the 'grace' period before the
+				 * reset. We don't want to lose other ASs' jobs
+				 * when they would normally complete fine, so we
+				 * must 'poke' the MMU regularly to help other
+				 * ASs complete */
+				kbase_as_poking_timer_retain_atom(
+						kbdev, katom->kctx, katom);
+			}
+		}
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_SOFT_STOP_1 :
+				JS_COMMAND_SOFT_STOP_0;
+		}
+	} else if (action == JS_COMMAND_HARD_STOP) {
+		bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
+								core_reqs);
+
+		if (!hard_stop_allowed) {
+			/* Jobs can be hard-stopped for the following reasons:
+			 *  * CFS decides the job has been running too long (and
+			 *    soft-stop has not occurred). In this case the GPU
+			 *    will be reset by CFS if the job remains on the
+			 *    GPU.
+			 *
+			 *  * The context is destroyed, kbase_jd_zap_context
+			 *    will attempt to hard-stop the job. However it also
+			 *    has a watchdog which will cause the GPU to be
+			 *    reset if the job remains on the GPU.
+			 *
+			 *  * An (unhandled) MMU fault occurred. As long as
+			 *    BASE_HW_ISSUE_8245 is defined then the GPU will be
+			 *    reset.
+			 *
+			 * All three cases result in the GPU being reset if the
+			 * hard-stop fails, so it is safe to just return and
+			 * ignore the hard-stop request.
+			 */
+			dev_warn(kbdev->dev,
+					"Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+			return;
+		}
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_HARD_STOP_1 :
+				JS_COMMAND_HARD_STOP_0;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
+
+#if KBASE_TRACE_ENABLE
+	status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+	if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
+		struct kbase_jd_atom *head;
+		struct kbase_context *head_kctx;
+
+		head = kbase_gpu_inspect(kbdev, js, 0);
+		head_kctx = head->kctx;
+
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx,
+						head, job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+						0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	} else {
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+#endif
+}
+
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	/* Cancel any remaining running jobs for this kctx  */
+	mutex_lock(&kctx->jctx.lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Invalidate all jobs in context, to prevent re-submitting */
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		if (!work_pending(&kctx->jctx.atoms[i].work))
+			kctx->jctx.atoms[i].event_code =
+						BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_hardstop(kctx, i, NULL);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev;
+	int js = target_katom->slot_nr;
+	int priority = target_katom->sched_priority;
+	int i;
+	bool stop_sent = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		if (katom->kctx != kctx)
+			continue;
+
+		if (katom->sched_priority > priority) {
+			if (!stop_sent)
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE(
+						target_katom);
+
+			kbase_job_slot_softstop(kbdev, js, katom);
+			stop_sent = true;
+		}
+	}
+}
+
+struct zap_reset_data {
+	/* The stages are:
+	 * 1. The timer has never been called
+	 * 2. The zap has timed out, all slots are soft-stopped - the GPU reset
+	 *    will happen. The GPU has been reset when
+	 *    kbdev->hwaccess.backend.reset_waitq is signalled
+	 *
+	 * (-1 - The timer has been cancelled)
+	 */
+	int stage;
+	struct kbase_device *kbdev;
+	struct hrtimer timer;
+	spinlock_t lock; /* protects updates to stage member */
+};
+
+static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer)
+{
+	struct zap_reset_data *reset_data = container_of(timer,
+						struct zap_reset_data, timer);
+	struct kbase_device *kbdev = reset_data->kbdev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&reset_data->lock, flags);
+
+	if (reset_data->stage == -1)
+		goto out;
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_prepare_to_reset_gpu(kbdev)) {
+		dev_err(kbdev->dev, "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+								ZAP_TIMEOUT);
+		kbase_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	reset_data->stage = 2;
+
+ out:
+	spin_unlock_irqrestore(&reset_data->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct zap_reset_data reset_data;
+	unsigned long flags;
+
+	hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	reset_data.timer.function = zap_timeout_callback;
+
+	spin_lock_init(&reset_data.lock);
+
+	reset_data.kbdev = kbdev;
+	reset_data.stage = 1;
+
+	hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for all jobs to finish, and for the context to be not-scheduled
+	 * (due to kbase_job_zap_context(), we also guarentee it's not in the JS
+	 * policy queue either */
+	wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
+	wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+		   !kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	spin_lock_irqsave(&reset_data.lock, flags);
+	if (reset_data.stage == 1) {
+		/* The timer hasn't run yet - so cancel it */
+		reset_data.stage = -1;
+	}
+	spin_unlock_irqrestore(&reset_data.lock, flags);
+
+	hrtimer_cancel(&reset_data.timer);
+
+	if (reset_data.stage == 2) {
+		/* The reset has already started.
+		 * Wait for the reset to complete
+		 */
+		wait_event(kbdev->hwaccess.backend.reset_wait,
+				atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+						== KBASE_RESET_GPU_NOT_PENDING);
+	}
+	destroy_hrtimer_on_stack(&reset_data.timer);
+
+	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+	/* Ensure that the signallers of the waitqs have finished */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
+{
+	u32 flush_id = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.backend.gpu_powered)
+			flush_id = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return flush_id;
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (NULL == kbdev->hwaccess.backend.reset_workq)
+		return -EINVAL;
+
+	KBASE_DEBUG_ASSERT(0 ==
+		object_is_on_stack(&kbdev->hwaccess.backend.reset_work));
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+#endif
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
+ * @kbdev: kbase device pointer
+ * @kctx:  context to check against
+ * @js:	   slot to check
+ * @target_katom: An atom to check, or NULL if all atoms from @kctx on
+ *                slot @js should be checked
+ *
+ * This checks are based upon parameters that would normally be passed to
+ * kbase_job_slot_hardstop().
+ *
+ * In the event of @target_katom being NULL, this will check the last jobs that
+ * are likely to be running on the slot to see if a) they belong to kctx, and
+ * so would be stopped, and b) whether they have AFBC
+ *
+ * In that case, It's guaranteed that a job currently executing on the HW with
+ * AFBC will be detected. However, this is a conservative check because it also
+ * detects jobs that have just completed too.
+ *
+ * Return: true when hard-stop _might_ stop an afbc atom, else false.
+ */
+static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
+		struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom)
+{
+	bool ret = false;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* When we have an atom the decision can be made straight away. */
+	if (target_katom)
+		return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC);
+
+	/* Otherwise, we must chweck the hardware to see if it has atoms from
+	 * this context with AFBC. */
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		/* Ignore atoms from other contexts, they won't be stopped when
+		 * we use this for checking if we should hard-stop them */
+		if (katom->kctx != kctx)
+			continue;
+
+		/* An atom on this slot and this context: check for AFBC */
+		if (katom->core_req & BASE_JD_REQ_FS_AFBC) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
+}
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * @sw_flags:      Flags to pass in about the soft-stop
+ *
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+			struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+	KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+	kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
+			JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool stopped;
+#if KBASE_GPU_RESET_EN
+	/* We make the check for AFBC before evicting/stopping atoms.  Note
+	 * that no other thread can modify the slots whilst we have the
+	 * hwaccess_lock. */
+	int needs_workaround_for_afbc =
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
+			&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
+					 target_katom);
+#endif
+
+	stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
+							target_katom,
+							JS_COMMAND_HARD_STOP);
+#if KBASE_GPU_RESET_EN
+	if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+			needs_workaround_for_afbc)) {
+		/* MIDBASE-2916 if a fragment job with AFBC encoding is
+		 * hardstopped, ensure to do a soft reset also in order to
+		 * clear the GPU status.
+		 * Workaround for HW issue 8401 has an issue,so after
+		 * hard-stopping just reset the GPU. This will ensure that the
+		 * jobs leave the GPU.*/
+		if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+			dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue");
+			kbase_reset_gpu_locked(kbdev);
+		}
+	}
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	/* For hard-stop, don't enter if hard-stop not allowed */
+	if (hw_action == JS_COMMAND_HARD_STOP &&
+			!kbasep_hard_stop_allowed(kbdev, core_reqs))
+		return;
+
+	/* For soft-stop, don't enter if soft-stop not allowed, or isn't
+	 * causing disjoint */
+	if (hw_action == JS_COMMAND_SOFT_STOP &&
+			!(kbasep_soft_stop_allowed(kbdev, target_katom) &&
+			  (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+		return;
+
+	/* Nothing to do if already logged disjoint state on this atom */
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+		return;
+
+	target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+	kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom)
+{
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+		target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+		kbase_disjoint_state_down(kbdev);
+	}
+}
+
+
+#if KBASE_GPU_RESET_EN
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+	int i;
+
+	kbase_io_history_dump(kbdev);
+
+	dev_err(kbdev->dev, "Register state:");
+	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+	dev_err(kbdev->dev, "  JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x JOB_IRQ_THROTTLE=0x%08x",
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), NULL));
+	for (i = 0; i < 3; i++) {
+		dev_err(kbdev->dev, "  JS%d_STATUS=0x%08x      JS%d_HEAD_LO=0x%08x",
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
+					NULL),
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
+					NULL));
+	}
+	dev_err(kbdev->dev, "  MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+	dev_err(kbdev->dev, "  GPU_IRQ_MASK=0x%08x    JOB_IRQ_MASK=0x%08x     MMU_IRQ_MASK=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+	dev_err(kbdev->dev, "  PWR_OVERRIDE0=0x%08x   PWR_OVERRIDE1=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+	dev_err(kbdev->dev, "  SHADER_CONFIG=0x%08x   L2_MMU_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+}
+
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	int i;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+	bool try_schedule = false;
+	bool silent = false;
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	KBASE_DEBUG_ASSERT(data);
+
+	kbdev = container_of(data, struct kbase_device,
+						hwaccess.backend.reset_work);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_SILENT)
+		silent = true;
+
+	KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	/* Make sure the timer has completed - this cannot be done from
+	 * interrupt context, so this cannot be done within
+	 * kbasep_try_reset_gpu_early. */
+	hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* This would re-activate the GPU. Since it's already idle,
+		 * there's no need to reset it */
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+		kbase_disjoint_state_down(kbdev);
+		wake_up(&kbdev->hwaccess.backend.reset_wait);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	spin_lock(&kbdev->hwaccess_lock);
+	spin_lock(&kbdev->mmu_mask_change);
+	/* We're about to flush out the IRQs and their bottom half's */
+	kbdev->irq_reset_flush = true;
+
+	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
+	 * spinlock; this also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts_nolock(kbdev);
+
+	spin_unlock(&kbdev->mmu_mask_change);
+	spin_unlock(&kbdev->hwaccess_lock);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Ensure that any IRQ handlers have finished
+	 * Must be done without any locks IRQ handlers will take */
+	kbase_synchronize_irqs(kbdev);
+
+	/* Flush out any in-flight work items */
+	kbase_flush_mmu_wqs(kbdev);
+
+	/* The flush has completed so reset the active indicator */
+	kbdev->irq_reset_flush = false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
+		/* Ensure that L2 is not transitioning when we send the reset
+		 * command */
+		while (--max_loops && kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_L2))
+			;
+
+		WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
+	}
+
+	mutex_lock(&kbdev->pm.lock);
+	/* We hold the pm lock, so there ought to be a current policy */
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+	/* All slot have been soft-stopped and we've waited
+	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
+	 * assume that anything that is still left on the GPU is stuck there and
+	 * we'll kill it when we reset the GPU */
+
+	if (!silent)
+		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+								RESET_TIMEOUT);
+
+	/* Output the state of some interesting registers to help in the
+	 * debugging of GPU resets */
+	if (!silent)
+		kbase_debug_dump_registers(kbdev);
+
+	/* Reset the GPU */
+	kbase_pm_init_hw(kbdev, 0);
+
+	/* Complete any jobs that were still on the GPU */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_reset(kbdev, &end_timestamp);
+	kbase_pm_metrics_update(kbdev, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	/* Reprogram the GPU's MMU */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		if (js_devdata->runpool_irq.per_as_data[i].kctx)
+			kbase_mmu_update(
+				js_devdata->runpool_irq.per_as_data[i].kctx);
+		else
+			kbase_mmu_disable_as(kbdev, i);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_pm_enable_interrupts(kbdev);
+
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+
+	kbase_disjoint_state_down(kbdev);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	if (!silent)
+		dev_err(kbdev->dev, "Reset complete");
+
+	if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
+		try_schedule = true;
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Find out what cores are required now */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* Synchronously request and wait for those cores, because if
+	 * instrumentation is enabled it would need them immediately. */
+	kbase_pm_check_transitions_sync(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Try submitting some jobs to restart processing */
+	if (try_schedule) {
+		KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
+									0);
+		kbase_js_sched_all(kbdev);
+	}
+
+	kbase_pm_context_idle(kbdev);
+
+	/* Release vinstr */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+						hwaccess.backend.reset_timer);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Reset still pending? */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+						KBASE_RESET_GPU_COMMITTED)
+		queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+	int i;
+	int pending_jobs = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Count the number of jobs */
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+	if (pending_jobs > 0) {
+		/* There are still jobs on the GPU - wait */
+		return;
+	}
+
+	/* To prevent getting incorrect registers when dumping failed job,
+	 * skip early reset.
+	 */
+	if (kbdev->job_fault_debug != false)
+		return;
+
+	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+	 * been called), and that no other thread beat this thread to starting
+	 * the reset */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+						KBASE_RESET_GPU_COMMITTED) {
+		/* Reset has already occurred */
+		return;
+	}
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_try_reset_gpu_early_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ *   The function returns a boolean which should be interpreted as follows:
+ *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
+ *   false - Another thread is performing a reset, kbase_reset_gpu should
+ *   not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_PREPARED) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return false;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_softstop(kbdev, i, NULL);
+
+	return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	bool ret;
+	struct kbasep_js_device_data *js_devdata;
+
+	js_devdata = &kbdev->js_data;
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early_locked(kbdev);
+}
+
+void kbase_reset_gpu_silent(struct kbase_device *kbdev)
+{
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_SILENT) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+			&kbdev->hwaccess.backend.reset_work);
+}
+
+bool kbase_reset_gpu_active(struct kbase_device *kbdev)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_NOT_PENDING)
+		return false;
+
+	return true;
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100644
index 0000000..1f382b3
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev:		Device pointer
+ * @s:			Job slot
+ * @completion_code:	Completion code of job reported by GPU
+ * @job_tail:		Job tail address reported by GPU
+ * @end_timestamp:	Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+					u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static inline char *kbasep_make_job_slot_string(int js, char *js_string,
+						size_t js_size)
+{
+	snprintf(js_string, js_size, "job_slot_%i", js);
+	return js_string;
+}
+#endif
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ *						   on the specified atom
+ * @kbdev:		Device pointer
+ * @js:			Job slot to stop on
+ * @action:		The action to perform, either JSn_COMMAND_HARD_STOP or
+ *			JSn_COMMAND_SOFT_STOP
+ * @core_reqs:		Core requirements of atom to stop
+ * @target_katom:	Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ *					 slot belonging to a given context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @katom:	Specific atom to stop. May be NULL
+ * @js:		Job slot to hard stop
+ * @action:	The action to perform, either JSn_COMMAND_HARD_STOP or
+ *		JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_cacheclean - Cause a GPU cache clean & flush
+ * @kbdev: Device pointer
+ *
+ * Caller must not be in IRQ context
+ */
+void kbase_gpu_cacheclean(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
new file mode 100644
index 0000000..d4ed3d4
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -0,0 +1,1931 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_10969_workaround.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Return whether the specified ringbuffer is empty. HW access lock must be
+ * held */
+#define SLOT_RB_EMPTY(rb)   (rb->write_idx == rb->read_idx)
+/* Return number of atoms currently in the specified ringbuffer. HW access lock
+ * must be held */
+#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer
+ * @kbdev: Device pointer
+ * @katom: Atom to enqueue
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr];
+
+	WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
+	rb->write_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+/**
+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once
+ * it has been completed
+ * @kbdev:         Device pointer
+ * @js:            Job slot to remove atom from
+ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in
+ *                 which case current time will be used.
+ *
+ * Context: Caller must hold the HW access lock
+ *
+ * Return: Atom removed from ringbuffer
+ */
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
+						int js,
+						ktime_t *end_timestamp)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+	struct kbase_jd_atom *katom;
+
+	if (SLOT_RB_EMPTY(rb)) {
+		WARN(1, "GPU ringbuffer unexpectedly empty\n");
+		return NULL;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
+
+	kbase_gpu_release_atom(kbdev, katom, end_timestamp);
+
+	rb->read_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
+
+	kbase_js_debug_log_current_affinities(kbdev);
+
+	return katom;
+}
+
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
+		return NULL; /* idx out of range */
+
+	return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js)
+{
+	return kbase_gpu_inspect(kbdev, js, 0);
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	if (SLOT_RB_EMPTY(rb))
+		return NULL;
+
+	return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom;
+}
+
+/**
+ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently
+ * on the GPU
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ *
+ * Return: true if there are atoms on the GPU for slot js,
+ *         false otherwise
+ */
+static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (!katom)
+			return false;
+		if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED ||
+				katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
+ * currently on the GPU
+ * @kbdev:  Device pointer
+ *
+ * Return: true if there are any atoms on the GPU, false otherwise
+ */
+static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
+{
+	int js;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+			if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+				return true;
+		}
+	}
+	return false;
+}
+
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED))
+			nr++;
+	}
+
+	return nr;
+}
+
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		if (kbase_gpu_inspect(kbdev, js, i))
+			nr++;
+	}
+
+	return nr;
+}
+
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
+				enum kbase_atom_gpu_rb_state min_rb_state)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state >= min_rb_state))
+			nr++;
+	}
+
+	return nr;
+}
+
+/**
+ * check_secure_atom - Check if the given atom is in the given secure state and
+ *                     has a ringbuffer state of at least
+ *                     KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @katom:  Atom pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if atom is in the given state, false otherwise
+ */
+static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure)
+{
+	if (katom->gpu_rb_state >=
+			KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION &&
+			((kbase_jd_katom_is_protected(katom) && secure) ||
+			(!kbase_jd_katom_is_protected(katom) && !secure)))
+		return true;
+
+	return false;
+}
+
+/**
+ * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given
+ *                                secure state in the ringbuffers of at least
+ *                                state
+ *                                KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE
+ * @kbdev:  Device pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if any atoms are in the given state, false otherwise
+ */
+static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
+		bool secure)
+{
+	int js, i;
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, i);
+
+			if (katom) {
+				if (check_secure_atom(katom, secure))
+					return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* The GPU is being reset - so prevent submission */
+		return 0;
+	}
+
+	return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
+}
+
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom);
+
+static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
+						int js,
+						struct kbase_jd_atom *katom)
+{
+	/* The most recently checked affinity. Having this at this scope allows
+	 * us to guarantee that we've checked the affinity in this function
+	 * call.
+	 */
+	u64 recently_chosen_affinity = 0;
+	bool chosen_affinity = false;
+	bool retry;
+
+	do {
+		retry = false;
+
+		/* NOTE: The following uses a number of FALLTHROUGHs to optimize
+		 * the calls to this function. Ending of the function is
+		 * indicated by BREAK OUT */
+		switch (katom->coreref_state) {
+			/* State when job is first attempted to be run */
+		case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+			KBASE_DEBUG_ASSERT(katom->affinity == 0);
+
+			/* Compute affinity */
+			if (false == kbase_js_choose_affinity(
+					&recently_chosen_affinity, kbdev, katom,
+									js)) {
+				/* No cores are currently available */
+				/* *** BREAK OUT: No state transition *** */
+				break;
+			}
+
+			chosen_affinity = true;
+
+			/* Request the cores */
+			kbase_pm_request_cores(kbdev,
+					katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+			katom->affinity = recently_chosen_affinity;
+
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+			{
+				enum kbase_pm_cores_ready cores_ready;
+
+				KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+				cores_ready = kbase_pm_register_inuse_cores(
+						kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						katom->affinity);
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Stay in this state and return, to
+					 * retry at this state later */
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: No state transition
+					 * *** */
+					break;
+				}
+				/* Proceed to next state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+			}
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+			/* Optimize out choosing the affinity twice in the same
+			 * function call */
+			if (chosen_affinity == false) {
+				/* See if the affinity changed since a previous
+				 * call. */
+				if (false == kbase_js_choose_affinity(
+						&recently_chosen_affinity,
+							kbdev, katom, js)) {
+					/* No cores are currently available */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REQUEST_ON_RECHECK_FAILED,
+						katom->kctx, katom,
+						katom->jc, js,
+						(u32) recently_chosen_affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+				chosen_affinity = true;
+			}
+
+			/* Now see if this requires a different set of cores */
+			if (recently_chosen_affinity != katom->affinity) {
+				enum kbase_pm_cores_ready cores_ready;
+
+				kbase_pm_request_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+				/* Register new cores whilst we still hold the
+				 * old ones, to minimize power transitions */
+				cores_ready =
+					kbase_pm_register_inuse_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+
+				/* Fixup the state that was reduced by
+				 * deref_cores: */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				katom->affinity = recently_chosen_affinity;
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				/* Now might be waiting for powerup again, with
+				 * a new affinity */
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Return to previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_ON_RECHECK_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+			}
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+		case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+			KBASE_DEBUG_ASSERT(katom->affinity ==
+						recently_chosen_affinity);
+
+			/* Note: this is where the caller must've taken the
+			 * hwaccess_lock */
+
+			/* Check for affinity violations - if there are any,
+			 * then we just ask the caller to requeue and try again
+			 * later */
+			if (kbase_js_affinity_would_violate(kbdev, js,
+					katom->affinity) != false) {
+				/* Return to previous state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				/* *** BREAK OUT: Transition to lower state ***
+				 */
+				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_AFFINITY_WOULD_VIOLATE,
+					katom->kctx, katom, katom->jc, js,
+					(u32) katom->affinity);
+				break;
+			}
+
+			/* No affinity violations would result, so the cores are
+			 * ready */
+			katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
+			/* *** BREAK OUT: Cores Ready *** */
+			break;
+
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false,
+					"Unhandled kbase_atom_coreref_state %d",
+							katom->coreref_state);
+			break;
+		}
+	} while (retry != false);
+
+	return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
+}
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	switch (katom->coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(katom->affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							katom->coreref_state);
+		break;
+	}
+
+	katom->affinity = 0;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+}
+
+static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							coreref_state);
+		break;
+	}
+}
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	switch (katom->gpu_rb_state) {
+	case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+		/* Should be impossible */
+		WARN(1, "Attempting to release atom not in ringbuffer\n");
+		break;
+
+	case KBASE_ATOM_GPU_RB_SUBMITTED:
+		/* Inform power management at start/finish of atom so it can
+		 * update its GPU utilisation metrics. Mark atom as not
+		 * submitted beforehand. */
+		katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		kbase_pm_metrics_update(kbdev, end_timestamp);
+
+		if (katom->core_req & BASE_JD_REQ_PERMON)
+			kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+		KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
+			&kbdev->gpu_props.props.raw_props.js_features
+				[katom->slot_nr]);
+		KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
+		KBASE_TLSTREAM_TL_NRET_CTX_LPU(kctx,
+			&kbdev->gpu_props.props.raw_props.js_features
+				[katom->slot_nr]);
+
+	case KBASE_ATOM_GPU_RB_READY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+		kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
+							katom->affinity);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+		break;
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+		if (katom->protected_state.enter !=
+				KBASE_ATOM_ENTER_PROTECTED_CHECK ||
+				katom->protected_state.exit !=
+				KBASE_ATOM_EXIT_PROTECTED_CHECK)
+			kbdev->protected_mode_transition = false;
+
+		if (kbase_jd_katom_is_protected(katom) &&
+				(katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) {
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+#ifdef CONFIG_DEVFREQ_THERMAL
+			/* Go back to configured model for IPA */
+			kbase_ipa_model_use_configured_locked(kbdev);
+#endif
+		}
+
+
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+		break;
+	}
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	kbase_gpu_release_atom(kbdev, katom, NULL);
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+}
+
+static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	bool slot_busy[3];
+
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+	slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+
+	if ((js == 2 && !(slot_busy[0] || slot_busy[1])) ||
+		(js != 2 && !slot_busy[2]))
+		return true;
+
+	/* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */
+	if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1) ||
+			backend->rmu_workaround_flag))
+		return false;
+
+	/* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */
+	if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) ||
+			!backend->rmu_workaround_flag))
+		return false;
+
+	backend->rmu_workaround_flag = !backend->rmu_workaround_flag;
+
+	return true;
+}
+
+/**
+ * other_slots_busy - Determine if any job slots other than @js are currently
+ *                    running atoms
+ * @kbdev: Device pointer
+ * @js:    Job slot
+ *
+ * Return: true if any slots other than @js are busy, false otherwise
+ */
+static inline bool other_slots_busy(struct kbase_device *kbdev, int js)
+{
+	int slot;
+
+	for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) {
+		if (slot == js)
+			continue;
+
+		if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot,
+				KBASE_ATOM_GPU_RB_SUBMITTED))
+			return true;
+	}
+
+	return false;
+}
+
+static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev)
+{
+	return kbdev->protected_mode;
+}
+
+static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot enter protected mode: protected callbacks not specified.\n");
+
+	/*
+	 * When entering into protected mode, we must ensure that the
+	 * GPU is not operating in coherent mode as well. This is to
+	 * ensure that no protected memory can be leaked.
+	 */
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE);
+
+	if (kbdev->protected_ops) {
+		/* Switch GPU to protected mode */
+		err = kbdev->protected_ops->protected_mode_enter(kbdev);
+
+		if (err)
+			dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n",
+					err);
+		else
+			kbdev->protected_mode = true;
+	}
+
+	return err;
+}
+
+static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot exit protected mode: protected callbacks not specified.\n");
+
+	if (!kbdev->protected_ops)
+		return -EINVAL;
+
+	kbase_reset_gpu_silent(kbdev);
+
+	return 0;
+}
+
+static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	switch (katom[idx]->protected_state.enter) {
+	case KBASE_ATOM_ENTER_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		kbdev->protected_mode_transition = true;
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_VINSTR;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_VINSTR:
+		if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
+			/*
+			 * We can't switch now because
+			 * the vinstr core state switch
+			 * is not done yet.
+			 */
+			return -EAGAIN;
+		}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+		/* Use generic model for IPA in protected mode */
+		kbase_ipa_model_use_fallback_locked(kbdev);
+#endif
+
+		/* Once reaching this point GPU must be
+		 * switched to protected mode or vinstr
+		 * re-enabled. */
+
+		/*
+		 * Not in correct mode, begin protected mode switch.
+		 * Entering protected mode requires us to power down the L2,
+		 * and drop out of fully coherent mode.
+		 */
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
+		/* Avoid unnecessary waiting on non-ACE platforms. */
+		if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) {
+			if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+				/*
+				* The L2 is still powered, wait for all the users to
+				* finish with it before doing the actual reset.
+				*/
+				return -EAGAIN;
+			}
+		}
+
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_FINISHED;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
+
+		/* No jobs running, so we can switch GPU mode right now. */
+		err = kbase_gpu_protected_mode_enter(kbdev);
+
+		/*
+		 * Regardless of result, we are no longer transitioning
+		 * the GPU.
+		 */
+		kbdev->protected_mode_transition = false;
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev);
+		if (err) {
+			/*
+			 * Failed to switch into protected mode, resume
+			 * vinstr core and fail atom.
+			 */
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order. */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+#ifdef CONFIG_DEVFREQ_THERMAL
+			/* Go back to configured model for IPA */
+			kbase_ipa_model_use_configured_locked(kbdev);
+#endif
+
+			return -EINVAL;
+		}
+
+		/* Protected mode sanity checks. */
+		KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) ==
+			kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]),
+			kbase_gpu_in_protected_mode(kbdev));
+		katom[idx]->gpu_rb_state =
+			KBASE_ATOM_GPU_RB_READY;
+	}
+
+	return 0;
+}
+
+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+
+	switch (katom[idx]->protected_state.exit) {
+	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		/*
+		 * Exiting protected mode requires a reset, but first the L2
+		 * needs to be powered down to ensure it's not active when the
+		 * reset is issued.
+		 */
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
+
+		kbdev->protected_mode_transition = true;
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+	case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/*
+			 * The L2 is still powered, wait for all the users to
+			 * finish with it before doing the actual reset.
+			 */
+			return -EAGAIN;
+		}
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET:
+		/* Issue the reset to the GPU */
+		err = kbase_gpu_protected_mode_reset(kbdev);
+
+		if (err) {
+			kbdev->protected_mode_transition = false;
+
+			/* Failed to exit protected mode, fail atom */
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+#ifdef CONFIG_DEVFREQ_THERMAL
+			/* Use generic model for IPA in protected mode */
+			kbase_ipa_model_use_fallback_locked(kbdev);
+#endif
+
+			return -EINVAL;
+		}
+
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT:
+		if (kbase_reset_gpu_active(kbdev))
+			return -EAGAIN;
+
+		kbdev->protected_mode_transition = false;
+		kbdev->protected_mode = false;
+		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev);
+		/* protected mode sanity checks */
+		KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev));
+		KBASE_DEBUG_ASSERT_MSG(
+			(kbase_jd_katom_is_protected(katom[idx]) && js == 0) ||
+			!kbase_jd_katom_is_protected(katom[idx]),
+			"Protected atom on JS%d not supported", js);
+	}
+
+	return 0;
+}
+
+void kbase_backend_slot_update(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_jd_atom *katom[2];
+		int idx;
+
+		katom[0] = kbase_gpu_inspect(kbdev, js, 0);
+		katom[1] = kbase_gpu_inspect(kbdev, js, 1);
+		WARN_ON(katom[1] && !katom[0]);
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			bool cores_ready;
+			int ret;
+
+			if (!katom[idx])
+				continue;
+
+			switch (katom[idx]->gpu_rb_state) {
+			case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+				/* Should be impossible */
+				WARN(1, "Attempting to update atom not in ringbuffer\n");
+				break;
+
+			case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+				if (katom[idx]->atom_flags &
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED)
+					break;
+
+				katom[idx]->gpu_rb_state =
+				KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+				if (kbase_gpu_check_secure_atoms(kbdev,
+						!kbase_jd_katom_is_protected(
+						katom[idx])))
+					break;
+
+				if ((idx == 1) && (kbase_jd_katom_is_protected(
+								katom[0]) !=
+						kbase_jd_katom_is_protected(
+								katom[1])))
+					break;
+
+				if (kbdev->protected_mode_transition)
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+
+				/*
+				 * Exiting protected mode must be done before
+				 * the references on the cores are taken as
+				 * a power down the L2 is required which
+				 * can't happen after the references for this
+				 * atom are taken.
+				 */
+
+				if (!kbase_gpu_in_protected_mode(kbdev) &&
+					kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition into protected mode. */
+					ret = kbase_jm_enter_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				} else if (kbase_gpu_in_protected_mode(kbdev) &&
+					!kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition out of protected mode. */
+					ret = kbase_jm_exit_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				}
+				katom[idx]->protected_state.exit =
+						KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+				/* Atom needs no protected mode transition. */
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+				if (katom[idx]->will_fail_event_code) {
+					kbase_gpu_mark_atom_for_return(kbdev,
+							katom[idx]);
+					/* Set EVENT_DONE so this atom will be
+					   completed, not unpulled. */
+					katom[idx]->event_code =
+						BASE_JD_EVENT_DONE;
+					/* Only return if head atom or previous
+					 * atom already removed - as atoms must
+					 * be returned in order. */
+					if (idx == 0 ||	katom[0]->gpu_rb_state ==
+							KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+						kbase_gpu_dequeue_atom(kbdev, js, NULL);
+						kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+					}
+					break;
+				}
+
+				cores_ready =
+					kbasep_js_job_check_ref_cores(kbdev, js,
+								katom[idx]);
+
+				if (katom[idx]->event_code ==
+						BASE_JD_EVENT_PM_EVENT) {
+					katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+					break;
+				}
+
+				if (!cores_ready)
+					break;
+
+				kbase_js_affinity_retain_slot_cores(kbdev, js,
+							katom[idx]->affinity);
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+				if (!kbase_gpu_rmu_workaround(kbdev, js))
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_READY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_READY:
+
+				if (idx == 1) {
+					/* Only submit if head atom or previous
+					 * atom already submitted */
+					if ((katom[0]->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED &&
+						katom[0]->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+						break;
+
+					/* If intra-slot serialization in use
+					 * then don't submit atom to NEXT slot
+					 */
+					if (kbdev->serialize_jobs &
+						KBASE_SERIALIZE_INTRA_SLOT)
+						break;
+				}
+
+				/* If inter-slot serialization in use then don't
+				 * submit atom if any other slots are in use */
+				if ((kbdev->serialize_jobs &
+						KBASE_SERIALIZE_INTER_SLOT) &&
+						other_slots_busy(kbdev, js))
+					break;
+
+				if ((kbdev->serialize_jobs &
+						KBASE_SERIALIZE_RESET) &&
+						kbase_reset_gpu_active(kbdev))
+					break;
+
+				/* Check if this job needs the cycle counter
+				 * enabled before submission */
+				if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+					kbase_pm_request_gpu_cycle_counter_l2_is_on(
+									kbdev);
+
+				kbase_job_hw_submit(kbdev, katom[idx], js);
+				katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_SUBMITTED;
+
+				/* Inform power management at start/finish of
+				 * atom so it can update its GPU utilisation
+				 * metrics. */
+				kbase_pm_metrics_update(kbdev,
+						&katom[idx]->start_timestamp);
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_SUBMITTED:
+				/* Atom submitted to HW, nothing else to do */
+				break;
+
+			case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+				/* Only return if head atom or previous atom
+				 * already removed - as atoms must be returned
+				 * in order */
+				if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					kbase_gpu_dequeue_atom(kbdev, js, NULL);
+					kbase_jm_return_atom_to_js(kbdev,
+								katom[idx]);
+				}
+				break;
+			}
+		}
+	}
+
+	/* Warn if PRLAM-8987 affinity restrictions are violated */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1)) &&
+			kbase_gpu_atoms_submitted(kbdev, 2));
+}
+
+
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbase_gpu_enqueue_atom(kbdev, katom);
+	kbase_backend_slot_update(kbdev);
+}
+
+#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \
+	(KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER))
+
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_atom *next_katom;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = kbase_gpu_inspect(kbdev, js, 0);
+	next_katom = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (next_katom && katom->kctx == next_katom->kctx &&
+		next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+		HAS_DEP(next_katom) &&
+		(kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+		kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL)
+									!= 0)) {
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+				JS_COMMAND_NOP, NULL);
+		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+
+		KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
+				&kbdev->gpu_props.props.raw_props.js_features
+					[katom->slot_nr]);
+		KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as
+					[katom->kctx->as_nr]);
+		KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx,
+				&kbdev->gpu_props.props.raw_props.js_features
+					[katom->slot_nr]);
+
+		return true;
+	}
+
+	return false;
+}
+
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) || (katom->core_req &
+					BASE_JD_REQ_SKIP_CACHE_END)) &&
+			completion_code != BASE_JD_EVENT_DONE &&
+			!(completion_code & BASE_JD_SW_EVENT)) {
+		/* When a job chain fails, on a T60x or when
+		 * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not
+		 * flushed. To prevent future evictions causing possible memory
+		 * corruption we need to flush the cache manually before any
+		 * affected memory gets reused. */
+		katom->need_cache_flush_cores_retained = katom->affinity;
+		kbase_pm_request_cores(kbdev, false, katom->affinity);
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
+		if (kbdev->gpu_props.num_core_groups > 1 &&
+			!(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[0].core_mask
+									) &&
+			(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[1].core_mask
+									)) {
+			dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
+			katom->need_cache_flush_cores_retained =
+								katom->affinity;
+			kbase_pm_request_cores(kbdev, false,
+							katom->affinity);
+		}
+	}
+
+	katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+	kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED) {
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		/*
+		 * Dequeue next atom from ringbuffers on same slot if required.
+		 * This atom will already have been removed from the NEXT
+		 * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
+		 * the atoms on this slot are returned in the correct order.
+		 */
+		if (next_katom && katom->kctx == next_katom->kctx &&
+				next_katom->sched_priority ==
+				katom->sched_priority) {
+			kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+			kbase_jm_return_atom_to_js(kbdev, next_katom);
+		}
+	} else if (completion_code != BASE_JD_EVENT_DONE) {
+		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+		int i;
+
+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
+		KBASE_TRACE_DUMP(kbdev);
+#endif
+		kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+		/*
+		 * Remove all atoms on the same context from ringbuffers. This
+		 * will not remove atoms that are already on the GPU, as these
+		 * are guaranteed not to have fail dependencies on the failed
+		 * atom.
+		 */
+		for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+			struct kbase_jd_atom *katom_idx0 =
+						kbase_gpu_inspect(kbdev, i, 0);
+			struct kbase_jd_atom *katom_idx1 =
+						kbase_gpu_inspect(kbdev, i, 1);
+
+			if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
+					HAS_DEP(katom_idx0) &&
+					katom_idx0->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Dequeue katom_idx0 from ringbuffer */
+				kbase_gpu_dequeue_atom(kbdev, i, end_timestamp);
+
+				if (katom_idx1 &&
+						katom_idx1->kctx == katom->kctx
+						&& HAS_DEP(katom_idx1) &&
+						katom_idx0->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+					/* Dequeue katom_idx1 from ringbuffer */
+					kbase_gpu_dequeue_atom(kbdev, i,
+							end_timestamp);
+
+					katom_idx1->event_code =
+							BASE_JD_EVENT_STOPPED;
+					kbase_jm_return_atom_to_js(kbdev,
+								katom_idx1);
+				}
+				katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+
+			} else if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					HAS_DEP(katom_idx1) &&
+					katom_idx1->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Can not dequeue this atom yet - will be
+				 * dequeued when atom at idx0 completes */
+				katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_gpu_mark_atom_for_return(kbdev,
+								katom_idx1);
+			}
+		}
+	}
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
+					js, completion_code);
+
+	if (job_tail != 0 && job_tail != katom->jc) {
+		bool was_updated = (job_tail != katom->jc);
+
+		/* Some of the job has been executed, so we update the job chain
+		 * address to where we should resume from */
+		katom->jc = job_tail;
+		if (was_updated)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+						katom, job_tail, js);
+	}
+
+	/* Only update the event code for jobs that weren't cancelled */
+	if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+		katom->event_code = (base_jd_event_code)completion_code;
+
+	kbase_device_trace_register_access(kctx, REG_WRITE,
+						JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+						1 << js);
+
+	/* Complete the job, and start new ones
+	 *
+	 * Also defer remaining work onto the workqueue:
+	 * - Re-queue Soft-stopped jobs
+	 * - For any other jobs, queue the job back into the dependency system
+	 * - Schedule out the parent context if necessary, and schedule a new
+	 *   one in.
+	 */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	{
+		/* The atom in the HEAD */
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		if (next_katom && next_katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+							js_string,
+							sizeof(js_string)),
+						ktime_to_ns(*end_timestamp),
+						(u32)next_katom->kctx->id, 0,
+						next_katom->work_id);
+			kbdev->hwaccess.backend.slot_rb[js].last_context =
+							next_katom->kctx;
+		} else {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+							js_string,
+							sizeof(js_string)),
+						ktime_to_ns(ktime_get()), 0, 0,
+						0);
+			kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
+		}
+	}
+#endif
+
+	if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)
+		kbase_reset_gpu_silent(kbdev);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED)
+		katom = kbase_jm_return_atom_to_js(kbdev, katom);
+	else
+		katom = kbase_jm_complete(kbdev, katom, end_timestamp);
+
+	if (katom) {
+		/* Cross-slot dependency has now become runnable. Try to submit
+		 * it. */
+
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		kbase_jm_try_kick(kbdev, 1 << katom->slot_nr);
+	}
+
+	/* Job completion may have unblocked other atoms. Try to update all job
+	 * slots */
+	kbase_backend_slot_update(kbdev);
+}
+
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int atom_idx = 0;
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, atom_idx);
+			bool keep_in_jm_rb = false;
+
+			if (!katom)
+				break;
+			if (katom->protected_state.exit ==
+					KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)
+				KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev);
+			if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED)
+				keep_in_jm_rb = true;
+
+			kbase_gpu_release_atom(kbdev, katom, NULL);
+
+			/*
+			 * If the atom wasn't on HW when the reset was issued
+			 * then leave it in the RB and next time we're kicked
+			 * it will be processed again from the starting state.
+			 */
+			if (keep_in_jm_rb) {
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+				katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+				katom->affinity = 0;
+				katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+				/* As the atom was not removed, increment the
+				 * index so that we read the correct atom in the
+				 * next iteration. */
+				atom_idx++;
+				continue;
+			}
+
+			/*
+			 * The atom was on the HW when the reset was issued
+			 * all we can do is fail the atom.
+			 */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			kbase_jm_complete(kbdev, katom, end_timestamp);
+		}
+	}
+
+	kbdev->protected_mode_transition = false;
+	kbdev->protected_mode = false;
+}
+
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
+	kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
+							katom->core_req, katom);
+	katom->kctx->blocked_js[js][katom->sched_priority] = true;
+}
+
+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom,
+						u32 action,
+						bool disjoint)
+{
+	katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+	kbase_gpu_mark_atom_for_return(kbdev, katom);
+	katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true;
+
+	if (disjoint)
+		kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
+									katom);
+}
+
+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
+{
+	if (katom->x_post_dep) {
+		struct kbase_jd_atom *dep_atom = katom->x_post_dep;
+
+		if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB &&
+			dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_RETURN_TO_JS)
+			return dep_atom->slot_nr;
+	}
+	return -1;
+}
+
+static void kbase_job_evicted(struct kbase_jd_atom *katom)
+{
+	kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom,
+			katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
+}
+
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbase_jd_atom *katom_idx0;
+	struct kbase_jd_atom *katom_idx1;
+
+	bool katom_idx0_valid, katom_idx1_valid;
+
+	bool ret = false;
+
+	int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
+	int prio_idx0 = 0, prio_idx1 = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
+	katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (katom_idx0)
+		prio_idx0 = katom_idx0->sched_priority;
+	if (katom_idx1)
+		prio_idx1 = katom_idx1->sched_priority;
+
+	if (katom) {
+		katom_idx0_valid = (katom_idx0 == katom);
+		/* If idx0 is to be removed and idx1 is on the same context,
+		 * then idx1 must also be removed otherwise the atoms might be
+		 * returned out of order */
+		if (katom_idx1)
+			katom_idx1_valid = (katom_idx1 == katom) ||
+						(katom_idx0_valid &&
+							(katom_idx0->kctx ==
+							katom_idx1->kctx));
+		else
+			katom_idx1_valid = false;
+	} else {
+		katom_idx0_valid = (katom_idx0 &&
+				(!kctx || katom_idx0->kctx == kctx));
+		katom_idx1_valid = (katom_idx1 &&
+				(!kctx || katom_idx1->kctx == kctx) &&
+				prio_idx0 == prio_idx1);
+	}
+
+	if (katom_idx0_valid)
+		stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
+	if (katom_idx1_valid)
+		stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1);
+
+	if (katom_idx0_valid) {
+		if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Simple case - just dequeue and return */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			if (katom_idx1_valid) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				katom_idx1->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx1);
+				katom_idx1->kctx->blocked_js[js][prio_idx1] =
+						true;
+			}
+
+			katom_idx0->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+			kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+			katom_idx0->kctx->blocked_js[js][prio_idx0] = true;
+		} else {
+			/* katom_idx0 is on GPU */
+			if (katom_idx1 && katom_idx1->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* katom_idx0 and katom_idx1 are on GPU */
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+					/* idx0 has already completed - stop
+					 * idx1 if needed*/
+					if (katom_idx1_valid) {
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				} else {
+					/* idx1 is in NEXT registers - attempt
+					 * to remove */
+					kbase_reg_write(kbdev,
+							JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+					if (kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_HI), NULL)
+									!= 0) {
+						/* idx1 removed successfully,
+						 * will be handled in IRQ */
+						kbase_job_evicted(katom_idx1);
+						kbase_gpu_remove_atom(kbdev,
+								katom_idx1,
+								action, true);
+						stop_x_dep_idx1 =
+					should_stop_x_dep_slot(katom_idx1);
+
+						/* stop idx0 if still on GPU */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx0,
+								action);
+						ret = true;
+					} else if (katom_idx1_valid) {
+						/* idx0 has already completed,
+						 * stop idx1 if needed */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				}
+			} else if (katom_idx1_valid) {
+				/* idx1 not on GPU but must be dequeued*/
+
+				/* idx1 will be handled in IRQ */
+				kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+				/* stop idx0 */
+				/* This will be repeated for anything removed
+				 * from the next registers, since their normal
+				 * flow was also interrupted, and this function
+				 * might not enter disjoint state e.g. if we
+				 * don't actually do a hard stop on the head
+				 * atom */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			} else {
+				/* no atom in idx1 */
+				/* just stop idx0 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			}
+		}
+	} else if (katom_idx1_valid) {
+		if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Mark for return */
+			/* idx1 will be returned once idx0 completes */
+			kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+		} else {
+			/* idx1 is on GPU */
+			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+				/* idx0 has already completed - stop idx1 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx1,
+									action);
+				ret = true;
+			} else {
+				/* idx1 is in NEXT registers - attempt to
+				 * remove */
+				kbase_reg_write(kbdev, JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_LO), NULL) != 0 ||
+				    kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_HI), NULL) != 0) {
+					/* idx1 removed successfully, will be
+					 * handled in IRQ once idx0 completes */
+					kbase_job_evicted(katom_idx1);
+					kbase_gpu_remove_atom(kbdev, katom_idx1,
+									action,
+									false);
+				} else {
+					/* idx0 has already completed - stop
+					 * idx1 */
+					kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+					ret = true;
+				}
+			}
+		}
+	}
+
+
+	if (stop_x_dep_idx0 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0,
+								NULL, action);
+
+	if (stop_x_dep_idx1 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1,
+								NULL, action);
+
+	return ret;
+}
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev)
+{
+	/* Limit the number of loops to avoid a hang if the interrupt is missed
+	 */
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	mutex_lock(&kbdev->cacheclean_lock);
+
+	/* use GPU_COMMAND completion solution */
+	/* clean & invalidate the caches */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+	/* wait for cache flush to complete before continuing */
+	while (--max_loops &&
+		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+						CLEAN_CACHES_COMPLETED) == 0)
+		;
+
+	/* clear the CLEAN_CACHES_COMPLETED irq */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
+							CLEAN_CACHES_COMPLETED);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+						CLEAN_CACHES_COMPLETED, NULL);
+	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING,
+	    "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_backend_cacheclean(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->need_cache_flush_cores_retained) {
+		unsigned long flags;
+
+		kbase_gpu_cacheclean(kbdev);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_pm_unrequest_cores(kbdev, false,
+					katom->need_cache_flush_cores_retained);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		katom->need_cache_flush_cores_retained = 0;
+	}
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	/*
+	 * If cache flush required due to HW workaround then perform the flush
+	 * now
+	 */
+	kbase_backend_cacheclean(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969)            &&
+	    (katom->core_req & BASE_JD_REQ_FS)                        &&
+	    katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT       &&
+	    (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+	    !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+		dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+		if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after was soft-stopped
+			 * Due to an HW issue we try to execute the job again.
+			 */
+			dev_dbg(kbdev->dev,
+				"Clamping has been executed, try to rerun the job\n"
+			);
+			katom->event_code = BASE_JD_EVENT_STOPPED;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+		}
+	}
+
+	/* Clear the coreref_state now - while check_deref_cores() may not have
+	 * been called yet, the caller will have taken a copy of this field. If
+	 * this is not done, then if the atom is re-scheduled (following a soft
+	 * stop) then the core reference would not be retaken. */
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->affinity = 0;
+}
+
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity,
+			coreref_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!kbdev->pm.active_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		mutex_lock(&kbdev->pm.lock);
+		kbase_pm_update_active(kbdev);
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+void kbase_gpu_dump_slots(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	unsigned long flags;
+	int js;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+									js,
+									idx);
+
+			if (katom)
+				dev_info(kbdev->dev,
+				"  js%d idx%d : katom=%p gpu_rb_state=%d\n",
+				js, idx, katom, katom->gpu_rb_state);
+			else
+				dev_info(kbdev->dev, "  js%d idx%d : empty\n",
+								js, idx);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
new file mode 100644
index 0000000..1e0e05a
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_H_
+#define _KBASE_HWACCESS_GPU_H_
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/**
+ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot
+ *
+ * @kbdev:         Device pointer
+ * @js:            Job slot to evict from
+ *
+ * Evict the atom in the NEXT slot for the specified job slot. This function is
+ * called from the job complete IRQ handler when the previous job has failed.
+ *
+ * Return: true if job evicted from NEXT registers, false otherwise
+ */
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_gpu_complete_hw - Complete an atom on job slot js
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot that has completed
+ * @completion_code: Event code from job that has completed
+ * @job_tail:        The tail address from the hardware if the job has partially
+ *                   completed
+ * @end_timestamp:   Time of completion
+ */
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
+ *
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ * @idx:    Index into ringbuffer. 0 is the job currently running on
+ *          the slot, 1 is the job waiting, all other values are invalid.
+ * Return:  The atom at that position in the ringbuffer
+ *          or NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx);
+
+/**
+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
new file mode 100644
index 0000000..54d8ddd
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
@@ -0,0 +1,303 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+#include "mali_kbase_hw.h"
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+									int js)
+{
+	/*
+	 * Here are the reasons for using job slot 2:
+	 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+	 * - In absence of the above, then:
+	 *  - Atoms with BASE_JD_REQ_COHERENT_GROUP
+	 *  - But, only when there aren't contexts with
+	 *  KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+	 *  all cores on slot 1 could be blocked by those using a coherent group
+	 *  on slot 2
+	 *  - And, only when you actually have 2 or more coregroups - if you
+	 *  only have 1 coregroup, then having jobs for slot 2 implies they'd
+	 *  also be for slot 1, meaning you'll get interference from them. Jobs
+	 *  able to run on slot 2 could also block jobs that can only run on
+	 *  slot 1 (tiler jobs)
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+
+	if (js != 2)
+		return true;
+
+	/* Only deal with js==2 now: */
+	if (kbdev->gpu_props.num_core_groups > 1) {
+		/* Only use slot 2 in the 2+ coregroup case */
+		if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+					KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
+								false) {
+			/* ...But only when we *don't* have atoms that run on
+			 * all cores */
+
+			/* No specific check for BASE_JD_REQ_COHERENT_GROUP
+			 * atoms - the policy will sort that out */
+			return true;
+		}
+	}
+
+	/* Above checks failed mean we shouldn't use slot 2 */
+	return false;
+}
+
+/*
+ * As long as it has been decided to have a deeper modification of
+ * what job scheduler, power manager and affinity manager will
+ * implement, this function is just an intermediate step that
+ * assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ *   and high cores in a core split will be evently distributed.
+ * - Odd combinations of core requirements have been filtered out
+ *   and do not get to this function (e.g. CS+T+NSS is not
+ *   supported here).
+ * - This function is frequently called and can be optimized,
+ *   (see notes in loops), but as the functionallity will likely
+ *   be modified, optimization has not been addressed.
+*/
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js)
+{
+	base_jd_core_req core_req = katom->core_req;
+	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+	u64 core_availability_mask;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+	/*
+	 * If no cores are currently available (core availability policy is
+	 * transitioning) then fail.
+	 */
+	if (0 == core_availability_mask) {
+		*affinity = 0;
+		return false;
+	}
+
+	KBASE_DEBUG_ASSERT(js >= 0);
+
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+								BASE_JD_REQ_T) {
+		 /* If the hardware supports XAFFINITY then we'll only enable
+		  * the tiler (which is the default so this is a no-op),
+		  * otherwise enable shader core 0. */
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = 1;
+		else
+			*affinity = 0;
+
+		return true;
+	}
+
+	if (1 == kbdev->gpu_props.num_cores) {
+		/* trivial case only one core, nothing to do */
+		*affinity = core_availability_mask &
+				kbdev->pm.debug_core_mask[js];
+	} else {
+		if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+			if (js == 0 || num_core_groups == 1) {
+				/* js[0] and single-core-group systems just get
+				 * the first core group */
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[0].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+			} else {
+				/* js[1], js[2] use core groups 0, 1 for
+				 * dual-core-group systems */
+				u32 core_group_idx = ((u32) js) - 1;
+
+				KBASE_DEBUG_ASSERT(core_group_idx <
+							num_core_groups);
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+
+				/* If the job is specifically targeting core
+				 * group 1 and the core availability policy is
+				 * keeping that core group off, then fail */
+				if (*affinity == 0 && core_group_idx == 1 &&
+						kbdev->pm.backend.cg1_disabled
+								== true)
+					katom->event_code =
+							BASE_JD_EVENT_PM_EVENT;
+			}
+		} else {
+			/* All cores are available when no core split is
+			 * required */
+			*affinity = core_availability_mask &
+					kbdev->pm.debug_core_mask[js];
+		}
+	}
+
+	/*
+	 * If no cores are currently available in the desired core group(s)
+	 * (core availability policy is transitioning) then fail.
+	 */
+	if (*affinity == 0)
+		return false;
+
+	/* Enable core 0 if tiler required for hardware without XAFFINITY
+	 * support (notes above) */
+	if (core_req & BASE_JD_REQ_T) {
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = *affinity | 1;
+	}
+
+	return true;
+}
+
+static inline bool kbase_js_affinity_is_violating(
+						struct kbase_device *kbdev,
+								u64 *affinities)
+{
+	/* This implementation checks whether the two slots involved in Generic
+	 * thread creation have intersecting affinity. This is due to micro-
+	 * architectural issues where a job in slot A targetting cores used by
+	 * slot B could prevent the job in slot B from making progress until the
+	 * job in slot A has completed.
+	 */
+	u64 affinity_set_left;
+	u64 affinity_set_right;
+	u64 intersection;
+
+	KBASE_DEBUG_ASSERT(affinities != NULL);
+
+	affinity_set_left = affinities[1];
+
+	affinity_set_right = affinities[2];
+
+	/* A violation occurs when any bit in the left_set is also in the
+	 * right_set */
+	intersection = affinity_set_left & affinity_set_right;
+
+	return (bool) (intersection != (u64) 0u);
+}
+
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
+			sizeof(js_devdata->runpool_irq.slot_affinities));
+
+	new_affinities[js] |= affinity;
+
+	return kbase_js_affinity_is_violating(kbdev, new_affinities);
+}
+
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
+								== false);
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		cnt =
+		++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (cnt == 1)
+			js_devdata->runpool_irq.slot_affinities[js] |= bit;
+
+		cores &= ~bit;
+	}
+}
+
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		KBASE_DEBUG_ASSERT(
+		js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
+
+		cnt =
+		--(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (0 == cnt)
+			js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
+
+		cores &= ~bit;
+	}
+}
+
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int slot_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	for (slot_nr = 0; slot_nr < 3; ++slot_nr)
+		KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
+							NULL, 0u, slot_nr,
+			(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
new file mode 100644
index 0000000..35d9781
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Affinity Manager internal APIs.
+ */
+
+#ifndef _KBASE_JS_AFFINITY_H_
+#define _KBASE_JS_AFFINITY_H_
+
+/**
+ * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
+ * submit a job to a particular job slot in the current status
+ *
+ * @kbdev: The kbase device structure of the device
+ * @js:    Job slot number to check for allowance
+ *
+ * Will check if submitting to the given job slot is allowed in the current
+ * status.  For example using job slot 2 while in soft-stoppable state and only
+ * having 1 coregroup is not allowed by the policy. This function should be
+ * called prior to submitting a job to a slot to make sure policy rules are not
+ * violated.
+ *
+ * The following locking conditions are made on the caller
+ * - it must hold hwaccess_lock
+ */
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_js_choose_affinity - Compute affinity for a given job.
+ *
+ * @affinity: Affinity bitmap computed
+ * @kbdev:    The kbase device structure of the device
+ * @katom:    Job chain of which affinity is going to be found
+ * @js:       Slot the job chain is being submitted
+ *
+ * Currently assumes an all-on/all-off power management policy.
+ * Also assumes there is at least one core with tiler available.
+ *
+ * Returns true if a valid affinity was chosen, false if
+ * no cores were available.
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					int js);
+
+/**
+ * kbase_js_affinity_would_violate - Determine whether a proposed affinity on
+ * job slot @js would cause a violation of affinity restrictions.
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot to test
+ * @affinity: The affinity mask to test
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ *
+ * Return: true if the affinity would violate the restrictions
+ */
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
+ *                                       a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot retaining the cores
+ * @affinity: The cores to retain
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
+ *                                        by a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       Job slot
+ * @affinity: Bit mask of core to be released
+ *
+ * Cores must be released as soon as a job is dequeued from a slot's 'submit
+ * slots', and before another job is submitted to those slots. Otherwise, the
+ * refcount could exceed the maximum number submittable to a slot,
+ * %BASE_JM_SUBMIT_SLOTS.
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_debug_log_current_affinities - log the current affinities
+ *
+ * @kbdev:  Kbase device structure
+ *
+ * Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else				/*  KBASE_TRACE_ENABLE  */
+static inline void
+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+}
+#endif				/*  KBASE_TRACE_ENABLE  */
+
+#endif				/* _KBASE_JS_AFFINITY_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100644
index 0000000..a8c1af2
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,356 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is
+ * enabled for a particular level is down to the integrator. However this is
+ * being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	s8 nr_running_ctxs;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* Timer must stop if we are suspending */
+	if (backend->suspend_timer)
+		return false;
+
+	/* nr_contexts_pullable is updated with the runpool_mutex. However, the
+	 * locking in the caller gives us a barrier that ensures
+	 * nr_contexts_pullable is up-to-date for reading */
+	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (kbdev->js_data.softstop_always) {
+		/* Debug support for allowing soft-stop on a single context */
+		return true;
+	}
+#endif				/* CONFIG_MALI_DEBUG */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+		/* Timeouts would have to be 4x longer (due to micro-
+		 * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there's:
+		 * - 2 or more CL contexts
+		 * - 1 or more GLES contexts
+		 *
+		 * NOTE: We will treat a context that has both Compute and Non-
+		 * Compute jobs will be treated as an OpenCL context (hence, we
+		 * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+		 */
+		{
+			s8 nr_compute_ctxs =
+				kbasep_js_ctx_attr_count_on_runpool(kbdev,
+						KBASEP_JS_CTX_ATTR_COMPUTE);
+			s8 nr_noncompute_ctxs = nr_running_ctxs -
+							nr_compute_ctxs;
+
+			return (bool) (nr_compute_ctxs >= 2 ||
+							nr_noncompute_ctxs > 0);
+		}
+	} else {
+		/* Run the timer callback whenever you have at least 1 context
+		 */
+		return (bool) (nr_running_ctxs > 0);
+	}
+}
+
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_backend_data *backend;
+	int s;
+	bool reset_needed = false;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	backend = container_of(timer, struct kbase_backend_data,
+							scheduling_timer);
+	kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
+	js_devdata = &kbdev->js_data;
+
+	/* Loop through the slots */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+		struct kbase_jd_atom *atom = NULL;
+
+		if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
+			atom = kbase_gpu_inspect(kbdev, s, 0);
+			KBASE_DEBUG_ASSERT(atom != NULL);
+		}
+
+		if (atom != NULL) {
+			/* The current version of the model doesn't support
+			 * Soft-Stop */
+			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+				u32 ticks = atom->ticks++;
+
+#if !CINSTR_DUMPING_ENABLED
+				u32 soft_stop_ticks, hard_stop_ticks,
+								gpu_reset_ticks;
+				if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks_cl;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_cl;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_cl;
+				} else {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_ss;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_ss;
+				}
+
+				/* If timeouts have been changed then ensure
+				 * that atom tick count is not greater than the
+				 * new soft_stop timeout. This ensures that
+				 * atoms do not miss any of the timeouts due to
+				 * races between this worker and the thread
+				 * changing the timeouts. */
+				if (backend->timeouts_updated &&
+						ticks > soft_stop_ticks)
+					ticks = atom->ticks = soft_stop_ticks;
+
+				/* Job is Soft-Stoppable */
+				if (ticks == soft_stop_ticks) {
+					int disjoint_threshold =
+		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+					u32 softstop_flags = 0u;
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks ticks.
+					 * Soft stop the slot so we can run
+					 * other jobs.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+					/* nr_user_contexts_running is updated
+					 * with the runpool_mutex, but we can't
+					 * take that here.
+					 *
+					 * However, if it's about to be
+					 * increased then the new context can't
+					 * run any jobs until they take the
+					 * hwaccess_lock, so it's OK to observe
+					 * the older value.
+					 *
+					 * Similarly, if it's about to be
+					 * decreased, the last job from another
+					 * context has already finished, so it's
+					 * not too bad that we observe the older
+					 * value and register a disjoint event
+					 * when we try soft-stopping */
+					if (js_devdata->nr_user_contexts_running
+							>= disjoint_threshold)
+						softstop_flags |=
+						JS_COMMAND_SW_CAUSES_DISJOINT;
+
+					kbase_job_slot_softstop_swflags(kbdev,
+						s, atom, softstop_flags);
+#endif
+				} else if (ticks == hard_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_ss ticks.
+					 * It should have been soft-stopped by
+					 * now. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks == gpu_reset_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_ss ticks.
+					 * It should have left the GPU by now.
+					 * Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#else				/* !CINSTR_DUMPING_ENABLED */
+				/* NOTE: During CINSTR_DUMPING_ENABLED, we use
+				 * the alternate timeouts, which makes the hard-
+				 * stop and GPU reset timeout much longer. We
+				 * also ensure that we don't soft-stop at all.
+				 */
+				if (ticks == js_devdata->soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks. We do
+					 * not soft-stop during
+					 * CINSTR_DUMPING_ENABLED, however.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+				} else if (ticks ==
+					js_devdata->hard_stop_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_dumping
+					 * ticks. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks ==
+					js_devdata->gpu_reset_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_dumping
+					 * ticks. It should have left the GPU by
+					 * now. Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#endif				/* !CINSTR_DUMPING_ENABLED */
+			}
+		}
+	}
+#if KBASE_GPU_RESET_EN
+	if (reset_needed) {
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* the timer is re-issued if there is contexts in the run-pool */
+
+	if (backend->timer_running)
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+			HRTIMER_MODE_REL);
+
+	backend->timeouts_updated = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	unsigned long flags;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	if (!timer_callback_should_run(kbdev)) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = false;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/* From now on, return value of timer_callback_should_run() will
+		 * also cause the timer to not requeue itself. Its return value
+		 * cannot change, because it depends on variables updated with
+		 * the runpool_mutex held, which the caller of this must also
+		 * hold */
+		hrtimer_cancel(&backend->scheduling_timer);
+	}
+
+	if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = true;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+							HRTIMER_MODE_REL);
+
+		KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
+									0u);
+	}
+}
+
+int kbase_backend_timer_init(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	backend->scheduling_timer.function = timer_callback;
+
+	backend->timer_running = false;
+
+	return 0;
+}
+
+void kbase_backend_timer_term(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_cancel(&backend->scheduling_timer);
+}
+
+void kbase_backend_timer_suspend(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = true;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timer_resume(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = false;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->timeouts_updated = true;
+}
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
new file mode 100644
index 0000000..3f53779
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#ifndef _KBASE_JS_BACKEND_H_
+#define _KBASE_JS_BACKEND_H_
+
+/**
+ * kbase_backend_timer_init() - Initialise the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_term() - Terminate the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver termination
+ */
+void kbase_backend_timer_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
+ *                               timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on suspend, after the active count has reached
+ * zero. This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ *                              scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that is is not guaranteed to
+ * re-start the timer, only evalute whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100644
index 0000000..4e5a74f
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,403 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+		u32 num_pages)
+{
+	u64 region;
+
+	/* can't lock a zero sized range */
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	region = pfn << PAGE_SHIFT;
+	/*
+	 * fls returns (given the ASSERT above):
+	 * 1 .. 32
+	 *
+	 * 10 + fls(num_pages)
+	 * results in the range (11 .. 42)
+	 */
+
+	/* gracefully handle num_pages being zero */
+	if (0 == num_pages) {
+		region |= 11;
+	} else {
+		u8 region_width;
+
+		region_width = 10 + fls(num_pages);
+		if (num_pages != (1ul << (region_width - 11))) {
+			/* not pow2, so must go up to the next pow2 */
+			region_width += 1;
+		}
+		KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+		KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+		region |= region_width;
+	}
+
+	return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+		unsigned int as_nr, struct kbase_context *kctx)
+{
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending. Do not log remaining register accesses. */
+	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
+
+	if (max_loops == 0) {
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		return -1;
+	}
+
+	/* If waiting in loop was performed, log last read value. */
+	if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
+		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	return 0;
+}
+
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
+		struct kbase_context *kctx)
+{
+	int status;
+
+	/* write AS_COMMAND when MMU is ready to accept another command */
+	status = wait_ready(kbdev, as_nr, kctx);
+	if (status == 0)
+		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
+									kctx);
+
+	return status;
+}
+
+static void validate_protected_page_fault(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	/* GPUs which support (native) protected mode shall not report page
+	 * fault addresses unless it has protected debug mode and protected
+	 * debug mode is turned on */
+	u32 protected_debug_mode = 0;
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+		return;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		protected_debug_mode = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS),
+				kctx) & GPU_DBGEN;
+	}
+
+	if (!protected_debug_mode) {
+		/* fault_addr should never be reported in protected mode.
+		 * However, we just continue by printing an error message */
+		dev_err(kbdev->dev, "Fault address reported in protected mode\n");
+	}
+}
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+	const int num_as = 16;
+	const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+	const int pf_shift = 0;
+	const unsigned long as_bit_mask = (1UL << num_as) - 1;
+	unsigned long flags;
+	u32 new_mask;
+	u32 tmp;
+
+	/* bus faults */
+	u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+	/* page faults (note: Ignore ASes with both pf and bf) */
+	u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	/* remember current mask */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	/* mask interrupts for now */
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+	while (bf_bits | pf_bits) {
+		struct kbase_as *as;
+		int as_no;
+		struct kbase_context *kctx;
+
+		/*
+		 * the while logic ensures we have a bit set, no need to check
+		 * for not-found here
+		 */
+		as_no = ffs(bf_bits | pf_bits) - 1;
+		as = &kbdev->as[as_no];
+
+		/*
+		 * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+		 * Bus/Page faults _should_ only occur whilst jobs are running,
+		 * and a job causing the Bus/Page fault shouldn't complete until
+		 * the MMU is updated
+		 */
+		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+
+
+		/* find faulting address */
+		as->fault_addr = kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_HI),
+						kctx);
+		as->fault_addr <<= 32;
+		as->fault_addr |= kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_LO),
+						kctx);
+
+		/* Mark the fault protected or not */
+		as->protected_mode = kbdev->protected_mode;
+
+		if (kbdev->protected_mode && as->fault_addr)
+		{
+			/* check if address reporting is allowed */
+			validate_protected_page_fault(kbdev, kctx);
+		}
+
+		/* report the fault to debugfs */
+		kbase_as_fault_debugfs_new(kbdev, as_no);
+
+		/* record the fault status */
+		as->fault_status = kbase_reg_read(kbdev,
+						  MMU_AS_REG(as_no,
+							AS_FAULTSTATUS),
+						  kctx);
+
+		/* find the fault type */
+		as->fault_type = (bf_bits & (1 << as_no)) ?
+				KBASE_MMU_FAULT_TYPE_BUS :
+				KBASE_MMU_FAULT_TYPE_PAGE;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		as->fault_extra_addr = kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
+				kctx);
+		as->fault_extra_addr <<= 32;
+		as->fault_extra_addr |= kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
+				kctx);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+		if (kbase_as_has_bus_fault(as)) {
+			/* Mark bus fault as handled.
+			 * Note that a bus fault is processed first in case
+			 * where both a bus fault and page fault occur.
+			 */
+			bf_bits &= ~(1UL << as_no);
+
+			/* remove the queued BF (and PF) from the mask */
+			new_mask &= ~(MMU_BUS_ERROR(as_no) |
+					MMU_PAGE_FAULT(as_no));
+		} else {
+			/* Mark page fault as handled */
+			pf_bits &= ~(1UL << as_no);
+
+			/* remove the queued PF from the mask */
+			new_mask &= ~MMU_PAGE_FAULT(as_no);
+		}
+
+		/* Process the interrupt for this address space */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_interrupt_process(kbdev, kctx, as);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+
+	/* reenable interrupts */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	new_mask |= tmp;
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx)
+{
+	struct kbase_mmu_setup *current_setup = &as->current_setup;
+	u32 transcfg = 0;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+
+	/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
+	/* Clear PTW_MEMATTR bits */
+	transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+	/* Enable correct PTW_MEMATTR bits */
+	transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
+		/* Clear PTW_SH bits */
+		transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+		/* Enable correct PTW_SH bits */
+		transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
+	}
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+			transcfg, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+			(current_setup->transcfg >> 32) & 0xFFFFFFFFUL, kctx);
+
+#else /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+			current_setup->transtab & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+			(current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+			current_setup->memattr & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+			(current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+
+	KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as,
+			current_setup->transtab,
+			current_setup->memattr,
+			transcfg);
+
+	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+}
+
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+		unsigned int handling_irq)
+{
+	int ret;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	if (op == AS_COMMAND_UNLOCK) {
+		/* Unlock doesn't require a lock first */
+		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+	} else {
+		u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+		/* Lock the region that needs to be updated */
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+				lock_addr & 0xFFFFFFFFUL, kctx);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+				(lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
+		write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+
+		/* Run the MMU operation */
+		write_cmd(kbdev, as->number, op, kctx);
+
+		/* Wait for the flush to complete */
+		ret = wait_ready(kbdev, as->number, kctx);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+			/* Issue an UNLOCK command to ensure that valid page
+			   tables are re-read by the GPU after an update.
+			   Note that, the FLUSH command should perform all the
+			   actions necessary, however the bus logs show that if
+			   multiple page faults occur within an 8 page region
+			   the MMU does not always re-read the updated page
+			   table entries for later faults or is only partially
+			   read, it subsequently raises the page fault IRQ for
+			   the same addresses, the unlock ensures that the MMU
+			   cache is flushed, so updates can be re-read.  As the
+			   region is now unlocked we need to issue 2 UNLOCK
+			   commands in order to flush the MMU/uTLB,
+			   see PRLAM-8812.
+			 */
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+		}
+	}
+
+	return ret;
+}
+
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 pf_bf_mask;
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	/* Clear the page (and bus fault IRQ as well in case one occurred) */
+	pf_bf_mask = MMU_PAGE_FAULT(as->number);
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		pf_bf_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 irq_mask;
+
+	/* Enable the page fault IRQ (and bus fault IRQ as well in case one
+	 * occurred) */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
+			MMU_PAGE_FAULT(as->number);
+
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		irq_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
new file mode 100644
index 0000000..c02253c
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Interface file for the direct implementation for MMU hardware access
+ *
+ * Direct MMU hardware interface
+ *
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
+#define _MALI_KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * kbase_mmu_interrupt - Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the &kbase_device.
+ *
+ * @kbdev:          kbase context to clear the fault from.
+ * @irq_stat:       Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+#endif	/* _MALI_KBASE_MMU_HW_DIRECT_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
new file mode 100644
index 0000000..0614348
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static void always_on_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void always_on_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+	"always_on",			/* name */
+	always_on_init,			/* init */
+	always_on_term,			/* term */
+	always_on_get_core_mask,	/* get_core_mask */
+	always_on_get_core_active,	/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_ALWAYS_ON,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
new file mode 100644
index 0000000..f9d244b
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -0,0 +1,77 @@
+
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * DOC:
+ * The "Always on" power management policy has the following
+ * characteristics:
+ *
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *    All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *    All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *    The Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed. The GPU itself is also kept powered, even though it is not
+ *    needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * Note:
+ *
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data
+ * @dummy: unused dummy variable
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_always_on {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
new file mode 100644
index 0000000..146fd48
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -0,0 +1,482 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * GPU backend implementation of base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+
+#include <mali_kbase_pm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
+
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_on_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = true;
+}
+
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_off_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = false;
+}
+
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+	struct kbase_pm_callback_conf *callbacks;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_init(&kbdev->pm.lock);
+
+	kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!kbdev->pm.backend.gpu_poweroff_wait_wq)
+		return -ENOMEM;
+
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
+			kbase_pm_gpu_poweroff_wait_wq);
+
+	kbdev->pm.backend.gpu_powered = false;
+	kbdev->pm.suspending = false;
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->pm.backend.driver_ready_for_irqs = false;
+#endif /* CONFIG_MALI_DEBUG */
+	kbdev->pm.backend.gpu_in_desired_state = true;
+	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+	if (callbacks) {
+		kbdev->pm.backend.callback_power_on =
+					callbacks->power_on_callback;
+		kbdev->pm.backend.callback_power_off =
+					callbacks->power_off_callback;
+		kbdev->pm.backend.callback_power_suspend =
+					callbacks->power_suspend_callback;
+		kbdev->pm.backend.callback_power_resume =
+					callbacks->power_resume_callback;
+		kbdev->pm.callback_power_runtime_init =
+					callbacks->power_runtime_init_callback;
+		kbdev->pm.callback_power_runtime_term =
+					callbacks->power_runtime_term_callback;
+		kbdev->pm.backend.callback_power_runtime_on =
+					callbacks->power_runtime_on_callback;
+		kbdev->pm.backend.callback_power_runtime_off =
+					callbacks->power_runtime_off_callback;
+		kbdev->pm.backend.callback_power_runtime_idle =
+					callbacks->power_runtime_idle_callback;
+	} else {
+		kbdev->pm.backend.callback_power_on = NULL;
+		kbdev->pm.backend.callback_power_off = NULL;
+		kbdev->pm.backend.callback_power_suspend = NULL;
+		kbdev->pm.backend.callback_power_resume = NULL;
+		kbdev->pm.callback_power_runtime_init = NULL;
+		kbdev->pm.callback_power_runtime_term = NULL;
+		kbdev->pm.backend.callback_power_runtime_on = NULL;
+		kbdev->pm.backend.callback_power_runtime_off = NULL;
+		kbdev->pm.backend.callback_power_runtime_idle = NULL;
+	}
+
+	/* Initialise the metrics subsystem */
+	ret = kbasep_pm_metrics_init(kbdev);
+	if (ret)
+		return ret;
+
+	init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
+	kbdev->pm.backend.l2_powered = 0;
+
+	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
+	kbdev->pm.backend.reset_done = false;
+
+	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+	kbdev->pm.active_count = 0;
+
+	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+	init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
+
+	if (kbase_pm_ca_init(kbdev) != 0)
+		goto workq_fail;
+
+	if (kbase_pm_policy_init(kbdev) != 0)
+		goto pm_policy_fail;
+
+	return 0;
+
+pm_policy_fail:
+	kbase_pm_ca_term(kbdev);
+workq_fail:
+	kbasep_pm_metrics_term(kbdev);
+	return -EINVAL;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Turn clocks and interrupts on - no-op if we haven't done a previous
+	 * kbase_pm_clock_off() */
+	kbase_pm_clock_on(kbdev, is_resume);
+
+	/* Update core status as required by the policy */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+	kbase_pm_update_cores_state(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+	/* NOTE: We don't wait to reach the desired state, since running atoms
+	 * will wait for that state to be reached anyway */
+}
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+			pm.backend.gpu_poweroff_wait_work);
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+#if !PLATFORM_POWER_DOWN_ONLY
+	/* Wait for power transitions to complete. We do this with no locks held
+	 * so that we don't deadlock with any pending workqueues */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+	kbase_pm_check_transitions_sync(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+#if PLATFORM_POWER_DOWN_ONLY
+	if (kbdev->pm.backend.gpu_powered) {
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/* If L2 cache is powered then we must flush it before
+			 * we power off the GPU. Normally this would have been
+			 * handled when the L2 was powered off. */
+			kbase_gpu_cacheclean(kbdev);
+		}
+	}
+#endif /* PLATFORM_POWER_DOWN_ONLY */
+
+	if (!backend->poweron_required) {
+#if !PLATFORM_POWER_DOWN_ONLY
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		WARN_ON(kbdev->l2_available_bitmap ||
+				kbdev->shader_available_bitmap ||
+				kbdev->tiler_available_bitmap);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+
+		/* Consume any change-state events */
+		kbase_timeline_pm_check_handle_event(kbdev,
+					KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		/* Disable interrupts and turn the clock off */
+		if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
+			/*
+			 * Page/bus faults are pending, must drop locks to
+			 * process.  Interrupts are disabled so no more faults
+			 * should be generated at this point.
+			 */
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			kbase_flush_mmu_wqs(kbdev);
+			mutex_lock(&js_devdata->runpool_mutex);
+			mutex_lock(&kbdev->pm.lock);
+
+			/* Turn off clock now that fault have been handled. We
+			 * dropped locks so poweron_required may have changed -
+			 * power back on if this is the case.*/
+			if (backend->poweron_required)
+				kbase_pm_clock_on(kbdev, false);
+			else
+				WARN_ON(!kbase_pm_clock_off(kbdev,
+						backend->poweroff_is_suspend));
+		}
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	backend->poweroff_wait_in_progress = false;
+	if (backend->poweron_required) {
+		backend->poweron_required = false;
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_backend_slot_update(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	wake_up(&kbdev->pm.backend.poweroff_wait);
+}
+
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (!kbdev->pm.backend.poweroff_wait_in_progress) {
+		/* Force all cores off */
+		kbdev->pm.backend.desired_shader_state = 0;
+		kbdev->pm.backend.desired_tiler_state = 0;
+
+		/* Force all cores to be unavailable, in the situation where
+		 * transitions are in progress for some cores but not others,
+		 * and kbase_pm_check_transitions_nolock can not immediately
+		 * power off the cores */
+		kbdev->shader_available_bitmap = 0;
+		kbdev->tiler_available_bitmap = 0;
+		kbdev->l2_available_bitmap = 0;
+
+		kbdev->pm.backend.poweroff_wait_in_progress = true;
+		kbdev->pm.backend.poweroff_is_suspend = is_suspend;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/*Kick off wq here. Callers will have to wait*/
+		queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
+				&kbdev->pm.backend.gpu_poweroff_wait_work);
+	} else {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+}
+
+static bool is_poweroff_in_progress(struct kbase_device *kbdev)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
+{
+	wait_event_killable(kbdev->pm.backend.poweroff_wait,
+			is_poweroff_in_progress(kbdev));
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, don't enable IRQs as we are not ready to receive
+	 * them. */
+	ret = kbase_pm_init_hw(kbdev, flags);
+	if (ret) {
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		return ret;
+	}
+
+	kbasep_pm_init_core_use_bitmaps(kbdev);
+
+	kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
+			kbdev->pm.debug_core_mask[1] =
+			kbdev->pm.debug_core_mask[2] =
+			kbdev->gpu_props.props.raw_props.shader_present;
+
+	/* Pretend the GPU is active to prevent a power policy turning the GPU
+	 * cores off */
+	kbdev->pm.active_count = 1;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+	/* Ensure cycle counter is off */
+	kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+	/* We are ready to receive IRQ's now as power policy is set up, so
+	 * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+	kbdev->pm.backend.driver_ready_for_irqs = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+#endif
+	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the GPU and any cores needed by the policy */
+	kbase_pm_do_poweron(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	kbase_pm_context_idle(kbdev);
+
+	return 0;
+}
+
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
+
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
+
+	/* Free any resources the policy allocated */
+	kbase_pm_policy_term(kbdev);
+	kbase_pm_ca_term(kbdev);
+
+	/* Shut down the metrics subsystem */
+	kbasep_pm_metrics_term(kbdev);
+
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
+}
+
+void kbase_pm_power_changed(struct kbase_device *kbdev)
+{
+	bool cores_are_available;
+	unsigned long flags;
+
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
+
+	if (cores_are_available) {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		kbase_backend_slot_update(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2)
+{
+	kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
+	kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
+	kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
+	kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
+			new_core_mask_js2;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	/* Force power off the GPU and all cores (regardless of policy), only
+	 * after the PM active count reaches zero (otherwise, we risk turning it
+	 * off prematurely) */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, true);
+
+	kbase_backend_timer_suspend(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbdev->pm.suspending = false;
+	kbase_pm_do_poweron(kbdev, true);
+
+	kbase_backend_timer_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100644
index 0000000..9cecd4e
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,180 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+	&kbase_pm_ca_fixed_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_ca_demand_policy_ops,
+	&kbase_pm_ca_random_policy_ops,
+#endif
+};
+
+/**
+ * POLICY_COUNT - The number of policies available in the system.
+ *
+ * This is derived from the number of functions listed in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.ca_current_policy = policy_list[0];
+
+	kbdev->pm.backend.ca_current_policy->init(kbdev);
+
+	return 0;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_current_policy->term(kbdev);
+}
+
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
+
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.ca_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
+
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *new_policy)
+{
+	const struct kbase_pm_ca_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
+								new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.ca_current_policy;
+	kbdev->pm.backend.ca_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.ca_current_policy = new_policy;
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+					kbdev->shader_ready_bitmap,
+					kbdev->shader_transitioning_bitmap);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
+
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* All cores must be enabled when instrumentation is in use */
+	if (kbdev->pm.backend.instr_enabled)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	if (kbdev->pm.backend.ca_current_policy == NULL)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
+						kbdev->pm.debug_core_mask_all;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
+
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+							u64 cores_transitioning)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.ca_current_policy != NULL)
+		kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+							cores_ready,
+							cores_transitioning);
+}
+
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.instr_enabled = true;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbdev->pm.backend.instr_enabled = false;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
new file mode 100644
index 0000000..ee9e751
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+/**
+ * kbase_pm_ca_init - Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the core availability framework was successfully initialized,
+ *         -errno otherwise
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_term - Terminate core availability framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the currently available shader cores.
+ * Calls into the core availability policy
+ *
+ * Return: The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_update_core_status - Update core status
+ *
+ * @kbdev:               The kbase device structure for the device (must be
+ *                       a valid pointer)
+ * @cores_ready:         The bit mask of cores ready for job submission
+ * @cores_transitioning: The bit mask of cores that are transitioning power
+ *                       state
+ *
+ * Update core availability policy with current core power status
+ *
+ * Calls into the core availability policy
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+/**
+ * kbase_pm_ca_instr_enable - Enable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This overrides the output of the core availability policy, ensuring that all
+ * cores are available
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_instr_disable - Disable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This disables any previously enabled override, and resumes normal policy
+ * functionality
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_CA_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
new file mode 100644
index 0000000..19f1f73
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
@@ -0,0 +1,246 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation.
+ *
+ * This policy periodically selects a new core mask at demand. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/printk.h>
+
+#ifndef MALI_CA_TIMER
+static struct workqueue_struct *mali_ca_wq = NULL;
+#endif
+static int num_shader_cores_to_be_enabled = 0;
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+#ifndef MALI_CA_TIMER
+	struct kbase_device *kbdev = (struct kbase_device *)priv;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+#endif
+
+    num_shader_cores_to_be_enabled = cores -1;
+    printk("pp num=%d\n", num_shader_cores_to_be_enabled);
+    if (num_shader_cores_to_be_enabled < 1)
+        num_shader_cores_to_be_enabled = 1;
+
+#ifndef MALI_CA_TIMER
+    queue_work(mali_ca_wq, &data->wq_work);
+#endif
+
+    return 0;
+}
+
+#ifdef MALI_CA_TIMER
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void demand_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+#if 0
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+#endif
+    data->cores_desired = num_shader_cores_to_be_enabled;
+
+	if (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+            dev_info(kbdev->dev, "error, ca demand policy : new core mask=%llX\n",
+				data->cores_desired);
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "ca demand policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(demand_timer_callback);
+#else
+static void do_ca_scaling(struct work_struct *work)
+{
+    //unsigned long flags;
+	struct kbase_device *kbdev = = container_of(work,
+            struct kbasep_pm_ca_policy_demand, wq_work);
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+    data->cores_desired =  num_shader_cores_to_be_enabled;
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+
+}
+#endif
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+#ifdef MALI_CA_TIMER
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = demand_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+#else
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+	mali_ca_wq = alloc_workqueue("gpu_ca_wq", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+	mali_ca_wq = create_workqueue("gpu_ca_wq");
+#endif
+	INIT_WORK(&data->wq_work, do_ca_scaling);
+    if (mali_ca_wq == NULL) printk("Unable to create gpu scaling workqueue\n");
+#endif
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+#ifdef MALI_CA_TIMER
+	del_timer(&data->core_change_timer);
+#else
+    if (mali_ca_wq) {
+        flush_workqueue(mali_ca_wq);
+        destroy_workqueue(mali_ca_wq);
+        mali_ca_wq = NULL;
+    }
+#endif
+}
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.demand.cores_enabled;
+}
+
+static void demand_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the demand core availability
+ * policy.
+ *
+ * This is the static structure that defines the demand core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_demand_policy_ops);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
new file mode 100644
index 0000000..a3dfe2a
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_DEMAND_H
+#define MALI_KBASE_PM_CA_DEMAND_H
+#include <linux/workqueue.h>
+/**
+ * struct kbasep_pm_ca_policy_demand - Private structure for demand ca policy
+ *
+ * This contains data that is private to the demand core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+#define MALI_CA_TIMER 1
+struct kbasep_pm_ca_policy_demand {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+#ifdef MALI_CA_TIMER
+	struct timer_list core_change_timer;
+#else
+	struct work_struct wq_work;
+#endif
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops;
+extern int mali_perf_set_num_pp_cores(int cores);
+
+#endif /* MALI_KBASE_PM_CA_DEMAND_H */
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
new file mode 100644
index 0000000..1db6586
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static void fixed_init(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev,
+					u64 cores_ready,
+					u64 cores_transitioning)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(cores_ready);
+	CSTD_UNUSED(cores_transitioning);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the fixed power policy.
+ *
+ * This is the static structure that defines the fixed power policy's callback
+ * and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+	"fixed",			/* name */
+	fixed_init,			/* init */
+	fixed_term,			/* term */
+	fixed_get_core_mask,		/* get_core_mask */
+	fixed_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_FIXED,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
new file mode 100644
index 0000000..a763155
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Dummy member - no state is needed
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+	int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
new file mode 100644
index 0000000..0d1b220
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation.
+ *
+ * This policy periodically selects a new core mask at random. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void random_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(random_timer_callback);
+
+static void random_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = random_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+}
+
+static void random_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	del_timer(&data->core_change_timer);
+}
+
+static u64 random_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.random.cores_enabled;
+}
+
+static void random_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the random core availability
+ * policy.
+ *
+ * This is the static structure that defines the random core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops = {
+	"random",			/* name */
+	random_init,			/* init */
+	random_term,			/* term */
+	random_get_core_mask,		/* get_core_mask */
+	random_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_RANDOM,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_random_policy_ops);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
new file mode 100644
index 0000000..a8c9b15
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_RANDOM_H
+#define MALI_KBASE_PM_CA_RANDOM_H
+
+/**
+ * struct kbasep_pm_ca_policy_random - Private structure for random ca policy
+ *
+ * This contains data that is private to the random core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+struct kbasep_pm_ca_policy_random {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+	struct timer_list core_change_timer;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_RANDOM_H */
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100644
index 0000000..f891fa2
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count == 0)
+		return 0;
+
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	"coarse_demand",			/* name */
+	coarse_demand_init,			/* init */
+	coarse_demand_term,			/* term */
+	coarse_demand_get_core_mask,		/* get_core_mask */
+	coarse_demand_get_core_active,		/* get_core_active */
+	0u,					/* flags */
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100644
index 0000000..749d305
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * DOC:
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
+ *                                         policy
+ *
+ * This contains data that is private to the coarse demand power policy.
+ *
+ * @dummy: Dummy member - no state needed
+ */
+struct kbasep_pm_policy_coarse_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100644
index 0000000..0ddf7b7
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,521 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#include "mali_kbase_pm_ca_demand.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/**
+ * enum kbase_pm_core_type - The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to
+ * - kbase_pm_get_present_cores()
+ * - kbase_pm_get_active_cores()
+ * - kbase_pm_get_trans_cores()
+ * - kbase_pm_get_ready_cores().
+ *
+ * They specify which type of core should be acted on.  These values are set in
+ * a manner that allows core_type_to_reg() function to be simpler and more
+ * efficient.
+ *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ * @KBASE_PM_CORE_STACK: Core stacks
+ */
+enum kbase_pm_core_type {
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO,
+	KBASE_PM_CORE_STACK = STACK_PRESENT_LO
+};
+
+/**
+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power
+ *                                 management framework.
+ *
+ *  @time_period_start: time at which busy/idle measurements started
+ *  @time_busy: number of ns the GPU was busy executing jobs since the
+ *          @time_period_start timestamp.
+ *  @time_idle: number of ns since time_period_start the GPU was not executing
+ *          jobs since the @time_period_start timestamp.
+ *  @prev_busy: busy time in ns of previous time period.
+ *           Updated when metrics are reset.
+ *  @prev_idle: idle time in ns of previous time period
+ *           Updated when metrics are reset.
+ *  @gpu_active: true when the GPU is executing jobs. false when
+ *           not. Updated when the job scheduler informs us a job in submitted
+ *           or removed from a GPU slot.
+ *  @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
+ *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
+ *           GL jobs never run on slot 2 this slot is not recorded.
+ *  @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ *  @timer: timer to regularly make DVFS decisions based on the power
+ *           management metrics.
+ *  @timer_active: boolean indicating @timer is running
+ *  @platform_data: pointer to data controlled by platform specific code
+ *  @kbdev: pointer to kbase device for which metrics are collected
+ *
+ */
+struct kbasep_pm_metrics_data {
+	ktime_t time_period_start;
+	u32 time_busy;
+	u32 time_idle;
+	u32 prev_busy;
+	u32 prev_idle;
+	bool gpu_active;
+	u32 busy_cl[2];
+	u32 busy_gl;
+	u32 active_cl_ctx[2];
+	u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
+	spinlock_t lock;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	struct hrtimer timer;
+	bool timer_active;
+#endif
+
+	void *platform_data;
+	struct kbase_device *kbdev;
+};
+
+union kbase_pm_policy_data {
+	struct kbasep_pm_policy_always_on always_on;
+	struct kbasep_pm_policy_coarse_demand coarse_demand;
+	struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+	struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+union kbase_pm_ca_policy_data {
+	struct kbasep_pm_ca_policy_fixed fixed;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_ca_policy_random random;
+	struct kbasep_pm_ca_policy_demand demand;
+#endif
+};
+
+/**
+ * struct kbase_pm_backend_data - Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ *
+ * @ca_current_policy: The policy that is currently actively controlling core
+ *                     availability.
+ * @pm_current_policy: The policy that is currently actively controlling the
+ *                     power state.
+ * @ca_policy_data:    Private data for current CA policy
+ * @pm_policy_data:    Private data for current PM policy
+ * @ca_in_transition:  Flag indicating when core availability policy is
+ *                     transitioning cores. The core availability policy must
+ *                     set this when a change in core availability is occurring.
+ *                     power_change_lock must be held when accessing this.
+ * @reset_done:        Flag when a reset is complete
+ * @reset_done_wait:   Wait queue to wait for changes to @reset_done
+ * @l2_powered_wait:   Wait queue for whether the l2 cache has been powered as
+ *                     requested
+ * @l2_powered:        State indicating whether all the l2 caches are powered.
+ *                     Non-zero indicates they're *all* powered
+ *                     Zero indicates that some (or all) are not powered
+ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
+ *                              users
+ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
+ * @desired_shader_state: A bit mask identifying the shader cores that the
+ *                        power policy would like to be on. The current state
+ *                        of the cores may be different, but there should be
+ *                        transitions in progress that will eventually achieve
+ *                        this state (assuming that the policy doesn't change
+ *                        its mind in the mean time).
+ * @powering_on_shader_state: A bit mask indicating which shader cores are
+ *                            currently in a power-on transition
+ * @desired_tiler_state: A bit mask identifying the tiler cores that the power
+ *                       policy would like to be on. See @desired_shader_state
+ * @powering_on_tiler_state: A bit mask indicating which tiler core are
+ *                           currently in a power-on transition
+ * @powering_on_l2_state: A bit mask indicating which l2-caches are currently
+ *                        in a power-on transition
+ * @powering_on_stack_state: A bit mask indicating which core stacks are
+ *                           currently in a power-on transition
+ * @gpu_in_desired_state: This flag is set if the GPU is powered as requested
+ *                        by the desired_xxx_state variables
+ * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
+ * @gpu_powered:       Set to true when the GPU is powered and register
+ *                     accesses are possible, false otherwise
+ * @instr_enabled:     Set to true when instrumentation is enabled,
+ *                     false otherwise
+ * @cg1_disabled:      Set if the policy wants to keep the second core group
+ *                     powered off
+ * @driver_ready_for_irqs: Debug state indicating whether sufficient
+ *                         initialization of the driver has occurred to handle
+ *                         IRQs
+ * @gpu_powered_lock:  Spinlock that must be held when writing @gpu_powered or
+ *                     accessing @driver_ready_for_irqs
+ * @metrics:           Structure to hold metrics for the GPU
+ * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
+ *                        powered off
+ * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
+ *                        and/or timers are powered off
+ * @gpu_poweroff_timer: Timer for powering off GPU
+ * @gpu_poweroff_wq:   Workqueue to power off GPU on when timer fires
+ * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
+ * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
+ *                           timer callback
+ * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
+ *                          callback
+ * @poweroff_timer_needed: true if the poweroff timer is currently required,
+ *                         false otherwise
+ * @poweroff_timer_running: true if the poweroff timer is currently running,
+ *                          false otherwise
+ *                          power_change_lock should be held when accessing,
+ *                          unless there is no way the timer can be running (eg
+ *                          hrtimer_cancel() was called immediately before)
+ * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
+ *                             hwaccess_lock must be held when accessing
+ * @poweron_required: true if a GPU power on is required. Should only be set
+ *                    when poweroff_wait_in_progress is true, and therefore the
+ *                    GPU can not immediately be powered on. pm.lock must be
+ *                    held when accessing
+ * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend
+ *                       request. pm.lock must be held when accessing
+ * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off
+ * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq
+ * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete
+ * @callback_power_on: Callback when the GPU needs to be turned on. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_off: Callback when the GPU may be turned off. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to
+ *                          be turned off. See &struct kbase_pm_callback_conf
+ * @callback_power_resume: Callback when a resume occurs and the GPU needs to
+ *                          be turned on. See &struct kbase_pm_callback_conf
+ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See
+ *                             &struct kbase_pm_callback_conf
+ * @callback_power_runtime_off: Callback when the GPU may be turned off. See
+ *                              &struct kbase_pm_callback_conf
+ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
+ *                              &struct kbase_pm_callback_conf
+ *
+ * Note:
+ * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
+ * policy is being changed with kbase_pm_ca_set_policy() or
+ * kbase_pm_set_policy(). The change is protected under
+ * kbase_device.pm.power_change_lock. Direct access to this
+ * from IRQ context must therefore check for NULL. If NULL, then
+ * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
+ * functions that would have been done under IRQ.
+ */
+struct kbase_pm_backend_data {
+	const struct kbase_pm_ca_policy *ca_current_policy;
+	const struct kbase_pm_policy *pm_current_policy;
+	union kbase_pm_ca_policy_data ca_policy_data;
+	union kbase_pm_policy_data pm_policy_data;
+	bool ca_in_transition;
+	bool reset_done;
+	wait_queue_head_t reset_done_wait;
+	wait_queue_head_t l2_powered_wait;
+	int l2_powered;
+	int gpu_cycle_counter_requests;
+	spinlock_t gpu_cycle_counter_requests_lock;
+
+	u64 desired_shader_state;
+	u64 powering_on_shader_state;
+	u64 desired_tiler_state;
+	u64 powering_on_tiler_state;
+	u64 powering_on_l2_state;
+#ifdef CONFIG_MALI_CORESTACK
+	u64 powering_on_stack_state;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	bool gpu_in_desired_state;
+	wait_queue_head_t gpu_in_desired_state_wait;
+
+	bool gpu_powered;
+
+	bool instr_enabled;
+
+	bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+	bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+	spinlock_t gpu_powered_lock;
+
+
+	struct kbasep_pm_metrics_data metrics;
+
+	int gpu_poweroff_pending;
+	int shader_poweroff_pending_time;
+
+	struct hrtimer gpu_poweroff_timer;
+	struct workqueue_struct *gpu_poweroff_wq;
+	struct work_struct gpu_poweroff_work;
+
+	u64 shader_poweroff_pending;
+	u64 tiler_poweroff_pending;
+
+	bool poweroff_timer_needed;
+	bool poweroff_timer_running;
+
+	bool poweroff_wait_in_progress;
+	bool poweron_required;
+	bool poweroff_is_suspend;
+
+	struct workqueue_struct *gpu_poweroff_wait_wq;
+	struct work_struct gpu_poweroff_wait_work;
+
+	wait_queue_head_t poweroff_wait;
+
+	int (*callback_power_on)(struct kbase_device *kbdev);
+	void (*callback_power_off)(struct kbase_device *kbdev);
+	void (*callback_power_suspend)(struct kbase_device *kbdev);
+	void (*callback_power_resume)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
+};
+
+
+/* List of policy IDs */
+enum kbase_pm_policy_id {
+	KBASE_PM_POLICY_ID_DEMAND = 1,
+	KBASE_PM_POLICY_ID_ALWAYS_ON,
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
+	KBASE_PM_POLICY_ID_FAST_START
+#endif
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+/**
+ * struct kbase_pm_policy - Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core mask
+ * @get_core_active:    Function called to get the current overall GPU power
+ *                      state
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.pm_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core mask
+	 *
+	 * The returned mask should meet or exceed (kbdev->shader_needed_bitmap
+	 * | kbdev->shader_inuse_bitmap).
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: The mask of shader cores to be powered
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current overall GPU power state
+	 *
+	 * This function should consider the state of kbdev->pm.active_count. If
+	 * this count is greater than 0 then there is at least one active
+	 * context on the device and the GPU should be powered. If it is equal
+	 * to 0 then there are no active contexts and the GPU could be powered
+	 * off if desired.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: true if the GPU should be powered, false otherwise
+	 */
+	bool (*get_core_active)(struct kbase_device *kbdev);
+
+	kbase_pm_policy_flags flags;
+	enum kbase_pm_policy_id id;
+};
+
+
+enum kbase_pm_ca_policy_id {
+	KBASE_PM_CA_POLICY_ID_FIXED = 1,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_CA_POLICY_ID_RANDOM,
+	KBASE_PM_CA_POLICY_ID_DEMAND,
+#endif
+};
+
+typedef u32 kbase_pm_ca_policy_flags;
+
+/**
+ * Maximum length of a CA policy names
+ */
+#define KBASE_PM_CA_MAX_POLICY_NAME_LEN 15
+
+/**
+ * struct kbase_pm_ca_policy - Core availability policy structure.
+ *
+ * Each core availability policy exposes a (static) instance of this structure
+ * which contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core
+ *                      availability mask
+ * @update_core_status: Function called to update the current core status
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_ca_policy {
+	char name[KBASE_PM_CA_MAX_POLICY_NAME_LEN + 1];
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.ca_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core availability mask
+	 *
+	 * When a change in core availability is occurring, the policy must set
+	 * kbdev->pm.ca_in_transition to true. This is to indicate that
+	 * reporting changes in power state cannot be optimized out, even if
+	 * kbdev->pm.desired_shader_state remains unchanged. This must be done
+	 * by any functions internal to the Core Availability Policy that change
+	 * the return value of kbase_pm_ca_policy::get_core_mask.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *              valid pointer)
+	 *
+	 * Return: The current core availability mask
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to update the current core status
+	 *
+	 * If none of the cores in core group 0 are ready or transitioning, then
+	 * the policy must ensure that the next call to get_core_mask does not
+	 * return 0 for all cores in core group 0. It is an error to disable
+	 * core group 0 through the core availability policy.
+	 *
+	 * When a change in core availability has finished, the policy must set
+	 * kbdev->pm.ca_in_transition to false. This is to indicate that
+	 * changes in power state can once again be optimized out when
+	 * kbdev->pm.desired_shader_state is unchanged.
+	 *
+	 * @kbdev:               The kbase device structure for the device
+	 *                       (must be a valid pointer)
+	 * @cores_ready:         The mask of cores currently powered and
+	 *                       ready to run jobs
+	 * @cores_transitioning: The mask of cores currently transitioning
+	 *                       power state
+	 */
+	void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+	kbase_pm_ca_policy_flags flags;
+
+	/**
+	 * Field indicating an ID for this policy. This is not necessarily the
+	 * same as its index in the list returned by kbase_pm_list_policies().
+	 * It is used purely for debugging.
+	 */
+	enum kbase_pm_ca_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
new file mode 100644
index 0000000..81322fd
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
+
+	if (0 == kbdev->pm.active_count)
+		return 0;
+
+	return desired;
+}
+
+static bool demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_get_core_active,		/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
new file mode 100644
index 0000000..c0c84b6
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#ifndef MALI_KBASE_PM_DEMAND_H
+#define MALI_KBASE_PM_DEMAND_H
+
+/**
+ * DOC: Demand power management policy
+ *
+ * The demand power management policy has the following characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - The Shader Cores are not powered up
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - Only those Shader Cores are powered up
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * Note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_demand - Private structure for policy instance data
+ *
+ * @dummy: No state is needed, a dummy variable
+ *
+ * This contains data that is private to the demand power policy.
+ */
+struct kbasep_pm_policy_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_DEMAND_H */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
new file mode 100644
index 0000000..1902c32
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -0,0 +1,1698 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel Power Management hardware control
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_smc.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+
+#if MALI_MOCK_TEST
+#define MOCKABLE(function) function##_original
+#else
+#define MOCKABLE(function) function
+#endif				/* MALI_MOCK_TEST */
+
+/* Special value to indicate that the JM_CONFIG reg isn't currently used. */
+#define KBASE_JM_CONFIG_UNUSED (1<<31)
+
+static u64 kbase_pm_get_state(
+		struct kbase_device *kbdev,
+		enum kbase_pm_core_type core_type,
+		enum kbasep_pm_action action);
+
+/**
+ * core_type_to_reg - Decode a core type and action to a register.
+ *
+ * Given a core type (defined by kbase_pm_core_type) and an action (defined
+ * by kbasep_pm_action) this function will return the register offset that
+ * will perform the action on the core type. The register returned is the _LO
+ * register and an offset must be applied to use the _HI register.
+ *
+ * @core_type: The type of core
+ * @action:    The type of action
+ *
+ * Return: The register offset of the _LO register that performs an action of
+ * type @action on a core of type @core_type.
+ */
+static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
+						enum kbasep_pm_action action)
+{
+#ifdef CONFIG_MALI_CORESTACK
+	if (core_type == KBASE_PM_CORE_STACK) {
+		switch (action) {
+		case ACTION_PRESENT:
+			return STACK_PRESENT_LO;
+		case ACTION_READY:
+			return STACK_READY_LO;
+		case ACTION_PWRON:
+			return STACK_PWRON_LO;
+		case ACTION_PWROFF:
+			return STACK_PWROFF_LO;
+		case ACTION_PWRTRANS:
+			return STACK_PWRTRANS_LO;
+		default:
+			BUG();
+		}
+	}
+#endif /* CONFIG_MALI_CORESTACK */
+
+	return (u32)core_type + (u32)action;
+}
+
+#ifdef CONFIG_ARM64
+static void mali_cci_flush_l2(struct kbase_device *kbdev)
+{
+	const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED;
+	u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+	u32 raw;
+
+	/*
+	 * Note that we don't take the cache flush mutex here since
+	 * we expect to be the last user of the L2, all other L2 users
+	 * would have dropped their references, to initiate L2 power
+	 * down, L2 power down being the only valid place for this
+	 * to be called from.
+	 */
+
+	kbase_reg_write(kbdev,
+			GPU_CONTROL_REG(GPU_COMMAND),
+			GPU_COMMAND_CLEAN_INV_CACHES,
+			NULL);
+
+	raw = kbase_reg_read(kbdev,
+		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+		NULL);
+
+	/* Wait for cache flush to complete before continuing, exit on
+	 * gpu resets or loop expiry. */
+	while (((raw & mask) == 0) && --loops) {
+		raw = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+					NULL);
+	}
+}
+#endif
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo = cores & 0xFFFFFFFF;
+	u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	if (cores) {
+		if (action == ACTION_PWRON)
+			kbase_trace_mali_pm_power_on(core_type, cores);
+		else if (action == ACTION_PWROFF)
+			kbase_trace_mali_pm_power_off(core_type, cores);
+	}
+#endif
+
+	if (cores) {
+		u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
+
+		if (action == ACTION_PWRON)
+			state |= cores;
+		else if (action == ACTION_PWROFF)
+			state &= ~cores;
+		KBASE_TLSTREAM_AUX_PM_STATE(core_type, state);
+	}
+
+	/* Tracing */
+	if (cores) {
+		if (action == ACTION_PWRON)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u,
+									lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL,
+									0u, lo);
+				break;
+			default:
+				break;
+			}
+		else if (action == ACTION_PWROFF)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL,
+									0u, lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
+									0u, lo);
+				/* disable snoops before L2 is turned off */
+				kbase_pm_cache_snoop_disable(kbdev);
+				break;
+			default:
+				break;
+			}
+	}
+
+	if (lo != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+	if (hi != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
+
+/**
+ * kbase_pm_get_state - Get information about a core set
+ *
+ * This function gets information (chosen by @action) about a set of cores of
+ * a type given by @core_type. It is a static function used by
+ * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and
+ * kbase_pm_get_ready_cores().
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the should be queried
+ * @action:    The property of the cores to query
+ *
+ * Return: A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo, hi;
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+
+	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+	return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev)
+{
+	kbdev->shader_inuse_bitmap = 0;
+	kbdev->shader_needed_bitmap = 0;
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_users_count = 0;
+	kbdev->l2_available_bitmap = 0;
+	kbdev->tiler_needed_cnt = 0;
+	kbdev->tiler_inuse_cnt = 0;
+
+	memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+/**
+ * kbase_pm_get_present_cores - Get the cores that are present
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (type) {
+	case KBASE_PM_CORE_L2:
+		return kbdev->gpu_props.props.raw_props.l2_present;
+	case KBASE_PM_CORE_SHADER:
+		return kbdev->gpu_props.props.raw_props.shader_present;
+	case KBASE_PM_CORE_TILER:
+		return kbdev->gpu_props.props.raw_props.tiler_present;
+#ifdef CONFIG_MALI_CORESTACK
+	case KBASE_PM_CORE_STACK:
+		return kbdev->gpu_props.props.raw_props.stack_present;
+#endif /* CONFIG_MALI_CORESTACK */
+	default:
+		break;
+	}
+	KBASE_DEBUG_ASSERT(0);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * kbase_pm_get_active_cores - Get the cores that are "active"
+ *                             (busy processing work)
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are active
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between
+ *                            power states
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are transitioning
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * kbase_pm_get_ready_cores - Get the cores that are powered on
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are ready (powered on)
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	u64 result;
+
+	result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+	switch (type) {
+	case KBASE_PM_CORE_SHADER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_TILER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_L2:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * kbase_pm_transition_core_type - Perform power transitions for a particular
+ *                                 core type.
+ *
+ * This function will perform any available power transitions to make the actual
+ * hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that call cannot be made
+ * until the transition has finished. Cores which are not present in the
+ * hardware are ignored if they are specified in the desired_state bitmask,
+ * however the return value will always be 0 in this case.
+ *
+ * @kbdev:             The kbase device
+ * @type:              The core type to perform transitions for
+ * @desired_state:     A bit mask of the desired state of the cores
+ * @in_use:            A bit mask of the cores that are currently running
+ *                     jobs. These cores have to be kept powered up because
+ *                     there are jobs running (or about to run) on them.
+ * @available:         Receives a bit mask of the cores that the job
+ *                     scheduler can use to submit jobs to. May be NULL if
+ *                     this is not needed.
+ * @powering_on:       Bit mask to update with cores that are
+ *                    transitioning to a power-on state.
+ *
+ * Return: true if the desired state has been reached, false otherwise
+ */
+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type,
+						u64 desired_state,
+						u64 in_use,
+						u64 * const available,
+						u64 *powering_on)
+{
+	u64 present;
+	u64 ready;
+	u64 trans;
+	u64 powerup;
+	u64 powerdown;
+	u64 powering_on_trans;
+	u64 desired_state_in_use;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Get current state */
+	present = kbase_pm_get_present_cores(kbdev, type);
+	trans = kbase_pm_get_trans_cores(kbdev, type);
+	ready = kbase_pm_get_ready_cores(kbdev, type);
+	/* mask off ready from trans in case transitions finished between the
+	 * register reads */
+	trans &= ~ready;
+
+	if (trans) /* Do not progress if any cores are transitioning */
+		return false;
+
+	powering_on_trans = trans & *powering_on;
+	*powering_on = powering_on_trans;
+
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	/* Update desired state to include the in-use cores. These have to be
+	 * kept powered up because there are jobs running or about to run on
+	 * these cores
+	 */
+	desired_state_in_use = desired_state | in_use;
+
+	/* Update state of whether l2 caches are powered */
+	if (type == KBASE_PM_CORE_L2) {
+		if ((ready == present) && (desired_state_in_use == ready) &&
+								(trans == 0)) {
+			/* All are ready, none will be turned off, and none are
+			 * transitioning */
+			kbdev->pm.backend.l2_powered = 1;
+			/*
+			 * Ensure snoops are enabled after L2 is powered up,
+			 * note that kbase keeps track of the snoop state, so
+			 * safe to repeatedly call.
+			 */
+			kbase_pm_cache_snoop_enable(kbdev);
+			if (kbdev->l2_users_count > 0) {
+				/* Notify any registered l2 cache users
+				 * (optimized out when no users waiting) */
+				wake_up(&kbdev->pm.backend.l2_powered_wait);
+			}
+		} else
+			kbdev->pm.backend.l2_powered = 0;
+	}
+
+	if (desired_state == ready && (trans == 0))
+		return true;
+
+	/* Restrict the cores to those that are actually present */
+	powerup = desired_state_in_use & present;
+	powerdown = (~desired_state_in_use) & present;
+
+	/* Restrict to cores that are not already in the desired state */
+	powerup &= ~ready;
+	powerdown &= ready;
+
+	/* Don't transition any cores that are already transitioning, except for
+	 * Mali cores that support the following case:
+	 *
+	 * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
+	 * a core that is currently transitioning to power off, then this is
+	 * remembered and the shader core is automatically powered up again once
+	 * the original transition completes. Once the automatic power on is
+	 * complete any job scheduled on the shader core should start.
+	 */
+	powerdown &= ~trans;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
+		if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
+			trans = powering_on_trans; /* for exception cases, only
+						    * mask off cores in power on
+						    * transitions */
+
+	powerup &= ~trans;
+
+	/* Perform transitions if any */
+	kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
+#if !PLATFORM_POWER_DOWN_ONLY
+	kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
+#endif
+
+	/* Recalculate cores transitioning on, and re-evaluate our state */
+	powering_on_trans |= powerup;
+	*powering_on = powering_on_trans;
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	return false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
+
+/**
+ * get_desired_cache_status - Determine which caches should be on for a
+ *                            particular core state
+ *
+ * This function takes a bit mask of the present caches and the cores (or
+ * caches) that are attached to the caches that will be powered. It then
+ * computes which caches should be turned on to allow the cores requested to be
+ * powered up.
+ *
+ * @present:       The bit mask of present caches
+ * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
+ *                 be powered
+ * @tilers_powered: The bit mask of tilers that are desired to be powered
+ *
+ * Return: A bit mask of the caches that should be turned on
+ */
+static u64 get_desired_cache_status(u64 present, u64 cores_powered,
+		u64 tilers_powered)
+{
+	u64 desired = 0;
+
+	while (present) {
+		/* Find out which is the highest set bit */
+		u64 bit = fls64(present) - 1;
+		u64 bit_mask = 1ull << bit;
+		/* Create a mask which has all bits from 'bit' upwards set */
+
+		u64 mask = ~(bit_mask - 1);
+
+		/* If there are any cores powered at this bit or above (that
+		 * haven't previously been processed) then we need this core on
+		 */
+		if (cores_powered & mask)
+			desired |= bit_mask;
+
+		/* Remove bits from cores_powered and present */
+		cores_powered &= ~mask;
+		present &= ~bit_mask;
+	}
+
+	/* Power up the required L2(s) for the tiler */
+	if (tilers_powered)
+		desired |= 1;
+
+	return desired;
+}
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status);
+
+#ifdef CONFIG_MALI_CORESTACK
+u64 kbase_pm_core_stack_mask(u64 cores)
+{
+	u64 stack_mask = 0;
+	size_t const MAX_CORE_ID = 31;
+	size_t const NUM_CORES_PER_STACK = 4;
+	size_t i;
+
+	for (i = 0; i <= MAX_CORE_ID; ++i) {
+		if (test_bit(i, (unsigned long *)&cores)) {
+			/* Every core which ID >= 16 is filled to stacks 4-7
+			 * instead of 0-3 */
+			size_t const stack_num = (i > 16) ?
+				(i % NUM_CORES_PER_STACK) + 4 :
+				(i % NUM_CORES_PER_STACK);
+			set_bit(stack_num, (unsigned long *)&stack_mask);
+		}
+	}
+
+	return stack_mask;
+}
+#endif /* CONFIG_MALI_CORESTACK */
+
+bool
+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+	bool cores_are_available = false;
+	bool in_desired_state = true;
+	u64 desired_l2_state;
+#ifdef CONFIG_MALI_CORESTACK
+	u64 desired_stack_state;
+	u64 stacks_powered;
+#endif /* CONFIG_MALI_CORESTACK */
+	u64 cores_powered;
+	u64 tilers_powered;
+	u64 tiler_available_bitmap;
+	u64 tiler_transitioning_bitmap;
+	u64 shader_available_bitmap;
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+	u64 l2_available_bitmap;
+	u64 prev_l2_available_bitmap;
+	u64 l2_inuse_bitmap;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+	if (kbdev->pm.backend.gpu_powered == false) {
+		spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+		if (kbdev->pm.backend.desired_shader_state == 0 &&
+				kbdev->pm.backend.desired_tiler_state == 0)
+			return true;
+		return false;
+	}
+
+	/* Trace that a change-state is being requested, and that it took
+	 * (effectively) no time to start it. This is useful for counting how
+	 * many state changes occurred, in a way that's backwards-compatible
+	 * with processing the trace data */
+	kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+	kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+
+	/* If any cores are already powered then, we must keep the caches on */
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+	cores_powered |= kbdev->pm.backend.desired_shader_state;
+
+#ifdef CONFIG_MALI_CORESTACK
+	/* Work out which core stacks want to be powered */
+	desired_stack_state = kbase_pm_core_stack_mask(cores_powered);
+	stacks_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK) |
+		desired_stack_state;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	/* Work out which tilers want to be powered */
+	tiler_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_TILER);
+	tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+	tilers_powered |= kbdev->pm.backend.desired_tiler_state;
+
+	/* If there are l2 cache users registered, keep all l2s powered even if
+	 * all other cores are off. */
+	if (kbdev->l2_users_count > 0)
+		cores_powered |= kbdev->gpu_props.props.raw_props.l2_present;
+
+	desired_l2_state = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered, tilers_powered);
+
+	l2_inuse_bitmap = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered | shader_transitioning_bitmap,
+			tilers_powered | tiler_transitioning_bitmap);
+
+#ifdef CONFIG_MALI_CORESTACK
+	if (stacks_powered)
+		desired_l2_state |= 1;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	/* If any l2 cache is on, then enable l2 #0, for use by job manager */
+	if (0 != desired_l2_state)
+		desired_l2_state |= 1;
+
+	prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+	in_desired_state &= kbase_pm_transition_core_type(kbdev,
+			KBASE_PM_CORE_L2, desired_l2_state, l2_inuse_bitmap,
+			&l2_available_bitmap,
+			&kbdev->pm.backend.powering_on_l2_state);
+
+	if (kbdev->l2_available_bitmap != l2_available_bitmap)
+		KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
+
+	kbdev->l2_available_bitmap = l2_available_bitmap;
+
+
+#ifdef CONFIG_MALI_CORESTACK
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_STACK, desired_stack_state, 0,
+				&kbdev->stack_available_bitmap,
+				&kbdev->pm.backend.powering_on_stack_state);
+	}
+#endif /* CONFIG_MALI_CORESTACK */
+
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_TILER,
+				kbdev->pm.backend.desired_tiler_state,
+				0, &tiler_available_bitmap,
+				&kbdev->pm.backend.powering_on_tiler_state);
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_SHADER,
+				kbdev->pm.backend.desired_shader_state,
+				kbdev->shader_inuse_bitmap,
+				&shader_available_bitmap,
+				&kbdev->pm.backend.powering_on_shader_state);
+
+		if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u,
+						(u32) shader_available_bitmap);
+			KBASE_TIMELINE_POWER_SHADER(kbdev,
+						shader_available_bitmap);
+		}
+
+		kbdev->shader_available_bitmap = shader_available_bitmap;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u,
+						(u32) tiler_available_bitmap);
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+		}
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+	} else if ((l2_available_bitmap &
+			kbdev->gpu_props.props.raw_props.tiler_present) !=
+			kbdev->gpu_props.props.raw_props.tiler_present) {
+		tiler_available_bitmap = 0;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+	}
+
+	/* State updated for slow-path waiters */
+	kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+
+	shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* Determine whether the cores are now available (even if the set of
+	 * available cores is empty). Note that they can be available even if
+	 * we've not finished transitioning to the desired state */
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state) {
+		cores_are_available = true;
+
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+				(u32)(kbdev->shader_available_bitmap &
+				kbdev->pm.backend.desired_shader_state));
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+				(u32)(kbdev->tiler_available_bitmap &
+				kbdev->pm.backend.desired_tiler_state));
+
+		/* Log timelining information about handling events that power
+		 * up cores, to match up either with immediate submission either
+		 * because cores already available, or from PM IRQ */
+		if (!in_desired_state)
+			kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+
+	if (in_desired_state) {
+		KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_L2));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+#endif
+
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_L2,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_L2));
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_SHADER,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_SHADER));
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_TILER,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_STACK,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+				kbdev->pm.backend.gpu_in_desired_state,
+				(u32)kbdev->pm.backend.desired_shader_state);
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+				(u32)kbdev->pm.backend.desired_tiler_state);
+
+		/* Log timelining information for synchronous waiters */
+		kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+		/* Wake slow-path waiters. Job scheduler does not use this. */
+		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+
+		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
+	}
+
+	spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+
+	/* kbase_pm_ca_update_core_status can cause one-level recursion into
+	 * this function, so it must only be called once all changes to kbdev
+	 * have been committed, and after the gpu_powered_lock has been
+	 * dropped. */
+	if (kbdev->shader_ready_bitmap != shader_ready_bitmap ||
+	    kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) {
+		kbdev->shader_ready_bitmap = shader_ready_bitmap;
+		kbdev->shader_transitioning_bitmap =
+						shader_transitioning_bitmap;
+
+		kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap,
+						shader_transitioning_bitmap);
+	}
+
+	/* The core availability policy is not allowed to keep core group 0
+	 * turned off (unless it was changing the l2 power state) */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
+		(prev_l2_available_bitmap == desired_l2_state) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+		BUG();
+
+	/* The core availability policy is allowed to keep core group 1 off,
+	 * but all jobs specifically targeting CG1 must fail */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask))
+		kbdev->pm.backend.cg1_disabled = true;
+	else
+		kbdev->pm.backend.cg1_disabled = false;
+
+	return cores_are_available;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock);
+
+/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has
+ * aborted due to a fatal signal. If the time spent waiting has exceeded this
+ * threshold then there is most likely a hardware issue. */
+#define PM_TIMEOUT (5*HZ) /* 5s */
+
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long timeout;
+	bool cores_are_available;
+	int ret;
+
+	/* Force the transition to be checked and reported - the cores may be
+	 * 'available' (for job submission) but not fully powered up. */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	timeout = jiffies + PM_TIMEOUT;
+
+	/* Wait for cores */
+	ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+			kbdev->pm.backend.gpu_in_desired_state);
+
+	if (ret < 0 && time_after(jiffies, timeout)) {
+		dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+		dev_err(kbdev->dev, "Desired state :\n");
+		dev_err(kbdev->dev, "\tShader=%016llx\n",
+				kbdev->pm.backend.desired_shader_state);
+		dev_err(kbdev->dev, "\tTiler =%016llx\n",
+				kbdev->pm.backend.desired_tiler_state);
+		dev_err(kbdev->dev, "Current state :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_LO),
+					NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_LO), NULL));
+		dev_err(kbdev->dev, "Cores transitioning :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_LO), NULL));
+#if KBASE_GPU_RESET_EN
+		dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+		if (kbase_prepare_to_reset_gpu(kbdev))
+			kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+	} else {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
+
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Clear all interrupts,
+	 * and unmask them all.
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
+									NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
+
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Mask all interrupts,
+	 * and clear them all.
+	 */
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+}
+
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_disable_interrupts_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
+
+
+/*
+ * pmu layout:
+ * 0x0000: PMU TAG (RO) (0xCAFECAFE)
+ * 0x0004: PMU VERSION ID (RO) (0x00000000)
+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE)
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
+{
+	bool reset_required = is_resume;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_powered) {
+		/* Already turned on */
+		if (kbdev->poweroff_pending)
+			kbase_pm_enable_interrupts(kbdev);
+		kbdev->poweroff_pending = false;
+		KBASE_DEBUG_ASSERT(!is_resume);
+		return;
+	}
+
+	kbdev->poweroff_pending = false;
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u);
+
+	if (is_resume && kbdev->pm.backend.callback_power_resume) {
+		kbdev->pm.backend.callback_power_resume(kbdev);
+		return;
+	} else if (kbdev->pm.backend.callback_power_on) {
+		kbdev->pm.backend.callback_power_on(kbdev);
+		/* If your platform properly keeps the GPU state you may use the
+		 * return value of the callback_power_on function to
+		 * conditionally reset the GPU on power up. Currently we are
+		 * conservative and always reset the GPU. */
+		reset_required = true;
+	}
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	kbdev->pm.backend.gpu_powered = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (reset_required) {
+		/* GPU state was lost, reset GPU to ensure it is in a
+		 * consistent state */
+		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
+	}
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	/* Reprogram the GPU's MMU */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		if (js_devdata->runpool_irq.per_as_data[i].kctx)
+			kbase_mmu_update(
+				js_devdata->runpool_irq.per_as_data[i].kctx);
+		else
+			kbase_mmu_disable_as(kbdev, i);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Lastly, enable the interrupts */
+	kbase_pm_enable_interrupts(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
+
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* ASSERT that the cores should now be unavailable. No lock needed. */
+	KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+
+	kbdev->poweroff_pending = true;
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* Already turned off */
+		if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+			kbdev->pm.backend.callback_power_suspend(kbdev);
+		return true;
+	}
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u);
+
+	/* Disable interrupts. This also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure that any IRQ handlers have finished */
+	kbase_synchronize_irqs(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (atomic_read(&kbdev->faults_pending)) {
+		/* Page/bus faults are still being processed. The GPU can not
+		 * be powered off until they have completed */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return false;
+	}
+
+	kbase_pm_cache_snoop_disable(kbdev);
+
+	/* The GPU power may be turned off from this point */
+	kbdev->pm.backend.gpu_powered = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+		kbdev->pm.backend.callback_power_suspend(kbdev);
+	else if (kbdev->pm.backend.callback_power_off)
+		kbdev->pm.backend.callback_power_off(kbdev);
+	return true;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_off);
+
+struct kbasep_reset_timeout_data {
+	struct hrtimer timer;
+	bool timed_out;
+	struct kbase_device *kbdev;
+};
+
+void kbase_pm_reset_done(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	kbdev->pm.backend.reset_done = true;
+	wake_up(&kbdev->pm.backend.reset_done_wait);
+}
+
+/**
+ * kbase_pm_wait_for_reset - Wait for a reset to happen
+ *
+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
+ *
+ * @kbdev: Kbase device
+ */
+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	wait_event(kbdev->pm.backend.reset_done_wait,
+						(kbdev->pm.backend.reset_done));
+	kbdev->pm.backend.reset_done = false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done);
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+	struct kbasep_reset_timeout_data *rtdata =
+		container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+	rtdata->timed_out = 1;
+
+	/* Set the wait queue to wake up kbase_pm_init_hw even though the reset
+	 * hasn't completed */
+	kbase_pm_reset_done(rtdata->kbdev);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+	u32 jm_values[4];
+	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >>
+		GPU_ID_VERSION_MAJOR_SHIFT;
+
+	kbdev->hw_quirks_sc = 0;
+
+	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443.
+	 * and needed due to MIDGLES-3539. See PRLAM-11035 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+		kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
+
+	/* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+		kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	/* Enable alternative hardware counter selection if configured. */
+	if (!GPU_ID_IS_NEW_FORMAT(prod_id))
+		kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+#endif
+
+	/* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+		kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
+
+	if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
+		if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */
+			kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
+		else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */
+			kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
+	}
+
+	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(TILER_CONFIG), NULL);
+
+	/* Set tiler clock gate override if required */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+		kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+
+	/* Limit the GPU bus bandwidth if the platform needs this. */
+	kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(L2_MMU_CONFIG), NULL);
+
+	/* Limit read ID width for AXI */
+	kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS);
+	kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT;
+
+	/* Limit write ID width for AXI */
+	kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+	kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Allow memory configuration disparity to be ignored, we
+		 * optimize the use of shared memory and thus we expect
+		 * some disparity in the memory configuration */
+		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
+	}
+
+	kbdev->hw_quirks_jm = JM_CONFIG_UNUSED;
+	/* Only for T86x/T88x-based products after r2p0 */
+	if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) {
+
+		if (of_property_read_u32_array(np,
+					"jm_config",
+					&jm_values[0],
+					ARRAY_SIZE(jm_values))) {
+			/* Entry not in device tree, use defaults  */
+			jm_values[0] = 0;
+			jm_values[1] = 0;
+			jm_values[2] = 0;
+			jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT;
+		}
+
+		/* Limit throttle limit to 6 bits*/
+		if (jm_values[3] > JM_MAX_JOB_THROTTLE_LIMIT) {
+			dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63).");
+			jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT;
+		}
+
+		/* Aggregate to one integer. */
+		kbdev->hw_quirks_jm = (jm_values[0] ?
+				JM_TIMESTAMP_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[1] ?
+				JM_CLOCK_GATE_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[2] ?
+				JM_JOB_THROTTLE_ENABLE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[3] <<
+				JM_JOB_THROTTLE_LIMIT_SHIFT);
+
+	} else if (GPU_ID_IS_NEW_FORMAT(prod_id) &&
+			   (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
+					   GPU_ID2_PRODUCT_TMIX)) {
+		/* Only for tMIx */
+		u32 coherency_features;
+
+		coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+		/* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
+		 * documented for tMIx so force correct value here.
+		 */
+		if (coherency_features ==
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE)) {
+			kbdev->hw_quirks_jm =
+				(COHERENCY_ACE_LITE | COHERENCY_ACE) <<
+				JM_FORCE_COHERENCY_FEATURES_SHIFT;
+		}
+	}
+
+#ifdef CONFIG_MALI_CORESTACK
+#define MANUAL_POWER_CONTROL ((u32)(1 << 8))
+	kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL;
+#endif /* CONFIG_MALI_CORESTACK */
+}
+
+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
+{
+	if (kbdev->hw_quirks_sc)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
+				kbdev->hw_quirks_sc, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
+			kbdev->hw_quirks_tiler, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
+			kbdev->hw_quirks_mmu, NULL);
+
+
+	if (kbdev->hw_quirks_jm != JM_CONFIG_UNUSED)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
+				kbdev->hw_quirks_jm, NULL);
+
+}
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
+{
+	if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) &&
+		!kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_enable_smc != 0)
+			kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0);
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n");
+		kbdev->cci_snoop_enabled = true;
+	}
+}
+
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
+{
+	if (kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_disable_smc != 0) {
+			mali_cci_flush_l2(kbdev);
+			kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0);
+		}
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n");
+		kbdev->cci_snoop_enabled = false;
+	}
+}
+
+static int kbase_pm_reset_do_normal(struct kbase_device *kbdev)
+{
+	struct kbasep_reset_timeout_data rtdata;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+
+	KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev);
+  //dev_err(kbdev->dev, "%s,  %d \n", __FILE__, __LINE__);
+  core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+  l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+  tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+
+  //printk("core_ready=%ld, l2_ready=%ld, tiler_ready=%ld\n", core_ready, l2_ready, tiler_ready);
+  if (core_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, core_ready, ACTION_PWROFF);
+
+  if (l2_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_ready, ACTION_PWROFF);
+
+  if (tiler_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_ready, ACTION_PWROFF);
+  /* do external reset */
+
+//JOHNT
+    if (reg_base_hiubus) {
+        value = Rd(RESET0_MASK);
+        value = value & (~(0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_MASK, value);
+
+        value = Rd(RESET0_LEVEL);
+        value = value & (~(0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_LEVEL, value);
+    ///////////////
+        value = Rd(RESET2_MASK);
+        value = value & (~(0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_MASK, value);
+
+        value = Rd(RESET2_LEVEL);
+        value = value & (~(0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_LEVEL, value);
+
+        value = Rd(RESET0_LEVEL);
+        value = value | ((0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_LEVEL, value);
+
+        value = Rd(RESET2_LEVEL);
+        value = value | ((0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_LEVEL, value);
+    } else {
+		dev_err(kbdev->dev, "reg_base_hiubus is null !!!\n");
+    }
+  //writel(..../*e.g. PWR_RESET, EXTERNAL_PWR_REGISTER*/);
+
+  /* any other action necessary, like a simple delay */
+  kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+  kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+
+	/* Unmask the reset complete interrupt only */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED,
+									NULL);
+
+	/* Initialize a structure for tracking the status of the reset */
+	rtdata.kbdev = kbdev;
+	rtdata.timed_out = 0;
+
+	/* Create a timer to use as a timeout on the reset */
+	hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rtdata.timer.function = kbasep_reset_timeout;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	/* No interrupt has been received - check if the RAWSTAT register says
+	 * the reset has completed */
+	if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+							RESET_COMPLETED) {
+		/* The interrupt is set in the RAWSTAT; this suggests that the
+		 * interrupts are not getting to the CPU */
+		dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+		/* If interrupts aren't working we can't continue. */
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return -EINVAL;
+	}
+
+	/* The GPU doesn't seem to be responding to the reset so try a hard
+	 * reset */
+	dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+								RESET_TIMEOUT);
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET, NULL);
+
+	/* Restart the timer to wait for the hard reset to complete */
+	rtdata.timed_out = 0;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	destroy_hrtimer_on_stack(&rtdata.timer);
+
+	dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+								RESET_TIMEOUT);
+
+	return -EINVAL;
+}
+
+static int kbase_pm_reset_do_protected(struct kbase_device *kbdev)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+	KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev);
+
+	return kbdev->protected_ops->protected_mode_reset(kbdev);
+}
+
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+{
+	unsigned long irq_flags;
+	int err;
+	bool resume_vinstr = false;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Ensure the clock is on before attempting to access the hardware */
+	if (!kbdev->pm.backend.gpu_powered) {
+		if (kbdev->pm.backend.callback_power_on)
+			kbdev->pm.backend.callback_power_on(kbdev);
+
+		spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+		kbdev->pm.backend.gpu_powered = true;
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+	}
+
+	/* Ensure interrupts are off to begin with, this also clears any
+	 * outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure cache snoops are disabled before reset. */
+	kbase_pm_cache_snoop_disable(kbdev);
+	/* Prepare for the soft-reset */
+	kbdev->pm.backend.reset_done = false;
+
+	/* The cores should be made unavailable due to the reset */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->shader_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u, (u32)0u);
+	if (kbdev->tiler_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u, (u32)0u);
+	kbdev->shader_available_bitmap = 0u;
+	kbdev->tiler_available_bitmap = 0u;
+	kbdev->l2_available_bitmap = 0u;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	/* Soft reset the GPU */
+	if (kbdev->protected_mode_support &&
+			kbdev->protected_ops->protected_mode_reset)
+		err = kbase_pm_reset_do_protected(kbdev);
+	else
+		err = kbase_pm_reset_do_normal(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->protected_mode)
+		resume_vinstr = true;
+	kbdev->protected_mode = false;
+#ifdef CONFIG_DEVFREQ_THERMAL
+	kbase_ipa_model_use_configured_locked(kbdev);
+#endif
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	if (err)
+		goto exit;
+
+	if (flags & PM_HW_ISSUES_DETECT)
+		kbase_pm_hw_issues_detect(kbdev);
+
+	kbase_pm_hw_issues_apply(kbdev);
+	kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
+
+	/* Sanity check protected mode was left after reset */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		u32 gpu_status = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS), NULL);
+
+		WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE);
+	}
+
+	/* If cycle counter was in use re-enable it, enable_irqs will only be
+	 * false when called from kbase_pm_powerup */
+	if (kbdev->pm.backend.gpu_cycle_counter_requests &&
+						(flags & PM_ENABLE_IRQS)) {
+		/* enable interrupts as the L2 may have to be powered on */
+		kbase_pm_enable_interrupts(kbdev);
+		kbase_pm_request_l2_caches(kbdev);
+
+		/* Re-enable the counters if we need to */
+		spin_lock_irqsave(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+		if (kbdev->pm.backend.gpu_cycle_counter_requests)
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+		spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+		kbase_pm_release_l2_caches(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+		kbase_pm_disable_interrupts(kbdev);
+	}
+
+	if (flags & PM_ENABLE_IRQS)
+		kbase_pm_enable_interrupts(kbdev);
+
+exit:
+	/* If GPU is leaving protected mode resume vinstr operation. */
+	if (kbdev->vinstr_ctx && resume_vinstr)
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	return err;
+}
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ *
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * kbase_pm_request_gpu_cycle_counter() or
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only
+ *
+ * When this function is called the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to (
+ * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() )
+ *
+ * @kbdev:     The kbase device structure of the device
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+	--kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	kbase_pm_release_l2_caches(kbdev);
+}
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100644
index 0000000..b0d1741
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,591 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * enum kbasep_pm_action - Actions that can be performed on a core.
+ *
+ * This enumeration is private to the file. Its values are set to allow
+ * core_type_to_reg() function, which decodes this enumeration, to be simpler
+ * and more efficient.
+ *
+ * @ACTION_PRESENT: The cores that are present
+ * @ACTION_READY: The cores that are ready
+ * @ACTION_PWRON: Power on the cores specified
+ * @ACTION_PWROFF: Power off the cores specified
+ * @ACTION_PWRTRANS: The cores that are transitioning
+ * @ACTION_PWRACTIVE: The cores that are active
+ */
+enum kbasep_pm_action {
+	ACTION_PRESENT = 0,
+	ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO),
+	ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO),
+	ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO),
+	ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO),
+	ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO)
+};
+
+
+
+/**
+ * kbase_pm_dev_idle - The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_dev_activate - The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_get_present_cores - Get details of the cores that are present in
+ *                              the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid
+ *         pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_active_cores - Get details of the cores that are currently
+ *                             active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_trans_cores - Get details of the cores that are currently
+ *                            transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_ready_cores - Get details of the cores that are currently
+ *                            powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are powered and ready for jobs (they may
+ * or may not be currently executing jobs).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_clock_on - Turn the clock for the device on, and enable device
+ *                     interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if clock on due to resume after suspend, false otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
+ *                      device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU
+ * off. It should be modified during integration to perform the necessary
+ * actions to turn the clock off (if this is possible in the integration).
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if clock off due to suspend, false otherwise
+ *
+ * Return: true  if clock was turned off, or
+ *         false if clock can not be turned off due to pending page/bus fault
+ *               workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
+
+/**
+ * kbase_pm_enable_interrupts - Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts - Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
+ *                                      that does not take the hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * kbase_pm_reset_done - The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_pm_check_transitions_nolock - Check if there are any power transitions
+ *                                     to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state has no
+ * cores. That is: of the no cores desired, none were *un*available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation)
+ *
+ * The caller must hold kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return:      non-zero when all desired cores are available. That is,
+ *              it's worthwhile for the caller to submit a job.
+ *              false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_check_transitions_sync - Synchronous and locking variant of
+ *                                   kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device.pm.power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ *                                      where the caller must hold
+ *                                      kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ *                               the Power Policy, and begin any power
+ *                               transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware acheive the desired shader
+ * core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
+ *                                     the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+
+void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action);
+
+/**
+ * kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required
+ *                                   and used cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init()) after calling this function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to
+ *                         update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of @buffer_updated differs from a previous call.
+ *
+ * @kbdev:          The kbase device structure for the device (must be a
+ *                  valid pointer)
+ * @buffer_updated: True if the buffer has been updated on this VSync,
+ *                  false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change
+ *                            the clock speed of the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating.
+ */
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
+ *                                      needed
+ *
+ * If the caller is the first caller then the GPU cycle counters will be enabled
+ * along with the l2 cache
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
+ *                                               needed (l2 cache already on)
+ *
+ * This is a version of the above function
+ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
+ * l2 cache is known to be on and assured to be on until the subsequent call of
+ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does
+ * not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
+ *                                      longer in use
+ *
+ * If the caller is the last caller then the GPU cycle counters will be
+ * disabled. A request must have been made before a call to this.
+ *
+ * Caller must not hold the hwaccess_lock, as it will be taken in this function.
+ * If the caller is already holding this lock then
+ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
+ *                                             that does not take hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
+ *                                       complete
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * Access to registers should be done using kbase_os_reg_read()/write() at this
+ * stage, not kbase_reg_read()/write().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ *                              collection is active.
+ *
+ * Note that this returns if the power management metrics collection was
+ * active at the time of calling, it is possible that after the call the metrics
+ * collection enable may have changed state.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if power on due to resume after suspend,
+ *             false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been
+ *                        requested.
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if power off due to suspend,
+ *              false otherwise
+ */
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+
+#ifdef CONFIG_PM_DEVFREQ
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total, unsigned long *busy);
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @kbdev:         The kbase device structure for the device (must be a
+ *                 valid pointer)
+ * @utilisation:   The current calculated utilisation by the metrics system.
+ * @util_gl_share: The current calculated gl share of utilisation.
+ * @util_cl_share: The current calculated cl share of utilisation per core
+ *                 group.
+ * Return:         Returns 0 on failure and non zero on success.
+ */
+
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2]);
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_metrics_update - Inform the metrics system that an atom is either
+ *                           about to be run or has just completed.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @now:   Pointer to the timestamp of the change, or NULL to use current time
+ *
+ * Caller must hold hwaccess_lock
+ */
+void kbase_pm_metrics_update(struct kbase_device *kbdev,
+				ktime_t *now);
+
+/**
+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called after L2 power up.
+ */
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called before L2 power off.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
new file mode 100644
index 0000000..7613e1d
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -0,0 +1,401 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION          70
+#define KBASE_PM_VSYNC_MAX_UTILISATION          90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION       10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION       40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
+ * under 11s. Exceeding this will cause overflow */
+#define KBASE_PM_TIME_SHIFT			8
+
+/* Maximum time between sampling of utilization data, without resetting the
+ * counters. */
+#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbasep_pm_metrics_data *metrics;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	metrics = container_of(timer, struct kbasep_pm_metrics_data, timer);
+	kbase_pm_get_dvfs_action(metrics->kbdev);
+
+	spin_lock_irqsave(&metrics->lock, flags);
+
+	if (metrics->timer_active)
+		hrtimer_start(timer,
+			HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&metrics->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+int kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.metrics.kbdev = kbdev;
+
+	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.prev_busy = 0;
+	kbdev->pm.backend.metrics.prev_idle = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+
+	spin_lock_init(&kbdev->pm.backend.metrics.lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbdev->pm.backend.metrics.timer_active = true;
+	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
+
+	hrtimer_start(&kbdev->pm.backend.metrics.timer,
+			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
+
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbdev->pm.backend.metrics.timer_active = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	ktime_t diff;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+	if (ktime_to_ns(diff) < 0)
+		return;
+
+	if (kbdev->pm.backend.metrics.gpu_active) {
+		u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+
+		kbdev->pm.backend.metrics.time_busy += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[0])
+			kbdev->pm.backend.metrics.busy_cl[0] += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[1])
+			kbdev->pm.backend.metrics.busy_cl[1] += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[0])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+	} else {
+		kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff)
+							>> KBASE_PM_TIME_SHIFT);
+	}
+
+	kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+#if defined(CONFIG_PM_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function.
+ */
+static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	/* Store previous value */
+	kbdev->pm.backend.metrics.prev_idle =
+					kbdev->pm.backend.metrics.time_idle;
+	kbdev->pm.backend.metrics.prev_busy =
+					kbdev->pm.backend.metrics.time_busy;
+
+	/* Reset current values */
+	kbdev->pm.backend.metrics.time_period_start = now;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+}
+
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get());
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total_out, unsigned long *busy_out)
+{
+	ktime_t now = ktime_get();
+	unsigned long flags, busy, total;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	busy = kbdev->pm.backend.metrics.time_busy;
+	total = busy + kbdev->pm.backend.metrics.time_idle;
+
+	/* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
+	 * 100ms) */
+	if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+		kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+	} else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+		total += kbdev->pm.backend.metrics.prev_idle +
+				kbdev->pm.backend.metrics.prev_busy;
+		busy += kbdev->pm.backend.metrics.prev_busy;
+	}
+
+	*total_out = total;
+	*busy_out = busy;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev,
+					int *util_gl_share,
+					int util_cl_share[2],
+					ktime_t now)
+{
+	int utilisation;
+	int busy;
+
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	if (kbdev->pm.backend.metrics.time_idle +
+				kbdev->pm.backend.metrics.time_busy == 0) {
+		/* No data - so we return NOP */
+		utilisation = -1;
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+		goto out;
+	}
+
+	utilisation = (100 * kbdev->pm.backend.metrics.time_busy) /
+			(kbdev->pm.backend.metrics.time_idle +
+			 kbdev->pm.backend.metrics.time_busy);
+
+	busy = kbdev->pm.backend.metrics.busy_gl +
+		kbdev->pm.backend.metrics.busy_cl[0] +
+		kbdev->pm.backend.metrics.busy_cl[1];
+
+	if (busy != 0) {
+		if (util_gl_share)
+			*util_gl_share =
+				(100 * kbdev->pm.backend.metrics.busy_gl) /
+									busy;
+		if (util_cl_share) {
+			util_cl_share[0] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[0]) /
+									busy;
+			util_cl_share[1] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[1]) /
+									busy;
+		}
+	} else {
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+	}
+
+out:
+	return utilisation;
+}
+
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	int utilisation, util_gl_share;
+	int util_cl_share[2];
+	ktime_t now;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	now = ktime_get();
+
+	utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share,
+			util_cl_share, now);
+
+	if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 ||
+							util_cl_share[1] < 0) {
+		utilisation = 0;
+		util_gl_share = 0;
+		util_cl_share[0] = 0;
+		util_cl_share[1] = 0;
+		goto out;
+	}
+
+out:
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
+								util_cl_share);
+#endif				/*CONFIG_MALI_MIDGARD_DVFS */
+
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+	bool isactive;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	isactive = kbdev->pm.backend.metrics.timer_active;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	return isactive;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/**
+ * kbase_pm_metrics_active_calc - Update PM active counts based on currently
+ *                                running atoms
+ * @kbdev: Device pointer
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ */
+static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		/* Head atom may have just completed, so if it isn't running
+		 * then try the next atom */
+		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
+			katom = kbase_gpu_inspect(kbdev, js, 1);
+
+		if (katom && katom->gpu_rb_state ==
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+			if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+				int device_nr = (katom->core_req &
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+						? katom->device_nr : 0;
+				if (!WARN_ON(device_nr >= 2))
+					kbdev->pm.backend.metrics.
+						active_cl_ctx[device_nr] = 1;
+			} else {
+				/* Slot 2 should not be running non-compute
+				 * atoms */
+				if (!WARN_ON(js >= 2))
+					kbdev->pm.backend.metrics.
+						active_gl_ctx[js] = 1;
+			}
+			kbdev->pm.backend.metrics.gpu_active = true;
+		}
+	}
+}
+
+/* called when job is submitted to or removed from a GPU slot */
+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
+{
+	unsigned long flags;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	if (!timestamp) {
+		now = ktime_get();
+		timestamp = &now;
+	}
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
+
+	kbase_pm_metrics_active_calc(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100644
index 0000000..6be2226
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,1002 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#else				/* CONFIG_MALI_NO_MALI */
+#if !PLATFORM_POWER_DOWN_ONLY
+	&kbase_pm_demand_policy_ops,
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+	&kbase_pm_coarse_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+#if !PLATFORM_POWER_DOWN_ONLY
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/* The number of policies available in the system.
+ * This is derived from the number of functions listed in policy_get_functions.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+	 * expect to hit it nor tend to hit it very much anyway. We can detect
+	 * whether we need more instrumentation by a difference between
+	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
+
+	/* Must be the last */
+	KBASE_PM_FUNC_ID_COUNT
+};
+
+
+/* State changes during request/unrequest/release-ing cores */
+enum {
+	KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
+	KBASE_PM_CHANGE_STATE_TILER  = (1u << 1),
+
+	/* These two must be last */
+	KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
+						KBASE_PM_CHANGE_STATE_SHADER),
+	KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
+};
+typedef u32 kbase_pm_change_state;
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+/* Timeline Trace code lookups for each function */
+static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
+					[KBASE_PM_CHANGE_STATE_COUNT] = {
+	/* kbase_pm_request_cores */
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
+
+	/* kbase_pm_release_cores */
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
+};
+
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id,
+		kbase_pm_change_state state)
+{
+	int trace_code;
+
+	KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
+	KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
+									state);
+
+	trace_code = kbase_pm_change_state_trace_code[func_id][state];
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
+}
+
+#else /* CONFIG_MALI_TRACE_TIMELINE */
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id, kbase_pm_change_state state)
+{
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+/**
+ * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
+ *                               requested shader cores
+ * @kbdev: Device pointer
+ */
+static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
+{
+	u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
+	u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->pm.backend.desired_shader_state &=
+			~kbdev->pm.backend.shader_poweroff_pending;
+	kbdev->pm.backend.desired_tiler_state &=
+			~kbdev->pm.backend.tiler_poweroff_pending;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+
+	if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
+			prev_tiler_state !=
+				kbdev->pm.backend.desired_tiler_state ||
+			kbdev->pm.backend.ca_in_transition) {
+		bool cores_are_available;
+
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+		/* Don't need 'cores_are_available',
+		 * because we don't return anything */
+		CSTD_UNUSED(cores_are_available);
+	}
+}
+
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(timer, struct kbase_device,
+						pm.backend.gpu_poweroff_timer);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* It is safe for this call to do nothing if the work item is already
+	 * queued. The worker function will read the must up-to-date state of
+	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
+	 *
+	 * If a state change occurs while the worker function is processing,
+	 * this call will succeed as a work item can be requeued once it has
+	 * started processing.
+	 */
+	if (kbdev->pm.backend.gpu_poweroff_pending)
+		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+					&kbdev->pm.backend.gpu_poweroff_work);
+
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending_time--;
+
+		KBASE_DEBUG_ASSERT(
+				kbdev->pm.backend.shader_poweroff_pending_time
+									>= 0);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending_time)
+			kbasep_pm_do_poweroff_cores(kbdev);
+	}
+
+	if (kbdev->pm.backend.poweroff_timer_needed) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+		return HRTIMER_RESTART;
+	}
+
+	kbdev->pm.backend.poweroff_timer_running = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	bool do_poweroff = false;
+
+	kbdev = container_of(data, struct kbase_device,
+						pm.backend.gpu_poweroff_work);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	kbdev->pm.backend.gpu_poweroff_pending--;
+
+	if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Only power off the GPU if a request is still pending */
+	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+		do_poweroff = true;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (do_poweroff) {
+		kbdev->pm.backend.poweroff_timer_needed = false;
+		hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+		kbdev->pm.backend.poweroff_timer_running = false;
+
+		/* Power off the GPU */
+		kbase_pm_do_poweroff(kbdev, false);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("kbase_pm_do_poweroff",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!wq)
+		return -ENOMEM;
+
+	kbdev->pm.backend.gpu_poweroff_wq = wq;
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
+			kbasep_pm_do_gpu_poweroff_wq);
+	hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	kbdev->pm.backend.gpu_poweroff_timer.function =
+			kbasep_pm_do_gpu_poweroff_callback;
+	kbdev->pm.backend.pm_current_policy = policy_list[0];
+	kbdev->pm.backend.pm_current_policy->init(kbdev);
+	kbdev->pm.gpu_poweroff_time =
+			HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+	kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+	kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
+
+	return 0;
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.pm_current_policy->term(kbdev);
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
+}
+
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	kbdev->pm.backend.poweroff_timer_needed = false;
+	hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.poweroff_timer_running = false;
+
+	/* If wq is already running but is held off by pm.lock, make sure it has
+	 * no effect */
+	kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+	kbdev->pm.backend.shader_poweroff_pending_time = 0;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+	bool active;
+
+	lockdep_assert_held(&pm->lock);
+
+	/* pm_current_policy will never be NULL while pm.lock is held */
+	KBASE_DEBUG_ASSERT(backend->pm_current_policy);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	active = backend->pm_current_policy->get_core_active(kbdev);
+
+	if (active) {
+		if (backend->gpu_poweroff_pending) {
+			/* Cancel any pending power off request */
+			backend->gpu_poweroff_pending = 0;
+
+			/* If a request was pending then the GPU was still
+			 * powered, so no need to continue */
+			if (!kbdev->poweroff_pending) {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				return;
+			}
+		}
+
+		if (!backend->poweroff_timer_running && !backend->gpu_powered &&
+				(pm->poweroff_gpu_ticks ||
+				pm->poweroff_shader_ticks)) {
+			backend->poweroff_timer_needed = true;
+			backend->poweroff_timer_running = true;
+			hrtimer_start(&backend->gpu_poweroff_timer,
+					pm->gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+
+		/* Power on the GPU and any cores requested by the policy */
+		if (pm->backend.poweroff_wait_in_progress) {
+			pm->backend.poweron_required = true;
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			kbase_pm_do_poweron(kbdev, false);
+		}
+	} else {
+		/* It is an error for the power policy to power off the GPU
+		 * when there are contexts active */
+		KBASE_DEBUG_ASSERT(pm->active_count == 0);
+
+		if (backend->shader_poweroff_pending ||
+				backend->tiler_poweroff_pending) {
+			backend->shader_poweroff_pending = 0;
+			backend->tiler_poweroff_pending = 0;
+			backend->shader_poweroff_pending_time = 0;
+		}
+
+		/* Request power off */
+		if (pm->backend.gpu_powered) {
+			if (pm->poweroff_gpu_ticks) {
+				backend->gpu_poweroff_pending =
+						pm->poweroff_gpu_ticks;
+				backend->poweroff_timer_needed = true;
+				if (!backend->poweroff_timer_running) {
+					/* Start timer if not running (eg if
+					 * power policy has been changed from
+					 * always_on to something else). This
+					 * will ensure the GPU is actually
+					 * powered off */
+					backend->poweroff_timer_running
+							= true;
+					hrtimer_start(
+						&backend->gpu_poweroff_timer,
+						pm->gpu_poweroff_time,
+						HRTIMER_MODE_REL);
+				}
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+			} else {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+
+				/* Power off the GPU immediately */
+				kbase_pm_do_poweroff(kbdev, false);
+			}
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+	}
+}
+
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+	u64 desired_bitmap;
+	u64 desired_tiler_bitmap;
+	bool cores_are_available;
+	bool do_poweroff = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.pm_current_policy == NULL)
+		return;
+	if (kbdev->pm.backend.poweroff_wait_in_progress)
+		return;
+
+	if (kbdev->protected_mode_transition &&	!kbdev->shader_needed_bitmap &&
+			!kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt) {
+		/* We are trying to change in/out of protected mode - force all
+		 * cores off so that the L2 powers down */
+		desired_bitmap = 0;
+		desired_tiler_bitmap = 0;
+	} else {
+		desired_bitmap =
+		kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
+		desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+
+		if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+			desired_tiler_bitmap = 1;
+		else
+			desired_tiler_bitmap = 0;
+
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
+			/* Unless XAFFINITY is supported, enable core 0 if tiler
+			 * required, regardless of core availability */
+			if (kbdev->tiler_needed_cnt > 0 ||
+					kbdev->tiler_inuse_cnt > 0)
+				desired_bitmap |= 1;
+		}
+	}
+
+	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
+							(u32)desired_bitmap);
+	/* Are any cores being powered on? */
+	if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
+	    ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
+	    kbdev->pm.backend.ca_in_transition) {
+		/* Check if we are powering off any cores before updating shader
+		 * state */
+		if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+			/* Start timer to power off cores */
+			kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+			kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+
+			if (kbdev->pm.poweroff_shader_ticks &&
+					!kbdev->protected_mode_transition)
+				kbdev->pm.backend.shader_poweroff_pending_time =
+						kbdev->pm.poweroff_shader_ticks;
+			else
+				do_poweroff = true;
+		}
+
+		kbdev->pm.backend.desired_shader_state = desired_bitmap;
+		kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
+
+		/* If any cores are being powered on, transition immediately */
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+		/* Start timer to power off cores */
+		kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+		if (kbdev->pm.poweroff_shader_ticks &&
+				!kbdev->protected_mode_transition)
+			kbdev->pm.backend.shader_poweroff_pending_time =
+					kbdev->pm.poweroff_shader_ticks;
+		else
+			kbasep_pm_do_poweroff_cores(kbdev);
+	} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
+			desired_tiler_bitmap != 0 &&
+			kbdev->pm.backend.poweroff_timer_needed) {
+		/* If power policy is keeping cores on despite there being no
+		 * active contexts then disable poweroff timer as it isn't
+		 * required.
+		 * Only reset poweroff_timer_needed if we're not in the middle
+		 * of the power off callback */
+		kbdev->pm.backend.poweroff_timer_needed = false;
+	}
+
+	/* Ensure timer does not power off wanted cores and make sure to power
+	 * off unwanted cores */
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending &=
+				~(kbdev->pm.backend.desired_shader_state &
+								desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending &=
+				~(kbdev->pm.backend.desired_tiler_state &
+				desired_tiler_bitmap);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending &&
+				!kbdev->pm.backend.tiler_poweroff_pending)
+			kbdev->pm.backend.shader_poweroff_pending_time = 0;
+	}
+
+	/* Shader poweroff is deferred to the end of the function, to eliminate
+	 * issues caused by the core availability policy recursing into this
+	 * function */
+	if (do_poweroff)
+		kbasep_pm_do_poweroff_cores(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+}
+
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
+
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
+
+int set_policy_by_name(struct kbase_device *kbdev, const char *name)
+{
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, name)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		printk("power_policy: policy not found\n");
+		return -EINVAL;
+	}
+	trace_printk("policy name=%s\n", name);
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(set_policy_by_name);
+
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_policy *new_policy)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	const struct kbase_pm_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.pm_current_policy;
+	kbdev->pm.backend.pm_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
+								old_policy->id);
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
+								new_policy->id);
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.pm_current_policy = new_policy;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_active(kbdev);
+	kbase_pm_update_cores_state(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
+
+/* Check whether a state change has finished, and trace it as completed */
+static void
+kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
+{
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state)
+		kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 cores;
+
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	cores = shader_cores;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+
+		/* It should be almost impossible for this to overflow. It would
+		 * require 2^32 atoms to request a particular core, which would
+		 * require 2^24 contexts to submit. This would require an amount
+		 * of memory that is impossible on a 32-bit system and extremely
+		 * unlikely on a 64-bit system. */
+		int cnt = ++kbdev->shader_needed_cnt[bitnum];
+
+		if (1 == cnt) {
+			kbdev->shader_needed_bitmap |= bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt = ++kbdev->tiler_needed_cnt;
+
+		if (1 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+							change_gpu_state);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
+
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_needed_bitmap &= ~bit;
+
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		cnt = --kbdev->tiler_needed_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* Trace that any state change effectively completes immediately
+		 * - no-one will wait on the state change */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
+
+enum kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 prev_shader_needed;	/* Just for tracing */
+	u64 prev_shader_inuse;	/* Just for tracing */
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	prev_shader_needed = kbdev->shader_needed_bitmap;
+	prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+	/* If desired_shader_state does not contain the requested cores, then
+	 * power management is not attempting to powering those cores (most
+	 * likely due to core availability policy) and a new job affinity must
+	 * be chosen */
+	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+							shader_cores) {
+		return (kbdev->pm.backend.poweroff_wait_in_progress ||
+				kbdev->pm.backend.pm_current_policy == NULL) ?
+				KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
+	}
+
+	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+	    (tiler_required && !kbdev->tiler_available_bitmap)) {
+		/* Trace ongoing core transition */
+		kbase_timeline_pm_l2_transition_start(kbdev);
+		return KBASE_CORES_NOT_READY;
+	}
+
+	/* If we started to trace a state change, then trace it has being
+	 * finished by now, at the very latest */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+	/* Trace core transition done */
+	kbase_timeline_pm_l2_transition_done(kbdev);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt)
+			kbdev->shader_needed_bitmap &= ~bit;
+
+		/* shader_inuse_cnt should not overflow because there can only
+		 * be a very limited number of jobs on the h/w at one time */
+
+		kbdev->shader_inuse_cnt[bitnum]++;
+		kbdev->shader_inuse_bitmap |= bit;
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		--kbdev->tiler_needed_cnt;
+
+		kbdev->tiler_inuse_cnt++;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+	}
+
+	if (prev_shader_needed != kbdev->shader_needed_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+	return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_inuse_bitmap &= ~bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+		cnt = --kbdev->tiler_inuse_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+							change_gpu_state);
+
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
+
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+					bool tiler_required,
+					u64 shader_cores)
+{
+	unsigned long flags;
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_check_transitions_sync(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
+
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 prior_l2_users_count;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	prior_l2_users_count = kbdev->l2_users_count++;
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
+
+	/* if the GPU is reset while the l2 is on, l2 will be off but
+	 * prior_l2_users_count will be > 0. l2_available_bitmap will have been
+	 * set to 0 though by kbase_pm_init_hw */
+	if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
+		kbase_pm_check_transitions_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	wait_event(kbdev->pm.backend.l2_powered_wait,
+					kbdev->pm.backend.l2_powered == 1);
+
+	/* Trace that any state change completed immediately */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
+
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->l2_users_count++;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
+
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
+
+	--kbdev->l2_users_count;
+
+	if (!kbdev->l2_users_count) {
+		kbase_pm_check_transitions_nolock(kbdev);
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
new file mode 100644
index 0000000..611a90e
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -0,0 +1,227 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ *         initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+	KBASE_CORES_NOT_READY = 0,
+	KBASE_NEW_AFFINITY = 1,
+	KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ *                          for jobs to be submitted
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete/the cores might not be available
+ * until a Power Management IRQ.
+ *
+ * Return: 0 if the cores were successfully requested, or -errno otherwise.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ *                            jobs to be submitted.
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job is
+ * cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and if so update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ *
+ *         %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
+ *
+ *         %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+						struct kbase_device *kbdev,
+						bool tiler_required,
+						u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_register_inuse_cores() )
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
+ * may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups, power up, wait and prevent
+ * the power manager from powering down the l2 caches.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2
+ *
+ * It is the callers responsibility to ensure that the l2 is already powered up
+ * and to eventually call kbase_pm_release_l2_caches()
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power manager
+ * to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100644
index 0000000..d992989
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,103 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush -  Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367
+ *
+ * Note : If GPU resets occur then the counters are reset to zero, the delay may
+ * not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+	u32 base_count = 0;
+
+	/*
+	 * The caller must be holding onto the kctx or the call is from
+	 * userspace.
+	 */
+	kbase_pm_context_active(kctx->kbdev);
+	kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+	while (true) {
+		u32 new_count;
+
+		new_count = kbase_reg_read(kctx->kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		/* First time around, just store the count. */
+		if (base_count == 0) {
+			base_count = new_count;
+			continue;
+		}
+
+		/* No need to handle wrapping, unsigned maths works for this. */
+		if ((new_count - base_count) > 1000)
+			break;
+	}
+
+	kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+	kbase_pm_context_idle(kctx->kbdev);
+}
+#endif				/* CONFIG_MALI_NO_MALI */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
new file mode 100644
index 0000000..35088ab
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
new file mode 100644
index 0000000..35ff2f1
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -0,0 +1,126 @@
+#
+# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/drivers/gpu/arm/midgard/ 
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE                += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           += ../../kernel/drivers/gpu/arm/midgard/docs
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
new file mode 100644
index 0000000..7ae05c2
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR;
+	size="12,8";
+	compound=true;
+
+	node [ shape = box ];
+
+	subgraph cluster_policy_queues {
+		low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+		queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+		rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+		label = "Policy's Queue(s)";
+	}
+
+	call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+	{
+		rank=same;
+		ordering=out;
+		call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+		call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+		call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+	}
+
+	subgraph cluster_runpool {
+
+		as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+		as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+		as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+		as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+		label = "Policy's Run Pool";
+	}
+
+	{
+		rank=same;
+		call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+		sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+	}
+
+	{
+		rank=same;
+		ordering=out;
+		sstop [ shape=ellipse label="SS-Timer expires" ]
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		irq [ label="IRQ" shape=ellipse ];
+
+		job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+	}
+
+	hstop [ shape=ellipse label="HS-Timer expires" ]
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+	low_queue:qr -> call_dequeue:w;
+	rt_queue:qr -> call_dequeue:w;
+
+	call_dequeue -> as1 [lhead=cluster_runpool];
+
+	as1->call_jdequeue         [ltail=cluster_runpool];
+	call_jdequeue->jobslots:0;
+	call_jdequeue->sstop_dotfixup [ arrowhead=none];
+	sstop_dotfixup->sstop      [label="Spawn SS-Timer"];
+	sstop->jobslots            [label="SoftStop"];
+	sstop->hstop               [label="Spawn HS-Timer"];
+	hstop->jobslots:ne            [label="HardStop"];
+
+
+	as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+	call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+	call_ctxfinish->call_ctxdone [constraint=false];
+
+	call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+	{
+	jobslots->irq   [constraint=false];
+
+	irq->job_finish [constraint=false];
+	}
+
+	irq->as2  [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
new file mode 100644
index 0000000..159b993
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR
+	size="6,6"
+	compound=true;
+
+	node [ shape = box ];
+
+	call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+	policy_queue [ label="Policy's Queue" ];
+
+	{
+		rank=same;
+		runpool [ label="Policy's Run Pool" ];
+
+		ctx_finish [ label="ctx finished" ];
+	}
+
+	{
+		rank=same;
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		job_finish [ label="Job finished" ];
+	}
+
+
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> policy_queue;
+
+	policy_queue->runpool [label="dequeue ctx" weight=0.1];
+	runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+	runpool->ctx_finish [ style=dotted ];
+
+	runpool->jobslots  [label="dequeue job" weight=0.1];
+	jobslots->runpool  [label="requeue job" weight=0.1];
+
+	jobslots->job_finish [ style=dotted ];
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild
new file mode 100644
index 0000000..7551f9e
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+IPA = \
+	ipa/mali_kbase_ipa_generic.c \
+	ipa/mali_kbase_ipa.c
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
new file mode 100644
index 0000000..dd0f1d6
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
@@ -0,0 +1,327 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/sysfs.h>
+#include <linux/thermal.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/of.h>
+#include "mali_kbase.h"
+
+int kbase_ipa_model_ops_register(struct kbase_device *kbdev,
+			     struct kbase_ipa_model_ops *new_model_ops)
+{
+	struct kbase_ipa_model *new_model;
+
+	new_model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL);
+	if (!new_model)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&new_model->link);
+	new_model->kbdev = kbdev;
+	new_model->ops = new_model_ops;
+
+	list_add(&new_model->link, &kbdev->ipa_power_models);
+
+	return 0;
+}
+
+static int kbase_ipa_internal_models_append_list(struct kbase_device *kbdev)
+{
+	int err;
+
+	/* Always have the generic IPA */
+	err = kbase_ipa_model_ops_register(kbdev, &kbase_generic_ipa_model_ops);
+
+	if (err)
+		return err;
+
+	return err;
+}
+
+struct kbase_ipa_model *kbase_ipa_get_model(struct kbase_device *kbdev,
+					    const char *name)
+{
+	/* Search registered power models first */
+	struct list_head *it;
+
+	list_for_each(it, &kbdev->ipa_power_models) {
+		struct kbase_ipa_model *model =
+				list_entry(it,
+					   struct kbase_ipa_model,
+					   link);
+		if (strcmp(model->ops->name, name) == 0)
+			return model;
+	}
+
+	return NULL;
+}
+
+void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	if (kbdev->ipa_current_model != kbdev->ipa_fallback_model)
+		kbdev->ipa_current_model = kbdev->ipa_fallback_model;
+}
+
+
+void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	if (kbdev->ipa_current_model != kbdev->ipa_configured_model)
+		kbdev->ipa_current_model = kbdev->ipa_configured_model;
+}
+
+const char *kbase_ipa_model_name_from_id(u32 gpu_id)
+{
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+			GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(prod_id)) {
+		switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
+		case GPU_ID2_PRODUCT_TMIX:
+			return "generic_ipa_model";
+		default:
+			return "generic_ipa_model";
+		}
+	}
+
+	return "generic_ipa_model";
+}
+
+int kbase_ipa_model_init(struct kbase_device *kbdev)
+{
+
+	const char *model_name;
+	struct kbase_ipa_model *default_model = NULL;
+	struct kbase_ipa_model *model = NULL;
+	int err;
+
+	/* Add default ones to the list */
+	err = kbase_ipa_internal_models_append_list(kbdev);
+
+	default_model = kbase_ipa_get_model(kbdev, "generic_ipa_model");
+	/* The generic_ipa_model 'MUST' always be present.*/
+	if (!default_model) {
+		err = -EINVAL;
+		goto end;
+	}
+
+	err = default_model->ops->init(default_model);
+	if (err) {
+		dev_err(kbdev->dev,
+			"failed to init default generic power model err %d\n",
+			err);
+		goto end;
+	}
+	kbdev->ipa_fallback_model = default_model;
+	err = of_property_read_string(kbdev->dev->of_node,
+				      "ipa-model",
+				      &model_name);
+	if (err) {
+		/* Attempt to load a match from GPU-ID */
+		u32 gpu_id;
+
+		gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		model_name = kbase_ipa_model_name_from_id(gpu_id);
+	}
+
+	if (strcmp("generic_ipa_model", model_name) != 0) {
+
+		model = kbase_ipa_get_model(kbdev, model_name);
+		kbdev->ipa_configured_model = model;
+
+		if (model) {
+			kbdev->ipa_current_model = model;
+			err = model->ops->init(model);
+			goto end;
+		}
+		dev_err(kbdev->dev,
+			"Failed to find IPA model matching: %s\n",
+			model_name);
+		err = -EINVAL;
+	} else {
+
+		kbdev->ipa_current_model = default_model;
+		kbdev->ipa_configured_model = default_model;
+		dev_dbg(kbdev->dev, "Using generic-ipa-model by default\n");
+		err = 0;
+	}
+end:
+	if (err)
+		kbase_ipa_model_term(kbdev);
+
+	return err;
+}
+
+void kbase_ipa_model_term(struct kbase_device *kbdev)
+{
+	/* Clean up the models */
+	struct kbase_ipa_model *model = kbdev->ipa_configured_model;
+
+	if (model) {
+		if (model->ops->term)
+			model->ops->term(model);
+		/* Always terminate the default model,
+		 * unless it was the configured model */
+		if (model != kbdev->ipa_fallback_model)
+			kbdev->ipa_fallback_model->ops->term(model);
+	}
+	/* Clean up the list */
+	if (!list_empty(&kbdev->ipa_power_models)) {
+		struct kbase_ipa_model *model_p, *model_n;
+
+		list_for_each_entry_safe(model_p, model_n, &kbdev->ipa_power_models, link) {
+			list_del(&model_p->link);
+			kfree(model_p);
+		}
+	}
+
+}
+
+/**
+ * kbase_scale_power() - Scale a model-specific power coefficient to an OPP
+ * @c:		Model coefficient, in pW/(Hz V^2). Should be in range
+ *		0 < c < 2^16 to prevent overflow.
+ * @freq:	Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz)
+ * @voltage:	Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
+ *
+ * Keep a record of the approximate range of each value at every stage of the
+ * calculation, to ensure we don't overflow. This makes heavy use of the
+ * approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual
+ * calculations in decimal for increased accuracy.
+ *
+ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
+ */
+static inline unsigned long kbase_scale_power(const unsigned long c,
+					      const unsigned long freq,
+					      const unsigned long voltage)
+{
+	/* Range: 2^8 < v2 < 2^16 m(V^2) */
+	const unsigned long v2 = (voltage * voltage) / 1000;
+
+	/* Range: 2^3 < f_MHz < 2^10 MHz */
+	const unsigned long f_MHz = freq / 1000000;
+
+	/* Range: 2^11 < v2f_big < 2^26 kHz V^2 */
+	const unsigned long v2f_big = v2 * f_MHz;
+
+	/* Range: 2^1 < v2f < 2^16 MHz V^2 */
+	const unsigned long v2f = v2f_big / 1000;
+
+	/* Range (working backwards from next line): 0 < v2fc < 2^23 uW */
+	const unsigned long v2fc = c * v2f;
+
+	/* Range: 0 < v2fc / 1000 < 2^13 mW */
+	return v2fc / 1000;
+}
+
+#ifdef CONFIG_MALI_PWRSOFT_765
+static unsigned long kbase_get_static_power(struct devfreq *df,
+					    unsigned long voltage)
+#else
+static unsigned long kbase_get_static_power(unsigned long voltage)
+
+#endif
+{
+	struct kbase_ipa_model *model;
+#ifdef CONFIG_MALI_PWRSOFT_765
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+#else
+	struct kbase_device *kbdev = kbase_find_device(-1);
+#endif
+
+	/* We make sure that the model we access is the correct one */
+	model = ACCESS_ONCE(kbdev->ipa_current_model);
+
+#ifndef CONFIG_MALI_PWRSOFT_765
+	kbase_release_device(kbdev);
+#endif
+
+	if (model)
+		return model->ops->get_static_power(model, voltage);
+	return 0;
+}
+
+#ifdef CONFIG_MALI_PWRSOFT_765
+static unsigned long kbase_get_dynamic_power(struct devfreq *df,
+					     unsigned long freq,
+					     unsigned long voltage)
+#else
+static unsigned long kbase_get_dynamic_power(unsigned long freq,
+					     unsigned long voltage)
+#endif
+{
+	struct kbase_ipa_model *model;
+#ifdef CONFIG_MALI_PWRSOFT_765
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+#else
+	struct kbase_device *kbdev = kbase_find_device(-1);
+#endif
+
+	/* We make sure that the model we access is the correct one */
+	model = ACCESS_ONCE(kbdev->ipa_current_model);
+
+#ifndef CONFIG_MALI_PWRSOFT_765
+	kbase_release_device(kbdev);
+#endif
+
+	if (model) {
+		const unsigned long c = model->ops->get_dynamic_power(model);
+
+		return kbase_scale_power(c, freq, voltage);
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_PWRSOFT_765
+unsigned long kbase_power_to_state(struct devfreq *df, u32 target_power)
+{
+	struct kbase_ipa_model *model;
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+
+	/* We make sure that the model we access is the correct one */
+	model = ACCESS_ONCE(kbdev->ipa_current_model);
+
+	if (model && model->ops->power_to_state)
+		return model->ops->power_to_state(model, target_power);
+
+	return 0;
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+struct devfreq_cooling_ops power_model_ops = {
+#else
+struct devfreq_cooling_power power_model_ops = {
+#endif
+	.get_static_power = &kbase_get_static_power,
+	.get_dynamic_power = &kbase_get_dynamic_power,
+};
+
+unsigned long kbase_ipa_dynamic_power(struct kbase_device *kbdev,
+					     unsigned long freq,
+					     unsigned long voltage)
+{
+#ifdef CONFIG_MALI_PWRSOFT_765
+	struct devfreq *df = kbdev->devfreq;
+
+	return kbase_get_dynamic_power(df, freq, voltage);
+#else
+	return kbase_get_dynamic_power(freq, voltage);
+#endif
+}
+
+KBASE_EXPORT_TEST_API(kbase_ipa_dynamic_power);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
new file mode 100644
index 0000000..67e0270
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_IPA_H_
+#define _KBASE_IPA_H_
+
+struct kbase_ipa_model {
+	struct list_head link;
+	struct kbase_device *kbdev;
+	void *model_data;
+	struct kbase_ipa_model_ops *ops;
+};
+
+struct kbase_ipa_model_ops {
+	char *name;
+	/* The init and term ops on the default model are always called.
+	 * However, all the other models are only invoked if the model
+	 * is selected in the device tree. Otherwise they are never
+	 * initialized. Additional resources can be acquired by models
+	 * in init(), however they must be terminated in the term().
+	 */
+	int (*init)(struct kbase_ipa_model *model);
+	void (*term)(struct kbase_ipa_model *model);
+	/* get_dynamic_power() - return a coefficient with units pW/(Hz V^2),
+	 * which is scaled by the IPA framework according to the current OPP's
+	 * frequency and voltage. */
+	unsigned long (*get_dynamic_power)(struct kbase_ipa_model *model);
+	/* get_static_power() - return an estimate of the current static power
+	 * consumption, in mW, based on the current OPP's voltage, in mV. */
+	unsigned long (*get_static_power)(struct kbase_ipa_model *model,
+					  unsigned long voltage);
+	unsigned long (*power_to_state)(struct kbase_ipa_model *model,
+					unsigned long power);
+};
+
+/* Models can be registered only in the platform's platform_init_func call */
+int kbase_ipa_model_ops_register(struct kbase_device *kbdev,
+			     struct kbase_ipa_model_ops *new_model_ops);
+
+int kbase_ipa_model_init(struct kbase_device *kbdev);
+void kbase_ipa_model_term(struct kbase_device *kbdev);
+void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev);
+void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev);
+
+extern struct kbase_ipa_model_ops kbase_generic_ipa_model_ops;
+
+/**
+ * kbase_ipa_dynamic_power - calculate power
+ * @kbdev:      kbase device
+ * @freq:       frequency in Hz
+ * @voltage:    voltage in mV
+ *
+ * Return:      returns power consumption
+ */
+unsigned long kbase_ipa_dynamic_power(struct kbase_device *kbdev,
+					     unsigned long freq,
+					     unsigned long voltage);
+
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+extern struct devfreq_cooling_ops power_model_ops;
+#else
+extern struct devfreq_cooling_power power_model_ops;
+#endif
+
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_generic.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_generic.c
new file mode 100644
index 0000000..18fb2b2
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_generic.c
@@ -0,0 +1,217 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/thermal.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+#include <linux/of.h>
+
+#include "mali_kbase.h"
+#include "mali_kbase_defs.h"
+
+/*
+ * This model is primarily designed for the Juno platform. It may not be
+ * suitable for other platforms. The additional resources in this model
+ * should preferably be minimal, as this model is rarely used when a dynamic
+ * model is available.
+ */
+
+/**
+ * struct kbase_ipa_model_generic_data - IPA context per device
+ * @dynamic_coefficient: dynamic coefficient of the model
+ * @static_coefficient:  static coefficient of the model
+ * @ts:                  ts of the model
+ * @gpu_tz:              thermal zone device
+ * @ipa_lock:            protects the entire IPA context
+ */
+
+struct kbase_ipa_model_generic_data {
+	u32 dynamic_coefficient;
+	u32 static_coefficient;
+	s32 ts[4];
+	struct thermal_zone_device *gpu_tz;
+	struct mutex ipa_lock;
+};
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static unsigned long model_static_power(struct kbase_ipa_model *model,
+		unsigned long voltage)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	unsigned long temperature;
+#else
+	int temperature;
+#endif
+	unsigned long temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+	struct kbase_ipa_model_generic_data *model_data =
+		(struct kbase_ipa_model_generic_data *) model->model_data;
+	struct thermal_zone_device *gpu_tz = model_data->gpu_tz;
+
+	if (gpu_tz) {
+		int ret;
+
+		ret = gpu_tz->ops->get_temp(gpu_tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	} else {
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(model_data->ts[3] * temp_cubed)
+			+ (model_data->ts[2] * temp_squared)
+			+ (model_data->ts[1] * temp)
+			+ model_data->ts[0];
+
+	return (((model_data->static_coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+}
+
+static unsigned long model_dynamic_power(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_generic_data *model_data =
+		(struct kbase_ipa_model_generic_data *) model->model_data;
+
+	return model_data->dynamic_coefficient;
+}
+
+static int kbase_generic_power_model_init(struct kbase_ipa_model *model)
+{
+	struct kbase_device *kbdev = model->kbdev;
+	struct device_node *power_model_node;
+	const char *tz_name;
+	u32 static_power, dynamic_power;
+	u32 voltage, voltage_squared, voltage_cubed, frequency;
+	struct kbase_ipa_model_generic_data *model_data;
+	int err;
+
+	model_data = kzalloc(sizeof(struct kbase_ipa_model_generic_data),
+			    GFP_KERNEL);
+
+	if (!model_data)
+		return -ENOMEM;
+
+	power_model_node = of_get_child_by_name(kbdev->dev->of_node,
+			"power_model");
+	if (!power_model_node) {
+		dev_err(kbdev->dev, "could not find power_model node\n");
+		err = -ENODEV;
+		goto error;
+	}
+	if (!of_device_is_compatible(power_model_node,
+			"arm,mali-simple-power-model")) {
+		dev_err(kbdev->dev, "power_model incompatible with simple power model\n");
+		err = -ENODEV;
+		goto error;
+	}
+
+	err = of_property_read_string(power_model_node, "thermal-zone",
+			&tz_name);
+	if (err) {
+		dev_err(kbdev->dev, "thermal zone in power_model not available\n");
+		goto error;
+	}
+
+	model_data->gpu_tz = thermal_zone_get_zone_by_name(tz_name);
+	if (IS_ERR(model_data->gpu_tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(model_data->gpu_tz));
+		model_data->gpu_tz = NULL;
+		err = -EPROBE_DEFER;
+		goto error;
+	}
+
+	err = of_property_read_u32(power_model_node, "static-power",
+			&static_power);
+	if (err) {
+		dev_err(kbdev->dev, "static-power in power_model not available\n");
+		goto error;
+	}
+
+	err = of_property_read_u32(power_model_node, "dynamic-power",
+			&dynamic_power);
+	if (err) {
+		dev_err(kbdev->dev, "dynamic-power in power_model not available\n");
+		goto error;
+	}
+
+	err = of_property_read_u32(power_model_node, "voltage",
+			&voltage);
+	if (err) {
+		dev_err(kbdev->dev, "voltage in power_model not available\n");
+		goto error;
+	}
+
+	err = of_property_read_u32(power_model_node, "frequency",
+			&frequency);
+	if (err) {
+		dev_err(kbdev->dev, "frequency in power_model not available\n");
+		goto error;
+	}
+	voltage_squared = (voltage * voltage) / 1000;
+	voltage_cubed = voltage * voltage * voltage;
+	model_data->static_coefficient =
+		(static_power << 20) / (voltage_cubed >> 10);
+	model_data->dynamic_coefficient =
+		(((dynamic_power * 1000) / voltage_squared) * 1000) / frequency;
+
+	err = of_property_read_u32_array(power_model_node,
+				       "ts",
+				       (u32 *)model_data->ts,
+				       4);
+	if (err) {
+		dev_err(kbdev->dev, "ts in power_model not available\n");
+		goto error;
+	}
+
+	model->model_data = (void *) model_data;
+
+	return 0;
+error:
+	kfree(model_data);
+	return err;
+}
+
+static void kbase_generic_power_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_generic_data *model_data =
+			(struct kbase_ipa_model_generic_data *)model->model_data;
+
+	kfree(model_data);
+}
+
+struct kbase_ipa_model_ops kbase_generic_ipa_model_ops = {
+		.name = "generic_ipa_model",
+		.init = &kbase_generic_power_model_init,
+		.term = &kbase_generic_power_model_term,
+		.get_dynamic_power = &model_dynamic_power,
+		.get_static_power = &model_static_power,
+		.power_to_state = NULL
+};
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
new file mode 100644
index 0000000..239582e
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -0,0 +1,249 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t83x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t82x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tMIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tHEx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tSIx[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
new file mode 100644
index 0000000..e1e43a1
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -0,0 +1,996 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_ISSUES_H_
+#define _BASE_HWCONFIG_ISSUES_H_
+
+enum base_hw_issue {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8879,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_generic[] = {
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t72x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t76x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t60x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t62x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t86x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t83x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t82x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
new file mode 100644
index 0000000..0d2ac95
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,1832 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _BASE_KERNEL_H_
+#define _BASE_KERNEL_H_
+
+#ifndef __user
+#define __user
+#endif
+
+/* Support UK6 IOCTLS */
+#define BASE_LEGACY_UK6_SUPPORT 1
+
+/* Support UK7 IOCTLS */
+/* NB: To support UK6 we also need to support UK7 */
+#define BASE_LEGACY_UK7_SUPPORT 1
+
+/* Support UK8 IOCTLS */
+#define BASE_LEGACY_UK8_SUPPORT 1
+
+/* Support UK9 IOCTLS */
+#define BASE_LEGACY_UK9_SUPPORT 1
+
+/* Support UK10_2 IOCTLS */
+#define BASE_LEGACY_UK10_2_SUPPORT 1
+
+/* Support UK10_4 IOCTLS */
+#define BASE_LEGACY_UK10_4_SUPPORT 1
+
+typedef struct base_mem_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_mem_handle;
+
+#include "mali_base_mem_priv.h"
+#include "mali_kbase_profiling_gator_api.h"
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT              256
+
+#define BASEP_JD_SEM_PER_WORD_LOG2      5
+#define BASEP_JD_SEM_PER_WORD           (1 << BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_WORD_NR(x)         ((x) >> BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_MASK_IN_WORD(x)    (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1)))
+#define BASEP_JD_SEM_ARRAY_SIZE         BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT)
+
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET             ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET           ((unsigned char)0)
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined CDBG_ASSERT
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined KBASE_DEBUG_ASSERT
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#error assert macro not defined!
+#endif
+
+#if defined PAGE_MASK
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union kbase_pointer {
+	void __user *value;	  /**< client should store their pointers here */
+	u32 compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	u64 sizer;	  /**< Force 64-bit storage for all clients regardless */
+} kbase_pointer;
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (eg @c MEM_PROT_CPU_WR | @c MEM_HINT_CPU_RD),
+ * which defines a @a write-only region on the CPU side, which is
+ * heavily read by the CPU...
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see ::BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * See ::base_mem_alloc_flags.
+ *
+ */
+enum {
+/* IN */
+	BASE_MEM_PROT_CPU_RD = (1U << 0),      /**< Read access CPU side */
+	BASE_MEM_PROT_CPU_WR = (1U << 1),      /**< Write access CPU side */
+	BASE_MEM_PROT_GPU_RD = (1U << 2),      /**< Read access GPU side */
+	BASE_MEM_PROT_GPU_WR = (1U << 3),      /**< Write access GPU side */
+	BASE_MEM_PROT_GPU_EX = (1U << 4),      /**< Execute allowed on the GPU
+						    side */
+
+	/* BASE_MEM_HINT flags have been removed, but their values are reserved
+	 * for backwards compatibility with older user-space drivers. The values
+	 * can be re-used once support for r5p0 user-space drivers is removed,
+	 * presumably in r7p0.
+	 *
+	 * RESERVED: (1U << 5)
+	 * RESERVED: (1U << 6)
+	 * RESERVED: (1U << 7)
+	 * RESERVED: (1U << 8)
+	 */
+
+	BASE_MEM_GROW_ON_GPF = (1U << 9),      /**< Grow backing store on GPU
+						    Page Fault */
+
+	BASE_MEM_COHERENT_SYSTEM = (1U << 10), /**< Page coherence Outer
+						    shareable, if available */
+	BASE_MEM_COHERENT_LOCAL = (1U << 11),  /**< Page coherence Inner
+						    shareable */
+	BASE_MEM_CACHED_CPU = (1U << 12),      /**< Should be cached on the
+						    CPU */
+
+/* IN/OUT */
+	BASE_MEM_SAME_VA = (1U << 13), /**< Must have same VA on both the GPU
+					    and the CPU */
+/* OUT */
+	BASE_MEM_NEED_MMAP = (1U << 14), /**< Must call mmap to acquire a GPU
+					     address for the alloc */
+/* IN */
+	BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence
+					     Outer shareable, required. */
+	BASE_MEM_SECURE = (1U << 16),          /**< Secure memory */
+	BASE_MEM_DONT_NEED = (1U << 17),       /**< Not needed physical
+						    memory */
+	BASE_MEM_IMPORT_SHARED = (1U << 18),   /**< Must use shared CPU/GPU zone
+						    (SAME_VA zone) but doesn't
+						    require the addresses to
+						    be the same */
+};
+
+/**
+ * @brief Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the ::base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 19
+
+/**
+  * A mask for all output bits, excluding IN/OUT bits.
+  */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/**
+  * A mask for all input bits, including IN/OUT bits.
+  */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/**
+ * A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+	 BASE_MEM_COHERENT_LOCAL)
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMP: UMP import. Handle type is ump_secure_id.
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	BASE_MEM_IMPORT_TYPE_UMP = 1,
+	BASE_MEM_IMPORT_TYPE_UMM = 2,
+	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+} base_mem_import_type;
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:	kbase_pointer to imported user buffer
+ * @length:	length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+	kbase_pointer ptr;
+	u64 length;
+};
+
+/**
+ * @brief Invalid memory handle.
+ *
+ * Return value from functions returning @ref base_mem_handle on error.
+ *
+ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} })
+
+/**
+ * @brief Special write-alloc memory handle.
+ *
+ * A special handle is used to represent a region where a special page is mapped
+ * with a write-alloc cache setup, typically used when the write result of the
+ * GPU isn't needed, but the GPU must write anyway.
+ *
+ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} })
+
+#define BASEP_MEM_INVALID_HANDLE               (0ull  << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE               (1ull  << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
+/* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
+						BASE_MEM_COOKIE_BASE)
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB  0xfffff000UL
+
+
+/* Bit mask of cookies used for for memory allocation setup */
+#define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
+
+
+/**
+ * @brief Result codes of changing the size of the backing store allocated to a tmem region
+ */
+typedef enum base_backing_threshold_status {
+	BASE_BACKING_THRESHOLD_OK = 0,			    /**< Resize successful */
+	BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1,	    /**< Not a growable tmem object */
+	BASE_BACKING_THRESHOLD_ERROR_OOM = -2,		    /**< Increase failed due to an out-of-memory condition */
+	BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
+} base_backing_threshold_status;
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief a basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+	struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent imported memory object.
+ * Simple opague handle to imported memory, can't be used
+ * with anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+	struct {
+		int fd;
+	} basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
+ */
+typedef struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completely unused
+ * by the Base driver. It can be used to store things such as callback
+ * function pointer, data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+	u64 blob[2];	 /**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ *         Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *         specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+	base_mem_handle handle;
+	u64 offset;
+	u64 length;
+};
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extent:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ */
+struct base_jit_alloc_info {
+	u64 gpu_alloc_addr;
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	u8 id;
+};
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
+ * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without
+ * changing the structure size).
+ * When the flag is set for a particular dependency to signal that it is an ordering only dependency then
+ * errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID  (0)       /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA     (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER    (1U << 1) /**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly.  By not specifying the
+ * correct settings can/will cause an early job termination.  Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u32 base_jd_core_req;
+
+/* Requirements that come from the HW */
+
+/**
+ * No requirement, dependency only
+ */
+#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
+
+/**
+ * Requires fragment shaders
+ */
+#define BASE_JD_REQ_FS  ((base_jd_core_req)1 << 0)
+
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS  ((base_jd_core_req)1 << 1)
+#define BASE_JD_REQ_T   ((base_jd_core_req)1 << 2)   /**< Requires tiling */
+#define BASE_JD_REQ_CF  ((base_jd_core_req)1 << 3)   /**< Requires cache flushes */
+#define BASE_JD_REQ_V   ((base_jd_core_req)1 << 4)   /**< Requires value writeback */
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC  ((base_jd_core_req)1 << 13)
+
+/**
+ * SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP  ((base_jd_core_req)1 << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON               ((base_jd_core_req)1 << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced no syncsets can be bundled with the atom
+ * but should instead be part of a NULL jobs inserted into the dependency tree.
+ * The first pre_dep object must be configured for the external resouces to use,
+ * the second pre_dep object can be used to create other dependencies.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES   ((base_jd_core_req)1 << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB        ((base_jd_core_req)1 << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME      (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER          (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/**
+ * SW Only requirement : Replay job.
+ *
+ * If the preceding job fails, the replay job will cause the jobs specified in
+ * the list of base_jd_replay_payload pointed to by the jc pointer to be
+ * replayed.
+ *
+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
+ * job is failed, as well as any following dependencies.
+ *
+ * The replayed jobs will require a number of atom IDs. If there are not enough
+ * free atom IDs then the replay job will fail.
+ *
+ * If the preceding job does not fail, then the replay job is returned as
+ * completed.
+ *
+ * The replayed jobs will never be returned to userspace. The preceding failed
+ * job will be returned to userspace as failed; the status of this job should
+ * be ignored. Completion should be determined by the status of the replay soft
+ * job.
+ *
+ * In order for the jobs to be replayed, the job headers will have to be
+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type
+ * field indicates a Vertex Shader Job then it will be changed to Null Job.
+ *
+ * The replayed jobs have the following assumptions :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - Pre-dependencies are created based on job order.
+ * - Atom numbers are automatically assigned.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ */
+#define BASE_JD_REQ_SOFT_REPLAY                 (BASE_JD_REQ_SOFT_JOB | 0x4)
+/**
+ * SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ *   other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ *   possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET              (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET            (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY             (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/**
+ * SW only requirement: Just In Time allocation
+ *
+ * This job requests a JIT allocation based on the request in the
+ * @base_jit_alloc_info structure which is passed via the jc element of
+ * the atom.
+ *
+ * It should be noted that the id entry in @base_jit_alloc_info must not
+ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job to free the JIT allocation is still made.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC              (BASE_JD_REQ_SOFT_JOB | 0x9)
+/**
+ * SW only requirement: Just In Time free
+ *
+ * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC
+ * to be freed. The ID of the JIT allocation is passed via the jc element of
+ * the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE               (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/**
+ * SW only requirement: Map external resource
+ *
+ * This job requests external resource(s) are mapped once the dependencies
+ * of the job have been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP            (BASE_JD_REQ_SOFT_JOB | 0xb)
+/**
+ * SW only requirement: Unmap external resource
+ *
+ * This job requests external resource(s) are unmapped once the dependencies
+ * of the job has been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP          (BASE_JD_REQ_SOFT_JOB | 0xc)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE    ((base_jd_core_req)1 << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE   ((base_jd_core_req)1 << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job starts which does not have this bit set or a job completes
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if
+ * the CPU may have written to memory addressed by the job since the last job
+ * without this bit set was submitted.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job completes which does not have this bit set or a job starts
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if
+ * the CPU may read from or partially overwrite memory addressed by the job
+ * before the next job without this bit set completes.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
+
+/**
+ * These requirement bits are currently unused in base_jd_core_req
+ */
+#define BASEP_JD_REQ_RESERVED \
+	(~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
+	BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
+	BASE_JD_REQ_EVENT_COALESCE | \
+	BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
+	BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
+	BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END))
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASE_JD_REQ_ATOM_TYPE \
+	(BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
+	BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of a soft job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
+
+/*
+ * Returns non-zero value if core requirements passed define a soft job or
+ * a dependency only job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \
+	((core_req & BASE_JD_REQ_SOFT_JOB) || \
+	(core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
+
+/**
+ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
+ * handles retaining cores for power management and affinity management.
+ *
+ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
+ * where lots of atoms could be submitted before powerup, and each has an
+ * affinity chosen that causes other atoms to have an affinity
+ * violation. Whilst the affinity was not causing violations at the time it
+ * was chosen, it could cause violations thereafter. For example, 1000 jobs
+ * could have had their affinity chosen during the powerup time, so any of
+ * those 1000 jobs could cause an affinity violation later on.
+ *
+ * The attack would otherwise occur because other atoms/contexts have to wait for:
+ * -# the currently running atoms (which are causing the violation) to
+ * finish
+ * -# and, the atoms that had their affinity chosen during powerup to
+ * finish. These are run preferentially because they don't cause a
+ * violation, but instead continue to cause the violation in others.
+ * -# or, the attacker is scheduled out (which might not happen for just 2
+ * contexts)
+ *
+ * By re-choosing the affinity (which is designed to avoid violations at the
+ * time it's chosen), we break condition (2) of the wait, which minimizes the
+ * problem to just waiting for current jobs to finish (which can be bounded if
+ * the Job Scheduling Policy has a timer).
+ */
+enum kbase_atom_coreref_state {
+	/** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
+	KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
+	/** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
+	KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
+	/** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
+	KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
+	/** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
+	KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
+	/** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
+	KBASE_ATOM_COREREF_STATE_READY
+};
+
+/*
+ * Base Atom priority
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling between atoms of the same type within
+ * a base context, and only after the atoms have had dependencies resolved.
+ * Fragment atoms does not affect non-frament atoms with lower priorities, and
+ * the other way around. For example, a low priority atom that has had its
+ * dependencies resolved might run before a higher priority atom that has not
+ * had its dependencies resolved.
+ *
+ * The scheduling between base contexts/processes and between atoms from
+ * different base contexts/processes is unaffected by atom priority.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
+ *   resolved, and atom 'X' has a higher priority than atom 'Y'
+ * - If atom 'Y' is currently running on the HW, then it is interrupted to
+ *   allow atom 'X' to run soon after
+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ *   the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * - Any two atoms that have the same priority could run in any order with
+ *   respect to each other. That is, there is no ordering constraint between
+ *   atoms of the same priority.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM  ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_HIGH    ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW     ((base_jd_prio)2)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+enum kbase_jd_atom_state {
+	/** Atom is not used */
+	KBASE_JD_ATOM_STATE_UNUSED,
+	/** Atom is queued in JD */
+	KBASE_JD_ATOM_STATE_QUEUED,
+	/** Atom has been given to JS (is runnable/running) */
+	KBASE_JD_ATOM_STATE_IN_JS,
+	/** Atom has been completed, but not yet handed back to job dispatcher
+	 *  for dependency resolution */
+	KBASE_JD_ATOM_STATE_HW_COMPLETED,
+	/** Atom has been completed, but not yet handed back to userspace */
+	KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+	base_atom_id  atom_id;               /**< An atom number */
+	base_jd_dep_type dependency_type;    /**< Dependency type */
+};
+
+/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value.
+ * In order to keep the size of the structure same, padding field has been adjusted
+ * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines)
+ * is added at the end of the structure. Place in the structure previously occupied by u16 core_req
+ * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission
+ * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left
+ * for possible future use. */
+typedef struct base_jd_atom_v2 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	kbase_pointer extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	u16 compat_core_req;	            /**< core requirements which correspond to the legacy support for UK 10.2 */
+	struct base_dependency pre_dep[2];  /**< pre-dependencies, one need to use SETTER function to assign this field,
+	this is done in order to reduce possibility of improper assigment of a dependency field */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;                  /**< Atom priority. Refer to @ref base_jd_prio for more details */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[1];
+	base_jd_core_req core_req;          /**< core requirements */
+} base_jd_atom_v2;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+struct base_jd_atom_v2_uk6 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	kbase_pointer extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	u16 core_req;                       /**< core requirements */
+	base_atom_id pre_dep[2]; /**< pre-dependencies */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;		    /**< priority - smaller is higher priority */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[7];
+};
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+typedef enum base_external_resource_access {
+	BASE_EXT_RES_ACCESS_SHARED,
+	BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+	u64 ext_resource;
+} base_external_resource;
+
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+	u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	u64 address;
+	u64 size;
+	struct base_external_resource extres;
+};
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] dep          The kbase jd atom dependency to be initialized.
+ * @param     id           The atom_id to be assigned.
+ * @param     dep_type     The dep_type to be assigned.
+ *
+ */
+static inline void base_jd_atom_dep_set(struct base_dependency *dep,
+		base_atom_id id, base_jd_dep_type dep_type)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	/*
+	 * make sure we don't set not allowed combinations
+	 * of atom_id/dependency_type.
+	 */
+	LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+			(id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
+
+	dep->atom_id = id;
+	dep->dependency_type = dep_type;
+}
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] dep          The kbase jd atom dependency to be written.
+ * @param[in]     from         The dependency to make a copy from.
+ *
+ */
+static inline void base_jd_atom_dep_copy(struct base_dependency *dep,
+		const struct base_dependency *from)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type);
+}
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit and @a base_jd_submit has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ */
+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+	LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit and @a base_jd_submit has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init or @a base_fence_import.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ */
+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up an external resource object to reference
+ * a memory allocation and the type of access requested.
+ *
+ * @param[in] res     The resource object to initialize
+ * @param     handle  The handle to the imported memory object, must be
+ *                    obtained by calling @ref base_mem_as_import_handle().
+ * @param     access  The type of access requested
+ */
+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
+{
+	u64 address;
+
+	address = handle.basep.handle;
+
+	LOCAL_ASSERT(res != NULL);
+	LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+	LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+	res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+	BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
+	BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
+	BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */
+	BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
+	BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
+	BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
+	BASE_JD_SW_EVENT_RESERVED = (3u << 11),	/**< Reserved event type */
+	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)	    /**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0  - subtype
+ * @li 12:11 - type
+ * @li 13    - SW success (only valid if the SW bit is set)
+ * @li 14    - SW event (HW event if not set)
+ * @li 15    - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+typedef enum base_jd_event_code {
+	/* HW defined exceptions */
+
+	/** Start of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+	/* non-fatal exceptions */
+	BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
+	BASE_JD_EVENT_DONE = 0x01,
+	BASE_JD_EVENT_STOPPED = 0x03,	  /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
+	BASE_JD_EVENT_TERMINATED = 0x04,  /**< This is actually a fault status code - the job was hard stopped */
+	BASE_JD_EVENT_ACTIVE = 0x08,	  /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
+
+	/** End of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+
+	/** Start of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+	/* job exceptions */
+	BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+	BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+	BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+	BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+	BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+	BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+	BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+	BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+	BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+	BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+	BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+	BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+	BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+	BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+	BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+	BASE_JD_EVENT_STATE_FAULT = 0x5A,
+	BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+	BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+	/* GPU exceptions */
+	BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+	BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+	/* MMU exceptions */
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+	BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+	BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+	/* SW defined exceptions */
+	BASE_JD_EVENT_MEM_GROWTH_FAILED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_TIMED_OUT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+	BASE_JD_EVENT_JOB_CANCELLED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+	BASE_JD_EVENT_JOB_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+	BASE_JD_EVENT_PM_EVENT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+	BASE_JD_EVENT_FORCE_REPLAY	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
+
+	BASE_JD_EVENT_BAG_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+	/** End of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+	BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
+	BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+	/** End of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of Kernel-only status codes. Such codes are never returned to user-space */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+	BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+	/** End of Kernel-only status codes. */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. The can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (ie all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+	base_jd_event_code event_code;  /**< event code */
+	base_atom_id atom_number;       /**< the atom number that has completed */
+	struct base_jd_udata udata;     /**< user data */
+} base_jd_event_v2;
+
+/**
+ * Padding required to ensure that the @ref struct base_dump_cpu_gpu_counters structure fills
+ * a full cache line.
+ */
+
+#define BASE_CPU_GPU_CACHE_LINE_PADDING (36)
+
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field of @ref base_jd_atom.
+ *
+ * This structure must be padded to ensure that it will occupy whole cache lines. This is to avoid
+ * cases where access to pages containing the structure is shared between cached and un-cached
+ * memory regions, which would cause memory corruption.  Here we set the structure size to be 64 bytes
+ * which is the cache line for ARM A15 processors.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+	u64 system_time;
+	u64 cycle_counter;
+	u64 sec;
+	u32 usec;
+	u8 padding[BASE_CPU_GPU_CACHE_LINE_PADDING];
+} base_dump_cpu_gpu_counters;
+
+
+
+/** @} end group base_user_api_job_dispatch */
+
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistent guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ *  - Part of a critical inner-loop
+ *  - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU  registers themselves.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * _mali_base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to _mali_base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties
+ *
+ * The Platform Config File sets up gpu properties that are specific to a
+ * certain platform. Properties that are 'Implementation Defined' in the
+ * Midgard Architecture spec are placed here.
+ *
+ * @note Reference configurations are provided for Midgard Implementations, such as
+ * the Mali-T600 family. The customer need not repeat this information, and can select one of
+ * these reference configurations. For example, VA_BITS, PA_BITS and the
+ * maximum number of samples per pixel might vary between Midgard Implementations, but
+ * \b not for platforms using the Mali-T604. This information is placed in
+ * the reference configuration files.
+ *
+ * The System Integrator creates the following structure:
+ * - platform_XYZ
+ * - platform_XYZ/plat
+ * - platform_XYZ/plat/plat_config.h
+ *
+ * They then edit plat_config.h, using the example plat_config.h files as a
+ * guide.
+ *
+ * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will
+ * receive a helpful \#error message if they do not do this correctly. This
+ * selects the Reference Configuration for the Midgard Implementation. The rationale
+ * behind this decision (against asking the customer to write \#include
+ * <gpus/mali_t600.h> in their plat_config.h) is as follows:
+ * - This mechanism 'looks' like a regular config file (such as Linux's
+ * .config)
+ * - It is difficult to get wrong in a way that will produce strange build
+ * errors:
+ *  - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and
+ *  so they won't accidentally pick another file with 'mali_t600' in its name
+ *  - When the build doesn't work, the System Integrator may think the DDK is
+ *  doesn't work, and attempt to fix it themselves:
+ *   - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the
+ *   error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells
+ *   you.
+ *   - For a \#include mechanism, checks must still be made elsewhere, which the
+ *   System Integrator may try working around by setting \#defines (such as
+ *   VA_BITS) themselves in their plat_config.h. In the  worst case, they may
+ *   set the prevention-mechanism \#define of
+ *   "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN".
+ *   - In this case, they would believe they are on the right track, because
+ *   the build progresses with their fix, but with errors elsewhere.
+ *
+ * However, there is nothing to prevent the customer using \#include to organize
+ * their own configurations files hierarchically.
+ *
+ * The mechanism for the header file processing is as follows:
+ *
+ * @dot
+   digraph plat_config_mechanism {
+	   rankdir=BT
+	   size="6,6"
+
+       "mali_base.h";
+	   "gpu/mali_gpu.h";
+
+	   node [ shape=box ];
+	   {
+	       rank = same; ordering = out;
+
+		   "gpu/mali_gpu_props.h";
+		   "base/midg_gpus/mali_t600.h";
+		   "base/midg_gpus/other_midg_gpu.h";
+	   }
+	   { rank = same; "plat/plat_config.h"; }
+	   {
+	       rank = same;
+		   "gpu/mali_gpu.h" [ shape=box ];
+		   gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ];
+		   select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ;
+	   }
+	   node [ shape=box ];
+	   { rank = same; "plat/plat_config.h"; }
+	   { rank = same; "mali_base.h"; }
+
+	   "mali_base.h" -> "gpu/mali_gpu.h" -> "gpu/mali_gpu_props.h";
+	   "mali_base.h" -> "plat/plat_config.h" ;
+	   "mali_base.h" -> select_gpu ;
+
+	   "plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ;
+	   gpu_chooser -> select_gpu [style="dotted,bold"] ;
+
+	   select_gpu -> "base/midg_gpus/mali_t600.h" ;
+	   select_gpu -> "base/midg_gpus/other_midg_gpu.h" ;
+   }
+   @enddot
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided the entire processed @ref base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algorithm can be determined either by a u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * required for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+struct mali_base_gpu_core_props {
+	/**
+	 * Product specific value.
+	 */
+	u32 product_id;
+
+	/**
+	 * Status of the GPU release.
+     * No defined values, but starts at 0 and increases by one for each release
+     * status (alpha, beta, EAC, etc.).
+     * 4 bit values (0-15).
+	 */
+	u16 version_status;
+
+	/**
+	 * Minor release number of the GPU. "P" part of an "RnPn" release number.
+     * 8 bit values (0-255).
+	 */
+	u16 minor_revision;
+
+	/**
+	 * Major release number of the GPU. "R" part of an "RnPn" release number.
+     * 4 bit values (0-15).
+	 */
+	u16 major_revision;
+
+	u16 padding;
+
+	/**
+	 * @usecase GPU clock speed is not specified in the Midgard Architecture, but is
+	 * <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+	 */
+	u32 gpu_speed_mhz;
+
+	/**
+	 * @usecase GPU clock max/min speed is required for computing best/worst case
+	 * in tasks as job scheduling ant irq_throttling. (It is not specified in the
+	 *  Midgard Architecture).
+	 */
+	u32 gpu_freq_khz_max;
+	u32 gpu_freq_khz_min;
+
+	/**
+	 * Size of the shader program counter, in bits.
+	 */
+	u32 log2_program_counter_size;
+
+	/**
+	 * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+	 * bitpattern where a set bit indicates that the format is supported.
+	 *
+	 * Before using a texture format, it is recommended that the corresponding
+	 * bit be checked.
+	 */
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	/**
+	 * Theoretical maximum memory available to the GPU. It is unlikely that a
+	 * client will be able to allocate all of this memory for their own
+	 * purposes, but this at least provides an upper bound on the memory
+	 * available to the GPU.
+	 *
+	 * This is required for OpenCL's clGetDeviceInfo() call when
+	 * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+	 * client will not be expecting to allocate anywhere near this value.
+	 */
+	u64 gpu_available_memory_size;
+};
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+	u8 log2_line_size;
+	u8 log2_cache_size;
+	u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+	u32 bin_size_bytes;	/* Max is 4*2^15 */
+	u32 max_active_levels;	/* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+	u32 max_threads;            /* Max. number of threads per core */
+	u32 max_workgroup_size;     /* Max. number of threads per workgroup */
+	u32 max_barrier_size;       /* Max. number of threads that can synchronize on a simple barrier */
+	u16 max_registers;          /* Total size [1..65535] of the register file available per core. */
+	u8  max_task_queue;         /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+	u8  max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+	u8  impl_tech;              /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+	u8  padding[7];
+};
+
+/**
+ * @brief descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+	u64 core_mask;	       /**< Core restriction mask required for the group */
+	u16 num_cores;	       /**< Number of cores in the group */
+	u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+	u32 num_groups;
+
+	/**
+	 * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+	 *
+	 * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+	 * whether the core groups are coherent or not. Hence this member is needed
+	 * to calculate how much memory is required for dumping.
+	 *
+	 * @note Do not use it to work out how many valid elements are in the
+	 * group[] member. Use num_groups instead.
+	 */
+	u32 num_core_groups;
+
+	/**
+	 * Coherency features of the memory, accessed by @ref gpu_mem_features
+	 * methods
+	 */
+	u32 coherency;
+
+	u32 padding;
+
+	/**
+	 * Descriptors of coherent groups
+	 */
+	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+	u64 shader_present;
+	u64 tiler_present;
+	u64 l2_present;
+	u64 stack_present;
+
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 mem_features;
+	u32 mmu_features;
+
+	u32 as_present;
+
+	u32 js_present;
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 tiler_features;
+	u32 texture_features[3];
+
+	u32 gpu_id;
+
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+
+	/*
+	 * Note: This is the _selected_ coherency mode rather than the
+	 * available modes as exposed in the coherency_features register.
+	 */
+	u32 coherency_mode;
+};
+
+/**
+ * Return structure for _mali_base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this data structure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct mali_base_gpu_props {
+	struct mali_base_gpu_core_props core_props;
+	struct mali_base_gpu_l2_cache_props l2_props;
+	u64 unused_1; /* keep for backwards compatibility */
+	struct mali_base_gpu_tiler_props tiler_props;
+	struct mali_base_gpu_thread_props thread_props;
+
+	/** This member is large, likely to be 128 bytes */
+	struct gpu_raw_gpu_props raw_props;
+
+	/** This must be last member of the structure */
+	struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * \enum base_context_create_flags
+ *
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+enum base_context_create_flags {
+	/** No flags set */
+	BASE_CONTEXT_CREATE_FLAG_NONE = 0,
+
+	/** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */
+	BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0),
+
+	/** Base context is a 'System Monitor' context for Hardware counters.
+	 *
+	 * One important side effect of this is that job submission is disabled. */
+	BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1)
+};
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init()
+ */
+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
+	(((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
+	  ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
+ */
+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
+	((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
+
+/*
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+/** Private flag tracking whether job descriptor dumping is disabled */
+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31))
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C Pre-processor macros are exposed here to do with Platform
+ * Config.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revison.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/**
+ * @brief The payload for a replay job. This must be in GPU memory.
+ */
+typedef struct base_jd_replay_payload {
+	/**
+	 * Pointer to the first entry in the base_jd_replay_jc list.  These
+	 * will be replayed in @b reverse order (so that extra ones can be added
+	 * to the head in future soft jobs without affecting this soft job)
+	 */
+	u64 tiler_jc_list;
+
+	/**
+	 * Pointer to the fragment job chain.
+	 */
+	u64 fragment_jc;
+
+	/**
+	 * Pointer to the tiler heap free FBD field to be modified.
+	 */
+	u64 tiler_heap_free;
+
+	/**
+	 * Hierarchy mask for the replayed fragment jobs. May be zero.
+	 */
+	u16 fragment_hierarchy_mask;
+
+	/**
+	 * Hierarchy mask for the replayed tiler jobs. May be zero.
+	 */
+	u16 tiler_hierarchy_mask;
+
+	/**
+	 * Default weight to be used for hierarchy levels not in the original
+	 * mask.
+	 */
+	u32 hierarchy_default_weight;
+
+	/**
+	 * Core requirements for the tiler job chain
+	 */
+	base_jd_core_req tiler_core_req;
+
+	/**
+	 * Core requirements for the fragment job chain
+	 */
+	base_jd_core_req fragment_core_req;
+} base_jd_replay_payload;
+
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+typedef struct base_jd_replay_payload_uk10_2 {
+	u64 tiler_jc_list;
+	u64 fragment_jc;
+	u64 tiler_heap_free;
+	u16 fragment_hierarchy_mask;
+	u16 tiler_hierarchy_mask;
+	u32 hierarchy_default_weight;
+	u16 tiler_core_req;
+	u16 fragment_core_req;
+	u8 padding[4];
+} base_jd_replay_payload_uk10_2;
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
+
+/**
+ * @brief An entry in the linked list of job chains to be replayed. This must
+ *        be in GPU memory.
+ */
+typedef struct base_jd_replay_jc {
+	/**
+	 * Pointer to next entry in the list. A setting of NULL indicates the
+	 * end of the list.
+	 */
+	u64 next;
+
+	/**
+	 * Pointer to the job chain.
+	 */
+	u64 jc;
+
+} base_jd_replay_jc;
+
+/* Maximum number of jobs allowed in a fragment chain in the payload of a
+ * replay job */
+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
+
+/** @} end group base_api */
+
+typedef struct base_profiling_controls {
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+} base_profiling_controls;
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
+
+/* Indicate that job dumping is enabled. This could affect certain timers
+ * to account for the performance impact. */
+#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
+
+#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
+		BASE_TLSTREAM_JOB_DUMPING_ENABLED)
+
+#endif				/* _BASE_KERNEL_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100644
index 0000000..4a98a72
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC	(1U << 0)
+#define BASE_SYNCSET_OP_CSYNC	(1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	base_mem_handle mem_handle;
+	u64 user_addr;
+	u64 size;
+	u8 type;
+	u8 padding[7];
+};
+
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
new file mode 100644
index 0000000..be454a2
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif	/*_BASE_VENDOR_SPEC_FUNC_H_*/
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100644
index 0000000..9bae708
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,627 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <asm/page.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_uku.h>
+#include <mali_kbase_linux.h>
+
+/*
+ * Include mali_kbase_defs.h first as this provides types needed by other local
+ * header files.
+ */
+#include "mali_kbase_defs.h"
+
+#include "mali_kbase_context.h"
+#include "mali_kbase_strings.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_trace_timeline.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_debug_job_fault.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_vinstr.h"
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include "ipa/mali_kbase_ipa.h"
+#endif
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+/**
+ * @page page_base_kernel_main Kernel-side Base (KBase) APIs
+ */
+
+/**
+ * @defgroup base_kbase_api Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: configuration attributes member of kbdev needs to have
+* been setup before calling kbase_device_init
+*/
+
+/*
+* API to acquire device list semaphore and return pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control);
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+void kbase_destroy_context(struct kbase_context *kctx);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data,
+		int uk6_atom);
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data);
+#endif
+
+/**
+ * kbase_jd_done_worker - Handle a job completion
+ * @data: a &struct work_struct
+ *
+ * This function requeues the job from the runpool (if it was soft-stopped or
+ * removed from NEXT registers).
+ *
+ * Removes it from the system if it finished/failed/was cancelled.
+ *
+ * Resolves dependencies to add dependent jobs to the context, potentially
+ * starting them if necessary (which may add more references to the context)
+ *
+ * Releases the reference to the context from the no-longer-running job.
+ *
+ * Handles retrying submission outside of IRQ context if it failed from within
+ * IRQ context.
+ */
+void kbase_jd_done_worker(struct work_struct *data);
+
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+		kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+bool jd_submit_atom(struct kbase_context *kctx,
+			 const struct base_jd_atom_v2 *user_atom,
+			 struct kbase_jd_atom *katom);
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ *                                               and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The hwaccess_lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom);
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom);
+int kbase_soft_event_update(struct kbase_context *kctx,
+			    u64 event,
+			    unsigned char new_status);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+void kbasep_soft_job_timeout_worker(unsigned long data);
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
+
+/* api used internally for register access. Contains validation and tracing */
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size);
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
+
+/* api to be ported per OS, only need to do the raw register access */
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value);
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset);
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev     The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code  exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+		u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or has already been suspended
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+	return kbdev->pm.suspending;
+}
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int result;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+	result = katom - &kctx->jctx.atoms[0];
+	KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
+	return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id:   ID of atom to retrieve
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+		struct kbase_context *kctx, int id)
+{
+	return &kctx->jctx.atoms[id];
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just increment
+ * the counter during disjoint events we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down
+ * should be called
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if !defined(UINT64_MAX)
+	#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#endif
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)     \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+	kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+	kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+	trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(subcode);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_DEBUG
+/**
+ * kbase_set_driver_inactive - Force driver to go inactive
+ * @kbdev:    Device pointer
+ * @inactive: true if driver should go inactive, false otherwise
+ *
+ * Forcing the driver inactive will cause all future IOCTLs to wait until the
+ * driver is made active again. This is intended solely for the use of tests
+ * which require that no jobs are running while the test executes.
+ */
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifndef RESET0_LEVEL
+extern u32  override_value_aml;
+extern void *reg_base_hiubus;
+#define RESET0_MASK    0x01
+#define RESET1_MASK    0x02
+#define RESET2_MASK    0x03
+
+#define RESET0_LEVEL    0x10
+#define RESET1_LEVEL    0x11
+#define RESET2_LEVEL    0x12
+#define Rd(r)                           readl((reg_base_hiubus) + ((r)<<2))
+#define Wr(r, v)                        writel((v), ((reg_base_hiubus) + ((r)<<2)))
+#define Mali_WrReg(unit, core, regnum, value)   kbase_reg_write(kbdev, (regnum), (value), NULL)
+#define Mali_RdReg(unit, core, regnum)          kbase_reg_read(kbdev, (regnum), NULL)
+#define stimulus_print   printk
+#define stimulus_display printk
+#define Mali_pwr_on(x)   Mali_pwr_on_with_kdev(kbdev, (x))
+#define Mali_pwr_off(x)  Mali_pwr_off_with_kdev(kbdev, (x))
+#endif
+
+
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/* kbase_io_history_init - initialize data struct for register access history
+ *
+ * @kbdev The register history to initialize
+ * @n The number of register accesses that the buffer could hold
+ *
+ * @return 0 if successfully initialized, failure otherwise
+ */
+int kbase_io_history_init(struct kbase_io_history *h, u16 n);
+
+/* kbase_io_history_term - uninit all resources for the register access history
+ *
+ * @h The register history to terminate
+ */
+void kbase_io_history_term(struct kbase_io_history *h);
+
+/* kbase_io_history_dump - print the register history to the kernel ring buffer
+ *
+ * @kbdev Pointer to kbase_device containing the register history to dump
+ */
+void kbase_io_history_dump(struct kbase_device *kbdev);
+
+/**
+ * kbase_io_history_resize - resize the register access history buffer.
+ *
+ * @h: Pointer to a valid register history to resize
+ * @new_size: Number of accesses the buffer could hold
+ *
+ * A successful resize will clear all recent register accesses.
+ * If resizing fails for any reason (e.g., could not allocate memory, invalid
+ * buffer size) then the original buffer will be kept intact.
+ *
+ * @return 0 if the buffer was resized, failure otherwise
+ */
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbase_io_history_init(...) ((int)0)
+
+#define kbase_io_history_term CSTD_NOP
+
+#define kbase_io_history_dump CSTD_NOP
+
+#define kbase_io_history_resize CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+#endif
+
+
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100644
index 0000000..933aa28
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,209 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* This function is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the
+ * restart index is out of bounds and the rerun causes a tile range fault. If this happens
+ * we try to clamp the restart index to a correct value and rerun the job.
+ */
+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+	struct device *dev = katom->kctx->kbdev->dev;
+	u32   clamped = 0;
+	struct kbase_va_region *region;
+	phys_addr_t *page_array;
+	u64 page_index;
+	u32 offset = katom->jc & (~PAGE_MASK);
+	u32 *page_1 = NULL;
+	u32 *page_2 = NULL;
+	u32   job_header[JOB_HEADER_SIZE_IN_WORDS];
+	void *dst = job_header;
+	u32 minX, minY, maxX, maxY;
+	u32 restartX, restartY;
+	struct page *p;
+	u32 copy_size;
+
+	dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+	if (!(katom->core_req & BASE_JD_REQ_FS))
+		return 0;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+			katom->jc);
+	if (!region || (region->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(region);
+	if (!page_array)
+		goto out_unlock;
+
+	page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+	p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+	/* we need the first 10 words of the fragment shader job descriptor.
+	 * We need to check that the offset + 10 words is less that the page
+	 * size otherwise we need to load the next page.
+	 * page_size_overflow will be equal to 0 in case the whole descriptor
+	 * is within the page > 0 otherwise.
+	 */
+	copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+	page_1 = kmap_atomic(p);
+
+	/* page_1 is a u32 pointer, offset is expressed in bytes */
+	page_1 += offset>>2;
+
+	kbase_sync_single_for_cpu(katom->kctx->kbdev,
+			kbase_dma_addr(p) + offset,
+			copy_size, DMA_BIDIRECTIONAL);
+
+	memcpy(dst, page_1, copy_size);
+
+	/* The data needed overflows page the dimension,
+	 * need to map the subsequent page */
+	if (copy_size < JOB_HEADER_SIZE) {
+		p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+		page_2 = kmap_atomic(p);
+
+		kbase_sync_single_for_cpu(katom->kctx->kbdev,
+				kbase_dma_addr(p),
+				JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+		memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+	}
+
+	/* We managed to correctly map one or two pages (in case of overflow) */
+	/* Get Bounding Box data and restart index from fault address low word */
+	minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+	restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+	dev_warn(dev, "Before Clamping:\n"
+			"Jobstatus: %08x\n"
+			"restartIdx: %08x\n"
+			"Fault_addr_low: %08x\n"
+			"minCoordsX: %08x minCoordsY: %08x\n"
+			"maxCoordsX: %08x maxCoordsY: %08x\n",
+			job_header[JOB_DESC_STATUS_WORD],
+			job_header[JOB_DESC_RESTART_INDEX_WORD],
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+			minX, minY,
+			maxX, maxY);
+
+	/* Set the restart index to the one which generated the fault*/
+	job_header[JOB_DESC_RESTART_INDEX_WORD] =
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+	if (restartX < minX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to minimum. %08x clamped to %08x\n",
+			restartX, minX);
+		clamped =  1;
+	}
+	if (restartY < minY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to minimum. %08x clamped to %08x\n",
+			restartY, minY);
+		clamped =  1;
+	}
+	if (restartX > maxX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to maximum. %08x clamped to %08x\n",
+			restartX, maxX);
+		clamped =  1;
+	}
+	if (restartY > maxY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to maximum. %08x clamped to %08x\n",
+			restartY, maxY);
+		clamped =  1;
+	}
+
+	if (clamped) {
+		/* Reset the fault address low word
+		 * and set the job status to STOPPED */
+		job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+		job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+		dev_warn(dev, "After Clamping:\n"
+				"Jobstatus: %08x\n"
+				"restartIdx: %08x\n"
+				"Fault_addr_low: %08x\n"
+				"minCoordsX: %08x minCoordsY: %08x\n"
+				"maxCoordsX: %08x maxCoordsY: %08x\n",
+				job_header[JOB_DESC_STATUS_WORD],
+				job_header[JOB_DESC_RESTART_INDEX_WORD],
+				job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+				minX, minY,
+				maxX, maxY);
+
+		/* Flush CPU cache to update memory for future GPU reads*/
+		memcpy(page_1, dst, copy_size);
+		p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+		kbase_sync_single_for_device(katom->kctx->kbdev,
+				kbase_dma_addr(p) + offset,
+				copy_size, DMA_TO_DEVICE);
+
+		if (copy_size < JOB_HEADER_SIZE) {
+			memcpy(page_2, dst + copy_size,
+					JOB_HEADER_SIZE - copy_size);
+			p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+
+			kbase_sync_single_for_device(katom->kctx->kbdev,
+					kbase_dma_addr(p),
+					JOB_HEADER_SIZE - copy_size,
+					DMA_TO_DEVICE);
+		}
+	}
+	if (copy_size < JOB_HEADER_SIZE)
+		kunmap_atomic(page_2);
+
+	kunmap_atomic(page_1);
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+	return clamped;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100644
index 0000000..099a298
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
new file mode 100644
index 0000000..f910fe9
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+
+static int kbase_as_fault_read(struct seq_file *sfile, void *data)
+{
+	uintptr_t as_no = (uintptr_t) sfile->private;
+
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+	struct kbase_device *kbdev = NULL;
+
+	kbdev_list = kbase_dev_list_get();
+
+	list_for_each(entry, kbdev_list) {
+		kbdev = list_entry(entry, struct kbase_device, entry);
+
+		if(kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) {
+
+			/* don't show this one again until another fault occors */
+			kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no);
+
+			/* output the last page fault addr */
+			seq_printf(sfile, "%llu\n", (u64) kbdev->as[as_no].fault_addr);
+		}
+
+	}
+
+	kbase_dev_list_put(kbdev_list);
+
+	return 0;
+}
+
+static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbase_as_fault_read , in->i_private);
+}
+
+static const struct file_operations as_fault_fops = {
+	.open = kbase_as_fault_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ *  Initialize debugfs entry for each address space
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	uint i;
+	char as_name[64];
+	struct dentry *debugfs_directory;
+
+	kbdev->debugfs_as_read_bitmap = 0ULL;
+
+	KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces);
+	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));
+
+	debugfs_directory = debugfs_create_dir("address_spaces",
+		kbdev->mali_debugfs_directory);
+
+	if(debugfs_directory) {
+		for(i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+			snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
+			debugfs_create_file(as_name, S_IRUGO,
+				debugfs_directory, (void*) ((uintptr_t) i), &as_fault_fops);
+		}
+	}
+	else
+		dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory");
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+	return;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
new file mode 100644
index 0000000..3ed2248
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_AS_FAULT_DEBUG_FS_H
+#define _KBASE_AS_FAULT_DEBUG_FS_H
+
+/**
+ * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults
+ *
+ * @kbdev: Pointer to kbase_device
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_as_fault_debugfs_new() - make the last fault available on debugfs
+ *
+ * @kbdev: Pointer to kbase_device
+ * @as_no: The address space the fault occurred on
+ */
+static inline void
+kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
+#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_MALI_DEBUG */
+	return;
+}
+
+#endif  /*_KBASE_AS_FAULT_DEBUG_FS_H*/
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100644
index 0000000..c67b3e9
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+	u32 cache_flags = 0;
+
+	CSTD_UNUSED(nr_pages);
+
+	if (flags & BASE_MEM_CACHED_CPU)
+		cache_flags |= KBASE_REG_CPU_CACHED;
+
+	return cache_flags;
+}
+
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		return;
+#endif /* CONFIG_MALI_COH_KERN */
+	dma_sync_single_for_device(kbdev->dev, handle, size, dir);
+}
+
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		return;
+#endif /* CONFIG_MALI_COH_KERN */
+	dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
new file mode 100644
index 0000000..0c18bdb
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * kbase_cache_enabled - Choose the cache policy for a specific region
+ * @flags:    flags describing attributes of the region
+ * @nr_pages: total number of pages (backed or not) for the region
+ *
+ * Tells whether the CPU and GPU caches should be enabled or not for a specific
+ * region.
+ * This function can be modified to customize the cache policy depending on the
+ * flags and size of the region.
+ *
+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED
+ *         depending on the cache policy
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
new file mode 100644
index 0000000..fb615ae
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
@@ -0,0 +1,51 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+int kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_init_func)
+		return platform_funcs_p->platform_init_func(kbdev);
+
+	return 0;
+}
+
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_term_func)
+		platform_funcs_p->platform_term_func(kbdev);
+}
+
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed)
+{
+	KBASE_DEBUG_ASSERT(NULL != clock_speed);
+
+	*clock_speed = 100;
+	return 0;
+}
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
new file mode 100644
index 0000000..356d52b
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
@@ -0,0 +1,345 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <asm/page.h>
+
+#include <mali_malisw.h>
+#include <mali_kbase_backend_config.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+#include <linux/rbtree.h>
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/**
+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ *
+ * Specifies the functions pointers for platform specific initialization and
+ * termination. By default no functions are required. No additional platform
+ * specific control is necessary.
+ */
+struct kbase_platform_funcs_conf {
+	/**
+	 * platform_init_func - platform specific init function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Returns 0 on success, negative error code otherwise.
+	 *
+	 * Function pointer for platform specific initialization or NULL if no
+	 * initialization function is required. At the point this the GPU is
+	 * not active and its power and clocks are in unknown (platform specific
+	 * state) as kbase doesn't yet have control of power and clocks.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly initialized) in here.
+	 */
+	int (*platform_init_func)(struct kbase_device *kbdev);
+	/**
+	 * platform_term_func - platform specific termination function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Function pointer for platform specific termination or NULL if no
+	 * termination function is required. At the point this the GPU will be
+	 * idle but still powered and clocked.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly terminated) in here.
+	 */
+	void (*platform_term_func)(struct kbase_device *kbdev);
+};
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+	/** Callback for when the GPU is idle and the power to it can be switched off.
+	 *
+	 * The system integrator can decide whether to either do nothing, just switch off
+	 * the clocks to the GPU, or to completely power down the GPU.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the GPU is about to become active and power must be supplied.
+	 *
+	 * This function must not return until the GPU is powered and clocked sufficiently for register access to
+	 * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
+	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 *
+	 * The return value of the first call to this function is ignored.
+	 *
+	 * @return 1 if the GPU state may have been lost, 0 otherwise.
+	 */
+	int (*power_on_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is requesting a suspend and GPU power
+	 * must be switched off.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a preceding call to power_off_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_off_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is resuming from a suspend and GPU
+	 * power must be switched on.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a following call to power_on_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_on_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_resume_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management initialization.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * will become active from calls made to the OS from within this function.
+	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else int error code.
+	 */
+	 int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management termination.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * should no longer be called by the OS on completion of this function.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-off power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_suspend callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
+	 */
+	void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-on power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_resume callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+
+	/*
+	 * Optional callback for checking if GPU can be suspended when idle
+	 *
+	 * This callback will be called by the runtime power management core
+	 * when the reference count goes to 0 to provide notification that the
+	 * GPU now seems idle.
+	 *
+	 * If this callback finds that the GPU can't be powered off, or handles
+	 * suspend by powering off directly or queueing up a power off, a
+	 * non-zero value must be returned to prevent the runtime PM core from
+	 * also triggering a suspend.
+	 *
+	 * Returning 0 will cause the runtime PM core to conduct a regular
+	 * autosuspend.
+	 *
+	 * This callback is optional and if not provided regular autosuspend
+	 * will be triggered.
+	 *
+	 * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+	 * this feature.
+	 *
+	 * Return 0 if GPU can be suspended, positive value if it can not be
+	 * suspeneded by runtime PM, else OS error code
+	 */
+	int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
+};
+
+/**
+ * kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC
+ * @clock_speed - see  kbase_cpu_clk_speed_func for details on the parameters
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * Default implementation of CPU_SPEED_FUNC. This function sets clock_speed
+ * to 100, so will be an underestimate for any real system.
+ */
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed);
+
+/**
+ * kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current CPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ *
+ * This is mainly used to implement OpenCL's clGetDeviceInfo().
+ */
+typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed);
+
+/**
+ * kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current GPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ * When an error is returned the caller assumes maximum GPU speed stored in
+ * gpu_freq_khz_max.
+ *
+ * If the system timer is not available then this function is required
+ * for the OpenCL queue profiling to return correct timing information.
+ *
+ */
+typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed);
+
+#ifdef CONFIG_OF
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+	u64 start;
+	u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ */
+struct kbase_io_resources {
+	u32                      job_irq_number;
+	u32                      mmu_irq_number;
+	u32                      gpu_irq_number;
+	struct kbase_io_memory_region io_memory_region;
+};
+
+struct kbase_platform_config {
+	const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init: - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes.  The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ *         Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that are outside of the Power Management
+ * callbacks.
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_platform_early_init - Early initialisation of the platform code
+ *
+ * This function will be called when the module is loaded to perform any
+ * early initialisation required by the platform code. Such as reading
+ * platform specific device tree entries for the GPU.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_early_init(void);
+
+#ifndef CONFIG_OF
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+/**
+ * kbase_platform_fake_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_fake_register()
+ */
+void kbase_platform_fake_unregister(void);
+#endif
+#endif
+
+	  /** @} *//* end group kbase_config */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_CONFIG_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100644
index 0000000..5187f3a
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,236 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+ * Irq throttle. It is the minimum desired time in between two
+ * consecutive gpu interrupts (given in 'us'). The irq throttle
+ * gpu register will be configured after this, taking into
+ * account the configured max frequency.
+ *
+ * Attached value: number in micro seconds
+ */
+#define DEFAULT_IRQ_THROTTLE_TIME_US 20
+
+/**
+* Boolean indicating whether the driver is configured to be secure at
+* a potential loss of performance.
+*
+* This currently affects only r0p0-15dev0 HW and earlier.
+*
+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+* performance:
+*
+* - When this is set to true, the driver remains fully secure,
+* but potentially loses performance compared with setting this to
+* false.
+* - When set to false, the driver is open to certain security
+* attacks.
+*
+* From r0p0-00rel0 and onwards, there is no security loss by setting
+* this to false, and no performance loss by setting it to
+* true.
+*/
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 */
+	KBASE_AID_32 = 0x0,
+
+	/**
+	 * Restrict GPU to a half of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_16 = 0x3,
+
+	/**
+	 * Restrict GPU to a quarter of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_8  = 0x2,
+
+	/**
+	 * Restrict GPU to an eighth of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_4  = 0x1
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
+ * defines which UMP device this GPU should be mapped to.
+ */
+#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
+
+/*
+ * Default period for DVFS sampling
+ */
+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+
+/*
+ * Power Management poweroff tick granuality. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/*
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/*
+ * Power Manager number of ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
+
+/*
+ * Default scheduling tick granuality
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
+
+/*
+ * Default timeout for some software jobs, after which the software event wait
+ * jobs will be cancelled.
+ */
+#define DEFAULT_JS_SOFT_JOB_TIMEOUT ((u32)3000) /* 3s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
+ */
+#define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */
+
+/*
+ * Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
+/*
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
+
+/*
+ * Perform GPU power down using only platform specific code, skipping DDK power
+ * management.
+ *
+ * If this is non-zero then kbase will avoid powering down shader cores, the
+ * tiler, and the L2 cache, instead just powering down the entire GPU through
+ * platform specific code. This may be required for certain platform
+ * integrations.
+ *
+ * Note that as this prevents kbase from powering down shader cores, this limits
+ * the available power policies to coarse_demand and always_on.
+ */
+#define PLATFORM_POWER_DOWN_ONLY (1)
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
new file mode 100644
index 0000000..1401380
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -0,0 +1,332 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_mem_linux.h>
+
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context.
+ *
+ * Return: new kbase context
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+{
+	struct kbase_context *kctx;
+	int err;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* zero-inited as lot of code assume it's zero'ed out on create */
+	kctx = vzalloc(sizeof(*kctx));
+
+	if (!kctx)
+		goto out;
+
+	/* creating a context is considered a disjoint event */
+	kbase_disjoint_event(kbdev);
+
+	kctx->kbdev = kbdev;
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+	if (is_compat)
+		kbase_ctx_flag_set(kctx, KCTX_COMPAT);
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kctx->timeline.owner_tgid = task_tgid_nr(current);
+#endif
+	atomic_set(&kctx->setup_complete, 0);
+	atomic_set(&kctx->setup_in_progress, 0);
+	spin_lock_init(&kctx->mm_update_lock);
+	kctx->process_mm = NULL;
+	atomic_set(&kctx->nonmapped_pages, 0);
+	kctx->slots_pullable = 0;
+	kctx->tgid = current->tgid;
+	kctx->pid = current->pid;
+
+	err = kbase_mem_pool_init(&kctx->mem_pool,
+			kbdev->mem_pool_max_size_default,
+			kctx->kbdev, &kbdev->mem_pool);
+	if (err)
+		goto free_kctx;
+
+	err = kbase_mem_evictable_init(kctx);
+	if (err)
+		goto free_pool;
+
+	atomic_set(&kctx->used_pages, 0);
+
+	err = kbase_jd_init(kctx);
+	if (err)
+		goto deinit_evictable;
+
+	err = kbasep_js_kctx_init(kctx);
+	if (err)
+		goto free_jd;	/* safe to call kbasep_js_kctx_term  in this case */
+
+	err = kbase_event_init(kctx);
+	if (err)
+		goto free_jd;
+
+	atomic_set(&kctx->drain_pending, 0);
+
+	mutex_init(&kctx->reg_lock);
+
+	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+	spin_lock_init(&kctx->waiting_soft_jobs_lock);
+#ifdef CONFIG_KDS
+	INIT_LIST_HEAD(&kctx->waiting_kds_resource);
+#endif
+	err = kbase_dma_fence_init(kctx);
+	if (err)
+		goto free_event;
+
+	err = kbase_mmu_init(kctx);
+	if (err)
+		goto term_dma_fence;
+
+	do {
+		err = kbase_mem_pool_grow(&kctx->mem_pool,
+				MIDGARD_MMU_BOTTOMLEVEL);
+		if (err)
+			goto pgd_no_mem;
+
+		mutex_lock(&kctx->mmu_lock);
+		kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+		mutex_unlock(&kctx->mmu_lock);
+	} while (!kctx->pgd);
+
+	kctx->aliasing_sink_page = kbase_mem_alloc_page(kctx->kbdev);
+	if (!kctx->aliasing_sink_page)
+		goto no_sink_page;
+
+	init_waitqueue_head(&kctx->event_queue);
+
+	kctx->cookies = KBASE_COOKIE_MASK;
+
+	/* Make sure page 0 is not used... */
+	err = kbase_region_tracker_init(kctx);
+	if (err)
+		goto no_region_tracker;
+
+	err = kbase_sticky_resource_init(kctx);
+	if (err)
+		goto no_sticky;
+
+	err = kbase_jit_init(kctx);
+	if (err)
+		goto no_jit;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_set(&kctx->jctx.work_id, 0);
+#endif
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
+#endif
+
+	kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+	mutex_init(&kctx->vinstr_cli_lock);
+
+	setup_timer(&kctx->soft_job_timeout,
+		    kbasep_soft_job_timeout_worker,
+		    (uintptr_t)kctx);
+
+	return kctx;
+
+no_jit:
+	kbase_gpu_vm_lock(kctx);
+	kbase_sticky_resource_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+no_sticky:
+	kbase_region_tracker_term(kctx);
+no_region_tracker:
+	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
+no_sink_page:
+	/* VM lock needed for the call to kbase_mmu_free_pgd */
+	kbase_gpu_vm_lock(kctx);
+	kbase_mmu_free_pgd(kctx);
+	kbase_gpu_vm_unlock(kctx);
+pgd_no_mem:
+	kbase_mmu_term(kctx);
+term_dma_fence:
+	kbase_dma_fence_term(kctx);
+free_event:
+	kbase_event_cleanup(kctx);
+free_jd:
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+	kbase_jd_exit(kctx);
+deinit_evictable:
+	kbase_mem_evictable_deinit(kctx);
+free_pool:
+	kbase_mem_pool_term(&kctx->mem_pool);
+free_kctx:
+	vfree(kctx);
+out:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context);
+
+static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+{
+	dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+}
+
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Calls kbase_destroy_os_context() to free OS specific structures.
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	int pages;
+	unsigned long pending_regions_to_clean;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+	/* Ensure the core is powered up for the destroy process */
+	/* A suspend won't happen here, because we're in a syscall from a userspace
+	 * thread. */
+	kbase_pm_context_active(kbdev);
+
+	kbase_jd_zap_context(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	/* Removing the rest of the debugfs entries here as we want to keep the
+	 * atom debugfs interface alive until all atoms have completed. This
+	 * is useful for debugging hung contexts. */
+	debugfs_remove_recursive(kctx->kctx_dentry);
+#endif
+
+	kbase_event_cleanup(kctx);
+
+	/*
+	 * JIT must be terminated before the code below as it must be called
+	 * without the region lock being held.
+	 * The code above ensures no new JIT allocations can be made by
+	 * by the time we get to this point of context tear down.
+	 */
+	kbase_jit_term(kctx);
+
+	kbase_gpu_vm_lock(kctx);
+
+	kbase_sticky_resource_term(kctx);
+
+	/* MMU is disabled as part of scheduling out the context */
+	kbase_mmu_free_pgd(kctx);
+
+	/* drop the aliasing sink page now that it can't be mapped anymore */
+	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
+
+	/* free pending region setups */
+	pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+	while (pending_regions_to_clean) {
+		unsigned int cookie = __ffs(pending_regions_to_clean);
+
+		BUG_ON(!kctx->pending_regions[cookie]);
+
+		kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+
+		kctx->pending_regions[cookie] = NULL;
+		pending_regions_to_clean &= ~(1UL << cookie);
+	}
+
+	kbase_region_tracker_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+
+	kbase_jd_exit(kctx);
+
+	kbase_pm_context_idle(kbdev);
+
+	kbase_dma_fence_term(kctx);
+
+	kbase_mmu_term(kctx);
+
+	pages = atomic_read(&kctx->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_evictable_deinit(kctx);
+	kbase_mem_pool_term(&kctx->mem_pool);
+	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+	vfree(kctx);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context);
+
+/**
+ * kbase_context_set_create_flags - Set creation flags on a context
+ * @kctx: Kbase context
+ * @flags: Flags to set
+ *
+ * Return: 0 on success
+ */
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
+{
+	int err = 0;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long irq_flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Validate flags */
+	if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+
+	/* Translate the flags */
+	if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+		kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* Latch the initial attributes into the Job Scheduler */
+	kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
+
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ out:
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h
new file mode 100644
index 0000000..a3f5bb0
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h
@@ -0,0 +1,90 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_CONTEXT_H_
+#define _KBASE_CONTEXT_H_
+
+#include <linux/atomic.h>
+
+
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+/**
+ * kbase_ctx_flag - Check if @flag is set on @kctx
+ * @kctx: Pointer to kbase context to check
+ * @flag: Flag to check
+ *
+ * Return: true if @flag is set on @kctx, false if not.
+ */
+static inline bool kbase_ctx_flag(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	return atomic_read(&kctx->flags) & flag;
+}
+
+/**
+ * kbase_ctx_flag_clear - Clear @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Clear the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
+					enum kbase_context_flags flag)
+{
+#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
+	/*
+	 * Earlier kernel versions doesn't have atomic_andnot() or
+	 * atomic_and(). atomic_clear_mask() was only available on some
+	 * architectures and removed on arm in v3.13 on arm and arm64.
+	 *
+	 * Use a compare-exchange loop to clear the flag on pre 4.3 kernels,
+	 * when atomic_andnot() becomes available.
+	 */
+	int old, new;
+
+	do {
+		old = atomic_read(&kctx->flags);
+		new = old & ~flag;
+
+	} while (atomic_cmpxchg(&kctx->flags, old, new) != old);
+#else
+	atomic_andnot(flag, &kctx->flags);
+#endif
+}
+
+/**
+ * kbase_ctx_flag_set - Set @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Set the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	atomic_or(flag, &kctx->flags);
+}
+#endif /* _KBASE_CONTEXT_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
new file mode 100644
index 0000000..e198fa1
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -0,0 +1,4322 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_uku.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_mem_linux.h>
+#ifdef CONFIG_MALI_DEVFREQ
+#include <backend/gpu/mali_kbase_devfreq.h>
+#endif /* CONFIG_MALI_DEVFREQ */
+#ifdef CONFIG_MALI_NO_MALI
+#include "mali_kbase_model_linux.h"
+#endif /* CONFIG_MALI_NO_MALI */
+#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_debugfs.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_regs_dump_debugfs.h"
+#endif /* !MALI_CUSTOMER_RELEASE */
+#include "mali_kbase_regs_history_debugfs.h"
+#include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#include <linux/anon_inodes.h>
+#include <linux/syscalls.h>
+#endif /* CONFIG_KDS */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/compat.h>	/* is_compat_task */
+#include <linux/mman.h>
+#include <linux/version.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+#include <mali_kbase_hw.h>
+#include <platform/mali_kbase_platform_common.h>
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+#include <platform/mali_kbase_platform_fake.h>
+#endif /*CONFIG_MALI_PLATFORM_FAKE */
+#ifdef CONFIG_SYNC
+#include <mali_kbase_sync.h>
+#endif /* CONFIG_SYNC */
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_PM_DEVFREQ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+
+#include <mali_kbase_config.h>
+
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif
+
+#include <mali_kbase_tlstream.h>
+
+#include <mali_kbase_as_fault_debugfs.h>
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+#if MALI_UNIT_TEST
+static struct kbase_exported_test_data shared_kernel_test_data;
+EXPORT_SYMBOL(shared_kernel_test_data);
+#endif /* MALI_UNIT_TEST */
+
+static int kbase_dev_nr;
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+extern int mali_pm_statue;
+#endif
+
+static DEFINE_MUTEX(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+
+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
+static inline void __compile_time_asserts(void)
+{
+	CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE);
+}
+
+static void kbase_create_timeline_objects(struct kbase_context *kctx)
+{
+	struct kbase_device             *kbdev = kctx->kbdev;
+	unsigned int                    lpu_id;
+	unsigned int                    as_nr;
+	struct kbasep_kctx_list_element *element;
+
+	/* Create LPU objects. */
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		u32 *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, lpu_id, *lpu);
+	}
+
+	/* Create Address Space objects. */
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(&kbdev->as[as_nr], as_nr);
+
+	/* Create GPU object and make it retain all LPUs and address spaces. */
+	KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(
+			kbdev,
+			kbdev->gpu_props.props.raw_props.gpu_id,
+			kbdev->gpu_props.num_cores);
+
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		void *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, kbdev);
+	}
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(
+				&kbdev->as[as_nr],
+				kbdev);
+
+	/* Create object for each known context. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry(element, &kbdev->kctx_list, link) {
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(
+				element->kctx,
+				(u32)(element->kctx->id),
+				(u32)(element->kctx->tgid));
+	}
+	/* Before releasing the lock, reset body stream buffers.
+	 * This will prevent context creation message to be directed to both
+	 * summary and body stream. */
+	kbase_tlstream_reset_body_streams();
+	mutex_unlock(&kbdev->kctx_list_lock);
+	/* Static object are placed into summary packet that needs to be
+	 * transmitted first. Flush all streams to make it available to
+	 * user space. */
+	kbase_tlstream_flush_streams();
+}
+
+static void kbase_api_handshake(struct uku_version_check_args *version)
+{
+	switch (version->major) {
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	case 6:
+		/* We are backwards compatible with version 6,
+		 * so pretend to be the old version */
+		version->major = 6;
+		version->minor = 1;
+		break;
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+#ifdef BASE_LEGACY_UK7_SUPPORT
+	case 7:
+		/* We are backwards compatible with version 7,
+		 * so pretend to be the old version */
+		version->major = 7;
+		version->minor = 1;
+		break;
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	case 8:
+		/* We are backwards compatible with version 8,
+		 * so pretend to be the old version */
+		version->major = 8;
+		version->minor = 4;
+		break;
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+#ifdef BASE_LEGACY_UK9_SUPPORT
+	case 9:
+		/* We are backwards compatible with version 9,
+		 * so pretend to be the old version */
+		version->major = 9;
+		version->minor = 0;
+		break;
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+	case BASE_UK_VERSION_MAJOR:
+		/* set minor to be the lowest common */
+		version->minor = min_t(int, BASE_UK_VERSION_MINOR,
+				(int)version->minor);
+		break;
+	default:
+		/* We return our actual version regardless if it
+		 * matches the version returned by userspace -
+		 * userspace can bail if it can't handle this
+		 * version */
+		version->major = BASE_UK_VERSION_MAJOR;
+		version->minor = BASE_UK_VERSION_MINOR;
+		break;
+	}
+}
+
+/**
+ * enum mali_error - Mali error codes shared with userspace
+ *
+ * This is subset of those common Mali errors that can be returned to userspace.
+ * Values of matching user and kernel space enumerators MUST be the same.
+ * MALI_ERROR_NONE is guaranteed to be 0.
+ *
+ * @MALI_ERROR_NONE: Success
+ * @MALI_ERROR_OUT_OF_GPU_MEMORY: Not used in the kernel driver
+ * @MALI_ERROR_OUT_OF_MEMORY: Memory allocation failure
+ * @MALI_ERROR_FUNCTION_FAILED: Generic error code
+ */
+enum mali_error {
+	MALI_ERROR_NONE = 0,
+	MALI_ERROR_OUT_OF_GPU_MEMORY,
+	MALI_ERROR_OUT_OF_MEMORY,
+	MALI_ERROR_FUNCTION_FAILED,
+};
+
+enum {
+	inited_mem = (1u << 0),
+	inited_js = (1u << 1),
+	inited_pm_runtime_init = (1u << 2),
+#ifdef CONFIG_MALI_DEVFREQ
+	inited_devfreq = (1u << 3),
+#endif /* CONFIG_MALI_DEVFREQ */
+	inited_tlstream = (1u << 4),
+	inited_backend_early = (1u << 5),
+	inited_backend_late = (1u << 6),
+	inited_device = (1u << 7),
+	inited_vinstr = (1u << 8),
+
+	inited_job_fault = (1u << 10),
+	inited_misc_register = (1u << 11),
+	inited_get_device = (1u << 12),
+	inited_sysfs_group = (1u << 13),
+	inited_dev_list = (1u << 14),
+	inited_debugfs = (1u << 15),
+	inited_gpu_device = (1u << 16),
+	inited_registers_map = (1u << 17),
+	inited_io_history = (1u << 18),
+	inited_power_control = (1u << 19),
+	inited_buslogger = (1u << 20)
+};
+
+
+#ifdef CONFIG_MALI_DEBUG
+#define INACTIVE_WAIT_MS (5000)
+
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive)
+{
+	kbdev->driver_inactive = inactive;
+	wake_up(&kbdev->driver_inactive_wait);
+
+	/* Wait for any running IOCTLs to complete */
+	if (inactive)
+		msleep(INACTIVE_WAIT_MS);
+}
+KBASE_EXPORT_TEST_API(kbase_set_driver_inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 args_size)
+{
+	struct kbase_device *kbdev;
+	union uk_header *ukh = args;
+	u32 id;
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(ukh != NULL);
+
+	kbdev = kctx->kbdev;
+	id = ukh->id;
+	ukh->ret = MALI_ERROR_NONE; /* Be optimistic */
+
+#ifdef CONFIG_MALI_DEBUG
+	wait_event(kbdev->driver_inactive_wait,
+			kbdev->driver_inactive == false);
+#endif /* CONFIG_MALI_DEBUG */
+
+	if (UKP_FUNC_ID_CHECK_VERSION == id) {
+		struct uku_version_check_args *version_check;
+
+		if (args_size != sizeof(struct uku_version_check_args)) {
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			return 0;
+		}
+		version_check = (struct uku_version_check_args *)args;
+		kbase_api_handshake(version_check);
+		/* save the proposed version number for later use */
+		kctx->api_version = KBASE_API_VERSION(version_check->major,
+				version_check->minor);
+		ukh->ret = MALI_ERROR_NONE;
+		return 0;
+	}
+
+	/* block calls until version handshake */
+	if (kctx->api_version == 0)
+		return -EINVAL;
+
+	if (!atomic_read(&kctx->setup_complete)) {
+		struct kbase_uk_set_flags *kbase_set_flags;
+
+		/* setup pending, try to signal that we'll do the setup,
+		 * if setup was already in progress, err this call
+		 */
+		if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0)
+			return -EINVAL;
+
+		/* if unexpected call, will stay stuck in setup mode
+		 * (is it the only call we accept?)
+		 */
+		if (id != KBASE_FUNC_SET_FLAGS)
+			return -EINVAL;
+
+		kbase_set_flags = (struct kbase_uk_set_flags *)args;
+
+		/* if not matching the expected call, stay in setup mode */
+		if (sizeof(*kbase_set_flags) != args_size)
+			goto bad_size;
+
+		/* if bad flags, will stay stuck in setup mode */
+		if (kbase_context_set_create_flags(kctx,
+				kbase_set_flags->create_flags) != 0)
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+		atomic_set(&kctx->setup_complete, 1);
+		return 0;
+	}
+
+	/* setup complete, perform normal operation */
+	switch (id) {
+	case KBASE_FUNC_MEM_JIT_INIT:
+		{
+			struct kbase_uk_mem_jit_init *jit_init = args;
+
+			if (sizeof(*jit_init) != args_size)
+				goto bad_size;
+
+			if (kbase_region_tracker_init_jit(kctx,
+					jit_init->va_pages))
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_MEM_ALLOC:
+		{
+			struct kbase_uk_mem_alloc *mem = args;
+			struct kbase_va_region *reg;
+
+			if (sizeof(*mem) != args_size)
+				goto bad_size;
+
+#if defined(CONFIG_64BIT)
+			if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+				/* force SAME_VA if a 64-bit client */
+				mem->flags |= BASE_MEM_SAME_VA;
+			}
+#endif
+
+			reg = kbase_mem_alloc(kctx, mem->va_pages,
+					mem->commit_pages, mem->extent,
+					&mem->flags, &mem->gpu_va);
+			mem->va_alignment = 0;
+
+			if (!reg)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_MEM_IMPORT: {
+			struct kbase_uk_mem_import *mem_import = args;
+			void __user *phandle;
+
+			if (sizeof(*mem_import) != args_size)
+				goto bad_size;
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				phandle = compat_ptr(mem_import->phandle.compat_value);
+			else
+#endif
+				phandle = mem_import->phandle.value;
+
+			if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_import(kctx,
+					(enum base_mem_import_type)
+					mem_import->type,
+					phandle,
+					&mem_import->gpu_va,
+					&mem_import->va_pages,
+					&mem_import->flags)) {
+				mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID;
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			}
+			break;
+	}
+	case KBASE_FUNC_MEM_ALIAS: {
+			struct kbase_uk_mem_alias *alias = args;
+			struct base_mem_aliasing_info __user *user_ai;
+			struct base_mem_aliasing_info *ai;
+
+			if (sizeof(*alias) != args_size)
+				goto bad_size;
+
+			if (alias->nents > 2048) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+			if (!alias->nents) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				user_ai = compat_ptr(alias->ai.compat_value);
+			else
+#endif
+				user_ai = alias->ai.value;
+
+			ai = vmalloc(sizeof(*ai) * alias->nents);
+
+			if (!ai) {
+				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+				break;
+			}
+
+			if (copy_from_user(ai, user_ai,
+					   sizeof(*ai) * alias->nents)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				goto copy_failed;
+			}
+
+			alias->gpu_va = kbase_mem_alias(kctx, &alias->flags,
+							alias->stride,
+							alias->nents, ai,
+							&alias->va_pages);
+			if (!alias->gpu_va) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				goto no_alias;
+			}
+no_alias:
+copy_failed:
+			vfree(ai);
+			break;
+		}
+	case KBASE_FUNC_MEM_COMMIT:
+		{
+			struct kbase_uk_mem_commit *commit = args;
+
+			if (sizeof(*commit) != args_size)
+				goto bad_size;
+
+			if (commit->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_COMMIT: commit->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_commit(kctx, commit->gpu_addr,
+					commit->pages,
+					(base_backing_threshold_status *)
+					&commit->result_subcode) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+
+	case KBASE_FUNC_MEM_QUERY:
+		{
+			struct kbase_uk_mem_query *query = args;
+
+			if (sizeof(*query) != args_size)
+				goto bad_size;
+
+			if (query->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+			if (query->query != KBASE_MEM_QUERY_COMMIT_SIZE &&
+			    query->query != KBASE_MEM_QUERY_VA_SIZE &&
+				query->query != KBASE_MEM_QUERY_FLAGS) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->query = %lld unknown", (unsigned long long)query->query);
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_query(kctx, query->gpu_addr,
+					query->query, &query->value) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			break;
+		}
+		break;
+
+	case KBASE_FUNC_MEM_FLAGS_CHANGE:
+		{
+			struct kbase_uk_mem_flags_change *fc = args;
+
+			if (sizeof(*fc) != args_size)
+				goto bad_size;
+
+			if ((fc->gpu_va & ~PAGE_MASK) && (fc->gpu_va >= PAGE_SIZE)) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FLAGS_CHANGE: mem->gpu_va: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_flags_change(kctx, fc->gpu_va,
+					fc->flags, fc->mask) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+	case KBASE_FUNC_MEM_FREE:
+		{
+			struct kbase_uk_mem_free *mem = args;
+
+			if (sizeof(*mem) != args_size)
+				goto bad_size;
+
+			if ((mem->gpu_addr & ~PAGE_MASK) && (mem->gpu_addr >= PAGE_SIZE)) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FREE: mem->gpu_addr: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_mem_free(kctx, mem->gpu_addr) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+
+	case KBASE_FUNC_JOB_SUBMIT:
+		{
+			struct kbase_uk_job_submit *job = args;
+
+			if (sizeof(*job) != args_size)
+				goto bad_size;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+			if (kbase_jd_submit(kctx, job, 0) != 0)
+#else
+			if (kbase_jd_submit(kctx, job) != 0)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	case KBASE_FUNC_JOB_SUBMIT_UK6:
+		{
+			struct kbase_uk_job_submit *job = args;
+
+			if (sizeof(*job) != args_size)
+				goto bad_size;
+
+			if (kbase_jd_submit(kctx, job, 1) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+#endif
+
+	case KBASE_FUNC_SYNC:
+		{
+			struct kbase_uk_sync_now *sn = args;
+
+			if (sizeof(*sn) != args_size)
+				goto bad_size;
+
+			if (sn->sset.basep_sset.mem_handle.basep.handle & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid");
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+#ifndef CONFIG_MALI_COH_USER
+			if (kbase_sync_now(kctx, &sn->sset) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif
+			break;
+		}
+
+	case KBASE_FUNC_DISJOINT_QUERY:
+		{
+			struct kbase_uk_disjoint_query *dquery = args;
+
+			if (sizeof(*dquery) != args_size)
+				goto bad_size;
+
+			/* Get the disjointness counter value. */
+			dquery->counter = kbase_disjoint_event_get(kctx->kbdev);
+			break;
+		}
+
+	case KBASE_FUNC_POST_TERM:
+		{
+			kbase_event_close(kctx);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_SETUP:
+		{
+			struct kbase_uk_hwcnt_setup *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_legacy_hwc_setup(kbdev->vinstr_ctx,
+					&kctx->vinstr_cli, setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_DUMP:
+		{
+			/* args ignored */
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwc_dump(kctx->vinstr_cli,
+					BASE_HWCNT_READER_EVENT_MANUAL) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_CLEAR:
+		{
+			/* args ignored */
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwc_clear(kctx->vinstr_cli) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_HWCNT_READER_SETUP:
+		{
+			struct kbase_uk_hwcnt_reader_setup *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			mutex_lock(&kctx->vinstr_cli_lock);
+			if (kbase_vinstr_hwcnt_reader_setup(kbdev->vinstr_ctx,
+					setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			mutex_unlock(&kctx->vinstr_cli_lock);
+			break;
+		}
+
+	case KBASE_FUNC_GPU_PROPS_REG_DUMP:
+		{
+			struct kbase_uk_gpuprops *setup = args;
+
+			if (sizeof(*setup) != args_size)
+				goto bad_size;
+
+			if (kbase_gpuprops_uk_get_props(kctx, setup) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+	case KBASE_FUNC_FIND_CPU_OFFSET:
+		{
+			struct kbase_uk_find_cpu_offset *find = args;
+
+			if (sizeof(*find) != args_size)
+				goto bad_size;
+
+			if (find->gpu_addr & ~PAGE_MASK) {
+				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_FIND_CPU_OFFSET: find->gpu_addr: passed parameter is invalid");
+				goto out_bad;
+			}
+
+			if (find->size > SIZE_MAX || find->cpu_addr > ULONG_MAX) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else {
+				int err;
+
+				err = kbasep_find_enclosing_cpu_mapping_offset(
+						kctx,
+						find->cpu_addr,
+						find->size,
+						&find->offset);
+
+				if (err)
+					ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			}
+			break;
+		}
+	case KBASE_FUNC_GET_VERSION:
+		{
+			struct kbase_uk_get_ddk_version *get_version = (struct kbase_uk_get_ddk_version *)args;
+
+			if (sizeof(*get_version) != args_size)
+				goto bad_size;
+
+			/* version buffer size check is made in compile time assert */
+			memcpy(get_version->version_buffer, KERNEL_SIDE_DDK_VERSION_STRING, sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+			get_version->version_string_size = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+			break;
+		}
+
+	case KBASE_FUNC_STREAM_CREATE:
+		{
+#ifdef CONFIG_SYNC
+			struct kbase_uk_stream_create *screate = (struct kbase_uk_stream_create *)args;
+
+			if (sizeof(*screate) != args_size)
+				goto bad_size;
+
+			if (strnlen(screate->name, sizeof(screate->name)) >= sizeof(screate->name)) {
+				/* not NULL terminated */
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				break;
+			}
+
+			if (kbase_stream_create(screate->name, &screate->fd) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+#else /* CONFIG_SYNC */
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif /* CONFIG_SYNC */
+			break;
+		}
+	case KBASE_FUNC_FENCE_VALIDATE:
+		{
+#ifdef CONFIG_SYNC
+			struct kbase_uk_fence_validate *fence_validate = (struct kbase_uk_fence_validate *)args;
+
+			if (sizeof(*fence_validate) != args_size)
+				goto bad_size;
+
+			if (kbase_fence_validate(fence_validate->fd) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+#endif /* CONFIG_SYNC */
+			break;
+		}
+
+	case KBASE_FUNC_SET_TEST_DATA:
+		{
+#if MALI_UNIT_TEST
+			struct kbase_uk_set_test_data *set_data = args;
+
+			shared_kernel_test_data = set_data->test_data;
+			shared_kernel_test_data.kctx.value = (void __user *)kctx;
+			shared_kernel_test_data.mm.value = (void __user *)current->mm;
+			ukh->ret = MALI_ERROR_NONE;
+#endif /* MALI_UNIT_TEST */
+			break;
+		}
+
+	case KBASE_FUNC_INJECT_ERROR:
+		{
+#ifdef CONFIG_MALI_ERROR_INJECT
+			unsigned long flags;
+			struct kbase_error_params params = ((struct kbase_uk_error_params *)args)->params;
+
+			/*mutex lock */
+			spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+			if (job_atom_inject_error(&params) != 0)
+				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+			/*mutex unlock */
+#endif /* CONFIG_MALI_ERROR_INJECT */
+			break;
+		}
+
+	case KBASE_FUNC_MODEL_CONTROL:
+		{
+#ifdef CONFIG_MALI_NO_MALI
+			unsigned long flags;
+			struct kbase_model_control_params params =
+					((struct kbase_uk_model_control_params *)args)->params;
+
+			/*mutex lock */
+			spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+			if (gpu_model_control(kbdev->model, &params) != 0)
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			else
+				ukh->ret = MALI_ERROR_NONE;
+			spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+			/*mutex unlock */
+#endif /* CONFIG_MALI_NO_MALI */
+			break;
+		}
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	case KBASE_FUNC_KEEP_GPU_POWERED:
+		{
+			dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_KEEP_GPU_POWERED: function is deprecated and disabled\n");
+			ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+	case KBASE_FUNC_GET_PROFILING_CONTROLS:
+		{
+			struct kbase_uk_profiling_controls *controls =
+					(struct kbase_uk_profiling_controls *)args;
+			u32 i;
+
+			if (sizeof(*controls) != args_size)
+				goto bad_size;
+
+			for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+				controls->profiling_controls[i] = kbase_get_profiling_control(kbdev, i);
+
+			break;
+		}
+
+	/* used only for testing purposes; these controls are to be set by gator through gator API */
+	case KBASE_FUNC_SET_PROFILING_CONTROLS:
+		{
+			struct kbase_uk_profiling_controls *controls =
+					(struct kbase_uk_profiling_controls *)args;
+			u32 i;
+
+			if (sizeof(*controls) != args_size)
+				goto bad_size;
+
+			for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+				_mali_profiling_control(i, controls->profiling_controls[i]);
+
+			break;
+		}
+
+	case KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD:
+		{
+			struct kbase_uk_debugfs_mem_profile_add *add_data =
+					(struct kbase_uk_debugfs_mem_profile_add *)args;
+			char *buf;
+			char __user *user_buf;
+
+			if (sizeof(*add_data) != args_size)
+				goto bad_size;
+
+			if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+				dev_err(kbdev->dev, "buffer too big\n");
+				goto out_bad;
+			}
+
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				user_buf = compat_ptr(add_data->buf.compat_value);
+			else
+#endif
+				user_buf = add_data->buf.value;
+
+			buf = kmalloc(add_data->len, GFP_KERNEL);
+			if (ZERO_OR_NULL_PTR(buf))
+				goto out_bad;
+
+			if (0 != copy_from_user(buf, user_buf, add_data->len)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				kfree(buf);
+				goto out_bad;
+			}
+
+			if (kbasep_mem_profile_debugfs_insert(kctx, buf,
+							add_data->len)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+				kfree(buf);
+				goto out_bad;
+			}
+
+			break;
+		}
+
+#ifdef CONFIG_MALI_NO_MALI
+	case KBASE_FUNC_SET_PRFCNT_VALUES:
+		{
+
+			struct kbase_uk_prfcnt_values *params =
+			  ((struct kbase_uk_prfcnt_values *)args);
+			gpu_model_set_dummy_prfcnt_sample(params->data,
+					params->size);
+
+			break;
+		}
+#endif /* CONFIG_MALI_NO_MALI */
+#ifdef BASE_LEGACY_UK10_4_SUPPORT
+	case KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4:
+		{
+			struct kbase_uk_tlstream_acquire_v10_4 *tlstream_acquire
+					= args;
+
+			if (sizeof(*tlstream_acquire) != args_size)
+				goto bad_size;
+
+			if (0 != kbase_tlstream_acquire(
+						kctx,
+						&tlstream_acquire->fd, 0)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else if (0 <= tlstream_acquire->fd) {
+				/* Summary stream was cleared during acquire.
+				 * Create static timeline objects that will be
+				 * read by client. */
+				kbase_create_timeline_objects(kctx);
+			}
+			break;
+		}
+#endif /* BASE_LEGACY_UK10_4_SUPPORT */
+	case KBASE_FUNC_TLSTREAM_ACQUIRE:
+		{
+			struct kbase_uk_tlstream_acquire *tlstream_acquire =
+				args;
+
+			if (sizeof(*tlstream_acquire) != args_size)
+				goto bad_size;
+
+			if (tlstream_acquire->flags & ~BASE_TLSTREAM_FLAGS_MASK)
+				goto out_bad;
+
+			if (0 != kbase_tlstream_acquire(
+						kctx,
+						&tlstream_acquire->fd,
+						tlstream_acquire->flags)) {
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			} else if (0 <= tlstream_acquire->fd) {
+				/* Summary stream was cleared during acquire.
+				 * Create static timeline objects that will be
+				 * read by client. */
+				kbase_create_timeline_objects(kctx);
+			}
+			break;
+		}
+	case KBASE_FUNC_TLSTREAM_FLUSH:
+		{
+			struct kbase_uk_tlstream_flush *tlstream_flush =
+				args;
+
+			if (sizeof(*tlstream_flush) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_flush_streams();
+			break;
+		}
+#if MALI_UNIT_TEST
+	case KBASE_FUNC_TLSTREAM_TEST:
+		{
+			struct kbase_uk_tlstream_test *tlstream_test = args;
+
+			if (sizeof(*tlstream_test) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_test(
+					tlstream_test->tpw_count,
+					tlstream_test->msg_delay,
+					tlstream_test->msg_count,
+					tlstream_test->aux_msg);
+			break;
+		}
+	case KBASE_FUNC_TLSTREAM_STATS:
+		{
+			struct kbase_uk_tlstream_stats *tlstream_stats = args;
+
+			if (sizeof(*tlstream_stats) != args_size)
+				goto bad_size;
+
+			kbase_tlstream_stats(
+					&tlstream_stats->bytes_collected,
+					&tlstream_stats->bytes_generated);
+			break;
+		}
+#endif /* MALI_UNIT_TEST */
+
+	case KBASE_FUNC_GET_CONTEXT_ID:
+		{
+			struct kbase_uk_context_id *info = args;
+
+			info->id = kctx->id;
+			break;
+		}
+
+	case KBASE_FUNC_SOFT_EVENT_UPDATE:
+		{
+			struct kbase_uk_soft_event_update *update = args;
+
+			if (sizeof(*update) != args_size)
+				goto bad_size;
+
+			if (((update->new_status != BASE_JD_SOFT_EVENT_SET) &&
+			    (update->new_status != BASE_JD_SOFT_EVENT_RESET)) ||
+			    (update->flags != 0))
+				goto out_bad;
+
+			if (kbase_soft_event_update(kctx, update->evt,
+						update->new_status))
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+
+	default:
+		dev_err(kbdev->dev, "unknown ioctl %u\n", id);
+		goto out_bad;
+	}
+
+	return ret;
+
+ bad_size:
+	dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id);
+ out_bad:
+	return -EINVAL;
+}
+
+static struct kbase_device *to_kbase_device(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
+
+static int assign_irqs(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int i;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* 3 IRQ resources */
+	for (i = 0; i < 3; i++) {
+		struct resource *irq_res;
+		int irqtag;
+
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res) {
+			dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+			return -ENOENT;
+		}
+
+#ifdef CONFIG_OF
+		if (!strncmp(irq_res->name, "JOB", 4)) {
+			irqtag = JOB_IRQ_TAG;
+		} else if (!strncmp(irq_res->name, "MMU", 4)) {
+			irqtag = MMU_IRQ_TAG;
+		} else if (!strncmp(irq_res->name, "GPU", 4)) {
+			irqtag = GPU_IRQ_TAG;
+		} else {
+			dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+				irq_res->name);
+			return -EINVAL;
+		}
+#else
+		irqtag = i;
+#endif /* CONFIG_OF */
+		kbdev->irqs[irqtag].irq = irq_res->start;
+		kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK;
+	}
+
+	return 0;
+}
+
+/*
+ * API to acquire device list mutex and
+ * return pointer to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void)
+{
+	mutex_lock(&kbase_dev_list_lock);
+	return &kbase_dev_list;
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_get);
+
+/* API to release the device list mutex */
+void kbase_dev_list_put(const struct list_head *dev_list)
+{
+	mutex_unlock(&kbase_dev_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_put);
+
+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */
+struct kbase_device *kbase_find_device(int minor)
+{
+	struct kbase_device *kbdev = NULL;
+	struct list_head *entry;
+	const struct list_head *dev_list = kbase_dev_list_get();
+
+	list_for_each(entry, dev_list) {
+		struct kbase_device *tmp;
+
+		tmp = list_entry(entry, struct kbase_device, entry);
+		if (tmp->mdev.minor == minor || minor == -1) {
+			kbdev = tmp;
+			get_device(kbdev->dev);
+			break;
+		}
+	}
+	kbase_dev_list_put(dev_list);
+
+	return kbdev;
+}
+EXPORT_SYMBOL(kbase_find_device);
+
+void kbase_release_device(struct kbase_device *kbdev)
+{
+	put_device(kbdev->dev);
+}
+EXPORT_SYMBOL(kbase_release_device);
+
+#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
+/*
+ * Older versions, before v4.6, of the kernel doesn't have
+ * kstrtobool_from_user().
+ */
+static int kstrtobool_from_user(const char __user *s, size_t count, bool *res)
+{
+	char buf[32];
+
+	count = min(sizeof(buf), count);
+
+	if (copy_from_user(buf, s, count))
+		return -EFAULT;
+	buf[count] = '\0';
+
+	return strtobool(buf, res);
+}
+#endif
+
+static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	int err;
+	bool value;
+
+	err = kstrtobool_from_user(ubuf, size, &value);
+	if (err)
+		return err;
+
+	if (value)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+	else
+		kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE);
+
+	return size;
+}
+
+static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	char buf[32];
+	int count;
+	bool value;
+
+	value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE);
+
+	count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N");
+
+	return simple_read_from_buffer(ubuf, size, off, buf, count);
+}
+
+static const struct file_operations kbase_infinite_cache_fops = {
+	.open = simple_open,
+	.write = write_ctx_infinite_cache,
+	.read = read_ctx_infinite_cache,
+};
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+	struct kbase_device *kbdev = NULL;
+	struct kbase_context *kctx;
+	int ret = 0;
+#ifdef CONFIG_DEBUG_FS
+	char kctx_name[64];
+#endif
+
+	kbdev = kbase_find_device(iminor(inode));
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kctx = kbase_create_context(kbdev, is_compat_task());
+	if (!kctx) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	init_waitqueue_head(&kctx->event_queue);
+	filp->private_data = kctx;
+	kctx->filp = filp;
+
+	if (kbdev->infinite_cache_active_default)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+
+#ifdef CONFIG_DEBUG_FS
+	snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+
+	kctx->kctx_dentry = debugfs_create_dir(kctx_name,
+			kbdev->debugfs_ctx_directory);
+
+	if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+#ifdef CONFIG_MALI_COH_USER
+	 /* if cache is completely coherent at hardware level, then remove the
+	  * infinite cache control support from debugfs.
+	  */
+#else
+	debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry,
+			    kctx, &kbase_infinite_cache_fops);
+#endif /* CONFIG_MALI_COH_USER */
+
+	mutex_init(&kctx->mem_profile_lock);
+
+	kbasep_jd_debugfs_ctx_init(kctx);
+	kbase_debug_mem_view_init(filp);
+
+	kbase_debug_job_fault_context_init(kctx);
+
+	kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool);
+
+	kbase_jit_debugfs_init(kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+	dev_dbg(kbdev->dev, "created base context\n");
+
+	{
+		struct kbasep_kctx_list_element *element;
+
+		element = kzalloc(sizeof(*element), GFP_KERNEL);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			element->kctx = kctx;
+			list_add(&element->link, &kbdev->kctx_list);
+			KBASE_TLSTREAM_TL_NEW_CTX(
+					element->kctx,
+					(u32)(element->kctx->id),
+					(u32)(element->kctx->tgid));
+			mutex_unlock(&kbdev->kctx_list_lock);
+		} else {
+			/* we don't treat this as a fail - just warn about it */
+			dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n");
+		}
+	}
+	return 0;
+
+ out:
+	kbase_release_device(kbdev);
+	return ret;
+}
+
+static int kbase_release(struct inode *inode, struct file *filp)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_kctx_list_element *element, *tmp;
+	bool found_element = false;
+
+	KBASE_TLSTREAM_TL_DEL_CTX(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	kbasep_mem_profile_debugfs_remove(kctx);
+	kbase_debug_job_fault_context_term(kctx);
+#endif
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found_element = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+	if (!found_element)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	filp->private_data = NULL;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	/* If this client was performing hwcnt dumping and did not explicitly
+	 * detach itself, remove it from the vinstr core now */
+	if (kctx->vinstr_cli) {
+		struct kbase_uk_hwcnt_setup setup;
+
+		setup.dump_buffer = 0llu;
+		kbase_vinstr_legacy_hwc_setup(
+				kbdev->vinstr_ctx, &kctx->vinstr_cli, &setup);
+	}
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	kbase_destroy_context(kctx);
+
+	dev_dbg(kbdev->dev, "deleted base context\n");
+	kbase_release_device(kbdev);
+	return 0;
+}
+
+#define CALL_MAX_SIZE 536
+
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	u64 msg[(CALL_MAX_SIZE + 7) >> 3] = { 0xdeadbeefdeadbeefull };	/* alignment fixup */
+	u32 size = _IOC_SIZE(cmd);
+	struct kbase_context *kctx = filp->private_data;
+
+	if (size > CALL_MAX_SIZE)
+		return -ENOTTY;
+
+	if (0 != copy_from_user(&msg, (void __user *)arg, size)) {
+		dev_err(kctx->kbdev->dev, "failed to copy ioctl argument into kernel space\n");
+		return -EFAULT;
+	}
+
+	if (kbase_dispatch(kctx, &msg, size) != 0)
+		return -EFAULT;
+
+	if (0 != copy_to_user((void __user *)arg, &msg, size)) {
+		dev_err(kctx->kbdev->dev, "failed to copy results of UK call back to user space\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct base_jd_event_v2 uevent;
+	int out_count = 0;
+
+	if (count < sizeof(uevent))
+		return -ENOBUFS;
+
+	do {
+		while (kbase_event_dequeue(kctx, &uevent)) {
+			if (out_count > 0)
+				goto out;
+
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			if (wait_event_interruptible(kctx->event_queue,
+					kbase_event_pending(kctx)) != 0)
+				return -ERESTARTSYS;
+		}
+		if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+			if (out_count == 0)
+				return -EPIPE;
+			goto out;
+		}
+
+		if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0)
+			return -EFAULT;
+
+		buf += sizeof(uevent);
+		out_count++;
+		count -= sizeof(uevent);
+	} while (count >= sizeof(uevent));
+
+ out:
+	return out_count * sizeof(uevent);
+}
+
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+	struct kbase_context *kctx = filp->private_data;
+
+	poll_wait(filp, &kctx->event_queue, wait);
+	if (kbase_event_pending(kctx))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup);
+
+static int kbase_check_flags(int flags)
+{
+	/* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
+	 * closes the file descriptor in a child process.
+	 */
+	if (0 == (flags & O_CLOEXEC))
+		return -EINVAL;
+
+	return 0;
+}
+
+
+/**
+ * align_and_check - Align the specified pointer to the provided alignment and
+ *                   check that it is still in range.
+ * @gap_end:        Highest possible start address for allocation (end of gap in
+ *                  address space)
+ * @gap_start:      Start address of current memory area / gap in address space
+ * @info:           vm_unmapped_area_info structure passed to caller, containing
+ *                  alignment, length and limits for the allocation
+ * @is_shader_code: True if the allocation is for shader code (which has
+ *                  additional alignment requirements)
+ *
+ * Return: true if gap_end is now aligned correctly and is still in range,
+ *         false otherwise
+ */
+static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
+		struct vm_unmapped_area_info *info, bool is_shader_code)
+{
+	/* Compute highest gap address at the desired alignment */
+	(*gap_end) -= info->length;
+	(*gap_end) -= (*gap_end - info->align_offset) & info->align_mask;
+
+	if (is_shader_code) {
+		/* Check for 4GB boundary */
+		if (0 == (*gap_end & BASE_MEM_MASK_4GB))
+			(*gap_end) -= (info->align_offset ? info->align_offset :
+					info->length);
+		if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB))
+			(*gap_end) -= (info->align_offset ? info->align_offset :
+					info->length);
+
+		if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end +
+				info->length) & BASE_MEM_MASK_4GB))
+			return false;
+	}
+
+
+	if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
+		return false;
+
+
+	return true;
+}
+
+/* The following function is taken from the kernel and just
+ * renamed. As it's not exported to modules we must copy-paste it here.
+ */
+
+static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
+		*info, bool is_shader_code)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+
+	/* Adjust search length to account for worst case alignment overhead */
+	length = info->length + info->align_mask;
+	if (length < info->length)
+		return -ENOMEM;
+
+	/*
+	 * Adjust search limits by the desired length.
+	 * See implementation comment at top of unmapped_area().
+	 */
+	gap_end = info->high_limit;
+	if (gap_end < length)
+		return -ENOMEM;
+	high_limit = gap_end - length;
+
+	if (info->low_limit > high_limit)
+		return -ENOMEM;
+	low_limit = info->low_limit + length;
+
+	/* Check highest gap, which does not precede any rbtree node */
+	gap_start = mm->highest_vm_end;
+	if (gap_start <= high_limit) {
+		if (align_and_check(&gap_end, gap_start, info, is_shader_code))
+			return gap_end;
+	}
+
+	/* Check if rbtree root looks promising */
+	if (RB_EMPTY_ROOT(&mm->mm_rb))
+		return -ENOMEM;
+	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+	if (vma->rb_subtree_gap < length)
+		return -ENOMEM;
+
+	while (true) {
+		/* Visit right subtree if it looks promising */
+		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+			struct vm_area_struct *right =
+				rb_entry(vma->vm_rb.rb_right,
+					 struct vm_area_struct, vm_rb);
+			if (right->rb_subtree_gap >= length) {
+				vma = right;
+				continue;
+			}
+		}
+
+check_current:
+		/* Check if current node has a suitable gap */
+		gap_end = vma->vm_start;
+		if (gap_end < low_limit)
+			return -ENOMEM;
+		if (gap_start <= high_limit && gap_end - gap_start >= length) {
+			/* We found a suitable gap. Clip it with the original
+			 * high_limit. */
+			if (gap_end > info->high_limit)
+				gap_end = info->high_limit;
+
+			if (align_and_check(&gap_end, gap_start, info,
+					is_shader_code))
+				return gap_end;
+		}
+
+		/* Visit left subtree if it looks promising */
+		if (vma->vm_rb.rb_left) {
+			struct vm_area_struct *left =
+				rb_entry(vma->vm_rb.rb_left,
+					 struct vm_area_struct, vm_rb);
+			if (left->rb_subtree_gap >= length) {
+				vma = left;
+				continue;
+			}
+		}
+
+		/* Go back up the rbtree to find next candidate node */
+		while (true) {
+			struct rb_node *prev = &vma->vm_rb;
+			if (!rb_parent(prev))
+				return -ENOMEM;
+			vma = rb_entry(rb_parent(prev),
+				       struct vm_area_struct, vm_rb);
+			if (prev == vma->vm_rb.rb_right) {
+				gap_start = vma->vm_prev ?
+					vma->vm_prev->vm_end : 0;
+				goto check_current;
+			}
+		}
+	}
+
+	return -ENOMEM;
+}
+
+static unsigned long kbase_get_unmapped_area(struct file *filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	/* based on get_unmapped_area, but simplified slightly due to that some
+	 * values are known in advance */
+	struct kbase_context *kctx = filp->private_data;
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+	unsigned long align_offset = 0;
+	unsigned long align_mask = 0;
+	unsigned long high_limit = mm->mmap_base;
+	unsigned long low_limit = PAGE_SIZE;
+	int cpu_va_bits = BITS_PER_LONG;
+	int gpu_pc_bits =
+	      kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	bool is_shader_code = false;
+
+	/* err on fixed address */
+	if ((flags & MAP_FIXED) || addr)
+		return -EINVAL;
+
+#ifdef CONFIG_64BIT
+	/* too big? */
+	if (len > TASK_SIZE - SZ_2M)
+		return -ENOMEM;
+
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+
+		if (kbase_hw_has_feature(kctx->kbdev,
+						BASE_HW_FEATURE_33BIT_VA)) {
+			high_limit = kctx->same_va_end << PAGE_SHIFT;
+		} else {
+			high_limit = min_t(unsigned long, mm->mmap_base,
+					(kctx->same_va_end << PAGE_SHIFT));
+			if (len >= SZ_2M) {
+				align_offset = SZ_2M;
+				align_mask = SZ_2M - 1;
+			}
+		}
+
+		low_limit = SZ_2M;
+	} else {
+		cpu_va_bits = 32;
+	}
+#endif /* CONFIG_64BIT */
+	if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) &&
+		(PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) {
+			int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+
+			if (!kctx->pending_regions[cookie])
+				return -EINVAL;
+
+			if (!(kctx->pending_regions[cookie]->flags &
+							KBASE_REG_GPU_NX)) {
+				if (cpu_va_bits > gpu_pc_bits) {
+					align_offset = 1ULL << gpu_pc_bits;
+					align_mask = align_offset - 1;
+					is_shader_code = true;
+				}
+			}
+#ifndef CONFIG_64BIT
+	} else {
+		return current->mm->get_unmapped_area(filp, addr, len, pgoff,
+						      flags);
+#endif
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = low_limit;
+	info.high_limit = high_limit;
+	info.align_offset = align_offset;
+	info.align_mask = align_mask;
+
+	return kbase_unmapped_area_topdown(&info, is_shader_code);
+}
+
+static const struct file_operations kbase_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_open,
+	.release = kbase_release,
+	.read = kbase_read,
+	.poll = kbase_poll,
+	.unlocked_ioctl = kbase_ioctl,
+	.compat_ioctl = kbase_ioctl,
+	.mmap = kbase_mmap,
+	.check_flags = kbase_check_flags,
+	.get_unmapped_area = kbase_get_unmapped_area,
+};
+
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value)
+{
+	writel(value, kbdev->reg + offset);
+}
+
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset)
+{
+	return readl(kbdev->reg + offset);
+}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+/**
+ * show_policy - Show callback for the power_policy sysfs file.
+ *
+ * This function is called to get the contents of the power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *current_policy;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_get_policy(kbdev);
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * set_policy - Store callback for the power_policy sysfs file.
+ *
+ * This function is called when the power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls kbase_pm_set_policy() to change the
+ * policy.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "power_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/*
+ * The sysfs file power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+
+/**
+ * show_ca_policy - Show callback for the core_availability_policy sysfs file.
+ *
+ * This function is called to get the contents of the core_availability_policy
+ * sysfs file. This is a list of the available policies with the currently
+ * active one surrounded by square brackets.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *current_policy;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_ca_get_policy(kbdev);
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * set_ca_policy - Store callback for the core_availability_policy sysfs file.
+ *
+ * This function is called when the core_availability_policy sysfs file is
+ * written to. It matches the requested policy against the available policies
+ * and if a matching policy is found calls kbase_pm_set_policy() to change
+ * the policy.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *new_policy = NULL;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "core_availability_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_ca_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/*
+ * The sysfs file core_availability_policy
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy);
+
+/*
+ * show_core_mask - Show callback for the core_mask sysfs file.
+ *
+ * This function is called to get the contents of the core_mask sysfs file.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS0) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[0]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS1) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[1]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS2) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[2]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Available core mask : 0x%llX\n",
+			kbdev->gpu_props.props.raw_props.shader_present);
+
+	return ret;
+}
+
+/**
+ * set_core_mask - Store callback for the core_mask sysfs file.
+ *
+ * This function is called when the core_mask sysfs file is written to.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	u64 new_core_mask[3];
+	int items;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llx %llx %llx",
+			&new_core_mask[0], &new_core_mask[1],
+			&new_core_mask[2]);
+
+	if (items == 1)
+		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+
+	if (items == 1 || items == 3) {
+		u64 shader_present =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		u64 group0_core_mask =
+				kbdev->gpu_props.props.coherency_info.group[0].
+				core_mask;
+
+		if ((new_core_mask[0] & shader_present) != new_core_mask[0] ||
+				!(new_core_mask[0] & group0_core_mask) ||
+			(new_core_mask[1] & shader_present) !=
+						new_core_mask[1] ||
+				!(new_core_mask[1] & group0_core_mask) ||
+			(new_core_mask[2] & shader_present) !=
+						new_core_mask[2] ||
+				!(new_core_mask[2] & group0_core_mask)) {
+			dev_err(dev, "power_policy: invalid core specification\n");
+			return -EINVAL;
+		}
+
+		if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
+				kbdev->pm.debug_core_mask[1] !=
+						new_core_mask[1] ||
+				kbdev->pm.debug_core_mask[2] !=
+						new_core_mask[2]) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+					new_core_mask[1], new_core_mask[2]);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n"
+		"Use format <core_mask>\n"
+		"or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+	return -EINVAL;
+}
+
+/*
+ * The sysfs file core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+
+/**
+ * set_soft_job_timeout - Store callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This allows setting the timeout for software jobs. Waiting soft event wait
+ * jobs will be cancelled after this period expires, while soft fence wait jobs
+ * will print debug information if the fence debug feature is enabled.
+ *
+ * This is expressed in milliseconds.
+ *
+ * Return: count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_soft_job_timeout(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int soft_job_timeout_ms;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) ||
+	    (soft_job_timeout_ms <= 0))
+		return -EINVAL;
+
+	atomic_set(&kbdev->js_data.soft_job_timeout_ms,
+		   soft_job_timeout_ms);
+
+	return count;
+}
+
+/**
+ * show_soft_job_timeout - Show callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * This will return the timeout for the software jobs.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_soft_job_timeout(struct device *dev,
+				       struct device_attribute *attr,
+				       char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n",
+			 atomic_read(&kbdev->js_data.soft_job_timeout_ms));
+}
+
+static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR,
+		   show_soft_job_timeout, set_soft_job_timeout);
+
+static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms,
+				int default_ticks, u32 old_ticks)
+{
+	if (timeout_ms > 0) {
+		u64 ticks = timeout_ms * 1000000ULL;
+		do_div(ticks, kbdev->js_data.scheduling_period_ns);
+		if (!ticks)
+			return 1;
+		return ticks;
+	} else if (timeout_ms < 0) {
+		return default_ticks;
+	} else {
+		return old_ticks;
+	}
+}
+
+/**
+ * set_js_timeouts - Store callback for the js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the js_timeouts sysfs
+ * file. This file contains five values separated by whitespace. The values
+ * are basically the same as %JS_SOFT_STOP_TICKS, %JS_HARD_STOP_TICKS_SS,
+ * %JS_HARD_STOP_TICKS_DUMPING, %JS_RESET_TICKS_SS, %JS_RESET_TICKS_DUMPING
+ * configuration values (in that order), with the difference that the js_timeout
+ * values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfile file allows the current values in
+ * use by the job scheduler to get override. Note that a value needs to
+ * be other than 0 for it to override the current job scheduler value.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	long js_soft_stop_ms;
+	long js_soft_stop_ms_cl;
+	long js_hard_stop_ms_ss;
+	long js_hard_stop_ms_cl;
+	long js_hard_stop_ms_dumping;
+	long js_reset_ms_ss;
+	long js_reset_ms_cl;
+	long js_reset_ms_dumping;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld",
+			&js_soft_stop_ms, &js_soft_stop_ms_cl,
+			&js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+			&js_hard_stop_ms_dumping, &js_reset_ms_ss,
+			&js_reset_ms_cl, &js_reset_ms_dumping);
+
+	if (items == 8) {
+		struct kbasep_js_device_data *js_data = &kbdev->js_data;
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\
+	js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \
+			default, js_data->ticks_name); \
+	dev_dbg(kbdev->dev, "Overriding " #ticks_name \
+			" with %lu ticks (%lu ms)\n", \
+			(unsigned long)js_data->ticks_name, \
+			ms_name); \
+	} while (0)
+
+		UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms,
+				DEFAULT_JS_SOFT_STOP_TICKS);
+		UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl,
+				DEFAULT_JS_SOFT_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_HARD_STOP_TICKS_SS_8408 :
+				DEFAULT_JS_HARD_STOP_TICKS_SS);
+		UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl,
+				DEFAULT_JS_HARD_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_dumping,
+				js_hard_stop_ms_dumping,
+				DEFAULT_JS_HARD_STOP_TICKS_DUMPING);
+		UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_RESET_TICKS_SS_8408 :
+				DEFAULT_JS_RESET_TICKS_SS);
+		UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl,
+				DEFAULT_JS_RESET_TICKS_CL);
+		UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping,
+				DEFAULT_JS_RESET_TICKS_DUMPING);
+
+		kbase_js_set_timeouts(kbdev);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n"
+			"Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n"
+			"Write 0 for no change, -1 to restore default timeout\n");
+	return -EINVAL;
+}
+
+static unsigned long get_js_timeout_in_ms(
+		u32 scheduling_period_ns,
+		u32 ticks)
+{
+	u64 ms = (u64)ticks * scheduling_period_ns;
+
+	do_div(ms, 1000000UL);
+	return ms;
+}
+
+/**
+ * show_js_timeouts - Show callback for the js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the js_timeouts sysfs
+ * file. It returns the last set values written to the js_timeouts sysfs file.
+ * If the file didn't get written yet, the values will be current setting in
+ * use.
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+	unsigned long js_soft_stop_ms;
+	unsigned long js_soft_stop_ms_cl;
+	unsigned long js_hard_stop_ms_ss;
+	unsigned long js_hard_stop_ms_cl;
+	unsigned long js_hard_stop_ms_dumping;
+	unsigned long js_reset_ms_ss;
+	unsigned long js_reset_ms_cl;
+	unsigned long js_reset_ms_dumping;
+	u32 scheduling_period_ns;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+
+#define GET_TIMEOUT(name) get_js_timeout_in_ms(\
+		scheduling_period_ns, \
+		kbdev->js_data.name)
+
+	js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks);
+	js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl);
+	js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss);
+	js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl);
+	js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping);
+	js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss);
+	js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl);
+	js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef GET_TIMEOUT
+
+	ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+			js_soft_stop_ms, js_soft_stop_ms_cl,
+			js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+			js_hard_stop_ms_dumping, js_reset_ms_ss,
+			js_reset_ms_cl, js_reset_ms_dumping);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * The sysfs file js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * JS_STOP_STOP_TICKS_SS
+ * JS_STOP_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
+
+static u32 get_new_js_timeout(
+		u32 old_period,
+		u32 old_ticks,
+		u32 new_scheduling_period_ns)
+{
+	u64 ticks = (u64)old_period * (u64)old_ticks;
+	do_div(ticks, new_scheduling_period_ns);
+	return ticks?ticks:1;
+}
+
+/**
+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ *                            file
+ * @dev:   The device the sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the js_scheduling_period sysfs file is written
+ * to. It checks the data written, and if valid updates the js_scheduling_period
+ * value
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	unsigned int js_scheduling_period;
+	u32 new_scheduling_period_ns;
+	u32 old_period;
+	struct kbasep_js_device_data *js_data;
+	unsigned long flags;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	js_data = &kbdev->js_data;
+
+	ret = kstrtouint(buf, 0, &js_scheduling_period);
+	if (ret || !js_scheduling_period) {
+		dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
+				"Use format <js_scheduling_period_ms>\n");
+		return -EINVAL;
+	}
+
+	new_scheduling_period_ns = js_scheduling_period * 1000000;
+
+	/* Update scheduling timeouts */
+	mutex_lock(&js_data->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* If no contexts have been scheduled since js_timeouts was last written
+	 * to, the new timeouts might not have been latched yet. So check if an
+	 * update is pending and use the new values if necessary. */
+
+	/* Use previous 'new' scheduling period as a base if present. */
+	old_period = js_data->scheduling_period_ns;
+
+#define SET_TIMEOUT(name) \
+		(js_data->name = get_new_js_timeout(\
+				old_period, \
+				kbdev->js_data.name, \
+				new_scheduling_period_ns))
+
+	SET_TIMEOUT(soft_stop_ticks);
+	SET_TIMEOUT(soft_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_ss);
+	SET_TIMEOUT(hard_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_dumping);
+	SET_TIMEOUT(gpu_reset_ticks_ss);
+	SET_TIMEOUT(gpu_reset_ticks_cl);
+	SET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef SET_TIMEOUT
+
+	js_data->scheduling_period_ns = new_scheduling_period_ns;
+
+	kbase_js_set_timeouts(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&js_data->runpool_mutex);
+
+	dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
+			js_scheduling_period);
+
+	return count;
+}
+
+/**
+ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs
+ *                             entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the JS scheduling
+ * period.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	u32 period;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	period = kbdev->js_data.scheduling_period_ns;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+			period / 1000000);
+
+	return ret;
+}
+
+static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
+		show_js_scheduling_period, set_js_scheduling_period);
+
+#if !MALI_CUSTOMER_RELEASE
+/**
+ * set_force_replay - Store callback for the force_replay sysfs file.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (!strncmp("limit=", buf, MIN(6, count))) {
+		int force_replay_limit;
+		int items = sscanf(buf, "limit=%u", &force_replay_limit);
+
+		if (items == 1) {
+			kbdev->force_replay_random = false;
+			kbdev->force_replay_limit = force_replay_limit;
+			kbdev->force_replay_count = 0;
+
+			return count;
+		}
+	} else if (!strncmp("random_limit", buf, MIN(12, count))) {
+		kbdev->force_replay_random = true;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("norandom_limit", buf, MIN(14, count))) {
+		kbdev->force_replay_random = false;
+		kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("core_req=", buf, MIN(9, count))) {
+		unsigned int core_req;
+		int items = sscanf(buf, "core_req=%x", &core_req);
+
+		if (items == 1) {
+			kbdev->force_replay_core_req = (base_jd_core_req)core_req;
+
+			return count;
+		}
+	}
+	dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n");
+	return -EINVAL;
+}
+
+/**
+ * show_force_replay - Show callback for the force_replay sysfs file.
+ *
+ * This function is called to get the contents of the force_replay sysfs
+ * file. It returns the last set value written to the force_replay sysfs file.
+ * If the file didn't get written yet, the values will be 0.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_force_replay(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->force_replay_random)
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=0\nrandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_core_req);
+	else
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=%u\nnorandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_limit,
+				kbdev->force_replay_core_req);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * The sysfs file force_replay.
+ */
+static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay,
+		set_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG
+static ssize_t set_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int softstop_always;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &softstop_always);
+	if (ret || ((softstop_always != 0) && (softstop_always != 1))) {
+		dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n"
+				"Use format <soft_stop_always>\n");
+		return -EINVAL;
+	}
+
+	kbdev->js_data.softstop_always = (bool) softstop_always;
+	dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n",
+			(kbdev->js_data.softstop_always) ?
+			"Enabled" : "Disabled");
+	return count;
+}
+
+static ssize_t show_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * By default, soft-stops are disabled when only a single context is present.
+ * The ability to enable soft-stop when only a single context is present can be
+ * used for debug and unit-testing purposes.
+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.)
+ */
+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG
+typedef void (kbasep_debug_command_func) (struct kbase_device *);
+
+enum kbasep_debug_command_code {
+	KBASEP_DEBUG_COMMAND_DUMPTRACE,
+
+	/* This must be the last enum */
+	KBASEP_DEBUG_COMMAND_COUNT
+};
+
+struct kbasep_debug_command {
+	char *str;
+	kbasep_debug_command_func *func;
+};
+
+/* Debug commands supported by the driver */
+static const struct kbasep_debug_command debug_commands[] = {
+	{
+	 .str = "dumptrace",
+	 .func = &kbasep_trace_dump,
+	 }
+};
+
+/**
+ * show_debug - Show callback for the debug_command sysfs file.
+ *
+ * This function is called to get the contents of the debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * issue_debug - Store callback for the debug_command sysfs file.
+ *
+ * This function is called when the debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @debug_commands to issue the command.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) {
+		if (sysfs_streq(debug_commands[i].str, buf)) {
+			debug_commands[i].func(kbdev);
+			return count;
+		}
+	}
+
+	/* Debug Command not found */
+	dev_err(dev, "debug_command: command not known\n");
+	return -EINVAL;
+}
+
+/* The sysfs file debug_command.
+ *
+ * This is used to issue general debug commands to the device driver.
+ * Reading it will produce a list of debug commands, separated by newlines.
+ * Writing to it with one of those commands will issue said command.
+ */
+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
+#endif /* CONFIG_MALI_DEBUG */
+
+/**
+ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get a description of the present Mali
+ * GPU via the gpuinfo sysfs entry.  This includes the GPU family, the
+ * number of cores, the hardware version and the raw product id.  For
+ * example
+ *
+ *    Mali-T60x MP4 r0p0 0x6956
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t kbase_show_gpuinfo(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	static const struct gpu_product_id_name {
+		unsigned id;
+		char *name;
+	} gpu_product_id_names[] = {
+		{ .id = GPU_ID_PI_T60X, .name = "Mali-T60x" },
+		{ .id = GPU_ID_PI_T62X, .name = "Mali-T62x" },
+		{ .id = GPU_ID_PI_T72X, .name = "Mali-T72x" },
+		{ .id = GPU_ID_PI_T76X, .name = "Mali-T76x" },
+		{ .id = GPU_ID_PI_T82X, .name = "Mali-T82x" },
+		{ .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
+		{ .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
+		{ .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+		{ .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G71" },
+		{ .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-THEx" },
+		{ .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G51" },
+	};
+	const char *product_name = "(Unknown Mali GPU)";
+	struct kbase_device *kbdev;
+	u32 gpu_id;
+	unsigned product_id, product_id_mask;
+	unsigned i;
+	bool is_new_format;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	is_new_format = GPU_ID_IS_NEW_FORMAT(product_id);
+	product_id_mask =
+		(is_new_format ?
+			GPU_ID2_PRODUCT_MODEL :
+			GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
+		const struct gpu_product_id_name *p = &gpu_product_id_names[i];
+
+		if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) &&
+		    (p->id & product_id_mask) ==
+		    (product_id & product_id_mask)) {
+			product_name = p->name;
+			break;
+		}
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n",
+		product_name, kbdev->gpu_props.num_cores,
+		(gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT,
+		(gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT,
+		product_id);
+}
+static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
+
+/**
+ * set_dvfs_period - Store callback for the dvfs_period sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the dvfs_period sysfs file is written to. It
+ * checks the data written, and if valid updates the DVFS period variable,
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_dvfs_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int dvfs_period;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &dvfs_period);
+	if (ret || dvfs_period <= 0) {
+		dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n"
+				"Use format <dvfs_period_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.dvfs_period = dvfs_period;
+	dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period);
+
+	return count;
+}
+
+/**
+ * show_dvfs_period - Show callback for the dvfs_period sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_dvfs_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period);
+
+	return ret;
+}
+
+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
+		set_dvfs_period);
+
+/**
+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the pm_poweroff sysfs file is written to.
+ *
+ * This file contains three values separated by whitespace. The values
+ * are gpu_poweroff_time (the period of the poweroff timer, in ns),
+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle
+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer
+ * ticks before the GPU is powered off), in that order.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	s64 gpu_poweroff_time;
+	int poweroff_shader_ticks, poweroff_gpu_ticks;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time,
+			&poweroff_shader_ticks,
+			&poweroff_gpu_ticks);
+	if (items != 3) {
+		dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n"
+				"Use format <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+	kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
+	kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+
+	return count;
+}
+
+/**
+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
+			ktime_to_ns(kbdev->pm.gpu_poweroff_time),
+			kbdev->pm.poweroff_shader_ticks,
+			kbdev->pm.poweroff_gpu_ticks);
+
+	return ret;
+}
+
+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
+		set_pm_poweroff);
+
+/**
+ * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the reset_timeout sysfs file is written to. It
+ * checks the data written, and if valid updates the reset timeout.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_reset_timeout(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int reset_timeout;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &reset_timeout);
+	if (ret || reset_timeout <= 0) {
+		dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n"
+				"Use format <reset_timeout_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->reset_timeout_ms = reset_timeout;
+	dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout);
+
+	return count;
+}
+
+/**
+ * show_reset_timeout - Show callback for the reset_timeout sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current reset timeout.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_reset_timeout(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms);
+
+	return ret;
+}
+
+static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout,
+		set_reset_timeout);
+
+
+
+static ssize_t show_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_size);
+	if (err)
+		return err;
+
+	kbase_mem_pool_trim(&kbdev->mem_pool, new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size,
+		set_mem_pool_size);
+
+static ssize_t show_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_max_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_max_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_max_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
+		set_mem_pool_max_size);
+
+#ifdef CONFIG_DEBUG_FS
+
+/* Number of entries in serialize_jobs_settings[] */
+#define NR_SERIALIZE_JOBS_SETTINGS 5
+/* Maximum string length in serialize_jobs_settings[].name */
+#define MAX_SERIALIZE_JOBS_NAME_LEN 16
+
+static struct
+{
+	char *name;
+	u8 setting;
+} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = {
+	{"none", 0},
+	{"intra-slot", KBASE_SERIALIZE_INTRA_SLOT},
+	{"inter-slot", KBASE_SERIALIZE_INTER_SLOT},
+	{"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT},
+	{"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT |
+			KBASE_SERIALIZE_RESET}
+};
+
+/**
+ * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs
+ *                                  file
+ * @sfile: seq_file pointer
+ * @data:  Private callback data
+ *
+ * This function is called to get the contents of the serialize_jobs debugfs
+ * file. This is a list of the available settings with the currently active one
+ * surrounded by square brackets.
+ *
+ * Return: 0 on success, or an error code on error
+ */
+static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_device *kbdev = sfile->private;
+	int i;
+
+	CSTD_UNUSED(data);
+
+	for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+		if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting)
+			seq_printf(sfile, "[%s] ",
+					serialize_jobs_settings[i].name);
+		else
+			seq_printf(sfile, "%s ",
+					serialize_jobs_settings[i].name);
+	}
+
+	seq_puts(sfile, "\n");
+
+	return 0;
+}
+
+/**
+ * kbasep_serialize_jobs_debugfs_write - Store callback for the serialize_jobs
+ *                                       debugfs file.
+ * @file:  File pointer
+ * @ubuf:  User buffer containing data to store
+ * @count: Number of bytes in user buffer
+ * @ppos:  File position
+ *
+ * This function is called when the serialize_jobs debugfs file is written to.
+ * It matches the requested setting against the available settings and if a
+ * matching setting is found updates kbdev->serialize_jobs.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	struct seq_file *s = file->private_data;
+	struct kbase_device *kbdev = s->private;
+	char buf[MAX_SERIALIZE_JOBS_NAME_LEN];
+	int i;
+	bool valid = false;
+
+	CSTD_UNUSED(ppos);
+
+	count = min_t(size_t, sizeof(buf) - 1, count);
+	if (copy_from_user(buf, ubuf, count))
+		return -EFAULT;
+
+	buf[count] = 0;
+
+	for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+		if (sysfs_streq(serialize_jobs_settings[i].name, buf)) {
+			kbdev->serialize_jobs =
+					serialize_jobs_settings[i].setting;
+			valid = true;
+			break;
+		}
+	}
+
+	if (!valid) {
+		dev_err(kbdev->dev, "serialize_jobs: invalid setting\n");
+		return -EINVAL;
+	}
+
+	return count;
+}
+
+/**
+ * kbasep_serialize_jobs_debugfs_open - Open callback for the serialize_jobs
+ *                                     debugfs file
+ * @in:   inode pointer
+ * @file: file pointer
+ *
+ * Return: Zero on success, error code on failure
+ */
+static int kbasep_serialize_jobs_debugfs_open(struct inode *in,
+		struct file *file)
+{
+	return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_serialize_jobs_debugfs_fops = {
+	.open = kbasep_serialize_jobs_debugfs_open,
+	.read = seq_read,
+	.write = kbasep_serialize_jobs_debugfs_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_DEBUG_FS */
+
+static int kbasep_protected_mode_enter(struct kbase_device *kbdev)
+{
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+		GPU_COMMAND_SET_PROTECTED_MODE, NULL);
+	return 0;
+}
+
+static bool kbasep_protected_mode_supported(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static struct kbase_protected_ops kbasep_protected_ops = {
+	.protected_mode_enter = kbasep_protected_mode_enter,
+	.protected_mode_reset = NULL,
+	.protected_mode_supported = kbasep_protected_mode_supported,
+};
+
+static void kbasep_protected_mode_init(struct kbase_device *kbdev)
+{
+	kbdev->protected_ops = NULL;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		/* Use native protected ops */
+		kbdev->protected_ops = &kbasep_protected_ops;
+	}
+#ifdef PROTECTED_CALLBACKS
+	else
+		kbdev->protected_ops = PROTECTED_CALLBACKS;
+#endif
+
+	if (kbdev->protected_ops)
+		kbdev->protected_mode_support =
+				kbdev->protected_ops->protected_mode_supported(kbdev);
+	else
+		kbdev->protected_mode_support = false;
+}
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	int err = -ENOMEM;
+
+	if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) {
+		dev_err(kbdev->dev, "Register window unavailable\n");
+		err = -EIO;
+		goto out_region;
+	}
+
+	kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+	if (!kbdev->reg) {
+		dev_err(kbdev->dev, "Can't remap register window\n");
+		err = -EINVAL;
+		goto out_ioremap;
+	}
+
+	return 0;
+
+ out_ioremap:
+	release_mem_region(kbdev->reg_start, kbdev->reg_size);
+ out_region:
+	return err;
+}
+
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+	if (kbdev->reg) {
+		iounmap(kbdev->reg);
+		release_mem_region(kbdev->reg_start, kbdev->reg_size);
+		kbdev->reg = NULL;
+		kbdev->reg_start = 0;
+		kbdev->reg_size = 0;
+	}
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+static int registers_map(struct kbase_device * const kbdev)
+{
+
+		/* the first memory resource is the physical address of the GPU
+		 * registers */
+		struct platform_device *pdev = to_platform_device(kbdev->dev);
+		struct resource *reg_res;
+		int err;
+
+		reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!reg_res) {
+			dev_err(kbdev->dev, "Invalid register resource\n");
+			return -ENOENT;
+		}
+
+		kbdev->reg_start = reg_res->start;
+		kbdev->reg_size = resource_size(reg_res);
+
+		err = kbase_common_reg_map(kbdev);
+		if (err) {
+			dev_err(kbdev->dev, "Failed to map registers\n");
+			return err;
+		}
+
+	return 0;
+}
+
+static void registers_unmap(struct kbase_device *kbdev)
+{
+	kbase_common_reg_unmap(kbdev);
+}
+
+static int power_control_init(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int err = 0;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
+	if (IS_ERR_OR_NULL(kbdev->regulator)) {
+		err = PTR_ERR(kbdev->regulator);
+		kbdev->regulator = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get regulator\n");
+			return err;
+		}
+		dev_info(kbdev->dev,
+			"Continuing without Mali regulator control\n");
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+	kbdev->clock = clk_get(kbdev->dev, "clk_mali");
+	if (IS_ERR_OR_NULL(kbdev->clock)) {
+		err = PTR_ERR(kbdev->clock);
+		kbdev->clock = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get clock\n");
+			goto fail;
+		}
+		dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare_enable(kbdev->clock);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Failed to prepare and enable clock (%d)\n",
+				err);
+			goto fail;
+		}
+	}
+
+#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) \
+	|| defined(LSK_OPPV2_BACKPORT)
+	err = dev_pm_opp_of_add_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	err = of_init_opp_table(kbdev->dev);
+#else
+	err = 0;
+#endif /* LINUX_VERSION_CODE */
+	if (err)
+		dev_dbg(kbdev->dev, "OPP table not found\n");
+#endif /* CONFIG_OF && CONFIG_PM_OPP */
+
+	return 0;
+
+fail:
+
+if (kbdev->clock != NULL) {
+	clk_put(kbdev->clock);
+	kbdev->clock = NULL;
+}
+
+#ifdef CONFIG_REGULATOR
+	if (NULL != kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif
+
+	return err;
+}
+
+static void power_control_term(struct kbase_device *kbdev)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	dev_pm_opp_of_remove_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+	of_free_opp_table(kbdev->dev);
+#endif
+
+	if (kbdev->clock) {
+		clk_disable_unprepare(kbdev->clock);
+		clk_put(kbdev->clock);
+		kbdev->clock = NULL;
+	}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	if (kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#if KBASE_GPU_RESET_EN
+#include <mali_kbase_hwaccess_jm.h>
+
+static void trigger_quirks_reload(struct kbase_device *kbdev)
+{
+	kbase_pm_context_active(kbdev);
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	kbase_pm_context_idle(kbdev);
+}
+
+#define MAKE_QUIRK_ACCESSORS(type) \
+static int type##_quirks_set(void *data, u64 val) \
+{ \
+	struct kbase_device *kbdev; \
+	kbdev = (struct kbase_device *)data; \
+	kbdev->hw_quirks_##type = (u32)val; \
+	trigger_quirks_reload(kbdev); \
+	return 0;\
+} \
+\
+static int type##_quirks_get(void *data, u64 *val) \
+{ \
+	struct kbase_device *kbdev;\
+	kbdev = (struct kbase_device *)data;\
+	*val = kbdev->hw_quirks_##type;\
+	return 0;\
+} \
+DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
+		type##_quirks_set, "%llu\n")
+
+MAKE_QUIRK_ACCESSORS(sc);
+MAKE_QUIRK_ACCESSORS(tiler);
+MAKE_QUIRK_ACCESSORS(mmu);
+
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read
+ * @file: File object to read is for
+ * @buf:  User buffer to populate with data
+ * @len:  Length of user buffer
+ * @ppos: Offset within file object
+ *
+ * Retrieves the current status of protected debug mode
+ * (0 = disabled, 1 = enabled)
+ *
+ * Return: Number of bytes added to user buffer
+ */
+static ssize_t debugfs_protected_debug_mode_read(struct file *file,
+				char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)file->private_data;
+	u32 gpu_status;
+	ssize_t ret_val;
+
+	kbase_pm_context_active(kbdev);
+	gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL);
+	kbase_pm_context_idle(kbdev);
+
+	if (gpu_status & GPU_DBGEN)
+		ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2);
+	else
+		ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2);
+
+	return ret_val;
+}
+
+/*
+ * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops
+ *
+ * Contains the file operations for the "protected_debug_mode" debugfs file
+ */
+static const struct file_operations fops_protected_debug_mode = {
+	.open = simple_open,
+	.read = debugfs_protected_debug_mode_read,
+	.llseek = default_llseek,
+};
+
+static int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	struct dentry *debugfs_ctx_defaults_directory;
+	int err;
+
+	kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
+			NULL);
+	if (!kbdev->mali_debugfs_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
+			kbdev->mali_debugfs_directory);
+	if (!kbdev->debugfs_ctx_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
+			kbdev->debugfs_ctx_directory);
+	if (!debugfs_ctx_defaults_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+#if !MALI_CUSTOMER_RELEASE
+	kbasep_regs_dump_debugfs_init(kbdev);
+#endif /* !MALI_CUSTOMER_RELEASE */
+	kbasep_regs_history_debugfs_init(kbdev);
+
+	kbase_debug_job_fault_debugfs_init(kbdev);
+	kbasep_gpu_memory_debugfs_init(kbdev);
+	kbase_as_fault_debugfs_init(kbdev);
+#if KBASE_GPU_RESET_EN
+	debugfs_create_file("quirks_sc", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_sc_quirks);
+	debugfs_create_file("quirks_tiler", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_tiler_quirks);
+	debugfs_create_file("quirks_mmu", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_mmu_quirks);
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_COH_USER
+	debugfs_create_bool("infinite_cache", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->infinite_cache_active_default);
+#endif /* CONFIG_MALI_COH_USER */
+
+	debugfs_create_size_t("mem_pool_max_size", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->mem_pool_max_size_default);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		debugfs_create_file("protected_debug_mode", S_IRUGO,
+				kbdev->mali_debugfs_directory, kbdev,
+				&fops_protected_debug_mode);
+	}
+
+#if KBASE_TRACE_ENABLE
+	kbasep_trace_debugfs_init(kbdev);
+#endif /* KBASE_TRACE_ENABLE */
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kbasep_trace_timeline_debugfs_init(kbdev);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+#ifdef CONFIG_DEBUG_FS
+	debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_serialize_jobs_debugfs_fops);
+#endif /* CONFIG_DEBUG_FS */
+
+	return 0;
+
+out:
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+	return err;
+}
+
+static void kbase_device_debugfs_term(struct kbase_device *kbdev)
+{
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+}
+
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	return 0;
+}
+
+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
+#endif /* CONFIG_DEBUG_FS */
+
+static void kbase_device_coherency_init(struct kbase_device *kbdev,
+		unsigned prod_id)
+{
+#ifdef CONFIG_OF
+	u32 supported_coherency_bitmap =
+		kbdev->gpu_props.props.raw_props.coherency_mode;
+	const void *coherency_override_dts;
+	u32 override_coherency;
+
+	/* Only for tMIx :
+	 * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
+	 * documented for tMIx so force correct value here.
+	 */
+	if (GPU_ID_IS_NEW_FORMAT(prod_id) &&
+		   (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
+				   GPU_ID2_PRODUCT_TMIX))
+		if (supported_coherency_bitmap ==
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE))
+			supported_coherency_bitmap |=
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+
+#endif /* CONFIG_OF */
+
+	kbdev->system_coherency = COHERENCY_NONE;
+
+	/* device tree may override the coherency */
+#ifdef CONFIG_OF
+	coherency_override_dts = of_get_property(kbdev->dev->of_node,
+						"system-coherency",
+						NULL);
+	if (coherency_override_dts) {
+
+		override_coherency = be32_to_cpup(coherency_override_dts);
+
+		if ((override_coherency <= COHERENCY_NONE) &&
+			(supported_coherency_bitmap &
+			 COHERENCY_FEATURE_BIT(override_coherency))) {
+
+			kbdev->system_coherency = override_coherency;
+
+			dev_info(kbdev->dev,
+				"Using coherency mode %u set from dtb",
+				override_coherency);
+		} else
+			dev_warn(kbdev->dev,
+				"Ignoring unsupported coherency mode %u set from dtb",
+				override_coherency);
+	}
+
+#endif /* CONFIG_OF */
+
+	kbdev->gpu_props.props.raw_props.coherency_mode =
+		kbdev->system_coherency;
+}
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+
+/* Callback used by the kbase bus logger client, to initiate a GPU reset
+ * when the bus log is restarted.  GPU reset is used as reference point
+ * in HW bus log analyses.
+ */
+static void kbase_logging_started_cb(void *data)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)data;
+
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	dev_info(kbdev->dev, "KBASE - Bus logger restarted\n");
+}
+#endif
+
+static struct attribute *kbase_attrs[] = {
+#ifdef CONFIG_MALI_DEBUG
+	&dev_attr_debug_command.attr,
+	&dev_attr_js_softstop_always.attr,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&dev_attr_force_replay.attr,
+#endif
+	&dev_attr_js_timeouts.attr,
+	&dev_attr_soft_job_timeout.attr,
+	&dev_attr_gpuinfo.attr,
+	&dev_attr_dvfs_period.attr,
+	&dev_attr_pm_poweroff.attr,
+	&dev_attr_reset_timeout.attr,
+	&dev_attr_js_scheduling_period.attr,
+	&dev_attr_power_policy.attr,
+	&dev_attr_core_availability_policy.attr,
+	&dev_attr_core_mask.attr,
+	&dev_attr_mem_pool_size.attr,
+	&dev_attr_mem_pool_max_size.attr,
+	NULL
+};
+
+static const struct attribute_group kbase_attr_group = {
+	.attrs = kbase_attrs,
+};
+
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	const struct list_head *dev_list;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	if (kbdev->inited_subsys & inited_buslogger) {
+		bl_core_client_unregister(kbdev->buslogger);
+		kbdev->inited_subsys &= ~inited_buslogger;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_sysfs_group) {
+		sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+		kbdev->inited_subsys &= ~inited_sysfs_group;
+	}
+
+	if (kbdev->inited_subsys & inited_dev_list) {
+		dev_list = kbase_dev_list_get();
+		list_del(&kbdev->entry);
+		kbase_dev_list_put(dev_list);
+		kbdev->inited_subsys &= ~inited_dev_list;
+	}
+
+	if (kbdev->inited_subsys & inited_misc_register) {
+		misc_deregister(&kbdev->mdev);
+		kbdev->inited_subsys &= ~inited_misc_register;
+	}
+
+	if (kbdev->inited_subsys & inited_get_device) {
+		put_device(kbdev->dev);
+		kbdev->inited_subsys &= ~inited_get_device;
+	}
+
+	if (kbdev->inited_subsys & inited_debugfs) {
+		kbase_device_debugfs_term(kbdev);
+		kbdev->inited_subsys &= ~inited_debugfs;
+	}
+
+	if (kbdev->inited_subsys & inited_job_fault) {
+		kbase_debug_job_fault_dev_term(kbdev);
+		kbdev->inited_subsys &= ~inited_job_fault;
+	}
+	if (kbdev->inited_subsys & inited_vinstr) {
+		kbase_vinstr_term(kbdev->vinstr_ctx);
+		kbdev->inited_subsys &= ~inited_vinstr;
+	}
+
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_term(kbdev);
+		kbdev->inited_subsys &= ~inited_devfreq;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_backend_late) {
+		kbase_backend_late_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_late;
+	}
+
+	if (kbdev->inited_subsys & inited_tlstream) {
+		kbase_tlstream_term();
+		kbdev->inited_subsys &= ~inited_tlstream;
+	}
+
+	/* Bring job and mem sys to a halt before we continue termination */
+
+	if (kbdev->inited_subsys & inited_js)
+		kbasep_js_devdata_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_mem)
+		kbase_mem_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_js) {
+		kbasep_js_devdata_term(kbdev);
+		kbdev->inited_subsys &= ~inited_js;
+	}
+
+	if (kbdev->inited_subsys & inited_mem) {
+		kbase_mem_term(kbdev);
+		kbdev->inited_subsys &= ~inited_mem;
+	}
+
+	if (kbdev->inited_subsys & inited_pm_runtime_init) {
+		kbdev->pm.callback_power_runtime_term(kbdev);
+		kbdev->inited_subsys &= ~inited_pm_runtime_init;
+	}
+
+	if (kbdev->inited_subsys & inited_device) {
+		kbase_device_term(kbdev);
+		kbdev->inited_subsys &= ~inited_device;
+	}
+
+	if (kbdev->inited_subsys & inited_backend_early) {
+		kbase_backend_early_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_early;
+	}
+
+	if (kbdev->inited_subsys & inited_io_history) {
+		kbase_io_history_term(&kbdev->io_history);
+		kbdev->inited_subsys &= ~inited_io_history;
+	}
+
+	if (kbdev->inited_subsys & inited_power_control) {
+		power_control_term(kbdev);
+		kbdev->inited_subsys &= ~inited_power_control;
+	}
+
+	if (kbdev->inited_subsys & inited_registers_map) {
+		registers_unmap(kbdev);
+		kbdev->inited_subsys &= ~inited_registers_map;
+	}
+
+#ifdef CONFIG_MALI_NO_MALI
+	if (kbdev->inited_subsys & inited_gpu_device) {
+		gpu_device_destroy(kbdev);
+		kbdev->inited_subsys &= ~inited_gpu_device;
+	}
+#endif /* CONFIG_MALI_NO_MALI */
+
+	if (kbdev->inited_subsys != 0)
+		dev_err(kbdev->dev, "Missing sub system termination\n");
+
+	kbase_device_free(kbdev);
+
+	return 0;
+}
+
+
+/* Number of register accesses for the buffer that we allocate during
+ * initialization time. The buffer size can be changed later via debugfs. */
+#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512)
+
+static int kbase_platform_device_probe(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev;
+	struct mali_base_gpu_core_props *core_props;
+	u32 gpu_id;
+	unsigned prod_id;
+	const struct list_head *dev_list;
+	int err = 0;
+
+#ifdef CONFIG_OF
+	err = kbase_platform_early_init();
+	if (err) {
+		dev_err(&pdev->dev, "Early platform initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+#endif
+	kbdev = kbase_device_alloc();
+	if (!kbdev) {
+		dev_err(&pdev->dev, "Allocate device failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+
+	kbdev->dev = &pdev->dev;
+	dev_set_drvdata(kbdev->dev, kbdev);
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	INIT_LIST_HEAD(&kbdev->ipa_power_models);
+#endif
+
+#ifdef CONFIG_MALI_NO_MALI
+	err = gpu_device_create(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Dummy model initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_gpu_device;
+#endif /* CONFIG_MALI_NO_MALI */
+
+	err = assign_irqs(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "IRQ search failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	err = registers_map(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Register map failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_registers_map;
+
+	err = power_control_init(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Power control initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_power_control;
+
+	err = kbase_io_history_init(&kbdev->io_history,
+			KBASEP_DEFAULT_REGISTER_HISTORY_SIZE);
+	if (err) {
+		dev_err(&pdev->dev, "Register access history initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+	kbdev->inited_subsys |= inited_io_history;
+
+	err = kbase_backend_early_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Early backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_early;
+
+	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+			kbase_dev_nr);
+
+	kbase_disjoint_init(kbdev);
+
+	/* obtain min/max configured gpu frequencies */
+	core_props = &(kbdev->gpu_props.props.core_props);
+	core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+	core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+
+	kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US;
+
+	err = kbase_device_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Device initialization failed (%d)\n", err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_device;
+
+	if (kbdev->pm.callback_power_runtime_init) {
+		err = kbdev->pm.callback_power_runtime_init(kbdev);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Runtime PM initialization failed\n");
+			kbase_platform_device_remove(pdev);
+			return err;
+		}
+		kbdev->inited_subsys |= inited_pm_runtime_init;
+	}
+
+	err = kbase_mem_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Memory subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_mem;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+	prod_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	kbase_device_coherency_init(kbdev, prod_id);
+
+	kbasep_protected_mode_init(kbdev);
+
+	dev_list = kbase_dev_list_get();
+	list_add(&kbdev->entry, &kbase_dev_list);
+	kbase_dev_list_put(dev_list);
+	kbdev->inited_subsys |= inited_dev_list;
+
+	err = kbasep_js_devdata_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job JS devdata initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_js;
+
+	err = kbase_tlstream_init();
+	if (err) {
+		dev_err(kbdev->dev, "Timeline stream initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_tlstream;
+
+	err = kbase_backend_late_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Late backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_late;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	err = kbase_devfreq_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Devfreq initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_devfreq;
+#endif /* CONFIG_MALI_DEVFREQ */
+
+	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+	if (!kbdev->vinstr_ctx) {
+		dev_err(kbdev->dev,
+			"Virtual instrumentation initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+	kbdev->inited_subsys |= inited_vinstr;
+
+	err = kbase_debug_job_fault_dev_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job fault debug initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_job_fault;
+
+	err = kbase_device_debugfs_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "DebugFS initialization failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_debugfs;
+
+	/* initialize the kctx list */
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
+
+	kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+	kbdev->mdev.name = kbdev->devname;
+	kbdev->mdev.fops = &kbase_fops;
+	kbdev->mdev.parent = get_device(kbdev->dev);
+	kbdev->inited_subsys |= inited_get_device;
+
+	err = misc_register(&kbdev->mdev);
+	if (err) {
+		dev_err(kbdev->dev, "Misc device registration failed for %s\n",
+			kbdev->devname);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_misc_register;
+
+
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+	if (err) {
+		dev_err(&pdev->dev, "SysFS group creation failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_sysfs_group;
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	err = bl_core_client_register(kbdev->devname,
+						kbase_logging_started_cb,
+						kbdev, &kbdev->buslogger,
+						THIS_MODULE, NULL);
+	if (err == 0) {
+		kbdev->inited_subsys |= inited_buslogger;
+		bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
+	} else {
+		dev_warn(kbdev->dev, "Bus log client registration failed\n");
+		err = 0;
+	}
+#endif
+
+	dev_info(kbdev->dev,
+			"Probed as %s\n", dev_name(kbdev->mdev.this_device));
+
+	kbase_dev_nr++;
+
+	return err;
+}
+
+#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE
+
+
+/**
+ * kbase_device_suspend - Suspend callback from the OS.
+ *
+ * This is called by Linux when the device should suspend.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+static int kbase_device_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 1;
+#endif
+
+	kbase_pm_suspend(kbdev);
+	return 0;
+}
+
+/**
+ * kbase_device_resume - Resume callback from the OS.
+ *
+ * This is called by Linux when the device should resume from suspension.
+ *
+ * @dev:  The device to resume
+ *
+ * Return: A standard Linux error code
+ */
+static int kbase_device_resume(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kbase_pm_resume(kbdev);
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 0;
+#endif
+	return 0;
+}
+
+/**
+ * kbase_device_runtime_suspend - Runtime suspend callback from the OS.
+ *
+ * This is called by Linux when the device should prepare for a condition in
+ * which it will not be able to communicate with the CPU(s) and RAM due to
+ * power management.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 1;
+#endif
+
+	if (kbdev->pm.backend.callback_power_runtime_off) {
+		kbdev->pm.backend.callback_power_runtime_off(kbdev);
+		dev_dbg(dev, "runtime suspend\n");
+	}
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/**
+ * kbase_device_runtime_resume - Runtime resume callback from the OS.
+ *
+ * This is called by Linux when the device should go into a fully active state.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_resume(struct device *dev)
+{
+	int ret = 0;
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->pm.backend.callback_power_runtime_on) {
+		ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
+		dev_dbg(dev, "runtime resume\n");
+	}
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 0;
+#endif
+
+	return ret;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_device_runtime_idle - Runtime idle callback from the OS.
+ * @dev: The device to suspend
+ *
+ * This is called by Linux when the device appears to be inactive and it might
+ * be placed into a low power state.
+ *
+ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend,
+ * otherwise a standard Linux error code
+ */
+static int kbase_device_runtime_idle(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* Use platform specific implementation if it exists. */
+	if (kbdev->pm.backend.callback_power_runtime_idle)
+		return kbdev->pm.backend.callback_power_runtime_idle(kbdev);
+
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+#ifndef CONFIG_MALI_DEVFREQ
+static int mali_os_freeze(struct device *device)
+{
+	mali_dev_freeze();
+	return kbase_device_suspend(device);
+}
+
+static int mali_os_restore(struct device *device)
+{
+	mali_dev_restore();
+	return kbase_device_resume(device);
+}
+#endif
+
+
+/* The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+	.suspend = kbase_device_suspend,
+	.resume = kbase_device_resume,
+#ifndef CONFIG_MALI_DEVFREQ
+	.freeze = mali_os_freeze,
+	.thaw = kbase_device_resume,
+	.restore = mali_os_restore,
+#endif
+#ifdef KBASE_PM_RUNTIME
+	.runtime_suspend = kbase_device_runtime_suspend,
+	.runtime_resume = kbase_device_runtime_resume,
+	.runtime_idle = kbase_device_runtime_idle,
+#endif /* KBASE_PM_RUNTIME */
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+	{ .compatible = "arm,malit6xx" },
+	{ .compatible = "arm,mali-midgard" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
+
+static struct platform_driver kbase_platform_driver = {
+	.probe = kbase_platform_device_probe,
+	.remove = kbase_platform_device_remove,
+	.driver = {
+		   .name = kbase_drv_name,
+		   .owner = THIS_MODULE,
+		   .pm = &kbase_pm_ops,
+		   .of_match_table = of_match_ptr(kbase_dt_ids),
+	},
+};
+
+/*
+ * The driver will not provide a shortcut to create the Mali platform device
+ * anymore when using Device Tree.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else
+
+static int __init kbase_driver_init(void)
+{
+	int ret;
+
+	ret = kbase_platform_early_init();
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	ret = kbase_platform_fake_register();
+	if (ret)
+		return ret;
+#endif
+	ret = platform_driver_register(&kbase_platform_driver);
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	if (ret)
+		kbase_platform_fake_unregister();
+#endif
+	return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+	platform_driver_unregister(&kbase_platform_driver);
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	kbase_platform_fake_unregister();
+#endif
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
+		__stringify(BASE_UK_VERSION_MAJOR) "." \
+		__stringify(BASE_UK_VERSION_MINOR) ")");
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
+#define CREATE_TRACE_POINTS
+#endif
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Create the trace points (otherwise we just get code to call a tracepoint) */
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
+
+void kbase_trace_mali_pm_status(u32 event, u64 value)
+{
+	trace_mali_pm_status(event, value);
+}
+
+void kbase_trace_mali_pm_power_off(u32 event, u64 value)
+{
+	trace_mali_pm_power_off(event, value);
+}
+
+void kbase_trace_mali_pm_power_on(u32 event, u64 value)
+{
+	trace_mali_pm_power_on(event, value);
+}
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id)
+{
+	trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id);
+}
+
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value)
+{
+	trace_mali_page_fault_insert_pages(event, value);
+}
+
+void kbase_trace_mali_mmu_as_in_use(int event)
+{
+	trace_mali_mmu_as_in_use(event);
+}
+
+void kbase_trace_mali_mmu_as_released(int event)
+{
+	trace_mali_mmu_as_released(event);
+}
+
+void kbase_trace_mali_total_alloc_pages_change(long long int event)
+{
+	trace_mali_total_alloc_pages_change(event);
+}
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_MALI_SYSTEM_TRACE
+#include "mali_linux_kbase_trace.h"
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100644
index 0000000..fb57ac2
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+	NULL,
+	NULL
+};
+
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+	kbasep_debug_assert_registered_cb.func = func;
+	kbasep_debug_assert_registered_cb.param = param;
+}
+
+void kbasep_debug_assert_call_hook(void)
+{
+	if (kbasep_debug_assert_registered_cb.func != NULL)
+		kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
new file mode 100644
index 0000000..5fff289
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If different from 0, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+struct kbasep_debug_assert_cb {
+	kbase_debug_assert_hook *func;
+	void *param;
+};
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+		"In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION __func__
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated to the @see KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+		do { \
+			pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+			pr_err(__VA_ARGS__);\
+			pr_err("\n");\
+		} while (false)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @see KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+	KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+	/**
+	 * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+	 * @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+	 *
+	 * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+	 *
+	 * @param expr Boolean expression
+	 * @param ...  Message to display when @a expr is false, as a format string followed by format arguments.
+	 */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+		do { \
+			if (!(expr)) { \
+				KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+				KBASE_CALL_ASSERT_HOOK();\
+				BUG();\
+			} \
+		} while (false)
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif				/* CONFIG_MALI_DEBUG */
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif				/* _KBASE_DEBUG_H */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
new file mode 100644
index 0000000..f29430d
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
@@ -0,0 +1,499 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/spinlock.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+	bool             ret;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	ret = !list_empty(event_list);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return ret;
+}
+
+static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
+	struct base_job_fault_event *event;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		return true;
+	}
+	list_for_each_entry(event, event_list, head) {
+		if (event->katom->kctx == kctx) {
+			spin_unlock_irqrestore(&kbdev->job_fault_event_lock,
+					flags);
+			return false;
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+	return true;
+}
+
+/* wait until the fault happen and copy the event */
+static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
+		struct base_job_fault_event *event)
+{
+	struct list_head            *event_list = &kbdev->job_fault_event_list;
+	struct base_job_fault_event *event_in;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		if (wait_event_interruptible(kbdev->job_fault_wq,
+				 kbase_is_job_fault_event_pending(kbdev)))
+			return -ERESTARTSYS;
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+
+	event_in = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	event->event_code = event_in->event_code;
+	event->katom = event_in->katom;
+
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return 0;
+
+}
+
+/* remove the event from the queue */
+static struct base_job_fault_event *kbase_job_fault_event_dequeue(
+		struct kbase_device *kbdev, struct list_head *event_list)
+{
+	struct base_job_fault_event *event;
+
+	event = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	list_del(event_list->next);
+
+	return event;
+
+}
+
+/* Remove all the following atoms after the failed atom in the same context
+ * Call the postponed bottom half of job done.
+ * Then, this context could be rescheduled.
+ */
+static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
+{
+	struct list_head *event_list = &kctx->job_fault_resume_event_list;
+
+	while (!list_empty(event_list)) {
+		struct base_job_fault_event *event;
+
+		event = kbase_job_fault_event_dequeue(kctx->kbdev,
+				&kctx->job_fault_resume_event_list);
+		kbase_jd_done_worker(&event->katom->work);
+	}
+
+}
+
+/* Remove all the failed atoms that belong to different contexts
+ * Resume all the contexts that were suspend due to failed job
+ */
+static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	while (!list_empty(event_list)) {
+		kbase_job_fault_event_dequeue(kbdev, event_list);
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		wake_up(&kbdev->job_fault_resume_wq);
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+}
+
+static void kbase_job_fault_resume_worker(struct work_struct *data)
+{
+	struct base_job_fault_event *event = container_of(data,
+			struct base_job_fault_event, job_fault_work);
+	struct kbase_context *kctx;
+	struct kbase_jd_atom *katom;
+
+	katom = event->katom;
+	kctx = katom->kctx;
+
+	dev_info(kctx->kbdev->dev, "Job dumping wait\n");
+
+	/* When it was waked up, it need to check if queue is empty or the
+	 * failed atom belongs to different context. If yes, wake up. Both
+	 * of them mean the failed job has been dumped. Please note, it
+	 * should never happen that the job_fault_event_list has the two
+	 * atoms belong to the same context.
+	 */
+	wait_event(kctx->kbdev->job_fault_resume_wq,
+			 kbase_ctx_has_no_event_pending(kctx));
+
+	atomic_set(&kctx->job_fault_count, 0);
+	kbase_jd_done_worker(&katom->work);
+
+	/* In case the following atoms were scheduled during failed job dump
+	 * the job_done_worker was held. We need to rerun it after the dump
+	 * was finished
+	 */
+	kbase_job_fault_resume_event_cleanup(kctx);
+
+	dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
+}
+
+static struct base_job_fault_event *kbase_job_fault_event_queue(
+		struct list_head *event_list,
+		struct kbase_jd_atom *atom,
+		u32 completion_code)
+{
+	struct base_job_fault_event *event;
+
+	event = &atom->fault_event;
+
+	event->katom = atom;
+	event->event_code = completion_code;
+
+	list_add_tail(&event->head, event_list);
+
+	return event;
+
+}
+
+static void kbase_job_fault_event_post(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, u32 completion_code)
+{
+	struct base_job_fault_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
+				katom, completion_code);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	wake_up_interruptible(&kbdev->job_fault_wq);
+
+	INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
+	queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
+
+	dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
+			katom->kctx->tgid, katom->kctx->id);
+
+}
+
+/*
+ * This function will process the job fault
+ * Get the register copy
+ * Send the failed job dump event
+ * Create a Wait queue to wait until the job dump finish
+ */
+
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Check if dumping is in the process
+	 * only one atom of each context can be dumped at the same time
+	 * If the atom belongs to different context, it can be dumped
+	 */
+	if (atomic_read(&kctx->job_fault_count) > 0) {
+		kbase_job_fault_event_queue(
+				&kctx->job_fault_resume_event_list,
+				katom, completion_code);
+		dev_info(kctx->kbdev->dev, "queue:%d\n",
+				kbase_jd_atom_id(kctx, katom));
+		return true;
+	}
+
+	if (kctx->kbdev->job_fault_debug == true) {
+
+		if (completion_code != BASE_JD_EVENT_DONE) {
+
+			if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
+				dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
+				return false;
+			}
+
+			kbase_job_fault_event_post(kctx->kbdev, katom,
+					completion_code);
+			atomic_inc(&kctx->job_fault_count);
+			dev_info(kctx->kbdev->dev, "post:%d\n",
+					kbase_jd_atom_id(kctx, katom));
+			return true;
+
+		}
+	}
+	return false;
+
+}
+
+static int debug_job_fault_show(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+	struct kbase_context *kctx = event->katom->kctx;
+	int i;
+
+	dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
+			kctx->tgid, kctx->id, event->reg_offset);
+
+	if (kctx->reg_dump == NULL) {
+		dev_warn(kbdev->dev, "reg dump is NULL");
+		return -1;
+	}
+
+	if (kctx->reg_dump[event->reg_offset] ==
+			REGISTER_DUMP_TERMINATION_FLAG) {
+		/* Return the error here to stop the read. And the
+		 * following next() will not be called. The stop can
+		 * get the real event resource and release it
+		 */
+		return -1;
+	}
+
+	if (event->reg_offset == 0)
+		seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
+
+	for (i = 0; i < 50; i++) {
+		if (kctx->reg_dump[event->reg_offset] ==
+				REGISTER_DUMP_TERMINATION_FLAG) {
+			break;
+		}
+		seq_printf(m, "%08x: %08x\n",
+				kctx->reg_dump[event->reg_offset],
+				kctx->reg_dump[1+event->reg_offset]);
+		event->reg_offset += 2;
+
+	}
+
+
+	return 0;
+}
+static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+
+	dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
+			event->reg_offset, (int)*pos);
+
+	return event;
+}
+
+static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event;
+
+	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);
+
+	/* The condition is trick here. It needs make sure the
+	 * fault hasn't happened and the dumping hasn't been started,
+	 * or the dumping has finished
+	 */
+	if (*pos == 0) {
+		event = kmalloc(sizeof(*event), GFP_KERNEL);
+		if (!event)
+			return NULL;
+		event->reg_offset = 0;
+		if (kbase_job_fault_event_wait(kbdev, event)) {
+			kfree(event);
+			return NULL;
+		}
+
+		/* The cache flush workaround is called in bottom half of
+		 * job done but we delayed it. Now we should clean cache
+		 * earlier. Then the GPU memory dump should be correct.
+		 */
+		kbase_backend_cacheclean(kbdev, event->katom);
+	} else
+		return NULL;
+
+	return event;
+}
+
+static void debug_job_fault_stop(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+
+	/* here we wake up the kbase_jd_done_worker after stop, it needs
+	 * get the memory dump before the register dump in debug daemon,
+	 * otherwise, the memory dump may be incorrect.
+	 */
+
+	if (v != NULL) {
+		kfree(v);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 1");
+
+	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+		if (!list_empty(&kbdev->job_fault_event_list)) {
+			kbase_job_fault_event_dequeue(kbdev,
+				&kbdev->job_fault_event_list);
+			wake_up(&kbdev->job_fault_resume_wq);
+		}
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
+	}
+
+}
+
+static const struct seq_operations ops = {
+	.start = debug_job_fault_start,
+	.next = debug_job_fault_next,
+	.stop = debug_job_fault_stop,
+	.show = debug_job_fault_show,
+};
+
+static int debug_job_fault_open(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_open(file, &ops);
+
+	((struct seq_file *)file->private_data)->private = kbdev;
+	dev_info(kbdev->dev, "debug job fault seq open");
+
+	kbdev->job_fault_debug = true;
+
+	return 0;
+
+}
+
+static int debug_job_fault_release(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_release(in, file);
+
+	kbdev->job_fault_debug = false;
+
+	/* Clean the unprocessed job fault. After that, all the suspended
+	 * contexts could be rescheduled.
+	 */
+	kbase_job_fault_event_cleanup(kbdev);
+
+	dev_info(kbdev->dev, "debug job fault seq close");
+
+	return 0;
+}
+
+static const struct file_operations kbasep_debug_job_fault_fops = {
+	.open = debug_job_fault_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = debug_job_fault_release,
+};
+
+/*
+ *  Initialize debugfs entry for job fault dump
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("job_fault", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_debug_job_fault_fops);
+}
+
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+
+	INIT_LIST_HEAD(&kbdev->job_fault_event_list);
+
+	init_waitqueue_head(&(kbdev->job_fault_wq));
+	init_waitqueue_head(&(kbdev->job_fault_resume_wq));
+	spin_lock_init(&kbdev->job_fault_event_lock);
+
+	kbdev->job_fault_resume_workq = alloc_workqueue(
+			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
+	if (!kbdev->job_fault_resume_workq)
+		return -ENOMEM;
+
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+/*
+ * Release the relevant resource per device
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->job_fault_resume_workq);
+}
+
+
+/*
+ *  Initialize the relevant data structure per context
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
+{
+
+	/* We need allocate double size register range
+	 * Because this memory will keep the register address and value
+	 */
+	kctx->reg_dump = vmalloc(0x4000 * 2);
+	if (kctx->reg_dump == NULL)
+		return;
+
+	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
+		vfree(kctx->reg_dump);
+		kctx->reg_dump = NULL;
+	}
+	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
+	atomic_set(&kctx->job_fault_count, 0);
+
+}
+
+/*
+ *  release the relevant resource per context
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
+{
+	vfree(kctx->reg_dump);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
new file mode 100644
index 0000000..a2bf898
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
@@ -0,0 +1,96 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_JOB_FAULT_H
+#define _KBASE_DEBUG_JOB_FAULT_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
+
+/**
+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue
+ *		per device and initialize the required lists.
+ * @kbdev:	Device pointer
+ *
+ * Return: Zero on success or a negative error code.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_dev_term - Clean up resources created in
+ *		kbase_debug_job_fault_dev_init.
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_context_init - Initialize the relevant
+ *		data structure per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_context_term - Release the relevant
+ *		resource per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_process - Process the failed job.
+ *      It will send a event and wake up the job fault waiting queue
+ *      Then create a work queue to wait for job dump finish
+ *      This function should be called in the interrupt handler and before
+ *      jd_done that make sure the jd_done_worker will be delayed until the
+ *      job dump finish
+ * @katom: The failed atom pointer
+ * @completion_code: the job status
+ * @return true if dump is going on
+ */
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code);
+
+
+/**
+ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
+ *      address during the job fault process, the relevant registers will
+ *      be saved when a job fault happen
+ * @kctx: KBase context pointer
+ * @reg_range: Maximum register address space
+ * @return true if initializing successfully
+ */
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range);
+
+/**
+ * kbase_job_fault_get_reg_snapshot - Read the interested registers for
+ *      failed job dump
+ * @kctx: KBase context pointer
+ * @return true if getting registers successfully
+ */
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
+
+#endif  /*_KBASE_DEBUG_JOB_FAULT_H*/
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
new file mode 100644
index 0000000..dc75c86
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -0,0 +1,306 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+struct debug_mem_mapping {
+	struct list_head node;
+
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long flags;
+
+	u64 start_pfn;
+	size_t nr_pages;
+};
+
+struct debug_mem_data {
+	struct list_head mapping_list;
+	struct kbase_context *kctx;
+};
+
+struct debug_mem_seq_off {
+	struct list_head *lh;
+	size_t offset;
+};
+
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data;
+	struct debug_mem_mapping *map;
+	loff_t pos = *_pos;
+
+	list_for_each_entry(map, &mem_data->mapping_list, node) {
+		if (pos >= map->nr_pages) {
+			pos -= map->nr_pages;
+		} else {
+			data = kmalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				return NULL;
+			data->lh = &map->node;
+			data->offset = pos;
+			return data;
+		}
+	}
+
+	/* Beyond the end */
+	return NULL;
+}
+
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+	kfree(v);
+}
+
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	if (data->offset < map->nr_pages - 1) {
+		data->offset++;
+		++*pos;
+		return data;
+	}
+
+	if (list_is_last(data->lh, &mem_data->mapping_list)) {
+		kfree(data);
+		return NULL;
+	}
+
+	data->lh = data->lh->next;
+	data->offset = 0;
+	++*pos;
+
+	return data;
+}
+
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+	int i, j;
+	struct page *page;
+	uint32_t *mapping;
+	pgprot_t prot = PAGE_KERNEL;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	kbase_gpu_vm_lock(mem_data->kctx);
+
+	if (data->offset >= map->alloc->nents) {
+		seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+				data->offset) << PAGE_SHIFT);
+		goto out;
+	}
+
+	if (!(map->flags & KBASE_REG_CPU_CACHED))
+		prot = pgprot_writecombine(prot);
+
+	page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset]));
+	mapping = vmap(&page, 1, VM_MAP, prot);
+	if (!mapping)
+		goto out;
+
+	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+		seq_printf(m, "%016llx:", i + ((map->start_pfn +
+				data->offset) << PAGE_SHIFT));
+
+		for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+			seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+		seq_putc(m, '\n');
+	}
+
+	vunmap(mapping);
+
+	seq_putc(m, '\n');
+
+out:
+	kbase_gpu_vm_unlock(mem_data->kctx);
+	return 0;
+}
+
+static const struct seq_operations ops = {
+	.start = debug_mem_start,
+	.next = debug_mem_next,
+	.stop = debug_mem_stop,
+	.show = debug_mem_show,
+};
+
+static int debug_mem_zone_open(struct rb_root *rbtree,
+						struct debug_mem_data *mem_data)
+{
+	int ret = 0;
+	struct rb_node *p;
+	struct kbase_va_region *reg;
+	struct debug_mem_mapping *mapping;
+
+	for (p = rb_first(rbtree); p; p = rb_next(p)) {
+		reg = rb_entry(p, struct kbase_va_region, rblink);
+
+		if (reg->gpu_alloc == NULL)
+			/* Empty region - ignore */
+			continue;
+
+		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		mapping->start_pfn = reg->start_pfn;
+		mapping->nr_pages = reg->nr_pages;
+		mapping->flags = reg->flags;
+		list_add_tail(&mapping->node, &mem_data->mapping_list);
+	}
+
+out:
+	return ret;
+}
+
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+	struct file *kctx_file = i->i_private;
+	struct kbase_context *kctx = kctx_file->private_data;
+	struct debug_mem_data *mem_data;
+	int ret;
+
+	ret = seq_open(file, &ops);
+	if (ret)
+		return ret;
+
+	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	if (!mem_data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mem_data->kctx = kctx;
+
+	INIT_LIST_HEAD(&mem_data->mapping_list);
+
+	get_file(kctx_file);
+
+	kbase_gpu_vm_lock(kctx);
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	((struct seq_file *)file->private_data)->private = mem_data;
+
+	return 0;
+
+out:
+	if (mem_data) {
+		while (!list_empty(&mem_data->mapping_list)) {
+			struct debug_mem_mapping *mapping;
+
+			mapping = list_first_entry(&mem_data->mapping_list,
+					struct debug_mem_mapping, node);
+			kbase_mem_phy_alloc_put(mapping->alloc);
+			list_del(&mapping->node);
+			kfree(mapping);
+		}
+		fput(kctx_file);
+		kfree(mem_data);
+	}
+	seq_release(i, file);
+	return ret;
+}
+
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+	struct file *kctx_file = inode->i_private;
+	struct seq_file *sfile = file->private_data;
+	struct debug_mem_data *mem_data = sfile->private;
+	struct debug_mem_mapping *mapping;
+
+	seq_release(inode, file);
+
+	while (!list_empty(&mem_data->mapping_list)) {
+		mapping = list_first_entry(&mem_data->mapping_list,
+				struct debug_mem_mapping, node);
+		kbase_mem_phy_alloc_put(mapping->alloc);
+		list_del(&mapping->node);
+		kfree(mapping);
+	}
+
+	kfree(mem_data);
+
+	fput(kctx_file);
+
+	return 0;
+}
+
+static const struct file_operations kbase_debug_mem_view_fops = {
+	.open = debug_mem_open,
+	.release = debug_mem_release,
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+/**
+ * kbase_debug_mem_view_init - Initialise the mem_view sysfs file
+ * @kctx_file: The /dev/mali0 file instance for the context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct file *kctx_file)
+{
+	struct kbase_context *kctx = kctx_file->private_data;
+
+	debugfs_create_file("mem_view", S_IRUGO, kctx->kctx_dentry, kctx_file,
+			&kbase_debug_mem_view_fops);
+}
+
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
new file mode 100644
index 0000000..20ab51a
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -0,0 +1,25 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+void kbase_debug_mem_view_init(struct file *kctx_file);
+
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100644
index 0000000..290d460
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,1581 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Defintions (types, defines, etcs) common to Kbase. They are placed here to
+ * allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_mmu_mode.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_pm.h>
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#include <linux/bus_logger.h>
+#endif
+
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#endif				/* CONFIG_SYNC */
+
+#include "mali_kbase_dma_fence.h"
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif				/* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_DEVFREQ */
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#if defined(CONFIG_PM_RUNTIME) || \
+	(defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+#define KBASE_PM_RUNTIME 1
+#endif
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif				/* CONFIG_MALI_DEBUG */
+#endif				/* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT             1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT           500
+
+/**
+ * Prevent soft-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * Therefore, soft stop may still be disabled due to HW issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+
+/**
+ * Prevent hard-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS        3
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS              16
+
+/* mmu */
+#define MIDGARD_MMU_VA_BITS 48
+
+#if MIDGARD_MMU_VA_BITS > 39
+#define MIDGARD_MMU_TOPLEVEL    0
+#else
+#define MIDGARD_MMU_TOPLEVEL    1
+#endif
+
+#define MIDGARD_MMU_BOTTOMLEVEL 3
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID     (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8	/* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_hwaccess_defs.h"
+
+#define KBASEP_FORCE_REPLAY_DISABLED 0
+
+/* Maximum force replay limit when randomization is enabled */
+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+
+/** Atom has been previously soft-stoppped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has been previously retried to execute */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has fail dependency on cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom is currently in the list of atoms blocked on cross-slot dependencies */
+#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in protected mode */
+#define KBASE_KATOM_FLAG_PROTECTED (1<<11)
+/* Atom has been stored in runnable_tree */
+#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS  (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ *  entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+		(JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+/* Serialize atoms within a slot (ie only one atom per job slot) */
+#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0)
+/* Serialize atoms between slots (ie only one job slot running at any time) */
+#define KBASE_SERIALIZE_INTER_SLOT (1 << 1)
+/* Reset the GPU after each atom completion */
+#define KBASE_SERIALIZE_RESET (1 << 2)
+
+#ifdef CONFIG_DEBUG_FS
+struct base_job_fault_event {
+
+	u32 event_code;
+	struct kbase_jd_atom *katom;
+	struct work_struct job_fault_work;
+	struct list_head head;
+	int reg_offset;
+};
+
+#endif
+
+struct kbase_jd_atom_dependency {
+	struct kbase_jd_atom *atom;
+	u8 dep_type;
+};
+
+/**
+ * struct kbase_io_access - holds information about 1 register access
+ *
+ * @addr: first bit indicates r/w (r=0, w=1)
+ * @value: value written or read
+ */
+struct kbase_io_access {
+	uintptr_t addr;
+	u32 value;
+};
+
+/**
+ * struct kbase_io_history - keeps track of all recent register accesses
+ *
+ * @enabled: true if register accesses are recorded, false otherwise
+ * @lock: spinlock protecting kbase_io_access array
+ * @count: number of registers read/written
+ * @size: number of elements in kbase_io_access array
+ * @buf: array of kbase_io_access
+ */
+struct kbase_io_history {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool enabled;
+#else
+	u32 enabled;
+#endif
+
+	spinlock_t lock;
+	size_t count;
+	u16 size;
+	struct kbase_io_access *buf;
+};
+
+/**
+ * @brief The function retrieves a read-only reference to the atom field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return readonly reference to dependent ATOM.
+ */
+static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return (const struct kbase_jd_atom *)(dep->atom);
+}
+
+/**
+ * @brief The function retrieves a read-only reference to the dependency type field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return A dependency type value.
+ */
+static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return dep->dep_type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency.
+ * @param[in] a      The ATOM to be set as a dependency.
+ * @param     type   The ATOM dependency type to be set.
+ *
+ */
+static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
+		struct kbase_jd_atom *a, u8 type)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = a;
+	dep->dep_type = type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency to be cleared.
+ *
+ */
+static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = NULL;
+	dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
+
+enum kbase_atom_gpu_rb_state {
+	/* Atom is not currently present in slot ringbuffer */
+	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
+	/* Atom is in slot ringbuffer but is blocked on a previous atom */
+	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+	/* Atom is in slot ringbuffer but is waiting for a previous protected
+	 * mode transition to complete */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
+	/* Atom is in slot ringbuffer but is waiting for proected mode
+	 * transition */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
+	/* Atom is in slot ringbuffer but is waiting for cores to become
+	 * available */
+	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
+	/* Atom is in slot ringbuffer but is blocked on affinity */
+	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
+	/* Atom is in slot ringbuffer and ready to run */
+	KBASE_ATOM_GPU_RB_READY,
+	/* Atom is in slot ringbuffer and has been submitted to the GPU */
+	KBASE_ATOM_GPU_RB_SUBMITTED,
+	/* Atom must be returned to JS as soon as it reaches the head of the
+	 * ringbuffer due to a previous failure */
+	KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1
+};
+
+enum kbase_atom_enter_protected_state {
+	/*
+	 * Starting state:
+	 * Check if a transition into protected mode is required.
+	 *
+	 * NOTE: The integer value of this must
+	 *       match KBASE_ATOM_EXIT_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
+	/* Wait for vinstr to suspend. */
+	KBASE_ATOM_ENTER_PROTECTED_VINSTR,
+	/* Wait for the L2 to become idle in preparation for
+	 * the coherency change. */
+	KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
+	/* End state;
+	 * Prepare coherency change. */
+	KBASE_ATOM_ENTER_PROTECTED_FINISHED,
+};
+
+enum kbase_atom_exit_protected_state {
+	/*
+	 * Starting state:
+	 * Check if a transition out of protected mode is required.
+	 *
+	 * NOTE: The integer value of this must
+	 *       match KBASE_ATOM_ENTER_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
+	/* Wait for the L2 to become idle in preparation
+	 * for the reset. */
+	KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
+	/* Issue the protected reset. */
+	KBASE_ATOM_EXIT_PROTECTED_RESET,
+	/* End state;
+	 * Wait for the reset to complete. */
+	KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
+};
+
+struct kbase_ext_res {
+	u64 gpu_address;
+	struct kbase_mem_phy_alloc *alloc;
+};
+
+struct kbase_jd_atom {
+	struct work_struct work;
+	ktime_t start_timestamp;
+
+	struct base_jd_udata udata;
+	struct kbase_context *kctx;
+
+	struct list_head dep_head[2];
+	struct list_head dep_item[2];
+	const struct kbase_jd_atom_dependency dep[2];
+	/* List head used during job dispatch job_done processing - as
+	 * dependencies may not be entirely resolved at this point, we need to
+	 * use a separate list head. */
+	struct list_head jd_item;
+	/* true if atom's jd_item is currently on a list. Prevents atom being
+	 * processed twice. */
+	bool in_jd_list;
+
+	u16 nr_extres;
+	struct kbase_ext_res *extres;
+
+	u32 device_nr;
+	u64 affinity;
+	u64 jc;
+	enum kbase_atom_coreref_state coreref_state;
+#ifdef CONFIG_KDS
+	struct list_head node;
+	struct kds_resource_set *kds_rset;
+	bool kds_dep_satisfied;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_SYNC
+	struct sync_fence *fence;
+	struct sync_fence_waiter sync_waiter;
+#endif				/* CONFIG_SYNC */
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		/* This points to the dma-buf fence for this atom. If this is
+		 * NULL then there is no fence for this atom and the other
+		 * fields related to dma_fence may have invalid data.
+		 *
+		 * The context and seqno fields contain the details for this
+		 * fence.
+		 *
+		 * This fence is signaled when the katom is completed,
+		 * regardless of the event_code of the katom (signal also on
+		 * failure).
+		 */
+		struct fence *fence;
+		/* The dma-buf fence context number for this atom. A unique
+		 * context number is allocated to each katom in the context on
+		 * context creation.
+		 */
+		unsigned int context;
+		/* The dma-buf fence sequence number for this atom. This is
+		 * increased every time this katom uses dma-buf fence.
+		 */
+		atomic_t seqno;
+		/* This contains a list of all callbacks set up to wait on
+		 * other fences.  This atom must be held back from JS until all
+		 * these callbacks have been called and dep_count have reached
+		 * 0. The initial value of dep_count must be equal to the
+		 * number of callbacks on this list.
+		 *
+		 * This list is protected by jctx.lock. Callbacks are added to
+		 * this list when the atom is built and the wait are set up.
+		 * All the callbacks then stay on the list until all callbacks
+		 * have been called and the atom is queued, or cancelled, and
+		 * then all callbacks are taken off the list and freed.
+		 */
+		struct list_head callbacks;
+		/* Atomic counter of number of outstandind dma-buf fence
+		 * dependencies for this atom. When dep_count reaches 0 the
+		 * atom may be queued.
+		 *
+		 * The special value "-1" may only be set after the count
+		 * reaches 0, while holding jctx.lock. This indicates that the
+		 * atom has been handled, either queued in JS or cancelled.
+		 *
+		 * If anyone but the dma-fence worker sets this to -1 they must
+		 * ensure that any potentially queued worker must have
+		 * completed before allowing the atom to be marked as unused.
+		 * This can be done by flushing the fence work queue:
+		 * kctx->dma_fence.wq.
+		 */
+		atomic_t dep_count;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
+	enum base_jd_event_code event_code;
+	base_jd_core_req core_req;	    /**< core requirements */
+	/** Job Slot to retry submitting to if submission from IRQ handler failed
+	 *
+	 * NOTE: see if this can be unified into the another member e.g. the event */
+	int retry_submit_on_slot;
+
+	u32 ticks;
+	/* JS atom priority with respect to other atoms on its kctx. */
+	int sched_priority;
+
+	int poking;		/* BASE_HW_ISSUE_8316 */
+
+	wait_queue_head_t completed;
+	enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	int work_id;
+#endif
+	/* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
+	int slot_nr;
+
+	u32 atom_flags;
+
+	/* Number of times this atom has been retried. Used by replay soft job.
+	 */
+	int retry_count;
+
+	enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+	u64 need_cache_flush_cores_retained;
+
+	atomic_t blocked;
+
+	/* Pointer to atom that this atom has same-slot dependency on */
+	struct kbase_jd_atom *pre_dep;
+	/* Pointer to atom that has same-slot dependency on this atom */
+	struct kbase_jd_atom *post_dep;
+
+	/* Pointer to atom that this atom has cross-slot dependency on */
+	struct kbase_jd_atom *x_pre_dep;
+	/* Pointer to atom that has cross-slot dependency on this atom */
+	struct kbase_jd_atom *x_post_dep;
+
+	/* The GPU's flush count recorded at the time of submission, used for
+	 * the cache flush optimisation */
+	u32 flush_id;
+
+	struct kbase_jd_atom_backend backend;
+#ifdef CONFIG_DEBUG_FS
+	struct base_job_fault_event fault_event;
+#endif
+
+	/* List head used for three different purposes:
+	 *  1. Overflow list for JS ring buffers. If an atom is ready to run,
+	 *     but there is no room in the JS ring buffer, then the atom is put
+	 *     on the ring buffer's overflow list using this list node.
+	 *  2. List of waiting soft jobs.
+	 */
+	struct list_head queue;
+
+	/* Used to keep track of all JIT free/alloc jobs in submission order
+	 */
+	struct list_head jit_node;
+	bool jit_blocked;
+
+	/* If non-zero, this indicates that the atom will fail with the set
+	 * event_code when the atom is processed. */
+	enum base_jd_event_code will_fail_event_code;
+
+	/* Atoms will only ever be transitioning into, or out of
+	 * protected mode so we do not need two separate fields.
+	 */
+	union {
+		enum kbase_atom_enter_protected_state enter;
+		enum kbase_atom_exit_protected_state exit;
+	} protected_state;
+
+	struct rb_node runnable_tree_node;
+
+	/* 'Age' of atom relative to other atoms in the context. */
+	u32 age;
+};
+
+static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
+{
+	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+struct kbase_jd_context {
+	struct mutex lock;
+	struct kbasep_js_kctx_info sched_info;
+	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+	/** Tracks all job-dispatch jobs.  This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	u32 job_nr;
+
+	/** Waitq that reflects whether there are no jobs (including SW-only
+	 * dependency jobs). This is set when no jobs are present on the ctx,
+	 * and clear when there are jobs.
+	 *
+	 * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
+	 * the Job Scheduler is unaware of jobs that are blocked on dependencies,
+	 * and SW-only dependency jobs.
+	 *
+	 * This waitq can be waited upon to find out when the context jobs are all
+	 * done/cancelled (including those that might've been blocked on
+	 * dependencies) - and so, whether it can be terminated. However, it should
+	 * only be terminated once it is not present in the run-pool (see
+	 * kbasep_js_kctx_info::ctx::is_scheduled).
+	 *
+	 * Since the waitq is only set under kbase_jd_context::lock,
+	 * the waiter should also briefly obtain and drop kbase_jd_context::lock to
+	 * guarentee that the setter has completed its work on the kbase_context
+	 *
+	 * This must be updated atomically with:
+	 * - kbase_jd_context::job_nr */
+	wait_queue_head_t zero_jobs_wait;
+
+	/** Job Done workqueue. */
+	struct workqueue_struct *job_done_wq;
+
+	spinlock_t tb_lock;
+	u32 *tb;
+	size_t tb_wrap_offset;
+
+#ifdef CONFIG_KDS
+	struct kds_callback kds_cb;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_t work_id;
+#endif
+};
+
+struct kbase_device_info {
+	u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+enum {
+	KBASE_AS_POKE_STATE_IN_FLIGHT     = 1<<0,
+	KBASE_AS_POKE_STATE_KILLING_POKE  = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+	u64	transtab;
+	u64	memattr;
+	u64	transcfg;
+};
+
+/**
+ * Important: Our code makes assumptions that a struct kbase_as structure is always at
+ * kbase_device->as[number]. This is used to recover the containing
+ * struct kbase_device from a struct kbase_as structure.
+ *
+ * Therefore, struct kbase_as structures must not be allocated anywhere else.
+ */
+struct kbase_as {
+	int number;
+
+	struct workqueue_struct *pf_wq;
+	struct work_struct work_pagefault;
+	struct work_struct work_busfault;
+	enum kbase_mmu_fault_type fault_type;
+	bool protected_mode;
+	u32 fault_status;
+	u64 fault_addr;
+	u64 fault_extra_addr;
+
+	struct kbase_mmu_setup current_setup;
+
+	/* BASE_HW_ISSUE_8316  */
+	struct workqueue_struct *poke_wq;
+	struct work_struct poke_work;
+	/** Protected by hwaccess_lock */
+	int poke_refcount;
+	/** Protected by hwaccess_lock */
+	kbase_as_poke_state poke_state;
+	struct hrtimer poke_timer;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+struct kbasep_mem_device {
+	atomic_t used_pages;   /* Tracks usage of OS shared memory. Updated
+				   when OS memory is allocated/freed. */
+
+};
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+	/* Comma on its own, to extend the list */
+	,
+	/* Must be the last in the enum */
+	KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT  (((u8)1) << 1)
+
+struct kbase_trace {
+	struct timespec timestamp;
+	u32 thread_id;
+	u32 cpu;
+	void *ctx;
+	bool katom;
+	int atom_number;
+	u64 atom_udata[2];
+	u64 gpu_addr;
+	unsigned long info_val;
+	u8 code;
+	u8 jobslot;
+	u8 refcount;
+	u8 flags;
+};
+
+/** Event IDs for the power management framework.
+ *
+ * Any of these events might be missed, so they should not be relied upon to
+ * find the precise state of the GPU at a particular time in the
+ * trace. Overall, we should get a high percentage of these events for
+ * statisical purposes, and so a few missing should not be a problem */
+enum kbase_timeline_pm_event {
+	/* helper for tests */
+	KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** Event reserved for backwards compatibility with 'init' events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** The power state of the device has changed.
+	 *
+	 * Specifically, the device has reached a desired or available state.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
+
+	/** The GPU is becoming active.
+	 *
+	 * This event is sent when the first context is about to use the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
+
+	/** The GPU is becoming idle.
+	 *
+	 * This event is sent when the last context has finished using the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
+
+	/** Event reserved for backwards compatibility with 'policy_change'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_4,
+
+	/** Event reserved for backwards compatibility with 'system_suspend'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_5,
+
+	/** Event reserved for backwards compatibility with 'system_resume'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_6,
+
+	/** The job scheduler is requesting to power up/down cores.
+	 *
+	 * This event is sent when:
+	 * - powered down cores are needed to complete a job
+	 * - powered up cores are not needed anymore
+	 */
+	KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+
+	KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+};
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+struct kbase_trace_kctx_timeline {
+	atomic_t jd_atoms_in_flight;
+	u32 owner_tgid;
+};
+
+struct kbase_trace_kbdev_timeline {
+	/* Note: strictly speaking, not needed, because it's in sync with
+	 * kbase_device::jm_slots[]::submitted_nr
+	 *
+	 * But it's kept as an example of how to add global timeline tracking
+	 * information
+	 *
+	 * The caller must hold hwaccess_lock when accessing this */
+	u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
+
+	/* Last UID for each PM event */
+	atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
+	/* Counter for generating PM event UIDs */
+	atomic_t pm_event_uid_counter;
+	/*
+	 * L2 transition state - true indicates that the transition is ongoing
+	 * Expected to be protected by hwaccess_lock */
+	bool l2_transitioning;
+};
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+
+struct kbasep_kctx_list_element {
+	struct list_head link;
+	struct kbase_context *kctx;
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_device_data {
+	/**
+	 * The lock protecting Power Management structures accessed outside of
+	 * IRQ.
+	 *
+	 * This lock must also be held whenever the GPU is being powered on or
+	 * off.
+	 */
+	struct mutex lock;
+
+	/** The reference count of active contexts on this device. */
+	int active_count;
+	/** Flag indicating suspending/suspended */
+	bool suspending;
+	/* Wait queue set when active_count == 0 */
+	wait_queue_head_t zero_active_count_wait;
+
+	/**
+	 * Bit masks identifying the available shader cores that are specified
+	 * via sysfs. One mask per job slot.
+	 */
+	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
+	u64 debug_core_mask_all;
+
+	/**
+	 * Callback for initializing the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 *
+	 * @return 0 on success, else error code
+	 */
+	 int (*callback_power_runtime_init)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback for terminating the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 */
+	void (*callback_power_runtime_term)(struct kbase_device *kbdev);
+
+	/* Time in milliseconds between each dvfs sample */
+	u32 dvfs_period;
+
+	/* Period of GPU poweroff timer */
+	ktime_t gpu_poweroff_time;
+
+	/* Number of ticks of GPU poweroff timer before shader is powered off */
+	int poweroff_shader_ticks;
+
+	/* Number of ticks of GPU poweroff timer before GPU is powered off */
+	int poweroff_gpu_ticks;
+
+	struct kbase_pm_backend_data backend;
+};
+
+/**
+ * struct kbase_protected_ops - Platform specific functions for GPU protected
+ * mode operations
+ * @protected_mode_enter: Callback to enter protected mode on the GPU
+ * @protected_mode_reset: Callback to reset the GPU and exit protected mode.
+ * @protected_mode_supported: Callback to check if protected mode is supported.
+ */
+struct kbase_protected_ops {
+	/**
+	 * protected_mode_enter() - Enter protected mode on the GPU
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enter)(struct kbase_device *kbdev);
+
+	/**
+	 * protected_mode_reset() - Reset the GPU and exit protected mode
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_reset)(struct kbase_device *kbdev);
+
+	/**
+	 * protected_mode_supported() - Check if protected mode is supported
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	bool (*protected_mode_supported)(struct kbase_device *kbdev);
+};
+
+
+/**
+ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
+ * @kbdev:     Kbase device where memory is used
+ * @cur_size:  Number of free pages currently in the pool (may exceed @max_size
+ *             in some corner cases)
+ * @max_size:  Maximum number of free pages in the pool
+ * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size
+ *             and @page_list
+ * @page_list: List of free pages in the pool
+ * @reclaim:   Shrinker for kernel reclaim of free pages
+ * @next_pool: Pointer to next pool where pages can be allocated when this pool
+ *             is empty. Pages will spill over to the next pool when this pool
+ *             is full. Can be NULL if there is no next pool.
+ */
+struct kbase_mem_pool {
+	struct kbase_device *kbdev;
+	size_t              cur_size;
+	size_t              max_size;
+	spinlock_t          pool_lock;
+	struct list_head    page_list;
+	struct shrinker     reclaim;
+
+	struct kbase_mem_pool *next_pool;
+};
+
+
+#define DEVNAME_SIZE	16
+
+struct kbase_device {
+	s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];
+
+	u32 hw_quirks_sc;
+	u32 hw_quirks_tiler;
+	u32 hw_quirks_mmu;
+	u32 hw_quirks_jm;
+
+	struct list_head entry;
+	struct device *dev;
+	struct miscdevice mdev;
+	u64 reg_start;
+	size_t reg_size;
+	void __iomem *reg;
+
+	struct {
+		int irq;
+		int flags;
+	} irqs[3];
+
+	struct clk *clock;
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulator;
+#endif
+	char devname[DEVNAME_SIZE];
+
+#ifdef CONFIG_MALI_NO_MALI
+	void *model;
+	struct kmem_cache *irq_slab;
+	struct workqueue_struct *irq_workq;
+	atomic_t serving_job_irq;
+	atomic_t serving_gpu_irq;
+	atomic_t serving_mmu_irq;
+	spinlock_t reg_op_lock;
+#endif	/* CONFIG_MALI_NO_MALI */
+
+	struct kbase_pm_device_data pm;
+	struct kbasep_js_device_data js_data;
+	struct kbase_mem_pool mem_pool;
+	struct kbasep_mem_device memdev;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	struct kbase_as as[BASE_MAX_NR_AS];
+
+	spinlock_t mmu_mask_change;
+
+	struct kbase_gpu_props gpu_props;
+
+	/** List of SW workarounds for HW issues */
+	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+	/** List of features available */
+	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+
+	/* Bitmaps of cores that are currently in use (running jobs).
+	 * These should be kept up to date by the job scheduler.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 *
+	 * kbase_pm_check_transitions_nolock() should be called when bits are
+	 * cleared to update the power management system and allow transitions to
+	 * occur. */
+	u64 shader_inuse_bitmap;
+
+	/* Refcount for cores in use */
+	u32 shader_inuse_cnt[64];
+
+	/* Bitmaps of cores the JS needs for jobs ready to run */
+	u64 shader_needed_bitmap;
+
+	/* Refcount for cores needed */
+	u32 shader_needed_cnt[64];
+
+	u32 tiler_inuse_cnt;
+
+	u32 tiler_needed_cnt;
+
+	/* struct for keeping track of the disjoint information
+	 *
+	 * The state  is > 0 if the GPU is in a disjoint state. Otherwise 0
+	 * The count is the number of disjoint events that have occurred on the GPU
+	 */
+	struct {
+		atomic_t count;
+		atomic_t state;
+	} disjoint_event;
+
+	/* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
+	u32 l2_users_count;
+
+	/* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be
+	 * submitted to these cores. These are updated by the power management code. The job scheduler should avoid
+	 * submitting new jobs to any cores that are not marked as available.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 */
+	u64 shader_available_bitmap;
+	u64 tiler_available_bitmap;
+	u64 l2_available_bitmap;
+	u64 stack_available_bitmap;
+
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+
+	s8 nr_hw_address_spaces;			  /**< Number of address spaces in the GPU (constant after driver initialisation) */
+	s8 nr_user_address_spaces;			  /**< Number of address spaces available to user contexts */
+
+	/* Structure used for instrumentation and HW counters dumping */
+	struct kbase_hwcnt {
+		/* The lock should be used when accessing any of the following members */
+		spinlock_t lock;
+
+		struct kbase_context *kctx;
+		u64 addr;
+
+		struct kbase_instr_backend backend;
+	} hwcnt;
+
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	/*value to be written to the irq_throttle register each time an irq is served */
+	atomic_t irq_throttle_cycles;
+
+#if KBASE_TRACE_ENABLE
+	spinlock_t              trace_lock;
+	u16                     trace_first_out;
+	u16                     trace_next_in;
+	struct kbase_trace            *trace_rbuf;
+#endif
+
+	u32 reset_timeout_ms;
+
+	struct mutex cacheclean_lock;
+
+	/* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
+	void *platform_context;
+
+	/* List of kbase_contexts created */
+	struct list_head        kctx_list;
+	struct mutex            kctx_list_lock;
+
+#ifdef CONFIG_PM_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freq;
+	unsigned long current_voltage;
+#ifdef CONFIG_DEVFREQ_THERMAL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+	struct devfreq_cooling_device *devfreq_cooling;
+#else
+	struct thermal_cooling_device *devfreq_cooling;
+#endif
+	struct list_head ipa_power_models;
+	struct kbase_ipa_model *ipa_current_model;
+	struct kbase_ipa_model *ipa_configured_model;
+	struct kbase_ipa_model *ipa_fallback_model;
+#endif
+#endif
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kbdev_timeline timeline;
+#endif
+
+	/*
+	 * Control for enabling job dump on failure, set when control debugfs
+	 * is opened.
+	 */
+	bool job_fault_debug;
+
+#ifdef CONFIG_DEBUG_FS
+	/* directory for debugfs entries */
+	struct dentry *mali_debugfs_directory;
+	/* Root directory for per context entry */
+	struct dentry *debugfs_ctx_directory;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* bit for each as, set if there is new data to report */
+	u64 debugfs_as_read_bitmap;
+#endif /* CONFIG_MALI_DEBUG */
+
+	/* failed job dump, used for separate debug process */
+	wait_queue_head_t job_fault_wq;
+	wait_queue_head_t job_fault_resume_wq;
+	struct workqueue_struct *job_fault_resume_workq;
+	struct list_head job_fault_event_list;
+	spinlock_t job_fault_event_lock;
+	struct kbase_context *kctx_fault;
+
+#if !MALI_CUSTOMER_RELEASE
+	/* Per-device data for register dumping interface */
+	struct {
+		u16 reg_offset; /* Offset of a GPU_CONTROL register to be
+				   dumped upon request */
+	} regs_dump_debugfs_data;
+#endif /* !MALI_CUSTOMER_RELEASE */
+#endif /* CONFIG_DEBUG_FS */
+
+	/* fbdump profiling controls set by gator */
+	u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
+
+
+#if MALI_CUSTOMER_RELEASE == 0
+	/* Number of jobs that are run before a job is forced to fail and
+	 * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
+	 * failures. */
+	int force_replay_limit;
+	/* Count of jobs between forced failures. Incremented on each job. A
+	 * job is forced to fail once this is greater than or equal to
+	 * force_replay_limit. */
+	int force_replay_count;
+	/* Core requirement for jobs to be failed and replayed. May be zero. */
+	base_jd_core_req force_replay_core_req;
+	/* true if force_replay_limit should be randomized. The random
+	 * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+	 */
+	bool force_replay_random;
+#endif
+
+	/* Total number of created contexts */
+	atomic_t ctx_num;
+
+#ifdef CONFIG_DEBUG_FS
+	/* Holds the most recent register accesses */
+	struct kbase_io_history io_history;
+#endif /* CONFIG_DEBUG_FS */
+
+	struct kbase_hwaccess_data hwaccess;
+
+	/* Count of page/bus faults waiting for workqueues to process */
+	atomic_t faults_pending;
+
+	/* true if GPU is powered off or power off operation is in progress */
+	bool poweroff_pending;
+
+
+	/* defaults for new context created for this device */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active_default;
+#else
+	u32 infinite_cache_active_default;
+#endif
+	size_t mem_pool_max_size_default;
+
+	/* current gpu coherency mode */
+	u32 current_gpu_coherency_mode;
+	/* system coherency mode  */
+	u32 system_coherency;
+	/* Flag to track when cci snoops have been enabled on the interface */
+	bool cci_snoop_enabled;
+
+	/* SMC function IDs to call into Trusted firmware to enable/disable
+	 * cache snooping. Value of 0 indicates that they are not used
+	 */
+	u32 snoop_enable_smc;
+	u32 snoop_disable_smc;
+
+	/* Protected operations */
+	struct kbase_protected_ops *protected_ops;
+
+	/*
+	 * true when GPU is put into protected mode
+	 */
+	bool protected_mode;
+
+	/*
+	 * true when GPU is transitioning into or out of protected mode
+	 */
+	bool protected_mode_transition;
+
+	/*
+	 * true if protected mode is supported
+	 */
+	bool protected_mode_support;
+
+
+#ifdef CONFIG_MALI_DEBUG
+	wait_queue_head_t driver_inactive_wait;
+	bool driver_inactive;
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	/*
+	 * Bus logger integration.
+	 */
+	struct bus_logger_client *buslogger;
+#endif
+	/* Boolean indicating if an IRQ flush during reset is in progress. */
+	bool irq_reset_flush;
+
+	/* list of inited sub systems. Used during terminate/error recovery */
+	u32 inited_subsys;
+
+	spinlock_t hwaccess_lock;
+
+	/* Protects access to MMU operations */
+	struct mutex mmu_hw_mutex;
+
+	/* Current serialization mode. See KBASE_SERIALIZE_* for details */
+	u8 serialize_jobs;
+};
+
+/**
+ * struct jsctx_queue - JS context atom queue
+ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this
+ *                 job slot.
+ * @x_dep_head:    Head item of the linked list of atoms blocked on cross-slot
+ *                 dependencies. Atoms on this list will be moved to the
+ *                 runnable_tree when the blocking atom completes.
+ *
+ * hwaccess_lock must be held when accessing this structure.
+ */
+struct jsctx_queue {
+	struct rb_root runnable_tree;
+	struct list_head x_dep_head;
+};
+
+
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
+					 (((minor) & 0xFFF) << 8) | \
+					 ((0 & 0xFF) << 0))
+
+/**
+ * enum kbase_context_flags - Flags for kbase contexts
+ *
+ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
+ * process on a 64-bit kernel.
+ *
+ * @KCTX_RUNNABLE_REF: Set when context is counted in
+ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
+ *
+ * @KCTX_ACTIVE: Set when the context is active.
+ *
+ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
+ * context.
+ *
+ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
+ * initialized.
+ *
+ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
+ * allocations. Existing allocations will not change.
+ *
+ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
+ *
+ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept
+ * scheduled in.
+ *
+ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
+ * This is only ever updated whilst the jsctx_mutex is held.
+ *
+ * @KCTX_DYING: Set when the context process is in the process of being evicted.
+ *
+ * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this
+ * context, to disable use of implicit dma-buf fences. This is used to avoid
+ * potential synchronization deadlocks.
+ *
+ * All members need to be separate bits. This enum is intended for use in a
+ * bitmask where multiple values get OR-ed together.
+ */
+enum kbase_context_flags {
+	KCTX_COMPAT = 1U << 0,
+	KCTX_RUNNABLE_REF = 1U << 1,
+	KCTX_ACTIVE = 1U << 2,
+	KCTX_PULLED = 1U << 3,
+	KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
+	KCTX_INFINITE_CACHE = 1U << 5,
+	KCTX_SUBMIT_DISABLED = 1U << 6,
+	KCTX_PRIVILEGED = 1U << 7,
+	KCTX_SCHEDULED = 1U << 8,
+	KCTX_DYING = 1U << 9,
+	KCTX_NO_IMPLICIT_SYNC = 1U << 10,
+};
+
+struct kbase_context {
+	struct file *filp;
+	struct kbase_device *kbdev;
+	int id; /* System wide unique id */
+	unsigned long api_version;
+	phys_addr_t pgd;
+	struct list_head event_list;
+	struct list_head event_coalesce_list;
+	struct mutex event_mutex;
+	atomic_t event_closed;
+	struct workqueue_struct *event_workq;
+	atomic_t event_count;
+	int event_coalesce_count;
+
+	atomic_t flags;
+
+	atomic_t                setup_complete;
+	atomic_t                setup_in_progress;
+
+	u64 *mmu_teardown_pages;
+
+	struct page *aliasing_sink_page;
+
+	struct mutex            mmu_lock;
+	struct mutex            reg_lock; /* To be converted to a rwlock? */
+	struct rb_root reg_rbtree_same; /* RB tree of GPU (live) regions,
+					 * SAME_VA zone */
+	struct rb_root reg_rbtree_exec; /* RB tree of GPU (live) regions,
+					 * EXEC zone */
+	struct rb_root reg_rbtree_custom; /* RB tree of GPU (live) regions,
+					 * CUSTOM_VA zone */
+
+	unsigned long    cookies;
+	struct kbase_va_region *pending_regions[BITS_PER_LONG];
+
+	wait_queue_head_t event_queue;
+	pid_t tgid;
+	pid_t pid;
+
+	struct kbase_jd_context jctx;
+	atomic_t used_pages;
+	atomic_t         nonmapped_pages;
+
+	struct kbase_mem_pool mem_pool;
+
+	struct shrinker         reclaim;
+	struct list_head        evict_list;
+
+	struct list_head waiting_soft_jobs;
+	spinlock_t waiting_soft_jobs_lock;
+#ifdef CONFIG_KDS
+	struct list_head waiting_kds_resource;
+#endif
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		struct list_head waiting_resource;
+		struct workqueue_struct *wq;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+	/** This is effectively part of the Run Pool, because it only has a valid
+	 * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
+	 *
+	 * The hwaccess_lock must be held whilst accessing this.
+	 *
+	 * If the context relating to this as_nr is required, you must use
+	 * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
+	 * whilst you're using it. Alternatively, just hold the hwaccess_lock
+	 * to ensure the context doesn't disappear (but this has restrictions on what other locks
+	 * you can take whilst doing this) */
+	int as_nr;
+
+	/* NOTE:
+	 *
+	 * Flags are in jctx.sched_info.ctx.flags
+	 * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
+	 *
+	 * All other flags must be added there */
+	spinlock_t         mm_update_lock;
+	struct mm_struct *process_mm;
+	/* End of the SAME_VA zone */
+	u64 same_va_end;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kctx_timeline timeline;
+#endif
+#ifdef CONFIG_DEBUG_FS
+	/* Content of mem_profile file */
+	char *mem_profile_data;
+	/* Size of @c mem_profile_data */
+	size_t mem_profile_size;
+	/* Mutex guarding memory profile state */
+	struct mutex mem_profile_lock;
+	/* Memory profile directory under debugfs */
+	struct dentry *kctx_dentry;
+
+	/* for job fault debug */
+	unsigned int *reg_dump;
+	atomic_t job_fault_count;
+	/* This list will keep the following atoms during the dump
+	 * in the same context
+	 */
+	struct list_head job_fault_resume_event_list;
+
+#endif /* CONFIG_DEBUG_FS */
+
+	struct jsctx_queue jsctx_queue
+		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+
+	/* Number of atoms currently pulled from this context */
+	atomic_t atoms_pulled;
+	/* Number of atoms currently pulled from this context, per slot */
+	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+	/* Number of atoms currently pulled from this context, per slot and
+	 * priority. Hold hwaccess_lock when accessing */
+	int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][
+			KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	/* true if slot is blocked on the given priority. This will be set on a
+	 * soft-stop */
+	bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	/* Bitmask of slots that can be pulled from */
+	u32 slots_pullable;
+
+	/* Backend specific data */
+	struct kbase_context_backend backend;
+
+	/* Work structure used for deferred ASID assignment */
+	struct work_struct work;
+
+	/* Only one userspace vinstr client per kbase context */
+	struct kbase_vinstr_client *vinstr_cli;
+	struct mutex vinstr_cli_lock;
+
+	/* List of completed jobs waiting for events to be posted */
+	struct list_head completed_jobs;
+	/* Number of work items currently pending on job_done_wq */
+	atomic_t work_count;
+
+	/* Waiting soft-jobs will fail when this timer expires */
+	struct timer_list soft_job_timeout;
+
+	/* JIT allocation management */
+	struct kbase_va_region *jit_alloc[256];
+	struct list_head jit_active_head;
+	struct list_head jit_pool_head;
+	struct list_head jit_destroy_head;
+	struct mutex jit_evict_lock;
+	struct work_struct jit_work;
+
+	/* A list of the JIT soft-jobs in submission order
+	 * (protected by kbase_jd_context.lock)
+	 */
+	struct list_head jit_atoms_head;
+	/* A list of pending JIT alloc soft-jobs (using the 'queue' list_head)
+	 * (protected by kbase_jd_context.lock)
+	 */
+	struct list_head jit_pending_alloc;
+
+	/* External sticky resource management */
+	struct list_head ext_res_meta_head;
+
+	/* Used to record that a drain was requested from atomic context */
+	atomic_t drain_pending;
+
+	/* Current age count, used to determine age for newly submitted atoms */
+	u32 age_count;
+};
+
+/**
+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
+ *                                 to a @kbase_context.
+ * @ext_res_node:                  List head for adding the metadata to a
+ *                                 @kbase_context.
+ * @alloc:                         The physical memory allocation structure
+ *                                 which is mapped.
+ * @gpu_addr:                      The GPU virtual address the resource is
+ *                                 mapped to.
+ *
+ * External resources can be mapped into multiple contexts as well as the same
+ * context multiple times.
+ * As kbase_va_region itself isn't refcounted we can't attach our extra
+ * information to it as it could be removed under our feet leaving external
+ * resources pinned.
+ * This metadata structure binds a single external resource to a single
+ * context, ensuring that per context mapping is tracked separately so it can
+ * be overridden when needed and abuses by the application (freeing the resource
+ * multiple times) don't effect the refcount of the physical allocation.
+ */
+struct kbase_ctx_ext_res_meta {
+	struct list_head ext_res_node;
+	struct kbase_mem_phy_alloc *alloc;
+	u64 gpu_addr;
+};
+
+enum kbase_reg_access_type {
+	REG_READ,
+	REG_WRITE
+};
+
+enum kbase_share_attr_bits {
+	/* (1ULL << 8) bit is reserved */
+	SHARE_BOTH_BITS = (2ULL << 8),	/* inner and outer shareable coherency */
+	SHARE_INNER_BITS = (3ULL << 8)	/* inner shareable coherency */
+};
+
+/**
+ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
+ * @kbdev: kbase device
+ *
+ * Return: true if the device access are coherent, false if not.
+ */
+static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
+			(kbdev->system_coherency == COHERENCY_ACE))
+		return true;
+
+	return false;
+}
+
+/* Conversion helpers for setting up high resolution timers */
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
+
+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
+#define KBASE_CLEAN_CACHE_MAX_LOOPS     100000
+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
+#define KBASE_AS_INACTIVE_MAX_LOOPS     100000
+
+/* Maximum number of times a job can be replayed */
+#define BASEP_JD_REPLAY_LIMIT 15
+
+/* JobDescriptorHeader - taken from the architecture specifications, the layout
+ * is currently identical for all GPU archs. */
+struct job_descriptor_header {
+	u32 exception_status;
+	u32 first_incomplete_task;
+	u64 fault_pointer;
+	u8 job_descriptor_size : 1;
+	u8 job_type : 7;
+	u8 job_barrier : 1;
+	u8 _reserved_01 : 1;
+	u8 _reserved_1 : 1;
+	u8 _reserved_02 : 1;
+	u8 _reserved_03 : 1;
+	u8 _reserved_2 : 1;
+	u8 _reserved_04 : 1;
+	u8 _reserved_05 : 1;
+	u16 job_index;
+	u16 job_dependency_index_1;
+	u16 job_dependency_index_2;
+	union {
+		u64 _64;
+		u32 _32;
+	} next_job;
+};
+
+#endif				/* _KBASE_DEFS_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
new file mode 100644
index 0000000..7484eec
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -0,0 +1,697 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_profiling_gator_api.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports tracing feature provided in the base module.
+ * Please keep it in sync with the value of base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if KBASE_TRACE_ENABLE
+static const char *kbasep_trace_code_string[] = {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+static int kbasep_trace_init(struct kbase_device *kbdev);
+static void kbasep_trace_term(struct kbase_device *kbdev);
+static void kbasep_trace_hook_wrapper(void *param);
+
+struct kbase_device *kbase_device_alloc(void)
+{
+	return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+	const char format[] = "mali_mmu%d";
+	char name[sizeof(format)];
+	const char poke_format[] = "mali_mmu%d_poker";
+	char poke_name[sizeof(poke_format)];
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		snprintf(poke_name, sizeof(poke_name), poke_format, i);
+
+	snprintf(name, sizeof(name), format, i);
+
+	kbdev->as[i].number = i;
+	kbdev->as[i].fault_addr = 0ULL;
+
+	kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+	if (!kbdev->as[i].pf_wq)
+		return -EINVAL;
+
+	INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+	INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+		struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
+		struct work_struct *poke_work = &kbdev->as[i].poke_work;
+
+		kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+		if (!kbdev->as[i].poke_wq) {
+			destroy_workqueue(kbdev->as[i].pf_wq);
+			return -EINVAL;
+		}
+		KBASE_DEBUG_ASSERT(!object_is_on_stack(poke_work));
+		INIT_WORK(poke_work, kbasep_as_do_poke);
+
+		hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+		poke_timer->function = kbasep_as_poke_timer_callback;
+
+		kbdev->as[i].poke_refcount = 0;
+		kbdev->as[i].poke_state = 0u;
+	}
+
+	return 0;
+}
+
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+	destroy_workqueue(kbdev->as[i].pf_wq);
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		destroy_workqueue(kbdev->as[i].poke_wq);
+}
+
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+	int i, err;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		err = kbase_device_as_init(kbdev, i);
+		if (err)
+			goto free_workqs;
+	}
+
+	return 0;
+
+free_workqs:
+	for (; i > 0; i--)
+		kbase_device_as_term(kbdev, i);
+
+	return err;
+}
+
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+		kbase_device_as_term(kbdev, i);
+}
+
+int kbase_device_init(struct kbase_device * const kbdev)
+{
+	int i, err;
+#ifdef CONFIG_ARM64
+	struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
+
+	spin_lock_init(&kbdev->mmu_mask_change);
+	mutex_init(&kbdev->mmu_hw_mutex);
+#ifdef CONFIG_ARM64
+	kbdev->cci_snoop_enabled = false;
+	np = kbdev->dev->of_node;
+	if (np != NULL) {
+		if (of_property_read_u32(np, "snoop_enable_smc",
+					&kbdev->snoop_enable_smc))
+			kbdev->snoop_enable_smc = 0;
+		if (of_property_read_u32(np, "snoop_disable_smc",
+					&kbdev->snoop_disable_smc))
+			kbdev->snoop_disable_smc = 0;
+		/* Either both or none of the calls should be provided. */
+		if (!((kbdev->snoop_disable_smc == 0
+			&& kbdev->snoop_enable_smc == 0)
+			|| (kbdev->snoop_disable_smc != 0
+			&& kbdev->snoop_enable_smc != 0))) {
+			WARN_ON(1);
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+#endif /* CONFIG_ARM64 */
+	/* Get the list of workarounds for issues on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	err = kbase_hw_set_issues_mask(kbdev);
+	if (err)
+		goto fail;
+
+	/* Set the list of features available on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	kbase_hw_set_features_mask(kbdev);
+
+	kbase_gpuprops_set_features(kbdev);
+
+	/* On Linux 4.0+, dma coherency is determined from device tree */
+#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+	set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif
+
+	/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+	 * device structure was created by device-tree
+	 */
+	if (!kbdev->dev->dma_mask)
+		kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+	err = dma_set_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	err = dma_set_coherent_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+	err = kbase_device_all_as_init(kbdev);
+	if (err)
+		goto as_init_failed;
+
+	spin_lock_init(&kbdev->hwcnt.lock);
+
+	err = kbasep_trace_init(kbdev);
+	if (err)
+		goto term_as;
+
+	mutex_init(&kbdev->cacheclean_lock);
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		kbdev->timeline.slot_atoms_submitted[i] = 0;
+
+	for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
+		atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+	/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
+	for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
+		kbdev->kbase_profiling_controls[i] = 0;
+
+	kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+	atomic_set(&kbdev->ctx_num, 0);
+
+	err = kbase_instr_backend_init(kbdev);
+	if (err)
+		goto term_trace;
+
+	kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+#else
+	kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+#ifdef CONFIG_MALI_DEBUG
+	init_waitqueue_head(&kbdev->driver_inactive_wait);
+#endif /* CONFIG_MALI_DEBUG */
+
+	return 0;
+term_trace:
+	kbasep_trace_term(kbdev);
+term_as:
+	kbase_device_all_as_term(kbdev);
+as_init_failed:
+dma_set_mask_failed:
+fail:
+	return err;
+}
+
+void kbase_device_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+#if KBASE_TRACE_ENABLE
+	kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+	kbase_instr_backend_term(kbdev);
+
+	kbasep_trace_term(kbdev);
+
+	kbase_device_all_as_term(kbdev);
+}
+
+void kbase_device_free(struct kbase_device *kbdev)
+{
+	kfree(kbdev);
+}
+
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(tb);
+
+	/* Interface uses 16-bit value to track last accessed entry. Each entry
+	 * is composed of two 32-bit words.
+	 * This limits the size that can be handled without an overflow. */
+	if (0xFFFF * (2 * sizeof(u32)) < size)
+		return -EINVAL;
+
+	/* set up the header */
+	/* magic number in the first 4 bytes */
+	tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
+	/* Store (write offset = 0, wrap counter = 0, transaction active = no)
+	 * write offset 0 means never written.
+	 * Offsets 1 to (wrap_offset - 1) used to store values when trace started
+	 */
+	tb[1] = 0;
+
+	/* install trace buffer */
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb_wrap_offset = size / 8;
+	kctx->jctx.tb = tb;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+
+	return 0;
+}
+
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb = NULL;
+	kctx->jctx.tb_wrap_offset = 0;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	if (kctx->jctx.tb) {
+		u16 wrap_count;
+		u16 write_offset;
+		u32 *tb = kctx->jctx.tb;
+		u32 header_word;
+
+		header_word = tb[1];
+		KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
+
+		wrap_count = (header_word >> 1) & 0x7FFF;
+		write_offset = (header_word >> 16) & 0xFFFF;
+
+		/* mark as transaction in progress */
+		tb[1] |= 0x1;
+		mb();
+
+		/* calculate new offset */
+		write_offset++;
+		if (write_offset == kctx->jctx.tb_wrap_offset) {
+			/* wrap */
+			write_offset = 1;
+			wrap_count++;
+			wrap_count &= 0x7FFF;	/* 15bit wrap counter */
+		}
+
+		/* store the trace entry at the selected offset */
+		tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
+		tb[write_offset * 2 + 1] = reg_value;
+		mb();
+
+		/* new header word */
+		header_word = (write_offset << 16) | (wrap_count << 1) | 0x0;	/* transaction complete */
+		tb[1] = header_word;
+	}
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	struct kbase_trace *rbuf;
+
+	rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+	if (!rbuf)
+		return -EINVAL;
+
+	kbdev->trace_rbuf = rbuf;
+	spin_lock_init(&kbdev->trace_lock);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	kfree(kbdev->trace_rbuf);
+}
+
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+	s32 written = 0;
+
+	/* Initial part of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
+
+	if (trace_msg->katom)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
+
+	/* NOTE: Could add function callbacks to handle different message types */
+	/* Jobslot present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Refcount present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Rest of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
+}
+
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	dev_dbg(kbdev->dev, "%s", buffer);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+	unsigned long irqflags;
+	struct kbase_trace *trace_msg;
+
+	spin_lock_irqsave(&kbdev->trace_lock, irqflags);
+
+	trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+	/* Fill the message */
+	trace_msg->thread_id = task_pid_nr(current);
+	trace_msg->cpu = task_cpu(current);
+
+	getnstimeofday(&trace_msg->timestamp);
+
+	trace_msg->code = code;
+	trace_msg->ctx = ctx;
+
+	if (NULL == katom) {
+		trace_msg->katom = false;
+	} else {
+		trace_msg->katom = true;
+		trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+		trace_msg->atom_udata[0] = katom->udata.blob[0];
+		trace_msg->atom_udata[1] = katom->udata.blob[1];
+	}
+
+	trace_msg->gpu_addr = gpu_addr;
+	trace_msg->jobslot = jobslot;
+	trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
+	trace_msg->info_val = info_val;
+	trace_msg->flags = flags;
+
+	/* Update the ringbuffer indices */
+	kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+	if (kbdev->trace_next_in == kbdev->trace_first_out)
+		kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+	/* Done */
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	kbdev->trace_first_out = kbdev->trace_next_in;
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 start;
+	u32 end;
+
+	dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	start = kbdev->trace_first_out;
+	end = kbdev->trace_next_in;
+
+	while (start != end) {
+		struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
+
+		kbasep_trace_dump_msg(kbdev, trace_msg);
+
+		start = (start + 1) & KBASE_TRACE_MASK;
+	}
+	dev_dbg(kbdev->dev, "TRACE_END");
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	KBASE_TRACE_CLEAR(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)param;
+
+	kbasep_trace_dump(kbdev);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct trace_seq_state {
+	struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+	u32 start;
+	u32 end;
+};
+
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	if (*pos > KBASE_TRACE_SIZE)
+		return NULL;
+	i = state->start + *pos;
+	if ((state->end >= state->start && i >= state->end) ||
+			i >= state->end + KBASE_TRACE_SIZE)
+		return NULL;
+
+	i &= KBASE_TRACE_MASK;
+
+	return &state->trace_buf[i];
+}
+
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	(*pos)++;
+
+	i = (state->start + *pos) & KBASE_TRACE_MASK;
+	if (i == state->end)
+		return NULL;
+
+	return &state->trace_buf[i];
+}
+
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace *trace_msg = data;
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	seq_printf(s, "%s\n", buffer);
+	return 0;
+}
+
+static const struct seq_operations kbasep_trace_seq_ops = {
+	.start = kbasep_trace_seq_start,
+	.next = kbasep_trace_seq_next,
+	.stop = kbasep_trace_seq_stop,
+	.show = kbasep_trace_seq_show,
+};
+
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct kbase_device *kbdev = inode->i_private;
+	unsigned long flags;
+
+	struct trace_seq_state *state;
+
+	state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
+	if (!state)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	state->start = kbdev->trace_first_out;
+	state->end = kbdev->trace_next_in;
+	memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	return 0;
+}
+
+static const struct file_operations kbasep_trace_debugfs_fops = {
+	.open = kbasep_trace_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_trace", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_trace_debugfs_fops);
+}
+
+#else
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_DEBUG_FS */
+
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	CSTD_UNUSED(param);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
+{
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		kbdev->kbase_profiling_controls[control] = value;
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+}
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control)
+{
+	u32 ret_value = 0;
+
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		ret_value = kbdev->kbase_profiling_controls[control];
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+
+	return ret_value;
+}
+
+/*
+ * Called by gator to control the production of
+ * profiling information at runtime
+ * */
+
+void _mali_profiling_control(u32 action, u32 value)
+{
+	struct kbase_device *kbdev = NULL;
+
+	/* find the first i.e. call with -1 */
+	kbdev = kbase_find_device(-1);
+
+	if (NULL != kbdev)
+		kbase_set_profiling_control(kbdev, action, value);
+}
+KBASE_EXPORT_SYMBOL(_mali_profiling_control);
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
new file mode 100644
index 0000000..f70bccc
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_set(&kbdev->disjoint_event.count, 0);
+	atomic_set(&kbdev->disjoint_event.state, 0);
+}
+
+/* increment the disjoint event count */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* increment the state and the event counter */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.state);
+
+	kbase_disjoint_event(kbdev);
+}
+
+/* decrement the state */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+	kbase_disjoint_event(kbdev);
+
+	atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* increments the count only if the state is > 0 */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	if (atomic_read(&kbdev->disjoint_event.state))
+		kbase_disjoint_event(kbdev);
+}
+
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
new file mode 100644
index 0000000..97bb6c5
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
@@ -0,0 +1,606 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as
+ * it will be set there.
+ */
+#include "mali_kbase_dma_fence.h"
+
+#include <linux/atomic.h>
+#include <linux/fence.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/reservation.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/ww_mutex.h>
+
+#include <mali_kbase.h>
+
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(kbase_dma_fence_lock);
+
+static void
+kbase_dma_fence_work(struct work_struct *pwork);
+
+static void
+kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource);
+}
+
+void
+kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom)
+{
+	list_del(&katom->queue);
+}
+
+static const char *
+kbase_dma_fence_get_driver_name(struct fence *fence)
+{
+	return kbase_drv_name;
+}
+
+static const char *
+kbase_dma_fence_get_timeline_name(struct fence *fence)
+{
+	return kbase_timeline_name;
+}
+
+static bool
+kbase_dma_fence_enable_signaling(struct fence *fence)
+{
+	/* If in the future we need to add code here remember to
+	 * to get a reference to the fence and release it when signaling
+	 * as stated in fence.h
+	 */
+	return true;
+}
+
+static void
+kbase_dma_fence_fence_value_str(struct fence *fence, char *str, int size)
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+static const struct fence_ops kbase_dma_fence_ops = {
+	.get_driver_name = kbase_dma_fence_get_driver_name,
+	.get_timeline_name = kbase_dma_fence_get_timeline_name,
+	.enable_signaling = kbase_dma_fence_enable_signaling,
+	/* Use the default wait */
+	.wait = fence_default_wait,
+	.fence_value_str = kbase_dma_fence_fence_value_str,
+};
+
+static struct fence *
+kbase_dma_fence_new(unsigned int context, unsigned int seqno)
+{
+	struct fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	fence_init(fence,
+		   &kbase_dma_fence_ops,
+		   &kbase_dma_fence_lock,
+		   context,
+		   seqno);
+
+	return fence;
+}
+
+static int
+kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
+				  struct ww_acquire_ctx *ctx)
+{
+	struct reservation_object *content_res = NULL;
+	unsigned int content_res_idx = 0;
+	unsigned int r;
+	int err = 0;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < info->dma_fence_resv_count; r++) {
+		if (info->resv_objs[r] == content_res) {
+			content_res = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	content_res_idx = r;
+
+	/* Unlock the locked one ones */
+	while (r--)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+
+	if (content_res)
+		ww_mutex_unlock(&content_res->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+		content_res = info->resv_objs[content_res_idx];
+		ww_mutex_lock_slow(&content_res->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+static void
+kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
+				    struct ww_acquire_ctx *ctx)
+{
+	unsigned int r;
+
+	for (r = 0; r < info->dma_fence_resv_count; r++)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+	ww_acquire_fini(ctx);
+}
+
+/**
+ * kbase_dma_fence_queue_work() - Queue work to handle @katom
+ * @katom: Pointer to atom for which to queue work
+ *
+ * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and
+ * submit the atom.
+ */
+static void
+kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	bool ret;
+
+	INIT_WORK(&katom->work, kbase_dma_fence_work);
+	ret = queue_work(kctx->dma_fence.wq, &katom->work);
+	/* Warn if work was already queued, that should not happen. */
+	WARN_ON(!ret);
+}
+
+/**
+ * kbase_dma_fence_free_callbacks - Free dma-fence callbacks on a katom
+ * @katom: Pointer to katom
+ * @queue_worker: Boolean indicating if fence worker is to be queued when
+ *                dep_count reaches 0.
+ *
+ * This function will free all fence callbacks on the katom's list of
+ * callbacks. Callbacks that have not yet been called, because their fence
+ * hasn't yet signaled, will first be removed from the fence.
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom, bool queue_worker)
+{
+	struct kbase_dma_fence_cb *cb, *tmp;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Clean up and free callbacks. */
+	list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) {
+		bool ret;
+
+		/* Cancel callbacks that hasn't been called yet. */
+		ret = fence_remove_callback(cb->fence, &cb->fence_cb);
+		if (ret) {
+			int ret;
+
+			/* Fence had not signaled, clean up after
+			 * canceling.
+			 */
+			ret = atomic_dec_return(&katom->dma_fence.dep_count);
+
+			if (unlikely(queue_worker && ret == 0)) {
+				/*
+				 * dep_count went to zero and queue_worker is
+				 * true. Queue the worker to handle the
+				 * completion of the katom.
+				 */
+				kbase_dma_fence_queue_work(katom);
+			}
+		}
+
+		/*
+		 * Release the reference taken in
+		 * kbase_dma_fence_add_callback().
+		 */
+		fence_put(cb->fence);
+		list_del(&cb->node);
+		kfree(cb);
+	}
+}
+
+/**
+ * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom
+ * @katom:	Katom to cancel
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Cancel callbacks and clean up. */
+	kbase_dma_fence_free_callbacks(katom, false);
+
+	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == 0);
+
+	/* Mark the atom as handled in case all fences signaled just before
+	 * canceling the callbacks and the worker was queued.
+	 */
+	atomic_set(&katom->dma_fence.dep_count, -1);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 * there is a race between job completion and cancellation.
+	 */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+
+/**
+ * kbase_dma_fence_work() - Worker thread called when a fence is signaled
+ * @pwork:	work_struct containing a pointer to a katom
+ *
+ * This function will clean and mark all dependencies as satisfied
+ */
+static void
+kbase_dma_fence_work(struct work_struct *pwork)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = container_of(pwork, struct kbase_jd_atom, work);
+	ctx = &katom->kctx->jctx;
+
+	mutex_lock(&ctx->lock);
+	if (atomic_read(&katom->dma_fence.dep_count) != 0)
+		goto out;
+
+	atomic_set(&katom->dma_fence.dep_count, -1);
+
+	/* Remove atom from list of dma-fence waiting atoms. */
+	kbase_dma_fence_waiters_remove(katom);
+	/* Cleanup callbacks. */
+	kbase_dma_fence_free_callbacks(katom, false);
+	/*
+	 * Queue atom on GPU, unless it has already completed due to a failing
+	 * dependency. Run jd_done_nolock() on the katom if it is completed.
+	 */
+	if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
+		jd_done_nolock(katom, NULL);
+	else
+		kbase_jd_dep_clear_locked(katom);
+
+out:
+	mutex_unlock(&ctx->lock);
+}
+
+/**
+ * kbase_dma_fence_add_callback() - Add callback on @fence to block @katom
+ * @katom: Pointer to katom that will be blocked by @fence
+ * @fence: Pointer to fence on which to set up the callback
+ * @callback: Pointer to function to be called when fence is signaled
+ *
+ * Caller needs to hold a reference to @fence when calling this function, and
+ * the caller is responsible for releasing that reference.  An additional
+ * reference to @fence will be taken when the callback was successfully set up
+ * and @fence needs to be kept valid until the callback has been called and
+ * cleanup have been done.
+ *
+ * Return: 0 on success: fence was either already signalled, or callback was
+ * set up. Negative error code is returned on error.
+ */
+static int
+kbase_dma_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct fence *fence,
+			     fence_func_t callback)
+{
+	int err = 0;
+	struct kbase_dma_fence_cb *kbase_fence_cb;
+
+	kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL);
+	if (!kbase_fence_cb)
+		return -ENOMEM;
+
+	kbase_fence_cb->fence = fence;
+	kbase_fence_cb->katom = katom;
+	INIT_LIST_HEAD(&kbase_fence_cb->node);
+
+	err = fence_add_callback(fence, &kbase_fence_cb->fence_cb, callback);
+	if (err == -ENOENT) {
+		/* Fence signaled, clear the error and return */
+		err = 0;
+		kbase_fence_cb->fence = NULL;
+		kfree(kbase_fence_cb);
+	} else if (err) {
+		kfree(kbase_fence_cb);
+	} else {
+		/*
+		 * Get reference to fence that will be kept until callback gets
+		 * cleaned up in kbase_dma_fence_free_callbacks().
+		 */
+		fence_get(fence);
+		atomic_inc(&katom->dma_fence.dep_count);
+		/* Add callback to katom's list of callbacks */
+		list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks);
+	}
+
+	return err;
+}
+
+static void
+kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
+{
+	struct kbase_dma_fence_cb *kcb = container_of(cb,
+				struct kbase_dma_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+
+	/* If the atom is zapped dep_count will be forced to a negative number
+	 * preventing this callback from ever scheduling work. Which in turn
+	 * would reschedule the atom.
+	 */
+	if (atomic_dec_and_test(&katom->dma_fence.dep_count))
+		kbase_dma_fence_queue_work(katom);
+}
+
+static int
+kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
+					 struct reservation_object *resv,
+					 bool exclusive)
+{
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = reservation_object_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = kbase_dma_fence_add_callback(katom,
+						   excl_fence,
+						   kbase_dma_fence_cb);
+
+		/* Release our reference, taken by reservation_object_get_fences_rcu(),
+		 * to the fence. We have set up our callback (if that was possible),
+		 * and it's the fence's owner is responsible for singling the fence
+		 * before allowing it to disappear.
+		 */
+		fence_put(excl_fence);
+
+		if (err)
+			goto out;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = kbase_dma_fence_add_callback(katom,
+							   shared_fences[i],
+							   kbase_dma_fence_cb);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Release all our references to the shared fences, taken by
+	 * reservation_object_get_fences_rcu(). We have set up our callback (if
+	 * that was possible), and it's the fence's owner is responsible for
+	 * signaling the fence before allowing it to disappear.
+	 */
+out:
+	for (i = 0; i < shared_count; i++)
+		fence_put(shared_fences[i]);
+	kfree(shared_fences);
+
+	if (err) {
+		/*
+		 * On error, cancel and clean up all callbacks that was set up
+		 * before the error.
+		 */
+		kbase_dma_fence_free_callbacks(katom, false);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive)
+{
+	unsigned int i;
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		/* Duplicate resource, ignore */
+		if (info->resv_objs[i] == resv)
+			return;
+	}
+
+	info->resv_objs[info->dma_fence_resv_count] = resv;
+	if (exclusive)
+		set_bit(info->dma_fence_resv_count,
+			info->dma_fence_excl_bitmap);
+	(info->dma_fence_resv_count)++;
+}
+
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info)
+{
+	int err, i;
+	struct fence *fence;
+	struct ww_acquire_ctx ww_ctx;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	fence = kbase_dma_fence_new(katom->dma_fence.context,
+				    atomic_inc_return(&katom->dma_fence.seqno));
+	if (!fence) {
+		err = -ENOMEM;
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d creating fence.\n", err);
+		return err;
+	}
+
+	katom->dma_fence.fence = fence;
+	atomic_set(&katom->dma_fence.dep_count, 1);
+
+	err = kbase_dma_fence_lock_reservations(info, &ww_ctx);
+	if (err) {
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d locking reservations.\n", err);
+		atomic_set(&katom->dma_fence.dep_count, -1);
+		fence_put(fence);
+		return err;
+	}
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		struct reservation_object *obj = info->resv_objs[i];
+
+		if (!test_bit(i, info->dma_fence_excl_bitmap)) {
+			err = reservation_object_reserve_shared(obj);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d reserving space for shared fence.\n", err);
+				goto end;
+			}
+
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, false);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_shared_fence(obj, katom->dma_fence.fence);
+		} else {
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, true);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_excl_fence(obj, katom->dma_fence.fence);
+		}
+	}
+
+end:
+	kbase_dma_fence_unlock_reservations(info, &ww_ctx);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (atomic_dec_and_test(&katom->dma_fence.dep_count)) {
+			atomic_set(&katom->dma_fence.dep_count, -1);
+			kbase_dma_fence_free_callbacks(katom, false);
+		} else {
+			/* Add katom to the list of dma-buf fence waiting atoms
+			 * only if it is still waiting.
+			 */
+			kbase_dma_fence_waiters_add(katom);
+		}
+	} else {
+		/* There was an error, cancel callbacks, set dep_count to -1 to
+		 * indicate that the atom has been handled (the caller will
+		 * kill it for us), signal the fence, free callbacks and the
+		 * fence.
+		 */
+		kbase_dma_fence_free_callbacks(katom, false);
+		atomic_set(&katom->dma_fence.dep_count, -1);
+		kbase_dma_fence_signal(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
+{
+	struct list_head *list = &kctx->dma_fence.waiting_resource;
+
+	while (!list_empty(list)) {
+		struct kbase_jd_atom *katom;
+
+		katom = list_first_entry(list, struct kbase_jd_atom, queue);
+		kbase_dma_fence_waiters_remove(katom);
+		kbase_dma_fence_cancel_atom(katom);
+	}
+}
+
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
+{
+	/* Cancel callbacks and clean up. */
+	kbase_dma_fence_free_callbacks(katom, true);
+}
+
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
+{
+	if (!katom->dma_fence.fence)
+		return;
+
+	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == -1);
+
+	/* Signal the atom's fence. */
+	fence_signal(katom->dma_fence.fence);
+	fence_put(katom->dma_fence.fence);
+	katom->dma_fence.fence = NULL;
+
+	kbase_dma_fence_free_callbacks(katom, false);
+}
+
+void kbase_dma_fence_term(struct kbase_context *kctx)
+{
+	destroy_workqueue(kctx->dma_fence.wq);
+	kctx->dma_fence.wq = NULL;
+}
+
+int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);
+
+	kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d",
+					     WQ_UNBOUND, 1, kctx->pid);
+	if (!kctx->dma_fence.wq)
+		return -ENOMEM;
+
+	return 0;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
new file mode 100644
index 0000000..3b0a69b
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
@@ -0,0 +1,150 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DMA_FENCE_H_
+#define _KBASE_DMA_FENCE_H_
+
+#ifdef CONFIG_MALI_DMA_FENCE
+
+#include <linux/fence.h>
+#include <linux/list.h>
+#include <linux/reservation.h>
+
+
+/* Forward declaration from mali_kbase_defs.h */
+struct kbase_jd_atom;
+struct kbase_context;
+
+/**
+ * struct kbase_dma_fence_cb - Mali dma-fence callback data struct
+ * @fence_cb: Callback function
+ * @katom:    Pointer to katom that is waiting on this callback
+ * @fence:    Pointer to the fence object on which this callback is waiting
+ * @node:     List head for linking this callback to the katom
+ */
+struct kbase_dma_fence_cb {
+	struct fence_cb fence_cb;
+	struct kbase_jd_atom *katom;
+	struct fence *fence;
+	struct list_head node;
+};
+
+/**
+ * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
+ * @resv_objs:             Array of reservation objects to attach the
+ *                         new fence to.
+ * @dma_fence_resv_count:  Number of reservation objects in the array.
+ * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive.
+ *
+ * This is used by some functions to pass around a collection of data about
+ * reservation objects.
+ */
+struct kbase_dma_fence_resv_info {
+	struct reservation_object **resv_objs;
+	unsigned int dma_fence_resv_count;
+	unsigned long *dma_fence_excl_bitmap;
+};
+
+/**
+ * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
+ * @resv:      Reservation object to add to the array.
+ * @info:      Pointer to struct with current reservation info
+ * @exclusive: Boolean indicating if exclusive access is needed
+ *
+ * The function adds a new reservation_object to an existing array of
+ * reservation_objects. At the same time keeps track of which objects require
+ * exclusive access in dma_fence_excl_bitmap.
+ */
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive);
+
+/**
+ * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
+ * @katom: Katom with the external dependency.
+ * @info:  Pointer to struct with current reservation info
+ *
+ * Return: An error code or 0 if succeeds
+ */
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info);
+
+/**
+ * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx
+ * @kctx: Pointer to kbase context
+ *
+ * This function will cancel and clean up all katoms on @kctx that is waiting
+ * on dma-buf fences.
+ *
+ * Locking: jctx.lock needs to be held when calling this function.
+ */
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
+ * @katom: Pointer to katom whose callbacks are to be canceled
+ *
+ * This function cancels all dma-buf fence callbacks on @katom, but does not
+ * cancel the katom itself.
+ *
+ * The caller is responsible for ensuring that jd_done_nolock is called on
+ * @katom.
+ *
+ * Locking: jctx.lock must be held when calling this function.
+ */
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
+ * @katom: Pointer to katom to signal and clean up
+ *
+ * This function will signal the @katom's fence, if it has one, and clean up
+ * the callback data from the katom's wait on earlier fences.
+ *
+ * Locking: jctx.lock must be held while calling this function.
+ */
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_term() - Terminate Mali dma-fence context
+ * @kctx: kbase context to terminate
+ */
+void kbase_dma_fence_term(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_init() - Initialize Mali dma-fence context
+ * @kctx: kbase context to initialize
+ */
+int kbase_dma_fence_init(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_waiters_remove()- Remove katom from dma-fence wait list
+ * @katom: Pointer to katom to remove from list
+ */
+void kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom);
+
+#else /* CONFIG_MALI_DMA_FENCE */
+/* Dummy functions for when dma-buf fence isn't enabled. */
+
+static inline int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	return 0;
+}
+
+static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
new file mode 100644
index 0000000..1881486
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -0,0 +1,259 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+#include <mali_kbase_tlstream.h>
+
+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct base_jd_udata data;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+	data = katom->udata;
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
+
+	KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx);
+	KBASE_TLSTREAM_TL_DEL_ATOM(katom);
+
+	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+
+	wake_up(&katom->completed);
+
+	return data;
+}
+
+int kbase_event_pending(struct kbase_context *ctx)
+{
+	KBASE_DEBUG_ASSERT(ctx);
+
+	return (atomic_read(&ctx->event_count) != 0) ||
+			(atomic_read(&ctx->event_closed) != 0);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+	struct kbase_jd_atom *atom;
+
+	KBASE_DEBUG_ASSERT(ctx);
+
+	mutex_lock(&ctx->event_mutex);
+
+	if (list_empty(&ctx->event_list)) {
+		if (!atomic_read(&ctx->event_closed)) {
+			mutex_unlock(&ctx->event_mutex);
+			return -1;
+		}
+
+		/* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+		mutex_unlock(&ctx->event_mutex);
+		uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+		memset(&uevent->udata, 0, sizeof(uevent->udata));
+		dev_dbg(ctx->kbdev->dev,
+				"event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+				BASE_JD_EVENT_DRV_TERMINATED);
+		return 0;
+	}
+
+	/* normal event processing */
+	atomic_dec(&ctx->event_count);
+	atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
+	list_del(ctx->event_list.next);
+
+	mutex_unlock(&ctx->event_mutex);
+
+	dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+	uevent->event_code = atom->event_code;
+	uevent->atom_number = (atom - ctx->jctx.atoms);
+
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(atom);
+
+	mutex_lock(&ctx->jctx.lock);
+	uevent->udata = kbase_event_process(ctx, atom);
+	mutex_unlock(&ctx->jctx.lock);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue);
+
+/**
+ * kbase_event_process_noreport_worker - Worker for processing atoms that do not
+ *                                       return an event but do have external
+ *                                       resources
+ * @data:  Work structure
+ */
+static void kbase_event_process_noreport_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(katom);
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_event_process(kctx, katom);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+/**
+ * kbase_event_process_noreport - Process atoms that do not return an event
+ * @kctx:  Context pointer
+ * @katom: Atom to be processed
+ *
+ * Atoms that do not have external resources will be processed immediately.
+ * Atoms that do have external resources will be processed on a workqueue, in
+ * order to avoid locking issues.
+ */
+static void kbase_event_process_noreport(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		INIT_WORK(&katom->work, kbase_event_process_noreport_worker);
+		queue_work(kctx->event_workq, &katom->work);
+	} else {
+		kbase_event_process(kctx, katom);
+	}
+}
+
+/**
+ * kbase_event_coalesce - Move pending events to the main event list
+ * @kctx:  Context pointer
+ *
+ * kctx->event_list and kctx->event_coalesce_count must be protected
+ * by a lock unless this is the last thread using them
+ * (and we're about to terminate the lock).
+ *
+ * Return: The number of pending events moved to the main event list
+ */
+static int kbase_event_coalesce(struct kbase_context *kctx)
+{
+	const int event_count = kctx->event_coalesce_count;
+
+	/* Join the list of pending events onto the tail of the main list
+	   and reset it */
+	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
+	kctx->event_coalesce_count = 0;
+
+	/* Return the number of events moved */
+	return event_count;
+}
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+	if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
+		if (atom->event_code == BASE_JD_EVENT_DONE) {
+			/* Don't report the event */
+			kbase_event_process_noreport(ctx, atom);
+			return;
+		}
+	}
+
+	if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
+		/* Don't report the event */
+		kbase_event_process_noreport(ctx, atom);
+		return;
+	}
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_POSTED);
+	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
+		/* Don't report the event until other event(s) have completed */
+		mutex_lock(&ctx->event_mutex);
+		list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
+		++ctx->event_coalesce_count;
+		mutex_unlock(&ctx->event_mutex);
+	} else {
+		/* Report the event and any pending events now */
+		int event_count = 1;
+
+		mutex_lock(&ctx->event_mutex);
+		event_count += kbase_event_coalesce(ctx);
+		list_add_tail(&atom->dep_item[0], &ctx->event_list);
+		atomic_add(event_count, &ctx->event_count);
+		mutex_unlock(&ctx->event_mutex);
+
+		kbase_event_wakeup(ctx);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_event_post);
+
+void kbase_event_close(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->event_mutex);
+	atomic_set(&kctx->event_closed, true);
+	mutex_unlock(&kctx->event_mutex);
+	kbase_event_wakeup(kctx);
+}
+
+int kbase_event_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	INIT_LIST_HEAD(&kctx->event_list);
+	INIT_LIST_HEAD(&kctx->event_coalesce_list);
+	mutex_init(&kctx->event_mutex);
+	atomic_set(&kctx->event_count, 0);
+	kctx->event_coalesce_count = 0;
+	atomic_set(&kctx->event_closed, false);
+	kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+	if (NULL == kctx->event_workq)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init);
+
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+	int event_count;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+	flush_workqueue(kctx->event_workq);
+	destroy_workqueue(kctx->event_workq);
+
+	/* We use kbase_event_dequeue to remove the remaining events as that
+	 * deals with all the cleanup needed for the atoms.
+	 *
+	 * Note: use of kctx->event_list without a lock is safe because this must be the last
+	 * thread using it (because we're about to terminate the lock)
+	 */
+	event_count = kbase_event_coalesce(kctx);
+	atomic_add(event_count, &kctx->event_count);
+
+	while (!list_empty(&kctx->event_list)) {
+		struct base_jd_event_v2 event;
+
+		kbase_event_dequeue(kctx, &event);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
new file mode 100644
index 0000000..ce65b55
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* NB taken from gator  */
+/*
+ * List of possible actions to be controlled by DS-5 Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping
+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
+ * they are unknown inside gator.
+ */
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP  2
+#define GATOR_JOB_SLOT_SOFT_STOPPED  3
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 event, u64 value);
+void kbase_trace_mali_pm_power_off(u32 event, u64 value);
+void kbase_trace_mali_pm_power_on(u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
+void kbase_trace_mali_mmu_as_in_use(int event);
+void kbase_trace_mali_mmu_as_released(int event);
+void kbase_trace_mali_total_alloc_pages_change(long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif  /* _KBASE_GATOR_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
new file mode 100644
index 0000000..7c3c5f3
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -0,0 +1,334 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_gator_api.h"
+#include "mali_kbase_gator_hwcnt_names.h"
+
+#define MALI_MAX_CORES_PER_GROUP		4
+#define MALI_MAX_NUM_BLOCKS_PER_GROUP	8
+#define MALI_COUNTERS_PER_BLOCK			64
+#define MALI_BYTES_PER_COUNTER			4
+
+struct kbase_gator_hwcnt_handles {
+	struct kbase_device *kbdev;
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct work_struct dump_work;
+	int dump_complete;
+	spinlock_t dump_lock;
+};
+
+static void dump_worker(struct work_struct *work);
+
+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
+{
+	const char * const *hardware_counters;
+	struct kbase_device *kbdev;
+	uint32_t product_id;
+	uint32_t count;
+
+	if (!total_counters)
+		return NULL;
+
+	/* Get the first device - it doesn't matter in this case */
+	kbdev = kbase_find_device(-1);
+	if (!kbdev)
+		return NULL;
+
+	product_id = kbdev->gpu_props.props.core_props.product_id;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) {
+		case GPU_ID2_PRODUCT_TMIX:
+			hardware_counters = hardware_counters_mali_tMIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tMIx);
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			hardware_counters = hardware_counters_mali_tHEx;
+			count = ARRAY_SIZE(hardware_counters_mali_tHEx);
+			break;
+		case GPU_ID2_PRODUCT_TSIX:
+			hardware_counters = hardware_counters_mali_tSIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tSIx);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	} else {
+		switch (product_id) {
+			/* If we are using a Mali-T60x device */
+		case GPU_ID_PI_T60X:
+			hardware_counters = hardware_counters_mali_t60x;
+			count = ARRAY_SIZE(hardware_counters_mali_t60x);
+			break;
+			/* If we are using a Mali-T62x device */
+		case GPU_ID_PI_T62X:
+			hardware_counters = hardware_counters_mali_t62x;
+			count = ARRAY_SIZE(hardware_counters_mali_t62x);
+			break;
+			/* If we are using a Mali-T72x device */
+		case GPU_ID_PI_T72X:
+			hardware_counters = hardware_counters_mali_t72x;
+			count = ARRAY_SIZE(hardware_counters_mali_t72x);
+			break;
+			/* If we are using a Mali-T76x device */
+		case GPU_ID_PI_T76X:
+			hardware_counters = hardware_counters_mali_t76x;
+			count = ARRAY_SIZE(hardware_counters_mali_t76x);
+			break;
+			/* If we are using a Mali-T82x device */
+		case GPU_ID_PI_T82X:
+			hardware_counters = hardware_counters_mali_t82x;
+			count = ARRAY_SIZE(hardware_counters_mali_t82x);
+			break;
+			/* If we are using a Mali-T83x device */
+		case GPU_ID_PI_T83X:
+			hardware_counters = hardware_counters_mali_t83x;
+			count = ARRAY_SIZE(hardware_counters_mali_t83x);
+			break;
+			/* If we are using a Mali-T86x device */
+		case GPU_ID_PI_T86X:
+			hardware_counters = hardware_counters_mali_t86x;
+			count = ARRAY_SIZE(hardware_counters_mali_t86x);
+			break;
+			/* If we are using a Mali-T88x device */
+		case GPU_ID_PI_TFRX:
+			hardware_counters = hardware_counters_mali_t88x;
+			count = ARRAY_SIZE(hardware_counters_mali_t88x);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	}
+
+	/* Release the kbdev reference. */
+	kbase_release_device(kbdev);
+
+	*total_counters = count;
+
+	/* If we return a string array take a reference on the module (or fail). */
+	if (hardware_counters && !try_module_get(THIS_MODULE))
+		return NULL;
+
+	return hardware_counters;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
+
+void kbase_gator_hwcnt_term_names(void)
+{
+	/* Release the module reference. */
+	module_put(THIS_MODULE);
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
+
+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	uint32_t dump_size = 0, i = 0;
+
+	if (!in_out_info)
+		return NULL;
+
+	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
+	if (!hand)
+		return NULL;
+
+	INIT_WORK(&hand->dump_work, dump_worker);
+	spin_lock_init(&hand->dump_lock);
+
+	/* Get the first device */
+	hand->kbdev = kbase_find_device(-1);
+	if (!hand->kbdev)
+		goto free_hand;
+
+	dump_size = kbase_vinstr_dump_size(hand->kbdev);
+	hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!hand->vinstr_buffer)
+		goto release_device;
+	in_out_info->kernel_dump_buffer = hand->vinstr_buffer;
+
+	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
+	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
+	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
+
+	/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
+	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
+		uint32_t cg, j;
+		uint64_t core_mask;
+
+		/* There are 8 hardware counters blocks per core group */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			in_out_info->nr_core_groups, GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = in_out_info->nr_core_groups *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			MALI_COUNTERS_PER_BLOCK *
+			MALI_BYTES_PER_COUNTER;
+
+		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
+			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
+
+			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
+				if (core_mask & (1u << j))
+					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+				else
+					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			}
+
+			in_out_info->hwc_layout[i++] = TILER_BLOCK;
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+
+			if (0 == cg)
+				in_out_info->hwc_layout[i++] = JM_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+		}
+	/* If we are using any other device */
+	} else {
+		uint32_t nr_l2, nr_sc_bits, j;
+		uint64_t core_mask;
+
+		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
+
+		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+		nr_sc_bits = fls64(core_mask);
+
+		/* The job manager and tiler sets of counters
+		 * are always present */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+
+		in_out_info->hwc_layout[i++] = JM_BLOCK;
+		in_out_info->hwc_layout[i++] = TILER_BLOCK;
+
+		for (j = 0; j < nr_l2; j++)
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+		while (core_mask != 0ull) {
+			if ((core_mask & 1ull) != 0ull)
+				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			core_mask >>= 1;
+		}
+	}
+
+	in_out_info->nr_hwc_blocks = i;
+	in_out_info->size = dump_size;
+
+	setup.jm_bm = in_out_info->bitmask[0];
+	setup.tiler_bm = in_out_info->bitmask[1];
+	setup.shader_bm = in_out_info->bitmask[2];
+	setup.mmu_l2_bm = in_out_info->bitmask[3];
+	hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx,
+			&setup, hand->vinstr_buffer);
+	if (!hand->vinstr_cli) {
+		dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core");
+		goto free_layout;
+	}
+
+	return hand;
+
+free_layout:
+	kfree(in_out_info->hwc_layout);
+
+free_vinstr_buffer:
+	kfree(hand->vinstr_buffer);
+
+release_device:
+	kbase_release_device(hand->kbdev);
+
+free_hand:
+	kfree(hand);
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
+
+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (in_out_info)
+		kfree(in_out_info->hwc_layout);
+
+	if (opaque_handles) {
+		cancel_work_sync(&opaque_handles->dump_work);
+		kbase_vinstr_detach_client(opaque_handles->vinstr_cli);
+		kfree(opaque_handles->vinstr_buffer);
+		kbase_release_device(opaque_handles->kbdev);
+		kfree(opaque_handles);
+	}
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
+
+static void dump_worker(struct work_struct *work)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+
+	hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work);
+	if (!kbase_vinstr_hwc_dump(hand->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL)) {
+		spin_lock_bh(&hand->dump_lock);
+		hand->dump_complete = 1;
+		spin_unlock_bh(&hand->dump_lock);
+	} else {
+		schedule_work(&hand->dump_work);
+	}
+}
+
+uint32_t kbase_gator_instr_hwcnt_dump_complete(
+		struct kbase_gator_hwcnt_handles *opaque_handles,
+		uint32_t * const success)
+{
+
+	if (opaque_handles && success) {
+		*success = opaque_handles->dump_complete;
+		opaque_handles->dump_complete = 0;
+		return *success;
+	}
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
+
+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (opaque_handles)
+		schedule_work(&opaque_handles->dump_work);
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
new file mode 100644
index 0000000..ef9ac0f
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
@@ -0,0 +1,219 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to fetch hardware counters.
+ */
+
+/* This define is used by the gator kernel module compile to select which DDK
+ * API calling convention to use. If not defined (legacy DDK) gator assumes
+ * version 1. The version to DDK release mapping is:
+ *     Version 1 API: DDK versions r1px, r2px
+ *     Version 2 API: DDK versions r3px, r4px
+ *     Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a
+ * counter dump. If this returns a non-zero value the request has been queued,
+ * otherwise the driver has been unable to do so (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the  previously
+ * requested dump has been succesful. If this returns non-zero the counter dump
+ * has resolved, but the value of *success must also be tested as the dump
+ * may have not been successful. If it returns zero the counter dump was
+ * abandoned due to the device being busy (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ *        kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ *    In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ *        hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ *        hwcnt_name # pointer to name list
+ *
+ *        u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ *        # Iterate over each 64-counter block in this GPU configuration
+ *        for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ *            hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ *            # Skip reserved type blocks - they contain no counters at all
+ *            if( type == RESERVED_BLOCK ) {
+ *                continue;
+ *            }
+ *
+ *            size_t name_offset = type * 64;
+ *            size_t data_offset = i * 64;
+ *
+ *            # Iterate over the names of the counters in this block type
+ *            for( j = 0; j < 64; j++) {
+ *                const char * name = hwcnt_name[name_offset+j];
+ *
+ *                # Skip empty name strings - there is no counter here
+ *                if( name[0] == '\0' ) {
+ *                    continue;
+ *                }
+ *
+ *                u32 data = hwcnt_data[data_offset+j];
+ *
+ *                printk( "COUNTER: %s DATA: %u\n", name, data );
+ *            }
+ *        }
+ *
+ *
+ *     Note that in most implementations you typically want to either SUM or
+ *     AVERAGE multiple instances of the same counter if, for example, you have
+ *     multiple shader cores or multiple L2 caches. The most sensible view for
+ *     analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ *     counters.
+ *
+ * 7] Goto 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling
+ *    kbase_gator_hwcnt_term_names(). This function must only be called if
+ *    init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+enum hwc_type {
+	JM_BLOCK = 0,
+	TILER_BLOCK,
+	SHADER_BLOCK,
+	MMU_L2_BLOCK,
+	RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+	/* Passed from Gator to kbase */
+
+	/* the bitmask of enabled hardware counters for each counter block */
+	uint16_t bitmask[4];
+
+	/* Passed from kbase to Gator */
+
+	/* ptr to counter dump memory */
+	void *kernel_dump_buffer;
+
+	/* size of counter dump memory */
+	uint32_t size;
+
+	/* the ID of the Mali device */
+	uint32_t gpu_id;
+
+	/* the number of shader cores in the GPU */
+	uint32_t nr_cores;
+
+	/* the number of core groups */
+	uint32_t nr_core_groups;
+
+	/* the memory layout of the performance counters */
+	enum hwc_type *hwc_layout;
+
+	/* the total number of hardware couter blocks */
+	uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info   A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ *                      specific information that will be returned to Gator. On entry Gator must have populated the
+ *                      'bitmask' field with the counters it wishes to enable for each class of counter block.
+ *                      Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ *                      enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ *                      the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ *                      kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU.
+ *
+ * @return              Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info       A pointer to a structure containing the enabled counters passed from Gator and all the
+ *                          Mali specific information that will be returned to Gator.
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ * @param[out] success      Non-zero on success, zero on failure.
+ *
+ * @return                  Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ *                          completed dump may not have dumped succesfully, so the caller must test for both
+ *                          a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ *
+ * @return                  Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_counters The total number of counters short names in the Mali devices' list.
+ *
+ * @return                    Pointer to an array of strings of length *total_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
new file mode 100644
index 0000000..ce8e424
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -0,0 +1,2165 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_H_
+#define _KBASE_GATOR_HWCNT_NAMES_H_
+
+/*
+ * "Short names" for hardware counters used by Streamline. Counters names are
+ * stored in accordance with their memory layout in the binary counter block
+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
+ * of 64 counters, and each GPU implements the same set of "masters" although
+ * the counters each master exposes within its block of 64 may vary.
+ *
+ * Counters which are an empty string are simply "holes" in the counter memory
+ * where no counter exists.
+ */
+
+static const char * const hardware_counters_mali_t60x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MESSAGES_SENT",
+	"T60x_MESSAGES_RECEIVED",
+	"T60x_GPU_ACTIVE",
+	"T60x_IRQ_ACTIVE",
+	"T60x_JS0_JOBS",
+	"T60x_JS0_TASKS",
+	"T60x_JS0_ACTIVE",
+	"",
+	"T60x_JS0_WAIT_READ",
+	"T60x_JS0_WAIT_ISSUE",
+	"T60x_JS0_WAIT_DEPEND",
+	"T60x_JS0_WAIT_FINISH",
+	"T60x_JS1_JOBS",
+	"T60x_JS1_TASKS",
+	"T60x_JS1_ACTIVE",
+	"",
+	"T60x_JS1_WAIT_READ",
+	"T60x_JS1_WAIT_ISSUE",
+	"T60x_JS1_WAIT_DEPEND",
+	"T60x_JS1_WAIT_FINISH",
+	"T60x_JS2_JOBS",
+	"T60x_JS2_TASKS",
+	"T60x_JS2_ACTIVE",
+	"",
+	"T60x_JS2_WAIT_READ",
+	"T60x_JS2_WAIT_ISSUE",
+	"T60x_JS2_WAIT_DEPEND",
+	"T60x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T60x_TI_JOBS_PROCESSED",
+	"T60x_TI_TRIANGLES",
+	"T60x_TI_QUADS",
+	"T60x_TI_POLYGONS",
+	"T60x_TI_POINTS",
+	"T60x_TI_LINES",
+	"T60x_TI_VCACHE_HIT",
+	"T60x_TI_VCACHE_MISS",
+	"T60x_TI_FRONT_FACING",
+	"T60x_TI_BACK_FACING",
+	"T60x_TI_PRIM_VISIBLE",
+	"T60x_TI_PRIM_CULLED",
+	"T60x_TI_PRIM_CLIPPED",
+	"T60x_TI_LEVEL0",
+	"T60x_TI_LEVEL1",
+	"T60x_TI_LEVEL2",
+	"T60x_TI_LEVEL3",
+	"T60x_TI_LEVEL4",
+	"T60x_TI_LEVEL5",
+	"T60x_TI_LEVEL6",
+	"T60x_TI_LEVEL7",
+	"T60x_TI_COMMAND_1",
+	"T60x_TI_COMMAND_2",
+	"T60x_TI_COMMAND_3",
+	"T60x_TI_COMMAND_4",
+	"T60x_TI_COMMAND_4_7",
+	"T60x_TI_COMMAND_8_15",
+	"T60x_TI_COMMAND_16_63",
+	"T60x_TI_COMMAND_64",
+	"T60x_TI_COMPRESS_IN",
+	"T60x_TI_COMPRESS_OUT",
+	"T60x_TI_COMPRESS_FLUSH",
+	"T60x_TI_TIMESTAMPS",
+	"T60x_TI_PCACHE_HIT",
+	"T60x_TI_PCACHE_MISS",
+	"T60x_TI_PCACHE_LINE",
+	"T60x_TI_PCACHE_STALL",
+	"T60x_TI_WRBUF_HIT",
+	"T60x_TI_WRBUF_MISS",
+	"T60x_TI_WRBUF_LINE",
+	"T60x_TI_WRBUF_PARTIAL",
+	"T60x_TI_WRBUF_STALL",
+	"T60x_TI_ACTIVE",
+	"T60x_TI_LOADING_DESC",
+	"T60x_TI_INDEX_WAIT",
+	"T60x_TI_INDEX_RANGE_WAIT",
+	"T60x_TI_VERTEX_WAIT",
+	"T60x_TI_PCACHE_WAIT",
+	"T60x_TI_WRBUF_WAIT",
+	"T60x_TI_BUS_READ",
+	"T60x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_TI_UTLB_STALL",
+	"T60x_TI_UTLB_REPLAY_MISS",
+	"T60x_TI_UTLB_REPLAY_FULL",
+	"T60x_TI_UTLB_NEW_MISS",
+	"T60x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T60x_FRAG_ACTIVE",
+	"T60x_FRAG_PRIMITIVES",
+	"T60x_FRAG_PRIMITIVES_DROPPED",
+	"T60x_FRAG_CYCLES_DESC",
+	"T60x_FRAG_CYCLES_PLR",
+	"T60x_FRAG_CYCLES_VERT",
+	"T60x_FRAG_CYCLES_TRISETUP",
+	"T60x_FRAG_CYCLES_RAST",
+	"T60x_FRAG_THREADS",
+	"T60x_FRAG_DUMMY_THREADS",
+	"T60x_FRAG_QUADS_RAST",
+	"T60x_FRAG_QUADS_EZS_TEST",
+	"T60x_FRAG_QUADS_EZS_KILLED",
+	"T60x_FRAG_THREADS_LZS_TEST",
+	"T60x_FRAG_THREADS_LZS_KILLED",
+	"T60x_FRAG_CYCLES_NO_TILE",
+	"T60x_FRAG_NUM_TILES",
+	"T60x_FRAG_TRANS_ELIM",
+	"T60x_COMPUTE_ACTIVE",
+	"T60x_COMPUTE_TASKS",
+	"T60x_COMPUTE_THREADS",
+	"T60x_COMPUTE_CYCLES_DESC",
+	"T60x_TRIPIPE_ACTIVE",
+	"T60x_ARITH_WORDS",
+	"T60x_ARITH_CYCLES_REG",
+	"T60x_ARITH_CYCLES_L0",
+	"T60x_ARITH_FRAG_DEPEND",
+	"T60x_LS_WORDS",
+	"T60x_LS_ISSUES",
+	"T60x_LS_RESTARTS",
+	"T60x_LS_REISSUES_MISS",
+	"T60x_LS_REISSUES_VD",
+	"T60x_LS_REISSUE_ATTRIB_MISS",
+	"T60x_LS_NO_WB",
+	"T60x_TEX_WORDS",
+	"T60x_TEX_BUBBLES",
+	"T60x_TEX_WORDS_L0",
+	"T60x_TEX_WORDS_DESC",
+	"T60x_TEX_ISSUES",
+	"T60x_TEX_RECIRC_FMISS",
+	"T60x_TEX_RECIRC_DESC",
+	"T60x_TEX_RECIRC_MULTI",
+	"T60x_TEX_RECIRC_PMISS",
+	"T60x_TEX_RECIRC_CONF",
+	"T60x_LSC_READ_HITS",
+	"T60x_LSC_READ_MISSES",
+	"T60x_LSC_WRITE_HITS",
+	"T60x_LSC_WRITE_MISSES",
+	"T60x_LSC_ATOMIC_HITS",
+	"T60x_LSC_ATOMIC_MISSES",
+	"T60x_LSC_LINE_FETCHES",
+	"T60x_LSC_DIRTY_LINE",
+	"T60x_LSC_SNOOPS",
+	"T60x_AXI_TLB_STALL",
+	"T60x_AXI_TLB_MISS",
+	"T60x_AXI_TLB_TRANSACTION",
+	"T60x_LS_TLB_MISS",
+	"T60x_LS_TLB_HIT",
+	"T60x_AXI_BEATS_READ",
+	"T60x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MMU_HIT",
+	"T60x_MMU_NEW_MISS",
+	"T60x_MMU_REPLAY_FULL",
+	"T60x_MMU_REPLAY_MISS",
+	"T60x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_UTLB_HIT",
+	"T60x_UTLB_NEW_MISS",
+	"T60x_UTLB_REPLAY_FULL",
+	"T60x_UTLB_REPLAY_MISS",
+	"T60x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_L2_EXT_WRITE_BEATS",
+	"T60x_L2_EXT_READ_BEATS",
+	"T60x_L2_ANY_LOOKUP",
+	"T60x_L2_READ_LOOKUP",
+	"T60x_L2_SREAD_LOOKUP",
+	"T60x_L2_READ_REPLAY",
+	"T60x_L2_READ_SNOOP",
+	"T60x_L2_READ_HIT",
+	"T60x_L2_CLEAN_MISS",
+	"T60x_L2_WRITE_LOOKUP",
+	"T60x_L2_SWRITE_LOOKUP",
+	"T60x_L2_WRITE_REPLAY",
+	"T60x_L2_WRITE_SNOOP",
+	"T60x_L2_WRITE_HIT",
+	"T60x_L2_EXT_READ_FULL",
+	"T60x_L2_EXT_READ_HALF",
+	"T60x_L2_EXT_WRITE_FULL",
+	"T60x_L2_EXT_WRITE_HALF",
+	"T60x_L2_EXT_READ",
+	"T60x_L2_EXT_READ_LINE",
+	"T60x_L2_EXT_WRITE",
+	"T60x_L2_EXT_WRITE_LINE",
+	"T60x_L2_EXT_WRITE_SMALL",
+	"T60x_L2_EXT_BARRIER",
+	"T60x_L2_EXT_AR_STALL",
+	"T60x_L2_EXT_R_BUF_FULL",
+	"T60x_L2_EXT_RD_BUF_FULL",
+	"T60x_L2_EXT_R_RAW",
+	"T60x_L2_EXT_W_STALL",
+	"T60x_L2_EXT_W_BUF_FULL",
+	"T60x_L2_EXT_R_W_HAZARD",
+	"T60x_L2_TAG_HAZARD",
+	"T60x_L2_SNOOP_FULL",
+	"T60x_L2_REPLAY_FULL"
+};
+static const char * const hardware_counters_mali_t62x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MESSAGES_SENT",
+	"T62x_MESSAGES_RECEIVED",
+	"T62x_GPU_ACTIVE",
+	"T62x_IRQ_ACTIVE",
+	"T62x_JS0_JOBS",
+	"T62x_JS0_TASKS",
+	"T62x_JS0_ACTIVE",
+	"",
+	"T62x_JS0_WAIT_READ",
+	"T62x_JS0_WAIT_ISSUE",
+	"T62x_JS0_WAIT_DEPEND",
+	"T62x_JS0_WAIT_FINISH",
+	"T62x_JS1_JOBS",
+	"T62x_JS1_TASKS",
+	"T62x_JS1_ACTIVE",
+	"",
+	"T62x_JS1_WAIT_READ",
+	"T62x_JS1_WAIT_ISSUE",
+	"T62x_JS1_WAIT_DEPEND",
+	"T62x_JS1_WAIT_FINISH",
+	"T62x_JS2_JOBS",
+	"T62x_JS2_TASKS",
+	"T62x_JS2_ACTIVE",
+	"",
+	"T62x_JS2_WAIT_READ",
+	"T62x_JS2_WAIT_ISSUE",
+	"T62x_JS2_WAIT_DEPEND",
+	"T62x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T62x_TI_JOBS_PROCESSED",
+	"T62x_TI_TRIANGLES",
+	"T62x_TI_QUADS",
+	"T62x_TI_POLYGONS",
+	"T62x_TI_POINTS",
+	"T62x_TI_LINES",
+	"T62x_TI_VCACHE_HIT",
+	"T62x_TI_VCACHE_MISS",
+	"T62x_TI_FRONT_FACING",
+	"T62x_TI_BACK_FACING",
+	"T62x_TI_PRIM_VISIBLE",
+	"T62x_TI_PRIM_CULLED",
+	"T62x_TI_PRIM_CLIPPED",
+	"T62x_TI_LEVEL0",
+	"T62x_TI_LEVEL1",
+	"T62x_TI_LEVEL2",
+	"T62x_TI_LEVEL3",
+	"T62x_TI_LEVEL4",
+	"T62x_TI_LEVEL5",
+	"T62x_TI_LEVEL6",
+	"T62x_TI_LEVEL7",
+	"T62x_TI_COMMAND_1",
+	"T62x_TI_COMMAND_2",
+	"T62x_TI_COMMAND_3",
+	"T62x_TI_COMMAND_4",
+	"T62x_TI_COMMAND_5_7",
+	"T62x_TI_COMMAND_8_15",
+	"T62x_TI_COMMAND_16_63",
+	"T62x_TI_COMMAND_64",
+	"T62x_TI_COMPRESS_IN",
+	"T62x_TI_COMPRESS_OUT",
+	"T62x_TI_COMPRESS_FLUSH",
+	"T62x_TI_TIMESTAMPS",
+	"T62x_TI_PCACHE_HIT",
+	"T62x_TI_PCACHE_MISS",
+	"T62x_TI_PCACHE_LINE",
+	"T62x_TI_PCACHE_STALL",
+	"T62x_TI_WRBUF_HIT",
+	"T62x_TI_WRBUF_MISS",
+	"T62x_TI_WRBUF_LINE",
+	"T62x_TI_WRBUF_PARTIAL",
+	"T62x_TI_WRBUF_STALL",
+	"T62x_TI_ACTIVE",
+	"T62x_TI_LOADING_DESC",
+	"T62x_TI_INDEX_WAIT",
+	"T62x_TI_INDEX_RANGE_WAIT",
+	"T62x_TI_VERTEX_WAIT",
+	"T62x_TI_PCACHE_WAIT",
+	"T62x_TI_WRBUF_WAIT",
+	"T62x_TI_BUS_READ",
+	"T62x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_TI_UTLB_STALL",
+	"T62x_TI_UTLB_REPLAY_MISS",
+	"T62x_TI_UTLB_REPLAY_FULL",
+	"T62x_TI_UTLB_NEW_MISS",
+	"T62x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"T62x_SHADER_CORE_ACTIVE",
+	"T62x_FRAG_ACTIVE",
+	"T62x_FRAG_PRIMITIVES",
+	"T62x_FRAG_PRIMITIVES_DROPPED",
+	"T62x_FRAG_CYCLES_DESC",
+	"T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T62x_FRAG_CYCLES_VERT",
+	"T62x_FRAG_CYCLES_TRISETUP",
+	"T62x_FRAG_CYCLES_EZS_ACTIVE",
+	"T62x_FRAG_THREADS",
+	"T62x_FRAG_DUMMY_THREADS",
+	"T62x_FRAG_QUADS_RAST",
+	"T62x_FRAG_QUADS_EZS_TEST",
+	"T62x_FRAG_QUADS_EZS_KILLED",
+	"T62x_FRAG_THREADS_LZS_TEST",
+	"T62x_FRAG_THREADS_LZS_KILLED",
+	"T62x_FRAG_CYCLES_NO_TILE",
+	"T62x_FRAG_NUM_TILES",
+	"T62x_FRAG_TRANS_ELIM",
+	"T62x_COMPUTE_ACTIVE",
+	"T62x_COMPUTE_TASKS",
+	"T62x_COMPUTE_THREADS",
+	"T62x_COMPUTE_CYCLES_DESC",
+	"T62x_TRIPIPE_ACTIVE",
+	"T62x_ARITH_WORDS",
+	"T62x_ARITH_CYCLES_REG",
+	"T62x_ARITH_CYCLES_L0",
+	"T62x_ARITH_FRAG_DEPEND",
+	"T62x_LS_WORDS",
+	"T62x_LS_ISSUES",
+	"T62x_LS_RESTARTS",
+	"T62x_LS_REISSUES_MISS",
+	"T62x_LS_REISSUES_VD",
+	"T62x_LS_REISSUE_ATTRIB_MISS",
+	"T62x_LS_NO_WB",
+	"T62x_TEX_WORDS",
+	"T62x_TEX_BUBBLES",
+	"T62x_TEX_WORDS_L0",
+	"T62x_TEX_WORDS_DESC",
+	"T62x_TEX_ISSUES",
+	"T62x_TEX_RECIRC_FMISS",
+	"T62x_TEX_RECIRC_DESC",
+	"T62x_TEX_RECIRC_MULTI",
+	"T62x_TEX_RECIRC_PMISS",
+	"T62x_TEX_RECIRC_CONF",
+	"T62x_LSC_READ_HITS",
+	"T62x_LSC_READ_MISSES",
+	"T62x_LSC_WRITE_HITS",
+	"T62x_LSC_WRITE_MISSES",
+	"T62x_LSC_ATOMIC_HITS",
+	"T62x_LSC_ATOMIC_MISSES",
+	"T62x_LSC_LINE_FETCHES",
+	"T62x_LSC_DIRTY_LINE",
+	"T62x_LSC_SNOOPS",
+	"T62x_AXI_TLB_STALL",
+	"T62x_AXI_TLB_MISS",
+	"T62x_AXI_TLB_TRANSACTION",
+	"T62x_LS_TLB_MISS",
+	"T62x_LS_TLB_HIT",
+	"T62x_AXI_BEATS_READ",
+	"T62x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MMU_HIT",
+	"T62x_MMU_NEW_MISS",
+	"T62x_MMU_REPLAY_FULL",
+	"T62x_MMU_REPLAY_MISS",
+	"T62x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_UTLB_HIT",
+	"T62x_UTLB_NEW_MISS",
+	"T62x_UTLB_REPLAY_FULL",
+	"T62x_UTLB_REPLAY_MISS",
+	"T62x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_L2_EXT_WRITE_BEATS",
+	"T62x_L2_EXT_READ_BEATS",
+	"T62x_L2_ANY_LOOKUP",
+	"T62x_L2_READ_LOOKUP",
+	"T62x_L2_SREAD_LOOKUP",
+	"T62x_L2_READ_REPLAY",
+	"T62x_L2_READ_SNOOP",
+	"T62x_L2_READ_HIT",
+	"T62x_L2_CLEAN_MISS",
+	"T62x_L2_WRITE_LOOKUP",
+	"T62x_L2_SWRITE_LOOKUP",
+	"T62x_L2_WRITE_REPLAY",
+	"T62x_L2_WRITE_SNOOP",
+	"T62x_L2_WRITE_HIT",
+	"T62x_L2_EXT_READ_FULL",
+	"T62x_L2_EXT_READ_HALF",
+	"T62x_L2_EXT_WRITE_FULL",
+	"T62x_L2_EXT_WRITE_HALF",
+	"T62x_L2_EXT_READ",
+	"T62x_L2_EXT_READ_LINE",
+	"T62x_L2_EXT_WRITE",
+	"T62x_L2_EXT_WRITE_LINE",
+	"T62x_L2_EXT_WRITE_SMALL",
+	"T62x_L2_EXT_BARRIER",
+	"T62x_L2_EXT_AR_STALL",
+	"T62x_L2_EXT_R_BUF_FULL",
+	"T62x_L2_EXT_RD_BUF_FULL",
+	"T62x_L2_EXT_R_RAW",
+	"T62x_L2_EXT_W_STALL",
+	"T62x_L2_EXT_W_BUF_FULL",
+	"T62x_L2_EXT_R_W_HAZARD",
+	"T62x_L2_TAG_HAZARD",
+	"T62x_L2_SNOOP_FULL",
+	"T62x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t72x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T72x_GPU_ACTIVE",
+	"T72x_IRQ_ACTIVE",
+	"T72x_JS0_JOBS",
+	"T72x_JS0_TASKS",
+	"T72x_JS0_ACTIVE",
+	"T72x_JS1_JOBS",
+	"T72x_JS1_TASKS",
+	"T72x_JS1_ACTIVE",
+	"T72x_JS2_JOBS",
+	"T72x_JS2_TASKS",
+	"T72x_JS2_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T72x_TI_JOBS_PROCESSED",
+	"T72x_TI_TRIANGLES",
+	"T72x_TI_QUADS",
+	"T72x_TI_POLYGONS",
+	"T72x_TI_POINTS",
+	"T72x_TI_LINES",
+	"T72x_TI_FRONT_FACING",
+	"T72x_TI_BACK_FACING",
+	"T72x_TI_PRIM_VISIBLE",
+	"T72x_TI_PRIM_CULLED",
+	"T72x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T72x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T72x_FRAG_ACTIVE",
+	"T72x_FRAG_PRIMITIVES",
+	"T72x_FRAG_PRIMITIVES_DROPPED",
+	"T72x_FRAG_THREADS",
+	"T72x_FRAG_DUMMY_THREADS",
+	"T72x_FRAG_QUADS_RAST",
+	"T72x_FRAG_QUADS_EZS_TEST",
+	"T72x_FRAG_QUADS_EZS_KILLED",
+	"T72x_FRAG_THREADS_LZS_TEST",
+	"T72x_FRAG_THREADS_LZS_KILLED",
+	"T72x_FRAG_CYCLES_NO_TILE",
+	"T72x_FRAG_NUM_TILES",
+	"T72x_FRAG_TRANS_ELIM",
+	"T72x_COMPUTE_ACTIVE",
+	"T72x_COMPUTE_TASKS",
+	"T72x_COMPUTE_THREADS",
+	"T72x_TRIPIPE_ACTIVE",
+	"T72x_ARITH_WORDS",
+	"T72x_ARITH_CYCLES_REG",
+	"T72x_LS_WORDS",
+	"T72x_LS_ISSUES",
+	"T72x_LS_RESTARTS",
+	"T72x_LS_REISSUES_MISS",
+	"T72x_TEX_WORDS",
+	"T72x_TEX_BUBBLES",
+	"T72x_TEX_ISSUES",
+	"T72x_LSC_READ_HITS",
+	"T72x_LSC_READ_MISSES",
+	"T72x_LSC_WRITE_HITS",
+	"T72x_LSC_WRITE_MISSES",
+	"T72x_LSC_ATOMIC_HITS",
+	"T72x_LSC_ATOMIC_MISSES",
+	"T72x_LSC_LINE_FETCHES",
+	"T72x_LSC_DIRTY_LINE",
+	"T72x_LSC_SNOOPS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T72x_L2_EXT_WRITE_BEAT",
+	"T72x_L2_EXT_READ_BEAT",
+	"T72x_L2_READ_SNOOP",
+	"T72x_L2_READ_HIT",
+	"T72x_L2_WRITE_SNOOP",
+	"T72x_L2_WRITE_HIT",
+	"T72x_L2_EXT_WRITE_SMALL",
+	"T72x_L2_EXT_BARRIER",
+	"T72x_L2_EXT_AR_STALL",
+	"T72x_L2_EXT_W_STALL",
+	"T72x_L2_SNOOP_FULL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	""
+};
+
+static const char * const hardware_counters_mali_t76x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MESSAGES_SENT",
+	"T76x_MESSAGES_RECEIVED",
+	"T76x_GPU_ACTIVE",
+	"T76x_IRQ_ACTIVE",
+	"T76x_JS0_JOBS",
+	"T76x_JS0_TASKS",
+	"T76x_JS0_ACTIVE",
+	"",
+	"T76x_JS0_WAIT_READ",
+	"T76x_JS0_WAIT_ISSUE",
+	"T76x_JS0_WAIT_DEPEND",
+	"T76x_JS0_WAIT_FINISH",
+	"T76x_JS1_JOBS",
+	"T76x_JS1_TASKS",
+	"T76x_JS1_ACTIVE",
+	"",
+	"T76x_JS1_WAIT_READ",
+	"T76x_JS1_WAIT_ISSUE",
+	"T76x_JS1_WAIT_DEPEND",
+	"T76x_JS1_WAIT_FINISH",
+	"T76x_JS2_JOBS",
+	"T76x_JS2_TASKS",
+	"T76x_JS2_ACTIVE",
+	"",
+	"T76x_JS2_WAIT_READ",
+	"T76x_JS2_WAIT_ISSUE",
+	"T76x_JS2_WAIT_DEPEND",
+	"T76x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T76x_TI_JOBS_PROCESSED",
+	"T76x_TI_TRIANGLES",
+	"T76x_TI_QUADS",
+	"T76x_TI_POLYGONS",
+	"T76x_TI_POINTS",
+	"T76x_TI_LINES",
+	"T76x_TI_VCACHE_HIT",
+	"T76x_TI_VCACHE_MISS",
+	"T76x_TI_FRONT_FACING",
+	"T76x_TI_BACK_FACING",
+	"T76x_TI_PRIM_VISIBLE",
+	"T76x_TI_PRIM_CULLED",
+	"T76x_TI_PRIM_CLIPPED",
+	"T76x_TI_LEVEL0",
+	"T76x_TI_LEVEL1",
+	"T76x_TI_LEVEL2",
+	"T76x_TI_LEVEL3",
+	"T76x_TI_LEVEL4",
+	"T76x_TI_LEVEL5",
+	"T76x_TI_LEVEL6",
+	"T76x_TI_LEVEL7",
+	"T76x_TI_COMMAND_1",
+	"T76x_TI_COMMAND_2",
+	"T76x_TI_COMMAND_3",
+	"T76x_TI_COMMAND_4",
+	"T76x_TI_COMMAND_5_7",
+	"T76x_TI_COMMAND_8_15",
+	"T76x_TI_COMMAND_16_63",
+	"T76x_TI_COMMAND_64",
+	"T76x_TI_COMPRESS_IN",
+	"T76x_TI_COMPRESS_OUT",
+	"T76x_TI_COMPRESS_FLUSH",
+	"T76x_TI_TIMESTAMPS",
+	"T76x_TI_PCACHE_HIT",
+	"T76x_TI_PCACHE_MISS",
+	"T76x_TI_PCACHE_LINE",
+	"T76x_TI_PCACHE_STALL",
+	"T76x_TI_WRBUF_HIT",
+	"T76x_TI_WRBUF_MISS",
+	"T76x_TI_WRBUF_LINE",
+	"T76x_TI_WRBUF_PARTIAL",
+	"T76x_TI_WRBUF_STALL",
+	"T76x_TI_ACTIVE",
+	"T76x_TI_LOADING_DESC",
+	"T76x_TI_INDEX_WAIT",
+	"T76x_TI_INDEX_RANGE_WAIT",
+	"T76x_TI_VERTEX_WAIT",
+	"T76x_TI_PCACHE_WAIT",
+	"T76x_TI_WRBUF_WAIT",
+	"T76x_TI_BUS_READ",
+	"T76x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_TI_UTLB_HIT",
+	"T76x_TI_UTLB_NEW_MISS",
+	"T76x_TI_UTLB_REPLAY_FULL",
+	"T76x_TI_UTLB_REPLAY_MISS",
+	"T76x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T76x_FRAG_ACTIVE",
+	"T76x_FRAG_PRIMITIVES",
+	"T76x_FRAG_PRIMITIVES_DROPPED",
+	"T76x_FRAG_CYCLES_DESC",
+	"T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T76x_FRAG_CYCLES_VERT",
+	"T76x_FRAG_CYCLES_TRISETUP",
+	"T76x_FRAG_CYCLES_EZS_ACTIVE",
+	"T76x_FRAG_THREADS",
+	"T76x_FRAG_DUMMY_THREADS",
+	"T76x_FRAG_QUADS_RAST",
+	"T76x_FRAG_QUADS_EZS_TEST",
+	"T76x_FRAG_QUADS_EZS_KILLED",
+	"T76x_FRAG_THREADS_LZS_TEST",
+	"T76x_FRAG_THREADS_LZS_KILLED",
+	"T76x_FRAG_CYCLES_NO_TILE",
+	"T76x_FRAG_NUM_TILES",
+	"T76x_FRAG_TRANS_ELIM",
+	"T76x_COMPUTE_ACTIVE",
+	"T76x_COMPUTE_TASKS",
+	"T76x_COMPUTE_THREADS",
+	"T76x_COMPUTE_CYCLES_DESC",
+	"T76x_TRIPIPE_ACTIVE",
+	"T76x_ARITH_WORDS",
+	"T76x_ARITH_CYCLES_REG",
+	"T76x_ARITH_CYCLES_L0",
+	"T76x_ARITH_FRAG_DEPEND",
+	"T76x_LS_WORDS",
+	"T76x_LS_ISSUES",
+	"T76x_LS_REISSUE_ATTR",
+	"T76x_LS_REISSUES_VARY",
+	"T76x_LS_VARY_RV_MISS",
+	"T76x_LS_VARY_RV_HIT",
+	"T76x_LS_NO_UNPARK",
+	"T76x_TEX_WORDS",
+	"T76x_TEX_BUBBLES",
+	"T76x_TEX_WORDS_L0",
+	"T76x_TEX_WORDS_DESC",
+	"T76x_TEX_ISSUES",
+	"T76x_TEX_RECIRC_FMISS",
+	"T76x_TEX_RECIRC_DESC",
+	"T76x_TEX_RECIRC_MULTI",
+	"T76x_TEX_RECIRC_PMISS",
+	"T76x_TEX_RECIRC_CONF",
+	"T76x_LSC_READ_HITS",
+	"T76x_LSC_READ_OP",
+	"T76x_LSC_WRITE_HITS",
+	"T76x_LSC_WRITE_OP",
+	"T76x_LSC_ATOMIC_HITS",
+	"T76x_LSC_ATOMIC_OP",
+	"T76x_LSC_LINE_FETCHES",
+	"T76x_LSC_DIRTY_LINE",
+	"T76x_LSC_SNOOPS",
+	"T76x_AXI_TLB_STALL",
+	"T76x_AXI_TLB_MISS",
+	"T76x_AXI_TLB_TRANSACTION",
+	"T76x_LS_TLB_MISS",
+	"T76x_LS_TLB_HIT",
+	"T76x_AXI_BEATS_READ",
+	"T76x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MMU_HIT",
+	"T76x_MMU_NEW_MISS",
+	"T76x_MMU_REPLAY_FULL",
+	"T76x_MMU_REPLAY_MISS",
+	"T76x_MMU_TABLE_WALK",
+	"T76x_MMU_REQUESTS",
+	"",
+	"",
+	"T76x_UTLB_HIT",
+	"T76x_UTLB_NEW_MISS",
+	"T76x_UTLB_REPLAY_FULL",
+	"T76x_UTLB_REPLAY_MISS",
+	"T76x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_L2_EXT_WRITE_BEATS",
+	"T76x_L2_EXT_READ_BEATS",
+	"T76x_L2_ANY_LOOKUP",
+	"T76x_L2_READ_LOOKUP",
+	"T76x_L2_SREAD_LOOKUP",
+	"T76x_L2_READ_REPLAY",
+	"T76x_L2_READ_SNOOP",
+	"T76x_L2_READ_HIT",
+	"T76x_L2_CLEAN_MISS",
+	"T76x_L2_WRITE_LOOKUP",
+	"T76x_L2_SWRITE_LOOKUP",
+	"T76x_L2_WRITE_REPLAY",
+	"T76x_L2_WRITE_SNOOP",
+	"T76x_L2_WRITE_HIT",
+	"T76x_L2_EXT_READ_FULL",
+	"",
+	"T76x_L2_EXT_WRITE_FULL",
+	"T76x_L2_EXT_R_W_HAZARD",
+	"T76x_L2_EXT_READ",
+	"T76x_L2_EXT_READ_LINE",
+	"T76x_L2_EXT_WRITE",
+	"T76x_L2_EXT_WRITE_LINE",
+	"T76x_L2_EXT_WRITE_SMALL",
+	"T76x_L2_EXT_BARRIER",
+	"T76x_L2_EXT_AR_STALL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_EXT_RD_BUF_FULL",
+	"T76x_L2_EXT_R_RAW",
+	"T76x_L2_EXT_W_STALL",
+	"T76x_L2_EXT_W_BUF_FULL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_TAG_HAZARD",
+	"T76x_L2_SNOOP_FULL",
+	"T76x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t82x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MESSAGES_SENT",
+	"T82x_MESSAGES_RECEIVED",
+	"T82x_GPU_ACTIVE",
+	"T82x_IRQ_ACTIVE",
+	"T82x_JS0_JOBS",
+	"T82x_JS0_TASKS",
+	"T82x_JS0_ACTIVE",
+	"",
+	"T82x_JS0_WAIT_READ",
+	"T82x_JS0_WAIT_ISSUE",
+	"T82x_JS0_WAIT_DEPEND",
+	"T82x_JS0_WAIT_FINISH",
+	"T82x_JS1_JOBS",
+	"T82x_JS1_TASKS",
+	"T82x_JS1_ACTIVE",
+	"",
+	"T82x_JS1_WAIT_READ",
+	"T82x_JS1_WAIT_ISSUE",
+	"T82x_JS1_WAIT_DEPEND",
+	"T82x_JS1_WAIT_FINISH",
+	"T82x_JS2_JOBS",
+	"T82x_JS2_TASKS",
+	"T82x_JS2_ACTIVE",
+	"",
+	"T82x_JS2_WAIT_READ",
+	"T82x_JS2_WAIT_ISSUE",
+	"T82x_JS2_WAIT_DEPEND",
+	"T82x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T82x_TI_JOBS_PROCESSED",
+	"T82x_TI_TRIANGLES",
+	"T82x_TI_QUADS",
+	"T82x_TI_POLYGONS",
+	"T82x_TI_POINTS",
+	"T82x_TI_LINES",
+	"T82x_TI_FRONT_FACING",
+	"T82x_TI_BACK_FACING",
+	"T82x_TI_PRIM_VISIBLE",
+	"T82x_TI_PRIM_CULLED",
+	"T82x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T82x_FRAG_ACTIVE",
+	"T82x_FRAG_PRIMITIVES",
+	"T82x_FRAG_PRIMITIVES_DROPPED",
+	"T82x_FRAG_CYCLES_DESC",
+	"T82x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T82x_FRAG_CYCLES_VERT",
+	"T82x_FRAG_CYCLES_TRISETUP",
+	"T82x_FRAG_CYCLES_EZS_ACTIVE",
+	"T82x_FRAG_THREADS",
+	"T82x_FRAG_DUMMY_THREADS",
+	"T82x_FRAG_QUADS_RAST",
+	"T82x_FRAG_QUADS_EZS_TEST",
+	"T82x_FRAG_QUADS_EZS_KILLED",
+	"T82x_FRAG_THREADS_LZS_TEST",
+	"T82x_FRAG_THREADS_LZS_KILLED",
+	"T82x_FRAG_CYCLES_NO_TILE",
+	"T82x_FRAG_NUM_TILES",
+	"T82x_FRAG_TRANS_ELIM",
+	"T82x_COMPUTE_ACTIVE",
+	"T82x_COMPUTE_TASKS",
+	"T82x_COMPUTE_THREADS",
+	"T82x_COMPUTE_CYCLES_DESC",
+	"T82x_TRIPIPE_ACTIVE",
+	"T82x_ARITH_WORDS",
+	"T82x_ARITH_CYCLES_REG",
+	"T82x_ARITH_CYCLES_L0",
+	"T82x_ARITH_FRAG_DEPEND",
+	"T82x_LS_WORDS",
+	"T82x_LS_ISSUES",
+	"T82x_LS_REISSUE_ATTR",
+	"T82x_LS_REISSUES_VARY",
+	"T82x_LS_VARY_RV_MISS",
+	"T82x_LS_VARY_RV_HIT",
+	"T82x_LS_NO_UNPARK",
+	"T82x_TEX_WORDS",
+	"T82x_TEX_BUBBLES",
+	"T82x_TEX_WORDS_L0",
+	"T82x_TEX_WORDS_DESC",
+	"T82x_TEX_ISSUES",
+	"T82x_TEX_RECIRC_FMISS",
+	"T82x_TEX_RECIRC_DESC",
+	"T82x_TEX_RECIRC_MULTI",
+	"T82x_TEX_RECIRC_PMISS",
+	"T82x_TEX_RECIRC_CONF",
+	"T82x_LSC_READ_HITS",
+	"T82x_LSC_READ_OP",
+	"T82x_LSC_WRITE_HITS",
+	"T82x_LSC_WRITE_OP",
+	"T82x_LSC_ATOMIC_HITS",
+	"T82x_LSC_ATOMIC_OP",
+	"T82x_LSC_LINE_FETCHES",
+	"T82x_LSC_DIRTY_LINE",
+	"T82x_LSC_SNOOPS",
+	"T82x_AXI_TLB_STALL",
+	"T82x_AXI_TLB_MISS",
+	"T82x_AXI_TLB_TRANSACTION",
+	"T82x_LS_TLB_MISS",
+	"T82x_LS_TLB_HIT",
+	"T82x_AXI_BEATS_READ",
+	"T82x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MMU_HIT",
+	"T82x_MMU_NEW_MISS",
+	"T82x_MMU_REPLAY_FULL",
+	"T82x_MMU_REPLAY_MISS",
+	"T82x_MMU_TABLE_WALK",
+	"T82x_MMU_REQUESTS",
+	"",
+	"",
+	"T82x_UTLB_HIT",
+	"T82x_UTLB_NEW_MISS",
+	"T82x_UTLB_REPLAY_FULL",
+	"T82x_UTLB_REPLAY_MISS",
+	"T82x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_L2_EXT_WRITE_BEATS",
+	"T82x_L2_EXT_READ_BEATS",
+	"T82x_L2_ANY_LOOKUP",
+	"T82x_L2_READ_LOOKUP",
+	"T82x_L2_SREAD_LOOKUP",
+	"T82x_L2_READ_REPLAY",
+	"T82x_L2_READ_SNOOP",
+	"T82x_L2_READ_HIT",
+	"T82x_L2_CLEAN_MISS",
+	"T82x_L2_WRITE_LOOKUP",
+	"T82x_L2_SWRITE_LOOKUP",
+	"T82x_L2_WRITE_REPLAY",
+	"T82x_L2_WRITE_SNOOP",
+	"T82x_L2_WRITE_HIT",
+	"T82x_L2_EXT_READ_FULL",
+	"",
+	"T82x_L2_EXT_WRITE_FULL",
+	"T82x_L2_EXT_R_W_HAZARD",
+	"T82x_L2_EXT_READ",
+	"T82x_L2_EXT_READ_LINE",
+	"T82x_L2_EXT_WRITE",
+	"T82x_L2_EXT_WRITE_LINE",
+	"T82x_L2_EXT_WRITE_SMALL",
+	"T82x_L2_EXT_BARRIER",
+	"T82x_L2_EXT_AR_STALL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_EXT_RD_BUF_FULL",
+	"T82x_L2_EXT_R_RAW",
+	"T82x_L2_EXT_W_STALL",
+	"T82x_L2_EXT_W_BUF_FULL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_TAG_HAZARD",
+	"T82x_L2_SNOOP_FULL",
+	"T82x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t83x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MESSAGES_SENT",
+	"T83x_MESSAGES_RECEIVED",
+	"T83x_GPU_ACTIVE",
+	"T83x_IRQ_ACTIVE",
+	"T83x_JS0_JOBS",
+	"T83x_JS0_TASKS",
+	"T83x_JS0_ACTIVE",
+	"",
+	"T83x_JS0_WAIT_READ",
+	"T83x_JS0_WAIT_ISSUE",
+	"T83x_JS0_WAIT_DEPEND",
+	"T83x_JS0_WAIT_FINISH",
+	"T83x_JS1_JOBS",
+	"T83x_JS1_TASKS",
+	"T83x_JS1_ACTIVE",
+	"",
+	"T83x_JS1_WAIT_READ",
+	"T83x_JS1_WAIT_ISSUE",
+	"T83x_JS1_WAIT_DEPEND",
+	"T83x_JS1_WAIT_FINISH",
+	"T83x_JS2_JOBS",
+	"T83x_JS2_TASKS",
+	"T83x_JS2_ACTIVE",
+	"",
+	"T83x_JS2_WAIT_READ",
+	"T83x_JS2_WAIT_ISSUE",
+	"T83x_JS2_WAIT_DEPEND",
+	"T83x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T83x_TI_JOBS_PROCESSED",
+	"T83x_TI_TRIANGLES",
+	"T83x_TI_QUADS",
+	"T83x_TI_POLYGONS",
+	"T83x_TI_POINTS",
+	"T83x_TI_LINES",
+	"T83x_TI_FRONT_FACING",
+	"T83x_TI_BACK_FACING",
+	"T83x_TI_PRIM_VISIBLE",
+	"T83x_TI_PRIM_CULLED",
+	"T83x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T83x_FRAG_ACTIVE",
+	"T83x_FRAG_PRIMITIVES",
+	"T83x_FRAG_PRIMITIVES_DROPPED",
+	"T83x_FRAG_CYCLES_DESC",
+	"T83x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T83x_FRAG_CYCLES_VERT",
+	"T83x_FRAG_CYCLES_TRISETUP",
+	"T83x_FRAG_CYCLES_EZS_ACTIVE",
+	"T83x_FRAG_THREADS",
+	"T83x_FRAG_DUMMY_THREADS",
+	"T83x_FRAG_QUADS_RAST",
+	"T83x_FRAG_QUADS_EZS_TEST",
+	"T83x_FRAG_QUADS_EZS_KILLED",
+	"T83x_FRAG_THREADS_LZS_TEST",
+	"T83x_FRAG_THREADS_LZS_KILLED",
+	"T83x_FRAG_CYCLES_NO_TILE",
+	"T83x_FRAG_NUM_TILES",
+	"T83x_FRAG_TRANS_ELIM",
+	"T83x_COMPUTE_ACTIVE",
+	"T83x_COMPUTE_TASKS",
+	"T83x_COMPUTE_THREADS",
+	"T83x_COMPUTE_CYCLES_DESC",
+	"T83x_TRIPIPE_ACTIVE",
+	"T83x_ARITH_WORDS",
+	"T83x_ARITH_CYCLES_REG",
+	"T83x_ARITH_CYCLES_L0",
+	"T83x_ARITH_FRAG_DEPEND",
+	"T83x_LS_WORDS",
+	"T83x_LS_ISSUES",
+	"T83x_LS_REISSUE_ATTR",
+	"T83x_LS_REISSUES_VARY",
+	"T83x_LS_VARY_RV_MISS",
+	"T83x_LS_VARY_RV_HIT",
+	"T83x_LS_NO_UNPARK",
+	"T83x_TEX_WORDS",
+	"T83x_TEX_BUBBLES",
+	"T83x_TEX_WORDS_L0",
+	"T83x_TEX_WORDS_DESC",
+	"T83x_TEX_ISSUES",
+	"T83x_TEX_RECIRC_FMISS",
+	"T83x_TEX_RECIRC_DESC",
+	"T83x_TEX_RECIRC_MULTI",
+	"T83x_TEX_RECIRC_PMISS",
+	"T83x_TEX_RECIRC_CONF",
+	"T83x_LSC_READ_HITS",
+	"T83x_LSC_READ_OP",
+	"T83x_LSC_WRITE_HITS",
+	"T83x_LSC_WRITE_OP",
+	"T83x_LSC_ATOMIC_HITS",
+	"T83x_LSC_ATOMIC_OP",
+	"T83x_LSC_LINE_FETCHES",
+	"T83x_LSC_DIRTY_LINE",
+	"T83x_LSC_SNOOPS",
+	"T83x_AXI_TLB_STALL",
+	"T83x_AXI_TLB_MISS",
+	"T83x_AXI_TLB_TRANSACTION",
+	"T83x_LS_TLB_MISS",
+	"T83x_LS_TLB_HIT",
+	"T83x_AXI_BEATS_READ",
+	"T83x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MMU_HIT",
+	"T83x_MMU_NEW_MISS",
+	"T83x_MMU_REPLAY_FULL",
+	"T83x_MMU_REPLAY_MISS",
+	"T83x_MMU_TABLE_WALK",
+	"T83x_MMU_REQUESTS",
+	"",
+	"",
+	"T83x_UTLB_HIT",
+	"T83x_UTLB_NEW_MISS",
+	"T83x_UTLB_REPLAY_FULL",
+	"T83x_UTLB_REPLAY_MISS",
+	"T83x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_L2_EXT_WRITE_BEATS",
+	"T83x_L2_EXT_READ_BEATS",
+	"T83x_L2_ANY_LOOKUP",
+	"T83x_L2_READ_LOOKUP",
+	"T83x_L2_SREAD_LOOKUP",
+	"T83x_L2_READ_REPLAY",
+	"T83x_L2_READ_SNOOP",
+	"T83x_L2_READ_HIT",
+	"T83x_L2_CLEAN_MISS",
+	"T83x_L2_WRITE_LOOKUP",
+	"T83x_L2_SWRITE_LOOKUP",
+	"T83x_L2_WRITE_REPLAY",
+	"T83x_L2_WRITE_SNOOP",
+	"T83x_L2_WRITE_HIT",
+	"T83x_L2_EXT_READ_FULL",
+	"",
+	"T83x_L2_EXT_WRITE_FULL",
+	"T83x_L2_EXT_R_W_HAZARD",
+	"T83x_L2_EXT_READ",
+	"T83x_L2_EXT_READ_LINE",
+	"T83x_L2_EXT_WRITE",
+	"T83x_L2_EXT_WRITE_LINE",
+	"T83x_L2_EXT_WRITE_SMALL",
+	"T83x_L2_EXT_BARRIER",
+	"T83x_L2_EXT_AR_STALL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_EXT_RD_BUF_FULL",
+	"T83x_L2_EXT_R_RAW",
+	"T83x_L2_EXT_W_STALL",
+	"T83x_L2_EXT_W_BUF_FULL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_TAG_HAZARD",
+	"T83x_L2_SNOOP_FULL",
+	"T83x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t86x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MESSAGES_SENT",
+	"T86x_MESSAGES_RECEIVED",
+	"T86x_GPU_ACTIVE",
+	"T86x_IRQ_ACTIVE",
+	"T86x_JS0_JOBS",
+	"T86x_JS0_TASKS",
+	"T86x_JS0_ACTIVE",
+	"",
+	"T86x_JS0_WAIT_READ",
+	"T86x_JS0_WAIT_ISSUE",
+	"T86x_JS0_WAIT_DEPEND",
+	"T86x_JS0_WAIT_FINISH",
+	"T86x_JS1_JOBS",
+	"T86x_JS1_TASKS",
+	"T86x_JS1_ACTIVE",
+	"",
+	"T86x_JS1_WAIT_READ",
+	"T86x_JS1_WAIT_ISSUE",
+	"T86x_JS1_WAIT_DEPEND",
+	"T86x_JS1_WAIT_FINISH",
+	"T86x_JS2_JOBS",
+	"T86x_JS2_TASKS",
+	"T86x_JS2_ACTIVE",
+	"",
+	"T86x_JS2_WAIT_READ",
+	"T86x_JS2_WAIT_ISSUE",
+	"T86x_JS2_WAIT_DEPEND",
+	"T86x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T86x_TI_JOBS_PROCESSED",
+	"T86x_TI_TRIANGLES",
+	"T86x_TI_QUADS",
+	"T86x_TI_POLYGONS",
+	"T86x_TI_POINTS",
+	"T86x_TI_LINES",
+	"T86x_TI_VCACHE_HIT",
+	"T86x_TI_VCACHE_MISS",
+	"T86x_TI_FRONT_FACING",
+	"T86x_TI_BACK_FACING",
+	"T86x_TI_PRIM_VISIBLE",
+	"T86x_TI_PRIM_CULLED",
+	"T86x_TI_PRIM_CLIPPED",
+	"T86x_TI_LEVEL0",
+	"T86x_TI_LEVEL1",
+	"T86x_TI_LEVEL2",
+	"T86x_TI_LEVEL3",
+	"T86x_TI_LEVEL4",
+	"T86x_TI_LEVEL5",
+	"T86x_TI_LEVEL6",
+	"T86x_TI_LEVEL7",
+	"T86x_TI_COMMAND_1",
+	"T86x_TI_COMMAND_2",
+	"T86x_TI_COMMAND_3",
+	"T86x_TI_COMMAND_4",
+	"T86x_TI_COMMAND_5_7",
+	"T86x_TI_COMMAND_8_15",
+	"T86x_TI_COMMAND_16_63",
+	"T86x_TI_COMMAND_64",
+	"T86x_TI_COMPRESS_IN",
+	"T86x_TI_COMPRESS_OUT",
+	"T86x_TI_COMPRESS_FLUSH",
+	"T86x_TI_TIMESTAMPS",
+	"T86x_TI_PCACHE_HIT",
+	"T86x_TI_PCACHE_MISS",
+	"T86x_TI_PCACHE_LINE",
+	"T86x_TI_PCACHE_STALL",
+	"T86x_TI_WRBUF_HIT",
+	"T86x_TI_WRBUF_MISS",
+	"T86x_TI_WRBUF_LINE",
+	"T86x_TI_WRBUF_PARTIAL",
+	"T86x_TI_WRBUF_STALL",
+	"T86x_TI_ACTIVE",
+	"T86x_TI_LOADING_DESC",
+	"T86x_TI_INDEX_WAIT",
+	"T86x_TI_INDEX_RANGE_WAIT",
+	"T86x_TI_VERTEX_WAIT",
+	"T86x_TI_PCACHE_WAIT",
+	"T86x_TI_WRBUF_WAIT",
+	"T86x_TI_BUS_READ",
+	"T86x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_TI_UTLB_HIT",
+	"T86x_TI_UTLB_NEW_MISS",
+	"T86x_TI_UTLB_REPLAY_FULL",
+	"T86x_TI_UTLB_REPLAY_MISS",
+	"T86x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T86x_FRAG_ACTIVE",
+	"T86x_FRAG_PRIMITIVES",
+	"T86x_FRAG_PRIMITIVES_DROPPED",
+	"T86x_FRAG_CYCLES_DESC",
+	"T86x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T86x_FRAG_CYCLES_VERT",
+	"T86x_FRAG_CYCLES_TRISETUP",
+	"T86x_FRAG_CYCLES_EZS_ACTIVE",
+	"T86x_FRAG_THREADS",
+	"T86x_FRAG_DUMMY_THREADS",
+	"T86x_FRAG_QUADS_RAST",
+	"T86x_FRAG_QUADS_EZS_TEST",
+	"T86x_FRAG_QUADS_EZS_KILLED",
+	"T86x_FRAG_THREADS_LZS_TEST",
+	"T86x_FRAG_THREADS_LZS_KILLED",
+	"T86x_FRAG_CYCLES_NO_TILE",
+	"T86x_FRAG_NUM_TILES",
+	"T86x_FRAG_TRANS_ELIM",
+	"T86x_COMPUTE_ACTIVE",
+	"T86x_COMPUTE_TASKS",
+	"T86x_COMPUTE_THREADS",
+	"T86x_COMPUTE_CYCLES_DESC",
+	"T86x_TRIPIPE_ACTIVE",
+	"T86x_ARITH_WORDS",
+	"T86x_ARITH_CYCLES_REG",
+	"T86x_ARITH_CYCLES_L0",
+	"T86x_ARITH_FRAG_DEPEND",
+	"T86x_LS_WORDS",
+	"T86x_LS_ISSUES",
+	"T86x_LS_REISSUE_ATTR",
+	"T86x_LS_REISSUES_VARY",
+	"T86x_LS_VARY_RV_MISS",
+	"T86x_LS_VARY_RV_HIT",
+	"T86x_LS_NO_UNPARK",
+	"T86x_TEX_WORDS",
+	"T86x_TEX_BUBBLES",
+	"T86x_TEX_WORDS_L0",
+	"T86x_TEX_WORDS_DESC",
+	"T86x_TEX_ISSUES",
+	"T86x_TEX_RECIRC_FMISS",
+	"T86x_TEX_RECIRC_DESC",
+	"T86x_TEX_RECIRC_MULTI",
+	"T86x_TEX_RECIRC_PMISS",
+	"T86x_TEX_RECIRC_CONF",
+	"T86x_LSC_READ_HITS",
+	"T86x_LSC_READ_OP",
+	"T86x_LSC_WRITE_HITS",
+	"T86x_LSC_WRITE_OP",
+	"T86x_LSC_ATOMIC_HITS",
+	"T86x_LSC_ATOMIC_OP",
+	"T86x_LSC_LINE_FETCHES",
+	"T86x_LSC_DIRTY_LINE",
+	"T86x_LSC_SNOOPS",
+	"T86x_AXI_TLB_STALL",
+	"T86x_AXI_TLB_MISS",
+	"T86x_AXI_TLB_TRANSACTION",
+	"T86x_LS_TLB_MISS",
+	"T86x_LS_TLB_HIT",
+	"T86x_AXI_BEATS_READ",
+	"T86x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MMU_HIT",
+	"T86x_MMU_NEW_MISS",
+	"T86x_MMU_REPLAY_FULL",
+	"T86x_MMU_REPLAY_MISS",
+	"T86x_MMU_TABLE_WALK",
+	"T86x_MMU_REQUESTS",
+	"",
+	"",
+	"T86x_UTLB_HIT",
+	"T86x_UTLB_NEW_MISS",
+	"T86x_UTLB_REPLAY_FULL",
+	"T86x_UTLB_REPLAY_MISS",
+	"T86x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_L2_EXT_WRITE_BEATS",
+	"T86x_L2_EXT_READ_BEATS",
+	"T86x_L2_ANY_LOOKUP",
+	"T86x_L2_READ_LOOKUP",
+	"T86x_L2_SREAD_LOOKUP",
+	"T86x_L2_READ_REPLAY",
+	"T86x_L2_READ_SNOOP",
+	"T86x_L2_READ_HIT",
+	"T86x_L2_CLEAN_MISS",
+	"T86x_L2_WRITE_LOOKUP",
+	"T86x_L2_SWRITE_LOOKUP",
+	"T86x_L2_WRITE_REPLAY",
+	"T86x_L2_WRITE_SNOOP",
+	"T86x_L2_WRITE_HIT",
+	"T86x_L2_EXT_READ_FULL",
+	"",
+	"T86x_L2_EXT_WRITE_FULL",
+	"T86x_L2_EXT_R_W_HAZARD",
+	"T86x_L2_EXT_READ",
+	"T86x_L2_EXT_READ_LINE",
+	"T86x_L2_EXT_WRITE",
+	"T86x_L2_EXT_WRITE_LINE",
+	"T86x_L2_EXT_WRITE_SMALL",
+	"T86x_L2_EXT_BARRIER",
+	"T86x_L2_EXT_AR_STALL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_EXT_RD_BUF_FULL",
+	"T86x_L2_EXT_R_RAW",
+	"T86x_L2_EXT_W_STALL",
+	"T86x_L2_EXT_W_BUF_FULL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_TAG_HAZARD",
+	"T86x_L2_SNOOP_FULL",
+	"T86x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t88x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MESSAGES_SENT",
+	"T88x_MESSAGES_RECEIVED",
+	"T88x_GPU_ACTIVE",
+	"T88x_IRQ_ACTIVE",
+	"T88x_JS0_JOBS",
+	"T88x_JS0_TASKS",
+	"T88x_JS0_ACTIVE",
+	"",
+	"T88x_JS0_WAIT_READ",
+	"T88x_JS0_WAIT_ISSUE",
+	"T88x_JS0_WAIT_DEPEND",
+	"T88x_JS0_WAIT_FINISH",
+	"T88x_JS1_JOBS",
+	"T88x_JS1_TASKS",
+	"T88x_JS1_ACTIVE",
+	"",
+	"T88x_JS1_WAIT_READ",
+	"T88x_JS1_WAIT_ISSUE",
+	"T88x_JS1_WAIT_DEPEND",
+	"T88x_JS1_WAIT_FINISH",
+	"T88x_JS2_JOBS",
+	"T88x_JS2_TASKS",
+	"T88x_JS2_ACTIVE",
+	"",
+	"T88x_JS2_WAIT_READ",
+	"T88x_JS2_WAIT_ISSUE",
+	"T88x_JS2_WAIT_DEPEND",
+	"T88x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T88x_TI_JOBS_PROCESSED",
+	"T88x_TI_TRIANGLES",
+	"T88x_TI_QUADS",
+	"T88x_TI_POLYGONS",
+	"T88x_TI_POINTS",
+	"T88x_TI_LINES",
+	"T88x_TI_VCACHE_HIT",
+	"T88x_TI_VCACHE_MISS",
+	"T88x_TI_FRONT_FACING",
+	"T88x_TI_BACK_FACING",
+	"T88x_TI_PRIM_VISIBLE",
+	"T88x_TI_PRIM_CULLED",
+	"T88x_TI_PRIM_CLIPPED",
+	"T88x_TI_LEVEL0",
+	"T88x_TI_LEVEL1",
+	"T88x_TI_LEVEL2",
+	"T88x_TI_LEVEL3",
+	"T88x_TI_LEVEL4",
+	"T88x_TI_LEVEL5",
+	"T88x_TI_LEVEL6",
+	"T88x_TI_LEVEL7",
+	"T88x_TI_COMMAND_1",
+	"T88x_TI_COMMAND_2",
+	"T88x_TI_COMMAND_3",
+	"T88x_TI_COMMAND_4",
+	"T88x_TI_COMMAND_5_7",
+	"T88x_TI_COMMAND_8_15",
+	"T88x_TI_COMMAND_16_63",
+	"T88x_TI_COMMAND_64",
+	"T88x_TI_COMPRESS_IN",
+	"T88x_TI_COMPRESS_OUT",
+	"T88x_TI_COMPRESS_FLUSH",
+	"T88x_TI_TIMESTAMPS",
+	"T88x_TI_PCACHE_HIT",
+	"T88x_TI_PCACHE_MISS",
+	"T88x_TI_PCACHE_LINE",
+	"T88x_TI_PCACHE_STALL",
+	"T88x_TI_WRBUF_HIT",
+	"T88x_TI_WRBUF_MISS",
+	"T88x_TI_WRBUF_LINE",
+	"T88x_TI_WRBUF_PARTIAL",
+	"T88x_TI_WRBUF_STALL",
+	"T88x_TI_ACTIVE",
+	"T88x_TI_LOADING_DESC",
+	"T88x_TI_INDEX_WAIT",
+	"T88x_TI_INDEX_RANGE_WAIT",
+	"T88x_TI_VERTEX_WAIT",
+	"T88x_TI_PCACHE_WAIT",
+	"T88x_TI_WRBUF_WAIT",
+	"T88x_TI_BUS_READ",
+	"T88x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_TI_UTLB_HIT",
+	"T88x_TI_UTLB_NEW_MISS",
+	"T88x_TI_UTLB_REPLAY_FULL",
+	"T88x_TI_UTLB_REPLAY_MISS",
+	"T88x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T88x_FRAG_ACTIVE",
+	"T88x_FRAG_PRIMITIVES",
+	"T88x_FRAG_PRIMITIVES_DROPPED",
+	"T88x_FRAG_CYCLES_DESC",
+	"T88x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T88x_FRAG_CYCLES_VERT",
+	"T88x_FRAG_CYCLES_TRISETUP",
+	"T88x_FRAG_CYCLES_EZS_ACTIVE",
+	"T88x_FRAG_THREADS",
+	"T88x_FRAG_DUMMY_THREADS",
+	"T88x_FRAG_QUADS_RAST",
+	"T88x_FRAG_QUADS_EZS_TEST",
+	"T88x_FRAG_QUADS_EZS_KILLED",
+	"T88x_FRAG_THREADS_LZS_TEST",
+	"T88x_FRAG_THREADS_LZS_KILLED",
+	"T88x_FRAG_CYCLES_NO_TILE",
+	"T88x_FRAG_NUM_TILES",
+	"T88x_FRAG_TRANS_ELIM",
+	"T88x_COMPUTE_ACTIVE",
+	"T88x_COMPUTE_TASKS",
+	"T88x_COMPUTE_THREADS",
+	"T88x_COMPUTE_CYCLES_DESC",
+	"T88x_TRIPIPE_ACTIVE",
+	"T88x_ARITH_WORDS",
+	"T88x_ARITH_CYCLES_REG",
+	"T88x_ARITH_CYCLES_L0",
+	"T88x_ARITH_FRAG_DEPEND",
+	"T88x_LS_WORDS",
+	"T88x_LS_ISSUES",
+	"T88x_LS_REISSUE_ATTR",
+	"T88x_LS_REISSUES_VARY",
+	"T88x_LS_VARY_RV_MISS",
+	"T88x_LS_VARY_RV_HIT",
+	"T88x_LS_NO_UNPARK",
+	"T88x_TEX_WORDS",
+	"T88x_TEX_BUBBLES",
+	"T88x_TEX_WORDS_L0",
+	"T88x_TEX_WORDS_DESC",
+	"T88x_TEX_ISSUES",
+	"T88x_TEX_RECIRC_FMISS",
+	"T88x_TEX_RECIRC_DESC",
+	"T88x_TEX_RECIRC_MULTI",
+	"T88x_TEX_RECIRC_PMISS",
+	"T88x_TEX_RECIRC_CONF",
+	"T88x_LSC_READ_HITS",
+	"T88x_LSC_READ_OP",
+	"T88x_LSC_WRITE_HITS",
+	"T88x_LSC_WRITE_OP",
+	"T88x_LSC_ATOMIC_HITS",
+	"T88x_LSC_ATOMIC_OP",
+	"T88x_LSC_LINE_FETCHES",
+	"T88x_LSC_DIRTY_LINE",
+	"T88x_LSC_SNOOPS",
+	"T88x_AXI_TLB_STALL",
+	"T88x_AXI_TLB_MISS",
+	"T88x_AXI_TLB_TRANSACTION",
+	"T88x_LS_TLB_MISS",
+	"T88x_LS_TLB_HIT",
+	"T88x_AXI_BEATS_READ",
+	"T88x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MMU_HIT",
+	"T88x_MMU_NEW_MISS",
+	"T88x_MMU_REPLAY_FULL",
+	"T88x_MMU_REPLAY_MISS",
+	"T88x_MMU_TABLE_WALK",
+	"T88x_MMU_REQUESTS",
+	"",
+	"",
+	"T88x_UTLB_HIT",
+	"T88x_UTLB_NEW_MISS",
+	"T88x_UTLB_REPLAY_FULL",
+	"T88x_UTLB_REPLAY_MISS",
+	"T88x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_L2_EXT_WRITE_BEATS",
+	"T88x_L2_EXT_READ_BEATS",
+	"T88x_L2_ANY_LOOKUP",
+	"T88x_L2_READ_LOOKUP",
+	"T88x_L2_SREAD_LOOKUP",
+	"T88x_L2_READ_REPLAY",
+	"T88x_L2_READ_SNOOP",
+	"T88x_L2_READ_HIT",
+	"T88x_L2_CLEAN_MISS",
+	"T88x_L2_WRITE_LOOKUP",
+	"T88x_L2_SWRITE_LOOKUP",
+	"T88x_L2_WRITE_REPLAY",
+	"T88x_L2_WRITE_SNOOP",
+	"T88x_L2_WRITE_HIT",
+	"T88x_L2_EXT_READ_FULL",
+	"",
+	"T88x_L2_EXT_WRITE_FULL",
+	"T88x_L2_EXT_R_W_HAZARD",
+	"T88x_L2_EXT_READ",
+	"T88x_L2_EXT_READ_LINE",
+	"T88x_L2_EXT_WRITE",
+	"T88x_L2_EXT_WRITE_LINE",
+	"T88x_L2_EXT_WRITE_SMALL",
+	"T88x_L2_EXT_BARRIER",
+	"T88x_L2_EXT_AR_STALL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_EXT_RD_BUF_FULL",
+	"T88x_L2_EXT_R_RAW",
+	"T88x_L2_EXT_W_STALL",
+	"T88x_L2_EXT_W_BUF_FULL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_TAG_HAZARD",
+	"T88x_L2_SNOOP_FULL",
+	"T88x_L2_REPLAY_FULL"
+};
+
+#include "mali_kbase_gator_hwcnt_names_tmix.h"
+
+#include "mali_kbase_gator_hwcnt_names_thex.h"
+
+#include "mali_kbase_gator_hwcnt_names_tsix.h"
+
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
new file mode 100644
index 0000000..bcceef4
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+
+static const char * const hardware_counters_mali_tHEx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MESSAGES_SENT",
+	"THEx_MESSAGES_RECEIVED",
+	"THEx_GPU_ACTIVE",
+	"THEx_IRQ_ACTIVE",
+	"THEx_JS0_JOBS",
+	"THEx_JS0_TASKS",
+	"THEx_JS0_ACTIVE",
+	"",
+	"THEx_JS0_WAIT_READ",
+	"THEx_JS0_WAIT_ISSUE",
+	"THEx_JS0_WAIT_DEPEND",
+	"THEx_JS0_WAIT_FINISH",
+	"THEx_JS1_JOBS",
+	"THEx_JS1_TASKS",
+	"THEx_JS1_ACTIVE",
+	"",
+	"THEx_JS1_WAIT_READ",
+	"THEx_JS1_WAIT_ISSUE",
+	"THEx_JS1_WAIT_DEPEND",
+	"THEx_JS1_WAIT_FINISH",
+	"THEx_JS2_JOBS",
+	"THEx_JS2_TASKS",
+	"THEx_JS2_ACTIVE",
+	"",
+	"THEx_JS2_WAIT_READ",
+	"THEx_JS2_WAIT_ISSUE",
+	"THEx_JS2_WAIT_DEPEND",
+	"THEx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"THEx_TILER_ACTIVE",
+	"THEx_JOBS_PROCESSED",
+	"THEx_TRIANGLES",
+	"THEx_LINES",
+	"THEx_POINTS",
+	"THEx_FRONT_FACING",
+	"THEx_BACK_FACING",
+	"THEx_PRIM_VISIBLE",
+	"THEx_PRIM_CULLED",
+	"THEx_PRIM_CLIPPED",
+	"THEx_PRIM_SAT_CULLED",
+	"",
+	"",
+	"THEx_BUS_READ",
+	"",
+	"THEx_BUS_WRITE",
+	"THEx_LOADING_DESC",
+	"THEx_IDVS_POS_SHAD_REQ",
+	"THEx_IDVS_POS_SHAD_WAIT",
+	"THEx_IDVS_POS_SHAD_STALL",
+	"THEx_IDVS_POS_FIFO_FULL",
+	"THEx_PREFETCH_STALL",
+	"THEx_VCACHE_HIT",
+	"THEx_VCACHE_MISS",
+	"THEx_VCACHE_LINE_WAIT",
+	"THEx_VFETCH_POS_READ_WAIT",
+	"THEx_VFETCH_VERTEX_WAIT",
+	"THEx_VFETCH_STALL",
+	"THEx_PRIMASSY_STALL",
+	"THEx_BBOX_GEN_STALL",
+	"THEx_IDVS_VBU_HIT",
+	"THEx_IDVS_VBU_MISS",
+	"THEx_IDVS_VBU_LINE_DEALLOCATE",
+	"THEx_IDVS_VAR_SHAD_REQ",
+	"THEx_IDVS_VAR_SHAD_STALL",
+	"THEx_BINNER_STALL",
+	"THEx_ITER_STALL",
+	"THEx_COMPRESS_MISS",
+	"THEx_COMPRESS_STALL",
+	"THEx_PCACHE_HIT",
+	"THEx_PCACHE_MISS",
+	"THEx_PCACHE_MISS_STALL",
+	"THEx_PCACHE_EVICT_STALL",
+	"THEx_PMGR_PTR_WR_STALL",
+	"THEx_PMGR_PTR_RD_STALL",
+	"THEx_PMGR_CMD_WR_STALL",
+	"THEx_WRBUF_ACTIVE",
+	"THEx_WRBUF_HIT",
+	"THEx_WRBUF_MISS",
+	"THEx_WRBUF_NO_FREE_LINE_STALL",
+	"THEx_WRBUF_NO_AXI_ID_STALL",
+	"THEx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"THEx_UTLB_TRANS",
+	"THEx_UTLB_TRANS_HIT",
+	"THEx_UTLB_TRANS_STALL",
+	"THEx_UTLB_TRANS_MISS_DELAY",
+	"THEx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"THEx_FRAG_ACTIVE",
+	"THEx_FRAG_PRIMITIVES",
+	"THEx_FRAG_PRIM_RAST",
+	"THEx_FRAG_FPK_ACTIVE",
+	"THEx_FRAG_STARVING",
+	"THEx_FRAG_WARPS",
+	"THEx_FRAG_PARTIAL_WARPS",
+	"THEx_FRAG_QUADS_RAST",
+	"THEx_FRAG_QUADS_EZS_TEST",
+	"THEx_FRAG_QUADS_EZS_UPDATE",
+	"THEx_FRAG_QUADS_EZS_KILL",
+	"THEx_FRAG_LZS_TEST",
+	"THEx_FRAG_LZS_KILL",
+	"",
+	"THEx_FRAG_PTILES",
+	"THEx_FRAG_TRANS_ELIM",
+	"THEx_QUAD_FPK_KILLER",
+	"",
+	"THEx_COMPUTE_ACTIVE",
+	"THEx_COMPUTE_TASKS",
+	"THEx_COMPUTE_WARPS",
+	"THEx_COMPUTE_STARVING",
+	"THEx_EXEC_CORE_ACTIVE",
+	"THEx_EXEC_ACTIVE",
+	"THEx_EXEC_INSTR_COUNT",
+	"THEx_EXEC_INSTR_DIVERGED",
+	"THEx_EXEC_INSTR_STARVING",
+	"THEx_ARITH_INSTR_SINGLE_FMA",
+	"THEx_ARITH_INSTR_DOUBLE",
+	"THEx_ARITH_INSTR_MSG",
+	"THEx_ARITH_INSTR_MSG_ONLY",
+	"THEx_TEX_INSTR",
+	"THEx_TEX_INSTR_MIPMAP",
+	"THEx_TEX_INSTR_COMPRESSED",
+	"THEx_TEX_INSTR_3D",
+	"THEx_TEX_INSTR_TRILINEAR",
+	"THEx_TEX_COORD_ISSUE",
+	"THEx_TEX_COORD_STALL",
+	"THEx_TEX_STARVE_CACHE",
+	"THEx_TEX_STARVE_FILTER",
+	"THEx_LS_MEM_READ_FULL",
+	"THEx_LS_MEM_READ_SHORT",
+	"THEx_LS_MEM_WRITE_FULL",
+	"THEx_LS_MEM_WRITE_SHORT",
+	"THEx_LS_MEM_ATOMIC",
+	"THEx_VARY_INSTR",
+	"THEx_VARY_SLOT_32",
+	"THEx_VARY_SLOT_16",
+	"THEx_ATTR_INSTR",
+	"THEx_ARITH_INSTR_FP_MUL",
+	"THEx_BEATS_RD_FTC",
+	"THEx_BEATS_RD_FTC_EXT",
+	"THEx_BEATS_RD_LSC",
+	"THEx_BEATS_RD_LSC_EXT",
+	"THEx_BEATS_RD_TEX",
+	"THEx_BEATS_RD_TEX_EXT",
+	"THEx_BEATS_RD_OTHER",
+	"THEx_BEATS_WR_LSC",
+	"THEx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"THEx_L2_RD_MSG_IN",
+	"THEx_L2_RD_MSG_IN_STALL",
+	"THEx_L2_WR_MSG_IN",
+	"THEx_L2_WR_MSG_IN_STALL",
+	"THEx_L2_SNP_MSG_IN",
+	"THEx_L2_SNP_MSG_IN_STALL",
+	"THEx_L2_RD_MSG_OUT",
+	"THEx_L2_RD_MSG_OUT_STALL",
+	"THEx_L2_WR_MSG_OUT",
+	"THEx_L2_ANY_LOOKUP",
+	"THEx_L2_READ_LOOKUP",
+	"THEx_L2_WRITE_LOOKUP",
+	"THEx_L2_EXT_SNOOP_LOOKUP",
+	"THEx_L2_EXT_READ",
+	"THEx_L2_EXT_READ_NOSNP",
+	"THEx_L2_EXT_READ_UNIQUE",
+	"THEx_L2_EXT_READ_BEATS",
+	"THEx_L2_EXT_AR_STALL",
+	"THEx_L2_EXT_AR_CNT_Q1",
+	"THEx_L2_EXT_AR_CNT_Q2",
+	"THEx_L2_EXT_AR_CNT_Q3",
+	"THEx_L2_EXT_RRESP_0_127",
+	"THEx_L2_EXT_RRESP_128_191",
+	"THEx_L2_EXT_RRESP_192_255",
+	"THEx_L2_EXT_RRESP_256_319",
+	"THEx_L2_EXT_RRESP_320_383",
+	"THEx_L2_EXT_WRITE",
+	"THEx_L2_EXT_WRITE_NOSNP_FULL",
+	"THEx_L2_EXT_WRITE_NOSNP_PTL",
+	"THEx_L2_EXT_WRITE_SNP_FULL",
+	"THEx_L2_EXT_WRITE_SNP_PTL",
+	"THEx_L2_EXT_WRITE_BEATS",
+	"THEx_L2_EXT_W_STALL",
+	"THEx_L2_EXT_AW_CNT_Q1",
+	"THEx_L2_EXT_AW_CNT_Q2",
+	"THEx_L2_EXT_AW_CNT_Q3",
+	"THEx_L2_EXT_SNOOP",
+	"THEx_L2_EXT_SNOOP_STALL",
+	"THEx_L2_EXT_SNOOP_RESP_CLEAN",
+	"THEx_L2_EXT_SNOOP_RESP_DATA",
+	"THEx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
new file mode 100644
index 0000000..5ea0677
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+
+static const char * const hardware_counters_mali_tMIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MESSAGES_SENT",
+	"TMIx_MESSAGES_RECEIVED",
+	"TMIx_GPU_ACTIVE",
+	"TMIx_IRQ_ACTIVE",
+	"TMIx_JS0_JOBS",
+	"TMIx_JS0_TASKS",
+	"TMIx_JS0_ACTIVE",
+	"",
+	"TMIx_JS0_WAIT_READ",
+	"TMIx_JS0_WAIT_ISSUE",
+	"TMIx_JS0_WAIT_DEPEND",
+	"TMIx_JS0_WAIT_FINISH",
+	"TMIx_JS1_JOBS",
+	"TMIx_JS1_TASKS",
+	"TMIx_JS1_ACTIVE",
+	"",
+	"TMIx_JS1_WAIT_READ",
+	"TMIx_JS1_WAIT_ISSUE",
+	"TMIx_JS1_WAIT_DEPEND",
+	"TMIx_JS1_WAIT_FINISH",
+	"TMIx_JS2_JOBS",
+	"TMIx_JS2_TASKS",
+	"TMIx_JS2_ACTIVE",
+	"",
+	"TMIx_JS2_WAIT_READ",
+	"TMIx_JS2_WAIT_ISSUE",
+	"TMIx_JS2_WAIT_DEPEND",
+	"TMIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_TILER_ACTIVE",
+	"TMIx_JOBS_PROCESSED",
+	"TMIx_TRIANGLES",
+	"TMIx_LINES",
+	"TMIx_POINTS",
+	"TMIx_FRONT_FACING",
+	"TMIx_BACK_FACING",
+	"TMIx_PRIM_VISIBLE",
+	"TMIx_PRIM_CULLED",
+	"TMIx_PRIM_CLIPPED",
+	"TMIx_PRIM_SAT_CULLED",
+	"",
+	"",
+	"TMIx_BUS_READ",
+	"",
+	"TMIx_BUS_WRITE",
+	"TMIx_LOADING_DESC",
+	"TMIx_IDVS_POS_SHAD_REQ",
+	"TMIx_IDVS_POS_SHAD_WAIT",
+	"TMIx_IDVS_POS_SHAD_STALL",
+	"TMIx_IDVS_POS_FIFO_FULL",
+	"TMIx_PREFETCH_STALL",
+	"TMIx_VCACHE_HIT",
+	"TMIx_VCACHE_MISS",
+	"TMIx_VCACHE_LINE_WAIT",
+	"TMIx_VFETCH_POS_READ_WAIT",
+	"TMIx_VFETCH_VERTEX_WAIT",
+	"TMIx_VFETCH_STALL",
+	"TMIx_PRIMASSY_STALL",
+	"TMIx_BBOX_GEN_STALL",
+	"TMIx_IDVS_VBU_HIT",
+	"TMIx_IDVS_VBU_MISS",
+	"TMIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TMIx_IDVS_VAR_SHAD_REQ",
+	"TMIx_IDVS_VAR_SHAD_STALL",
+	"TMIx_BINNER_STALL",
+	"TMIx_ITER_STALL",
+	"TMIx_COMPRESS_MISS",
+	"TMIx_COMPRESS_STALL",
+	"TMIx_PCACHE_HIT",
+	"TMIx_PCACHE_MISS",
+	"TMIx_PCACHE_MISS_STALL",
+	"TMIx_PCACHE_EVICT_STALL",
+	"TMIx_PMGR_PTR_WR_STALL",
+	"TMIx_PMGR_PTR_RD_STALL",
+	"TMIx_PMGR_CMD_WR_STALL",
+	"TMIx_WRBUF_ACTIVE",
+	"TMIx_WRBUF_HIT",
+	"TMIx_WRBUF_MISS",
+	"TMIx_WRBUF_NO_FREE_LINE_STALL",
+	"TMIx_WRBUF_NO_AXI_ID_STALL",
+	"TMIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TMIx_UTLB_TRANS",
+	"TMIx_UTLB_TRANS_HIT",
+	"TMIx_UTLB_TRANS_STALL",
+	"TMIx_UTLB_TRANS_MISS_DELAY",
+	"TMIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_FRAG_ACTIVE",
+	"TMIx_FRAG_PRIMITIVES",
+	"TMIx_FRAG_PRIM_RAST",
+	"TMIx_FRAG_FPK_ACTIVE",
+	"TMIx_FRAG_STARVING",
+	"TMIx_FRAG_WARPS",
+	"TMIx_FRAG_PARTIAL_WARPS",
+	"TMIx_FRAG_QUADS_RAST",
+	"TMIx_FRAG_QUADS_EZS_TEST",
+	"TMIx_FRAG_QUADS_EZS_UPDATE",
+	"TMIx_FRAG_QUADS_EZS_KILL",
+	"TMIx_FRAG_LZS_TEST",
+	"TMIx_FRAG_LZS_KILL",
+	"",
+	"TMIx_FRAG_PTILES",
+	"TMIx_FRAG_TRANS_ELIM",
+	"TMIx_QUAD_FPK_KILLER",
+	"",
+	"TMIx_COMPUTE_ACTIVE",
+	"TMIx_COMPUTE_TASKS",
+	"TMIx_COMPUTE_WARPS",
+	"TMIx_COMPUTE_STARVING",
+	"TMIx_EXEC_CORE_ACTIVE",
+	"TMIx_EXEC_ACTIVE",
+	"TMIx_EXEC_INSTR_COUNT",
+	"TMIx_EXEC_INSTR_DIVERGED",
+	"TMIx_EXEC_INSTR_STARVING",
+	"TMIx_ARITH_INSTR_SINGLE_FMA",
+	"TMIx_ARITH_INSTR_DOUBLE",
+	"TMIx_ARITH_INSTR_MSG",
+	"TMIx_ARITH_INSTR_MSG_ONLY",
+	"TMIx_TEX_INSTR",
+	"TMIx_TEX_INSTR_MIPMAP",
+	"TMIx_TEX_INSTR_COMPRESSED",
+	"TMIx_TEX_INSTR_3D",
+	"TMIx_TEX_INSTR_TRILINEAR",
+	"TMIx_TEX_COORD_ISSUE",
+	"TMIx_TEX_COORD_STALL",
+	"TMIx_TEX_STARVE_CACHE",
+	"TMIx_TEX_STARVE_FILTER",
+	"TMIx_LS_MEM_READ_FULL",
+	"TMIx_LS_MEM_READ_SHORT",
+	"TMIx_LS_MEM_WRITE_FULL",
+	"TMIx_LS_MEM_WRITE_SHORT",
+	"TMIx_LS_MEM_ATOMIC",
+	"TMIx_VARY_INSTR",
+	"TMIx_VARY_SLOT_32",
+	"TMIx_VARY_SLOT_16",
+	"TMIx_ATTR_INSTR",
+	"TMIx_ARITH_INSTR_FP_MUL",
+	"TMIx_BEATS_RD_FTC",
+	"TMIx_BEATS_RD_FTC_EXT",
+	"TMIx_BEATS_RD_LSC",
+	"TMIx_BEATS_RD_LSC_EXT",
+	"TMIx_BEATS_RD_TEX",
+	"TMIx_BEATS_RD_TEX_EXT",
+	"TMIx_BEATS_RD_OTHER",
+	"TMIx_BEATS_WR_LSC",
+	"TMIx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TMIx_L2_RD_MSG_IN",
+	"TMIx_L2_RD_MSG_IN_STALL",
+	"TMIx_L2_WR_MSG_IN",
+	"TMIx_L2_WR_MSG_IN_STALL",
+	"TMIx_L2_SNP_MSG_IN",
+	"TMIx_L2_SNP_MSG_IN_STALL",
+	"TMIx_L2_RD_MSG_OUT",
+	"TMIx_L2_RD_MSG_OUT_STALL",
+	"TMIx_L2_WR_MSG_OUT",
+	"TMIx_L2_ANY_LOOKUP",
+	"TMIx_L2_READ_LOOKUP",
+	"TMIx_L2_WRITE_LOOKUP",
+	"TMIx_L2_EXT_SNOOP_LOOKUP",
+	"TMIx_L2_EXT_READ",
+	"TMIx_L2_EXT_READ_NOSNP",
+	"TMIx_L2_EXT_READ_UNIQUE",
+	"TMIx_L2_EXT_READ_BEATS",
+	"TMIx_L2_EXT_AR_STALL",
+	"TMIx_L2_EXT_AR_CNT_Q1",
+	"TMIx_L2_EXT_AR_CNT_Q2",
+	"TMIx_L2_EXT_AR_CNT_Q3",
+	"TMIx_L2_EXT_RRESP_0_127",
+	"TMIx_L2_EXT_RRESP_128_191",
+	"TMIx_L2_EXT_RRESP_192_255",
+	"TMIx_L2_EXT_RRESP_256_319",
+	"TMIx_L2_EXT_RRESP_320_383",
+	"TMIx_L2_EXT_WRITE",
+	"TMIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TMIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TMIx_L2_EXT_WRITE_SNP_FULL",
+	"TMIx_L2_EXT_WRITE_SNP_PTL",
+	"TMIx_L2_EXT_WRITE_BEATS",
+	"TMIx_L2_EXT_W_STALL",
+	"TMIx_L2_EXT_AW_CNT_Q1",
+	"TMIx_L2_EXT_AW_CNT_Q2",
+	"TMIx_L2_EXT_AW_CNT_Q3",
+	"TMIx_L2_EXT_SNOOP",
+	"TMIx_L2_EXT_SNOOP_STALL",
+	"TMIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TMIx_L2_EXT_SNOOP_RESP_DATA",
+	"TMIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
new file mode 100644
index 0000000..7ff7009
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TSIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TSIX_H_
+
+static const char * const hardware_counters_mali_tSIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_MESSAGES_SENT",
+	"TSIx_MESSAGES_RECEIVED",
+	"TSIx_GPU_ACTIVE",
+	"TSIx_IRQ_ACTIVE",
+	"TSIx_JS0_JOBS",
+	"TSIx_JS0_TASKS",
+	"TSIx_JS0_ACTIVE",
+	"",
+	"TSIx_JS0_WAIT_READ",
+	"TSIx_JS0_WAIT_ISSUE",
+	"TSIx_JS0_WAIT_DEPEND",
+	"TSIx_JS0_WAIT_FINISH",
+	"TSIx_JS1_JOBS",
+	"TSIx_JS1_TASKS",
+	"TSIx_JS1_ACTIVE",
+	"",
+	"TSIx_JS1_WAIT_READ",
+	"TSIx_JS1_WAIT_ISSUE",
+	"TSIx_JS1_WAIT_DEPEND",
+	"TSIx_JS1_WAIT_FINISH",
+	"TSIx_JS2_JOBS",
+	"TSIx_JS2_TASKS",
+	"TSIx_JS2_ACTIVE",
+	"",
+	"TSIx_JS2_WAIT_READ",
+	"TSIx_JS2_WAIT_ISSUE",
+	"TSIx_JS2_WAIT_DEPEND",
+	"TSIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_TILER_ACTIVE",
+	"TSIx_JOBS_PROCESSED",
+	"TSIx_TRIANGLES",
+	"TSIx_LINES",
+	"TSIx_POINTS",
+	"TSIx_FRONT_FACING",
+	"TSIx_BACK_FACING",
+	"TSIx_PRIM_VISIBLE",
+	"TSIx_PRIM_CULLED",
+	"TSIx_PRIM_CLIPPED",
+	"TSIx_PRIM_SAT_CULLED",
+	"",
+	"",
+	"TSIx_BUS_READ",
+	"",
+	"TSIx_BUS_WRITE",
+	"TSIx_LOADING_DESC",
+	"TSIx_IDVS_POS_SHAD_REQ",
+	"TSIx_IDVS_POS_SHAD_WAIT",
+	"TSIx_IDVS_POS_SHAD_STALL",
+	"TSIx_IDVS_POS_FIFO_FULL",
+	"TSIx_PREFETCH_STALL",
+	"TSIx_VCACHE_HIT",
+	"TSIx_VCACHE_MISS",
+	"TSIx_VCACHE_LINE_WAIT",
+	"TSIx_VFETCH_POS_READ_WAIT",
+	"TSIx_VFETCH_VERTEX_WAIT",
+	"TSIx_VFETCH_STALL",
+	"TSIx_PRIMASSY_STALL",
+	"TSIx_BBOX_GEN_STALL",
+	"TSIx_IDVS_VBU_HIT",
+	"TSIx_IDVS_VBU_MISS",
+	"TSIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TSIx_IDVS_VAR_SHAD_REQ",
+	"TSIx_IDVS_VAR_SHAD_STALL",
+	"TSIx_BINNER_STALL",
+	"TSIx_ITER_STALL",
+	"TSIx_COMPRESS_MISS",
+	"TSIx_COMPRESS_STALL",
+	"TSIx_PCACHE_HIT",
+	"TSIx_PCACHE_MISS",
+	"TSIx_PCACHE_MISS_STALL",
+	"TSIx_PCACHE_EVICT_STALL",
+	"TSIx_PMGR_PTR_WR_STALL",
+	"TSIx_PMGR_PTR_RD_STALL",
+	"TSIx_PMGR_CMD_WR_STALL",
+	"TSIx_WRBUF_ACTIVE",
+	"TSIx_WRBUF_HIT",
+	"TSIx_WRBUF_MISS",
+	"TSIx_WRBUF_NO_FREE_LINE_STALL",
+	"TSIx_WRBUF_NO_AXI_ID_STALL",
+	"TSIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TSIx_UTLB_TRANS",
+	"TSIx_UTLB_TRANS_HIT",
+	"TSIx_UTLB_TRANS_STALL",
+	"TSIx_UTLB_TRANS_MISS_DELAY",
+	"TSIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_FRAG_ACTIVE",
+	"TSIx_FRAG_PRIMITIVES",
+	"TSIx_FRAG_PRIM_RAST",
+	"TSIx_FRAG_FPK_ACTIVE",
+	"TSIx_FRAG_STARVING",
+	"TSIx_FRAG_WARPS",
+	"TSIx_FRAG_PARTIAL_WARPS",
+	"TSIx_FRAG_QUADS_RAST",
+	"TSIx_FRAG_QUADS_EZS_TEST",
+	"TSIx_FRAG_QUADS_EZS_UPDATE",
+	"TSIx_FRAG_QUADS_EZS_KILL",
+	"TSIx_FRAG_LZS_TEST",
+	"TSIx_FRAG_LZS_KILL",
+	"",
+	"TSIx_FRAG_PTILES",
+	"TSIx_FRAG_TRANS_ELIM",
+	"TSIx_QUAD_FPK_KILLER",
+	"",
+	"TSIx_COMPUTE_ACTIVE",
+	"TSIx_COMPUTE_TASKS",
+	"TSIx_COMPUTE_WARPS",
+	"TSIx_COMPUTE_STARVING",
+	"TSIx_EXEC_CORE_ACTIVE",
+	"TSIx_EXEC_ACTIVE",
+	"TSIx_EXEC_INSTR_COUNT",
+	"TSIx_EXEC_INSTR_DIVERGED",
+	"TSIx_EXEC_INSTR_STARVING",
+	"TSIx_ARITH_INSTR_SINGLE_FMA",
+	"TSIx_ARITH_INSTR_DOUBLE",
+	"TSIx_ARITH_INSTR_MSG",
+	"TSIx_ARITH_INSTR_MSG_ONLY",
+	"TSIx_TEX_INSTR",
+	"TSIx_TEX_INSTR_MIPMAP",
+	"TSIx_TEX_INSTR_COMPRESSED",
+	"TSIx_TEX_INSTR_3D",
+	"TSIx_TEX_INSTR_TRILINEAR",
+	"TSIx_TEX_COORD_ISSUE",
+	"TSIx_TEX_COORD_STALL",
+	"TSIx_TEX_STARVE_CACHE",
+	"TSIx_TEX_STARVE_FILTER",
+	"TSIx_LS_MEM_READ_FULL",
+	"TSIx_LS_MEM_READ_SHORT",
+	"TSIx_LS_MEM_WRITE_FULL",
+	"TSIx_LS_MEM_WRITE_SHORT",
+	"TSIx_LS_MEM_ATOMIC",
+	"TSIx_VARY_INSTR",
+	"TSIx_VARY_SLOT_32",
+	"TSIx_VARY_SLOT_16",
+	"TSIx_ATTR_INSTR",
+	"TSIx_ARITH_INSTR_FP_MUL",
+	"TSIx_BEATS_RD_FTC",
+	"TSIx_BEATS_RD_FTC_EXT",
+	"TSIx_BEATS_RD_LSC",
+	"TSIx_BEATS_RD_LSC_EXT",
+	"TSIx_BEATS_RD_TEX",
+	"TSIx_BEATS_RD_TEX_EXT",
+	"TSIx_BEATS_RD_OTHER",
+	"TSIx_BEATS_WR_LSC",
+	"TSIx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TSIx_L2_RD_MSG_IN",
+	"TSIx_L2_RD_MSG_IN_STALL",
+	"TSIx_L2_WR_MSG_IN",
+	"TSIx_L2_WR_MSG_IN_STALL",
+	"TSIx_L2_SNP_MSG_IN",
+	"TSIx_L2_SNP_MSG_IN_STALL",
+	"TSIx_L2_RD_MSG_OUT",
+	"TSIx_L2_RD_MSG_OUT_STALL",
+	"TSIx_L2_WR_MSG_OUT",
+	"TSIx_L2_ANY_LOOKUP",
+	"TSIx_L2_READ_LOOKUP",
+	"TSIx_L2_WRITE_LOOKUP",
+	"TSIx_L2_EXT_SNOOP_LOOKUP",
+	"TSIx_L2_EXT_READ",
+	"TSIx_L2_EXT_READ_NOSNP",
+	"TSIx_L2_EXT_READ_UNIQUE",
+	"TSIx_L2_EXT_READ_BEATS",
+	"TSIx_L2_EXT_AR_STALL",
+	"TSIx_L2_EXT_AR_CNT_Q1",
+	"TSIx_L2_EXT_AR_CNT_Q2",
+	"TSIx_L2_EXT_AR_CNT_Q3",
+	"TSIx_L2_EXT_RRESP_0_127",
+	"TSIx_L2_EXT_RRESP_128_191",
+	"TSIx_L2_EXT_RRESP_192_255",
+	"TSIx_L2_EXT_RRESP_256_319",
+	"TSIx_L2_EXT_RRESP_320_383",
+	"TSIx_L2_EXT_WRITE",
+	"TSIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TSIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TSIx_L2_EXT_WRITE_SNP_FULL",
+	"TSIx_L2_EXT_WRITE_SNP_PTL",
+	"TSIx_L2_EXT_WRITE_BEATS",
+	"TSIx_L2_EXT_W_STALL",
+	"TSIx_L2_EXT_AW_CNT_Q1",
+	"TSIx_L2_EXT_AW_CNT_Q2",
+	"TSIx_L2_EXT_AW_CNT_Q3",
+	"TSIx_L2_EXT_SNOOP",
+	"TSIx_L2_EXT_SNOOP_STALL",
+	"TSIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TSIx_L2_EXT_SNOOP_RESP_DATA",
+	"TSIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
new file mode 100644
index 0000000..5c2367f
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
@@ -0,0 +1,114 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#ifndef _KBASE_GPU_ID_H_
+#define _KBASE_GPU_ID_H_
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT       0
+#define GPU_ID_VERSION_MINOR_SHIFT        4
+#define GPU_ID_VERSION_MAJOR_SHIFT        12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
+#define GPU_ID_VERSION_STATUS             (0xF  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xF  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X                    0x6956
+#define GPU_ID_PI_T62X                    0x0620
+#define GPU_ID_PI_T76X                    0x0750
+#define GPU_ID_PI_T72X                    0x0720
+#define GPU_ID_PI_TFRX                    0x0880
+#define GPU_ID_PI_T86X                    0x0860
+#define GPU_ID_PI_T82X                    0x0820
+#define GPU_ID_PI_T83X                    0x0830
+
+/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
+#define GPU_ID_PI_NEW_FORMAT_START        0x1000
+#define GPU_ID_IS_NEW_FORMAT(product_id)  ((product_id) != GPU_ID_PI_T60X && \
+						(product_id) >= \
+						GPU_ID_PI_NEW_FORMAT_START)
+
+#define GPU_ID2_VERSION_STATUS_SHIFT      0
+#define GPU_ID2_VERSION_MINOR_SHIFT       4
+#define GPU_ID2_VERSION_MAJOR_SHIFT       12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT       16
+#define GPU_ID2_ARCH_REV_SHIFT            20
+#define GPU_ID2_ARCH_MINOR_SHIFT          24
+#define GPU_ID2_ARCH_MAJOR_SHIFT          28
+#define GPU_ID2_VERSION_STATUS            (0xF << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFF << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xF << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xF << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xF << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xF << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+   a product ignoring its version. */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+   revision (major, minor, status) of a product */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+		(((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
+	version_major, version_minor, version_status) \
+		(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+			product_major) | \
+		 GPU_ID2_VERSION_MAKE(version_major, version_minor,     \
+			version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+   a particular GPU model by its arch_major and product_major. */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+   for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+   model. */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+		(((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		    GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
+#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6, 1)
+#define GPU_ID2_PRODUCT_TSIX              GPU_ID2_MODEL_MAKE(7, 0)
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0                   0x1
+#define GPU_ID_S_EAC                      0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major,
+   minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+		(((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		((major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		((minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		((status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+#endif /* _KBASE_GPU_ID_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
new file mode 100644
index 0000000..6df0a1c
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+/** Show callback for the @c gpu_memory debugfs file.
+ *
+ * This function is called to get the contents of the @c gpu_memory debugfs
+ * file. This is a report of current gpu memory usage.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if successfully prints data in debugfs entry file
+ *         -1 if it encountered an error
+ */
+
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+
+	kbdev_list = kbase_dev_list_get();
+	list_for_each(entry, kbdev_list) {
+		struct kbase_device *kbdev = NULL;
+		struct kbasep_kctx_list_element *element;
+
+		kbdev = list_entry(entry, struct kbase_device, entry);
+		/* output the total memory usage and cap for this device */
+		seq_printf(sfile, "%-16s  %10u\n",
+				kbdev->devname,
+				atomic_read(&(kbdev->memdev.used_pages)));
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_for_each_entry(element, &kbdev->kctx_list, link) {
+			/* output the memory usage and cap for each kctx
+			* opened on this device */
+			seq_printf(sfile, "  %s-0x%p %10u\n",
+				"kctx",
+				element->kctx,
+				atomic_read(&(element->kctx->used_pages)));
+		}
+		mutex_unlock(&kbdev->kctx_list_lock);
+	}
+	kbase_dev_list_put(kbdev_list);
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for gpu_memory
+ */
+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_gpu_memory_seq_show , NULL);
+}
+
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+	.open = kbasep_gpu_memory_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ *  Initialize debugfs entry for gpu_memory
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("gpu_memory", S_IRUGO,
+			kbdev->mali_debugfs_directory, NULL,
+			&kbasep_gpu_memory_debugfs_fops);
+	return;
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	return;
+}
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
new file mode 100644
index 0000000..7045693
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H
+#define _KBASE_GPU_MEMORY_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+#endif  /*_KBASE_GPU_MEMORY_DEBUGFS_H*/
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
new file mode 100644
index 0000000..ceba27c
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -0,0 +1,329 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include <linux/clk.h>
+
+/**
+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
+ * @value:  The value from which to extract bits.
+ * @offset: The first bit to extract (0 being the LSB).
+ * @size:   The number of bits to extract.
+ *
+ * Context: @offset + @size <= 32.
+ *
+ * Return: Bits [@offset, @offset + @size) from @value.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+	(((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
+
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props)
+{
+	kbase_gpu_clk_speed_func get_gpu_speed_mhz;
+	u32 gpu_speed_mhz;
+	int rc = 1;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kbase_props);
+
+	/* Current GPU speed is requested from the system integrator via the GPU_SPEED_FUNC function.
+	 * If that function fails, or the function is not provided by the system integrator, we report the maximum
+	 * GPU speed as specified by GPU_FREQ_KHZ_MAX.
+	 */
+	get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC;
+	if (get_gpu_speed_mhz != NULL) {
+		rc = get_gpu_speed_mhz(&gpu_speed_mhz);
+#ifdef CONFIG_MALI_DEBUG
+		/* Issue a warning message when the reported GPU speed falls outside the min/max range */
+		if (rc == 0) {
+			u32 gpu_speed_khz = gpu_speed_mhz * 1000;
+
+			if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min ||
+					gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
+				dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n",
+						(unsigned long)gpu_speed_khz,
+						(unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min,
+						(unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
+		}
+#endif				/* CONFIG_MALI_DEBUG */
+	}
+	if (kctx->kbdev->clock) {
+		gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000;
+		rc = 0;
+	}
+	if (rc != 0)
+		gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
+
+	kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz;
+
+	memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
+
+	/* Before API 8.2 they expect L3 cache info here, which was always 0 */
+	if (kctx->api_version < KBASE_API_VERSION(8, 2))
+		kbase_props->props.raw_props.suspend_size = 0;
+
+	return 0;
+}
+
+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+	struct mali_base_gpu_coherent_group *current_group;
+	u64 group_present;
+	u64 group_mask;
+	u64 first_set, first_set_prev;
+	u32 num_groups = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != props);
+
+	props->coherency_info.coherency = props->raw_props.mem_features;
+	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+	if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+		/* Group is l2 coherent */
+		group_present = props->raw_props.l2_present;
+	} else {
+		/* Group is l1 coherent */
+		group_present = props->raw_props.shader_present;
+	}
+
+	/*
+	 * The coherent group mask can be computed from the l2 present
+	 * register.
+	 *
+	 * For the coherent group n:
+	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
+	 * where first_set is group_present with only its nth set-bit kept
+	 * (i.e. the position from where a new group starts).
+	 *
+	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
+	 * The first mask is:
+	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
+	 *               = (0x0..010     - 1) & ~(0x0..01      - 1)
+	 *               =  0x0..00f
+	 * The second mask is:
+	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
+	 *               = (0x0..100     - 1) & ~(0x0..010     - 1)
+	 *               =  0x0..0f0
+	 * And so on until all the bits from group_present have been cleared
+	 * (i.e. there is no group left).
+	 */
+
+	current_group = props->coherency_info.group;
+	first_set = group_present & ~(group_present - 1);
+
+	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
+		group_present -= first_set;	/* Clear the current group bit */
+		first_set_prev = first_set;
+
+		first_set = group_present & ~(group_present - 1);
+		group_mask = (first_set - 1) & ~(first_set_prev - 1);
+
+		/* Populate the coherent_group structure for each group */
+		current_group->core_mask = group_mask & props->raw_props.shader_present;
+		current_group->num_cores = hweight64(current_group->core_mask);
+
+		num_groups++;
+		current_group++;
+	}
+
+	if (group_present != 0)
+		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+	props->coherency_info.num_groups = num_groups;
+}
+
+/**
+ * kbase_gpuprops_get_props - Get the GPU configuration
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values from the GPU configuration
+ * registers. Only the raw properties are filled in this function
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	struct kbase_gpuprops_regdump regdump;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get(kbdev, &regdump);
+
+	gpu_props->raw_props.gpu_id = regdump.gpu_id;
+	gpu_props->raw_props.tiler_features = regdump.tiler_features;
+	gpu_props->raw_props.mem_features = regdump.mem_features;
+	gpu_props->raw_props.mmu_features = regdump.mmu_features;
+	gpu_props->raw_props.l2_features = regdump.l2_features;
+	gpu_props->raw_props.suspend_size = regdump.suspend_size;
+
+	gpu_props->raw_props.as_present = regdump.as_present;
+	gpu_props->raw_props.js_present = regdump.js_present;
+	gpu_props->raw_props.shader_present =
+		((u64) regdump.shader_present_hi << 32) +
+		regdump.shader_present_lo;
+	gpu_props->raw_props.tiler_present =
+		((u64) regdump.tiler_present_hi << 32) +
+		regdump.tiler_present_lo;
+	gpu_props->raw_props.l2_present =
+		((u64) regdump.l2_present_hi << 32) +
+		regdump.l2_present_lo;
+#ifdef CONFIG_MALI_CORESTACK
+	gpu_props->raw_props.stack_present =
+		((u64) regdump.stack_present_hi << 32) +
+		regdump.stack_present_lo;
+#else /* CONFIG_MALI_CORESTACK */
+	gpu_props->raw_props.stack_present = 0;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		gpu_props->raw_props.js_features[i] = regdump.js_features[i];
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
+
+	gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
+	gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
+	gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
+	gpu_props->raw_props.thread_features = regdump.thread_features;
+}
+
+/**
+ * kbase_gpuprops_calculate_props - Calculate the derived properties
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev:     The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values derived from the GPU
+ * configuration registers
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	int i;
+
+	/* Populate the base_gpu_props structure */
+	gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+	gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+	gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+	gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+
+	/* Field with number of l2 slices is added to MEM_FEATURES register
+	 * since t76x. Below code assumes that for older GPU reserved bits will
+	 * be read as zero. */
+	gpu_props->l2_props.num_l2_slices =
+		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+
+	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+	if (gpu_props->raw_props.thread_max_threads == 0)
+		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+	else
+		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+	else
+		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+	if (gpu_props->raw_props.thread_max_barrier_size == 0)
+		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+	else
+		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+	gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+
+	/* If values are not specified, then use defaults */
+	if (gpu_props->thread_props.max_registers == 0) {
+		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+	}
+	/* Initialize the coherent_group structure for each group */
+	kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *gpu_props;
+	struct gpu_raw_gpu_props *raw;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	gpu_props = &kbdev->gpu_props;
+	raw = &gpu_props->props.raw_props;
+
+	/* Initialize the base_gpu_props structure from the hardware */
+	kbase_gpuprops_get_props(&gpu_props->props, kbdev);
+
+	/* Populate the derived properties */
+	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
+
+	/* Populate kbase-only fields */
+	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
+	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
+
+	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
+
+	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
+	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
+
+	gpu_props->num_cores = hweight64(raw->shader_present);
+	gpu_props->num_core_groups = hweight64(raw->l2_present);
+	gpu_props->num_address_spaces = hweight32(raw->as_present);
+	gpu_props->num_job_slots = hweight32(raw->js_present);
+}
+
+void kbase_gpuprops_set_features(struct kbase_device *kbdev)
+{
+	base_gpu_props *gpu_props;
+	struct kbase_gpuprops_regdump regdump;
+
+	gpu_props = &kbdev->gpu_props.props;
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get_features(kbdev, &regdump);
+
+	/*
+	 * Copy the raw value from the register, later this will get turned
+	 * into the selected coherency mode.
+	 * Additionally, add non-coherent mode, as this is always supported.
+	 */
+	gpu_props->raw_props.coherency_mode = regdump.coherency_features |
+		COHERENCY_FEATURE_BIT(COHERENCY_NONE);
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
new file mode 100644
index 0000000..f3c95cc
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev		The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_set_features - Set up Kbase GPU properties
+ * @kbdev:   Device pointer
+ *
+ * This function sets up GPU properties that are dependent on the hardware
+ * features bitmask. This function must be preceeded by a call to
+ * kbase_hw_set_features_mask().
+ */
+void kbase_gpuprops_set_features(struct kbase_device *kbdev);
+
+/**
+ * @brief Provide GPU properties to userside through UKU call.
+ *
+ * Fill the struct kbase_uk_gpuprops with values from GPU configuration registers.
+ *
+ * @param kctx		The struct kbase_context structure
+ * @param kbase_props	A copy of the struct kbase_uk_gpuprops structure from userspace
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props);
+
+#endif				/* _KBASE_GPUPROPS_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
new file mode 100644
index 0000000..920cf77
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops_types.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_TYPES_H_
+#define _KBASE_GPUPROPS_TYPES_H_
+
+#include "mali_base_kernel.h"
+
+#define KBASE_GPU_SPEED_MHZ    123
+#define KBASE_GPU_PC_SIZE_LOG2 24U
+
+struct kbase_gpuprops_regdump {
+	u32 gpu_id;
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 tiler_features;
+	u32 mem_features;
+	u32 mmu_features;
+	u32 as_present;
+	u32 js_present;
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 shader_present_lo;
+	u32 shader_present_hi;
+	u32 tiler_present_lo;
+	u32 tiler_present_hi;
+	u32 l2_present_lo;
+	u32 l2_present_hi;
+	u32 stack_present_lo;
+	u32 stack_present_hi;
+	u32 coherency_features;
+};
+
+struct kbase_gpu_cache_props {
+	u8 associativity;
+	u8 external_bus_width;
+};
+
+struct kbase_gpu_mem_props {
+	u8 core_group;
+};
+
+struct kbase_gpu_mmu_props {
+	u8 va_bits;
+	u8 pa_bits;
+};
+
+struct kbase_gpu_props {
+	/* kernel-only properties */
+	u8 num_cores;
+	u8 num_core_groups;
+	u8 num_address_spaces;
+	u8 num_job_slots;
+
+	struct kbase_gpu_cache_props l2_props;
+
+	struct kbase_gpu_mem_props mem;
+	struct kbase_gpu_mmu_props mmu;
+
+	/**
+	 * Implementation specific irq throttle value (us), should be adjusted during integration.
+	 */
+	int irq_throttle_time_us;
+
+	/* Properties shared with userspace */
+	base_gpu_props props;
+};
+
+#endif				/* _KBASE_GPUPROPS_TYPES_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
new file mode 100644
index 0000000..51a31fb
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -0,0 +1,281 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Run-time work-arounds helpers
+ */
+
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_midg_regmap.h>
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+
+void kbase_hw_set_features_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_feature *features;
+	u32 gpu_id;
+	u32 product_id;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			features = base_hw_features_tMIx;
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			features = base_hw_features_tHEx;
+			break;
+		case GPU_ID2_PRODUCT_TSIX:
+			features = base_hw_features_tSIx;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	} else {
+		switch (product_id) {
+		case GPU_ID_PI_TFRX:
+			/* FALLTHROUGH */
+		case GPU_ID_PI_T86X:
+			features = base_hw_features_tFxx;
+			break;
+		case GPU_ID_PI_T83X:
+			features = base_hw_features_t83x;
+			break;
+		case GPU_ID_PI_T82X:
+			features = base_hw_features_t82x;
+			break;
+		case GPU_ID_PI_T76X:
+			features = base_hw_features_t76x;
+			break;
+		case GPU_ID_PI_T72X:
+			features = base_hw_features_t72x;
+			break;
+		case GPU_ID_PI_T62X:
+			features = base_hw_features_t62x;
+			break;
+		case GPU_ID_PI_T60X:
+			features = base_hw_features_t60x;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	}
+
+	for (; *features != BASE_HW_FEATURE_END; features++)
+		set_bit(*features, &kbdev->hw_features_mask[0]);
+}
+
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues;
+	u32 gpu_id;
+	u32 product_id;
+	u32 impl_tech;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
+
+	if (impl_tech != IMPLEMENTATION_MODEL) {
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id) {
+			case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 1):
+				issues = base_hw_issues_tMIx_r0p0_05dev0;
+				break;
+			case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 2):
+				issues = base_hw_issues_tMIx_r0p0;
+				break;
+			default:
+				if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+							GPU_ID2_PRODUCT_TMIX) {
+					issues = base_hw_issues_tMIx_r0p0;
+				} else if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+							GPU_ID2_PRODUCT_THEX) {
+					issues = base_hw_issues_tHEx_r0p0;
+				} else if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+							GPU_ID2_PRODUCT_TSIX) {
+					issues = base_hw_issues_tSIx_r0p0;
+				} else {
+					dev_err(kbdev->dev,
+						"Unknown GPU ID %x", gpu_id);
+					return -EINVAL;
+				}
+			}
+		} else {
+			switch (gpu_id) {
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+				issues = base_hw_issues_t60x_r0p0_15dev0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+				issues = base_hw_issues_t60x_r0p0_eac;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+				issues = base_hw_issues_t60x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+				issues = base_hw_issues_t62x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+				issues = base_hw_issues_t62x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+				issues = base_hw_issues_t62x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+				issues = base_hw_issues_t76x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+				issues = base_hw_issues_t76x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+				issues = base_hw_issues_t76x_r0p1_50rel0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+				issues = base_hw_issues_t76x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+				issues = base_hw_issues_t76x_r0p3;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+				issues = base_hw_issues_t76x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+				issues = base_hw_issues_t72x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+				issues = base_hw_issues_t72x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+				issues = base_hw_issues_t72x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+				issues = base_hw_issues_tFRx_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+				issues = base_hw_issues_tFRx_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8):
+				issues = base_hw_issues_tFRx_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
+				issues = base_hw_issues_tFRx_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+				issues = base_hw_issues_t86x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8):
+				issues = base_hw_issues_t86x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
+				issues = base_hw_issues_t86x_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
+				issues = base_hw_issues_t83x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8):
+				issues = base_hw_issues_t83x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
+				issues = base_hw_issues_t82x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
+				issues = base_hw_issues_t82x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8):
+				issues = base_hw_issues_t82x_r1p0;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		}
+	} else {
+		/* Software model */
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+			case GPU_ID2_PRODUCT_TMIX:
+				issues = base_hw_issues_model_tMIx;
+				break;
+			case GPU_ID2_PRODUCT_THEX:
+				issues = base_hw_issues_model_tHEx;
+				break;
+			case GPU_ID2_PRODUCT_TSIX:
+				issues = base_hw_issues_model_tSIx;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		} else {
+			switch (product_id) {
+			case GPU_ID_PI_T60X:
+				issues = base_hw_issues_model_t60x;
+				break;
+			case GPU_ID_PI_T62X:
+				issues = base_hw_issues_model_t62x;
+				break;
+			case GPU_ID_PI_T72X:
+				issues = base_hw_issues_model_t72x;
+				break;
+			case GPU_ID_PI_T76X:
+				issues = base_hw_issues_model_t76x;
+				break;
+			case GPU_ID_PI_TFRX:
+				issues = base_hw_issues_model_tFRx;
+				break;
+			case GPU_ID_PI_T86X:
+				issues = base_hw_issues_model_t86x;
+				break;
+			case GPU_ID_PI_T83X:
+				issues = base_hw_issues_model_t83x;
+				break;
+			case GPU_ID_PI_T82X:
+				issues = base_hw_issues_model_t82x;
+				break;
+			default:
+				dev_err(kbdev->dev, "Unknown GPU ID %x",
+					gpu_id);
+				return -EINVAL;
+			}
+		}
+	}
+
+	dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT);
+
+	for (; *issues != BASE_HW_ISSUE_END; issues++)
+		set_bit(*issues, &kbdev->hw_issues_mask[0]);
+
+	return 0;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
new file mode 100644
index 0000000..fce7d29
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Run-time work-arounds helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+	test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+	test_bit(feature, &(kbdev)->hw_features_mask[0])
+
+/**
+ * @brief Set the HW issues mask depending on the GPU ID
+ */
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif				/* _KBASE_HW_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
new file mode 100644
index 0000000..b09be99
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access backend common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_BACKEND_H_
+#define _KBASE_HWACCESS_BACKEND_H_
+
+/**
+ * kbase_backend_early_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_early_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
new file mode 100644
index 0000000..0acf297
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_gpu_defs.h
+ * HW access common definitions
+ */
+
+#ifndef _KBASE_HWACCESS_DEFS_H_
+#define _KBASE_HWACCESS_DEFS_H_
+
+#include <mali_kbase_jm_defs.h>
+
+/* The hwaccess_lock (a spinlock) must be held when accessing this structure */
+struct kbase_hwaccess_data {
+	struct kbase_context *active_kctx;
+
+	struct kbase_backend_data backend;
+};
+
+#endif /* _KBASE_HWACCESS_DEFS_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
new file mode 100644
index 0000000..cf8a813
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ *				  GPU
+ * @kbdev:	Device pointer
+ * @regdump:	Pointer to struct kbase_gpuprops_regdump structure
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+/**
+ * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU
+ * @kbdev:   Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads GPU properties that are dependent on the hardware
+ * features bitmask
+ */
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
new file mode 100644
index 0000000..5de2b75
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include <mali_kbase_instr_defs.h>
+
+/**
+ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * @kbdev:	Kbase device
+ * @kctx:	Kbase context
+ * @setup:	HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx:	Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true,
+ * of call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ *				       completed.
+ * @kctx:	Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ *				     completed
+ * @kctx:	Kbase context
+ * @success:	Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx:	Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_init() - Terminate the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
new file mode 100644
index 0000000..821b68a
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -0,0 +1,390 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_JM_H_
+#define _KBASE_HWACCESS_JM_H_
+
+/**
+ * kbase_backend_run_atom() - Run an atom on the GPU
+ * @kbdev:	Device pointer
+ * @atom:	Atom to run
+ *
+ * Caller must hold the HW access lock
+ */
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_slot_update - Update state based on slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ *
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ */
+void kbase_backend_slot_update(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_find_free_address_space() - Find a free address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * If no address spaces are currently free, then this function can evict an
+ * idle context from the runpool, freeing up the address space it was using.
+ *
+ * The address space is marked as in use. The caller must either assign a
+ * context using kbase_gpu_use_ctx(), or release it using
+ * kbase_gpu_release_free_address_space()
+ *
+ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none
+ *	   available
+ */
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_free_address_space() - Release an address space.
+ * @kbdev:	Device pointer
+ * @as_nr:	Address space to release
+ *
+ * The address space must have been returned by
+ * kbase_gpu_find_free_address_space().
+ */
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+						int as_nr);
+
+/**
+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the
+ *			     provided address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @as_nr:	Free address space to use
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * Return: true if successful, false if ASID not assigned.
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr);
+
+/**
+ * kbase_backend_use_ctx_sched() - Activate a context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * The context must already be scheduled and assigned to an address space. If
+ * the context is not scheduled, then kbase_gpu_use_ctx() should be used
+ * instead.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if context is now active, false otherwise (ie if context does
+ *	   not have an address space assigned)
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+					struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
+ *                                 de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+				struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will
+ *                                   de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex
+ *
+ * This function must perform any operations that could not be performed in IRQ
+ * context by kbase_backend_release_ctx_irq().
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_cacheclean - Perform a cache clean if the given atom requires
+ *                            one
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the failed atom
+ *
+ * On some GPUs, the GPU cache must be cleaned following a failed atom. This
+ * function performs a clean if it is required by @katom.
+ */
+void kbase_backend_cacheclean(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom);
+
+
+/**
+ * kbase_backend_complete_wq() - Perform backend-specific actions required on
+ *				 completing an atom.
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the atom to complete
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ *
+ * Return: true if atom has completed, false if atom should be re-submitted
+ */
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions
+ *                                        required on completing an atom, after
+ *                                        any scheduling has taken place.
+ * @kbdev:         Device pointer
+ * @core_req:      Core requirements of atom
+ * @affinity:      Affinity of atom
+ * @coreref_state: Coreref state of atom
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ */
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state);
+
+/**
+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
+ *			   and remove any others from the ringbuffers.
+ * @kbdev:		Device pointer
+ * @end_timestamp:	Timestamp of reset
+ */
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
+
+/**
+ * kbase_backend_inspect_head() - Return the atom currently at the head of slot
+ *				  @js
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
+ *                              @js
+ * @kbdev: Device pointer
+ * @js:    Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
+ *				      slot.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot
+ */
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
+ *					that are currently on the GPU.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot @js that are currently on the GPU.
+ */
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
+ *				       has changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg starting/stopping
+ * scheduling timers).
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg updating timeouts of
+ * currently running atoms).
+ */
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_slot_free() - Return the number of jobs that can be currently
+ *			       submitted to slot @js.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of jobs that can be submitted.
+ */
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
+ *                                        running from a context
+ * @kctx: Context pointer
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting
+ * context from the hardware.
+ */
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+
+/**
+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and
+ *                               to be descheduled.
+ * @kctx: Context pointer
+ *
+ * This should be called following kbase_js_zap_context(), to ensure the context
+ * can be safely destroyed.
+ */
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
+
+/**
+ * kbase_backend_get_current_flush_id - Return the current flush ID
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: the current flush ID to be recorded for each job chain
+ */
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it returns
+ * true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_locked - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_silent - Reset the GPU silently
+ * @kbdev: Device pointer
+ *
+ * Reset the GPU without trying to cancel jobs and don't emit messages into
+ * the kernel log while doing the reset.
+ *
+ * This function should be used in cases where we are doing a controlled reset
+ * of the GPU as part of normal processing (e.g. exiting protected mode) where
+ * the driver will have ensured the scheduler has been idled and all other
+ * users of the GPU (e.g. instrumentation) have been suspended.
+ */
+void kbase_reset_gpu_silent(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_active - Reports if the GPU is being reset
+ * @kbdev: Device pointer
+ *
+ * Return: True if the GPU is in the process of being reset.
+ */
+bool kbase_reset_gpu_active(struct kbase_device *kbdev);
+#endif
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom);
+
+#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
new file mode 100644
index 0000000..bae7c0d
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_pm.h
+ * HW access power manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_PM_H_
+#define _KBASE_HWACCESS_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+#include <mali_kbase_pm_defs.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/* Functions common to all HW access backends */
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return 0 if the power management framework was successfully
+ *         initialized.
+ */
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_hwaccess_pm_powerup - Power up the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
+ *
+ * Power up GPU after all modules have been initialized and interrupt handlers
+ * installed.
+ *
+ * Return: 0 if powerup was successful.
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ *
+ * Should ensure that no new interrupts are generated, but allow any currently
+ * running interrupt handlers to complete successfully. The GPU is forced off by
+ * the time this function returns, regardless of whether or not the active power
+ * policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to suspend the GPU
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to resume the GPU from a suspend
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for activating the GPU. Called when the first
+ * context goes active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for idling the GPU. Called when the last
+ * context goes idle.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev         The kbase device structure for the device (must be a
+ *                      valid pointer)
+ * @param new_core_mask_js0 The core mask to use for job slot 0
+ * @param new_core_mask_js0 The core mask to use for job slot 1
+ * @param new_core_mask_js0 The core mask to use for job slot 2
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+int set_policy_by_name(struct kbase_device *kbdev, const char *name);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+					const struct kbase_pm_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
+
+#endif /* _KBASE_HWACCESS_PM_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
new file mode 100644
index 0000000..89d26ea
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
new file mode 100644
index 0000000..cf7bf1b
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,66 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_HWCNT_READER_H_
+#define _KBASE_HWCNT_READER_H_
+
+/* The ids of ioctl commands. */
+#define KBASE_HWCNT_READER 0xBE
+#define KBASE_HWCNT_READER_GET_HWVER       _IOR(KBASE_HWCNT_READER, 0x00, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32)
+#define KBASE_HWCNT_READER_DUMP            _IOW(KBASE_HWCNT_READER, 0x10, u32)
+#define KBASE_HWCNT_READER_CLEAR           _IOW(KBASE_HWCNT_READER, 0x11, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER      _IOR(KBASE_HWCNT_READER, 0x20,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_PUT_BUFFER      _IOW(KBASE_HWCNT_READER, 0x21,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_SET_INTERVAL    _IOW(KBASE_HWCNT_READER, 0x30, u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT    _IOW(KBASE_HWCNT_READER, 0x40, u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT   _IOW(KBASE_HWCNT_READER, 0x41, u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32)
+
+/**
+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
+ * @timestamp:  time when sample was collected
+ * @event_id:   id of an event that triggered sample collection
+ * @buffer_idx: position in sampling area where sample buffer was stored
+ */
+struct kbase_hwcnt_reader_metadata {
+	u64 timestamp;
+	u32 event_id;
+	u32 buffer_idx;
+};
+
+/**
+ * enum base_hwcnt_reader_event - hwcnt dumping events
+ * @BASE_HWCNT_READER_EVENT_MANUAL:   manual request for dump
+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
+ * @BASE_HWCNT_READER_EVENT_PREJOB:   prejob dump request
+ * @BASE_HWCNT_READER_EVENT_POSTJOB:  postjob dump request
+ * @BASE_HWCNT_READER_EVENT_COUNT:    number of supported events
+ */
+enum base_hwcnt_reader_event {
+	BASE_HWCNT_READER_EVENT_MANUAL,
+	BASE_HWCNT_READER_EVENT_PERIODIC,
+	BASE_HWCNT_READER_EVENT_PREJOB,
+	BASE_HWCNT_READER_EVENT_POSTJOB,
+
+	BASE_HWCNT_READER_EVENT_COUNT
+};
+
+#endif /* _KBASE_HWCNT_READER_H_ */
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100644
index 0000000..596b047
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1897 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <mali_kbase.h>
+#include <mali_kbase_uku.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/ratelimit.h>
+
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_tlstream.h>
+
+#include "mali_kbase_dma_fence.h"
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/* Return whether katom will run on the GPU or not. Currently only soft jobs and
+ * dependency-only atoms do not run on the GPU */
+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) ||  \
+			((katom->core_req & BASE_JD_REQ_ATOM_TYPE) ==    \
+							BASE_JD_REQ_DEP)))
+/*
+ * This is the kernel side of the API. Only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ *   completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p)
+{
+#ifdef CONFIG_COMPAT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return compat_ptr(p->compat_value);
+#endif
+	return p->value;
+}
+
+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
+ */
+static int jd_run_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+	if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+		/* Dependency only atom */
+		katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		return 0;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		/* Soft-job */
+		if (katom->will_fail_event_code) {
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			return 0;
+		}
+		if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+			if (!kbase_replay_process(katom))
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		} else if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		}
+		return 0;
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+	/* Queue an action about whether we should try scheduling a context */
+	return kbasep_js_add_job(kctx, katom);
+}
+
+#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE)
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kbdev = katom->kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Check whether the atom's other dependencies were already met. If
+	 * katom is a GPU atom then the job scheduler may be able to represent
+	 * the dependencies, hence we may attempt to submit it before they are
+	 * met. Other atoms must have had both dependencies resolved.
+	 */
+	if (IS_GPU_ATOM(katom) ||
+			(!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+			!kbase_jd_katom_dep_atom(&katom->dep[1]))) {
+		/* katom dep complete, attempt to run it */
+		bool resched = false;
+
+		resched = jd_run_atom(katom);
+
+		if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+			/* The atom has already finished */
+			resched |= jd_done_nolock(katom, NULL);
+		}
+
+		if (resched)
+			kbase_js_sched_all(kbdev);
+	}
+}
+#endif
+
+#ifdef CONFIG_KDS
+
+/* Add the katom to the kds waiting list.
+ * Atoms must be added to the waiting list after a successful call to kds_async_waitall.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	list_add_tail(&katom->node, &kctx->waiting_kds_resource);
+}
+
+/* Remove the katom from the kds waiting list.
+ * Atoms must be removed from the waiting list before a call to kds_resource_set_release_sync.
+ * The supplied katom must first have been added to the list with a call to kbase_jd_kds_waiters_add.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_remove(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	list_del(&katom->node);
+}
+
+static void kds_dep_clear(void *callback_parameter, void *callback_extra_parameter)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = (struct kbase_jd_atom *)callback_parameter;
+	KBASE_DEBUG_ASSERT(katom);
+
+	ctx = &katom->kctx->jctx;
+
+	/* If KDS resource has already been satisfied (e.g. due to zapping)
+	 * do nothing.
+	 */
+	mutex_lock(&ctx->lock);
+	if (!katom->kds_dep_satisfied) {
+		katom->kds_dep_satisfied = true;
+		kbase_jd_dep_clear_locked(katom);
+	}
+	mutex_unlock(&ctx->lock);
+}
+
+static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 *  there is a race between job completion and cancellation */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+#endif				/* CONFIG_KDS */
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_KDS
+	if (katom->kds_rset) {
+		struct kbase_jd_context *jctx = &katom->kctx->jctx;
+
+		/*
+		 * As the atom is no longer waiting, remove it from
+		 * the waiting list.
+		 */
+
+		mutex_lock(&jctx->lock);
+		kbase_jd_kds_waiters_remove(katom);
+		mutex_unlock(&jctx->lock);
+
+		/* Release the kds resource or cancel if zapping */
+		kds_resource_set_release_sync(&katom->kds_rset);
+	}
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 * Any successfully completed atom would have had all it's callbacks
+	 * completed before the atom was run, so only flush for failed atoms.
+	 */
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		flush_workqueue(katom->kctx->dma_fence.wq);
+#endif /* CONFIG_MALI_DMA_FENCE */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_KDS
+	/* Prevent the KDS resource from triggering the atom in case of zapping */
+	if (katom->kds_rset)
+		katom->kds_dep_satisfied = true;
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_signal(katom);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	kbase_gpu_vm_lock(katom->kctx);
+	/* only roll back if extres is non-NULL */
+	if (katom->extres) {
+		u32 res_no;
+
+		res_no = katom->nr_extres;
+		while (res_no-- > 0) {
+			struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+			struct kbase_va_region *reg;
+
+			reg = kbase_region_tracker_find_region_base_address(
+					katom->kctx,
+					katom->extres[res_no].gpu_address);
+			kbase_unmap_external_resource(katom->kctx, reg, alloc);
+		}
+		kfree(katom->extres);
+		katom->extres = NULL;
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+}
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ */
+
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+	int err_ret_val = -EINVAL;
+	u32 res_no;
+#ifdef CONFIG_KDS
+	u32 kds_res_count = 0;
+	struct kds_resource **kds_resources = NULL;
+	unsigned long *kds_access_bitmap = NULL;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct kbase_dma_fence_resv_info info = {
+		.dma_fence_resv_count = 0,
+	};
+#ifdef CONFIG_SYNC
+	/*
+	 * When both dma-buf fence and Android native sync is enabled, we
+	 * disable dma-buf fence for contexts that are using Android native
+	 * fences.
+	 */
+	const bool implicit_sync = !kbase_ctx_flag(katom->kctx,
+						   KCTX_NO_IMPLICIT_SYNC);
+#else /* CONFIG_SYNC */
+	const bool implicit_sync = true;
+#endif /* CONFIG_SYNC */
+#endif /* CONFIG_MALI_DMA_FENCE */
+	struct base_external_resource *input_extres;
+
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+	/* no resources encoded, early out */
+	if (!katom->nr_extres)
+		return -EINVAL;
+
+	katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+	if (NULL == katom->extres) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	/* copy user buffer to the end of our real buffer.
+	 * Make sure the struct sizes haven't changed in a way
+	 * we don't support */
+	BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+	input_extres = (struct base_external_resource *)
+			(((unsigned char *)katom->extres) +
+			(sizeof(*katom->extres) - sizeof(*input_extres)) *
+			katom->nr_extres);
+
+	if (copy_from_user(input_extres,
+			get_compat_pointer(katom->kctx, &user_atom->extres_list),
+			sizeof(*input_extres) * katom->nr_extres) != 0) {
+		err_ret_val = -EINVAL;
+		goto early_err_out;
+	}
+#ifdef CONFIG_KDS
+	/* assume we have to wait for all */
+	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+	kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL);
+
+	if (!kds_resources) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+	kds_access_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres),
+				    sizeof(unsigned long),
+				    GFP_KERNEL);
+	if (!kds_access_bitmap) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		info.resv_objs = kmalloc_array(katom->nr_extres,
+					sizeof(struct reservation_object *),
+					GFP_KERNEL);
+		if (!info.resv_objs) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+
+		info.dma_fence_excl_bitmap =
+				kcalloc(BITS_TO_LONGS(katom->nr_extres),
+					sizeof(unsigned long), GFP_KERNEL);
+		if (!info.dma_fence_excl_bitmap) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Take the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* need to keep the GPU VM locked while we set up UMM buffers */
+	kbase_gpu_vm_lock(katom->kctx);
+	for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+		struct base_external_resource *res;
+		struct kbase_va_region *reg;
+		struct kbase_mem_phy_alloc *alloc;
+		bool exclusive;
+
+		res = &input_extres[res_no];
+		exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
+				? true : false;
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx,
+				res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+		/* did we find a matching region object? */
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE)) {
+			/* roll back */
+			goto failed_loop;
+		}
+
+		if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
+				(reg->flags & KBASE_REG_SECURE)) {
+			katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED;
+		}
+
+		alloc = kbase_map_external_resource(katom->kctx, reg,
+				current->mm
+#ifdef CONFIG_KDS
+				, &kds_res_count, kds_resources,
+				kds_access_bitmap, exclusive
+#endif
+				);
+		if (!alloc) {
+			err_ret_val = -EINVAL;
+			goto failed_loop;
+		}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		if (implicit_sync &&
+		    reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+			struct reservation_object *resv;
+
+			resv = reg->gpu_alloc->imported.umm.dma_buf->resv;
+			if (resv)
+				kbase_dma_fence_add_reservation(resv, &info,
+								exclusive);
+		}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+		/* finish with updating out array with the data we found */
+		/* NOTE: It is important that this is the last thing we do (or
+		 * at least not before the first write) as we overwrite elements
+		 * as we loop and could be overwriting ourself, so no writes
+		 * until the last read for an element.
+		 * */
+		katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+		katom->extres[res_no].alloc = alloc;
+	}
+	/* successfully parsed the extres array */
+	/* drop the vm lock before we call into kds */
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+#ifdef CONFIG_KDS
+	if (kds_res_count) {
+		int wait_failed;
+
+		/* We have resources to wait for with kds */
+		katom->kds_dep_satisfied = false;
+
+		wait_failed = kds_async_waitall(&katom->kds_rset,
+				&katom->kctx->jctx.kds_cb, katom, NULL,
+				kds_res_count, kds_access_bitmap,
+				kds_resources);
+
+		if (wait_failed)
+			goto failed_kds_setup;
+		else
+			kbase_jd_kds_waiters_add(katom);
+	} else {
+		/* Nothing to wait for, so kds dep met */
+		katom->kds_dep_satisfied = true;
+	}
+	kfree(kds_resources);
+	kfree(kds_access_bitmap);
+#endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		if (info.dma_fence_resv_count) {
+			int ret;
+
+			ret = kbase_dma_fence_wait(katom, &info);
+			if (ret < 0)
+				goto failed_dma_fence_setup;
+		}
+
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* all done OK */
+	return 0;
+
+/* error handling section */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+failed_dma_fence_setup:
+#ifdef CONFIG_KDS
+	/* If we are here, dma_fence setup failed but KDS didn't.
+	 * Revert KDS setup if any.
+	 */
+	if (kds_res_count) {
+		mutex_unlock(&katom->kctx->jctx.lock);
+		kds_resource_set_release_sync(&katom->kds_rset);
+		mutex_lock(&katom->kctx->jctx.lock);
+
+		kbase_jd_kds_waiters_remove(katom);
+		katom->kds_dep_satisfied = true;
+	}
+#endif /* CONFIG_KDS */
+#endif /* CONFIG_MALI_DMA_FENCE */
+#ifdef CONFIG_KDS
+failed_kds_setup:
+#endif
+#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE)
+	/* Lock the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* lock before we unmap */
+	kbase_gpu_vm_lock(katom->kctx);
+#endif
+
+ failed_loop:
+	/* undo the loop work */
+	while (res_no-- > 0) {
+		struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+
+		kbase_unmap_external_resource(katom->kctx, NULL, alloc);
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+ early_err_out:
+	kfree(katom->extres);
+	katom->extres = NULL;
+#ifdef CONFIG_KDS
+	kfree(kds_resources);
+	kfree(kds_access_bitmap);
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif
+	return err_ret_val;
+}
+
+static inline void jd_resolve_dep(struct list_head *out_list,
+					struct kbase_jd_atom *katom,
+					u8 d, bool ctx_is_dying)
+{
+	u8 other_d = !d;
+
+	while (!list_empty(&katom->dep_head[d])) {
+		struct kbase_jd_atom *dep_atom;
+		struct kbase_jd_atom *other_dep_atom;
+		u8 dep_type;
+
+		dep_atom = list_entry(katom->dep_head[d].next,
+				struct kbase_jd_atom, dep_item[d]);
+		list_del(katom->dep_head[d].next);
+
+		dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+		kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+		if (katom->event_code != BASE_JD_EVENT_DONE &&
+			(dep_type != BASE_JD_DEP_TYPE_ORDER)) {
+#ifdef CONFIG_KDS
+			if (!dep_atom->kds_dep_satisfied) {
+				/* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and
+				 * do nothing. If the callback doesn't happen then kbase_jd_post_external_resources will clean up
+				 */
+				dep_atom->kds_dep_satisfied = true;
+			}
+#endif
+
+#ifdef CONFIG_MALI_DMA_FENCE
+			kbase_dma_fence_cancel_callbacks(dep_atom);
+#endif
+
+			dep_atom->event_code = katom->event_code;
+			KBASE_DEBUG_ASSERT(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED);
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY)
+					!= BASE_JD_REQ_SOFT_REPLAY) {
+				dep_atom->will_fail_event_code =
+					dep_atom->event_code;
+			} else {
+				dep_atom->status =
+					KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+		}
+		other_dep_atom = (struct kbase_jd_atom *)
+			kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]);
+
+		if (!dep_atom->in_jd_list && (!other_dep_atom ||
+				(IS_GPU_ATOM(dep_atom) && !ctx_is_dying &&
+				!dep_atom->will_fail_event_code &&
+				!other_dep_atom->will_fail_event_code))) {
+			bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+			int dep_count;
+
+			dep_count = atomic_read(&dep_atom->dma_fence.dep_count);
+			if (likely(dep_count == -1)) {
+				dep_satisfied = true;
+			} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+				dep_satisfied = false;
+			}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+#ifdef CONFIG_KDS
+			dep_satisfied = dep_satisfied && dep_atom->kds_dep_satisfied;
+#endif
+
+			if (dep_satisfied) {
+				dep_atom->in_jd_list = true;
+				list_add_tail(&dep_atom->jd_item, out_list);
+			}
+		}
+	}
+}
+
+KBASE_EXPORT_TEST_API(jd_resolve_dep);
+
+#if MALI_CUSTOMER_RELEASE == 0
+static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	kbdev->force_replay_count++;
+
+	if (kbdev->force_replay_count >= kbdev->force_replay_limit) {
+		kbdev->force_replay_count = 0;
+		katom->event_code = BASE_JD_EVENT_FORCE_REPLAY;
+
+		if (kbdev->force_replay_random)
+			kbdev->force_replay_limit =
+			   (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1;
+
+		dev_info(kbdev->dev, "force_replay : promoting to error\n");
+	}
+}
+
+/** Test to see if atom should be forced to fail.
+ *
+ * This function will check if an atom has a replay job as a dependent. If so
+ * then it will be considered for forced failure. */
+static void jd_check_force_failure(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int i;
+
+	if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) ||
+	    (katom->core_req & BASEP_JD_REQ_EVENT_NEVER))
+		return;
+
+	for (i = 1; i < BASE_JD_ATOM_COUNT; i++) {
+		if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom ||
+		    kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
+			struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
+						     BASE_JD_REQ_SOFT_REPLAY &&
+			    (dep_atom->core_req & kbdev->force_replay_core_req)
+					     == kbdev->force_replay_core_req) {
+				jd_force_failure(kbdev, katom);
+				return;
+			}
+		}
+	}
+}
+#endif
+
+/**
+ * is_dep_valid - Validate that a dependency is valid for early dependency
+ *                submission
+ * @katom: Dependency atom to validate
+ *
+ * A dependency is valid if any of the following are true :
+ * - It does not exist (a non-existent dependency does not block submission)
+ * - It is in the job scheduler
+ * - It has completed, does not have a failure event code, and has not been
+ *   marked to fail in the future
+ *
+ * Return: true if valid, false otherwise
+ */
+static bool is_dep_valid(struct kbase_jd_atom *katom)
+{
+	/* If there's no dependency then this is 'valid' from the perspective of
+	 * early dependency submission */
+	if (!katom)
+		return true;
+
+	/* Dependency must have reached the job scheduler */
+	if (katom->status < KBASE_JD_ATOM_STATE_IN_JS)
+		return false;
+
+	/* If dependency has completed and has failed or will fail then it is
+	 * not valid */
+	if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED &&
+			(katom->event_code != BASE_JD_EVENT_DONE ||
+			katom->will_fail_event_code))
+		return false;
+
+	return true;
+}
+
+static void jd_try_submitting_deps(struct list_head *out_list,
+		struct kbase_jd_atom *node)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct list_head *pos;
+
+		list_for_each(pos, &node->dep_head[i]) {
+			struct kbase_jd_atom *dep_atom = list_entry(pos,
+					struct kbase_jd_atom, dep_item[i]);
+
+			if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) {
+				/*Check if atom deps look sane*/
+				bool dep0_valid = is_dep_valid(
+						dep_atom->dep[0].atom);
+				bool dep1_valid = is_dep_valid(
+						dep_atom->dep[1].atom);
+				bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+				int dep_count;
+
+				dep_count = atomic_read(
+						&dep_atom->dma_fence.dep_count);
+				if (likely(dep_count == -1)) {
+					dep_satisfied = true;
+				} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+					dep_satisfied = false;
+				}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#ifdef CONFIG_KDS
+				dep_satisfied = dep_satisfied &&
+						dep_atom->kds_dep_satisfied;
+#endif
+
+				if (dep0_valid && dep1_valid && dep_satisfied) {
+					dep_atom->in_jd_list = true;
+					list_add(&dep_atom->jd_item, out_list);
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head completed_jobs;
+	struct list_head runnable_jobs;
+	bool need_to_try_schedule_context = false;
+	int i;
+
+	INIT_LIST_HEAD(&completed_jobs);
+	INIT_LIST_HEAD(&runnable_jobs);
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+#if MALI_CUSTOMER_RELEASE == 0
+	jd_check_force_failure(katom);
+#endif
+
+	/* This is needed in case an atom is failed due to being invalid, this
+	 * can happen *before* the jobs that the atom depends on have completed */
+	for (i = 0; i < 2; i++) {
+		if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
+			list_del(&katom->dep_item[i]);
+			kbase_jd_katom_dep_clear(&katom->dep[i]);
+		}
+	}
+
+	/* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with
+	 * BASE_JD_EVENT_TILE_RANGE_FAULT.
+	 *
+	 * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the
+	 * error code to BASE_JD_EVENT_DONE
+	 */
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) &&
+		  katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) {
+		if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) {
+			/* Promote the failure to job done */
+			katom->event_code = BASE_JD_EVENT_DONE;
+			katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED);
+		}
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+	list_add_tail(&katom->jd_item, &completed_jobs);
+
+	while (!list_empty(&completed_jobs)) {
+		katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item);
+		list_del(completed_jobs.prev);
+		KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+		for (i = 0; i < 2; i++)
+			jd_resolve_dep(&runnable_jobs, katom, i,
+					kbase_ctx_flag(kctx, KCTX_DYING));
+
+		if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+			kbase_jd_post_external_resources(katom);
+
+		while (!list_empty(&runnable_jobs)) {
+			struct kbase_jd_atom *node;
+
+			node = list_entry(runnable_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(runnable_jobs.next);
+			node->in_jd_list = false;
+
+			KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+			if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+				need_to_try_schedule_context |= jd_run_atom(node);
+			} else {
+				node->event_code = katom->event_code;
+
+				if ((node->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE) ==
+					BASE_JD_REQ_SOFT_REPLAY) {
+					if (kbase_replay_process(node))
+						/* Don't complete this atom */
+						continue;
+				} else if (node->core_req &
+							BASE_JD_REQ_SOFT_JOB) {
+					/* If this is a fence wait soft job
+					 * then remove it from the list of sync
+					 * waiters.
+					 */
+					if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
+						kbasep_remove_waiting_soft_job(node);
+
+					kbase_finish_soft_job(node);
+				}
+				node->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+
+			if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+				list_add_tail(&node->jd_item, &completed_jobs);
+			} else if (node->status == KBASE_JD_ATOM_STATE_IN_JS &&
+					!node->will_fail_event_code) {
+				/* Node successfully submitted, try submitting
+				 * dependencies as they may now be representable
+				 * in JS */
+				jd_try_submitting_deps(&runnable_jobs, node);
+			}
+		}
+
+		/* Register a completed job as a disjoint event when the GPU
+		 * is in a disjoint state (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kctx->kbdev);
+		if (completed_jobs_ctx)
+			list_add_tail(&katom->jd_item, completed_jobs_ctx);
+		else
+			kbase_event_post(kctx, katom);
+
+		/* Decrement and check the TOTAL number of jobs. This includes
+		 * those not tracked by the scheduler: 'not ready to run' and
+		 * 'dependency-only' jobs. */
+		if (--kctx->jctx.job_nr == 0)
+			wake_up(&kctx->jctx.zero_jobs_wait);	/* All events are safely queued now, and we can signal any waiter
+								 * that we've got no more jobs (so we can be safely terminated) */
+	}
+
+	return need_to_try_schedule_context;
+}
+
+KBASE_EXPORT_TEST_API(jd_done_nolock);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+enum {
+	CORE_REQ_DEP_ONLY,
+	CORE_REQ_SOFT,
+	CORE_REQ_COMPUTE,
+	CORE_REQ_FRAGMENT,
+	CORE_REQ_VERTEX,
+	CORE_REQ_TILER,
+	CORE_REQ_FRAGMENT_VERTEX,
+	CORE_REQ_FRAGMENT_VERTEX_TILER,
+	CORE_REQ_FRAGMENT_TILER,
+	CORE_REQ_VERTEX_TILER,
+	CORE_REQ_UNKNOWN
+};
+static const char * const core_req_strings[] = {
+	"Dependency Only Job",
+	"Soft Job",
+	"Compute Shader Job",
+	"Fragment Shader Job",
+	"Vertex/Geometry Shader Job",
+	"Tiler Job",
+	"Fragment Shader + Vertex/Geometry Shader Job",
+	"Fragment Shader + Vertex/Geometry Shader Job + Tiler Job",
+	"Fragment Shader + Tiler Job",
+	"Vertex/Geometry Shader Job + Tiler Job",
+	"Unknown Job"
+};
+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
+{
+	if (core_req & BASE_JD_REQ_SOFT_JOB)
+		return core_req_strings[CORE_REQ_SOFT];
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		return core_req_strings[CORE_REQ_COMPUTE];
+	switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) {
+	case BASE_JD_REQ_DEP:
+		return core_req_strings[CORE_REQ_DEP_ONLY];
+	case BASE_JD_REQ_FS:
+		return core_req_strings[CORE_REQ_FRAGMENT];
+	case BASE_JD_REQ_CS:
+		return core_req_strings[CORE_REQ_VERTEX];
+	case BASE_JD_REQ_T:
+		return core_req_strings[CORE_REQ_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_TILER];
+	case (BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_VERTEX_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER];
+	}
+	return core_req_strings[CORE_REQ_UNKNOWN];
+}
+#endif
+
+bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int queued = 0;
+	int i;
+	int sched_prio;
+	bool ret;
+	bool will_fail = false;
+
+	/* Update the TOTAL number of jobs. This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	jctx->job_nr++;
+
+	katom->start_timestamp.tv64 = 0;
+	katom->udata = user_atom->udata;
+	katom->kctx = kctx;
+	katom->nr_extres = user_atom->nr_extres;
+	katom->extres = NULL;
+	katom->device_nr = user_atom->device_nr;
+	katom->affinity = 0;
+	katom->jc = user_atom->jc;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->core_req = user_atom->core_req;
+	katom->atom_flags = 0;
+	katom->retry_count = 0;
+	katom->need_cache_flush_cores_retained = 0;
+	katom->pre_dep = NULL;
+	katom->post_dep = NULL;
+	katom->x_pre_dep = NULL;
+	katom->x_post_dep = NULL;
+	katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED;
+
+	/* Implicitly sets katom->protected_state.enter as well. */
+	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+	katom->age = kctx->age_count++;
+
+	INIT_LIST_HEAD(&katom->jd_item);
+#ifdef CONFIG_KDS
+	/* Start by assuming that the KDS dependencies are satisfied,
+	 * kbase_jd_pre_external_resources will correct this if there are dependencies */
+	katom->kds_dep_satisfied = true;
+	katom->kds_rset = NULL;
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	atomic_set(&katom->dma_fence.dep_count, -1);
+#endif
+
+	/* Don't do anything if there is a mess up with dependencies.
+	   This is done in a separate cycle to check both the dependencies at ones, otherwise
+	   it will be extra complexity to deal with 1st dependency ( just added to the list )
+	   if only the 2nd one has invalid config.
+	 */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+		if (dep_atom_number) {
+			if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+				katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+
+				/* Wrong dependency setup. Atom will be sent
+				 * back to user space. Do not record any
+				 * dependencies. */
+				KBASE_TLSTREAM_TL_NEW_ATOM(
+						katom,
+						kbase_jd_atom_id(kctx, katom));
+				KBASE_TLSTREAM_TL_RET_ATOM_CTX(
+						katom, kctx);
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom,
+						TL_ATOM_STATE_IDLE);
+
+				ret = jd_done_nolock(katom, NULL);
+				goto out;
+			}
+		}
+	}
+
+	/* Add dependencies */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type;
+		struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number];
+
+		dep_atom_type = user_atom->pre_dep[i].dependency_type;
+		kbase_jd_katom_dep_clear(&katom->dep[i]);
+
+		if (!dep_atom_number)
+			continue;
+
+		if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED ||
+				dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+
+			if (dep_atom->event_code == BASE_JD_EVENT_DONE)
+				continue;
+			/* don't stop this atom if it has an order dependency
+			 * only to the failed one, try to submit it through
+			 * the normal path
+			 */
+			if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom->event_code > BASE_JD_EVENT_ACTIVE) {
+				continue;
+			}
+
+			/* Atom has completed, propagate the error code if any */
+			katom->event_code = dep_atom->event_code;
+			katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+			/* This atom is going through soft replay or
+			 * will be sent back to user space. Do not record any
+			 * dependencies. */
+			KBASE_TLSTREAM_TL_NEW_ATOM(
+					katom,
+					kbase_jd_atom_id(kctx, katom));
+			KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx);
+			KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom,
+					TL_ATOM_STATE_IDLE);
+
+			if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					 == BASE_JD_REQ_SOFT_REPLAY) {
+				if (kbase_replay_process(katom)) {
+					ret = false;
+					goto out;
+				}
+			}
+			will_fail = true;
+
+		} else {
+			/* Atom is in progress, add this atom to the list */
+			list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
+			kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type);
+			queued = 1;
+		}
+	}
+
+	if (will_fail) {
+		if (!queued) {
+			ret = jd_done_nolock(katom, NULL);
+
+			goto out;
+		} else {
+			katom->will_fail_event_code = katom->event_code;
+			ret = false;
+
+			goto out;
+		}
+	} else {
+		/* These must occur after the above loop to ensure that an atom
+		 * that depends on a previous atom with the same number behaves
+		 * as expected */
+		katom->event_code = BASE_JD_EVENT_DONE;
+		katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	}
+
+	/* For invalid priority, be most lenient and choose the default */
+	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+	katom->sched_priority = sched_prio;
+
+	/* Create a new atom recording all dependencies it was set up with. */
+	KBASE_TLSTREAM_TL_NEW_ATOM(
+			katom,
+			kbase_jd_atom_id(kctx, katom));
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_IDLE);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(katom, katom->sched_priority);
+	KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx);
+	for (i = 0; i < 2; i++)
+		if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
+					&katom->dep[i])) {
+			KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(
+					(void *)kbase_jd_katom_dep_atom(
+						&katom->dep[i]),
+					(void *)katom);
+		} else if (BASE_JD_DEP_TYPE_INVALID !=
+				user_atom->pre_dep[i].dependency_type) {
+			/* Resolved dependency. */
+			int dep_atom_number =
+				user_atom->pre_dep[i].atom_id;
+			struct kbase_jd_atom *dep_atom =
+				&jctx->atoms[dep_atom_number];
+
+			KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(
+					(void *)dep_atom,
+					(void *)katom);
+		}
+
+	/* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
+	if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with an invalid device_nr */
+	if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
+	    (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid device_nr %d",
+				katom->device_nr);
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with invalid core requirements */
+	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
+			(katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid core requirements");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		/* handle what we need to do to access the external resources */
+		if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
+			/* setup failed (no access, bad resource, unknown resource types, etc.) */
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+	/* Validate the atom. Function will return error if the atom is
+	 * malformed.
+	 *
+	 * Soft-jobs never enter the job scheduler but have their own initialize method.
+	 *
+	 * If either fail then we immediately complete the atom with an error.
+	 */
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
+		if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	} else {
+		/* Soft-job */
+		if (kbase_prepare_soft_job(katom) != 0) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	katom->work_id = atomic_inc_return(&jctx->work_id);
+	trace_gpu_job_enqueue((u32)kctx->id, katom->work_id,
+			kbasep_map_core_reqs_to_string(katom->core_req));
+#endif
+
+	if (queued && !IS_GPU_ATOM(katom)) {
+		ret = false;
+		goto out;
+	}
+#ifdef CONFIG_KDS
+	if (!katom->kds_dep_satisfied) {
+		/* Queue atom due to KDS dependency */
+		ret = false;
+		goto out;
+	}
+#endif				/* CONFIG_KDS */
+
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (atomic_read(&katom->dma_fence.dep_count) != -1) {
+		ret = false;
+		goto out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+		if (kbase_replay_process(katom))
+			ret = false;
+		else
+			ret = jd_done_nolock(katom, NULL);
+
+		goto out;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+
+		ret = false;
+	} else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		ret = kbasep_js_add_job(kctx, katom);
+		/* If job was cancelled then resolve immediately */
+		if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED)
+			ret = jd_done_nolock(katom, NULL);
+	} else {
+		/* This is a pure dependency. Resolve it immediately */
+		ret = jd_done_nolock(katom, NULL);
+	}
+
+ out:
+	return ret;
+}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data,
+		int uk6_atom)
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int err = 0;
+	int i;
+	bool need_to_try_schedule_context = false;
+	struct kbase_device *kbdev;
+	void __user *user_addr;
+	u32 latest_flush;
+
+	/*
+	 * kbase_jd_submit isn't expected to fail and so all errors with the jobs
+	 * are reported by immediately falling them (through event system)
+	 */
+	kbdev = kctx->kbdev;
+
+	beenthere(kctx, "%s", "Enter");
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+		return -EINVAL;
+	}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	if ((uk6_atom && submit_data->stride !=
+			sizeof(struct base_jd_atom_v2_uk6)) ||
+			submit_data->stride != sizeof(base_jd_atom_v2)) {
+#else
+	if (submit_data->stride != sizeof(base_jd_atom_v2)) {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+		dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+		return -EINVAL;
+	}
+
+	user_addr = get_compat_pointer(kctx, &submit_data->addr);
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight));
+
+	/* All atoms submitted in this call have the same flush ID */
+	latest_flush = kbase_backend_get_current_flush_id(kbdev);
+
+	for (i = 0; i < submit_data->nr_atoms; i++) {
+		struct base_jd_atom_v2 user_atom;
+		struct kbase_jd_atom *katom;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+		if (uk6_atom) {
+			struct base_jd_atom_v2_uk6 user_atom_v6;
+			base_jd_dep_type dep_types[2] = {BASE_JD_DEP_TYPE_DATA, BASE_JD_DEP_TYPE_DATA};
+
+			if (copy_from_user(&user_atom_v6, user_addr,
+					sizeof(user_atom_v6))) {
+				err = -EINVAL;
+				KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx,
+					atomic_sub_return(
+					submit_data->nr_atoms - i,
+					&kctx->timeline.jd_atoms_in_flight));
+				break;
+			}
+			/* Convert from UK6 atom format to UK7 format */
+			user_atom.jc = user_atom_v6.jc;
+			user_atom.udata = user_atom_v6.udata;
+			user_atom.extres_list = user_atom_v6.extres_list;
+			user_atom.nr_extres = user_atom_v6.nr_extres;
+			user_atom.core_req = (u32)(user_atom_v6.core_req & 0x7fff);
+
+			/* atom number 0 is used for no dependency atoms */
+			if (!user_atom_v6.pre_dep[0])
+				dep_types[0] = BASE_JD_DEP_TYPE_INVALID;
+
+			base_jd_atom_dep_set(&user_atom.pre_dep[0],
+					user_atom_v6.pre_dep[0],
+					dep_types[0]);
+
+			/* atom number 0 is used for no dependency atoms */
+			if (!user_atom_v6.pre_dep[1])
+				dep_types[1] = BASE_JD_DEP_TYPE_INVALID;
+
+			base_jd_atom_dep_set(&user_atom.pre_dep[1],
+					user_atom_v6.pre_dep[1],
+					dep_types[1]);
+
+			user_atom.atom_number = user_atom_v6.atom_number;
+			user_atom.prio = user_atom_v6.prio;
+			user_atom.device_nr = user_atom_v6.device_nr;
+		} else {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+		if (copy_from_user(&user_atom, user_addr, sizeof(user_atom)) != 0) {
+			err = -EINVAL;
+			KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(submit_data->nr_atoms - i, &kctx->timeline.jd_atoms_in_flight));
+			break;
+		}
+#ifdef BASE_LEGACY_UK6_SUPPORT
+		}
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+		if (KBASE_API_VERSION(10, 3) > kctx->api_version)
+			user_atom.core_req = (u32)(user_atom.compat_core_req
+					      & 0x7fff);
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
+
+		user_addr = (void __user *)((uintptr_t) user_addr + submit_data->stride);
+
+		mutex_lock(&jctx->lock);
+#ifndef compiletime_assert
+#define compiletime_assert_defined
+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \
+while (false)
+#endif
+		compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) ==
+					BASE_JD_ATOM_COUNT,
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+		compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) ==
+					sizeof(user_atom.atom_number),
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+#ifdef compiletime_assert_defined
+#undef compiletime_assert
+#undef compiletime_assert_defined
+#endif
+		katom = &jctx->atoms[user_atom.atom_number];
+
+		/* Record the flush ID for the cache flush optimisation */
+		katom->flush_id = latest_flush;
+
+		while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
+			/* Atom number is already in use, wait for the atom to
+			 * complete
+			 */
+			mutex_unlock(&jctx->lock);
+
+			/* This thread will wait for the atom to complete. Due
+			 * to thread scheduling we are not sure that the other
+			 * thread that owns the atom will also schedule the
+			 * context, so we force the scheduler to be active and
+			 * hence eventually schedule this context at some point
+			 * later.
+			 */
+			kbase_js_sched_all(kbdev);
+
+			if (wait_event_killable(katom->completed,
+					katom->status ==
+					KBASE_JD_ATOM_STATE_UNUSED) != 0) {
+				/* We're being killed so the result code
+				 * doesn't really matter
+				 */
+				return 0;
+			}
+			mutex_lock(&jctx->lock);
+		}
+
+		need_to_try_schedule_context |=
+				       jd_submit_atom(kctx, &user_atom, katom);
+
+		/* Register a completed job as a disjoint event when the GPU is in a disjoint state
+		 * (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kbdev);
+
+		mutex_unlock(&jctx->lock);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kbdev);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_submit);
+
+void kbase_jd_done_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	u64 cache_jc = katom->jc;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool context_idle;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+	kbdev = kctx->kbdev;
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	/*
+	 * Begin transaction on JD context and JS context
+	 */
+	mutex_lock(&jctx->lock);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_DONE);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* This worker only gets called on contexts that are scheduled *in*. This is
+	 * because it only happens in response to an IRQ from a job that was
+	 * running.
+	 */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (katom->event_code == BASE_JD_EVENT_STOPPED) {
+		/* Atom has been promoted to stopped */
+		unsigned long flags;
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		kbase_js_unpull(kctx, katom);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&jctx->lock);
+
+		return;
+	}
+
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		dev_err(kbdev->dev,
+			"t6xx: GPU fault 0x%02lx from job slot %d\n",
+					(unsigned long)katom->event_code,
+								katom->slot_nr);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	/* Retain state before the katom disappears */
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+
+	context_idle = kbase_js_complete_atom_wq(kctx, katom);
+
+	KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
+
+	kbasep_js_remove_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+	jd_done_nolock(katom, &kctx->completed_jobs);
+
+	/* katom may have been freed now, do not use! */
+
+	if (context_idle) {
+		unsigned long flags;
+
+		context_idle = false;
+		mutex_lock(&js_devdata->queue_mutex);
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* If kbase_sched() has scheduled this context back in then
+		 * KCTX_ACTIVE will have been set after we marked it as
+		 * inactive, and another pm reference will have been taken, so
+		 * drop our reference. But do not call kbase_jm_idle_ctx(), as
+		 * the context is active and fast-starting is allowed.
+		 *
+		 * If an atom has been fast-started then kctx->atoms_pulled will
+		 * be non-zero but KCTX_ACTIVE will still be false (as the
+		 * previous pm reference has been inherited). Do NOT drop our
+		 * reference, as it has been re-used, and leave the context as
+		 * active.
+		 *
+		 * If no new atoms have been started then KCTX_ACTIVE will still
+		 * be false and atoms_pulled will be zero, so drop the reference
+		 * and call kbase_jm_idle_ctx().
+		 *
+		 * As the checks are done under both the queue_mutex and
+		 * hwaccess_lock is should be impossible for this to race
+		 * with the scheduler code.
+		 */
+		if (kbase_ctx_flag(kctx, KCTX_ACTIVE) ||
+		    !atomic_read(&kctx->atoms_pulled)) {
+			/* Calling kbase_jm_idle_ctx() here will ensure that
+			 * atoms are not fast-started when we drop the
+			 * hwaccess_lock. This is not performed if
+			 * KCTX_ACTIVE is set as in that case another pm
+			 * reference has been taken and a fast-start would be
+			 * valid.
+			 */
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE))
+				kbase_jm_idle_ctx(kbdev, kctx);
+			context_idle = true;
+		} else {
+			kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+		}
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+
+	/*
+	 * Transaction complete
+	 */
+	mutex_unlock(&jctx->lock);
+
+	/* Job is now no longer running, so can now safely release the context
+	 * reference, and handle any actions that were logged against the atom's retained state */
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	if (!atomic_dec_return(&kctx->work_count)) {
+		/* If worker now idle then post all events that jd_done_nolock()
+		 * has queued */
+		mutex_lock(&jctx->lock);
+		while (!list_empty(&kctx->completed_jobs)) {
+			struct kbase_jd_atom *atom = list_entry(
+					kctx->completed_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(kctx->completed_jobs.next);
+
+			kbase_event_post(kctx, atom);
+		}
+		mutex_unlock(&jctx->lock);
+	}
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+
+	if (context_idle)
+		kbase_pm_context_idle(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+}
+
+/**
+ * jd_cancel_worker - Work queue job cancel function.
+ * @data: a &struct work_struct
+ *
+ * Only called as part of 'Zapping' a context (which occurs on termination).
+ * Operates serially with the kbase_jd_done_worker() on the work queue.
+ *
+ * This can only be called on contexts that aren't scheduled.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be
+ * running (by virtue of only being called on contexts that aren't
+ * scheduled).
+ */
+static void jd_cancel_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool need_to_try_schedule_context;
+	bool attr_state_changed;
+	struct kbase_device *kbdev;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	kbdev = kctx->kbdev;
+	jctx = &kctx->jctx;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+	/* This only gets called on contexts that are scheduled out. Hence, we must
+	 * make sure we don't de-ref the number of running jobs (there aren't
+	 * any), nor must we try to schedule out the context (it's already
+	 * scheduled out).
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	/* Scheduler: Remove the job from the system */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&jctx->lock);
+
+	need_to_try_schedule_context = jd_done_nolock(katom, NULL);
+	/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
+	 * schedule the context. There's also no need for the jsctx_mutex to have been taken
+	 * around this too. */
+	KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
+
+	/* katom may have been freed now, do not use! */
+	mutex_unlock(&jctx->lock);
+
+	if (attr_state_changed)
+		kbase_js_sched_all(kbdev);
+}
+
+/**
+ * kbase_jd_done - Complete a job that has been removed from the Hardware
+ * @katom: atom which has been completed
+ * @slot_nr: slot the atom was on
+ * @end_timestamp: completion time
+ * @done_code: completion code
+ *
+ * This must be used whenever a job has been removed from the Hardware, e.g.:
+ * An IRQ indicates that the job finished (for both error and 'done' codes), or
+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop.
+ *
+ * Some work is carried out immediately, and the rest is deferred onto a
+ * workqueue
+ *
+ * Context:
+ *   This can be called safely from atomic context.
+ *   The caller must hold kbdev->hwaccess_lock
+ */
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
+		ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
+{
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(kctx);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
+		katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0);
+
+	kbase_job_check_leave_disjoint(kbdev, katom);
+
+	katom->slot_nr = slot_nr;
+
+	atomic_inc(&kctx->work_count);
+
+#ifdef CONFIG_DEBUG_FS
+	/* a failed job happened and is waiting for dumping*/
+	if (!katom->will_fail_event_code &&
+			kbase_debug_job_fault_process(katom, katom->event_code))
+		return;
+#endif
+
+	WARN_ON(work_pending(&katom->work));
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, kbase_jd_done_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_done);
+
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+	/* This should only be done from a context that is not scheduled */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	WARN_ON(work_pending(&katom->work));
+
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, jd_cancel_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+
+void kbase_jd_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_jd_atom *katom;
+	struct list_head *entry, *tmp;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u);
+
+	kbase_js_zap_context(kctx);
+
+	mutex_lock(&kctx->jctx.lock);
+
+	/*
+	 * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are
+	 * queued outside the job scheduler.
+	 */
+
+	del_timer_sync(&kctx->soft_job_timeout);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		katom = list_entry(entry, struct kbase_jd_atom, queue);
+		kbase_cancel_soft_job(katom);
+	}
+
+
+#ifdef CONFIG_KDS
+
+	/* For each job waiting on a kds resource, cancel the wait and force the job to
+	 * complete early, this is done so that we don't leave jobs outstanding waiting
+	 * on kds resources which may never be released when contexts are zapped, resulting
+	 * in a hang.
+	 *
+	 * Note that we can safely iterate over the list as the struct kbase_jd_context lock is held,
+	 * this prevents items being removed when calling job_done_nolock in kbase_cancel_kds_wait_job.
+	 */
+
+	list_for_each(entry, &kctx->waiting_kds_resource) {
+		katom = list_entry(entry, struct kbase_jd_atom, node);
+
+		kbase_cancel_kds_wait_job(katom);
+	}
+#endif
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_cancel_all_atoms(kctx);
+#endif
+
+	mutex_unlock(&kctx->jctx.lock);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 */
+	flush_workqueue(kctx->dma_fence.wq);
+#endif
+
+	kbase_jm_wait_for_zero_jobs(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_zap_context);
+
+int kbase_jd_init(struct kbase_context *kctx)
+{
+	int i;
+	int mali_err = 0;
+#ifdef CONFIG_KDS
+	int err;
+#endif				/* CONFIG_KDS */
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kctx->jctx.job_done_wq = alloc_workqueue("mali_jd",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (NULL == kctx->jctx.job_done_wq) {
+		mali_err = -ENOMEM;
+		goto out1;
+	}
+
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		init_waitqueue_head(&kctx->jctx.atoms[i].completed);
+
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]);
+
+		/* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
+		kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
+		kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		kctx->jctx.atoms[i].dma_fence.context = fence_context_alloc(1);
+		atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks);
+#endif
+	}
+
+	mutex_init(&kctx->jctx.lock);
+
+	init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
+
+	spin_lock_init(&kctx->jctx.tb_lock);
+
+#ifdef CONFIG_KDS
+	err = kds_callback_init(&kctx->jctx.kds_cb, 0, kds_dep_clear);
+	if (0 != err) {
+		mali_err = -EINVAL;
+		goto out2;
+	}
+#endif				/* CONFIG_KDS */
+
+	kctx->jctx.job_nr = 0;
+	INIT_LIST_HEAD(&kctx->completed_jobs);
+	atomic_set(&kctx->work_count, 0);
+
+	return 0;
+
+#ifdef CONFIG_KDS
+ out2:
+	destroy_workqueue(kctx->jctx.job_done_wq);
+#endif				/* CONFIG_KDS */
+ out1:
+	return mali_err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_init);
+
+void kbase_jd_exit(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+#ifdef CONFIG_KDS
+	kds_callback_term(&kctx->jctx.kds_cb);
+#endif				/* CONFIG_KDS */
+	/* Work queue is emptied by this */
+	destroy_workqueue(kctx->jctx.job_done_wq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_exit);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
new file mode 100644
index 0000000..47ea426
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -0,0 +1,236 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/seq_file.h>
+
+#include <mali_kbase.h>
+
+#include <mali_kbase_jd_debugfs.h>
+#include <mali_kbase_dma_fence.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+struct kbase_jd_debugfs_depinfo {
+	u8 id;
+	char type;
+};
+
+#ifdef CONFIG_SYNC
+#include <mali_kbase_sync.h>
+#include "sync.h"
+#endif /* CONFIG_SYNC */
+
+
+static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
+					struct seq_file *sfile)
+{
+#ifdef CONFIG_SYNC
+	struct sync_fence *fence = atom->fence;
+	int status;
+
+	switch (atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		if (!fence)
+			break;
+
+		status = kbase_fence_get_status(fence);
+
+		seq_printf(sfile, "Sa([%p]%d) ",
+			 fence, status);
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		if (!fence)
+			break;
+
+		status = kbase_fence_get_status(fence);
+
+		seq_printf(sfile, "Wa([%p]%d) ",
+			 fence, status);
+		break;
+	default:
+		break;
+	}
+#endif /* CONFIG_SYNC */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		struct kbase_dma_fence_cb *cb;
+
+		if (atom->dma_fence.fence) {
+			struct fence *fence = atom->dma_fence.fence;
+
+			seq_printf(sfile,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+					"Sd(%u#%u: %s) ",
+#else
+					"Sd(%llu#%u: %s) ",
+#endif
+					fence->context,
+					fence->seqno,
+					fence_is_signaled(fence) ?
+						"signaled" : "active");
+		}
+
+		list_for_each_entry(cb, &atom->dma_fence.callbacks,
+				    node) {
+			struct fence *fence = cb->fence;
+
+			seq_printf(sfile,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+					"Wd(%u#%u: %s) ",
+#else
+					"Wd(%llu#%u: %s) ",
+#endif
+					fence->context,
+					fence->seqno,
+					fence_is_signaled(fence) ?
+						"signaled" : "active");
+		}
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+}
+
+static void kbasep_jd_debugfs_atom_deps(
+		struct kbase_jd_debugfs_depinfo *deps,
+		struct kbase_jd_atom *atom)
+{
+	struct kbase_context *kctx = atom->kctx;
+	int i;
+
+	for (i = 0; i < 2; i++)	{
+		deps[i].id = (unsigned)(atom->dep[i].atom ?
+				kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0);
+
+		switch (atom->dep[i].dep_type) {
+		case BASE_JD_DEP_TYPE_INVALID:
+			deps[i].type = ' ';
+			break;
+		case BASE_JD_DEP_TYPE_DATA:
+			deps[i].type = 'D';
+			break;
+		case BASE_JD_DEP_TYPE_ORDER:
+			deps[i].type = '>';
+			break;
+		default:
+			deps[i].type = '?';
+			break;
+		}
+	}
+}
+/**
+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file.
+ * @sfile: The debugfs entry
+ * @data:  Data associated with the entry
+ *
+ * This function is called to get the contents of the JD atoms debugfs file.
+ * This is a report of all atoms managed by kbase_jd_context.atoms
+ *
+ * Return: 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+	struct kbase_jd_atom *atoms;
+	unsigned long irq_flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Print version */
+	seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION);
+
+	/* Print U/K API version */
+	seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR,
+			BASE_UK_VERSION_MINOR);
+
+	/* Print table heading */
+	seq_puts(sfile, " ID, Core req, St, CR,   Predeps,           Start time, Additional info...\n");
+
+	atoms = kctx->jctx.atoms;
+	/* General atom states */
+	mutex_lock(&kctx->jctx.lock);
+	/* JS-related states */
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+	for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
+		struct kbase_jd_atom *atom = &atoms[i];
+		s64 start_timestamp = 0;
+		struct kbase_jd_debugfs_depinfo deps[2];
+
+		if (atom->status == KBASE_JD_ATOM_STATE_UNUSED)
+			continue;
+
+		/* start_timestamp is cleared as soon as the atom leaves UNUSED state
+		 * and set before a job is submitted to the h/w, a non-zero value means
+		 * it is valid */
+		if (ktime_to_ns(atom->start_timestamp))
+			start_timestamp = ktime_to_ns(
+					ktime_sub(ktime_get(), atom->start_timestamp));
+
+		kbasep_jd_debugfs_atom_deps(deps, atom);
+
+		seq_printf(sfile,
+				"%3u, %8x, %2u, %2u, %c%3u %c%3u, %20lld, ",
+				i, atom->core_req, atom->status,
+				atom->coreref_state,
+				deps[0].type, deps[0].id,
+				deps[1].type, deps[1].id,
+				start_timestamp);
+
+
+		kbase_jd_debugfs_fence_info(atom, sfile);
+
+		seq_puts(sfile, "\n");
+	}
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&kctx->jctx.lock);
+
+	return 0;
+}
+
+
+/**
+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * Return: file descriptor
+ */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private);
+}
+
+static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+	.open = kbasep_jd_debugfs_atoms_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Expose all atoms */
+	debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx,
+			&kbasep_jd_debugfs_atoms_fops);
+
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
new file mode 100644
index 0000000..0935f1d
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_jd_debugfs.h
+ * Header file for job dispatcher-related entries in debugfs
+ */
+
+#ifndef _KBASE_JD_DEBUGFS_H
+#define _KBASE_JD_DEBUGFS_H
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+
+#define MALI_JD_DEBUGFS_VERSION 2
+
+/**
+ * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system
+ *
+ * @kctx Pointer to kbase_context
+ */
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx);
+
+#endif  /*_KBASE_JD_DEBUGFS_H*/
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
new file mode 100644
index 0000000..0c5c6a6
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_hwaccess_jm.h"
+#include "mali_kbase_jm.h"
+
+/**
+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot
+ *			 @js on the active context.
+ * @kbdev:		Device pointer
+ * @js:			Job slot to run on
+ * @nr_jobs_to_submit:	Number of jobs to attempt to submit
+ *
+ * Return: true if slot can still be submitted on, false if slot is now full.
+ */
+static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
+				int nr_jobs_to_submit)
+{
+	struct kbase_context *kctx;
+	int i;
+
+	kctx = kbdev->hwaccess.active_kctx;
+
+	if (!kctx)
+		return true;
+
+	for (i = 0; i < nr_jobs_to_submit; i++) {
+		struct kbase_jd_atom *katom = kbase_js_pull(kctx, js);
+
+		if (!katom)
+			return true; /* Context has no jobs on this slot */
+
+		kbase_backend_run_atom(kbdev, katom);
+	}
+
+	return false; /* Slot ringbuffer should now be full */
+}
+
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	u32 ret_mask = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (js_mask) {
+		int js = ffs(js_mask) - 1;
+		int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
+
+		if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
+			ret_mask |= (1 << js);
+
+		js_mask &= ~(1 << js);
+	}
+
+	return ret_mask;
+}
+
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick(kbdev, js_mask);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_try_kick_all(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick_all(kbdev);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->hwaccess.active_kctx == kctx)
+		kbdev->hwaccess.active_kctx = NULL;
+}
+
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->event_code != BASE_JD_EVENT_STOPPED &&
+			katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
+		return kbase_js_complete_atom(katom, NULL);
+	} else {
+		kbase_js_unpull(katom->kctx, katom);
+		return NULL;
+	}
+}
+
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return kbase_js_complete_atom(katom, end_timestamp);
+}
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
new file mode 100644
index 0000000..a74ee24
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -0,0 +1,110 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Job manager common APIs
+ */
+
+#ifndef _KBASE_JM_H_
+#define _KBASE_JM_H_
+
+/**
+ * kbase_jm_kick() - Indicate that there are jobs ready to run.
+ * @kbdev:	Device pointer
+ * @js_mask:	Mask of the job slots that can be pulled from.
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job
+ *			 slots.
+ * @kbdev:	Device pointer
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
+{
+	return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+/**
+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick
+ * @kbdev:   Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
+ * @kbdev:  Device pointer
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick_all() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick_all(struct kbase_device *kbdev);
+
+/**
+ * kbase_jm_idle_ctx() - Mark a context as idle.
+ * @kbdev:	Device pointer
+ * @kctx:	Context to mark as idle
+ *
+ * No more atoms will be pulled from this context until it is marked as active
+ * by kbase_js_use_ctx().
+ *
+ * The context should have no atoms currently pulled from it
+ * (kctx->atoms_pulled == 0).
+ *
+ * Caller must hold the hwaccess_lock
+ */
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
+ *				  been soft-stopped or will fail due to a
+ *				  dependency
+ * @kbdev:	Device pointer
+ * @katom:	Atom that has been stopped or will be failed
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+			struct kbase_jd_atom *katom);
+
+/**
+ * kbase_jm_complete() - Complete an atom
+ * @kbdev:		Device pointer
+ * @katom:		Atom that has completed
+ * @end_timestamp:	Timestamp of atom completion
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp);
+
+#endif /* _KBASE_JM_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
new file mode 100644
index 0000000..3a675a1
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -0,0 +1,2937 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_hw.h>
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_jm.h"
+#include "mali_kbase_hwaccess_jm.h"
+
+/*
+ * Private types
+ */
+
+/* Bitpattern indicating the result of releasing a context */
+enum {
+	/* The context was descheduled - caller should try scheduling in a new
+	 * one to keep the runpool full */
+	KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+	/* Ctx attributes were changed - caller should try scheduling all
+	 * contexts */
+	KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
+};
+
+typedef u32 kbasep_js_release_result;
+
+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
+	KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
+	KBASE_JS_ATOM_SCHED_PRIO_LOW  /* BASE_JD_PRIO_LOW */
+};
+
+const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
+	BASE_JD_PRIO_HIGH,   /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+	BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+	BASE_JD_PRIO_LOW     /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+};
+
+
+/*
+ * Private function prototypes
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state);
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_ctx_job_cb callback);
+
+/* Helper for trace subcodes */
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return refcnt;
+}
+
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+/*
+ * Private functions
+ */
+
+/**
+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
+ * @features: JSn_FEATURE register value
+ *
+ * Given a JSn_FEATURE register value returns the core requirements that match
+ *
+ * Return: Core requirement bit mask
+ */
+static base_jd_core_req core_reqs_from_jsn_features(u16 features)
+{
+	base_jd_core_req core_req = 0u;
+
+	if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+		core_req |= BASE_JD_REQ_V;
+
+	if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+		core_req |= BASE_JD_REQ_CF;
+
+	if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+		core_req |= BASE_JD_REQ_CS;
+
+	if ((features & JS_FEATURE_TILER_JOB) != 0)
+		core_req |= BASE_JD_REQ_T;
+
+	if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+		core_req |= BASE_JD_REQ_FS;
+
+	return core_req;
+}
+
+static void kbase_js_sync_timers(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+	kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
+
+/* Hold the hwaccess_lock for this */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_per_as_data *js_per_as_data;
+	bool result = false;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		int new_refcnt;
+
+		KBASE_DEBUG_ASSERT(as_nr >= 0);
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		KBASE_DEBUG_ASSERT(js_per_as_data->kctx != NULL);
+
+		new_refcnt = ++(js_per_as_data->as_busy_refcount);
+
+		KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
+				NULL, 0u, new_refcnt);
+		result = true;
+	}
+
+	return result;
+}
+
+/**
+ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority to check.
+ *
+ * Return true if there are no atoms to pull. There may be running atoms in the
+ * ring buffer even if there are no atoms to pull. It is also possible for the
+ * ring buffer to be full (with running atoms) when this functions returns
+ * true.
+ *
+ * Return: true if there are no atoms to pull, false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	return RB_EMPTY_ROOT(&rb->runnable_tree);
+}
+
+/**
+ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no
+ * pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if the ring buffers for all priorities have no pullable atoms,
+ *	   false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue.
+ * @kctx:     Pointer to kbase context with the queue.
+ * @js:       Job slot id to iterate.
+ * @prio:     Priority id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over a queue and invoke @callback for each entry in the queue, and
+ * remove the entry from the queue.
+ *
+ * If entries are added to the queue while this is running those entries may, or
+ * may not be covered. To ensure that all entries in the buffer have been
+ * enumerated when this function returns jsctx->lock must be held when calling
+ * this function.
+ *
+ * The HW access lock must always be held when calling this function.
+ */
+static void
+jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
+		kbasep_js_ctx_job_cb callback)
+{
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (!RB_EMPTY_ROOT(&queue->runnable_tree)) {
+		struct rb_node *node = rb_first(&queue->runnable_tree);
+		struct kbase_jd_atom *entry = rb_entry(node,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		rb_erase(node, &queue->runnable_tree);
+		callback(kctx->kbdev, entry);
+	}
+
+	while (!list_empty(&queue->x_dep_head)) {
+		struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next,
+				struct kbase_jd_atom, queue);
+
+		list_del(queue->x_dep_head.next);
+
+		callback(kctx->kbdev, entry);
+	}
+}
+
+/**
+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue
+ * @kctx:     Pointer to kbase context with queue.
+ * @js:       Job slot id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over all the different priorities, and for each call
+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
+ * for each entry, and remove the entry from the queue.
+ */
+static inline void
+jsctx_queue_foreach(struct kbase_context *kctx, int js,
+		kbasep_js_ctx_job_cb callback)
+{
+	int prio;
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+		jsctx_queue_foreach_prio(kctx, js, prio, callback);
+}
+
+/**
+ * jsctx_rb_peek_prio(): - Check buffer and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a pointer to
+ * the next atom, unless the ring buffer is empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	struct rb_node *node;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	node = rb_first(&rb->runnable_tree);
+	if (!node)
+		return NULL;
+
+	return rb_entry(node, struct kbase_jd_atom, runnable_tree_node);
+}
+
+/**
+ * jsctx_rb_peek(): - Check all priority buffers and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Check the ring buffers for all priorities, starting from
+ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
+ * pointer to the next atom, unless all the priority's ring buffers are empty.
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		struct kbase_jd_atom *katom;
+
+		katom = jsctx_rb_peek_prio(kctx, js, prio);
+		if (katom)
+			return katom;
+	}
+
+	return NULL;
+}
+
+/**
+ * jsctx_rb_pull(): - Mark atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to pull.
+ *
+ * Mark an atom previously obtained from jsctx_rb_peek() as running.
+ *
+ * @katom must currently be at the head of the ring buffer.
+ */
+static inline void
+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/* Atoms must be pulled in the correct order. */
+	WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
+
+	rb_erase(&katom->runnable_tree_node, &rb->runnable_tree);
+}
+
+#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0)
+
+static void
+jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+	struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (*new) {
+		struct kbase_jd_atom *entry = container_of(*new,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		parent = *new;
+		if (LESS_THAN_WRAP(katom->age, entry->age))
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&katom->runnable_tree_node, parent, new);
+	rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree);
+}
+
+/**
+ * jsctx_rb_unpull(): - Undo marking of atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to unpull.
+ *
+ * Undo jsctx_rb_pull() and put @katom back in the queue.
+ *
+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
+ * pulled.
+ */
+static inline void
+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_tree_add(kctx, katom);
+}
+
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
+					int js,
+					bool is_scheduled);
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+
+/*
+ * Functions private to KBase ('Protected' functions)
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev)
+{
+	struct kbasep_js_device_data *jsdd;
+	int i;
+	u16 as_present;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	jsdd = &kbdev->js_data;
+
+	/* These two must be recalculated if nr_hw_address_spaces changes
+	 * (e.g. for HW workarounds) */
+	as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
+	kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+		bool use_workaround;
+
+		use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
+		if (use_workaround) {
+			dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
+			kbdev->nr_user_address_spaces = 1;
+		}
+	}
+#ifdef CONFIG_MALI_DEBUG
+	/* Soft-stop will be disabled on a single context by default unless
+	 * softstop_always is set */
+	jsdd->softstop_always = false;
+#endif				/* CONFIG_MALI_DEBUG */
+	jsdd->nr_all_contexts_running = 0;
+	jsdd->nr_user_contexts_running = 0;
+	jsdd->nr_contexts_pullable = 0;
+	atomic_set(&jsdd->nr_contexts_runnable, 0);
+	/* All ASs initially free */
+	jsdd->as_free = as_present;
+	/* No ctx allowed to submit */
+	jsdd->runpool_irq.submit_allowed = 0u;
+	memset(jsdd->runpool_irq.ctx_attr_ref_count, 0,
+			sizeof(jsdd->runpool_irq.ctx_attr_ref_count));
+	memset(jsdd->runpool_irq.slot_affinities, 0,
+			sizeof(jsdd->runpool_irq.slot_affinities));
+	memset(jsdd->runpool_irq.slot_affinity_refcount, 0,
+			sizeof(jsdd->runpool_irq.slot_affinity_refcount));
+	INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list);
+
+	/* Config attributes */
+	jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+	jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+	jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+	else
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS;
+	jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+	jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408;
+	else
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+	jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+	jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
+	jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
+	atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
+
+	dev_dbg(kbdev->dev, "JS Config Attribs: ");
+	dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
+			jsdd->scheduling_period_ns);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u",
+			jsdd->soft_stop_ticks);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u",
+			jsdd->soft_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u",
+			jsdd->hard_stop_ticks_ss);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u",
+			jsdd->hard_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u",
+			jsdd->hard_stop_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u",
+			jsdd->gpu_reset_ticks_ss);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u",
+			jsdd->gpu_reset_ticks_cl);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u",
+			jsdd->gpu_reset_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u",
+			jsdd->ctx_timeslice_ns);
+	dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i",
+		atomic_read(&jsdd->soft_job_timeout_ms));
+
+	if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
+			jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
+			jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping &&
+			jsdd->hard_stop_ticks_dumping <
+			jsdd->gpu_reset_ticks_dumping)) {
+		dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n");
+		return -EINVAL;
+	}
+
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.",
+			jsdd->soft_stop_ticks,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.",
+			jsdd->hard_stop_ticks_ss,
+			jsdd->hard_stop_ticks_dumping,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing.");
+#endif
+
+	/* setup the number of irq throttle cycles base on given time */
+	{
+		int time_us = kbdev->gpu_props.irq_throttle_time_us;
+		int cycles = kbasep_js_convert_us_to_gpu_ticks_max_freq(kbdev,
+				time_us);
+
+		atomic_set(&kbdev->irq_throttle_cycles, cycles);
+	}
+
+	/* Clear the AS data, including setting NULL pointers */
+	memset(&jsdd->runpool_irq.per_as_data[0], 0,
+			sizeof(jsdd->runpool_irq.per_as_data));
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i)
+		jsdd->js_reqs[i] = core_reqs_from_jsn_features(
+			kbdev->gpu_props.props.raw_props.js_features[i]);
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+
+	mutex_init(&jsdd->runpool_mutex);
+	mutex_init(&jsdd->queue_mutex);
+	spin_lock_init(&kbdev->hwaccess_lock);
+	sema_init(&jsdd->schedule_sem, 1);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
+		INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]);
+		INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]);
+	}
+
+	return 0;
+}
+
+void kbasep_js_devdata_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbasep_js_devdata_term(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	/* The caller must de-register all contexts before calling this
+	 */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
+	KBASE_DEBUG_ASSERT(memcmp(
+	        js_devdata->runpool_irq.ctx_attr_ref_count,
+	        zero_ctx_attr_ref_count,
+	        sizeof(zero_ctx_attr_ref_count)) == 0);
+	CSTD_UNUSED(zero_ctx_attr_ref_count);
+}
+
+int kbasep_js_kctx_init(struct kbase_context * const kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int i, j;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	js_kctx_info->ctx.nr_jobs = 0;
+	kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+	kbase_ctx_flag_clear(kctx, KCTX_DYING);
+	memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
+			sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
+
+	/* Initially, the context is disabled from submission until the create
+	 * flags are set */
+	kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+	mutex_init(&js_kctx_info->ctx.jsctx_mutex);
+
+	init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
+
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
+			INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head);
+			kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT;
+		}
+	}
+
+	return 0;
+}
+
+void kbasep_js_kctx_term(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int js;
+	bool update_ctx_count = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* The caller must de-register all jobs before calling this */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
+
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) {
+		WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
+		atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		update_ctx_count = true;
+		kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+	}
+
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if (update_ctx_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		kbase_backend_ctx_count_changed(kbdev);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_nolock - Variant of
+ *                                         kbase_jd_ctx_list_add_pullable()
+ *                                         where the caller must hold
+ *                                         hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head_nolock - Variant of
+ *                                              kbase_js_ctx_list_add_pullable_head()
+ *                                              where the caller must hold
+ *                                              hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head_nolock(
+		struct kbase_device *kbdev, struct kbase_context *kctx, int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
+ *                                       per-slot pullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the head.
+ *
+ * This function should be used when a context has been scheduled, but no jobs
+ * can currently be pulled from it.
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the
+ *                                           per-slot unpullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on the per-slot pullable queue. It will be
+ * removed from the pullable queue before being added to the unpullable queue.
+ *
+ * This function should be used when a context has been pulled from, and there
+ * are no jobs remaining on the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+				&kbdev->js_data.ctx_list_unpullable[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable
+ *                                   or unpullable context queues
+ * @kbdev:  Device pointer
+ * @kctx:   Context to remove from queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on one of the queues.
+ *
+ * This function should be used when a context has no jobs on the GPU, and no
+ * jobs remaining for the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head()
+ *                                     where the caller must hold
+ *                                     hwaccess_lock
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
+						struct kbase_device *kbdev,
+						int js)
+{
+	struct kbase_context *kctx;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (list_empty(&kbdev->js_data.ctx_list_pullable[js]))
+		return NULL;
+
+	kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next,
+					struct kbase_context,
+					jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
+ *                              queue.
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head(
+		struct kbase_device *kbdev, int js)
+{
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the
+ *                         specified slot
+ * @kctx:          Context pointer
+ * @js:            Job slot to use
+ * @is_scheduled:  true if the context is currently scheduled
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:         true if context can be pulled from on specified slot
+ *                 false otherwise
+ */
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
+					bool is_scheduled)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_jd_atom *katom;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	js_devdata = &kctx->kbdev->js_data;
+
+	if (is_scheduled) {
+		if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+			return false;
+	}
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return false; /* No pullable atoms */
+	if (kctx->blocked_js[js][katom->sched_priority])
+		return false;
+	if (atomic_read(&katom->blocked))
+		return false; /* next atom blocked */
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return false;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return false;
+	}
+
+	return true;
+}
+
+static bool kbase_js_dep_validate(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool ret = true;
+	bool has_dep = false, has_x_dep = false;
+	int js = kbase_js_get_slot(kbdev, katom);
+	int prio = katom->sched_priority;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+		if (dep_atom) {
+			int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+			int dep_prio = dep_atom->sched_priority;
+
+			/* Dependent atom must already have been submitted */
+			if (!(dep_atom->atom_flags &
+					KBASE_KATOM_FLAG_JSCTX_IN_TREE)) {
+				ret = false;
+				break;
+			}
+
+			/* Dependencies with different priorities can't
+			  be represented in the ringbuffer */
+			if (prio != dep_prio) {
+				ret = false;
+				break;
+			}
+
+			if (js == dep_js) {
+				/* Only one same-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * same-slot dependency */
+				if (dep_atom->post_dep) {
+					ret = false;
+					break;
+				}
+				has_dep = true;
+			} else {
+				/* Only one cross-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_x_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * cross-slot dependency */
+				if (dep_atom->x_post_dep) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already be in the
+				 * HW access ringbuffer */
+				if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already have
+				 * completed */
+				if (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_IN_JS) {
+					ret = false;
+					break;
+				}
+				/* Cross-slot dependencies must not violate
+				 * PRLAM-8987 affinity restrictions */
+				if (kbase_hw_has_issue(kbdev,
+							BASE_HW_ISSUE_8987) &&
+						(js == 2 || dep_js == 2)) {
+					ret = false;
+					break;
+				}
+				has_x_dep = true;
+			}
+
+			/* Dependency can be represented in ringbuffers */
+		}
+	}
+
+	/* If dependencies can be represented by ringbuffer then clear them from
+	 * atom structure */
+	if (ret) {
+		for (i = 0; i < 2; i++) {
+			struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+			if (dep_atom) {
+				int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+
+				if ((js != dep_js) &&
+					(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_COMPLETED)
+					&& (dep_atom->status !=
+					KBASE_JD_ATOM_STATE_HW_COMPLETED)
+					&& (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED)) {
+
+					katom->atom_flags |=
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+					katom->x_pre_dep = dep_atom;
+					dep_atom->x_post_dep = katom;
+					if (kbase_jd_katom_dep_type(
+							&katom->dep[i]) ==
+							BASE_JD_DEP_TYPE_DATA)
+						katom->atom_flags |=
+						KBASE_KATOM_FLAG_FAIL_BLOCKER;
+				}
+				if ((kbase_jd_katom_dep_type(&katom->dep[i])
+						== BASE_JD_DEP_TYPE_DATA) &&
+						(js == dep_js)) {
+					katom->pre_dep = dep_atom;
+					dep_atom->post_dep = katom;
+				}
+
+				list_del(&katom->dep_item[i]);
+				kbase_jd_katom_dep_clear(&katom->dep[i]);
+			}
+		}
+	}
+
+	return ret;
+}
+
+bool kbasep_js_add_job(struct kbase_context *kctx,
+		struct kbase_jd_atom *atom)
+{
+	unsigned long flags;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+	bool enqueue_required = false;
+	bool timer_sync = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/*
+	 * Begin Runpool transaction
+	 */
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
+	++(js_kctx_info->ctx.nr_jobs);
+
+	/* Setup any scheduling information */
+	kbasep_js_clear_job_retry_submit(atom);
+
+	/* Lock for state available during IRQ */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (!kbase_js_dep_validate(kctx, atom)) {
+		/* Dependencies could not be represented */
+		--(js_kctx_info->ctx.nr_jobs);
+
+		/* Setting atom status back to queued as it still has unresolved
+		 * dependencies */
+		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		goto out_unlock;
+	}
+
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_READY);
+	KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
+
+	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
+				kbasep_js_trace_get_refcnt_nolock(kbdev, kctx));
+
+	/* Context Attribute Refcounting */
+	kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom);
+
+	if (enqueue_required) {
+		if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false))
+			timer_sync = kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+		else
+			timer_sync = kbase_js_ctx_list_add_unpullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+	}
+	/* If this context is active and the atom is the first on its slot,
+	 * kick the job manager to attempt to fast-start the atom */
+	if (enqueue_required && kctx == kbdev->hwaccess.active_kctx)
+		kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+	/* End runpool transaction */
+
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+			/* A job got added while/after kbase_job_zap_context()
+			 * was called on a non-scheduled context (e.g. KDS
+			 * dependency resolved). Kill that job by killing the
+			 * context. */
+			kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx,
+					false);
+		} else if (js_kctx_info->ctx.nr_jobs == 1) {
+			/* Handle Refcount going from 0 to 1: schedule the
+			 * context on the Queue */
+			KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+			dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+
+			/* Queue was updated - caller must try to
+			 * schedule the head context */
+			WARN_ON(!enqueue_required);
+		}
+	}
+out_unlock:
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return enqueue_required;
+}
+
+void kbasep_js_remove_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc,
+			kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* De-refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
+	--(js_kctx_info->ctx.nr_jobs);
+}
+
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	unsigned long flags;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	struct kbasep_js_device_data *js_devdata;
+	bool attr_state_changed;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+	kbasep_js_remove_job(kbdev, kctx, katom);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* The atom has 'finished' (will not be re-run), so no need to call
+	 * kbasep_js_has_atom_finished().
+	 *
+	 * This is because it returns false for soft-stopped atoms, but we
+	 * want to override that, because we're cancelling an atom regardless of
+	 * whether it was soft-stopped or not */
+	attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
+			&katom_retained_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return attr_state_changed;
+}
+
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	bool result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	/* KBASE_TRACE_ADD_REFCOUNT( kbdev, JS_RETAIN_CTX, kctx, NULL, 0,
+	   kbasep_js_trace_get_refcnt(kbdev, kctx)); */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return result;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+		int as_nr)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx = NULL;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	found_kctx = js_per_as_data->kctx;
+
+	if (found_kctx != NULL)
+		++(js_per_as_data->as_busy_refcount);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return found_kctx;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+		struct kbase_device *kbdev, int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx = NULL;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	found_kctx = js_per_as_data->kctx;
+
+	if (found_kctx != NULL)
+		++(js_per_as_data->as_busy_refcount);
+
+	return found_kctx;
+}
+
+/**
+ * kbasep_js_release_result - Try running more jobs after releasing a context
+ *                            and/or atom
+ *
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed
+ *
+ * This collates a set of actions that must happen whilst hwaccess_lock is held.
+ *
+ * This includes running more jobs when:
+ * - The previously released kctx caused a ctx attribute change,
+ * - The released atom caused a ctx attribute change,
+ * - Slots were previously blocked due to affinity restrictions,
+ * - Submission during IRQ handling failed.
+ *
+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were
+ *         changed. The caller should try scheduling all contexts
+ */
+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state,
+		bool runpool_ctx_attr_change)
+{
+	struct kbasep_js_device_data *js_devdata;
+	kbasep_js_release_result result = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom_retained_state != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (js_devdata->nr_user_contexts_running != 0) {
+		bool retry_submit = false;
+		int retry_jobslot = 0;
+
+		if (katom_retained_state)
+			retry_submit = kbasep_js_get_atom_retry_submit_slot(
+					katom_retained_state, &retry_jobslot);
+
+		if (runpool_ctx_attr_change || retry_submit) {
+			/* A change in runpool ctx attributes might mean we can
+			 * run more jobs than before  */
+			result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+
+			KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB,
+						kctx, NULL, 0u, retry_jobslot);
+		}
+	}
+	return result;
+}
+
+/*
+ * Internal function to release the reference on a ctx and an atom's "retained
+ * state", only taking the runpool and as transaction mutexes
+ *
+ * This also starts more jobs running in the case of an ctx-attribute state
+ * change
+ *
+ * This does none of the followup actions for scheduling:
+ * - It does not schedule in a new context
+ * - It does not requeue or handle dying contexts
+ *
+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead
+ *
+ * Requires:
+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
+ * - Context has a non-zero refcount
+ * - Caller holds js_kctx_info->ctx.jsctx_mutex
+ * - Caller holds js_devdata->runpool_mutex
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	kbasep_js_release_result release_result = 0u;
+	bool runpool_ctx_attr_change = false;
+	int kctx_as_nr;
+	struct kbase_as *current_as;
+	int new_ref_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	/* kctx->as_nr and js_per_as_data are only read from here. The caller's
+	 * js_ctx_mutex provides a barrier that ensures they are up-to-date.
+	 *
+	 * They will not change whilst we're reading them, because the refcount
+	 * is non-zero (and we ASSERT on that last fact).
+	 */
+	kctx_as_nr = kctx->as_nr;
+	KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID);
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[kctx_as_nr];
+	KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+	/*
+	 * Transaction begins on AS and runpool_irq
+	 *
+	 * Assert about out calling contract
+	 */
+	current_as = &kbdev->as[kctx_as_nr];
+	mutex_lock(&kbdev->pm.lock);
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
+	KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+	/* Update refcount */
+	new_ref_count = --(js_per_as_data->as_busy_refcount);
+
+	/* Release the atom if it finished (i.e. wasn't soft-stopped) */
+	if (kbasep_js_has_atom_finished(katom_retained_state))
+		runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(
+				kbdev, kctx, katom_retained_state);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u,
+			new_ref_count);
+
+	if (new_ref_count == 1 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) &&
+			!kbase_pm_is_suspending(kbdev)) {
+		/* Context is kept scheduled into an address space even when
+		 * there are no jobs, in this case we have to handle the
+		 * situation where all jobs have been evicted from the GPU and
+		 * submission is disabled.
+		 *
+		 * At this point we re-enable submission to allow further jobs
+		 * to be executed
+		 */
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+	}
+
+	/* Make a set of checks to see if the context should be scheduled out */
+	if (new_ref_count == 0 &&
+		(!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
+							kbdev->pm.suspending)) {
+		int num_slots = kbdev->gpu_props.num_job_slots;
+		int slot;
+
+		/* Last reference, and we've been told to remove this context
+		 * from the Run Pool */
+		dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because as_busy_refcount=%d, jobs=%d, allowed=%d",
+				kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
+				kbasep_js_is_submit_allowed(js_devdata, kctx));
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_mmu_as_released(kctx->as_nr);
+#endif
+		KBASE_TLSTREAM_TL_NRET_AS_CTX(&kbdev->as[kctx->as_nr], kctx);
+
+		kbase_backend_release_ctx_irq(kbdev, kctx);
+
+		if (kbdev->hwaccess.active_kctx == kctx)
+			kbdev->hwaccess.active_kctx = NULL;
+
+		/* Ctx Attribute handling
+		 *
+		 * Releasing atoms attributes must either happen before this, or
+		 * after the KCTX_SHEDULED flag is changed, otherwise we
+		 * double-decount the attributes
+		 */
+		runpool_ctx_attr_change |=
+			kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
+
+		/* Releasing the context and katom retained state can allow
+		 * more jobs to run */
+		release_result |=
+			kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev,
+						kctx, katom_retained_state,
+						runpool_ctx_attr_change);
+
+		/*
+		 * Transaction ends on AS and runpool_irq:
+		 *
+		 * By this point, the AS-related data is now clear and ready
+		 * for re-use.
+		 *
+		 * Since releases only occur once for each previous successful
+		 * retain, and no more retains are allowed on this context, no
+		 * other thread will be operating in this
+		 * code whilst we are
+		 */
+
+		/* Recalculate pullable status for all slots */
+		for (slot = 0; slot < num_slots; slot++) {
+			if (kbase_js_ctx_pullable(kctx, slot, false))
+				kbase_js_ctx_list_add_pullable_nolock(kbdev,
+						kctx, slot);
+		}
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		kbase_backend_release_ctx_noirq(kbdev, kctx);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* Note: Don't reuse kctx_as_nr now */
+
+		/* Synchronize with any timers */
+		kbase_backend_ctx_count_changed(kbdev);
+
+		/* update book-keeping info */
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+		/* Signal any waiter that the context is not scheduled, so is
+		 * safe for termination - once the jsctx_mutex is also dropped,
+		 * and jobs have finished. */
+		wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+		/* Queue an action to occur after we've dropped the lock */
+		release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED |
+			KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+	} else {
+		kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
+				katom_retained_state, runpool_ctx_attr_change);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return release_result;
+}
+
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	/* Setup a dummy katom_retained_state */
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+							&katom_retained_state);
+}
+
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx, bool has_pm_ref)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	/* This is called if and only if you've you've detached the context from
+	 * the Runpool Queue, and not added it back to the Runpool
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Dying: don't requeue, but kill all jobs on the context. This
+		 * happens asynchronously */
+		dev_dbg(kbdev->dev,
+			"JS: ** Killing Context %p on RunPool Remove **", kctx);
+		kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
+	}
+}
+
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL)
+		kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+			&katom_retained_state);
+}
+
+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into
+ * kbase_js_sched_all() */
+static void kbasep_js_runpool_release_ctx_no_schedule(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+	struct kbasep_js_atom_retained_state katom_retained_state_struct;
+	struct kbasep_js_atom_retained_state *katom_retained_state =
+		&katom_retained_state_struct;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	kbasep_js_atom_retained_state_init_invalid(katom_retained_state);
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* NOTE: could return release_result if the caller would like to know
+	 * whether it should schedule a new context, but currently no callers do
+	 */
+}
+
+void kbase_js_set_timeouts(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_backend_timeouts_changed(kbdev);
+}
+
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_as *new_address_space = NULL;
+	unsigned long flags;
+	bool kctx_suspended = false;
+	int as_nr;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Pick available address space for this context */
+	as_nr = kbase_backend_find_free_address_space(kbdev, kctx);
+
+	if (as_nr == KBASEP_AS_NR_INVALID)
+		return false; /* No address spaces currently available */
+
+	new_address_space = &kbdev->as[as_nr];
+
+	/*
+	 * Atomic transaction on the Context and Run Pool begins
+	 */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Check to see if context is dying due to kbase_job_zap_context() */
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Roll back the transaction so far and return */
+		kbase_backend_release_free_address_space(kbdev, as_nr);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL,
+				0u,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	kbase_ctx_flag_set(kctx, KCTX_SCHEDULED);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Assign context to previously chosen address space */
+	if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* Roll back the transaction so far and return */
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+
+		kbase_backend_release_free_address_space(kbdev, as_nr);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		return false;
+	}
+
+	kbdev->hwaccess.active_kctx = kctx;
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
+#endif
+	KBASE_TLSTREAM_TL_RET_AS_CTX(&kbdev->as[kctx->as_nr], kctx);
+
+	/* Cause any future waiter-on-termination to wait until the context is
+	 * descheduled */
+	wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+	/* Re-check for suspending: a suspend could've occurred, and all the
+	 * contexts could've been removed from the runpool before we took this
+	 * lock. In this case, we don't want to allow this context to run jobs,
+	 * we just want it out immediately.
+	 *
+	 * The DMB required to read the suspend flag was issued recently as part
+	 * of the hwaccess_lock locking. If a suspend occurs *after* that lock
+	 * was taken (i.e. this condition doesn't execute), then the
+	 * kbasep_js_suspend() code will cleanup this context instead (by virtue
+	 * of it being called strictly after the suspend flag is set, and will
+	 * wait for this lock to drop) */
+	if (kbase_pm_is_suspending(kbdev)) {
+		/* Cause it to leave at some later point */
+		bool retained;
+
+		retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+		KBASE_DEBUG_ASSERT(retained);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+		kctx_suspended = true;
+	}
+
+	/* Transaction complete */
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Synchronize with any timers */
+	kbase_backend_ctx_count_changed(kbdev);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	/* Note: after this point, the context could potentially get scheduled
+	 * out immediately */
+
+	if (kctx_suspended) {
+		/* Finishing forcing out the context due to a suspend. Use a
+		 * variant of kbasep_js_runpool_release_ctx() that doesn't
+		 * schedule a new context, to prevent a risk of recursion back
+		 * into this function */
+		kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx);
+		return false;
+	}
+	return true;
+}
+
+static bool kbase_js_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (kbase_backend_use_ctx_sched(kbdev, kctx)) {
+		/* Context already has ASID - mark as active */
+		kbdev->hwaccess.active_kctx = kctx;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return true; /* Context already scheduled */
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return kbasep_js_schedule_ctx(kbdev, kctx);
+}
+
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	bool is_scheduled;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* This must never be attempted whilst suspending - i.e. it should only
+	 * happen in response to a syscall from a user-space thread */
+	BUG_ON(kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Mark the context as privileged */
+	kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED);
+
+	is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED);
+	if (!is_scheduled) {
+		/* Add the context to the pullable list */
+		if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0))
+			kbase_js_sync_timers(kbdev);
+
+		/* Fast-starting requires the jsctx_mutex to be dropped,
+		 * because it works on multiple ctxs */
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		/* Try to schedule the context in */
+		kbase_js_sched_all(kbdev);
+
+		/* Wait for the context to be scheduled in */
+		wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			   kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	} else {
+		/* Already scheduled in - We need to retain it to keep the
+		 * corresponding address space */
+		kbasep_js_runpool_retain_ctx(kbdev, kctx);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+}
+KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx);
+
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* We don't need to use the address space anymore */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Release the context - it will be scheduled out */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	kbase_js_sched_all(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx);
+
+void kbasep_js_suspend(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+	u16 retained = 0u;
+	int nr_privileged_ctx = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Prevent all contexts from submitting */
+	js_devdata->runpool_irq.submit_allowed = 0;
+
+	/* Retain each of the contexts, so we can cause it to leave even if it
+	 * had no refcount to begin with */
+	for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+			&js_devdata->runpool_irq.per_as_data[i];
+		struct kbase_context *kctx = js_per_as_data->kctx;
+
+		retained = retained << 1;
+
+		if (kctx) {
+			++(js_per_as_data->as_busy_refcount);
+			retained |= 1u;
+			/* We can only cope with up to 1 privileged context -
+			 * the instrumented context. It'll be suspended by
+			 * disabling instrumentation */
+			if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+				++nr_privileged_ctx;
+				WARN_ON(nr_privileged_ctx != 1);
+			}
+		}
+	}
+	CSTD_UNUSED(nr_privileged_ctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* De-ref the previous retain to ensure each context gets pulled out
+	 * sometime later. */
+	for (i = 0;
+		 i < BASE_MAX_NR_AS;
+		 ++i, retained = retained >> 1) {
+		struct kbasep_js_per_as_data *js_per_as_data =
+			&js_devdata->runpool_irq.per_as_data[i];
+		struct kbase_context *kctx = js_per_as_data->kctx;
+
+		if (retained & 1u)
+			kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	/* Caller must wait for all Power Manager active references to be
+	 * dropped */
+}
+
+void kbasep_js_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int js;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_context *kctx, *n;
+
+		list_for_each_entry_safe(kctx, n,
+				&kbdev->js_data.ctx_list_unpullable[js],
+				jctx.sched_info.ctx.ctx_list_entry[js]) {
+			struct kbasep_js_kctx_info *js_kctx_info;
+			unsigned long flags;
+			bool timer_sync = false;
+
+			js_kctx_info = &kctx->jctx.sched_info;
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+				kbase_js_ctx_pullable(kctx, js, false))
+				timer_sync =
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			if (timer_sync)
+				kbase_backend_ctx_count_changed(kbdev);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		}
+	}
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	/* Restart atom processing */
+	kbase_js_sched_all(kbdev);
+
+	/* JS Resume complete */
+}
+
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if ((katom->core_req & BASE_JD_REQ_FS) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE |
+								BASE_JD_REQ_T)))
+		return false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) &&
+	    (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T)))
+		return false;
+
+	return true;
+}
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_FS)
+		return 0;
+
+	if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+		if (katom->device_nr == 1 &&
+				kbdev->gpu_props.num_core_groups == 2)
+			return 2;
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+			return 2;
+	}
+
+	return 1;
+}
+
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom)
+{
+	bool enqueue_required;
+
+	katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	/* If slot will transition from unpullable to pullable then add to
+	 * pullable list */
+	if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
+		enqueue_required = true;
+	} else {
+		enqueue_required = false;
+	}
+	if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
+			(katom->pre_dep && (katom->pre_dep->atom_flags &
+			KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		int prio = katom->sched_priority;
+		int js = katom->slot_nr;
+		struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+		list_add_tail(&katom->queue, &queue->x_dep_head);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+		enqueue_required = false;
+	} else {
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		/* Add atom to ring buffer. */
+		jsctx_tree_add(kctx, katom);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+	}
+
+	return enqueue_required;
+}
+
+/**
+ * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the
+ *                         runnable_tree, ready for execution
+ * @katom: Atom to submit
+ *
+ * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set,
+ * but is still present in the x_dep list. If @katom has a same-slot dependent
+ * atom then that atom (and any dependents) will also be moved.
+ */
+static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock);
+
+	while (katom) {
+		WARN_ON(!(katom->atom_flags &
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
+
+		if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+			list_del(&katom->queue);
+			katom->atom_flags &=
+					~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+			jsctx_tree_add(katom->kctx, katom);
+			katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+		} else {
+			break;
+		}
+
+		katom = katom->post_dep;
+	}
+}
+
+
+/**
+ * kbase_js_evict_deps - Evict dependencies of a failed atom.
+ * @kctx:       Context pointer
+ * @katom:      Pointer to the atom that has failed.
+ * @js:         The job slot the katom was run on.
+ * @prio:       Priority of the katom.
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propogated to all dependent atoms.
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom, int js, int prio)
+{
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+	struct kbase_jd_atom *next_katom = katom->post_dep;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (next_katom) {
+		KBASE_DEBUG_ASSERT(next_katom->status !=
+				KBASE_JD_ATOM_STATE_HW_COMPLETED);
+		next_katom->will_fail_event_code = katom->event_code;
+
+	}
+
+	/* Has cross slot depenency. */
+	if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE |
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		/* Remove dependency.*/
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+		/* Fail if it had a data dependency. */
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
+			x_dep->will_fail_event_code = katom->event_code;
+		}
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)
+			kbase_js_move_to_tree(x_dep);
+	}
+}
+
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbasep_js_device_data *js_devdata;
+	int pulled;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	js_devdata = &kctx->kbdev->js_data;
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+		return NULL;
+	if (kbase_pm_is_suspending(kctx->kbdev))
+		return NULL;
+
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return NULL;
+	if (kctx->blocked_js[js][katom->sched_priority])
+		return NULL;
+	if (atomic_read(&katom->blocked))
+		return NULL;
+
+	/* Due to ordering restrictions when unpulling atoms on failure, we do
+	 * not allow multiple runs of fail-dep atoms from the same context to be
+	 * present on the same slot */
+	if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) {
+		struct kbase_jd_atom *prev_atom =
+				kbase_backend_inspect_tail(kctx->kbdev, js);
+
+		if (prev_atom && prev_atom->kctx != kctx)
+			return NULL;
+	}
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return NULL;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return NULL;
+	}
+
+	kbase_ctx_flag_set(kctx, KCTX_PULLED);
+
+	pulled = atomic_inc_return(&kctx->atoms_pulled);
+	if (pulled == 1 && !kctx->slots_pullable) {
+		WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+		kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+		atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable);
+	}
+	atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
+	kctx->atoms_pulled_slot_pri[katom->slot_nr][katom->sched_priority]++;
+	jsctx_rb_pull(kctx, katom);
+
+	kbasep_js_runpool_retain_ctx_nolock(kctx->kbdev, kctx);
+	katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+
+	katom->ticks = 0;
+
+	return katom;
+}
+
+
+static void js_return_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+									work);
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	struct kbasep_js_atom_retained_state retained_state;
+	int js = katom->slot_nr;
+	int prio = katom->sched_priority;
+	bool timer_sync = false;
+	bool context_idle = false;
+	unsigned long flags;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	kbasep_js_atom_retained_state_copy(&retained_state, katom);
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	atomic_dec(&kctx->atoms_pulled);
+	atomic_dec(&kctx->atoms_pulled_slot[js]);
+
+	atomic_dec(&katom->blocked);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kctx->atoms_pulled_slot_pri[js][katom->sched_priority]--;
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
+			jsctx_rb_none_to_pull(kctx, js))
+		timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js);
+
+	/* If this slot has been blocked due to soft-stopped atoms, and all
+	 * atoms have now been processed, then unblock the slot */
+	if (!kctx->atoms_pulled_slot_pri[js][prio] &&
+			kctx->blocked_js[js][prio]) {
+		kctx->blocked_js[js][prio] = false;
+
+		/* Only mark the slot as pullable if the context is not idle -
+		 * that case is handled below */
+		if (atomic_read(&kctx->atoms_pulled) &&
+				kbase_js_ctx_pullable(kctx, js, true))
+			timer_sync |= kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, js);
+	}
+
+	if (!atomic_read(&kctx->atoms_pulled)) {
+		if (!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
+				!kbase_ctx_flag(kctx, KCTX_DYING)) {
+			int num_slots = kbdev->gpu_props.num_job_slots;
+			int slot;
+
+			if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+				kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+			for (slot = 0; slot < num_slots; slot++) {
+				if (kbase_js_ctx_pullable(kctx, slot, true))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, slot);
+			}
+		}
+
+		kbase_jm_idle_ctx(kbdev, kctx);
+
+		context_idle = true;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (context_idle) {
+		WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+		kbase_pm_context_idle(kbdev);
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+							&retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+}
+
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_rb_unpull(kctx, katom);
+
+	WARN_ON(work_pending(&katom->work));
+
+	/* Block re-submission until workqueue has run */
+	atomic_inc(&katom->blocked);
+
+	kbase_job_check_leave_disjoint(kctx->kbdev, katom);
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, js_return_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+						struct kbase_jd_atom *katom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	bool timer_sync = false;
+	int atom_slot;
+	bool context_idle = false;
+	int prio = katom->sched_priority;
+
+	kbdev = kctx->kbdev;
+	atom_slot = katom->slot_nr;
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
+		context_idle = !atomic_dec_return(&kctx->atoms_pulled);
+		atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
+		kctx->atoms_pulled_slot_pri[atom_slot][prio]--;
+
+		if (!atomic_read(&kctx->atoms_pulled) &&
+				!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		/* If this slot has been blocked due to soft-stopped atoms, and
+		 * all atoms have now been processed, then unblock the slot */
+		if (!kctx->atoms_pulled_slot_pri[atom_slot][prio]
+				&& kctx->blocked_js[atom_slot][prio]) {
+			kctx->blocked_js[atom_slot][prio] = false;
+			if (kbase_js_ctx_pullable(kctx, atom_slot, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+						kbdev, kctx, atom_slot);
+		}
+	}
+	WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE));
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
+			jsctx_rb_none_to_pull(kctx, atom_slot)) {
+		if (!list_empty(
+			&kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot]))
+			timer_sync |= kbase_js_ctx_list_remove_nolock(
+					kctx->kbdev, kctx, atom_slot);
+	}
+
+	/*
+	 * If submission is disabled on this context (most likely due to an
+	 * atom failure) and there are now no atoms left in the system then
+	 * re-enable submission so that context can be scheduled again.
+	 */
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
+					!atomic_read(&kctx->atoms_pulled) &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+		int js;
+
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	} else if (katom->x_post_dep &&
+			kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+		int js;
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	}
+
+	/* Mark context as inactive. The pm reference will be dropped later in
+	 * jd_done_worker().
+	 */
+	if (context_idle)
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return context_idle;
+}
+
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp)
+{
+	u64 microseconds_spent = 0;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+
+	kbdev = kctx->kbdev;
+
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (katom->will_fail_event_code)
+		katom->event_code = katom->will_fail_event_code;
+
+	katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
+
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_js_evict_deps(kctx, katom, katom->slot_nr,
+				katom->sched_priority);
+	}
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
+				katom->slot_nr), NULL, 0);
+#endif
+
+	/* Calculate the job's time used */
+	if (end_timestamp != NULL) {
+		/* Only calculating it for jobs that really run on the HW (e.g.
+		 * removed from next jobs never actually ran, so really did take
+		 * zero time) */
+		ktime_t tick_diff = ktime_sub(*end_timestamp,
+							katom->start_timestamp);
+
+		microseconds_spent = ktime_to_ns(tick_diff);
+
+		do_div(microseconds_spent, 1000);
+
+		/* Round up time spent to the minimum timer resolution */
+		if (microseconds_spent < KBASEP_JS_TICK_RESOLUTION_US)
+			microseconds_spent = KBASEP_JS_TICK_RESOLUTION_US;
+	}
+
+
+	kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
+
+	/* Unblock cross dependency if present */
+	if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
+			!(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) &&
+			(x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+		bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false);
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+		kbase_js_move_to_tree(x_dep);
+		if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false))
+			kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx,
+					x_dep->slot_nr);
+
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)
+			return x_dep;
+	}
+
+	return NULL;
+}
+
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *last_active;
+	bool timer_sync = false;
+	bool ctx_waiting = false;
+
+	js_devdata = &kbdev->js_data;
+
+	down(&js_devdata->schedule_sem);
+	mutex_lock(&js_devdata->queue_mutex);
+
+	last_active = kbdev->hwaccess.active_kctx;
+
+	while (js_mask) {
+		int js;
+
+		js = ffs(js_mask) - 1;
+
+		while (1) {
+			struct kbase_context *kctx;
+			unsigned long flags;
+			bool context_idle = false;
+
+			kctx = kbase_js_ctx_list_pop_head(kbdev, js);
+
+			if (!kctx) {
+				js_mask &= ~(1 << js);
+				break; /* No contexts on pullable list */
+			}
+
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) {
+				context_idle = true;
+
+				if (kbase_pm_context_active_handle_suspend(
+									kbdev,
+				      KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+					/* Suspend pending - return context to
+					 * queue and stop scheduling */
+					mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					if (kbase_js_ctx_list_add_pullable_head(
+						kctx->kbdev, kctx, js))
+						kbase_js_sync_timers(kbdev);
+					mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					mutex_unlock(&js_devdata->queue_mutex);
+					up(&js_devdata->schedule_sem);
+					return;
+				}
+				kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+			}
+
+			if (!kbase_js_use_ctx(kbdev, kctx)) {
+				mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				/* Context can not be used at this time */
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+				if (kbase_js_ctx_pullable(kctx, js, false)
+				    || kbase_ctx_flag(kctx, KCTX_PRIVILEGED))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev, kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				if (context_idle) {
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				}
+
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+				break;
+			}
+			mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_ctx_flag_clear(kctx, KCTX_PULLED);
+
+			if (!kbase_jm_kick(kbdev, 1 << js))
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+
+			if (!kbase_ctx_flag(kctx, KCTX_PULLED)) {
+				bool pullable = kbase_js_ctx_pullable(kctx, js,
+						true);
+
+				/* Failed to pull jobs - push to head of list.
+				 * Unless this context is already 'active', in
+				 * which case it's effectively already scheduled
+				 * so push it to the back of the list. */
+				if (pullable && kctx == last_active)
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev,
+							kctx, js);
+				else if (pullable)
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev,
+							kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+								kctx->kbdev,
+								kctx, js);
+
+				/* If this context is not the active context,
+				 * but the active context is pullable on this
+				 * slot, then we need to remove the active
+				 * marker to prevent it from submitting atoms in
+				 * the IRQ handler, which would prevent this
+				 * context from making progress. */
+				if (last_active && kctx != last_active &&
+						kbase_js_ctx_pullable(
+						last_active, js, true))
+					ctx_waiting = true;
+
+				if (context_idle) {
+					kbase_jm_idle_ctx(kbdev, kctx);
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				} else {
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+				}
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+				js_mask &= ~(1 << js);
+				break; /* Could not run atoms on this slot */
+			}
+
+			/* Push to back of list */
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev, kctx, js);
+			else
+				timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		}
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	if (kbdev->hwaccess.active_kctx == last_active && ctx_waiting)
+		kbdev->hwaccess.active_kctx = NULL;
+
+	mutex_unlock(&js_devdata->queue_mutex);
+	up(&js_devdata->schedule_sem);
+}
+
+void kbase_js_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	int js;
+
+	/*
+	 * Critical assumption: No more submission is possible outside of the
+	 * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs)
+	 * whilst the struct kbase_context is terminating.
+	 */
+
+	/* First, atomically do the following:
+	 * - mark the context as dying
+	 * - try to evict it from the queue */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_set(kctx, KCTX_DYING);
+
+	dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+
+	/*
+	 * At this point we know:
+	 * - If eviction succeeded, it was in the queue, but now no
+	 *   longer is
+	 *  - We must cancel the jobs here. No Power Manager active reference to
+	 *    release.
+	 *  - This happens asynchronously - kbase_jd_zap_context() will wait for
+	 *    those jobs to be killed.
+	 * - If eviction failed, then it wasn't in the queue. It is one
+	 *   of the following:
+	 *  - a. it didn't have any jobs, and so is not in the Queue or
+	 *       the Run Pool (not scheduled)
+	 *   - Hence, no more work required to cancel jobs. No Power Manager
+	 *     active reference to release.
+	 *  - b. it was in the middle of a scheduling transaction (and thus must
+	 *       have at least 1 job). This can happen from a syscall or a
+	 *       kernel thread. We still hold the jsctx_mutex, and so the thread
+	 *       must be waiting inside kbasep_js_try_schedule_head_ctx(),
+	 *       before checking whether the runpool is full. That thread will
+	 *       continue after we drop the mutex, and will notice the context
+	 *       is dying. It will rollback the transaction, killing all jobs at
+	 *       the same time. kbase_jd_zap_context() will wait for those jobs
+	 *       to be killed.
+	 *   - Hence, no more work required to cancel jobs, or to release the
+	 *     Power Manager active reference.
+	 *  - c. it is scheduled, and may or may not be running jobs
+	 * - We must cause it to leave the runpool by stopping it from
+	 * submitting any more jobs. When it finally does leave,
+	 * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs
+	 * (because it is dying), release the Power Manager active reference,
+	 * and will not requeue the context in the queue.
+	 * kbase_jd_zap_context() will wait for those jobs to be killed.
+	 *  - Hence, work required just to make it leave the runpool. Cancelling
+	 *    jobs and releasing the Power manager active reference will be
+	 *    handled when it leaves the runpool.
+	 */
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (!list_empty(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+				list_del_init(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+		}
+
+		/* The following events require us to kill off remaining jobs
+		 * and update PM book-keeping:
+		 * - we evicted it correctly (it must have jobs to be in the
+		 *   Queue)
+		 *
+		 * These events need no action, but take this path anyway:
+		 * - Case a: it didn't have any jobs, and was never in the Queue
+		 * - Case b: scheduling transaction will be partially rolled-
+		 *           back (this already cancels the jobs)
+		 */
+
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+
+		/* Only cancel jobs when we evicted from the
+		 * queue. No Power Manager active reference was held.
+		 *
+		 * Having is_dying set ensures that this kills, and
+		 * doesn't requeue */
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+	} else {
+		unsigned long flags;
+		bool was_retained;
+
+		/* Case c: didn't evict, but it is scheduled - it's in the Run
+		 * Pool */
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+		dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+
+		/* Disable the ctx from submitting any more jobs */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		/* Retain and (later) release the context whilst it is is now
+		 * disallowed from submitting jobs - ensures that someone
+		 * somewhere will be removing the context later on */
+		was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+		/* Since it's scheduled and we have the jsctx_mutex, it must be
+		 * retained successfully */
+		KBASE_DEBUG_ASSERT(was_retained);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+
+		/* Cancel any remaining running jobs for this kctx - if any.
+		 * Submit is disallowed which takes effect immediately, so no
+		 * more new jobs will appear after we do this. */
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+			kbase_job_slot_hardstop(kctx, js, NULL);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+									kctx);
+
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u);
+
+	/* After this, you must wait on both the
+	 * kbase_jd_context::zero_jobs_wait and the
+	 * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs
+	 * to be destroyed, and the context to be de-scheduled (if it was on the
+	 * runpool).
+	 *
+	 * kbase_jd_zap_context() will do this. */
+}
+
+static inline int trace_get_refcnt(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int as_nr;
+	int refcnt = 0;
+
+	js_devdata = &kbdev->js_data;
+
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+
+		js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+		refcnt = js_per_as_data->as_busy_refcount;
+	}
+
+	return refcnt;
+}
+
+/**
+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context
+ * @kctx:     Pointer to context.
+ * @callback: Pointer to function to call for each job.
+ *
+ * Call a function on all jobs belonging to a non-queued, non-running
+ * context, and detach the jobs from the context as it goes.
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * Atoms will be removed from the queue, so this must only be called when
+ * cancelling jobs (which occurs as part of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_ctx_job_cb callback)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	unsigned long flags;
+	u32 js;
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL,
+					0u, trace_get_refcnt(kbdev, kctx));
+
+	/* Invoke callback on jobs on each slot in turn */
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		jsctx_queue_foreach(kctx, js, callback);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
new file mode 100644
index 0000000..76172f7
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -0,0 +1,1051 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler APIs.
+ */
+
+#ifndef _KBASE_JS_H_
+#define _KBASE_JS_H_
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_context.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_debug.h"
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js Job Scheduler Internal APIs
+ * @{
+ *
+ * These APIs are Internal to KBase.
+ */
+
+/**
+ * @brief Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * @brief Halt the Job Scheduler.
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero intitialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the struct kbase_context must be zero intitialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the context is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guarenteed not to update the Policy Queue. And so, the caller is
+ * guarenteed to not need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is false.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold hwaccess_lock (as this will be obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return true indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return false indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
+ *   the atom
+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ *   remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions.
+ *
+ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Completely remove a job chain from the Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it is not being killed with kbasep_jd_cancel()
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold the hwaccess_lock, (as this will be obtained
+ *   internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * @return true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs
+ * @return false otherwise
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_jd_atom *katom);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locks must be held by the caller:
+ * - hwaccess_lock
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that is stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *   If the hwaccess_lock is already held, then the caller should use
+ *   kbasep_js_runpool_lookup_ctx_nolock() instead.
+ *
+ * @return a valid struct kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
+
+/**
+ * kbasep_js_runpool_lookup_ctx_nolock - Lookup a context in the Run Pool based
+ *         upon its current address space and ensure that is stays scheduled in.
+ * @kbdev: Device pointer
+ * @as_nr: Address space to lookup
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * Note: This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must the hold the hwaccess_lock
+ *
+ * Return: a valid struct kbase_context on success, which has been refcounted as
+ *         being busy.
+ *         NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+		struct kbase_device *kbdev, int as_nr);
+
+/**
+ * @brief Handling the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.  \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero and the context \em might be scheduled out
+ * (depending on whether the Scheudling Policy has deemed it so, or if it has run
+ * out of jobs).
+ *
+ * If the context does get scheduled out, then The following actions will be
+ * taken as part of deschduling a context:
+ * - For the context being descheduled:
+ *  - If the context is in the processing of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ *  - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ *  - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ *  - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * - Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * - Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * - Attempt scheduling in a new context (if one is available), and if necessary,
+ * running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function \b cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a \a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * - Releasing the atom's context attributes
+ * - Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * @brief Variant of kbase_js_runpool_release_ctx() that assumes that
+ * kbasep_js_device_data::runpool_mutex and
+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
+ * attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * @brief Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the function will wait
+ * for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space reserved) until
+ * kbasep_js_release_privileged_ctx is called).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Release a privileged context, allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - hwaccess_lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Mangement active refcount to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduler's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time.
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * @brief Resume the Job Scheduler after a Power Management Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * - Schedules contexts back into the runpool
+ * - Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * @brief Submit an atom to the job scheduler.
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to submit
+ *
+ * @return Whether the context requires to be enqueued. */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom);
+
+/**
+  * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
+  * @kctx:  Context Pointer
+  * @prio:  Priority (specifies the queue together with js).
+  * @js:    Job slot (specifies the queue together with prio).
+  *
+  * Pushes all possible atoms from the linked list to the ringbuffer.
+  * Number of atoms are limited to free space in the ringbuffer and
+  * number of available atoms in the linked list.
+  *
+  */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
+/**
+ * @brief Pull an atom from a context in the job scheduler for execution.
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context to pull from
+ * @param[in] js    Job slot to pull from
+ * @return          Pointer to an atom, or NULL if there are no atoms for this
+ *                  slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * @brief Return an atom to the job scheduler ringbuffer.
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to unpull
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom from jd_done_worker(), removing it from the job
+ * scheduler ringbuffer.
+ *
+ * If the atom failed then all dependee atoms marked for failure propagation
+ * will also fail.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] katom Pointer to the atom to complete
+ * @return true if the context is now idle (no jobs pulled)
+ *         false otherwise
+ */
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom.
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] katom         Pointer to the atom to complete
+ * @param[in] end_timestamp The time that the atom completed (may be NULL)
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp);
+
+/**
+ * @brief Submit atoms from all available contexts.
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ *
+ * @param[in] kbdev    Device pointer
+ * @param[in] js_mask  Mask of job slots to submit to
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_jd_zap_context - Attempt to deschedule a context that is being
+ *                        destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove a context from any internal job scheduler queues
+ * and perform any other actions to ensure a context will not be submitted
+ * from.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * @brief Validate an atom
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * @param[in] kbdev  Device pointer
+ * @param[in] katom  Atom to validate
+ * @return           true if atom is valid
+ *                   false otherwise
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user specified data
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used, if a timeout
+ * is negative then the default is set.
+ */
+void kbase_js_set_timeouts(struct kbase_device *kbdev);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * @brief Check that a context is allowed to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * As with any bool, never test the return value with true.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 test_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	test_bit = (u16) (1u << kctx->as_nr);
+
+	return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+}
+
+/**
+ * @brief Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 set_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	set_bit = (u16) (1u << kctx->as_nr);
+
+	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * @brief Prevent a context from submitting more jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 clear_bit;
+	u16 clear_mask;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	clear_bit = (u16) (1u << kctx->as_nr);
+	clear_mask = ~clear_bit;
+
+	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
+
+/**
+ * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom
+ */
+static inline void kbasep_js_clear_job_retry_submit(struct kbase_jd_atom *atom)
+{
+	atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Mark a slot as requiring resubmission by carrying that information on a
+ * completing atom.
+ *
+ * @note This can ASSERT in debug builds if the submit slot has been set to
+ * something other than the current value for @a js. This is because you might
+ * be unintentionally stopping more jobs being submitted on the old submit
+ * slot, and that might cause a scheduling-hang.
+ *
+ * @note If you can guarantee that the atoms for the original slot will be
+ * submitted on some other slot, then call kbasep_js_clear_job_retry_submit()
+ * first to silence the ASSERT.
+ */
+static inline void kbasep_js_set_job_retry_submit_slot(struct kbase_jd_atom *atom, int js)
+{
+	KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS);
+	KBASE_DEBUG_ASSERT((atom->retry_submit_on_slot ==
+					KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID)
+				|| (atom->retry_submit_on_slot == js));
+
+	atom->retry_submit_on_slot = js;
+}
+
+/**
+ * Create an initial 'invalid' atom retained state, that requires no
+ * atom-related work to be done on releasing with
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
+{
+	retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+	retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+	retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Copy atom state that can be made available after jd_done_nolock() is called
+ * on that atom.
+ */
+static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
+{
+	retained_state->event_code = katom->event_code;
+	retained_state->core_req = katom->core_req;
+	retained_state->retry_submit_on_slot = katom->retry_submit_on_slot;
+	retained_state->sched_priority = katom->sched_priority;
+	retained_state->device_nr = katom->device_nr;
+}
+
+/**
+ * @brief Determine whether an atom has finished (given its retained state),
+ * and so should be given back to userspace/removed from the system.
+ *
+ * Reasons for an atom not finishing include:
+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ *
+ * @param[in] katom_retained_state the retained state of the atom to check
+ * @return    false if the atom has not finished
+ * @return    !=false if the atom has finished
+ */
+static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
+}
+
+/**
+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
+ * code should just ignore it.
+ *
+ * @param[in] katom_retained_state the atom's retained state to check
+ * @return    false if the retained state is invalid, and can be ignored
+ * @return    !=false if the retained state is valid
+ */
+static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_atom_retained_state *katom_retained_state, int *res)
+{
+	int js = katom_retained_state->retry_submit_on_slot;
+
+	*res = js;
+	return (bool) (js >= 0);
+}
+
+#if KBASE_DEBUG_DISABLE_ASSERTS == 0
+/**
+ * Debug Check the refcount of a context. Only use within ASSERTs
+ *
+ * Obtains hwaccess_lock
+ *
+ * @return negative value if the context is not scheduled in
+ * @return current refcount of the context if it is scheduled in. The refcount
+ * is not guarenteed to be kept constant.
+ */
+static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int result = -1;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID)
+		result = js_devdata->runpool_irq.per_as_data[as_nr].as_busy_refcount;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return result;
+}
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS == 0 */
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guarenteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guarenteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	found_kctx = js_per_as_data->kctx;
+	KBASE_DEBUG_ASSERT(found_kctx == NULL || js_per_as_data->as_busy_refcount > 0);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return found_kctx;
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: when you need the number of cycles to guarantee you won't wait for
+ * longer than 'us' time (you might have a shorter wait).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_min_freq(struct kbase_device *kbdev, u32 us)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return us * (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need the number of cycles to guarantee you'll wait at least
+ * 'us' amount of time (but you might wait longer).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_max_freq(struct kbase_device *kbdev, u32 us)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return us * (u32) (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the worst-case wait that 'ticks' cycles will
+ * take (you guarantee that you won't wait any longer than this, but it may
+ * be shorter).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_min_freq(struct kbase_device *kbdev, u32 ticks)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return ticks / gpu_freq * 1000;
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the best-case wait for 'tick' cycles (you
+ * guarantee to be waiting for at least this long, but it may be longer).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_max_freq(struct kbase_device *kbdev, u32 ticks)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return ticks / gpu_freq * 1000;
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+	++(js_devdata->nr_all_contexts_running);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+									S8_MAX);
+		++(js_devdata->nr_user_contexts_running);
+	}
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	--(js_devdata->nr_all_contexts_running);
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		--(js_devdata->nr_user_contexts_running);
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+	}
+}
+
+
+/**
+ * @brief Submit atoms from all available contexts to all job slots.
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ *
+ * @param[in] kbdev    Device pointer
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+	kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
+ *                                        to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treates priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ *         0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ *         KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+	if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+		return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+	return kbasep_js_atom_priority_to_relative[atom_prio];
+}
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+	unsigned int prio_idx;
+
+	KBASE_DEBUG_ASSERT(0 <= sched_prio
+			&& sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+	prio_idx = (unsigned int)sched_prio;
+
+	return kbasep_js_relative_priority_to_atom[prio_idx];
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100644
index 0000000..321506a
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,301 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+
+/*
+ * Private functions follow
+ */
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
+		++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) {
+			/* First refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+		--(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+			/* Last de-refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+	++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		/* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+	}
+
+	return runpool_state_changed;
+}
+
+/*
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		lockdep_assert_held(&kbdev->hwaccess_lock);
+		/* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+	}
+
+	/* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */
+	--(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* This context never submits, so don't track any scheduling attributes */
+		return;
+	}
+
+	/* Transfer attributes held in the context flags for contexts that have submit enabled */
+
+	/* ... More attributes can be added here ... */
+
+	/* The context should not have been scheduled yet, so ASSERT if this caused
+	 * runpool state changes (note that other threads *can't* affect the value
+	 * of runpool_state_changed, due to how it's calculated) */
+	KBASE_DEBUG_ASSERT(runpool_state_changed == false);
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed;
+	int i;
+
+	/* Retain any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled in, so update the runpool with the new attributes */
+			runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+
+			/* We don't need to know about state changed, because retaining a
+			 * context occurs on scheduling it, and that itself will also try
+			 * to run new atoms */
+			CSTD_UNUSED(runpool_state_changed);
+		}
+	}
+}
+
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+	int i;
+
+	/* Release any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled out, so update the runpool on the removed attributes */
+			runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom);
+	core_req = katom->core_req;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	/* We don't need to know about state changed, because retaining an
+	 * atom occurs on adding it, and that itself will also try to run
+	 * new atoms */
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom_retained_state);
+	core_req = katom_retained_state->core_req;
+
+	/* No-op for invalid atoms */
+	if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false)
+		return false;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	return runpool_state_changed;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
new file mode 100644
index 0000000..ce91833
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -0,0 +1,158 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_ctx_attr.h
+ * Job Scheduler Context Attribute APIs
+ */
+
+#ifndef _KBASE_JS_CTX_ATTR_H_
+#define _KBASE_JS_CTX_ATTR_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+
+/**
+ * Set the initial attributes of a context (when context create flags are set)
+ *
+ * Requires:
+ * - Hold the jsctx_mutex
+ */
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of a context
+ *
+ * This occurs on scheduling in the context on the runpool (but after
+ * is_scheduled is set)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ */
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Release all attributes of a context
+ *
+ * This occurs on scheduling out the context from the runpool (but before
+ * is_scheduled is cleared)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of an atom
+ *
+ * This occurs on adding an atom to a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ */
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * Release all attributes of an atom, given its retained state.
+ *
+ * This occurs after (permanently) removing an atom from a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * This is a no-op when \a katom_retained_state is invalid.
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+
+	return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	/* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+	return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+	return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100644
index 0000000..119344c
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,423 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+	base_jd_core_req core_req;
+	kbase_context_flags ctx_req;
+	u32 device_nr;
+};
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
+
+/**
+ * @brief the IRQ_THROTTLE time in microseconds
+ *
+ * This will be converted via the GPU's clock frequency into a cycle-count.
+ *
+ * @note we can make an estimate of the GPU's frequency by periodically
+ * sampling its CYCLE_COUNT register
+ */
+#define KBASE_JS_IRQ_THROTTLE_TIME_US 20
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affect scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accomodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ */
+enum kbasep_js_ctx_attr {
+	/** Attribute indicating a context that contains Compute jobs. That is,
+	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE,
+
+	/** Attribute indicating a context that contains Non-Compute jobs. That is,
+	 * the context has some jobs that are \b not of type @ref
+	 * BASE_JD_REQ_ONLY_COMPUTE.
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+	/** Attribute indicating that a context contains compute-job atoms that
+	 * aren't restricted to a coherent group, and can run on all cores.
+	 *
+	 * Specifically, this is when the atom's \a core_req satisfy:
+	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
+	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+	 *
+	 * Such atoms could be blocked from running if one of the coherent groups
+	 * is being used by another job slot, so tracking this context attribute
+	 * allows us to prevent such situations.
+	 *
+	 * @note This doesn't take into account the 1-coregroup case, where all
+	 * compute atoms would effectively be able to run on 'all cores', but
+	 * contexts will still not always get marked with this attribute. Instead,
+	 * it is the caller's responsibility to take into account the number of
+	 * coregroups when interpreting this attribute.
+	 *
+	 * @note Whilst Tiler atoms are normally combined with
+	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+	 * enough to handle anyway.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+	/** Must be the last in the enum */
+	KBASEP_JS_CTX_ATTR_COUNT
+};
+
+enum {
+	/** Bit indicating that new atom should be started because this atom completed */
+	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
+	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
+
+/**
+ * Data used by the scheduler that is unique for each Address Space.
+ *
+ * This is used in IRQ context and hwaccess_lock must be held whilst accessing
+ * this data (inculding reads and atomic decisions based on the read).
+ */
+struct kbasep_js_per_as_data {
+	/**
+	 * Ref count of whether this AS is busy, and must not be scheduled out
+	 *
+	 * When jobs are running this is always positive. However, it can still be
+	 * positive when no jobs are running. If all you need is a heuristic to
+	 * tell you whether jobs might be running, this should be sufficient.
+	 */
+	int as_busy_refcount;
+
+	/** Pointer to the current context on this address space, or NULL for no context */
+	struct kbase_context *kctx;
+};
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not rollover (which would be undefined behavior), and so under
+ * the Total License model, it is free to make optimizations based on that (i.e.
+ * to remove masking).
+ */
+struct kbasep_js_device_data {
+	/* Sub-structure to collect together Job Scheduling data used in IRQ
+	 * context. The hwaccess_lock must be held when accessing. */
+	struct runpool_irq {
+		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
+		 * When bit 'N' is set in this, it indicates whether the context bound to address space
+		 * 'N' (per_as_data[N].kctx) is allowed to submit jobs.
+		 *
+		 * It is placed here because it's much more memory efficient than having a u8 in
+		 * struct kbasep_js_per_as_data to store this flag  */
+		u16 submit_allowed;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of contexts
+		 * that can fit into the runpool. This is currently BASE_MAX_NR_AS
+		 *
+		 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+		 * the refcount. Hence, it's not worthwhile reducing this to
+		 * bit-manipulation on u32s to save space (where in contrast, 4 bit
+		 * sub-fields would be easy to do and would save space).
+		 *
+		 * Whilst this must not become negative, the sign bit is used for:
+		 * - error detection in debug builds
+		 * - Optimization: it is undefined for a signed int to overflow, and so
+		 * the compiler can optimize for that never happening (thus, no masking
+		 * is required on updating the variable) */
+		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/** Data that is unique for each AS */
+		struct kbasep_js_per_as_data per_as_data[BASE_MAX_NR_AS];
+
+		/*
+		 * Affinity management and tracking
+		 */
+		/** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
+		 * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
+		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
+		/** Refcount for each core owned by each slot. Used to generate the
+		 * slot_affinities array of bitvectors
+		 *
+		 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+		 * because it is refcounted only when a job is definitely about to be
+		 * submitted to a slot, and is de-refcounted immediately after a job
+		 * finishes */
+		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
+	} runpool_irq;
+
+	/**
+	 * Run Pool mutex, for managing contexts within the runpool.
+	 * Unless otherwise specified, you must hold this lock whilst accessing any
+	 * members that follow
+	 *
+	 * In addition, this is used to access:
+	 * - the kbasep_js_kctx_info::runpool substructure
+	 */
+	struct mutex runpool_mutex;
+
+	/**
+	 * Queue Lock, used to access the Policy's queue of contexts independently
+	 * of the Run Pool.
+	 *
+	 * Of course, you don't need the Run Pool lock to access this.
+	 */
+	struct mutex queue_mutex;
+
+	/**
+	 * Scheduling semaphore. This must be held when calling
+	 * kbase_jm_kick()
+	 */
+	struct semaphore schedule_sem;
+
+	/**
+	 * List of contexts that can currently be pulled from
+	 */
+	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
+	/**
+	 * List of contexts that can not currently be pulled from, but have
+	 * jobs currently running.
+	 */
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
+
+	u16 as_free;				/**< Bitpattern of free Address Spaces */
+
+	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+	s8 nr_user_contexts_running;
+	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+	s8 nr_all_contexts_running;
+
+	/** Core Requirements to match up with base_js_atom's core_req memeber
+	 * @note This is a write-once member, and so no locking is required to read */
+	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+	u32 scheduling_period_ns;    /*< Value for JS_SCHEDULING_PERIOD_NS */
+	u32 soft_stop_ticks;	     /*< Value for JS_SOFT_STOP_TICKS */
+	u32 soft_stop_ticks_cl;	     /*< Value for JS_SOFT_STOP_TICKS_CL */
+	u32 hard_stop_ticks_ss;	     /*< Value for JS_HARD_STOP_TICKS_SS */
+	u32 hard_stop_ticks_cl;	     /*< Value for JS_HARD_STOP_TICKS_CL */
+	u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */
+	u32 gpu_reset_ticks_ss;	     /*< Value for JS_RESET_TICKS_SS */
+	u32 gpu_reset_ticks_cl;	     /*< Value for JS_RESET_TICKS_CL */
+	u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
+	u32 ctx_timeslice_ns;		 /**< Value for JS_CTX_TIMESLICE_NS */
+
+	/**< Value for JS_SOFT_JOB_TIMEOUT */
+	atomic_t soft_job_timeout_ms;
+
+	/** List of suspended soft jobs */
+	struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Support soft-stop on a single context */
+	bool softstop_always;
+#endif				/* CONFIG_MALI_DEBUG */
+
+	/** The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths).
+	 * @note This is a write-once member, and so no locking is required to read */
+	int init_status;
+
+	/* Number of contexts that can currently be pulled from */
+	u32 nr_contexts_pullable;
+
+	/* Number of contexts that can either be pulled from or are currently
+	 * running */
+	atomic_t nr_contexts_runnable;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+
+	/**
+	 * Job Scheduler Context information sub-structure. These members are
+	 * accessed regardless of whether the context is:
+	 * - In the Policy's Run Pool
+	 * - In the Policy's Queue
+	 * - Not queued nor in the Run Pool.
+	 *
+	 * You must obtain the jsctx_mutex before accessing any other members of
+	 * this substructure.
+	 *
+	 * You may not access any of these members from IRQ context.
+	 */
+	struct kbase_jsctx {
+		struct mutex jsctx_mutex;		    /**< Job Scheduler Context lock */
+
+		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+		 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
+		 * for such jobs*/
+		u32 nr_jobs;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of atoms on
+		 * the context. **/
+		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/**
+		 * Wait queue to wait for KCTX_SHEDULED flag state changes.
+		 * */
+		wait_queue_head_t is_scheduled_wait;
+
+		/** Link implementing JS queues. Context can be present on one
+		 * list per job slot
+		 */
+		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
+	} ctx;
+
+	/* The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths) */
+	int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+	/** Event code - to determine whether the atom has finished */
+	enum base_jd_event_code event_code;
+	/** core requirements */
+	base_jd_core_req core_req;
+	/* priority */
+	int sched_priority;
+	/** Job Slot to retry submitting to if submission from IRQ handler failed */
+	int retry_submit_on_slot;
+	/* Core group atom was executed on */
+	u32 device_nr;
+
+};
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbase_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
+ *
+ * @see kbase_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_MED,
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,
+	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100644
index 0000000..6d1e61f
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port. */
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+
+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+	#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+#else
+	#define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100644
index 0000000..20a18a9
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,2601 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/compat.h>
+#include <linux/version.h>
+
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_tlstream.h>
+
+/* This function finds out which RB tree the given GPU VA region belongs to
+ * based on the region zone */
+static struct rb_root *kbase_reg_flags_to_rbtree(struct kbase_context *kctx,
+						    struct kbase_va_region *reg)
+{
+	struct rb_root *rbtree = NULL;
+
+	switch (reg->flags & KBASE_REG_ZONE_MASK) {
+	case KBASE_REG_ZONE_CUSTOM_VA:
+		rbtree = &kctx->reg_rbtree_custom;
+		break;
+	case KBASE_REG_ZONE_EXEC:
+		rbtree = &kctx->reg_rbtree_exec;
+		break;
+	case KBASE_REG_ZONE_SAME_VA:
+		rbtree = &kctx->reg_rbtree_same;
+		/* fall through */
+	default:
+		rbtree = &kctx->reg_rbtree_same;
+		break;
+	}
+
+	return rbtree;
+}
+
+/* This function finds out which RB tree the given pfn from the GPU VA belongs
+ * to based on the memory zone the pfn refers to */
+static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
+								    u64 gpu_pfn)
+{
+	struct rb_root *rbtree = NULL;
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif /* CONFIG_64BIT */
+		if (gpu_pfn >= KBASE_REG_ZONE_CUSTOM_VA_BASE)
+			rbtree = &kctx->reg_rbtree_custom;
+		else if (gpu_pfn >= KBASE_REG_ZONE_EXEC_BASE)
+			rbtree = &kctx->reg_rbtree_exec;
+		else
+			rbtree = &kctx->reg_rbtree_same;
+#ifdef CONFIG_64BIT
+	} else {
+		if (gpu_pfn >= kctx->same_va_end)
+			rbtree = &kctx->reg_rbtree_custom;
+		else
+			rbtree = &kctx->reg_rbtree_same;
+	}
+#endif /* CONFIG_64BIT */
+
+	return rbtree;
+}
+
+/* This function inserts a region into the tree. */
+static void kbase_region_tracker_insert(struct kbase_context *kctx,
+						struct kbase_va_region *new_reg)
+{
+	u64 start_pfn = new_reg->start_pfn;
+	struct rb_node **link = NULL;
+	struct rb_node *parent = NULL;
+	struct rb_root *rbtree = NULL;
+
+	rbtree = kbase_reg_flags_to_rbtree(kctx, new_reg);
+
+	link = &(rbtree->rb_node);
+	/* Find the right place in the tree using tree search */
+	while (*link) {
+		struct kbase_va_region *old_reg;
+
+		parent = *link;
+		old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+		/* RBTree requires no duplicate entries. */
+		KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+		if (old_reg->start_pfn > start_pfn)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Put the new node there, and rebalance tree */
+	rb_link_node(&(new_reg->rblink), parent, link);
+
+	rb_insert_color(&(new_reg->rblink), rbtree);
+}
+
+/* Find allocated region enclosing free range. */
+static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
+		struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	u64 end_pfn = start_pfn + nr_pages;
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, start_pfn);
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (start_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (end_pfn > tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_root *rbtree = NULL;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (gpu_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (gpu_pfn >= tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->start_pfn > gpu_pfn)
+			rbnode = rbnode->rb_left;
+		else if (reg->start_pfn < gpu_pfn)
+			rbnode = rbnode->rb_right;
+		else
+			return reg;
+
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align)
+{
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	/* Note that this search is a linear search, as we do not have a target
+	   address in mind, so does not benefit from the rbtree search */
+
+	rbtree = kbase_reg_flags_to_rbtree(kctx, reg_reqs);
+
+	rbnode = rb_first(rbtree);
+
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if ((reg->nr_pages >= nr_pages) &&
+				(reg->flags & KBASE_REG_FREE)) {
+			/* Check alignment */
+			u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1);
+
+			if ((start_pfn >= reg->start_pfn) &&
+					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1)))
+				return reg;
+		}
+		rbnode = rb_next(rbnode);
+	}
+
+	return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions.  It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ */
+static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	struct rb_node *rbprev;
+	struct kbase_va_region *prev = NULL;
+	struct rb_node *rbnext;
+	struct kbase_va_region *next = NULL;
+	struct rb_root *reg_rbtree = NULL;
+
+	int merged_front = 0;
+	int merged_back = 0;
+	int err = 0;
+
+	reg_rbtree = kbase_reg_flags_to_rbtree(kctx, reg);
+
+	/* Try to merge with the previous block first */
+	rbprev = rb_prev(&(reg->rblink));
+	if (rbprev) {
+		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+		if (prev->flags & KBASE_REG_FREE) {
+			/* We're compatible with the previous VMA,
+			 * merge with it */
+			WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) !=
+					    (reg->flags & KBASE_REG_ZONE_MASK));
+			prev->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), reg_rbtree);
+			reg = prev;
+			merged_front = 1;
+		}
+	}
+
+	/* Try to merge with the next block second */
+	/* Note we do the lookup here as the tree may have been rebalanced. */
+	rbnext = rb_next(&(reg->rblink));
+	if (rbnext) {
+		/* We're compatible with the next VMA, merge with it */
+		next = rb_entry(rbnext, struct kbase_va_region, rblink);
+		if (next->flags & KBASE_REG_FREE) {
+			WARN_ON((next->flags & KBASE_REG_ZONE_MASK) !=
+					    (reg->flags & KBASE_REG_ZONE_MASK));
+			next->start_pfn = reg->start_pfn;
+			next->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), reg_rbtree);
+			merged_back = 1;
+			if (merged_front) {
+				/* We already merged with prev, free it */
+				kbase_free_alloced_region(reg);
+			}
+		}
+	}
+
+	/* If we failed to merge then we need to add a new block */
+	if (!(merged_front || merged_back)) {
+		/*
+		 * We didn't merge anything. Add a new free
+		 * placeholder and remove the original one.
+		 */
+		struct kbase_va_region *free_reg;
+
+		free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+		if (!free_reg) {
+			err = -ENOMEM;
+			goto out;
+		}
+		rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
+	}
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * @brief Insert a VA region to the list, replacing the current at_reg.
+ */
+static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_root *reg_rbtree = NULL;
+	int err = 0;
+
+	reg_rbtree = kbase_reg_flags_to_rbtree(kctx, at_reg);
+
+	/* Must be a free region */
+	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+	/* start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+	/* at least nr_pages from start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	/* Regions are a whole use, so swap and delete old one. */
+	if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+		rb_replace_node(&(at_reg->rblink), &(new_reg->rblink),
+								reg_rbtree);
+		kbase_free_alloced_region(at_reg);
+	}
+	/* New region replaces the start of the old one, so insert before. */
+	else if (at_reg->start_pfn == start_pfn) {
+		at_reg->start_pfn += nr_pages;
+		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region replaces the end of the old one, so insert after. */
+	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region splits the old one, so insert and create new */
+	else {
+		struct kbase_va_region *new_front_reg;
+
+		new_front_reg = kbase_alloc_free_region(kctx,
+				at_reg->start_pfn,
+				start_pfn - at_reg->start_pfn,
+				at_reg->flags & KBASE_REG_ZONE_MASK);
+
+		if (new_front_reg) {
+			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+			at_reg->start_pfn = start_pfn + nr_pages;
+
+			kbase_region_tracker_insert(kctx, new_front_reg);
+			kbase_region_tracker_insert(kctx, new_reg);
+		} else {
+			err = -ENOMEM;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * @brief Add a VA region to the list.
+ */
+int kbase_add_va_region(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 addr,
+		size_t nr_pages, size_t align)
+{
+	struct kbase_va_region *tmp;
+	u64 gpu_pfn = addr >> PAGE_SHIFT;
+	int err = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!align)
+		align = 1;
+
+	/* must be a power of 2 */
+	KBASE_DEBUG_ASSERT((align & (align - 1)) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+	/* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
+	if (gpu_pfn) {
+		struct device *dev = kctx->kbdev->dev;
+
+		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+		tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
+		if (!tmp) {
+			dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+		if (!(tmp->flags & KBASE_REG_FREE)) {
+			dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
+			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
+			dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
+		if (err) {
+			dev_warn(dev, "Failed to insert va region");
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		goto exit;
+	}
+
+	/* Path 2: Map any free address which meets the requirements.  */
+	{
+		u64 start_pfn;
+
+		/*
+		 * Depending on the zone the allocation request is for
+		 * we might need to retry it.
+		 */
+		do {
+			tmp = kbase_region_tracker_find_region_meeting_reqs(
+					kctx, reg, nr_pages, align);
+			if (tmp) {
+				start_pfn = (tmp->start_pfn + align - 1) &
+						~(align - 1);
+				err = kbase_insert_va_region_nolock(kctx, reg,
+						tmp, start_pfn, nr_pages);
+				break;
+			}
+
+			/*
+			 * If the allocation is not from the same zone as JIT
+			 * then don't retry, we're out of VA and there is
+			 * nothing which can be done about it.
+			 */
+			if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+					KBASE_REG_ZONE_CUSTOM_VA)
+				break;
+		} while (kbase_jit_evict(kctx));
+
+		if (!tmp)
+			err = -ENOMEM;
+	}
+
+ exit:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+		struct kbase_va_region *same_va_reg,
+		struct kbase_va_region *exec_reg,
+		struct kbase_va_region *custom_va_reg)
+{
+	kctx->reg_rbtree_same = RB_ROOT;
+	kbase_region_tracker_insert(kctx, same_va_reg);
+
+	/* Although exec and custom_va_reg don't always exist,
+	 * initialize unconditionally because of the mem_view debugfs
+	 * implementation which relies on these being empty */
+	kctx->reg_rbtree_exec = RB_ROOT;
+	kctx->reg_rbtree_custom = RB_ROOT;
+
+	if (exec_reg)
+		kbase_region_tracker_insert(kctx, exec_reg);
+	if (custom_va_reg)
+		kbase_region_tracker_insert(kctx, custom_va_reg);
+}
+
+static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	do {
+		rbnode = rb_first(rbtree);
+		if (rbnode) {
+			rb_erase(rbnode, rbtree);
+			reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+			kbase_free_alloced_region(reg);
+		}
+	} while (rbnode);
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
+}
+
+/**
+ * Initialize the region tracker data structure.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+	struct kbase_va_region *same_va_reg;
+	struct kbase_va_region *exec_reg = NULL;
+	struct kbase_va_region *custom_va_reg = NULL;
+	size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+	u64 same_va_pages;
+	int err;
+
+	/* Take the lock as kbase_free_alloced_region requires it */
+	kbase_gpu_vm_lock(kctx);
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		same_va_bits = 32;
+	else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+#endif
+
+	if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) {
+		err = -EINVAL;
+		goto fail_unlock;
+	}
+
+	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+	/* all have SAME_VA */
+	same_va_reg = kbase_alloc_free_region(kctx, 1,
+			same_va_pages,
+			KBASE_REG_ZONE_SAME_VA);
+
+	if (!same_va_reg) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+#ifdef CONFIG_64BIT
+	/* 32-bit clients have exec and custom VA zones */
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif
+		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+			err = -EINVAL;
+			goto fail_free_same_va;
+		}
+		/* If the current size of TMEM is out of range of the
+		 * virtual address space addressable by the MMU then
+		 * we should shrink it to fit
+		 */
+		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+		exec_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_EXEC_BASE,
+				KBASE_REG_ZONE_EXEC_SIZE,
+				KBASE_REG_ZONE_EXEC);
+
+		if (!exec_reg) {
+			err = -ENOMEM;
+			goto fail_free_same_va;
+		}
+
+		custom_va_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_CUSTOM_VA_BASE,
+				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+		if (!custom_va_reg) {
+			err = -ENOMEM;
+			goto fail_free_exec;
+		}
+#ifdef CONFIG_64BIT
+	}
+#endif
+
+	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+
+	kctx->same_va_end = same_va_pages + 1;
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_free_exec:
+	kbase_free_alloced_region(exec_reg);
+fail_free_same_va:
+	kbase_free_alloced_region(same_va_reg);
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
+{
+#ifdef CONFIG_64BIT
+	struct kbase_va_region *same_va;
+	struct kbase_va_region *custom_va_reg;
+	u64 same_va_bits;
+	u64 total_va_size;
+	int err;
+
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return 0;
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+
+	total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/*
+	 * Modify the same VA free region after creation. Be careful to ensure
+	 * that allocations haven't been made as they could cause an overlap
+	 * to happen with existing same VA allocations and the custom VA zone.
+	 */
+	same_va = kbase_region_tracker_find_region_base_address(kctx,
+			PAGE_SIZE);
+	if (!same_va) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* The region flag or region size has changed since creation so bail. */
+	if ((!(same_va->flags & KBASE_REG_FREE)) ||
+			(same_va->nr_pages != total_va_size)) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	if (same_va->nr_pages < jit_va_pages ||
+			kctx->same_va_end < jit_va_pages) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* It's safe to adjust the same VA zone now */
+	same_va->nr_pages -= jit_va_pages;
+	kctx->same_va_end -= jit_va_pages;
+
+	/*
+	 * Create a custom VA zone at the end of the VA for allocations which
+	 * JIT can use so it doesn't have to allocate VA from the kernel.
+	 */
+	custom_va_reg = kbase_alloc_free_region(kctx,
+				kctx->same_va_end,
+				jit_va_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+
+	if (!custom_va_reg) {
+		/*
+		 * The context will be destroyed if we fail here so no point
+		 * reverting the change we made to same_va.
+		 */
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	kbase_region_tracker_insert(kctx, custom_va_reg);
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+#else
+	return 0;
+#endif
+}
+
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+	kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX;
+
+	/* Initialize memory usage */
+	atomic_set(&memdev->used_pages, 0);
+
+	return kbase_mem_pool_init(&kbdev->mem_pool,
+			KBASE_MEM_POOL_MAX_SIZE_KBDEV, kbdev, NULL);
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int pages;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	pages = atomic_read(&memdev->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_pool_term(&kbdev->mem_pool);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_term);
+
+
+
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+{
+	struct kbase_va_region *new_reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* zone argument should only contain zone related region flags */
+	KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
+
+	new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+	if (!new_reg)
+		return NULL;
+
+	new_reg->cpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->gpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->kctx = kctx;
+	new_reg->flags = zone | KBASE_REG_FREE;
+
+	new_reg->flags |= KBASE_REG_GROWABLE;
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions.
+ *
+ */
+void kbase_free_alloced_region(struct kbase_va_region *reg)
+{
+	if (!(reg->flags & KBASE_REG_FREE)) {
+		/*
+		 * The physical allocation should have been removed from the
+		 * eviction list before this function is called. However, in the
+		 * case of abnormal process termination or the app leaking the
+		 * memory kbase_mem_free_region is not called so it can still be
+		 * on the list at termination time of the region tracker.
+		 */
+		if (!list_empty(&reg->gpu_alloc->evict_node)) {
+			/*
+			 * Unlink the physical allocation before unmaking it
+			 * evictable so that the allocation isn't grown back to
+			 * its last backed size as we're going to unmap it
+			 * anyway.
+			 */
+			reg->cpu_alloc->reg = NULL;
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				reg->gpu_alloc->reg = NULL;
+
+			/*
+			 * If a region has been made evictable then we must
+			 * unmake it before trying to free it.
+			 * If the memory hasn't been reclaimed it will be
+			 * unmapped and freed below, if it has been reclaimed
+			 * then the operations below are no-ops.
+			 */
+			if (reg->flags & KBASE_REG_DONT_NEED) {
+				KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+						   KBASE_MEM_TYPE_NATIVE);
+				kbase_mem_evictable_unmake(reg->gpu_alloc);
+			}
+		}
+
+		/*
+		 * Remove the region from the sticky resource metadata
+		 * list should it be there.
+		 */
+		kbase_sticky_resource_release(reg->kctx, NULL,
+				reg->start_pfn << PAGE_SHIFT);
+
+		kbase_mem_phy_alloc_put(reg->cpu_alloc);
+		kbase_mem_phy_alloc_put(reg->gpu_alloc);
+		/* To detect use-after-free in debug builds */
+		KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
+	}
+	kfree(reg);
+}
+
+KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+{
+	int err;
+	size_t i = 0;
+	unsigned long attr;
+	unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+
+	if ((kctx->kbdev->system_coherency == COHERENCY_ACE) &&
+		(reg->flags & KBASE_REG_SHARE_BOTH))
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA);
+	else
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
+	if (err)
+		return err;
+
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+		struct kbase_mem_phy_alloc *alloc;
+
+		alloc = reg->gpu_alloc;
+		stride = alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
+		for (i = 0; i < alloc->imported.alias.nents; i++) {
+			if (alloc->imported.alias.aliased[i].alloc) {
+				err = kbase_mmu_insert_pages(kctx,
+						reg->start_pfn + (i * stride),
+						alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
+						alloc->imported.alias.aliased[i].length,
+						reg->flags);
+				if (err)
+					goto bad_insert;
+
+				kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc);
+			} else {
+				err = kbase_mmu_insert_single_page(kctx,
+					reg->start_pfn + i * stride,
+					page_to_phys(kctx->aliasing_sink_page),
+					alloc->imported.alias.aliased[i].length,
+					(reg->flags & mask) | attr);
+
+				if (err)
+					goto bad_insert;
+			}
+		}
+	} else {
+		err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+				kbase_get_gpu_phy_pages(reg),
+				kbase_reg_current_backed_size(reg),
+				reg->flags);
+		if (err)
+			goto bad_insert;
+		kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
+	}
+
+	return err;
+
+bad_insert:
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+
+		stride = reg->gpu_alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		while (i--)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
+				kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length);
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+			}
+	}
+
+	kbase_remove_va_region(kctx, reg);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
+
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	if (reg->start_pfn == 0)
+		return 0;
+
+	if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		size_t i;
+
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+	} else {
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
+	}
+
+	if (err)
+		return err;
+
+	err = kbase_remove_va_region(kctx, reg);
+	return err;
+}
+
+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset)
+{
+	struct vm_area_struct *vma;
+	struct kbase_cpu_mapping *map;
+
+	lockdep_assert_held(&current->mm->mmap_sem);
+
+	if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
+		return NULL;
+
+	vma = find_vma_intersection(current->mm, uaddr, uaddr+size);
+
+	if (!vma || vma->vm_start > uaddr)
+		return NULL;
+	if (vma->vm_ops != &kbase_vm_ops)
+		/* Not ours! */
+		return NULL;
+
+	map = vma->vm_private_data;
+
+	if (map->kctx != kctx)
+		/* Not from this context! */
+		return NULL;
+
+	*offset = (uaddr - vma->vm_start) +
+		((vma->vm_pgoff - map->region->start_pfn)<<PAGE_SHIFT);
+
+	return map;
+}
+
+int kbasep_find_enclosing_cpu_mapping_offset(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset)
+{
+	struct kbase_cpu_mapping *map;
+
+	kbase_os_mem_map_lock(kctx);
+
+	map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset);
+
+	kbase_os_mem_map_unlock(kctx);
+
+	if (!map)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
+
+void kbase_sync_single(struct kbase_context *kctx,
+		phys_addr_t cpu_pa, phys_addr_t gpu_pa,
+		off_t offset, size_t size, enum kbase_sync_type sync_fn)
+{
+	struct page *cpu_page;
+
+	cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
+
+	if (likely(cpu_pa == gpu_pa)) {
+		dma_addr_t dma_addr;
+
+		BUG_ON(!cpu_page);
+		BUG_ON(offset + size > PAGE_SIZE);
+
+		dma_addr = kbase_dma_addr(cpu_page) + offset;
+		if (sync_fn == KBASE_SYNC_TO_CPU)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+		else if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+	} else {
+		void *src = NULL;
+		void *dst = NULL;
+		struct page *gpu_page;
+
+		if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
+			return;
+
+		gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+
+		if (sync_fn == KBASE_SYNC_TO_DEVICE) {
+			src = ((unsigned char *)kmap(cpu_page)) + offset;
+			dst = ((unsigned char *)kmap(gpu_page)) + offset;
+		} else if (sync_fn == KBASE_SYNC_TO_CPU) {
+			dma_sync_single_for_cpu(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+			src = ((unsigned char *)kmap(gpu_page)) + offset;
+			dst = ((unsigned char *)kmap(cpu_page)) + offset;
+		}
+		memcpy(dst, src, size);
+		kunmap(gpu_page);
+		kunmap(cpu_page);
+		if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+	}
+}
+
+static int kbase_do_syncset(struct kbase_context *kctx,
+		struct base_syncset *set, enum kbase_sync_type sync_fn)
+{
+	int err = 0;
+	struct basep_syncset *sset = &set->basep_sset;
+	struct kbase_va_region *reg;
+	struct kbase_cpu_mapping *map;
+	unsigned long start;
+	size_t size;
+	phys_addr_t *cpu_pa;
+	phys_addr_t *gpu_pa;
+	u64 page_off, page_count;
+	u64 i;
+	u64 offset;
+
+	kbase_os_mem_map_lock(kctx);
+	kbase_gpu_vm_lock(kctx);
+
+	/* find the region where the virtual address is contained */
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			sset->mem_handle.basep.handle);
+	if (!reg) {
+		dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX",
+				sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED))
+		goto out_unlock;
+
+	start = (uintptr_t)sset->user_addr;
+	size = (size_t)sset->size;
+
+	map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset);
+	if (!map) {
+		dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
+				start, sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	page_off = offset >> PAGE_SHIFT;
+	offset &= ~PAGE_MASK;
+	page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	cpu_pa = kbase_get_cpu_phy_pages(reg);
+	gpu_pa = kbase_get_gpu_phy_pages(reg);
+
+	if (page_off > reg->nr_pages ||
+			page_off + page_count > reg->nr_pages) {
+		/* Sync overflows the region */
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* Sync first page */
+	if (cpu_pa[page_off]) {
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+
+		kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
+				offset, sz, sync_fn);
+	}
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		/* we grow upwards, so bail on first non-present page */
+		if (!cpu_pa[page_off + i])
+			break;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + i],
+				gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1 && cpu_pa[page_off + page_count - 1]) {
+		size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
+				gpu_pa[page_off + page_count - 1], 0, sz,
+				sync_fn);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_os_mem_map_unlock(kctx);
+	return err;
+}
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset)
+{
+	int err = -EINVAL;
+	struct basep_syncset *sset;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != syncset);
+
+	sset = &syncset->basep_sset;
+
+	switch (sset->type) {
+	case BASE_SYNCSET_OP_MSYNC:
+		err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_DEVICE);
+		break;
+
+	case BASE_SYNCSET_OP_CSYNC:
+		err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_CPU);
+		break;
+
+	default:
+		dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
+		break;
+	}
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_sync_now);
+
+/* vm lock must be held */
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Unlink the physical allocation before unmaking it evictable so
+	 * that the allocation isn't grown back to its last backed size
+	 * as we're going to unmap it anyway.
+	 */
+	reg->cpu_alloc->reg = NULL;
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		reg->gpu_alloc->reg = NULL;
+
+	/*
+	 * If a region has been made evictable then we must unmake it
+	 * before trying to free it.
+	 * If the memory hasn't been reclaimed it will be unmapped and freed
+	 * below, if it has been reclaimed then the operations below are no-ops.
+	 */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+				   KBASE_MEM_TYPE_NATIVE);
+		kbase_mem_evictable_unmake(reg->gpu_alloc);
+	}
+
+	err = kbase_gpu_munmap(kctx, reg);
+	if (err) {
+		dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+		goto out;
+	}
+
+	/* This will also free the physical pages */
+	kbase_free_alloced_region(reg);
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free_region);
+
+/**
+ * @brief Free the region from the GPU and unregister it.
+ *
+ * This function implements the free operation on a memory segment.
+ * It will loudly fail if called with outstanding mappings.
+ */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if (0 == gpu_addr) {
+		dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+		return -EINVAL;
+	}
+	kbase_gpu_vm_lock(kctx);
+
+	if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
+	    gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
+		int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
+
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* ask to unlink the cookie as we'll free it */
+
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		kbase_free_alloced_region(reg);
+	} else {
+		/* A real GPU va */
+		/* Validate the region */
+		reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+		if (!reg || (reg->flags & KBASE_REG_FREE)) {
+			dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+			/* SAME_VA must be freed through munmap */
+			dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		err = kbase_mem_free_region(kctx, reg);
+	}
+
+ out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free);
+
+int kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
+
+	reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
+	/* all memory is now growable */
+	reg->flags |= KBASE_REG_GROWABLE;
+
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		reg->flags |= KBASE_REG_PF_GROW;
+
+	if (flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (0 == (flags & BASE_MEM_PROT_GPU_EX))
+		reg->flags |= KBASE_REG_GPU_NX;
+
+	if (!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED)
+			return -EINVAL;
+	} else if (flags & (BASE_MEM_COHERENT_SYSTEM |
+			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
+		reg->flags |= KBASE_REG_SHARE_BOTH;
+	}
+
+	if (!(reg->flags & KBASE_REG_SHARE_BOTH) &&
+			flags & BASE_MEM_COHERENT_LOCAL) {
+		reg->flags |= KBASE_REG_SHARE_IN;
+	}
+
+	/* Set up default MEMATTR usage */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
+		(reg->flags & KBASE_REG_SHARE_BOTH)) {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
+	} else {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
+	}
+
+	return 0;
+}
+
+int kbase_alloc_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_requested)
+{
+	int new_page_count __maybe_unused;
+	size_t old_page_count = alloc->nents;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = kbase_atomic_add_pages(
+			nr_pages_requested, &alloc->imported.kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer */
+	kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested);
+
+	if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool,
+			nr_pages_requested, alloc->pages + old_page_count) != 0)
+		goto no_alloc;
+
+	/*
+	 * Request a zone cache update, this scans only the new pages an
+	 * appends their information to the zone cache. if the update
+	 * fails then clear the cache so we fall-back to doing things
+	 * page by page.
+	 */
+	if (kbase_zone_cache_update(alloc, old_page_count) != 0)
+		kbase_zone_cache_clear(alloc);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			(u32)alloc->imported.kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return 0;
+
+no_alloc:
+	kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+	return -ENOMEM;
+}
+
+int kbase_free_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	phys_addr_t *start_free;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	/* early out if nothing to do */
+	if (0 == nr_pages_to_free)
+		return 0;
+
+	start_free = alloc->pages + alloc->nents - nr_pages_to_free;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/*
+	 * Clear the zone cache, we don't expect JIT allocations to be
+	 * shrunk in parts so there is no point trying to optimize for that
+	 * by scanning for the changes caused by freeing this memory and
+	 * updating the existing cache entries.
+	 */
+	kbase_zone_cache_clear(alloc);
+
+	kbase_mem_pool_free_pages(&kctx->mem_pool,
+				  nr_pages_to_free,
+				  start_free,
+				  syncback,
+				  reclaimed);
+
+	alloc->nents -= nr_pages_to_free;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		kbase_process_page_usage_dec(kctx, nr_pages_to_free);
+		new_page_count = kbase_atomic_sub_pages(nr_pages_to_free,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(nr_pages_to_free,
+				       &kctx->kbdev->memdev.used_pages);
+
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+				(u32)kctx->id,
+				(u64)new_page_count);
+	}
+
+	return 0;
+}
+
+void kbase_mem_kref_free(struct kref *kref)
+{
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
+
+	switch (alloc->type) {
+	case KBASE_MEM_TYPE_NATIVE: {
+		WARN_ON(!alloc->imported.kctx);
+		/*
+		 * The physical allocation must have been removed from the
+		 * eviction list before trying to free it.
+		 */
+		WARN_ON(!list_empty(&alloc->evict_node));
+		kbase_free_phy_pages_helper(alloc, alloc->nents);
+		break;
+	}
+	case KBASE_MEM_TYPE_ALIAS: {
+		/* just call put on the underlying phy allocs */
+		size_t i;
+		struct kbase_aliased *aliased;
+
+		aliased = alloc->imported.alias.aliased;
+		if (aliased) {
+			for (i = 0; i < alloc->imported.alias.nents; i++)
+				if (aliased[i].alloc)
+					kbase_mem_phy_alloc_put(aliased[i].alloc);
+			vfree(aliased);
+		}
+		break;
+	}
+	case KBASE_MEM_TYPE_RAW:
+		/* raw pages, external cleanup */
+		break;
+ #ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ump_dd_release(alloc->imported.ump_handle);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		dma_buf_detach(alloc->imported.umm.dma_buf,
+			       alloc->imported.umm.dma_attachment);
+		dma_buf_put(alloc->imported.umm.dma_buf);
+		break;
+#endif
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		if (alloc->imported.user_buf.mm)
+			mmdrop(alloc->imported.user_buf.mm);
+		kfree(alloc->imported.user_buf.pages);
+		break;
+	case KBASE_MEM_TYPE_TB:{
+		void *tb;
+
+		tb = alloc->imported.kctx->jctx.tb;
+		kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
+		vfree(tb);
+		break;
+	}
+	default:
+		WARN(1, "Unexecpted free of type %d\n", alloc->type);
+		break;
+	}
+
+	/* Free based on allocation type */
+	if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+		vfree(alloc);
+	else
+		kfree(alloc);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(vsize > 0);
+
+	/* validate user provided arguments */
+	if (size > vsize || vsize > reg->nr_pages)
+		goto out_term;
+
+	/* Prevent vsize*sizeof from wrapping around.
+	 * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail.
+	 */
+	if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
+		goto out_term;
+
+	KBASE_DEBUG_ASSERT(0 != vsize);
+
+	if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
+		goto out_term;
+
+	reg->cpu_alloc->reg = reg;
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
+			goto out_rollback;
+		reg->gpu_alloc->reg = reg;
+	}
+
+	return 0;
+
+out_rollback:
+	kbase_free_phy_pages_helper(reg->cpu_alloc, size);
+out_term:
+	return -1;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+
+bool kbase_check_alloc_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Either the GPU or CPU must be reading from the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
+		return false;
+
+	/* Either the GPU or CPU must be writing to the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for allocating. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* BASE_MEM_IMPORT_SHARED is only valid for imported memory */
+	if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
+		return false;
+
+	return true;
+}
+
+bool kbase_check_import_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Imported memory cannot be GPU executable */
+	if (flags & BASE_MEM_PROT_GPU_EX)
+		return false;
+
+	/* Imported memory cannot grow on page fault */
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for importing. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* Secure memory cannot be read by the CPU */
+	if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD))
+		return false;
+
+	return true;
+}
+
+/**
+ * @brief Acquire the per-context region list lock
+ */
+void kbase_gpu_vm_lock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_lock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
+
+/**
+ * @brief Release the per-context region list lock
+ */
+void kbase_gpu_vm_unlock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_unlock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
+
+#ifdef CONFIG_DEBUG_FS
+struct kbase_jit_debugfs_data {
+	int (*func)(struct kbase_jit_debugfs_data *);
+	struct mutex lock;
+	struct kbase_context *kctx;
+	u64 active_value;
+	u64 pool_value;
+	u64 destroy_value;
+	char buffer[50];
+};
+
+static int kbase_jit_debugfs_common_open(struct inode *inode,
+		struct file *file, int (*func)(struct kbase_jit_debugfs_data *))
+{
+	struct kbase_jit_debugfs_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->func = func;
+	mutex_init(&data->lock);
+	data->kctx = (struct kbase_context *) inode->i_private;
+
+	file->private_data = data;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t kbase_jit_debugfs_common_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_jit_debugfs_data *data;
+	size_t size;
+	int ret;
+
+	data = (struct kbase_jit_debugfs_data *) file->private_data;
+	mutex_lock(&data->lock);
+
+	if (*ppos) {
+		size = strnlen(data->buffer, sizeof(data->buffer));
+	} else {
+		if (!data->func) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		if (data->func(data)) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		size = scnprintf(data->buffer, sizeof(data->buffer),
+				"%llu,%llu,%llu", data->active_value,
+				data->pool_value, data->destroy_value);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size);
+
+out_unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+static int kbase_jit_debugfs_common_release(struct inode *inode,
+		struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+	return kbase_jit_debugfs_common_open(inode, file, __func); \
+} \
+static const struct file_operations __fops = { \
+	.owner = THIS_MODULE, \
+	.open = __fops ## _open, \
+	.release = kbase_jit_debugfs_common_release, \
+	.read = kbase_jit_debugfs_common_read, \
+	.write = NULL, \
+	.llseek = generic_file_llseek, \
+}
+
+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct list_head *tmp;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each(tmp, &kctx->jit_active_head) {
+		data->active_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_pool_head) {
+		data->pool_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_destroy_head) {
+		data->destroy_value++;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
+		kbase_jit_debugfs_count_get);
+
+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->nr_pages;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops,
+		kbase_jit_debugfs_vm_get);
+
+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->gpu_alloc->nents;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
+		kbase_jit_debugfs_phys_get);
+
+void kbase_jit_debugfs_init(struct kbase_context *kctx)
+{
+	/* Debugfs entry for getting the number of JIT allocations. */
+	debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_count_fops);
+
+	/*
+	 * Debugfs entry for getting the total number of virtual pages
+	 * used by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_vm_fops);
+
+	/*
+	 * Debugfs entry for getting the number of physical pages used
+	 * by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_phys_fops);
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations
+ * @work: Work item
+ *
+ * This function does the work of freeing JIT allocations whose physical
+ * backing has been released.
+ */
+static void kbase_jit_destroy_worker(struct work_struct *work)
+{
+	struct kbase_context *kctx;
+	struct kbase_va_region *reg;
+
+	kctx = container_of(work, struct kbase_context, jit_work);
+	do {
+		mutex_lock(&kctx->jit_evict_lock);
+		if (list_empty(&kctx->jit_destroy_head)) {
+			mutex_unlock(&kctx->jit_evict_lock);
+			break;
+		}
+
+		reg = list_first_entry(&kctx->jit_destroy_head,
+				struct kbase_va_region, jit_node);
+
+		list_del(&reg->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		kbase_gpu_vm_lock(kctx);
+		kbase_mem_free_region(kctx, reg);
+		kbase_gpu_vm_unlock(kctx);
+	} while (1);
+}
+
+int kbase_jit_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->jit_active_head);
+	INIT_LIST_HEAD(&kctx->jit_pool_head);
+	INIT_LIST_HEAD(&kctx->jit_destroy_head);
+	INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
+
+	INIT_LIST_HEAD(&kctx->jit_pending_alloc);
+	INIT_LIST_HEAD(&kctx->jit_atoms_head);
+
+	return 0;
+}
+
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info)
+{
+	struct kbase_va_region *reg = NULL;
+	struct kbase_va_region *walker;
+	struct kbase_va_region *temp;
+	size_t current_diff = SIZE_MAX;
+
+	int ret;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	/*
+	 * Scan the pool for an existing allocation which meets our
+	 * requirements and remove it.
+	 */
+	list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) {
+
+		if (walker->nr_pages >= info->va_pages) {
+			size_t min_size, max_size, diff;
+
+			/*
+			 * The JIT allocations VA requirements have been
+			 * meet, it's suitable but other allocations
+			 * might be a better fit.
+			 */
+			min_size = min_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			max_size = max_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			diff = max_size - min_size;
+
+			if (current_diff > diff) {
+				current_diff = diff;
+				reg = walker;
+			}
+
+			/* The allocation is an exact match, stop looking */
+			if (current_diff == 0)
+				break;
+		}
+	}
+
+	if (reg) {
+		/*
+		 * Remove the found region from the pool and add it to the
+		 * active list.
+		 */
+		list_move(&reg->jit_node, &kctx->jit_active_head);
+
+		/*
+		 * Remove the allocation from the eviction list as it's no
+		 * longer eligible for eviction. This must be done before
+		 * dropping the jit_evict_lock
+		 */
+		list_del_init(&reg->gpu_alloc->evict_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		kbase_gpu_vm_lock(kctx);
+
+		/* Make the physical backing no longer reclaimable */
+		if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+			goto update_failed;
+
+		/* Grow the backing if required */
+		if (reg->gpu_alloc->nents < info->commit_pages) {
+			size_t delta;
+			size_t old_size = reg->gpu_alloc->nents;
+
+			/* Allocate some more pages */
+			delta = info->commit_pages - reg->gpu_alloc->nents;
+			if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta)
+					!= 0)
+				goto update_failed;
+
+			if (reg->cpu_alloc != reg->gpu_alloc) {
+				if (kbase_alloc_phy_pages_helper(
+						reg->cpu_alloc, delta) != 0) {
+					kbase_free_phy_pages_helper(
+							reg->gpu_alloc, delta);
+					goto update_failed;
+				}
+			}
+
+			ret = kbase_mem_grow_gpu_mapping(kctx, reg,
+					info->commit_pages, old_size);
+			/*
+			 * The grow failed so put the allocation back in the
+			 * pool and return failure.
+			 */
+			if (ret)
+				goto update_failed;
+		}
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* No suitable JIT allocation was found so create a new one */
+		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+				BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+				BASE_MEM_COHERENT_LOCAL;
+		u64 gpu_addr;
+
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
+				info->extent, &flags, &gpu_addr);
+		if (!reg)
+			goto out_unlocked;
+
+		mutex_lock(&kctx->jit_evict_lock);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+		mutex_unlock(&kctx->jit_evict_lock);
+	}
+
+	return reg;
+
+update_failed:
+	/*
+	 * An update to an allocation from the pool failed, chances
+	 * are slim a new allocation would fair any better so return
+	 * the allocation to the pool and return the function with failure.
+	 */
+	kbase_gpu_vm_unlock(kctx);
+	mutex_lock(&kctx->jit_evict_lock);
+	list_move(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+out_unlocked:
+	return NULL;
+}
+
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	/* The physical backing of memory in the pool is always reclaimable */
+	kbase_gpu_vm_lock(kctx);
+	kbase_mem_evictable_make(reg->gpu_alloc);
+	kbase_gpu_vm_unlock(kctx);
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_move(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+}
+
+void kbase_jit_backing_lost(struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = reg->kctx;
+
+	lockdep_assert_held(&kctx->jit_evict_lock);
+
+	/*
+	 * JIT allocations will always be on a list, if the region
+	 * is not on a list then it's not a JIT allocation.
+	 */
+	if (list_empty(&reg->jit_node))
+		return;
+
+	/*
+	 * Freeing the allocation requires locks we might not be able
+	 * to take now, so move the allocation to the free list and kick
+	 * the worker which will do the freeing.
+	 */
+	list_move(&reg->jit_node, &kctx->jit_destroy_head);
+
+	schedule_work(&kctx->jit_work);
+}
+
+bool kbase_jit_evict(struct kbase_context *kctx)
+{
+	struct kbase_va_region *reg = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Free the oldest allocation from the pool */
+	mutex_lock(&kctx->jit_evict_lock);
+	if (!list_empty(&kctx->jit_pool_head)) {
+		reg = list_entry(kctx->jit_pool_head.prev,
+				struct kbase_va_region, jit_node);
+		list_del(&reg->jit_node);
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	if (reg)
+		kbase_mem_free_region(kctx, reg);
+
+	return (reg != NULL);
+}
+
+void kbase_jit_term(struct kbase_context *kctx)
+{
+	struct kbase_va_region *walker;
+
+	/* Free all allocations for this context */
+
+	/*
+	 * Flush the freeing of allocations whose backing has been freed
+	 * (i.e. everything in jit_destroy_head).
+	 */
+	cancel_work_sync(&kctx->jit_work);
+
+	kbase_gpu_vm_lock(kctx);
+	mutex_lock(&kctx->jit_evict_lock);
+	/* Free all allocations from the pool */
+	while (!list_empty(&kctx->jit_pool_head)) {
+		walker = list_first_entry(&kctx->jit_pool_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		kbase_mem_free_region(kctx, walker);
+		mutex_lock(&kctx->jit_evict_lock);
+	}
+
+	/* Free all allocations from active list */
+	while (!list_empty(&kctx->jit_active_head)) {
+		walker = list_first_entry(&kctx->jit_active_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		kbase_mem_free_region(kctx, walker);
+		mutex_lock(&kctx->jit_evict_lock);
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+	kbase_gpu_vm_unlock(kctx);
+}
+
+static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	long pinned_pages;
+	struct kbase_mem_phy_alloc *alloc;
+	struct page **pages;
+	phys_addr_t *pa;
+	long i;
+	int err = -ENOMEM;
+	unsigned long address;
+	struct mm_struct *mm;
+	struct device *dev;
+	unsigned long offset;
+	unsigned long local_size;
+
+	alloc = reg->gpu_alloc;
+	pa = kbase_get_gpu_phy_pages(reg);
+	address = alloc->imported.user_buf.address;
+	mm = alloc->imported.user_buf.mm;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+
+	pages = alloc->imported.user_buf.pages;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	pinned_pages = get_user_pages(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#else
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#endif
+
+	if (pinned_pages <= 0)
+		return pinned_pages;
+
+	if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+		for (i = 0; i < pinned_pages; i++)
+			put_page(pages[i]);
+		return -ENOMEM;
+	}
+
+	dev = kctx->kbdev->dev;
+	offset = address & ~PAGE_MASK;
+	local_size = alloc->imported.user_buf.size;
+
+	for (i = 0; i < pinned_pages; i++) {
+		dma_addr_t dma_addr;
+		unsigned long min;
+
+		min = MIN(PAGE_SIZE - offset, local_size);
+		dma_addr = dma_map_page(dev, pages[i],
+				offset, min,
+				DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, dma_addr))
+			goto unwind;
+
+		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
+		pa[i] = page_to_phys(pages[i]);
+
+		local_size -= min;
+		offset = 0;
+	}
+
+	alloc->nents = pinned_pages;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
+			kbase_reg_current_backed_size(reg),
+			reg->flags);
+	if (err == 0)
+		return 0;
+
+	alloc->nents = 0;
+	/* fall down */
+unwind:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				alloc->imported.user_buf.dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		put_page(pages[i]);
+		pages[i] = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable)
+{
+	long i;
+	struct page **pages;
+	unsigned long size = alloc->imported.user_buf.size;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	pages = alloc->imported.user_buf.pages;
+	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
+		unsigned long local_size;
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
+		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+				DMA_BIDIRECTIONAL);
+		if (writeable)
+			set_page_dirty_lock(pages[i]);
+		put_page(pages[i]);
+		pages[i] = NULL;
+
+		size -= local_size;
+	}
+	alloc->nents = 0;
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kbase_jd_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	struct sg_table *sgt;
+	struct scatterlist *s;
+	int i;
+	phys_addr_t *pa;
+	int err;
+	size_t count = 0;
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = reg->gpu_alloc;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+	KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
+	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
+			DMA_BIDIRECTIONAL);
+
+	if (IS_ERR_OR_NULL(sgt))
+		return -EINVAL;
+
+	/* save for later */
+	alloc->imported.umm.sgt = sgt;
+
+	pa = kbase_get_gpu_phy_pages(reg);
+	KBASE_DEBUG_ASSERT(pa);
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		int j;
+		size_t pages = PFN_UP(sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+		"sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+		sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+		(unsigned long long) sg_dma_address(s));
+
+		for (j = 0; (j < pages) && (count < reg->nr_pages); j++,
+				count++)
+			*pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
+		WARN_ONCE(j < pages,
+		"sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+		alloc->imported.umm.dma_buf->size);
+	}
+
+	if (WARN_ONCE(count < reg->nr_pages,
+			"sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+			alloc->imported.umm.dma_buf->size)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Update nents as we now have pages to map */
+	alloc->nents = count;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+			kbase_get_gpu_phy_pages(reg),
+			kbase_reg_current_backed_size(reg),
+			reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+
+out:
+	if (err) {
+		dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+				alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+		alloc->imported.umm.sgt = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_umm_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(alloc);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+	    alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+	alloc->nents = 0;
+}
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+
+#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) \
+		|| defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS)
+static void add_kds_resource(struct kds_resource *kds_res,
+		struct kds_resource **kds_resources, u32 *kds_res_count,
+		unsigned long *kds_access_bitmap, bool exclusive)
+{
+	u32 i;
+
+	for (i = 0; i < *kds_res_count; i++) {
+		/* Duplicate resource, ignore */
+		if (kds_resources[i] == kds_res)
+			return;
+	}
+
+	kds_resources[*kds_res_count] = kds_res;
+	if (exclusive)
+		set_bit(*kds_res_count, kds_access_bitmap);
+	(*kds_res_count)++;
+}
+#endif
+
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+		, u32 *kds_res_count, struct kds_resource **kds_resources,
+		unsigned long *kds_access_bitmap, bool exclusive
+#endif
+		)
+{
+	int err;
+
+	/* decide what needs to happen for this resource */
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		if (reg->gpu_alloc->imported.user_buf.mm != locked_mm)
+			goto exit;
+
+		reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
+			err = kbase_jd_user_buf_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+	}
+	break;
+	case KBASE_MEM_TYPE_IMPORTED_UMP: {
+#if defined(CONFIG_KDS) && defined(CONFIG_UMP)
+		if (kds_res_count) {
+			struct kds_resource *kds_res;
+
+			kds_res = ump_dd_kds_resource_get(
+					reg->gpu_alloc->imported.ump_handle);
+			if (kds_res)
+				add_kds_resource(kds_res, kds_resources,
+						kds_res_count,
+						kds_access_bitmap, exclusive);
+		}
+#endif				/*defined(CONFIG_KDS) && defined(CONFIG_UMP) */
+		break;
+	}
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+		if (kds_res_count) {
+			struct kds_resource *kds_res;
+
+			kds_res = get_dma_buf_kds_resource(
+					reg->gpu_alloc->imported.umm.dma_buf);
+			if (kds_res)
+				add_kds_resource(kds_res, kds_resources,
+						kds_res_count,
+						kds_access_bitmap, exclusive);
+		}
+#endif
+		reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+			err = kbase_jd_umm_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+		break;
+	}
+#endif
+	default:
+		goto exit;
+	}
+
+	return kbase_mem_phy_alloc_get(reg->gpu_alloc);
+exit:
+	return NULL;
+}
+
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+	switch (alloc->type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		alloc->imported.umm.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.umm.current_mapping_usage_count) {
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			kbase_jd_umm_unmap(kctx, alloc);
+		}
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		alloc->imported.user_buf.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
+			bool writeable = true;
+
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+				writeable = false;
+
+			kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+		}
+	}
+	break;
+	default:
+	break;
+	}
+	kbase_mem_phy_alloc_put(alloc);
+}
+
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *meta = NULL;
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Walk the per context external resource metadata list for the
+	 * metadata which matches the region which is being acquired.
+	 */
+	list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
+		if (walker->gpu_addr == gpu_addr) {
+			meta = walker;
+			break;
+		}
+	}
+
+	/* No metadata exists so create one. */
+	if (!meta) {
+		struct kbase_va_region *reg;
+
+		/* Find the region */
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				kctx, gpu_addr);
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+			goto failed;
+
+		/* Allocate the metadata object */
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			goto failed;
+
+		/*
+		 * Fill in the metadata object and acquire a reference
+		 * for the physical resource.
+		 */
+		meta->alloc = kbase_map_external_resource(kctx, reg, NULL
+#ifdef CONFIG_KDS
+				, NULL, NULL,
+				NULL, false
+#endif
+				);
+
+		if (!meta->alloc)
+			goto fail_map;
+
+		meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+
+		list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
+	}
+
+	return meta;
+
+fail_map:
+	kfree(meta);
+failed:
+	return NULL;
+}
+
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+	struct kbase_va_region *reg;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Search of the metadata if one isn't provided. */
+	if (!meta) {
+		/*
+		 * Walk the per context external resource metadata list for the
+		 * metadata which matches the region which is being released.
+		 */
+		list_for_each_entry(walker, &kctx->ext_res_meta_head,
+				ext_res_node) {
+			if (walker->gpu_addr == gpu_addr) {
+				meta = walker;
+				break;
+			}
+		}
+	}
+
+	/* No metadata so just return. */
+	if (!meta)
+		return false;
+
+	/* Drop the physical memory reference and free the metadata. */
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx,
+			meta->gpu_addr);
+
+	kbase_unmap_external_resource(kctx, reg, meta->alloc);
+	list_del(&meta->ext_res_node);
+	kfree(meta);
+
+	return true;
+}
+
+int kbase_sticky_resource_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->ext_res_meta_head);
+
+	return 0;
+}
+
+void kbase_sticky_resource_term(struct kbase_context *kctx)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Free any sticky resources which haven't been unmapped.
+	 *
+	 * Note:
+	 * We don't care about refcounts at this point as no future
+	 * references to the meta data will be made.
+	 * Region termination would find these if we didn't free them
+	 * here, but it's more efficient if we do the clean up here.
+	 */
+	while (!list_empty(&kctx->ext_res_meta_head)) {
+		walker = list_first_entry(&kctx->ext_res_meta_head,
+				struct kbase_ctx_ext_res_meta, ext_res_node);
+
+		kbase_sticky_resource_release(kctx, walker, 0);
+	}
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
new file mode 100644
index 0000000..fb4ca4d
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -0,0 +1,1079 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/kref.h>
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif				/* CONFIG_KDS */
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include "mali_kbase_gator.h"
+#endif
+/* Required for kbase_mem_evictable_unmake */
+#include "mali_kbase_mem_linux.h"
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)	/* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
+The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and
+page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table
+updates and generates duplicate page faults as the page table information used by the MMU is not valid.   */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3)	/* round to 8 pages */
+
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0)	/* round to 1 page */
+
+/* This must always be a power of 2 */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/**
+ * A CPU mapping
+ */
+struct kbase_cpu_mapping {
+	struct   list_head mappings_list;
+	struct   kbase_mem_phy_alloc *alloc;
+	struct   kbase_context *kctx;
+	struct   kbase_va_region *region;
+	int      count;
+	int      free_on_close;
+};
+
+enum kbase_memory_type {
+	KBASE_MEM_TYPE_NATIVE,
+	KBASE_MEM_TYPE_IMPORTED_UMP,
+	KBASE_MEM_TYPE_IMPORTED_UMM,
+	KBASE_MEM_TYPE_IMPORTED_USER_BUF,
+	KBASE_MEM_TYPE_ALIAS,
+	KBASE_MEM_TYPE_TB,
+	KBASE_MEM_TYPE_RAW
+};
+
+/* internal structure, mirroring base_mem_aliasing_info,
+ * but with alloc instead of a gpu va (handle) */
+struct kbase_aliased {
+	struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
+	u64 offset; /* in pages */
+	u64 length; /* in pages */
+};
+
+/**
+ * @brief Physical pages tracking object properties
+  */
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED  (1ul << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE            (1ul << 1)
+
+/* physical pages tracking object.
+ * Set up to track N pages.
+ * N not stored here, the creator holds that info.
+ * This object only tracks how many elements are actually valid (present).
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not
+ * shared with another region or client. CPU mappings are OK to exist when changing, as
+ * long as the tracked mappings objects are updated as part of the change.
+ */
+struct kbase_mem_phy_alloc {
+	struct kref           kref; /* number of users of this alloc */
+	atomic_t              gpu_mappings;
+	size_t                nents; /* 0..N */
+	phys_addr_t           *pages; /* N elements, only 0..nents are valid */
+
+	/* kbase_cpu_mappings */
+	struct list_head      mappings;
+
+	/* Node used to store this allocation on the eviction list */
+	struct list_head      evict_node;
+	/* Physical backing size when the pages where evicted */
+	size_t                evicted;
+	/*
+	 * Back reference to the region structure which created this
+	 * allocation, or NULL if it has been freed.
+	 */
+	struct kbase_va_region *reg;
+
+	/* type of buffer */
+	enum kbase_memory_type type;
+
+	unsigned long properties;
+
+	struct list_head       zone_cache;
+
+	/* member in union valid based on @a type */
+	union {
+#ifdef CONFIG_UMP
+		ump_dd_handle ump_handle;
+#endif /* CONFIG_UMP */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		struct {
+			struct dma_buf *dma_buf;
+			struct dma_buf_attachment *dma_attachment;
+			unsigned int current_mapping_usage_count;
+			struct sg_table *sgt;
+		} umm;
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+		struct {
+			u64 stride;
+			size_t nents;
+			struct kbase_aliased *aliased;
+		} alias;
+		/* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
+		struct kbase_context *kctx;
+		struct {
+			unsigned long address;
+			unsigned long size;
+			unsigned long nr_pages;
+			struct page **pages;
+			unsigned int current_mapping_usage_count;
+			struct mm_struct *mm;
+			dma_addr_t *dma_addrs;
+		} user_buf;
+	} imported;
+};
+
+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		atomic_inc(&alloc->gpu_mappings);
+}
+
+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+			pr_err("Mismatched %s:\n", __func__);
+			dump_stack();
+		}
+}
+
+void kbase_mem_kref_free(struct kref *kref);
+
+int kbase_mem_init(struct kbase_device *kbdev);
+void kbase_mem_halt(struct kbase_device *kbdev);
+void kbase_mem_term(struct kbase_device *kbdev);
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_get(&alloc->kref);
+	return alloc;
+}
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_put(&alloc->kref, kbase_mem_kref_free);
+	return NULL;
+}
+
+/**
+ * A GPU memory region, and attributes for CPU mappings.
+ */
+struct kbase_va_region {
+	struct rb_node rblink;
+	struct list_head link;
+
+	struct kbase_context *kctx;	/* Backlink to base context */
+
+	u64 start_pfn;		/* The PFN in GPU space */
+	size_t nr_pages;
+
+/* Free region */
+#define KBASE_REG_FREE              (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR            (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR            (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX            (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED        (1ul << 4)
+/* Is GPU cached? */
+#define KBASE_REG_GPU_CACHED        (1ul << 5)
+
+#define KBASE_REG_GROWABLE          (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW           (1ul << 7)
+
+/* VA managed by us */
+#define KBASE_REG_CUSTOM_VA         (1ul << 8)
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN          (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH        (1ul << 10)
+
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_MASK         (3ul << 11)
+#define KBASE_REG_ZONE(x)           (((x) & 3) << 11)
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD            (1ul<<13)
+/* CPU read access */
+#define KBASE_REG_CPU_RD            (1ul<<14)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK      (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x)  (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x)  (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_SECURE            (1ul << 19)
+
+#define KBASE_REG_DONT_NEED         (1ul << 20)
+
+#define KBASE_REG_ZONE_SAME_VA      KBASE_REG_ZONE(0)
+
+/* only used with 32-bit clients */
+/*
+ * On a 32bit platform, custom VA should be wired from (4GB + shader region)
+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink
+ * it down, if required by the GPU, during initialization.
+ */
+
+/*
+ * Dedicated 16MB region for shader code:
+ * VA range 0x101000000-0x102000000
+ */
+#define KBASE_REG_ZONE_EXEC         KBASE_REG_ZONE(1)
+#define KBASE_REG_ZONE_EXEC_BASE    (0x101000000ULL >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_SIZE    ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
+
+#define KBASE_REG_ZONE_CUSTOM_VA         KBASE_REG_ZONE(2)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE    (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE    (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
+
+	unsigned long flags;
+
+	size_t extent; /* nr of pages alloc'd on PF */
+
+	struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */
+	struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */
+
+	/* non-NULL if this memory object is a kds_resource */
+	struct kds_resource *kds_res;
+
+	/* List head used to store the region in the JIT allocation pool */
+	struct list_head jit_node;
+};
+
+/* Common functions */
+static inline phys_addr_t *kbase_get_cpu_phy_pages(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->pages;
+}
+
+static inline phys_addr_t *kbase_get_gpu_phy_pages(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->gpu_alloc->pages;
+}
+
+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	/* if no alloc object the backed size naturally is 0 */
+	if (!reg->cpu_alloc)
+		return 0;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->nents;
+}
+
+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
+
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
+{
+	struct kbase_mem_phy_alloc *alloc;
+	size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+	size_t per_page_size = sizeof(*alloc->pages);
+
+	/* Imported pages may have page private data already in use */
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		alloc_size += nr_pages *
+				sizeof(*alloc->imported.user_buf.dma_addrs);
+		per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
+	}
+
+	/*
+	 * Prevent nr_pages*per_page_size + sizeof(*alloc) from
+	 * wrapping around.
+	 */
+	if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+			/ per_page_size))
+		return ERR_PTR(-ENOMEM);
+
+	/* Allocate based on the size to reduce internal fragmentation of vmem */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc = vzalloc(alloc_size);
+	else
+		alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+	if (!alloc)
+		return ERR_PTR(-ENOMEM);
+
+	/* Store allocation method */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+	kref_init(&alloc->kref);
+	atomic_set(&alloc->gpu_mappings, 0);
+	alloc->nents = 0;
+	alloc->pages = (void *)(alloc + 1);
+	INIT_LIST_HEAD(&alloc->mappings);
+	alloc->type = type;
+	INIT_LIST_HEAD(&alloc->zone_cache);
+
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
+		alloc->imported.user_buf.dma_addrs =
+				(void *) (alloc->pages + nr_pages);
+
+	return alloc;
+}
+
+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
+		struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+	reg->cpu_alloc = kbase_alloc_create(reg->nr_pages,
+			KBASE_MEM_TYPE_NATIVE);
+	if (IS_ERR(reg->cpu_alloc))
+		return PTR_ERR(reg->cpu_alloc);
+	else if (!reg->cpu_alloc)
+		return -ENOMEM;
+	reg->cpu_alloc->imported.kctx = kctx;
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+	if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE)
+	    && (reg->flags & KBASE_REG_CPU_CACHED)) {
+		reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
+				KBASE_MEM_TYPE_NATIVE);
+		reg->gpu_alloc->imported.kctx = kctx;
+		INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+	} else {
+		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	}
+
+	INIT_LIST_HEAD(&reg->jit_node);
+	reg->flags &= ~KBASE_REG_FREE;
+	return 0;
+}
+
+static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_add_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_sub_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+/*
+ * Max size for kbdev memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * Max size for kctx memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KCTX  (SZ_64M >> PAGE_SHIFT)
+
+/**
+ * kbase_mem_pool_init - Create a memory pool for a kbase device
+ * @pool:      Memory pool to initialize
+ * @max_size:  Maximum number of free pages the pool can hold
+ * @kbdev:     Kbase device where memory is used
+ * @next_pool: Pointer to the next pool or NULL.
+ *
+ * Allocations from @pool are in whole pages. Each @pool has a free list where
+ * pages can be quickly allocated from. The free list is initially empty and
+ * filled whenever pages are freed back to the pool. The number of free pages
+ * in the pool will in general not exceed @max_size, but the pool may in
+ * certain corner cases grow above @max_size.
+ *
+ * If @next_pool is not NULL, we will allocate from @next_pool before going to
+ * the kernel allocator. Similarily pages can spill over to @next_pool when
+ * @pool is full. Pages are zeroed before they spill over to another pool, to
+ * prevent leaking information between applications.
+ *
+ * A shrinker is registered so that Linux mm can reclaim pages from the pool as
+ * needed.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool);
+
+/**
+ * kbase_mem_pool_term - Destroy a memory pool
+ * @pool:  Memory pool to destroy
+ *
+ * Pages in the pool will spill over to @next_pool (if available) or freed to
+ * the kernel.
+ */
+void kbase_mem_pool_term(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_alloc - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * Allocations from the pool are made as follows:
+ * 1. If there are free pages in the pool, allocate a page from @pool.
+ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page
+ *    from @next_pool.
+ * 3. Return NULL if no memory in the pool
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ */
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_free - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @page:  Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * Pages are freed to the pool as follows:
+ * 1. If @pool is not full, add @page to @pool.
+ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to
+ *    @next_pool.
+ * 3. Finally, free @page to the kernel.
+ */
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page,
+		bool dirty);
+
+/**
+ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool
+ * @pool:     Memory pool to allocate from
+ * @nr_pages: Number of pages to allocate
+ * @pages:    Pointer to array where the physical address of the allocated
+ *            pages will be stored.
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages);
+
+/**
+ * kbase_mem_pool_free_pages - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages, bool dirty, bool reclaimed);
+
+/**
+ * kbase_mem_pool_size - Get number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Note: the size of the pool may in certain corner cases exceed @max_size!
+ *
+ * Return: Number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool)
+{
+	return ACCESS_ONCE(pool->cur_size);
+}
+
+/**
+ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Return: Maximum number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool)
+{
+	return pool->max_size;
+}
+
+
+/**
+ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool
+ * @pool:     Memory pool to inspect
+ * @max_size: Maximum number of free pages the pool can hold
+ *
+ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit.
+ * For details see kbase_mem_pool_shrink().
+ */
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
+
+/**
+ * kbase_mem_pool_grow - Grow the pool
+ * @pool:       Memory pool to grow
+ * @nr_to_grow: Number of pages to add to the pool
+ *
+ * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
+ * become larger than the maximum size specified.
+ *
+ * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages
+ */
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+
+/**
+ * kbase_mem_pool_trim - Grow or shrink the pool to a new size
+ * @pool:     Memory pool to trim
+ * @new_size: New number of pages in the pool
+ *
+ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but
+ * not above the max_size for the pool.
+ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel.
+ */
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
+
+/*
+ * kbase_mem_alloc_page - Allocate a new page for a device
+ * @kbdev: The kbase device
+ *
+ * Most uses should use kbase_mem_pool_alloc to allocate a page. However that
+ * function can fail in the event the pool is empty.
+ *
+ * Return: A new page or NULL if no memory
+ */
+struct page *kbase_mem_alloc_page(struct kbase_device *kbdev);
+
+int kbase_region_tracker_init(struct kbase_context *kctx);
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages);
+void kbase_region_tracker_term(struct kbase_context *kctx);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * @brief Check that a pointer is actually a valid region.
+ *
+ * Must be called with context lock held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr);
+
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone);
+void kbase_free_alloced_region(struct kbase_va_region *reg);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+bool kbase_check_alloc_flags(unsigned long flags);
+bool kbase_check_import_flags(unsigned long flags);
+
+/**
+ * kbase_update_region_flags - Convert user space flags to kernel region flags
+ *
+ * @kctx:  kbase context
+ * @reg:   The region to update the flags on
+ * @flags: The flags passed from user space
+ *
+ * The user space flag BASE_MEM_COHERENT_SYSTEM_REQUIRED will be rejected and
+ * this function will fail if the system does not support system coherency.
+ *
+ * Return: 0 if successful, -EINVAL if the flags are not supported
+ */
+int kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags);
+
+void kbase_gpu_vm_lock(struct kbase_context *kctx);
+void kbase_gpu_vm_unlock(struct kbase_context *kctx);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
+
+int kbase_mmu_init(struct kbase_context *kctx);
+void kbase_mmu_term(struct kbase_context *kctx);
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
+void kbase_mmu_free_pgd(struct kbase_context *kctx);
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					phys_addr_t phys, size_t nr,
+					unsigned long flags);
+
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr);
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags);
+
+/**
+ * @brief Register region and map it on the GPU.
+ *
+ * Call kbase_add_va_region() and map the region on the GPU.
+ */
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+/**
+ * @brief Remove the region from the GPU and unregister it.
+ *
+ * Must be called with context lock held.
+ */
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ */
+void kbase_mmu_update(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable() - Disable the MMU for a previously active kbase context.
+ * @kctx:	Kbase context
+ *
+ * Disable and perform the required cache maintenance to remove the all
+ * data from provided kbase context from the GPU caches.
+ *
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ */
+void kbase_mmu_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified
+ * address space.
+ * @kbdev:	Kbase device
+ * @as_nr:	The address space number to set to unmapped.
+ *
+ * This function must only be called during reset/power-up and it used to
+ * ensure the registers are in a known state.
+ *
+ * The caller must hold kbdev->mmu_hw_mutex.
+ */
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr);
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** Dump the MMU tables to a buffer
+ *
+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the
+ * buffer is too small then the return value will be NULL.
+ *
+ * The GPU vm lock must be held when calling this function.
+ *
+ * The buffer returned should be freed with @ref vfree when it is no longer required.
+ *
+ * @param[in]   kctx        The kbase context to dump
+ * @param[in]   nr_pages    The number of pages to allocate for the buffer.
+ *
+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too
+ * small)
+ */
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages);
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset);
+void kbase_sync_single(struct kbase_context *kctx, phys_addr_t cpu_pa,
+		phys_addr_t gpu_pa, off_t offset, size_t size,
+		enum kbase_sync_type sync_fn);
+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+
+/* OS specific functions */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr);
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg);
+void kbase_os_mem_map_lock(struct kbase_context *kctx);
+void kbase_os_mem_map_unlock(struct kbase_context *kctx);
+
+/**
+ * @brief Update the memory allocation counters for the current process
+ *
+ * OS specific call to updates the current memory allocation counters for the current process with
+ * the supplied delta.
+ *
+ * @param[in] kctx  The kbase context
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
+
+/**
+ * @brief Add to the memory allocation counters for the current process
+ *
+ * OS specific call to add to the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, pages);
+}
+
+/**
+ * @brief Subtract from the memory allocation counters for the current process
+ *
+ * OS specific call to subtract from the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, 0 - pages);
+}
+
+/**
+ * kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU
+ * mapping of a memory allocation containing a given address range
+ *
+ * Searches for a CPU mapping of any part of any region that fully encloses the
+ * CPU virtual address range specified by @uaddr and @size. Returns a failure
+ * indication if only part of the address range lies within a CPU mapping.
+ *
+ * @kctx:      The kernel base context used for the allocation.
+ * @uaddr:     Start of the CPU virtual address range.
+ * @size:      Size of the CPU virtual address range (in bytes).
+ * @offset:    The offset from the start of the allocation to the specified CPU
+ *             virtual address.
+ *
+ * Return: 0 if offset was obtained successfully. Error code otherwise.
+ */
+int kbasep_find_enclosing_cpu_mapping_offset(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset);
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer);
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+* @brief Allocates physical pages.
+*
+* Allocates \a nr_pages_requested and updates the alloc object.
+*
+* @param[in] alloc allocation object to add pages to
+* @param[in] nr_pages_requested number of physical pages to allocate
+*
+* @return 0 if all pages have been successfully allocated. Error code otherwise
+*/
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested);
+
+/**
+* @brief Free physical pages.
+*
+* Frees \a nr_pages and updates the alloc object.
+*
+* @param[in] alloc allocation object to free pages from
+* @param[in] nr_pages_to_free number of physical pages to free
+*/
+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
+
+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+{
+	SetPagePrivate(p);
+	if (sizeof(dma_addr_t) > sizeof(p->private)) {
+		/* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
+		 * private field stays the same. So we have to be clever and
+		 * use the fact that we only store DMA addresses of whole pages,
+		 * so the low bits should be zero */
+		KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
+		set_page_private(p, dma_addr >> PAGE_SHIFT);
+	} else {
+		set_page_private(p, dma_addr);
+	}
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+	if (sizeof(dma_addr_t) > sizeof(p->private))
+		return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
+
+	return (dma_addr_t)page_private(p);
+}
+
+static inline void kbase_clear_dma_addr(struct page *p)
+{
+	ClearPagePrivate(p);
+}
+
+/**
+* @brief Process a bus or page fault.
+*
+* This function will process a fault on a specific address space
+*
+* @param[in] kbdev   The @ref kbase_device the fault happened on
+* @param[in] kctx    The @ref kbase_context for the faulting address space if
+*                    one was found.
+* @param[in] as      The address space that has the fault
+*/
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_as *as);
+
+/**
+ * @brief Process a page fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void page_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Process a bus fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void bus_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Flush MMU workqueues.
+ *
+ * This function will cause any outstanding page or bus faults to be processed.
+ * It should be called prior to powering off the GPU.
+ *
+ * @param[in] kbdev   Device pointer
+ */
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev);
+
+/**
+ * kbase_sync_single_for_device - update physical memory and give GPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * kbase_jit_debugfs_init - Add per context debugfs entry for JIT.
+ * @kctx: kbase context
+ */
+void kbase_jit_debugfs_init(struct kbase_context *kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_init - Initialize the JIT memory pool management
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_jit_init(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_allocate - Allocate JIT memory
+ * @kctx: kbase context
+ * @info: JIT allocation information
+ *
+ * Return: JIT allocation on success or NULL on failure.
+ */
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info);
+
+/**
+ * kbase_jit_free - Free a JIT allocation
+ * @kctx: kbase context
+ * @reg: JIT allocation
+ *
+ * Frees a JIT allocation and places it into the free pool for later reuse.
+ */
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing
+ * @reg: JIT allocation
+ */
+void kbase_jit_backing_lost(struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_evict - Evict a JIT allocation from the pool
+ * @kctx: kbase context
+ *
+ * Evict the least recently used JIT allocation from the pool. This can be
+ * required if normal VA allocations are failing due to VA exhaustion.
+ *
+ * Return: True if a JIT allocation was freed, false otherwise.
+ */
+bool kbase_jit_evict(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_term - Terminate the JIT memory pool management
+ * @kctx: kbase context
+ */
+void kbase_jit_term(struct kbase_context *kctx);
+
+/**
+ * kbase_map_external_resource - Map an external resource to the GPU.
+ * @kctx:              kbase context.
+ * @reg:               The region to map.
+ * @locked_mm:         The mm_struct which has been locked for this operation.
+ * @kds_res_count:     The number of KDS resources.
+ * @kds_resources:     Array of KDS resources.
+ * @kds_access_bitmap: Access bitmap for KDS.
+ * @exclusive:         If the KDS resource requires exclusive access.
+ *
+ * Return: The physical allocation which backs the region on success or NULL
+ * on failure.
+ */
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+		, u32 *kds_res_count, struct kds_resource **kds_resources,
+		unsigned long *kds_access_bitmap, bool exclusive
+#endif
+		);
+
+/**
+ * kbase_unmap_external_resource - Unmap an external resource from the GPU.
+ * @kctx:  kbase context.
+ * @reg:   The region to unmap or NULL if it has already been released.
+ * @alloc: The physical allocation being unmapped.
+ */
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_sticky_resource_init - Initialize sticky resource management.
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_sticky_resource_init(struct kbase_context *kctx);
+
+/**
+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @gpu_addr: The GPU address of the external resource.
+ *
+ * Return: The metadata object which represents the binding between the
+ * external resource and the kbase context on success or NULL on failure.
+ */
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_release - Release a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @meta:     Binding metadata.
+ * @gpu_addr: GPU address of the external resource.
+ *
+ * If meta is NULL then gpu_addr will be used to scan the metadata list and
+ * find the matching metadata (if any), otherwise the provided meta will be
+ * used and gpu_addr will be ignored.
+ *
+ * Return: True if the release found the metadata and the reference was dropped.
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_term - Terminate sticky resource management.
+ * @kctx: kbase context
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx);
+
+/**
+ * kbase_zone_cache_update - Update the memory zone cache after new pages have
+ * been added.
+ * @alloc:        The physical memory allocation to build the cache for.
+ * @start_offset: Offset to where the new pages start.
+ *
+ * Updates an existing memory zone cache, updating the counters for the
+ * various zones.
+ * If the memory allocation doesn't already have a zone cache assume that
+ * one isn't created and thus don't do anything.
+ *
+ * Return: Zero cache was updated, negative error code on error.
+ */
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset);
+
+/**
+ * kbase_zone_cache_build - Build the memory zone cache.
+ * @alloc:        The physical memory allocation to build the cache for.
+ *
+ * Create a new zone cache for the provided physical memory allocation if
+ * one doesn't already exist, if one does exist then just return.
+ *
+ * Return: Zero if the zone cache was created, negative error code on error.
+ */
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_zone_cache_clear - Clear the memory zone cache.
+ * @alloc:        The physical memory allocation to clear the cache on.
+ */
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc);
+
+#endif				/* _KBASE_MEM_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
new file mode 100644
index 0000000..503a5ec
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -0,0 +1,2622 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.c
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/shrinker.h>
+#include <linux/cache.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_tlstream.h>
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ *
+ * Note: Caller must be holding the processes mmap_sem lock.
+ */
+static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
+		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
+		u64 *gpu_va)
+{
+	int zone;
+	int gpu_pc_bits;
+	int cpu_va_bits;
+	struct kbase_va_region *reg;
+	struct device *dev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(gpu_va);
+
+	dev = kctx->kbdev->dev;
+	*gpu_va = 0; /* return 0 on failure */
+
+	gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	cpu_va_bits = BITS_PER_LONG;
+
+	if (0 == va_pages) {
+		dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!");
+		goto bad_size;
+	}
+
+	if (va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+#if defined(CONFIG_64BIT)
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		cpu_va_bits = 32;
+#endif
+
+	if (!kbase_check_alloc_flags(*flags)) {
+		dev_warn(dev,
+				"kbase_mem_alloc called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	/* Limit GPU executable allocs to GPU PC size */
+	if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+	    (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT)))
+		goto bad_ex_size;
+
+	/* find out which VA zone to use */
+	if (*flags & BASE_MEM_SAME_VA)
+		zone = KBASE_REG_ZONE_SAME_VA;
+	else if (*flags & BASE_MEM_PROT_GPU_EX)
+		zone = KBASE_REG_ZONE_EXEC;
+	else
+		zone = KBASE_REG_ZONE_CUSTOM_VA;
+
+	reg = kbase_alloc_free_region(kctx, 0, va_pages, zone);
+	if (!reg) {
+		dev_err(dev, "Failed to allocate free region");
+		goto no_region;
+	}
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	if (kbase_reg_prepare_native(reg, kctx) != 0) {
+		dev_err(dev, "Failed to prepare region");
+		goto prepare_failed;
+	}
+
+	if (*flags & BASE_MEM_GROW_ON_GPF)
+		reg->extent = extent;
+	else
+		reg->extent = 0;
+
+	if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) {
+		dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)",
+				(unsigned long long)commit_pages,
+				(unsigned long long)va_pages);
+		goto no_mem;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & BASE_MEM_SAME_VA) {
+		unsigned long prot = PROT_NONE;
+		unsigned long va_size = va_pages << PAGE_SHIFT;
+		unsigned long va_map = va_size;
+		unsigned long cookie, cookie_nr;
+		unsigned long cpu_addr;
+
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(dev, "No cookies available for allocation!");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_cookie;
+		}
+		/* return a cookie */
+		cookie_nr = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << cookie_nr);
+		BUG_ON(kctx->pending_regions[cookie_nr]);
+		kctx->pending_regions[cookie_nr] = reg;
+
+		kbase_gpu_vm_unlock(kctx);
+
+		/* relocate to correct base */
+		cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		cookie <<= PAGE_SHIFT;
+
+		/*
+		 * 10.1-10.4 UKU userland relies on the kernel to call mmap.
+		 * For all other versions we can just return the cookie
+		 */
+		if (kctx->api_version < KBASE_API_VERSION(10, 1) ||
+		    kctx->api_version > KBASE_API_VERSION(10, 4)) {
+			*gpu_va = (u64) cookie;
+			return reg;
+		}
+		if (*flags & BASE_MEM_PROT_CPU_RD)
+			prot |= PROT_READ;
+		if (*flags & BASE_MEM_PROT_CPU_WR)
+			prot |= PROT_WRITE;
+
+		cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot,
+				MAP_SHARED, cookie);
+
+		if (IS_ERR_VALUE(cpu_addr)) {
+			kbase_gpu_vm_lock(kctx);
+			kctx->pending_regions[cookie_nr] = NULL;
+			kctx->cookies |= (1UL << cookie_nr);
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+
+		*gpu_va = (u64) cpu_addr;
+	} else /* we control the VA */ {
+		if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
+			dev_warn(dev, "Failed to map memory on GPU");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+
+		kbase_gpu_vm_unlock(kctx);
+	}
+
+	return reg;
+
+no_mmap:
+no_cookie:
+no_mem:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+invalid_flags:
+prepare_failed:
+	kfree(reg);
+no_region:
+bad_ex_size:
+bad_flags:
+bad_size:
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_alloc);
+
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(out);
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	switch (query) {
+	case KBASE_MEM_QUERY_COMMIT_SIZE:
+		if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) {
+			*out = kbase_reg_current_backed_size(reg);
+		} else {
+			size_t i;
+			struct kbase_aliased *aliased;
+			*out = 0;
+			aliased = reg->cpu_alloc->imported.alias.aliased;
+			for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++)
+				*out += aliased[i].length;
+		}
+		break;
+	case KBASE_MEM_QUERY_VA_SIZE:
+		*out = reg->nr_pages;
+		break;
+	case KBASE_MEM_QUERY_FLAGS:
+	{
+		*out = 0;
+		if (KBASE_REG_CPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_WR;
+		if (KBASE_REG_CPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_RD;
+		if (KBASE_REG_CPU_CACHED & reg->flags)
+			*out |= BASE_MEM_CACHED_CPU;
+		if (KBASE_REG_GPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_WR;
+		if (KBASE_REG_GPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_RD;
+		if (!(KBASE_REG_GPU_NX & reg->flags))
+			*out |= BASE_MEM_PROT_GPU_EX;
+		if (KBASE_REG_SHARE_BOTH & reg->flags)
+			*out |= BASE_MEM_COHERENT_SYSTEM;
+		if (KBASE_REG_SHARE_IN & reg->flags)
+			*out |= BASE_MEM_COHERENT_LOCAL;
+		break;
+	}
+	default:
+		*out = 0;
+		goto out_unlock;
+	}
+
+	ret = 0;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the
+ * Ephemeral memory eviction list.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages which can be freed.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long pages = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+
+	mutex_lock(&kctx->jit_evict_lock);
+
+	list_for_each_entry(alloc, &kctx->evict_list, evict_node)
+		pages += alloc->nents;
+
+	mutex_unlock(&kctx->jit_evict_lock);
+	return pages;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction
+ * list for pages and try to reclaim them.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages freed (can be less then requested) or -1 if the
+ * shrinker failed to free pages in its pool.
+ *
+ * Note:
+ * This function accesses region structures without taking the region lock,
+ * this is required as the OOM killer can call the shrinker after the region
+ * lock has already been held.
+ * This is safe as we can guarantee that a region on the eviction list will
+ * not be freed (kbase_mem_free_region removes the allocation from the list
+ * before destroying it), or modified by other parts of the driver.
+ * The eviction list itself is guarded by the eviction lock and the MMU updates
+ * are protected by their own lock.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	struct kbase_mem_phy_alloc *tmp;
+	unsigned long freed = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+	mutex_lock(&kctx->jit_evict_lock);
+
+	list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
+		int err;
+
+		err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
+				0, alloc->nents);
+		if (err != 0) {
+			/*
+			 * Failed to remove GPU mapping, tell the shrinker
+			 * to stop trying to shrink our slab even though we
+			 * have pages in it.
+			 */
+			freed = -1;
+			goto out_unlock;
+		}
+
+		/*
+		 * Update alloc->evicted before freeing the backing so the
+		 * helper can determine that it needs to bypass the accounting
+		 * and memory pool.
+		 */
+		alloc->evicted = alloc->nents;
+
+		kbase_free_phy_pages_helper(alloc, alloc->evicted);
+		freed += alloc->evicted;
+		list_del_init(&alloc->evict_node);
+
+		/*
+		 * Inform the JIT allocator this region has lost backing
+		 * as it might need to free the allocation.
+		 */
+		kbase_jit_backing_lost(alloc->reg);
+
+		/* Enough pages have been freed so stop now */
+		if (freed > sc->nr_to_scan)
+			break;
+	}
+out_unlock:
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_evictable_reclaim_count_objects(s, sc);
+
+	return kbase_mem_evictable_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_evictable_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->evict_list);
+	mutex_init(&kctx->jit_evict_lock);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink;
+#else
+	kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
+	kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+#endif
+	kctx->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	kctx->reclaim.batch = 0;
+#endif
+	register_shrinker(&kctx->reclaim);
+	return 0;
+}
+
+void kbase_mem_evictable_deinit(struct kbase_context *kctx)
+{
+	unregister_shrinker(&kctx->reclaim);
+}
+
+struct kbase_mem_zone_cache_entry {
+	/* List head used to link the cache entry to the memory allocation. */
+	struct list_head zone_node;
+	/* The zone the cacheline is for. */
+	struct zone *zone;
+	/* The number of pages in the allocation which belong to this zone. */
+	u64 count;
+};
+
+static bool kbase_zone_cache_builder(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	struct kbase_mem_zone_cache_entry *cache = NULL;
+	size_t i;
+	int ret = 0;
+
+	for (i = start_offset; i < alloc->nents; i++) {
+		struct page *p = phys_to_page(alloc->pages[i]);
+		struct zone *zone = page_zone(p);
+		bool create = true;
+
+		if (cache && (cache->zone == zone)) {
+			/*
+			 * Fast path check as most of the time adjacent
+			 * pages come from the same zone.
+			 */
+			create = false;
+		} else {
+			/*
+			 * Slow path check, walk all the cache entries to see
+			 * if we already know about this zone.
+			 */
+			list_for_each_entry(cache, &alloc->zone_cache, zone_node) {
+				if (cache->zone == zone) {
+					create = false;
+					break;
+				}
+			}
+		}
+
+		/* This zone wasn't found in the cache, create an entry for it */
+		if (create) {
+			cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+			if (!cache) {
+				ret = -ENOMEM;
+				goto bail;
+			}
+			cache->zone = zone;
+			cache->count = 0;
+			list_add(&cache->zone_node, &alloc->zone_cache);
+		}
+
+		cache->count++;
+	}
+	return 0;
+
+bail:
+	return ret;
+}
+
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	/*
+	 * Bail if the zone cache is empty, only update the cache if it
+	 * existed in the first place.
+	 */
+	if (list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, start_offset);
+}
+
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc)
+{
+	/* Bail if the zone cache already exists */
+	if (!list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, 0);
+}
+
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_mem_zone_cache_entry *walker;
+
+	while(!list_empty(&alloc->zone_cache)){
+		walker = list_first_entry(&alloc->zone_cache,
+				struct kbase_mem_zone_cache_entry, zone_node);
+		list_del(&walker->zone_node);
+		kfree(walker);
+	}
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p = phys_to_page(alloc->pages[i]);
+			struct zone *zone = page_zone(p);
+
+			zone_page_state_add(1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	kbase_process_page_usage_dec(kctx, alloc->nents);
+	new_page_count = kbase_atomic_sub_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+/**
+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable.
+ * @alloc: The physical allocation
+ */
+static
+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	new_page_count = kbase_atomic_add_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters so that the allocation is accounted for
+	 * against the process and thus is visible to the OOM killer,
+	 * then remove it from the reclaimable accounting. */
+	kbase_process_page_usage_inc(kctx, alloc->nents);
+
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(-zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p = phys_to_page(alloc->pages[i]);
+			struct zone *zone = page_zone(p);
+
+			zone_page_state_add(-1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* This alloction can't already be on a list. */
+	WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
+	kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
+			0, gpu_alloc->nents);
+
+	/*
+	 * Add the allocation to the eviction list, after this point the shrink
+	 * can reclaim it.
+	 */
+	mutex_lock(&kctx->jit_evict_lock);
+	list_add(&gpu_alloc->evict_node, &kctx->evict_list);
+	mutex_unlock(&kctx->jit_evict_lock);
+	kbase_mem_evictable_mark_reclaim(gpu_alloc);
+
+	gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED;
+	return 0;
+}
+
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * First remove the allocation from the eviction list as it's no
+	 * longer eligible for eviction.
+	 */
+	list_del_init(&gpu_alloc->evict_node);
+
+	if (gpu_alloc->evicted == 0) {
+		/*
+		 * The backing is still present, update the VM stats as it's
+		 * in use again.
+		 */
+		kbase_mem_evictable_unmark_reclaim(gpu_alloc);
+	} else {
+		/* If the region is still alive ... */
+		if (gpu_alloc->reg) {
+			/* ... allocate replacement backing ... */
+			err = kbase_alloc_phy_pages_helper(gpu_alloc,
+					gpu_alloc->evicted);
+
+			/*
+			 * ... and grow the mapping back to its
+			 * pre-eviction size.
+			 */
+			if (!err)
+				err = kbase_mem_grow_gpu_mapping(kctx,
+						gpu_alloc->reg,
+						gpu_alloc->evicted, 0);
+
+			gpu_alloc->evicted = 0;
+		}
+	}
+
+	/* If the region is still alive remove the DONT_NEED attribute. */
+	if (gpu_alloc->reg)
+		gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED;
+
+	return (err == 0);
+}
+
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+	unsigned int real_flags = 0;
+	unsigned int prev_flags = 0;
+	bool prev_needed, new_needed;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (!gpu_addr)
+		return -EINVAL;
+
+	/* nuke other bits */
+	flags &= mask;
+
+	/* check for only supported flags */
+	if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* mask covers bits we don't support? */
+	if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* convert flags */
+	if (BASE_MEM_COHERENT_SYSTEM & flags)
+		real_flags |= KBASE_REG_SHARE_BOTH;
+	else if (BASE_MEM_COHERENT_LOCAL & flags)
+		real_flags |= KBASE_REG_SHARE_IN;
+
+	/* now we can lock down the context, and find the region */
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	/* Is the region being transitioning between not needed and needed? */
+	prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
+	new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
+	if (prev_needed != new_needed) {
+		/* Aliased allocations can't be made ephemeral */
+		if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1)
+			goto out_unlock;
+
+		if (new_needed) {
+			/* Only native allocations can be marked not needed */
+			if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			ret = kbase_mem_evictable_make(reg->gpu_alloc);
+			if (ret)
+				goto out_unlock;
+		} else {
+			kbase_mem_evictable_unmake(reg->gpu_alloc);
+		}
+	}
+
+	/* limit to imported memory */
+	if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) &&
+	     (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
+		goto out_unlock;
+
+	/* no change? */
+	if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/* save for roll back */
+	prev_flags = reg->flags;
+	reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+	reg->flags |= real_flags;
+
+	/* Currently supporting only imported memory */
+	switch (reg->gpu_alloc->type) {
+#ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_cpu_phy_pages(reg), reg->gpu_alloc->nents, reg->flags);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		/* Future use will use the new flags, existing mapping will NOT be updated
+		 * as memory should not be in use by the GPU when updating the flags.
+		 */
+		ret = 0;
+		WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+		break;
+#endif
+	default:
+		break;
+	}
+
+	/* roll back on error, i.e. not UMP */
+	if (ret)
+		reg->flags = prev_flags;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	up_write(&current->mm->mmap_sem);
+out:
+	return ret;
+}
+
+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
+
+#ifdef CONFIG_UMP
+static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	ump_dd_handle umph;
+	u64 block_count;
+	const ump_dd_physical_block_64 *block_array;
+	u64 i, j;
+	int page = 0;
+	ump_alloc_flags ump_flags;
+	ump_alloc_flags cpu_flags;
+	ump_alloc_flags gpu_flags;
+
+	if (*flags & BASE_MEM_SECURE)
+		goto bad_flags;
+
+	umph = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE == umph)
+		goto bad_id;
+
+	ump_flags = ump_dd_allocation_flags_get(umph);
+	cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK;
+	gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) &
+			UMP_DEVICE_MASK;
+
+	*va_pages = ump_dd_size_get_64(umph);
+	*va_pages >>= PAGE_SHIFT;
+
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	if (*flags & BASE_MEM_SAME_VA)
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	else
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+
+	if (!reg)
+		goto no_region;
+
+	/* we've got pages to map now, and support SAME_VA */
+	*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	reg->gpu_alloc->imported.ump_handle = umph;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMP is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMP cannot be grown */
+
+	/* Override import flags based on UMP flags */
+	*flags &= ~(BASE_MEM_CACHED_CPU);
+	*flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR);
+	*flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR);
+
+	if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) {
+		reg->flags |= KBASE_REG_CPU_CACHED;
+		*flags |= BASE_MEM_CACHED_CPU;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_WR) {
+		reg->flags |= KBASE_REG_CPU_WR;
+		*flags |= BASE_MEM_PROT_CPU_WR;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_RD) {
+		reg->flags |= KBASE_REG_CPU_RD;
+		*flags |= BASE_MEM_PROT_CPU_RD;
+	}
+
+	if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR))
+		reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (gpu_flags & UMP_PROT_DEVICE_WR) {
+		reg->flags |= KBASE_REG_GPU_WR;
+		*flags |= BASE_MEM_PROT_GPU_WR;
+	}
+
+	if (gpu_flags & UMP_PROT_DEVICE_RD) {
+		reg->flags |= KBASE_REG_GPU_RD;
+		*flags |= BASE_MEM_PROT_GPU_RD;
+	}
+
+	/* ump phys block query */
+	ump_dd_phys_blocks_get_64(umph, &block_count, &block_array);
+
+	for (i = 0; i < block_count; i++) {
+		for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) {
+			reg->gpu_alloc->pages[page] = block_array[i].addr + (j << PAGE_SHIFT);
+			page++;
+		}
+	}
+	reg->gpu_alloc->nents = *va_pages;
+	reg->extent = 0;
+
+	return reg;
+
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	ump_dd_release(umph);
+bad_id:
+bad_flags:
+	return NULL;
+}
+#endif				/* CONFIG_UMP */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, int fd, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	struct dma_buf *dma_buf;
+	struct dma_buf_attachment *dma_attachment;
+	bool shared_zone = false;
+
+	dma_buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(dma_buf))
+		goto no_buf;
+
+	dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+	if (!dma_attachment)
+		goto no_attachment;
+
+	*va_pages = PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT;
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* ignore SAME_VA */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	} else {
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	/* No pages to map yet */
+	reg->gpu_alloc->nents = 0;
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMM is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMM cannot be grown */
+	reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (*flags & BASE_MEM_SECURE)
+		reg->flags |= KBASE_REG_SECURE;
+
+	reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM;
+	reg->gpu_alloc->imported.umm.sgt = NULL;
+	reg->gpu_alloc->imported.umm.dma_buf = dma_buf;
+	reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
+	reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
+	reg->extent = 0;
+
+	return reg;
+
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	dma_buf_detach(dma_buf, dma_attachment);
+no_attachment:
+	dma_buf_put(dma_buf);
+no_buf:
+	return NULL;
+}
+#endif  /* CONFIG_DMA_SHARED_BUFFER */
+
+static u32 kbase_get_cache_line_alignment(struct kbase_context *kctx)
+{
+	u32 cpu_cache_line_size = cache_line_size();
+	u32 gpu_cache_line_size =
+		(1UL << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
+
+	return ((cpu_cache_line_size > gpu_cache_line_size) ?
+				cpu_cache_line_size :
+				gpu_cache_line_size);
+}
+
+static struct kbase_va_region *kbase_mem_from_user_buffer(
+		struct kbase_context *kctx, unsigned long address,
+		unsigned long size, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	long faulted_pages;
+	int zone = KBASE_REG_ZONE_CUSTOM_VA;
+	bool shared_zone = false;
+	u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx);
+
+	if ((address & (cache_line_alignment - 1)) != 0 ||
+			(size & (cache_line_alignment - 1)) != 0) {
+		/* Coherency must be enabled to handle partial cache lines */
+		if (*flags & (BASE_MEM_COHERENT_SYSTEM |
+			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
+			/* Force coherent system required flag, import will
+			 * then fail if coherency isn't available
+			 */
+			*flags |= BASE_MEM_COHERENT_SYSTEM_REQUIRED;
+		} else {
+			dev_warn(kctx->kbdev->dev,
+					"User buffer is not cache line aligned and no coherency enabled\n");
+			goto bad_size;
+		}
+	}
+
+	*va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) -
+		PFN_DOWN(address);
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (UINT64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* SAME_VA generally not supported with imported memory (no known use cases) */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		zone = KBASE_REG_ZONE_SAME_VA;
+	}
+
+	reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone);
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages,
+			KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */
+
+	down_read(&current->mm->mmap_sem);
+
+	/* A sanity check that get_user_pages will work on the memory */
+	/* (so the initial import fails on weird memory regions rather than */
+	/* the job failing when we try to handle the external resources). */
+	/* It doesn't take a reference to the pages (because the page list is NULL). */
+	/* We can't really store the page list because that would involve */
+	/* keeping the pages pinned - instead we pin/unpin around the job */
+	/* (as part of the external resources handling code) */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL);
+#else
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			NULL, NULL);
+#endif
+	up_read(&current->mm->mmap_sem);
+
+	if (faulted_pages != *va_pages)
+		goto fault_mismatch;
+
+	reg->gpu_alloc->imported.user_buf.size = size;
+	reg->gpu_alloc->imported.user_buf.address = address;
+	reg->gpu_alloc->imported.user_buf.nr_pages = faulted_pages;
+	reg->gpu_alloc->imported.user_buf.pages = kmalloc_array(faulted_pages,
+			sizeof(struct page *), GFP_KERNEL);
+	reg->gpu_alloc->imported.user_buf.mm = current->mm;
+	atomic_inc(&current->mm->mm_count);
+
+	if (!reg->gpu_alloc->imported.user_buf.pages)
+		goto no_page_array;
+
+	reg->gpu_alloc->nents = 0;
+	reg->extent = 0;
+
+	return reg;
+
+no_page_array:
+fault_mismatch:
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	return NULL;
+
+}
+
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+		    u64 nents, struct base_mem_aliasing_info *ai,
+		    u64 *num_pages)
+{
+	struct kbase_va_region *reg;
+	u64 gpu_va;
+	size_t i;
+	bool coherent;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(ai);
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	/* mask to only allowed flags */
+	*flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+		   BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
+		   BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+
+	if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_alias called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+	coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 ||
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0;
+
+	if (!stride)
+		goto bad_stride;
+
+	if (!nents)
+		goto bad_nents;
+
+	if ((nents * stride) > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* calculate the number of pages this alias will cover */
+	*num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* 64-bit tasks must MMAP anyway, but not expose this address to
+		 * clients */
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_SAME_VA);
+	} else {
+#else
+	if (1) {
+#endif
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	/* zero-sized page array, as we don't need one/can support one */
+	reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->gpu_alloc->imported.alias.nents = nents;
+	reg->gpu_alloc->imported.alias.stride = stride;
+	reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents);
+	if (!reg->gpu_alloc->imported.alias.aliased)
+		goto no_aliased_array;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* validate and add src handles */
+	for (i = 0; i < nents; i++) {
+		if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+			if (ai[i].handle.basep.handle !=
+			    BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE)
+				goto bad_handle; /* unsupported magic handle */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+		} else {
+			struct kbase_va_region *aliasing_reg;
+			struct kbase_mem_phy_alloc *alloc;
+
+			aliasing_reg = kbase_region_tracker_find_region_base_address(
+				kctx,
+				(ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT);
+
+			/* validate found region */
+			if (!aliasing_reg)
+				goto bad_handle; /* Not found */
+			if (aliasing_reg->flags & KBASE_REG_FREE)
+				goto bad_handle; /* Free region */
+			if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+				goto bad_handle; /* Ephemeral region */
+			if (!aliasing_reg->gpu_alloc)
+				goto bad_handle; /* No alloc */
+			if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+				goto bad_handle; /* Not a native alloc */
+			if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0))
+				goto bad_handle;
+				/* Non-coherent memory cannot alias
+				   coherent memory, and vice versa.*/
+
+			/* check size against stride */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+
+			alloc = aliasing_reg->gpu_alloc;
+
+			/* check against the alloc's size */
+			if (ai[i].offset > alloc->nents)
+				goto bad_handle; /* beyond end */
+			if (ai[i].offset + ai[i].length > alloc->nents)
+				goto bad_handle; /* beyond end */
+
+			reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+			reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset;
+		}
+	}
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
+			goto no_cookie;
+		}
+		/* return a cookie */
+		gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << gpu_va);
+		BUG_ON(kctx->pending_regions[gpu_va]);
+		kctx->pending_regions[gpu_va] = reg;
+
+		/* relocate to correct base */
+		gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		gpu_va <<= PAGE_SHIFT;
+	} else /* we control the VA */ {
+#else
+	if (1) {
+#endif
+		if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+			dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags &= ~KBASE_REG_GROWABLE;
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return gpu_va;
+
+#ifdef CONFIG_64BIT
+no_cookie:
+#endif
+no_mmap:
+bad_handle:
+	kbase_gpu_vm_unlock(kctx);
+no_aliased_array:
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_reg:
+bad_size:
+bad_nents:
+bad_stride:
+bad_flags:
+	return 0;
+}
+
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u64 *gpu_va, u64 *va_pages,
+		u64 *flags)
+{
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_pages);
+	KBASE_DEBUG_ASSERT(flags);
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+		*flags |= BASE_MEM_SAME_VA;
+#endif
+
+	if (!kbase_check_import_flags(*flags)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	switch (type) {
+#ifdef CONFIG_UMP
+	case BASE_MEM_IMPORT_TYPE_UMP: {
+		ump_secure_id id;
+
+		if (get_user(id, (ump_secure_id __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_ump(kctx, id, va_pages, flags);
+	}
+	break;
+#endif /* CONFIG_UMP */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case BASE_MEM_IMPORT_TYPE_UMM: {
+		int fd;
+
+		if (get_user(fd, (int __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_umm(kctx, fd, va_pages, flags);
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
+		struct base_mem_import_user_buffer user_buffer;
+		void __user *uptr;
+
+		if (copy_from_user(&user_buffer, phandle,
+				sizeof(user_buffer))) {
+			reg = NULL;
+		} else {
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				uptr = compat_ptr(user_buffer.ptr.compat_value);
+			else
+#endif
+				uptr = user_buffer.ptr.value;
+
+			reg = kbase_mem_from_user_buffer(kctx,
+					(unsigned long)uptr, user_buffer.length,
+					va_pages, flags);
+		}
+		break;
+	}
+	default: {
+		reg = NULL;
+		break;
+	}
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies)
+			goto no_cookie;
+		/* return a cookie */
+		*gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << *gpu_va);
+		BUG_ON(kctx->pending_regions[*gpu_va]);
+		kctx->pending_regions[*gpu_va] = reg;
+
+		/* relocate to correct base */
+		*gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		*gpu_va <<= PAGE_SHIFT;
+
+	} else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES)  {
+		/* we control the VA, mmap now to the GPU */
+		if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	} else {
+		/* we control the VA, but nothing to mmap yet */
+		if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	/* clear out private flags */
+	*flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+
+no_gpu_va:
+no_cookie:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+no_reg:
+bad_flags:
+	*gpu_va = 0;
+	*va_pages = 0;
+	*flags = 0;
+	return -ENOMEM;
+}
+
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	phys_addr_t *phy_pages;
+	u64 delta = new_pages - old_pages;
+	int ret = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Map the new pages into the GPU */
+	phy_pages = kbase_get_gpu_phy_pages(reg);
+	ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
+			phy_pages + old_pages, delta, reg->flags);
+
+	return ret;
+}
+
+static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 gpu_va_start = reg->start_pfn;
+
+	if (new_pages == old_pages)
+		/* Nothing to do */
+		return;
+
+	unmap_mapping_range(kctx->filp->f_inode->i_mapping,
+			(gpu_va_start + new_pages)<<PAGE_SHIFT,
+			(old_pages - new_pages)<<PAGE_SHIFT, 1);
+}
+
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 delta = old_pages - new_pages;
+	int ret = 0;
+
+	ret = kbase_mmu_teardown_pages(kctx,
+			reg->start_pfn + new_pages, delta);
+
+	return ret;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason)
+{
+	u64 old_pages;
+	u64 delta;
+	int res = -EINVAL;
+	struct kbase_va_region *reg;
+	bool read_locked = false;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(failure_reason);
+	KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE)) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+		goto out_unlock;
+	}
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (0 == (reg->flags & KBASE_REG_GROWABLE)) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (new_pages > reg->nr_pages) {
+		/* Would overflow the VA region */
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+		goto out_unlock;
+	}
+
+	/* can't be mapped more than once on the GPU */
+	if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+	/* can't grow regions which are ephemeral */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (new_pages == reg->gpu_alloc->nents) {
+		/* no change */
+		res = 0;
+		goto out_unlock;
+	}
+
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (new_pages > old_pages) {
+		delta = new_pages - old_pages;
+
+		/*
+		 * No update to the mm so downgrade the writer lock to a read
+		 * lock so other readers aren't blocked after this point.
+		 */
+		downgrade_write(&current->mm->mmap_sem);
+		read_locked = true;
+
+		/* Allocate some more pages */
+		if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+		if (reg->cpu_alloc != reg->gpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					reg->gpu_alloc, delta) != 0) {
+				*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+				kbase_free_phy_pages_helper(reg->cpu_alloc,
+						delta);
+				goto out_unlock;
+			}
+		}
+
+		/* No update required for CPU mappings, that's done on fault. */
+
+		/* Update GPU mapping. */
+		res = kbase_mem_grow_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* On error free the new pages */
+		if (res) {
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+	} else {
+		delta = old_pages - new_pages;
+
+		/* Update all CPU mapping(s) */
+		kbase_mem_shrink_cpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* Update the GPU mapping */
+		res = kbase_mem_shrink_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+
+		kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+		if (reg->cpu_alloc != reg->gpu_alloc)
+			kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	if (read_locked)
+		up_read(&current->mm->mmap_sem);
+	else
+		up_write(&current->mm->mmap_sem);
+
+	return res;
+}
+
+static void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* non-atomic as we're under Linux' mm lock */
+	map->count++;
+}
+
+static void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* non-atomic as we're under Linux' mm lock */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	kbase_gpu_vm_lock(map->kctx);
+
+	if (map->free_on_close) {
+		KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
+				KBASE_REG_ZONE_SAME_VA);
+		/* Avoid freeing memory on the process death which results in
+		 * GPU Page Fault. Memory will be freed in kbase_destroy_context
+		 */
+		if (!(current->flags & PF_EXITING))
+			kbase_mem_free_region(map->kctx, map->region);
+	}
+
+	list_del(&map->mappings_list);
+
+	kbase_gpu_vm_unlock(map->kctx);
+
+	kbase_mem_phy_alloc_put(map->alloc);
+	kfree(map);
+}
+
+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
+
+
+static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+	pgoff_t rel_pgoff;
+	size_t i;
+	pgoff_t addr;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	rel_pgoff = vmf->pgoff - map->region->start_pfn;
+
+	kbase_gpu_vm_lock(map->kctx);
+	if (rel_pgoff >= map->alloc->nents)
+		goto locked_bad_fault;
+
+	/* Fault on access to DONT_NEED regions */
+	if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED))
+		goto locked_bad_fault;
+
+	/* insert all valid pages from the fault location */
+	i = rel_pgoff;
+	addr = (pgoff_t)((uintptr_t)vmf->virtual_address >> PAGE_SHIFT);
+	while (i < map->alloc->nents && (addr < vma->vm_end >> PAGE_SHIFT)) {
+		int ret = vm_insert_pfn(vma, addr << PAGE_SHIFT,
+		    PFN_DOWN(map->alloc->pages[i]));
+		if (ret < 0 && ret != -EBUSY)
+			goto locked_bad_fault;
+
+		i++; addr++;
+	}
+
+	kbase_gpu_vm_unlock(map->kctx);
+	/* we resolved it, nothing for VM to do */
+	return VM_FAULT_NOPAGE;
+
+locked_bad_fault:
+	kbase_gpu_vm_unlock(map->kctx);
+	return VM_FAULT_SIGBUS;
+}
+
+const struct vm_operations_struct kbase_vm_ops = {
+	.open  = kbase_cpu_vm_open,
+	.close = kbase_cpu_vm_close,
+	.fault = kbase_cpu_vm_fault
+};
+
+static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close)
+{
+	struct kbase_cpu_mapping *map;
+	phys_addr_t *page_array;
+	int err = 0;
+	int i;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+	if (!map) {
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+	 * VM_DONTEXPAND - disable mremap on this region
+	 * VM_IO - disables paging
+	 * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+	 * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+	 *               This is needed to support using the dedicated and
+	 *               the OS based memory backends together.
+	 */
+	/*
+	 * This will need updating to propagate coherency flags
+	 * See MIDBASE-1057
+	 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_ops;
+	vma->vm_private_data = map;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+	    (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+		/* We can't map vmalloc'd memory uncached.
+		 * Other memory will have been returned from
+		 * kbase_mem_pool which would be
+		 * suitable for mapping uncached.
+		 */
+		BUG_ON(kaddr);
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	if (!kaddr) {
+		unsigned long addr = vma->vm_start + aligned_offset;
+		u64 start_off = vma->vm_pgoff - reg->start_pfn +
+			(aligned_offset>>PAGE_SHIFT);
+
+		vma->vm_flags |= VM_PFNMAP;
+		for (i = 0; i < nr_pages; i++) {
+			unsigned long pfn = PFN_DOWN(page_array[i + start_off]);
+
+			err = vm_insert_pfn(vma, addr, pfn);
+			if (WARN_ON(err))
+				break;
+
+			addr += PAGE_SIZE;
+		}
+	} else {
+		WARN_ON(aligned_offset);
+		/* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+		vma->vm_flags |= VM_MIXEDMAP;
+		/* vmalloc remaping is easy... */
+		err = remap_vmalloc_range(vma, kaddr, 0);
+		WARN_ON(err);
+	}
+
+	if (err) {
+		kfree(map);
+		goto out;
+	}
+
+	map->region = reg;
+	map->free_on_close = free_on_close;
+	map->kctx = reg->kctx;
+	map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->count = 1; /* start with one ref */
+
+	if (reg->flags & KBASE_REG_CPU_CACHED)
+		map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+	return err;
+}
+
+static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
+{
+	struct kbase_va_region *new_reg;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+	u32 *tb;
+	int owns_tb = 1;
+
+	dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	if (!kctx->jctx.tb) {
+		KBASE_DEBUG_ASSERT(0 != size);
+		tb = vmalloc_user(size);
+
+		if (NULL == tb) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = kbase_device_trace_buffer_install(kctx, tb, size);
+		if (err) {
+			vfree(tb);
+			goto out;
+		}
+	} else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	*kaddr = kctx->jctx.tb;
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_region;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->cpu_alloc->imported.kctx = kctx;
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+
+	/* alloc now owns the tb */
+	owns_tb = 0;
+
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_va_region;
+	}
+
+	*reg = new_reg;
+
+	/* map read only, noexec */
+	vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+	/* the rest of the flags is added by the cpu_mmap handler */
+
+	dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
+	return 0;
+
+out_no_va_region:
+out_no_alloc:
+	kbase_free_alloced_region(new_reg);
+out_no_region:
+	if (owns_tb) {
+		kbase_device_trace_buffer_uninstall(kctx);
+		vfree(tb);
+	}
+out:
+	return err;
+}
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
+{
+	struct kbase_va_region *new_reg;
+	void *kaddr;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	kaddr = kbase_mmu_dump(kctx, nr_pages);
+
+	if (!kaddr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_va_region;
+	}
+
+	*kmap_addr = kaddr;
+	*reg = new_reg;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+	return 0;
+
+out_no_alloc:
+out_va_region:
+	kbase_free_alloced_region(new_reg);
+out:
+	return err;
+}
+
+
+void kbase_os_mem_map_lock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	down_read(&mm->mmap_sem);
+}
+
+void kbase_os_mem_map_unlock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	up_read(&mm->mmap_sem);
+}
+
+static int kbasep_reg_mmap(struct kbase_context *kctx,
+			   struct vm_area_struct *vma,
+			   struct kbase_va_region **regm,
+			   size_t *nr_pages, size_t *aligned_offset)
+
+{
+	int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+	struct kbase_va_region *reg;
+	int err = 0;
+
+	*aligned_offset = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n");
+
+	/* SAME_VA stuff, fetch the right region */
+	reg = kctx->pending_regions[cookie];
+	if (!reg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if ((reg->flags & KBASE_REG_GPU_NX) && (reg->nr_pages != *nr_pages)) {
+		/* incorrect mmap size */
+		/* leave the cookie for a potential later
+		 * mapping, or to be reclaimed later when the
+		 * context is freed */
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) ||
+	    (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) {
+		/* VM flags inconsistent with region flags */
+		err = -EPERM;
+		dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n",
+							__FILE__, __LINE__);
+		goto out;
+	}
+
+	/* adjust down nr_pages to what we have physically */
+	*nr_pages = kbase_reg_current_backed_size(reg);
+
+	if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset,
+						reg->nr_pages, 1) != 0) {
+		dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__);
+		/* Unable to map in GPU space. */
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+	/* no need for the cookie anymore */
+	kctx->pending_regions[cookie] = NULL;
+	kctx->cookies |= (1UL << cookie);
+
+	/*
+	 * Overwrite the offset with the region start_pfn, so we effectively
+	 * map from offset 0 in the region. However subtract the aligned
+	 * offset so that when user space trims the mapping the beginning of
+	 * the trimmed VMA has the correct vm_pgoff;
+	 */
+	vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT);
+out:
+	*regm = reg;
+	dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n");
+
+	return err;
+}
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx = file->private_data;
+	struct kbase_va_region *reg = NULL;
+	void *kaddr = NULL;
+	size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int err = 0;
+	int free_on_close = 0;
+	struct device *dev = kctx->kbdev->dev;
+	size_t aligned_offset = 0;
+
+	dev_dbg(dev, "kbase_mmap\n");
+
+	/* strip away corresponding VM_MAY% flags to the VM_% flags requested */
+	vma->vm_flags &= ~((vma->vm_flags & (VM_READ | VM_WRITE)) << 4);
+
+	if (0 == nr_pages) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
+		/* The non-mapped tracking helper page */
+		err = kbase_tracking_page_setup(kctx, vma);
+		goto out_unlock;
+	}
+
+	/* if not the MTP, verify that the MTP has been mapped */
+	rcu_read_lock();
+	/* catches both when the special page isn't present or
+	 * when we've forked */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		err = -EINVAL;
+		rcu_read_unlock();
+		goto out_unlock;
+	}
+	rcu_read_unlock();
+
+	switch (vma->vm_pgoff) {
+	case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
+	case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE):
+		/* Illegal handle for direct map */
+		err = -EINVAL;
+		goto out_unlock;
+	case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE):
+		err = kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+		/* MMU dump */
+		err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+	     PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+		err = kbasep_reg_mmap(kctx, vma, &reg, &nr_pages,
+							&aligned_offset);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	}
+	default: {
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+					(u64)vma->vm_pgoff << PAGE_SHIFT);
+
+		if (reg && !(reg->flags & KBASE_REG_FREE)) {
+			/* will this mapping overflow the size of the region? */
+			if (nr_pages > (reg->nr_pages -
+					(vma->vm_pgoff - reg->start_pfn))) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
+			if ((vma->vm_flags & VM_READ &&
+			     !(reg->flags & KBASE_REG_CPU_RD)) ||
+			    (vma->vm_flags & VM_WRITE &&
+			     !(reg->flags & KBASE_REG_CPU_WR))) {
+				/* VM flags inconsistent with region flags */
+				err = -EPERM;
+				dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+				goto out_unlock;
+			}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+			if (KBASE_MEM_TYPE_IMPORTED_UMM ==
+							reg->cpu_alloc->type) {
+				err = dma_buf_mmap(
+					reg->cpu_alloc->imported.umm.dma_buf,
+					vma, vma->vm_pgoff - reg->start_pfn);
+				goto out_unlock;
+			}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+			/* limit what we map to the amount currently backed */
+			if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+				if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
+					nr_pages = 0;
+				else
+					nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn);
+			}
+		} else {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+	} /* default */
+	} /* switch */
+
+	err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
+		/* MMU dump - userspace should now have a reference on
+		 * the pages, so we can now free the kernel mapping */
+		vfree(kaddr);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+out:
+	if (err)
+		dev_err(dev, "mmap failed %d\n", err);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmap);
+
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map)
+{
+	struct kbase_va_region *reg;
+	unsigned long page_index;
+	unsigned int offset = gpu_addr & ~PAGE_MASK;
+	size_t page_count = PFN_UP(offset + size);
+	phys_addr_t *page_array;
+	struct page **pages;
+	void *cpu_addr = NULL;
+	pgprot_t prot;
+	size_t i;
+	bool sync_needed;
+
+	if (!size || !map)
+		return NULL;
+
+	/* check if page_count calculation will wrap */
+	if (size > ((size_t)-1 / PAGE_SIZE))
+		return NULL;
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn;
+
+	/* check if page_index + page_count will wrap */
+	if (-1UL - page_count < page_index)
+		goto out_unlock;
+
+	if (page_index + page_count > kbase_reg_current_backed_size(reg))
+		goto out_unlock;
+
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+	/* check access permissions can be satisfied
+	 * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */
+	if ((reg->flags & prot_request) != prot_request)
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+	if (!page_array)
+		goto out_unlock;
+
+	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out_unlock;
+
+	for (i = 0; i < page_count; i++)
+		pages[i] = pfn_to_page(PFN_DOWN(page_array[page_index + i]));
+
+	prot = PAGE_KERNEL;
+	if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
+		/* Map uncached */
+		prot = pgprot_writecombine(prot);
+	}
+	/* Note: enforcing a RO prot_request onto prot is not done, since:
+	 * - CPU-arch-specific integration required
+	 * - kbase_vmap() requires no access checks to be made/enforced */
+
+	cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+
+	kfree(pages);
+
+	if (!cpu_addr)
+		goto out_unlock;
+
+	map->gpu_addr = gpu_addr;
+	map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
+	map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+	map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
+	map->addr = (void *)((uintptr_t)cpu_addr + offset);
+	map->size = size;
+	map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0;
+	sync_needed = map->is_cached;
+
+#ifdef CONFIG_MALI_COH_KERN
+	/* kernel can use coherent memory if supported */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+		sync_needed = false;
+#endif
+
+	if (sync_needed) {
+		/* Sync first page */
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+		phys_addr_t cpu_pa = map->cpu_pages[0];
+		phys_addr_t gpu_pa = map->gpu_pages[0];
+
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+				KBASE_SYNC_TO_CPU);
+
+		/* Sync middle pages (if any) */
+		for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+			cpu_pa = map->cpu_pages[i];
+			gpu_pa = map->gpu_pages[i];
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+					KBASE_SYNC_TO_CPU);
+		}
+
+		/* Sync last page (if any) */
+		if (page_count > 1) {
+			cpu_pa = map->cpu_pages[page_count - 1];
+			gpu_pa = map->gpu_pages[page_count - 1];
+			sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+					KBASE_SYNC_TO_CPU);
+		}
+	}
+	kbase_gpu_vm_unlock(kctx);
+
+	return map->addr;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return NULL;
+}
+
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map)
+{
+	/* 0 is specified for prot_request to indicate no access checks should
+	 * be made.
+	 *
+	 * As mentioned in kbase_vmap_prot() this means that a kernel-side
+	 * CPU-RO mapping is not enforced to allow this to work */
+	return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map);
+}
+KBASE_EXPORT_TEST_API(kbase_vmap);
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+	void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+	bool sync_needed = map->is_cached;
+	vunmap(addr);
+#ifdef CONFIG_MALI_COH_KERN
+	/* kernel can use coherent memory if supported */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+		sync_needed = false;
+#endif
+	if (sync_needed) {
+		off_t offset = (uintptr_t)map->addr & ~PAGE_MASK;
+		size_t size = map->size;
+		size_t page_count = PFN_UP(offset + size);
+		size_t i;
+
+		/* Sync first page */
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+		phys_addr_t cpu_pa = map->cpu_pages[0];
+		phys_addr_t gpu_pa = map->gpu_pages[0];
+
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+				KBASE_SYNC_TO_DEVICE);
+
+		/* Sync middle pages (if any) */
+		for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+			cpu_pa = map->cpu_pages[i];
+			gpu_pa = map->gpu_pages[i];
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+					KBASE_SYNC_TO_DEVICE);
+		}
+
+		/* Sync last page (if any) */
+		if (page_count > 1) {
+			cpu_pa = map->cpu_pages[page_count - 1];
+			gpu_pa = map->gpu_pages[page_count - 1];
+			sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+			kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+					KBASE_SYNC_TO_DEVICE);
+		}
+	}
+	map->gpu_addr = 0;
+	map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+	map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+	map->cpu_pages = NULL;
+	map->gpu_pages = NULL;
+	map->addr = NULL;
+	map->size = 0;
+	map->is_cached = false;
+}
+KBASE_EXPORT_TEST_API(kbase_vunmap);
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
+{
+	struct mm_struct *mm;
+
+	rcu_read_lock();
+	mm = rcu_dereference(kctx->process_mm);
+	if (mm) {
+		atomic_add(pages, &kctx->nonmapped_pages);
+#ifdef SPLIT_RSS_COUNTING
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+#else
+		spin_lock(&mm->page_table_lock);
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+		spin_unlock(&mm->page_table_lock);
+#endif
+	}
+	rcu_read_unlock();
+}
+
+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
+{
+	int pages;
+	struct mm_struct *mm;
+
+	spin_lock(&kctx->mm_update_lock);
+	mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
+	if (!mm) {
+		spin_unlock(&kctx->mm_update_lock);
+		return;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, NULL);
+	spin_unlock(&kctx->mm_update_lock);
+	synchronize_rcu();
+
+	pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+#ifdef SPLIT_RSS_COUNTING
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+#else
+	spin_lock(&mm->page_table_lock);
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+	spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx;
+
+	kctx = vma->vm_private_data;
+	kbasep_os_process_page_usage_drain(kctx);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+	.close = kbase_special_vm_close,
+};
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
+{
+	/* check that this is the only tracking page */
+	spin_lock(&kctx->mm_update_lock);
+	if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
+		spin_unlock(&kctx->mm_update_lock);
+		return -EFAULT;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, current->mm);
+
+	spin_unlock(&kctx->mm_update_lock);
+
+	/* no real access */
+	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_special_ops;
+	vma->vm_private_data = kctx;
+
+	return 0;
+}
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
+{
+	int i;
+	int res;
+	void *va;
+	dma_addr_t  dma_pa;
+	struct kbase_va_region *reg;
+	phys_addr_t *page_array;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	unsigned long attrs = DMA_ATTR_WRITE_COMBINE;
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
+	u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+		    BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(0 != size);
+	KBASE_DEBUG_ASSERT(0 != pages);
+
+	if (size == 0)
+		goto err;
+
+	/* All the alloc calls return zeroed memory */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+			     attrs);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+			     &attrs);
+#else
+	va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
+#endif
+	if (!va)
+		goto err;
+
+	/* Store the state so we can free it later. */
+	handle->cpu_va = va;
+	handle->dma_pa = dma_pa;
+	handle->size   = size;
+
+
+	reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA);
+	if (!reg)
+		goto no_reg;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	if (kbase_update_region_flags(kctx, reg, flags) != 0)
+		goto invalid_flags;
+
+	reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(reg->cpu_alloc))
+		goto no_alloc;
+
+	reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	for (i = 0; i < pages; i++)
+		page_array[i] = dma_pa + (i << PAGE_SHIFT);
+
+	reg->cpu_alloc->nents = pages;
+
+	kbase_gpu_vm_lock(kctx);
+	res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
+	kbase_gpu_vm_unlock(kctx);
+	if (res)
+		goto no_mmap;
+
+	return va;
+
+no_mmap:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc:
+invalid_flags:
+	kfree(reg);
+no_reg:
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
+#endif
+err:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_va_alloc);
+
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
+{
+	struct kbase_va_region *reg;
+	int err;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
+	KBASE_DEBUG_ASSERT(reg);
+	err = kbase_gpu_munmap(kctx, reg);
+	kbase_gpu_vm_unlock(kctx);
+	KBASE_DEBUG_ASSERT(!err);
+
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+		       handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+			handle->cpu_va, handle->dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, handle->size,
+				handle->cpu_va, handle->dma_pa);
+#endif
+}
+KBASE_EXPORT_SYMBOL(kbase_va_free);
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
new file mode 100644
index 0000000..2053a47
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -0,0 +1,220 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.h
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_MEM_LINUX_H_
+#define _KBASE_MEM_LINUX_H_
+
+/** A HWC dump mapping */
+struct kbase_hwc_dma_mapping {
+	void       *cpu_va;
+	dma_addr_t  dma_pa;
+	size_t      size;
+};
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
+		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
+		u64 *gpu_va);
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages);
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u64 *gpu_va, u64 *va_pages,
+		u64 *flags);
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason);
+int kbase_mmap(struct file *file, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction
+ * mechanism.
+ * @kctx: The kbase context to initialize.
+ *
+ * Return: Zero on success or -errno on failure.
+ */
+int kbase_mem_evictable_init(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to de-initialize.
+ */
+void kbase_mem_evictable_deinit(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the grow
+ * @old_pages: The number of pages before the grow
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Expand the GPU mapping to encompass the new psychical pages which have
+ * been added to the allocation.
+ *
+ * Note: Caller must be holding the region lock.
+ */
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction
+ * @gpu_alloc: The physical allocation to make evictable
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Take the provided region and make all the physical pages within it
+ * reclaimable by the kernel, updating the per-process VM stats as well.
+ * Remove any CPU mappings (as these can't be removed in the shrinker callback
+ * as mmap_sem might already be taken) but leave the GPU mapping intact as
+ * and until the shrinker reclaims the allocation.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc);
+
+/**
+ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for
+ * eviction.
+ * @alloc: The physical allocation to remove eviction eligibility from.
+ *
+ * Return: True if the allocation had its backing restored and false if
+ * it hasn't.
+ *
+ * Make the physical pages in the region no longer reclaimable and update the
+ * per-process stats, if the shrinker has already evicted the memory then
+ * re-allocate it if the region is still alive.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
+
+struct kbase_vmap_struct {
+	u64 gpu_addr;
+	struct kbase_mem_phy_alloc *cpu_alloc;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+	phys_addr_t *cpu_pages;
+	phys_addr_t *gpu_pages;
+	void *addr;
+	size_t size;
+	bool is_cached;
+};
+
+
+/**
+ * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the
+ * requested access permissions are supported
+ * @kctx:         Context the VA range belongs to
+ * @gpu_addr:     Start address of VA range
+ * @size:         Size of VA range
+ * @prot_request: Flags indicating how the caller will then access the memory
+ * @map:          Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check
+ * whether the region should allow the intended access, and return an error if
+ * disallowed. This is essential for security of imported memory, particularly
+ * a user buf from SHM mapped into the process as RO. In that case, write
+ * access must be checked if the intention is for kernel to write to the
+ * memory.
+ *
+ * The checks are also there to help catch access errors on memory where
+ * security is not a concern: imported memory that is always RW, and memory
+ * that was allocated and owned by the process attached to @kctx. In this case,
+ * it helps to identify memory that was was mapped with the wrong access type.
+ *
+ * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases
+ * where either the security of memory is solely dependent on those flags, or
+ * when userspace code was expecting only the GPU to access the memory (e.g. HW
+ * workarounds).
+ *
+ */
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vmap - Map a GPU VA range into the kernel safely
+ * @kctx:     Context the VA range belongs to
+ * @gpu_addr: Start address of VA range
+ * @size:     Size of VA range
+ * @map:      Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no
+ * checks to ensure the security of e.g. imported user bufs from RO SHM.
+ */
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vunmap - Unmap a GPU VA range from the kernel
+ * @kctx: Context the VA range belongs to
+ * @map:  Structure describing the mapping from the corresponding kbase_vmap()
+ *        call
+ *
+ * Unmaps a GPU VA range from the kernel, given its @map structure obtained
+ * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * The reference taken on pages during kbase_vmap() is released.
+ */
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
+
+/** @brief Allocate memory from kernel space and map it onto the GPU
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param size   The size of the allocation in bytes
+ * @param handle An opaque structure used to contain the state needed to free the memory
+ * @return the VA for kernel space and GPU MMU
+ */
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
+
+/** @brief Free/unmap memory allocated by kbase_va_alloc
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param handle An opaque structure returned by the kbase_va_alloc function.
+ */
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
+
+extern const struct vm_operations_struct kbase_vm_ops;
+
+#endif				/* _KBASE_MEM_LINUX_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
new file mode 100644
index 0000000..441180b
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_MEM_LOWLEVEL_H
+#define _KBASE_MEM_LOWLEVEL_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/dma-mapping.h>
+
+/**
+ * @brief Flags for kbase_phy_allocator_pages_alloc
+ */
+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0)	/** Default allocation flag */
+#define KBASE_PHY_PAGES_FLAG_CLEAR   (1 << 0)	/** Clear the pages after allocation */
+#define KBASE_PHY_PAGES_FLAG_POISON  (1 << 1)	/** Fill the memory with a poison value */
+
+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON)
+
+#define KBASE_PHY_PAGES_POISON_VALUE  0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */
+
+enum kbase_sync_type {
+	KBASE_SYNC_TO_CPU,
+	KBASE_SYNC_TO_DEVICE
+};
+
+#endif				/* _KBASE_LOWLEVEL_H */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
new file mode 100644
index 0000000..b5a7b8c
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -0,0 +1,582 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/shrinker.h>
+#include <linux/atomic.h>
+#include <linux/version.h>
+
+#define pool_dbg(pool, format, ...) \
+	dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format,	\
+		(pool->next_pool) ? "kctx" : "kbdev",	\
+		kbase_mem_pool_size(pool),	\
+		kbase_mem_pool_max_size(pool),	\
+		##__VA_ARGS__)
+
+#define NOT_DIRTY false
+#define NOT_RECLAIMED false
+
+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
+{
+	spin_lock(&pool->pool_lock);
+}
+
+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
+{
+	spin_unlock(&pool->pool_lock);
+}
+
+static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
+{
+	ssize_t max_size = kbase_mem_pool_max_size(pool);
+	ssize_t cur_size = kbase_mem_pool_size(pool);
+
+	return max(max_size - cur_size, (ssize_t)0);
+}
+
+static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool);
+}
+
+static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) == 0;
+}
+
+static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_add(&p->lru, &pool->page_list);
+	pool->cur_size++;
+
+	zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "added page\n");
+}
+
+static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_locked(pool, p);
+	kbase_mem_pool_unlock(pool);
+}
+
+static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_for_each_entry(p, page_list, lru) {
+		zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+	}
+
+	list_splice(page_list, &pool->page_list);
+	pool->cur_size += nr_pages;
+
+	pool_dbg(pool, "added %zu pages\n", nr_pages);
+}
+
+static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_list_locked(pool, page_list, nr_pages);
+	kbase_mem_pool_unlock(pool);
+}
+
+static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (kbase_mem_pool_is_empty(pool))
+		return NULL;
+
+	p = list_first_entry(&pool->page_list, struct page, lru);
+	list_del_init(&p->lru);
+	pool->cur_size--;
+
+	zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "removed page\n");
+
+	return p;
+}
+
+static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	kbase_mem_pool_lock(pool);
+	p = kbase_mem_pool_remove_locked(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return p;
+}
+
+static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+
+	dma_sync_single_for_device(dev, kbase_dma_addr(p),
+			PAGE_SIZE, DMA_BIDIRECTIONAL);
+}
+
+static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	clear_highpage(p);
+	kbase_mem_pool_sync_page(pool, p);
+}
+
+static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool,
+		struct page *p)
+{
+	/* Zero page before spilling */
+	kbase_mem_pool_zero_page(next_pool, p);
+
+	kbase_mem_pool_add(next_pool, p);
+}
+
+struct page *kbase_mem_alloc_page(struct kbase_device *kbdev)
+{
+	struct page *p;
+	gfp_t gfp;
+	struct device *dev = kbdev->dev;
+	dma_addr_t dma_addr;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \
+	LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+	/* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+	gfp = GFP_USER | __GFP_ZERO;
+#else
+	gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+	if (current->flags & PF_KTHREAD) {
+		/* Don't trigger OOM killer from kernel threads, e.g. when
+		 * growing memory on GPU page fault */
+		gfp |= __GFP_NORETRY;
+	}
+
+	p = alloc_page(gfp);
+	if (!p)
+		return NULL;
+
+	dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, dma_addr)) {
+		__free_page(p);
+		return NULL;
+	}
+
+	WARN_ON(dma_addr != page_to_phys(p));
+
+	kbase_set_dma_addr(p, dma_addr);
+
+	return p;
+}
+
+static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr = kbase_dma_addr(p);
+
+	dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	kbase_clear_dma_addr(p);
+	__free_page(p);
+
+	pool_dbg(pool, "freed page to kernel\n");
+}
+
+static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	struct page *p;
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	return i;
+}
+
+static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	size_t nr_freed;
+
+	kbase_mem_pool_lock(pool);
+	nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	kbase_mem_pool_unlock(pool);
+
+	return nr_freed;
+}
+
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
+		size_t nr_to_grow)
+{
+	struct page *p;
+	size_t i;
+
+	for (i = 0; i < nr_to_grow; i++) {
+		p = kbase_mem_alloc_page(pool->kbdev);
+		if (!p)
+			return -ENOMEM;
+		kbase_mem_pool_add(pool, p);
+	}
+
+	return 0;
+}
+
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
+{
+	size_t cur_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	if (new_size > pool->max_size)
+		new_size = pool->max_size;
+
+	if (new_size < cur_size)
+		kbase_mem_pool_shrink(pool, cur_size - new_size);
+	else if (new_size > cur_size)
+		kbase_mem_pool_grow(pool, new_size - cur_size);
+}
+
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size)
+{
+	size_t cur_size;
+	size_t nr_to_shrink;
+
+	kbase_mem_pool_lock(pool);
+
+	pool->max_size = max_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+	if (max_size < cur_size) {
+		nr_to_shrink = cur_size - max_size;
+		kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	}
+
+	kbase_mem_pool_unlock(pool);
+}
+
+
+static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+	pool_dbg(pool, "reclaim count: %zu\n", kbase_mem_pool_size(pool));
+	return kbase_mem_pool_size(pool);
+}
+
+static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+	unsigned long freed;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+
+	pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan);
+
+	freed = kbase_mem_pool_shrink(pool, sc->nr_to_scan);
+
+	pool_dbg(pool, "reclaim freed %ld pages\n", freed);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_pool_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_pool_reclaim_count_objects(s, sc);
+
+	return kbase_mem_pool_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool)
+{
+	pool->cur_size = 0;
+	pool->max_size = max_size;
+	pool->kbdev = kbdev;
+	pool->next_pool = next_pool;
+
+	spin_lock_init(&pool->pool_lock);
+	INIT_LIST_HEAD(&pool->page_list);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink;
+#else
+	pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects;
+	pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects;
+#endif
+	pool->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	pool->reclaim.batch = 0;
+#endif
+	register_shrinker(&pool->reclaim);
+
+	pool_dbg(pool, "initialized\n");
+
+	return 0;
+}
+
+void kbase_mem_pool_term(struct kbase_mem_pool *pool)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_spill = 0;
+	LIST_HEAD(spill_list);
+	int i;
+
+	pool_dbg(pool, "terminate()\n");
+
+	unregister_shrinker(&pool->reclaim);
+
+	kbase_mem_pool_lock(pool);
+	pool->max_size = 0;
+
+	if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool (may overspill) */
+		nr_to_spill = kbase_mem_pool_capacity(next_pool);
+		nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill);
+
+		/* Zero pages first without holding the next_pool lock */
+		for (i = 0; i < nr_to_spill; i++) {
+			p = kbase_mem_pool_remove_locked(pool);
+			kbase_mem_pool_zero_page(pool, p);
+			list_add(&p->lru, &spill_list);
+		}
+	}
+
+	while (!kbase_mem_pool_is_empty(pool)) {
+		/* Free remaining pages to kernel */
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	kbase_mem_pool_unlock(pool);
+
+	if (next_pool && nr_to_spill) {
+		/* Add new page list to next_pool */
+		kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill);
+
+		pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill);
+	}
+
+	pool_dbg(pool, "terminated\n");
+}
+
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	do {
+		pool_dbg(pool, "alloc()\n");
+		p = kbase_mem_pool_remove(pool);
+
+		if (p)
+			return p;
+
+		pool = pool->next_pool;
+	} while (pool);
+
+	return NULL;
+}
+
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+
+	pool_dbg(pool, "free()\n");
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add(pool, p);
+	} else if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool */
+		kbase_mem_pool_spill(next_pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages)
+{
+	struct page *p;
+	size_t nr_from_pool;
+	size_t i;
+	int err = -ENOMEM;
+
+	pool_dbg(pool, "alloc_pages(%zu):\n", nr_pages);
+
+	/* Get pages from this pool */
+	kbase_mem_pool_lock(pool);
+	nr_from_pool = min(nr_pages, kbase_mem_pool_size(pool));
+	for (i = 0; i < nr_from_pool; i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		pages[i] = page_to_phys(p);
+	}
+	kbase_mem_pool_unlock(pool);
+
+	if (i != nr_pages && pool->next_pool) {
+		/* Allocate via next pool */
+		err = kbase_mem_pool_alloc_pages(pool->next_pool,
+				nr_pages - i, pages + i);
+
+		if (err)
+			goto err_rollback;
+
+		i += nr_pages - i;
+	}
+
+	/* Get any remaining pages from kernel */
+	for (; i < nr_pages; i++) {
+		p = kbase_mem_alloc_page(pool->kbdev);
+		if (!p)
+			goto err_rollback;
+		pages[i] = page_to_phys(p);
+	}
+
+	pool_dbg(pool, "alloc_pages(%zu) done\n", nr_pages);
+
+	return 0;
+
+err_rollback:
+	kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED);
+	return err;
+}
+
+static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
+		size_t nr_pages, phys_addr_t *pages, bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first without holding the pool lock */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!pages[i]))
+			continue;
+
+		p = phys_to_page(pages[i]);
+
+		if (zero)
+			kbase_mem_pool_zero_page(pool, p);
+		else if (sync)
+			kbase_mem_pool_sync_page(pool, p);
+
+		list_add(&p->lru, &new_page_list);
+		nr_to_pool++;
+		pages[i] = 0;
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		phys_addr_t *pages, bool dirty, bool reclaimed)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	pool_dbg(pool, "free_pages(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
+
+		i += nr_to_pool;
+
+		if (i != nr_pages && next_pool) {
+			/* Spill to next pool (may overspill) */
+			nr_to_pool = kbase_mem_pool_capacity(next_pool);
+			nr_to_pool = min(nr_pages - i, nr_to_pool);
+
+			kbase_mem_pool_add_array(next_pool, nr_to_pool,
+					pages + i, true, dirty);
+			i += nr_to_pool;
+		}
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!pages[i]))
+			continue;
+
+		p = phys_to_page(pages[i]);
+		if (reclaimed)
+			zone_page_state_add(-1, page_zone(p),
+					NR_SLAB_RECLAIMABLE);
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = 0;
+	}
+
+	pool_dbg(pool, "free_pages(%zu) done\n", nr_pages);
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
new file mode 100644
index 0000000..585fba0
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <mali_kbase_mem_pool_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_trim(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops,
+		kbase_mem_pool_debugfs_size_get,
+		kbase_mem_pool_debugfs_size_set,
+		"%llu\n");
+
+static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_max_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_set_max_size(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops,
+		kbase_mem_pool_debugfs_max_size_get,
+		kbase_mem_pool_debugfs_max_size_set,
+		"%llu\n");
+
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_mem_pool *pool)
+{
+	debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_size_fops);
+
+	debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_max_size_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
new file mode 100644
index 0000000..1442854
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_MEM_POOL_DEBUGFS_H
+#define _KBASE_MEM_POOL_DEBUGFS_H
+
+#include <mali_kbase.h>
+
+/**
+ * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool
+ * @parent: Parent debugfs dentry
+ * @pool:   Memory pool to control
+ *
+ * Adds two debugfs files under @parent:
+ * - mem_pool_size: get/set the current size of @pool
+ * - mem_pool_max_size: get/set the max size of @pool
+ */
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_mem_pool *pool);
+
+#endif  /*_KBASE_MEM_POOL_DEBUGFS_H*/
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
new file mode 100644
index 0000000..092da9a
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,119 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Show callback for the @c mem_profile debugfs file.
+ *
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise
+ */
+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+
+	seq_putc(sfile, '\n');
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for mem_profile
+ */
+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_mem_profile_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+	.open = kbasep_mem_profile_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+		kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		if (!debugfs_create_file("mem_profile", S_IRUGO,
+					kctx->kctx_dentry, kctx,
+					&kbasep_mem_profile_debugfs_fops)) {
+			err = -EAGAIN;
+		} else {
+			kbase_ctx_flag_set(kctx,
+					   KCTX_MEM_PROFILE_INITIALIZED);
+		}
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		kfree(kctx->mem_profile_data);
+		kctx->mem_profile_data = data;
+		kctx->mem_profile_size = size;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d",
+		err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return err;
+}
+
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+				kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	kfree(kctx->mem_profile_data);
+	kctx->mem_profile_data = NULL;
+	kctx->mem_profile_size = 0;
+
+	mutex_unlock(&kctx->mem_profile_lock);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	kfree(data);
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
new file mode 100644
index 0000000..a1dc2e0
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
@@ -0,0 +1,59 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs.h
+ * Header file for mem profiles entries in debugfs
+ *
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
+#define _KBASE_MEM_PROFILE_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Remove entry from Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
+
+/**
+ * @brief Insert @p data to the debugfs file so it can be read by userspace
+ *
+ * The function takes ownership of @p data and frees it later when new data
+ * is inserted.
+ *
+ * If the debugfs entry corresponding to the @p kctx doesn't exist,
+ * an attempt will be made to create it.
+ *
+ * @param kctx The context whose debugfs file @p data should be inserted to
+ * @param data A NULL-terminated string to be inserted to the debugfs file,
+ *             without the trailing new line character
+ * @param size The length of the @p data string
+ * @return 0 if @p data inserted correctly
+ *         -EAGAIN in case of error
+ * @post @ref mem_profile_initialized will be set to @c true
+ *       the first time this function succeeds.
+ */
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size);
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
new file mode 100644
index 0000000..82f0702
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs_buf_size.h
+ * Header file for the size of the buffer to accumulate the histogram report text in
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * The size of the buffer to accumulate the histogram report text in
+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56))
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
new file mode 100644
index 0000000..0329dfd
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -0,0 +1,2073 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mmu.c
+ * Base kernel MMU management.
+ */
+
+/* #define DEBUG    1 */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_debug.h>
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_time.h>
+#include <mali_kbase_mem.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+/**
+ * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs.
+ *
+ * If sync is not set then transactions still in flight when the flush is issued
+ * may use the old page tables and the data they write will not be written out
+ * to memory, this function returns after the flush has been issued but
+ * before all accesses which might effect the flushed region have completed.
+ *
+ * If sync is set then accesses in the flushed region will be drained
+ * before data is flush and invalidated through L1, L2 and into memory,
+ * after which point this function will return.
+ */
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync);
+
+/**
+ * kbase_mmu_sync_pgd - sync page directory to memory
+ * @kbdev:	Device pointer.
+ * @handle:	Address of DMA region.
+ * @size:       Size of the region to sync.
+ *
+ * This should be called after each page directory update.
+ */
+
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
+		dma_addr_t handle, size_t size)
+{
+	/* If page table is not coherent then ensure the gpu can read
+	 * the pages from memory
+	 */
+	if (kbdev->system_coherency != COHERENCY_ACE)
+		dma_sync_single_for_device(kbdev->dev, handle, size,
+				DMA_TO_DEVICE);
+}
+
+/*
+ * Definitions:
+ * - PGD: Page Directory.
+ * - PTE: Page Table Entry. A 64bit value pointing to the next
+ *        level of translation
+ * - ATE: Address Transation Entry. A 64bit value pointing to
+ *        a 4kB physical page.
+ */
+
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str);
+
+
+static size_t make_multiple(size_t minimum, size_t multiple)
+{
+	size_t remainder = minimum % multiple;
+
+	if (remainder == 0)
+		return minimum;
+
+	return minimum + multiple - remainder;
+}
+
+void page_fault_worker(struct work_struct *data)
+{
+	u64 fault_pfn;
+	u32 fault_status;
+	size_t new_pages;
+	size_t fault_rel_pfn;
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_va_region *region;
+	int err;
+	bool grown = false;
+
+	faulting_as = container_of(data, struct kbase_as, work_pagefault);
+	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+
+	if (unlikely(faulting_as->protected_mode))
+	{
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Protected mode fault");
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+
+		goto fault_done;
+	}
+
+	fault_status = faulting_as->fault_status;
+	switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
+		/* need to check against the region to handle this one */
+		break;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Translation table bus fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
+		/* nothing to do, but we don't expect this fault currently */
+		dev_warn(kbdev->dev, "Access flag unexpectedly set");
+		goto fault_done;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
+
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Address size fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Memory attributes fault");
+		goto fault_done;
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	default:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Unknown fault code");
+		goto fault_done;
+	}
+
+	/* so we have a translation fault, let's see if it is for growable
+	 * memory */
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx,
+			faulting_as->fault_addr);
+	if (!region || region->flags & KBASE_REG_FREE) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"DMA-BUF is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if ((region->flags & GROWABLE_FLAGS_REQUIRED)
+			!= GROWABLE_FLAGS_REQUIRED) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not growable");
+		goto fault_done;
+	}
+
+	if ((region->flags & KBASE_REG_DONT_NEED)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Don't need memory can't be grown");
+		goto fault_done;
+	}
+
+	/* find the size we need to grow it by */
+	/* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address
+	 * validating the fault_adress to be within a size_t from the start_pfn */
+	fault_rel_pfn = fault_pfn - region->start_pfn;
+
+	if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
+		dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+				faulting_as->fault_addr, region->start_pfn,
+				region->start_pfn +
+				kbase_reg_current_backed_size(region));
+
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* [1] in case another page fault occurred while we were
+		 * handling the (duplicate) page fault we need to ensure we
+		 * don't loose the other page fault as result of us clearing
+		 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
+		 * an UNLOCK command that will retry any stalled memory
+		 * transaction (which should cause the other page fault to be
+		 * raised again).
+		 */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+
+		goto fault_done;
+	}
+
+	new_pages = make_multiple(fault_rel_pfn -
+			kbase_reg_current_backed_size(region) + 1,
+			region->extent);
+
+	/* cap to max vsize */
+	if (new_pages + kbase_reg_current_backed_size(region) >
+			region->nr_pages)
+		new_pages = region->nr_pages -
+				kbase_reg_current_backed_size(region);
+
+	if (0 == new_pages) {
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Duplicate of a fault we've already handled, nothing to do */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* See comment [1] about UNLOCK usage */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+		goto fault_done;
+	}
+
+	if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) {
+		if (region->gpu_alloc != region->cpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					region->cpu_alloc, new_pages) == 0) {
+				grown = true;
+			} else {
+				kbase_free_phy_pages_helper(region->gpu_alloc,
+						new_pages);
+			}
+		} else {
+			grown = true;
+		}
+	}
+
+
+	if (grown) {
+		u64 pfn_offset;
+		u32 op;
+
+		/* alloc success */
+		KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
+
+		/* set up the new pages */
+		pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
+		/*
+		 * Note:
+		 * Issuing an MMU operation will unlock the MMU and cause the
+		 * translation to be replayed. If the page insertion fails then
+		 * rather then trying to continue the context should be killed
+		 * so the no_flush version of insert_pages is used which allows
+		 * us to unlock the MMU as we see fit.
+		 */
+		err = kbase_mmu_insert_pages_no_flush(kctx,
+				region->start_pfn + pfn_offset,
+				&kbase_get_gpu_phy_pages(region)[pfn_offset],
+				new_pages, region->flags);
+		if (err) {
+			kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
+			if (region->gpu_alloc != region->cpu_alloc)
+				kbase_free_phy_pages_helper(region->cpu_alloc,
+						new_pages);
+			kbase_gpu_vm_unlock(kctx);
+			/* The locked VA region will be unlocked and the cache invalidated in here */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page table update failure");
+			goto fault_done;
+		}
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
+#endif
+		KBASE_TLSTREAM_AUX_PAGEFAULT(kctx->id, (u64)new_pages);
+
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* flush L2 and unlock the VA (resumes the MMU) */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+			op = AS_COMMAND_FLUSH;
+		else
+			op = AS_COMMAND_FLUSH_PT;
+
+		/* clear MMU interrupt - this needs to be done after updating
+		 * the page tables but before issuing a FLUSH command. The
+		 * FLUSH cmd has a side effect that it restarts stalled memory
+		 * transactions in other address spaces which may cause
+		 * another fault to occur. If we didn't clear the interrupt at
+		 * this stage a new IRQ might not be raised when the GPU finds
+		 * a MMU IRQ is already pending.
+		 */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
+					  faulting_as->fault_addr >> PAGE_SHIFT,
+					  new_pages,
+					  op, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		/* reenable this in the mask */
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* failed to extend, handle as a normal PF */
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Page allocation failure");
+	}
+
+fault_done:
+	/*
+	 * By this point, the fault was handled in some way,
+	 * so release the ctx refcount
+	 */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
+{
+	u64 *page;
+	int i;
+	struct page *p;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
+	kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	p = kbase_mem_pool_alloc(&kctx->mem_pool);
+	if (!p)
+		goto sub_pages;
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			(u32)kctx->id,
+			(u64)new_page_count);
+
+	page = kmap(p);
+	if (NULL == page)
+		goto alloc_free;
+
+	kbase_process_page_usage_inc(kctx, 1);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
+		kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
+
+	kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+	kunmap(p);
+	return page_to_phys(p);
+
+alloc_free:
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+sub_pages:
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
+
+/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
+ * new table from the pool if needed and possible
+ */
+static int mmu_get_next_pgd(struct kbase_context *kctx,
+		phys_addr_t *pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(*pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	p = pfn_to_page(PFN_DOWN(*pgd));
+	page = kmap(p);
+	if (NULL == page) {
+		dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
+		return -EINVAL;
+	}
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+
+	if (!target_pgd) {
+		target_pgd = kbase_mmu_alloc_pgd(kctx);
+		if (!target_pgd) {
+			dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+			kunmap(p);
+			return -ENOMEM;
+		}
+
+		kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+		/* Rely on the caller to update the address space flags. */
+	}
+
+	kunmap(p);
+	*pgd = target_pgd;
+
+	return 0;
+}
+
+static int mmu_get_bottom_pgd(struct kbase_context *kctx,
+		u64 vpfn, phys_addr_t *out_pgd)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd = kctx->pgd;
+	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
+		int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l);
+		/* Handle failure condition */
+		if (err) {
+			dev_dbg(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n");
+			return err;
+		}
+	}
+
+	*out_pgd = pgd;
+
+	return 0;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+
+	KBASE_DEBUG_ASSERT(pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail */
+	KBASE_DEBUG_ASSERT(NULL != page);
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+	/* As we are recovering from what has already been set up, we should have a target_pgd */
+	KBASE_DEBUG_ASSERT(0 != target_pgd);
+	kunmap_atomic(page);
+	return target_pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	pgd = kctx->pgd;
+
+	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
+		pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
+		/* Should never fail */
+		KBASE_DEBUG_ASSERT(0 != pgd);
+	}
+
+	return pgd;
+}
+
+static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vpfn,
+					      size_t nr)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	lockdep_assert_held(&kctx->mmu_lock);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn);
+		KBASE_DEBUG_ASSERT(0 != pgd);
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+
+		pgd_page = kmap_atomic(p);
+		KBASE_DEBUG_ASSERT(NULL != pgd_page);
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+		kunmap_atomic(pgd_page);
+	}
+}
+
+/*
+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					phys_addr_t phys, size_t nr,
+					unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_single_page only partially completes we need to be
+	 * able to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > remain)
+			count = remain;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+			kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+					phys, flags);
+		}
+
+		vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table.
+		 * If further pages need inserting and fail we need to undo what
+		 * has already taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_pages only partially completes we need to be able
+	 * to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > remain)
+			count = remain;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_count);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+			kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+					phys[i], flags);
+		}
+
+		phys += count;
+		vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table. If further pages
+		 * need inserting and fail we need to undo what has already
+		 * taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags)
+{
+	int err;
+
+	err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+
+/**
+ * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
+ * without retaining the kbase context.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
+ * other locking.
+ */
+static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+	u32 op;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	if (sync)
+		op = AS_COMMAND_FLUSH_MEM;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	err = kbase_mmu_hw_do_operation(kbdev,
+				&kbdev->as[kctx->as_nr],
+				kctx, vpfn, nr, op, 0);
+#if KBASE_GPU_RESET_EN
+	if (err) {
+		/* Flush failed to complete, assume the
+		 * GPU has hung and perform a reset to
+		 * recover */
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_NO_MALI
+	/*
+	 * As this function could be called in interrupt context the sync
+	 * request can't block. Instead log the request and the next flush
+	 * request will pick it up.
+	 */
+	if ((!err) && sync &&
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367))
+		atomic_set(&kctx->drain_pending, 1);
+#endif /* !CONFIG_MALI_NO_MALI */
+}
+
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev;
+	bool ctx_is_in_runpool;
+#ifndef CONFIG_MALI_NO_MALI
+	bool drain_pending = false;
+
+	if (atomic_xchg(&kctx->drain_pending, 0))
+		drain_pending = true;
+#endif /* !CONFIG_MALI_NO_MALI */
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	kbdev = kctx->kbdev;
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if (ctx_is_in_runpool) {
+		KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+		if (!kbase_pm_context_active_handle_suspend(kbdev,
+			KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+			int err;
+			u32 op;
+
+			/* AS transaction begin */
+			mutex_lock(&kbdev->mmu_hw_mutex);
+
+			if (sync)
+				op = AS_COMMAND_FLUSH_MEM;
+			else
+				op = AS_COMMAND_FLUSH_PT;
+
+			err = kbase_mmu_hw_do_operation(kbdev,
+						&kbdev->as[kctx->as_nr],
+						kctx, vpfn, nr, op, 0);
+
+#if KBASE_GPU_RESET_EN
+			if (err) {
+				/* Flush failed to complete, assume the
+				 * GPU has hung and perform a reset to
+				 * recover */
+				dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
+
+				if (kbase_prepare_to_reset_gpu(kbdev))
+					kbase_reset_gpu(kbdev);
+			}
+#endif /* KBASE_GPU_RESET_EN */
+
+			mutex_unlock(&kbdev->mmu_hw_mutex);
+			/* AS transaction end */
+
+#ifndef CONFIG_MALI_NO_MALI
+			/*
+			 * The transaction lock must be dropped before here
+			 * as kbase_wait_write_flush could take it if
+			 * the GPU was powered down (static analysis doesn't
+			 * know this can't happen).
+			 */
+			drain_pending |= (!err) && sync &&
+					kbase_hw_has_issue(kctx->kbdev,
+							BASE_HW_ISSUE_6367);
+			if (drain_pending) {
+				/* Wait for GPU to flush write buffer */
+				kbase_wait_write_flush(kctx);
+			}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+			kbase_pm_context_idle(kbdev);
+		}
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+}
+
+void kbase_mmu_update(struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the hwaccess_lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	kctx->kbdev->mmu_mode->update(kctx);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_update);
+
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	kbdev->mmu_mode->disable_as(kbdev, as_nr);
+}
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the hwaccess_lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/*
+	 * The address space is being disabled, drain all knowledge of it out
+	 * from the caches as pages and page tables might be freed after this.
+	 *
+	 * The job scheduler code will already be holding the locks and context
+	 * so just do the flush.
+	 */
+	kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
+
+	kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+
+/*
+ * We actually only discard the ATE, and not the page table
+ * pages. There is a potential DoS here, as we'll leak memory by
+ * having PTEs that are potentially unused.  Will require physical
+ * page accounting, so MMU pages are part of the process allocation.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	struct kbase_device *kbdev;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
+
+	if (0 == nr) {
+		/* early out if nothing to do */
+		return 0;
+	}
+
+	mutex_lock(&kctx->mmu_lock);
+
+	kbdev = kctx->kbdev;
+	mmu_mode = kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+		if (err) {
+			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
+			err = -EINVAL;
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+/**
+ * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
+ * This call is being triggered as a response to the changes of the mem attributes
+ *
+ * @pre : The caller is responsible for validating the memory attributes
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags on GPU PFN 0x%llx from phys %p, %zu pages",
+			vpfn, phys, nr);
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		size_t count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i],
+					flags);
+
+		phys += count;
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(pfn_to_page(PFN_DOWN(pgd)));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
+}
+
+/* This is a debug feature only */
+static void mmu_check_unused(struct kbase_context *kctx, phys_addr_t pgd)
+{
+	u64 *page;
+	int i;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != page);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		if (kctx->kbdev->mmu_mode->ate_is_valid(page[i]))
+			beenthere(kctx, "live pte %016lx", (unsigned long)page[i]);
+	}
+	kunmap_atomic(page);
+}
+
+static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != pgd_page);
+	/* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */
+	memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+	kunmap_atomic(pgd_page);
+	pgd_page = pgd_page_buffer;
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+		if (target_pgd) {
+			if (level < (MIDGARD_MMU_BOTTOMLEVEL - 1)) {
+				mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
+			} else {
+				/*
+				 * So target_pte is a level-3 page.
+				 * As a leaf, it is safe to free it.
+				 * Unless we have live pages attached to it!
+				 */
+				mmu_check_unused(kctx, target_pgd);
+			}
+
+			beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1);
+			if (zap) {
+				struct page *p = phys_to_page(target_pgd);
+
+				kbase_mem_pool_free(&kctx->mem_pool, p, true);
+				kbase_process_page_usage_dec(kctx, 1);
+				kbase_atomic_sub_pages(1, &kctx->used_pages);
+				kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+			}
+		}
+	}
+}
+
+int kbase_mmu_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
+
+	mutex_init(&kctx->mmu_lock);
+
+	/* Preallocate MMU depth of four pages for mmu_teardown_level to use */
+	kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+
+	if (NULL == kctx->mmu_teardown_pages)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void kbase_mmu_term(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	kfree(kctx->mmu_teardown_pages);
+	kctx->mmu_teardown_pages = NULL;
+}
+
+void kbase_mmu_free_pgd(struct kbase_context *kctx)
+{
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	mutex_lock(&kctx->mmu_lock);
+	mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages);
+	mutex_unlock(&kctx->mmu_lock);
+
+	beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
+	kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true);
+	kbase_process_page_usage_dec(kctx, 1);
+	new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
+
+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
+	size_t dump_size;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+	if (!pgd_page) {
+		dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
+		return 0;
+	}
+
+	if (*size_left >= size) {
+		/* A modified physical address that contains the page table level */
+		u64 m_pgd = pgd | level;
+
+		/* Put the modified physical address in the output buffer */
+		memcpy(*buffer, &m_pgd, sizeof(m_pgd));
+		*buffer += sizeof(m_pgd);
+
+		/* Followed by the page table itself */
+		memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
+		*buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
+
+		*size_left -= size;
+	}
+
+	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
+		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+			if (mmu_mode->pte_is_valid(pgd_page[i])) {
+				target_pgd = mmu_mode->pte_to_phy_addr(
+						pgd_page[i]);
+
+				dump_size = kbasep_mmu_dump_level(kctx,
+						target_pgd, level + 1,
+						buffer, size_left);
+				if (!dump_size) {
+					kunmap(pfn_to_page(PFN_DOWN(pgd)));
+					return 0;
+				}
+				size += dump_size;
+			}
+		}
+	}
+
+	kunmap(pfn_to_page(PFN_DOWN(pgd)));
+
+	return size;
+}
+
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
+{
+	void *kaddr;
+	size_t size_left;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (0 == nr_pages) {
+		/* can't dump in a 0 sized buffer, early out */
+		return NULL;
+	}
+
+	size_left = nr_pages * PAGE_SIZE;
+
+	KBASE_DEBUG_ASSERT(0 != size_left);
+	kaddr = vmalloc_user(size_left);
+
+	mutex_lock(&kctx->mmu_lock);
+
+	if (kaddr) {
+		u64 end_marker = 0xFFULL;
+		char *buffer;
+		char *mmu_dump_buffer;
+		u64 config[3];
+		size_t size;
+
+		buffer = (char *)kaddr;
+		mmu_dump_buffer = buffer;
+
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
+			struct kbase_mmu_setup as_setup;
+
+			kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup);
+			config[0] = as_setup.transtab;
+			config[1] = as_setup.memattr;
+			config[2] = as_setup.transcfg;
+			memcpy(buffer, &config, sizeof(config));
+			mmu_dump_buffer += sizeof(config);
+			size_left -= sizeof(config);
+		}
+
+
+
+		size = kbasep_mmu_dump_level(kctx,
+				kctx->pgd,
+				MIDGARD_MMU_TOPLEVEL,
+				&mmu_dump_buffer,
+				&size_left);
+
+		if (!size)
+			goto fail_free;
+
+		/* Add on the size for the end marker */
+		size += sizeof(u64);
+		/* Add on the size for the config */
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4))
+			size += sizeof(config);
+
+
+		if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) {
+			/* The buffer isn't big enough - free the memory and return failure */
+			goto fail_free;
+		}
+
+		/* Add the end marker */
+		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return kaddr;
+
+fail_free:
+	vfree(kaddr);
+	mutex_unlock(&kctx->mmu_lock);
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_dump);
+
+void bus_fault_worker(struct work_struct *data)
+{
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif /* KBASE_GPU_RESET_EN */
+
+	faulting_as = container_of(data, struct kbase_as, work_busfault);
+
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	if (unlikely(faulting_as->protected_mode))
+	{
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+		atomic_dec(&kbdev->faults_pending);
+		return;
+
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
+	if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		unsigned long flags;
+
+		/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Set the MMU into unmapped mode */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_disable(kctx);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+
+		kbase_pm_context_idle(kbdev);
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
+{
+	const char *e;
+
+	switch (exception_code) {
+		/* Non-Fault Status code */
+	case 0x00:
+		e = "NOT_STARTED/IDLE/OK";
+		break;
+	case 0x01:
+		e = "DONE";
+		break;
+	case 0x02:
+		e = "INTERRUPTED";
+		break;
+	case 0x03:
+		e = "STOPPED";
+		break;
+	case 0x04:
+		e = "TERMINATED";
+		break;
+	case 0x08:
+		e = "ACTIVE";
+		break;
+		/* Job exceptions */
+	case 0x40:
+		e = "JOB_CONFIG_FAULT";
+		break;
+	case 0x41:
+		e = "JOB_POWER_FAULT";
+		break;
+	case 0x42:
+		e = "JOB_READ_FAULT";
+		break;
+	case 0x43:
+		e = "JOB_WRITE_FAULT";
+		break;
+	case 0x44:
+		e = "JOB_AFFINITY_FAULT";
+		break;
+	case 0x48:
+		e = "JOB_BUS_FAULT";
+		break;
+	case 0x50:
+		e = "INSTR_INVALID_PC";
+		break;
+	case 0x51:
+		e = "INSTR_INVALID_ENC";
+		break;
+	case 0x52:
+		e = "INSTR_TYPE_MISMATCH";
+		break;
+	case 0x53:
+		e = "INSTR_OPERAND_FAULT";
+		break;
+	case 0x54:
+		e = "INSTR_TLS_FAULT";
+		break;
+	case 0x55:
+		e = "INSTR_BARRIER_FAULT";
+		break;
+	case 0x56:
+		e = "INSTR_ALIGN_FAULT";
+		break;
+	case 0x58:
+		e = "DATA_INVALID_FAULT";
+		break;
+	case 0x59:
+		e = "TILE_RANGE_FAULT";
+		break;
+	case 0x5A:
+		e = "ADDR_RANGE_FAULT";
+		break;
+	case 0x60:
+		e = "OUT_OF_MEMORY";
+		break;
+		/* GPU exceptions */
+	case 0x80:
+		e = "DELAYED_BUS_FAULT";
+		break;
+	case 0x88:
+		e = "SHAREABILITY_FAULT";
+		break;
+		/* MMU exceptions */
+	case 0xC0:
+	case 0xC1:
+	case 0xC2:
+	case 0xC3:
+	case 0xC4:
+	case 0xC5:
+	case 0xC6:
+	case 0xC7:
+		e = "TRANSLATION_FAULT";
+		break;
+	case 0xC8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xC9:
+	case 0xCA:
+	case 0xCB:
+	case 0xCC:
+	case 0xCD:
+	case 0xCE:
+	case 0xCF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		e = "PERMISSION_FAULT";
+		break;
+	case 0xD0:
+	case 0xD1:
+	case 0xD2:
+	case 0xD3:
+	case 0xD4:
+	case 0xD5:
+	case 0xD6:
+	case 0xD7:
+		e = "TRANSTAB_BUS_FAULT";
+		break;
+	case 0xD8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xD9:
+	case 0xDA:
+	case 0xDB:
+	case 0xDC:
+	case 0xDD:
+	case 0xDE:
+	case 0xDF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		e = "ACCESS_FLAG";
+		break;
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xE0:
+	case 0xE1:
+	case 0xE2:
+	case 0xE3:
+	case 0xE4:
+	case 0xE5:
+	case 0xE6:
+	case 0xE7:
+		e = "ADDRESS_SIZE_FAULT";
+		break;
+	case 0xE8:
+	case 0xE9:
+	case 0xEA:
+	case 0xEB:
+	case 0xEC:
+	case 0xED:
+	case 0xEE:
+	case 0xEF:
+		e = "MEMORY_ATTRIBUTES_FAULT";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+		break;
+	default:
+		e = "UNKNOWN";
+		break;
+	};
+
+	return e;
+}
+
+static const char *access_type_name(struct kbase_device *kbdev,
+		u32 fault_status)
+{
+	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		return "ATOMIC";
+#else
+		return "UNKNOWN";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		return "READ";
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		return "WRITE";
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		return "EXECUTE";
+	default:
+		WARN_ON(1);
+		return NULL;
+	}
+}
+
+/**
+ * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on.
+ */
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str)
+{
+	unsigned long flags;
+	int exception_type;
+	int access_type;
+	int source_id;
+	int as_no;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif
+
+	as_no = as->number;
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	/* ASSERT that the context won't leave the runpool */
+	KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0);
+
+	/* decode the fault status */
+	exception_type = as->fault_status & 0xFF;
+	access_type = (as->fault_status >> 8) & 0x3;
+	source_id = (as->fault_status >> 16);
+
+	/* terminal fault, print info about the fault */
+	dev_err(kbdev->dev,
+		"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+		"Reason: %s\n"
+		"raw fault status: 0x%X\n"
+		"decoded fault status: %s\n"
+		"exception type 0x%X: %s\n"
+		"access type 0x%X: %s\n"
+		"source id 0x%X\n"
+		"pid: %d\n",
+		as_no, as->fault_addr,
+		reason_str,
+		as->fault_status,
+		(as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+		exception_type, kbase_exception_name(kbdev, exception_type),
+		access_type, access_type_name(kbdev, as->fault_status),
+		source_id,
+		kctx->pid);
+
+	/* hardware counters dump fault handling */
+	if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
+			(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_DUMPING)) {
+		unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+
+		if ((as->fault_addr >= kbdev->hwcnt.addr) &&
+				(as->fault_addr < (kbdev->hwcnt.addr +
+						(num_core_groups * 2048))))
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+	}
+
+	/* Stop the kctx from submitting more jobs and cause it to be scheduled
+	 * out/rescheduled - this will occur on releasing the context's refcount */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_clear_submit_allowed(js_devdata, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
+	 * context can appear in the job slots from this point on */
+	kbase_backend_jm_kill_jobs_from_kctx(kctx);
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_mmu_disable(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+	/* Clear down the fault */
+	kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+	kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+}
+
+void kbasep_as_do_poke(struct work_struct *work)
+{
+	struct kbase_as *as;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(work);
+	as = container_of(work, struct kbase_as, poke_work);
+	kbdev = container_of(as, struct kbase_device, as[as->number]);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	/* GPU power will already be active by virtue of the caller holding a JS
+	 * reference on the address space, and will not release it until this worker
+	 * has finished */
+
+	/* Further to the comment above, we know that while this function is running
+	 * the AS will not be released as before the atom is released this workqueue
+	 * is flushed (in kbase_as_poking_timer_release_atom)
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
+
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	/* Force a uTLB invalidate */
+	kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
+				  AS_COMMAND_UNLOCK, 0);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (as->poke_refcount &&
+		!(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
+		/* Only queue up the timer if we need it, and we're not trying to kill it */
+		hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_as *as;
+	int queue_work_ret;
+
+	KBASE_DEBUG_ASSERT(NULL != timer);
+	as = container_of(timer, struct kbase_as, poke_timer);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+	KBASE_DEBUG_ASSERT(queue_work_ret);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * Retain the poking timer on an atom's context (if the atom hasn't already
+ * done so), and start the timer (if it's not already started).
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that's running on the GPU.
+ *
+ * The caller must hold hwaccess_lock
+ *
+ * This can be called safely from atomic context
+ */
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->poking)
+		return;
+
+	katom->poking = 1;
+
+	/* It's safe to work on the as/as_nr without an explicit reference,
+	 * because the caller holds the hwaccess_lock, and the atom itself
+	 * was also running and had already taken a reference  */
+	as = &kbdev->as[kctx->as_nr];
+
+	if (++(as->poke_refcount) == 1) {
+		/* First refcount for poke needed: check if not already in flight */
+		if (!as->poke_state) {
+			/* need to start poking */
+			as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
+			queue_work(as->poke_wq, &as->poke_work);
+		}
+	}
+}
+
+/**
+ * If an atom holds a poking timer, release it and wait for it to finish
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that still has a JS reference on the context
+ *
+ * This must \b not be called from atomic context, since it can sleep.
+ */
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	if (!katom->poking)
+		return;
+
+	as = &kbdev->as[kctx->as_nr];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	if (--(as->poke_refcount) == 0) {
+		as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_cancel(&as->poke_timer);
+		flush_workqueue(as->poke_wq);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* Re-check whether it's still needed */
+		if (as->poke_refcount) {
+			int queue_work_ret;
+			/* Poking still needed:
+			 * - Another retain will not be starting the timer or queueing work,
+			 * because it's still marked as in-flight
+			 * - The hrtimer has finished, and has not started a new timer or
+			 * queued work because it's been marked as killing
+			 *
+			 * So whatever happens now, just queue the work again */
+			as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
+			queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+			KBASE_DEBUG_ASSERT(queue_work_ret);
+		} else {
+			/* It isn't - so mark it as not in flight, and not killing */
+			as->poke_state = 0u;
+
+			/* The poke associated with the atom has now finished. If this is
+			 * also the last atom on the context, then we can guarentee no more
+			 * pokes (and thus no more poking register accesses) will occur on
+			 * the context until new atoms are run */
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	katom->poking = 0;
+}
+
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kctx) {
+		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n",
+				 kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
+				 as->number, as->fault_addr);
+
+		/* Since no ctx was found, the MMU must be disabled. */
+		WARN_ON(as->current_setup.transtab);
+
+		if (kbase_as_has_bus_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		} else if (kbase_as_has_page_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+		}
+
+#if KBASE_GPU_RESET_EN
+		if (kbase_as_has_bus_fault(as) &&
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+			bool reset_status;
+			/*
+			 * Reset the GPU, like in bus_fault_worker, in case an
+			 * earlier error hasn't been properly cleared by this
+			 * point.
+			 */
+			dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+			reset_status = kbase_prepare_to_reset_gpu_locked(kbdev);
+			if (reset_status)
+				kbase_reset_gpu_locked(kbdev);
+		}
+#endif /* KBASE_GPU_RESET_EN */
+
+		return;
+	}
+
+	if (kbase_as_has_bus_fault(as)) {
+		/*
+		 * hw counters dumping in progress, signal the
+		 * other thread that it failed
+		 */
+		if ((kbdev->hwcnt.kctx == kctx) &&
+		    (kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_DUMPING))
+			kbdev->hwcnt.backend.state =
+						KBASE_INSTR_STATE_FAULT;
+
+		/*
+		 * Stop the kctx from submitting more jobs and cause it
+		 * to be scheduled out/rescheduled when all references
+		 * to it are released
+		 */
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		dev_warn(kbdev->dev,
+				"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+				as->number, as->fault_addr,
+				as->fault_extra_addr);
+#else
+		dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+				as->number, as->fault_addr);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+		/*
+		 * We need to switch to UNMAPPED mode - but we do this in a
+		 * worker so that we can sleep
+		 */
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault));
+		WARN_ON(work_pending(&as->work_busfault));
+		queue_work(as->pf_wq, &as->work_busfault);
+		atomic_inc(&kbdev->faults_pending);
+	} else {
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault));
+		WARN_ON(work_pending(&as->work_pagefault));
+		queue_work(as->pf_wq, &as->work_pagefault);
+		atomic_inc(&kbdev->faults_pending);
+	}
+}
+
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		flush_workqueue(as->pf_wq);
+	}
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
new file mode 100644
index 0000000..986e959
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Interface file for accessing MMU hardware functionality
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_page MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_intro_sec Introduction
+ * This module provides an abstraction for accessing the functionality provided
+ * by the midgard MMU and thus allows all MMU HW access to be contained within
+ * one common place and allows for different backends (implementations) to
+ * be provided.
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_H_
+#define _MALI_KBASE_MMU_HW_H_
+
+/* Forward declarations */
+struct kbase_device;
+struct kbase_as;
+struct kbase_context;
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw  MMU access APIs
+ * @{
+ */
+
+/** @brief MMU fault type descriptor.
+ */
+enum kbase_mmu_fault_type {
+	KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
+	KBASE_MMU_FAULT_TYPE_PAGE,
+	KBASE_MMU_FAULT_TYPE_BUS,
+	KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED,
+	KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED
+};
+
+/** @brief Configure an address space for use.
+ *
+ * Configure the MMU using the address space details setup in the
+ * @ref kbase_context structure.
+ *
+ * @param[in]  kbdev          kbase device to configure.
+ * @param[in]  as             address space to configure.
+ * @param[in]  kctx           kbase context to configure.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev,
+		struct kbase_as *as, struct kbase_context *kctx);
+
+/** @brief Issue an operation to the MMU.
+ *
+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
+ * is associated with the provided @ref kbase_context over the specified range
+ *
+ * @param[in]  kbdev         kbase device to issue the MMU operation on.
+ * @param[in]  as            address space to issue the MMU operation on.
+ * @param[in]  kctx          kbase context to issue the MMU operation on.
+ * @param[in]  vpfn          MMU Virtual Page Frame Number to start the
+ *                           operation on.
+ * @param[in]  nr            Number of pages to work on.
+ * @param[in]  type          Operation type (written to ASn_COMMAND).
+ * @param[in]  handling_irq  Is this operation being called during the handling
+ *                           of an interrupt?
+ *
+ * @return Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type,
+		unsigned int handling_irq);
+
+/** @brief Clear a fault that has been previously reported by the MMU.
+ *
+ * Clear a bus error or page fault that has been reported by the MMU.
+ *
+ * @param[in]  kbdev         kbase device to  clear the fault from.
+ * @param[in]  as            address space to  clear the fault from.
+ * @param[in]  kctx          kbase context to clear the fault from or NULL.
+ * @param[in]  type          The type of fault that needs to be cleared.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @brief Enable fault that has been previously reported by the MMU.
+ *
+ * After a page fault or bus error has been reported by the MMU these
+ * will be disabled. After these are handled this function needs to be
+ * called to enable the page fault or bus error fault again.
+ *
+ * @param[in]  kbdev         kbase device to again enable the fault from.
+ * @param[in]  as            address space to again enable the fault from.
+ * @param[in]  kctx          kbase context to again enable the fault from.
+ * @param[in]  type          The type of fault that needs to be enabled again.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @} *//* end group mali_kbase_mmu_hw */
+/** @} *//* end group base_kbase_api */
+
+#endif	/* _MALI_KBASE_MMU_HW_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
new file mode 100644
index 0000000..2449c60
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _MALI_KBASE_MMU_MODE_
+#define _MALI_KBASE_MMU_MODE_
+
+#include <linux/types.h>
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_device;
+struct kbase_as;
+struct kbase_mmu_setup;
+
+struct kbase_mmu_mode {
+	void (*update)(struct kbase_context *kctx);
+	void (*get_as_setup)(struct kbase_context *kctx,
+			struct kbase_mmu_setup * const setup);
+	void (*disable_as)(struct kbase_device *kbdev, int as_nr);
+	phys_addr_t (*pte_to_phy_addr)(u64 entry);
+	int (*ate_is_valid)(u64 ate);
+	int (*pte_is_valid)(u64 pte);
+	void (*entry_set_ate)(u64 *entry, phys_addr_t phy, unsigned long flags);
+	void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+	void (*entry_invalidate)(u64 *entry);
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
+
+#endif /* _MALI_KBASE_MMU_MODE_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
new file mode 100644
index 0000000..791f3ed
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
@@ -0,0 +1,200 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase_mmu_mode.h"
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+/* For valid ATEs bit 1 = (level == 3) ? 1 : 0.
+ * The MMU is only ever configured by the driver so that ATEs
+ * are at level 3, so bit 1 should always be set
+ */
+#define ENTRY_IS_ATE        3ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_ACCESS_RW (1ULL << 6)     /* bits 6:7 */
+#define ENTRY_ACCESS_RO (3ULL << 6)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+	*pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler keeping cached copies of
+	 * memory, we have to explicitly say that we have updated memory.
+	 *
+	 * Note: We could manually move the data ourselves into R0 and
+	 * R1 by specifying register variables that are explicitly
+	 * given registers assignments, the down side of this is that
+	 * we have to assume cpu endianness.  To avoid this we can use
+	 * the ldrd to read the data from memory into R0 and R1 which
+	 * will respect the cpu endianness, we then use strd to make
+	 * the 64 bit assignment to the page table entry.
+	 */
+	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+			"strd r0, r1, [%[pte]]\n\t"
+			: "=m" (*pte)
+			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+			: "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register.
+	 */
+	setup->memattr =
+		(AS_MEMATTR_IMPL_DEF_CACHE_POLICY <<
+			(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_FORCE_TO_CACHE_ALL    <<
+			(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+		(AS_MEMATTR_WRITE_ALLOC           <<
+			(AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_IMPL_DEF   <<
+			(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_WA         <<
+			(AS_MEMATTR_INDEX_OUTER_WA * 8));
+
+	setup->transtab = (u64)kctx->pgd & AS_TRANSTAB_BASE_MASK;
+	setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K;
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = 0ULL;
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED;
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* Set access flags - note that AArch64 stage 1 does not support
+	 * write-only access, so we use read/write instead
+	 */
+	if (flags & KBASE_REG_GPU_WR)
+		mmu_flags |= ENTRY_ACCESS_RW;
+	else if (flags & KBASE_REG_GPU_RD)
+		mmu_flags |= ENTRY_ACCESS_RO;
+
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) |
+			get_mmu_flags(flags) |
+			ENTRY_ACCESS_BIT | ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) |
+			ENTRY_ACCESS_BIT | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const aarch64_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void)
+{
+	return &aarch64_mode;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
new file mode 100644
index 0000000..683cabb
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -0,0 +1,206 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase_mmu_mode.h"
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+#define ENTRY_IS_ATE        1ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_RD_BIT (1ULL << 6)
+#define ENTRY_WR_BIT (1ULL << 7)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
+		ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+	*pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler keeping cached copies of
+	 * memory, we have to explicitly say that we have updated
+	 * memory.
+	 *
+	 * Note: We could manually move the data ourselves into R0 and
+	 * R1 by specifying register variables that are explicitly
+	 * given registers assignments, the down side of this is that
+	 * we have to assume cpu endianness.  To avoid this we can use
+	 * the ldrd to read the data from memory into R0 and R1 which
+	 * will respect the cpu endianness, we then use strd to make
+	 * the 64 bit assignment to the page table entry.
+	 */
+	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+			"strd r0, r1, [%[pte]]\n\t"
+			: "=m" (*pte)
+			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+			: "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register. */
+	setup->memattr =
+		(AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
+		(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    <<
+		(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8))    |
+		(AS_MEMATTR_LPAE_WRITE_ALLOC           <<
+		(AS_MEMATTR_INDEX_WRITE_ALLOC * 8))           |
+		(AS_MEMATTR_LPAE_OUTER_IMPL_DEF        <<
+		(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8))        |
+		(AS_MEMATTR_LPAE_OUTER_WA              <<
+		(AS_MEMATTR_INDEX_OUTER_WA * 8))              |
+		0; /* The other indices are unused for now */
+
+	setup->transtab = ((u64)kctx->pgd &
+		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
+		AS_TRANSTAB_LPAE_ADRMODE_TABLE |
+		AS_TRANSTAB_LPAE_READ_INNER;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#else
+	setup->transcfg = 0;
+#endif
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#endif
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* write perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
+	/* read perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) |
+		get_mmu_flags(flags) |
+		ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const lpae_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
+{
+	return &lpae_mode;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
new file mode 100644
index 0000000..1a44957
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+
+/*
+ * This file is included only for type definitions and functions belonging to
+ * specific platform folders. Do not add dependencies with symbols that are
+ * defined somewhere else.
+ */
+#include <mali_kbase_config.h>
+
+#define PLATFORM_CONFIG_RESOURCE_COUNT 4
+#define PLATFORM_CONFIG_IRQ_RES_COUNT  3
+
+static struct platform_device *mali_device;
+
+#ifndef CONFIG_OF
+/**
+ * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources
+ *
+ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function
+ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT.
+ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
+ *
+ * @param[in]  io_resource      Input IO resource data
+ * @param[out] linux_resources  Pointer to output array of Linux resource structures
+ */
+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
+{
+	if (!io_resources || !linux_resources) {
+		pr_err("%s: couldn't find proper resources\n", __func__);
+		return;
+	}
+
+	memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource));
+
+	linux_resources[0].start = io_resources->io_memory_region.start;
+	linux_resources[0].end   = io_resources->io_memory_region.end;
+	linux_resources[0].flags = IORESOURCE_MEM;
+
+	linux_resources[1].start = io_resources->job_irq_number;
+	linux_resources[1].end   = io_resources->job_irq_number;
+	linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[2].start = io_resources->mmu_irq_number;
+	linux_resources[2].end   = io_resources->mmu_irq_number;
+	linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[3].start = io_resources->gpu_irq_number;
+	linux_resources[3].end   = io_resources->gpu_irq_number;
+	linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+}
+#endif /* CONFIG_OF */
+
+int kbase_platform_fake_register(void)
+{
+	struct kbase_platform_config *config;
+#ifndef CONFIG_OF
+	struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
+#endif
+	int err;
+
+	config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
+	if (config == NULL) {
+		pr_err("%s: couldn't get platform config\n", __func__);
+		return -ENODEV;
+	}
+
+	mali_device = platform_device_alloc("mali", 0);
+	if (mali_device == NULL)
+		return -ENOMEM;
+
+#ifndef CONFIG_OF
+	kbasep_config_parse_io_resources(config->io_resources, resources);
+	err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
+	if (err) {
+		platform_device_put(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+#endif /* CONFIG_OF */
+
+	err = platform_device_add(mali_device);
+	if (err) {
+		platform_device_unregister(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kbase_platform_fake_register);
+
+void kbase_platform_fake_unregister(void)
+{
+	if (mali_device)
+		platform_device_unregister(mali_device);
+}
+EXPORT_SYMBOL(kbase_platform_fake_unregister);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
new file mode 100644
index 0000000..97d5434
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -0,0 +1,205 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.c
+ * Base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_vinstr.h>
+
+#include <mali_kbase_pm.h>
+
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
+{
+	return kbase_hwaccess_pm_powerup(kbdev, flags);
+}
+
+void kbase_pm_halt(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
+void kbase_pm_context_active(struct kbase_device *kbdev)
+{
+	(void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+}
+
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerup. Sometimes the event might be missed due to reading the count
+	 * outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+	if (kbase_pm_is_suspending(kbdev)) {
+		switch (suspend_handler) {
+		case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+			if (kbdev->pm.active_count != 0)
+				break;
+			/* FALLTHROUGH */
+		case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			if (old_count == 0)
+				kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+			return 1;
+
+		case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
+			/* FALLTHROUGH */
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false, "unreachable");
+			break;
+		}
+	}
+	c = ++kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	if (c == 1)
+		/* First context active: Power on the GPU and any cores requested by
+		 * the policy */
+		kbase_hwaccess_pm_gpu_active(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_active);
+
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerdown. Sometimes the event might be missed due to reading the
+	 * count outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	c = --kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
+
+	KBASE_DEBUG_ASSERT(c >= 0);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	if (c == 0) {
+		/* Last context has gone idle */
+		kbase_hwaccess_pm_gpu_idle(kbdev);
+
+		/* Wake up anyone waiting for this to become 0 (e.g. suspend). The
+		 * waiters must synchronize with us by locking the pm.lock after
+		 * waiting */
+		wake_up(&kbdev->pm.zero_active_count_wait);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
+
+void kbase_pm_suspend(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	mutex_lock(&kbdev->pm.lock);
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+	kbdev->pm.suspending = true;
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* From now on, the active count will drop towards zero. Sometimes, it'll
+	 * go up briefly before going down again. However, once it reaches zero it
+	 * will stay there - guaranteeing that we've idled all pm references */
+
+	/* Suspend job scheduler and associated components, so that it releases all
+	 * the PM active count references */
+	kbasep_js_suspend(kbdev);
+
+	/* Wait for the active count to reach zero. This is not the same as
+	 * waiting for a power down, since not all policies power down when this
+	 * reaches zero. */
+	wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0);
+
+	/* NOTE: We synchronize with anything that was just finishing a
+	 * kbase_pm_context_idle() call by locking the pm.lock below */
+
+	kbase_hwaccess_pm_suspend(kbdev);
+}
+
+void kbase_pm_resume(struct kbase_device *kbdev)
+{
+	/* MUST happen before any pm_context_active calls occur */
+	kbase_hwaccess_pm_resume(kbdev);
+
+	/* Initial active call, to power on the GPU/cores if needed */
+	kbase_pm_context_active(kbdev);
+
+	/* Resume any blocked atoms (which may cause contexts to be scheduled in
+	 * and dependent atoms to run) */
+	kbase_resume_suspended_soft_jobs(kbdev);
+
+	/* Resume the Job Scheduler and associated components, and start running
+	 * atoms */
+	kbasep_js_resume(kbdev);
+
+	/* Matching idle call, to power off the GPU/cores if we didn't actually
+	 * need it and the policy doesn't want it on */
+	kbase_pm_context_idle(kbdev);
+
+	/* Resume vinstr operation */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+}
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
new file mode 100644
index 0000000..37fa247
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -0,0 +1,171 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include "mali_kbase_hwaccess_pm.h"
+
+#define PM_ENABLE_IRQS       0x01
+#define PM_HW_ISSUES_DETECT  0x02
+
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return 0 if the power management framework was successfully initialized.
+ */
+int kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @param flags     Flags to pass on to kbase_pm_init_hw
+ *
+ * @return 0 if powerup was successful.
+ */
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ * Should ensure that no new interrupts are generated,
+ * but allow any currently running interrupt handlers to complete successfully.
+ * The GPU is forced off by the time this function returns, regardless of
+ * whether or not the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init)
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It informs the active power policy that the
+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occuring/has occurred whilst this is
+ * in use. Use kbase_pm_contect_active_unless_suspending() instead.
+ *
+ * @note a Suspend is only visible to Kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+	/** A suspend is not expected/not possible - this is the same as
+	 * kbase_pm_context_active() */
+	KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+	/** If we're suspending, fail and don't increase the active count */
+	KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+	/** If we're suspending, succeed and allow the active count to increase iff
+	 * it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+	 *
+	 * This should only be used when there is a bounded time on the activation
+	 * (e.g. guarantee it's going to be idled very soon after) */
+	KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * We returns a status code indicating whether we're allowed to keep the GPU
+ * active during the suspend, depending on the handler code. If the status code
+ * indicates a failure, the caller must abort whatever operation it was
+ * attempting, and potentially queue it up for after the OS has resumed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero     Indicates success
+ * @return non-zero Indicates failure due to the system being suspending/suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power
+ * policy so the calling code should ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from Kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note the mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU e.g. via atom submission.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_vsync_callback - vsync callback
+ *
+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise
+ * @data: Pointer to the kbase device as returned by kbase_find_device()
+ *
+ * Callback function used to notify the power management code that a vsync has
+ * occurred on the display.
+ */
+void kbase_pm_vsync_callback(int buffer_updated, void *data);
+
+#endif				/* _KBASE_PM_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
new file mode 100644
index 0000000..7fb674e
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_profiling_gator_api.h
+ * Model interface
+ */
+
+#ifndef _KBASE_PROFILING_GATOR_API_H_
+#define _KBASE_PROFILING_GATOR_API_H_
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control
+ * the frame buffer dumping and s/w counter reporting.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define FBDUMP_CONTROL_MAX (5)
+#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE
+
+void _mali_profiling_control(u32 action, u32 value);
+
+#endif				/* _KBASE_PROFILING_GATOR_API */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
new file mode 100644
index 0000000..c970650
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+
+#include "mali_kbase_regs_history_debugfs.h"
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+#include <linux/debugfs.h>
+
+
+static int regs_history_size_get(void *data, u64 *val)
+{
+	struct kbase_io_history *const h = data;
+
+	*val = h->size;
+
+	return 0;
+}
+
+static int regs_history_size_set(void *data, u64 val)
+{
+	struct kbase_io_history *const h = data;
+
+	return kbase_io_history_resize(h, (u16)val);
+}
+
+
+DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
+		regs_history_size_get,
+		regs_history_size_set,
+		"%llu\n");
+
+
+/**
+ * regs_history_show - show callback for the register access history file.
+ *
+ * @sfile: The debugfs entry
+ * @data: Data associated with the entry
+ *
+ * This function is called to dump all recent accesses to the GPU registers.
+ *
+ * @return 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int regs_history_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_io_history *const h = sfile->private;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!h->enabled) {
+		seq_puts(sfile, "The register access history is disabled\n");
+		goto out;
+	}
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	iters = (h->size > h->count) ? h->count : h->size;
+	seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+out:
+	return 0;
+}
+
+
+/**
+ * regs_history_open - open operation for regs_history debugfs file
+ *
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * @return file descriptor
+ */
+static int regs_history_open(struct inode *in, struct file *file)
+{
+	return single_open(file, &regs_history_show, in->i_private);
+}
+
+
+static const struct file_operations regs_history_fops = {
+	.open = &regs_history_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history.enabled);
+	debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history, &regs_history_size_fops);
+	debugfs_create_file("regs_history", S_IRUGO,
+			kbdev->mali_debugfs_directory, &kbdev->io_history,
+			&regs_history_fops);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
new file mode 100644
index 0000000..f108370
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Header file for register access history support via debugfs
+ *
+ * This interface is made available via /sys/kernel/debug/mali#/regs_history*.
+ *
+ * Usage:
+ * - regs_history_enabled: whether recording of register accesses is enabled.
+ *   Write 'y' to enable, 'n' to disable.
+ * - regs_history_size: size of the register history buffer, must be > 0
+ * - regs_history: return the information about last accesses to the registers.
+ */
+
+#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H
+#define _KBASE_REGS_HISTORY_DEBUGFS_H
+
+struct kbase_device;
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/**
+ * kbasep_regs_history_debugfs_init - add debugfs entries for register history
+ *
+ * @kbdev: Pointer to kbase_device containing the register history
+ */
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_regs_history_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+#endif  /*_KBASE_REGS_HISTORY_DEBUGFS_H*/
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
new file mode 100644
index 0000000..84aa331
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -0,0 +1,1166 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_replay.c
+ * Replay soft job handlers
+ */
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mem_linux.h>
+
+#define JOB_NOT_STARTED 0
+#define JOB_TYPE_NULL      (1)
+#define JOB_TYPE_VERTEX    (5)
+#define JOB_TYPE_TILER     (7)
+#define JOB_TYPE_FUSED     (8)
+#define JOB_TYPE_FRAGMENT  (9)
+
+#define JOB_HEADER_32_FBD_OFFSET (31*4)
+#define JOB_HEADER_64_FBD_OFFSET (44*4)
+
+#define FBD_POINTER_MASK (~0x3f)
+
+#define SFBD_TILER_OFFSET (48*4)
+
+#define MFBD_TILER_OFFSET       (14*4)
+
+#define FBD_HIERARCHY_WEIGHTS 8
+#define FBD_HIERARCHY_MASK_MASK 0x1fff
+
+#define FBD_TYPE 1
+
+#define HIERARCHY_WEIGHTS 13
+
+#define JOB_HEADER_ID_MAX                 0xffff
+
+#define JOB_SOURCE_ID(status)		(((status) >> 16) & 0xFFFF)
+#define JOB_POLYGON_LIST		(0x03)
+
+struct fragment_job {
+	struct job_descriptor_header header;
+
+	u32 x[2];
+	union {
+		u64 _64;
+		u32 _32;
+	} fragment_fbd;
+};
+
+static void dump_job_head(struct kbase_context *kctx, char *head_str,
+		struct job_descriptor_header *job)
+{
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
+	dev_dbg(kctx->kbdev->dev,
+			"addr                  = %p\n"
+			"exception_status      = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n"
+			"first_incomplete_task = %x\n"
+			"fault_pointer         = %llx\n"
+			"job_descriptor_size   = %x\n"
+			"job_type              = %x\n"
+			"job_barrier           = %x\n"
+			"_reserved_01          = %x\n"
+			"_reserved_02          = %x\n"
+			"_reserved_03          = %x\n"
+			"_reserved_04/05       = %x,%x\n"
+			"job_index             = %x\n"
+			"dependencies          = %x,%x\n",
+			job, job->exception_status,
+			JOB_SOURCE_ID(job->exception_status),
+			(job->exception_status >> 8) & 0x3,
+			job->exception_status  & 0xFF,
+			job->first_incomplete_task,
+			job->fault_pointer, job->job_descriptor_size,
+			job->job_type, job->job_barrier, job->_reserved_01,
+			job->_reserved_02, job->_reserved_03,
+			job->_reserved_04, job->_reserved_05,
+			job->job_index,
+			job->job_dependency_index_1,
+			job->job_dependency_index_2);
+
+	if (job->job_descriptor_size)
+		dev_dbg(kctx->kbdev->dev, "next               = %llx\n",
+				job->next_job._64);
+	else
+		dev_dbg(kctx->kbdev->dev, "next               = %x\n",
+				job->next_job._32);
+#endif
+}
+
+static int kbasep_replay_reset_sfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct {
+		u32 padding_1[1];
+		u32 flags;
+		u64 padding_2[2];
+		u64 heap_free_address;
+		u32 padding[8];
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev,
+		"FBD tiler:\n"
+		"flags = %x\n"
+		"heap_free_address = %llx\n",
+		fbd_tiler->flags, fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = fbd_tiler->flags &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n",
+						i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n",
+			fbd_tiler->heap_free_address, fbd_tiler->flags);
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_replay_reset_mfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct kbase_vmap_struct map;
+	struct {
+		u32 padding_0;
+		u32 flags;
+		u64 padding_1[2];
+		u64 heap_free_address;
+		u64 padding_2;
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev,
+			       "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "FBD tiler:\n"
+			"flags = %x\n"
+			"heap_free_address = %llx\n",
+			fbd_tiler->flags,
+			fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = (fbd_tiler->flags) &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev,
+				" Writing hierarchy level %02d (%08x) to %d\n",
+							     i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of an FBD pointed to by a tiler job
+ *
+ * This performs two functions :
+ * - Set the hierarchy mask
+ * - Reset the tiler free heap address
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] job_header        Address of job header to reset.
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] job_64            true if this job is using 64-bit
+ *                              descriptors
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx,
+		u64 job_header,	u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight,	bool job_64)
+{
+	struct kbase_vmap_struct map;
+	u64 fbd_address;
+
+	if (job_64) {
+		u64 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_64_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	} else {
+		u32 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_32_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	if (fbd_address & FBD_TYPE) {
+		return kbasep_replay_reset_mfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	} else {
+		return kbasep_replay_reset_sfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	}
+}
+
+/**
+ * @brief Reset the status of a job
+ *
+ * This performs the following functions :
+ *
+ * - Reset the Job Status field of each job to NOT_STARTED.
+ * - Set the Job Type field of any Vertex Jobs to Null Job.
+ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of
+ *   the tiler_heap_free parameter, and set the hierarchy level mask to the
+ *   hier_mask parameter.
+ * - Offset HW dependencies by the hw_job_id_offset parameter
+ * - Set the Perform Job Barrier flag if this job is the first in the chain
+ * - Read the address of the next job header
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in,out] job_header    Address of job header to reset. Set to address
+ *                              of next job header on exit.
+ * @param[in] prev_jc           Previous job chain to link to, if this job is
+ *                              the last in the chain.
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] first_in_chain    true if this job is the first in the chain
+ * @param[in] fragment_chain    true if this job is in the fragment chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_job(struct kbase_context *kctx,
+		u64 *job_header, u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool first_in_chain, bool fragment_chain)
+{
+	struct fragment_job *frag_job;
+	struct job_descriptor_header *job;
+	u64 new_job_header;
+	struct kbase_vmap_struct map;
+
+	frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map);
+	if (!frag_job) {
+		dev_err(kctx->kbdev->dev,
+				 "kbasep_replay_parse_jc: failed to map jc\n");
+		return -EINVAL;
+	}
+	job = &frag_job->header;
+
+	dump_job_head(kctx, "Job header:", job);
+
+	if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) {
+		dev_err(kctx->kbdev->dev, "Job already not started\n");
+		goto out_unmap;
+	}
+	job->exception_status = JOB_NOT_STARTED;
+
+	if (job->job_type == JOB_TYPE_VERTEX)
+		job->job_type = JOB_TYPE_NULL;
+
+	if (job->job_type == JOB_TYPE_FUSED) {
+		dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
+		goto out_unmap;
+	}
+
+	if (first_in_chain)
+		job->job_barrier = 1;
+
+	if ((job->job_dependency_index_1 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_dependency_index_2 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+		dev_err(kctx->kbdev->dev,
+			     "Job indicies/dependencies out of valid range\n");
+		goto out_unmap;
+	}
+
+	if (job->job_dependency_index_1)
+		job->job_dependency_index_1 += hw_job_id_offset;
+	if (job->job_dependency_index_2)
+		job->job_dependency_index_2 += hw_job_id_offset;
+
+	job->job_index += hw_job_id_offset;
+
+	if (job->job_descriptor_size) {
+		new_job_header = job->next_job._64;
+		if (!job->next_job._64)
+			job->next_job._64 = prev_jc;
+	} else {
+		new_job_header = job->next_job._32;
+		if (!job->next_job._32)
+			job->next_job._32 = prev_jc;
+	}
+	dump_job_head(kctx, "Updated to:", job);
+
+	if (job->job_type == JOB_TYPE_TILER) {
+		bool job_64 = job->job_descriptor_size != 0;
+
+		if (kbasep_replay_reset_tiler_job(kctx, *job_header,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, job_64) != 0)
+			goto out_unmap;
+
+	} else if (job->job_type == JOB_TYPE_FRAGMENT) {
+		u64 fbd_address;
+
+		if (job->job_descriptor_size)
+			fbd_address = frag_job->fragment_fbd._64;
+		else
+			fbd_address = (u64)frag_job->fragment_fbd._32;
+
+		if (fbd_address & FBD_TYPE) {
+			if (kbasep_replay_reset_mfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		} else {
+			if (kbasep_replay_reset_sfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		}
+	}
+
+	kbase_vunmap(kctx, &map);
+
+	*job_header = new_job_header;
+
+	return 0;
+
+out_unmap:
+	kbase_vunmap(kctx, &map);
+	return -EINVAL;
+}
+
+/**
+ * @brief Find the highest job ID in a job chain
+ *
+ * @param[in] kctx        Context pointer
+ * @param[in] jc          Job chain start address
+ * @param[out] hw_job_id  Highest job ID in chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
+		u64 jc,	u16 *hw_job_id)
+{
+	while (jc) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		dev_dbg(kctx->kbdev->dev,
+			"kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc);
+
+		job = kbase_vmap(kctx, jc, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev, "failed to map jc\n");
+
+			return -EINVAL;
+		}
+
+		if (job->job_index > *hw_job_id)
+			*hw_job_id = job->job_index;
+
+		if (job->job_descriptor_size)
+			jc = job->next_job._64;
+		else
+			jc = job->next_job._32;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a number of jobs
+ *
+ * This function walks the provided job chain, and calls
+ * kbasep_replay_reset_job for each job. It also links the job chain to the
+ * provided previous job chain.
+ *
+ * The function will fail if any of the jobs passed already have status of
+ * NOT_STARTED.
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] jc                Job chain to be processed
+ * @param[in] prev_jc           Job chain to be added to. May be NULL
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] fragment_chain    true if this chain is the fragment chain
+ *
+ * @return 0 on success, error code otherwise
+ */
+static int kbasep_replay_parse_jc(struct kbase_context *kctx,
+		u64 jc,	u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool fragment_chain)
+{
+	bool first_in_chain = true;
+	int nr_jobs = 0;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n",
+			jc, hw_job_id_offset);
+
+	while (jc) {
+		dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc);
+
+		if (kbasep_replay_reset_job(kctx, &jc, prev_jc,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, hw_job_id_offset,
+				first_in_chain, fragment_chain) != 0)
+			return -EINVAL;
+
+		first_in_chain = false;
+
+		nr_jobs++;
+		if (fragment_chain &&
+		    nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) {
+			dev_err(kctx->kbdev->dev,
+				"Exceeded maximum number of jobs in fragment chain\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a replay job, and set up dependencies
+ *
+ * This performs the actions to allow the replay job to be re-run following
+ * completion of the passed dependency.
+ *
+ * @param[in] katom     The atom to be reset
+ * @param[in] dep_atom  The dependency to be attached to the atom
+ */
+static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom,
+		struct kbase_jd_atom *dep_atom)
+{
+	katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+	list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]);
+}
+
+/**
+ * @brief Allocate an unused katom
+ *
+ * This will search the provided context for an unused katom, and will mark it
+ * as KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * If no atoms are available then the function will fail.
+ *
+ * @param[in] kctx      Context pointer
+ * @return An atom ID, or -1 on failure
+ */
+static int kbasep_allocate_katom(struct kbase_context *kctx)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int i;
+
+	for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) {
+		if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) {
+			jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED;
+			dev_dbg(kctx->kbdev->dev,
+				  "kbasep_allocate_katom: Allocated atom %d\n",
+									    i);
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * @brief Release a katom
+ *
+ * This will mark the provided atom as available, and remove any dependencies.
+ *
+ * For use on error path.
+ *
+ * @param[in] kctx      Context pointer
+ * @param[in] atom_id   ID of atom to release
+ */
+static void kbasep_release_katom(struct kbase_context *kctx, int atom_id)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n",
+			atom_id);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[0]))
+		list_del(jctx->atoms[atom_id].dep_head[0].next);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[1]))
+		list_del(jctx->atoms[atom_id].dep_head[1].next);
+
+	jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED;
+}
+
+static void kbasep_replay_create_atom(struct kbase_context *kctx,
+				      struct base_jd_atom_v2 *atom,
+				      int atom_nr,
+				      base_jd_prio prio)
+{
+	atom->nr_extres = 0;
+	atom->extres_list.value = NULL;
+	atom->device_nr = 0;
+	atom->prio = prio;
+	atom->atom_number = atom_nr;
+
+	base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID);
+
+	atom->udata.blob[0] = 0;
+	atom->udata.blob[1] = 0;
+}
+
+/**
+ * @brief Create two atoms for the purpose of replaying jobs
+ *
+ * Two atoms are allocated and created. The jc pointer is not set at this
+ * stage. The second atom has a dependency on the first. The remaining fields
+ * are set up as follows :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ *
+ * @param[out] t_atom      Atom to use for tiler jobs
+ * @param[out] f_atom      Atom to use for fragment jobs
+ * @param[in]  prio        Priority of new atom (inherited from replay soft
+ *                         job)
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_create_atoms(struct kbase_context *kctx,
+		struct base_jd_atom_v2 *t_atom,
+		struct base_jd_atom_v2 *f_atom,
+		base_jd_prio prio)
+{
+	int t_atom_nr, f_atom_nr;
+
+	t_atom_nr = kbasep_allocate_katom(kctx);
+	if (t_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		return -EINVAL;
+	}
+
+	f_atom_nr = kbasep_allocate_katom(kctx);
+	if (f_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		kbasep_release_katom(kctx, t_atom_nr);
+		return -EINVAL;
+	}
+
+	kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
+	kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
+
+	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA);
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_DEBUG
+static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload)
+{
+	u64 next;
+
+	dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n");
+	next = payload->tiler_jc_list;
+
+	while (next) {
+		struct kbase_vmap_struct map;
+		base_jd_replay_jc *jc_struct;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map);
+
+		if (!jc_struct)
+			return;
+
+		dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n",
+				jc_struct, jc_struct->jc, jc_struct->next);
+
+		next = jc_struct->next;
+
+		kbase_vunmap(kctx, &map);
+	}
+}
+#endif
+
+/**
+ * @brief Parse a base_jd_replay_payload provided by userspace
+ *
+ * This will read the payload from userspace, and parse the job chains.
+ *
+ * @param[in] kctx         Context pointer
+ * @param[in] replay_atom  Replay soft job atom
+ * @param[in] t_atom       Atom to use for tiler jobs
+ * @param[in] f_atom       Atom to use for fragment jobs
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_parse_payload(struct kbase_context *kctx,
+					      struct kbase_jd_atom *replay_atom,
+					      struct base_jd_atom_v2 *t_atom,
+					      struct base_jd_atom_v2 *f_atom)
+{
+	base_jd_replay_payload *payload = NULL;
+	u64 next;
+	u64 prev_jc = 0;
+	u16 hw_job_id_offset = 0;
+	int ret = -EINVAL;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n",
+			replay_atom->jc, sizeof(payload));
+
+	payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n");
+		return -EINVAL;
+	}
+
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+	if (KBASE_API_VERSION(10, 3) > replay_atom->kctx->api_version) {
+		base_jd_replay_payload_uk10_2 *payload_uk10_2;
+		u16 tiler_core_req;
+		u16 fragment_core_req;
+
+		payload_uk10_2 = (base_jd_replay_payload_uk10_2 *) payload;
+		memcpy(&tiler_core_req, &payload_uk10_2->tiler_core_req,
+				sizeof(tiler_core_req));
+		memcpy(&fragment_core_req, &payload_uk10_2->fragment_core_req,
+				sizeof(fragment_core_req));
+		payload->tiler_core_req = (u32)(tiler_core_req & 0x7fff);
+		payload->fragment_core_req = (u32)(fragment_core_req & 0x7fff);
+	}
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload);
+	dev_dbg(kctx->kbdev->dev, "Payload structure:\n"
+				  "tiler_jc_list            = %llx\n"
+				  "fragment_jc              = %llx\n"
+				  "tiler_heap_free          = %llx\n"
+				  "fragment_hierarchy_mask  = %x\n"
+				  "tiler_hierarchy_mask     = %x\n"
+				  "hierarchy_default_weight = %x\n"
+				  "tiler_core_req           = %x\n"
+				  "fragment_core_req        = %x\n",
+							payload->tiler_jc_list,
+							  payload->fragment_jc,
+						      payload->tiler_heap_free,
+					      payload->fragment_hierarchy_mask,
+						 payload->tiler_hierarchy_mask,
+					     payload->hierarchy_default_weight,
+						       payload->tiler_core_req,
+						   payload->fragment_core_req);
+	payload_dump(kctx, payload);
+#endif
+	t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER;
+	f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER;
+
+	/* Sanity check core requirements*/
+	if ((t_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T ||
+	    (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS ||
+	     t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES ||
+	     f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+
+		int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP;
+		int f_atom_type = f_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC;
+		int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+		int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+
+		if (t_atom_type != BASE_JD_REQ_T) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. Was: 0x%x\n Expected: 0x%x",
+			    t_atom_type, BASE_JD_REQ_T);
+		}
+		if (f_atom_type != BASE_JD_REQ_FS) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. Was 0x%x Expected: 0x%x\n",
+			    f_atom_type, BASE_JD_REQ_FS);
+		}
+		if (t_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n");
+		}
+		if (f_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n");
+		}
+
+		goto out;
+	}
+
+	/* Process tiler job chains */
+	next = payload->tiler_jc_list;
+	if (!next) {
+		dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n");
+		goto out;
+	}
+
+	while (next) {
+		base_jd_replay_jc *jc_struct;
+		struct kbase_vmap_struct jc_map;
+		u64 jc;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map);
+
+		if (!jc_struct) {
+			dev_err(kctx->kbdev->dev, "Failed to map jc struct\n");
+			goto out;
+		}
+
+		jc = jc_struct->jc;
+		next = jc_struct->next;
+		if (next)
+			jc_struct->jc = 0;
+
+		kbase_vunmap(kctx, &jc_map);
+
+		if (jc) {
+			u16 max_hw_job_id = 0;
+
+			if (kbasep_replay_find_hw_job_id(kctx, jc,
+					&max_hw_job_id) != 0)
+				goto out;
+
+			if (kbasep_replay_parse_jc(kctx, jc, prev_jc,
+					payload->tiler_heap_free,
+					payload->tiler_hierarchy_mask,
+					payload->hierarchy_default_weight,
+					hw_job_id_offset, false) != 0) {
+				goto out;
+			}
+
+			hw_job_id_offset += max_hw_job_id;
+
+			prev_jc = jc;
+		}
+	}
+	t_atom->jc = prev_jc;
+
+	/* Process fragment job chain */
+	f_atom->jc = payload->fragment_jc;
+	if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0,
+			payload->tiler_heap_free,
+			payload->fragment_hierarchy_mask,
+			payload->hierarchy_default_weight, 0,
+			true) != 0) {
+		goto out;
+	}
+
+	if (!t_atom->jc || !f_atom->jc) {
+		dev_err(kctx->kbdev->dev, "Invalid payload\n");
+		goto out;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n",
+			t_atom->jc, f_atom->jc);
+	ret = 0;
+
+out:
+	kbase_vunmap(kctx, &map);
+
+	return ret;
+}
+
+static void kbase_replay_process_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+	struct kbase_jd_context *jctx;
+	bool need_to_try_schedule_context = false;
+
+	struct base_jd_atom_v2 t_atom, f_atom;
+	struct kbase_jd_atom *t_katom, *f_katom;
+	base_jd_prio atom_prio;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+
+	mutex_lock(&jctx->lock);
+
+	atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority);
+
+	if (kbasep_replay_create_atoms(
+			kctx, &t_atom, &f_atom, atom_prio) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	t_katom = &jctx->atoms[t_atom.atom_number];
+	f_katom = &jctx->atoms[f_atom.atom_number];
+
+	if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) {
+		kbasep_release_katom(kctx, t_atom.atom_number);
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	kbasep_replay_reset_softjob(katom, f_katom);
+
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom);
+	if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom);
+	if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+out:
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_disjoint_state_down(kctx->kbdev);
+
+		need_to_try_schedule_context |= jd_done_nolock(katom, NULL);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kctx->kbdev);
+
+	mutex_unlock(&jctx->lock);
+}
+
+/**
+ * @brief Check job replay fault
+ *
+ * This will read the job payload, checks fault type and source, then decides
+ * whether replay is required.
+ *
+ * @param[in] katom       The atom to be processed
+ * @return  true (success) if replay required or false on failure.
+ */
+static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	base_jd_replay_payload *payload;
+	u64 job_header;
+	u64 job_loop_detect;
+	struct job_descriptor_header *job;
+	struct kbase_vmap_struct job_map;
+	struct kbase_vmap_struct map;
+	bool err = false;
+
+	/* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or
+	 * if force_replay is enabled.
+	 */
+	if (BASE_JD_EVENT_TERMINATED == katom->event_code) {
+		return false;
+	} else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) {
+		return true;
+	} else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) {
+		katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT;
+		return true;
+	} else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) {
+		/* No replay for faults of type other than
+		 * BASE_JD_EVENT_DATA_INVALID_FAULT.
+		 */
+		return false;
+	}
+
+	/* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc
+	 * to find out whether the source of exception is POLYGON_LIST. Replay
+	 * is required if the source of fault is POLYGON_LIST.
+	 */
+	payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n");
+		return false;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload);
+	dev_dbg(dev, "\nPayload structure:\n"
+		     "fragment_jc              = 0x%llx\n"
+		     "fragment_hierarchy_mask  = 0x%x\n"
+		     "fragment_core_req        = 0x%x\n",
+		     payload->fragment_jc,
+		     payload->fragment_hierarchy_mask,
+		     payload->fragment_core_req);
+#endif
+	/* Process fragment job chain */
+	job_header      = (u64) payload->fragment_jc;
+	job_loop_detect = job_header;
+	while (job_header) {
+		job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map);
+		if (!job) {
+			dev_err(dev, "failed to map jc\n");
+			/* unmap payload*/
+			kbase_vunmap(kctx, &map);
+			return false;
+		}
+
+
+		dump_job_head(kctx, "\njob_head structure:\n", job);
+
+		/* Replay only when the polygon list reader caused the
+		 * DATA_INVALID_FAULT */
+		if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
+		   (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) {
+			err = true;
+			kbase_vunmap(kctx, &job_map);
+			break;
+		}
+
+		/* Move on to next fragment job in the list */
+		if (job->job_descriptor_size)
+			job_header = job->next_job._64;
+		else
+			job_header = job->next_job._32;
+
+		kbase_vunmap(kctx, &job_map);
+
+		/* Job chain loop detected */
+		if (job_header == job_loop_detect)
+			break;
+	}
+
+	/* unmap payload*/
+	kbase_vunmap(kctx, &map);
+
+	return err;
+}
+
+
+/**
+ * @brief Process a replay job
+ *
+ * Called from kbase_process_soft_job.
+ *
+ * On exit, if the job has completed, katom->event_code will have been updated.
+ * If the job has not completed, and is replaying jobs, then the atom status
+ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * @param[in] katom  The atom to be processed
+ * @return           false if the atom has completed
+ *                   true if the atom is replaying jobs
+ */
+bool kbase_replay_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	/* Don't replay this atom if these issues are not present in the
+	 * hardware */
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11020) &&
+			!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11024)) {
+		dev_dbg(kbdev->dev, "Hardware does not need replay workaround");
+
+		/* Signal failure to userspace */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+
+		return false;
+	}
+
+	if (katom->event_code == BASE_JD_EVENT_DONE) {
+		dev_dbg(kbdev->dev, "Previous job succeeded - not replaying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		dev_dbg(kbdev->dev, "Not replaying; context is dying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	/* Check job exception type and source before replaying. */
+	if (!kbase_replay_fault_check(katom)) {
+		dev_dbg(kbdev->dev,
+			"Replay cancelled on event %x\n", katom->event_code);
+		/* katom->event_code is already set to the failure code of the
+		 * previous job.
+		 */
+		return false;
+	}
+
+	dev_warn(kbdev->dev, "Replaying jobs retry=%d\n",
+			katom->retry_count);
+
+	katom->retry_count++;
+
+	if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) {
+		dev_err(kbdev->dev, "Replay exceeded limit - failing jobs\n");
+
+		kbase_disjoint_state_down(kbdev);
+
+		/* katom->event_code is already set to the failure code of the
+		   previous job */
+		return false;
+	}
+
+	/* only enter the disjoint state once for the whole time while the replay is ongoing */
+	if (katom->retry_count == 1)
+		kbase_disjoint_state_up(kbdev);
+
+	INIT_WORK(&katom->work, kbase_replay_process_worker);
+	queue_work(kctx->event_workq, &katom->work);
+
+	return true;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
new file mode 100644
index 0000000..43175c8
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
@@ -0,0 +1,74 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+#include <mali_kbase_smc.h>
+
+#include <linux/compiler.h>
+
+static noinline u64 invoke_smc_fid(u64 function_id,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	register u64 x0 asm("x0") = function_id;
+	register u64 x1 asm("x1") = arg0;
+	register u64 x2 asm("x2") = arg1;
+	register u64 x3 asm("x3") = arg2;
+
+	asm volatile(
+			__asmeq("%0", "x0")
+			__asmeq("%1", "x1")
+			__asmeq("%2", "x2")
+			__asmeq("%3", "x3")
+			"smc    #0\n"
+			: "+r" (x0)
+			: "r" (x1), "r" (x2), "r" (x3));
+
+	return x0;
+}
+
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2)
+{
+	/* Is fast call (bit 31 set) */
+	KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL);
+	/* bits 16-23 must be zero for fast calls */
+	KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0);
+
+	return invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	/* Only the six bits allowed should be used. */
+	KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0);
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return kbase_invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+#endif /* CONFIG_ARM64 */
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
new file mode 100644
index 0000000..9bff3d2
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_SMC_H_
+#define _KBASE_SMC_H_
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+
+#define SMC_FAST_CALL (1 << 31)
+#define SMC_64 (1 << 30)
+
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */
+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET)
+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET)
+
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @fid: The SMC function to call, see SMC Calling convention.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC.
+  */
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2);
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @oen: Owning Entity number (SIP, STD etc).
+  * @function_number: The function number within the OEN.
+  * @smc64: use SMC64 calling convention instead of SMC32.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC call.
+  */
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2);
+
+#endif /* CONFIG_ARM64 */
+
+#endif /* _KBASE_SMC_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
new file mode 100644
index 0000000..2997e6f
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -0,0 +1,1621 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#include <asm/cacheflush.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/dma-mapping.h>
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#include <linux/syscalls.h>
+#include "mali_kbase_sync.h"
+#endif
+#include <mali_base_kernel.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_mem_linux.h>
+#include <linux/version.h>
+#include <linux/ktime.h>
+#include <linux/pfn.h>
+#include <linux/sched.h>
+
+/* Mask to check cache alignment of data structures */
+#define KBASE_CACHE_ALIGNMENT_MASK		((1<<L1_CACHE_SHIFT)-1)
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_add_tail(&katom->queue, &kctx->waiting_soft_jobs);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_del(&katom->queue);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Record the start time of this atom so we could cancel it at
+	 * the right time.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* Add the atom to the waiting list before the timer is
+	 * (re)started to make sure that it gets processed.
+	 */
+	kbasep_add_waiting_soft_job(katom);
+
+	/* Schedule timeout of this atom after a period if it is not active */
+	if (!timer_pending(&kctx->soft_job_timeout)) {
+		int timeout_ms = atomic_read(
+				&kctx->kbdev->js_data.soft_job_timeout_ms);
+		mod_timer(&kctx->soft_job_timeout,
+			  jiffies + msecs_to_jiffies(timeout_ms));
+	}
+}
+
+static int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*status = *mapped_evt;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	if ((new_status != BASE_JD_SOFT_EVENT_SET) &&
+	    (new_status != BASE_JD_SOFT_EVENT_RESET))
+		return -EINVAL;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*mapped_evt = new_status;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+	struct kbase_vmap_struct map;
+	void *user_result;
+	struct timespec ts;
+	struct base_dump_cpu_gpu_counters data;
+	u64 system_time;
+	u64 cycle_counter;
+	u64 jc = katom->jc;
+	struct kbase_context *kctx = katom->kctx;
+	int pm_active_err;
+
+	memset(&data, 0, sizeof(data));
+
+	/* Take the PM active reference as late as possible - otherwise, it could
+	 * delay suspend until we process the atom (which may be at the end of a
+	 * long chain of dependencies */
+	pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+	if (pm_active_err) {
+		struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+		/* We're suspended - queue this on the list of suspended jobs
+		 * Use dep_item[1], because dep_item[0] was previously in use
+		 * for 'waiting_soft_jobs'.
+		 */
+		mutex_lock(&js_devdata->runpool_mutex);
+		list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		/* Also adding this to the list of waiting soft job */
+		kbasep_add_waiting_soft_job(katom);
+
+		return pm_active_err;
+	}
+
+	kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
+									&ts);
+
+	kbase_pm_context_idle(kctx->kbdev);
+
+	data.sec = ts.tv_sec;
+	data.usec = ts.tv_nsec / 1000;
+	data.system_time = system_time;
+	data.cycle_counter = cycle_counter;
+
+	/* Assume this atom will be cancelled until we know otherwise */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* GPU_WR access is checked on the range for returning the result to
+	 * userspace for the following reasons:
+	 * - security, this is currently how imported user bufs are checked.
+	 * - userspace ddk guaranteed to assume region was mapped as GPU_WR */
+	user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map);
+	if (!user_result)
+		return 0;
+
+	memcpy(user_result, &data, sizeof(data));
+
+	kbase_vunmap(kctx, &map);
+
+	/* Atom was fine - mark it as done */
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	return 0;
+}
+
+#ifdef CONFIG_SYNC
+
+static enum base_jd_event_code kbase_fence_trigger(struct kbase_jd_atom *katom, int result)
+{
+	struct sync_pt *pt;
+	struct sync_timeline *timeline;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	if (!list_is_singular(&katom->fence->pt_list_head)) {
+#else
+	if (katom->fence->num_fences != 1) {
+#endif
+		/* Not exactly one item in the list - so it didn't (directly) come from us */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	pt = list_first_entry(&katom->fence->pt_list_head, struct sync_pt, pt_list);
+#else
+	pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
+#endif
+	timeline = sync_pt_parent(pt);
+
+	if (!kbase_sync_timeline_is_ours(timeline)) {
+		/* Fence has a sync_pt which isn't ours! */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	kbase_sync_signal_pt(pt, result);
+
+	sync_timeline_signal(timeline);
+
+	return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+static void kbase_fence_wait_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(kctx->kbdev);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter)
+{
+	struct kbase_jd_atom *katom = container_of(waiter, struct kbase_jd_atom, sync_waiter);
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != katom);
+
+	kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	/* Propagate the fence status to the atom.
+	 * If negative then cancel this atom and its dependencies.
+	 */
+	if (kbase_fence_get_status(fence) < 0)
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue
+	 *
+	 * The issue is that we may signal the timeline while holding kctx->jctx.lock and
+	 * the callbacks are run synchronously from sync_timeline_signal. So we simply defer the work.
+	 */
+
+	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+	INIT_WORK(&katom->work, kbase_fence_wait_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+static int kbase_fence_wait(struct kbase_jd_atom *katom)
+{
+	int ret;
+
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	KBASE_DEBUG_ASSERT(NULL != katom->kctx);
+
+	sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+	ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+	if (ret == 1) {
+		/* Already signalled */
+		return 0;
+	}
+
+	if (ret < 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+		INIT_WORK(&katom->work, kbase_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+	/* The timeout code will add this job to the list of waiting soft jobs.
+	 */
+	kbasep_add_waiting_with_timeout(katom);
+#else
+	kbasep_add_waiting_soft_job(katom);
+#endif
+
+	return 1;
+}
+
+static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+		/* The wait wasn't cancelled - leave the cleanup for kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+#endif /* CONFIG_SYNC */
+
+static void kbasep_soft_event_complete_job(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt)
+{
+	int cancel_timer = 1;
+	struct list_head *entry, *tmp;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, queue);
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			if (katom->jc == evt) {
+				list_del(&katom->queue);
+
+				katom->event_code = BASE_JD_EVENT_DONE;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				/* There are still other waiting jobs, we cannot
+				 * cancel the timer yet.
+				 */
+				cancel_timer = 0;
+			}
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			/* Keep the timer running if fence debug is enabled and
+			 * there are waiting fence jobs.
+			 */
+			cancel_timer = 0;
+			break;
+#endif
+		}
+	}
+
+	if (cancel_timer)
+		del_timer(&kctx->soft_job_timeout);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep;
+
+		list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) {
+			if (dep->status == KBASE_JD_ATOM_STATE_UNUSED ||
+			    dep->status == KBASE_JD_ATOM_STATE_COMPLETED)
+				continue;
+
+			if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					== BASE_JD_REQ_SOFT_FENCE_TRIGGER) {
+				struct sync_fence *fence = dep->fence;
+				int status = kbase_fence_get_status(fence);
+
+				/* Found blocked trigger fence. */
+				dev_warn(dev,
+					 "\tVictim trigger atom %d fence [%p] %s: %s\n",
+					 kbase_jd_atom_id(kctx, dep),
+					 fence, fence->name,
+					 kbase_sync_status_string(status));
+			}
+
+			kbase_fence_debug_check_atom(dep);
+		}
+	}
+}
+
+static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = katom->kctx->kbdev->dev;
+	struct sync_fence *fence = katom->fence;
+	int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms);
+	int status = kbase_fence_get_status(fence);
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+
+	dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n",
+		 kctx->tgid, kctx->id,
+		 kbase_jd_atom_id(kctx, katom),
+		 fence, timeout_ms);
+	dev_warn(dev, "\tGuilty fence [%p] %s: %s\n",
+		 fence, fence->name,
+		 kbase_sync_status_string(status));
+
+	/* Search for blocked trigger atoms */
+	kbase_fence_debug_check_atom(katom);
+
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+
+	/* Dump out the full state of all the Android sync fences.
+	 * The function sync_dump() isn't exported to modules, so force
+	 * sync_fence_wait() to time out to trigger sync_dump().
+	 */
+	sync_fence_wait(fence, 1);
+}
+
+struct kbase_fence_debug_work {
+	struct kbase_jd_atom *katom;
+	struct work_struct work;
+};
+
+static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work)
+{
+	struct kbase_fence_debug_work *w = container_of(work,
+			struct kbase_fence_debug_work, work);
+	struct kbase_jd_atom *katom = w->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_fence_debug_wait_timeout(katom);
+	mutex_unlock(&kctx->jctx.lock);
+
+	kfree(w);
+}
+
+static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_debug_work *work;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Enqueue fence debug worker. Use job_done_wq to get
+	 * debug print ordered with job completion.
+	 */
+	work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC);
+	/* Ignore allocation failure. */
+	if (work) {
+		work->katom = katom;
+		INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker);
+		queue_work(kctx->jctx.job_done_wq, &work->work);
+	}
+}
+#endif /* CONFIG_MALI_FENCE_DEBUG */
+
+void kbasep_soft_job_timeout_worker(unsigned long data)
+{
+	struct kbase_context *kctx = (struct kbase_context *)data;
+	u32 timeout_ms = (u32)atomic_read(
+			&kctx->kbdev->js_data.soft_job_timeout_ms);
+	struct timer_list *timer = &kctx->soft_job_timeout;
+	ktime_t cur_time = ktime_get();
+	bool restarting = false;
+	unsigned long lflags;
+	struct list_head *entry, *tmp;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(entry,
+				struct kbase_jd_atom, queue);
+		s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time,
+					katom->start_timestamp));
+
+		if (elapsed_time < (s64)timeout_ms) {
+			restarting = true;
+			continue;
+		}
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			/* Take it out of the list to ensure that it
+			 * will be cancelled in all cases
+			 */
+			list_del(&katom->queue);
+
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			INIT_WORK(&katom->work, kbasep_soft_event_complete_job);
+			queue_work(kctx->jctx.job_done_wq, &katom->work);
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			kbase_fence_debug_timeout(katom);
+			break;
+#endif
+		}
+	}
+
+	if (restarting)
+		mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms));
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned char status;
+
+	/* The status of this soft-job is stored in jc */
+	if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return 0;
+	}
+
+	if (status == BASE_JD_SOFT_EVENT_SET)
+		return 0; /* Event already set, nothing to do */
+
+	kbasep_add_waiting_with_timeout(katom);
+
+	return 1;
+}
+
+static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom,
+				     unsigned char new_status)
+{
+	/* Complete jobs waiting on the same event */
+	struct kbase_context *kctx = katom->kctx;
+
+	if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, katom->jc);
+}
+
+/**
+ * kbase_soft_event_update() - Update soft event state
+ * @kctx: Pointer to context
+ * @event: Event to update
+ * @new_status: New status value of event
+ *
+ * Update the event, and wake up any atoms waiting for the event.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+int kbase_soft_event_update(struct kbase_context *kctx,
+			     u64 event,
+			     unsigned char new_status)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->jctx.lock);
+
+	if (kbasep_write_soft_event_status(kctx, event, new_status)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, event);
+
+out:
+	mutex_unlock(&kctx->jctx.lock);
+
+	return err;
+}
+
+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
+{
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+struct kbase_debug_copy_buffer {
+	size_t size;
+	struct page **pages;
+	int nr_pages;
+	size_t offset;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+
+	struct page **extres_pages;
+	int nr_extres_pages;
+};
+
+static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer)
+{
+	struct page **pages = buffer->extres_pages;
+	int nr_pages = buffer->nr_extres_pages;
+
+	if (pages) {
+		int i;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *pg = pages[i];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(pages);
+	}
+}
+
+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+
+	if (!buffers)
+		return;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	for (i = 0; i < nr; i++) {
+		int p;
+		struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc;
+
+		if (!buffers[i].pages)
+			break;
+		for (p = 0; p < buffers[i].nr_pages; p++) {
+			struct page *pg = buffers[i].pages[p];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(buffers[i].pages);
+		if (gpu_alloc) {
+			switch (gpu_alloc->type) {
+			case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+			{
+				free_user_buffer(&buffers[i]);
+				break;
+			}
+			default:
+				/* Nothing to be done. */
+				break;
+			}
+			kbase_mem_phy_alloc_put(gpu_alloc);
+		}
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+	kfree(buffers);
+
+	katom->jc = 0;
+}
+
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers;
+	struct base_jd_debug_copy_buffer *user_buffers = NULL;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+	int ret = 0;
+	void __user *user_structs = (void __user *)(uintptr_t)katom->jc;
+
+	if (!user_structs)
+		return -EINVAL;
+
+	buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers) {
+		ret = -ENOMEM;
+		katom->jc = 0;
+		goto out_cleanup;
+	}
+	katom->jc = (u64)(uintptr_t)buffers;
+
+	user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL);
+
+	if (!user_buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+
+	ret = copy_from_user(user_buffers, user_structs,
+			sizeof(*user_buffers)*nr);
+	if (ret)
+		goto out_cleanup;
+
+	for (i = 0; i < nr; i++) {
+		u64 addr = user_buffers[i].address;
+		u64 page_addr = addr & PAGE_MASK;
+		u64 end_page_addr = addr + user_buffers[i].size - 1;
+		u64 last_page_addr = end_page_addr & PAGE_MASK;
+		int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1;
+		int pinned_pages;
+		struct kbase_va_region *reg;
+		struct base_external_resource user_extres;
+
+		if (!addr)
+			continue;
+
+		buffers[i].nr_pages = nr_pages;
+		buffers[i].offset = addr & ~PAGE_MASK;
+		if (buffers[i].offset >= PAGE_SIZE) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+		buffers[i].size = user_buffers[i].size;
+
+		buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *),
+				GFP_KERNEL);
+		if (!buffers[i].pages) {
+			ret = -ENOMEM;
+			goto out_cleanup;
+		}
+
+		pinned_pages = get_user_pages_fast(page_addr,
+					nr_pages,
+					1, /* Write */
+					buffers[i].pages);
+		if (pinned_pages < 0) {
+			ret = pinned_pages;
+			goto out_cleanup;
+		}
+		if (pinned_pages != nr_pages) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		user_extres = user_buffers[i].extres;
+		if (user_extres.ext_resource == 0ULL) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		kbase_gpu_vm_lock(katom->kctx);
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, user_extres.ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+		if (NULL == reg || NULL == reg->gpu_alloc ||
+				(reg->flags & KBASE_REG_FREE)) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		buffers[i].nr_extres_pages = reg->nr_pages;
+
+		if (reg->nr_pages*PAGE_SIZE != buffers[i].size)
+			dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n");
+
+		switch (reg->gpu_alloc->type) {
+		case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		{
+			struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+			unsigned long nr_pages =
+				alloc->imported.user_buf.nr_pages;
+
+			if (alloc->imported.user_buf.mm != current->mm) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			buffers[i].extres_pages = kcalloc(nr_pages,
+					sizeof(struct page *), GFP_KERNEL);
+			if (!buffers[i].extres_pages) {
+				ret = -ENOMEM;
+				goto out_unlock;
+			}
+
+			ret = get_user_pages_fast(
+					alloc->imported.user_buf.address,
+					nr_pages, 0,
+					buffers[i].extres_pages);
+			if (ret != nr_pages)
+				goto out_unlock;
+			ret = 0;
+			break;
+		}
+		case KBASE_MEM_TYPE_IMPORTED_UMP:
+		{
+			dev_warn(katom->kctx->kbdev->dev,
+					"UMP is not supported for debug_copy jobs\n");
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		default:
+			/* Nothing to be done. */
+			break;
+		}
+		kbase_gpu_vm_unlock(katom->kctx);
+	}
+	kfree(user_buffers);
+
+	return ret;
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+
+out_cleanup:
+	kfree(buffers);
+	kfree(user_buffers);
+
+	/* Frees allocated memory for kbase_debug_copy_job struct, including
+	 * members, and sets jc to 0 */
+	kbase_debug_copy_finish(katom);
+	return ret;
+}
+
+static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+		void *extres_page, struct page **pages, unsigned int nr_pages,
+		unsigned int *target_page_nr, size_t offset, size_t *to_copy)
+{
+	void *target_page = kmap(pages[*target_page_nr]);
+	size_t chunk = PAGE_SIZE-offset;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	chunk = min(chunk, *to_copy);
+
+	memcpy(target_page + offset, extres_page, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+
+	*target_page_nr += 1;
+	if (*target_page_nr >= nr_pages)
+		return;
+
+	target_page = kmap(pages[*target_page_nr]);
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(target_page);
+
+	chunk = min(offset, *to_copy);
+	memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+}
+
+static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data)
+{
+	unsigned int i;
+	unsigned int target_page_nr = 0;
+	struct page **pages = buf_data->pages;
+	u64 offset = buf_data->offset;
+	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
+	size_t to_copy = min(extres_size, buf_data->size);
+	struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc;
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(pages != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	if (!gpu_alloc) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	switch (gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+	{
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+			struct page *pg = buf_data->extres_pages[i];
+			void *extres_page = kmap(pg);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			kunmap(pg);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		break;
+	}
+	break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf;
+
+		KBASE_DEBUG_ASSERT(dma_buf != NULL);
+		KBASE_DEBUG_ASSERT(dma_buf->size ==
+				   buf_data->nr_extres_pages * PAGE_SIZE);
+
+		ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, buf_data->nr_extres_pages*PAGE_SIZE,
+#endif
+				DMA_FROM_DEVICE);
+		if (ret)
+			goto out_unlock;
+
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+
+			void *extres_page = dma_buf_kmap(dma_buf, i);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			dma_buf_kunmap(dma_buf, i, extres_page);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, buf_data->nr_extres_pages*PAGE_SIZE,
+#endif
+				DMA_FROM_DEVICE);
+		break;
+	}
+#endif
+	default:
+		ret = -EINVAL;
+	}
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+
+}
+
+static int kbase_debug_copy(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+
+	for (i = 0; i < katom->nr_extres; i++) {
+		int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]);
+
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	struct base_jit_alloc_info *info;
+	struct kbase_context *kctx = katom->kctx;
+	int ret;
+
+	/* Fail the job if there is no info structure */
+	if (!data) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(info, data, sizeof(*info)) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* If the ID is zero then fail the job */
+	if (info->id == 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Sanity check that the PA fits within the VA */
+	if (info->va_pages < info->commit_pages) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Ensure the GPU address is correctly aligned */
+	if ((info->gpu_alloc_addr & 0x7) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Replace the user pointer with our kernel allocated info structure */
+	katom->jc = (u64)(uintptr_t) info;
+	katom->jit_blocked = false;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
+
+	/*
+	 * Note:
+	 * The provided info->gpu_alloc_addr isn't validated here as
+	 * userland can cache allocations which means that even
+	 * though the region is valid it doesn't represent the
+	 * same thing it used to.
+	 *
+	 * Complete validation of va_pages, commit_pages and extent
+	 * isn't done here as it will be done during the call to
+	 * kbase_mem_alloc.
+	 */
+	return 0;
+
+free_info:
+	kfree(info);
+fail:
+	katom->jc = 0;
+	return ret;
+}
+
+static u8 kbase_jit_free_get_id(struct kbase_jd_atom *katom)
+{
+	if (WARN_ON(katom->core_req != BASE_JD_REQ_SOFT_JIT_FREE))
+		return 0;
+
+	return (u8) katom->jc;
+}
+
+static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct base_jit_alloc_info *info;
+	struct kbase_va_region *reg;
+	struct kbase_vmap_struct mapping;
+	u64 *ptr;
+
+	if (katom->jit_blocked) {
+		list_del(&katom->queue);
+		katom->jit_blocked = false;
+	}
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+
+	/* The JIT ID is still in use so fail the allocation */
+	if (kctx->jit_alloc[info->id]) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return 0;
+	}
+
+	/* Create a JIT allocation */
+	reg = kbase_jit_allocate(kctx, info);
+	if (!reg) {
+		struct kbase_jd_atom *jit_atom;
+		bool can_block = false;
+
+		lockdep_assert_held(&kctx->jctx.lock);
+
+		jit_atom = list_first_entry(&kctx->jit_atoms_head,
+				struct kbase_jd_atom, jit_node);
+
+		list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) {
+			if (jit_atom == katom)
+				break;
+			if (jit_atom->core_req == BASE_JD_REQ_SOFT_JIT_FREE) {
+				u8 free_id = kbase_jit_free_get_id(jit_atom);
+
+				if (free_id && kctx->jit_alloc[free_id]) {
+					/* A JIT free which is active and
+					 * submitted before this atom
+					 */
+					can_block = true;
+					break;
+				}
+			}
+		}
+
+		if (!can_block) {
+			/* Mark the allocation so we know it's in use even if
+			 * the allocation itself fails.
+			 */
+			kctx->jit_alloc[info->id] =
+				(struct kbase_va_region *) -1;
+
+			katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+			return 0;
+		}
+
+		/* There are pending frees for an active allocation
+		 * so we should wait to see whether they free the memory.
+		 * Add to the beginning of the list to ensure that the atom is
+		 * processed only once in kbase_jit_free_finish
+		 */
+		list_add(&katom->queue, &kctx->jit_pending_alloc);
+		katom->jit_blocked = true;
+
+		return 1;
+	}
+
+	/*
+	 * Write the address of the JIT allocation to the user provided
+	 * GPU allocation.
+	 */
+	ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+			&mapping);
+	if (!ptr) {
+		/*
+		 * Leave the allocation "live" as the JIT free jit will be
+		 * submitted anyway.
+		 */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return 0;
+	}
+
+	*ptr = reg->start_pfn << PAGE_SHIFT;
+	kbase_vunmap(kctx, &mapping);
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	/*
+	 * Bind it to the user provided ID. Do this last so we can check for
+	 * the JIT free racing this JIT alloc job.
+	 */
+	kctx->jit_alloc[info->id] = reg;
+
+	return 0;
+}
+
+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom)
+{
+	struct base_jit_alloc_info *info;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Remove atom from jit_atoms_head list */
+	list_del(&katom->jit_node);
+
+	if (katom->jit_blocked) {
+		list_del(&katom->queue);
+		katom->jit_blocked = false;
+	}
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(info);
+}
+
+static int kbase_jit_free_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
+
+	return 0;
+}
+
+static void kbase_jit_free_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u8 id = kbase_jit_free_get_id(katom);
+
+	/*
+	 * If the ID is zero or it is not in use yet then fail the job.
+	 */
+	if ((id == 0) || (kctx->jit_alloc[id] == NULL)) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	/*
+	 * If the ID is valid but the allocation request failed still succeed
+	 * this soft job but don't try and free the allocation.
+	 */
+	if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1)
+		kbase_jit_free(kctx, kctx->jit_alloc[id]);
+
+	kctx->jit_alloc[id] = NULL;
+}
+
+static void kbasep_jit_free_finish_worker(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_finish_soft_job(katom);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+static void kbase_jit_free_finish(struct kbase_jd_atom *katom)
+{
+	struct list_head *i, *tmp;
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	/* Remove this atom from the kctx->jit_atoms_head list */
+	list_del(&katom->jit_node);
+
+	list_for_each_safe(i, tmp, &kctx->jit_pending_alloc) {
+		struct kbase_jd_atom *pending_atom = list_entry(i,
+				struct kbase_jd_atom, queue);
+		if (kbase_jit_allocate_process(pending_atom) == 0) {
+			/* Atom has completed */
+			INIT_WORK(&pending_atom->work,
+					kbasep_jit_free_finish_worker);
+			queue_work(kctx->jctx.job_done_wq, &pending_atom->work);
+		}
+	}
+}
+
+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
+{
+	__user struct base_external_resource_list *user_ext_res;
+	struct base_external_resource_list *ext_res;
+	u64 count = 0;
+	size_t copy_size;
+	int ret;
+
+	user_ext_res = (__user struct base_external_resource_list *)
+			(uintptr_t) katom->jc;
+
+	/* Fail the job if there is no info structure */
+	if (!user_ext_res) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Is the number of external resources in range? */
+	if (!count || count > BASE_EXT_RES_COUNT_MAX) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	copy_size = sizeof(*ext_res);
+	copy_size += sizeof(struct base_external_resource) * (count - 1);
+	ext_res = kzalloc(copy_size, GFP_KERNEL);
+	if (!ext_res) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/*
+	 * Overwrite the count with the first value incase it was changed
+	 * after the fact.
+	 */
+	ext_res->count = count;
+
+	/*
+	 * Replace the user pointer with our kernel allocated
+	 * ext_res structure.
+	 */
+	katom->jc = (u64)(uintptr_t) ext_res;
+
+	return 0;
+
+free_info:
+	kfree(ext_res);
+fail:
+	return ret;
+}
+
+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
+{
+	struct base_external_resource_list *ext_res;
+	int i;
+	bool failed = false;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	if (!ext_res)
+		goto failed_jc;
+
+	kbase_gpu_vm_lock(katom->kctx);
+
+	for (i = 0; i < ext_res->count; i++) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		if (map) {
+			if (!kbase_sticky_resource_acquire(katom->kctx,
+					gpu_addr))
+				goto failed_loop;
+		} else
+			if (!kbase_sticky_resource_release(katom->kctx, NULL,
+					gpu_addr))
+				failed = true;
+	}
+
+	/*
+	 * In the case of unmap we continue unmapping other resources in the
+	 * case of failure but will always report failure if _any_ unmap
+	 * request fails.
+	 */
+	if (failed)
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	else
+		katom->event_code = BASE_JD_EVENT_DONE;
+
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	return;
+
+failed_loop:
+	while (--i > 0) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+
+		kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr);
+	}
+
+	katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	kbase_gpu_vm_unlock(katom->kctx);
+
+failed_jc:
+	return;
+}
+
+static void kbase_ext_res_finish(struct kbase_jd_atom *katom)
+{
+	struct base_external_resource_list *ext_res;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(ext_res);
+}
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		return kbase_dump_cpu_gpu_time(katom);
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		KBASE_DEBUG_ASSERT(katom->fence != NULL);
+		katom->event_code = kbase_fence_trigger(katom, katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+		/* Release the reference as we don't need it any more */
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		return kbase_fence_wait(katom);
+#endif				/* CONFIG_SYNC */
+	case BASE_JD_REQ_SOFT_REPLAY:
+		return kbase_replay_process(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		return kbasep_soft_event_wait(katom);
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET);
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+	{
+		int res = kbase_debug_copy(katom);
+
+		if (res)
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		break;
+	}
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_process(katom);
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_process(katom, true);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_process(katom, false);
+		break;
+	}
+
+	/* Atom is complete */
+	return 0;
+}
+
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		kbase_fence_cancel_wait(katom);
+		break;
+#endif
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		kbasep_soft_event_cancel_job(katom);
+		break;
+	default:
+		/* This soft-job doesn't support cancellation! */
+		KBASE_DEBUG_ASSERT(0);
+	}
+}
+
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		{
+			if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK))
+				return -EINVAL;
+		}
+		break;
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		{
+			struct base_fence fence;
+			int fd;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			fd = kbase_stream_create_fence(fence.basep.stream_fd);
+			if (fd < 0)
+				return -EINVAL;
+
+			katom->fence = sync_fence_fdget(fd);
+
+			if (katom->fence == NULL) {
+				/* The only way the fence can be NULL is if userspace closed it for us.
+				 * So we don't need to clear it up */
+				return -EINVAL;
+			}
+			fence.basep.fd = fd;
+			if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+				katom->fence = NULL;
+				sys_close(fd);
+				return -EINVAL;
+			}
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		{
+			struct base_fence fence;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			/* Get a reference to the fence object */
+			katom->fence = sync_fence_fdget(fence.basep.fd);
+			if (katom->fence == NULL)
+				return -EINVAL;
+
+#ifdef CONFIG_MALI_DMA_FENCE
+			/*
+			 * Set KCTX_NO_IMPLICIT_FENCE in the context the first
+			 * time a soft fence wait job is observed. This will
+			 * prevent the implicit dma-buf fence to conflict with
+			 * the Android native sync fences.
+			 */
+			if (!kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC))
+				kbase_ctx_flag_set(katom->kctx, KCTX_NO_IMPLICIT_SYNC);
+#endif /* CONFIG_MALI_DMA_FENCE */
+		}
+		break;
+#endif				/* CONFIG_SYNC */
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_prepare(katom);
+	case BASE_JD_REQ_SOFT_REPLAY:
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		return kbase_jit_free_prepare(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		if (katom->jc == 0)
+			return -EINVAL;
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		return kbase_debug_copy_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		return kbase_ext_res_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		return kbase_ext_res_prepare(katom);
+	default:
+		/* Unsupported soft-job */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void kbase_finish_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		/* Nothing to do */
+		break;
+#ifdef CONFIG_SYNC
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		/* If fence has not yet been signalled, do it now */
+		if (katom->fence) {
+			kbase_fence_trigger(katom, katom->event_code ==
+					BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+			sync_fence_put(katom->fence);
+			katom->fence = NULL;
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		/* Release the reference to the fence object */
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+		break;
+#endif				/* CONFIG_SYNC */
+
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		kbase_debug_copy_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_finish(katom);
+		break;
+	}
+}
+
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
+{
+	LIST_HEAD(local_suspended_soft_jobs);
+	struct kbase_jd_atom *tmp_iter;
+	struct kbase_jd_atom *katom_iter;
+	struct kbasep_js_device_data *js_devdata;
+	bool resched = false;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	js_devdata = &kbdev->js_data;
+
+	/* Move out the entire list */
+	mutex_lock(&js_devdata->runpool_mutex);
+	list_splice_init(&js_devdata->suspended_soft_jobs_list,
+			&local_suspended_soft_jobs);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/*
+	 * Each atom must be detached from the list and ran separately -
+	 * it could be re-added to the old list, but this is unlikely
+	 */
+	list_for_each_entry_safe(katom_iter, tmp_iter,
+			&local_suspended_soft_jobs, dep_item[1]) {
+		struct kbase_context *kctx = katom_iter->kctx;
+
+		mutex_lock(&kctx->jctx.lock);
+
+		/* Remove from the global list */
+		list_del(&katom_iter->dep_item[1]);
+		/* Remove from the context's list of waiting soft jobs */
+		kbasep_remove_waiting_soft_job(katom_iter);
+
+		if (kbase_process_soft_job(katom_iter) == 0) {
+			kbase_finish_soft_job(katom_iter);
+			resched |= jd_done_nolock(katom_iter, NULL);
+		} else {
+			KBASE_DEBUG_ASSERT((katom_iter->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE)
+					!= BASE_JD_REQ_SOFT_REPLAY);
+		}
+
+		mutex_unlock(&kctx->jctx.lock);
+	}
+
+	if (resched)
+		kbase_js_sched_all(kbdev);
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
new file mode 100644
index 0000000..c98762c
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
@@ -0,0 +1,23 @@
+ /*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include "mali_kbase_strings.h"
+
+#define KBASE_DRV_NAME "mali"
+#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline"
+
+const char kbase_drv_name[] = KBASE_DRV_NAME;
+const char kbase_timeline_name[] = KBASE_TIMELINE_NAME;
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
new file mode 100644
index 0000000..41b8fdb
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
@@ -0,0 +1,19 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+extern const char kbase_drv_name[];
+extern const char kbase_timeline_name[];
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c
new file mode 100644
index 0000000..97e301d
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.c
@@ -0,0 +1,192 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_SYNC
+
+#include <linux/seq_file.h>
+#include "sync.h"
+#include <mali_kbase.h>
+#include <mali_kbase_sync.h>
+
+struct mali_sync_timeline {
+	struct sync_timeline timeline;
+	atomic_t counter;
+	atomic_t signalled;
+};
+
+struct mali_sync_pt {
+	struct sync_pt pt;
+	int order;
+	int result;
+};
+
+static struct mali_sync_timeline *to_mali_sync_timeline(struct sync_timeline *timeline)
+{
+	return container_of(timeline, struct mali_sync_timeline, timeline);
+}
+
+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, pt);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_pt *new_mpt;
+	struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), sizeof(struct mali_sync_pt));
+
+	if (!new_pt)
+		return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+	new_mpt->order = mpt->order;
+	new_mpt->result = mpt->result;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+	int result = mpt->result;
+
+	int diff = atomic_read(&mtl->signalled) - mpt->order;
+
+	if (diff >= 0)
+		return (result < 0) ? result : 1;
+
+	return 0;
+}
+
+static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
+	struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
+
+	int diff = ma->order - mb->order;
+
+	if (diff == 0)
+		return 0;
+
+	return (diff < 0) ? -1 : 1;
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str,
+			       int size)
+{
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
+
+	snprintf(str, size, "%d", atomic_read(&mtl->signalled));
+}
+
+static void pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+
+	snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
+}
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name = "Mali",
+	.dup = timeline_dup,
+	.has_signaled = timeline_has_signaled,
+	.compare = timeline_compare,
+	.timeline_value_str = timeline_value_str,
+	.pt_value_str       = pt_value_str,
+};
+
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
+{
+	return timeline->ops == &mali_timeline_ops;
+}
+
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name)
+{
+	struct sync_timeline *tl;
+	struct mali_sync_timeline *mtl;
+
+	tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline), name);
+	if (!tl)
+		return NULL;
+
+	/* Set the counter in our private struct */
+	mtl = to_mali_sync_timeline(tl);
+	atomic_set(&mtl->counter, 0);
+	atomic_set(&mtl->signalled, 0);
+
+	return tl;
+}
+
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+	struct sync_pt *pt = sync_pt_create(parent, sizeof(struct mali_sync_pt));
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+	struct mali_sync_pt *mpt;
+
+	if (!pt)
+		return NULL;
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->order = atomic_inc_return(&mtl->counter);
+	mpt->result = 0;
+
+	return pt;
+}
+
+void kbase_sync_signal_pt(struct sync_pt *pt, int result)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+	int signalled;
+	int diff;
+
+	mpt->result = result;
+
+	do {
+		signalled = atomic_read(&mtl->signalled);
+
+		diff = signalled - mpt->order;
+
+		if (diff > 0) {
+			/* The timeline is already at or ahead of this point.
+			 * This should not happen unless userspace has been
+			 * signalling fences out of order, so warn but don't
+			 * violate the sync_pt API.
+			 * The warning is only in debug builds to prevent
+			 * a malicious user being able to spam dmesg.
+			 */
+#ifdef CONFIG_MALI_DEBUG
+			pr_err("Fences were triggered in a different order to allocation!");
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+	} while (atomic_cmpxchg(&mtl->signalled, signalled, mpt->order) != signalled);
+}
+
+const char *kbase_sync_status_string(int status)
+{
+	if (status == 0)
+		return "signaled";
+	else if (status > 0)
+		return "active";
+	else
+		return "error";
+}
+
+#endif				/* CONFIG_SYNC */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
new file mode 100644
index 0000000..07e5158
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -0,0 +1,108 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync.h
+ *
+ */
+
+#ifndef MALI_KBASE_SYNC_H
+#define MALI_KBASE_SYNC_H
+
+#include "sync.h"
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+/* For backwards compatiblility with kernels before 3.17. After 3.17
+ * sync_pt_parent is included in the kernel. */
+static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
+{
+	return pt->parent;
+}
+#endif
+
+static inline int kbase_fence_get_status(struct sync_fence *fence)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	return fence->status;
+#else
+	return atomic_read(&fence->status);
+#endif
+}
+
+/*
+ * Create a stream object.
+ * Built on top of timeline object.
+ * Exposed as a file descriptor.
+ * Life-time controlled via the file descriptor:
+ * - dup to add a ref
+ * - close to remove a ref
+ */
+int kbase_stream_create(const char *name, int *const out_fd);
+
+/*
+ * Create a fence in a stream object
+ */
+int kbase_stream_create_fence(int tl_fd);
+
+/*
+ * Validate a fd to be a valid fence
+ * No reference is taken.
+ *
+ * This function is only usable to catch unintentional user errors early,
+ * it does not stop malicious code changing the fd after this function returns.
+ */
+int kbase_fence_validate(int fd);
+
+/* Returns true if the specified timeline is allocated by Mali */
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline);
+
+/* Allocates a timeline for Mali
+ *
+ * One timeline should be allocated per API context.
+ */
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name);
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by kbase_sync_timeline_alloc
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ */
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent);
+
+/* Signals a particular sync point
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ *
+ * If they are signalled in the wrong order then a message will be printed in debug
+ * builds and otherwise attempts to signal order sync_pts will be ignored.
+ *
+ * result can be negative to indicate error, any other value is interpreted as success.
+ */
+void kbase_sync_signal_pt(struct sync_pt *pt, int result);
+
+/**
+ * kbase_sync_status_string() - Get string matching @status
+ * @status: Value of fence status.
+ *
+ * Return: Pointer to string describing @status.
+ */
+const char *kbase_sync_status_string(int status);
+
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
new file mode 100644
index 0000000..b9baa91
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync_user.c
+ *
+ */
+
+#ifdef CONFIG_SYNC
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <mali_kbase_sync.h>
+
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+	struct sync_timeline *tl;
+
+	tl = (struct sync_timeline *)file->private_data;
+	BUG_ON(!tl);
+	sync_timeline_destroy(tl);
+	return 0;
+}
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE,
+	.release = kbase_stream_close,
+};
+
+int kbase_stream_create(const char *name, int *const out_fd)
+{
+	struct sync_timeline *tl;
+
+	BUG_ON(!out_fd);
+
+	tl = kbase_sync_timeline_alloc(name);
+	if (!tl)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY | O_CLOEXEC);
+
+	if (*out_fd < 0) {
+		sync_timeline_destroy(tl);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int kbase_stream_create_fence(int tl_fd)
+{
+	struct sync_timeline *tl;
+	struct sync_pt *pt;
+	struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+	struct files_struct *files;
+	struct fdtable *fdt;
+#endif
+	int fd;
+	struct file *tl_file;
+
+	tl_file = fget(tl_fd);
+	if (tl_file == NULL)
+		return -EBADF;
+
+	if (tl_file->f_op != &stream_fops) {
+		fd = -EBADF;
+		goto out;
+	}
+
+	tl = tl_file->private_data;
+
+	pt = kbase_sync_pt_alloc(tl);
+	if (!pt) {
+		fd = -EFAULT;
+		goto out;
+	}
+
+	fence = sync_fence_create("mali_fence", pt);
+	if (!fence) {
+		sync_pt_free(pt);
+		fd = -EFAULT;
+		goto out;
+	}
+
+	/* from here the fence owns the sync_pt */
+
+	/* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+#else
+	fd = get_unused_fd();
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+
+	files = current->files;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	__set_close_on_exec(fd, fdt);
+#else
+	FD_SET(fd, fdt->close_on_exec);
+#endif
+	spin_unlock(&files->file_lock);
+#endif  /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+	/* bind fence to the new fd */
+	sync_fence_install(fence, fd);
+
+ out:
+	fput(tl_file);
+
+	return fd;
+}
+
+int kbase_fence_validate(int fd)
+{
+	struct sync_fence *fence;
+
+	fence = sync_fence_fdget(fd);
+	if (!fence)
+		return -EINVAL;
+
+	sync_fence_put(fence);
+	return 0;
+}
+
+#endif				/* CONFIG_SYNC */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
new file mode 100644
index 0000000..408b9d9
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -0,0 +1,2473 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* The version of swtrace protocol used in timeline stream. */
+#define SWTRACE_VERSION    3
+
+/* The maximum expected length of string in tracepoint descriptor. */
+#define STRLEN_MAX         64 /* bytes */
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC       1000000000ull /* ns */
+
+/* The period of autoflush checker execution in milliseconds. */
+#define AUTOFLUSH_INTERVAL 1000 /* ms */
+
+/* The maximum size of a single packet used by timeline. */
+#define PACKET_SIZE        4096 /* bytes */
+
+/* The number of packets used by one timeline stream. */
+#define PACKET_COUNT       16
+
+/* The number of bytes reserved for packet header.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_HEADER_SIZE 8 /* bytes */
+
+/* The number of bytes reserved for packet sequence number.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_NUMBER_SIZE 4 /* bytes */
+
+/* Packet header - first word.
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_STREAMID_POS  0
+#define PACKET_STREAMID_LEN  8
+#define PACKET_RSVD1_POS     (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
+#define PACKET_RSVD1_LEN     8
+#define PACKET_TYPE_POS      (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
+#define PACKET_TYPE_LEN      3
+#define PACKET_CLASS_POS     (PACKET_TYPE_POS + PACKET_TYPE_LEN)
+#define PACKET_CLASS_LEN     7
+#define PACKET_FAMILY_POS    (PACKET_CLASS_POS + PACKET_CLASS_LEN)
+#define PACKET_FAMILY_LEN    6
+
+/* Packet header - second word
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_LENGTH_POS    0
+#define PACKET_LENGTH_LEN    24
+#define PACKET_SEQBIT_POS    (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
+#define PACKET_SEQBIT_LEN    1
+#define PACKET_RSVD2_POS     (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
+#define PACKET_RSVD2_LEN     7
+
+/* Types of streams generated by timeline.
+ * Order is significant! Header streams must precede respective body streams. */
+enum tl_stream_type {
+	TL_STREAM_TYPE_OBJ_HEADER,
+	TL_STREAM_TYPE_OBJ_SUMMARY,
+	TL_STREAM_TYPE_OBJ,
+	TL_STREAM_TYPE_AUX_HEADER,
+	TL_STREAM_TYPE_AUX,
+
+	TL_STREAM_TYPE_COUNT
+};
+
+/* Timeline packet family ids.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_family {
+	TL_PACKET_FAMILY_CTRL = 0, /* control packets */
+	TL_PACKET_FAMILY_TL   = 1, /* timeline packets */
+
+	TL_PACKET_FAMILY_COUNT
+};
+
+/* Packet classes used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_class {
+	TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
+	TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
+};
+
+/* Packet types used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_type {
+	TL_PACKET_TYPE_HEADER  = 0, /* stream's header/directory */
+	TL_PACKET_TYPE_BODY    = 1, /* stream's body */
+	TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
+};
+
+/* Message ids of trace events that are recorded in the timeline stream. */
+enum tl_msg_id_obj {
+	/* Timeline object events. */
+	KBASE_TL_NEW_CTX,
+	KBASE_TL_NEW_GPU,
+	KBASE_TL_NEW_LPU,
+	KBASE_TL_NEW_ATOM,
+	KBASE_TL_NEW_AS,
+	KBASE_TL_DEL_CTX,
+	KBASE_TL_DEL_ATOM,
+	KBASE_TL_LIFELINK_LPU_GPU,
+	KBASE_TL_LIFELINK_AS_GPU,
+	KBASE_TL_RET_CTX_LPU,
+	KBASE_TL_RET_ATOM_CTX,
+	KBASE_TL_RET_ATOM_LPU,
+	KBASE_TL_NRET_CTX_LPU,
+	KBASE_TL_NRET_ATOM_CTX,
+	KBASE_TL_NRET_ATOM_LPU,
+	KBASE_TL_RET_AS_CTX,
+	KBASE_TL_NRET_AS_CTX,
+	KBASE_TL_RET_ATOM_AS,
+	KBASE_TL_NRET_ATOM_AS,
+	KBASE_TL_DEP_ATOM_ATOM,
+	KBASE_TL_NDEP_ATOM_ATOM,
+	KBASE_TL_RDEP_ATOM_ATOM,
+	KBASE_TL_ATTRIB_ATOM_CONFIG,
+	KBASE_TL_ATTRIB_ATOM_PRIORITY,
+	KBASE_TL_ATTRIB_ATOM_STATE,
+	KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE,
+	KBASE_TL_ATTRIB_AS_CONFIG,
+	KBASE_TL_EVENT_LPU_SOFTSTOP,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+
+	/* Job dump specific events. */
+	KBASE_JD_GPU_SOFT_RESET
+};
+
+/* Message ids of trace events that are recorded in the auxiliary stream. */
+enum tl_msg_id_aux {
+	KBASE_AUX_PM_STATE,
+	KBASE_AUX_PAGEFAULT,
+	KBASE_AUX_PAGESALLOC,
+	KBASE_AUX_DEVFREQ_TARGET,
+	KBASE_AUX_PROTECTED_ENTER_START,
+	KBASE_AUX_PROTECTED_ENTER_END,
+	KBASE_AUX_PROTECTED_LEAVE_START,
+	KBASE_AUX_PROTECTED_LEAVE_END
+};
+
+/*****************************************************************************/
+
+/**
+ * struct tl_stream - timeline stream structure
+ * @lock: message order lock
+ * @buffer: array of buffers
+ * @wbi: write buffer index
+ * @rbi: read buffer index
+ * @numbered: if non-zero stream's packets are sequentially numbered
+ * @autoflush_counter: counter tracking stream's autoflush state
+ *
+ * This structure holds information needed to construct proper packets in the
+ * timeline stream. Each message in sequence must bear timestamp that is greater
+ * to one in previous message in the same stream. For this reason lock is held
+ * throughout the process of message creation. Each stream contains set of
+ * buffers. Each buffer will hold one MIPE packet. In case there is no free
+ * space required to store incoming message the oldest buffer is discarded.
+ * Each packet in timeline body stream has sequence number embedded (this value
+ * must increment monotonically and is used by packets receiver to discover
+ * buffer overflows.
+ * Autoflush counter is set to negative number when there is no data pending
+ * for flush and it is set to zero on every update of the buffer. Autoflush
+ * timer will increment the counter by one on every expiry. In case there will
+ * be no activity on the buffer during two consecutive timer expiries, stream
+ * buffer will be flushed.
+ */
+struct tl_stream {
+	spinlock_t lock;
+
+	struct {
+		atomic_t size;              /* number of bytes in buffer */
+		char     data[PACKET_SIZE]; /* buffer's data */
+	} buffer[PACKET_COUNT];
+
+	atomic_t wbi;
+	atomic_t rbi;
+
+	int      numbered;
+	atomic_t autoflush_counter;
+};
+
+/**
+ * struct tp_desc - tracepoint message descriptor structure
+ * @id:        tracepoint ID identifying message in stream
+ * @id_str:    human readable version of tracepoint ID
+ * @name:      tracepoint description
+ * @arg_types: tracepoint's arguments types declaration
+ * @arg_names: comma separated list of tracepoint's arguments names
+ */
+struct tp_desc {
+	u32        id;
+	const char *id_str;
+	const char *name;
+	const char *arg_types;
+	const char *arg_names;
+};
+
+/*****************************************************************************/
+
+/* Configuration of timeline streams generated by kernel.
+ * Kernel emit only streams containing either timeline object events or
+ * auxiliary events. All streams have stream id value of 1 (as opposed to user
+ * space streams that have value of 0). */
+static const struct {
+	enum tl_packet_family pkt_family;
+	enum tl_packet_class  pkt_class;
+	enum tl_packet_type   pkt_type;
+	unsigned int          stream_id;
+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY,    1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY,    1}
+};
+
+/* The timeline streams generated by kernel. */
+static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT];
+
+/* Autoflush timer. */
+static struct timer_list autoflush_timer;
+
+/* If non-zero autoflush timer is active. */
+static atomic_t autoflush_timer_active;
+
+/* Reader lock. Only one reader is allowed to have access to the timeline
+ * streams at any given time. */
+static DEFINE_MUTEX(tl_reader_lock);
+
+/* Timeline stream event queue. */
+static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
+
+/* The timeline stream file operations functions. */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos);
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait);
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations kbasep_tlstream_fops = {
+	.release = kbasep_tlstream_release,
+	.read    = kbasep_tlstream_read,
+	.poll    = kbasep_tlstream_poll,
+};
+
+/* Descriptors of timeline messages transmitted in object events stream. */
+static const struct tp_desc tp_desc_obj[] = {
+	{
+		KBASE_TL_NEW_CTX,
+		__stringify(KBASE_TL_NEW_CTX),
+		"object ctx is created",
+		"@pII",
+		"ctx,ctx_nr,tgid"
+	},
+	{
+		KBASE_TL_NEW_GPU,
+		__stringify(KBASE_TL_NEW_GPU),
+		"object gpu is created",
+		"@pII",
+		"gpu,gpu_id,core_count"
+	},
+	{
+		KBASE_TL_NEW_LPU,
+		__stringify(KBASE_TL_NEW_LPU),
+		"object lpu is created",
+		"@pII",
+		"lpu,lpu_nr,lpu_fn"
+	},
+	{
+		KBASE_TL_NEW_ATOM,
+		__stringify(KBASE_TL_NEW_ATOM),
+		"object atom is created",
+		"@pI",
+		"atom,atom_nr"
+	},
+	{
+		KBASE_TL_NEW_AS,
+		__stringify(KBASE_TL_NEW_AS),
+		"address space object is created",
+		"@pI",
+		"address_space,as_nr"
+	},
+	{
+		KBASE_TL_DEL_CTX,
+		__stringify(KBASE_TL_DEL_CTX),
+		"context is destroyed",
+		"@p",
+		"ctx"
+	},
+	{
+		KBASE_TL_DEL_ATOM,
+		__stringify(KBASE_TL_DEL_ATOM),
+		"atom is destroyed",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_LIFELINK_LPU_GPU,
+		__stringify(KBASE_TL_LIFELINK_LPU_GPU),
+		"lpu is deleted with gpu",
+		"@pp",
+		"lpu,gpu"
+	},
+	{
+		KBASE_TL_LIFELINK_AS_GPU,
+		__stringify(KBASE_TL_LIFELINK_AS_GPU),
+		"address space is deleted with gpu",
+		"@pp",
+		"address_space,gpu"
+	},
+	{
+		KBASE_TL_RET_CTX_LPU,
+		__stringify(KBASE_TL_RET_CTX_LPU),
+		"context is retained by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_RET_ATOM_CTX,
+		__stringify(KBASE_TL_RET_ATOM_CTX),
+		"atom is retained by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_LPU,
+		__stringify(KBASE_TL_RET_ATOM_LPU),
+		"atom is retained by lpu",
+		"@pps",
+		"atom,lpu,attrib_match_list"
+	},
+	{
+		KBASE_TL_NRET_CTX_LPU,
+		__stringify(KBASE_TL_NRET_CTX_LPU),
+		"context is released by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_NRET_ATOM_CTX,
+		__stringify(KBASE_TL_NRET_ATOM_CTX),
+		"atom is released by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_NRET_ATOM_LPU,
+		__stringify(KBASE_TL_NRET_ATOM_LPU),
+		"atom is released by lpu",
+		"@pp",
+		"atom,lpu"
+	},
+	{
+		KBASE_TL_RET_AS_CTX,
+		__stringify(KBASE_TL_RET_AS_CTX),
+		"address space is retained by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_NRET_AS_CTX,
+		__stringify(KBASE_TL_NRET_AS_CTX),
+		"address space is released by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_AS,
+		__stringify(KBASE_TL_RET_ATOM_AS),
+		"atom is retained by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_NRET_ATOM_AS,
+		__stringify(KBASE_TL_NRET_ATOM_AS),
+		"atom is released by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_DEP_ATOM_ATOM,
+		__stringify(KBASE_TL_DEP_ATOM_ATOM),
+		"atom2 depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_NDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_NDEP_ATOM_ATOM),
+		"atom2 no longer depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_RDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_RDEP_ATOM_ATOM),
+		"resolved dependecy of atom2 depending on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_ATOM_CONFIG),
+		"atom job slot attributes",
+		"@pLLI",
+		"atom,descriptor,affinity,config"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_PRIORITY,
+		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY),
+		"atom priority",
+		"@pI",
+		"atom,prio"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_STATE,
+		__stringify(KBASE_TL_ATTRIB_ATOM_STATE),
+		"atom state",
+		"@pI",
+		"atom,state"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE,
+		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE),
+		"atom caused priority change",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_ATTRIB_AS_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_AS_CONFIG),
+		"address space attributes",
+		"@pLLL",
+		"address_space,transtab,memattr,transcfg"
+	},
+	{
+		KBASE_TL_EVENT_LPU_SOFTSTOP,
+		__stringify(KBASE_TL_EVENT_LPU_SOFTSTOP),
+		"softstop event on given lpu",
+		"@p",
+		"lpu"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX),
+		"atom softstopped",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+		__stringify(KBASE_TL_EVENT_SOFTSTOP_ISSUE),
+		"atom softstop issued",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_JD_GPU_SOFT_RESET,
+		__stringify(KBASE_JD_GPU_SOFT_RESET),
+		"gpu soft reset",
+		"@p",
+		"gpu"
+	},
+};
+
+/* Descriptors of timeline messages transmitted in auxiliary events stream. */
+static const struct tp_desc tp_desc_aux[] = {
+	{
+		KBASE_AUX_PM_STATE,
+		__stringify(KBASE_AUX_PM_STATE),
+		"PM state",
+		"@IL",
+		"core_type,core_state_bitset"
+	},
+	{
+		KBASE_AUX_PAGEFAULT,
+		__stringify(KBASE_AUX_PAGEFAULT),
+		"Page fault",
+		"@IL",
+		"ctx_nr,page_cnt_change"
+	},
+	{
+		KBASE_AUX_PAGESALLOC,
+		__stringify(KBASE_AUX_PAGESALLOC),
+		"Total alloc pages change",
+		"@IL",
+		"ctx_nr,page_cnt"
+	},
+	{
+		KBASE_AUX_DEVFREQ_TARGET,
+		__stringify(KBASE_AUX_DEVFREQ_TARGET),
+		"New device frequency target",
+		"@L",
+		"target_freq"
+	},
+	{
+		KBASE_AUX_PROTECTED_ENTER_START,
+		__stringify(KBASE_AUX_PROTECTED_ENTER_START),
+		"enter protected mode start",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_ENTER_END,
+		__stringify(KBASE_AUX_PROTECTED_ENTER_END),
+		"enter protected mode end",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_LEAVE_START,
+		__stringify(KBASE_AUX_PROTECTED_LEAVE_START),
+		"leave protected mode start",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_LEAVE_END,
+		__stringify(KBASE_AUX_PROTECTED_LEAVE_END),
+		"leave protected mode end",
+		"@p",
+		"gpu"
+	}
+};
+
+#if MALI_UNIT_TEST
+/* Number of bytes read by user. */
+static atomic_t tlstream_bytes_collected = {0};
+
+/* Number of bytes generated by tracepoint messages. */
+static atomic_t tlstream_bytes_generated = {0};
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+/* Indicator of whether the timeline stream file descriptor is used. */
+atomic_t kbase_tlstream_enabled = {0};
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ * Return: timestamp value
+ */
+static u64 kbasep_tlstream_get_timestamp(void)
+{
+	struct timespec ts;
+	u64             timestamp;
+
+	getrawmonotonic(&ts);
+	timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+	return timestamp;
+}
+
+/**
+ * kbasep_tlstream_write_bytes - write data to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ * @bytes:  pointer to buffer holding data
+ * @len:    length of data to be written
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_bytes(
+		char       *buffer,
+		size_t     pos,
+		const void *bytes,
+		size_t     len)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(bytes);
+
+	memcpy(&buffer[pos], bytes, len);
+
+	return pos + len;
+}
+
+/**
+ * kbasep_tlstream_write_string - write string to message buffer
+ * @buffer:         buffer where data will be written
+ * @pos:            position in the buffer where to place data
+ * @string:         pointer to buffer holding the source string
+ * @max_write_size: number of bytes that can be stored in buffer
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_string(
+		char       *buffer,
+		size_t     pos,
+		const char *string,
+		size_t     max_write_size)
+{
+	u32 string_len;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(string);
+	/* Timeline string consists of at least string length and nul
+	 * terminator. */
+	KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
+	max_write_size -= sizeof(string_len);
+
+	string_len = strlcpy(
+			&buffer[pos + sizeof(string_len)],
+			string,
+			max_write_size);
+	string_len += sizeof(char);
+
+	/* Make sure that the source string fit into the buffer. */
+	KBASE_DEBUG_ASSERT(string_len <= max_write_size);
+
+	/* Update string length. */
+	memcpy(&buffer[pos], &string_len, sizeof(string_len));
+
+	return pos + sizeof(string_len) + string_len;
+}
+
+/**
+ * kbasep_tlstream_write_timestamp - write timestamp to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos)
+{
+	u64 timestamp = kbasep_tlstream_get_timestamp();
+
+	return kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&timestamp, sizeof(timestamp));
+}
+
+/**
+ * kbasep_tlstream_put_bits - put bits in a word
+ * @word:   pointer to the words being modified
+ * @value:  value that shall be written to given position
+ * @bitpos: position where value shall be written (in bits)
+ * @bitlen: length of value (in bits)
+ */
+static void kbasep_tlstream_put_bits(
+		u32          *word,
+		u32          value,
+		unsigned int bitpos,
+		unsigned int bitlen)
+{
+	const u32 mask = ((1 << bitlen) - 1) << bitpos;
+
+	KBASE_DEBUG_ASSERT(word);
+	KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen));
+	KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32);
+
+	*word &= ~mask;
+	*word |= ((value << bitpos) & mask);
+}
+
+/**
+ * kbasep_tlstream_packet_header_setup - setup the packet header
+ * @buffer:     pointer to the buffer
+ * @pkt_family: packet's family
+ * @pkt_type:   packet's type
+ * @pkt_class:  packet's class
+ * @stream_id:  stream id
+ * @numbered:   non-zero if this stream is numbered
+ *
+ * Function sets up immutable part of packet header in the given buffer.
+ */
+static void kbasep_tlstream_packet_header_setup(
+		char                  *buffer,
+		enum tl_packet_family pkt_family,
+		enum tl_packet_class  pkt_class,
+		enum tl_packet_type   pkt_type,
+		unsigned int          stream_id,
+		int                   numbered)
+{
+	u32 word0 = 0;
+	u32 word1 = 0;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL);
+	KBASE_DEBUG_ASSERT(
+			(pkt_type == TL_PACKET_TYPE_HEADER)  ||
+			(pkt_type == TL_PACKET_TYPE_SUMMARY) ||
+			(pkt_type == TL_PACKET_TYPE_BODY));
+	KBASE_DEBUG_ASSERT(
+			(pkt_class == TL_PACKET_CLASS_OBJ) ||
+			(pkt_class == TL_PACKET_CLASS_AUX));
+
+	kbasep_tlstream_put_bits(
+			&word0, pkt_family,
+			PACKET_FAMILY_POS, PACKET_FAMILY_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_class,
+			PACKET_CLASS_POS, PACKET_CLASS_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_type,
+			PACKET_TYPE_POS, PACKET_TYPE_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, stream_id,
+			PACKET_STREAMID_POS, PACKET_STREAMID_LEN);
+
+	if (numbered)
+		kbasep_tlstream_put_bits(
+				&word1, 1,
+				PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN);
+
+	memcpy(&buffer[0],             &word0, sizeof(word0));
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_header_update - update the packet header
+ * @buffer:    pointer to the buffer
+ * @data_size: amount of data carried in this packet
+ *
+ * Function updates mutable part of packet header in the given buffer.
+ * Note that value of data_size must not including size of the header.
+ */
+static void kbasep_tlstream_packet_header_update(
+		char   *buffer,
+		size_t data_size)
+{
+	u32 word0;
+	u32 word1;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	CSTD_UNUSED(word0);
+
+	memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1));
+
+	kbasep_tlstream_put_bits(
+			&word1, data_size,
+			PACKET_LENGTH_POS, PACKET_LENGTH_LEN);
+
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_number_update - update the packet number
+ * @buffer:  pointer to the buffer
+ * @counter: value of packet counter for this packet's stream
+ *
+ * Function updates packet number embedded within the packet placed in the
+ * given buffer.
+ */
+static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+
+	memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter));
+}
+
+/**
+ * kbasep_timeline_stream_reset - reset stream
+ * @stream:  pointer to the stream structure
+ *
+ * Function discards all pending messages and resets packet counters.
+ */
+static void kbasep_timeline_stream_reset(struct tl_stream *stream)
+{
+	unsigned int i;
+
+	for (i = 0; i < PACKET_COUNT; i++) {
+		if (stream->numbered)
+			atomic_set(
+					&stream->buffer[i].size,
+					PACKET_HEADER_SIZE +
+					PACKET_NUMBER_SIZE);
+		else
+			atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE);
+	}
+
+	atomic_set(&stream->wbi, 0);
+	atomic_set(&stream->rbi, 0);
+}
+
+/**
+ * kbasep_timeline_stream_init - initialize timeline stream
+ * @stream:      pointer to the stream structure
+ * @stream_type: stream type
+ */
+static void kbasep_timeline_stream_init(
+		struct tl_stream    *stream,
+		enum tl_stream_type stream_type)
+{
+	unsigned int i;
+
+	KBASE_DEBUG_ASSERT(stream);
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	spin_lock_init(&stream->lock);
+
+	/* All packets carrying tracepoints shall be numbered. */
+	if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type)
+		stream->numbered = 1;
+	else
+		stream->numbered = 0;
+
+	for (i = 0; i < PACKET_COUNT; i++)
+		kbasep_tlstream_packet_header_setup(
+				stream->buffer[i].data,
+				tl_stream_cfg[stream_type].pkt_family,
+				tl_stream_cfg[stream_type].pkt_class,
+				tl_stream_cfg[stream_type].pkt_type,
+				tl_stream_cfg[stream_type].stream_id,
+				stream->numbered);
+
+	kbasep_timeline_stream_reset(tl_stream[stream_type]);
+}
+
+/**
+ * kbasep_timeline_stream_term - terminate timeline stream
+ * @stream: pointer to the stream structure
+ */
+static void kbasep_timeline_stream_term(struct tl_stream *stream)
+{
+	KBASE_DEBUG_ASSERT(stream);
+}
+
+/**
+ * kbasep_tlstream_msgbuf_submit - submit packet to the user space
+ * @stream:     pointer to the stream structure
+ * @wb_idx_raw: write buffer index
+ * @wb_size:    length of data stored in current buffer
+ *
+ * Function updates currently written buffer with packet header. Then write
+ * index is incremented and buffer is handled to user space. Parameters
+ * of new buffer are returned using provided arguments.
+ *
+ * Return: length of data in new buffer
+ *
+ * Warning:  User must update the stream structure with returned value.
+ */
+static size_t kbasep_tlstream_msgbuf_submit(
+		struct tl_stream *stream,
+		unsigned int      wb_idx_raw,
+		unsigned int      wb_size)
+{
+	unsigned int rb_idx_raw = atomic_read(&stream->rbi);
+	unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
+
+	/* Set stream as flushed. */
+	atomic_set(&stream->autoflush_counter, -1);
+
+	kbasep_tlstream_packet_header_update(
+			stream->buffer[wb_idx].data,
+			wb_size - PACKET_HEADER_SIZE);
+
+	if (stream->numbered)
+		kbasep_tlstream_packet_number_update(
+				stream->buffer[wb_idx].data,
+				wb_idx_raw);
+
+	/* Increasing write buffer index will expose this packet to the reader.
+	 * As stream->lock is not taken on reader side we must make sure memory
+	 * is updated correctly before this will happen. */
+	smp_wmb();
+	wb_idx_raw++;
+	atomic_set(&stream->wbi, wb_idx_raw);
+
+	/* Inform user that packets are ready for reading. */
+	wake_up_interruptible(&tl_event_queue);
+
+	/* Detect and mark overflow in this stream. */
+	if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) {
+		/* Reader side depends on this increment to correctly handle
+		 * overflows. The value shall be updated only if it was not
+		 * modified by the reader. The data holding buffer will not be
+		 * updated before stream->lock is released, however size of the
+		 * buffer will. Make sure this increment is globally visible
+		 * before information about selected write buffer size. */
+		atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1);
+	}
+
+	wb_size = PACKET_HEADER_SIZE;
+	if (stream->numbered)
+		wb_size += PACKET_NUMBER_SIZE;
+
+	return wb_size;
+}
+
+/**
+ * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer
+ * @stream_type: type of the stream that shall be locked
+ * @msg_size:    message size
+ * @flags:       pointer to store flags passed back on stream release
+ *
+ * Function will lock the stream and reserve the number of bytes requested
+ * in msg_size for the user.
+ *
+ * Return: pointer to the buffer where message can be stored
+ *
+ * Warning: Stream must be released with kbasep_tlstream_msgbuf_release().
+ *          Only atomic operations are allowed while stream is locked
+ *          (i.e. do not use any operation that may sleep).
+ */
+static char *kbasep_tlstream_msgbuf_acquire(
+		enum tl_stream_type stream_type,
+		size_t              msg_size,
+		unsigned long       *flags) __acquires(&stream->lock)
+{
+	struct tl_stream *stream;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(
+			PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
+			msg_size);
+
+	stream = tl_stream[stream_type];
+
+	spin_lock_irqsave(&stream->lock, *flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	/* Select next buffer if data will not fit into current one. */
+	if (PACKET_SIZE < wb_size + msg_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx  = (wb_idx_raw + 1) % PACKET_COUNT;
+	}
+
+	/* Reserve space in selected buffer. */
+	atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size);
+
+#if MALI_UNIT_TEST
+	atomic_add(msg_size, &tlstream_bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+	return &stream->buffer[wb_idx].data[wb_size];
+}
+
+/**
+ * kbasep_tlstream_msgbuf_release - unlock selected stream
+ * @stream_type:  type of the stream that shall be locked
+ * @flags:        value obtained during stream acquire
+ *
+ * Function releases stream that has been previously locked with a call to
+ * kbasep_tlstream_msgbuf_acquire().
+ */
+static void kbasep_tlstream_msgbuf_release(
+		enum tl_stream_type stream_type,
+		unsigned long       flags) __releases(&stream->lock)
+{
+	struct tl_stream *stream;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	stream = tl_stream[stream_type];
+
+	/* Mark stream as containing unflushed data. */
+	atomic_set(&stream->autoflush_counter, 0);
+
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_flush_stream - flush stream
+ * @stype:  type of stream to be flushed
+ *
+ * Flush pending data in timeline stream.
+ */
+static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
+{
+	struct tl_stream *stream = tl_stream[stype];
+	unsigned long    flags;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+	size_t           min_size = PACKET_HEADER_SIZE;
+
+	if (stream->numbered)
+		min_size += PACKET_NUMBER_SIZE;
+
+	spin_lock_irqsave(&stream->lock, flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	if (wb_size > min_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+		atomic_set(&stream->buffer[wb_idx].size, wb_size);
+	}
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/**
+ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
+ * @data:  unused
+ *
+ * Timer is executed periodically to check if any of the stream contains
+ * buffer ready to be submitted to user space.
+ */
+static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
+{
+	enum tl_stream_type stype;
+	int                 rcode;
+
+	CSTD_UNUSED(data);
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
+		struct tl_stream *stream = tl_stream[stype];
+		unsigned long    flags;
+		unsigned int     wb_idx_raw;
+		unsigned int     wb_idx;
+		size_t           wb_size;
+		size_t           min_size = PACKET_HEADER_SIZE;
+
+		int af_cnt = atomic_read(&stream->autoflush_counter);
+
+		/* Check if stream contain unflushed data. */
+		if (0 > af_cnt)
+			continue;
+
+		/* Check if stream should be flushed now. */
+		if (af_cnt != atomic_cmpxchg(
+					&stream->autoflush_counter,
+					af_cnt,
+					af_cnt + 1))
+			continue;
+		if (!af_cnt)
+			continue;
+
+		/* Autoflush this stream. */
+		if (stream->numbered)
+			min_size += PACKET_NUMBER_SIZE;
+
+		spin_lock_irqsave(&stream->lock, flags);
+
+		wb_idx_raw = atomic_read(&stream->wbi);
+		wb_idx     = wb_idx_raw % PACKET_COUNT;
+		wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+		if (wb_size > min_size) {
+			wb_size = kbasep_tlstream_msgbuf_submit(
+					stream, wb_idx_raw, wb_size);
+			wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+			atomic_set(&stream->buffer[wb_idx].size,
+					wb_size);
+		}
+		spin_unlock_irqrestore(&stream->lock, flags);
+	}
+
+	if (atomic_read(&autoflush_timer_active))
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+	CSTD_UNUSED(rcode);
+}
+
+/**
+ * kbasep_tlstream_packet_pending - check timeline streams for pending packets
+ * @stype:      pointer to variable where stream type will be placed
+ * @rb_idx_raw: pointer to variable where read buffer index will be placed
+ *
+ * Function checks all streams for pending packets. It will stop as soon as
+ * packet ready to be submitted to user space is detected. Variables under
+ * pointers, passed as the parameters to this function will be updated with
+ * values pointing to right stream and buffer.
+ *
+ * Return: non-zero if any of timeline streams has at last one packet ready
+ */
+static int kbasep_tlstream_packet_pending(
+		enum tl_stream_type *stype,
+		unsigned int        *rb_idx_raw)
+{
+	int pending = 0;
+
+	KBASE_DEBUG_ASSERT(stype);
+	KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+	for (
+			*stype = 0;
+			(*stype < TL_STREAM_TYPE_COUNT) && !pending;
+			(*stype)++) {
+		if (NULL != tl_stream[*stype]) {
+			*rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi);
+			/* Read buffer index may be updated by writer in case of
+			 * overflow. Read and write buffer indexes must be
+			 * loaded in correct order. */
+			smp_rmb();
+			if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw)
+				pending = 1;
+		}
+	}
+	(*stype)--;
+
+	return pending;
+}
+
+/**
+ * kbasep_tlstream_read - copy data from streams to buffer provided by user
+ * @filp:   pointer to file structure (unused)
+ * @buffer: pointer to the buffer provided by user
+ * @size:   maximum amount of data that can be stored in the buffer
+ * @f_pos:  pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer
+ */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos)
+{
+	ssize_t copy_len = 0;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(f_pos);
+
+	if (!buffer)
+		return -EINVAL;
+
+	if ((0 > *f_pos) || (PACKET_SIZE > size))
+		return -EINVAL;
+
+	mutex_lock(&tl_reader_lock);
+
+	while (copy_len < size) {
+		enum tl_stream_type stype;
+		unsigned int        rb_idx_raw = 0;
+		unsigned int        rb_idx;
+		size_t              rb_size;
+
+		/* If we don't have any data yet, wait for packet to be
+		 * submitted. If we already read some packets and there is no
+		 * packet pending return back to user. */
+		if (0 < copy_len) {
+			if (!kbasep_tlstream_packet_pending(
+						&stype,
+						&rb_idx_raw))
+				break;
+		} else {
+			if (wait_event_interruptible(
+						tl_event_queue,
+						kbasep_tlstream_packet_pending(
+							&stype,
+							&rb_idx_raw))) {
+				copy_len = -ERESTARTSYS;
+				break;
+			}
+		}
+
+		/* Check if this packet fits into the user buffer.
+		 * If so copy its content. */
+		rb_idx = rb_idx_raw % PACKET_COUNT;
+		rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size);
+		if (rb_size > size - copy_len)
+			break;
+		if (copy_to_user(
+					&buffer[copy_len],
+					tl_stream[stype]->buffer[rb_idx].data,
+					rb_size)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* If the rbi still points to the packet we just processed
+		 * then there was no overflow so we add the copied size to
+		 * copy_len and move rbi on to the next packet
+		 */
+		smp_rmb();
+		if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
+			copy_len += rb_size;
+			atomic_inc(&tl_stream[stype]->rbi);
+
+#if MALI_UNIT_TEST
+			atomic_add(rb_size, &tlstream_bytes_collected);
+#endif /* MALI_UNIT_TEST */
+		}
+	}
+
+	mutex_unlock(&tl_reader_lock);
+
+	return copy_len;
+}
+
+/**
+ * kbasep_tlstream_poll - poll timeline stream for packets
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait)
+{
+	enum tl_stream_type stream_type;
+	unsigned int        rb_idx;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	poll_wait(filp, &tl_event_queue, wait);
+	if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_release - release timeline stream descriptor
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ *
+ * Return always return zero
+ */
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
+{
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+	CSTD_UNUSED(inode);
+	CSTD_UNUSED(filp);
+
+	/* Stop autoflush timer before releasing access to streams. */
+	atomic_set(&autoflush_timer_active, 0);
+	del_timer_sync(&autoflush_timer);
+
+	atomic_set(&kbase_tlstream_enabled, 0);
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_timeline_header - prepare timeline header stream packet
+ * @stream_type: type of the stream that will carry header data
+ * @tp_desc:     pointer to array with tracepoint descriptors
+ * @tp_count:    number of descriptors in the given array
+ *
+ * Functions fills in information about tracepoints stored in body stream
+ * associated with this header stream.
+ */
+static void kbasep_tlstream_timeline_header(
+		enum tl_stream_type  stream_type,
+		const struct tp_desc *tp_desc,
+		u32                  tp_count)
+{
+	const u8      tv = SWTRACE_VERSION; /* protocol version */
+	const u8      ps = sizeof(void *); /* pointer size */
+	size_t        msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
+	char          *buffer;
+	size_t        pos = 0;
+	unsigned long flags;
+	unsigned int  i;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(tp_desc);
+
+	/* Calculate the size of the timeline message. */
+	for (i = 0; i < tp_count; i++) {
+		msg_size += sizeof(tp_desc[i].id);
+		msg_size +=
+			strnlen(tp_desc[i].id_str,    STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].name,      STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_types, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_names, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+	}
+
+	KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size);
+
+	buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tp_count, sizeof(tp_count));
+
+	for (i = 0; i < tp_count; i++) {
+		pos = kbasep_tlstream_write_bytes(
+				buffer, pos,
+				&tp_desc[i].id, sizeof(tp_desc[i].id));
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].id_str, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].name, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_types, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_names, msg_size - pos);
+	}
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(stream_type, flags);
+
+	/* We don't expect any more data to be read in this stream.
+	 * As header stream must be read before its associated body stream,
+	 * make this packet visible to the user straightaway. */
+	kbasep_tlstream_flush_stream(stream_type);
+}
+
+/*****************************************************************************/
+
+int kbase_tlstream_init(void)
+{
+	enum tl_stream_type i;
+
+	/* Prepare stream structures. */
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL);
+		if (!tl_stream[i])
+			break;
+		kbasep_timeline_stream_init(tl_stream[i], i);
+	}
+	if (TL_STREAM_TYPE_COUNT > i) {
+		for (; i > 0; i--) {
+			kbasep_timeline_stream_term(tl_stream[i - 1]);
+			kfree(tl_stream[i - 1]);
+		}
+		return -ENOMEM;
+	}
+
+	/* Initialize autoflush timer. */
+	atomic_set(&autoflush_timer_active, 0);
+	setup_timer(&autoflush_timer,
+			kbasep_tlstream_autoflush_timer_callback,
+			0);
+
+	return 0;
+}
+
+void kbase_tlstream_term(void)
+{
+	enum tl_stream_type i;
+
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		kbasep_timeline_stream_term(tl_stream[i]);
+		kfree(tl_stream[i]);
+	}
+}
+
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd, u32 flags)
+{
+	u32 tlstream_enabled = TLSTREAM_ENABLED | flags;
+
+	if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) {
+		int rcode;
+
+		*fd = anon_inode_getfd(
+				"[mali_tlstream]",
+				&kbasep_tlstream_fops,
+				kctx,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd) {
+			atomic_set(&kbase_tlstream_enabled, 0);
+			return *fd;
+		}
+
+		/* Reset and initialize header streams. */
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_HEADER]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_AUX_HEADER]);
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_OBJ_HEADER,
+				tp_desc_obj,
+				ARRAY_SIZE(tp_desc_obj));
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_AUX_HEADER,
+				tp_desc_aux,
+				ARRAY_SIZE(tp_desc_aux));
+
+		/* Start autoflush timer. */
+		atomic_set(&autoflush_timer_active, 1);
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+		CSTD_UNUSED(rcode);
+
+		/* If job dumping is enabled, readjust the software event's
+		 * timeout as the default value of 3 seconds is often
+		 * insufficient. */
+		if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) {
+			dev_info(kctx->kbdev->dev,
+					"Job dumping is enabled, readjusting the software event's timeout\n");
+			atomic_set(&kctx->kbdev->js_data.soft_job_timeout_ms,
+					1800000);
+		}
+
+	} else {
+		*fd = -EBUSY;
+	}
+
+	return 0;
+}
+
+void kbase_tlstream_flush_streams(void)
+{
+	enum tl_stream_type stype;
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
+		kbasep_tlstream_flush_stream(stype);
+}
+
+void kbase_tlstream_reset_body_streams(void)
+{
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_OBJ]);
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_AUX]);
+}
+
+#if MALI_UNIT_TEST
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
+{
+	KBASE_DEBUG_ASSERT(bytes_collected);
+	KBASE_DEBUG_ASSERT(bytes_generated);
+	*bytes_collected = atomic_read(&tlstream_bytes_collected);
+	*bytes_generated = atomic_read(&tlstream_bytes_generated);
+}
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
+{
+	const u32     msg_id = KBASE_TL_NEW_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) +
+		sizeof(core_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &id, sizeof(id));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_count, sizeof(core_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
+{
+	const u32     msg_id = KBASE_TL_NEW_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) +
+		sizeof(fn);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &fn, sizeof(fn));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_LPU_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_AS_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_ATOM;
+	const size_t  msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+			sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_ctx(void *context)
+{
+	const u32     msg_id = KBASE_TL_DEL_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_atom(void *atom)
+{
+	const u32     msg_id = KBASE_TL_DEL_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_RET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_LPU;
+	const size_t  msg_s0 = sizeof(u32) + sizeof(char) +
+			strnlen(attrib_match_list, STRLEN_MAX);
+	const size_t  msg_size =
+			sizeof(msg_id) + sizeof(u64) +
+			sizeof(atom) + sizeof(lpu) + msg_s0;
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_string(
+			buffer, pos, attrib_match_list, msg_s0);
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_DEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_NDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_RDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_RET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_NRET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+		sizeof(jd) + sizeof(affinity) + sizeof(config);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &jd, sizeof(jd));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &affinity, sizeof(affinity));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &config, sizeof(config));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(prio);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &prio, sizeof(prio));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) +
+		sizeof(transtab) + sizeof(memattr) + sizeof(transcfg);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transtab, sizeof(transtab));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &memattr, sizeof(memattr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transcfg, sizeof(transcfg));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu)
+{
+	const u32     msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
+{
+	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+{
+	const u32     msg_id = KBASE_AUX_PM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
+		sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_type, sizeof(core_type));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
+{
+	const u32     msg_id = KBASE_AUX_PAGEFAULT;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count_change);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&page_count_change, sizeof(page_count_change));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
+{
+	const u32     msg_id = KBASE_AUX_PAGESALLOC;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &page_count, sizeof(page_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_devfreq_target(u64 target_freq)
+{
+	const u32       msg_id = KBASE_AUX_DEVFREQ_TARGET;
+	const size_t    msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(target_freq);
+	unsigned long   flags;
+	char            *buffer;
+	size_t          pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &target_freq, sizeof(target_freq));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_protected_enter_start(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_ENTER_START;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+void __kbase_tlstream_aux_protected_enter_end(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_ENTER_END;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_protected_leave_start(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_LEAVE_START;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+void __kbase_tlstream_aux_protected_leave_end(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_LEAVE_END;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
new file mode 100644
index 0000000..7349ab2
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -0,0 +1,607 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_KBASE_TLSTREAM_H)
+#define _KBASE_TLSTREAM_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_init - initialize timeline infrastructure in kernel
+ * Return: zero on success, negative number on error
+ */
+int kbase_tlstream_init(void);
+
+/**
+ * kbase_tlstream_term - terminate timeline infrastructure in kernel
+ *
+ * Timeline need have to been previously enabled with kbase_tlstream_init().
+ */
+void kbase_tlstream_term(void);
+
+/**
+ * kbase_tlstream_acquire - acquire timeline stream file descriptor
+ * @kctx:  kernel common context
+ * @fd:    timeline stream file descriptor
+ * @flags: timeline stream flags
+ *
+ * This descriptor is meant to be used by userspace timeline to gain access to
+ * kernel timeline stream. This stream is later broadcasted by user space to the
+ * timeline client.
+ * Only one entity can own the descriptor at any given time. Descriptor shall be
+ * closed if unused. If descriptor cannot be obtained (i.e. when it is already
+ * being used) argument fd will contain negative value.
+ *
+ * Return: zero on success (this does not necessarily mean that stream
+ *         descriptor could be returned), negative number on error
+ */
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd, u32 flags);
+
+/**
+ * kbase_tlstream_flush_streams - flush timeline streams.
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_tlstream_flush_streams(void);
+
+/**
+ * kbase_tlstream_reset_body_streams - reset timeline body streams.
+ *
+ * Function will discard pending data in all timeline body streams.
+ */
+void kbase_tlstream_reset_body_streams(void);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_tlstream_test - start timeline stream data generator
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay in milliseconds between trace points written by one
+ *             writer
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This test starts a requested number of asynchronous writers in both IRQ and
+ * thread context. Each writer will generate required number of test
+ * tracepoints (tracepoints with embedded information about writer that
+ * should be verified by user space reader). Tracepoints will be emitted in
+ * all timeline body streams. If aux_msg is non-zero writer will also
+ * generate not testable tracepoints (tracepoints without information about
+ * writer). These tracepoints are used to check correctness of remaining
+ * timeline message generating functions. Writer will wait requested time
+ * between generating another set of messages. This call blocks until all
+ * writers finish.
+ */
+void kbase_tlstream_test(
+		unsigned int tpw_count,
+		unsigned int msg_delay,
+		unsigned int msg_count,
+		int          aux_msg);
+
+/**
+ * kbase_tlstream_stats - read timeline stream statistics
+ * @bytes_collected: will hold number of bytes read by the user
+ * @bytes_generated: will hold number of bytes generated by trace points
+ */
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+#define TL_ATOM_STATE_IDLE 0
+#define TL_ATOM_STATE_READY 1
+#define TL_ATOM_STATE_DONE 2
+#define TL_ATOM_STATE_POSTED 3
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+void __kbase_tlstream_tl_del_ctx(void *context);
+void __kbase_tlstream_tl_del_atom(void *atom);
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list);
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config);
+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio);
+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state);
+void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom);
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg);
+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom);
+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu);
+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom);
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
+void __kbase_tlstream_aux_devfreq_target(u64 target_freq);
+void __kbase_tlstream_aux_protected_enter_start(void *gpu);
+void __kbase_tlstream_aux_protected_enter_end(void *gpu);
+void __kbase_tlstream_aux_protected_leave_start(void *gpu);
+void __kbase_tlstream_aux_protected_leave_end(void *gpu);
+
+#define TLSTREAM_ENABLED (1 << 31)
+
+extern atomic_t kbase_tlstream_enabled;
+
+#define __TRACE_IF_ENABLED(trace_name, ...)                         \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled & TLSTREAM_ENABLED)                     \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...)                     \
+	do {                                                            \
+		int enabled = atomic_read(&kbase_tlstream_enabled);     \
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
+			__kbase_tlstream_##trace_name(__VA_ARGS__);     \
+	} while (0)
+
+/*****************************************************************************/
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX - create context object in timeline
+ *                                     summary
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU - create GPU object in timeline summary
+ * @gpu:        name of the GPU object
+ * @id:         id value of this GPU
+ * @core_count: number of cores this GPU hosts
+ *
+ * Function emits a timeline message informing about GPU creation. GPU is
+ * created with two attributes: id and core count.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(gpu, id, core_count) \
+	__TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU - create LPU object in timeline summary
+ * @lpu: name of the Logical Processing Unit object
+ * @nr:  sequential number assigned to this LPU
+ * @fn:  property describing this LPU's functional abilities
+ *
+ * Function emits a timeline message informing about LPU creation. LPU is
+ * created with two attributes: number linking this LPU with GPU's job slot
+ * and function bearing information about this LPU abilities.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, nr, fn) \
+	__TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU - lifelink LPU object to GPU
+ * @lpu: name of the Logical Processing Unit object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that LPU object shall be deleted
+ * along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_AS - create address space object in timeline summary
+ * @as: name of the address space object
+ * @nr: sequential number assigned to this address space
+ *
+ * Function emits a timeline message informing about address space creation.
+ * Address space is created with one attribute: number identifying this
+ * address space.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(as, nr) \
+	__TRACE_IF_ENABLED(tl_summary_new_as, as, nr)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU - lifelink address space object to GPU
+ * @as:  name of the address space object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that address space object
+ * shall be deleted along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(as, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_CTX - create context object in timeline
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ */
+#define KBASE_TLSTREAM_TL_NEW_CTX(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_ATOM - create atom object in timeline
+ * @atom: name of the atom object
+ * @nr:   sequential number assigned to this atom
+ *
+ * Function emits a timeline message informing about atom creation. Atom is
+ * created with atom number (its attribute) that links it with actual work
+ * bucket id understood by hardware.
+ */
+#define KBASE_TLSTREAM_TL_NEW_ATOM(atom, nr) \
+	__TRACE_IF_ENABLED(tl_new_atom, atom, nr)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_CTX - destroy context object in timeline
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that context object ceased to
+ * exist.
+ */
+#define KBASE_TLSTREAM_TL_DEL_CTX(context) \
+	__TRACE_IF_ENABLED(tl_del_ctx, context)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_ATOM - destroy atom object in timeline
+ * @atom: name of the atom object
+ *
+ * Function emits a timeline message informing that atom object ceased to
+ * exist.
+ */
+#define KBASE_TLSTREAM_TL_DEL_ATOM(atom) \
+	__TRACE_IF_ENABLED(tl_del_atom, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_CTX_LPU - retain context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_CTX_LPU(context, lpu) \
+	__TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_CTX - retain atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by context and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_CTX(atom, context) \
+	__TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_LPU - retain atom by LPU
+ * @atom:              name of the atom object
+ * @lpu:               name of the Logical Processing Unit object
+ * @attrib_match_list: list containing match operator attributes
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_LPU(atom, lpu, attrib_match_list) \
+	__TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_CTX_LPU - release context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being released
+ * by LPU object.
+ */
+#define KBASE_TLSTREAM_TL_NRET_CTX_LPU(context, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - release atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by context.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX(atom, context) \
+	__TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - release atom by LPU
+ * @atom: name of the atom object
+ * @lpu:  name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by LPU.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU(atom, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_AS_CTX - lifelink address space object to context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being held by the context object.
+ */
+#define KBASE_TLSTREAM_TL_RET_AS_CTX(as, ctx) \
+	__TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_AS_CTX - release address space by context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being released by atom.
+ */
+#define KBASE_TLSTREAM_TL_NRET_AS_CTX(as, ctx) \
+	__TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_AS - retain atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by address space and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_AS(atom, as) \
+	__TRACE_IF_ENABLED(tl_ret_atom_as, atom, as)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_AS - release atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by address space.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_AS(atom, as) \
+	__TRACE_IF_ENABLED(tl_nret_atom_as, atom, as)
+
+/**
+ * KBASE_TLSTREAM_TL_DEP_ATOM_ATOM - parent atom depends on child atom
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depends on child atom
+ *
+ * Function emits a timeline message informing that parent atom waits for
+ * child atom object to be completed before start its execution.
+ */
+#define KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM - dependency between atoms resolved
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM - information about already resolved dependency between atoms
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes
+ * @atom:     name of the atom object
+ * @jd:       job descriptor address
+ * @affinity: job affinity
+ * @config:   job config
+ *
+ * Function emits a timeline message containing atom attributes.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(atom, jd, affinity, config) \
+	__TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - atom priority
+ * @atom: name of the atom object
+ * @prio: atom priority
+ *
+ * Function emits a timeline message containing atom priority.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(atom, prio) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - atom state
+ * @atom:  name of the atom object
+ * @state: atom state
+ *
+ * Function emits a timeline message containing atom state.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, state) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE - atom caused priority change
+ * @atom:  name of the atom object
+ *
+ * Function emits a timeline message signalling priority change
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE(atom) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority_change, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes
+ * @as:       assigned address space
+ * @transtab: configuration of the TRANSTAB register
+ * @memattr:  configuration of the MEMATTR register
+ * @transcfg: configuration of the TRANSCFG register (or zero if not present)
+ *
+ * Function emits a timeline message containing address space attributes.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, transtab, memattr, transcfg) \
+	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ex
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_LPU_softstop
+ * @lpu:        name of the LPU object
+ */
+#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(lpu) \
+	__TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_issue
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom)
+
+/**
+ * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - The GPU is being soft reset
+ * @gpu:        name of the GPU object
+ *
+ * This imperative tracepoint is specific to job dumping.
+ * Function emits a timeline message indicating GPU soft reset.
+ */
+#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET(gpu) \
+	__TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu)
+
+
+/**
+ * KBASE_TLSTREAM_AUX_PM_STATE - timeline message: power management state
+ * @core_type: core type (shader, tiler, l2 cache, l3 cache)
+ * @state:     64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
+ */
+#define KBASE_TLSTREAM_AUX_PM_STATE(core_type, state) \
+	__TRACE_IF_ENABLED(aux_pm_state, core_type, state)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGEFAULT - timeline message: MMU page fault event
+ *                                resulting in new pages being mapped
+ * @ctx_nr:            kernel context number
+ * @page_count_change: number of pages to be added
+ */
+#define KBASE_TLSTREAM_AUX_PAGEFAULT(ctx_nr, page_count_change) \
+	__TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGESALLOC - timeline message: total number of allocated
+ *                                 pages is changed
+ * @ctx_nr:     kernel context number
+ * @page_count: number of pages used by the context
+ */
+#define KBASE_TLSTREAM_AUX_PAGESALLOC(ctx_nr, page_count) \
+	__TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count)
+
+/**
+ * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - timeline message: new target DVFS
+ *                                     frequency
+ * @target_freq: new target frequency
+ */
+#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(target_freq) \
+	__TRACE_IF_ENABLED(aux_devfreq_target, target_freq)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - The GPU has started transitioning
+ *                                            to protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU is starting to
+ * transition to protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_enter_start, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - The GPU has finished transitioning
+ *                                          to protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU has finished
+ * transitioning to protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_enter_end, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - The GPU has started transitioning
+ *                                            to non-protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU is starting to
+ * transition to non-protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_leave_start, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - The GPU has finished transitioning
+ *                                          to non-protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU has finished
+ * transitioning to non-protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_leave_end, gpu)
+
+#endif /* _KBASE_TLSTREAM_H */
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100644
index 0000000..e2e0544
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,264 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code idenitifers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE.
+ *
+ * e.g.:
+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X
+ * typedef enum
+ * {
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X )
+ * #include "mali_kbase_trace_defs.h"
+ * #undef  KBASE_TRACE_CODE_MAKE_CODE
+ * } kbase_trace_code;
+ *
+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE
+ *
+ *
+ * The use of the macro here is:
+ * - KBASE_TRACE_CODE_MAKE_CODE( X )
+ *
+ * Which produces:
+ * - For an enum, KBASE_TRACE_CODE_X
+ * - For a string, "X"
+ *
+ *
+ * For example:
+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to:
+ *  - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum
+ *  - "JM_JOB_COMPLETE" for the string
+ * - To use it to trace an event, do:
+ *  - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val );
+ */
+
+#if 0 /* Dummy section to avoid breaking formatting */
+int dummy_array[] = {
+#endif
+
+/*
+ * Core events
+ */
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ),
+	/* info_val == bits cleared */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR),
+	/* GPU addr==dump address */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES),
+/*
+ * Job Slot management events
+ */
+	/* info_val==irq rawstat at start */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ),
+	/* info_val==jobs processed */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END),
+/* In the following:
+ *
+ * - ctx is set if a corresponding job found (NULL otherwise, e.g. some soft-stop cases)
+ * - uatom==kernel-side mapped uatom address (for correlation with user-side)
+ */
+	/* info_val==exit code; gpu_addr==chain gpuaddr */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+	/* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT),
+	/* gpu_addr is as follows:
+	 * - If JS_STATUS active after soft-stop, val==gpu addr written to
+	 *   JS_HEAD on submit
+	 * - otherwise gpu_addr==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1),
+	/* gpu_addr==JS_TAIL read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD),
+/* gpu_addr is as follows:
+ * - If JS_STATUS active before soft-stop, val==JS_HEAD
+ * - otherwise gpu_addr==0
+ */
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE),
+	/* info_val == nr jobs submitted */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP),
+	/* gpu_addr==JS_HEAD_NEXT last written */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER),
+/*
+ * Job dispatch events
+ */
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==0, info_val==0, uatom==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER),
+/*
+ * Scheduler Core events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB),
+	/* gpu_addr==last value written/would be written to JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
+	/* kctx is the one being evicted, info_val == kctx to put in  */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE),
+	/* info_val == the ctx attribute now on ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX),
+	/* info_val == the ctx attribute now on runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL),
+	/* info_val == the ctx attribute now off ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
+	/* info_val == the ctx attribute now off runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+/*
+ * Scheduler Policy events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX),
+	/* info_val == whether it was evicted */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ),
+	/* gpu_addr==JS_HEAD to write if the job were run */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END),
+/*
+ * Power Management Events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
+	/* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF),
+	/* info_val == policy number, or -1 for "Already changing" */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+/* Unused code just to make it easier to not have a comma at the end.
+ * All other codes MUST come before this */
+	KBASE_TRACE_CODE_MAKE_CODE(DUMMY)
+
+#if 0 /* Dummy section to avoid breaking formatting */
+};
+#endif
+
+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
new file mode 100644
index 0000000..5830e87
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
@@ -0,0 +1,236 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define CREATE_TRACE_POINTS
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+#include "mali_timeline.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active);
+
+struct kbase_trace_timeline_desc {
+	char *enum_str;
+	char *desc;
+	char *format;
+	char *format_desc;
+};
+
+static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc }
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table)
+
+static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos)
+{
+	if (*pos >= KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	(*pos)++;
+
+	if (*pos == KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace_timeline_desc *trace_desc = data;
+
+	seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc);
+	return 0;
+}
+
+
+static const struct seq_operations kbasep_trace_timeline_seq_ops = {
+	.start = kbasep_trace_timeline_seq_start,
+	.next = kbasep_trace_timeline_seq_next,
+	.stop = kbasep_trace_timeline_seq_stop,
+	.show = kbasep_trace_timeline_seq_show,
+};
+
+static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &kbasep_trace_timeline_seq_ops);
+}
+
+static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
+	.open = kbasep_trace_timeline_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_timeline_defs",
+			S_IRUGO, kbdev->mali_debugfs_directory, NULL,
+			&kbasep_trace_timeline_debugfs_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
+	} else {
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1);
+		KBASE_TIMELINE_JOB_START(kctx, js, atom_number);
+	}
+	++kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
+	} else {
+		/* Job finished in JS_HEAD */
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0);
+		KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number);
+
+		/* see if we need to trace the job in JS_NEXT moving to JS_HEAD */
+		if (kbase_backend_nr_atoms_submitted(kbdev, js)) {
+			struct kbase_jd_atom *next_katom;
+			struct kbase_context *next_kctx;
+
+			/* Peek the next atom - note that the atom in JS_HEAD will already
+			 * have been dequeued */
+			next_katom = kbase_backend_inspect_head(kbdev, js);
+			WARN_ON(!next_katom);
+			next_kctx = next_katom->kctx;
+			KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0);
+			KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1);
+			KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom));
+		}
+	}
+
+	--kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+	int uid = 0;
+	int old_uid;
+
+	/* If a producer already exists for the event, try to use their UID (multiple-producers) */
+	uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]);
+	old_uid = uid;
+
+	/* Get a new non-zero UID if we don't have one yet */
+	while (!uid)
+		uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter);
+
+	/* Try to use this UID */
+	if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid))
+		/* If it changed, raced with another producer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid);
+}
+
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != 0) {
+		if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+			/* If it changed, raced with another consumer: we've lost this UID */
+			uid = 0;
+
+		KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+	}
+}
+
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+		/* If it changed, raced with another consumer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+}
+
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	/* Simply log the start of the transition */
+	kbdev->timeline.l2_transitioning = true;
+	KBASE_TIMELINE_POWERING_L2(kbdev);
+}
+
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	/* Simply log the end of the transition */
+	if (kbdev->timeline.l2_transitioning) {
+		kbdev->timeline.l2_transitioning = false;
+		KBASE_TIMELINE_POWERED_L2(kbdev);
+	}
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
new file mode 100644
index 0000000..619072f
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
@@ -0,0 +1,363 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_KBASE_TRACE_TIMELINE_H)
+#define _KBASE_TRACE_TIMELINE_H
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+
+enum kbase_trace_timeline_code {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Initialize Timeline DebugFS entries */
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_trace_timeline_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
+ * functions.
+ * Output is timestamped by either sched_clock() (default), local_clock(), or
+ * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */
+#include "mali_timeline.h"
+
+/* Trace number of atoms in flight for kctx (atoms either not completed, or in
+   process of being returned to user */
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec,   \
+				(int)kctx->timeline.owner_tgid,              \
+				count);                                      \
+	} while (0)
+
+/* Trace atom_id being Ready to Run */
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id)                             \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec,              \
+				CTX_FLOW_ATOM_READY,                         \
+				(int)kctx->timeline.owner_tgid,              \
+				atom_id);                                    \
+	} while (0)
+
+/* Trace number of atoms submitted to job slot js
+ *
+ * NOTE: This uses a different tracepoint to the head/next/soft-stop actions,
+ * so that those actions can be filtered out separately from this
+ *
+ * This is because this is more useful, as we can use it to calculate general
+ * utilization easily and accurately */
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_ACTIVE,                      \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+
+/* Trace atoms present in JS_NEXT */
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_NEXT,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace atoms present in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_HEAD,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace that a soft stop/evict from next is being attempted on a slot */
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_STOPPING,                    \
+				(kctx) ? (int)kctx->timeline.owner_tgid : 0, \
+				js, count);                                  \
+	} while (0)
+
+
+
+/* Trace state of overall GPU power */
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active)                              \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_ACTIVE, active);            \
+	} while (0)
+
+/* Trace state of tiler power */
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap)                            \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_TILER_ACTIVE,               \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace number of shaders currently powered */
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap)                           \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_SHADER_ACTIVE,              \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 power */
+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap)                               \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_L2_ACTIVE,                  \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 cache*/
+#define KBASE_TIMELINE_POWERING_L2(kbdev)                                    \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_POWERING,               \
+				1);                                          \
+	} while (0)
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)                                     \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_ACTIVE,                 \
+				1);                                          \
+	} while (0)
+
+/* Trace kbase_pm_send_event message send */
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id)         \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_SEND_EVENT,                       \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+/* Trace kbase_pm_worker message receive */
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id)       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_HANDLE_EVENT,                     \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+
+/* Trace atom_id starting in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number)          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_START_GPU_JOB_CHAIN_SW_APPROX,            \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _consumerof_atom_number);                \
+	} while (0)
+
+/* Trace atom_id stopping on JS_HEAD */
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_STOP_GPU_JOB_CHAIN_SW_APPROX,             \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _producerof_atom_number_completed);      \
+	} while (0)
+
+/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
+ * certin caller */
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec,     \
+				trace_code, 1);                              \
+	} while (0)
+
+/* Trace number of contexts active */
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec,    \
+				count);                                      \
+	} while (0)
+
+/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
+
+/**
+ * Trace that an atom is starting on a job slot
+ *
+ * The caller must be holding hwaccess_lock
+ */
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js);
+
+/**
+ * Trace that an atom has done on a job slot
+ *
+ * 'Done' in this sense can occur either because:
+ * - the atom in JS_HEAD finished
+ * - the atom in JS_NEXT was evicted
+ *
+ * Whether the atom finished or was evicted is passed in @a done_code
+ *
+ * It is assumed that the atom has already been removed from the submit slot,
+ * with either:
+ * - kbasep_jm_dequeue_submit_slot()
+ * - kbasep_jm_dequeue_tail_submit_slot()
+ *
+ * The caller must be holding hwaccess_lock
+ */
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code);
+
+
+/** Trace a pm event starting */
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
+		enum kbase_timeline_pm_event event_sent);
+
+/** Trace a pm event finishing */
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Check whether a pm event was present, and if so trace finishing it */
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Trace L2 power-up start */
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
+
+/** Trace L2 power-up done */
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
+
+#else
+
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)  CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
+
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
+
+static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+}
+
+static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+}
+
+static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+}
+
+static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_MALI_TRACE_TIMELINE */
+
+#endif				/* _KBASE_TRACE_TIMELINE_H */
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
new file mode 100644
index 0000000..156a95a
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
@@ -0,0 +1,140 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * Conventions on Event Names:
+ *
+ * - The prefix determines something about how the timeline should be
+ *   displayed, and is split up into various parts, separated by underscores:
+ *  - 'SW' and 'HW' as the first part will be used to determine whether a
+ *     timeline is to do with Software or Hardware - effectively, separate
+ *     'channels' for Software and Hardware
+ *  - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
+ *    signify related pairs of events - these are optional.
+ *  - 'FLOW' indicates a generic event, which can use dependencies
+ * - This gives events such as:
+ *  - 'SW_ENTER_FOO'
+ *  - 'SW_LEAVE_FOO'
+ *  - 'SW_FLOW_BAR_1'
+ *  - 'SW_FLOW_BAR_2'
+ *  - 'HW_START_BAZ'
+ *  - 'HW_STOP_BAZ'
+ * - And an unadorned HW event:
+ *  - 'HW_BAZ_FROZBOZ'
+ */
+
+/*
+ * Conventions on parameter names:
+ * - anything with 'instance' in the name will have a separate timeline based
+ *   on that instances.
+ * - underscored-prefixed parameters will by hidden by default on timelines
+ *
+ * Hence:
+ * - Different job slots have their own 'instance', based on the instance value
+ * - Per-context info (e.g. atoms on a context) have their own 'instance'
+ *   (i.e. each context should be on a different timeline)
+ *
+ * Note that globally-shared resources can be tagged with a tgid, but we don't
+ * want an instance per context:
+ * - There's no point having separate Job Slot timelines for each context, that
+ *   would be confusing - there's only really 3 job slots!
+ * - There's no point having separate Shader-powered timelines for each
+ *   context, that would be confusing - all shader cores (whether it be 4, 8,
+ *   etc) are shared in the system.
+ */
+
+	/*
+	 * CTX events
+	 */
+	/* Separate timelines for each context 'instance'*/
+	KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT,     "CTX: Atoms in flight",            "%d,%d",    "_instance_tgid,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY,            "CTX: Atoms Ready to Run",         "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
+
+	/*
+	 * SW Events
+	 */
+	/* Separate timelines for each slot 'instance' */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE,         "SW: GPU slot active",             "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT,           "SW: GPU atom in NEXT",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD,           "SW: GPU atom in HEAD",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING,       "SW: Try Soft-Stop on GPU slot",   "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
+	/* Shader and overall power is shared - can't have separate instances of
+	 * it, just tagging with the context */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE,        "SW: GPU power active",            "%d,%d",    "_tgid,_value_is_power_active"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE,  "SW: GPU tiler powered",           "%d,%d",    "_tgid,_value_number_of_tilers"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered",         "%d,%d",    "_tgid,_value_number_of_shaders"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE,     "SW: GPU L2 powered",              "%d,%d",    "_tgid,_value_number_of_l2"),
+
+	/* SW Power event messaging. _event_type is one from the kbase_pm_event enum  */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT,          "SW: PM Send Event",               "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT,        "SW: PM Handle Event",             "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
+	/* SW L2 power events */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING,  "SW: GPU L2 powering",             "%d,%d", "_tgid,_writerof_l2_transitioning"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE,	  "SW: GPU L2 powering done",        "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
+
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE,          "SW: Context Active",              "%d,%d",    "_tgid,_value_active"),
+
+	/*
+	 * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END,   "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END,   "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END,   "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
+
+	/*
+	 * Significant Indirect callers of kbase_pm_check_transitions_nolock()
+	 */
+	/* kbase_pm_request_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	/* kbase_pm_release_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END,   "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	/*
+	 * END: SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+
+	/*
+	 * HW Events
+	 */
+	KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT,
+"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"),
+	KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain start (SW approximated)", "%d,%d,%d",
+"_tgid,job_slot,_consumerof_atom_number_ready"),
+	KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain stop (SW approximated)",  "%d,%d,%d",
+"_tgid,job_slot,_producerof_atom_number_completed")
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h
new file mode 100644
index 0000000..baa9296
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h
@@ -0,0 +1,542 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UKU_H_
+#define _KBASE_UKU_H_
+
+#include "mali_uk.h"
+#include "mali_base_kernel.h"
+
+/* This file needs to support being included from kernel and userside (which use different defines) */
+#if defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON
+#define SUPPORT_MALI_ERROR_INJECT
+#endif /* defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON */
+#if defined(CONFIG_MALI_NO_MALI)
+#define SUPPORT_MALI_NO_MALI
+#elif defined(MALI_NO_MALI)
+#if MALI_NO_MALI
+#define SUPPORT_MALI_NO_MALI
+#endif
+#endif
+
+#if defined(SUPPORT_MALI_NO_MALI) || defined(SUPPORT_MALI_ERROR_INJECT)
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+
+#include "mali_kbase_gpuprops_types.h"
+
+/*
+ * 10.1:
+ * - Do mmap in kernel for SAME_VA memory allocations rather then
+ *   calling back into the kernel as a 2nd stage of the allocation request.
+ *
+ * 10.2:
+ * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA
+ *   region for use with JIT (ignored on 32-bit platforms)
+ *
+ * 10.3:
+ * - base_jd_core_req typedef-ed to u32 (instead of to u16)
+ * - two flags added: BASE_JD_REQ_SKIP_CACHE_STAT / _END
+ *
+ * 10.4:
+ * - Removed KBASE_FUNC_EXT_BUFFER_LOCK used only in internal tests
+ *
+ * 10.5:
+ * - Reverted to performing mmap in user space so that tools like valgrind work.
+ *
+ * 10.6:
+ * - Add flags input variable to KBASE_FUNC_TLSTREAM_ACQUIRE
+ */
+#define BASE_UK_VERSION_MAJOR 10
+#define BASE_UK_VERSION_MINOR 6
+
+struct kbase_uk_mem_alloc {
+	union uk_header header;
+	/* IN */
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	/* IN/OUT */
+	u64 flags;
+	/* OUT */
+	u64 gpu_va;
+	u16 va_alignment;
+	u8  padding[6];
+};
+
+struct kbase_uk_mem_free {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	/* OUT */
+};
+
+struct kbase_uk_mem_alias {
+	union uk_header header;
+	/* IN/OUT */
+	u64 flags;
+	/* IN */
+	u64 stride;
+	u64 nents;
+	union kbase_pointer ai;
+	/* OUT */
+	u64         gpu_va;
+	u64         va_pages;
+};
+
+struct kbase_uk_mem_import {
+	union uk_header header;
+	/* IN */
+	union kbase_pointer phandle;
+	u32 type;
+	u32 padding;
+	/* IN/OUT */
+	u64         flags;
+	/* OUT */
+	u64 gpu_va;
+	u64         va_pages;
+};
+
+struct kbase_uk_mem_flags_change {
+	union uk_header header;
+	/* IN */
+	u64 gpu_va;
+	u64 flags;
+	u64 mask;
+};
+
+struct kbase_uk_job_submit {
+	union uk_header header;
+	/* IN */
+	union kbase_pointer addr;
+	u32 nr_atoms;
+	u32 stride;		/* bytes between atoms, i.e. sizeof(base_jd_atom_v2) */
+	/* OUT */
+};
+
+struct kbase_uk_post_term {
+	union uk_header header;
+};
+
+struct kbase_uk_sync_now {
+	union uk_header header;
+
+	/* IN */
+	struct base_syncset sset;
+
+	/* OUT */
+};
+
+struct kbase_uk_hwcnt_setup {
+	union uk_header header;
+
+	/* IN */
+	u64 dump_buffer;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 unused_1; /* keep for backwards compatibility */
+	u32 mmu_l2_bm;
+	u32 padding;
+	/* OUT */
+};
+
+/**
+ * struct kbase_uk_hwcnt_reader_setup - User/Kernel space data exchange structure
+ * @header:       UK structure header
+ * @buffer_count: requested number of dumping buffers
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ * @fd:           dumping notification file descriptor
+ *
+ * This structure sets up HWC dumper/reader for this context.
+ * Multiple instances can be created for single context.
+ */
+struct kbase_uk_hwcnt_reader_setup {
+	union uk_header header;
+
+	/* IN */
+	u32 buffer_count;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 mmu_l2_bm;
+
+	/* OUT */
+	s32 fd;
+};
+
+struct kbase_uk_hwcnt_dump {
+	union uk_header header;
+};
+
+struct kbase_uk_hwcnt_clear {
+	union uk_header header;
+};
+
+struct kbase_uk_fence_validate {
+	union uk_header header;
+	/* IN */
+	s32 fd;
+	u32 padding;
+	/* OUT */
+};
+
+struct kbase_uk_stream_create {
+	union uk_header header;
+	/* IN */
+	char name[32];
+	/* OUT */
+	s32 fd;
+	u32 padding;
+};
+
+struct kbase_uk_gpuprops {
+	union uk_header header;
+
+	/* IN */
+	struct mali_base_gpu_props props;
+	/* OUT */
+};
+
+struct kbase_uk_mem_query {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+#define KBASE_MEM_QUERY_COMMIT_SIZE  1
+#define KBASE_MEM_QUERY_VA_SIZE      2
+#define KBASE_MEM_QUERY_FLAGS        3
+	u64         query;
+	/* OUT */
+	u64         value;
+};
+
+struct kbase_uk_mem_commit {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	u64         pages;
+	/* OUT */
+	u32 result_subcode;
+	u32 padding;
+};
+
+struct kbase_uk_find_cpu_offset {
+	union uk_header header;
+	/* IN */
+	u64 gpu_addr;
+	u64 cpu_addr;
+	u64 size;
+	/* OUT */
+	u64 offset;
+};
+
+#define KBASE_GET_VERSION_BUFFER_SIZE 64
+struct kbase_uk_get_ddk_version {
+	union uk_header header;
+	/* OUT */
+	char version_buffer[KBASE_GET_VERSION_BUFFER_SIZE];
+	u32 version_string_size;
+	u32 padding;
+};
+
+struct kbase_uk_disjoint_query {
+	union uk_header header;
+	/* OUT */
+	u32 counter;
+	u32 padding;
+};
+
+struct kbase_uk_set_flags {
+	union uk_header header;
+	/* IN */
+	u32 create_flags;
+	u32 padding;
+};
+
+#if MALI_UNIT_TEST
+#define TEST_ADDR_COUNT 4
+#define KBASE_TEST_BUFFER_SIZE 128
+struct kbase_exported_test_data {
+	u64 test_addr[TEST_ADDR_COUNT];		/**< memory address */
+	u32 test_addr_pages[TEST_ADDR_COUNT];		/**<  memory size in pages */
+	union kbase_pointer kctx;				/**<  base context created by process */
+	union kbase_pointer mm;				/**< pointer to process address space */
+	u8 buffer1[KBASE_TEST_BUFFER_SIZE];   /**<  unit test defined parameter */
+	u8 buffer2[KBASE_TEST_BUFFER_SIZE];   /**<  unit test defined parameter */
+};
+
+struct kbase_uk_set_test_data {
+	union uk_header header;
+	/* IN */
+	struct kbase_exported_test_data test_data;
+};
+
+#endif				/* MALI_UNIT_TEST */
+
+#ifdef SUPPORT_MALI_ERROR_INJECT
+struct kbase_uk_error_params {
+	union uk_header header;
+	/* IN */
+	struct kbase_error_params params;
+};
+#endif				/* SUPPORT_MALI_ERROR_INJECT */
+
+#ifdef SUPPORT_MALI_NO_MALI
+struct kbase_uk_model_control_params {
+	union uk_header header;
+	/* IN */
+	struct kbase_model_control_params params;
+};
+#endif				/* SUPPORT_MALI_NO_MALI */
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+struct kbase_uk_keep_gpu_powered {
+	union uk_header header;
+	u32       enabled;
+	u32       padding;
+};
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+struct kbase_uk_profiling_controls {
+	union uk_header header;
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+};
+
+struct kbase_uk_debugfs_mem_profile_add {
+	union uk_header header;
+	u32 len;
+	u32 padding;
+	union kbase_pointer buf;
+};
+
+struct kbase_uk_context_id {
+	union uk_header header;
+	/* OUT */
+	int id;
+};
+
+/**
+ * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @flags:  timeline stream flags
+ * @fd:     timeline stream file descriptor
+ *
+ * This structure is used when performing a call to acquire kernel side timeline
+ * stream file descriptor.
+ */
+struct kbase_uk_tlstream_acquire {
+	union uk_header header;
+	/* IN */
+	u32 flags;
+	/* OUT */
+	s32  fd;
+};
+
+/**
+ * struct kbase_uk_tlstream_acquire_v10_4 - User/Kernel space data exchange
+ *                                          structure
+ * @header: UK structure header
+ * @fd:     timeline stream file descriptor
+ *
+ * This structure is used when performing a call to acquire kernel side timeline
+ * stream file descriptor.
+ */
+struct kbase_uk_tlstream_acquire_v10_4 {
+	union uk_header header;
+	/* IN */
+	/* OUT */
+	s32  fd;
+};
+
+/**
+ * struct kbase_uk_tlstream_flush - User/Kernel space data exchange structure
+ * @header: UK structure header
+ *
+ * This structure is used when performing a call to flush kernel side
+ * timeline streams.
+ */
+struct kbase_uk_tlstream_flush {
+	union uk_header header;
+	/* IN */
+	/* OUT */
+};
+
+#if MALI_UNIT_TEST
+/**
+ * struct kbase_uk_tlstream_test - User/Kernel space data exchange structure
+ * @header:    UK structure header
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay between tracepoints from one writer in milliseconds
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This structure is used when performing a call to start timeline stream test
+ * embedded in kernel.
+ */
+struct kbase_uk_tlstream_test {
+	union uk_header header;
+	/* IN */
+	u32 tpw_count;
+	u32 msg_delay;
+	u32 msg_count;
+	u32 aux_msg;
+	/* OUT */
+};
+
+/**
+ * struct kbase_uk_tlstream_stats - User/Kernel space data exchange structure
+ * @header:          UK structure header
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ *
+ * This structure is used when performing a call to obtain timeline stream
+ * statistics.
+ */
+struct kbase_uk_tlstream_stats {
+	union uk_header header; /**< UK structure header. */
+	/* IN */
+	/* OUT */
+	u32 bytes_collected;
+	u32 bytes_generated;
+};
+#endif /* MALI_UNIT_TEST */
+
+/**
+ * struct struct kbase_uk_prfcnt_value for the KBASE_FUNC_SET_PRFCNT_VALUES ioctl
+ * @header:          UK structure header
+ * @data:            Counter samples for the dummy model
+ * @size:............Size of the counter sample data
+ */
+struct kbase_uk_prfcnt_values {
+	union uk_header header;
+	/* IN */
+	u32 *data;
+	u32 size;
+};
+
+/**
+ * struct kbase_uk_soft_event_update - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @evt:        the GPU address containing the event
+ * @new_status: the new event status, must be either BASE_JD_SOFT_EVENT_SET or
+ *              BASE_JD_SOFT_EVENT_RESET
+ * @flags:      reserved for future uses, must be set to 0
+ *
+ * This structure is used to update the status of a software event. If the
+ * event's status is set to BASE_JD_SOFT_EVENT_SET, any job currently waiting
+ * on this event will complete.
+ */
+struct kbase_uk_soft_event_update {
+	union uk_header header;
+	/* IN */
+	u64 evt;
+	u32 new_status;
+	u32 flags;
+};
+
+/**
+ * struct kbase_uk_mem_jit_init - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @va_pages:   Number of virtual pages required for JIT
+ *
+ * This structure is used when requesting initialization of JIT.
+ */
+struct kbase_uk_mem_jit_init {
+	union uk_header header;
+	/* IN */
+	u64 va_pages;
+};
+
+enum kbase_uk_function_id {
+	KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0),
+	KBASE_FUNC_MEM_IMPORT = (UK_FUNC_ID + 1),
+	KBASE_FUNC_MEM_COMMIT = (UK_FUNC_ID + 2),
+	KBASE_FUNC_MEM_QUERY = (UK_FUNC_ID + 3),
+	KBASE_FUNC_MEM_FREE = (UK_FUNC_ID + 4),
+	KBASE_FUNC_MEM_FLAGS_CHANGE = (UK_FUNC_ID + 5),
+	KBASE_FUNC_MEM_ALIAS = (UK_FUNC_ID + 6),
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	KBASE_FUNC_JOB_SUBMIT_UK6 = (UK_FUNC_ID + 7),
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+	KBASE_FUNC_SYNC  = (UK_FUNC_ID + 8),
+
+	KBASE_FUNC_POST_TERM = (UK_FUNC_ID + 9),
+
+	KBASE_FUNC_HWCNT_SETUP = (UK_FUNC_ID + 10),
+	KBASE_FUNC_HWCNT_DUMP = (UK_FUNC_ID + 11),
+	KBASE_FUNC_HWCNT_CLEAR = (UK_FUNC_ID + 12),
+
+	KBASE_FUNC_GPU_PROPS_REG_DUMP = (UK_FUNC_ID + 14),
+
+	KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15),
+
+	KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16),
+	KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18),
+
+	KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19),
+	KBASE_FUNC_INJECT_ERROR = (UK_FUNC_ID + 20),
+	KBASE_FUNC_MODEL_CONTROL = (UK_FUNC_ID + 21),
+
+#ifdef BASE_LEGACY_UK8_SUPPORT
+	KBASE_FUNC_KEEP_GPU_POWERED = (UK_FUNC_ID + 22),
+#endif /* BASE_LEGACY_UK8_SUPPORT */
+
+	KBASE_FUNC_FENCE_VALIDATE = (UK_FUNC_ID + 23),
+	KBASE_FUNC_STREAM_CREATE = (UK_FUNC_ID + 24),
+	KBASE_FUNC_GET_PROFILING_CONTROLS = (UK_FUNC_ID + 25),
+	KBASE_FUNC_SET_PROFILING_CONTROLS = (UK_FUNC_ID + 26),
+					    /* to be used only for testing
+					    * purposes, otherwise these controls
+					    * are set through gator API */
+
+	KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD = (UK_FUNC_ID + 27),
+	KBASE_FUNC_JOB_SUBMIT = (UK_FUNC_ID + 28),
+	KBASE_FUNC_DISJOINT_QUERY = (UK_FUNC_ID + 29),
+
+	KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31),
+
+	KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4 = (UK_FUNC_ID + 32),
+#if MALI_UNIT_TEST
+	KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33),
+	KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34),
+#endif /* MALI_UNIT_TEST */
+	KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35),
+
+	KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36),
+
+#ifdef SUPPORT_MALI_NO_MALI
+	KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37),
+#endif
+
+	KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38),
+
+	KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39),
+
+	KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 40),
+
+	KBASE_FUNC_MAX
+};
+
+#endif				/* _KBASE_UKU_H_ */
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
new file mode 100644
index 0000000..be474ff
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
+{
+	struct list_head *pos = base->next;
+
+	while (pos != base) {
+		if (pos == entry)
+			return true;
+
+		pos = pos->next;
+	}
+	return false;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100644
index 0000000..fd7252d
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+/** Test whether the given list entry is a member of the given list.
+ *
+ * @param base      The head of the list to be tested
+ * @param entry     The list entry to be tested
+ *
+ * @return          true if entry is a member of base
+ *                  false otherwise
+ */
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
+
+#endif				/* _KBASE_UTILITY_H */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
new file mode 100644
index 0000000..7f7efc8
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -0,0 +1,2039 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/hrtimer.h>
+#include <linux/jiffies.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwcnt_reader.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* Hwcnt reader API version */
+#define HWCNT_READER_API        1
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC            1000000000ull /* ns */
+
+/* The time resolution of dumping service. */
+#define DUMPING_RESOLUTION      500000ull /* ns */
+
+/* The maximal supported number of dumping buffers. */
+#define MAX_BUFFER_COUNT        32
+
+/* Size and number of hw counters blocks. */
+#define NR_CNT_BLOCKS_PER_GROUP 8
+#define NR_CNT_PER_BLOCK        64
+#define NR_BYTES_PER_CNT        4
+#define NR_BYTES_PER_HDR        16
+#define PRFCNT_EN_MASK_OFFSET   0x8
+
+/*****************************************************************************/
+
+enum {
+	SHADER_HWCNT_BM,
+	TILER_HWCNT_BM,
+	MMU_L2_HWCNT_BM,
+	JM_HWCNT_BM
+};
+
+enum vinstr_state {
+	VINSTR_IDLE,
+	VINSTR_DUMPING,
+	VINSTR_SUSPENDING,
+	VINSTR_SUSPENDED,
+	VINSTR_RESUMING
+};
+
+/**
+ * struct kbase_vinstr_context - vinstr context per device
+ * @lock:              protects the entire vinstr context
+ * @kbdev:             pointer to kbase device
+ * @kctx:              pointer to kbase context
+ * @vmap:              vinstr vmap for mapping hwcnt dump buffer
+ * @gpu_va:            GPU hwcnt dump buffer address
+ * @cpu_va:            the CPU side mapping of the hwcnt dump buffer
+ * @dump_size:         size of the dump buffer in bytes
+ * @bitmap:            current set of counters monitored, not always in sync
+ *                     with hardware
+ * @reprogram:         when true, reprogram hwcnt block with the new set of
+ *                     counters
+ * @state:             vinstr state
+ * @state_lock:        protects information about vinstr state
+ * @suspend_waitq:     notification queue to trigger state re-validation
+ * @suspend_cnt:       reference counter of vinstr's suspend state
+ * @suspend_work:      worker to execute on entering suspended state
+ * @resume_work:       worker to execute on leaving suspended state
+ * @nclients:          number of attached clients, pending or otherwise
+ * @waiting_clients:   head of list of clients being periodically sampled
+ * @idle_clients:      head of list of clients being idle
+ * @suspended_clients: head of list of clients being suspended
+ * @thread:            periodic sampling thread
+ * @waitq:             notification queue of sampling thread
+ * @request_pending:   request for action for sampling thread
+ */
+struct kbase_vinstr_context {
+	struct mutex             lock;
+	struct kbase_device      *kbdev;
+	struct kbase_context     *kctx;
+
+	struct kbase_vmap_struct vmap;
+	u64                      gpu_va;
+	void                     *cpu_va;
+	size_t                   dump_size;
+	u32                      bitmap[4];
+	bool                     reprogram;
+
+	enum vinstr_state        state;
+	struct spinlock          state_lock;
+	wait_queue_head_t        suspend_waitq;
+	unsigned int             suspend_cnt;
+	struct work_struct       suspend_work;
+	struct work_struct       resume_work;
+
+	u32                      nclients;
+	struct list_head         waiting_clients;
+	struct list_head         idle_clients;
+	struct list_head         suspended_clients;
+
+	struct task_struct       *thread;
+	wait_queue_head_t        waitq;
+	atomic_t                 request_pending;
+};
+
+/**
+ * struct kbase_vinstr_client - a vinstr client attached to a vinstr context
+ * @vinstr_ctx:    vinstr context client is attached to
+ * @list:          node used to attach this client to list in vinstr context
+ * @buffer_count:  number of buffers this client is using
+ * @event_mask:    events this client reacts to
+ * @dump_size:     size of one dump buffer in bytes
+ * @bitmap:        bitmap request for JM, TILER, SHADER and MMU counters
+ * @legacy_buffer: userspace hwcnt dump buffer (legacy interface)
+ * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface)
+ * @accum_buffer:  temporary accumulation buffer for preserving counters
+ * @dump_time:     next time this clients shall request hwcnt dump
+ * @dump_interval: interval between periodic hwcnt dumps
+ * @dump_buffers:  kernel hwcnt dump buffers allocated by this client
+ * @dump_buffers_meta: metadata of dump buffers
+ * @meta_idx:      index of metadata being accessed by userspace
+ * @read_idx:      index of buffer read by userspace
+ * @write_idx:     index of buffer being written by dumping service
+ * @waitq:         client's notification queue
+ * @pending:       when true, client has attached but hwcnt not yet updated
+ */
+struct kbase_vinstr_client {
+	struct kbase_vinstr_context        *vinstr_ctx;
+	struct list_head                   list;
+	unsigned int                       buffer_count;
+	u32                                event_mask;
+	size_t                             dump_size;
+	u32                                bitmap[4];
+	void __user                        *legacy_buffer;
+	void                               *kernel_buffer;
+	void                               *accum_buffer;
+	u64                                dump_time;
+	u32                                dump_interval;
+	char                               *dump_buffers;
+	struct kbase_hwcnt_reader_metadata *dump_buffers_meta;
+	atomic_t                           meta_idx;
+	atomic_t                           read_idx;
+	atomic_t                           write_idx;
+	wait_queue_head_t                  waitq;
+	bool                               pending;
+};
+
+/**
+ * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer
+ * @hrtimer:    high resolution timer
+ * @vinstr_ctx: vinstr context
+ */
+struct kbasep_vinstr_wake_up_timer {
+	struct hrtimer              hrtimer;
+	struct kbase_vinstr_context *vinstr_ctx;
+};
+
+/*****************************************************************************/
+
+static int kbasep_vinstr_service_task(void *data);
+
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+		struct file *filp,
+		poll_table  *wait);
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+		struct file   *filp,
+		unsigned int  cmd,
+		unsigned long arg);
+static int kbasep_vinstr_hwcnt_reader_mmap(
+		struct file           *filp,
+		struct vm_area_struct *vma);
+static int kbasep_vinstr_hwcnt_reader_release(
+		struct inode *inode,
+		struct file  *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations vinstr_client_fops = {
+	.poll           = kbasep_vinstr_hwcnt_reader_poll,
+	.unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,
+	.compat_ioctl   = kbasep_vinstr_hwcnt_reader_ioctl,
+	.mmap           = kbasep_vinstr_hwcnt_reader_mmap,
+	.release        = kbasep_vinstr_hwcnt_reader_release,
+};
+
+/*****************************************************************************/
+
+static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_uk_hwcnt_setup setup;
+	int err;
+
+	setup.dump_buffer = vinstr_ctx->gpu_va;
+	setup.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
+	setup.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
+	setup.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
+	setup.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
+
+	/* Mark the context as active so the GPU is kept turned on */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread. */
+	kbase_pm_context_active(kbdev);
+
+	/* Schedule the context in */
+	kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup);
+	if (err) {
+		/* Release the context. This had its own Power Manager Active
+		 * reference */
+		kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+		/* Also release our Power Manager Active reference */
+		kbase_pm_context_idle(kbdev);
+	}
+
+	return err;
+}
+
+static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+
+	err = kbase_instr_hwcnt_disable_internal(kctx);
+	if (err) {
+		dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)",
+				kctx);
+		return;
+	}
+
+	/* Release the context. This had its own Power Manager Active reference. */
+	kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+	/* Also release our Power Manager Active reference. */
+	kbase_pm_context_idle(kbdev);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx);
+}
+
+static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	disable_hwcnt(vinstr_ctx);
+	return enable_hwcnt(vinstr_ctx);
+}
+
+static void hwcnt_bitmap_set(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     = src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  = src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM];
+}
+
+static void hwcnt_bitmap_union(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     |= src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  |= src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM];
+}
+
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev)
+{
+	size_t dump_size;
+
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+		u32 nr_cg;
+
+		nr_cg = kbdev->gpu_props.num_core_groups;
+		dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	} else
+#endif /* CONFIG_MALI_NO_MALI */
+	{
+		/* assume v5 for now */
+		base_gpu_props *props = &kbdev->gpu_props.props;
+		u32 nr_l2 = props->l2_props.num_l2_slices;
+		u64 core_mask = props->coherency_info.group[0].core_mask;
+		u32 nr_blocks = fls64(core_mask);
+
+		/* JM and tiler counter blocks are always present */
+		dump_size = (2 + nr_l2 + nr_blocks) *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	}
+	return dump_size;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size);
+
+static size_t kbasep_vinstr_dump_size_ctx(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev);
+}
+
+static int kbasep_vinstr_map_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_va_region *reg;
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	u64 flags, nr_pages;
+
+	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+	vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	nr_pages = PFN_UP(vinstr_ctx->dump_size);
+
+	reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+			&vinstr_ctx->gpu_va);
+	if (!reg)
+		return -ENOMEM;
+
+	vinstr_ctx->cpu_va = kbase_vmap(
+			kctx,
+			vinstr_ctx->gpu_va,
+			vinstr_ctx->dump_size,
+			&vinstr_ctx->vmap);
+	if (!vinstr_ctx->cpu_va) {
+		kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void kbasep_vinstr_unmap_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+
+	kbase_vunmap(kctx, &vinstr_ctx->vmap);
+	kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+}
+
+/**
+ * kbasep_vinstr_create_kctx - create kernel context for vinstr
+ * @vinstr_ctx: vinstr context
+ * Return: zero on success
+ */
+static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	unsigned long flags;
+	bool enable_backend = false;
+	int err;
+
+	vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true);
+	if (!vinstr_ctx->kctx)
+		return -ENOMEM;
+
+	/* Map the master kernel dump buffer.  The HW dumps the counters
+	 * into this memory region. */
+	err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx);
+	if (err) {
+		kbase_destroy_context(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return err;
+	}
+
+	/* Add kernel context to list of contexts associated with device. */
+	element = kzalloc(sizeof(*element), GFP_KERNEL);
+	if (element) {
+		element->kctx = vinstr_ctx->kctx;
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_add(&element->link, &kbdev->kctx_list);
+
+		/* Inform timeline client about new context.
+		 * Do this while holding the lock to avoid tracepoint
+		 * being created in both body and summary stream. */
+		KBASE_TLSTREAM_TL_NEW_CTX(
+				vinstr_ctx->kctx,
+				(u32)(vinstr_ctx->kctx->id),
+				(u32)(vinstr_ctx->kctx->tgid));
+
+		mutex_unlock(&kbdev->kctx_list_lock);
+	} else {
+		/* Don't treat this as a fail - just warn about it. */
+		dev_warn(kbdev->dev,
+				"couldn't add kctx to kctx_list\n");
+	}
+
+	/* Don't enable hardware counters if vinstr is suspended.
+	 * Note that vinstr resume code is run under vinstr context lock,
+	 * lower layer will be enabled as needed on resume. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE == vinstr_ctx->state)
+		enable_backend = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (enable_backend)
+		err = enable_hwcnt(vinstr_ctx);
+
+	if (err) {
+		kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+		kbase_destroy_context(vinstr_ctx->kctx);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			list_del(&element->link);
+			kfree(element);
+			mutex_unlock(&kbdev->kctx_list_lock);
+		}
+		KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return err;
+	}
+
+	vinstr_ctx->thread = kthread_run(
+			kbasep_vinstr_service_task,
+			vinstr_ctx,
+			"mali_vinstr_service");
+	if (!vinstr_ctx->thread) {
+		disable_hwcnt(vinstr_ctx);
+		kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+		kbase_destroy_context(vinstr_ctx->kctx);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			list_del(&element->link);
+			kfree(element);
+			mutex_unlock(&kbdev->kctx_list_lock);
+		}
+		KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+		vinstr_ctx->kctx = NULL;
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device             *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	struct kbasep_kctx_list_element *tmp;
+	bool                            found = false;
+
+	/* Release hw counters dumping resources. */
+	vinstr_ctx->thread = NULL;
+	disable_hwcnt(vinstr_ctx);
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+	kbase_destroy_context(vinstr_ctx->kctx);
+
+	/* Remove kernel context from the device's contexts list. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == vinstr_ctx->kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	if (!found)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	/* Inform timeline client about context destruction. */
+	KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+
+	vinstr_ctx->kctx = NULL;
+}
+
+/**
+ * kbasep_vinstr_attach_client - Attach a client to the vinstr core
+ * @vinstr_ctx:    vinstr context
+ * @buffer_count:  requested number of dump buffers
+ * @bitmap:        bitmaps describing which counters should be enabled
+ * @argp:          pointer where notification descriptor shall be stored
+ * @kernel_buffer: pointer to kernel side buffer
+ *
+ * Return: vinstr opaque client handle or NULL on failure
+ */
+static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
+		struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count,
+		u32 bitmap[4], void *argp, void *kernel_buffer)
+{
+	struct task_struct         *thread = NULL;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	if (buffer_count > MAX_BUFFER_COUNT
+	    || (buffer_count & (buffer_count - 1)))
+		return NULL;
+
+	cli = kzalloc(sizeof(*cli), GFP_KERNEL);
+	if (!cli)
+		return NULL;
+
+	cli->vinstr_ctx   = vinstr_ctx;
+	cli->buffer_count = buffer_count;
+	cli->event_mask   =
+		(1 << BASE_HWCNT_READER_EVENT_MANUAL) |
+		(1 << BASE_HWCNT_READER_EVENT_PERIODIC);
+	cli->pending      = true;
+
+	hwcnt_bitmap_set(cli->bitmap, bitmap);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap);
+	vinstr_ctx->reprogram = true;
+
+	/* If this is the first client, create the vinstr kbase
+	 * context. This context is permanently resident until the
+	 * last client exits. */
+	if (!vinstr_ctx->nclients) {
+		hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap);
+		if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0)
+			goto error;
+
+		vinstr_ctx->reprogram = false;
+		cli->pending = false;
+	}
+
+	/* The GPU resets the counter block every time there is a request
+	 * to dump it. We need a per client kernel buffer for accumulating
+	 * the counters. */
+	cli->dump_size    = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
+	if (!cli->accum_buffer)
+		goto error;
+
+	/* Prepare buffers. */
+	if (cli->buffer_count) {
+		int *fd = (int *)argp;
+		size_t tmp;
+
+		/* Allocate area for buffers metadata storage. */
+		tmp = sizeof(struct kbase_hwcnt_reader_metadata) *
+			cli->buffer_count;
+		cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL);
+		if (!cli->dump_buffers_meta)
+			goto error;
+
+		/* Allocate required number of dumping buffers. */
+		cli->dump_buffers = (char *)__get_free_pages(
+				GFP_KERNEL | __GFP_ZERO,
+				get_order(cli->dump_size * cli->buffer_count));
+		if (!cli->dump_buffers)
+			goto error;
+
+		/* Create descriptor for user-kernel data exchange. */
+		*fd = anon_inode_getfd(
+				"[mali_vinstr_desc]",
+				&vinstr_client_fops,
+				cli,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd)
+			goto error;
+	} else if (kernel_buffer) {
+		cli->kernel_buffer = kernel_buffer;
+	} else {
+		cli->legacy_buffer = (void __user *)argp;
+	}
+
+	atomic_set(&cli->read_idx, 0);
+	atomic_set(&cli->meta_idx, 0);
+	atomic_set(&cli->write_idx, 0);
+	init_waitqueue_head(&cli->waitq);
+
+	vinstr_ctx->nclients++;
+	list_add(&cli->list, &vinstr_ctx->idle_clients);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return cli;
+
+error:
+	kfree(cli->dump_buffers_meta);
+	if (cli->dump_buffers)
+		free_pages(
+				(unsigned long)cli->dump_buffers,
+				get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	if (!vinstr_ctx->nclients && vinstr_ctx->kctx) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+	kfree(cli);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+
+	return NULL;
+}
+
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	struct kbase_vinstr_client  *iter, *tmp;
+	struct task_struct          *thread = NULL;
+	u32 zerobitmap[4] = { 0 };
+	int cli_found = 0;
+
+	KBASE_DEBUG_ASSERT(cli);
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) {
+		if (iter == cli) {
+			vinstr_ctx->reprogram = true;
+			cli_found = 1;
+			list_del(&iter->list);
+			break;
+		}
+	}
+	if (!cli_found) {
+		list_for_each_entry_safe(
+				iter, tmp, &vinstr_ctx->waiting_clients, list) {
+			if (iter == cli) {
+				vinstr_ctx->reprogram = true;
+				cli_found = 1;
+				list_del(&iter->list);
+				break;
+			}
+		}
+	}
+	KBASE_DEBUG_ASSERT(cli_found);
+
+	kfree(cli->dump_buffers_meta);
+	free_pages(
+			(unsigned long)cli->dump_buffers,
+			get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	kfree(cli);
+
+	vinstr_ctx->nclients--;
+	if (!vinstr_ctx->nclients) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+
+	/* Rebuild context bitmap now that the client has detached */
+	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client);
+
+/* Accumulate counters in the dump buffer */
+static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
+{
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+	u32 *d = dst;
+	u32 *s = src;
+	size_t i, j;
+
+	for (i = 0; i < dump_size; i += block_size) {
+		/* skip over the header block */
+		d += NR_BYTES_PER_HDR / sizeof(u32);
+		s += NR_BYTES_PER_HDR / sizeof(u32);
+		for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
+			/* saturate result if addition would result in wraparound */
+			if (U32_MAX - *d < *s)
+				*d = U32_MAX;
+			else
+				*d += *s;
+			d++;
+			s++;
+		}
+	}
+}
+
+/* This is the Midgard v4 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v4(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups;
+	size_t i, group_size, group;
+	enum {
+		SC0_BASE    = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC1_BASE    = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC2_BASE    = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC3_BASE    = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		TILER_BASE  = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		JM_BASE     = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
+	};
+
+	group_size = NR_CNT_BLOCKS_PER_GROUP *
+			NR_CNT_PER_BLOCK *
+			NR_BYTES_PER_CNT;
+	for (i = 0; i < nr_cg; i++) {
+		group = i * group_size;
+		/* copy shader core headers */
+		memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE],
+		      NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy tiler header */
+		memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy mmu header */
+		memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy job manager header */
+		memcpy(&dst[group + JM_BASE], &src[group + JM_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* patch the shader core enable mask */
+		mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+
+		/* patch the tiler core enable mask */
+		mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[TILER_HWCNT_BM];
+
+		/* patch the mmu core enable mask */
+		mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+
+		/* patch the job manager enable mask */
+		mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[JM_HWCNT_BM];
+	}
+}
+
+/* This is the Midgard v5 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v5(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev;
+	u32 i, nr_l2;
+	u64 core_mask;
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+
+	/* copy and patch job manager header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[JM_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch tiler header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[TILER_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch MMU/L2C headers */
+	nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+	for (i = 0; i < nr_l2; i++) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+		dst += block_size;
+		src += block_size;
+	}
+
+	/* copy and patch shader core headers */
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (0ull != core_mask) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		if (0ull != (core_mask & 1ull)) {
+			/* if block is not reserved update header */
+			mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+			*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		}
+		dst += block_size;
+		src += block_size;
+
+		core_mask >>= 1;
+	}
+}
+
+/**
+ * accum_clients - accumulate dumped hw counters for all known clients
+ * @vinstr_ctx: vinstr context
+ */
+static void accum_clients(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *iter;
+	int v4 = 0;
+
+#ifndef CONFIG_MALI_NO_MALI
+	v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4);
+#endif
+
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ *
+ * Return: timestamp value
+ */
+static u64 kbasep_vinstr_get_timestamp(void)
+{
+	struct timespec ts;
+
+	getrawmonotonic(&ts);
+	return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+}
+
+/**
+ * kbasep_vinstr_add_dump_request - register client's dumping request
+ * @cli:             requesting client
+ * @waiting_clients: list of pending dumping requests
+ */
+static void kbasep_vinstr_add_dump_request(
+		struct kbase_vinstr_client *cli,
+		struct list_head *waiting_clients)
+{
+	struct kbase_vinstr_client *tmp;
+
+	if (list_empty(waiting_clients)) {
+		list_add(&cli->list, waiting_clients);
+		return;
+	}
+	list_for_each_entry(tmp, waiting_clients, list) {
+		if (tmp->dump_time > cli->dump_time) {
+			list_add_tail(&cli->list, &tmp->list);
+			return;
+		}
+	}
+	list_add_tail(&cli->list, waiting_clients);
+}
+
+/**
+ * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level
+ *                                        dump and accumulate them for known
+ *                                        clients
+ * @vinstr_ctx: vinstr context
+ * @timestamp: pointer where collection timestamp will be recorded
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_collect_and_accumulate(
+		struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp)
+{
+	unsigned long flags;
+	int rcode;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* The dummy model needs the CPU mapping. */
+	gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va);
+#endif
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state) {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		return -EAGAIN;
+	} else {
+		vinstr_ctx->state = VINSTR_DUMPING;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Request HW counters dump.
+	 * Disable preemption to make dump timestamp more accurate. */
+	preempt_disable();
+	*timestamp = kbasep_vinstr_get_timestamp();
+	rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx);
+	preempt_enable();
+
+	if (!rcode)
+		rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
+	WARN_ON(rcode);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state)
+	{
+	case VINSTR_SUSPENDING:
+		schedule_work(&vinstr_ctx->suspend_work);
+		break;
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_IDLE;
+		wake_up_all(&vinstr_ctx->suspend_waitq);
+		break;
+	default:
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Accumulate values of collected counters. */
+	if (!rcode)
+		accum_clients(vinstr_ctx);
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel
+ *                                  buffer
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_fill_dump_buffer(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	unsigned int write_idx = atomic_read(&cli->write_idx);
+	unsigned int read_idx  = atomic_read(&cli->read_idx);
+
+	struct kbase_hwcnt_reader_metadata *meta;
+	void                               *buffer;
+
+	/* Check if there is a place to copy HWC block into. */
+	if (write_idx - read_idx == cli->buffer_count)
+		return -1;
+	write_idx %= cli->buffer_count;
+
+	/* Fill in dump buffer and its metadata. */
+	buffer = &cli->dump_buffers[write_idx * cli->dump_size];
+	meta   = &cli->dump_buffers_meta[write_idx];
+	meta->timestamp  = timestamp;
+	meta->event_id   = event_id;
+	meta->buffer_idx = write_idx;
+	memcpy(buffer, cli->accum_buffer, cli->dump_size);
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer
+ *                                         allocated in userspace
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of legacy ioctl interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_legacy(
+		struct kbase_vinstr_client *cli)
+{
+	void __user  *buffer = cli->legacy_buffer;
+	int          rcode;
+
+	/* Copy data to user buffer. */
+	rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size);
+	if (rcode)
+		pr_warn("error while copying buffer to user\n");
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer
+ *                                         allocated in kernel space
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of the kernel client interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_kernel(
+		struct kbase_vinstr_client *cli)
+{
+	memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_reprogram(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	bool suspended = false;
+
+	/* Don't enable hardware counters if vinstr is suspended. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state)
+		suspended = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (suspended)
+		return;
+
+	/* Change to suspended state is done while holding vinstr context
+	 * lock. Below code will then no re-enable the instrumentation. */
+
+	if (vinstr_ctx->reprogram) {
+		struct kbase_vinstr_client *iter;
+
+		if (!reprogram_hwcnt(vinstr_ctx)) {
+			vinstr_ctx->reprogram = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->idle_clients,
+					list)
+				iter->pending = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->waiting_clients,
+					list)
+				iter->pending = false;
+		}
+	}
+}
+
+/**
+ * kbasep_vinstr_update_client - copy accumulated counters to user readable
+ *                               buffer and notify the user
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_update_client(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	int rcode = 0;
+
+	/* Copy collected counters to user readable buffer. */
+	if (cli->buffer_count)
+		rcode = kbasep_vinstr_fill_dump_buffer(
+				cli, timestamp, event_id);
+	else if (cli->kernel_buffer)
+		rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli);
+	else
+		rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
+
+	if (rcode)
+		goto exit;
+
+
+	/* Notify client. Make sure all changes to memory are visible. */
+	wmb();
+	atomic_inc(&cli->write_idx);
+	wake_up_interruptible(&cli->waitq);
+
+	/* Prepare for next request. */
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+exit:
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function
+ *
+ * @hrtimer: high resolution timer
+ *
+ * Return: High resolution timer restart enum.
+ */
+static enum hrtimer_restart kbasep_vinstr_wake_up_callback(
+		struct hrtimer *hrtimer)
+{
+	struct kbasep_vinstr_wake_up_timer *timer =
+		container_of(
+			hrtimer,
+			struct kbasep_vinstr_wake_up_timer,
+			hrtimer);
+
+	KBASE_DEBUG_ASSERT(timer);
+
+	atomic_set(&timer->vinstr_ctx->request_pending, 1);
+	wake_up_all(&timer->vinstr_ctx->waitq);
+
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * kbasep_vinstr_service_task - HWC dumping service thread
+ *
+ * @data: Pointer to vinstr context structure.
+ *
+ * Return: Always returns zero.
+ */
+static int kbasep_vinstr_service_task(void *data)
+{
+	struct kbase_vinstr_context        *vinstr_ctx = data;
+	struct kbasep_vinstr_wake_up_timer timer;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	hrtimer_init(&timer.hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	timer.hrtimer.function = kbasep_vinstr_wake_up_callback;
+	timer.vinstr_ctx       = vinstr_ctx;
+
+	while (!kthread_should_stop()) {
+		struct kbase_vinstr_client *cli = NULL;
+		struct kbase_vinstr_client *tmp;
+		int                        rcode;
+
+		u64              timestamp = kbasep_vinstr_get_timestamp();
+		u64              dump_time = 0;
+		struct list_head expired_requests;
+
+		/* Hold lock while performing operations on lists of clients. */
+		mutex_lock(&vinstr_ctx->lock);
+
+		/* Closing thread must not interact with client requests. */
+		if (current == vinstr_ctx->thread) {
+			atomic_set(&vinstr_ctx->request_pending, 0);
+
+			if (!list_empty(&vinstr_ctx->waiting_clients)) {
+				cli = list_first_entry(
+						&vinstr_ctx->waiting_clients,
+						struct kbase_vinstr_client,
+						list);
+				dump_time = cli->dump_time;
+			}
+		}
+
+		if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) {
+			mutex_unlock(&vinstr_ctx->lock);
+
+			/* Sleep until next dumping event or service request. */
+			if (cli) {
+				u64 diff = dump_time - timestamp;
+
+				hrtimer_start(
+						&timer.hrtimer,
+						ns_to_ktime(diff),
+						HRTIMER_MODE_REL);
+			}
+			wait_event(
+					vinstr_ctx->waitq,
+					atomic_read(
+						&vinstr_ctx->request_pending) ||
+					kthread_should_stop());
+			hrtimer_cancel(&timer.hrtimer);
+			continue;
+		}
+
+		rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx,
+				&timestamp);
+
+		INIT_LIST_HEAD(&expired_requests);
+
+		/* Find all expired requests. */
+		list_for_each_entry_safe(
+				cli,
+				tmp,
+				&vinstr_ctx->waiting_clients,
+				list) {
+			s64 tdiff =
+				(s64)(timestamp + DUMPING_RESOLUTION) -
+				(s64)cli->dump_time;
+			if (tdiff >= 0ll) {
+				list_del(&cli->list);
+				list_add(&cli->list, &expired_requests);
+			} else {
+				break;
+			}
+		}
+
+		/* Fill data for each request found. */
+		list_for_each_entry_safe(cli, tmp, &expired_requests, list) {
+			/* Ensure that legacy buffer will not be used from
+			 * this kthread context. */
+			BUG_ON(0 == cli->buffer_count);
+			/* Expect only periodically sampled clients. */
+			BUG_ON(0 == cli->dump_interval);
+
+			if (!rcode)
+				kbasep_vinstr_update_client(
+						cli,
+						timestamp,
+						BASE_HWCNT_READER_EVENT_PERIODIC);
+
+			/* Set new dumping time. Drop missed probing times. */
+			do {
+				cli->dump_time += cli->dump_interval;
+			} while (cli->dump_time < timestamp);
+
+			list_del(&cli->list);
+			kbasep_vinstr_add_dump_request(
+					cli,
+					&vinstr_ctx->waiting_clients);
+		}
+
+		/* Reprogram counters set if required. */
+		kbasep_vinstr_reprogram(vinstr_ctx);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	}
+
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers
+ * @cli: pointer to vinstr client structure
+ *
+ * Return: non-zero if client has at least one dumping buffer filled that was
+ *         not notified to user yet
+ */
+static int kbasep_vinstr_hwcnt_reader_buffer_ready(
+		struct kbase_vinstr_client *cli)
+{
+	KBASE_DEBUG_ASSERT(cli);
+	return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int meta_idx = atomic_read(&cli->meta_idx);
+	unsigned int idx = meta_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx];
+
+	/* Metadata sanity check. */
+	KBASE_DEBUG_ASSERT(idx == meta->buffer_idx);
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if there is any buffer available. */
+	if (atomic_read(&cli->write_idx) == meta_idx)
+		return -EAGAIN;
+
+	/* Check if previously taken buffer was put back. */
+	if (atomic_read(&cli->read_idx) != meta_idx)
+		return -EBUSY;
+
+	/* Copy next available buffer's metadata to user. */
+	if (copy_to_user(buffer, meta, size))
+		return -EFAULT;
+
+	atomic_inc(&cli->meta_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int read_idx = atomic_read(&cli->read_idx);
+	unsigned int idx = read_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata meta;
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if any buffer was taken. */
+	if (atomic_read(&cli->meta_idx) == read_idx)
+		return -EPERM;
+
+	/* Check if correct buffer is put back. */
+	if (copy_from_user(&meta, buffer, size))
+		return -EFAULT;
+	if (idx != meta.buffer_idx)
+		return -EINVAL;
+
+	atomic_inc(&cli->read_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @interval: periodic dumping interval (disable periodic dumping if zero)
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+		struct kbase_vinstr_client *cli, u32 interval)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	list_del(&cli->list);
+
+	cli->dump_interval = interval;
+
+	/* If interval is non-zero, enable periodic dumping for this client. */
+	if (cli->dump_interval) {
+		if (DUMPING_RESOLUTION > cli->dump_interval)
+			cli->dump_interval = DUMPING_RESOLUTION;
+		cli->dump_time =
+			kbasep_vinstr_get_timestamp() + cli->dump_interval;
+
+		kbasep_vinstr_add_dump_request(
+				cli, &vinstr_ctx->waiting_clients);
+
+		atomic_set(&vinstr_ctx->request_pending, 1);
+		wake_up_all(&vinstr_ctx->waitq);
+	} else {
+		list_add(&cli->list, &vinstr_ctx->idle_clients);
+	}
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id
+ * @event_id: id of event
+ * Return: event_mask or zero if event is not supported or maskable
+ */
+static u32 kbasep_vinstr_hwcnt_reader_event_mask(
+		enum base_hwcnt_reader_event event_id)
+{
+	u32 event_mask = 0;
+
+	switch (event_id) {
+	case BASE_HWCNT_READER_EVENT_PREJOB:
+	case BASE_HWCNT_READER_EVENT_POSTJOB:
+		/* These event are maskable. */
+		event_mask = (1 << event_id);
+		break;
+
+	case BASE_HWCNT_READER_EVENT_MANUAL:
+	case BASE_HWCNT_READER_EVENT_PERIODIC:
+		/* These event are non-maskable. */
+	default:
+		/* These event are not supported. */
+		break;
+	}
+
+	return event_mask;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to enable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask |= event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to disable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask &= ~event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command
+ * @cli:   pointer to vinstr client structure
+ * @hwver: pointer to user buffer where hw version will be stored
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+		struct kbase_vinstr_client *cli, u32 __user *hwver)
+{
+#ifndef CONFIG_MALI_NO_MALI
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+#endif
+
+	u32                         ver = 5;
+
+#ifndef CONFIG_MALI_NO_MALI
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4))
+		ver = 4;
+#endif
+
+	return put_user(ver, hwver);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl
+ * @filp:   pointer to file structure
+ * @cmd:    user command
+ * @arg:    command's argument
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	long                       rcode = 0;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd)))
+		return -EINVAL;
+
+	switch (cmd) {
+	case KBASE_HWCNT_READER_GET_API_VERSION:
+		rcode = put_user(HWCNT_READER_API, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_HWVER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+				cli, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER_SIZE:
+		KBASE_DEBUG_ASSERT(cli->vinstr_ctx);
+		rcode = put_user(
+				(u32)cli->vinstr_ctx->dump_size,
+				(u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_DUMP:
+		rcode = kbase_vinstr_hwc_dump(
+				cli, BASE_HWCNT_READER_EVENT_MANUAL);
+		break;
+	case KBASE_HWCNT_READER_CLEAR:
+		rcode = kbase_vinstr_hwc_clear(cli);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_PUT_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_SET_INTERVAL:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+				cli, (u32)arg);
+		break;
+	case KBASE_HWCNT_READER_ENABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	case KBASE_HWCNT_READER_DISABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	default:
+		rcode = -EINVAL;
+		break;
+	}
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp,
+		poll_table *wait)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	poll_wait(filp, &cli->waitq, wait);
+	if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap
+ * @filp: pointer to file structure
+ * @vma:  pointer to vma structure
+ * Return: zero on success
+ */
+static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
+		struct vm_area_struct *vma)
+{
+	struct kbase_vinstr_client *cli;
+	unsigned long size, addr, pfn, offset;
+	unsigned long vm_size = vma->vm_end - vma->vm_start;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(vma);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	size = cli->buffer_count * cli->dump_size;
+
+	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
+		return -EINVAL;
+
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	if (vm_size > size - offset)
+		return -EINVAL;
+
+	addr = __pa((unsigned long)cli->dump_buffers + offset);
+	pfn = addr >> PAGE_SHIFT;
+
+	return remap_pfn_range(
+			vma,
+			vma->vm_start,
+			pfn,
+			vm_size,
+			vma->vm_page_prot);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ * Return always return zero
+ */
+static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
+		struct file *filp)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	kbase_vinstr_detach_client(cli);
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_kick_scheduler - trigger scheduler cycle
+ * @kbdev: pointer to kbase device structure
+ */
+static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	down(&js_devdata->schedule_sem);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	up(&js_devdata->schedule_sem);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker suspending vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_suspend_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			suspend_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		disable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_SUSPENDED;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Kick GPU scheduler to allow entering protected mode.
+	 * This must happen after vinstr was suspended. */
+	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker resuming vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_resume_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			resume_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		enable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_IDLE;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Kick GPU scheduler to allow entering protected mode.
+	 * Note that scheduler state machine might requested re-entry to
+	 * protected mode before vinstr was resumed.
+	 * This must happen after vinstr was release. */
+	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+}
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL);
+	if (!vinstr_ctx)
+		return NULL;
+
+	INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
+	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
+	mutex_init(&vinstr_ctx->lock);
+	spin_lock_init(&vinstr_ctx->state_lock);
+	vinstr_ctx->kbdev = kbdev;
+	vinstr_ctx->thread = NULL;
+	vinstr_ctx->state = VINSTR_IDLE;
+	vinstr_ctx->suspend_cnt = 0;
+	INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker);
+	INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker);
+	init_waitqueue_head(&vinstr_ctx->suspend_waitq);
+
+	atomic_set(&vinstr_ctx->request_pending, 0);
+	init_waitqueue_head(&vinstr_ctx->waitq);
+
+	return vinstr_ctx;
+}
+
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *cli;
+
+	/* Stop service thread first. */
+	if (vinstr_ctx->thread)
+		kthread_stop(vinstr_ctx->thread);
+
+	/* Wait for workers. */
+	flush_work(&vinstr_ctx->suspend_work);
+	flush_work(&vinstr_ctx->resume_work);
+
+	while (1) {
+		struct list_head *list = &vinstr_ctx->idle_clients;
+
+		if (list_empty(list)) {
+			list = &vinstr_ctx->waiting_clients;
+			if (list_empty(list))
+				break;
+		}
+
+		cli = list_first_entry(list, struct kbase_vinstr_client, list);
+		list_del(&cli->list);
+		kfree(cli->accum_buffer);
+		kfree(cli);
+		vinstr_ctx->nclients--;
+	}
+	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients);
+	if (vinstr_ctx->kctx)
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	kfree(vinstr_ctx);
+}
+
+int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup)
+{
+	struct kbase_vinstr_client  *cli;
+	u32                         bitmap[4];
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(setup->buffer_count);
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	cli = kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			setup->buffer_count,
+			bitmap,
+			&setup->fd,
+			NULL);
+
+	if (!cli)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup)
+{
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (setup->dump_buffer) {
+		u32 bitmap[4];
+
+		bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+		bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+		bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+		bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+		if (*cli)
+			return -EBUSY;
+
+		*cli = kbasep_vinstr_attach_client(
+				vinstr_ctx,
+				0,
+				bitmap,
+				(void *)(long)setup->dump_buffer,
+				NULL);
+
+		if (!(*cli))
+			return -ENOMEM;
+	} else {
+		if (!*cli)
+			return -EINVAL;
+
+		kbase_vinstr_detach_client(*cli);
+		*cli = NULL;
+	}
+
+	return 0;
+}
+
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer)
+{
+	u32 bitmap[4];
+
+	if (!vinstr_ctx || !setup || !kernel_buffer)
+		return NULL;
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	return kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			0,
+			bitmap,
+			NULL,
+			kernel_buffer);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
+
+int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	int                         rcode = 0;
+	struct kbase_vinstr_context *vinstr_ctx;
+	u64                         timestamp;
+	u32                         event_mask;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT);
+	event_mask = 1 << event_id;
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (event_mask & cli->event_mask) {
+		rcode = kbasep_vinstr_collect_and_accumulate(
+				vinstr_ctx,
+				&timestamp);
+		if (rcode)
+			goto exit;
+
+		rcode = kbasep_vinstr_update_client(cli, timestamp, event_id);
+		if (rcode)
+			goto exit;
+
+		kbasep_vinstr_reprogram(vinstr_ctx);
+	}
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump);
+
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	int                         rcode;
+	u64                         unused;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
+	if (rcode)
+		goto exit;
+	rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx);
+	if (rcode)
+		goto exit;
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+	kbasep_vinstr_reprogram(vinstr_ctx);
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	int ret = -EAGAIN;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+		vinstr_ctx->suspend_cnt++;
+		/* overflow shall not happen */
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		ret = 0;
+		break;
+
+	case VINSTR_IDLE:
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		schedule_work(&vinstr_ctx->suspend_work);
+		break;
+
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		break;
+
+	case VINSTR_SUSPENDING:
+		/* fall through */
+	case VINSTR_RESUMING:
+		break;
+
+	default:
+		BUG();
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	return ret;
+}
+
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	wait_event(vinstr_ctx->suspend_waitq,
+			(0 == kbase_vinstr_try_suspend(vinstr_ctx)));
+}
+
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state);
+	if (VINSTR_SUSPENDED == vinstr_ctx->state) {
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		vinstr_ctx->suspend_cnt--;
+		if (0 == vinstr_ctx->suspend_cnt) {
+			vinstr_ctx->state = VINSTR_RESUMING;
+			schedule_work(&vinstr_ctx->resume_work);
+		}
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
new file mode 100644
index 0000000..6207d25
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -0,0 +1,155 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwcnt_reader.h>
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context;
+struct kbase_vinstr_client;
+
+/*****************************************************************************/
+
+/**
+ * kbase_vinstr_init() - initialize the vinstr core
+ * @kbdev: kbase device
+ *
+ * Return: pointer to the vinstr context on success or NULL on failure
+ */
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_term() - terminate the vinstr core
+ * @vinstr_ctx: vinstr context
+ */
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader
+ * @vinstr_ctx: vinstr context
+ * @setup:      reader's configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwcnt_reader_setup(
+		struct kbase_vinstr_context        *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup);
+
+/**
+ * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping
+ * @vinstr_ctx: vinstr context
+ * @cli:        pointer where to store pointer to new vinstr client structure
+ * @setup:      hwc configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
+ *                                   client
+ * @vinstr_ctx:    vinstr context
+ * @setup:         reader's configuration
+ * @kernel_buffer: pointer to dump buffer
+ *
+ * setup->buffer_count and setup->fd are not used for kernel side clients.
+ *
+ * Return: pointer to client structure, or NULL on failure
+ */
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer);
+
+/**
+ * kbase_vinstr_hwc_dump - issue counter dump for vinstr client
+ * @cli:      pointer to vinstr client
+ * @event_id: id of event that triggered hwcnt dump
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_dump(
+		struct kbase_vinstr_client   *cli,
+		enum base_hwcnt_reader_event event_id);
+
+/**
+ * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for
+ *                          a given kbase context
+ * @cli: pointer to vinstr client
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Return: 0 on success, or negative if state change is in progress
+ *
+ * Warning: This API call is non-generic. It is meant to be used only by
+ *          job scheduler state machine.
+ *
+ * Function initiates vinstr switch to suspended state. Once it was called
+ * vinstr enters suspending state. If function return non-zero value, it
+ * indicates that state switch is not complete and function must be called
+ * again. On state switch vinstr will trigger job scheduler state machine
+ * cycle.
+ */
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_suspend - suspends operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function initiates vinstr switch to suspended state. Then it blocks until
+ * operation is completed.
+ */
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_resume - resumes operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function can be called only if it was preceded by a successful call
+ * to kbase_vinstr_suspend.
+ */
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_dump_size - Return required size of dump buffer
+ * @kbdev: device pointer
+ *
+ * Return : buffer size in bytes
+ */
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_detach_client - Detach a client from the vinstr core
+ * @cli: pointer to vinstr client
+ */
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
+
+#endif /* _KBASE_VINSTR_H_ */
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100644
index 0000000..5d6b402
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_KBASE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mali_slot_template,
+	TP_PROTO(int jobslot, unsigned int info_val),
+	TP_ARGS(jobslot, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, jobslot)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->jobslot = jobslot;
+		__entry->info_val = info_val;
+	),
+	TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val)
+);
+
+#define DEFINE_MALI_SLOT_EVENT(name) \
+DEFINE_EVENT(mali_slot_template, mali_##name, \
+	TP_PROTO(int jobslot, unsigned int info_val), \
+	TP_ARGS(jobslot, info_val))
+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT);
+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE);
+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT);
+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT);
+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+#undef DEFINE_MALI_SLOT_EVENT
+
+DECLARE_EVENT_CLASS(mali_refcount_template,
+	TP_PROTO(int refcount, unsigned int info_val),
+	TP_ARGS(refcount, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, refcount)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->refcount = refcount;
+		__entry->info_val = info_val;
+	),
+	TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val)
+);
+
+#define DEFINE_MALI_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(mali_refcount_template, mali_##name, \
+	TP_PROTO(int refcount, unsigned int info_val), \
+	TP_ARGS(refcount, info_val))
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK);
+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE);
+#undef DEFINE_MALI_REFCOUNT_EVENT
+
+DECLARE_EVENT_CLASS(mali_add_template,
+	TP_PROTO(int gpu_addr, unsigned int info_val),
+	TP_ARGS(gpu_addr, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, gpu_addr)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->gpu_addr = gpu_addr;
+		__entry->info_val = info_val;
+	),
+	TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val)
+);
+
+#define DEFINE_MALI_ADD_EVENT(name) \
+DEFINE_EVENT(mali_add_template, mali_##name, \
+	TP_PROTO(int gpu_addr, unsigned int info_val), \
+	TP_ARGS(gpu_addr, info_val))
+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY);
+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL);
+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT);
+DEFINE_MALI_ADD_EVENT(JM_IRQ);
+DEFINE_MALI_ADD_EVENT(JM_IRQ_END);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP);
+DEFINE_MALI_ADD_EVENT(PM_PWRON);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM);
+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+#undef DEFINE_MALI_ADD_EVENT
+
+#endif /* _TRACE_MALI_KBASE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100644
index 0000000..2be06a5
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,189 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#include <linux/tracepoint.h>
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - called from mali_kbase_core_linux.c
+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event,
+	TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid,
+			unsigned char job_id),
+	TP_ARGS(event_id, tgid, pid, job_id),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned int, tgid)
+		__field(unsigned int, pid)
+		__field(unsigned char, job_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->tgid = tgid;
+		__entry->pid = pid;
+		__entry->job_id = job_id;
+	),
+	TP_printk("event=%u tgid=%u pid=%u job_id=%u",
+		__entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+);
+
+/**
+ * mali_pm_status - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF)
+ */
+TRACE_EVENT(mali_pm_status,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power up
+ */
+TRACE_EVENT(mali_pm_power_on,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power down
+ */
+TRACE_EVENT(mali_pm_power_off,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_page_fault_insert_pages - Called by page_fault_worker()
+ * it reports an MMU page fault resulting in new pages being mapped.
+ * @event_id: MMU address space number.
+ * @value: number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages,
+	TP_PROTO(int event_id, unsigned long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+		__field(unsigned long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space()
+ * it reports that a certain MMU address space is in use now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_in_use,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal()
+ * it reports that a certain MMU address space has been released now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_released,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
+ *                                 and by kbase_atomic_sub_pages()
+ * it reports that the total number of allocated pages is changed.
+ * @event_id: number of pages to be added or subtracted (according to the sign).
+ */
+TRACE_EVENT(mali_total_alloc_pages_change,
+	TP_PROTO(long long int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(long long int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%lld", __entry->event_id)
+);
+
+#endif				/*  _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
new file mode 100644
index 0000000..9945293
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Kernel-wide include for common macros and types.
+ */
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define U8_MAX          ((u8)~0U)
+#define S8_MAX          ((s8)(U8_MAX>>1))
+#define S8_MIN          ((s8)(-S8_MAX - 1))
+#define U16_MAX         ((u16)~0U)
+#define S16_MAX         ((s16)(U16_MAX>>1))
+#define S16_MIN         ((s16)(-S16_MAX - 1))
+#define U32_MAX         ((u32)~0U)
+#define S32_MAX         ((s32)(U32_MAX>>1))
+#define S32_MIN         ((s32)(-S32_MAX - 1))
+#define U64_MAX         ((u64)~0ULL)
+#define S64_MAX         ((s64)(U64_MAX>>1))
+#define S64_MIN         ((s64)(-S64_MAX - 1))
+#endif /* LINUX_VERSION_CODE */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#define SIZE_MAX        (~(size_t)0)
+#endif /* LINUX_VERSION_CODE */
+
+/**
+ * MIN - Return the lesser of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * Refer to MAX macro for more details
+ */
+#define MIN(x, y)	((x) < (y) ? (x) : (y))
+
+/**
+ * MAX -  Return the greater of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as MIN it is guaranteed to return
+ * the one that MIN didn't return. This is significant for types where not
+ * all values are comparable e.g. NaNs in floating-point types. But if you want
+ * to retrieve the min and max of two values, consider using a conditional swap
+ * instead.
+ */
+#define MAX(x, y)	((x) < (y) ? (y) : (x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * much support API backwards compatibility.
+ */
+#define CSTD_UNUSED(x)	((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile time macros turn a function-like macro in to a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP(...)	((void)#__VA_ARGS__)
+
+/**
+ * Function-like macro for converting a pointer in to a u64 for storing into
+ * an external data structure. This is commonly used when pairing a 32-bit
+ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
+ * is complex and a straight cast does not work reliably as pointers are
+ * often considered as signed.
+ */
+#define PTR_TO_U64(x)	((uint64_t)((uintptr_t)(x)))
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1(x)	#x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be used
+ * if the macro is defined in a way which may have no value; use the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2(x)	CSTD_STR1(x)
+
+/**
+ * Specify an assertion value which is evaluated at compile time. Recommended
+ * usage is specification of a @c static @c INLINE function containing all of
+ * the assertions thus:
+ *
+ * @code
+ * static INLINE [module]_compile_time_assertions( void )
+ * {
+ *     COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
+ * }
+ * @endcode
+ *
+ * @note Use @c static not @c STATIC. We never want to turn off this @c static
+ * specification for testing purposes.
+ */
+#define CSTD_COMPILE_TIME_ASSERT(expr) \
+	do { switch (0) { case 0: case (expr):; } } while (false)
+
+#endif /* _MALISW_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
new file mode 100644
index 0000000..a509cbd
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDG_COHERENCY_H_
+#define _MIDG_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE      1
+#define COHERENCY_NONE     31
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+
+#endif /* _MIDG_COHERENCY_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100644
index 0000000..2d8231d
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,612 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Begin Register Offsets
+ */
+
+#define GPU_CONTROL_BASE        0x0000
+#define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
+#define GPU_ID                  0x000	/* (RO) GPU and revision identifier */
+#define L2_FEATURES             0x004	/* (RO) Level 2 cache features */
+#define SUSPEND_SIZE            0x008   /* (RO) Fixed-function suspend buffer
+						size */
+#define TILER_FEATURES          0x00C	/* (RO) Tiler Features */
+#define MEM_FEATURES            0x010	/* (RO) Memory system features */
+#define MMU_FEATURES            0x014	/* (RO) MMU features */
+#define AS_PRESENT              0x018	/* (RO) Address space slots present */
+#define JS_PRESENT              0x01C	/* (RO) Job slots present */
+#define GPU_IRQ_RAWSTAT         0x020	/* (RW) */
+#define GPU_IRQ_CLEAR           0x024	/* (WO) */
+#define GPU_IRQ_MASK            0x028	/* (RW) */
+#define GPU_IRQ_STATUS          0x02C	/* (RO) */
+
+/* IRQ flags */
+#define GPU_FAULT               (1 << 0)	/* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS     (1 << 7)	/* More than one GPU Fault occurred. */
+#define RESET_COMPLETED         (1 << 8)	/* Set when a reset has completed. Intended to use with SOFT_RESET
+						   commands which may take time. */
+#define POWER_CHANGED_SINGLE    (1 << 9)	/* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL       (1 << 10)	/* Set when all cores have finished powering up or down
+						   and the power manager is idle. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16)	/* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED  (1 << 17)	/* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+			| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#define GPU_COMMAND             0x030	/* (WO) */
+#define GPU_STATUS              0x034	/* (RO) */
+#define LATEST_FLUSH            0x038	/* (RO) */
+
+#define GROUPS_L2_COHERENT      (1 << 0)	/* Cores groups are l2 coherent */
+#define GPU_DBGEN               (1 << 8)	/* DBGEN wire status */
+
+#define GPU_FAULTSTATUS         0x03C	/* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO     0x040	/* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI     0x044	/* (RO) GPU exception fault address, high word */
+
+#define PWR_KEY                 0x050	/* (WO) Power manager key register */
+#define PWR_OVERRIDE0           0x054	/* (RW) Power manager override settings */
+#define PWR_OVERRIDE1           0x058	/* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO          0x060	/* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI          0x064	/* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG           0x068	/* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN            0x06C	/* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN        0x070	/* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN         0x074	/* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN        0x07C	/* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO          0x090	/* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI          0x094	/* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO            0x098	/* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI            0x09C	/* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS		0x0A0	/* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4	/* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8	/* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES         0x0AC	/* (RO) Thread features */
+
+#define TEXTURE_FEATURES_0      0x0B0	/* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1      0x0B4	/* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2      0x0B8	/* (RO) Support flags for indexed texture formats 64..95 */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define JS0_FEATURES            0x0C0	/* (RO) Features of job slot 0 */
+#define JS1_FEATURES            0x0C4	/* (RO) Features of job slot 1 */
+#define JS2_FEATURES            0x0C8	/* (RO) Features of job slot 2 */
+#define JS3_FEATURES            0x0CC	/* (RO) Features of job slot 3 */
+#define JS4_FEATURES            0x0D0	/* (RO) Features of job slot 4 */
+#define JS5_FEATURES            0x0D4	/* (RO) Features of job slot 5 */
+#define JS6_FEATURES            0x0D8	/* (RO) Features of job slot 6 */
+#define JS7_FEATURES            0x0DC	/* (RO) Features of job slot 7 */
+#define JS8_FEATURES            0x0E0	/* (RO) Features of job slot 8 */
+#define JS9_FEATURES            0x0E4	/* (RO) Features of job slot 9 */
+#define JS10_FEATURES           0x0E8	/* (RO) Features of job slot 10 */
+#define JS11_FEATURES           0x0EC	/* (RO) Features of job slot 11 */
+#define JS12_FEATURES           0x0F0	/* (RO) Features of job slot 12 */
+#define JS13_FEATURES           0x0F4	/* (RO) Features of job slot 13 */
+#define JS14_FEATURES           0x0F8	/* (RO) Features of job slot 14 */
+#define JS15_FEATURES           0x0FC	/* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n)      GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define SHADER_PRESENT_LO       0x100	/* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI       0x104	/* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO        0x110	/* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI        0x114	/* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO           0x120	/* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI           0x124	/* (RO) Level 2 cache present bitmap, high word */
+
+#define STACK_PRESENT_LO        0xE00   /* (RO) Core stack present bitmap, low word */
+#define STACK_PRESENT_HI        0xE04   /* (RO) Core stack present bitmap, high word */
+
+
+#define SHADER_READY_LO         0x140	/* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI         0x144	/* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO          0x150	/* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI          0x154	/* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO             0x160	/* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI             0x164	/* (RO) Level 2 cache ready bitmap, high word */
+
+#define STACK_READY_LO          0xE10   /* (RO) Core stack ready bitmap, low word */
+#define STACK_READY_HI          0xE14   /* (RO) Core stack ready bitmap, high word */
+
+
+#define SHADER_PWRON_LO         0x180	/* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI         0x184	/* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO          0x190	/* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI          0x194	/* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO             0x1A0	/* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI             0x1A4	/* (WO) Level 2 cache power on bitmap, high word */
+
+#define STACK_PWRON_LO          0xE20   /* (RO) Core stack power on bitmap, low word */
+#define STACK_PWRON_HI          0xE24   /* (RO) Core stack power on bitmap, high word */
+
+
+#define SHADER_PWROFF_LO        0x1C0	/* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI        0x1C4	/* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO         0x1D0	/* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI         0x1D4	/* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO            0x1E0	/* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI            0x1E4	/* (WO) Level 2 cache power off bitmap, high word */
+
+#define STACK_PWROFF_LO         0xE30   /* (RO) Core stack power off bitmap, low word */
+#define STACK_PRWOFF_HI         0xE34   /* (RO) Core stack power off bitmap, high word */
+
+
+#define SHADER_PWRTRANS_LO      0x200	/* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI      0x204	/* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO       0x210	/* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI       0x214	/* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO          0x220	/* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI          0x224	/* (RO) Level 2 cache power transition bitmap, high word */
+
+#define STACK_PWRTRANS_LO       0xE40   /* (RO) Core stack power transition bitmap, low word */
+#define STACK_PRWTRANS_HI       0xE44   /* (RO) Core stack power transition bitmap, high word */
+
+
+#define SHADER_PWRACTIVE_LO     0x240	/* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI     0x244	/* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO      0x250	/* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI      0x254	/* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO         0x260	/* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI         0x264	/* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES      0x300	/* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304	/* (RW) Coherency enable */
+
+#define JM_CONFIG               0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
+#define SHADER_CONFIG           0xF04	/* (RW) Shader core configuration settings (Implementation specific register) */
+#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define L2_MMU_CONFIG           0xF0C	/* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+#define JOB_CONTROL_BASE        0x1000
+
+#define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT         0x000	/* Raw interrupt status register */
+#define JOB_IRQ_CLEAR           0x004	/* Interrupt clear register */
+#define JOB_IRQ_MASK            0x008	/* Interrupt mask register */
+#define JOB_IRQ_STATUS          0x00C	/* Interrupt status register */
+#define JOB_IRQ_JS_STATE        0x010	/* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE        0x014	/* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
+
+#define JOB_SLOT0               0x800	/* Configuration registers for job slot 0 */
+#define JOB_SLOT1               0x880	/* Configuration registers for job slot 1 */
+#define JOB_SLOT2               0x900	/* Configuration registers for job slot 2 */
+#define JOB_SLOT3               0x980	/* Configuration registers for job slot 3 */
+#define JOB_SLOT4               0xA00	/* Configuration registers for job slot 4 */
+#define JOB_SLOT5               0xA80	/* Configuration registers for job slot 5 */
+#define JOB_SLOT6               0xB00	/* Configuration registers for job slot 6 */
+#define JOB_SLOT7               0xB80	/* Configuration registers for job slot 7 */
+#define JOB_SLOT8               0xC00	/* Configuration registers for job slot 8 */
+#define JOB_SLOT9               0xC80	/* Configuration registers for job slot 9 */
+#define JOB_SLOT10              0xD00	/* Configuration registers for job slot 10 */
+#define JOB_SLOT11              0xD80	/* Configuration registers for job slot 11 */
+#define JOB_SLOT12              0xE00	/* Configuration registers for job slot 12 */
+#define JOB_SLOT13              0xE80	/* Configuration registers for job slot 13 */
+#define JOB_SLOT14              0xF00	/* Configuration registers for job slot 14 */
+#define JOB_SLOT15              0xF80	/* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r)      (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO             0x00	/* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI             0x04	/* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO             0x08	/* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI             0x0C	/* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
+#define JS_XAFFINITY           0x1C	/* (RO) Extended affinity mask for job
+					   slot n */
+
+#define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
+#define JS_STATUS              0x24	/* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO        0x40	/* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI        0x44	/* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
+#define JS_XAFFINITY_NEXT      0x5C	/* (RW) Next extended affinity mask for
+					   job slot n */
+
+#define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
+
+#define MEMORY_MANAGEMENT_BASE  0x2000
+#define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT         0x000	/* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR           0x004	/* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK            0x008	/* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS          0x00C	/* (RO) Interrupt status register */
+
+#define MMU_AS0                 0x400	/* Configuration registers for address space 0 */
+#define MMU_AS1                 0x440	/* Configuration registers for address space 1 */
+#define MMU_AS2                 0x480	/* Configuration registers for address space 2 */
+#define MMU_AS3                 0x4C0	/* Configuration registers for address space 3 */
+#define MMU_AS4                 0x500	/* Configuration registers for address space 4 */
+#define MMU_AS5                 0x540	/* Configuration registers for address space 5 */
+#define MMU_AS6                 0x580	/* Configuration registers for address space 6 */
+#define MMU_AS7                 0x5C0	/* Configuration registers for address space 7 */
+#define MMU_AS8                 0x600	/* Configuration registers for address space 8 */
+#define MMU_AS9                 0x640	/* Configuration registers for address space 9 */
+#define MMU_AS10                0x680	/* Configuration registers for address space 10 */
+#define MMU_AS11                0x6C0	/* Configuration registers for address space 11 */
+#define MMU_AS12                0x700	/* Configuration registers for address space 12 */
+#define MMU_AS13                0x740	/* Configuration registers for address space 13 */
+#define MMU_AS14                0x780	/* Configuration registers for address space 14 */
+#define MMU_AS15                0x7C0	/* Configuration registers for address space 15 */
+
+#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO         0x00	/* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI         0x04	/* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO          0x08	/* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI          0x0C	/* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO         0x10	/* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI         0x14	/* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND             0x18	/* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS         0x1C	/* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO     0x20	/* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
+#define AS_STATUS              0x28	/* (RO) Status flags for address space n */
+
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
+
+/* End Register Offsets */
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+   MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS   16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n)      (1UL << (n))
+#define MMU_BUS_ERROR(n)       (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK   0xfffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED  (0u << 0)
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY  (1u << 1)
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE     (3u << 0)
+#define AS_TRANSTAB_LPAE_READ_INNER        (1u << 2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER       (1u << 4)
+
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK      0x00000003
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                    (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT       (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT        (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT      (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG             (0x3<<3)
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT      (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK                  (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC                (0x0<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX                    (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ                  (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE                 (0x3<<8)
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
+
+/*
+ * Begin Command Values
+ */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP         0x00	/* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START       0x01	/* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP   0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK        0x07    /* Mask of bits currently in use by the HW */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP         0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK        0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK      0x03	/* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH       0x04	/* Flush all L2 caches then issue a flush region command to all MMUs
+					   (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT    0x04	/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM   0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
+					   flush all L2 caches then issue a flush region command to all MMUs */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION        (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN            (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU                    (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG               (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
+
+/* JS_XAFFINITY register values */
+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
+#define JS_XAFFINITY_TILER_ENABLE     (1u << 8)
+#define JS_XAFFINITY_CACHE_ENABLE     (1u << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
+ */
+
+/* Group of values representing the job status insead a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE   0x00
+#define JS_STATUS_INTERRUPTED         (JS_STATUS_NO_EXCEPTION_BASE + 0x02)	/* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED             (JS_STATUS_NO_EXCEPTION_BASE + 0x03)	/* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED          (JS_STATUS_NO_EXCEPTION_BASE + 0x04)	/* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE          0x40
+#define JS_STATUS_CONFIG_FAULT        (JS_STATUS_FAULT_BASE)	/* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT         (JS_STATUS_FAULT_BASE + 0x01)	/* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT          (JS_STATUS_FAULT_BASE + 0x02)	/* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT         (JS_STATUS_FAULT_BASE + 0x03)	/* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT      (JS_STATUS_FAULT_BASE + 0x04)	/* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT           (JS_STATUS_FAULT_BASE + 0x08)	/* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE  0x50
+#define JS_STATUS_INSTR_INVALID_PC        (JS_STATUS_INSTRUCTION_FAULT_BASE)	/* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01)	/* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02)	/* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03)	/* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT         (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04)	/* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05)	/* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06)	/* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT      (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08)	/* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT        (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09)	/* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A)	/* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE   0x60
+#define JS_STATUS_OUT_OF_MEMORY       (JS_STATUS_MEMORY_FAULT_BASE)	/* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN             0x7F	/* 0x7F means UNKNOWN */
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP                0x00	/* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET         0x01	/* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET         0x02	/* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR       0x03	/* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE      0x04	/* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START  0x05	/* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP   0x06	/* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES       0x07	/* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES   0x08	/* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09	/* Places the GPU in protected mode */
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE           (1 << 2)	/* Set if the performance counters are active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE   (1 << 7)	/* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT      0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT        4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+
+#define PRFCNT_CONFIG_MODE_OFF    0	/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_MANUAL 1	/* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
+#define PRFCNT_CONFIG_MODE_TILE   2	/* The performance counters are enabled, and are written out each time a tile finishes rendering. */
+
+/* AS<n>_MEMATTR values: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
+
+/* Use GPU implementation-defined  caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC           0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
+
+/* Symbol for default MEMATTR to use */
+
+/* Default is - HW implementation defined caching */
+#define AS_MEMATTR_INDEX_DEFAULT               0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE           3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL    1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC           2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF        3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA              4
+
+/* JS<n>_FEATURES register */
+
+#define JS_FEATURE_NULL_JOB              (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB         (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB       (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB           (1u << 4)
+#define JS_FEATURE_VERTEX_JOB            (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB          (1u << 6)
+#define JS_FEATURE_TILER_JOB             (1u << 7)
+#define JS_FEATURE_FUSED_JOB             (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB          (1u << 9)
+
+/* End JS<n>_FEATURES register */
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT        (24)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS              (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT       (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER      (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF         (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT       (26)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES             (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT      (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER     (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF        (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED  0
+#define IMPLEMENTATION_SILICON      1
+#define IMPLEMENTATION_FPGA         2
+#define IMPLEMENTATION_MODEL        3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT     256
+#define THREAD_MWS_DEFAULT    256
+#define THREAD_MBS_DEFAULT    256
+#define THREAD_MR_DEFAULT     1024
+#define THREAD_MTQ_DEFAULT    4
+#define THREAD_MTGS_DEFAULT   10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+
+#define SC_ALT_COUNTERS             (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL  (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD   (1ul << 6)
+#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
+#define SC_LS_PAUSEBUFFER_DISABLE   (1ul << 16)
+#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
+#define SC_ENABLE_TEXGRD_FLAGS      (1ul << 25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+
+#define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
+
+/* End TILER_CONFIG register */
+
+/* JM_CONFIG register */
+
+#define JM_TIMESTAMP_OVERRIDE  (1ul << 0)
+#define JM_CLOCK_GATE_OVERRIDE (1ul << 1)
+#define JM_JOB_THROTTLE_ENABLE (1ul << 2)
+#define JM_JOB_THROTTLE_LIMIT_SHIFT (3)
+#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F)
+#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2)
+#define JM_IDVS_GROUP_SIZE_SHIFT (16)
+#define JM_MAX_IDVS_GROUP_SIZE (0x3F)
+#define JM_CONFIG_UNUSED (1ul << 31)
+/* End JM_CONFIG register */
+
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
new file mode 100644
index 0000000..bd5f661
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_timeline.h
@@ -0,0 +1,396 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali_timeline
+
+#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _MALI_TIMELINE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mali_timeline_atoms_in_flight,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int tgid,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		tgid,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, tgid)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->tgid = tgid;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT,
+				(int)__entry->ts_sec,
+				(int)__entry->ts_nsec,
+				__entry->tgid,
+				__entry->count)
+);
+
+
+TRACE_EVENT(mali_timeline_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->atom_id,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_action,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int active),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		active),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, active)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->active = active;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->active)
+
+);
+
+TRACE_EVENT(mali_timeline_l2_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int state),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		state),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, state)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->state = state;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->state)
+
+);
+TRACE_EVENT(mali_timeline_pm_event,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int pm_event_type,
+		unsigned int pm_event_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		pm_event_type,
+		pm_event_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, pm_event_type)
+			__field(unsigned int, pm_event_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->pm_event_type = pm_event_type;
+		__entry->pm_event_id = pm_event_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->pm_event_type, __entry->pm_event_id)
+
+);
+
+TRACE_EVENT(mali_timeline_slot_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_pm_checktrans,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int trans_code,
+		int trans_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		trans_code,
+		trans_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, trans_code)
+			__field(int, trans_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->trans_code = trans_code;
+		__entry->trans_id = trans_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->trans_id)
+
+);
+
+TRACE_EVENT(mali_timeline_context_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->count)
+);
+
+#endif /* _MALI_TIMELINE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_uk.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
new file mode 100644
index 0000000..841d03f
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,141 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif				/* __cplusplus */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
+ * identifier to select a UKK client to the uku_open() function.
+ *
+ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
+ * enumeration and the uku_open() implemenation for the various OS ports need to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+	/**
+	 * Value used to identify the Base driver UK client.
+	 */
+	UK_CLIENT_MALI_T600_BASE,
+
+	/** The number of uk clients supported. This must be the last member of the enum */
+	UK_CLIENT_COUNT
+};
+
+/**
+ * Each function callable through the UK interface has a unique number.
+ * Functions provided by UK clients start from number UK_FUNC_ID.
+ * Numbers below UK_FUNC_ID are used for internal UK functions.
+ */
+enum uk_func {
+	UKP_FUNC_ID_CHECK_VERSION,   /**< UKK Core internal function */
+	/**
+	 * Each UK client numbers the functions they provide starting from
+	 * number UK_FUNC_ID. This number is then eventually assigned to the
+	 * id field of the union uk_header structure when preparing to make a
+	 * UK call. See your UK client for a list of their function numbers.
+	 */
+	UK_FUNC_ID = 512
+};
+
+/**
+ * Arguments for a UK call are stored in a structure. This structure consists
+ * of a fixed size header and a payload. The header carries a 32-bit number
+ * identifying the UK function to be called (see uk_func). When the UKK client
+ * receives this header and executed the requested UK function, it will use
+ * the same header to store the result of the function in the form of a
+ * int return code. The size of this structure is such that the
+ * first member of the payload following the header can be accessed efficiently
+ * on a 32 and 64-bit kernel and the structure has the same size regardless
+ * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
+ * accordingly in the OS specific mali_uk_os.h header file.
+ */
+union uk_header {
+	/**
+	 * 32-bit number identifying the UK function to be called.
+	 * Also see uk_func.
+	 */
+	u32 id;
+	/**
+	 * The int return code returned by the called UK function.
+	 * See the specification of the particular UK function you are
+	 * calling for the meaning of the error codes returned. All
+	 * UK functions return 0 on success.
+	 */
+	u32 ret;
+	/*
+	 * Used to ensure 64-bit alignment of this union. Do not remove.
+	 * This field is used for padding and does not need to be initialized.
+	 */
+	u64 sizer;
+};
+
+/**
+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
+ */
+struct uku_version_check_args {
+	union uk_header header;
+		  /**< UK call header */
+	u16 major;
+	   /**< This field carries the user-side major version on input and the kernel-side major version on output */
+	u16 minor;
+	   /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
+	u8 padding[4];
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif				/* __cplusplus */
+#endif				/* _UK_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild
new file mode 100644
index 0000000..558657b
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/Kbuild
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+	platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+	obj-y += $(platform_name)/
+endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100644
index 0000000..8fb4e91
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name is the name set in MALI_PLATFORM_THIRDPARTY_NAME
+#
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
new file mode 100644
index 0000000..de8849e
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+USE_GPPLL?=0
+ifdef CONFIG_AM_VIDEO
+    USE_GPPLL:=1
+endif
+
+ccflags-y += -DAMLOGIC_GPU_USE_GPPLL=$(USE_GPPLL)
+
+ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-y += platform/devicetree/mali_clock.c
+obj-y += platform/devicetree/mpgpu.c
+obj-y += platform/devicetree/meson_main2.c
+obj-y += platform/devicetree/platform_gx.c
+obj-y += platform/devicetree/scaling.c
+obj-y += mali_kbase_runtime_pm.c
+obj-y += mali_kbase_config_devicetree.c
+else ifeq ($(CONFIG_MALI_MIDGARD),m)
+SRC += platform/devicetree/mali_clock.c
+SRC += platform/devicetree/mpgpu.c
+SRC += platform/devicetree/meson_main2.c
+SRC += platform/devicetree/platform_gx.c
+SRC += platform/devicetree/scaling.c
+SRC += platform/devicetree/mali_kbase_runtime_pm.c
+SRC += platform/devicetree/mali_kbase_config_devicetree.c
+endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
new file mode 100644
index 0000000..35249a5
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
@@ -0,0 +1,653 @@
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+#ifndef AML_CLK_LOCK_ERROR
+#define AML_CLK_LOCK_ERROR 1
+#endif
+
+static unsigned gpu_dbg_level = 0;
+module_param(gpu_dbg_level, uint, 0644);
+MODULE_PARM_DESC(gpu_dbg_level, "gpu debug level");
+
+#define gpu_dbg(level, fmt, arg...)			\
+	do {			\
+		if (gpu_dbg_level >= (level))		\
+		printk("gpu_debug"fmt , ## arg); 	\
+	} while (0)
+
+#define GPU_CLK_DBG(fmt, arg...)
+
+//disable print
+#define _dev_info(...)
+
+//static DEFINE_SPINLOCK(lock);
+static mali_plat_info_t* pmali_plat = NULL;
+//static u32 mali_extr_backup = 0;
+//static u32 mali_extr_sample_backup = 0;
+struct timeval start;
+struct timeval end;
+int mali_pm_statue = 0;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali_0_parent = dvfs_tbl->clkp_handle;
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+#ifdef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+#endif
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_set_parent(clk_mali_0, clk_mali_0_parent);
+
+	clk_prepare_enable(clk_mali_0);
+
+	clk_set_parent(clk_mali, clk_mali_0);
+
+#ifdef AML_CLK_LOCK_ERROR
+	clk_set_parent(clk_mali_1, clk_mali_0_parent);
+	clk_prepare_enable(clk_mali_1);
+#endif
+
+	GPU_CLK_DBG("%s:enable(%d), %s:enable(%d)\n",
+	  clk_mali_0->name,  clk_mali_0->enable_count,
+	  clk_mali_0_parent->name, clk_mali_0_parent->enable_count);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+	struct clk *clk_mali_x   = NULL;
+	struct clk *clk_mali_x_parent = NULL;
+	struct clk *clk_mali_x_old = NULL;
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+	clk_mali_x_old  = clk_get_parent(clk_mali);
+
+	if (!clk_mali_x_old) {
+		printk("gpu: could not get clk_mali_x_old or clk_mali_x_old\n");
+		return 0;
+	}
+	if (clk_mali_x_old == clk_mali_0) {
+		clk_mali_x = clk_mali_1;
+	} else if (clk_mali_x_old == clk_mali_1) {
+		clk_mali_x = clk_mali_0;
+	} else {
+		printk("gpu: unmatched clk_mali_x_old\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG("idx=%d, clk_freq=%d\n", idx, dvfs_tbl->clk_freq);
+	clk_mali_x_parent = dvfs_tbl->clkp_handle;
+	if (!clk_mali_x_parent) {
+		printk("gpu: could not get clk_mali_x_parent\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x_parent, dvfs_tbl->clkp_freq);
+	GPU_CLK_DBG();
+	ret = clk_set_parent(clk_mali_x, clk_mali_x_parent);
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x, dvfs_tbl->clk_freq);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	ret = clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG("new %s:enable(%d)\n", clk_mali_x->name,  clk_mali_x->enable_count);
+	do_gettimeofday(&start);
+	udelay(1);// delay 10ns
+	do_gettimeofday(&end);
+	ret = clk_set_parent(clk_mali, clk_mali_x);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	GPU_CLK_DBG("old %s:enable(%d)\n", clk_mali_x_old->name,  clk_mali_x_old->enable_count);
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "ao io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	mpdata->dvfs_table_size = 0;
+
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+	dev_notice(&pdev->dev, "dvfs table is %d\n", mpdata->dvfs_table_size);
+	dev_notice(&pdev->dev, "dvfs table addr %p, ele size=%zd\n",
+			mpdata->dvfs_table,
+			sizeof(mpdata->dvfs_table[0]));
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "clk_gpu");
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_gpu_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_gpu_1");
+	if (IS_ERR(mpdata->clk_mali) || IS_ERR(mpdata->clk_mali_0) || IS_ERR(mpdata->clk_mali_1)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+#else
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	//mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+
+	GPU_CLK_DBG();
+	do_gettimeofday(&start);
+	ret = clk_set_rate(clk_mali, dvfs_tbl->clk_freq);
+	do_gettimeofday(&end);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	GPU_CLK_DBG();
+	clk_disable_unprepare(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+#if 0
+#ifdef MESON_CPU_VERSION_OPS
+		if (is_meson_gxbbm_cpu()) {
+			if (dvfs_tbl->clk_freq >= GXBBM_MAX_GPU_FREQ)
+				continue;
+		}
+#endif
+#endif
+#if  0
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+#endif
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+	_dev_info(&pdev->dev, "clock dvfs table size is %d\n", mpdata->dvfs_table_size);
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "gpu_mux");
+#if 0
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_mali_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_mali_1");
+#endif
+	if (IS_ERR(mpdata->clk_mali)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
new file mode 100644
index 0000000..9b8b392
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
@@ -0,0 +1,37 @@
+#ifndef __MALI_CLOCK_H__
+#define __MALI_CLOCK_H__
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <linux/clk.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 29))
+#include <linux/amlogic/iomap.h>
+#endif
+
+#ifndef HHI_MALI_CLK_CNTL
+#define HHI_MALI_CLK_CNTL   0x6C
+#define mplt_read(r)        readl((pmali_plat->reg_base_hiubus) + ((r)<<2))
+#define mplt_write(r, v)    writel((v), ((pmali_plat->reg_base_hiubus) + ((r)<<2)))
+#define mplt_setbits(r, m)  mplt_write((r), (mplt_read(r) | (m)));
+#define mplt_clrbits(r, m)  mplt_write((r), (mplt_read(r) & (~(m))));
+#endif
+
+//extern int mali_clock_init(struct platform_device *dev);
+int mali_clock_init_clk_tree(struct platform_device *pdev);
+
+typedef int (*critical_t)(size_t param);
+int mali_clock_critical(critical_t critical, size_t param);
+
+int mali_clock_init(mali_plat_info_t*);
+int mali_clock_set(unsigned int index);
+void disable_clock(void);
+void enable_clock(void);
+u32 get_mali_freq(u32 idx);
+void set_str_src(u32 data);
+int mali_dt_info(struct platform_device *pdev,
+        struct mali_plat_info_t *mpdata);
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
new file mode 100644
index 0000000..1267b6f
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
@@ -0,0 +1,109 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/version.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static unsigned long t83x_static_power(unsigned long voltage)
+{
+#if 0
+	struct thermal_zone_device *tz;
+	unsigned long temperature, temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10);
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	tz = thermal_zone_get_zone_by_name("gpu");
+	if (IS_ERR(tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(tz));
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	} else {
+		int ret;
+
+		ret = tz->ops->get_temp(tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(2 * temp_cubed)
+			- (80 * temp_squared)
+			+ (4700 * temp)
+			+ 32000;
+
+	return (((coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+#else
+	return 0;
+#endif
+}
+
+static unsigned long t83x_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+	const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */
+
+	return (coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+struct devfreq_cooling_ops t83x_model_ops = {
+#else
+struct devfreq_cooling_power t83x_model_ops = {
+#endif
+	.get_static_power = t83x_static_power,
+	.get_dynamic_power = t83x_dynamic_power,
+};
+
+#endif
+
+#include <mali_kbase_config.h>
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &dummy_platform_config;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
new file mode 100644
index 0000000..13af899
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (750000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (100000)
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (NULL)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+extern struct kbase_platform_funcs_conf dt_funcs_conf;
+#define PLATFORM_FUNCS (&dt_funcs_conf)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#ifdef CONFIG_DEVFREQ_THERMAL
+#define POWER_MODEL_CALLBACKS (&t83x_model_ops)
+extern struct devfreq_cooling_ops t83x_model_ops;
+#else
+#define POWER_MODEL_CALLBACKS (NULL)
+#endif
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
new file mode 100644
index 0000000..3b3f095
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
@@ -0,0 +1,244 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+
+void *reg_base_hiubus = NULL;
+u32  override_value_aml = 0;
+static int first = 1;
+
+static void  Mali_pwr_on_with_kdev ( struct kbase_device *kbdev, uint32_t  mask)
+{
+    uint32_t    part1_done;
+    part1_done = 0;
+    Mali_WrReg(0x0, 0x0, 0x0000024, 0xffffffff); // clear interrupts
+
+    if ((mask & 0x1) != 0 ) {
+        Mali_WrReg(0x0, 0x0, 0x00000190, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x00000194, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x000001a0, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x000001a4, 0xffffffff);    // Power on all cores
+    }
+
+    if ( (mask >> 1) != 0 ) {
+        Mali_WrReg(0x0, 0x0, 0x00000180, mask >> 1);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x00000184, 0x0);    // Power on all cores
+    }
+
+    if ( mask != 0 ) {
+        udelay(10);
+        part1_done = Mali_RdReg(0x0, 0x0, 0x0000020);
+        //printk("%s, %d\n", __func__, __LINE__);
+        while((part1_done ==0)) { part1_done = Mali_RdReg(0x0, 0x0, 0x00000020); udelay(10); }
+        //printk("Mali_pwr_on:gpu_irq : %x\n", part1_done);
+        Mali_WrReg(0x0, 0x0, 0x0000024, 0xffffffff); // clear interrupts
+    }
+}
+
+static void gpu_power_main( struct kbase_device *kbdev) {
+
+    Mali_pwr_on ( 0x7);
+
+    printk("%s, %d\n", __func__, __LINE__);
+}
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	int ret;
+	struct platform_device *pdev = to_platform_device(kbdev->dev);
+	struct resource *reg_res;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+    //printk("20151013, %s, %d\n", __FILE__, __LINE__);
+    if (first == 0) goto ret;
+
+    reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+    if (!reg_res) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) 
+        reg_base_hiubus = ioremap(reg_res->start, resource_size(reg_res));
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+
+//JOHNT
+    // Level reset mail
+
+    // Level reset mail
+    //Wr(P_RESET2_MASK, ~(0x1<<14));
+    //Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    //Wr(P_RESET2_LEVEL, 0xffffffff);
+    //Wr(P_RESET0_LEVEL, 0xffffffff);
+
+    value = Rd(RESET0_MASK);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_MASK, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+///////////////
+    value = Rd(RESET2_MASK);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_MASK, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value | ((0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value | ((0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    udelay(10); // OR POLL for reset done
+
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+
+    gpu_power_main(kbdev);
+    //printk("set PWR_ORRIDE, reg=%p, reg_start=%llx, reg_size=%zx, reg_mapped=%p\n",
+    //        kbdev->reg, kbdev->reg_start, kbdev->reg_size, reg_base_hiubus);
+	dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+			(void *)kbdev->dev->pm_domain);
+
+    first = 0;
+    //printk("%s, %d\n", __FILE__, __LINE__);
+ret:
+	ret = pm_runtime_get_sync(kbdev->dev);
+    udelay(100);
+#if 1
+
+    core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+    l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+    tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+    //printk("core_ready=%llx, l2_ready=%llx, tiler_ready=%llx\n", core_ready, l2_ready, tiler_ready);
+#endif
+	dev_dbg(kbdev->dev, "pm_runtime_get returned %d\n", ret);
+
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+    //printk("%s, %d\n", __FILE__, __LINE__);
+#if 0
+    iounmap(reg_base_hiubus);
+    reg_base_hiubus = NULL;
+#endif
+	u64 core_ready  = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+	u64 l2_ready    = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+	u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+
+	dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+	if (core_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, core_ready, ACTION_PWROFF);
+
+	if (l2_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_ready, ACTION_PWROFF);
+
+	if (tiler_ready)
+		kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_ready, ACTION_PWROFF);
+
+	pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+	pm_runtime_enable(kbdev->dev);
+
+	return 0;
+}
+
+void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+	pm_runtime_disable(kbdev->dev);
+}
+
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+	return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+	int ret = pm_callback_runtime_on(kbdev);
+
+	WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+	pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback = pm_callback_suspend,
+	.power_resume_callback = pm_callback_resume,
+#ifdef KBASE_PM_RUNTIME
+	.power_runtime_init_callback = kbase_device_runtime_init,
+	.power_runtime_term_callback = kbase_device_runtime_disable,
+	.power_runtime_on_callback = pm_callback_runtime_on,
+	.power_runtime_off_callback = pm_callback_runtime_off,
+#else				/* KBASE_PM_RUNTIME */
+	.power_runtime_init_callback = NULL,
+	.power_runtime_term_callback = NULL,
+	.power_runtime_on_callback = NULL,
+	.power_runtime_off_callback = NULL,
+#endif				/* KBASE_PM_RUNTIME */
+};
+
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
new file mode 100644
index 0000000..41185d0
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
@@ -0,0 +1,15 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/kernel.h>
+#ifndef MALI_PLATFORM_H_
+#define MALI_PLATFORM_H_
+
+extern u32 mali_gp_reset_fail;
+extern u32 mali_core_timeout;
+
+#endif /* MALI_PLATFORM_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
new file mode 100644
index 0000000..b541090
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.h
+ * Example core scaling policy.
+ */
+
+#ifndef __ARM_CORE_SCALING_H__
+#define __ARM_CORE_SCALING_H__
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+
+enum mali_scale_mode_t {
+	MALI_PP_SCALING = 0,
+	MALI_PP_FS_SCALING,
+	MALI_SCALING_DISABLE,
+	MALI_TURBO_MODE,
+	MALI_SCALING_MODE_MAX
+};
+
+typedef struct mali_dvfs_threshold_table {
+	uint32_t    freq_index;
+	uint32_t    voltage;
+	uint32_t    keep_count;
+	uint32_t    downthreshold;
+	uint32_t    upthreshold;
+    uint32_t    clk_freq;
+    const char  *clk_parent;
+    struct clk  *clkp_handle;
+    uint32_t    clkp_freq;
+} mali_dvfs_threshold_table;
+
+/**
+ *  restrictions on frequency and number of pp.
+ */
+typedef struct mali_scale_info_t {
+	u32 minpp;
+	u32 maxpp;
+	u32 minclk;
+	u32 maxclk;
+} mali_scale_info_t;
+
+/**
+ * Platform spesific data for meson chips.
+ */
+typedef struct mali_plat_info_t {
+	u32 cfg_pp; /* number of pp. */
+	u32 cfg_min_pp;
+	u32 turbo_clock; /* reserved clock src. */
+	u32 def_clock; /* gpu clock used most of time.*/
+	u32 cfg_clock; /* max clock could be used.*/
+	u32 cfg_clock_bkup; /* same as cfg_clock, for backup. */
+	u32 cfg_min_clock;
+
+	u32  sc_mpp; /* number of pp used most of time.*/
+	u32  bst_gpu; /* threshold for boosting gpu. */
+	u32  bst_pp; /* threshold for boosting PP. */
+
+	u32 *clk;
+	u32 *clk_sample;
+	u32 clk_len;
+	u32 have_switch; /* have clock gate switch or not. */
+
+	mali_dvfs_threshold_table *dvfs_table;
+	u32 dvfs_table_size;
+
+	mali_scale_info_t scale_info;
+	u32 maxclk_sysfs;
+	u32 maxpp_sysfs;
+
+	/* set upper limit of pp or frequency, for THERMAL thermal or band width saving.*/
+	u32 limit_on;
+
+	/* for boost up gpu by user. */
+	void (*plat_preheat)(void);
+
+    struct platform_device *pdev;
+    void __iomem *reg_base_hiubus;
+    void __iomem *reg_base_aobus;
+	struct work_struct wq_work;
+    struct clk *clk_mali;
+    struct clk *clk_mali_0;
+    struct clk *clk_mali_1;
+} mali_plat_info_t;
+mali_plat_info_t* get_mali_plat_data(void);
+
+/**
+ * Initialize core scaling policy.
+ *
+ * @note The core scaling policy will assume that all PP cores are on initially.
+ *
+ * @param num_pp_cores Total number of PP cores.
+ */
+int mali_core_scaling_init(mali_plat_info_t*);
+
+/**
+ * Terminate core scaling policy.
+ */
+void mali_core_scaling_term(void);
+
+/**
+ * cancel and flush scaling job queue.
+ */
+void flush_scaling_job(void);
+
+/* get current state(pp, clk). */
+void get_mali_rt_clkpp(u32* clk, u32* pp);
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush);
+void revise_mali_rt(void);
+/* get max gpu clk level of this chip*/
+int get_gpu_max_clk_level(void);
+
+/* get or set the scale mode. */
+u32 get_mali_schel_mode(void);
+void set_mali_schel_mode(u32 mode);
+
+/* for frequency reporter in DS-5 streamline. */
+u32 get_current_frequency(void);
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+#endif /* __ARM_CORE_SCALING_H__ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
new file mode 100644
index 0000000..7687f92
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2010, 2012-2014 Amlogic Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ */
+
+/**
+ * @file mali_platform.c
+ * Platform specific Mali driver functions for:
+ * meson8m2 and the newer chip
+ */
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#include <asm/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/moduleparam.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_defs.h>
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+extern void mali_post_init(void);
+struct kbase_device;
+//static int gpu_dvfs_probe(struct platform_device *pdev)
+int platform_dt_init_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	int err = -1;
+
+	err = mali_meson_init_start(pdev);
+    mali_meson_init_finish(pdev);
+    mpgpu_class_init();
+    mali_post_init();
+    return err;
+}
+
+//static int gpu_dvfs_remove(struct platform_device *pdev)
+void platform_dt_term_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	printk("%s, %d\n", __FILE__, __LINE__);
+
+    mpgpu_class_exit();
+	mali_meson_uninit(pdev);
+
+}
+
+inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2])
+{
+    mali_gpu_utilization_callback(utilisation*255/100);
+    return 1;
+}
+
+struct kbase_platform_funcs_conf dt_funcs_conf = {
+    .platform_init_func = platform_dt_init_func,
+    .platform_term_func = platform_dt_term_func,
+};
+#if 0
+static const struct of_device_id gpu_dvfs_ids[] = {
+	{ .compatible = "meson, gpu-dvfs-1.00.a" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, gpu_dvfs_ids);
+
+static struct platform_driver gpu_dvfs_driver = {
+	.driver = {
+		.name = "meson-gpu-dvfs",
+		.owner = THIS_MODULE,
+		.of_match_table = gpu_dvfs_ids,
+	},
+	.probe = gpu_dvfs_probe,
+	.remove = gpu_dvfs_remove,
+};
+module_platform_driver(gpu_dvfs_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Amlogic SH, MM");
+MODULE_DESCRIPTION("Driver for the Meson GPU dvfs");
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
new file mode 100644
index 0000000..ca79752
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
@@ -0,0 +1,33 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#ifndef MESON_MAIN_H_
+#define MESON_MAIN_H_
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+u32 set_max_mali_freq(u32 idx);
+u32 get_max_mali_freq(void);
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev);
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev);
+int mali_meson_uninit(struct platform_device* ptr_plt_dev);
+int mpgpu_class_init(void);
+void mpgpu_class_exit(void);
+void mali_gpu_utilization_callback(int utilization_pp);
+
+#endif /* MESON_MAIN_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
new file mode 100644
index 0000000..0cc5035
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
@@ -0,0 +1,359 @@
+/*******************************************************************
+ *
+ *  Copyright C 2013 by Amlogic, Inc. All Rights Reserved.
+ *
+ *  Description:
+ *
+ *  Author: Amlogic Software
+ *  Created: 2010/4/1   19:46
+ *
+ *******************************************************************/
+/* Standard Linux headers */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/io.h>
+#include <plat/io.h>
+#include <asm/io.h>
+#endif
+
+#include "meson_main2.h"
+
+static ssize_t domain_stat_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	unsigned int val;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+	return sprintf(buf, "%x\n", val>>4);
+	return 0;
+}
+
+#define PREHEAT_CMD "preheat"
+#define PLL2_CMD "mpl2"  /* mpl2 [11] or [0xxxxxxx] */
+#define SCMPP_CMD "scmpp"  /* scmpp [number of pp your want in most of time]. */
+#define BSTGPU_CMD "bstgpu"  /* bstgpu [0-256] */
+#define BSTPP_CMD "bstpp"  /* bstpp [0-256] */
+#define LIMIT_CMD "lmt"  /* lmt [0 or 1] */
+#define MAX_TOKEN 20
+#define FULL_UTILIZATION 256
+
+static ssize_t mpgpu_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	char *pstart, *cprt = NULL;
+	u32 val = 0;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	cprt = skip_spaces(buf);
+	pstart = strsep(&cprt," ");
+	if (strlen(pstart) < 1)
+		goto quit;
+
+	if (!strncmp(pstart, PREHEAT_CMD, MAX_TOKEN)) {
+		if (pmali_plat->plat_preheat) {
+			pmali_plat->plat_preheat();
+		}
+	} else if (!strncmp(pstart, PLL2_CMD, MAX_TOKEN)) {
+		int base = 10;
+		if ((strlen(cprt) > 2) && (cprt[0] == '0') &&
+				(cprt[1] == 'x' || cprt[1] == 'X'))
+			base = 16;
+		if (kstrtouint(cprt, base, &val) <0)
+			goto quit;
+		if (val < 11)
+			pmali_plat->cfg_clock = pmali_plat->cfg_clock_bkup;
+		else
+			pmali_plat->cfg_clock = pmali_plat->turbo_clock;
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		set_str_src(val);
+	} else if (!strncmp(pstart, SCMPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < pmali_plat->cfg_pp)) {
+			pmali_plat->sc_mpp = val;
+		}
+	} else if (!strncmp(pstart, BSTGPU_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_gpu = val;
+		}
+	} else if (!strncmp(pstart, BSTPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_pp = val;
+		}
+	} else if (!strncmp(pstart, LIMIT_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+
+		if (val < 2) {
+			pmali_plat->limit_on = val;
+			if (val == 0) {
+				pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+				pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+				revise_mali_rt();
+			}
+		}
+	}
+quit:
+	return count;
+}
+
+static ssize_t scale_mode_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_mali_schel_mode());
+}
+
+static ssize_t scale_mode_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	set_mali_schel_mode(val);
+
+	return count;
+}
+
+#if 0
+static ssize_t max_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxpp:%d, maxpp_sysfs:%d, total=%d\n",
+			pmali_plat->scale_info.maxpp, pmali_plat->maxpp_sysfs,
+			pmali_plat->cfg_pp);
+	return sprintf(buf, "%d\n", pmali_plat->cfg_pp);
+}
+
+static ssize_t max_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_pp) || (val < pinfo->minpp))
+		return -EINVAL;
+
+	pmali_plat->maxpp_sysfs = val;
+	pinfo->maxpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minpp);
+}
+
+static ssize_t min_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxpp) || (val < 1))
+		return -EINVAL;
+
+	pinfo->minpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+#endif
+
+static ssize_t max_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxclk:%d, maxclk_sys:%d, max gpu level=%d\n",
+			pmali_plat->scale_info.maxclk, pmali_plat->maxclk_sysfs, get_gpu_max_clk_level());
+	return sprintf(buf, "%d\n", get_gpu_max_clk_level());
+}
+
+static ssize_t max_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_clock) || (val < pinfo->minclk))
+		return -EINVAL;
+
+	pmali_plat->maxclk_sysfs = val;
+	pinfo->maxclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minclk);
+}
+
+static ssize_t min_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxclk))
+		return -EINVAL;
+
+	pinfo->minclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_current_frequency());
+}
+
+static ssize_t freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(val, pp, 1);
+
+	return count;
+}
+
+#if 0
+static ssize_t current_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+	return sprintf(buf, "%d\n", pp);
+}
+
+static ssize_t current_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+
+	get_mali_rt_clkpp(&clk, &pp);
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(clk, val, 1);
+
+	return count;
+}
+#endif
+
+static struct class_attribute mali_class_attrs[] = {
+	__ATTR(domain_stat,	0644, domain_stat_read, NULL),
+	__ATTR(mpgpucmd,	0644, NULL,		mpgpu_write),
+	__ATTR(scale_mode,	0644, scale_mode_read,  scale_mode_write),
+	__ATTR(min_freq,	0644, min_freq_read,  	min_freq_write),
+	__ATTR(max_freq,	0644, max_freq_read,	max_freq_write),
+#if 0
+	__ATTR(min_pp,		0644, min_pp_read,	min_pp_write),
+	__ATTR(max_pp,		0644, max_pp_read,	max_pp_write),
+#endif
+	__ATTR(cur_freq,	0644, freq_read,	freq_write),
+#if 0
+	__ATTR(cur_pp,		0644, current_pp_read,	current_pp_write),
+#endif
+};
+
+static struct class mpgpu_class = {
+	.name = "mpgpu",
+};
+
+int mpgpu_class_init(void)
+{
+	int ret = 0;
+	int i;
+	int attr_num =  ARRAY_SIZE(mali_class_attrs);
+
+	ret = class_register(&mpgpu_class);
+	if (ret) {
+		printk(KERN_ERR "%s: class_register failed\n", __func__);
+		return ret;
+	}
+	for (i = 0; i< attr_num; i++) {
+		ret = class_create_file(&mpgpu_class, &mali_class_attrs[i]);
+		if (ret) {
+			printk(KERN_ERR "%d ST: class item failed to register\n", i);
+		}
+	}
+	return ret;
+}
+
+void  mpgpu_class_exit(void)
+{
+	class_unregister(&mpgpu_class);
+}
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
new file mode 100644
index 0000000..26a56f6
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
@@ -0,0 +1,245 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include <asm/io.h>
+#ifdef CONFIG_GPU_THERMAL
+#include <linux/gpu_cooling.h>
+#include <linux/gpucore_cooling.h>
+#include <linux/amlogic/aml_thermal_hw.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+/*
+ *    For Meson 8 M2.
+ *
+ */
+static void mali_plat_preheat(void);
+static mali_plat_info_t mali_plat_data = {
+    .bst_gpu = 210, /* threshold for boosting gpu. */
+    .bst_pp = 160, /* threshold for boosting PP. */
+    .have_switch = 1,
+    .limit_on = 1,
+    .plat_preheat = mali_plat_preheat,
+};
+
+static void mali_plat_preheat(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    u32 pre_fs;
+    u32 clk, pp;
+
+    if (get_mali_schel_mode() != MALI_PP_FS_SCALING)
+        return;
+
+    get_mali_rt_clkpp(&clk, &pp);
+    pre_fs = mali_plat_data.def_clock + 1;
+    if (clk < pre_fs)
+        clk = pre_fs;
+    if (pp < mali_plat_data.sc_mpp)
+        pp = mali_plat_data.sc_mpp;
+    set_mali_rt_clkpp(clk, pp, 1);
+#endif
+}
+
+mali_plat_info_t* get_mali_plat_data(void) {
+    return &mali_plat_data;
+}
+
+int get_mali_freq_level(int freq)
+{
+    int i = 0, level = -1;
+    int mali_freq_num;
+
+    if (freq < 0)
+        return level;
+
+    mali_freq_num = mali_plat_data.dvfs_table_size - 1;
+    if (freq <= mali_plat_data.clk_sample[0])
+        level = mali_freq_num-1;
+    else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1])
+        level = 0;
+    else {
+        for (i=0; i<mali_freq_num - 1 ;i++) {
+            if (freq >= mali_plat_data.clk_sample[i] && freq <= mali_plat_data.clk_sample[i + 1]) {
+                level = i;
+                level = mali_freq_num-level - 1;
+            }
+        }
+    }
+    return level;
+}
+
+unsigned int get_mali_max_level(void)
+{
+    return mali_plat_data.dvfs_table_size - 1;
+}
+
+int get_gpu_max_clk_level(void)
+{
+    return mali_plat_data.cfg_clock;
+}
+
+#ifdef CONFIG_GPU_THERMAL
+static void set_limit_mali_freq(u32 idx)
+{
+    if (mali_plat_data.limit_on == 0)
+        return;
+    if (idx > mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk)
+        return;
+    if (idx > mali_plat_data.maxclk_sysfs) {
+        printk("idx > max freq\n");
+        return;
+    }
+    mali_plat_data.scale_info.maxclk= idx;
+    revise_mali_rt();
+}
+
+static u32 get_limit_mali_freq(void)
+{
+    return mali_plat_data.scale_info.maxclk;
+}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 get_mali_utilization(void)
+{
+#ifndef MESON_DRV_BRING
+    return 55;
+#else
+    return (_mali_ukk_utilization_pp() * 100) / 256;
+#endif
+}
+#endif
+#endif
+
+#ifdef CONFIG_GPU_THERMAL
+static u32 set_limit_pp_num(u32 num)
+{
+    u32 ret = -1;
+    if (mali_plat_data.limit_on == 0)
+        goto quit;
+    if (num > mali_plat_data.cfg_pp ||
+            num < mali_plat_data.scale_info.minpp)
+        goto quit;
+
+    if (num > mali_plat_data.maxpp_sysfs) {
+        printk("pp > sysfs set pp\n");
+        goto quit;
+    }
+
+    mali_plat_data.scale_info.maxpp = num;
+    revise_mali_rt();
+    ret = 0;
+quit:
+    return ret;
+}
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 mali_get_online_pp(void)
+{
+    unsigned int val;
+    mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+    val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+    if (val == 0x07)    /* No pp is working */
+        return 0;
+
+#ifndef MESON_DRV_BRING
+    return 2;
+#else
+    return mali_executor_get_num_cores_enabled();
+#endif
+}
+#endif
+#endif
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+    //dev_set_drvdata(&ptr_plt_dev->dev, &mali_plat_data);
+    mali_dt_info(ptr_plt_dev, &mali_plat_data);
+    mali_clock_init_clk_tree(ptr_plt_dev);
+    return 0;
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+    if (mali_core_scaling_init(&mali_plat_data) < 0)
+        return -1;
+    return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+    mali_core_scaling_term();
+    return 0;
+}
+
+void mali_post_init(void)
+{
+#ifdef CONFIG_GPU_THERMAL
+    int err;
+    struct gpufreq_cooling_device *gcdev = NULL;
+    struct gpucore_cooling_device *gccdev = NULL;
+
+    gcdev = gpufreq_cooling_alloc();
+    register_gpu_freq_info(get_current_frequency);
+    if (IS_ERR(gcdev))
+        printk("malloc gpu cooling buffer error!!\n");
+    else if (!gcdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gcdev->get_gpu_freq_level = get_mali_freq_level;
+        gcdev->get_gpu_max_level = get_mali_max_level;
+        gcdev->set_gpu_freq_idx = set_limit_mali_freq;
+        gcdev->get_gpu_current_max_level = get_limit_mali_freq;
+#ifdef CONFIG_DEVFREQ_THERMAL
+        gcdev->get_gpu_freq = get_mali_freq;
+        gcdev->get_gpu_loading = get_mali_utilization;
+        gcdev->get_online_pp = mali_get_online_pp;
+#endif
+        err = gpufreq_cooling_register(gcdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gcdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu cooling register okay with err=%d\n",err);
+    }
+
+    gccdev = gpucore_cooling_alloc();
+    if (IS_ERR(gccdev))
+        printk("malloc gpu core cooling buffer error!!\n");
+    else if (!gccdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gccdev->max_gpu_core_num=mali_plat_data.cfg_pp;
+        gccdev->set_max_pp_num=set_limit_pp_num;
+        err = (int)gpucore_cooling_register(gccdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gccdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu core cooling register okay with err=%d\n",err);
+    }
+#endif
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
new file mode 100644
index 0000000..aa8a77a
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.c
+ * Example core scaling policy.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+
+#if AMLOGIC_GPU_USE_GPPLL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16)
+#include <linux/amlogic/amports/gp_pll.h>
+#else
+#include <linux/amlogic/media/clk/gp_pll.h>
+#endif
+#endif
+
+#define LOG_MALI_SCALING 1
+#include "meson_main2.h"
+#include "mali_clock.h"
+
+static int currentStep;
+#ifndef CONFIG_MALI_DVFS
+static int num_cores_enabled;
+static int lastStep;
+static struct work_struct wq_work;
+static mali_plat_info_t* pmali_plat = NULL;
+#endif
+static int  scaling_mode = MALI_PP_FS_SCALING;
+extern int  mali_pm_statue;
+//static int  scaling_mode = MALI_SCALING_DISABLE;
+//static int  scaling_mode = MALI_PP_SCALING;
+
+#if AMLOGIC_GPU_USE_GPPLL
+static struct gp_pll_user_handle_s *gp_pll_user_gpu;
+static int is_gp_pll_get;
+static int is_gp_pll_put;
+#endif
+static unsigned scaling_dbg_level = 0;
+module_param(scaling_dbg_level, uint, 0644);
+MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level");
+
+#define scalingdbg(level, fmt, arg...)				   \
+	do {											   \
+		if (scaling_dbg_level >= (level))			   \
+		printk(fmt , ## arg);						   \
+	} while (0)
+
+#ifndef CONFIG_MALI_DVFS
+static inline void mali_clk_exected(void)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	uint32_t execStep = currentStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[currentStep];
+#endif
+
+	//if (pdvfs[currentStep].freq_index == pdvfs[lastStep].freq_index) return;
+	if ((pdvfs[execStep].freq_index == pdvfs[lastStep].freq_index) ||
+		(pdvfs[execStep].clk_freq == pdvfs[lastStep].clk_freq)){
+		return;
+	}
+
+#if AMLOGIC_GPU_USE_GPPLL
+	if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+		gp_pll_request(gp_pll_user_gpu);
+		if (!is_gp_pll_get) {
+			//printk("not get pll\n");
+			execStep = currentStep - 1;
+		}
+	} else {
+		//not get the gp pll, do need put
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+		gp_pll_release(gp_pll_user_gpu);
+	}
+#endif
+	//mali_dev_pause();
+	mali_clock_set(pdvfs[execStep].freq_index);
+	//mali_dev_resume();
+	lastStep = execStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	if (is_gp_pll_put) {
+		//printk("release gp0 pll\n");
+		gp_pll_release(gp_pll_user_gpu);
+		gp_pll_request(gp_pll_user_gpu);
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+	}
+#endif
+
+}
+#if AMLOGIC_GPU_USE_GPPLL
+static int gp_pll_user_cb_gpu(struct gp_pll_user_handle_s *user,
+		int event)
+{
+	if (event == GP_PLL_USER_EVENT_GRANT) {
+		//printk("granted\n");
+		is_gp_pll_get = 1;
+		is_gp_pll_put = 0;
+		schedule_work(&wq_work);
+	} else if (event == GP_PLL_USER_EVENT_YIELD) {
+		//printk("ask for yield\n");
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 1;
+		schedule_work(&wq_work);
+	}
+
+	return 0;
+}
+#endif
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+    cores = cores;
+    return 0;
+}
+
+static void do_scaling(struct work_struct *work)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	int err = mali_perf_set_num_pp_cores(num_cores_enabled);
+    if (err < 0) scalingdbg(1, "set pp failed");
+
+	scalingdbg(1, "set pp cores to %d\n", num_cores_enabled);
+	scalingdbg(1, "pdvfs[%d].freq_index=%d, pdvfs[%d].freq_index=%d\n",
+			currentStep, pdvfs[currentStep].freq_index,
+			lastStep, pdvfs[lastStep].freq_index);
+	mali_clk_exected();
+#ifdef CONFIG_MALI400_PROFILING
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+			MALI_PROFILING_EVENT_CHANNEL_GPU |
+			MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+			get_current_frequency(),
+			0,	0,	0,	0);
+#endif
+}
+#endif
+
+u32 revise_set_clk(u32 val, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+
+	pinfo = &pmali_plat->scale_info;
+
+	if (val < pinfo->minclk)
+		val = pinfo->minclk;
+	else if (val >  pinfo->maxclk)
+		val =  pinfo->maxclk;
+
+	if (val != currentStep) {
+		currentStep = val;
+		if (flush)
+			schedule_work(&wq_work);
+		else
+			ret = 1;
+	}
+#endif
+	return ret;
+}
+
+void get_mali_rt_clkpp(u32* clk, u32* pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	*clk = currentStep;
+	*pp = num_cores_enabled;
+#endif
+}
+
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+	u32 flush_work = 0;
+
+	pinfo = &pmali_plat->scale_info;
+	if (clk < pinfo->minclk)
+		clk = pinfo->minclk;
+	else if (clk >  pinfo->maxclk)
+		clk =  pinfo->maxclk;
+
+	if (clk != currentStep) {
+		currentStep = clk;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+	if (pp < pinfo->minpp)
+		pp = pinfo->minpp;
+	else if (pp > pinfo->maxpp)
+		pp = pinfo->maxpp;
+
+	if (pp != num_cores_enabled) {
+		num_cores_enabled = pp;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+
+	if (flush_work)
+		schedule_work(&wq_work);
+#endif
+	return ret;
+}
+
+void revise_mali_rt(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	set_mali_rt_clkpp(currentStep, num_cores_enabled, 1);
+#endif
+}
+
+void flush_scaling_job(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	cancel_work_sync(&wq_work);
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 enable_one_core(void)
+{
+	scalingdbg(2, "meson:	 one more pp, curent has %d pp cores\n",  num_cores_enabled + 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled + 1, 0);
+}
+
+static u32 disable_one_core(void)
+{
+	scalingdbg(2, "meson: disable one pp, current has %d pp cores\n",  num_cores_enabled - 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled - 1, 0);
+}
+
+static u32 enable_max_num_cores(void)
+{
+	return set_mali_rt_clkpp(currentStep, pmali_plat->scale_info.maxpp, 0);
+}
+
+static u32 enable_pp_cores(u32 val)
+{
+	scalingdbg(2, "meson: enable %d pp cores\n", val);
+	return set_mali_rt_clkpp(currentStep, val, 0);
+}
+#endif
+
+int mali_core_scaling_init(mali_plat_info_t *mali_plat)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_plat == NULL) {
+		scalingdbg(2, " Mali platform data is NULL!!!\n");
+		return -1;
+	}
+
+	pmali_plat = mali_plat;
+    printk("mali_plat=%p\n", mali_plat);
+	num_cores_enabled = pmali_plat->sc_mpp;
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_gpu = gp_pll_user_register("gpu", 1,
+		gp_pll_user_cb_gpu);
+	//not get the gp pll, do need put
+	is_gp_pll_get = 0;
+	is_gp_pll_put = 0;
+	if (gp_pll_user_gpu == NULL) printk("register gp pll user for gpu failed\n");
+#endif
+
+	currentStep = pmali_plat->def_clock;
+	lastStep = currentStep;
+	INIT_WORK(&wq_work, do_scaling);
+#endif
+	return 0;
+	/* NOTE: Mali is not fully initialized at this point. */
+}
+
+void mali_core_scaling_term(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	flush_scheduled_work();
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_unregister(gp_pll_user_gpu);
+#endif
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 mali_threshold [] = {
+	40, /* 40% */
+	50, /* 50% */
+	90, /* 90% */
+};
+#endif
+
+void mali_pp_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+
+	if (mali_threshold[2] < utilization_pp)
+		ret = enable_max_num_cores();
+	else if (mali_threshold[1]< utilization_pp)
+		ret = enable_one_core();
+	else if (0 < utilization_pp)
+		ret = disable_one_core();
+	if (ret == 1)
+		schedule_work(&wq_work);
+#endif
+}
+
+#if LOG_MALI_SCALING
+void trace_utilization(int utilization_gpu, u32 current_idx, u32 next,
+		u32 current_pp, u32 next_pp)
+{
+	char direction;
+	if (next > current_idx)
+		direction = '>';
+	else if ((current_idx > pmali_plat->scale_info.minpp) && (next < current_idx))
+		direction = '<';
+	else
+		direction = '~';
+
+	scalingdbg(2, "[SCALING]%c (%3d-->%3d)@%3d{%3d - %3d}. pp:(%d-->%d)\n",
+			direction,
+			get_mali_freq(current_idx),
+			get_mali_freq(next),
+			utilization_gpu,
+			pmali_plat->dvfs_table[current_idx].downthreshold,
+			pmali_plat->dvfs_table[current_idx].upthreshold,
+			current_pp, next_pp);
+}
+#endif
+
+#ifndef CONFIG_MALI_DVFS
+static int mali_stay_count = 0;
+static void mali_decide_next_status(int utilization_pp, int* next_fs_idx,
+		int* pp_change_flag)
+{
+	u32 mali_up_limit, decided_fs_idx;
+	u32 ld_left, ld_right;
+	u32 ld_up, ld_down;
+	u32 change_mode;
+
+	*pp_change_flag = 0;
+	change_mode = 0;
+
+	scalingdbg(5, "line(%d), scaling_mode=%d, MALI_TURBO_MODE=%d, turbo=%d, maxclk=%d\n",
+			__LINE__,  scaling_mode, MALI_TURBO_MODE,
+		    pmali_plat->turbo_clock, pmali_plat->scale_info.maxclk);
+
+	mali_up_limit = (scaling_mode ==  MALI_TURBO_MODE) ?
+		pmali_plat->turbo_clock : pmali_plat->scale_info.maxclk;
+	decided_fs_idx = currentStep;
+
+	ld_up = pmali_plat->dvfs_table[currentStep].upthreshold;
+	ld_down = pmali_plat->dvfs_table[currentStep].downthreshold;
+
+	scalingdbg(2, "utilization=%d,  ld_up=%d\n ", utilization_pp,  ld_up);
+	if (utilization_pp >= ld_up) { /* go up */
+
+		scalingdbg(2, "currentStep=%d,  mali_up_limit=%d\n ", currentStep, mali_up_limit);
+		if (currentStep < mali_up_limit) {
+			change_mode = 1;
+			if ((currentStep < pmali_plat->def_clock) && (utilization_pp > pmali_plat->bst_gpu))
+				decided_fs_idx = pmali_plat->def_clock;
+			else
+				decided_fs_idx++;
+		}
+		if ((utilization_pp >= ld_up) &&
+				(num_cores_enabled < pmali_plat->scale_info.maxpp)) {
+			if ((num_cores_enabled < pmali_plat->sc_mpp) && (utilization_pp >= pmali_plat->bst_pp)) {
+				*pp_change_flag = 1;
+				change_mode = 1;
+			} else if (change_mode == 0) {
+				*pp_change_flag = 2;
+				change_mode = 1;
+			}
+		}
+#if LOG_MALI_SCALING
+		scalingdbg(2, "[nexting..] [LD:%d]-> FS[CRNT:%d LMT:%d NEXT:%d] PP[NUM:%d LMT:%d MD:%d][F:%d]\n",
+				utilization_pp, currentStep, mali_up_limit, decided_fs_idx,
+				num_cores_enabled, pmali_plat->scale_info.maxpp, *pp_change_flag, change_mode);
+#endif
+	} else if (utilization_pp <= ld_down) { /* go down */
+		if (mali_stay_count > 0) {
+			*next_fs_idx = decided_fs_idx;
+			mali_stay_count--;
+			return;
+		}
+
+		if (num_cores_enabled > pmali_plat->sc_mpp) {
+			change_mode = 1;
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		} else if (currentStep > pmali_plat->scale_info.minclk) {
+			change_mode = 1;
+		} else if (num_cores_enabled > 1) { /* decrease PPS */
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				scalingdbg(2, "ld_left=%d, ld_right=%d\n", ld_left, ld_right);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		}
+
+		if (change_mode == 1) {
+			decided_fs_idx--;
+		} else if (change_mode == 2) { /* decrease PPS */
+			*pp_change_flag = -1;
+		}
+	}
+
+	if (decided_fs_idx < 0 ) {
+		printk("gpu debug, next index below 0\n");
+		decided_fs_idx = 0;
+	}
+	if (decided_fs_idx > pmali_plat->scale_info.maxclk) {
+		decided_fs_idx = pmali_plat->scale_info.maxclk;
+		printk("gpu debug, next index above max-1, set to %d\n", decided_fs_idx);
+	}
+
+	if (change_mode)
+		mali_stay_count = pmali_plat->dvfs_table[decided_fs_idx].keep_count;
+
+	*next_fs_idx = decided_fs_idx;
+}
+#endif
+
+void mali_pp_fs_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+	int pp_change_flag = 0;
+	u32 next_idx = 0;
+
+#if LOG_MALI_SCALING
+	u32 last_pp = num_cores_enabled;
+#endif
+	mali_decide_next_status(utilization_pp, &next_idx, &pp_change_flag);
+
+	if (pp_change_flag == 1)
+		ret = enable_pp_cores(pmali_plat->sc_mpp);
+	else if (pp_change_flag == 2)
+		ret = enable_one_core();
+	else if (pp_change_flag == -1) {
+		ret = disable_one_core();
+	}
+
+#if LOG_MALI_SCALING
+	if (pp_change_flag || (next_idx != currentStep))
+		trace_utilization(utilization_pp, currentStep, next_idx, last_pp, num_cores_enabled);
+#endif
+
+	if (next_idx != currentStep) {
+		ret = 1;
+		currentStep = next_idx;
+	}
+
+	if (ret == 1)
+		schedule_work(&wq_work);
+#ifdef CONFIG_MALI400_PROFILING
+	else
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				MALI_PROFILING_EVENT_CHANNEL_GPU |
+				MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				get_current_frequency(),
+				0,	0,	0,	0);
+#endif
+#endif
+}
+
+u32 get_mali_schel_mode(void)
+{
+	return scaling_mode;
+}
+
+void set_mali_schel_mode(u32 mode)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mode >= MALI_SCALING_MODE_MAX)
+		return;
+	scaling_mode = mode;
+
+	//disable thermal in turbo mode
+	if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->limit_on = 0;
+	} else {
+		pmali_plat->limit_on = 1;
+	}
+	/* set default performance range. */
+	pmali_plat->scale_info.minclk = pmali_plat->cfg_min_clock;
+	pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+	pmali_plat->scale_info.minpp = pmali_plat->cfg_min_pp;
+	pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+
+	/* set current status and tune max freq */
+	if (scaling_mode == MALI_PP_FS_SCALING) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_pp_cores(pmali_plat->sc_mpp);
+	} else if (scaling_mode == MALI_SCALING_DISABLE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_max_num_cores();
+	} else if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->turbo_clock;
+		enable_max_num_cores();
+	}
+	currentStep = pmali_plat->scale_info.maxclk;
+	schedule_work(&wq_work);
+#endif
+}
+
+u32 get_current_frequency(void)
+{
+	return get_mali_freq(currentStep);
+}
+
+void mali_gpu_utilization_callback(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_pm_statue)
+		return;
+
+	switch (scaling_mode) {
+		case MALI_PP_FS_SCALING:
+			mali_pp_fs_scaling_update(utilization_pp);
+			break;
+		case MALI_PP_SCALING:
+			mali_pp_scaling_update(utilization_pp);
+			break;
+		default:
+			break;
+	}
+#endif
+}
+static u32 clk_cntl_save = 0;
+void mali_dev_freeze(void)
+{
+	clk_cntl_save = mplt_read(HHI_MALI_CLK_CNTL);
+}
+
+void mali_dev_restore(void)
+{
+
+	mplt_write(HHI_MALI_CLK_CNTL, clk_cntl_save);
+	if (pmali_plat && pmali_plat->pdev) {
+		mali_clock_init_clk_tree(pmali_plat->pdev);
+	} else {
+		printk("error: init clock failed, pmali_plat=%p, pmali_plat->pdev=%p\n",
+				pmali_plat, pmali_plat == NULL ? NULL: pmali_plat->pdev);
+	}
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
new file mode 100644
index 0000000..0449442
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
@@ -0,0 +1,22 @@
+#
+# (C) COPYRIGHT 2013-2014, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_juno_soc.o
+
+ifeq (($CONFIG_MALI_MIDGARD),y)
+obj-m += juno_mali_opp.o
+else ifeq ($(CONFIG_MALI_MIDGARD),m)
+SRC += platform/juno_soc/mali_kbase_config_juno_soc.c
+endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
new file mode 100644
index 0000000..ccfd8cc
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/scpi_protocol.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp_add opp_add
+#endif /* Linux >= 3.13 */
+
+
+static int init_juno_opps_from_scpi(struct device *dev)
+{
+	struct scpi_opp *sopp;
+	int i;
+
+	/* Hard coded for Juno. 2 is GPU domain */
+	sopp = scpi_dvfs_get_opps(2);
+	if (IS_ERR_OR_NULL(sopp))
+		return PTR_ERR(sopp);
+
+	for (i = 0; i < sopp->count; i++) {
+		struct scpi_opp_entry *e = &sopp->opp[i];
+
+		dev_info(dev, "Mali OPP from SCPI: %u Hz @ %u mV\n",
+				e->freq_hz, e->volt_mv);
+
+		dev_pm_opp_add(dev, e->freq_hz, e->volt_mv * 1000);
+	}
+
+	return 0;
+}
+
+static int juno_setup_opps(void)
+{
+	struct device_node *np;
+	struct platform_device *pdev;
+	int err;
+
+	np = of_find_node_by_name(NULL, "gpu");
+	if (!np) {
+		pr_err("Failed to find DT entry for Mali\n");
+		return -EFAULT;
+	}
+
+	pdev = of_find_device_by_node(np);
+	if (!pdev) {
+		pr_err("Failed to find device for Mali\n");
+		of_node_put(np);
+		return -EFAULT;
+	}
+
+	err = init_juno_opps_from_scpi(&pdev->dev);
+
+	of_node_put(np);
+
+	return err;
+}
+
+module_init(juno_setup_opps);
+MODULE_LICENSE("GPL");
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
new file mode 100644
index 0000000..d237a38
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
@@ -0,0 +1,150 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <linux/thermal.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_smc.h>
+
+/* Versatile Express (VE) Juno Development Platform */
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 65,
+	.mmu_irq_number = 66,
+	.gpu_irq_number = 64,
+	.io_memory_region = {
+			     .start = 0x2D000000,
+			     .end = 0x2D000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+/*
+ * Juno Protected Mode integration
+ */
+
+/* SMC Function Numbers */
+#define JUNO_SMC_PROTECTED_ENTER_FUNC  0xff06
+#define JUNO_SMC_PROTECTED_RESET_FUNC 0xff07
+
+static int juno_protected_mode_enter(struct kbase_device *kbdev)
+{
+	/* T62X in SoC detected */
+	u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+		JUNO_SMC_PROTECTED_ENTER_FUNC, false,
+		0, 0, 0);
+	return ret;
+}
+
+/* TODO: Remove these externs, reset should should be done by the firmware */
+extern void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+extern u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+static int juno_protected_mode_reset(struct kbase_device *kbdev)
+{
+
+	/* T62X in SoC detected */
+	u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+		JUNO_SMC_PROTECTED_RESET_FUNC, false,
+		0, 0, 0);
+
+	/* The GPU should now be reset */
+
+	return ret;
+}
+
+static bool juno_protected_mode_supported(struct kbase_device *kbdev)
+{
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+
+	/*
+	 * Protected mode is only supported for the built in GPU
+	 * _and_ only if the right firmware is running.
+	 *
+	 * Given that at init time the GPU is not powered up the
+	 * juno_protected_mode_reset function can't be used as
+	 * is needs to access GPU registers.
+	 * However, although we don't want the GPU to boot into
+	 * protected mode we know a GPU reset will be done after
+	 * this function is called so although we set the GPU to
+	 * protected mode it will exit protected mode before the
+	 * driver is ready to run work.
+	 */
+	if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) &&
+			(kbdev->reg_start == 0x2d000000))
+		return juno_protected_mode_enter(kbdev) == 0;
+
+	return false;
+}
+
+struct kbase_protected_ops juno_protected_ops = {
+	.protected_mode_enter = juno_protected_mode_enter,
+	.protected_mode_reset = juno_protected_mode_reset,
+	.protected_mode_supported = juno_protected_mode_supported,
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
new file mode 100644
index 0000000..ab29e9d
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 600000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 600000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (&juno_protected_ops)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
+#ifdef CONFIG_DEVFREQ_THERMAL
+extern struct devfreq_cooling_ops juno_model_ops;
+#endif
+extern struct kbase_protected_ops juno_protected_ops;
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
new file mode 100644
index 0000000..7cb3be7
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @brief Entry point to transfer control to a platform for early initialization
+ *
+ * This function is called early on in the initialization during execution of
+ * @ref kbase_driver_init.
+ *
+ * @return Zero to indicate success non-zero for failure.
+ */
+int kbase_platform_early_init(void);
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
new file mode 100644
index 0000000..01f9dfc
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+/**
+ * kbase_platform_fake_register - Entry point for fake platform registration
+ *
+ * This function is called early on in the initialization during execution of
+ * kbase_driver_init.
+ *
+ * Return: 0 to indicate success, non-zero for failure.
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Entry point for fake platform unregistration
+ *
+ * This function is called in the termination during execution of
+ * kbase_driver_exit.
+ */
+void kbase_platform_fake_unregister(void);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
new file mode 100644
index 0000000..084a156
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
new file mode 100644
index 0000000..dc4471b
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -0,0 +1,82 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq()
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq()
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000..15ce2bc
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -0,0 +1,85 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+#include "mali_kbase_config_platform.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+	.start = 0xFC010000,
+	.end = 0xFC010000 + (4096 * 4) - 1
+	}
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
new file mode 100644
index 0000000..4665f98
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,279 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START	(0x10000000)
+#define SYS_CFGDATA_OFFSET		(0x000000A0)
+#define SYS_CFGCTRL_OFFSET		(0x000000A4)
+#define SYS_CFGSTAT_OFFSET		(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		(1 << 31)
+#define READ_REG_BIT_VALUE			(0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			(0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		(1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			(1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE	(0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE		(2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		(1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			(1 <<  1)
+
+#define FEED_REG_BIT_MASK			(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+/* the following three values used for reading
+ * HBI value of the LogicTile daughterboard */
+#define VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 (0x10000000)
+#define VE_SYS_PROC_ID1_OFFSET (0x00000088)
+#define VE_LOGIC_TILE_HBI_MASK (0x00000FFF)
+
+#define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos))
+
+/**
+ * Values used for determining the GPU frequency based on the LogicTile type
+ * Used by the function kbase_get_platform_logic_tile_type
+ */
+#define VE_VIRTEX6_GPU_FREQ_MIN 5000
+#define VE_VIRTEX6_GPU_FREQ_MAX 5000
+#define VE_VIRTEX7_GPU_FREQ_MIN 40000
+#define VE_VIRTEX7_GPU_FREQ_MAX 40000
+#define VE_DEFAULT_GPU_FREQ_MIN 5000
+#define VE_DEFAULT_GPU_FREQ_MAX 5000
+
+
+#define CPU_CLOCK_SPEED_UNDEFINED (0)
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed -Retrieves the CPU clock speed
+ * @cpu_clock - the value of CPU clock speed in MHz
+ *
+ * Returns 0 on success, error code otherwise.
+ *
+ * The implementation is platform specific.
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	int err = 0;
+	u32 reg_val = 0;
+	u32 osc2_value = 0;
+	u32 pa_divide = 0;
+	u32 pb_divide = 0;
+	u32 pc_divide = 0;
+	void __iomem *syscfg_reg = NULL;
+	void __iomem *scc_reg = NULL;
+
+	if (CPU_CLOCK_SPEED_UNDEFINED != cpu_clock_speed) {
+		*cpu_clock = cpu_clock_speed;
+		return 0;
+	}
+
+	/* Init the value in case something goes wrong */
+	*cpu_clock = 0;
+
+	/* Map CPU register into virtual memory */
+	syscfg_reg = ioremap(MOTHERBOARD_SYS_CFG_START, 0x1000);
+	if (syscfg_reg == NULL) {
+		err = -EIO;
+		goto syscfg_reg_map_failed;
+	}
+
+	scc_reg = ioremap(CORETILE_EXPRESS_A9X4_SCC_START, 0x1000);
+	if (scc_reg == NULL) {
+		err = -EIO;
+		goto scc_reg_map_failed;
+	}
+
+	raw_spin_lock(&syscfg_lock);
+
+	/* Read SYS regs - OSC2 */
+	reg_val = readl(syscfg_reg + SYS_CFGCTRL_OFFSET);
+
+	/* Check if there is any other undergoing request */
+	if (reg_val & SYS_CFGCTRL_START_BIT_VALUE) {
+		err = -EBUSY;
+		goto ongoing_request;
+	}
+	/* Reset the CGFGSTAT reg */
+	writel(0, (syscfg_reg + SYS_CFGSTAT_OFFSET));
+
+	writel(SYS_CFGCTRL_START_BIT_VALUE | READ_REG_BIT_VALUE |
+			DCC_DEFAULT_BIT_VALUE |
+			SYS_CFG_OSC_FUNC_BIT_VALUE |
+			SITE_DEFAULT_BIT_VALUE |
+			BOARD_STACK_POS_DEFAULT_BIT_VALUE |
+			DEVICE_DEFAULT_BIT_VALUE,
+			(syscfg_reg + SYS_CFGCTRL_OFFSET));
+	/* Wait for the transaction to complete */
+	while (!(readl(syscfg_reg + SYS_CFGSTAT_OFFSET) &
+			SYS_CFG_COMPLETE_BIT_VALUE))
+		;
+	/* Read SYS_CFGSTAT Register to get the status of submitted
+	 * transaction */
+	reg_val = readl(syscfg_reg + SYS_CFGSTAT_OFFSET);
+
+	if (reg_val & SYS_CFG_ERROR_BIT_VALUE) {
+		/* Error while setting register */
+		err = -EIO;
+		goto set_reg_error;
+	}
+
+	osc2_value = readl(syscfg_reg + SYS_CFGDATA_OFFSET);
+	/* Read the SCC CFGRW0 register */
+	reg_val = readl(scc_reg);
+
+	/*
+	 * Select the appropriate feed:
+	 * CFGRW0[0] - CLKOB
+	 * CFGRW0[1] - CLKOC
+	 * CFGRW0[2] - FACLK (CLK)B FROM AXICLK PLL)
+	 */
+	/* Calculate the  FCLK */
+	if (IS_SINGLE_BIT_SET(reg_val, 0)) {
+		/* CFGRW0[0] - CLKOB */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[10:7] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PB_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 1)) {
+		/* CFGRW0[1] - CLKOC */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[14:11] */
+		pc_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PC_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PC_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 2)) {
+		/* CFGRW0[2] - FACLK */
+		/* CFGRW0[18:15] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PA_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[22:19] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PB_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else {
+		err = -EIO;
+	}
+
+set_reg_error:
+ongoing_request:
+	raw_spin_unlock(&syscfg_lock);
+	*cpu_clock /= HZ_IN_MHZ;
+
+	if (!err)
+		cpu_clock_speed = *cpu_clock;
+
+	iounmap(scc_reg);
+
+scc_reg_map_failed:
+	iounmap(syscfg_reg);
+
+syscfg_reg_map_failed:
+
+	return err;
+}
+
+/**
+ * kbase_get_platform_logic_tile_type -  determines which LogicTile type
+ * is used by Versatile Express
+ *
+ * When platform_config build parameter is specified as vexpress, i.e.,
+ * platform_config=vexpress, GPU frequency may vary dependent on the
+ * particular platform. The GPU frequency depends on the LogicTile type.
+ *
+ * This function determines which LogicTile type is used by the platform by
+ * reading the HBI value of the daughterboard which holds the LogicTile:
+ *
+ * 0x217 HBI0217 Virtex-6
+ * 0x192 HBI0192 Virtex-5
+ * 0x247 HBI0247 Virtex-7
+ *
+ * Return: HBI value of the logic tile daughterboard, zero if not accessible
+ */
+static u32 kbase_get_platform_logic_tile_type(void)
+{
+	void __iomem *syscfg_reg = NULL;
+	u32 sys_procid1 = 0;
+
+	syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 + VE_SYS_PROC_ID1_OFFSET, 4);
+	if (NULL != syscfg_reg) {
+		sys_procid1 = readl(syscfg_reg);
+		iounmap(syscfg_reg);
+	}
+
+	return sys_procid1 & VE_LOGIC_TILE_HBI_MASK;
+}
+
+u32 kbase_get_platform_min_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MIN;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MIN;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MIN;
+	}
+}
+
+u32 kbase_get_platform_max_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MAX;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MAX;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MAX;
+	}
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
new file mode 100644
index 0000000..da86569
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+/**
+ * Get the minimum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_min_freq(void);
+
+/**
+ * Get the maximum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_max_freq(void);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
new file mode 100644
index 0000000..d9bfabc
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
@@ -0,0 +1,16 @@
+#
+# (C) COPYRIGHT 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_vexpress.o
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
new file mode 100644
index 0000000..b0490ca
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 5000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 5000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000..3ff0930
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -0,0 +1,79 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+			     .start = 0x2f010000,
+			     .end = 0x2f010000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
new file mode 100644
index 0000000..0cb41ce
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
new file mode 100644
index 0000000..22ffccb
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -0,0 +1,82 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 10000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 10000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/**
+ * Protected mode switch
+ *
+ * Attached value: pointer to @ref kbase_protected_ops
+ */
+#define PROTECTED_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000..76ffe4a
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 75,
+	.mmu_irq_number = 76,
+	.gpu_irq_number = 77,
+	.io_memory_region = {
+			     .start = 0x2F000000,
+			     .end = 0x2F000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the GPU
+	 * and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working correctly it should
+	 * be completely safe as the GPU should not be active at this point.
+	 * However this is disabled normally because it will most likely interfere with
+	 * bus logging etc.
+	 */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
new file mode 100644
index 0000000..816dff4
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ					    (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START		(0x10000000)
+#define SYS_CFGDATA_OFFSET				(0x000000A0)
+#define SYS_CFGCTRL_OFFSET				(0x000000A4)
+#define SYS_CFGSTAT_OFFSET				(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		  (1 << 31)
+#define READ_REG_BIT_VALUE				  (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			  (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		  (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			  (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE	      (2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		  (1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			  (1 <<  1)
+
+#define FEED_REG_BIT_MASK				(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+#define IS_SINGLE_BIT_SET(val, pos)		(val&(1<<pos))
+
+#define CPU_CLOCK_SPEED_UNDEFINED 0
+
+#define CPU_CLOCK_SPEED_6XV7 50
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed
+ * @brief  Retrieves the CPU clock speed.
+ *         The implementation is platform specific.
+ * @param[out]    cpu_clock - the value of CPU clock speed in MHz
+ * @return        0 on success, 1 otherwise
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	/* TODO: MIDBASE-2873 - Provide runtime detection of CPU clock freq for 6XV7 board */
+	*cpu_clock = CPU_CLOCK_SPEED_6XV7;
+
+	return 0;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
new file mode 100644
index 0000000..23647cc
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
new file mode 100644
index 0000000..5fa9b39
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_ukk_os.h
+ * Types and definitions that are common for Linux OSs for the kernel side of the
+ * User-Kernel interface.
+ */
+
+#ifndef _UKK_OS_H_ /* Linux version */
+#define _UKK_OS_H_
+
+#include <linux/fs.h>
+
+/**
+ * @addtogroup uk_api User-Kernel Interface API
+ * @{
+ */
+
+/**
+ * @addtogroup uk_api_kernel UKK (Kernel side)
+ * @{
+ */
+
+/**
+ * Internal OS specific data structure associated with each UKK session. Part
+ * of a ukk_session object.
+ */
+typedef struct ukkp_session {
+	int dummy;     /**< No internal OS specific data at this time */
+} ukkp_session;
+
+/** @} end group uk_api_kernel */
+
+/** @} end group uk_api */
+
+#endif /* _UKK_OS_H__ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/midgard/sconscript b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/sconscript
new file mode 100644
index 0000000..137d532
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/midgard/sconscript
@@ -0,0 +1,86 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import sys
+Import('env')
+
+if Glob('tests/sconscript'):
+	SConscript( 'tests/sconscript' )
+
+mock_test = 0
+
+# Fake platform is a transient solution for GPL drivers running in kernel that does not provide configuration via platform data.
+# For such kernels fake_platform_device should be set to 1. For kernels providing platform data fake_platform_device should be set to 0.
+if env['platform_config']=='devicetree':
+	fake_platform_device = 0
+else:
+	fake_platform_device = 1
+
+# Source files required for kbase.
+kbase_src = [
+	Glob('*.c'),
+	Glob('backend/*/*.c'),
+	Glob('internal/*/*.c'),
+	Glob('ipa/*.c'),
+	Glob('platform/%s/*.c' % env['platform_config']),
+]
+
+if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1':
+	kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')]
+	mock_test = 1
+
+# we need platform config for GPL version using fake platform
+if fake_platform_device==1:
+	# Check if we are compiling for PBX
+	if env.KernelConfigEnabled("CONFIG_MACH_REALVIEW_PBX") and \
+	   env["platform_config"] in {"vexpress", "vexpress_6xvirtex7_10mhz"}:
+		sys.stderr.write("WARNING: Building for a PBX kernel but with platform_config=vexpress*\n")
+	# if the file platform config file is in the tpip directory then use that, otherwise use the default config directory
+	if Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])):
+		kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config']))
+	else:
+		kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/*%s.c' % (env['platform_config']))
+
+make_args = env.kernel_get_config_defines(ret_list = True,
+                                          fake = fake_platform_device) + [
+	'PLATFORM=%s' % env['platform'],
+	'MALI_ERROR_INJECT_ON=%s' % env['error_inject'],
+	'MALI_KERNEL_TEST_API=%s' % env['unit'],
+	'MALI_UNIT_TEST=%s' % env['unit'],
+	'MALI_RELEASE_NAME=%s' % env['mali_release_name'],
+	'MALI_MOCK_TEST=%s' % mock_test,
+	'MALI_CUSTOMER_RELEASE=%s' % env['release'],
+	'MALI_INSTRUMENTATION_LEVEL=%s' % env['instr'],
+	'MALI_COVERAGE=%s' % env['coverage'],
+	'MALI_BUS_LOG=%s' % env['buslog'],
+]
+
+kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src,
+                              make_args = make_args)
+
+# Add a dependency on kds.ko.
+# Only necessary when KDS is not built into the kernel.
+#
+if env['os'] != 'android':
+	if not env.KernelConfigEnabled("CONFIG_KDS"):
+		env.Depends(kbase, '$STATIC_LIB_PATH/kds.ko')
+
+# need Module.symvers from ump.ko build
+if int(env['ump']) == 1:
+	env.Depends(kbase, '$STATIC_LIB_PATH/ump.ko')
+
+env.KernelObjTarget('kbase', kbase)
+
+env.AppendUnique(BASE=['cutils_linked_list'])
diff --git a/midgard/r16p0/kernel/drivers/gpu/arm/sconscript b/midgard/r16p0/kernel/drivers/gpu/arm/sconscript
new file mode 100644
index 0000000..924511b
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/arm/sconscript
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import glob
+
+
+SConscript('midgard/sconscript')
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/Kbuild b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/Kbuild
new file mode 100644
index 0000000..f10d58c
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/Kbuild
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+pl111_drm-y +=	pl111_drm_device.o \
+		pl111_drm_connector.o \
+		pl111_drm_crtc.o \
+		pl111_drm_cursor.o \
+		pl111_drm_dma_buf.o \
+		pl111_drm_encoder.o \
+		pl111_drm_fb.o \
+		pl111_drm_gem.o \
+		pl111_drm_pl111.o \
+		pl111_drm_platform.o \
+		pl111_drm_suspend.o \
+		pl111_drm_vma.o
+
+obj-$(CONFIG_DRM_PL111) += pl111_drm.o
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/Kconfig b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/Kconfig
new file mode 100644
index 0000000..60b465c
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/Kconfig
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+config DRM_PL111
+	tristate "DRM Support for PL111 CLCD Controller"
+	depends on DRM
+	select DRM_KMS_HELPER
+	select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+	help
+	  Choose this option for DRM support for the PL111 CLCD controller.
+	  If M is selected the module will be called pl111_drm.
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/Makefile b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/Makefile
new file mode 100644
index 0000000..2869f58
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: pl111_drm
+
+pl111_drm:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_USES_KDS=y CONFIG_DRM_PL111=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h
new file mode 100644
index 0000000..d3e0086
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_clcd_ext.h
@@ -0,0 +1,95 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+/**
+ * pl111_clcd_ext.h
+ * Extended CLCD register definitions
+ */
+
+#ifndef PL111_CLCD_EXT_H_
+#define PL111_CLCD_EXT_H_
+
+/* 
+ * PL111 cursor register definitions not defined in the kernel's clcd header.
+ * 
+ * TODO MIDEGL-1718: move to include/linux/amba/clcd.h
+ */
+
+#define CLCD_CRSR_IMAGE				0x00000800
+#define CLCD_CRSR_IMAGE_MAX_WORDS		256
+#define CLCD_CRSR_IMAGE_WORDS_PER_LINE		4
+#define CLCD_CRSR_IMAGE_PIXELS_PER_WORD	16
+
+#define CLCD_CRSR_LBBP_COLOR_MASK	0x00000003
+#define CLCD_CRSR_LBBP_BACKGROUND	0x0
+#define CLCD_CRSR_LBBP_FOREGROUND	0x1
+#define CLCD_CRSR_LBBP_TRANSPARENT	0x2
+#define CLCD_CRSR_LBBP_INVERSE		0x3
+
+
+#define CLCD_CRSR_CTRL			0x00000c00
+#define CLCD_CRSR_CONFIG		0x00000c04
+#define CLCD_CRSR_PALETTE_0		0x00000c08
+#define CLCD_CRSR_PALETTE_1		0x00000c0c
+#define CLCD_CRSR_XY			0x00000c10
+#define CLCD_CRSR_CLIP			0x00000c14
+#define CLCD_CRSR_IMSC			0x00000c20
+#define CLCD_CRSR_ICR			0x00000c24
+#define CLCD_CRSR_RIS			0x00000c28
+#define CLCD_MIS				0x00000c2c
+
+#define CRSR_CTRL_CRSR_ON		(1 << 0)
+#define CRSR_CTRL_CRSR_MAX		3
+#define CRSR_CTRL_CRSR_NUM_SHIFT	4
+#define CRSR_CTRL_CRSR_NUM_MASK		\
+	(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT)
+#define CRSR_CTRL_CURSOR_0		0
+#define CRSR_CTRL_CURSOR_1		1
+#define CRSR_CTRL_CURSOR_2		2
+#define CRSR_CTRL_CURSOR_3		3
+
+#define CRSR_CONFIG_CRSR_SIZE		(1 << 0)
+#define CRSR_CONFIG_CRSR_FRAME_SYNC	(1 << 1)
+
+#define CRSR_PALETTE_RED_SHIFT		0
+#define CRSR_PALETTE_GREEN_SHIFT	8
+#define CRSR_PALETTE_BLUE_SHIFT		16
+
+#define CRSR_PALETTE_RED_MASK		0x000000ff
+#define CRSR_PALETTE_GREEN_MASK		0x0000ff00
+#define CRSR_PALETTE_BLUE_MASK		0x00ff0000
+#define CRSR_PALETTE_MASK		(~0xff000000)
+
+#define CRSR_XY_MASK			0x000003ff
+#define CRSR_XY_X_SHIFT			0
+#define CRSR_XY_Y_SHIFT			16
+
+#define CRSR_XY_X_MASK			CRSR_XY_MASK
+#define CRSR_XY_Y_MASK			(CRSR_XY_MASK << CRSR_XY_Y_SHIFT)
+
+#define CRSR_CLIP_MASK			0x3f
+#define CRSR_CLIP_X_SHIFT		0
+#define CRSR_CLIP_Y_SHIFT		8
+
+#define CRSR_CLIP_X_MASK		CRSR_CLIP_MASK
+#define CRSR_CLIP_Y_MASK		(CRSR_CLIP_MASK << CRSR_CLIP_Y_SHIFT)
+
+#define CRSR_IMSC_CRSR_IM		(1<<0)
+#define CRSR_ICR_CRSR_IC		(1<<0)
+#define CRSR_RIS_CRSR_RIS		(1<<0)
+#define CRSR_MIS_CRSR_MIS		(1<<0)
+
+#endif /* PL111_CLCD_EXT_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h
new file mode 100644
index 0000000..64d87b6
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm.h
@@ -0,0 +1,270 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _PL111_DRM_H_
+#define _PL111_DRM_H_
+
+#define DRIVER_AUTHOR    "ARM Ltd."
+#define DRIVER_NAME      "pl111_drm"
+#define DRIVER_DESC      "DRM module for PL111"
+#define DRIVER_LICENCE   "GPL"
+#define DRIVER_ALIAS     "platform:pl111_drm"
+#define DRIVER_DATE      "20101111"
+#define DRIVER_VERSION   "0.2"
+#define DRIVER_MAJOR      2
+#define DRIVER_MINOR      1
+#define DRIVER_PATCHLEVEL 1
+
+/*
+ * Number of flips allowed in flight at any one time. Any more flips requested
+ * beyond this value will cause the caller to block until earlier flips have
+ * completed.
+ *
+ * For performance reasons, this must be greater than the number of buffers
+ * used in the rendering pipeline. Note that the rendering pipeline can contain
+ * different types of buffer, e.g.:
+ * - 2 final framebuffers
+ * - >2 geometry buffers for GPU use-cases
+ * - >2 vertex buffers for GPU use-cases
+ *
+ * For example, a system using 5 geometry buffers could have 5 flips in flight,
+ * and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 5 or greater.
+ *
+ * Whilst there may be more intermediate buffers (such as vertex/geometry) than
+ * final framebuffers, KDS is used to ensure that GPU rendering waits for the
+ * next off-screen buffer, so it doesn't overwrite an on-screen buffer and
+ * produce tearing.
+ */
+
+/*
+ * Here, we choose a conservative value. A lower value is most likely
+ * suitable for GPU use-cases.
+ */
+#define NR_FLIPS_IN_FLIGHT_THRESHOLD 16
+
+#define CLCD_IRQ_NEXTBASE_UPDATE (1u<<2)
+
+struct pl111_drm_flip_resource;
+
+struct pl111_gem_bo_dma {
+	dma_addr_t fb_dev_addr;
+	void *fb_cpu_addr;
+};
+
+struct pl111_gem_bo_shm {
+	struct page **pages;
+	dma_addr_t *dma_addrs;
+};
+
+struct pl111_gem_bo {
+	struct drm_gem_object gem_object;
+	u32 type;
+	union {
+		struct pl111_gem_bo_dma dma;
+		struct pl111_gem_bo_shm shm;
+	} backing_data;
+	struct sg_table *sgt;
+};
+
+extern struct pl111_drm_dev_private priv;
+
+struct pl111_drm_framebuffer {
+	struct drm_framebuffer fb;
+	struct pl111_gem_bo *bo;
+};
+
+struct pl111_drm_flip_resource {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* This is the kds set associated to the dma_buf we want to flip */
+	struct kds_resource_set *kds_res_set;
+#endif
+	struct drm_framebuffer *fb;
+	struct drm_crtc *crtc;
+	struct list_head link;
+	bool page_flip;
+	struct drm_pending_vblank_event *event;
+};
+
+struct pl111_drm_crtc {
+	struct drm_crtc crtc;
+	int crtc_index;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* This protects "old_kds_res_set" and "displaying_fb" */
+	spinlock_t current_displaying_lock;
+	/*
+	 * When a buffer is displayed its associated kds resource
+	 * will be obtained and stored here. Every time a buffer
+	 * flip is completed this old kds set is released and assigned
+	 * the kds set of the new buffer.
+	 */
+	struct kds_resource_set *old_kds_res_set;
+	/*
+	 * Stores which frame buffer is currently being displayed by
+	 * this CRTC or NULL if nothing is being displayed. It is used
+	 * to tell whether we need to obtain a set of kds resources for
+	 * exported buffer objects.
+	 */
+	struct drm_framebuffer *displaying_fb;
+#endif
+	struct drm_display_mode *new_mode;
+	struct drm_display_mode *current_mode;
+	int last_bpp;
+
+	/*
+	 * This spinlock protects "update_queue", "current_update_res"
+	 * and calls to do_flip_to_res() which updates the CLCD base
+	 * registers.
+	 */
+	spinlock_t base_update_lock;
+	/*
+	 * The resource that caused a base address update. Only one can be
+	 * pending, hence it's != NULL if there's a pending update
+	 */
+	struct pl111_drm_flip_resource *current_update_res;
+	/* Queue of things waiting to update the base address */
+	struct list_head update_queue;
+
+	void (*show_framebuffer_cb)(struct pl111_drm_flip_resource *flip_res,
+				struct drm_framebuffer *fb);
+};
+
+struct pl111_drm_connector {
+	struct drm_connector connector;
+};
+
+struct pl111_drm_encoder {
+	struct drm_encoder encoder;
+};
+
+struct pl111_drm_dev_private {
+	struct pl111_drm_crtc *pl111_crtc;
+
+	struct amba_device *amba_dev;
+	unsigned long mmio_start;
+	__u32 mmio_len;
+	void *regs;
+	struct clk *clk;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct kds_callback kds_cb;
+	struct kds_callback kds_obtain_current_cb;
+#endif
+	/*
+	 * Number of flips that were started in show_framebuffer_on_crtc(),
+	 * but haven't completed yet - because we do deferred flipping
+	 */
+	atomic_t nr_flips_in_flight;
+	wait_queue_head_t wait_for_flips;
+
+	/*
+	 * Used to prevent race between pl111_dma_buf_release and
+	 * drm_gem_prime_handle_to_fd
+	 */
+	struct mutex export_dma_buf_lock;
+
+	uint32_t number_crtcs;
+
+	/* Cache for flip resources used to avoid kmalloc on each page flip */
+	struct kmem_cache *page_flip_slab;
+};
+
+enum pl111_cursor_size {
+	CURSOR_32X32,
+	CURSOR_64X64
+};
+
+enum pl111_cursor_sync {
+	CURSOR_SYNC_NONE,
+	CURSOR_SYNC_VSYNC
+};
+
+
+/**
+ * Buffer allocation function which is more flexible than dumb_create(),
+ * it allows passing driver specific flags to control the kind of buffer
+ * to be allocated.
+ */
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file_priv);
+ 
+/****** TODO MIDEGL-1718: this should be moved to uapi/include/drm/pl111_drm.h ********/
+
+/* 
+ * Parameters for different buffer objects:
+ * bit [0]: backing storage
+ *	(0 -> SHM)
+ *	(1 -> DMA)
+ * bit [2:1]: kind of mapping
+ *	(0x0 -> uncached)
+ *	(0x1 -> write combine)
+ *	(0x2 -> cached)
+ */
+#define PL111_BOT_MASK		(0x7)
+#define PL111_BOT_SHM		(0x0 << 0)
+#define PL111_BOT_DMA		(0x1 << 0)
+#define PL111_BOT_UNCACHED	(0x0 << 1)
+#define PL111_BOT_WC		(0x1 << 1)
+#define PL111_BOT_CACHED	(0x2 << 1)
+
+/**
+ * User-desired buffer creation information structure.
+ *
+ * @size: user-desired memory allocation size.
+ *      - this size value would be page-aligned internally.
+ * @flags: user request for setting memory type or cache attributes as a bit op
+ *	- PL111_BOT_DMA / PL111_BOT_SHM
+ *	- PL111_BOT_UNCACHED / PL111_BOT_WC / PL111_BOT_CACHED
+ * @handle: returned a handle to created gem object.
+ *      - this handle will be set by gem module of kernel side.
+ */
+struct drm_pl111_gem_create {
+	uint32_t height;
+	uint32_t width;
+	uint32_t bpp;
+	uint32_t flags;
+	/* handle, pitch, size will be returned */
+	uint32_t handle;
+	uint32_t pitch;
+	uint64_t size;
+};
+
+#define DRM_PL111_GEM_CREATE		0x00
+
+#define DRM_IOCTL_PL111_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + \
+			DRM_PL111_GEM_CREATE, struct drm_pl111_gem_create)
+/****************************************************************************/
+
+#define PL111_FB_FROM_FRAMEBUFFER(drm_fb) \
+	(container_of(drm_fb, struct pl111_drm_framebuffer, fb))
+
+#define PL111_BO_FROM_FRAMEBUFFER(drm_fb) \
+	(container_of(drm_fb, struct pl111_drm_framebuffer, fb)->bo)
+
+#define PL111_BO_FROM_GEM(gem_obj) \
+	container_of(gem_obj, struct pl111_gem_bo, gem_object)
+
+#define to_pl111_crtc(x) container_of(x, struct pl111_drm_crtc, crtc)
+
+#define PL111_ENCODER_FROM_ENCODER(x) \
+	container_of(x, struct pl111_drm_encoder, encoder)
+
+#define PL111_CONNECTOR_FROM_CONNECTOR(x) \
+	container_of(x, struct pl111_drm_connector, connector)
+
+#include "pl111_drm_funcs.h"
+
+#endif /* _PL111_DRM_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c
new file mode 100644
index 0000000..c7c3a22
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_connector.c
@@ -0,0 +1,170 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_connector.c
+ * Implementation of the connector functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+
+static struct {
+	int w, h, type;
+} pl111_drm_modes[] = {
+	{ 640, 480,  DRM_MODE_TYPE_PREFERRED},
+	{ 800, 600,  0},
+	{1024, 768,  0},
+	{  -1,  -1, -1}
+};
+
+void pl111_connector_destroy(struct drm_connector *connector)
+{
+	struct pl111_drm_connector *pl111_connector =
+				PL111_CONNECTOR_FROM_CONNECTOR(connector);
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	kfree(pl111_connector);
+}
+
+enum drm_connector_status pl111_connector_detect(struct drm_connector
+							*connector, bool force)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return connector_status_connected;
+}
+
+void pl111_connector_dpms(struct drm_connector *connector, int mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+}
+
+struct drm_encoder *
+pl111_connector_helper_best_encoder(struct drm_connector *connector)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	if (connector->encoder != NULL) {
+		return connector->encoder; /* Return attached encoder */
+	} else {
+		/*
+		 * If there is no attached encoder we choose the best candidate
+		 * from the list.
+		 * For PL111 there is only one encoder so we return the first
+		 * one we find.
+		 * Other h/w would require a suitable criterion below.
+		 */
+		struct drm_encoder *encoder = NULL;
+		struct drm_device *dev = connector->dev;
+
+		list_for_each_entry(encoder, &dev->mode_config.encoder_list,
+					head) {
+			if (1) { /* criterion ? */
+				break;
+			}
+		}
+		return encoder; /* return best candidate encoder */
+	}
+}
+
+int pl111_connector_helper_get_modes(struct drm_connector *connector)
+{
+	int i = 0;
+	int count = 0;
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	while (pl111_drm_modes[i].w != -1) {
+		struct drm_display_mode *mode =
+				drm_mode_find_dmt(connector->dev,
+						pl111_drm_modes[i].w,
+						pl111_drm_modes[i].h,
+						60
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+						, false
+#endif
+						);
+
+		if (mode != NULL) {
+			mode->type |= pl111_drm_modes[i].type;
+			drm_mode_probed_add(connector, mode);
+			count++;
+		}
+
+		i++;
+	}
+
+	DRM_DEBUG_KMS("found %d modes\n", count);
+
+	return count;
+}
+
+int pl111_connector_helper_mode_valid(struct drm_connector *connector,
+					struct drm_display_mode *mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return MODE_OK;
+}
+
+const struct drm_connector_funcs connector_funcs = {
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = pl111_connector_destroy,
+	.detect = pl111_connector_detect,
+	.dpms = pl111_connector_dpms,
+};
+
+const struct drm_connector_helper_funcs connector_helper_funcs = {
+	.get_modes = pl111_connector_helper_get_modes,
+	.mode_valid = pl111_connector_helper_mode_valid,
+	.best_encoder = pl111_connector_helper_best_encoder,
+};
+
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev)
+{
+	struct pl111_drm_connector *pl111_connector;
+
+	pl111_connector = kzalloc(sizeof(struct pl111_drm_connector),
+					GFP_KERNEL);
+
+	if (pl111_connector == NULL) {
+		pr_err("Failed to allocated pl111_drm_connector\n");
+		return NULL;
+	}
+
+	drm_connector_init(dev, &pl111_connector->connector, &connector_funcs,
+				DRM_MODE_CONNECTOR_DVII);
+
+	drm_connector_helper_add(&pl111_connector->connector,
+					&connector_helper_funcs);
+
+	drm_sysfs_connector_add(&pl111_connector->connector);
+
+	return pl111_connector;
+}
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c
new file mode 100644
index 0000000..ede07ff
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_crtc.c
@@ -0,0 +1,449 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_crtc.c
+ * Implementation of the CRTC functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+static int pl111_crtc_num;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc)
+{
+	struct drm_device *dev = pl111_crtc->crtc.dev;
+	struct pl111_drm_flip_resource *old_flip_res;
+	struct pl111_gem_bo *bo;
+	unsigned long irq_flags;
+	int flips_in_flight;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	unsigned long flags;
+#endif
+
+	spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+
+	/*
+	 * Cache the flip resource that caused the IRQ since it will be
+	 * dispatched later. Early return if the IRQ isn't associated to
+	 * a base register update.
+	 * 
+	 * TODO MIDBASE-2790: disable IRQs when a flip is not pending.
+	 */
+	old_flip_res = pl111_crtc->current_update_res;
+	if (!old_flip_res) {
+		spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+		return;
+	}
+	pl111_crtc->current_update_res = NULL;
+
+	/* Prepare the next flip (if any) of the queue as soon as possible. */
+	if (!list_empty(&pl111_crtc->update_queue)) {
+		struct pl111_drm_flip_resource *flip_res;
+		/* Remove the head of the list */
+		flip_res = list_first_entry(&pl111_crtc->update_queue,
+			struct pl111_drm_flip_resource, link);
+		list_del(&flip_res->link);
+		do_flip_to_res(flip_res);
+		/*
+		 * current_update_res will be set, so guarentees that
+		 * another flip_res coming in gets queued instead of
+		 * handled immediately
+		 */
+	}
+	spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+	/* Finalize properly the flip that caused the IRQ */
+	DRM_DEBUG_KMS("DRM Finalizing old_flip_res=%p\n", old_flip_res);
+
+	bo = PL111_BO_FROM_FRAMEBUFFER(old_flip_res->fb);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+	release_kds_resource_and_display(old_flip_res);
+	spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+	/* Release DMA buffer on this flip */
+
+	if (bo->gem_object.export_dma_buf != NULL)
+		dma_buf_put(bo->gem_object.export_dma_buf);
+
+	drm_handle_vblank(dev, pl111_crtc->crtc_index);
+
+	/* Wake up any processes waiting for page flip event */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (old_flip_res->event) {
+		spin_lock_bh(&dev->event_lock);
+		drm_send_vblank_event(dev, pl111_crtc->crtc_index,
+					old_flip_res->event);
+		spin_unlock_bh(&dev->event_lock);
+	}
+#else
+	if (old_flip_res->event) {
+		struct drm_pending_vblank_event *e = old_flip_res->event;
+		struct timeval now;
+		unsigned int seq;
+
+		DRM_DEBUG_KMS("%s: wake up page flip event (%p)\n", __func__,
+				old_flip_res->event);
+
+		spin_lock_bh(&dev->event_lock);
+		seq = drm_vblank_count_and_time(dev, pl111_crtc->crtc_index,
+							&now);
+		e->pipe = pl111_crtc->crtc_index;
+		e->event.sequence = seq;
+		e->event.tv_sec = now.tv_sec;
+		e->event.tv_usec = now.tv_usec;
+
+		list_add_tail(&e->base.link,
+			&e->base.file_priv->event_list);
+
+		wake_up_interruptible(&e->base.file_priv->event_wait);
+		spin_unlock_bh(&dev->event_lock);
+	}
+#endif
+
+	drm_vblank_put(dev, pl111_crtc->crtc_index);
+
+	/*
+	 * workqueue.c:process_one_work():
+	 * "It is permissible to free the struct work_struct from
+	 *  inside the function that is called from it"
+	 */
+	kmem_cache_free(priv.page_flip_slab, old_flip_res);
+
+	flips_in_flight = atomic_dec_return(&priv.nr_flips_in_flight);
+	if (flips_in_flight == 0 ||
+			flips_in_flight == (NR_FLIPS_IN_FLIGHT_THRESHOLD - 1))
+		wake_up(&priv.wait_for_flips);
+
+	DRM_DEBUG_KMS("DRM release flip_res=%p\n", old_flip_res);
+}
+
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2)
+{
+	struct pl111_drm_flip_resource *flip_res = cb1;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+	pl111_crtc->show_framebuffer_cb(cb1, cb2);
+}
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+				struct drm_framebuffer *fb, bool page_flip,
+				struct drm_pending_vblank_event *event)
+{
+	struct pl111_gem_bo *bo;
+	struct pl111_drm_flip_resource *flip_res;
+	int flips_in_flight;
+	int old_flips_in_flight;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+	crtc->fb = fb;
+#else
+	crtc->primary->fb = fb;
+#endif
+
+	bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	if (bo == NULL) {
+		DRM_DEBUG_KMS("Failed to get pl111_gem_bo object\n");
+		return -EINVAL;
+	}
+
+	/* If this is a full modeset, wait for all outstanding flips to complete
+	 * before continuing. This avoids unnecessary complication from being
+	 * able to queue up multiple modesets and queues of mixed modesets and
+	 * page flips.
+	 *
+	 * Modesets should be uncommon and will not be performant anyway, so
+	 * making them synchronous should have negligible performance impact.
+	 */
+	if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+				atomic_read(&priv.nr_flips_in_flight) == 0);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * There can be more 'early display' flips in flight than there are
+	 * buffers, and there is (currently) no explicit bound on the number of
+	 * flips. Hence, we need a new allocation for each one.
+	 *
+	 * Note: this could be optimized down if we knew a bound on the flips,
+	 * since an application can only have so many buffers in flight to be
+	 * useful/not hog all the memory
+	 */
+	flip_res = kmem_cache_alloc(priv.page_flip_slab, GFP_KERNEL);
+	if (flip_res == NULL) {
+		pr_err("kmem_cache_alloc failed to alloc - flip ignored\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * increment flips in flight, whilst blocking when we reach
+	 * NR_FLIPS_IN_FLIGHT_THRESHOLD
+	 */
+	do {
+		/*
+		 * Note: use of assign-and-then-compare in the condition to set
+		 * flips_in_flight
+		 */
+		int ret = wait_event_killable(priv.wait_for_flips,
+				(flips_in_flight =
+					atomic_read(&priv.nr_flips_in_flight))
+				< NR_FLIPS_IN_FLIGHT_THRESHOLD);
+		if (ret != 0) {
+			kmem_cache_free(priv.page_flip_slab, flip_res);
+			return ret;
+		}
+
+		old_flips_in_flight = atomic_cmpxchg(&priv.nr_flips_in_flight,
+					flips_in_flight, flips_in_flight + 1);
+	} while (old_flips_in_flight != flips_in_flight);
+
+	flip_res->fb = fb;
+	flip_res->crtc = crtc;
+	flip_res->page_flip = page_flip;
+	flip_res->event = event;
+	INIT_LIST_HEAD(&flip_res->link);
+	DRM_DEBUG_KMS("DRM alloc flip_res=%p\n", flip_res);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	if (bo->gem_object.export_dma_buf != NULL) {
+		struct dma_buf *buf = bo->gem_object.export_dma_buf;
+		unsigned long shared[1] = { 0 };
+		struct kds_resource *resource_list[1] = {
+				get_dma_buf_kds_resource(buf) };
+		int err;
+
+		get_dma_buf(buf);
+		DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+
+		/* Wait for the KDS resource associated with this buffer */
+		err = kds_async_waitall(&flip_res->kds_res_set,
+					&priv.kds_cb, flip_res, fb, 1, shared,
+					resource_list);
+		BUG_ON(err);
+	} else {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+		DRM_DEBUG_KMS("No dma_buf for this flip\n");
+
+		/* No dma-buf attached so just call the callback directly */
+		flip_res->kds_res_set = NULL;
+		pl111_crtc->show_framebuffer_cb(flip_res, fb);
+	}
+#else
+	if (bo->gem_object.export_dma_buf != NULL) {
+		struct dma_buf *buf = bo->gem_object.export_dma_buf;
+
+		get_dma_buf(buf);
+		DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+	} else {
+		DRM_DEBUG_KMS("No dma_buf for this flip\n");
+	}
+
+	/* No dma-buf attached to this so just call the callback directly */
+	{
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		pl111_crtc->show_framebuffer_cb(flip_res, fb);
+	}
+#endif
+
+	/* For the same reasons as the wait at the start of this function,
+	 * wait for the modeset to complete before continuing.
+	 */
+	if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+				flips_in_flight == 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int pl111_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+			struct drm_pending_vblank_event *event)
+#else
+			struct drm_pending_vblank_event *event,
+			uint32_t flags)
+#endif
+{
+	DRM_DEBUG_KMS("%s: crtc=%p, fb=%p, event=%p\n",
+			__func__, crtc, fb, event);
+	return show_framebuffer_on_crtc(crtc, fb, true, event);
+}
+
+int pl111_crtc_helper_mode_set(struct drm_crtc *crtc,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode,
+				int x, int y, struct drm_framebuffer *old_fb)
+{
+	int ret;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+	struct drm_display_mode *duplicated_mode;
+
+	DRM_DEBUG_KMS("DRM crtc_helper_mode_set, x=%d y=%d bpp=%d\n",
+			adjusted_mode->hdisplay, adjusted_mode->vdisplay,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+			crtc->fb->bits_per_pixel);
+#else
+			crtc->primary->fb->bits_per_pixel);
+#endif
+
+	duplicated_mode = drm_mode_duplicate(crtc->dev, adjusted_mode);
+	if (!duplicated_mode)
+		return -ENOMEM;
+
+	pl111_crtc->new_mode = duplicated_mode;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+	ret = show_framebuffer_on_crtc(crtc, crtc->fb, false, NULL);
+#else
+	ret = show_framebuffer_on_crtc(crtc, crtc->primary->fb, false, NULL);
+#endif
+	if (ret != 0) {
+		pl111_crtc->new_mode = pl111_crtc->current_mode;
+		drm_mode_destroy(crtc->dev, duplicated_mode);
+	}
+
+	return ret;
+}
+
+void pl111_crtc_helper_prepare(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+void pl111_crtc_helper_commit(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+bool pl111_crtc_helper_mode_fixup(struct drm_crtc *crtc,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0))
+				const struct drm_display_mode *mode,
+#else
+				struct drm_display_mode *mode,
+#endif
+				struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+#ifdef CONFIG_ARCH_VEXPRESS
+	/*
+	 * 1024x768 with more than 16 bits per pixel may not work 
+	 * correctly on Versatile Express due to bandwidth issues
+	 */
+	if (mode->hdisplay == 1024 && mode->vdisplay == 768 &&
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+			crtc->fb->bits_per_pixel > 16) {
+#else
+			crtc->primary->fb->bits_per_pixel > 16) {
+#endif
+		DRM_INFO("*WARNING* 1024x768 at > 16 bpp may suffer corruption\n");
+	}
+#endif
+
+	return true;
+}
+
+void pl111_crtc_helper_disable(struct drm_crtc *crtc)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+	/* don't disable crtc until no flips in flight as irq will be disabled */
+	ret = wait_event_killable(priv.wait_for_flips, atomic_read(&priv.nr_flips_in_flight) == 0);
+	if(ret) {
+		pr_err("pl111_crtc_helper_disable failed\n");
+		return;
+	}
+	clcd_disable(crtc);
+}
+
+void pl111_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+	drm_crtc_cleanup(crtc);
+	kfree(pl111_crtc);
+}
+
+const struct drm_crtc_funcs crtc_funcs = {
+	.cursor_set = pl111_crtc_cursor_set,
+	.cursor_move = pl111_crtc_cursor_move,
+	.set_config = drm_crtc_helper_set_config,
+	.page_flip = pl111_crtc_page_flip,
+	.destroy = pl111_crtc_destroy
+};
+
+const struct drm_crtc_helper_funcs crtc_helper_funcs = {
+	.mode_set = pl111_crtc_helper_mode_set,
+	.prepare = pl111_crtc_helper_prepare,
+	.commit = pl111_crtc_helper_commit,
+	.mode_fixup = pl111_crtc_helper_mode_fixup,
+	.disable = pl111_crtc_helper_disable,
+};
+
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev)
+{
+	struct pl111_drm_crtc *pl111_crtc;
+
+	pl111_crtc = kzalloc(sizeof(struct pl111_drm_crtc), GFP_KERNEL);
+	if (pl111_crtc == NULL) {
+		pr_err("Failed to allocated pl111_drm_crtc\n");
+		return NULL;
+	}
+
+	drm_crtc_init(dev, &pl111_crtc->crtc, &crtc_funcs);
+	drm_crtc_helper_add(&pl111_crtc->crtc, &crtc_helper_funcs);
+
+	pl111_crtc->crtc_index = pl111_crtc_num;
+	pl111_crtc_num++;
+	pl111_crtc->crtc.enabled = 0;
+	pl111_crtc->last_bpp = 0;
+	pl111_crtc->current_update_res = NULL;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	pl111_crtc->displaying_fb = NULL;
+	pl111_crtc->old_kds_res_set = NULL;
+	spin_lock_init(&pl111_crtc->current_displaying_lock);
+#endif
+	pl111_crtc->show_framebuffer_cb = show_framebuffer_on_crtc_cb_internal;
+	INIT_LIST_HEAD(&pl111_crtc->update_queue);
+	spin_lock_init(&pl111_crtc->base_update_lock);
+
+	return pl111_crtc;
+}
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c
new file mode 100644
index 0000000..4bf20fe
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_cursor.c
@@ -0,0 +1,331 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_cursor.c
+ * Implementation of cursor functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_clcd_ext.h"
+#include "pl111_drm.h"
+
+#define PL111_MAX_CURSOR_WIDTH (64)
+#define PL111_MAX_CURSOR_HEIGHT (64)
+
+#define ARGB_2_LBBP_BINARY_THRESHOLD	(1 << 7)
+#define ARGB_ALPHA_SHIFT		24
+#define ARGB_ALPHA_MASK			(0xff << ARGB_ALPHA_SHIFT)
+#define ARGB_RED_SHIFT			16
+#define ARGB_RED_MASK			(0xff << ARGB_RED_SHIFT)
+#define ARGB_GREEN_SHIFT		8
+#define ARGB_GREEN_MASK			(0xff << ARGB_GREEN_SHIFT)
+#define ARGB_BLUE_SHIFT			0
+#define ARGB_BLUE_MASK			(0xff << ARGB_BLUE_SHIFT)
+
+
+void pl111_set_cursor_size(enum pl111_cursor_size size)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+	if (size == CURSOR_64X64)
+		reg_data |= CRSR_CONFIG_CRSR_SIZE;
+	else
+		reg_data &= ~CRSR_CONFIG_CRSR_SIZE;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor_sync(enum pl111_cursor_sync sync)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+	if (sync == CURSOR_SYNC_VSYNC)
+		reg_data |= CRSR_CONFIG_CRSR_FRAME_SYNC;
+	else
+		reg_data &= ~CRSR_CONFIG_CRSR_FRAME_SYNC;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor(u32 cursor)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+	reg_data &= ~(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT);
+	reg_data |= (cursor & CRSR_CTRL_CRSR_MAX) << CRSR_CTRL_CRSR_NUM_SHIFT;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_enable(bool enable)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+	if (enable)
+		reg_data |= CRSR_CTRL_CRSR_ON;
+	else
+		reg_data &= ~CRSR_CTRL_CRSR_ON;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_position(u32 x, u32 y)
+{
+	u32 reg_data = (x & CRSR_XY_MASK) |
+			((y & CRSR_XY_MASK) << CRSR_XY_Y_SHIFT);
+
+	writel(reg_data, priv.regs + CLCD_CRSR_XY);
+}
+
+void pl111_set_cursor_clipping(u32 x, u32 y)
+{
+	u32 reg_data;
+
+	/* 
+	 * Do not allow setting clipping values larger than
+	 * the cursor size since the cursor is already fully hidden
+	 * when x,y = PL111_MAX_CURSOR_WIDTH.
+	 */
+	if (x > PL111_MAX_CURSOR_WIDTH)
+		x = PL111_MAX_CURSOR_WIDTH;
+	if (y > PL111_MAX_CURSOR_WIDTH)
+		y = PL111_MAX_CURSOR_WIDTH;
+
+	reg_data = (x & CRSR_CLIP_MASK) |
+			((y & CRSR_CLIP_MASK) << CRSR_CLIP_Y_SHIFT);
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CLIP);
+}
+
+void pl111_set_cursor_palette(u32 color0, u32 color1)
+{
+	writel(color0 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_0);
+	writel(color1 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_1);
+}
+
+void pl111_cursor_enable(void)
+{
+	pl111_set_cursor_sync(CURSOR_SYNC_VSYNC);
+	pl111_set_cursor_size(CURSOR_64X64);
+	pl111_set_cursor_palette(0x0, 0x00ffffff);
+	pl111_set_cursor_enable(true);
+}
+
+void pl111_cursor_disable(void)
+{
+	pl111_set_cursor_enable(false);
+}
+
+/* shift required to locate pixel into the correct position in
+ * a cursor LBBP word, indexed by x mod 16.
+ */
+static const unsigned char
+x_mod_16_to_value_shift[CLCD_CRSR_IMAGE_PIXELS_PER_WORD] = {
+	6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24
+};
+
+/* Pack the pixel value into its correct position in the buffer as specified
+ * for LBBP */
+static inline void
+set_lbbp_pixel(uint32_t *buffer, unsigned int x, unsigned int y,
+				uint32_t value)
+{
+	u32 *cursor_ram = priv.regs + CLCD_CRSR_IMAGE;
+	uint32_t shift;
+	uint32_t data;
+
+	shift = x_mod_16_to_value_shift[x % CLCD_CRSR_IMAGE_PIXELS_PER_WORD];
+
+	/* Get the word containing this pixel */
+	cursor_ram = cursor_ram + (x >> CLCD_CRSR_IMAGE_WORDS_PER_LINE) + (y << 2);
+
+	/* Update pixel in cursor RAM */
+	data = readl(cursor_ram);
+	data &= ~(CLCD_CRSR_LBBP_COLOR_MASK << shift);
+	data |= value << shift;
+	writel(data, cursor_ram);
+}
+
+static u32 pl111_argb_to_lbbp(u32 argb_pix)
+{
+	u32 lbbp_pix = CLCD_CRSR_LBBP_TRANSPARENT;
+	u32 alpha = (argb_pix & ARGB_ALPHA_MASK) >> ARGB_ALPHA_SHIFT;
+	u32 red = (argb_pix & ARGB_RED_MASK) >> ARGB_RED_SHIFT;
+	u32 green = (argb_pix & ARGB_GREEN_MASK) >> ARGB_GREEN_SHIFT;
+	u32 blue =  (argb_pix & ARGB_BLUE_MASK) >> ARGB_BLUE_SHIFT;
+
+	/*
+	 * Converting from 8 pixel transparency to binary transparency
+	 * it's the best we can achieve.
+	 */
+	if (alpha & ARGB_2_LBBP_BINARY_THRESHOLD) {
+		u32 gray, max, min;
+
+		/*
+		 * Convert to gray using the lightness method:
+		 * gray = [max(R,G,B) + min(R,G,B)]/2
+		 */
+		min = min(red, green);
+		min = min(min, blue);
+		max = max(red, green);
+		max = max(max, blue);
+		gray = (min + max) >> 1; /* divide by 2 */
+		/* Apply binary threshold to the gray value calculated */
+		if (gray & ARGB_2_LBBP_BINARY_THRESHOLD)
+			lbbp_pix = CLCD_CRSR_LBBP_FOREGROUND;
+		else
+			lbbp_pix = CLCD_CRSR_LBBP_BACKGROUND;
+	}
+
+	return lbbp_pix;
+}
+
+/*
+ * The PL111 hardware cursor supports only LBBP which is a 2bpp format but
+ * the cursor format from userspace is ARGB8888 so we need to convert
+ *  to LBBP here.
+ */
+static void pl111_set_cursor_image(u32 *data)
+{
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+	/* Add 1 on width to insert trailing NULL */
+	char string_cursor[PL111_MAX_CURSOR_WIDTH + 1];
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+	unsigned int x;
+	unsigned int y;
+
+	for (y = 0; y < PL111_MAX_CURSOR_HEIGHT; y++) {
+		for (x = 0; x < PL111_MAX_CURSOR_WIDTH; x++) {
+			u32 value = pl111_argb_to_lbbp(*data);
+
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+			if (value == CLCD_CRSR_LBBP_TRANSPARENT)
+				string_cursor[x] = 'T';
+			else if (value == CLCD_CRSR_LBBP_FOREGROUND)
+				string_cursor[x] = 'F';
+			else if (value == CLCD_CRSR_LBBP_INVERSE)
+				string_cursor[x] = 'I';
+			else
+				string_cursor[x] = 'B';
+
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+			set_lbbp_pixel(data, x, y, value);
+			++data;
+		}
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+		string_cursor[PL111_MAX_CURSOR_WIDTH] = '\0';
+		DRM_INFO("%s\n", string_cursor);
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+	}
+}
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+			   struct drm_file *file_priv,
+			   uint32_t handle,
+			   uint32_t width,
+			   uint32_t height)
+{
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+
+	DRM_DEBUG_KMS("handle = %u, width = %u, height = %u\n",
+		handle, width, height);
+
+	if (!handle) {
+		pl111_cursor_disable();
+		return 0;
+	}
+
+	if ((width != PL111_MAX_CURSOR_WIDTH) ||
+	    (height != PL111_MAX_CURSOR_HEIGHT))
+		return -EINVAL;
+
+	obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
+	if (!obj) {
+		DRM_ERROR("Cannot find cursor object for handle = %d\n",
+			  handle);
+		return -ENOENT;
+	}
+
+	/*
+	 * We expect a PL111_MAX_CURSOR_WIDTH x PL111_MAX_CURSOR_HEIGHT
+	 * ARGB888 buffer object in the input.
+	 *
+	 */
+	if (obj->size < (PL111_MAX_CURSOR_WIDTH * PL111_MAX_CURSOR_HEIGHT * 4)) {
+		DRM_ERROR("Cannot set cursor with an obj size = %d\n",
+			  obj->size);
+		drm_gem_object_unreference_unlocked(obj);
+		return -EINVAL;
+	}
+
+	bo = PL111_BO_FROM_GEM(obj);
+	if (!(bo->type & PL111_BOT_DMA)) {
+		DRM_ERROR("Tried to set cursor with non DMA backed obj = %p\n",
+			  obj);
+		drm_gem_object_unreference_unlocked(obj);
+		return -EINVAL;
+	}
+
+	pl111_set_cursor_image(bo->backing_data.dma.fb_cpu_addr);
+
+	/*
+	 * Since we copy the contents of the buffer to the HW cursor internal
+	 * memory this GEM object is not needed anymore.
+	 */
+	drm_gem_object_unreference_unlocked(obj);
+
+	pl111_cursor_enable();
+
+	return 0;
+}
+
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+			    int x, int y)
+{
+	int x_clip = 0;
+	int y_clip = 0;
+
+	DRM_DEBUG("x %d y %d\n", x, y);
+
+	/*
+	 * The cursor image is clipped automatically at the screen limits when
+	 * it extends beyond the screen image to the right or bottom but
+	 * we must clip it using pl111 HW features for negative values.
+	 */
+	if (x < 0) {
+		x_clip = -x;
+		x = 0;
+	}
+	if (y < 0) {
+		y_clip = -y;
+		y = 0;
+	}
+
+	pl111_set_cursor_clipping(x_clip, y_clip);
+	pl111_set_cursor_position(x, y);
+
+	return 0;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c
new file mode 100644
index 0000000..6619c07
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c
@@ -0,0 +1,338 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_device.c
+ * Implementation of the Linux device driver entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+struct pl111_drm_dev_private priv;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void initial_kds_obtained(void *cb1, void *cb2)
+{
+	wait_queue_head_t *wait = (wait_queue_head_t *) cb1;
+	bool *cb_has_called = (bool *) cb2;
+
+	*cb_has_called = true;
+	wake_up(wait);
+}
+
+/* Must be called from within current_displaying_lock spinlock */
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	pl111_crtc->displaying_fb = flip_res->fb;
+
+	/* Release the previous buffer */
+	if (pl111_crtc->old_kds_res_set != NULL) {
+		/*
+		 * Can flip to the same buffer, but must not release the current
+		 * resource set
+		 */
+		BUG_ON(pl111_crtc->old_kds_res_set == flip_res->kds_res_set);
+		kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+	}
+	/* Record the current buffer, to release on the next buffer flip */
+	pl111_crtc->old_kds_res_set = flip_res->kds_res_set;
+}
+#endif
+
+void pl111_drm_preclose(struct drm_device *dev, struct drm_file *file_priv)
+{
+	DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+void pl111_drm_lastclose(struct drm_device *dev)
+{
+	DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+/*
+ * pl111 does not have a proper HW counter for vblank IRQs so enable_vblank
+ * and disable_vblank are just no op callbacks.
+ */
+static int pl111_enable_vblank(struct drm_device *dev, int crtc)
+{
+	DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+	return 0;
+}
+
+static void pl111_disable_vblank(struct drm_device *dev, int crtc)
+{
+	DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+}
+
+struct drm_mode_config_funcs mode_config_funcs = {
+	.fb_create = pl111_fb_create,
+};
+
+static int pl111_modeset_init(struct drm_device *dev)
+{
+	struct drm_mode_config *mode_config;
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	struct pl111_drm_connector *pl111_connector;
+	struct pl111_drm_encoder *pl111_encoder;
+	int ret = 0;
+
+	if (priv == NULL)
+		return -EINVAL;
+
+	drm_mode_config_init(dev);
+	mode_config = &dev->mode_config;
+	mode_config->funcs = &mode_config_funcs;
+	mode_config->min_width = 1;
+	mode_config->max_width = 1024;
+	mode_config->min_height = 1;
+	mode_config->max_height = 768;
+
+	priv->pl111_crtc = pl111_crtc_create(dev);
+	if (priv->pl111_crtc == NULL) {
+		pr_err("Failed to create pl111_drm_crtc\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	priv->number_crtcs = 1;
+
+	pl111_connector = pl111_connector_create(dev);
+	if (pl111_connector == NULL) {
+		pr_err("Failed to create pl111_drm_connector\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	pl111_encoder = pl111_encoder_create(dev, 1);
+	if (pl111_encoder == NULL) {
+		pr_err("Failed to create pl111_drm_encoder\n");
+		ret = -ENOMEM;
+		goto out_config;
+	}
+
+	ret = drm_mode_connector_attach_encoder(&pl111_connector->connector,
+						&pl111_encoder->encoder);
+	if (ret != 0) {
+		DRM_ERROR("Failed to attach encoder\n");
+		goto out_config;
+	}
+
+	pl111_connector->connector.encoder = &pl111_encoder->encoder;
+
+	pl111_encoder->encoder.crtc = &priv->pl111_crtc->crtc;
+
+	goto finish;
+
+out_config:
+	drm_mode_config_cleanup(dev);
+finish:
+	DRM_DEBUG("%s returned %d\n", __func__, ret);
+	return ret;
+}
+
+static void pl111_modeset_fini(struct drm_device *dev)
+{
+	drm_mode_config_cleanup(dev);
+}
+
+static int pl111_drm_load(struct drm_device *dev, unsigned long chipset)
+{
+	int ret = 0;
+
+	pr_info("DRM %s\n", __func__);
+
+	mutex_init(&priv.export_dma_buf_lock);
+	atomic_set(&priv.nr_flips_in_flight, 0);
+	init_waitqueue_head(&priv.wait_for_flips);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	ret = kds_callback_init(&priv.kds_cb, 1, show_framebuffer_on_crtc_cb);
+	if (ret != 0) {
+		pr_err("Failed to initialise KDS callback\n");
+		goto finish;
+	}
+
+	ret = kds_callback_init(&priv.kds_obtain_current_cb, 1,
+				initial_kds_obtained);
+	if (ret != 0) {
+		pr_err("Failed to init KDS obtain callback\n");
+		kds_callback_term(&priv.kds_cb);
+		goto finish;
+	}
+#endif
+
+	/* Create a cache for page flips */
+	priv.page_flip_slab = kmem_cache_create("page flip slab",
+			sizeof(struct pl111_drm_flip_resource), 0, 0, NULL);
+	if (priv.page_flip_slab == NULL) {
+		DRM_ERROR("Failed to create slab\n");
+		ret = -ENOMEM;
+		goto out_kds_callbacks;
+	}
+
+	dev->dev_private = &priv;
+
+	ret = pl111_modeset_init(dev);
+	if (ret != 0) {
+		pr_err("Failed to init modeset\n");
+		goto out_slab;
+	}
+
+	ret = pl111_device_init(dev);
+	if (ret != 0) {
+		DRM_ERROR("Failed to init MMIO and IRQ\n");
+		goto out_modeset;
+	}
+
+	ret = drm_vblank_init(dev, 1);
+	if (ret != 0) {
+		DRM_ERROR("Failed to init vblank\n");
+		goto out_vblank;
+	}
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(3, 13, 0))
+	platform_set_drvdata(dev->platformdev, dev);
+#endif
+
+	goto finish;
+
+out_vblank:
+	pl111_device_fini(dev);
+out_modeset:
+	pl111_modeset_fini(dev);
+out_slab:
+	kmem_cache_destroy(priv.page_flip_slab);
+out_kds_callbacks:
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	kds_callback_term(&priv.kds_obtain_current_cb);
+	kds_callback_term(&priv.kds_cb);
+#endif
+finish:
+	DRM_DEBUG_KMS("pl111_drm_load returned %d\n", ret);
+	return ret;
+}
+
+static int pl111_drm_unload(struct drm_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+
+	kmem_cache_destroy(priv.page_flip_slab);
+
+	drm_vblank_cleanup(dev);
+	pl111_modeset_fini(dev);
+	pl111_device_fini(dev);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	kds_callback_term(&priv.kds_obtain_current_cb);
+	kds_callback_term(&priv.kds_cb);
+#endif
+	return 0;
+}
+
+static struct vm_operations_struct pl111_gem_vm_ops = {
+	.fault = pl111_gem_fault,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+#else
+	.open = pl111_gem_vm_open,
+	.close = pl111_gem_vm_close,
+#endif
+};
+
+static const struct file_operations drm_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = pl111_gem_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+};
+
+static struct drm_ioctl_desc pl111_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(PL111_GEM_CREATE, pl111_drm_gem_create_ioctl,
+		DRM_CONTROL_ALLOW | DRM_UNLOCKED),
+};
+
+static struct drm_driver driver = {
+	.driver_features =
+		DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME,
+	.load = pl111_drm_load,
+	.unload = pl111_drm_unload,
+	.context_dtor = NULL,
+	.preclose = pl111_drm_preclose,
+	.lastclose = pl111_drm_lastclose,
+	.suspend = pl111_drm_suspend,
+	.resume = pl111_drm_resume,
+	.get_vblank_counter = drm_vblank_count,
+	.enable_vblank = pl111_enable_vblank,
+	.disable_vblank = pl111_disable_vblank,
+	.ioctls = pl111_ioctls,
+	.fops = &drm_fops,
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+	.dumb_create = pl111_dumb_create,
+	.dumb_destroy = pl111_dumb_destroy,
+	.dumb_map_offset = pl111_dumb_map_offset,
+	.gem_free_object = pl111_gem_free_object,
+	.gem_vm_ops = &pl111_gem_vm_ops,
+	.prime_handle_to_fd = &pl111_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_export = &pl111_gem_prime_export,
+	.gem_prime_import = &pl111_gem_prime_import,
+};
+
+int pl111_drm_init(struct platform_device *dev)
+{
+	int ret;
+	pr_info("DRM %s\n", __func__);
+	pr_info("PL111 DRM initialize, driver name: %s, version %d.%d\n",
+		DRIVER_NAME, DRIVER_MAJOR, DRIVER_MINOR);
+	driver.num_ioctls = ARRAY_SIZE(pl111_ioctls);
+	ret = 0;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 15, 0))
+	driver.kdriver.platform_device = dev;
+#endif
+	return drm_platform_init(&driver, dev);
+
+}
+
+void pl111_drm_exit(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	drm_platform_exit(&driver, dev);
+#else
+	drm_put_dev(platform_get_drvdata(dev));
+#endif
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
new file mode 100644
index 0000000..1131f46
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
@@ -0,0 +1,625 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_dma_buf.c
+ * Implementation of the dma_buf functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void obtain_kds_if_currently_displayed(struct drm_device *dev,
+						struct pl111_gem_bo *bo,
+						struct dma_buf *dma_buf)
+{
+	unsigned long shared[1] = { 0 };
+	struct kds_resource *resource_list[1];
+	struct kds_resource_set *kds_res_set;
+	struct drm_crtc *crtc;
+	bool cb_has_called = false;
+	unsigned long flags;
+	int err;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	DRM_DEBUG_KMS("Obtaining initial KDS res for bo:%p dma_buf:%p\n",
+			bo, dma_buf);
+
+	resource_list[0] = get_dma_buf_kds_resource(dma_buf);
+	get_dma_buf(dma_buf);
+
+	/*
+	 * Can't use kds_waitall(), because kbase will be let through due to
+	 * locked ignore'
+	 */
+	err = kds_async_waitall(&kds_res_set,
+				&priv.kds_obtain_current_cb, &wake,
+				&cb_has_called, 1, shared, resource_list);
+	BUG_ON(err);
+	wait_event(wake, cb_has_called == true);
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb) {
+			struct pl111_drm_framebuffer *pl111_fb;
+			struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+			pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+
+			if (pl111_fb->bo == bo) {
+				DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+				DRM_DEBUG_KMS(" is being displayed, keeping\n");
+				/* There shouldn't be a previous buffer to release */
+				BUG_ON(pl111_crtc->old_kds_res_set);
+
+				if (kds_res_set == NULL) {
+					err = kds_async_waitall(&kds_res_set,
+							&priv.kds_obtain_current_cb,
+							&wake, &cb_has_called,
+							1, shared, resource_list);
+					BUG_ON(err);
+					wait_event(wake, cb_has_called == true);
+				}
+
+				/* Current buffer will need releasing on next flip */
+				pl111_crtc->old_kds_res_set = kds_res_set;
+
+				/*
+				* Clear kds_res_set, so a new kds_res_set is allocated
+				* for additional CRTCs
+				*/
+				kds_res_set = NULL;
+			}
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+	}
+
+	/* kds_res_set will be NULL here if any CRTCs are displaying fb */
+	if (kds_res_set != NULL) {
+		DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+		DRM_DEBUG_KMS(" not being displayed, discarding\n");
+		/* They're not being displayed, release them */
+		kds_resource_set_release(&kds_res_set);
+	}
+
+	dma_buf_put(dma_buf);
+}
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0))
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+			struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = buffer->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+	mutex_lock(&dev->struct_mutex);
+	ret = drm_gem_mmap_obj(obj, obj->size, vma);
+	mutex_unlock(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#else
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+			struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = buffer->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+
+	DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+	mutex_lock(&dev->struct_mutex);
+
+	/* Check for valid size. */
+	if (obj->size < vma->vm_end - vma->vm_start)
+		return -EINVAL;
+
+	BUG_ON(!dev->driver->gem_vm_ops);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+#endif
+
+	vma->vm_ops = dev->driver->gem_vm_ops;
+	vma->vm_private_data = obj;
+
+	/* Take a ref for this mapping of the object, so that the fault
+	* handler can dereference the mmap offset's pointer to the object.
+	* This reference is cleaned up by the corresponding vm_close
+	* (which should happen whether the vma was created by this call, or
+	* by a vm_open due to mremap or partial unmap or whatever).
+	*/
+	drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+	pl111_drm_vm_open_locked(dev, vma);
+#else
+	drm_vm_open_locked(dev, vma);
+#endif
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#endif /* KERNEL_VERSION */
+
+static void pl111_dma_buf_release(struct dma_buf *buf)
+{
+	/*
+	 * Need to release the dma_buf's reference on the gem object it was
+	 * exported from, and also clear the gem object's export_dma_buf
+	 * pointer to this dma_buf as it no longer exists
+	 */
+	struct drm_gem_object *obj = (struct drm_gem_object *)buf->priv;
+	struct pl111_gem_bo *bo;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct drm_crtc *crtc;
+	unsigned long flags;
+#endif
+	bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("Releasing dma_buf %p, drm_gem_obj=%p\n", buf, obj);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	list_for_each_entry(crtc, &bo->gem_object.dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb) {
+			struct pl111_drm_framebuffer *pl111_fb;
+			struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+			pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+			if (pl111_fb->bo == bo) {
+				kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+				pl111_crtc->old_kds_res_set = NULL;
+			}
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+	}
+#endif
+	mutex_lock(&priv.export_dma_buf_lock);
+
+	obj->export_dma_buf = NULL;
+	drm_gem_object_unreference_unlocked(obj);
+
+	mutex_unlock(&priv.export_dma_buf_lock);
+}
+
+static int pl111_dma_buf_attach(struct dma_buf *buf, struct device *dev,
+				struct dma_buf_attachment *attach)
+{
+	DRM_DEBUG_KMS("Attaching dma_buf %p to device %p attach=%p\n", buf,
+			dev, attach);
+
+	attach->priv = dev;
+
+	return 0;
+}
+
+static void pl111_dma_buf_detach(struct dma_buf *buf,
+				struct dma_buf_attachment *attach)
+{
+	DRM_DEBUG_KMS("Detaching dma_buf %p attach=%p\n", attach->dmabuf,
+			attach);
+}
+
+/* Heavily from exynos_drm_dmabuf.c */
+static struct sg_table *pl111_dma_buf_map_dma_buf(struct dma_buf_attachment
+						*attach,
+						enum dma_data_direction
+						direction)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int size, n_pages, nents;
+	struct scatterlist *s, *sg;
+	struct sg_table *sgt;
+	int ret, i;
+
+	DRM_DEBUG_KMS("Mapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+		      attach, bo);
+
+	/*
+	 * Nothing to do, if we are trying to map a dmabuf that has been imported.
+	 * Just return the existing sgt.
+	 */
+	if (obj->import_attach) {
+		BUG_ON(!bo->sgt);
+		return bo->sgt;
+	}
+
+	size = obj->size;
+	n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	if (bo->type & PL111_BOT_DMA) {
+		sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+		if (!sgt) {
+			DRM_ERROR("Failed to allocate sg_table\n");
+			return ERR_PTR(-ENOMEM);
+		}
+
+		ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+		if (ret < 0) {
+			DRM_ERROR("Failed to allocate page table\n");
+			return ERR_PTR(-ENOMEM);
+		}
+		sg_dma_len(sgt->sgl) = size;
+		/* We use DMA coherent mappings for PL111_BOT_DMA so we must
+		 * use the virtual address returned at buffer allocation */
+		sg_set_buf(sgt->sgl, bo->backing_data.dma.fb_cpu_addr, size);
+		sg_dma_address(sgt->sgl) = bo->backing_data.dma.fb_dev_addr;
+	} else { /* PL111_BOT_SHM */
+		struct page **pages;
+		int pg = 0;
+
+		mutex_lock(&dev->struct_mutex);
+		pages = get_pages(obj);
+		if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev, "could not get pages: %ld\n",
+				PTR_ERR(pages));
+			return ERR_CAST(pages);
+		}
+		sgt = drm_prime_pages_to_sg(pages, n_pages);
+		if (sgt == NULL)
+			return ERR_PTR(-ENOMEM);
+
+		pl111_gem_sync_to_dma(bo);
+
+		/*
+		 * At this point the pages have been dma-mapped by either
+		 * get_pages() for non cached maps or pl111_gem_sync_to_dma()
+		 * for cached. So the physical addresses can be assigned
+		 * to the sg entries.
+		 * drm_prime_pages_to_sg() may have combined contiguous pages
+		 * into chunks so we assign the physical address of the first
+		 * page of a chunk to the chunk and check that the physical
+		 * addresses of the rest of the pages in that chunk are also
+		 * contiguous.
+		 */
+		sg = sgt->sgl;
+		nents = sgt->nents;
+
+		for_each_sg(sg, s, nents, i) {
+			int j, n_pages_in_chunk = sg_dma_len(s) >> PAGE_SHIFT;
+
+			sg_dma_address(s) = bo->backing_data.shm.dma_addrs[pg];
+
+			for (j = pg+1; j < pg+n_pages_in_chunk; j++) {
+				BUG_ON(bo->backing_data.shm.dma_addrs[j] !=
+						bo->backing_data.shm.dma_addrs[j-1]+PAGE_SIZE);
+			}
+
+			pg += n_pages_in_chunk;
+		}
+
+		mutex_unlock(&dev->struct_mutex);
+	}
+	bo->sgt = sgt;
+	return sgt;
+}
+
+static void pl111_dma_buf_unmap_dma_buf(struct dma_buf_attachment *attach,
+					struct sg_table *sgt,
+					enum dma_data_direction direction)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("Unmapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+			attach, bo);
+
+	sg_free_table(sgt);
+	kfree(sgt);
+	bo->sgt = NULL;
+}
+
+/*
+ * There isn't any operation here that can sleep or fail so this callback can
+ * be used for both kmap and kmap_atomic implementations.
+ */
+static void *pl111_dma_buf_kmap(struct dma_buf *dma_buf, unsigned long pageno)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	void *vaddr = NULL;
+
+	/* Make sure we cannot access outside the memory range */
+	if (((pageno + 1) << PAGE_SHIFT) > bo->gem_object.size)
+		return NULL;
+
+	if (bo->type & PL111_BOT_DMA) {
+		vaddr = (bo->backing_data.dma.fb_cpu_addr +
+						(pageno << PAGE_SHIFT));
+	} else {
+		vaddr = page_address(bo->backing_data.shm.pages[pageno]);
+	}
+
+	return vaddr;
+}
+
+/*
+ * Find a scatterlist that starts in "start" and has "len"
+ * or return a NULL dma_handle.
+ */
+static dma_addr_t pl111_find_matching_sg(struct sg_table *sgt, size_t start,
+					size_t len)
+{
+	struct scatterlist *sg;
+	unsigned int count;
+	size_t size = 0;
+	dma_addr_t dma_handle = 0;
+
+	/* Find a scatterlist that starts in "start" and has "len"
+	* or return error */
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		if ((size == start) && (len == sg_dma_len(sg))) {
+			dma_handle = sg_dma_address(sg);
+			break;
+		}
+		size += sg_dma_len(sg);
+	}
+	return dma_handle;
+}
+
+static int pl111_dma_buf_begin_cpu(struct dma_buf *dma_buf,
+					size_t start, size_t len,
+					enum dma_data_direction dir)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	struct sg_table *sgt = bo->sgt;
+	dma_addr_t dma_handle;
+
+	if ((start + len) > bo->gem_object.size)
+		return -EINVAL;
+
+	if (!(bo->type & PL111_BOT_SHM)) {
+		struct device *dev = bo->gem_object.dev->dev;
+
+		dma_handle = pl111_find_matching_sg(sgt, start, len);
+		if (!dma_handle)
+			return -EINVAL;
+
+		dma_sync_single_range_for_cpu(dev, dma_handle, 0, len, dir);
+	}
+	/* PL111_BOT_DMA uses coherents mappings, no need to sync */
+	return 0;
+}
+
+static void pl111_dma_buf_end_cpu(struct dma_buf *dma_buf,
+					size_t start, size_t len,
+					enum dma_data_direction dir)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	struct sg_table *sgt = bo->sgt;
+	dma_addr_t dma_handle;
+
+	if ((start + len) > bo->gem_object.size)
+		return;
+
+	if (!(bo->type & PL111_BOT_DMA)) {
+		struct device *dev = bo->gem_object.dev->dev;
+
+		dma_handle = pl111_find_matching_sg(sgt, start, len);
+		if (!dma_handle)
+			return;
+
+		dma_sync_single_range_for_device(dev, dma_handle, 0, len, dir);
+	}
+	/* PL111_BOT_DMA uses coherents mappings, no need to sync */
+}
+
+static struct dma_buf_ops pl111_dma_buf_ops = {
+	.release = &pl111_dma_buf_release,
+	.attach = &pl111_dma_buf_attach,
+	.detach = &pl111_dma_buf_detach,
+	.map_dma_buf = &pl111_dma_buf_map_dma_buf,
+	.unmap_dma_buf = &pl111_dma_buf_unmap_dma_buf,
+	.kmap_atomic = &pl111_dma_buf_kmap,
+	.kmap = &pl111_dma_buf_kmap,
+	.begin_cpu_access = &pl111_dma_buf_begin_cpu,
+	.end_cpu_access = &pl111_dma_buf_end_cpu,
+	.mmap = &pl111_dma_buf_mmap,
+};
+
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+				       struct dma_buf *dma_buf)
+{
+	struct dma_buf_attachment *attachment;
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+	struct scatterlist *sgl;
+	struct sg_table *sgt;
+	dma_addr_t cont_phys;
+	int ret = 0;
+	int i;
+
+	DRM_DEBUG_KMS("DRM %s on dev=%p dma_buf=%p\n", __func__, dev, dma_buf);
+
+	/* is this one of own objects? */
+	if (dma_buf->ops == &pl111_dma_buf_ops) {
+		obj = dma_buf->priv;
+		/* is it from our device? */
+		if (obj->dev == dev) {
+			/*
+			* Importing dmabuf exported from our own gem increases
+			* refcount on gem itself instead of f_count of dmabuf.
+			*/
+			drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+			/* before v3.10.0 we assume the caller has taken a ref on the dma_buf
+			 * we don't want it for self-imported buffers so drop it here */
+			dma_buf_put(dma_buf);
+#endif
+
+			return obj;
+		}
+	}
+
+	attachment = dma_buf_attach(dma_buf, dev->dev);
+	if (IS_ERR(attachment))
+		return ERR_CAST(attachment);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+	/* from 3.10.0 we assume the caller has not taken a ref so we take one here */
+	get_dma_buf(dma_buf);
+#endif
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto err_buf_detach;
+	}
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo) {
+		DRM_ERROR("%s: failed to allocate buffer object.\n", __func__);
+		ret = -ENOMEM;
+		goto err_unmap_attach;
+	}
+
+	/* Find out whether the buffer is contiguous or not */
+	sgl = sgt->sgl;
+	cont_phys = sg_phys(sgl);
+	bo->type |= PL111_BOT_DMA;
+	for_each_sg(sgt->sgl, sgl, sgt->nents, i) {
+		dma_addr_t real_phys = sg_phys(sgl);
+		if (real_phys != cont_phys) {
+			bo->type &= ~PL111_BOT_DMA;
+			break;
+		}
+		cont_phys += (PAGE_SIZE - sgl->offset);
+	}
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	ret = drm_gem_private_object_init(dev, &bo->gem_object,
+					  dma_buf->size);
+	if (ret != 0) {
+		DRM_ERROR("DRM could not import DMA GEM obj\n");
+		goto err_free_buffer;
+	}
+#else
+	drm_gem_private_object_init(dev, &bo->gem_object, dma_buf->size);
+#endif
+
+	if (bo->type & PL111_BOT_DMA) {
+		bo->backing_data.dma.fb_cpu_addr = sg_virt(sgt->sgl);
+		bo->backing_data.dma.fb_dev_addr = sg_phys(sgt->sgl);
+		DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, contiguous import\n", __func__, bo);
+	} else { /* PL111_BOT_SHM */
+		DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, non contiguous import\n", __func__, bo);
+	}
+
+	bo->gem_object.import_attach = attachment;
+	bo->sgt = sgt;
+
+	return &bo->gem_object;
+
+err_free_buffer:
+	kfree(bo);
+	bo = NULL;
+err_unmap_attach:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+err_buf_detach:
+	dma_buf_detach(dma_buf, attachment);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+	/* from 3.10.0 we will have taken a ref so drop it here */
+	dma_buf_put(dma_buf);
+#endif
+	return ERR_PTR(ret);
+}
+
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+				      struct drm_gem_object *obj, int flags)
+{
+	struct dma_buf *new_buf;
+	struct pl111_gem_bo *bo;
+	size_t size;
+
+	DRM_DEBUG("DRM %s on dev=%p drm_gem_obj=%p\n", __func__, dev, obj);
+	size = obj->size;
+
+	new_buf = dma_buf_export(obj /*priv */ , &pl111_dma_buf_ops, size,
+					flags | O_RDWR);
+	bo = PL111_BO_FROM_GEM(new_buf->priv);
+
+	/*
+	 * bo->gem_object.export_dma_buf not setup until after gem_prime_export
+	 * finishes
+	 */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Ensure that we hold the kds resource if it's the currently
+	 * displayed buffer.
+	 */
+	obtain_kds_if_currently_displayed(dev, bo, new_buf);
+#endif
+
+	DRM_DEBUG("Created dma_buf %p\n", new_buf);
+
+	return new_buf;
+}
+
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+				uint32_t handle, uint32_t flags, int *prime_fd)
+{
+	int result;
+	/*
+	 * This will re-use any existing exports, and calls
+	 * driver->gem_prime_export to do the first export when needed
+	 */
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p, handle=0x%.8x\n", __func__,
+			file_priv, handle);
+
+	mutex_lock(&priv.export_dma_buf_lock);
+	result = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags,
+						prime_fd);
+	mutex_unlock(&priv.export_dma_buf_lock);
+
+	return result;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c
new file mode 100644
index 0000000..78c91c0
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_encoder.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_encoder.c
+ * Implementation of the encoder functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+bool pl111_encoder_helper_mode_fixup(struct drm_encoder *encoder,
+					struct drm_display_mode *mode,
+					struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+	return true;
+}
+
+void pl111_encoder_helper_prepare(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_commit(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_mode_set(struct drm_encoder *encoder,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_disable(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct pl111_drm_encoder *pl111_encoder =
+					PL111_ENCODER_FROM_ENCODER(encoder);
+
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+
+	drm_encoder_cleanup(encoder);
+	kfree(pl111_encoder);
+}
+
+const struct drm_encoder_funcs encoder_funcs = {
+	.destroy = pl111_encoder_destroy,
+};
+
+const struct drm_encoder_helper_funcs encoder_helper_funcs = {
+	.mode_fixup = pl111_encoder_helper_mode_fixup,
+	.prepare = pl111_encoder_helper_prepare,
+	.commit = pl111_encoder_helper_commit,
+	.mode_set = pl111_encoder_helper_mode_set,
+	.disable = pl111_encoder_helper_disable,
+};
+
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+						int possible_crtcs)
+{
+	struct pl111_drm_encoder *pl111_encoder;
+
+	pl111_encoder = kzalloc(sizeof(struct pl111_drm_encoder), GFP_KERNEL);
+	if (pl111_encoder == NULL) {
+		pr_err("Failed to allocated pl111_drm_encoder\n");
+		return NULL;
+	}
+
+	drm_encoder_init(dev, &pl111_encoder->encoder, &encoder_funcs,
+				DRM_MODE_ENCODER_DAC);
+
+	drm_encoder_helper_add(&pl111_encoder->encoder, &encoder_helper_funcs);
+
+	pl111_encoder->encoder.possible_crtcs = possible_crtcs;
+
+	return pl111_encoder;
+}
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c
new file mode 100644
index 0000000..f575c9e
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_fb.c
@@ -0,0 +1,202 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_fb.c
+ * Implementation of the framebuffer functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_crtc.h>
+#include "pl111_drm.h"
+
+static void pl111_fb_destroy(struct drm_framebuffer *framebuffer)
+{
+	struct pl111_drm_framebuffer *pl111_fb;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct drm_crtc *crtc;
+	unsigned long flags;
+#endif
+	DRM_DEBUG_KMS("Destroying framebuffer 0x%p...\n", framebuffer);
+
+	pl111_fb = PL111_FB_FROM_FRAMEBUFFER(framebuffer);
+
+	/*
+	 * Because flips are deferred, wait for all previous flips to complete
+	 */
+	wait_event(priv.wait_for_flips,
+			atomic_read(&priv.nr_flips_in_flight) == 0);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Release KDS resources if it's currently being displayed. Only occurs
+	 * when the last framebuffer is destroyed.
+	 */
+	list_for_each_entry(crtc, &framebuffer->dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb == framebuffer) {
+			/* Release the current buffers */
+			if (pl111_crtc->old_kds_res_set != NULL) {
+				DRM_DEBUG_KMS("Releasing KDS resources for ");
+				DRM_DEBUG_KMS("displayed 0x%p\n", framebuffer);
+				kds_resource_set_release(
+					&pl111_crtc->old_kds_res_set);
+			}
+			pl111_crtc->old_kds_res_set = NULL;
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock,
+								flags);
+	}
+#endif
+	drm_framebuffer_cleanup(framebuffer);
+
+	if ((pl111_fb->bo != NULL) && (&pl111_fb->bo->gem_object != NULL))
+		drm_gem_object_unreference_unlocked(&pl111_fb->bo->gem_object);
+
+	kfree(pl111_fb);
+
+	DRM_DEBUG_KMS("Destroyed framebuffer 0x%p\n", framebuffer);
+}
+
+static int pl111_fb_create_handle(struct drm_framebuffer *fb,
+				struct drm_file *file_priv,
+				unsigned int *handle)
+{
+	struct pl111_gem_bo *bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	DRM_DEBUG_KMS("DRM %s on fb=%p\n", __func__, fb);
+
+	if (bo == NULL)
+		return -EINVAL;
+
+	return drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+}
+
+const struct drm_framebuffer_funcs fb_funcs = {
+	.destroy = pl111_fb_destroy,
+	.create_handle = pl111_fb_create_handle,
+};
+
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct pl111_drm_framebuffer *pl111_fb = NULL;
+	struct drm_framebuffer *fb = NULL;
+	struct drm_gem_object *gem_obj;
+	struct pl111_gem_bo *bo;
+	int err = 0;
+	size_t min_size;
+	int bpp;
+	int depth;
+
+	pr_info("DRM %s\n", __func__);
+	gem_obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
+	if (gem_obj == NULL) {
+		DRM_ERROR("Could not get gem obj from handle to create fb\n");
+		err = -ENOENT;
+		goto error;
+	}
+
+	bo = PL111_BO_FROM_GEM(gem_obj);
+	drm_fb_get_bpp_depth(mode_cmd->pixel_format, &depth, &bpp);
+
+	if (mode_cmd->pitches[0] < mode_cmd->width * (bpp >> 3)) {
+		DRM_ERROR("bad pitch %u for plane 0\n", mode_cmd->pitches[0]);
+		err = -EINVAL;
+		goto error;
+	}
+
+	min_size = (mode_cmd->height - 1) * mode_cmd->pitches[0]
+				+ mode_cmd->width * (bpp >> 3);
+
+	if (bo->gem_object.size < min_size) {
+		DRM_ERROR("gem obj size < min size\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* We can't scan out SHM so we can't create an fb for it */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		DRM_ERROR("Can't create FB for non-scanout buffer\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	switch ((char)(mode_cmd->pixel_format & 0xFF)) {
+	case 'Y':
+	case 'U':
+	case 'V':
+	case 'N':
+	case 'T':
+		DRM_ERROR("YUV formats not supported\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	pl111_fb = kzalloc(sizeof(struct pl111_drm_framebuffer), GFP_KERNEL);
+	if (pl111_fb == NULL) {
+		DRM_ERROR("Could not allocate pl111_drm_framebuffer\n");
+		err = -ENOMEM;
+		goto error;
+	}
+	fb = &pl111_fb->fb;
+
+	err = drm_framebuffer_init(dev, fb, &fb_funcs);
+	if (err) {
+		DRM_ERROR("drm_framebuffer_init failed\n");
+		kfree(fb);
+		fb = NULL;
+		goto error;
+	}
+
+	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+	/* The only framebuffer formats supported by pl111
+	 * are 16 bpp or 32 bpp with 24 bit depth.
+	 * See clcd_enable()
+	 */
+	if (!((fb->bits_per_pixel == 16) ||
+		  (fb->bits_per_pixel == 32 && fb->depth == 24))) {
+		DRM_DEBUG_KMS("unsupported pixel format bpp=%d, depth=%d\n", fb->bits_per_pixel, fb->depth);
+		drm_framebuffer_cleanup(fb);
+		kfree(fb);
+		fb = NULL;
+		err = -EINVAL;
+		goto error;
+	}
+
+	pl111_fb->bo = bo;
+
+	DRM_DEBUG_KMS("Created fb 0x%p with gem_obj 0x%p physaddr=0x%.8x\n",
+			fb, gem_obj, bo->backing_data.dma.fb_dev_addr);
+
+	return fb;
+
+error:
+	if (gem_obj != NULL)
+		drm_gem_object_unreference_unlocked(gem_obj);
+
+	return ERR_PTR(err);
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h
new file mode 100644
index 0000000..494baa0
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_funcs.h
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_funcs.h
+ * Function prototypes for PL111 DRM
+ */
+
+#ifndef PL111_DRM_FUNCS_H_
+#define PL111_DRM_FUNCS_H_
+
+/* Platform Initialisation */
+int pl111_drm_init(struct platform_device *dev);
+void pl111_drm_exit(struct platform_device *dev);
+
+/* KDS Callbacks */
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2);
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res);
+
+/* CRTC Functions */
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev);
+struct pl111_drm_crtc *pl111_crtc_dummy_create(struct drm_device *dev);
+void pl111_crtc_destroy(struct drm_crtc *crtc);
+
+bool pl111_crtc_is_fb_currently_displayed(struct drm_device *dev,
+					struct drm_framebuffer *fb);
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+			struct drm_framebuffer *fb, bool page_flip,
+			struct drm_pending_vblank_event *event);
+
+/* Common IRQ handler */
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc);
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+			   struct drm_file *file_priv,
+			   uint32_t handle,
+			   uint32_t width,
+			   uint32_t height);
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+			    int x, int y);
+
+/* Connector Functions */
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev);
+void pl111_connector_destroy(struct drm_connector *connector);
+struct pl111_drm_connector *pl111_connector_dummy_create(struct drm_device
+								*dev);
+
+/* Encoder Functions */
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+						int possible_crtcs);
+struct pl111_drm_encoder *pl111_encoder_dummy_create(struct drm_device *dev,
+							int possible_crtcs);
+void pl111_encoder_destroy(struct drm_encoder *encoder);
+
+/* Frame Buffer Functions */
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd);
+
+/* VMA Functions */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma);
+struct page **get_pages(struct drm_gem_object *obj);
+void put_pages(struct drm_gem_object *obj, struct page **pages);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+                struct vm_area_struct *vma);
+void pl111_gem_vm_open(struct vm_area_struct *vma);
+void pl111_gem_vm_close(struct vm_area_struct *vma);
+#endif
+
+/* Suspend Functions */
+int pl111_drm_resume(struct drm_device *dev);
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state);
+
+/* GEM Functions */
+int pl111_dumb_create(struct drm_file *file_priv,
+			struct drm_device *dev,
+			struct drm_mode_create_dumb *args);
+int pl111_dumb_destroy(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle);
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle,
+			uint64_t *offset);
+void pl111_gem_free_object(struct drm_gem_object *obj);
+
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+			struct vm_area_struct *vma, size_t size);
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff);
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo);
+
+/* DMA BUF Functions */
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+				       struct dma_buf *dma_buf);
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+			uint32_t handle, uint32_t flags, int *prime_fd);
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+				struct drm_gem_object *obj, int flags);
+
+/* Pl111 Functions */
+void show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource
+					*flip_res, struct drm_framebuffer *fb);
+int clcd_disable(struct drm_crtc *crtc);
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res);
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id);
+int pl111_amba_remove(struct amba_device *dev);
+
+int pl111_device_init(struct drm_device *dev);
+void pl111_device_fini(struct drm_device *dev);
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+					struct clcd_regs *timing);
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+					struct drm_display_mode *mode);
+#endif /* PL111_DRM_FUNCS_H_ */
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c
new file mode 100644
index 0000000..13fb256
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_gem.c
@@ -0,0 +1,476 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_gem.c
+ * Implementation of the GEM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <asm/cacheflush.h>
+#include <asm/outercache.h>
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#include <linux/dma-attrs.h>
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0))
+#include <drm/drm_vma_manager.h>
+#endif
+
+void pl111_gem_free_object(struct drm_gem_object *obj)
+{
+	struct pl111_gem_bo *bo;
+	struct drm_device *dev = obj->dev;
+	DRM_DEBUG_KMS("DRM %s on drm_gem_object=%p\n", __func__, obj);
+
+	bo = PL111_BO_FROM_GEM(obj);
+
+	if (obj->import_attach)
+		drm_prime_gem_destroy(obj, bo->sgt);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	if (obj->map_list.map != NULL)
+		drm_gem_free_mmap_offset(obj);
+#else
+	drm_gem_free_mmap_offset(obj);
+#endif
+	/*
+	 * Only free the backing memory if the object has not been imported.
+	 * If it has been imported, the exporter is in charge to free that
+	 * once dmabuf's refcount becomes 0.
+	 */
+	if (obj->import_attach)
+		goto imported_out;
+
+	if (bo->type & PL111_BOT_DMA) {
+		dma_free_writecombine(dev->dev, obj->size,
+				bo->backing_data.dma.fb_cpu_addr,
+				bo->backing_data.dma.fb_dev_addr);
+	} else if (bo->backing_data.shm.pages != NULL) {
+		put_pages(obj, bo->backing_data.shm.pages);
+	}
+
+imported_out:
+	drm_gem_object_release(obj);
+
+	kfree(bo);
+
+	DRM_DEBUG_KMS("Destroyed dumb_bo handle 0x%p\n", bo);
+}
+
+static int pl111_gem_object_create(struct drm_device *dev, u64 size,
+				   u32 flags, struct drm_file *file_priv,
+				   u32 *handle)
+{
+	int ret = 0;
+	struct pl111_gem_bo *bo = NULL;
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (bo == NULL) {
+		ret = -ENOMEM;
+		goto finish;
+	}
+
+	bo->type = flags;
+
+#ifndef ARCH_HAS_SG_CHAIN
+	/*
+	 * If the ARCH can't chain we can't have non-contiguous allocs larger
+	 * than a single sg can hold.
+	 * In this case we fall back to using contiguous memory
+	 */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		long unsigned int n_pages =
+				PAGE_ALIGN(size) >> PAGE_SHIFT;
+		if (n_pages > SG_MAX_SINGLE_ALLOC) {
+			bo->type |= PL111_BOT_DMA;
+			/*
+			 * Non-contiguous allocation request changed to
+			 * contigous
+			 */
+			DRM_INFO("non-contig alloc to contig %lu > %lu pages.",
+					n_pages, SG_MAX_SINGLE_ALLOC);
+		}
+	}
+#endif
+	if (bo->type & PL111_BOT_DMA) {
+		/* scanout compatible - use physically contiguous buffer */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_attrs(dev->dev, size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL,
+					&attrs);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_attrs failed\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+#else
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_writecombine(dev->dev, size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_writecombine failed\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+		ret = drm_gem_private_object_init(dev, &bo->gem_object,
+							size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not initialise GEM object\n");
+			goto free_dma;
+		}
+#else
+		drm_gem_private_object_init(dev, &bo->gem_object, size);
+#endif
+
+	} else { /* PL111_BOT_SHM */
+		/* not scanout compatible - use SHM backed object */
+		ret = drm_gem_object_init(dev, &bo->gem_object, size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not init SHM backed GEM obj\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+		DRM_DEBUG_KMS("Num bytes: %d\n", bo->gem_object.size);
+	}
+
+	DRM_DEBUG("s=%llu, flags=0x%x, %s 0x%.8lx, type=%d\n",
+		size, flags,
+		(bo->type & PL111_BOT_DMA) ? "physaddr" : "shared page array",
+		(bo->type & PL111_BOT_DMA) ?
+			(unsigned long)bo->backing_data.dma.fb_dev_addr:
+			(unsigned long)bo->backing_data.shm.pages,
+			bo->type);
+
+	ret = drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+	if (ret != 0) {
+		DRM_ERROR("DRM failed to create GEM handle\n");
+		goto obj_release;
+	}
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&bo->gem_object);
+	
+	return 0;
+
+obj_release:
+	drm_gem_object_release(&bo->gem_object);
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+free_dma:
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	if (bo->type & PL111_BOT_DMA) {
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(dev->dev, size,
+			bo->backing_data.dma.fb_cpu_addr,
+			bo->backing_data.dma.fb_dev_addr,
+			&attrs);
+		}
+#else
+	if (bo->type & PL111_BOT_DMA)
+		dma_free_writecombine(dev->dev, size,
+			bo->backing_data.dma.fb_cpu_addr,
+			bo->backing_data.dma.fb_dev_addr);
+#endif
+free_bo:
+	kfree(bo);
+finish:
+	return ret;
+}
+
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv)
+{
+	struct drm_pl111_gem_create *args = data;
+	uint32_t bytes_pp;
+
+	/* Round bpp up, to allow for case where bpp<8 */
+	bytes_pp = args->bpp >> 3;
+	if (args->bpp & ((1 << 3) - 1))
+		bytes_pp++;
+
+	if (args->flags & ~PL111_BOT_MASK) {
+		DRM_ERROR("wrong flags: 0x%x\n", args->flags);
+		return -EINVAL;
+	}
+
+	args->pitch = ALIGN(args->width * bytes_pp, 64);
+	args->size = PAGE_ALIGN(args->pitch * args->height);
+
+	DRM_DEBUG_KMS("gem_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+			args->width, args->height, args->pitch, args->bpp,
+			bytes_pp, args->size, args->flags);
+
+	return pl111_gem_object_create(dev, args->size, args->flags, file_priv,
+					&args->handle);
+}
+
+int pl111_dumb_create(struct drm_file *file_priv,
+		struct drm_device *dev, struct drm_mode_create_dumb *args)
+{
+	uint32_t bytes_pp;
+
+	/* Round bpp up, to allow for case where bpp<8 */
+	bytes_pp = args->bpp >> 3;
+	if (args->bpp & ((1 << 3) - 1))
+		bytes_pp++;
+
+	if (args->flags) {
+		DRM_ERROR("flags must be zero: 0x%x\n", args->flags);
+		return -EINVAL;
+	}
+
+	args->pitch = ALIGN(args->width * bytes_pp, 64);
+	args->size = PAGE_ALIGN(args->pitch * args->height);
+
+	DRM_DEBUG_KMS("dumb_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+			args->width, args->height, args->pitch, args->bpp,
+			bytes_pp, args->size, args->flags);
+
+	return pl111_gem_object_create(dev, args->size,
+				       PL111_BOT_DMA | PL111_BOT_UNCACHED,
+				       file_priv, &args->handle);
+}
+
+int pl111_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
+		uint32_t handle)
+{
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+			file_priv, handle);
+	return drm_gem_handle_delete(file_priv, handle);
+}
+
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle,
+			uint64_t *offset)
+{
+	struct drm_gem_object *obj;
+	int ret = 0;
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+			file_priv, handle);
+
+	/* GEM does all our handle to object mapping */
+	obj = drm_gem_object_lookup(dev, file_priv, handle);
+	if (obj == NULL) {
+		ret = -ENOENT;
+		goto fail;
+	}
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	if (obj->map_list.map == NULL) {
+		ret = drm_gem_create_mmap_offset(obj);
+		if (ret != 0) {
+			drm_gem_object_unreference_unlocked(obj);
+			goto fail;
+		}
+	}
+#else
+	ret = drm_gem_create_mmap_offset(obj);
+	if (ret != 0) {
+		drm_gem_object_unreference_unlocked(obj);
+		goto fail;
+	}
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	*offset = (uint64_t) obj->map_list.hash.key << PAGE_SHIFT;
+#else
+	*offset = drm_vma_node_offset_addr(&obj->vma_node);
+	DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
+#endif
+
+	drm_gem_object_unreference_unlocked(obj);
+fail:
+	return ret;
+}
+
+/* sync the buffer for DMA access */
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo)
+{
+	struct drm_device *dev = bo->gem_object.dev;
+
+	if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED)) {
+		int i, npages = bo->gem_object.size >> PAGE_SHIFT;
+		struct page **pages = bo->backing_data.shm.pages;
+		bool dirty = false;
+
+		for (i = 0; i < npages; i++) {
+			if (!bo->backing_data.shm.dma_addrs[i]) {
+				DRM_DEBUG("%s: dma map page=%d bo=%p\n", __func__, i, bo);
+				 bo->backing_data.shm.dma_addrs[i] =
+					dma_map_page(dev->dev, pages[i], 0,
+					PAGE_SIZE, DMA_BIDIRECTIONAL);
+				dirty = true;
+			}
+		}
+
+		if (dirty) {
+			DRM_DEBUG("%s: zap ptes (and flush cache) bo=%p\n", __func__, bo);
+			/*
+			 * TODO MIDEGL-1813
+			 * 
+			 * Use flush_cache_page() and outer_flush_range() to
+			 * flush only the user space mappings of the dirty pages
+			 */
+			flush_cache_all();
+			outer_flush_all();
+			unmap_mapping_range(bo->gem_object.filp->f_mapping, 0,
+					bo->gem_object.size, 1);
+		}
+	}
+}
+
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff)
+{
+	struct drm_device *dev = bo->gem_object.dev;
+
+	/*
+	 * TODO MIDEGL-1808
+	 * 
+	 * The following check was meant to detect if the CPU is trying to access
+	 * a buffer that is currently mapped for DMA accesses, which is illegal
+	 * as described by the DMA-API.
+	 * 
+	 * However, some of our tests are trying to do that, which triggers the message
+	 * below and avoids dma-unmapping the pages not to annoy the DMA device but that
+	 * leads to the test failing because of caches not being properly flushed.
+	 */
+
+	/*
+	if (bo->sgt) {
+		DRM_ERROR("%s: the CPU is trying to access a dma-mapped buffer\n",  __func__);
+		return;
+	}
+	*/
+
+	if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED) &&
+	     bo->backing_data.shm.dma_addrs[pgoff]) {
+		DRM_DEBUG("%s: unmap bo=%p (s=%d), paddr=%08x\n",
+			  __func__, bo,  bo->gem_object.size,
+			  bo->backing_data.shm.dma_addrs[pgoff]);
+		dma_unmap_page(dev->dev, bo->backing_data.shm.dma_addrs[pgoff],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		bo->backing_data.shm.dma_addrs[pgoff] = 0;
+	}
+}
+
+/* Based on omapdrm driver */
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+		 struct vm_area_struct *vma, size_t size)
+{
+	DRM_DEBUG("DRM %s on drm_gem_object=%p, pl111_gem_bo=%p type=%08x\n",
+			__func__, obj, bo, bo->type);
+
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_flags |= VM_MIXEDMAP;
+
+	if (bo->type & PL111_BOT_WC) {
+		vma->vm_page_prot =
+			pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	} else if (bo->type & PL111_BOT_CACHED) {
+		/*
+		 * Objects that do not have a filp (DMA backed) can't be
+		 * mapped as cached now. Write-combine should be enough.
+		 */
+		if (WARN_ON(!obj->filp))
+			return -EINVAL;
+
+		/*
+		 * As explained in Documentation/dma-buf-sharing.txt
+		 * we need this trick so that we can manually zap ptes
+		 * in order to fake coherency.
+		 */
+		fput(vma->vm_file);
+		get_file(obj->filp);
+		vma->vm_file = obj->filp;
+
+		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	} else { /* PL111_BOT_UNCACHED */
+		vma->vm_page_prot =
+			pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+	}
+	return 0;
+}
+
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma)
+{
+	int ret;
+	struct drm_file *priv = file_priv->private_data;
+	struct drm_device *dev = priv->minor->dev;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_hash_item *hash;
+	struct drm_local_map *map = NULL;
+#else
+	struct drm_vma_offset_node *node;
+#endif
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+
+	DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+	drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash);
+	map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+	obj = map->handle;
+#else
+	node = drm_vma_offset_exact_lookup(dev->vma_offset_manager,
+			vma->vm_pgoff,
+			vma_pages(vma));
+	obj = container_of(node, struct drm_gem_object, vma_node);
+#endif
+	bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("DRM %s on pl111_gem_bo %p bo->type 0x%08x\n", __func__, bo, bo->type);
+
+	/* for an imported buffer we let the exporter handle the mmap */
+	if (obj->import_attach)
+		return dma_buf_mmap(obj->import_attach->dmabuf, vma, 0);
+
+	ret = drm_gem_mmap(file_priv, vma);
+	if (ret < 0) {
+		DRM_ERROR("failed to mmap\n");
+		return ret;
+	}
+
+	/* Our page fault handler uses the page offset calculated from the vma,
+	 * so we need to remove the gem cookie offset specified in the call.
+	 */
+	vma->vm_pgoff = 0;
+
+	return pl111_bo_mmap(obj, bo, vma, vma->vm_end - vma->vm_start);
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c
new file mode 100644
index 0000000..1d613d0
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_pl111.c
@@ -0,0 +1,417 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_pl111.c
+ * PL111 specific functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+/* This can't be called from IRQ context, due to clk_get() and board->enable */
+static int clcd_enable(struct drm_framebuffer *fb)
+{
+	__u32 cntl;
+	struct clcd_board *board;
+
+	pr_info("DRM %s\n", __func__);
+
+	clk_prepare_enable(priv.clk);
+
+	/* Enable and Power Up */
+	cntl = CNTL_LCDEN | CNTL_LCDTFT | CNTL_LCDPWR | CNTL_LCDVCOMP(1);
+	DRM_DEBUG_KMS("fb->bits_per_pixel = %d\n", fb->bits_per_pixel);
+	if (fb->bits_per_pixel == 16)
+		cntl |= CNTL_LCDBPP16_565;
+	else if (fb->bits_per_pixel == 32 && fb->depth == 24)
+		cntl |= CNTL_LCDBPP24;
+	else
+		BUG_ON(1);
+
+	cntl |= CNTL_BGR;
+
+	writel(cntl, priv.regs + CLCD_PL111_CNTL);
+
+	if (priv.amba_dev->dev.platform_data) {
+		board = priv.amba_dev->dev.platform_data;
+
+		if (board->enable)
+			board->enable(NULL);
+	}
+
+	/* Enable Interrupts */
+	writel(CLCD_IRQ_NEXTBASE_UPDATE, priv.regs + CLCD_PL111_IENB);
+
+	return 0;
+}
+
+int clcd_disable(struct drm_crtc *crtc)
+{
+	struct clcd_board *board;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	unsigned long flags;
+#endif
+
+	pr_info("DRM %s\n", __func__);
+
+	/* Disable Interrupts */
+	writel(0x00000000, priv.regs + CLCD_PL111_IENB);
+
+	if (priv.amba_dev->dev.platform_data) {
+		board = priv.amba_dev->dev.platform_data;
+
+		if (board->disable)
+			board->disable(NULL);
+	}
+
+	/* Disable and Power Down */
+	writel(0, priv.regs + CLCD_PL111_CNTL);
+
+	/* Disable clock */
+	clk_disable_unprepare(priv.clk);
+
+	pl111_crtc->last_bpp = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+	/* Release the previous buffers */
+	if (pl111_crtc->old_kds_res_set != NULL)
+		kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+
+	pl111_crtc->old_kds_res_set = NULL;
+	spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+	return 0;
+}
+
+/*
+ * To avoid a possible race where "pl111_crtc->current_update_res" has
+ * been updated (non NULL) but the corresponding scanout buffer has not been
+ * written to the base registers we must always call this function holding
+ * the "base_update_lock" spinlock with IRQs disabled (spin_lock_irqsave()).
+ */
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	struct drm_framebuffer *fb;
+	struct pl111_gem_bo *bo;
+	size_t min_size;
+	fb = flip_res->fb;
+	bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+
+
+
+	min_size = (fb->height - 1) * fb->pitches[0]
+	         + fb->width * (fb->bits_per_pixel >> 3);
+
+	BUG_ON(bo->gem_object.size < min_size);
+
+	/* Don't even attempt PL111_BOT_SHM, it's not contiguous */
+	BUG_ON(bo->type != PL111_BOT_DMA);
+
+	/*
+	 * Note the buffer for releasing after IRQ, and don't allow any more
+	 * updates until then.
+	 *
+	 * This clcd controller latches the new address on next vsync. Address
+	 * latching is indicated by CLCD_IRQ_NEXTBASE_UPDATE, and so we must
+	 * wait for that before releasing the previous buffer's kds
+	 * resources. Otherwise, we'll allow writers to write to the old buffer
+	 * whilst it is still being displayed
+	 */
+	pl111_crtc->current_update_res = flip_res;
+
+	DRM_DEBUG_KMS("Displaying fb 0x%p, dumb_bo 0x%p, physaddr %.8x\n",
+			fb, bo, bo->backing_data.dma.fb_dev_addr);
+	
+	if (drm_vblank_get(pl111_crtc->crtc.dev, pl111_crtc->crtc_index) < 0)
+		DRM_ERROR("Could not get vblank reference for crtc %d\n",
+				pl111_crtc->crtc_index);
+
+	/* Set the scanout buffer */
+	writel(bo->backing_data.dma.fb_dev_addr, priv.regs + CLCD_UBAS);
+	writel(bo->backing_data.dma.fb_dev_addr +
+		((fb->height - 1) * fb->pitches[0]), priv.regs + CLCD_LBAS);
+}
+
+void
+show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource *flip_res,
+					struct drm_framebuffer *fb)
+{
+	unsigned long irq_flags;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+	spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+	if (list_empty(&pl111_crtc->update_queue) &&
+			!pl111_crtc->current_update_res) {
+		do_flip_to_res(flip_res);
+	} else {
+		/*
+		 * Enqueue the update to occur on a future IRQ
+		 * This only happens on triple-or-greater buffering
+		 */
+		DRM_DEBUG_KMS("Deferring 3+ buffered flip to fb %p to IRQ\n",
+				fb);
+		list_add_tail(&flip_res->link, &pl111_crtc->update_queue);
+	}
+	spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+	if (!flip_res->page_flip && (pl111_crtc->last_bpp == 0 ||
+			pl111_crtc->last_bpp != fb->bits_per_pixel ||
+			!drm_mode_equal(pl111_crtc->new_mode,
+					pl111_crtc->current_mode))) {
+		struct clcd_regs timing;
+
+		pl111_convert_drm_mode_to_timing(pl111_crtc->new_mode, &timing);
+
+		DRM_DEBUG_KMS("Set timing: %08X:%08X:%08X:%08X clk=%ldHz\n",
+				timing.tim0, timing.tim1, timing.tim2,
+				timing.tim3, timing.pixclock);
+
+		/* This is the actual mode setting part */
+		clk_set_rate(priv.clk, timing.pixclock);
+
+		writel(timing.tim0, priv.regs + CLCD_TIM0);
+		writel(timing.tim1, priv.regs + CLCD_TIM1);
+		writel(timing.tim2, priv.regs + CLCD_TIM2);
+		writel(timing.tim3, priv.regs + CLCD_TIM3);
+
+		clcd_enable(fb);
+		pl111_crtc->last_bpp = fb->bits_per_pixel;
+	}
+
+	if (!flip_res->page_flip) {
+		drm_mode_destroy(flip_res->crtc->dev, pl111_crtc->current_mode);
+		pl111_crtc->current_mode = pl111_crtc->new_mode;
+		pl111_crtc->new_mode = NULL;
+	}
+
+	BUG_ON(!pl111_crtc->current_mode);
+
+	/*
+	 * If IRQs weren't enabled before, they are now. This will eventually
+	 * cause flip_res to be released via pl111_common_irq, which updates
+	 * every time the Base Address is latched (i.e. every frame, regardless
+	 * of whether we update the base address or not)
+	 */
+}
+
+irqreturn_t pl111_irq(int irq, void *data)
+{
+	u32 irq_stat;
+	struct pl111_drm_crtc *pl111_crtc = priv.pl111_crtc;
+
+	irq_stat = readl(priv.regs + CLCD_PL111_MIS);
+
+	if (!irq_stat)
+		return IRQ_NONE;
+
+	if (irq_stat & CLCD_IRQ_NEXTBASE_UPDATE)
+		pl111_common_irq(pl111_crtc);
+
+	/* Clear the interrupt once done */
+	writel(irq_stat, priv.regs + CLCD_PL111_ICR);
+
+	return IRQ_HANDLED;
+}
+
+int pl111_device_init(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	int ret;
+
+	if (priv == NULL) {
+		pr_err("%s no private data\n", __func__);
+		return -EINVAL;
+	}
+
+	if (priv->amba_dev == NULL) {
+		pr_err("%s no amba device found\n", __func__);
+		return -EINVAL;
+	}
+
+	/* set up MMIO for register access */
+	priv->mmio_start = priv->amba_dev->res.start;
+	priv->mmio_len = resource_size(&priv->amba_dev->res);
+
+	DRM_DEBUG_KMS("mmio_start=%lu, mmio_len=%u\n", priv->mmio_start,
+			priv->mmio_len);
+
+	priv->regs = ioremap(priv->mmio_start, priv->mmio_len);
+	if (priv->regs == NULL) {
+		pr_err("%s failed mmio\n", __func__);
+		return -EINVAL;
+	}
+
+	/* turn off interrupts */
+	writel(0, priv->regs + CLCD_PL111_IENB);
+
+	ret = request_irq(priv->amba_dev->irq[0], pl111_irq, 0,
+				"pl111_irq_handler", NULL);
+	if (ret != 0) {
+		pr_err("%s failed %d\n", __func__, ret);
+		goto out_mmio;
+	}
+
+	goto finish;
+
+out_mmio:
+	iounmap(priv->regs);
+finish:
+	DRM_DEBUG_KMS("pl111_device_init returned %d\n", ret);
+	return ret;
+}
+
+void pl111_device_fini(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	u32 cntl;
+
+	if (priv == NULL || priv->regs == NULL)
+		return;
+
+	free_irq(priv->amba_dev->irq[0], NULL);
+
+	cntl = readl(priv->regs + CLCD_PL111_CNTL);
+
+	cntl &= ~CNTL_LCDEN;
+	writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+	cntl &= ~CNTL_LCDPWR;
+	writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+	iounmap(priv->regs);
+}
+
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id)
+{
+	struct clcd_board *board = dev->dev.platform_data;
+	int ret;
+	pr_info("DRM %s\n", __func__);
+
+	if (!board)
+		dev_warn(&dev->dev, "board data not available\n");
+
+	ret = amba_request_regions(dev, NULL);
+	if (ret != 0) {
+		DRM_ERROR("CLCD: unable to reserve regs region\n");
+		goto out;
+	}
+
+	priv.amba_dev = dev;
+
+	priv.clk = clk_get(&priv.amba_dev->dev, NULL);
+	if (IS_ERR(priv.clk)) {
+		DRM_ERROR("CLCD: unable to get clk.\n");
+		ret = PTR_ERR(priv.clk);
+		goto clk_err;
+	}
+
+	return 0;
+
+clk_err:
+	amba_release_regions(dev);
+out:
+	return ret;
+}
+
+int pl111_amba_remove(struct amba_device *dev)
+{
+	DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+	clk_put(priv.clk);
+
+	amba_release_regions(dev);
+
+	priv.amba_dev = NULL;
+
+	return 0;
+}
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+					struct clcd_regs *timing)
+{
+	unsigned int ppl, hsw, hfp, hbp;
+	unsigned int lpp, vsw, vfp, vbp;
+	unsigned int cpl;
+
+	memset(timing, 0, sizeof(struct clcd_regs));
+
+	ppl = (mode->hdisplay / 16) - 1;
+	hsw = mode->hsync_end - mode->hsync_start - 1;
+	hfp = mode->hsync_start - mode->hdisplay - 1;
+	hbp = mode->htotal - mode->hsync_end - 1;
+
+	lpp = mode->vdisplay - 1;
+	vsw = mode->vsync_end - mode->vsync_start - 1;
+	vfp = mode->vsync_start - mode->vdisplay;
+	vbp = mode->vtotal - mode->vsync_end;
+
+	cpl = mode->hdisplay - 1;
+
+	timing->tim0 = (ppl << 2) | (hsw << 8) | (hfp << 16) | (hbp << 24);
+	timing->tim1 = lpp | (vsw << 10) | (vfp << 16) | (vbp << 24);
+	timing->tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC | TIM2_BCD | (cpl << 16);
+	timing->tim3 = 0;
+
+	timing->pixclock = mode->clock * 1000;
+}
+
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+					struct drm_display_mode *mode)
+{
+	unsigned int ppl, hsw, hfp, hbp;
+	unsigned int lpp, vsw, vfp, vbp;
+
+	ppl = (timing->tim0 >> 2) & 0x3f;
+	hsw = (timing->tim0 >> 8) & 0xff;
+	hfp = (timing->tim0 >> 16) & 0xff;
+	hbp = (timing->tim0 >> 24) & 0xff;
+
+	lpp = timing->tim1 & 0x3ff;
+	vsw = (timing->tim1 >> 10) & 0x3f;
+	vfp = (timing->tim1 >> 16) & 0xff;
+	vbp = (timing->tim1 >> 24) & 0xff;
+
+	mode->hdisplay    = (ppl + 1) * 16;
+	mode->hsync_start = ((ppl + 1) * 16) + hfp + 1;
+	mode->hsync_end   = ((ppl + 1) * 16) + hfp + hsw + 2;
+	mode->htotal      = ((ppl + 1) * 16) + hfp + hsw + hbp + 3;
+	mode->hskew       = 0;
+
+	mode->vdisplay    = lpp + 1;
+	mode->vsync_start = lpp + vfp + 1;
+	mode->vsync_end   = lpp + vfp + vsw + 2;
+	mode->vtotal      = lpp + vfp + vsw + vbp + 2;
+
+	mode->flags = 0;
+
+	mode->width_mm = 0;
+	mode->height_mm = 0;
+
+	mode->clock = timing->pixclock / 1000;
+	mode->hsync = timing->pixclock / mode->htotal;
+	mode->vrefresh = mode->hsync / mode->vtotal;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c
new file mode 100644
index 0000000..9d5ec0c
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_platform.c
@@ -0,0 +1,151 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_platform.c
+ * Implementation of the Linux platform device entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+static int pl111_platform_drm_suspend(struct platform_device *dev,
+					pm_message_t state)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+static int pl111_platform_drm_resume(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+int pl111_platform_drm_probe(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return pl111_drm_init(dev);
+}
+
+static int pl111_platform_drm_remove(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	pl111_drm_exit(dev);
+
+	return 0;
+}
+
+static struct amba_id pl111_id_table[] = {
+	{
+	.id = 0x00041110,
+	.mask = 0x000ffffe,
+	},
+	{0, 0},
+};
+
+static struct amba_driver pl111_amba_driver = {
+	.drv = {
+		.name = "clcd-pl11x",
+		},
+	.probe = pl111_amba_probe,
+	.remove = pl111_amba_remove,
+	.id_table = pl111_id_table,
+};
+
+static struct platform_driver platform_drm_driver = {
+	.probe = pl111_platform_drm_probe,
+	.remove = pl111_platform_drm_remove,
+	.suspend = pl111_platform_drm_suspend,
+	.resume = pl111_platform_drm_resume,
+	.driver = {
+			.owner = THIS_MODULE,
+			.name = DRIVER_NAME,
+		},
+};
+
+static const struct platform_device_info pl111_drm_pdevinfo = {
+	.name = DRIVER_NAME,
+	.id = -1,
+	.dma_mask = ~0UL
+};
+
+static struct platform_device *pl111_drm_device;
+
+static int __init pl111_platform_drm_init(void)
+{
+	int ret;
+
+	pr_info("DRM %s\n", __func__);
+
+	pl111_drm_device = platform_device_register_full(&pl111_drm_pdevinfo);
+	if (pl111_drm_device == NULL) {
+		pr_err("DRM platform_device_register_full() failed\n");
+		return -ENOMEM;
+	}
+
+	ret = amba_driver_register(&pl111_amba_driver);
+	if (ret != 0) {
+		pr_err("DRM amba_driver_register() failed %d\n", ret);
+		goto err_amba_reg;
+	}
+
+	ret = platform_driver_register(&platform_drm_driver);
+	if (ret != 0) {
+		pr_err("DRM platform_driver_register() failed %d\n", ret);
+		goto err_pdrv_reg;
+	}
+
+	return 0;
+
+err_pdrv_reg:
+	amba_driver_unregister(&pl111_amba_driver);
+err_amba_reg:
+	platform_device_unregister(pl111_drm_device);
+
+	return ret;
+}
+
+static void __exit pl111_platform_drm_exit(void)
+{
+	pr_info("DRM %s\n", __func__);
+
+	platform_device_unregister(pl111_drm_device);
+	amba_driver_unregister(&pl111_amba_driver);
+	platform_driver_unregister(&platform_drm_driver);
+}
+
+#ifdef MODULE
+module_init(pl111_platform_drm_init);
+#else
+late_initcall(pl111_platform_drm_init);
+#endif
+module_exit(pl111_platform_drm_exit);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_LICENSE(DRIVER_LICENCE);
+MODULE_ALIAS(DRIVER_ALIAS);
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c
new file mode 100644
index 0000000..d033566
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_suspend.c
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_suspend.c
+ * Implementation of the suspend/resume functions for PL111 DRM
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+int pl111_drm_resume(struct drm_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c
new file mode 100644
index 0000000..ff602ef
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/pl111_drm_vma.c
@@ -0,0 +1,308 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_vma.c
+ * Implementation of the VM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+/* BEGIN drivers/staging/omapdrm/omap_gem_helpers.c */
+/**
+ * drm_gem_put_pages - helper to free backing pages for a GEM object
+ * @obj: obj in question
+ * @pages: pages to free
+ */
+static void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
+				bool dirty, bool accessed)
+{
+	int i, npages;
+	struct pl111_gem_bo *bo;
+	npages = obj->size >> PAGE_SHIFT;
+	bo = PL111_BO_FROM_GEM(obj);
+	for (i = 0; i < npages; i++) {
+		if (dirty)
+			set_page_dirty(pages[i]);
+		if (accessed)
+			mark_page_accessed(pages[i]);
+		/* Undo the reference we took when populating the table */
+		page_cache_release(pages[i]);
+	}
+	drm_free_large(pages);
+}
+
+void put_pages(struct drm_gem_object *obj, struct page **pages)
+{
+	int i, npages;
+	struct pl111_gem_bo *bo;
+	npages = obj->size >> PAGE_SHIFT;
+	bo = PL111_BO_FROM_GEM(obj);
+	_drm_gem_put_pages(obj, pages, true, true);
+	if (bo->backing_data.shm.dma_addrs) {
+		for (i = 0; i < npages; i++) {
+			/* Filter pages unmapped because of CPU accesses */
+			if (!bo->backing_data.shm.dma_addrs[i])
+				continue;
+			if (!dma_mapping_error(obj->dev->dev,
+					bo->backing_data.shm.dma_addrs[i])) {
+				dma_unmap_page(obj->dev->dev,
+					bo->backing_data.shm.dma_addrs[i],
+					PAGE_SIZE,
+					DMA_BIDIRECTIONAL);
+			}
+		}
+		kfree(bo->backing_data.shm.dma_addrs);
+		bo->backing_data.shm.dma_addrs = NULL;
+	}
+}
+
+/**
+ * drm_gem_get_pages - helper to allocate backing pages for a GEM object
+ * @obj: obj in question
+ * @gfpmask: gfp mask of requested pages
+ */
+static struct page **_drm_gem_get_pages(struct drm_gem_object *obj,
+					gfp_t gfpmask)
+{
+	struct inode *inode;
+	struct address_space *mapping;
+	struct page *p, **pages;
+	int i, npages;
+
+	/* This is the shared memory object that backs the GEM resource */
+	inode = obj->filp->f_path.dentry->d_inode;
+	mapping = inode->i_mapping;
+
+	npages = obj->size >> PAGE_SHIFT;
+
+	pages = drm_malloc_ab(npages, sizeof(struct page *));
+	if (pages == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	gfpmask |= mapping_gfp_mask(mapping);
+
+	for (i = 0; i < npages; i++) {
+		p = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
+		if (IS_ERR(p))
+			goto fail;
+		pages[i] = p;
+
+		/*
+		 * There is a hypothetical issue w/ drivers that require
+		 * buffer memory in the low 4GB.. if the pages are un-
+		 * pinned, and swapped out, they can end up swapped back
+		 * in above 4GB.  If pages are already in memory, then
+		 * shmem_read_mapping_page_gfp will ignore the gfpmask,
+		 * even if the already in-memory page disobeys the mask.
+		 *
+		 * It is only a theoretical issue today, because none of
+		 * the devices with this limitation can be populated with
+		 * enough memory to trigger the issue.  But this BUG_ON()
+		 * is here as a reminder in case the problem with
+		 * shmem_read_mapping_page_gfp() isn't solved by the time
+		 * it does become a real issue.
+		 *
+		 * See this thread: http://lkml.org/lkml/2011/7/11/238
+		 */
+		BUG_ON((gfpmask & __GFP_DMA32) &&
+			(page_to_pfn(p) >= 0x00100000UL));
+	}
+
+	return pages;
+
+fail:
+	while (i--)
+		page_cache_release(pages[i]);
+
+	drm_free_large(pages);
+	return ERR_PTR(PTR_ERR(p));
+}
+
+struct page **get_pages(struct drm_gem_object *obj)
+{
+	struct pl111_gem_bo *bo;
+	bo = PL111_BO_FROM_GEM(obj);
+
+	if (bo->backing_data.shm.pages == NULL) {
+		struct page **p;
+		int npages = obj->size >> PAGE_SHIFT;
+		int i;
+
+		p = _drm_gem_get_pages(obj, GFP_KERNEL);
+		if (IS_ERR(p))
+			return ERR_PTR(-ENOMEM);
+
+		bo->backing_data.shm.pages = p;
+
+		if (bo->backing_data.shm.dma_addrs == NULL) {
+			bo->backing_data.shm.dma_addrs =
+				kzalloc(npages * sizeof(dma_addr_t),
+					GFP_KERNEL);
+			if (bo->backing_data.shm.dma_addrs == NULL)
+				goto error_out;
+		}
+
+		if (!(bo->type & PL111_BOT_CACHED)) {
+			for (i = 0; i < npages; ++i) {
+				bo->backing_data.shm.dma_addrs[i] =
+					dma_map_page(obj->dev->dev, p[i], 0, PAGE_SIZE,
+						DMA_BIDIRECTIONAL);
+				if (dma_mapping_error(obj->dev->dev,
+						bo->backing_data.shm.dma_addrs[i]))
+					goto error_out;
+			}
+		}
+	}
+
+	return bo->backing_data.shm.pages;
+
+error_out:
+	put_pages(obj, bo->backing_data.shm.pages);
+	bo->backing_data.shm.pages = NULL;
+	return ERR_PTR(-ENOMEM);
+}
+
+/* END drivers/staging/omapdrm/omap_gem_helpers.c */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page **pages;
+	unsigned long pfn;
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	mutex_lock(&dev->struct_mutex);
+
+	/*
+	 * Our mmap calls setup a valid vma->vm_pgoff
+	 * so we can use vmf->pgoff
+	 */
+
+	if (bo->type & PL111_BOT_DMA) {
+		pfn = (bo->backing_data.dma.fb_dev_addr >> PAGE_SHIFT) +
+				vmf->pgoff;
+	} else { /* PL111_BOT_SHM */
+		pages = get_pages(obj);
+		if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev,
+				"could not get pages: %ld\n", PTR_ERR(pages));
+			ret = PTR_ERR(pages);
+			goto error;
+		}
+		pfn = page_to_pfn(pages[vmf->pgoff]);
+		pl111_gem_sync_to_cpu(bo, vmf->pgoff);
+	}
+
+	DRM_DEBUG("bo=%p physaddr=0x%.8x for offset 0x%x\n",
+				bo, PFN_PHYS(pfn), PFN_PHYS(vmf->pgoff));
+
+	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
+
+error:
+	mutex_unlock(&dev->struct_mutex);
+
+	switch (ret) {
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+/*
+ * The core drm_vm_ functions in kernel 3.4 are not ready
+ * to handle dma_buf cases where vma->vm_file->private_data
+ * cannot be accessed to get the device.
+ * 
+ * We use these functions from 3.5 instead where the device
+ * pointer is passed explicitly.
+ *
+ * However they aren't exported from the kernel until 3.10
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+                struct vm_area_struct *vma)
+{
+	struct drm_vma_entry *vma_entry;
+
+	DRM_DEBUG("0x%08lx,0x%08lx\n",
+		vma->vm_start, vma->vm_end - vma->vm_start);
+	atomic_inc(&dev->vma_count);
+
+	vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL);
+	if (vma_entry) {
+		vma_entry->vma = vma;
+		vma_entry->pid = current->pid;
+		list_add(&vma_entry->head, &dev->vmalist);
+	}
+}
+
+void pl111_drm_vm_close_locked(struct drm_device *dev,
+                struct vm_area_struct *vma)
+{
+	struct drm_vma_entry *pt, *temp;
+
+	DRM_DEBUG("0x%08lx,0x%08lx\n",
+		vma->vm_start, vma->vm_end - vma->vm_start);
+	atomic_dec(&dev->vma_count);
+
+	list_for_each_entry_safe(pt, temp, &dev->vmalist, head) {
+		if (pt->vma == vma) {
+			list_del(&pt->head);
+			kfree(pt);
+			break;
+		}
+	}
+}
+
+void pl111_gem_vm_open(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+
+	drm_gem_object_reference(obj);
+
+	mutex_lock(&obj->dev->struct_mutex);
+	pl111_drm_vm_open_locked(obj->dev, vma);
+	mutex_unlock(&obj->dev->struct_mutex);
+}
+
+void pl111_gem_vm_close(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct drm_device *dev = obj->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	pl111_drm_vm_close_locked(obj->dev, vma);
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&dev->struct_mutex);
+}
+#endif
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/pl111/sconscript b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/sconscript
new file mode 100644
index 0000000..5c47de7
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/pl111/sconscript
@@ -0,0 +1,52 @@
+#
+# (C) COPYRIGHT 2010-2013, 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import shutil
+Import('env')
+
+# Generate a build environment for the integration tests, taking a copy of the top-level build environment 
+# (env) as a start.
+drm_env = env.Clone()
+
+# Xorg uses C++ style comments and 'inline' keyword
+if '-std=c89' in drm_env['CFLAGS']:
+	drm_env['CFLAGS'].remove('-std=c89')
+
+# X11 generates a lot of warnings
+if '-Werror' in drm_env['CCFLAGS']:
+	drm_env['CCFLAGS'].remove('-Werror')
+
+#remove the 'lib'prefix
+drm_env['LIBPREFIX'] = ''
+
+src = Glob('*.c')
+
+if drm_env.GetOption('clean') :
+	drm_env.Execute(Action("make clean", 'clean [pl111]'))
+	cmd = drm_env.Command('$STATIC_LIB_PATH/mali_drm.ko', src, [])
+else:
+	# The target is mali_drm.ko, built from the source in pl111_drm/pl111, via the action makeAction
+	# mali_drm.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+	# kernel build system, after which it can be installed to the directory specified if
+	# "libs_install" is set; this is done by LibTarget.
+	makeAction=Action("cd ${SOURCE.dir} && make MALI_DEBUG=${debug} && cp pl111_drm.ko $STATIC_LIB_PATH/mali_drm.ko", '$MAKECOMSTR')
+	cmd = drm_env.Command('$STATIC_LIB_PATH/mali_drm.ko', src, [makeAction])
+
+# need Module.symvers from drm.ko
+#drm_env.Depends('$STATIC_LIB_PATH/pl111_drm.ko', '$STATIC_LIB_PATH/drm.ko')
+
+drm_env.KernelObjTarget('x11', cmd)
+
diff --git a/midgard/r16p0/kernel/drivers/gpu/drm/sconscript b/midgard/r16p0/kernel/drivers/gpu/drm/sconscript
new file mode 100644
index 0000000..a90fa89
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/drm/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+if 'x11' in env['winsys'] or 'gbm' in env['winsys']:
+	# pl111_drm isn't released so only try to build it if it's there
+	if Glob('pl111/sconscript') and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'):
+		SConscript('pl111/sconscript')
+
+#SConscript('dummy_drm/sconscript')
diff --git a/midgard/r16p0/kernel/drivers/gpu/sconscript b/midgard/r16p0/kernel/drivers/gpu/sconscript
new file mode 100644
index 0000000..729dbda
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/gpu/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+SConscript('arm/sconscript')
+
+if Glob('drm/sconscript'):
+	SConscript('drm/sconscript')
diff --git a/midgard/r16p0/kernel/drivers/sconscript b/midgard/r16p0/kernel/drivers/sconscript
new file mode 100644
index 0000000..71b194e
--- /dev/null
+++ b/midgard/r16p0/kernel/drivers/sconscript
@@ -0,0 +1,116 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import glob
+import os
+import shutil
+import subprocess
+
+from SCons.Script import SCons
+
+Import("env")
+
+# -----------------------------------------------------------------------------
+g_kernel_config = dict()
+
+def kernel_config_enabled(sEnv, option):
+    """Return true if a given kernel config option is enabled"""
+    global g_kernel_config
+    if not g_kernel_config:
+        config_file = os.path.join(sEnv["ENV"]["KDIR"], "include/config/auto.conf")
+        print "Parsing kernel config '%s'." % config_file
+        with open(config_file, "rt") as fp:
+            for line in fp.readlines():
+                try:
+                    (key, val) = line.split("=")
+                    g_kernel_config[key.strip()] = val.strip()
+                except ValueError:
+                    pass
+    if option not in g_kernel_config:
+        return False
+    val = g_kernel_config[option]
+    if val == "y":
+        return True
+    return False
+
+env.AddMethod(kernel_config_enabled, "KernelConfigEnabled")
+
+def kernel_module_builder(env = None, target = None, source = None):
+    assert (len(target) == 1 and 'M' in env and 'make_args' in env and
+            'built_module' in env)
+
+    module_dir = str(env['M'])
+    cmd = ['make', '-j%d' % GetOption('num_jobs')] + env['make_args']
+    status = subprocess.call(cmd, cwd = module_dir, env = env['ENV'])
+    if status:
+        return status
+
+    shutil.copy2(env['built_module'], target[0].abspath)
+    return 0 # Success (SCons will catch any exceptions thrown by copy2).
+
+action = SCons.Action.Action(kernel_module_builder, "[KBUILD] $TARGET")
+builder = env.Builder(action = action)
+env.Append(BUILDERS = {'KernelModuleBuilder': builder})
+
+def build_kernel_module(env, target, sources, M = None, make_args = [],
+                        built_module = None):
+    if not M:
+        M = Dir('.').srcnode().abspath
+    if not built_module:
+        built_module = os.path.basename(str(target))
+    built_module = os.path.join(M, built_module)
+
+    env_kern = env.Clone()
+    kdir = env_kern['ENV']['KDIR']
+    env_kern.Append(CPPPATH = [M, '#kernel/include', os.path.join(kdir, 'include')])
+    mod = env_kern.KernelModuleBuilder(target, sources, M = M,
+                                       make_args = make_args,
+                                       built_module = built_module)
+
+    # Kbuild implicitly #includes kconfig.h (which includes autoconf.h) when
+    # building kernel module source files.
+    env_kern.Depends(mod, os.path.join(kdir, 'include/generated/autoconf.h'))
+    # Built files may change depending on CONFIG_* values in auto.conf.
+    env_kern.Depends(mod, os.path.join(kdir, 'include/config/auto.conf'))
+
+    for f in ['Kconfig', 'Kbuild', 'Makefile*']:
+        env_kern.Depends(mod, glob.glob(os.path.join(M, f)))
+
+    scanner = SCons.Scanner.C.CScanner()
+    all_headers = set()
+    for src in Flatten(sources):
+        src = env_kern.File(src)
+        (base, ext) = os.path.splitext(src.abspath)
+        if ext == '.c':
+            env_kern.Clean(mod, base + '.o')
+            # Scan for header dependencies explicitly.
+            headers = scanner(src, env_kern, scanner.path_function(env_kern))
+            env_kern.Depends(mod, headers)
+            all_headers.update(headers)
+
+    env_kern.Clean(mod, os.path.join(M, 'Module.symvers'))
+    env_kern.Clean(mod, os.path.join(M, 'module.order'))
+
+    return mod
+
+env.AddMethod(build_kernel_module, 'BuildKernelModule')
+
+if Glob('base/sconscript'):
+	SConscript('base/sconscript')
+
+if Glob('video/sconscript'):
+	SConscript('video/sconscript')
+
+SConscript('gpu/sconscript')
diff --git a/midgard/r16p0/kernel/include/linux/dma-buf-test-exporter.h b/midgard/r16p0/kernel/include/linux/dma-buf-test-exporter.h
new file mode 100644
index 0000000..98984ba
--- /dev/null
+++ b/midgard/r16p0/kernel/include/linux/dma-buf-test-exporter.h
@@ -0,0 +1,78 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_
+#define _LINUX_DMA_BUF_TEST_EXPORTER_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+#define DMA_BUF_TE_VER_MAJOR 1
+#define DMA_BUF_TE_VER_MINOR 0
+#define DMA_BUF_TE_ENQ 0x642d7465
+#define DMA_BUF_TE_ACK 0x68692100
+
+struct dma_buf_te_ioctl_version
+{
+	int op;    /**< Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */
+	int major; /**< Major version */
+	int minor; /**< Minor version */
+};
+
+struct dma_buf_te_ioctl_alloc
+{
+	__u64 size; /* size of buffer to allocate, in pages */
+};
+
+struct dma_buf_te_ioctl_status
+{
+	/* in */
+	int fd; /* the dma_buf to query, only dma_buf objects exported by this driver is supported */
+	/* out */
+	int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */
+	int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */
+	int cpu_mappings;    /* number of cpu mappings (active 'mmap's) */
+};
+
+struct dma_buf_te_ioctl_set_failing
+{
+	/* in */
+	int fd; /* the dma_buf to set failure mode for, only dma_buf objects exported by this driver is supported */
+
+	/* zero = no fail injection, non-zero = inject failure */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+};
+
+struct dma_buf_te_ioctl_fill
+{
+	int fd;
+	unsigned int value;
+};
+
+#define DMA_BUF_TE_IOCTL_BASE 'E'
+/* Below all returning 0 if successful or -errcode except DMA_BUF_TE_ALLOC which will return fd or -errcode */
+#define DMA_BUF_TE_VERSION         _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version)
+#define DMA_BUF_TE_ALLOC           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_QUERY           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status)
+#define DMA_BUF_TE_SET_FAILING     _IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing)
+#define DMA_BUF_TE_ALLOC_CONT      _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_FILL            _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill)
+
+#endif /* _LINUX_DMA_BUF_TEST_EXPORTER_H_ */
diff --git a/midgard/r16p0/kernel/include/linux/kds.h b/midgard/r16p0/kernel/include/linux/kds.h
new file mode 100644
index 0000000..1346eda
--- /dev/null
+++ b/midgard/r16p0/kernel/include/linux/kds.h
@@ -0,0 +1,173 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KDS_H_
+#define _KDS_H_
+
+#include <linux/list.h>
+#include <linux/workqueue.h>
+
+#define KDS_WAIT_BLOCKING (ULONG_MAX)
+
+struct kds_resource_set;
+
+typedef void (*kds_callback_fn) (void *callback_parameter, void *callback_extra_parameter);
+
+struct kds_callback
+{
+	kds_callback_fn  user_cb; /* real cb */
+	int direct;               /* do direct or queued call? */
+	struct workqueue_struct *wq;
+};
+
+struct kds_link
+{
+	struct kds_resource_set *parent;
+	struct list_head         link;
+	unsigned long            state;
+};
+
+struct kds_resource
+{
+	struct kds_link waiters;
+};
+
+/* callback API */
+
+/* Initialize a callback object.
+ *
+ * Typically created per context or per hw resource.
+ *
+ * Callbacks can be performed directly if no nested locking can
+ * happen in the client.
+ *
+ * Nested locking can occur when a lock is held during the kds_async_waitall or
+ * kds_resource_set_release call. If the callback needs to take the same lock
+ * nested locking will happen.
+ *
+ * If nested locking could happen non-direct callbacks can be requested.
+ * Callbacks will then be called asynchronous to the triggering call.
+ */
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb);
+
+/* Terminate the use of a callback object.
+ *
+ * If the callback object was set up as non-direct
+ * any pending callbacks will be flushed first.
+ * Note that to avoid a deadlock the lock callbacks needs
+ * can't be held when a callback object is terminated.
+ */
+void kds_callback_term(struct kds_callback *cb);
+
+
+/* resource object API */
+
+/* initialize a resource handle for a shared resource */
+void kds_resource_init(struct kds_resource * const resource);
+
+/*
+ * Will return 0 on success.
+ * If the resource is being used or waited -EBUSY is returned.
+ * The caller should NOT try to terminate a resource that could still have clients.
+ * After the function returns the resource is no longer known by kds.
+ */
+int kds_resource_term(struct kds_resource *resource);
+
+/* Asynchronous wait for a set of resources.
+ * Callback will be called when all resources are available.
+ * If all the resources was available the callback will be called before kds_async_waitall returns.
+ * So one must not hold any locks the callback code-flow can take when calling kds_async_waitall.
+ * Caller considered to own/use the resources until \a kds_rset_release is called.
+ * exclusive_access_bitmap is a bitmap where a high bit means exclusive access while a low bit means shared access.
+ * Use the Linux __set_bit API, where the index of the buffer to control is used as the bit index.
+ *
+ * Standard Linux error return value.
+ */
+int kds_async_waitall(
+		struct kds_resource_set ** const pprset,
+		struct kds_callback      *cb,
+		void                     *callback_parameter,
+		void                     *callback_extra_parameter,
+		int                       number_resources,
+		unsigned long            *exclusive_access_bitmap,
+		struct kds_resource     **resource_list);
+
+/* Synchronous wait for a set of resources.
+ * Function will return when one of these have happened:
+ * - all resources have been obtained
+ * - timeout lapsed while waiting
+ * - a signal was received while waiting
+ *
+ * To wait without a timeout, specify KDS_WAIT_BLOCKING for \a jifies_timeout, otherwise
+ * the timeout in jiffies. A zero timeout attempts to obtain all resources and returns
+ * immediately with a timeout if all resources could not be obtained.
+ *
+ * Caller considered to own/use the resources when the function returns.
+ * Caller must release the resources using \a kds_rset_release.
+ *
+ * Calling this function while holding already locked resources or other locking primitives is dangerous.
+ * One must if this is needed decide on a lock order of the resources and/or the other locking primitives
+ * and always take the resources/locking primitives in the specific order.
+ *
+ * Use the ERR_PTR framework to decode the return value.
+ * NULL = time out
+ * If IS_ERR then PTR_ERR gives:
+ *  ERESTARTSYS = signal received, retry call after signal
+ *  all other values = internal error, lock failed
+ * Other values  = successful wait, now the owner, must call kds_resource_set_release
+ */
+struct kds_resource_set *kds_waitall(
+		int                   number_resources,
+		unsigned long        *exclusive_access_bitmap,
+		struct kds_resource **resource_list,
+		unsigned long         jifies_timeout);
+
+/* Release resources after use.
+ * Caller must handle that other async callbacks will trigger,
+ * so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your poiner to handle a race
+ * between a cancelation and a completion.
+ *
+ * If the caller can't guarantee that a race can't occur then
+ * the passed in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ */
+void kds_resource_set_release(struct kds_resource_set **pprset);
+
+/* Release resources after use and wait callbacks to complete.
+ * Caller must handle that other async callbacks will trigger,
+ * so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your poiner to handle a race
+ * between a cancelation and a completion.
+ *
+ * If the caller can't guarantee that a race can't occur then
+ * the passed in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ *
+ * This should be used to cancel waits which are pending on a kds
+ * resource.
+ *
+ * It is a bug to call this from atomic contexts and from within
+ * a kds callback that now owns the kds_rseource.
+ */
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset);
+#endif /* _KDS_H_ */
diff --git a/midgard/r16p0/kernel/include/linux/ump-common.h b/midgard/r16p0/kernel/include/linux/ump-common.h
new file mode 100644
index 0000000..0015bda
--- /dev/null
+++ b/midgard/r16p0/kernel/include/linux/ump-common.h
@@ -0,0 +1,252 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump-common.h
+ *
+ * This file contains some common enum values used both in both the user and kernel space side of UMP.
+ */
+
+#ifndef _UMP_COMMON_H_
+#define _UMP_COMMON_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#define UMP_UINT32_MAX (4294967295U)
+#define UMP_UINT64_MAX (18446744073709551615ULL)
+
+#ifdef __GNUC__
+	#define CHECK_RESULT        __attribute__((__warn_unused_result__))
+	#define INLINE	__inline__
+#else
+	#define CHECK_RESULT
+	#define INLINE __inline
+#endif
+
+#ifndef STATIC
+	#define STATIC static
+#endif
+
+/**
+ * Values to identify major and minor version of UMP
+ */
+#define UMP_VERSION_MAJOR 2
+#define UMP_VERSION_MINOR 0
+
+/**
+ * Typedef for a secure ID, a system wide identifier for UMP memory buffers.
+ */
+typedef int32_t ump_secure_id;
+
+/**
+ * Value to indicate an invalid secure Id.
+ */
+#define UMP_INVALID_SECURE_ID  ((ump_secure_id)0)
+
+/**
+ * UMP error codes.
+ */
+typedef enum
+{
+	UMP_OK    = 0, /**< indicates success */
+	UMP_ERROR = 1  /**< indicates failure */
+} ump_result;
+
+/**
+ * Allocation flag bits.
+ *
+ * ump_allocate accepts zero or more flags to specify the type of memory to allocate and how to expose it to devices.
+ *
+ * For each supported device there are 4 flags to control access permissions and give usage characteristic hints to optimize the allocation/mapping.
+ * They are;
+ * @li @a UMP_PROT_<device>_RD   read permission
+ * @li @a UMP_PROT_<device>_WR   write permission
+ * @li @a UMP_HINT_<device>_RD   read often
+ * @li @a UMP_HINT_<device>_WR   written often
+ *
+ * 5 devices are currently supported, with a device being the CPU itself.
+ * The other 4 devices will be mapped to real devices per SoC design.
+ * They are just named W,X,Y,Z by UMP as it has no knowledge of their real names/identifiers.
+ * As an example device W could be a camera device while device Z could be an ARM GPU device, leaving X and Y unused.
+ *
+ * 2 additional flags control the allocation;
+ * @li @a UMP_CONSTRAINT_PHYSICALLY_LINEAR   the allocation must be physical linear. Typical for devices without an MMU and no IOMMU to help it.
+ * @li @a UMP_PROT_SHAREABLE                 the allocation can be shared with other processes on the system. Without this flag the returned allocation won't be resolvable in other processes.
+ *
+ * All UMP allocation are growable unless they're @a UMP_PROT_SHAREABLE.
+ * The hint bits should be used to indicate the access pattern so the driver can select the most optimal memory type and cache settings based on the what the system supports.
+ */
+typedef enum
+{
+	/* Generic helpers */
+	UMP_PROT_DEVICE_RD = (1u << 0),
+	UMP_PROT_DEVICE_WR = (1u << 1),
+	UMP_HINT_DEVICE_RD = (1u << 2),
+	UMP_HINT_DEVICE_WR = (1u << 3),
+	UMP_DEVICE_MASK = 0xF,
+	UMP_DEVICE_CPU_SHIFT = 0,
+	UMP_DEVICE_W_SHIFT = 4,
+	UMP_DEVICE_X_SHIFT = 8,
+	UMP_DEVICE_Y_SHIFT = 12,
+	UMP_DEVICE_Z_SHIFT = 16,
+
+	/* CPU protection and hints. */
+	UMP_PROT_CPU_RD = (1u <<  0),
+	UMP_PROT_CPU_WR = (1u <<  1),
+	UMP_HINT_CPU_RD = (1u <<  2),
+	UMP_HINT_CPU_WR = (1u <<  3),
+
+	/* device W */
+	UMP_PROT_W_RD = (1u <<  4),
+	UMP_PROT_W_WR = (1u <<  5),
+	UMP_HINT_W_RD = (1u <<  6),
+	UMP_HINT_W_WR = (1u <<  7),
+
+	/* device X */
+	UMP_PROT_X_RD = (1u <<  8),
+	UMP_PROT_X_WR = (1u <<  9),
+	UMP_HINT_X_RD = (1u << 10),
+	UMP_HINT_X_WR = (1u << 11),
+	
+	/* device Y */
+	UMP_PROT_Y_RD = (1u << 12),
+	UMP_PROT_Y_WR = (1u << 13),
+	UMP_HINT_Y_RD = (1u << 14),
+	UMP_HINT_Y_WR = (1u << 15),
+
+	/* device Z */
+	UMP_PROT_Z_RD = (1u << 16),
+	UMP_PROT_Z_WR = (1u << 17),
+	UMP_HINT_Z_RD = (1u << 18),
+	UMP_HINT_Z_WR = (1u << 19),
+
+	/* 20-26 reserved for future use */
+	UMPP_ALLOCBITS_UNUSED = (0x7Fu << 20),
+	/** Allocations marked as @ UMP_CONSTRAINT_UNCACHED won't be mapped as cached by the cpu  */
+	UMP_CONSTRAINT_UNCACHED = (1u << 27),
+	/** Require 32-bit physically addressable memory */
+	UMP_CONSTRAINT_32BIT_ADDRESSABLE = (1u << 28),
+	/** For devices without an MMU and with no IOMMU assistance. */
+	UMP_CONSTRAINT_PHYSICALLY_LINEAR = (1u << 29),
+	/** Shareable must be set to allow the allocation to be used by other processes, the default is non-shared */
+	UMP_PROT_SHAREABLE = (1u << 30)
+	/* (1u << 31) should not be used to ensure compiler portability */
+} ump_allocation_bits;
+
+/**
+ * ump_allocation_bits combination argument type.
+ *
+ * Type used to pass zero or more bits from the @ref ump_allocation_bits enum
+ */
+typedef uint32_t ump_alloc_flags;
+
+
+/**
+ *  Default allocation flags for UMP v1 compatible allocations.
+ */
+#define UMP_V1_API_DEFAULT_ALLOCATION_FLAGS		UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | \
+												UMP_PROT_W_RD | UMP_PROT_W_WR |	\
+												UMP_PROT_X_RD | UMP_PROT_X_WR |	\
+												UMP_PROT_Y_RD | UMP_PROT_Y_WR |	\
+												UMP_PROT_Z_RD | UMP_PROT_Z_WR |	\
+												UMP_PROT_SHAREABLE |	\
+												UMP_CONSTRAINT_32BIT_ADDRESSABLE
+
+/**
+ * CPU cache sync operations.
+ *
+ * Cache synchronization operations to pass to @ref ump_cpu_msync_now
+ */
+enum
+{
+	/**
+	 * Cleans any dirty cache lines to main memory, but the data will still be available in the cache.
+	 * After a clean the contents of memory is considered to be "owned" by the device.
+	 * */
+	UMP_MSYNC_CLEAN = 1,
+
+	/** Cleans any dirty cache lines to main memory and Ejects all lines from the cache.
+	 * After an clean&invalidate the contents of memory is considered to be "owned" by the CPU.
+	 * Any subsequent access will fetch data from main memory.
+	 *
+	 * @note Due to CPUs doing speculative prefetching a UMP_MSYNC_CLEAN_AND_INVALIDATE must be done before and after interacting with hardware.
+	 * */
+	UMP_MSYNC_CLEAN_AND_INVALIDATE
+
+};
+
+typedef uint32_t ump_cpu_msync_op;
+
+/**
+ * Memory import types supported.
+ * If new import types are added they will appear here.
+ * They must be added before UMPP_EXTERNAL_MEM_COUNT and
+ * must be assigned an explicit sequantial number.
+ *
+ * @li UMP_EXTERNAL_MEM_TYPE_ION - Import an ION allocation
+ *                                 Takes a int* (pointer to a file descriptor)
+ *                                 Another ION reference is taken which is released on the final ump_release
+ */
+enum ump_external_memory_type
+{
+	UMPP_EXTERNAL_MEM_TYPE_UNUSED = 0, /* reserve type 0 */
+	UMP_EXTERNAL_MEM_TYPE_ION = 1,
+	UMPP_EXTERNAL_MEM_COUNT
+};
+
+/** @name UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Allocation constraints.
+ *
+ * Allocation flags to pass @ref ump_ref_drv_allocate
+ *
+ * UMP v1 API only.
+ */
+typedef enum
+{
+	/** the allocation is mapped as noncached. */
+	UMP_REF_DRV_CONSTRAINT_NONE = 0,
+	/** not supported. */
+	UMP_REF_DRV_CONSTRAINT_PHYSICALLY_LINEAR = 1,
+	/** the allocation is mapped as cached by the cpu. */
+	UMP_REF_DRV_CONSTRAINT_USE_CACHE = 4
+} ump_alloc_constraints;
+
+/* @} */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _UMP_COMMON_H_ */
diff --git a/midgard/r16p0/kernel/include/linux/ump-import.h b/midgard/r16p0/kernel/include/linux/ump-import.h
new file mode 100644
index 0000000..89ce727
--- /dev/null
+++ b/midgard/r16p0/kernel/include/linux/ump-import.h
@@ -0,0 +1,99 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IMPORT_H_
+#define _UMP_IMPORT_H_
+
+#include <linux/ump.h>
+#include <linux/module.h>
+
+/**
+ * UMP import module info.
+ * Contains information about the Linux module providing the import module,
+ * used to block unloading of the Linux module while imported memory exists.
+ * Lists the functions implementing the UMP import functions.
+ */
+struct ump_import_handler
+{
+	/**
+	 * Linux module of the import handler
+	 */
+	struct module * linux_module;
+
+	/**
+	 * UMP session usage begin.
+	 *
+	 * Called when a UMP session first is bound to the handler.
+	 * Typically used to set up any import module specific per-session data.
+	 * The function returns a pointer to this data in the output pointer custom_session_data
+	 * which will be passed to \a session_end and \a import.
+	 *
+	 * Note: custom_session_data must be set to non-NULL if successful.
+	 * If no pointer is needed set it a magic value to validate instead.
+	 *
+	 * @param[out] custom_session_data Pointer to a generic pointer where any data can be stored
+	 * @return 0 on success, error code if the session could not be initiated.
+	 */
+	int (*session_begin)(void ** custom_session_data);
+
+	/**
+	 * UMP session usage end.
+	 *
+	 * Called when a UMP session is no longer using the handler.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 */
+	void (*session_end)(void * custom_session_data);
+
+	/**
+	 * Import request.
+	 *
+	 * Called when a client has asked to import a resource of the type the import module was installed for.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * The requested flags must be verified to be valid to apply to the imported memory.
+	 * If not valid return UMP_DD_INVALID_MEMORY_HANDLE.
+	 * If the flags are found to be valid call \a ump_dd_create_from_phys_blocks_64 to create a handle.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 * @param[in] phandle Pointer to the handle to import
+	 * @param     flags The requested UMPv2 flags to assign to the imported handle
+	 * @return UMP_DD_INVALID_MEMORY_HANDLE if the import failed, a valid ump handle on success
+	 */
+	ump_dd_handle (*import)(void * custom_session_data, void * phandle, ump_alloc_flags flags);
+};
+
+/**
+ * Import module registration.
+ * Registers a ump_import_handler structure for a memory type.
+ * @param     type    Type of the memory to register a handler for
+ * @param[in] handler Handler strcture to install
+ * @return 0 on success, a Linux error code on failure
+ */
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler);
+
+/**
+ * Import module deregistration.
+ * Uninstalls the handler for the given memory type.
+ * @param type Type of the memory to unregister the handler for
+ */
+void ump_import_module_unregister(enum ump_external_memory_type type);
+
+#endif /* _UMP_IMPORT_H_ */
diff --git a/midgard/r16p0/kernel/include/linux/ump-ioctl.h b/midgard/r16p0/kernel/include/linux/ump-ioctl.h
new file mode 100644
index 0000000..caf4c0b
--- /dev/null
+++ b/midgard/r16p0/kernel/include/linux/ump-ioctl.h
@@ -0,0 +1,152 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IOCTL_H_
+#define _UMP_IOCTL_H_
+
+#include <linux/ump-common.h>
+
+/*
+ * The order and size of the members of these have been chosen so the structures look the same in 32-bit and 64-bit builds.
+ * If any changes are done build the ump_struct_size_checker test for 32-bit and 64-bit targets. Both must compile successfully to commit.
+ */
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union ump_pointer
+{
+	void * value; /**< client should store their pointers here */
+	uint32_t compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	uint64_t sizer; /**< Force 64-bit storage for all clients regardless */
+} ump_pointer;
+
+/**
+ * UMP allocation request.
+ * Used when performing ump_allocate
+ */
+typedef struct ump_k_allocate
+{
+	uint64_t size; /**< [in] Size in bytes to allocate */
+	ump_secure_id secure_id; /**< [out] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [in] Flags to use when allocating */
+} ump_k_allocate;
+
+/**
+ * UMP size query request.
+ * Used when performing ump_size_get
+ */
+typedef struct ump_k_sizequery
+{
+	uint64_t size; /**< [out] Size of allocation */
+	ump_secure_id secure_id; /**< [in] ID of allocation to query the size of */
+	uint32_t padding; /* don't remove */
+} ump_k_sizequery;
+
+/**
+ * UMP cache synchronization request.
+ * Used when performing ump_cpu_msync_now
+ */
+typedef struct ump_k_msync
+{
+	ump_pointer mapped_ptr; /**< [in] CPU VA to perform cache operation on */
+	ump_secure_id secure_id; /**< [in] ID of allocation to perform cache operation on */
+	ump_cpu_msync_op cache_operation; /**< [in] Cache operation to perform */
+	uint64_t size; /**< [in] Size in bytes of the range to synchronize */
+} ump_k_msync;
+
+/**
+ * UMP memory retain request.
+ * Used when performing ump_retain
+ */
+typedef struct ump_k_retain
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to retain a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_retain;
+
+/**
+ * UMP memory release request.
+ * Used when performing ump_release
+ */
+typedef struct ump_k_release
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to release a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_release;
+
+typedef struct ump_k_import
+{
+	ump_pointer phandle;                /**< [in]  Pointer to handle to import */
+	uint32_t type;                           /**< [in]  Type of handle to import */
+	ump_alloc_flags alloc_flags;        /**< [in]  Flags to assign to the imported memory */
+	ump_secure_id secure_id;            /**< [out] UMP ID representing the imported memory */
+	uint32_t padding;                        /* don't remove */
+} ump_k_import;
+
+/**
+ * UMP allocation flags request.
+ * Used when performing umpp_get_allocation_flags
+ *
+ * used only by v1 API
+ */
+typedef struct ump_k_allocation_flags
+{
+	ump_secure_id secure_id; /**< [in] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [out] Flags to use when allocating */
+} ump_k_allocation_flags;
+
+#define UMP_CALL_MAX_SIZE 512
+/*
+ * Ioctl definitions
+ */
+
+/* Use '~' as magic number */
+
+#define UMP_IOC_MAGIC  '~'
+
+#define UMP_FUNC_ALLOCATE _IOWR(UMP_IOC_MAGIC,  1, ump_k_allocate)
+#define UMP_FUNC_SIZEQUERY _IOWR(UMP_IOC_MAGIC,  2, ump_k_sizequery)
+#define UMP_FUNC_MSYNC _IOWR(UMP_IOC_MAGIC,  3, ump_k_msync)
+#define UMP_FUNC_RETAIN _IOW(UMP_IOC_MAGIC,  4, ump_k_retain)
+#define UMP_FUNC_RELEASE _IOW(UMP_IOC_MAGIC,  5, ump_k_release)
+#define UMP_FUNC_ALLOCATION_FLAGS_GET _IOWR(UMP_IOC_MAGIC,  6, ump_k_allocation_flags)
+#define UMP_FUNC_IMPORT _IOWR(UMP_IOC_MAGIC, 7, ump_k_import)
+
+/*max ioctl sequential number*/
+#define UMP_IOC_MAXNR 7
+
+/* 15 bits for the UMP ID (allowing 32768 IDs) */
+#define UMP_LINUX_ID_BITS 15
+#define UMP_LINUX_ID_MASK ((1ULL << UMP_LINUX_ID_BITS) - 1ULL)
+
+/* 64-bit (really 52 bits) encoding: 15 bits for the ID, 37 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_64 37
+#define UMP_LINUX_OFFSET_MASK_64 ((1ULL << UMP_LINUX_OFFSET_BITS_64)-1)
+/* 32-bit encoding: 15 bits for the ID, 17 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_32 17
+#define UMP_LINUX_OFFSET_MASK_32 ((1ULL << UMP_LINUX_OFFSET_BITS_32)-1)
+
+#if __SIZEOF_LONG__ == 8
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_64
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_64
+#else
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_32
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_32
+#endif
+
+#endif /* _UMP_IOCTL_H_ */
diff --git a/midgard/r16p0/kernel/include/linux/ump.h b/midgard/r16p0/kernel/include/linux/ump.h
new file mode 100644
index 0000000..16f9c39
--- /dev/null
+++ b/midgard/r16p0/kernel/include/linux/ump.h
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ *
+ * This file contains the kernel space part of the UMP API.
+ *
+ */
+
+#ifndef _UMP_KERNEL_INTERFACE_H_
+#define _UMP_KERNEL_INTERFACE_H_
+
+/**
+ * @addtogroup ump_api
+ * @{
+ */
+
+/** @defgroup ump_kernel_space_api UMP Kernel Space API
+ * @{ */
+
+/**
+ * External representation of a UMP handle in kernel space.
+ */
+typedef void * ump_dd_handle;
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+
+#include <linux/ump-common.h>
+
+#define UMP_KERNEL_API_EXPORT
+
+#if defined(__KERNEL__)
+#include <linux/ump-import.h>
+#else
+#include <ump/src/library/common/ump_user.h>
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_INVALID_MEMORY_HANDLE ((ump_dd_handle)0)
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block_64
+{
+	uint64_t addr; /**< The physical address of the block */
+	uint64_t size; /**< The length of the block, in bytes, typically page aligned */
+} ump_dd_physical_block_64;
+
+/**
+ * Security filter hook.
+ *
+ * Each allocation can have a security filter attached to it.@n
+ * The hook receives
+ * @li the secure ID
+ * @li a handle to the allocation
+ * @li  the callback_data argument provided to @ref ump_dd_allocate_64 or @ref ump_dd_create_from_phys_blocks_64
+ *
+ * The hook must return @a true to indicate that access to the handle is allowed or @n
+ * @a false to state that no access is permitted.@n
+ * This hook is guaranteed to be called in the context of the requesting process/address space.
+ *
+ * The arguments provided to the hook are;
+ * @li the secure ID
+ * @li handle to the allocation
+ * @li the callback_data set when registering the hook
+ *
+ * Return value;
+ * @li @a true to permit access
+ * @li @a false to deny access
+ */
+typedef bool (*ump_dd_security_filter)(ump_secure_id, ump_dd_handle, void *);
+
+/**
+ * Final release notify hook.
+ *
+ * Allocations can have a hook attached to them which is called when the last reference to the allocation is released.
+ * No reference count manipulation is allowed on the provided handle, just property querying (ID get, size get, phys block get).
+ * This is similar to finalizers in OO languages.
+ *
+ * The arguments provided to the hook are;
+ * * handle to the allocation
+ * * the callback_data set when registering the hook
+ */
+typedef void (*ump_dd_final_release_callback)(const ump_dd_handle, void *);
+
+/**
+ * Allocate a buffer.
+ * The lifetime of the allocation is controlled by a reference count.
+ * The reference count of the returned buffer is set to 1.
+ * The memory will be freed once the reference count reaches 0.
+ * Use @ref ump_dd_retain and @ref ump_dd_release to control the reference count.
+ * @param size Number of bytes to allocate. Will be padded up to a multiple of the page size.
+ * @param flags Bit-wise OR of zero or more of the allocation flag bits.
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a
+ * secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed,
+ * just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the new allocation, or @a UMP_DD_INVALID_MEMORY_HANDLE on allocation failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/**
+ * Allocation bits getter.
+ * Retrieves the allocation flags used when instantiating the given handle.
+ * Just a copy of the flag given to @ref ump_dd_allocate_64 and @ref ump_dd_create_from_phys_blocks_64
+ * @param mem The handle retrieve the bits for
+ * @return The allocation bits used to instantiate the allocation
+ */
+UMP_KERNEL_API_EXPORT ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem);
+
+
+/**
+ * Retrieves the secure ID for the specified UMP memory.
+ *
+ * This identifier is unique across the entire system, and uniquely identifies
+ * the specified UMP memory allocation. This identifier can later be used through the
+ * @ref ump_dd_from_secure_id or
+ * @ref ump_from_secure_id
+ * functions in order to access this UMP memory, for instance from another process (if shared of course).
+ * Unless the allocation was marked as shared the returned ID will only be resolvable in the same process as did the allocation.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE will result in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_secure_id_get
+ *
+ * @see ump_dd_from_secure_id
+ * @see ump_from_secure_id
+ * @see ump_secure_id_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the secure ID for the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem);
+
+#ifdef CONFIG_KDS
+/**
+ * Retrieve the KDS resource for the specified UMP memory.
+ *
+ * The KDS resource should be used to synchronize access to the UMP allocation.
+ * See the KDS API for how to do that.
+ *
+ * @param mem Handle to the UMP memory to query.
+ * @return Pointer to the KDS resource controlling access to the UMP memory.
+ */
+UMP_KERNEL_API_EXPORT struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem);
+#endif
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_release when there is no longer any
+ * use for the retrieved handle.
+ *
+ * If called on an non-shared allocation and this is a different process @a UMP_DD_INVALID_MEMORY_HANDLE will be returned.
+ *
+ * Calling on an @a UMP_INVALID_SECURE_ID will return @a UMP_DD_INVALID_MEMORY_HANDLE
+ *
+ * @note There is a user space equivalent function called @ref ump_from_secure_id
+ *
+ * @see ump_dd_release
+ * @see ump_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get function.
+ *
+ * @return @a UMP_DD_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ * This function will return a pointer to an array of @ref ump_dd_physical_block_64 in @a pArray and the number of array elements in @a pCount
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @param mem Handle to UMP memory.
+ * @param[out] pCount Pointer to where to store the number of items in the returned array
+ * @param[out] pArray Pointer to where to store a pointer to the physical blocks array
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray);
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT uint64_t ump_dd_size_get_64(const ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory allocation.
+ *
+ * The function @ref ump_dd_release must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note You are not required to call @ref ump_dd_retain
+ * for UMP handles returned from
+ * @ref ump_dd_from_secure_id,
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_retain
+ *
+ * @see ump_retain
+ *
+ * @param mem Handle to UMP memory.
+ * @return 0 indicates success, any other value indicates failure.
+ */
+UMP_KERNEL_API_EXPORT int ump_dd_retain(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function must be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * One can only call ump_release when matched with a successful ump_dd_retain, ump_dd_allocate_64 or ump_dd_from_secure_id
+ * If called on an @a UMP_DD_INVALID_MEMORY_HANDLE the function will early out.
+ *
+ * @note There is a user space equivalent function called @ref ump_release
+ *
+ * @see ump_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_release(ump_dd_handle mem);
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ * Once wrapped the memory acts just like a normal allocation coming from @ref ump_dd_allocate_64.
+ * The only exception is that the freed physical memory is not put into the pool of free memory, but instead considered returned to the caller once @a final_release_func returns.
+ * The blocks array will be copied, so no need to hold on to it after this function returns.
+ * @param[in] blocks Array of @ref ump_dd_physical_block_64
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ * @param flags Allocation flags to mark the handle with
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed, just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/** @name UMP v1 API
+ * Functions provided to support compatibility with UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_HANDLE_INVALID UMP_DD_INVALID_MEMORY_HANDLE
+
+/**
+ * UMP error codes for kernel space.
+ */
+typedef enum
+{
+	UMP_DD_SUCCESS, /**< indicates success */
+	UMP_DD_INVALID /**< indicates failure */
+} ump_dd_status_code;
+
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block
+{
+	unsigned long addr; /**< The physical address of the block */
+	unsigned long size; /**< The length of the block, typically page aligned */
+} ump_dd_physical_block;
+
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_reference_release "ump_dd_reference_release" when there is no longer any
+ * use for the retrieved handle.
+ *
+ * @note There is a user space equivalent function called @ref ump_handle_create_from_secure_id "ump_handle_create_from_secure_id"
+ *
+ * @see ump_dd_reference_release
+ * @see ump_handle_create_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get "ump_secure_id_get " function.
+ *
+ * @return UMP_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id);
+
+
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ *
+ * @param[in] blocks Array of @ref ump_dd_physical_block
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ *
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the number of physical blocks used by the specified UMP memory.
+ *
+ * This function retrieves the number of @ref ump_dd_physical_block "ump_dd_physical_block" structs needed
+ * to describe the physical memory layout of the given UMP memory. This can later be used when calling
+ * the functions @ref ump_dd_phys_blocks_get "ump_dd_phys_blocks_get" and
+ * @ref ump_dd_phys_block_get "ump_dd_phys_block_get".
+ *
+ * @see ump_dd_phys_blocks_get
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return The number of ump_dd_physical_block structs required to describe the physical memory layout of the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will fail if the num_blocks parameter is either to large or to small.
+ *
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param blocks An array of @ref ump_dd_physical_block "ump_dd_physical_block" structs that will receive the physical description.
+ * @param num_blocks The number of blocks to return in the blocks array. Use the function
+ *                   @ref ump_dd_phys_block_count_get "ump_dd_phys_block_count_get" first to determine the number of blocks required.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the physical memory block information for specified block for the specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will return UMP_DD_INVALID if the specified index is out of range.
+ *
+ * @see ump_dd_phys_blocks_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param index Which physical info block to retrieve.
+ * @param block Pointer to a @ref ump_dd_physical_block "ump_dd_physical_block" struct which will receive the requested information.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block);
+
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get "ump_size_get"
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory.
+ *
+ * This function adds an extra reference to the specified UMP memory. This function should
+ * be used every time a UMP memory handle is duplicated, that is, assigned to another ump_dd_handle
+ * variable. The function @ref ump_dd_reference_release "ump_dd_reference_release" must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * @note You are not required to call @ref ump_dd_reference_add "ump_dd_reference_add"
+ * for UMP handles returned from
+ * @ref ump_dd_handle_create_from_secure_id "ump_dd_handle_create_from_secure_id",
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_add "ump_reference_add"
+ *
+ * @see ump_reference_add
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function should be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_release "ump_reference_release"
+ *
+ * @see ump_reference_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle mem);
+
+/* @} */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/** @} */ /* end group ump_kernel_space_api */
+
+/** @} */ /* end group ump_api */
+
+#endif /* _UMP_KERNEL_INTERFACE_H_ */
diff --git a/midgard/r16p0/kernel/license.txt b/midgard/r16p0/kernel/license.txt
new file mode 100644
index 0000000..77c14bd
--- /dev/null
+++ b/midgard/r16p0/kernel/license.txt
@@ -0,0 +1,198 @@
+GPLV2 LICENCE AGREEMENT FOR MALI GPUS LINUX KERNEL DEVICE DRIVERS SOURCE CODE
+
+THE USE OF THE SOFTWARE ACCOMPANYING THIS DOCUMENT IS EXPRESSLY SUBJECT TO THE TERMS OF THE GNU GENERAL PUBLIC LICENSE VERSION 2 AS PUBLISHED BY THE FREE SOFTWARE FOUNDATION AND SET OUT BELOW FOR REFERENCE (?GPL LICENCE?). ARM IS ONLY WILLING TO DISTRIBUTE THE SOFTWARE TO YOU ON CONDITION THAT YOU ACCEPT ALL OF THE TERMS IN THE GPL LICENCE PRIOR TO MODIFYING OR DISTRIBUTING THE SOFTWARE.
+
+
+
+Further for the period of three (3) years, ARM hereby offers to make available the source code of any part of the software program that is supplied as object code or in executable form.
+
+
+
+GPL Licence
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+Version 2, June 1991
+
+
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+
+
+
+Preamble
+
+
+
+The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too.
+
+
+
+When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things.
+
+
+
+To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it.
+
+
+
+For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
+
+
+
+We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
+
+
+
+Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations.
+
+
+
+Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all.
+
+
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+
+
+0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you".
+
+
+
+Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program).
+
+Whether that is true depends on what the Program does.
+
+
+
+1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty;
+
+and give any other recipients of the Program a copy of this License along with the Program.
+
+
+
+You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee.
+
+
+
+2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
+
+
+
+a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change.
+
+
+
+b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License.
+
+
+
+c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.)
+
+
+
+These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.
+
+
+
+Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program.
+
+
+
+In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.
+
+
+
+3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following:
+
+
+
+a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.)
+
+
+
+The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.
+
+
+
+If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code.
+
+
+
+4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.
+
+
+
+5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the
+
+Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it.
+
+
+
+6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein.
+
+You are not responsible for enforcing compliance by third parties to this License.
+
+
+
+7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program.
+
+
+
+If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances.
+
+
+
+It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.
+
+
+
+This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
+
+
+
+8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.
+
+
+
+9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+
+
+
+Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation.
+
+
+
+10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.
+
+
+
+NO WARRANTY
+
+
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+/end
+
diff --git a/midgard/r16p0/kernel/patches/integrate_kds_with_dma_buf.patch b/midgard/r16p0/kernel/patches/integrate_kds_with_dma_buf.patch
new file mode 100644
index 0000000..37458cf
--- /dev/null
+++ b/midgard/r16p0/kernel/patches/integrate_kds_with_dma_buf.patch
@@ -0,0 +1,188 @@
+diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
+index 9aa618a..30e07ea 100644
+--- a/drivers/base/Kconfig
++++ b/drivers/base/Kconfig
+@@ -192,4 +192,12 @@ config DMA_SHARED_BUFFER
+ 	  APIs extension; the file's descriptor can then be passed on to other
+ 	  driver.
+ 
++config DMA_SHARED_BUFFER_USES_KDS
++	bool "Synchronize access to dma_buf with KDS"
++	default n
++	select KDS
++	depends on DMA_SHARED_BUFFER
++	help
++	  This option adds a KDS resource within every dma_buf allocation.
++
+ endmenu
+diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
+index 20258e1..e4083fd 100644
+--- a/drivers/base/dma-buf.c
++++ b/drivers/base/dma-buf.c
+@@ -27,6 +27,8 @@
+ #include <linux/dma-buf.h>
+ #include <linux/anon_inodes.h>
+ #include <linux/export.h>
++#include <linux/poll.h>
++#include <linux/sched.h>
+ 
+ static inline int is_dma_buf_file(struct file *);
+ 
+@@ -40,6 +42,8 @@ static int dma_buf_release(struct inode *inode, struct file *file)
+ 	dmabuf = file->private_data;
+ 
+ 	dmabuf->ops->release(dmabuf);
++	kds_callback_term(&dmabuf->kds_cb);
++	kds_resource_term(&dmabuf->kds);
+ 	kfree(dmabuf);
+ 	return 0;
+ }
+@@ -61,9 +65,90 @@ static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
+ 	return dmabuf->ops->mmap(dmabuf, vma);
+ }
+ 
++
++static void dma_buf_kds_cb_fn (void *param1, void *param2)
++{
++	struct kds_resource_set **rset_ptr = param1;
++	struct kds_resource_set *rset = *rset_ptr;
++	wait_queue_head_t *wait_queue = param2;
++
++	kfree(rset_ptr);
++	kds_resource_set_release(&rset);
++	wake_up(wait_queue);
++}
++
++static int dma_buf_kds_check(struct kds_resource *kds,
++                             long unsigned int exclusive, int *poll_ret)
++{
++	/* Synchronous wait with 0 timeout - poll availability */
++	struct kds_resource_set *rset = kds_waitall(1,&exclusive,&kds,0);
++
++	if (IS_ERR(rset))
++		return POLLERR;
++
++	if (rset){
++		kds_resource_set_release(&rset);
++		*poll_ret = POLLIN | POLLRDNORM;
++		if (exclusive)
++			*poll_ret |=  POLLOUT | POLLWRNORM;
++		return 1;
++	}else{
++		return 0;
++	}
++}
++
++static unsigned int dma_buf_poll(struct file *file,
++                                 struct poll_table_struct *wait)
++{
++	struct dma_buf *dmabuf;
++	struct kds_resource *kds;
++	unsigned int ret = 0;
++
++	if (!is_dma_buf_file(file))
++		return POLLERR;
++
++	dmabuf = file->private_data;
++	kds    = &dmabuf->kds;
++
++	if (poll_does_not_wait(wait)){
++		/* Check for exclusive access (superset of shared) first */
++		if(!dma_buf_kds_check(kds, 1ul, &ret))
++			dma_buf_kds_check(kds, 0ul, &ret);
++	}else{
++		int events = poll_requested_events(wait);
++		unsigned long exclusive;
++		wait_queue_head_t *wq;
++		struct kds_resource_set **rset_ptr = kmalloc(sizeof(*rset_ptr), GFP_KERNEL);
++
++		if (!rset_ptr)
++			return POLL_ERR;
++
++		if (events & POLLOUT){
++			wq = &dmabuf->wq_exclusive;
++			exclusive = 1;
++		}else{
++			wq = &dmabuf->wq_shared;
++			exclusive = 0;
++		}
++		poll_wait(file, wq, wait);
++		ret = kds_async_waitall(rset_ptr, KDS_FLAG_LOCKED_WAIT, &dmabuf->kds_cb,
++		                        rset_ptr, wq, 1, &exclusive, &kds);
++
++		if (IS_ERR_VALUE(ret)){
++			ret = POLL_ERR;
++			kfree(rset_ptr);
++		}else{
++			/* Can't allow access until callback */
++			ret = 0;
++		}
++	}
++	return ret;
++}
++
+ static const struct file_operations dma_buf_fops = {
+ 	.release	= dma_buf_release,
+ 	.mmap		= dma_buf_mmap_internal,
++	.poll		= dma_buf_poll,
+ };
+ 
+ /*
+@@ -120,6 +205,11 @@ struct dma_buf *dma_buf_export(void *priv, const struct dma_buf_ops *ops,
+ 	mutex_init(&dmabuf->lock);
+ 	INIT_LIST_HEAD(&dmabuf->attachments);
+ 
++	init_waitqueue_head(&dmabuf->wq_exclusive);
++	init_waitqueue_head(&dmabuf->wq_shared);
++	kds_resource_init(&dmabuf->kds);
++	kds_callback_init(&dmabuf->kds_cb, 1, dma_buf_kds_cb_fn);
++
+ 	return dmabuf;
+ }
+ EXPORT_SYMBOL_GPL(dma_buf_export);
+diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
+index eb48f38..6e824d2 100644
+--- a/include/linux/dma-buf.h
++++ b/include/linux/dma-buf.h
+@@ -30,6 +30,8 @@
+ #include <linux/list.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/fs.h>
++#include <linux/kds.h>
++#include <linux/wait.h>
+ 
+ struct device;
+ struct dma_buf;
+@@ -121,6 +123,10 @@ struct dma_buf {
+ 	const struct dma_buf_ops *ops;
+ 	/* mutex to serialize list manipulation and attach/detach */
+ 	struct mutex lock;
++	struct kds_resource kds;
++	wait_queue_head_t wq_exclusive;
++	wait_queue_head_t wq_shared;
++	struct kds_callback kds_cb;
+ 	void *priv;
+ };
+ 
+@@ -156,6 +162,21 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
+ 	get_file(dmabuf->file);
+ }
+ 
++/**
++ * get_dma_buf_kds_resource - get a KDS resource for this dma-buf
++ * @dmabuf:	[in]	pointer to dma_buf
++ *
++ * Returns a KDS resource that represents the dma-buf. This should be used by
++ * drivers to synchronize access to the buffer. Note that the caller should
++ * ensure that a reference to the dma-buf exists from the call to
++ * kds_async_wait until kds_resource_set_release is called.
++ */
++static inline struct kds_resource *
++	get_dma_buf_kds_resource(struct dma_buf *dmabuf)
++{
++	return &dmabuf->kds;
++}
++
+ #ifdef CONFIG_DMA_SHARED_BUFFER
+ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+ 							struct device *dev);
diff --git a/midgard/r16p0/kernel/sconscript b/midgard/r16p0/kernel/sconscript
new file mode 100644
index 0000000..9cbccb0
--- /dev/null
+++ b/midgard/r16p0/kernel/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010-2014, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+if env['backend'] == 'kernel' and int(env['kernel_modules']) == 1:
+	SConscript('drivers/sconscript')
+
+	if Glob('tests/sconscript'):
+		SConscript('tests/sconscript')
diff --git a/midgard/r21p0 b/midgard/r21p0
new file mode 120000
index 0000000..886200d
--- /dev/null
+++ b/midgard/r21p0
@@ -0,0 +1 @@
+../t83x
\ No newline at end of file
diff --git a/t83x/android/patches/K/android-k.sh b/t83x/android/patches/K/android-k.sh
new file mode 100644
index 0000000..4e44d5e
--- /dev/null
+++ b/t83x/android/patches/K/android-k.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+
+#
+# Copyright (C) 2014-2015 ARM Limited. All rights reserved.
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Android K script for LLVM 3.5 and above support
+#
+# This script applies two minor patches and checks-out libcxx and libcxxabi libraries
+# adding C++11 support in Android K required for LLVM3.5 and above.
+
+
+if [ -z "$ANDROID_BUILD_TOP" ]; then
+    echo "[ANDROID-PATCH] This script must be executed in an Android build environment"
+    echo "                ANDROID_BUILD_TOP is undefined"
+    echo "                please do \"source build/envseup.sh; lunch\"."
+    exit 1
+fi
+
+ANDROID_REPOSITORY_URL="https://android.googlesource.com/"
+
+REFERENCE_ARGUMENT=""
+
+# Proccess arguments. Valid arguments are:
+#        -u ARG      Override the Android repository url
+#        -r ARG      Provide a path to a reference git repository
+while getopts "u:r:" opt; do
+  case $opt in
+    u)
+      ANDROID_REPOSITORY_URL=$OPTARG
+      ;;
+
+    r)
+      REFERENCE_ARGUMENT="--reference "$OPTARG
+      ;;
+
+    \?)
+      echo "Invalid option: -$OPTARG" >&2
+      exit 1
+
+      ;;
+    :)
+      echo "Option -$OPTARG requires an argument." >&2
+      exit 1
+      ;;
+
+  esac
+done
+
+MIDGARD_DDK_WD=$(pwd)
+
+cd $ANDROID_BUILD_TOP/external
+
+#Checkout libcxx from Android Master required by LLVM due to c++11
+git clone $ANDROID_REPOSITORY_URL/platform/external/libcxx $REFERENCE_ARGUMENT
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] git clone of libcxx failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+cd libcxx
+git checkout 27ae7cb782821a4f2d3813522ee411cd978bcd85 -q
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] checkout of libcxx@27ae7cb782821a4f2d3813522ee411cd978bcd85 revision failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/libcxx.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Patching libcxx failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Checkout libcxxabi from Android Master required by LLVM due to c++11
+cd $ANDROID_BUILD_TOP/external
+git clone $ANDROID_REPOSITORY_URL/platform/external/libcxxabi $REFERENCE_ARGUMENT
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] git clone of libcxxabi failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+cd libcxxabi
+git checkout 285d67f35f6044cf733091e36248405ca967c62c -q
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] checkout of libcxxabi@285d67f35f6044cf733091e36248405ca967c62c revision failed."
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+
+#Apply patch to bioncs
+cd $ANDROID_BUILD_TOP/bionic/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/bionics.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Patching bionics failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Add locale support to bioncs
+cd $ANDROID_BUILD_TOP/bionic/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/locale.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Adding locale support failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+#Add LLVM backend ARM fix to support -O0 compilation
+cd $ANDROID_BUILD_TOP/external/llvm/
+patch -p1  < $MIDGARD_DDK_WD/android/patches/K/llvm.diff
+
+ret_code=$?
+if [[ $ret_code != 0 ]] ; then
+    echo "[ANDROID-PATCH] Fixing ARM LLVM backend failed."
+    git reset --hard
+    cd $MIDGARD_DDK_WD
+    exit $ret_code
+fi
+
+echo "[ANDROID-PATCH] Patching completed successfully"
+cd $MIDGARD_DDK_WD
diff --git a/t83x/android/patches/K/bionics.diff b/t83x/android/patches/K/bionics.diff
new file mode 100644
index 0000000..b86cedf
--- /dev/null
+++ b/t83x/android/patches/K/bionics.diff
@@ -0,0 +1,202 @@
+diff --git a/libc/Android.mk b/libc/Android.mk
+index 9610c14..d3dbe81 100644
+--- a/libc/Android.mk
++++ b/libc/Android.mk
+@@ -214,6 +215,7 @@ libc_bionic_src_:= \
+     bionic/libc_init_common.cpp \
+     bionic/libc_logging.cpp \
+     bionic/libgen.cpp \
++    bionic/locale.cpp \
+     bionic/mmap.cpp \
+     bionic/pthread_attr.cpp \
+     bionic/pthread_detach.cpp \
+@@ -232,7 +232,6 @@ libc_bionic_src_files := \
+     bionic/scandir.cpp \
+     bionic/sched_getaffinity.cpp \
+     bionic/__set_errno.cpp \
+-    bionic/setlocale.cpp \
+     bionic/signalfd.cpp \
+     bionic/sigwait.cpp \
+     bionic/statvfs.cpp \
+diff --git a/libc/bionic/wchar.cpp b/libc/bionic/wchar.cpp
+index 50a3875..cc830b7 100644
+--- a/libc/bionic/wchar.cpp
++++ b/libc/bionic/wchar.cpp
+@@ -96,6 +96,7 @@ int swscanf(const wchar_t* /*s*/, const wchar_t* /*format*/, ... ) {
+
+ int iswalnum(wint_t wc) { return isalnum(wc); }
+ int iswalpha(wint_t wc) { return isalpha(wc); }
++int iswblank(wint_t wc) { return isblank(wc); }
+ int iswcntrl(wint_t wc) { return iscntrl(wc); }
+ int iswdigit(wint_t wc) { return isdigit(wc); }
+ int iswgraph(wint_t wc) { return isgraph(wc); }
+diff --git a/libc/include/assert.h b/libc/include/assert.h
+index 62470f5..361a5ff 100644
+--- a/libc/include/assert.h
++++ b/libc/include/assert.h
+@@ -60,6 +60,6 @@
+ #endif
+
+ __BEGIN_DECLS
+-__dead void __assert(const char *, int, const char *);
+-__dead void __assert2(const char *, int, const char *, const char *);
++__dead void __assert(const char *, int, const char *) __noreturn;
++__dead void __assert2(const char *, int, const char *, const char *) __noreturn;
+ __END_DECLS
+diff --git a/libc/include/ctype.h b/libc/include/ctype.h
+index 5557e31..a66df12 100644
+--- a/libc/include/ctype.h
++++ b/libc/include/ctype.h
+@@ -45,11 +45,14 @@
+ #define	_CTYPE_U	0x01
+ #define	_CTYPE_L	0x02
+ #define	_CTYPE_N	0x04
++#define	_CTYPE_D	0x04
+ #define	_CTYPE_S	0x08
+ #define	_CTYPE_P	0x10
+ #define	_CTYPE_C	0x20
+ #define	_CTYPE_X	0x40
+ #define	_CTYPE_B	0x80
++#define	_CTYPE_R	(_CTYPE_P|_CTYPE_U|_CTYPE_L|_CTYPE_D|_CTYPE_B)
++#define	_CTYPE_A	(_CTYPE_L|_CTYPE_U)
+
+ __BEGIN_DECLS
+
+diff --git a/libc/include/locale.h b/libc/include/locale.h
+index 65b5c7d..6989851 100644
+--- a/libc/include/locale.h
++++ b/libc/include/locale.h
+@@ -29,6 +29,7 @@
+ #define _LOCALE_H_
+
+ #include <sys/cdefs.h>
++#include <xlocale.h>
+
+ __BEGIN_DECLS
+
+@@ -43,17 +44,65 @@ enum {
+     LC_PAPER     = 7,
+     LC_NAME      = 8,
+     LC_ADDRESS   = 9,
+-
+     LC_TELEPHONE      = 10,
+     LC_MEASUREMENT    = 11,
+     LC_IDENTIFICATION = 12
+ };
+
+-extern char *setlocale(int category, const char *locale);
++#define LC_CTYPE_MASK          (1 << LC_CTYPE)
++#define LC_NUMERIC_MASK        (1 << LC_NUMERIC)
++#define LC_TIME_MASK           (1 << LC_TIME)
++#define LC_COLLATE_MASK        (1 << LC_COLLATE)
++#define LC_MONETARY_MASK       (1 << LC_MONETARY)
++#define LC_MESSAGES_MASK       (1 << LC_MESSAGES)
++#define LC_PAPER_MASK          (1 << LC_PAPER)
++#define LC_NAME_MASK           (1 << LC_NAME)
++#define LC_ADDRESS_MASK        (1 << LC_ADDRESS)
++#define LC_TELEPHONE_MASK      (1 << LC_TELEPHONE)
++#define LC_MEASUREMENT_MASK    (1 << LC_MEASUREMENT)
++#define LC_IDENTIFICATION_MASK (1 << LC_IDENTIFICATION)
++
++#define LC_ALL_MASK (LC_CTYPE_MASK | LC_NUMERIC_MASK | LC_TIME_MASK | LC_COLLATE_MASK | \
++                     LC_MONETARY_MASK | LC_MESSAGES_MASK | LC_PAPER_MASK | LC_NAME_MASK | \
++                     LC_ADDRESS_MASK | LC_TELEPHONE_MASK | LC_MEASUREMENT_MASK | \
++                     LC_IDENTIFICATION_MASK)
++
++struct lconv {
++    char* decimal_point;
++    char* thousands_sep;
++    char* grouping;
++    char* int_curr_symbol;
++    char* currency_symbol;
++    char* mon_decimal_point;
++    char* mon_thousands_sep;
++    char* mon_grouping;
++    char* positive_sign;
++    char* negative_sign;
++    char  int_frac_digits;
++    char  frac_digits;
++    char  p_cs_precedes;
++    char  p_sep_by_space;
++    char  n_cs_precedes;
++    char  n_sep_by_space;
++    char  p_sign_posn;
++    char  n_sign_posn;
++    char  int_p_cs_precedes;
++    char  int_p_sep_by_space;
++    char  int_n_cs_precedes;
++    char  int_n_sep_by_space;
++    char  int_p_sign_posn;
++    char  int_n_sign_posn;
++};
++
++struct lconv* localeconv(void);
++
++locale_t duplocale(locale_t);
++void freelocale(locale_t);
++locale_t newlocale(int, const char*, locale_t);
++char* setlocale(int, const char*);
++locale_t uselocale(locale_t);
+
+-/* Make libstdc++-v3 happy.  */
+-struct lconv { };
+-struct lconv *localeconv(void);
++#define LC_GLOBAL_LOCALE ((locale_t) -1L)
+
+ __END_DECLS
+
+diff --git a/libc/include/stdio.h b/libc/include/stdio.h
+index 23fc944..28685c7 100644
+--- a/libc/include/stdio.h
++++ b/libc/include/stdio.h
+@@ -469,7 +469,10 @@ int vsprintf(char *dest, const char *format, __va_list ap)
+ }
+
+ #if defined(__clang__)
+-#define snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
++  #if !defined(snprintf)
++    #define __wrap_snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
++    #define snprintf(...) __wrap_snprintf(__VA_ARGS__)
++  #endif
+ #else
+ __BIONIC_FORTIFY_INLINE
+ __printflike(3, 4)
+@@ -481,7 +484,10 @@ int snprintf(char *dest, size_t size, const char *format, ...)
+ #endif
+
+ #if defined(__clang__)
+-#define sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
++  #if !defined(sprintf)
++    #define __wrap_sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
++    #define sprintf(...) __wrap_sprintf(__VA_ARGS__)
++  #endif
+ #else
+ __BIONIC_FORTIFY_INLINE
+ __printflike(2, 3)
+diff --git a/libc/include/stdlib.h b/libc/include/stdlib.h
+index 9fa84c1..188da69 100644
+--- a/libc/include/stdlib.h
++++ b/libc/include/stdlib.h
+@@ -68,6 +68,10 @@ static __inline__ float strtof(const char *nptr, char **endptr)
+     return (float)strtod(nptr, endptr);
+ }
+
++static __inline__ long double strtold(const char* s, char** end_ptr) {
++  return (long double)strtod(s, end_ptr);
++}
++
+ extern int atoi(const char *) __purefunc;
+ extern long atol(const char *) __purefunc;
+ extern long long atoll(const char *) __purefunc;
+diff --git a/libc/include/wchar.h b/libc/include/wchar.h
+index 76ac02c..c413806 100644
+--- a/libc/include/wchar.h
++++ b/libc/include/wchar.h
+@@ -77,6 +77,7 @@ extern int               fwprintf(FILE *, const wchar_t *, ...);
+ extern int               fwscanf(FILE *, const wchar_t *, ...);
+ extern int               iswalnum(wint_t);
+ extern int               iswalpha(wint_t);
++extern int               iswblank(wint_t);
+ extern int               iswcntrl(wint_t);
+ extern int               iswdigit(wint_t);
+ extern int               iswgraph(wint_t);
diff --git a/t83x/android/patches/K/libcxx.diff b/t83x/android/patches/K/libcxx.diff
new file mode 100644
index 0000000..a10d75c
--- /dev/null
+++ b/t83x/android/patches/K/libcxx.diff
@@ -0,0 +1,13 @@
+diff --git a/include/cstdlib b/include/cstdlib
+index 152b891..e45a8fd 100644
+--- a/include/cstdlib
++++ b/include/cstdlib
+@@ -126,7 +126,7 @@ using ::realloc;
+ using ::abort;
+ using ::atexit;
+ using ::exit;
+-using ::_Exit;
++
+ using ::getenv;
+ using ::system;
+ using ::bsearch;
diff --git a/t83x/android/patches/K/license.txt b/t83x/android/patches/K/license.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/t83x/android/patches/K/license.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/t83x/android/patches/K/llvm.diff b/t83x/android/patches/K/llvm.diff
new file mode 100644
index 0000000..d41d279
--- /dev/null
+++ b/t83x/android/patches/K/llvm.diff
@@ -0,0 +1,14 @@
+diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
+index 8cdb853..868dd47 100644
+--- a/lib/Target/ARM/ARMInstrInfo.cpp
++++ b/lib/Target/ARM/ARMInstrInfo.cpp
+@@ -130,6 +130,10 @@ namespace {
+         MIB.addImm(0);
+       AddDefaultPred(MIB);
+
++      // Fix the GOT address by adding pc.
++      BuildMI(FirstMBB, MBBI, DL, TII.get(ARM::tPICADD), GlobalBaseReg)
++          .addReg(GlobalBaseReg).addImm(ARMPCLabelIndex);
++
+       return true;
+     }
diff --git a/t83x/android/patches/K/locale.diff b/t83x/android/patches/K/locale.diff
new file mode 100644
index 0000000..9c4c140
--- /dev/null
+++ b/t83x/android/patches/K/locale.diff
@@ -0,0 +1,204 @@
+diff --git a/libc/bionic/locale.cpp b/libc/bionic/locale.cpp
+new file mode 100644
+index 0000000..4fade84
+--- /dev/null
++++ b/libc/bionic/locale.cpp
+@@ -0,0 +1,156 @@
++/*
++ * Copyright (C) 2008 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++#include <locale.h>
++#include <pthread.h>
++#include <stdlib.h>
++
++// We currently support a single locale, the "C" locale (also known as "POSIX").
++
++struct __locale_t {
++  // Because we only support one locale, these are just tokens with no data.
++};
++
++static pthread_once_t gLocaleOnce = PTHREAD_ONCE_INIT;
++static lconv gLocale;
++
++static pthread_once_t gUselocaleKeyOnce = PTHREAD_ONCE_INIT;
++static pthread_key_t gUselocaleKey;
++
++static void __locale_init() {
++  gLocale.decimal_point = const_cast<char*>(".");
++
++  char* not_available = const_cast<char*>("");
++  gLocale.thousands_sep = not_available;
++  gLocale.grouping = not_available;
++  gLocale.int_curr_symbol = not_available;
++  gLocale.currency_symbol = not_available;
++  gLocale.mon_decimal_point = not_available;
++  gLocale.mon_thousands_sep = not_available;
++  gLocale.mon_grouping = not_available;
++  gLocale.positive_sign = not_available;
++  gLocale.negative_sign = not_available;
++
++  gLocale.int_frac_digits = CHAR_MAX;
++  gLocale.frac_digits = CHAR_MAX;
++  gLocale.p_cs_precedes = CHAR_MAX;
++  gLocale.p_sep_by_space = CHAR_MAX;
++  gLocale.n_cs_precedes = CHAR_MAX;
++  gLocale.n_sep_by_space = CHAR_MAX;
++  gLocale.p_sign_posn = CHAR_MAX;
++  gLocale.n_sign_posn = CHAR_MAX;
++  gLocale.int_p_cs_precedes = CHAR_MAX;
++  gLocale.int_p_sep_by_space = CHAR_MAX;
++  gLocale.int_n_cs_precedes = CHAR_MAX;
++  gLocale.int_n_sep_by_space = CHAR_MAX;
++  gLocale.int_p_sign_posn = CHAR_MAX;
++  gLocale.int_n_sign_posn = CHAR_MAX;
++}
++
++static void __uselocale_key_init() {
++  pthread_key_create(&gUselocaleKey, NULL);
++}
++
++static bool __is_supported_locale(const char* locale) {
++  return (strcmp(locale, "") == 0 || strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0);
++}
++
++static locale_t __new_locale() {
++  return reinterpret_cast<locale_t>(malloc(sizeof(__locale_t)));
++}
++
++lconv* localeconv() {
++  pthread_once(&gLocaleOnce, __locale_init);
++  return &gLocale;
++}
++
++locale_t duplocale(locale_t l) {
++  locale_t clone = __new_locale();
++  if (clone != NULL && l != LC_GLOBAL_LOCALE) {
++    *clone = *l;
++  }
++  return clone;
++}
++
++void freelocale(locale_t l) {
++  free(l);
++}
++
++locale_t newlocale(int category_mask, const char* locale_name, locale_t /*base*/) {
++  // Is 'category_mask' valid?
++  if ((category_mask & ~LC_ALL_MASK) != 0) {
++    errno = EINVAL;
++    return NULL;
++  }
++
++  if (!__is_supported_locale(locale_name)) {
++    errno = ENOENT;
++    return NULL;
++  }
++
++  return __new_locale();
++}
++
++char* setlocale(int category, char const* locale_name) {
++  // Is 'category' valid?
++  if (category < LC_CTYPE || category > LC_IDENTIFICATION) {
++    errno = EINVAL;
++    return NULL;
++  }
++
++  // Caller just wants to query the current locale?
++  if (locale_name == NULL) {
++    return const_cast<char*>("C");
++  }
++
++  // Caller wants one of the mandatory POSIX locales?
++  if (__is_supported_locale(locale_name)) {
++    return const_cast<char*>("C");
++  }
++
++  // We don't support any other locales.
++  errno = ENOENT;
++  return NULL;
++}
++
++locale_t uselocale(locale_t new_locale) {
++  pthread_once(&gUselocaleKeyOnce, __uselocale_key_init);
++
++  locale_t old_locale = static_cast<locale_t>(pthread_getspecific(gUselocaleKey));
++
++  // If this is the first call to uselocale(3) on this thread, we return LC_GLOBAL_LOCALE.
++  if (old_locale == NULL) {
++    old_locale = LC_GLOBAL_LOCALE;
++  }
++
++  if (new_locale != NULL) {
++    pthread_setspecific(gUselocaleKey, new_locale);
++  }
++
++  return old_locale;
++}
+diff --git a/libc/include/xlocale.h b/libc/include/xlocale.h
+new file mode 100644
+index 0000000..f7eb8f4
+--- /dev/null
++++ b/libc/include/xlocale.h
+@@ -0,0 +1,36 @@
++/*
++ * Copyright (C) 2014 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++#ifndef _XLOCALE_H_
++#define _XLOCALE_H_
++
++/* If we just use void* here, GCC exposes that in error messages. */
++struct __locale_t;
++typedef struct __locale_t* locale_t;
++
++#endif /* _XLOCALE_H_ */
diff --git a/t83x/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt b/t83x/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
new file mode 100644
index 0000000..4698e5c
--- /dev/null
+++ b/t83x/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
@@ -0,0 +1,141 @@
+#
+# (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+* ARM Mali Midgard devices
+
+
+Required properties:
+
+- compatible : Should be mali<chip>, replacing digits with x from the back,
+until malit<Major>xx, ending with arm,mali-midgard, the latter not optional.
+- reg : Physical base address of the device and length of the register area.
+- interrupts : Contains the three IRQ lines required by T-6xx devices
+- interrupt-names : Contains the names of IRQ resources in the order they were
+provided in the interrupts property. Must contain: "JOB, "MMU", "GPU".
+
+Optional:
+
+- clocks : Phandle to clock for the Mali T-6xx device.
+- clock-names : Shall be "clk_mali".
+- mali-supply : Phandle to regulator for the Mali device. Refer to
+Documentation/devicetree/bindings/regulator/regulator.txt for details.
+- operating-points-v2 : Refer to Documentation/devicetree/bindings/power/opp.txt
+for details.
+- jm_config : For T860/T880. Sets job manager configuration. An array containing:
+	- 1 to override the TIMESTAMP value, 0 otherwise.
+	- 1 to override clock gate, forcing them to be always on, 0 otherwise.
+	- 1 to enable job throttle, limiting the number of cores that can be started
+	  simultaneously, 0 otherwise.
+	- Value between 0 and 63 (including). If job throttle is enabled, this is one
+	  less than the number of cores that can be started simultaneously.
+- power_model : Sets the power model parameters. Two power models are currently
+  defined which include "mali-simple-power-model" and "mali-g71-power-model".
+	- mali-simple-power-model: this model derives the GPU power usage based
+	  on the GPU voltage scaled by the system temperature. Note: it was
+	  designed for the Juno platform, and may not be suitable for others.
+		- compatible: Should be "arm,mali-simple-power-model"
+		- dynamic-coefficient: Coefficient, in pW/(Hz V^2), which is
+		  multiplied by v^2*f to calculate the dynamic power consumption.
+		- static-coefficient: Coefficient, in uW/V^3, which is
+		  multiplied by v^3 to calculate the static power consumption.
+		- ts: An array containing coefficients for the temperature
+		  scaling factor. This is used to scale the static power by a
+		  factor of tsf/1000000,
+		  where tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0],
+		  and T = temperature in degrees.
+		- thermal-zone: A string identifying the thermal zone used for
+		  the GPU
+		- temp-poll-interval-ms: the interval at which the system
+		  temperature is polled
+	- mali-g71-power-model: this model derives the GPU power usage based on
+	  performance counters, so is more accurate.
+		- compatible: Should be "arm,mali-g71-power-model"
+		- scale: the dynamic power calculated by the power model is
+		  scaled by a factor of "scale"/1000. This value should be
+		  chosen to match a particular implementation.
+	* Note: the kernel could use either of the 2 power models (simple and
+	  counter-based) at different points so care should be taken to configure
+	  both power models in the device tree (specifically dynamic-coefficient,
+	  static-coefficient and scale) to best match the platform.
+- system-coherency : Sets the coherency protocol to be used for coherent
+		     accesses made from the GPU.
+		     If not set then no coherency is used.
+	- 0  : ACE-Lite
+	- 1  : ACE
+	- 31 : No coherency
+- ipa-model : Sets the IPA model to be used for power management. GPU probe will fail if the
+	      model is not found in the registered models list. If no model is specified here,
+	      a gpu-id based model is picked if available, otherwise the default model is used.
+	- mali-simple-power-model: Default model used on mali
+- protected-mode-switcher : Phandle to device implemented protected mode switching functionality.
+Refer to Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt for one implementation.
+
+Example for a Mali GPU:
+
+gpu@0xfc010000 {
+	compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
+	reg = <0xfc010000 0x4000>;
+	interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+	interrupt-names = "JOB", "MMU", "GPU";
+
+	clocks = <&pclk_mali>;
+	clock-names = "clk_mali";
+	mali-supply = <&vdd_mali>;
+	operating-points-v2 = <&gpu_opp_table>;
+	power_model@0 {
+		compatible = "arm,mali-simple-power-model";
+		static-coefficient = <2427750>;
+		dynamic-coefficient = <4687>;
+		ts = <20000 2000 (-20) 2>;
+		thermal-zone = "gpu";
+	};
+	power_model@1 {
+		compatible = "arm,mali-g71-power-model";
+		scale = <5>;
+	};
+};
+
+gpu_opp_table: opp_table0 {
+	compatible = "operating-points-v2";
+
+	opp@533000000 {
+		opp-hz = /bits/ 64 <533000000>;
+		opp-microvolt = <1250000>;
+	};
+	opp@450000000 {
+		opp-hz = /bits/ 64 <450000000>;
+		opp-microvolt = <1150000>;
+	};
+	opp@400000000 {
+		opp-hz = /bits/ 64 <400000000>;
+		opp-microvolt = <1125000>;
+	};
+	opp@350000000 {
+		opp-hz = /bits/ 64 <350000000>;
+		opp-microvolt = <1075000>;
+	};
+	opp@266000000 {
+		opp-hz = /bits/ 64 <266000000>;
+		opp-microvolt = <1025000>;
+	};
+	opp@160000000 {
+		opp-hz = /bits/ 64 <160000000>;
+		opp-microvolt = <925000>;
+	};
+	opp@100000000 {
+		opp-hz = /bits/ 64 <100000000>;
+		opp-microvolt = <912500>;
+	};
+};
diff --git a/t83x/kernel/Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt b/t83x/kernel/Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt
new file mode 100644
index 0000000..8a1596a
--- /dev/null
+++ b/t83x/kernel/Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt
@@ -0,0 +1,31 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+* ARM SMC protected mode switcher devices
+
+Required properties :
+
+- compatible : Must be "arm,smc-protected-mode-switcher"
+- arm,smc,protected_enable : SMC call ID to enable protected mode
+- arm,smc,protected_disable : SMC call ID to disable protected mode and reset
+			      device
+
+An example node :
+
+	gpu_switcher {
+		compatible = "arm,smc-protected-mode-switcher";
+		arm,smc,protected_enable = <0xff06>;
+		arm,smc,protected_disable = <0xff07>;
+	};
diff --git a/t83x/kernel/Documentation/devicetree/bindings/power/mali-opp.txt b/t83x/kernel/Documentation/devicetree/bindings/power/mali-opp.txt
new file mode 100644
index 0000000..22be9ba
--- /dev/null
+++ b/t83x/kernel/Documentation/devicetree/bindings/power/mali-opp.txt
@@ -0,0 +1,163 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+* ARM Mali Midgard OPP
+
+* OPP Table Node
+
+This describes the OPPs belonging to a device. This node can have following
+properties:
+
+Required properties:
+- compatible: Allow OPPs to express their compatibility. It should be:
+  "operating-points-v2", "operating-points-v2-mali".
+
+- OPP nodes: One or more OPP nodes describing voltage-current-frequency
+  combinations. Their name isn't significant but their phandle can be used to
+  reference an OPP.
+
+* OPP Node
+
+This defines voltage-current-frequency combinations along with other related
+properties.
+
+Required properties:
+- opp-hz: Nominal frequency in Hz, expressed as a 64-bit big-endian integer.
+  This should be treated as a relative performance measurement, taking both GPU
+  frequency and core mask into account.
+
+Optional properties:
+- opp-hz-real: Real frequency in Hz, expressed as a 64-bit big-endian integer.
+  If this is not present then the nominal frequency will be used instead.
+
+- opp-core-mask: Shader core mask. If neither this or opp-core-count are present
+  then all shader cores will be used for this OPP.
+
+- opp-core-count: Number of cores to use for this OPP. If this is present then
+  the driver will build a core mask using the available core mask provided by
+  the GPU hardware.
+
+  If neither this nor opp-core-mask are present then all shader cores will be
+  used for this OPP.
+
+  If both this and opp-core-mask are present then opp-core-mask is ignored.
+
+- opp-microvolt: voltage in micro Volts.
+
+  A single regulator's voltage is specified with an array of size one or three.
+  Single entry is for target voltage and three entries are for <target min max>
+  voltages.
+
+  Entries for multiple regulators must be present in the same order as
+  regulators are specified in device's DT node.
+
+- opp-microvolt-<name>: Named opp-microvolt property. This is exactly similar to
+  the above opp-microvolt property, but allows multiple voltage ranges to be
+  provided for the same OPP. At runtime, the platform can pick a <name> and
+  matching opp-microvolt-<name> property will be enabled for all OPPs. If the
+  platform doesn't pick a specific <name> or the <name> doesn't match with any
+  opp-microvolt-<name> properties, then opp-microvolt property shall be used, if
+  present.
+
+- opp-microamp: The maximum current drawn by the device in microamperes
+  considering system specific parameters (such as transients, process, aging,
+  maximum operating temperature range etc.) as necessary. This may be used to
+  set the most efficient regulator operating mode.
+
+  Should only be set if opp-microvolt is set for the OPP.
+
+  Entries for multiple regulators must be present in the same order as
+  regulators are specified in device's DT node. If this property isn't required
+  for few regulators, then this should be marked as zero for them. If it isn't
+  required for any regulator, then this property need not be present.
+
+- opp-microamp-<name>: Named opp-microamp property. Similar to
+  opp-microvolt-<name> property, but for microamp instead.
+
+- clock-latency-ns: Specifies the maximum possible transition latency (in
+  nanoseconds) for switching to this OPP from any other OPP.
+
+- turbo-mode: Marks the OPP to be used only for turbo modes. Turbo mode is
+  available on some platforms, where the device can run over its operating
+  frequency for a short duration of time limited by the device's power, current
+  and thermal limits.
+
+- opp-suspend: Marks the OPP to be used during device suspend. Only one OPP in
+  the table should have this.
+
+- opp-supported-hw: This enables us to select only a subset of OPPs from the
+  larger OPP table, based on what version of the hardware we are running on. We
+  still can't have multiple nodes with the same opp-hz value in OPP table.
+
+  It's an user defined array containing a hierarchy of hardware version numbers,
+  supported by the OPP. For example: a platform with hierarchy of three levels
+  of versions (A, B and C), this field should be like <X Y Z>, where X
+  corresponds to Version hierarchy A, Y corresponds to version hierarchy B and Z
+  corresponds to version hierarchy C.
+
+  Each level of hierarchy is represented by a 32 bit value, and so there can be
+  only 32 different supported version per hierarchy. i.e. 1 bit per version. A
+  value of 0xFFFFFFFF will enable the OPP for all versions for that hierarchy
+  level. And a value of 0x00000000 will disable the OPP completely, and so we
+  never want that to happen.
+
+  If 32 values aren't sufficient for a version hierarchy, than that version
+  hierarchy can be contained in multiple 32 bit values. i.e. <X Y Z1 Z2> in the
+  above example, Z1 & Z2 refer to the version hierarchy Z.
+
+- status: Marks the node enabled/disabled.
+
+Example for a Juno with Mali T624:
+
+gpu_opp_table: opp_table0 {
+	compatible = "operating-points-v2", "operating-points-v2-mali";
+
+	opp@112500000 {
+		opp-hz = /bits/ 64 <112500000>;
+		opp-hz-real = /bits/ 64 <450000000>;
+		opp-microvolt = <820000>;
+		opp-core-mask = /bits/ 64 <0x1>;
+		opp-suspend;
+	};
+	opp@225000000 {
+		opp-hz = /bits/ 64 <225000000>;
+		opp-hz-real = /bits/ 64 <450000000>;
+		opp-microvolt = <820000>;
+		opp-core-count = <2>;
+	};
+	opp@450000000 {
+		opp-hz = /bits/ 64 <450000000>;
+		opp-hz-real = /bits/ 64 <450000000>;
+		opp-microvolt = <820000>;
+		opp-core-mask = /bits/ 64 <0xf>;
+	};
+	opp@487500000 {
+		opp-hz = /bits/ 64 <487500000>;
+		opp-microvolt = <825000>;
+	};
+	opp@525000000 {
+		opp-hz = /bits/ 64 <525000000>;
+		opp-microvolt = <850000>;
+	};
+	opp@562500000 {
+		opp-hz = /bits/ 64 <562500000>;
+		opp-microvolt = <875000>;
+	};
+	opp@600000000 {
+		opp-hz = /bits/ 64 <600000000>;
+		opp-microvolt = <900000>;
+	};
+};
+
diff --git a/t83x/kernel/Documentation/dma-buf-test-exporter.txt b/t83x/kernel/Documentation/dma-buf-test-exporter.txt
new file mode 100644
index 0000000..4208010
--- /dev/null
+++ b/t83x/kernel/Documentation/dma-buf-test-exporter.txt
@@ -0,0 +1,40 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+=====================
+dma-buf-test-exporter
+=====================
+
+Overview
+--------
+
+The dma-buf-test-exporter is a simple exporter of dma_buf objects.
+It has a private API to allocate and manipulate the buffers which are represented as dma_buf fds.
+The private API allows:
+* simple allocation of physically non-contiguous buffers
+* simple allocation of physically contiguous buffers
+* query kernel side API usage stats (number of attachments, number of mappings, mmaps)
+* failure mode configuration (fail attach, mapping, mmap)
+* kernel side memset of buffers
+
+The buffers support all of the dma_buf API, including mmap.
+
+It supports being compiled as a module both in-tree and out-of-tree.
+
+See include/linux/dma-buf-test-exporter.h for the ioctl interface.
+See Documentation/dma-buf-sharing.txt for details on dma_buf.
+
+
diff --git a/t83x/kernel/drivers/base/dma_buf_lock/sconscript b/t83x/kernel/drivers/base/dma_buf_lock/sconscript
new file mode 100644
index 0000000..7c7c242
--- /dev/null
+++ b/t83x/kernel/drivers/base/dma_buf_lock/sconscript
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2012, 2014, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+if env.KernelVersion() >= (3, 18, 34):
+    SConscript("src/sconscript")
+    if env["tests"] and Glob("tests/sconscript"):
+        SConscript("tests/sconscript")
diff --git a/t83x/kernel/drivers/base/dma_buf_lock/src/Kbuild b/t83x/kernel/drivers/base/dma_buf_lock/src/Kbuild
new file mode 100644
index 0000000..68dad07
--- /dev/null
+++ b/t83x/kernel/drivers/base/dma_buf_lock/src/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-m := dma_buf_lock.o
+endif
diff --git a/t83x/kernel/drivers/base/dma_buf_lock/src/Makefile b/t83x/kernel/drivers/base/dma_buf_lock/src/Makefile
new file mode 100644
index 0000000..cf76132
--- /dev/null
+++ b/t83x/kernel/drivers/base/dma_buf_lock/src/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: dma_buf_lock
+
+dma_buf_lock:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include"
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/t83x/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c b/t83x/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
new file mode 100644
index 0000000..9d924ff
--- /dev/null
+++ b/t83x/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c
@@ -0,0 +1,850 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/version.h>
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/reservation.h>
+#include <linux/dma-buf.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+
+#include <linux/fence.h>
+
+#define dma_fence_context_alloc(a) fence_context_alloc(a)
+#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e)
+#define dma_fence_get(a) fence_get(a)
+#define dma_fence_put(a) fence_put(a)
+#define dma_fence_signal(a) fence_signal(a)
+#define dma_fence_is_signaled(a) fence_is_signaled(a)
+#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c)
+#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b)
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0)
+
+#else
+
+#include <linux/dma-fence.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
+#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \
+	(a)->status ?: 1 \
+	: 0)
+#endif
+
+#endif /* < 4.10.0 */
+
+#include "dma_buf_lock.h"
+
+/* Maximum number of buffers that a single handle can address */
+#define DMA_BUF_LOCK_BUF_MAX 32
+
+#define DMA_BUF_LOCK_DEBUG 1
+
+#define DMA_BUF_LOCK_INIT_BIAS  0xFF
+
+static dev_t dma_buf_lock_dev;
+static struct cdev dma_buf_lock_cdev;
+static struct class *dma_buf_lock_class;
+static char dma_buf_lock_dev_name[] = "dma_buf_lock";
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static struct file_operations dma_buf_lock_fops =
+{
+	.owner   = THIS_MODULE,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = dma_buf_lock_ioctl,
+#else
+	.ioctl   = dma_buf_lock_ioctl,
+#endif
+	.compat_ioctl   = dma_buf_lock_ioctl,
+};
+
+typedef struct dma_buf_lock_resource
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence fence;
+#else
+	struct dma_fence fence;
+#endif
+	int *list_of_dma_buf_fds;               /* List of buffers copied from userspace */
+	atomic_t locked;                        /* Status of lock */
+	struct dma_buf **dma_bufs;
+	unsigned long exclusive;                /* Exclusive access bitmap */
+	atomic_t fence_dep_count;		/* Number of dma-fence dependencies */
+	struct list_head dma_fence_callbacks;	/* list of all callbacks set up to wait on other fences */
+	wait_queue_head_t wait;
+	struct kref refcount;
+	struct list_head link;
+	int count;
+} dma_buf_lock_resource;
+
+/**
+ * struct dma_buf_lock_fence_cb - Callback data struct for dma-fence
+ * @fence_cb: Callback function
+ * @fence:    Pointer to the fence object on which this callback is waiting
+ * @res:      Pointer to dma_buf_lock_resource that is waiting on this callback
+ * @node:     List head for linking this callback to the lock resource
+ */
+struct dma_buf_lock_fence_cb {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence_cb fence_cb;
+	struct fence *fence;
+#else
+	struct dma_fence_cb fence_cb;
+	struct dma_fence *fence;
+#endif
+	struct dma_buf_lock_resource *res;
+	struct list_head node;
+};
+
+static LIST_HEAD(dma_buf_lock_resource_list);
+static DEFINE_MUTEX(dma_buf_lock_mutex);
+
+static inline int is_dma_buf_lock_file(struct file *);
+static void dma_buf_lock_dounlock(struct kref *ref);
+
+
+/*** dma_buf_lock fence part ***/
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(dma_buf_lock_fence_lock);
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_get_driver_name(struct fence *fence)
+#else
+dma_buf_lock_fence_get_driver_name(struct dma_fence *fence)
+#endif
+{
+	return "dma_buf_lock";
+}
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_get_timeline_name(struct fence *fence)
+#else
+dma_buf_lock_fence_get_timeline_name(struct dma_fence *fence)
+#endif
+{
+	return "dma_buf_lock.timeline";
+}
+
+static bool
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_enable_signaling(struct fence *fence)
+#else
+dma_buf_lock_fence_enable_signaling(struct dma_fence *fence)
+#endif
+{
+	return true;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+const struct fence_ops dma_buf_lock_fence_ops = {
+	.wait = fence_default_wait,
+#else
+const struct dma_fence_ops dma_buf_lock_fence_ops = {
+	.wait = dma_fence_default_wait,
+#endif
+	.get_driver_name = dma_buf_lock_fence_get_driver_name,
+	.get_timeline_name = dma_buf_lock_fence_get_timeline_name,
+	.enable_signaling = dma_buf_lock_fence_enable_signaling,
+};
+
+static void
+dma_buf_lock_fence_init(dma_buf_lock_resource *resource)
+{
+	dma_fence_init(&resource->fence,
+		       &dma_buf_lock_fence_ops,
+		       &dma_buf_lock_fence_lock,
+		       0,
+		       0);
+}
+
+static void
+dma_buf_lock_fence_free_callbacks(dma_buf_lock_resource *resource)
+{
+	struct dma_buf_lock_fence_cb *cb, *tmp;
+
+	/* Clean up and free callbacks. */
+	list_for_each_entry_safe(cb, tmp, &resource->dma_fence_callbacks, node) {
+		/* Cancel callbacks that hasn't been called yet and release the
+		 * reference taken in dma_buf_lock_fence_add_callback().
+		 */
+		dma_fence_remove_callback(cb->fence, &cb->fence_cb);
+		dma_fence_put(cb->fence);
+		list_del(&cb->node);
+		kfree(cb);
+	}
+}
+
+static void
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+dma_buf_lock_fence_callback(struct fence *fence, struct fence_cb *cb)
+#else
+dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
+#endif
+{
+	struct dma_buf_lock_fence_cb *dma_buf_lock_cb = container_of(cb,
+				struct dma_buf_lock_fence_cb,
+				fence_cb);
+	dma_buf_lock_resource *resource = dma_buf_lock_cb->res;
+
+#if DMA_BUF_LOCK_DEBUG
+	printk(KERN_DEBUG "dma_buf_lock_fence_callback\n");
+#endif
+
+	/* Callback function will be invoked in atomic context. */
+
+	if (atomic_dec_and_test(&resource->fence_dep_count)) {
+		atomic_set(&resource->locked, 1);
+		wake_up(&resource->wait);
+		/* A work item can be queued at this point to invoke
+		 * dma_buf_lock_fence_free_callbacks.
+		 */
+	}
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+static int
+dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
+				struct fence *fence,
+				fence_func_t callback)
+#else
+static int
+dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
+				struct dma_fence *fence,
+				dma_fence_func_t callback)
+#endif
+{
+	int err = 0;
+	struct dma_buf_lock_fence_cb *fence_cb;
+
+	if (!fence)
+		return -EINVAL;
+
+	fence_cb = kmalloc(sizeof(*fence_cb), GFP_KERNEL);
+	if (!fence_cb)
+		return -ENOMEM;
+
+	fence_cb->fence = fence;
+	fence_cb->res   = resource;
+	INIT_LIST_HEAD(&fence_cb->node);
+
+	err = dma_fence_add_callback(fence, &fence_cb->fence_cb,
+				     callback);
+
+	if (err == -ENOENT) {
+		/* Fence signaled, get the completion result */
+		err = dma_fence_get_status(fence);
+
+		/* remap success completion to err code */
+		if (err == 1)
+			err = 0;
+
+		kfree(fence_cb);
+	} else if (err) {
+		kfree(fence_cb);
+	} else {
+		/*
+		 * Get reference to fence that will be kept until callback gets
+		 * cleaned up in dma_buf_lock_fence_free_callbacks().
+		 */
+		dma_fence_get(fence);
+		atomic_inc(&resource->fence_dep_count);
+		/* Add callback to resource's list of callbacks */
+		list_add(&fence_cb->node, &resource->dma_fence_callbacks);
+	}
+
+	return err;
+}
+
+static int
+dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource,
+					    struct reservation_object *resv,
+					    bool exclusive)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+#else
+	struct dma_fence *excl_fence = NULL;
+	struct dma_fence **shared_fences = NULL;
+#endif
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = reservation_object_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = dma_buf_lock_fence_add_callback(resource,
+						      excl_fence,
+						      dma_buf_lock_fence_callback);
+
+		/* Release our reference, taken by reservation_object_get_fences_rcu(),
+		 * to the fence. We have set up our callback (if that was possible),
+		 * and it's the fence's owner is responsible for singling the fence
+		 * before allowing it to disappear.
+		 */
+		dma_fence_put(excl_fence);
+
+		if (err)
+			goto out;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = dma_buf_lock_fence_add_callback(resource,
+							      shared_fences[i],
+							      dma_buf_lock_fence_callback);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Release all our references to the shared fences, taken by
+	 * reservation_object_get_fences_rcu(). We have set up our callback (if
+	 * that was possible), and it's the fence's owner is responsible for
+	 * signaling the fence before allowing it to disappear.
+	 */
+out:
+	for (i = 0; i < shared_count; i++)
+		dma_fence_put(shared_fences[i]);
+	kfree(shared_fences);
+
+	return err;
+}
+
+static void
+dma_buf_lock_release_fence_reservation(dma_buf_lock_resource *resource,
+				       struct ww_acquire_ctx *ctx)
+{
+	unsigned int r;
+
+	for (r = 0; r < resource->count; r++)
+		ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock);
+	ww_acquire_fini(ctx);
+}
+
+static int
+dma_buf_lock_acquire_fence_reservation(dma_buf_lock_resource *resource,
+				       struct ww_acquire_ctx *ctx)
+{
+	struct reservation_object *content_resv = NULL;
+	unsigned int content_resv_idx = 0;
+	unsigned int r;
+	int err = 0;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < resource->count; r++) {
+		if (resource->dma_bufs[r]->resv == content_resv) {
+			content_resv = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&resource->dma_bufs[r]->resv->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	content_resv_idx = r;
+
+	/* Unlock the locked one ones */
+	while (r--)
+		ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock);
+
+	if (content_resv)
+		ww_mutex_unlock(&content_resv->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+#if DMA_BUF_LOCK_DEBUG
+		printk(KERN_DEBUG "deadlock at dma_buf fd %i\n",
+		       resource->list_of_dma_buf_fds[content_resv_idx]);
+#endif
+		content_resv = resource->dma_bufs[content_resv_idx]->resv;
+		ww_mutex_lock_slow(&content_resv->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
+{
+	dma_buf_lock_resource *resource;
+
+	if (!is_dma_buf_lock_file(file))
+		return -EINVAL;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_release\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return 0;
+}
+
+static unsigned int dma_buf_lock_handle_poll(struct file *file,
+                                             struct poll_table_struct *wait)
+{
+	dma_buf_lock_resource *resource;
+	unsigned int ret = 0;
+
+	if (!is_dma_buf_lock_file(file))
+		return POLLERR;
+
+	resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll\n");
+#endif
+	if (1 == atomic_read(&resource->locked))
+	{
+		/* Resources have been locked */
+		ret = POLLIN | POLLRDNORM;
+		if (resource->exclusive)
+		{
+			ret |=  POLLOUT | POLLWRNORM;
+		}
+	}
+	else
+	{
+		if (!poll_does_not_wait(wait)) 
+		{
+			poll_wait(file, &resource->wait, wait);
+		}
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_handle_poll : return %i\n", ret);
+#endif
+	return ret;
+}
+
+static const struct file_operations dma_buf_lock_handle_fops = {
+	.owner		= THIS_MODULE,
+	.release	= dma_buf_lock_handle_release,
+	.poll		= dma_buf_lock_handle_poll,
+};
+
+/*
+ * is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock
+ */
+static inline int is_dma_buf_lock_file(struct file *file)
+{
+	return file->f_op == &dma_buf_lock_handle_fops;
+}
+
+
+
+/*
+ * Start requested lock.
+ *
+ * Allocates required memory, copies dma_buf_fd list from userspace,
+ * acquires related reservation objects, and starts the lock.
+ */
+static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
+{
+	dma_buf_lock_resource *resource;
+	struct ww_acquire_ctx ww_ctx;
+	int size;
+	int fd;
+	int i;
+	int ret;
+
+	if (NULL == request->list_of_dma_buf_fds)
+	{
+		return -EINVAL;
+	}
+	if (request->count <= 0)
+	{
+		return -EINVAL;
+	}
+	if (request->count > DMA_BUF_LOCK_BUF_MAX)
+	{
+		return -EINVAL;
+	}
+	if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE &&
+	    request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
+	{
+		return -EINVAL;
+	}
+
+	resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
+	if (NULL == resource)
+	{
+		return -ENOMEM;
+	}
+
+	atomic_set(&resource->locked, 0);
+	kref_init(&resource->refcount);
+	INIT_LIST_HEAD(&resource->link);
+	resource->count = request->count;
+
+	/* Allocate space to store dma_buf_fds received from user space */
+	size = request->count * sizeof(int);
+	resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->list_of_dma_buf_fds)
+	{
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Allocate space to store dma_buf pointers associated with dma_buf_fds */
+	size = sizeof(struct dma_buf *) * request->count;
+	resource->dma_bufs = kmalloc(size, GFP_KERNEL);
+
+	if (NULL == resource->dma_bufs)
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource);
+		return -ENOMEM;
+	}
+
+	/* Copy requested list of dma_buf_fds from user space */
+	size = request->count * sizeof(int);
+	if (0 != copy_from_user(resource->list_of_dma_buf_fds, (void __user *)request->list_of_dma_buf_fds, size))
+	{
+		kfree(resource->list_of_dma_buf_fds);
+		kfree(resource->dma_bufs);
+		kfree(resource);
+		return -ENOMEM;
+	}
+#if DMA_BUF_LOCK_DEBUG
+	for (i = 0; i < request->count; i++)
+	{
+		printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
+	}
+#endif
+
+	/* Initialize the fence associated with dma_buf_lock resource */
+	dma_buf_lock_fence_init(resource);
+
+	INIT_LIST_HEAD(&resource->dma_fence_callbacks);
+
+	atomic_set(&resource->fence_dep_count, DMA_BUF_LOCK_INIT_BIAS);
+
+	/* Add resource to global list */
+	mutex_lock(&dma_buf_lock_mutex);
+
+	list_add(&resource->link, &dma_buf_lock_resource_list);
+
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	for (i = 0; i < request->count; i++)
+	{
+		/* Convert fd into dma_buf structure */
+		resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
+
+		if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i])))
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+
+		/*Check the reservation object associated with dma_buf */
+		if (NULL == resource->dma_bufs[i]->resv)
+		{
+			mutex_lock(&dma_buf_lock_mutex);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+			return -EINVAL;
+		}
+#if DMA_BUF_LOCK_DEBUG
+		printk(KERN_DEBUG "dma_buf_lock_dolock : dma_buf_fd %i dma_buf %p dma_fence reservation %p\n",
+		       resource->list_of_dma_buf_fds[i], resource->dma_bufs[i], resource->dma_bufs[i]->resv);
+#endif
+	}
+
+	init_waitqueue_head(&resource->wait);
+
+	kref_get(&resource->refcount);
+
+	/* Create file descriptor associated with lock request */
+	fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops,
+	                      (void *)resource, 0);
+	if (fd < 0)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+		return fd;
+	}
+
+	resource->exclusive = request->exclusive;
+
+	/* Start locking process */
+	ret = dma_buf_lock_acquire_fence_reservation(resource, &ww_ctx);
+	if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+		printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d locking reservations.\n", ret);
+#endif
+		put_unused_fd(fd);
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+		return ret;
+	}
+
+	for (i = 0; i < request->count; i++) {
+		struct reservation_object *resv = resource->dma_bufs[i]->resv;
+
+		if (!test_bit(i, &resource->exclusive)) {
+			ret = reservation_object_reserve_shared(resv);
+			if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d reserving space for shared fence.\n", ret);
+#endif
+				break;
+			}
+
+			ret = dma_buf_lock_add_fence_reservation_callback(resource,
+									  resv,
+									  false);
+			if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret);
+#endif
+				break;
+			}
+
+			reservation_object_add_shared_fence(resv, &resource->fence);
+		} else {
+			ret = dma_buf_lock_add_fence_reservation_callback(resource,
+									  resv,
+									  true);
+			if (ret) {
+#if DMA_BUF_LOCK_DEBUG
+				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret);
+#endif
+				break;
+			}
+
+			reservation_object_add_excl_fence(resv, &resource->fence);
+		}
+	}
+
+	dma_buf_lock_release_fence_reservation(resource, &ww_ctx);
+
+	if (IS_ERR_VALUE((unsigned long)ret))
+	{
+		put_unused_fd(fd);
+
+		mutex_lock(&dma_buf_lock_mutex);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
+
+		return ret;
+	}
+
+	/* Test if the callbacks were already triggered */
+	if (!atomic_sub_return(DMA_BUF_LOCK_INIT_BIAS, &resource->fence_dep_count))
+		atomic_set(&resource->locked, 1);
+
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_dolock : complete\n");
+#endif
+	mutex_lock(&dma_buf_lock_mutex);
+	kref_put(&resource->refcount, dma_buf_lock_dounlock);
+	mutex_unlock(&dma_buf_lock_mutex);
+
+	return fd;
+}
+
+static void dma_buf_lock_dounlock(struct kref *ref)
+{
+	int i;
+	dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
+
+	atomic_set(&resource->locked, 0);
+
+	/* Signal the resource's fence. */
+	dma_fence_signal(&resource->fence);
+
+	dma_buf_lock_fence_free_callbacks(resource);
+
+	list_del(&resource->link);
+
+	for (i = 0; i < resource->count; i++)
+	{
+		if (resource->dma_bufs[i])
+			dma_buf_put(resource->dma_bufs[i]);
+	}
+
+	kfree(resource->dma_bufs);
+	kfree(resource->list_of_dma_buf_fds);
+	dma_fence_put(&resource->fence);
+}
+
+static int __init dma_buf_lock_init(void)
+{
+	int err;
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init\n");
+#endif
+	err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
+
+	if (0 == err)
+	{
+		cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops);
+
+		err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1);
+
+		if (0 == err)
+		{
+			dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
+			if (IS_ERR(dma_buf_lock_class))
+			{
+				err = PTR_ERR(dma_buf_lock_class);
+			}
+			else
+			{
+				struct device *mdev;
+				mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev, NULL, dma_buf_lock_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return 0;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(dma_buf_lock_class);
+			}
+			cdev_del(&dma_buf_lock_cdev);
+		}
+
+		unregister_chrdev_region(dma_buf_lock_dev, 1);
+	}
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_init failed\n");
+#endif
+	return err;
+}
+
+static void __exit dma_buf_lock_exit(void)
+{
+#if DMA_BUF_LOCK_DEBUG
+	printk("dma_buf_lock_exit\n");
+#endif
+
+	/* Unlock all outstanding references */
+	while (1)
+	{
+		mutex_lock(&dma_buf_lock_mutex);
+		if (list_empty(&dma_buf_lock_resource_list))
+		{
+			mutex_unlock(&dma_buf_lock_mutex);
+			break;
+		}
+		else
+		{
+			dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next, 
+			                                             dma_buf_lock_resource, link);
+			kref_put(&resource->refcount, dma_buf_lock_dounlock);
+			mutex_unlock(&dma_buf_lock_mutex);
+		}
+	}
+
+	device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
+
+	class_destroy(dma_buf_lock_class);
+
+	cdev_del(&dma_buf_lock_cdev);
+
+	unregister_chrdev_region(dma_buf_lock_dev, 1);
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	dma_buf_lock_k_request request;
+	int size = _IOC_SIZE(cmd);
+
+	if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
+	{
+		return -ENOTTY;
+	}
+
+	switch (cmd)
+	{
+		case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
+			if (size != sizeof(dma_buf_lock_k_request))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&request, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+#if DMA_BUF_LOCK_DEBUG
+			printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
+#endif
+			return dma_buf_lock_dolock(&request);
+	}
+
+	return -ENOTTY;
+}
+
+module_init(dma_buf_lock_init);
+module_exit(dma_buf_lock_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/t83x/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h b/t83x/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
new file mode 100644
index 0000000..e1b9348
--- /dev/null
+++ b/t83x/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _DMA_BUF_LOCK_H
+#define _DMA_BUF_LOCK_H
+
+typedef enum dma_buf_lock_exclusive
+{
+	DMA_BUF_LOCK_NONEXCLUSIVE = 0,
+	DMA_BUF_LOCK_EXCLUSIVE = -1
+} dma_buf_lock_exclusive;
+
+typedef struct dma_buf_lock_k_request
+{
+	int count;
+	int *list_of_dma_buf_fds;
+	int timeout;
+	dma_buf_lock_exclusive exclusive;
+} dma_buf_lock_k_request;
+
+#define DMA_BUF_LOCK_IOC_MAGIC '~'
+
+#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC       _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
+
+#define DMA_BUF_LOCK_IOC_MINNR 11
+#define DMA_BUF_LOCK_IOC_MAXNR 11
+
+#endif /* _DMA_BUF_LOCK_H */
diff --git a/t83x/kernel/drivers/base/dma_buf_lock/src/sconscript b/t83x/kernel/drivers/base/dma_buf_lock/src/sconscript
new file mode 100644
index 0000000..b8724f1
--- /dev/null
+++ b/t83x/kernel/drivers/base/dma_buf_lock/src/sconscript
@@ -0,0 +1,33 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+import re
+Import('env')
+
+src = [Glob('#kernel/drivers/base/dma_buf_lock/src/*.c'), Glob('#kernel/drivers/base/dma_buf_lock/src/*.h'), Glob('#kernel/drivers/base/dma_buf_lock/src/K*')]
+
+if env.GetOption('clean') :
+	# Clean module
+	env.Execute(Action("make clean", '[CLEAN] dma_buf_lock'))
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
+else:
+	# Build module
+	makeAction=Action("cd ${SOURCE.dir} && make dma_buf_lock && cp dma_buf_lock.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [makeAction])
+	env.KernelObjTarget('dma_buf_lock', cmd)
+
diff --git a/t83x/kernel/drivers/base/dma_buf_test_exporter/Kbuild b/t83x/kernel/drivers/base/dma_buf_test_exporter/Kbuild
new file mode 100644
index 0000000..56b9f86
--- /dev/null
+++ b/t83x/kernel/drivers/base/dma_buf_test_exporter/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma-buf-test-exporter.o
+endif
diff --git a/t83x/kernel/drivers/base/dma_buf_test_exporter/Kconfig b/t83x/kernel/drivers/base/dma_buf_test_exporter/Kconfig
new file mode 100644
index 0000000..974f0c2
--- /dev/null
+++ b/t83x/kernel/drivers/base/dma_buf_test_exporter/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+config DMA_SHARED_BUFFER_TEST_EXPORTER
+	tristate "Test exporter for the dma-buf framework"
+	depends on DMA_SHARED_BUFFER
+	help
+	  This option enables the test exporter usable to help test importerts.
diff --git a/t83x/kernel/drivers/base/dma_buf_test_exporter/Makefile b/t83x/kernel/drivers/base/dma_buf_test_exporter/Makefile
new file mode 100644
index 0000000..06e3d5c
--- /dev/null
+++ b/t83x/kernel/drivers/base/dma_buf_test_exporter/Makefile
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/t83x/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/t83x/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
new file mode 100644
index 0000000..1b1c650
--- /dev/null
+++ b/t83x/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
@@ -0,0 +1,719 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/dma-buf-test-exporter.h>
+#include <linux/dma-buf.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/atomic.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif
+#include <linux/dma-mapping.h>
+#endif
+
+struct dma_buf_te_alloc {
+	/* the real alloc */
+	int nr_pages;
+	struct page **pages;
+
+	/* the debug usage tracking */
+	int nr_attached_devices;
+	int nr_device_mappings;
+	int nr_cpu_mappings;
+
+	/* failure simulation */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+
+	bool contiguous;
+	dma_addr_t contig_dma_addr;
+	void *contig_cpu_addr;
+};
+
+static struct miscdevice te_device;
+
+static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc	*alloc;
+	alloc = buf->priv;
+
+	if (alloc->fail_attach)
+		return -EFAULT;
+
+	/* dma_buf is externally locked during call */
+	alloc->nr_attached_devices++;
+	return 0;
+}
+
+static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* dma_buf is externally locked during call */
+
+	alloc->nr_attached_devices--;
+}
+
+static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction)
+{
+	struct sg_table *sg;
+	struct scatterlist *iter;
+	struct dma_buf_te_alloc	*alloc;
+	int i;
+	int ret;
+
+	alloc = attachment->dmabuf->priv;
+
+	if (alloc->fail_map)
+		return ERR_PTR(-ENOMEM);
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* if the ARCH can't chain we can't have allocs larger than a single sg can hold */
+	if (alloc->nr_pages > SG_MAX_SINGLE_ALLOC)
+		return ERR_PTR(-EINVAL);
+#endif
+
+	sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (!sg)
+		return ERR_PTR(-ENOMEM);
+
+	/* from here we access the allocation object, so lock the dmabuf pointing to it */
+	mutex_lock(&attachment->dmabuf->lock);
+
+	if (alloc->contiguous)
+		ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+	else
+		ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL);
+	if (ret) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		kfree(sg);
+		return ERR_PTR(ret);
+	}
+
+	if (alloc->contiguous) {
+		sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE;
+		sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0);
+		sg_dma_address(sg->sgl) = alloc->contig_dma_addr;
+	} else {
+		for_each_sg(sg->sgl, iter, alloc->nr_pages, i)
+			sg_set_page(iter, alloc->pages[i], PAGE_SIZE, 0);
+	}
+
+	if (!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) {
+		mutex_unlock(&attachment->dmabuf->lock);
+		sg_free_table(sg);
+		kfree(sg);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	alloc->nr_device_mappings++;
+	mutex_unlock(&attachment->dmabuf->lock);
+	return sg;
+}
+
+static void dma_buf_te_unmap(struct dma_buf_attachment *attachment,
+							 struct sg_table *sg, enum dma_data_direction direction)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = attachment->dmabuf->priv;
+
+	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction);
+	sg_free_table(sg);
+	kfree(sg);
+
+	mutex_lock(&attachment->dmabuf->lock);
+	alloc->nr_device_mappings--;
+	mutex_unlock(&attachment->dmabuf->lock);
+}
+
+static void dma_buf_te_release(struct dma_buf *buf)
+{
+	int i;
+	struct dma_buf_te_alloc *alloc;
+	alloc = buf->priv;
+	/* no need for locking */
+
+	if (alloc->contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr,
+						alloc->contig_dma_addr,
+						DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++)
+			__free_page(alloc->pages[i]);
+	}
+	kfree(alloc->pages);
+	kfree(alloc);
+}
+
+
+static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings++;
+	mutex_unlock(&dma_buf->lock);
+}
+
+static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	BUG_ON(alloc->nr_cpu_mappings <= 0);
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings--;
+	mutex_unlock(&dma_buf->lock);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+static int dma_buf_te_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+#else
+static int dma_buf_te_mmap_fault(struct vm_fault *vmf)
+#endif
+{
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dmabuf;
+	struct page *pageptr;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	dmabuf = vma->vm_private_data;
+#else
+	dmabuf = vmf->vma->vm_private_data;
+#endif
+	alloc = dmabuf->priv;
+
+	if (vmf->pgoff > alloc->nr_pages)
+		return VM_FAULT_SIGBUS;
+
+	pageptr = alloc->pages[vmf->pgoff];
+
+	BUG_ON(!pageptr);
+
+	get_page(pageptr);
+	vmf->page = pageptr;
+
+	return 0;
+}
+
+struct vm_operations_struct dma_buf_te_vm_ops = {
+	.open = dma_buf_te_mmap_open,
+	.close = dma_buf_te_mmap_close,
+	.fault = dma_buf_te_mmap_fault
+};
+
+static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = dmabuf->priv;
+
+	if (alloc->fail_mmap)
+		return -ENOMEM;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTEXPAND;
+#endif
+	vma->vm_ops = &dma_buf_te_vm_ops;
+	vma->vm_private_data = dmabuf;
+
+	/*  we fault in the pages on access */
+
+	/* call open to do the ref-counting */
+	dma_buf_te_vm_ops.open(vma);
+
+	return 0;
+}
+
+static void *dma_buf_te_kmap_atomic(struct dma_buf *buf, unsigned long page_num)
+{
+	/* IGNORE */
+	return NULL;
+}
+
+static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long page_num)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return NULL;
+
+	return kmap(alloc->pages[page_num]);
+}
+static void dma_buf_te_kunmap(struct dma_buf *buf,
+		unsigned long page_num, void *addr)
+{
+	struct dma_buf_te_alloc *alloc;
+
+	alloc = buf->priv;
+	if (page_num >= alloc->nr_pages)
+		return;
+
+	kunmap(alloc->pages[page_num]);
+	return;
+}
+
+static struct dma_buf_ops dma_buf_te_ops = {
+	/* real handlers */
+	.attach = dma_buf_te_attach,
+	.detach = dma_buf_te_detach,
+	.map_dma_buf = dma_buf_te_map,
+	.unmap_dma_buf = dma_buf_te_unmap,
+	.release = dma_buf_te_release,
+	.mmap = dma_buf_te_mmap,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
+	.kmap = dma_buf_te_kmap,
+	.kunmap = dma_buf_te_kunmap,
+
+	/* nop handlers for mandatory functions we ignore */
+	.kmap_atomic = dma_buf_te_kmap_atomic
+#else
+	.map = dma_buf_te_kmap,
+	.unmap = dma_buf_te_kunmap,
+
+	/* nop handlers for mandatory functions we ignore */
+	.map_atomic = dma_buf_te_kmap_atomic
+#endif
+};
+
+static int do_dma_buf_te_ioctl_version(struct dma_buf_te_ioctl_version __user *buf)
+{
+	struct dma_buf_te_ioctl_version v;
+
+	if (copy_from_user(&v, buf, sizeof(v)))
+		return -EFAULT;
+
+	if (v.op != DMA_BUF_TE_ENQ)
+		return -EFAULT;
+
+	v.op = DMA_BUF_TE_ACK;
+	v.major = DMA_BUF_TE_VER_MAJOR;
+	v.minor = DMA_BUF_TE_VER_MINOR;
+
+	if (copy_to_user(buf, &v, sizeof(v)))
+		return -EFAULT;
+	else
+		return 0;
+}
+
+static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, bool contiguous)
+{
+	struct dma_buf_te_ioctl_alloc alloc_req;
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dma_buf;
+	int i = 0;
+	int fd;
+
+	if (copy_from_user(&alloc_req, buf, sizeof(alloc_req))) {
+		dev_err(te_device.this_device, "%s: couldn't get user data", __func__);
+		goto no_input;
+	}
+
+	if (!alloc_req.size) {
+		dev_err(te_device.this_device, "%s: no size specified", __func__);
+		goto invalid_size;
+	}
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+	/* Whilst it is possible to allocate larger buffer, we won't be able to
+	 * map it during actual usage (mmap() still succeeds). We fail here so
+	 * userspace code can deal with it early than having driver failure
+	 * later on. */
+	if (alloc_req.size > SG_MAX_SINGLE_ALLOC) {
+		dev_err(te_device.this_device, "%s: buffer size of %llu pages exceeded the mapping limit of %lu pages",
+				__func__, alloc_req.size, SG_MAX_SINGLE_ALLOC);
+		goto invalid_size;
+	}
+#endif
+
+	alloc = kzalloc(sizeof(struct dma_buf_te_alloc), GFP_KERNEL);
+	if (NULL == alloc) {
+		dev_err(te_device.this_device, "%s: couldn't alloc object", __func__);
+		goto no_alloc_object;
+	}
+
+	alloc->nr_pages = alloc_req.size;
+	alloc->contiguous = contiguous;
+
+	alloc->pages = kzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL);
+	if (!alloc->pages) {
+		dev_err(te_device.this_device,
+				"%s: couldn't alloc %d page structures", __func__,
+				alloc->nr_pages);
+		goto free_alloc_object;
+	}
+
+	if (contiguous) {
+		dma_addr_t dma_aux;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO,
+				DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO, &attrs);
+#else
+		alloc->contig_cpu_addr = dma_alloc_writecombine(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				&alloc->contig_dma_addr,
+				GFP_KERNEL | __GFP_ZERO);
+#endif
+		if (!alloc->contig_cpu_addr) {
+			dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %d pages", __func__, alloc->nr_pages);
+			goto free_page_struct;
+		}
+		dma_aux = alloc->contig_dma_addr;
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = pfn_to_page(PFN_DOWN(dma_aux));
+			dma_aux += PAGE_SIZE;
+		}
+	} else {
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+			if (NULL == alloc->pages[i]) {
+				dev_err(te_device.this_device, "%s: couldn't alloc page", __func__);
+				goto no_page;
+			}
+		}
+	}
+
+	/* alloc ready, let's export it */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0))
+	{
+		struct dma_buf_export_info export_info = {
+			.exp_name = "dma_buf_te",
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0))
+			.owner = THIS_MODULE,
+#endif
+			.ops = &dma_buf_te_ops,
+			.size = alloc->nr_pages << PAGE_SHIFT,
+			.flags = O_CLOEXEC | O_RDWR,
+			.priv = alloc,
+		};
+
+		dma_buf = dma_buf_export(&export_info);
+	}
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0))
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR, NULL);
+#else
+	dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+			alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR);
+#endif
+
+	if (IS_ERR_OR_NULL(dma_buf)) {
+		dev_err(te_device.this_device, "%s: couldn't export dma_buf", __func__);
+		goto no_export;
+	}
+
+	/* get fd for buf */
+	fd = dma_buf_fd(dma_buf, O_CLOEXEC);
+
+	if (fd < 0) {
+		dev_err(te_device.this_device, "%s: couldn't get fd from dma_buf", __func__);
+		goto no_fd;
+	}
+
+	return fd;
+
+no_fd:
+	dma_buf_put(dma_buf);
+no_export:
+	/* i still valid */
+no_page:
+	if (contiguous) {
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr,
+						alloc->contig_dma_addr,
+						DMA_ATTR_WRITE_COMBINE);
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+						alloc->nr_pages * PAGE_SIZE,
+						alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+		dma_free_writecombine(te_device.this_device,
+								alloc->nr_pages * PAGE_SIZE,
+								alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+	} else {
+		while (i-- > 0)
+			__free_page(alloc->pages[i]);
+	}
+free_page_struct:
+	kfree(alloc->pages);
+free_alloc_object:
+	kfree(alloc);
+no_alloc_object:
+invalid_size:
+no_input:
+	return -EFAULT;
+}
+
+static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg)
+{
+	struct dma_buf_te_ioctl_status status;
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&status, arg, sizeof(status)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(status.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, get the current status */
+	alloc = dmabuf->priv;
+
+	/* lock while reading status to take a snapshot */
+	mutex_lock(&dmabuf->lock);
+	status.attached_devices = alloc->nr_attached_devices;
+	status.device_mappings = alloc->nr_device_mappings;
+	status.cpu_mappings = alloc->nr_cpu_mappings;
+	mutex_unlock(&dmabuf->lock);
+
+	if (copy_to_user(arg, &status, sizeof(status)))
+		goto err_have_dmabuf;
+
+	/* All OK */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing __user *arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_set_failing f;
+	struct dma_buf_te_alloc *alloc;
+	int res = -EINVAL;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	/* verify it's one of ours */
+	if (dmabuf->ops != &dma_buf_te_ops)
+		goto err_have_dmabuf;
+
+	/* ours, set the fail modes */
+	alloc = dmabuf->priv;
+	/* lock to set the fail modes atomically */
+	mutex_lock(&dmabuf->lock);
+	alloc->fail_attach = f.fail_attach;
+	alloc->fail_map    = f.fail_map;
+	alloc->fail_mmap   = f.fail_mmap;
+	mutex_unlock(&dmabuf->lock);
+
+	/* success */
+	res = 0;
+
+err_have_dmabuf:
+	dma_buf_put(dmabuf);
+	return res;
+}
+
+static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
+{
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	unsigned int count;
+	unsigned int offset = 0;
+	int ret = 0;
+	int i;
+
+	attachment = dma_buf_attach(dma_buf, te_device.this_device);
+	if (IS_ERR_OR_NULL(attachment))
+		return -EBUSY;
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto no_import;
+	}
+
+	ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+	if (ret)
+		goto no_cpu_access;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		for (i = 0; i < sg_dma_len(sg); i = i + PAGE_SIZE) {
+			void *addr;
+
+			addr = dma_buf_kmap(dma_buf, i >> PAGE_SHIFT);
+			if (!addr) {
+				/* dma_buf_kmap is unimplemented in exynos and returns NULL */
+				ret = -EPERM;
+				goto no_kmap;
+			}
+			memset(addr, value, PAGE_SIZE);
+			dma_buf_kunmap(dma_buf, i >> PAGE_SHIFT, addr);
+		}
+		offset += sg_dma_len(sg);
+	}
+
+no_kmap:
+	dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+no_cpu_access:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+no_import:
+	dma_buf_detach(dma_buf, attachment);
+	return ret;
+}
+
+static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
+{
+
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_fill f;
+	int ret;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	ret = dma_te_buf_fill(dmabuf, f.value);
+	dma_buf_put(dmabuf);
+
+	return ret;
+}
+
+static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case DMA_BUF_TE_VERSION:
+		return do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg);
+	case DMA_BUF_TE_ALLOC:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false);
+	case DMA_BUF_TE_ALLOC_CONT:
+		return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true);
+	case DMA_BUF_TE_QUERY:
+		return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg);
+	case DMA_BUF_TE_SET_FAILING:
+		return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg);
+	case DMA_BUF_TE_FILL:
+		return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations dma_buf_te_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = dma_buf_te_ioctl,
+	.compat_ioctl = dma_buf_te_ioctl,
+};
+
+static int __init dma_buf_te_init(void)
+{
+	int res;
+	te_device.minor = MISC_DYNAMIC_MINOR;
+	te_device.name = "dma_buf_te";
+	te_device.fops = &dma_buf_te_fops;
+
+	res = misc_register(&te_device);
+	if (res) {
+		printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n");
+		return res;
+	}
+	te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	dev_info(te_device.this_device, "dma_buf_te ready\n");
+	return 0;
+
+}
+
+static void __exit dma_buf_te_exit(void)
+{
+	misc_deregister(&te_device);
+}
+
+module_init(dma_buf_te_init);
+module_exit(dma_buf_te_exit);
+MODULE_LICENSE("GPL");
+
diff --git a/t83x/kernel/drivers/base/dma_buf_test_exporter/sconscript b/t83x/kernel/drivers/base/dma_buf_test_exporter/sconscript
new file mode 100644
index 0000000..5f42141
--- /dev/null
+++ b/t83x/kernel/drivers/base/dma_buf_test_exporter/sconscript
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+mod = env.BuildKernelModule('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', Glob('*.c'))
+env.KernelObjTarget('dma-buf-test-exporter', mod)
diff --git a/t83x/kernel/drivers/base/sconscript b/t83x/kernel/drivers/base/sconscript
new file mode 100644
index 0000000..46f5241
--- /dev/null
+++ b/t83x/kernel/drivers/base/sconscript
@@ -0,0 +1,40 @@
+#
+# (C) COPYRIGHT 2010-2014, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import( 'env' )
+
+if Glob('bus_logger/sconscript'):
+	if env['buslog'] == '1':
+		SConscript('bus_logger/sconscript')
+
+if Glob('mali_fpga_sysctl/sconscript'):
+	SConscript('mali_fpga_sysctl/sconscript')
+
+if Glob('dma_buf_lock/sconscript'):
+	SConscript('dma_buf_lock/sconscript')
+
+if Glob('ump/sconscript'):
+	SConscript('ump/sconscript')
+
+if Glob('kds/sconscript'):
+	SConscript('kds/sconscript')
+
+if Glob('dma_buf_test_exporter/sconscript'):
+	SConscript('dma_buf_test_exporter/sconscript')
+
+if Glob('smc_protected_mode_switcher/sconscript'):
+	if env['platform_config'] == 'juno_soc':
+		SConscript('smc_protected_mode_switcher/sconscript')
+
diff --git a/t83x/kernel/drivers/base/smc_protected_mode_switcher/Kbuild b/t83x/kernel/drivers/base/smc_protected_mode_switcher/Kbuild
new file mode 100644
index 0000000..244c70b
--- /dev/null
+++ b/t83x/kernel/drivers/base/smc_protected_mode_switcher/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-$(CONFIG_SMC_PROTECTED_MODE_SWITCHER) := smc_protected_mode_switcher.o
+smc_protected_mode_switcher-y := protected_mode_switcher_device.o \
+				 protected_mode_switcher_smc.o
diff --git a/t83x/kernel/drivers/base/smc_protected_mode_switcher/Kconfig b/t83x/kernel/drivers/base/smc_protected_mode_switcher/Kconfig
new file mode 100644
index 0000000..27a6bab
--- /dev/null
+++ b/t83x/kernel/drivers/base/smc_protected_mode_switcher/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config SMC_PROTECTED_MODE_SWITCHER
+	tristate "SMC protected mode switcher"
+	help
+	  This option enables the SMC protected mode switcher
diff --git a/t83x/kernel/drivers/base/smc_protected_mode_switcher/Makefile b/t83x/kernel/drivers/base/smc_protected_mode_switcher/Makefile
new file mode 100644
index 0000000..3e1788c
--- /dev/null
+++ b/t83x/kernel/drivers/base/smc_protected_mode_switcher/Makefile
@@ -0,0 +1,31 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include -I$(CURDIR)/../../gpu/arm/midgard -DCONFIG_SMC_PROTECTED_MODE_SWITCHER" CONFIG_SMC_PROTECTED_MODE_SWITCHER=m modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/t83x/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_device.c b/t83x/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_device.c
new file mode 100644
index 0000000..31c57cd
--- /dev/null
+++ b/t83x/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_device.c
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/compiler.h>
+
+#include <linux/protected_mode_switcher.h>
+
+/*
+ * Protected Mode Switch
+ */
+
+#define SMC_FAST_CALL  (1 << 31)
+#define SMC_64         (1 << 30)
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_SIP    (2 << SMC_OEN_OFFSET)
+
+struct smc_protected_mode_device {
+	u16 smc_fid_enable;
+	u16 smc_fid_disable;
+	struct device *dev;
+};
+
+asmlinkage u64 __invoke_protected_mode_switch_smc(u64, u64, u64, u64);
+
+static u64 invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return __invoke_protected_mode_switch_smc(fid, arg0, arg1, arg2);
+}
+
+static int protected_mode_enable(struct protected_mode_device *protected_dev)
+{
+	struct smc_protected_mode_device *sdev = protected_dev->data;
+
+	if (!sdev)
+		/* Not supported */
+		return -EINVAL;
+
+	return invoke_smc(SMC_OEN_SIP,
+			sdev->smc_fid_enable, false,
+			0, 0, 0);
+
+}
+
+static int protected_mode_disable(struct protected_mode_device *protected_dev)
+{
+	struct smc_protected_mode_device *sdev = protected_dev->data;
+
+	if (!sdev)
+		/* Not supported */
+		return -EINVAL;
+
+	return invoke_smc(SMC_OEN_SIP,
+			sdev->smc_fid_disable, false,
+			0, 0, 0);
+}
+
+
+static int protected_mode_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct protected_mode_device *protected_dev;
+	struct smc_protected_mode_device *sdev;
+	u32 tmp = 0;
+
+	protected_dev = devm_kzalloc(&pdev->dev, sizeof(*protected_dev),
+			GFP_KERNEL);
+	if (!protected_dev)
+		return -ENOMEM;
+
+	sdev = devm_kzalloc(&pdev->dev, sizeof(*sdev), GFP_KERNEL);
+	if (!sdev) {
+		devm_kfree(&pdev->dev, protected_dev);
+		return -ENOMEM;
+	}
+
+	protected_dev->data = sdev;
+	protected_dev->ops.protected_mode_enable = protected_mode_enable;
+	protected_dev->ops.protected_mode_disable = protected_mode_disable;
+	sdev->dev = dev;
+
+	if (!of_property_read_u32(dev->of_node, "arm,smc,protected_enable",
+			&tmp))
+		sdev->smc_fid_enable = tmp;
+
+	if (!of_property_read_u32(dev->of_node, "arm,smc,protected_disable",
+			&tmp))
+		sdev->smc_fid_disable = tmp;
+
+	/* Check older property names, for compatibility with outdated DTBs */
+	if (!of_property_read_u32(dev->of_node, "arm,smc,secure_enable", &tmp))
+		sdev->smc_fid_enable = tmp;
+
+	if (!of_property_read_u32(dev->of_node, "arm,smc,secure_disable", &tmp))
+		sdev->smc_fid_disable = tmp;
+
+	platform_set_drvdata(pdev, protected_dev);
+
+	dev_info(&pdev->dev, "Protected mode switcher %s loaded\n", pdev->name);
+	dev_info(&pdev->dev, "SMC enable: 0x%x\n", sdev->smc_fid_enable);
+	dev_info(&pdev->dev, "SMC disable: 0x%x\n", sdev->smc_fid_disable);
+
+	return 0;
+}
+
+static int protected_mode_remove(struct platform_device *pdev)
+{
+	dev_info(&pdev->dev, "Protected mode switcher %s removed\n",
+			pdev->name);
+
+	return 0;
+}
+
+static const struct of_device_id protected_mode_dt_ids[] = {
+	{ .compatible = "arm,smc-protected-mode-switcher" },
+	{ .compatible = "arm,secure-mode-switcher" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, protected_mode_dt_ids);
+
+static struct platform_driver protected_mode_driver = {
+	.probe = protected_mode_probe,
+	.remove = protected_mode_remove,
+	.driver = {
+		.name = "smc-protected-mode-switcher",
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(protected_mode_dt_ids),
+	}
+};
+
+module_platform_driver(protected_mode_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/t83x/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_smc.S b/t83x/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_smc.S
new file mode 100644
index 0000000..5eae328
--- /dev/null
+++ b/t83x/kernel/drivers/base/smc_protected_mode_switcher/protected_mode_switcher_smc.S
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+#include <linux/linkage.h>
+
+/* u64 invoke_protected_mode_switch_smc(u64 function_id, u64 arg0, u64 arg1,
+		u64 arg2) */
+ENTRY(__invoke_protected_mode_switch_smc)
+	smc	#0
+	ret
+ENDPROC(__invoke_protected_mode_switch_smc)
diff --git a/t83x/kernel/drivers/base/smc_protected_mode_switcher/sconscript b/t83x/kernel/drivers/base/smc_protected_mode_switcher/sconscript
new file mode 100644
index 0000000..f740a71
--- /dev/null
+++ b/t83x/kernel/drivers/base/smc_protected_mode_switcher/sconscript
@@ -0,0 +1,43 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+import re
+Import('env')
+
+if env['v'] != '1':
+	env['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}'
+
+src = [ Glob('#kernel/drivers/base/smc_protected_mode_switcher/*.c'),
+		Glob('#kernel/drivers/base/smc_protected_mode_switcher/*.S'),
+		Glob('#kernel/drivers/base/smc_protected_mode_switcher/*.h'),
+		Glob('#kernel/drivers/base/smc_protected_mode_switcher/K*'),
+		Glob('#kernel/drivers/base/smc_protected_mode_switcher/Makefile') ]
+
+env.Append( CPPPATH = '#kernel/include' )
+env['smc_protected_mode_switcher'] = 1
+
+if env.GetOption('clean') :
+	# Clean module
+	env.Execute(Action("make clean", '[CLEAN] smc_protected_mode_switcher'))
+	cmd = env.Command('$STATIC_LIB_PATH/smc_protected_mode_switcher.ko', src, [])
+	env.ProgTarget('smc_protected_mode_switcher', cmd)
+else:
+	# Build module
+	makeAction=Action("cd ${SOURCE.dir} && make && cp smc_protected_mode_switcher.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/smc_protected_mode_switcher.ko', src, [makeAction])
+	env.ProgTarget('smc_protected_mode_switcher', cmd)
+
diff --git a/t83x/kernel/drivers/base/ump/Kbuild b/t83x/kernel/drivers/base/ump/Kbuild
new file mode 100644
index 0000000..2bbdba2
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-y += src/
+
diff --git a/t83x/kernel/drivers/base/ump/Kconfig b/t83x/kernel/drivers/base/ump/Kconfig
new file mode 100644
index 0000000..f7451e6
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/Kconfig
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config UMP
+	tristate "Enable Unified Memory Provider (UMP) support"
+	default n
+    help
+	  Enable this option to build support for the ARM UMP module.
+	  UMP can be used by the Mali T6xx module to improve performance
+	  by reducing the copying of data by sharing memory.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate one module, called ump.
diff --git a/t83x/kernel/drivers/base/ump/docs/Doxyfile b/t83x/kernel/drivers/base/ump/docs/Doxyfile
new file mode 100644
index 0000000..fbec8eb
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/docs/Doxyfile
@@ -0,0 +1,125 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/include/linux/ump-common.h ../../kernel/include/linux/ump.h
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           += ../../kernel/drivers/base/ump
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           +=
diff --git a/t83x/kernel/drivers/base/ump/docs/sconscript b/t83x/kernel/drivers/base/ump/docs/sconscript
new file mode 100644
index 0000000..521278d
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/docs/sconscript
@@ -0,0 +1,31 @@
+#
+# (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+doxygen_sources = [
+	'Doxyfile',
+	Glob('*.png'),
+	Glob('../*.h'),
+	Glob('../src/*.h') ]
+
+if env['doc'] == '1':
+        doxygen_target = env.Command('doxygen/html/index.html', doxygen_sources,
+                                     ['cd ${SOURCE.dir} && doxygen'])
+        env.Clean(doxygen_target, './doxygen')
+
+        Alias('doxygen', doxygen_target)
+
diff --git a/t83x/kernel/drivers/base/ump/example_kernel_api.c b/t83x/kernel/drivers/base/ump/example_kernel_api.c
new file mode 100644
index 0000000..858dd8b
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/example_kernel_api.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Example routine to display information about an UMP allocation
+ * The routine takes an secure_id which can come from a different kernel module
+ * or from a client application (i.e. an ioctl).
+ * It creates a ump handle from the secure id (which validates the secure id)
+ * and if successful dumps the physical memory information.
+ * It follows the API and pins the memory while "using" the physical memory.
+ * Finally it calls the release function to indicate it's finished with the handle.
+ *
+ * If the function can't look up the handle it fails with return value -1.
+ * If the testy succeeds then it return 0.
+ * */
+
+static int display_ump_memory_information(ump_secure_id secure_id)
+{
+	const ump_dd_physical_block_64 * ump_blocks = NULL;
+	ump_dd_handle ump_mem;
+	uint64_t nr_blocks;
+	int i;
+	ump_alloc_flags flags;
+
+	/* get a handle from the secure id */
+	ump_mem = ump_dd_from_secure_id(secure_id);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE == ump_mem)
+	{
+		/* invalid ID received */
+		return -1;
+	}
+
+	/* at this point we know we've added a reference to the ump allocation, so we must release it with ump_dd_release */
+
+	ump_dd_phys_blocks_get_64(ump_mem, &nr_blocks, &ump_blocks);
+	flags = ump_dd_allocation_flags_get(ump_mem);
+
+	printf("UMP allocation with secure ID %u consists of %zd physical block(s):\n", secure_id, nr_blocks);
+
+	for(i=0; i<nr_blocks; ++i)
+	{
+		printf("\tBlock %d: 0x%08zX size 0x%08zX\n", i, ump_blocks[i].addr, ump_blocks[i].size);
+	}
+
+	printf("and was allocated using the following bitflag combo:  0x%lX\n", flags);
+
+	ump_dd_release(ump_mem);
+
+	return 0;
+}
+
diff --git a/t83x/kernel/drivers/base/ump/example_user_api.c b/t83x/kernel/drivers/base/ump/example_user_api.c
new file mode 100644
index 0000000..d143a64
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/example_user_api.c
@@ -0,0 +1,153 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <ump/ump.h>
+#include <memory.h>
+#include <stdio.h>
+
+/*
+ * Example routine to exercise the user space UMP api.
+ * This routine initializes the UMP api and allocates some CPU+device X memory.
+ * No usage hints are given, so the driver will use the default cacheability policy.
+ * With the allocation it creates a duplicate handle and plays with the reference count.
+ * Then it simulates interacting with a device and contains pseudo code for the device.
+ *
+ * If any error is detected correct cleanup will be performed and -1 will be returned.
+ * If successful then 0 will be returned.
+ */
+
+static int test_ump_user_api(void)
+{
+	/* This is the size we try to allocate*/
+	const size_t alloc_size = 4096;
+
+	ump_handle h = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_copy = UMP_INVALID_MEMORY_HANDLE;
+	ump_handle h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+	void * mapping = NULL;
+
+	ump_result ump_api_res;
+	int result = -1;
+
+	ump_secure_id id;
+
+	size_t size_returned;
+
+	ump_api_res = ump_open();
+	if (UMP_OK != ump_api_res)
+	{
+		/* failed to open an ump session */
+		/* early out */
+		return -1;
+	}
+
+	h = ump_allocate_64(alloc_size, UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | UMP_PROT_X_RD | UMP_PROT_X_WR);
+	/* the refcount is now 1 */
+	if (UMP_INVALID_MEMORY_HANDLE == h)
+	{
+		/* allocation failure */
+		goto cleanup;
+	}
+
+	/* this is how we could share this allocation with another process */
+
+	/* in process A: */
+	id = ump_secure_id_get(h);
+	/* still ref count 1 */
+	/* send the id to process B */
+
+	/* in process B: */
+	/* receive the id from A */
+	h_clone = ump_from_secure_id(id);
+	/* the ref count of the allocation is now 2 (one from each handle to it) */
+	/* do something ... */
+	/* release our clone */
+	ump_release(h_clone); /* safe to call even if ump_from_secure_id failed */
+	h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+
+	/* a simple save-for-future-use logic inside the driver would just copy the handle (but add a ref manually too!) */
+	/*
+	 * void assign_memory_to_job(h)
+	 * {
+	  */
+	h_copy = h;
+	ump_retain(h_copy); /* manual retain needed as we just assigned the handle, now 2 */
+	/*
+	 * }
+	 *
+	 * void job_completed(void)
+	 * {
+	 */
+	 ump_release(h_copy); /* normal handle release as if we got via an ump_allocate */
+	 h_copy = UMP_INVALID_MEMORY_HANDLE;
+	 /*
+	 * }
+	 */
+	
+	/* we're now back at ref count 1, and only h is a valid handle */
+	/* enough handle duplication show-off, let's play with the contents instead */
+
+	mapping = ump_map(h, 0, alloc_size);
+	if (NULL == mapping)
+	{
+		/* mapping failure, either out of address space or some other error */
+		goto cleanup;
+	}
+
+	memset(mapping, 0, alloc_size);
+
+	/* let's pretend we're going to start some hw device on this buffer and read the result afterwards */
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN, mapping, alloc_size);
+ 	/*
+		device cache invalidate
+
+		memory barrier
+
+		start device
+
+		memory barrier
+
+		wait for device
+
+		memory barrier
+
+		device cache clean
+
+		memory barrier
+	*/
+	ump_cpu_msync_now(h, UMP_MSYNC_CLEAN_AND_INVALIDATE, mapping, alloc_size);
+
+	/* we could now peek at the result produced by the hw device, which is now accessible via our mapping */
+
+	/* unmap the buffer when we're done with it */
+	ump_unmap(h, mapping, alloc_size);
+
+	result = 0;
+
+cleanup:
+	ump_release(h);
+	h = UMP_INVALID_MEMORY_HANDLE;
+
+	ump_close();
+
+	return result;
+}
+
diff --git a/t83x/kernel/drivers/base/ump/sconscript b/t83x/kernel/drivers/base/ump/sconscript
new file mode 100644
index 0000000..b4aa8b6
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+if Glob('src/sconscript') and int(env['ump']) == 1:
+	SConscript( 'src/sconscript' )
+	SConscript( 'docs/sconscript' )
+
diff --git a/t83x/kernel/drivers/base/ump/src/Kbuild b/t83x/kernel/drivers/base/ump/src/Kbuild
new file mode 100644
index 0000000..de6d307
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/Kbuild
@@ -0,0 +1,50 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Paths required for build
+UMP_PATH = $(src)/../..
+UMP_DEVICEDRV_PATH = $(src)/.
+
+# Set up defaults if not defined by the user
+MALI_UNIT_TEST ?= 0
+
+SRC :=\
+	common/ump_kernel_core.c \
+	common/ump_kernel_descriptor_mapping.c \
+	linux/ump_kernel_linux.c \
+	linux/ump_kernel_linux_mem.c
+
+UNIT_TEST_DEFINES=
+ifeq ($(MALI_UNIT_TEST), 1)
+	MALI_DEBUG ?= 1
+
+	UNIT_TEST_DEFINES = -DMALI_UNIT_TEST=1 \
+	                    -DMALI_DEBUG=$(MALI_DEBUG)
+endif
+
+# Use our defines when compiling
+ccflags-y += -I$(UMP_PATH) -I$(UMP_DEVICEDRV_PATH) $(UNIT_TEST_DEFINES)
+
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_UMP) += ump.o
+ifeq ($(CONFIG_ION),y)
+ccflags-y += -I$(srctree)/drivers/staging/android/ion -I$(srctree)/include/linux
+obj-$(CONFIG_UMP) += imports/ion/ump_kernel_import_ion.o
+endif
+
+# Tell the Linux build system to enable building of our .c files
+ump-y := $(SRC:.c=.o)
diff --git a/t83x/kernel/drivers/base/ump/src/Makefile b/t83x/kernel/drivers/base/ump/src/Makefile
new file mode 100644
index 0000000..dfd5945
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/Makefile
@@ -0,0 +1,79 @@
+#
+# (C) COPYRIGHT 2008-2014, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+ifneq ($(KBUILD_EXTMOD),)
+include $(KBUILD_EXTMOD)/Makefile.common
+else
+include ./Makefile.common
+endif
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+RELATIVE_ROOT=../../../../..
+ROOT = $(CURDIR)/$(RELATIVE_ROOT)
+
+EXTRA_CFLAGS=-I$(CURDIR)/../../../../include
+
+ifeq ($(MALI_UNIT_TEST),1)
+	EXTRA_CFLAGS += -DMALI_UNIT_TEST=$(MALI_UNIT_TEST)
+endif
+
+# Get any user defined KDIR-<names> or maybe even a hardcoded KDIR
+-include KDIR_CONFIGURATION
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+CONFIG ?= $(ARCH)
+
+# default cpu to select
+CPU ?= $(shell uname -m)
+
+# look up KDIR based om CPU selection
+KDIR ?= $(KDIR-$(CPU))
+
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(CPU))
+endif
+
+# Validate selected config
+ifneq ($(shell [ -d arch-$(CONFIG) ] && [ -f arch-$(CONFIG)/config.h ] && echo "OK"), OK)
+$(warning Current directory is $(shell pwd))
+$(error No configuration found for config $(CONFIG). Check that arch-$(CONFIG)/config.h exists)
+else
+# Link arch to the selected arch-config directory
+$(shell [ -L arch ] && rm arch)
+$(shell ln -sf arch-$(CONFIG) arch)
+$(shell touch arch/config.h)
+endif
+
+EXTRA_SYMBOLS=
+
+ifeq ($(MALI_UNIT_TEST),1)
+	KBASE_PATH=$(ROOT)/kernel/drivers/gpu/arm/midgard
+	EXTRA_SYMBOLS+=$(KBASE_PATH)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="$(EXTRA_CFLAGS) $(SCONS_CFLAGS)" CONFIG_UMP=m KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+kernelrelease:
+	$(MAKE) -C $(KDIR) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" kernelrelease
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/t83x/kernel/drivers/base/ump/src/Makefile.common b/t83x/kernel/drivers/base/ump/src/Makefile.common
new file mode 100644
index 0000000..f29a4c1
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/Makefile.common
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2008-2010, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+SRC = $(UMP_FILE_PREFIX)/common/ump_kernel_core.c \
+      $(UMP_FILE_PREFIX)/common/ump_kernel_descriptor_mapping.c
+
diff --git a/t83x/kernel/drivers/base/ump/src/arch-arm/config.h b/t83x/kernel/drivers/base/ump/src/arch-arm/config.h
new file mode 100644
index 0000000..152d98f
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/arch-arm/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/t83x/kernel/drivers/base/ump/src/arch-arm64/config.h b/t83x/kernel/drivers/base/ump/src/arch-arm64/config.h
new file mode 100644
index 0000000..cfb14ca
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/arch-arm64/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/t83x/kernel/drivers/base/ump/src/common/ump_kernel_core.c b/t83x/kernel/drivers/base/ump/src/common/ump_kernel_core.c
new file mode 100644
index 0000000..5a8ad7c
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/common/ump_kernel_core.c
@@ -0,0 +1,728 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* module headers */
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+/* local headers */
+#include <common/ump_kernel_core.h>
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+#define UMP_FLAGS_RANGE ((UMP_PROT_SHAREABLE<<1) - 1u)
+
+static umpp_device device;
+
+ump_result umpp_core_constructor(void)
+{
+	mutex_init(&device.secure_id_map_lock);
+	device.secure_id_map = umpp_descriptor_mapping_create(UMP_EXPECTED_IDS, UMP_MAX_IDS);
+	if (NULL != device.secure_id_map)
+	{
+		if (UMP_OK == umpp_device_initialize())
+		{
+			return UMP_OK;
+		}
+		umpp_descriptor_mapping_destroy(device.secure_id_map);
+	}
+	mutex_destroy(&device.secure_id_map_lock);
+
+	return UMP_ERROR;
+}
+
+void umpp_core_destructor(void)
+{
+	umpp_device_terminate();
+	umpp_descriptor_mapping_destroy(device.secure_id_map);
+	mutex_destroy(&device.secure_id_map_lock);
+}
+
+umpp_session *umpp_core_session_start(void)
+{
+	umpp_session * session;
+
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
+	if (NULL != session)
+	{
+		mutex_init(&session->session_lock);
+
+		INIT_LIST_HEAD(&session->memory_usage);
+
+		/* try to create import client session, not a failure if they fail to initialize */
+		umpp_import_handlers_init(session);
+	}
+
+	return session;
+}
+
+void umpp_core_session_end(umpp_session *session)
+{
+	umpp_session_memory_usage * usage, *_usage;
+	UMP_ASSERT(session);
+
+	list_for_each_entry_safe(usage, _usage, &session->memory_usage, link)
+	{
+		printk(KERN_WARNING "UMP: Memory usage cleanup, releasing secure ID %d\n", ump_dd_secure_id_get(usage->mem));
+		ump_dd_release(usage->mem);
+		kfree(usage);
+
+	}
+
+	/* we should now not hold any imported memory objects,
+	 * detatch all import handlers */
+	umpp_import_handlers_term(session);
+
+	mutex_destroy(&session->session_lock);
+	kfree(session);
+}
+
+ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	umpp_allocation * alloc;
+	int i;
+
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD ) ) == 0 ||
+	    ( flags & (UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read and one write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL | __GFP_HARDWALL);
+
+	if (NULL == alloc)
+		goto out1;
+
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+	alloc->size = size;
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	if (0 != umpp_phys_commit(alloc))
+	{
+		goto out2;
+	}
+
+	/* all set up, allocate an ID for it */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	return alloc;
+
+out3:
+	umpp_phys_free(alloc);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+uint64_t ump_dd_size_get_64(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->size;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_size_get(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->size <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->size;
+}
+
+ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	return alloc->id;
+}
+
+ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	return alloc->flags;
+}
+
+ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id)
+{
+	umpp_allocation * alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+
+	mutex_lock(&device.secure_id_map_lock);
+
+	if (0 == umpp_descriptor_mapping_lookup(device.secure_id_map, secure_id, (void**)&alloc))
+	{
+		if (NULL != alloc->filter_func)
+		{
+			if (!alloc->filter_func(secure_id, alloc, alloc->callback_data))
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /* the filter denied access */
+			}
+		}
+
+		/* check permission to access it */
+		if ((UMP_DD_INVALID_MEMORY_HANDLE != alloc) && !(alloc->flags & UMP_PROT_SHAREABLE))
+		{
+			if (alloc->owner != get_current()->pid)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE; /*no rights for the current process*/
+			}
+		}
+
+		if (UMP_DD_INVALID_MEMORY_HANDLE != alloc)
+		{
+			if( ump_dd_retain(alloc) != UMP_DD_SUCCESS)
+			{
+				alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+			}
+		}
+	}
+	mutex_unlock(&device.secure_id_map_lock);
+
+	return alloc;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id)
+{
+	return ump_dd_from_secure_id(secure_id);
+}
+
+int ump_dd_retain(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* check for overflow */
+	while(1)
+	{
+		int refcnt = atomic_read(&alloc->refcount);
+		if (refcnt + 1 > 0)
+		{
+			if(atomic_cmpxchg(&alloc->refcount, refcnt, refcnt + 1) == refcnt)
+			{
+				return 0;
+			}
+		}
+		else
+		{
+			return -EBUSY;
+		}
+	}
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_add(ump_dd_handle mem)
+{
+	ump_dd_retain(mem);
+}
+
+
+void ump_dd_release(ump_dd_handle mem)
+{
+	umpp_allocation * alloc;
+	uint32_t new_cnt;
+
+	UMP_ASSERT(mem);
+
+	alloc = (umpp_allocation*)mem;
+
+	/* secure the id for lookup while releasing */
+	mutex_lock(&device.secure_id_map_lock);
+
+	/* do the actual release */
+	new_cnt = atomic_sub_return(1, &alloc->refcount);
+	if (0 == new_cnt)
+	{
+		/* remove from the table as this was the last ref */
+		umpp_descriptor_mapping_remove(device.secure_id_map, alloc->id);
+	}
+
+	/* release the lock as early as possible */
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if (0 != new_cnt)
+	{
+		/* exit if still have refs */
+		return;
+	}
+
+	UMP_ASSERT(list_empty(&alloc->map_list));
+
+	/* cleanup */
+	if (NULL != alloc->final_release_func)
+	{
+		alloc->final_release_func(alloc, alloc->callback_data);
+	}
+
+	if (0 == (alloc->management_flags & UMP_MGMT_EXTERNAL))
+	{
+		umpp_phys_free(alloc);
+	}
+	else
+	{
+		kfree(alloc->block_array);
+	}
+
+	mutex_destroy(&alloc->map_list_lock);
+
+	kfree(alloc);
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_release(ump_dd_handle mem)
+{
+	ump_dd_release(mem);
+}
+
+void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(pCount);
+	UMP_ASSERT(pArray);
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+	*pCount = alloc->blocksCount;
+	*pArray = alloc->block_array;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks)
+{
+	const umpp_allocation * alloc;
+	unsigned long i;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	if((uint64_t)num_blocks != alloc->blocksCount)
+	{
+		return UMP_DD_INVALID;
+	}
+
+	for( i = 0; i < num_blocks; i++)
+	{
+		UMP_ASSERT(alloc->block_array[i].addr <= UMP_UINT32_MAX);
+		UMP_ASSERT(alloc->block_array[i].size <= UMP_UINT32_MAX);
+
+		blocks[i].addr = (unsigned long)alloc->block_array[i].addr;
+		blocks[i].size = (unsigned long)alloc->block_array[i].size;
+	}
+
+	return UMP_DD_SUCCESS;
+}
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	UMP_ASSERT(block);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+	UMP_ASSERT(alloc->block_array[index].addr <= UMP_UINT32_MAX);
+	UMP_ASSERT(alloc->block_array[index].size <= UMP_UINT32_MAX);
+
+	block->addr = (unsigned long)alloc->block_array[index].addr;
+	block->size = (unsigned long)alloc->block_array[index].size;
+
+	return UMP_DD_SUCCESS;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem)
+{
+	const umpp_allocation * alloc;
+	UMP_ASSERT(mem);
+	alloc = (const umpp_allocation *)mem;
+
+	UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+	UMP_ASSERT(alloc->blocksCount <= UMP_UINT32_MAX);
+
+	return (unsigned long)alloc->blocksCount;
+}
+
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void *uaddr, size_t size)
+{
+	umpp_cpu_mapping *map;
+
+	void *target_first = uaddr;
+	void *target_last = (void*)((uintptr_t)uaddr - 1 + size);
+
+	if (target_last < target_first) /* wrapped */
+	{
+		return NULL;
+	}
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if ( map->vaddr_start <= target_first &&
+		   (void*)((uintptr_t)map->vaddr_start + (map->nr_pages << PAGE_SHIFT) - 1) >= target_last)
+		{
+			goto out;
+		}
+	}
+	map = NULL;
+out:
+	mutex_unlock(&alloc->map_list_lock);
+
+	return map;
+}
+
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map)
+{
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(map);
+	mutex_lock(&alloc->map_list_lock);
+	list_add(&map->link, &alloc->map_list);
+	mutex_unlock(&alloc->map_list_lock);
+}
+
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target)
+{
+	umpp_cpu_mapping * map;
+
+	UMP_ASSERT(alloc);
+	UMP_ASSERT(target);
+
+	mutex_lock(&alloc->map_list_lock);
+	list_for_each_entry(map, &alloc->map_list, link)
+	{
+		if (map == target)
+		{
+			list_del(&target->link);
+			kfree(target);
+			mutex_unlock(&alloc->map_list_lock);
+			return;
+		}
+	}
+
+	/* not found, error */
+	UMP_ASSERT(0);
+}
+
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const  block_index, uint64_t * const block_internal_offset)
+{
+	uint64_t i;
+
+	for (i = 0 ; i < alloc->blocksCount; i++)
+	{
+		if (offset < alloc->block_array[i].size)
+		{
+			/* found the block_array element containing this offset */
+			*block_index = i;
+			*block_internal_offset = offset;
+			return 0;
+		}
+		offset -= alloc->block_array[i].size;
+	}
+
+	return -ENXIO;
+}
+
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size)
+{
+	umpp_allocation * alloc;
+	void *vaddr;
+	umpp_cpu_mapping * mapping;
+	uint64_t virt_page_off; /* offset of given address from beginning of the virtual mapping */
+	uint64_t phys_page_off; /* offset of the virtual mapping from the beginning of the physical buffer */
+	uint64_t page_count; /* number of pages to sync */
+	uint64_t i;
+	uint64_t block_idx;
+	uint64_t block_offset;
+	uint64_t paddr;
+
+	UMP_ASSERT((UMP_MSYNC_CLEAN == op) || (UMP_MSYNC_CLEAN_AND_INVALIDATE == op));
+
+	alloc = (umpp_allocation*)mem;
+	vaddr = (void*)(uintptr_t)address;
+
+	if((alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+	{
+		/* mapping is not cached */
+		return;
+	}
+
+	mapping = umpp_dd_find_enclosing_mapping(alloc, vaddr, size);
+	if (NULL == mapping)
+	{
+		printk(KERN_WARNING "UMP: Illegal cache sync address %lx\n", (uintptr_t)vaddr);
+		return; /* invalid pointer or size causes out-of-bounds */
+	}
+
+	/* we already know that address + size don't wrap around as umpp_dd_find_enclosing_mapping didn't fail */
+	page_count = ((((((uintptr_t)address + size - 1) & PAGE_MASK) - ((uintptr_t)address & PAGE_MASK))) >> PAGE_SHIFT) + 1;
+	virt_page_off = (vaddr - mapping->vaddr_start) >> PAGE_SHIFT;
+	phys_page_off = mapping->page_off;
+
+	if (umpp_dd_find_start_block(alloc, (virt_page_off + phys_page_off) << PAGE_SHIFT, &block_idx, &block_offset))
+	{
+		/* should not fail as a valid mapping was found, so the phys mem must exists */
+		printk(KERN_WARNING "UMP: Unable to find physical start block with offset %llx\n", virt_page_off + phys_page_off);
+		UMP_ASSERT(0);
+		return;
+	}
+
+	paddr = alloc->block_array[block_idx].addr + block_offset + (((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1));
+
+	for (i = 0; i < page_count; i++)
+	{
+		size_t offset = ((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1);
+		size_t sz = min((size_t)PAGE_SIZE - offset, size);
+
+		/* check if we've overrrun the current block, if so move to the next block */
+		if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+		{
+			block_idx++;
+			UMP_ASSERT(block_idx < alloc->blocksCount);
+			paddr = alloc->block_array[block_idx].addr;
+		}
+
+		if (UMP_MSYNC_CLEAN == op)
+		{
+			ump_sync_to_memory(paddr, vaddr, sz);
+		}
+		else /* (UMP_MSYNC_CLEAN_AND_INVALIDATE == op) already validated on entry */
+		{
+			ump_sync_to_cpu(paddr, vaddr, sz);
+		}
+
+		/* advance to next page  */
+		vaddr = (void*)((uintptr_t)vaddr + sz);
+		size -= sz;
+		paddr += sz;
+	}
+}
+
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+	uint64_t size = 0;
+	uint64_t i;
+	umpp_allocation * alloc;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	for (i = 0; i < num_blocks; i++)
+	{
+		size += blocks[i].size;
+	}
+	UMP_ASSERT(size);
+
+	if (flags & (~UMP_FLAGS_RANGE))
+	{
+		printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD
+	    | UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+	{
+		printk(KERN_WARNING "UMP: allocation flags should have at least one read or write permission bit set\n");
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/*check permission flags to be set if hit flags are set too*/
+	for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+	{
+		if (flags & (UMP_HINT_DEVICE_RD<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+		}
+		if (flags & (UMP_HINT_DEVICE_WR<<i))
+		{
+			UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+		}
+	}
+
+	alloc = kzalloc(sizeof(*alloc),__GFP_HARDWALL | GFP_KERNEL);
+
+	if (NULL == alloc)
+	{
+		goto out1;
+	}
+
+	alloc->block_array = kzalloc(sizeof(ump_dd_physical_block_64) * num_blocks,__GFP_HARDWALL | GFP_KERNEL);
+	if (NULL == alloc->block_array)
+	{
+		goto out2;
+	}
+
+	memcpy(alloc->block_array, blocks, sizeof(ump_dd_physical_block_64) * num_blocks);
+
+	alloc->size = size;
+	alloc->blocksCount = num_blocks;
+	alloc->flags = flags;
+	alloc->filter_func = filter_func;
+	alloc->final_release_func = final_release_func;
+	alloc->callback_data = callback_data;
+
+	if (!(alloc->flags & UMP_PROT_SHAREABLE))
+	{
+		alloc->owner = get_current()->pid;
+	}
+
+	mutex_init(&alloc->map_list_lock);
+	INIT_LIST_HEAD(&alloc->map_list);
+	atomic_set(&alloc->refcount, 1);
+
+	/* all set up, allocate an ID */
+
+	mutex_lock(&device.secure_id_map_lock);
+	alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+	mutex_unlock(&device.secure_id_map_lock);
+
+	if ((int)alloc->id == 0)
+	{
+		/* failed to allocate a secure_id */
+		goto out3;
+	}
+
+	alloc->management_flags |= UMP_MGMT_EXTERNAL;
+
+	return alloc;
+
+out3:
+	kfree(alloc->block_array);
+out2:
+	kfree(alloc);
+out1:
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+
+/*
+ * UMP v1 API
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks)
+{
+	ump_dd_handle mem;
+	ump_dd_physical_block_64 *block_64_array;
+	ump_alloc_flags flags = UMP_V1_API_DEFAULT_ALLOCATION_FLAGS;
+	unsigned long i;
+
+	UMP_ASSERT(blocks);
+	UMP_ASSERT(num_blocks);
+
+	block_64_array = kzalloc(num_blocks * sizeof(*block_64_array), __GFP_HARDWALL | GFP_KERNEL);
+
+	if(block_64_array == NULL)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	/* copy physical blocks */
+	for( i = 0; i < num_blocks; i++)
+	{
+		block_64_array[i].addr = blocks[i].addr;
+		block_64_array[i].size = blocks[i].size;
+	}
+
+	mem = ump_dd_create_from_phys_blocks_64(block_64_array, num_blocks, flags, NULL, NULL, NULL);
+
+	kfree(block_64_array);
+
+	return mem;
+
+}
diff --git a/t83x/kernel/drivers/base/ump/src/common/ump_kernel_core.h b/t83x/kernel/drivers/base/ump/src/common/ump_kernel_core.h
new file mode 100644
index 0000000..314af87
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/common/ump_kernel_core.h
@@ -0,0 +1,221 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_CORE_H_
+#define _UMP_KERNEL_CORE_H_
+
+
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <asm/atomic.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/cred.h>
+#include <asm/mmu_context.h>
+
+#include <linux/ump-common.h>
+#include <common/ump_kernel_descriptor_mapping.h>
+
+/* forward decl */
+struct umpp_session;
+
+/**
+ * UMP handle metadata.
+ * Tracks various data about a handle not of any use to user space
+ */
+typedef enum
+{
+	UMP_MGMT_EXTERNAL = (1ul << 0) /**< Handle created via the ump_dd_create_from_phys_blocks interface */
+	/* (1ul << 31) not to be used */
+} umpp_management_flags;
+
+/**
+ * Structure tracking the single global UMP device.
+ * Holds global data like the ID map
+ */
+typedef struct umpp_device
+{
+	struct mutex secure_id_map_lock; /**< Lock protecting access to the map */
+	umpp_descriptor_mapping * secure_id_map; /**< Map of all known secure IDs on the system */
+} umpp_device;
+
+/**
+ * Structure tracking all memory allocations of a UMP allocation.
+ * Tracks info about an mapping so we can verify cache maintenace
+ * operations and help in the unmap cleanup.
+ */
+typedef struct umpp_cpu_mapping
+{
+	struct list_head        link; /**< link to list of mappings for an allocation */
+	void                  *vaddr_start; /**< CPU VA start of the mapping */
+	size_t                nr_pages; /**< Size (in pages) of the mapping */
+	uint64_t              page_off; /**< Offset (in pages) from start of the allocation where the mapping starts */
+	ump_dd_handle         handle; /**< Which handle this mapping is linked to */
+	struct umpp_session * session; /**< Which session created the mapping */
+} umpp_cpu_mapping;
+
+/**
+ * Structure tracking UMP allocation.
+ * Represent a memory allocation with its ID.
+ * Tracks all needed meta-data about an allocation.
+ * */
+typedef struct umpp_allocation
+{
+	ump_secure_id id; /**< Secure ID of the allocation */
+	atomic_t refcount; /**< Usage count */
+
+	ump_alloc_flags flags; /**< Flags for all supported devices */
+	uint32_t management_flags; /**< Managment flags tracking */
+
+	pid_t owner; /**< The process ID owning the memory if not sharable */
+
+	ump_dd_security_filter filter_func; /**< Hook to verify use, called during retains from new clients */
+	ump_dd_final_release_callback final_release_func; /**< Hook called when the last reference is removed */
+	void* callback_data; /**< Additional data given to release hook */
+
+	uint64_t size; /**< Size (in bytes) of the allocation */
+	uint64_t blocksCount; /**< Number of physsical blocks the allocation is built up of */
+	ump_dd_physical_block_64 * block_array; /**< Array, one entry per block, describing block start and length */
+
+	struct mutex     map_list_lock; /**< Lock protecting the map_list */
+	struct list_head map_list; /**< Tracks all CPU VA mappings of this allocation */
+
+	void * backendData; /**< Physical memory backend meta-data */
+} umpp_allocation;
+
+/**
+ * Structure tracking use of UMP memory by a session.
+ * Tracks the use of an allocation by a session so session termination can clean up any outstanding references.
+ * Also protects agains non-matched release calls from user space.
+ */
+typedef struct umpp_session_memory_usage
+{
+	ump_secure_id id; /**< ID being used. For quick look-up */
+	ump_dd_handle mem; /**< Handle being used. */
+
+	/**
+	 * Track how many times has the process retained this handle in the kernel.
+	 * This should usually just be 1(allocated or resolved) or 2(mapped),
+	 * but could be more if someone is playing with the low-level API
+	 * */
+	atomic_t process_usage_count;
+
+	struct list_head link; /**< link to other usage trackers for a session */
+} umpp_session_memory_usage;
+
+/**
+ * Structure representing a session/client.
+ * Tracks the UMP allocations being used by this client.
+ */
+typedef struct umpp_session
+{
+	struct mutex session_lock; /**< Lock for memory usage manipulation */
+	struct list_head memory_usage; /**< list of memory currently being used by the this session */
+	void*  import_handler_data[UMPP_EXTERNAL_MEM_COUNT]; /**< Import modules per-session data pointer */
+} umpp_session;
+
+/**
+ * UMP core setup.
+ * Called by any OS specific startup function to initialize the common part.
+ * @return UMP_OK if core initialized correctly, any other value for errors
+ */
+ump_result umpp_core_constructor(void);
+
+/**
+ * UMP core teardown.
+ * Called by any OS specific unload function to clean up the common part.
+ */
+void umpp_core_destructor(void);
+
+/**
+ * UMP session start.
+ * Called by any OS specific session handler when a new session is detected
+ * @return Non-NULL if a matching core session could be set up. NULL on failure
+ */
+umpp_session *umpp_core_session_start(void);
+
+/**
+ * UMP session end.
+ * Called by any OS specific session handler when a session is ended/terminated.
+ * @param session The core session object returned by ump_core_session_start
+ */
+void umpp_core_session_end(umpp_session *session);
+
+/**
+ * Find a mapping object (if any) for this allocation.
+ * Called by any function needing to identify a mapping from a user virtual address.
+ * Verifies that the whole range to be within a mapping object.
+ * @param alloc The UMP allocation to find a matching mapping object of
+ * @param uaddr User mapping address to find the mapping object for
+ * @param size Length of the mapping
+ * @return NULL on error (no match found), pointer to mapping object if match found
+ */
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void* uaddr, size_t size);
+
+/**
+ * Register a new mapping of an allocation.
+ * Called by functions creating a new mapping of an allocation, typically OS specific handlers.
+ * @param alloc The allocation object which has been mapped
+ * @param map Info about the mapping
+ */
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map);
+
+/**
+ * Remove and free mapping object from an allocation.
+ * @param alloc The allocation object to remove the mapping info from
+ * @param target The mapping object to remove
+ */
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target);
+
+/**
+ * Helper to find a block in the blockArray which holds a given byte offset.
+ * @param alloc The allocation object to find the block in
+ * @param offset Offset (in bytes) from allocation start to find the block of
+ * @param[out] block_index Pointer to the index of the block matching
+ * @param[out] block_internal_offset Offset within the returned block of the searched offset
+ * @return 0 if a matching block was found, any other value for error
+ */
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const block_index, uint64_t * const block_internal_offset);
+
+/**
+ * Cache maintenance helper.
+ * Performs the requested cache operation on the given handle.
+ * @param mem Allocation handle
+ * @param op Cache maintenance operation to perform
+ * @param address User mapping at which to do the operation
+ * @param size Length (in bytes) of the range to do the operation on
+ */
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size);
+
+/**
+ * Import module session early init.
+ * Calls session_begin on all installed import modules.
+ * @param session The core session object to initialized the import handler for
+ * */
+void umpp_import_handlers_init(umpp_session * session);
+
+/**
+ * Import module session cleanup.
+ * Calls session_end on all import modules bound to the session.
+ * @param session The core session object to initialized the import handler for
+ */
+void umpp_import_handlers_term(umpp_session * session);
+
+#endif /* _UMP_KERNEL_CORE_H_ */
+
diff --git a/t83x/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c b/t83x/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
new file mode 100644
index 0000000..c5b0d74
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
@@ -0,0 +1,162 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <common/ump_kernel_priv.h>
+
+#define MALI_PAD_INT(x) (((x) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1))
+
+/**
+ * Allocate a descriptor table capable of holding 'count' mappings
+ * @param count Number of mappings in the table
+ * @return Pointer to a new table, NULL on error
+ */
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count);
+
+/**
+ * Free a descriptor table
+ * @param table The table to free
+ */
+static void descriptor_table_free(umpp_descriptor_table * table);
+
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries)
+{
+	umpp_descriptor_mapping * map = kzalloc(sizeof(umpp_descriptor_mapping), GFP_KERNEL);
+
+	init_entries = MALI_PAD_INT(init_entries);
+	max_entries = MALI_PAD_INT(max_entries);
+
+	if (NULL != map)
+	{
+		map->table = descriptor_table_alloc(init_entries);
+		if (NULL != map->table)
+		{
+			init_rwsem( &map->lock);
+			set_bit(0, map->table->usage);
+			map->max_nr_mappings_allowed = max_entries;
+			map->current_nr_mappings = init_entries;
+			return map;
+
+			descriptor_table_free(map->table);
+		}
+		kfree(map);
+	}
+	return NULL;
+}
+
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map)
+{
+	UMP_ASSERT(NULL != map);
+	descriptor_table_free(map->table);
+	kfree(map);
+}
+
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target)
+{
+ 	int descriptor = 0;
+	UMP_ASSERT(NULL != map);
+	down_write( &map->lock);
+	descriptor = find_first_zero_bit(map->table->usage, map->current_nr_mappings);
+	if (descriptor == map->current_nr_mappings)
+	{
+		/* no free descriptor, try to expand the table */
+		umpp_descriptor_table * new_table;
+		umpp_descriptor_table * old_table = map->table;
+		int nr_mappings_new = map->current_nr_mappings + BITS_PER_LONG;
+
+		if (map->current_nr_mappings >= map->max_nr_mappings_allowed)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+		new_table = descriptor_table_alloc(nr_mappings_new);
+		if (NULL == new_table)
+		{
+			descriptor = 0;
+			goto unlock_and_exit;
+		}
+
+ 		memcpy(new_table->usage, old_table->usage, (sizeof(unsigned long)*map->current_nr_mappings) / BITS_PER_LONG);
+ 		memcpy(new_table->mappings, old_table->mappings, map->current_nr_mappings * sizeof(void*));
+
+ 		map->table = new_table;
+		map->current_nr_mappings = nr_mappings_new;
+		descriptor_table_free(old_table);
+	}
+
+	/* we have found a valid descriptor, set the value and usage bit */
+	set_bit(descriptor, map->table->usage);
+	map->table->mappings[descriptor] = target;
+
+unlock_and_exit:
+	up_write(&map->lock);
+	return descriptor;
+}
+
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target)
+{
+	int result = -EINVAL;
+ 	UMP_ASSERT(map);
+	UMP_ASSERT(target);
+ 	down_read(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		*target = map->table->mappings[descriptor];
+		result = 0;
+	}
+	/* keep target untouched if the descriptor was not found */
+	up_read(&map->lock);
+	return result;
+}
+
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor)
+{
+	UMP_ASSERT(map);
+ 	down_write(&map->lock);
+ 	if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ 	{
+		map->table->mappings[descriptor] = NULL;
+		clear_bit(descriptor, map->table->usage);
+	}
+	up_write(&map->lock);
+}
+
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count)
+{
+	umpp_descriptor_table * table;
+
+	table = kzalloc(sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG) + (sizeof(void*) * count), __GFP_HARDWALL | GFP_KERNEL );
+
+	if (NULL != table)
+	{
+		table->usage = (unsigned long*)((u8*)table + sizeof(umpp_descriptor_table));
+		table->mappings = (void**)((u8*)table + sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG));
+	}
+
+	return table;
+}
+
+static void descriptor_table_free(umpp_descriptor_table * table)
+{
+ 	UMP_ASSERT(table);
+	kfree(table);
+}
+
diff --git a/t83x/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h b/t83x/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
new file mode 100644
index 0000000..d06c145
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_kernel_descriptor_mapping.h
+ */
+
+#ifndef _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+#define _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+/**
+ * The actual descriptor mapping table, never directly accessed by clients
+ */
+typedef struct umpp_descriptor_table
+{
+	/* keep as a unsigned long to rely on the OS's bitops support */
+	unsigned long * usage; /**< Pointer to bitpattern indicating if a descriptor is valid/used(1) or not(0) */
+	void** mappings; /**< Array of the pointers the descriptors map to */
+} umpp_descriptor_table;
+
+/**
+ * The descriptor mapping object
+ * Provides a separate namespace where we can map an integer to a pointer
+ */
+typedef struct umpp_descriptor_mapping
+{
+	struct rw_semaphore lock; /**< Lock protecting access to the mapping object */
+	unsigned int max_nr_mappings_allowed; /**< Max number of mappings to support in this namespace */
+	unsigned int current_nr_mappings; /**< Current number of possible mappings */
+	umpp_descriptor_table * table; /**< Pointer to the current mapping table */
+} umpp_descriptor_mapping;
+
+/**
+ * Create a descriptor mapping object.
+ * Create a descriptor mapping capable of holding init_entries growable to max_entries.
+ * ID 0 is reserved so the number of available entries will be max - 1.
+ * @param init_entries Number of entries to preallocate memory for
+ * @param max_entries Number of entries to max support
+ * @return Pointer to a descriptor mapping object, NULL on failure
+ */
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries);
+
+/**
+ * Destroy a descriptor mapping object
+ * @param[in] map The map to free
+ */
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map);
+
+/**
+ * Allocate a new mapping entry (descriptor ID)
+ * Allocates a new entry in the map.
+ * @param[in] map The map to allocate a new entry in
+ * @param[in] target The value to map to
+ * @return The descriptor allocated, ID 0 on failure.
+ */
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target);
+
+/**
+ * Get the value mapped to by a descriptor ID
+ * @param[in] map The map to lookup the descriptor id in
+ * @param[in] descriptor The descriptor ID to lookup
+ * @param[out] target Pointer to a pointer which will receive the stored value
+ *
+ * @return 0 on success lookup, -EINVAL on lookup failure.
+ */
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target);
+
+/**
+ * Free the descriptor ID
+ * For the descriptor to be reused it has to be freed
+ * @param[in] map The map to free the descriptor from
+ * @param descriptor The descriptor ID to free
+ */
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor);
+
+#endif /* _UMP_KERNEL_DESCRIPTOR_MAPPING_H_ */
diff --git a/t83x/kernel/drivers/base/ump/src/common/ump_kernel_priv.h b/t83x/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
new file mode 100644
index 0000000..38b6f1b
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/common/ump_kernel_priv.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_PRIV_H_
+#define _UMP_KERNEL_PRIV_H_
+
+#ifdef __KERNEL__
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <asm/cacheflush.h>
+#endif
+
+
+#define UMP_EXPECTED_IDS 64
+#define UMP_MAX_IDS 32768
+
+#ifdef __KERNEL__
+#define UMP_ASSERT(expr) \
+		if (!(expr)) { \
+			printk(KERN_ERR "UMP: Assertion failed! %s,%s,%s,line=%d\n",\
+					#expr,__FILE__,__func__,__LINE__); \
+					BUG(); \
+		}
+
+static inline void ump_sync_to_memory(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/*TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_TO_DEVICE);
+	mb(); /* for outer_sync (if needed) */
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+
+static inline void ump_sync_to_cpu(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+	__cpuc_flush_dcache_area(vaddr, sz);
+	outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+	/* TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+	flush_cache_all();
+#elif defined(CONFIG_X86)
+	struct scatterlist scl = {0, };
+	sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+			paddr & (PAGE_SIZE -1 ));
+	dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_FROM_DEVICE);
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+#endif /* __KERNEL__*/
+#endif /* _UMP_KERNEL_PRIV_H_ */
+
diff --git a/t83x/kernel/drivers/base/ump/src/imports/ion/Makefile b/t83x/kernel/drivers/base/ump/src/imports/ion/Makefile
new file mode 100644
index 0000000..ef74b27
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/imports/ion/Makefile
@@ -0,0 +1,53 @@
+#
+# (C) COPYRIGHT 2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+
+ifneq ($(KERNELRELEASE),)
+# Inside the kernel build system
+
+EXTRA_CFLAGS += -I$(KBUILD_EXTMOD) -I$(KBUILD_EXTMOD)/../../../../..
+KBUILD_EXTRA_SYMBOLS += "$(KBUILD_EXTMOD)/../../Module.symvers"
+
+SRC += ump_kernel_import_ion.c
+
+MODULE:=ump_ion_import.ko
+
+obj-m := $(MODULE:.ko=.o)
+$(MODULE:.ko=-y) := $(SRC:.c=.o)
+$(MODULE:.ko=-objs) := $(SRC:.c=.o)
+
+else
+# Outside the kernel build system
+#
+#
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR)
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+endif
+
diff --git a/t83x/kernel/drivers/base/ump/src/imports/ion/sconscript b/t83x/kernel/drivers/base/ump/src/imports/ion/sconscript
new file mode 100644
index 0000000..cff24c8
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/imports/ion/sconscript
@@ -0,0 +1,49 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ion = env.Clone()
+
+if env_ion['ump_ion'] != '1':
+	Return()
+
+# Source files required for UMP.
+ion_src = [Glob('#kernel/drivers/base/ump/src/imports/ion/*.c')]
+
+# Note: cleaning via the Linux kernel build system does not yet work
+if env_ion.GetOption('clean') :
+	makeAction=Action("cd ${SOURCE.dir} && make clean", '$MAKECOMSTR')
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make PLATFORM=${platform} && cp ump_ion_import.ko $STATIC_LIB_PATH/ump_ion_import.ko", '$MAKECOMSTR')
+# The target is ump_import_ion.ko, built from the source in ion_src, via the action makeAction
+# ump_import_ion.ko will be copied to $STATIC_LIB_PATH after being built by the standard Linux
+# kernel build system, after which it can be installed to the directory specified if
+# "libs_install" is set; this is done by LibTarget.
+cmd = env_ion.Command('$STATIC_LIB_PATH/ump_ion_import.ko', ion_src, [makeAction])
+
+# Until we fathom out how the invoke the Linux build system to clean, we can use Clean
+# to remove generated files.
+
+patterns = ['*.mod.c', '*.o', '*.ko', '*.a', '.*.cmd', 'modules.order', '.tmp_versions', 'Module.symvers']
+
+for p in patterns:
+	Clean(cmd, Glob('#kernel/drivers/base/ump/src/imports/ion/%s' % p))
+
+env_ion.Depends('$STATIC_LIB_PATH/ump_ion_import.ko', '$STATIC_LIB_PATH/ump.ko')
+env_ion.KernelObjTarget('ump', cmd)
diff --git a/t83x/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c b/t83x/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
new file mode 100644
index 0000000..12b2e32
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/dma-mapping.h>
+#include "ion.h"
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+struct ion_wrapping_info
+{
+	struct ion_client *   ion_client;
+	struct ion_handle *   ion_handle;
+	int                   num_phys_blocks;
+	struct scatterlist *  sglist;
+};
+
+static struct ion_device * ion_device_get(void)
+{
+	/* < Customer to provide implementation >
+	 * Return a pointer to the global ion_device on the system
+	 */
+	return NULL;
+}
+
+static int import_ion_client_create(void** const custom_session_data)
+{
+	struct ion_client ** ion_client;
+
+	ion_client = (struct ion_client**)custom_session_data;
+
+	*ion_client = ion_client_create(ion_device_get(), "ump");
+
+	return PTR_RET(*ion_client);
+}
+
+
+static void import_ion_client_destroy(void* custom_session_data)
+{
+	struct ion_client * ion_client;
+
+	ion_client = (struct ion_client*)custom_session_data;
+	BUG_ON(!ion_client);
+
+	ion_client_destroy(ion_client);
+}
+
+
+static void import_ion_final_release_callback(const ump_dd_handle handle, void * info)
+{
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!info);
+
+	(void)handle;
+	ion_info = (struct ion_wrapping_info*)info;
+
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+	kfree(ion_info);
+	module_put(THIS_MODULE);
+}
+
+static ump_dd_handle import_ion_import(void * custom_session_data, void * pfd, ump_alloc_flags flags)
+{
+	int fd;
+	ump_dd_handle ump_handle;
+	struct scatterlist * sg;
+	int num_dma_blocks;
+	ump_dd_physical_block_64 * phys_blocks;
+	unsigned long i;
+	struct sg_table * sgt;
+
+	struct ion_wrapping_info * ion_info;
+
+	BUG_ON(!custom_session_data);
+	BUG_ON(!pfd);
+
+	ion_info = kzalloc(GFP_KERNEL, sizeof(*ion_info));
+	if (NULL == ion_info)
+	{
+		return UMP_DD_INVALID_MEMORY_HANDLE;
+	}
+
+	ion_info->ion_client = (struct ion_client*)custom_session_data;
+
+	if (get_user(fd, (int*)pfd))
+	{
+		goto out;
+	}
+
+	ion_info->ion_handle = ion_import_dma_buf(ion_info->ion_client, fd);
+
+	if (IS_ERR_OR_NULL(ion_info->ion_handle))
+	{
+		goto out;
+	}
+
+	sgt = ion_sg_table(ion_info->ion_client, ion_info->ion_handle);
+	if (IS_ERR_OR_NULL(sgt))
+	{
+		goto ion_dma_map_failed;
+	}
+
+	ion_info->sglist = sgt->sgl;
+
+	sg = ion_info->sglist;
+	while (sg)
+	{
+		ion_info->num_phys_blocks++;
+		sg = sg_next(sg);
+	}
+
+	num_dma_blocks = dma_map_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+	if (0 == num_dma_blocks)
+	{
+		goto linux_dma_map_failed;
+	}
+
+	phys_blocks = vmalloc(num_dma_blocks * sizeof(*phys_blocks));
+	if (NULL == phys_blocks)
+	{
+		goto vmalloc_failed;
+	}
+
+	for_each_sg(ion_info->sglist, sg, num_dma_blocks, i)
+	{
+		phys_blocks[i].addr = sg_phys(sg);
+		phys_blocks[i].size = sg_dma_len(sg);
+	}
+
+	ump_handle = ump_dd_create_from_phys_blocks_64(phys_blocks, num_dma_blocks, flags, NULL, import_ion_final_release_callback, ion_info);
+
+	vfree(phys_blocks);
+
+	if (ump_handle != UMP_DD_INVALID_MEMORY_HANDLE)
+	{
+		/*
+		 * As we have a final release callback installed
+		 * we must keep the module locked until
+		 * the callback has been triggered
+		 * */
+		__module_get(THIS_MODULE);
+		return ump_handle;
+	}
+
+	/* failed*/
+vmalloc_failed:
+	dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+linux_dma_map_failed:
+ion_dma_map_failed:
+	ion_free(ion_info->ion_client, ion_info->ion_handle);
+out:
+	kfree(ion_info);
+	return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+struct ump_import_handler import_handler_ion =
+{
+	.linux_module =  THIS_MODULE,
+	.session_begin = import_ion_client_create,
+	.session_end =   import_ion_client_destroy,
+	.import =        import_ion_import
+};
+
+static int __init import_ion_initialize_module(void)
+{
+	/* register with UMP */
+	return ump_import_module_register(UMP_EXTERNAL_MEM_TYPE_ION, &import_handler_ion);
+}
+
+static void __exit import_ion_cleanup_module(void)
+{
+	/* unregister import handler */
+	ump_import_module_unregister(UMP_EXTERNAL_MEM_TYPE_ION);
+}
+
+/* Setup init and exit functions for this module */
+module_init(import_ion_initialize_module);
+module_exit(import_ion_cleanup_module);
+
+/* And some module information */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/t83x/kernel/drivers/base/ump/src/imports/sconscript b/t83x/kernel/drivers/base/ump/src/imports/sconscript
new file mode 100644
index 0000000..8f50683
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/imports/sconscript
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+import os, sys
+Import('env')
+
+import_modules = [ os.path.join( path, 'sconscript' ) for path in sorted(os.listdir( os.getcwd() )) ]
+
+for m in import_modules:
+	if os.path.exists(m):
+		SConscript( m, variant_dir=os.path.join( env['BUILD_DIR_PATH'], os.path.dirname(m) ), duplicate=0 )
+
diff --git a/t83x/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c b/t83x/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
new file mode 100644
index 0000000..0ec4ba6
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/linux/ump_kernel_linux.c
@@ -0,0 +1,829 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014,2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump-ioctl.h>
+#include <linux/ump.h>
+
+#include <asm/uaccess.h>	         /* copy_*_user */
+#include <linux/compat.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/device.h>            /* class registration support */
+#include <linux/uaccess.h>
+
+#include <common/ump_kernel_core.h>
+
+#include "ump_kernel_linux_mem.h"
+#include <ump_arch.h>
+
+
+struct ump_linux_device
+{
+	struct cdev cdev;
+	struct class * ump_class;
+};
+
+/* Name of the UMP device driver */
+static char ump_dev_name[] = "ump"; /* should be const, but the functions we call requires non-cost */
+
+/* Module parameter to control log level */
+int ump_debug_level = 2;
+module_param(ump_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(ump_debug_level, "Higher number, more dmesg output");
+
+/* By default the module uses any available major, but it's possible to set it at load time to a specific number */
+int ump_major = 0;
+module_param(ump_major, int, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_major, "Device major number");
+
+#define UMP_REV_STRING "1.0"
+
+char * ump_revision = UMP_REV_STRING;
+module_param(ump_revision, charp, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_revision, "Revision info");
+
+static int umpp_linux_open(struct inode *inode, struct file *filp);
+static int umpp_linux_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+/* This variable defines the file operations this UMP device driver offers */
+static struct file_operations ump_fops =
+{
+	.owner   = THIS_MODULE,
+	.open    = umpp_linux_open,
+	.release = umpp_linux_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = umpp_linux_ioctl,
+#else
+	.ioctl   = umpp_linux_ioctl,
+#endif
+	.compat_ioctl = umpp_linux_ioctl,
+	.mmap = umpp_linux_mmap
+};
+
+/* import module handling */
+DEFINE_MUTEX(import_list_lock);
+struct ump_import_handler *  import_handlers[UMPP_EXTERNAL_MEM_COUNT];
+
+/* The global variable containing the global device data */
+static struct ump_linux_device ump_linux_device;
+
+#define DBG_MSG(level, ...) do { \
+if ((level) <=  ump_debug_level)\
+{\
+printk(KERN_DEBUG "UMP<" #level ">:\n" __VA_ARGS__);\
+} \
+} while (0)
+
+#define MSG_ERR(...) do{ \
+printk(KERN_ERR "UMP: ERR: %s\n           %s()%4d\n", __FILE__, __func__  , __LINE__) ; \
+printk(KERN_ERR __VA_ARGS__); \
+printk(KERN_ERR "\n"); \
+} while(0)
+
+#define MSG(...) do{ \
+printk(KERN_INFO "UMP: " __VA_ARGS__);\
+} while (0)
+
+/*
+ * This function is called by Linux to initialize this module.
+ * All we do is initialize the UMP device driver.
+ */
+static int __init umpp_linux_initialize_module(void)
+{
+	ump_result err;
+
+	err = umpp_core_constructor();
+	if (UMP_OK != err)
+	{
+		MSG_ERR("UMP device driver init failed\n");
+		return -ENOTTY;
+	}
+
+	MSG("UMP device driver %s loaded\n", UMP_REV_STRING);
+	return 0;
+}
+
+
+
+/*
+ * This function is called by Linux to unload/terminate/exit/cleanup this module.
+ * All we do is terminate the UMP device driver.
+ */
+static void __exit umpp_linux_cleanup_module(void)
+{
+	DBG_MSG(2, "Unloading UMP device driver\n");
+	umpp_core_destructor();
+	DBG_MSG(2, "Module unloaded\n");
+}
+
+
+
+/*
+ * Initialize the UMP device driver.
+ */
+ump_result umpp_device_initialize(void)
+{
+	int err;
+	dev_t dev = 0;
+
+	if (0 == ump_major)
+	{
+		/* auto select a major */
+		err = alloc_chrdev_region(&dev, 0, 1, ump_dev_name);
+		ump_major = MAJOR(dev);
+	}
+	else
+	{
+		/* use load time defined major number */
+		dev = MKDEV(ump_major, 0);
+		err = register_chrdev_region(dev, 1, ump_dev_name);
+	}
+
+	if (0 == err)
+	{
+		memset(&ump_linux_device, 0, sizeof(ump_linux_device));
+
+		/* initialize our char dev data */
+		cdev_init(&ump_linux_device.cdev, &ump_fops);
+		ump_linux_device.cdev.owner = THIS_MODULE;
+		ump_linux_device.cdev.ops = &ump_fops;
+
+		/* register char dev with the kernel */
+		err = cdev_add(&ump_linux_device.cdev, dev, 1/*count*/);
+		if (0 == err)
+		{
+
+			ump_linux_device.ump_class = class_create(THIS_MODULE, ump_dev_name);
+			if (IS_ERR(ump_linux_device.ump_class))
+			{
+				err = PTR_ERR(ump_linux_device.ump_class);
+			}
+			else
+			{
+				struct device * mdev;
+				mdev = device_create(ump_linux_device.ump_class, NULL, dev, NULL, ump_dev_name);
+				if (!IS_ERR(mdev))
+				{
+					return UMP_OK;
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(ump_linux_device.ump_class);
+			}
+			cdev_del(&ump_linux_device.cdev);
+		}
+
+		unregister_chrdev_region(dev, 1);
+	}
+
+	return UMP_ERROR;
+}
+
+
+
+/*
+ * Terminate the UMP device driver
+ */
+void umpp_device_terminate(void)
+{
+	dev_t dev = MKDEV(ump_major, 0);
+
+	device_destroy(ump_linux_device.ump_class, dev);
+	class_destroy(ump_linux_device.ump_class);
+
+	/* unregister char device */
+	cdev_del(&ump_linux_device.cdev);
+
+	/* free major */
+	unregister_chrdev_region(dev, 1);
+}
+
+
+static int umpp_linux_open(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = umpp_core_session_start();
+	if (NULL == session)
+	{
+		return -EFAULT;
+	}
+	
+	filp->private_data = session;
+
+	return 0;
+}
+
+static int umpp_linux_release(struct inode *inode, struct file *filp)
+{
+	umpp_session *session;
+	
+	session = filp->private_data;
+
+	umpp_core_session_end(session);
+
+	filp->private_data = NULL;
+
+	return 0;
+}
+
+/**************************/
+/*ioctl specific functions*/
+/**************************/
+static int do_ump_dd_allocate(umpp_session * session, ump_k_allocate * params)
+{
+	ump_dd_handle new_allocation;
+	new_allocation = ump_dd_allocate_64(params->size, params->alloc_flags, NULL, NULL, NULL);
+
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	printk(KERN_WARNING "UMP: Allocation FAILED\n");
+	return -ENOMEM;
+}
+
+static int do_ump_dd_retain(umpp_session * session, ump_k_retain * params)
+{
+	umpp_session_memory_usage * it;
+
+	mutex_lock(&session->session_lock);
+
+	/* try to find it on the session usage list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found to already be in use */
+			/* check for overflow */
+			while(1)
+			{
+				int refcnt = atomic_read(&it->process_usage_count);
+				if (refcnt + 1 > 0)
+				{
+					/* add a process local ref */
+					if(atomic_cmpxchg(&it->process_usage_count, refcnt, refcnt + 1) == refcnt)
+					{
+						mutex_unlock(&session->session_lock);
+						return 0;
+					}
+				}
+				else
+				{
+					/* maximum usage cap reached */
+					mutex_unlock(&session->session_lock);
+					return -EBUSY;
+				}
+			}
+		}
+	}
+	/* try to look it up globally */
+
+	it = kmalloc(sizeof(*it), GFP_KERNEL);
+
+	if (NULL != it)
+	{
+		it->mem = ump_dd_from_secure_id(params->secure_id);
+		if (UMP_DD_INVALID_MEMORY_HANDLE != it->mem)
+		{
+			/* found, add it to the session usage list */
+			it->id = params->secure_id;
+			atomic_set(&it->process_usage_count, 1);
+			list_add(&it->link, &session->memory_usage);
+		}
+		else
+		{
+			/* not found */
+			kfree(it);
+			it = NULL;
+		}
+	}
+
+	mutex_unlock(&session->session_lock);
+
+	return (NULL != it) ? 0 : -ENODEV;
+}
+
+
+static int do_ump_dd_release(umpp_session * session, ump_k_release * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only do a release if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			result = 0;
+
+			if (0 == atomic_sub_return(1, &it->process_usage_count))
+			{
+				/* last ref in this process remove from the usage list and remove the underlying ref */
+				list_del(&it->link);
+				ump_dd_release(it->mem);
+				kfree(it);
+			}
+
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_sizequery(umpp_session * session, ump_k_sizequery * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->size = ump_dd_size_get_64(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_allocation_flags_get(umpp_session * session, ump_k_allocation_flags * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, a valid call */
+			params->alloc_flags = ump_dd_allocation_flags_get(it->mem);
+			result = 0;
+			break;
+		}
+
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+static int do_ump_dd_msync_now(umpp_session * session, ump_k_msync * params)
+{
+	umpp_session_memory_usage * it;
+	int result = -ENODEV;
+
+	mutex_lock(&session->session_lock);
+
+	/* only valid if found on the session list */
+	list_for_each_entry(it, &session->memory_usage, link)
+	{
+		if (it->id == params->secure_id)
+		{
+			/* found, do the cache op */
+#ifdef CONFIG_COMPAT
+			if (is_compat_task())
+			{
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, compat_ptr(params->mapped_ptr.compat_value), params->size);
+				result = 0;
+			}
+			else
+			{
+#endif
+				umpp_dd_cpu_msync_now(it->mem, params->cache_operation, params->mapped_ptr.value, params->size);
+				result = 0;
+#ifdef CONFIG_COMPAT
+			}
+#endif
+			break;
+		}
+	}
+	mutex_unlock(&session->session_lock);
+
+	return result;
+}
+
+
+void umpp_import_handlers_init(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		if (import_handlers[i])
+		{
+			import_handlers[i]->session_begin(&session->import_handler_data[i]);
+			/* It is OK if session_begin returned an error.
+			 * We won't do any import calls if so */
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+void umpp_import_handlers_term(umpp_session * session)
+{
+	int i;
+	mutex_lock(&import_list_lock);
+	for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+	{
+		/* only call if session_begin succeeded */
+		if (session->import_handler_data[i] != NULL)
+		{
+			/* if session_beging succeeded the handler
+			 * should not have unregistered with us */
+			BUG_ON(!import_handlers[i]);
+			import_handlers[i]->session_end(session->import_handler_data[i]);
+			session->import_handler_data[i] = NULL;
+		}
+	}
+	mutex_unlock(&import_list_lock);
+}
+
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler)
+{
+	int res = -EEXIST;
+
+	/* validate input */
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+	BUG_ON(!handler);
+	BUG_ON(!handler->linux_module);
+	BUG_ON(!handler->session_begin);
+	BUG_ON(!handler->session_end);
+	BUG_ON(!handler->import);
+
+	mutex_lock(&import_list_lock);
+
+	if (!import_handlers[type])
+	{
+		import_handlers[type] = handler;
+		res = 0;
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return res;
+}
+
+void ump_import_module_unregister(enum ump_external_memory_type type)
+{
+	BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+
+	mutex_lock(&import_list_lock);
+	/* an error to call this if ump_import_module_register didn't succeed */
+	BUG_ON(!import_handlers[type]);
+	import_handlers[type] = NULL;
+	mutex_unlock(&import_list_lock);
+}
+
+static struct ump_import_handler * import_handler_get(unsigned int type_id)
+{
+	enum ump_external_memory_type type;
+	struct ump_import_handler * handler;
+
+	/* validate and convert input */
+	/* handle bad data here, not just BUG_ON */
+	if (type_id == 0 || type_id >= UMPP_EXTERNAL_MEM_COUNT)
+		return NULL;
+
+	type = (enum ump_external_memory_type)type_id;
+
+	/* find the handler */
+	mutex_lock(&import_list_lock);
+
+	handler = import_handlers[type];
+
+	if (handler)
+	{
+		if (!try_module_get(handler->linux_module))
+		{
+			handler = NULL;
+		}
+	}
+
+	mutex_unlock(&import_list_lock);
+
+	return handler;
+}
+
+static void import_handler_put(struct ump_import_handler * handler)
+{
+	module_put(handler->linux_module);
+}
+
+static int do_ump_dd_import(umpp_session * session, ump_k_import * params)
+{
+	ump_dd_handle new_allocation = UMP_DD_INVALID_MEMORY_HANDLE;
+	struct ump_import_handler * handler;
+
+	handler = import_handler_get(params->type);
+
+	if (handler)
+	{
+		/* try late binding if not already bound */
+		if (!session->import_handler_data[params->type])
+		{
+			handler->session_begin(&session->import_handler_data[params->type]);
+		}
+
+		/* do we have a bound session? */
+		if (session->import_handler_data[params->type])
+		{
+			new_allocation = handler->import( session->import_handler_data[params->type],
+		                                      params->phandle.value,
+		                                      params->alloc_flags);
+		}
+
+		/* done with the handler */
+		import_handler_put(handler);
+	}
+
+	/* did the import succeed? */
+	if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+	{
+		umpp_session_memory_usage * tracker;
+
+		tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+		if (NULL != tracker)
+		{
+			/* update the return struct with the new ID */
+			params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+			tracker->mem = new_allocation;
+			tracker->id = params->secure_id;
+			atomic_set(&tracker->process_usage_count, 1);
+
+			/* link it into the session in-use list */
+			mutex_lock(&session->session_lock);
+			list_add(&tracker->link, &session->memory_usage);
+			mutex_unlock(&session->session_lock);
+
+			return 0;
+		}
+		ump_dd_release(new_allocation);
+	}
+
+	return -ENOMEM;
+
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	int ret;
+	uint64_t msg[(UMP_CALL_MAX_SIZE+7)>>3]; /* alignment fixup */
+	uint32_t size = _IOC_SIZE(cmd);
+	struct umpp_session *session = filp->private_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+	(void)inode; /* unused arg */
+#endif
+
+	/*
+	 * extract the type and number bitfields, and don't decode
+	 * wrong cmds: return ENOTTY (inappropriate ioctl) before access_ok()
+	 */
+	if (_IOC_TYPE(cmd) != UMP_IOC_MAGIC)
+	{
+		return -ENOTTY;
+
+	}
+	if (_IOC_NR(cmd) > UMP_IOC_MAXNR)
+	{
+		return -ENOTTY;
+	}
+
+	switch(cmd)
+	{
+		case UMP_FUNC_ALLOCATE:
+			if (size != sizeof(ump_k_allocate))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocate(session, (ump_k_allocate *)&msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_SIZEQUERY:
+			if (size != sizeof(ump_k_sizequery))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_sizequery(session,(ump_k_sizequery*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_MSYNC:
+			if (size != sizeof(ump_k_msync))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_msync_now(session,(ump_k_msync*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_IMPORT:
+			if (size != sizeof(ump_k_import))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user*)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_import(session, (ump_k_import*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		/* used only by v1 API */
+		case UMP_FUNC_ALLOCATION_FLAGS_GET:
+			if (size != sizeof(ump_k_allocation_flags))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_allocation_flags_get(session,(ump_k_allocation_flags*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			if (copy_to_user((void *)arg, &msg, size))
+			{
+				return -EFAULT;
+			}
+			return 0;
+		case UMP_FUNC_RETAIN:
+			if (size != sizeof(ump_k_retain))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_retain(session,(ump_k_retain*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		case UMP_FUNC_RELEASE:
+			if (size != sizeof(ump_k_release))
+			{
+				return -ENOTTY;
+			}
+			if (copy_from_user(&msg, (void __user *)arg, size))
+			{
+				return -EFAULT;
+			}
+			ret = do_ump_dd_release(session,(ump_k_release*) &msg);
+			if (ret)
+			{
+				return ret;
+			}
+			return 0;
+		default:
+			/* not ours */
+			return -ENOTTY;
+	}
+	/*redundant below*/
+	return -ENOTTY;
+}
+
+
+/* Export UMP kernel space API functions */
+EXPORT_SYMBOL(ump_dd_allocate_64);
+EXPORT_SYMBOL(ump_dd_allocation_flags_get);
+EXPORT_SYMBOL(ump_dd_secure_id_get);
+EXPORT_SYMBOL(ump_dd_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get_64);
+EXPORT_SYMBOL(ump_dd_size_get_64);
+EXPORT_SYMBOL(ump_dd_retain);
+EXPORT_SYMBOL(ump_dd_release);
+EXPORT_SYMBOL(ump_dd_create_from_phys_blocks_64);
+
+/* import API */
+EXPORT_SYMBOL(ump_import_module_register);
+EXPORT_SYMBOL(ump_import_module_unregister);
+
+
+
+/* V1 API */
+EXPORT_SYMBOL(ump_dd_handle_create_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_block_count_get);
+EXPORT_SYMBOL(ump_dd_phys_block_get);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get);
+EXPORT_SYMBOL(ump_dd_size_get);
+EXPORT_SYMBOL(ump_dd_reference_add);
+EXPORT_SYMBOL(ump_dd_reference_release);
+EXPORT_SYMBOL(ump_dd_handle_create_from_phys_blocks);
+
+
+/* Setup init and exit functions for this module */
+module_init(umpp_linux_initialize_module);
+module_exit(umpp_linux_cleanup_module);
+
+/* And some module informatio */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(UMP_REV_STRING);
diff --git a/t83x/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c b/t83x/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
new file mode 100644
index 0000000..43e28ee
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
@@ -0,0 +1,258 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+#include <linux/version.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/mm.h> /* memory mananger definitions */
+#include <linux/pfn.h>
+#include <linux/highmem.h> /*kmap*/
+
+#include <linux/compat.h> /* is_compat_task */
+
+#include <common/ump_kernel_core.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)
+#define phys_to_pfn_t(phys, flags) (phys >> PAGE_SHIFT)
+#else
+#include <linux/pfn_t.h>
+#endif
+
+static void umpp_vm_close(struct vm_area_struct *vma)
+{
+	umpp_cpu_mapping * mapping;
+	umpp_session * session;
+	ump_dd_handle handle;
+
+	mapping = (umpp_cpu_mapping*)vma->vm_private_data;
+	UMP_ASSERT(mapping);
+	
+	session = mapping->session;
+	handle = mapping->handle;
+
+	umpp_dd_remove_cpu_mapping(mapping->handle, mapping); /* will free the mapping object */
+	ump_dd_release(handle);
+}
+
+
+static const struct vm_operations_struct umpp_vm_ops = {
+	.close = umpp_vm_close
+};
+
+int umpp_phys_commit(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	/* round up to a page boundary */
+	alloc->size = (alloc->size + PAGE_SIZE - 1) & ~((uint64_t)PAGE_SIZE-1) ;
+	/* calculate number of pages */
+	alloc->blocksCount = alloc->size >> PAGE_SHIFT;
+
+	if( (sizeof(ump_dd_physical_block_64) * alloc->blocksCount) > ((size_t)-1))
+	{
+		printk(KERN_WARNING "UMP: umpp_phys_commit - trying to allocate more than possible\n");
+		return -ENOMEM;
+	}
+
+	alloc->block_array = kmalloc(sizeof(ump_dd_physical_block_64) * alloc->blocksCount, __GFP_HARDWALL | GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (NULL == alloc->block_array)
+	{
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		void * mp;
+		struct page * page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | __GFP_NOWARN | __GFP_COLD);
+		if (NULL == page)
+		{
+			break;
+		}
+
+		alloc->block_array[i].addr = PFN_PHYS(page_to_pfn(page));
+		alloc->block_array[i].size = PAGE_SIZE;
+
+		mp = kmap(page);
+		if (NULL == mp)
+		{
+			__free_page(page);
+			break;
+		}
+
+		memset(mp, 0x00, PAGE_SIZE); /* instead of __GFP_ZERO, so we can do cache maintenance */
+		ump_sync_to_memory(PFN_PHYS(page_to_pfn(page)), mp, PAGE_SIZE);
+		kunmap(page);
+	}
+
+	if (i == alloc->blocksCount)
+	{
+		return 0;
+	}
+	else
+	{
+		uint64_t j;
+		for (j = 0; j < i; j++)
+		{
+			struct page * page;
+			page = pfn_to_page(alloc->block_array[j].addr >> PAGE_SHIFT);
+			__free_page(page);
+		}
+		
+		kfree(alloc->block_array);
+
+		return -ENOMEM;
+	}
+}
+
+void umpp_phys_free(umpp_allocation * alloc)
+{
+	uint64_t i;
+
+	for (i = 0; i < alloc->blocksCount; i++)
+	{
+		__free_page(pfn_to_page(alloc->block_array[i].addr >> PAGE_SHIFT));
+	}
+
+	kfree(alloc->block_array);
+}
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma)
+{
+	ump_secure_id id;
+	ump_dd_handle h;
+	size_t offset;
+	int err = -EINVAL;
+	size_t length = vma->vm_end - vma->vm_start;
+
+	umpp_cpu_mapping * map = NULL;
+	umpp_session *session = filp->private_data;
+
+	if ( 0 == length )
+	{
+		return -EINVAL;
+	}
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (NULL == map)
+	{
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* unpack our arg */
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	if (is_compat_task())
+	{
+#endif
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_32;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_32;
+#if defined CONFIG_64BIT && CONFIG_64BIT
+	}
+	else
+	{
+		id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_64;
+		offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_64;
+	}
+#endif
+
+	h = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE != h)
+	{
+		uint64_t i;
+		uint64_t block_idx;
+		uint64_t block_offset;
+		uint64_t paddr;
+		umpp_allocation * alloc;
+		uint64_t last_byte;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0))
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO | VM_MIXEDMAP | VM_DONTDUMP;
+#else
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO | VM_MIXEDMAP;
+#endif
+		vma->vm_ops = &umpp_vm_ops;
+		vma->vm_private_data = map;
+
+		alloc = (umpp_allocation*)h;
+
+		if( (alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+		{
+			/* cache disabled flag set, disable caching for cpu mappings */
+			vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		}
+
+		last_byte = length + (offset << PAGE_SHIFT) - 1;
+		if (last_byte >= alloc->size || last_byte < (offset << PAGE_SHIFT))
+		{
+			goto err_out;
+		}
+
+		if (umpp_dd_find_start_block(alloc, offset << PAGE_SHIFT, &block_idx, &block_offset))
+		{
+			goto err_out;
+		}
+
+		paddr = alloc->block_array[block_idx].addr + block_offset;
+
+		for (i = 0; i < (length >> PAGE_SHIFT); i++)
+		{
+			/* check if we've overrrun the current block, if so move to the next block */
+			if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+			{
+				block_idx++;
+				UMP_ASSERT(block_idx < alloc->blocksCount);
+				paddr = alloc->block_array[block_idx].addr;
+			}
+
+			err = vm_insert_mixed(vma,
+					vma->vm_start + (i << PAGE_SHIFT),
+					phys_to_pfn_t(paddr, 0));
+			paddr += PAGE_SIZE;
+		}
+
+		map->vaddr_start = (void*)vma->vm_start;
+		map->nr_pages = length >> PAGE_SHIFT;
+		map->page_off = offset;
+		map->handle = h;
+		map->session = session;
+
+		umpp_dd_add_cpu_mapping(h, map);
+
+		return 0;
+
+		err_out:
+
+		ump_dd_release(h);
+	}
+
+	kfree(map);
+
+out:
+
+	return err;
+}
+
diff --git a/t83x/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h b/t83x/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
new file mode 100644
index 0000000..4fbf3b2
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_LINUX_MEM_H_
+#define _UMP_KERNEL_LINUX_MEM_H_
+
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma);
+
+#endif /* _UMP_KERNEL_LINUX_MEM_H_ */
diff --git a/t83x/kernel/drivers/base/ump/src/sconscript b/t83x/kernel/drivers/base/ump/src/sconscript
new file mode 100644
index 0000000..8fba351
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/sconscript
@@ -0,0 +1,38 @@
+#
+# (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('env')
+
+# Clone the environment so changes don't affect other build files
+env_ump = env.Clone()
+
+# Source files required for UMP.
+ump_src = [
+    Glob('common/*.c'),
+    Glob('imports/*/*.c'),
+    Glob('linux/*.c'),
+]
+
+make_args = env_ump.kernel_get_config_defines(ret_list = True) + [
+    'PLATFORM=%s' % env_ump['platform'],
+    'MALI_UNIT_TEST=%s' % env_ump['unit'],
+]
+
+mod = env_ump.BuildKernelModule('$STATIC_LIB_PATH/ump.ko', ump_src,
+                                make_args = make_args)
+env_ump.KernelObjTarget('ump', mod)
+
+SConscript( 'imports/sconscript' )
+
diff --git a/t83x/kernel/drivers/base/ump/src/ump_arch.h b/t83x/kernel/drivers/base/ump/src/ump_arch.h
new file mode 100644
index 0000000..2303d56
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/src/ump_arch.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_ARCH_H_
+#define _UMP_ARCH_H_
+
+#include <common/ump_kernel_core.h>
+
+/**
+ * Device specific setup.
+ * Called by the UMP core code to to host OS/device specific setup.
+ * Typical use case is device node creation for talking to user space.
+ * @return UMP_OK on success, any other value on failure
+ */
+extern ump_result umpp_device_initialize(void);
+
+/**
+ * Device specific teardown.
+ * Undo any things done by ump_device_initialize.
+ */
+extern void umpp_device_terminate(void);
+
+extern int umpp_phys_commit(umpp_allocation * alloc);
+extern void umpp_phys_free(umpp_allocation * alloc);
+
+#endif /* _UMP_ARCH_H_ */
diff --git a/t83x/kernel/drivers/base/ump/ump_ref_drv.h b/t83x/kernel/drivers/base/ump/ump_ref_drv.h
new file mode 100644
index 0000000..9a265fe
--- /dev/null
+++ b/t83x/kernel/drivers/base/ump/ump_ref_drv.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_ref_drv.h
+ *
+ * This file contains the link to user space part of the UMP API for usage by MALI 400 gralloc.
+ *
+ */
+
+#ifndef _UMP_REF_DRV_H_
+#define _UMP_REF_DRV_H_
+
+#include <ump/ump.h>
+
+
+#endif /* _UMP_REF_DRV_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/Kbuild b/t83x/kernel/drivers/gpu/arm/Kbuild
new file mode 100644
index 0000000..19c7e9a
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/Kbuild
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/t83x/kernel/drivers/gpu/arm/Kconfig b/t83x/kernel/drivers/gpu/arm/Kconfig
new file mode 100644
index 0000000..1f30eb5
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/Kconfig
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menu "ARM GPU Configuration"
+source "drivers/gpu/arm/midgard/Kconfig"
+endmenu
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/Kbuild b/t83x/kernel/drivers/gpu/arm/midgard/Kbuild
new file mode 100644
index 0000000..785025d
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/Kbuild
@@ -0,0 +1,170 @@
+#
+# (C) COPYRIGHT 2012-2016, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r21p0-01rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+ifeq ($(CONFIG_MALI_ERROR_INJECT),y)
+MALI_ERROR_INJECT_ON = 1
+endif
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_ERROR_INJECT_ON ?= 0
+MALI_MOCK_TEST ?= 0
+MALI_COVERAGE ?= 0
+CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
+# This workaround is for what seems to be a compiler bug we observed in
+# GCC 4.7 on AOSP 4.3.  The bug caused an intermittent failure compiling
+# the "_Pragma" syntax, where an error message is returned:
+#
+# "internal compiler error: unspellable token PRAGMA"
+#
+# This regression has thus far only been seen on the GCC 4.7 compiler bundled
+# with AOSP 4.3.0.  So this makefile, intended for in-tree kernel builds
+# which are not known to be used with AOSP, is hardcoded to disable the
+# workaround, i.e. set the define to 0.
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
+	-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
+	-DMALI_COVERAGE=$(MALI_COVERAGE) \
+	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+	-DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+
+# Use our defines when compiling
+ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+	mali_kbase_device.c \
+	mali_kbase_cache_policy.c \
+	mali_kbase_mem.c \
+	mali_kbase_mmu.c \
+	mali_kbase_ctx_sched.c \
+	mali_kbase_jd.c \
+	mali_kbase_jd_debugfs.c \
+	mali_kbase_jm.c \
+	mali_kbase_gpuprops.c \
+	mali_kbase_js.c \
+	mali_kbase_js_ctx_attr.c \
+	mali_kbase_event.c \
+	mali_kbase_context.c \
+	mali_kbase_pm.c \
+	mali_kbase_config.c \
+	mali_kbase_vinstr.c \
+	mali_kbase_softjobs.c \
+	mali_kbase_10969_workaround.c \
+	mali_kbase_hw.c \
+	mali_kbase_utility.c \
+	mali_kbase_debug.c \
+	mali_kbase_trace_timeline.c \
+	mali_kbase_gpu_memory_debugfs.c \
+	mali_kbase_mem_linux.c \
+	mali_kbase_core_linux.c \
+	mali_kbase_replay.c \
+	mali_kbase_mem_profile_debugfs.c \
+	mali_kbase_mmu_mode_lpae.c \
+	mali_kbase_mmu_mode_aarch64.c \
+	mali_kbase_disjoint_events.c \
+	mali_kbase_gator_api.c \
+	mali_kbase_debug_mem_view.c \
+	mali_kbase_debug_job_fault.c \
+	mali_kbase_smc.c \
+	mali_kbase_mem_pool.c \
+	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_tlstream.c \
+	mali_kbase_strings.c \
+	mali_kbase_as_fault_debugfs.c \
+	mali_kbase_regs_history_debugfs.c
+
+
+
+
+ifeq ($(MALI_UNIT_TEST),1)
+	SRC += mali_kbase_tlstream_test.c
+endif
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+	SRC += mali_kbase_regs_dump_debugfs.c
+endif
+
+
+ccflags-y += -I$(KBASE_PATH)
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+mali_kbase-y := $(SRC:.c=.o)
+# Kconfig passses in the name with quotes for in-tree builds - remove them.
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_NAME))
+MALI_PLATFORM_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_DIR)
+include $(src)/$(MALI_PLATFORM_DIR)/Kbuild
+
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+	ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
+		include $(src)/ipa/Kbuild
+	endif
+endif
+
+mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \
+	mali_kbase_dma_fence.o \
+	mali_kbase_fence.o
+mali_kbase-$(CONFIG_SYNC) += \
+	mali_kbase_sync_android.o \
+	mali_kbase_sync_common.o
+mali_kbase-$(CONFIG_SYNC_FILE) += \
+	mali_kbase_sync_file.o \
+	mali_kbase_sync_common.o \
+	mali_kbase_fence.o
+	ifeq ($(MALI_MOCK_TEST),1)
+# Test functionality
+mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
+endif
+
+include  $(src)/backend/gpu/Kbuild
+mali_kbase-y += $(BACKEND:.c=.o)
+
+ccflags-y += -I$(src)/backend/gpu
+subdir-ccflags-y += -I$(src)/backend/gpu
+
+# For kutf and mali_kutf_irq_latency_test
+obj-$(CONFIG_MALI_KUTF) += tests/
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/Kconfig b/t83x/kernel/drivers/gpu/arm/midgard/Kconfig
new file mode 100644
index 0000000..bcc2d4a
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/Kconfig
@@ -0,0 +1,201 @@
+#
+# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menuconfig MALI_MIDGARD
+	tristate "Mali Midgard series support"
+	select GPU_TRACEPOINTS if ANDROID
+	default n
+	help
+	  Enable this option to build support for a ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+	  You will need the Gator device driver already loaded before loading this driver when enabling
+	  Streamline debug support.
+	  This is a legacy interface required by older versions of Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD && PM_DEVFREQ
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if the Linux Kernel has built in
+	  support for DMA_BUF fences.
+
+config MALI_PLATFORM_NAME
+	depends on MALI_MIDGARD
+	string "Platform name"
+	default "devicetree"
+	help
+	  Enter the name of the desired platform configuration directory to
+	  include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
+	  exist.
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default n
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_CORESTACK
+	bool "Support controlling power to the GPU core stack"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enabling this feature on supported GPUs will let the driver powering
+	  on/off the GPU core stack independently without involving the Power
+	  Domain Controller. This should only be enabled on platforms which
+	  integration of the PDC to the Mali GPU is known to be problematic.
+	  This feature is currently only supported on t-Six and t-HEx GPUs.
+
+	  If unsure, say N.
+
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_FENCE_DEBUG
+	bool "Debug sync fence usage"
+	depends on MALI_MIDGARD && MALI_EXPERT && (SYNC || SYNC_FILE)
+	default y if MALI_DEBUG
+	help
+	  Select this option to enable additional checking and reporting on the
+	  use of sync fences in the Mali driver.
+
+	  This will add a 3s timeout to all sync fence waits in the Mali
+	  driver, so that when work for Mali has been waiting on a sync fence
+	  for a long time a debug message will be printed, detailing what fence
+	  is causing the block, and which dependent Mali atoms are blocked as a
+	  result of this.
+
+	  The timeout can be changed at runtime through the js_soft_timeout
+	  device attribute, where the timeout is specified in milliseconds.
+
+config MALI_NO_MALI
+	bool "No Mali"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This can be used to test the driver in a simulated environment
+	  whereby the hardware is not physically present. If the hardware is physically
+	  present it will not be used. This can be used to test the majority of the
+	  driver without needing actual hardware or for software benchmarking.
+	  All calls to the simulated hardware will complete immediately as if the hardware
+	  completed the task.
+
+config MALI_ERROR_INJECT
+	bool "Error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+	default n
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_TRACE_TIMELINE
+	bool "Timeline tracing"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enables timeline tracing through the kernel tracepoint system.
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event.	This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_2MB_ALLOC
+	bool "Attempt to allocate 2MB pages"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Rather than allocating all GPU memory page-by-page, attempt to
+	  allocate 2MB pages from the kernel. This reduces TLB pressure and
+	  helps to prevent memory fragmentation.
+
+	  If in doubt, say N
+
+config MALI_PWRSOFT_765
+	bool "PWRSOFT-765 ticket"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged
+	  in kernel v4.10, however if backported into the kernel then this
+	  option must be manually selected.
+
+	  If using kernel >= v4.10 then say N, otherwise if devfreq cooling
+	  changes have been backported say Y to avoid compilation errors.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
+source "drivers/gpu/arm/midgard/tests/Kconfig"
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/Makefile b/t83x/kernel/drivers/gpu/arm/midgard/Makefile
new file mode 100644
index 0000000..b7b261a
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/Makefile
@@ -0,0 +1,38 @@
+#
+# (C) COPYRIGHT 2010-2016, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
+UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump
+KBASE_PATH_RELATIVE = $(CURDIR)
+EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers
+
+ifeq ($(MALI_UNIT_TEST), 1)
+	EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+
+ifeq ($(CONFIG_MALI_FPGA_BUS_LOGGER),y)
+#Add bus logger symbols
+EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
+endif
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/Makefile.kbase b/t83x/kernel/drivers/gpu/arm/midgard/Makefile.kbase
new file mode 100644
index 0000000..2bef9c2
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/Makefile.kbase
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
new file mode 100644
index 0000000..cc510bf
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -0,0 +1,61 @@
+#
+# (C) COPYRIGHT 2014,2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+BACKEND += \
+	backend/gpu/mali_kbase_cache_policy_backend.c \
+	backend/gpu/mali_kbase_device_hw.c \
+	backend/gpu/mali_kbase_gpu.c \
+	backend/gpu/mali_kbase_gpuprops_backend.c \
+	backend/gpu/mali_kbase_debug_job_fault_backend.c \
+	backend/gpu/mali_kbase_irq_linux.c \
+	backend/gpu/mali_kbase_instr_backend.c \
+	backend/gpu/mali_kbase_jm_as.c \
+	backend/gpu/mali_kbase_jm_hw.c \
+	backend/gpu/mali_kbase_jm_rb.c \
+	backend/gpu/mali_kbase_js_affinity.c \
+	backend/gpu/mali_kbase_js_backend.c \
+	backend/gpu/mali_kbase_mmu_hw_direct.c \
+	backend/gpu/mali_kbase_pm_backend.c \
+	backend/gpu/mali_kbase_pm_driver.c \
+	backend/gpu/mali_kbase_pm_metrics.c \
+	backend/gpu/mali_kbase_pm_ca.c \
+	backend/gpu/mali_kbase_pm_ca_fixed.c \
+	backend/gpu/mali_kbase_pm_always_on.c \
+	backend/gpu/mali_kbase_pm_coarse_demand.c \
+	backend/gpu/mali_kbase_pm_demand.c \
+	backend/gpu/mali_kbase_pm_policy.c \
+	backend/gpu/mali_kbase_time.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+BACKEND += \
+	backend/gpu/mali_kbase_pm_ca_random.c \
+	backend/gpu/mali_kbase_pm_ca_demand.c \
+	backend/gpu/mali_kbase_pm_demand_always_powered.c \
+	backend/gpu/mali_kbase_pm_fast_start.c
+endif
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+BACKEND += \
+	backend/gpu/mali_kbase_devfreq.c \
+	backend/gpu/mali_kbase_pm_ca_devfreq.c
+endif
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+	# Dummy model
+	BACKEND += backend/gpu/mali_kbase_model_dummy.c
+	BACKEND += backend/gpu/mali_kbase_model_linux.c
+	# HW error simulation
+	BACKEND += backend/gpu/mali_kbase_model_error_generator.c
+endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
new file mode 100644
index 0000000..c8ae87e
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100644
index 0000000..fef9a2c
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode)
+{
+	kbdev->current_gpu_coherency_mode = mode;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+}
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100644
index 0000000..fe98691
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+  * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+  *			in the GPU.
+  * @kbdev:	Device pointer
+  * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+  */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
new file mode 100644
index 0000000..7851ea6
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_debug_job_fault.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+/*GPU_CONTROL_REG(r)*/
+static int gpu_control_reg_snapshot[] = {
+	GPU_ID,
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI
+};
+
+/* JOB_CONTROL_REG(r) */
+static int job_control_reg_snapshot[] = {
+	JOB_IRQ_MASK,
+	JOB_IRQ_STATUS
+};
+
+/* JOB_SLOT_REG(n,r) */
+static int job_slot_reg_snapshot[] = {
+	JS_HEAD_LO,
+	JS_HEAD_HI,
+	JS_TAIL_LO,
+	JS_TAIL_HI,
+	JS_AFFINITY_LO,
+	JS_AFFINITY_HI,
+	JS_CONFIG,
+	JS_STATUS,
+	JS_HEAD_NEXT_LO,
+	JS_HEAD_NEXT_HI,
+	JS_AFFINITY_NEXT_LO,
+	JS_AFFINITY_NEXT_HI,
+	JS_CONFIG_NEXT
+};
+
+/*MMU_REG(r)*/
+static int mmu_reg_snapshot[] = {
+	MMU_IRQ_MASK,
+	MMU_IRQ_STATUS
+};
+
+/* MMU_AS_REG(n,r) */
+static int as_reg_snapshot[] = {
+	AS_TRANSTAB_LO,
+	AS_TRANSTAB_HI,
+	AS_MEMATTR_LO,
+	AS_MEMATTR_HI,
+	AS_FAULTSTATUS,
+	AS_FAULTADDRESS_LO,
+	AS_FAULTADDRESS_HI,
+	AS_STATUS
+};
+
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range)
+{
+	int i, j;
+	int offset = 0;
+	int slot_number;
+	int as_number;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	slot_number = kctx->kbdev->gpu_props.num_job_slots;
+	as_number = kctx->kbdev->gpu_props.num_address_spaces;
+
+	/* get the GPU control registers*/
+	for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job control registers*/
+	for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				JOB_CONTROL_REG(job_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job Slot registers*/
+	for (j = 0; j < slot_number; j++)	{
+		for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+			JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	/* get the MMU registers*/
+	for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Address space registers*/
+	for (j = 0; j < as_number; j++) {
+		for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+					MMU_AS_REG(j, as_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	WARN_ON(offset >= (reg_range*2/4));
+
+	/* set the termination flag*/
+	kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
+	kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
+			offset);
+
+	return true;
+}
+
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
+{
+	int offset = 0;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
+		kctx->reg_dump[offset+1] =
+				kbase_reg_read(kctx->kbdev,
+						kctx->reg_dump[offset], NULL);
+		offset += 2;
+	}
+	return true;
+}
+
+
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
new file mode 100644
index 0000000..9fd5335
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -0,0 +1,518 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#define dev_pm_opp_find_freq_floor opp_find_freq_floor
+#endif /* Linux >= 3.13 */
+
+/**
+ * opp_translate - Translate nominal OPP frequency from devicetree into real
+ *                 frequency and core mask
+ * @kbdev:     Device pointer
+ * @freq:      Nominal frequency
+ * @core_mask: Pointer to u64 to store core mask to
+ *
+ * Return: Real target frequency
+ *
+ * This function will only perform translation if an operating-points-v2-mali
+ * table is present in devicetree. If one is not present then it will return an
+ * untranslated frequency and all cores enabled.
+ */
+static unsigned long opp_translate(struct kbase_device *kbdev,
+		unsigned long freq, u64 *core_mask)
+{
+	int i;
+
+	for (i = 0; i < kbdev->num_opps; i++) {
+		if (kbdev->opp_table[i].opp_freq == freq) {
+			*core_mask = kbdev->opp_table[i].core_mask;
+			return kbdev->opp_table[i].real_freq;
+		}
+	}
+
+	/* Failed to find OPP - return all cores enabled & nominal frequency */
+	*core_mask = kbdev->gpu_props.props.raw_props.shader_present;
+
+	return freq;
+}
+
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
+static struct devfreq_simple_ondemand_data ondemand_data = {
+		.upthreshold = 90,
+		.downdifferential = 5,
+};
+
+#define DEV_FREQ_GOV_DATA		(&ondemand_data)
+
+static ssize_t upthreshold_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ondemand_data.upthreshold);
+}
+
+static ssize_t upthreshold_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	unsigned int t;
+	int ret;
+
+	ret = sscanf(buf, "%u", &t);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (t >= 100 || t == 0) {
+		dev_err(dev, "invalid input:%s\n", buf);
+		return -EINVAL;
+	}
+	ondemand_data.upthreshold = t;
+	return count;
+
+}
+static DEVICE_ATTR_RW(upthreshold);
+
+static ssize_t downdifferential_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ondemand_data.downdifferential);
+}
+
+static ssize_t downdifferential_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	unsigned int t;
+	int ret;
+
+	ret = sscanf(buf, "%u", &t);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (t >= 100 || t == 0) {
+		dev_err(dev, "invalid input:%s\n", buf);
+		return -EINVAL;
+	}
+	ondemand_data.downdifferential = t;
+	return count;
+
+}
+static DEVICE_ATTR_RW(downdifferential);
+#else
+#define DEV_FREQ_GOV_DATA		(NULL)
+#endif /* CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND */
+
+static ssize_t utilisation_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev->parent);
+	unsigned long total_time = 0, busy_time = 0;
+	unsigned long utilise;
+
+	kbase_pm_get_dvfs_utilisation(kbdev, &total_time, &busy_time);
+
+	if (total_time == 0) {
+		utilise = 0;
+	} else {
+		/* round up */
+		utilise = (busy_time * 100 + (total_time >> 1)) / total_time;
+	}
+	return sprintf(buf, "%ld\n", utilise);
+}
+static DEVICE_ATTR_RO(utilisation);
+
+static struct device_attribute *kbase_dev_attr[] = {
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
+	&dev_attr_downdifferential,
+	&dev_attr_upthreshold,
+#endif
+	&dev_attr_utilisation
+};
+
+static int kbase_dev_add_attr(struct device *dev)
+{
+	int i, ret;
+	for (i = 0; i <  ARRAY_SIZE(kbase_dev_attr); i++) {
+		if (!kbase_dev_attr[i])
+			continue;
+		ret = device_create_file(dev, kbase_dev_attr[i]);
+		if (ret)
+			return ret;
+	}
+	return ret;
+}
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long nominal_freq;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err;
+	u64 core_mask;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+	if (IS_ERR_OR_NULL(opp)) {
+		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+
+	nominal_freq = freq;
+
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (kbdev->current_nominal_freq == nominal_freq) {
+		*target_freq = nominal_freq;
+		return 0;
+	}
+
+	freq = opp_translate(kbdev, nominal_freq, &core_mask);
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq < freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to increase voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(kbdev->clock, freq);
+	if (err) {
+		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+				freq, *target_freq);
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq > freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	if (kbdev->pm.backend.ca_current_policy->id ==
+			KBASE_PM_CA_POLICY_ID_DEVFREQ)
+		kbase_devfreq_set_core_mask(kbdev, core_mask);
+
+	*target_freq = nominal_freq;
+	kbdev->current_voltage = voltage;
+	kbdev->current_nominal_freq = nominal_freq;
+	kbdev->current_freq = freq;
+	kbdev->current_core_mask = core_mask;
+
+	KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)nominal_freq);
+
+	kbase_pm_reset_dvfs_utilisation(kbdev);
+
+	return err;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	*freq = kbdev->current_nominal_freq;
+
+	return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	stat->current_frequency = kbdev->current_nominal_freq;
+
+	kbase_pm_get_dvfs_utilisation(kbdev,
+			&stat->total_time, &stat->busy_time);
+
+	stat->private_data = NULL;
+
+	return 0;
+}
+
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+		struct devfreq_dev_profile *dp)
+{
+	int count;
+	int i = 0;
+	unsigned long freq;
+	struct dev_pm_opp *opp;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	if (count < 0) {
+		rcu_read_unlock();
+		return count;
+	}
+	rcu_read_unlock();
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) {
+		opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
+				count, i);
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	kbase_devfreq_term_freq_table(kbdev);
+}
+
+static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
+{
+	struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node,
+			"operating-points-v2", 0);
+	struct device_node *node;
+	int i = 0;
+	int count;
+
+	if (!opp_node)
+		return 0;
+	if (!of_device_is_compatible(opp_node, "operating-points-v2-mali"))
+		return 0;
+
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	kbdev->opp_table = kmalloc_array(count,
+			sizeof(struct kbase_devfreq_opp), GFP_KERNEL);
+	if (!kbdev->opp_table)
+		return -ENOMEM;
+
+	for_each_available_child_of_node(opp_node, node) {
+		u64 core_mask;
+		u64 opp_freq, real_freq;
+		const void *core_count_p;
+
+		if (of_property_read_u64(node, "opp-hz", &opp_freq)) {
+			dev_warn(kbdev->dev, "OPP is missing required opp-hz property\n");
+			continue;
+		}
+		if (of_property_read_u64(node, "opp-hz-real", &real_freq))
+			real_freq = opp_freq;
+		if (of_property_read_u64(node, "opp-core-mask", &core_mask))
+			core_mask =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		core_count_p = of_get_property(node, "opp-core-count", NULL);
+		if (core_count_p) {
+			u64 remaining_core_mask =
+				kbdev->gpu_props.props.raw_props.shader_present;
+			int core_count = be32_to_cpup(core_count_p);
+
+			core_mask = 0;
+
+			for (; core_count > 0; core_count--) {
+				int core = ffs(remaining_core_mask);
+
+				if (!core) {
+					dev_err(kbdev->dev, "OPP has more cores than GPU\n");
+					return -ENODEV;
+				}
+
+				core_mask |= (1ull << (core-1));
+				remaining_core_mask &= ~(1ull << (core-1));
+			}
+		}
+
+		if (!core_mask) {
+			dev_err(kbdev->dev, "OPP has invalid core mask of 0\n");
+			return -ENODEV;
+		}
+
+		kbdev->opp_table[i].opp_freq = opp_freq;
+		kbdev->opp_table[i].real_freq = real_freq;
+		kbdev->opp_table[i].core_mask = core_mask;
+
+		dev_info(kbdev->dev, "OPP %d : opp_freq=%llu real_freq=%llu core_mask=%llx\n",
+				i, opp_freq, real_freq, core_mask);
+
+		i++;
+	}
+
+	kbdev->num_opps = i;
+
+	return 0;
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	int err;
+
+	if (!kbdev->clock) {
+		dev_err(kbdev->dev, "Clock not available for devfreq\n");
+		return -ENODEV;
+	}
+
+	kbdev->current_freq = clk_get_rate(kbdev->clock);
+	kbdev->current_nominal_freq = kbdev->current_freq;
+
+	dp = &kbdev->devfreq_profile;
+
+	dp->initial_freq = kbdev->current_freq;
+	dp->polling_ms = 100;
+	dp->target = kbase_devfreq_target;
+	dp->get_dev_status = kbase_devfreq_status;
+	dp->get_cur_freq = kbase_devfreq_cur_freq;
+	dp->exit = kbase_devfreq_exit;
+
+	if (kbase_devfreq_init_freq_table(kbdev, dp))
+		return -EFAULT;
+
+	err = kbase_devfreq_init_core_mask_table(kbdev);
+	if (err)
+		return err;
+
+	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+				"simple_ondemand", DEV_FREQ_GOV_DATA);
+	if (IS_ERR(kbdev->devfreq)) {
+		kbase_devfreq_term_freq_table(kbdev);
+		return PTR_ERR(kbdev->devfreq);
+	}
+
+	/* devfreq_add_device only copies a few of kbdev->dev's fields, so
+	 * set drvdata explicitly so IPA models can access kbdev. */
+	dev_set_drvdata(&kbdev->devfreq->dev, kbdev);
+
+	err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to register OPP notifier (%d)\n", err);
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	err = kbase_ipa_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "IPA initialization failed\n");
+		goto cooling_failed;
+	}
+
+	kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+			kbdev->dev->of_node,
+			kbdev->devfreq,
+			&kbase_ipa_power_model_ops);
+	if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+		err = PTR_ERR(kbdev->devfreq_cooling);
+		dev_err(kbdev->dev,
+			"Failed to register cooling device (%d)\n",
+			err);
+		goto cooling_failed;
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	if (devfreq_remove_device(kbdev->devfreq))
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	return err;
+}
+
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+	int err;
+
+	dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->devfreq_cooling)
+		devfreq_cooling_unregister(kbdev->devfreq_cooling);
+
+	kbase_ipa_term(kbdev);
+#endif
+
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+	err = devfreq_remove_device(kbdev->devfreq);
+	if (err)
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	kfree(kbdev->opp_table);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
new file mode 100644
index 0000000..c0bf8b1
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+int kbase_devfreq_init(struct kbase_device *kbdev);
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _BASE_DEVFREQ_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
new file mode 100644
index 0000000..dcdf15c
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -0,0 +1,255 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ *
+ */
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+
+#ifdef CONFIG_DEBUG_FS
+
+
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
+{
+	struct kbase_io_access *old_buf;
+	struct kbase_io_access *new_buf;
+	unsigned long flags;
+
+	if (!new_size)
+		goto out_err; /* The new size must not be 0 */
+
+	new_buf = vmalloc(new_size * sizeof(*h->buf));
+	if (!new_buf)
+		goto out_err;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	old_buf = h->buf;
+
+	/* Note: we won't bother with copying the old data over. The dumping
+	 * logic wouldn't work properly as it relies on 'count' both as a
+	 * counter and as an index to the buffer which would have changed with
+	 * the new array. This is a corner case that we don't need to support.
+	 */
+	h->count = 0;
+	h->size = new_size;
+	h->buf = new_buf;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+	vfree(old_buf);
+
+	return 0;
+
+out_err:
+	return -1;
+}
+
+
+int kbase_io_history_init(struct kbase_io_history *h, u16 n)
+{
+	h->enabled = false;
+	spin_lock_init(&h->lock);
+	h->count = 0;
+	h->size = 0;
+	h->buf = NULL;
+	if (kbase_io_history_resize(h, n))
+		return -1;
+
+	return 0;
+}
+
+
+void kbase_io_history_term(struct kbase_io_history *h)
+{
+	vfree(h->buf);
+	h->buf = NULL;
+}
+
+
+/* kbase_io_history_add - add new entry to the register access history
+ *
+ * @h: Pointer to the history data structure
+ * @addr: Register address
+ * @value: The value that is either read from or written to the register
+ * @write: 1 if it's a register write, 0 if it's a read
+ */
+static void kbase_io_history_add(struct kbase_io_history *h,
+		void __iomem const *addr, u32 value, u8 write)
+{
+	struct kbase_io_access *io;
+	unsigned long flags;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	io = &h->buf[h->count % h->size];
+	io->addr = (uintptr_t)addr | write;
+	io->value = value;
+	++h->count;
+	/* If count overflows, move the index by the buffer size so the entire
+	 * buffer will still be dumped later */
+	if (unlikely(!h->count))
+		h->count = h->size;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+void kbase_io_history_dump(struct kbase_device *kbdev)
+{
+	struct kbase_io_history *const h = &kbdev->io_history;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!unlikely(h->enabled))
+		return;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	dev_err(kbdev->dev, "Register IO History:");
+	iters = (h->size > h->count) ? h->count : h->size;
+	dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	writel(value, kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				value, 1);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_WRITE, offset,
+									value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx)
+{
+	u32 val;
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	val = readl(kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				val, 0);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+
+	if (kctx && kctx->jctx.tb)
+		kbase_device_trace_register_access(kctx, REG_READ, offset, val);
+	return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
+
+/**
+ * kbase_report_gpu_fault - Report a GPU fault.
+ * @kbdev:    Kbase device pointer
+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
+ *            was also set
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using dev_warn().
+ */
+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+	u32 status;
+	u64 address;
+
+	status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+	address = (u64) kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
+	address |= kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+
+	dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+			status & 0xFF,
+			kbase_exception_name(kbdev, status),
+			address);
+	if (multiple)
+		dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+	if (val & GPU_FAULT)
+		kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+	if (val & RESET_COMPLETED)
+		kbase_pm_reset_done(kbdev);
+
+	if (val & PRFCNT_SAMPLE_COMPLETED)
+		kbase_instr_hwcnt_sample_done(kbdev);
+
+	if (val & CLEAN_CACHES_COMPLETED)
+		kbase_clean_caches_done(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+
+	/* kbase_pm_check_transitions must be called after the IRQ has been
+	 * cleared. This is because it might trigger further power transitions
+	 * and we don't want to miss the interrupt raised to notify us that
+	 * these further transitions have finished.
+	 */
+	if (val & POWER_CHANGED_ALL)
+		kbase_pm_power_changed(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
new file mode 100644
index 0000000..5b20445
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access device APIs
+ */
+
+#ifndef _KBASE_DEVICE_INTERNAL_H_
+#define _KBASE_DEVICE_INTERNAL_H_
+
+/**
+ * kbase_reg_write - write to GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @value:  Value to write
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_reg_read - read from GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ *
+ * Return: Value in desired register
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+
+/**
+ * kbase_gpu_interrupt - GPU interrupt handler
+ * @kbdev: Kbase device pointer
+ * @val:   The value of the GPU IRQ status register which triggered the call
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be
+ * handled.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+#endif /* _KBASE_DEVICE_INTERNAL_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
new file mode 100644
index 0000000..6bceba1
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbasep_platform_device_init(kbdev);
+	if (err)
+		return err;
+
+	err = kbase_pm_runtime_init(kbdev);
+	if (err)
+		goto fail_runtime_pm;
+
+	/* Ensure we can access the GPU registers */
+	kbase_pm_register_access_enable(kbdev);
+
+	/* Find out GPU properties based on the GPU feature registers */
+	kbase_gpuprops_set(kbdev);
+
+	/* We're done accessing the GPU registers for now. */
+	kbase_pm_register_access_disable(kbdev);
+
+	err = kbase_install_interrupts(kbdev);
+	if (err)
+		goto fail_interrupts;
+
+	err = kbase_hwaccess_pm_init(kbdev);
+	if (err)
+		goto fail_pm;
+
+	return 0;
+
+fail_pm:
+	kbase_release_interrupts(kbdev);
+fail_interrupts:
+	kbase_pm_runtime_term(kbdev);
+fail_runtime_pm:
+	kbasep_platform_device_term(kbdev);
+
+	return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_term(kbdev);
+	kbase_release_interrupts(kbdev);
+	kbase_pm_runtime_term(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		return err;
+
+	err = kbase_backend_timer_init(kbdev);
+	if (err)
+		goto fail_timer;
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+		dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+		err = -EINVAL;
+		goto fail_interrupt_test;
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = kbase_job_slot_init(kbdev);
+	if (err)
+		goto fail_job_slot;
+
+	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+	return 0;
+
+fail_job_slot:
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	kbase_backend_timer_term(kbdev);
+fail_timer:
+	kbase_hwaccess_pm_halt(kbdev);
+
+	return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+	kbase_job_slot_halt(kbdev);
+	kbase_job_slot_term(kbdev);
+	kbase_backend_timer_term(kbdev);
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100644
index 0000000..b395325
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,110 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query backend APIs
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	int i;
+
+	/* Fill regdump with the content of the relevant registers */
+	regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
+
+	regdump->l2_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_FEATURES), NULL);
+	regdump->suspend_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SUSPEND_SIZE), NULL);
+	regdump->tiler_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_FEATURES), NULL);
+	regdump->mem_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MEM_FEATURES), NULL);
+	regdump->mmu_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MMU_FEATURES), NULL);
+	regdump->as_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(AS_PRESENT), NULL);
+	regdump->js_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_PRESENT), NULL);
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		regdump->js_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		regdump->texture_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
+
+	regdump->thread_max_threads = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
+	regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
+									NULL);
+	regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
+	regdump->thread_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_FEATURES), NULL);
+
+	regdump->shader_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
+	regdump->shader_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
+
+	regdump->tiler_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
+	regdump->tiler_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
+
+	regdump->l2_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
+	regdump->l2_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
+
+	regdump->stack_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(STACK_PRESENT_LO), NULL);
+	regdump->stack_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(STACK_PRESENT_HI), NULL);
+}
+
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
+		/* Ensure we can access the GPU registers */
+		kbase_pm_register_access_enable(kbdev);
+
+		regdump->coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+		/* We're done accessing the GPU registers for now. */
+		kbase_pm_register_access_disable(kbdev);
+	} else {
+		/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
+		regdump->coherency_features =
+				COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+	}
+}
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
new file mode 100644
index 0000000..7ad309e
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -0,0 +1,492 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * GPU backend instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+
+/**
+ * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
+ * hardware
+ *
+ * @kbdev: Kbase device
+ */
+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long pm_flags;
+	u32 irq_mask;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask | CLEAN_CACHES_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	int ret;
+	u64 shader_cores_needed;
+	u32 prfcnt_config;
+
+	shader_cores_needed = kbase_pm_get_present_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* alignment failure */
+	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
+		goto out_err;
+
+	/* Override core availability policy to ensure all cores are available
+	 */
+	kbase_pm_ca_instr_enable(kbdev);
+
+	/* Request the cores early on synchronously - we'll release them on any
+	 * errors (e.g. instrumentation already active) */
+	kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		/* Instrumentation is already enabled */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		goto out_unrequest_cores;
+	}
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
+						PRFCNT_SAMPLE_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* In use, this context is the owner */
+	kbdev->hwcnt.kctx = kctx;
+	/* Remember the dump address so we can reprogram it later */
+	kbdev->hwcnt.addr = setup->dump_buffer;
+
+	/* Request the clean */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+	kbdev->hwcnt.backend.triggered = 0;
+	/* Clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+	KBASE_DEBUG_ASSERT(ret);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Wait for cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	/* Configure */
+	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	{
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+	}
+#endif
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					setup->dump_buffer & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					setup->dump_buffer >> 32,        kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+					setup->jm_bm,                    kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+					setup->shader_bm,                kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+					setup->mmu_l2_bm,                kctx);
+	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
+	 * HW counter dump. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
+									kctx);
+	else
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	err = 0;
+
+	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+	return err;
+ out_unrequest_cores:
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ out_err:
+	return err;
+}
+
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	while (1) {
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
+			/* Instrumentation is not enabled */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.kctx != kctx) {
+			/* Instrumentation has been setup for another context */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
+			break;
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+		/* Ongoing dump/setup - wait for its completion */
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Disable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+
+	/* Disable the counters */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+
+	kbdev->hwcnt.kctx = NULL;
+	kbdev->hwcnt.addr = 0ULL;
+
+	kbase_pm_ca_instr_disable(kbdev);
+
+	kbase_pm_unrequest_cores(kbdev, true,
+		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+
+	kbase_pm_release_l2_caches(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+									kctx);
+
+	err = 0;
+
+ out:
+	return err;
+}
+
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.kctx != kctx) {
+		/* The instrumentation has been setup for another context */
+		goto unlock;
+	}
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
+		/* HW counters are disabled or another dump is ongoing, or we're
+		 * resetting */
+		goto unlock;
+	}
+
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Mark that we're dumping - the PF handler can signal that we faulted
+	 */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+	/* Reconfigure the dump address */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					kbdev->hwcnt.addr >> 32, NULL);
+
+	/* Start dumping */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
+					kbdev->hwcnt.addr, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+
+	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+
+	err = 0;
+
+ unlock:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
+
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success)
+{
+	unsigned long flags;
+	bool complete = false;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+		*success = true;
+		complete = true;
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		*success = false;
+		complete = true;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
+
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(data, struct kbase_device,
+						hwcnt.backend.cache_clean_work);
+
+	mutex_lock(&kbdev->cacheclean_lock);
+	kbasep_instr_hwcnt_cacheclean(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	/* Wait for our condition, and any reset to complete */
+	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+				kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_CLEANED);
+
+	/* All finished and idle */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
+		int ret;
+		/* Always clean and invalidate the cache after a successful dump
+		 */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+		ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+		KBASE_DEBUG_ASSERT(ret);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		unsigned long flags;
+		unsigned long pm_flags;
+
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+		/* Disable interrupt */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+									NULL);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+		/* Wakeup... */
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+			/* Only wake if we weren't resetting */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+			wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+		}
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	}
+}
+
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long flags;
+	int err;
+
+	/* Wait for dump & cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		err = -EINVAL;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	} else {
+		/* Dump done */
+		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+		err = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return err;
+}
+
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* Check it's the context previously set up and we're not already
+	 * dumping */
+	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
+							KBASE_INSTR_STATE_IDLE)
+		goto out;
+
+	/* Clear the counters */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_PRFCNT_CLEAR, kctx);
+
+	err = 0;
+
+out:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+int kbase_instr_backend_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
+	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
+	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
+						kbasep_cache_clean_worker);
+	kbdev->hwcnt.backend.triggered = 0;
+
+	kbdev->hwcnt.backend.cache_clean_wq =
+			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
+	if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+void kbase_instr_backend_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+}
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
new file mode 100644
index 0000000..4794672
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+	/* State where instrumentation is not active */
+	KBASE_INSTR_STATE_DISABLED = 0,
+	/* State machine is active and ready for a command. */
+	KBASE_INSTR_STATE_IDLE,
+	/* Hardware is currently dumping a frame. */
+	KBASE_INSTR_STATE_DUMPING,
+	/* We've requested a clean to occur on a workqueue */
+	KBASE_INSTR_STATE_REQUEST_CLEAN,
+	/* Hardware is currently cleaning and invalidating caches. */
+	KBASE_INSTR_STATE_CLEANING,
+	/* Cache clean completed, and either a) a dump is complete, or
+	 * b) instrumentation can now be setup. */
+	KBASE_INSTR_STATE_CLEANED,
+	/* An error has occured during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+	wait_queue_head_t wait;
+	int triggered;
+
+	enum kbase_instr_state state;
+	wait_queue_head_t cache_clean_wait;
+	struct workqueue_struct *cache_clean_wq;
+	struct work_struct  cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100644
index 0000000..e96aeae
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100644
index 0000000..8781561
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific IRQ APIs
+ */
+
+#ifndef _KBASE_IRQ_INTERNAL_H_
+#define _KBASE_IRQ_INTERNAL_H_
+
+int kbase_install_interrupts(struct kbase_device *kbdev);
+
+void kbase_release_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
+ *                          execution
+ * @kbdev: The kbase device
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev);
+
+#endif /* _KBASE_IRQ_INTERNAL_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
new file mode 100644
index 0000000..8416b80
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -0,0 +1,469 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+
+#include <linux/interrupt.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static void *kbase_tag(void *ptr, u32 tag)
+{
+	return (void *)(((uintptr_t) ptr) | tag);
+}
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_job_done(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	atomic_inc(&kbdev->faults_pending);
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val) {
+		atomic_dec(&kbdev->faults_pending);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_mmu_interrupt(kbdev, val);
+
+	atomic_dec(&kbdev->faults_pending);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_gpu_interrupt(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler);
+
+static irq_handler_t kbase_handler_table[] = {
+	[JOB_IRQ_TAG] = kbase_job_irq_handler,
+	[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+	[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
+
+#ifdef CONFIG_MALI_DEBUG
+#define  JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define  MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define  GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * kbase_set_custom_irq_handler - Set a custom IRQ handler
+ * @kbdev: Device for which the handler is to be registered
+ * @custom_handler: Handler to be registered
+ * @irq_type: Interrupt type
+ *
+ * Registers given interrupt handler for requested interrupt type
+ * In the case where irq handler is not specified, the default handler shall be
+ * registered
+ *
+ * Return: 0 case success, error code otherwise
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+					irq_handler_t custom_handler,
+					int irq_type)
+{
+	int result = 0;
+	irq_handler_t requested_irq_handler = NULL;
+
+	KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
+						(GPU_IRQ_HANDLER >= irq_type));
+
+	/* Release previous handler */
+	if (kbdev->irqs[irq_type].irq)
+		free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
+
+	requested_irq_handler = (NULL != custom_handler) ? custom_handler :
+						kbase_handler_table[irq_type];
+
+	if (0 != request_irq(kbdev->irqs[irq_type].irq,
+			requested_irq_handler,
+			kbdev->irqs[irq_type].flags | IRQF_SHARED,
+			dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+		result = -EINVAL;
+		dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+					kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+		dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/* test correct interrupt assigment and reception by cpu */
+struct kbasep_irq_test {
+	struct hrtimer timer;
+	wait_queue_head_t wait;
+	int triggered;
+	u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT    500
+
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+	struct kbasep_irq_test *test_data = container_of(timer,
+						struct kbasep_irq_test, timer);
+
+	test_data->timeout = 1;
+	test_data->triggered = 1;
+	wake_up(&test_data->wait);
+	return HRTIMER_NORESTART;
+}
+
+static int kbasep_common_test_interrupt(
+				struct kbase_device * const kbdev, u32 tag)
+{
+	int err = 0;
+	irq_handler_t test_handler;
+
+	u32 old_mask_val;
+	u16 mask_offset;
+	u16 rawstat_offset;
+
+	switch (tag) {
+	case JOB_IRQ_TAG:
+		test_handler = kbase_job_irq_test_handler;
+		rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+		mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+		break;
+	case MMU_IRQ_TAG:
+		test_handler = kbase_mmu_irq_test_handler;
+		rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+		mask_offset = MMU_REG(MMU_IRQ_MASK);
+		break;
+	case GPU_IRQ_TAG:
+		/* already tested by pm_driver - bail out */
+	default:
+		return 0;
+	}
+
+	/* store old mask */
+	old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+	/* mask interrupts */
+	kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+	if (kbdev->irqs[tag].irq) {
+		/* release original handler and install test handler */
+		if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
+			err = -EINVAL;
+		} else {
+			kbasep_irq_test_data.timeout = 0;
+			hrtimer_init(&kbasep_irq_test_data.timer,
+					CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+			kbasep_irq_test_data.timer.function =
+						kbasep_test_interrupt_timeout;
+
+			/* trigger interrupt */
+			kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
+			kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+
+			hrtimer_start(&kbasep_irq_test_data.timer,
+					HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
+					HRTIMER_MODE_REL);
+
+			wait_event(kbasep_irq_test_data.wait,
+					kbasep_irq_test_data.triggered != 0);
+
+			if (kbasep_irq_test_data.timeout != 0) {
+				dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+				err = -EINVAL;
+			} else {
+				dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+			}
+
+			hrtimer_cancel(&kbasep_irq_test_data.timer);
+			kbasep_irq_test_data.triggered = 0;
+
+			/* mask interrupts */
+			kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+			/* release test handler */
+			free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+		}
+
+		/* restore original interrupt */
+		if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
+				kbdev->irqs[tag].flags | IRQF_SHARED,
+				dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+			dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
+						kbdev->irqs[tag].irq, tag);
+			err = -EINVAL;
+		}
+	}
+	/* restore old mask */
+	kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+
+	return err;
+}
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev)
+{
+	int err;
+
+	init_waitqueue_head(&kbasep_irq_test_data.wait);
+	kbasep_irq_test_data.triggered = 0;
+
+	/* A suspend won't happen during startup/insmod */
+	kbase_pm_context_active(kbdev);
+
+	err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+
+ out:
+	kbase_pm_context_idle(kbdev);
+
+	return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	int err;
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
+				kbdev->irqs[i].flags | IRQF_SHARED,
+				dev_name(kbdev->dev),
+				kbase_tag(kbdev, i));
+		if (err) {
+			dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+							kbdev->irqs[i].irq, i);
+#ifdef CONFIG_SPARSE_IRQ
+			dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+			goto release;
+		}
+	}
+
+	return 0;
+
+ release:
+	while (i-- > 0)
+		free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+
+	return err;
+}
+
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+	}
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			synchronize_irq(kbdev->irqs[i].irq);
+	}
+}
+
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
new file mode 100644
index 0000000..c660c80
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -0,0 +1,235 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register backend context / address space management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+
+/**
+ * assign_and_activate_kctx_addr_space - Assign an AS to a context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @current_as: Address Space to assign
+ *
+ * Assign an Address Space (AS) to a context, and add the context to the Policy.
+ *
+ * This includes
+ *   setting up the global runpool_irq structure and the context on the AS,
+ *   Activating the MMU on the AS,
+ *   Allowing jobs to be submitted on the AS.
+ *
+ * Context:
+ *   kbasep_js_kctx_info.jsctx_mutex held,
+ *   kbasep_js_device_data.runpool_mutex held,
+ *   AS transaction mutex held,
+ *   Runpool IRQ lock held
+ */
+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_as *current_as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Attribute handling */
+	kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+	/* Allow it to run jobs */
+	kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+	kbase_js_runpool_inc_context_count(kbdev, kctx);
+}
+
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	int i;
+
+	if (kbdev->hwaccess.active_kctx == kctx) {
+		/* Context is already active */
+		return true;
+	}
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		if (kbdev->as_to_kctx[i] == kctx) {
+			/* Context already has ASID - mark as active */
+			return true;
+		}
+	}
+
+	/* Context does not have address space assigned */
+	return false;
+}
+
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	int as_nr = kctx->as_nr;
+
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Attempting to release context without ASID\n");
+		return;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (atomic_read(&kctx->refcount) != 1) {
+		WARN(1, "Attempting to release active ASID\n");
+		return;
+	}
+
+	kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
+
+	kbase_ctx_sched_release_ctx(kctx);
+	kbase_js_runpool_dec_context_count(kbdev, kctx);
+}
+
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+}
+
+int kbase_backend_find_and_release_free_address_space(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	int i;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_kctx_info *as_js_kctx_info;
+		struct kbase_context *as_kctx;
+
+		as_kctx = kbdev->as_to_kctx[i];
+		as_js_kctx_info = &as_kctx->jctx.sched_info;
+
+		/* Don't release privileged or active contexts, or contexts with
+		 * jobs running.
+		 * Note that a context will have at least 1 reference (which
+		 * was previously taken by kbasep_js_schedule_ctx()) until
+		 * descheduled.
+		 */
+		if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) &&
+			atomic_read(&as_kctx->refcount) == 1) {
+			if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
+								as_kctx)) {
+				WARN(1, "Failed to retain active context\n");
+
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				return KBASEP_AS_NR_INVALID;
+			}
+
+			kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
+
+			/* Drop and retake locks to take the jsctx_mutex on the
+			 * context we're about to release without violating lock
+			 * ordering
+			 */
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+
+			/* Release context from address space */
+			mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+
+			kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
+
+			if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) {
+				kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
+								as_kctx,
+								true);
+
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+				return i;
+			}
+
+			/* Context was retained while locks were dropped,
+			 * continue looking for free AS */
+
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_as *new_address_space = NULL;
+
+	js_devdata = &kbdev->js_data;
+
+	if (kbdev->hwaccess.active_kctx == kctx) {
+		WARN(1, "Context is already scheduled in\n");
+		return false;
+	}
+
+	new_address_space = &kbdev->as[as_nr];
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
+
+	if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+		/* We need to retain it to keep the corresponding address space
+		 */
+		kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	}
+
+	return true;
+}
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
new file mode 100644
index 0000000..08a7400
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific definitions
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
+#define _KBASE_HWACCESS_GPU_DEFS_H_
+
+/* SLOT_RB_SIZE must be < 256 */
+#define SLOT_RB_SIZE 2
+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
+
+/**
+ * struct rb_entry - Ringbuffer entry
+ * @katom:	Atom associated with this entry
+ */
+struct rb_entry {
+	struct kbase_jd_atom *katom;
+};
+
+/**
+ * struct slot_rb - Slot ringbuffer
+ * @entries:		Ringbuffer entries
+ * @last_context:	The last context to submit a job on this slot
+ * @read_idx:		Current read index of buffer
+ * @write_idx:		Current write index of buffer
+ * @job_chain_flag:	Flag used to implement jobchain disambiguation
+ */
+struct slot_rb {
+	struct rb_entry entries[SLOT_RB_SIZE];
+
+	struct kbase_context *last_context;
+
+	u8 read_idx;
+	u8 write_idx;
+
+	u8 job_chain_flag;
+};
+
+/**
+ * struct kbase_backend_data - GPU backend specific data for HW access layer
+ * @slot_rb:			Slot ringbuffers
+ * @rmu_workaround_flag:	When PRLAM-8987 is present, this flag determines
+ *				whether slots 0/1 or slot 2 are currently being
+ *				pulled from
+ * @scheduling_timer:		The timer tick used for rescheduling jobs
+ * @timer_running:		Is the timer running? The runpool_mutex must be
+ *				held whilst modifying this.
+ * @suspend_timer:              Is the timer suspended? Set when a suspend
+ *                              occurs and cleared on resume. The runpool_mutex
+ *                              must be held whilst modifying this.
+ * @reset_gpu:			Set to a KBASE_RESET_xxx value (see comments)
+ * @reset_workq:		Work queue for performing the reset
+ * @reset_work:			Work item for performing the reset
+ * @reset_wait:			Wait event signalled when the reset is complete
+ * @reset_timer:		Timeout for soft-stops before the reset
+ * @timeouts_updated:           Have timeout values just been updated?
+ *
+ * The hwaccess_lock (a spinlock) must be held when accessing this structure
+ */
+struct kbase_backend_data {
+	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
+
+	bool rmu_workaround_flag;
+
+	struct hrtimer scheduling_timer;
+
+	bool timer_running;
+	bool suspend_timer;
+
+	atomic_t reset_gpu;
+
+/* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_NOT_PENDING     0
+/* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_PREPARED        1
+/* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_COMMITTED       2
+/* The GPU reset process is currently occuring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+#define KBASE_RESET_GPU_HAPPENING       3
+/* Reset the GPU silently, used when resetting the GPU as part of normal
+ * behavior (e.g. when exiting protected mode). */
+#define KBASE_RESET_GPU_SILENT          4
+	struct workqueue_struct *reset_workq;
+	struct work_struct reset_work;
+	wait_queue_head_t reset_wait;
+	struct hrtimer reset_timer;
+
+	bool timeouts_updated;
+};
+
+/**
+ * struct kbase_jd_atom_backend - GPU backend specific katom data
+ */
+struct kbase_jd_atom_backend {
+};
+
+/**
+ * struct kbase_context_backend - GPU backend specific context data
+ */
+struct kbase_context_backend {
+};
+
+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
new file mode 100644
index 0000000..0a2a0b7
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -0,0 +1,1459 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel job manager APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_vinstr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+#define beenthere(kctx, f, a...) \
+			dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if KBASE_GPU_RESET_EN
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
+static void kbasep_reset_timeout_worker(struct work_struct *data);
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
+#endif /* KBASE_GPU_RESET_EN */
+
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
+						struct kbase_context *kctx)
+{
+	return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
+}
+
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js)
+{
+	struct kbase_context *kctx;
+	u32 cfg;
+	u64 jc_head = katom->jc;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	/* Command register must be available */
+	KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+	/* Affinity is not violating */
+	kbase_js_debug_log_current_affinities(kbdev);
+	KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
+							katom->affinity));
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
+						jc_head & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
+						jc_head >> 32, kctx);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+					katom->affinity & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+					katom->affinity >> 32, kctx);
+
+	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
+	 * start */
+	cfg = kctx->as_nr;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) &&
+			!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
+		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START))
+		cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) &&
+			!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
+		cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649))
+		cfg |= JS_CONFIG_START_MMU;
+
+	cfg |= JS_CONFIG_THREAD_PRI(8);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) &&
+		(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED))
+		cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+		if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+			cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								true;
+		} else {
+			katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								false;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
+				katom->flush_id, kctx);
+
+	/* Write an approximate start timestamp.
+	 * It's approximate because there might be a job in the HEAD register.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* GO ! */
+	dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx",
+				katom, kctx, js, jc_head, katom->affinity);
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
+							(u32) katom->affinity);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(
+				GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
+				kctx, kbase_jd_atom_id(kctx, katom));
+#endif
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head,
+			katom->affinity, cfg);
+	KBASE_TLSTREAM_TL_RET_CTX_LPU(
+		kctx,
+		&kbdev->gpu_props.props.raw_props.js_features[
+			katom->slot_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_LPU(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[js],
+			"ctx_nr,atom_nr");
+#ifdef CONFIG_GPU_TRACEPOINTS
+	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
+		/* If this is the only job on the slot, trace it as starting */
+		char js_string[16];
+
+		trace_gpu_sched_switch(
+				kbasep_make_job_slot_string(js, js_string,
+						sizeof(js_string)),
+				ktime_to_ns(katom->start_timestamp),
+				(u32)katom->kctx->id, 0, katom->work_id);
+		kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
+	}
+#endif
+	kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+						JS_COMMAND_START, katom->kctx);
+}
+
+/**
+ * kbasep_job_slot_update_head_start_timestamp - Update timestamp
+ * @kbdev: kbase device
+ * @js: job slot
+ * @end_timestamp: timestamp
+ *
+ * Update the start_timestamp of the job currently in the HEAD, based on the
+ * fact that we got an IRQ for the previous set of completed jobs.
+ *
+ * The estimate also takes into account the time the job was submitted, to
+ * work out the best estimate (which might still result in an over-estimate to
+ * the calculated time spent)
+ */
+static void kbasep_job_slot_update_head_start_timestamp(
+						struct kbase_device *kbdev,
+						int js,
+						ktime_t end_timestamp)
+{
+	if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) {
+		struct kbase_jd_atom *katom;
+		ktime_t timestamp_diff;
+		/* The atom in the HEAD */
+		katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		KBASE_DEBUG_ASSERT(katom != NULL);
+
+		timestamp_diff = ktime_sub(end_timestamp,
+				katom->start_timestamp);
+		if (ktime_to_ns(timestamp_diff) >= 0) {
+			/* Only update the timestamp if it's a better estimate
+			 * than what's currently stored. This is because our
+			 * estimate that accounts for the throttle time may be
+			 * too much of an overestimate */
+			katom->start_timestamp = end_timestamp;
+		}
+	}
+}
+
+/**
+ * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline
+ * tracepoint
+ * @kbdev: kbase device
+ * @js: job slot
+ *
+ * Make a tracepoint call to the instrumentation module informing that
+ * softstop happened on given lpu (job slot).
+ */
+static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
+					int js)
+{
+	KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(
+		&kbdev->gpu_props.props.raw_props.js_features[js]);
+}
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done)
+{
+	unsigned long flags;
+	int i;
+	u32 count = 0;
+	ktime_t end_timestamp = ktime_get();
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
+
+	memset(&kbdev->slot_submit_count_irq[0], 0,
+					sizeof(kbdev->slot_submit_count_irq));
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	while (done) {
+		u32 failed = done >> 16;
+
+		/* treat failed slots as finished slots */
+		u32 finished = (done & 0xFFFF) | failed;
+
+		/* Note: This is inherently unfair, as we always check
+		 * for lower numbered interrupts before the higher
+		 * numbered ones.*/
+		i = ffs(finished) - 1;
+		KBASE_DEBUG_ASSERT(i >= 0);
+
+		do {
+			int nr_done;
+			u32 active;
+			u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */
+			u64 job_tail = 0;
+
+			if (failed & (1u << i)) {
+				/* read out the job slot status code if the job
+				 * slot reported failure */
+				completion_code = kbase_reg_read(kbdev,
+					JOB_SLOT_REG(i, JS_STATUS), NULL);
+
+				switch (completion_code) {
+				case BASE_JD_EVENT_STOPPED:
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+					kbase_trace_mali_job_slots_event(
+						GATOR_MAKE_EVENT(
+						GATOR_JOB_SLOT_SOFT_STOPPED, i),
+								NULL, 0);
+#endif
+
+					kbasep_trace_tl_event_lpu_softstop(
+						kbdev, i);
+
+					/* Soft-stopped job - read the value of
+					 * JS<n>_TAIL so that the job chain can
+					 * be resumed */
+					job_tail = (u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_LO),
+									NULL) |
+						((u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_HI),
+								NULL) << 32);
+					break;
+				case BASE_JD_EVENT_NOT_STARTED:
+					/* PRLAM-10673 can cause a TERMINATED
+					 * job to come back as NOT_STARTED, but
+					 * the error interrupt helps us detect
+					 * it */
+					completion_code =
+						BASE_JD_EVENT_TERMINATED;
+					/* fall through */
+				default:
+					dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+							i, completion_code,
+							kbase_exception_name
+							(kbdev,
+							completion_code));
+				}
+
+				kbase_gpu_irq_evict(kbdev, i);
+			}
+
+			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+					done & ((1 << i) | (1 << (i + 16))),
+					NULL);
+			active = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_JS_STATE),
+					NULL);
+
+			if (((active >> i) & 1) == 0 &&
+					(((done >> (i + 16)) & 1) == 0)) {
+				/* There is a potential race we must work
+				 * around:
+				 *
+				 *  1. A job slot has a job in both current and
+				 *     next registers
+				 *  2. The job in current completes
+				 *     successfully, the IRQ handler reads
+				 *     RAWSTAT and calls this function with the
+				 *     relevant bit set in "done"
+				 *  3. The job in the next registers becomes the
+				 *     current job on the GPU
+				 *  4. Sometime before the JOB_IRQ_CLEAR line
+				 *     above the job on the GPU _fails_
+				 *  5. The IRQ_CLEAR clears the done bit but not
+				 *     the failed bit. This atomically sets
+				 *     JOB_IRQ_JS_STATE. However since both jobs
+				 *     have now completed the relevant bits for
+				 *     the slot are set to 0.
+				 *
+				 * If we now did nothing then we'd incorrectly
+				 * assume that _both_ jobs had completed
+				 * successfully (since we haven't yet observed
+				 * the fail bit being set in RAWSTAT).
+				 *
+				 * So at this point if there are no active jobs
+				 * left we check to see if RAWSTAT has a failure
+				 * bit set for the job slot. If it does we know
+				 * that there has been a new failure that we
+				 * didn't previously know about, so we make sure
+				 * that we record this in active (but we wait
+				 * for the next loop to deal with it).
+				 *
+				 * If we were handling a job failure (i.e. done
+				 * has the relevant high bit set) then we know
+				 * that the value read back from
+				 * JOB_IRQ_JS_STATE is the correct number of
+				 * remaining jobs because the failed job will
+				 * have prevented any futher jobs from starting
+				 * execution.
+				 */
+				u32 rawstat = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+				if ((rawstat >> (i + 16)) & 1) {
+					/* There is a failed job that we've
+					 * missed - add it back to active */
+					active |= (1u << i);
+				}
+			}
+
+			dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+							completion_code);
+
+			nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+			nr_done -= (active >> i) & 1;
+			nr_done -= (active >> (i + 16)) & 1;
+
+			if (nr_done <= 0) {
+				dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+									i);
+
+				goto spurious;
+			}
+
+			count += nr_done;
+
+			while (nr_done) {
+				if (nr_done == 1) {
+					kbase_gpu_complete_hw(kbdev, i,
+								completion_code,
+								job_tail,
+								&end_timestamp);
+					kbase_jm_try_kick_all(kbdev);
+				} else {
+					/* More than one job has completed.
+					 * Since this is not the last job being
+					 * reported this time it must have
+					 * passed. This is because the hardware
+					 * will not allow further jobs in a job
+					 * slot to complete until the failed job
+					 * is cleared from the IRQ status.
+					 */
+					kbase_gpu_complete_hw(kbdev, i,
+							BASE_JD_EVENT_DONE,
+							0,
+							&end_timestamp);
+				}
+				nr_done--;
+			}
+ spurious:
+			done = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
+				/* Workaround for missing interrupt caused by
+				 * PRLAM-10883 */
+				if (((active >> i) & 1) && (0 ==
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(i,
+							JS_STATUS), NULL))) {
+					/* Force job slot to be processed again
+					 */
+					done |= (1u << i);
+				}
+			}
+
+			failed = done >> 16;
+			finished = (done & 0xFFFF) | failed;
+			if (done)
+				end_timestamp = ktime_get();
+		} while (finished & (1 << i));
+
+		kbasep_job_slot_update_head_start_timestamp(kbdev, i,
+								end_timestamp);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#if KBASE_GPU_RESET_EN
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_COMMITTED) {
+		/* If we're trying to reset the GPU then we might be able to do
+		 * it early (without waiting for a timeout) because some jobs
+		 * have completed
+		 */
+		kbasep_try_reset_gpu_early(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
+}
+KBASE_EXPORT_TEST_API(kbase_job_done);
+
+static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	bool soft_stops_allowed = true;
+
+	if (kbase_jd_katom_is_protected(katom)) {
+		soft_stops_allowed = false;
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+		if ((katom->core_req & BASE_JD_REQ_T) != 0)
+			soft_stops_allowed = false;
+	}
+	return soft_stops_allowed;
+}
+
+static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
+						base_jd_core_req core_reqs)
+{
+	bool hard_stops_allowed = true;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
+		if ((core_reqs & BASE_JD_REQ_T) != 0)
+			hard_stops_allowed = false;
+	}
+	return hard_stops_allowed;
+}
+
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom)
+{
+	struct kbase_context *kctx = target_katom->kctx;
+#if KBASE_TRACE_ENABLE
+	u32 status_reg_before;
+	u64 job_in_head_before;
+	u32 status_reg_after;
+
+	KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+
+	/* Check the head pointer */
+	job_in_head_before = ((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
+			| (((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_HI), NULL))
+									<< 32);
+	status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+#endif
+
+	if (action == JS_COMMAND_SOFT_STOP) {
+		bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
+								target_katom);
+
+		if (!soft_stop_allowed) {
+#ifdef CONFIG_MALI_DEBUG
+			dev_dbg(kbdev->dev,
+					"Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+
+		/* We are about to issue a soft stop, so mark the atom as having
+		 * been soft stopped */
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+
+		/* Mark the point where we issue the soft-stop command */
+		KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(target_katom);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+			int i;
+
+			for (i = 0;
+			     i < kbase_backend_nr_atoms_submitted(kbdev, js);
+			     i++) {
+				struct kbase_jd_atom *katom;
+
+				katom = kbase_gpu_inspect(kbdev, js, i);
+
+				KBASE_DEBUG_ASSERT(katom);
+
+				/* For HW_ISSUE_8316, only 'bad' jobs attacking
+				 * the system can cause this issue: normally,
+				 * all memory should be allocated in multiples
+				 * of 4 pages, and growable memory should be
+				 * changed size in multiples of 4 pages.
+				 *
+				 * Whilst such 'bad' jobs can be cleared by a
+				 * GPU reset, the locking up of a uTLB entry
+				 * caused by the bad job could also stall other
+				 * ASs, meaning that other ASs' jobs don't
+				 * complete in the 'grace' period before the
+				 * reset. We don't want to lose other ASs' jobs
+				 * when they would normally complete fine, so we
+				 * must 'poke' the MMU regularly to help other
+				 * ASs complete */
+				kbase_as_poking_timer_retain_atom(
+						kbdev, katom->kctx, katom);
+			}
+		}
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_SOFT_STOP_1 :
+				JS_COMMAND_SOFT_STOP_0;
+		}
+	} else if (action == JS_COMMAND_HARD_STOP) {
+		bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
+								core_reqs);
+
+		if (!hard_stop_allowed) {
+			/* Jobs can be hard-stopped for the following reasons:
+			 *  * CFS decides the job has been running too long (and
+			 *    soft-stop has not occurred). In this case the GPU
+			 *    will be reset by CFS if the job remains on the
+			 *    GPU.
+			 *
+			 *  * The context is destroyed, kbase_jd_zap_context
+			 *    will attempt to hard-stop the job. However it also
+			 *    has a watchdog which will cause the GPU to be
+			 *    reset if the job remains on the GPU.
+			 *
+			 *  * An (unhandled) MMU fault occurred. As long as
+			 *    BASE_HW_ISSUE_8245 is defined then the GPU will be
+			 *    reset.
+			 *
+			 * All three cases result in the GPU being reset if the
+			 * hard-stop fails, so it is safe to just return and
+			 * ignore the hard-stop request.
+			 */
+			dev_warn(kbdev->dev,
+					"Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+			return;
+		}
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_HARD_STOP_1 :
+				JS_COMMAND_HARD_STOP_0;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
+
+#if KBASE_TRACE_ENABLE
+	status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+									NULL);
+	if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
+		struct kbase_jd_atom *head;
+		struct kbase_context *head_kctx;
+
+		head = kbase_gpu_inspect(kbdev, js, 0);
+		head_kctx = head->kctx;
+
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx,
+						head, job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+						0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	} else {
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+#endif
+}
+
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Cancel any remaining running jobs for this kctx  */
+	mutex_lock(&kctx->jctx.lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Invalidate all jobs in context, to prevent re-submitting */
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		if (!work_pending(&kctx->jctx.atoms[i].work))
+			kctx->jctx.atoms[i].event_code =
+						BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_hardstop(kctx, i, NULL);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev;
+	int js = target_katom->slot_nr;
+	int priority = target_katom->sched_priority;
+	int i;
+	bool stop_sent = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		if (katom->kctx != kctx)
+			continue;
+
+		if (katom->sched_priority > priority) {
+			if (!stop_sent)
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE(
+						target_katom);
+
+			kbase_job_slot_softstop(kbdev, js, katom);
+			stop_sent = true;
+		}
+	}
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long timeout = msecs_to_jiffies(ZAP_TIMEOUT);
+
+	timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait,
+			kctx->jctx.job_nr == 0, timeout);
+
+	if (timeout != 0)
+		timeout = wait_event_timeout(
+			kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			!kbase_ctx_flag(kctx, KCTX_SCHEDULED),
+			timeout);
+
+	/* Neither wait timed out; all done! */
+	if (timeout != 0)
+		goto exit;
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_prepare_to_reset_gpu(kbdev)) {
+		dev_err(kbdev->dev,
+			"Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+			ZAP_TIMEOUT);
+		kbase_reset_gpu(kbdev);
+	}
+
+	/* Wait for the reset to complete */
+	wait_event(kbdev->hwaccess.backend.reset_wait,
+			atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+			== KBASE_RESET_GPU_NOT_PENDING);
+#else
+	dev_warn(kbdev->dev,
+		"Jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+		ZAP_TIMEOUT);
+
+#endif
+exit:
+	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+	/* Ensure that the signallers of the waitqs have finished */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
+{
+	u32 flush_id = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.backend.gpu_powered)
+			flush_id = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return flush_id;
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (NULL == kbdev->hwaccess.backend.reset_workq)
+		return -EINVAL;
+
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+#endif
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
+ * @kbdev: kbase device pointer
+ * @kctx:  context to check against
+ * @js:	   slot to check
+ * @target_katom: An atom to check, or NULL if all atoms from @kctx on
+ *                slot @js should be checked
+ *
+ * This checks are based upon parameters that would normally be passed to
+ * kbase_job_slot_hardstop().
+ *
+ * In the event of @target_katom being NULL, this will check the last jobs that
+ * are likely to be running on the slot to see if a) they belong to kctx, and
+ * so would be stopped, and b) whether they have AFBC
+ *
+ * In that case, It's guaranteed that a job currently executing on the HW with
+ * AFBC will be detected. However, this is a conservative check because it also
+ * detects jobs that have just completed too.
+ *
+ * Return: true when hard-stop _might_ stop an afbc atom, else false.
+ */
+static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
+		struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom)
+{
+	bool ret = false;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* When we have an atom the decision can be made straight away. */
+	if (target_katom)
+		return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC);
+
+	/* Otherwise, we must chweck the hardware to see if it has atoms from
+	 * this context with AFBC. */
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		/* Ignore atoms from other contexts, they won't be stopped when
+		 * we use this for checking if we should hard-stop them */
+		if (katom->kctx != kctx)
+			continue;
+
+		/* An atom on this slot and this context: check for AFBC */
+		if (katom->core_req & BASE_JD_REQ_FS_AFBC) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
+}
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * @sw_flags:      Flags to pass in about the soft-stop
+ *
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+			struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+	KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+	kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
+			JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool stopped;
+#if KBASE_GPU_RESET_EN
+	/* We make the check for AFBC before evicting/stopping atoms.  Note
+	 * that no other thread can modify the slots whilst we have the
+	 * hwaccess_lock. */
+	int needs_workaround_for_afbc =
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
+			&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
+					 target_katom);
+#endif
+
+	stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
+							target_katom,
+							JS_COMMAND_HARD_STOP);
+#if KBASE_GPU_RESET_EN
+	if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+			needs_workaround_for_afbc)) {
+		/* MIDBASE-2916 if a fragment job with AFBC encoding is
+		 * hardstopped, ensure to do a soft reset also in order to
+		 * clear the GPU status.
+		 * Workaround for HW issue 8401 has an issue,so after
+		 * hard-stopping just reset the GPU. This will ensure that the
+		 * jobs leave the GPU.*/
+		if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+			dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue");
+			kbase_reset_gpu_locked(kbdev);
+		}
+	}
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	/* For hard-stop, don't enter if hard-stop not allowed */
+	if (hw_action == JS_COMMAND_HARD_STOP &&
+			!kbasep_hard_stop_allowed(kbdev, core_reqs))
+		return;
+
+	/* For soft-stop, don't enter if soft-stop not allowed, or isn't
+	 * causing disjoint */
+	if (hw_action == JS_COMMAND_SOFT_STOP &&
+			!(kbasep_soft_stop_allowed(kbdev, target_katom) &&
+			  (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+		return;
+
+	/* Nothing to do if already logged disjoint state on this atom */
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+		return;
+
+	target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+	kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom)
+{
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+		target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+		kbase_disjoint_state_down(kbdev);
+	}
+}
+
+
+#if KBASE_GPU_RESET_EN
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+	int i;
+
+	kbase_io_history_dump(kbdev);
+
+	dev_err(kbdev->dev, "Register state:");
+	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+	dev_err(kbdev->dev, "  JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x",
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL));
+	for (i = 0; i < 3; i++) {
+		dev_err(kbdev->dev, "  JS%d_STATUS=0x%08x      JS%d_HEAD_LO=0x%08x",
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
+					NULL),
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
+					NULL));
+	}
+	dev_err(kbdev->dev, "  MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+	dev_err(kbdev->dev, "  GPU_IRQ_MASK=0x%08x    JOB_IRQ_MASK=0x%08x     MMU_IRQ_MASK=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+	dev_err(kbdev->dev, "  PWR_OVERRIDE0=0x%08x   PWR_OVERRIDE1=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+	dev_err(kbdev->dev, "  SHADER_CONFIG=0x%08x   L2_MMU_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+	dev_err(kbdev->dev, "  TILER_CONFIG=0x%08x    JM_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG), NULL),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG), NULL));
+}
+
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+	bool try_schedule = false;
+	bool silent = false;
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	KBASE_DEBUG_ASSERT(data);
+
+	kbdev = container_of(data, struct kbase_device,
+						hwaccess.backend.reset_work);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_SILENT)
+		silent = true;
+
+	KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	/* Make sure the timer has completed - this cannot be done from
+	 * interrupt context, so this cannot be done within
+	 * kbasep_try_reset_gpu_early. */
+	hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* This would re-activate the GPU. Since it's already idle,
+		 * there's no need to reset it */
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+		kbase_disjoint_state_down(kbdev);
+		wake_up(&kbdev->hwaccess.backend.reset_wait);
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	spin_lock(&kbdev->hwaccess_lock);
+	spin_lock(&kbdev->mmu_mask_change);
+	/* We're about to flush out the IRQs and their bottom half's */
+	kbdev->irq_reset_flush = true;
+
+	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
+	 * spinlock; this also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts_nolock(kbdev);
+
+	spin_unlock(&kbdev->mmu_mask_change);
+	spin_unlock(&kbdev->hwaccess_lock);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Ensure that any IRQ handlers have finished
+	 * Must be done without any locks IRQ handlers will take */
+	kbase_synchronize_irqs(kbdev);
+
+	/* Flush out any in-flight work items */
+	kbase_flush_mmu_wqs(kbdev);
+
+	/* The flush has completed so reset the active indicator */
+	kbdev->irq_reset_flush = false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
+		/* Ensure that L2 is not transitioning when we send the reset
+		 * command */
+		while (--max_loops && kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_L2))
+			;
+
+		WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
+	}
+
+	mutex_lock(&kbdev->pm.lock);
+	/* We hold the pm lock, so there ought to be a current policy */
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+	/* All slot have been soft-stopped and we've waited
+	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
+	 * assume that anything that is still left on the GPU is stuck there and
+	 * we'll kill it when we reset the GPU */
+
+	if (!silent)
+		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+								RESET_TIMEOUT);
+
+	/* Output the state of some interesting registers to help in the
+	 * debugging of GPU resets */
+	if (!silent)
+		kbase_debug_dump_registers(kbdev);
+
+	/* Complete any jobs that were still on the GPU */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->protected_mode = false;
+	kbase_backend_reset(kbdev, &end_timestamp);
+	kbase_pm_metrics_update(kbdev, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Reset the GPU */
+	kbase_pm_init_hw(kbdev, 0);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_restore_all_as(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_pm_enable_interrupts(kbdev);
+
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+
+	kbase_disjoint_state_down(kbdev);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	if (!silent)
+		dev_err(kbdev->dev, "Reset complete");
+
+	if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
+		try_schedule = true;
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Find out what cores are required now */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* Synchronously request and wait for those cores, because if
+	 * instrumentation is enabled it would need them immediately. */
+	kbase_pm_check_transitions_sync(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Try submitting some jobs to restart processing */
+	if (try_schedule) {
+		KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
+									0);
+		kbase_js_sched_all(kbdev);
+	}
+
+	/* Process any pending slot updates */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_context_idle(kbdev);
+
+	/* Release vinstr */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+						hwaccess.backend.reset_timer);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Reset still pending? */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+						KBASE_RESET_GPU_COMMITTED)
+		queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+	int i;
+	int pending_jobs = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Count the number of jobs */
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+	if (pending_jobs > 0) {
+		/* There are still jobs on the GPU - wait */
+		return;
+	}
+
+	/* To prevent getting incorrect registers when dumping failed job,
+	 * skip early reset.
+	 */
+	if (kbdev->job_fault_debug != false)
+		return;
+
+	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+	 * been called), and that no other thread beat this thread to starting
+	 * the reset */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+						KBASE_RESET_GPU_COMMITTED) {
+		/* Reset has already occurred */
+		return;
+	}
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_try_reset_gpu_early_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ *   The function returns a boolean which should be interpreted as follows:
+ *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
+ *   false - Another thread is performing a reset, kbase_reset_gpu should
+ *   not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_PREPARED) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return false;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_softstop(kbdev, i, NULL);
+
+	return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	bool ret;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early_locked(kbdev);
+}
+
+void kbase_reset_gpu_silent(struct kbase_device *kbdev)
+{
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_SILENT) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+			&kbdev->hwaccess.backend.reset_work);
+}
+
+bool kbase_reset_gpu_active(struct kbase_device *kbdev)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_NOT_PENDING)
+		return false;
+
+	return true;
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100644
index 0000000..1f382b3
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev:		Device pointer
+ * @s:			Job slot
+ * @completion_code:	Completion code of job reported by GPU
+ * @job_tail:		Job tail address reported by GPU
+ * @end_timestamp:	Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+					u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static inline char *kbasep_make_job_slot_string(int js, char *js_string,
+						size_t js_size)
+{
+	snprintf(js_string, js_size, "job_slot_%i", js);
+	return js_string;
+}
+#endif
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ *						   on the specified atom
+ * @kbdev:		Device pointer
+ * @js:			Job slot to stop on
+ * @action:		The action to perform, either JSn_COMMAND_HARD_STOP or
+ *			JSn_COMMAND_SOFT_STOP
+ * @core_reqs:		Core requirements of atom to stop
+ * @target_katom:	Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ *					 slot belonging to a given context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @katom:	Specific atom to stop. May be NULL
+ * @js:		Job slot to hard stop
+ * @action:	The action to perform, either JSn_COMMAND_HARD_STOP or
+ *		JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_cacheclean - Cause a GPU cache clean & flush
+ * @kbdev: Device pointer
+ *
+ * Caller must not be in IRQ context
+ */
+void kbase_gpu_cacheclean(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
new file mode 100644
index 0000000..a41e7b5
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -0,0 +1,1947 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_10969_workaround.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Return whether the specified ringbuffer is empty. HW access lock must be
+ * held */
+#define SLOT_RB_EMPTY(rb)   (rb->write_idx == rb->read_idx)
+/* Return number of atoms currently in the specified ringbuffer. HW access lock
+ * must be held */
+#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer
+ * @kbdev: Device pointer
+ * @katom: Atom to enqueue
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr];
+
+	WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
+	rb->write_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+/**
+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once
+ * it has been completed
+ * @kbdev:         Device pointer
+ * @js:            Job slot to remove atom from
+ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in
+ *                 which case current time will be used.
+ *
+ * Context: Caller must hold the HW access lock
+ *
+ * Return: Atom removed from ringbuffer
+ */
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
+						int js,
+						ktime_t *end_timestamp)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+	struct kbase_jd_atom *katom;
+
+	if (SLOT_RB_EMPTY(rb)) {
+		WARN(1, "GPU ringbuffer unexpectedly empty\n");
+		return NULL;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
+
+	kbase_gpu_release_atom(kbdev, katom, end_timestamp);
+
+	rb->read_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
+
+	kbase_js_debug_log_current_affinities(kbdev);
+
+	return katom;
+}
+
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
+		return NULL; /* idx out of range */
+
+	return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js)
+{
+	return kbase_gpu_inspect(kbdev, js, 0);
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	if (SLOT_RB_EMPTY(rb))
+		return NULL;
+
+	return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom;
+}
+
+/**
+ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently
+ * on the GPU
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ *
+ * Return: true if there are atoms on the GPU for slot js,
+ *         false otherwise
+ */
+static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (!katom)
+			return false;
+		if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED ||
+				katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
+ * currently on the GPU
+ * @kbdev:  Device pointer
+ *
+ * Return: true if there are any atoms on the GPU, false otherwise
+ */
+static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
+{
+	int js;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+			if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+				return true;
+		}
+	}
+	return false;
+}
+
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED))
+			nr++;
+	}
+
+	return nr;
+}
+
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		if (kbase_gpu_inspect(kbdev, js, i))
+			nr++;
+	}
+
+	return nr;
+}
+
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
+				enum kbase_atom_gpu_rb_state min_rb_state)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state >= min_rb_state))
+			nr++;
+	}
+
+	return nr;
+}
+
+/**
+ * check_secure_atom - Check if the given atom is in the given secure state and
+ *                     has a ringbuffer state of at least
+ *                     KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @katom:  Atom pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if atom is in the given state, false otherwise
+ */
+static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure)
+{
+	if (katom->gpu_rb_state >=
+			KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION &&
+			((kbase_jd_katom_is_protected(katom) && secure) ||
+			(!kbase_jd_katom_is_protected(katom) && !secure)))
+		return true;
+
+	return false;
+}
+
+/**
+ * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given
+ *                                secure state in the ringbuffers of at least
+ *                                state
+ *                                KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE
+ * @kbdev:  Device pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if any atoms are in the given state, false otherwise
+ */
+static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
+		bool secure)
+{
+	int js, i;
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, i);
+
+			if (katom) {
+				if (check_secure_atom(katom, secure))
+					return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* The GPU is being reset - so prevent submission */
+		return 0;
+	}
+
+	return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
+}
+
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom);
+
+static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
+						int js,
+						struct kbase_jd_atom *katom)
+{
+	/* The most recently checked affinity. Having this at this scope allows
+	 * us to guarantee that we've checked the affinity in this function
+	 * call.
+	 */
+	u64 recently_chosen_affinity = 0;
+	bool chosen_affinity = false;
+	bool retry;
+
+	do {
+		retry = false;
+
+		/* NOTE: The following uses a number of FALLTHROUGHs to optimize
+		 * the calls to this function. Ending of the function is
+		 * indicated by BREAK OUT */
+		switch (katom->coreref_state) {
+			/* State when job is first attempted to be run */
+		case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+			KBASE_DEBUG_ASSERT(katom->affinity == 0);
+
+			/* Compute affinity */
+			if (false == kbase_js_choose_affinity(
+					&recently_chosen_affinity, kbdev, katom,
+									js)) {
+				/* No cores are currently available */
+				/* *** BREAK OUT: No state transition *** */
+				break;
+			}
+
+			chosen_affinity = true;
+
+			/* Request the cores */
+			kbase_pm_request_cores(kbdev,
+					katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+			katom->affinity = recently_chosen_affinity;
+
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+			{
+				enum kbase_pm_cores_ready cores_ready;
+
+				KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+				cores_ready = kbase_pm_register_inuse_cores(
+						kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						katom->affinity);
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Stay in this state and return, to
+					 * retry at this state later */
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: No state transition
+					 * *** */
+					break;
+				}
+				/* Proceed to next state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+			}
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+		case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+			/* Optimize out choosing the affinity twice in the same
+			 * function call */
+			if (chosen_affinity == false) {
+				/* See if the affinity changed since a previous
+				 * call. */
+				if (false == kbase_js_choose_affinity(
+						&recently_chosen_affinity,
+							kbdev, katom, js)) {
+					/* No cores are currently available */
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REQUEST_ON_RECHECK_FAILED,
+						katom->kctx, katom,
+						katom->jc, js,
+						(u32) recently_chosen_affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+				chosen_affinity = true;
+			}
+
+			/* Now see if this requires a different set of cores */
+			if (recently_chosen_affinity != katom->affinity) {
+				enum kbase_pm_cores_ready cores_ready;
+
+				kbase_pm_request_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+
+				/* Register new cores whilst we still hold the
+				 * old ones, to minimize power transitions */
+				cores_ready =
+					kbase_pm_register_inuse_cores(kbdev,
+						katom->core_req & BASE_JD_REQ_T,
+						recently_chosen_affinity);
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+
+				/* Fixup the state that was reduced by
+				 * deref_cores: */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				katom->affinity = recently_chosen_affinity;
+				if (cores_ready == KBASE_NEW_AFFINITY) {
+					/* Affinity no longer valid - return to
+					 * previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+					kbasep_js_job_check_deref_cores(kbdev,
+									katom);
+
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_INUSE_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Return to previous
+					 * state, retry *** */
+					retry = true;
+					break;
+				}
+				/* Now might be waiting for powerup again, with
+				 * a new affinity */
+				if (cores_ready == KBASE_CORES_NOT_READY) {
+					/* Return to previous state */
+					katom->coreref_state =
+					KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+					KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_REGISTER_ON_RECHECK_FAILED,
+							katom->kctx, katom,
+							katom->jc, js,
+							(u32) katom->affinity);
+					/* *** BREAK OUT: Transition to lower
+					 * state *** */
+					break;
+				}
+			}
+			/* Proceed to next state */
+			katom->coreref_state =
+			KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+		case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
+			KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+			KBASE_DEBUG_ASSERT(katom->affinity ==
+						recently_chosen_affinity);
+
+			/* Note: this is where the caller must've taken the
+			 * hwaccess_lock */
+
+			/* Check for affinity violations - if there are any,
+			 * then we just ask the caller to requeue and try again
+			 * later */
+			if (kbase_js_affinity_would_violate(kbdev, js,
+					katom->affinity) != false) {
+				/* Return to previous state */
+				katom->coreref_state =
+				KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+				/* *** BREAK OUT: Transition to lower state ***
+				 */
+				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+					JS_CORE_REF_AFFINITY_WOULD_VIOLATE,
+					katom->kctx, katom, katom->jc, js,
+					(u32) katom->affinity);
+				break;
+			}
+
+			/* No affinity violations would result, so the cores are
+			 * ready */
+			katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
+			/* *** BREAK OUT: Cores Ready *** */
+			break;
+
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false,
+					"Unhandled kbase_atom_coreref_state %d",
+							katom->coreref_state);
+			break;
+		}
+	} while (retry != false);
+
+	return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
+}
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	switch (katom->coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+					(katom->core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+							katom->affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(katom->affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							katom->coreref_state);
+		break;
+	}
+
+	katom->affinity = 0;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+}
+
+static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+		/* State where cores were registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested, but not registered */
+		KBASE_DEBUG_ASSERT(affinity != 0 ||
+					(core_req & BASE_JD_REQ_T));
+		kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T,
+							affinity);
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		KBASE_DEBUG_ASSERT(affinity == 0);
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							coreref_state);
+		break;
+	}
+}
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	switch (katom->gpu_rb_state) {
+	case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+		/* Should be impossible */
+		WARN(1, "Attempting to release atom not in ringbuffer\n");
+		break;
+
+	case KBASE_ATOM_GPU_RB_SUBMITTED:
+		/* Inform power management at start/finish of atom so it can
+		 * update its GPU utilisation metrics. Mark atom as not
+		 * submitted beforehand. */
+		katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		kbase_pm_metrics_update(kbdev, end_timestamp);
+
+		if (katom->core_req & BASE_JD_REQ_PERMON)
+			kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+		KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
+			&kbdev->gpu_props.props.raw_props.js_features
+				[katom->slot_nr]);
+		KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
+		KBASE_TLSTREAM_TL_NRET_CTX_LPU(kctx,
+			&kbdev->gpu_props.props.raw_props.js_features
+				[katom->slot_nr]);
+
+	case KBASE_ATOM_GPU_RB_READY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+		kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
+							katom->affinity);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+		break;
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+		if (katom->protected_state.enter !=
+				KBASE_ATOM_ENTER_PROTECTED_CHECK ||
+				katom->protected_state.exit !=
+				KBASE_ATOM_EXIT_PROTECTED_CHECK)
+			kbdev->protected_mode_transition = false;
+
+		if (kbase_jd_katom_is_protected(katom) &&
+				(katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) {
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+			/* Go back to configured model for IPA */
+			kbase_ipa_model_use_configured_locked(kbdev);
+		}
+
+
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+		break;
+	}
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+}
+
+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	kbase_gpu_release_atom(kbdev, katom, NULL);
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+}
+
+static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	bool slot_busy[3];
+
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+	slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+
+	if ((js == 2 && !(slot_busy[0] || slot_busy[1])) ||
+		(js != 2 && !slot_busy[2]))
+		return true;
+
+	/* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */
+	if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1) ||
+			backend->rmu_workaround_flag))
+		return false;
+
+	/* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */
+	if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) ||
+			!backend->rmu_workaround_flag))
+		return false;
+
+	backend->rmu_workaround_flag = !backend->rmu_workaround_flag;
+
+	return true;
+}
+
+/**
+ * other_slots_busy - Determine if any job slots other than @js are currently
+ *                    running atoms
+ * @kbdev: Device pointer
+ * @js:    Job slot
+ *
+ * Return: true if any slots other than @js are busy, false otherwise
+ */
+static inline bool other_slots_busy(struct kbase_device *kbdev, int js)
+{
+	int slot;
+
+	for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) {
+		if (slot == js)
+			continue;
+
+		if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot,
+				KBASE_ATOM_GPU_RB_SUBMITTED))
+			return true;
+	}
+
+	return false;
+}
+
+static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev)
+{
+	return kbdev->protected_mode;
+}
+
+static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot enter protected mode: protected callbacks not specified.\n");
+
+	/*
+	 * When entering into protected mode, we must ensure that the
+	 * GPU is not operating in coherent mode as well. This is to
+	 * ensure that no protected memory can be leaked.
+	 */
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE);
+
+	if (kbdev->protected_ops) {
+		/* Switch GPU to protected mode */
+		err = kbdev->protected_ops->protected_mode_enable(
+				kbdev->protected_dev);
+
+		if (err)
+			dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n",
+					err);
+		else
+			kbdev->protected_mode = true;
+	}
+
+	return err;
+}
+
+static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot exit protected mode: protected callbacks not specified.\n");
+
+	if (!kbdev->protected_ops)
+		return -EINVAL;
+
+	/* The protected mode disable callback will be called as part of reset
+	 */
+	kbase_reset_gpu_silent(kbdev);
+
+	return 0;
+}
+
+static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	switch (katom[idx]->protected_state.enter) {
+	case KBASE_ATOM_ENTER_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		kbdev->protected_mode_transition = true;
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_VINSTR;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_VINSTR:
+		if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
+			/*
+			 * We can't switch now because
+			 * the vinstr core state switch
+			 * is not done yet.
+			 */
+			return -EAGAIN;
+		}
+
+		/* Use generic model for IPA in protected mode */
+		kbase_ipa_model_use_fallback_locked(kbdev);
+
+		/* Once reaching this point GPU must be
+		 * switched to protected mode or vinstr
+		 * re-enabled. */
+
+		/*
+		 * Not in correct mode, begin protected mode switch.
+		 * Entering protected mode requires us to power down the L2,
+		 * and drop out of fully coherent mode.
+		 */
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
+		/* Avoid unnecessary waiting on non-ACE platforms. */
+		if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) {
+			if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+				/*
+				* The L2 is still powered, wait for all the users to
+				* finish with it before doing the actual reset.
+				*/
+				return -EAGAIN;
+			}
+		}
+
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_FINISHED;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
+
+		/* No jobs running, so we can switch GPU mode right now. */
+		err = kbase_gpu_protected_mode_enter(kbdev);
+
+		/*
+		 * Regardless of result, we are no longer transitioning
+		 * the GPU.
+		 */
+		kbdev->protected_mode_transition = false;
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev);
+		if (err) {
+			/*
+			 * Failed to switch into protected mode, resume
+			 * vinstr core and fail atom.
+			 */
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order. */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			/* Go back to configured model for IPA */
+			kbase_ipa_model_use_configured_locked(kbdev);
+
+			return -EINVAL;
+		}
+
+		/* Protected mode sanity checks. */
+		KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) ==
+			kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]),
+			kbase_gpu_in_protected_mode(kbdev));
+		katom[idx]->gpu_rb_state =
+			KBASE_ATOM_GPU_RB_READY;
+	}
+
+	return 0;
+}
+
+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+
+	switch (katom[idx]->protected_state.exit) {
+	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		/*
+		 * Exiting protected mode requires a reset, but first the L2
+		 * needs to be powered down to ensure it's not active when the
+		 * reset is issued.
+		 */
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
+
+		kbdev->protected_mode_transition = true;
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+	case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/*
+			 * The L2 is still powered, wait for all the users to
+			 * finish with it before doing the actual reset.
+			 */
+			return -EAGAIN;
+		}
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET:
+		/* Issue the reset to the GPU */
+		err = kbase_gpu_protected_mode_reset(kbdev);
+
+		if (err) {
+			kbdev->protected_mode_transition = false;
+
+			/* Failed to exit protected mode, fail atom */
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+			/* Use generic model for IPA in protected mode */
+			kbase_ipa_model_use_fallback_locked(kbdev);
+
+			return -EINVAL;
+		}
+
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT:
+		/* A GPU reset is issued when exiting protected mode. Once the
+		 * reset is done all atoms' state will also be reset. For this
+		 * reason, if the atom is still in this state we can safely
+		 * say that the reset has not completed i.e., we have not
+		 * finished exiting protected mode yet.
+		 */
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+void kbase_backend_slot_update(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_jd_atom *katom[2];
+		int idx;
+
+		katom[0] = kbase_gpu_inspect(kbdev, js, 0);
+		katom[1] = kbase_gpu_inspect(kbdev, js, 1);
+		WARN_ON(katom[1] && !katom[0]);
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			bool cores_ready;
+			int ret;
+
+			if (!katom[idx])
+				continue;
+
+			switch (katom[idx]->gpu_rb_state) {
+			case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+				/* Should be impossible */
+				WARN(1, "Attempting to update atom not in ringbuffer\n");
+				break;
+
+			case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+				if (katom[idx]->atom_flags &
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED)
+					break;
+
+				katom[idx]->gpu_rb_state =
+				KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+				if (kbase_gpu_check_secure_atoms(kbdev,
+						!kbase_jd_katom_is_protected(
+						katom[idx])))
+					break;
+
+				if ((idx == 1) && (kbase_jd_katom_is_protected(
+								katom[0]) !=
+						kbase_jd_katom_is_protected(
+								katom[1])))
+					break;
+
+				if (kbdev->protected_mode_transition)
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+
+				/*
+				 * Exiting protected mode must be done before
+				 * the references on the cores are taken as
+				 * a power down the L2 is required which
+				 * can't happen after the references for this
+				 * atom are taken.
+				 */
+
+				if (!kbase_gpu_in_protected_mode(kbdev) &&
+					kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition into protected mode. */
+					ret = kbase_jm_enter_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				} else if (kbase_gpu_in_protected_mode(kbdev) &&
+					!kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition out of protected mode. */
+					ret = kbase_jm_exit_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				}
+				katom[idx]->protected_state.exit =
+						KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+				/* Atom needs no protected mode transition. */
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+				if (katom[idx]->will_fail_event_code) {
+					kbase_gpu_mark_atom_for_return(kbdev,
+							katom[idx]);
+					/* Set EVENT_DONE so this atom will be
+					   completed, not unpulled. */
+					katom[idx]->event_code =
+						BASE_JD_EVENT_DONE;
+					/* Only return if head atom or previous
+					 * atom already removed - as atoms must
+					 * be returned in order. */
+					if (idx == 0 ||	katom[0]->gpu_rb_state ==
+							KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+						kbase_gpu_dequeue_atom(kbdev, js, NULL);
+						kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+					}
+					break;
+				}
+
+				cores_ready =
+					kbasep_js_job_check_ref_cores(kbdev, js,
+								katom[idx]);
+
+				if (katom[idx]->event_code ==
+						BASE_JD_EVENT_PM_EVENT) {
+					katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+					break;
+				}
+
+				if (!cores_ready)
+					break;
+
+				kbase_js_affinity_retain_slot_cores(kbdev, js,
+							katom[idx]->affinity);
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+				if (!kbase_gpu_rmu_workaround(kbdev, js))
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_READY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_READY:
+
+				if (idx == 1) {
+					/* Only submit if head atom or previous
+					 * atom already submitted */
+					if ((katom[0]->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED &&
+						katom[0]->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+						break;
+
+					/* If intra-slot serialization in use
+					 * then don't submit atom to NEXT slot
+					 */
+					if (kbdev->serialize_jobs &
+						KBASE_SERIALIZE_INTRA_SLOT)
+						break;
+				}
+
+				/* If inter-slot serialization in use then don't
+				 * submit atom if any other slots are in use */
+				if ((kbdev->serialize_jobs &
+						KBASE_SERIALIZE_INTER_SLOT) &&
+						other_slots_busy(kbdev, js))
+					break;
+
+				if ((kbdev->serialize_jobs &
+						KBASE_SERIALIZE_RESET) &&
+						kbase_reset_gpu_active(kbdev))
+					break;
+
+				/* Check if this job needs the cycle counter
+				 * enabled before submission */
+				if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+					kbase_pm_request_gpu_cycle_counter_l2_is_on(
+									kbdev);
+
+				kbase_job_hw_submit(kbdev, katom[idx], js);
+				katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_SUBMITTED;
+
+				/* Inform power management at start/finish of
+				 * atom so it can update its GPU utilisation
+				 * metrics. */
+				kbase_pm_metrics_update(kbdev,
+						&katom[idx]->start_timestamp);
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_SUBMITTED:
+				/* Atom submitted to HW, nothing else to do */
+				break;
+
+			case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+				/* Only return if head atom or previous atom
+				 * already removed - as atoms must be returned
+				 * in order */
+				if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					kbase_gpu_dequeue_atom(kbdev, js, NULL);
+					kbase_jm_return_atom_to_js(kbdev,
+								katom[idx]);
+				}
+				break;
+			}
+		}
+	}
+
+	/* Warn if PRLAM-8987 affinity restrictions are violated */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1)) &&
+			kbase_gpu_atoms_submitted(kbdev, 2));
+}
+
+
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbase_gpu_enqueue_atom(kbdev, katom);
+	kbase_backend_slot_update(kbdev);
+}
+
+#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \
+	(KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER))
+
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_atom *next_katom;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = kbase_gpu_inspect(kbdev, js, 0);
+	next_katom = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (next_katom && katom->kctx == next_katom->kctx &&
+		next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+		HAS_DEP(next_katom) &&
+		(kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+		kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL)
+									!= 0)) {
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+				JS_COMMAND_NOP, NULL);
+		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+
+		KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
+				&kbdev->gpu_props.props.raw_props.js_features
+					[katom->slot_nr]);
+		KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as
+					[katom->kctx->as_nr]);
+		KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx,
+				&kbdev->gpu_props.props.raw_props.js_features
+					[katom->slot_nr]);
+
+		return true;
+	}
+
+	return false;
+}
+
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/*
+	 * When a hard-stop is followed close after a soft-stop, the completion
+	 * code may be set to STOPPED, even though the job is terminated
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) {
+		if (completion_code == BASE_JD_EVENT_STOPPED &&
+				(katom->atom_flags &
+				KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) {
+			completion_code = BASE_JD_EVENT_TERMINATED;
+		}
+	}
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) || (katom->core_req &
+					BASE_JD_REQ_SKIP_CACHE_END)) &&
+			completion_code != BASE_JD_EVENT_DONE &&
+			!(completion_code & BASE_JD_SW_EVENT)) {
+		/* When a job chain fails, on a T60x or when
+		 * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not
+		 * flushed. To prevent future evictions causing possible memory
+		 * corruption we need to flush the cache manually before any
+		 * affected memory gets reused. */
+		katom->need_cache_flush_cores_retained = katom->affinity;
+		kbase_pm_request_cores(kbdev, false, katom->affinity);
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
+		if (kbdev->gpu_props.num_core_groups > 1 &&
+			!(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[0].core_mask
+									) &&
+			(katom->affinity &
+			kbdev->gpu_props.props.coherency_info.group[1].core_mask
+									)) {
+			dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
+			katom->need_cache_flush_cores_retained =
+								katom->affinity;
+			kbase_pm_request_cores(kbdev, false,
+							katom->affinity);
+		}
+	}
+
+	katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+	kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED) {
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		/*
+		 * Dequeue next atom from ringbuffers on same slot if required.
+		 * This atom will already have been removed from the NEXT
+		 * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
+		 * the atoms on this slot are returned in the correct order.
+		 */
+		if (next_katom && katom->kctx == next_katom->kctx &&
+				next_katom->sched_priority ==
+				katom->sched_priority) {
+			kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+			kbase_jm_return_atom_to_js(kbdev, next_katom);
+		}
+	} else if (completion_code != BASE_JD_EVENT_DONE) {
+		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+		int i;
+
+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
+		KBASE_TRACE_DUMP(kbdev);
+#endif
+		kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+		/*
+		 * Remove all atoms on the same context from ringbuffers. This
+		 * will not remove atoms that are already on the GPU, as these
+		 * are guaranteed not to have fail dependencies on the failed
+		 * atom.
+		 */
+		for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+			struct kbase_jd_atom *katom_idx0 =
+						kbase_gpu_inspect(kbdev, i, 0);
+			struct kbase_jd_atom *katom_idx1 =
+						kbase_gpu_inspect(kbdev, i, 1);
+
+			if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
+					HAS_DEP(katom_idx0) &&
+					katom_idx0->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Dequeue katom_idx0 from ringbuffer */
+				kbase_gpu_dequeue_atom(kbdev, i, end_timestamp);
+
+				if (katom_idx1 &&
+						katom_idx1->kctx == katom->kctx
+						&& HAS_DEP(katom_idx1) &&
+						katom_idx0->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+					/* Dequeue katom_idx1 from ringbuffer */
+					kbase_gpu_dequeue_atom(kbdev, i,
+							end_timestamp);
+
+					katom_idx1->event_code =
+							BASE_JD_EVENT_STOPPED;
+					kbase_jm_return_atom_to_js(kbdev,
+								katom_idx1);
+				}
+				katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+
+			} else if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					HAS_DEP(katom_idx1) &&
+					katom_idx1->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Can not dequeue this atom yet - will be
+				 * dequeued when atom at idx0 completes */
+				katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_gpu_mark_atom_for_return(kbdev,
+								katom_idx1);
+			}
+		}
+	}
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
+					js, completion_code);
+
+	if (job_tail != 0 && job_tail != katom->jc) {
+		bool was_updated = (job_tail != katom->jc);
+
+		/* Some of the job has been executed, so we update the job chain
+		 * address to where we should resume from */
+		katom->jc = job_tail;
+		if (was_updated)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+						katom, job_tail, js);
+	}
+
+	/* Only update the event code for jobs that weren't cancelled */
+	if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+		katom->event_code = (base_jd_event_code)completion_code;
+
+	kbase_device_trace_register_access(kctx, REG_WRITE,
+						JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+						1 << js);
+
+	/* Complete the job, and start new ones
+	 *
+	 * Also defer remaining work onto the workqueue:
+	 * - Re-queue Soft-stopped jobs
+	 * - For any other jobs, queue the job back into the dependency system
+	 * - Schedule out the parent context if necessary, and schedule a new
+	 *   one in.
+	 */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	{
+		/* The atom in the HEAD */
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		if (next_katom && next_katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+							js_string,
+							sizeof(js_string)),
+						ktime_to_ns(*end_timestamp),
+						(u32)next_katom->kctx->id, 0,
+						next_katom->work_id);
+			kbdev->hwaccess.backend.slot_rb[js].last_context =
+							next_katom->kctx;
+		} else {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+							js_string,
+							sizeof(js_string)),
+						ktime_to_ns(ktime_get()), 0, 0,
+						0);
+			kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
+		}
+	}
+#endif
+
+	if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)
+		kbase_reset_gpu_silent(kbdev);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED)
+		katom = kbase_jm_return_atom_to_js(kbdev, katom);
+	else
+		katom = kbase_jm_complete(kbdev, katom, end_timestamp);
+
+	if (katom) {
+		/* Cross-slot dependency has now become runnable. Try to submit
+		 * it. */
+
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		kbase_jm_try_kick(kbdev, 1 << katom->slot_nr);
+	}
+
+	/* Job completion may have unblocked other atoms. Try to update all job
+	 * slots */
+	kbase_backend_slot_update(kbdev);
+}
+
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Reset should always take the GPU out of protected mode */
+	WARN_ON(kbase_gpu_in_protected_mode(kbdev));
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int atom_idx = 0;
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, atom_idx);
+			bool keep_in_jm_rb = false;
+
+			if (!katom)
+				break;
+			if (katom->protected_state.exit ==
+					KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)
+			{
+				KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev);
+
+				kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+				/* protected mode sanity checks */
+				KBASE_DEBUG_ASSERT_MSG(
+					kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev),
+					"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+					kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev));
+				KBASE_DEBUG_ASSERT_MSG(
+					(kbase_jd_katom_is_protected(katom) && js == 0) ||
+					!kbase_jd_katom_is_protected(katom),
+					"Protected atom on JS%d not supported", js);
+			}
+			if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED)
+				keep_in_jm_rb = true;
+
+			kbase_gpu_release_atom(kbdev, katom, NULL);
+
+			/*
+			 * If the atom wasn't on HW when the reset was issued
+			 * then leave it in the RB and next time we're kicked
+			 * it will be processed again from the starting state.
+			 */
+			if (keep_in_jm_rb) {
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+				katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+				katom->affinity = 0;
+				katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+				/* As the atom was not removed, increment the
+				 * index so that we read the correct atom in the
+				 * next iteration. */
+				atom_idx++;
+				continue;
+			}
+
+			/*
+			 * The atom was on the HW when the reset was issued
+			 * all we can do is fail the atom.
+			 */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			kbase_jm_complete(kbdev, katom, end_timestamp);
+		}
+	}
+
+	kbdev->protected_mode_transition = false;
+}
+
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
+	kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
+							katom->core_req, katom);
+	katom->kctx->blocked_js[js][katom->sched_priority] = true;
+}
+
+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom,
+						u32 action,
+						bool disjoint)
+{
+	katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+	kbase_gpu_mark_atom_for_return(kbdev, katom);
+	katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true;
+
+	if (disjoint)
+		kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
+									katom);
+}
+
+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
+{
+	if (katom->x_post_dep) {
+		struct kbase_jd_atom *dep_atom = katom->x_post_dep;
+
+		if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB &&
+			dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_RETURN_TO_JS)
+			return dep_atom->slot_nr;
+	}
+	return -1;
+}
+
+static void kbase_job_evicted(struct kbase_jd_atom *katom)
+{
+	kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom,
+			katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
+}
+
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbase_jd_atom *katom_idx0;
+	struct kbase_jd_atom *katom_idx1;
+
+	bool katom_idx0_valid, katom_idx1_valid;
+
+	bool ret = false;
+
+	int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
+	int prio_idx0 = 0, prio_idx1 = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
+	katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (katom_idx0)
+		prio_idx0 = katom_idx0->sched_priority;
+	if (katom_idx1)
+		prio_idx1 = katom_idx1->sched_priority;
+
+	if (katom) {
+		katom_idx0_valid = (katom_idx0 == katom);
+		/* If idx0 is to be removed and idx1 is on the same context,
+		 * then idx1 must also be removed otherwise the atoms might be
+		 * returned out of order */
+		if (katom_idx1)
+			katom_idx1_valid = (katom_idx1 == katom) ||
+						(katom_idx0_valid &&
+							(katom_idx0->kctx ==
+							katom_idx1->kctx));
+		else
+			katom_idx1_valid = false;
+	} else {
+		katom_idx0_valid = (katom_idx0 &&
+				(!kctx || katom_idx0->kctx == kctx));
+		katom_idx1_valid = (katom_idx1 &&
+				(!kctx || katom_idx1->kctx == kctx) &&
+				prio_idx0 == prio_idx1);
+	}
+
+	if (katom_idx0_valid)
+		stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
+	if (katom_idx1_valid)
+		stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1);
+
+	if (katom_idx0_valid) {
+		if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Simple case - just dequeue and return */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			if (katom_idx1_valid) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				katom_idx1->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx1);
+				katom_idx1->kctx->blocked_js[js][prio_idx1] =
+						true;
+			}
+
+			katom_idx0->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+			kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+			katom_idx0->kctx->blocked_js[js][prio_idx0] = true;
+		} else {
+			/* katom_idx0 is on GPU */
+			if (katom_idx1 && katom_idx1->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* katom_idx0 and katom_idx1 are on GPU */
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+					/* idx0 has already completed - stop
+					 * idx1 if needed*/
+					if (katom_idx1_valid) {
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				} else {
+					/* idx1 is in NEXT registers - attempt
+					 * to remove */
+					kbase_reg_write(kbdev,
+							JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+					if (kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_LO), NULL)
+									!= 0 ||
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_HI), NULL)
+									!= 0) {
+						/* idx1 removed successfully,
+						 * will be handled in IRQ */
+						kbase_job_evicted(katom_idx1);
+						kbase_gpu_remove_atom(kbdev,
+								katom_idx1,
+								action, true);
+						stop_x_dep_idx1 =
+					should_stop_x_dep_slot(katom_idx1);
+
+						/* stop idx0 if still on GPU */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx0,
+								action);
+						ret = true;
+					} else if (katom_idx1_valid) {
+						/* idx0 has already completed,
+						 * stop idx1 if needed */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				}
+			} else if (katom_idx1_valid) {
+				/* idx1 not on GPU but must be dequeued*/
+
+				/* idx1 will be handled in IRQ */
+				kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+				/* stop idx0 */
+				/* This will be repeated for anything removed
+				 * from the next registers, since their normal
+				 * flow was also interrupted, and this function
+				 * might not enter disjoint state e.g. if we
+				 * don't actually do a hard stop on the head
+				 * atom */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			} else {
+				/* no atom in idx1 */
+				/* just stop idx0 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			}
+		}
+	} else if (katom_idx1_valid) {
+		if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Mark for return */
+			/* idx1 will be returned once idx0 completes */
+			kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+		} else {
+			/* idx1 is on GPU */
+			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT), NULL) == 0) {
+				/* idx0 has already completed - stop idx1 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx1,
+									action);
+				ret = true;
+			} else {
+				/* idx1 is in NEXT registers - attempt to
+				 * remove */
+				kbase_reg_write(kbdev, JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP, NULL);
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_LO), NULL) != 0 ||
+				    kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_HI), NULL) != 0) {
+					/* idx1 removed successfully, will be
+					 * handled in IRQ once idx0 completes */
+					kbase_job_evicted(katom_idx1);
+					kbase_gpu_remove_atom(kbdev, katom_idx1,
+									action,
+									false);
+				} else {
+					/* idx0 has already completed - stop
+					 * idx1 */
+					kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+					ret = true;
+				}
+			}
+		}
+	}
+
+
+	if (stop_x_dep_idx0 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0,
+								NULL, action);
+
+	if (stop_x_dep_idx1 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1,
+								NULL, action);
+
+	return ret;
+}
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev)
+{
+	/* Limit the number of loops to avoid a hang if the interrupt is missed
+	 */
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	mutex_lock(&kbdev->cacheclean_lock);
+
+	/* use GPU_COMMAND completion solution */
+	/* clean & invalidate the caches */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+	/* wait for cache flush to complete before continuing */
+	while (--max_loops &&
+		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+						CLEAN_CACHES_COMPLETED) == 0)
+		;
+
+	/* clear the CLEAN_CACHES_COMPLETED irq */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
+							CLEAN_CACHES_COMPLETED);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+						CLEAN_CACHES_COMPLETED, NULL);
+	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING,
+	    "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_backend_cacheclean(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->need_cache_flush_cores_retained) {
+		unsigned long flags;
+
+		kbase_gpu_cacheclean(kbdev);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_pm_unrequest_cores(kbdev, false,
+					katom->need_cache_flush_cores_retained);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		katom->need_cache_flush_cores_retained = 0;
+	}
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	/*
+	 * If cache flush required due to HW workaround then perform the flush
+	 * now
+	 */
+	kbase_backend_cacheclean(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969)            &&
+	    (katom->core_req & BASE_JD_REQ_FS)                        &&
+	    katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT       &&
+	    (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+	    !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+		dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+		if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after was soft-stopped
+			 * Due to an HW issue we try to execute the job again.
+			 */
+			dev_dbg(kbdev->dev,
+				"Clamping has been executed, try to rerun the job\n"
+			);
+			katom->event_code = BASE_JD_EVENT_STOPPED;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+		}
+	}
+
+	/* Clear the coreref_state now - while check_deref_cores() may not have
+	 * been called yet, the caller will have taken a copy of this field. If
+	 * this is not done, then if the atom is re-scheduled (following a soft
+	 * stop) then the core reference would not be retaken. */
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->affinity = 0;
+}
+
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity,
+			coreref_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!kbdev->pm.active_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		mutex_lock(&kbdev->pm.lock);
+		kbase_pm_update_active(kbdev);
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+void kbase_gpu_dump_slots(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	int js;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+									js,
+									idx);
+
+			if (katom)
+				dev_info(kbdev->dev,
+				"  js%d idx%d : katom=%p gpu_rb_state=%d\n",
+				js, idx, katom, katom->gpu_rb_state);
+			else
+				dev_info(kbdev->dev, "  js%d idx%d : empty\n",
+								js, idx);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
new file mode 100644
index 0000000..1e0e05a
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_H_
+#define _KBASE_HWACCESS_GPU_H_
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/**
+ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot
+ *
+ * @kbdev:         Device pointer
+ * @js:            Job slot to evict from
+ *
+ * Evict the atom in the NEXT slot for the specified job slot. This function is
+ * called from the job complete IRQ handler when the previous job has failed.
+ *
+ * Return: true if job evicted from NEXT registers, false otherwise
+ */
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_gpu_complete_hw - Complete an atom on job slot js
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot that has completed
+ * @completion_code: Event code from job that has completed
+ * @job_tail:        The tail address from the hardware if the job has partially
+ *                   completed
+ * @end_timestamp:   Time of completion
+ */
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
+ *
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ * @idx:    Index into ringbuffer. 0 is the job currently running on
+ *          the slot, 1 is the job waiting, all other values are invalid.
+ * Return:  The atom at that position in the ringbuffer
+ *          or NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx);
+
+/**
+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
new file mode 100644
index 0000000..54d8ddd
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
@@ -0,0 +1,303 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+#include "mali_kbase_hw.h"
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+									int js)
+{
+	/*
+	 * Here are the reasons for using job slot 2:
+	 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+	 * - In absence of the above, then:
+	 *  - Atoms with BASE_JD_REQ_COHERENT_GROUP
+	 *  - But, only when there aren't contexts with
+	 *  KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+	 *  all cores on slot 1 could be blocked by those using a coherent group
+	 *  on slot 2
+	 *  - And, only when you actually have 2 or more coregroups - if you
+	 *  only have 1 coregroup, then having jobs for slot 2 implies they'd
+	 *  also be for slot 1, meaning you'll get interference from them. Jobs
+	 *  able to run on slot 2 could also block jobs that can only run on
+	 *  slot 1 (tiler jobs)
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+
+	if (js != 2)
+		return true;
+
+	/* Only deal with js==2 now: */
+	if (kbdev->gpu_props.num_core_groups > 1) {
+		/* Only use slot 2 in the 2+ coregroup case */
+		if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+					KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
+								false) {
+			/* ...But only when we *don't* have atoms that run on
+			 * all cores */
+
+			/* No specific check for BASE_JD_REQ_COHERENT_GROUP
+			 * atoms - the policy will sort that out */
+			return true;
+		}
+	}
+
+	/* Above checks failed mean we shouldn't use slot 2 */
+	return false;
+}
+
+/*
+ * As long as it has been decided to have a deeper modification of
+ * what job scheduler, power manager and affinity manager will
+ * implement, this function is just an intermediate step that
+ * assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ *   and high cores in a core split will be evently distributed.
+ * - Odd combinations of core requirements have been filtered out
+ *   and do not get to this function (e.g. CS+T+NSS is not
+ *   supported here).
+ * - This function is frequently called and can be optimized,
+ *   (see notes in loops), but as the functionallity will likely
+ *   be modified, optimization has not been addressed.
+*/
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js)
+{
+	base_jd_core_req core_req = katom->core_req;
+	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+	u64 core_availability_mask;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+	/*
+	 * If no cores are currently available (core availability policy is
+	 * transitioning) then fail.
+	 */
+	if (0 == core_availability_mask) {
+		*affinity = 0;
+		return false;
+	}
+
+	KBASE_DEBUG_ASSERT(js >= 0);
+
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+								BASE_JD_REQ_T) {
+		 /* If the hardware supports XAFFINITY then we'll only enable
+		  * the tiler (which is the default so this is a no-op),
+		  * otherwise enable shader core 0. */
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = 1;
+		else
+			*affinity = 0;
+
+		return true;
+	}
+
+	if (1 == kbdev->gpu_props.num_cores) {
+		/* trivial case only one core, nothing to do */
+		*affinity = core_availability_mask &
+				kbdev->pm.debug_core_mask[js];
+	} else {
+		if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+			if (js == 0 || num_core_groups == 1) {
+				/* js[0] and single-core-group systems just get
+				 * the first core group */
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[0].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+			} else {
+				/* js[1], js[2] use core groups 0, 1 for
+				 * dual-core-group systems */
+				u32 core_group_idx = ((u32) js) - 1;
+
+				KBASE_DEBUG_ASSERT(core_group_idx <
+							num_core_groups);
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+
+				/* If the job is specifically targeting core
+				 * group 1 and the core availability policy is
+				 * keeping that core group off, then fail */
+				if (*affinity == 0 && core_group_idx == 1 &&
+						kbdev->pm.backend.cg1_disabled
+								== true)
+					katom->event_code =
+							BASE_JD_EVENT_PM_EVENT;
+			}
+		} else {
+			/* All cores are available when no core split is
+			 * required */
+			*affinity = core_availability_mask &
+					kbdev->pm.debug_core_mask[js];
+		}
+	}
+
+	/*
+	 * If no cores are currently available in the desired core group(s)
+	 * (core availability policy is transitioning) then fail.
+	 */
+	if (*affinity == 0)
+		return false;
+
+	/* Enable core 0 if tiler required for hardware without XAFFINITY
+	 * support (notes above) */
+	if (core_req & BASE_JD_REQ_T) {
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = *affinity | 1;
+	}
+
+	return true;
+}
+
+static inline bool kbase_js_affinity_is_violating(
+						struct kbase_device *kbdev,
+								u64 *affinities)
+{
+	/* This implementation checks whether the two slots involved in Generic
+	 * thread creation have intersecting affinity. This is due to micro-
+	 * architectural issues where a job in slot A targetting cores used by
+	 * slot B could prevent the job in slot B from making progress until the
+	 * job in slot A has completed.
+	 */
+	u64 affinity_set_left;
+	u64 affinity_set_right;
+	u64 intersection;
+
+	KBASE_DEBUG_ASSERT(affinities != NULL);
+
+	affinity_set_left = affinities[1];
+
+	affinity_set_right = affinities[2];
+
+	/* A violation occurs when any bit in the left_set is also in the
+	 * right_set */
+	intersection = affinity_set_left & affinity_set_right;
+
+	return (bool) (intersection != (u64) 0u);
+}
+
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
+			sizeof(js_devdata->runpool_irq.slot_affinities));
+
+	new_affinities[js] |= affinity;
+
+	return kbase_js_affinity_is_violating(kbdev, new_affinities);
+}
+
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
+								== false);
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		cnt =
+		++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (cnt == 1)
+			js_devdata->runpool_irq.slot_affinities[js] |= bit;
+
+		cores &= ~bit;
+	}
+}
+
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	cores = affinity;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		s8 cnt;
+
+		KBASE_DEBUG_ASSERT(
+		js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
+
+		cnt =
+		--(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		if (0 == cnt)
+			js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
+
+		cores &= ~bit;
+	}
+}
+
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int slot_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	for (slot_nr = 0; slot_nr < 3; ++slot_nr)
+		KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
+							NULL, 0u, slot_nr,
+			(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
new file mode 100644
index 0000000..35d9781
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Affinity Manager internal APIs.
+ */
+
+#ifndef _KBASE_JS_AFFINITY_H_
+#define _KBASE_JS_AFFINITY_H_
+
+/**
+ * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
+ * submit a job to a particular job slot in the current status
+ *
+ * @kbdev: The kbase device structure of the device
+ * @js:    Job slot number to check for allowance
+ *
+ * Will check if submitting to the given job slot is allowed in the current
+ * status.  For example using job slot 2 while in soft-stoppable state and only
+ * having 1 coregroup is not allowed by the policy. This function should be
+ * called prior to submitting a job to a slot to make sure policy rules are not
+ * violated.
+ *
+ * The following locking conditions are made on the caller
+ * - it must hold hwaccess_lock
+ */
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_js_choose_affinity - Compute affinity for a given job.
+ *
+ * @affinity: Affinity bitmap computed
+ * @kbdev:    The kbase device structure of the device
+ * @katom:    Job chain of which affinity is going to be found
+ * @js:       Slot the job chain is being submitted
+ *
+ * Currently assumes an all-on/all-off power management policy.
+ * Also assumes there is at least one core with tiler available.
+ *
+ * Returns true if a valid affinity was chosen, false if
+ * no cores were available.
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					int js);
+
+/**
+ * kbase_js_affinity_would_violate - Determine whether a proposed affinity on
+ * job slot @js would cause a violation of affinity restrictions.
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot to test
+ * @affinity: The affinity mask to test
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ *
+ * Return: true if the affinity would violate the restrictions
+ */
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
+ *                                       a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot retaining the cores
+ * @affinity: The cores to retain
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
+ *                                        by a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       Job slot
+ * @affinity: Bit mask of core to be released
+ *
+ * Cores must be released as soon as a job is dequeued from a slot's 'submit
+ * slots', and before another job is submitted to those slots. Otherwise, the
+ * refcount could exceed the maximum number submittable to a slot,
+ * %BASE_JM_SUBMIT_SLOTS.
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_debug_log_current_affinities - log the current affinities
+ *
+ * @kbdev:  Kbase device structure
+ *
+ * Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else				/*  KBASE_TRACE_ENABLE  */
+static inline void
+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+}
+#endif				/*  KBASE_TRACE_ENABLE  */
+
+#endif				/* _KBASE_JS_AFFINITY_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100644
index 0000000..63d048f
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,348 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	s8 nr_running_ctxs;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* Timer must stop if we are suspending */
+	if (backend->suspend_timer)
+		return false;
+
+	/* nr_contexts_pullable is updated with the runpool_mutex. However, the
+	 * locking in the caller gives us a barrier that ensures
+	 * nr_contexts_pullable is up-to-date for reading */
+	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (kbdev->js_data.softstop_always) {
+		/* Debug support for allowing soft-stop on a single context */
+		return true;
+	}
+#endif				/* CONFIG_MALI_DEBUG */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+		/* Timeouts would have to be 4x longer (due to micro-
+		 * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there's:
+		 * - 2 or more CL contexts
+		 * - 1 or more GLES contexts
+		 *
+		 * NOTE: We will treat a context that has both Compute and Non-
+		 * Compute jobs will be treated as an OpenCL context (hence, we
+		 * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+		 */
+		{
+			s8 nr_compute_ctxs =
+				kbasep_js_ctx_attr_count_on_runpool(kbdev,
+						KBASEP_JS_CTX_ATTR_COMPUTE);
+			s8 nr_noncompute_ctxs = nr_running_ctxs -
+							nr_compute_ctxs;
+
+			return (bool) (nr_compute_ctxs >= 2 ||
+							nr_noncompute_ctxs > 0);
+		}
+	} else {
+		/* Run the timer callback whenever you have at least 1 context
+		 */
+		return (bool) (nr_running_ctxs > 0);
+	}
+}
+
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_backend_data *backend;
+	int s;
+	bool reset_needed = false;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	backend = container_of(timer, struct kbase_backend_data,
+							scheduling_timer);
+	kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
+	js_devdata = &kbdev->js_data;
+
+	/* Loop through the slots */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+		struct kbase_jd_atom *atom = NULL;
+
+		if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
+			atom = kbase_gpu_inspect(kbdev, s, 0);
+			KBASE_DEBUG_ASSERT(atom != NULL);
+		}
+
+		if (atom != NULL) {
+			/* The current version of the model doesn't support
+			 * Soft-Stop */
+			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+				u32 ticks = atom->ticks++;
+
+#ifndef CONFIG_MALI_JOB_DUMP
+				u32 soft_stop_ticks, hard_stop_ticks,
+								gpu_reset_ticks;
+				if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks_cl;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_cl;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_cl;
+				} else {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_ss;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_ss;
+				}
+
+				/* If timeouts have been changed then ensure
+				 * that atom tick count is not greater than the
+				 * new soft_stop timeout. This ensures that
+				 * atoms do not miss any of the timeouts due to
+				 * races between this worker and the thread
+				 * changing the timeouts. */
+				if (backend->timeouts_updated &&
+						ticks > soft_stop_ticks)
+					ticks = atom->ticks = soft_stop_ticks;
+
+				/* Job is Soft-Stoppable */
+				if (ticks == soft_stop_ticks) {
+					int disjoint_threshold =
+		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+					u32 softstop_flags = 0u;
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks ticks.
+					 * Soft stop the slot so we can run
+					 * other jobs.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+					/* nr_user_contexts_running is updated
+					 * with the runpool_mutex, but we can't
+					 * take that here.
+					 *
+					 * However, if it's about to be
+					 * increased then the new context can't
+					 * run any jobs until they take the
+					 * hwaccess_lock, so it's OK to observe
+					 * the older value.
+					 *
+					 * Similarly, if it's about to be
+					 * decreased, the last job from another
+					 * context has already finished, so it's
+					 * not too bad that we observe the older
+					 * value and register a disjoint event
+					 * when we try soft-stopping */
+					if (js_devdata->nr_user_contexts_running
+							>= disjoint_threshold)
+						softstop_flags |=
+						JS_COMMAND_SW_CAUSES_DISJOINT;
+
+					kbase_job_slot_softstop_swflags(kbdev,
+						s, atom, softstop_flags);
+#endif
+				} else if (ticks == hard_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_ss ticks.
+					 * It should have been soft-stopped by
+					 * now. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks == gpu_reset_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_ss ticks.
+					 * It should have left the GPU by now.
+					 * Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#else				/* !CONFIG_MALI_JOB_DUMP */
+				/* NOTE: During CONFIG_MALI_JOB_DUMP, we use
+				 * the alternate timeouts, which makes the hard-
+				 * stop and GPU reset timeout much longer. We
+				 * also ensure that we don't soft-stop at all.
+				 */
+				if (ticks == js_devdata->soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks. We do
+					 * not soft-stop during
+					 * CONFIG_MALI_JOB_DUMP, however.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+				} else if (ticks ==
+					js_devdata->hard_stop_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_dumping
+					 * ticks. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks ==
+					js_devdata->gpu_reset_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_dumping
+					 * ticks. It should have left the GPU by
+					 * now. Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#endif				/* !CONFIG_MALI_JOB_DUMP */
+			}
+		}
+	}
+#if KBASE_GPU_RESET_EN
+	if (reset_needed) {
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* the timer is re-issued if there is contexts in the run-pool */
+
+	if (backend->timer_running)
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+			HRTIMER_MODE_REL);
+
+	backend->timeouts_updated = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	unsigned long flags;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	if (!timer_callback_should_run(kbdev)) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = false;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/* From now on, return value of timer_callback_should_run() will
+		 * also cause the timer to not requeue itself. Its return value
+		 * cannot change, because it depends on variables updated with
+		 * the runpool_mutex held, which the caller of this must also
+		 * hold */
+		hrtimer_cancel(&backend->scheduling_timer);
+	}
+
+	if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = true;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+							HRTIMER_MODE_REL);
+
+		KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
+									0u);
+	}
+}
+
+int kbase_backend_timer_init(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	backend->scheduling_timer.function = timer_callback;
+
+	backend->timer_running = false;
+
+	return 0;
+}
+
+void kbase_backend_timer_term(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_cancel(&backend->scheduling_timer);
+}
+
+void kbase_backend_timer_suspend(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = true;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timer_resume(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = false;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->timeouts_updated = true;
+}
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
new file mode 100644
index 0000000..3f53779
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#ifndef _KBASE_JS_BACKEND_H_
+#define _KBASE_JS_BACKEND_H_
+
+/**
+ * kbase_backend_timer_init() - Initialise the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_term() - Terminate the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver termination
+ */
+void kbase_backend_timer_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
+ *                               timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on suspend, after the active count has reached
+ * zero. This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ *                              scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that is is not guaranteed to
+ * re-start the timer, only evalute whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100644
index 0000000..aa1817c
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,401 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+		u32 num_pages)
+{
+	u64 region;
+
+	/* can't lock a zero sized range */
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	region = pfn << PAGE_SHIFT;
+	/*
+	 * fls returns (given the ASSERT above):
+	 * 1 .. 32
+	 *
+	 * 10 + fls(num_pages)
+	 * results in the range (11 .. 42)
+	 */
+
+	/* gracefully handle num_pages being zero */
+	if (0 == num_pages) {
+		region |= 11;
+	} else {
+		u8 region_width;
+
+		region_width = 10 + fls(num_pages);
+		if (num_pages != (1ul << (region_width - 11))) {
+			/* not pow2, so must go up to the next pow2 */
+			region_width += 1;
+		}
+		KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+		KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+		region |= region_width;
+	}
+
+	return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+		unsigned int as_nr, struct kbase_context *kctx)
+{
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending. Do not log remaining register accesses. */
+	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
+
+	if (max_loops == 0) {
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		return -1;
+	}
+
+	/* If waiting in loop was performed, log last read value. */
+	if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
+		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	return 0;
+}
+
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
+		struct kbase_context *kctx)
+{
+	int status;
+
+	/* write AS_COMMAND when MMU is ready to accept another command */
+	status = wait_ready(kbdev, as_nr, kctx);
+	if (status == 0)
+		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
+									kctx);
+
+	return status;
+}
+
+static void validate_protected_page_fault(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	/* GPUs which support (native) protected mode shall not report page
+	 * fault addresses unless it has protected debug mode and protected
+	 * debug mode is turned on */
+	u32 protected_debug_mode = 0;
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+		return;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		protected_debug_mode = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS),
+				kctx) & GPU_DBGEN;
+	}
+
+	if (!protected_debug_mode) {
+		/* fault_addr should never be reported in protected mode.
+		 * However, we just continue by printing an error message */
+		dev_err(kbdev->dev, "Fault address reported in protected mode\n");
+	}
+}
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+	const int num_as = 16;
+	const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+	const int pf_shift = 0;
+	const unsigned long as_bit_mask = (1UL << num_as) - 1;
+	unsigned long flags;
+	u32 new_mask;
+	u32 tmp;
+
+	/* bus faults */
+	u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+	/* page faults (note: Ignore ASes with both pf and bf) */
+	u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	/* remember current mask */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	/* mask interrupts for now */
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+	while (bf_bits | pf_bits) {
+		struct kbase_as *as;
+		int as_no;
+		struct kbase_context *kctx;
+
+		/*
+		 * the while logic ensures we have a bit set, no need to check
+		 * for not-found here
+		 */
+		as_no = ffs(bf_bits | pf_bits) - 1;
+		as = &kbdev->as[as_no];
+
+		/*
+		 * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+		 * Bus/Page faults _should_ only occur whilst jobs are running,
+		 * and a job causing the Bus/Page fault shouldn't complete until
+		 * the MMU is updated
+		 */
+		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+
+
+		/* find faulting address */
+		as->fault_addr = kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_HI),
+						kctx);
+		as->fault_addr <<= 32;
+		as->fault_addr |= kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_LO),
+						kctx);
+
+		/* Mark the fault protected or not */
+		as->protected_mode = kbdev->protected_mode;
+
+		if (kbdev->protected_mode && as->fault_addr)
+		{
+			/* check if address reporting is allowed */
+			validate_protected_page_fault(kbdev, kctx);
+		}
+
+		/* report the fault to debugfs */
+		kbase_as_fault_debugfs_new(kbdev, as_no);
+
+		/* record the fault status */
+		as->fault_status = kbase_reg_read(kbdev,
+						  MMU_AS_REG(as_no,
+							AS_FAULTSTATUS),
+						  kctx);
+
+		/* find the fault type */
+		as->fault_type = (bf_bits & (1 << as_no)) ?
+				KBASE_MMU_FAULT_TYPE_BUS :
+				KBASE_MMU_FAULT_TYPE_PAGE;
+
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
+			as->fault_extra_addr = kbase_reg_read(kbdev,
+					MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
+					kctx);
+			as->fault_extra_addr <<= 32;
+			as->fault_extra_addr |= kbase_reg_read(kbdev,
+					MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
+					kctx);
+		}
+
+		if (kbase_as_has_bus_fault(as)) {
+			/* Mark bus fault as handled.
+			 * Note that a bus fault is processed first in case
+			 * where both a bus fault and page fault occur.
+			 */
+			bf_bits &= ~(1UL << as_no);
+
+			/* remove the queued BF (and PF) from the mask */
+			new_mask &= ~(MMU_BUS_ERROR(as_no) |
+					MMU_PAGE_FAULT(as_no));
+		} else {
+			/* Mark page fault as handled */
+			pf_bits &= ~(1UL << as_no);
+
+			/* remove the queued PF from the mask */
+			new_mask &= ~MMU_PAGE_FAULT(as_no);
+		}
+
+		/* Process the interrupt for this address space */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_interrupt_process(kbdev, kctx, as);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+
+	/* reenable interrupts */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	new_mask |= tmp;
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx)
+{
+	struct kbase_mmu_setup *current_setup = &as->current_setup;
+	u32 transcfg = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
+		transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+
+		/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
+		/* Clear PTW_MEMATTR bits */
+		transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+		/* Enable correct PTW_MEMATTR bits */
+		transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+
+		if (kbdev->system_coherency == COHERENCY_ACE) {
+			/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
+			/* Clear PTW_SH bits */
+			transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+			/* Enable correct PTW_SH bits */
+			transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
+		}
+
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+				transcfg, kctx);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+				(current_setup->transcfg >> 32) & 0xFFFFFFFFUL,
+				kctx);
+	} else {
+		if (kbdev->system_coherency == COHERENCY_ACE)
+			current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+	}
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+			current_setup->transtab & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+			(current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+			current_setup->memattr & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+			(current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+
+	KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as,
+			current_setup->transtab,
+			current_setup->memattr,
+			transcfg);
+
+	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+}
+
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+		unsigned int handling_irq)
+{
+	int ret;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	if (op == AS_COMMAND_UNLOCK) {
+		/* Unlock doesn't require a lock first */
+		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+	} else {
+		u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+		/* Lock the region that needs to be updated */
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+				lock_addr & 0xFFFFFFFFUL, kctx);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+				(lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
+		write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+
+		/* Run the MMU operation */
+		write_cmd(kbdev, as->number, op, kctx);
+
+		/* Wait for the flush to complete */
+		ret = wait_ready(kbdev, as->number, kctx);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+			/* Issue an UNLOCK command to ensure that valid page
+			   tables are re-read by the GPU after an update.
+			   Note that, the FLUSH command should perform all the
+			   actions necessary, however the bus logs show that if
+			   multiple page faults occur within an 8 page region
+			   the MMU does not always re-read the updated page
+			   table entries for later faults or is only partially
+			   read, it subsequently raises the page fault IRQ for
+			   the same addresses, the unlock ensures that the MMU
+			   cache is flushed, so updates can be re-read.  As the
+			   region is now unlocked we need to issue 2 UNLOCK
+			   commands in order to flush the MMU/uTLB,
+			   see PRLAM-8812.
+			 */
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+		}
+	}
+
+	return ret;
+}
+
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 pf_bf_mask;
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	/* Clear the page (and bus fault IRQ as well in case one occurred) */
+	pf_bf_mask = MMU_PAGE_FAULT(as->number);
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		pf_bf_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 irq_mask;
+
+	/* Enable the page fault IRQ (and bus fault IRQ as well in case one
+	 * occurred) */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
+			MMU_PAGE_FAULT(as->number);
+
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		irq_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
new file mode 100644
index 0000000..c02253c
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Interface file for the direct implementation for MMU hardware access
+ *
+ * Direct MMU hardware interface
+ *
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
+#define _MALI_KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * kbase_mmu_interrupt - Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the &kbase_device.
+ *
+ * @kbdev:          kbase context to clear the fault from.
+ * @irq_stat:       Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+#endif	/* _MALI_KBASE_MMU_HW_DIRECT_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
new file mode 100644
index 0000000..0614348
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static void always_on_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void always_on_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+	"always_on",			/* name */
+	always_on_init,			/* init */
+	always_on_term,			/* term */
+	always_on_get_core_mask,	/* get_core_mask */
+	always_on_get_core_active,	/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_ALWAYS_ON,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
new file mode 100644
index 0000000..f9d244b
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -0,0 +1,77 @@
+
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * DOC:
+ * The "Always on" power management policy has the following
+ * characteristics:
+ *
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *    All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *    All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *    The Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed. The GPU itself is also kept powered, even though it is not
+ *    needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * Note:
+ *
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data
+ * @dummy: unused dummy variable
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_always_on {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
new file mode 100644
index 0000000..a871eae
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -0,0 +1,496 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * GPU backend implementation of base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_pm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
+
+int kbase_pm_runtime_init(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+	if (callbacks) {
+		kbdev->pm.backend.callback_power_on =
+					callbacks->power_on_callback;
+		kbdev->pm.backend.callback_power_off =
+					callbacks->power_off_callback;
+		kbdev->pm.backend.callback_power_suspend =
+					callbacks->power_suspend_callback;
+		kbdev->pm.backend.callback_power_resume =
+					callbacks->power_resume_callback;
+		kbdev->pm.callback_power_runtime_init =
+					callbacks->power_runtime_init_callback;
+		kbdev->pm.callback_power_runtime_term =
+					callbacks->power_runtime_term_callback;
+		kbdev->pm.backend.callback_power_runtime_on =
+					callbacks->power_runtime_on_callback;
+		kbdev->pm.backend.callback_power_runtime_off =
+					callbacks->power_runtime_off_callback;
+		kbdev->pm.backend.callback_power_runtime_idle =
+					callbacks->power_runtime_idle_callback;
+
+		if (callbacks->power_runtime_init_callback)
+			return callbacks->power_runtime_init_callback(kbdev);
+		else
+			return 0;
+	}
+
+	kbdev->pm.backend.callback_power_on = NULL;
+	kbdev->pm.backend.callback_power_off = NULL;
+	kbdev->pm.backend.callback_power_suspend = NULL;
+	kbdev->pm.backend.callback_power_resume = NULL;
+	kbdev->pm.callback_power_runtime_init = NULL;
+	kbdev->pm.callback_power_runtime_term = NULL;
+	kbdev->pm.backend.callback_power_runtime_on = NULL;
+	kbdev->pm.backend.callback_power_runtime_off = NULL;
+	kbdev->pm.backend.callback_power_runtime_idle = NULL;
+
+	return 0;
+}
+
+void kbase_pm_runtime_term(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.callback_power_runtime_term) {
+		kbdev->pm.callback_power_runtime_term(kbdev);
+	}
+}
+
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_on_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = true;
+}
+
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_off_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = false;
+}
+
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_init(&kbdev->pm.lock);
+
+	kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!kbdev->pm.backend.gpu_poweroff_wait_wq)
+		return -ENOMEM;
+
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
+			kbase_pm_gpu_poweroff_wait_wq);
+
+	kbdev->pm.backend.gpu_powered = false;
+	kbdev->pm.suspending = false;
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->pm.backend.driver_ready_for_irqs = false;
+#endif /* CONFIG_MALI_DEBUG */
+	kbdev->pm.backend.gpu_in_desired_state = true;
+	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+
+	/* Initialise the metrics subsystem */
+	ret = kbasep_pm_metrics_init(kbdev);
+	if (ret)
+		return ret;
+
+	init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
+	kbdev->pm.backend.l2_powered = 0;
+
+	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
+	kbdev->pm.backend.reset_done = false;
+
+	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+	kbdev->pm.active_count = 0;
+
+	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+	init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
+
+	if (kbase_pm_ca_init(kbdev) != 0)
+		goto workq_fail;
+
+	if (kbase_pm_policy_init(kbdev) != 0)
+		goto pm_policy_fail;
+
+	return 0;
+
+pm_policy_fail:
+	kbase_pm_ca_term(kbdev);
+workq_fail:
+	kbasep_pm_metrics_term(kbdev);
+	return -EINVAL;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Turn clocks and interrupts on - no-op if we haven't done a previous
+	 * kbase_pm_clock_off() */
+	kbase_pm_clock_on(kbdev, is_resume);
+
+	/* Update core status as required by the policy */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+	kbase_pm_update_cores_state(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+	/* NOTE: We don't wait to reach the desired state, since running atoms
+	 * will wait for that state to be reached anyway */
+}
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+			pm.backend.gpu_poweroff_wait_work);
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+#if !PLATFORM_POWER_DOWN_ONLY
+	/* Wait for power transitions to complete. We do this with no locks held
+	 * so that we don't deadlock with any pending workqueues */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+	kbase_pm_check_transitions_sync(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+#if PLATFORM_POWER_DOWN_ONLY
+	if (kbdev->pm.backend.gpu_powered) {
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/* If L2 cache is powered then we must flush it before
+			 * we power off the GPU. Normally this would have been
+			 * handled when the L2 was powered off. */
+			kbase_gpu_cacheclean(kbdev);
+		}
+	}
+#endif /* PLATFORM_POWER_DOWN_ONLY */
+
+	if (!backend->poweron_required) {
+#if !PLATFORM_POWER_DOWN_ONLY
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		WARN_ON(kbdev->l2_available_bitmap ||
+				kbdev->shader_available_bitmap ||
+				kbdev->tiler_available_bitmap);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+
+		/* Consume any change-state events */
+		kbase_timeline_pm_check_handle_event(kbdev,
+					KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		/* Disable interrupts and turn the clock off */
+		if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
+			/*
+			 * Page/bus faults are pending, must drop locks to
+			 * process.  Interrupts are disabled so no more faults
+			 * should be generated at this point.
+			 */
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			kbase_flush_mmu_wqs(kbdev);
+			mutex_lock(&js_devdata->runpool_mutex);
+			mutex_lock(&kbdev->pm.lock);
+
+			/* Turn off clock now that fault have been handled. We
+			 * dropped locks so poweron_required may have changed -
+			 * power back on if this is the case.*/
+			if (backend->poweron_required)
+				kbase_pm_clock_on(kbdev, false);
+			else
+				WARN_ON(!kbase_pm_clock_off(kbdev,
+						backend->poweroff_is_suspend));
+		}
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	backend->poweroff_wait_in_progress = false;
+	if (backend->poweron_required) {
+		backend->poweron_required = false;
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_backend_slot_update(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	wake_up(&kbdev->pm.backend.poweroff_wait);
+}
+
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (!kbdev->pm.backend.poweroff_wait_in_progress) {
+		/* Force all cores off */
+		kbdev->pm.backend.desired_shader_state = 0;
+		kbdev->pm.backend.desired_tiler_state = 0;
+
+		/* Force all cores to be unavailable, in the situation where
+		 * transitions are in progress for some cores but not others,
+		 * and kbase_pm_check_transitions_nolock can not immediately
+		 * power off the cores */
+		kbdev->shader_available_bitmap = 0;
+		kbdev->tiler_available_bitmap = 0;
+		kbdev->l2_available_bitmap = 0;
+
+		kbdev->pm.backend.poweroff_wait_in_progress = true;
+		kbdev->pm.backend.poweroff_is_suspend = is_suspend;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/*Kick off wq here. Callers will have to wait*/
+		queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
+				&kbdev->pm.backend.gpu_poweroff_wait_work);
+	} else {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+}
+
+static bool is_poweroff_in_progress(struct kbase_device *kbdev)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
+{
+	wait_event_killable(kbdev->pm.backend.poweroff_wait,
+			is_poweroff_in_progress(kbdev));
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, don't enable IRQs as we are not ready to receive
+	 * them. */
+	ret = kbase_pm_init_hw(kbdev, flags);
+	if (ret) {
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		return ret;
+	}
+
+	kbasep_pm_init_core_use_bitmaps(kbdev);
+
+	kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
+			kbdev->pm.debug_core_mask[1] =
+			kbdev->pm.debug_core_mask[2] =
+			kbdev->gpu_props.props.raw_props.shader_present;
+
+	/* Pretend the GPU is active to prevent a power policy turning the GPU
+	 * cores off */
+	kbdev->pm.active_count = 1;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+	/* Ensure cycle counter is off */
+	kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+	/* We are ready to receive IRQ's now as power policy is set up, so
+	 * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+	kbdev->pm.backend.driver_ready_for_irqs = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+#endif
+	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the GPU and any cores needed by the policy */
+	kbase_pm_do_poweron(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	kbase_pm_context_idle(kbdev);
+
+	return 0;
+}
+
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
+
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
+
+	/* Free any resources the policy allocated */
+	kbase_pm_policy_term(kbdev);
+	kbase_pm_ca_term(kbdev);
+
+	/* Shut down the metrics subsystem */
+	kbasep_pm_metrics_term(kbdev);
+
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
+}
+
+void kbase_pm_power_changed(struct kbase_device *kbdev)
+{
+	bool cores_are_available;
+	unsigned long flags;
+
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
+
+	if (cores_are_available) {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		kbase_backend_slot_update(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2)
+{
+	kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
+	kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
+	kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
+	kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
+			new_core_mask_js2;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	/* Force power off the GPU and all cores (regardless of policy), only
+	 * after the PM active count reaches zero (otherwise, we risk turning it
+	 * off prematurely) */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, true);
+
+	kbase_backend_timer_suspend(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbdev->pm.suspending = false;
+	kbase_pm_do_poweron(kbdev, true);
+
+	kbase_backend_timer_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100644
index 0000000..d3dceb0
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,183 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+	&kbase_pm_ca_fixed_policy_ops,
+#ifdef CONFIG_MALI_DEVFREQ
+	&kbase_pm_ca_devfreq_policy_ops,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_ca_demand_policy_ops,
+	&kbase_pm_ca_random_policy_ops,
+#endif
+};
+
+/**
+ * POLICY_COUNT - The number of policies available in the system.
+ *
+ * This is derived from the number of functions listed in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.ca_current_policy = policy_list[0];
+
+	kbdev->pm.backend.ca_current_policy->init(kbdev);
+
+	return 0;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_current_policy->term(kbdev);
+}
+
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
+
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.ca_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
+
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *new_policy)
+{
+	const struct kbase_pm_ca_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
+								new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.ca_current_policy;
+	kbdev->pm.backend.ca_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.ca_current_policy = new_policy;
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+					kbdev->shader_ready_bitmap,
+					kbdev->shader_transitioning_bitmap);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
+
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* All cores must be enabled when instrumentation is in use */
+	if (kbdev->pm.backend.instr_enabled)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	if (kbdev->pm.backend.ca_current_policy == NULL)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+	return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
+						kbdev->pm.debug_core_mask_all;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
+
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+							u64 cores_transitioning)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.ca_current_policy != NULL)
+		kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+							cores_ready,
+							cores_transitioning);
+}
+
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.instr_enabled = true;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbdev->pm.backend.instr_enabled = false;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
new file mode 100644
index 0000000..ee9e751
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+/**
+ * kbase_pm_ca_init - Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the core availability framework was successfully initialized,
+ *         -errno otherwise
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_term - Terminate core availability framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the currently available shader cores.
+ * Calls into the core availability policy
+ *
+ * Return: The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_update_core_status - Update core status
+ *
+ * @kbdev:               The kbase device structure for the device (must be
+ *                       a valid pointer)
+ * @cores_ready:         The bit mask of cores ready for job submission
+ * @cores_transitioning: The bit mask of cores that are transitioning power
+ *                       state
+ *
+ * Update core availability policy with current core power status
+ *
+ * Calls into the core availability policy
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+/**
+ * kbase_pm_ca_instr_enable - Enable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This overrides the output of the core availability policy, ensuring that all
+ * cores are available
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_instr_disable - Disable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This disables any previously enabled override, and resumes normal policy
+ * functionality
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_CA_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
new file mode 100644
index 0000000..19f1f73
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.c
@@ -0,0 +1,246 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation.
+ *
+ * This policy periodically selects a new core mask at demand. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/printk.h>
+
+#ifndef MALI_CA_TIMER
+static struct workqueue_struct *mali_ca_wq = NULL;
+#endif
+static int num_shader_cores_to_be_enabled = 0;
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+#ifndef MALI_CA_TIMER
+	struct kbase_device *kbdev = (struct kbase_device *)priv;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+#endif
+
+    num_shader_cores_to_be_enabled = cores -1;
+    printk("pp num=%d\n", num_shader_cores_to_be_enabled);
+    if (num_shader_cores_to_be_enabled < 1)
+        num_shader_cores_to_be_enabled = 1;
+
+#ifndef MALI_CA_TIMER
+    queue_work(mali_ca_wq, &data->wq_work);
+#endif
+
+    return 0;
+}
+
+#ifdef MALI_CA_TIMER
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void demand_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+#if 0
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+#endif
+    data->cores_desired = num_shader_cores_to_be_enabled;
+
+	if (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+            dev_info(kbdev->dev, "error, ca demand policy : new core mask=%llX\n",
+				data->cores_desired);
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "ca demand policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(demand_timer_callback);
+#else
+static void do_ca_scaling(struct work_struct *work)
+{
+    //unsigned long flags;
+	struct kbase_device *kbdev = = container_of(work,
+            struct kbasep_pm_ca_policy_demand, wq_work);
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+    data->cores_desired =  num_shader_cores_to_be_enabled;
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+
+}
+#endif
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+#ifdef MALI_CA_TIMER
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = demand_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+#else
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+	mali_ca_wq = alloc_workqueue("gpu_ca_wq", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+	mali_ca_wq = create_workqueue("gpu_ca_wq");
+#endif
+	INIT_WORK(&data->wq_work, do_ca_scaling);
+    if (mali_ca_wq == NULL) printk("Unable to create gpu scaling workqueue\n");
+#endif
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+#ifdef MALI_CA_TIMER
+	del_timer(&data->core_change_timer);
+#else
+    if (mali_ca_wq) {
+        flush_workqueue(mali_ca_wq);
+        destroy_workqueue(mali_ca_wq);
+        mali_ca_wq = NULL;
+    }
+#endif
+}
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.demand.cores_enabled;
+}
+
+static void demand_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_demand *data =
+				&kbdev->pm.backend.ca_policy_data.demand;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the demand core availability
+ * policy.
+ *
+ * This is the static structure that defines the demand core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_demand_policy_ops);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
new file mode 100644
index 0000000..a3dfe2a
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_demand.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing demand core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_DEMAND_H
+#define MALI_KBASE_PM_CA_DEMAND_H
+#include <linux/workqueue.h>
+/**
+ * struct kbasep_pm_ca_policy_demand - Private structure for demand ca policy
+ *
+ * This contains data that is private to the demand core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+#define MALI_CA_TIMER 1
+struct kbasep_pm_ca_policy_demand {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+#ifdef MALI_CA_TIMER
+	struct timer_list core_change_timer;
+#else
+	struct work_struct wq_work;
+#endif
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_demand_policy_ops;
+extern int mali_perf_set_num_pp_cores(int cores);
+
+#endif /* MALI_KBASE_PM_CA_DEMAND_H */
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c
new file mode 100644
index 0000000..66bf660
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A core availability policy implementing core mask selection from devfreq OPPs
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/version.h>
+
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
+{
+	struct kbasep_pm_ca_policy_devfreq *data =
+				&kbdev->pm.backend.ca_policy_data.devfreq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	data->cores_desired = core_mask;
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+static void devfreq_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_devfreq *data =
+				&kbdev->pm.backend.ca_policy_data.devfreq;
+
+	if (kbdev->current_core_mask) {
+		data->cores_enabled = kbdev->current_core_mask;
+		data->cores_desired = kbdev->current_core_mask;
+	} else {
+		data->cores_enabled =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		data->cores_desired =
+				kbdev->gpu_props.props.raw_props.shader_present;
+	}
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void devfreq_term(struct kbase_device *kbdev)
+{
+}
+
+static u64 devfreq_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.devfreq.cores_enabled;
+}
+
+static void devfreq_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_devfreq *data =
+				&kbdev->pm.backend.ca_policy_data.devfreq;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the devfreq core availability
+ * policy.
+ *
+ * This is the static structure that defines the devfreq core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops = {
+	"devfreq",			/* name */
+	devfreq_init,			/* init */
+	devfreq_term,			/* term */
+	devfreq_get_core_mask,		/* get_core_mask */
+	devfreq_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_DEVFREQ,	/* id */
+};
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h
new file mode 100644
index 0000000..7ab3cd4
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h
@@ -0,0 +1,55 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A core availability policy for use with devfreq, where core masks are
+ * associated with OPPs.
+ */
+
+#ifndef MALI_KBASE_PM_CA_DEVFREQ_H
+#define MALI_KBASE_PM_CA_DEVFREQ_H
+
+/**
+ * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy
+ *
+ * This contains data that is private to the devfreq core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ */
+struct kbasep_pm_ca_policy_devfreq {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops;
+
+/**
+ * kbase_devfreq_set_core_mask - Set core mask for policy to use
+ * @kbdev: Device pointer
+ * @core_mask: New core mask
+ *
+ * The new core mask will have immediate effect if the GPU is powered, or will
+ * take effect when it is next powered on.
+ */
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask);
+
+#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
new file mode 100644
index 0000000..1db6586
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static void fixed_init(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev,
+					u64 cores_ready,
+					u64 cores_transitioning)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(cores_ready);
+	CSTD_UNUSED(cores_transitioning);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the fixed power policy.
+ *
+ * This is the static structure that defines the fixed power policy's callback
+ * and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+	"fixed",			/* name */
+	fixed_init,			/* init */
+	fixed_term,			/* term */
+	fixed_get_core_mask,		/* get_core_mask */
+	fixed_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_FIXED,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
new file mode 100644
index 0000000..a763155
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Dummy member - no state is needed
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+	int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
new file mode 100644
index 0000000..0d1b220
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation.
+ *
+ * This policy periodically selects a new core mask at random. This new mask is
+ * applied in two stages. It initially powers off all undesired cores, by
+ * removing them from the core availability mask. Once it is confirmed that
+ * these cores are powered off, then the desired cores are powered on.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/random.h>
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+void random_timer_callback(unsigned long cb)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)cb;
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Select new core mask, ensuring that core group 0 is not powered off
+	 */
+	do {
+		data->cores_desired = prandom_u32() &
+				kbdev->gpu_props.props.raw_props.shader_present;
+	} while (!(data->cores_desired &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask));
+
+	/* Disable any cores that are now unwanted */
+	data->cores_enabled &= data->cores_desired;
+
+	kbdev->pm.backend.ca_in_transition = true;
+
+	/* If there are no cores to be powered off then power on desired cores
+	 */
+	if (!(data->cores_used & ~data->cores_desired)) {
+		data->cores_enabled = data->cores_desired;
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	dev_info(kbdev->dev, "Random policy : new core mask=%llX %llX\n",
+				data->cores_desired, data->cores_enabled);
+}
+
+KBASE_EXPORT_TEST_API(random_timer_callback);
+
+static void random_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	data->cores_enabled = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_desired = kbdev->gpu_props.props.raw_props.shader_present;
+	data->cores_used = 0;
+	kbdev->pm.backend.ca_in_transition = false;
+
+	init_timer(&data->core_change_timer);
+
+	data->core_change_timer.function = random_timer_callback;
+	data->core_change_timer.data = (unsigned long) kbdev;
+	data->core_change_timer.expires = jiffies + 5 * HZ;
+
+	add_timer(&data->core_change_timer);
+}
+
+static void random_term(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	del_timer(&data->core_change_timer);
+}
+
+static u64 random_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->pm.backend.ca_policy_data.random.cores_enabled;
+}
+
+static void random_update_core_status(struct kbase_device *kbdev,
+							u64 cores_ready,
+							u64 cores_transitioning)
+{
+	struct kbasep_pm_ca_policy_random *data =
+				&kbdev->pm.backend.ca_policy_data.random;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	data->cores_used = cores_ready | cores_transitioning;
+
+	/* If in desired state then clear transition flag */
+	if (data->cores_enabled == data->cores_desired)
+		kbdev->pm.backend.ca_in_transition = false;
+
+	/* If all undesired cores are now off then power on desired cores.
+	 * The direct comparison against cores_enabled limits potential
+	 * recursion to one level */
+	if (!(data->cores_used & ~data->cores_desired) &&
+				data->cores_enabled != data->cores_desired) {
+		data->cores_enabled = data->cores_desired;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		kbdev->pm.backend.ca_in_transition = false;
+	}
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the random core availability
+ * policy.
+ *
+ * This is the static structure that defines the random core availability power
+ * policy's callback and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops = {
+	"random",			/* name */
+	random_init,			/* init */
+	random_term,			/* term */
+	random_get_core_mask,		/* get_core_mask */
+	random_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_RANDOM,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_random_policy_ops);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
new file mode 100644
index 0000000..a8c9b15
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_random.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+/*
+ * A core availability policy implementing random core rotation
+ */
+
+#ifndef MALI_KBASE_PM_CA_RANDOM_H
+#define MALI_KBASE_PM_CA_RANDOM_H
+
+/**
+ * struct kbasep_pm_ca_policy_random - Private structure for random ca policy
+ *
+ * This contains data that is private to the random core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ * @core_change_timer: Timer for changing desired core mask
+ */
+struct kbasep_pm_ca_policy_random {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+	struct timer_list core_change_timer;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_RANDOM_H */
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100644
index 0000000..f891fa2
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count == 0)
+		return 0;
+
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	"coarse_demand",			/* name */
+	coarse_demand_init,			/* init */
+	coarse_demand_term,			/* term */
+	coarse_demand_get_core_mask,		/* get_core_mask */
+	coarse_demand_get_core_active,		/* get_core_active */
+	0u,					/* flags */
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100644
index 0000000..749d305
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * DOC:
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
+ *                                         policy
+ *
+ * This contains data that is private to the coarse demand power policy.
+ *
+ * @dummy: Dummy member - no state needed
+ */
+struct kbasep_pm_policy_coarse_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100644
index 0000000..51cbf0c
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,521 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#include "mali_kbase_pm_ca_devfreq.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#include "mali_kbase_pm_ca_demand.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/**
+ * enum kbase_pm_core_type - The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to
+ * - kbase_pm_get_present_cores()
+ * - kbase_pm_get_active_cores()
+ * - kbase_pm_get_trans_cores()
+ * - kbase_pm_get_ready_cores().
+ *
+ * They specify which type of core should be acted on.  These values are set in
+ * a manner that allows core_type_to_reg() function to be simpler and more
+ * efficient.
+ *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ * @KBASE_PM_CORE_STACK: Core stacks
+ */
+enum kbase_pm_core_type {
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO,
+	KBASE_PM_CORE_STACK = STACK_PRESENT_LO
+};
+
+/**
+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power
+ *                                 management framework.
+ *
+ *  @time_period_start: time at which busy/idle measurements started
+ *  @time_busy: number of ns the GPU was busy executing jobs since the
+ *          @time_period_start timestamp.
+ *  @time_idle: number of ns since time_period_start the GPU was not executing
+ *          jobs since the @time_period_start timestamp.
+ *  @prev_busy: busy time in ns of previous time period.
+ *           Updated when metrics are reset.
+ *  @prev_idle: idle time in ns of previous time period
+ *           Updated when metrics are reset.
+ *  @gpu_active: true when the GPU is executing jobs. false when
+ *           not. Updated when the job scheduler informs us a job in submitted
+ *           or removed from a GPU slot.
+ *  @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
+ *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
+ *           GL jobs never run on slot 2 this slot is not recorded.
+ *  @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ *  @timer: timer to regularly make DVFS decisions based on the power
+ *           management metrics.
+ *  @timer_active: boolean indicating @timer is running
+ *  @platform_data: pointer to data controlled by platform specific code
+ *  @kbdev: pointer to kbase device for which metrics are collected
+ *
+ */
+struct kbasep_pm_metrics_data {
+	ktime_t time_period_start;
+	u32 time_busy;
+	u32 time_idle;
+	u32 prev_busy;
+	u32 prev_idle;
+	bool gpu_active;
+	u32 busy_cl[2];
+	u32 busy_gl;
+	u32 active_cl_ctx[2];
+	u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
+	spinlock_t lock;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	struct hrtimer timer;
+	bool timer_active;
+#endif
+
+	void *platform_data;
+	struct kbase_device *kbdev;
+};
+
+union kbase_pm_policy_data {
+	struct kbasep_pm_policy_always_on always_on;
+	struct kbasep_pm_policy_coarse_demand coarse_demand;
+	struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+	struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+union kbase_pm_ca_policy_data {
+	struct kbasep_pm_ca_policy_fixed fixed;
+	struct kbasep_pm_ca_policy_devfreq devfreq;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_ca_policy_random random;
+	struct kbasep_pm_ca_policy_demand demand;
+#endif
+};
+
+/**
+ * struct kbase_pm_backend_data - Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ *
+ * @ca_current_policy: The policy that is currently actively controlling core
+ *                     availability.
+ * @pm_current_policy: The policy that is currently actively controlling the
+ *                     power state.
+ * @ca_policy_data:    Private data for current CA policy
+ * @pm_policy_data:    Private data for current PM policy
+ * @ca_in_transition:  Flag indicating when core availability policy is
+ *                     transitioning cores. The core availability policy must
+ *                     set this when a change in core availability is occurring.
+ *                     power_change_lock must be held when accessing this.
+ * @reset_done:        Flag when a reset is complete
+ * @reset_done_wait:   Wait queue to wait for changes to @reset_done
+ * @l2_powered_wait:   Wait queue for whether the l2 cache has been powered as
+ *                     requested
+ * @l2_powered:        State indicating whether all the l2 caches are powered.
+ *                     Non-zero indicates they're *all* powered
+ *                     Zero indicates that some (or all) are not powered
+ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
+ *                              users
+ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
+ * @desired_shader_state: A bit mask identifying the shader cores that the
+ *                        power policy would like to be on. The current state
+ *                        of the cores may be different, but there should be
+ *                        transitions in progress that will eventually achieve
+ *                        this state (assuming that the policy doesn't change
+ *                        its mind in the mean time).
+ * @powering_on_shader_state: A bit mask indicating which shader cores are
+ *                            currently in a power-on transition
+ * @desired_tiler_state: A bit mask identifying the tiler cores that the power
+ *                       policy would like to be on. See @desired_shader_state
+ * @powering_on_tiler_state: A bit mask indicating which tiler core are
+ *                           currently in a power-on transition
+ * @powering_on_l2_state: A bit mask indicating which l2-caches are currently
+ *                        in a power-on transition
+ * @powering_on_stack_state: A bit mask indicating which core stacks are
+ *                           currently in a power-on transition
+ * @gpu_in_desired_state: This flag is set if the GPU is powered as requested
+ *                        by the desired_xxx_state variables
+ * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
+ * @gpu_powered:       Set to true when the GPU is powered and register
+ *                     accesses are possible, false otherwise
+ * @instr_enabled:     Set to true when instrumentation is enabled,
+ *                     false otherwise
+ * @cg1_disabled:      Set if the policy wants to keep the second core group
+ *                     powered off
+ * @driver_ready_for_irqs: Debug state indicating whether sufficient
+ *                         initialization of the driver has occurred to handle
+ *                         IRQs
+ * @gpu_powered_lock:  Spinlock that must be held when writing @gpu_powered or
+ *                     accessing @driver_ready_for_irqs
+ * @metrics:           Structure to hold metrics for the GPU
+ * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
+ *                        powered off
+ * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
+ *                        and/or timers are powered off
+ * @gpu_poweroff_timer: Timer for powering off GPU
+ * @gpu_poweroff_wq:   Workqueue to power off GPU on when timer fires
+ * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
+ * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
+ *                           timer callback
+ * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
+ *                          callback
+ * @poweroff_timer_needed: true if the poweroff timer is currently required,
+ *                         false otherwise
+ * @poweroff_timer_running: true if the poweroff timer is currently running,
+ *                          false otherwise
+ *                          power_change_lock should be held when accessing,
+ *                          unless there is no way the timer can be running (eg
+ *                          hrtimer_cancel() was called immediately before)
+ * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
+ *                             hwaccess_lock must be held when accessing
+ * @poweron_required: true if a GPU power on is required. Should only be set
+ *                    when poweroff_wait_in_progress is true, and therefore the
+ *                    GPU can not immediately be powered on. pm.lock must be
+ *                    held when accessing
+ * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend
+ *                       request. pm.lock must be held when accessing
+ * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off
+ * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq
+ * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete
+ * @callback_power_on: Callback when the GPU needs to be turned on. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_off: Callback when the GPU may be turned off. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to
+ *                          be turned off. See &struct kbase_pm_callback_conf
+ * @callback_power_resume: Callback when a resume occurs and the GPU needs to
+ *                          be turned on. See &struct kbase_pm_callback_conf
+ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See
+ *                             &struct kbase_pm_callback_conf
+ * @callback_power_runtime_off: Callback when the GPU may be turned off. See
+ *                              &struct kbase_pm_callback_conf
+ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
+ *                              &struct kbase_pm_callback_conf
+ *
+ * Note:
+ * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
+ * policy is being changed with kbase_pm_ca_set_policy() or
+ * kbase_pm_set_policy(). The change is protected under
+ * kbase_device.pm.power_change_lock. Direct access to this
+ * from IRQ context must therefore check for NULL. If NULL, then
+ * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
+ * functions that would have been done under IRQ.
+ */
+struct kbase_pm_backend_data {
+	const struct kbase_pm_ca_policy *ca_current_policy;
+	const struct kbase_pm_policy *pm_current_policy;
+	union kbase_pm_ca_policy_data ca_policy_data;
+	union kbase_pm_policy_data pm_policy_data;
+	bool ca_in_transition;
+	bool reset_done;
+	wait_queue_head_t reset_done_wait;
+	wait_queue_head_t l2_powered_wait;
+	int l2_powered;
+	int gpu_cycle_counter_requests;
+	spinlock_t gpu_cycle_counter_requests_lock;
+
+	u64 desired_shader_state;
+	u64 powering_on_shader_state;
+	u64 desired_tiler_state;
+	u64 powering_on_tiler_state;
+	u64 powering_on_l2_state;
+#ifdef CONFIG_MALI_CORESTACK
+	u64 powering_on_stack_state;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	bool gpu_in_desired_state;
+	wait_queue_head_t gpu_in_desired_state_wait;
+
+	bool gpu_powered;
+
+	bool instr_enabled;
+
+	bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+	bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+	spinlock_t gpu_powered_lock;
+
+
+	struct kbasep_pm_metrics_data metrics;
+
+	int gpu_poweroff_pending;
+	int shader_poweroff_pending_time;
+
+	struct hrtimer gpu_poweroff_timer;
+	struct workqueue_struct *gpu_poweroff_wq;
+	struct work_struct gpu_poweroff_work;
+
+	u64 shader_poweroff_pending;
+	u64 tiler_poweroff_pending;
+
+	bool poweroff_timer_needed;
+	bool poweroff_timer_running;
+
+	bool poweroff_wait_in_progress;
+	bool poweron_required;
+	bool poweroff_is_suspend;
+
+	struct workqueue_struct *gpu_poweroff_wait_wq;
+	struct work_struct gpu_poweroff_wait_work;
+
+	wait_queue_head_t poweroff_wait;
+
+	int (*callback_power_on)(struct kbase_device *kbdev);
+	void (*callback_power_off)(struct kbase_device *kbdev);
+	void (*callback_power_suspend)(struct kbase_device *kbdev);
+	void (*callback_power_resume)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
+};
+
+
+/* List of policy IDs */
+enum kbase_pm_policy_id {
+	KBASE_PM_POLICY_ID_DEMAND = 1,
+	KBASE_PM_POLICY_ID_ALWAYS_ON,
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
+	KBASE_PM_POLICY_ID_FAST_START
+#endif
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+/**
+ * struct kbase_pm_policy - Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core mask
+ * @get_core_active:    Function called to get the current overall GPU power
+ *                      state
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.pm_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core mask
+	 *
+	 * The returned mask should meet or exceed (kbdev->shader_needed_bitmap
+	 * | kbdev->shader_inuse_bitmap).
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: The mask of shader cores to be powered
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current overall GPU power state
+	 *
+	 * This function should consider the state of kbdev->pm.active_count. If
+	 * this count is greater than 0 then there is at least one active
+	 * context on the device and the GPU should be powered. If it is equal
+	 * to 0 then there are no active contexts and the GPU could be powered
+	 * off if desired.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: true if the GPU should be powered, false otherwise
+	 */
+	bool (*get_core_active)(struct kbase_device *kbdev);
+
+	kbase_pm_policy_flags flags;
+	enum kbase_pm_policy_id id;
+};
+
+
+enum kbase_pm_ca_policy_id {
+	KBASE_PM_CA_POLICY_ID_FIXED = 1,
+	KBASE_PM_CA_POLICY_ID_DEVFREQ,
+	KBASE_PM_CA_POLICY_ID_RANDOM
+};
+
+typedef u32 kbase_pm_ca_policy_flags;
+
+/**
+ * Maximum length of a CA policy names
+ */
+#define KBASE_PM_CA_MAX_POLICY_NAME_LEN 15
+
+/**
+ * struct kbase_pm_ca_policy - Core availability policy structure.
+ *
+ * Each core availability policy exposes a (static) instance of this structure
+ * which contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core
+ *                      availability mask
+ * @update_core_status: Function called to update the current core status
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_ca_policy {
+	char name[KBASE_PM_CA_MAX_POLICY_NAME_LEN + 1];
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.ca_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *        valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core availability mask
+	 *
+	 * When a change in core availability is occurring, the policy must set
+	 * kbdev->pm.ca_in_transition to true. This is to indicate that
+	 * reporting changes in power state cannot be optimized out, even if
+	 * kbdev->pm.desired_shader_state remains unchanged. This must be done
+	 * by any functions internal to the Core Availability Policy that change
+	 * the return value of kbase_pm_ca_policy::get_core_mask.
+	 *
+	 * @kbdev The kbase device structure for the device (must be a
+	 *              valid pointer)
+	 *
+	 * Return: The current core availability mask
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to update the current core status
+	 *
+	 * If none of the cores in core group 0 are ready or transitioning, then
+	 * the policy must ensure that the next call to get_core_mask does not
+	 * return 0 for all cores in core group 0. It is an error to disable
+	 * core group 0 through the core availability policy.
+	 *
+	 * When a change in core availability has finished, the policy must set
+	 * kbdev->pm.ca_in_transition to false. This is to indicate that
+	 * changes in power state can once again be optimized out when
+	 * kbdev->pm.desired_shader_state is unchanged.
+	 *
+	 * @kbdev:               The kbase device structure for the device
+	 *                       (must be a valid pointer)
+	 * @cores_ready:         The mask of cores currently powered and
+	 *                       ready to run jobs
+	 * @cores_transitioning: The mask of cores currently transitioning
+	 *                       power state
+	 */
+	void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+	kbase_pm_ca_policy_flags flags;
+
+	/**
+	 * Field indicating an ID for this policy. This is not necessarily the
+	 * same as its index in the list returned by kbase_pm_list_policies().
+	 * It is used purely for debugging.
+	 */
+	enum kbase_pm_ca_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
new file mode 100644
index 0000000..81322fd
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
+
+	if (0 == kbdev->pm.active_count)
+		return 0;
+
+	return desired;
+}
+
+static bool demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_get_core_mask,		/* get_core_mask */
+	demand_get_core_active,		/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
new file mode 100644
index 0000000..c0c84b6
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#ifndef MALI_KBASE_PM_DEMAND_H
+#define MALI_KBASE_PM_DEMAND_H
+
+/**
+ * DOC: Demand power management policy
+ *
+ * The demand power management policy has the following characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - The Shader Cores are not powered up
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - Only those Shader Cores are powered up
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * Note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_demand - Private structure for policy instance data
+ *
+ * @dummy: No state is needed, a dummy variable
+ *
+ * This contains data that is private to the demand power policy.
+ */
+struct kbasep_pm_policy_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_DEMAND_H */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
new file mode 100644
index 0000000..6fb9eab
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -0,0 +1,1718 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel Power Management hardware control
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_smc.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+
+#if MALI_MOCK_TEST
+#define MOCKABLE(function) function##_original
+#else
+#define MOCKABLE(function) function
+#endif				/* MALI_MOCK_TEST */
+
+/* Special value to indicate that the JM_CONFIG reg isn't currently used. */
+#define KBASE_JM_CONFIG_UNUSED (1<<31)
+
+static u64 kbase_pm_get_state(
+		struct kbase_device *kbdev,
+		enum kbase_pm_core_type core_type,
+		enum kbasep_pm_action action);
+
+/**
+ * core_type_to_reg - Decode a core type and action to a register.
+ *
+ * Given a core type (defined by kbase_pm_core_type) and an action (defined
+ * by kbasep_pm_action) this function will return the register offset that
+ * will perform the action on the core type. The register returned is the _LO
+ * register and an offset must be applied to use the _HI register.
+ *
+ * @core_type: The type of core
+ * @action:    The type of action
+ *
+ * Return: The register offset of the _LO register that performs an action of
+ * type @action on a core of type @core_type.
+ */
+static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
+						enum kbasep_pm_action action)
+{
+#ifdef CONFIG_MALI_CORESTACK
+	if (core_type == KBASE_PM_CORE_STACK) {
+		switch (action) {
+		case ACTION_PRESENT:
+			return STACK_PRESENT_LO;
+		case ACTION_READY:
+			return STACK_READY_LO;
+		case ACTION_PWRON:
+			return STACK_PWRON_LO;
+		case ACTION_PWROFF:
+			return STACK_PWROFF_LO;
+		case ACTION_PWRTRANS:
+			return STACK_PWRTRANS_LO;
+		default:
+			BUG();
+		}
+	}
+#endif /* CONFIG_MALI_CORESTACK */
+
+	return (u32)core_type + (u32)action;
+}
+
+#ifdef CONFIG_ARM64
+static void mali_cci_flush_l2(struct kbase_device *kbdev)
+{
+	const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED;
+	u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+	u32 raw;
+
+	/*
+	 * Note that we don't take the cache flush mutex here since
+	 * we expect to be the last user of the L2, all other L2 users
+	 * would have dropped their references, to initiate L2 power
+	 * down, L2 power down being the only valid place for this
+	 * to be called from.
+	 */
+
+	kbase_reg_write(kbdev,
+			GPU_CONTROL_REG(GPU_COMMAND),
+			GPU_COMMAND_CLEAN_INV_CACHES,
+			NULL);
+
+	raw = kbase_reg_read(kbdev,
+		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+		NULL);
+
+	/* Wait for cache flush to complete before continuing, exit on
+	 * gpu resets or loop expiry. */
+	while (((raw & mask) == 0) && --loops) {
+		raw = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+					NULL);
+	}
+}
+#endif
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo = cores & 0xFFFFFFFF;
+	u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	if (cores) {
+		if (action == ACTION_PWRON)
+			kbase_trace_mali_pm_power_on(core_type, cores);
+		else if (action == ACTION_PWROFF)
+			kbase_trace_mali_pm_power_off(core_type, cores);
+	}
+#endif
+
+	if (cores) {
+		u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
+
+		if (action == ACTION_PWRON)
+			state |= cores;
+		else if (action == ACTION_PWROFF)
+			state &= ~cores;
+		KBASE_TLSTREAM_AUX_PM_STATE(core_type, state);
+	}
+
+	/* Tracing */
+	if (cores) {
+		if (action == ACTION_PWRON)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u,
+									lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL,
+									0u, lo);
+				break;
+			default:
+				break;
+			}
+		else if (action == ACTION_PWROFF)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL,
+									0u, lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
+									0u, lo);
+				/* disable snoops before L2 is turned off */
+				kbase_pm_cache_snoop_disable(kbdev);
+				break;
+			default:
+				break;
+			}
+	}
+
+	if (lo != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+	if (hi != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
+
+/**
+ * kbase_pm_get_state - Get information about a core set
+ *
+ * This function gets information (chosen by @action) about a set of cores of
+ * a type given by @core_type. It is a static function used by
+ * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and
+ * kbase_pm_get_ready_cores().
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the should be queried
+ * @action:    The property of the cores to query
+ *
+ * Return: A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo, hi;
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+
+	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+	return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev)
+{
+	kbdev->shader_inuse_bitmap = 0;
+	kbdev->shader_needed_bitmap = 0;
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_users_count = 0;
+	kbdev->l2_available_bitmap = 0;
+	kbdev->tiler_needed_cnt = 0;
+	kbdev->tiler_inuse_cnt = 0;
+
+	memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+/**
+ * kbase_pm_get_present_cores - Get the cores that are present
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (type) {
+	case KBASE_PM_CORE_L2:
+		return kbdev->gpu_props.props.raw_props.l2_present;
+	case KBASE_PM_CORE_SHADER:
+		return kbdev->gpu_props.props.raw_props.shader_present;
+	case KBASE_PM_CORE_TILER:
+		return kbdev->gpu_props.props.raw_props.tiler_present;
+	case KBASE_PM_CORE_STACK:
+		return kbdev->gpu_props.props.raw_props.stack_present;
+	default:
+		break;
+	}
+	KBASE_DEBUG_ASSERT(0);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * kbase_pm_get_active_cores - Get the cores that are "active"
+ *                             (busy processing work)
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are active
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between
+ *                            power states
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are transitioning
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * kbase_pm_get_ready_cores - Get the cores that are powered on
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are ready (powered on)
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	u64 result;
+
+	result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+	switch (type) {
+	case KBASE_PM_CORE_SHADER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_TILER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_L2:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * kbase_pm_transition_core_type - Perform power transitions for a particular
+ *                                 core type.
+ *
+ * This function will perform any available power transitions to make the actual
+ * hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that call cannot be made
+ * until the transition has finished. Cores which are not present in the
+ * hardware are ignored if they are specified in the desired_state bitmask,
+ * however the return value will always be 0 in this case.
+ *
+ * @kbdev:             The kbase device
+ * @type:              The core type to perform transitions for
+ * @desired_state:     A bit mask of the desired state of the cores
+ * @in_use:            A bit mask of the cores that are currently running
+ *                     jobs. These cores have to be kept powered up because
+ *                     there are jobs running (or about to run) on them.
+ * @available:         Receives a bit mask of the cores that the job
+ *                     scheduler can use to submit jobs to. May be NULL if
+ *                     this is not needed.
+ * @powering_on:       Bit mask to update with cores that are
+ *                    transitioning to a power-on state.
+ *
+ * Return: true if the desired state has been reached, false otherwise
+ */
+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type,
+						u64 desired_state,
+						u64 in_use,
+						u64 * const available,
+						u64 *powering_on)
+{
+	u64 present;
+	u64 ready;
+	u64 trans;
+	u64 powerup;
+	u64 powerdown;
+	u64 powering_on_trans;
+	u64 desired_state_in_use;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Get current state */
+	present = kbase_pm_get_present_cores(kbdev, type);
+	trans = kbase_pm_get_trans_cores(kbdev, type);
+	ready = kbase_pm_get_ready_cores(kbdev, type);
+	/* mask off ready from trans in case transitions finished between the
+	 * register reads */
+	trans &= ~ready;
+
+	if (trans) /* Do not progress if any cores are transitioning */
+		return false;
+
+	powering_on_trans = trans & *powering_on;
+	*powering_on = powering_on_trans;
+
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	/* Update desired state to include the in-use cores. These have to be
+	 * kept powered up because there are jobs running or about to run on
+	 * these cores
+	 */
+	desired_state_in_use = desired_state | in_use;
+
+	/* Update state of whether l2 caches are powered */
+	if (type == KBASE_PM_CORE_L2) {
+		if ((ready == present) && (desired_state_in_use == ready) &&
+								(trans == 0)) {
+			/* All are ready, none will be turned off, and none are
+			 * transitioning */
+			kbdev->pm.backend.l2_powered = 1;
+			/*
+			 * Ensure snoops are enabled after L2 is powered up,
+			 * note that kbase keeps track of the snoop state, so
+			 * safe to repeatedly call.
+			 */
+			kbase_pm_cache_snoop_enable(kbdev);
+			if (kbdev->l2_users_count > 0) {
+				/* Notify any registered l2 cache users
+				 * (optimized out when no users waiting) */
+				wake_up(&kbdev->pm.backend.l2_powered_wait);
+			}
+		} else
+			kbdev->pm.backend.l2_powered = 0;
+	}
+
+	if (desired_state == ready && (trans == 0))
+		return true;
+
+	/* Restrict the cores to those that are actually present */
+	powerup = desired_state_in_use & present;
+	powerdown = (~desired_state_in_use) & present;
+
+	/* Restrict to cores that are not already in the desired state */
+	powerup &= ~ready;
+	powerdown &= ready;
+
+	/* Don't transition any cores that are already transitioning, except for
+	 * Mali cores that support the following case:
+	 *
+	 * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
+	 * a core that is currently transitioning to power off, then this is
+	 * remembered and the shader core is automatically powered up again once
+	 * the original transition completes. Once the automatic power on is
+	 * complete any job scheduled on the shader core should start.
+	 */
+	powerdown &= ~trans;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
+		if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
+			trans = powering_on_trans; /* for exception cases, only
+						    * mask off cores in power on
+						    * transitions */
+
+	powerup &= ~trans;
+
+	/* Perform transitions if any */
+	kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
+#if !PLATFORM_POWER_DOWN_ONLY
+	kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
+#endif
+
+	/* Recalculate cores transitioning on, and re-evaluate our state */
+	powering_on_trans |= powerup;
+	*powering_on = powering_on_trans;
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	return false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
+
+/**
+ * get_desired_cache_status - Determine which caches should be on for a
+ *                            particular core state
+ *
+ * This function takes a bit mask of the present caches and the cores (or
+ * caches) that are attached to the caches that will be powered. It then
+ * computes which caches should be turned on to allow the cores requested to be
+ * powered up.
+ *
+ * @present:       The bit mask of present caches
+ * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
+ *                 be powered
+ * @tilers_powered: The bit mask of tilers that are desired to be powered
+ *
+ * Return: A bit mask of the caches that should be turned on
+ */
+static u64 get_desired_cache_status(u64 present, u64 cores_powered,
+		u64 tilers_powered)
+{
+	u64 desired = 0;
+
+	while (present) {
+		/* Find out which is the highest set bit */
+		u64 bit = fls64(present) - 1;
+		u64 bit_mask = 1ull << bit;
+		/* Create a mask which has all bits from 'bit' upwards set */
+
+		u64 mask = ~(bit_mask - 1);
+
+		/* If there are any cores powered at this bit or above (that
+		 * haven't previously been processed) then we need this core on
+		 */
+		if (cores_powered & mask)
+			desired |= bit_mask;
+
+		/* Remove bits from cores_powered and present */
+		cores_powered &= ~mask;
+		present &= ~bit_mask;
+	}
+
+	/* Power up the required L2(s) for the tiler */
+	if (tilers_powered)
+		desired |= 1;
+
+	return desired;
+}
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status);
+
+#ifdef CONFIG_MALI_CORESTACK
+u64 kbase_pm_core_stack_mask(u64 cores)
+{
+	u64 stack_mask = 0;
+	size_t const MAX_CORE_ID = 31;
+	size_t const NUM_CORES_PER_STACK = 4;
+	size_t i;
+
+	for (i = 0; i <= MAX_CORE_ID; ++i) {
+		if (test_bit(i, (unsigned long *)&cores)) {
+			/* Every core which ID >= 16 is filled to stacks 4-7
+			 * instead of 0-3 */
+			size_t const stack_num = (i >= 16) ?
+				(i % NUM_CORES_PER_STACK) + 4 :
+				(i % NUM_CORES_PER_STACK);
+			set_bit(stack_num, (unsigned long *)&stack_mask);
+		}
+	}
+
+	return stack_mask;
+}
+#endif /* CONFIG_MALI_CORESTACK */
+
+bool
+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+	bool cores_are_available = false;
+	bool in_desired_state = true;
+	u64 desired_l2_state;
+#ifdef CONFIG_MALI_CORESTACK
+	u64 desired_stack_state;
+	u64 stacks_powered;
+#endif /* CONFIG_MALI_CORESTACK */
+	u64 cores_powered;
+	u64 tilers_powered;
+	u64 tiler_available_bitmap;
+	u64 tiler_transitioning_bitmap;
+	u64 shader_available_bitmap;
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+	u64 l2_available_bitmap;
+	u64 prev_l2_available_bitmap;
+	u64 l2_inuse_bitmap;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+	if (kbdev->pm.backend.gpu_powered == false) {
+		spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+		if (kbdev->pm.backend.desired_shader_state == 0 &&
+				kbdev->pm.backend.desired_tiler_state == 0)
+			return true;
+		return false;
+	}
+
+	/* Trace that a change-state is being requested, and that it took
+	 * (effectively) no time to start it. This is useful for counting how
+	 * many state changes occurred, in a way that's backwards-compatible
+	 * with processing the trace data */
+	kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+	kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+
+	/* If any cores are already powered then, we must keep the caches on */
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+	cores_powered |= kbdev->pm.backend.desired_shader_state;
+
+#ifdef CONFIG_MALI_CORESTACK
+	/* Work out which core stacks want to be powered */
+	desired_stack_state = kbase_pm_core_stack_mask(cores_powered);
+	stacks_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK) |
+		desired_stack_state;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	/* Work out which tilers want to be powered */
+	tiler_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_TILER);
+	tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+	tilers_powered |= kbdev->pm.backend.desired_tiler_state;
+
+	/* If there are l2 cache users registered, keep all l2s powered even if
+	 * all other cores are off. */
+	if (kbdev->l2_users_count > 0)
+		cores_powered |= kbdev->gpu_props.props.raw_props.l2_present;
+
+	desired_l2_state = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered, tilers_powered);
+
+	l2_inuse_bitmap = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered | shader_transitioning_bitmap,
+			tilers_powered | tiler_transitioning_bitmap);
+
+#ifdef CONFIG_MALI_CORESTACK
+	if (stacks_powered)
+		desired_l2_state |= 1;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	/* If any l2 cache is on, then enable l2 #0, for use by job manager */
+	if (0 != desired_l2_state)
+		desired_l2_state |= 1;
+
+	prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+	in_desired_state &= kbase_pm_transition_core_type(kbdev,
+			KBASE_PM_CORE_L2, desired_l2_state, l2_inuse_bitmap,
+			&l2_available_bitmap,
+			&kbdev->pm.backend.powering_on_l2_state);
+
+	if (kbdev->l2_available_bitmap != l2_available_bitmap)
+		KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
+
+	kbdev->l2_available_bitmap = l2_available_bitmap;
+
+
+#ifdef CONFIG_MALI_CORESTACK
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_STACK, desired_stack_state, 0,
+				&kbdev->stack_available_bitmap,
+				&kbdev->pm.backend.powering_on_stack_state);
+	}
+#endif /* CONFIG_MALI_CORESTACK */
+
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_TILER,
+				kbdev->pm.backend.desired_tiler_state,
+				0, &tiler_available_bitmap,
+				&kbdev->pm.backend.powering_on_tiler_state);
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_SHADER,
+				kbdev->pm.backend.desired_shader_state,
+				kbdev->shader_inuse_bitmap,
+				&shader_available_bitmap,
+				&kbdev->pm.backend.powering_on_shader_state);
+
+		if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u,
+						(u32) shader_available_bitmap);
+			KBASE_TIMELINE_POWER_SHADER(kbdev,
+						shader_available_bitmap);
+		}
+
+		kbdev->shader_available_bitmap = shader_available_bitmap;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u,
+						(u32) tiler_available_bitmap);
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+		}
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+	} else if ((l2_available_bitmap &
+			kbdev->gpu_props.props.raw_props.tiler_present) !=
+			kbdev->gpu_props.props.raw_props.tiler_present) {
+		tiler_available_bitmap = 0;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
+			KBASE_TIMELINE_POWER_TILER(kbdev,
+							tiler_available_bitmap);
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+	}
+
+	/* State updated for slow-path waiters */
+	kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+
+	shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* Determine whether the cores are now available (even if the set of
+	 * available cores is empty). Note that they can be available even if
+	 * we've not finished transitioning to the desired state */
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state) {
+		cores_are_available = true;
+
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+				(u32)(kbdev->shader_available_bitmap &
+				kbdev->pm.backend.desired_shader_state));
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+				(u32)(kbdev->tiler_available_bitmap &
+				kbdev->pm.backend.desired_tiler_state));
+
+		/* Log timelining information about handling events that power
+		 * up cores, to match up either with immediate submission either
+		 * because cores already available, or from PM IRQ */
+		if (!in_desired_state)
+			kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+
+	if (in_desired_state) {
+		KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_L2));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+#endif
+
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_L2,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_L2));
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_SHADER,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_SHADER));
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_TILER,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_STACK,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+				kbdev->pm.backend.gpu_in_desired_state,
+				(u32)kbdev->pm.backend.desired_shader_state);
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+				(u32)kbdev->pm.backend.desired_tiler_state);
+
+		/* Log timelining information for synchronous waiters */
+		kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+		/* Wake slow-path waiters. Job scheduler does not use this. */
+		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+
+		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
+	}
+
+	spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+
+	/* kbase_pm_ca_update_core_status can cause one-level recursion into
+	 * this function, so it must only be called once all changes to kbdev
+	 * have been committed, and after the gpu_powered_lock has been
+	 * dropped. */
+	if (kbdev->shader_ready_bitmap != shader_ready_bitmap ||
+	    kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) {
+		kbdev->shader_ready_bitmap = shader_ready_bitmap;
+		kbdev->shader_transitioning_bitmap =
+						shader_transitioning_bitmap;
+
+		kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap,
+						shader_transitioning_bitmap);
+	}
+
+	/* The core availability policy is not allowed to keep core group 0
+	 * turned off (unless it was changing the l2 power state) */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
+		(prev_l2_available_bitmap == desired_l2_state) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+		BUG();
+
+	/* The core availability policy is allowed to keep core group 1 off,
+	 * but all jobs specifically targeting CG1 must fail */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask))
+		kbdev->pm.backend.cg1_disabled = true;
+	else
+		kbdev->pm.backend.cg1_disabled = false;
+
+	return cores_are_available;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock);
+
+/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has
+ * aborted due to a fatal signal. If the time spent waiting has exceeded this
+ * threshold then there is most likely a hardware issue. */
+#define PM_TIMEOUT (5*HZ) /* 5s */
+
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long timeout;
+	bool cores_are_available;
+	int ret;
+
+	/* Force the transition to be checked and reported - the cores may be
+	 * 'available' (for job submission) but not fully powered up. */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	timeout = jiffies + PM_TIMEOUT;
+
+	/* Wait for cores */
+	ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+			kbdev->pm.backend.gpu_in_desired_state);
+
+	if (ret < 0 && time_after(jiffies, timeout)) {
+		dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+		dev_err(kbdev->dev, "Desired state :\n");
+		dev_err(kbdev->dev, "\tShader=%016llx\n",
+				kbdev->pm.backend.desired_shader_state);
+		dev_err(kbdev->dev, "\tTiler =%016llx\n",
+				kbdev->pm.backend.desired_tiler_state);
+		dev_err(kbdev->dev, "Current state :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_LO),
+					NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_HI), NULL),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_LO), NULL));
+		dev_err(kbdev->dev, "Cores transitioning :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_LO), NULL));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_HI), NULL),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_LO), NULL));
+#if KBASE_GPU_RESET_EN
+		dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+		if (kbase_prepare_to_reset_gpu(kbdev))
+			kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+	} else {
+		/* Log timelining information that a change in state has
+		 * completed */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
+
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Clear all interrupts,
+	 * and unmask them all.
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
+									NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
+
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Mask all interrupts,
+	 * and clear them all.
+	 */
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+									NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+									NULL);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+}
+
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_disable_interrupts_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
+
+
+/*
+ * pmu layout:
+ * 0x0000: PMU TAG (RO) (0xCAFECAFE)
+ * 0x0004: PMU VERSION ID (RO) (0x00000000)
+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE)
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
+{
+	bool reset_required = is_resume;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_powered) {
+		/* Already turned on */
+		if (kbdev->poweroff_pending)
+			kbase_pm_enable_interrupts(kbdev);
+		kbdev->poweroff_pending = false;
+		KBASE_DEBUG_ASSERT(!is_resume);
+		return;
+	}
+
+	kbdev->poweroff_pending = false;
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u);
+
+	if (is_resume && kbdev->pm.backend.callback_power_resume) {
+		kbdev->pm.backend.callback_power_resume(kbdev);
+		return;
+	} else if (kbdev->pm.backend.callback_power_on) {
+		kbdev->pm.backend.callback_power_on(kbdev);
+		/* If your platform properly keeps the GPU state you may use the
+		 * return value of the callback_power_on function to
+		 * conditionally reset the GPU on power up. Currently we are
+		 * conservative and always reset the GPU. */
+		reset_required = true;
+	}
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	kbdev->pm.backend.gpu_powered = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (reset_required) {
+		/* GPU state was lost, reset GPU to ensure it is in a
+		 * consistent state */
+		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
+	}
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_restore_all_as(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Lastly, enable the interrupts */
+	kbase_pm_enable_interrupts(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
+
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* ASSERT that the cores should now be unavailable. No lock needed. */
+	KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+
+	kbdev->poweroff_pending = true;
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* Already turned off */
+		if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+			kbdev->pm.backend.callback_power_suspend(kbdev);
+		return true;
+	}
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u);
+
+	/* Disable interrupts. This also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure that any IRQ handlers have finished */
+	kbase_synchronize_irqs(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (atomic_read(&kbdev->faults_pending)) {
+		/* Page/bus faults are still being processed. The GPU can not
+		 * be powered off until they have completed */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return false;
+	}
+
+	kbase_pm_cache_snoop_disable(kbdev);
+
+	/* The GPU power may be turned off from this point */
+	kbdev->pm.backend.gpu_powered = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+		kbdev->pm.backend.callback_power_suspend(kbdev);
+	else if (kbdev->pm.backend.callback_power_off)
+		kbdev->pm.backend.callback_power_off(kbdev);
+	return true;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_off);
+
+struct kbasep_reset_timeout_data {
+	struct hrtimer timer;
+	bool timed_out;
+	struct kbase_device *kbdev;
+};
+
+void kbase_pm_reset_done(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	kbdev->pm.backend.reset_done = true;
+	wake_up(&kbdev->pm.backend.reset_done_wait);
+}
+
+/**
+ * kbase_pm_wait_for_reset - Wait for a reset to happen
+ *
+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
+ *
+ * @kbdev: Kbase device
+ */
+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	wait_event(kbdev->pm.backend.reset_done_wait,
+						(kbdev->pm.backend.reset_done));
+	kbdev->pm.backend.reset_done = false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done);
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+	struct kbasep_reset_timeout_data *rtdata =
+		container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+	rtdata->timed_out = 1;
+
+	/* Set the wait queue to wake up kbase_pm_init_hw even though the reset
+	 * hasn't completed */
+	kbase_pm_reset_done(rtdata->kbdev);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+	u32 jm_values[4];
+	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >>
+		GPU_ID_VERSION_MAJOR_SHIFT;
+
+	kbdev->hw_quirks_sc = 0;
+
+	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443.
+	 * and needed due to MIDGLES-3539. See PRLAM-11035 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+		kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
+
+	/* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+		kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	/* Enable alternative hardware counter selection if configured. */
+	if (!GPU_ID_IS_NEW_FORMAT(prod_id))
+		kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+#endif
+
+	/* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+		kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
+
+	if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
+		if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */
+			kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
+		else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */
+			kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
+	}
+
+	if (!kbdev->hw_quirks_sc)
+		kbdev->hw_quirks_sc = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_CONFIG), NULL);
+
+	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(TILER_CONFIG), NULL);
+
+	/* Set tiler clock gate override if required */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+		kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+
+	/* Limit the GPU bus bandwidth if the platform needs this. */
+	kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(L2_MMU_CONFIG), NULL);
+
+
+	/* Limit read & write ID width for AXI */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) {
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS);
+		kbdev->hw_quirks_mmu |= (DEFAULT_3BIT_ARID_LIMIT & 0x7) <<
+				L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT;
+
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES);
+		kbdev->hw_quirks_mmu |= (DEFAULT_3BIT_AWID_LIMIT & 0x7) <<
+				L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT;
+	} else {
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS);
+		kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT;
+
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+		kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
+	}
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Allow memory configuration disparity to be ignored, we
+		 * optimize the use of shared memory and thus we expect
+		 * some disparity in the memory configuration */
+		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
+	}
+
+	kbdev->hw_quirks_jm = 0;
+	/* Only for T86x/T88x-based products after r2p0 */
+	if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) {
+
+		if (of_property_read_u32_array(np,
+					"jm_config",
+					&jm_values[0],
+					ARRAY_SIZE(jm_values))) {
+			/* Entry not in device tree, use defaults  */
+			jm_values[0] = 0;
+			jm_values[1] = 0;
+			jm_values[2] = 0;
+			jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT;
+		}
+
+		/* Limit throttle limit to 6 bits*/
+		if (jm_values[3] > JM_MAX_JOB_THROTTLE_LIMIT) {
+			dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63).");
+			jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT;
+		}
+
+		/* Aggregate to one integer. */
+		kbdev->hw_quirks_jm |= (jm_values[0] ?
+				JM_TIMESTAMP_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[1] ?
+				JM_CLOCK_GATE_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[2] ?
+				JM_JOB_THROTTLE_ENABLE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[3] <<
+				JM_JOB_THROTTLE_LIMIT_SHIFT);
+
+	} else if (GPU_ID_IS_NEW_FORMAT(prod_id) &&
+			   (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
+					   GPU_ID2_PRODUCT_TMIX)) {
+		/* Only for tMIx */
+		u32 coherency_features;
+
+		coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+		/* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
+		 * documented for tMIx so force correct value here.
+		 */
+		if (coherency_features ==
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE)) {
+			kbdev->hw_quirks_jm |=
+				(COHERENCY_ACE_LITE | COHERENCY_ACE) <<
+				JM_FORCE_COHERENCY_FEATURES_SHIFT;
+		}
+	}
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING))
+		kbdev->hw_quirks_sc |= SC_TLS_HASH_ENABLE;
+
+	if (!kbdev->hw_quirks_jm)
+		kbdev->hw_quirks_jm = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JM_CONFIG), NULL);
+
+#ifdef CONFIG_MALI_CORESTACK
+#define MANUAL_POWER_CONTROL ((u32)(1 << 8))
+	kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL;
+#endif /* CONFIG_MALI_CORESTACK */
+}
+
+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
+{
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
+			kbdev->hw_quirks_sc, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
+			kbdev->hw_quirks_tiler, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
+			kbdev->hw_quirks_mmu, NULL);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
+			kbdev->hw_quirks_jm, NULL);
+
+}
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
+{
+	if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) &&
+		!kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_enable_smc != 0)
+			kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0);
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n");
+		kbdev->cci_snoop_enabled = true;
+	}
+}
+
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
+{
+	if (kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_disable_smc != 0) {
+			mali_cci_flush_l2(kbdev);
+			kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0);
+		}
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n");
+		kbdev->cci_snoop_enabled = false;
+	}
+}
+
+static int kbase_pm_do_reset(struct kbase_device *kbdev)
+{
+	struct kbasep_reset_timeout_data rtdata;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+
+	KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev);
+  //dev_err(kbdev->dev, "%s,  %d \n", __FILE__, __LINE__);
+  core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+  l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+  tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+
+  //printk("core_ready=%ld, l2_ready=%ld, tiler_ready=%ld\n", core_ready, l2_ready, tiler_ready);
+  if (core_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, core_ready, ACTION_PWROFF);
+
+  if (l2_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_ready, ACTION_PWROFF);
+
+  if (tiler_ready)
+          kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_ready, ACTION_PWROFF);
+  /* do external reset */
+
+//JOHNT
+    if (reg_base_hiubus) {
+        value = Rd(RESET0_MASK);
+        value = value & (~(0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_MASK, value);
+
+        value = Rd(RESET0_LEVEL);
+        value = value & (~(0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_LEVEL, value);
+    ///////////////
+        value = Rd(RESET2_MASK);
+        value = value & (~(0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_MASK, value);
+
+        value = Rd(RESET2_LEVEL);
+        value = value & (~(0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_LEVEL, value);
+
+        value = Rd(RESET0_LEVEL);
+        value = value | ((0x1<<20));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET0_LEVEL, value);
+
+        value = Rd(RESET2_LEVEL);
+        value = value | ((0x1<<14));
+        //printk("line(%d), value=%x\n", __LINE__, value);
+        Wr(RESET2_LEVEL, value);
+    } else {
+		dev_err(kbdev->dev, "reg_base_hiubus is null !!!\n");
+    }
+  //writel(..../*e.g. PWR_RESET, EXTERNAL_PWR_REGISTER*/);
+
+  /* any other action necessary, like a simple delay */
+  kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+  kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+
+	/* Unmask the reset complete interrupt only */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED,
+									NULL);
+
+	/* Initialize a structure for tracking the status of the reset */
+	rtdata.kbdev = kbdev;
+	rtdata.timed_out = 0;
+
+	/* Create a timer to use as a timeout on the reset */
+	hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rtdata.timer.function = kbasep_reset_timeout;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	/* No interrupt has been received - check if the RAWSTAT register says
+	 * the reset has completed */
+	if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+							RESET_COMPLETED) {
+		/* The interrupt is set in the RAWSTAT; this suggests that the
+		 * interrupts are not getting to the CPU */
+		dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+		/* If interrupts aren't working we can't continue. */
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return -EINVAL;
+	}
+
+	/* The GPU doesn't seem to be responding to the reset so try a hard
+	 * reset */
+	dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+								RESET_TIMEOUT);
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET, NULL);
+
+	/* Restart the timer to wait for the hard reset to complete */
+	rtdata.timed_out = 0;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	destroy_hrtimer_on_stack(&rtdata.timer);
+
+	dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+								RESET_TIMEOUT);
+
+	return -EINVAL;
+}
+
+static int kbasep_protected_mode_enable(struct protected_mode_device *pdev)
+{
+	struct kbase_device *kbdev = pdev->data;
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+		GPU_COMMAND_SET_PROTECTED_MODE, NULL);
+	return 0;
+}
+
+static int kbasep_protected_mode_disable(struct protected_mode_device *pdev)
+{
+	struct kbase_device *kbdev = pdev->data;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	return kbase_pm_do_reset(kbdev);
+}
+
+struct protected_mode_ops kbase_native_protected_ops = {
+	.protected_mode_enable = kbasep_protected_mode_enable,
+	.protected_mode_disable = kbasep_protected_mode_disable
+};
+
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+{
+	unsigned long irq_flags;
+	int err;
+	bool resume_vinstr = false;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Ensure the clock is on before attempting to access the hardware */
+	if (!kbdev->pm.backend.gpu_powered) {
+		if (kbdev->pm.backend.callback_power_on)
+			kbdev->pm.backend.callback_power_on(kbdev);
+
+		spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+		kbdev->pm.backend.gpu_powered = true;
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+	}
+
+	/* Ensure interrupts are off to begin with, this also clears any
+	 * outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure cache snoops are disabled before reset. */
+	kbase_pm_cache_snoop_disable(kbdev);
+	/* Prepare for the soft-reset */
+	kbdev->pm.backend.reset_done = false;
+
+	/* The cores should be made unavailable due to the reset */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->shader_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u, (u32)0u);
+	if (kbdev->tiler_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u, (u32)0u);
+	kbdev->shader_available_bitmap = 0u;
+	kbdev->tiler_available_bitmap = 0u;
+	kbdev->l2_available_bitmap = 0u;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	/* Soft reset the GPU */
+	if (kbdev->protected_mode_support)
+		err = kbdev->protected_ops->protected_mode_disable(
+				kbdev->protected_dev);
+	else
+		err = kbase_pm_do_reset(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->protected_mode)
+		resume_vinstr = true;
+	kbdev->protected_mode = false;
+	kbase_ipa_model_use_configured_locked(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	if (err)
+		goto exit;
+
+	if (flags & PM_HW_ISSUES_DETECT)
+		kbase_pm_hw_issues_detect(kbdev);
+
+	kbase_pm_hw_issues_apply(kbdev);
+	kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
+
+	/* Sanity check protected mode was left after reset */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		u32 gpu_status = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS), NULL);
+
+		WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE);
+	}
+
+	/* If cycle counter was in use re-enable it, enable_irqs will only be
+	 * false when called from kbase_pm_powerup */
+	if (kbdev->pm.backend.gpu_cycle_counter_requests &&
+						(flags & PM_ENABLE_IRQS)) {
+		/* enable interrupts as the L2 may have to be powered on */
+		kbase_pm_enable_interrupts(kbdev);
+		kbase_pm_request_l2_caches(kbdev);
+
+		/* Re-enable the counters if we need to */
+		spin_lock_irqsave(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+		if (kbdev->pm.backend.gpu_cycle_counter_requests)
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+		spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+		kbase_pm_release_l2_caches(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+		kbase_pm_disable_interrupts(kbdev);
+	}
+
+	if (flags & PM_ENABLE_IRQS)
+		kbase_pm_enable_interrupts(kbdev);
+
+exit:
+	/* If GPU is leaving protected mode resume vinstr operation. */
+	if (kbdev->vinstr_ctx && resume_vinstr)
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	return err;
+}
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ *
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * kbase_pm_request_gpu_cycle_counter() or
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only
+ *
+ * When this function is called the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to (
+ * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() )
+ *
+ * @kbdev:     The kbase device structure of the device
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+	--kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	kbase_pm_release_l2_caches(kbdev);
+}
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100644
index 0000000..c0e124d
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,606 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * enum kbasep_pm_action - Actions that can be performed on a core.
+ *
+ * This enumeration is private to the file. Its values are set to allow
+ * core_type_to_reg() function, which decodes this enumeration, to be simpler
+ * and more efficient.
+ *
+ * @ACTION_PRESENT: The cores that are present
+ * @ACTION_READY: The cores that are ready
+ * @ACTION_PWRON: Power on the cores specified
+ * @ACTION_PWROFF: Power off the cores specified
+ * @ACTION_PWRTRANS: The cores that are transitioning
+ * @ACTION_PWRACTIVE: The cores that are active
+ */
+enum kbasep_pm_action {
+	ACTION_PRESENT = 0,
+	ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO),
+	ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO),
+	ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO),
+	ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO),
+	ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO)
+};
+
+
+
+/**
+ * kbase_pm_dev_idle - The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_dev_activate - The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_get_present_cores - Get details of the cores that are present in
+ *                              the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid
+ *         pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_active_cores - Get details of the cores that are currently
+ *                             active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_trans_cores - Get details of the cores that are currently
+ *                            transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_ready_cores - Get details of the cores that are currently
+ *                            powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are powered and ready for jobs (they may
+ * or may not be currently executing jobs).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_clock_on - Turn the clock for the device on, and enable device
+ *                     interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if clock on due to resume after suspend, false otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
+ *                      device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU
+ * off. It should be modified during integration to perform the necessary
+ * actions to turn the clock off (if this is possible in the integration).
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if clock off due to suspend, false otherwise
+ *
+ * Return: true  if clock was turned off, or
+ *         false if clock can not be turned off due to pending page/bus fault
+ *               workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
+
+/**
+ * kbase_pm_enable_interrupts - Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts - Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
+ *                                      that does not take the hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * kbase_pm_reset_done - The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_pm_check_transitions_nolock - Check if there are any power transitions
+ *                                     to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state has no
+ * cores. That is: of the no cores desired, none were *un*available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation)
+ *
+ * The caller must hold kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return:      non-zero when all desired cores are available. That is,
+ *              it's worthwhile for the caller to submit a job.
+ *              false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_check_transitions_sync - Synchronous and locking variant of
+ *                                   kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device.pm.power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ *                                      where the caller must hold
+ *                                      kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ *                               the Power Policy, and begin any power
+ *                               transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware acheive the desired shader
+ * core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
+ *                                     the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+
+void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action);
+
+/**
+ * kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required
+ *                                   and used cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init()) after calling this function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to
+ *                         update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of @buffer_updated differs from a previous call.
+ *
+ * @kbdev:          The kbase device structure for the device (must be a
+ *                  valid pointer)
+ * @buffer_updated: True if the buffer has been updated on this VSync,
+ *                  false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change
+ *                            the clock speed of the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating.
+ */
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
+ *                                      needed
+ *
+ * If the caller is the first caller then the GPU cycle counters will be enabled
+ * along with the l2 cache
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
+ *                                               needed (l2 cache already on)
+ *
+ * This is a version of the above function
+ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
+ * l2 cache is known to be on and assured to be on until the subsequent call of
+ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does
+ * not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
+ *                                      longer in use
+ *
+ * If the caller is the last caller then the GPU cycle counters will be
+ * disabled. A request must have been made before a call to this.
+ *
+ * Caller must not hold the hwaccess_lock, as it will be taken in this function.
+ * If the caller is already holding this lock then
+ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
+ *                                             that does not take hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
+ *                                       complete
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device
+ *
+ * Setup the power management callbacks and initialize/enable the runtime-pm
+ * for the Mali GPU platform device, using the callback function. This must be
+ * called before the kbase_pm_register_access_enable() function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+int kbase_pm_runtime_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_runtime_term - Disable runtime-pm for Mali GPU platform device
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_runtime_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ *                              collection is active.
+ *
+ * Note that this returns if the power management metrics collection was
+ * active at the time of calling, it is possible that after the call the metrics
+ * collection enable may have changed state.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if power on due to resume after suspend,
+ *             false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been
+ *                        requested.
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if power off due to suspend,
+ *              false otherwise
+ */
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+
+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total, unsigned long *busy);
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
+#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @kbdev:         The kbase device structure for the device (must be a
+ *                 valid pointer)
+ * @utilisation:   The current calculated utilisation by the metrics system.
+ * @util_gl_share: The current calculated gl share of utilisation.
+ * @util_cl_share: The current calculated cl share of utilisation per core
+ *                 group.
+ * Return:         Returns 0 on failure and non zero on success.
+ */
+
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2]);
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_metrics_update - Inform the metrics system that an atom is either
+ *                           about to be run or has just completed.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @now:   Pointer to the timestamp of the change, or NULL to use current time
+ *
+ * Caller must hold hwaccess_lock
+ */
+void kbase_pm_metrics_update(struct kbase_device *kbdev,
+				ktime_t *now);
+
+/**
+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called after L2 power up.
+ */
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called before L2 power off.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
new file mode 100644
index 0000000..024248c
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -0,0 +1,401 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION          70
+#define KBASE_PM_VSYNC_MAX_UTILISATION          90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION       10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION       40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
+ * under 11s. Exceeding this will cause overflow */
+#define KBASE_PM_TIME_SHIFT			8
+
+/* Maximum time between sampling of utilization data, without resetting the
+ * counters. */
+#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbasep_pm_metrics_data *metrics;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	metrics = container_of(timer, struct kbasep_pm_metrics_data, timer);
+	kbase_pm_get_dvfs_action(metrics->kbdev);
+
+	spin_lock_irqsave(&metrics->lock, flags);
+
+	if (metrics->timer_active)
+		hrtimer_start(timer,
+			HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&metrics->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+int kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.metrics.kbdev = kbdev;
+
+	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.prev_busy = 0;
+	kbdev->pm.backend.metrics.prev_idle = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+
+	spin_lock_init(&kbdev->pm.backend.metrics.lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbdev->pm.backend.metrics.timer_active = true;
+	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
+
+	hrtimer_start(&kbdev->pm.backend.metrics.timer,
+			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
+
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbdev->pm.backend.metrics.timer_active = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	ktime_t diff;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+	if (ktime_to_ns(diff) < 0)
+		return;
+
+	if (kbdev->pm.backend.metrics.gpu_active) {
+		u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+
+		kbdev->pm.backend.metrics.time_busy += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[0])
+			kbdev->pm.backend.metrics.busy_cl[0] += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[1])
+			kbdev->pm.backend.metrics.busy_cl[1] += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[0])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
+			kbdev->pm.backend.metrics.busy_gl += ns_time;
+	} else {
+		kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff)
+							>> KBASE_PM_TIME_SHIFT);
+	}
+
+	kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function.
+ */
+static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	/* Store previous value */
+	kbdev->pm.backend.metrics.prev_idle =
+					kbdev->pm.backend.metrics.time_idle;
+	kbdev->pm.backend.metrics.prev_busy =
+					kbdev->pm.backend.metrics.time_busy;
+
+	/* Reset current values */
+	kbdev->pm.backend.metrics.time_period_start = now;
+	kbdev->pm.backend.metrics.time_idle = 0;
+	kbdev->pm.backend.metrics.time_busy = 0;
+	kbdev->pm.backend.metrics.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.busy_gl = 0;
+}
+
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get());
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total_out, unsigned long *busy_out)
+{
+	ktime_t now = ktime_get();
+	unsigned long flags, busy, total;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	busy = kbdev->pm.backend.metrics.time_busy;
+	total = busy + kbdev->pm.backend.metrics.time_idle;
+
+	/* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
+	 * 100ms) */
+	if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+		kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+	} else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+		total += kbdev->pm.backend.metrics.prev_idle +
+				kbdev->pm.backend.metrics.prev_busy;
+		busy += kbdev->pm.backend.metrics.prev_busy;
+	}
+
+	*total_out = total;
+	*busy_out = busy;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev,
+					int *util_gl_share,
+					int util_cl_share[2],
+					ktime_t now)
+{
+	int utilisation;
+	int busy;
+
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	if (kbdev->pm.backend.metrics.time_idle +
+				kbdev->pm.backend.metrics.time_busy == 0) {
+		/* No data - so we return NOP */
+		utilisation = -1;
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+		goto out;
+	}
+
+	utilisation = (100 * kbdev->pm.backend.metrics.time_busy) /
+			(kbdev->pm.backend.metrics.time_idle +
+			 kbdev->pm.backend.metrics.time_busy);
+
+	busy = kbdev->pm.backend.metrics.busy_gl +
+		kbdev->pm.backend.metrics.busy_cl[0] +
+		kbdev->pm.backend.metrics.busy_cl[1];
+
+	if (busy != 0) {
+		if (util_gl_share)
+			*util_gl_share =
+				(100 * kbdev->pm.backend.metrics.busy_gl) /
+									busy;
+		if (util_cl_share) {
+			util_cl_share[0] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[0]) /
+									busy;
+			util_cl_share[1] =
+				(100 * kbdev->pm.backend.metrics.busy_cl[1]) /
+									busy;
+		}
+	} else {
+		if (util_gl_share)
+			*util_gl_share = -1;
+		if (util_cl_share) {
+			util_cl_share[0] = -1;
+			util_cl_share[1] = -1;
+		}
+	}
+
+out:
+	return utilisation;
+}
+
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	int utilisation, util_gl_share;
+	int util_cl_share[2];
+	ktime_t now;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	now = ktime_get();
+
+	utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share,
+			util_cl_share, now);
+
+	if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 ||
+							util_cl_share[1] < 0) {
+		utilisation = 0;
+		util_gl_share = 0;
+		util_cl_share[0] = 0;
+		util_cl_share[1] = 0;
+		goto out;
+	}
+
+out:
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
+								util_cl_share);
+#endif				/*CONFIG_MALI_MIDGARD_DVFS */
+
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+	bool isactive;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	isactive = kbdev->pm.backend.metrics.timer_active;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	return isactive;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/**
+ * kbase_pm_metrics_active_calc - Update PM active counts based on currently
+ *                                running atoms
+ * @kbdev: Device pointer
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ */
+static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		/* Head atom may have just completed, so if it isn't running
+		 * then try the next atom */
+		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
+			katom = kbase_gpu_inspect(kbdev, js, 1);
+
+		if (katom && katom->gpu_rb_state ==
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+			if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+				int device_nr = (katom->core_req &
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+						? katom->device_nr : 0;
+				if (!WARN_ON(device_nr >= 2))
+					kbdev->pm.backend.metrics.
+						active_cl_ctx[device_nr] = 1;
+			} else {
+				/* Slot 2 should not be running non-compute
+				 * atoms */
+				if (!WARN_ON(js >= 2))
+					kbdev->pm.backend.metrics.
+						active_gl_ctx[js] = 1;
+			}
+			kbdev->pm.backend.metrics.gpu_active = true;
+		}
+	}
+}
+
+/* called when job is submitted to or removed from a GPU slot */
+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
+{
+	unsigned long flags;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	if (!timestamp) {
+		now = ktime_get();
+		timestamp = &now;
+	}
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
+
+	kbase_pm_metrics_active_calc(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100644
index 0000000..6be2226
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,1002 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#else				/* CONFIG_MALI_NO_MALI */
+#if !PLATFORM_POWER_DOWN_ONLY
+	&kbase_pm_demand_policy_ops,
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+	&kbase_pm_coarse_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+#if !PLATFORM_POWER_DOWN_ONLY
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/* The number of policies available in the system.
+ * This is derived from the number of functions listed in policy_get_functions.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+	 * expect to hit it nor tend to hit it very much anyway. We can detect
+	 * whether we need more instrumentation by a difference between
+	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
+
+	/* Must be the last */
+	KBASE_PM_FUNC_ID_COUNT
+};
+
+
+/* State changes during request/unrequest/release-ing cores */
+enum {
+	KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
+	KBASE_PM_CHANGE_STATE_TILER  = (1u << 1),
+
+	/* These two must be last */
+	KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
+						KBASE_PM_CHANGE_STATE_SHADER),
+	KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
+};
+typedef u32 kbase_pm_change_state;
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+/* Timeline Trace code lookups for each function */
+static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
+					[KBASE_PM_CHANGE_STATE_COUNT] = {
+	/* kbase_pm_request_cores */
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
+
+	/* kbase_pm_release_cores */
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
+};
+
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id,
+		kbase_pm_change_state state)
+{
+	int trace_code;
+
+	KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
+	KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
+									state);
+
+	trace_code = kbase_pm_change_state_trace_code[func_id][state];
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
+}
+
+#else /* CONFIG_MALI_TRACE_TIMELINE */
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id, kbase_pm_change_state state)
+{
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+/**
+ * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
+ *                               requested shader cores
+ * @kbdev: Device pointer
+ */
+static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
+{
+	u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
+	u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->pm.backend.desired_shader_state &=
+			~kbdev->pm.backend.shader_poweroff_pending;
+	kbdev->pm.backend.desired_tiler_state &=
+			~kbdev->pm.backend.tiler_poweroff_pending;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+
+	if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
+			prev_tiler_state !=
+				kbdev->pm.backend.desired_tiler_state ||
+			kbdev->pm.backend.ca_in_transition) {
+		bool cores_are_available;
+
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+		/* Don't need 'cores_are_available',
+		 * because we don't return anything */
+		CSTD_UNUSED(cores_are_available);
+	}
+}
+
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(timer, struct kbase_device,
+						pm.backend.gpu_poweroff_timer);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* It is safe for this call to do nothing if the work item is already
+	 * queued. The worker function will read the must up-to-date state of
+	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
+	 *
+	 * If a state change occurs while the worker function is processing,
+	 * this call will succeed as a work item can be requeued once it has
+	 * started processing.
+	 */
+	if (kbdev->pm.backend.gpu_poweroff_pending)
+		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+					&kbdev->pm.backend.gpu_poweroff_work);
+
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending_time--;
+
+		KBASE_DEBUG_ASSERT(
+				kbdev->pm.backend.shader_poweroff_pending_time
+									>= 0);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending_time)
+			kbasep_pm_do_poweroff_cores(kbdev);
+	}
+
+	if (kbdev->pm.backend.poweroff_timer_needed) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+		return HRTIMER_RESTART;
+	}
+
+	kbdev->pm.backend.poweroff_timer_running = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	bool do_poweroff = false;
+
+	kbdev = container_of(data, struct kbase_device,
+						pm.backend.gpu_poweroff_work);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	kbdev->pm.backend.gpu_poweroff_pending--;
+
+	if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Only power off the GPU if a request is still pending */
+	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+		do_poweroff = true;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (do_poweroff) {
+		kbdev->pm.backend.poweroff_timer_needed = false;
+		hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+		kbdev->pm.backend.poweroff_timer_running = false;
+
+		/* Power off the GPU */
+		kbase_pm_do_poweroff(kbdev, false);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("kbase_pm_do_poweroff",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!wq)
+		return -ENOMEM;
+
+	kbdev->pm.backend.gpu_poweroff_wq = wq;
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
+			kbasep_pm_do_gpu_poweroff_wq);
+	hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	kbdev->pm.backend.gpu_poweroff_timer.function =
+			kbasep_pm_do_gpu_poweroff_callback;
+	kbdev->pm.backend.pm_current_policy = policy_list[0];
+	kbdev->pm.backend.pm_current_policy->init(kbdev);
+	kbdev->pm.gpu_poweroff_time =
+			HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+	kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+	kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
+
+	return 0;
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.pm_current_policy->term(kbdev);
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
+}
+
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	kbdev->pm.backend.poweroff_timer_needed = false;
+	hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.poweroff_timer_running = false;
+
+	/* If wq is already running but is held off by pm.lock, make sure it has
+	 * no effect */
+	kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+	kbdev->pm.backend.shader_poweroff_pending_time = 0;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+	bool active;
+
+	lockdep_assert_held(&pm->lock);
+
+	/* pm_current_policy will never be NULL while pm.lock is held */
+	KBASE_DEBUG_ASSERT(backend->pm_current_policy);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	active = backend->pm_current_policy->get_core_active(kbdev);
+
+	if (active) {
+		if (backend->gpu_poweroff_pending) {
+			/* Cancel any pending power off request */
+			backend->gpu_poweroff_pending = 0;
+
+			/* If a request was pending then the GPU was still
+			 * powered, so no need to continue */
+			if (!kbdev->poweroff_pending) {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				return;
+			}
+		}
+
+		if (!backend->poweroff_timer_running && !backend->gpu_powered &&
+				(pm->poweroff_gpu_ticks ||
+				pm->poweroff_shader_ticks)) {
+			backend->poweroff_timer_needed = true;
+			backend->poweroff_timer_running = true;
+			hrtimer_start(&backend->gpu_poweroff_timer,
+					pm->gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+
+		/* Power on the GPU and any cores requested by the policy */
+		if (pm->backend.poweroff_wait_in_progress) {
+			pm->backend.poweron_required = true;
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			kbase_pm_do_poweron(kbdev, false);
+		}
+	} else {
+		/* It is an error for the power policy to power off the GPU
+		 * when there are contexts active */
+		KBASE_DEBUG_ASSERT(pm->active_count == 0);
+
+		if (backend->shader_poweroff_pending ||
+				backend->tiler_poweroff_pending) {
+			backend->shader_poweroff_pending = 0;
+			backend->tiler_poweroff_pending = 0;
+			backend->shader_poweroff_pending_time = 0;
+		}
+
+		/* Request power off */
+		if (pm->backend.gpu_powered) {
+			if (pm->poweroff_gpu_ticks) {
+				backend->gpu_poweroff_pending =
+						pm->poweroff_gpu_ticks;
+				backend->poweroff_timer_needed = true;
+				if (!backend->poweroff_timer_running) {
+					/* Start timer if not running (eg if
+					 * power policy has been changed from
+					 * always_on to something else). This
+					 * will ensure the GPU is actually
+					 * powered off */
+					backend->poweroff_timer_running
+							= true;
+					hrtimer_start(
+						&backend->gpu_poweroff_timer,
+						pm->gpu_poweroff_time,
+						HRTIMER_MODE_REL);
+				}
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+			} else {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+
+				/* Power off the GPU immediately */
+				kbase_pm_do_poweroff(kbdev, false);
+			}
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+	}
+}
+
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+	u64 desired_bitmap;
+	u64 desired_tiler_bitmap;
+	bool cores_are_available;
+	bool do_poweroff = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.pm_current_policy == NULL)
+		return;
+	if (kbdev->pm.backend.poweroff_wait_in_progress)
+		return;
+
+	if (kbdev->protected_mode_transition &&	!kbdev->shader_needed_bitmap &&
+			!kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt) {
+		/* We are trying to change in/out of protected mode - force all
+		 * cores off so that the L2 powers down */
+		desired_bitmap = 0;
+		desired_tiler_bitmap = 0;
+	} else {
+		desired_bitmap =
+		kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
+		desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+
+		if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+			desired_tiler_bitmap = 1;
+		else
+			desired_tiler_bitmap = 0;
+
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
+			/* Unless XAFFINITY is supported, enable core 0 if tiler
+			 * required, regardless of core availability */
+			if (kbdev->tiler_needed_cnt > 0 ||
+					kbdev->tiler_inuse_cnt > 0)
+				desired_bitmap |= 1;
+		}
+	}
+
+	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
+							(u32)desired_bitmap);
+	/* Are any cores being powered on? */
+	if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
+	    ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
+	    kbdev->pm.backend.ca_in_transition) {
+		/* Check if we are powering off any cores before updating shader
+		 * state */
+		if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+			/* Start timer to power off cores */
+			kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+			kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+
+			if (kbdev->pm.poweroff_shader_ticks &&
+					!kbdev->protected_mode_transition)
+				kbdev->pm.backend.shader_poweroff_pending_time =
+						kbdev->pm.poweroff_shader_ticks;
+			else
+				do_poweroff = true;
+		}
+
+		kbdev->pm.backend.desired_shader_state = desired_bitmap;
+		kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
+
+		/* If any cores are being powered on, transition immediately */
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+		/* Start timer to power off cores */
+		kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+		if (kbdev->pm.poweroff_shader_ticks &&
+				!kbdev->protected_mode_transition)
+			kbdev->pm.backend.shader_poweroff_pending_time =
+					kbdev->pm.poweroff_shader_ticks;
+		else
+			kbasep_pm_do_poweroff_cores(kbdev);
+	} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
+			desired_tiler_bitmap != 0 &&
+			kbdev->pm.backend.poweroff_timer_needed) {
+		/* If power policy is keeping cores on despite there being no
+		 * active contexts then disable poweroff timer as it isn't
+		 * required.
+		 * Only reset poweroff_timer_needed if we're not in the middle
+		 * of the power off callback */
+		kbdev->pm.backend.poweroff_timer_needed = false;
+	}
+
+	/* Ensure timer does not power off wanted cores and make sure to power
+	 * off unwanted cores */
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending &=
+				~(kbdev->pm.backend.desired_shader_state &
+								desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending &=
+				~(kbdev->pm.backend.desired_tiler_state &
+				desired_tiler_bitmap);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending &&
+				!kbdev->pm.backend.tiler_poweroff_pending)
+			kbdev->pm.backend.shader_poweroff_pending_time = 0;
+	}
+
+	/* Shader poweroff is deferred to the end of the function, to eliminate
+	 * issues caused by the core availability policy recursing into this
+	 * function */
+	if (do_poweroff)
+		kbasep_pm_do_poweroff_cores(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+}
+
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
+
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
+
+int set_policy_by_name(struct kbase_device *kbdev, const char *name)
+{
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, name)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		printk("power_policy: policy not found\n");
+		return -EINVAL;
+	}
+	trace_printk("policy name=%s\n", name);
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(set_policy_by_name);
+
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_policy *new_policy)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	const struct kbase_pm_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.pm_current_policy;
+	kbdev->pm.backend.pm_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
+								old_policy->id);
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
+								new_policy->id);
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.pm_current_policy = new_policy;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_active(kbdev);
+	kbase_pm_update_cores_state(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
+
+/* Check whether a state change has finished, and trace it as completed */
+static void
+kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
+{
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state)
+		kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 cores;
+
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	cores = shader_cores;
+	while (cores) {
+		int bitnum = fls64(cores) - 1;
+		u64 bit = 1ULL << bitnum;
+
+		/* It should be almost impossible for this to overflow. It would
+		 * require 2^32 atoms to request a particular core, which would
+		 * require 2^24 contexts to submit. This would require an amount
+		 * of memory that is impossible on a 32-bit system and extremely
+		 * unlikely on a 64-bit system. */
+		int cnt = ++kbdev->shader_needed_cnt[bitnum];
+
+		if (1 == cnt) {
+			kbdev->shader_needed_bitmap |= bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt = ++kbdev->tiler_needed_cnt;
+
+		if (1 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+							change_gpu_state);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
+
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_needed_bitmap &= ~bit;
+
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		cnt = --kbdev->tiler_needed_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* Trace that any state change effectively completes immediately
+		 * - no-one will wait on the state change */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
+
+enum kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 prev_shader_needed;	/* Just for tracing */
+	u64 prev_shader_inuse;	/* Just for tracing */
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	prev_shader_needed = kbdev->shader_needed_bitmap;
+	prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+	/* If desired_shader_state does not contain the requested cores, then
+	 * power management is not attempting to powering those cores (most
+	 * likely due to core availability policy) and a new job affinity must
+	 * be chosen */
+	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+							shader_cores) {
+		return (kbdev->pm.backend.poweroff_wait_in_progress ||
+				kbdev->pm.backend.pm_current_policy == NULL) ?
+				KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
+	}
+
+	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+	    (tiler_required && !kbdev->tiler_available_bitmap)) {
+		/* Trace ongoing core transition */
+		kbase_timeline_pm_l2_transition_start(kbdev);
+		return KBASE_CORES_NOT_READY;
+	}
+
+	/* If we started to trace a state change, then trace it has being
+	 * finished by now, at the very latest */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+	/* Trace core transition done */
+	kbase_timeline_pm_l2_transition_done(kbdev);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt)
+			kbdev->shader_needed_bitmap &= ~bit;
+
+		/* shader_inuse_cnt should not overflow because there can only
+		 * be a very limited number of jobs on the h/w at one time */
+
+		kbdev->shader_inuse_cnt[bitnum]++;
+		kbdev->shader_inuse_bitmap |= bit;
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		--kbdev->tiler_needed_cnt;
+
+		kbdev->tiler_inuse_cnt++;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+	}
+
+	if (prev_shader_needed != kbdev->shader_needed_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+	return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_inuse_bitmap &= ~bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+		cnt = --kbdev->tiler_inuse_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+							change_gpu_state);
+
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
+
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+					bool tiler_required,
+					u64 shader_cores)
+{
+	unsigned long flags;
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_check_transitions_sync(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
+
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 prior_l2_users_count;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	prior_l2_users_count = kbdev->l2_users_count++;
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
+
+	/* if the GPU is reset while the l2 is on, l2 will be off but
+	 * prior_l2_users_count will be > 0. l2_available_bitmap will have been
+	 * set to 0 though by kbase_pm_init_hw */
+	if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
+		kbase_pm_check_transitions_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	wait_event(kbdev->pm.backend.l2_powered_wait,
+					kbdev->pm.backend.l2_powered == 1);
+
+	/* Trace that any state change completed immediately */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
+
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->l2_users_count++;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
+
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
+
+	--kbdev->l2_users_count;
+
+	if (!kbdev->l2_users_count) {
+		kbase_pm_check_transitions_nolock(kbdev);
+		/* Trace that any state change completed immediately */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
new file mode 100644
index 0000000..611a90e
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -0,0 +1,227 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ *         initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+	KBASE_CORES_NOT_READY = 0,
+	KBASE_NEW_AFFINITY = 1,
+	KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ *                          for jobs to be submitted
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete/the cores might not be available
+ * until a Power Management IRQ.
+ *
+ * Return: 0 if the cores were successfully requested, or -errno otherwise.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ *                            jobs to be submitted.
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job is
+ * cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and if so update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ *
+ *         %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
+ *
+ *         %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+						struct kbase_device *kbdev,
+						bool tiler_required,
+						u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_register_inuse_cores() )
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
+ * may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups, power up, wait and prevent
+ * the power manager from powering down the l2 caches.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2
+ *
+ * It is the callers responsibility to ensure that the l2 is already powered up
+ * and to eventually call kbase_pm_release_l2_caches()
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power manager
+ * to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100644
index 0000000..d992989
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,103 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush -  Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367
+ *
+ * Note : If GPU resets occur then the counters are reset to zero, the delay may
+ * not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+	u32 base_count = 0;
+
+	/*
+	 * The caller must be holding onto the kctx or the call is from
+	 * userspace.
+	 */
+	kbase_pm_context_active(kctx->kbdev);
+	kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+	while (true) {
+		u32 new_count;
+
+		new_count = kbase_reg_read(kctx->kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		/* First time around, just store the count. */
+		if (base_count == 0) {
+			base_count = new_count;
+			continue;
+		}
+
+		/* No need to handle wrapping, unsigned maths works for this. */
+		if ((new_count - base_count) > 1000)
+			break;
+	}
+
+	kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+	kbase_pm_context_idle(kctx->kbdev);
+}
+#endif				/* CONFIG_MALI_NO_MALI */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
new file mode 100644
index 0000000..35088ab
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/docs/Doxyfile b/t83x/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
new file mode 100644
index 0000000..35ff2f1
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -0,0 +1,126 @@
+#
+# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/drivers/gpu/arm/midgard/ 
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE                += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           += ../../kernel/drivers/gpu/arm/midgard/docs
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/t83x/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
new file mode 100644
index 0000000..7ae05c2
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR;
+	size="12,8";
+	compound=true;
+
+	node [ shape = box ];
+
+	subgraph cluster_policy_queues {
+		low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+		queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+		rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+		label = "Policy's Queue(s)";
+	}
+
+	call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+	{
+		rank=same;
+		ordering=out;
+		call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+		call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+		call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+	}
+
+	subgraph cluster_runpool {
+
+		as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+		as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+		as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+		as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+		label = "Policy's Run Pool";
+	}
+
+	{
+		rank=same;
+		call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+		sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+	}
+
+	{
+		rank=same;
+		ordering=out;
+		sstop [ shape=ellipse label="SS-Timer expires" ]
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		irq [ label="IRQ" shape=ellipse ];
+
+		job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+	}
+
+	hstop [ shape=ellipse label="HS-Timer expires" ]
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+	low_queue:qr -> call_dequeue:w;
+	rt_queue:qr -> call_dequeue:w;
+
+	call_dequeue -> as1 [lhead=cluster_runpool];
+
+	as1->call_jdequeue         [ltail=cluster_runpool];
+	call_jdequeue->jobslots:0;
+	call_jdequeue->sstop_dotfixup [ arrowhead=none];
+	sstop_dotfixup->sstop      [label="Spawn SS-Timer"];
+	sstop->jobslots            [label="SoftStop"];
+	sstop->hstop               [label="Spawn HS-Timer"];
+	hstop->jobslots:ne            [label="HardStop"];
+
+
+	as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+	call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+	call_ctxfinish->call_ctxdone [constraint=false];
+
+	call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+	{
+	jobslots->irq   [constraint=false];
+
+	irq->job_finish [constraint=false];
+	}
+
+	irq->as2  [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot b/t83x/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
new file mode 100644
index 0000000..159b993
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/docs/policy_overview.dot
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR
+	size="6,6"
+	compound=true;
+
+	node [ shape = box ];
+
+	call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+	policy_queue [ label="Policy's Queue" ];
+
+	{
+		rank=same;
+		runpool [ label="Policy's Run Pool" ];
+
+		ctx_finish [ label="ctx finished" ];
+	}
+
+	{
+		rank=same;
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		job_finish [ label="Job finished" ];
+	}
+
+
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> policy_queue;
+
+	policy_queue->runpool [label="dequeue ctx" weight=0.1];
+	runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+	runpool->ctx_finish [ style=dotted ];
+
+	runpool->jobslots  [label="dequeue job" weight=0.1];
+	jobslots->runpool  [label="requeue job" weight=0.1];
+
+	jobslots->job_finish [ style=dotted ];
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/ipa/Kbuild b/t83x/kernel/drivers/gpu/arm/midgard/ipa/Kbuild
new file mode 100644
index 0000000..8e37f40
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/ipa/Kbuild
@@ -0,0 +1,27 @@
+#
+# (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+mali_kbase-y += \
+	ipa/mali_kbase_ipa_simple.o \
+	ipa/mali_kbase_ipa.o
+
+mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
+
+ifneq ($(wildcard $(src)/ipa/mali_kbase_ipa_vinstr_g71.c),)
+  mali_kbase-y += \
+	ipa/mali_kbase_ipa_vinstr_g71.o \
+	ipa/mali_kbase_ipa_vinstr_common.o
+
+endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
new file mode 100644
index 0000000..1450c2c
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
@@ -0,0 +1,590 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/thermal.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/of.h>
+#include "mali_kbase.h"
+#include "mali_kbase_ipa.h"
+#include "mali_kbase_ipa_debugfs.h"
+#include "mali_kbase_ipa_simple.h"
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#define dev_pm_opp_find_freq_exact opp_find_freq_exact
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp opp
+#endif
+
+#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model"
+#define KBASE_IPA_G71_MODEL_NAME      "mali-g71-power-model"
+
+static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
+	&kbase_simple_ipa_model_ops,
+	&kbase_g71_ipa_model_ops
+};
+
+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
+{
+	int err = 0;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (model->ops->recalculate) {
+		err = model->ops->recalculate(model);
+		if (err) {
+			dev_err(model->kbdev->dev,
+				"recalculation of power model %s returned error %d\n",
+				model->ops->name, err);
+		}
+	}
+
+	return err;
+}
+
+static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
+							    const char *name)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) {
+		struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i];
+
+		if (!strcmp(ops->name, name))
+			return ops;
+	}
+
+	dev_err(kbdev->dev, "power model \'%s\' not found\n", name);
+
+	return NULL;
+}
+
+void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
+{
+	atomic_set(&kbdev->ipa_use_configured_model, false);
+}
+
+void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
+{
+	atomic_set(&kbdev->ipa_use_configured_model, true);
+}
+
+const char *kbase_ipa_model_name_from_id(u32 gpu_id)
+{
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+			GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(prod_id)) {
+		switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
+		case GPU_ID2_PRODUCT_TMIX:
+			return KBASE_IPA_G71_MODEL_NAME;
+		default:
+			return KBASE_IPA_FALLBACK_MODEL_NAME;
+		}
+	}
+
+	return KBASE_IPA_FALLBACK_MODEL_NAME;
+}
+
+static struct device_node *get_model_dt_node(struct kbase_ipa_model *model)
+{
+	struct device_node *model_dt_node;
+	char compat_string[64];
+
+	snprintf(compat_string, sizeof(compat_string), "arm,%s",
+		 model->ops->name);
+
+	model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node,
+						NULL, compat_string);
+	if (!model_dt_node && !model->missing_dt_node_warning) {
+		dev_warn(model->kbdev->dev,
+			 "Couldn't find power_model DT node matching \'%s\'\n",
+			 compat_string);
+		model->missing_dt_node_warning = true;
+	}
+
+	return model_dt_node;
+}
+
+int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
+				  const char *name, s32 *addr,
+				  size_t num_elems, bool dt_required)
+{
+	int err, i;
+	struct device_node *model_dt_node = get_model_dt_node(model);
+	char *origin;
+
+	err = of_property_read_u32_array(model_dt_node, name, addr, num_elems);
+
+	if (err && dt_required) {
+		memset(addr, 0, sizeof(s32) * num_elems);
+		dev_warn(model->kbdev->dev,
+			 "Error %d, no DT entry: %s.%s = %zu*[0]\n",
+			 err, model->ops->name, name, num_elems);
+		origin = "zero";
+	} else if (err && !dt_required) {
+		origin = "default";
+	} else /* !err */ {
+		origin = "DT";
+	}
+
+	/* Create a unique debugfs entry for each element */
+	for (i = 0; i < num_elems; ++i) {
+		char elem_name[32];
+
+		if (num_elems == 1)
+			snprintf(elem_name, sizeof(elem_name), "%s", name);
+		else
+			snprintf(elem_name, sizeof(elem_name), "%s.%d",
+				name, i);
+
+		dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n",
+			model->ops->name, elem_name, addr[i], origin);
+
+		err = kbase_ipa_model_param_add(model, elem_name,
+						&addr[i], sizeof(s32),
+						PARAM_TYPE_S32);
+		if (err)
+			goto exit;
+	}
+exit:
+	return err;
+}
+
+int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
+				     const char *name, char *addr,
+				     size_t size, bool dt_required)
+{
+	int err;
+	struct device_node *model_dt_node = get_model_dt_node(model);
+	const char *string_prop_value;
+	char *origin;
+
+	err = of_property_read_string(model_dt_node, name,
+				      &string_prop_value);
+	if (err && dt_required) {
+		strncpy(addr, "", size - 1);
+		dev_warn(model->kbdev->dev,
+			 "Error %d, no DT entry: %s.%s = \'%s\'\n",
+			 err, model->ops->name, name, addr);
+		err = 0;
+		origin = "zero";
+	} else if (err && !dt_required) {
+		origin = "default";
+	} else /* !err */ {
+		strncpy(addr, string_prop_value, size - 1);
+		origin = "DT";
+	}
+
+	addr[size - 1] = '\0';
+
+	dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n",
+		model->ops->name, name, string_prop_value, origin);
+
+	err = kbase_ipa_model_param_add(model, name, addr, size,
+					PARAM_TYPE_STRING);
+
+	return err;
+}
+
+void kbase_ipa_term_model(struct kbase_ipa_model *model)
+{
+	if (!model)
+		return;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (model->ops->term)
+		model->ops->term(model);
+
+	kbase_ipa_model_param_free_all(model);
+
+	kfree(model);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_term_model);
+
+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
+					     struct kbase_ipa_model_ops *ops)
+{
+	struct kbase_ipa_model *model;
+	int err;
+
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	if (!ops || !ops->name)
+		return NULL;
+
+	model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL);
+	if (!model)
+		return NULL;
+
+	model->kbdev = kbdev;
+	model->ops = ops;
+	INIT_LIST_HEAD(&model->params);
+
+	err = model->ops->init(model);
+	if (err) {
+		dev_err(kbdev->dev,
+			"init of power model \'%s\' returned error %d\n",
+			ops->name, err);
+		kfree(model);
+		return NULL;
+	}
+
+	err = kbase_ipa_model_recalculate(model);
+	if (err) {
+		kbase_ipa_term_model(model);
+		return NULL;
+	}
+
+	return model;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_init_model);
+
+static void kbase_ipa_term_locked(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	/* Clean up the models */
+	if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model)
+		kbase_ipa_term_model(kbdev->ipa.configured_model);
+	kbase_ipa_term_model(kbdev->ipa.fallback_model);
+
+	kbdev->ipa.configured_model = NULL;
+	kbdev->ipa.fallback_model = NULL;
+}
+
+int kbase_ipa_init(struct kbase_device *kbdev)
+{
+
+	const char *model_name;
+	struct kbase_ipa_model_ops *ops;
+	struct kbase_ipa_model *default_model = NULL;
+	int err;
+
+	mutex_init(&kbdev->ipa.lock);
+	/*
+	 * Lock during init to avoid warnings from lockdep_assert_held (there
+	 * shouldn't be any concurrent access yet).
+	 */
+	mutex_lock(&kbdev->ipa.lock);
+
+	/* The simple IPA model must *always* be present.*/
+	ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME);
+
+	if (!ops->do_utilization_scaling_in_framework) {
+		dev_err(kbdev->dev,
+			"Fallback IPA model %s should not account for utilization\n",
+			ops->name);
+		err = -EINVAL;
+		goto end;
+	}
+
+	default_model = kbase_ipa_init_model(kbdev, ops);
+	if (!default_model) {
+		err = -EINVAL;
+		goto end;
+	}
+
+	kbdev->ipa.fallback_model = default_model;
+	err = of_property_read_string(kbdev->dev->of_node,
+				      "ipa-model",
+				      &model_name);
+	if (err) {
+		/* Attempt to load a match from GPU-ID */
+		u32 gpu_id;
+
+		gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		model_name = kbase_ipa_model_name_from_id(gpu_id);
+		dev_dbg(kbdev->dev,
+			"Inferring model from GPU ID 0x%x: \'%s\'\n",
+			gpu_id, model_name);
+		err = 0;
+	} else {
+		dev_dbg(kbdev->dev,
+			"Using ipa-model parameter from DT: \'%s\'\n",
+			model_name);
+	}
+
+	if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) {
+		ops = kbase_ipa_model_ops_find(kbdev, model_name);
+		kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops);
+		if (!kbdev->ipa.configured_model) {
+			err = -EINVAL;
+			goto end;
+		}
+	} else {
+		kbdev->ipa.configured_model = default_model;
+	}
+
+	kbase_ipa_model_use_configured_locked(kbdev);
+
+end:
+	if (err)
+		kbase_ipa_term_locked(kbdev);
+	else
+		dev_info(kbdev->dev,
+			 "Using configured power model %s, and fallback %s\n",
+			 kbdev->ipa.configured_model->ops->name,
+			 kbdev->ipa.fallback_model->ops->name);
+
+	mutex_unlock(&kbdev->ipa.lock);
+	return err;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_init);
+
+void kbase_ipa_term(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->ipa.lock);
+	kbase_ipa_term_locked(kbdev);
+	mutex_unlock(&kbdev->ipa.lock);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_term);
+
+/**
+ * kbase_scale_dynamic_power() - Scale a dynamic power coefficient to an OPP
+ * @c:		Dynamic model coefficient, in pW/(Hz V^2). Should be in range
+ *		0 < c < 2^26 to prevent overflow.
+ * @freq:	Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz)
+ * @voltage:	Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
+ *
+ * Keep a record of the approximate range of each value at every stage of the
+ * calculation, to ensure we don't overflow. This makes heavy use of the
+ * approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual
+ * calculations in decimal for increased accuracy.
+ *
+ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
+ */
+static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq,
+				     const u32 voltage)
+{
+	/* Range: 2^8 < v2 < 2^16 m(V^2) */
+	const u32 v2 = (voltage * voltage) / 1000;
+
+	/* Range: 2^3 < f_MHz < 2^10 MHz */
+	const u32 f_MHz = freq / 1000000;
+
+	/* Range: 2^11 < v2f_big < 2^26 kHz V^2 */
+	const u32 v2f_big = v2 * f_MHz;
+
+	/* Range: 2^1 < v2f < 2^16 MHz V^2 */
+	const u32 v2f = v2f_big / 1000;
+
+	/* Range (working backwards from next line): 0 < v2fc < 2^23 uW.
+	 * Must be < 2^42 to avoid overflowing the return value. */
+	const u64 v2fc = (u64) c * (u64) v2f;
+
+	/* Range: 0 < v2fc / 1000 < 2^13 mW */
+	return div_u64(v2fc, 1000);
+}
+
+/**
+ * kbase_scale_static_power() - Scale a static power coefficient to an OPP
+ * @c:		Static model coefficient, in uW/V^3. Should be in range
+ *		0 < c < 2^32 to prevent overflow.
+ * @voltage:	Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
+ *
+ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
+ */
+u32 kbase_scale_static_power(const u32 c, const u32 voltage)
+{
+	/* Range: 2^8 < v2 < 2^16 m(V^2) */
+	const u32 v2 = (voltage * voltage) / 1000;
+
+	/* Range: 2^17 < v3_big < 2^29 m(V^2) mV */
+	const u32 v3_big = v2 * voltage;
+
+	/* Range: 2^7 < v3 < 2^19 m(V^3) */
+	const u32 v3 = v3_big / 1000;
+
+	/*
+	 * Range (working backwards from next line): 0 < v3c_big < 2^33 nW.
+	 * The result should be < 2^52 to avoid overflowing the return value.
+	 */
+	const u64 v3c_big = (u64) c * (u64) v3;
+
+	/* Range: 0 < v3c_big / 1000000 < 2^13 mW */
+	return div_u64(v3c_big, 1000000);
+}
+
+static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	if (atomic_read(&kbdev->ipa_use_configured_model))
+		return kbdev->ipa.configured_model;
+	else
+		return kbdev->ipa.fallback_model;
+}
+
+static u32 get_static_power_locked(struct kbase_device *kbdev,
+				   struct kbase_ipa_model *model,
+				   unsigned long voltage)
+{
+	u32 power = 0;
+	int err;
+	u32 power_coeff;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (!model->ops->get_static_coeff)
+		model = kbdev->ipa.fallback_model;
+
+	if (model->ops->get_static_coeff) {
+		err = model->ops->get_static_coeff(model, &power_coeff);
+		if (!err)
+			power = kbase_scale_static_power(power_coeff,
+							 (u32) voltage);
+	}
+
+	return power;
+}
+
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static unsigned long kbase_get_static_power(struct devfreq *df,
+					    unsigned long voltage)
+#else
+static unsigned long kbase_get_static_power(unsigned long voltage)
+#endif
+{
+	struct kbase_ipa_model *model;
+	u32 power = 0;
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+#else
+	struct kbase_device *kbdev = kbase_find_device(-1);
+#endif
+
+	mutex_lock(&kbdev->ipa.lock);
+
+	model = get_current_model(kbdev);
+	power = get_static_power_locked(kbdev, model, voltage);
+
+	mutex_unlock(&kbdev->ipa.lock);
+
+#if !(defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
+	kbase_release_device(kbdev);
+#endif
+
+	return power;
+}
+
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static unsigned long kbase_get_dynamic_power(struct devfreq *df,
+					     unsigned long freq,
+					     unsigned long voltage)
+#else
+static unsigned long kbase_get_dynamic_power(unsigned long freq,
+					     unsigned long voltage)
+#endif
+{
+	struct kbase_ipa_model *model;
+	u32 power_coeff = 0, power = 0;
+	int err = 0;
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+#else
+	struct kbase_device *kbdev = kbase_find_device(-1);
+#endif
+
+	mutex_lock(&kbdev->ipa.lock);
+
+	model = kbdev->ipa.fallback_model;
+
+	err = model->ops->get_dynamic_coeff(model, &power_coeff, freq);
+
+	if (!err)
+		power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
+	else
+		dev_err_ratelimited(kbdev->dev,
+				    "Model %s returned error code %d\n",
+				    model->ops->name, err);
+
+	mutex_unlock(&kbdev->ipa.lock);
+
+#if !(defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
+	kbase_release_device(kbdev);
+#endif
+
+	return power;
+}
+
+int kbase_get_real_power(struct devfreq *df, u32 *power,
+				unsigned long freq,
+				unsigned long voltage)
+{
+	struct kbase_ipa_model *model;
+	u32 power_coeff = 0;
+	int err = 0;
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+
+	mutex_lock(&kbdev->ipa.lock);
+
+	model = get_current_model(kbdev);
+
+	err = model->ops->get_dynamic_coeff(model, &power_coeff, freq);
+
+	/* If we switch to protected model between get_current_model() and
+	 * get_dynamic_coeff(), counter reading could fail. If that happens
+	 * (unlikely, but possible), revert to the fallback model. */
+	if (err && model != kbdev->ipa.fallback_model) {
+		model = kbdev->ipa.fallback_model;
+		err = model->ops->get_dynamic_coeff(model, &power_coeff, freq);
+	}
+
+	if (err)
+		goto exit_unlock;
+
+	*power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
+
+	if (model->ops->do_utilization_scaling_in_framework) {
+		struct devfreq_dev_status *status = &df->last_status;
+		unsigned long total_time = max(status->total_time, 1ul);
+		u64 busy_time = min(status->busy_time, total_time);
+
+		*power = div_u64((u64) *power * (u64) busy_time, total_time);
+	}
+
+	*power += get_static_power_locked(kbdev, model, voltage);
+
+exit_unlock:
+	mutex_unlock(&kbdev->ipa.lock);
+
+	return err;
+}
+KBASE_EXPORT_TEST_API(kbase_get_real_power);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+struct devfreq_cooling_ops kbase_ipa_power_model_ops = {
+#else
+struct devfreq_cooling_power kbase_ipa_power_model_ops = {
+#endif
+	.get_static_power = &kbase_get_static_power,
+	.get_dynamic_power = &kbase_get_dynamic_power,
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	.get_real_power = &kbase_get_real_power,
+#endif
+};
+KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
new file mode 100644
index 0000000..469f33c
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
@@ -0,0 +1,165 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_IPA_H_
+#define _KBASE_IPA_H_
+
+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
+
+struct devfreq;
+
+struct kbase_ipa_model {
+	struct list_head link;
+	struct kbase_device *kbdev;
+	void *model_data;
+	struct kbase_ipa_model_ops *ops;
+	struct list_head params;
+	bool missing_dt_node_warning;
+};
+
+/**
+ * kbase_ipa_model_add_param_s32 - Add an integer model parameter
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @addr:	address where the value is stored
+ * @num_elems:	number of elements (1 if not an array)
+ * @dt_required: if false, a corresponding devicetree entry is not required,
+ *		 and the current value will be used. If true, a warning is
+ *		 output and the data is zeroed
+ *
+ * Return: 0 on success, or an error code
+ */
+int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
+				  const char *name, s32 *addr,
+				  size_t num_elems, bool dt_required);
+
+/**
+ * kbase_ipa_model_add_param_string - Add a string model parameter
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @addr:	address where the value is stored
+ * @size:	size, in bytes, of the value storage (so the maximum string
+ *		length is size - 1)
+ * @dt_required: if false, a corresponding devicetree entry is not required,
+ *		 and the current value will be used. If true, a warning is
+ *		 output and the data is zeroed
+ *
+ * Return: 0 on success, or an error code
+ */
+int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
+				     const char *name, char *addr,
+				     size_t size, bool dt_required);
+
+struct kbase_ipa_model_ops {
+	char *name;
+	/* The init, recalculate and term ops on the default model are always
+	 * called.  However, all the other models are only invoked if the model
+	 * is selected in the device tree. Otherwise they are never
+	 * initialized. Additional resources can be acquired by models in
+	 * init(), however they must be terminated in the term().
+	 */
+	int (*init)(struct kbase_ipa_model *model);
+	/* Called immediately after init(), or when a parameter is changed, so
+	 * that any coefficients derived from model parameters can be
+	 * recalculated. */
+	int (*recalculate)(struct kbase_ipa_model *model);
+	void (*term)(struct kbase_ipa_model *model);
+	/*
+	 * get_dynamic_coeff() - calculate dynamic power coefficient
+	 * @model:		pointer to model
+	 * @coeffp:		pointer to return value location
+	 * @current_freq:	frequency the GPU has been running at for the
+	 *			previous sampling period.
+	 *
+	 * Calculate a dynamic power coefficient, with units pW/(Hz V^2), which
+	 * is then scaled by the IPA framework according to the current OPP's
+	 * frequency and voltage.
+	 *
+	 * Return: 0 on success, or an error code.
+	 */
+	int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp,
+				 u32 current_freq);
+	/*
+	 * get_static_coeff() - calculate static power coefficient
+	 * @model:		pointer to model
+	 * @coeffp:		pointer to return value location
+	 *
+	 * Calculate a static power coefficient, with units uW/(V^3), which is
+	 * scaled by the IPA framework according to the current OPP's voltage.
+	 *
+	 * Return: 0 on success, or an error code.
+	 */
+	int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
+	/* If false, the model's get_dynamic_coeff() method accounts for how
+	 * long the GPU was active over the sample period. If true, the
+	 * framework will scale the calculated power according to the
+	 * utilization stats recorded by devfreq in get_real_power(). */
+	bool do_utilization_scaling_in_framework;
+};
+
+/* Models can be registered only in the platform's platform_init_func call */
+int kbase_ipa_model_ops_register(struct kbase_device *kbdev,
+			     struct kbase_ipa_model_ops *new_model_ops);
+struct kbase_ipa_model *kbase_ipa_get_model(struct kbase_device *kbdev,
+					    const char *name);
+
+int kbase_ipa_init(struct kbase_device *kbdev);
+void kbase_ipa_term(struct kbase_device *kbdev);
+void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev);
+void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev);
+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
+					     struct kbase_ipa_model_ops *ops);
+void kbase_ipa_term_model(struct kbase_ipa_model *model);
+
+extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_get_real_power() - get the real power consumption of the GPU
+ * @df: dynamic voltage and frequency scaling information for the GPU.
+ * @power: where to store the power consumption, in mW.
+ * @freq: a frequency, in HZ.
+ * @voltage: a voltage, in mV.
+ *
+ * This function is only exposed for use by unit tests. The returned value
+ * incorporates both static and dynamic power consumption.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_get_real_power(struct devfreq *df, u32 *power,
+				unsigned long freq,
+				unsigned long voltage);
+#endif /* MALI_UNIT_TEST */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+extern struct devfreq_cooling_ops kbase_ipa_power_model_ops;
+#else
+extern struct devfreq_cooling_power kbase_ipa_power_model_ops;
+#endif
+
+#else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+static inline void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
+{ }
+
+static inline void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
+{ }
+
+#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
new file mode 100644
index 0000000..d3ac7c3
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
@@ -0,0 +1,249 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+#include "mali_kbase.h"
+#include "mali_kbase_ipa.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0))
+#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
+#endif
+
+struct kbase_ipa_model_param {
+	char *name;
+	union {
+		void *voidp;
+		s32 *s32p;
+		char *str;
+	} addr;
+	size_t size;
+	enum kbase_ipa_model_param_type type;
+	struct kbase_ipa_model *model;
+	struct list_head link;
+};
+
+static int param_int_get(void *data, u64 *val)
+{
+	struct kbase_ipa_model_param *param = data;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	*(s64 *) val = *param->addr.s32p;
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return 0;
+}
+
+static int param_int_set(void *data, u64 val)
+{
+	struct kbase_ipa_model_param *param = data;
+	struct kbase_ipa_model *model = param->model;
+	s64 sval = (s64) val;
+	int err = 0;
+
+	if (sval < S32_MIN || sval > S32_MAX)
+		return -ERANGE;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	*param->addr.s32p = val;
+	err = kbase_ipa_model_recalculate(model);
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return err;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_s32, param_int_get, param_int_set, "%lld\n");
+
+static ssize_t param_string_get(struct file *file, char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct kbase_ipa_model_param *param = file->private_data;
+	ssize_t ret;
+	size_t len;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	len = strnlen(param->addr.str, param->size - 1) + 1;
+	ret = simple_read_from_buffer(user_buf, count, ppos,
+				      param->addr.str, len);
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return ret;
+}
+
+static ssize_t param_string_set(struct file *file, const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct kbase_ipa_model_param *param = file->private_data;
+	struct kbase_ipa_model *model = param->model;
+	ssize_t ret = count;
+	size_t buf_size;
+	int err;
+
+	mutex_lock(&model->kbdev->ipa.lock);
+
+	if (count > param->size) {
+		ret = -EINVAL;
+		goto end;
+	}
+
+	buf_size = min(param->size - 1, count);
+	if (copy_from_user(param->addr.str, user_buf, buf_size)) {
+		ret = -EFAULT;
+		goto end;
+	}
+
+	param->addr.str[buf_size] = '\0';
+
+	err = kbase_ipa_model_recalculate(model);
+	if (err < 0)
+		ret = err;
+
+end:
+	mutex_unlock(&model->kbdev->ipa.lock);
+
+	return ret;
+}
+
+static const struct file_operations fops_string = {
+	.read = param_string_get,
+	.write = param_string_set,
+	.open = simple_open,
+	.llseek = default_llseek,
+};
+
+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name,
+			      void *addr, size_t size,
+			      enum kbase_ipa_model_param_type type)
+{
+	struct kbase_ipa_model_param *param;
+
+	param = kzalloc(sizeof(*param), GFP_KERNEL);
+
+	if (!param)
+		return -ENOMEM;
+
+	/* 'name' is stack-allocated for array elements, so copy it into
+	 * heap-allocated storage */
+	param->name = kstrdup(name, GFP_KERNEL);
+
+	if (!param->name) {
+		kfree(param);
+		return -ENOMEM;
+	}
+
+	param->addr.voidp = addr;
+	param->size = size;
+	param->type = type;
+	param->model = model;
+
+	list_add(&param->link, &model->params);
+
+	return 0;
+}
+
+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_param *param_p, *param_n;
+
+	list_for_each_entry_safe(param_p, param_n, &model->params, link) {
+		list_del(&param_p->link);
+		kfree(param_p->name);
+		kfree(param_p);
+	}
+}
+
+static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
+{
+	struct list_head *it;
+	struct dentry *dir;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	dir = debugfs_create_dir(model->ops->name,
+				 model->kbdev->mali_debugfs_directory);
+
+	if (!dir) {
+		dev_err(model->kbdev->dev,
+			"Couldn't create mali debugfs %s directory",
+			model->ops->name);
+		return;
+	}
+
+	list_for_each(it, &model->params) {
+		struct kbase_ipa_model_param *param =
+				list_entry(it,
+					   struct kbase_ipa_model_param,
+					   link);
+		const struct file_operations *fops = NULL;
+
+		switch (param->type) {
+		case PARAM_TYPE_S32:
+			fops = &fops_s32;
+			break;
+		case PARAM_TYPE_STRING:
+			fops = &fops_string;
+			break;
+		}
+
+		if (unlikely(!fops)) {
+			dev_err(model->kbdev->dev,
+				"Type not set for %s parameter %s\n",
+				model->ops->name, param->name);
+		} else {
+			debugfs_create_file(param->name, S_IRUGO | S_IWUSR,
+					    dir, param, fops);
+		}
+	}
+}
+
+void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model,
+	const char *name, s32 val)
+{
+	struct kbase_ipa_model_param *param;
+
+	mutex_lock(&model->kbdev->ipa.lock);
+
+	list_for_each_entry(param, &model->params, link) {
+		if (!strcmp(param->name, name)) {
+			if (param->type == PARAM_TYPE_S32) {
+				*param->addr.s32p = val;
+			} else {
+				dev_err(model->kbdev->dev,
+					"Wrong type for %s parameter %s\n",
+					model->ops->name, param->name);
+			}
+			break;
+		}
+	}
+
+	mutex_unlock(&model->kbdev->ipa.lock);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_model_param_set_s32);
+
+void kbase_ipa_debugfs_init(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->ipa.lock);
+
+	if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model)
+		kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model);
+	kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model);
+
+	mutex_unlock(&kbdev->ipa.lock);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h
new file mode 100644
index 0000000..f624de9
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_IPA_DEBUGFS_H_
+#define _KBASE_IPA_DEBUGFS_H_
+
+enum kbase_ipa_model_param_type {
+	PARAM_TYPE_S32 = 1,
+	PARAM_TYPE_STRING,
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+void kbase_ipa_debugfs_init(struct kbase_device *kbdev);
+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name,
+			      void *addr, size_t size,
+			      enum kbase_ipa_model_param_type type);
+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_model_param_set_s32 - Set an integer model parameter
+ *
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @val:	new value of the parameter
+ *
+ * This function is only exposed for use by unit tests running in
+ * kernel space. Normally it is expected that parameter values will
+ * instead be set via debugfs.
+ */
+void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model,
+	const char *name, s32 val);
+
+#else /* CONFIG_DEBUG_FS */
+
+static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model,
+					    const char *name, void *addr,
+					    size_t size,
+					    enum kbase_ipa_model_param_type type)
+{
+	return 0;
+}
+
+static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
+{ }
+
+#endif /* CONFIG_DEBUG_FS */
+
+#endif /* _KBASE_IPA_DEBUGFS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
new file mode 100644
index 0000000..70e08b3
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
@@ -0,0 +1,323 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/thermal.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+#include <linux/of.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+
+#include "mali_kbase.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_ipa_simple.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+#if MALI_UNIT_TEST
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+static unsigned long dummy_temp;
+
+static int kbase_simple_power_model_get_dummy_temp(
+	struct thermal_zone_device *tz,
+	unsigned long *temp)
+{
+	*temp = ACCESS_ONCE(dummy_temp);
+	return 0;
+}
+
+#else
+static int dummy_temp;
+
+static int kbase_simple_power_model_get_dummy_temp(
+	struct thermal_zone_device *tz,
+	int *temp)
+{
+	*temp = ACCESS_ONCE(dummy_temp);
+	return 0;
+}
+#endif
+
+/* Intercept calls to the kernel function using a macro */
+#ifdef thermal_zone_get_temp
+#undef thermal_zone_get_temp
+#endif
+#define thermal_zone_get_temp(tz, temp) \
+	kbase_simple_power_model_get_dummy_temp(tz, temp)
+
+void kbase_simple_power_model_set_dummy_temp(int temp)
+{
+	ACCESS_ONCE(dummy_temp) = temp;
+}
+KBASE_EXPORT_TEST_API(kbase_simple_power_model_set_dummy_temp);
+
+#endif /* MALI_UNIT_TEST */
+
+/*
+ * This model is primarily designed for the Juno platform. It may not be
+ * suitable for other platforms. The additional resources in this model
+ * should preferably be minimal, as this model is rarely used when a dynamic
+ * model is available.
+ */
+
+/**
+ * struct kbase_ipa_model_simple_data - IPA context per device
+ * @dynamic_coefficient: dynamic coefficient of the model
+ * @static_coefficient:  static coefficient of the model
+ * @ts:                  Thermal scaling coefficients of the model
+ * @tz_name:             Thermal zone name
+ * @gpu_tz:              thermal zone device
+ * @poll_temperature_thread: Handle for temperature polling thread
+ * @current_temperature: Most recent value of polled temperature
+ * @temperature_poll_interval_ms: How often temperature should be checked, in ms
+ */
+
+struct kbase_ipa_model_simple_data {
+	u32 dynamic_coefficient;
+	u32 static_coefficient;
+	s32 ts[4];
+	char tz_name[16];
+	struct thermal_zone_device *gpu_tz;
+	struct task_struct *poll_temperature_thread;
+	int current_temperature;
+	int temperature_poll_interval_ms;
+};
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+/**
+ * calculate_temp_scaling_factor() - Calculate temperature scaling coefficient
+ * @ts:		Signed coefficients, in order t^0 to t^3, with units Deg^-N
+ * @t:		Temperature, in mDeg C. Range: -2^17 < t < 2^17
+ *
+ * Scale the temperature according to a cubic polynomial whose coefficients are
+ * provided in the device tree. The result is used to scale the static power
+ * coefficient, where 1000000 means no change.
+ *
+ * Return: Temperature scaling factor. Range 0 <= ret <= 10,000,000.
+ */
+static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t)
+{
+	/* Range: -2^24 < t2 < 2^24 m(Deg^2) */
+	const s64 t2 = div_s64((t * t), 1000);
+
+	/* Range: -2^31 < t3 < 2^31 m(Deg^3) */
+	const s64 t3 = div_s64((t * t2), 1000);
+
+	/*
+	 * Sum the parts. t^[1-3] are in m(Deg^N), but the coefficients are in
+	 * Deg^-N, so we need to multiply the last coefficient by 1000.
+	 * Range: -2^63 < res_big < 2^63
+	 */
+	const s64 res_big = ts[3] * t3    /* +/- 2^62 */
+			  + ts[2] * t2    /* +/- 2^55 */
+			  + ts[1] * t     /* +/- 2^48 */
+			  + ts[0] * 1000; /* +/- 2^41 */
+
+	/* Range: -2^60 < res_unclamped < 2^60 */
+	s64 res_unclamped = div_s64(res_big, 1000);
+
+	/* Clamp to range of 0x to 10x the static power */
+	return clamp(res_unclamped, (s64) 0, (s64) 10000000);
+}
+
+/* We can't call thermal_zone_get_temp() directly in model_static_coeff(),
+ * because we don't know if tz->lock is held in the same thread. So poll it in
+ * a separate thread to get around this. */
+static int poll_temperature(void *data)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *) data;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	unsigned long temp;
+#else
+	int temp;
+#endif
+
+	while (!kthread_should_stop()) {
+		struct thermal_zone_device *tz = ACCESS_ONCE(model_data->gpu_tz);
+
+		if (tz) {
+			int ret;
+
+			ret = thermal_zone_get_temp(tz, &temp);
+			if (ret) {
+				pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+						    ret);
+				temp = FALLBACK_STATIC_TEMPERATURE;
+			}
+		} else {
+			temp = FALLBACK_STATIC_TEMPERATURE;
+		}
+
+		ACCESS_ONCE(model_data->current_temperature) = temp;
+
+		msleep_interruptible(ACCESS_ONCE(model_data->temperature_poll_interval_ms));
+	}
+
+	return 0;
+}
+
+static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp)
+{
+	u32 temp_scaling_factor;
+	struct kbase_ipa_model_simple_data *model_data =
+		(struct kbase_ipa_model_simple_data *) model->model_data;
+	u64 coeff_big;
+	int temp;
+
+	temp = ACCESS_ONCE(model_data->current_temperature);
+
+	/* Range: 0 <= temp_scaling_factor < 2^24 */
+	temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts,
+							    temp);
+
+	/*
+	 * Range: 0 <= coeff_big < 2^52 to avoid overflowing *coeffp. This
+	 * means static_coefficient must be in range
+	 * 0 <= static_coefficient < 2^28.
+	 */
+	coeff_big = (u64) model_data->static_coefficient * (u64) temp_scaling_factor;
+	*coeffp = div_u64(coeff_big, 1000000);
+
+	return 0;
+}
+
+static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
+			       u32 current_freq)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+		(struct kbase_ipa_model_simple_data *) model->model_data;
+
+	*coeffp = model_data->dynamic_coefficient;
+
+	return 0;
+}
+
+static int add_params(struct kbase_ipa_model *model)
+{
+	int err = 0;
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+
+	err = kbase_ipa_model_add_param_s32(model, "static-coefficient",
+					    &model_data->static_coefficient,
+					    1, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient",
+					    &model_data->dynamic_coefficient,
+					    1, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_s32(model, "ts",
+					    model_data->ts, 4, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_string(model, "thermal-zone",
+					       model_data->tz_name,
+					       sizeof(model_data->tz_name), true);
+	if (err)
+		goto end;
+
+	model_data->temperature_poll_interval_ms = 200;
+	err = kbase_ipa_model_add_param_s32(model, "temp-poll-interval-ms",
+					    &model_data->temperature_poll_interval_ms,
+					    1, false);
+
+end:
+	return err;
+}
+
+static int kbase_simple_power_model_init(struct kbase_ipa_model *model)
+{
+	int err;
+	struct kbase_ipa_model_simple_data *model_data;
+
+	model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data),
+			     GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model->model_data = (void *) model_data;
+
+	model_data->current_temperature = FALLBACK_STATIC_TEMPERATURE;
+	model_data->poll_temperature_thread = kthread_run(poll_temperature,
+							  (void *) model_data,
+							  "mali-simple-power-model-temp-poll");
+	if (IS_ERR(model_data->poll_temperature_thread)) {
+		kfree(model_data);
+		return PTR_ERR(model_data->poll_temperature_thread);
+	}
+
+	err = add_params(model);
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kthread_stop(model_data->poll_temperature_thread);
+		kfree(model_data);
+	}
+
+	return err;
+}
+
+static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+	struct thermal_zone_device *tz;
+
+	if (!strnlen(model_data->tz_name, sizeof(model_data->tz_name))) {
+		tz = NULL;
+	} else {
+		tz = thermal_zone_get_zone_by_name(model_data->tz_name);
+
+		if (IS_ERR_OR_NULL(tz)) {
+			pr_warn_ratelimited("Error %ld getting thermal zone \'%s\', not yet ready?\n",
+					    PTR_ERR(tz), model_data->tz_name);
+			tz = NULL;
+			return -EPROBE_DEFER;
+		}
+	}
+
+	ACCESS_ONCE(model_data->gpu_tz) = tz;
+
+	return 0;
+}
+
+static void kbase_simple_power_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+
+	kthread_stop(model_data->poll_temperature_thread);
+
+	kfree(model_data);
+}
+
+struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = {
+		.name = "mali-simple-power-model",
+		.init = &kbase_simple_power_model_init,
+		.recalculate = &kbase_simple_power_model_recalculate,
+		.term = &kbase_simple_power_model_term,
+		.get_dynamic_coeff = &model_dynamic_coeff,
+		.get_static_coeff = &model_static_coeff,
+		.do_utilization_scaling_in_framework = true,
+};
+KBASE_EXPORT_TEST_API(kbase_simple_ipa_model_ops);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h
new file mode 100644
index 0000000..e78d617
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_IPA_SIMPLE_H_
+#define _KBASE_IPA_SIMPLE_H_
+
+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
+
+extern struct kbase_ipa_model_ops kbase_simple_ipa_model_ops;
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_simple_power_model_set_dummy_temp() - set a dummy temperature value
+ * @temp: Temperature of the thermal zone, in millidegrees celsius.
+ *
+ * This is only intended for use in unit tests, to ensure that the temperature
+ * values used by the simple power model are predictable. Deterministic
+ * behavior is necessary to allow validation of the static power values
+ * computed by this model.
+ */
+void kbase_simple_power_model_set_dummy_temp(int temp);
+#endif /* MALI_UNIT_TEST */
+
+#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+#endif /* _KBASE_IPA_SIMPLE_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
new file mode 100644
index 0000000..9b9fa0e
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
@@ -0,0 +1,229 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_ipa_vinstr_common.h"
+
+#if MALI_UNIT_TEST
+static ktime_t dummy_time;
+
+/* Intercept calls to the kernel function using a macro */
+#ifdef ktime_get
+#undef ktime_get
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+#define ktime_get() (ACCESS_ONCE(dummy_time))
+
+void kbase_ipa_set_dummy_time(ktime_t t)
+{
+	ACCESS_ONCE(dummy_time) = t;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_set_dummy_time);
+#else
+#define ktime_get() (READ_ONCE(dummy_time))
+
+void kbase_ipa_set_dummy_time(ktime_t t)
+{
+	WRITE_ONCE(dummy_time, t);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_set_dummy_time);
+
+#endif
+
+#endif /* MALI_UNIT_TEST */
+
+/**
+ * read_hwcnt() - read a counter value
+ * @model_data:		pointer to model data
+ * @offset:		offset, in bytes, into vinstr buffer
+ *
+ * Return: A 32-bit counter value. Range: 0 < value < 2^27 (worst case would be
+ * incrementing every cycle over a ~100ms sample period at a high frequency,
+ * e.g. 1 GHz: 2^30 * 0.1seconds ~= 2^27.
+ */
+static inline u32 kbase_ipa_read_hwcnt(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	u32 offset)
+{
+	u8 *p = model_data->vinstr_buffer;
+
+	return *(u32 *)&p[offset];
+}
+
+static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
+{
+	if (S64_MAX - a < b)
+		return S64_MAX;
+	return a + b;
+}
+
+s64 kbase_ipa_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+	u64 core_mask;
+	u32 base = 0;
+	s64 ret = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			/* 0 < counter_value < 2^27 */
+			u32 counter_value = kbase_ipa_read_hwcnt(model_data,
+						       base + counter);
+
+			/* 0 < ret < 2^27 * max_num_cores = 2^32 */
+			ret = kbase_ipa_add_saturate(ret, counter_value);
+		}
+		base += KBASE_IPA_NR_BYTES_PER_BLOCK;
+		core_mask >>= 1;
+	}
+
+	/* Range: -2^54 < ret < 2^54 */
+	ret *= coeff;
+
+	return div_s64(ret, 1000000);
+}
+
+s64 kbase_ipa_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter)
+{
+	/* Range: 0 < counter_value < 2^27 */
+	const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter);
+
+	/* Range: -2^49 < ret < 2^49 */
+	const s64 multiplied = (s64) counter_value * (s64) coeff;
+
+	/* Range: -2^29 < return < 2^29 */
+	return div_s64(multiplied, 1000000);
+}
+
+int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	size_t dump_size;
+
+	dump_size = kbase_vinstr_dump_size(kbdev);
+	model_data->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!model_data->vinstr_buffer) {
+		dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
+		return -1;
+	}
+
+	setup.jm_bm = ~0u;
+	setup.shader_bm = ~0u;
+	setup.tiler_bm = ~0u;
+	setup.mmu_l2_bm = ~0u;
+	model_data->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx,
+			&setup, model_data->vinstr_buffer);
+	if (!model_data->vinstr_cli) {
+		dev_err(kbdev->dev, "Failed to register IPA with vinstr core");
+		kfree(model_data->vinstr_buffer);
+		model_data->vinstr_buffer = NULL;
+		return -1;
+	}
+
+	model_data->last_sample_read_time = ktime_get();
+	kbase_vinstr_hwc_clear(model_data->vinstr_cli);
+
+	return 0;
+}
+
+void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	if (model_data->vinstr_cli)
+		kbase_vinstr_detach_client(model_data->vinstr_cli);
+	model_data->vinstr_cli = NULL;
+	kfree(model_data->vinstr_buffer);
+	model_data->vinstr_buffer = NULL;
+}
+
+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
+	u32 current_freq)
+{
+	struct kbase_ipa_model_vinstr_data *model_data =
+			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+	s64 energy = 0;
+	size_t i;
+	ktime_t now = ktime_get();
+	ktime_t time_since_last_sample =
+			ktime_sub(now, model_data->last_sample_read_time);
+	/* Range: 2^0 < time_since_last_sample_ms < 2^10 (1-1000ms) */
+	s64 time_since_last_sample_ms = ktime_to_ms(time_since_last_sample);
+	u64 coeff = 0;
+	u64 num_cycles;
+	int err = 0;
+
+	err = kbase_vinstr_hwc_dump(model_data->vinstr_cli,
+				    BASE_HWCNT_READER_EVENT_MANUAL);
+	if (err)
+		goto err0;
+
+	model_data->last_sample_read_time = now;
+
+	/* Range of 'energy' is +/- 2^34 * number of IPA groups, so around
+	 * -2^38 < energy < 2^38 */
+	for (i = 0; i < model_data->groups_def_num; i++) {
+		const struct kbase_ipa_group *group = &model_data->groups_def[i];
+		s32 coeff, group_energy;
+
+		coeff = model_data->group_values[i];
+		group_energy = group->op(model_data, coeff, group->counter_block_offset);
+
+		energy = kbase_ipa_add_saturate(energy, group_energy);
+	}
+
+	/* Range: 0 <= coeff < 2^38 */
+	if (energy > 0)
+		coeff = energy;
+
+	/* Scale by user-specified factor and divide by 1000. But actually
+	 * cancel the division out, because we want the num_cycles in KHz and
+	 * don't want to lose precision. */
+
+	/* Range: 0 < coeff < 2^53 */
+	coeff = coeff * model_data->scaling_factor;
+
+	if (time_since_last_sample_ms == 0) {
+		time_since_last_sample_ms = 1;
+	} else if (time_since_last_sample_ms < 0) {
+		err = -ERANGE;
+		goto err0;
+	}
+
+	/* Range: 2^20 < num_cycles < 2^40 mCycles */
+	num_cycles = (u64) current_freq * (u64) time_since_last_sample_ms;
+	/* Range: 2^10 < num_cycles < 2^30 Cycles */
+	num_cycles = div_u64(num_cycles, 1000000);
+
+	/* num_cycles should never be 0 in _normal_ usage (because we expect
+	 * frequencies on the order of MHz and >10ms polling intervals), but
+	 * protect against divide-by-zero anyway. */
+	if (num_cycles == 0)
+		num_cycles = 1;
+
+	/* Range: 0 < coeff < 2^43 */
+	coeff = div_u64(coeff, num_cycles);
+
+err0:
+	/* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
+	*coeffp = clamp(coeff, (u64) 0, (u64) 1 << 16);
+	return err;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
new file mode 100644
index 0000000..d212c87
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
@@ -0,0 +1,163 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_IPA_VINSTR_COMMON_H_
+#define _KBASE_IPA_VINSTR_COMMON_H_
+
+#include "mali_kbase.h"
+
+/* Maximum length for the name of an IPA group. */
+#define KBASE_IPA_MAX_GROUP_NAME_LEN 15
+
+/* Maximum number of IPA groups for an IPA model. */
+#define KBASE_IPA_MAX_GROUP_DEF_NUM  16
+
+/* Number of bytes per hardware counter in a vinstr_buffer. */
+#define KBASE_IPA_NR_BYTES_PER_CNT    4
+
+/* Number of hardware counters per block in a vinstr_buffer. */
+#define KBASE_IPA_NR_CNT_PER_BLOCK   64
+
+/* Number of bytes per block in a vinstr_buffer. */
+#define KBASE_IPA_NR_BYTES_PER_BLOCK \
+	(KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT)
+
+
+
+/**
+ * struct kbase_ipa_model_vinstr_data - IPA context per device
+ * @kbdev:               pointer to kbase device
+ * @groups_def:          Array of IPA groups.
+ * @groups_def_num:      Number of elements in the array of IPA groups.
+ * @vinstr_cli:          vinstr client handle
+ * @vinstr_buffer:       buffer to dump hardware counters onto
+ * @last_sample_read_time: timestamp of last vinstr buffer read
+ * @scaling_factor:      user-specified power scaling factor. This is
+ *                       interpreted as a fraction where the denominator is
+ *                       1000. Range approx 0.0-32.0:
+ *                       0 < scaling_factor < 2^15
+ */
+struct kbase_ipa_model_vinstr_data {
+	struct kbase_device *kbdev;
+	s32 group_values[KBASE_IPA_MAX_GROUP_DEF_NUM];
+	const struct kbase_ipa_group *groups_def;
+	size_t groups_def_num;
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	ktime_t last_sample_read_time;
+	s32 scaling_factor;
+};
+
+/**
+ * struct ipa_group - represents a single IPA group
+ * @name:               name of the IPA group
+ * @default_value:      default value of coefficient for IPA group.
+ *                      Coefficients are interpreted as fractions where the
+ *                      denominator is 1000000.
+ * @op:                 which operation to be performed on the counter values
+ * @counter_block_offset:  block offset in bytes of the counter used to calculate energy for IPA group
+ */
+struct kbase_ipa_group {
+	char name[KBASE_IPA_MAX_GROUP_NAME_LEN + 1];
+	s32 default_value;
+	s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32);
+	u32 counter_block_offset;
+};
+
+/**
+ * sum_all_shader_cores() - sum a counter over all cores
+ * @model_data		pointer to model data
+ * @coeff		model coefficient. Unity is ~2^20, so range approx
+ * +/- 4.0: -2^22 < coeff < 2^22
+ * @counter     offset in bytes of the counter used to calculate energy for IPA group
+ *
+ * Calculate energy estimation based on hardware counter `counter'
+ * across all shader cores.
+ *
+ * Return: Sum of counter values. Range: -2^34 < ret < 2^34
+ */
+s64 kbase_ipa_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter);
+
+/**
+ * sum_single_counter() - sum a single counter
+ * @model_data		pointer to model data
+ * @coeff		model coefficient. Unity is ~2^20, so range approx
+ * +/- 4.0: -2^22 < coeff < 2^22
+ * @counter     offset in bytes of the counter used to calculate energy for IPA group
+ *
+ * Calculate energy estimation based on hardware counter `counter'.
+ *
+ * Return: Counter value. Range: -2^34 < ret < 2^34
+ */
+s64 kbase_ipa_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter);
+
+/**
+ * attach_vinstr() - attach a vinstr_buffer to an IPA model.
+ * @model_data		pointer to model data
+ *
+ * Attach a vinstr_buffer to an IPA model. The vinstr_buffer
+ * allows access to the hardware counters used to calculate
+ * energy consumption.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
+
+/**
+ * detach_vinstr() - detach a vinstr_buffer from an IPA model.
+ * @model_data		pointer to model data
+ *
+ * Detach a vinstr_buffer from an IPA model.
+ */
+void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
+
+/**
+ * kbase_ipa_vinstr_dynamic_coeff() - calculate dynamic power based on HW counters
+ * @model:		pointer to instantiated model
+ * @coeffp:		pointer to location where calculated power, in
+ *			pW/(Hz V^2), is stored.
+ * @current_freq:	frequency the GPU has been running at over the sample
+ *			period. In Hz. Range: 10 MHz < 1GHz,
+ *			2^20 < current_freq < 2^30
+ *
+ * This is a GPU-agnostic implementation of the get_dynamic_coeff()
+ * function of an IPA model. It relies on the model being populated
+ * with GPU-specific attributes at initialization time.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
+	u32 current_freq);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_ipa_set_dummy_time() - set a dummy monotonic time value
+ * @t: a monotonic time value
+ *
+ * This is only intended for use in unit tests, to ensure that the kernel time
+ * values used by a power model are predictable. Deterministic behavior is
+ * necessary to allow validation of the dynamic power values computed by the
+ * model.
+ */
+void kbase_ipa_set_dummy_time(ktime_t t);
+#endif /* MALI_UNIT_TEST */
+
+#endif /* _KBASE_IPA_VINSTR_COMMON_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g71.c b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g71.c
new file mode 100644
index 0000000..4e4c059
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g71.c
@@ -0,0 +1,251 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/thermal.h>
+
+#include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+
+/* Performance counter blocks base offsets */
+#define JM_BASE             (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define TILER_BASE          (1 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define MEMSYS_BASE         (2 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define SC0_BASE_ONE_MEMSYS (3 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define SC0_BASE_TWO_MEMSYS (4 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+
+/* JM counter block offsets */
+#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT *  6)
+
+/* Tiler counter block offsets */
+#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45)
+
+/* MEMSYS counter block offsets */
+#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25)
+
+/* SC counter block offsets */
+#define SC_FRAG_ACTIVE      (KBASE_IPA_NR_BYTES_PER_CNT *  4)
+#define SC_EXEC_CORE_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 26)
+#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28)
+#define SC_TEX_COORD_ISSUE  (KBASE_IPA_NR_BYTES_PER_CNT * 40)
+#define SC_VARY_SLOT_32     (KBASE_IPA_NR_BYTES_PER_CNT * 50)
+#define SC_VARY_SLOT_16     (KBASE_IPA_NR_BYTES_PER_CNT * 51)
+#define SC_BEATS_RD_LSC     (KBASE_IPA_NR_BYTES_PER_CNT * 56)
+#define SC_BEATS_WR_LSC     (KBASE_IPA_NR_BYTES_PER_CNT * 61)
+#define SC_BEATS_WR_TIB     (KBASE_IPA_NR_BYTES_PER_CNT * 62)
+
+/** Maximum number of cores for which a single Memory System block of performance counters is present. */
+#define KBASE_G71_SINGLE_MEMSYS_MAX_NUM_CORES ((u8)4)
+
+
+/**
+ * get_jm_counter() - get performance counter offset inside the Job Manager block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the Job Manager block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g71_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data,
+                                                u32 counter_block_offset)
+{
+	return JM_BASE + counter_block_offset;
+}
+
+/**
+ * get_memsys_counter() - get peformance counter offset inside the Memory System block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Memory System block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g71_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data,
+                                                    u32 counter_block_offset)
+{
+	/* The base address of Memory System performance counters is always the same, although their number
+	 * may vary based on the number of cores. For the moment it's ok to return a constant.
+	 */
+	return MEMSYS_BASE + counter_block_offset;
+}
+
+/**
+ * get_sc_counter() - get performance counter offset inside the Shader Cores block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Shader Cores block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g71_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
+                                                u32 counter_block_offset)
+{
+	const u32 sc_base = model_data->kbdev->gpu_props.num_cores <= KBASE_G71_SINGLE_MEMSYS_MAX_NUM_CORES ?
+	                    SC0_BASE_ONE_MEMSYS :
+	                    SC0_BASE_TWO_MEMSYS;
+
+	return sc_base + counter_block_offset;
+}
+
+/**
+ * memsys_single_counter() - calculate energy for a single Memory System performance counter.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Memory System performance counter.
+ */
+static s64 kbase_g71_memsys_single_counter(
+    struct kbase_ipa_model_vinstr_data *model_data,
+    s32 coeff,
+    u32 counter_block_offset)
+{
+	return kbase_ipa_single_counter(model_data, coeff,
+	                                kbase_g71_power_model_get_memsys_counter(model_data, counter_block_offset));
+}
+
+/**
+ * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a Shader Cores performance counter for all cores.
+ */
+static s64 kbase_g71_sum_all_shader_cores(
+    struct kbase_ipa_model_vinstr_data *model_data,
+    s32 coeff,
+    u32 counter_block_offset)
+{
+	return kbase_ipa_sum_all_shader_cores(model_data, coeff,
+	                                      kbase_g71_power_model_get_sc_counter(model_data, counter_block_offset));
+}
+
+/**
+ * jm_single_counter() - calculate energy for a single Job Manager performance counter.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Job Manager performance counter.
+ */
+static s64 kbase_g71_jm_single_counter(
+    struct kbase_ipa_model_vinstr_data *model_data,
+    s32 coeff,
+    u32 counter_block_offset)
+{
+	return kbase_ipa_single_counter(model_data, coeff,
+	                                kbase_g71_power_model_get_jm_counter(model_data, counter_block_offset));
+}
+
+/** Table of IPA group definitions.
+ *
+ * For each IPA group, this table defines a function to access the given performance block counter (or counters,
+ * if the operation needs to be iterated on multiple blocks) and calculate energy estimation.
+ */
+static const struct kbase_ipa_group ipa_groups_def[] = {
+	{
+		.name = "l2_access",
+		.default_value = 526300,
+		.op = kbase_g71_memsys_single_counter,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 301100,
+		.op = kbase_g71_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "tex_issue",
+		.default_value = 197400,
+		.op = kbase_g71_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_COORD_ISSUE,
+	},
+	{
+		.name = "tile_wb",
+		.default_value = -156400,
+		.op = kbase_g71_sum_all_shader_cores,
+		.counter_block_offset = SC_BEATS_WR_TIB,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 115800,
+		.op = kbase_g71_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+static int kbase_g71_power_model_init(struct kbase_ipa_model *model)
+{
+	int i, err = 0;
+	struct kbase_ipa_model_vinstr_data *model_data;
+
+	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model_data->kbdev = model->kbdev;
+	model_data->groups_def = ipa_groups_def;
+	BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def) > KBASE_IPA_MAX_GROUP_DEF_NUM);
+	model_data->groups_def_num = ARRAY_SIZE(ipa_groups_def);
+
+	model->model_data = (void *) model_data;
+
+	for (i = 0; i < ARRAY_SIZE(ipa_groups_def); ++i) {
+		const struct kbase_ipa_group *group = &ipa_groups_def[i];
+
+		model_data->group_values[i] = group->default_value;
+		err = kbase_ipa_model_add_param_s32(model, group->name,
+					&model_data->group_values[i],
+					1, false);
+		if (err)
+			goto exit;
+	}
+
+	model_data->scaling_factor = 5;
+	err = kbase_ipa_model_add_param_s32(model, "scale",
+					    &model_data->scaling_factor,
+					    1, false);
+	if (err)
+		goto exit;
+
+	err = kbase_ipa_attach_vinstr(model_data);
+
+exit:
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kfree(model_data);
+	}
+	return err;
+}
+
+static void kbase_g71_power_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_vinstr_data *model_data =
+			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+
+	kbase_ipa_detach_vinstr(model_data);
+	kfree(model_data);
+}
+
+
+struct kbase_ipa_model_ops kbase_g71_ipa_model_ops = {
+		.name = "mali-g71-power-model",
+		.init = kbase_g71_power_model_init,
+		.term = kbase_g71_power_model_term,
+		.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff,
+		.do_utilization_scaling_in_framework = false,
+};
+KBASE_EXPORT_TEST_API(kbase_g71_ipa_model_ops);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
new file mode 100644
index 0000000..c077461
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -0,0 +1,432 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t83x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t82x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tMIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tHEx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tSIx[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tDVx[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tNOx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tGOx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tKAx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tTRx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tBOx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
new file mode 100644
index 0000000..58710f6
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -0,0 +1,1193 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_ISSUES_H_
+#define _BASE_HWCONFIG_ISSUES_H_
+
+enum base_hw_issue {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8879,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_generic[] = {
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t72x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t76x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t60x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t62x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t86x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t83x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t82x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tDVx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tNOx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tKAx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tKAx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tBOx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tBOx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
new file mode 100644
index 0000000..94d1d74
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,1802 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _BASE_KERNEL_H_
+#define _BASE_KERNEL_H_
+
+typedef struct base_mem_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_mem_handle;
+
+#include "mali_base_mem_priv.h"
+#include "mali_kbase_profiling_gator_api.h"
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT              256
+
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET             ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET           ((unsigned char)0)
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined CDBG_ASSERT
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined KBASE_DEBUG_ASSERT
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#error assert macro not defined!
+#endif
+
+#if defined PAGE_MASK
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/**
+ * typedef base_mem_alloc_flags - Memory allocation, access/hint flags.
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD),
+ * which defines a write-only region on the CPU side, which is
+ * heavily read by the CPU...
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/* Memory allocation, access/hint flags.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* IN */
+/* Read access CPU side
+ */
+#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
+
+/* Write access CPU side
+ */
+#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
+
+/* Read access GPU side
+ */
+#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
+
+/* Write access GPU side
+ */
+#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
+
+/* Execute allowed on the GPU side
+ */
+#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
+
+	/* BASE_MEM_HINT flags have been removed, but their values are reserved
+	 * for backwards compatibility with older user-space drivers. The values
+	 * can be re-used once support for r5p0 user-space drivers is removed,
+	 * presumably in r7p0.
+	 *
+	 * RESERVED: (1U << 5)
+	 * RESERVED: (1U << 6)
+	 * RESERVED: (1U << 7)
+	 * RESERVED: (1U << 8)
+	 */
+
+/* Grow backing store on GPU Page Fault
+ */
+#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
+
+/* Page coherence Outer shareable, if available
+ */
+#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
+
+/* Page coherence Inner shareable
+ */
+#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
+
+/* Should be cached on the CPU
+ */
+#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
+
+/* IN/OUT */
+/* Must have same VA on both the GPU and the CPU
+ */
+#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
+
+/* OUT */
+/* Must call mmap to acquire a GPU address for the alloc
+ */
+#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
+
+/* IN */
+/* Page coherence Outer shareable, required.
+ */
+#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
+
+/* Secure memory
+ */
+#define BASE_MEM_SECURE ((base_mem_alloc_flags)1 << 16)
+
+/* Not needed physical memory
+ */
+#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
+
+/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
+ * addresses to be the same
+ */
+#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
+
+/**
+ * Bit 19 is reserved.
+ *
+ * Do not remove, use the next unreserved bit for new flags
+ **/
+#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
+
+/* Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 20
+
+/* A mask for all output bits, excluding IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/* A mask for all input bits, including IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/* A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+	 BASE_MEM_COHERENT_LOCAL)
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMP: UMP import. Handle type is ump_secure_id.
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	BASE_MEM_IMPORT_TYPE_UMP = 1,
+	BASE_MEM_IMPORT_TYPE_UMM = 2,
+	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+} base_mem_import_type;
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:	address of imported user buffer
+ * @length:	length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+	u64 ptr;
+	u64 length;
+};
+
+/**
+ * @brief Invalid memory handle.
+ *
+ * Return value from functions returning @ref base_mem_handle on error.
+ *
+ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} })
+
+/**
+ * @brief Special write-alloc memory handle.
+ *
+ * A special handle is used to represent a region where a special page is mapped
+ * with a write-alloc cache setup, typically used when the write result of the
+ * GPU isn't needed, but the GPU must write anyway.
+ *
+ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} })
+
+#define BASEP_MEM_INVALID_HANDLE               (0ull  << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE               (1ull  << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
+/* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
+						BASE_MEM_COOKIE_BASE)
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB  0xfffff000UL
+
+
+/* Bit mask of cookies used for for memory allocation setup */
+#define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
+
+
+/**
+ * @brief Result codes of changing the size of the backing store allocated to a tmem region
+ */
+typedef enum base_backing_threshold_status {
+	BASE_BACKING_THRESHOLD_OK = 0,			    /**< Resize successful */
+	BASE_BACKING_THRESHOLD_ERROR_OOM = -2,		    /**< Increase failed due to an out-of-memory condition */
+	BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
+} base_backing_threshold_status;
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief a basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+	struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent imported memory object.
+ * Simple opague handle to imported memory, can't be used
+ * with anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+	struct {
+		int fd;
+	} basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
+ */
+typedef struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completely unused
+ * by the Base driver. It can be used to store things such as callback
+ * function pointer, data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+	u64 blob[2];	 /**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ *         Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *         specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+	base_mem_handle handle;
+	u64 offset;
+	u64 length;
+};
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extent:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ */
+struct base_jit_alloc_info {
+	u64 gpu_alloc_addr;
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	u8 id;
+};
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
+ * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without
+ * changing the structure size).
+ * When the flag is set for a particular dependency to signal that it is an ordering only dependency then
+ * errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID  (0)       /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA     (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER    (1U << 1) /**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly.  By not specifying the
+ * correct settings can/will cause an early job termination.  Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u32 base_jd_core_req;
+
+/* Requirements that come from the HW */
+
+/**
+ * No requirement, dependency only
+ */
+#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
+
+/**
+ * Requires fragment shaders
+ */
+#define BASE_JD_REQ_FS  ((base_jd_core_req)1 << 0)
+
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS  ((base_jd_core_req)1 << 1)
+#define BASE_JD_REQ_T   ((base_jd_core_req)1 << 2)   /**< Requires tiling */
+#define BASE_JD_REQ_CF  ((base_jd_core_req)1 << 3)   /**< Requires cache flushes */
+#define BASE_JD_REQ_V   ((base_jd_core_req)1 << 4)   /**< Requires value writeback */
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC  ((base_jd_core_req)1 << 13)
+
+/**
+ * SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP  ((base_jd_core_req)1 << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON               ((base_jd_core_req)1 << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced no syncsets can be bundled with the atom
+ * but should instead be part of a NULL jobs inserted into the dependency tree.
+ * The first pre_dep object must be configured for the external resouces to use,
+ * the second pre_dep object can be used to create other dependencies.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES   ((base_jd_core_req)1 << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB        ((base_jd_core_req)1 << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME      (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER          (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/**
+ * SW Only requirement : Replay job.
+ *
+ * If the preceding job fails, the replay job will cause the jobs specified in
+ * the list of base_jd_replay_payload pointed to by the jc pointer to be
+ * replayed.
+ *
+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
+ * job is failed, as well as any following dependencies.
+ *
+ * The replayed jobs will require a number of atom IDs. If there are not enough
+ * free atom IDs then the replay job will fail.
+ *
+ * If the preceding job does not fail, then the replay job is returned as
+ * completed.
+ *
+ * The replayed jobs will never be returned to userspace. The preceding failed
+ * job will be returned to userspace as failed; the status of this job should
+ * be ignored. Completion should be determined by the status of the replay soft
+ * job.
+ *
+ * In order for the jobs to be replayed, the job headers will have to be
+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type
+ * field indicates a Vertex Shader Job then it will be changed to Null Job.
+ *
+ * The replayed jobs have the following assumptions :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - Pre-dependencies are created based on job order.
+ * - Atom numbers are automatically assigned.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ */
+#define BASE_JD_REQ_SOFT_REPLAY                 (BASE_JD_REQ_SOFT_JOB | 0x4)
+/**
+ * SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ *   other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ *   possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET              (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET            (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY             (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/**
+ * SW only requirement: Just In Time allocation
+ *
+ * This job requests a JIT allocation based on the request in the
+ * @base_jit_alloc_info structure which is passed via the jc element of
+ * the atom.
+ *
+ * It should be noted that the id entry in @base_jit_alloc_info must not
+ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job to free the JIT allocation is still made.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC              (BASE_JD_REQ_SOFT_JOB | 0x9)
+/**
+ * SW only requirement: Just In Time free
+ *
+ * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC
+ * to be freed. The ID of the JIT allocation is passed via the jc element of
+ * the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE               (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/**
+ * SW only requirement: Map external resource
+ *
+ * This job requests external resource(s) are mapped once the dependencies
+ * of the job have been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP            (BASE_JD_REQ_SOFT_JOB | 0xb)
+/**
+ * SW only requirement: Unmap external resource
+ *
+ * This job requests external resource(s) are unmapped once the dependencies
+ * of the job has been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP          (BASE_JD_REQ_SOFT_JOB | 0xc)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE    ((base_jd_core_req)1 << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE   ((base_jd_core_req)1 << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job starts which does not have this bit set or a job completes
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if
+ * the CPU may have written to memory addressed by the job since the last job
+ * without this bit set was submitted.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job completes which does not have this bit set or a job starts
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if
+ * the CPU may read from or partially overwrite memory addressed by the job
+ * before the next job without this bit set completes.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
+
+/**
+ * These requirement bits are currently unused in base_jd_core_req
+ */
+#define BASEP_JD_REQ_RESERVED \
+	(~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
+	BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
+	BASE_JD_REQ_EVENT_COALESCE | \
+	BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
+	BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
+	BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END))
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASE_JD_REQ_ATOM_TYPE \
+	(BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
+	BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of a soft job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
+
+/*
+ * Returns non-zero value if core requirements passed define a soft job or
+ * a dependency only job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \
+	((core_req & BASE_JD_REQ_SOFT_JOB) || \
+	(core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
+
+/**
+ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
+ * handles retaining cores for power management and affinity management.
+ *
+ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
+ * where lots of atoms could be submitted before powerup, and each has an
+ * affinity chosen that causes other atoms to have an affinity
+ * violation. Whilst the affinity was not causing violations at the time it
+ * was chosen, it could cause violations thereafter. For example, 1000 jobs
+ * could have had their affinity chosen during the powerup time, so any of
+ * those 1000 jobs could cause an affinity violation later on.
+ *
+ * The attack would otherwise occur because other atoms/contexts have to wait for:
+ * -# the currently running atoms (which are causing the violation) to
+ * finish
+ * -# and, the atoms that had their affinity chosen during powerup to
+ * finish. These are run preferentially because they don't cause a
+ * violation, but instead continue to cause the violation in others.
+ * -# or, the attacker is scheduled out (which might not happen for just 2
+ * contexts)
+ *
+ * By re-choosing the affinity (which is designed to avoid violations at the
+ * time it's chosen), we break condition (2) of the wait, which minimizes the
+ * problem to just waiting for current jobs to finish (which can be bounded if
+ * the Job Scheduling Policy has a timer).
+ */
+enum kbase_atom_coreref_state {
+	/** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
+	KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
+	/** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
+	KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
+	/** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
+	KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
+	/** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
+	KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
+	/** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
+	KBASE_ATOM_COREREF_STATE_READY
+};
+
+/*
+ * Base Atom priority
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling between atoms of the same type within
+ * a base context, and only after the atoms have had dependencies resolved.
+ * Fragment atoms does not affect non-frament atoms with lower priorities, and
+ * the other way around. For example, a low priority atom that has had its
+ * dependencies resolved might run before a higher priority atom that has not
+ * had its dependencies resolved.
+ *
+ * The scheduling between base contexts/processes and between atoms from
+ * different base contexts/processes is unaffected by atom priority.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
+ *   resolved, and atom 'X' has a higher priority than atom 'Y'
+ * - If atom 'Y' is currently running on the HW, then it is interrupted to
+ *   allow atom 'X' to run soon after
+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ *   the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * - Any two atoms that have the same priority could run in any order with
+ *   respect to each other. That is, there is no ordering constraint between
+ *   atoms of the same priority.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM  ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_HIGH    ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW     ((base_jd_prio)2)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+enum kbase_jd_atom_state {
+	/** Atom is not used */
+	KBASE_JD_ATOM_STATE_UNUSED,
+	/** Atom is queued in JD */
+	KBASE_JD_ATOM_STATE_QUEUED,
+	/** Atom has been given to JS (is runnable/running) */
+	KBASE_JD_ATOM_STATE_IN_JS,
+	/** Atom has been completed, but not yet handed back to job dispatcher
+	 *  for dependency resolution */
+	KBASE_JD_ATOM_STATE_HW_COMPLETED,
+	/** Atom has been completed, but not yet handed back to userspace */
+	KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+	base_atom_id  atom_id;               /**< An atom number */
+	base_jd_dep_type dependency_type;    /**< Dependency type */
+};
+
+/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value.
+ * In order to keep the size of the structure same, padding field has been adjusted
+ * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines)
+ * is added at the end of the structure. Place in the structure previously occupied by u16 core_req
+ * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission
+ * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left
+ * for possible future use. */
+typedef struct base_jd_atom_v2 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	u64 extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources */
+	u16 compat_core_req;	            /**< core requirements which correspond to the legacy support for UK 10.2 */
+	struct base_dependency pre_dep[2];  /**< pre-dependencies, one need to use SETTER function to assign this field,
+	this is done in order to reduce possibility of improper assigment of a dependency field */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;                  /**< Atom priority. Refer to @ref base_jd_prio for more details */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[1];
+	base_jd_core_req core_req;          /**< core requirements */
+} base_jd_atom_v2;
+
+typedef enum base_external_resource_access {
+	BASE_EXT_RES_ACCESS_SHARED,
+	BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+	u64 ext_resource;
+} base_external_resource;
+
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+	u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	u64 address;
+	u64 size;
+	struct base_external_resource extres;
+};
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] dep          The kbase jd atom dependency to be initialized.
+ * @param     id           The atom_id to be assigned.
+ * @param     dep_type     The dep_type to be assigned.
+ *
+ */
+static inline void base_jd_atom_dep_set(struct base_dependency *dep,
+		base_atom_id id, base_jd_dep_type dep_type)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	/*
+	 * make sure we don't set not allowed combinations
+	 * of atom_id/dependency_type.
+	 */
+	LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+			(id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
+
+	dep->atom_id = id;
+	dep->dependency_type = dep_type;
+}
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] dep          The kbase jd atom dependency to be written.
+ * @param[in]     from         The dependency to make a copy from.
+ *
+ */
+static inline void base_jd_atom_dep_copy(struct base_dependency *dep,
+		const struct base_dependency *from)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type);
+}
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @ref base_jd_submit and @ref base_jd_submit
+ * has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ *
+ * @pre @p fence must reference a @ref base_fence successfully initialized by
+ *      calling @ref base_fence_init.
+ * @pre @p fence was @e not initialized by calling @ref base_fence_import, nor
+ *      is it associated with a fence-trigger job that was already submitted
+ *      by calling @ref base_jd_submit.
+ * @post @p atom can be submitted by calling @ref base_jd_submit.
+ */
+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+	LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @ref base_jd_submit and
+ * @ref base_jd_submit has returned.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ *
+ * @pre @p fence must reference a @ref base_fence successfully initialized by
+ *      calling @ref base_fence_import, or it must be associated with a
+ *      fence-trigger job that was already submitted by calling
+ *      @ref base_jd_submit.
+ * @post @p atom can be submitted by calling @ref base_jd_submit.
+ */
+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up an external resource object to reference
+ * a memory allocation and the type of access requested.
+ *
+ * @param[in] res     The resource object to initialize
+ * @param     handle  The handle to the imported memory object, must be
+ *                    obtained by calling @ref base_mem_as_import_handle().
+ * @param     access  The type of access requested
+ */
+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
+{
+	u64 address;
+
+	address = handle.basep.handle;
+
+	LOCAL_ASSERT(res != NULL);
+	LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+	LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+	res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+	BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
+	BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
+	BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */
+	BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
+	BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
+	BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
+	BASE_JD_SW_EVENT_RESERVED = (3u << 11),	/**< Reserved event type */
+	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)	    /**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0  - subtype
+ * @li 12:11 - type
+ * @li 13    - SW success (only valid if the SW bit is set)
+ * @li 14    - SW event (HW event if not set)
+ * @li 15    - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+typedef enum base_jd_event_code {
+	/* HW defined exceptions */
+
+	/** Start of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+	/* non-fatal exceptions */
+	BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
+	BASE_JD_EVENT_DONE = 0x01,
+	BASE_JD_EVENT_STOPPED = 0x03,	  /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
+	BASE_JD_EVENT_TERMINATED = 0x04,  /**< This is actually a fault status code - the job was hard stopped */
+	BASE_JD_EVENT_ACTIVE = 0x08,	  /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
+
+	/** End of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+
+	/** Start of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+	/* job exceptions */
+	BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+	BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+	BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+	BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+	BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+	BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+	BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+	BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+	BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+	BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+	BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+	BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+	BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+	BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+	BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+	BASE_JD_EVENT_STATE_FAULT = 0x5A,
+	BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+	BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+	/* GPU exceptions */
+	BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+	BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+	/* MMU exceptions */
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+	BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+	BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+	/* SW defined exceptions */
+	BASE_JD_EVENT_MEM_GROWTH_FAILED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_TIMED_OUT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+	BASE_JD_EVENT_JOB_CANCELLED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+	BASE_JD_EVENT_JOB_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+	BASE_JD_EVENT_PM_EVENT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+	BASE_JD_EVENT_FORCE_REPLAY	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
+
+	BASE_JD_EVENT_BAG_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+	/** End of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+	BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
+	BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+	/** End of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of Kernel-only status codes. Such codes are never returned to user-space */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+	BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+	/** End of Kernel-only status codes. */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. The can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (ie all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+	base_jd_event_code event_code;  /**< event code */
+	base_atom_id atom_number;       /**< the atom number that has completed */
+	struct base_jd_udata udata;     /**< user data */
+} base_jd_event_v2;
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field
+ * of @ref base_jd_atom.
+ *
+ * It must not occupy the same CPU cache line(s) as any neighboring data.
+ * This is to avoid cases where access to pages containing the structure
+ * is shared between cached and un-cached memory regions, which would
+ * cause memory corruption.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+	u64 system_time;
+	u64 cycle_counter;
+	u64 sec;
+	u32 usec;
+	u8 padding[36];
+} base_dump_cpu_gpu_counters;
+
+/** @} end group base_user_api_job_dispatch */
+
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistent guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ *  - Part of a critical inner-loop
+ *  - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU  registers themselves.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * _mali_base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to _mali_base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties
+ *
+ * The Platform Config File sets up gpu properties that are specific to a
+ * certain platform. Properties that are 'Implementation Defined' in the
+ * Midgard Architecture spec are placed here.
+ *
+ * @note Reference configurations are provided for Midgard Implementations, such as
+ * the Mali-T600 family. The customer need not repeat this information, and can select one of
+ * these reference configurations. For example, VA_BITS, PA_BITS and the
+ * maximum number of samples per pixel might vary between Midgard Implementations, but
+ * \b not for platforms using the Mali-T604. This information is placed in
+ * the reference configuration files.
+ *
+ * The System Integrator creates the following structure:
+ * - platform_XYZ
+ * - platform_XYZ/plat
+ * - platform_XYZ/plat/plat_config.h
+ *
+ * They then edit plat_config.h, using the example plat_config.h files as a
+ * guide.
+ *
+ * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will
+ * receive a helpful \#error message if they do not do this correctly. This
+ * selects the Reference Configuration for the Midgard Implementation. The rationale
+ * behind this decision (against asking the customer to write \#include
+ * <gpus/mali_t600.h> in their plat_config.h) is as follows:
+ * - This mechanism 'looks' like a regular config file (such as Linux's
+ * .config)
+ * - It is difficult to get wrong in a way that will produce strange build
+ * errors:
+ *  - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and
+ *  so they won't accidentally pick another file with 'mali_t600' in its name
+ *  - When the build doesn't work, the System Integrator may think the DDK is
+ *  doesn't work, and attempt to fix it themselves:
+ *   - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the
+ *   error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells
+ *   you.
+ *   - For a \#include mechanism, checks must still be made elsewhere, which the
+ *   System Integrator may try working around by setting \#defines (such as
+ *   VA_BITS) themselves in their plat_config.h. In the  worst case, they may
+ *   set the prevention-mechanism \#define of
+ *   "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN".
+ *   - In this case, they would believe they are on the right track, because
+ *   the build progresses with their fix, but with errors elsewhere.
+ *
+ * However, there is nothing to prevent the customer using \#include to organize
+ * their own configurations files hierarchically.
+ *
+ * The mechanism for the header file processing is as follows:
+ *
+ * @dot
+   digraph plat_config_mechanism {
+	   rankdir=BT
+	   size="6,6"
+
+       "mali_base.h";
+	   "gpu/mali_gpu.h";
+
+	   node [ shape=box ];
+	   {
+	       rank = same; ordering = out;
+
+		   "gpu/mali_gpu_props.h";
+		   "base/midg_gpus/mali_t600.h";
+		   "base/midg_gpus/other_midg_gpu.h";
+	   }
+	   { rank = same; "plat/plat_config.h"; }
+	   {
+	       rank = same;
+		   "gpu/mali_gpu.h" [ shape=box ];
+		   gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ];
+		   select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ;
+	   }
+	   node [ shape=box ];
+	   { rank = same; "plat/plat_config.h"; }
+	   { rank = same; "mali_base.h"; }
+
+	   "mali_base.h" -> "gpu/mali_gpu.h" -> "gpu/mali_gpu_props.h";
+	   "mali_base.h" -> "plat/plat_config.h" ;
+	   "mali_base.h" -> select_gpu ;
+
+	   "plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ;
+	   gpu_chooser -> select_gpu [style="dotted,bold"] ;
+
+	   select_gpu -> "base/midg_gpus/mali_t600.h" ;
+	   select_gpu -> "base/midg_gpus/other_midg_gpu.h" ;
+   }
+   @enddot
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided the entire processed @ref base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algorithm can be determined either by a u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * required for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+struct mali_base_gpu_core_props {
+	/**
+	 * Product specific value.
+	 */
+	u32 product_id;
+
+	/**
+	 * Status of the GPU release.
+	 * No defined values, but starts at 0 and increases by one for each
+	 * release status (alpha, beta, EAC, etc.).
+	 * 4 bit values (0-15).
+	 */
+	u16 version_status;
+
+	/**
+	 * Minor release number of the GPU. "P" part of an "RnPn" release number.
+     * 8 bit values (0-255).
+	 */
+	u16 minor_revision;
+
+	/**
+	 * Major release number of the GPU. "R" part of an "RnPn" release number.
+     * 4 bit values (0-15).
+	 */
+	u16 major_revision;
+
+	u16 padding;
+
+	/**
+	 * This property is deprecated since it has not contained the real current
+	 * value of GPU clock speed. It is kept here only for backwards compatibility.
+	 * For the new ioctl interface, it is ignored and is treated as a padding
+	 * to keep the structure of the same size and retain the placement of its
+	 * members.
+	 */
+	u32 gpu_speed_mhz;
+
+	/**
+	 * @usecase GPU clock max/min speed is required for computing best/worst case
+	 * in tasks as job scheduling ant irq_throttling. (It is not specified in the
+	 *  Midgard Architecture).
+	 * Also, GPU clock max speed is used for OpenCL's clGetDeviceInfo() function.
+	 */
+	u32 gpu_freq_khz_max;
+	u32 gpu_freq_khz_min;
+
+	/**
+	 * Size of the shader program counter, in bits.
+	 */
+	u32 log2_program_counter_size;
+
+	/**
+	 * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+	 * bitpattern where a set bit indicates that the format is supported.
+	 *
+	 * Before using a texture format, it is recommended that the corresponding
+	 * bit be checked.
+	 */
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	/**
+	 * Theoretical maximum memory available to the GPU. It is unlikely that a
+	 * client will be able to allocate all of this memory for their own
+	 * purposes, but this at least provides an upper bound on the memory
+	 * available to the GPU.
+	 *
+	 * This is required for OpenCL's clGetDeviceInfo() call when
+	 * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+	 * client will not be expecting to allocate anywhere near this value.
+	 */
+	u64 gpu_available_memory_size;
+};
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+	u8 log2_line_size;
+	u8 log2_cache_size;
+	u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+	u32 bin_size_bytes;	/* Max is 4*2^15 */
+	u32 max_active_levels;	/* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+	u32 max_threads;            /* Max. number of threads per core */
+	u32 max_workgroup_size;     /* Max. number of threads per workgroup */
+	u32 max_barrier_size;       /* Max. number of threads that can synchronize on a simple barrier */
+	u16 max_registers;          /* Total size [1..65535] of the register file available per core. */
+	u8  max_task_queue;         /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+	u8  max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+	u8  impl_tech;              /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+	u8  padding[7];
+};
+
+/**
+ * @brief descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+	u64 core_mask;	       /**< Core restriction mask required for the group */
+	u16 num_cores;	       /**< Number of cores in the group */
+	u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+	u32 num_groups;
+
+	/**
+	 * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+	 *
+	 * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+	 * whether the core groups are coherent or not. Hence this member is needed
+	 * to calculate how much memory is required for dumping.
+	 *
+	 * @note Do not use it to work out how many valid elements are in the
+	 * group[] member. Use num_groups instead.
+	 */
+	u32 num_core_groups;
+
+	/**
+	 * Coherency features of the memory, accessed by @ref gpu_mem_features
+	 * methods
+	 */
+	u32 coherency;
+
+	u32 padding;
+
+	/**
+	 * Descriptors of coherent groups
+	 */
+	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+	u64 shader_present;
+	u64 tiler_present;
+	u64 l2_present;
+	u64 stack_present;
+
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 mem_features;
+	u32 mmu_features;
+
+	u32 as_present;
+
+	u32 js_present;
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 tiler_features;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	u32 gpu_id;
+
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+
+	/*
+	 * Note: This is the _selected_ coherency mode rather than the
+	 * available modes as exposed in the coherency_features register.
+	 */
+	u32 coherency_mode;
+};
+
+/**
+ * Return structure for _mali_base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this data structure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct mali_base_gpu_props {
+	struct mali_base_gpu_core_props core_props;
+	struct mali_base_gpu_l2_cache_props l2_props;
+	u64 unused_1; /* keep for backwards compatibility */
+	struct mali_base_gpu_tiler_props tiler_props;
+	struct mali_base_gpu_thread_props thread_props;
+
+	/** This member is large, likely to be 128 bytes */
+	struct gpu_raw_gpu_props raw_props;
+
+	/** This must be last member of the structure */
+	struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * \enum base_context_create_flags
+ *
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+enum base_context_create_flags {
+	/** No flags set */
+	BASE_CONTEXT_CREATE_FLAG_NONE = 0,
+
+	/** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */
+	BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0),
+
+	/** Base context is a 'System Monitor' context for Hardware counters.
+	 *
+	 * One important side effect of this is that job submission is disabled. */
+	BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1)
+};
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init()
+ */
+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
+	(((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
+	  ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
+ */
+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
+	((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
+
+/*
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+/** Private flag tracking whether job descriptor dumping is disabled */
+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31))
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C Pre-processor macros are exposed here to do with Platform
+ * Config.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revison.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/**
+ * @brief The payload for a replay job. This must be in GPU memory.
+ */
+typedef struct base_jd_replay_payload {
+	/**
+	 * Pointer to the first entry in the base_jd_replay_jc list.  These
+	 * will be replayed in @b reverse order (so that extra ones can be added
+	 * to the head in future soft jobs without affecting this soft job)
+	 */
+	u64 tiler_jc_list;
+
+	/**
+	 * Pointer to the fragment job chain.
+	 */
+	u64 fragment_jc;
+
+	/**
+	 * Pointer to the tiler heap free FBD field to be modified.
+	 */
+	u64 tiler_heap_free;
+
+	/**
+	 * Hierarchy mask for the replayed fragment jobs. May be zero.
+	 */
+	u16 fragment_hierarchy_mask;
+
+	/**
+	 * Hierarchy mask for the replayed tiler jobs. May be zero.
+	 */
+	u16 tiler_hierarchy_mask;
+
+	/**
+	 * Default weight to be used for hierarchy levels not in the original
+	 * mask.
+	 */
+	u32 hierarchy_default_weight;
+
+	/**
+	 * Core requirements for the tiler job chain
+	 */
+	base_jd_core_req tiler_core_req;
+
+	/**
+	 * Core requirements for the fragment job chain
+	 */
+	base_jd_core_req fragment_core_req;
+} base_jd_replay_payload;
+
+/**
+ * @brief An entry in the linked list of job chains to be replayed. This must
+ *        be in GPU memory.
+ */
+typedef struct base_jd_replay_jc {
+	/**
+	 * Pointer to next entry in the list. A setting of NULL indicates the
+	 * end of the list.
+	 */
+	u64 next;
+
+	/**
+	 * Pointer to the job chain.
+	 */
+	u64 jc;
+
+} base_jd_replay_jc;
+
+/* Maximum number of jobs allowed in a fragment chain in the payload of a
+ * replay job */
+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
+
+/** @} end group base_api */
+
+typedef struct base_profiling_controls {
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+} base_profiling_controls;
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
+
+/* Indicate that job dumping is enabled. This could affect certain timers
+ * to account for the performance impact. */
+#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
+
+#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
+		BASE_TLSTREAM_JOB_DUMPING_ENABLED)
+
+#endif				/* _BASE_KERNEL_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100644
index 0000000..4a98a72
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC	(1U << 0)
+#define BASE_SYNCSET_OP_CSYNC	(1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	base_mem_handle mem_handle;
+	u64 user_addr;
+	u64 size;
+	u8 type;
+	u8 padding[7];
+};
+
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
new file mode 100644
index 0000000..be454a2
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif	/*_BASE_VENDOR_SPEC_FUNC_H_*/
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100644
index 0000000..c85dc90
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,632 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <asm/page.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
+#include <linux/sched/mm.h>
+#endif
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_linux.h>
+
+/*
+ * Include mali_kbase_defs.h first as this provides types needed by other local
+ * header files.
+ */
+#include "mali_kbase_defs.h"
+
+#include "mali_kbase_context.h"
+#include "mali_kbase_strings.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_trace_timeline.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_debug_job_fault.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_vinstr.h"
+
+#include "ipa/mali_kbase_ipa.h"
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+
+#ifndef u64_to_user_ptr
+/* Introduced in Linux v4.6 */
+#define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x)
+#endif
+
+/*
+ * Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: configuration attributes member of kbdev needs to have
+* been setup before calling kbase_device_init
+*/
+
+/*
+* API to acquire device list semaphore and return pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+void kbase_destroy_context(struct kbase_context *kctx);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+
+/**
+ * kbase_jd_submit - Submit atoms to the job dispatcher
+ *
+ * @kctx: The kbase context to submit to
+ * @user_addr: The address in user space of the struct base_jd_atom_v2 array
+ * @nr_atoms: The number of atoms in the array
+ * @stride: sizeof(struct base_jd_atom_v2)
+ * @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6)
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_jd_submit(struct kbase_context *kctx,
+		void __user *user_addr, u32 nr_atoms, u32 stride,
+		bool uk6_atom);
+
+/**
+ * kbase_jd_done_worker - Handle a job completion
+ * @data: a &struct work_struct
+ *
+ * This function requeues the job from the runpool (if it was soft-stopped or
+ * removed from NEXT registers).
+ *
+ * Removes it from the system if it finished/failed/was cancelled.
+ *
+ * Resolves dependencies to add dependent jobs to the context, potentially
+ * starting them if necessary (which may add more references to the context)
+ *
+ * Releases the reference to the context from the no-longer-running job.
+ *
+ * Handles retrying submission outside of IRQ context if it failed from within
+ * IRQ context.
+ */
+void kbase_jd_done_worker(struct work_struct *data);
+
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+		kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+bool jd_submit_atom(struct kbase_context *kctx,
+			 const struct base_jd_atom_v2 *user_atom,
+			 struct kbase_jd_atom *katom);
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ *                                               and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The hwaccess_lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom);
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom);
+#endif
+int kbase_soft_event_update(struct kbase_context *kctx,
+			    u64 event,
+			    unsigned char new_status);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+void kbasep_soft_job_timeout_worker(unsigned long data);
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
+
+/* api used internally for register access. Contains validation and tracing */
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size);
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev     The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code  exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+		u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or has already been suspended
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+	return kbdev->pm.suspending;
+}
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int result;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+	result = katom - &kctx->jctx.atoms[0];
+	KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
+	return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id:   ID of atom to retrieve
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+		struct kbase_context *kctx, int id)
+{
+	return &kctx->jctx.atoms[id];
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just increment
+ * the counter during disjoint events we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down
+ * should be called
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if !defined(UINT64_MAX)
+	#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#endif
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)     \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+	kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+	kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+	trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(subcode);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_DEBUG
+/**
+ * kbase_set_driver_inactive - Force driver to go inactive
+ * @kbdev:    Device pointer
+ * @inactive: true if driver should go inactive, false otherwise
+ *
+ * Forcing the driver inactive will cause all future IOCTLs to wait until the
+ * driver is made active again. This is intended solely for the use of tests
+ * which require that no jobs are running while the test executes.
+ */
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifndef RESET0_LEVEL
+extern u32  override_value_aml;
+extern void *reg_base_hiubus;
+#define RESET0_MASK    0x01
+#define RESET1_MASK    0x02
+#define RESET2_MASK    0x03
+
+#define RESET0_LEVEL    0x10
+#define RESET1_LEVEL    0x11
+#define RESET2_LEVEL    0x12
+#define Rd(r)                           readl((reg_base_hiubus) + ((r)<<2))
+#define Wr(r, v)                        writel((v), ((reg_base_hiubus) + ((r)<<2)))
+#define Mali_WrReg(unit, core, regnum, value)   kbase_reg_write(kbdev, (regnum), (value), NULL)
+#define Mali_RdReg(unit, core, regnum)          kbase_reg_read(kbdev, (regnum), NULL)
+#define stimulus_print   printk
+#define stimulus_display printk
+#define Mali_pwr_on(x)   Mali_pwr_on_with_kdev(kbdev, (x))
+#define Mali_pwr_off(x)  Mali_pwr_off_with_kdev(kbdev, (x))
+#endif
+
+
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/* kbase_io_history_init - initialize data struct for register access history
+ *
+ * @kbdev The register history to initialize
+ * @n The number of register accesses that the buffer could hold
+ *
+ * @return 0 if successfully initialized, failure otherwise
+ */
+int kbase_io_history_init(struct kbase_io_history *h, u16 n);
+
+/* kbase_io_history_term - uninit all resources for the register access history
+ *
+ * @h The register history to terminate
+ */
+void kbase_io_history_term(struct kbase_io_history *h);
+
+/* kbase_io_history_dump - print the register history to the kernel ring buffer
+ *
+ * @kbdev Pointer to kbase_device containing the register history to dump
+ */
+void kbase_io_history_dump(struct kbase_device *kbdev);
+
+/**
+ * kbase_io_history_resize - resize the register access history buffer.
+ *
+ * @h: Pointer to a valid register history to resize
+ * @new_size: Number of accesses the buffer could hold
+ *
+ * A successful resize will clear all recent register accesses.
+ * If resizing fails for any reason (e.g., could not allocate memory, invalid
+ * buffer size) then the original buffer will be kept intact.
+ *
+ * @return 0 if the buffer was resized, failure otherwise
+ */
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbase_io_history_init(...) ((int)0)
+
+#define kbase_io_history_term CSTD_NOP
+
+#define kbase_io_history_dump CSTD_NOP
+
+#define kbase_io_history_resize CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+#endif
+
+
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100644
index 0000000..6b3559d
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015,2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* This function is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the
+ * restart index is out of bounds and the rerun causes a tile range fault. If this happens
+ * we try to clamp the restart index to a correct value and rerun the job.
+ */
+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+	struct device *dev = katom->kctx->kbdev->dev;
+	u32   clamped = 0;
+	struct kbase_va_region *region;
+	struct tagged_addr *page_array;
+	u64 page_index;
+	u32 offset = katom->jc & (~PAGE_MASK);
+	u32 *page_1 = NULL;
+	u32 *page_2 = NULL;
+	u32   job_header[JOB_HEADER_SIZE_IN_WORDS];
+	void *dst = job_header;
+	u32 minX, minY, maxX, maxY;
+	u32 restartX, restartY;
+	struct page *p;
+	u32 copy_size;
+
+	dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+	if (!(katom->core_req & BASE_JD_REQ_FS))
+		return 0;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+			katom->jc);
+	if (!region || (region->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(region);
+	if (!page_array)
+		goto out_unlock;
+
+	page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+	p = phys_to_page(as_phys_addr_t(page_array[page_index]));
+
+	/* we need the first 10 words of the fragment shader job descriptor.
+	 * We need to check that the offset + 10 words is less that the page
+	 * size otherwise we need to load the next page.
+	 * page_size_overflow will be equal to 0 in case the whole descriptor
+	 * is within the page > 0 otherwise.
+	 */
+	copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+	page_1 = kmap_atomic(p);
+
+	/* page_1 is a u32 pointer, offset is expressed in bytes */
+	page_1 += offset>>2;
+
+	kbase_sync_single_for_cpu(katom->kctx->kbdev,
+			kbase_dma_addr(p) + offset,
+			copy_size, DMA_BIDIRECTIONAL);
+
+	memcpy(dst, page_1, copy_size);
+
+	/* The data needed overflows page the dimension,
+	 * need to map the subsequent page */
+	if (copy_size < JOB_HEADER_SIZE) {
+		p = phys_to_page(as_phys_addr_t(page_array[page_index + 1]));
+		page_2 = kmap_atomic(p);
+
+		kbase_sync_single_for_cpu(katom->kctx->kbdev,
+				kbase_dma_addr(p),
+				JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+		memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+	}
+
+	/* We managed to correctly map one or two pages (in case of overflow) */
+	/* Get Bounding Box data and restart index from fault address low word */
+	minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+	restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+	dev_warn(dev, "Before Clamping:\n"
+			"Jobstatus: %08x\n"
+			"restartIdx: %08x\n"
+			"Fault_addr_low: %08x\n"
+			"minCoordsX: %08x minCoordsY: %08x\n"
+			"maxCoordsX: %08x maxCoordsY: %08x\n",
+			job_header[JOB_DESC_STATUS_WORD],
+			job_header[JOB_DESC_RESTART_INDEX_WORD],
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+			minX, minY,
+			maxX, maxY);
+
+	/* Set the restart index to the one which generated the fault*/
+	job_header[JOB_DESC_RESTART_INDEX_WORD] =
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+	if (restartX < minX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to minimum. %08x clamped to %08x\n",
+			restartX, minX);
+		clamped =  1;
+	}
+	if (restartY < minY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to minimum. %08x clamped to %08x\n",
+			restartY, minY);
+		clamped =  1;
+	}
+	if (restartX > maxX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to maximum. %08x clamped to %08x\n",
+			restartX, maxX);
+		clamped =  1;
+	}
+	if (restartY > maxY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to maximum. %08x clamped to %08x\n",
+			restartY, maxY);
+		clamped =  1;
+	}
+
+	if (clamped) {
+		/* Reset the fault address low word
+		 * and set the job status to STOPPED */
+		job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+		job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+		dev_warn(dev, "After Clamping:\n"
+				"Jobstatus: %08x\n"
+				"restartIdx: %08x\n"
+				"Fault_addr_low: %08x\n"
+				"minCoordsX: %08x minCoordsY: %08x\n"
+				"maxCoordsX: %08x maxCoordsY: %08x\n",
+				job_header[JOB_DESC_STATUS_WORD],
+				job_header[JOB_DESC_RESTART_INDEX_WORD],
+				job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+				minX, minY,
+				maxX, maxY);
+
+		/* Flush CPU cache to update memory for future GPU reads*/
+		memcpy(page_1, dst, copy_size);
+		p = phys_to_page(as_phys_addr_t(page_array[page_index]));
+
+		kbase_sync_single_for_device(katom->kctx->kbdev,
+				kbase_dma_addr(p) + offset,
+				copy_size, DMA_TO_DEVICE);
+
+		if (copy_size < JOB_HEADER_SIZE) {
+			memcpy(page_2, dst + copy_size,
+					JOB_HEADER_SIZE - copy_size);
+			p = phys_to_page(as_phys_addr_t(page_array[page_index +
+								   1]));
+
+			kbase_sync_single_for_device(katom->kctx->kbdev,
+					kbase_dma_addr(p),
+					JOB_HEADER_SIZE - copy_size,
+					DMA_TO_DEVICE);
+		}
+	}
+	if (copy_size < JOB_HEADER_SIZE)
+		kunmap_atomic(page_2);
+
+	kunmap_atomic(page_1);
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+	return clamped;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100644
index 0000000..099a298
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
new file mode 100644
index 0000000..f910fe9
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+
+static int kbase_as_fault_read(struct seq_file *sfile, void *data)
+{
+	uintptr_t as_no = (uintptr_t) sfile->private;
+
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+	struct kbase_device *kbdev = NULL;
+
+	kbdev_list = kbase_dev_list_get();
+
+	list_for_each(entry, kbdev_list) {
+		kbdev = list_entry(entry, struct kbase_device, entry);
+
+		if(kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) {
+
+			/* don't show this one again until another fault occors */
+			kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no);
+
+			/* output the last page fault addr */
+			seq_printf(sfile, "%llu\n", (u64) kbdev->as[as_no].fault_addr);
+		}
+
+	}
+
+	kbase_dev_list_put(kbdev_list);
+
+	return 0;
+}
+
+static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbase_as_fault_read , in->i_private);
+}
+
+static const struct file_operations as_fault_fops = {
+	.open = kbase_as_fault_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ *  Initialize debugfs entry for each address space
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	uint i;
+	char as_name[64];
+	struct dentry *debugfs_directory;
+
+	kbdev->debugfs_as_read_bitmap = 0ULL;
+
+	KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces);
+	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));
+
+	debugfs_directory = debugfs_create_dir("address_spaces",
+		kbdev->mali_debugfs_directory);
+
+	if(debugfs_directory) {
+		for(i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+			snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
+			debugfs_create_file(as_name, S_IRUGO,
+				debugfs_directory, (void*) ((uintptr_t) i), &as_fault_fops);
+		}
+	}
+	else
+		dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory");
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+	return;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
new file mode 100644
index 0000000..3ed2248
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_AS_FAULT_DEBUG_FS_H
+#define _KBASE_AS_FAULT_DEBUG_FS_H
+
+/**
+ * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults
+ *
+ * @kbdev: Pointer to kbase_device
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_as_fault_debugfs_new() - make the last fault available on debugfs
+ *
+ * @kbdev: Pointer to kbase_device
+ * @as_no: The address space the fault occurred on
+ */
+static inline void
+kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
+#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_MALI_DEBUG */
+	return;
+}
+
+#endif  /*_KBASE_AS_FAULT_DEBUG_FS_H*/
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100644
index 0000000..1d11de6
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+	u32 cache_flags = 0;
+
+	CSTD_UNUSED(nr_pages);
+
+	if (flags & BASE_MEM_CACHED_CPU)
+		cache_flags |= KBASE_REG_CPU_CACHED;
+
+	return cache_flags;
+}
+
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	dma_sync_single_for_device(kbdev->dev, handle, size, dir);
+}
+
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
new file mode 100644
index 0000000..0c18bdb
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * kbase_cache_enabled - Choose the cache policy for a specific region
+ * @flags:    flags describing attributes of the region
+ * @nr_pages: total number of pages (backed or not) for the region
+ *
+ * Tells whether the CPU and GPU caches should be enabled or not for a specific
+ * region.
+ * This function can be modified to customize the cache policy depending on the
+ * flags and size of the region.
+ *
+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED
+ *         depending on the cache policy
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
new file mode 100644
index 0000000..fb615ae
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c
@@ -0,0 +1,51 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+int kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_init_func)
+		return platform_funcs_p->platform_init_func(kbdev);
+
+	return 0;
+}
+
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_term_func)
+		platform_funcs_p->platform_term_func(kbdev);
+}
+
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed)
+{
+	KBASE_DEBUG_ASSERT(NULL != clock_speed);
+
+	*clock_speed = 100;
+	return 0;
+}
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
new file mode 100644
index 0000000..212e3b1
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
@@ -0,0 +1,343 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <asm/page.h>
+
+#include <mali_malisw.h>
+#include <mali_kbase_backend_config.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+#include <linux/rbtree.h>
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/**
+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ *
+ * Specifies the functions pointers for platform specific initialization and
+ * termination. By default no functions are required. No additional platform
+ * specific control is necessary.
+ */
+struct kbase_platform_funcs_conf {
+	/**
+	 * platform_init_func - platform specific init function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Returns 0 on success, negative error code otherwise.
+	 *
+	 * Function pointer for platform specific initialization or NULL if no
+	 * initialization function is required. At the point this the GPU is
+	 * not active and its power and clocks are in unknown (platform specific
+	 * state) as kbase doesn't yet have control of power and clocks.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly initialized) in here.
+	 */
+	int (*platform_init_func)(struct kbase_device *kbdev);
+	/**
+	 * platform_term_func - platform specific termination function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Function pointer for platform specific termination or NULL if no
+	 * termination function is required. At the point this the GPU will be
+	 * idle but still powered and clocked.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly terminated) in here.
+	 */
+	void (*platform_term_func)(struct kbase_device *kbdev);
+};
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+	/** Callback for when the GPU is idle and the power to it can be switched off.
+	 *
+	 * The system integrator can decide whether to either do nothing, just switch off
+	 * the clocks to the GPU, or to completely power down the GPU.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the GPU is about to become active and power must be supplied.
+	 *
+	 * This function must not return until the GPU is powered and clocked sufficiently for register access to
+	 * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
+	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 *
+	 * The return value of the first call to this function is ignored.
+	 *
+	 * @return 1 if the GPU state may have been lost, 0 otherwise.
+	 */
+	int (*power_on_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is requesting a suspend and GPU power
+	 * must be switched off.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a preceding call to power_off_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_off_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is resuming from a suspend and GPU
+	 * power must be switched on.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a following call to power_on_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_on_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_resume_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management initialization.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * will become active from calls made to the OS from within this function.
+	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else int error code.
+	 */
+	 int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management termination.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * should no longer be called by the OS on completion of this function.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-off power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_suspend callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
+	 */
+	void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-on power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_resume callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+
+	/*
+	 * Optional callback for checking if GPU can be suspended when idle
+	 *
+	 * This callback will be called by the runtime power management core
+	 * when the reference count goes to 0 to provide notification that the
+	 * GPU now seems idle.
+	 *
+	 * If this callback finds that the GPU can't be powered off, or handles
+	 * suspend by powering off directly or queueing up a power off, a
+	 * non-zero value must be returned to prevent the runtime PM core from
+	 * also triggering a suspend.
+	 *
+	 * Returning 0 will cause the runtime PM core to conduct a regular
+	 * autosuspend.
+	 *
+	 * This callback is optional and if not provided regular autosuspend
+	 * will be triggered.
+	 *
+	 * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+	 * this feature.
+	 *
+	 * Return 0 if GPU can be suspended, positive value if it can not be
+	 * suspeneded by runtime PM, else OS error code
+	 */
+	int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
+};
+
+/**
+ * kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC
+ * @clock_speed - see  kbase_cpu_clk_speed_func for details on the parameters
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * Default implementation of CPU_SPEED_FUNC. This function sets clock_speed
+ * to 100, so will be an underestimate for any real system.
+ */
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed);
+
+/**
+ * kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current CPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ *
+ * This is mainly used to implement OpenCL's clGetDeviceInfo().
+ */
+typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed);
+
+/**
+ * kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current GPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ * When an error is returned the caller assumes maximum GPU speed stored in
+ * gpu_freq_khz_max.
+ *
+ * If the system timer is not available then this function is required
+ * for the OpenCL queue profiling to return correct timing information.
+ *
+ */
+typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed);
+
+#ifdef CONFIG_OF
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+	u64 start;
+	u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ */
+struct kbase_io_resources {
+	u32                      job_irq_number;
+	u32                      mmu_irq_number;
+	u32                      gpu_irq_number;
+	struct kbase_io_memory_region io_memory_region;
+};
+
+struct kbase_platform_config {
+	const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init: - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes.  The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ *         Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that are outside of the Power Management
+ * callbacks.
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_platform_early_init - Early initialisation of the platform code
+ *
+ * This function will be called when the module is loaded to perform any
+ * early initialisation required by the platform code. Such as reading
+ * platform specific device tree entries for the GPU.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_early_init(void);
+
+#ifndef CONFIG_OF
+/**
+ * kbase_platform_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_register(void);
+
+/**
+ * kbase_platform_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_register()
+ */
+void kbase_platform_unregister(void);
+#endif
+
+	  /** @} *//* end group kbase_config */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_CONFIG_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100644
index 0000000..63adcea
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,271 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+* Boolean indicating whether the driver is configured to be secure at
+* a potential loss of performance.
+*
+* This currently affects only r0p0-15dev0 HW and earlier.
+*
+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+* performance:
+*
+* - When this is set to true, the driver remains fully secure,
+* but potentially loses performance compared with setting this to
+* false.
+* - When set to false, the driver is open to certain security
+* attacks.
+*
+* From r0p0-00rel0 and onwards, there is no security loss by setting
+* this to false, and no performance loss by setting it to
+* true.
+*/
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 */
+	KBASE_AID_32 = 0x0,
+
+	/**
+	 * Restrict GPU to a half of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_16 = 0x3,
+
+	/**
+	 * Restrict GPU to a quarter of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_8  = 0x2,
+
+	/**
+	 * Restrict GPU to an eighth of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_4  = 0x1
+};
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 * Restricting ID width will reduce performance & bus load due to GPU.
+	 */
+	KBASE_3BIT_AID_32 = 0x0,
+
+	/* Restrict GPU to 7/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_28 = 0x1,
+
+	/* Restrict GPU to 3/4 of maximum Address ID count. */
+	KBASE_3BIT_AID_24 = 0x2,
+
+	/* Restrict GPU to 5/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_20 = 0x3,
+
+	/* Restrict GPU to 1/2 of maximum Address ID count.  */
+	KBASE_3BIT_AID_16 = 0x4,
+
+	/* Restrict GPU to 3/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_12 = 0x5,
+
+	/* Restrict GPU to 1/4 of maximum Address ID count. */
+	KBASE_3BIT_AID_8  = 0x6,
+
+	/* Restrict GPU to 1/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_4  = 0x7
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_3BIT_ARID_LIMIT KBASE_3BIT_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_3BIT_AWID_LIMIT KBASE_3BIT_AID_32
+
+/**
+ * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
+ * defines which UMP device this GPU should be mapped to.
+ */
+#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
+
+/*
+ * Default period for DVFS sampling
+ */
+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+
+/*
+ * Power Management poweroff tick granuality. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/*
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/*
+ * Power Manager number of ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
+
+/*
+ * Default scheduling tick granuality
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
+
+/*
+ * Default timeout for some software jobs, after which the software event wait
+ * jobs will be cancelled.
+ */
+#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
+ */
+#define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */
+
+/*
+ * Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
+/*
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
+
+/*
+ * Perform GPU power down using only platform specific code, skipping DDK power
+ * management.
+ *
+ * If this is non-zero then kbase will avoid powering down shader cores, the
+ * tiler, and the L2 cache, instead just powering down the entire GPU through
+ * platform specific code. This may be required for certain platform
+ * integrations.
+ *
+ * Note that as this prevents kbase from powering down shader cores, this limits
+ * the available power policies to coarse_demand and always_on.
+ */
+#define PLATFORM_POWER_DOWN_ONLY (1)
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
new file mode 100644
index 0000000..f43db48
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -0,0 +1,359 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_dma_fence.h>
+#include <mali_kbase_ctx_sched.h>
+
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context.
+ *
+ * Return: new kbase context
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+{
+	struct kbase_context *kctx;
+	int err;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* zero-inited as lot of code assume it's zero'ed out on create */
+	kctx = vzalloc(sizeof(*kctx));
+
+	if (!kctx)
+		goto out;
+
+	/* creating a context is considered a disjoint event */
+	kbase_disjoint_event(kbdev);
+
+	kctx->kbdev = kbdev;
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+	atomic_set(&kctx->refcount, 0);
+	if (is_compat)
+		kbase_ctx_flag_set(kctx, KCTX_COMPAT);
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kctx->timeline.owner_tgid = task_tgid_nr(current);
+#endif
+	atomic_set(&kctx->setup_complete, 0);
+	atomic_set(&kctx->setup_in_progress, 0);
+	spin_lock_init(&kctx->mm_update_lock);
+	kctx->process_mm = NULL;
+	atomic_set(&kctx->nonmapped_pages, 0);
+	kctx->slots_pullable = 0;
+	kctx->tgid = current->tgid;
+	kctx->pid = current->pid;
+
+	err = kbase_mem_pool_init(&kctx->mem_pool,
+				  kbdev->mem_pool_max_size_default,
+				  KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
+				  kctx->kbdev,
+				  &kbdev->mem_pool);
+	if (err)
+		goto free_kctx;
+
+	err = kbase_mem_pool_init(&kctx->lp_mem_pool,
+				  (kbdev->mem_pool_max_size_default >> 9),
+				  KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
+				  kctx->kbdev,
+				  &kbdev->lp_mem_pool);
+	if (err)
+		goto free_mem_pool;
+
+	err = kbase_mem_evictable_init(kctx);
+	if (err)
+		goto free_both_pools;
+
+	atomic_set(&kctx->used_pages, 0);
+
+	err = kbase_jd_init(kctx);
+	if (err)
+		goto deinit_evictable;
+
+	err = kbasep_js_kctx_init(kctx);
+	if (err)
+		goto free_jd;	/* safe to call kbasep_js_kctx_term  in this case */
+
+	err = kbase_event_init(kctx);
+	if (err)
+		goto free_jd;
+
+	atomic_set(&kctx->drain_pending, 0);
+
+	mutex_init(&kctx->reg_lock);
+
+	mutex_init(&kctx->mem_partials_lock);
+	INIT_LIST_HEAD(&kctx->mem_partials);
+
+	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+	spin_lock_init(&kctx->waiting_soft_jobs_lock);
+	err = kbase_dma_fence_init(kctx);
+	if (err)
+		goto free_event;
+
+	err = kbase_mmu_init(kctx);
+	if (err)
+		goto term_dma_fence;
+
+	do {
+		err = kbase_mem_pool_grow(&kctx->mem_pool,
+				MIDGARD_MMU_BOTTOMLEVEL);
+		if (err)
+			goto pgd_no_mem;
+
+		mutex_lock(&kctx->mmu_lock);
+		kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+		mutex_unlock(&kctx->mmu_lock);
+	} while (!kctx->pgd);
+
+	p = kbase_mem_alloc_page(&kctx->mem_pool);
+	if (!p)
+		goto no_sink_page;
+	kctx->aliasing_sink_page = as_tagged(page_to_phys(p));
+
+	init_waitqueue_head(&kctx->event_queue);
+
+	kctx->cookies = KBASE_COOKIE_MASK;
+
+	/* Make sure page 0 is not used... */
+	err = kbase_region_tracker_init(kctx);
+	if (err)
+		goto no_region_tracker;
+
+	err = kbase_sticky_resource_init(kctx);
+	if (err)
+		goto no_sticky;
+
+	err = kbase_jit_init(kctx);
+	if (err)
+		goto no_jit;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_set(&kctx->jctx.work_id, 0);
+#endif
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
+#endif
+
+	kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+	mutex_init(&kctx->vinstr_cli_lock);
+
+	setup_timer(&kctx->soft_job_timeout,
+		    kbasep_soft_job_timeout_worker,
+		    (uintptr_t)kctx);
+
+	return kctx;
+
+no_jit:
+	kbase_gpu_vm_lock(kctx);
+	kbase_sticky_resource_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+no_sticky:
+	kbase_region_tracker_term(kctx);
+no_region_tracker:
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+no_sink_page:
+	/* VM lock needed for the call to kbase_mmu_free_pgd */
+	kbase_gpu_vm_lock(kctx);
+	kbase_mmu_free_pgd(kctx);
+	kbase_gpu_vm_unlock(kctx);
+pgd_no_mem:
+	kbase_mmu_term(kctx);
+term_dma_fence:
+	kbase_dma_fence_term(kctx);
+free_event:
+	kbase_event_cleanup(kctx);
+free_jd:
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+	kbase_jd_exit(kctx);
+deinit_evictable:
+	kbase_mem_evictable_deinit(kctx);
+free_both_pools:
+	kbase_mem_pool_term(&kctx->lp_mem_pool);
+free_mem_pool:
+	kbase_mem_pool_term(&kctx->mem_pool);
+free_kctx:
+	vfree(kctx);
+out:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context);
+
+static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+{
+	dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+}
+
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Calls kbase_destroy_os_context() to free OS specific structures.
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	int pages;
+	unsigned long pending_regions_to_clean;
+	unsigned long flags;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+	/* Ensure the core is powered up for the destroy process */
+	/* A suspend won't happen here, because we're in a syscall from a userspace
+	 * thread. */
+	kbase_pm_context_active(kbdev);
+
+	kbase_jd_zap_context(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	/* Removing the rest of the debugfs entries here as we want to keep the
+	 * atom debugfs interface alive until all atoms have completed. This
+	 * is useful for debugging hung contexts. */
+	debugfs_remove_recursive(kctx->kctx_dentry);
+#endif
+
+	kbase_event_cleanup(kctx);
+
+	/*
+	 * JIT must be terminated before the code below as it must be called
+	 * without the region lock being held.
+	 * The code above ensures no new JIT allocations can be made by
+	 * by the time we get to this point of context tear down.
+	 */
+	kbase_jit_term(kctx);
+
+	kbase_gpu_vm_lock(kctx);
+
+	kbase_sticky_resource_term(kctx);
+
+	/* MMU is disabled as part of scheduling out the context */
+	kbase_mmu_free_pgd(kctx);
+
+	/* drop the aliasing sink page now that it can't be mapped anymore */
+	p = phys_to_page(as_phys_addr_t(kctx->aliasing_sink_page));
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+
+	/* free pending region setups */
+	pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+	while (pending_regions_to_clean) {
+		unsigned int cookie = __ffs(pending_regions_to_clean);
+
+		BUG_ON(!kctx->pending_regions[cookie]);
+
+		kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+
+		kctx->pending_regions[cookie] = NULL;
+		pending_regions_to_clean &= ~(1UL << cookie);
+	}
+
+	kbase_region_tracker_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+
+	kbase_jd_exit(kctx);
+
+	kbase_dma_fence_term(kctx);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_remove_ctx(kctx);
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_mmu_term(kctx);
+
+	pages = atomic_read(&kctx->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_evictable_deinit(kctx);
+	kbase_mem_pool_term(&kctx->mem_pool);
+	kbase_mem_pool_term(&kctx->lp_mem_pool);
+	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+	vfree(kctx);
+
+	kbase_pm_context_idle(kbdev);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context);
+
+/**
+ * kbase_context_set_create_flags - Set creation flags on a context
+ * @kctx: Kbase context
+ * @flags: Flags to set
+ *
+ * Return: 0 on success
+ */
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
+{
+	int err = 0;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long irq_flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Validate flags */
+	if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+
+	/* Translate the flags */
+	if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+		kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* Latch the initial attributes into the Job Scheduler */
+	kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
+
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ out:
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h
new file mode 100644
index 0000000..a3f5bb0
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_context.h
@@ -0,0 +1,90 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_CONTEXT_H_
+#define _KBASE_CONTEXT_H_
+
+#include <linux/atomic.h>
+
+
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+/**
+ * kbase_ctx_flag - Check if @flag is set on @kctx
+ * @kctx: Pointer to kbase context to check
+ * @flag: Flag to check
+ *
+ * Return: true if @flag is set on @kctx, false if not.
+ */
+static inline bool kbase_ctx_flag(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	return atomic_read(&kctx->flags) & flag;
+}
+
+/**
+ * kbase_ctx_flag_clear - Clear @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Clear the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
+					enum kbase_context_flags flag)
+{
+#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
+	/*
+	 * Earlier kernel versions doesn't have atomic_andnot() or
+	 * atomic_and(). atomic_clear_mask() was only available on some
+	 * architectures and removed on arm in v3.13 on arm and arm64.
+	 *
+	 * Use a compare-exchange loop to clear the flag on pre 4.3 kernels,
+	 * when atomic_andnot() becomes available.
+	 */
+	int old, new;
+
+	do {
+		old = atomic_read(&kctx->flags);
+		new = old & ~flag;
+
+	} while (atomic_cmpxchg(&kctx->flags, old, new) != old);
+#else
+	atomic_andnot(flag, &kctx->flags);
+#endif
+}
+
+/**
+ * kbase_ctx_flag_set - Set @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Set the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	atomic_or(flag, &kctx->flags);
+}
+#endif /* _KBASE_CONTEXT_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
new file mode 100644
index 0000000..d1e7128
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -0,0 +1,4218 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_mem_linux.h>
+#ifdef CONFIG_MALI_DEVFREQ
+#include <linux/devfreq.h>
+#include <backend/gpu/mali_kbase_devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <ipa/mali_kbase_ipa_debugfs.h>
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+#ifdef CONFIG_MALI_NO_MALI
+#include "mali_kbase_model_linux.h"
+#endif /* CONFIG_MALI_NO_MALI */
+#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_debugfs.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_regs_dump_debugfs.h"
+#endif /* !MALI_CUSTOMER_RELEASE */
+#include "mali_kbase_regs_history_debugfs.h"
+#include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_ioctl.h"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/compat.h>	/* is_compat_task */
+#include <linux/mman.h>
+#include <linux/version.h>
+#include <mali_kbase_hw.h>
+#include <platform/mali_kbase_platform_common.h>
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+#include <linux/clk.h>
+#include <linux/delay.h>
+
+#include <mali_kbase_config.h>
+
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif
+
+#include <mali_kbase_tlstream.h>
+
+#include <mali_kbase_as_fault_debugfs.h>
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static int kbase_dev_nr;
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+extern int mali_pm_statue;
+#endif
+
+static DEFINE_MUTEX(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+
+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
+
+static int kbase_api_handshake(struct kbase_context *kctx,
+		struct kbase_ioctl_version_check *version)
+{
+	switch (version->major) {
+		case BASE_UK_VERSION_MAJOR:
+			/* set minor to be the lowest common */
+			version->minor = min_t(int, BASE_UK_VERSION_MINOR,
+					(int)version->minor);
+			break;
+		default:
+			/* We return our actual version regardless if it
+			 *                  * matches the version returned by userspace -
+			 *                                   * userspace can bail if it can't handle this
+			 *                                                    * version */
+			version->major = BASE_UK_VERSION_MAJOR;
+			version->minor = BASE_UK_VERSION_MINOR;
+			break;
+	}
+
+	/* save the proposed version number for later use */
+	kctx->api_version = KBASE_API_VERSION(version->major, version->minor);
+
+	return 0;
+}
+
+/**
+ * enum mali_error - Mali error codes shared with userspace
+ *
+ * This is subset of those common Mali errors that can be returned to userspace.
+ * Values of matching user and kernel space enumerators MUST be the same.
+ * MALI_ERROR_NONE is guaranteed to be 0.
+ *
+ * @MALI_ERROR_NONE: Success
+ * @MALI_ERROR_OUT_OF_GPU_MEMORY: Not used in the kernel driver
+ * @MALI_ERROR_OUT_OF_MEMORY: Memory allocation failure
+ * @MALI_ERROR_FUNCTION_FAILED: Generic error code
+ */
+enum mali_error {
+	MALI_ERROR_NONE = 0,
+	MALI_ERROR_OUT_OF_GPU_MEMORY,
+	MALI_ERROR_OUT_OF_MEMORY,
+	MALI_ERROR_FUNCTION_FAILED,
+};
+
+enum {
+	inited_mem = (1u << 0),
+	inited_js = (1u << 1),
+	/* Bit number 2 was earlier assigned to the runtime-pm initialization
+	 * stage (which has been merged with the backend_early stage).
+	 */
+#ifdef CONFIG_MALI_DEVFREQ
+	inited_devfreq = (1u << 3),
+#endif /* CONFIG_MALI_DEVFREQ */
+	inited_tlstream = (1u << 4),
+	inited_backend_early = (1u << 5),
+	inited_backend_late = (1u << 6),
+	inited_device = (1u << 7),
+	inited_vinstr = (1u << 8),
+
+	inited_job_fault = (1u << 10),
+	inited_sysfs_group = (1u << 11),
+	inited_misc_register = (1u << 12),
+	inited_get_device = (1u << 13),
+	inited_dev_list = (1u << 14),
+	inited_debugfs = (1u << 15),
+	inited_gpu_device = (1u << 16),
+	inited_registers_map = (1u << 17),
+	inited_io_history = (1u << 18),
+	inited_power_control = (1u << 19),
+	inited_buslogger = (1u << 20),
+	inited_protected = (1u << 21),
+	inited_ctx_sched = (1u << 22)
+};
+
+
+#ifdef CONFIG_MALI_DEBUG
+#define INACTIVE_WAIT_MS (5000)
+
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive)
+{
+	kbdev->driver_inactive = inactive;
+	wake_up(&kbdev->driver_inactive_wait);
+
+	/* Wait for any running IOCTLs to complete */
+	if (inactive)
+		msleep(INACTIVE_WAIT_MS);
+}
+KBASE_EXPORT_TEST_API(kbase_set_driver_inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+static struct kbase_device *to_kbase_device(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
+
+static int assign_irqs(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int i;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* 3 IRQ resources */
+	for (i = 0; i < 3; i++) {
+		struct resource *irq_res;
+		int irqtag;
+
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res) {
+			dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+			return -ENOENT;
+		}
+
+#ifdef CONFIG_OF
+		if (!strncmp(irq_res->name, "JOB", 4)) {
+			irqtag = JOB_IRQ_TAG;
+		} else if (!strncmp(irq_res->name, "MMU", 4)) {
+			irqtag = MMU_IRQ_TAG;
+		} else if (!strncmp(irq_res->name, "GPU", 4)) {
+			irqtag = GPU_IRQ_TAG;
+		} else {
+			dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+				irq_res->name);
+			return -EINVAL;
+		}
+#else
+		irqtag = i;
+#endif /* CONFIG_OF */
+		kbdev->irqs[irqtag].irq = irq_res->start;
+		kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK;
+	}
+
+	return 0;
+}
+
+/*
+ * API to acquire device list mutex and
+ * return pointer to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void)
+{
+	mutex_lock(&kbase_dev_list_lock);
+	return &kbase_dev_list;
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_get);
+
+/* API to release the device list mutex */
+void kbase_dev_list_put(const struct list_head *dev_list)
+{
+	mutex_unlock(&kbase_dev_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_put);
+
+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */
+struct kbase_device *kbase_find_device(int minor)
+{
+	struct kbase_device *kbdev = NULL;
+	struct list_head *entry;
+	const struct list_head *dev_list = kbase_dev_list_get();
+
+	list_for_each(entry, dev_list) {
+		struct kbase_device *tmp;
+
+		tmp = list_entry(entry, struct kbase_device, entry);
+		if (tmp->mdev.minor == minor || minor == -1) {
+			kbdev = tmp;
+			get_device(kbdev->dev);
+			break;
+		}
+	}
+	kbase_dev_list_put(dev_list);
+
+	return kbdev;
+}
+EXPORT_SYMBOL(kbase_find_device);
+
+void kbase_release_device(struct kbase_device *kbdev)
+{
+	put_device(kbdev->dev);
+}
+EXPORT_SYMBOL(kbase_release_device);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && \
+		!(LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 28) && \
+		LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0))
+/*
+ * Older versions, before v4.6, of the kernel doesn't have
+ * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28
+ */
+static int kstrtobool_from_user(const char __user *s, size_t count, bool *res)
+{
+	char buf[32];
+
+	count = min(sizeof(buf), count);
+
+	if (copy_from_user(buf, s, count))
+		return -EFAULT;
+	buf[count] = '\0';
+
+	return strtobool(buf, res);
+}
+#endif
+
+static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	int err;
+	bool value;
+
+	err = kstrtobool_from_user(ubuf, size, &value);
+	if (err)
+		return err;
+
+	if (value)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+	else
+		kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE);
+
+	return size;
+}
+
+static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	char buf[32];
+	int count;
+	bool value;
+
+	value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE);
+
+	count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N");
+
+	return simple_read_from_buffer(ubuf, size, off, buf, count);
+}
+
+static const struct file_operations kbase_infinite_cache_fops = {
+	.open = simple_open,
+	.write = write_ctx_infinite_cache,
+	.read = read_ctx_infinite_cache,
+};
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+	struct kbase_device *kbdev = NULL;
+	struct kbase_context *kctx;
+	int ret = 0;
+#ifdef CONFIG_DEBUG_FS
+	char kctx_name[64];
+#endif
+
+	kbdev = kbase_find_device(iminor(inode));
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kctx = kbase_create_context(kbdev, is_compat_task());
+	if (!kctx) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	init_waitqueue_head(&kctx->event_queue);
+	filp->private_data = kctx;
+	kctx->filp = filp;
+
+	if (kbdev->infinite_cache_active_default)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+
+#ifdef CONFIG_DEBUG_FS
+	snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+
+	kctx->kctx_dentry = debugfs_create_dir(kctx_name,
+			kbdev->debugfs_ctx_directory);
+
+	if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry,
+			    kctx, &kbase_infinite_cache_fops);
+
+	mutex_init(&kctx->mem_profile_lock);
+
+	kbasep_jd_debugfs_ctx_init(kctx);
+	kbase_debug_mem_view_init(filp);
+
+	kbase_debug_job_fault_context_init(kctx);
+
+	kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool, &kctx->lp_mem_pool);
+
+	kbase_jit_debugfs_init(kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+	dev_dbg(kbdev->dev, "created base context\n");
+
+	{
+		struct kbasep_kctx_list_element *element;
+
+		element = kzalloc(sizeof(*element), GFP_KERNEL);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			element->kctx = kctx;
+			list_add(&element->link, &kbdev->kctx_list);
+			KBASE_TLSTREAM_TL_NEW_CTX(
+					element->kctx,
+					element->kctx->id,
+					(u32)(element->kctx->tgid));
+			mutex_unlock(&kbdev->kctx_list_lock);
+		} else {
+			/* we don't treat this as a fail - just warn about it */
+			dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n");
+		}
+	}
+	return 0;
+
+ out:
+	kbase_release_device(kbdev);
+	return ret;
+}
+
+static int kbase_release(struct inode *inode, struct file *filp)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_kctx_list_element *element, *tmp;
+	bool found_element = false;
+
+	KBASE_TLSTREAM_TL_DEL_CTX(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	kbasep_mem_profile_debugfs_remove(kctx);
+	kbase_debug_job_fault_context_term(kctx);
+#endif
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found_element = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+	if (!found_element)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	filp->private_data = NULL;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	/* If this client was performing hwcnt dumping and did not explicitly
+	 * detach itself, remove it from the vinstr core now */
+	if (kctx->vinstr_cli) {
+		struct kbase_uk_hwcnt_setup setup;
+
+		setup.dump_buffer = 0llu;
+		kbase_vinstr_legacy_hwc_setup(
+				kbdev->vinstr_ctx, &kctx->vinstr_cli, &setup);
+	}
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	kbase_destroy_context(kctx);
+
+	dev_dbg(kbdev->dev, "deleted base context\n");
+	kbase_release_device(kbdev);
+	return 0;
+}
+
+static int kbase_api_set_flags(struct kbase_context *kctx,
+		struct kbase_ioctl_set_flags *flags)
+{
+	int err;
+
+	/* setup pending, try to signal that we'll do the setup,
+	 * if setup was already in progress, err this call
+	 */
+	if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0)
+		return -EINVAL;
+
+	err = kbase_context_set_create_flags(kctx, flags->create_flags);
+	/* if bad flags, will stay stuck in setup mode */
+	if (err)
+		return err;
+
+	atomic_set(&kctx->setup_complete, 1);
+	return 0;
+}
+
+static int kbase_api_job_submit(struct kbase_context *kctx,
+		struct kbase_ioctl_job_submit *submit)
+{
+	return kbase_jd_submit(kctx, u64_to_user_ptr(submit->addr),
+			submit->nr_atoms,
+			submit->stride, false);
+}
+
+static int kbase_api_get_gpuprops(struct kbase_context *kctx,
+		struct kbase_ioctl_get_gpuprops *get_props)
+{
+	struct kbase_gpu_props *kprops = &kctx->kbdev->gpu_props;
+	int err;
+
+	if (get_props->flags != 0) {
+		dev_err(kctx->kbdev->dev, "Unsupported flags to get_gpuprops");
+		return -EINVAL;
+	}
+
+	if (get_props->size == 0)
+		return kprops->prop_buffer_size;
+	if (get_props->size < kprops->prop_buffer_size)
+		return -EINVAL;
+
+	err = copy_to_user(u64_to_user_ptr(get_props->buffer),
+			kprops->prop_buffer,
+			kprops->prop_buffer_size);
+	if (err)
+		return -EFAULT;
+	return kprops->prop_buffer_size;
+}
+
+static int kbase_api_post_term(struct kbase_context *kctx)
+{
+	kbase_event_close(kctx);
+	return 0;
+}
+
+static int kbase_api_mem_alloc(struct kbase_context *kctx,
+		union kbase_ioctl_mem_alloc *alloc)
+{
+	struct kbase_va_region *reg;
+	u64 flags = alloc->in.flags;
+	u64 gpu_va;
+
+#if defined(CONFIG_64BIT)
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* force SAME_VA if a 64-bit client */
+		flags |= BASE_MEM_SAME_VA;
+	}
+#endif
+
+	reg = kbase_mem_alloc(kctx, alloc->in.va_pages,
+			alloc->in.commit_pages,
+			alloc->in.extent,
+			&flags, &gpu_va);
+
+	if (!reg)
+		return -ENOMEM;
+
+	alloc->out.flags = flags;
+	alloc->out.gpu_va = gpu_va;
+
+	return 0;
+}
+
+static int kbase_api_mem_query(struct kbase_context *kctx,
+		union kbase_ioctl_mem_query *query)
+{
+	return kbase_mem_query(kctx, query->in.gpu_addr,
+			query->in.query, &query->out.value);
+}
+
+static int kbase_api_mem_free(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_free *free)
+{
+	return kbase_mem_free(kctx, free->gpu_addr);
+}
+
+static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_reader_setup *setup)
+{
+	int ret;
+	struct kbase_uk_hwcnt_reader_setup args = {
+		.buffer_count = setup->buffer_count,
+		.jm_bm = setup->jm_bm,
+		.shader_bm = setup->shader_bm,
+		.tiler_bm = setup->tiler_bm,
+		.mmu_l2_bm = setup->mmu_l2_bm
+	};
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, &args);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	if (ret)
+		return ret;
+	return args.fd;
+}
+
+static int kbase_api_hwcnt_enable(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_enable *enable)
+{
+	int ret;
+	struct kbase_uk_hwcnt_setup args = {
+		.dump_buffer = enable->dump_buffer,
+		.jm_bm = enable->jm_bm,
+		.shader_bm = enable->shader_bm,
+		.tiler_bm = enable->tiler_bm,
+		.mmu_l2_bm = enable->mmu_l2_bm
+	};
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_legacy_hwc_setup(kctx->kbdev->vinstr_ctx,
+			&kctx->vinstr_cli, &args);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	return ret;
+}
+
+static int kbase_api_hwcnt_dump(struct kbase_context *kctx)
+{
+	int ret;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_hwc_dump(kctx->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	return ret;
+}
+
+static int kbase_api_hwcnt_clear(struct kbase_context *kctx)
+{
+	int ret;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_hwc_clear(kctx->vinstr_cli);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	return ret;
+}
+
+static int kbase_api_disjoint_query(struct kbase_context *kctx,
+		struct kbase_ioctl_disjoint_query *query)
+{
+	query->counter = kbase_disjoint_event_get(kctx->kbdev);
+
+	return 0;
+}
+
+static int kbase_api_get_ddk_version(struct kbase_context *kctx,
+		struct kbase_ioctl_get_ddk_version *version)
+{
+	int ret;
+	int len = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+
+	if (version->version_buffer == 0)
+		return len;
+
+	if (version->size < len)
+		return -EOVERFLOW;
+
+	ret = copy_to_user(u64_to_user_ptr(version->version_buffer),
+			KERNEL_SIDE_DDK_VERSION_STRING,
+			sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+
+	if (ret)
+		return -EFAULT;
+
+	return len;
+}
+
+static int kbase_api_mem_jit_init(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_jit_init *jit_init)
+{
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages);
+}
+
+static int kbase_api_mem_sync(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_sync *sync)
+{
+	struct basep_syncset sset = {
+		.mem_handle.basep.handle = sync->handle,
+		.user_addr = sync->user_addr,
+		.size = sync->size,
+		.type = sync->type
+	};
+
+	return kbase_sync_now(kctx, &sset);
+}
+
+static int kbase_api_mem_find_cpu_offset(struct kbase_context *kctx,
+		union kbase_ioctl_mem_find_cpu_offset *find)
+{
+	return kbasep_find_enclosing_cpu_mapping_offset(
+			kctx,
+			find->in.cpu_addr,
+			find->in.size,
+			&find->out.offset);
+}
+
+static int kbase_api_get_context_id(struct kbase_context *kctx,
+		struct kbase_ioctl_get_context_id *info)
+{
+	info->id = kctx->id;
+
+	return 0;
+}
+
+static int kbase_api_tlstream_acquire(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_acquire *acquire)
+{
+	return kbase_tlstream_acquire(kctx, acquire->flags);
+}
+
+static int kbase_api_tlstream_flush(struct kbase_context *kctx)
+{
+	kbase_tlstream_flush_streams();
+
+	return 0;
+}
+
+static int kbase_api_mem_commit(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_commit *commit)
+{
+	return kbase_mem_commit(kctx, commit->gpu_addr, commit->pages);
+}
+
+static int kbase_api_mem_alias(struct kbase_context *kctx,
+		union kbase_ioctl_mem_alias *alias)
+{
+	struct base_mem_aliasing_info *ai;
+	u64 flags;
+	int err;
+
+	if (alias->in.nents == 0 || alias->in.nents > 2048)
+		return -EINVAL;
+
+	ai = vmalloc(sizeof(*ai) * alias->in.nents);
+	if (!ai)
+		return -ENOMEM;
+
+	err = copy_from_user(ai,
+			u64_to_user_ptr(alias->in.aliasing_info),
+			sizeof(*ai) * alias->in.nents);
+	if (err) {
+		vfree(ai);
+		return -EFAULT;
+	}
+
+	flags = alias->in.flags;
+
+	alias->out.gpu_va = kbase_mem_alias(kctx, &flags,
+			alias->in.stride, alias->in.nents,
+			ai, &alias->out.va_pages);
+
+	alias->out.flags = flags;
+
+	vfree(ai);
+
+	if (alias->out.gpu_va == 0)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int kbase_api_mem_import(struct kbase_context *kctx,
+		union kbase_ioctl_mem_import *import)
+{
+	int ret;
+	u64 flags = import->in.flags;
+
+	ret = kbase_mem_import(kctx,
+			import->in.type,
+			u64_to_user_ptr(import->in.phandle),
+			import->in.padding,
+			&import->out.gpu_va,
+			&import->out.va_pages,
+			&flags);
+
+	import->out.flags = flags;
+
+	return ret;
+}
+
+static int kbase_api_mem_flags_change(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_flags_change *change)
+{
+	return kbase_mem_flags_change(kctx, change->gpu_va,
+			change->flags, change->mask);
+}
+
+static int kbase_api_stream_create(struct kbase_context *kctx,
+		struct kbase_ioctl_stream_create *stream)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	int fd, ret;
+
+	/* Name must be NULL-terminated and padded with NULLs, so check last
+	 * character is NULL
+	 */
+	if (stream->name[sizeof(stream->name)-1] != 0)
+		return -EINVAL;
+
+	ret = kbase_sync_fence_stream_create(stream->name, &fd);
+
+	if (ret)
+		return ret;
+	return fd;
+#else
+	return -ENOENT;
+#endif
+}
+
+static int kbase_api_fence_validate(struct kbase_context *kctx,
+		struct kbase_ioctl_fence_validate *validate)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	return kbase_sync_fence_validate(validate->fd);
+#else
+	return -ENOENT;
+#endif
+}
+
+static int kbase_api_get_profiling_controls(struct kbase_context *kctx,
+		struct kbase_ioctl_get_profiling_controls *controls)
+{
+	int ret;
+
+	if (controls->count > (FBDUMP_CONTROL_MAX - FBDUMP_CONTROL_MIN))
+		return -EINVAL;
+
+	ret = copy_to_user(u64_to_user_ptr(controls->buffer),
+			&kctx->kbdev->kbase_profiling_controls[
+				FBDUMP_CONTROL_MIN],
+			controls->count * sizeof(u32));
+
+	if (ret)
+		return -EFAULT;
+	return 0;
+}
+
+static int kbase_api_mem_profile_add(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_profile_add *data)
+{
+	char *buf;
+	int err;
+
+	if (data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+		dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big\n");
+		return -EINVAL;
+	}
+
+	buf = kmalloc(data->len, GFP_KERNEL);
+	if (ZERO_OR_NULL_PTR(buf))
+		return -ENOMEM;
+
+	err = copy_from_user(buf, u64_to_user_ptr(data->buffer),
+			data->len);
+	if (err) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	return kbasep_mem_profile_debugfs_insert(kctx, buf, data->len);
+}
+
+static int kbase_api_soft_event_update(struct kbase_context *kctx,
+		struct kbase_ioctl_soft_event_update *update)
+{
+	if (update->flags != 0)
+		return -EINVAL;
+
+	return kbase_soft_event_update(kctx, update->event, update->new_status);
+}
+
+#if MALI_UNIT_TEST
+static int kbase_api_tlstream_test(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_test *test)
+{
+	kbase_tlstream_test(
+			test->tpw_count,
+			test->msg_delay,
+			test->msg_count,
+			test->aux_msg);
+
+	return 0;
+}
+
+static int kbase_api_tlstream_stats(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_stats *stats)
+{
+	kbase_tlstream_stats(
+			&stats->bytes_collected,
+			&stats->bytes_generated);
+
+	return 0;
+}
+#endif /* MALI_UNIT_TEST */
+
+#define KBASE_HANDLE_IOCTL(cmd, function)                          \
+	case cmd:                                                  \
+	do {                                                       \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE);          \
+		return function(kctx);                             \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_IN(cmd, function, type)                 \
+	case cmd:                                                  \
+	do {                                                       \
+		type param;                                        \
+		int err;                                           \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE);         \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));     \
+		err = copy_from_user(&param, uarg, sizeof(param)); \
+		if (err)                                           \
+			return -EFAULT;                            \
+		return function(kctx, &param);                     \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type)                \
+	case cmd:                                                  \
+	do {                                                       \
+		type param;                                        \
+		int ret, err;                                      \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ);          \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));     \
+		ret = function(kctx, &param);                      \
+		err = copy_to_user(uarg, &param, sizeof(param));   \
+		if (err)                                           \
+			return -EFAULT;                            \
+		return ret;                                        \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type)                  \
+	case cmd:                                                      \
+	do {                                                           \
+		type param;                                            \
+		int ret, err;                                          \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE|_IOC_READ)); \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));         \
+		err = copy_from_user(&param, uarg, sizeof(param));     \
+		if (err)                                               \
+			return -EFAULT;                                \
+		ret = function(kctx, &param);                          \
+		err = copy_to_user(uarg, &param, sizeof(param));       \
+		if (err)                                               \
+			return -EFAULT;                                \
+		return ret;                                            \
+	} while (0)
+
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct kbase_device *kbdev = kctx->kbdev;
+	void __user *uarg = (void __user *)arg;
+
+	/* Only these ioctls are available until setup is complete */
+	switch (cmd) {
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK,
+				kbase_api_handshake,
+				struct kbase_ioctl_version_check);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS,
+				kbase_api_set_flags,
+				struct kbase_ioctl_set_flags);
+	}
+
+	/* Block call until version handshake and setup is complete */
+	if (kctx->api_version == 0 || !atomic_read(&kctx->setup_complete))
+		return -EINVAL;
+
+	/* Normal ioctls */
+	switch (cmd) {
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT,
+				kbase_api_job_submit,
+				struct kbase_ioctl_job_submit);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS,
+				kbase_api_get_gpuprops,
+				struct kbase_ioctl_get_gpuprops);
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM,
+				kbase_api_post_term);
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC,
+				kbase_api_mem_alloc,
+				union kbase_ioctl_mem_alloc);
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY,
+				kbase_api_mem_query,
+				union kbase_ioctl_mem_query);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE,
+				kbase_api_mem_free,
+				struct kbase_ioctl_mem_free);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP,
+				kbase_api_hwcnt_reader_setup,
+				struct kbase_ioctl_hwcnt_reader_setup);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE,
+				kbase_api_hwcnt_enable,
+				struct kbase_ioctl_hwcnt_enable);
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP,
+				kbase_api_hwcnt_dump);
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR,
+				kbase_api_hwcnt_clear);
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY,
+				kbase_api_disjoint_query,
+				struct kbase_ioctl_disjoint_query);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION,
+				kbase_api_get_ddk_version,
+				struct kbase_ioctl_get_ddk_version);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT,
+				kbase_api_mem_jit_init,
+				struct kbase_ioctl_mem_jit_init);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC,
+				kbase_api_mem_sync,
+				struct kbase_ioctl_mem_sync);
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET,
+				kbase_api_mem_find_cpu_offset,
+				union kbase_ioctl_mem_find_cpu_offset);
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID,
+				kbase_api_get_context_id,
+				struct kbase_ioctl_get_context_id);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE,
+				kbase_api_tlstream_acquire,
+				struct kbase_ioctl_tlstream_acquire);
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH,
+				kbase_api_tlstream_flush);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT,
+				kbase_api_mem_commit,
+				struct kbase_ioctl_mem_commit);
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS,
+				kbase_api_mem_alias,
+				union kbase_ioctl_mem_alias);
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT,
+				kbase_api_mem_import,
+				union kbase_ioctl_mem_import);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE,
+				kbase_api_mem_flags_change,
+				struct kbase_ioctl_mem_flags_change);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE,
+				kbase_api_stream_create,
+				struct kbase_ioctl_stream_create);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE,
+				kbase_api_fence_validate,
+				struct kbase_ioctl_fence_validate);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_PROFILING_CONTROLS,
+				kbase_api_get_profiling_controls,
+				struct kbase_ioctl_get_profiling_controls);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD,
+				kbase_api_mem_profile_add,
+				struct kbase_ioctl_mem_profile_add);
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE,
+				kbase_api_soft_event_update,
+				struct kbase_ioctl_soft_event_update);
+
+#if MALI_UNIT_TEST
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST,
+				kbase_api_tlstream_test,
+				struct kbase_ioctl_tlstream_test);
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS,
+				kbase_api_tlstream_stats,
+				struct kbase_ioctl_tlstream_stats);
+#endif
+	}
+
+	dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd));
+
+	return -ENOIOCTLCMD;
+}
+
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct base_jd_event_v2 uevent;
+	int out_count = 0;
+
+	if (count < sizeof(uevent))
+		return -ENOBUFS;
+
+	do {
+		while (kbase_event_dequeue(kctx, &uevent)) {
+			if (out_count > 0)
+				goto out;
+
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			if (wait_event_interruptible(kctx->event_queue,
+					kbase_event_pending(kctx)) != 0)
+				return -ERESTARTSYS;
+		}
+		if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+			if (out_count == 0)
+				return -EPIPE;
+			goto out;
+		}
+
+		if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0)
+			return -EFAULT;
+
+		buf += sizeof(uevent);
+		out_count++;
+		count -= sizeof(uevent);
+	} while (count >= sizeof(uevent));
+
+ out:
+	return out_count * sizeof(uevent);
+}
+
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+	struct kbase_context *kctx = filp->private_data;
+
+	poll_wait(filp, &kctx->event_queue, wait);
+	if (kbase_event_pending(kctx))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup);
+
+static int kbase_check_flags(int flags)
+{
+	/* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
+	 * closes the file descriptor in a child process.
+	 */
+	if (0 == (flags & O_CLOEXEC))
+		return -EINVAL;
+
+	return 0;
+}
+
+
+/**
+ * align_and_check - Align the specified pointer to the provided alignment and
+ *                   check that it is still in range.
+ * @gap_end:        Highest possible start address for allocation (end of gap in
+ *                  address space)
+ * @gap_start:      Start address of current memory area / gap in address space
+ * @info:           vm_unmapped_area_info structure passed to caller, containing
+ *                  alignment, length and limits for the allocation
+ * @is_shader_code: True if the allocation is for shader code (which has
+ *                  additional alignment requirements)
+ *
+ * Return: true if gap_end is now aligned correctly and is still in range,
+ *         false otherwise
+ */
+static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
+		struct vm_unmapped_area_info *info, bool is_shader_code)
+{
+	/* Compute highest gap address at the desired alignment */
+	(*gap_end) -= info->length;
+	(*gap_end) -= (*gap_end - info->align_offset) & info->align_mask;
+
+	if (is_shader_code) {
+		/* Check for 4GB boundary */
+		if (0 == (*gap_end & BASE_MEM_MASK_4GB))
+			(*gap_end) -= (info->align_offset ? info->align_offset :
+					info->length);
+		if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB))
+			(*gap_end) -= (info->align_offset ? info->align_offset :
+					info->length);
+
+		if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end +
+				info->length) & BASE_MEM_MASK_4GB))
+			return false;
+	}
+
+
+	if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
+		return false;
+
+
+	return true;
+}
+
+/* The following function is taken from the kernel and just
+ * renamed. As it's not exported to modules we must copy-paste it here.
+ */
+
+static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
+		*info, bool is_shader_code)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+
+	/* Adjust search length to account for worst case alignment overhead */
+	length = info->length + info->align_mask;
+	if (length < info->length)
+		return -ENOMEM;
+
+	/*
+	 * Adjust search limits by the desired length.
+	 * See implementation comment at top of unmapped_area().
+	 */
+	gap_end = info->high_limit;
+	if (gap_end < length)
+		return -ENOMEM;
+	high_limit = gap_end - length;
+
+	if (info->low_limit > high_limit)
+		return -ENOMEM;
+	low_limit = info->low_limit + length;
+
+	/* Check highest gap, which does not precede any rbtree node */
+	gap_start = mm->highest_vm_end;
+	if (gap_start <= high_limit) {
+		if (align_and_check(&gap_end, gap_start, info, is_shader_code))
+			return gap_end;
+	}
+
+	/* Check if rbtree root looks promising */
+	if (RB_EMPTY_ROOT(&mm->mm_rb))
+		return -ENOMEM;
+	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+	if (vma->rb_subtree_gap < length)
+		return -ENOMEM;
+
+	while (true) {
+		/* Visit right subtree if it looks promising */
+		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+			struct vm_area_struct *right =
+				rb_entry(vma->vm_rb.rb_right,
+					 struct vm_area_struct, vm_rb);
+			if (right->rb_subtree_gap >= length) {
+				vma = right;
+				continue;
+			}
+		}
+
+check_current:
+		/* Check if current node has a suitable gap */
+		gap_end = vma->vm_start;
+		if (gap_end < low_limit)
+			return -ENOMEM;
+		if (gap_start <= high_limit && gap_end - gap_start >= length) {
+			/* We found a suitable gap. Clip it with the original
+			 * high_limit. */
+			if (gap_end > info->high_limit)
+				gap_end = info->high_limit;
+
+			if (align_and_check(&gap_end, gap_start, info,
+					is_shader_code))
+				return gap_end;
+		}
+
+		/* Visit left subtree if it looks promising */
+		if (vma->vm_rb.rb_left) {
+			struct vm_area_struct *left =
+				rb_entry(vma->vm_rb.rb_left,
+					 struct vm_area_struct, vm_rb);
+			if (left->rb_subtree_gap >= length) {
+				vma = left;
+				continue;
+			}
+		}
+
+		/* Go back up the rbtree to find next candidate node */
+		while (true) {
+			struct rb_node *prev = &vma->vm_rb;
+			if (!rb_parent(prev))
+				return -ENOMEM;
+			vma = rb_entry(rb_parent(prev),
+				       struct vm_area_struct, vm_rb);
+			if (prev == vma->vm_rb.rb_right) {
+				gap_start = vma->vm_prev ?
+					vma->vm_prev->vm_end : 0;
+				goto check_current;
+			}
+		}
+	}
+
+	return -ENOMEM;
+}
+
+static unsigned long kbase_get_unmapped_area(struct file *filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	/* based on get_unmapped_area, but simplified slightly due to that some
+	 * values are known in advance */
+	struct kbase_context *kctx = filp->private_data;
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+	unsigned long align_offset = 0;
+	unsigned long align_mask = 0;
+	unsigned long high_limit = mm->mmap_base;
+	unsigned long low_limit = PAGE_SIZE;
+	int cpu_va_bits = BITS_PER_LONG;
+	int gpu_pc_bits =
+	      kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	bool is_shader_code = false;
+	unsigned long ret;
+
+	/* err on fixed address */
+	if ((flags & MAP_FIXED) || addr)
+		return -EINVAL;
+
+#ifdef CONFIG_64BIT
+	/* too big? */
+	if (len > TASK_SIZE - SZ_2M)
+		return -ENOMEM;
+
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+
+		if (kbase_hw_has_feature(kctx->kbdev,
+						BASE_HW_FEATURE_33BIT_VA)) {
+			high_limit = kctx->same_va_end << PAGE_SHIFT;
+		} else {
+			high_limit = min_t(unsigned long, mm->mmap_base,
+					(kctx->same_va_end << PAGE_SHIFT));
+			if (len >= SZ_2M) {
+				align_offset = SZ_2M;
+				align_mask = SZ_2M - 1;
+			}
+		}
+
+		low_limit = SZ_2M;
+	} else {
+		cpu_va_bits = 32;
+	}
+#endif /* CONFIG_64BIT */
+	if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) &&
+		(PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) {
+			int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+
+			if (!kctx->pending_regions[cookie])
+				return -EINVAL;
+
+			if (!(kctx->pending_regions[cookie]->flags &
+							KBASE_REG_GPU_NX)) {
+				if (cpu_va_bits > gpu_pc_bits) {
+					align_offset = 1ULL << gpu_pc_bits;
+					align_mask = align_offset - 1;
+					is_shader_code = true;
+				}
+			}
+#ifndef CONFIG_64BIT
+	} else {
+		return current->mm->get_unmapped_area(filp, addr, len, pgoff,
+						      flags);
+#endif
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = low_limit;
+	info.high_limit = high_limit;
+	info.align_offset = align_offset;
+	info.align_mask = align_mask;
+
+	ret = kbase_unmapped_area_topdown(&info, is_shader_code);
+
+	if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
+			high_limit < (kctx->same_va_end << PAGE_SHIFT)) {
+		/* Retry above mmap_base */
+		info.low_limit = mm->mmap_base;
+		info.high_limit = min_t(u64, TASK_SIZE,
+					(kctx->same_va_end << PAGE_SHIFT));
+
+		ret = kbase_unmapped_area_topdown(&info, is_shader_code);
+	}
+
+	return ret;
+}
+
+static const struct file_operations kbase_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_open,
+	.release = kbase_release,
+	.read = kbase_read,
+	.poll = kbase_poll,
+	.unlocked_ioctl = kbase_ioctl,
+	.compat_ioctl = kbase_ioctl,
+	.mmap = kbase_mmap,
+	.check_flags = kbase_check_flags,
+	.get_unmapped_area = kbase_get_unmapped_area,
+};
+
+/**
+ * show_policy - Show callback for the power_policy sysfs file.
+ *
+ * This function is called to get the contents of the power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *current_policy;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_get_policy(kbdev);
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * set_policy - Store callback for the power_policy sysfs file.
+ *
+ * This function is called when the power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls kbase_pm_set_policy() to change the
+ * policy.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "power_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/*
+ * The sysfs file power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+
+/**
+ * show_ca_policy - Show callback for the core_availability_policy sysfs file.
+ *
+ * This function is called to get the contents of the core_availability_policy
+ * sysfs file. This is a list of the available policies with the currently
+ * active one surrounded by square brackets.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *current_policy;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_ca_get_policy(kbdev);
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * set_ca_policy - Store callback for the core_availability_policy sysfs file.
+ *
+ * This function is called when the core_availability_policy sysfs file is
+ * written to. It matches the requested policy against the available policies
+ * and if a matching policy is found calls kbase_pm_set_policy() to change
+ * the policy.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_ca_policy *new_policy = NULL;
+	const struct kbase_pm_ca_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "core_availability_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_ca_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/*
+ * The sysfs file core_availability_policy
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy);
+
+/*
+ * show_core_mask - Show callback for the core_mask sysfs file.
+ *
+ * This function is called to get the contents of the core_mask sysfs file.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS0) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[0]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS1) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[1]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS2) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[2]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Available core mask : 0x%llX\n",
+			kbdev->gpu_props.props.raw_props.shader_present);
+
+	return ret;
+}
+
+/**
+ * set_core_mask - Store callback for the core_mask sysfs file.
+ *
+ * This function is called when the core_mask sysfs file is written to.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	u64 new_core_mask[3];
+	int items;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llx %llx %llx",
+			&new_core_mask[0], &new_core_mask[1],
+			&new_core_mask[2]);
+
+	if (items == 1)
+		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+
+	if (items == 1 || items == 3) {
+		u64 shader_present =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		u64 group0_core_mask =
+				kbdev->gpu_props.props.coherency_info.group[0].
+				core_mask;
+
+		if ((new_core_mask[0] & shader_present) != new_core_mask[0] ||
+				!(new_core_mask[0] & group0_core_mask) ||
+			(new_core_mask[1] & shader_present) !=
+						new_core_mask[1] ||
+				!(new_core_mask[1] & group0_core_mask) ||
+			(new_core_mask[2] & shader_present) !=
+						new_core_mask[2] ||
+				!(new_core_mask[2] & group0_core_mask)) {
+			dev_err(dev, "power_policy: invalid core specification\n");
+			return -EINVAL;
+		}
+
+		if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
+				kbdev->pm.debug_core_mask[1] !=
+						new_core_mask[1] ||
+				kbdev->pm.debug_core_mask[2] !=
+						new_core_mask[2]) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+					new_core_mask[1], new_core_mask[2]);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n"
+		"Use format <core_mask>\n"
+		"or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+	return -EINVAL;
+}
+
+/*
+ * The sysfs file core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+
+/**
+ * set_soft_job_timeout - Store callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This allows setting the timeout for software jobs. Waiting soft event wait
+ * jobs will be cancelled after this period expires, while soft fence wait jobs
+ * will print debug information if the fence debug feature is enabled.
+ *
+ * This is expressed in milliseconds.
+ *
+ * Return: count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_soft_job_timeout(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int soft_job_timeout_ms;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) ||
+	    (soft_job_timeout_ms <= 0))
+		return -EINVAL;
+
+	atomic_set(&kbdev->js_data.soft_job_timeout_ms,
+		   soft_job_timeout_ms);
+
+	return count;
+}
+
+/**
+ * show_soft_job_timeout - Show callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * This will return the timeout for the software jobs.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_soft_job_timeout(struct device *dev,
+				       struct device_attribute *attr,
+				       char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n",
+			 atomic_read(&kbdev->js_data.soft_job_timeout_ms));
+}
+
+static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR,
+		   show_soft_job_timeout, set_soft_job_timeout);
+
+static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms,
+				int default_ticks, u32 old_ticks)
+{
+	if (timeout_ms > 0) {
+		u64 ticks = timeout_ms * 1000000ULL;
+		do_div(ticks, kbdev->js_data.scheduling_period_ns);
+		if (!ticks)
+			return 1;
+		return ticks;
+	} else if (timeout_ms < 0) {
+		return default_ticks;
+	} else {
+		return old_ticks;
+	}
+}
+
+/**
+ * set_js_timeouts - Store callback for the js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the js_timeouts sysfs
+ * file. This file contains five values separated by whitespace. The values
+ * are basically the same as %JS_SOFT_STOP_TICKS, %JS_HARD_STOP_TICKS_SS,
+ * %JS_HARD_STOP_TICKS_DUMPING, %JS_RESET_TICKS_SS, %JS_RESET_TICKS_DUMPING
+ * configuration values (in that order), with the difference that the js_timeout
+ * values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfile file allows the current values in
+ * use by the job scheduler to get override. Note that a value needs to
+ * be other than 0 for it to override the current job scheduler value.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	long js_soft_stop_ms;
+	long js_soft_stop_ms_cl;
+	long js_hard_stop_ms_ss;
+	long js_hard_stop_ms_cl;
+	long js_hard_stop_ms_dumping;
+	long js_reset_ms_ss;
+	long js_reset_ms_cl;
+	long js_reset_ms_dumping;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld",
+			&js_soft_stop_ms, &js_soft_stop_ms_cl,
+			&js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+			&js_hard_stop_ms_dumping, &js_reset_ms_ss,
+			&js_reset_ms_cl, &js_reset_ms_dumping);
+
+	if (items == 8) {
+		struct kbasep_js_device_data *js_data = &kbdev->js_data;
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\
+	js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \
+			default, js_data->ticks_name); \
+	dev_dbg(kbdev->dev, "Overriding " #ticks_name \
+			" with %lu ticks (%lu ms)\n", \
+			(unsigned long)js_data->ticks_name, \
+			ms_name); \
+	} while (0)
+
+		UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms,
+				DEFAULT_JS_SOFT_STOP_TICKS);
+		UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl,
+				DEFAULT_JS_SOFT_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_HARD_STOP_TICKS_SS_8408 :
+				DEFAULT_JS_HARD_STOP_TICKS_SS);
+		UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl,
+				DEFAULT_JS_HARD_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_dumping,
+				js_hard_stop_ms_dumping,
+				DEFAULT_JS_HARD_STOP_TICKS_DUMPING);
+		UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_RESET_TICKS_SS_8408 :
+				DEFAULT_JS_RESET_TICKS_SS);
+		UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl,
+				DEFAULT_JS_RESET_TICKS_CL);
+		UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping,
+				DEFAULT_JS_RESET_TICKS_DUMPING);
+
+		kbase_js_set_timeouts(kbdev);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n"
+			"Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n"
+			"Write 0 for no change, -1 to restore default timeout\n");
+	return -EINVAL;
+}
+
+static unsigned long get_js_timeout_in_ms(
+		u32 scheduling_period_ns,
+		u32 ticks)
+{
+	u64 ms = (u64)ticks * scheduling_period_ns;
+
+	do_div(ms, 1000000UL);
+	return ms;
+}
+
+/**
+ * show_js_timeouts - Show callback for the js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the js_timeouts sysfs
+ * file. It returns the last set values written to the js_timeouts sysfs file.
+ * If the file didn't get written yet, the values will be current setting in
+ * use.
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+	unsigned long js_soft_stop_ms;
+	unsigned long js_soft_stop_ms_cl;
+	unsigned long js_hard_stop_ms_ss;
+	unsigned long js_hard_stop_ms_cl;
+	unsigned long js_hard_stop_ms_dumping;
+	unsigned long js_reset_ms_ss;
+	unsigned long js_reset_ms_cl;
+	unsigned long js_reset_ms_dumping;
+	u32 scheduling_period_ns;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+
+#define GET_TIMEOUT(name) get_js_timeout_in_ms(\
+		scheduling_period_ns, \
+		kbdev->js_data.name)
+
+	js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks);
+	js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl);
+	js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss);
+	js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl);
+	js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping);
+	js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss);
+	js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl);
+	js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef GET_TIMEOUT
+
+	ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+			js_soft_stop_ms, js_soft_stop_ms_cl,
+			js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+			js_hard_stop_ms_dumping, js_reset_ms_ss,
+			js_reset_ms_cl, js_reset_ms_dumping);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * The sysfs file js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * JS_STOP_STOP_TICKS_SS
+ * JS_STOP_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
+
+static u32 get_new_js_timeout(
+		u32 old_period,
+		u32 old_ticks,
+		u32 new_scheduling_period_ns)
+{
+	u64 ticks = (u64)old_period * (u64)old_ticks;
+	do_div(ticks, new_scheduling_period_ns);
+	return ticks?ticks:1;
+}
+
+/**
+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ *                            file
+ * @dev:   The device the sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the js_scheduling_period sysfs file is written
+ * to. It checks the data written, and if valid updates the js_scheduling_period
+ * value
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	unsigned int js_scheduling_period;
+	u32 new_scheduling_period_ns;
+	u32 old_period;
+	struct kbasep_js_device_data *js_data;
+	unsigned long flags;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	js_data = &kbdev->js_data;
+
+	ret = kstrtouint(buf, 0, &js_scheduling_period);
+	if (ret || !js_scheduling_period) {
+		dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
+				"Use format <js_scheduling_period_ms>\n");
+		return -EINVAL;
+	}
+
+	new_scheduling_period_ns = js_scheduling_period * 1000000;
+
+	/* Update scheduling timeouts */
+	mutex_lock(&js_data->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* If no contexts have been scheduled since js_timeouts was last written
+	 * to, the new timeouts might not have been latched yet. So check if an
+	 * update is pending and use the new values if necessary. */
+
+	/* Use previous 'new' scheduling period as a base if present. */
+	old_period = js_data->scheduling_period_ns;
+
+#define SET_TIMEOUT(name) \
+		(js_data->name = get_new_js_timeout(\
+				old_period, \
+				kbdev->js_data.name, \
+				new_scheduling_period_ns))
+
+	SET_TIMEOUT(soft_stop_ticks);
+	SET_TIMEOUT(soft_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_ss);
+	SET_TIMEOUT(hard_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_dumping);
+	SET_TIMEOUT(gpu_reset_ticks_ss);
+	SET_TIMEOUT(gpu_reset_ticks_cl);
+	SET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef SET_TIMEOUT
+
+	js_data->scheduling_period_ns = new_scheduling_period_ns;
+
+	kbase_js_set_timeouts(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&js_data->runpool_mutex);
+
+	dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
+			js_scheduling_period);
+
+	return count;
+}
+
+/**
+ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs
+ *                             entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the JS scheduling
+ * period.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	u32 period;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	period = kbdev->js_data.scheduling_period_ns;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+			period / 1000000);
+
+	return ret;
+}
+
+static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
+		show_js_scheduling_period, set_js_scheduling_period);
+
+#if !MALI_CUSTOMER_RELEASE
+/**
+ * set_force_replay - Store callback for the force_replay sysfs file.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (!strncmp("limit=", buf, MIN(6, count))) {
+		int force_replay_limit;
+		int items = sscanf(buf, "limit=%u", &force_replay_limit);
+
+		if (items == 1) {
+			kbdev->force_replay_random = false;
+			kbdev->force_replay_limit = force_replay_limit;
+			kbdev->force_replay_count = 0;
+
+			return count;
+		}
+	} else if (!strncmp("random_limit", buf, MIN(12, count))) {
+		kbdev->force_replay_random = true;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("norandom_limit", buf, MIN(14, count))) {
+		kbdev->force_replay_random = false;
+		kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("core_req=", buf, MIN(9, count))) {
+		unsigned int core_req;
+		int items = sscanf(buf, "core_req=%x", &core_req);
+
+		if (items == 1) {
+			kbdev->force_replay_core_req = (base_jd_core_req)core_req;
+
+			return count;
+		}
+	}
+	dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n");
+	return -EINVAL;
+}
+
+/**
+ * show_force_replay - Show callback for the force_replay sysfs file.
+ *
+ * This function is called to get the contents of the force_replay sysfs
+ * file. It returns the last set value written to the force_replay sysfs file.
+ * If the file didn't get written yet, the values will be 0.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_force_replay(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->force_replay_random)
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=0\nrandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_core_req);
+	else
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=%u\nnorandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_limit,
+				kbdev->force_replay_core_req);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * The sysfs file force_replay.
+ */
+static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay,
+		set_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG
+static ssize_t set_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int softstop_always;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &softstop_always);
+	if (ret || ((softstop_always != 0) && (softstop_always != 1))) {
+		dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n"
+				"Use format <soft_stop_always>\n");
+		return -EINVAL;
+	}
+
+	kbdev->js_data.softstop_always = (bool) softstop_always;
+	dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n",
+			(kbdev->js_data.softstop_always) ?
+			"Enabled" : "Disabled");
+	return count;
+}
+
+static ssize_t show_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * By default, soft-stops are disabled when only a single context is present.
+ * The ability to enable soft-stop when only a single context is present can be
+ * used for debug and unit-testing purposes.
+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.)
+ */
+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG
+typedef void (kbasep_debug_command_func) (struct kbase_device *);
+
+enum kbasep_debug_command_code {
+	KBASEP_DEBUG_COMMAND_DUMPTRACE,
+
+	/* This must be the last enum */
+	KBASEP_DEBUG_COMMAND_COUNT
+};
+
+struct kbasep_debug_command {
+	char *str;
+	kbasep_debug_command_func *func;
+};
+
+/* Debug commands supported by the driver */
+static const struct kbasep_debug_command debug_commands[] = {
+	{
+	 .str = "dumptrace",
+	 .func = &kbasep_trace_dump,
+	 }
+};
+
+/**
+ * show_debug - Show callback for the debug_command sysfs file.
+ *
+ * This function is called to get the contents of the debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * issue_debug - Store callback for the debug_command sysfs file.
+ *
+ * This function is called when the debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @debug_commands to issue the command.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) {
+		if (sysfs_streq(debug_commands[i].str, buf)) {
+			debug_commands[i].func(kbdev);
+			return count;
+		}
+	}
+
+	/* Debug Command not found */
+	dev_err(dev, "debug_command: command not known\n");
+	return -EINVAL;
+}
+
+/* The sysfs file debug_command.
+ *
+ * This is used to issue general debug commands to the device driver.
+ * Reading it will produce a list of debug commands, separated by newlines.
+ * Writing to it with one of those commands will issue said command.
+ */
+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
+#endif /* CONFIG_MALI_DEBUG */
+
+/**
+ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get a description of the present Mali
+ * GPU via the gpuinfo sysfs entry.  This includes the GPU family, the
+ * number of cores, the hardware version and the raw product id.  For
+ * example
+ *
+ *    Mali-T60x MP4 r0p0 0x6956
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t kbase_show_gpuinfo(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	static const struct gpu_product_id_name {
+		unsigned id;
+		char *name;
+	} gpu_product_id_names[] = {
+		{ .id = GPU_ID_PI_T60X, .name = "Mali-T60x" },
+		{ .id = GPU_ID_PI_T62X, .name = "Mali-T62x" },
+		{ .id = GPU_ID_PI_T72X, .name = "Mali-T72x" },
+		{ .id = GPU_ID_PI_T76X, .name = "Mali-T76x" },
+		{ .id = GPU_ID_PI_T82X, .name = "Mali-T82x" },
+		{ .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
+		{ .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
+		{ .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+		{ .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G71" },
+		{ .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G72" },
+		{ .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G51" },
+		{ .id = GPU_ID2_PRODUCT_TNOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-TNOx" },
+	};
+	const char *product_name = "(Unknown Mali GPU)";
+	struct kbase_device *kbdev;
+	u32 gpu_id;
+	unsigned product_id, product_id_mask;
+	unsigned i;
+	bool is_new_format;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	is_new_format = GPU_ID_IS_NEW_FORMAT(product_id);
+	product_id_mask =
+		(is_new_format ?
+			GPU_ID2_PRODUCT_MODEL :
+			GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
+		const struct gpu_product_id_name *p = &gpu_product_id_names[i];
+
+		if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) &&
+		    (p->id & product_id_mask) ==
+		    (product_id & product_id_mask)) {
+			product_name = p->name;
+			break;
+		}
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n",
+		product_name, kbdev->gpu_props.num_cores,
+		(gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT,
+		(gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT,
+		product_id);
+}
+static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
+
+/**
+ * set_dvfs_period - Store callback for the dvfs_period sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the dvfs_period sysfs file is written to. It
+ * checks the data written, and if valid updates the DVFS period variable,
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_dvfs_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int dvfs_period;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &dvfs_period);
+	if (ret || dvfs_period <= 0) {
+		dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n"
+				"Use format <dvfs_period_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.dvfs_period = dvfs_period;
+	dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period);
+
+	return count;
+}
+
+/**
+ * show_dvfs_period - Show callback for the dvfs_period sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_dvfs_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period);
+
+	return ret;
+}
+
+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
+		set_dvfs_period);
+
+/**
+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the pm_poweroff sysfs file is written to.
+ *
+ * This file contains three values separated by whitespace. The values
+ * are gpu_poweroff_time (the period of the poweroff timer, in ns),
+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle
+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer
+ * ticks before the GPU is powered off), in that order.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	s64 gpu_poweroff_time;
+	int poweroff_shader_ticks, poweroff_gpu_ticks;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time,
+			&poweroff_shader_ticks,
+			&poweroff_gpu_ticks);
+	if (items != 3) {
+		dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n"
+				"Use format <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+	kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
+	kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+
+	return count;
+}
+
+/**
+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
+			ktime_to_ns(kbdev->pm.gpu_poweroff_time),
+			kbdev->pm.poweroff_shader_ticks,
+			kbdev->pm.poweroff_gpu_ticks);
+
+	return ret;
+}
+
+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
+		set_pm_poweroff);
+
+/**
+ * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the reset_timeout sysfs file is written to. It
+ * checks the data written, and if valid updates the reset timeout.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_reset_timeout(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int reset_timeout;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &reset_timeout);
+	if (ret || reset_timeout <= 0) {
+		dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n"
+				"Use format <reset_timeout_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->reset_timeout_ms = reset_timeout;
+	dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout);
+
+	return count;
+}
+
+/**
+ * show_reset_timeout - Show callback for the reset_timeout sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current reset timeout.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_reset_timeout(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms);
+
+	return ret;
+}
+
+static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout,
+		set_reset_timeout);
+
+
+
+static ssize_t show_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_size);
+	if (err)
+		return err;
+
+	kbase_mem_pool_trim(&kbdev->mem_pool, new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size,
+		set_mem_pool_size);
+
+static ssize_t show_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_max_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_max_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_max_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
+		set_mem_pool_max_size);
+
+/**
+ * show_lp_mem_pool_size - Show size of the large memory pages pool.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the pool size.
+ *
+ * This function is called to get the number of large memory pages which currently populate the kbdev pool.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_lp_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%zu\n", kbase_mem_pool_size(&kbdev->lp_mem_pool));
+}
+
+/**
+ * set_lp_mem_pool_size - Set size of the large memory pages pool.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called to set the number of large memory pages which should populate the kbdev pool.
+ * This may cause existing pages to be removed from the pool, or new pages to be created and then added to the pool.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_lp_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	unsigned long new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, &new_size);
+	if (err)
+		return err;
+
+	kbase_mem_pool_trim(&kbdev->lp_mem_pool, new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(lp_mem_pool_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_size,
+		set_lp_mem_pool_size);
+
+/**
+ * show_lp_mem_pool_max_size - Show maximum size of the large memory pages pool.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the pool size.
+ *
+ * This function is called to get the maximum number of large memory pages that the kbdev pool can possibly contain.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_lp_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%zu\n", kbase_mem_pool_max_size(&kbdev->lp_mem_pool));
+}
+
+/**
+ * set_lp_mem_pool_max_size - Set maximum size of the large memory pages pool.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called to set the maximum number of large memory pages that the kbdev pool can possibly contain.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_lp_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	unsigned long new_max_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, &new_max_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_set_max_size(&kbdev->lp_mem_pool, new_max_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size,
+		set_lp_mem_pool_max_size);
+
+#ifdef CONFIG_DEBUG_FS
+
+/* Number of entries in serialize_jobs_settings[] */
+#define NR_SERIALIZE_JOBS_SETTINGS 5
+/* Maximum string length in serialize_jobs_settings[].name */
+#define MAX_SERIALIZE_JOBS_NAME_LEN 16
+
+static struct
+{
+	char *name;
+	u8 setting;
+} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = {
+	{"none", 0},
+	{"intra-slot", KBASE_SERIALIZE_INTRA_SLOT},
+	{"inter-slot", KBASE_SERIALIZE_INTER_SLOT},
+	{"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT},
+	{"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT |
+			KBASE_SERIALIZE_RESET}
+};
+
+/**
+ * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs
+ *                                  file
+ * @sfile: seq_file pointer
+ * @data:  Private callback data
+ *
+ * This function is called to get the contents of the serialize_jobs debugfs
+ * file. This is a list of the available settings with the currently active one
+ * surrounded by square brackets.
+ *
+ * Return: 0 on success, or an error code on error
+ */
+static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_device *kbdev = sfile->private;
+	int i;
+
+	CSTD_UNUSED(data);
+
+	for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+		if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting)
+			seq_printf(sfile, "[%s] ",
+					serialize_jobs_settings[i].name);
+		else
+			seq_printf(sfile, "%s ",
+					serialize_jobs_settings[i].name);
+	}
+
+	seq_puts(sfile, "\n");
+
+	return 0;
+}
+
+/**
+ * kbasep_serialize_jobs_debugfs_write - Store callback for the serialize_jobs
+ *                                       debugfs file.
+ * @file:  File pointer
+ * @ubuf:  User buffer containing data to store
+ * @count: Number of bytes in user buffer
+ * @ppos:  File position
+ *
+ * This function is called when the serialize_jobs debugfs file is written to.
+ * It matches the requested setting against the available settings and if a
+ * matching setting is found updates kbdev->serialize_jobs.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	struct seq_file *s = file->private_data;
+	struct kbase_device *kbdev = s->private;
+	char buf[MAX_SERIALIZE_JOBS_NAME_LEN];
+	int i;
+	bool valid = false;
+
+	CSTD_UNUSED(ppos);
+
+	count = min_t(size_t, sizeof(buf) - 1, count);
+	if (copy_from_user(buf, ubuf, count))
+		return -EFAULT;
+
+	buf[count] = 0;
+
+	for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+		if (sysfs_streq(serialize_jobs_settings[i].name, buf)) {
+			kbdev->serialize_jobs =
+					serialize_jobs_settings[i].setting;
+			valid = true;
+			break;
+		}
+	}
+
+	if (!valid) {
+		dev_err(kbdev->dev, "serialize_jobs: invalid setting\n");
+		return -EINVAL;
+	}
+
+	return count;
+}
+
+/**
+ * kbasep_serialize_jobs_debugfs_open - Open callback for the serialize_jobs
+ *                                     debugfs file
+ * @in:   inode pointer
+ * @file: file pointer
+ *
+ * Return: Zero on success, error code on failure
+ */
+static int kbasep_serialize_jobs_debugfs_open(struct inode *in,
+		struct file *file)
+{
+	return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_serialize_jobs_debugfs_fops = {
+	.open = kbasep_serialize_jobs_debugfs_open,
+	.read = seq_read,
+	.write = kbasep_serialize_jobs_debugfs_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_DEBUG_FS */
+
+static int kbasep_protected_mode_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_OF
+	struct device_node *protected_node;
+	struct platform_device *pdev;
+	struct protected_mode_device *protected_dev;
+#endif
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		/* Use native protected ops */
+		kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev),
+				GFP_KERNEL);
+		if (!kbdev->protected_dev)
+			return -ENOMEM;
+		kbdev->protected_dev->data = kbdev;
+		kbdev->protected_ops = &kbase_native_protected_ops;
+		kbdev->protected_mode_support = true;
+		return 0;
+	}
+
+	kbdev->protected_mode_support = false;
+
+#ifdef CONFIG_OF
+	protected_node = of_parse_phandle(kbdev->dev->of_node,
+			"protected-mode-switcher", 0);
+
+	if (!protected_node)
+		protected_node = of_parse_phandle(kbdev->dev->of_node,
+				"secure-mode-switcher", 0);
+
+	if (!protected_node) {
+		/* If protected_node cannot be looked up then we assume
+		 * protected mode is not supported on this platform. */
+		dev_info(kbdev->dev, "Protected mode not available\n");
+		return 0;
+	}
+
+	pdev = of_find_device_by_node(protected_node);
+	if (!pdev)
+		return -EINVAL;
+
+	protected_dev = platform_get_drvdata(pdev);
+	if (!protected_dev)
+		return -EPROBE_DEFER;
+
+	kbdev->protected_ops = &protected_dev->ops;
+	kbdev->protected_dev = protected_dev;
+
+	if (kbdev->protected_ops) {
+		int err;
+
+		/* Make sure protected mode is disabled on startup */
+		mutex_lock(&kbdev->pm.lock);
+		err = kbdev->protected_ops->protected_mode_disable(
+				kbdev->protected_dev);
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* protected_mode_disable() returns -EINVAL if not supported */
+		kbdev->protected_mode_support = (err != -EINVAL);
+	}
+#endif
+	return 0;
+}
+
+static void kbasep_protected_mode_term(struct kbase_device *kbdev)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+		kfree(kbdev->protected_dev);
+}
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	int err = 0;
+
+	if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) {
+		dev_err(kbdev->dev, "Register window unavailable\n");
+		err = -EIO;
+		goto out_region;
+	}
+
+	kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+	if (!kbdev->reg) {
+		dev_err(kbdev->dev, "Can't remap register window\n");
+		err = -EINVAL;
+		goto out_ioremap;
+	}
+
+	return err;
+
+ out_ioremap:
+	release_mem_region(kbdev->reg_start, kbdev->reg_size);
+ out_region:
+	return err;
+}
+
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+	if (kbdev->reg) {
+		iounmap(kbdev->reg);
+		release_mem_region(kbdev->reg_start, kbdev->reg_size);
+		kbdev->reg = NULL;
+		kbdev->reg_start = 0;
+		kbdev->reg_size = 0;
+	}
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+static int registers_map(struct kbase_device * const kbdev)
+{
+
+		/* the first memory resource is the physical address of the GPU
+		 * registers */
+		struct platform_device *pdev = to_platform_device(kbdev->dev);
+		struct resource *reg_res;
+		int err;
+
+		reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!reg_res) {
+			dev_err(kbdev->dev, "Invalid register resource\n");
+			return -ENOENT;
+		}
+
+		kbdev->reg_start = reg_res->start;
+		kbdev->reg_size = resource_size(reg_res);
+
+		err = kbase_common_reg_map(kbdev);
+		if (err) {
+			dev_err(kbdev->dev, "Failed to map registers\n");
+			return err;
+		}
+
+	return 0;
+}
+
+static void registers_unmap(struct kbase_device *kbdev)
+{
+	kbase_common_reg_unmap(kbdev);
+}
+
+static int power_control_init(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int err = 0;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
+	if (IS_ERR_OR_NULL(kbdev->regulator)) {
+		err = PTR_ERR(kbdev->regulator);
+		kbdev->regulator = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get regulator\n");
+			return err;
+		}
+		dev_info(kbdev->dev,
+			"Continuing without Mali regulator control\n");
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+	kbdev->clock = clk_get(kbdev->dev, "clk_mali");
+	if (IS_ERR_OR_NULL(kbdev->clock)) {
+		err = PTR_ERR(kbdev->clock);
+		kbdev->clock = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get clock\n");
+			goto fail;
+		}
+		dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare_enable(kbdev->clock);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Failed to prepare and enable clock (%d)\n",
+				err);
+			goto fail;
+		}
+	}
+
+#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) \
+	|| defined(LSK_OPPV2_BACKPORT)
+	err = dev_pm_opp_of_add_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	err = of_init_opp_table(kbdev->dev);
+#else
+	err = 0;
+#endif /* LINUX_VERSION_CODE */
+	if (err)
+		dev_dbg(kbdev->dev, "OPP table not found\n");
+#endif /* CONFIG_OF && CONFIG_PM_OPP */
+
+	return 0;
+
+fail:
+
+if (kbdev->clock != NULL) {
+	clk_put(kbdev->clock);
+	kbdev->clock = NULL;
+}
+
+#ifdef CONFIG_REGULATOR
+	if (NULL != kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif
+
+	return err;
+}
+
+static void power_control_term(struct kbase_device *kbdev)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) || \
+		defined(LSK_OPPV2_BACKPORT)
+	dev_pm_opp_of_remove_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+	of_free_opp_table(kbdev->dev);
+#endif
+
+	if (kbdev->clock) {
+		clk_disable_unprepare(kbdev->clock);
+		clk_put(kbdev->clock);
+		kbdev->clock = NULL;
+	}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	if (kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#if KBASE_GPU_RESET_EN
+#include <mali_kbase_hwaccess_jm.h>
+
+static void trigger_quirks_reload(struct kbase_device *kbdev)
+{
+	kbase_pm_context_active(kbdev);
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	kbase_pm_context_idle(kbdev);
+}
+
+#define MAKE_QUIRK_ACCESSORS(type) \
+static int type##_quirks_set(void *data, u64 val) \
+{ \
+	struct kbase_device *kbdev; \
+	kbdev = (struct kbase_device *)data; \
+	kbdev->hw_quirks_##type = (u32)val; \
+	trigger_quirks_reload(kbdev); \
+	return 0;\
+} \
+\
+static int type##_quirks_get(void *data, u64 *val) \
+{ \
+	struct kbase_device *kbdev;\
+	kbdev = (struct kbase_device *)data;\
+	*val = kbdev->hw_quirks_##type;\
+	return 0;\
+} \
+DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
+		type##_quirks_set, "%llu\n")
+
+MAKE_QUIRK_ACCESSORS(sc);
+MAKE_QUIRK_ACCESSORS(tiler);
+MAKE_QUIRK_ACCESSORS(mmu);
+MAKE_QUIRK_ACCESSORS(jm);
+
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read
+ * @file: File object to read is for
+ * @buf:  User buffer to populate with data
+ * @len:  Length of user buffer
+ * @ppos: Offset within file object
+ *
+ * Retrieves the current status of protected debug mode
+ * (0 = disabled, 1 = enabled)
+ *
+ * Return: Number of bytes added to user buffer
+ */
+static ssize_t debugfs_protected_debug_mode_read(struct file *file,
+				char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)file->private_data;
+	u32 gpu_status;
+	ssize_t ret_val;
+
+	kbase_pm_context_active(kbdev);
+	gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL);
+	kbase_pm_context_idle(kbdev);
+
+	if (gpu_status & GPU_DBGEN)
+		ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2);
+	else
+		ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2);
+
+	return ret_val;
+}
+
+/*
+ * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops
+ *
+ * Contains the file operations for the "protected_debug_mode" debugfs file
+ */
+static const struct file_operations fops_protected_debug_mode = {
+	.open = simple_open,
+	.read = debugfs_protected_debug_mode_read,
+	.llseek = default_llseek,
+};
+
+static int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	struct dentry *debugfs_ctx_defaults_directory;
+	int err;
+
+	kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
+			NULL);
+	if (!kbdev->mali_debugfs_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
+			kbdev->mali_debugfs_directory);
+	if (!kbdev->debugfs_ctx_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
+			kbdev->debugfs_ctx_directory);
+	if (!debugfs_ctx_defaults_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+#if !MALI_CUSTOMER_RELEASE
+	kbasep_regs_dump_debugfs_init(kbdev);
+#endif /* !MALI_CUSTOMER_RELEASE */
+	kbasep_regs_history_debugfs_init(kbdev);
+
+	kbase_debug_job_fault_debugfs_init(kbdev);
+	kbasep_gpu_memory_debugfs_init(kbdev);
+	kbase_as_fault_debugfs_init(kbdev);
+#if KBASE_GPU_RESET_EN
+	/* fops_* variables created by invocations of macro
+	 * MAKE_QUIRK_ACCESSORS() above. */
+	debugfs_create_file("quirks_sc", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_sc_quirks);
+	debugfs_create_file("quirks_tiler", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_tiler_quirks);
+	debugfs_create_file("quirks_mmu", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_mmu_quirks);
+	debugfs_create_file("quirks_jm", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_jm_quirks);
+#endif /* KBASE_GPU_RESET_EN */
+
+	debugfs_create_bool("infinite_cache", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->infinite_cache_active_default);
+
+	debugfs_create_size_t("mem_pool_max_size", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->mem_pool_max_size_default);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		debugfs_create_file("protected_debug_mode", S_IRUGO,
+				kbdev->mali_debugfs_directory, kbdev,
+				&fops_protected_debug_mode);
+	}
+
+#if KBASE_TRACE_ENABLE
+	kbasep_trace_debugfs_init(kbdev);
+#endif /* KBASE_TRACE_ENABLE */
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kbasep_trace_timeline_debugfs_init(kbdev);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+#ifdef CONFIG_MALI_DEVFREQ
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->inited_subsys & inited_devfreq)
+		kbase_ipa_debugfs_init(kbdev);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+
+#ifdef CONFIG_DEBUG_FS
+	debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_serialize_jobs_debugfs_fops);
+#endif /* CONFIG_DEBUG_FS */
+
+	return 0;
+
+out:
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+	return err;
+}
+
+static void kbase_device_debugfs_term(struct kbase_device *kbdev)
+{
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+}
+
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	return 0;
+}
+
+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
+#endif /* CONFIG_DEBUG_FS */
+
+static void kbase_device_coherency_init(struct kbase_device *kbdev,
+		unsigned prod_id)
+{
+#ifdef CONFIG_OF
+	u32 supported_coherency_bitmap =
+		kbdev->gpu_props.props.raw_props.coherency_mode;
+	const void *coherency_override_dts;
+	u32 override_coherency;
+
+	/* Only for tMIx :
+	 * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
+	 * documented for tMIx so force correct value here.
+	 */
+	if (GPU_ID_IS_NEW_FORMAT(prod_id) &&
+		   (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
+				   GPU_ID2_PRODUCT_TMIX))
+		if (supported_coherency_bitmap ==
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE))
+			supported_coherency_bitmap |=
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+
+#endif /* CONFIG_OF */
+
+	kbdev->system_coherency = COHERENCY_NONE;
+
+	/* device tree may override the coherency */
+#ifdef CONFIG_OF
+	coherency_override_dts = of_get_property(kbdev->dev->of_node,
+						"system-coherency",
+						NULL);
+	if (coherency_override_dts) {
+
+		override_coherency = be32_to_cpup(coherency_override_dts);
+
+		if ((override_coherency <= COHERENCY_NONE) &&
+			(supported_coherency_bitmap &
+			 COHERENCY_FEATURE_BIT(override_coherency))) {
+
+			kbdev->system_coherency = override_coherency;
+
+			dev_info(kbdev->dev,
+				"Using coherency mode %u set from dtb",
+				override_coherency);
+		} else
+			dev_warn(kbdev->dev,
+				"Ignoring unsupported coherency mode %u set from dtb",
+				override_coherency);
+	}
+
+#endif /* CONFIG_OF */
+
+	kbdev->gpu_props.props.raw_props.coherency_mode =
+		kbdev->system_coherency;
+}
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+
+/* Callback used by the kbase bus logger client, to initiate a GPU reset
+ * when the bus log is restarted.  GPU reset is used as reference point
+ * in HW bus log analyses.
+ */
+static void kbase_logging_started_cb(void *data)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)data;
+
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	dev_info(kbdev->dev, "KBASE - Bus logger restarted\n");
+}
+#endif
+
+static struct attribute *kbase_attrs[] = {
+#ifdef CONFIG_MALI_DEBUG
+	&dev_attr_debug_command.attr,
+	&dev_attr_js_softstop_always.attr,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&dev_attr_force_replay.attr,
+#endif
+	&dev_attr_js_timeouts.attr,
+	&dev_attr_soft_job_timeout.attr,
+	&dev_attr_gpuinfo.attr,
+	&dev_attr_dvfs_period.attr,
+	&dev_attr_pm_poweroff.attr,
+	&dev_attr_reset_timeout.attr,
+	&dev_attr_js_scheduling_period.attr,
+	&dev_attr_power_policy.attr,
+	&dev_attr_core_availability_policy.attr,
+	&dev_attr_core_mask.attr,
+	&dev_attr_mem_pool_size.attr,
+	&dev_attr_mem_pool_max_size.attr,
+	&dev_attr_lp_mem_pool_size.attr,
+	&dev_attr_lp_mem_pool_max_size.attr,
+	NULL
+};
+
+static const struct attribute_group kbase_attr_group = {
+	.attrs = kbase_attrs,
+};
+
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	const struct list_head *dev_list;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kfree(kbdev->gpu_props.prop_buffer);
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	if (kbdev->inited_subsys & inited_buslogger) {
+		bl_core_client_unregister(kbdev->buslogger);
+		kbdev->inited_subsys &= ~inited_buslogger;
+	}
+#endif
+
+
+	if (kbdev->inited_subsys & inited_dev_list) {
+		dev_list = kbase_dev_list_get();
+		list_del(&kbdev->entry);
+		kbase_dev_list_put(dev_list);
+		kbdev->inited_subsys &= ~inited_dev_list;
+	}
+
+	if (kbdev->inited_subsys & inited_misc_register) {
+		misc_deregister(&kbdev->mdev);
+		kbdev->inited_subsys &= ~inited_misc_register;
+	}
+
+	if (kbdev->inited_subsys & inited_sysfs_group) {
+		sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+		kbdev->inited_subsys &= ~inited_sysfs_group;
+	}
+
+	if (kbdev->inited_subsys & inited_get_device) {
+		put_device(kbdev->dev);
+		kbdev->inited_subsys &= ~inited_get_device;
+	}
+
+	if (kbdev->inited_subsys & inited_debugfs) {
+		kbase_device_debugfs_term(kbdev);
+		kbdev->inited_subsys &= ~inited_debugfs;
+	}
+
+	if (kbdev->inited_subsys & inited_job_fault) {
+		kbase_debug_job_fault_dev_term(kbdev);
+		kbdev->inited_subsys &= ~inited_job_fault;
+	}
+	if (kbdev->inited_subsys & inited_vinstr) {
+		kbase_vinstr_term(kbdev->vinstr_ctx);
+		kbdev->inited_subsys &= ~inited_vinstr;
+	}
+
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_term(kbdev);
+		kbdev->inited_subsys &= ~inited_devfreq;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_backend_late) {
+		kbase_backend_late_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_late;
+	}
+
+	if (kbdev->inited_subsys & inited_tlstream) {
+		kbase_tlstream_term();
+		kbdev->inited_subsys &= ~inited_tlstream;
+	}
+
+	/* Bring job and mem sys to a halt before we continue termination */
+
+	if (kbdev->inited_subsys & inited_js)
+		kbasep_js_devdata_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_mem)
+		kbase_mem_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_protected) {
+		kbasep_protected_mode_term(kbdev);
+		kbdev->inited_subsys &= ~inited_protected;
+	}
+
+	if (kbdev->inited_subsys & inited_js) {
+		kbasep_js_devdata_term(kbdev);
+		kbdev->inited_subsys &= ~inited_js;
+	}
+
+	if (kbdev->inited_subsys & inited_mem) {
+		kbase_mem_term(kbdev);
+		kbdev->inited_subsys &= ~inited_mem;
+	}
+
+	if (kbdev->inited_subsys & inited_ctx_sched) {
+		kbase_ctx_sched_term(kbdev);
+		kbdev->inited_subsys &= ~inited_ctx_sched;
+	}
+
+	if (kbdev->inited_subsys & inited_device) {
+		kbase_device_term(kbdev);
+		kbdev->inited_subsys &= ~inited_device;
+	}
+
+	if (kbdev->inited_subsys & inited_backend_early) {
+		kbase_backend_early_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_early;
+	}
+
+	if (kbdev->inited_subsys & inited_io_history) {
+		kbase_io_history_term(&kbdev->io_history);
+		kbdev->inited_subsys &= ~inited_io_history;
+	}
+
+	if (kbdev->inited_subsys & inited_power_control) {
+		power_control_term(kbdev);
+		kbdev->inited_subsys &= ~inited_power_control;
+	}
+
+	if (kbdev->inited_subsys & inited_registers_map) {
+		registers_unmap(kbdev);
+		kbdev->inited_subsys &= ~inited_registers_map;
+	}
+
+#ifdef CONFIG_MALI_NO_MALI
+	if (kbdev->inited_subsys & inited_gpu_device) {
+		gpu_device_destroy(kbdev);
+		kbdev->inited_subsys &= ~inited_gpu_device;
+	}
+#endif /* CONFIG_MALI_NO_MALI */
+
+	if (kbdev->inited_subsys != 0)
+		dev_err(kbdev->dev, "Missing sub system termination\n");
+
+	kbase_device_free(kbdev);
+
+	return 0;
+}
+
+
+/* Number of register accesses for the buffer that we allocate during
+ * initialization time. The buffer size can be changed later via debugfs. */
+#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512)
+
+static int kbase_platform_device_probe(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev;
+	struct mali_base_gpu_core_props *core_props;
+	u32 gpu_id;
+	unsigned prod_id;
+	const struct list_head *dev_list;
+	int err = 0;
+
+#ifdef CONFIG_OF
+	err = kbase_platform_early_init();
+	if (err) {
+		dev_err(&pdev->dev, "Early platform initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+#endif
+	kbdev = kbase_device_alloc();
+	if (!kbdev) {
+		dev_err(&pdev->dev, "Allocate device failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+
+	kbdev->dev = &pdev->dev;
+	dev_set_drvdata(kbdev->dev, kbdev);
+
+#ifdef CONFIG_MALI_NO_MALI
+	err = gpu_device_create(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Dummy model initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_gpu_device;
+#endif /* CONFIG_MALI_NO_MALI */
+
+	err = assign_irqs(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "IRQ search failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	err = registers_map(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Register map failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_registers_map;
+
+	err = power_control_init(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Power control initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_power_control;
+
+	err = kbase_io_history_init(&kbdev->io_history,
+			KBASEP_DEFAULT_REGISTER_HISTORY_SIZE);
+	if (err) {
+		dev_err(&pdev->dev, "Register access history initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+	kbdev->inited_subsys |= inited_io_history;
+
+	err = kbase_backend_early_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Early backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_early;
+
+	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+			kbase_dev_nr);
+
+	kbase_disjoint_init(kbdev);
+
+	/* obtain min/max configured gpu frequencies */
+	core_props = &(kbdev->gpu_props.props.core_props);
+	core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+	core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+
+	err = kbase_device_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Device initialization failed (%d)\n", err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_device;
+
+	err = kbase_ctx_sched_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Context scheduler initialization failed (%d)\n",
+				err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_ctx_sched;
+
+	err = kbase_mem_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Memory subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_mem;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+	prod_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	kbase_device_coherency_init(kbdev, prod_id);
+
+	err = kbasep_protected_mode_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Protected mode subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_protected;
+
+	dev_list = kbase_dev_list_get();
+	list_add(&kbdev->entry, &kbase_dev_list);
+	kbase_dev_list_put(dev_list);
+	kbdev->inited_subsys |= inited_dev_list;
+
+	err = kbasep_js_devdata_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job JS devdata initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_js;
+
+	err = kbase_tlstream_init();
+	if (err) {
+		dev_err(kbdev->dev, "Timeline stream initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_tlstream;
+
+	err = kbase_backend_late_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Late backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_late;
+
+	/* Initialize the kctx list. This is used by vinstr. */
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
+
+	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+	if (!kbdev->vinstr_ctx) {
+		dev_err(kbdev->dev,
+			"Virtual instrumentation initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+	kbdev->inited_subsys |= inited_vinstr;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	/* Devfreq uses vinstr, so must be initialized after it. */
+	err = kbase_devfreq_init(kbdev);
+	if (!err)
+		kbdev->inited_subsys |= inited_devfreq;
+	else
+		dev_err(kbdev->dev, "Continuing without devfreq\n");
+#endif /* CONFIG_MALI_DEVFREQ */
+
+	err = kbase_debug_job_fault_dev_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job fault debug initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_job_fault;
+
+	err = kbase_device_debugfs_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "DebugFS initialization failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_debugfs;
+
+	kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+	kbdev->mdev.name = kbdev->devname;
+	kbdev->mdev.fops = &kbase_fops;
+	kbdev->mdev.parent = get_device(kbdev->dev);
+	kbdev->inited_subsys |= inited_get_device;
+
+	/* This needs to happen before registering the device with misc_register(),
+	 * otherwise it causes a race condition between registering the device and a
+	 * uevent event being generated for userspace, causing udev rules to run
+	 * which might expect certain sysfs attributes present. As a result of the
+	 * race condition we avoid, some Mali sysfs entries may have appeared to
+	 * udev to not exist.
+
+	 * For more information, see
+	 * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the
+	 * paragraph that starts with "Word of warning", currently the second-last
+	 * paragraph.
+	 */
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+	if (err) {
+		dev_err(&pdev->dev, "SysFS group creation failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_sysfs_group;
+
+	err = misc_register(&kbdev->mdev);
+	if (err) {
+		dev_err(kbdev->dev, "Misc device registration failed for %s\n",
+			kbdev->devname);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_misc_register;
+
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	err = bl_core_client_register(kbdev->devname,
+						kbase_logging_started_cb,
+						kbdev, &kbdev->buslogger,
+						THIS_MODULE, NULL);
+	if (err == 0) {
+		kbdev->inited_subsys |= inited_buslogger;
+		bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
+	} else {
+		dev_warn(kbdev->dev, "Bus log client registration failed\n");
+		err = 0;
+	}
+#endif
+
+	err = kbase_gpuprops_populate_user_buffer(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "GPU property population failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	dev_info(kbdev->dev,
+			"Probed as %s\n", dev_name(kbdev->mdev.this_device));
+
+	kbase_dev_nr++;
+
+	return err;
+}
+
+#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE
+
+/**
+ * kbase_device_suspend - Suspend callback from the OS.
+ *
+ * This is called by Linux when the device should suspend.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+static int kbase_device_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_suspend_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 1;
+#endif
+
+	kbase_pm_suspend(kbdev);
+	return 0;
+}
+
+/**
+ * kbase_device_resume - Resume callback from the OS.
+ *
+ * This is called by Linux when the device should resume from suspension.
+ *
+ * @dev:  The device to resume
+ *
+ * Return: A standard Linux error code
+ */
+static int kbase_device_resume(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kbase_pm_resume(kbdev);
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_resume_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 0;
+#endif
+	return 0;
+}
+
+/**
+ * kbase_device_runtime_suspend - Runtime suspend callback from the OS.
+ *
+ * This is called by Linux when the device should prepare for a condition in
+ * which it will not be able to communicate with the CPU(s) and RAM due to
+ * power management.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_suspend_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 1;
+#endif
+
+	if (kbdev->pm.backend.callback_power_runtime_off) {
+		kbdev->pm.backend.callback_power_runtime_off(kbdev);
+		dev_dbg(dev, "runtime suspend\n");
+	}
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/**
+ * kbase_device_runtime_resume - Runtime resume callback from the OS.
+ *
+ * This is called by Linux when the device should go into a fully active state.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_resume(struct device *dev)
+{
+	int ret = 0;
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->pm.backend.callback_power_runtime_on) {
+		ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
+		dev_dbg(dev, "runtime resume\n");
+	}
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_resume_device(kbdev->devfreq);
+#endif
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	mali_pm_statue = 0;
+#endif
+
+	return ret;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_device_runtime_idle - Runtime idle callback from the OS.
+ * @dev: The device to suspend
+ *
+ * This is called by Linux when the device appears to be inactive and it might
+ * be placed into a low power state.
+ *
+ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend,
+ * otherwise a standard Linux error code
+ */
+static int kbase_device_runtime_idle(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* Use platform specific implementation if it exists. */
+	if (kbdev->pm.backend.callback_power_runtime_idle)
+		return kbdev->pm.backend.callback_power_runtime_idle(kbdev);
+
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+#ifndef CONFIG_MALI_DEVFREQ
+static int mali_os_freeze(struct device *device)
+{
+	mali_dev_freeze();
+	return kbase_device_suspend(device);
+}
+
+static int mali_os_restore(struct device *device)
+{
+	mali_dev_restore();
+	return kbase_device_resume(device);
+}
+#endif
+
+
+/* The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+	.suspend = kbase_device_suspend,
+	.resume = kbase_device_resume,
+#ifndef CONFIG_MALI_DEVFREQ
+	.freeze = mali_os_freeze,
+	.thaw = kbase_device_resume,
+	.restore = mali_os_restore,
+#endif
+#ifdef KBASE_PM_RUNTIME
+	.runtime_suspend = kbase_device_runtime_suspend,
+	.runtime_resume = kbase_device_runtime_resume,
+	.runtime_idle = kbase_device_runtime_idle,
+#endif /* KBASE_PM_RUNTIME */
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+	{ .compatible = "arm,malit6xx" },
+	{ .compatible = "arm,mali-midgard" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
+
+static struct platform_driver kbase_platform_driver = {
+	.probe = kbase_platform_device_probe,
+	.remove = kbase_platform_device_remove,
+	.driver = {
+		   .name = kbase_drv_name,
+		   .owner = THIS_MODULE,
+		   .pm = &kbase_pm_ops,
+		   .of_match_table = of_match_ptr(kbase_dt_ids),
+	},
+};
+
+/*
+ * The driver will not provide a shortcut to create the Mali platform device
+ * anymore when using Device Tree.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else
+
+static int __init kbase_driver_init(void)
+{
+	int ret;
+
+	ret = kbase_platform_early_init();
+	if (ret)
+		return ret;
+
+	ret = kbase_platform_register();
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&kbase_platform_driver);
+
+	if (ret)
+		kbase_platform_unregister();
+
+	return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+	platform_driver_unregister(&kbase_platform_driver);
+	kbase_platform_unregister();
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
+		__stringify(BASE_UK_VERSION_MAJOR) "." \
+		__stringify(BASE_UK_VERSION_MINOR) ")");
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
+#define CREATE_TRACE_POINTS
+#endif
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Create the trace points (otherwise we just get code to call a tracepoint) */
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
+
+void kbase_trace_mali_pm_status(u32 event, u64 value)
+{
+	trace_mali_pm_status(event, value);
+}
+
+void kbase_trace_mali_pm_power_off(u32 event, u64 value)
+{
+	trace_mali_pm_power_off(event, value);
+}
+
+void kbase_trace_mali_pm_power_on(u32 event, u64 value)
+{
+	trace_mali_pm_power_on(event, value);
+}
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id)
+{
+	trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id);
+}
+
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value)
+{
+	trace_mali_page_fault_insert_pages(event, value);
+}
+
+void kbase_trace_mali_mmu_as_in_use(int event)
+{
+	trace_mali_mmu_as_in_use(event);
+}
+
+void kbase_trace_mali_mmu_as_released(int event)
+{
+	trace_mali_mmu_as_released(event);
+}
+
+void kbase_trace_mali_total_alloc_pages_change(long long int event)
+{
+	trace_mali_total_alloc_pages_change(event);
+}
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_MALI_SYSTEM_TRACE
+#include "mali_linux_kbase_trace.h"
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
new file mode 100644
index 0000000..e2f7baa
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
@@ -0,0 +1,203 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_ctx_sched.h"
+
+int kbase_ctx_sched_init(struct kbase_device *kbdev)
+{
+	int as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
+
+	/* These two must be recalculated if nr_hw_address_spaces changes
+	 * (e.g. for HW workarounds) */
+	kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+		bool use_workaround;
+
+		use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
+		if (use_workaround) {
+			dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
+			kbdev->nr_user_address_spaces = 1;
+		}
+	}
+
+	kbdev->as_free = as_present; /* All ASs initially free */
+
+	memset(kbdev->as_to_kctx, 0, sizeof(kbdev->as_to_kctx));
+
+	return 0;
+}
+
+void kbase_ctx_sched_term(struct kbase_device *kbdev)
+{
+	s8 i;
+
+	/* Sanity checks */
+	for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
+		WARN_ON(kbdev->as_to_kctx[i] != NULL);
+		WARN_ON(!(kbdev->as_free & (1u << i)));
+	}
+}
+
+/* kbasep_ctx_sched_find_as_for_ctx - Find a free address space
+ *
+ * @kbdev: The context for which to find a free address space
+ *
+ * Return: A valid AS if successful, otherwise KBASEP_AS_NR_INVALID
+ *
+ * This function returns an address space available for use. It would prefer
+ * returning an AS that has been previously assigned to the context to
+ * avoid having to reprogram the MMU.
+ */
+static int kbasep_ctx_sched_find_as_for_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+	int free_as;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* First check if the previously assigned AS is available */
+	if ((kctx->as_nr != KBASEP_AS_NR_INVALID) &&
+			(kbdev->as_free & (1u << kctx->as_nr)))
+		return kctx->as_nr;
+
+	/* The previously assigned AS was taken, we'll be returning any free
+	 * AS at this point.
+	 */
+	free_as = ffs(kbdev->as_free) - 1;
+	if (free_as >= 0 && free_as < kbdev->nr_hw_address_spaces)
+		return free_as;
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(!kbdev->pm.backend.gpu_powered);
+
+	if (atomic_inc_return(&kctx->refcount) == 1) {
+		int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx);
+
+		if (free_as != KBASEP_AS_NR_INVALID) {
+			kbdev->as_free &= ~(1u << free_as);
+			/* Only program the MMU if the context has not been
+			 * assigned the same address space before.
+			 */
+			if (free_as != kctx->as_nr) {
+				struct kbase_context *const prev_kctx =
+					kbdev->as_to_kctx[free_as];
+
+				if (prev_kctx) {
+					WARN_ON(atomic_read(&prev_kctx->refcount) != 0);
+					kbase_mmu_disable(prev_kctx);
+					prev_kctx->as_nr = KBASEP_AS_NR_INVALID;
+				}
+
+				kctx->as_nr = free_as;
+				kbdev->as_to_kctx[free_as] = kctx;
+				kbase_mmu_update(kctx);
+			}
+		} else {
+			atomic_dec(&kctx->refcount);
+
+			/* Failed to find an available address space, we must
+			 * be returning an error at this point.
+			 */
+			WARN_ON(kctx->as_nr != KBASEP_AS_NR_INVALID);
+		}
+	}
+
+	return kctx->as_nr;
+}
+
+void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	WARN_ON(atomic_read(&kctx->refcount) == 0);
+	WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID);
+	WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx);
+
+	atomic_inc(&kctx->refcount);
+}
+
+void kbase_ctx_sched_release_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (atomic_dec_return(&kctx->refcount) == 0)
+		kbdev->as_free |= (1u << kctx->as_nr);
+}
+
+void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(atomic_read(&kctx->refcount) != 0);
+
+	if (kctx->as_nr != KBASEP_AS_NR_INVALID) {
+		if (kbdev->pm.backend.gpu_powered)
+			kbase_mmu_disable(kctx);
+
+		kbdev->as_to_kctx[kctx->as_nr] = NULL;
+		kctx->as_nr = KBASEP_AS_NR_INVALID;
+	}
+}
+
+void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
+{
+	s8 i;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(!kbdev->pm.backend.gpu_powered);
+
+	for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
+		struct kbase_context *kctx;
+
+		kctx = kbdev->as_to_kctx[i];
+		if (kctx) {
+			if (atomic_read(&kctx->refcount)) {
+				WARN_ON(kctx->as_nr != i);
+
+				kbase_mmu_update(kctx);
+			} else {
+				/* This context might have been assigned an
+				 * AS before, clear it.
+				 */
+				kbdev->as_to_kctx[kctx->as_nr] = NULL;
+				kctx->as_nr = KBASEP_AS_NR_INVALID;
+			}
+		} else {
+			kbase_mmu_disable_as(kbdev, i);
+		}
+	}
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h
new file mode 100644
index 0000000..77d2232
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_CTX_SCHED_H_
+#define _KBASE_CTX_SCHED_H_
+
+#include <mali_kbase.h>
+
+/* The Context Scheduler manages address space assignment and reference
+ * counting to kbase_context. The interface has been designed to minimise
+ * interactions between the Job Scheduler and Power Management/MMU to support
+ * the existing Job Scheduler interface.
+ *
+ * The initial implementation of the Context Scheduler does not schedule
+ * contexts. Instead it relies on the Job Scheduler to make decisions of
+ * when to schedule/evict contexts if address spaces are starved. In the
+ * future, once an interface between the CS and JS has been devised to
+ * provide enough information about how each context is consuming GPU resources,
+ * those decisions can be made in the CS itself, thereby reducing duplicated
+ * code.
+ */
+
+/* base_ctx_sched_init - Initialise the context scheduler
+ *
+ * @kbdev: The device for which the context scheduler needs to be
+ *         initialised
+ *
+ * Return: 0 for success, otherwise failure
+ *
+ * This must be called during device initilisation. The number of hardware
+ * address spaces must already be established before calling this function.
+ */
+int kbase_ctx_sched_init(struct kbase_device *kbdev);
+
+/* base_ctx_sched_term - Terminate the context scheduler
+ *
+ * @kbdev: The device for which the context scheduler needs to be
+ *         terminated
+ *
+ * This must be called during device termination after all contexts have been
+ * destroyed.
+ */
+void kbase_ctx_sched_term(struct kbase_device *kbdev);
+
+/* kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context
+ *
+ * @kctx: The context to which to retain a reference
+ *
+ * Return: The address space that the context has been assigned to or
+ *         KBASEP_AS_NR_INVALID if no address space was available.
+ *
+ * This function should be called whenever an address space should be assigned
+ * to a context and programmed onto the MMU. It should typically be called
+ * when jobs are ready to be submitted to the GPU.
+ *
+ * It can be called as many times as necessary. The address space will be
+ * assigned to the context for as long as there is a reference to said context.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ */
+int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx);
+
+/* kbase_ctx_sched_retain_ctx_refcount
+ *
+ * @kctx: The context to which to retain a reference
+ *
+ * This function only retains a reference to the context. It must be called
+ * only when the context already has a reference.
+ *
+ * This is typically called inside an atomic session where we know the context
+ * is already scheduled in but want to take an extra reference to ensure that
+ * it doesn't get descheduled.
+ *
+ * The kbase_device::hwaccess_lock must be held whilst calling this function
+ */
+void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx);
+
+/* kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context
+ *
+ * @kctx: The context from which to release a reference
+ *
+ * This function should be called whenever an address space could be unassigned
+ * from a context. When there are no more references to said context, the
+ * address space previously assigned to this context shall be reassigned to
+ * other contexts as needed.
+ *
+ * The kbase_device::hwaccess_lock must be held whilst calling this function
+ */
+void kbase_ctx_sched_release_ctx(struct kbase_context *kctx);
+
+/* kbase_ctx_sched_remove_ctx - Unassign previously assigned address space
+ *
+ * @kctx: The context to be removed
+ *
+ * This function should be called when a context is being destroyed. The
+ * context must no longer have any reference. If it has been assigned an
+ * address space before then the AS will be unprogrammed.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ */
+void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx);
+
+/* kbase_ctx_sched_restore_all_as - Reprogram all address spaces
+ *
+ * @kbdev: The device for which address spaces to be reprogrammed
+ *
+ * This function shall reprogram all address spaces previously assigned to
+ * contexts. It can be used after the GPU is reset.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ */
+void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev);
+
+#endif /* _KBASE_CTX_SCHED_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100644
index 0000000..fb57ac2
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+	NULL,
+	NULL
+};
+
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+	kbasep_debug_assert_registered_cb.func = func;
+	kbasep_debug_assert_registered_cb.param = param;
+}
+
+void kbasep_debug_assert_call_hook(void)
+{
+	if (kbasep_debug_assert_registered_cb.func != NULL)
+		kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
new file mode 100644
index 0000000..d7873c5
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If different from 0, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+struct kbasep_debug_assert_cb {
+	kbase_debug_assert_hook *func;
+	void *param;
+};
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+		"In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION __func__
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated to the @ref KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+		do { \
+			pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+			pr_err(__VA_ARGS__);\
+			pr_err("\n");\
+		} while (false)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @ref KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+	KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+	/**
+	 * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+	 * @brief Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+	 *
+	 * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+	 *
+	 * @param expr Boolean expression
+	 * @param ...  Message to display when @a expr is false, as a format string followed by format arguments.
+	 */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+		do { \
+			if (!(expr)) { \
+				KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+				KBASE_CALL_ASSERT_HOOK();\
+				BUG();\
+			} \
+		} while (false)
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif				/* CONFIG_MALI_DEBUG */
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif				/* _KBASE_DEBUG_H */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
new file mode 100644
index 0000000..f29430d
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
@@ -0,0 +1,499 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/spinlock.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+	bool             ret;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	ret = !list_empty(event_list);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return ret;
+}
+
+static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
+	struct base_job_fault_event *event;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		return true;
+	}
+	list_for_each_entry(event, event_list, head) {
+		if (event->katom->kctx == kctx) {
+			spin_unlock_irqrestore(&kbdev->job_fault_event_lock,
+					flags);
+			return false;
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+	return true;
+}
+
+/* wait until the fault happen and copy the event */
+static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
+		struct base_job_fault_event *event)
+{
+	struct list_head            *event_list = &kbdev->job_fault_event_list;
+	struct base_job_fault_event *event_in;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		if (wait_event_interruptible(kbdev->job_fault_wq,
+				 kbase_is_job_fault_event_pending(kbdev)))
+			return -ERESTARTSYS;
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+
+	event_in = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	event->event_code = event_in->event_code;
+	event->katom = event_in->katom;
+
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return 0;
+
+}
+
+/* remove the event from the queue */
+static struct base_job_fault_event *kbase_job_fault_event_dequeue(
+		struct kbase_device *kbdev, struct list_head *event_list)
+{
+	struct base_job_fault_event *event;
+
+	event = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	list_del(event_list->next);
+
+	return event;
+
+}
+
+/* Remove all the following atoms after the failed atom in the same context
+ * Call the postponed bottom half of job done.
+ * Then, this context could be rescheduled.
+ */
+static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
+{
+	struct list_head *event_list = &kctx->job_fault_resume_event_list;
+
+	while (!list_empty(event_list)) {
+		struct base_job_fault_event *event;
+
+		event = kbase_job_fault_event_dequeue(kctx->kbdev,
+				&kctx->job_fault_resume_event_list);
+		kbase_jd_done_worker(&event->katom->work);
+	}
+
+}
+
+/* Remove all the failed atoms that belong to different contexts
+ * Resume all the contexts that were suspend due to failed job
+ */
+static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	while (!list_empty(event_list)) {
+		kbase_job_fault_event_dequeue(kbdev, event_list);
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		wake_up(&kbdev->job_fault_resume_wq);
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+}
+
+static void kbase_job_fault_resume_worker(struct work_struct *data)
+{
+	struct base_job_fault_event *event = container_of(data,
+			struct base_job_fault_event, job_fault_work);
+	struct kbase_context *kctx;
+	struct kbase_jd_atom *katom;
+
+	katom = event->katom;
+	kctx = katom->kctx;
+
+	dev_info(kctx->kbdev->dev, "Job dumping wait\n");
+
+	/* When it was waked up, it need to check if queue is empty or the
+	 * failed atom belongs to different context. If yes, wake up. Both
+	 * of them mean the failed job has been dumped. Please note, it
+	 * should never happen that the job_fault_event_list has the two
+	 * atoms belong to the same context.
+	 */
+	wait_event(kctx->kbdev->job_fault_resume_wq,
+			 kbase_ctx_has_no_event_pending(kctx));
+
+	atomic_set(&kctx->job_fault_count, 0);
+	kbase_jd_done_worker(&katom->work);
+
+	/* In case the following atoms were scheduled during failed job dump
+	 * the job_done_worker was held. We need to rerun it after the dump
+	 * was finished
+	 */
+	kbase_job_fault_resume_event_cleanup(kctx);
+
+	dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
+}
+
+static struct base_job_fault_event *kbase_job_fault_event_queue(
+		struct list_head *event_list,
+		struct kbase_jd_atom *atom,
+		u32 completion_code)
+{
+	struct base_job_fault_event *event;
+
+	event = &atom->fault_event;
+
+	event->katom = atom;
+	event->event_code = completion_code;
+
+	list_add_tail(&event->head, event_list);
+
+	return event;
+
+}
+
+static void kbase_job_fault_event_post(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, u32 completion_code)
+{
+	struct base_job_fault_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
+				katom, completion_code);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	wake_up_interruptible(&kbdev->job_fault_wq);
+
+	INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
+	queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
+
+	dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
+			katom->kctx->tgid, katom->kctx->id);
+
+}
+
+/*
+ * This function will process the job fault
+ * Get the register copy
+ * Send the failed job dump event
+ * Create a Wait queue to wait until the job dump finish
+ */
+
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Check if dumping is in the process
+	 * only one atom of each context can be dumped at the same time
+	 * If the atom belongs to different context, it can be dumped
+	 */
+	if (atomic_read(&kctx->job_fault_count) > 0) {
+		kbase_job_fault_event_queue(
+				&kctx->job_fault_resume_event_list,
+				katom, completion_code);
+		dev_info(kctx->kbdev->dev, "queue:%d\n",
+				kbase_jd_atom_id(kctx, katom));
+		return true;
+	}
+
+	if (kctx->kbdev->job_fault_debug == true) {
+
+		if (completion_code != BASE_JD_EVENT_DONE) {
+
+			if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
+				dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
+				return false;
+			}
+
+			kbase_job_fault_event_post(kctx->kbdev, katom,
+					completion_code);
+			atomic_inc(&kctx->job_fault_count);
+			dev_info(kctx->kbdev->dev, "post:%d\n",
+					kbase_jd_atom_id(kctx, katom));
+			return true;
+
+		}
+	}
+	return false;
+
+}
+
+static int debug_job_fault_show(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+	struct kbase_context *kctx = event->katom->kctx;
+	int i;
+
+	dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
+			kctx->tgid, kctx->id, event->reg_offset);
+
+	if (kctx->reg_dump == NULL) {
+		dev_warn(kbdev->dev, "reg dump is NULL");
+		return -1;
+	}
+
+	if (kctx->reg_dump[event->reg_offset] ==
+			REGISTER_DUMP_TERMINATION_FLAG) {
+		/* Return the error here to stop the read. And the
+		 * following next() will not be called. The stop can
+		 * get the real event resource and release it
+		 */
+		return -1;
+	}
+
+	if (event->reg_offset == 0)
+		seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
+
+	for (i = 0; i < 50; i++) {
+		if (kctx->reg_dump[event->reg_offset] ==
+				REGISTER_DUMP_TERMINATION_FLAG) {
+			break;
+		}
+		seq_printf(m, "%08x: %08x\n",
+				kctx->reg_dump[event->reg_offset],
+				kctx->reg_dump[1+event->reg_offset]);
+		event->reg_offset += 2;
+
+	}
+
+
+	return 0;
+}
+static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+
+	dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
+			event->reg_offset, (int)*pos);
+
+	return event;
+}
+
+static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event;
+
+	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);
+
+	/* The condition is trick here. It needs make sure the
+	 * fault hasn't happened and the dumping hasn't been started,
+	 * or the dumping has finished
+	 */
+	if (*pos == 0) {
+		event = kmalloc(sizeof(*event), GFP_KERNEL);
+		if (!event)
+			return NULL;
+		event->reg_offset = 0;
+		if (kbase_job_fault_event_wait(kbdev, event)) {
+			kfree(event);
+			return NULL;
+		}
+
+		/* The cache flush workaround is called in bottom half of
+		 * job done but we delayed it. Now we should clean cache
+		 * earlier. Then the GPU memory dump should be correct.
+		 */
+		kbase_backend_cacheclean(kbdev, event->katom);
+	} else
+		return NULL;
+
+	return event;
+}
+
+static void debug_job_fault_stop(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+
+	/* here we wake up the kbase_jd_done_worker after stop, it needs
+	 * get the memory dump before the register dump in debug daemon,
+	 * otherwise, the memory dump may be incorrect.
+	 */
+
+	if (v != NULL) {
+		kfree(v);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 1");
+
+	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+		if (!list_empty(&kbdev->job_fault_event_list)) {
+			kbase_job_fault_event_dequeue(kbdev,
+				&kbdev->job_fault_event_list);
+			wake_up(&kbdev->job_fault_resume_wq);
+		}
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
+	}
+
+}
+
+static const struct seq_operations ops = {
+	.start = debug_job_fault_start,
+	.next = debug_job_fault_next,
+	.stop = debug_job_fault_stop,
+	.show = debug_job_fault_show,
+};
+
+static int debug_job_fault_open(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_open(file, &ops);
+
+	((struct seq_file *)file->private_data)->private = kbdev;
+	dev_info(kbdev->dev, "debug job fault seq open");
+
+	kbdev->job_fault_debug = true;
+
+	return 0;
+
+}
+
+static int debug_job_fault_release(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_release(in, file);
+
+	kbdev->job_fault_debug = false;
+
+	/* Clean the unprocessed job fault. After that, all the suspended
+	 * contexts could be rescheduled.
+	 */
+	kbase_job_fault_event_cleanup(kbdev);
+
+	dev_info(kbdev->dev, "debug job fault seq close");
+
+	return 0;
+}
+
+static const struct file_operations kbasep_debug_job_fault_fops = {
+	.open = debug_job_fault_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = debug_job_fault_release,
+};
+
+/*
+ *  Initialize debugfs entry for job fault dump
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("job_fault", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_debug_job_fault_fops);
+}
+
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+
+	INIT_LIST_HEAD(&kbdev->job_fault_event_list);
+
+	init_waitqueue_head(&(kbdev->job_fault_wq));
+	init_waitqueue_head(&(kbdev->job_fault_resume_wq));
+	spin_lock_init(&kbdev->job_fault_event_lock);
+
+	kbdev->job_fault_resume_workq = alloc_workqueue(
+			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
+	if (!kbdev->job_fault_resume_workq)
+		return -ENOMEM;
+
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+/*
+ * Release the relevant resource per device
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->job_fault_resume_workq);
+}
+
+
+/*
+ *  Initialize the relevant data structure per context
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
+{
+
+	/* We need allocate double size register range
+	 * Because this memory will keep the register address and value
+	 */
+	kctx->reg_dump = vmalloc(0x4000 * 2);
+	if (kctx->reg_dump == NULL)
+		return;
+
+	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
+		vfree(kctx->reg_dump);
+		kctx->reg_dump = NULL;
+	}
+	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
+	atomic_set(&kctx->job_fault_count, 0);
+
+}
+
+/*
+ *  release the relevant resource per context
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
+{
+	vfree(kctx->reg_dump);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
new file mode 100644
index 0000000..a2bf898
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
@@ -0,0 +1,96 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_JOB_FAULT_H
+#define _KBASE_DEBUG_JOB_FAULT_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
+
+/**
+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue
+ *		per device and initialize the required lists.
+ * @kbdev:	Device pointer
+ *
+ * Return: Zero on success or a negative error code.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_dev_term - Clean up resources created in
+ *		kbase_debug_job_fault_dev_init.
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_context_init - Initialize the relevant
+ *		data structure per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_context_term - Release the relevant
+ *		resource per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_process - Process the failed job.
+ *      It will send a event and wake up the job fault waiting queue
+ *      Then create a work queue to wait for job dump finish
+ *      This function should be called in the interrupt handler and before
+ *      jd_done that make sure the jd_done_worker will be delayed until the
+ *      job dump finish
+ * @katom: The failed atom pointer
+ * @completion_code: the job status
+ * @return true if dump is going on
+ */
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code);
+
+
+/**
+ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
+ *      address during the job fault process, the relevant registers will
+ *      be saved when a job fault happen
+ * @kctx: KBase context pointer
+ * @reg_range: Maximum register address space
+ * @return true if initializing successfully
+ */
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range);
+
+/**
+ * kbase_job_fault_get_reg_snapshot - Read the interested registers for
+ *      failed job dump
+ * @kctx: KBase context pointer
+ * @return true if getting registers successfully
+ */
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
+
+#endif  /*_KBASE_DEBUG_JOB_FAULT_H*/
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
new file mode 100644
index 0000000..c65b4df
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -0,0 +1,306 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+struct debug_mem_mapping {
+	struct list_head node;
+
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long flags;
+
+	u64 start_pfn;
+	size_t nr_pages;
+};
+
+struct debug_mem_data {
+	struct list_head mapping_list;
+	struct kbase_context *kctx;
+};
+
+struct debug_mem_seq_off {
+	struct list_head *lh;
+	size_t offset;
+};
+
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data;
+	struct debug_mem_mapping *map;
+	loff_t pos = *_pos;
+
+	list_for_each_entry(map, &mem_data->mapping_list, node) {
+		if (pos >= map->nr_pages) {
+			pos -= map->nr_pages;
+		} else {
+			data = kmalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				return NULL;
+			data->lh = &map->node;
+			data->offset = pos;
+			return data;
+		}
+	}
+
+	/* Beyond the end */
+	return NULL;
+}
+
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+	kfree(v);
+}
+
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	if (data->offset < map->nr_pages - 1) {
+		data->offset++;
+		++*pos;
+		return data;
+	}
+
+	if (list_is_last(data->lh, &mem_data->mapping_list)) {
+		kfree(data);
+		return NULL;
+	}
+
+	data->lh = data->lh->next;
+	data->offset = 0;
+	++*pos;
+
+	return data;
+}
+
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+	int i, j;
+	struct page *page;
+	uint32_t *mapping;
+	pgprot_t prot = PAGE_KERNEL;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	kbase_gpu_vm_lock(mem_data->kctx);
+
+	if (data->offset >= map->alloc->nents) {
+		seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+				data->offset) << PAGE_SHIFT);
+		goto out;
+	}
+
+	if (!(map->flags & KBASE_REG_CPU_CACHED))
+		prot = pgprot_writecombine(prot);
+
+	page = phys_to_page(as_phys_addr_t(map->alloc->pages[data->offset]));
+	mapping = vmap(&page, 1, VM_MAP, prot);
+	if (!mapping)
+		goto out;
+
+	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+		seq_printf(m, "%016llx:", i + ((map->start_pfn +
+				data->offset) << PAGE_SHIFT));
+
+		for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+			seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+		seq_putc(m, '\n');
+	}
+
+	vunmap(mapping);
+
+	seq_putc(m, '\n');
+
+out:
+	kbase_gpu_vm_unlock(mem_data->kctx);
+	return 0;
+}
+
+static const struct seq_operations ops = {
+	.start = debug_mem_start,
+	.next = debug_mem_next,
+	.stop = debug_mem_stop,
+	.show = debug_mem_show,
+};
+
+static int debug_mem_zone_open(struct rb_root *rbtree,
+						struct debug_mem_data *mem_data)
+{
+	int ret = 0;
+	struct rb_node *p;
+	struct kbase_va_region *reg;
+	struct debug_mem_mapping *mapping;
+
+	for (p = rb_first(rbtree); p; p = rb_next(p)) {
+		reg = rb_entry(p, struct kbase_va_region, rblink);
+
+		if (reg->gpu_alloc == NULL)
+			/* Empty region - ignore */
+			continue;
+
+		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		mapping->start_pfn = reg->start_pfn;
+		mapping->nr_pages = reg->nr_pages;
+		mapping->flags = reg->flags;
+		list_add_tail(&mapping->node, &mem_data->mapping_list);
+	}
+
+out:
+	return ret;
+}
+
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+	struct file *kctx_file = i->i_private;
+	struct kbase_context *kctx = kctx_file->private_data;
+	struct debug_mem_data *mem_data;
+	int ret;
+
+	ret = seq_open(file, &ops);
+	if (ret)
+		return ret;
+
+	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	if (!mem_data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mem_data->kctx = kctx;
+
+	INIT_LIST_HEAD(&mem_data->mapping_list);
+
+	get_file(kctx_file);
+
+	kbase_gpu_vm_lock(kctx);
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	((struct seq_file *)file->private_data)->private = mem_data;
+
+	return 0;
+
+out:
+	if (mem_data) {
+		while (!list_empty(&mem_data->mapping_list)) {
+			struct debug_mem_mapping *mapping;
+
+			mapping = list_first_entry(&mem_data->mapping_list,
+					struct debug_mem_mapping, node);
+			kbase_mem_phy_alloc_put(mapping->alloc);
+			list_del(&mapping->node);
+			kfree(mapping);
+		}
+		fput(kctx_file);
+		kfree(mem_data);
+	}
+	seq_release(i, file);
+	return ret;
+}
+
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+	struct file *kctx_file = inode->i_private;
+	struct seq_file *sfile = file->private_data;
+	struct debug_mem_data *mem_data = sfile->private;
+	struct debug_mem_mapping *mapping;
+
+	seq_release(inode, file);
+
+	while (!list_empty(&mem_data->mapping_list)) {
+		mapping = list_first_entry(&mem_data->mapping_list,
+				struct debug_mem_mapping, node);
+		kbase_mem_phy_alloc_put(mapping->alloc);
+		list_del(&mapping->node);
+		kfree(mapping);
+	}
+
+	kfree(mem_data);
+
+	fput(kctx_file);
+
+	return 0;
+}
+
+static const struct file_operations kbase_debug_mem_view_fops = {
+	.open = debug_mem_open,
+	.release = debug_mem_release,
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+/**
+ * kbase_debug_mem_view_init - Initialise the mem_view sysfs file
+ * @kctx_file: The /dev/mali0 file instance for the context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct file *kctx_file)
+{
+	struct kbase_context *kctx = kctx_file->private_data;
+
+	debugfs_create_file("mem_view", S_IRUSR, kctx->kctx_dentry, kctx_file,
+			&kbase_debug_mem_view_fops);
+}
+
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
new file mode 100644
index 0000000..20ab51a
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -0,0 +1,25 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+void kbase_debug_mem_view_init(struct file *kctx_file);
+
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100644
index 0000000..09415b3
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,1625 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Defintions (types, defines, etcs) common to Kbase. They are placed here to
+ * allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_gpuprops_types.h>
+#include <protected_mode_switcher.h>
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/sizes.h>
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#include <linux/bus_logger.h>
+#endif
+
+
+#if defined(CONFIG_SYNC)
+#include <sync.h>
+#else
+#include "mali_kbase_fence_defs.h"
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif				/* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_MALI_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_MALI_DEVFREQ */
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#if defined(CONFIG_PM_RUNTIME) || \
+	(defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+#define KBASE_PM_RUNTIME 1
+#endif
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif				/* CONFIG_MALI_DEBUG */
+#endif				/* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT             1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT           500
+
+/**
+ * Prevent soft-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * Therefore, soft stop may still be disabled due to HW issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+
+/**
+ * Prevent hard-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS        3
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS              16
+
+/* mmu */
+#define MIDGARD_MMU_VA_BITS 48
+
+#define MIDGARD_MMU_LEVEL(x) (x)
+
+#if MIDGARD_MMU_VA_BITS > 39
+#define MIDGARD_MMU_TOPLEVEL    MIDGARD_MMU_LEVEL(0)
+#else
+#define MIDGARD_MMU_TOPLEVEL    MIDGARD_MMU_LEVEL(1)
+#endif
+
+#define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3)
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID     (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8	/* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_hwaccess_defs.h"
+
+#define KBASEP_FORCE_REPLAY_DISABLED 0
+
+/* Maximum force replay limit when randomization is enabled */
+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+
+/** Atom has been previously soft-stoppped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has been previously retried to execute */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has fail dependency on cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom is currently in the list of atoms blocked on cross-slot dependencies */
+#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in protected mode */
+#define KBASE_KATOM_FLAG_PROTECTED (1<<11)
+/* Atom has been stored in runnable_tree */
+#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS  (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ *  entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+		(JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+/* Serialize atoms within a slot (ie only one atom per job slot) */
+#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0)
+/* Serialize atoms between slots (ie only one job slot running at any time) */
+#define KBASE_SERIALIZE_INTER_SLOT (1 << 1)
+/* Reset the GPU after each atom completion */
+#define KBASE_SERIALIZE_RESET (1 << 2)
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_device;
+struct kbase_as;
+struct kbase_mmu_setup;
+
+#ifdef CONFIG_DEBUG_FS
+struct base_job_fault_event {
+
+	u32 event_code;
+	struct kbase_jd_atom *katom;
+	struct work_struct job_fault_work;
+	struct list_head head;
+	int reg_offset;
+};
+
+#endif
+
+struct kbase_jd_atom_dependency {
+	struct kbase_jd_atom *atom;
+	u8 dep_type;
+};
+
+/**
+ * struct kbase_io_access - holds information about 1 register access
+ *
+ * @addr: first bit indicates r/w (r=0, w=1)
+ * @value: value written or read
+ */
+struct kbase_io_access {
+	uintptr_t addr;
+	u32 value;
+};
+
+/**
+ * struct kbase_io_history - keeps track of all recent register accesses
+ *
+ * @enabled: true if register accesses are recorded, false otherwise
+ * @lock: spinlock protecting kbase_io_access array
+ * @count: number of registers read/written
+ * @size: number of elements in kbase_io_access array
+ * @buf: array of kbase_io_access
+ */
+struct kbase_io_history {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool enabled;
+#else
+	u32 enabled;
+#endif
+
+	spinlock_t lock;
+	size_t count;
+	u16 size;
+	struct kbase_io_access *buf;
+};
+
+/**
+ * @brief The function retrieves a read-only reference to the atom field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return readonly reference to dependent ATOM.
+ */
+static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return (const struct kbase_jd_atom *)(dep->atom);
+}
+
+/**
+ * @brief The function retrieves a read-only reference to the dependency type field from
+ * the  kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return A dependency type value.
+ */
+static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return dep->dep_type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency.
+ * @param[in] a      The ATOM to be set as a dependency.
+ * @param     type   The ATOM dependency type to be set.
+ *
+ */
+static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
+		struct kbase_jd_atom *a, u8 type)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = a;
+	dep->dep_type = type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep    The kbase jd atom dependency to be cleared.
+ *
+ */
+static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = NULL;
+	dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
+
+enum kbase_atom_gpu_rb_state {
+	/* Atom is not currently present in slot ringbuffer */
+	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
+	/* Atom is in slot ringbuffer but is blocked on a previous atom */
+	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+	/* Atom is in slot ringbuffer but is waiting for a previous protected
+	 * mode transition to complete */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
+	/* Atom is in slot ringbuffer but is waiting for proected mode
+	 * transition */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
+	/* Atom is in slot ringbuffer but is waiting for cores to become
+	 * available */
+	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
+	/* Atom is in slot ringbuffer but is blocked on affinity */
+	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
+	/* Atom is in slot ringbuffer and ready to run */
+	KBASE_ATOM_GPU_RB_READY,
+	/* Atom is in slot ringbuffer and has been submitted to the GPU */
+	KBASE_ATOM_GPU_RB_SUBMITTED,
+	/* Atom must be returned to JS as soon as it reaches the head of the
+	 * ringbuffer due to a previous failure */
+	KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1
+};
+
+enum kbase_atom_enter_protected_state {
+	/*
+	 * Starting state:
+	 * Check if a transition into protected mode is required.
+	 *
+	 * NOTE: The integer value of this must
+	 *       match KBASE_ATOM_EXIT_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
+	/* Wait for vinstr to suspend. */
+	KBASE_ATOM_ENTER_PROTECTED_VINSTR,
+	/* Wait for the L2 to become idle in preparation for
+	 * the coherency change. */
+	KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
+	/* End state;
+	 * Prepare coherency change. */
+	KBASE_ATOM_ENTER_PROTECTED_FINISHED,
+};
+
+enum kbase_atom_exit_protected_state {
+	/*
+	 * Starting state:
+	 * Check if a transition out of protected mode is required.
+	 *
+	 * NOTE: The integer value of this must
+	 *       match KBASE_ATOM_ENTER_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
+	/* Wait for the L2 to become idle in preparation
+	 * for the reset. */
+	KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
+	/* Issue the protected reset. */
+	KBASE_ATOM_EXIT_PROTECTED_RESET,
+	/* End state;
+	 * Wait for the reset to complete. */
+	KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
+};
+
+struct kbase_ext_res {
+	u64 gpu_address;
+	struct kbase_mem_phy_alloc *alloc;
+};
+
+struct kbase_jd_atom {
+	struct work_struct work;
+	ktime_t start_timestamp;
+
+	struct base_jd_udata udata;
+	struct kbase_context *kctx;
+
+	struct list_head dep_head[2];
+	struct list_head dep_item[2];
+	const struct kbase_jd_atom_dependency dep[2];
+	/* List head used during job dispatch job_done processing - as
+	 * dependencies may not be entirely resolved at this point, we need to
+	 * use a separate list head. */
+	struct list_head jd_item;
+	/* true if atom's jd_item is currently on a list. Prevents atom being
+	 * processed twice. */
+	bool in_jd_list;
+
+	u16 nr_extres;
+	struct kbase_ext_res *extres;
+
+	u32 device_nr;
+	u64 affinity;
+	u64 jc;
+	enum kbase_atom_coreref_state coreref_state;
+#if defined(CONFIG_SYNC)
+	/* Stores either an input or output fence, depending on soft-job type */
+	struct sync_fence *fence;
+	struct sync_fence_waiter sync_waiter;
+#endif				/* CONFIG_SYNC */
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+	struct {
+		/* Use the functions/API defined in mali_kbase_fence.h to
+		 * when working with this sub struct */
+#if defined(CONFIG_SYNC_FILE)
+		/* Input fence */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+		struct fence *fence_in;
+#else
+		struct dma_fence *fence_in;
+#endif
+#endif
+		/* This points to the dma-buf output fence for this atom. If
+		 * this is NULL then there is no fence for this atom and the
+		 * following fields related to dma_fence may have invalid data.
+		 *
+		 * The context and seqno fields contain the details for this
+		 * fence.
+		 *
+		 * This fence is signaled when the katom is completed,
+		 * regardless of the event_code of the katom (signal also on
+		 * failure).
+		 */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+		struct fence *fence;
+#else
+		struct dma_fence *fence;
+#endif
+		/* The dma-buf fence context number for this atom. A unique
+		 * context number is allocated to each katom in the context on
+		 * context creation.
+		 */
+		unsigned int context;
+		/* The dma-buf fence sequence number for this atom. This is
+		 * increased every time this katom uses dma-buf fence.
+		 */
+		atomic_t seqno;
+		/* This contains a list of all callbacks set up to wait on
+		 * other fences.  This atom must be held back from JS until all
+		 * these callbacks have been called and dep_count have reached
+		 * 0. The initial value of dep_count must be equal to the
+		 * number of callbacks on this list.
+		 *
+		 * This list is protected by jctx.lock. Callbacks are added to
+		 * this list when the atom is built and the wait are set up.
+		 * All the callbacks then stay on the list until all callbacks
+		 * have been called and the atom is queued, or cancelled, and
+		 * then all callbacks are taken off the list and freed.
+		 */
+		struct list_head callbacks;
+		/* Atomic counter of number of outstandind dma-buf fence
+		 * dependencies for this atom. When dep_count reaches 0 the
+		 * atom may be queued.
+		 *
+		 * The special value "-1" may only be set after the count
+		 * reaches 0, while holding jctx.lock. This indicates that the
+		 * atom has been handled, either queued in JS or cancelled.
+		 *
+		 * If anyone but the dma-fence worker sets this to -1 they must
+		 * ensure that any potentially queued worker must have
+		 * completed before allowing the atom to be marked as unused.
+		 * This can be done by flushing the fence work queue:
+		 * kctx->dma_fence.wq.
+		 */
+		atomic_t dep_count;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE*/
+
+	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
+	enum base_jd_event_code event_code;
+	base_jd_core_req core_req;	    /**< core requirements */
+	/** Job Slot to retry submitting to if submission from IRQ handler failed
+	 *
+	 * NOTE: see if this can be unified into the another member e.g. the event */
+	int retry_submit_on_slot;
+
+	u32 ticks;
+	/* JS atom priority with respect to other atoms on its kctx. */
+	int sched_priority;
+
+	int poking;		/* BASE_HW_ISSUE_8316 */
+
+	wait_queue_head_t completed;
+	enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	int work_id;
+#endif
+	/* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
+	int slot_nr;
+
+	u32 atom_flags;
+
+	/* Number of times this atom has been retried. Used by replay soft job.
+	 */
+	int retry_count;
+
+	enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+	u64 need_cache_flush_cores_retained;
+
+	atomic_t blocked;
+
+	/* Pointer to atom that this atom has same-slot dependency on */
+	struct kbase_jd_atom *pre_dep;
+	/* Pointer to atom that has same-slot dependency on this atom */
+	struct kbase_jd_atom *post_dep;
+
+	/* Pointer to atom that this atom has cross-slot dependency on */
+	struct kbase_jd_atom *x_pre_dep;
+	/* Pointer to atom that has cross-slot dependency on this atom */
+	struct kbase_jd_atom *x_post_dep;
+
+	/* The GPU's flush count recorded at the time of submission, used for
+	 * the cache flush optimisation */
+	u32 flush_id;
+
+	struct kbase_jd_atom_backend backend;
+#ifdef CONFIG_DEBUG_FS
+	struct base_job_fault_event fault_event;
+#endif
+
+	/* List head used for three different purposes:
+	 *  1. Overflow list for JS ring buffers. If an atom is ready to run,
+	 *     but there is no room in the JS ring buffer, then the atom is put
+	 *     on the ring buffer's overflow list using this list node.
+	 *  2. List of waiting soft jobs.
+	 */
+	struct list_head queue;
+
+	/* Used to keep track of all JIT free/alloc jobs in submission order
+	 */
+	struct list_head jit_node;
+	bool jit_blocked;
+
+	/* If non-zero, this indicates that the atom will fail with the set
+	 * event_code when the atom is processed. */
+	enum base_jd_event_code will_fail_event_code;
+
+	/* Atoms will only ever be transitioning into, or out of
+	 * protected mode so we do not need two separate fields.
+	 */
+	union {
+		enum kbase_atom_enter_protected_state enter;
+		enum kbase_atom_exit_protected_state exit;
+	} protected_state;
+
+	struct rb_node runnable_tree_node;
+
+	/* 'Age' of atom relative to other atoms in the context. */
+	u32 age;
+};
+
+static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
+{
+	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+struct kbase_jd_context {
+	struct mutex lock;
+	struct kbasep_js_kctx_info sched_info;
+	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+	/** Tracks all job-dispatch jobs.  This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	u32 job_nr;
+
+	/** Waitq that reflects whether there are no jobs (including SW-only
+	 * dependency jobs). This is set when no jobs are present on the ctx,
+	 * and clear when there are jobs.
+	 *
+	 * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
+	 * the Job Scheduler is unaware of jobs that are blocked on dependencies,
+	 * and SW-only dependency jobs.
+	 *
+	 * This waitq can be waited upon to find out when the context jobs are all
+	 * done/cancelled (including those that might've been blocked on
+	 * dependencies) - and so, whether it can be terminated. However, it should
+	 * only be terminated once it is not present in the run-pool (see
+	 * kbasep_js_kctx_info::ctx::is_scheduled).
+	 *
+	 * Since the waitq is only set under kbase_jd_context::lock,
+	 * the waiter should also briefly obtain and drop kbase_jd_context::lock to
+	 * guarentee that the setter has completed its work on the kbase_context
+	 *
+	 * This must be updated atomically with:
+	 * - kbase_jd_context::job_nr */
+	wait_queue_head_t zero_jobs_wait;
+
+	/** Job Done workqueue. */
+	struct workqueue_struct *job_done_wq;
+
+	spinlock_t tb_lock;
+	u32 *tb;
+	size_t tb_wrap_offset;
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_t work_id;
+#endif
+};
+
+struct kbase_device_info {
+	u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+enum {
+	KBASE_AS_POKE_STATE_IN_FLIGHT     = 1<<0,
+	KBASE_AS_POKE_STATE_KILLING_POKE  = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+	u64	transtab;
+	u64	memattr;
+	u64	transcfg;
+};
+
+/**
+ * Important: Our code makes assumptions that a struct kbase_as structure is always at
+ * kbase_device->as[number]. This is used to recover the containing
+ * struct kbase_device from a struct kbase_as structure.
+ *
+ * Therefore, struct kbase_as structures must not be allocated anywhere else.
+ */
+struct kbase_as {
+	int number;
+
+	struct workqueue_struct *pf_wq;
+	struct work_struct work_pagefault;
+	struct work_struct work_busfault;
+	enum kbase_mmu_fault_type fault_type;
+	bool protected_mode;
+	u32 fault_status;
+	u64 fault_addr;
+	u64 fault_extra_addr;
+
+	struct kbase_mmu_setup current_setup;
+
+	/* BASE_HW_ISSUE_8316  */
+	struct workqueue_struct *poke_wq;
+	struct work_struct poke_work;
+	/** Protected by hwaccess_lock */
+	int poke_refcount;
+	/** Protected by hwaccess_lock */
+	kbase_as_poke_state poke_state;
+	struct hrtimer poke_timer;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+struct kbasep_mem_device {
+	atomic_t used_pages;   /* Tracks usage of OS shared memory. Updated
+				   when OS memory is allocated/freed. */
+
+};
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+	/* Comma on its own, to extend the list */
+	,
+	/* Must be the last in the enum */
+	KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT  (((u8)1) << 1)
+
+struct kbase_trace {
+	struct timespec timestamp;
+	u32 thread_id;
+	u32 cpu;
+	void *ctx;
+	bool katom;
+	int atom_number;
+	u64 atom_udata[2];
+	u64 gpu_addr;
+	unsigned long info_val;
+	u8 code;
+	u8 jobslot;
+	u8 refcount;
+	u8 flags;
+};
+
+/** Event IDs for the power management framework.
+ *
+ * Any of these events might be missed, so they should not be relied upon to
+ * find the precise state of the GPU at a particular time in the
+ * trace. Overall, we should get a high percentage of these events for
+ * statisical purposes, and so a few missing should not be a problem */
+enum kbase_timeline_pm_event {
+	/* helper for tests */
+	KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** Event reserved for backwards compatibility with 'init' events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** The power state of the device has changed.
+	 *
+	 * Specifically, the device has reached a desired or available state.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
+
+	/** The GPU is becoming active.
+	 *
+	 * This event is sent when the first context is about to use the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
+
+	/** The GPU is becoming idle.
+	 *
+	 * This event is sent when the last context has finished using the GPU.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
+
+	/** Event reserved for backwards compatibility with 'policy_change'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_4,
+
+	/** Event reserved for backwards compatibility with 'system_suspend'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_5,
+
+	/** Event reserved for backwards compatibility with 'system_resume'
+	 * events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_6,
+
+	/** The job scheduler is requesting to power up/down cores.
+	 *
+	 * This event is sent when:
+	 * - powered down cores are needed to complete a job
+	 * - powered up cores are not needed anymore
+	 */
+	KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+
+	KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+};
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+struct kbase_trace_kctx_timeline {
+	atomic_t jd_atoms_in_flight;
+	u32 owner_tgid;
+};
+
+struct kbase_trace_kbdev_timeline {
+	/* Note: strictly speaking, not needed, because it's in sync with
+	 * kbase_device::jm_slots[]::submitted_nr
+	 *
+	 * But it's kept as an example of how to add global timeline tracking
+	 * information
+	 *
+	 * The caller must hold hwaccess_lock when accessing this */
+	u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
+
+	/* Last UID for each PM event */
+	atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
+	/* Counter for generating PM event UIDs */
+	atomic_t pm_event_uid_counter;
+	/*
+	 * L2 transition state - true indicates that the transition is ongoing
+	 * Expected to be protected by hwaccess_lock */
+	bool l2_transitioning;
+};
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+
+struct kbasep_kctx_list_element {
+	struct list_head link;
+	struct kbase_context *kctx;
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_device_data {
+	/**
+	 * The lock protecting Power Management structures accessed outside of
+	 * IRQ.
+	 *
+	 * This lock must also be held whenever the GPU is being powered on or
+	 * off.
+	 */
+	struct mutex lock;
+
+	/** The reference count of active contexts on this device. */
+	int active_count;
+	/** Flag indicating suspending/suspended */
+	bool suspending;
+	/* Wait queue set when active_count == 0 */
+	wait_queue_head_t zero_active_count_wait;
+
+	/**
+	 * Bit masks identifying the available shader cores that are specified
+	 * via sysfs. One mask per job slot.
+	 */
+	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
+	u64 debug_core_mask_all;
+
+	/**
+	 * Callback for initializing the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 *
+	 * @return 0 on success, else error code
+	 */
+	 int (*callback_power_runtime_init)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback for terminating the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 */
+	void (*callback_power_runtime_term)(struct kbase_device *kbdev);
+
+	/* Time in milliseconds between each dvfs sample */
+	u32 dvfs_period;
+
+	/* Period of GPU poweroff timer */
+	ktime_t gpu_poweroff_time;
+
+	/* Number of ticks of GPU poweroff timer before shader is powered off */
+	int poweroff_shader_ticks;
+
+	/* Number of ticks of GPU poweroff timer before GPU is powered off */
+	int poweroff_gpu_ticks;
+
+	struct kbase_pm_backend_data backend;
+};
+
+/**
+ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
+ * @kbdev:     Kbase device where memory is used
+ * @cur_size:  Number of free pages currently in the pool (may exceed @max_size
+ *             in some corner cases)
+ * @max_size:  Maximum number of free pages in the pool
+ * @order:     order = 0 refers to a pool of 4 KB pages
+ *             order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
+ * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size
+ *             and @page_list
+ * @page_list: List of free pages in the pool
+ * @reclaim:   Shrinker for kernel reclaim of free pages
+ * @next_pool: Pointer to next pool where pages can be allocated when this pool
+ *             is empty. Pages will spill over to the next pool when this pool
+ *             is full. Can be NULL if there is no next pool.
+ */
+struct kbase_mem_pool {
+	struct kbase_device *kbdev;
+	size_t              cur_size;
+	size_t              max_size;
+	size_t		    order;
+	spinlock_t          pool_lock;
+	struct list_head    page_list;
+	struct shrinker     reclaim;
+
+	struct kbase_mem_pool *next_pool;
+};
+
+/**
+ * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP
+ *                            frequency, and real frequency and core mask
+ * @opp_freq:  Nominal OPP frequency
+ * @real_freq: Real GPU frequency
+ * @core_mask: Shader core mask
+ */
+struct kbase_devfreq_opp {
+	u64 opp_freq;
+	u64 real_freq;
+	u64 core_mask;
+};
+
+struct kbase_mmu_mode {
+	void (*update)(struct kbase_context *kctx);
+	void (*get_as_setup)(struct kbase_context *kctx,
+			struct kbase_mmu_setup * const setup);
+	void (*disable_as)(struct kbase_device *kbdev, int as_nr);
+	phys_addr_t (*pte_to_phy_addr)(u64 entry);
+	int (*ate_is_valid)(u64 ate, unsigned int level);
+	int (*pte_is_valid)(u64 pte, unsigned int level);
+	void (*entry_set_ate)(u64 *entry, struct tagged_addr phy,
+			unsigned long flags, unsigned int level);
+	void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+	void (*entry_invalidate)(u64 *entry);
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
+
+
+#define DEVNAME_SIZE	16
+
+struct kbase_device {
+	s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];
+
+	u32 hw_quirks_sc;
+	u32 hw_quirks_tiler;
+	u32 hw_quirks_mmu;
+	u32 hw_quirks_jm;
+
+	struct list_head entry;
+	struct device *dev;
+	struct miscdevice mdev;
+	u64 reg_start;
+	size_t reg_size;
+	void __iomem *reg;
+
+	struct {
+		int irq;
+		int flags;
+	} irqs[3];
+
+	struct clk *clock;
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulator;
+#endif
+	char devname[DEVNAME_SIZE];
+
+#ifdef CONFIG_MALI_NO_MALI
+	void *model;
+	struct kmem_cache *irq_slab;
+	struct workqueue_struct *irq_workq;
+	atomic_t serving_job_irq;
+	atomic_t serving_gpu_irq;
+	atomic_t serving_mmu_irq;
+	spinlock_t reg_op_lock;
+#endif	/* CONFIG_MALI_NO_MALI */
+
+	struct kbase_pm_device_data pm;
+	struct kbasep_js_device_data js_data;
+	struct kbase_mem_pool mem_pool;
+	struct kbase_mem_pool lp_mem_pool;
+	struct kbasep_mem_device memdev;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	struct kbase_as as[BASE_MAX_NR_AS];
+	/* The below variables (as_free and as_to_kctx) are managed by the
+	 * Context Scheduler. The kbasep_js_device_data::runpool_irq::lock must
+	 * be held whilst accessing these.
+	 */
+	u16 as_free; /* Bitpattern of free Address Spaces */
+	/* Mapping from active Address Spaces to kbase_context */
+	struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
+
+
+	spinlock_t mmu_mask_change;
+
+	struct kbase_gpu_props gpu_props;
+
+	/** List of SW workarounds for HW issues */
+	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+	/** List of features available */
+	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+
+	/* Bitmaps of cores that are currently in use (running jobs).
+	 * These should be kept up to date by the job scheduler.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 *
+	 * kbase_pm_check_transitions_nolock() should be called when bits are
+	 * cleared to update the power management system and allow transitions to
+	 * occur. */
+	u64 shader_inuse_bitmap;
+
+	/* Refcount for cores in use */
+	u32 shader_inuse_cnt[64];
+
+	/* Bitmaps of cores the JS needs for jobs ready to run */
+	u64 shader_needed_bitmap;
+
+	/* Refcount for cores needed */
+	u32 shader_needed_cnt[64];
+
+	u32 tiler_inuse_cnt;
+
+	u32 tiler_needed_cnt;
+
+	/* struct for keeping track of the disjoint information
+	 *
+	 * The state  is > 0 if the GPU is in a disjoint state. Otherwise 0
+	 * The count is the number of disjoint events that have occurred on the GPU
+	 */
+	struct {
+		atomic_t count;
+		atomic_t state;
+	} disjoint_event;
+
+	/* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
+	u32 l2_users_count;
+
+	/* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be
+	 * submitted to these cores. These are updated by the power management code. The job scheduler should avoid
+	 * submitting new jobs to any cores that are not marked as available.
+	 *
+	 * pm.power_change_lock should be held when accessing these members.
+	 */
+	u64 shader_available_bitmap;
+	u64 tiler_available_bitmap;
+	u64 l2_available_bitmap;
+	u64 stack_available_bitmap;
+
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+
+	s8 nr_hw_address_spaces;			  /**< Number of address spaces in the GPU (constant after driver initialisation) */
+	s8 nr_user_address_spaces;			  /**< Number of address spaces available to user contexts */
+
+	/* Structure used for instrumentation and HW counters dumping */
+	struct kbase_hwcnt {
+		/* The lock should be used when accessing any of the following members */
+		spinlock_t lock;
+
+		struct kbase_context *kctx;
+		u64 addr;
+
+		struct kbase_instr_backend backend;
+	} hwcnt;
+
+	struct kbase_vinstr_context *vinstr_ctx;
+
+#if KBASE_TRACE_ENABLE
+	spinlock_t              trace_lock;
+	u16                     trace_first_out;
+	u16                     trace_next_in;
+	struct kbase_trace            *trace_rbuf;
+#endif
+
+	u32 reset_timeout_ms;
+
+	struct mutex cacheclean_lock;
+
+	/* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
+	void *platform_context;
+
+	/* List of kbase_contexts created */
+	struct list_head        kctx_list;
+	struct mutex            kctx_list_lock;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freq;
+	unsigned long current_nominal_freq;
+	unsigned long current_voltage;
+	u64 current_core_mask;
+	struct kbase_devfreq_opp *opp_table;
+	int num_opps;
+#ifdef CONFIG_DEVFREQ_THERMAL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+	struct devfreq_cooling_device *devfreq_cooling;
+#else
+	struct thermal_cooling_device *devfreq_cooling;
+#endif
+	/* Current IPA model - true for configured model, false for fallback */
+	atomic_t ipa_use_configured_model;
+	struct {
+		/* Access to this struct must be with ipa.lock held */
+		struct mutex lock;
+		struct kbase_ipa_model *configured_model;
+		struct kbase_ipa_model *fallback_model;
+	} ipa;
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kbdev_timeline timeline;
+#endif
+
+	/*
+	 * Control for enabling job dump on failure, set when control debugfs
+	 * is opened.
+	 */
+	bool job_fault_debug;
+
+#ifdef CONFIG_DEBUG_FS
+	/* directory for debugfs entries */
+	struct dentry *mali_debugfs_directory;
+	/* Root directory for per context entry */
+	struct dentry *debugfs_ctx_directory;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* bit for each as, set if there is new data to report */
+	u64 debugfs_as_read_bitmap;
+#endif /* CONFIG_MALI_DEBUG */
+
+	/* failed job dump, used for separate debug process */
+	wait_queue_head_t job_fault_wq;
+	wait_queue_head_t job_fault_resume_wq;
+	struct workqueue_struct *job_fault_resume_workq;
+	struct list_head job_fault_event_list;
+	spinlock_t job_fault_event_lock;
+	struct kbase_context *kctx_fault;
+
+#if !MALI_CUSTOMER_RELEASE
+	/* Per-device data for register dumping interface */
+	struct {
+		u16 reg_offset; /* Offset of a GPU_CONTROL register to be
+				   dumped upon request */
+	} regs_dump_debugfs_data;
+#endif /* !MALI_CUSTOMER_RELEASE */
+#endif /* CONFIG_DEBUG_FS */
+
+	/* fbdump profiling controls set by gator */
+	u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
+
+
+#if MALI_CUSTOMER_RELEASE == 0
+	/* Number of jobs that are run before a job is forced to fail and
+	 * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
+	 * failures. */
+	int force_replay_limit;
+	/* Count of jobs between forced failures. Incremented on each job. A
+	 * job is forced to fail once this is greater than or equal to
+	 * force_replay_limit. */
+	int force_replay_count;
+	/* Core requirement for jobs to be failed and replayed. May be zero. */
+	base_jd_core_req force_replay_core_req;
+	/* true if force_replay_limit should be randomized. The random
+	 * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+	 */
+	bool force_replay_random;
+#endif
+
+	/* Total number of created contexts */
+	atomic_t ctx_num;
+
+#ifdef CONFIG_DEBUG_FS
+	/* Holds the most recent register accesses */
+	struct kbase_io_history io_history;
+#endif /* CONFIG_DEBUG_FS */
+
+	struct kbase_hwaccess_data hwaccess;
+
+	/* Count of page/bus faults waiting for workqueues to process */
+	atomic_t faults_pending;
+
+	/* true if GPU is powered off or power off operation is in progress */
+	bool poweroff_pending;
+
+
+	/* defaults for new context created for this device */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active_default;
+#else
+	u32 infinite_cache_active_default;
+#endif
+	size_t mem_pool_max_size_default;
+
+	/* current gpu coherency mode */
+	u32 current_gpu_coherency_mode;
+	/* system coherency mode  */
+	u32 system_coherency;
+	/* Flag to track when cci snoops have been enabled on the interface */
+	bool cci_snoop_enabled;
+
+	/* SMC function IDs to call into Trusted firmware to enable/disable
+	 * cache snooping. Value of 0 indicates that they are not used
+	 */
+	u32 snoop_enable_smc;
+	u32 snoop_disable_smc;
+
+	/* Protected mode operations */
+	struct protected_mode_ops *protected_ops;
+
+	/* Protected device attached to this kbase device */
+	struct protected_mode_device *protected_dev;
+
+	/*
+	 * true when GPU is put into protected mode
+	 */
+	bool protected_mode;
+
+	/*
+	 * true when GPU is transitioning into or out of protected mode
+	 */
+	bool protected_mode_transition;
+
+	/*
+	 * true if protected mode is supported
+	 */
+	bool protected_mode_support;
+
+
+#ifdef CONFIG_MALI_DEBUG
+	wait_queue_head_t driver_inactive_wait;
+	bool driver_inactive;
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	/*
+	 * Bus logger integration.
+	 */
+	struct bus_logger_client *buslogger;
+#endif
+	/* Boolean indicating if an IRQ flush during reset is in progress. */
+	bool irq_reset_flush;
+
+	/* list of inited sub systems. Used during terminate/error recovery */
+	u32 inited_subsys;
+
+	spinlock_t hwaccess_lock;
+
+	/* Protects access to MMU operations */
+	struct mutex mmu_hw_mutex;
+
+	/* Current serialization mode. See KBASE_SERIALIZE_* for details */
+	u8 serialize_jobs;
+};
+
+/**
+ * struct jsctx_queue - JS context atom queue
+ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this
+ *                 job slot.
+ * @x_dep_head:    Head item of the linked list of atoms blocked on cross-slot
+ *                 dependencies. Atoms on this list will be moved to the
+ *                 runnable_tree when the blocking atom completes.
+ *
+ * hwaccess_lock must be held when accessing this structure.
+ */
+struct jsctx_queue {
+	struct rb_root runnable_tree;
+	struct list_head x_dep_head;
+};
+
+
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
+					 (((minor) & 0xFFF) << 8) | \
+					 ((0 & 0xFF) << 0))
+
+/**
+ * enum kbase_context_flags - Flags for kbase contexts
+ *
+ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
+ * process on a 64-bit kernel.
+ *
+ * @KCTX_RUNNABLE_REF: Set when context is counted in
+ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
+ *
+ * @KCTX_ACTIVE: Set when the context is active.
+ *
+ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
+ * context.
+ *
+ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
+ * initialized.
+ *
+ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
+ * allocations. Existing allocations will not change.
+ *
+ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
+ *
+ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept
+ * scheduled in.
+ *
+ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
+ * This is only ever updated whilst the jsctx_mutex is held.
+ *
+ * @KCTX_DYING: Set when the context process is in the process of being evicted.
+ *
+ * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this
+ * context, to disable use of implicit dma-buf fences. This is used to avoid
+ * potential synchronization deadlocks.
+ *
+ * All members need to be separate bits. This enum is intended for use in a
+ * bitmask where multiple values get OR-ed together.
+ */
+enum kbase_context_flags {
+	KCTX_COMPAT = 1U << 0,
+	KCTX_RUNNABLE_REF = 1U << 1,
+	KCTX_ACTIVE = 1U << 2,
+	KCTX_PULLED = 1U << 3,
+	KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
+	KCTX_INFINITE_CACHE = 1U << 5,
+	KCTX_SUBMIT_DISABLED = 1U << 6,
+	KCTX_PRIVILEGED = 1U << 7,
+	KCTX_SCHEDULED = 1U << 8,
+	KCTX_DYING = 1U << 9,
+	KCTX_NO_IMPLICIT_SYNC = 1U << 10,
+};
+
+struct kbase_sub_alloc {
+	struct list_head link;
+	struct page *page;
+	DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K);
+};
+
+struct kbase_context {
+	struct file *filp;
+	struct kbase_device *kbdev;
+	u32 id; /* System wide unique id */
+	unsigned long api_version;
+	phys_addr_t pgd;
+	struct list_head event_list;
+	struct list_head event_coalesce_list;
+	struct mutex event_mutex;
+	atomic_t event_closed;
+	struct workqueue_struct *event_workq;
+	atomic_t event_count;
+	int event_coalesce_count;
+
+	atomic_t flags;
+
+	atomic_t                setup_complete;
+	atomic_t                setup_in_progress;
+
+	u64 *mmu_teardown_pages;
+
+	struct tagged_addr aliasing_sink_page;
+
+	struct mutex            mem_partials_lock;
+	struct list_head        mem_partials;
+
+	struct mutex            mmu_lock;
+	struct mutex            reg_lock; /* To be converted to a rwlock? */
+	struct rb_root reg_rbtree_same; /* RB tree of GPU (live) regions,
+					 * SAME_VA zone */
+	struct rb_root reg_rbtree_exec; /* RB tree of GPU (live) regions,
+					 * EXEC zone */
+	struct rb_root reg_rbtree_custom; /* RB tree of GPU (live) regions,
+					 * CUSTOM_VA zone */
+
+	unsigned long    cookies;
+	struct kbase_va_region *pending_regions[BITS_PER_LONG];
+
+	wait_queue_head_t event_queue;
+	pid_t tgid;
+	pid_t pid;
+
+	struct kbase_jd_context jctx;
+	atomic_t used_pages;
+	atomic_t         nonmapped_pages;
+
+	struct kbase_mem_pool mem_pool;
+	struct kbase_mem_pool lp_mem_pool;
+
+	struct shrinker         reclaim;
+	struct list_head        evict_list;
+
+	struct list_head waiting_soft_jobs;
+	spinlock_t waiting_soft_jobs_lock;
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		struct list_head waiting_resource;
+		struct workqueue_struct *wq;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+	/** This is effectively part of the Run Pool, because it only has a valid
+	 * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
+	 *
+	 * The hwaccess_lock must be held whilst accessing this.
+	 *
+	 * If the context relating to this as_nr is required, you must use
+	 * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
+	 * whilst you're using it. Alternatively, just hold the hwaccess_lock
+	 * to ensure the context doesn't disappear (but this has restrictions on what other locks
+	 * you can take whilst doing this) */
+	int as_nr;
+
+	/* Keeps track of the number of users of this context. A user can be a
+	 * job that is available for execution, instrumentation needing to 'pin'
+	 * a context for counter collection, etc. If the refcount reaches 0 then
+	 * this context is considered inactive and the previously programmed
+	 * AS might be cleared at any point.
+	 */
+	atomic_t refcount;
+
+	/* NOTE:
+	 *
+	 * Flags are in jctx.sched_info.ctx.flags
+	 * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
+	 *
+	 * All other flags must be added there */
+	spinlock_t         mm_update_lock;
+	struct mm_struct *process_mm;
+	/* End of the SAME_VA zone */
+	u64 same_va_end;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	struct kbase_trace_kctx_timeline timeline;
+#endif
+#ifdef CONFIG_DEBUG_FS
+	/* Content of mem_profile file */
+	char *mem_profile_data;
+	/* Size of @c mem_profile_data */
+	size_t mem_profile_size;
+	/* Mutex guarding memory profile state */
+	struct mutex mem_profile_lock;
+	/* Memory profile directory under debugfs */
+	struct dentry *kctx_dentry;
+
+	/* for job fault debug */
+	unsigned int *reg_dump;
+	atomic_t job_fault_count;
+	/* This list will keep the following atoms during the dump
+	 * in the same context
+	 */
+	struct list_head job_fault_resume_event_list;
+
+#endif /* CONFIG_DEBUG_FS */
+
+	struct jsctx_queue jsctx_queue
+		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+
+	/* Number of atoms currently pulled from this context */
+	atomic_t atoms_pulled;
+	/* Number of atoms currently pulled from this context, per slot */
+	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+	/* Number of atoms currently pulled from this context, per slot and
+	 * priority. Hold hwaccess_lock when accessing */
+	int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][
+			KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	/* true if slot is blocked on the given priority. This will be set on a
+	 * soft-stop */
+	bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	/* Bitmask of slots that can be pulled from */
+	u32 slots_pullable;
+
+	/* Backend specific data */
+	struct kbase_context_backend backend;
+
+	/* Work structure used for deferred ASID assignment */
+	struct work_struct work;
+
+	/* Only one userspace vinstr client per kbase context */
+	struct kbase_vinstr_client *vinstr_cli;
+	struct mutex vinstr_cli_lock;
+
+	/* List of completed jobs waiting for events to be posted */
+	struct list_head completed_jobs;
+	/* Number of work items currently pending on job_done_wq */
+	atomic_t work_count;
+
+	/* Waiting soft-jobs will fail when this timer expires */
+	struct timer_list soft_job_timeout;
+
+	/* JIT allocation management */
+	struct kbase_va_region *jit_alloc[256];
+	struct list_head jit_active_head;
+	struct list_head jit_pool_head;
+	struct list_head jit_destroy_head;
+	struct mutex jit_evict_lock;
+	struct work_struct jit_work;
+
+	/* A list of the JIT soft-jobs in submission order
+	 * (protected by kbase_jd_context.lock)
+	 */
+	struct list_head jit_atoms_head;
+	/* A list of pending JIT alloc soft-jobs (using the 'queue' list_head)
+	 * (protected by kbase_jd_context.lock)
+	 */
+	struct list_head jit_pending_alloc;
+
+	/* External sticky resource management */
+	struct list_head ext_res_meta_head;
+
+	/* Used to record that a drain was requested from atomic context */
+	atomic_t drain_pending;
+
+	/* Current age count, used to determine age for newly submitted atoms */
+	u32 age_count;
+};
+
+/**
+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
+ *                                 to a @kbase_context.
+ * @ext_res_node:                  List head for adding the metadata to a
+ *                                 @kbase_context.
+ * @alloc:                         The physical memory allocation structure
+ *                                 which is mapped.
+ * @gpu_addr:                      The GPU virtual address the resource is
+ *                                 mapped to.
+ *
+ * External resources can be mapped into multiple contexts as well as the same
+ * context multiple times.
+ * As kbase_va_region itself isn't refcounted we can't attach our extra
+ * information to it as it could be removed under our feet leaving external
+ * resources pinned.
+ * This metadata structure binds a single external resource to a single
+ * context, ensuring that per context mapping is tracked separately so it can
+ * be overridden when needed and abuses by the application (freeing the resource
+ * multiple times) don't effect the refcount of the physical allocation.
+ */
+struct kbase_ctx_ext_res_meta {
+	struct list_head ext_res_node;
+	struct kbase_mem_phy_alloc *alloc;
+	u64 gpu_addr;
+};
+
+enum kbase_reg_access_type {
+	REG_READ,
+	REG_WRITE
+};
+
+enum kbase_share_attr_bits {
+	/* (1ULL << 8) bit is reserved */
+	SHARE_BOTH_BITS = (2ULL << 8),	/* inner and outer shareable coherency */
+	SHARE_INNER_BITS = (3ULL << 8)	/* inner shareable coherency */
+};
+
+/**
+ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
+ * @kbdev: kbase device
+ *
+ * Return: true if the device access are coherent, false if not.
+ */
+static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
+			(kbdev->system_coherency == COHERENCY_ACE))
+		return true;
+
+	return false;
+}
+
+/* Conversion helpers for setting up high resolution timers */
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
+
+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
+#define KBASE_CLEAN_CACHE_MAX_LOOPS     100000
+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
+#define KBASE_AS_INACTIVE_MAX_LOOPS     100000
+
+/* Maximum number of times a job can be replayed */
+#define BASEP_JD_REPLAY_LIMIT 15
+
+/* JobDescriptorHeader - taken from the architecture specifications, the layout
+ * is currently identical for all GPU archs. */
+struct job_descriptor_header {
+	u32 exception_status;
+	u32 first_incomplete_task;
+	u64 fault_pointer;
+	u8 job_descriptor_size : 1;
+	u8 job_type : 7;
+	u8 job_barrier : 1;
+	u8 _reserved_01 : 1;
+	u8 _reserved_1 : 1;
+	u8 _reserved_02 : 1;
+	u8 _reserved_03 : 1;
+	u8 _reserved_2 : 1;
+	u8 _reserved_04 : 1;
+	u8 _reserved_05 : 1;
+	u16 job_index;
+	u16 job_dependency_index_1;
+	u16 job_dependency_index_2;
+	union {
+		u64 _64;
+		u32 _32;
+	} next_job;
+};
+
+#endif				/* _KBASE_DEFS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
new file mode 100644
index 0000000..717795b
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -0,0 +1,673 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_profiling_gator_api.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports tracing feature provided in the base module.
+ * Please keep it in sync with the value of base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if KBASE_TRACE_ENABLE
+static const char *kbasep_trace_code_string[] = {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+static int kbasep_trace_init(struct kbase_device *kbdev);
+static void kbasep_trace_term(struct kbase_device *kbdev);
+static void kbasep_trace_hook_wrapper(void *param);
+
+struct kbase_device *kbase_device_alloc(void)
+{
+	return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+	const char format[] = "mali_mmu%d";
+	char name[sizeof(format)];
+	const char poke_format[] = "mali_mmu%d_poker";
+	char poke_name[sizeof(poke_format)];
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		snprintf(poke_name, sizeof(poke_name), poke_format, i);
+
+	snprintf(name, sizeof(name), format, i);
+
+	kbdev->as[i].number = i;
+	kbdev->as[i].fault_addr = 0ULL;
+
+	kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+	if (!kbdev->as[i].pf_wq)
+		return -EINVAL;
+
+	INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+	INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+		struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
+		struct work_struct *poke_work = &kbdev->as[i].poke_work;
+
+		kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+		if (!kbdev->as[i].poke_wq) {
+			destroy_workqueue(kbdev->as[i].pf_wq);
+			return -EINVAL;
+		}
+		INIT_WORK(poke_work, kbasep_as_do_poke);
+
+		hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+		poke_timer->function = kbasep_as_poke_timer_callback;
+
+		kbdev->as[i].poke_refcount = 0;
+		kbdev->as[i].poke_state = 0u;
+	}
+
+	return 0;
+}
+
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+	destroy_workqueue(kbdev->as[i].pf_wq);
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		destroy_workqueue(kbdev->as[i].poke_wq);
+}
+
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+	int i, err;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		err = kbase_device_as_init(kbdev, i);
+		if (err)
+			goto free_workqs;
+	}
+
+	return 0;
+
+free_workqs:
+	for (; i > 0; i--)
+		kbase_device_as_term(kbdev, i);
+
+	return err;
+}
+
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+		kbase_device_as_term(kbdev, i);
+}
+
+int kbase_device_init(struct kbase_device * const kbdev)
+{
+	int i, err;
+#ifdef CONFIG_ARM64
+	struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
+
+	spin_lock_init(&kbdev->mmu_mask_change);
+	mutex_init(&kbdev->mmu_hw_mutex);
+#ifdef CONFIG_ARM64
+	kbdev->cci_snoop_enabled = false;
+	np = kbdev->dev->of_node;
+	if (np != NULL) {
+		if (of_property_read_u32(np, "snoop_enable_smc",
+					&kbdev->snoop_enable_smc))
+			kbdev->snoop_enable_smc = 0;
+		if (of_property_read_u32(np, "snoop_disable_smc",
+					&kbdev->snoop_disable_smc))
+			kbdev->snoop_disable_smc = 0;
+		/* Either both or none of the calls should be provided. */
+		if (!((kbdev->snoop_disable_smc == 0
+			&& kbdev->snoop_enable_smc == 0)
+			|| (kbdev->snoop_disable_smc != 0
+			&& kbdev->snoop_enable_smc != 0))) {
+			WARN_ON(1);
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+#endif /* CONFIG_ARM64 */
+	/* Get the list of workarounds for issues on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	err = kbase_hw_set_issues_mask(kbdev);
+	if (err)
+		goto fail;
+
+	/* Set the list of features available on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	kbase_hw_set_features_mask(kbdev);
+
+	kbase_gpuprops_set_features(kbdev);
+
+	/* On Linux 4.0+, dma coherency is determined from device tree */
+#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+	set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif
+
+	/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+	 * device structure was created by device-tree
+	 */
+	if (!kbdev->dev->dma_mask)
+		kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+	err = dma_set_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	err = dma_set_coherent_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+	err = kbase_device_all_as_init(kbdev);
+	if (err)
+		goto as_init_failed;
+
+	spin_lock_init(&kbdev->hwcnt.lock);
+
+	err = kbasep_trace_init(kbdev);
+	if (err)
+		goto term_as;
+
+	mutex_init(&kbdev->cacheclean_lock);
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		kbdev->timeline.slot_atoms_submitted[i] = 0;
+
+	for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
+		atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+	/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
+	for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
+		kbdev->kbase_profiling_controls[i] = 0;
+
+	kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+	atomic_set(&kbdev->ctx_num, 0);
+
+	err = kbase_instr_backend_init(kbdev);
+	if (err)
+		goto term_trace;
+
+	kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+		kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+	else
+		kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+
+#ifdef CONFIG_MALI_DEBUG
+	init_waitqueue_head(&kbdev->driver_inactive_wait);
+#endif /* CONFIG_MALI_DEBUG */
+
+	return 0;
+term_trace:
+	kbasep_trace_term(kbdev);
+term_as:
+	kbase_device_all_as_term(kbdev);
+as_init_failed:
+dma_set_mask_failed:
+fail:
+	return err;
+}
+
+void kbase_device_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+#if KBASE_TRACE_ENABLE
+	kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+	kbase_instr_backend_term(kbdev);
+
+	kbasep_trace_term(kbdev);
+
+	kbase_device_all_as_term(kbdev);
+}
+
+void kbase_device_free(struct kbase_device *kbdev)
+{
+	kfree(kbdev);
+}
+
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(tb);
+
+	/* Interface uses 16-bit value to track last accessed entry. Each entry
+	 * is composed of two 32-bit words.
+	 * This limits the size that can be handled without an overflow. */
+	if (0xFFFF * (2 * sizeof(u32)) < size)
+		return -EINVAL;
+
+	/* set up the header */
+	/* magic number in the first 4 bytes */
+	tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
+	/* Store (write offset = 0, wrap counter = 0, transaction active = no)
+	 * write offset 0 means never written.
+	 * Offsets 1 to (wrap_offset - 1) used to store values when trace started
+	 */
+	tb[1] = 0;
+
+	/* install trace buffer */
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb_wrap_offset = size / 8;
+	kctx->jctx.tb = tb;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+
+	return 0;
+}
+
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb = NULL;
+	kctx->jctx.tb_wrap_offset = 0;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	if (kctx->jctx.tb) {
+		u16 wrap_count;
+		u16 write_offset;
+		u32 *tb = kctx->jctx.tb;
+		u32 header_word;
+
+		header_word = tb[1];
+		KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
+
+		wrap_count = (header_word >> 1) & 0x7FFF;
+		write_offset = (header_word >> 16) & 0xFFFF;
+
+		/* mark as transaction in progress */
+		tb[1] |= 0x1;
+		mb();
+
+		/* calculate new offset */
+		write_offset++;
+		if (write_offset == kctx->jctx.tb_wrap_offset) {
+			/* wrap */
+			write_offset = 1;
+			wrap_count++;
+			wrap_count &= 0x7FFF;	/* 15bit wrap counter */
+		}
+
+		/* store the trace entry at the selected offset */
+		tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
+		tb[write_offset * 2 + 1] = reg_value;
+		mb();
+
+		/* new header word */
+		header_word = (write_offset << 16) | (wrap_count << 1) | 0x0;	/* transaction complete */
+		tb[1] = header_word;
+	}
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	struct kbase_trace *rbuf;
+
+	rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+	if (!rbuf)
+		return -EINVAL;
+
+	kbdev->trace_rbuf = rbuf;
+	spin_lock_init(&kbdev->trace_lock);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	kfree(kbdev->trace_rbuf);
+}
+
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+	s32 written = 0;
+
+	/* Initial part of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
+
+	if (trace_msg->katom)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
+
+	/* NOTE: Could add function callbacks to handle different message types */
+	/* Jobslot present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Refcount present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Rest of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
+}
+
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	dev_dbg(kbdev->dev, "%s", buffer);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+	unsigned long irqflags;
+	struct kbase_trace *trace_msg;
+
+	spin_lock_irqsave(&kbdev->trace_lock, irqflags);
+
+	trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+	/* Fill the message */
+	trace_msg->thread_id = task_pid_nr(current);
+	trace_msg->cpu = task_cpu(current);
+
+	getnstimeofday(&trace_msg->timestamp);
+
+	trace_msg->code = code;
+	trace_msg->ctx = ctx;
+
+	if (NULL == katom) {
+		trace_msg->katom = false;
+	} else {
+		trace_msg->katom = true;
+		trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+		trace_msg->atom_udata[0] = katom->udata.blob[0];
+		trace_msg->atom_udata[1] = katom->udata.blob[1];
+	}
+
+	trace_msg->gpu_addr = gpu_addr;
+	trace_msg->jobslot = jobslot;
+	trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
+	trace_msg->info_val = info_val;
+	trace_msg->flags = flags;
+
+	/* Update the ringbuffer indices */
+	kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+	if (kbdev->trace_next_in == kbdev->trace_first_out)
+		kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+	/* Done */
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	kbdev->trace_first_out = kbdev->trace_next_in;
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 start;
+	u32 end;
+
+	dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	start = kbdev->trace_first_out;
+	end = kbdev->trace_next_in;
+
+	while (start != end) {
+		struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
+
+		kbasep_trace_dump_msg(kbdev, trace_msg);
+
+		start = (start + 1) & KBASE_TRACE_MASK;
+	}
+	dev_dbg(kbdev->dev, "TRACE_END");
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	KBASE_TRACE_CLEAR(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)param;
+
+	kbasep_trace_dump(kbdev);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct trace_seq_state {
+	struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+	u32 start;
+	u32 end;
+};
+
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	if (*pos > KBASE_TRACE_SIZE)
+		return NULL;
+	i = state->start + *pos;
+	if ((state->end >= state->start && i >= state->end) ||
+			i >= state->end + KBASE_TRACE_SIZE)
+		return NULL;
+
+	i &= KBASE_TRACE_MASK;
+
+	return &state->trace_buf[i];
+}
+
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	(*pos)++;
+
+	i = (state->start + *pos) & KBASE_TRACE_MASK;
+	if (i == state->end)
+		return NULL;
+
+	return &state->trace_buf[i];
+}
+
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace *trace_msg = data;
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	seq_printf(s, "%s\n", buffer);
+	return 0;
+}
+
+static const struct seq_operations kbasep_trace_seq_ops = {
+	.start = kbasep_trace_seq_start,
+	.next = kbasep_trace_seq_next,
+	.stop = kbasep_trace_seq_stop,
+	.show = kbasep_trace_seq_show,
+};
+
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct kbase_device *kbdev = inode->i_private;
+	unsigned long flags;
+
+	struct trace_seq_state *state;
+
+	state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
+	if (!state)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	state->start = kbdev->trace_first_out;
+	state->end = kbdev->trace_next_in;
+	memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	return 0;
+}
+
+static const struct file_operations kbasep_trace_debugfs_fops = {
+	.open = kbasep_trace_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_trace", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_trace_debugfs_fops);
+}
+
+#else
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_DEBUG_FS */
+
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	CSTD_UNUSED(param);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
+{
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		kbdev->kbase_profiling_controls[control] = value;
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+}
+
+/*
+ * Called by gator to control the production of
+ * profiling information at runtime
+ * */
+
+void _mali_profiling_control(u32 action, u32 value)
+{
+	struct kbase_device *kbdev = NULL;
+
+	/* find the first i.e. call with -1 */
+	kbdev = kbase_find_device(-1);
+
+	if (NULL != kbdev)
+		kbase_set_profiling_control(kbdev, action, value);
+}
+KBASE_EXPORT_SYMBOL(_mali_profiling_control);
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
new file mode 100644
index 0000000..f70bccc
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_set(&kbdev->disjoint_event.count, 0);
+	atomic_set(&kbdev->disjoint_event.state, 0);
+}
+
+/* increment the disjoint event count */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* increment the state and the event counter */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.state);
+
+	kbase_disjoint_event(kbdev);
+}
+
+/* decrement the state */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+	kbase_disjoint_event(kbdev);
+
+	atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* increments the count only if the state is > 0 */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	if (atomic_read(&kbdev->disjoint_event.state))
+		kbase_disjoint_event(kbdev);
+}
+
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
new file mode 100644
index 0000000..9197743
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
@@ -0,0 +1,449 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as
+ * it will be set there.
+ */
+#include "mali_kbase_dma_fence.h"
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/reservation.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/ww_mutex.h>
+
+#include <mali_kbase.h>
+
+static void
+kbase_dma_fence_work(struct work_struct *pwork);
+
+static void
+kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource);
+}
+
+static void
+kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom)
+{
+	list_del(&katom->queue);
+}
+
+static int
+kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
+				  struct ww_acquire_ctx *ctx)
+{
+	struct reservation_object *content_res = NULL;
+	unsigned int content_res_idx = 0;
+	unsigned int r;
+	int err = 0;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < info->dma_fence_resv_count; r++) {
+		if (info->resv_objs[r] == content_res) {
+			content_res = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	content_res_idx = r;
+
+	/* Unlock the locked one ones */
+	while (r--)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+
+	if (content_res)
+		ww_mutex_unlock(&content_res->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+		content_res = info->resv_objs[content_res_idx];
+		ww_mutex_lock_slow(&content_res->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+static void
+kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
+				    struct ww_acquire_ctx *ctx)
+{
+	unsigned int r;
+
+	for (r = 0; r < info->dma_fence_resv_count; r++)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+	ww_acquire_fini(ctx);
+}
+
+/**
+ * kbase_dma_fence_queue_work() - Queue work to handle @katom
+ * @katom: Pointer to atom for which to queue work
+ *
+ * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and
+ * submit the atom.
+ */
+static void
+kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	bool ret;
+
+	INIT_WORK(&katom->work, kbase_dma_fence_work);
+	ret = queue_work(kctx->dma_fence.wq, &katom->work);
+	/* Warn if work was already queued, that should not happen. */
+	WARN_ON(!ret);
+}
+
+/**
+ * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom
+ * @katom:	Katom to cancel
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Cancel callbacks and clean up. */
+	kbase_fence_free_callbacks(katom);
+
+	/* Mark the atom as handled in case all fences signaled just before
+	 * canceling the callbacks and the worker was queued.
+	 */
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 * there is a race between job completion and cancellation.
+	 */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+
+/**
+ * kbase_dma_fence_work() - Worker thread called when a fence is signaled
+ * @pwork:	work_struct containing a pointer to a katom
+ *
+ * This function will clean and mark all dependencies as satisfied
+ */
+static void
+kbase_dma_fence_work(struct work_struct *pwork)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = container_of(pwork, struct kbase_jd_atom, work);
+	ctx = &katom->kctx->jctx;
+
+	mutex_lock(&ctx->lock);
+	if (kbase_fence_dep_count_read(katom) != 0)
+		goto out;
+
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Remove atom from list of dma-fence waiting atoms. */
+	kbase_dma_fence_waiters_remove(katom);
+	/* Cleanup callbacks. */
+	kbase_fence_free_callbacks(katom);
+	/*
+	 * Queue atom on GPU, unless it has already completed due to a failing
+	 * dependency. Run jd_done_nolock() on the katom if it is completed.
+	 */
+	if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
+		jd_done_nolock(katom, NULL);
+	else
+		kbase_jd_dep_clear_locked(katom);
+
+out:
+	mutex_unlock(&ctx->lock);
+}
+
+static void
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
+#else
+kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+#endif
+{
+	struct kbase_fence_cb *kcb = container_of(cb,
+				struct kbase_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+
+	/* If the atom is zapped dep_count will be forced to a negative number
+	 * preventing this callback from ever scheduling work. Which in turn
+	 * would reschedule the atom.
+	 */
+
+	if (kbase_fence_dep_count_dec_and_test(katom))
+		kbase_dma_fence_queue_work(katom);
+}
+
+static int
+kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
+					 struct reservation_object *resv,
+					 bool exclusive)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+#else
+	struct dma_fence *excl_fence = NULL;
+	struct dma_fence **shared_fences = NULL;
+#endif
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = reservation_object_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = kbase_fence_add_callback(katom,
+						excl_fence,
+						kbase_dma_fence_cb);
+
+		/* Release our reference, taken by reservation_object_get_fences_rcu(),
+		 * to the fence. We have set up our callback (if that was possible),
+		 * and it's the fence's owner is responsible for singling the fence
+		 * before allowing it to disappear.
+		 */
+		dma_fence_put(excl_fence);
+
+		if (err)
+			goto out;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = kbase_fence_add_callback(katom,
+							shared_fences[i],
+							kbase_dma_fence_cb);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Release all our references to the shared fences, taken by
+	 * reservation_object_get_fences_rcu(). We have set up our callback (if
+	 * that was possible), and it's the fence's owner is responsible for
+	 * signaling the fence before allowing it to disappear.
+	 */
+out:
+	for (i = 0; i < shared_count; i++)
+		dma_fence_put(shared_fences[i]);
+	kfree(shared_fences);
+
+	if (err) {
+		/*
+		 * On error, cancel and clean up all callbacks that was set up
+		 * before the error.
+		 */
+		kbase_fence_free_callbacks(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive)
+{
+	unsigned int i;
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		/* Duplicate resource, ignore */
+		if (info->resv_objs[i] == resv)
+			return;
+	}
+
+	info->resv_objs[info->dma_fence_resv_count] = resv;
+	if (exclusive)
+		set_bit(info->dma_fence_resv_count,
+			info->dma_fence_excl_bitmap);
+	(info->dma_fence_resv_count)++;
+}
+
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info)
+{
+	int err, i;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+	struct ww_acquire_ctx ww_ctx;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	fence = kbase_fence_out_new(katom);
+	if (!fence) {
+		err = -ENOMEM;
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d creating fence.\n", err);
+		return err;
+	}
+
+	kbase_fence_dep_count_set(katom, 1);
+
+	err = kbase_dma_fence_lock_reservations(info, &ww_ctx);
+	if (err) {
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d locking reservations.\n", err);
+		kbase_fence_dep_count_set(katom, -1);
+		kbase_fence_out_remove(katom);
+		return err;
+	}
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		struct reservation_object *obj = info->resv_objs[i];
+
+		if (!test_bit(i, info->dma_fence_excl_bitmap)) {
+			err = reservation_object_reserve_shared(obj);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d reserving space for shared fence.\n", err);
+				goto end;
+			}
+
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, false);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_shared_fence(obj, fence);
+		} else {
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, true);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_excl_fence(obj, fence);
+		}
+	}
+
+end:
+	kbase_dma_fence_unlock_reservations(info, &ww_ctx);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (kbase_fence_dep_count_dec_and_test(katom)) {
+			kbase_fence_dep_count_set(katom, -1);
+			kbase_fence_free_callbacks(katom);
+		} else {
+			/* Add katom to the list of dma-buf fence waiting atoms
+			 * only if it is still waiting.
+			 */
+			kbase_dma_fence_waiters_add(katom);
+		}
+	} else {
+		/* There was an error, cancel callbacks, set dep_count to -1 to
+		 * indicate that the atom has been handled (the caller will
+		 * kill it for us), signal the fence, free callbacks and the
+		 * fence.
+		 */
+		kbase_fence_free_callbacks(katom);
+		kbase_fence_dep_count_set(katom, -1);
+		kbase_dma_fence_signal(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
+{
+	struct list_head *list = &kctx->dma_fence.waiting_resource;
+
+	while (!list_empty(list)) {
+		struct kbase_jd_atom *katom;
+
+		katom = list_first_entry(list, struct kbase_jd_atom, queue);
+		kbase_dma_fence_waiters_remove(katom);
+		kbase_dma_fence_cancel_atom(katom);
+	}
+}
+
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
+{
+	/* Cancel callbacks and clean up. */
+	if (kbase_fence_free_callbacks(katom))
+		kbase_dma_fence_queue_work(katom);
+}
+
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
+{
+	if (!katom->dma_fence.fence)
+		return;
+
+	/* Signal the atom's fence. */
+	dma_fence_signal(katom->dma_fence.fence);
+
+	kbase_fence_out_remove(katom);
+
+	kbase_fence_free_callbacks(katom);
+}
+
+void kbase_dma_fence_term(struct kbase_context *kctx)
+{
+	destroy_workqueue(kctx->dma_fence.wq);
+	kctx->dma_fence.wq = NULL;
+}
+
+int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);
+
+	kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d",
+					     WQ_UNBOUND, 1, kctx->pid);
+	if (!kctx->dma_fence.wq)
+		return -ENOMEM;
+
+	return 0;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
new file mode 100644
index 0000000..c9ab403
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DMA_FENCE_H_
+#define _KBASE_DMA_FENCE_H_
+
+#ifdef CONFIG_MALI_DMA_FENCE
+
+#include <linux/list.h>
+#include <linux/reservation.h>
+#include <mali_kbase_fence.h>
+
+
+/* Forward declaration from mali_kbase_defs.h */
+struct kbase_jd_atom;
+struct kbase_context;
+
+/**
+ * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
+ * @resv_objs:             Array of reservation objects to attach the
+ *                         new fence to.
+ * @dma_fence_resv_count:  Number of reservation objects in the array.
+ * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive.
+ *
+ * This is used by some functions to pass around a collection of data about
+ * reservation objects.
+ */
+struct kbase_dma_fence_resv_info {
+	struct reservation_object **resv_objs;
+	unsigned int dma_fence_resv_count;
+	unsigned long *dma_fence_excl_bitmap;
+};
+
+/**
+ * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
+ * @resv:      Reservation object to add to the array.
+ * @info:      Pointer to struct with current reservation info
+ * @exclusive: Boolean indicating if exclusive access is needed
+ *
+ * The function adds a new reservation_object to an existing array of
+ * reservation_objects. At the same time keeps track of which objects require
+ * exclusive access in dma_fence_excl_bitmap.
+ */
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive);
+
+/**
+ * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
+ * @katom: Katom with the external dependency.
+ * @info:  Pointer to struct with current reservation info
+ *
+ * Return: An error code or 0 if succeeds
+ */
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info);
+
+/**
+ * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx
+ * @kctx: Pointer to kbase context
+ *
+ * This function will cancel and clean up all katoms on @kctx that is waiting
+ * on dma-buf fences.
+ *
+ * Locking: jctx.lock needs to be held when calling this function.
+ */
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
+ * @katom: Pointer to katom whose callbacks are to be canceled
+ *
+ * This function cancels all dma-buf fence callbacks on @katom, but does not
+ * cancel the katom itself.
+ *
+ * The caller is responsible for ensuring that jd_done_nolock is called on
+ * @katom.
+ *
+ * Locking: jctx.lock must be held when calling this function.
+ */
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
+ * @katom: Pointer to katom to signal and clean up
+ *
+ * This function will signal the @katom's fence, if it has one, and clean up
+ * the callback data from the katom's wait on earlier fences.
+ *
+ * Locking: jctx.lock must be held while calling this function.
+ */
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_term() - Terminate Mali dma-fence context
+ * @kctx: kbase context to terminate
+ */
+void kbase_dma_fence_term(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_init() - Initialize Mali dma-fence context
+ * @kctx: kbase context to initialize
+ */
+int kbase_dma_fence_init(struct kbase_context *kctx);
+
+
+#else /* CONFIG_MALI_DMA_FENCE */
+/* Dummy functions for when dma-buf fence isn't enabled. */
+
+static inline int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	return 0;
+}
+
+static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
new file mode 100644
index 0000000..1881486
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -0,0 +1,259 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+#include <mali_kbase_tlstream.h>
+
+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct base_jd_udata data;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+	data = katom->udata;
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
+
+	KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx);
+	KBASE_TLSTREAM_TL_DEL_ATOM(katom);
+
+	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+
+	wake_up(&katom->completed);
+
+	return data;
+}
+
+int kbase_event_pending(struct kbase_context *ctx)
+{
+	KBASE_DEBUG_ASSERT(ctx);
+
+	return (atomic_read(&ctx->event_count) != 0) ||
+			(atomic_read(&ctx->event_closed) != 0);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+	struct kbase_jd_atom *atom;
+
+	KBASE_DEBUG_ASSERT(ctx);
+
+	mutex_lock(&ctx->event_mutex);
+
+	if (list_empty(&ctx->event_list)) {
+		if (!atomic_read(&ctx->event_closed)) {
+			mutex_unlock(&ctx->event_mutex);
+			return -1;
+		}
+
+		/* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+		mutex_unlock(&ctx->event_mutex);
+		uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+		memset(&uevent->udata, 0, sizeof(uevent->udata));
+		dev_dbg(ctx->kbdev->dev,
+				"event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+				BASE_JD_EVENT_DRV_TERMINATED);
+		return 0;
+	}
+
+	/* normal event processing */
+	atomic_dec(&ctx->event_count);
+	atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
+	list_del(ctx->event_list.next);
+
+	mutex_unlock(&ctx->event_mutex);
+
+	dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+	uevent->event_code = atom->event_code;
+	uevent->atom_number = (atom - ctx->jctx.atoms);
+
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(atom);
+
+	mutex_lock(&ctx->jctx.lock);
+	uevent->udata = kbase_event_process(ctx, atom);
+	mutex_unlock(&ctx->jctx.lock);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue);
+
+/**
+ * kbase_event_process_noreport_worker - Worker for processing atoms that do not
+ *                                       return an event but do have external
+ *                                       resources
+ * @data:  Work structure
+ */
+static void kbase_event_process_noreport_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(katom);
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_event_process(kctx, katom);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+/**
+ * kbase_event_process_noreport - Process atoms that do not return an event
+ * @kctx:  Context pointer
+ * @katom: Atom to be processed
+ *
+ * Atoms that do not have external resources will be processed immediately.
+ * Atoms that do have external resources will be processed on a workqueue, in
+ * order to avoid locking issues.
+ */
+static void kbase_event_process_noreport(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		INIT_WORK(&katom->work, kbase_event_process_noreport_worker);
+		queue_work(kctx->event_workq, &katom->work);
+	} else {
+		kbase_event_process(kctx, katom);
+	}
+}
+
+/**
+ * kbase_event_coalesce - Move pending events to the main event list
+ * @kctx:  Context pointer
+ *
+ * kctx->event_list and kctx->event_coalesce_count must be protected
+ * by a lock unless this is the last thread using them
+ * (and we're about to terminate the lock).
+ *
+ * Return: The number of pending events moved to the main event list
+ */
+static int kbase_event_coalesce(struct kbase_context *kctx)
+{
+	const int event_count = kctx->event_coalesce_count;
+
+	/* Join the list of pending events onto the tail of the main list
+	   and reset it */
+	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
+	kctx->event_coalesce_count = 0;
+
+	/* Return the number of events moved */
+	return event_count;
+}
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+	if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
+		if (atom->event_code == BASE_JD_EVENT_DONE) {
+			/* Don't report the event */
+			kbase_event_process_noreport(ctx, atom);
+			return;
+		}
+	}
+
+	if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
+		/* Don't report the event */
+		kbase_event_process_noreport(ctx, atom);
+		return;
+	}
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_POSTED);
+	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
+		/* Don't report the event until other event(s) have completed */
+		mutex_lock(&ctx->event_mutex);
+		list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
+		++ctx->event_coalesce_count;
+		mutex_unlock(&ctx->event_mutex);
+	} else {
+		/* Report the event and any pending events now */
+		int event_count = 1;
+
+		mutex_lock(&ctx->event_mutex);
+		event_count += kbase_event_coalesce(ctx);
+		list_add_tail(&atom->dep_item[0], &ctx->event_list);
+		atomic_add(event_count, &ctx->event_count);
+		mutex_unlock(&ctx->event_mutex);
+
+		kbase_event_wakeup(ctx);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_event_post);
+
+void kbase_event_close(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->event_mutex);
+	atomic_set(&kctx->event_closed, true);
+	mutex_unlock(&kctx->event_mutex);
+	kbase_event_wakeup(kctx);
+}
+
+int kbase_event_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	INIT_LIST_HEAD(&kctx->event_list);
+	INIT_LIST_HEAD(&kctx->event_coalesce_list);
+	mutex_init(&kctx->event_mutex);
+	atomic_set(&kctx->event_count, 0);
+	kctx->event_coalesce_count = 0;
+	atomic_set(&kctx->event_closed, false);
+	kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+	if (NULL == kctx->event_workq)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init);
+
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+	int event_count;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+	flush_workqueue(kctx->event_workq);
+	destroy_workqueue(kctx->event_workq);
+
+	/* We use kbase_event_dequeue to remove the remaining events as that
+	 * deals with all the cleanup needed for the atoms.
+	 *
+	 * Note: use of kctx->event_list without a lock is safe because this must be the last
+	 * thread using it (because we're about to terminate the lock)
+	 */
+	event_count = kbase_event_coalesce(kctx);
+	atomic_add(event_count, &kctx->event_count);
+
+	while (!list_empty(&kctx->event_list)) {
+		struct base_jd_event_v2 event;
+
+		kbase_event_dequeue(kctx, &event);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c
new file mode 100644
index 0000000..17b5621
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <mali_kbase_fence_defs.h>
+#include <mali_kbase_fence.h>
+#include <mali_kbase.h>
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(kbase_fence_lock);
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_get_driver_name(struct fence *fence)
+#else
+kbase_fence_get_driver_name(struct dma_fence *fence)
+#endif
+{
+	return kbase_drv_name;
+}
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_get_timeline_name(struct fence *fence)
+#else
+kbase_fence_get_timeline_name(struct dma_fence *fence)
+#endif
+{
+	return kbase_timeline_name;
+}
+
+static bool
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_enable_signaling(struct fence *fence)
+#else
+kbase_fence_enable_signaling(struct dma_fence *fence)
+#endif
+{
+	return true;
+}
+
+static void
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_fence_value_str(struct fence *fence, char *str, int size)
+#else
+kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size)
+#endif
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+const struct fence_ops kbase_fence_ops = {
+	.wait = fence_default_wait,
+#else
+const struct dma_fence_ops kbase_fence_ops = {
+	.wait = dma_fence_default_wait,
+#endif
+	.get_driver_name = kbase_fence_get_driver_name,
+	.get_timeline_name = kbase_fence_get_timeline_name,
+	.enable_signaling = kbase_fence_enable_signaling,
+	.fence_value_str = kbase_fence_fence_value_str
+};
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+struct fence *
+kbase_fence_out_new(struct kbase_jd_atom *katom)
+#else
+struct dma_fence *
+kbase_fence_out_new(struct kbase_jd_atom *katom)
+#endif
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	WARN_ON(katom->dma_fence.fence);
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	dma_fence_init(fence,
+		       &kbase_fence_ops,
+		       &kbase_fence_lock,
+		       katom->dma_fence.context,
+		       atomic_inc_return(&katom->dma_fence.seqno));
+
+	katom->dma_fence.fence = fence;
+
+	return fence;
+}
+
+bool
+kbase_fence_free_callbacks(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_cb *cb, *tmp;
+	bool res = false;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Clean up and free callbacks. */
+	list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) {
+		bool ret;
+
+		/* Cancel callbacks that hasn't been called yet. */
+		ret = dma_fence_remove_callback(cb->fence, &cb->fence_cb);
+		if (ret) {
+			int ret;
+
+			/* Fence had not signaled, clean up after
+			 * canceling.
+			 */
+			ret = atomic_dec_return(&katom->dma_fence.dep_count);
+
+			if (unlikely(ret == 0))
+				res = true;
+		}
+
+		/*
+		 * Release the reference taken in
+		 * kbase_fence_add_callback().
+		 */
+		dma_fence_put(cb->fence);
+		list_del(&cb->node);
+		kfree(cb);
+	}
+
+	return res;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+int
+kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			 struct fence *fence,
+			 fence_func_t callback)
+#else
+int
+kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			 struct dma_fence *fence,
+			 dma_fence_func_t callback)
+#endif
+{
+	int err = 0;
+	struct kbase_fence_cb *kbase_fence_cb;
+
+	if (!fence)
+		return -EINVAL;
+
+	kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL);
+	if (!kbase_fence_cb)
+		return -ENOMEM;
+
+	kbase_fence_cb->fence = fence;
+	kbase_fence_cb->katom = katom;
+	INIT_LIST_HEAD(&kbase_fence_cb->node);
+
+	err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb,
+				     callback);
+	if (err == -ENOENT) {
+		/* Fence signaled, get the completion result */
+		err = dma_fence_get_status(fence);
+
+		/* remap success completion to err code */
+		if (err == 1)
+			err = 0;
+
+		kfree(kbase_fence_cb);
+	} else if (err) {
+		kfree(kbase_fence_cb);
+	} else {
+		/*
+		 * Get reference to fence that will be kept until callback gets
+		 * cleaned up in kbase_fence_free_callbacks().
+		 */
+		dma_fence_get(fence);
+		atomic_inc(&katom->dma_fence.dep_count);
+		/* Add callback to katom's list of callbacks */
+		list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks);
+	}
+
+	return err;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h
new file mode 100644
index 0000000..f3ed025
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h
@@ -0,0 +1,270 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_FENCE_H_
+#define _KBASE_FENCE_H_
+
+/*
+ * mali_kbase_fence.[hc] has common fence code used by both
+ * - CONFIG_MALI_DMA_FENCE - implicit DMA fences
+ * - CONFIG_SYNC_FILE      - explicit fences beginning with 4.9 kernel
+ */
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+
+#include <linux/list.h>
+#include "mali_kbase_fence_defs.h"
+#include "mali_kbase.h"
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+extern const struct fence_ops kbase_fence_ops;
+#else
+extern const struct dma_fence_ops kbase_fence_ops;
+#endif
+
+/**
+* struct kbase_fence_cb - Mali dma-fence callback data struct
+* @fence_cb: Callback function
+* @katom:    Pointer to katom that is waiting on this callback
+* @fence:    Pointer to the fence object on which this callback is waiting
+* @node:     List head for linking this callback to the katom
+*/
+struct kbase_fence_cb {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence_cb fence_cb;
+	struct fence *fence;
+#else
+	struct dma_fence_cb fence_cb;
+	struct dma_fence *fence;
+#endif
+	struct kbase_jd_atom *katom;
+	struct list_head node;
+};
+
+/**
+ * kbase_fence_out_new() - Creates a new output fence and puts it on the atom
+ * @katom: Atom to create an output fence for
+ *
+ * return: A new fence object on success, NULL on failure.
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
+#else
+struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
+#endif
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_fence_in_set() - Assign input fence to atom
+ * @katom: Atom to assign input fence to
+ * @fence: Input fence to assign to atom
+ *
+ * This function will take ownership of one fence reference!
+ */
+#define kbase_fence_fence_in_set(katom, fence) \
+	do { \
+		WARN_ON((katom)->dma_fence.fence_in); \
+		(katom)->dma_fence.fence_in = fence; \
+	} while (0)
+#endif
+
+/**
+ * kbase_fence_out_remove() - Removes the output fence from atom
+ * @katom: Atom to remove output fence for
+ *
+ * This will also release the reference to this fence which the atom keeps
+ */
+static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->dma_fence.fence) {
+		dma_fence_put(katom->dma_fence.fence);
+		katom->dma_fence.fence = NULL;
+	}
+}
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_out_remove() - Removes the input fence from atom
+ * @katom: Atom to remove input fence for
+ *
+ * This will also release the reference to this fence which the atom keeps
+ */
+static inline void kbase_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->dma_fence.fence_in) {
+		dma_fence_put(katom->dma_fence.fence_in);
+		katom->dma_fence.fence_in = NULL;
+	}
+}
+#endif
+
+/**
+ * kbase_fence_out_is_ours() - Check if atom has a valid fence created by us
+ * @katom: Atom to check output fence for
+ *
+ * Return: true if fence exists and is valid, otherwise false
+ */
+static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom)
+{
+	return katom->dma_fence.fence &&
+				katom->dma_fence.fence->ops == &kbase_fence_ops;
+}
+
+/**
+ * kbase_fence_out_signal() - Signal output fence of atom
+ * @katom: Atom to signal output fence for
+ * @status: Status to signal with (0 for success, < 0 for error)
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom,
+					 int status)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
+	katom->dma_fence.fence->error = status;
+#else
+	katom->dma_fence.fence->status = status;
+#endif
+	return dma_fence_signal(katom->dma_fence.fence);
+}
+
+/**
+ * kbase_fence_add_callback() - Add callback on @fence to block @katom
+ * @katom: Pointer to katom that will be blocked by @fence
+ * @fence: Pointer to fence on which to set up the callback
+ * @callback: Pointer to function to be called when fence is signaled
+ *
+ * Caller needs to hold a reference to @fence when calling this function, and
+ * the caller is responsible for releasing that reference.  An additional
+ * reference to @fence will be taken when the callback was successfully set up
+ * and @fence needs to be kept valid until the callback has been called and
+ * cleanup have been done.
+ *
+ * Return: 0 on success: fence was either already signaled, or callback was
+ * set up. Negative error code is returned on error.
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+int kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct fence *fence,
+			     fence_func_t callback);
+#else
+int kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct dma_fence *fence,
+			     dma_fence_func_t callback);
+#endif
+
+/**
+ * kbase_fence_dep_count_set() - Set dep_count value on atom to specified value
+ * @katom: Atom to set dep_count for
+ * @val: value to set dep_count to
+ *
+ * The dep_count is available to the users of this module so that they can
+ * synchronize completion of the wait with cancellation and adding of more
+ * callbacks. For instance, a user could do the following:
+ *
+ * dep_count set to 1
+ * callback #1 added, dep_count is increased to 2
+ *                             callback #1 happens, dep_count decremented to 1
+ *                             since dep_count > 0, no completion is done
+ * callback #2 is added, dep_count is increased to 2
+ * dep_count decremented to 1
+ *                             callback #2 happens, dep_count decremented to 0
+ *                             since dep_count now is zero, completion executes
+ *
+ * The dep_count can also be used to make sure that the completion only
+ * executes once. This is typically done by setting dep_count to -1 for the
+ * thread that takes on this responsibility.
+ */
+static inline void
+kbase_fence_dep_count_set(struct kbase_jd_atom *katom, int val)
+{
+	atomic_set(&katom->dma_fence.dep_count, val);
+}
+
+/**
+ * kbase_fence_dep_count_dec_and_test() - Decrements dep_count
+ * @katom: Atom to decrement dep_count for
+ *
+ * See @kbase_fence_dep_count_set for general description about dep_count
+ *
+ * Return: true if value was decremented to zero, otherwise false
+ */
+static inline bool
+kbase_fence_dep_count_dec_and_test(struct kbase_jd_atom *katom)
+{
+	return atomic_dec_and_test(&katom->dma_fence.dep_count);
+}
+
+/**
+ * kbase_fence_dep_count_read() - Returns the current dep_count value
+ * @katom: Pointer to katom
+ *
+ * See @kbase_fence_dep_count_set for general description about dep_count
+ *
+ * Return: The current dep_count value
+ */
+static inline int kbase_fence_dep_count_read(struct kbase_jd_atom *katom)
+{
+	return atomic_read(&katom->dma_fence.dep_count);
+}
+
+/**
+ * kbase_fence_free_callbacks() - Free dma-fence callbacks on a katom
+ * @katom: Pointer to katom
+ *
+ * This function will free all fence callbacks on the katom's list of
+ * callbacks. Callbacks that have not yet been called, because their fence
+ * hasn't yet signaled, will first be removed from the fence.
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ *
+ * Return: true if dep_count reached 0, otherwise false.
+ */
+bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom);
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_in_get() - Retrieve input fence for atom.
+ * @katom: Atom to get input fence from
+ *
+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it
+ *
+ * Return: The fence, or NULL if there is no input fence for atom
+ */
+#define kbase_fence_in_get(katom) dma_fence_get((katom)->dma_fence.fence_in)
+#endif
+
+/**
+ * kbase_fence_out_get() - Retrieve output fence for atom.
+ * @katom: Atom to get output fence from
+ *
+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it
+ *
+ * Return: The fence, or NULL if there is no output fence for atom
+ */
+#define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence)
+
+/**
+ * kbase_fence_put() - Releases a reference to a fence
+ * @fence: Fence to release reference for.
+ */
+#define kbase_fence_put(fence) dma_fence_put(fence)
+
+
+#endif /* CONFIG_MALI_DMA_FENCE || defined(CONFIG_SYNC_FILE */
+
+#endif /* _KBASE_FENCE_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h
new file mode 100644
index 0000000..32243a9
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h
@@ -0,0 +1,62 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_FENCE_DEFS_H_
+#define _KBASE_FENCE_DEFS_H_
+
+/*
+ * There was a big rename in the 4.10 kernel (fence* -> dma_fence*)
+ * This file hides the compatibility issues with this for the rest the driver
+ */
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+
+#include <linux/fence.h>
+
+#define dma_fence_context_alloc(a) fence_context_alloc(a)
+#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e)
+#define dma_fence_get(a) fence_get(a)
+#define dma_fence_put(a) fence_put(a)
+#define dma_fence_signal(a) fence_signal(a)
+#define dma_fence_is_signaled(a) fence_is_signaled(a)
+#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c)
+#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b)
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 68))
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0)
+#else
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0)
+#endif
+
+#else
+
+#include <linux/dma-fence.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
+#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \
+	(a)->status ?: 1 \
+	: 0)
+#endif
+
+#endif /* < 4.10.0 */
+
+#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */
+
+#endif /* _KBASE_FENCE_DEFS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
new file mode 100644
index 0000000..ce65b55
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* NB taken from gator  */
+/*
+ * List of possible actions to be controlled by DS-5 Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping
+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
+ * they are unknown inside gator.
+ */
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP  2
+#define GATOR_JOB_SLOT_SOFT_STOPPED  3
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 event, u64 value);
+void kbase_trace_mali_pm_power_off(u32 event, u64 value);
+void kbase_trace_mali_pm_power_on(u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
+void kbase_trace_mali_mmu_as_in_use(int event);
+void kbase_trace_mali_mmu_as_released(int event);
+void kbase_trace_mali_total_alloc_pages_change(long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif  /* _KBASE_GATOR_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
new file mode 100644
index 0000000..fac4d94
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -0,0 +1,338 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_gator_api.h"
+#include "mali_kbase_gator_hwcnt_names.h"
+
+#define MALI_MAX_CORES_PER_GROUP		4
+#define MALI_MAX_NUM_BLOCKS_PER_GROUP	8
+#define MALI_COUNTERS_PER_BLOCK			64
+#define MALI_BYTES_PER_COUNTER			4
+
+struct kbase_gator_hwcnt_handles {
+	struct kbase_device *kbdev;
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct work_struct dump_work;
+	int dump_complete;
+	spinlock_t dump_lock;
+};
+
+static void dump_worker(struct work_struct *work);
+
+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
+{
+	const char * const *hardware_counters;
+	struct kbase_device *kbdev;
+	uint32_t product_id;
+	uint32_t count;
+
+	if (!total_counters)
+		return NULL;
+
+	/* Get the first device - it doesn't matter in this case */
+	kbdev = kbase_find_device(-1);
+	if (!kbdev)
+		return NULL;
+
+	product_id = kbdev->gpu_props.props.core_props.product_id;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) {
+		case GPU_ID2_PRODUCT_TMIX:
+			hardware_counters = hardware_counters_mali_tMIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tMIx);
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			hardware_counters = hardware_counters_mali_tHEx;
+			count = ARRAY_SIZE(hardware_counters_mali_tHEx);
+			break;
+		case GPU_ID2_PRODUCT_TSIX:
+			hardware_counters = hardware_counters_mali_tSIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tSIx);
+			break;
+		case GPU_ID2_PRODUCT_TNOX:
+			hardware_counters = hardware_counters_mali_tNOx;
+			count = ARRAY_SIZE(hardware_counters_mali_tNOx);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	} else {
+		switch (product_id) {
+			/* If we are using a Mali-T60x device */
+		case GPU_ID_PI_T60X:
+			hardware_counters = hardware_counters_mali_t60x;
+			count = ARRAY_SIZE(hardware_counters_mali_t60x);
+			break;
+			/* If we are using a Mali-T62x device */
+		case GPU_ID_PI_T62X:
+			hardware_counters = hardware_counters_mali_t62x;
+			count = ARRAY_SIZE(hardware_counters_mali_t62x);
+			break;
+			/* If we are using a Mali-T72x device */
+		case GPU_ID_PI_T72X:
+			hardware_counters = hardware_counters_mali_t72x;
+			count = ARRAY_SIZE(hardware_counters_mali_t72x);
+			break;
+			/* If we are using a Mali-T76x device */
+		case GPU_ID_PI_T76X:
+			hardware_counters = hardware_counters_mali_t76x;
+			count = ARRAY_SIZE(hardware_counters_mali_t76x);
+			break;
+			/* If we are using a Mali-T82x device */
+		case GPU_ID_PI_T82X:
+			hardware_counters = hardware_counters_mali_t82x;
+			count = ARRAY_SIZE(hardware_counters_mali_t82x);
+			break;
+			/* If we are using a Mali-T83x device */
+		case GPU_ID_PI_T83X:
+			hardware_counters = hardware_counters_mali_t83x;
+			count = ARRAY_SIZE(hardware_counters_mali_t83x);
+			break;
+			/* If we are using a Mali-T86x device */
+		case GPU_ID_PI_T86X:
+			hardware_counters = hardware_counters_mali_t86x;
+			count = ARRAY_SIZE(hardware_counters_mali_t86x);
+			break;
+			/* If we are using a Mali-T88x device */
+		case GPU_ID_PI_TFRX:
+			hardware_counters = hardware_counters_mali_t88x;
+			count = ARRAY_SIZE(hardware_counters_mali_t88x);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	}
+
+	/* Release the kbdev reference. */
+	kbase_release_device(kbdev);
+
+	*total_counters = count;
+
+	/* If we return a string array take a reference on the module (or fail). */
+	if (hardware_counters && !try_module_get(THIS_MODULE))
+		return NULL;
+
+	return hardware_counters;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
+
+void kbase_gator_hwcnt_term_names(void)
+{
+	/* Release the module reference. */
+	module_put(THIS_MODULE);
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
+
+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	uint32_t dump_size = 0, i = 0;
+
+	if (!in_out_info)
+		return NULL;
+
+	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
+	if (!hand)
+		return NULL;
+
+	INIT_WORK(&hand->dump_work, dump_worker);
+	spin_lock_init(&hand->dump_lock);
+
+	/* Get the first device */
+	hand->kbdev = kbase_find_device(-1);
+	if (!hand->kbdev)
+		goto free_hand;
+
+	dump_size = kbase_vinstr_dump_size(hand->kbdev);
+	hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!hand->vinstr_buffer)
+		goto release_device;
+	in_out_info->kernel_dump_buffer = hand->vinstr_buffer;
+
+	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
+	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
+	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
+
+	/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
+	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
+		uint32_t cg, j;
+		uint64_t core_mask;
+
+		/* There are 8 hardware counters blocks per core group */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			in_out_info->nr_core_groups, GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = in_out_info->nr_core_groups *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			MALI_COUNTERS_PER_BLOCK *
+			MALI_BYTES_PER_COUNTER;
+
+		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
+			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
+
+			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
+				if (core_mask & (1u << j))
+					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+				else
+					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			}
+
+			in_out_info->hwc_layout[i++] = TILER_BLOCK;
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+
+			if (0 == cg)
+				in_out_info->hwc_layout[i++] = JM_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+		}
+	/* If we are using any other device */
+	} else {
+		uint32_t nr_l2, nr_sc_bits, j;
+		uint64_t core_mask;
+
+		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
+
+		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+		nr_sc_bits = fls64(core_mask);
+
+		/* The job manager and tiler sets of counters
+		 * are always present */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+
+		in_out_info->hwc_layout[i++] = JM_BLOCK;
+		in_out_info->hwc_layout[i++] = TILER_BLOCK;
+
+		for (j = 0; j < nr_l2; j++)
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+		while (core_mask != 0ull) {
+			if ((core_mask & 1ull) != 0ull)
+				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			core_mask >>= 1;
+		}
+	}
+
+	in_out_info->nr_hwc_blocks = i;
+	in_out_info->size = dump_size;
+
+	setup.jm_bm = in_out_info->bitmask[0];
+	setup.tiler_bm = in_out_info->bitmask[1];
+	setup.shader_bm = in_out_info->bitmask[2];
+	setup.mmu_l2_bm = in_out_info->bitmask[3];
+	hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx,
+			&setup, hand->vinstr_buffer);
+	if (!hand->vinstr_cli) {
+		dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core");
+		goto free_layout;
+	}
+
+	return hand;
+
+free_layout:
+	kfree(in_out_info->hwc_layout);
+
+free_vinstr_buffer:
+	kfree(hand->vinstr_buffer);
+
+release_device:
+	kbase_release_device(hand->kbdev);
+
+free_hand:
+	kfree(hand);
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
+
+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (in_out_info)
+		kfree(in_out_info->hwc_layout);
+
+	if (opaque_handles) {
+		cancel_work_sync(&opaque_handles->dump_work);
+		kbase_vinstr_detach_client(opaque_handles->vinstr_cli);
+		kfree(opaque_handles->vinstr_buffer);
+		kbase_release_device(opaque_handles->kbdev);
+		kfree(opaque_handles);
+	}
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
+
+static void dump_worker(struct work_struct *work)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+
+	hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work);
+	if (!kbase_vinstr_hwc_dump(hand->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL)) {
+		spin_lock_bh(&hand->dump_lock);
+		hand->dump_complete = 1;
+		spin_unlock_bh(&hand->dump_lock);
+	} else {
+		schedule_work(&hand->dump_work);
+	}
+}
+
+uint32_t kbase_gator_instr_hwcnt_dump_complete(
+		struct kbase_gator_hwcnt_handles *opaque_handles,
+		uint32_t * const success)
+{
+
+	if (opaque_handles && success) {
+		*success = opaque_handles->dump_complete;
+		opaque_handles->dump_complete = 0;
+		return *success;
+	}
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
+
+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (opaque_handles)
+		schedule_work(&opaque_handles->dump_work);
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
new file mode 100644
index 0000000..ef9ac0f
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
@@ -0,0 +1,219 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to fetch hardware counters.
+ */
+
+/* This define is used by the gator kernel module compile to select which DDK
+ * API calling convention to use. If not defined (legacy DDK) gator assumes
+ * version 1. The version to DDK release mapping is:
+ *     Version 1 API: DDK versions r1px, r2px
+ *     Version 2 API: DDK versions r3px, r4px
+ *     Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a
+ * counter dump. If this returns a non-zero value the request has been queued,
+ * otherwise the driver has been unable to do so (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the  previously
+ * requested dump has been succesful. If this returns non-zero the counter dump
+ * has resolved, but the value of *success must also be tested as the dump
+ * may have not been successful. If it returns zero the counter dump was
+ * abandoned due to the device being busy (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ *        kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ *    In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ *        hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ *        hwcnt_name # pointer to name list
+ *
+ *        u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ *        # Iterate over each 64-counter block in this GPU configuration
+ *        for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ *            hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ *            # Skip reserved type blocks - they contain no counters at all
+ *            if( type == RESERVED_BLOCK ) {
+ *                continue;
+ *            }
+ *
+ *            size_t name_offset = type * 64;
+ *            size_t data_offset = i * 64;
+ *
+ *            # Iterate over the names of the counters in this block type
+ *            for( j = 0; j < 64; j++) {
+ *                const char * name = hwcnt_name[name_offset+j];
+ *
+ *                # Skip empty name strings - there is no counter here
+ *                if( name[0] == '\0' ) {
+ *                    continue;
+ *                }
+ *
+ *                u32 data = hwcnt_data[data_offset+j];
+ *
+ *                printk( "COUNTER: %s DATA: %u\n", name, data );
+ *            }
+ *        }
+ *
+ *
+ *     Note that in most implementations you typically want to either SUM or
+ *     AVERAGE multiple instances of the same counter if, for example, you have
+ *     multiple shader cores or multiple L2 caches. The most sensible view for
+ *     analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ *     counters.
+ *
+ * 7] Goto 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling
+ *    kbase_gator_hwcnt_term_names(). This function must only be called if
+ *    init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+enum hwc_type {
+	JM_BLOCK = 0,
+	TILER_BLOCK,
+	SHADER_BLOCK,
+	MMU_L2_BLOCK,
+	RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+	/* Passed from Gator to kbase */
+
+	/* the bitmask of enabled hardware counters for each counter block */
+	uint16_t bitmask[4];
+
+	/* Passed from kbase to Gator */
+
+	/* ptr to counter dump memory */
+	void *kernel_dump_buffer;
+
+	/* size of counter dump memory */
+	uint32_t size;
+
+	/* the ID of the Mali device */
+	uint32_t gpu_id;
+
+	/* the number of shader cores in the GPU */
+	uint32_t nr_cores;
+
+	/* the number of core groups */
+	uint32_t nr_core_groups;
+
+	/* the memory layout of the performance counters */
+	enum hwc_type *hwc_layout;
+
+	/* the total number of hardware couter blocks */
+	uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info   A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ *                      specific information that will be returned to Gator. On entry Gator must have populated the
+ *                      'bitmask' field with the counters it wishes to enable for each class of counter block.
+ *                      Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ *                      enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ *                      the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ *                      kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU.
+ *
+ * @return              Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info       A pointer to a structure containing the enabled counters passed from Gator and all the
+ *                          Mali specific information that will be returned to Gator.
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ * @param[out] success      Non-zero on success, zero on failure.
+ *
+ * @return                  Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ *                          completed dump may not have dumped succesfully, so the caller must test for both
+ *                          a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ *
+ * @return                  Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_counters The total number of counters short names in the Mali devices' list.
+ *
+ * @return                    Pointer to an array of strings of length *total_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
new file mode 100644
index 0000000..6fe6530
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -0,0 +1,2169 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_H_
+#define _KBASE_GATOR_HWCNT_NAMES_H_
+
+/*
+ * "Short names" for hardware counters used by Streamline. Counters names are
+ * stored in accordance with their memory layout in the binary counter block
+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
+ * of 64 counters, and each GPU implements the same set of "masters" although
+ * the counters each master exposes within its block of 64 may vary.
+ *
+ * Counters which are an empty string are simply "holes" in the counter memory
+ * where no counter exists.
+ */
+
+static const char * const hardware_counters_mali_t60x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MESSAGES_SENT",
+	"T60x_MESSAGES_RECEIVED",
+	"T60x_GPU_ACTIVE",
+	"T60x_IRQ_ACTIVE",
+	"T60x_JS0_JOBS",
+	"T60x_JS0_TASKS",
+	"T60x_JS0_ACTIVE",
+	"",
+	"T60x_JS0_WAIT_READ",
+	"T60x_JS0_WAIT_ISSUE",
+	"T60x_JS0_WAIT_DEPEND",
+	"T60x_JS0_WAIT_FINISH",
+	"T60x_JS1_JOBS",
+	"T60x_JS1_TASKS",
+	"T60x_JS1_ACTIVE",
+	"",
+	"T60x_JS1_WAIT_READ",
+	"T60x_JS1_WAIT_ISSUE",
+	"T60x_JS1_WAIT_DEPEND",
+	"T60x_JS1_WAIT_FINISH",
+	"T60x_JS2_JOBS",
+	"T60x_JS2_TASKS",
+	"T60x_JS2_ACTIVE",
+	"",
+	"T60x_JS2_WAIT_READ",
+	"T60x_JS2_WAIT_ISSUE",
+	"T60x_JS2_WAIT_DEPEND",
+	"T60x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T60x_TI_JOBS_PROCESSED",
+	"T60x_TI_TRIANGLES",
+	"T60x_TI_QUADS",
+	"T60x_TI_POLYGONS",
+	"T60x_TI_POINTS",
+	"T60x_TI_LINES",
+	"T60x_TI_VCACHE_HIT",
+	"T60x_TI_VCACHE_MISS",
+	"T60x_TI_FRONT_FACING",
+	"T60x_TI_BACK_FACING",
+	"T60x_TI_PRIM_VISIBLE",
+	"T60x_TI_PRIM_CULLED",
+	"T60x_TI_PRIM_CLIPPED",
+	"T60x_TI_LEVEL0",
+	"T60x_TI_LEVEL1",
+	"T60x_TI_LEVEL2",
+	"T60x_TI_LEVEL3",
+	"T60x_TI_LEVEL4",
+	"T60x_TI_LEVEL5",
+	"T60x_TI_LEVEL6",
+	"T60x_TI_LEVEL7",
+	"T60x_TI_COMMAND_1",
+	"T60x_TI_COMMAND_2",
+	"T60x_TI_COMMAND_3",
+	"T60x_TI_COMMAND_4",
+	"T60x_TI_COMMAND_4_7",
+	"T60x_TI_COMMAND_8_15",
+	"T60x_TI_COMMAND_16_63",
+	"T60x_TI_COMMAND_64",
+	"T60x_TI_COMPRESS_IN",
+	"T60x_TI_COMPRESS_OUT",
+	"T60x_TI_COMPRESS_FLUSH",
+	"T60x_TI_TIMESTAMPS",
+	"T60x_TI_PCACHE_HIT",
+	"T60x_TI_PCACHE_MISS",
+	"T60x_TI_PCACHE_LINE",
+	"T60x_TI_PCACHE_STALL",
+	"T60x_TI_WRBUF_HIT",
+	"T60x_TI_WRBUF_MISS",
+	"T60x_TI_WRBUF_LINE",
+	"T60x_TI_WRBUF_PARTIAL",
+	"T60x_TI_WRBUF_STALL",
+	"T60x_TI_ACTIVE",
+	"T60x_TI_LOADING_DESC",
+	"T60x_TI_INDEX_WAIT",
+	"T60x_TI_INDEX_RANGE_WAIT",
+	"T60x_TI_VERTEX_WAIT",
+	"T60x_TI_PCACHE_WAIT",
+	"T60x_TI_WRBUF_WAIT",
+	"T60x_TI_BUS_READ",
+	"T60x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_TI_UTLB_STALL",
+	"T60x_TI_UTLB_REPLAY_MISS",
+	"T60x_TI_UTLB_REPLAY_FULL",
+	"T60x_TI_UTLB_NEW_MISS",
+	"T60x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T60x_FRAG_ACTIVE",
+	"T60x_FRAG_PRIMITIVES",
+	"T60x_FRAG_PRIMITIVES_DROPPED",
+	"T60x_FRAG_CYCLES_DESC",
+	"T60x_FRAG_CYCLES_PLR",
+	"T60x_FRAG_CYCLES_VERT",
+	"T60x_FRAG_CYCLES_TRISETUP",
+	"T60x_FRAG_CYCLES_RAST",
+	"T60x_FRAG_THREADS",
+	"T60x_FRAG_DUMMY_THREADS",
+	"T60x_FRAG_QUADS_RAST",
+	"T60x_FRAG_QUADS_EZS_TEST",
+	"T60x_FRAG_QUADS_EZS_KILLED",
+	"T60x_FRAG_THREADS_LZS_TEST",
+	"T60x_FRAG_THREADS_LZS_KILLED",
+	"T60x_FRAG_CYCLES_NO_TILE",
+	"T60x_FRAG_NUM_TILES",
+	"T60x_FRAG_TRANS_ELIM",
+	"T60x_COMPUTE_ACTIVE",
+	"T60x_COMPUTE_TASKS",
+	"T60x_COMPUTE_THREADS",
+	"T60x_COMPUTE_CYCLES_DESC",
+	"T60x_TRIPIPE_ACTIVE",
+	"T60x_ARITH_WORDS",
+	"T60x_ARITH_CYCLES_REG",
+	"T60x_ARITH_CYCLES_L0",
+	"T60x_ARITH_FRAG_DEPEND",
+	"T60x_LS_WORDS",
+	"T60x_LS_ISSUES",
+	"T60x_LS_RESTARTS",
+	"T60x_LS_REISSUES_MISS",
+	"T60x_LS_REISSUES_VD",
+	"T60x_LS_REISSUE_ATTRIB_MISS",
+	"T60x_LS_NO_WB",
+	"T60x_TEX_WORDS",
+	"T60x_TEX_BUBBLES",
+	"T60x_TEX_WORDS_L0",
+	"T60x_TEX_WORDS_DESC",
+	"T60x_TEX_ISSUES",
+	"T60x_TEX_RECIRC_FMISS",
+	"T60x_TEX_RECIRC_DESC",
+	"T60x_TEX_RECIRC_MULTI",
+	"T60x_TEX_RECIRC_PMISS",
+	"T60x_TEX_RECIRC_CONF",
+	"T60x_LSC_READ_HITS",
+	"T60x_LSC_READ_MISSES",
+	"T60x_LSC_WRITE_HITS",
+	"T60x_LSC_WRITE_MISSES",
+	"T60x_LSC_ATOMIC_HITS",
+	"T60x_LSC_ATOMIC_MISSES",
+	"T60x_LSC_LINE_FETCHES",
+	"T60x_LSC_DIRTY_LINE",
+	"T60x_LSC_SNOOPS",
+	"T60x_AXI_TLB_STALL",
+	"T60x_AXI_TLB_MISS",
+	"T60x_AXI_TLB_TRANSACTION",
+	"T60x_LS_TLB_MISS",
+	"T60x_LS_TLB_HIT",
+	"T60x_AXI_BEATS_READ",
+	"T60x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MMU_HIT",
+	"T60x_MMU_NEW_MISS",
+	"T60x_MMU_REPLAY_FULL",
+	"T60x_MMU_REPLAY_MISS",
+	"T60x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_UTLB_HIT",
+	"T60x_UTLB_NEW_MISS",
+	"T60x_UTLB_REPLAY_FULL",
+	"T60x_UTLB_REPLAY_MISS",
+	"T60x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_L2_EXT_WRITE_BEATS",
+	"T60x_L2_EXT_READ_BEATS",
+	"T60x_L2_ANY_LOOKUP",
+	"T60x_L2_READ_LOOKUP",
+	"T60x_L2_SREAD_LOOKUP",
+	"T60x_L2_READ_REPLAY",
+	"T60x_L2_READ_SNOOP",
+	"T60x_L2_READ_HIT",
+	"T60x_L2_CLEAN_MISS",
+	"T60x_L2_WRITE_LOOKUP",
+	"T60x_L2_SWRITE_LOOKUP",
+	"T60x_L2_WRITE_REPLAY",
+	"T60x_L2_WRITE_SNOOP",
+	"T60x_L2_WRITE_HIT",
+	"T60x_L2_EXT_READ_FULL",
+	"T60x_L2_EXT_READ_HALF",
+	"T60x_L2_EXT_WRITE_FULL",
+	"T60x_L2_EXT_WRITE_HALF",
+	"T60x_L2_EXT_READ",
+	"T60x_L2_EXT_READ_LINE",
+	"T60x_L2_EXT_WRITE",
+	"T60x_L2_EXT_WRITE_LINE",
+	"T60x_L2_EXT_WRITE_SMALL",
+	"T60x_L2_EXT_BARRIER",
+	"T60x_L2_EXT_AR_STALL",
+	"T60x_L2_EXT_R_BUF_FULL",
+	"T60x_L2_EXT_RD_BUF_FULL",
+	"T60x_L2_EXT_R_RAW",
+	"T60x_L2_EXT_W_STALL",
+	"T60x_L2_EXT_W_BUF_FULL",
+	"T60x_L2_EXT_R_W_HAZARD",
+	"T60x_L2_TAG_HAZARD",
+	"T60x_L2_SNOOP_FULL",
+	"T60x_L2_REPLAY_FULL"
+};
+static const char * const hardware_counters_mali_t62x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MESSAGES_SENT",
+	"T62x_MESSAGES_RECEIVED",
+	"T62x_GPU_ACTIVE",
+	"T62x_IRQ_ACTIVE",
+	"T62x_JS0_JOBS",
+	"T62x_JS0_TASKS",
+	"T62x_JS0_ACTIVE",
+	"",
+	"T62x_JS0_WAIT_READ",
+	"T62x_JS0_WAIT_ISSUE",
+	"T62x_JS0_WAIT_DEPEND",
+	"T62x_JS0_WAIT_FINISH",
+	"T62x_JS1_JOBS",
+	"T62x_JS1_TASKS",
+	"T62x_JS1_ACTIVE",
+	"",
+	"T62x_JS1_WAIT_READ",
+	"T62x_JS1_WAIT_ISSUE",
+	"T62x_JS1_WAIT_DEPEND",
+	"T62x_JS1_WAIT_FINISH",
+	"T62x_JS2_JOBS",
+	"T62x_JS2_TASKS",
+	"T62x_JS2_ACTIVE",
+	"",
+	"T62x_JS2_WAIT_READ",
+	"T62x_JS2_WAIT_ISSUE",
+	"T62x_JS2_WAIT_DEPEND",
+	"T62x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T62x_TI_JOBS_PROCESSED",
+	"T62x_TI_TRIANGLES",
+	"T62x_TI_QUADS",
+	"T62x_TI_POLYGONS",
+	"T62x_TI_POINTS",
+	"T62x_TI_LINES",
+	"T62x_TI_VCACHE_HIT",
+	"T62x_TI_VCACHE_MISS",
+	"T62x_TI_FRONT_FACING",
+	"T62x_TI_BACK_FACING",
+	"T62x_TI_PRIM_VISIBLE",
+	"T62x_TI_PRIM_CULLED",
+	"T62x_TI_PRIM_CLIPPED",
+	"T62x_TI_LEVEL0",
+	"T62x_TI_LEVEL1",
+	"T62x_TI_LEVEL2",
+	"T62x_TI_LEVEL3",
+	"T62x_TI_LEVEL4",
+	"T62x_TI_LEVEL5",
+	"T62x_TI_LEVEL6",
+	"T62x_TI_LEVEL7",
+	"T62x_TI_COMMAND_1",
+	"T62x_TI_COMMAND_2",
+	"T62x_TI_COMMAND_3",
+	"T62x_TI_COMMAND_4",
+	"T62x_TI_COMMAND_5_7",
+	"T62x_TI_COMMAND_8_15",
+	"T62x_TI_COMMAND_16_63",
+	"T62x_TI_COMMAND_64",
+	"T62x_TI_COMPRESS_IN",
+	"T62x_TI_COMPRESS_OUT",
+	"T62x_TI_COMPRESS_FLUSH",
+	"T62x_TI_TIMESTAMPS",
+	"T62x_TI_PCACHE_HIT",
+	"T62x_TI_PCACHE_MISS",
+	"T62x_TI_PCACHE_LINE",
+	"T62x_TI_PCACHE_STALL",
+	"T62x_TI_WRBUF_HIT",
+	"T62x_TI_WRBUF_MISS",
+	"T62x_TI_WRBUF_LINE",
+	"T62x_TI_WRBUF_PARTIAL",
+	"T62x_TI_WRBUF_STALL",
+	"T62x_TI_ACTIVE",
+	"T62x_TI_LOADING_DESC",
+	"T62x_TI_INDEX_WAIT",
+	"T62x_TI_INDEX_RANGE_WAIT",
+	"T62x_TI_VERTEX_WAIT",
+	"T62x_TI_PCACHE_WAIT",
+	"T62x_TI_WRBUF_WAIT",
+	"T62x_TI_BUS_READ",
+	"T62x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_TI_UTLB_STALL",
+	"T62x_TI_UTLB_REPLAY_MISS",
+	"T62x_TI_UTLB_REPLAY_FULL",
+	"T62x_TI_UTLB_NEW_MISS",
+	"T62x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"T62x_SHADER_CORE_ACTIVE",
+	"T62x_FRAG_ACTIVE",
+	"T62x_FRAG_PRIMITIVES",
+	"T62x_FRAG_PRIMITIVES_DROPPED",
+	"T62x_FRAG_CYCLES_DESC",
+	"T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T62x_FRAG_CYCLES_VERT",
+	"T62x_FRAG_CYCLES_TRISETUP",
+	"T62x_FRAG_CYCLES_EZS_ACTIVE",
+	"T62x_FRAG_THREADS",
+	"T62x_FRAG_DUMMY_THREADS",
+	"T62x_FRAG_QUADS_RAST",
+	"T62x_FRAG_QUADS_EZS_TEST",
+	"T62x_FRAG_QUADS_EZS_KILLED",
+	"T62x_FRAG_THREADS_LZS_TEST",
+	"T62x_FRAG_THREADS_LZS_KILLED",
+	"T62x_FRAG_CYCLES_NO_TILE",
+	"T62x_FRAG_NUM_TILES",
+	"T62x_FRAG_TRANS_ELIM",
+	"T62x_COMPUTE_ACTIVE",
+	"T62x_COMPUTE_TASKS",
+	"T62x_COMPUTE_THREADS",
+	"T62x_COMPUTE_CYCLES_DESC",
+	"T62x_TRIPIPE_ACTIVE",
+	"T62x_ARITH_WORDS",
+	"T62x_ARITH_CYCLES_REG",
+	"T62x_ARITH_CYCLES_L0",
+	"T62x_ARITH_FRAG_DEPEND",
+	"T62x_LS_WORDS",
+	"T62x_LS_ISSUES",
+	"T62x_LS_RESTARTS",
+	"T62x_LS_REISSUES_MISS",
+	"T62x_LS_REISSUES_VD",
+	"T62x_LS_REISSUE_ATTRIB_MISS",
+	"T62x_LS_NO_WB",
+	"T62x_TEX_WORDS",
+	"T62x_TEX_BUBBLES",
+	"T62x_TEX_WORDS_L0",
+	"T62x_TEX_WORDS_DESC",
+	"T62x_TEX_ISSUES",
+	"T62x_TEX_RECIRC_FMISS",
+	"T62x_TEX_RECIRC_DESC",
+	"T62x_TEX_RECIRC_MULTI",
+	"T62x_TEX_RECIRC_PMISS",
+	"T62x_TEX_RECIRC_CONF",
+	"T62x_LSC_READ_HITS",
+	"T62x_LSC_READ_MISSES",
+	"T62x_LSC_WRITE_HITS",
+	"T62x_LSC_WRITE_MISSES",
+	"T62x_LSC_ATOMIC_HITS",
+	"T62x_LSC_ATOMIC_MISSES",
+	"T62x_LSC_LINE_FETCHES",
+	"T62x_LSC_DIRTY_LINE",
+	"T62x_LSC_SNOOPS",
+	"T62x_AXI_TLB_STALL",
+	"T62x_AXI_TLB_MISS",
+	"T62x_AXI_TLB_TRANSACTION",
+	"T62x_LS_TLB_MISS",
+	"T62x_LS_TLB_HIT",
+	"T62x_AXI_BEATS_READ",
+	"T62x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MMU_HIT",
+	"T62x_MMU_NEW_MISS",
+	"T62x_MMU_REPLAY_FULL",
+	"T62x_MMU_REPLAY_MISS",
+	"T62x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_UTLB_HIT",
+	"T62x_UTLB_NEW_MISS",
+	"T62x_UTLB_REPLAY_FULL",
+	"T62x_UTLB_REPLAY_MISS",
+	"T62x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_L2_EXT_WRITE_BEATS",
+	"T62x_L2_EXT_READ_BEATS",
+	"T62x_L2_ANY_LOOKUP",
+	"T62x_L2_READ_LOOKUP",
+	"T62x_L2_SREAD_LOOKUP",
+	"T62x_L2_READ_REPLAY",
+	"T62x_L2_READ_SNOOP",
+	"T62x_L2_READ_HIT",
+	"T62x_L2_CLEAN_MISS",
+	"T62x_L2_WRITE_LOOKUP",
+	"T62x_L2_SWRITE_LOOKUP",
+	"T62x_L2_WRITE_REPLAY",
+	"T62x_L2_WRITE_SNOOP",
+	"T62x_L2_WRITE_HIT",
+	"T62x_L2_EXT_READ_FULL",
+	"T62x_L2_EXT_READ_HALF",
+	"T62x_L2_EXT_WRITE_FULL",
+	"T62x_L2_EXT_WRITE_HALF",
+	"T62x_L2_EXT_READ",
+	"T62x_L2_EXT_READ_LINE",
+	"T62x_L2_EXT_WRITE",
+	"T62x_L2_EXT_WRITE_LINE",
+	"T62x_L2_EXT_WRITE_SMALL",
+	"T62x_L2_EXT_BARRIER",
+	"T62x_L2_EXT_AR_STALL",
+	"T62x_L2_EXT_R_BUF_FULL",
+	"T62x_L2_EXT_RD_BUF_FULL",
+	"T62x_L2_EXT_R_RAW",
+	"T62x_L2_EXT_W_STALL",
+	"T62x_L2_EXT_W_BUF_FULL",
+	"T62x_L2_EXT_R_W_HAZARD",
+	"T62x_L2_TAG_HAZARD",
+	"T62x_L2_SNOOP_FULL",
+	"T62x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t72x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T72x_GPU_ACTIVE",
+	"T72x_IRQ_ACTIVE",
+	"T72x_JS0_JOBS",
+	"T72x_JS0_TASKS",
+	"T72x_JS0_ACTIVE",
+	"T72x_JS1_JOBS",
+	"T72x_JS1_TASKS",
+	"T72x_JS1_ACTIVE",
+	"T72x_JS2_JOBS",
+	"T72x_JS2_TASKS",
+	"T72x_JS2_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T72x_TI_JOBS_PROCESSED",
+	"T72x_TI_TRIANGLES",
+	"T72x_TI_QUADS",
+	"T72x_TI_POLYGONS",
+	"T72x_TI_POINTS",
+	"T72x_TI_LINES",
+	"T72x_TI_FRONT_FACING",
+	"T72x_TI_BACK_FACING",
+	"T72x_TI_PRIM_VISIBLE",
+	"T72x_TI_PRIM_CULLED",
+	"T72x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T72x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T72x_FRAG_ACTIVE",
+	"T72x_FRAG_PRIMITIVES",
+	"T72x_FRAG_PRIMITIVES_DROPPED",
+	"T72x_FRAG_THREADS",
+	"T72x_FRAG_DUMMY_THREADS",
+	"T72x_FRAG_QUADS_RAST",
+	"T72x_FRAG_QUADS_EZS_TEST",
+	"T72x_FRAG_QUADS_EZS_KILLED",
+	"T72x_FRAG_THREADS_LZS_TEST",
+	"T72x_FRAG_THREADS_LZS_KILLED",
+	"T72x_FRAG_CYCLES_NO_TILE",
+	"T72x_FRAG_NUM_TILES",
+	"T72x_FRAG_TRANS_ELIM",
+	"T72x_COMPUTE_ACTIVE",
+	"T72x_COMPUTE_TASKS",
+	"T72x_COMPUTE_THREADS",
+	"T72x_TRIPIPE_ACTIVE",
+	"T72x_ARITH_WORDS",
+	"T72x_ARITH_CYCLES_REG",
+	"T72x_LS_WORDS",
+	"T72x_LS_ISSUES",
+	"T72x_LS_RESTARTS",
+	"T72x_LS_REISSUES_MISS",
+	"T72x_TEX_WORDS",
+	"T72x_TEX_BUBBLES",
+	"T72x_TEX_ISSUES",
+	"T72x_LSC_READ_HITS",
+	"T72x_LSC_READ_MISSES",
+	"T72x_LSC_WRITE_HITS",
+	"T72x_LSC_WRITE_MISSES",
+	"T72x_LSC_ATOMIC_HITS",
+	"T72x_LSC_ATOMIC_MISSES",
+	"T72x_LSC_LINE_FETCHES",
+	"T72x_LSC_DIRTY_LINE",
+	"T72x_LSC_SNOOPS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T72x_L2_EXT_WRITE_BEAT",
+	"T72x_L2_EXT_READ_BEAT",
+	"T72x_L2_READ_SNOOP",
+	"T72x_L2_READ_HIT",
+	"T72x_L2_WRITE_SNOOP",
+	"T72x_L2_WRITE_HIT",
+	"T72x_L2_EXT_WRITE_SMALL",
+	"T72x_L2_EXT_BARRIER",
+	"T72x_L2_EXT_AR_STALL",
+	"T72x_L2_EXT_W_STALL",
+	"T72x_L2_SNOOP_FULL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	""
+};
+
+static const char * const hardware_counters_mali_t76x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MESSAGES_SENT",
+	"T76x_MESSAGES_RECEIVED",
+	"T76x_GPU_ACTIVE",
+	"T76x_IRQ_ACTIVE",
+	"T76x_JS0_JOBS",
+	"T76x_JS0_TASKS",
+	"T76x_JS0_ACTIVE",
+	"",
+	"T76x_JS0_WAIT_READ",
+	"T76x_JS0_WAIT_ISSUE",
+	"T76x_JS0_WAIT_DEPEND",
+	"T76x_JS0_WAIT_FINISH",
+	"T76x_JS1_JOBS",
+	"T76x_JS1_TASKS",
+	"T76x_JS1_ACTIVE",
+	"",
+	"T76x_JS1_WAIT_READ",
+	"T76x_JS1_WAIT_ISSUE",
+	"T76x_JS1_WAIT_DEPEND",
+	"T76x_JS1_WAIT_FINISH",
+	"T76x_JS2_JOBS",
+	"T76x_JS2_TASKS",
+	"T76x_JS2_ACTIVE",
+	"",
+	"T76x_JS2_WAIT_READ",
+	"T76x_JS2_WAIT_ISSUE",
+	"T76x_JS2_WAIT_DEPEND",
+	"T76x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T76x_TI_JOBS_PROCESSED",
+	"T76x_TI_TRIANGLES",
+	"T76x_TI_QUADS",
+	"T76x_TI_POLYGONS",
+	"T76x_TI_POINTS",
+	"T76x_TI_LINES",
+	"T76x_TI_VCACHE_HIT",
+	"T76x_TI_VCACHE_MISS",
+	"T76x_TI_FRONT_FACING",
+	"T76x_TI_BACK_FACING",
+	"T76x_TI_PRIM_VISIBLE",
+	"T76x_TI_PRIM_CULLED",
+	"T76x_TI_PRIM_CLIPPED",
+	"T76x_TI_LEVEL0",
+	"T76x_TI_LEVEL1",
+	"T76x_TI_LEVEL2",
+	"T76x_TI_LEVEL3",
+	"T76x_TI_LEVEL4",
+	"T76x_TI_LEVEL5",
+	"T76x_TI_LEVEL6",
+	"T76x_TI_LEVEL7",
+	"T76x_TI_COMMAND_1",
+	"T76x_TI_COMMAND_2",
+	"T76x_TI_COMMAND_3",
+	"T76x_TI_COMMAND_4",
+	"T76x_TI_COMMAND_5_7",
+	"T76x_TI_COMMAND_8_15",
+	"T76x_TI_COMMAND_16_63",
+	"T76x_TI_COMMAND_64",
+	"T76x_TI_COMPRESS_IN",
+	"T76x_TI_COMPRESS_OUT",
+	"T76x_TI_COMPRESS_FLUSH",
+	"T76x_TI_TIMESTAMPS",
+	"T76x_TI_PCACHE_HIT",
+	"T76x_TI_PCACHE_MISS",
+	"T76x_TI_PCACHE_LINE",
+	"T76x_TI_PCACHE_STALL",
+	"T76x_TI_WRBUF_HIT",
+	"T76x_TI_WRBUF_MISS",
+	"T76x_TI_WRBUF_LINE",
+	"T76x_TI_WRBUF_PARTIAL",
+	"T76x_TI_WRBUF_STALL",
+	"T76x_TI_ACTIVE",
+	"T76x_TI_LOADING_DESC",
+	"T76x_TI_INDEX_WAIT",
+	"T76x_TI_INDEX_RANGE_WAIT",
+	"T76x_TI_VERTEX_WAIT",
+	"T76x_TI_PCACHE_WAIT",
+	"T76x_TI_WRBUF_WAIT",
+	"T76x_TI_BUS_READ",
+	"T76x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_TI_UTLB_HIT",
+	"T76x_TI_UTLB_NEW_MISS",
+	"T76x_TI_UTLB_REPLAY_FULL",
+	"T76x_TI_UTLB_REPLAY_MISS",
+	"T76x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T76x_FRAG_ACTIVE",
+	"T76x_FRAG_PRIMITIVES",
+	"T76x_FRAG_PRIMITIVES_DROPPED",
+	"T76x_FRAG_CYCLES_DESC",
+	"T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T76x_FRAG_CYCLES_VERT",
+	"T76x_FRAG_CYCLES_TRISETUP",
+	"T76x_FRAG_CYCLES_EZS_ACTIVE",
+	"T76x_FRAG_THREADS",
+	"T76x_FRAG_DUMMY_THREADS",
+	"T76x_FRAG_QUADS_RAST",
+	"T76x_FRAG_QUADS_EZS_TEST",
+	"T76x_FRAG_QUADS_EZS_KILLED",
+	"T76x_FRAG_THREADS_LZS_TEST",
+	"T76x_FRAG_THREADS_LZS_KILLED",
+	"T76x_FRAG_CYCLES_NO_TILE",
+	"T76x_FRAG_NUM_TILES",
+	"T76x_FRAG_TRANS_ELIM",
+	"T76x_COMPUTE_ACTIVE",
+	"T76x_COMPUTE_TASKS",
+	"T76x_COMPUTE_THREADS",
+	"T76x_COMPUTE_CYCLES_DESC",
+	"T76x_TRIPIPE_ACTIVE",
+	"T76x_ARITH_WORDS",
+	"T76x_ARITH_CYCLES_REG",
+	"T76x_ARITH_CYCLES_L0",
+	"T76x_ARITH_FRAG_DEPEND",
+	"T76x_LS_WORDS",
+	"T76x_LS_ISSUES",
+	"T76x_LS_REISSUE_ATTR",
+	"T76x_LS_REISSUES_VARY",
+	"T76x_LS_VARY_RV_MISS",
+	"T76x_LS_VARY_RV_HIT",
+	"T76x_LS_NO_UNPARK",
+	"T76x_TEX_WORDS",
+	"T76x_TEX_BUBBLES",
+	"T76x_TEX_WORDS_L0",
+	"T76x_TEX_WORDS_DESC",
+	"T76x_TEX_ISSUES",
+	"T76x_TEX_RECIRC_FMISS",
+	"T76x_TEX_RECIRC_DESC",
+	"T76x_TEX_RECIRC_MULTI",
+	"T76x_TEX_RECIRC_PMISS",
+	"T76x_TEX_RECIRC_CONF",
+	"T76x_LSC_READ_HITS",
+	"T76x_LSC_READ_OP",
+	"T76x_LSC_WRITE_HITS",
+	"T76x_LSC_WRITE_OP",
+	"T76x_LSC_ATOMIC_HITS",
+	"T76x_LSC_ATOMIC_OP",
+	"T76x_LSC_LINE_FETCHES",
+	"T76x_LSC_DIRTY_LINE",
+	"T76x_LSC_SNOOPS",
+	"T76x_AXI_TLB_STALL",
+	"T76x_AXI_TLB_MISS",
+	"T76x_AXI_TLB_TRANSACTION",
+	"T76x_LS_TLB_MISS",
+	"T76x_LS_TLB_HIT",
+	"T76x_AXI_BEATS_READ",
+	"T76x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MMU_HIT",
+	"T76x_MMU_NEW_MISS",
+	"T76x_MMU_REPLAY_FULL",
+	"T76x_MMU_REPLAY_MISS",
+	"T76x_MMU_TABLE_WALK",
+	"T76x_MMU_REQUESTS",
+	"",
+	"",
+	"T76x_UTLB_HIT",
+	"T76x_UTLB_NEW_MISS",
+	"T76x_UTLB_REPLAY_FULL",
+	"T76x_UTLB_REPLAY_MISS",
+	"T76x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_L2_EXT_WRITE_BEATS",
+	"T76x_L2_EXT_READ_BEATS",
+	"T76x_L2_ANY_LOOKUP",
+	"T76x_L2_READ_LOOKUP",
+	"T76x_L2_SREAD_LOOKUP",
+	"T76x_L2_READ_REPLAY",
+	"T76x_L2_READ_SNOOP",
+	"T76x_L2_READ_HIT",
+	"T76x_L2_CLEAN_MISS",
+	"T76x_L2_WRITE_LOOKUP",
+	"T76x_L2_SWRITE_LOOKUP",
+	"T76x_L2_WRITE_REPLAY",
+	"T76x_L2_WRITE_SNOOP",
+	"T76x_L2_WRITE_HIT",
+	"T76x_L2_EXT_READ_FULL",
+	"",
+	"T76x_L2_EXT_WRITE_FULL",
+	"T76x_L2_EXT_R_W_HAZARD",
+	"T76x_L2_EXT_READ",
+	"T76x_L2_EXT_READ_LINE",
+	"T76x_L2_EXT_WRITE",
+	"T76x_L2_EXT_WRITE_LINE",
+	"T76x_L2_EXT_WRITE_SMALL",
+	"T76x_L2_EXT_BARRIER",
+	"T76x_L2_EXT_AR_STALL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_EXT_RD_BUF_FULL",
+	"T76x_L2_EXT_R_RAW",
+	"T76x_L2_EXT_W_STALL",
+	"T76x_L2_EXT_W_BUF_FULL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_TAG_HAZARD",
+	"T76x_L2_SNOOP_FULL",
+	"T76x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t82x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MESSAGES_SENT",
+	"T82x_MESSAGES_RECEIVED",
+	"T82x_GPU_ACTIVE",
+	"T82x_IRQ_ACTIVE",
+	"T82x_JS0_JOBS",
+	"T82x_JS0_TASKS",
+	"T82x_JS0_ACTIVE",
+	"",
+	"T82x_JS0_WAIT_READ",
+	"T82x_JS0_WAIT_ISSUE",
+	"T82x_JS0_WAIT_DEPEND",
+	"T82x_JS0_WAIT_FINISH",
+	"T82x_JS1_JOBS",
+	"T82x_JS1_TASKS",
+	"T82x_JS1_ACTIVE",
+	"",
+	"T82x_JS1_WAIT_READ",
+	"T82x_JS1_WAIT_ISSUE",
+	"T82x_JS1_WAIT_DEPEND",
+	"T82x_JS1_WAIT_FINISH",
+	"T82x_JS2_JOBS",
+	"T82x_JS2_TASKS",
+	"T82x_JS2_ACTIVE",
+	"",
+	"T82x_JS2_WAIT_READ",
+	"T82x_JS2_WAIT_ISSUE",
+	"T82x_JS2_WAIT_DEPEND",
+	"T82x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T82x_TI_JOBS_PROCESSED",
+	"T82x_TI_TRIANGLES",
+	"T82x_TI_QUADS",
+	"T82x_TI_POLYGONS",
+	"T82x_TI_POINTS",
+	"T82x_TI_LINES",
+	"T82x_TI_FRONT_FACING",
+	"T82x_TI_BACK_FACING",
+	"T82x_TI_PRIM_VISIBLE",
+	"T82x_TI_PRIM_CULLED",
+	"T82x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T82x_FRAG_ACTIVE",
+	"T82x_FRAG_PRIMITIVES",
+	"T82x_FRAG_PRIMITIVES_DROPPED",
+	"T82x_FRAG_CYCLES_DESC",
+	"T82x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T82x_FRAG_CYCLES_VERT",
+	"T82x_FRAG_CYCLES_TRISETUP",
+	"T82x_FRAG_CYCLES_EZS_ACTIVE",
+	"T82x_FRAG_THREADS",
+	"T82x_FRAG_DUMMY_THREADS",
+	"T82x_FRAG_QUADS_RAST",
+	"T82x_FRAG_QUADS_EZS_TEST",
+	"T82x_FRAG_QUADS_EZS_KILLED",
+	"T82x_FRAG_THREADS_LZS_TEST",
+	"T82x_FRAG_THREADS_LZS_KILLED",
+	"T82x_FRAG_CYCLES_NO_TILE",
+	"T82x_FRAG_NUM_TILES",
+	"T82x_FRAG_TRANS_ELIM",
+	"T82x_COMPUTE_ACTIVE",
+	"T82x_COMPUTE_TASKS",
+	"T82x_COMPUTE_THREADS",
+	"T82x_COMPUTE_CYCLES_DESC",
+	"T82x_TRIPIPE_ACTIVE",
+	"T82x_ARITH_WORDS",
+	"T82x_ARITH_CYCLES_REG",
+	"T82x_ARITH_CYCLES_L0",
+	"T82x_ARITH_FRAG_DEPEND",
+	"T82x_LS_WORDS",
+	"T82x_LS_ISSUES",
+	"T82x_LS_REISSUE_ATTR",
+	"T82x_LS_REISSUES_VARY",
+	"T82x_LS_VARY_RV_MISS",
+	"T82x_LS_VARY_RV_HIT",
+	"T82x_LS_NO_UNPARK",
+	"T82x_TEX_WORDS",
+	"T82x_TEX_BUBBLES",
+	"T82x_TEX_WORDS_L0",
+	"T82x_TEX_WORDS_DESC",
+	"T82x_TEX_ISSUES",
+	"T82x_TEX_RECIRC_FMISS",
+	"T82x_TEX_RECIRC_DESC",
+	"T82x_TEX_RECIRC_MULTI",
+	"T82x_TEX_RECIRC_PMISS",
+	"T82x_TEX_RECIRC_CONF",
+	"T82x_LSC_READ_HITS",
+	"T82x_LSC_READ_OP",
+	"T82x_LSC_WRITE_HITS",
+	"T82x_LSC_WRITE_OP",
+	"T82x_LSC_ATOMIC_HITS",
+	"T82x_LSC_ATOMIC_OP",
+	"T82x_LSC_LINE_FETCHES",
+	"T82x_LSC_DIRTY_LINE",
+	"T82x_LSC_SNOOPS",
+	"T82x_AXI_TLB_STALL",
+	"T82x_AXI_TLB_MISS",
+	"T82x_AXI_TLB_TRANSACTION",
+	"T82x_LS_TLB_MISS",
+	"T82x_LS_TLB_HIT",
+	"T82x_AXI_BEATS_READ",
+	"T82x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MMU_HIT",
+	"T82x_MMU_NEW_MISS",
+	"T82x_MMU_REPLAY_FULL",
+	"T82x_MMU_REPLAY_MISS",
+	"T82x_MMU_TABLE_WALK",
+	"T82x_MMU_REQUESTS",
+	"",
+	"",
+	"T82x_UTLB_HIT",
+	"T82x_UTLB_NEW_MISS",
+	"T82x_UTLB_REPLAY_FULL",
+	"T82x_UTLB_REPLAY_MISS",
+	"T82x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_L2_EXT_WRITE_BEATS",
+	"T82x_L2_EXT_READ_BEATS",
+	"T82x_L2_ANY_LOOKUP",
+	"T82x_L2_READ_LOOKUP",
+	"T82x_L2_SREAD_LOOKUP",
+	"T82x_L2_READ_REPLAY",
+	"T82x_L2_READ_SNOOP",
+	"T82x_L2_READ_HIT",
+	"T82x_L2_CLEAN_MISS",
+	"T82x_L2_WRITE_LOOKUP",
+	"T82x_L2_SWRITE_LOOKUP",
+	"T82x_L2_WRITE_REPLAY",
+	"T82x_L2_WRITE_SNOOP",
+	"T82x_L2_WRITE_HIT",
+	"T82x_L2_EXT_READ_FULL",
+	"",
+	"T82x_L2_EXT_WRITE_FULL",
+	"T82x_L2_EXT_R_W_HAZARD",
+	"T82x_L2_EXT_READ",
+	"T82x_L2_EXT_READ_LINE",
+	"T82x_L2_EXT_WRITE",
+	"T82x_L2_EXT_WRITE_LINE",
+	"T82x_L2_EXT_WRITE_SMALL",
+	"T82x_L2_EXT_BARRIER",
+	"T82x_L2_EXT_AR_STALL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_EXT_RD_BUF_FULL",
+	"T82x_L2_EXT_R_RAW",
+	"T82x_L2_EXT_W_STALL",
+	"T82x_L2_EXT_W_BUF_FULL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_TAG_HAZARD",
+	"T82x_L2_SNOOP_FULL",
+	"T82x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t83x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MESSAGES_SENT",
+	"T83x_MESSAGES_RECEIVED",
+	"T83x_GPU_ACTIVE",
+	"T83x_IRQ_ACTIVE",
+	"T83x_JS0_JOBS",
+	"T83x_JS0_TASKS",
+	"T83x_JS0_ACTIVE",
+	"",
+	"T83x_JS0_WAIT_READ",
+	"T83x_JS0_WAIT_ISSUE",
+	"T83x_JS0_WAIT_DEPEND",
+	"T83x_JS0_WAIT_FINISH",
+	"T83x_JS1_JOBS",
+	"T83x_JS1_TASKS",
+	"T83x_JS1_ACTIVE",
+	"",
+	"T83x_JS1_WAIT_READ",
+	"T83x_JS1_WAIT_ISSUE",
+	"T83x_JS1_WAIT_DEPEND",
+	"T83x_JS1_WAIT_FINISH",
+	"T83x_JS2_JOBS",
+	"T83x_JS2_TASKS",
+	"T83x_JS2_ACTIVE",
+	"",
+	"T83x_JS2_WAIT_READ",
+	"T83x_JS2_WAIT_ISSUE",
+	"T83x_JS2_WAIT_DEPEND",
+	"T83x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T83x_TI_JOBS_PROCESSED",
+	"T83x_TI_TRIANGLES",
+	"T83x_TI_QUADS",
+	"T83x_TI_POLYGONS",
+	"T83x_TI_POINTS",
+	"T83x_TI_LINES",
+	"T83x_TI_FRONT_FACING",
+	"T83x_TI_BACK_FACING",
+	"T83x_TI_PRIM_VISIBLE",
+	"T83x_TI_PRIM_CULLED",
+	"T83x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T83x_FRAG_ACTIVE",
+	"T83x_FRAG_PRIMITIVES",
+	"T83x_FRAG_PRIMITIVES_DROPPED",
+	"T83x_FRAG_CYCLES_DESC",
+	"T83x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T83x_FRAG_CYCLES_VERT",
+	"T83x_FRAG_CYCLES_TRISETUP",
+	"T83x_FRAG_CYCLES_EZS_ACTIVE",
+	"T83x_FRAG_THREADS",
+	"T83x_FRAG_DUMMY_THREADS",
+	"T83x_FRAG_QUADS_RAST",
+	"T83x_FRAG_QUADS_EZS_TEST",
+	"T83x_FRAG_QUADS_EZS_KILLED",
+	"T83x_FRAG_THREADS_LZS_TEST",
+	"T83x_FRAG_THREADS_LZS_KILLED",
+	"T83x_FRAG_CYCLES_NO_TILE",
+	"T83x_FRAG_NUM_TILES",
+	"T83x_FRAG_TRANS_ELIM",
+	"T83x_COMPUTE_ACTIVE",
+	"T83x_COMPUTE_TASKS",
+	"T83x_COMPUTE_THREADS",
+	"T83x_COMPUTE_CYCLES_DESC",
+	"T83x_TRIPIPE_ACTIVE",
+	"T83x_ARITH_WORDS",
+	"T83x_ARITH_CYCLES_REG",
+	"T83x_ARITH_CYCLES_L0",
+	"T83x_ARITH_FRAG_DEPEND",
+	"T83x_LS_WORDS",
+	"T83x_LS_ISSUES",
+	"T83x_LS_REISSUE_ATTR",
+	"T83x_LS_REISSUES_VARY",
+	"T83x_LS_VARY_RV_MISS",
+	"T83x_LS_VARY_RV_HIT",
+	"T83x_LS_NO_UNPARK",
+	"T83x_TEX_WORDS",
+	"T83x_TEX_BUBBLES",
+	"T83x_TEX_WORDS_L0",
+	"T83x_TEX_WORDS_DESC",
+	"T83x_TEX_ISSUES",
+	"T83x_TEX_RECIRC_FMISS",
+	"T83x_TEX_RECIRC_DESC",
+	"T83x_TEX_RECIRC_MULTI",
+	"T83x_TEX_RECIRC_PMISS",
+	"T83x_TEX_RECIRC_CONF",
+	"T83x_LSC_READ_HITS",
+	"T83x_LSC_READ_OP",
+	"T83x_LSC_WRITE_HITS",
+	"T83x_LSC_WRITE_OP",
+	"T83x_LSC_ATOMIC_HITS",
+	"T83x_LSC_ATOMIC_OP",
+	"T83x_LSC_LINE_FETCHES",
+	"T83x_LSC_DIRTY_LINE",
+	"T83x_LSC_SNOOPS",
+	"T83x_AXI_TLB_STALL",
+	"T83x_AXI_TLB_MISS",
+	"T83x_AXI_TLB_TRANSACTION",
+	"T83x_LS_TLB_MISS",
+	"T83x_LS_TLB_HIT",
+	"T83x_AXI_BEATS_READ",
+	"T83x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MMU_HIT",
+	"T83x_MMU_NEW_MISS",
+	"T83x_MMU_REPLAY_FULL",
+	"T83x_MMU_REPLAY_MISS",
+	"T83x_MMU_TABLE_WALK",
+	"T83x_MMU_REQUESTS",
+	"",
+	"",
+	"T83x_UTLB_HIT",
+	"T83x_UTLB_NEW_MISS",
+	"T83x_UTLB_REPLAY_FULL",
+	"T83x_UTLB_REPLAY_MISS",
+	"T83x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_L2_EXT_WRITE_BEATS",
+	"T83x_L2_EXT_READ_BEATS",
+	"T83x_L2_ANY_LOOKUP",
+	"T83x_L2_READ_LOOKUP",
+	"T83x_L2_SREAD_LOOKUP",
+	"T83x_L2_READ_REPLAY",
+	"T83x_L2_READ_SNOOP",
+	"T83x_L2_READ_HIT",
+	"T83x_L2_CLEAN_MISS",
+	"T83x_L2_WRITE_LOOKUP",
+	"T83x_L2_SWRITE_LOOKUP",
+	"T83x_L2_WRITE_REPLAY",
+	"T83x_L2_WRITE_SNOOP",
+	"T83x_L2_WRITE_HIT",
+	"T83x_L2_EXT_READ_FULL",
+	"",
+	"T83x_L2_EXT_WRITE_FULL",
+	"T83x_L2_EXT_R_W_HAZARD",
+	"T83x_L2_EXT_READ",
+	"T83x_L2_EXT_READ_LINE",
+	"T83x_L2_EXT_WRITE",
+	"T83x_L2_EXT_WRITE_LINE",
+	"T83x_L2_EXT_WRITE_SMALL",
+	"T83x_L2_EXT_BARRIER",
+	"T83x_L2_EXT_AR_STALL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_EXT_RD_BUF_FULL",
+	"T83x_L2_EXT_R_RAW",
+	"T83x_L2_EXT_W_STALL",
+	"T83x_L2_EXT_W_BUF_FULL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_TAG_HAZARD",
+	"T83x_L2_SNOOP_FULL",
+	"T83x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t86x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MESSAGES_SENT",
+	"T86x_MESSAGES_RECEIVED",
+	"T86x_GPU_ACTIVE",
+	"T86x_IRQ_ACTIVE",
+	"T86x_JS0_JOBS",
+	"T86x_JS0_TASKS",
+	"T86x_JS0_ACTIVE",
+	"",
+	"T86x_JS0_WAIT_READ",
+	"T86x_JS0_WAIT_ISSUE",
+	"T86x_JS0_WAIT_DEPEND",
+	"T86x_JS0_WAIT_FINISH",
+	"T86x_JS1_JOBS",
+	"T86x_JS1_TASKS",
+	"T86x_JS1_ACTIVE",
+	"",
+	"T86x_JS1_WAIT_READ",
+	"T86x_JS1_WAIT_ISSUE",
+	"T86x_JS1_WAIT_DEPEND",
+	"T86x_JS1_WAIT_FINISH",
+	"T86x_JS2_JOBS",
+	"T86x_JS2_TASKS",
+	"T86x_JS2_ACTIVE",
+	"",
+	"T86x_JS2_WAIT_READ",
+	"T86x_JS2_WAIT_ISSUE",
+	"T86x_JS2_WAIT_DEPEND",
+	"T86x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T86x_TI_JOBS_PROCESSED",
+	"T86x_TI_TRIANGLES",
+	"T86x_TI_QUADS",
+	"T86x_TI_POLYGONS",
+	"T86x_TI_POINTS",
+	"T86x_TI_LINES",
+	"T86x_TI_VCACHE_HIT",
+	"T86x_TI_VCACHE_MISS",
+	"T86x_TI_FRONT_FACING",
+	"T86x_TI_BACK_FACING",
+	"T86x_TI_PRIM_VISIBLE",
+	"T86x_TI_PRIM_CULLED",
+	"T86x_TI_PRIM_CLIPPED",
+	"T86x_TI_LEVEL0",
+	"T86x_TI_LEVEL1",
+	"T86x_TI_LEVEL2",
+	"T86x_TI_LEVEL3",
+	"T86x_TI_LEVEL4",
+	"T86x_TI_LEVEL5",
+	"T86x_TI_LEVEL6",
+	"T86x_TI_LEVEL7",
+	"T86x_TI_COMMAND_1",
+	"T86x_TI_COMMAND_2",
+	"T86x_TI_COMMAND_3",
+	"T86x_TI_COMMAND_4",
+	"T86x_TI_COMMAND_5_7",
+	"T86x_TI_COMMAND_8_15",
+	"T86x_TI_COMMAND_16_63",
+	"T86x_TI_COMMAND_64",
+	"T86x_TI_COMPRESS_IN",
+	"T86x_TI_COMPRESS_OUT",
+	"T86x_TI_COMPRESS_FLUSH",
+	"T86x_TI_TIMESTAMPS",
+	"T86x_TI_PCACHE_HIT",
+	"T86x_TI_PCACHE_MISS",
+	"T86x_TI_PCACHE_LINE",
+	"T86x_TI_PCACHE_STALL",
+	"T86x_TI_WRBUF_HIT",
+	"T86x_TI_WRBUF_MISS",
+	"T86x_TI_WRBUF_LINE",
+	"T86x_TI_WRBUF_PARTIAL",
+	"T86x_TI_WRBUF_STALL",
+	"T86x_TI_ACTIVE",
+	"T86x_TI_LOADING_DESC",
+	"T86x_TI_INDEX_WAIT",
+	"T86x_TI_INDEX_RANGE_WAIT",
+	"T86x_TI_VERTEX_WAIT",
+	"T86x_TI_PCACHE_WAIT",
+	"T86x_TI_WRBUF_WAIT",
+	"T86x_TI_BUS_READ",
+	"T86x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_TI_UTLB_HIT",
+	"T86x_TI_UTLB_NEW_MISS",
+	"T86x_TI_UTLB_REPLAY_FULL",
+	"T86x_TI_UTLB_REPLAY_MISS",
+	"T86x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T86x_FRAG_ACTIVE",
+	"T86x_FRAG_PRIMITIVES",
+	"T86x_FRAG_PRIMITIVES_DROPPED",
+	"T86x_FRAG_CYCLES_DESC",
+	"T86x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T86x_FRAG_CYCLES_VERT",
+	"T86x_FRAG_CYCLES_TRISETUP",
+	"T86x_FRAG_CYCLES_EZS_ACTIVE",
+	"T86x_FRAG_THREADS",
+	"T86x_FRAG_DUMMY_THREADS",
+	"T86x_FRAG_QUADS_RAST",
+	"T86x_FRAG_QUADS_EZS_TEST",
+	"T86x_FRAG_QUADS_EZS_KILLED",
+	"T86x_FRAG_THREADS_LZS_TEST",
+	"T86x_FRAG_THREADS_LZS_KILLED",
+	"T86x_FRAG_CYCLES_NO_TILE",
+	"T86x_FRAG_NUM_TILES",
+	"T86x_FRAG_TRANS_ELIM",
+	"T86x_COMPUTE_ACTIVE",
+	"T86x_COMPUTE_TASKS",
+	"T86x_COMPUTE_THREADS",
+	"T86x_COMPUTE_CYCLES_DESC",
+	"T86x_TRIPIPE_ACTIVE",
+	"T86x_ARITH_WORDS",
+	"T86x_ARITH_CYCLES_REG",
+	"T86x_ARITH_CYCLES_L0",
+	"T86x_ARITH_FRAG_DEPEND",
+	"T86x_LS_WORDS",
+	"T86x_LS_ISSUES",
+	"T86x_LS_REISSUE_ATTR",
+	"T86x_LS_REISSUES_VARY",
+	"T86x_LS_VARY_RV_MISS",
+	"T86x_LS_VARY_RV_HIT",
+	"T86x_LS_NO_UNPARK",
+	"T86x_TEX_WORDS",
+	"T86x_TEX_BUBBLES",
+	"T86x_TEX_WORDS_L0",
+	"T86x_TEX_WORDS_DESC",
+	"T86x_TEX_ISSUES",
+	"T86x_TEX_RECIRC_FMISS",
+	"T86x_TEX_RECIRC_DESC",
+	"T86x_TEX_RECIRC_MULTI",
+	"T86x_TEX_RECIRC_PMISS",
+	"T86x_TEX_RECIRC_CONF",
+	"T86x_LSC_READ_HITS",
+	"T86x_LSC_READ_OP",
+	"T86x_LSC_WRITE_HITS",
+	"T86x_LSC_WRITE_OP",
+	"T86x_LSC_ATOMIC_HITS",
+	"T86x_LSC_ATOMIC_OP",
+	"T86x_LSC_LINE_FETCHES",
+	"T86x_LSC_DIRTY_LINE",
+	"T86x_LSC_SNOOPS",
+	"T86x_AXI_TLB_STALL",
+	"T86x_AXI_TLB_MISS",
+	"T86x_AXI_TLB_TRANSACTION",
+	"T86x_LS_TLB_MISS",
+	"T86x_LS_TLB_HIT",
+	"T86x_AXI_BEATS_READ",
+	"T86x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MMU_HIT",
+	"T86x_MMU_NEW_MISS",
+	"T86x_MMU_REPLAY_FULL",
+	"T86x_MMU_REPLAY_MISS",
+	"T86x_MMU_TABLE_WALK",
+	"T86x_MMU_REQUESTS",
+	"",
+	"",
+	"T86x_UTLB_HIT",
+	"T86x_UTLB_NEW_MISS",
+	"T86x_UTLB_REPLAY_FULL",
+	"T86x_UTLB_REPLAY_MISS",
+	"T86x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_L2_EXT_WRITE_BEATS",
+	"T86x_L2_EXT_READ_BEATS",
+	"T86x_L2_ANY_LOOKUP",
+	"T86x_L2_READ_LOOKUP",
+	"T86x_L2_SREAD_LOOKUP",
+	"T86x_L2_READ_REPLAY",
+	"T86x_L2_READ_SNOOP",
+	"T86x_L2_READ_HIT",
+	"T86x_L2_CLEAN_MISS",
+	"T86x_L2_WRITE_LOOKUP",
+	"T86x_L2_SWRITE_LOOKUP",
+	"T86x_L2_WRITE_REPLAY",
+	"T86x_L2_WRITE_SNOOP",
+	"T86x_L2_WRITE_HIT",
+	"T86x_L2_EXT_READ_FULL",
+	"",
+	"T86x_L2_EXT_WRITE_FULL",
+	"T86x_L2_EXT_R_W_HAZARD",
+	"T86x_L2_EXT_READ",
+	"T86x_L2_EXT_READ_LINE",
+	"T86x_L2_EXT_WRITE",
+	"T86x_L2_EXT_WRITE_LINE",
+	"T86x_L2_EXT_WRITE_SMALL",
+	"T86x_L2_EXT_BARRIER",
+	"T86x_L2_EXT_AR_STALL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_EXT_RD_BUF_FULL",
+	"T86x_L2_EXT_R_RAW",
+	"T86x_L2_EXT_W_STALL",
+	"T86x_L2_EXT_W_BUF_FULL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_TAG_HAZARD",
+	"T86x_L2_SNOOP_FULL",
+	"T86x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t88x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MESSAGES_SENT",
+	"T88x_MESSAGES_RECEIVED",
+	"T88x_GPU_ACTIVE",
+	"T88x_IRQ_ACTIVE",
+	"T88x_JS0_JOBS",
+	"T88x_JS0_TASKS",
+	"T88x_JS0_ACTIVE",
+	"",
+	"T88x_JS0_WAIT_READ",
+	"T88x_JS0_WAIT_ISSUE",
+	"T88x_JS0_WAIT_DEPEND",
+	"T88x_JS0_WAIT_FINISH",
+	"T88x_JS1_JOBS",
+	"T88x_JS1_TASKS",
+	"T88x_JS1_ACTIVE",
+	"",
+	"T88x_JS1_WAIT_READ",
+	"T88x_JS1_WAIT_ISSUE",
+	"T88x_JS1_WAIT_DEPEND",
+	"T88x_JS1_WAIT_FINISH",
+	"T88x_JS2_JOBS",
+	"T88x_JS2_TASKS",
+	"T88x_JS2_ACTIVE",
+	"",
+	"T88x_JS2_WAIT_READ",
+	"T88x_JS2_WAIT_ISSUE",
+	"T88x_JS2_WAIT_DEPEND",
+	"T88x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T88x_TI_JOBS_PROCESSED",
+	"T88x_TI_TRIANGLES",
+	"T88x_TI_QUADS",
+	"T88x_TI_POLYGONS",
+	"T88x_TI_POINTS",
+	"T88x_TI_LINES",
+	"T88x_TI_VCACHE_HIT",
+	"T88x_TI_VCACHE_MISS",
+	"T88x_TI_FRONT_FACING",
+	"T88x_TI_BACK_FACING",
+	"T88x_TI_PRIM_VISIBLE",
+	"T88x_TI_PRIM_CULLED",
+	"T88x_TI_PRIM_CLIPPED",
+	"T88x_TI_LEVEL0",
+	"T88x_TI_LEVEL1",
+	"T88x_TI_LEVEL2",
+	"T88x_TI_LEVEL3",
+	"T88x_TI_LEVEL4",
+	"T88x_TI_LEVEL5",
+	"T88x_TI_LEVEL6",
+	"T88x_TI_LEVEL7",
+	"T88x_TI_COMMAND_1",
+	"T88x_TI_COMMAND_2",
+	"T88x_TI_COMMAND_3",
+	"T88x_TI_COMMAND_4",
+	"T88x_TI_COMMAND_5_7",
+	"T88x_TI_COMMAND_8_15",
+	"T88x_TI_COMMAND_16_63",
+	"T88x_TI_COMMAND_64",
+	"T88x_TI_COMPRESS_IN",
+	"T88x_TI_COMPRESS_OUT",
+	"T88x_TI_COMPRESS_FLUSH",
+	"T88x_TI_TIMESTAMPS",
+	"T88x_TI_PCACHE_HIT",
+	"T88x_TI_PCACHE_MISS",
+	"T88x_TI_PCACHE_LINE",
+	"T88x_TI_PCACHE_STALL",
+	"T88x_TI_WRBUF_HIT",
+	"T88x_TI_WRBUF_MISS",
+	"T88x_TI_WRBUF_LINE",
+	"T88x_TI_WRBUF_PARTIAL",
+	"T88x_TI_WRBUF_STALL",
+	"T88x_TI_ACTIVE",
+	"T88x_TI_LOADING_DESC",
+	"T88x_TI_INDEX_WAIT",
+	"T88x_TI_INDEX_RANGE_WAIT",
+	"T88x_TI_VERTEX_WAIT",
+	"T88x_TI_PCACHE_WAIT",
+	"T88x_TI_WRBUF_WAIT",
+	"T88x_TI_BUS_READ",
+	"T88x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_TI_UTLB_HIT",
+	"T88x_TI_UTLB_NEW_MISS",
+	"T88x_TI_UTLB_REPLAY_FULL",
+	"T88x_TI_UTLB_REPLAY_MISS",
+	"T88x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T88x_FRAG_ACTIVE",
+	"T88x_FRAG_PRIMITIVES",
+	"T88x_FRAG_PRIMITIVES_DROPPED",
+	"T88x_FRAG_CYCLES_DESC",
+	"T88x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T88x_FRAG_CYCLES_VERT",
+	"T88x_FRAG_CYCLES_TRISETUP",
+	"T88x_FRAG_CYCLES_EZS_ACTIVE",
+	"T88x_FRAG_THREADS",
+	"T88x_FRAG_DUMMY_THREADS",
+	"T88x_FRAG_QUADS_RAST",
+	"T88x_FRAG_QUADS_EZS_TEST",
+	"T88x_FRAG_QUADS_EZS_KILLED",
+	"T88x_FRAG_THREADS_LZS_TEST",
+	"T88x_FRAG_THREADS_LZS_KILLED",
+	"T88x_FRAG_CYCLES_NO_TILE",
+	"T88x_FRAG_NUM_TILES",
+	"T88x_FRAG_TRANS_ELIM",
+	"T88x_COMPUTE_ACTIVE",
+	"T88x_COMPUTE_TASKS",
+	"T88x_COMPUTE_THREADS",
+	"T88x_COMPUTE_CYCLES_DESC",
+	"T88x_TRIPIPE_ACTIVE",
+	"T88x_ARITH_WORDS",
+	"T88x_ARITH_CYCLES_REG",
+	"T88x_ARITH_CYCLES_L0",
+	"T88x_ARITH_FRAG_DEPEND",
+	"T88x_LS_WORDS",
+	"T88x_LS_ISSUES",
+	"T88x_LS_REISSUE_ATTR",
+	"T88x_LS_REISSUES_VARY",
+	"T88x_LS_VARY_RV_MISS",
+	"T88x_LS_VARY_RV_HIT",
+	"T88x_LS_NO_UNPARK",
+	"T88x_TEX_WORDS",
+	"T88x_TEX_BUBBLES",
+	"T88x_TEX_WORDS_L0",
+	"T88x_TEX_WORDS_DESC",
+	"T88x_TEX_ISSUES",
+	"T88x_TEX_RECIRC_FMISS",
+	"T88x_TEX_RECIRC_DESC",
+	"T88x_TEX_RECIRC_MULTI",
+	"T88x_TEX_RECIRC_PMISS",
+	"T88x_TEX_RECIRC_CONF",
+	"T88x_LSC_READ_HITS",
+	"T88x_LSC_READ_OP",
+	"T88x_LSC_WRITE_HITS",
+	"T88x_LSC_WRITE_OP",
+	"T88x_LSC_ATOMIC_HITS",
+	"T88x_LSC_ATOMIC_OP",
+	"T88x_LSC_LINE_FETCHES",
+	"T88x_LSC_DIRTY_LINE",
+	"T88x_LSC_SNOOPS",
+	"T88x_AXI_TLB_STALL",
+	"T88x_AXI_TLB_MISS",
+	"T88x_AXI_TLB_TRANSACTION",
+	"T88x_LS_TLB_MISS",
+	"T88x_LS_TLB_HIT",
+	"T88x_AXI_BEATS_READ",
+	"T88x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MMU_HIT",
+	"T88x_MMU_NEW_MISS",
+	"T88x_MMU_REPLAY_FULL",
+	"T88x_MMU_REPLAY_MISS",
+	"T88x_MMU_TABLE_WALK",
+	"T88x_MMU_REQUESTS",
+	"",
+	"",
+	"T88x_UTLB_HIT",
+	"T88x_UTLB_NEW_MISS",
+	"T88x_UTLB_REPLAY_FULL",
+	"T88x_UTLB_REPLAY_MISS",
+	"T88x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_L2_EXT_WRITE_BEATS",
+	"T88x_L2_EXT_READ_BEATS",
+	"T88x_L2_ANY_LOOKUP",
+	"T88x_L2_READ_LOOKUP",
+	"T88x_L2_SREAD_LOOKUP",
+	"T88x_L2_READ_REPLAY",
+	"T88x_L2_READ_SNOOP",
+	"T88x_L2_READ_HIT",
+	"T88x_L2_CLEAN_MISS",
+	"T88x_L2_WRITE_LOOKUP",
+	"T88x_L2_SWRITE_LOOKUP",
+	"T88x_L2_WRITE_REPLAY",
+	"T88x_L2_WRITE_SNOOP",
+	"T88x_L2_WRITE_HIT",
+	"T88x_L2_EXT_READ_FULL",
+	"",
+	"T88x_L2_EXT_WRITE_FULL",
+	"T88x_L2_EXT_R_W_HAZARD",
+	"T88x_L2_EXT_READ",
+	"T88x_L2_EXT_READ_LINE",
+	"T88x_L2_EXT_WRITE",
+	"T88x_L2_EXT_WRITE_LINE",
+	"T88x_L2_EXT_WRITE_SMALL",
+	"T88x_L2_EXT_BARRIER",
+	"T88x_L2_EXT_AR_STALL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_EXT_RD_BUF_FULL",
+	"T88x_L2_EXT_R_RAW",
+	"T88x_L2_EXT_W_STALL",
+	"T88x_L2_EXT_W_BUF_FULL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_TAG_HAZARD",
+	"T88x_L2_SNOOP_FULL",
+	"T88x_L2_REPLAY_FULL"
+};
+
+#include "mali_kbase_gator_hwcnt_names_tmix.h"
+
+#include "mali_kbase_gator_hwcnt_names_thex.h"
+
+#include "mali_kbase_gator_hwcnt_names_tsix.h"
+
+#include "mali_kbase_gator_hwcnt_names_tnox.h"
+
+#include "mali_kbase_gator_hwcnt_names_tkax.h"
+
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
new file mode 100644
index 0000000..15fd4ef
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+
+static const char * const hardware_counters_mali_tHEx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MESSAGES_SENT",
+	"THEx_MESSAGES_RECEIVED",
+	"THEx_GPU_ACTIVE",
+	"THEx_IRQ_ACTIVE",
+	"THEx_JS0_JOBS",
+	"THEx_JS0_TASKS",
+	"THEx_JS0_ACTIVE",
+	"",
+	"THEx_JS0_WAIT_READ",
+	"THEx_JS0_WAIT_ISSUE",
+	"THEx_JS0_WAIT_DEPEND",
+	"THEx_JS0_WAIT_FINISH",
+	"THEx_JS1_JOBS",
+	"THEx_JS1_TASKS",
+	"THEx_JS1_ACTIVE",
+	"",
+	"THEx_JS1_WAIT_READ",
+	"THEx_JS1_WAIT_ISSUE",
+	"THEx_JS1_WAIT_DEPEND",
+	"THEx_JS1_WAIT_FINISH",
+	"THEx_JS2_JOBS",
+	"THEx_JS2_TASKS",
+	"THEx_JS2_ACTIVE",
+	"",
+	"THEx_JS2_WAIT_READ",
+	"THEx_JS2_WAIT_ISSUE",
+	"THEx_JS2_WAIT_DEPEND",
+	"THEx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"THEx_TILER_ACTIVE",
+	"THEx_JOBS_PROCESSED",
+	"THEx_TRIANGLES",
+	"THEx_LINES",
+	"THEx_POINTS",
+	"THEx_FRONT_FACING",
+	"THEx_BACK_FACING",
+	"THEx_PRIM_VISIBLE",
+	"THEx_PRIM_CULLED",
+	"THEx_PRIM_CLIPPED",
+	"THEx_PRIM_SAT_CULLED",
+	"THEx_BIN_ALLOC_INIT",
+	"THEx_BIN_ALLOC_OVERFLOW",
+	"THEx_BUS_READ",
+	"",
+	"THEx_BUS_WRITE",
+	"THEx_LOADING_DESC",
+	"THEx_IDVS_POS_SHAD_REQ",
+	"THEx_IDVS_POS_SHAD_WAIT",
+	"THEx_IDVS_POS_SHAD_STALL",
+	"THEx_IDVS_POS_FIFO_FULL",
+	"THEx_PREFETCH_STALL",
+	"THEx_VCACHE_HIT",
+	"THEx_VCACHE_MISS",
+	"THEx_VCACHE_LINE_WAIT",
+	"THEx_VFETCH_POS_READ_WAIT",
+	"THEx_VFETCH_VERTEX_WAIT",
+	"THEx_VFETCH_STALL",
+	"THEx_PRIMASSY_STALL",
+	"THEx_BBOX_GEN_STALL",
+	"THEx_IDVS_VBU_HIT",
+	"THEx_IDVS_VBU_MISS",
+	"THEx_IDVS_VBU_LINE_DEALLOCATE",
+	"THEx_IDVS_VAR_SHAD_REQ",
+	"THEx_IDVS_VAR_SHAD_STALL",
+	"THEx_BINNER_STALL",
+	"THEx_ITER_STALL",
+	"THEx_COMPRESS_MISS",
+	"THEx_COMPRESS_STALL",
+	"THEx_PCACHE_HIT",
+	"THEx_PCACHE_MISS",
+	"THEx_PCACHE_MISS_STALL",
+	"THEx_PCACHE_EVICT_STALL",
+	"THEx_PMGR_PTR_WR_STALL",
+	"THEx_PMGR_PTR_RD_STALL",
+	"THEx_PMGR_CMD_WR_STALL",
+	"THEx_WRBUF_ACTIVE",
+	"THEx_WRBUF_HIT",
+	"THEx_WRBUF_MISS",
+	"THEx_WRBUF_NO_FREE_LINE_STALL",
+	"THEx_WRBUF_NO_AXI_ID_STALL",
+	"THEx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"THEx_UTLB_TRANS",
+	"THEx_UTLB_TRANS_HIT",
+	"THEx_UTLB_TRANS_STALL",
+	"THEx_UTLB_TRANS_MISS_DELAY",
+	"THEx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"THEx_FRAG_ACTIVE",
+	"THEx_FRAG_PRIMITIVES",
+	"THEx_FRAG_PRIM_RAST",
+	"THEx_FRAG_FPK_ACTIVE",
+	"THEx_FRAG_STARVING",
+	"THEx_FRAG_WARPS",
+	"THEx_FRAG_PARTIAL_WARPS",
+	"THEx_FRAG_QUADS_RAST",
+	"THEx_FRAG_QUADS_EZS_TEST",
+	"THEx_FRAG_QUADS_EZS_UPDATE",
+	"THEx_FRAG_QUADS_EZS_KILL",
+	"THEx_FRAG_LZS_TEST",
+	"THEx_FRAG_LZS_KILL",
+	"",
+	"THEx_FRAG_PTILES",
+	"THEx_FRAG_TRANS_ELIM",
+	"THEx_QUAD_FPK_KILLER",
+	"",
+	"THEx_COMPUTE_ACTIVE",
+	"THEx_COMPUTE_TASKS",
+	"THEx_COMPUTE_WARPS",
+	"THEx_COMPUTE_STARVING",
+	"THEx_EXEC_CORE_ACTIVE",
+	"THEx_EXEC_ACTIVE",
+	"THEx_EXEC_INSTR_COUNT",
+	"THEx_EXEC_INSTR_DIVERGED",
+	"THEx_EXEC_INSTR_STARVING",
+	"THEx_ARITH_INSTR_SINGLE_FMA",
+	"THEx_ARITH_INSTR_DOUBLE",
+	"THEx_ARITH_INSTR_MSG",
+	"THEx_ARITH_INSTR_MSG_ONLY",
+	"THEx_TEX_INSTR",
+	"THEx_TEX_INSTR_MIPMAP",
+	"THEx_TEX_INSTR_COMPRESSED",
+	"THEx_TEX_INSTR_3D",
+	"THEx_TEX_INSTR_TRILINEAR",
+	"THEx_TEX_COORD_ISSUE",
+	"THEx_TEX_COORD_STALL",
+	"THEx_TEX_STARVE_CACHE",
+	"THEx_TEX_STARVE_FILTER",
+	"THEx_LS_MEM_READ_FULL",
+	"THEx_LS_MEM_READ_SHORT",
+	"THEx_LS_MEM_WRITE_FULL",
+	"THEx_LS_MEM_WRITE_SHORT",
+	"THEx_LS_MEM_ATOMIC",
+	"THEx_VARY_INSTR",
+	"THEx_VARY_SLOT_32",
+	"THEx_VARY_SLOT_16",
+	"THEx_ATTR_INSTR",
+	"THEx_ARITH_INSTR_FP_MUL",
+	"THEx_BEATS_RD_FTC",
+	"THEx_BEATS_RD_FTC_EXT",
+	"THEx_BEATS_RD_LSC",
+	"THEx_BEATS_RD_LSC_EXT",
+	"THEx_BEATS_RD_TEX",
+	"THEx_BEATS_RD_TEX_EXT",
+	"THEx_BEATS_RD_OTHER",
+	"THEx_BEATS_WR_LSC",
+	"THEx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"THEx_L2_RD_MSG_IN",
+	"THEx_L2_RD_MSG_IN_STALL",
+	"THEx_L2_WR_MSG_IN",
+	"THEx_L2_WR_MSG_IN_STALL",
+	"THEx_L2_SNP_MSG_IN",
+	"THEx_L2_SNP_MSG_IN_STALL",
+	"THEx_L2_RD_MSG_OUT",
+	"THEx_L2_RD_MSG_OUT_STALL",
+	"THEx_L2_WR_MSG_OUT",
+	"THEx_L2_ANY_LOOKUP",
+	"THEx_L2_READ_LOOKUP",
+	"THEx_L2_WRITE_LOOKUP",
+	"THEx_L2_EXT_SNOOP_LOOKUP",
+	"THEx_L2_EXT_READ",
+	"THEx_L2_EXT_READ_NOSNP",
+	"THEx_L2_EXT_READ_UNIQUE",
+	"THEx_L2_EXT_READ_BEATS",
+	"THEx_L2_EXT_AR_STALL",
+	"THEx_L2_EXT_AR_CNT_Q1",
+	"THEx_L2_EXT_AR_CNT_Q2",
+	"THEx_L2_EXT_AR_CNT_Q3",
+	"THEx_L2_EXT_RRESP_0_127",
+	"THEx_L2_EXT_RRESP_128_191",
+	"THEx_L2_EXT_RRESP_192_255",
+	"THEx_L2_EXT_RRESP_256_319",
+	"THEx_L2_EXT_RRESP_320_383",
+	"THEx_L2_EXT_WRITE",
+	"THEx_L2_EXT_WRITE_NOSNP_FULL",
+	"THEx_L2_EXT_WRITE_NOSNP_PTL",
+	"THEx_L2_EXT_WRITE_SNP_FULL",
+	"THEx_L2_EXT_WRITE_SNP_PTL",
+	"THEx_L2_EXT_WRITE_BEATS",
+	"THEx_L2_EXT_W_STALL",
+	"THEx_L2_EXT_AW_CNT_Q1",
+	"THEx_L2_EXT_AW_CNT_Q2",
+	"THEx_L2_EXT_AW_CNT_Q3",
+	"THEx_L2_EXT_SNOOP",
+	"THEx_L2_EXT_SNOOP_STALL",
+	"THEx_L2_EXT_SNOOP_RESP_CLEAN",
+	"THEx_L2_EXT_SNOOP_RESP_DATA",
+	"THEx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h
new file mode 100644
index 0000000..a131d45
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TKAX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TKAX_H_
+
+static const char * const hardware_counters_mali_tKAx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_MESSAGES_SENT",
+	"TKAx_MESSAGES_RECEIVED",
+	"TKAx_GPU_ACTIVE",
+	"TKAx_IRQ_ACTIVE",
+	"TKAx_JS0_JOBS",
+	"TKAx_JS0_TASKS",
+	"TKAx_JS0_ACTIVE",
+	"",
+	"TKAx_JS0_WAIT_READ",
+	"TKAx_JS0_WAIT_ISSUE",
+	"TKAx_JS0_WAIT_DEPEND",
+	"TKAx_JS0_WAIT_FINISH",
+	"TKAx_JS1_JOBS",
+	"TKAx_JS1_TASKS",
+	"TKAx_JS1_ACTIVE",
+	"",
+	"TKAx_JS1_WAIT_READ",
+	"TKAx_JS1_WAIT_ISSUE",
+	"TKAx_JS1_WAIT_DEPEND",
+	"TKAx_JS1_WAIT_FINISH",
+	"TKAx_JS2_JOBS",
+	"TKAx_JS2_TASKS",
+	"TKAx_JS2_ACTIVE",
+	"",
+	"TKAx_JS2_WAIT_READ",
+	"TKAx_JS2_WAIT_ISSUE",
+	"TKAx_JS2_WAIT_DEPEND",
+	"TKAx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_TILER_ACTIVE",
+	"TKAx_JOBS_PROCESSED",
+	"TKAx_TRIANGLES",
+	"TKAx_LINES",
+	"TKAx_POINTS",
+	"TKAx_FRONT_FACING",
+	"TKAx_BACK_FACING",
+	"TKAx_PRIM_VISIBLE",
+	"TKAx_PRIM_CULLED",
+	"TKAx_PRIM_CLIPPED",
+	"TKAx_PRIM_SAT_CULLED",
+	"TKAx_BIN_ALLOC_INIT",
+	"TKAx_BIN_ALLOC_OVERFLOW",
+	"TKAx_BUS_READ",
+	"",
+	"TKAx_BUS_WRITE",
+	"TKAx_LOADING_DESC",
+	"TKAx_IDVS_POS_SHAD_REQ",
+	"TKAx_IDVS_POS_SHAD_WAIT",
+	"TKAx_IDVS_POS_SHAD_STALL",
+	"TKAx_IDVS_POS_FIFO_FULL",
+	"TKAx_PREFETCH_STALL",
+	"TKAx_VCACHE_HIT",
+	"TKAx_VCACHE_MISS",
+	"TKAx_VCACHE_LINE_WAIT",
+	"TKAx_VFETCH_POS_READ_WAIT",
+	"TKAx_VFETCH_VERTEX_WAIT",
+	"TKAx_VFETCH_STALL",
+	"TKAx_PRIMASSY_STALL",
+	"TKAx_BBOX_GEN_STALL",
+	"TKAx_IDVS_VBU_HIT",
+	"TKAx_IDVS_VBU_MISS",
+	"TKAx_IDVS_VBU_LINE_DEALLOCATE",
+	"TKAx_IDVS_VAR_SHAD_REQ",
+	"TKAx_IDVS_VAR_SHAD_STALL",
+	"TKAx_BINNER_STALL",
+	"TKAx_ITER_STALL",
+	"TKAx_COMPRESS_MISS",
+	"TKAx_COMPRESS_STALL",
+	"TKAx_PCACHE_HIT",
+	"TKAx_PCACHE_MISS",
+	"TKAx_PCACHE_MISS_STALL",
+	"TKAx_PCACHE_EVICT_STALL",
+	"TKAx_PMGR_PTR_WR_STALL",
+	"TKAx_PMGR_PTR_RD_STALL",
+	"TKAx_PMGR_CMD_WR_STALL",
+	"TKAx_WRBUF_ACTIVE",
+	"TKAx_WRBUF_HIT",
+	"TKAx_WRBUF_MISS",
+	"TKAx_WRBUF_NO_FREE_LINE_STALL",
+	"TKAx_WRBUF_NO_AXI_ID_STALL",
+	"TKAx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TKAx_UTLB_TRANS",
+	"TKAx_UTLB_TRANS_HIT",
+	"TKAx_UTLB_TRANS_STALL",
+	"TKAx_UTLB_TRANS_MISS_DELAY",
+	"TKAx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_FRAG_ACTIVE",
+	"TKAx_FRAG_PRIMITIVES",
+	"TKAx_FRAG_PRIM_RAST",
+	"TKAx_FRAG_FPK_ACTIVE",
+	"TKAx_FRAG_STARVING",
+	"TKAx_FRAG_WARPS",
+	"TKAx_FRAG_PARTIAL_WARPS",
+	"TKAx_FRAG_QUADS_RAST",
+	"TKAx_FRAG_QUADS_EZS_TEST",
+	"TKAx_FRAG_QUADS_EZS_UPDATE",
+	"TKAx_FRAG_QUADS_EZS_KILL",
+	"TKAx_FRAG_LZS_TEST",
+	"TKAx_FRAG_LZS_KILL",
+	"",
+	"TKAx_FRAG_PTILES",
+	"TKAx_FRAG_TRANS_ELIM",
+	"TKAx_QUAD_FPK_KILLER",
+	"",
+	"TKAx_COMPUTE_ACTIVE",
+	"TKAx_COMPUTE_TASKS",
+	"TKAx_COMPUTE_WARPS",
+	"TKAx_COMPUTE_STARVING",
+	"TKAx_EXEC_CORE_ACTIVE",
+	"TKAx_EXEC_ACTIVE",
+	"TKAx_EXEC_INSTR_COUNT",
+	"TKAx_EXEC_INSTR_DIVERGED",
+	"TKAx_EXEC_INSTR_STARVING",
+	"TKAx_ARITH_INSTR_SINGLE_FMA",
+	"TKAx_ARITH_INSTR_DOUBLE",
+	"TKAx_ARITH_INSTR_MSG",
+	"TKAx_ARITH_INSTR_MSG_ONLY",
+	"TKAx_TEX_MSGI_NUM_QUADS",
+	"TKAx_TEX_DFCH_NUM_PASSES",
+	"TKAx_TEX_DFCH_NUM_PASSES_MISS",
+	"TKAx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TKAx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TKAx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TKAx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TKAx_TEX_TFCH_NUM_OPERATIONS",
+	"TKAx_TEX_FILT_NUM_OPERATIONS",
+	"TKAx_LS_MEM_READ_FULL",
+	"TKAx_LS_MEM_READ_SHORT",
+	"TKAx_LS_MEM_WRITE_FULL",
+	"TKAx_LS_MEM_WRITE_SHORT",
+	"TKAx_LS_MEM_ATOMIC",
+	"TKAx_VARY_INSTR",
+	"TKAx_VARY_SLOT_32",
+	"TKAx_VARY_SLOT_16",
+	"TKAx_ATTR_INSTR",
+	"TKAx_ARITH_INSTR_FP_MUL",
+	"TKAx_BEATS_RD_FTC",
+	"TKAx_BEATS_RD_FTC_EXT",
+	"TKAx_BEATS_RD_LSC",
+	"TKAx_BEATS_RD_LSC_EXT",
+	"TKAx_BEATS_RD_TEX",
+	"TKAx_BEATS_RD_TEX_EXT",
+	"TKAx_BEATS_RD_OTHER",
+	"TKAx_BEATS_WR_LSC_WB",
+	"TKAx_BEATS_WR_TIB",
+	"TKAx_BEATS_WR_LSC_OTHER",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TKAx_L2_RD_MSG_IN",
+	"TKAx_L2_RD_MSG_IN_STALL",
+	"TKAx_L2_WR_MSG_IN",
+	"TKAx_L2_WR_MSG_IN_STALL",
+	"TKAx_L2_SNP_MSG_IN",
+	"TKAx_L2_SNP_MSG_IN_STALL",
+	"TKAx_L2_RD_MSG_OUT",
+	"TKAx_L2_RD_MSG_OUT_STALL",
+	"TKAx_L2_WR_MSG_OUT",
+	"TKAx_L2_ANY_LOOKUP",
+	"TKAx_L2_READ_LOOKUP",
+	"TKAx_L2_WRITE_LOOKUP",
+	"TKAx_L2_EXT_SNOOP_LOOKUP",
+	"TKAx_L2_EXT_READ",
+	"TKAx_L2_EXT_READ_NOSNP",
+	"TKAx_L2_EXT_READ_UNIQUE",
+	"TKAx_L2_EXT_READ_BEATS",
+	"TKAx_L2_EXT_AR_STALL",
+	"TKAx_L2_EXT_AR_CNT_Q1",
+	"TKAx_L2_EXT_AR_CNT_Q2",
+	"TKAx_L2_EXT_AR_CNT_Q3",
+	"TKAx_L2_EXT_RRESP_0_127",
+	"TKAx_L2_EXT_RRESP_128_191",
+	"TKAx_L2_EXT_RRESP_192_255",
+	"TKAx_L2_EXT_RRESP_256_319",
+	"TKAx_L2_EXT_RRESP_320_383",
+	"TKAx_L2_EXT_WRITE",
+	"TKAx_L2_EXT_WRITE_NOSNP_FULL",
+	"TKAx_L2_EXT_WRITE_NOSNP_PTL",
+	"TKAx_L2_EXT_WRITE_SNP_FULL",
+	"TKAx_L2_EXT_WRITE_SNP_PTL",
+	"TKAx_L2_EXT_WRITE_BEATS",
+	"TKAx_L2_EXT_W_STALL",
+	"TKAx_L2_EXT_AW_CNT_Q1",
+	"TKAx_L2_EXT_AW_CNT_Q2",
+	"TKAx_L2_EXT_AW_CNT_Q3",
+	"TKAx_L2_EXT_SNOOP",
+	"TKAx_L2_EXT_SNOOP_STALL",
+	"TKAx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TKAx_L2_EXT_SNOOP_RESP_DATA",
+	"TKAx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TKAX_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
new file mode 100644
index 0000000..8a215f7
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+
+static const char * const hardware_counters_mali_tMIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MESSAGES_SENT",
+	"TMIx_MESSAGES_RECEIVED",
+	"TMIx_GPU_ACTIVE",
+	"TMIx_IRQ_ACTIVE",
+	"TMIx_JS0_JOBS",
+	"TMIx_JS0_TASKS",
+	"TMIx_JS0_ACTIVE",
+	"",
+	"TMIx_JS0_WAIT_READ",
+	"TMIx_JS0_WAIT_ISSUE",
+	"TMIx_JS0_WAIT_DEPEND",
+	"TMIx_JS0_WAIT_FINISH",
+	"TMIx_JS1_JOBS",
+	"TMIx_JS1_TASKS",
+	"TMIx_JS1_ACTIVE",
+	"",
+	"TMIx_JS1_WAIT_READ",
+	"TMIx_JS1_WAIT_ISSUE",
+	"TMIx_JS1_WAIT_DEPEND",
+	"TMIx_JS1_WAIT_FINISH",
+	"TMIx_JS2_JOBS",
+	"TMIx_JS2_TASKS",
+	"TMIx_JS2_ACTIVE",
+	"",
+	"TMIx_JS2_WAIT_READ",
+	"TMIx_JS2_WAIT_ISSUE",
+	"TMIx_JS2_WAIT_DEPEND",
+	"TMIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_TILER_ACTIVE",
+	"TMIx_JOBS_PROCESSED",
+	"TMIx_TRIANGLES",
+	"TMIx_LINES",
+	"TMIx_POINTS",
+	"TMIx_FRONT_FACING",
+	"TMIx_BACK_FACING",
+	"TMIx_PRIM_VISIBLE",
+	"TMIx_PRIM_CULLED",
+	"TMIx_PRIM_CLIPPED",
+	"TMIx_PRIM_SAT_CULLED",
+	"TMIx_BIN_ALLOC_INIT",
+	"TMIx_BIN_ALLOC_OVERFLOW",
+	"TMIx_BUS_READ",
+	"",
+	"TMIx_BUS_WRITE",
+	"TMIx_LOADING_DESC",
+	"TMIx_IDVS_POS_SHAD_REQ",
+	"TMIx_IDVS_POS_SHAD_WAIT",
+	"TMIx_IDVS_POS_SHAD_STALL",
+	"TMIx_IDVS_POS_FIFO_FULL",
+	"TMIx_PREFETCH_STALL",
+	"TMIx_VCACHE_HIT",
+	"TMIx_VCACHE_MISS",
+	"TMIx_VCACHE_LINE_WAIT",
+	"TMIx_VFETCH_POS_READ_WAIT",
+	"TMIx_VFETCH_VERTEX_WAIT",
+	"TMIx_VFETCH_STALL",
+	"TMIx_PRIMASSY_STALL",
+	"TMIx_BBOX_GEN_STALL",
+	"TMIx_IDVS_VBU_HIT",
+	"TMIx_IDVS_VBU_MISS",
+	"TMIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TMIx_IDVS_VAR_SHAD_REQ",
+	"TMIx_IDVS_VAR_SHAD_STALL",
+	"TMIx_BINNER_STALL",
+	"TMIx_ITER_STALL",
+	"TMIx_COMPRESS_MISS",
+	"TMIx_COMPRESS_STALL",
+	"TMIx_PCACHE_HIT",
+	"TMIx_PCACHE_MISS",
+	"TMIx_PCACHE_MISS_STALL",
+	"TMIx_PCACHE_EVICT_STALL",
+	"TMIx_PMGR_PTR_WR_STALL",
+	"TMIx_PMGR_PTR_RD_STALL",
+	"TMIx_PMGR_CMD_WR_STALL",
+	"TMIx_WRBUF_ACTIVE",
+	"TMIx_WRBUF_HIT",
+	"TMIx_WRBUF_MISS",
+	"TMIx_WRBUF_NO_FREE_LINE_STALL",
+	"TMIx_WRBUF_NO_AXI_ID_STALL",
+	"TMIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TMIx_UTLB_TRANS",
+	"TMIx_UTLB_TRANS_HIT",
+	"TMIx_UTLB_TRANS_STALL",
+	"TMIx_UTLB_TRANS_MISS_DELAY",
+	"TMIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_FRAG_ACTIVE",
+	"TMIx_FRAG_PRIMITIVES",
+	"TMIx_FRAG_PRIM_RAST",
+	"TMIx_FRAG_FPK_ACTIVE",
+	"TMIx_FRAG_STARVING",
+	"TMIx_FRAG_WARPS",
+	"TMIx_FRAG_PARTIAL_WARPS",
+	"TMIx_FRAG_QUADS_RAST",
+	"TMIx_FRAG_QUADS_EZS_TEST",
+	"TMIx_FRAG_QUADS_EZS_UPDATE",
+	"TMIx_FRAG_QUADS_EZS_KILL",
+	"TMIx_FRAG_LZS_TEST",
+	"TMIx_FRAG_LZS_KILL",
+	"",
+	"TMIx_FRAG_PTILES",
+	"TMIx_FRAG_TRANS_ELIM",
+	"TMIx_QUAD_FPK_KILLER",
+	"",
+	"TMIx_COMPUTE_ACTIVE",
+	"TMIx_COMPUTE_TASKS",
+	"TMIx_COMPUTE_WARPS",
+	"TMIx_COMPUTE_STARVING",
+	"TMIx_EXEC_CORE_ACTIVE",
+	"TMIx_EXEC_ACTIVE",
+	"TMIx_EXEC_INSTR_COUNT",
+	"TMIx_EXEC_INSTR_DIVERGED",
+	"TMIx_EXEC_INSTR_STARVING",
+	"TMIx_ARITH_INSTR_SINGLE_FMA",
+	"TMIx_ARITH_INSTR_DOUBLE",
+	"TMIx_ARITH_INSTR_MSG",
+	"TMIx_ARITH_INSTR_MSG_ONLY",
+	"TMIx_TEX_INSTR",
+	"TMIx_TEX_INSTR_MIPMAP",
+	"TMIx_TEX_INSTR_COMPRESSED",
+	"TMIx_TEX_INSTR_3D",
+	"TMIx_TEX_INSTR_TRILINEAR",
+	"TMIx_TEX_COORD_ISSUE",
+	"TMIx_TEX_COORD_STALL",
+	"TMIx_TEX_STARVE_CACHE",
+	"TMIx_TEX_STARVE_FILTER",
+	"TMIx_LS_MEM_READ_FULL",
+	"TMIx_LS_MEM_READ_SHORT",
+	"TMIx_LS_MEM_WRITE_FULL",
+	"TMIx_LS_MEM_WRITE_SHORT",
+	"TMIx_LS_MEM_ATOMIC",
+	"TMIx_VARY_INSTR",
+	"TMIx_VARY_SLOT_32",
+	"TMIx_VARY_SLOT_16",
+	"TMIx_ATTR_INSTR",
+	"TMIx_ARITH_INSTR_FP_MUL",
+	"TMIx_BEATS_RD_FTC",
+	"TMIx_BEATS_RD_FTC_EXT",
+	"TMIx_BEATS_RD_LSC",
+	"TMIx_BEATS_RD_LSC_EXT",
+	"TMIx_BEATS_RD_TEX",
+	"TMIx_BEATS_RD_TEX_EXT",
+	"TMIx_BEATS_RD_OTHER",
+	"TMIx_BEATS_WR_LSC",
+	"TMIx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TMIx_L2_RD_MSG_IN",
+	"TMIx_L2_RD_MSG_IN_STALL",
+	"TMIx_L2_WR_MSG_IN",
+	"TMIx_L2_WR_MSG_IN_STALL",
+	"TMIx_L2_SNP_MSG_IN",
+	"TMIx_L2_SNP_MSG_IN_STALL",
+	"TMIx_L2_RD_MSG_OUT",
+	"TMIx_L2_RD_MSG_OUT_STALL",
+	"TMIx_L2_WR_MSG_OUT",
+	"TMIx_L2_ANY_LOOKUP",
+	"TMIx_L2_READ_LOOKUP",
+	"TMIx_L2_WRITE_LOOKUP",
+	"TMIx_L2_EXT_SNOOP_LOOKUP",
+	"TMIx_L2_EXT_READ",
+	"TMIx_L2_EXT_READ_NOSNP",
+	"TMIx_L2_EXT_READ_UNIQUE",
+	"TMIx_L2_EXT_READ_BEATS",
+	"TMIx_L2_EXT_AR_STALL",
+	"TMIx_L2_EXT_AR_CNT_Q1",
+	"TMIx_L2_EXT_AR_CNT_Q2",
+	"TMIx_L2_EXT_AR_CNT_Q3",
+	"TMIx_L2_EXT_RRESP_0_127",
+	"TMIx_L2_EXT_RRESP_128_191",
+	"TMIx_L2_EXT_RRESP_192_255",
+	"TMIx_L2_EXT_RRESP_256_319",
+	"TMIx_L2_EXT_RRESP_320_383",
+	"TMIx_L2_EXT_WRITE",
+	"TMIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TMIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TMIx_L2_EXT_WRITE_SNP_FULL",
+	"TMIx_L2_EXT_WRITE_SNP_PTL",
+	"TMIx_L2_EXT_WRITE_BEATS",
+	"TMIx_L2_EXT_W_STALL",
+	"TMIx_L2_EXT_AW_CNT_Q1",
+	"TMIx_L2_EXT_AW_CNT_Q2",
+	"TMIx_L2_EXT_AW_CNT_Q3",
+	"TMIx_L2_EXT_SNOOP",
+	"TMIx_L2_EXT_SNOOP_STALL",
+	"TMIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TMIx_L2_EXT_SNOOP_RESP_DATA",
+	"TMIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h
new file mode 100644
index 0000000..b6175a5
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TNOX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TNOX_H_
+
+static const char * const hardware_counters_mali_tNOx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_MESSAGES_SENT",
+	"TNOx_MESSAGES_RECEIVED",
+	"TNOx_GPU_ACTIVE",
+	"TNOx_IRQ_ACTIVE",
+	"TNOx_JS0_JOBS",
+	"TNOx_JS0_TASKS",
+	"TNOx_JS0_ACTIVE",
+	"",
+	"TNOx_JS0_WAIT_READ",
+	"TNOx_JS0_WAIT_ISSUE",
+	"TNOx_JS0_WAIT_DEPEND",
+	"TNOx_JS0_WAIT_FINISH",
+	"TNOx_JS1_JOBS",
+	"TNOx_JS1_TASKS",
+	"TNOx_JS1_ACTIVE",
+	"",
+	"TNOx_JS1_WAIT_READ",
+	"TNOx_JS1_WAIT_ISSUE",
+	"TNOx_JS1_WAIT_DEPEND",
+	"TNOx_JS1_WAIT_FINISH",
+	"TNOx_JS2_JOBS",
+	"TNOx_JS2_TASKS",
+	"TNOx_JS2_ACTIVE",
+	"",
+	"TNOx_JS2_WAIT_READ",
+	"TNOx_JS2_WAIT_ISSUE",
+	"TNOx_JS2_WAIT_DEPEND",
+	"TNOx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_TILER_ACTIVE",
+	"TNOx_JOBS_PROCESSED",
+	"TNOx_TRIANGLES",
+	"TNOx_LINES",
+	"TNOx_POINTS",
+	"TNOx_FRONT_FACING",
+	"TNOx_BACK_FACING",
+	"TNOx_PRIM_VISIBLE",
+	"TNOx_PRIM_CULLED",
+	"TNOx_PRIM_CLIPPED",
+	"TNOx_PRIM_SAT_CULLED",
+	"TNOx_BIN_ALLOC_INIT",
+	"TNOx_BIN_ALLOC_OVERFLOW",
+	"TNOx_BUS_READ",
+	"",
+	"TNOx_BUS_WRITE",
+	"TNOx_LOADING_DESC",
+	"TNOx_IDVS_POS_SHAD_REQ",
+	"TNOx_IDVS_POS_SHAD_WAIT",
+	"TNOx_IDVS_POS_SHAD_STALL",
+	"TNOx_IDVS_POS_FIFO_FULL",
+	"TNOx_PREFETCH_STALL",
+	"TNOx_VCACHE_HIT",
+	"TNOx_VCACHE_MISS",
+	"TNOx_VCACHE_LINE_WAIT",
+	"TNOx_VFETCH_POS_READ_WAIT",
+	"TNOx_VFETCH_VERTEX_WAIT",
+	"TNOx_VFETCH_STALL",
+	"TNOx_PRIMASSY_STALL",
+	"TNOx_BBOX_GEN_STALL",
+	"TNOx_IDVS_VBU_HIT",
+	"TNOx_IDVS_VBU_MISS",
+	"TNOx_IDVS_VBU_LINE_DEALLOCATE",
+	"TNOx_IDVS_VAR_SHAD_REQ",
+	"TNOx_IDVS_VAR_SHAD_STALL",
+	"TNOx_BINNER_STALL",
+	"TNOx_ITER_STALL",
+	"TNOx_COMPRESS_MISS",
+	"TNOx_COMPRESS_STALL",
+	"TNOx_PCACHE_HIT",
+	"TNOx_PCACHE_MISS",
+	"TNOx_PCACHE_MISS_STALL",
+	"TNOx_PCACHE_EVICT_STALL",
+	"TNOx_PMGR_PTR_WR_STALL",
+	"TNOx_PMGR_PTR_RD_STALL",
+	"TNOx_PMGR_CMD_WR_STALL",
+	"TNOx_WRBUF_ACTIVE",
+	"TNOx_WRBUF_HIT",
+	"TNOx_WRBUF_MISS",
+	"TNOx_WRBUF_NO_FREE_LINE_STALL",
+	"TNOx_WRBUF_NO_AXI_ID_STALL",
+	"TNOx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TNOx_UTLB_TRANS",
+	"TNOx_UTLB_TRANS_HIT",
+	"TNOx_UTLB_TRANS_STALL",
+	"TNOx_UTLB_TRANS_MISS_DELAY",
+	"TNOx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_FRAG_ACTIVE",
+	"TNOx_FRAG_PRIMITIVES",
+	"TNOx_FRAG_PRIM_RAST",
+	"TNOx_FRAG_FPK_ACTIVE",
+	"TNOx_FRAG_STARVING",
+	"TNOx_FRAG_WARPS",
+	"TNOx_FRAG_PARTIAL_WARPS",
+	"TNOx_FRAG_QUADS_RAST",
+	"TNOx_FRAG_QUADS_EZS_TEST",
+	"TNOx_FRAG_QUADS_EZS_UPDATE",
+	"TNOx_FRAG_QUADS_EZS_KILL",
+	"TNOx_FRAG_LZS_TEST",
+	"TNOx_FRAG_LZS_KILL",
+	"",
+	"TNOx_FRAG_PTILES",
+	"TNOx_FRAG_TRANS_ELIM",
+	"TNOx_QUAD_FPK_KILLER",
+	"",
+	"TNOx_COMPUTE_ACTIVE",
+	"TNOx_COMPUTE_TASKS",
+	"TNOx_COMPUTE_WARPS",
+	"TNOx_COMPUTE_STARVING",
+	"TNOx_EXEC_CORE_ACTIVE",
+	"TNOx_EXEC_ACTIVE",
+	"TNOx_EXEC_INSTR_COUNT",
+	"TNOx_EXEC_INSTR_DIVERGED",
+	"TNOx_EXEC_INSTR_STARVING",
+	"TNOx_ARITH_INSTR_SINGLE_FMA",
+	"TNOx_ARITH_INSTR_DOUBLE",
+	"TNOx_ARITH_INSTR_MSG",
+	"TNOx_ARITH_INSTR_MSG_ONLY",
+	"TNOx_TEX_MSGI_NUM_QUADS",
+	"TNOx_TEX_DFCH_NUM_PASSES",
+	"TNOx_TEX_DFCH_NUM_PASSES_MISS",
+	"TNOx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TNOx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TNOx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TNOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TNOx_TEX_TFCH_NUM_OPERATIONS",
+	"TNOx_TEX_FILT_NUM_OPERATIONS",
+	"TNOx_LS_MEM_READ_FULL",
+	"TNOx_LS_MEM_READ_SHORT",
+	"TNOx_LS_MEM_WRITE_FULL",
+	"TNOx_LS_MEM_WRITE_SHORT",
+	"TNOx_LS_MEM_ATOMIC",
+	"TNOx_VARY_INSTR",
+	"TNOx_VARY_SLOT_32",
+	"TNOx_VARY_SLOT_16",
+	"TNOx_ATTR_INSTR",
+	"TNOx_ARITH_INSTR_FP_MUL",
+	"TNOx_BEATS_RD_FTC",
+	"TNOx_BEATS_RD_FTC_EXT",
+	"TNOx_BEATS_RD_LSC",
+	"TNOx_BEATS_RD_LSC_EXT",
+	"TNOx_BEATS_RD_TEX",
+	"TNOx_BEATS_RD_TEX_EXT",
+	"TNOx_BEATS_RD_OTHER",
+	"TNOx_BEATS_WR_LSC_WB",
+	"TNOx_BEATS_WR_TIB",
+	"TNOx_BEATS_WR_LSC_OTHER",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TNOx_L2_RD_MSG_IN",
+	"TNOx_L2_RD_MSG_IN_STALL",
+	"TNOx_L2_WR_MSG_IN",
+	"TNOx_L2_WR_MSG_IN_STALL",
+	"TNOx_L2_SNP_MSG_IN",
+	"TNOx_L2_SNP_MSG_IN_STALL",
+	"TNOx_L2_RD_MSG_OUT",
+	"TNOx_L2_RD_MSG_OUT_STALL",
+	"TNOx_L2_WR_MSG_OUT",
+	"TNOx_L2_ANY_LOOKUP",
+	"TNOx_L2_READ_LOOKUP",
+	"TNOx_L2_WRITE_LOOKUP",
+	"TNOx_L2_EXT_SNOOP_LOOKUP",
+	"TNOx_L2_EXT_READ",
+	"TNOx_L2_EXT_READ_NOSNP",
+	"TNOx_L2_EXT_READ_UNIQUE",
+	"TNOx_L2_EXT_READ_BEATS",
+	"TNOx_L2_EXT_AR_STALL",
+	"TNOx_L2_EXT_AR_CNT_Q1",
+	"TNOx_L2_EXT_AR_CNT_Q2",
+	"TNOx_L2_EXT_AR_CNT_Q3",
+	"TNOx_L2_EXT_RRESP_0_127",
+	"TNOx_L2_EXT_RRESP_128_191",
+	"TNOx_L2_EXT_RRESP_192_255",
+	"TNOx_L2_EXT_RRESP_256_319",
+	"TNOx_L2_EXT_RRESP_320_383",
+	"TNOx_L2_EXT_WRITE",
+	"TNOx_L2_EXT_WRITE_NOSNP_FULL",
+	"TNOx_L2_EXT_WRITE_NOSNP_PTL",
+	"TNOx_L2_EXT_WRITE_SNP_FULL",
+	"TNOx_L2_EXT_WRITE_SNP_PTL",
+	"TNOx_L2_EXT_WRITE_BEATS",
+	"TNOx_L2_EXT_W_STALL",
+	"TNOx_L2_EXT_AW_CNT_Q1",
+	"TNOx_L2_EXT_AW_CNT_Q2",
+	"TNOx_L2_EXT_AW_CNT_Q3",
+	"TNOx_L2_EXT_SNOOP",
+	"TNOx_L2_EXT_SNOOP_STALL",
+	"TNOx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TNOx_L2_EXT_SNOOP_RESP_DATA",
+	"TNOx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TNOX_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
new file mode 100644
index 0000000..fb6a143
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TSIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TSIX_H_
+
+static const char * const hardware_counters_mali_tSIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_MESSAGES_SENT",
+	"TSIx_MESSAGES_RECEIVED",
+	"TSIx_GPU_ACTIVE",
+	"TSIx_IRQ_ACTIVE",
+	"TSIx_JS0_JOBS",
+	"TSIx_JS0_TASKS",
+	"TSIx_JS0_ACTIVE",
+	"",
+	"TSIx_JS0_WAIT_READ",
+	"TSIx_JS0_WAIT_ISSUE",
+	"TSIx_JS0_WAIT_DEPEND",
+	"TSIx_JS0_WAIT_FINISH",
+	"TSIx_JS1_JOBS",
+	"TSIx_JS1_TASKS",
+	"TSIx_JS1_ACTIVE",
+	"",
+	"TSIx_JS1_WAIT_READ",
+	"TSIx_JS1_WAIT_ISSUE",
+	"TSIx_JS1_WAIT_DEPEND",
+	"TSIx_JS1_WAIT_FINISH",
+	"TSIx_JS2_JOBS",
+	"TSIx_JS2_TASKS",
+	"TSIx_JS2_ACTIVE",
+	"",
+	"TSIx_JS2_WAIT_READ",
+	"TSIx_JS2_WAIT_ISSUE",
+	"TSIx_JS2_WAIT_DEPEND",
+	"TSIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_TILER_ACTIVE",
+	"TSIx_JOBS_PROCESSED",
+	"TSIx_TRIANGLES",
+	"TSIx_LINES",
+	"TSIx_POINTS",
+	"TSIx_FRONT_FACING",
+	"TSIx_BACK_FACING",
+	"TSIx_PRIM_VISIBLE",
+	"TSIx_PRIM_CULLED",
+	"TSIx_PRIM_CLIPPED",
+	"TSIx_PRIM_SAT_CULLED",
+	"TSIx_BIN_ALLOC_INIT",
+	"TSIx_BIN_ALLOC_OVERFLOW",
+	"TSIx_BUS_READ",
+	"",
+	"TSIx_BUS_WRITE",
+	"TSIx_LOADING_DESC",
+	"TSIx_IDVS_POS_SHAD_REQ",
+	"TSIx_IDVS_POS_SHAD_WAIT",
+	"TSIx_IDVS_POS_SHAD_STALL",
+	"TSIx_IDVS_POS_FIFO_FULL",
+	"TSIx_PREFETCH_STALL",
+	"TSIx_VCACHE_HIT",
+	"TSIx_VCACHE_MISS",
+	"TSIx_VCACHE_LINE_WAIT",
+	"TSIx_VFETCH_POS_READ_WAIT",
+	"TSIx_VFETCH_VERTEX_WAIT",
+	"TSIx_VFETCH_STALL",
+	"TSIx_PRIMASSY_STALL",
+	"TSIx_BBOX_GEN_STALL",
+	"TSIx_IDVS_VBU_HIT",
+	"TSIx_IDVS_VBU_MISS",
+	"TSIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TSIx_IDVS_VAR_SHAD_REQ",
+	"TSIx_IDVS_VAR_SHAD_STALL",
+	"TSIx_BINNER_STALL",
+	"TSIx_ITER_STALL",
+	"TSIx_COMPRESS_MISS",
+	"TSIx_COMPRESS_STALL",
+	"TSIx_PCACHE_HIT",
+	"TSIx_PCACHE_MISS",
+	"TSIx_PCACHE_MISS_STALL",
+	"TSIx_PCACHE_EVICT_STALL",
+	"TSIx_PMGR_PTR_WR_STALL",
+	"TSIx_PMGR_PTR_RD_STALL",
+	"TSIx_PMGR_CMD_WR_STALL",
+	"TSIx_WRBUF_ACTIVE",
+	"TSIx_WRBUF_HIT",
+	"TSIx_WRBUF_MISS",
+	"TSIx_WRBUF_NO_FREE_LINE_STALL",
+	"TSIx_WRBUF_NO_AXI_ID_STALL",
+	"TSIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TSIx_UTLB_TRANS",
+	"TSIx_UTLB_TRANS_HIT",
+	"TSIx_UTLB_TRANS_STALL",
+	"TSIx_UTLB_TRANS_MISS_DELAY",
+	"TSIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_FRAG_ACTIVE",
+	"TSIx_FRAG_PRIMITIVES",
+	"TSIx_FRAG_PRIM_RAST",
+	"TSIx_FRAG_FPK_ACTIVE",
+	"TSIx_FRAG_STARVING",
+	"TSIx_FRAG_WARPS",
+	"TSIx_FRAG_PARTIAL_WARPS",
+	"TSIx_FRAG_QUADS_RAST",
+	"TSIx_FRAG_QUADS_EZS_TEST",
+	"TSIx_FRAG_QUADS_EZS_UPDATE",
+	"TSIx_FRAG_QUADS_EZS_KILL",
+	"TSIx_FRAG_LZS_TEST",
+	"TSIx_FRAG_LZS_KILL",
+	"",
+	"TSIx_FRAG_PTILES",
+	"TSIx_FRAG_TRANS_ELIM",
+	"TSIx_QUAD_FPK_KILLER",
+	"",
+	"TSIx_COMPUTE_ACTIVE",
+	"TSIx_COMPUTE_TASKS",
+	"TSIx_COMPUTE_WARPS",
+	"TSIx_COMPUTE_STARVING",
+	"TSIx_EXEC_CORE_ACTIVE",
+	"TSIx_EXEC_ACTIVE",
+	"TSIx_EXEC_INSTR_COUNT",
+	"TSIx_EXEC_INSTR_DIVERGED",
+	"TSIx_EXEC_INSTR_STARVING",
+	"TSIx_ARITH_INSTR_SINGLE_FMA",
+	"TSIx_ARITH_INSTR_DOUBLE",
+	"TSIx_ARITH_INSTR_MSG",
+	"TSIx_ARITH_INSTR_MSG_ONLY",
+	"TSIx_TEX_MSGI_NUM_QUADS",
+	"TSIx_TEX_DFCH_NUM_PASSES",
+	"TSIx_TEX_DFCH_NUM_PASSES_MISS",
+	"TSIx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TSIx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TSIx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TSIx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TSIx_TEX_TFCH_NUM_OPERATIONS",
+	"TSIx_TEX_FILT_NUM_OPERATIONS",
+	"TSIx_LS_MEM_READ_FULL",
+	"TSIx_LS_MEM_READ_SHORT",
+	"TSIx_LS_MEM_WRITE_FULL",
+	"TSIx_LS_MEM_WRITE_SHORT",
+	"TSIx_LS_MEM_ATOMIC",
+	"TSIx_VARY_INSTR",
+	"TSIx_VARY_SLOT_32",
+	"TSIx_VARY_SLOT_16",
+	"TSIx_ATTR_INSTR",
+	"TSIx_ARITH_INSTR_FP_MUL",
+	"TSIx_BEATS_RD_FTC",
+	"TSIx_BEATS_RD_FTC_EXT",
+	"TSIx_BEATS_RD_LSC",
+	"TSIx_BEATS_RD_LSC_EXT",
+	"TSIx_BEATS_RD_TEX",
+	"TSIx_BEATS_RD_TEX_EXT",
+	"TSIx_BEATS_RD_OTHER",
+	"TSIx_BEATS_WR_LSC",
+	"TSIx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TSIx_L2_RD_MSG_IN",
+	"TSIx_L2_RD_MSG_IN_STALL",
+	"TSIx_L2_WR_MSG_IN",
+	"TSIx_L2_WR_MSG_IN_STALL",
+	"TSIx_L2_SNP_MSG_IN",
+	"TSIx_L2_SNP_MSG_IN_STALL",
+	"TSIx_L2_RD_MSG_OUT",
+	"TSIx_L2_RD_MSG_OUT_STALL",
+	"TSIx_L2_WR_MSG_OUT",
+	"TSIx_L2_ANY_LOOKUP",
+	"TSIx_L2_READ_LOOKUP",
+	"TSIx_L2_WRITE_LOOKUP",
+	"TSIx_L2_EXT_SNOOP_LOOKUP",
+	"TSIx_L2_EXT_READ",
+	"TSIx_L2_EXT_READ_NOSNP",
+	"TSIx_L2_EXT_READ_UNIQUE",
+	"TSIx_L2_EXT_READ_BEATS",
+	"TSIx_L2_EXT_AR_STALL",
+	"TSIx_L2_EXT_AR_CNT_Q1",
+	"TSIx_L2_EXT_AR_CNT_Q2",
+	"TSIx_L2_EXT_AR_CNT_Q3",
+	"TSIx_L2_EXT_RRESP_0_127",
+	"TSIx_L2_EXT_RRESP_128_191",
+	"TSIx_L2_EXT_RRESP_192_255",
+	"TSIx_L2_EXT_RRESP_256_319",
+	"TSIx_L2_EXT_RRESP_320_383",
+	"TSIx_L2_EXT_WRITE",
+	"TSIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TSIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TSIx_L2_EXT_WRITE_SNP_FULL",
+	"TSIx_L2_EXT_WRITE_SNP_PTL",
+	"TSIx_L2_EXT_WRITE_BEATS",
+	"TSIx_L2_EXT_W_STALL",
+	"TSIx_L2_EXT_AW_CNT_Q1",
+	"TSIx_L2_EXT_AW_CNT_Q2",
+	"TSIx_L2_EXT_AW_CNT_Q3",
+	"TSIx_L2_EXT_SNOOP",
+	"TSIx_L2_EXT_SNOOP_STALL",
+	"TSIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TSIx_L2_EXT_SNOOP_RESP_DATA",
+	"TSIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
new file mode 100644
index 0000000..32b9513
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#ifndef _KBASE_GPU_ID_H_
+#define _KBASE_GPU_ID_H_
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT       0
+#define GPU_ID_VERSION_MINOR_SHIFT        4
+#define GPU_ID_VERSION_MAJOR_SHIFT        12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
+#define GPU_ID_VERSION_STATUS             (0xF  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xF  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X                    0x6956
+#define GPU_ID_PI_T62X                    0x0620
+#define GPU_ID_PI_T76X                    0x0750
+#define GPU_ID_PI_T72X                    0x0720
+#define GPU_ID_PI_TFRX                    0x0880
+#define GPU_ID_PI_T86X                    0x0860
+#define GPU_ID_PI_T82X                    0x0820
+#define GPU_ID_PI_T83X                    0x0830
+
+/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
+#define GPU_ID_PI_NEW_FORMAT_START        0x1000
+#define GPU_ID_IS_NEW_FORMAT(product_id)  ((product_id) != GPU_ID_PI_T60X && \
+						(product_id) >= \
+						GPU_ID_PI_NEW_FORMAT_START)
+
+#define GPU_ID2_VERSION_STATUS_SHIFT      0
+#define GPU_ID2_VERSION_MINOR_SHIFT       4
+#define GPU_ID2_VERSION_MAJOR_SHIFT       12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT       16
+#define GPU_ID2_ARCH_REV_SHIFT            20
+#define GPU_ID2_ARCH_MINOR_SHIFT          24
+#define GPU_ID2_ARCH_MAJOR_SHIFT          28
+#define GPU_ID2_VERSION_STATUS            (0xF << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFF << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xF << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xF << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xF << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xF << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+#define GPU_ID2_VERSION        (GPU_ID2_VERSION_MAJOR | \
+								GPU_ID2_VERSION_MINOR | \
+								GPU_ID2_VERSION_STATUS)
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+   a product ignoring its version. */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+   revision (major, minor, status) of a product */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+		(((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
+	version_major, version_minor, version_status) \
+		(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+			product_major) | \
+		 GPU_ID2_VERSION_MAKE(version_major, version_minor,     \
+			version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+   a particular GPU model by its arch_major and product_major. */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+   for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+   model. */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+		(((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		    GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6u, 0)
+#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6u, 1)
+#define GPU_ID2_PRODUCT_TSIX              GPU_ID2_MODEL_MAKE(7u, 0)
+#define GPU_ID2_PRODUCT_TDVX              GPU_ID2_MODEL_MAKE(7u, 3)
+#define GPU_ID2_PRODUCT_TNOX              GPU_ID2_MODEL_MAKE(7u, 1)
+#define GPU_ID2_PRODUCT_TGOX              GPU_ID2_MODEL_MAKE(7u, 2)
+#define GPU_ID2_PRODUCT_TKAX              GPU_ID2_MODEL_MAKE(8u, 0)
+#define GPU_ID2_PRODUCT_TTRX              GPU_ID2_MODEL_MAKE(8u, 1)
+#define GPU_ID2_PRODUCT_TBOX              GPU_ID2_MODEL_MAKE(8u, 2)
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0                   0x1
+#define GPU_ID_S_EAC                      0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major,
+   minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+		(((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		((major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		((minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		((status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+#endif /* _KBASE_GPU_ID_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
new file mode 100644
index 0000000..6df0a1c
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+/** Show callback for the @c gpu_memory debugfs file.
+ *
+ * This function is called to get the contents of the @c gpu_memory debugfs
+ * file. This is a report of current gpu memory usage.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if successfully prints data in debugfs entry file
+ *         -1 if it encountered an error
+ */
+
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+
+	kbdev_list = kbase_dev_list_get();
+	list_for_each(entry, kbdev_list) {
+		struct kbase_device *kbdev = NULL;
+		struct kbasep_kctx_list_element *element;
+
+		kbdev = list_entry(entry, struct kbase_device, entry);
+		/* output the total memory usage and cap for this device */
+		seq_printf(sfile, "%-16s  %10u\n",
+				kbdev->devname,
+				atomic_read(&(kbdev->memdev.used_pages)));
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_for_each_entry(element, &kbdev->kctx_list, link) {
+			/* output the memory usage and cap for each kctx
+			* opened on this device */
+			seq_printf(sfile, "  %s-0x%p %10u\n",
+				"kctx",
+				element->kctx,
+				atomic_read(&(element->kctx->used_pages)));
+		}
+		mutex_unlock(&kbdev->kctx_list_lock);
+	}
+	kbase_dev_list_put(kbdev_list);
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for gpu_memory
+ */
+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_gpu_memory_seq_show , NULL);
+}
+
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+	.open = kbasep_gpu_memory_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ *  Initialize debugfs entry for gpu_memory
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("gpu_memory", S_IRUGO,
+			kbdev->mali_debugfs_directory, NULL,
+			&kbasep_gpu_memory_debugfs_fops);
+	return;
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	return;
+}
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
new file mode 100644
index 0000000..7045693
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H
+#define _KBASE_GPU_MEMORY_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+#endif  /*_KBASE_GPU_MEMORY_DEBUGFS_H*/
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
new file mode 100644
index 0000000..624d078
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -0,0 +1,467 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include "mali_kbase_ioctl.h"
+#include <linux/clk.h>
+
+/**
+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
+ * @value:  The value from which to extract bits.
+ * @offset: The first bit to extract (0 being the LSB).
+ * @size:   The number of bits to extract.
+ *
+ * Context: @offset + @size <= 32.
+ *
+ * Return: Bits [@offset, @offset + @size) from @value.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+	(((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
+
+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+	struct mali_base_gpu_coherent_group *current_group;
+	u64 group_present;
+	u64 group_mask;
+	u64 first_set, first_set_prev;
+	u32 num_groups = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != props);
+
+	props->coherency_info.coherency = props->raw_props.mem_features;
+	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+	if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+		/* Group is l2 coherent */
+		group_present = props->raw_props.l2_present;
+	} else {
+		/* Group is l1 coherent */
+		group_present = props->raw_props.shader_present;
+	}
+
+	/*
+	 * The coherent group mask can be computed from the l2 present
+	 * register.
+	 *
+	 * For the coherent group n:
+	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
+	 * where first_set is group_present with only its nth set-bit kept
+	 * (i.e. the position from where a new group starts).
+	 *
+	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
+	 * The first mask is:
+	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
+	 *               = (0x0..010     - 1) & ~(0x0..01      - 1)
+	 *               =  0x0..00f
+	 * The second mask is:
+	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
+	 *               = (0x0..100     - 1) & ~(0x0..010     - 1)
+	 *               =  0x0..0f0
+	 * And so on until all the bits from group_present have been cleared
+	 * (i.e. there is no group left).
+	 */
+
+	current_group = props->coherency_info.group;
+	first_set = group_present & ~(group_present - 1);
+
+	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
+		group_present -= first_set;	/* Clear the current group bit */
+		first_set_prev = first_set;
+
+		first_set = group_present & ~(group_present - 1);
+		group_mask = (first_set - 1) & ~(first_set_prev - 1);
+
+		/* Populate the coherent_group structure for each group */
+		current_group->core_mask = group_mask & props->raw_props.shader_present;
+		current_group->num_cores = hweight64(current_group->core_mask);
+
+		num_groups++;
+		current_group++;
+	}
+
+	if (group_present != 0)
+		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+	props->coherency_info.num_groups = num_groups;
+}
+
+/**
+ * kbase_gpuprops_get_props - Get the GPU configuration
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values from the GPU configuration
+ * registers. Only the raw properties are filled in this function
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	struct kbase_gpuprops_regdump regdump;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get(kbdev, &regdump);
+
+	gpu_props->raw_props.gpu_id = regdump.gpu_id;
+	gpu_props->raw_props.tiler_features = regdump.tiler_features;
+	gpu_props->raw_props.mem_features = regdump.mem_features;
+	gpu_props->raw_props.mmu_features = regdump.mmu_features;
+	gpu_props->raw_props.l2_features = regdump.l2_features;
+	gpu_props->raw_props.suspend_size = regdump.suspend_size;
+
+	gpu_props->raw_props.as_present = regdump.as_present;
+	gpu_props->raw_props.js_present = regdump.js_present;
+	gpu_props->raw_props.shader_present =
+		((u64) regdump.shader_present_hi << 32) +
+		regdump.shader_present_lo;
+	gpu_props->raw_props.tiler_present =
+		((u64) regdump.tiler_present_hi << 32) +
+		regdump.tiler_present_lo;
+	gpu_props->raw_props.l2_present =
+		((u64) regdump.l2_present_hi << 32) +
+		regdump.l2_present_lo;
+	gpu_props->raw_props.stack_present =
+		((u64) regdump.stack_present_hi << 32) +
+		regdump.stack_present_lo;
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		gpu_props->raw_props.js_features[i] = regdump.js_features[i];
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
+
+	gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
+	gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
+	gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
+	gpu_props->raw_props.thread_features = regdump.thread_features;
+}
+
+void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props)
+{
+	gpu_props->core_props.version_status =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+	gpu_props->core_props.minor_revision =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+	gpu_props->core_props.major_revision =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+	gpu_props->core_props.product_id =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+}
+
+/**
+ * kbase_gpuprops_calculate_props - Calculate the derived properties
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev:     The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values derived from the GPU
+ * configuration registers
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	int i;
+
+	/* Populate the base_gpu_props structure */
+	kbase_gpuprops_update_core_props_gpu_id(gpu_props);
+	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+
+	/* Field with number of l2 slices is added to MEM_FEATURES register
+	 * since t76x. Below code assumes that for older GPU reserved bits will
+	 * be read as zero. */
+	gpu_props->l2_props.num_l2_slices =
+		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+
+	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+	if (gpu_props->raw_props.thread_max_threads == 0)
+		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+	else
+		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+	else
+		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+	if (gpu_props->raw_props.thread_max_barrier_size == 0)
+		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+	else
+		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+	gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+
+	/* If values are not specified, then use defaults */
+	if (gpu_props->thread_props.max_registers == 0) {
+		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+	}
+	/* Initialize the coherent_group structure for each group */
+	kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *gpu_props;
+	struct gpu_raw_gpu_props *raw;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	gpu_props = &kbdev->gpu_props;
+	raw = &gpu_props->props.raw_props;
+
+	/* Initialize the base_gpu_props structure from the hardware */
+	kbase_gpuprops_get_props(&gpu_props->props, kbdev);
+
+	/* Populate the derived properties */
+	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
+
+	/* Populate kbase-only fields */
+	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
+	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
+
+	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
+
+	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
+	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
+
+	gpu_props->num_cores = hweight64(raw->shader_present);
+	gpu_props->num_core_groups = hweight64(raw->l2_present);
+	gpu_props->num_address_spaces = hweight32(raw->as_present);
+	gpu_props->num_job_slots = hweight32(raw->js_present);
+}
+
+void kbase_gpuprops_set_features(struct kbase_device *kbdev)
+{
+	base_gpu_props *gpu_props;
+	struct kbase_gpuprops_regdump regdump;
+
+	gpu_props = &kbdev->gpu_props.props;
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get_features(kbdev, &regdump);
+
+	/*
+	 * Copy the raw value from the register, later this will get turned
+	 * into the selected coherency mode.
+	 * Additionally, add non-coherent mode, as this is always supported.
+	 */
+	gpu_props->raw_props.coherency_mode = regdump.coherency_features |
+		COHERENCY_FEATURE_BIT(COHERENCY_NONE);
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT))
+		gpu_props->thread_props.max_thread_group_split = 0;
+}
+
+static struct {
+	u32 type;
+	size_t offset;
+	int size;
+} gpu_property_mapping[] = {
+#define PROP(name, member) \
+	{KBASE_GPUPROP_ ## name, offsetof(struct mali_base_gpu_props, member), \
+		sizeof(((struct mali_base_gpu_props *)0)->member)}
+	PROP(PRODUCT_ID,                  core_props.product_id),
+	PROP(VERSION_STATUS,              core_props.version_status),
+	PROP(MINOR_REVISION,              core_props.minor_revision),
+	PROP(MAJOR_REVISION,              core_props.major_revision),
+	PROP(GPU_SPEED_MHZ,               core_props.gpu_speed_mhz),
+	PROP(GPU_FREQ_KHZ_MAX,            core_props.gpu_freq_khz_max),
+	PROP(GPU_FREQ_KHZ_MIN,            core_props.gpu_freq_khz_min),
+	PROP(LOG2_PROGRAM_COUNTER_SIZE,   core_props.log2_program_counter_size),
+	PROP(TEXTURE_FEATURES_0,          core_props.texture_features[0]),
+	PROP(TEXTURE_FEATURES_1,          core_props.texture_features[1]),
+	PROP(TEXTURE_FEATURES_2,          core_props.texture_features[2]),
+	PROP(TEXTURE_FEATURES_3,          core_props.texture_features[3]),
+	PROP(GPU_AVAILABLE_MEMORY_SIZE,   core_props.gpu_available_memory_size),
+
+	PROP(L2_LOG2_LINE_SIZE,           l2_props.log2_line_size),
+	PROP(L2_LOG2_CACHE_SIZE,          l2_props.log2_cache_size),
+	PROP(L2_NUM_L2_SLICES,            l2_props.num_l2_slices),
+
+	PROP(TILER_BIN_SIZE_BYTES,        tiler_props.bin_size_bytes),
+	PROP(TILER_MAX_ACTIVE_LEVELS,     tiler_props.max_active_levels),
+
+	PROP(MAX_THREADS,                 thread_props.max_threads),
+	PROP(MAX_WORKGROUP_SIZE,          thread_props.max_workgroup_size),
+	PROP(MAX_BARRIER_SIZE,            thread_props.max_barrier_size),
+	PROP(MAX_REGISTERS,               thread_props.max_registers),
+	PROP(MAX_TASK_QUEUE,              thread_props.max_task_queue),
+	PROP(MAX_THREAD_GROUP_SPLIT,      thread_props.max_thread_group_split),
+	PROP(IMPL_TECH,                   thread_props.impl_tech),
+
+	PROP(RAW_SHADER_PRESENT,          raw_props.shader_present),
+	PROP(RAW_TILER_PRESENT,           raw_props.tiler_present),
+	PROP(RAW_L2_PRESENT,              raw_props.l2_present),
+	PROP(RAW_STACK_PRESENT,           raw_props.stack_present),
+	PROP(RAW_L2_FEATURES,             raw_props.l2_features),
+	PROP(RAW_SUSPEND_SIZE,            raw_props.suspend_size),
+	PROP(RAW_MEM_FEATURES,            raw_props.mem_features),
+	PROP(RAW_MMU_FEATURES,            raw_props.mmu_features),
+	PROP(RAW_AS_PRESENT,              raw_props.as_present),
+	PROP(RAW_JS_PRESENT,              raw_props.js_present),
+	PROP(RAW_JS_FEATURES_0,           raw_props.js_features[0]),
+	PROP(RAW_JS_FEATURES_1,           raw_props.js_features[1]),
+	PROP(RAW_JS_FEATURES_2,           raw_props.js_features[2]),
+	PROP(RAW_JS_FEATURES_3,           raw_props.js_features[3]),
+	PROP(RAW_JS_FEATURES_4,           raw_props.js_features[4]),
+	PROP(RAW_JS_FEATURES_5,           raw_props.js_features[5]),
+	PROP(RAW_JS_FEATURES_6,           raw_props.js_features[6]),
+	PROP(RAW_JS_FEATURES_7,           raw_props.js_features[7]),
+	PROP(RAW_JS_FEATURES_8,           raw_props.js_features[8]),
+	PROP(RAW_JS_FEATURES_9,           raw_props.js_features[9]),
+	PROP(RAW_JS_FEATURES_10,          raw_props.js_features[10]),
+	PROP(RAW_JS_FEATURES_11,          raw_props.js_features[11]),
+	PROP(RAW_JS_FEATURES_12,          raw_props.js_features[12]),
+	PROP(RAW_JS_FEATURES_13,          raw_props.js_features[13]),
+	PROP(RAW_JS_FEATURES_14,          raw_props.js_features[14]),
+	PROP(RAW_JS_FEATURES_15,          raw_props.js_features[15]),
+	PROP(RAW_TILER_FEATURES,          raw_props.tiler_features),
+	PROP(RAW_TEXTURE_FEATURES_0,      raw_props.texture_features[0]),
+	PROP(RAW_TEXTURE_FEATURES_1,      raw_props.texture_features[1]),
+	PROP(RAW_TEXTURE_FEATURES_2,      raw_props.texture_features[2]),
+	PROP(RAW_TEXTURE_FEATURES_3,      raw_props.texture_features[3]),
+	PROP(RAW_GPU_ID,                  raw_props.gpu_id),
+	PROP(RAW_THREAD_MAX_THREADS,      raw_props.thread_max_threads),
+	PROP(RAW_THREAD_MAX_WORKGROUP_SIZE,
+			raw_props.thread_max_workgroup_size),
+	PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size),
+	PROP(RAW_THREAD_FEATURES,         raw_props.thread_features),
+	PROP(RAW_COHERENCY_MODE,          raw_props.coherency_mode),
+
+	PROP(COHERENCY_NUM_GROUPS,        coherency_info.num_groups),
+	PROP(COHERENCY_NUM_CORE_GROUPS,   coherency_info.num_core_groups),
+	PROP(COHERENCY_COHERENCY,         coherency_info.coherency),
+	PROP(COHERENCY_GROUP_0,           coherency_info.group[0].core_mask),
+	PROP(COHERENCY_GROUP_1,           coherency_info.group[1].core_mask),
+	PROP(COHERENCY_GROUP_2,           coherency_info.group[2].core_mask),
+	PROP(COHERENCY_GROUP_3,           coherency_info.group[3].core_mask),
+	PROP(COHERENCY_GROUP_4,           coherency_info.group[4].core_mask),
+	PROP(COHERENCY_GROUP_5,           coherency_info.group[5].core_mask),
+	PROP(COHERENCY_GROUP_6,           coherency_info.group[6].core_mask),
+	PROP(COHERENCY_GROUP_7,           coherency_info.group[7].core_mask),
+	PROP(COHERENCY_GROUP_8,           coherency_info.group[8].core_mask),
+	PROP(COHERENCY_GROUP_9,           coherency_info.group[9].core_mask),
+	PROP(COHERENCY_GROUP_10,          coherency_info.group[10].core_mask),
+	PROP(COHERENCY_GROUP_11,          coherency_info.group[11].core_mask),
+	PROP(COHERENCY_GROUP_12,          coherency_info.group[12].core_mask),
+	PROP(COHERENCY_GROUP_13,          coherency_info.group[13].core_mask),
+	PROP(COHERENCY_GROUP_14,          coherency_info.group[14].core_mask),
+	PROP(COHERENCY_GROUP_15,          coherency_info.group[15].core_mask),
+
+#undef PROP
+};
+
+int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *kprops = &kbdev->gpu_props;
+	struct mali_base_gpu_props *props = &kprops->props;
+	u32 count = ARRAY_SIZE(gpu_property_mapping);
+	u32 i;
+	u32 size = 0;
+	u8 *p;
+
+	for (i = 0; i < count; i++) {
+		/* 4 bytes for the ID, and the size of the property */
+		size += 4 + gpu_property_mapping[i].size;
+	}
+
+	kprops->prop_buffer_size = size;
+	kprops->prop_buffer = kmalloc(size, GFP_KERNEL);
+
+	if (!kprops->prop_buffer) {
+		kprops->prop_buffer_size = 0;
+		return -ENOMEM;
+	}
+
+	p = kprops->prop_buffer;
+
+#define WRITE_U8(v) (*p++ = (v) & 0xFF)
+#define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0)
+#define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0)
+#define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0)
+
+	for (i = 0; i < count; i++) {
+		u32 type = gpu_property_mapping[i].type;
+		u8 type_size;
+		void *field = ((u8 *)props) + gpu_property_mapping[i].offset;
+
+		switch (gpu_property_mapping[i].size) {
+		case 1:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U8;
+			break;
+		case 2:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U16;
+			break;
+		case 4:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U32;
+			break;
+		case 8:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U64;
+			break;
+		default:
+			dev_err(kbdev->dev,
+				"Invalid gpu_property_mapping type=%d size=%d",
+				type, gpu_property_mapping[i].size);
+			return -EINVAL;
+		}
+
+		WRITE_U32((type<<2) | type_size);
+
+		switch (type_size) {
+		case KBASE_GPUPROP_VALUE_SIZE_U8:
+			WRITE_U8(*((u8 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U16:
+			WRITE_U16(*((u16 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U32:
+			WRITE_U32(*((u32 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U64:
+			WRITE_U64(*((u64 *)field));
+			break;
+		default: /* Cannot be reached */
+			WARN_ON(1);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
new file mode 100644
index 0000000..e88af25
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev		The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_set_features - Set up Kbase GPU properties
+ * @kbdev:   Device pointer
+ *
+ * This function sets up GPU properties that are dependent on the hardware
+ * features bitmask. This function must be preceeded by a call to
+ * kbase_hw_set_features_mask().
+ */
+void kbase_gpuprops_set_features(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_populate_user_buffer - Populate the GPU properties buffer
+ * @kbdev: The kbase device
+ *
+ * Fills kbdev->gpu_props->prop_buffer with the GPU properties for user
+ * space to read.
+ */
+int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_update_core_props_gpu_id - break down gpu id value
+ * @gpu_props: the &base_gpu_props structure
+ *
+ * Break down gpu_id value stored in base_gpu_props::raw_props.gpu_id into
+ * separate fields (version_status, minor_revision, major_revision, product_id)
+ * stored in base_gpu_props::core_props.
+ */
+void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props);
+
+
+#endif				/* _KBASE_GPUPROPS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
new file mode 100644
index 0000000..10794fc
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops_types.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_TYPES_H_
+#define _KBASE_GPUPROPS_TYPES_H_
+
+#include "mali_base_kernel.h"
+
+#define KBASE_GPU_SPEED_MHZ    123
+#define KBASE_GPU_PC_SIZE_LOG2 24U
+
+struct kbase_gpuprops_regdump {
+	u32 gpu_id;
+	u32 l2_features;
+	u32 suspend_size; /* API 8.2+ */
+	u32 tiler_features;
+	u32 mem_features;
+	u32 mmu_features;
+	u32 as_present;
+	u32 js_present;
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 shader_present_lo;
+	u32 shader_present_hi;
+	u32 tiler_present_lo;
+	u32 tiler_present_hi;
+	u32 l2_present_lo;
+	u32 l2_present_hi;
+	u32 stack_present_lo;
+	u32 stack_present_hi;
+	u32 coherency_features;
+};
+
+struct kbase_gpu_cache_props {
+	u8 associativity;
+	u8 external_bus_width;
+};
+
+struct kbase_gpu_mem_props {
+	u8 core_group;
+};
+
+struct kbase_gpu_mmu_props {
+	u8 va_bits;
+	u8 pa_bits;
+};
+
+struct kbase_gpu_props {
+	/* kernel-only properties */
+	u8 num_cores;
+	u8 num_core_groups;
+	u8 num_address_spaces;
+	u8 num_job_slots;
+
+	struct kbase_gpu_cache_props l2_props;
+
+	struct kbase_gpu_mem_props mem;
+	struct kbase_gpu_mmu_props mmu;
+
+	/* Properties shared with userspace */
+	base_gpu_props props;
+
+	u32 prop_buffer_size;
+	void *prop_buffer;
+};
+
+#endif				/* _KBASE_GPUPROPS_TYPES_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
new file mode 100644
index 0000000..89342e1
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -0,0 +1,492 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Run-time work-arounds helpers
+ */
+
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_midg_regmap.h>
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+
+void kbase_hw_set_features_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_feature *features;
+	u32 gpu_id;
+	u32 product_id;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			features = base_hw_features_tMIx;
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			features = base_hw_features_tHEx;
+			break;
+		case GPU_ID2_PRODUCT_TSIX:
+			features = base_hw_features_tSIx;
+			break;
+		case GPU_ID2_PRODUCT_TDVX:
+			features = base_hw_features_tDVx;
+			break;
+		case GPU_ID2_PRODUCT_TNOX:
+			features = base_hw_features_tNOx;
+			break;
+		case GPU_ID2_PRODUCT_TGOX:
+			features = base_hw_features_tGOx;
+			break;
+		case GPU_ID2_PRODUCT_TKAX:
+			features = base_hw_features_tKAx;
+			break;
+		case GPU_ID2_PRODUCT_TTRX:
+			features = base_hw_features_tTRx;
+			break;
+		case GPU_ID2_PRODUCT_TBOX:
+			features = base_hw_features_tBOx;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	} else {
+		switch (product_id) {
+		case GPU_ID_PI_TFRX:
+			/* FALLTHROUGH */
+		case GPU_ID_PI_T86X:
+			features = base_hw_features_tFxx;
+			break;
+		case GPU_ID_PI_T83X:
+			features = base_hw_features_t83x;
+			break;
+		case GPU_ID_PI_T82X:
+			features = base_hw_features_t82x;
+			break;
+		case GPU_ID_PI_T76X:
+			features = base_hw_features_t76x;
+			break;
+		case GPU_ID_PI_T72X:
+			features = base_hw_features_t72x;
+			break;
+		case GPU_ID_PI_T62X:
+			features = base_hw_features_t62x;
+			break;
+		case GPU_ID_PI_T60X:
+			features = base_hw_features_t60x;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	}
+
+	for (; *features != BASE_HW_FEATURE_END; features++)
+		set_bit(*features, &kbdev->hw_features_mask[0]);
+}
+
+/**
+ * kbase_hw_get_issues_for_new_id - Get the hardware issues for a new GPU ID
+ * @kbdev: Device pointer
+ *
+ * Return: pointer to an array of hardware issues, terminated by
+ * BASE_HW_ISSUE_END.
+ *
+ * This function can only be used on new-format GPU IDs, i.e. those for which
+ * GPU_ID_IS_NEW_FORMAT evaluates as true. The GPU ID is read from the @kbdev.
+ *
+ * In debugging versions of the driver, unknown versions of a known GPU will
+ * be treated as the most recent known version not later than the actual
+ * version. In such circumstances, the GPU ID in @kbdev will also be replaced
+ * with the most recent known version.
+ *
+ * Note: The GPU configuration must have been read by kbase_gpuprops_get_props()
+ * before calling this function.
+ */
+static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
+					struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues = NULL;
+
+	struct base_hw_product {
+		u32 product_model;
+		struct {
+			u32 version;
+			const enum base_hw_issue *issues;
+		} map[7];
+	};
+
+	static const struct base_hw_product base_hw_products[] = {
+		{GPU_ID2_PRODUCT_TMIX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 1),
+		   base_hw_issues_tMIx_r0p0_05dev0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1},
+		  {U32_MAX /* sentinel value */, NULL} } },
+
+		{GPU_ID2_PRODUCT_THEX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2},
+		  {GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TSIX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0},
+		  {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TDVX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TNOX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TGOX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TKAX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tKAx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TTRX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TBOX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBOx_r0p0},
+		  {U32_MAX, NULL} } },
+	};
+
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 product_model = gpu_id & GPU_ID2_PRODUCT_MODEL;
+	const struct base_hw_product *product = NULL;
+	size_t p;
+
+	/* Stop when we reach the end of the products array. */
+	for (p = 0; p < ARRAY_SIZE(base_hw_products); ++p) {
+		if (product_model == base_hw_products[p].product_model) {
+			product = &base_hw_products[p];
+			break;
+		}
+	}
+
+	if (product != NULL) {
+		/* Found a matching product. */
+		const u32 version = gpu_id & GPU_ID2_VERSION;
+#if !MALI_CUSTOMER_RELEASE
+		u32 fallback_version = 0;
+		const enum base_hw_issue *fallback_issues = NULL;
+#endif
+		size_t v;
+
+		/* Stop when we reach the end of the map. */
+		for (v = 0; product->map[v].version != U32_MAX; ++v) {
+
+			if (version == product->map[v].version) {
+				/* Exact match so stop. */
+				issues = product->map[v].issues;
+				break;
+			}
+
+#if !MALI_CUSTOMER_RELEASE
+			/* Check whether this is a candidate for most recent
+				known version not later than the actual
+				version. */
+			if ((version > product->map[v].version) &&
+				(product->map[v].version >= fallback_version)) {
+				fallback_version = product->map[v].version;
+				fallback_issues = product->map[v].issues;
+			}
+#endif
+		}
+
+#if !MALI_CUSTOMER_RELEASE
+		if ((issues == NULL) && (fallback_issues != NULL)) {
+			/* Fall back to the issue set of the most recent known
+				version not later than the actual version. */
+			issues = fallback_issues;
+
+			dev_info(kbdev->dev,
+				"r%dp%d status %d is unknown; treating as r%dp%d status %d",
+				(gpu_id & GPU_ID2_VERSION_MAJOR) >>
+					GPU_ID2_VERSION_MAJOR_SHIFT,
+				(gpu_id & GPU_ID2_VERSION_MINOR) >>
+					GPU_ID2_VERSION_MINOR_SHIFT,
+				(gpu_id & GPU_ID2_VERSION_STATUS) >>
+					GPU_ID2_VERSION_STATUS_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_MAJOR) >>
+					GPU_ID2_VERSION_MAJOR_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_MINOR) >>
+					GPU_ID2_VERSION_MINOR_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_STATUS) >>
+					GPU_ID2_VERSION_STATUS_SHIFT);
+
+			gpu_id &= ~GPU_ID2_VERSION;
+			gpu_id |= fallback_version;
+			kbdev->gpu_props.props.raw_props.gpu_id = gpu_id;
+
+			kbase_gpuprops_update_core_props_gpu_id(
+				&kbdev->gpu_props.props);
+		}
+#endif
+	}
+	return issues;
+}
+
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues;
+	u32 gpu_id;
+	u32 product_id;
+	u32 impl_tech;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
+
+	if (impl_tech != IMPLEMENTATION_MODEL) {
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			issues = kbase_hw_get_issues_for_new_id(kbdev);
+			if (issues == NULL) {
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+
+#if !MALI_CUSTOMER_RELEASE
+			/* The GPU ID might have been replaced with the last
+			   known version of the same GPU. */
+			gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+#endif
+
+		} else {
+			switch (gpu_id) {
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+				issues = base_hw_issues_t60x_r0p0_15dev0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+				issues = base_hw_issues_t60x_r0p0_eac;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+				issues = base_hw_issues_t60x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+				issues = base_hw_issues_t62x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+				issues = base_hw_issues_t62x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+				issues = base_hw_issues_t62x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+				issues = base_hw_issues_t76x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+				issues = base_hw_issues_t76x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+				issues = base_hw_issues_t76x_r0p1_50rel0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+				issues = base_hw_issues_t76x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+				issues = base_hw_issues_t76x_r0p3;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+				issues = base_hw_issues_t76x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+				issues = base_hw_issues_t72x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+				issues = base_hw_issues_t72x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+				issues = base_hw_issues_t72x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+				issues = base_hw_issues_tFRx_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+				issues = base_hw_issues_tFRx_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8):
+				issues = base_hw_issues_tFRx_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
+				issues = base_hw_issues_tFRx_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+				issues = base_hw_issues_t86x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8):
+				issues = base_hw_issues_t86x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
+				issues = base_hw_issues_t86x_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
+				issues = base_hw_issues_t83x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8):
+				issues = base_hw_issues_t83x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
+				issues = base_hw_issues_t82x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
+				issues = base_hw_issues_t82x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8):
+				issues = base_hw_issues_t82x_r1p0;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		}
+	} else {
+		/* Software model */
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+			case GPU_ID2_PRODUCT_TMIX:
+				issues = base_hw_issues_model_tMIx;
+				break;
+			case GPU_ID2_PRODUCT_THEX:
+				issues = base_hw_issues_model_tHEx;
+				break;
+			case GPU_ID2_PRODUCT_TSIX:
+				issues = base_hw_issues_model_tSIx;
+				break;
+			case GPU_ID2_PRODUCT_TDVX:
+				issues = base_hw_issues_model_tDVx;
+				break;
+			case GPU_ID2_PRODUCT_TNOX:
+				issues = base_hw_issues_model_tNOx;
+				break;
+			case GPU_ID2_PRODUCT_TGOX:
+				issues = base_hw_issues_model_tGOx;
+				break;
+			case GPU_ID2_PRODUCT_TKAX:
+				issues = base_hw_issues_model_tKAx;
+				break;
+			case GPU_ID2_PRODUCT_TTRX:
+				issues = base_hw_issues_model_tTRx;
+				break;
+			case GPU_ID2_PRODUCT_TBOX:
+				issues = base_hw_issues_model_tBOx;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		} else {
+			switch (product_id) {
+			case GPU_ID_PI_T60X:
+				issues = base_hw_issues_model_t60x;
+				break;
+			case GPU_ID_PI_T62X:
+				issues = base_hw_issues_model_t62x;
+				break;
+			case GPU_ID_PI_T72X:
+				issues = base_hw_issues_model_t72x;
+				break;
+			case GPU_ID_PI_T76X:
+				issues = base_hw_issues_model_t76x;
+				break;
+			case GPU_ID_PI_TFRX:
+				issues = base_hw_issues_model_tFRx;
+				break;
+			case GPU_ID_PI_T86X:
+				issues = base_hw_issues_model_t86x;
+				break;
+			case GPU_ID_PI_T83X:
+				issues = base_hw_issues_model_t83x;
+				break;
+			case GPU_ID_PI_T82X:
+				issues = base_hw_issues_model_t82x;
+				break;
+			default:
+				dev_err(kbdev->dev, "Unknown GPU ID %x",
+					gpu_id);
+				return -EINVAL;
+			}
+		}
+	}
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		dev_info(kbdev->dev,
+			"GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d",
+			(gpu_id & GPU_ID2_PRODUCT_MAJOR) >>
+				GPU_ID2_PRODUCT_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_MAJOR) >>
+				GPU_ID2_ARCH_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_MINOR) >>
+				GPU_ID2_ARCH_MINOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_REV) >>
+				GPU_ID2_ARCH_REV_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_MAJOR) >>
+				GPU_ID2_VERSION_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_MINOR) >>
+				GPU_ID2_VERSION_MINOR_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_STATUS) >>
+				GPU_ID2_VERSION_STATUS_SHIFT);
+	} else {
+		dev_info(kbdev->dev,
+			"GPU identified as 0x%04x r%dp%d status %d",
+			(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+				GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+			(gpu_id & GPU_ID_VERSION_MAJOR) >>
+				GPU_ID_VERSION_MAJOR_SHIFT,
+			(gpu_id & GPU_ID_VERSION_MINOR) >>
+				GPU_ID_VERSION_MINOR_SHIFT,
+			(gpu_id & GPU_ID_VERSION_STATUS) >>
+				GPU_ID_VERSION_STATUS_SHIFT);
+	}
+
+	for (; *issues != BASE_HW_ISSUE_END; issues++)
+		set_bit(*issues, &kbdev->hw_issues_mask[0]);
+
+	return 0;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
new file mode 100644
index 0000000..754250c
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h
@@ -0,0 +1,65 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Run-time work-arounds helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+	test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+	test_bit(feature, &(kbdev)->hw_features_mask[0])
+
+/**
+ * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID
+ * @kbdev: Device pointer
+ *
+ * Return: 0 if the GPU ID was recognized, otherwise -EINVAL.
+ *
+ * The GPU ID is read from the @kbdev.
+ *
+ * In debugging versions of the driver, unknown versions of a known GPU with a
+ * new-format ID will be treated as the most recent known version not later
+ * than the actual version. In such circumstances, the GPU ID in @kbdev will
+ * also be replaced with the most recent known version.
+ *
+ * Note: The GPU configuration must have been read by
+ * kbase_gpuprops_get_props() before calling this function.
+ */
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif				/* _KBASE_HW_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
new file mode 100644
index 0000000..b09be99
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access backend common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_BACKEND_H_
+#define _KBASE_HWACCESS_BACKEND_H_
+
+/**
+ * kbase_backend_early_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_early_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
new file mode 100644
index 0000000..0acf297
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_gpu_defs.h
+ * HW access common definitions
+ */
+
+#ifndef _KBASE_HWACCESS_DEFS_H_
+#define _KBASE_HWACCESS_DEFS_H_
+
+#include <mali_kbase_jm_defs.h>
+
+/* The hwaccess_lock (a spinlock) must be held when accessing this structure */
+struct kbase_hwaccess_data {
+	struct kbase_context *active_kctx;
+
+	struct kbase_backend_data backend;
+};
+
+#endif /* _KBASE_HWACCESS_DEFS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
new file mode 100644
index 0000000..cf8a813
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ *				  GPU
+ * @kbdev:	Device pointer
+ * @regdump:	Pointer to struct kbase_gpuprops_regdump structure
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+/**
+ * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU
+ * @kbdev:   Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads GPU properties that are dependent on the hardware
+ * features bitmask
+ */
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
new file mode 100644
index 0000000..5de2b75
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include <mali_kbase_instr_defs.h>
+
+/**
+ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * @kbdev:	Kbase device
+ * @kctx:	Kbase context
+ * @setup:	HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx:	Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true,
+ * of call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ *				       completed.
+ * @kctx:	Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ *				     completed
+ * @kctx:	Kbase context
+ * @success:	Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx:	Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_init() - Terminate the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
new file mode 100644
index 0000000..750fda2
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -0,0 +1,381 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_JM_H_
+#define _KBASE_HWACCESS_JM_H_
+
+/**
+ * kbase_backend_run_atom() - Run an atom on the GPU
+ * @kbdev:	Device pointer
+ * @atom:	Atom to run
+ *
+ * Caller must hold the HW access lock
+ */
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_slot_update - Update state based on slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ *
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ */
+void kbase_backend_slot_update(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_find_and_release_free_address_space() - Release a free AS
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * This function can evict an idle context from the runpool, freeing up the
+ * address space it was using.
+ *
+ * The address space is marked as in use. The caller must either assign a
+ * context using kbase_gpu_use_ctx(), or release it using
+ * kbase_ctx_sched_release()
+ *
+ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none
+ *	   available
+ */
+int kbase_backend_find_and_release_free_address_space(
+		struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the
+ *			     provided address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @as_nr:	Free address space to use
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * Return: true if successful, false if ASID not assigned.
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr);
+
+/**
+ * kbase_backend_use_ctx_sched() - Activate a context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * The context must already be scheduled and assigned to an address space. If
+ * the context is not scheduled, then kbase_gpu_use_ctx() should be used
+ * instead.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if context is now active, false otherwise (ie if context does
+ *	   not have an address space assigned)
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+					struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
+ *                                 de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+				struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will
+ *                                   de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex
+ *
+ * This function must perform any operations that could not be performed in IRQ
+ * context by kbase_backend_release_ctx_irq().
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_cacheclean - Perform a cache clean if the given atom requires
+ *                            one
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the failed atom
+ *
+ * On some GPUs, the GPU cache must be cleaned following a failed atom. This
+ * function performs a clean if it is required by @katom.
+ */
+void kbase_backend_cacheclean(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom);
+
+
+/**
+ * kbase_backend_complete_wq() - Perform backend-specific actions required on
+ *				 completing an atom.
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the atom to complete
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ *
+ * Return: true if atom has completed, false if atom should be re-submitted
+ */
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions
+ *                                        required on completing an atom, after
+ *                                        any scheduling has taken place.
+ * @kbdev:         Device pointer
+ * @core_req:      Core requirements of atom
+ * @affinity:      Affinity of atom
+ * @coreref_state: Coreref state of atom
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ */
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req, u64 affinity,
+		enum kbase_atom_coreref_state coreref_state);
+
+/**
+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
+ *			   and remove any others from the ringbuffers.
+ * @kbdev:		Device pointer
+ * @end_timestamp:	Timestamp of reset
+ */
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
+
+/**
+ * kbase_backend_inspect_head() - Return the atom currently at the head of slot
+ *				  @js
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
+ *                              @js
+ * @kbdev: Device pointer
+ * @js:    Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
+ *				      slot.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot
+ */
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
+ *					that are currently on the GPU.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot @js that are currently on the GPU.
+ */
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
+ *				       has changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg starting/stopping
+ * scheduling timers).
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg updating timeouts of
+ * currently running atoms).
+ */
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_slot_free() - Return the number of jobs that can be currently
+ *			       submitted to slot @js.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of jobs that can be submitted.
+ */
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
+ *                                        running from a context
+ * @kctx: Context pointer
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting
+ * context from the hardware.
+ */
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+
+/**
+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and
+ *                               to be descheduled.
+ * @kctx: Context pointer
+ *
+ * This should be called following kbase_js_zap_context(), to ensure the context
+ * can be safely destroyed.
+ */
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
+
+/**
+ * kbase_backend_get_current_flush_id - Return the current flush ID
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: the current flush ID to be recorded for each job chain
+ */
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it returns
+ * true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_locked - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_silent - Reset the GPU silently
+ * @kbdev: Device pointer
+ *
+ * Reset the GPU without trying to cancel jobs and don't emit messages into
+ * the kernel log while doing the reset.
+ *
+ * This function should be used in cases where we are doing a controlled reset
+ * of the GPU as part of normal processing (e.g. exiting protected mode) where
+ * the driver will have ensured the scheduler has been idled and all other
+ * users of the GPU (e.g. instrumentation) have been suspended.
+ */
+void kbase_reset_gpu_silent(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_active - Reports if the GPU is being reset
+ * @kbdev: Device pointer
+ *
+ * Return: True if the GPU is in the process of being reset.
+ */
+bool kbase_reset_gpu_active(struct kbase_device *kbdev);
+#endif
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom);
+
+extern struct protected_mode_ops kbase_native_protected_ops;
+
+#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
new file mode 100644
index 0000000..bae7c0d
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_pm.h
+ * HW access power manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_PM_H_
+#define _KBASE_HWACCESS_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+#include <mali_kbase_pm_defs.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/* Functions common to all HW access backends */
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return 0 if the power management framework was successfully
+ *         initialized.
+ */
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_hwaccess_pm_powerup - Power up the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
+ *
+ * Power up GPU after all modules have been initialized and interrupt handlers
+ * installed.
+ *
+ * Return: 0 if powerup was successful.
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ *
+ * Should ensure that no new interrupts are generated, but allow any currently
+ * running interrupt handlers to complete successfully. The GPU is forced off by
+ * the time this function returns, regardless of whether or not the active power
+ * policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to suspend the GPU
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to resume the GPU from a suspend
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for activating the GPU. Called when the first
+ * context goes active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for idling the GPU. Called when the last
+ * context goes idle.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev         The kbase device structure for the device (must be a
+ *                      valid pointer)
+ * @param new_core_mask_js0 The core mask to use for job slot 0
+ * @param new_core_mask_js0 The core mask to use for job slot 1
+ * @param new_core_mask_js0 The core mask to use for job slot 2
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+int set_policy_by_name(struct kbase_device *kbdev, const char *name);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+					const struct kbase_pm_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
+
+#endif /* _KBASE_HWACCESS_PM_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
new file mode 100644
index 0000000..89d26ea
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
new file mode 100644
index 0000000..cf7bf1b
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,66 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_HWCNT_READER_H_
+#define _KBASE_HWCNT_READER_H_
+
+/* The ids of ioctl commands. */
+#define KBASE_HWCNT_READER 0xBE
+#define KBASE_HWCNT_READER_GET_HWVER       _IOR(KBASE_HWCNT_READER, 0x00, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32)
+#define KBASE_HWCNT_READER_DUMP            _IOW(KBASE_HWCNT_READER, 0x10, u32)
+#define KBASE_HWCNT_READER_CLEAR           _IOW(KBASE_HWCNT_READER, 0x11, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER      _IOR(KBASE_HWCNT_READER, 0x20,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_PUT_BUFFER      _IOW(KBASE_HWCNT_READER, 0x21,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_SET_INTERVAL    _IOW(KBASE_HWCNT_READER, 0x30, u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT    _IOW(KBASE_HWCNT_READER, 0x40, u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT   _IOW(KBASE_HWCNT_READER, 0x41, u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32)
+
+/**
+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
+ * @timestamp:  time when sample was collected
+ * @event_id:   id of an event that triggered sample collection
+ * @buffer_idx: position in sampling area where sample buffer was stored
+ */
+struct kbase_hwcnt_reader_metadata {
+	u64 timestamp;
+	u32 event_id;
+	u32 buffer_idx;
+};
+
+/**
+ * enum base_hwcnt_reader_event - hwcnt dumping events
+ * @BASE_HWCNT_READER_EVENT_MANUAL:   manual request for dump
+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
+ * @BASE_HWCNT_READER_EVENT_PREJOB:   prejob dump request
+ * @BASE_HWCNT_READER_EVENT_POSTJOB:  postjob dump request
+ * @BASE_HWCNT_READER_EVENT_COUNT:    number of supported events
+ */
+enum base_hwcnt_reader_event {
+	BASE_HWCNT_READER_EVENT_MANUAL,
+	BASE_HWCNT_READER_EVENT_PERIODIC,
+	BASE_HWCNT_READER_EVENT_PREJOB,
+	BASE_HWCNT_READER_EVENT_POSTJOB,
+
+	BASE_HWCNT_READER_EVENT_COUNT
+};
+
+#endif /* _KBASE_HWCNT_READER_H_ */
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
new file mode 100644
index 0000000..041c61a
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
@@ -0,0 +1,668 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_IOCTL_H_
+#define _KBASE_IOCTL_H_
+
+#ifdef __cpluscplus
+extern "C" {
+#endif
+
+#include <linux/types.h>
+
+#define KBASE_IOCTL_TYPE 0x80
+
+#define BASE_UK_VERSION_MAJOR 11
+#define BASE_UK_VERSION_MINOR 0
+
+#ifdef ANDROID
+/* Android's definition of ioctl is incorrect, specifying the type argument as
+ * 'int'. This creates a warning when using _IOWR (as the top bit is set). Work
+ * round this by redefining _IOC to include a case to 'int'.
+ */
+#undef _IOC
+#define _IOC(dir, type, nr, size) \
+	((int)(((dir) << _IOC_DIRSHIFT) | ((type) << _IOC_TYPESHIFT) | \
+	((nr) << _IOC_NRSHIFT) | ((size) << _IOC_SIZESHIFT)))
+#endif
+
+/**
+ * struct kbase_ioctl_version_check - Check version compatibility with kernel
+ *
+ * @major: Major version number
+ * @minor: Minor version number
+ */
+struct kbase_ioctl_version_check {
+	__u16 major;
+	__u16 minor;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK \
+	_IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
+
+/**
+ * struct kbase_ioctl_set_flags - Set kernel context creation flags
+ *
+ * @create_flags: Flags - see base_context_create_flags
+ */
+struct kbase_ioctl_set_flags {
+	__u32 create_flags;
+};
+
+#define KBASE_IOCTL_SET_FLAGS \
+	_IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags)
+
+/**
+ * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
+ *
+ * @addr: Memory address of an array of struct base_jd_atom_v2
+ * @nr_atoms: Number of entries in the array
+ * @stride: sizeof(struct base_jd_atom_v2)
+ */
+struct kbase_ioctl_job_submit {
+	__u64 addr;
+	__u32 nr_atoms;
+	__u32 stride;
+};
+
+#define KBASE_IOCTL_JOB_SUBMIT \
+	_IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit)
+
+/**
+ * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel
+ *
+ * @buffer: Pointer to the buffer to store properties into
+ * @size: Size of the buffer
+ * @flags: Flags - must be zero for now
+ *
+ * The ioctl will return the number of bytes stored into @buffer or an error
+ * on failure (e.g. @size is too small). If @size is specified as 0 then no
+ * data will be written but the return value will be the number of bytes needed
+ * for all the properties.
+ *
+ * @flags may be used in the future to request a different format for the
+ * buffer. With @flags == 0 the following format is used.
+ *
+ * The buffer will be filled with pairs of values, a u32 key identifying the
+ * property followed by the value. The size of the value is identified using
+ * the bottom bits of the key. The value then immediately followed the key and
+ * is tightly packed (there is no padding). All keys and values are
+ * little-endian.
+ *
+ * 00 = u8
+ * 01 = u16
+ * 10 = u32
+ * 11 = u64
+ */
+struct kbase_ioctl_get_gpuprops {
+	__u64 buffer;
+	__u32 size;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_GET_GPUPROPS \
+	_IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops)
+
+#define KBASE_IOCTL_POST_TERM \
+	_IO(KBASE_IOCTL_TYPE, 4)
+
+/**
+ * union kbase_ioctl_mem_alloc - Allocate memory on the GPU
+ *
+ * @va_pages: The number of pages of virtual address space to reserve
+ * @commit_pages: The number of physical pages to allocate
+ * @extent: The number of extra pages to allocate on each GPU fault which grows
+ *          the region
+ * @flags: Flags
+ * @gpu_va: The GPU virtual address which is allocated
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_alloc {
+	struct {
+		__u64 va_pages;
+		__u64 commit_pages;
+		__u64 extent;
+		__u64 flags;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALLOC \
+	_IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc)
+
+/**
+ * struct kbase_ioctl_mem_query - Query properties of a GPU memory region
+ * @gpu_addr: A GPU address contained within the region
+ * @query: The type of query
+ * @value: The result of the query
+ *
+ * Use a %KBASE_MEM_QUERY_xxx flag as input for @query.
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_query {
+	struct {
+		__u64 gpu_addr;
+		__u64 query;
+	} in;
+	struct {
+		__u64 value;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_QUERY \
+	_IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query)
+
+#define KBASE_MEM_QUERY_COMMIT_SIZE	1
+#define KBASE_MEM_QUERY_VA_SIZE		2
+#define KBASE_MEM_QUERY_FLAGS		3
+
+/**
+ * struct kbase_ioctl_mem_free - Free a memory region
+ * @gpu_addr: Handle to the region to free
+ */
+struct kbase_ioctl_mem_free {
+	__u64 gpu_addr;
+};
+
+#define KBASE_IOCTL_MEM_FREE \
+	_IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free)
+
+/**
+ * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader
+ * @buffer_count: requested number of dumping buffers
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ *
+ * A fd is returned from the ioctl if successful, or a negative value on error
+ */
+struct kbase_ioctl_hwcnt_reader_setup {
+	__u32 buffer_count;
+	__u32 jm_bm;
+	__u32 shader_bm;
+	__u32 tiler_bm;
+	__u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_READER_SETUP \
+	_IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup)
+
+/**
+ * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection
+ * @dump_buffer:  GPU address to write counters to
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ */
+struct kbase_ioctl_hwcnt_enable {
+	__u64 dump_buffer;
+	__u32 jm_bm;
+	__u32 shader_bm;
+	__u32 tiler_bm;
+	__u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_ENABLE \
+	_IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable)
+
+#define KBASE_IOCTL_HWCNT_DUMP \
+	_IO(KBASE_IOCTL_TYPE, 10)
+
+#define KBASE_IOCTL_HWCNT_CLEAR \
+	_IO(KBASE_IOCTL_TYPE, 11)
+
+/**
+ * struct kbase_ioctl_disjoint_query - Query the disjoint counter
+ * @counter:   A counter of disjoint events in the kernel
+ */
+struct kbase_ioctl_disjoint_query {
+	__u32 counter;
+};
+
+#define KBASE_IOCTL_DISJOINT_QUERY \
+	_IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query)
+
+/**
+ * struct kbase_ioctl_get_ddk_version - Query the kernel version
+ * @version_buffer: Buffer to receive the kernel version string
+ * @size: Size of the buffer
+ * @padding: Padding
+ *
+ * The ioctl will return the number of bytes written into version_buffer
+ * (which includes a NULL byte) or a negative error code
+ */
+struct kbase_ioctl_get_ddk_version {
+	__u64 version_buffer;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_GET_DDK_VERSION \
+	_IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
+
+/**
+ * struct kbase_ioctl_mem_jit_init - Initialise the JIT memory allocator
+ *
+ * @va_pages: Number of VA pages to reserve for JIT
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ */
+struct kbase_ioctl_mem_jit_init {
+	__u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT \
+	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init)
+
+/**
+ * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory
+ *
+ * @handle: GPU memory handle (GPU VA)
+ * @user_addr: The address where it is mapped in user space
+ * @size: The number of bytes to synchronise
+ * @type: The direction to synchronise: 0 is sync to memory (clean),
+ * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants.
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_mem_sync {
+	__u64 handle;
+	__u64 user_addr;
+	__u64 size;
+	__u8 type;
+	__u8 padding[7];
+};
+
+#define KBASE_IOCTL_MEM_SYNC \
+	_IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync)
+
+/**
+ * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer
+ *
+ * @gpu_addr: The GPU address of the memory region
+ * @cpu_addr: The CPU address to locate
+ * @size: A size in bytes to validate is contained within the region
+ * @offset: The offset from the start of the memory region to @cpu_addr
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_find_cpu_offset {
+	struct {
+		__u64 gpu_addr;
+		__u64 cpu_addr;
+		__u64 size;
+	} in;
+	struct {
+		__u64 offset;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \
+	_IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset)
+
+/**
+ * struct kbase_ioctl_get_context_id - Get the kernel context ID
+ *
+ * @id: The kernel context ID
+ */
+struct kbase_ioctl_get_context_id {
+	__u32 id;
+};
+
+#define KBASE_IOCTL_GET_CONTEXT_ID \
+	_IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id)
+
+/**
+ * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd
+ *
+ * @flags: Flags
+ *
+ * The ioctl returns a file descriptor when successful
+ */
+struct kbase_ioctl_tlstream_acquire {
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_TLSTREAM_ACQUIRE \
+	_IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire)
+
+#define KBASE_IOCTL_TLSTREAM_FLUSH \
+	_IO(KBASE_IOCTL_TYPE, 19)
+
+/**
+ * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region
+ *
+ * @gpu_addr: The memory region to modify
+ * @pages:    The number of physical pages that should be present
+ *
+ * The ioctl may return on the following error codes or 0 for success:
+ *   -ENOMEM: Out of memory
+ *   -EINVAL: Invalid arguments
+ */
+struct kbase_ioctl_mem_commit {
+	__u64 gpu_addr;
+	__u64 pages;
+};
+
+#define KBASE_IOCTL_MEM_COMMIT \
+	_IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit)
+
+/**
+ * union kbase_ioctl_mem_alias - Create an alias of memory regions
+ * @flags: Flags, see BASE_MEM_xxx
+ * @stride: Bytes between start of each memory region
+ * @nents: The number of regions to pack together into the alias
+ * @aliasing_info: Pointer to an array of struct base_mem_aliasing_info
+ * @gpu_va: Address of the new alias
+ * @va_pages: Size of the new alias
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_alias {
+	struct {
+		__u64 flags;
+		__u64 stride;
+		__u64 nents;
+		__u64 aliasing_info;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALIAS \
+	_IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias)
+
+/**
+ * union kbase_ioctl_mem_import - Import memory for use by the GPU
+ * @flags: Flags, see BASE_MEM_xxx
+ * @phandle: Handle to the external memory
+ * @type: Type of external memory, see base_mem_import_type
+ * @padding: Amount of extra VA pages to append to the imported buffer
+ * @gpu_va: Address of the new alias
+ * @va_pages: Size of the new alias
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_import {
+	struct {
+		__u64 flags;
+		__u64 phandle;
+		__u32 type;
+		__u32 padding;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_IMPORT \
+	_IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import)
+
+/**
+ * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region
+ * @gpu_va: The GPU region to modify
+ * @flags: The new flags to set
+ * @mask: Mask of the flags to modify
+ */
+struct kbase_ioctl_mem_flags_change {
+	__u64 gpu_va;
+	__u64 flags;
+	__u64 mask;
+};
+
+#define KBASE_IOCTL_MEM_FLAGS_CHANGE \
+	_IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change)
+
+/**
+ * struct kbase_ioctl_stream_create - Create a synchronisation stream
+ * @name: A name to identify this stream. Must be NULL-terminated.
+ *
+ * Note that this is also called a "timeline", but is named stream to avoid
+ * confusion with other uses of the word.
+ *
+ * Unused bytes in @name (after the first NULL byte) must be also be NULL bytes.
+ *
+ * The ioctl returns a file descriptor.
+ */
+struct kbase_ioctl_stream_create {
+	char name[32];
+};
+
+#define KBASE_IOCTL_STREAM_CREATE \
+	_IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create)
+
+/**
+ * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence
+ * @fd: The file descriptor to validate
+ */
+struct kbase_ioctl_fence_validate {
+	int fd;
+};
+
+#define KBASE_IOCTL_FENCE_VALIDATE \
+	_IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
+
+/**
+ * struct kbase_ioctl_get_profiling_controls - Get the profiling controls
+ * @count: The size of @buffer in u32 words
+ * @buffer: The buffer to receive the profiling controls
+ * @padding: Padding
+ */
+struct kbase_ioctl_get_profiling_controls {
+	__u64 buffer;
+	__u32 count;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_GET_PROFILING_CONTROLS \
+	_IOW(KBASE_IOCTL_TYPE, 26, struct kbase_ioctl_get_profiling_controls)
+
+/**
+ * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel
+ * @buffer: Pointer to the information
+ * @len: Length
+ * @padding: Padding
+ *
+ * The data provided is accessible through a debugfs file
+ */
+struct kbase_ioctl_mem_profile_add {
+	__u64 buffer;
+	__u32 len;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_MEM_PROFILE_ADD \
+	_IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
+
+/**
+ * struct kbase_ioctl_soft_event_update - Update the status of a soft-event
+ * @event: GPU address of the event which has been updated
+ * @new_status: The new status to set
+ * @flags: Flags for future expansion
+ */
+struct kbase_ioctl_soft_event_update {
+	__u64 event;
+	__u32 new_status;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_SOFT_EVENT_UPDATE \
+	_IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
+
+/* IOCTLs 29-32 are reserved */
+
+/***************
+ * test ioctls *
+ ***************/
+#if MALI_UNIT_TEST
+/* These ioctls are purely for test purposes and are not used in the production
+ * driver, they therefore may change without notice
+ */
+
+#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1)
+
+/**
+ * struct kbase_ioctl_tlstream_test - Start a timeline stream test
+ *
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay between tracepoints from one writer in milliseconds
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ */
+struct kbase_ioctl_tlstream_test {
+	__u32 tpw_count;
+	__u32 msg_delay;
+	__u32 msg_count;
+	__u32 aux_msg;
+};
+
+#define KBASE_IOCTL_TLSTREAM_TEST \
+	_IOW(KBASE_IOCTL_TEST_TYPE, 1, struct kbase_ioctl_tlstream_test)
+
+/**
+ * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ */
+struct kbase_ioctl_tlstream_stats {
+	__u32 bytes_collected;
+	__u32 bytes_generated;
+};
+
+#define KBASE_IOCTL_TLSTREAM_STATS \
+	_IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
+
+#endif
+
+/**********************************
+ * Definitions for GPU properties *
+ **********************************/
+#define KBASE_GPUPROP_VALUE_SIZE_U8	(0x0)
+#define KBASE_GPUPROP_VALUE_SIZE_U16	(0x1)
+#define KBASE_GPUPROP_VALUE_SIZE_U32	(0x2)
+#define KBASE_GPUPROP_VALUE_SIZE_U64	(0x3)
+
+#define KBASE_GPUPROP_PRODUCT_ID			1
+#define KBASE_GPUPROP_VERSION_STATUS			2
+#define KBASE_GPUPROP_MINOR_REVISION			3
+#define KBASE_GPUPROP_MAJOR_REVISION			4
+#define KBASE_GPUPROP_GPU_SPEED_MHZ			5
+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX			6
+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MIN			7
+#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE		8
+#define KBASE_GPUPROP_TEXTURE_FEATURES_0		9
+#define KBASE_GPUPROP_TEXTURE_FEATURES_1		10
+#define KBASE_GPUPROP_TEXTURE_FEATURES_2		11
+#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE		12
+
+#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE			13
+#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE		14
+#define KBASE_GPUPROP_L2_NUM_L2_SLICES			15
+
+#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES		16
+#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS		17
+
+#define KBASE_GPUPROP_MAX_THREADS			18
+#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE		19
+#define KBASE_GPUPROP_MAX_BARRIER_SIZE			20
+#define KBASE_GPUPROP_MAX_REGISTERS			21
+#define KBASE_GPUPROP_MAX_TASK_QUEUE			22
+#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT		23
+#define KBASE_GPUPROP_IMPL_TECH				24
+
+#define KBASE_GPUPROP_RAW_SHADER_PRESENT		25
+#define KBASE_GPUPROP_RAW_TILER_PRESENT			26
+#define KBASE_GPUPROP_RAW_L2_PRESENT			27
+#define KBASE_GPUPROP_RAW_STACK_PRESENT			28
+#define KBASE_GPUPROP_RAW_L2_FEATURES			29
+#define KBASE_GPUPROP_RAW_SUSPEND_SIZE			30
+#define KBASE_GPUPROP_RAW_MEM_FEATURES			31
+#define KBASE_GPUPROP_RAW_MMU_FEATURES			32
+#define KBASE_GPUPROP_RAW_AS_PRESENT			33
+#define KBASE_GPUPROP_RAW_JS_PRESENT			34
+#define KBASE_GPUPROP_RAW_JS_FEATURES_0			35
+#define KBASE_GPUPROP_RAW_JS_FEATURES_1			36
+#define KBASE_GPUPROP_RAW_JS_FEATURES_2			37
+#define KBASE_GPUPROP_RAW_JS_FEATURES_3			38
+#define KBASE_GPUPROP_RAW_JS_FEATURES_4			39
+#define KBASE_GPUPROP_RAW_JS_FEATURES_5			40
+#define KBASE_GPUPROP_RAW_JS_FEATURES_6			41
+#define KBASE_GPUPROP_RAW_JS_FEATURES_7			42
+#define KBASE_GPUPROP_RAW_JS_FEATURES_8			43
+#define KBASE_GPUPROP_RAW_JS_FEATURES_9			44
+#define KBASE_GPUPROP_RAW_JS_FEATURES_10		45
+#define KBASE_GPUPROP_RAW_JS_FEATURES_11		46
+#define KBASE_GPUPROP_RAW_JS_FEATURES_12		47
+#define KBASE_GPUPROP_RAW_JS_FEATURES_13		48
+#define KBASE_GPUPROP_RAW_JS_FEATURES_14		49
+#define KBASE_GPUPROP_RAW_JS_FEATURES_15		50
+#define KBASE_GPUPROP_RAW_TILER_FEATURES		51
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0		52
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1		53
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2		54
+#define KBASE_GPUPROP_RAW_GPU_ID			55
+#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS		56
+#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE	57
+#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE	58
+#define KBASE_GPUPROP_RAW_THREAD_FEATURES		59
+#define KBASE_GPUPROP_RAW_COHERENCY_MODE		60
+
+#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS		61
+#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS		62
+#define KBASE_GPUPROP_COHERENCY_COHERENCY		63
+#define KBASE_GPUPROP_COHERENCY_GROUP_0			64
+#define KBASE_GPUPROP_COHERENCY_GROUP_1			65
+#define KBASE_GPUPROP_COHERENCY_GROUP_2			66
+#define KBASE_GPUPROP_COHERENCY_GROUP_3			67
+#define KBASE_GPUPROP_COHERENCY_GROUP_4			68
+#define KBASE_GPUPROP_COHERENCY_GROUP_5			69
+#define KBASE_GPUPROP_COHERENCY_GROUP_6			70
+#define KBASE_GPUPROP_COHERENCY_GROUP_7			71
+#define KBASE_GPUPROP_COHERENCY_GROUP_8			72
+#define KBASE_GPUPROP_COHERENCY_GROUP_9			73
+#define KBASE_GPUPROP_COHERENCY_GROUP_10		74
+#define KBASE_GPUPROP_COHERENCY_GROUP_11		75
+#define KBASE_GPUPROP_COHERENCY_GROUP_12		76
+#define KBASE_GPUPROP_COHERENCY_GROUP_13		77
+#define KBASE_GPUPROP_COHERENCY_GROUP_14		78
+#define KBASE_GPUPROP_COHERENCY_GROUP_15		79
+
+#define KBASE_GPUPROP_TEXTURE_FEATURES_3		80
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3		81
+
+#ifdef __cpluscplus
+}
+#endif
+
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100644
index 0000000..aa545fd
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1607 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <mali_kbase.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/ratelimit.h>
+
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_tlstream.h>
+
+#include "mali_kbase_dma_fence.h"
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/* Return whether katom will run on the GPU or not. Currently only soft jobs and
+ * dependency-only atoms do not run on the GPU */
+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) ||  \
+			((katom->core_req & BASE_JD_REQ_ATOM_TYPE) ==    \
+							BASE_JD_REQ_DEP)))
+/*
+ * This is the kernel side of the API. Only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ *   completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const u64 p)
+{
+#ifdef CONFIG_COMPAT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return compat_ptr(p);
+#endif
+	return u64_to_user_ptr(p);
+}
+
+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
+ */
+static int jd_run_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+	if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+		/* Dependency only atom */
+		katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		return 0;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		/* Soft-job */
+		if (katom->will_fail_event_code) {
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			return 0;
+		}
+		if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+			if (!kbase_replay_process(katom))
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		} else if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		}
+		return 0;
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+	/* Queue an action about whether we should try scheduling a context */
+	return kbasep_js_add_job(kctx, katom);
+}
+
+#if defined(CONFIG_MALI_DMA_FENCE)
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kbdev = katom->kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Check whether the atom's other dependencies were already met. If
+	 * katom is a GPU atom then the job scheduler may be able to represent
+	 * the dependencies, hence we may attempt to submit it before they are
+	 * met. Other atoms must have had both dependencies resolved.
+	 */
+	if (IS_GPU_ATOM(katom) ||
+			(!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+			!kbase_jd_katom_dep_atom(&katom->dep[1]))) {
+		/* katom dep complete, attempt to run it */
+		bool resched = false;
+
+		resched = jd_run_atom(katom);
+
+		if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+			/* The atom has already finished */
+			resched |= jd_done_nolock(katom, NULL);
+		}
+
+		if (resched)
+			kbase_js_sched_all(kbdev);
+	}
+}
+#endif
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 * Any successfully completed atom would have had all it's callbacks
+	 * completed before the atom was run, so only flush for failed atoms.
+	 */
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		flush_workqueue(katom->kctx->dma_fence.wq);
+#endif /* CONFIG_MALI_DMA_FENCE */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_signal(katom);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	kbase_gpu_vm_lock(katom->kctx);
+	/* only roll back if extres is non-NULL */
+	if (katom->extres) {
+		u32 res_no;
+
+		res_no = katom->nr_extres;
+		while (res_no-- > 0) {
+			struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+			struct kbase_va_region *reg;
+
+			reg = kbase_region_tracker_find_region_base_address(
+					katom->kctx,
+					katom->extres[res_no].gpu_address);
+			kbase_unmap_external_resource(katom->kctx, reg, alloc);
+		}
+		kfree(katom->extres);
+		katom->extres = NULL;
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+}
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ */
+
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+	int err_ret_val = -EINVAL;
+	u32 res_no;
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct kbase_dma_fence_resv_info info = {
+		.dma_fence_resv_count = 0,
+	};
+#ifdef CONFIG_SYNC
+	/*
+	 * When both dma-buf fence and Android native sync is enabled, we
+	 * disable dma-buf fence for contexts that are using Android native
+	 * fences.
+	 */
+	const bool implicit_sync = !kbase_ctx_flag(katom->kctx,
+						   KCTX_NO_IMPLICIT_SYNC);
+#else /* CONFIG_SYNC */
+	const bool implicit_sync = true;
+#endif /* CONFIG_SYNC */
+#endif /* CONFIG_MALI_DMA_FENCE */
+	struct base_external_resource *input_extres;
+
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+	/* no resources encoded, early out */
+	if (!katom->nr_extres)
+		return -EINVAL;
+
+	katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+	if (NULL == katom->extres) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	/* copy user buffer to the end of our real buffer.
+	 * Make sure the struct sizes haven't changed in a way
+	 * we don't support */
+	BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+	input_extres = (struct base_external_resource *)
+			(((unsigned char *)katom->extres) +
+			(sizeof(*katom->extres) - sizeof(*input_extres)) *
+			katom->nr_extres);
+
+	if (copy_from_user(input_extres,
+			get_compat_pointer(katom->kctx, user_atom->extres_list),
+			sizeof(*input_extres) * katom->nr_extres) != 0) {
+		err_ret_val = -EINVAL;
+		goto early_err_out;
+	}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		info.resv_objs = kmalloc_array(katom->nr_extres,
+					sizeof(struct reservation_object *),
+					GFP_KERNEL);
+		if (!info.resv_objs) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+
+		info.dma_fence_excl_bitmap =
+				kcalloc(BITS_TO_LONGS(katom->nr_extres),
+					sizeof(unsigned long), GFP_KERNEL);
+		if (!info.dma_fence_excl_bitmap) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Take the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* need to keep the GPU VM locked while we set up UMM buffers */
+	kbase_gpu_vm_lock(katom->kctx);
+	for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+		struct base_external_resource *res;
+		struct kbase_va_region *reg;
+		struct kbase_mem_phy_alloc *alloc;
+		bool exclusive;
+
+		res = &input_extres[res_no];
+		exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
+				? true : false;
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx,
+				res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+		/* did we find a matching region object? */
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE)) {
+			/* roll back */
+			goto failed_loop;
+		}
+
+		if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
+				(reg->flags & KBASE_REG_SECURE)) {
+			katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED;
+		}
+
+		alloc = kbase_map_external_resource(katom->kctx, reg,
+				current->mm);
+		if (!alloc) {
+			err_ret_val = -EINVAL;
+			goto failed_loop;
+		}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		if (implicit_sync &&
+		    reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+			struct reservation_object *resv;
+
+			resv = reg->gpu_alloc->imported.umm.dma_buf->resv;
+			if (resv)
+				kbase_dma_fence_add_reservation(resv, &info,
+								exclusive);
+		}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+		/* finish with updating out array with the data we found */
+		/* NOTE: It is important that this is the last thing we do (or
+		 * at least not before the first write) as we overwrite elements
+		 * as we loop and could be overwriting ourself, so no writes
+		 * until the last read for an element.
+		 * */
+		katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+		katom->extres[res_no].alloc = alloc;
+	}
+	/* successfully parsed the extres array */
+	/* drop the vm lock now */
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		if (info.dma_fence_resv_count) {
+			int ret;
+
+			ret = kbase_dma_fence_wait(katom, &info);
+			if (ret < 0)
+				goto failed_dma_fence_setup;
+		}
+
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* all done OK */
+	return 0;
+
+/* error handling section */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+failed_dma_fence_setup:
+	/* Lock the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* lock before we unmap */
+	kbase_gpu_vm_lock(katom->kctx);
+#endif
+
+ failed_loop:
+	/* undo the loop work */
+	while (res_no-- > 0) {
+		struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+
+		kbase_unmap_external_resource(katom->kctx, NULL, alloc);
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+ early_err_out:
+	kfree(katom->extres);
+	katom->extres = NULL;
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif
+	return err_ret_val;
+}
+
+static inline void jd_resolve_dep(struct list_head *out_list,
+					struct kbase_jd_atom *katom,
+					u8 d, bool ctx_is_dying)
+{
+	u8 other_d = !d;
+
+	while (!list_empty(&katom->dep_head[d])) {
+		struct kbase_jd_atom *dep_atom;
+		struct kbase_jd_atom *other_dep_atom;
+		u8 dep_type;
+
+		dep_atom = list_entry(katom->dep_head[d].next,
+				struct kbase_jd_atom, dep_item[d]);
+		list_del(katom->dep_head[d].next);
+
+		dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+		kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+		if (katom->event_code != BASE_JD_EVENT_DONE &&
+			(dep_type != BASE_JD_DEP_TYPE_ORDER)) {
+#ifdef CONFIG_MALI_DMA_FENCE
+			kbase_dma_fence_cancel_callbacks(dep_atom);
+#endif
+
+			dep_atom->event_code = katom->event_code;
+			KBASE_DEBUG_ASSERT(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED);
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY)
+					!= BASE_JD_REQ_SOFT_REPLAY) {
+				dep_atom->will_fail_event_code =
+					dep_atom->event_code;
+			} else {
+				dep_atom->status =
+					KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+		}
+		other_dep_atom = (struct kbase_jd_atom *)
+			kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]);
+
+		if (!dep_atom->in_jd_list && (!other_dep_atom ||
+				(IS_GPU_ATOM(dep_atom) && !ctx_is_dying &&
+				!dep_atom->will_fail_event_code &&
+				!other_dep_atom->will_fail_event_code))) {
+			bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+			int dep_count;
+
+			dep_count = kbase_fence_dep_count_read(dep_atom);
+			if (likely(dep_count == -1)) {
+				dep_satisfied = true;
+			} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+				dep_satisfied = false;
+			}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+			if (dep_satisfied) {
+				dep_atom->in_jd_list = true;
+				list_add_tail(&dep_atom->jd_item, out_list);
+			}
+		}
+	}
+}
+
+KBASE_EXPORT_TEST_API(jd_resolve_dep);
+
+#if MALI_CUSTOMER_RELEASE == 0
+static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	kbdev->force_replay_count++;
+
+	if (kbdev->force_replay_count >= kbdev->force_replay_limit) {
+		kbdev->force_replay_count = 0;
+		katom->event_code = BASE_JD_EVENT_FORCE_REPLAY;
+
+		if (kbdev->force_replay_random)
+			kbdev->force_replay_limit =
+			   (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1;
+
+		dev_info(kbdev->dev, "force_replay : promoting to error\n");
+	}
+}
+
+/** Test to see if atom should be forced to fail.
+ *
+ * This function will check if an atom has a replay job as a dependent. If so
+ * then it will be considered for forced failure. */
+static void jd_check_force_failure(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int i;
+
+	if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) ||
+	    (katom->core_req & BASEP_JD_REQ_EVENT_NEVER))
+		return;
+
+	for (i = 1; i < BASE_JD_ATOM_COUNT; i++) {
+		if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom ||
+		    kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
+			struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
+						     BASE_JD_REQ_SOFT_REPLAY &&
+			    (dep_atom->core_req & kbdev->force_replay_core_req)
+					     == kbdev->force_replay_core_req) {
+				jd_force_failure(kbdev, katom);
+				return;
+			}
+		}
+	}
+}
+#endif
+
+/**
+ * is_dep_valid - Validate that a dependency is valid for early dependency
+ *                submission
+ * @katom: Dependency atom to validate
+ *
+ * A dependency is valid if any of the following are true :
+ * - It does not exist (a non-existent dependency does not block submission)
+ * - It is in the job scheduler
+ * - It has completed, does not have a failure event code, and has not been
+ *   marked to fail in the future
+ *
+ * Return: true if valid, false otherwise
+ */
+static bool is_dep_valid(struct kbase_jd_atom *katom)
+{
+	/* If there's no dependency then this is 'valid' from the perspective of
+	 * early dependency submission */
+	if (!katom)
+		return true;
+
+	/* Dependency must have reached the job scheduler */
+	if (katom->status < KBASE_JD_ATOM_STATE_IN_JS)
+		return false;
+
+	/* If dependency has completed and has failed or will fail then it is
+	 * not valid */
+	if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED &&
+			(katom->event_code != BASE_JD_EVENT_DONE ||
+			katom->will_fail_event_code))
+		return false;
+
+	return true;
+}
+
+static void jd_try_submitting_deps(struct list_head *out_list,
+		struct kbase_jd_atom *node)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct list_head *pos;
+
+		list_for_each(pos, &node->dep_head[i]) {
+			struct kbase_jd_atom *dep_atom = list_entry(pos,
+					struct kbase_jd_atom, dep_item[i]);
+
+			if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) {
+				/*Check if atom deps look sane*/
+				bool dep0_valid = is_dep_valid(
+						dep_atom->dep[0].atom);
+				bool dep1_valid = is_dep_valid(
+						dep_atom->dep[1].atom);
+				bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+				int dep_count;
+
+				dep_count = kbase_fence_dep_count_read(
+								dep_atom);
+				if (likely(dep_count == -1)) {
+					dep_satisfied = true;
+				} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+					dep_satisfied = false;
+				}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+				if (dep0_valid && dep1_valid && dep_satisfied) {
+					dep_atom->in_jd_list = true;
+					list_add(&dep_atom->jd_item, out_list);
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head completed_jobs;
+	struct list_head runnable_jobs;
+	bool need_to_try_schedule_context = false;
+	int i;
+
+	INIT_LIST_HEAD(&completed_jobs);
+	INIT_LIST_HEAD(&runnable_jobs);
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+#if MALI_CUSTOMER_RELEASE == 0
+	jd_check_force_failure(katom);
+#endif
+
+	/* This is needed in case an atom is failed due to being invalid, this
+	 * can happen *before* the jobs that the atom depends on have completed */
+	for (i = 0; i < 2; i++) {
+		if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
+			list_del(&katom->dep_item[i]);
+			kbase_jd_katom_dep_clear(&katom->dep[i]);
+		}
+	}
+
+	/* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with
+	 * BASE_JD_EVENT_TILE_RANGE_FAULT.
+	 *
+	 * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the
+	 * error code to BASE_JD_EVENT_DONE
+	 */
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) &&
+		  katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) {
+		if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) {
+			/* Promote the failure to job done */
+			katom->event_code = BASE_JD_EVENT_DONE;
+			katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED);
+		}
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+	list_add_tail(&katom->jd_item, &completed_jobs);
+
+	while (!list_empty(&completed_jobs)) {
+		katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item);
+		list_del(completed_jobs.prev);
+		KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+		for (i = 0; i < 2; i++)
+			jd_resolve_dep(&runnable_jobs, katom, i,
+					kbase_ctx_flag(kctx, KCTX_DYING));
+
+		if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+			kbase_jd_post_external_resources(katom);
+
+		while (!list_empty(&runnable_jobs)) {
+			struct kbase_jd_atom *node;
+
+			node = list_entry(runnable_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(runnable_jobs.next);
+			node->in_jd_list = false;
+
+			KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+			if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+				need_to_try_schedule_context |= jd_run_atom(node);
+			} else {
+				node->event_code = katom->event_code;
+
+				if ((node->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE) ==
+					BASE_JD_REQ_SOFT_REPLAY) {
+					if (kbase_replay_process(node))
+						/* Don't complete this atom */
+						continue;
+				} else if (node->core_req &
+							BASE_JD_REQ_SOFT_JOB) {
+					/* If this is a fence wait soft job
+					 * then remove it from the list of sync
+					 * waiters.
+					 */
+					if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
+						kbasep_remove_waiting_soft_job(node);
+
+					kbase_finish_soft_job(node);
+				}
+				node->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+
+			if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+				list_add_tail(&node->jd_item, &completed_jobs);
+			} else if (node->status == KBASE_JD_ATOM_STATE_IN_JS &&
+					!node->will_fail_event_code) {
+				/* Node successfully submitted, try submitting
+				 * dependencies as they may now be representable
+				 * in JS */
+				jd_try_submitting_deps(&runnable_jobs, node);
+			}
+		}
+
+		/* Register a completed job as a disjoint event when the GPU
+		 * is in a disjoint state (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kctx->kbdev);
+		if (completed_jobs_ctx)
+			list_add_tail(&katom->jd_item, completed_jobs_ctx);
+		else
+			kbase_event_post(kctx, katom);
+
+		/* Decrement and check the TOTAL number of jobs. This includes
+		 * those not tracked by the scheduler: 'not ready to run' and
+		 * 'dependency-only' jobs. */
+		if (--kctx->jctx.job_nr == 0)
+			wake_up(&kctx->jctx.zero_jobs_wait);	/* All events are safely queued now, and we can signal any waiter
+								 * that we've got no more jobs (so we can be safely terminated) */
+	}
+
+	return need_to_try_schedule_context;
+}
+
+KBASE_EXPORT_TEST_API(jd_done_nolock);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+enum {
+	CORE_REQ_DEP_ONLY,
+	CORE_REQ_SOFT,
+	CORE_REQ_COMPUTE,
+	CORE_REQ_FRAGMENT,
+	CORE_REQ_VERTEX,
+	CORE_REQ_TILER,
+	CORE_REQ_FRAGMENT_VERTEX,
+	CORE_REQ_FRAGMENT_VERTEX_TILER,
+	CORE_REQ_FRAGMENT_TILER,
+	CORE_REQ_VERTEX_TILER,
+	CORE_REQ_UNKNOWN
+};
+static const char * const core_req_strings[] = {
+	"Dependency Only Job",
+	"Soft Job",
+	"Compute Shader Job",
+	"Fragment Shader Job",
+	"Vertex/Geometry Shader Job",
+	"Tiler Job",
+	"Fragment Shader + Vertex/Geometry Shader Job",
+	"Fragment Shader + Vertex/Geometry Shader Job + Tiler Job",
+	"Fragment Shader + Tiler Job",
+	"Vertex/Geometry Shader Job + Tiler Job",
+	"Unknown Job"
+};
+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
+{
+	if (core_req & BASE_JD_REQ_SOFT_JOB)
+		return core_req_strings[CORE_REQ_SOFT];
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		return core_req_strings[CORE_REQ_COMPUTE];
+	switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) {
+	case BASE_JD_REQ_DEP:
+		return core_req_strings[CORE_REQ_DEP_ONLY];
+	case BASE_JD_REQ_FS:
+		return core_req_strings[CORE_REQ_FRAGMENT];
+	case BASE_JD_REQ_CS:
+		return core_req_strings[CORE_REQ_VERTEX];
+	case BASE_JD_REQ_T:
+		return core_req_strings[CORE_REQ_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_TILER];
+	case (BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_VERTEX_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER];
+	}
+	return core_req_strings[CORE_REQ_UNKNOWN];
+}
+#endif
+
+bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int queued = 0;
+	int i;
+	int sched_prio;
+	bool ret;
+	bool will_fail = false;
+
+	/* Update the TOTAL number of jobs. This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	jctx->job_nr++;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	katom->start_timestamp.tv64 = 0;
+#else
+	katom->start_timestamp = 0;
+#endif
+	katom->udata = user_atom->udata;
+	katom->kctx = kctx;
+	katom->nr_extres = user_atom->nr_extres;
+	katom->extres = NULL;
+	katom->device_nr = user_atom->device_nr;
+	katom->affinity = 0;
+	katom->jc = user_atom->jc;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->core_req = user_atom->core_req;
+	katom->atom_flags = 0;
+	katom->retry_count = 0;
+	katom->need_cache_flush_cores_retained = 0;
+	katom->pre_dep = NULL;
+	katom->post_dep = NULL;
+	katom->x_pre_dep = NULL;
+	katom->x_post_dep = NULL;
+	katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED;
+
+	/* Implicitly sets katom->protected_state.enter as well. */
+	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+	katom->age = kctx->age_count++;
+
+	INIT_LIST_HEAD(&katom->jd_item);
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_fence_dep_count_set(katom, -1);
+#endif
+
+	/* Don't do anything if there is a mess up with dependencies.
+	   This is done in a separate cycle to check both the dependencies at ones, otherwise
+	   it will be extra complexity to deal with 1st dependency ( just added to the list )
+	   if only the 2nd one has invalid config.
+	 */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+		if (dep_atom_number) {
+			if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+				katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+
+				/* Wrong dependency setup. Atom will be sent
+				 * back to user space. Do not record any
+				 * dependencies. */
+				KBASE_TLSTREAM_TL_NEW_ATOM(
+						katom,
+						kbase_jd_atom_id(kctx, katom));
+				KBASE_TLSTREAM_TL_RET_ATOM_CTX(
+						katom, kctx);
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom,
+						TL_ATOM_STATE_IDLE);
+
+				ret = jd_done_nolock(katom, NULL);
+				goto out;
+			}
+		}
+	}
+
+	/* Add dependencies */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type;
+		struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number];
+
+		dep_atom_type = user_atom->pre_dep[i].dependency_type;
+		kbase_jd_katom_dep_clear(&katom->dep[i]);
+
+		if (!dep_atom_number)
+			continue;
+
+		if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED ||
+				dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+
+			if (dep_atom->event_code == BASE_JD_EVENT_DONE)
+				continue;
+			/* don't stop this atom if it has an order dependency
+			 * only to the failed one, try to submit it through
+			 * the normal path
+			 */
+			if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom->event_code > BASE_JD_EVENT_ACTIVE) {
+				continue;
+			}
+
+			/* Atom has completed, propagate the error code if any */
+			katom->event_code = dep_atom->event_code;
+			katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+			/* This atom is going through soft replay or
+			 * will be sent back to user space. Do not record any
+			 * dependencies. */
+			KBASE_TLSTREAM_TL_NEW_ATOM(
+					katom,
+					kbase_jd_atom_id(kctx, katom));
+			KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx);
+			KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom,
+					TL_ATOM_STATE_IDLE);
+
+			if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					 == BASE_JD_REQ_SOFT_REPLAY) {
+				if (kbase_replay_process(katom)) {
+					ret = false;
+					goto out;
+				}
+			}
+			will_fail = true;
+
+		} else {
+			/* Atom is in progress, add this atom to the list */
+			list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
+			kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type);
+			queued = 1;
+		}
+	}
+
+	if (will_fail) {
+		if (!queued) {
+			ret = jd_done_nolock(katom, NULL);
+
+			goto out;
+		} else {
+			katom->will_fail_event_code = katom->event_code;
+			ret = false;
+
+			goto out;
+		}
+	} else {
+		/* These must occur after the above loop to ensure that an atom
+		 * that depends on a previous atom with the same number behaves
+		 * as expected */
+		katom->event_code = BASE_JD_EVENT_DONE;
+		katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	}
+
+	/* For invalid priority, be most lenient and choose the default */
+	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+	katom->sched_priority = sched_prio;
+
+	/* Create a new atom recording all dependencies it was set up with. */
+	KBASE_TLSTREAM_TL_NEW_ATOM(
+			katom,
+			kbase_jd_atom_id(kctx, katom));
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_IDLE);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(katom, katom->sched_priority);
+	KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx);
+	for (i = 0; i < 2; i++)
+		if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
+					&katom->dep[i])) {
+			KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(
+					(void *)kbase_jd_katom_dep_atom(
+						&katom->dep[i]),
+					(void *)katom);
+		} else if (BASE_JD_DEP_TYPE_INVALID !=
+				user_atom->pre_dep[i].dependency_type) {
+			/* Resolved dependency. */
+			int dep_atom_number =
+				user_atom->pre_dep[i].atom_id;
+			struct kbase_jd_atom *dep_atom =
+				&jctx->atoms[dep_atom_number];
+
+			KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(
+					(void *)dep_atom,
+					(void *)katom);
+		}
+
+	/* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
+	if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with an invalid device_nr */
+	if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
+	    (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid device_nr %d",
+				katom->device_nr);
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with invalid core requirements */
+	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
+			(katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid core requirements");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		/* handle what we need to do to access the external resources */
+		if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
+			/* setup failed (no access, bad resource, unknown resource types, etc.) */
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+	/* Validate the atom. Function will return error if the atom is
+	 * malformed.
+	 *
+	 * Soft-jobs never enter the job scheduler but have their own initialize method.
+	 *
+	 * If either fail then we immediately complete the atom with an error.
+	 */
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
+		if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	} else {
+		/* Soft-job */
+		if (kbase_prepare_soft_job(katom) != 0) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	katom->work_id = atomic_inc_return(&jctx->work_id);
+	trace_gpu_job_enqueue(kctx->id, katom->work_id,
+			kbasep_map_core_reqs_to_string(katom->core_req));
+#endif
+
+	if (queued && !IS_GPU_ATOM(katom)) {
+		ret = false;
+		goto out;
+	}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (kbase_fence_dep_count_read(katom) != -1) {
+		ret = false;
+		goto out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+		if (kbase_replay_process(katom))
+			ret = false;
+		else
+			ret = jd_done_nolock(katom, NULL);
+
+		goto out;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+
+		ret = false;
+	} else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		ret = kbasep_js_add_job(kctx, katom);
+		/* If job was cancelled then resolve immediately */
+		if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED)
+			ret = jd_done_nolock(katom, NULL);
+	} else {
+		/* This is a pure dependency. Resolve it immediately */
+		ret = jd_done_nolock(katom, NULL);
+	}
+
+ out:
+	return ret;
+}
+
+int kbase_jd_submit(struct kbase_context *kctx,
+		void __user *user_addr, u32 nr_atoms, u32 stride,
+		bool uk6_atom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int err = 0;
+	int i;
+	bool need_to_try_schedule_context = false;
+	struct kbase_device *kbdev;
+	u32 latest_flush;
+
+	/*
+	 * kbase_jd_submit isn't expected to fail and so all errors with the
+	 * jobs are reported by immediately failing them (through event system)
+	 */
+	kbdev = kctx->kbdev;
+
+	beenthere(kctx, "%s", "Enter");
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+		return -EINVAL;
+	}
+
+	if (stride != sizeof(base_jd_atom_v2)) {
+		dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+		return -EINVAL;
+	}
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(nr_atoms,
+				&kctx->timeline.jd_atoms_in_flight));
+
+	/* All atoms submitted in this call have the same flush ID */
+	latest_flush = kbase_backend_get_current_flush_id(kbdev);
+
+	for (i = 0; i < nr_atoms; i++) {
+		struct base_jd_atom_v2 user_atom;
+		struct kbase_jd_atom *katom;
+
+		if (copy_from_user(&user_atom, user_addr,
+					sizeof(user_atom)) != 0) {
+			err = -EINVAL;
+			KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx,
+				atomic_sub_return(nr_atoms - i,
+				&kctx->timeline.jd_atoms_in_flight));
+			break;
+		}
+
+		user_addr = (void __user *)((uintptr_t) user_addr + stride);
+
+		mutex_lock(&jctx->lock);
+#ifndef compiletime_assert
+#define compiletime_assert_defined
+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \
+while (false)
+#endif
+		compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) ==
+					BASE_JD_ATOM_COUNT,
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+		compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) ==
+					sizeof(user_atom.atom_number),
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+#ifdef compiletime_assert_defined
+#undef compiletime_assert
+#undef compiletime_assert_defined
+#endif
+		katom = &jctx->atoms[user_atom.atom_number];
+
+		/* Record the flush ID for the cache flush optimisation */
+		katom->flush_id = latest_flush;
+
+		while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
+			/* Atom number is already in use, wait for the atom to
+			 * complete
+			 */
+			mutex_unlock(&jctx->lock);
+
+			/* This thread will wait for the atom to complete. Due
+			 * to thread scheduling we are not sure that the other
+			 * thread that owns the atom will also schedule the
+			 * context, so we force the scheduler to be active and
+			 * hence eventually schedule this context at some point
+			 * later.
+			 */
+			kbase_js_sched_all(kbdev);
+
+			if (wait_event_killable(katom->completed,
+					katom->status ==
+					KBASE_JD_ATOM_STATE_UNUSED) != 0) {
+				/* We're being killed so the result code
+				 * doesn't really matter
+				 */
+				return 0;
+			}
+			mutex_lock(&jctx->lock);
+		}
+
+		need_to_try_schedule_context |=
+				       jd_submit_atom(kctx, &user_atom, katom);
+
+		/* Register a completed job as a disjoint event when the GPU is in a disjoint state
+		 * (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kbdev);
+
+		mutex_unlock(&jctx->lock);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kbdev);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_submit);
+
+void kbase_jd_done_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	u64 cache_jc = katom->jc;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool context_idle;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+	kbdev = kctx->kbdev;
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	/*
+	 * Begin transaction on JD context and JS context
+	 */
+	mutex_lock(&jctx->lock);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_DONE);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* This worker only gets called on contexts that are scheduled *in*. This is
+	 * because it only happens in response to an IRQ from a job that was
+	 * running.
+	 */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (katom->event_code == BASE_JD_EVENT_STOPPED) {
+		/* Atom has been promoted to stopped */
+		unsigned long flags;
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		kbase_js_unpull(kctx, katom);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&jctx->lock);
+
+		return;
+	}
+
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		dev_err(kbdev->dev,
+			"t6xx: GPU fault 0x%02lx from job slot %d\n",
+					(unsigned long)katom->event_code,
+								katom->slot_nr);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	/* Retain state before the katom disappears */
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+
+	context_idle = kbase_js_complete_atom_wq(kctx, katom);
+
+	KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
+
+	kbasep_js_remove_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+	jd_done_nolock(katom, &kctx->completed_jobs);
+
+	/* katom may have been freed now, do not use! */
+
+	if (context_idle) {
+		unsigned long flags;
+
+		context_idle = false;
+		mutex_lock(&js_devdata->queue_mutex);
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* If kbase_sched() has scheduled this context back in then
+		 * KCTX_ACTIVE will have been set after we marked it as
+		 * inactive, and another pm reference will have been taken, so
+		 * drop our reference. But do not call kbase_jm_idle_ctx(), as
+		 * the context is active and fast-starting is allowed.
+		 *
+		 * If an atom has been fast-started then kctx->atoms_pulled will
+		 * be non-zero but KCTX_ACTIVE will still be false (as the
+		 * previous pm reference has been inherited). Do NOT drop our
+		 * reference, as it has been re-used, and leave the context as
+		 * active.
+		 *
+		 * If no new atoms have been started then KCTX_ACTIVE will still
+		 * be false and atoms_pulled will be zero, so drop the reference
+		 * and call kbase_jm_idle_ctx().
+		 *
+		 * As the checks are done under both the queue_mutex and
+		 * hwaccess_lock is should be impossible for this to race
+		 * with the scheduler code.
+		 */
+		if (kbase_ctx_flag(kctx, KCTX_ACTIVE) ||
+		    !atomic_read(&kctx->atoms_pulled)) {
+			/* Calling kbase_jm_idle_ctx() here will ensure that
+			 * atoms are not fast-started when we drop the
+			 * hwaccess_lock. This is not performed if
+			 * KCTX_ACTIVE is set as in that case another pm
+			 * reference has been taken and a fast-start would be
+			 * valid.
+			 */
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE))
+				kbase_jm_idle_ctx(kbdev, kctx);
+			context_idle = true;
+		} else {
+			kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+		}
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+
+	/*
+	 * Transaction complete
+	 */
+	mutex_unlock(&jctx->lock);
+
+	/* Job is now no longer running, so can now safely release the context
+	 * reference, and handle any actions that were logged against the atom's retained state */
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	if (!atomic_dec_return(&kctx->work_count)) {
+		/* If worker now idle then post all events that jd_done_nolock()
+		 * has queued */
+		mutex_lock(&jctx->lock);
+		while (!list_empty(&kctx->completed_jobs)) {
+			struct kbase_jd_atom *atom = list_entry(
+					kctx->completed_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(kctx->completed_jobs.next);
+
+			kbase_event_post(kctx, atom);
+		}
+		mutex_unlock(&jctx->lock);
+	}
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+
+	if (context_idle)
+		kbase_pm_context_idle(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+}
+
+/**
+ * jd_cancel_worker - Work queue job cancel function.
+ * @data: a &struct work_struct
+ *
+ * Only called as part of 'Zapping' a context (which occurs on termination).
+ * Operates serially with the kbase_jd_done_worker() on the work queue.
+ *
+ * This can only be called on contexts that aren't scheduled.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be
+ * running (by virtue of only being called on contexts that aren't
+ * scheduled).
+ */
+static void jd_cancel_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool need_to_try_schedule_context;
+	bool attr_state_changed;
+	struct kbase_device *kbdev;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	kbdev = kctx->kbdev;
+	jctx = &kctx->jctx;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+	/* This only gets called on contexts that are scheduled out. Hence, we must
+	 * make sure we don't de-ref the number of running jobs (there aren't
+	 * any), nor must we try to schedule out the context (it's already
+	 * scheduled out).
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	/* Scheduler: Remove the job from the system */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&jctx->lock);
+
+	need_to_try_schedule_context = jd_done_nolock(katom, NULL);
+	/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
+	 * schedule the context. There's also no need for the jsctx_mutex to have been taken
+	 * around this too. */
+	KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
+
+	/* katom may have been freed now, do not use! */
+	mutex_unlock(&jctx->lock);
+
+	if (attr_state_changed)
+		kbase_js_sched_all(kbdev);
+}
+
+/**
+ * kbase_jd_done - Complete a job that has been removed from the Hardware
+ * @katom: atom which has been completed
+ * @slot_nr: slot the atom was on
+ * @end_timestamp: completion time
+ * @done_code: completion code
+ *
+ * This must be used whenever a job has been removed from the Hardware, e.g.:
+ * An IRQ indicates that the job finished (for both error and 'done' codes), or
+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop.
+ *
+ * Some work is carried out immediately, and the rest is deferred onto a
+ * workqueue
+ *
+ * Context:
+ *   This can be called safely from atomic context.
+ *   The caller must hold kbdev->hwaccess_lock
+ */
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
+		ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
+{
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(kctx);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
+		katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0);
+
+	kbase_job_check_leave_disjoint(kbdev, katom);
+
+	katom->slot_nr = slot_nr;
+
+	atomic_inc(&kctx->work_count);
+
+#ifdef CONFIG_DEBUG_FS
+	/* a failed job happened and is waiting for dumping*/
+	if (!katom->will_fail_event_code &&
+			kbase_debug_job_fault_process(katom, katom->event_code))
+		return;
+#endif
+
+	WARN_ON(work_pending(&katom->work));
+	INIT_WORK(&katom->work, kbase_jd_done_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_done);
+
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+	/* This should only be done from a context that is not scheduled */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	WARN_ON(work_pending(&katom->work));
+
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	INIT_WORK(&katom->work, jd_cancel_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+
+void kbase_jd_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_jd_atom *katom;
+	struct list_head *entry, *tmp;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u);
+
+	kbase_js_zap_context(kctx);
+
+	mutex_lock(&kctx->jctx.lock);
+
+	/*
+	 * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are
+	 * queued outside the job scheduler.
+	 */
+
+	del_timer_sync(&kctx->soft_job_timeout);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		katom = list_entry(entry, struct kbase_jd_atom, queue);
+		kbase_cancel_soft_job(katom);
+	}
+
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_cancel_all_atoms(kctx);
+#endif
+
+	mutex_unlock(&kctx->jctx.lock);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 */
+	flush_workqueue(kctx->dma_fence.wq);
+#endif
+
+	kbase_jm_wait_for_zero_jobs(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_zap_context);
+
+int kbase_jd_init(struct kbase_context *kctx)
+{
+	int i;
+	int mali_err = 0;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kctx->jctx.job_done_wq = alloc_workqueue("mali_jd",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (NULL == kctx->jctx.job_done_wq) {
+		mali_err = -ENOMEM;
+		goto out1;
+	}
+
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		init_waitqueue_head(&kctx->jctx.atoms[i].completed);
+
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]);
+
+		/* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
+		kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
+		kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+		kctx->jctx.atoms[i].dma_fence.context =
+						dma_fence_context_alloc(1);
+		atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks);
+#endif
+	}
+
+	mutex_init(&kctx->jctx.lock);
+
+	init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
+
+	spin_lock_init(&kctx->jctx.tb_lock);
+
+	kctx->jctx.job_nr = 0;
+	INIT_LIST_HEAD(&kctx->completed_jobs);
+	atomic_set(&kctx->work_count, 0);
+
+	return 0;
+
+ out1:
+	return mali_err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_init);
+
+void kbase_jd_exit(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	/* Work queue is emptied by this */
+	destroy_workqueue(kctx->jctx.job_done_wq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_exit);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
new file mode 100644
index 0000000..25f1ed5
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -0,0 +1,236 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/seq_file.h>
+#include <mali_kbase.h>
+#include <mali_kbase_jd_debugfs.h>
+#include <mali_kbase_dma_fence.h>
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif
+#include <mali_kbase_ioctl.h>
+
+struct kbase_jd_debugfs_depinfo {
+	u8 id;
+	char type;
+};
+
+static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
+					struct seq_file *sfile)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	struct kbase_sync_fence_info info;
+	int res;
+
+	switch (atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		res = kbase_sync_fence_out_info_get(atom, &info);
+		if (0 == res) {
+			seq_printf(sfile, "Sa([%p]%d) ",
+				   info.fence, info.status);
+			break;
+		}
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		res = kbase_sync_fence_in_info_get(atom, &info);
+		if (0 == res) {
+			seq_printf(sfile, "Wa([%p]%d) ",
+				   info.fence, info.status);
+			break;
+		}
+	default:
+		break;
+	}
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		struct kbase_fence_cb *cb;
+
+		if (atom->dma_fence.fence) {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+			struct fence *fence = atom->dma_fence.fence;
+#else
+			struct dma_fence *fence = atom->dma_fence.fence;
+#endif
+
+			seq_printf(sfile,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+					"Sd(%u#%u: %s) ",
+#else
+					"Sd(%llu#%u: %s) ",
+#endif
+					fence->context,
+					fence->seqno,
+					dma_fence_is_signaled(fence) ?
+						"signaled" : "active");
+		}
+
+		list_for_each_entry(cb, &atom->dma_fence.callbacks,
+				    node) {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+			struct fence *fence = cb->fence;
+#else
+			struct dma_fence *fence = cb->fence;
+#endif
+
+			seq_printf(sfile,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+					"Wd(%u#%u: %s) ",
+#else
+					"Wd(%llu#%u: %s) ",
+#endif
+					fence->context,
+					fence->seqno,
+					dma_fence_is_signaled(fence) ?
+						"signaled" : "active");
+		}
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+}
+
+static void kbasep_jd_debugfs_atom_deps(
+		struct kbase_jd_debugfs_depinfo *deps,
+		struct kbase_jd_atom *atom)
+{
+	struct kbase_context *kctx = atom->kctx;
+	int i;
+
+	for (i = 0; i < 2; i++)	{
+		deps[i].id = (unsigned)(atom->dep[i].atom ?
+				kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0);
+
+		switch (atom->dep[i].dep_type) {
+		case BASE_JD_DEP_TYPE_INVALID:
+			deps[i].type = ' ';
+			break;
+		case BASE_JD_DEP_TYPE_DATA:
+			deps[i].type = 'D';
+			break;
+		case BASE_JD_DEP_TYPE_ORDER:
+			deps[i].type = '>';
+			break;
+		default:
+			deps[i].type = '?';
+			break;
+		}
+	}
+}
+/**
+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file.
+ * @sfile: The debugfs entry
+ * @data:  Data associated with the entry
+ *
+ * This function is called to get the contents of the JD atoms debugfs file.
+ * This is a report of all atoms managed by kbase_jd_context.atoms
+ *
+ * Return: 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+	struct kbase_jd_atom *atoms;
+	unsigned long irq_flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Print version */
+	seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION);
+
+	/* Print U/K API version */
+	seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR,
+			BASE_UK_VERSION_MINOR);
+
+	/* Print table heading */
+	seq_puts(sfile, " ID, Core req, St, CR,   Predeps,           Start time, Additional info...\n");
+
+	atoms = kctx->jctx.atoms;
+	/* General atom states */
+	mutex_lock(&kctx->jctx.lock);
+	/* JS-related states */
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+	for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
+		struct kbase_jd_atom *atom = &atoms[i];
+		s64 start_timestamp = 0;
+		struct kbase_jd_debugfs_depinfo deps[2];
+
+		if (atom->status == KBASE_JD_ATOM_STATE_UNUSED)
+			continue;
+
+		/* start_timestamp is cleared as soon as the atom leaves UNUSED state
+		 * and set before a job is submitted to the h/w, a non-zero value means
+		 * it is valid */
+		if (ktime_to_ns(atom->start_timestamp))
+			start_timestamp = ktime_to_ns(
+					ktime_sub(ktime_get(), atom->start_timestamp));
+
+		kbasep_jd_debugfs_atom_deps(deps, atom);
+
+		seq_printf(sfile,
+				"%3u, %8x, %2u, %2u, %c%3u %c%3u, %20lld, ",
+				i, atom->core_req, atom->status,
+				atom->coreref_state,
+				deps[0].type, deps[0].id,
+				deps[1].type, deps[1].id,
+				start_timestamp);
+
+
+		kbase_jd_debugfs_fence_info(atom, sfile);
+
+		seq_puts(sfile, "\n");
+	}
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&kctx->jctx.lock);
+
+	return 0;
+}
+
+
+/**
+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * Return: file descriptor
+ */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private);
+}
+
+static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+	.open = kbasep_jd_debugfs_atoms_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Expose all atoms */
+	debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx,
+			&kbasep_jd_debugfs_atoms_fops);
+
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
new file mode 100644
index 0000000..fae3291
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_jd_debugfs.h
+ * Header file for job dispatcher-related entries in debugfs
+ */
+
+#ifndef _KBASE_JD_DEBUGFS_H
+#define _KBASE_JD_DEBUGFS_H
+
+#include <linux/debugfs.h>
+
+#define MALI_JD_DEBUGFS_VERSION 2
+
+/* Forward declarations */
+struct kbase_context;
+
+/**
+ * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system
+ *
+ * @kctx Pointer to kbase_context
+ */
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx);
+
+#endif  /*_KBASE_JD_DEBUGFS_H*/
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
new file mode 100644
index 0000000..0c5c6a6
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_hwaccess_jm.h"
+#include "mali_kbase_jm.h"
+
+/**
+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot
+ *			 @js on the active context.
+ * @kbdev:		Device pointer
+ * @js:			Job slot to run on
+ * @nr_jobs_to_submit:	Number of jobs to attempt to submit
+ *
+ * Return: true if slot can still be submitted on, false if slot is now full.
+ */
+static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
+				int nr_jobs_to_submit)
+{
+	struct kbase_context *kctx;
+	int i;
+
+	kctx = kbdev->hwaccess.active_kctx;
+
+	if (!kctx)
+		return true;
+
+	for (i = 0; i < nr_jobs_to_submit; i++) {
+		struct kbase_jd_atom *katom = kbase_js_pull(kctx, js);
+
+		if (!katom)
+			return true; /* Context has no jobs on this slot */
+
+		kbase_backend_run_atom(kbdev, katom);
+	}
+
+	return false; /* Slot ringbuffer should now be full */
+}
+
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	u32 ret_mask = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (js_mask) {
+		int js = ffs(js_mask) - 1;
+		int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
+
+		if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
+			ret_mask |= (1 << js);
+
+		js_mask &= ~(1 << js);
+	}
+
+	return ret_mask;
+}
+
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick(kbdev, js_mask);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_try_kick_all(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick_all(kbdev);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->hwaccess.active_kctx == kctx)
+		kbdev->hwaccess.active_kctx = NULL;
+}
+
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->event_code != BASE_JD_EVENT_STOPPED &&
+			katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
+		return kbase_js_complete_atom(katom, NULL);
+	} else {
+		kbase_js_unpull(katom->kctx, katom);
+		return NULL;
+	}
+}
+
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return kbase_js_complete_atom(katom, end_timestamp);
+}
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
new file mode 100644
index 0000000..a74ee24
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -0,0 +1,110 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Job manager common APIs
+ */
+
+#ifndef _KBASE_JM_H_
+#define _KBASE_JM_H_
+
+/**
+ * kbase_jm_kick() - Indicate that there are jobs ready to run.
+ * @kbdev:	Device pointer
+ * @js_mask:	Mask of the job slots that can be pulled from.
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job
+ *			 slots.
+ * @kbdev:	Device pointer
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
+{
+	return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+/**
+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick
+ * @kbdev:   Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
+ * @kbdev:  Device pointer
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick_all() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick_all(struct kbase_device *kbdev);
+
+/**
+ * kbase_jm_idle_ctx() - Mark a context as idle.
+ * @kbdev:	Device pointer
+ * @kctx:	Context to mark as idle
+ *
+ * No more atoms will be pulled from this context until it is marked as active
+ * by kbase_js_use_ctx().
+ *
+ * The context should have no atoms currently pulled from it
+ * (kctx->atoms_pulled == 0).
+ *
+ * Caller must hold the hwaccess_lock
+ */
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
+ *				  been soft-stopped or will fail due to a
+ *				  dependency
+ * @kbdev:	Device pointer
+ * @katom:	Atom that has been stopped or will be failed
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+			struct kbase_jd_atom *katom);
+
+/**
+ * kbase_jm_complete() - Complete an atom
+ * @kbdev:		Device pointer
+ * @katom:		Atom that has completed
+ * @end_timestamp:	Timestamp of atom completion
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp);
+
+#endif /* _KBASE_JM_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
new file mode 100644
index 0000000..a3e8248
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -0,0 +1,2789 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_ctx_sched.h>
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_jm.h"
+#include "mali_kbase_hwaccess_jm.h"
+
+/*
+ * Private types
+ */
+
+/* Bitpattern indicating the result of releasing a context */
+enum {
+	/* The context was descheduled - caller should try scheduling in a new
+	 * one to keep the runpool full */
+	KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+	/* Ctx attributes were changed - caller should try scheduling all
+	 * contexts */
+	KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
+};
+
+typedef u32 kbasep_js_release_result;
+
+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
+	KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
+	KBASE_JS_ATOM_SCHED_PRIO_LOW  /* BASE_JD_PRIO_LOW */
+};
+
+const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
+	BASE_JD_PRIO_HIGH,   /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+	BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+	BASE_JD_PRIO_LOW     /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+};
+
+
+/*
+ * Private function prototypes
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state);
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_ctx_job_cb callback);
+
+/* Helper for trace subcodes */
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	return atomic_read(&kctx->refcount);
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+/*
+ * Private functions
+ */
+
+/**
+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
+ * @features: JSn_FEATURE register value
+ *
+ * Given a JSn_FEATURE register value returns the core requirements that match
+ *
+ * Return: Core requirement bit mask
+ */
+static base_jd_core_req core_reqs_from_jsn_features(u16 features)
+{
+	base_jd_core_req core_req = 0u;
+
+	if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+		core_req |= BASE_JD_REQ_V;
+
+	if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+		core_req |= BASE_JD_REQ_CF;
+
+	if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+		core_req |= BASE_JD_REQ_CS;
+
+	if ((features & JS_FEATURE_TILER_JOB) != 0)
+		core_req |= BASE_JD_REQ_T;
+
+	if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+		core_req |= BASE_JD_REQ_FS;
+
+	return core_req;
+}
+
+static void kbase_js_sync_timers(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+	kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
+
+/* Hold the mmu_hw_mutex and hwaccess_lock for this */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	bool result = false;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	as_nr = kctx->as_nr;
+	if (atomic_read(&kctx->refcount) > 0) {
+		KBASE_DEBUG_ASSERT(as_nr >= 0);
+
+		kbase_ctx_sched_retain_ctx_refcount(kctx);
+		KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
+				NULL, 0u, atomic_read(&kctx->refcount));
+		result = true;
+	}
+
+	return result;
+}
+
+/**
+ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority to check.
+ *
+ * Return true if there are no atoms to pull. There may be running atoms in the
+ * ring buffer even if there are no atoms to pull. It is also possible for the
+ * ring buffer to be full (with running atoms) when this functions returns
+ * true.
+ *
+ * Return: true if there are no atoms to pull, false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	return RB_EMPTY_ROOT(&rb->runnable_tree);
+}
+
+/**
+ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no
+ * pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if the ring buffers for all priorities have no pullable atoms,
+ *	   false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue.
+ * @kctx:     Pointer to kbase context with the queue.
+ * @js:       Job slot id to iterate.
+ * @prio:     Priority id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over a queue and invoke @callback for each entry in the queue, and
+ * remove the entry from the queue.
+ *
+ * If entries are added to the queue while this is running those entries may, or
+ * may not be covered. To ensure that all entries in the buffer have been
+ * enumerated when this function returns jsctx->lock must be held when calling
+ * this function.
+ *
+ * The HW access lock must always be held when calling this function.
+ */
+static void
+jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
+		kbasep_js_ctx_job_cb callback)
+{
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (!RB_EMPTY_ROOT(&queue->runnable_tree)) {
+		struct rb_node *node = rb_first(&queue->runnable_tree);
+		struct kbase_jd_atom *entry = rb_entry(node,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		rb_erase(node, &queue->runnable_tree);
+		callback(kctx->kbdev, entry);
+	}
+
+	while (!list_empty(&queue->x_dep_head)) {
+		struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next,
+				struct kbase_jd_atom, queue);
+
+		list_del(queue->x_dep_head.next);
+
+		callback(kctx->kbdev, entry);
+	}
+}
+
+/**
+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue
+ * @kctx:     Pointer to kbase context with queue.
+ * @js:       Job slot id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over all the different priorities, and for each call
+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
+ * for each entry, and remove the entry from the queue.
+ */
+static inline void
+jsctx_queue_foreach(struct kbase_context *kctx, int js,
+		kbasep_js_ctx_job_cb callback)
+{
+	int prio;
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+		jsctx_queue_foreach_prio(kctx, js, prio, callback);
+}
+
+/**
+ * jsctx_rb_peek_prio(): - Check buffer and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a pointer to
+ * the next atom, unless the ring buffer is empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	struct rb_node *node;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	node = rb_first(&rb->runnable_tree);
+	if (!node)
+		return NULL;
+
+	return rb_entry(node, struct kbase_jd_atom, runnable_tree_node);
+}
+
+/**
+ * jsctx_rb_peek(): - Check all priority buffers and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Check the ring buffers for all priorities, starting from
+ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
+ * pointer to the next atom, unless all the priority's ring buffers are empty.
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		struct kbase_jd_atom *katom;
+
+		katom = jsctx_rb_peek_prio(kctx, js, prio);
+		if (katom)
+			return katom;
+	}
+
+	return NULL;
+}
+
+/**
+ * jsctx_rb_pull(): - Mark atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to pull.
+ *
+ * Mark an atom previously obtained from jsctx_rb_peek() as running.
+ *
+ * @katom must currently be at the head of the ring buffer.
+ */
+static inline void
+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/* Atoms must be pulled in the correct order. */
+	WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
+
+	rb_erase(&katom->runnable_tree_node, &rb->runnable_tree);
+}
+
+#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0)
+
+static void
+jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+	struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (*new) {
+		struct kbase_jd_atom *entry = container_of(*new,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		parent = *new;
+		if (LESS_THAN_WRAP(katom->age, entry->age))
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&katom->runnable_tree_node, parent, new);
+	rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree);
+}
+
+/**
+ * jsctx_rb_unpull(): - Undo marking of atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to unpull.
+ *
+ * Undo jsctx_rb_pull() and put @katom back in the queue.
+ *
+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
+ * pulled.
+ */
+static inline void
+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_tree_add(kctx, katom);
+}
+
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
+					int js,
+					bool is_scheduled);
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+
+/*
+ * Functions private to KBase ('Protected' functions)
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev)
+{
+	struct kbasep_js_device_data *jsdd;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	jsdd = &kbdev->js_data;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Soft-stop will be disabled on a single context by default unless
+	 * softstop_always is set */
+	jsdd->softstop_always = false;
+#endif				/* CONFIG_MALI_DEBUG */
+	jsdd->nr_all_contexts_running = 0;
+	jsdd->nr_user_contexts_running = 0;
+	jsdd->nr_contexts_pullable = 0;
+	atomic_set(&jsdd->nr_contexts_runnable, 0);
+	/* No ctx allowed to submit */
+	jsdd->runpool_irq.submit_allowed = 0u;
+	memset(jsdd->runpool_irq.ctx_attr_ref_count, 0,
+			sizeof(jsdd->runpool_irq.ctx_attr_ref_count));
+	memset(jsdd->runpool_irq.slot_affinities, 0,
+			sizeof(jsdd->runpool_irq.slot_affinities));
+	memset(jsdd->runpool_irq.slot_affinity_refcount, 0,
+			sizeof(jsdd->runpool_irq.slot_affinity_refcount));
+	INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list);
+
+	/* Config attributes */
+	jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+	jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+	jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+	else
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS;
+	jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+	jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408;
+	else
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+	jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+	jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
+	jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
+	atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
+
+	dev_dbg(kbdev->dev, "JS Config Attribs: ");
+	dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
+			jsdd->scheduling_period_ns);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u",
+			jsdd->soft_stop_ticks);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u",
+			jsdd->soft_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u",
+			jsdd->hard_stop_ticks_ss);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u",
+			jsdd->hard_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u",
+			jsdd->hard_stop_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u",
+			jsdd->gpu_reset_ticks_ss);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u",
+			jsdd->gpu_reset_ticks_cl);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u",
+			jsdd->gpu_reset_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u",
+			jsdd->ctx_timeslice_ns);
+	dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i",
+		atomic_read(&jsdd->soft_job_timeout_ms));
+
+	if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
+			jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
+			jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping &&
+			jsdd->hard_stop_ticks_dumping <
+			jsdd->gpu_reset_ticks_dumping)) {
+		dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n");
+		return -EINVAL;
+	}
+
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.",
+			jsdd->soft_stop_ticks,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.",
+			jsdd->hard_stop_ticks_ss,
+			jsdd->hard_stop_ticks_dumping,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing.");
+#endif
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i)
+		jsdd->js_reqs[i] = core_reqs_from_jsn_features(
+			kbdev->gpu_props.props.raw_props.js_features[i]);
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+
+	mutex_init(&jsdd->runpool_mutex);
+	mutex_init(&jsdd->queue_mutex);
+	spin_lock_init(&kbdev->hwaccess_lock);
+	sema_init(&jsdd->schedule_sem, 1);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
+		INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]);
+		INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]);
+	}
+
+	return 0;
+}
+
+void kbasep_js_devdata_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbasep_js_devdata_term(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	/* The caller must de-register all contexts before calling this
+	 */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
+	KBASE_DEBUG_ASSERT(memcmp(
+	        js_devdata->runpool_irq.ctx_attr_ref_count,
+	        zero_ctx_attr_ref_count,
+	        sizeof(zero_ctx_attr_ref_count)) == 0);
+	CSTD_UNUSED(zero_ctx_attr_ref_count);
+}
+
+int kbasep_js_kctx_init(struct kbase_context * const kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int i, j;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	js_kctx_info->ctx.nr_jobs = 0;
+	kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+	kbase_ctx_flag_clear(kctx, KCTX_DYING);
+	memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
+			sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
+
+	/* Initially, the context is disabled from submission until the create
+	 * flags are set */
+	kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+	mutex_init(&js_kctx_info->ctx.jsctx_mutex);
+
+	init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
+
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
+			INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head);
+			kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT;
+		}
+	}
+
+	return 0;
+}
+
+void kbasep_js_kctx_term(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int js;
+	bool update_ctx_count = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* The caller must de-register all jobs before calling this */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
+
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) {
+		WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
+		atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		update_ctx_count = true;
+		kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+	}
+
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if (update_ctx_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		kbase_backend_ctx_count_changed(kbdev);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_nolock - Variant of
+ *                                         kbase_jd_ctx_list_add_pullable()
+ *                                         where the caller must hold
+ *                                         hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head_nolock - Variant of
+ *                                              kbase_js_ctx_list_add_pullable_head()
+ *                                              where the caller must hold
+ *                                              hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head_nolock(
+		struct kbase_device *kbdev, struct kbase_context *kctx, int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
+ *                                       per-slot pullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the head.
+ *
+ * This function should be used when a context has been scheduled, but no jobs
+ * can currently be pulled from it.
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the
+ *                                           per-slot unpullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on the per-slot pullable queue. It will be
+ * removed from the pullable queue before being added to the unpullable queue.
+ *
+ * This function should be used when a context has been pulled from, and there
+ * are no jobs remaining on the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+				&kbdev->js_data.ctx_list_unpullable[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable
+ *                                   or unpullable context queues
+ * @kbdev:  Device pointer
+ * @kctx:   Context to remove from queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on one of the queues.
+ *
+ * This function should be used when a context has no jobs on the GPU, and no
+ * jobs remaining for the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head()
+ *                                     where the caller must hold
+ *                                     hwaccess_lock
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
+						struct kbase_device *kbdev,
+						int js)
+{
+	struct kbase_context *kctx;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (list_empty(&kbdev->js_data.ctx_list_pullable[js]))
+		return NULL;
+
+	kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next,
+					struct kbase_context,
+					jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
+ *                              queue.
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head(
+		struct kbase_device *kbdev, int js)
+{
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the
+ *                         specified slot
+ * @kctx:          Context pointer
+ * @js:            Job slot to use
+ * @is_scheduled:  true if the context is currently scheduled
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:         true if context can be pulled from on specified slot
+ *                 false otherwise
+ */
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
+					bool is_scheduled)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_jd_atom *katom;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	js_devdata = &kctx->kbdev->js_data;
+
+	if (is_scheduled) {
+		if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+			return false;
+	}
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return false; /* No pullable atoms */
+	if (kctx->blocked_js[js][katom->sched_priority])
+		return false;
+	if (atomic_read(&katom->blocked))
+		return false; /* next atom blocked */
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return false;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return false;
+	}
+
+	return true;
+}
+
+static bool kbase_js_dep_validate(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool ret = true;
+	bool has_dep = false, has_x_dep = false;
+	int js = kbase_js_get_slot(kbdev, katom);
+	int prio = katom->sched_priority;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+		if (dep_atom) {
+			int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+			int dep_prio = dep_atom->sched_priority;
+
+			/* Dependent atom must already have been submitted */
+			if (!(dep_atom->atom_flags &
+					KBASE_KATOM_FLAG_JSCTX_IN_TREE)) {
+				ret = false;
+				break;
+			}
+
+			/* Dependencies with different priorities can't
+			  be represented in the ringbuffer */
+			if (prio != dep_prio) {
+				ret = false;
+				break;
+			}
+
+			if (js == dep_js) {
+				/* Only one same-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * same-slot dependency */
+				if (dep_atom->post_dep) {
+					ret = false;
+					break;
+				}
+				has_dep = true;
+			} else {
+				/* Only one cross-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_x_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * cross-slot dependency */
+				if (dep_atom->x_post_dep) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already be in the
+				 * HW access ringbuffer */
+				if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already have
+				 * completed */
+				if (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_IN_JS) {
+					ret = false;
+					break;
+				}
+				/* Cross-slot dependencies must not violate
+				 * PRLAM-8987 affinity restrictions */
+				if (kbase_hw_has_issue(kbdev,
+							BASE_HW_ISSUE_8987) &&
+						(js == 2 || dep_js == 2)) {
+					ret = false;
+					break;
+				}
+				has_x_dep = true;
+			}
+
+			/* Dependency can be represented in ringbuffers */
+		}
+	}
+
+	/* If dependencies can be represented by ringbuffer then clear them from
+	 * atom structure */
+	if (ret) {
+		for (i = 0; i < 2; i++) {
+			struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+			if (dep_atom) {
+				int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+
+				if ((js != dep_js) &&
+					(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_COMPLETED)
+					&& (dep_atom->status !=
+					KBASE_JD_ATOM_STATE_HW_COMPLETED)
+					&& (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED)) {
+
+					katom->atom_flags |=
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+					katom->x_pre_dep = dep_atom;
+					dep_atom->x_post_dep = katom;
+					if (kbase_jd_katom_dep_type(
+							&katom->dep[i]) ==
+							BASE_JD_DEP_TYPE_DATA)
+						katom->atom_flags |=
+						KBASE_KATOM_FLAG_FAIL_BLOCKER;
+				}
+				if ((kbase_jd_katom_dep_type(&katom->dep[i])
+						== BASE_JD_DEP_TYPE_DATA) &&
+						(js == dep_js)) {
+					katom->pre_dep = dep_atom;
+					dep_atom->post_dep = katom;
+				}
+
+				list_del(&katom->dep_item[i]);
+				kbase_jd_katom_dep_clear(&katom->dep[i]);
+			}
+		}
+	}
+
+	return ret;
+}
+
+bool kbasep_js_add_job(struct kbase_context *kctx,
+		struct kbase_jd_atom *atom)
+{
+	unsigned long flags;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+	bool enqueue_required = false;
+	bool timer_sync = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/*
+	 * Begin Runpool transaction
+	 */
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
+	++(js_kctx_info->ctx.nr_jobs);
+
+	/* Setup any scheduling information */
+	kbasep_js_clear_job_retry_submit(atom);
+
+	/* Lock for state available during IRQ */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (!kbase_js_dep_validate(kctx, atom)) {
+		/* Dependencies could not be represented */
+		--(js_kctx_info->ctx.nr_jobs);
+
+		/* Setting atom status back to queued as it still has unresolved
+		 * dependencies */
+		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		goto out_unlock;
+	}
+
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_READY);
+	KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
+
+	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* Context Attribute Refcounting */
+	kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom);
+
+	if (enqueue_required) {
+		if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false))
+			timer_sync = kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+		else
+			timer_sync = kbase_js_ctx_list_add_unpullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+	}
+	/* If this context is active and the atom is the first on its slot,
+	 * kick the job manager to attempt to fast-start the atom */
+	if (enqueue_required && kctx == kbdev->hwaccess.active_kctx)
+		kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+	/* End runpool transaction */
+
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+			/* A job got added while/after kbase_job_zap_context()
+			 * was called on a non-scheduled context. Kill that job
+			 * by killing the context. */
+			kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx,
+					false);
+		} else if (js_kctx_info->ctx.nr_jobs == 1) {
+			/* Handle Refcount going from 0 to 1: schedule the
+			 * context on the Queue */
+			KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+			dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+
+			/* Queue was updated - caller must try to
+			 * schedule the head context */
+			WARN_ON(!enqueue_required);
+		}
+	}
+out_unlock:
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return enqueue_required;
+}
+
+void kbasep_js_remove_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc,
+			kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* De-refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
+	--(js_kctx_info->ctx.nr_jobs);
+}
+
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	unsigned long flags;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool attr_state_changed;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+	kbasep_js_remove_job(kbdev, kctx, katom);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* The atom has 'finished' (will not be re-run), so no need to call
+	 * kbasep_js_has_atom_finished().
+	 *
+	 * This is because it returns false for soft-stopped atoms, but we
+	 * want to override that, because we're cancelling an atom regardless of
+	 * whether it was soft-stopped or not */
+	attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
+			&katom_retained_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return attr_state_changed;
+}
+
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	bool result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	return result;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+		int as_nr)
+{
+	unsigned long flags;
+	struct kbase_context *found_kctx = NULL;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	found_kctx = kbdev->as_to_kctx[as_nr];
+
+	if (found_kctx != NULL)
+		kbase_ctx_sched_retain_ctx_refcount(found_kctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return found_kctx;
+}
+
+/**
+ * kbasep_js_release_result - Try running more jobs after releasing a context
+ *                            and/or atom
+ *
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed
+ *
+ * This collates a set of actions that must happen whilst hwaccess_lock is held.
+ *
+ * This includes running more jobs when:
+ * - The previously released kctx caused a ctx attribute change,
+ * - The released atom caused a ctx attribute change,
+ * - Slots were previously blocked due to affinity restrictions,
+ * - Submission during IRQ handling failed.
+ *
+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were
+ *         changed. The caller should try scheduling all contexts
+ */
+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state,
+		bool runpool_ctx_attr_change)
+{
+	struct kbasep_js_device_data *js_devdata;
+	kbasep_js_release_result result = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom_retained_state != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (js_devdata->nr_user_contexts_running != 0) {
+		bool retry_submit = false;
+		int retry_jobslot = 0;
+
+		if (katom_retained_state)
+			retry_submit = kbasep_js_get_atom_retry_submit_slot(
+					katom_retained_state, &retry_jobslot);
+
+		if (runpool_ctx_attr_change || retry_submit) {
+			/* A change in runpool ctx attributes might mean we can
+			 * run more jobs than before  */
+			result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+
+			KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB,
+						kctx, NULL, 0u, retry_jobslot);
+		}
+	}
+	return result;
+}
+
+/*
+ * Internal function to release the reference on a ctx and an atom's "retained
+ * state", only taking the runpool and as transaction mutexes
+ *
+ * This also starts more jobs running in the case of an ctx-attribute state
+ * change
+ *
+ * This does none of the followup actions for scheduling:
+ * - It does not schedule in a new context
+ * - It does not requeue or handle dying contexts
+ *
+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead
+ *
+ * Requires:
+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
+ * - Context has a non-zero refcount
+ * - Caller holds js_kctx_info->ctx.jsctx_mutex
+ * - Caller holds js_devdata->runpool_mutex
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	kbasep_js_release_result release_result = 0u;
+	bool runpool_ctx_attr_change = false;
+	int kctx_as_nr;
+	int new_ref_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	kctx_as_nr = kctx->as_nr;
+	KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/*
+	 * Transaction begins on AS and runpool_irq
+	 *
+	 * Assert about out calling contract
+	 */
+	mutex_lock(&kbdev->pm.lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/* Update refcount */
+	kbase_ctx_sched_release_ctx(kctx);
+	new_ref_count = atomic_read(&kctx->refcount);
+
+	/* Release the atom if it finished (i.e. wasn't soft-stopped) */
+	if (kbasep_js_has_atom_finished(katom_retained_state))
+		runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(
+				kbdev, kctx, katom_retained_state);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u,
+			new_ref_count);
+
+	if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) &&
+			!kbase_pm_is_suspending(kbdev)) {
+		/* Context is kept scheduled into an address space even when
+		 * there are no jobs, in this case we have to handle the
+		 * situation where all jobs have been evicted from the GPU and
+		 * submission is disabled.
+		 *
+		 * At this point we re-enable submission to allow further jobs
+		 * to be executed
+		 */
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+	}
+
+	/* Make a set of checks to see if the context should be scheduled out.
+	 * Note that there'll always be at least 1 reference to the context
+	 * which was previously acquired by kbasep_js_schedule_ctx(). */
+	if (new_ref_count == 1 &&
+		(!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
+							kbdev->pm.suspending)) {
+		int num_slots = kbdev->gpu_props.num_job_slots;
+		int slot;
+
+		/* Last reference, and we've been told to remove this context
+		 * from the Run Pool */
+		dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d",
+				kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
+				kbasep_js_is_submit_allowed(js_devdata, kctx));
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_mmu_as_released(kctx->as_nr);
+#endif
+		KBASE_TLSTREAM_TL_NRET_AS_CTX(&kbdev->as[kctx->as_nr], kctx);
+
+		kbase_backend_release_ctx_irq(kbdev, kctx);
+
+		if (kbdev->hwaccess.active_kctx == kctx)
+			kbdev->hwaccess.active_kctx = NULL;
+
+		/* Ctx Attribute handling
+		 *
+		 * Releasing atoms attributes must either happen before this, or
+		 * after the KCTX_SHEDULED flag is changed, otherwise we
+		 * double-decount the attributes
+		 */
+		runpool_ctx_attr_change |=
+			kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
+
+		/* Releasing the context and katom retained state can allow
+		 * more jobs to run */
+		release_result |=
+			kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev,
+						kctx, katom_retained_state,
+						runpool_ctx_attr_change);
+
+		/*
+		 * Transaction ends on AS and runpool_irq:
+		 *
+		 * By this point, the AS-related data is now clear and ready
+		 * for re-use.
+		 *
+		 * Since releases only occur once for each previous successful
+		 * retain, and no more retains are allowed on this context, no
+		 * other thread will be operating in this
+		 * code whilst we are
+		 */
+
+		/* Recalculate pullable status for all slots */
+		for (slot = 0; slot < num_slots; slot++) {
+			if (kbase_js_ctx_pullable(kctx, slot, false))
+				kbase_js_ctx_list_add_pullable_nolock(kbdev,
+						kctx, slot);
+		}
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		kbase_backend_release_ctx_noirq(kbdev, kctx);
+
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* Note: Don't reuse kctx_as_nr now */
+
+		/* Synchronize with any timers */
+		kbase_backend_ctx_count_changed(kbdev);
+
+		/* update book-keeping info */
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+		/* Signal any waiter that the context is not scheduled, so is
+		 * safe for termination - once the jsctx_mutex is also dropped,
+		 * and jobs have finished. */
+		wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+		/* Queue an action to occur after we've dropped the lock */
+		release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED |
+			KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+	} else {
+		kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
+				katom_retained_state, runpool_ctx_attr_change);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return release_result;
+}
+
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	/* Setup a dummy katom_retained_state */
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+							&katom_retained_state);
+}
+
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx, bool has_pm_ref)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* This is called if and only if you've you've detached the context from
+	 * the Runpool Queue, and not added it back to the Runpool
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Dying: don't requeue, but kill all jobs on the context. This
+		 * happens asynchronously */
+		dev_dbg(kbdev->dev,
+			"JS: ** Killing Context %p on RunPool Remove **", kctx);
+		kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
+	}
+}
+
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL)
+		kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+			&katom_retained_state);
+}
+
+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into
+ * kbase_js_sched_all() */
+static void kbasep_js_runpool_release_ctx_no_schedule(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+	struct kbasep_js_atom_retained_state katom_retained_state_struct;
+	struct kbasep_js_atom_retained_state *katom_retained_state =
+		&katom_retained_state_struct;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	kbasep_js_atom_retained_state_init_invalid(katom_retained_state);
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* NOTE: could return release_result if the caller would like to know
+	 * whether it should schedule a new context, but currently no callers do
+	 */
+}
+
+void kbase_js_set_timeouts(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_backend_timeouts_changed(kbdev);
+}
+
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	bool kctx_suspended = false;
+	int as_nr;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Pick available address space for this context */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	as_nr = kbase_ctx_sched_retain_ctx(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		as_nr = kbase_backend_find_and_release_free_address_space(
+				kbdev, kctx);
+		if (as_nr != KBASEP_AS_NR_INVALID) {
+			/* Attempt to retain the context again, this should
+			 * succeed */
+			mutex_lock(&kbdev->mmu_hw_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			as_nr = kbase_ctx_sched_retain_ctx(kctx);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&kbdev->mmu_hw_mutex);
+
+			WARN_ON(as_nr == KBASEP_AS_NR_INVALID);
+		}
+	}
+	if (as_nr == KBASEP_AS_NR_INVALID)
+		return false; /* No address spaces currently available */
+
+	/*
+	 * Atomic transaction on the Context and Run Pool begins
+	 */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Check to see if context is dying due to kbase_job_zap_context() */
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Roll back the transaction so far and return */
+		kbase_ctx_sched_release_ctx(kctx);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL,
+				0u,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	kbase_ctx_flag_set(kctx, KCTX_SCHEDULED);
+
+	/* Assign context to previously chosen address space */
+	if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
+		/* Roll back the transaction so far and return */
+		kbase_ctx_sched_release_ctx(kctx);
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	kbdev->hwaccess.active_kctx = kctx;
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
+#endif
+	KBASE_TLSTREAM_TL_RET_AS_CTX(&kbdev->as[kctx->as_nr], kctx);
+
+	/* Cause any future waiter-on-termination to wait until the context is
+	 * descheduled */
+	wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+	/* Re-check for suspending: a suspend could've occurred, and all the
+	 * contexts could've been removed from the runpool before we took this
+	 * lock. In this case, we don't want to allow this context to run jobs,
+	 * we just want it out immediately.
+	 *
+	 * The DMB required to read the suspend flag was issued recently as part
+	 * of the hwaccess_lock locking. If a suspend occurs *after* that lock
+	 * was taken (i.e. this condition doesn't execute), then the
+	 * kbasep_js_suspend() code will cleanup this context instead (by virtue
+	 * of it being called strictly after the suspend flag is set, and will
+	 * wait for this lock to drop) */
+	if (kbase_pm_is_suspending(kbdev)) {
+		/* Cause it to leave at some later point */
+		bool retained;
+
+		retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+		KBASE_DEBUG_ASSERT(retained);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+		kctx_suspended = true;
+	}
+
+	/* Transaction complete */
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Synchronize with any timers */
+	kbase_backend_ctx_count_changed(kbdev);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	/* Note: after this point, the context could potentially get scheduled
+	 * out immediately */
+
+	if (kctx_suspended) {
+		/* Finishing forcing out the context due to a suspend. Use a
+		 * variant of kbasep_js_runpool_release_ctx() that doesn't
+		 * schedule a new context, to prevent a risk of recursion back
+		 * into this function */
+		kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx);
+		return false;
+	}
+	return true;
+}
+
+static bool kbase_js_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+			kbase_backend_use_ctx_sched(kbdev, kctx)) {
+		/* Context already has ASID - mark as active */
+		kbdev->hwaccess.active_kctx = kctx;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return true; /* Context already scheduled */
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return kbasep_js_schedule_ctx(kbdev, kctx);
+}
+
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	bool is_scheduled;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* This must never be attempted whilst suspending - i.e. it should only
+	 * happen in response to a syscall from a user-space thread */
+	BUG_ON(kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Mark the context as privileged */
+	kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED);
+
+	is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED);
+	if (!is_scheduled) {
+		/* Add the context to the pullable list */
+		if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0))
+			kbase_js_sync_timers(kbdev);
+
+		/* Fast-starting requires the jsctx_mutex to be dropped,
+		 * because it works on multiple ctxs */
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		/* Try to schedule the context in */
+		kbase_js_sched_all(kbdev);
+
+		/* Wait for the context to be scheduled in */
+		wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			   kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	} else {
+		/* Already scheduled in - We need to retain it to keep the
+		 * corresponding address space */
+		kbasep_js_runpool_retain_ctx(kbdev, kctx);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+}
+KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx);
+
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* We don't need to use the address space anymore */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Release the context - it will be scheduled out */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	kbase_js_sched_all(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx);
+
+void kbasep_js_suspend(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+	u16 retained = 0u;
+	int nr_privileged_ctx = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Prevent all contexts from submitting */
+	js_devdata->runpool_irq.submit_allowed = 0;
+
+	/* Retain each of the contexts, so we can cause it to leave even if it
+	 * had no refcount to begin with */
+	for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
+		struct kbase_context *kctx = kbdev->as_to_kctx[i];
+
+		retained = retained << 1;
+
+		if (kctx) {
+			kbase_ctx_sched_retain_ctx_refcount(kctx);
+			retained |= 1u;
+			/* We can only cope with up to 1 privileged context -
+			 * the instrumented context. It'll be suspended by
+			 * disabling instrumentation */
+			if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+				++nr_privileged_ctx;
+				WARN_ON(nr_privileged_ctx != 1);
+			}
+		}
+	}
+	CSTD_UNUSED(nr_privileged_ctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* De-ref the previous retain to ensure each context gets pulled out
+	 * sometime later. */
+	for (i = 0;
+		 i < BASE_MAX_NR_AS;
+		 ++i, retained = retained >> 1) {
+		struct kbase_context *kctx = kbdev->as_to_kctx[i];
+
+		if (retained & 1u)
+			kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	/* Caller must wait for all Power Manager active references to be
+	 * dropped */
+}
+
+void kbasep_js_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int js;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_context *kctx, *n;
+
+		list_for_each_entry_safe(kctx, n,
+				&kbdev->js_data.ctx_list_unpullable[js],
+				jctx.sched_info.ctx.ctx_list_entry[js]) {
+			struct kbasep_js_kctx_info *js_kctx_info;
+			unsigned long flags;
+			bool timer_sync = false;
+
+			js_kctx_info = &kctx->jctx.sched_info;
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+				kbase_js_ctx_pullable(kctx, js, false))
+				timer_sync =
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			if (timer_sync)
+				kbase_backend_ctx_count_changed(kbdev);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		}
+	}
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	/* Restart atom processing */
+	kbase_js_sched_all(kbdev);
+
+	/* JS Resume complete */
+}
+
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if ((katom->core_req & BASE_JD_REQ_FS) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE |
+								BASE_JD_REQ_T)))
+		return false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) &&
+	    (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T)))
+		return false;
+
+	return true;
+}
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_FS)
+		return 0;
+
+	if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+		if (katom->device_nr == 1 &&
+				kbdev->gpu_props.num_core_groups == 2)
+			return 2;
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+			return 2;
+	}
+
+	return 1;
+}
+
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom)
+{
+	bool enqueue_required;
+
+	katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	/* If slot will transition from unpullable to pullable then add to
+	 * pullable list */
+	if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
+		enqueue_required = true;
+	} else {
+		enqueue_required = false;
+	}
+	if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
+			(katom->pre_dep && (katom->pre_dep->atom_flags &
+			KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		int prio = katom->sched_priority;
+		int js = katom->slot_nr;
+		struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+		list_add_tail(&katom->queue, &queue->x_dep_head);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+		enqueue_required = false;
+	} else {
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		/* Add atom to ring buffer. */
+		jsctx_tree_add(kctx, katom);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+	}
+
+	return enqueue_required;
+}
+
+/**
+ * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the
+ *                         runnable_tree, ready for execution
+ * @katom: Atom to submit
+ *
+ * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set,
+ * but is still present in the x_dep list. If @katom has a same-slot dependent
+ * atom then that atom (and any dependents) will also be moved.
+ */
+static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock);
+
+	while (katom) {
+		WARN_ON(!(katom->atom_flags &
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
+
+		if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+			list_del(&katom->queue);
+			katom->atom_flags &=
+					~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+			jsctx_tree_add(katom->kctx, katom);
+			katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+		} else {
+			break;
+		}
+
+		katom = katom->post_dep;
+	}
+}
+
+
+/**
+ * kbase_js_evict_deps - Evict dependencies of a failed atom.
+ * @kctx:       Context pointer
+ * @katom:      Pointer to the atom that has failed.
+ * @js:         The job slot the katom was run on.
+ * @prio:       Priority of the katom.
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propogated to all dependent atoms.
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom, int js, int prio)
+{
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+	struct kbase_jd_atom *next_katom = katom->post_dep;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (next_katom) {
+		KBASE_DEBUG_ASSERT(next_katom->status !=
+				KBASE_JD_ATOM_STATE_HW_COMPLETED);
+		next_katom->will_fail_event_code = katom->event_code;
+
+	}
+
+	/* Has cross slot depenency. */
+	if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE |
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		/* Remove dependency.*/
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+		/* Fail if it had a data dependency. */
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
+			x_dep->will_fail_event_code = katom->event_code;
+		}
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)
+			kbase_js_move_to_tree(x_dep);
+	}
+}
+
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	int pulled;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+		return NULL;
+	if (kbase_pm_is_suspending(kbdev))
+		return NULL;
+
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return NULL;
+	if (kctx->blocked_js[js][katom->sched_priority])
+		return NULL;
+	if (atomic_read(&katom->blocked))
+		return NULL;
+
+	/* Due to ordering restrictions when unpulling atoms on failure, we do
+	 * not allow multiple runs of fail-dep atoms from the same context to be
+	 * present on the same slot */
+	if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) {
+		struct kbase_jd_atom *prev_atom =
+				kbase_backend_inspect_tail(kbdev, js);
+
+		if (prev_atom && prev_atom->kctx != kctx)
+			return NULL;
+	}
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return NULL;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kbdev, js))
+			return NULL;
+	}
+
+	kbase_ctx_flag_set(kctx, KCTX_PULLED);
+
+	pulled = atomic_inc_return(&kctx->atoms_pulled);
+	if (pulled == 1 && !kctx->slots_pullable) {
+		WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+		kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+		atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+	}
+	atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
+	kctx->atoms_pulled_slot_pri[katom->slot_nr][katom->sched_priority]++;
+	jsctx_rb_pull(kctx, katom);
+
+	kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+	katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+
+	katom->ticks = 0;
+
+	return katom;
+}
+
+
+static void js_return_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+									work);
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	struct kbasep_js_atom_retained_state retained_state;
+	int js = katom->slot_nr;
+	int prio = katom->sched_priority;
+	bool timer_sync = false;
+	bool context_idle = false;
+	unsigned long flags;
+	base_jd_core_req core_req = katom->core_req;
+	u64 affinity = katom->affinity;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	kbasep_js_atom_retained_state_copy(&retained_state, katom);
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	atomic_dec(&kctx->atoms_pulled);
+	atomic_dec(&kctx->atoms_pulled_slot[js]);
+
+	atomic_dec(&katom->blocked);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kctx->atoms_pulled_slot_pri[js][katom->sched_priority]--;
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
+			jsctx_rb_none_to_pull(kctx, js))
+		timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js);
+
+	/* If this slot has been blocked due to soft-stopped atoms, and all
+	 * atoms have now been processed, then unblock the slot */
+	if (!kctx->atoms_pulled_slot_pri[js][prio] &&
+			kctx->blocked_js[js][prio]) {
+		kctx->blocked_js[js][prio] = false;
+
+		/* Only mark the slot as pullable if the context is not idle -
+		 * that case is handled below */
+		if (atomic_read(&kctx->atoms_pulled) &&
+				kbase_js_ctx_pullable(kctx, js, true))
+			timer_sync |= kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, js);
+	}
+
+	if (!atomic_read(&kctx->atoms_pulled)) {
+		if (!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
+				!kbase_ctx_flag(kctx, KCTX_DYING)) {
+			int num_slots = kbdev->gpu_props.num_job_slots;
+			int slot;
+
+			if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+				kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+			for (slot = 0; slot < num_slots; slot++) {
+				if (kbase_js_ctx_pullable(kctx, slot, true))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, slot);
+			}
+		}
+
+		kbase_jm_idle_ctx(kbdev, kctx);
+
+		context_idle = true;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (context_idle) {
+		WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+		kbase_pm_context_idle(kbdev);
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+							&retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
+			coreref_state);
+}
+
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_rb_unpull(kctx, katom);
+
+	WARN_ON(work_pending(&katom->work));
+
+	/* Block re-submission until workqueue has run */
+	atomic_inc(&katom->blocked);
+
+	kbase_job_check_leave_disjoint(kctx->kbdev, katom);
+
+	INIT_WORK(&katom->work, js_return_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+						struct kbase_jd_atom *katom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	bool timer_sync = false;
+	int atom_slot;
+	bool context_idle = false;
+	int prio = katom->sched_priority;
+
+	kbdev = kctx->kbdev;
+	atom_slot = katom->slot_nr;
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
+		context_idle = !atomic_dec_return(&kctx->atoms_pulled);
+		atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
+		kctx->atoms_pulled_slot_pri[atom_slot][prio]--;
+
+		if (!atomic_read(&kctx->atoms_pulled) &&
+				!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		/* If this slot has been blocked due to soft-stopped atoms, and
+		 * all atoms have now been processed, then unblock the slot */
+		if (!kctx->atoms_pulled_slot_pri[atom_slot][prio]
+				&& kctx->blocked_js[atom_slot][prio]) {
+			kctx->blocked_js[atom_slot][prio] = false;
+			if (kbase_js_ctx_pullable(kctx, atom_slot, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+						kbdev, kctx, atom_slot);
+		}
+	}
+	WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE));
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
+			jsctx_rb_none_to_pull(kctx, atom_slot)) {
+		if (!list_empty(
+			&kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot]))
+			timer_sync |= kbase_js_ctx_list_remove_nolock(
+					kctx->kbdev, kctx, atom_slot);
+	}
+
+	/*
+	 * If submission is disabled on this context (most likely due to an
+	 * atom failure) and there are now no atoms left in the system then
+	 * re-enable submission so that context can be scheduled again.
+	 */
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
+					!atomic_read(&kctx->atoms_pulled) &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+		int js;
+
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	} else if (katom->x_post_dep &&
+			kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+		int js;
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	}
+
+	/* Mark context as inactive. The pm reference will be dropped later in
+	 * jd_done_worker().
+	 */
+	if (context_idle)
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return context_idle;
+}
+
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp)
+{
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+
+	kbdev = kctx->kbdev;
+
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (katom->will_fail_event_code)
+		katom->event_code = katom->will_fail_event_code;
+
+	katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
+
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_js_evict_deps(kctx, katom, katom->slot_nr,
+				katom->sched_priority);
+	}
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
+				katom->slot_nr), NULL, 0);
+#endif
+
+	kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
+
+	/* Unblock cross dependency if present */
+	if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
+			!(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) &&
+			(x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+		bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false);
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+		kbase_js_move_to_tree(x_dep);
+		if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false))
+			kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx,
+					x_dep->slot_nr);
+
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)
+			return x_dep;
+	}
+
+	return NULL;
+}
+
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *last_active;
+	bool timer_sync = false;
+	bool ctx_waiting = false;
+
+	js_devdata = &kbdev->js_data;
+
+	down(&js_devdata->schedule_sem);
+	mutex_lock(&js_devdata->queue_mutex);
+
+	last_active = kbdev->hwaccess.active_kctx;
+
+	while (js_mask) {
+		int js;
+
+		js = ffs(js_mask) - 1;
+
+		while (1) {
+			struct kbase_context *kctx;
+			unsigned long flags;
+			bool context_idle = false;
+
+			kctx = kbase_js_ctx_list_pop_head(kbdev, js);
+
+			if (!kctx) {
+				js_mask &= ~(1 << js);
+				break; /* No contexts on pullable list */
+			}
+
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) {
+				context_idle = true;
+
+				if (kbase_pm_context_active_handle_suspend(
+									kbdev,
+				      KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+					/* Suspend pending - return context to
+					 * queue and stop scheduling */
+					mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					if (kbase_js_ctx_list_add_pullable_head(
+						kctx->kbdev, kctx, js))
+						kbase_js_sync_timers(kbdev);
+					mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					mutex_unlock(&js_devdata->queue_mutex);
+					up(&js_devdata->schedule_sem);
+					return;
+				}
+				kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+			}
+
+			if (!kbase_js_use_ctx(kbdev, kctx)) {
+				mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				/* Context can not be used at this time */
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+				if (kbase_js_ctx_pullable(kctx, js, false)
+				    || kbase_ctx_flag(kctx, KCTX_PRIVILEGED))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev, kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				if (context_idle) {
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				}
+
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+				break;
+			}
+			mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_ctx_flag_clear(kctx, KCTX_PULLED);
+
+			if (!kbase_jm_kick(kbdev, 1 << js))
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+
+			if (!kbase_ctx_flag(kctx, KCTX_PULLED)) {
+				bool pullable = kbase_js_ctx_pullable(kctx, js,
+						true);
+
+				/* Failed to pull jobs - push to head of list.
+				 * Unless this context is already 'active', in
+				 * which case it's effectively already scheduled
+				 * so push it to the back of the list. */
+				if (pullable && kctx == last_active)
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev,
+							kctx, js);
+				else if (pullable)
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev,
+							kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+								kctx->kbdev,
+								kctx, js);
+
+				/* If this context is not the active context,
+				 * but the active context is pullable on this
+				 * slot, then we need to remove the active
+				 * marker to prevent it from submitting atoms in
+				 * the IRQ handler, which would prevent this
+				 * context from making progress. */
+				if (last_active && kctx != last_active &&
+						kbase_js_ctx_pullable(
+						last_active, js, true))
+					ctx_waiting = true;
+
+				if (context_idle) {
+					kbase_jm_idle_ctx(kbdev, kctx);
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				} else {
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+				}
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+				js_mask &= ~(1 << js);
+				break; /* Could not run atoms on this slot */
+			}
+
+			/* Push to back of list */
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev, kctx, js);
+			else
+				timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		}
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	if (kbdev->hwaccess.active_kctx == last_active && ctx_waiting)
+		kbdev->hwaccess.active_kctx = NULL;
+
+	mutex_unlock(&js_devdata->queue_mutex);
+	up(&js_devdata->schedule_sem);
+}
+
+void kbase_js_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	int js;
+
+	/*
+	 * Critical assumption: No more submission is possible outside of the
+	 * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs)
+	 * whilst the struct kbase_context is terminating.
+	 */
+
+	/* First, atomically do the following:
+	 * - mark the context as dying
+	 * - try to evict it from the queue */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_set(kctx, KCTX_DYING);
+
+	dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+
+	/*
+	 * At this point we know:
+	 * - If eviction succeeded, it was in the queue, but now no
+	 *   longer is
+	 *  - We must cancel the jobs here. No Power Manager active reference to
+	 *    release.
+	 *  - This happens asynchronously - kbase_jd_zap_context() will wait for
+	 *    those jobs to be killed.
+	 * - If eviction failed, then it wasn't in the queue. It is one
+	 *   of the following:
+	 *  - a. it didn't have any jobs, and so is not in the Queue or
+	 *       the Run Pool (not scheduled)
+	 *   - Hence, no more work required to cancel jobs. No Power Manager
+	 *     active reference to release.
+	 *  - b. it was in the middle of a scheduling transaction (and thus must
+	 *       have at least 1 job). This can happen from a syscall or a
+	 *       kernel thread. We still hold the jsctx_mutex, and so the thread
+	 *       must be waiting inside kbasep_js_try_schedule_head_ctx(),
+	 *       before checking whether the runpool is full. That thread will
+	 *       continue after we drop the mutex, and will notice the context
+	 *       is dying. It will rollback the transaction, killing all jobs at
+	 *       the same time. kbase_jd_zap_context() will wait for those jobs
+	 *       to be killed.
+	 *   - Hence, no more work required to cancel jobs, or to release the
+	 *     Power Manager active reference.
+	 *  - c. it is scheduled, and may or may not be running jobs
+	 * - We must cause it to leave the runpool by stopping it from
+	 * submitting any more jobs. When it finally does leave,
+	 * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs
+	 * (because it is dying), release the Power Manager active reference,
+	 * and will not requeue the context in the queue.
+	 * kbase_jd_zap_context() will wait for those jobs to be killed.
+	 *  - Hence, work required just to make it leave the runpool. Cancelling
+	 *    jobs and releasing the Power manager active reference will be
+	 *    handled when it leaves the runpool.
+	 */
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (!list_empty(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+				list_del_init(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+		}
+
+		/* The following events require us to kill off remaining jobs
+		 * and update PM book-keeping:
+		 * - we evicted it correctly (it must have jobs to be in the
+		 *   Queue)
+		 *
+		 * These events need no action, but take this path anyway:
+		 * - Case a: it didn't have any jobs, and was never in the Queue
+		 * - Case b: scheduling transaction will be partially rolled-
+		 *           back (this already cancels the jobs)
+		 */
+
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+
+		/* Only cancel jobs when we evicted from the
+		 * queue. No Power Manager active reference was held.
+		 *
+		 * Having is_dying set ensures that this kills, and
+		 * doesn't requeue */
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+	} else {
+		unsigned long flags;
+		bool was_retained;
+
+		/* Case c: didn't evict, but it is scheduled - it's in the Run
+		 * Pool */
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+		dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+
+		/* Disable the ctx from submitting any more jobs */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		/* Retain and (later) release the context whilst it is is now
+		 * disallowed from submitting jobs - ensures that someone
+		 * somewhere will be removing the context later on */
+		was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+		/* Since it's scheduled and we have the jsctx_mutex, it must be
+		 * retained successfully */
+		KBASE_DEBUG_ASSERT(was_retained);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+
+		/* Cancel any remaining running jobs for this kctx - if any.
+		 * Submit is disallowed which takes effect immediately, so no
+		 * more new jobs will appear after we do this. */
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+			kbase_job_slot_hardstop(kctx, js, NULL);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+									kctx);
+
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u);
+
+	/* After this, you must wait on both the
+	 * kbase_jd_context::zero_jobs_wait and the
+	 * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs
+	 * to be destroyed, and the context to be de-scheduled (if it was on the
+	 * runpool).
+	 *
+	 * kbase_jd_zap_context() will do this. */
+}
+
+static inline int trace_get_refcnt(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	return atomic_read(&kctx->refcount);
+}
+
+/**
+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context
+ * @kctx:     Pointer to context.
+ * @callback: Pointer to function to call for each job.
+ *
+ * Call a function on all jobs belonging to a non-queued, non-running
+ * context, and detach the jobs from the context as it goes.
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * Atoms will be removed from the queue, so this must only be called when
+ * cancelling jobs (which occurs as part of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_ctx_job_cb callback)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	u32 js;
+
+	kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL,
+					0u, trace_get_refcnt(kbdev, kctx));
+
+	/* Invoke callback on jobs on each slot in turn */
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		jsctx_queue_foreach(kctx, js, callback);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
new file mode 100644
index 0000000..ddada8e
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -0,0 +1,925 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler APIs.
+ */
+
+#ifndef _KBASE_JS_H_
+#define _KBASE_JS_H_
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_context.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_debug.h"
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js Job Scheduler Internal APIs
+ * @{
+ *
+ * These APIs are Internal to KBase.
+ */
+
+/**
+ * @brief Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * @brief Halt the Job Scheduler.
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero intitialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the struct kbase_context must be zero intitialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the context is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guarenteed not to update the Policy Queue. And so, the caller is
+ * guarenteed to not need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is false.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold hwaccess_lock (as this will be obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return true indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return false indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
+ *   the atom
+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ *   remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions.
+ *
+ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Completely remove a job chain from the Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it is not being killed with kbasep_jd_cancel()
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold the hwaccess_lock, (as this will be obtained
+ *   internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * @return true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs
+ * @return false otherwise
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_jd_atom *katom);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold mmu_hw_mutex and hwaccess_lock, because they will be
+ *   used internally.
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locks must be held by the caller:
+ * - mmu_hw_mutex, hwaccess_lock
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that is stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *   If the hwaccess_lock is already held, then the caller should use
+ *   kbasep_js_runpool_lookup_ctx_nolock() instead.
+ *
+ * @return a valid struct kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
+
+/**
+ * @brief Handling the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.  \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero and the context \em might be scheduled out
+ * (depending on whether the Scheudling Policy has deemed it so, or if it has run
+ * out of jobs).
+ *
+ * If the context does get scheduled out, then The following actions will be
+ * taken as part of deschduling a context:
+ * - For the context being descheduled:
+ *  - If the context is in the processing of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ *  - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ *  - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ *  - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * - Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * - Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * - Attempt scheduling in a new context (if one is available), and if necessary,
+ * running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function \b cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a \a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * - Releasing the atom's context attributes
+ * - Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * @brief Variant of kbase_js_runpool_release_ctx() that assumes that
+ * kbasep_js_device_data::runpool_mutex and
+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
+ * attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * @brief Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the function will wait
+ * for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space reserved) until
+ * kbasep_js_release_privileged_ctx is called).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Release a privileged context, allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - hwaccess_lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Mangement active refcount to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduler's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time.
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * @brief Resume the Job Scheduler after a Power Management Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * - Schedules contexts back into the runpool
+ * - Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * @brief Submit an atom to the job scheduler.
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to submit
+ *
+ * @return Whether the context requires to be enqueued. */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom);
+
+/**
+  * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
+  * @kctx:  Context Pointer
+  * @prio:  Priority (specifies the queue together with js).
+  * @js:    Job slot (specifies the queue together with prio).
+  *
+  * Pushes all possible atoms from the linked list to the ringbuffer.
+  * Number of atoms are limited to free space in the ringbuffer and
+  * number of available atoms in the linked list.
+  *
+  */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
+/**
+ * @brief Pull an atom from a context in the job scheduler for execution.
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context to pull from
+ * @param[in] js    Job slot to pull from
+ * @return          Pointer to an atom, or NULL if there are no atoms for this
+ *                  slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * @brief Return an atom to the job scheduler ringbuffer.
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to unpull
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom from jd_done_worker(), removing it from the job
+ * scheduler ringbuffer.
+ *
+ * If the atom failed then all dependee atoms marked for failure propagation
+ * will also fail.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] katom Pointer to the atom to complete
+ * @return true if the context is now idle (no jobs pulled)
+ *         false otherwise
+ */
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom.
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] katom         Pointer to the atom to complete
+ * @param[in] end_timestamp The time that the atom completed (may be NULL)
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp);
+
+/**
+ * @brief Submit atoms from all available contexts.
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ *
+ * @param[in] kbdev    Device pointer
+ * @param[in] js_mask  Mask of job slots to submit to
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_jd_zap_context - Attempt to deschedule a context that is being
+ *                        destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove a context from any internal job scheduler queues
+ * and perform any other actions to ensure a context will not be submitted
+ * from.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * @brief Validate an atom
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * @param[in] kbdev  Device pointer
+ * @param[in] katom  Atom to validate
+ * @return           true if atom is valid
+ *                   false otherwise
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user specified data
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used, if a timeout
+ * is negative then the default is set.
+ */
+void kbase_js_set_timeouts(struct kbase_device *kbdev);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * @brief Check that a context is allowed to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * As with any bool, never test the return value with true.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 test_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	test_bit = (u16) (1u << kctx->as_nr);
+
+	return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+}
+
+/**
+ * @brief Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 set_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	set_bit = (u16) (1u << kctx->as_nr);
+
+	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * @brief Prevent a context from submitting more jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 clear_bit;
+	u16 clear_mask;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	clear_bit = (u16) (1u << kctx->as_nr);
+	clear_mask = ~clear_bit;
+
+	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
+
+/**
+ * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom
+ */
+static inline void kbasep_js_clear_job_retry_submit(struct kbase_jd_atom *atom)
+{
+	atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Mark a slot as requiring resubmission by carrying that information on a
+ * completing atom.
+ *
+ * @note This can ASSERT in debug builds if the submit slot has been set to
+ * something other than the current value for @a js. This is because you might
+ * be unintentionally stopping more jobs being submitted on the old submit
+ * slot, and that might cause a scheduling-hang.
+ *
+ * @note If you can guarantee that the atoms for the original slot will be
+ * submitted on some other slot, then call kbasep_js_clear_job_retry_submit()
+ * first to silence the ASSERT.
+ */
+static inline void kbasep_js_set_job_retry_submit_slot(struct kbase_jd_atom *atom, int js)
+{
+	KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS);
+	KBASE_DEBUG_ASSERT((atom->retry_submit_on_slot ==
+					KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID)
+				|| (atom->retry_submit_on_slot == js));
+
+	atom->retry_submit_on_slot = js;
+}
+
+/**
+ * Create an initial 'invalid' atom retained state, that requires no
+ * atom-related work to be done on releasing with
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
+{
+	retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+	retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+	retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Copy atom state that can be made available after jd_done_nolock() is called
+ * on that atom.
+ */
+static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
+{
+	retained_state->event_code = katom->event_code;
+	retained_state->core_req = katom->core_req;
+	retained_state->retry_submit_on_slot = katom->retry_submit_on_slot;
+	retained_state->sched_priority = katom->sched_priority;
+	retained_state->device_nr = katom->device_nr;
+}
+
+/**
+ * @brief Determine whether an atom has finished (given its retained state),
+ * and so should be given back to userspace/removed from the system.
+ *
+ * Reasons for an atom not finishing include:
+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ *
+ * @param[in] katom_retained_state the retained state of the atom to check
+ * @return    false if the atom has not finished
+ * @return    !=false if the atom has finished
+ */
+static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
+}
+
+/**
+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
+ * code should just ignore it.
+ *
+ * @param[in] katom_retained_state the atom's retained state to check
+ * @return    false if the retained state is invalid, and can be ignored
+ * @return    !=false if the retained state is valid
+ */
+static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_atom_retained_state *katom_retained_state, int *res)
+{
+	int js = katom_retained_state->retry_submit_on_slot;
+
+	*res = js;
+	return (bool) (js >= 0);
+}
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guaranteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guaranteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_context *found_kctx;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	found_kctx = kbdev->as_to_kctx[as_nr];
+	KBASE_DEBUG_ASSERT(found_kctx == NULL ||
+			atomic_read(&found_kctx->refcount) > 0);
+
+	return found_kctx;
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+	++(js_devdata->nr_all_contexts_running);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+									S8_MAX);
+		++(js_devdata->nr_user_contexts_running);
+	}
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	--(js_devdata->nr_all_contexts_running);
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		--(js_devdata->nr_user_contexts_running);
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+	}
+}
+
+
+/**
+ * @brief Submit atoms from all available contexts to all job slots.
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ *
+ * @param[in] kbdev    Device pointer
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+	kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
+ *                                        to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treates priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ *         0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ *         KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+	if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+		return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+	return kbasep_js_atom_priority_to_relative[atom_prio];
+}
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+	unsigned int prio_idx;
+
+	KBASE_DEBUG_ASSERT(0 <= sched_prio
+			&& sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+	prio_idx = (unsigned int)sched_prio;
+
+	return kbasep_js_relative_priority_to_atom[prio_idx];
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100644
index 0000000..321506a
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,301 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+
+/*
+ * Private functions follow
+ */
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
+		++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) {
+			/* First refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+		--(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+			/* Last de-refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+	++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		/* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+	}
+
+	return runpool_state_changed;
+}
+
+/*
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		lockdep_assert_held(&kbdev->hwaccess_lock);
+		/* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+	}
+
+	/* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */
+	--(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* This context never submits, so don't track any scheduling attributes */
+		return;
+	}
+
+	/* Transfer attributes held in the context flags for contexts that have submit enabled */
+
+	/* ... More attributes can be added here ... */
+
+	/* The context should not have been scheduled yet, so ASSERT if this caused
+	 * runpool state changes (note that other threads *can't* affect the value
+	 * of runpool_state_changed, due to how it's calculated) */
+	KBASE_DEBUG_ASSERT(runpool_state_changed == false);
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed;
+	int i;
+
+	/* Retain any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled in, so update the runpool with the new attributes */
+			runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+
+			/* We don't need to know about state changed, because retaining a
+			 * context occurs on scheduling it, and that itself will also try
+			 * to run new atoms */
+			CSTD_UNUSED(runpool_state_changed);
+		}
+	}
+}
+
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+	int i;
+
+	/* Release any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled out, so update the runpool on the removed attributes */
+			runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom);
+	core_req = katom->core_req;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	/* We don't need to know about state changed, because retaining an
+	 * atom occurs on adding it, and that itself will also try to run
+	 * new atoms */
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom_retained_state);
+	core_req = katom_retained_state->core_req;
+
+	/* No-op for invalid atoms */
+	if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false)
+		return false;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	return runpool_state_changed;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
new file mode 100644
index 0000000..ce91833
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -0,0 +1,158 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_ctx_attr.h
+ * Job Scheduler Context Attribute APIs
+ */
+
+#ifndef _KBASE_JS_CTX_ATTR_H_
+#define _KBASE_JS_CTX_ATTR_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+
+/**
+ * Set the initial attributes of a context (when context create flags are set)
+ *
+ * Requires:
+ * - Hold the jsctx_mutex
+ */
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of a context
+ *
+ * This occurs on scheduling in the context on the runpool (but after
+ * is_scheduled is set)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ */
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Release all attributes of a context
+ *
+ * This occurs on scheduling out the context from the runpool (but before
+ * is_scheduled is cleared)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of an atom
+ *
+ * This occurs on adding an atom to a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ */
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * Release all attributes of an atom, given its retained state.
+ *
+ * This occurs after (permanently) removing an atom from a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * This is a no-op when \a katom_retained_state is invalid.
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+
+	return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	/* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+	return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+	return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100644
index 0000000..ba8b644
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,386 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+	base_jd_core_req core_req;
+	kbase_context_flags ctx_req;
+	u32 device_nr;
+};
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affect scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accomodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ */
+enum kbasep_js_ctx_attr {
+	/** Attribute indicating a context that contains Compute jobs. That is,
+	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE,
+
+	/** Attribute indicating a context that contains Non-Compute jobs. That is,
+	 * the context has some jobs that are \b not of type @ref
+	 * BASE_JD_REQ_ONLY_COMPUTE.
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+	/** Attribute indicating that a context contains compute-job atoms that
+	 * aren't restricted to a coherent group, and can run on all cores.
+	 *
+	 * Specifically, this is when the atom's \a core_req satisfy:
+	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
+	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+	 *
+	 * Such atoms could be blocked from running if one of the coherent groups
+	 * is being used by another job slot, so tracking this context attribute
+	 * allows us to prevent such situations.
+	 *
+	 * @note This doesn't take into account the 1-coregroup case, where all
+	 * compute atoms would effectively be able to run on 'all cores', but
+	 * contexts will still not always get marked with this attribute. Instead,
+	 * it is the caller's responsibility to take into account the number of
+	 * coregroups when interpreting this attribute.
+	 *
+	 * @note Whilst Tiler atoms are normally combined with
+	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+	 * enough to handle anyway.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+	/** Must be the last in the enum */
+	KBASEP_JS_CTX_ATTR_COUNT
+};
+
+enum {
+	/** Bit indicating that new atom should be started because this atom completed */
+	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
+	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not rollover (which would be undefined behavior), and so under
+ * the Total License model, it is free to make optimizations based on that (i.e.
+ * to remove masking).
+ */
+struct kbasep_js_device_data {
+	/* Sub-structure to collect together Job Scheduling data used in IRQ
+	 * context. The hwaccess_lock must be held when accessing. */
+	struct runpool_irq {
+		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
+		 * When bit 'N' is set in this, it indicates whether the context bound to address space
+		 * 'N' is allowed to submit jobs.
+		 */
+		u16 submit_allowed;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of contexts
+		 * that can fit into the runpool. This is currently BASE_MAX_NR_AS
+		 *
+		 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+		 * the refcount. Hence, it's not worthwhile reducing this to
+		 * bit-manipulation on u32s to save space (where in contrast, 4 bit
+		 * sub-fields would be easy to do and would save space).
+		 *
+		 * Whilst this must not become negative, the sign bit is used for:
+		 * - error detection in debug builds
+		 * - Optimization: it is undefined for a signed int to overflow, and so
+		 * the compiler can optimize for that never happening (thus, no masking
+		 * is required on updating the variable) */
+		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/*
+		 * Affinity management and tracking
+		 */
+		/** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
+		 * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
+		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
+		/** Refcount for each core owned by each slot. Used to generate the
+		 * slot_affinities array of bitvectors
+		 *
+		 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+		 * because it is refcounted only when a job is definitely about to be
+		 * submitted to a slot, and is de-refcounted immediately after a job
+		 * finishes */
+		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
+	} runpool_irq;
+
+	/**
+	 * Run Pool mutex, for managing contexts within the runpool.
+	 * Unless otherwise specified, you must hold this lock whilst accessing any
+	 * members that follow
+	 *
+	 * In addition, this is used to access:
+	 * - the kbasep_js_kctx_info::runpool substructure
+	 */
+	struct mutex runpool_mutex;
+
+	/**
+	 * Queue Lock, used to access the Policy's queue of contexts independently
+	 * of the Run Pool.
+	 *
+	 * Of course, you don't need the Run Pool lock to access this.
+	 */
+	struct mutex queue_mutex;
+
+	/**
+	 * Scheduling semaphore. This must be held when calling
+	 * kbase_jm_kick()
+	 */
+	struct semaphore schedule_sem;
+
+	/**
+	 * List of contexts that can currently be pulled from
+	 */
+	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
+	/**
+	 * List of contexts that can not currently be pulled from, but have
+	 * jobs currently running.
+	 */
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
+
+	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+	s8 nr_user_contexts_running;
+	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+	s8 nr_all_contexts_running;
+
+	/** Core Requirements to match up with base_js_atom's core_req memeber
+	 * @note This is a write-once member, and so no locking is required to read */
+	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+	u32 scheduling_period_ns;    /*< Value for JS_SCHEDULING_PERIOD_NS */
+	u32 soft_stop_ticks;	     /*< Value for JS_SOFT_STOP_TICKS */
+	u32 soft_stop_ticks_cl;	     /*< Value for JS_SOFT_STOP_TICKS_CL */
+	u32 hard_stop_ticks_ss;	     /*< Value for JS_HARD_STOP_TICKS_SS */
+	u32 hard_stop_ticks_cl;	     /*< Value for JS_HARD_STOP_TICKS_CL */
+	u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */
+	u32 gpu_reset_ticks_ss;	     /*< Value for JS_RESET_TICKS_SS */
+	u32 gpu_reset_ticks_cl;	     /*< Value for JS_RESET_TICKS_CL */
+	u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
+	u32 ctx_timeslice_ns;		 /**< Value for JS_CTX_TIMESLICE_NS */
+
+	/**< Value for JS_SOFT_JOB_TIMEOUT */
+	atomic_t soft_job_timeout_ms;
+
+	/** List of suspended soft jobs */
+	struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Support soft-stop on a single context */
+	bool softstop_always;
+#endif				/* CONFIG_MALI_DEBUG */
+
+	/** The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths).
+	 * @note This is a write-once member, and so no locking is required to read */
+	int init_status;
+
+	/* Number of contexts that can currently be pulled from */
+	u32 nr_contexts_pullable;
+
+	/* Number of contexts that can either be pulled from or are currently
+	 * running */
+	atomic_t nr_contexts_runnable;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+
+	/**
+	 * Job Scheduler Context information sub-structure. These members are
+	 * accessed regardless of whether the context is:
+	 * - In the Policy's Run Pool
+	 * - In the Policy's Queue
+	 * - Not queued nor in the Run Pool.
+	 *
+	 * You must obtain the jsctx_mutex before accessing any other members of
+	 * this substructure.
+	 *
+	 * You may not access any of these members from IRQ context.
+	 */
+	struct kbase_jsctx {
+		struct mutex jsctx_mutex;		    /**< Job Scheduler Context lock */
+
+		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+		 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
+		 * for such jobs*/
+		u32 nr_jobs;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of atoms on
+		 * the context. **/
+		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/**
+		 * Wait queue to wait for KCTX_SHEDULED flag state changes.
+		 * */
+		wait_queue_head_t is_scheduled_wait;
+
+		/** Link implementing JS queues. Context can be present on one
+		 * list per job slot
+		 */
+		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
+	} ctx;
+
+	/* The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths) */
+	int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+	/** Event code - to determine whether the atom has finished */
+	enum base_jd_event_code event_code;
+	/** core requirements */
+	base_jd_core_req core_req;
+	/* priority */
+	int sched_priority;
+	/** Job Slot to retry submitting to if submission from IRQ handler failed */
+	int retry_submit_on_slot;
+	/* Core group atom was executed on */
+	u32 device_nr;
+
+};
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbase_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
+ *
+ * @see kbase_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_MED,
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,
+	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100644
index 0000000..6d1e61f
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port. */
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+
+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+	#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+#else
+	#define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100644
index 0000000..375e484
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,2822 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/compat.h>
+#include <linux/version.h>
+
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_tlstream.h>
+
+/* This function finds out which RB tree the given GPU VA region belongs to
+ * based on the region zone */
+static struct rb_root *kbase_reg_flags_to_rbtree(struct kbase_context *kctx,
+						    struct kbase_va_region *reg)
+{
+	struct rb_root *rbtree = NULL;
+
+	switch (reg->flags & KBASE_REG_ZONE_MASK) {
+	case KBASE_REG_ZONE_CUSTOM_VA:
+		rbtree = &kctx->reg_rbtree_custom;
+		break;
+	case KBASE_REG_ZONE_EXEC:
+		rbtree = &kctx->reg_rbtree_exec;
+		break;
+	case KBASE_REG_ZONE_SAME_VA:
+		rbtree = &kctx->reg_rbtree_same;
+		/* fall through */
+	default:
+		rbtree = &kctx->reg_rbtree_same;
+		break;
+	}
+
+	return rbtree;
+}
+
+/* This function finds out which RB tree the given pfn from the GPU VA belongs
+ * to based on the memory zone the pfn refers to */
+static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
+								    u64 gpu_pfn)
+{
+	struct rb_root *rbtree = NULL;
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif /* CONFIG_64BIT */
+		if (gpu_pfn >= KBASE_REG_ZONE_CUSTOM_VA_BASE)
+			rbtree = &kctx->reg_rbtree_custom;
+		else if (gpu_pfn >= KBASE_REG_ZONE_EXEC_BASE)
+			rbtree = &kctx->reg_rbtree_exec;
+		else
+			rbtree = &kctx->reg_rbtree_same;
+#ifdef CONFIG_64BIT
+	} else {
+		if (gpu_pfn >= kctx->same_va_end)
+			rbtree = &kctx->reg_rbtree_custom;
+		else
+			rbtree = &kctx->reg_rbtree_same;
+	}
+#endif /* CONFIG_64BIT */
+
+	return rbtree;
+}
+
+/* This function inserts a region into the tree. */
+static void kbase_region_tracker_insert(struct kbase_context *kctx,
+						struct kbase_va_region *new_reg)
+{
+	u64 start_pfn = new_reg->start_pfn;
+	struct rb_node **link = NULL;
+	struct rb_node *parent = NULL;
+	struct rb_root *rbtree = NULL;
+
+	rbtree = kbase_reg_flags_to_rbtree(kctx, new_reg);
+
+	link = &(rbtree->rb_node);
+	/* Find the right place in the tree using tree search */
+	while (*link) {
+		struct kbase_va_region *old_reg;
+
+		parent = *link;
+		old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+		/* RBTree requires no duplicate entries. */
+		KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+		if (old_reg->start_pfn > start_pfn)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Put the new node there, and rebalance tree */
+	rb_link_node(&(new_reg->rblink), parent, link);
+
+	rb_insert_color(&(new_reg->rblink), rbtree);
+}
+
+/* Find allocated region enclosing free range. */
+static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
+		struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	u64 end_pfn = start_pfn + nr_pages;
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, start_pfn);
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (start_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (end_pfn > tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_root *rbtree = NULL;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (gpu_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (gpu_pfn >= tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->start_pfn > gpu_pfn)
+			rbnode = rbnode->rb_left;
+		else if (reg->start_pfn < gpu_pfn)
+			rbnode = rbnode->rb_right;
+		else
+			return reg;
+
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align)
+{
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	/* Note that this search is a linear search, as we do not have a target
+	   address in mind, so does not benefit from the rbtree search */
+
+	rbtree = kbase_reg_flags_to_rbtree(kctx, reg_reqs);
+
+	rbnode = rb_first(rbtree);
+
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if ((reg->nr_pages >= nr_pages) &&
+				(reg->flags & KBASE_REG_FREE)) {
+			/* Check alignment */
+			u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1);
+
+			if ((start_pfn >= reg->start_pfn) &&
+					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1)))
+				return reg;
+		}
+		rbnode = rb_next(rbnode);
+	}
+
+	return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions.  It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ */
+static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	struct rb_node *rbprev;
+	struct kbase_va_region *prev = NULL;
+	struct rb_node *rbnext;
+	struct kbase_va_region *next = NULL;
+	struct rb_root *reg_rbtree = NULL;
+
+	int merged_front = 0;
+	int merged_back = 0;
+	int err = 0;
+
+	reg_rbtree = kbase_reg_flags_to_rbtree(kctx, reg);
+
+	/* Try to merge with the previous block first */
+	rbprev = rb_prev(&(reg->rblink));
+	if (rbprev) {
+		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+		if (prev->flags & KBASE_REG_FREE) {
+			/* We're compatible with the previous VMA,
+			 * merge with it */
+			WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) !=
+					    (reg->flags & KBASE_REG_ZONE_MASK));
+			prev->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), reg_rbtree);
+			reg = prev;
+			merged_front = 1;
+		}
+	}
+
+	/* Try to merge with the next block second */
+	/* Note we do the lookup here as the tree may have been rebalanced. */
+	rbnext = rb_next(&(reg->rblink));
+	if (rbnext) {
+		/* We're compatible with the next VMA, merge with it */
+		next = rb_entry(rbnext, struct kbase_va_region, rblink);
+		if (next->flags & KBASE_REG_FREE) {
+			WARN_ON((next->flags & KBASE_REG_ZONE_MASK) !=
+					    (reg->flags & KBASE_REG_ZONE_MASK));
+			next->start_pfn = reg->start_pfn;
+			next->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), reg_rbtree);
+			merged_back = 1;
+			if (merged_front) {
+				/* We already merged with prev, free it */
+				kbase_free_alloced_region(reg);
+			}
+		}
+	}
+
+	/* If we failed to merge then we need to add a new block */
+	if (!(merged_front || merged_back)) {
+		/*
+		 * We didn't merge anything. Add a new free
+		 * placeholder and remove the original one.
+		 */
+		struct kbase_va_region *free_reg;
+
+		free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+		if (!free_reg) {
+			err = -ENOMEM;
+			goto out;
+		}
+		rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
+	}
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * @brief Insert a VA region to the list, replacing the current at_reg.
+ */
+static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_root *reg_rbtree = NULL;
+	int err = 0;
+
+	reg_rbtree = kbase_reg_flags_to_rbtree(kctx, at_reg);
+
+	/* Must be a free region */
+	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+	/* start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+	/* at least nr_pages from start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	/* Regions are a whole use, so swap and delete old one. */
+	if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+		rb_replace_node(&(at_reg->rblink), &(new_reg->rblink),
+								reg_rbtree);
+		kbase_free_alloced_region(at_reg);
+	}
+	/* New region replaces the start of the old one, so insert before. */
+	else if (at_reg->start_pfn == start_pfn) {
+		at_reg->start_pfn += nr_pages;
+		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region replaces the end of the old one, so insert after. */
+	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region splits the old one, so insert and create new */
+	else {
+		struct kbase_va_region *new_front_reg;
+
+		new_front_reg = kbase_alloc_free_region(kctx,
+				at_reg->start_pfn,
+				start_pfn - at_reg->start_pfn,
+				at_reg->flags & KBASE_REG_ZONE_MASK);
+
+		if (new_front_reg) {
+			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+			at_reg->start_pfn = start_pfn + nr_pages;
+
+			kbase_region_tracker_insert(kctx, new_front_reg);
+			kbase_region_tracker_insert(kctx, new_reg);
+		} else {
+			err = -ENOMEM;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * @brief Add a VA region to the list.
+ */
+int kbase_add_va_region(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 addr,
+		size_t nr_pages, size_t align)
+{
+	struct kbase_va_region *tmp;
+	u64 gpu_pfn = addr >> PAGE_SHIFT;
+	int err = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!align)
+		align = 1;
+
+	/* must be a power of 2 */
+	KBASE_DEBUG_ASSERT((align & (align - 1)) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+	/* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
+	if (gpu_pfn) {
+		struct device *dev = kctx->kbdev->dev;
+
+		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+		tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
+		if (!tmp) {
+			dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+		if (!(tmp->flags & KBASE_REG_FREE)) {
+			dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
+			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
+			dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
+		if (err) {
+			dev_warn(dev, "Failed to insert va region");
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		goto exit;
+	}
+
+	/* Path 2: Map any free address which meets the requirements.  */
+	{
+		u64 start_pfn;
+
+		/*
+		 * Depending on the zone the allocation request is for
+		 * we might need to retry it.
+		 */
+		do {
+			tmp = kbase_region_tracker_find_region_meeting_reqs(
+					kctx, reg, nr_pages, align);
+			if (tmp) {
+				start_pfn = (tmp->start_pfn + align - 1) &
+						~(align - 1);
+				err = kbase_insert_va_region_nolock(kctx, reg,
+						tmp, start_pfn, nr_pages);
+				break;
+			}
+
+			/*
+			 * If the allocation is not from the same zone as JIT
+			 * then don't retry, we're out of VA and there is
+			 * nothing which can be done about it.
+			 */
+			if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+					KBASE_REG_ZONE_CUSTOM_VA)
+				break;
+		} while (kbase_jit_evict(kctx));
+
+		if (!tmp)
+			err = -ENOMEM;
+	}
+
+ exit:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+		struct kbase_va_region *same_va_reg,
+		struct kbase_va_region *exec_reg,
+		struct kbase_va_region *custom_va_reg)
+{
+	kctx->reg_rbtree_same = RB_ROOT;
+	kbase_region_tracker_insert(kctx, same_va_reg);
+
+	/* Although exec and custom_va_reg don't always exist,
+	 * initialize unconditionally because of the mem_view debugfs
+	 * implementation which relies on these being empty */
+	kctx->reg_rbtree_exec = RB_ROOT;
+	kctx->reg_rbtree_custom = RB_ROOT;
+
+	if (exec_reg)
+		kbase_region_tracker_insert(kctx, exec_reg);
+	if (custom_va_reg)
+		kbase_region_tracker_insert(kctx, custom_va_reg);
+}
+
+static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	do {
+		rbnode = rb_first(rbtree);
+		if (rbnode) {
+			rb_erase(rbnode, rbtree);
+			reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+			kbase_free_alloced_region(reg);
+		}
+	} while (rbnode);
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
+}
+
+/**
+ * Initialize the region tracker data structure.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+	struct kbase_va_region *same_va_reg;
+	struct kbase_va_region *exec_reg = NULL;
+	struct kbase_va_region *custom_va_reg = NULL;
+	size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+	u64 same_va_pages;
+	int err;
+
+	/* Take the lock as kbase_free_alloced_region requires it */
+	kbase_gpu_vm_lock(kctx);
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		same_va_bits = 32;
+	else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+#endif
+
+	if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) {
+		err = -EINVAL;
+		goto fail_unlock;
+	}
+
+	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+	/* all have SAME_VA */
+	same_va_reg = kbase_alloc_free_region(kctx, 1,
+			same_va_pages,
+			KBASE_REG_ZONE_SAME_VA);
+
+	if (!same_va_reg) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+#ifdef CONFIG_64BIT
+	/* 32-bit clients have exec and custom VA zones */
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif
+		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+			err = -EINVAL;
+			goto fail_free_same_va;
+		}
+		/* If the current size of TMEM is out of range of the
+		 * virtual address space addressable by the MMU then
+		 * we should shrink it to fit
+		 */
+		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+		exec_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_EXEC_BASE,
+				KBASE_REG_ZONE_EXEC_SIZE,
+				KBASE_REG_ZONE_EXEC);
+
+		if (!exec_reg) {
+			err = -ENOMEM;
+			goto fail_free_same_va;
+		}
+
+		custom_va_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_CUSTOM_VA_BASE,
+				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+		if (!custom_va_reg) {
+			err = -ENOMEM;
+			goto fail_free_exec;
+		}
+#ifdef CONFIG_64BIT
+	}
+#endif
+
+	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+
+	kctx->same_va_end = same_va_pages + 1;
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_free_exec:
+	kbase_free_alloced_region(exec_reg);
+fail_free_same_va:
+	kbase_free_alloced_region(same_va_reg);
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
+{
+#ifdef CONFIG_64BIT
+	struct kbase_va_region *same_va;
+	struct kbase_va_region *custom_va_reg;
+	u64 same_va_bits;
+	u64 total_va_size;
+	int err;
+
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return 0;
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+
+	total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/*
+	 * Modify the same VA free region after creation. Be careful to ensure
+	 * that allocations haven't been made as they could cause an overlap
+	 * to happen with existing same VA allocations and the custom VA zone.
+	 */
+	same_va = kbase_region_tracker_find_region_base_address(kctx,
+			PAGE_SIZE);
+	if (!same_va) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* The region flag or region size has changed since creation so bail. */
+	if ((!(same_va->flags & KBASE_REG_FREE)) ||
+			(same_va->nr_pages != total_va_size)) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	if (same_va->nr_pages < jit_va_pages ||
+			kctx->same_va_end < jit_va_pages) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* It's safe to adjust the same VA zone now */
+	same_va->nr_pages -= jit_va_pages;
+	kctx->same_va_end -= jit_va_pages;
+
+	/*
+	 * Create a custom VA zone at the end of the VA for allocations which
+	 * JIT can use so it doesn't have to allocate VA from the kernel.
+	 */
+	custom_va_reg = kbase_alloc_free_region(kctx,
+				kctx->same_va_end,
+				jit_va_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+
+	if (!custom_va_reg) {
+		/*
+		 * The context will be destroyed if we fail here so no point
+		 * reverting the change we made to same_va.
+		 */
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	kbase_region_tracker_insert(kctx, custom_va_reg);
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+#else
+	return 0;
+#endif
+}
+
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+	kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX;
+
+	/* Initialize memory usage */
+	atomic_set(&memdev->used_pages, 0);
+
+	ret = kbase_mem_pool_init(&kbdev->mem_pool,
+			KBASE_MEM_POOL_MAX_SIZE_KBDEV,
+			KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
+			kbdev,
+			NULL);
+	if (ret)
+		return ret;
+
+	ret = kbase_mem_pool_init(&kbdev->lp_mem_pool,
+			(KBASE_MEM_POOL_MAX_SIZE_KBDEV >> 9),
+			KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
+			kbdev,
+			NULL);
+	if (ret)
+		kbase_mem_pool_term(&kbdev->mem_pool);
+
+	return ret;
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int pages;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	pages = atomic_read(&memdev->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_pool_term(&kbdev->mem_pool);
+	kbase_mem_pool_term(&kbdev->lp_mem_pool);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_term);
+
+
+
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+{
+	struct kbase_va_region *new_reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* zone argument should only contain zone related region flags */
+	KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
+
+	new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+	if (!new_reg)
+		return NULL;
+
+	new_reg->cpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->gpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->kctx = kctx;
+	new_reg->flags = zone | KBASE_REG_FREE;
+
+	new_reg->flags |= KBASE_REG_GROWABLE;
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions.
+ *
+ */
+void kbase_free_alloced_region(struct kbase_va_region *reg)
+{
+	if (!(reg->flags & KBASE_REG_FREE)) {
+		/*
+		 * The physical allocation should have been removed from the
+		 * eviction list before this function is called. However, in the
+		 * case of abnormal process termination or the app leaking the
+		 * memory kbase_mem_free_region is not called so it can still be
+		 * on the list at termination time of the region tracker.
+		 */
+		if (!list_empty(&reg->gpu_alloc->evict_node)) {
+			/*
+			 * Unlink the physical allocation before unmaking it
+			 * evictable so that the allocation isn't grown back to
+			 * its last backed size as we're going to unmap it
+			 * anyway.
+			 */
+			reg->cpu_alloc->reg = NULL;
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				reg->gpu_alloc->reg = NULL;
+
+			/*
+			 * If a region has been made evictable then we must
+			 * unmake it before trying to free it.
+			 * If the memory hasn't been reclaimed it will be
+			 * unmapped and freed below, if it has been reclaimed
+			 * then the operations below are no-ops.
+			 */
+			if (reg->flags & KBASE_REG_DONT_NEED) {
+				KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+						   KBASE_MEM_TYPE_NATIVE);
+				kbase_mem_evictable_unmake(reg->gpu_alloc);
+			}
+		}
+
+		/*
+		 * Remove the region from the sticky resource metadata
+		 * list should it be there.
+		 */
+		kbase_sticky_resource_release(reg->kctx, NULL,
+				reg->start_pfn << PAGE_SHIFT);
+
+		kbase_mem_phy_alloc_put(reg->cpu_alloc);
+		kbase_mem_phy_alloc_put(reg->gpu_alloc);
+		/* To detect use-after-free in debug builds */
+		KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
+	}
+	kfree(reg);
+}
+
+KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+{
+	int err;
+	size_t i = 0;
+	unsigned long attr;
+	unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+
+	if ((kctx->kbdev->system_coherency == COHERENCY_ACE) &&
+		(reg->flags & KBASE_REG_SHARE_BOTH))
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA);
+	else
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
+	if (err)
+		return err;
+
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+		struct kbase_mem_phy_alloc *alloc;
+
+		alloc = reg->gpu_alloc;
+		stride = alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
+		for (i = 0; i < alloc->imported.alias.nents; i++) {
+			if (alloc->imported.alias.aliased[i].alloc) {
+				err = kbase_mmu_insert_pages(kctx,
+						reg->start_pfn + (i * stride),
+						alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
+						alloc->imported.alias.aliased[i].length,
+						reg->flags);
+				if (err)
+					goto bad_insert;
+
+				kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc);
+			} else {
+				err = kbase_mmu_insert_single_page(kctx,
+					reg->start_pfn + i * stride,
+					kctx->aliasing_sink_page,
+					alloc->imported.alias.aliased[i].length,
+					(reg->flags & mask) | attr);
+
+				if (err)
+					goto bad_insert;
+			}
+		}
+	} else {
+		err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+				kbase_get_gpu_phy_pages(reg),
+				kbase_reg_current_backed_size(reg),
+				reg->flags);
+		if (err)
+			goto bad_insert;
+		kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
+	}
+
+	return err;
+
+bad_insert:
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+
+		stride = reg->gpu_alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		while (i--)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
+				kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length);
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+			}
+	}
+
+	kbase_remove_va_region(kctx, reg);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable);
+
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	if (reg->start_pfn == 0)
+		return 0;
+
+	if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		size_t i;
+
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+	} else {
+		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
+	}
+
+	if (reg->gpu_alloc && reg->gpu_alloc->type ==
+			KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		struct kbase_alloc_import_user_buf *user_buf =
+			&reg->gpu_alloc->imported.user_buf;
+
+		if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
+			user_buf->current_mapping_usage_count &=
+				~PINNED_ON_IMPORT;
+
+			kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc,
+					(reg->flags & KBASE_REG_GPU_WR));
+		}
+	}
+
+	if (err)
+		return err;
+
+	err = kbase_remove_va_region(kctx, reg);
+	return err;
+}
+
+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset)
+{
+	struct vm_area_struct *vma;
+	struct kbase_cpu_mapping *map;
+	unsigned long vm_pgoff_in_region;
+	unsigned long vm_off_in_region;
+	unsigned long map_start;
+	size_t map_size;
+
+	lockdep_assert_held(&current->mm->mmap_sem);
+
+	if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
+		return NULL;
+
+	vma = find_vma_intersection(current->mm, uaddr, uaddr+size);
+
+	if (!vma || vma->vm_start > uaddr)
+		return NULL;
+	if (vma->vm_ops != &kbase_vm_ops)
+		/* Not ours! */
+		return NULL;
+
+	map = vma->vm_private_data;
+
+	if (map->kctx != kctx)
+		/* Not from this context! */
+		return NULL;
+
+	vm_pgoff_in_region = vma->vm_pgoff - map->region->start_pfn;
+	vm_off_in_region = vm_pgoff_in_region << PAGE_SHIFT;
+	map_start = vma->vm_start - vm_off_in_region;
+	map_size = map->region->nr_pages << PAGE_SHIFT;
+
+	if ((uaddr + size) > (map_start + map_size))
+		/* Not within the CPU mapping */
+		return NULL;
+
+	*offset = (uaddr - vma->vm_start) + vm_off_in_region;
+
+	return map;
+}
+
+int kbasep_find_enclosing_cpu_mapping_offset(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset)
+{
+	struct kbase_cpu_mapping *map;
+
+	kbase_os_mem_map_lock(kctx);
+
+	map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset);
+
+	kbase_os_mem_map_unlock(kctx);
+
+	if (!map)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
+
+void kbase_sync_single(struct kbase_context *kctx,
+		struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa,
+		off_t offset, size_t size, enum kbase_sync_type sync_fn)
+{
+	struct page *cpu_page;
+	phys_addr_t cpu_pa = as_phys_addr_t(t_cpu_pa);
+	phys_addr_t gpu_pa = as_phys_addr_t(t_gpu_pa);
+
+	cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
+
+	if (likely(cpu_pa == gpu_pa)) {
+		dma_addr_t dma_addr;
+
+		BUG_ON(!cpu_page);
+		BUG_ON(offset + size > PAGE_SIZE);
+
+		dma_addr = kbase_dma_addr(cpu_page) + offset;
+		if (sync_fn == KBASE_SYNC_TO_CPU)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+		else if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+	} else {
+		void *src = NULL;
+		void *dst = NULL;
+		struct page *gpu_page;
+
+		if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
+			return;
+
+		gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+
+		if (sync_fn == KBASE_SYNC_TO_DEVICE) {
+			src = ((unsigned char *)kmap(cpu_page)) + offset;
+			dst = ((unsigned char *)kmap(gpu_page)) + offset;
+		} else if (sync_fn == KBASE_SYNC_TO_CPU) {
+			dma_sync_single_for_cpu(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+			src = ((unsigned char *)kmap(gpu_page)) + offset;
+			dst = ((unsigned char *)kmap(cpu_page)) + offset;
+		}
+		memcpy(dst, src, size);
+		kunmap(gpu_page);
+		kunmap(cpu_page);
+		if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+	}
+}
+
+static int kbase_do_syncset(struct kbase_context *kctx,
+		struct basep_syncset *sset, enum kbase_sync_type sync_fn)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+	struct kbase_cpu_mapping *map;
+	unsigned long start;
+	size_t size;
+	struct tagged_addr *cpu_pa;
+	struct tagged_addr *gpu_pa;
+	u64 page_off, page_count;
+	u64 i;
+	u64 offset;
+
+	kbase_os_mem_map_lock(kctx);
+	kbase_gpu_vm_lock(kctx);
+
+	/* find the region where the virtual address is contained */
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			sset->mem_handle.basep.handle);
+	if (!reg) {
+		dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX",
+				sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) ||
+			kbase_mem_is_imported(reg->gpu_alloc->type))
+		goto out_unlock;
+
+	start = (uintptr_t)sset->user_addr;
+	size = (size_t)sset->size;
+
+	map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset);
+	if (!map) {
+		dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
+				start, sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	page_off = offset >> PAGE_SHIFT;
+	offset &= ~PAGE_MASK;
+	page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	cpu_pa = kbase_get_cpu_phy_pages(reg);
+	gpu_pa = kbase_get_gpu_phy_pages(reg);
+
+	if (page_off > reg->nr_pages ||
+			page_off + page_count > reg->nr_pages) {
+		/* Sync overflows the region */
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* Sync first page */
+	if (as_phys_addr_t(cpu_pa[page_off])) {
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+
+		kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
+				offset, sz, sync_fn);
+	}
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		/* we grow upwards, so bail on first non-present page */
+		if (!as_phys_addr_t(cpu_pa[page_off + i]))
+			break;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + i],
+				gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1 &&
+	    as_phys_addr_t(cpu_pa[page_off + page_count - 1])) {
+		size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
+				gpu_pa[page_off + page_count - 1], 0, sz,
+				sync_fn);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_os_mem_map_unlock(kctx);
+	return err;
+}
+
+int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset)
+{
+	int err = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(sset != NULL);
+
+	if (sset->mem_handle.basep.handle & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev,
+				"mem_handle: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	switch (sset->type) {
+	case BASE_SYNCSET_OP_MSYNC:
+		err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_DEVICE);
+		break;
+
+	case BASE_SYNCSET_OP_CSYNC:
+		err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_CPU);
+		break;
+
+	default:
+		dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
+		break;
+	}
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_sync_now);
+
+/* vm lock must be held */
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Unlink the physical allocation before unmaking it evictable so
+	 * that the allocation isn't grown back to its last backed size
+	 * as we're going to unmap it anyway.
+	 */
+	reg->cpu_alloc->reg = NULL;
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		reg->gpu_alloc->reg = NULL;
+
+	/*
+	 * If a region has been made evictable then we must unmake it
+	 * before trying to free it.
+	 * If the memory hasn't been reclaimed it will be unmapped and freed
+	 * below, if it has been reclaimed then the operations below are no-ops.
+	 */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+				   KBASE_MEM_TYPE_NATIVE);
+		kbase_mem_evictable_unmake(reg->gpu_alloc);
+	}
+
+	err = kbase_gpu_munmap(kctx, reg);
+	if (err) {
+		dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+		goto out;
+	}
+
+	/* This will also free the physical pages */
+	kbase_free_alloced_region(reg);
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free_region);
+
+/**
+ * @brief Free the region from the GPU and unregister it.
+ *
+ * This function implements the free operation on a memory segment.
+ * It will loudly fail if called with outstanding mappings.
+ */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) {
+		dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid");
+		return -EINVAL;
+	}
+
+	if (0 == gpu_addr) {
+		dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+		return -EINVAL;
+	}
+	kbase_gpu_vm_lock(kctx);
+
+	if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
+	    gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
+		int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
+
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* ask to unlink the cookie as we'll free it */
+
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		kbase_free_alloced_region(reg);
+	} else {
+		/* A real GPU va */
+		/* Validate the region */
+		reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+		if (!reg || (reg->flags & KBASE_REG_FREE)) {
+			dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+			/* SAME_VA must be freed through munmap */
+			dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		err = kbase_mem_free_region(kctx, reg);
+	}
+
+ out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free);
+
+int kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
+
+	reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
+	/* all memory is now growable */
+	reg->flags |= KBASE_REG_GROWABLE;
+
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		reg->flags |= KBASE_REG_PF_GROW;
+
+	if (flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (0 == (flags & BASE_MEM_PROT_GPU_EX))
+		reg->flags |= KBASE_REG_GPU_NX;
+
+	if (!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED)
+			return -EINVAL;
+	} else if (flags & (BASE_MEM_COHERENT_SYSTEM |
+			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
+		reg->flags |= KBASE_REG_SHARE_BOTH;
+	}
+
+	if (!(reg->flags & KBASE_REG_SHARE_BOTH) &&
+			flags & BASE_MEM_COHERENT_LOCAL) {
+		reg->flags |= KBASE_REG_SHARE_IN;
+	}
+
+	/* Set up default MEMATTR usage */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
+		(reg->flags & KBASE_REG_SHARE_BOTH)) {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
+	} else {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
+	}
+
+	return 0;
+}
+
+int kbase_alloc_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_requested)
+{
+	int new_page_count __maybe_unused;
+	size_t old_page_count = alloc->nents;
+	size_t nr_left = nr_pages_requested;
+	int res;
+	struct kbase_context *kctx;
+	struct tagged_addr *tp;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+	if (alloc->reg) {
+		if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
+			goto invalid_request;
+	}
+
+	kctx = alloc->imported.kctx;
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = kbase_atomic_add_pages(
+			nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer */
+	kbase_process_page_usage_inc(kctx, nr_pages_requested);
+
+	tp = alloc->pages + old_page_count;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	/* Check if we have enough pages requested so we can allocate a large
+	 * page (512 * 4KB = 2MB )
+	 */
+	if (nr_left >= (SZ_2M / SZ_4K)) {
+		int nr_lp = nr_left / (SZ_2M / SZ_4K);
+
+		res = kbase_mem_pool_alloc_pages(&kctx->lp_mem_pool,
+						 nr_lp * (SZ_2M / SZ_4K),
+						 tp,
+						 true);
+
+		if (res > 0) {
+			nr_left -= res;
+			tp += res;
+		}
+
+		if (nr_left) {
+			struct kbase_sub_alloc *sa, *temp_sa;
+
+			mutex_lock(&kctx->mem_partials_lock);
+
+			list_for_each_entry_safe(sa, temp_sa,
+						 &kctx->mem_partials, link) {
+				int pidx = 0;
+
+				while (nr_left) {
+					pidx = find_next_zero_bit(sa->sub_pages,
+								  SZ_2M / SZ_4K,
+								  pidx);
+					bitmap_set(sa->sub_pages, pidx, 1);
+					*tp++ = as_tagged_tag(page_to_phys(sa->page +
+									   pidx),
+							      FROM_PARTIAL);
+					nr_left--;
+
+					if (bitmap_full(sa->sub_pages, SZ_2M / SZ_4K)) {
+						/* unlink from partial list when full */
+						list_del_init(&sa->link);
+						break;
+					}
+				}
+			}
+			mutex_unlock(&kctx->mem_partials_lock);
+		}
+
+		/* only if we actually have a chunk left <512. If more it indicates
+		 * that we couldn't allocate a 2MB above, so no point to retry here.
+		 */
+		if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
+			/* create a new partial and suballocate the rest from it */
+			struct page *np = NULL;
+
+			do {
+				int err;
+
+				np = kbase_mem_pool_alloc(&kctx->lp_mem_pool);
+				if (np)
+					break;
+				err = kbase_mem_pool_grow(&kctx->lp_mem_pool, 1);
+				if (err)
+					break;
+			} while (1);
+
+			if (np) {
+				int i;
+				struct kbase_sub_alloc *sa;
+				struct page *p;
+
+				sa = kmalloc(sizeof(*sa), GFP_KERNEL);
+				if (!sa) {
+					kbase_mem_pool_free(&kctx->lp_mem_pool, np, false);
+					goto no_new_partial;
+				}
+
+				/* store pointers back to the control struct */
+				np->lru.next = (void *)sa;
+				for (p = np; p < np + SZ_2M / SZ_4K; p++)
+					p->lru.prev = (void *)np;
+				INIT_LIST_HEAD(&sa->link);
+				bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
+				sa->page = np;
+
+				for (i = 0; i < nr_left; i++)
+					*tp++ = as_tagged_tag(page_to_phys(np + i), FROM_PARTIAL);
+
+				bitmap_set(sa->sub_pages, 0, nr_left);
+				nr_left = 0;
+
+				/* expose for later use */
+				mutex_lock(&kctx->mem_partials_lock);
+				list_add(&sa->link, &kctx->mem_partials);
+				mutex_unlock(&kctx->mem_partials_lock);
+			}
+		}
+	}
+no_new_partial:
+#endif
+
+	if (nr_left) {
+		res = kbase_mem_pool_alloc_pages(&kctx->mem_pool,
+						 nr_left,
+						 tp,
+						 false);
+		if (res <= 0)
+			goto alloc_failed;
+	}
+
+	/*
+	 * Request a zone cache update, this scans only the new pages an
+	 * appends their information to the zone cache. if the update
+	 * fails then clear the cache so we fall-back to doing things
+	 * page by page.
+	 */
+	if (kbase_zone_cache_update(alloc, old_page_count) != 0)
+		kbase_zone_cache_clear(alloc);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return 0;
+
+alloc_failed:
+	/* rollback needed if got one or more 2MB but failed later */
+	if (nr_left != nr_pages_requested)
+		kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
+				  nr_pages_requested - nr_left,
+				  alloc->pages + old_page_count,
+				  false,
+				  false);
+
+	kbase_process_page_usage_dec(kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+invalid_request:
+	return -ENOMEM;
+}
+
+static void free_partial(struct kbase_context *kctx, struct tagged_addr tp)
+{
+	struct page *p, *head_page;
+	struct kbase_sub_alloc *sa;
+
+	p = phys_to_page(as_phys_addr_t(tp));
+	head_page = (struct page *)p->lru.prev;
+	sa = (struct kbase_sub_alloc *)head_page->lru.next;
+	mutex_lock(&kctx->mem_partials_lock);
+	clear_bit(p - head_page, sa->sub_pages);
+	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
+		list_del(&sa->link);
+		kbase_mem_pool_free(&kctx->lp_mem_pool, head_page, true);
+		kfree(sa);
+	} else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
+		   SZ_2M / SZ_4K - 1) {
+		/* expose the partial again */
+		list_add(&sa->link, &kctx->mem_partials);
+	}
+	mutex_unlock(&kctx->mem_partials_lock);
+}
+
+int kbase_free_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	struct tagged_addr *start_free;
+	int new_page_count __maybe_unused;
+	size_t freed = 0;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	/* early out if nothing to do */
+	if (0 == nr_pages_to_free)
+		return 0;
+
+	start_free = alloc->pages + alloc->nents - nr_pages_to_free;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/* pad start_free to a valid start location */
+	while (nr_pages_to_free && is_huge(*start_free) &&
+	       !is_huge_head(*start_free)) {
+		nr_pages_to_free--;
+		start_free++;
+	}
+
+	/*
+	 * Clear the zone cache, we don't expect JIT allocations to be
+	 * shrunk in parts so there is no point trying to optimize for that
+	 * by scanning for the changes caused by freeing this memory and
+	 * updating the existing cache entries.
+	 */
+	kbase_zone_cache_clear(alloc);
+
+
+	while (nr_pages_to_free) {
+		if (is_huge_head(*start_free)) {
+			/* This is a 2MB entry, so free all the 512 pages that
+			 * it points to
+			 */
+			kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
+					512,
+					start_free,
+					syncback,
+					reclaimed);
+			nr_pages_to_free -= 512;
+			start_free += 512;
+			freed += 512;
+		} else if (is_partial(*start_free)) {
+			free_partial(kctx, *start_free);
+			nr_pages_to_free--;
+			start_free++;
+			freed++;
+		} else {
+			struct tagged_addr *local_end_free;
+
+			local_end_free = start_free;
+			while (nr_pages_to_free &&
+			       !is_huge(*local_end_free) &&
+			       !is_partial(*local_end_free)) {
+				local_end_free++;
+				nr_pages_to_free--;
+			}
+			kbase_mem_pool_free_pages(&kctx->mem_pool,
+					local_end_free - start_free,
+					start_free,
+					syncback,
+					reclaimed);
+			freed += local_end_free - start_free;
+			start_free += local_end_free - start_free;
+		}
+	}
+
+	alloc->nents -= freed;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		kbase_process_page_usage_dec(kctx, freed);
+		new_page_count = kbase_atomic_sub_pages(freed,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(freed,
+				       &kctx->kbdev->memdev.used_pages);
+
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+				kctx->id,
+				(u64)new_page_count);
+	}
+
+	return 0;
+}
+
+void kbase_mem_kref_free(struct kref *kref)
+{
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
+
+	switch (alloc->type) {
+	case KBASE_MEM_TYPE_NATIVE: {
+		WARN_ON(!alloc->imported.kctx);
+		/*
+		 * The physical allocation must have been removed from the
+		 * eviction list before trying to free it.
+		 */
+		WARN_ON(!list_empty(&alloc->evict_node));
+		kbase_free_phy_pages_helper(alloc, alloc->nents);
+		break;
+	}
+	case KBASE_MEM_TYPE_ALIAS: {
+		/* just call put on the underlying phy allocs */
+		size_t i;
+		struct kbase_aliased *aliased;
+
+		aliased = alloc->imported.alias.aliased;
+		if (aliased) {
+			for (i = 0; i < alloc->imported.alias.nents; i++)
+				if (aliased[i].alloc)
+					kbase_mem_phy_alloc_put(aliased[i].alloc);
+			vfree(aliased);
+		}
+		break;
+	}
+	case KBASE_MEM_TYPE_RAW:
+		/* raw pages, external cleanup */
+		break;
+ #ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ump_dd_release(alloc->imported.ump_handle);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		dma_buf_detach(alloc->imported.umm.dma_buf,
+			       alloc->imported.umm.dma_attachment);
+		dma_buf_put(alloc->imported.umm.dma_buf);
+		break;
+#endif
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		if (alloc->imported.user_buf.mm)
+			mmdrop(alloc->imported.user_buf.mm);
+		kfree(alloc->imported.user_buf.pages);
+		break;
+	case KBASE_MEM_TYPE_TB:{
+		void *tb;
+
+		tb = alloc->imported.kctx->jctx.tb;
+		kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
+		vfree(tb);
+		break;
+	}
+	default:
+		WARN(1, "Unexecpted free of type %d\n", alloc->type);
+		break;
+	}
+
+	/* Free based on allocation type */
+	if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+		vfree(alloc);
+	else
+		kfree(alloc);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(vsize > 0);
+
+	/* validate user provided arguments */
+	if (size > vsize || vsize > reg->nr_pages)
+		goto out_term;
+
+	/* Prevent vsize*sizeof from wrapping around.
+	 * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail.
+	 */
+	if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
+		goto out_term;
+
+	KBASE_DEBUG_ASSERT(0 != vsize);
+
+	if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
+		goto out_term;
+
+	reg->cpu_alloc->reg = reg;
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
+			goto out_rollback;
+		reg->gpu_alloc->reg = reg;
+	}
+
+	return 0;
+
+out_rollback:
+	kbase_free_phy_pages_helper(reg->cpu_alloc, size);
+out_term:
+	return -1;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+
+bool kbase_check_alloc_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Either the GPU or CPU must be reading from the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
+		return false;
+
+	/* Either the GPU or CPU must be writing to the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for allocating. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* BASE_MEM_IMPORT_SHARED is only valid for imported memory */
+	if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
+		return false;
+
+	return true;
+}
+
+bool kbase_check_import_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Imported memory cannot be GPU executable */
+	if (flags & BASE_MEM_PROT_GPU_EX)
+		return false;
+
+	/* Imported memory cannot grow on page fault */
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for importing. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* Secure memory cannot be read by the CPU */
+	if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD))
+		return false;
+
+	return true;
+}
+
+/**
+ * @brief Acquire the per-context region list lock
+ */
+void kbase_gpu_vm_lock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_lock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
+
+/**
+ * @brief Release the per-context region list lock
+ */
+void kbase_gpu_vm_unlock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_unlock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
+
+#ifdef CONFIG_DEBUG_FS
+struct kbase_jit_debugfs_data {
+	int (*func)(struct kbase_jit_debugfs_data *);
+	struct mutex lock;
+	struct kbase_context *kctx;
+	u64 active_value;
+	u64 pool_value;
+	u64 destroy_value;
+	char buffer[50];
+};
+
+static int kbase_jit_debugfs_common_open(struct inode *inode,
+		struct file *file, int (*func)(struct kbase_jit_debugfs_data *))
+{
+	struct kbase_jit_debugfs_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->func = func;
+	mutex_init(&data->lock);
+	data->kctx = (struct kbase_context *) inode->i_private;
+
+	file->private_data = data;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t kbase_jit_debugfs_common_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_jit_debugfs_data *data;
+	size_t size;
+	int ret;
+
+	data = (struct kbase_jit_debugfs_data *) file->private_data;
+	mutex_lock(&data->lock);
+
+	if (*ppos) {
+		size = strnlen(data->buffer, sizeof(data->buffer));
+	} else {
+		if (!data->func) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		if (data->func(data)) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		size = scnprintf(data->buffer, sizeof(data->buffer),
+				"%llu,%llu,%llu", data->active_value,
+				data->pool_value, data->destroy_value);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size);
+
+out_unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+static int kbase_jit_debugfs_common_release(struct inode *inode,
+		struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+	return kbase_jit_debugfs_common_open(inode, file, __func); \
+} \
+static const struct file_operations __fops = { \
+	.owner = THIS_MODULE, \
+	.open = __fops ## _open, \
+	.release = kbase_jit_debugfs_common_release, \
+	.read = kbase_jit_debugfs_common_read, \
+	.write = NULL, \
+	.llseek = generic_file_llseek, \
+}
+
+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct list_head *tmp;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each(tmp, &kctx->jit_active_head) {
+		data->active_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_pool_head) {
+		data->pool_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_destroy_head) {
+		data->destroy_value++;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
+		kbase_jit_debugfs_count_get);
+
+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->nr_pages;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops,
+		kbase_jit_debugfs_vm_get);
+
+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->gpu_alloc->nents;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
+		kbase_jit_debugfs_phys_get);
+
+void kbase_jit_debugfs_init(struct kbase_context *kctx)
+{
+	/* Debugfs entry for getting the number of JIT allocations. */
+	debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_count_fops);
+
+	/*
+	 * Debugfs entry for getting the total number of virtual pages
+	 * used by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_vm_fops);
+
+	/*
+	 * Debugfs entry for getting the number of physical pages used
+	 * by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_phys_fops);
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations
+ * @work: Work item
+ *
+ * This function does the work of freeing JIT allocations whose physical
+ * backing has been released.
+ */
+static void kbase_jit_destroy_worker(struct work_struct *work)
+{
+	struct kbase_context *kctx;
+	struct kbase_va_region *reg;
+
+	kctx = container_of(work, struct kbase_context, jit_work);
+	do {
+		mutex_lock(&kctx->jit_evict_lock);
+		if (list_empty(&kctx->jit_destroy_head)) {
+			mutex_unlock(&kctx->jit_evict_lock);
+			break;
+		}
+
+		reg = list_first_entry(&kctx->jit_destroy_head,
+				struct kbase_va_region, jit_node);
+
+		list_del(&reg->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		kbase_gpu_vm_lock(kctx);
+		kbase_mem_free_region(kctx, reg);
+		kbase_gpu_vm_unlock(kctx);
+	} while (1);
+}
+
+int kbase_jit_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->jit_active_head);
+	INIT_LIST_HEAD(&kctx->jit_pool_head);
+	INIT_LIST_HEAD(&kctx->jit_destroy_head);
+	INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
+
+	INIT_LIST_HEAD(&kctx->jit_pending_alloc);
+	INIT_LIST_HEAD(&kctx->jit_atoms_head);
+
+	return 0;
+}
+
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info)
+{
+	struct kbase_va_region *reg = NULL;
+	struct kbase_va_region *walker;
+	struct kbase_va_region *temp;
+	size_t current_diff = SIZE_MAX;
+
+	int ret;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	/*
+	 * Scan the pool for an existing allocation which meets our
+	 * requirements and remove it.
+	 */
+	list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) {
+
+		if (walker->nr_pages >= info->va_pages) {
+			size_t min_size, max_size, diff;
+
+			/*
+			 * The JIT allocations VA requirements have been
+			 * meet, it's suitable but other allocations
+			 * might be a better fit.
+			 */
+			min_size = min_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			max_size = max_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			diff = max_size - min_size;
+
+			if (current_diff > diff) {
+				current_diff = diff;
+				reg = walker;
+			}
+
+			/* The allocation is an exact match, stop looking */
+			if (current_diff == 0)
+				break;
+		}
+	}
+
+	if (reg) {
+		/*
+		 * Remove the found region from the pool and add it to the
+		 * active list.
+		 */
+		list_move(&reg->jit_node, &kctx->jit_active_head);
+
+		/*
+		 * Remove the allocation from the eviction list as it's no
+		 * longer eligible for eviction. This must be done before
+		 * dropping the jit_evict_lock
+		 */
+		list_del_init(&reg->gpu_alloc->evict_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		kbase_gpu_vm_lock(kctx);
+
+		/* Make the physical backing no longer reclaimable */
+		if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+			goto update_failed;
+
+		/* Grow the backing if required */
+		if (reg->gpu_alloc->nents < info->commit_pages) {
+			size_t delta;
+			size_t old_size = reg->gpu_alloc->nents;
+
+			/* Allocate some more pages */
+			delta = info->commit_pages - reg->gpu_alloc->nents;
+			if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta)
+					!= 0)
+				goto update_failed;
+
+			if (reg->cpu_alloc != reg->gpu_alloc) {
+				if (kbase_alloc_phy_pages_helper(
+						reg->cpu_alloc, delta) != 0) {
+					kbase_free_phy_pages_helper(
+							reg->gpu_alloc, delta);
+					goto update_failed;
+				}
+			}
+
+			ret = kbase_mem_grow_gpu_mapping(kctx, reg,
+					info->commit_pages, old_size);
+			/*
+			 * The grow failed so put the allocation back in the
+			 * pool and return failure.
+			 */
+			if (ret)
+				goto update_failed;
+		}
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* No suitable JIT allocation was found so create a new one */
+		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+				BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+				BASE_MEM_COHERENT_LOCAL;
+		u64 gpu_addr;
+
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
+				info->extent, &flags, &gpu_addr);
+		if (!reg)
+			goto out_unlocked;
+
+		mutex_lock(&kctx->jit_evict_lock);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+		mutex_unlock(&kctx->jit_evict_lock);
+	}
+
+	return reg;
+
+update_failed:
+	/*
+	 * An update to an allocation from the pool failed, chances
+	 * are slim a new allocation would fair any better so return
+	 * the allocation to the pool and return the function with failure.
+	 */
+	kbase_gpu_vm_unlock(kctx);
+	mutex_lock(&kctx->jit_evict_lock);
+	list_move(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+out_unlocked:
+	return NULL;
+}
+
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	/* The physical backing of memory in the pool is always reclaimable */
+	kbase_gpu_vm_lock(kctx);
+	kbase_mem_evictable_make(reg->gpu_alloc);
+	kbase_gpu_vm_unlock(kctx);
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_move(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+}
+
+void kbase_jit_backing_lost(struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = reg->kctx;
+
+	lockdep_assert_held(&kctx->jit_evict_lock);
+
+	/*
+	 * JIT allocations will always be on a list, if the region
+	 * is not on a list then it's not a JIT allocation.
+	 */
+	if (list_empty(&reg->jit_node))
+		return;
+
+	/*
+	 * Freeing the allocation requires locks we might not be able
+	 * to take now, so move the allocation to the free list and kick
+	 * the worker which will do the freeing.
+	 */
+	list_move(&reg->jit_node, &kctx->jit_destroy_head);
+
+	schedule_work(&kctx->jit_work);
+}
+
+bool kbase_jit_evict(struct kbase_context *kctx)
+{
+	struct kbase_va_region *reg = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Free the oldest allocation from the pool */
+	mutex_lock(&kctx->jit_evict_lock);
+	if (!list_empty(&kctx->jit_pool_head)) {
+		reg = list_entry(kctx->jit_pool_head.prev,
+				struct kbase_va_region, jit_node);
+		list_del(&reg->jit_node);
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	if (reg)
+		kbase_mem_free_region(kctx, reg);
+
+	return (reg != NULL);
+}
+
+void kbase_jit_term(struct kbase_context *kctx)
+{
+	struct kbase_va_region *walker;
+
+	/* Free all allocations for this context */
+
+	/*
+	 * Flush the freeing of allocations whose backing has been freed
+	 * (i.e. everything in jit_destroy_head).
+	 */
+	cancel_work_sync(&kctx->jit_work);
+
+	kbase_gpu_vm_lock(kctx);
+	mutex_lock(&kctx->jit_evict_lock);
+	/* Free all allocations from the pool */
+	while (!list_empty(&kctx->jit_pool_head)) {
+		walker = list_first_entry(&kctx->jit_pool_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		kbase_mem_free_region(kctx, walker);
+		mutex_lock(&kctx->jit_evict_lock);
+	}
+
+	/* Free all allocations from active list */
+	while (!list_empty(&kctx->jit_active_head)) {
+		walker = list_first_entry(&kctx->jit_active_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		kbase_mem_free_region(kctx, walker);
+		mutex_lock(&kctx->jit_evict_lock);
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+	kbase_gpu_vm_unlock(kctx);
+}
+
+static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	long pinned_pages;
+	struct kbase_mem_phy_alloc *alloc;
+	struct page **pages;
+	struct tagged_addr *pa;
+	long i;
+	int err = -ENOMEM;
+	unsigned long address;
+	struct mm_struct *mm;
+	struct device *dev;
+	unsigned long offset;
+	unsigned long local_size;
+
+	alloc = reg->gpu_alloc;
+	pa = kbase_get_gpu_phy_pages(reg);
+	address = alloc->imported.user_buf.address;
+	mm = alloc->imported.user_buf.mm;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+
+	pages = alloc->imported.user_buf.pages;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	pinned_pages = get_user_pages(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#else
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL, NULL);
+#endif
+
+	if (pinned_pages <= 0)
+		return pinned_pages;
+
+	if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+		for (i = 0; i < pinned_pages; i++)
+			put_page(pages[i]);
+		return -ENOMEM;
+	}
+
+	dev = kctx->kbdev->dev;
+	offset = address & ~PAGE_MASK;
+	local_size = alloc->imported.user_buf.size;
+
+	for (i = 0; i < pinned_pages; i++) {
+		dma_addr_t dma_addr;
+		unsigned long min;
+
+		min = MIN(PAGE_SIZE - offset, local_size);
+		dma_addr = dma_map_page(dev, pages[i],
+				offset, min,
+				DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, dma_addr))
+			goto unwind;
+
+		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
+		pa[i] = as_tagged(page_to_phys(pages[i]));
+
+		local_size -= min;
+		offset = 0;
+	}
+
+	alloc->nents = pinned_pages;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
+			kbase_reg_current_backed_size(reg),
+			reg->flags);
+	if (err == 0)
+		return 0;
+
+	alloc->nents = 0;
+	/* fall down */
+unwind:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				alloc->imported.user_buf.dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+
+	while (++i < pinned_pages) {
+		put_page(pages[i]);
+		pages[i] = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable)
+{
+	long i;
+	struct page **pages;
+	unsigned long size = alloc->imported.user_buf.size;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	pages = alloc->imported.user_buf.pages;
+	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
+		unsigned long local_size;
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
+		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+				DMA_BIDIRECTIONAL);
+		if (writeable)
+			set_page_dirty_lock(pages[i]);
+		put_page(pages[i]);
+		pages[i] = NULL;
+
+		size -= local_size;
+	}
+	alloc->nents = 0;
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kbase_jd_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	struct sg_table *sgt;
+	struct scatterlist *s;
+	int i;
+	struct tagged_addr *pa;
+	int err;
+	size_t count = 0;
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = reg->gpu_alloc;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+	KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
+	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
+			DMA_BIDIRECTIONAL);
+
+	if (IS_ERR_OR_NULL(sgt))
+		return -EINVAL;
+
+	/* save for later */
+	alloc->imported.umm.sgt = sgt;
+
+	pa = kbase_get_gpu_phy_pages(reg);
+	KBASE_DEBUG_ASSERT(pa);
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		size_t j, pages = PFN_UP(sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+		"sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+		sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+		(unsigned long long) sg_dma_address(s));
+
+		for (j = 0; (j < pages) && (count < reg->nr_pages); j++,
+				count++)
+			*pa++ = as_tagged(sg_dma_address(s) +
+				(j << PAGE_SHIFT));
+		WARN_ONCE(j < pages,
+			  "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+		alloc->imported.umm.dma_buf->size);
+	}
+
+	if (!(reg->flags & KBASE_REG_IMPORT_PAD) &&
+			WARN_ONCE(count < reg->nr_pages,
+			"sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+			alloc->imported.umm.dma_buf->size)) {
+		err = -EINVAL;
+		goto err_unmap_attachment;
+	}
+
+	/* Update nents as we now have pages to map */
+	alloc->nents = reg->nr_pages;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+			kbase_get_gpu_phy_pages(reg),
+			count,
+			reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+	if (err)
+		goto err_unmap_attachment;
+
+	if (reg->flags & KBASE_REG_IMPORT_PAD) {
+		err = kbase_mmu_insert_single_page(kctx,
+				reg->start_pfn + count,
+				kctx->aliasing_sink_page,
+				reg->nr_pages - count,
+				(reg->flags | KBASE_REG_GPU_RD) &
+				~KBASE_REG_GPU_WR);
+		if (err)
+			goto err_teardown_orig_pages;
+	}
+
+	return 0;
+
+err_teardown_orig_pages:
+	kbase_mmu_teardown_pages(kctx, reg->start_pfn, count);
+err_unmap_attachment:
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+			alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+
+	return err;
+}
+
+static void kbase_jd_umm_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(alloc);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+	    alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+	alloc->nents = 0;
+}
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm)
+{
+	int err;
+
+	/* decide what needs to happen for this resource */
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		if (reg->gpu_alloc->imported.user_buf.mm != locked_mm)
+			goto exit;
+
+		reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
+			err = kbase_jd_user_buf_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+	}
+	break;
+	case KBASE_MEM_TYPE_IMPORTED_UMP: {
+		break;
+	}
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+			err = kbase_jd_umm_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+		break;
+	}
+#endif
+	default:
+		goto exit;
+	}
+
+	return kbase_mem_phy_alloc_get(reg->gpu_alloc);
+exit:
+	return NULL;
+}
+
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+	switch (alloc->type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		alloc->imported.umm.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.umm.current_mapping_usage_count) {
+			if (reg && reg->gpu_alloc == alloc) {
+				int err;
+
+				err = kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						alloc->nents);
+				WARN_ON(err);
+			}
+
+			kbase_jd_umm_unmap(kctx, alloc);
+		}
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		alloc->imported.user_buf.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
+			bool writeable = true;
+
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+				writeable = false;
+
+			kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+		}
+	}
+	break;
+	default:
+	break;
+	}
+	kbase_mem_phy_alloc_put(alloc);
+}
+
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *meta = NULL;
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Walk the per context external resource metadata list for the
+	 * metadata which matches the region which is being acquired.
+	 */
+	list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
+		if (walker->gpu_addr == gpu_addr) {
+			meta = walker;
+			break;
+		}
+	}
+
+	/* No metadata exists so create one. */
+	if (!meta) {
+		struct kbase_va_region *reg;
+
+		/* Find the region */
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				kctx, gpu_addr);
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+			goto failed;
+
+		/* Allocate the metadata object */
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			goto failed;
+
+		/*
+		 * Fill in the metadata object and acquire a reference
+		 * for the physical resource.
+		 */
+		meta->alloc = kbase_map_external_resource(kctx, reg, NULL);
+
+		if (!meta->alloc)
+			goto fail_map;
+
+		meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+
+		list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
+	}
+
+	return meta;
+
+fail_map:
+	kfree(meta);
+failed:
+	return NULL;
+}
+
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+	struct kbase_va_region *reg;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Search of the metadata if one isn't provided. */
+	if (!meta) {
+		/*
+		 * Walk the per context external resource metadata list for the
+		 * metadata which matches the region which is being released.
+		 */
+		list_for_each_entry(walker, &kctx->ext_res_meta_head,
+				ext_res_node) {
+			if (walker->gpu_addr == gpu_addr) {
+				meta = walker;
+				break;
+			}
+		}
+	}
+
+	/* No metadata so just return. */
+	if (!meta)
+		return false;
+
+	/* Drop the physical memory reference and free the metadata. */
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx,
+			meta->gpu_addr);
+
+	kbase_unmap_external_resource(kctx, reg, meta->alloc);
+	list_del(&meta->ext_res_node);
+	kfree(meta);
+
+	return true;
+}
+
+int kbase_sticky_resource_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->ext_res_meta_head);
+
+	return 0;
+}
+
+void kbase_sticky_resource_term(struct kbase_context *kctx)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Free any sticky resources which haven't been unmapped.
+	 *
+	 * Note:
+	 * We don't care about refcounts at this point as no future
+	 * references to the meta data will be made.
+	 * Region termination would find these if we didn't free them
+	 * here, but it's more efficient if we do the clean up here.
+	 */
+	while (!list_empty(&kctx->ext_res_meta_head)) {
+		walker = list_first_entry(&kctx->ext_res_meta_head,
+				struct kbase_ctx_ext_res_meta, ext_res_node);
+
+		kbase_sticky_resource_release(kctx, walker, 0);
+	}
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
new file mode 100644
index 0000000..815fab4
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -0,0 +1,1127 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/kref.h>
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include "mali_kbase_gator.h"
+#endif
+/* Required for kbase_mem_evictable_unmake */
+#include "mali_kbase_mem_linux.h"
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)	/* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
+The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and
+page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table
+updates and generates duplicate page faults as the page table information used by the MMU is not valid.   */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3)	/* round to 8 pages */
+
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0)	/* round to 1 page */
+
+/* This must always be a power of 2 */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/**
+ * A CPU mapping
+ */
+struct kbase_cpu_mapping {
+	struct   list_head mappings_list;
+	struct   kbase_mem_phy_alloc *alloc;
+	struct   kbase_context *kctx;
+	struct   kbase_va_region *region;
+	int      count;
+	int      free_on_close;
+};
+
+enum kbase_memory_type {
+	KBASE_MEM_TYPE_NATIVE,
+	KBASE_MEM_TYPE_IMPORTED_UMP,
+	KBASE_MEM_TYPE_IMPORTED_UMM,
+	KBASE_MEM_TYPE_IMPORTED_USER_BUF,
+	KBASE_MEM_TYPE_ALIAS,
+	KBASE_MEM_TYPE_TB,
+	KBASE_MEM_TYPE_RAW
+};
+
+/* internal structure, mirroring base_mem_aliasing_info,
+ * but with alloc instead of a gpu va (handle) */
+struct kbase_aliased {
+	struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
+	u64 offset; /* in pages */
+	u64 length; /* in pages */
+};
+
+/**
+ * @brief Physical pages tracking object properties
+  */
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED  (1ul << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE            (1ul << 1)
+
+/* physical pages tracking object.
+ * Set up to track N pages.
+ * N not stored here, the creator holds that info.
+ * This object only tracks how many elements are actually valid (present).
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not
+ * shared with another region or client. CPU mappings are OK to exist when changing, as
+ * long as the tracked mappings objects are updated as part of the change.
+ */
+struct kbase_mem_phy_alloc {
+	struct kref           kref; /* number of users of this alloc */
+	atomic_t              gpu_mappings;
+	size_t                nents; /* 0..N */
+	struct tagged_addr    *pages; /* N elements, only 0..nents are valid */
+
+	/* kbase_cpu_mappings */
+	struct list_head      mappings;
+
+	/* Node used to store this allocation on the eviction list */
+	struct list_head      evict_node;
+	/* Physical backing size when the pages where evicted */
+	size_t                evicted;
+	/*
+	 * Back reference to the region structure which created this
+	 * allocation, or NULL if it has been freed.
+	 */
+	struct kbase_va_region *reg;
+
+	/* type of buffer */
+	enum kbase_memory_type type;
+
+	unsigned long properties;
+
+	struct list_head       zone_cache;
+
+	/* member in union valid based on @a type */
+	union {
+#ifdef CONFIG_UMP
+		ump_dd_handle ump_handle;
+#endif /* CONFIG_UMP */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		struct {
+			struct dma_buf *dma_buf;
+			struct dma_buf_attachment *dma_attachment;
+			unsigned int current_mapping_usage_count;
+			struct sg_table *sgt;
+		} umm;
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+		struct {
+			u64 stride;
+			size_t nents;
+			struct kbase_aliased *aliased;
+		} alias;
+		/* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
+		struct kbase_context *kctx;
+		struct kbase_alloc_import_user_buf {
+			unsigned long address;
+			unsigned long size;
+			unsigned long nr_pages;
+			struct page **pages;
+			/* top bit (1<<31) of current_mapping_usage_count
+			 * specifies that this import was pinned on import
+			 * See PINNED_ON_IMPORT
+			 */
+			u32 current_mapping_usage_count;
+			struct mm_struct *mm;
+			dma_addr_t *dma_addrs;
+		} user_buf;
+	} imported;
+};
+
+/* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is
+ * used to signify that a buffer was pinned when it was imported. Since the
+ * reference count is limited by the number of atoms that can be submitted at
+ * once there should be no danger of overflowing into this bit.
+ * Stealing the top bit also has the benefit that
+ * current_mapping_usage_count != 0 if and only if the buffer is mapped.
+ */
+#define PINNED_ON_IMPORT	(1<<31)
+
+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		atomic_inc(&alloc->gpu_mappings);
+}
+
+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+			pr_err("Mismatched %s:\n", __func__);
+			dump_stack();
+		}
+}
+
+/**
+ * kbase_mem_is_imported - Indicate whether a memory type is imported
+ *
+ * @type: the memory type
+ *
+ * Return: true if the memory type is imported, false otherwise
+ */
+static inline bool kbase_mem_is_imported(enum kbase_memory_type type)
+{
+	return (type == KBASE_MEM_TYPE_IMPORTED_UMP) ||
+		(type == KBASE_MEM_TYPE_IMPORTED_UMM) ||
+		(type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+}
+
+void kbase_mem_kref_free(struct kref *kref);
+
+int kbase_mem_init(struct kbase_device *kbdev);
+void kbase_mem_halt(struct kbase_device *kbdev);
+void kbase_mem_term(struct kbase_device *kbdev);
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_get(&alloc->kref);
+	return alloc;
+}
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_put(&alloc->kref, kbase_mem_kref_free);
+	return NULL;
+}
+
+/**
+ * A GPU memory region, and attributes for CPU mappings.
+ */
+struct kbase_va_region {
+	struct rb_node rblink;
+	struct list_head link;
+
+	struct kbase_context *kctx;	/* Backlink to base context */
+
+	u64 start_pfn;		/* The PFN in GPU space */
+	size_t nr_pages;
+
+/* Free region */
+#define KBASE_REG_FREE              (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR            (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR            (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX            (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED        (1ul << 4)
+/* Is GPU cached? */
+#define KBASE_REG_GPU_CACHED        (1ul << 5)
+
+#define KBASE_REG_GROWABLE          (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW           (1ul << 7)
+
+/* Bit 8 is unused */
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN          (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH        (1ul << 10)
+
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_MASK         (3ul << 11)
+#define KBASE_REG_ZONE(x)           (((x) & 3) << 11)
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD            (1ul<<13)
+/* CPU read access */
+#define KBASE_REG_CPU_RD            (1ul<<14)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK      (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x)  (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x)  (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_SECURE            (1ul << 19)
+
+#define KBASE_REG_DONT_NEED         (1ul << 20)
+
+/* Imported buffer is padded? */
+#define KBASE_REG_IMPORT_PAD        (1ul << 21)
+
+/* Bit 22 is reserved.
+ *
+ * Do not remove, use the next unreserved bit for new flags */
+#define KBASE_REG_RESERVED_BIT_22   (1ul << 22)
+
+#define KBASE_REG_ZONE_SAME_VA      KBASE_REG_ZONE(0)
+
+/* only used with 32-bit clients */
+/*
+ * On a 32bit platform, custom VA should be wired from (4GB + shader region)
+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink
+ * it down, if required by the GPU, during initialization.
+ */
+
+/*
+ * Dedicated 16MB region for shader code:
+ * VA range 0x101000000-0x102000000
+ */
+#define KBASE_REG_ZONE_EXEC         KBASE_REG_ZONE(1)
+#define KBASE_REG_ZONE_EXEC_BASE    (0x101000000ULL >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_SIZE    ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
+
+#define KBASE_REG_ZONE_CUSTOM_VA         KBASE_REG_ZONE(2)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE    (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE    (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
+
+	unsigned long flags;
+
+	size_t extent; /* nr of pages alloc'd on PF */
+
+	struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */
+	struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */
+
+	/* List head used to store the region in the JIT allocation pool */
+	struct list_head jit_node;
+};
+
+/* Common functions */
+static inline struct tagged_addr *kbase_get_cpu_phy_pages(
+		struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->pages;
+}
+
+static inline struct tagged_addr *kbase_get_gpu_phy_pages(
+		struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->gpu_alloc->pages;
+}
+
+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	/* if no alloc object the backed size naturally is 0 */
+	if (!reg->cpu_alloc)
+		return 0;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->nents;
+}
+
+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
+
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
+{
+	struct kbase_mem_phy_alloc *alloc;
+	size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+	size_t per_page_size = sizeof(*alloc->pages);
+
+	/* Imported pages may have page private data already in use */
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		alloc_size += nr_pages *
+				sizeof(*alloc->imported.user_buf.dma_addrs);
+		per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
+	}
+
+	/*
+	 * Prevent nr_pages*per_page_size + sizeof(*alloc) from
+	 * wrapping around.
+	 */
+	if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+			/ per_page_size))
+		return ERR_PTR(-ENOMEM);
+
+	/* Allocate based on the size to reduce internal fragmentation of vmem */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc = vzalloc(alloc_size);
+	else
+		alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+	if (!alloc)
+		return ERR_PTR(-ENOMEM);
+
+	/* Store allocation method */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+	kref_init(&alloc->kref);
+	atomic_set(&alloc->gpu_mappings, 0);
+	alloc->nents = 0;
+	alloc->pages = (void *)(alloc + 1);
+	INIT_LIST_HEAD(&alloc->mappings);
+	alloc->type = type;
+	INIT_LIST_HEAD(&alloc->zone_cache);
+
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
+		alloc->imported.user_buf.dma_addrs =
+				(void *) (alloc->pages + nr_pages);
+
+	return alloc;
+}
+
+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
+		struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+	reg->cpu_alloc = kbase_alloc_create(reg->nr_pages,
+			KBASE_MEM_TYPE_NATIVE);
+	if (IS_ERR(reg->cpu_alloc))
+		return PTR_ERR(reg->cpu_alloc);
+	else if (!reg->cpu_alloc)
+		return -ENOMEM;
+	reg->cpu_alloc->imported.kctx = kctx;
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+	if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE)
+	    && (reg->flags & KBASE_REG_CPU_CACHED)) {
+		reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
+				KBASE_MEM_TYPE_NATIVE);
+		reg->gpu_alloc->imported.kctx = kctx;
+		INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+	} else {
+		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	}
+
+	INIT_LIST_HEAD(&reg->jit_node);
+	reg->flags &= ~KBASE_REG_FREE;
+	return 0;
+}
+
+static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_add_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_sub_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+/*
+ * Max size for kbdev memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * Max size for kctx memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KCTX  (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * The order required for a 2MB page allocation (2^order * 4KB = 2MB)
+ */
+#define KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER	9
+
+/*
+ * The order required for a 4KB page allocation
+ */
+#define KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER	0
+
+/**
+ * kbase_mem_pool_init - Create a memory pool for a kbase device
+ * @pool:      Memory pool to initialize
+ * @max_size:  Maximum number of free pages the pool can hold
+ * @order:     Page order for physical page size (order=0=>4kB, order=9=>2MB)
+ * @kbdev:     Kbase device where memory is used
+ * @next_pool: Pointer to the next pool or NULL.
+ *
+ * Allocations from @pool are in whole pages. Each @pool has a free list where
+ * pages can be quickly allocated from. The free list is initially empty and
+ * filled whenever pages are freed back to the pool. The number of free pages
+ * in the pool will in general not exceed @max_size, but the pool may in
+ * certain corner cases grow above @max_size.
+ *
+ * If @next_pool is not NULL, we will allocate from @next_pool before going to
+ * the kernel allocator. Similarily pages can spill over to @next_pool when
+ * @pool is full. Pages are zeroed before they spill over to another pool, to
+ * prevent leaking information between applications.
+ *
+ * A shrinker is registered so that Linux mm can reclaim pages from the pool as
+ * needed.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		size_t order,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool);
+
+/**
+ * kbase_mem_pool_term - Destroy a memory pool
+ * @pool:  Memory pool to destroy
+ *
+ * Pages in the pool will spill over to @next_pool (if available) or freed to
+ * the kernel.
+ */
+void kbase_mem_pool_term(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_alloc - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * Allocations from the pool are made as follows:
+ * 1. If there are free pages in the pool, allocate a page from @pool.
+ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page
+ *    from @next_pool.
+ * 3. Return NULL if no memory in the pool
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ */
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_free - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @page:  Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * Pages are freed to the pool as follows:
+ * 1. If @pool is not full, add @page to @pool.
+ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to
+ *    @next_pool.
+ * 3. Finally, free @page to the kernel.
+ */
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page,
+		bool dirty);
+
+/**
+ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool
+ * @pool:     Memory pool to allocate from
+ * @nr_pages: Number of pages to allocate
+ * @pages:    Pointer to array where the physical address of the allocated
+ *            pages will be stored.
+ * @partial_allowed: If fewer pages allocated is allowed
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages.
+ *
+ * Return:
+ * On success number of pages allocated (could be less than nr_pages if
+ * partial_allowed).
+ * On error an error code.
+ */
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		struct tagged_addr *pages, bool partial_allowed);
+
+/**
+ * kbase_mem_pool_free_pages - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		struct tagged_addr *pages, bool dirty, bool reclaimed);
+
+/**
+ * kbase_mem_pool_size - Get number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Note: the size of the pool may in certain corner cases exceed @max_size!
+ *
+ * Return: Number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool)
+{
+	return ACCESS_ONCE(pool->cur_size);
+}
+
+/**
+ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Return: Maximum number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool)
+{
+	return pool->max_size;
+}
+
+
+/**
+ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool
+ * @pool:     Memory pool to inspect
+ * @max_size: Maximum number of free pages the pool can hold
+ *
+ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit.
+ * For details see kbase_mem_pool_shrink().
+ */
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
+
+/**
+ * kbase_mem_pool_grow - Grow the pool
+ * @pool:       Memory pool to grow
+ * @nr_to_grow: Number of pages to add to the pool
+ *
+ * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
+ * become larger than the maximum size specified.
+ *
+ * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages
+ */
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+
+/**
+ * kbase_mem_pool_trim - Grow or shrink the pool to a new size
+ * @pool:     Memory pool to trim
+ * @new_size: New number of pages in the pool
+ *
+ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but
+ * not above the max_size for the pool.
+ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel.
+ */
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
+
+/**
+ * kbase_mem_alloc_page - Allocate a new page for a device
+ * @pool:  Memory pool to allocate a page from
+ *
+ * Most uses should use kbase_mem_pool_alloc to allocate a page. However that
+ * function can fail in the event the pool is empty.
+ *
+ * Return: A new page or NULL if no memory
+ */
+struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool);
+
+int kbase_region_tracker_init(struct kbase_context *kctx);
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages);
+void kbase_region_tracker_term(struct kbase_context *kctx);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * @brief Check that a pointer is actually a valid region.
+ *
+ * Must be called with context lock held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr);
+
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone);
+void kbase_free_alloced_region(struct kbase_va_region *reg);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+bool kbase_check_alloc_flags(unsigned long flags);
+bool kbase_check_import_flags(unsigned long flags);
+
+/**
+ * kbase_update_region_flags - Convert user space flags to kernel region flags
+ *
+ * @kctx:  kbase context
+ * @reg:   The region to update the flags on
+ * @flags: The flags passed from user space
+ *
+ * The user space flag BASE_MEM_COHERENT_SYSTEM_REQUIRED will be rejected and
+ * this function will fail if the system does not support system coherency.
+ *
+ * Return: 0 if successful, -EINVAL if the flags are not supported
+ */
+int kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags);
+
+void kbase_gpu_vm_lock(struct kbase_context *kctx);
+void kbase_gpu_vm_unlock(struct kbase_context *kctx);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
+
+int kbase_mmu_init(struct kbase_context *kctx);
+void kbase_mmu_term(struct kbase_context *kctx);
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
+void kbase_mmu_free_pgd(struct kbase_context *kctx);
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+				  struct tagged_addr *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  struct tagged_addr *phys, size_t nr,
+				  unsigned long flags);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr phys, size_t nr,
+					unsigned long flags);
+
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr);
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
+			   struct tagged_addr *phys, size_t nr,
+			   unsigned long flags);
+
+/**
+ * @brief Register region and map it on the GPU.
+ *
+ * Call kbase_add_va_region() and map the region on the GPU.
+ */
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+/**
+ * @brief Remove the region from the GPU and unregister it.
+ *
+ * Must be called with context lock held.
+ */
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ */
+void kbase_mmu_update(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable() - Disable the MMU for a previously active kbase context.
+ * @kctx:	Kbase context
+ *
+ * Disable and perform the required cache maintenance to remove the all
+ * data from provided kbase context from the GPU caches.
+ *
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ */
+void kbase_mmu_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified
+ * address space.
+ * @kbdev:	Kbase device
+ * @as_nr:	The address space number to set to unmapped.
+ *
+ * This function must only be called during reset/power-up and it used to
+ * ensure the registers are in a known state.
+ *
+ * The caller must hold kbdev->mmu_hw_mutex.
+ */
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr);
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** Dump the MMU tables to a buffer
+ *
+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the
+ * buffer is too small then the return value will be NULL.
+ *
+ * The GPU vm lock must be held when calling this function.
+ *
+ * The buffer returned should be freed with @ref vfree when it is no longer required.
+ *
+ * @param[in]   kctx        The kbase context to dump
+ * @param[in]   nr_pages    The number of pages to allocate for the buffer.
+ *
+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too
+ * small)
+ */
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages);
+
+/**
+ * kbase_sync_now - Perform cache maintenance on a memory region
+ *
+ * @kctx: The kbase context of the region
+ * @sset: A syncset structure describing the region and direction of the
+ *        synchronisation required
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset);
+void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr cpu_pa,
+		struct tagged_addr gpu_pa, off_t offset, size_t size,
+		enum kbase_sync_type sync_fn);
+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+
+/* OS specific functions */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr);
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg);
+void kbase_os_mem_map_lock(struct kbase_context *kctx);
+void kbase_os_mem_map_unlock(struct kbase_context *kctx);
+
+/**
+ * @brief Update the memory allocation counters for the current process
+ *
+ * OS specific call to updates the current memory allocation counters for the current process with
+ * the supplied delta.
+ *
+ * @param[in] kctx  The kbase context
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
+
+/**
+ * @brief Add to the memory allocation counters for the current process
+ *
+ * OS specific call to add to the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, pages);
+}
+
+/**
+ * @brief Subtract from the memory allocation counters for the current process
+ *
+ * OS specific call to subtract from the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, 0 - pages);
+}
+
+/**
+ * kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU
+ * mapping of a memory allocation containing a given address range
+ *
+ * Searches for a CPU mapping of any part of any region that fully encloses the
+ * CPU virtual address range specified by @uaddr and @size. Returns a failure
+ * indication if only part of the address range lies within a CPU mapping.
+ *
+ * @kctx:      The kernel base context used for the allocation.
+ * @uaddr:     Start of the CPU virtual address range.
+ * @size:      Size of the CPU virtual address range (in bytes).
+ * @offset:    The offset from the start of the allocation to the specified CPU
+ *             virtual address.
+ *
+ * Return: 0 if offset was obtained successfully. Error code otherwise.
+ */
+int kbasep_find_enclosing_cpu_mapping_offset(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset);
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer);
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+* @brief Allocates physical pages.
+*
+* Allocates \a nr_pages_requested and updates the alloc object.
+*
+* @param[in] alloc allocation object to add pages to
+* @param[in] nr_pages_requested number of physical pages to allocate
+*
+* @return 0 if all pages have been successfully allocated. Error code otherwise
+*/
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested);
+
+/**
+* @brief Free physical pages.
+*
+* Frees \a nr_pages and updates the alloc object.
+*
+* @param[in] alloc allocation object to free pages from
+* @param[in] nr_pages_to_free number of physical pages to free
+*/
+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
+
+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+{
+	SetPagePrivate(p);
+	if (sizeof(dma_addr_t) > sizeof(p->private)) {
+		/* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
+		 * private field stays the same. So we have to be clever and
+		 * use the fact that we only store DMA addresses of whole pages,
+		 * so the low bits should be zero */
+		KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
+		set_page_private(p, dma_addr >> PAGE_SHIFT);
+	} else {
+		set_page_private(p, dma_addr);
+	}
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+	if (sizeof(dma_addr_t) > sizeof(p->private))
+		return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
+
+	return (dma_addr_t)page_private(p);
+}
+
+static inline void kbase_clear_dma_addr(struct page *p)
+{
+	ClearPagePrivate(p);
+}
+
+/**
+* @brief Process a bus or page fault.
+*
+* This function will process a fault on a specific address space
+*
+* @param[in] kbdev   The @ref kbase_device the fault happened on
+* @param[in] kctx    The @ref kbase_context for the faulting address space if
+*                    one was found.
+* @param[in] as      The address space that has the fault
+*/
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_as *as);
+
+/**
+ * @brief Process a page fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void page_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Process a bus fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void bus_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Flush MMU workqueues.
+ *
+ * This function will cause any outstanding page or bus faults to be processed.
+ * It should be called prior to powering off the GPU.
+ *
+ * @param[in] kbdev   Device pointer
+ */
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev);
+
+/**
+ * kbase_sync_single_for_device - update physical memory and give GPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * kbase_jit_debugfs_init - Add per context debugfs entry for JIT.
+ * @kctx: kbase context
+ */
+void kbase_jit_debugfs_init(struct kbase_context *kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_init - Initialize the JIT memory pool management
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_jit_init(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_allocate - Allocate JIT memory
+ * @kctx: kbase context
+ * @info: JIT allocation information
+ *
+ * Return: JIT allocation on success or NULL on failure.
+ */
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info);
+
+/**
+ * kbase_jit_free - Free a JIT allocation
+ * @kctx: kbase context
+ * @reg: JIT allocation
+ *
+ * Frees a JIT allocation and places it into the free pool for later reuse.
+ */
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing
+ * @reg: JIT allocation
+ */
+void kbase_jit_backing_lost(struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_evict - Evict a JIT allocation from the pool
+ * @kctx: kbase context
+ *
+ * Evict the least recently used JIT allocation from the pool. This can be
+ * required if normal VA allocations are failing due to VA exhaustion.
+ *
+ * Return: True if a JIT allocation was freed, false otherwise.
+ */
+bool kbase_jit_evict(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_term - Terminate the JIT memory pool management
+ * @kctx: kbase context
+ */
+void kbase_jit_term(struct kbase_context *kctx);
+
+/**
+ * kbase_map_external_resource - Map an external resource to the GPU.
+ * @kctx:              kbase context.
+ * @reg:               The region to map.
+ * @locked_mm:         The mm_struct which has been locked for this operation.
+ *
+ * Return: The physical allocation which backs the region on success or NULL
+ * on failure.
+ */
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm);
+
+/**
+ * kbase_unmap_external_resource - Unmap an external resource from the GPU.
+ * @kctx:  kbase context.
+ * @reg:   The region to unmap or NULL if it has already been released.
+ * @alloc: The physical allocation being unmapped.
+ */
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_sticky_resource_init - Initialize sticky resource management.
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_sticky_resource_init(struct kbase_context *kctx);
+
+/**
+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @gpu_addr: The GPU address of the external resource.
+ *
+ * Return: The metadata object which represents the binding between the
+ * external resource and the kbase context on success or NULL on failure.
+ */
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_release - Release a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @meta:     Binding metadata.
+ * @gpu_addr: GPU address of the external resource.
+ *
+ * If meta is NULL then gpu_addr will be used to scan the metadata list and
+ * find the matching metadata (if any), otherwise the provided meta will be
+ * used and gpu_addr will be ignored.
+ *
+ * Return: True if the release found the metadata and the reference was dropped.
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_term - Terminate sticky resource management.
+ * @kctx: kbase context
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx);
+
+/**
+ * kbase_zone_cache_update - Update the memory zone cache after new pages have
+ * been added.
+ * @alloc:        The physical memory allocation to build the cache for.
+ * @start_offset: Offset to where the new pages start.
+ *
+ * Updates an existing memory zone cache, updating the counters for the
+ * various zones.
+ * If the memory allocation doesn't already have a zone cache assume that
+ * one isn't created and thus don't do anything.
+ *
+ * Return: Zero cache was updated, negative error code on error.
+ */
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset);
+
+/**
+ * kbase_zone_cache_build - Build the memory zone cache.
+ * @alloc:        The physical memory allocation to build the cache for.
+ *
+ * Create a new zone cache for the provided physical memory allocation if
+ * one doesn't already exist, if one does exist then just return.
+ *
+ * Return: Zero if the zone cache was created, negative error code on error.
+ */
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_zone_cache_clear - Clear the memory zone cache.
+ * @alloc:        The physical memory allocation to clear the cache on.
+ */
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc);
+
+#endif				/* _KBASE_MEM_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
new file mode 100644
index 0000000..37f7a6a
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -0,0 +1,2683 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.c
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/shrinker.h>
+#include <linux/cache.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_ioctl.h>
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ *
+ * Note: Caller must be holding the processes mmap_sem lock.
+ */
+static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
+		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
+		u64 *gpu_va)
+{
+	int zone;
+	int gpu_pc_bits;
+	struct kbase_va_region *reg;
+	struct device *dev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(gpu_va);
+
+	dev = kctx->kbdev->dev;
+	*gpu_va = 0; /* return 0 on failure */
+
+	gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+
+	if (0 == va_pages) {
+		dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!");
+		goto bad_size;
+	}
+
+	if (va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	if (!kbase_check_alloc_flags(*flags)) {
+		dev_warn(dev,
+				"kbase_mem_alloc called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	/* Limit GPU executable allocs to GPU PC size */
+	if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+	    (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT)))
+		goto bad_ex_size;
+
+	/* find out which VA zone to use */
+	if (*flags & BASE_MEM_SAME_VA)
+		zone = KBASE_REG_ZONE_SAME_VA;
+	else if (*flags & BASE_MEM_PROT_GPU_EX)
+		zone = KBASE_REG_ZONE_EXEC;
+	else
+		zone = KBASE_REG_ZONE_CUSTOM_VA;
+
+	reg = kbase_alloc_free_region(kctx, 0, va_pages, zone);
+	if (!reg) {
+		dev_err(dev, "Failed to allocate free region");
+		goto no_region;
+	}
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	if (kbase_reg_prepare_native(reg, kctx) != 0) {
+		dev_err(dev, "Failed to prepare region");
+		goto prepare_failed;
+	}
+
+	if (*flags & BASE_MEM_GROW_ON_GPF) {
+		reg->extent = extent;
+		if (reg->extent == 0)
+			goto invalid_extent;
+	}
+	else
+		reg->extent = 0;
+
+	if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) {
+		dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)",
+				(unsigned long long)commit_pages,
+				(unsigned long long)va_pages);
+		goto no_mem;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & BASE_MEM_SAME_VA) {
+		unsigned long prot = PROT_NONE;
+		unsigned long va_size = va_pages << PAGE_SHIFT;
+		unsigned long va_map = va_size;
+		unsigned long cookie, cookie_nr;
+		unsigned long cpu_addr;
+
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(dev, "No cookies available for allocation!");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_cookie;
+		}
+		/* return a cookie */
+		cookie_nr = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << cookie_nr);
+		BUG_ON(kctx->pending_regions[cookie_nr]);
+		kctx->pending_regions[cookie_nr] = reg;
+
+		kbase_gpu_vm_unlock(kctx);
+
+		/* relocate to correct base */
+		cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		cookie <<= PAGE_SHIFT;
+
+		/*
+		 * 10.1-10.4 UKU userland relies on the kernel to call mmap.
+		 * For all other versions we can just return the cookie
+		 */
+		if (kctx->api_version < KBASE_API_VERSION(10, 1) ||
+		    kctx->api_version > KBASE_API_VERSION(10, 4)) {
+			*gpu_va = (u64) cookie;
+			return reg;
+		}
+		if (*flags & BASE_MEM_PROT_CPU_RD)
+			prot |= PROT_READ;
+		if (*flags & BASE_MEM_PROT_CPU_WR)
+			prot |= PROT_WRITE;
+
+		cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot,
+				MAP_SHARED, cookie);
+
+		if (IS_ERR_VALUE(cpu_addr)) {
+			kbase_gpu_vm_lock(kctx);
+			kctx->pending_regions[cookie_nr] = NULL;
+			kctx->cookies |= (1UL << cookie_nr);
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+
+		*gpu_va = (u64) cpu_addr;
+	} else /* we control the VA */ {
+		if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
+			dev_warn(dev, "Failed to map memory on GPU");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+
+		kbase_gpu_vm_unlock(kctx);
+	}
+
+	return reg;
+
+no_mmap:
+no_cookie:
+no_mem:
+invalid_extent:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+invalid_flags:
+prepare_failed:
+	kfree(reg);
+no_region:
+bad_ex_size:
+bad_flags:
+bad_size:
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_alloc);
+
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(out);
+
+	if (gpu_addr & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev, "mem_query: gpu_addr: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	switch (query) {
+	case KBASE_MEM_QUERY_COMMIT_SIZE:
+		if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) {
+			*out = kbase_reg_current_backed_size(reg);
+		} else {
+			size_t i;
+			struct kbase_aliased *aliased;
+			*out = 0;
+			aliased = reg->cpu_alloc->imported.alias.aliased;
+			for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++)
+				*out += aliased[i].length;
+		}
+		break;
+	case KBASE_MEM_QUERY_VA_SIZE:
+		*out = reg->nr_pages;
+		break;
+	case KBASE_MEM_QUERY_FLAGS:
+	{
+		*out = 0;
+		if (KBASE_REG_CPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_WR;
+		if (KBASE_REG_CPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_RD;
+		if (KBASE_REG_CPU_CACHED & reg->flags)
+			*out |= BASE_MEM_CACHED_CPU;
+		if (KBASE_REG_GPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_WR;
+		if (KBASE_REG_GPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_RD;
+		if (!(KBASE_REG_GPU_NX & reg->flags))
+			*out |= BASE_MEM_PROT_GPU_EX;
+		if (KBASE_REG_SHARE_BOTH & reg->flags)
+			*out |= BASE_MEM_COHERENT_SYSTEM;
+		if (KBASE_REG_SHARE_IN & reg->flags)
+			*out |= BASE_MEM_COHERENT_LOCAL;
+		break;
+	}
+	default:
+		*out = 0;
+		goto out_unlock;
+	}
+
+	ret = 0;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the
+ * Ephemeral memory eviction list.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages which can be freed.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long pages = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+
+	mutex_lock(&kctx->jit_evict_lock);
+
+	list_for_each_entry(alloc, &kctx->evict_list, evict_node)
+		pages += alloc->nents;
+
+	mutex_unlock(&kctx->jit_evict_lock);
+	return pages;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction
+ * list for pages and try to reclaim them.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages freed (can be less then requested) or -1 if the
+ * shrinker failed to free pages in its pool.
+ *
+ * Note:
+ * This function accesses region structures without taking the region lock,
+ * this is required as the OOM killer can call the shrinker after the region
+ * lock has already been held.
+ * This is safe as we can guarantee that a region on the eviction list will
+ * not be freed (kbase_mem_free_region removes the allocation from the list
+ * before destroying it), or modified by other parts of the driver.
+ * The eviction list itself is guarded by the eviction lock and the MMU updates
+ * are protected by their own lock.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	struct kbase_mem_phy_alloc *tmp;
+	unsigned long freed = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+	mutex_lock(&kctx->jit_evict_lock);
+
+	list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
+		int err;
+
+		err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
+				0, alloc->nents);
+		if (err != 0) {
+			/*
+			 * Failed to remove GPU mapping, tell the shrinker
+			 * to stop trying to shrink our slab even though we
+			 * have pages in it.
+			 */
+			freed = -1;
+			goto out_unlock;
+		}
+
+		/*
+		 * Update alloc->evicted before freeing the backing so the
+		 * helper can determine that it needs to bypass the accounting
+		 * and memory pool.
+		 */
+		alloc->evicted = alloc->nents;
+
+		kbase_free_phy_pages_helper(alloc, alloc->evicted);
+		freed += alloc->evicted;
+		list_del_init(&alloc->evict_node);
+
+		/*
+		 * Inform the JIT allocator this region has lost backing
+		 * as it might need to free the allocation.
+		 */
+		kbase_jit_backing_lost(alloc->reg);
+
+		/* Enough pages have been freed so stop now */
+		if (freed > sc->nr_to_scan)
+			break;
+	}
+out_unlock:
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_evictable_reclaim_count_objects(s, sc);
+
+	return kbase_mem_evictable_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_evictable_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->evict_list);
+	mutex_init(&kctx->jit_evict_lock);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink;
+#else
+	kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
+	kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+#endif
+	kctx->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	kctx->reclaim.batch = 0;
+#endif
+	register_shrinker(&kctx->reclaim);
+	return 0;
+}
+
+void kbase_mem_evictable_deinit(struct kbase_context *kctx)
+{
+	unregister_shrinker(&kctx->reclaim);
+}
+
+struct kbase_mem_zone_cache_entry {
+	/* List head used to link the cache entry to the memory allocation. */
+	struct list_head zone_node;
+	/* The zone the cacheline is for. */
+	struct zone *zone;
+	/* The number of pages in the allocation which belong to this zone. */
+	u64 count;
+};
+
+static bool kbase_zone_cache_builder(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	struct kbase_mem_zone_cache_entry *cache = NULL;
+	size_t i;
+	int ret = 0;
+
+	for (i = start_offset; i < alloc->nents; i++) {
+		struct page *p = phys_to_page(as_phys_addr_t(alloc->pages[i]));
+		struct zone *zone = page_zone(p);
+		bool create = true;
+
+		if (cache && (cache->zone == zone)) {
+			/*
+			 * Fast path check as most of the time adjacent
+			 * pages come from the same zone.
+			 */
+			create = false;
+		} else {
+			/*
+			 * Slow path check, walk all the cache entries to see
+			 * if we already know about this zone.
+			 */
+			list_for_each_entry(cache, &alloc->zone_cache, zone_node) {
+				if (cache->zone == zone) {
+					create = false;
+					break;
+				}
+			}
+		}
+
+		/* This zone wasn't found in the cache, create an entry for it */
+		if (create) {
+			cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+			if (!cache) {
+				ret = -ENOMEM;
+				goto bail;
+			}
+			cache->zone = zone;
+			cache->count = 0;
+			list_add(&cache->zone_node, &alloc->zone_cache);
+		}
+
+		cache->count++;
+	}
+	return 0;
+
+bail:
+	return ret;
+}
+
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	/*
+	 * Bail if the zone cache is empty, only update the cache if it
+	 * existed in the first place.
+	 */
+	if (list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, start_offset);
+}
+
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc)
+{
+	/* Bail if the zone cache already exists */
+	if (!list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, 0);
+}
+
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_mem_zone_cache_entry *walker;
+
+	while(!list_empty(&alloc->zone_cache)){
+		walker = list_first_entry(&alloc->zone_cache,
+				struct kbase_mem_zone_cache_entry, zone_node);
+		list_del(&walker->zone_node);
+		kfree(walker);
+	}
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p;
+			struct zone *zone;
+
+			p = phys_to_page(as_phys_addr_t(alloc->pages[i]));
+			zone = page_zone(p);
+
+			zone_page_state_add(1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	kbase_process_page_usage_dec(kctx, alloc->nents);
+	new_page_count = kbase_atomic_sub_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+}
+
+/**
+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable.
+ * @alloc: The physical allocation
+ */
+static
+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	new_page_count = kbase_atomic_add_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters so that the allocation is accounted for
+	 * against the process and thus is visible to the OOM killer,
+	 * then remove it from the reclaimable accounting. */
+	kbase_process_page_usage_inc(kctx, alloc->nents);
+
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(-zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p;
+			struct zone *zone;
+
+			p = phys_to_page(as_phys_addr_t(alloc->pages[i]));
+			zone = page_zone(p);
+			zone_page_state_add(-1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+}
+
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* This alloction can't already be on a list. */
+	WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
+	kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
+			0, gpu_alloc->nents);
+
+	/*
+	 * Add the allocation to the eviction list, after this point the shrink
+	 * can reclaim it.
+	 */
+	mutex_lock(&kctx->jit_evict_lock);
+	list_add(&gpu_alloc->evict_node, &kctx->evict_list);
+	mutex_unlock(&kctx->jit_evict_lock);
+	kbase_mem_evictable_mark_reclaim(gpu_alloc);
+
+	gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED;
+	return 0;
+}
+
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * First remove the allocation from the eviction list as it's no
+	 * longer eligible for eviction.
+	 */
+	list_del_init(&gpu_alloc->evict_node);
+
+	if (gpu_alloc->evicted == 0) {
+		/*
+		 * The backing is still present, update the VM stats as it's
+		 * in use again.
+		 */
+		kbase_mem_evictable_unmark_reclaim(gpu_alloc);
+	} else {
+		/* If the region is still alive ... */
+		if (gpu_alloc->reg) {
+			/* ... allocate replacement backing ... */
+			err = kbase_alloc_phy_pages_helper(gpu_alloc,
+					gpu_alloc->evicted);
+
+			/*
+			 * ... and grow the mapping back to its
+			 * pre-eviction size.
+			 */
+			if (!err)
+				err = kbase_mem_grow_gpu_mapping(kctx,
+						gpu_alloc->reg,
+						gpu_alloc->evicted, 0);
+
+			gpu_alloc->evicted = 0;
+		}
+	}
+
+	/* If the region is still alive remove the DONT_NEED attribute. */
+	if (gpu_alloc->reg)
+		gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED;
+
+	return (err == 0);
+}
+
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+	unsigned int real_flags = 0;
+	unsigned int prev_flags = 0;
+	bool prev_needed, new_needed;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (!gpu_addr)
+		return -EINVAL;
+
+	if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE))
+		return -EINVAL;
+
+	/* nuke other bits */
+	flags &= mask;
+
+	/* check for only supported flags */
+	if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* mask covers bits we don't support? */
+	if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* convert flags */
+	if (BASE_MEM_COHERENT_SYSTEM & flags)
+		real_flags |= KBASE_REG_SHARE_BOTH;
+	else if (BASE_MEM_COHERENT_LOCAL & flags)
+		real_flags |= KBASE_REG_SHARE_IN;
+
+	/* now we can lock down the context, and find the region */
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	/* Is the region being transitioning between not needed and needed? */
+	prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
+	new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
+	if (prev_needed != new_needed) {
+		/* Aliased allocations can't be made ephemeral */
+		if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1)
+			goto out_unlock;
+
+		if (new_needed) {
+			/* Only native allocations can be marked not needed */
+			if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			ret = kbase_mem_evictable_make(reg->gpu_alloc);
+			if (ret)
+				goto out_unlock;
+		} else {
+			kbase_mem_evictable_unmake(reg->gpu_alloc);
+		}
+	}
+
+	/* limit to imported memory */
+	if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) &&
+	     (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
+		goto out_unlock;
+
+	/* no change? */
+	if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/* save for roll back */
+	prev_flags = reg->flags;
+	reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+	reg->flags |= real_flags;
+
+	/* Currently supporting only imported memory */
+	switch (reg->gpu_alloc->type) {
+#ifdef CONFIG_UMP
+	case KBASE_MEM_TYPE_IMPORTED_UMP:
+		ret = kbase_mmu_update_pages(kctx, reg->start_pfn,
+					     kbase_get_gpu_phy_pages(reg),
+				             reg->gpu_alloc->nents, reg->flags);
+		break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		/* Future use will use the new flags, existing mapping will NOT be updated
+		 * as memory should not be in use by the GPU when updating the flags.
+		 */
+		ret = 0;
+		WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+		break;
+#endif
+	default:
+		break;
+	}
+
+	/* roll back on error, i.e. not UMP */
+	if (ret)
+		reg->flags = prev_flags;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	up_write(&current->mm->mmap_sem);
+out:
+	return ret;
+}
+
+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
+
+#ifdef CONFIG_UMP
+static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags)
+{
+	struct kbase_va_region *reg;
+	ump_dd_handle umph;
+	u64 block_count;
+	const ump_dd_physical_block_64 *block_array;
+	u64 i, j;
+	int page = 0;
+	ump_alloc_flags ump_flags;
+	ump_alloc_flags cpu_flags;
+	ump_alloc_flags gpu_flags;
+
+	if (*flags & BASE_MEM_SECURE)
+		goto bad_flags;
+
+	umph = ump_dd_from_secure_id(id);
+	if (UMP_DD_INVALID_MEMORY_HANDLE == umph)
+		goto bad_id;
+
+	ump_flags = ump_dd_allocation_flags_get(umph);
+	cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK;
+	gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) &
+			UMP_DEVICE_MASK;
+
+	*va_pages = ump_dd_size_get_64(umph);
+	*va_pages >>= PAGE_SHIFT;
+
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	if (*flags & BASE_MEM_SAME_VA)
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	else
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+
+	if (!reg)
+		goto no_region;
+
+	/* we've got pages to map now, and support SAME_VA */
+	*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	reg->gpu_alloc->imported.ump_handle = umph;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMP is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMP cannot be grown */
+
+	/* Override import flags based on UMP flags */
+	*flags &= ~(BASE_MEM_CACHED_CPU);
+	*flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR);
+	*flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR);
+
+	if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) {
+		reg->flags |= KBASE_REG_CPU_CACHED;
+		*flags |= BASE_MEM_CACHED_CPU;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_WR) {
+		reg->flags |= KBASE_REG_CPU_WR;
+		*flags |= BASE_MEM_PROT_CPU_WR;
+	}
+
+	if (cpu_flags & UMP_PROT_CPU_RD) {
+		reg->flags |= KBASE_REG_CPU_RD;
+		*flags |= BASE_MEM_PROT_CPU_RD;
+	}
+
+	if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+	    (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR))
+		reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (gpu_flags & UMP_PROT_DEVICE_WR) {
+		reg->flags |= KBASE_REG_GPU_WR;
+		*flags |= BASE_MEM_PROT_GPU_WR;
+	}
+
+	if (gpu_flags & UMP_PROT_DEVICE_RD) {
+		reg->flags |= KBASE_REG_GPU_RD;
+		*flags |= BASE_MEM_PROT_GPU_RD;
+	}
+
+	/* ump phys block query */
+	ump_dd_phys_blocks_get_64(umph, &block_count, &block_array);
+
+	for (i = 0; i < block_count; i++) {
+		for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) {
+			struct tagged_addr tagged;
+
+			tagged = as_tagged(block_array[i].addr +
+					   (j << PAGE_SHIFT));
+			reg->gpu_alloc->pages[page] = tagged;
+			page++;
+		}
+	}
+	reg->gpu_alloc->nents = *va_pages;
+	reg->extent = 0;
+
+	return reg;
+
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	ump_dd_release(umph);
+bad_id:
+bad_flags:
+	return NULL;
+}
+#endif				/* CONFIG_UMP */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
+		int fd, u64 *va_pages, u64 *flags, u32 padding)
+{
+	struct kbase_va_region *reg;
+	struct dma_buf *dma_buf;
+	struct dma_buf_attachment *dma_attachment;
+	bool shared_zone = false;
+
+	dma_buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(dma_buf))
+		goto no_buf;
+
+	dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+	if (!dma_attachment)
+		goto no_attachment;
+
+	*va_pages = (PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT) + padding;
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* ignore SAME_VA */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	} else {
+		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	/* No pages to map yet */
+	reg->gpu_alloc->nents = 0;
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMM is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMM cannot be grown */
+	reg->flags |= KBASE_REG_GPU_CACHED;
+
+	if (*flags & BASE_MEM_SECURE)
+		reg->flags |= KBASE_REG_SECURE;
+
+	if (padding)
+		reg->flags |= KBASE_REG_IMPORT_PAD;
+
+	reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM;
+	reg->gpu_alloc->imported.umm.sgt = NULL;
+	reg->gpu_alloc->imported.umm.dma_buf = dma_buf;
+	reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
+	reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
+	reg->extent = 0;
+
+	return reg;
+
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	dma_buf_detach(dma_buf, dma_attachment);
+no_attachment:
+	dma_buf_put(dma_buf);
+no_buf:
+	return NULL;
+}
+#endif  /* CONFIG_DMA_SHARED_BUFFER */
+
+static u32 kbase_get_cache_line_alignment(struct kbase_context *kctx)
+{
+	u32 cpu_cache_line_size = cache_line_size();
+	u32 gpu_cache_line_size =
+		(1UL << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
+
+	return ((cpu_cache_line_size > gpu_cache_line_size) ?
+				cpu_cache_line_size :
+				gpu_cache_line_size);
+}
+
+static struct kbase_va_region *kbase_mem_from_user_buffer(
+		struct kbase_context *kctx, unsigned long address,
+		unsigned long size, u64 *va_pages, u64 *flags)
+{
+	long i;
+	struct kbase_va_region *reg;
+	long faulted_pages;
+	int zone = KBASE_REG_ZONE_CUSTOM_VA;
+	bool shared_zone = false;
+	u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx);
+	struct kbase_alloc_import_user_buf *user_buf;
+	struct page **pages = NULL;
+
+	if ((address & (cache_line_alignment - 1)) != 0 ||
+			(size & (cache_line_alignment - 1)) != 0) {
+		/* Coherency must be enabled to handle partial cache lines */
+		if (*flags & (BASE_MEM_COHERENT_SYSTEM |
+			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
+			/* Force coherent system required flag, import will
+			 * then fail if coherency isn't available
+			 */
+			*flags |= BASE_MEM_COHERENT_SYSTEM_REQUIRED;
+		} else {
+			dev_warn(kctx->kbdev->dev,
+					"User buffer is not cache line aligned and no coherency enabled\n");
+			goto bad_size;
+		}
+	}
+
+	*va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) -
+		PFN_DOWN(address);
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (UINT64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* SAME_VA generally not supported with imported memory (no known use cases) */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		zone = KBASE_REG_ZONE_SAME_VA;
+	}
+
+	reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone);
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(*va_pages,
+			KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */
+
+	user_buf = &reg->gpu_alloc->imported.user_buf;
+
+	user_buf->size = size;
+	user_buf->address = address;
+	user_buf->nr_pages = *va_pages;
+	user_buf->mm = current->mm;
+	user_buf->pages = kmalloc_array(*va_pages, sizeof(struct page *),
+			GFP_KERNEL);
+
+	if (!user_buf->pages)
+		goto no_page_array;
+
+	/* If the region is coherent with the CPU then the memory is imported
+	 * and mapped onto the GPU immediately.
+	 * Otherwise get_user_pages is called as a sanity check, but with
+	 * NULL as the pages argument which will fault the pages, but not
+	 * pin them. The memory will then be pinned only around the jobs that
+	 * specify the region as an external resource.
+	 */
+	if (reg->flags & KBASE_REG_SHARE_BOTH) {
+		pages = user_buf->pages;
+		*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+	}
+
+	down_read(&current->mm->mmap_sem);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+#else
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#endif
+
+	up_read(&current->mm->mmap_sem);
+
+	if (faulted_pages != *va_pages)
+		goto fault_mismatch;
+
+	atomic_inc(&current->mm->mm_count);
+
+	reg->gpu_alloc->nents = 0;
+	reg->extent = 0;
+
+	if (pages) {
+		struct device *dev = kctx->kbdev->dev;
+		unsigned long local_size = user_buf->size;
+		unsigned long offset = user_buf->address & ~PAGE_MASK;
+		struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
+
+		/* Top bit signifies that this was pinned on import */
+		user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
+
+		for (i = 0; i < faulted_pages; i++) {
+			dma_addr_t dma_addr;
+			unsigned long min;
+
+			min = MIN(PAGE_SIZE - offset, local_size);
+			dma_addr = dma_map_page(dev, pages[i],
+					offset, min,
+					DMA_BIDIRECTIONAL);
+			if (dma_mapping_error(dev, dma_addr))
+				goto unwind_dma_map;
+
+			user_buf->dma_addrs[i] = dma_addr;
+			pa[i] = as_tagged(page_to_phys(pages[i]));
+
+			local_size -= min;
+			offset = 0;
+		}
+
+		reg->gpu_alloc->nents = faulted_pages;
+	}
+
+	return reg;
+
+unwind_dma_map:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				user_buf->dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+fault_mismatch:
+	if (pages) {
+		for (i = 0; i < faulted_pages; i++)
+			put_page(pages[i]);
+	}
+	kfree(user_buf->pages);
+no_page_array:
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	return NULL;
+
+}
+
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+		    u64 nents, struct base_mem_aliasing_info *ai,
+		    u64 *num_pages)
+{
+	struct kbase_va_region *reg;
+	u64 gpu_va;
+	size_t i;
+	bool coherent;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(ai);
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	/* mask to only allowed flags */
+	*flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+		   BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
+		   BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+
+	if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_alias called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+	coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 ||
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0;
+
+	if (!stride)
+		goto bad_stride;
+
+	if (!nents)
+		goto bad_nents;
+
+	if ((nents * stride) > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* calculate the number of pages this alias will cover */
+	*num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* 64-bit tasks must MMAP anyway, but not expose this address to
+		 * clients */
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_SAME_VA);
+	} else {
+#else
+	if (1) {
+#endif
+		reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+					      KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	/* zero-sized page array, as we don't need one/can support one */
+	reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->gpu_alloc->imported.alias.nents = nents;
+	reg->gpu_alloc->imported.alias.stride = stride;
+	reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents);
+	if (!reg->gpu_alloc->imported.alias.aliased)
+		goto no_aliased_array;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* validate and add src handles */
+	for (i = 0; i < nents; i++) {
+		if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+			if (ai[i].handle.basep.handle !=
+			    BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE)
+				goto bad_handle; /* unsupported magic handle */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+		} else {
+			struct kbase_va_region *aliasing_reg;
+			struct kbase_mem_phy_alloc *alloc;
+
+			aliasing_reg = kbase_region_tracker_find_region_base_address(
+				kctx,
+				(ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT);
+
+			/* validate found region */
+			if (!aliasing_reg)
+				goto bad_handle; /* Not found */
+			if (aliasing_reg->flags & KBASE_REG_FREE)
+				goto bad_handle; /* Free region */
+			if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+				goto bad_handle; /* Ephemeral region */
+			if (!aliasing_reg->gpu_alloc)
+				goto bad_handle; /* No alloc */
+			if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+				goto bad_handle; /* Not a native alloc */
+			if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0))
+				goto bad_handle;
+				/* Non-coherent memory cannot alias
+				   coherent memory, and vice versa.*/
+
+			/* check size against stride */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+
+			alloc = aliasing_reg->gpu_alloc;
+
+			/* check against the alloc's size */
+			if (ai[i].offset > alloc->nents)
+				goto bad_handle; /* beyond end */
+			if (ai[i].offset + ai[i].length > alloc->nents)
+				goto bad_handle; /* beyond end */
+
+			reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+			reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset;
+		}
+	}
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
+			goto no_cookie;
+		}
+		/* return a cookie */
+		gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << gpu_va);
+		BUG_ON(kctx->pending_regions[gpu_va]);
+		kctx->pending_regions[gpu_va] = reg;
+
+		/* relocate to correct base */
+		gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		gpu_va <<= PAGE_SHIFT;
+	} else /* we control the VA */ {
+#else
+	if (1) {
+#endif
+		if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+			dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags &= ~KBASE_REG_GROWABLE;
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return gpu_va;
+
+#ifdef CONFIG_64BIT
+no_cookie:
+#endif
+no_mmap:
+bad_handle:
+	kbase_gpu_vm_unlock(kctx);
+no_aliased_array:
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_reg:
+bad_size:
+bad_nents:
+bad_stride:
+bad_flags:
+	return 0;
+}
+
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages,
+		u64 *flags)
+{
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_pages);
+	KBASE_DEBUG_ASSERT(flags);
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+		*flags |= BASE_MEM_SAME_VA;
+#endif
+
+	if (!kbase_check_import_flags(*flags)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
+		dev_warn(kctx->kbdev->dev,
+				"padding is only supported for UMM");
+		goto bad_flags;
+	}
+
+	switch (type) {
+#ifdef CONFIG_UMP
+	case BASE_MEM_IMPORT_TYPE_UMP: {
+		ump_secure_id id;
+
+		if (get_user(id, (ump_secure_id __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_ump(kctx, id, va_pages, flags);
+	}
+	break;
+#endif /* CONFIG_UMP */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case BASE_MEM_IMPORT_TYPE_UMM: {
+		int fd;
+
+		if (get_user(fd, (int __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_umm(kctx, fd, va_pages, flags,
+					padding);
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
+		struct base_mem_import_user_buffer user_buffer;
+		void __user *uptr;
+
+		if (copy_from_user(&user_buffer, phandle,
+				sizeof(user_buffer))) {
+			reg = NULL;
+		} else {
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				uptr = compat_ptr(user_buffer.ptr);
+			else
+#endif
+				uptr = u64_to_user_ptr(user_buffer.ptr);
+
+			reg = kbase_mem_from_user_buffer(kctx,
+					(unsigned long)uptr, user_buffer.length,
+					va_pages, flags);
+		}
+		break;
+	}
+	default: {
+		reg = NULL;
+		break;
+	}
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies)
+			goto no_cookie;
+		/* return a cookie */
+		*gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << *gpu_va);
+		BUG_ON(kctx->pending_regions[*gpu_va]);
+		kctx->pending_regions[*gpu_va] = reg;
+
+		/* relocate to correct base */
+		*gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		*gpu_va <<= PAGE_SHIFT;
+
+	} else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES)  {
+		/* we control the VA, mmap now to the GPU */
+		if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	} else {
+		/* we control the VA, but nothing to mmap yet */
+		if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	/* clear out private flags */
+	*flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+
+no_gpu_va:
+no_cookie:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+no_reg:
+bad_flags:
+	*gpu_va = 0;
+	*va_pages = 0;
+	*flags = 0;
+	return -ENOMEM;
+}
+
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	struct tagged_addr *phy_pages;
+	u64 delta = new_pages - old_pages;
+	int ret = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Map the new pages into the GPU */
+	phy_pages = kbase_get_gpu_phy_pages(reg);
+	ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
+			phy_pages + old_pages, delta, reg->flags);
+
+	return ret;
+}
+
+static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 gpu_va_start = reg->start_pfn;
+
+	if (new_pages == old_pages)
+		/* Nothing to do */
+		return;
+
+	unmap_mapping_range(kctx->filp->f_inode->i_mapping,
+			(gpu_va_start + new_pages)<<PAGE_SHIFT,
+			(old_pages - new_pages)<<PAGE_SHIFT, 1);
+}
+
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 delta = old_pages - new_pages;
+	int ret = 0;
+
+	ret = kbase_mmu_teardown_pages(kctx,
+			reg->start_pfn + new_pages, delta);
+
+	return ret;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
+{
+	u64 old_pages;
+	u64 delta;
+	int res = -EINVAL;
+	struct kbase_va_region *reg;
+	bool read_locked = false;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+	if (gpu_addr & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev, "kbase:mem_commit: gpu_addr: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+		goto out_unlock;
+
+	if (0 == (reg->flags & KBASE_REG_GROWABLE))
+		goto out_unlock;
+
+	/* Would overflow the VA region */
+	if (new_pages > reg->nr_pages)
+		goto out_unlock;
+
+	/* can't be mapped more than once on the GPU */
+	if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1)
+		goto out_unlock;
+	/* can't grow regions which are ephemeral */
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+	if (new_pages == reg->gpu_alloc->nents) {
+		/* no change */
+		res = 0;
+		goto out_unlock;
+	}
+
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (new_pages > old_pages) {
+		delta = new_pages - old_pages;
+
+		/*
+		 * No update to the mm so downgrade the writer lock to a read
+		 * lock so other readers aren't blocked after this point.
+		 */
+		downgrade_write(&current->mm->mmap_sem);
+		read_locked = true;
+
+		/* Allocate some more pages */
+		if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+		if (reg->cpu_alloc != reg->gpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					reg->gpu_alloc, delta) != 0) {
+				res = -ENOMEM;
+				kbase_free_phy_pages_helper(reg->cpu_alloc,
+						delta);
+				goto out_unlock;
+			}
+		}
+
+		/* No update required for CPU mappings, that's done on fault. */
+
+		/* Update GPU mapping. */
+		res = kbase_mem_grow_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* On error free the new pages */
+		if (res) {
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+	} else {
+		delta = old_pages - new_pages;
+
+		/* Update all CPU mapping(s) */
+		kbase_mem_shrink_cpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* Update the GPU mapping */
+		res = kbase_mem_shrink_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+
+		kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+		if (reg->cpu_alloc != reg->gpu_alloc)
+			kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	if (read_locked)
+		up_read(&current->mm->mmap_sem);
+	else
+		up_write(&current->mm->mmap_sem);
+
+	return res;
+}
+
+static void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* non-atomic as we're under Linux' mm lock */
+	map->count++;
+}
+
+static void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* non-atomic as we're under Linux' mm lock */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	kbase_gpu_vm_lock(map->kctx);
+
+	if (map->free_on_close) {
+		KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
+				KBASE_REG_ZONE_SAME_VA);
+		/* Avoid freeing memory on the process death which results in
+		 * GPU Page Fault. Memory will be freed in kbase_destroy_context
+		 */
+		if (!(current->flags & PF_EXITING))
+			kbase_mem_free_region(map->kctx, map->region);
+	}
+
+	list_del(&map->mappings_list);
+
+	kbase_gpu_vm_unlock(map->kctx);
+
+	kbase_mem_phy_alloc_put(map->alloc);
+	kfree(map);
+}
+
+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
+
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
+static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+#else
+static int kbase_cpu_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+#endif
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+	pgoff_t rel_pgoff;
+	size_t i;
+	pgoff_t addr;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	rel_pgoff = vmf->pgoff - map->region->start_pfn;
+
+	kbase_gpu_vm_lock(map->kctx);
+	if (rel_pgoff >= map->alloc->nents)
+		goto locked_bad_fault;
+
+	/* Fault on access to DONT_NEED regions */
+	if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED))
+		goto locked_bad_fault;
+
+	/* insert all valid pages from the fault location */
+	i = rel_pgoff;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	addr = (pgoff_t)((uintptr_t)vmf->virtual_address >> PAGE_SHIFT);
+#else
+	addr = (pgoff_t)(vmf->address >> PAGE_SHIFT);
+#endif
+	while (i < map->alloc->nents && (addr < vma->vm_end >> PAGE_SHIFT)) {
+		int ret = vm_insert_pfn(vma, addr << PAGE_SHIFT,
+		    PFN_DOWN(as_phys_addr_t(map->alloc->pages[i])));
+		if (ret < 0 && ret != -EBUSY)
+			goto locked_bad_fault;
+
+		i++; addr++;
+	}
+
+	kbase_gpu_vm_unlock(map->kctx);
+	/* we resolved it, nothing for VM to do */
+	return VM_FAULT_NOPAGE;
+
+locked_bad_fault:
+	kbase_gpu_vm_unlock(map->kctx);
+	return VM_FAULT_SIGBUS;
+}
+
+const struct vm_operations_struct kbase_vm_ops = {
+	.open  = kbase_cpu_vm_open,
+	.close = kbase_cpu_vm_close,
+	.fault = kbase_cpu_vm_fault
+};
+
+static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close)
+{
+	struct kbase_cpu_mapping *map;
+	struct tagged_addr *page_array;
+	int err = 0;
+	int i;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+	if (!map) {
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+	 * VM_DONTEXPAND - disable mremap on this region
+	 * VM_IO - disables paging
+	 * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+	 * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+	 *               This is needed to support using the dedicated and
+	 *               the OS based memory backends together.
+	 */
+	/*
+	 * This will need updating to propagate coherency flags
+	 * See MIDBASE-1057
+	 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_ops;
+	vma->vm_private_data = map;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+	    (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+		/* We can't map vmalloc'd memory uncached.
+		 * Other memory will have been returned from
+		 * kbase_mem_pool which would be
+		 * suitable for mapping uncached.
+		 */
+		BUG_ON(kaddr);
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	if (!kaddr) {
+		unsigned long addr = vma->vm_start + aligned_offset;
+		u64 start_off = vma->vm_pgoff - reg->start_pfn +
+			(aligned_offset>>PAGE_SHIFT);
+
+		vma->vm_flags |= VM_PFNMAP;
+		for (i = 0; i < nr_pages; i++) {
+			phys_addr_t phys;
+
+			phys = as_phys_addr_t(page_array[i + start_off]);
+			err = vm_insert_pfn(vma, addr, PFN_DOWN(phys));
+			if (WARN_ON(err))
+				break;
+
+			addr += PAGE_SIZE;
+		}
+	} else {
+		WARN_ON(aligned_offset);
+		/* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+		vma->vm_flags |= VM_MIXEDMAP;
+		/* vmalloc remaping is easy... */
+		err = remap_vmalloc_range(vma, kaddr, 0);
+		WARN_ON(err);
+	}
+
+	if (err) {
+		kfree(map);
+		goto out;
+	}
+
+	map->region = reg;
+	map->free_on_close = free_on_close;
+	map->kctx = reg->kctx;
+	map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->count = 1; /* start with one ref */
+
+	if (reg->flags & KBASE_REG_CPU_CACHED)
+		map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+	return err;
+}
+
+static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
+{
+	struct kbase_va_region *new_reg;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+	u32 *tb;
+	int owns_tb = 1;
+
+	dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	if (!kctx->jctx.tb) {
+		KBASE_DEBUG_ASSERT(0 != size);
+		tb = vmalloc_user(size);
+
+		if (NULL == tb) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = kbase_device_trace_buffer_install(kctx, tb, size);
+		if (err) {
+			vfree(tb);
+			goto out;
+		}
+	} else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	*kaddr = kctx->jctx.tb;
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_region;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->cpu_alloc->imported.kctx = kctx;
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+
+	/* alloc now owns the tb */
+	owns_tb = 0;
+
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_va_region;
+	}
+
+	*reg = new_reg;
+
+	/* map read only, noexec */
+	vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+	/* the rest of the flags is added by the cpu_mmap handler */
+
+	dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
+	return 0;
+
+out_no_va_region:
+out_no_alloc:
+	kbase_free_alloced_region(new_reg);
+out_no_region:
+	if (owns_tb) {
+		kbase_device_trace_buffer_uninstall(kctx);
+		vfree(tb);
+	}
+out:
+	return err;
+}
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
+{
+	struct kbase_va_region *new_reg;
+	void *kaddr;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	kaddr = kbase_mmu_dump(kctx, nr_pages);
+
+	if (!kaddr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_va_region;
+	}
+
+	*kmap_addr = kaddr;
+	*reg = new_reg;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+	return 0;
+
+out_no_alloc:
+out_va_region:
+	kbase_free_alloced_region(new_reg);
+out:
+	return err;
+}
+
+
+void kbase_os_mem_map_lock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	down_read(&mm->mmap_sem);
+}
+
+void kbase_os_mem_map_unlock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	up_read(&mm->mmap_sem);
+}
+
+static int kbasep_reg_mmap(struct kbase_context *kctx,
+			   struct vm_area_struct *vma,
+			   struct kbase_va_region **regm,
+			   size_t *nr_pages, size_t *aligned_offset)
+
+{
+	int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+	struct kbase_va_region *reg;
+	int err = 0;
+
+	*aligned_offset = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n");
+
+	/* SAME_VA stuff, fetch the right region */
+	reg = kctx->pending_regions[cookie];
+	if (!reg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if ((reg->flags & KBASE_REG_GPU_NX) && (reg->nr_pages != *nr_pages)) {
+		/* incorrect mmap size */
+		/* leave the cookie for a potential later
+		 * mapping, or to be reclaimed later when the
+		 * context is freed */
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) ||
+	    (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) {
+		/* VM flags inconsistent with region flags */
+		err = -EPERM;
+		dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n",
+							__FILE__, __LINE__);
+		goto out;
+	}
+
+	/* adjust down nr_pages to what we have physically */
+	*nr_pages = kbase_reg_current_backed_size(reg);
+
+	if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset,
+						reg->nr_pages, 1) != 0) {
+		dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__);
+		/* Unable to map in GPU space. */
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+	/* no need for the cookie anymore */
+	kctx->pending_regions[cookie] = NULL;
+	kctx->cookies |= (1UL << cookie);
+
+	/*
+	 * Overwrite the offset with the region start_pfn, so we effectively
+	 * map from offset 0 in the region. However subtract the aligned
+	 * offset so that when user space trims the mapping the beginning of
+	 * the trimmed VMA has the correct vm_pgoff;
+	 */
+	vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT);
+out:
+	*regm = reg;
+	dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n");
+
+	return err;
+}
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx = file->private_data;
+	struct kbase_va_region *reg = NULL;
+	void *kaddr = NULL;
+	size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int err = 0;
+	int free_on_close = 0;
+	struct device *dev = kctx->kbdev->dev;
+	size_t aligned_offset = 0;
+
+	dev_dbg(dev, "kbase_mmap\n");
+
+	/* strip away corresponding VM_MAY% flags to the VM_% flags requested */
+	vma->vm_flags &= ~((vma->vm_flags & (VM_READ | VM_WRITE)) << 4);
+
+	if (0 == nr_pages) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
+		/* The non-mapped tracking helper page */
+		err = kbase_tracking_page_setup(kctx, vma);
+		goto out_unlock;
+	}
+
+	/* if not the MTP, verify that the MTP has been mapped */
+	rcu_read_lock();
+	/* catches both when the special page isn't present or
+	 * when we've forked */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		err = -EINVAL;
+		rcu_read_unlock();
+		goto out_unlock;
+	}
+	rcu_read_unlock();
+
+	switch (vma->vm_pgoff) {
+	case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
+	case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE):
+		/* Illegal handle for direct map */
+		err = -EINVAL;
+		goto out_unlock;
+	case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE):
+		err = kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+		/* MMU dump */
+		err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+	     PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+		err = kbasep_reg_mmap(kctx, vma, &reg, &nr_pages,
+							&aligned_offset);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	}
+	default: {
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+					(u64)vma->vm_pgoff << PAGE_SHIFT);
+
+		if (reg && !(reg->flags & KBASE_REG_FREE)) {
+			/* will this mapping overflow the size of the region? */
+			if (nr_pages > (reg->nr_pages -
+					(vma->vm_pgoff - reg->start_pfn))) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
+			if ((vma->vm_flags & VM_READ &&
+			     !(reg->flags & KBASE_REG_CPU_RD)) ||
+			    (vma->vm_flags & VM_WRITE &&
+			     !(reg->flags & KBASE_REG_CPU_WR))) {
+				/* VM flags inconsistent with region flags */
+				err = -EPERM;
+				dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+				goto out_unlock;
+			}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+			if (KBASE_MEM_TYPE_IMPORTED_UMM ==
+							reg->cpu_alloc->type) {
+				err = dma_buf_mmap(
+					reg->cpu_alloc->imported.umm.dma_buf,
+					vma, vma->vm_pgoff - reg->start_pfn);
+				goto out_unlock;
+			}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+			/* limit what we map to the amount currently backed */
+			if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+				if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
+					nr_pages = 0;
+				else
+					nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn);
+			}
+		} else {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+	} /* default */
+	} /* switch */
+
+	err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
+		/* MMU dump - userspace should now have a reference on
+		 * the pages, so we can now free the kernel mapping */
+		vfree(kaddr);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+out:
+	if (err)
+		dev_err(dev, "mmap failed %d\n", err);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmap);
+
+static void kbasep_sync_mem_regions(struct kbase_context *kctx,
+		struct kbase_vmap_struct *map, enum kbase_sync_type dest)
+{
+	size_t i;
+	off_t const offset = (uintptr_t)map->gpu_addr & ~PAGE_MASK;
+	size_t const page_count = PFN_UP(offset + map->size);
+
+	/* Sync first page */
+	size_t sz = MIN(((size_t) PAGE_SIZE - offset), map->size);
+	struct tagged_addr cpu_pa = map->cpu_pages[0];
+	struct tagged_addr gpu_pa = map->gpu_pages[0];
+
+	kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, dest);
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		cpu_pa = map->cpu_pages[i];
+		gpu_pa = map->gpu_pages[i];
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, dest);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1) {
+		cpu_pa = map->cpu_pages[page_count - 1];
+		gpu_pa = map->gpu_pages[page_count - 1];
+		sz = ((offset + map->size - 1) & ~PAGE_MASK) + 1;
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, dest);
+	}
+}
+
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map)
+{
+	struct kbase_va_region *reg;
+	unsigned long page_index;
+	unsigned int offset = gpu_addr & ~PAGE_MASK;
+	size_t page_count = PFN_UP(offset + size);
+	struct tagged_addr *page_array;
+	struct page **pages;
+	void *cpu_addr = NULL;
+	pgprot_t prot;
+	size_t i;
+
+	if (!size || !map)
+		return NULL;
+
+	/* check if page_count calculation will wrap */
+	if (size > ((size_t)-1 / PAGE_SIZE))
+		return NULL;
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn;
+
+	/* check if page_index + page_count will wrap */
+	if (-1UL - page_count < page_index)
+		goto out_unlock;
+
+	if (page_index + page_count > kbase_reg_current_backed_size(reg))
+		goto out_unlock;
+
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+	/* check access permissions can be satisfied
+	 * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */
+	if ((reg->flags & prot_request) != prot_request)
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+	if (!page_array)
+		goto out_unlock;
+
+	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out_unlock;
+
+	for (i = 0; i < page_count; i++)
+		pages[i] = phys_to_page(as_phys_addr_t(page_array[page_index +
+								  i]));
+
+	prot = PAGE_KERNEL;
+	if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
+		/* Map uncached */
+		prot = pgprot_writecombine(prot);
+	}
+	/* Note: enforcing a RO prot_request onto prot is not done, since:
+	 * - CPU-arch-specific integration required
+	 * - kbase_vmap() requires no access checks to be made/enforced */
+
+	cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+
+	kfree(pages);
+
+	if (!cpu_addr)
+		goto out_unlock;
+
+	map->gpu_addr = gpu_addr;
+	map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
+	map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+	map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
+	map->addr = (void *)((uintptr_t)cpu_addr + offset);
+	map->size = size;
+	map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) &&
+		!kbase_mem_is_imported(map->gpu_alloc->type);
+
+	if (map->sync_needed)
+		kbasep_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU);
+	kbase_gpu_vm_unlock(kctx);
+
+	return map->addr;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return NULL;
+}
+
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map)
+{
+	/* 0 is specified for prot_request to indicate no access checks should
+	 * be made.
+	 *
+	 * As mentioned in kbase_vmap_prot() this means that a kernel-side
+	 * CPU-RO mapping is not enforced to allow this to work */
+	return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map);
+}
+KBASE_EXPORT_TEST_API(kbase_vmap);
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+	void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+	vunmap(addr);
+
+	if (map->sync_needed)
+		kbasep_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE);
+	map->gpu_addr = 0;
+	map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+	map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+	map->cpu_pages = NULL;
+	map->gpu_pages = NULL;
+	map->addr = NULL;
+	map->size = 0;
+	map->sync_needed = false;
+}
+KBASE_EXPORT_TEST_API(kbase_vunmap);
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
+{
+	struct mm_struct *mm;
+
+	rcu_read_lock();
+	mm = rcu_dereference(kctx->process_mm);
+	if (mm) {
+		atomic_add(pages, &kctx->nonmapped_pages);
+#ifdef SPLIT_RSS_COUNTING
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+#else
+		spin_lock(&mm->page_table_lock);
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+		spin_unlock(&mm->page_table_lock);
+#endif
+	}
+	rcu_read_unlock();
+}
+
+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
+{
+	int pages;
+	struct mm_struct *mm;
+
+	spin_lock(&kctx->mm_update_lock);
+	mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
+	if (!mm) {
+		spin_unlock(&kctx->mm_update_lock);
+		return;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, NULL);
+	spin_unlock(&kctx->mm_update_lock);
+	synchronize_rcu();
+
+	pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+#ifdef SPLIT_RSS_COUNTING
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+#else
+	spin_lock(&mm->page_table_lock);
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+	spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx;
+
+	kctx = vma->vm_private_data;
+	kbasep_os_process_page_usage_drain(kctx);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+	.close = kbase_special_vm_close,
+};
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
+{
+	/* check that this is the only tracking page */
+	spin_lock(&kctx->mm_update_lock);
+	if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
+		spin_unlock(&kctx->mm_update_lock);
+		return -EFAULT;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, current->mm);
+
+	spin_unlock(&kctx->mm_update_lock);
+
+	/* no real access */
+	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_special_ops;
+	vma->vm_private_data = kctx;
+
+	return 0;
+}
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
+{
+	int res;
+	void *va;
+	dma_addr_t  dma_pa;
+	struct kbase_va_region *reg;
+	struct tagged_addr *page_array;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	unsigned long attrs = DMA_ATTR_WRITE_COMBINE;
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
+	u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+		    BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+	u32 i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(0 != size);
+	KBASE_DEBUG_ASSERT(0 != pages);
+
+	if (size == 0)
+		goto err;
+
+	/* All the alloc calls return zeroed memory */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+			     attrs);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+			     &attrs);
+#else
+	va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
+#endif
+	if (!va)
+		goto err;
+
+	/* Store the state so we can free it later. */
+	handle->cpu_va = va;
+	handle->dma_pa = dma_pa;
+	handle->size   = size;
+
+
+	reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA);
+	if (!reg)
+		goto no_reg;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	if (kbase_update_region_flags(kctx, reg, flags) != 0)
+		goto invalid_flags;
+
+	reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(reg->cpu_alloc))
+		goto no_alloc;
+
+	reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	for (i = 0; i < pages; i++)
+		page_array[i] = as_tagged(dma_pa + ((dma_addr_t)i << PAGE_SHIFT));
+
+	reg->cpu_alloc->nents = pages;
+
+	kbase_gpu_vm_lock(kctx);
+	res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
+	kbase_gpu_vm_unlock(kctx);
+	if (res)
+		goto no_mmap;
+
+	return va;
+
+no_mmap:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc:
+invalid_flags:
+	kfree(reg);
+no_reg:
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
+#endif
+err:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_va_alloc);
+
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
+{
+	struct kbase_va_region *reg;
+	int err;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
+	KBASE_DEBUG_ASSERT(reg);
+	err = kbase_gpu_munmap(kctx, reg);
+	kbase_gpu_vm_unlock(kctx);
+	KBASE_DEBUG_ASSERT(!err);
+
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+		       handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+			handle->cpu_va, handle->dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, handle->size,
+				handle->cpu_va, handle->dma_pa);
+#endif
+}
+KBASE_EXPORT_SYMBOL(kbase_va_free);
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
new file mode 100644
index 0000000..db35f62
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -0,0 +1,240 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.h
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_MEM_LINUX_H_
+#define _KBASE_MEM_LINUX_H_
+
+/** A HWC dump mapping */
+struct kbase_hwc_dma_mapping {
+	void       *cpu_va;
+	dma_addr_t  dma_pa;
+	size_t      size;
+};
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
+		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
+		u64 *gpu_va);
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages);
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages,
+		u64 *flags);
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
+
+/**
+ * kbase_mem_commit - Change the physical backing size of a region
+ *
+ * @kctx: The kernel context
+ * @gpu_addr: Handle to the memory region
+ * @new_pages: Number of physical pages to back the region with
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages);
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction
+ * mechanism.
+ * @kctx: The kbase context to initialize.
+ *
+ * Return: Zero on success or -errno on failure.
+ */
+int kbase_mem_evictable_init(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to de-initialize.
+ */
+void kbase_mem_evictable_deinit(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the grow
+ * @old_pages: The number of pages before the grow
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Expand the GPU mapping to encompass the new psychical pages which have
+ * been added to the allocation.
+ *
+ * Note: Caller must be holding the region lock.
+ */
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction
+ * @gpu_alloc: The physical allocation to make evictable
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Take the provided region and make all the physical pages within it
+ * reclaimable by the kernel, updating the per-process VM stats as well.
+ * Remove any CPU mappings (as these can't be removed in the shrinker callback
+ * as mmap_sem might already be taken) but leave the GPU mapping intact as
+ * and until the shrinker reclaims the allocation.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc);
+
+/**
+ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for
+ * eviction.
+ * @alloc: The physical allocation to remove eviction eligibility from.
+ *
+ * Return: True if the allocation had its backing restored and false if
+ * it hasn't.
+ *
+ * Make the physical pages in the region no longer reclaimable and update the
+ * per-process stats, if the shrinker has already evicted the memory then
+ * re-allocate it if the region is still alive.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
+
+struct kbase_vmap_struct {
+	u64 gpu_addr;
+	struct kbase_mem_phy_alloc *cpu_alloc;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+	struct tagged_addr *cpu_pages;
+	struct tagged_addr *gpu_pages;
+	void *addr;
+	size_t size;
+	bool sync_needed;
+};
+
+
+/**
+ * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the
+ * requested access permissions are supported
+ * @kctx:         Context the VA range belongs to
+ * @gpu_addr:     Start address of VA range
+ * @size:         Size of VA range
+ * @prot_request: Flags indicating how the caller will then access the memory
+ * @map:          Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check
+ * whether the region should allow the intended access, and return an error if
+ * disallowed. This is essential for security of imported memory, particularly
+ * a user buf from SHM mapped into the process as RO. In that case, write
+ * access must be checked if the intention is for kernel to write to the
+ * memory.
+ *
+ * The checks are also there to help catch access errors on memory where
+ * security is not a concern: imported memory that is always RW, and memory
+ * that was allocated and owned by the process attached to @kctx. In this case,
+ * it helps to identify memory that was was mapped with the wrong access type.
+ *
+ * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases
+ * where either the security of memory is solely dependent on those flags, or
+ * when userspace code was expecting only the GPU to access the memory (e.g. HW
+ * workarounds).
+ *
+ * All cache maintenance operations shall be ignored if the
+ * memory region has been imported.
+ *
+ */
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vmap - Map a GPU VA range into the kernel safely
+ * @kctx:     Context the VA range belongs to
+ * @gpu_addr: Start address of VA range
+ * @size:     Size of VA range
+ * @map:      Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no
+ * checks to ensure the security of e.g. imported user bufs from RO SHM.
+ *
+ * Note: All cache maintenance operations shall be ignored if the memory region
+ * has been imported.
+ */
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vunmap - Unmap a GPU VA range from the kernel
+ * @kctx: Context the VA range belongs to
+ * @map:  Structure describing the mapping from the corresponding kbase_vmap()
+ *        call
+ *
+ * Unmaps a GPU VA range from the kernel, given its @map structure obtained
+ * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * The reference taken on pages during kbase_vmap() is released.
+ *
+ * Note: All cache maintenance operations shall be ignored if the memory region
+ * has been imported.
+ */
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
+
+/** @brief Allocate memory from kernel space and map it onto the GPU
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param size   The size of the allocation in bytes
+ * @param handle An opaque structure used to contain the state needed to free the memory
+ * @return the VA for kernel space and GPU MMU
+ */
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
+
+/** @brief Free/unmap memory allocated by kbase_va_alloc
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param handle An opaque structure returned by the kbase_va_alloc function.
+ */
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
+
+extern const struct vm_operations_struct kbase_vm_ops;
+
+#endif				/* _KBASE_MEM_LINUX_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
new file mode 100644
index 0000000..f4e8849
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -0,0 +1,89 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014,2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_MEM_LOWLEVEL_H
+#define _KBASE_MEM_LOWLEVEL_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/dma-mapping.h>
+
+/**
+ * @brief Flags for kbase_phy_allocator_pages_alloc
+ */
+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0)	/** Default allocation flag */
+#define KBASE_PHY_PAGES_FLAG_CLEAR   (1 << 0)	/** Clear the pages after allocation */
+#define KBASE_PHY_PAGES_FLAG_POISON  (1 << 1)	/** Fill the memory with a poison value */
+
+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON)
+
+#define KBASE_PHY_PAGES_POISON_VALUE  0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */
+
+enum kbase_sync_type {
+	KBASE_SYNC_TO_CPU,
+	KBASE_SYNC_TO_DEVICE
+};
+
+struct tagged_addr { phys_addr_t tagged_addr; };
+
+#define HUGE_PAGE    (1u << 0)
+#define HUGE_HEAD    (1u << 1)
+#define FROM_PARTIAL (1u << 2)
+
+static inline phys_addr_t as_phys_addr_t(struct tagged_addr t)
+{
+	return t.tagged_addr & PAGE_MASK;
+}
+
+static inline struct tagged_addr as_tagged(phys_addr_t phys)
+{
+	struct tagged_addr t;
+
+	t.tagged_addr = phys & PAGE_MASK;
+	return t;
+}
+
+static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag)
+{
+	struct tagged_addr t;
+
+	t.tagged_addr = (phys & PAGE_MASK) | (tag & ~PAGE_MASK);
+	return t;
+}
+
+static inline bool is_huge(struct tagged_addr t)
+{
+	return t.tagged_addr & HUGE_PAGE;
+}
+
+static inline bool is_huge_head(struct tagged_addr t)
+{
+	int mask = HUGE_HEAD | HUGE_PAGE;
+
+	return mask == (t.tagged_addr & mask);
+}
+
+static inline bool is_partial(struct tagged_addr t)
+{
+	return t.tagged_addr & FROM_PARTIAL;
+}
+
+#endif /* _KBASE_LOWLEVEL_H */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
new file mode 100644
index 0000000..696730a
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -0,0 +1,651 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/shrinker.h>
+#include <linux/atomic.h>
+#include <linux/version.h>
+
+#define pool_dbg(pool, format, ...) \
+	dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format,	\
+		(pool->next_pool) ? "kctx" : "kbdev",	\
+		kbase_mem_pool_size(pool),	\
+		kbase_mem_pool_max_size(pool),	\
+		##__VA_ARGS__)
+
+#define NOT_DIRTY false
+#define NOT_RECLAIMED false
+
+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
+{
+	spin_lock(&pool->pool_lock);
+}
+
+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
+{
+	spin_unlock(&pool->pool_lock);
+}
+
+static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
+{
+	ssize_t max_size = kbase_mem_pool_max_size(pool);
+	ssize_t cur_size = kbase_mem_pool_size(pool);
+
+	return max(max_size - cur_size, (ssize_t)0);
+}
+
+static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool);
+}
+
+static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) == 0;
+}
+
+static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_add(&p->lru, &pool->page_list);
+	pool->cur_size++;
+
+	zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "added page\n");
+}
+
+static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_locked(pool, p);
+	kbase_mem_pool_unlock(pool);
+}
+
+static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_for_each_entry(p, page_list, lru) {
+		zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+	}
+
+	list_splice(page_list, &pool->page_list);
+	pool->cur_size += nr_pages;
+
+	pool_dbg(pool, "added %zu pages\n", nr_pages);
+}
+
+static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_list_locked(pool, page_list, nr_pages);
+	kbase_mem_pool_unlock(pool);
+}
+
+static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (kbase_mem_pool_is_empty(pool))
+		return NULL;
+
+	p = list_first_entry(&pool->page_list, struct page, lru);
+	list_del_init(&p->lru);
+	pool->cur_size--;
+
+	zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
+	pool_dbg(pool, "removed page\n");
+
+	return p;
+}
+
+static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	kbase_mem_pool_lock(pool);
+	p = kbase_mem_pool_remove_locked(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return p;
+}
+
+static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_sync_single_for_device(dev, kbase_dma_addr(p),
+			(PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL);
+}
+
+static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	int i;
+
+	for (i = 0; i < (1U << pool->order); i++)
+		clear_highpage(p+i);
+
+	kbase_mem_pool_sync_page(pool, p);
+}
+
+static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool,
+		struct page *p)
+{
+	/* Zero page before spilling */
+	kbase_mem_pool_zero_page(next_pool, p);
+
+	kbase_mem_pool_add(next_pool, p);
+}
+
+struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+	gfp_t gfp;
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr;
+	int i;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \
+	LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+	/* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+	gfp = GFP_USER | __GFP_ZERO;
+#else
+	gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+	if (current->flags & PF_KTHREAD) {
+		/* Don't trigger OOM killer from kernel threads, e.g. when
+		 * growing memory on GPU page fault */
+		gfp |= __GFP_NORETRY;
+	}
+
+	/* don't warn on higer order failures */
+	if (pool->order)
+		gfp |= __GFP_NOWARN;
+
+	p = alloc_pages(gfp, pool->order);
+	if (!p)
+		return NULL;
+
+	dma_addr = dma_map_page(dev, p, 0, (PAGE_SIZE << pool->order),
+				DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, dma_addr)) {
+		__free_pages(p, pool->order);
+		return NULL;
+	}
+
+	WARN_ON(dma_addr != page_to_phys(p));
+	for (i = 0; i < (1u << pool->order); i++)
+		kbase_set_dma_addr(p+i, dma_addr + PAGE_SIZE * i);
+
+	return p;
+}
+
+static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr = kbase_dma_addr(p);
+	int i;
+
+	dma_unmap_page(dev, dma_addr, (PAGE_SIZE << pool->order),
+		       DMA_BIDIRECTIONAL);
+	for (i = 0; i < (1u << pool->order); i++)
+		kbase_clear_dma_addr(p+i);
+	__free_pages(p, pool->order);
+
+	pool_dbg(pool, "freed page to kernel\n");
+}
+
+static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	struct page *p;
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	return i;
+}
+
+static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	size_t nr_freed;
+
+	kbase_mem_pool_lock(pool);
+	nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	kbase_mem_pool_unlock(pool);
+
+	return nr_freed;
+}
+
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
+		size_t nr_to_grow)
+{
+	struct page *p;
+	size_t i;
+
+	for (i = 0; i < nr_to_grow; i++) {
+		p = kbase_mem_alloc_page(pool);
+		if (!p)
+			return -ENOMEM;
+		kbase_mem_pool_add(pool, p);
+	}
+
+	return 0;
+}
+
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
+{
+	size_t cur_size;
+	int err = 0;
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	if (new_size > pool->max_size)
+		new_size = pool->max_size;
+
+	if (new_size < cur_size)
+		kbase_mem_pool_shrink(pool, cur_size - new_size);
+	else if (new_size > cur_size)
+		err = kbase_mem_pool_grow(pool, new_size - cur_size);
+
+	if (err) {
+		size_t grown_size = kbase_mem_pool_size(pool);
+
+		dev_warn(pool->kbdev->dev,
+			 "Mem pool not grown to the required size of %zu bytes, grown for additional %zu bytes instead!\n",
+			 (new_size - cur_size), (grown_size - cur_size));
+	}
+}
+
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size)
+{
+	size_t cur_size;
+	size_t nr_to_shrink;
+
+	kbase_mem_pool_lock(pool);
+
+	pool->max_size = max_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+	if (max_size < cur_size) {
+		nr_to_shrink = cur_size - max_size;
+		kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	}
+
+	kbase_mem_pool_unlock(pool);
+}
+
+
+static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+	pool_dbg(pool, "reclaim count: %zu\n", kbase_mem_pool_size(pool));
+	return kbase_mem_pool_size(pool);
+}
+
+static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+	unsigned long freed;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+
+	pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan);
+
+	freed = kbase_mem_pool_shrink(pool, sc->nr_to_scan);
+
+	pool_dbg(pool, "reclaim freed %ld pages\n", freed);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_pool_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_pool_reclaim_count_objects(s, sc);
+
+	return kbase_mem_pool_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		size_t order,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool)
+{
+	pool->cur_size = 0;
+	pool->max_size = max_size;
+	pool->order = order;
+	pool->kbdev = kbdev;
+	pool->next_pool = next_pool;
+
+	spin_lock_init(&pool->pool_lock);
+	INIT_LIST_HEAD(&pool->page_list);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink;
+#else
+	pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects;
+	pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects;
+#endif
+	pool->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	pool->reclaim.batch = 0;
+#endif
+	register_shrinker(&pool->reclaim);
+
+	pool_dbg(pool, "initialized\n");
+
+	return 0;
+}
+
+void kbase_mem_pool_term(struct kbase_mem_pool *pool)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_spill = 0;
+	LIST_HEAD(spill_list);
+	int i;
+
+	pool_dbg(pool, "terminate()\n");
+
+	unregister_shrinker(&pool->reclaim);
+
+	kbase_mem_pool_lock(pool);
+	pool->max_size = 0;
+
+	if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool (may overspill) */
+		nr_to_spill = kbase_mem_pool_capacity(next_pool);
+		nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill);
+
+		/* Zero pages first without holding the next_pool lock */
+		for (i = 0; i < nr_to_spill; i++) {
+			p = kbase_mem_pool_remove_locked(pool);
+			kbase_mem_pool_zero_page(pool, p);
+			list_add(&p->lru, &spill_list);
+		}
+	}
+
+	while (!kbase_mem_pool_is_empty(pool)) {
+		/* Free remaining pages to kernel */
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	kbase_mem_pool_unlock(pool);
+
+	if (next_pool && nr_to_spill) {
+		/* Add new page list to next_pool */
+		kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill);
+
+		pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill);
+	}
+
+	pool_dbg(pool, "terminated\n");
+}
+
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	do {
+		pool_dbg(pool, "alloc()\n");
+		p = kbase_mem_pool_remove(pool);
+
+		if (p)
+			return p;
+
+		pool = pool->next_pool;
+	} while (pool);
+
+	return NULL;
+}
+
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+
+	pool_dbg(pool, "free()\n");
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add(pool, p);
+	} else if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool */
+		kbase_mem_pool_spill(next_pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
+		struct tagged_addr *pages, bool partial_allowed)
+{
+	struct page *p;
+	size_t nr_from_pool;
+	size_t i = 0;
+	int err = -ENOMEM;
+	size_t nr_pages_internal;
+
+	nr_pages_internal = nr_4k_pages / (1u << (pool->order));
+
+	if (nr_pages_internal * (1u << pool->order) != nr_4k_pages)
+		return -EINVAL;
+
+	pool_dbg(pool, "alloc_pages(4k=%zu):\n", nr_4k_pages);
+	pool_dbg(pool, "alloc_pages(internal=%zu):\n", nr_pages_internal);
+
+	/* Get pages from this pool */
+	kbase_mem_pool_lock(pool);
+	nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool));
+	while (nr_from_pool--) {
+		int j;
+		p = kbase_mem_pool_remove_locked(pool);
+		if (pool->order) {
+			pages[i++] = as_tagged_tag(page_to_phys(p),
+						   HUGE_HEAD | HUGE_PAGE);
+			for (j = 1; j < (1u << pool->order); j++)
+				pages[i++] = as_tagged_tag(page_to_phys(p) +
+							   PAGE_SIZE * j,
+							   HUGE_PAGE);
+		} else {
+			pages[i++] = as_tagged(page_to_phys(p));
+		}
+	}
+	kbase_mem_pool_unlock(pool);
+
+	if (i != nr_4k_pages && pool->next_pool) {
+		/* Allocate via next pool */
+		err = kbase_mem_pool_alloc_pages(pool->next_pool,
+				nr_4k_pages - i, pages + i, partial_allowed);
+
+		if (err < 0)
+			goto err_rollback;
+
+		i += err;
+	} else {
+		/* Get any remaining pages from kernel */
+		while (i != nr_4k_pages) {
+			p = kbase_mem_alloc_page(pool);
+			if (!p) {
+				if (partial_allowed)
+					goto done;
+				else
+					goto err_rollback;
+			}
+
+			if (pool->order) {
+				int j;
+
+				pages[i++] = as_tagged_tag(page_to_phys(p),
+							   HUGE_PAGE |
+							   HUGE_HEAD);
+				for (j = 1; j < (1u << pool->order); j++) {
+					phys_addr_t phys;
+
+					phys = page_to_phys(p) + PAGE_SIZE * j;
+					pages[i++] = as_tagged_tag(phys,
+								   HUGE_PAGE);
+				}
+			} else {
+				pages[i++] = as_tagged(page_to_phys(p));
+			}
+		}
+	}
+
+done:
+	pool_dbg(pool, "alloc_pages(%zu) done\n", i);
+
+	return i;
+
+err_rollback:
+	kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED);
+	return err;
+}
+
+static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
+				     size_t nr_pages, struct tagged_addr *pages,
+				     bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first without holding the pool lock */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge_head(pages[i]) || !is_huge(pages[i])) {
+			p = phys_to_page(as_phys_addr_t(pages[i]));
+			if (zero)
+				kbase_mem_pool_zero_page(pool, p);
+			else if (sync)
+				kbase_mem_pool_sync_page(pool, p);
+
+			list_add(&p->lru, &new_page_list);
+			nr_to_pool++;
+		}
+		pages[i] = as_tagged(0);
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		struct tagged_addr *pages, bool dirty, bool reclaimed)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	pool_dbg(pool, "free_pages(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
+
+		i += nr_to_pool;
+
+		if (i != nr_pages && next_pool) {
+			/* Spill to next pool (may overspill) */
+			nr_to_pool = kbase_mem_pool_capacity(next_pool);
+			nr_to_pool = min(nr_pages - i, nr_to_pool);
+
+			kbase_mem_pool_add_array(next_pool, nr_to_pool,
+					pages + i, true, dirty);
+			i += nr_to_pool;
+		}
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge(pages[i]) && !is_huge_head(pages[i])) {
+			pages[i] = as_tagged(0);
+			continue;
+		}
+
+		p = phys_to_page(as_phys_addr_t(pages[i]));
+
+		if (reclaimed)
+			zone_page_state_add(-1, page_zone(p),
+					NR_SLAB_RECLAIMABLE);
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = as_tagged(0);
+	}
+
+	pool_dbg(pool, "free_pages(%zu) done\n", nr_pages);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
new file mode 100644
index 0000000..319cf25
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
@@ -0,0 +1,88 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <mali_kbase_mem_pool_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_trim(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops,
+		kbase_mem_pool_debugfs_size_get,
+		kbase_mem_pool_debugfs_size_set,
+		"%llu\n");
+
+static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_max_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_set_max_size(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops,
+		kbase_mem_pool_debugfs_max_size_get,
+		kbase_mem_pool_debugfs_max_size_set,
+		"%llu\n");
+
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_mem_pool *pool,
+		struct kbase_mem_pool *lp_pool)
+{
+	debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_size_fops);
+
+	debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_max_size_fops);
+
+	debugfs_create_file("lp_mem_pool_size", S_IRUGO | S_IWUSR, parent,
+			lp_pool, &kbase_mem_pool_debugfs_size_fops);
+
+	debugfs_create_file("lp_mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+			lp_pool, &kbase_mem_pool_debugfs_max_size_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
new file mode 100644
index 0000000..496eaf3
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_MEM_POOL_DEBUGFS_H
+#define _KBASE_MEM_POOL_DEBUGFS_H
+
+#include <mali_kbase.h>
+
+/**
+ * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool
+ * @parent:  Parent debugfs dentry
+ * @pool:    Memory pool of small pages to control
+ * @lp_pool: Memory pool of large pages to control
+ *
+ * Adds four debugfs files under @parent:
+ * - mem_pool_size: get/set the current size of @pool
+ * - mem_pool_max_size: get/set the max size of @pool
+ * - lp_mem_pool_size: get/set the current size of @lp_pool
+ * - lp_mem_pool_max_size: get/set the max size of @lp_pool
+ */
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_mem_pool *pool,
+		struct kbase_mem_pool *lp_pool);
+
+#endif  /*_KBASE_MEM_POOL_DEBUGFS_H*/
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
new file mode 100644
index 0000000..d58fd8d
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,121 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Show callback for the @c mem_profile debugfs file.
+ *
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise
+ */
+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+
+	seq_putc(sfile, '\n');
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for mem_profile
+ */
+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_mem_profile_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+	.open = kbasep_mem_profile_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+		kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		if (!debugfs_create_file("mem_profile", S_IRUGO,
+					kctx->kctx_dentry, kctx,
+					&kbasep_mem_profile_debugfs_fops)) {
+			err = -EAGAIN;
+		} else {
+			kbase_ctx_flag_set(kctx,
+					   KCTX_MEM_PROFILE_INITIALIZED);
+		}
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		kfree(kctx->mem_profile_data);
+		kctx->mem_profile_data = data;
+		kctx->mem_profile_size = size;
+	} else {
+		kfree(data);
+	}
+
+	dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d",
+		err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return err;
+}
+
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+				kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	kfree(kctx->mem_profile_data);
+	kctx->mem_profile_data = NULL;
+	kctx->mem_profile_size = 0;
+
+	mutex_unlock(&kctx->mem_profile_lock);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	kfree(data);
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
new file mode 100644
index 0000000..a1dc2e0
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
@@ -0,0 +1,59 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs.h
+ * Header file for mem profiles entries in debugfs
+ *
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
+#define _KBASE_MEM_PROFILE_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Remove entry from Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
+
+/**
+ * @brief Insert @p data to the debugfs file so it can be read by userspace
+ *
+ * The function takes ownership of @p data and frees it later when new data
+ * is inserted.
+ *
+ * If the debugfs entry corresponding to the @p kctx doesn't exist,
+ * an attempt will be made to create it.
+ *
+ * @param kctx The context whose debugfs file @p data should be inserted to
+ * @param data A NULL-terminated string to be inserted to the debugfs file,
+ *             without the trailing new line character
+ * @param size The length of the @p data string
+ * @return 0 if @p data inserted correctly
+ *         -EAGAIN in case of error
+ * @post @ref mem_profile_initialized will be set to @c true
+ *       the first time this function succeeds.
+ */
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size);
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
new file mode 100644
index 0000000..82f0702
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs_buf_size.h
+ * Header file for the size of the buffer to accumulate the histogram report text in
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * The size of the buffer to accumulate the histogram report text in
+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56))
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
new file mode 100644
index 0000000..1694779
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -0,0 +1,2149 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mmu.c
+ * Base kernel MMU management.
+ */
+
+/* #define DEBUG    1 */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_debug.h>
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_time.h>
+#include <mali_kbase_mem.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+/**
+ * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs.
+ *
+ * If sync is not set then transactions still in flight when the flush is issued
+ * may use the old page tables and the data they write will not be written out
+ * to memory, this function returns after the flush has been issued but
+ * before all accesses which might effect the flushed region have completed.
+ *
+ * If sync is set then accesses in the flushed region will be drained
+ * before data is flush and invalidated through L1, L2 and into memory,
+ * after which point this function will return.
+ */
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync);
+
+/**
+ * kbase_mmu_sync_pgd - sync page directory to memory
+ * @kbdev:	Device pointer.
+ * @handle:	Address of DMA region.
+ * @size:       Size of the region to sync.
+ *
+ * This should be called after each page directory update.
+ */
+
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
+		dma_addr_t handle, size_t size)
+{
+	/* If page table is not coherent then ensure the gpu can read
+	 * the pages from memory
+	 */
+	if (kbdev->system_coherency != COHERENCY_ACE)
+		dma_sync_single_for_device(kbdev->dev, handle, size,
+				DMA_TO_DEVICE);
+}
+
+/*
+ * Definitions:
+ * - PGD: Page Directory.
+ * - PTE: Page Table Entry. A 64bit value pointing to the next
+ *        level of translation
+ * - ATE: Address Transation Entry. A 64bit value pointing to
+ *        a 4kB physical page.
+ */
+
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str);
+
+
+/**
+ * make_multiple() - Calculate the nearest integral multiple of a given number.
+ *
+ * @minimum: The number to round up.
+ * @multiple: The number of which the function shall calculate an integral multiple.
+ * @result: The number rounded up to the nearest integral multiple in case of success,
+ *          or just the number itself in case of failure.
+ *
+ * Return: 0 in case of success, or -1 in case of failure.
+ */
+static int make_multiple(size_t minimum, size_t multiple, size_t *result)
+{
+	int err = -1;
+
+	*result = minimum;
+
+	if (multiple > 0) {
+		size_t remainder = minimum % multiple;
+
+		if (remainder != 0)
+			*result = minimum + multiple - remainder;
+		err = 0;
+	}
+
+	return err;
+}
+
+void page_fault_worker(struct work_struct *data)
+{
+	u64 fault_pfn;
+	u32 fault_status;
+	size_t new_pages;
+	size_t fault_rel_pfn;
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_va_region *region;
+	int err;
+	bool grown = false;
+
+	faulting_as = container_of(data, struct kbase_as, work_pagefault);
+	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+
+	if (unlikely(faulting_as->protected_mode))
+	{
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Protected mode fault");
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+
+		goto fault_done;
+	}
+
+	fault_status = faulting_as->fault_status;
+	switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
+		/* need to check against the region to handle this one */
+		break;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Translation table bus fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
+		/* nothing to do, but we don't expect this fault currently */
+		dev_warn(kbdev->dev, "Access flag unexpectedly set");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Address size fault");
+		else
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Unknown fault code");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Memory attributes fault");
+		else
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Unknown fault code");
+		goto fault_done;
+
+	default:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Unknown fault code");
+		goto fault_done;
+	}
+
+	/* so we have a translation fault, let's see if it is for growable
+	 * memory */
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx,
+			faulting_as->fault_addr);
+	if (!region || region->flags & KBASE_REG_FREE) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"DMA-BUF is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if ((region->flags & GROWABLE_FLAGS_REQUIRED)
+			!= GROWABLE_FLAGS_REQUIRED) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not growable");
+		goto fault_done;
+	}
+
+	if ((region->flags & KBASE_REG_DONT_NEED)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Don't need memory can't be grown");
+		goto fault_done;
+	}
+
+	/* find the size we need to grow it by */
+	/* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address
+	 * validating the fault_adress to be within a size_t from the start_pfn */
+	fault_rel_pfn = fault_pfn - region->start_pfn;
+
+	if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
+		dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+				faulting_as->fault_addr, region->start_pfn,
+				region->start_pfn +
+				kbase_reg_current_backed_size(region));
+
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* [1] in case another page fault occurred while we were
+		 * handling the (duplicate) page fault we need to ensure we
+		 * don't loose the other page fault as result of us clearing
+		 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
+		 * an UNLOCK command that will retry any stalled memory
+		 * transaction (which should cause the other page fault to be
+		 * raised again).
+		 */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+
+		goto fault_done;
+	}
+
+	err = make_multiple(fault_rel_pfn - kbase_reg_current_backed_size(region) + 1,
+			region->extent,
+			&new_pages);
+	WARN_ON(err);
+
+	/* cap to max vsize */
+	new_pages = min(new_pages, region->nr_pages - kbase_reg_current_backed_size(region));
+
+	if (0 == new_pages) {
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Duplicate of a fault we've already handled, nothing to do */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* See comment [1] about UNLOCK usage */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+		goto fault_done;
+	}
+
+	if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) {
+		if (region->gpu_alloc != region->cpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					region->cpu_alloc, new_pages) == 0) {
+				grown = true;
+			} else {
+				kbase_free_phy_pages_helper(region->gpu_alloc,
+						new_pages);
+			}
+		} else {
+			grown = true;
+		}
+	}
+
+
+	if (grown) {
+		u64 pfn_offset;
+		u32 op;
+
+		/* alloc success */
+		KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
+
+		/* set up the new pages */
+		pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
+		/*
+		 * Note:
+		 * Issuing an MMU operation will unlock the MMU and cause the
+		 * translation to be replayed. If the page insertion fails then
+		 * rather then trying to continue the context should be killed
+		 * so the no_flush version of insert_pages is used which allows
+		 * us to unlock the MMU as we see fit.
+		 */
+		err = kbase_mmu_insert_pages_no_flush(kctx,
+				region->start_pfn + pfn_offset,
+				&kbase_get_gpu_phy_pages(region)[pfn_offset],
+				new_pages, region->flags);
+		if (err) {
+			kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
+			if (region->gpu_alloc != region->cpu_alloc)
+				kbase_free_phy_pages_helper(region->cpu_alloc,
+						new_pages);
+			kbase_gpu_vm_unlock(kctx);
+			/* The locked VA region will be unlocked and the cache invalidated in here */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page table update failure");
+			goto fault_done;
+		}
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
+#endif
+		KBASE_TLSTREAM_AUX_PAGEFAULT(kctx->id, (u64)new_pages);
+
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* flush L2 and unlock the VA (resumes the MMU) */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+			op = AS_COMMAND_FLUSH;
+		else
+			op = AS_COMMAND_FLUSH_PT;
+
+		/* clear MMU interrupt - this needs to be done after updating
+		 * the page tables but before issuing a FLUSH command. The
+		 * FLUSH cmd has a side effect that it restarts stalled memory
+		 * transactions in other address spaces which may cause
+		 * another fault to occur. If we didn't clear the interrupt at
+		 * this stage a new IRQ might not be raised when the GPU finds
+		 * a MMU IRQ is already pending.
+		 */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
+					  faulting_as->fault_addr >> PAGE_SHIFT,
+					  new_pages,
+					  op, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		/* reenable this in the mask */
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* failed to extend, handle as a normal PF */
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Page allocation failure");
+	}
+
+fault_done:
+	/*
+	 * By this point, the fault was handled in some way,
+	 * so release the ctx refcount
+	 */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
+{
+	u64 *page;
+	int i;
+	struct page *p;
+	int new_page_count __maybe_unused;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
+	kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	p = kbase_mem_pool_alloc(&kctx->mem_pool);
+	if (!p)
+		goto sub_pages;
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+
+	page = kmap(p);
+	if (NULL == page)
+		goto alloc_free;
+
+	kbase_process_page_usage_inc(kctx, 1);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
+		kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
+
+	kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+	kunmap(p);
+	return page_to_phys(p);
+
+alloc_free:
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+sub_pages:
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
+
+/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
+ * new table from the pool if needed and possible
+ */
+static int mmu_get_next_pgd(struct kbase_context *kctx,
+		phys_addr_t *pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(*pgd);
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	p = pfn_to_page(PFN_DOWN(*pgd));
+	page = kmap(p);
+	if (NULL == page) {
+		dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
+		return -EINVAL;
+	}
+
+	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+
+	if (!target_pgd) {
+		target_pgd = kbase_mmu_alloc_pgd(kctx);
+		if (!target_pgd) {
+			dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+			kunmap(p);
+			return -ENOMEM;
+		}
+
+		kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+
+		kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+		/* Rely on the caller to update the address space flags. */
+	}
+
+	kunmap(p);
+	*pgd = target_pgd;
+
+	return 0;
+}
+
+/*
+ * Returns the PGD for the specified level of translation
+ */
+static int mmu_get_pgd_at_level(struct kbase_context *kctx,
+					u64 vpfn,
+					unsigned int level,
+					phys_addr_t *out_pgd)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&kctx->mmu_lock);
+	pgd = kctx->pgd;
+
+	for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) {
+		int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l);
+		/* Handle failure condition */
+		if (err) {
+			dev_dbg(kctx->kbdev->dev,
+				 "%s: mmu_get_next_pgd failure at level %d\n",
+				 __func__, l);
+			return err;
+		}
+	}
+
+	*out_pgd = pgd;
+
+	return 0;
+}
+
+#define mmu_get_bottom_pgd(kctx, vpfn, out_pgd) \
+	mmu_get_pgd_at_level((kctx), (vpfn), MIDGARD_MMU_BOTTOMLEVEL, (out_pgd))
+
+
+static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx,
+					      u64 from_vpfn, u64 to_vpfn)
+{
+	phys_addr_t pgd;
+	u64 vpfn = from_vpfn;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+	KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn);
+
+	lockdep_assert_held(&kctx->mmu_lock);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	while (vpfn < to_vpfn) {
+		unsigned int i;
+		unsigned int idx = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx;
+		unsigned int pcount = 0;
+		unsigned int left = to_vpfn - vpfn;
+		unsigned int level;
+		u64 *page;
+
+		if (count > left)
+			count = left;
+
+		/* need to check if this is a 2MB page or a 4kB */
+		pgd = kctx->pgd;
+
+		for (level = MIDGARD_MMU_TOPLEVEL;
+				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
+			idx = (vpfn >> ((3 - level) * 9)) & 0x1FF;
+			page = kmap(phys_to_page(pgd));
+			if (mmu_mode->ate_is_valid(page[idx], level))
+				break; /* keep the mapping */
+			kunmap(phys_to_page(pgd));
+			pgd = mmu_mode->pte_to_phy_addr(page[idx]);
+		}
+
+		switch (level) {
+		case MIDGARD_MMU_LEVEL(2):
+			/* remap to single entry to update */
+			pcount = 1;
+			break;
+		case MIDGARD_MMU_BOTTOMLEVEL:
+			/* page count is the same as the logical count */
+			pcount = count;
+			break;
+		default:
+			dev_warn(kctx->kbdev->dev, "%sNo support for ATEs at level %d\n",
+			       __func__, level);
+			goto next;
+		}
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < pcount; i++)
+			mmu_mode->entry_invalidate(&page[idx + i]);
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				   kbase_dma_addr(phys_to_page(pgd)) + 8 * idx,
+				   8 * pcount);
+		kunmap(phys_to_page(pgd));
+
+next:
+		vpfn += count;
+	}
+}
+
+/*
+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr phys, size_t nr,
+					unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_single_page only partially completes we need to be
+	 * able to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > remain)
+			count = remain;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_vpfn +
+								  recover_count
+								  );
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  recover_vpfn,
+								  recover_vpfn +
+								  recover_count
+								  );
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			/* Fail if the current page is a valid ATE entry */
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+
+			mmu_mode->entry_set_ate(&pgd_page[ofs],
+						phys, flags,
+						MIDGARD_MMU_BOTTOMLEVEL);
+		}
+
+		vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table.
+		 * If further pages need inserting and fail we need to undo what
+		 * has already taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+static inline void cleanup_empty_pte(struct kbase_context *kctx, u64 *pte)
+{
+	phys_addr_t tmp_pgd;
+	struct page *tmp_p;
+
+	tmp_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(*pte);
+	tmp_p = phys_to_page(tmp_pgd);
+	kbase_mem_pool_free(&kctx->mem_pool, tmp_p, false);
+	kbase_process_page_usage_dec(kctx, 1);
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+}
+
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx,
+				    const u64 start_vpfn,
+				    struct tagged_addr *phys, size_t nr,
+				    unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	u64 insert_vpfn = start_vpfn;
+	size_t remain = nr;
+	int err;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(start_vpfn);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int vindex = insert_vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
+		struct page *p;
+		unsigned int cur_level;
+
+		if (count > remain)
+			count = remain;
+
+		if (!vindex && is_huge_head(*phys))
+			cur_level = MIDGARD_MMU_LEVEL(2);
+		else
+			cur_level = MIDGARD_MMU_BOTTOMLEVEL;
+
+		/*
+		 * Repeatedly calling mmu_get_pgd_at_level() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_pgd_at_level(kctx, insert_vpfn, cur_level,
+						   &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					cur_level);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+
+		if (err) {
+			dev_warn(kctx->kbdev->dev,
+				 "%s: mmu_get_bottom_pgd failure\n", __func__);
+			if (insert_vpfn != start_vpfn) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  start_vpfn,
+								  insert_vpfn);
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "%s: kmap failure\n",
+				 __func__);
+			if (insert_vpfn != start_vpfn) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx,
+								  start_vpfn,
+								  insert_vpfn);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		if (cur_level == MIDGARD_MMU_LEVEL(2)) {
+			unsigned int level_index = (insert_vpfn >> 9) & 0x1FF;
+			u64 *target = &pgd_page[level_index];
+
+			if (mmu_mode->pte_is_valid(*target, cur_level))
+				cleanup_empty_pte(kctx, target);
+			mmu_mode->entry_set_ate(target, *phys, flags,
+						cur_level);
+		} else {
+			for (i = 0; i < count; i++) {
+				unsigned int ofs = vindex + i;
+				u64 *target = &pgd_page[ofs];
+
+				/* Fail if the current page is a valid ATE entry
+				 */
+				KBASE_DEBUG_ASSERT(0 == (*target & 1UL));
+
+				kctx->kbdev->mmu_mode->entry_set_ate(target,
+						phys[i], flags, cur_level);
+			}
+		}
+
+		phys += count;
+		insert_vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (vindex * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  struct tagged_addr *phys, size_t nr,
+				  unsigned long flags)
+{
+	int err;
+
+	err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+
+/**
+ * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
+ * without retaining the kbase context.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
+ * other locking.
+ */
+static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+	u32 op;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	if (sync)
+		op = AS_COMMAND_FLUSH_MEM;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	err = kbase_mmu_hw_do_operation(kbdev,
+				&kbdev->as[kctx->as_nr],
+				kctx, vpfn, nr, op, 0);
+#if KBASE_GPU_RESET_EN
+	if (err) {
+		/* Flush failed to complete, assume the
+		 * GPU has hung and perform a reset to
+		 * recover */
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_NO_MALI
+	/*
+	 * As this function could be called in interrupt context the sync
+	 * request can't block. Instead log the request and the next flush
+	 * request will pick it up.
+	 */
+	if ((!err) && sync &&
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367))
+		atomic_set(&kctx->drain_pending, 1);
+#endif /* !CONFIG_MALI_NO_MALI */
+}
+
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev;
+	bool ctx_is_in_runpool;
+#ifndef CONFIG_MALI_NO_MALI
+	bool drain_pending = false;
+
+	if (atomic_xchg(&kctx->drain_pending, 0))
+		drain_pending = true;
+#endif /* !CONFIG_MALI_NO_MALI */
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	kbdev = kctx->kbdev;
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if (ctx_is_in_runpool) {
+		KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+		if (!kbase_pm_context_active_handle_suspend(kbdev,
+			KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+			int err;
+			u32 op;
+
+			/* AS transaction begin */
+			mutex_lock(&kbdev->mmu_hw_mutex);
+
+			if (sync)
+				op = AS_COMMAND_FLUSH_MEM;
+			else
+				op = AS_COMMAND_FLUSH_PT;
+
+			err = kbase_mmu_hw_do_operation(kbdev,
+						&kbdev->as[kctx->as_nr],
+						kctx, vpfn, nr, op, 0);
+
+#if KBASE_GPU_RESET_EN
+			if (err) {
+				/* Flush failed to complete, assume the
+				 * GPU has hung and perform a reset to
+				 * recover */
+				dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
+
+				if (kbase_prepare_to_reset_gpu(kbdev))
+					kbase_reset_gpu(kbdev);
+			}
+#endif /* KBASE_GPU_RESET_EN */
+
+			mutex_unlock(&kbdev->mmu_hw_mutex);
+			/* AS transaction end */
+
+#ifndef CONFIG_MALI_NO_MALI
+			/*
+			 * The transaction lock must be dropped before here
+			 * as kbase_wait_write_flush could take it if
+			 * the GPU was powered down (static analysis doesn't
+			 * know this can't happen).
+			 */
+			drain_pending |= (!err) && sync &&
+					kbase_hw_has_issue(kctx->kbdev,
+							BASE_HW_ISSUE_6367);
+			if (drain_pending) {
+				/* Wait for GPU to flush write buffer */
+				kbase_wait_write_flush(kctx);
+			}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+			kbase_pm_context_idle(kbdev);
+		}
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+}
+
+void kbase_mmu_update(struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the hwaccess_lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	kctx->kbdev->mmu_mode->update(kctx);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_update);
+
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	kbdev->mmu_mode->disable_as(kbdev, as_nr);
+}
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the hwaccess_lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/*
+	 * The address space is being disabled, drain all knowledge of it out
+	 * from the caches as pages and page tables might be freed after this.
+	 *
+	 * The job scheduler code will already be holding the locks and context
+	 * so just do the flush.
+	 */
+	kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
+
+	kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+
+/*
+ * We actually only discard the ATE, and not the page table
+ * pages. There is a potential DoS here, as we'll leak memory by
+ * having PTEs that are potentially unused.  Will require physical
+ * page accounting, so MMU pages are part of the process allocation.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+	phys_addr_t pgd;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err = -EFAULT;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
+
+	if (0 == nr) {
+		/* early out if nothing to do */
+		return 0;
+	}
+
+	mutex_lock(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		unsigned int pcount;
+		unsigned int level;
+		u64 *page;
+
+		if (count > nr)
+			count = nr;
+
+		/* need to check if this is a 2MB or a 4kB page */
+		pgd = kctx->pgd;
+
+		for (level = MIDGARD_MMU_TOPLEVEL;
+				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
+			phys_addr_t next_pgd;
+
+			index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
+			page = kmap(phys_to_page(pgd));
+			if (mmu_mode->ate_is_valid(page[index], level))
+				break; /* keep the mapping */
+			else if (!mmu_mode->pte_is_valid(page[index], level)) {
+				/* nothing here, advance */
+				switch (level) {
+				case MIDGARD_MMU_LEVEL(0):
+					count = 134217728;
+					break;
+				case MIDGARD_MMU_LEVEL(1):
+					count = 262144;
+					break;
+				case MIDGARD_MMU_LEVEL(2):
+					count = 512;
+					break;
+				case MIDGARD_MMU_LEVEL(3):
+					count = 1;
+					break;
+				}
+				if (count > nr)
+					count = nr;
+				goto next;
+			}
+			next_pgd = mmu_mode->pte_to_phy_addr(page[index]);
+			kunmap(phys_to_page(pgd));
+			pgd = next_pgd;
+		}
+
+		switch (level) {
+		case MIDGARD_MMU_LEVEL(0):
+		case MIDGARD_MMU_LEVEL(1):
+			dev_warn(kctx->kbdev->dev,
+				 "%s: No support for ATEs at level %d\n",
+				 __func__, level);
+			kunmap(phys_to_page(pgd));
+			goto out;
+		case MIDGARD_MMU_LEVEL(2):
+			/* can only teardown if count >= 512 */
+			if (count >= 512) {
+				pcount = 1;
+			} else {
+				dev_warn(kctx->kbdev->dev,
+					 "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n",
+					 __func__, count);
+				pcount = 0;
+			}
+			break;
+		case MIDGARD_MMU_BOTTOMLEVEL:
+			/* page count is the same as the logical count */
+			pcount = count;
+			break;
+		default:
+			dev_err(kctx->kbdev->dev,
+				"%s: found non-mapped memory, early out\n",
+				__func__);
+			vpfn += count;
+			nr -= count;
+			continue;
+		}
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < pcount; i++)
+			mmu_mode->entry_invalidate(&page[index + i]);
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				   kbase_dma_addr(phys_to_page(pgd)) +
+				   8 * index, 8*pcount);
+
+next:
+		kunmap(phys_to_page(pgd));
+		vpfn += count;
+		nr -= count;
+	}
+	err = 0;
+out:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+/**
+ * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
+ * This call is being triggered as a response to the changes of the mem attributes
+ *
+ * @pre : The caller is responsible for validating the memory attributes
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
+			   struct tagged_addr *phys, size_t nr,
+			   unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(0 != vpfn);
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags on GPU PFN 0x%llx from phys %p, %zu pages",
+			vpfn, phys, nr);
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		size_t count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		do {
+			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev,
+				 "mmu_get_bottom_pgd failure\n");
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i],
+						flags, MIDGARD_MMU_BOTTOMLEVEL);
+
+		phys += count;
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(pfn_to_page(PFN_DOWN(pgd)));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
+}
+
+static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd,
+			       int level, u64 *pgd_page_buffer)
+{
+	phys_addr_t target_pgd;
+	struct page *p;
+	u64 *pgd_page;
+	int i;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != pgd_page);
+	/* Copy the page to our preallocated buffer so that we can minimize
+	 * kmap_atomic usage */
+	memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+	kunmap_atomic(pgd_page);
+	pgd_page = pgd_page_buffer;
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+		if (target_pgd) {
+			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
+				mmu_teardown_level(kctx,
+						   target_pgd,
+						   level + 1,
+						   pgd_page_buffer +
+						   (PAGE_SIZE / sizeof(u64)));
+			}
+		}
+	}
+
+	p = pfn_to_page(PFN_DOWN(pgd));
+	kbase_mem_pool_free(&kctx->mem_pool, p, true);
+	kbase_process_page_usage_dec(kctx, 1);
+	kbase_atomic_sub_pages(1, &kctx->used_pages);
+	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+}
+
+int kbase_mmu_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
+
+	mutex_init(&kctx->mmu_lock);
+
+	/* Preallocate MMU depth of four pages for mmu_teardown_level to use */
+	kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+
+	if (NULL == kctx->mmu_teardown_pages)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void kbase_mmu_term(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	kfree(kctx->mmu_teardown_pages);
+	kctx->mmu_teardown_pages = NULL;
+}
+
+void kbase_mmu_free_pgd(struct kbase_context *kctx)
+{
+	int new_page_count = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+	mutex_lock(&kctx->mmu_lock);
+	mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL,
+			   kctx->mmu_teardown_pages);
+	mutex_unlock(&kctx->mmu_lock);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
+
+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
+	size_t dump_size;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+	if (!pgd_page) {
+		dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
+		return 0;
+	}
+
+	if (*size_left >= size) {
+		/* A modified physical address that contains the page table level */
+		u64 m_pgd = pgd | level;
+
+		/* Put the modified physical address in the output buffer */
+		memcpy(*buffer, &m_pgd, sizeof(m_pgd));
+		*buffer += sizeof(m_pgd);
+
+		/* Followed by the page table itself */
+		memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
+		*buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
+
+		*size_left -= size;
+	}
+
+	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
+		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
+				target_pgd = mmu_mode->pte_to_phy_addr(
+						pgd_page[i]);
+
+				dump_size = kbasep_mmu_dump_level(kctx,
+						target_pgd, level + 1,
+						buffer, size_left);
+				if (!dump_size) {
+					kunmap(pfn_to_page(PFN_DOWN(pgd)));
+					return 0;
+				}
+				size += dump_size;
+			}
+		}
+	}
+
+	kunmap(pfn_to_page(PFN_DOWN(pgd)));
+
+	return size;
+}
+
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
+{
+	void *kaddr;
+	size_t size_left;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (0 == nr_pages) {
+		/* can't dump in a 0 sized buffer, early out */
+		return NULL;
+	}
+
+	size_left = nr_pages * PAGE_SIZE;
+
+	KBASE_DEBUG_ASSERT(0 != size_left);
+	kaddr = vmalloc_user(size_left);
+
+	mutex_lock(&kctx->mmu_lock);
+
+	if (kaddr) {
+		u64 end_marker = 0xFFULL;
+		char *buffer;
+		char *mmu_dump_buffer;
+		u64 config[3];
+		size_t dump_size, size = 0;
+
+		buffer = (char *)kaddr;
+		mmu_dump_buffer = buffer;
+
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
+			struct kbase_mmu_setup as_setup;
+
+			kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup);
+			config[0] = as_setup.transtab;
+			config[1] = as_setup.memattr;
+			config[2] = as_setup.transcfg;
+			memcpy(buffer, &config, sizeof(config));
+			mmu_dump_buffer += sizeof(config);
+			size_left -= sizeof(config);
+			size += sizeof(config);
+		}
+
+		dump_size = kbasep_mmu_dump_level(kctx,
+				kctx->pgd,
+				MIDGARD_MMU_TOPLEVEL,
+				&mmu_dump_buffer,
+				&size_left);
+
+		if (!dump_size)
+			goto fail_free;
+
+		size += dump_size;
+
+		/* Add on the size for the end marker */
+		size += sizeof(u64);
+
+		if (size > (nr_pages * PAGE_SIZE)) {
+			/* The buffer isn't big enough - free the memory and return failure */
+			goto fail_free;
+		}
+
+		/* Add the end marker */
+		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
+	}
+
+	mutex_unlock(&kctx->mmu_lock);
+	return kaddr;
+
+fail_free:
+	vfree(kaddr);
+	mutex_unlock(&kctx->mmu_lock);
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_dump);
+
+void bus_fault_worker(struct work_struct *data)
+{
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif /* KBASE_GPU_RESET_EN */
+
+	faulting_as = container_of(data, struct kbase_as, work_busfault);
+
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	if (unlikely(faulting_as->protected_mode))
+	{
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+				KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+		atomic_dec(&kbdev->faults_pending);
+		return;
+
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
+	if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		unsigned long flags;
+
+		/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Set the MMU into unmapped mode */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_disable(kctx);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+
+		kbase_pm_context_idle(kbdev);
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
+{
+	const char *e;
+
+	switch (exception_code) {
+		/* Non-Fault Status code */
+	case 0x00:
+		e = "NOT_STARTED/IDLE/OK";
+		break;
+	case 0x01:
+		e = "DONE";
+		break;
+	case 0x02:
+		e = "INTERRUPTED";
+		break;
+	case 0x03:
+		e = "STOPPED";
+		break;
+	case 0x04:
+		e = "TERMINATED";
+		break;
+	case 0x08:
+		e = "ACTIVE";
+		break;
+		/* Job exceptions */
+	case 0x40:
+		e = "JOB_CONFIG_FAULT";
+		break;
+	case 0x41:
+		e = "JOB_POWER_FAULT";
+		break;
+	case 0x42:
+		e = "JOB_READ_FAULT";
+		break;
+	case 0x43:
+		e = "JOB_WRITE_FAULT";
+		break;
+	case 0x44:
+		e = "JOB_AFFINITY_FAULT";
+		break;
+	case 0x48:
+		e = "JOB_BUS_FAULT";
+		break;
+	case 0x50:
+		e = "INSTR_INVALID_PC";
+		break;
+	case 0x51:
+		e = "INSTR_INVALID_ENC";
+		break;
+	case 0x52:
+		e = "INSTR_TYPE_MISMATCH";
+		break;
+	case 0x53:
+		e = "INSTR_OPERAND_FAULT";
+		break;
+	case 0x54:
+		e = "INSTR_TLS_FAULT";
+		break;
+	case 0x55:
+		e = "INSTR_BARRIER_FAULT";
+		break;
+	case 0x56:
+		e = "INSTR_ALIGN_FAULT";
+		break;
+	case 0x58:
+		e = "DATA_INVALID_FAULT";
+		break;
+	case 0x59:
+		e = "TILE_RANGE_FAULT";
+		break;
+	case 0x5A:
+		e = "ADDR_RANGE_FAULT";
+		break;
+	case 0x60:
+		e = "OUT_OF_MEMORY";
+		break;
+		/* GPU exceptions */
+	case 0x80:
+		e = "DELAYED_BUS_FAULT";
+		break;
+	case 0x88:
+		e = "SHAREABILITY_FAULT";
+		break;
+		/* MMU exceptions */
+	case 0xC0:
+	case 0xC1:
+	case 0xC2:
+	case 0xC3:
+	case 0xC4:
+	case 0xC5:
+	case 0xC6:
+	case 0xC7:
+		e = "TRANSLATION_FAULT";
+		break;
+	case 0xC8:
+		e = "PERMISSION_FAULT";
+		break;
+	case 0xC9:
+	case 0xCA:
+	case 0xCB:
+	case 0xCC:
+	case 0xCD:
+	case 0xCE:
+	case 0xCF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "PERMISSION_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xD0:
+	case 0xD1:
+	case 0xD2:
+	case 0xD3:
+	case 0xD4:
+	case 0xD5:
+	case 0xD6:
+	case 0xD7:
+		e = "TRANSTAB_BUS_FAULT";
+		break;
+	case 0xD8:
+		e = "ACCESS_FLAG";
+		break;
+	case 0xD9:
+	case 0xDA:
+	case 0xDB:
+	case 0xDC:
+	case 0xDD:
+	case 0xDE:
+	case 0xDF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "ACCESS_FLAG";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xE0:
+	case 0xE1:
+	case 0xE2:
+	case 0xE3:
+	case 0xE4:
+	case 0xE5:
+	case 0xE6:
+	case 0xE7:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "ADDRESS_SIZE_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xE8:
+	case 0xE9:
+	case 0xEA:
+	case 0xEB:
+	case 0xEC:
+	case 0xED:
+	case 0xEE:
+	case 0xEF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "MEMORY_ATTRIBUTES_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	default:
+		e = "UNKNOWN";
+		break;
+	};
+
+	return e;
+}
+
+static const char *access_type_name(struct kbase_device *kbdev,
+		u32 fault_status)
+{
+	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			return "ATOMIC";
+		else
+			return "UNKNOWN";
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		return "READ";
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		return "WRITE";
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		return "EXECUTE";
+	default:
+		WARN_ON(1);
+		return NULL;
+	}
+}
+
+/**
+ * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on.
+ */
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str)
+{
+	unsigned long flags;
+	int exception_type;
+	int access_type;
+	int source_id;
+	int as_no;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif
+
+	as_no = as->number;
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	/* ASSERT that the context won't leave the runpool */
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/* decode the fault status */
+	exception_type = as->fault_status & 0xFF;
+	access_type = (as->fault_status >> 8) & 0x3;
+	source_id = (as->fault_status >> 16);
+
+	/* terminal fault, print info about the fault */
+	dev_err(kbdev->dev,
+		"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+		"Reason: %s\n"
+		"raw fault status: 0x%X\n"
+		"decoded fault status: %s\n"
+		"exception type 0x%X: %s\n"
+		"access type 0x%X: %s\n"
+		"source id 0x%X\n"
+		"pid: %d\n",
+		as_no, as->fault_addr,
+		reason_str,
+		as->fault_status,
+		(as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+		exception_type, kbase_exception_name(kbdev, exception_type),
+		access_type, access_type_name(kbdev, as->fault_status),
+		source_id,
+		kctx->pid);
+
+	/* hardware counters dump fault handling */
+	if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
+			(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_DUMPING)) {
+		unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+
+		if ((as->fault_addr >= kbdev->hwcnt.addr) &&
+				(as->fault_addr < (kbdev->hwcnt.addr +
+						(num_core_groups * 2048))))
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+	}
+
+	/* Stop the kctx from submitting more jobs and cause it to be scheduled
+	 * out/rescheduled - this will occur on releasing the context's refcount */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_clear_submit_allowed(js_devdata, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
+	 * context can appear in the job slots from this point on */
+	kbase_backend_jm_kill_jobs_from_kctx(kctx);
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_mmu_disable(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+	/* Clear down the fault */
+	kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+	kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+}
+
+void kbasep_as_do_poke(struct work_struct *work)
+{
+	struct kbase_as *as;
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(work);
+	as = container_of(work, struct kbase_as, poke_work);
+	kbdev = container_of(as, struct kbase_device, as[as->number]);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	/* GPU power will already be active by virtue of the caller holding a JS
+	 * reference on the address space, and will not release it until this worker
+	 * has finished */
+
+	/* Further to the comment above, we know that while this function is running
+	 * the AS will not be released as before the atom is released this workqueue
+	 * is flushed (in kbase_as_poking_timer_release_atom)
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
+
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	/* Force a uTLB invalidate */
+	kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
+				  AS_COMMAND_UNLOCK, 0);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (as->poke_refcount &&
+		!(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
+		/* Only queue up the timer if we need it, and we're not trying to kill it */
+		hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_as *as;
+	int queue_work_ret;
+
+	KBASE_DEBUG_ASSERT(NULL != timer);
+	as = container_of(timer, struct kbase_as, poke_timer);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+	KBASE_DEBUG_ASSERT(queue_work_ret);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * Retain the poking timer on an atom's context (if the atom hasn't already
+ * done so), and start the timer (if it's not already started).
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that's running on the GPU.
+ *
+ * The caller must hold hwaccess_lock
+ *
+ * This can be called safely from atomic context
+ */
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->poking)
+		return;
+
+	katom->poking = 1;
+
+	/* It's safe to work on the as/as_nr without an explicit reference,
+	 * because the caller holds the hwaccess_lock, and the atom itself
+	 * was also running and had already taken a reference  */
+	as = &kbdev->as[kctx->as_nr];
+
+	if (++(as->poke_refcount) == 1) {
+		/* First refcount for poke needed: check if not already in flight */
+		if (!as->poke_state) {
+			/* need to start poking */
+			as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
+			queue_work(as->poke_wq, &as->poke_work);
+		}
+	}
+}
+
+/**
+ * If an atom holds a poking timer, release it and wait for it to finish
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that still has a JS reference on the context
+ *
+ * This must \b not be called from atomic context, since it can sleep.
+ */
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	if (!katom->poking)
+		return;
+
+	as = &kbdev->as[kctx->as_nr];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	if (--(as->poke_refcount) == 0) {
+		as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_cancel(&as->poke_timer);
+		flush_workqueue(as->poke_wq);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* Re-check whether it's still needed */
+		if (as->poke_refcount) {
+			int queue_work_ret;
+			/* Poking still needed:
+			 * - Another retain will not be starting the timer or queueing work,
+			 * because it's still marked as in-flight
+			 * - The hrtimer has finished, and has not started a new timer or
+			 * queued work because it's been marked as killing
+			 *
+			 * So whatever happens now, just queue the work again */
+			as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
+			queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+			KBASE_DEBUG_ASSERT(queue_work_ret);
+		} else {
+			/* It isn't - so mark it as not in flight, and not killing */
+			as->poke_state = 0u;
+
+			/* The poke associated with the atom has now finished. If this is
+			 * also the last atom on the context, then we can guarentee no more
+			 * pokes (and thus no more poking register accesses) will occur on
+			 * the context until new atoms are run */
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	katom->poking = 0;
+}
+
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kctx) {
+		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n",
+				 kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
+				 as->number, as->fault_addr);
+
+		/* Since no ctx was found, the MMU must be disabled. */
+		WARN_ON(as->current_setup.transtab);
+
+		if (kbase_as_has_bus_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		} else if (kbase_as_has_page_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+		}
+
+#if KBASE_GPU_RESET_EN
+		if (kbase_as_has_bus_fault(as) &&
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+			bool reset_status;
+			/*
+			 * Reset the GPU, like in bus_fault_worker, in case an
+			 * earlier error hasn't been properly cleared by this
+			 * point.
+			 */
+			dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+			reset_status = kbase_prepare_to_reset_gpu_locked(kbdev);
+			if (reset_status)
+				kbase_reset_gpu_locked(kbdev);
+		}
+#endif /* KBASE_GPU_RESET_EN */
+
+		return;
+	}
+
+	if (kbase_as_has_bus_fault(as)) {
+		/*
+		 * hw counters dumping in progress, signal the
+		 * other thread that it failed
+		 */
+		if ((kbdev->hwcnt.kctx == kctx) &&
+		    (kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_DUMPING))
+			kbdev->hwcnt.backend.state =
+						KBASE_INSTR_STATE_FAULT;
+
+		/*
+		 * Stop the kctx from submitting more jobs and cause it
+		 * to be scheduled out/rescheduled when all references
+		 * to it are released
+		 */
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			dev_warn(kbdev->dev,
+					"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+					as->number, as->fault_addr,
+					as->fault_extra_addr);
+		else
+			dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+					as->number, as->fault_addr);
+
+		/*
+		 * We need to switch to UNMAPPED mode - but we do this in a
+		 * worker so that we can sleep
+		 */
+		WARN_ON(!queue_work(as->pf_wq, &as->work_busfault));
+		atomic_inc(&kbdev->faults_pending);
+	} else {
+		WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault));
+		atomic_inc(&kbdev->faults_pending);
+	}
+}
+
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		flush_workqueue(as->pf_wq);
+	}
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
new file mode 100644
index 0000000..986e959
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Interface file for accessing MMU hardware functionality
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_page MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_intro_sec Introduction
+ * This module provides an abstraction for accessing the functionality provided
+ * by the midgard MMU and thus allows all MMU HW access to be contained within
+ * one common place and allows for different backends (implementations) to
+ * be provided.
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_H_
+#define _MALI_KBASE_MMU_HW_H_
+
+/* Forward declarations */
+struct kbase_device;
+struct kbase_as;
+struct kbase_context;
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw  MMU access APIs
+ * @{
+ */
+
+/** @brief MMU fault type descriptor.
+ */
+enum kbase_mmu_fault_type {
+	KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
+	KBASE_MMU_FAULT_TYPE_PAGE,
+	KBASE_MMU_FAULT_TYPE_BUS,
+	KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED,
+	KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED
+};
+
+/** @brief Configure an address space for use.
+ *
+ * Configure the MMU using the address space details setup in the
+ * @ref kbase_context structure.
+ *
+ * @param[in]  kbdev          kbase device to configure.
+ * @param[in]  as             address space to configure.
+ * @param[in]  kctx           kbase context to configure.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev,
+		struct kbase_as *as, struct kbase_context *kctx);
+
+/** @brief Issue an operation to the MMU.
+ *
+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
+ * is associated with the provided @ref kbase_context over the specified range
+ *
+ * @param[in]  kbdev         kbase device to issue the MMU operation on.
+ * @param[in]  as            address space to issue the MMU operation on.
+ * @param[in]  kctx          kbase context to issue the MMU operation on.
+ * @param[in]  vpfn          MMU Virtual Page Frame Number to start the
+ *                           operation on.
+ * @param[in]  nr            Number of pages to work on.
+ * @param[in]  type          Operation type (written to ASn_COMMAND).
+ * @param[in]  handling_irq  Is this operation being called during the handling
+ *                           of an interrupt?
+ *
+ * @return Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type,
+		unsigned int handling_irq);
+
+/** @brief Clear a fault that has been previously reported by the MMU.
+ *
+ * Clear a bus error or page fault that has been reported by the MMU.
+ *
+ * @param[in]  kbdev         kbase device to  clear the fault from.
+ * @param[in]  as            address space to  clear the fault from.
+ * @param[in]  kctx          kbase context to clear the fault from or NULL.
+ * @param[in]  type          The type of fault that needs to be cleared.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @brief Enable fault that has been previously reported by the MMU.
+ *
+ * After a page fault or bus error has been reported by the MMU these
+ * will be disabled. After these are handled this function needs to be
+ * called to enable the page fault or bus error fault again.
+ *
+ * @param[in]  kbdev         kbase device to again enable the fault from.
+ * @param[in]  as            address space to again enable the fault from.
+ * @param[in]  kctx          kbase context to again enable the fault from.
+ * @param[in]  type          The type of fault that needs to be enabled again.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @} *//* end group mali_kbase_mmu_hw */
+/** @} *//* end group base_kbase_api */
+
+#endif	/* _MALI_KBASE_MMU_HW_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
new file mode 100644
index 0000000..0fb717b
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
@@ -0,0 +1,214 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014, 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+#include "mali_kbase_defs.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+/* For valid ATEs bit 1 = ((level == 3) ? 1 : 0).
+ * Valid ATE entries at level 3 are flagged with the value 3.
+ * Valid ATE entries at level 0-2 are flagged with the value 1.
+ */
+#define ENTRY_IS_ATE_L3		3ULL
+#define ENTRY_IS_ATE_L02	1ULL
+#define ENTRY_IS_INVAL		2ULL
+#define ENTRY_IS_PTE		3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_ACCESS_RW (1ULL << 6)     /* bits 6:7 */
+#define ENTRY_ACCESS_RO (3ULL << 6)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+	*pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler keeping cached copies of
+	 * memory, we have to explicitly say that we have updated memory.
+	 *
+	 * Note: We could manually move the data ourselves into R0 and
+	 * R1 by specifying register variables that are explicitly
+	 * given registers assignments, the down side of this is that
+	 * we have to assume cpu endianness.  To avoid this we can use
+	 * the ldrd to read the data from memory into R0 and R1 which
+	 * will respect the cpu endianness, we then use strd to make
+	 * the 64 bit assignment to the page table entry.
+	 */
+	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+			"strd r0, r1, [%[pte]]\n\t"
+			: "=m" (*pte)
+			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+			: "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register.
+	 */
+	setup->memattr =
+		(AS_MEMATTR_IMPL_DEF_CACHE_POLICY <<
+			(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_FORCE_TO_CACHE_ALL    <<
+			(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+		(AS_MEMATTR_WRITE_ALLOC           <<
+			(AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_IMPL_DEF   <<
+			(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_WA         <<
+			(AS_MEMATTR_INDEX_OUTER_WA * 8));
+
+	setup->transtab = (u64)kctx->pgd & AS_TRANSTAB_BASE_MASK;
+	setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K;
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = 0ULL;
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED;
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate, unsigned int level)
+{
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L3);
+	else
+		return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L02);
+}
+
+static int pte_is_valid(u64 pte, unsigned int level)
+{
+	/* PTEs cannot exist at the bottom level */
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		return false;
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* Set access flags - note that AArch64 stage 1 does not support
+	 * write-only access, so we use read/write instead
+	 */
+	if (flags & KBASE_REG_GPU_WR)
+		mmu_flags |= ENTRY_ACCESS_RW;
+	else if (flags & KBASE_REG_GPU_RD)
+		mmu_flags |= ENTRY_ACCESS_RO;
+
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry,
+		struct tagged_addr phy,
+		unsigned long flags,
+		unsigned int level)
+{
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		page_table_entry_set(entry, as_phys_addr_t(phy) |
+				get_mmu_flags(flags) |
+				ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3);
+	else
+		page_table_entry_set(entry, as_phys_addr_t(phy) |
+				get_mmu_flags(flags) |
+				ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & PAGE_MASK) |
+			ENTRY_ACCESS_BIT | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const aarch64_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void)
+{
+	return &aarch64_mode;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
new file mode 100644
index 0000000..f080fdc
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -0,0 +1,199 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+#include "mali_kbase_defs.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+#define ENTRY_IS_ATE        1ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_RD_BIT (1ULL << 6)
+#define ENTRY_WR_BIT (1ULL << 7)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
+		ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+	*pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler keeping cached copies of
+	 * memory, we have to explicitly say that we have updated
+	 * memory.
+	 *
+	 * Note: We could manually move the data ourselves into R0 and
+	 * R1 by specifying register variables that are explicitly
+	 * given registers assignments, the down side of this is that
+	 * we have to assume cpu endianness.  To avoid this we can use
+	 * the ldrd to read the data from memory into R0 and R1 which
+	 * will respect the cpu endianness, we then use strd to make
+	 * the 64 bit assignment to the page table entry.
+	 */
+	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+			"strd r0, r1, [%[pte]]\n\t"
+			: "=m" (*pte)
+			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+			: "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_context *kctx,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register. */
+	setup->memattr =
+		(AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
+		(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    <<
+		(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8))    |
+		(AS_MEMATTR_LPAE_WRITE_ALLOC           <<
+		(AS_MEMATTR_INDEX_WRITE_ALLOC * 8))           |
+		(AS_MEMATTR_LPAE_OUTER_IMPL_DEF        <<
+		(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8))        |
+		(AS_MEMATTR_LPAE_OUTER_WA              <<
+		(AS_MEMATTR_INDEX_OUTER_WA * 8))              |
+		0; /* The other indices are unused for now */
+
+	setup->transtab = ((u64)kctx->pgd &
+		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
+		AS_TRANSTAB_LPAE_ADRMODE_TABLE |
+		AS_TRANSTAB_LPAE_READ_INNER;
+
+	setup->transcfg = 0;
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+	struct kbase_device * const kbdev = kctx->kbdev;
+	struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	mmu_get_as_setup(kctx, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate, unsigned int level)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte, unsigned int level)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* write perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
+	/* read perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry,
+		struct tagged_addr phy,
+		unsigned long flags,
+		unsigned int level)
+{
+	page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) |
+			     ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const lpae_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
+{
+	return &lpae_mode;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
new file mode 100644
index 0000000..0152b35
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
@@ -0,0 +1,119 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+
+/*
+ * This file is included only for type definitions and functions belonging to
+ * specific platform folders. Do not add dependencies with symbols that are
+ * defined somewhere else.
+ */
+#include <mali_kbase_config.h>
+
+#define PLATFORM_CONFIG_RESOURCE_COUNT 4
+#define PLATFORM_CONFIG_IRQ_RES_COUNT  3
+
+static struct platform_device *mali_device;
+
+#ifndef CONFIG_OF
+/**
+ * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources
+ *
+ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function
+ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT.
+ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
+ *
+ * @param[in]  io_resource      Input IO resource data
+ * @param[out] linux_resources  Pointer to output array of Linux resource structures
+ */
+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
+{
+	if (!io_resources || !linux_resources) {
+		pr_err("%s: couldn't find proper resources\n", __func__);
+		return;
+	}
+
+	memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource));
+
+	linux_resources[0].start = io_resources->io_memory_region.start;
+	linux_resources[0].end   = io_resources->io_memory_region.end;
+	linux_resources[0].flags = IORESOURCE_MEM;
+
+	linux_resources[1].start = io_resources->job_irq_number;
+	linux_resources[1].end   = io_resources->job_irq_number;
+	linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[2].start = io_resources->mmu_irq_number;
+	linux_resources[2].end   = io_resources->mmu_irq_number;
+	linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[3].start = io_resources->gpu_irq_number;
+	linux_resources[3].end   = io_resources->gpu_irq_number;
+	linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+}
+#endif /* CONFIG_OF */
+
+int kbase_platform_register(void)
+{
+	struct kbase_platform_config *config;
+#ifndef CONFIG_OF
+	struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
+#endif
+	int err;
+
+	config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
+	if (config == NULL) {
+		pr_err("%s: couldn't get platform config\n", __func__);
+		return -ENODEV;
+	}
+
+	mali_device = platform_device_alloc("mali", 0);
+	if (mali_device == NULL)
+		return -ENOMEM;
+
+#ifndef CONFIG_OF
+	kbasep_config_parse_io_resources(config->io_resources, resources);
+	err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
+	if (err) {
+		platform_device_put(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+#endif /* CONFIG_OF */
+
+	err = platform_device_add(mali_device);
+	if (err) {
+		platform_device_unregister(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kbase_platform_register);
+
+void kbase_platform_unregister(void)
+{
+	if (mali_device)
+		platform_device_unregister(mali_device);
+}
+EXPORT_SYMBOL(kbase_platform_unregister);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
new file mode 100644
index 0000000..97d5434
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -0,0 +1,205 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.c
+ * Base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_vinstr.h>
+
+#include <mali_kbase_pm.h>
+
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
+{
+	return kbase_hwaccess_pm_powerup(kbdev, flags);
+}
+
+void kbase_pm_halt(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
+void kbase_pm_context_active(struct kbase_device *kbdev)
+{
+	(void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+}
+
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerup. Sometimes the event might be missed due to reading the count
+	 * outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+	if (kbase_pm_is_suspending(kbdev)) {
+		switch (suspend_handler) {
+		case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+			if (kbdev->pm.active_count != 0)
+				break;
+			/* FALLTHROUGH */
+		case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			if (old_count == 0)
+				kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+			return 1;
+
+		case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
+			/* FALLTHROUGH */
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false, "unreachable");
+			break;
+		}
+	}
+	c = ++kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+	if (c == 1)
+		/* First context active: Power on the GPU and any cores requested by
+		 * the policy */
+		kbase_hwaccess_pm_gpu_active(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_active);
+
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+	int old_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to powerdown. Sometimes the event might be missed due to reading the
+	 * count outside of mutex, but this is necessary to get the trace timing
+	 * correct. */
+	old_count = kbdev->pm.active_count;
+	if (old_count == 0)
+		kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	c = --kbdev->pm.active_count;
+	KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
+
+	KBASE_DEBUG_ASSERT(c >= 0);
+
+	/* Trace the event being handled */
+	if (old_count == 0)
+		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+	if (c == 0) {
+		/* Last context has gone idle */
+		kbase_hwaccess_pm_gpu_idle(kbdev);
+
+		/* Wake up anyone waiting for this to become 0 (e.g. suspend). The
+		 * waiters must synchronize with us by locking the pm.lock after
+		 * waiting */
+		wake_up(&kbdev->pm.zero_active_count_wait);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
+
+void kbase_pm_suspend(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	mutex_lock(&kbdev->pm.lock);
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+	kbdev->pm.suspending = true;
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* From now on, the active count will drop towards zero. Sometimes, it'll
+	 * go up briefly before going down again. However, once it reaches zero it
+	 * will stay there - guaranteeing that we've idled all pm references */
+
+	/* Suspend job scheduler and associated components, so that it releases all
+	 * the PM active count references */
+	kbasep_js_suspend(kbdev);
+
+	/* Wait for the active count to reach zero. This is not the same as
+	 * waiting for a power down, since not all policies power down when this
+	 * reaches zero. */
+	wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0);
+
+	/* NOTE: We synchronize with anything that was just finishing a
+	 * kbase_pm_context_idle() call by locking the pm.lock below */
+
+	kbase_hwaccess_pm_suspend(kbdev);
+}
+
+void kbase_pm_resume(struct kbase_device *kbdev)
+{
+	/* MUST happen before any pm_context_active calls occur */
+	kbase_hwaccess_pm_resume(kbdev);
+
+	/* Initial active call, to power on the GPU/cores if needed */
+	kbase_pm_context_active(kbdev);
+
+	/* Resume any blocked atoms (which may cause contexts to be scheduled in
+	 * and dependent atoms to run) */
+	kbase_resume_suspended_soft_jobs(kbdev);
+
+	/* Resume the Job Scheduler and associated components, and start running
+	 * atoms */
+	kbasep_js_resume(kbdev);
+
+	/* Matching idle call, to power off the GPU/cores if we didn't actually
+	 * need it and the policy doesn't want it on */
+	kbase_pm_context_idle(kbdev);
+
+	/* Resume vinstr operation */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+}
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
new file mode 100644
index 0000000..37fa247
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -0,0 +1,171 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include "mali_kbase_hwaccess_pm.h"
+
+#define PM_ENABLE_IRQS       0x01
+#define PM_HW_ISSUES_DETECT  0x02
+
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return 0 if the power management framework was successfully initialized.
+ */
+int kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @param flags     Flags to pass on to kbase_pm_init_hw
+ *
+ * @return 0 if powerup was successful.
+ */
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ * Should ensure that no new interrupts are generated,
+ * but allow any currently running interrupt handlers to complete successfully.
+ * The GPU is forced off by the time this function returns, regardless of
+ * whether or not the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init)
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It informs the active power policy that the
+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occuring/has occurred whilst this is
+ * in use. Use kbase_pm_contect_active_unless_suspending() instead.
+ *
+ * @note a Suspend is only visible to Kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+	/** A suspend is not expected/not possible - this is the same as
+	 * kbase_pm_context_active() */
+	KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+	/** If we're suspending, fail and don't increase the active count */
+	KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+	/** If we're suspending, succeed and allow the active count to increase iff
+	 * it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+	 *
+	 * This should only be used when there is a bounded time on the activation
+	 * (e.g. guarantee it's going to be idled very soon after) */
+	KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * We returns a status code indicating whether we're allowed to keep the GPU
+ * active during the suspend, depending on the handler code. If the status code
+ * indicates a failure, the caller must abort whatever operation it was
+ * attempting, and potentially queue it up for after the OS has resumed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero     Indicates success
+ * @return non-zero Indicates failure due to the system being suspending/suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power
+ * policy so the calling code should ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from Kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note the mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU e.g. via atom submission.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_vsync_callback - vsync callback
+ *
+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise
+ * @data: Pointer to the kbase device as returned by kbase_find_device()
+ *
+ * Callback function used to notify the power management code that a vsync has
+ * occurred on the display.
+ */
+void kbase_pm_vsync_callback(int buffer_updated, void *data);
+
+#endif				/* _KBASE_PM_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
new file mode 100644
index 0000000..7fb674e
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_profiling_gator_api.h
+ * Model interface
+ */
+
+#ifndef _KBASE_PROFILING_GATOR_API_H_
+#define _KBASE_PROFILING_GATOR_API_H_
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control
+ * the frame buffer dumping and s/w counter reporting.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define FBDUMP_CONTROL_MAX (5)
+#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE
+
+void _mali_profiling_control(u32 action, u32 value);
+
+#endif				/* _KBASE_PROFILING_GATOR_API */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
new file mode 100644
index 0000000..c970650
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+
+#include "mali_kbase_regs_history_debugfs.h"
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+#include <linux/debugfs.h>
+
+
+static int regs_history_size_get(void *data, u64 *val)
+{
+	struct kbase_io_history *const h = data;
+
+	*val = h->size;
+
+	return 0;
+}
+
+static int regs_history_size_set(void *data, u64 val)
+{
+	struct kbase_io_history *const h = data;
+
+	return kbase_io_history_resize(h, (u16)val);
+}
+
+
+DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
+		regs_history_size_get,
+		regs_history_size_set,
+		"%llu\n");
+
+
+/**
+ * regs_history_show - show callback for the register access history file.
+ *
+ * @sfile: The debugfs entry
+ * @data: Data associated with the entry
+ *
+ * This function is called to dump all recent accesses to the GPU registers.
+ *
+ * @return 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int regs_history_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_io_history *const h = sfile->private;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!h->enabled) {
+		seq_puts(sfile, "The register access history is disabled\n");
+		goto out;
+	}
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	iters = (h->size > h->count) ? h->count : h->size;
+	seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+out:
+	return 0;
+}
+
+
+/**
+ * regs_history_open - open operation for regs_history debugfs file
+ *
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * @return file descriptor
+ */
+static int regs_history_open(struct inode *in, struct file *file)
+{
+	return single_open(file, &regs_history_show, in->i_private);
+}
+
+
+static const struct file_operations regs_history_fops = {
+	.open = &regs_history_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history.enabled);
+	debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history, &regs_history_size_fops);
+	debugfs_create_file("regs_history", S_IRUGO,
+			kbdev->mali_debugfs_directory, &kbdev->io_history,
+			&regs_history_fops);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
new file mode 100644
index 0000000..f108370
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Header file for register access history support via debugfs
+ *
+ * This interface is made available via /sys/kernel/debug/mali#/regs_history*.
+ *
+ * Usage:
+ * - regs_history_enabled: whether recording of register accesses is enabled.
+ *   Write 'y' to enable, 'n' to disable.
+ * - regs_history_size: size of the register history buffer, must be > 0
+ * - regs_history: return the information about last accesses to the registers.
+ */
+
+#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H
+#define _KBASE_REGS_HISTORY_DEBUGFS_H
+
+struct kbase_device;
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/**
+ * kbasep_regs_history_debugfs_init - add debugfs entries for register history
+ *
+ * @kbdev: Pointer to kbase_device containing the register history
+ */
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_regs_history_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+#endif  /*_KBASE_REGS_HISTORY_DEBUGFS_H*/
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
new file mode 100644
index 0000000..203a065
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -0,0 +1,1150 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_replay.c
+ * Replay soft job handlers
+ */
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mem_linux.h>
+
+#define JOB_NOT_STARTED 0
+#define JOB_TYPE_NULL      (1)
+#define JOB_TYPE_VERTEX    (5)
+#define JOB_TYPE_TILER     (7)
+#define JOB_TYPE_FUSED     (8)
+#define JOB_TYPE_FRAGMENT  (9)
+
+#define JOB_HEADER_32_FBD_OFFSET (31*4)
+#define JOB_HEADER_64_FBD_OFFSET (44*4)
+
+#define FBD_POINTER_MASK (~0x3f)
+
+#define SFBD_TILER_OFFSET (48*4)
+
+#define MFBD_TILER_OFFSET       (14*4)
+
+#define FBD_HIERARCHY_WEIGHTS 8
+#define FBD_HIERARCHY_MASK_MASK 0x1fff
+
+#define FBD_TYPE 1
+
+#define HIERARCHY_WEIGHTS 13
+
+#define JOB_HEADER_ID_MAX                 0xffff
+
+#define JOB_SOURCE_ID(status)		(((status) >> 16) & 0xFFFF)
+#define JOB_POLYGON_LIST		(0x03)
+
+struct fragment_job {
+	struct job_descriptor_header header;
+
+	u32 x[2];
+	union {
+		u64 _64;
+		u32 _32;
+	} fragment_fbd;
+};
+
+static void dump_job_head(struct kbase_context *kctx, char *head_str,
+		struct job_descriptor_header *job)
+{
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
+	dev_dbg(kctx->kbdev->dev,
+			"addr                  = %p\n"
+			"exception_status      = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n"
+			"first_incomplete_task = %x\n"
+			"fault_pointer         = %llx\n"
+			"job_descriptor_size   = %x\n"
+			"job_type              = %x\n"
+			"job_barrier           = %x\n"
+			"_reserved_01          = %x\n"
+			"_reserved_02          = %x\n"
+			"_reserved_03          = %x\n"
+			"_reserved_04/05       = %x,%x\n"
+			"job_index             = %x\n"
+			"dependencies          = %x,%x\n",
+			job, job->exception_status,
+			JOB_SOURCE_ID(job->exception_status),
+			(job->exception_status >> 8) & 0x3,
+			job->exception_status  & 0xFF,
+			job->first_incomplete_task,
+			job->fault_pointer, job->job_descriptor_size,
+			job->job_type, job->job_barrier, job->_reserved_01,
+			job->_reserved_02, job->_reserved_03,
+			job->_reserved_04, job->_reserved_05,
+			job->job_index,
+			job->job_dependency_index_1,
+			job->job_dependency_index_2);
+
+	if (job->job_descriptor_size)
+		dev_dbg(kctx->kbdev->dev, "next               = %llx\n",
+				job->next_job._64);
+	else
+		dev_dbg(kctx->kbdev->dev, "next               = %x\n",
+				job->next_job._32);
+#endif
+}
+
+static int kbasep_replay_reset_sfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct {
+		u32 padding_1[1];
+		u32 flags;
+		u64 padding_2[2];
+		u64 heap_free_address;
+		u32 padding[8];
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev,
+		"FBD tiler:\n"
+		"flags = %x\n"
+		"heap_free_address = %llx\n",
+		fbd_tiler->flags, fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = fbd_tiler->flags &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n",
+						i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n",
+			fbd_tiler->heap_free_address, fbd_tiler->flags);
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_replay_reset_mfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct kbase_vmap_struct map;
+	struct {
+		u32 padding_0;
+		u32 flags;
+		u64 padding_1[2];
+		u64 heap_free_address;
+		u64 padding_2;
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev,
+			       "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "FBD tiler:\n"
+			"flags = %x\n"
+			"heap_free_address = %llx\n",
+			fbd_tiler->flags,
+			fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = (fbd_tiler->flags) &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev,
+				" Writing hierarchy level %02d (%08x) to %d\n",
+							     i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of an FBD pointed to by a tiler job
+ *
+ * This performs two functions :
+ * - Set the hierarchy mask
+ * - Reset the tiler free heap address
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] job_header        Address of job header to reset.
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] job_64            true if this job is using 64-bit
+ *                              descriptors
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx,
+		u64 job_header,	u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight,	bool job_64)
+{
+	struct kbase_vmap_struct map;
+	u64 fbd_address;
+
+	if (job_64) {
+		u64 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_64_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	} else {
+		u32 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_32_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	if (fbd_address & FBD_TYPE) {
+		return kbasep_replay_reset_mfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	} else {
+		return kbasep_replay_reset_sfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	}
+}
+
+/**
+ * @brief Reset the status of a job
+ *
+ * This performs the following functions :
+ *
+ * - Reset the Job Status field of each job to NOT_STARTED.
+ * - Set the Job Type field of any Vertex Jobs to Null Job.
+ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of
+ *   the tiler_heap_free parameter, and set the hierarchy level mask to the
+ *   hier_mask parameter.
+ * - Offset HW dependencies by the hw_job_id_offset parameter
+ * - Set the Perform Job Barrier flag if this job is the first in the chain
+ * - Read the address of the next job header
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in,out] job_header    Address of job header to reset. Set to address
+ *                              of next job header on exit.
+ * @param[in] prev_jc           Previous job chain to link to, if this job is
+ *                              the last in the chain.
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] first_in_chain    true if this job is the first in the chain
+ * @param[in] fragment_chain    true if this job is in the fragment chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_job(struct kbase_context *kctx,
+		u64 *job_header, u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool first_in_chain, bool fragment_chain)
+{
+	struct fragment_job *frag_job;
+	struct job_descriptor_header *job;
+	u64 new_job_header;
+	struct kbase_vmap_struct map;
+
+	frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map);
+	if (!frag_job) {
+		dev_err(kctx->kbdev->dev,
+				 "kbasep_replay_parse_jc: failed to map jc\n");
+		return -EINVAL;
+	}
+	job = &frag_job->header;
+
+	dump_job_head(kctx, "Job header:", job);
+
+	if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) {
+		dev_err(kctx->kbdev->dev, "Job already not started\n");
+		goto out_unmap;
+	}
+	job->exception_status = JOB_NOT_STARTED;
+
+	if (job->job_type == JOB_TYPE_VERTEX)
+		job->job_type = JOB_TYPE_NULL;
+
+	if (job->job_type == JOB_TYPE_FUSED) {
+		dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
+		goto out_unmap;
+	}
+
+	if (first_in_chain)
+		job->job_barrier = 1;
+
+	if ((job->job_dependency_index_1 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_dependency_index_2 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+		dev_err(kctx->kbdev->dev,
+			     "Job indicies/dependencies out of valid range\n");
+		goto out_unmap;
+	}
+
+	if (job->job_dependency_index_1)
+		job->job_dependency_index_1 += hw_job_id_offset;
+	if (job->job_dependency_index_2)
+		job->job_dependency_index_2 += hw_job_id_offset;
+
+	job->job_index += hw_job_id_offset;
+
+	if (job->job_descriptor_size) {
+		new_job_header = job->next_job._64;
+		if (!job->next_job._64)
+			job->next_job._64 = prev_jc;
+	} else {
+		new_job_header = job->next_job._32;
+		if (!job->next_job._32)
+			job->next_job._32 = prev_jc;
+	}
+	dump_job_head(kctx, "Updated to:", job);
+
+	if (job->job_type == JOB_TYPE_TILER) {
+		bool job_64 = job->job_descriptor_size != 0;
+
+		if (kbasep_replay_reset_tiler_job(kctx, *job_header,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, job_64) != 0)
+			goto out_unmap;
+
+	} else if (job->job_type == JOB_TYPE_FRAGMENT) {
+		u64 fbd_address;
+
+		if (job->job_descriptor_size)
+			fbd_address = frag_job->fragment_fbd._64;
+		else
+			fbd_address = (u64)frag_job->fragment_fbd._32;
+
+		if (fbd_address & FBD_TYPE) {
+			if (kbasep_replay_reset_mfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		} else {
+			if (kbasep_replay_reset_sfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		}
+	}
+
+	kbase_vunmap(kctx, &map);
+
+	*job_header = new_job_header;
+
+	return 0;
+
+out_unmap:
+	kbase_vunmap(kctx, &map);
+	return -EINVAL;
+}
+
+/**
+ * @brief Find the highest job ID in a job chain
+ *
+ * @param[in] kctx        Context pointer
+ * @param[in] jc          Job chain start address
+ * @param[out] hw_job_id  Highest job ID in chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
+		u64 jc,	u16 *hw_job_id)
+{
+	while (jc) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		dev_dbg(kctx->kbdev->dev,
+			"kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc);
+
+		job = kbase_vmap(kctx, jc, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev, "failed to map jc\n");
+
+			return -EINVAL;
+		}
+
+		if (job->job_index > *hw_job_id)
+			*hw_job_id = job->job_index;
+
+		if (job->job_descriptor_size)
+			jc = job->next_job._64;
+		else
+			jc = job->next_job._32;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a number of jobs
+ *
+ * This function walks the provided job chain, and calls
+ * kbasep_replay_reset_job for each job. It also links the job chain to the
+ * provided previous job chain.
+ *
+ * The function will fail if any of the jobs passed already have status of
+ * NOT_STARTED.
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] jc                Job chain to be processed
+ * @param[in] prev_jc           Job chain to be added to. May be NULL
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] fragment_chain    true if this chain is the fragment chain
+ *
+ * @return 0 on success, error code otherwise
+ */
+static int kbasep_replay_parse_jc(struct kbase_context *kctx,
+		u64 jc,	u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool fragment_chain)
+{
+	bool first_in_chain = true;
+	int nr_jobs = 0;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n",
+			jc, hw_job_id_offset);
+
+	while (jc) {
+		dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc);
+
+		if (kbasep_replay_reset_job(kctx, &jc, prev_jc,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, hw_job_id_offset,
+				first_in_chain, fragment_chain) != 0)
+			return -EINVAL;
+
+		first_in_chain = false;
+
+		nr_jobs++;
+		if (fragment_chain &&
+		    nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) {
+			dev_err(kctx->kbdev->dev,
+				"Exceeded maximum number of jobs in fragment chain\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a replay job, and set up dependencies
+ *
+ * This performs the actions to allow the replay job to be re-run following
+ * completion of the passed dependency.
+ *
+ * @param[in] katom     The atom to be reset
+ * @param[in] dep_atom  The dependency to be attached to the atom
+ */
+static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom,
+		struct kbase_jd_atom *dep_atom)
+{
+	katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+	list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]);
+}
+
+/**
+ * @brief Allocate an unused katom
+ *
+ * This will search the provided context for an unused katom, and will mark it
+ * as KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * If no atoms are available then the function will fail.
+ *
+ * @param[in] kctx      Context pointer
+ * @return An atom ID, or -1 on failure
+ */
+static int kbasep_allocate_katom(struct kbase_context *kctx)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int i;
+
+	for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) {
+		if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) {
+			jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED;
+			dev_dbg(kctx->kbdev->dev,
+				  "kbasep_allocate_katom: Allocated atom %d\n",
+									    i);
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * @brief Release a katom
+ *
+ * This will mark the provided atom as available, and remove any dependencies.
+ *
+ * For use on error path.
+ *
+ * @param[in] kctx      Context pointer
+ * @param[in] atom_id   ID of atom to release
+ */
+static void kbasep_release_katom(struct kbase_context *kctx, int atom_id)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n",
+			atom_id);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[0]))
+		list_del(jctx->atoms[atom_id].dep_head[0].next);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[1]))
+		list_del(jctx->atoms[atom_id].dep_head[1].next);
+
+	jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED;
+}
+
+static void kbasep_replay_create_atom(struct kbase_context *kctx,
+				      struct base_jd_atom_v2 *atom,
+				      int atom_nr,
+				      base_jd_prio prio)
+{
+	atom->nr_extres = 0;
+	atom->extres_list = 0;
+	atom->device_nr = 0;
+	atom->prio = prio;
+	atom->atom_number = atom_nr;
+
+	base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID);
+
+	atom->udata.blob[0] = 0;
+	atom->udata.blob[1] = 0;
+}
+
+/**
+ * @brief Create two atoms for the purpose of replaying jobs
+ *
+ * Two atoms are allocated and created. The jc pointer is not set at this
+ * stage. The second atom has a dependency on the first. The remaining fields
+ * are set up as follows :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ *
+ * @param[out] t_atom      Atom to use for tiler jobs
+ * @param[out] f_atom      Atom to use for fragment jobs
+ * @param[in]  prio        Priority of new atom (inherited from replay soft
+ *                         job)
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_create_atoms(struct kbase_context *kctx,
+		struct base_jd_atom_v2 *t_atom,
+		struct base_jd_atom_v2 *f_atom,
+		base_jd_prio prio)
+{
+	int t_atom_nr, f_atom_nr;
+
+	t_atom_nr = kbasep_allocate_katom(kctx);
+	if (t_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		return -EINVAL;
+	}
+
+	f_atom_nr = kbasep_allocate_katom(kctx);
+	if (f_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		kbasep_release_katom(kctx, t_atom_nr);
+		return -EINVAL;
+	}
+
+	kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
+	kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
+
+	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA);
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_DEBUG
+static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload)
+{
+	u64 next;
+
+	dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n");
+	next = payload->tiler_jc_list;
+
+	while (next) {
+		struct kbase_vmap_struct map;
+		base_jd_replay_jc *jc_struct;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map);
+
+		if (!jc_struct)
+			return;
+
+		dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n",
+				jc_struct, jc_struct->jc, jc_struct->next);
+
+		next = jc_struct->next;
+
+		kbase_vunmap(kctx, &map);
+	}
+}
+#endif
+
+/**
+ * @brief Parse a base_jd_replay_payload provided by userspace
+ *
+ * This will read the payload from userspace, and parse the job chains.
+ *
+ * @param[in] kctx         Context pointer
+ * @param[in] replay_atom  Replay soft job atom
+ * @param[in] t_atom       Atom to use for tiler jobs
+ * @param[in] f_atom       Atom to use for fragment jobs
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_parse_payload(struct kbase_context *kctx,
+					      struct kbase_jd_atom *replay_atom,
+					      struct base_jd_atom_v2 *t_atom,
+					      struct base_jd_atom_v2 *f_atom)
+{
+	base_jd_replay_payload *payload = NULL;
+	u64 next;
+	u64 prev_jc = 0;
+	u16 hw_job_id_offset = 0;
+	int ret = -EINVAL;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n",
+			replay_atom->jc, sizeof(payload));
+
+	payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload);
+	dev_dbg(kctx->kbdev->dev, "Payload structure:\n"
+				  "tiler_jc_list            = %llx\n"
+				  "fragment_jc              = %llx\n"
+				  "tiler_heap_free          = %llx\n"
+				  "fragment_hierarchy_mask  = %x\n"
+				  "tiler_hierarchy_mask     = %x\n"
+				  "hierarchy_default_weight = %x\n"
+				  "tiler_core_req           = %x\n"
+				  "fragment_core_req        = %x\n",
+							payload->tiler_jc_list,
+							  payload->fragment_jc,
+						      payload->tiler_heap_free,
+					      payload->fragment_hierarchy_mask,
+						 payload->tiler_hierarchy_mask,
+					     payload->hierarchy_default_weight,
+						       payload->tiler_core_req,
+						   payload->fragment_core_req);
+	payload_dump(kctx, payload);
+#endif
+	t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER;
+	f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER;
+
+	/* Sanity check core requirements*/
+	if ((t_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T ||
+	    (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS ||
+	     t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES ||
+	     f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+
+		int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP;
+		int f_atom_type = f_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC;
+		int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+		int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+
+		if (t_atom_type != BASE_JD_REQ_T) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. Was: 0x%x\n Expected: 0x%x",
+			    t_atom_type, BASE_JD_REQ_T);
+		}
+		if (f_atom_type != BASE_JD_REQ_FS) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. Was 0x%x Expected: 0x%x\n",
+			    f_atom_type, BASE_JD_REQ_FS);
+		}
+		if (t_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n");
+		}
+		if (f_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n");
+		}
+
+		goto out;
+	}
+
+	/* Process tiler job chains */
+	next = payload->tiler_jc_list;
+	if (!next) {
+		dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n");
+		goto out;
+	}
+
+	while (next) {
+		base_jd_replay_jc *jc_struct;
+		struct kbase_vmap_struct jc_map;
+		u64 jc;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map);
+
+		if (!jc_struct) {
+			dev_err(kctx->kbdev->dev, "Failed to map jc struct\n");
+			goto out;
+		}
+
+		jc = jc_struct->jc;
+		next = jc_struct->next;
+		if (next)
+			jc_struct->jc = 0;
+
+		kbase_vunmap(kctx, &jc_map);
+
+		if (jc) {
+			u16 max_hw_job_id = 0;
+
+			if (kbasep_replay_find_hw_job_id(kctx, jc,
+					&max_hw_job_id) != 0)
+				goto out;
+
+			if (kbasep_replay_parse_jc(kctx, jc, prev_jc,
+					payload->tiler_heap_free,
+					payload->tiler_hierarchy_mask,
+					payload->hierarchy_default_weight,
+					hw_job_id_offset, false) != 0) {
+				goto out;
+			}
+
+			hw_job_id_offset += max_hw_job_id;
+
+			prev_jc = jc;
+		}
+	}
+	t_atom->jc = prev_jc;
+
+	/* Process fragment job chain */
+	f_atom->jc = payload->fragment_jc;
+	if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0,
+			payload->tiler_heap_free,
+			payload->fragment_hierarchy_mask,
+			payload->hierarchy_default_weight, 0,
+			true) != 0) {
+		goto out;
+	}
+
+	if (!t_atom->jc || !f_atom->jc) {
+		dev_err(kctx->kbdev->dev, "Invalid payload\n");
+		goto out;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n",
+			t_atom->jc, f_atom->jc);
+	ret = 0;
+
+out:
+	kbase_vunmap(kctx, &map);
+
+	return ret;
+}
+
+static void kbase_replay_process_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+	struct kbase_jd_context *jctx;
+	bool need_to_try_schedule_context = false;
+
+	struct base_jd_atom_v2 t_atom, f_atom;
+	struct kbase_jd_atom *t_katom, *f_katom;
+	base_jd_prio atom_prio;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+
+	mutex_lock(&jctx->lock);
+
+	atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority);
+
+	if (kbasep_replay_create_atoms(
+			kctx, &t_atom, &f_atom, atom_prio) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	t_katom = &jctx->atoms[t_atom.atom_number];
+	f_katom = &jctx->atoms[f_atom.atom_number];
+
+	if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) {
+		kbasep_release_katom(kctx, t_atom.atom_number);
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	kbasep_replay_reset_softjob(katom, f_katom);
+
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom);
+	if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom);
+	if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+out:
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_disjoint_state_down(kctx->kbdev);
+
+		need_to_try_schedule_context |= jd_done_nolock(katom, NULL);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kctx->kbdev);
+
+	mutex_unlock(&jctx->lock);
+}
+
+/**
+ * @brief Check job replay fault
+ *
+ * This will read the job payload, checks fault type and source, then decides
+ * whether replay is required.
+ *
+ * @param[in] katom       The atom to be processed
+ * @return  true (success) if replay required or false on failure.
+ */
+static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	base_jd_replay_payload *payload;
+	u64 job_header;
+	u64 job_loop_detect;
+	struct job_descriptor_header *job;
+	struct kbase_vmap_struct job_map;
+	struct kbase_vmap_struct map;
+	bool err = false;
+
+	/* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or
+	 * if force_replay is enabled.
+	 */
+	if (BASE_JD_EVENT_TERMINATED == katom->event_code) {
+		return false;
+	} else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) {
+		return true;
+	} else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) {
+		katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT;
+		return true;
+	} else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) {
+		/* No replay for faults of type other than
+		 * BASE_JD_EVENT_DATA_INVALID_FAULT.
+		 */
+		return false;
+	}
+
+	/* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc
+	 * to find out whether the source of exception is POLYGON_LIST. Replay
+	 * is required if the source of fault is POLYGON_LIST.
+	 */
+	payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n");
+		return false;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload);
+	dev_dbg(dev, "\nPayload structure:\n"
+		     "fragment_jc              = 0x%llx\n"
+		     "fragment_hierarchy_mask  = 0x%x\n"
+		     "fragment_core_req        = 0x%x\n",
+		     payload->fragment_jc,
+		     payload->fragment_hierarchy_mask,
+		     payload->fragment_core_req);
+#endif
+	/* Process fragment job chain */
+	job_header      = (u64) payload->fragment_jc;
+	job_loop_detect = job_header;
+	while (job_header) {
+		job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map);
+		if (!job) {
+			dev_err(dev, "failed to map jc\n");
+			/* unmap payload*/
+			kbase_vunmap(kctx, &map);
+			return false;
+		}
+
+
+		dump_job_head(kctx, "\njob_head structure:\n", job);
+
+		/* Replay only when the polygon list reader caused the
+		 * DATA_INVALID_FAULT */
+		if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
+		   (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) {
+			err = true;
+			kbase_vunmap(kctx, &job_map);
+			break;
+		}
+
+		/* Move on to next fragment job in the list */
+		if (job->job_descriptor_size)
+			job_header = job->next_job._64;
+		else
+			job_header = job->next_job._32;
+
+		kbase_vunmap(kctx, &job_map);
+
+		/* Job chain loop detected */
+		if (job_header == job_loop_detect)
+			break;
+	}
+
+	/* unmap payload*/
+	kbase_vunmap(kctx, &map);
+
+	return err;
+}
+
+
+/**
+ * @brief Process a replay job
+ *
+ * Called from kbase_process_soft_job.
+ *
+ * On exit, if the job has completed, katom->event_code will have been updated.
+ * If the job has not completed, and is replaying jobs, then the atom status
+ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * @param[in] katom  The atom to be processed
+ * @return           false if the atom has completed
+ *                   true if the atom is replaying jobs
+ */
+bool kbase_replay_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	/* Don't replay this atom if these issues are not present in the
+	 * hardware */
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11020) &&
+			!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11024)) {
+		dev_dbg(kbdev->dev, "Hardware does not need replay workaround");
+
+		/* Signal failure to userspace */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+
+		return false;
+	}
+
+	if (katom->event_code == BASE_JD_EVENT_DONE) {
+		dev_dbg(kbdev->dev, "Previous job succeeded - not replaying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		dev_dbg(kbdev->dev, "Not replaying; context is dying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	/* Check job exception type and source before replaying. */
+	if (!kbase_replay_fault_check(katom)) {
+		dev_dbg(kbdev->dev,
+			"Replay cancelled on event %x\n", katom->event_code);
+		/* katom->event_code is already set to the failure code of the
+		 * previous job.
+		 */
+		return false;
+	}
+
+	dev_warn(kbdev->dev, "Replaying jobs retry=%d\n",
+			katom->retry_count);
+
+	katom->retry_count++;
+
+	if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) {
+		dev_err(kbdev->dev, "Replay exceeded limit - failing jobs\n");
+
+		kbase_disjoint_state_down(kbdev);
+
+		/* katom->event_code is already set to the failure code of the
+		   previous job */
+		return false;
+	}
+
+	/* only enter the disjoint state once for the whole time while the replay is ongoing */
+	if (katom->retry_count == 1)
+		kbase_disjoint_state_up(kbdev);
+
+	INIT_WORK(&katom->work, kbase_replay_process_worker);
+	queue_work(kctx->event_workq, &katom->work);
+
+	return true;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
new file mode 100644
index 0000000..43175c8
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
@@ -0,0 +1,74 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+#include <mali_kbase_smc.h>
+
+#include <linux/compiler.h>
+
+static noinline u64 invoke_smc_fid(u64 function_id,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	register u64 x0 asm("x0") = function_id;
+	register u64 x1 asm("x1") = arg0;
+	register u64 x2 asm("x2") = arg1;
+	register u64 x3 asm("x3") = arg2;
+
+	asm volatile(
+			__asmeq("%0", "x0")
+			__asmeq("%1", "x1")
+			__asmeq("%2", "x2")
+			__asmeq("%3", "x3")
+			"smc    #0\n"
+			: "+r" (x0)
+			: "r" (x1), "r" (x2), "r" (x3));
+
+	return x0;
+}
+
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2)
+{
+	/* Is fast call (bit 31 set) */
+	KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL);
+	/* bits 16-23 must be zero for fast calls */
+	KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0);
+
+	return invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	/* Only the six bits allowed should be used. */
+	KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0);
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return kbase_invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+#endif /* CONFIG_ARM64 */
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
new file mode 100644
index 0000000..9bff3d2
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_SMC_H_
+#define _KBASE_SMC_H_
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+
+#define SMC_FAST_CALL (1 << 31)
+#define SMC_64 (1 << 30)
+
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */
+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET)
+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET)
+
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @fid: The SMC function to call, see SMC Calling convention.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC.
+  */
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2);
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @oen: Owning Entity number (SIP, STD etc).
+  * @function_number: The function number within the OEN.
+  * @smc64: use SMC64 calling convention instead of SMC32.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC call.
+  */
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2);
+
+#endif /* CONFIG_ARM64 */
+
+#endif /* _KBASE_SMC_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
new file mode 100644
index 0000000..127ada0
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -0,0 +1,1513 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#include <asm/cacheflush.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif
+#include <linux/dma-mapping.h>
+#include <mali_base_kernel.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+#include <linux/version.h>
+#include <linux/ktime.h>
+#include <linux/pfn.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/cache.h>
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+static void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_add_tail(&katom->queue, &kctx->waiting_soft_jobs);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_del(&katom->queue);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Record the start time of this atom so we could cancel it at
+	 * the right time.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* Add the atom to the waiting list before the timer is
+	 * (re)started to make sure that it gets processed.
+	 */
+	kbasep_add_waiting_soft_job(katom);
+
+	/* Schedule timeout of this atom after a period if it is not active */
+	if (!timer_pending(&kctx->soft_job_timeout)) {
+		int timeout_ms = atomic_read(
+				&kctx->kbdev->js_data.soft_job_timeout_ms);
+		mod_timer(&kctx->soft_job_timeout,
+			  jiffies + msecs_to_jiffies(timeout_ms));
+	}
+}
+
+static int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*status = *mapped_evt;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	if ((new_status != BASE_JD_SOFT_EVENT_SET) &&
+	    (new_status != BASE_JD_SOFT_EVENT_RESET))
+		return -EINVAL;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*mapped_evt = new_status;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+	struct kbase_vmap_struct map;
+	void *user_result;
+	struct timespec ts;
+	struct base_dump_cpu_gpu_counters data;
+	u64 system_time;
+	u64 cycle_counter;
+	u64 jc = katom->jc;
+	struct kbase_context *kctx = katom->kctx;
+	int pm_active_err;
+
+	memset(&data, 0, sizeof(data));
+
+	/* Take the PM active reference as late as possible - otherwise, it could
+	 * delay suspend until we process the atom (which may be at the end of a
+	 * long chain of dependencies */
+	pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+	if (pm_active_err) {
+		struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+		/* We're suspended - queue this on the list of suspended jobs
+		 * Use dep_item[1], because dep_item[0] was previously in use
+		 * for 'waiting_soft_jobs'.
+		 */
+		mutex_lock(&js_devdata->runpool_mutex);
+		list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		/* Also adding this to the list of waiting soft job */
+		kbasep_add_waiting_soft_job(katom);
+
+		return pm_active_err;
+	}
+
+	kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
+									&ts);
+
+	kbase_pm_context_idle(kctx->kbdev);
+
+	data.sec = ts.tv_sec;
+	data.usec = ts.tv_nsec / 1000;
+	data.system_time = system_time;
+	data.cycle_counter = cycle_counter;
+
+	/* Assume this atom will be cancelled until we know otherwise */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* GPU_WR access is checked on the range for returning the result to
+	 * userspace for the following reasons:
+	 * - security, this is currently how imported user bufs are checked.
+	 * - userspace ddk guaranteed to assume region was mapped as GPU_WR */
+	user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map);
+	if (!user_result)
+		return 0;
+
+	memcpy(user_result, &data, sizeof(data));
+
+	kbase_vunmap(kctx, &map);
+
+	/* Atom was fine - mark it as done */
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	return 0;
+}
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+/* Called by the explicit fence mechanism when a fence wait has completed */
+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(kctx->kbdev);
+	mutex_unlock(&kctx->jctx.lock);
+}
+#endif
+
+static void kbasep_soft_event_complete_job(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt)
+{
+	int cancel_timer = 1;
+	struct list_head *entry, *tmp;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, queue);
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			if (katom->jc == evt) {
+				list_del(&katom->queue);
+
+				katom->event_code = BASE_JD_EVENT_DONE;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				/* There are still other waiting jobs, we cannot
+				 * cancel the timer yet.
+				 */
+				cancel_timer = 0;
+			}
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			/* Keep the timer running if fence debug is enabled and
+			 * there are waiting fence jobs.
+			 */
+			cancel_timer = 0;
+			break;
+#endif
+		}
+	}
+
+	if (cancel_timer)
+		del_timer(&kctx->soft_job_timeout);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep;
+
+		list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) {
+			if (dep->status == KBASE_JD_ATOM_STATE_UNUSED ||
+			    dep->status == KBASE_JD_ATOM_STATE_COMPLETED)
+				continue;
+
+			if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					== BASE_JD_REQ_SOFT_FENCE_TRIGGER) {
+				/* Found blocked trigger fence. */
+				struct kbase_sync_fence_info info;
+
+				if (!kbase_sync_fence_in_info_get(dep, &info)) {
+					dev_warn(dev,
+						 "\tVictim trigger atom %d fence [%p] %s: %s\n",
+						 kbase_jd_atom_id(kctx, dep),
+						 info.fence,
+						 info.name,
+						 kbase_sync_status_string(info.status));
+				 }
+			}
+
+			kbase_fence_debug_check_atom(dep);
+		}
+	}
+}
+
+static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = katom->kctx->kbdev->dev;
+	int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms);
+	unsigned long lflags;
+	struct kbase_sync_fence_info info;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+
+	if (kbase_sync_fence_in_info_get(katom, &info)) {
+		/* Fence must have signaled just after timeout. */
+		spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+		return;
+	}
+
+	dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n",
+		 kctx->tgid, kctx->id,
+		 kbase_jd_atom_id(kctx, katom),
+		 info.fence, timeout_ms);
+	dev_warn(dev, "\tGuilty fence [%p] %s: %s\n",
+		 info.fence, info.name,
+		 kbase_sync_status_string(info.status));
+
+	/* Search for blocked trigger atoms */
+	kbase_fence_debug_check_atom(katom);
+
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+
+	kbase_sync_fence_in_dump(katom);
+}
+
+struct kbase_fence_debug_work {
+	struct kbase_jd_atom *katom;
+	struct work_struct work;
+};
+
+static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work)
+{
+	struct kbase_fence_debug_work *w = container_of(work,
+			struct kbase_fence_debug_work, work);
+	struct kbase_jd_atom *katom = w->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_fence_debug_wait_timeout(katom);
+	mutex_unlock(&kctx->jctx.lock);
+
+	kfree(w);
+}
+
+static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_debug_work *work;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Enqueue fence debug worker. Use job_done_wq to get
+	 * debug print ordered with job completion.
+	 */
+	work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC);
+	/* Ignore allocation failure. */
+	if (work) {
+		work->katom = katom;
+		INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker);
+		queue_work(kctx->jctx.job_done_wq, &work->work);
+	}
+}
+#endif /* CONFIG_MALI_FENCE_DEBUG */
+
+void kbasep_soft_job_timeout_worker(unsigned long data)
+{
+	struct kbase_context *kctx = (struct kbase_context *)data;
+	u32 timeout_ms = (u32)atomic_read(
+			&kctx->kbdev->js_data.soft_job_timeout_ms);
+	struct timer_list *timer = &kctx->soft_job_timeout;
+	ktime_t cur_time = ktime_get();
+	bool restarting = false;
+	unsigned long lflags;
+	struct list_head *entry, *tmp;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(entry,
+				struct kbase_jd_atom, queue);
+		s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time,
+					katom->start_timestamp));
+
+		if (elapsed_time < (s64)timeout_ms) {
+			restarting = true;
+			continue;
+		}
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			/* Take it out of the list to ensure that it
+			 * will be cancelled in all cases
+			 */
+			list_del(&katom->queue);
+
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			INIT_WORK(&katom->work, kbasep_soft_event_complete_job);
+			queue_work(kctx->jctx.job_done_wq, &katom->work);
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			kbase_fence_debug_timeout(katom);
+			break;
+#endif
+		}
+	}
+
+	if (restarting)
+		mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms));
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned char status;
+
+	/* The status of this soft-job is stored in jc */
+	if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return 0;
+	}
+
+	if (status == BASE_JD_SOFT_EVENT_SET)
+		return 0; /* Event already set, nothing to do */
+
+	kbasep_add_waiting_with_timeout(katom);
+
+	return 1;
+}
+
+static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom,
+				     unsigned char new_status)
+{
+	/* Complete jobs waiting on the same event */
+	struct kbase_context *kctx = katom->kctx;
+
+	if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, katom->jc);
+}
+
+/**
+ * kbase_soft_event_update() - Update soft event state
+ * @kctx: Pointer to context
+ * @event: Event to update
+ * @new_status: New status value of event
+ *
+ * Update the event, and wake up any atoms waiting for the event.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+int kbase_soft_event_update(struct kbase_context *kctx,
+			     u64 event,
+			     unsigned char new_status)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->jctx.lock);
+
+	if (kbasep_write_soft_event_status(kctx, event, new_status)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, event);
+
+out:
+	mutex_unlock(&kctx->jctx.lock);
+
+	return err;
+}
+
+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
+{
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+struct kbase_debug_copy_buffer {
+	size_t size;
+	struct page **pages;
+	int nr_pages;
+	size_t offset;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+
+	struct page **extres_pages;
+	int nr_extres_pages;
+};
+
+static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer)
+{
+	struct page **pages = buffer->extres_pages;
+	int nr_pages = buffer->nr_extres_pages;
+
+	if (pages) {
+		int i;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *pg = pages[i];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(pages);
+	}
+}
+
+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+
+	if (!buffers)
+		return;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	for (i = 0; i < nr; i++) {
+		int p;
+		struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc;
+
+		if (!buffers[i].pages)
+			break;
+		for (p = 0; p < buffers[i].nr_pages; p++) {
+			struct page *pg = buffers[i].pages[p];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(buffers[i].pages);
+		if (gpu_alloc) {
+			switch (gpu_alloc->type) {
+			case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+			{
+				free_user_buffer(&buffers[i]);
+				break;
+			}
+			default:
+				/* Nothing to be done. */
+				break;
+			}
+			kbase_mem_phy_alloc_put(gpu_alloc);
+		}
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+	kfree(buffers);
+
+	katom->jc = 0;
+}
+
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers;
+	struct base_jd_debug_copy_buffer *user_buffers = NULL;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+	int ret = 0;
+	void __user *user_structs = (void __user *)(uintptr_t)katom->jc;
+
+	if (!user_structs)
+		return -EINVAL;
+
+	buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers) {
+		ret = -ENOMEM;
+		katom->jc = 0;
+		goto out_cleanup;
+	}
+	katom->jc = (u64)(uintptr_t)buffers;
+
+	user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL);
+
+	if (!user_buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+
+	ret = copy_from_user(user_buffers, user_structs,
+			sizeof(*user_buffers)*nr);
+	if (ret) {
+		ret = -EFAULT;
+		goto out_cleanup;
+	}
+
+	for (i = 0; i < nr; i++) {
+		u64 addr = user_buffers[i].address;
+		u64 page_addr = addr & PAGE_MASK;
+		u64 end_page_addr = addr + user_buffers[i].size - 1;
+		u64 last_page_addr = end_page_addr & PAGE_MASK;
+		int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1;
+		int pinned_pages;
+		struct kbase_va_region *reg;
+		struct base_external_resource user_extres;
+
+		if (!addr)
+			continue;
+
+		buffers[i].nr_pages = nr_pages;
+		buffers[i].offset = addr & ~PAGE_MASK;
+		if (buffers[i].offset >= PAGE_SIZE) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+		buffers[i].size = user_buffers[i].size;
+
+		buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *),
+				GFP_KERNEL);
+		if (!buffers[i].pages) {
+			ret = -ENOMEM;
+			goto out_cleanup;
+		}
+
+		pinned_pages = get_user_pages_fast(page_addr,
+					nr_pages,
+					1, /* Write */
+					buffers[i].pages);
+		if (pinned_pages < 0) {
+			ret = pinned_pages;
+			goto out_cleanup;
+		}
+		if (pinned_pages != nr_pages) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		user_extres = user_buffers[i].extres;
+		if (user_extres.ext_resource == 0ULL) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		kbase_gpu_vm_lock(katom->kctx);
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, user_extres.ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+		if (NULL == reg || NULL == reg->gpu_alloc ||
+				(reg->flags & KBASE_REG_FREE)) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		buffers[i].nr_extres_pages = reg->nr_pages;
+
+		if (reg->nr_pages*PAGE_SIZE != buffers[i].size)
+			dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n");
+
+		switch (reg->gpu_alloc->type) {
+		case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		{
+			struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+			unsigned long nr_pages =
+				alloc->imported.user_buf.nr_pages;
+
+			if (alloc->imported.user_buf.mm != current->mm) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			buffers[i].extres_pages = kcalloc(nr_pages,
+					sizeof(struct page *), GFP_KERNEL);
+			if (!buffers[i].extres_pages) {
+				ret = -ENOMEM;
+				goto out_unlock;
+			}
+
+			ret = get_user_pages_fast(
+					alloc->imported.user_buf.address,
+					nr_pages, 0,
+					buffers[i].extres_pages);
+			if (ret != nr_pages)
+				goto out_unlock;
+			ret = 0;
+			break;
+		}
+		case KBASE_MEM_TYPE_IMPORTED_UMP:
+		{
+			dev_warn(katom->kctx->kbdev->dev,
+					"UMP is not supported for debug_copy jobs\n");
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		default:
+			/* Nothing to be done. */
+			break;
+		}
+		kbase_gpu_vm_unlock(katom->kctx);
+	}
+	kfree(user_buffers);
+
+	return ret;
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+
+out_cleanup:
+	/* Frees allocated memory for kbase_debug_copy_job struct, including
+	 * members, and sets jc to 0 */
+	kbase_debug_copy_finish(katom);
+	kfree(user_buffers);
+
+	return ret;
+}
+
+static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+		void *extres_page, struct page **pages, unsigned int nr_pages,
+		unsigned int *target_page_nr, size_t offset, size_t *to_copy)
+{
+	void *target_page = kmap(pages[*target_page_nr]);
+	size_t chunk = PAGE_SIZE-offset;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	chunk = min(chunk, *to_copy);
+
+	memcpy(target_page + offset, extres_page, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+
+	*target_page_nr += 1;
+	if (*target_page_nr >= nr_pages)
+		return;
+
+	target_page = kmap(pages[*target_page_nr]);
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(target_page);
+
+	chunk = min(offset, *to_copy);
+	memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+}
+
+static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data)
+{
+	unsigned int i;
+	unsigned int target_page_nr = 0;
+	struct page **pages = buf_data->pages;
+	u64 offset = buf_data->offset;
+	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
+	size_t to_copy = min(extres_size, buf_data->size);
+	size_t dma_to_copy;
+	struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc;
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(pages != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	if (!gpu_alloc) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	switch (gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+	{
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+			struct page *pg = buf_data->extres_pages[i];
+			void *extres_page = kmap(pg);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			kunmap(pg);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		break;
+	}
+	break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf;
+
+		KBASE_DEBUG_ASSERT(dma_buf != NULL);
+		if (dma_buf->size > buf_data->nr_extres_pages * PAGE_SIZE)
+			dev_warn(kctx->kbdev->dev, "External resources buffer size mismatch");
+
+		dma_to_copy = min(dma_buf->size,
+			(size_t)(buf_data->nr_extres_pages * PAGE_SIZE));
+		ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, dma_to_copy,
+#endif
+				DMA_FROM_DEVICE);
+		if (ret)
+			goto out_unlock;
+
+		for (i = 0; i < dma_to_copy/PAGE_SIZE; i++) {
+
+			void *extres_page = dma_buf_kmap(dma_buf, i);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			dma_buf_kunmap(dma_buf, i, extres_page);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, dma_to_copy,
+#endif
+				DMA_FROM_DEVICE);
+		break;
+	}
+#endif
+	default:
+		ret = -EINVAL;
+	}
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+
+}
+
+static int kbase_debug_copy(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+
+	for (i = 0; i < katom->nr_extres; i++) {
+		int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]);
+
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	struct base_jit_alloc_info *info;
+	struct kbase_context *kctx = katom->kctx;
+	int ret;
+
+	/* Fail the job if there is no info structure */
+	if (!data) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(info, data, sizeof(*info)) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* If the ID is zero then fail the job */
+	if (info->id == 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Sanity check that the PA fits within the VA */
+	if (info->va_pages < info->commit_pages) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Ensure the GPU address is correctly aligned */
+	if ((info->gpu_alloc_addr & 0x7) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Replace the user pointer with our kernel allocated info structure */
+	katom->jc = (u64)(uintptr_t) info;
+	katom->jit_blocked = false;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
+
+	/*
+	 * Note:
+	 * The provided info->gpu_alloc_addr isn't validated here as
+	 * userland can cache allocations which means that even
+	 * though the region is valid it doesn't represent the
+	 * same thing it used to.
+	 *
+	 * Complete validation of va_pages, commit_pages and extent
+	 * isn't done here as it will be done during the call to
+	 * kbase_mem_alloc.
+	 */
+	return 0;
+
+free_info:
+	kfree(info);
+fail:
+	katom->jc = 0;
+	return ret;
+}
+
+static u8 kbase_jit_free_get_id(struct kbase_jd_atom *katom)
+{
+	if (WARN_ON(katom->core_req != BASE_JD_REQ_SOFT_JIT_FREE))
+		return 0;
+
+	return (u8) katom->jc;
+}
+
+static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct base_jit_alloc_info *info;
+	struct kbase_va_region *reg;
+	struct kbase_vmap_struct mapping;
+	u64 *ptr, new_addr;
+
+	if (katom->jit_blocked) {
+		list_del(&katom->queue);
+		katom->jit_blocked = false;
+	}
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+
+	/* The JIT ID is still in use so fail the allocation */
+	if (kctx->jit_alloc[info->id]) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return 0;
+	}
+
+	/* Create a JIT allocation */
+	reg = kbase_jit_allocate(kctx, info);
+	if (!reg) {
+		struct kbase_jd_atom *jit_atom;
+		bool can_block = false;
+
+		lockdep_assert_held(&kctx->jctx.lock);
+
+		jit_atom = list_first_entry(&kctx->jit_atoms_head,
+				struct kbase_jd_atom, jit_node);
+
+		list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) {
+			if (jit_atom == katom)
+				break;
+			if (jit_atom->core_req == BASE_JD_REQ_SOFT_JIT_FREE) {
+				u8 free_id = kbase_jit_free_get_id(jit_atom);
+
+				if (free_id && kctx->jit_alloc[free_id]) {
+					/* A JIT free which is active and
+					 * submitted before this atom
+					 */
+					can_block = true;
+					break;
+				}
+			}
+		}
+
+		if (!can_block) {
+			/* Mark the allocation so we know it's in use even if
+			 * the allocation itself fails.
+			 */
+			kctx->jit_alloc[info->id] =
+				(struct kbase_va_region *) -1;
+
+			katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+			return 0;
+		}
+
+		/* There are pending frees for an active allocation
+		 * so we should wait to see whether they free the memory.
+		 * Add to the beginning of the list to ensure that the atom is
+		 * processed only once in kbase_jit_free_finish
+		 */
+		list_add(&katom->queue, &kctx->jit_pending_alloc);
+		katom->jit_blocked = true;
+
+		return 1;
+	}
+
+	/*
+	 * Write the address of the JIT allocation to the user provided
+	 * GPU allocation.
+	 */
+	ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+			&mapping);
+	if (!ptr) {
+		/*
+		 * Leave the allocation "live" as the JIT free jit will be
+		 * submitted anyway.
+		 */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return 0;
+	}
+
+	new_addr = reg->start_pfn << PAGE_SHIFT;
+	*ptr = new_addr;
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(
+			katom, info->gpu_alloc_addr, new_addr);
+	kbase_vunmap(kctx, &mapping);
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	/*
+	 * Bind it to the user provided ID. Do this last so we can check for
+	 * the JIT free racing this JIT alloc job.
+	 */
+	kctx->jit_alloc[info->id] = reg;
+
+	return 0;
+}
+
+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom)
+{
+	struct base_jit_alloc_info *info;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Remove atom from jit_atoms_head list */
+	list_del(&katom->jit_node);
+
+	if (katom->jit_blocked) {
+		list_del(&katom->queue);
+		katom->jit_blocked = false;
+	}
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(info);
+}
+
+static int kbase_jit_free_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
+
+	return 0;
+}
+
+static void kbase_jit_free_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u8 id = kbase_jit_free_get_id(katom);
+
+	/*
+	 * If the ID is zero or it is not in use yet then fail the job.
+	 */
+	if ((id == 0) || (kctx->jit_alloc[id] == NULL)) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	/*
+	 * If the ID is valid but the allocation request failed still succeed
+	 * this soft job but don't try and free the allocation.
+	 */
+	if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1)
+		kbase_jit_free(kctx, kctx->jit_alloc[id]);
+
+	kctx->jit_alloc[id] = NULL;
+}
+
+static void kbasep_jit_free_finish_worker(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_finish_soft_job(katom);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+static void kbase_jit_free_finish(struct kbase_jd_atom *katom)
+{
+	struct list_head *i, *tmp;
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	/* Remove this atom from the kctx->jit_atoms_head list */
+	list_del(&katom->jit_node);
+
+	list_for_each_safe(i, tmp, &kctx->jit_pending_alloc) {
+		struct kbase_jd_atom *pending_atom = list_entry(i,
+				struct kbase_jd_atom, queue);
+		if (kbase_jit_allocate_process(pending_atom) == 0) {
+			/* Atom has completed */
+			INIT_WORK(&pending_atom->work,
+					kbasep_jit_free_finish_worker);
+			queue_work(kctx->jctx.job_done_wq, &pending_atom->work);
+		}
+	}
+}
+
+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
+{
+	__user struct base_external_resource_list *user_ext_res;
+	struct base_external_resource_list *ext_res;
+	u64 count = 0;
+	size_t copy_size;
+	int ret;
+
+	user_ext_res = (__user struct base_external_resource_list *)
+			(uintptr_t) katom->jc;
+
+	/* Fail the job if there is no info structure */
+	if (!user_ext_res) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Is the number of external resources in range? */
+	if (!count || count > BASE_EXT_RES_COUNT_MAX) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	copy_size = sizeof(*ext_res);
+	copy_size += sizeof(struct base_external_resource) * (count - 1);
+	ext_res = kzalloc(copy_size, GFP_KERNEL);
+	if (!ext_res) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/*
+	 * Overwrite the count with the first value incase it was changed
+	 * after the fact.
+	 */
+	ext_res->count = count;
+
+	/*
+	 * Replace the user pointer with our kernel allocated
+	 * ext_res structure.
+	 */
+	katom->jc = (u64)(uintptr_t) ext_res;
+
+	return 0;
+
+free_info:
+	kfree(ext_res);
+fail:
+	return ret;
+}
+
+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
+{
+	struct base_external_resource_list *ext_res;
+	int i;
+	bool failed = false;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	if (!ext_res)
+		goto failed_jc;
+
+	kbase_gpu_vm_lock(katom->kctx);
+
+	for (i = 0; i < ext_res->count; i++) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		if (map) {
+			if (!kbase_sticky_resource_acquire(katom->kctx,
+					gpu_addr))
+				goto failed_loop;
+		} else
+			if (!kbase_sticky_resource_release(katom->kctx, NULL,
+					gpu_addr))
+				failed = true;
+	}
+
+	/*
+	 * In the case of unmap we continue unmapping other resources in the
+	 * case of failure but will always report failure if _any_ unmap
+	 * request fails.
+	 */
+	if (failed)
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	else
+		katom->event_code = BASE_JD_EVENT_DONE;
+
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	return;
+
+failed_loop:
+	while (i > 0) {
+		u64 const gpu_addr = ext_res->ext_res[i - 1].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+
+		kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr);
+
+		--i;
+	}
+
+	katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	kbase_gpu_vm_unlock(katom->kctx);
+
+failed_jc:
+	return;
+}
+
+static void kbase_ext_res_finish(struct kbase_jd_atom *katom)
+{
+	struct base_external_resource_list *ext_res;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(ext_res);
+}
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		return kbase_dump_cpu_gpu_time(katom);
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		katom->event_code = kbase_sync_fence_out_trigger(katom,
+				katom->event_code == BASE_JD_EVENT_DONE ?
+								0 : -EFAULT);
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+	{
+		int ret = kbase_sync_fence_in_wait(katom);
+
+		if (ret == 1) {
+#ifdef CONFIG_MALI_FENCE_DEBUG
+			kbasep_add_waiting_with_timeout(katom);
+#else
+			kbasep_add_waiting_soft_job(katom);
+#endif
+		}
+		return ret;
+	}
+#endif
+
+	case BASE_JD_REQ_SOFT_REPLAY:
+		return kbase_replay_process(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		return kbasep_soft_event_wait(katom);
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET);
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+	{
+		int res = kbase_debug_copy(katom);
+
+		if (res)
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		break;
+	}
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_process(katom);
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_process(katom, true);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_process(katom, false);
+		break;
+	}
+
+	/* Atom is complete */
+	return 0;
+}
+
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		kbase_sync_fence_in_cancel_wait(katom);
+		break;
+#endif
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		kbasep_soft_event_cancel_job(katom);
+		break;
+	default:
+		/* This soft-job doesn't support cancellation! */
+		KBASE_DEBUG_ASSERT(0);
+	}
+}
+
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		{
+			if (!IS_ALIGNED(katom->jc, cache_line_size()))
+				return -EINVAL;
+		}
+		break;
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		{
+			struct base_fence fence;
+			int fd;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			fd = kbase_sync_fence_out_create(katom,
+							 fence.basep.stream_fd);
+			if (fd < 0)
+				return -EINVAL;
+
+			fence.basep.fd = fd;
+			if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+				kbase_sync_fence_out_remove(katom);
+				kbase_sync_fence_close_fd(fd);
+				fence.basep.fd = -EINVAL;
+				return -EINVAL;
+			}
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		{
+			struct base_fence fence;
+			int ret;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			/* Get a reference to the fence object */
+			ret = kbase_sync_fence_in_from_fd(katom,
+							  fence.basep.fd);
+			if (ret < 0)
+				return ret;
+
+#ifdef CONFIG_MALI_DMA_FENCE
+			/*
+			 * Set KCTX_NO_IMPLICIT_FENCE in the context the first
+			 * time a soft fence wait job is observed. This will
+			 * prevent the implicit dma-buf fence to conflict with
+			 * the Android native sync fences.
+			 */
+			if (!kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC))
+				kbase_ctx_flag_set(katom->kctx, KCTX_NO_IMPLICIT_SYNC);
+#endif /* CONFIG_MALI_DMA_FENCE */
+		}
+		break;
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_prepare(katom);
+	case BASE_JD_REQ_SOFT_REPLAY:
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		return kbase_jit_free_prepare(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		if (katom->jc == 0)
+			return -EINVAL;
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		return kbase_debug_copy_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		return kbase_ext_res_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		return kbase_ext_res_prepare(katom);
+	default:
+		/* Unsupported soft-job */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void kbase_finish_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		/* Nothing to do */
+		break;
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		/* If fence has not yet been signaled, do it now */
+		kbase_sync_fence_out_trigger(katom, katom->event_code ==
+				BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		/* Release katom's reference to fence object */
+		kbase_sync_fence_in_remove(katom);
+		break;
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		kbase_debug_copy_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_finish(katom);
+		break;
+	}
+}
+
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
+{
+	LIST_HEAD(local_suspended_soft_jobs);
+	struct kbase_jd_atom *tmp_iter;
+	struct kbase_jd_atom *katom_iter;
+	struct kbasep_js_device_data *js_devdata;
+	bool resched = false;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	js_devdata = &kbdev->js_data;
+
+	/* Move out the entire list */
+	mutex_lock(&js_devdata->runpool_mutex);
+	list_splice_init(&js_devdata->suspended_soft_jobs_list,
+			&local_suspended_soft_jobs);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/*
+	 * Each atom must be detached from the list and ran separately -
+	 * it could be re-added to the old list, but this is unlikely
+	 */
+	list_for_each_entry_safe(katom_iter, tmp_iter,
+			&local_suspended_soft_jobs, dep_item[1]) {
+		struct kbase_context *kctx = katom_iter->kctx;
+
+		mutex_lock(&kctx->jctx.lock);
+
+		/* Remove from the global list */
+		list_del(&katom_iter->dep_item[1]);
+		/* Remove from the context's list of waiting soft jobs */
+		kbasep_remove_waiting_soft_job(katom_iter);
+
+		if (kbase_process_soft_job(katom_iter) == 0) {
+			kbase_finish_soft_job(katom_iter);
+			resched |= jd_done_nolock(katom_iter, NULL);
+		} else {
+			KBASE_DEBUG_ASSERT((katom_iter->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE)
+					!= BASE_JD_REQ_SOFT_REPLAY);
+		}
+
+		mutex_unlock(&kctx->jctx.lock);
+	}
+
+	if (resched)
+		kbase_js_sched_all(kbdev);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
new file mode 100644
index 0000000..c98762c
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c
@@ -0,0 +1,23 @@
+ /*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include "mali_kbase_strings.h"
+
+#define KBASE_DRV_NAME "mali"
+#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline"
+
+const char kbase_drv_name[] = KBASE_DRV_NAME;
+const char kbase_timeline_name[] = KBASE_TIMELINE_NAME;
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
new file mode 100644
index 0000000..41b8fdb
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h
@@ -0,0 +1,19 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+extern const char kbase_drv_name[];
+extern const char kbase_timeline_name[];
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
new file mode 100644
index 0000000..de72147
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -0,0 +1,203 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_sync.h
+ *
+ * This file contains our internal "API" for explicit fences.
+ * It hides the implementation details of the actual explicit fence mechanism
+ * used (Android fences or sync file with DMA fences).
+ */
+
+#ifndef MALI_KBASE_SYNC_H
+#define MALI_KBASE_SYNC_H
+
+#include <linux/syscalls.h>
+#ifdef CONFIG_SYNC
+#include <sync.h>
+#endif
+#ifdef CONFIG_SYNC_FILE
+#include "mali_kbase_fence_defs.h"
+#include <linux/sync_file.h>
+#endif
+
+#include "mali_kbase.h"
+
+/**
+ * struct kbase_sync_fence_info - Information about a fence
+ * @fence: Pointer to fence (type is void*, as underlaying struct can differ)
+ * @name: The name given to this fence when it was created
+ * @status: < 0 means error, 0 means active, 1 means signaled
+ *
+ * Use kbase_sync_fence_in_info_get() or kbase_sync_fence_out_info_get()
+ * to get the information.
+ */
+struct kbase_sync_fence_info {
+	void *fence;
+	char name[32];
+	int status;
+};
+
+/**
+ * kbase_sync_fence_stream_create() - Create a stream object
+ * @name: Name of stream (only used to ease debugging/visualization)
+ * @out_fd: A file descriptor representing the created stream object
+ *
+ * Can map down to a timeline implementation in some implementations.
+ * Exposed as a file descriptor.
+ * Life-time controlled via the file descriptor:
+ * - dup to add a ref
+ * - close to remove a ref
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd);
+
+/**
+ * kbase_sync_fence_out_create Create an explicit output fence to specified atom
+ * @katom: Atom to assign the new explicit fence to
+ * @stream_fd: File descriptor for stream object to create fence on
+ *
+ * return: Valid file descriptor to fence or < 0 on error
+ */
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd);
+
+/**
+ * kbase_sync_fence_in_from_fd() Assigns an existing fence to specified atom
+ * @katom: Atom to assign the existing explicit fence to
+ * @fd: File descriptor to an existing fence
+ *
+ * Assigns an explicit input fence to atom.
+ * This can later be waited for by calling @kbase_sync_fence_in_wait
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd);
+
+/**
+ * kbase_sync_fence_validate() - Validate a fd to be a valid fence
+ * @fd: File descriptor to check
+ *
+ * This function is only usable to catch unintentional user errors early,
+ * it does not stop malicious code changing the fd after this function returns.
+ *
+ * return 0: if fd is for a valid fence, < 0 if invalid
+ */
+int kbase_sync_fence_validate(int fd);
+
+/**
+ * kbase_sync_fence_out_trigger - Signal explicit output fence attached on katom
+ * @katom: Atom with an explicit fence to signal
+ * @result: < 0 means signal with error, 0 >= indicates success
+ *
+ * Signal output fence attached on katom and remove the fence from the atom.
+ *
+ * return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE
+ */
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result);
+
+/**
+ * kbase_sync_fence_in_wait() - Wait for explicit input fence to be signaled
+ * @katom: Atom with explicit fence to wait for
+ *
+ * If the fence is already signaled, then 0 is returned, and the caller must
+ * continue processing of the katom.
+ *
+ * If the fence isn't already signaled, then this kbase_sync framework will
+ * take responsibility to continue the processing once the fence is signaled.
+ *
+ * return: 0 if already signaled, otherwise 1
+ */
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_in_cancel_wait() - Cancel explicit input fence waits
+ * @katom: Atom to cancel wait for
+ *
+ * This function is fully responsible for continuing processing of this atom
+ * (remove_waiting_soft_job + finish_soft_job + jd_done + js_sched_all)
+ */
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_in_remove() - Remove the input fence from the katom
+ * @katom: Atom to remove explicit input fence for
+ *
+ * This will also release the corresponding reference.
+ */
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_out_remove() - Remove the output fence from the katom
+ * @katom: Atom to remove explicit output fence for
+ *
+ * This will also release the corresponding reference.
+ */
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_close_fd() - Close a file descriptor representing a fence
+ * @fd: File descriptor to close
+ */
+static inline void kbase_sync_fence_close_fd(int fd)
+{
+	sys_close(fd);
+}
+
+/**
+ * kbase_sync_fence_in_info_get() - Retrieves information about input fence
+ * @katom: Atom to get fence information from
+ * @info: Struct to be filled with fence information
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info);
+
+/**
+ * kbase_sync_fence_out_info_get() - Retrieves information about output fence
+ * @katom: Atom to get fence information from
+ * @info: Struct to be filled with fence information
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				  struct kbase_sync_fence_info *info);
+
+/**
+ * kbase_sync_status_string() - Get string matching @status
+ * @status: Value of fence status.
+ *
+ * return: Pointer to string describing @status.
+ */
+const char *kbase_sync_status_string(int status);
+
+/*
+ * Internal worker used to continue processing of atom.
+ */
+void kbase_sync_fence_wait_worker(struct work_struct *data);
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+/**
+ * kbase_sync_fence_in_dump() Trigger a debug dump of atoms input fence state
+ * @katom: Atom to trigger fence debug dump for
+ */
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom);
+#endif
+
+#endif /* MALI_KBASE_SYNC_H */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c
new file mode 100644
index 0000000..d7349dc
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c
@@ -0,0 +1,537 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Code for supporting explicit Android fences (CONFIG_SYNC)
+ * Known to be good for kernels 4.5 and earlier.
+ * Replaced with CONFIG_SYNC_FILE for 4.9 and later kernels
+ * (see mali_kbase_sync_file.c)
+ */
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include "sync.h"
+#include <mali_kbase.h>
+#include <mali_kbase_sync.h>
+
+struct mali_sync_timeline {
+	struct sync_timeline timeline;
+	atomic_t counter;
+	atomic_t signaled;
+};
+
+struct mali_sync_pt {
+	struct sync_pt pt;
+	int order;
+	int result;
+};
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+/* For backwards compatibility with kernels before 3.17. After 3.17
+ * sync_pt_parent is included in the kernel. */
+static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
+{
+	return pt->parent;
+}
+#endif
+
+static struct mali_sync_timeline *to_mali_sync_timeline(
+						struct sync_timeline *timeline)
+{
+	return container_of(timeline, struct mali_sync_timeline, timeline);
+}
+
+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, pt);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_pt *new_mpt;
+	struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt),
+						sizeof(struct mali_sync_pt));
+
+	if (!new_pt)
+		return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+	new_mpt->order = mpt->order;
+	new_mpt->result = mpt->result;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(
+							sync_pt_parent(pt));
+	int result = mpt->result;
+
+	int diff = atomic_read(&mtl->signaled) - mpt->order;
+
+	if (diff >= 0)
+		return (result < 0) ? result : 1;
+
+	return 0;
+}
+
+static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
+	struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
+
+	int diff = ma->order - mb->order;
+
+	if (diff == 0)
+		return 0;
+
+	return (diff < 0) ? -1 : 1;
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str,
+			       int size)
+{
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
+
+	snprintf(str, size, "%d", atomic_read(&mtl->signaled));
+}
+
+static void pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+
+	snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
+}
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name = "Mali",
+	.dup = timeline_dup,
+	.has_signaled = timeline_has_signaled,
+	.compare = timeline_compare,
+	.timeline_value_str = timeline_value_str,
+	.pt_value_str       = pt_value_str,
+};
+
+/* Allocates a timeline for Mali
+ *
+ * One timeline should be allocated per API context.
+ */
+static struct sync_timeline *mali_sync_timeline_alloc(const char *name)
+{
+	struct sync_timeline *tl;
+	struct mali_sync_timeline *mtl;
+
+	tl = sync_timeline_create(&mali_timeline_ops,
+				  sizeof(struct mali_sync_timeline), name);
+	if (!tl)
+		return NULL;
+
+	/* Set the counter in our private struct */
+	mtl = to_mali_sync_timeline(tl);
+	atomic_set(&mtl->counter, 0);
+	atomic_set(&mtl->signaled, 0);
+
+	return tl;
+}
+
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+	struct sync_timeline *tl;
+
+	tl = (struct sync_timeline *)file->private_data;
+	sync_timeline_destroy(tl);
+	return 0;
+}
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE,
+	.release = kbase_stream_close,
+};
+
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
+{
+	struct sync_timeline *tl;
+
+	if (!out_fd)
+		return -EINVAL;
+
+	tl = mali_sync_timeline_alloc(name);
+	if (!tl)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY|O_CLOEXEC);
+
+	if (*out_fd < 0) {
+		sync_timeline_destroy(tl);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by kbase_sync_timeline_alloc
+ *
+ * Sync points must be triggered in *exactly* the same order as they are
+ * allocated.
+ */
+static struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+	struct sync_pt *pt = sync_pt_create(parent,
+					    sizeof(struct mali_sync_pt));
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+	struct mali_sync_pt *mpt;
+
+	if (!pt)
+		return NULL;
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->order = atomic_inc_return(&mtl->counter);
+	mpt->result = 0;
+
+	return pt;
+}
+
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd)
+{
+	struct sync_timeline *tl;
+	struct sync_pt *pt;
+	struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+	struct files_struct *files;
+	struct fdtable *fdt;
+#endif
+	int fd;
+	struct file *tl_file;
+
+	tl_file = fget(tl_fd);
+	if (tl_file == NULL)
+		return -EBADF;
+
+	if (tl_file->f_op != &stream_fops) {
+		fd = -EBADF;
+		goto out;
+	}
+
+	tl = tl_file->private_data;
+
+	pt = kbase_sync_pt_alloc(tl);
+	if (!pt) {
+		fd = -EFAULT;
+		goto out;
+	}
+
+	fence = sync_fence_create("mali_fence", pt);
+	if (!fence) {
+		sync_pt_free(pt);
+		fd = -EFAULT;
+		goto out;
+	}
+
+	/* from here the fence owns the sync_pt */
+
+	/* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+#else
+	fd = get_unused_fd();
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+
+	files = current->files;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	__set_close_on_exec(fd, fdt);
+#else
+	FD_SET(fd, fdt->close_on_exec);
+#endif
+	spin_unlock(&files->file_lock);
+#endif  /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+	/* bind fence to the new fd */
+	sync_fence_install(fence, fd);
+
+	katom->fence = sync_fence_fdget(fd);
+	if (katom->fence == NULL) {
+		/* The only way the fence can be NULL is if userspace closed it
+		 * for us, so we don't need to clear it up */
+		fd = -EINVAL;
+		goto out;
+	}
+
+out:
+	fput(tl_file);
+
+	return fd;
+}
+
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
+{
+	katom->fence = sync_fence_fdget(fd);
+	return katom->fence ? 0 : -ENOENT;
+}
+
+int kbase_sync_fence_validate(int fd)
+{
+	struct sync_fence *fence;
+
+	fence = sync_fence_fdget(fd);
+	if (!fence)
+		return -EINVAL;
+
+	sync_fence_put(fence);
+	return 0;
+}
+
+/* Returns true if the specified timeline is allocated by Mali */
+static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
+{
+	return timeline->ops == &mali_timeline_ops;
+}
+
+/* Signals a particular sync point
+ *
+ * Sync points must be triggered in *exactly* the same order as they are
+ * allocated.
+ *
+ * If they are signaled in the wrong order then a message will be printed in
+ * debug builds and otherwise attempts to signal order sync_pts will be ignored.
+ *
+ * result can be negative to indicate error, any other value is interpreted as
+ * success.
+ */
+static void kbase_sync_signal_pt(struct sync_pt *pt, int result)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(
+							sync_pt_parent(pt));
+	int signaled;
+	int diff;
+
+	mpt->result = result;
+
+	do {
+		signaled = atomic_read(&mtl->signaled);
+
+		diff = signaled - mpt->order;
+
+		if (diff > 0) {
+			/* The timeline is already at or ahead of this point.
+			 * This should not happen unless userspace has been
+			 * signaling fences out of order, so warn but don't
+			 * violate the sync_pt API.
+			 * The warning is only in debug builds to prevent
+			 * a malicious user being able to spam dmesg.
+			 */
+#ifdef CONFIG_MALI_DEBUG
+			pr_err("Fences were triggered in a different order to allocation!");
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+	} while (atomic_cmpxchg(&mtl->signaled,
+				signaled, mpt->order) != signaled);
+}
+
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result)
+{
+	struct sync_pt *pt;
+	struct sync_timeline *timeline;
+
+	if (!katom->fence)
+		return BASE_JD_EVENT_JOB_CANCELLED;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	if (!list_is_singular(&katom->fence->pt_list_head)) {
+#else
+	if (katom->fence->num_fences != 1) {
+#endif
+		/* Not exactly one item in the list - so it didn't (directly)
+		 * come from us */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	pt = list_first_entry(&katom->fence->pt_list_head,
+			      struct sync_pt, pt_list);
+#else
+	pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
+#endif
+	timeline = sync_pt_parent(pt);
+
+	if (!kbase_sync_timeline_is_ours(timeline)) {
+		/* Fence has a sync_pt which isn't ours! */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	kbase_sync_signal_pt(pt, result);
+
+	sync_timeline_signal(timeline);
+
+	kbase_sync_fence_out_remove(katom);
+
+	return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+static inline int kbase_fence_get_status(struct sync_fence *fence)
+{
+	if (!fence)
+		return -ENOENT;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	return fence->status;
+#else
+	return atomic_read(&fence->status);
+#endif
+}
+
+static void kbase_fence_wait_callback(struct sync_fence *fence,
+				      struct sync_fence_waiter *waiter)
+{
+	struct kbase_jd_atom *katom = container_of(waiter,
+					struct kbase_jd_atom, sync_waiter);
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Propagate the fence status to the atom.
+	 * If negative then cancel this atom and its dependencies.
+	 */
+	if (kbase_fence_get_status(fence) < 0)
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* To prevent a potential deadlock we schedule the work onto the
+	 * job_done_wq workqueue
+	 *
+	 * The issue is that we may signal the timeline while holding
+	 * kctx->jctx.lock and the callbacks are run synchronously from
+	 * sync_timeline_signal. So we simply defer the work.
+	 */
+
+	INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
+{
+	int ret;
+
+	sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+	ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+	if (ret == 1) {
+		/* Already signaled */
+		return 0;
+	}
+
+	if (ret < 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+	return 1;
+}
+
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+		/* The wait wasn't cancelled - leave the cleanup for
+		 * kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->fence) {
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+	}
+}
+
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->fence) {
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+	}
+}
+
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+	if (!katom->fence)
+		return -ENOENT;
+
+	info->fence = katom->fence;
+	info->status = kbase_fence_get_status(katom->fence);
+	strlcpy(info->name, katom->fence->name, sizeof(info->name));
+
+	return 0;
+}
+
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+	if (!katom->fence)
+		return -ENOENT;
+
+	info->fence = katom->fence;
+	info->status = kbase_fence_get_status(katom->fence);
+	strlcpy(info->name, katom->fence->name, sizeof(info->name));
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom)
+{
+	/* Dump out the full state of all the Android sync fences.
+	 * The function sync_dump() isn't exported to modules, so force
+	 * sync_fence_wait() to time out to trigger sync_dump().
+	 */
+	if (katom->fence)
+		sync_fence_wait(katom->fence, 1);
+}
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
new file mode 100644
index 0000000..457def2
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * @file mali_kbase_sync_common.c
+ *
+ * Common code for our explicit fence functionality
+ */
+
+#include <linux/workqueue.h>
+#include "mali_kbase.h"
+
+void kbase_sync_fence_wait_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kbase_soft_event_wait_callback(katom);
+}
+
+const char *kbase_sync_status_string(int status)
+{
+	if (status == 0)
+		return "signaled";
+	else if (status > 0)
+		return "active";
+	else
+		return "error";
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
new file mode 100644
index 0000000..ef5b7ce
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
@@ -0,0 +1,348 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Code for supporting explicit Linux fences (CONFIG_SYNC_FILE)
+ * Introduced in kernel 4.9.
+ * Android explicit fences (CONFIG_SYNC) can be used for older kernels
+ * (see mali_kbase_sync_android.c)
+ */
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/sync_file.h>
+#include <linux/slab.h>
+#include "mali_kbase_fence_defs.h"
+#include "mali_kbase_sync.h"
+#include "mali_kbase_fence.h"
+#include "mali_kbase.h"
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE
+};
+
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
+{
+	if (!out_fd)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, NULL,
+				   O_RDONLY | O_CLOEXEC);
+	if (*out_fd < 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+	struct sync_file *sync_file;
+	int fd;
+
+	fence = kbase_fence_out_new(katom);
+	if (!fence)
+		return -ENOMEM;
+
+	/* Take an extra reference to the fence on behalf of the katom.
+	 * This is needed because sync_file_create() will take ownership of
+	 * one of these refs */
+	dma_fence_get(fence);
+
+	/* create a sync_file fd representing the fence */
+	sync_file = sync_file_create(fence);
+	if (!sync_file) {
+		dma_fence_put(fence);
+		kbase_fence_out_remove(katom);
+		return -ENOMEM;
+	}
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		fput(sync_file->file);
+		kbase_fence_out_remove(katom);
+		return fd;
+	}
+
+	fd_install(fd, sync_file->file);
+
+	return fd;
+}
+
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence = sync_file_get_fence(fd);
+#else
+	struct dma_fence *fence = sync_file_get_fence(fd);
+#endif
+
+	if (!fence)
+		return -ENOENT;
+
+	kbase_fence_fence_in_set(katom, fence);
+
+	return 0;
+}
+
+int kbase_sync_fence_validate(int fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence = sync_file_get_fence(fd);
+#else
+	struct dma_fence *fence = sync_file_get_fence(fd);
+#endif
+
+	if (!fence)
+		return -EINVAL;
+
+	dma_fence_put(fence);
+
+	return 0; /* valid */
+}
+
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result)
+{
+	int res;
+
+	if (!kbase_fence_out_is_ours(katom)) {
+		/* Not our fence */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	res = kbase_fence_out_signal(katom, result);
+	if (unlikely(res < 0)) {
+		dev_warn(katom->kctx->kbdev->dev,
+				"fence_signal() failed with %d\n", res);
+	}
+
+	kbase_sync_fence_out_remove(katom);
+
+	return (result != 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+static void kbase_fence_wait_callback(struct fence *fence,
+				      struct fence_cb *cb)
+#else
+static void kbase_fence_wait_callback(struct dma_fence *fence,
+				      struct dma_fence_cb *cb)
+#endif
+{
+	struct kbase_fence_cb *kcb = container_of(cb,
+				struct kbase_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Cancel atom if fence is erroneous */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
+	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error)
+#else
+	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0)
+#endif
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	if (kbase_fence_dep_count_dec_and_test(katom)) {
+		/* We take responsibility of handling this */
+		kbase_fence_dep_count_set(katom, -1);
+
+		/* To prevent a potential deadlock we schedule the work onto the
+		 * job_done_wq workqueue
+		 *
+		 * The issue is that we may signal the timeline while holding
+		 * kctx->jctx.lock and the callbacks are run synchronously from
+		 * sync_timeline_signal. So we simply defer the work.
+		 */
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(kctx->jctx.job_done_wq, &katom->work);
+	}
+}
+
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
+{
+	int err;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_in_get(katom);
+	if (!fence)
+		return 0; /* no input fence to wait for, good to go! */
+
+	kbase_fence_dep_count_set(katom, 1);
+
+	err = kbase_fence_add_callback(katom, fence, kbase_fence_wait_callback);
+
+	kbase_fence_put(fence);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (kbase_fence_dep_count_dec_and_test(katom)) {
+			kbase_fence_free_callbacks(katom);
+			kbase_fence_dep_count_set(katom, -1);
+			return 0; /* Already signaled, good to go right now */
+		}
+
+		/* Callback installed, so we just need to wait for it... */
+	} else {
+		/* Failure */
+		kbase_fence_free_callbacks(katom);
+		kbase_fence_dep_count_set(katom, -1);
+
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+	return 1; /* completion to be done later by callback/worker */
+}
+
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (!kbase_fence_free_callbacks(katom)) {
+		/* The wait wasn't cancelled -
+		 * leave the cleanup for kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Take responsibility of completion */
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	kbase_fence_out_remove(katom);
+}
+
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	kbase_fence_free_callbacks(katom);
+	kbase_fence_in_remove(katom);
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+static void kbase_sync_fence_info_get(struct fence *fence,
+				      struct kbase_sync_fence_info *info)
+#else
+static void kbase_sync_fence_info_get(struct dma_fence *fence,
+				      struct kbase_sync_fence_info *info)
+#endif
+{
+	info->fence = fence;
+
+	/* translate into CONFIG_SYNC status:
+	 * < 0 : error
+	 * 0 : active
+	 * 1 : signaled
+	 */
+	if (dma_fence_is_signaled(fence)) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
+		int status = fence->error;
+#else
+		int status = fence->status;
+#endif
+		if (status < 0)
+			info->status = status; /* signaled with error */
+		else
+			info->status = 1; /* signaled with success */
+	} else  {
+		info->status = 0; /* still active (unsignaled) */
+	}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+	scnprintf(info->name, sizeof(info->name), "%u#%u",
+		  fence->context, fence->seqno);
+#else
+	scnprintf(info->name, sizeof(info->name), "%llu#%u",
+		  fence->context, fence->seqno);
+#endif
+}
+
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_in_get(katom);
+	if (!fence)
+		return -ENOENT;
+
+	kbase_sync_fence_info_get(fence, info);
+
+	kbase_fence_put(fence);
+
+	return 0;
+}
+
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				  struct kbase_sync_fence_info *info)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_out_get(katom);
+	if (!fence)
+		return -ENOENT;
+
+	kbase_sync_fence_info_get(fence, info);
+
+	kbase_fence_put(fence);
+
+	return 0;
+}
+
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom)
+{
+	/* Not implemented */
+}
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
new file mode 100644
index 0000000..d01aa23
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -0,0 +1,2572 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* The version of swtrace protocol used in timeline stream. */
+#define SWTRACE_VERSION    3
+
+/* The maximum expected length of string in tracepoint descriptor. */
+#define STRLEN_MAX         64 /* bytes */
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC       1000000000ull /* ns */
+
+/* The period of autoflush checker execution in milliseconds. */
+#define AUTOFLUSH_INTERVAL 1000 /* ms */
+
+/* The maximum size of a single packet used by timeline. */
+#define PACKET_SIZE        4096 /* bytes */
+
+/* The number of packets used by one timeline stream. */
+#define PACKET_COUNT       16
+
+/* The number of bytes reserved for packet header.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_HEADER_SIZE 8 /* bytes */
+
+/* The number of bytes reserved for packet sequence number.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_NUMBER_SIZE 4 /* bytes */
+
+/* Packet header - first word.
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_STREAMID_POS  0
+#define PACKET_STREAMID_LEN  8
+#define PACKET_RSVD1_POS     (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
+#define PACKET_RSVD1_LEN     8
+#define PACKET_TYPE_POS      (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
+#define PACKET_TYPE_LEN      3
+#define PACKET_CLASS_POS     (PACKET_TYPE_POS + PACKET_TYPE_LEN)
+#define PACKET_CLASS_LEN     7
+#define PACKET_FAMILY_POS    (PACKET_CLASS_POS + PACKET_CLASS_LEN)
+#define PACKET_FAMILY_LEN    6
+
+/* Packet header - second word
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_LENGTH_POS    0
+#define PACKET_LENGTH_LEN    24
+#define PACKET_SEQBIT_POS    (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
+#define PACKET_SEQBIT_LEN    1
+#define PACKET_RSVD2_POS     (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
+#define PACKET_RSVD2_LEN     7
+
+/* Types of streams generated by timeline.
+ * Order is significant! Header streams must precede respective body streams. */
+enum tl_stream_type {
+	TL_STREAM_TYPE_OBJ_HEADER,
+	TL_STREAM_TYPE_OBJ_SUMMARY,
+	TL_STREAM_TYPE_OBJ,
+	TL_STREAM_TYPE_AUX_HEADER,
+	TL_STREAM_TYPE_AUX,
+
+	TL_STREAM_TYPE_COUNT
+};
+
+/* Timeline packet family ids.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_family {
+	TL_PACKET_FAMILY_CTRL = 0, /* control packets */
+	TL_PACKET_FAMILY_TL   = 1, /* timeline packets */
+
+	TL_PACKET_FAMILY_COUNT
+};
+
+/* Packet classes used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_class {
+	TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
+	TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
+};
+
+/* Packet types used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_type {
+	TL_PACKET_TYPE_HEADER  = 0, /* stream's header/directory */
+	TL_PACKET_TYPE_BODY    = 1, /* stream's body */
+	TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
+};
+
+/* Message ids of trace events that are recorded in the timeline stream. */
+enum tl_msg_id_obj {
+	/* Timeline object events. */
+	KBASE_TL_NEW_CTX,
+	KBASE_TL_NEW_GPU,
+	KBASE_TL_NEW_LPU,
+	KBASE_TL_NEW_ATOM,
+	KBASE_TL_NEW_AS,
+	KBASE_TL_DEL_CTX,
+	KBASE_TL_DEL_ATOM,
+	KBASE_TL_LIFELINK_LPU_GPU,
+	KBASE_TL_LIFELINK_AS_GPU,
+	KBASE_TL_RET_CTX_LPU,
+	KBASE_TL_RET_ATOM_CTX,
+	KBASE_TL_RET_ATOM_LPU,
+	KBASE_TL_NRET_CTX_LPU,
+	KBASE_TL_NRET_ATOM_CTX,
+	KBASE_TL_NRET_ATOM_LPU,
+	KBASE_TL_RET_AS_CTX,
+	KBASE_TL_NRET_AS_CTX,
+	KBASE_TL_RET_ATOM_AS,
+	KBASE_TL_NRET_ATOM_AS,
+	KBASE_TL_DEP_ATOM_ATOM,
+	KBASE_TL_NDEP_ATOM_ATOM,
+	KBASE_TL_RDEP_ATOM_ATOM,
+	KBASE_TL_ATTRIB_ATOM_CONFIG,
+	KBASE_TL_ATTRIB_ATOM_PRIORITY,
+	KBASE_TL_ATTRIB_ATOM_STATE,
+	KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE,
+	KBASE_TL_ATTRIB_ATOM_JIT,
+	KBASE_TL_ATTRIB_AS_CONFIG,
+	KBASE_TL_EVENT_LPU_SOFTSTOP,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+
+	/* Job dump specific events. */
+	KBASE_JD_GPU_SOFT_RESET
+};
+
+/* Message ids of trace events that are recorded in the auxiliary stream. */
+enum tl_msg_id_aux {
+	KBASE_AUX_PM_STATE,
+	KBASE_AUX_PAGEFAULT,
+	KBASE_AUX_PAGESALLOC,
+	KBASE_AUX_DEVFREQ_TARGET,
+	KBASE_AUX_PROTECTED_ENTER_START,
+	KBASE_AUX_PROTECTED_ENTER_END,
+	KBASE_AUX_PROTECTED_LEAVE_START,
+	KBASE_AUX_PROTECTED_LEAVE_END
+};
+
+/*****************************************************************************/
+
+/**
+ * struct tl_stream - timeline stream structure
+ * @lock: message order lock
+ * @buffer: array of buffers
+ * @wbi: write buffer index
+ * @rbi: read buffer index
+ * @numbered: if non-zero stream's packets are sequentially numbered
+ * @autoflush_counter: counter tracking stream's autoflush state
+ *
+ * This structure holds information needed to construct proper packets in the
+ * timeline stream. Each message in sequence must bear timestamp that is greater
+ * to one in previous message in the same stream. For this reason lock is held
+ * throughout the process of message creation. Each stream contains set of
+ * buffers. Each buffer will hold one MIPE packet. In case there is no free
+ * space required to store incoming message the oldest buffer is discarded.
+ * Each packet in timeline body stream has sequence number embedded (this value
+ * must increment monotonically and is used by packets receiver to discover
+ * buffer overflows.
+ * Autoflush counter is set to negative number when there is no data pending
+ * for flush and it is set to zero on every update of the buffer. Autoflush
+ * timer will increment the counter by one on every expiry. In case there will
+ * be no activity on the buffer during two consecutive timer expiries, stream
+ * buffer will be flushed.
+ */
+struct tl_stream {
+	spinlock_t lock;
+
+	struct {
+		atomic_t size;              /* number of bytes in buffer */
+		char     data[PACKET_SIZE]; /* buffer's data */
+	} buffer[PACKET_COUNT];
+
+	atomic_t wbi;
+	atomic_t rbi;
+
+	int      numbered;
+	atomic_t autoflush_counter;
+};
+
+/**
+ * struct tp_desc - tracepoint message descriptor structure
+ * @id:        tracepoint ID identifying message in stream
+ * @id_str:    human readable version of tracepoint ID
+ * @name:      tracepoint description
+ * @arg_types: tracepoint's arguments types declaration
+ * @arg_names: comma separated list of tracepoint's arguments names
+ */
+struct tp_desc {
+	u32        id;
+	const char *id_str;
+	const char *name;
+	const char *arg_types;
+	const char *arg_names;
+};
+
+/*****************************************************************************/
+
+/* Configuration of timeline streams generated by kernel.
+ * Kernel emit only streams containing either timeline object events or
+ * auxiliary events. All streams have stream id value of 1 (as opposed to user
+ * space streams that have value of 0). */
+static const struct {
+	enum tl_packet_family pkt_family;
+	enum tl_packet_class  pkt_class;
+	enum tl_packet_type   pkt_type;
+	unsigned int          stream_id;
+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY,    1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY,    1}
+};
+
+/* The timeline streams generated by kernel. */
+static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT];
+
+/* Autoflush timer. */
+static struct timer_list autoflush_timer;
+
+/* If non-zero autoflush timer is active. */
+static atomic_t autoflush_timer_active;
+
+/* Reader lock. Only one reader is allowed to have access to the timeline
+ * streams at any given time. */
+static DEFINE_MUTEX(tl_reader_lock);
+
+/* Timeline stream event queue. */
+static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
+
+/* The timeline stream file operations functions. */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos);
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait);
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations kbasep_tlstream_fops = {
+	.release = kbasep_tlstream_release,
+	.read    = kbasep_tlstream_read,
+	.poll    = kbasep_tlstream_poll,
+};
+
+/* Descriptors of timeline messages transmitted in object events stream. */
+static const struct tp_desc tp_desc_obj[] = {
+	{
+		KBASE_TL_NEW_CTX,
+		__stringify(KBASE_TL_NEW_CTX),
+		"object ctx is created",
+		"@pII",
+		"ctx,ctx_nr,tgid"
+	},
+	{
+		KBASE_TL_NEW_GPU,
+		__stringify(KBASE_TL_NEW_GPU),
+		"object gpu is created",
+		"@pII",
+		"gpu,gpu_id,core_count"
+	},
+	{
+		KBASE_TL_NEW_LPU,
+		__stringify(KBASE_TL_NEW_LPU),
+		"object lpu is created",
+		"@pII",
+		"lpu,lpu_nr,lpu_fn"
+	},
+	{
+		KBASE_TL_NEW_ATOM,
+		__stringify(KBASE_TL_NEW_ATOM),
+		"object atom is created",
+		"@pI",
+		"atom,atom_nr"
+	},
+	{
+		KBASE_TL_NEW_AS,
+		__stringify(KBASE_TL_NEW_AS),
+		"address space object is created",
+		"@pI",
+		"address_space,as_nr"
+	},
+	{
+		KBASE_TL_DEL_CTX,
+		__stringify(KBASE_TL_DEL_CTX),
+		"context is destroyed",
+		"@p",
+		"ctx"
+	},
+	{
+		KBASE_TL_DEL_ATOM,
+		__stringify(KBASE_TL_DEL_ATOM),
+		"atom is destroyed",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_LIFELINK_LPU_GPU,
+		__stringify(KBASE_TL_LIFELINK_LPU_GPU),
+		"lpu is deleted with gpu",
+		"@pp",
+		"lpu,gpu"
+	},
+	{
+		KBASE_TL_LIFELINK_AS_GPU,
+		__stringify(KBASE_TL_LIFELINK_AS_GPU),
+		"address space is deleted with gpu",
+		"@pp",
+		"address_space,gpu"
+	},
+	{
+		KBASE_TL_RET_CTX_LPU,
+		__stringify(KBASE_TL_RET_CTX_LPU),
+		"context is retained by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_RET_ATOM_CTX,
+		__stringify(KBASE_TL_RET_ATOM_CTX),
+		"atom is retained by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_LPU,
+		__stringify(KBASE_TL_RET_ATOM_LPU),
+		"atom is retained by lpu",
+		"@pps",
+		"atom,lpu,attrib_match_list"
+	},
+	{
+		KBASE_TL_NRET_CTX_LPU,
+		__stringify(KBASE_TL_NRET_CTX_LPU),
+		"context is released by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_NRET_ATOM_CTX,
+		__stringify(KBASE_TL_NRET_ATOM_CTX),
+		"atom is released by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_NRET_ATOM_LPU,
+		__stringify(KBASE_TL_NRET_ATOM_LPU),
+		"atom is released by lpu",
+		"@pp",
+		"atom,lpu"
+	},
+	{
+		KBASE_TL_RET_AS_CTX,
+		__stringify(KBASE_TL_RET_AS_CTX),
+		"address space is retained by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_NRET_AS_CTX,
+		__stringify(KBASE_TL_NRET_AS_CTX),
+		"address space is released by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_AS,
+		__stringify(KBASE_TL_RET_ATOM_AS),
+		"atom is retained by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_NRET_ATOM_AS,
+		__stringify(KBASE_TL_NRET_ATOM_AS),
+		"atom is released by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_DEP_ATOM_ATOM,
+		__stringify(KBASE_TL_DEP_ATOM_ATOM),
+		"atom2 depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_NDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_NDEP_ATOM_ATOM),
+		"atom2 no longer depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_RDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_RDEP_ATOM_ATOM),
+		"resolved dependecy of atom2 depending on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_ATOM_CONFIG),
+		"atom job slot attributes",
+		"@pLLI",
+		"atom,descriptor,affinity,config"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_PRIORITY,
+		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY),
+		"atom priority",
+		"@pI",
+		"atom,prio"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_STATE,
+		__stringify(KBASE_TL_ATTRIB_ATOM_STATE),
+		"atom state",
+		"@pI",
+		"atom,state"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE,
+		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE),
+		"atom caused priority change",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_JIT,
+		__stringify(KBASE_TL_ATTRIB_ATOM_JIT),
+		"jit done for atom",
+		"@pLL",
+		"atom,edit_addr,new_addr"
+	},
+	{
+		KBASE_TL_ATTRIB_AS_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_AS_CONFIG),
+		"address space attributes",
+		"@pLLL",
+		"address_space,transtab,memattr,transcfg"
+	},
+	{
+		KBASE_TL_EVENT_LPU_SOFTSTOP,
+		__stringify(KBASE_TL_EVENT_LPU_SOFTSTOP),
+		"softstop event on given lpu",
+		"@p",
+		"lpu"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX),
+		"atom softstopped",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+		__stringify(KBASE_TL_EVENT_SOFTSTOP_ISSUE),
+		"atom softstop issued",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_JD_GPU_SOFT_RESET,
+		__stringify(KBASE_JD_GPU_SOFT_RESET),
+		"gpu soft reset",
+		"@p",
+		"gpu"
+	},
+};
+
+/* Descriptors of timeline messages transmitted in auxiliary events stream. */
+static const struct tp_desc tp_desc_aux[] = {
+	{
+		KBASE_AUX_PM_STATE,
+		__stringify(KBASE_AUX_PM_STATE),
+		"PM state",
+		"@IL",
+		"core_type,core_state_bitset"
+	},
+	{
+		KBASE_AUX_PAGEFAULT,
+		__stringify(KBASE_AUX_PAGEFAULT),
+		"Page fault",
+		"@IL",
+		"ctx_nr,page_cnt_change"
+	},
+	{
+		KBASE_AUX_PAGESALLOC,
+		__stringify(KBASE_AUX_PAGESALLOC),
+		"Total alloc pages change",
+		"@IL",
+		"ctx_nr,page_cnt"
+	},
+	{
+		KBASE_AUX_DEVFREQ_TARGET,
+		__stringify(KBASE_AUX_DEVFREQ_TARGET),
+		"New device frequency target",
+		"@L",
+		"target_freq"
+	},
+	{
+		KBASE_AUX_PROTECTED_ENTER_START,
+		__stringify(KBASE_AUX_PROTECTED_ENTER_START),
+		"enter protected mode start",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_ENTER_END,
+		__stringify(KBASE_AUX_PROTECTED_ENTER_END),
+		"enter protected mode end",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_LEAVE_START,
+		__stringify(KBASE_AUX_PROTECTED_LEAVE_START),
+		"leave protected mode start",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_LEAVE_END,
+		__stringify(KBASE_AUX_PROTECTED_LEAVE_END),
+		"leave protected mode end",
+		"@p",
+		"gpu"
+	}
+};
+
+#if MALI_UNIT_TEST
+/* Number of bytes read by user. */
+static atomic_t tlstream_bytes_collected = {0};
+
+/* Number of bytes generated by tracepoint messages. */
+static atomic_t tlstream_bytes_generated = {0};
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+/* Indicator of whether the timeline stream file descriptor is used. */
+atomic_t kbase_tlstream_enabled = {0};
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ * Return: timestamp value
+ */
+static u64 kbasep_tlstream_get_timestamp(void)
+{
+	struct timespec ts;
+	u64             timestamp;
+
+	getrawmonotonic(&ts);
+	timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+	return timestamp;
+}
+
+/**
+ * kbasep_tlstream_write_bytes - write data to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ * @bytes:  pointer to buffer holding data
+ * @len:    length of data to be written
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_bytes(
+		char       *buffer,
+		size_t     pos,
+		const void *bytes,
+		size_t     len)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(bytes);
+
+	memcpy(&buffer[pos], bytes, len);
+
+	return pos + len;
+}
+
+/**
+ * kbasep_tlstream_write_string - write string to message buffer
+ * @buffer:         buffer where data will be written
+ * @pos:            position in the buffer where to place data
+ * @string:         pointer to buffer holding the source string
+ * @max_write_size: number of bytes that can be stored in buffer
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_string(
+		char       *buffer,
+		size_t     pos,
+		const char *string,
+		size_t     max_write_size)
+{
+	u32 string_len;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(string);
+	/* Timeline string consists of at least string length and nul
+	 * terminator. */
+	KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
+	max_write_size -= sizeof(string_len);
+
+	string_len = strlcpy(
+			&buffer[pos + sizeof(string_len)],
+			string,
+			max_write_size);
+	string_len += sizeof(char);
+
+	/* Make sure that the source string fit into the buffer. */
+	KBASE_DEBUG_ASSERT(string_len <= max_write_size);
+
+	/* Update string length. */
+	memcpy(&buffer[pos], &string_len, sizeof(string_len));
+
+	return pos + sizeof(string_len) + string_len;
+}
+
+/**
+ * kbasep_tlstream_write_timestamp - write timestamp to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos)
+{
+	u64 timestamp = kbasep_tlstream_get_timestamp();
+
+	return kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&timestamp, sizeof(timestamp));
+}
+
+/**
+ * kbasep_tlstream_put_bits - put bits in a word
+ * @word:   pointer to the words being modified
+ * @value:  value that shall be written to given position
+ * @bitpos: position where value shall be written (in bits)
+ * @bitlen: length of value (in bits)
+ */
+static void kbasep_tlstream_put_bits(
+		u32          *word,
+		u32          value,
+		unsigned int bitpos,
+		unsigned int bitlen)
+{
+	const u32 mask = ((1 << bitlen) - 1) << bitpos;
+
+	KBASE_DEBUG_ASSERT(word);
+	KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen));
+	KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32);
+
+	*word &= ~mask;
+	*word |= ((value << bitpos) & mask);
+}
+
+/**
+ * kbasep_tlstream_packet_header_setup - setup the packet header
+ * @buffer:     pointer to the buffer
+ * @pkt_family: packet's family
+ * @pkt_type:   packet's type
+ * @pkt_class:  packet's class
+ * @stream_id:  stream id
+ * @numbered:   non-zero if this stream is numbered
+ *
+ * Function sets up immutable part of packet header in the given buffer.
+ */
+static void kbasep_tlstream_packet_header_setup(
+		char                  *buffer,
+		enum tl_packet_family pkt_family,
+		enum tl_packet_class  pkt_class,
+		enum tl_packet_type   pkt_type,
+		unsigned int          stream_id,
+		int                   numbered)
+{
+	u32 word0 = 0;
+	u32 word1 = 0;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL);
+	KBASE_DEBUG_ASSERT(
+			(pkt_type == TL_PACKET_TYPE_HEADER)  ||
+			(pkt_type == TL_PACKET_TYPE_SUMMARY) ||
+			(pkt_type == TL_PACKET_TYPE_BODY));
+	KBASE_DEBUG_ASSERT(
+			(pkt_class == TL_PACKET_CLASS_OBJ) ||
+			(pkt_class == TL_PACKET_CLASS_AUX));
+
+	kbasep_tlstream_put_bits(
+			&word0, pkt_family,
+			PACKET_FAMILY_POS, PACKET_FAMILY_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_class,
+			PACKET_CLASS_POS, PACKET_CLASS_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_type,
+			PACKET_TYPE_POS, PACKET_TYPE_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, stream_id,
+			PACKET_STREAMID_POS, PACKET_STREAMID_LEN);
+
+	if (numbered)
+		kbasep_tlstream_put_bits(
+				&word1, 1,
+				PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN);
+
+	memcpy(&buffer[0],             &word0, sizeof(word0));
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_header_update - update the packet header
+ * @buffer:    pointer to the buffer
+ * @data_size: amount of data carried in this packet
+ *
+ * Function updates mutable part of packet header in the given buffer.
+ * Note that value of data_size must not including size of the header.
+ */
+static void kbasep_tlstream_packet_header_update(
+		char   *buffer,
+		size_t data_size)
+{
+	u32 word0;
+	u32 word1;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	CSTD_UNUSED(word0);
+
+	memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1));
+
+	kbasep_tlstream_put_bits(
+			&word1, data_size,
+			PACKET_LENGTH_POS, PACKET_LENGTH_LEN);
+
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_number_update - update the packet number
+ * @buffer:  pointer to the buffer
+ * @counter: value of packet counter for this packet's stream
+ *
+ * Function updates packet number embedded within the packet placed in the
+ * given buffer.
+ */
+static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+
+	memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter));
+}
+
+/**
+ * kbasep_timeline_stream_reset - reset stream
+ * @stream:  pointer to the stream structure
+ *
+ * Function discards all pending messages and resets packet counters.
+ */
+static void kbasep_timeline_stream_reset(struct tl_stream *stream)
+{
+	unsigned int i;
+
+	for (i = 0; i < PACKET_COUNT; i++) {
+		if (stream->numbered)
+			atomic_set(
+					&stream->buffer[i].size,
+					PACKET_HEADER_SIZE +
+					PACKET_NUMBER_SIZE);
+		else
+			atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE);
+	}
+
+	atomic_set(&stream->wbi, 0);
+	atomic_set(&stream->rbi, 0);
+}
+
+/**
+ * kbasep_timeline_stream_init - initialize timeline stream
+ * @stream:      pointer to the stream structure
+ * @stream_type: stream type
+ */
+static void kbasep_timeline_stream_init(
+		struct tl_stream    *stream,
+		enum tl_stream_type stream_type)
+{
+	unsigned int i;
+
+	KBASE_DEBUG_ASSERT(stream);
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	spin_lock_init(&stream->lock);
+
+	/* All packets carrying tracepoints shall be numbered. */
+	if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type)
+		stream->numbered = 1;
+	else
+		stream->numbered = 0;
+
+	for (i = 0; i < PACKET_COUNT; i++)
+		kbasep_tlstream_packet_header_setup(
+				stream->buffer[i].data,
+				tl_stream_cfg[stream_type].pkt_family,
+				tl_stream_cfg[stream_type].pkt_class,
+				tl_stream_cfg[stream_type].pkt_type,
+				tl_stream_cfg[stream_type].stream_id,
+				stream->numbered);
+
+	kbasep_timeline_stream_reset(tl_stream[stream_type]);
+}
+
+/**
+ * kbasep_timeline_stream_term - terminate timeline stream
+ * @stream: pointer to the stream structure
+ */
+static void kbasep_timeline_stream_term(struct tl_stream *stream)
+{
+	KBASE_DEBUG_ASSERT(stream);
+}
+
+/**
+ * kbasep_tlstream_msgbuf_submit - submit packet to the user space
+ * @stream:     pointer to the stream structure
+ * @wb_idx_raw: write buffer index
+ * @wb_size:    length of data stored in current buffer
+ *
+ * Function updates currently written buffer with packet header. Then write
+ * index is incremented and buffer is handled to user space. Parameters
+ * of new buffer are returned using provided arguments.
+ *
+ * Return: length of data in new buffer
+ *
+ * Warning:  User must update the stream structure with returned value.
+ */
+static size_t kbasep_tlstream_msgbuf_submit(
+		struct tl_stream *stream,
+		unsigned int      wb_idx_raw,
+		unsigned int      wb_size)
+{
+	unsigned int rb_idx_raw = atomic_read(&stream->rbi);
+	unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
+
+	/* Set stream as flushed. */
+	atomic_set(&stream->autoflush_counter, -1);
+
+	kbasep_tlstream_packet_header_update(
+			stream->buffer[wb_idx].data,
+			wb_size - PACKET_HEADER_SIZE);
+
+	if (stream->numbered)
+		kbasep_tlstream_packet_number_update(
+				stream->buffer[wb_idx].data,
+				wb_idx_raw);
+
+	/* Increasing write buffer index will expose this packet to the reader.
+	 * As stream->lock is not taken on reader side we must make sure memory
+	 * is updated correctly before this will happen. */
+	smp_wmb();
+	wb_idx_raw++;
+	atomic_set(&stream->wbi, wb_idx_raw);
+
+	/* Inform user that packets are ready for reading. */
+	wake_up_interruptible(&tl_event_queue);
+
+	/* Detect and mark overflow in this stream. */
+	if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) {
+		/* Reader side depends on this increment to correctly handle
+		 * overflows. The value shall be updated only if it was not
+		 * modified by the reader. The data holding buffer will not be
+		 * updated before stream->lock is released, however size of the
+		 * buffer will. Make sure this increment is globally visible
+		 * before information about selected write buffer size. */
+		atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1);
+	}
+
+	wb_size = PACKET_HEADER_SIZE;
+	if (stream->numbered)
+		wb_size += PACKET_NUMBER_SIZE;
+
+	return wb_size;
+}
+
+/**
+ * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer
+ * @stream_type: type of the stream that shall be locked
+ * @msg_size:    message size
+ * @flags:       pointer to store flags passed back on stream release
+ *
+ * Function will lock the stream and reserve the number of bytes requested
+ * in msg_size for the user.
+ *
+ * Return: pointer to the buffer where message can be stored
+ *
+ * Warning: Stream must be released with kbasep_tlstream_msgbuf_release().
+ *          Only atomic operations are allowed while stream is locked
+ *          (i.e. do not use any operation that may sleep).
+ */
+static char *kbasep_tlstream_msgbuf_acquire(
+		enum tl_stream_type stream_type,
+		size_t              msg_size,
+		unsigned long       *flags) __acquires(&stream->lock)
+{
+	struct tl_stream *stream;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(
+			PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
+			msg_size);
+
+	stream = tl_stream[stream_type];
+
+	spin_lock_irqsave(&stream->lock, *flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	/* Select next buffer if data will not fit into current one. */
+	if (PACKET_SIZE < wb_size + msg_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx  = (wb_idx_raw + 1) % PACKET_COUNT;
+	}
+
+	/* Reserve space in selected buffer. */
+	atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size);
+
+#if MALI_UNIT_TEST
+	atomic_add(msg_size, &tlstream_bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+	return &stream->buffer[wb_idx].data[wb_size];
+}
+
+/**
+ * kbasep_tlstream_msgbuf_release - unlock selected stream
+ * @stream_type:  type of the stream that shall be locked
+ * @flags:        value obtained during stream acquire
+ *
+ * Function releases stream that has been previously locked with a call to
+ * kbasep_tlstream_msgbuf_acquire().
+ */
+static void kbasep_tlstream_msgbuf_release(
+		enum tl_stream_type stream_type,
+		unsigned long       flags) __releases(&stream->lock)
+{
+	struct tl_stream *stream;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	stream = tl_stream[stream_type];
+
+	/* Mark stream as containing unflushed data. */
+	atomic_set(&stream->autoflush_counter, 0);
+
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_flush_stream - flush stream
+ * @stype:  type of stream to be flushed
+ *
+ * Flush pending data in timeline stream.
+ */
+static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
+{
+	struct tl_stream *stream = tl_stream[stype];
+	unsigned long    flags;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+	size_t           min_size = PACKET_HEADER_SIZE;
+
+	if (stream->numbered)
+		min_size += PACKET_NUMBER_SIZE;
+
+	spin_lock_irqsave(&stream->lock, flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	if (wb_size > min_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+		atomic_set(&stream->buffer[wb_idx].size, wb_size);
+	}
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/**
+ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
+ * @data:  unused
+ *
+ * Timer is executed periodically to check if any of the stream contains
+ * buffer ready to be submitted to user space.
+ */
+static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
+{
+	enum tl_stream_type stype;
+	int                 rcode;
+
+	CSTD_UNUSED(data);
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
+		struct tl_stream *stream = tl_stream[stype];
+		unsigned long    flags;
+		unsigned int     wb_idx_raw;
+		unsigned int     wb_idx;
+		size_t           wb_size;
+		size_t           min_size = PACKET_HEADER_SIZE;
+
+		int af_cnt = atomic_read(&stream->autoflush_counter);
+
+		/* Check if stream contain unflushed data. */
+		if (0 > af_cnt)
+			continue;
+
+		/* Check if stream should be flushed now. */
+		if (af_cnt != atomic_cmpxchg(
+					&stream->autoflush_counter,
+					af_cnt,
+					af_cnt + 1))
+			continue;
+		if (!af_cnt)
+			continue;
+
+		/* Autoflush this stream. */
+		if (stream->numbered)
+			min_size += PACKET_NUMBER_SIZE;
+
+		spin_lock_irqsave(&stream->lock, flags);
+
+		wb_idx_raw = atomic_read(&stream->wbi);
+		wb_idx     = wb_idx_raw % PACKET_COUNT;
+		wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+		if (wb_size > min_size) {
+			wb_size = kbasep_tlstream_msgbuf_submit(
+					stream, wb_idx_raw, wb_size);
+			wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+			atomic_set(&stream->buffer[wb_idx].size,
+					wb_size);
+		}
+		spin_unlock_irqrestore(&stream->lock, flags);
+	}
+
+	if (atomic_read(&autoflush_timer_active))
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+	CSTD_UNUSED(rcode);
+}
+
+/**
+ * kbasep_tlstream_packet_pending - check timeline streams for pending packets
+ * @stype:      pointer to variable where stream type will be placed
+ * @rb_idx_raw: pointer to variable where read buffer index will be placed
+ *
+ * Function checks all streams for pending packets. It will stop as soon as
+ * packet ready to be submitted to user space is detected. Variables under
+ * pointers, passed as the parameters to this function will be updated with
+ * values pointing to right stream and buffer.
+ *
+ * Return: non-zero if any of timeline streams has at last one packet ready
+ */
+static int kbasep_tlstream_packet_pending(
+		enum tl_stream_type *stype,
+		unsigned int        *rb_idx_raw)
+{
+	int pending = 0;
+
+	KBASE_DEBUG_ASSERT(stype);
+	KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+	for (
+			*stype = 0;
+			(*stype < TL_STREAM_TYPE_COUNT) && !pending;
+			(*stype)++) {
+		if (NULL != tl_stream[*stype]) {
+			*rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi);
+			/* Read buffer index may be updated by writer in case of
+			 * overflow. Read and write buffer indexes must be
+			 * loaded in correct order. */
+			smp_rmb();
+			if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw)
+				pending = 1;
+		}
+	}
+	(*stype)--;
+
+	return pending;
+}
+
+/**
+ * kbasep_tlstream_read - copy data from streams to buffer provided by user
+ * @filp:   pointer to file structure (unused)
+ * @buffer: pointer to the buffer provided by user
+ * @size:   maximum amount of data that can be stored in the buffer
+ * @f_pos:  pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer
+ */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos)
+{
+	ssize_t copy_len = 0;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(f_pos);
+
+	if (!buffer)
+		return -EINVAL;
+
+	if ((0 > *f_pos) || (PACKET_SIZE > size))
+		return -EINVAL;
+
+	mutex_lock(&tl_reader_lock);
+
+	while (copy_len < size) {
+		enum tl_stream_type stype;
+		unsigned int        rb_idx_raw = 0;
+		unsigned int        rb_idx;
+		size_t              rb_size;
+
+		/* If we don't have any data yet, wait for packet to be
+		 * submitted. If we already read some packets and there is no
+		 * packet pending return back to user. */
+		if (0 < copy_len) {
+			if (!kbasep_tlstream_packet_pending(
+						&stype,
+						&rb_idx_raw))
+				break;
+		} else {
+			if (wait_event_interruptible(
+						tl_event_queue,
+						kbasep_tlstream_packet_pending(
+							&stype,
+							&rb_idx_raw))) {
+				copy_len = -ERESTARTSYS;
+				break;
+			}
+		}
+
+		/* Check if this packet fits into the user buffer.
+		 * If so copy its content. */
+		rb_idx = rb_idx_raw % PACKET_COUNT;
+		rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size);
+		if (rb_size > size - copy_len)
+			break;
+		if (copy_to_user(
+					&buffer[copy_len],
+					tl_stream[stype]->buffer[rb_idx].data,
+					rb_size)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* If the rbi still points to the packet we just processed
+		 * then there was no overflow so we add the copied size to
+		 * copy_len and move rbi on to the next packet
+		 */
+		smp_rmb();
+		if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
+			copy_len += rb_size;
+			atomic_inc(&tl_stream[stype]->rbi);
+
+#if MALI_UNIT_TEST
+			atomic_add(rb_size, &tlstream_bytes_collected);
+#endif /* MALI_UNIT_TEST */
+		}
+	}
+
+	mutex_unlock(&tl_reader_lock);
+
+	return copy_len;
+}
+
+/**
+ * kbasep_tlstream_poll - poll timeline stream for packets
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait)
+{
+	enum tl_stream_type stream_type;
+	unsigned int        rb_idx;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	poll_wait(filp, &tl_event_queue, wait);
+	if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_release - release timeline stream descriptor
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ *
+ * Return always return zero
+ */
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
+{
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+	CSTD_UNUSED(inode);
+	CSTD_UNUSED(filp);
+
+	/* Stop autoflush timer before releasing access to streams. */
+	atomic_set(&autoflush_timer_active, 0);
+	del_timer_sync(&autoflush_timer);
+
+	atomic_set(&kbase_tlstream_enabled, 0);
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_timeline_header - prepare timeline header stream packet
+ * @stream_type: type of the stream that will carry header data
+ * @tp_desc:     pointer to array with tracepoint descriptors
+ * @tp_count:    number of descriptors in the given array
+ *
+ * Functions fills in information about tracepoints stored in body stream
+ * associated with this header stream.
+ */
+static void kbasep_tlstream_timeline_header(
+		enum tl_stream_type  stream_type,
+		const struct tp_desc *tp_desc,
+		u32                  tp_count)
+{
+	const u8      tv = SWTRACE_VERSION; /* protocol version */
+	const u8      ps = sizeof(void *); /* pointer size */
+	size_t        msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
+	char          *buffer;
+	size_t        pos = 0;
+	unsigned long flags;
+	unsigned int  i;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(tp_desc);
+
+	/* Calculate the size of the timeline message. */
+	for (i = 0; i < tp_count; i++) {
+		msg_size += sizeof(tp_desc[i].id);
+		msg_size +=
+			strnlen(tp_desc[i].id_str,    STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].name,      STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_types, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_names, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+	}
+
+	KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size);
+
+	buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tp_count, sizeof(tp_count));
+
+	for (i = 0; i < tp_count; i++) {
+		pos = kbasep_tlstream_write_bytes(
+				buffer, pos,
+				&tp_desc[i].id, sizeof(tp_desc[i].id));
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].id_str, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].name, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_types, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_names, msg_size - pos);
+	}
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(stream_type, flags);
+
+	/* We don't expect any more data to be read in this stream.
+	 * As header stream must be read before its associated body stream,
+	 * make this packet visible to the user straightaway. */
+	kbasep_tlstream_flush_stream(stream_type);
+}
+
+/*****************************************************************************/
+
+int kbase_tlstream_init(void)
+{
+	enum tl_stream_type i;
+
+	/* Prepare stream structures. */
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL);
+		if (!tl_stream[i])
+			break;
+		kbasep_timeline_stream_init(tl_stream[i], i);
+	}
+	if (TL_STREAM_TYPE_COUNT > i) {
+		for (; i > 0; i--) {
+			kbasep_timeline_stream_term(tl_stream[i - 1]);
+			kfree(tl_stream[i - 1]);
+		}
+		return -ENOMEM;
+	}
+
+	/* Initialize autoflush timer. */
+	atomic_set(&autoflush_timer_active, 0);
+	setup_timer(&autoflush_timer,
+			kbasep_tlstream_autoflush_timer_callback,
+			0);
+
+	return 0;
+}
+
+void kbase_tlstream_term(void)
+{
+	enum tl_stream_type i;
+
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		kbasep_timeline_stream_term(tl_stream[i]);
+		kfree(tl_stream[i]);
+	}
+}
+
+static void kbase_create_timeline_objects(struct kbase_context *kctx)
+{
+	struct kbase_device             *kbdev = kctx->kbdev;
+	unsigned int                    lpu_id;
+	unsigned int                    as_nr;
+	struct kbasep_kctx_list_element *element;
+
+	/* Create LPU objects. */
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		u32 *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, lpu_id, *lpu);
+	}
+
+	/* Create Address Space objects. */
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(&kbdev->as[as_nr], as_nr);
+
+	/* Create GPU object and make it retain all LPUs and address spaces. */
+	KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(
+			kbdev,
+			kbdev->gpu_props.props.raw_props.gpu_id,
+			kbdev->gpu_props.num_cores);
+
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		void *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, kbdev);
+	}
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(
+				&kbdev->as[as_nr],
+				kbdev);
+
+	/* Create object for each known context. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry(element, &kbdev->kctx_list, link) {
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(
+				element->kctx,
+				element->kctx->id,
+				(u32)(element->kctx->tgid));
+	}
+	/* Before releasing the lock, reset body stream buffers.
+	 * This will prevent context creation message to be directed to both
+	 * summary and body stream.
+	 */
+	kbase_tlstream_reset_body_streams();
+	mutex_unlock(&kbdev->kctx_list_lock);
+	/* Static object are placed into summary packet that needs to be
+	 * transmitted first. Flush all streams to make it available to
+	 * user space.
+	 */
+	kbase_tlstream_flush_streams();
+}
+
+int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags)
+{
+	int ret;
+	u32 tlstream_enabled = TLSTREAM_ENABLED | flags;
+
+	if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) {
+		int rcode;
+
+		ret = anon_inode_getfd(
+				"[mali_tlstream]",
+				&kbasep_tlstream_fops,
+				kctx,
+				O_RDONLY | O_CLOEXEC);
+		if (ret < 0) {
+			atomic_set(&kbase_tlstream_enabled, 0);
+			return ret;
+		}
+
+		/* Reset and initialize header streams. */
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_HEADER]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_AUX_HEADER]);
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_OBJ_HEADER,
+				tp_desc_obj,
+				ARRAY_SIZE(tp_desc_obj));
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_AUX_HEADER,
+				tp_desc_aux,
+				ARRAY_SIZE(tp_desc_aux));
+
+		/* Start autoflush timer. */
+		atomic_set(&autoflush_timer_active, 1);
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+		CSTD_UNUSED(rcode);
+
+		/* If job dumping is enabled, readjust the software event's
+		 * timeout as the default value of 3 seconds is often
+		 * insufficient. */
+		if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) {
+			dev_info(kctx->kbdev->dev,
+					"Job dumping is enabled, readjusting the software event's timeout\n");
+			atomic_set(&kctx->kbdev->js_data.soft_job_timeout_ms,
+					1800000);
+		}
+
+		/* Summary stream was cleared during acquire.
+		 * Create static timeline objects that will be
+		 * read by client.
+		 */
+		kbase_create_timeline_objects(kctx);
+
+	} else {
+		ret = -EBUSY;
+	}
+
+	return ret;
+}
+
+void kbase_tlstream_flush_streams(void)
+{
+	enum tl_stream_type stype;
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
+		kbasep_tlstream_flush_stream(stype);
+}
+
+void kbase_tlstream_reset_body_streams(void)
+{
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_OBJ]);
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_AUX]);
+}
+
+#if MALI_UNIT_TEST
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
+{
+	KBASE_DEBUG_ASSERT(bytes_collected);
+	KBASE_DEBUG_ASSERT(bytes_generated);
+	*bytes_collected = atomic_read(&tlstream_bytes_collected);
+	*bytes_generated = atomic_read(&tlstream_bytes_generated);
+}
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
+{
+	const u32     msg_id = KBASE_TL_NEW_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) +
+		sizeof(core_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &id, sizeof(id));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_count, sizeof(core_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
+{
+	const u32     msg_id = KBASE_TL_NEW_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) +
+		sizeof(fn);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &fn, sizeof(fn));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_LPU_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_AS_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_ATOM;
+	const size_t  msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+			sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_ctx(void *context)
+{
+	const u32     msg_id = KBASE_TL_DEL_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_atom(void *atom)
+{
+	const u32     msg_id = KBASE_TL_DEL_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_RET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_LPU;
+	const size_t  msg_s0 = sizeof(u32) + sizeof(char) +
+			strnlen(attrib_match_list, STRLEN_MAX);
+	const size_t  msg_size =
+			sizeof(msg_id) + sizeof(u64) +
+			sizeof(atom) + sizeof(lpu) + msg_s0;
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_string(
+			buffer, pos, attrib_match_list, msg_s0);
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_DEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_NDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_RDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_RET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_NRET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+		sizeof(jd) + sizeof(affinity) + sizeof(config);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &jd, sizeof(jd));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &affinity, sizeof(affinity));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &config, sizeof(config));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(prio);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &prio, sizeof(prio));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_jit(
+		void *atom, u64 edit_addr, u64 new_addr)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_JIT;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom)
+		+ sizeof(edit_addr) + sizeof(new_addr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &edit_addr, sizeof(edit_addr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &new_addr, sizeof(new_addr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) +
+		sizeof(transtab) + sizeof(memattr) + sizeof(transcfg);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transtab, sizeof(transtab));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &memattr, sizeof(memattr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transcfg, sizeof(transcfg));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu)
+{
+	const u32     msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
+{
+	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+{
+	const u32     msg_id = KBASE_AUX_PM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
+		sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_type, sizeof(core_type));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
+{
+	const u32     msg_id = KBASE_AUX_PAGEFAULT;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count_change);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&page_count_change, sizeof(page_count_change));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
+{
+	const u32     msg_id = KBASE_AUX_PAGESALLOC;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &page_count, sizeof(page_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_devfreq_target(u64 target_freq)
+{
+	const u32       msg_id = KBASE_AUX_DEVFREQ_TARGET;
+	const size_t    msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(target_freq);
+	unsigned long   flags;
+	char            *buffer;
+	size_t          pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &target_freq, sizeof(target_freq));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_protected_enter_start(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_ENTER_START;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+void __kbase_tlstream_aux_protected_enter_end(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_ENTER_END;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_protected_leave_start(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_LEAVE_START;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+void __kbase_tlstream_aux_protected_leave_end(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_LEAVE_END;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
new file mode 100644
index 0000000..c0a1117
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -0,0 +1,623 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_KBASE_TLSTREAM_H)
+#define _KBASE_TLSTREAM_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_init - initialize timeline infrastructure in kernel
+ * Return: zero on success, negative number on error
+ */
+int kbase_tlstream_init(void);
+
+/**
+ * kbase_tlstream_term - terminate timeline infrastructure in kernel
+ *
+ * Timeline need have to been previously enabled with kbase_tlstream_init().
+ */
+void kbase_tlstream_term(void);
+
+/**
+ * kbase_tlstream_acquire - acquire timeline stream file descriptor
+ * @kctx:  kernel common context
+ * @flags: timeline stream flags
+ *
+ * This descriptor is meant to be used by userspace timeline to gain access to
+ * kernel timeline stream. This stream is later broadcasted by user space to the
+ * timeline client.
+ * Only one entity can own the descriptor at any given time. Descriptor shall be
+ * closed if unused. If descriptor cannot be obtained (i.e. when it is already
+ * being used) return will be a negative value.
+ *
+ * Return: file descriptor on success, negative number on error
+ */
+int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags);
+
+/**
+ * kbase_tlstream_flush_streams - flush timeline streams.
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_tlstream_flush_streams(void);
+
+/**
+ * kbase_tlstream_reset_body_streams - reset timeline body streams.
+ *
+ * Function will discard pending data in all timeline body streams.
+ */
+void kbase_tlstream_reset_body_streams(void);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_tlstream_test - start timeline stream data generator
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay in milliseconds between trace points written by one
+ *             writer
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This test starts a requested number of asynchronous writers in both IRQ and
+ * thread context. Each writer will generate required number of test
+ * tracepoints (tracepoints with embedded information about writer that
+ * should be verified by user space reader). Tracepoints will be emitted in
+ * all timeline body streams. If aux_msg is non-zero writer will also
+ * generate not testable tracepoints (tracepoints without information about
+ * writer). These tracepoints are used to check correctness of remaining
+ * timeline message generating functions. Writer will wait requested time
+ * between generating another set of messages. This call blocks until all
+ * writers finish.
+ */
+void kbase_tlstream_test(
+		unsigned int tpw_count,
+		unsigned int msg_delay,
+		unsigned int msg_count,
+		int          aux_msg);
+
+/**
+ * kbase_tlstream_stats - read timeline stream statistics
+ * @bytes_collected: will hold number of bytes read by the user
+ * @bytes_generated: will hold number of bytes generated by trace points
+ */
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+#define TL_ATOM_STATE_IDLE 0
+#define TL_ATOM_STATE_READY 1
+#define TL_ATOM_STATE_DONE 2
+#define TL_ATOM_STATE_POSTED 3
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+void __kbase_tlstream_tl_del_ctx(void *context);
+void __kbase_tlstream_tl_del_atom(void *atom);
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list);
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config);
+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio);
+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state);
+void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom);
+void __kbase_tlstream_tl_attrib_atom_jit(
+		void *atom, u64 edit_addr, u64 new_addr);
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg);
+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom);
+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu);
+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom);
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
+void __kbase_tlstream_aux_devfreq_target(u64 target_freq);
+void __kbase_tlstream_aux_protected_enter_start(void *gpu);
+void __kbase_tlstream_aux_protected_enter_end(void *gpu);
+void __kbase_tlstream_aux_protected_leave_start(void *gpu);
+void __kbase_tlstream_aux_protected_leave_end(void *gpu);
+
+#define TLSTREAM_ENABLED (1 << 31)
+
+extern atomic_t kbase_tlstream_enabled;
+
+#define __TRACE_IF_ENABLED(trace_name, ...)                         \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled & TLSTREAM_ENABLED)                     \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...)                     \
+	do {                                                            \
+		int enabled = atomic_read(&kbase_tlstream_enabled);     \
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
+			__kbase_tlstream_##trace_name(__VA_ARGS__);     \
+	} while (0)
+
+#define __TRACE_IF_ENABLED_JD(trace_name, ...)                      \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED)    \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+/*****************************************************************************/
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX - create context object in timeline
+ *                                     summary
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU - create GPU object in timeline summary
+ * @gpu:        name of the GPU object
+ * @id:         id value of this GPU
+ * @core_count: number of cores this GPU hosts
+ *
+ * Function emits a timeline message informing about GPU creation. GPU is
+ * created with two attributes: id and core count.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(gpu, id, core_count) \
+	__TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU - create LPU object in timeline summary
+ * @lpu: name of the Logical Processing Unit object
+ * @nr:  sequential number assigned to this LPU
+ * @fn:  property describing this LPU's functional abilities
+ *
+ * Function emits a timeline message informing about LPU creation. LPU is
+ * created with two attributes: number linking this LPU with GPU's job slot
+ * and function bearing information about this LPU abilities.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, nr, fn) \
+	__TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU - lifelink LPU object to GPU
+ * @lpu: name of the Logical Processing Unit object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that LPU object shall be deleted
+ * along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_AS - create address space object in timeline summary
+ * @as: name of the address space object
+ * @nr: sequential number assigned to this address space
+ *
+ * Function emits a timeline message informing about address space creation.
+ * Address space is created with one attribute: number identifying this
+ * address space.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(as, nr) \
+	__TRACE_IF_ENABLED(tl_summary_new_as, as, nr)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU - lifelink address space object to GPU
+ * @as:  name of the address space object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that address space object
+ * shall be deleted along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(as, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_CTX - create context object in timeline
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ */
+#define KBASE_TLSTREAM_TL_NEW_CTX(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_ATOM - create atom object in timeline
+ * @atom: name of the atom object
+ * @nr:   sequential number assigned to this atom
+ *
+ * Function emits a timeline message informing about atom creation. Atom is
+ * created with atom number (its attribute) that links it with actual work
+ * bucket id understood by hardware.
+ */
+#define KBASE_TLSTREAM_TL_NEW_ATOM(atom, nr) \
+	__TRACE_IF_ENABLED(tl_new_atom, atom, nr)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_CTX - destroy context object in timeline
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that context object ceased to
+ * exist.
+ */
+#define KBASE_TLSTREAM_TL_DEL_CTX(context) \
+	__TRACE_IF_ENABLED(tl_del_ctx, context)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_ATOM - destroy atom object in timeline
+ * @atom: name of the atom object
+ *
+ * Function emits a timeline message informing that atom object ceased to
+ * exist.
+ */
+#define KBASE_TLSTREAM_TL_DEL_ATOM(atom) \
+	__TRACE_IF_ENABLED(tl_del_atom, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_CTX_LPU - retain context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_CTX_LPU(context, lpu) \
+	__TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_CTX - retain atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by context and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_CTX(atom, context) \
+	__TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_LPU - retain atom by LPU
+ * @atom:              name of the atom object
+ * @lpu:               name of the Logical Processing Unit object
+ * @attrib_match_list: list containing match operator attributes
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_LPU(atom, lpu, attrib_match_list) \
+	__TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_CTX_LPU - release context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being released
+ * by LPU object.
+ */
+#define KBASE_TLSTREAM_TL_NRET_CTX_LPU(context, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - release atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by context.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX(atom, context) \
+	__TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - release atom by LPU
+ * @atom: name of the atom object
+ * @lpu:  name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by LPU.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU(atom, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_AS_CTX - lifelink address space object to context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being held by the context object.
+ */
+#define KBASE_TLSTREAM_TL_RET_AS_CTX(as, ctx) \
+	__TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_AS_CTX - release address space by context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being released by atom.
+ */
+#define KBASE_TLSTREAM_TL_NRET_AS_CTX(as, ctx) \
+	__TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_AS - retain atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by address space and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_AS(atom, as) \
+	__TRACE_IF_ENABLED(tl_ret_atom_as, atom, as)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_AS - release atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by address space.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_AS(atom, as) \
+	__TRACE_IF_ENABLED(tl_nret_atom_as, atom, as)
+
+/**
+ * KBASE_TLSTREAM_TL_DEP_ATOM_ATOM - parent atom depends on child atom
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depends on child atom
+ *
+ * Function emits a timeline message informing that parent atom waits for
+ * child atom object to be completed before start its execution.
+ */
+#define KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM - dependency between atoms resolved
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM - information about already resolved dependency between atoms
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes
+ * @atom:     name of the atom object
+ * @jd:       job descriptor address
+ * @affinity: job affinity
+ * @config:   job config
+ *
+ * Function emits a timeline message containing atom attributes.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(atom, jd, affinity, config) \
+	__TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - atom priority
+ * @atom: name of the atom object
+ * @prio: atom priority
+ *
+ * Function emits a timeline message containing atom priority.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(atom, prio) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - atom state
+ * @atom:  name of the atom object
+ * @state: atom state
+ *
+ * Function emits a timeline message containing atom state.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, state) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE - atom caused priority change
+ * @atom:  name of the atom object
+ *
+ * Function emits a timeline message signalling priority change
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE(atom) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority_change, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - jit happened on atom
+ * @atom:       atom identifier
+ * @edit_addr:  address edited by jit
+ * @new_addr:   address placed into the edited location
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr) \
+	__TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, new_addr)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes
+ * @as:       assigned address space
+ * @transtab: configuration of the TRANSTAB register
+ * @memattr:  configuration of the MEMATTR register
+ * @transcfg: configuration of the TRANSCFG register (or zero if not present)
+ *
+ * Function emits a timeline message containing address space attributes.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, transtab, memattr, transcfg) \
+	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ex
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_LPU_softstop
+ * @lpu:        name of the LPU object
+ */
+#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(lpu) \
+	__TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_issue
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom)
+
+/**
+ * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - The GPU is being soft reset
+ * @gpu:        name of the GPU object
+ *
+ * This imperative tracepoint is specific to job dumping.
+ * Function emits a timeline message indicating GPU soft reset.
+ */
+#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET(gpu) \
+	__TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu)
+
+
+/**
+ * KBASE_TLSTREAM_AUX_PM_STATE - timeline message: power management state
+ * @core_type: core type (shader, tiler, l2 cache, l3 cache)
+ * @state:     64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
+ */
+#define KBASE_TLSTREAM_AUX_PM_STATE(core_type, state) \
+	__TRACE_IF_ENABLED(aux_pm_state, core_type, state)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGEFAULT - timeline message: MMU page fault event
+ *                                resulting in new pages being mapped
+ * @ctx_nr:            kernel context number
+ * @page_count_change: number of pages to be added
+ */
+#define KBASE_TLSTREAM_AUX_PAGEFAULT(ctx_nr, page_count_change) \
+	__TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGESALLOC - timeline message: total number of allocated
+ *                                 pages is changed
+ * @ctx_nr:     kernel context number
+ * @page_count: number of pages used by the context
+ */
+#define KBASE_TLSTREAM_AUX_PAGESALLOC(ctx_nr, page_count) \
+	__TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count)
+
+/**
+ * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - timeline message: new target DVFS
+ *                                     frequency
+ * @target_freq: new target frequency
+ */
+#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(target_freq) \
+	__TRACE_IF_ENABLED(aux_devfreq_target, target_freq)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - The GPU has started transitioning
+ *                                            to protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU is starting to
+ * transition to protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_enter_start, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - The GPU has finished transitioning
+ *                                          to protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU has finished
+ * transitioning to protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_enter_end, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - The GPU has started transitioning
+ *                                            to non-protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU is starting to
+ * transition to non-protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_leave_start, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - The GPU has finished transitioning
+ *                                          to non-protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU has finished
+ * transitioning to non-protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_leave_end, gpu)
+
+#endif /* _KBASE_TLSTREAM_H */
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100644
index 0000000..e2e0544
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,264 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code idenitifers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE.
+ *
+ * e.g.:
+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X
+ * typedef enum
+ * {
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X )
+ * #include "mali_kbase_trace_defs.h"
+ * #undef  KBASE_TRACE_CODE_MAKE_CODE
+ * } kbase_trace_code;
+ *
+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE
+ *
+ *
+ * The use of the macro here is:
+ * - KBASE_TRACE_CODE_MAKE_CODE( X )
+ *
+ * Which produces:
+ * - For an enum, KBASE_TRACE_CODE_X
+ * - For a string, "X"
+ *
+ *
+ * For example:
+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to:
+ *  - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum
+ *  - "JM_JOB_COMPLETE" for the string
+ * - To use it to trace an event, do:
+ *  - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val );
+ */
+
+#if 0 /* Dummy section to avoid breaking formatting */
+int dummy_array[] = {
+#endif
+
+/*
+ * Core events
+ */
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ),
+	/* info_val == bits cleared */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR),
+	/* GPU addr==dump address */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES),
+/*
+ * Job Slot management events
+ */
+	/* info_val==irq rawstat at start */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ),
+	/* info_val==jobs processed */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END),
+/* In the following:
+ *
+ * - ctx is set if a corresponding job found (NULL otherwise, e.g. some soft-stop cases)
+ * - uatom==kernel-side mapped uatom address (for correlation with user-side)
+ */
+	/* info_val==exit code; gpu_addr==chain gpuaddr */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+	/* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT),
+	/* gpu_addr is as follows:
+	 * - If JS_STATUS active after soft-stop, val==gpu addr written to
+	 *   JS_HEAD on submit
+	 * - otherwise gpu_addr==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1),
+	/* gpu_addr==JS_TAIL read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD),
+/* gpu_addr is as follows:
+ * - If JS_STATUS active before soft-stop, val==JS_HEAD
+ * - otherwise gpu_addr==0
+ */
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE),
+	/* info_val == nr jobs submitted */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP),
+	/* gpu_addr==JS_HEAD_NEXT last written */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER),
+/*
+ * Job dispatch events
+ */
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==0, info_val==0, uatom==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER),
+/*
+ * Scheduler Core events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB),
+	/* gpu_addr==last value written/would be written to JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
+	/* kctx is the one being evicted, info_val == kctx to put in  */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE),
+	/* info_val == the ctx attribute now on ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX),
+	/* info_val == the ctx attribute now on runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL),
+	/* info_val == the ctx attribute now off ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
+	/* info_val == the ctx attribute now off runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+/*
+ * Scheduler Policy events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX),
+	/* info_val == whether it was evicted */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ),
+	/* gpu_addr==JS_HEAD to write if the job were run */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END),
+/*
+ * Power Management Events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
+	/* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF),
+	/* info_val == policy number, or -1 for "Already changing" */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+/* Unused code just to make it easier to not have a comma at the end.
+ * All other codes MUST come before this */
+	KBASE_TRACE_CODE_MAKE_CODE(DUMMY)
+
+#if 0 /* Dummy section to avoid breaking formatting */
+};
+#endif
+
+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
new file mode 100644
index 0000000..5830e87
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
@@ -0,0 +1,236 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define CREATE_TRACE_POINTS
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+#include "mali_timeline.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active);
+
+struct kbase_trace_timeline_desc {
+	char *enum_str;
+	char *desc;
+	char *format;
+	char *format_desc;
+};
+
+static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc }
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table)
+
+static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos)
+{
+	if (*pos >= KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	(*pos)++;
+
+	if (*pos == KBASE_NR_TRACE_CODES)
+		return NULL;
+
+	return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace_timeline_desc *trace_desc = data;
+
+	seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc);
+	return 0;
+}
+
+
+static const struct seq_operations kbasep_trace_timeline_seq_ops = {
+	.start = kbasep_trace_timeline_seq_start,
+	.next = kbasep_trace_timeline_seq_next,
+	.stop = kbasep_trace_timeline_seq_stop,
+	.show = kbasep_trace_timeline_seq_show,
+};
+
+static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &kbasep_trace_timeline_seq_ops);
+}
+
+static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
+	.open = kbasep_trace_timeline_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_timeline_defs",
+			S_IRUGO, kbdev->mali_debugfs_directory, NULL,
+			&kbasep_trace_timeline_debugfs_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
+	} else {
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1);
+		KBASE_TIMELINE_JOB_START(kctx, js, atom_number);
+	}
+	++kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
+		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
+	} else {
+		/* Job finished in JS_HEAD */
+		base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+		KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0);
+		KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number);
+
+		/* see if we need to trace the job in JS_NEXT moving to JS_HEAD */
+		if (kbase_backend_nr_atoms_submitted(kbdev, js)) {
+			struct kbase_jd_atom *next_katom;
+			struct kbase_context *next_kctx;
+
+			/* Peek the next atom - note that the atom in JS_HEAD will already
+			 * have been dequeued */
+			next_katom = kbase_backend_inspect_head(kbdev, js);
+			WARN_ON(!next_katom);
+			next_kctx = next_katom->kctx;
+			KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0);
+			KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1);
+			KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom));
+		}
+	}
+
+	--kbdev->timeline.slot_atoms_submitted[js];
+
+	KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+	int uid = 0;
+	int old_uid;
+
+	/* If a producer already exists for the event, try to use their UID (multiple-producers) */
+	uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]);
+	old_uid = uid;
+
+	/* Get a new non-zero UID if we don't have one yet */
+	while (!uid)
+		uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter);
+
+	/* Try to use this UID */
+	if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid))
+		/* If it changed, raced with another producer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid);
+}
+
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != 0) {
+		if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+			/* If it changed, raced with another consumer: we've lost this UID */
+			uid = 0;
+
+		KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+	}
+}
+
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+	int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+	if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+		/* If it changed, raced with another consumer: we've lost this UID */
+		uid = 0;
+
+	KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+}
+
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	/* Simply log the start of the transition */
+	kbdev->timeline.l2_transitioning = true;
+	KBASE_TIMELINE_POWERING_L2(kbdev);
+}
+
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	/* Simply log the end of the transition */
+	if (kbdev->timeline.l2_transitioning) {
+		kbdev->timeline.l2_transitioning = false;
+		KBASE_TIMELINE_POWERED_L2(kbdev);
+	}
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
new file mode 100644
index 0000000..619072f
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
@@ -0,0 +1,363 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_KBASE_TRACE_TIMELINE_H)
+#define _KBASE_TRACE_TIMELINE_H
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+
+enum kbase_trace_timeline_code {
+	#define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val
+	#include "mali_kbase_trace_timeline_defs.h"
+	#undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Initialize Timeline DebugFS entries */
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_trace_timeline_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
+ * functions.
+ * Output is timestamped by either sched_clock() (default), local_clock(), or
+ * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */
+#include "mali_timeline.h"
+
+/* Trace number of atoms in flight for kctx (atoms either not completed, or in
+   process of being returned to user */
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec,   \
+				(int)kctx->timeline.owner_tgid,              \
+				count);                                      \
+	} while (0)
+
+/* Trace atom_id being Ready to Run */
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id)                             \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec,              \
+				CTX_FLOW_ATOM_READY,                         \
+				(int)kctx->timeline.owner_tgid,              \
+				atom_id);                                    \
+	} while (0)
+
+/* Trace number of atoms submitted to job slot js
+ *
+ * NOTE: This uses a different tracepoint to the head/next/soft-stop actions,
+ * so that those actions can be filtered out separately from this
+ *
+ * This is because this is more useful, as we can use it to calculate general
+ * utilization easily and accurately */
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_ACTIVE,                      \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+
+/* Trace atoms present in JS_NEXT */
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_NEXT,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace atoms present in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count)                       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_HEAD,                        \
+				(int)kctx->timeline.owner_tgid,              \
+				js, count);                                  \
+	} while (0)
+
+/* Trace that a soft stop/evict from next is being attempted on a slot */
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec,   \
+				SW_SET_GPU_SLOT_STOPPING,                    \
+				(kctx) ? (int)kctx->timeline.owner_tgid : 0, \
+				js, count);                                  \
+	} while (0)
+
+
+
+/* Trace state of overall GPU power */
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active)                              \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_ACTIVE, active);            \
+	} while (0)
+
+/* Trace state of tiler power */
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap)                            \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_TILER_ACTIVE,               \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace number of shaders currently powered */
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap)                           \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_SHADER_ACTIVE,              \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 power */
+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap)                               \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec,  \
+				SW_SET_GPU_POWER_L2_ACTIVE,                  \
+				hweight64(bitmap));                          \
+	} while (0)
+
+/* Trace state of L2 cache*/
+#define KBASE_TIMELINE_POWERING_L2(kbdev)                                    \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_POWERING,               \
+				1);                                          \
+	} while (0)
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)                                     \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec,   \
+				SW_FLOW_GPU_POWER_L2_ACTIVE,                 \
+				1);                                          \
+	} while (0)
+
+/* Trace kbase_pm_send_event message send */
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id)         \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_SEND_EVENT,                       \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+/* Trace kbase_pm_worker message receive */
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id)       \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec,          \
+				SW_FLOW_PM_HANDLE_EVENT,                     \
+				event_type, pm_event_id);                    \
+	} while (0)
+
+
+/* Trace atom_id starting in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number)          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_START_GPU_JOB_CHAIN_SW_APPROX,            \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _consumerof_atom_number);                \
+	} while (0)
+
+/* Trace atom_id stopping on JS_HEAD */
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec,         \
+				HW_STOP_GPU_JOB_CHAIN_SW_APPROX,             \
+				(int)kctx->timeline.owner_tgid,              \
+				js, _producerof_atom_number_completed);      \
+	} while (0)
+
+/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
+ * certin caller */
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code)                      \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec,     \
+				trace_code, 1);                              \
+	} while (0)
+
+/* Trace number of contexts active */
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count)                          \
+	do {                                                                 \
+		struct timespec ts;                                          \
+		getrawmonotonic(&ts);                                        \
+		trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec,    \
+				count);                                      \
+	} while (0)
+
+/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
+
+/**
+ * Trace that an atom is starting on a job slot
+ *
+ * The caller must be holding hwaccess_lock
+ */
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js);
+
+/**
+ * Trace that an atom has done on a job slot
+ *
+ * 'Done' in this sense can occur either because:
+ * - the atom in JS_HEAD finished
+ * - the atom in JS_NEXT was evicted
+ *
+ * Whether the atom finished or was evicted is passed in @a done_code
+ *
+ * It is assumed that the atom has already been removed from the submit slot,
+ * with either:
+ * - kbasep_jm_dequeue_submit_slot()
+ * - kbasep_jm_dequeue_tail_submit_slot()
+ *
+ * The caller must be holding hwaccess_lock
+ */
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code);
+
+
+/** Trace a pm event starting */
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
+		enum kbase_timeline_pm_event event_sent);
+
+/** Trace a pm event finishing */
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Check whether a pm event was present, and if so trace finishing it */
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Trace L2 power-up start */
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
+
+/** Trace L2 power-up done */
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
+
+#else
+
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev)  CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
+
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
+
+static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+}
+
+static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbase_jd_atom *katom, int js,
+		kbasep_js_atom_done_code done_code)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+}
+
+static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+}
+
+static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_MALI_TRACE_TIMELINE */
+
+#endif				/* _KBASE_TRACE_TIMELINE_H */
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
new file mode 100644
index 0000000..156a95a
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
@@ -0,0 +1,140 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * Conventions on Event Names:
+ *
+ * - The prefix determines something about how the timeline should be
+ *   displayed, and is split up into various parts, separated by underscores:
+ *  - 'SW' and 'HW' as the first part will be used to determine whether a
+ *     timeline is to do with Software or Hardware - effectively, separate
+ *     'channels' for Software and Hardware
+ *  - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
+ *    signify related pairs of events - these are optional.
+ *  - 'FLOW' indicates a generic event, which can use dependencies
+ * - This gives events such as:
+ *  - 'SW_ENTER_FOO'
+ *  - 'SW_LEAVE_FOO'
+ *  - 'SW_FLOW_BAR_1'
+ *  - 'SW_FLOW_BAR_2'
+ *  - 'HW_START_BAZ'
+ *  - 'HW_STOP_BAZ'
+ * - And an unadorned HW event:
+ *  - 'HW_BAZ_FROZBOZ'
+ */
+
+/*
+ * Conventions on parameter names:
+ * - anything with 'instance' in the name will have a separate timeline based
+ *   on that instances.
+ * - underscored-prefixed parameters will by hidden by default on timelines
+ *
+ * Hence:
+ * - Different job slots have their own 'instance', based on the instance value
+ * - Per-context info (e.g. atoms on a context) have their own 'instance'
+ *   (i.e. each context should be on a different timeline)
+ *
+ * Note that globally-shared resources can be tagged with a tgid, but we don't
+ * want an instance per context:
+ * - There's no point having separate Job Slot timelines for each context, that
+ *   would be confusing - there's only really 3 job slots!
+ * - There's no point having separate Shader-powered timelines for each
+ *   context, that would be confusing - all shader cores (whether it be 4, 8,
+ *   etc) are shared in the system.
+ */
+
+	/*
+	 * CTX events
+	 */
+	/* Separate timelines for each context 'instance'*/
+	KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT,     "CTX: Atoms in flight",            "%d,%d",    "_instance_tgid,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY,            "CTX: Atoms Ready to Run",         "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
+
+	/*
+	 * SW Events
+	 */
+	/* Separate timelines for each slot 'instance' */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE,         "SW: GPU slot active",             "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT,           "SW: GPU atom in NEXT",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD,           "SW: GPU atom in HEAD",            "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING,       "SW: Try Soft-Stop on GPU slot",   "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
+	/* Shader and overall power is shared - can't have separate instances of
+	 * it, just tagging with the context */
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE,        "SW: GPU power active",            "%d,%d",    "_tgid,_value_is_power_active"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE,  "SW: GPU tiler powered",           "%d,%d",    "_tgid,_value_number_of_tilers"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered",         "%d,%d",    "_tgid,_value_number_of_shaders"),
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE,     "SW: GPU L2 powered",              "%d,%d",    "_tgid,_value_number_of_l2"),
+
+	/* SW Power event messaging. _event_type is one from the kbase_pm_event enum  */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT,          "SW: PM Send Event",               "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT,        "SW: PM Handle Event",             "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
+	/* SW L2 power events */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING,  "SW: GPU L2 powering",             "%d,%d", "_tgid,_writerof_l2_transitioning"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE,	  "SW: GPU L2 powering done",        "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
+
+	KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE,          "SW: Context Active",              "%d,%d",    "_tgid,_value_active"),
+
+	/*
+	 * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END,   "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END,   "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END,   "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
+
+	/*
+	 * Significant Indirect callers of kbase_pm_check_transitions_nolock()
+	 */
+	/* kbase_pm_request_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"),
+	/* kbase_pm_release_cores */
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END,   "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END,   "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+	/*
+	 * END: SW Functions that call kbase_pm_check_transitions_nolock()
+	 */
+
+	/*
+	 * HW Events
+	 */
+	KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT,
+"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"),
+	KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain start (SW approximated)", "%d,%d,%d",
+"_tgid,job_slot,_consumerof_atom_number_ready"),
+	KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX,
+"HW: Job Chain stop (SW approximated)",  "%d,%d,%d",
+"_tgid,job_slot,_producerof_atom_number_completed")
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
new file mode 100644
index 0000000..be474ff
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.c
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
+{
+	struct list_head *pos = base->next;
+
+	while (pos != base) {
+		if (pos == entry)
+			return true;
+
+		pos = pos->next;
+	}
+	return false;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100644
index 0000000..fd7252d
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+/** Test whether the given list entry is a member of the given list.
+ *
+ * @param base      The head of the list to be tested
+ * @param entry     The list entry to be tested
+ *
+ * @return          true if entry is a member of base
+ *                  false otherwise
+ */
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
+
+#endif				/* _KBASE_UTILITY_H */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
new file mode 100644
index 0000000..c0d4292
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -0,0 +1,2075 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/hrtimer.h>
+#include <linux/jiffies.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwcnt_reader.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+#ifdef CONFIG_MALI_NO_MALI
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif
+
+/*****************************************************************************/
+
+/* Hwcnt reader API version */
+#define HWCNT_READER_API        1
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC            1000000000ull /* ns */
+
+/* The time resolution of dumping service. */
+#define DUMPING_RESOLUTION      500000ull /* ns */
+
+/* The maximal supported number of dumping buffers. */
+#define MAX_BUFFER_COUNT        32
+
+/* Size and number of hw counters blocks. */
+#define NR_CNT_BLOCKS_PER_GROUP 8
+#define NR_CNT_PER_BLOCK        64
+#define NR_BYTES_PER_CNT        4
+#define NR_BYTES_PER_HDR        16
+#define PRFCNT_EN_MASK_OFFSET   0x8
+
+/*****************************************************************************/
+
+enum {
+	SHADER_HWCNT_BM,
+	TILER_HWCNT_BM,
+	MMU_L2_HWCNT_BM,
+	JM_HWCNT_BM
+};
+
+enum vinstr_state {
+	VINSTR_IDLE,
+	VINSTR_DUMPING,
+	VINSTR_SUSPENDING,
+	VINSTR_SUSPENDED,
+	VINSTR_RESUMING
+};
+
+/**
+ * struct kbase_vinstr_context - vinstr context per device
+ * @lock:              protects the entire vinstr context
+ * @kbdev:             pointer to kbase device
+ * @kctx:              pointer to kbase context
+ * @vmap:              vinstr vmap for mapping hwcnt dump buffer
+ * @gpu_va:            GPU hwcnt dump buffer address
+ * @cpu_va:            the CPU side mapping of the hwcnt dump buffer
+ * @dump_size:         size of the dump buffer in bytes
+ * @bitmap:            current set of counters monitored, not always in sync
+ *                     with hardware
+ * @reprogram:         when true, reprogram hwcnt block with the new set of
+ *                     counters
+ * @state:             vinstr state
+ * @state_lock:        protects information about vinstr state
+ * @suspend_waitq:     notification queue to trigger state re-validation
+ * @suspend_cnt:       reference counter of vinstr's suspend state
+ * @suspend_work:      worker to execute on entering suspended state
+ * @resume_work:       worker to execute on leaving suspended state
+ * @nclients:          number of attached clients, pending or otherwise
+ * @waiting_clients:   head of list of clients being periodically sampled
+ * @idle_clients:      head of list of clients being idle
+ * @suspended_clients: head of list of clients being suspended
+ * @thread:            periodic sampling thread
+ * @waitq:             notification queue of sampling thread
+ * @request_pending:   request for action for sampling thread
+ * @clients_present:   when true, we have at least one client
+ *                     Note: this variable is in sync. with nclients and is
+ *                     present to preserve simplicity. Protected by state_lock.
+ */
+struct kbase_vinstr_context {
+	struct mutex             lock;
+	struct kbase_device      *kbdev;
+	struct kbase_context     *kctx;
+
+	struct kbase_vmap_struct vmap;
+	u64                      gpu_va;
+	void                     *cpu_va;
+	size_t                   dump_size;
+	u32                      bitmap[4];
+	bool                     reprogram;
+
+	enum vinstr_state        state;
+	struct spinlock          state_lock;
+	wait_queue_head_t        suspend_waitq;
+	unsigned int             suspend_cnt;
+	struct work_struct       suspend_work;
+	struct work_struct       resume_work;
+
+	u32                      nclients;
+	struct list_head         waiting_clients;
+	struct list_head         idle_clients;
+	struct list_head         suspended_clients;
+
+	struct task_struct       *thread;
+	wait_queue_head_t        waitq;
+	atomic_t                 request_pending;
+
+	bool                     clients_present;
+};
+
+/**
+ * struct kbase_vinstr_client - a vinstr client attached to a vinstr context
+ * @vinstr_ctx:    vinstr context client is attached to
+ * @list:          node used to attach this client to list in vinstr context
+ * @buffer_count:  number of buffers this client is using
+ * @event_mask:    events this client reacts to
+ * @dump_size:     size of one dump buffer in bytes
+ * @bitmap:        bitmap request for JM, TILER, SHADER and MMU counters
+ * @legacy_buffer: userspace hwcnt dump buffer (legacy interface)
+ * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface)
+ * @accum_buffer:  temporary accumulation buffer for preserving counters
+ * @dump_time:     next time this clients shall request hwcnt dump
+ * @dump_interval: interval between periodic hwcnt dumps
+ * @dump_buffers:  kernel hwcnt dump buffers allocated by this client
+ * @dump_buffers_meta: metadata of dump buffers
+ * @meta_idx:      index of metadata being accessed by userspace
+ * @read_idx:      index of buffer read by userspace
+ * @write_idx:     index of buffer being written by dumping service
+ * @waitq:         client's notification queue
+ * @pending:       when true, client has attached but hwcnt not yet updated
+ */
+struct kbase_vinstr_client {
+	struct kbase_vinstr_context        *vinstr_ctx;
+	struct list_head                   list;
+	unsigned int                       buffer_count;
+	u32                                event_mask;
+	size_t                             dump_size;
+	u32                                bitmap[4];
+	void __user                        *legacy_buffer;
+	void                               *kernel_buffer;
+	void                               *accum_buffer;
+	u64                                dump_time;
+	u32                                dump_interval;
+	char                               *dump_buffers;
+	struct kbase_hwcnt_reader_metadata *dump_buffers_meta;
+	atomic_t                           meta_idx;
+	atomic_t                           read_idx;
+	atomic_t                           write_idx;
+	wait_queue_head_t                  waitq;
+	bool                               pending;
+};
+
+/**
+ * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer
+ * @hrtimer:    high resolution timer
+ * @vinstr_ctx: vinstr context
+ */
+struct kbasep_vinstr_wake_up_timer {
+	struct hrtimer              hrtimer;
+	struct kbase_vinstr_context *vinstr_ctx;
+};
+
+/*****************************************************************************/
+
+static int kbasep_vinstr_service_task(void *data);
+
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+		struct file *filp,
+		poll_table  *wait);
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+		struct file   *filp,
+		unsigned int  cmd,
+		unsigned long arg);
+static int kbasep_vinstr_hwcnt_reader_mmap(
+		struct file           *filp,
+		struct vm_area_struct *vma);
+static int kbasep_vinstr_hwcnt_reader_release(
+		struct inode *inode,
+		struct file  *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations vinstr_client_fops = {
+	.poll           = kbasep_vinstr_hwcnt_reader_poll,
+	.unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,
+	.compat_ioctl   = kbasep_vinstr_hwcnt_reader_ioctl,
+	.mmap           = kbasep_vinstr_hwcnt_reader_mmap,
+	.release        = kbasep_vinstr_hwcnt_reader_release,
+};
+
+/*****************************************************************************/
+
+static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_uk_hwcnt_setup setup;
+	int err;
+
+	setup.dump_buffer = vinstr_ctx->gpu_va;
+	setup.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
+	setup.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
+	setup.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
+	setup.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
+
+	/* Mark the context as active so the GPU is kept turned on */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread. */
+	kbase_pm_context_active(kbdev);
+
+	/* Schedule the context in */
+	kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup);
+	if (err) {
+		/* Release the context. This had its own Power Manager Active
+		 * reference */
+		kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+		/* Also release our Power Manager Active reference */
+		kbase_pm_context_idle(kbdev);
+	}
+
+	return err;
+}
+
+static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+
+	err = kbase_instr_hwcnt_disable_internal(kctx);
+	if (err) {
+		dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)",
+				kctx);
+		return;
+	}
+
+	/* Release the context. This had its own Power Manager Active reference. */
+	kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+	/* Also release our Power Manager Active reference. */
+	kbase_pm_context_idle(kbdev);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx);
+}
+
+static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	disable_hwcnt(vinstr_ctx);
+	return enable_hwcnt(vinstr_ctx);
+}
+
+static void hwcnt_bitmap_set(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     = src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  = src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM];
+}
+
+static void hwcnt_bitmap_union(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     |= src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  |= src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM];
+}
+
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev)
+{
+	size_t dump_size;
+
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+		u32 nr_cg;
+
+		nr_cg = kbdev->gpu_props.num_core_groups;
+		dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	} else
+#endif /* CONFIG_MALI_NO_MALI */
+	{
+		/* assume v5 for now */
+		base_gpu_props *props = &kbdev->gpu_props.props;
+		u32 nr_l2 = props->l2_props.num_l2_slices;
+		u64 core_mask = props->coherency_info.group[0].core_mask;
+		u32 nr_blocks = fls64(core_mask);
+
+		/* JM and tiler counter blocks are always present */
+		dump_size = (2 + nr_l2 + nr_blocks) *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	}
+	return dump_size;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size);
+
+static size_t kbasep_vinstr_dump_size_ctx(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev);
+}
+
+static int kbasep_vinstr_map_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_va_region *reg;
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	u64 flags, nr_pages;
+
+	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+	vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	nr_pages = PFN_UP(vinstr_ctx->dump_size);
+
+	reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+			&vinstr_ctx->gpu_va);
+	if (!reg)
+		return -ENOMEM;
+
+	vinstr_ctx->cpu_va = kbase_vmap(
+			kctx,
+			vinstr_ctx->gpu_va,
+			vinstr_ctx->dump_size,
+			&vinstr_ctx->vmap);
+	if (!vinstr_ctx->cpu_va) {
+		kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void kbasep_vinstr_unmap_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+
+	kbase_vunmap(kctx, &vinstr_ctx->vmap);
+	kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+}
+
+/**
+ * kbasep_vinstr_create_kctx - create kernel context for vinstr
+ * @vinstr_ctx: vinstr context
+ * Return: zero on success
+ */
+static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element = NULL;
+	unsigned long flags;
+	bool enable_backend = false;
+	int err;
+
+	vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true);
+	if (!vinstr_ctx->kctx)
+		return -ENOMEM;
+
+	/* Map the master kernel dump buffer.  The HW dumps the counters
+	 * into this memory region. */
+	err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx);
+	if (err)
+		goto failed_map;
+
+	/* Add kernel context to list of contexts associated with device. */
+	element = kzalloc(sizeof(*element), GFP_KERNEL);
+	if (element) {
+		element->kctx = vinstr_ctx->kctx;
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_add(&element->link, &kbdev->kctx_list);
+
+		/* Inform timeline client about new context.
+		 * Do this while holding the lock to avoid tracepoint
+		 * being created in both body and summary stream. */
+		KBASE_TLSTREAM_TL_NEW_CTX(
+				vinstr_ctx->kctx,
+				vinstr_ctx->kctx->id,
+				(u32)(vinstr_ctx->kctx->tgid));
+
+		mutex_unlock(&kbdev->kctx_list_lock);
+	} else {
+		/* Don't treat this as a fail - just warn about it. */
+		dev_warn(kbdev->dev,
+				"couldn't add kctx to kctx_list\n");
+	}
+
+	/* Don't enable hardware counters if vinstr is suspended.
+	 * Note that vinstr resume code is run under vinstr context lock,
+	 * lower layer will be enabled as needed on resume. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE == vinstr_ctx->state)
+		enable_backend = true;
+	vinstr_ctx->clients_present = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (enable_backend)
+		err = enable_hwcnt(vinstr_ctx);
+	if (err)
+		goto failed_enable;
+
+	vinstr_ctx->thread = kthread_run(
+			kbasep_vinstr_service_task,
+			vinstr_ctx,
+			"mali_vinstr_service");
+	if (IS_ERR(vinstr_ctx->thread)) {
+		err = PTR_ERR(vinstr_ctx->thread);
+		goto failed_kthread;
+	}
+
+	return 0;
+
+failed_kthread:
+	disable_hwcnt(vinstr_ctx);
+failed_enable:
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->clients_present = false;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+	if (element) {
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_del(&element->link);
+		kfree(element);
+		mutex_unlock(&kbdev->kctx_list_lock);
+		KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+	}
+failed_map:
+	kbase_destroy_context(vinstr_ctx->kctx);
+	vinstr_ctx->kctx = NULL;
+	return err;
+}
+
+/**
+ * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device             *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	struct kbasep_kctx_list_element *tmp;
+	bool                            found = false;
+	unsigned long                   flags;
+
+	/* Release hw counters dumping resources. */
+	vinstr_ctx->thread = NULL;
+	disable_hwcnt(vinstr_ctx);
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+
+	/* Simplify state transitions by specifying that we have no clients. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->clients_present = false;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Remove kernel context from the device's contexts list. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == vinstr_ctx->kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	if (!found)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	/* Destroy context. */
+	kbase_destroy_context(vinstr_ctx->kctx);
+
+	/* Inform timeline client about context destruction. */
+	KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+
+	vinstr_ctx->kctx = NULL;
+}
+
+/**
+ * kbasep_vinstr_attach_client - Attach a client to the vinstr core
+ * @vinstr_ctx:    vinstr context
+ * @buffer_count:  requested number of dump buffers
+ * @bitmap:        bitmaps describing which counters should be enabled
+ * @argp:          pointer where notification descriptor shall be stored
+ * @kernel_buffer: pointer to kernel side buffer
+ *
+ * Return: vinstr opaque client handle or NULL on failure
+ */
+static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
+		struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count,
+		u32 bitmap[4], void *argp, void *kernel_buffer)
+{
+	struct task_struct         *thread = NULL;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	if (buffer_count > MAX_BUFFER_COUNT
+	    || (buffer_count & (buffer_count - 1)))
+		return NULL;
+
+	cli = kzalloc(sizeof(*cli), GFP_KERNEL);
+	if (!cli)
+		return NULL;
+
+	cli->vinstr_ctx   = vinstr_ctx;
+	cli->buffer_count = buffer_count;
+	cli->event_mask   =
+		(1 << BASE_HWCNT_READER_EVENT_MANUAL) |
+		(1 << BASE_HWCNT_READER_EVENT_PERIODIC);
+	cli->pending      = true;
+
+	hwcnt_bitmap_set(cli->bitmap, bitmap);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap);
+	vinstr_ctx->reprogram = true;
+
+	/* If this is the first client, create the vinstr kbase
+	 * context. This context is permanently resident until the
+	 * last client exits. */
+	if (!vinstr_ctx->nclients) {
+		hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap);
+		if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0)
+			goto error;
+
+		vinstr_ctx->reprogram = false;
+		cli->pending = false;
+	}
+
+	/* The GPU resets the counter block every time there is a request
+	 * to dump it. We need a per client kernel buffer for accumulating
+	 * the counters. */
+	cli->dump_size    = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
+	if (!cli->accum_buffer)
+		goto error;
+
+	/* Prepare buffers. */
+	if (cli->buffer_count) {
+		int *fd = (int *)argp;
+		size_t tmp;
+
+		/* Allocate area for buffers metadata storage. */
+		tmp = sizeof(struct kbase_hwcnt_reader_metadata) *
+			cli->buffer_count;
+		cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL);
+		if (!cli->dump_buffers_meta)
+			goto error;
+
+		/* Allocate required number of dumping buffers. */
+		cli->dump_buffers = (char *)__get_free_pages(
+				GFP_KERNEL | __GFP_ZERO,
+				get_order(cli->dump_size * cli->buffer_count));
+		if (!cli->dump_buffers)
+			goto error;
+
+		/* Create descriptor for user-kernel data exchange. */
+		*fd = anon_inode_getfd(
+				"[mali_vinstr_desc]",
+				&vinstr_client_fops,
+				cli,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd)
+			goto error;
+	} else if (kernel_buffer) {
+		cli->kernel_buffer = kernel_buffer;
+	} else {
+		cli->legacy_buffer = (void __user *)argp;
+	}
+
+	atomic_set(&cli->read_idx, 0);
+	atomic_set(&cli->meta_idx, 0);
+	atomic_set(&cli->write_idx, 0);
+	init_waitqueue_head(&cli->waitq);
+
+	vinstr_ctx->nclients++;
+	list_add(&cli->list, &vinstr_ctx->idle_clients);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return cli;
+
+error:
+	kfree(cli->dump_buffers_meta);
+	if (cli->dump_buffers)
+		free_pages(
+				(unsigned long)cli->dump_buffers,
+				get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	if (!vinstr_ctx->nclients && vinstr_ctx->kctx) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+	kfree(cli);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+
+	return NULL;
+}
+
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	struct kbase_vinstr_client  *iter, *tmp;
+	struct task_struct          *thread = NULL;
+	u32 zerobitmap[4] = { 0 };
+	int cli_found = 0;
+
+	KBASE_DEBUG_ASSERT(cli);
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) {
+		if (iter == cli) {
+			vinstr_ctx->reprogram = true;
+			cli_found = 1;
+			list_del(&iter->list);
+			break;
+		}
+	}
+	if (!cli_found) {
+		list_for_each_entry_safe(
+				iter, tmp, &vinstr_ctx->waiting_clients, list) {
+			if (iter == cli) {
+				vinstr_ctx->reprogram = true;
+				cli_found = 1;
+				list_del(&iter->list);
+				break;
+			}
+		}
+	}
+	KBASE_DEBUG_ASSERT(cli_found);
+
+	kfree(cli->dump_buffers_meta);
+	free_pages(
+			(unsigned long)cli->dump_buffers,
+			get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	kfree(cli);
+
+	vinstr_ctx->nclients--;
+	if (!vinstr_ctx->nclients) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+
+	/* Rebuild context bitmap now that the client has detached */
+	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client);
+
+/* Accumulate counters in the dump buffer */
+static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
+{
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+	u32 *d = dst;
+	u32 *s = src;
+	size_t i, j;
+
+	for (i = 0; i < dump_size; i += block_size) {
+		/* skip over the header block */
+		d += NR_BYTES_PER_HDR / sizeof(u32);
+		s += NR_BYTES_PER_HDR / sizeof(u32);
+		for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
+			/* saturate result if addition would result in wraparound */
+			if (U32_MAX - *d < *s)
+				*d = U32_MAX;
+			else
+				*d += *s;
+			d++;
+			s++;
+		}
+	}
+}
+
+/* This is the Midgard v4 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v4(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups;
+	size_t i, group_size, group;
+	enum {
+		SC0_BASE    = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC1_BASE    = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC2_BASE    = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC3_BASE    = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		TILER_BASE  = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		JM_BASE     = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
+	};
+
+	group_size = NR_CNT_BLOCKS_PER_GROUP *
+			NR_CNT_PER_BLOCK *
+			NR_BYTES_PER_CNT;
+	for (i = 0; i < nr_cg; i++) {
+		group = i * group_size;
+		/* copy shader core headers */
+		memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE],
+		      NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy tiler header */
+		memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy mmu header */
+		memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy job manager header */
+		memcpy(&dst[group + JM_BASE], &src[group + JM_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* patch the shader core enable mask */
+		mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+
+		/* patch the tiler core enable mask */
+		mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[TILER_HWCNT_BM];
+
+		/* patch the mmu core enable mask */
+		mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+
+		/* patch the job manager enable mask */
+		mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[JM_HWCNT_BM];
+	}
+}
+
+/* This is the Midgard v5 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v5(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev;
+	u32 i, nr_l2;
+	u64 core_mask;
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+
+	/* copy and patch job manager header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[JM_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch tiler header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[TILER_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch MMU/L2C headers */
+	nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+	for (i = 0; i < nr_l2; i++) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+		dst += block_size;
+		src += block_size;
+	}
+
+	/* copy and patch shader core headers */
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (0ull != core_mask) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		if (0ull != (core_mask & 1ull)) {
+			/* if block is not reserved update header */
+			mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+			*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		}
+		dst += block_size;
+		src += block_size;
+
+		core_mask >>= 1;
+	}
+}
+
+/**
+ * accum_clients - accumulate dumped hw counters for all known clients
+ * @vinstr_ctx: vinstr context
+ */
+static void accum_clients(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *iter;
+	int v4 = 0;
+
+#ifndef CONFIG_MALI_NO_MALI
+	v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4);
+#endif
+
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ *
+ * Return: timestamp value
+ */
+static u64 kbasep_vinstr_get_timestamp(void)
+{
+	struct timespec ts;
+
+	getrawmonotonic(&ts);
+	return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+}
+
+/**
+ * kbasep_vinstr_add_dump_request - register client's dumping request
+ * @cli:             requesting client
+ * @waiting_clients: list of pending dumping requests
+ */
+static void kbasep_vinstr_add_dump_request(
+		struct kbase_vinstr_client *cli,
+		struct list_head *waiting_clients)
+{
+	struct kbase_vinstr_client *tmp;
+
+	if (list_empty(waiting_clients)) {
+		list_add(&cli->list, waiting_clients);
+		return;
+	}
+	list_for_each_entry(tmp, waiting_clients, list) {
+		if (tmp->dump_time > cli->dump_time) {
+			list_add_tail(&cli->list, &tmp->list);
+			return;
+		}
+	}
+	list_add_tail(&cli->list, waiting_clients);
+}
+
+/**
+ * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level
+ *                                        dump and accumulate them for known
+ *                                        clients
+ * @vinstr_ctx: vinstr context
+ * @timestamp: pointer where collection timestamp will be recorded
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_collect_and_accumulate(
+		struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp)
+{
+	unsigned long flags;
+	int rcode;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* The dummy model needs the CPU mapping. */
+	gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va);
+#endif
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state) {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		return -EAGAIN;
+	} else {
+		vinstr_ctx->state = VINSTR_DUMPING;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Request HW counters dump.
+	 * Disable preemption to make dump timestamp more accurate. */
+	preempt_disable();
+	*timestamp = kbasep_vinstr_get_timestamp();
+	rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx);
+	preempt_enable();
+
+	if (!rcode)
+		rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
+	WARN_ON(rcode);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state)
+	{
+	case VINSTR_SUSPENDING:
+		schedule_work(&vinstr_ctx->suspend_work);
+		break;
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_IDLE;
+		wake_up_all(&vinstr_ctx->suspend_waitq);
+		break;
+	default:
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Accumulate values of collected counters. */
+	if (!rcode)
+		accum_clients(vinstr_ctx);
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel
+ *                                  buffer
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_fill_dump_buffer(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	unsigned int write_idx = atomic_read(&cli->write_idx);
+	unsigned int read_idx  = atomic_read(&cli->read_idx);
+
+	struct kbase_hwcnt_reader_metadata *meta;
+	void                               *buffer;
+
+	/* Check if there is a place to copy HWC block into. */
+	if (write_idx - read_idx == cli->buffer_count)
+		return -1;
+	write_idx %= cli->buffer_count;
+
+	/* Fill in dump buffer and its metadata. */
+	buffer = &cli->dump_buffers[write_idx * cli->dump_size];
+	meta   = &cli->dump_buffers_meta[write_idx];
+	meta->timestamp  = timestamp;
+	meta->event_id   = event_id;
+	meta->buffer_idx = write_idx;
+	memcpy(buffer, cli->accum_buffer, cli->dump_size);
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer
+ *                                         allocated in userspace
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of legacy ioctl interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_legacy(
+		struct kbase_vinstr_client *cli)
+{
+	void __user  *buffer = cli->legacy_buffer;
+	int          rcode;
+
+	/* Copy data to user buffer. */
+	rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size);
+	if (rcode) {
+		pr_warn("error while copying buffer to user\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer
+ *                                         allocated in kernel space
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of the kernel client interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_kernel(
+		struct kbase_vinstr_client *cli)
+{
+	memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_reprogram(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	bool suspended = false;
+
+	/* Don't enable hardware counters if vinstr is suspended. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state)
+		suspended = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (suspended)
+		return;
+
+	/* Change to suspended state is done while holding vinstr context
+	 * lock. Below code will then no re-enable the instrumentation. */
+
+	if (vinstr_ctx->reprogram) {
+		struct kbase_vinstr_client *iter;
+
+		if (!reprogram_hwcnt(vinstr_ctx)) {
+			vinstr_ctx->reprogram = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->idle_clients,
+					list)
+				iter->pending = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->waiting_clients,
+					list)
+				iter->pending = false;
+		}
+	}
+}
+
+/**
+ * kbasep_vinstr_update_client - copy accumulated counters to user readable
+ *                               buffer and notify the user
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_update_client(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	int rcode = 0;
+
+	/* Copy collected counters to user readable buffer. */
+	if (cli->buffer_count)
+		rcode = kbasep_vinstr_fill_dump_buffer(
+				cli, timestamp, event_id);
+	else if (cli->kernel_buffer)
+		rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli);
+	else
+		rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
+
+	if (rcode)
+		goto exit;
+
+
+	/* Notify client. Make sure all changes to memory are visible. */
+	wmb();
+	atomic_inc(&cli->write_idx);
+	wake_up_interruptible(&cli->waitq);
+
+	/* Prepare for next request. */
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+exit:
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function
+ *
+ * @hrtimer: high resolution timer
+ *
+ * Return: High resolution timer restart enum.
+ */
+static enum hrtimer_restart kbasep_vinstr_wake_up_callback(
+		struct hrtimer *hrtimer)
+{
+	struct kbasep_vinstr_wake_up_timer *timer =
+		container_of(
+			hrtimer,
+			struct kbasep_vinstr_wake_up_timer,
+			hrtimer);
+
+	KBASE_DEBUG_ASSERT(timer);
+
+	atomic_set(&timer->vinstr_ctx->request_pending, 1);
+	wake_up_all(&timer->vinstr_ctx->waitq);
+
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * kbasep_vinstr_service_task - HWC dumping service thread
+ *
+ * @data: Pointer to vinstr context structure.
+ *
+ * Return: 0 on success; -ENOMEM if timer allocation fails
+ */
+static int kbasep_vinstr_service_task(void *data)
+{
+	struct kbase_vinstr_context        *vinstr_ctx = data;
+	struct kbasep_vinstr_wake_up_timer *timer;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	timer = kmalloc(sizeof(*timer), GFP_KERNEL);
+
+	if (!timer) {
+		dev_warn(vinstr_ctx->kbdev->dev, "Timer allocation failed!\n");
+		return -ENOMEM;
+	}
+
+	hrtimer_init(&timer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+	timer->hrtimer.function = kbasep_vinstr_wake_up_callback;
+	timer->vinstr_ctx       = vinstr_ctx;
+
+	while (!kthread_should_stop()) {
+		struct kbase_vinstr_client *cli = NULL;
+		struct kbase_vinstr_client *tmp;
+		int                        rcode;
+
+		u64              timestamp = kbasep_vinstr_get_timestamp();
+		u64              dump_time = 0;
+		struct list_head expired_requests;
+
+		/* Hold lock while performing operations on lists of clients. */
+		mutex_lock(&vinstr_ctx->lock);
+
+		/* Closing thread must not interact with client requests. */
+		if (current == vinstr_ctx->thread) {
+			atomic_set(&vinstr_ctx->request_pending, 0);
+
+			if (!list_empty(&vinstr_ctx->waiting_clients)) {
+				cli = list_first_entry(
+						&vinstr_ctx->waiting_clients,
+						struct kbase_vinstr_client,
+						list);
+				dump_time = cli->dump_time;
+			}
+		}
+
+		if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) {
+			mutex_unlock(&vinstr_ctx->lock);
+
+			/* Sleep until next dumping event or service request. */
+			if (cli) {
+				u64 diff = dump_time - timestamp;
+
+				hrtimer_start(
+						&timer->hrtimer,
+						ns_to_ktime(diff),
+						HRTIMER_MODE_REL);
+			}
+			wait_event(
+					vinstr_ctx->waitq,
+					atomic_read(
+						&vinstr_ctx->request_pending) ||
+					kthread_should_stop());
+			hrtimer_cancel(&timer->hrtimer);
+			continue;
+		}
+
+		rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx,
+				&timestamp);
+
+		INIT_LIST_HEAD(&expired_requests);
+
+		/* Find all expired requests. */
+		list_for_each_entry_safe(
+				cli,
+				tmp,
+				&vinstr_ctx->waiting_clients,
+				list) {
+			s64 tdiff =
+				(s64)(timestamp + DUMPING_RESOLUTION) -
+				(s64)cli->dump_time;
+			if (tdiff >= 0ll) {
+				list_del(&cli->list);
+				list_add(&cli->list, &expired_requests);
+			} else {
+				break;
+			}
+		}
+
+		/* Fill data for each request found. */
+		list_for_each_entry_safe(cli, tmp, &expired_requests, list) {
+			/* Ensure that legacy buffer will not be used from
+			 * this kthread context. */
+			BUG_ON(0 == cli->buffer_count);
+			/* Expect only periodically sampled clients. */
+			BUG_ON(0 == cli->dump_interval);
+
+			if (!rcode)
+				kbasep_vinstr_update_client(
+						cli,
+						timestamp,
+						BASE_HWCNT_READER_EVENT_PERIODIC);
+
+			/* Set new dumping time. Drop missed probing times. */
+			do {
+				cli->dump_time += cli->dump_interval;
+			} while (cli->dump_time < timestamp);
+
+			list_del(&cli->list);
+			kbasep_vinstr_add_dump_request(
+					cli,
+					&vinstr_ctx->waiting_clients);
+		}
+
+		/* Reprogram counters set if required. */
+		kbasep_vinstr_reprogram(vinstr_ctx);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	}
+
+	kfree(timer);
+
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers
+ * @cli: pointer to vinstr client structure
+ *
+ * Return: non-zero if client has at least one dumping buffer filled that was
+ *         not notified to user yet
+ */
+static int kbasep_vinstr_hwcnt_reader_buffer_ready(
+		struct kbase_vinstr_client *cli)
+{
+	KBASE_DEBUG_ASSERT(cli);
+	return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int meta_idx = atomic_read(&cli->meta_idx);
+	unsigned int idx = meta_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx];
+
+	/* Metadata sanity check. */
+	KBASE_DEBUG_ASSERT(idx == meta->buffer_idx);
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if there is any buffer available. */
+	if (atomic_read(&cli->write_idx) == meta_idx)
+		return -EAGAIN;
+
+	/* Check if previously taken buffer was put back. */
+	if (atomic_read(&cli->read_idx) != meta_idx)
+		return -EBUSY;
+
+	/* Copy next available buffer's metadata to user. */
+	if (copy_to_user(buffer, meta, size))
+		return -EFAULT;
+
+	atomic_inc(&cli->meta_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int read_idx = atomic_read(&cli->read_idx);
+	unsigned int idx = read_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata meta;
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if any buffer was taken. */
+	if (atomic_read(&cli->meta_idx) == read_idx)
+		return -EPERM;
+
+	/* Check if correct buffer is put back. */
+	if (copy_from_user(&meta, buffer, size))
+		return -EFAULT;
+	if (idx != meta.buffer_idx)
+		return -EINVAL;
+
+	atomic_inc(&cli->read_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @interval: periodic dumping interval (disable periodic dumping if zero)
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+		struct kbase_vinstr_client *cli, u32 interval)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	list_del(&cli->list);
+
+	cli->dump_interval = interval;
+
+	/* If interval is non-zero, enable periodic dumping for this client. */
+	if (cli->dump_interval) {
+		if (DUMPING_RESOLUTION > cli->dump_interval)
+			cli->dump_interval = DUMPING_RESOLUTION;
+		cli->dump_time =
+			kbasep_vinstr_get_timestamp() + cli->dump_interval;
+
+		kbasep_vinstr_add_dump_request(
+				cli, &vinstr_ctx->waiting_clients);
+
+		atomic_set(&vinstr_ctx->request_pending, 1);
+		wake_up_all(&vinstr_ctx->waitq);
+	} else {
+		list_add(&cli->list, &vinstr_ctx->idle_clients);
+	}
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id
+ * @event_id: id of event
+ * Return: event_mask or zero if event is not supported or maskable
+ */
+static u32 kbasep_vinstr_hwcnt_reader_event_mask(
+		enum base_hwcnt_reader_event event_id)
+{
+	u32 event_mask = 0;
+
+	switch (event_id) {
+	case BASE_HWCNT_READER_EVENT_PREJOB:
+	case BASE_HWCNT_READER_EVENT_POSTJOB:
+		/* These event are maskable. */
+		event_mask = (1 << event_id);
+		break;
+
+	case BASE_HWCNT_READER_EVENT_MANUAL:
+	case BASE_HWCNT_READER_EVENT_PERIODIC:
+		/* These event are non-maskable. */
+	default:
+		/* These event are not supported. */
+		break;
+	}
+
+	return event_mask;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to enable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask |= event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to disable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask &= ~event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command
+ * @cli:   pointer to vinstr client structure
+ * @hwver: pointer to user buffer where hw version will be stored
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+		struct kbase_vinstr_client *cli, u32 __user *hwver)
+{
+#ifndef CONFIG_MALI_NO_MALI
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+#endif
+
+	u32                         ver = 5;
+
+#ifndef CONFIG_MALI_NO_MALI
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4))
+		ver = 4;
+#endif
+
+	return put_user(ver, hwver);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl
+ * @filp:   pointer to file structure
+ * @cmd:    user command
+ * @arg:    command's argument
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	long                       rcode = 0;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd)))
+		return -EINVAL;
+
+	switch (cmd) {
+	case KBASE_HWCNT_READER_GET_API_VERSION:
+		rcode = put_user(HWCNT_READER_API, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_HWVER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+				cli, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER_SIZE:
+		KBASE_DEBUG_ASSERT(cli->vinstr_ctx);
+		rcode = put_user(
+				(u32)cli->vinstr_ctx->dump_size,
+				(u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_DUMP:
+		rcode = kbase_vinstr_hwc_dump(
+				cli, BASE_HWCNT_READER_EVENT_MANUAL);
+		break;
+	case KBASE_HWCNT_READER_CLEAR:
+		rcode = kbase_vinstr_hwc_clear(cli);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_PUT_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_SET_INTERVAL:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+				cli, (u32)arg);
+		break;
+	case KBASE_HWCNT_READER_ENABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	case KBASE_HWCNT_READER_DISABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	default:
+		rcode = -EINVAL;
+		break;
+	}
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp,
+		poll_table *wait)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	poll_wait(filp, &cli->waitq, wait);
+	if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap
+ * @filp: pointer to file structure
+ * @vma:  pointer to vma structure
+ * Return: zero on success
+ */
+static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
+		struct vm_area_struct *vma)
+{
+	struct kbase_vinstr_client *cli;
+	unsigned long size, addr, pfn, offset;
+	unsigned long vm_size = vma->vm_end - vma->vm_start;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(vma);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	size = cli->buffer_count * cli->dump_size;
+
+	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
+		return -EINVAL;
+
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	if (vm_size > size - offset)
+		return -EINVAL;
+
+	addr = __pa((unsigned long)cli->dump_buffers + offset);
+	pfn = addr >> PAGE_SHIFT;
+
+	return remap_pfn_range(
+			vma,
+			vma->vm_start,
+			pfn,
+			vm_size,
+			vma->vm_page_prot);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ * Return always return zero
+ */
+static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
+		struct file *filp)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	kbase_vinstr_detach_client(cli);
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_kick_scheduler - trigger scheduler cycle
+ * @kbdev: pointer to kbase device structure
+ */
+static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	down(&js_devdata->schedule_sem);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	up(&js_devdata->schedule_sem);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker suspending vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_suspend_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			suspend_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		disable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_SUSPENDED;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Kick GPU scheduler to allow entering protected mode.
+	 * This must happen after vinstr was suspended. */
+	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker resuming vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_resume_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			resume_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		enable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_IDLE;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Kick GPU scheduler to allow entering protected mode.
+	 * Note that scheduler state machine might requested re-entry to
+	 * protected mode before vinstr was resumed.
+	 * This must happen after vinstr was release. */
+	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+}
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL);
+	if (!vinstr_ctx)
+		return NULL;
+
+	INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
+	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
+	mutex_init(&vinstr_ctx->lock);
+	spin_lock_init(&vinstr_ctx->state_lock);
+	vinstr_ctx->kbdev = kbdev;
+	vinstr_ctx->thread = NULL;
+	vinstr_ctx->state = VINSTR_IDLE;
+	vinstr_ctx->suspend_cnt = 0;
+	INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker);
+	INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker);
+	init_waitqueue_head(&vinstr_ctx->suspend_waitq);
+
+	atomic_set(&vinstr_ctx->request_pending, 0);
+	init_waitqueue_head(&vinstr_ctx->waitq);
+
+	return vinstr_ctx;
+}
+
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *cli;
+
+	/* Stop service thread first. */
+	if (vinstr_ctx->thread)
+		kthread_stop(vinstr_ctx->thread);
+
+	/* Wait for workers. */
+	flush_work(&vinstr_ctx->suspend_work);
+	flush_work(&vinstr_ctx->resume_work);
+
+	while (1) {
+		struct list_head *list = &vinstr_ctx->idle_clients;
+
+		if (list_empty(list)) {
+			list = &vinstr_ctx->waiting_clients;
+			if (list_empty(list))
+				break;
+		}
+
+		cli = list_first_entry(list, struct kbase_vinstr_client, list);
+		list_del(&cli->list);
+		kfree(cli->accum_buffer);
+		kfree(cli);
+		vinstr_ctx->nclients--;
+	}
+	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients);
+	if (vinstr_ctx->kctx)
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	kfree(vinstr_ctx);
+}
+
+int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup)
+{
+	struct kbase_vinstr_client  *cli;
+	u32                         bitmap[4];
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(setup->buffer_count);
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	cli = kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			setup->buffer_count,
+			bitmap,
+			&setup->fd,
+			NULL);
+
+	if (!cli)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup)
+{
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (setup->dump_buffer) {
+		u32 bitmap[4];
+
+		bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+		bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+		bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+		bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+		if (*cli)
+			return -EBUSY;
+
+		*cli = kbasep_vinstr_attach_client(
+				vinstr_ctx,
+				0,
+				bitmap,
+				(void *)(long)setup->dump_buffer,
+				NULL);
+
+		if (!(*cli))
+			return -ENOMEM;
+	} else {
+		if (!*cli)
+			return -EINVAL;
+
+		kbase_vinstr_detach_client(*cli);
+		*cli = NULL;
+	}
+
+	return 0;
+}
+
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer)
+{
+	u32 bitmap[4];
+
+	if (!vinstr_ctx || !setup || !kernel_buffer)
+		return NULL;
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	return kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			0,
+			bitmap,
+			NULL,
+			kernel_buffer);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
+
+int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	int                         rcode = 0;
+	struct kbase_vinstr_context *vinstr_ctx;
+	u64                         timestamp;
+	u32                         event_mask;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT);
+	event_mask = 1 << event_id;
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (event_mask & cli->event_mask) {
+		rcode = kbasep_vinstr_collect_and_accumulate(
+				vinstr_ctx,
+				&timestamp);
+		if (rcode)
+			goto exit;
+
+		rcode = kbasep_vinstr_update_client(cli, timestamp, event_id);
+		if (rcode)
+			goto exit;
+
+		kbasep_vinstr_reprogram(vinstr_ctx);
+	}
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump);
+
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	int                         rcode;
+	u64                         unused;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
+	if (rcode)
+		goto exit;
+	rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx);
+	if (rcode)
+		goto exit;
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+	kbasep_vinstr_reprogram(vinstr_ctx);
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	int ret = -EAGAIN;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+		vinstr_ctx->suspend_cnt++;
+		/* overflow shall not happen */
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		ret = 0;
+		break;
+
+	case VINSTR_IDLE:
+		if (vinstr_ctx->clients_present) {
+			vinstr_ctx->state = VINSTR_SUSPENDING;
+			schedule_work(&vinstr_ctx->suspend_work);
+		} else {
+			vinstr_ctx->state = VINSTR_SUSPENDED;
+
+			vinstr_ctx->suspend_cnt++;
+			/* overflow shall not happen */
+			WARN_ON(0 == vinstr_ctx->suspend_cnt);
+			ret = 0;
+		}
+		break;
+
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		break;
+
+	case VINSTR_SUSPENDING:
+		/* fall through */
+	case VINSTR_RESUMING:
+		break;
+
+	default:
+		BUG();
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	return ret;
+}
+
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	wait_event(vinstr_ctx->suspend_waitq,
+			(0 == kbase_vinstr_try_suspend(vinstr_ctx)));
+}
+
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state);
+	if (VINSTR_SUSPENDED == vinstr_ctx->state) {
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		vinstr_ctx->suspend_cnt--;
+		if (0 == vinstr_ctx->suspend_cnt) {
+			if (vinstr_ctx->clients_present) {
+				vinstr_ctx->state = VINSTR_RESUMING;
+				schedule_work(&vinstr_ctx->resume_work);
+			} else {
+				vinstr_ctx->state = VINSTR_IDLE;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
new file mode 100644
index 0000000..4dbf7ee
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -0,0 +1,177 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+#include <mali_kbase_hwcnt_reader.h>
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context;
+struct kbase_vinstr_client;
+
+struct kbase_uk_hwcnt_setup {
+	/* IN */
+	u64 dump_buffer;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 unused_1; /* keep for backwards compatibility */
+	u32 mmu_l2_bm;
+	u32 padding;
+	/* OUT */
+};
+
+struct kbase_uk_hwcnt_reader_setup {
+	/* IN */
+	u32 buffer_count;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 mmu_l2_bm;
+
+	/* OUT */
+	s32 fd;
+};
+/*****************************************************************************/
+
+/**
+ * kbase_vinstr_init() - initialize the vinstr core
+ * @kbdev: kbase device
+ *
+ * Return: pointer to the vinstr context on success or NULL on failure
+ */
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_term() - terminate the vinstr core
+ * @vinstr_ctx: vinstr context
+ */
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader
+ * @vinstr_ctx: vinstr context
+ * @setup:      reader's configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwcnt_reader_setup(
+		struct kbase_vinstr_context        *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup);
+
+/**
+ * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping
+ * @vinstr_ctx: vinstr context
+ * @cli:        pointer where to store pointer to new vinstr client structure
+ * @setup:      hwc configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
+ *                                   client
+ * @vinstr_ctx:    vinstr context
+ * @setup:         reader's configuration
+ * @kernel_buffer: pointer to dump buffer
+ *
+ * setup->buffer_count and setup->fd are not used for kernel side clients.
+ *
+ * Return: pointer to client structure, or NULL on failure
+ */
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_uk_hwcnt_reader_setup *setup,
+		void *kernel_buffer);
+
+/**
+ * kbase_vinstr_hwc_dump - issue counter dump for vinstr client
+ * @cli:      pointer to vinstr client
+ * @event_id: id of event that triggered hwcnt dump
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_dump(
+		struct kbase_vinstr_client   *cli,
+		enum base_hwcnt_reader_event event_id);
+
+/**
+ * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for
+ *                          a given kbase context
+ * @cli: pointer to vinstr client
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Return: 0 on success, or negative if state change is in progress
+ *
+ * Warning: This API call is non-generic. It is meant to be used only by
+ *          job scheduler state machine.
+ *
+ * Function initiates vinstr switch to suspended state. Once it was called
+ * vinstr enters suspending state. If function return non-zero value, it
+ * indicates that state switch is not complete and function must be called
+ * again. On state switch vinstr will trigger job scheduler state machine
+ * cycle.
+ */
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_suspend - suspends operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function initiates vinstr switch to suspended state. Then it blocks until
+ * operation is completed.
+ */
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_resume - resumes operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function can be called only if it was preceded by a successful call
+ * to kbase_vinstr_suspend.
+ */
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_dump_size - Return required size of dump buffer
+ * @kbdev: device pointer
+ *
+ * Return : buffer size in bytes
+ */
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_detach_client - Detach a client from the vinstr core
+ * @cli: pointer to vinstr client
+ */
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
+
+#endif /* _KBASE_VINSTR_H_ */
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100644
index 0000000..5d6b402
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_KBASE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mali_slot_template,
+	TP_PROTO(int jobslot, unsigned int info_val),
+	TP_ARGS(jobslot, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, jobslot)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->jobslot = jobslot;
+		__entry->info_val = info_val;
+	),
+	TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val)
+);
+
+#define DEFINE_MALI_SLOT_EVENT(name) \
+DEFINE_EVENT(mali_slot_template, mali_##name, \
+	TP_PROTO(int jobslot, unsigned int info_val), \
+	TP_ARGS(jobslot, info_val))
+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT);
+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE);
+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT);
+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT);
+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+#undef DEFINE_MALI_SLOT_EVENT
+
+DECLARE_EVENT_CLASS(mali_refcount_template,
+	TP_PROTO(int refcount, unsigned int info_val),
+	TP_ARGS(refcount, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, refcount)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->refcount = refcount;
+		__entry->info_val = info_val;
+	),
+	TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val)
+);
+
+#define DEFINE_MALI_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(mali_refcount_template, mali_##name, \
+	TP_PROTO(int refcount, unsigned int info_val), \
+	TP_ARGS(refcount, info_val))
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK);
+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE);
+#undef DEFINE_MALI_REFCOUNT_EVENT
+
+DECLARE_EVENT_CLASS(mali_add_template,
+	TP_PROTO(int gpu_addr, unsigned int info_val),
+	TP_ARGS(gpu_addr, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, gpu_addr)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->gpu_addr = gpu_addr;
+		__entry->info_val = info_val;
+	),
+	TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val)
+);
+
+#define DEFINE_MALI_ADD_EVENT(name) \
+DEFINE_EVENT(mali_add_template, mali_##name, \
+	TP_PROTO(int gpu_addr, unsigned int info_val), \
+	TP_ARGS(gpu_addr, info_val))
+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY);
+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL);
+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT);
+DEFINE_MALI_ADD_EVENT(JM_IRQ);
+DEFINE_MALI_ADD_EVENT(JM_IRQ_END);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP);
+DEFINE_MALI_ADD_EVENT(PM_PWRON);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM);
+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+#undef DEFINE_MALI_ADD_EVENT
+
+#endif /* _TRACE_MALI_KBASE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100644
index 0000000..2be06a5
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,189 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#include <linux/tracepoint.h>
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - called from mali_kbase_core_linux.c
+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event,
+	TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid,
+			unsigned char job_id),
+	TP_ARGS(event_id, tgid, pid, job_id),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned int, tgid)
+		__field(unsigned int, pid)
+		__field(unsigned char, job_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->tgid = tgid;
+		__entry->pid = pid;
+		__entry->job_id = job_id;
+	),
+	TP_printk("event=%u tgid=%u pid=%u job_id=%u",
+		__entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+);
+
+/**
+ * mali_pm_status - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF)
+ */
+TRACE_EVENT(mali_pm_status,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power up
+ */
+TRACE_EVENT(mali_pm_power_on,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power down
+ */
+TRACE_EVENT(mali_pm_power_off,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_page_fault_insert_pages - Called by page_fault_worker()
+ * it reports an MMU page fault resulting in new pages being mapped.
+ * @event_id: MMU address space number.
+ * @value: number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages,
+	TP_PROTO(int event_id, unsigned long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+		__field(unsigned long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space()
+ * it reports that a certain MMU address space is in use now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_in_use,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal()
+ * it reports that a certain MMU address space has been released now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_released,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
+ *                                 and by kbase_atomic_sub_pages()
+ * it reports that the total number of allocated pages is changed.
+ * @event_id: number of pages to be added or subtracted (according to the sign).
+ */
+TRACE_EVENT(mali_total_alloc_pages_change,
+	TP_PROTO(long long int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(long long int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%lld", __entry->event_id)
+);
+
+#endif				/*  _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_malisw.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_malisw.h
new file mode 100644
index 0000000..9945293
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_malisw.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Kernel-wide include for common macros and types.
+ */
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define U8_MAX          ((u8)~0U)
+#define S8_MAX          ((s8)(U8_MAX>>1))
+#define S8_MIN          ((s8)(-S8_MAX - 1))
+#define U16_MAX         ((u16)~0U)
+#define S16_MAX         ((s16)(U16_MAX>>1))
+#define S16_MIN         ((s16)(-S16_MAX - 1))
+#define U32_MAX         ((u32)~0U)
+#define S32_MAX         ((s32)(U32_MAX>>1))
+#define S32_MIN         ((s32)(-S32_MAX - 1))
+#define U64_MAX         ((u64)~0ULL)
+#define S64_MAX         ((s64)(U64_MAX>>1))
+#define S64_MIN         ((s64)(-S64_MAX - 1))
+#endif /* LINUX_VERSION_CODE */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#define SIZE_MAX        (~(size_t)0)
+#endif /* LINUX_VERSION_CODE */
+
+/**
+ * MIN - Return the lesser of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * Refer to MAX macro for more details
+ */
+#define MIN(x, y)	((x) < (y) ? (x) : (y))
+
+/**
+ * MAX -  Return the greater of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as MIN it is guaranteed to return
+ * the one that MIN didn't return. This is significant for types where not
+ * all values are comparable e.g. NaNs in floating-point types. But if you want
+ * to retrieve the min and max of two values, consider using a conditional swap
+ * instead.
+ */
+#define MAX(x, y)	((x) < (y) ? (y) : (x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * much support API backwards compatibility.
+ */
+#define CSTD_UNUSED(x)	((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile time macros turn a function-like macro in to a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP(...)	((void)#__VA_ARGS__)
+
+/**
+ * Function-like macro for converting a pointer in to a u64 for storing into
+ * an external data structure. This is commonly used when pairing a 32-bit
+ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
+ * is complex and a straight cast does not work reliably as pointers are
+ * often considered as signed.
+ */
+#define PTR_TO_U64(x)	((uint64_t)((uintptr_t)(x)))
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1(x)	#x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be used
+ * if the macro is defined in a way which may have no value; use the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2(x)	CSTD_STR1(x)
+
+/**
+ * Specify an assertion value which is evaluated at compile time. Recommended
+ * usage is specification of a @c static @c INLINE function containing all of
+ * the assertions thus:
+ *
+ * @code
+ * static INLINE [module]_compile_time_assertions( void )
+ * {
+ *     COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
+ * }
+ * @endcode
+ *
+ * @note Use @c static not @c STATIC. We never want to turn off this @c static
+ * specification for testing purposes.
+ */
+#define CSTD_COMPILE_TIME_ASSERT(expr) \
+	do { switch (0) { case 0: case (expr):; } } while (false)
+
+#endif /* _MALISW_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
new file mode 100644
index 0000000..a509cbd
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDG_COHERENCY_H_
+#define _MIDG_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE      1
+#define COHERENCY_NONE     31
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+
+#endif /* _MIDG_COHERENCY_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100644
index 0000000..59adfb8
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,619 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Begin Register Offsets
+ */
+
+#define GPU_CONTROL_BASE        0x0000
+#define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
+#define GPU_ID                  0x000	/* (RO) GPU and revision identifier */
+#define L2_FEATURES             0x004	/* (RO) Level 2 cache features */
+#define SUSPEND_SIZE            0x008   /* (RO) Fixed-function suspend buffer
+						size */
+#define TILER_FEATURES          0x00C	/* (RO) Tiler Features */
+#define MEM_FEATURES            0x010	/* (RO) Memory system features */
+#define MMU_FEATURES            0x014	/* (RO) MMU features */
+#define AS_PRESENT              0x018	/* (RO) Address space slots present */
+#define JS_PRESENT              0x01C	/* (RO) Job slots present */
+#define GPU_IRQ_RAWSTAT         0x020	/* (RW) */
+#define GPU_IRQ_CLEAR           0x024	/* (WO) */
+#define GPU_IRQ_MASK            0x028	/* (RW) */
+#define GPU_IRQ_STATUS          0x02C	/* (RO) */
+
+/* IRQ flags */
+#define GPU_FAULT               (1 << 0)	/* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS     (1 << 7)	/* More than one GPU Fault occurred. */
+#define RESET_COMPLETED         (1 << 8)	/* Set when a reset has completed. Intended to use with SOFT_RESET
+						   commands which may take time. */
+#define POWER_CHANGED_SINGLE    (1 << 9)	/* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL       (1 << 10)	/* Set when all cores have finished powering up or down
+						   and the power manager is idle. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16)	/* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED  (1 << 17)	/* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+			| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#define GPU_COMMAND             0x030	/* (WO) */
+#define GPU_STATUS              0x034	/* (RO) */
+#define LATEST_FLUSH            0x038	/* (RO) */
+
+#define GROUPS_L2_COHERENT      (1 << 0)	/* Cores groups are l2 coherent */
+#define GPU_DBGEN               (1 << 8)	/* DBGEN wire status */
+
+#define GPU_FAULTSTATUS         0x03C	/* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO     0x040	/* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI     0x044	/* (RO) GPU exception fault address, high word */
+
+#define PWR_KEY                 0x050	/* (WO) Power manager key register */
+#define PWR_OVERRIDE0           0x054	/* (RW) Power manager override settings */
+#define PWR_OVERRIDE1           0x058	/* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO          0x060	/* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI          0x064	/* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG           0x068	/* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN            0x06C	/* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN        0x070	/* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN         0x074	/* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN        0x07C	/* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO          0x090	/* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI          0x094	/* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO            0x098	/* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI            0x09C	/* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS		0x0A0	/* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4	/* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8	/* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES         0x0AC	/* (RO) Thread features */
+
+#define TEXTURE_FEATURES_0      0x0B0	/* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1      0x0B4	/* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2      0x0B8	/* (RO) Support flags for indexed texture formats 64..95 */
+#define TEXTURE_FEATURES_3      0x0BC	/* (RO) Support flags for texture order */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define JS0_FEATURES            0x0C0	/* (RO) Features of job slot 0 */
+#define JS1_FEATURES            0x0C4	/* (RO) Features of job slot 1 */
+#define JS2_FEATURES            0x0C8	/* (RO) Features of job slot 2 */
+#define JS3_FEATURES            0x0CC	/* (RO) Features of job slot 3 */
+#define JS4_FEATURES            0x0D0	/* (RO) Features of job slot 4 */
+#define JS5_FEATURES            0x0D4	/* (RO) Features of job slot 5 */
+#define JS6_FEATURES            0x0D8	/* (RO) Features of job slot 6 */
+#define JS7_FEATURES            0x0DC	/* (RO) Features of job slot 7 */
+#define JS8_FEATURES            0x0E0	/* (RO) Features of job slot 8 */
+#define JS9_FEATURES            0x0E4	/* (RO) Features of job slot 9 */
+#define JS10_FEATURES           0x0E8	/* (RO) Features of job slot 10 */
+#define JS11_FEATURES           0x0EC	/* (RO) Features of job slot 11 */
+#define JS12_FEATURES           0x0F0	/* (RO) Features of job slot 12 */
+#define JS13_FEATURES           0x0F4	/* (RO) Features of job slot 13 */
+#define JS14_FEATURES           0x0F8	/* (RO) Features of job slot 14 */
+#define JS15_FEATURES           0x0FC	/* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n)      GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define SHADER_PRESENT_LO       0x100	/* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI       0x104	/* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO        0x110	/* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI        0x114	/* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO           0x120	/* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI           0x124	/* (RO) Level 2 cache present bitmap, high word */
+
+#define STACK_PRESENT_LO        0xE00   /* (RO) Core stack present bitmap, low word */
+#define STACK_PRESENT_HI        0xE04   /* (RO) Core stack present bitmap, high word */
+
+
+#define SHADER_READY_LO         0x140	/* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI         0x144	/* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO          0x150	/* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI          0x154	/* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO             0x160	/* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI             0x164	/* (RO) Level 2 cache ready bitmap, high word */
+
+#define STACK_READY_LO          0xE10   /* (RO) Core stack ready bitmap, low word */
+#define STACK_READY_HI          0xE14   /* (RO) Core stack ready bitmap, high word */
+
+
+#define SHADER_PWRON_LO         0x180	/* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI         0x184	/* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO          0x190	/* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI          0x194	/* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO             0x1A0	/* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI             0x1A4	/* (WO) Level 2 cache power on bitmap, high word */
+
+#define STACK_PWRON_LO          0xE20   /* (RO) Core stack power on bitmap, low word */
+#define STACK_PWRON_HI          0xE24   /* (RO) Core stack power on bitmap, high word */
+
+
+#define SHADER_PWROFF_LO        0x1C0	/* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI        0x1C4	/* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO         0x1D0	/* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI         0x1D4	/* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO            0x1E0	/* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI            0x1E4	/* (WO) Level 2 cache power off bitmap, high word */
+
+#define STACK_PWROFF_LO         0xE30   /* (RO) Core stack power off bitmap, low word */
+#define STACK_PWROFF_HI         0xE34   /* (RO) Core stack power off bitmap, high word */
+
+
+#define SHADER_PWRTRANS_LO      0x200	/* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI      0x204	/* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO       0x210	/* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI       0x214	/* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO          0x220	/* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI          0x224	/* (RO) Level 2 cache power transition bitmap, high word */
+
+#define STACK_PWRTRANS_LO       0xE40   /* (RO) Core stack power transition bitmap, low word */
+#define STACK_PWRTRANS_HI       0xE44   /* (RO) Core stack power transition bitmap, high word */
+
+
+#define SHADER_PWRACTIVE_LO     0x240	/* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI     0x244	/* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO      0x250	/* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI      0x254	/* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO         0x260	/* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI         0x264	/* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES      0x300	/* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304	/* (RW) Coherency enable */
+
+#define JM_CONFIG               0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
+#define SHADER_CONFIG           0xF04	/* (RW) Shader core configuration settings (Implementation specific register) */
+#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define L2_MMU_CONFIG           0xF0C	/* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+#define JOB_CONTROL_BASE        0x1000
+
+#define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT         0x000	/* Raw interrupt status register */
+#define JOB_IRQ_CLEAR           0x004	/* Interrupt clear register */
+#define JOB_IRQ_MASK            0x008	/* Interrupt mask register */
+#define JOB_IRQ_STATUS          0x00C	/* Interrupt status register */
+#define JOB_IRQ_JS_STATE        0x010	/* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE        0x014	/* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
+
+#define JOB_SLOT0               0x800	/* Configuration registers for job slot 0 */
+#define JOB_SLOT1               0x880	/* Configuration registers for job slot 1 */
+#define JOB_SLOT2               0x900	/* Configuration registers for job slot 2 */
+#define JOB_SLOT3               0x980	/* Configuration registers for job slot 3 */
+#define JOB_SLOT4               0xA00	/* Configuration registers for job slot 4 */
+#define JOB_SLOT5               0xA80	/* Configuration registers for job slot 5 */
+#define JOB_SLOT6               0xB00	/* Configuration registers for job slot 6 */
+#define JOB_SLOT7               0xB80	/* Configuration registers for job slot 7 */
+#define JOB_SLOT8               0xC00	/* Configuration registers for job slot 8 */
+#define JOB_SLOT9               0xC80	/* Configuration registers for job slot 9 */
+#define JOB_SLOT10              0xD00	/* Configuration registers for job slot 10 */
+#define JOB_SLOT11              0xD80	/* Configuration registers for job slot 11 */
+#define JOB_SLOT12              0xE00	/* Configuration registers for job slot 12 */
+#define JOB_SLOT13              0xE80	/* Configuration registers for job slot 13 */
+#define JOB_SLOT14              0xF00	/* Configuration registers for job slot 14 */
+#define JOB_SLOT15              0xF80	/* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r)      (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO             0x00	/* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI             0x04	/* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO             0x08	/* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI             0x0C	/* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
+#define JS_XAFFINITY           0x1C	/* (RO) Extended affinity mask for job
+					   slot n */
+
+#define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
+#define JS_STATUS              0x24	/* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO        0x40	/* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI        0x44	/* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
+#define JS_XAFFINITY_NEXT      0x5C	/* (RW) Next extended affinity mask for
+					   job slot n */
+
+#define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
+
+#define MEMORY_MANAGEMENT_BASE  0x2000
+#define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT         0x000	/* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR           0x004	/* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK            0x008	/* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS          0x00C	/* (RO) Interrupt status register */
+
+#define MMU_AS0                 0x400	/* Configuration registers for address space 0 */
+#define MMU_AS1                 0x440	/* Configuration registers for address space 1 */
+#define MMU_AS2                 0x480	/* Configuration registers for address space 2 */
+#define MMU_AS3                 0x4C0	/* Configuration registers for address space 3 */
+#define MMU_AS4                 0x500	/* Configuration registers for address space 4 */
+#define MMU_AS5                 0x540	/* Configuration registers for address space 5 */
+#define MMU_AS6                 0x580	/* Configuration registers for address space 6 */
+#define MMU_AS7                 0x5C0	/* Configuration registers for address space 7 */
+#define MMU_AS8                 0x600	/* Configuration registers for address space 8 */
+#define MMU_AS9                 0x640	/* Configuration registers for address space 9 */
+#define MMU_AS10                0x680	/* Configuration registers for address space 10 */
+#define MMU_AS11                0x6C0	/* Configuration registers for address space 11 */
+#define MMU_AS12                0x700	/* Configuration registers for address space 12 */
+#define MMU_AS13                0x740	/* Configuration registers for address space 13 */
+#define MMU_AS14                0x780	/* Configuration registers for address space 14 */
+#define MMU_AS15                0x7C0	/* Configuration registers for address space 15 */
+
+#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO         0x00	/* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI         0x04	/* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO          0x08	/* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI          0x0C	/* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO         0x10	/* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI         0x14	/* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND             0x18	/* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS         0x1C	/* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO     0x20	/* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
+#define AS_STATUS              0x28	/* (RO) Status flags for address space n */
+
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
+
+/* End Register Offsets */
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+   MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS   16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n)      (1UL << (n))
+#define MMU_BUS_ERROR(n)       (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK   0xfffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED  (0u << 0)
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY  (1u << 1)
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE     (3u << 0)
+#define AS_TRANSTAB_LPAE_READ_INNER        (1u << 2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER       (1u << 4)
+
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK      0x00000003
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                    (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT       (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT        (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT      (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG             (0x3<<3)
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT      (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK                  (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC                (0x0<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX                    (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ                  (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE                 (0x3<<8)
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
+
+/*
+ * Begin Command Values
+ */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP         0x00	/* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START       0x01	/* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP   0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK        0x07    /* Mask of bits currently in use by the HW */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP         0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK        0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK      0x03	/* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH       0x04	/* Flush all L2 caches then issue a flush region command to all MMUs
+					   (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT    0x04	/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM   0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
+					   flush all L2 caches then issue a flush region command to all MMUs */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION        (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN            (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU                    (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG               (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
+
+/* JS_XAFFINITY register values */
+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
+#define JS_XAFFINITY_TILER_ENABLE     (1u << 8)
+#define JS_XAFFINITY_CACHE_ENABLE     (1u << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
+ */
+
+/* Group of values representing the job status insead a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE   0x00
+#define JS_STATUS_INTERRUPTED         (JS_STATUS_NO_EXCEPTION_BASE + 0x02)	/* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED             (JS_STATUS_NO_EXCEPTION_BASE + 0x03)	/* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED          (JS_STATUS_NO_EXCEPTION_BASE + 0x04)	/* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE          0x40
+#define JS_STATUS_CONFIG_FAULT        (JS_STATUS_FAULT_BASE)	/* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT         (JS_STATUS_FAULT_BASE + 0x01)	/* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT          (JS_STATUS_FAULT_BASE + 0x02)	/* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT         (JS_STATUS_FAULT_BASE + 0x03)	/* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT      (JS_STATUS_FAULT_BASE + 0x04)	/* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT           (JS_STATUS_FAULT_BASE + 0x08)	/* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE  0x50
+#define JS_STATUS_INSTR_INVALID_PC        (JS_STATUS_INSTRUCTION_FAULT_BASE)	/* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01)	/* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02)	/* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03)	/* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT         (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04)	/* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05)	/* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06)	/* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT      (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08)	/* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT        (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09)	/* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A)	/* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE   0x60
+#define JS_STATUS_OUT_OF_MEMORY       (JS_STATUS_MEMORY_FAULT_BASE)	/* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN             0x7F	/* 0x7F means UNKNOWN */
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP                0x00	/* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET         0x01	/* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET         0x02	/* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR       0x03	/* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE      0x04	/* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START  0x05	/* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP   0x06	/* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES       0x07	/* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES   0x08	/* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09	/* Places the GPU in protected mode */
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE           (1 << 2)	/* Set if the performance counters are active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE   (1 << 7)	/* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT      0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT        4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+
+#define PRFCNT_CONFIG_MODE_OFF    0	/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_MANUAL 1	/* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
+#define PRFCNT_CONFIG_MODE_TILE   2	/* The performance counters are enabled, and are written out each time a tile finishes rendering. */
+
+/* AS<n>_MEMATTR values: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
+
+/* Use GPU implementation-defined  caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC           0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
+
+/* Symbols for default MEMATTR to use
+ * Default is - HW implementation defined caching */
+#define AS_MEMATTR_INDEX_DEFAULT               0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE           3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL    1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC           2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF        3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA              4
+
+/* JS<n>_FEATURES register */
+
+#define JS_FEATURE_NULL_JOB              (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB         (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB       (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB           (1u << 4)
+#define JS_FEATURE_VERTEX_JOB            (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB          (1u << 6)
+#define JS_FEATURE_TILER_JOB             (1u << 7)
+#define JS_FEATURE_FUSED_JOB             (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB          (1u << 9)
+
+/* End JS<n>_FEATURES register */
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT        (24)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS              (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT       (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER      (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF         (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT       (26)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES             (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT      (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER     (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF        (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT      (12)
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS            (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT     (15)
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES           (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED  0
+#define IMPLEMENTATION_SILICON      1
+#define IMPLEMENTATION_FPGA         2
+#define IMPLEMENTATION_MODEL        3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT     256
+#define THREAD_MWS_DEFAULT    256
+#define THREAD_MBS_DEFAULT    256
+#define THREAD_MR_DEFAULT     1024
+#define THREAD_MTQ_DEFAULT    4
+#define THREAD_MTGS_DEFAULT   10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+
+#define SC_ALT_COUNTERS             (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL  (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD   (1ul << 6)
+#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
+#define SC_LS_PAUSEBUFFER_DISABLE   (1ul << 16)
+#define SC_TLS_HASH_ENABLE          (1ul << 17)
+#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
+#define SC_ENABLE_TEXGRD_FLAGS      (1ul << 25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+
+#define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
+
+/* End TILER_CONFIG register */
+
+/* JM_CONFIG register */
+
+#define JM_TIMESTAMP_OVERRIDE  (1ul << 0)
+#define JM_CLOCK_GATE_OVERRIDE (1ul << 1)
+#define JM_JOB_THROTTLE_ENABLE (1ul << 2)
+#define JM_JOB_THROTTLE_LIMIT_SHIFT (3)
+#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F)
+#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2)
+#define JM_IDVS_GROUP_SIZE_SHIFT (16)
+#define JM_MAX_IDVS_GROUP_SIZE (0x3F)
+/* End JM_CONFIG register */
+
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_timeline.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_timeline.h
new file mode 100644
index 0000000..bd5f661
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_timeline.h
@@ -0,0 +1,396 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali_timeline
+
+#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _MALI_TIMELINE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mali_timeline_atoms_in_flight,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int tgid,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		tgid,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, tgid)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->tgid = tgid;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT,
+				(int)__entry->ts_sec,
+				(int)__entry->ts_nsec,
+				__entry->tgid,
+				__entry->count)
+);
+
+
+TRACE_EVENT(mali_timeline_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->atom_id,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_action,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int active),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		active),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, active)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->active = active;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->active)
+
+);
+
+TRACE_EVENT(mali_timeline_l2_power_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int state),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		state),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, state)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->state = state;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->state)
+
+);
+TRACE_EVENT(mali_timeline_pm_event,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int pm_event_type,
+		unsigned int pm_event_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		pm_event_type,
+		pm_event_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, pm_event_type)
+			__field(unsigned int, pm_event_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->pm_event_type = pm_event_type;
+		__entry->pm_event_id = pm_event_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->pm_event_type, __entry->pm_event_id)
+
+);
+
+TRACE_EVENT(mali_timeline_slot_atom,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int event_type,
+		int tgid,
+		int js,
+		int atom_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		event_type,
+		tgid,
+		js,
+		atom_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, event_type)
+			__field(int, tgid)
+			__field(int, js)
+			__field(int, atom_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->event_type = event_type;
+		__entry->tgid = tgid;
+		__entry->js = js;
+		__entry->atom_id = atom_id;
+	),
+
+	TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->tgid,
+			__entry->js,
+			__entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_pm_checktrans,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int trans_code,
+		int trans_id),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		trans_code,
+		trans_id),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, trans_code)
+			__field(int, trans_id)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->trans_code = trans_code;
+		__entry->trans_id = trans_id;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->trans_id)
+
+);
+
+TRACE_EVENT(mali_timeline_context_active,
+
+	TP_PROTO(u64 ts_sec,
+		u32 ts_nsec,
+		int count),
+
+	TP_ARGS(ts_sec,
+		ts_nsec,
+		count),
+
+	TP_STRUCT__entry(
+			__field(u64, ts_sec)
+			__field(u32, ts_nsec)
+			__field(int, count)
+	),
+
+	TP_fast_assign(
+		__entry->ts_sec = ts_sec;
+		__entry->ts_nsec = ts_nsec;
+		__entry->count = count;
+	),
+
+	TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE,
+			(int)__entry->ts_sec,
+			(int)__entry->ts_nsec,
+			__entry->count)
+);
+
+#endif /* _MALI_TIMELINE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_uk.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_uk.h
new file mode 100644
index 0000000..841d03f
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,141 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif				/* __cplusplus */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
+ * identifier to select a UKK client to the uku_open() function.
+ *
+ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
+ * enumeration and the uku_open() implemenation for the various OS ports need to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+	/**
+	 * Value used to identify the Base driver UK client.
+	 */
+	UK_CLIENT_MALI_T600_BASE,
+
+	/** The number of uk clients supported. This must be the last member of the enum */
+	UK_CLIENT_COUNT
+};
+
+/**
+ * Each function callable through the UK interface has a unique number.
+ * Functions provided by UK clients start from number UK_FUNC_ID.
+ * Numbers below UK_FUNC_ID are used for internal UK functions.
+ */
+enum uk_func {
+	UKP_FUNC_ID_CHECK_VERSION,   /**< UKK Core internal function */
+	/**
+	 * Each UK client numbers the functions they provide starting from
+	 * number UK_FUNC_ID. This number is then eventually assigned to the
+	 * id field of the union uk_header structure when preparing to make a
+	 * UK call. See your UK client for a list of their function numbers.
+	 */
+	UK_FUNC_ID = 512
+};
+
+/**
+ * Arguments for a UK call are stored in a structure. This structure consists
+ * of a fixed size header and a payload. The header carries a 32-bit number
+ * identifying the UK function to be called (see uk_func). When the UKK client
+ * receives this header and executed the requested UK function, it will use
+ * the same header to store the result of the function in the form of a
+ * int return code. The size of this structure is such that the
+ * first member of the payload following the header can be accessed efficiently
+ * on a 32 and 64-bit kernel and the structure has the same size regardless
+ * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
+ * accordingly in the OS specific mali_uk_os.h header file.
+ */
+union uk_header {
+	/**
+	 * 32-bit number identifying the UK function to be called.
+	 * Also see uk_func.
+	 */
+	u32 id;
+	/**
+	 * The int return code returned by the called UK function.
+	 * See the specification of the particular UK function you are
+	 * calling for the meaning of the error codes returned. All
+	 * UK functions return 0 on success.
+	 */
+	u32 ret;
+	/*
+	 * Used to ensure 64-bit alignment of this union. Do not remove.
+	 * This field is used for padding and does not need to be initialized.
+	 */
+	u64 sizer;
+};
+
+/**
+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
+ */
+struct uku_version_check_args {
+	union uk_header header;
+		  /**< UK call header */
+	u16 major;
+	   /**< This field carries the user-side major version on input and the kernel-side major version on output */
+	u16 minor;
+	   /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
+	u8 padding[4];
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif				/* __cplusplus */
+#endif				/* _UK_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/Kconfig b/t83x/kernel/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100644
index 0000000..38835d3
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name is the name set in MALI_PLATFORM_NAME
+#
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
new file mode 100644
index 0000000..5043558
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
@@ -0,0 +1,31 @@
+#
+# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+USE_GPPLL?=0
+ifdef CONFIG_AM_VIDEO
+    USE_GPPLL:=1
+endif
+
+ccflags-y += -DAMLOGIC_GPU_USE_GPPLL=$(USE_GPPLL)
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \
+	$(MALI_PLATFORM_DIR)/mali_clock.o \
+	$(MALI_PLATFORM_DIR)/mpgpu.o \
+	$(MALI_PLATFORM_DIR)/meson_main2.o \
+	$(MALI_PLATFORM_DIR)/platform_gx.o \
+	$(MALI_PLATFORM_DIR)/scaling.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
new file mode 100644
index 0000000..35249a5
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c
@@ -0,0 +1,653 @@
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+#ifndef AML_CLK_LOCK_ERROR
+#define AML_CLK_LOCK_ERROR 1
+#endif
+
+static unsigned gpu_dbg_level = 0;
+module_param(gpu_dbg_level, uint, 0644);
+MODULE_PARM_DESC(gpu_dbg_level, "gpu debug level");
+
+#define gpu_dbg(level, fmt, arg...)			\
+	do {			\
+		if (gpu_dbg_level >= (level))		\
+		printk("gpu_debug"fmt , ## arg); 	\
+	} while (0)
+
+#define GPU_CLK_DBG(fmt, arg...)
+
+//disable print
+#define _dev_info(...)
+
+//static DEFINE_SPINLOCK(lock);
+static mali_plat_info_t* pmali_plat = NULL;
+//static u32 mali_extr_backup = 0;
+//static u32 mali_extr_sample_backup = 0;
+struct timeval start;
+struct timeval end;
+int mali_pm_statue = 0;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali_0_parent = dvfs_tbl->clkp_handle;
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+#ifdef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+#endif
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_set_parent(clk_mali_0, clk_mali_0_parent);
+
+	clk_prepare_enable(clk_mali_0);
+
+	clk_set_parent(clk_mali, clk_mali_0);
+
+#ifdef AML_CLK_LOCK_ERROR
+	clk_set_parent(clk_mali_1, clk_mali_0_parent);
+	clk_prepare_enable(clk_mali_1);
+#endif
+
+	GPU_CLK_DBG("%s:enable(%d), %s:enable(%d)\n",
+	  clk_mali_0->name,  clk_mali_0->enable_count,
+	  clk_mali_0_parent->name, clk_mali_0_parent->enable_count);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+	struct clk *clk_mali_x   = NULL;
+	struct clk *clk_mali_x_parent = NULL;
+	struct clk *clk_mali_x_old = NULL;
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+	clk_mali_x_old  = clk_get_parent(clk_mali);
+
+	if (!clk_mali_x_old) {
+		printk("gpu: could not get clk_mali_x_old or clk_mali_x_old\n");
+		return 0;
+	}
+	if (clk_mali_x_old == clk_mali_0) {
+		clk_mali_x = clk_mali_1;
+	} else if (clk_mali_x_old == clk_mali_1) {
+		clk_mali_x = clk_mali_0;
+	} else {
+		printk("gpu: unmatched clk_mali_x_old\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG("idx=%d, clk_freq=%d\n", idx, dvfs_tbl->clk_freq);
+	clk_mali_x_parent = dvfs_tbl->clkp_handle;
+	if (!clk_mali_x_parent) {
+		printk("gpu: could not get clk_mali_x_parent\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x_parent, dvfs_tbl->clkp_freq);
+	GPU_CLK_DBG();
+	ret = clk_set_parent(clk_mali_x, clk_mali_x_parent);
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x, dvfs_tbl->clk_freq);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	ret = clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG("new %s:enable(%d)\n", clk_mali_x->name,  clk_mali_x->enable_count);
+	do_gettimeofday(&start);
+	udelay(1);// delay 10ns
+	do_gettimeofday(&end);
+	ret = clk_set_parent(clk_mali, clk_mali_x);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	GPU_CLK_DBG("old %s:enable(%d)\n", clk_mali_x_old->name,  clk_mali_x_old->enable_count);
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "ao io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	mpdata->dvfs_table_size = 0;
+
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+	dev_notice(&pdev->dev, "dvfs table is %d\n", mpdata->dvfs_table_size);
+	dev_notice(&pdev->dev, "dvfs table addr %p, ele size=%zd\n",
+			mpdata->dvfs_table,
+			sizeof(mpdata->dvfs_table[0]));
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "clk_gpu");
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_gpu_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_gpu_1");
+	if (IS_ERR(mpdata->clk_mali) || IS_ERR(mpdata->clk_mali_0) || IS_ERR(mpdata->clk_mali_1)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+#else
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	//mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+
+	GPU_CLK_DBG();
+	do_gettimeofday(&start);
+	ret = clk_set_rate(clk_mali, dvfs_tbl->clk_freq);
+	do_gettimeofday(&end);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	GPU_CLK_DBG();
+	clk_disable_unprepare(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+#if 0
+#ifdef MESON_CPU_VERSION_OPS
+		if (is_meson_gxbbm_cpu()) {
+			if (dvfs_tbl->clk_freq >= GXBBM_MAX_GPU_FREQ)
+				continue;
+		}
+#endif
+#endif
+#if  0
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+#endif
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+	_dev_info(&pdev->dev, "clock dvfs table size is %d\n", mpdata->dvfs_table_size);
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "gpu_mux");
+#if 0
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_mali_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_mali_1");
+#endif
+	if (IS_ERR(mpdata->clk_mali)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
new file mode 100644
index 0000000..9b8b392
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h
@@ -0,0 +1,37 @@
+#ifndef __MALI_CLOCK_H__
+#define __MALI_CLOCK_H__
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <linux/clk.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 29))
+#include <linux/amlogic/iomap.h>
+#endif
+
+#ifndef HHI_MALI_CLK_CNTL
+#define HHI_MALI_CLK_CNTL   0x6C
+#define mplt_read(r)        readl((pmali_plat->reg_base_hiubus) + ((r)<<2))
+#define mplt_write(r, v)    writel((v), ((pmali_plat->reg_base_hiubus) + ((r)<<2)))
+#define mplt_setbits(r, m)  mplt_write((r), (mplt_read(r) | (m)));
+#define mplt_clrbits(r, m)  mplt_write((r), (mplt_read(r) & (~(m))));
+#endif
+
+//extern int mali_clock_init(struct platform_device *dev);
+int mali_clock_init_clk_tree(struct platform_device *pdev);
+
+typedef int (*critical_t)(size_t param);
+int mali_clock_critical(critical_t critical, size_t param);
+
+int mali_clock_init(mali_plat_info_t*);
+int mali_clock_set(unsigned int index);
+void disable_clock(void);
+void enable_clock(void);
+u32 get_mali_freq(u32 idx);
+void set_str_src(u32 data);
+int mali_dt_info(struct platform_device *pdev,
+        struct mali_plat_info_t *mpdata);
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
new file mode 100644
index 0000000..8d762b0
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
@@ -0,0 +1,120 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/version.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static unsigned long t83x_static_power(unsigned long voltage)
+{
+#if 0
+	struct thermal_zone_device *tz;
+	unsigned long temperature, temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10);
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	tz = thermal_zone_get_zone_by_name("gpu");
+	if (IS_ERR(tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(tz));
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	} else {
+		int ret;
+
+		ret = tz->ops->get_temp(tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	temp_scaling_factor =
+			(2 * temp_cubed)
+			- (80 * temp_squared)
+			+ (4700 * temp)
+			+ 32000;
+
+	return (((coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+#else
+	return 0;
+#endif
+}
+
+static unsigned long t83x_dynamic_power(unsigned long freq,
+		unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz mV mV).
+	 *
+	 * This function calculates the dynamic power after this formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+	const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+	const unsigned long f_mhz = freq / 1000000; /* MHz */
+	const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */
+
+	return (coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+struct devfreq_cooling_ops t83x_model_ops = {
+#else
+struct devfreq_cooling_power t83x_model_ops = {
+#endif
+	.get_static_power = t83x_static_power,
+	.get_dynamic_power = t83x_dynamic_power,
+};
+
+#endif
+
+#include <mali_kbase_config.h>
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &dummy_platform_config;
+}
+
+#ifndef CONFIG_OF
+int kbase_platform_register(void)
+{
+	return 0;
+}
+
+void kbase_platform_unregister(void)
+{
+}
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
new file mode 100644
index 0000000..3ab8dfd
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (750000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (100000)
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (NULL)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+extern struct kbase_platform_funcs_conf dt_funcs_conf;
+#define PLATFORM_FUNCS (&dt_funcs_conf)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#ifdef CONFIG_DEVFREQ_THERMAL
+#define POWER_MODEL_CALLBACKS (&t83x_model_ops)
+extern struct devfreq_cooling_ops t83x_model_ops;
+#else
+#define POWER_MODEL_CALLBACKS (NULL)
+#endif
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+/**
+ * Autosuspend delay
+ *
+ * The delay time (in milliseconds) to be used for autosuspend
+ */
+#define AUTO_SUSPEND_DELAY (100)
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
new file mode 100644
index 0000000..3adb5e4
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
@@ -0,0 +1,254 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <linux/pm_runtime.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_config_platform.h"
+
+void *reg_base_hiubus = NULL;
+u32  override_value_aml = 0;
+static int first = 1;
+
+extern u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type);
+static void  Mali_pwr_on_with_kdev ( struct kbase_device *kbdev, uint32_t  mask)
+{
+    uint32_t    part1_done;
+    part1_done = 0;
+    Mali_WrReg(0x0, 0x0, 0x0000024, 0xffffffff); // clear interrupts
+
+    if ((mask & 0x1) != 0 ) {
+        Mali_WrReg(0x0, 0x0, 0x00000190, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x00000194, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x000001a0, 0xffffffff);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x000001a4, 0xffffffff);    // Power on all cores
+    }
+
+    if ( (mask >> 1) != 0 ) {
+        Mali_WrReg(0x0, 0x0, 0x00000180, mask >> 1);    // Power on all cores
+        Mali_WrReg(0x0, 0x0, 0x00000184, 0x0);    // Power on all cores
+    }
+
+    if ( mask != 0 ) {
+        udelay(10);
+        part1_done = Mali_RdReg(0x0, 0x0, 0x0000020);
+        //printk("%s, %d\n", __func__, __LINE__);
+        while((part1_done ==0)) { part1_done = Mali_RdReg(0x0, 0x0, 0x00000020); udelay(10); }
+        //printk("Mali_pwr_on:gpu_irq : %x\n", part1_done);
+        Mali_WrReg(0x0, 0x0, 0x0000024, 0xffffffff); // clear interrupts
+    }
+}
+
+static void gpu_power_main( struct kbase_device *kbdev) {
+
+    Mali_pwr_on ( 0x7);
+
+    printk("%s, %d\n", __func__, __LINE__);
+}
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	int ret = 1; /* Assume GPU has been powered off */
+	int error;
+	struct platform_device *pdev = to_platform_device(kbdev->dev);
+	struct resource *reg_res;
+	u64 core_ready;
+	u64 l2_ready;
+	u64 tiler_ready;
+	u32 value;
+
+    //printk("20151013, %s, %d\n", __FILE__, __LINE__);
+    if (first == 0) goto ret;
+
+    reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+    if (!reg_res) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) 
+        reg_base_hiubus = ioremap(reg_res->start, resource_size(reg_res));
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+    if (NULL == reg_base_hiubus) {
+        dev_err(kbdev->dev, "Invalid register resource\n");
+        ret = -ENOENT;
+    }
+
+    //printk("%s, %d\n", __FILE__, __LINE__);
+
+//JOHNT
+    // Level reset mail
+
+    // Level reset mail
+    //Wr(P_RESET2_MASK, ~(0x1<<14));
+    //Wr(P_RESET2_LEVEL, ~(0x1<<14));
+
+    //Wr(P_RESET2_LEVEL, 0xffffffff);
+    //Wr(P_RESET0_LEVEL, 0xffffffff);
+
+    value = Rd(RESET0_MASK);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_MASK, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value & (~(0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+///////////////
+    value = Rd(RESET2_MASK);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_MASK, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value & (~(0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    value = Rd(RESET0_LEVEL);
+    value = value | ((0x1<<20));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET0_LEVEL, value);
+
+    value = Rd(RESET2_LEVEL);
+    value = value | ((0x1<<14));
+    //printk("line(%d), value=%x\n", __LINE__, value);
+    Wr(RESET2_LEVEL, value);
+
+    udelay(10); // OR POLL for reset done
+
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL);
+    kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL);
+
+    gpu_power_main(kbdev);
+    //printk("set PWR_ORRIDE, reg=%p, reg_start=%llx, reg_size=%zx, reg_mapped=%p\n",
+    //        kbdev->reg, kbdev->reg_start, kbdev->reg_size, reg_base_hiubus);
+	dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+			(void *)kbdev->dev->pm_domain);
+
+    first = 0;
+    //printk("%s, %d\n", __FILE__, __LINE__);
+ret:
+	error = pm_runtime_get_sync(kbdev->dev);
+	if (error == 1) {
+		/*
+		 * Let core know that the chip has not been
+		 * powered off, so we can save on re-initialization.
+		 */
+		ret = 0;
+	}
+	udelay(100);
+#if 1
+
+    core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+    l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
+    tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+    //printk("core_ready=%llx, l2_ready=%llx, tiler_ready=%llx\n", core_ready, l2_ready, tiler_ready);
+#endif
+	dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error);
+
+	return ret;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+    //printk("%s, %d\n", __FILE__, __LINE__);
+#if 0
+    iounmap(reg_base_hiubus);
+    reg_base_hiubus = NULL;
+#endif
+	pm_runtime_mark_last_busy(kbdev->dev);
+	pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+	
+	pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY);
+	pm_runtime_use_autosuspend(kbdev->dev);
+	pm_runtime_set_active(kbdev->dev);
+	
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+	pm_runtime_enable(kbdev->dev);
+
+	if (!pm_runtime_enabled(kbdev->dev)) {
+		dev_warn(kbdev->dev, "pm_runtime not enabled");
+		ret = -ENOSYS;
+	}
+
+	return ret;
+}
+
+static void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+	pm_runtime_disable(kbdev->dev);
+}
+#endif
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+	return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+	int ret = pm_callback_runtime_on(kbdev);
+
+	WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+	pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback = pm_callback_suspend,
+	.power_resume_callback = pm_callback_resume,
+#ifdef KBASE_PM_RUNTIME
+	.power_runtime_init_callback = kbase_device_runtime_init,
+	.power_runtime_term_callback = kbase_device_runtime_disable,
+	.power_runtime_on_callback = pm_callback_runtime_on,
+	.power_runtime_off_callback = pm_callback_runtime_off,
+#else				/* KBASE_PM_RUNTIME */
+	.power_runtime_init_callback = NULL,
+	.power_runtime_term_callback = NULL,
+	.power_runtime_on_callback = NULL,
+	.power_runtime_off_callback = NULL,
+#endif				/* KBASE_PM_RUNTIME */
+};
+
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
new file mode 100644
index 0000000..41185d0
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h
@@ -0,0 +1,15 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/kernel.h>
+#ifndef MALI_PLATFORM_H_
+#define MALI_PLATFORM_H_
+
+extern u32 mali_gp_reset_fail;
+extern u32 mali_core_timeout;
+
+#endif /* MALI_PLATFORM_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
new file mode 100644
index 0000000..b541090
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.h
+ * Example core scaling policy.
+ */
+
+#ifndef __ARM_CORE_SCALING_H__
+#define __ARM_CORE_SCALING_H__
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+
+enum mali_scale_mode_t {
+	MALI_PP_SCALING = 0,
+	MALI_PP_FS_SCALING,
+	MALI_SCALING_DISABLE,
+	MALI_TURBO_MODE,
+	MALI_SCALING_MODE_MAX
+};
+
+typedef struct mali_dvfs_threshold_table {
+	uint32_t    freq_index;
+	uint32_t    voltage;
+	uint32_t    keep_count;
+	uint32_t    downthreshold;
+	uint32_t    upthreshold;
+    uint32_t    clk_freq;
+    const char  *clk_parent;
+    struct clk  *clkp_handle;
+    uint32_t    clkp_freq;
+} mali_dvfs_threshold_table;
+
+/**
+ *  restrictions on frequency and number of pp.
+ */
+typedef struct mali_scale_info_t {
+	u32 minpp;
+	u32 maxpp;
+	u32 minclk;
+	u32 maxclk;
+} mali_scale_info_t;
+
+/**
+ * Platform spesific data for meson chips.
+ */
+typedef struct mali_plat_info_t {
+	u32 cfg_pp; /* number of pp. */
+	u32 cfg_min_pp;
+	u32 turbo_clock; /* reserved clock src. */
+	u32 def_clock; /* gpu clock used most of time.*/
+	u32 cfg_clock; /* max clock could be used.*/
+	u32 cfg_clock_bkup; /* same as cfg_clock, for backup. */
+	u32 cfg_min_clock;
+
+	u32  sc_mpp; /* number of pp used most of time.*/
+	u32  bst_gpu; /* threshold for boosting gpu. */
+	u32  bst_pp; /* threshold for boosting PP. */
+
+	u32 *clk;
+	u32 *clk_sample;
+	u32 clk_len;
+	u32 have_switch; /* have clock gate switch or not. */
+
+	mali_dvfs_threshold_table *dvfs_table;
+	u32 dvfs_table_size;
+
+	mali_scale_info_t scale_info;
+	u32 maxclk_sysfs;
+	u32 maxpp_sysfs;
+
+	/* set upper limit of pp or frequency, for THERMAL thermal or band width saving.*/
+	u32 limit_on;
+
+	/* for boost up gpu by user. */
+	void (*plat_preheat)(void);
+
+    struct platform_device *pdev;
+    void __iomem *reg_base_hiubus;
+    void __iomem *reg_base_aobus;
+	struct work_struct wq_work;
+    struct clk *clk_mali;
+    struct clk *clk_mali_0;
+    struct clk *clk_mali_1;
+} mali_plat_info_t;
+mali_plat_info_t* get_mali_plat_data(void);
+
+/**
+ * Initialize core scaling policy.
+ *
+ * @note The core scaling policy will assume that all PP cores are on initially.
+ *
+ * @param num_pp_cores Total number of PP cores.
+ */
+int mali_core_scaling_init(mali_plat_info_t*);
+
+/**
+ * Terminate core scaling policy.
+ */
+void mali_core_scaling_term(void);
+
+/**
+ * cancel and flush scaling job queue.
+ */
+void flush_scaling_job(void);
+
+/* get current state(pp, clk). */
+void get_mali_rt_clkpp(u32* clk, u32* pp);
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush);
+void revise_mali_rt(void);
+/* get max gpu clk level of this chip*/
+int get_gpu_max_clk_level(void);
+
+/* get or set the scale mode. */
+u32 get_mali_schel_mode(void);
+void set_mali_schel_mode(u32 mode);
+
+/* for frequency reporter in DS-5 streamline. */
+u32 get_current_frequency(void);
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+#endif /* __ARM_CORE_SCALING_H__ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
new file mode 100644
index 0000000..7687f92
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2010, 2012-2014 Amlogic Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ */
+
+/**
+ * @file mali_platform.c
+ * Platform specific Mali driver functions for:
+ * meson8m2 and the newer chip
+ */
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#include <asm/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/moduleparam.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase_defs.h>
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+extern void mali_post_init(void);
+struct kbase_device;
+//static int gpu_dvfs_probe(struct platform_device *pdev)
+int platform_dt_init_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	int err = -1;
+
+	err = mali_meson_init_start(pdev);
+    mali_meson_init_finish(pdev);
+    mpgpu_class_init();
+    mali_post_init();
+    return err;
+}
+
+//static int gpu_dvfs_remove(struct platform_device *pdev)
+void platform_dt_term_func(struct kbase_device *kbdev)
+{
+    struct device *dev = kbdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	printk("%s, %d\n", __FILE__, __LINE__);
+
+    mpgpu_class_exit();
+	mali_meson_uninit(pdev);
+
+}
+
+inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2])
+{
+    mali_gpu_utilization_callback(utilisation*255/100);
+    return 1;
+}
+
+struct kbase_platform_funcs_conf dt_funcs_conf = {
+    .platform_init_func = platform_dt_init_func,
+    .platform_term_func = platform_dt_term_func,
+};
+#if 0
+static const struct of_device_id gpu_dvfs_ids[] = {
+	{ .compatible = "meson, gpu-dvfs-1.00.a" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, gpu_dvfs_ids);
+
+static struct platform_driver gpu_dvfs_driver = {
+	.driver = {
+		.name = "meson-gpu-dvfs",
+		.owner = THIS_MODULE,
+		.of_match_table = gpu_dvfs_ids,
+	},
+	.probe = gpu_dvfs_probe,
+	.remove = gpu_dvfs_remove,
+};
+module_platform_driver(gpu_dvfs_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Amlogic SH, MM");
+MODULE_DESCRIPTION("Driver for the Meson GPU dvfs");
+#endif
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
new file mode 100644
index 0000000..ca79752
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h
@@ -0,0 +1,33 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#ifndef MESON_MAIN_H_
+#define MESON_MAIN_H_
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+u32 set_max_mali_freq(u32 idx);
+u32 get_max_mali_freq(void);
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev);
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev);
+int mali_meson_uninit(struct platform_device* ptr_plt_dev);
+int mpgpu_class_init(void);
+void mpgpu_class_exit(void);
+void mali_gpu_utilization_callback(int utilization_pp);
+
+#endif /* MESON_MAIN_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
new file mode 100644
index 0000000..0cc5035
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c
@@ -0,0 +1,359 @@
+/*******************************************************************
+ *
+ *  Copyright C 2013 by Amlogic, Inc. All Rights Reserved.
+ *
+ *  Description:
+ *
+ *  Author: Amlogic Software
+ *  Created: 2010/4/1   19:46
+ *
+ *******************************************************************/
+/* Standard Linux headers */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/io.h>
+#include <plat/io.h>
+#include <asm/io.h>
+#endif
+
+#include "meson_main2.h"
+
+static ssize_t domain_stat_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	unsigned int val;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+	return sprintf(buf, "%x\n", val>>4);
+	return 0;
+}
+
+#define PREHEAT_CMD "preheat"
+#define PLL2_CMD "mpl2"  /* mpl2 [11] or [0xxxxxxx] */
+#define SCMPP_CMD "scmpp"  /* scmpp [number of pp your want in most of time]. */
+#define BSTGPU_CMD "bstgpu"  /* bstgpu [0-256] */
+#define BSTPP_CMD "bstpp"  /* bstpp [0-256] */
+#define LIMIT_CMD "lmt"  /* lmt [0 or 1] */
+#define MAX_TOKEN 20
+#define FULL_UTILIZATION 256
+
+static ssize_t mpgpu_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	char *pstart, *cprt = NULL;
+	u32 val = 0;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	cprt = skip_spaces(buf);
+	pstart = strsep(&cprt," ");
+	if (strlen(pstart) < 1)
+		goto quit;
+
+	if (!strncmp(pstart, PREHEAT_CMD, MAX_TOKEN)) {
+		if (pmali_plat->plat_preheat) {
+			pmali_plat->plat_preheat();
+		}
+	} else if (!strncmp(pstart, PLL2_CMD, MAX_TOKEN)) {
+		int base = 10;
+		if ((strlen(cprt) > 2) && (cprt[0] == '0') &&
+				(cprt[1] == 'x' || cprt[1] == 'X'))
+			base = 16;
+		if (kstrtouint(cprt, base, &val) <0)
+			goto quit;
+		if (val < 11)
+			pmali_plat->cfg_clock = pmali_plat->cfg_clock_bkup;
+		else
+			pmali_plat->cfg_clock = pmali_plat->turbo_clock;
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		set_str_src(val);
+	} else if (!strncmp(pstart, SCMPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < pmali_plat->cfg_pp)) {
+			pmali_plat->sc_mpp = val;
+		}
+	} else if (!strncmp(pstart, BSTGPU_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_gpu = val;
+		}
+	} else if (!strncmp(pstart, BSTPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_pp = val;
+		}
+	} else if (!strncmp(pstart, LIMIT_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+
+		if (val < 2) {
+			pmali_plat->limit_on = val;
+			if (val == 0) {
+				pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+				pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+				revise_mali_rt();
+			}
+		}
+	}
+quit:
+	return count;
+}
+
+static ssize_t scale_mode_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_mali_schel_mode());
+}
+
+static ssize_t scale_mode_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	set_mali_schel_mode(val);
+
+	return count;
+}
+
+#if 0
+static ssize_t max_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxpp:%d, maxpp_sysfs:%d, total=%d\n",
+			pmali_plat->scale_info.maxpp, pmali_plat->maxpp_sysfs,
+			pmali_plat->cfg_pp);
+	return sprintf(buf, "%d\n", pmali_plat->cfg_pp);
+}
+
+static ssize_t max_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_pp) || (val < pinfo->minpp))
+		return -EINVAL;
+
+	pmali_plat->maxpp_sysfs = val;
+	pinfo->maxpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minpp);
+}
+
+static ssize_t min_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxpp) || (val < 1))
+		return -EINVAL;
+
+	pinfo->minpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+#endif
+
+static ssize_t max_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxclk:%d, maxclk_sys:%d, max gpu level=%d\n",
+			pmali_plat->scale_info.maxclk, pmali_plat->maxclk_sysfs, get_gpu_max_clk_level());
+	return sprintf(buf, "%d\n", get_gpu_max_clk_level());
+}
+
+static ssize_t max_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_clock) || (val < pinfo->minclk))
+		return -EINVAL;
+
+	pmali_plat->maxclk_sysfs = val;
+	pinfo->maxclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minclk);
+}
+
+static ssize_t min_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxclk))
+		return -EINVAL;
+
+	pinfo->minclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_current_frequency());
+}
+
+static ssize_t freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(val, pp, 1);
+
+	return count;
+}
+
+#if 0
+static ssize_t current_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+	return sprintf(buf, "%d\n", pp);
+}
+
+static ssize_t current_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+
+	get_mali_rt_clkpp(&clk, &pp);
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(clk, val, 1);
+
+	return count;
+}
+#endif
+
+static struct class_attribute mali_class_attrs[] = {
+	__ATTR(domain_stat,	0644, domain_stat_read, NULL),
+	__ATTR(mpgpucmd,	0644, NULL,		mpgpu_write),
+	__ATTR(scale_mode,	0644, scale_mode_read,  scale_mode_write),
+	__ATTR(min_freq,	0644, min_freq_read,  	min_freq_write),
+	__ATTR(max_freq,	0644, max_freq_read,	max_freq_write),
+#if 0
+	__ATTR(min_pp,		0644, min_pp_read,	min_pp_write),
+	__ATTR(max_pp,		0644, max_pp_read,	max_pp_write),
+#endif
+	__ATTR(cur_freq,	0644, freq_read,	freq_write),
+#if 0
+	__ATTR(cur_pp,		0644, current_pp_read,	current_pp_write),
+#endif
+};
+
+static struct class mpgpu_class = {
+	.name = "mpgpu",
+};
+
+int mpgpu_class_init(void)
+{
+	int ret = 0;
+	int i;
+	int attr_num =  ARRAY_SIZE(mali_class_attrs);
+
+	ret = class_register(&mpgpu_class);
+	if (ret) {
+		printk(KERN_ERR "%s: class_register failed\n", __func__);
+		return ret;
+	}
+	for (i = 0; i< attr_num; i++) {
+		ret = class_create_file(&mpgpu_class, &mali_class_attrs[i]);
+		if (ret) {
+			printk(KERN_ERR "%d ST: class item failed to register\n", i);
+		}
+	}
+	return ret;
+}
+
+void  mpgpu_class_exit(void)
+{
+	class_unregister(&mpgpu_class);
+}
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
new file mode 100644
index 0000000..26a56f6
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c
@@ -0,0 +1,245 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include <asm/io.h>
+#ifdef CONFIG_GPU_THERMAL
+#include <linux/gpu_cooling.h>
+#include <linux/gpucore_cooling.h>
+#include <linux/amlogic/aml_thermal_hw.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+/*
+ *    For Meson 8 M2.
+ *
+ */
+static void mali_plat_preheat(void);
+static mali_plat_info_t mali_plat_data = {
+    .bst_gpu = 210, /* threshold for boosting gpu. */
+    .bst_pp = 160, /* threshold for boosting PP. */
+    .have_switch = 1,
+    .limit_on = 1,
+    .plat_preheat = mali_plat_preheat,
+};
+
+static void mali_plat_preheat(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    u32 pre_fs;
+    u32 clk, pp;
+
+    if (get_mali_schel_mode() != MALI_PP_FS_SCALING)
+        return;
+
+    get_mali_rt_clkpp(&clk, &pp);
+    pre_fs = mali_plat_data.def_clock + 1;
+    if (clk < pre_fs)
+        clk = pre_fs;
+    if (pp < mali_plat_data.sc_mpp)
+        pp = mali_plat_data.sc_mpp;
+    set_mali_rt_clkpp(clk, pp, 1);
+#endif
+}
+
+mali_plat_info_t* get_mali_plat_data(void) {
+    return &mali_plat_data;
+}
+
+int get_mali_freq_level(int freq)
+{
+    int i = 0, level = -1;
+    int mali_freq_num;
+
+    if (freq < 0)
+        return level;
+
+    mali_freq_num = mali_plat_data.dvfs_table_size - 1;
+    if (freq <= mali_plat_data.clk_sample[0])
+        level = mali_freq_num-1;
+    else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1])
+        level = 0;
+    else {
+        for (i=0; i<mali_freq_num - 1 ;i++) {
+            if (freq >= mali_plat_data.clk_sample[i] && freq <= mali_plat_data.clk_sample[i + 1]) {
+                level = i;
+                level = mali_freq_num-level - 1;
+            }
+        }
+    }
+    return level;
+}
+
+unsigned int get_mali_max_level(void)
+{
+    return mali_plat_data.dvfs_table_size - 1;
+}
+
+int get_gpu_max_clk_level(void)
+{
+    return mali_plat_data.cfg_clock;
+}
+
+#ifdef CONFIG_GPU_THERMAL
+static void set_limit_mali_freq(u32 idx)
+{
+    if (mali_plat_data.limit_on == 0)
+        return;
+    if (idx > mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk)
+        return;
+    if (idx > mali_plat_data.maxclk_sysfs) {
+        printk("idx > max freq\n");
+        return;
+    }
+    mali_plat_data.scale_info.maxclk= idx;
+    revise_mali_rt();
+}
+
+static u32 get_limit_mali_freq(void)
+{
+    return mali_plat_data.scale_info.maxclk;
+}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 get_mali_utilization(void)
+{
+#ifndef MESON_DRV_BRING
+    return 55;
+#else
+    return (_mali_ukk_utilization_pp() * 100) / 256;
+#endif
+}
+#endif
+#endif
+
+#ifdef CONFIG_GPU_THERMAL
+static u32 set_limit_pp_num(u32 num)
+{
+    u32 ret = -1;
+    if (mali_plat_data.limit_on == 0)
+        goto quit;
+    if (num > mali_plat_data.cfg_pp ||
+            num < mali_plat_data.scale_info.minpp)
+        goto quit;
+
+    if (num > mali_plat_data.maxpp_sysfs) {
+        printk("pp > sysfs set pp\n");
+        goto quit;
+    }
+
+    mali_plat_data.scale_info.maxpp = num;
+    revise_mali_rt();
+    ret = 0;
+quit:
+    return ret;
+}
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 mali_get_online_pp(void)
+{
+    unsigned int val;
+    mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+    val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+    if (val == 0x07)    /* No pp is working */
+        return 0;
+
+#ifndef MESON_DRV_BRING
+    return 2;
+#else
+    return mali_executor_get_num_cores_enabled();
+#endif
+}
+#endif
+#endif
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+    //dev_set_drvdata(&ptr_plt_dev->dev, &mali_plat_data);
+    mali_dt_info(ptr_plt_dev, &mali_plat_data);
+    mali_clock_init_clk_tree(ptr_plt_dev);
+    return 0;
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+    if (mali_core_scaling_init(&mali_plat_data) < 0)
+        return -1;
+    return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+    mali_core_scaling_term();
+    return 0;
+}
+
+void mali_post_init(void)
+{
+#ifdef CONFIG_GPU_THERMAL
+    int err;
+    struct gpufreq_cooling_device *gcdev = NULL;
+    struct gpucore_cooling_device *gccdev = NULL;
+
+    gcdev = gpufreq_cooling_alloc();
+    register_gpu_freq_info(get_current_frequency);
+    if (IS_ERR(gcdev))
+        printk("malloc gpu cooling buffer error!!\n");
+    else if (!gcdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gcdev->get_gpu_freq_level = get_mali_freq_level;
+        gcdev->get_gpu_max_level = get_mali_max_level;
+        gcdev->set_gpu_freq_idx = set_limit_mali_freq;
+        gcdev->get_gpu_current_max_level = get_limit_mali_freq;
+#ifdef CONFIG_DEVFREQ_THERMAL
+        gcdev->get_gpu_freq = get_mali_freq;
+        gcdev->get_gpu_loading = get_mali_utilization;
+        gcdev->get_online_pp = mali_get_online_pp;
+#endif
+        err = gpufreq_cooling_register(gcdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gcdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu cooling register okay with err=%d\n",err);
+    }
+
+    gccdev = gpucore_cooling_alloc();
+    if (IS_ERR(gccdev))
+        printk("malloc gpu core cooling buffer error!!\n");
+    else if (!gccdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gccdev->max_gpu_core_num=mali_plat_data.cfg_pp;
+        gccdev->set_max_pp_num=set_limit_pp_num;
+        err = (int)gpucore_cooling_register(gccdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gccdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu core cooling register okay with err=%d\n",err);
+    }
+#endif
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
new file mode 100644
index 0000000..aa8a77a
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.c
+ * Example core scaling policy.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+
+#if AMLOGIC_GPU_USE_GPPLL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16)
+#include <linux/amlogic/amports/gp_pll.h>
+#else
+#include <linux/amlogic/media/clk/gp_pll.h>
+#endif
+#endif
+
+#define LOG_MALI_SCALING 1
+#include "meson_main2.h"
+#include "mali_clock.h"
+
+static int currentStep;
+#ifndef CONFIG_MALI_DVFS
+static int num_cores_enabled;
+static int lastStep;
+static struct work_struct wq_work;
+static mali_plat_info_t* pmali_plat = NULL;
+#endif
+static int  scaling_mode = MALI_PP_FS_SCALING;
+extern int  mali_pm_statue;
+//static int  scaling_mode = MALI_SCALING_DISABLE;
+//static int  scaling_mode = MALI_PP_SCALING;
+
+#if AMLOGIC_GPU_USE_GPPLL
+static struct gp_pll_user_handle_s *gp_pll_user_gpu;
+static int is_gp_pll_get;
+static int is_gp_pll_put;
+#endif
+static unsigned scaling_dbg_level = 0;
+module_param(scaling_dbg_level, uint, 0644);
+MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level");
+
+#define scalingdbg(level, fmt, arg...)				   \
+	do {											   \
+		if (scaling_dbg_level >= (level))			   \
+		printk(fmt , ## arg);						   \
+	} while (0)
+
+#ifndef CONFIG_MALI_DVFS
+static inline void mali_clk_exected(void)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	uint32_t execStep = currentStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[currentStep];
+#endif
+
+	//if (pdvfs[currentStep].freq_index == pdvfs[lastStep].freq_index) return;
+	if ((pdvfs[execStep].freq_index == pdvfs[lastStep].freq_index) ||
+		(pdvfs[execStep].clk_freq == pdvfs[lastStep].clk_freq)){
+		return;
+	}
+
+#if AMLOGIC_GPU_USE_GPPLL
+	if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+		gp_pll_request(gp_pll_user_gpu);
+		if (!is_gp_pll_get) {
+			//printk("not get pll\n");
+			execStep = currentStep - 1;
+		}
+	} else {
+		//not get the gp pll, do need put
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+		gp_pll_release(gp_pll_user_gpu);
+	}
+#endif
+	//mali_dev_pause();
+	mali_clock_set(pdvfs[execStep].freq_index);
+	//mali_dev_resume();
+	lastStep = execStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	if (is_gp_pll_put) {
+		//printk("release gp0 pll\n");
+		gp_pll_release(gp_pll_user_gpu);
+		gp_pll_request(gp_pll_user_gpu);
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+	}
+#endif
+
+}
+#if AMLOGIC_GPU_USE_GPPLL
+static int gp_pll_user_cb_gpu(struct gp_pll_user_handle_s *user,
+		int event)
+{
+	if (event == GP_PLL_USER_EVENT_GRANT) {
+		//printk("granted\n");
+		is_gp_pll_get = 1;
+		is_gp_pll_put = 0;
+		schedule_work(&wq_work);
+	} else if (event == GP_PLL_USER_EVENT_YIELD) {
+		//printk("ask for yield\n");
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 1;
+		schedule_work(&wq_work);
+	}
+
+	return 0;
+}
+#endif
+
+int mali_perf_set_num_pp_cores(int cores)
+{
+    cores = cores;
+    return 0;
+}
+
+static void do_scaling(struct work_struct *work)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	int err = mali_perf_set_num_pp_cores(num_cores_enabled);
+    if (err < 0) scalingdbg(1, "set pp failed");
+
+	scalingdbg(1, "set pp cores to %d\n", num_cores_enabled);
+	scalingdbg(1, "pdvfs[%d].freq_index=%d, pdvfs[%d].freq_index=%d\n",
+			currentStep, pdvfs[currentStep].freq_index,
+			lastStep, pdvfs[lastStep].freq_index);
+	mali_clk_exected();
+#ifdef CONFIG_MALI400_PROFILING
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+			MALI_PROFILING_EVENT_CHANNEL_GPU |
+			MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+			get_current_frequency(),
+			0,	0,	0,	0);
+#endif
+}
+#endif
+
+u32 revise_set_clk(u32 val, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+
+	pinfo = &pmali_plat->scale_info;
+
+	if (val < pinfo->minclk)
+		val = pinfo->minclk;
+	else if (val >  pinfo->maxclk)
+		val =  pinfo->maxclk;
+
+	if (val != currentStep) {
+		currentStep = val;
+		if (flush)
+			schedule_work(&wq_work);
+		else
+			ret = 1;
+	}
+#endif
+	return ret;
+}
+
+void get_mali_rt_clkpp(u32* clk, u32* pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	*clk = currentStep;
+	*pp = num_cores_enabled;
+#endif
+}
+
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+	u32 flush_work = 0;
+
+	pinfo = &pmali_plat->scale_info;
+	if (clk < pinfo->minclk)
+		clk = pinfo->minclk;
+	else if (clk >  pinfo->maxclk)
+		clk =  pinfo->maxclk;
+
+	if (clk != currentStep) {
+		currentStep = clk;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+	if (pp < pinfo->minpp)
+		pp = pinfo->minpp;
+	else if (pp > pinfo->maxpp)
+		pp = pinfo->maxpp;
+
+	if (pp != num_cores_enabled) {
+		num_cores_enabled = pp;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+
+	if (flush_work)
+		schedule_work(&wq_work);
+#endif
+	return ret;
+}
+
+void revise_mali_rt(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	set_mali_rt_clkpp(currentStep, num_cores_enabled, 1);
+#endif
+}
+
+void flush_scaling_job(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	cancel_work_sync(&wq_work);
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 enable_one_core(void)
+{
+	scalingdbg(2, "meson:	 one more pp, curent has %d pp cores\n",  num_cores_enabled + 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled + 1, 0);
+}
+
+static u32 disable_one_core(void)
+{
+	scalingdbg(2, "meson: disable one pp, current has %d pp cores\n",  num_cores_enabled - 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled - 1, 0);
+}
+
+static u32 enable_max_num_cores(void)
+{
+	return set_mali_rt_clkpp(currentStep, pmali_plat->scale_info.maxpp, 0);
+}
+
+static u32 enable_pp_cores(u32 val)
+{
+	scalingdbg(2, "meson: enable %d pp cores\n", val);
+	return set_mali_rt_clkpp(currentStep, val, 0);
+}
+#endif
+
+int mali_core_scaling_init(mali_plat_info_t *mali_plat)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_plat == NULL) {
+		scalingdbg(2, " Mali platform data is NULL!!!\n");
+		return -1;
+	}
+
+	pmali_plat = mali_plat;
+    printk("mali_plat=%p\n", mali_plat);
+	num_cores_enabled = pmali_plat->sc_mpp;
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_gpu = gp_pll_user_register("gpu", 1,
+		gp_pll_user_cb_gpu);
+	//not get the gp pll, do need put
+	is_gp_pll_get = 0;
+	is_gp_pll_put = 0;
+	if (gp_pll_user_gpu == NULL) printk("register gp pll user for gpu failed\n");
+#endif
+
+	currentStep = pmali_plat->def_clock;
+	lastStep = currentStep;
+	INIT_WORK(&wq_work, do_scaling);
+#endif
+	return 0;
+	/* NOTE: Mali is not fully initialized at this point. */
+}
+
+void mali_core_scaling_term(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	flush_scheduled_work();
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_unregister(gp_pll_user_gpu);
+#endif
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 mali_threshold [] = {
+	40, /* 40% */
+	50, /* 50% */
+	90, /* 90% */
+};
+#endif
+
+void mali_pp_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+
+	if (mali_threshold[2] < utilization_pp)
+		ret = enable_max_num_cores();
+	else if (mali_threshold[1]< utilization_pp)
+		ret = enable_one_core();
+	else if (0 < utilization_pp)
+		ret = disable_one_core();
+	if (ret == 1)
+		schedule_work(&wq_work);
+#endif
+}
+
+#if LOG_MALI_SCALING
+void trace_utilization(int utilization_gpu, u32 current_idx, u32 next,
+		u32 current_pp, u32 next_pp)
+{
+	char direction;
+	if (next > current_idx)
+		direction = '>';
+	else if ((current_idx > pmali_plat->scale_info.minpp) && (next < current_idx))
+		direction = '<';
+	else
+		direction = '~';
+
+	scalingdbg(2, "[SCALING]%c (%3d-->%3d)@%3d{%3d - %3d}. pp:(%d-->%d)\n",
+			direction,
+			get_mali_freq(current_idx),
+			get_mali_freq(next),
+			utilization_gpu,
+			pmali_plat->dvfs_table[current_idx].downthreshold,
+			pmali_plat->dvfs_table[current_idx].upthreshold,
+			current_pp, next_pp);
+}
+#endif
+
+#ifndef CONFIG_MALI_DVFS
+static int mali_stay_count = 0;
+static void mali_decide_next_status(int utilization_pp, int* next_fs_idx,
+		int* pp_change_flag)
+{
+	u32 mali_up_limit, decided_fs_idx;
+	u32 ld_left, ld_right;
+	u32 ld_up, ld_down;
+	u32 change_mode;
+
+	*pp_change_flag = 0;
+	change_mode = 0;
+
+	scalingdbg(5, "line(%d), scaling_mode=%d, MALI_TURBO_MODE=%d, turbo=%d, maxclk=%d\n",
+			__LINE__,  scaling_mode, MALI_TURBO_MODE,
+		    pmali_plat->turbo_clock, pmali_plat->scale_info.maxclk);
+
+	mali_up_limit = (scaling_mode ==  MALI_TURBO_MODE) ?
+		pmali_plat->turbo_clock : pmali_plat->scale_info.maxclk;
+	decided_fs_idx = currentStep;
+
+	ld_up = pmali_plat->dvfs_table[currentStep].upthreshold;
+	ld_down = pmali_plat->dvfs_table[currentStep].downthreshold;
+
+	scalingdbg(2, "utilization=%d,  ld_up=%d\n ", utilization_pp,  ld_up);
+	if (utilization_pp >= ld_up) { /* go up */
+
+		scalingdbg(2, "currentStep=%d,  mali_up_limit=%d\n ", currentStep, mali_up_limit);
+		if (currentStep < mali_up_limit) {
+			change_mode = 1;
+			if ((currentStep < pmali_plat->def_clock) && (utilization_pp > pmali_plat->bst_gpu))
+				decided_fs_idx = pmali_plat->def_clock;
+			else
+				decided_fs_idx++;
+		}
+		if ((utilization_pp >= ld_up) &&
+				(num_cores_enabled < pmali_plat->scale_info.maxpp)) {
+			if ((num_cores_enabled < pmali_plat->sc_mpp) && (utilization_pp >= pmali_plat->bst_pp)) {
+				*pp_change_flag = 1;
+				change_mode = 1;
+			} else if (change_mode == 0) {
+				*pp_change_flag = 2;
+				change_mode = 1;
+			}
+		}
+#if LOG_MALI_SCALING
+		scalingdbg(2, "[nexting..] [LD:%d]-> FS[CRNT:%d LMT:%d NEXT:%d] PP[NUM:%d LMT:%d MD:%d][F:%d]\n",
+				utilization_pp, currentStep, mali_up_limit, decided_fs_idx,
+				num_cores_enabled, pmali_plat->scale_info.maxpp, *pp_change_flag, change_mode);
+#endif
+	} else if (utilization_pp <= ld_down) { /* go down */
+		if (mali_stay_count > 0) {
+			*next_fs_idx = decided_fs_idx;
+			mali_stay_count--;
+			return;
+		}
+
+		if (num_cores_enabled > pmali_plat->sc_mpp) {
+			change_mode = 1;
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		} else if (currentStep > pmali_plat->scale_info.minclk) {
+			change_mode = 1;
+		} else if (num_cores_enabled > 1) { /* decrease PPS */
+			if (utilization_pp <= ld_down) {
+				ld_left = utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				scalingdbg(2, "ld_left=%d, ld_right=%d\n", ld_left, ld_right);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		}
+
+		if (change_mode == 1) {
+			decided_fs_idx--;
+		} else if (change_mode == 2) { /* decrease PPS */
+			*pp_change_flag = -1;
+		}
+	}
+
+	if (decided_fs_idx < 0 ) {
+		printk("gpu debug, next index below 0\n");
+		decided_fs_idx = 0;
+	}
+	if (decided_fs_idx > pmali_plat->scale_info.maxclk) {
+		decided_fs_idx = pmali_plat->scale_info.maxclk;
+		printk("gpu debug, next index above max-1, set to %d\n", decided_fs_idx);
+	}
+
+	if (change_mode)
+		mali_stay_count = pmali_plat->dvfs_table[decided_fs_idx].keep_count;
+
+	*next_fs_idx = decided_fs_idx;
+}
+#endif
+
+void mali_pp_fs_scaling_update(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+	int pp_change_flag = 0;
+	u32 next_idx = 0;
+
+#if LOG_MALI_SCALING
+	u32 last_pp = num_cores_enabled;
+#endif
+	mali_decide_next_status(utilization_pp, &next_idx, &pp_change_flag);
+
+	if (pp_change_flag == 1)
+		ret = enable_pp_cores(pmali_plat->sc_mpp);
+	else if (pp_change_flag == 2)
+		ret = enable_one_core();
+	else if (pp_change_flag == -1) {
+		ret = disable_one_core();
+	}
+
+#if LOG_MALI_SCALING
+	if (pp_change_flag || (next_idx != currentStep))
+		trace_utilization(utilization_pp, currentStep, next_idx, last_pp, num_cores_enabled);
+#endif
+
+	if (next_idx != currentStep) {
+		ret = 1;
+		currentStep = next_idx;
+	}
+
+	if (ret == 1)
+		schedule_work(&wq_work);
+#ifdef CONFIG_MALI400_PROFILING
+	else
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				MALI_PROFILING_EVENT_CHANNEL_GPU |
+				MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				get_current_frequency(),
+				0,	0,	0,	0);
+#endif
+#endif
+}
+
+u32 get_mali_schel_mode(void)
+{
+	return scaling_mode;
+}
+
+void set_mali_schel_mode(u32 mode)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mode >= MALI_SCALING_MODE_MAX)
+		return;
+	scaling_mode = mode;
+
+	//disable thermal in turbo mode
+	if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->limit_on = 0;
+	} else {
+		pmali_plat->limit_on = 1;
+	}
+	/* set default performance range. */
+	pmali_plat->scale_info.minclk = pmali_plat->cfg_min_clock;
+	pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+	pmali_plat->scale_info.minpp = pmali_plat->cfg_min_pp;
+	pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+
+	/* set current status and tune max freq */
+	if (scaling_mode == MALI_PP_FS_SCALING) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_pp_cores(pmali_plat->sc_mpp);
+	} else if (scaling_mode == MALI_SCALING_DISABLE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_max_num_cores();
+	} else if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->turbo_clock;
+		enable_max_num_cores();
+	}
+	currentStep = pmali_plat->scale_info.maxclk;
+	schedule_work(&wq_work);
+#endif
+}
+
+u32 get_current_frequency(void)
+{
+	return get_mali_freq(currentStep);
+}
+
+void mali_gpu_utilization_callback(int utilization_pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_pm_statue)
+		return;
+
+	switch (scaling_mode) {
+		case MALI_PP_FS_SCALING:
+			mali_pp_fs_scaling_update(utilization_pp);
+			break;
+		case MALI_PP_SCALING:
+			mali_pp_scaling_update(utilization_pp);
+			break;
+		default:
+			break;
+	}
+#endif
+}
+static u32 clk_cntl_save = 0;
+void mali_dev_freeze(void)
+{
+	clk_cntl_save = mplt_read(HHI_MALI_CLK_CNTL);
+}
+
+void mali_dev_restore(void)
+{
+
+	mplt_write(HHI_MALI_CLK_CNTL, clk_cntl_save);
+	if (pmali_plat && pmali_plat->pdev) {
+		mali_clock_init_clk_tree(pmali_plat->pdev);
+	} else {
+		printk("error: init clock failed, pmali_plat=%p, pmali_plat->pdev=%p\n",
+				pmali_plat, pmali_plat == NULL ? NULL: pmali_plat->pdev);
+	}
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/t83x/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
new file mode 100644
index 0000000..7cb3be7
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @brief Entry point to transfer control to a platform for early initialization
+ *
+ * This function is called early on in the initialization during execution of
+ * @ref kbase_driver_init.
+ *
+ * @return Zero to indicate success non-zero for failure.
+ */
+int kbase_platform_early_init(void);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
new file mode 100644
index 0000000..d9d5e90
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
new file mode 100644
index 0000000..02835f1
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -0,0 +1,75 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq()
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq()
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000..745884f
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+#include "mali_kbase_config_platform.h"
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+	.start = 0xFC010000,
+	.end = 0xFC010000 + (4096 * 4) - 1
+	}
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
new file mode 100644
index 0000000..4665f98
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,279 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START	(0x10000000)
+#define SYS_CFGDATA_OFFSET		(0x000000A0)
+#define SYS_CFGCTRL_OFFSET		(0x000000A4)
+#define SYS_CFGSTAT_OFFSET		(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		(1 << 31)
+#define READ_REG_BIT_VALUE			(0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			(0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		(1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			(1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE	(0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE		(2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		(1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			(1 <<  1)
+
+#define FEED_REG_BIT_MASK			(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+/* the following three values used for reading
+ * HBI value of the LogicTile daughterboard */
+#define VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 (0x10000000)
+#define VE_SYS_PROC_ID1_OFFSET (0x00000088)
+#define VE_LOGIC_TILE_HBI_MASK (0x00000FFF)
+
+#define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos))
+
+/**
+ * Values used for determining the GPU frequency based on the LogicTile type
+ * Used by the function kbase_get_platform_logic_tile_type
+ */
+#define VE_VIRTEX6_GPU_FREQ_MIN 5000
+#define VE_VIRTEX6_GPU_FREQ_MAX 5000
+#define VE_VIRTEX7_GPU_FREQ_MIN 40000
+#define VE_VIRTEX7_GPU_FREQ_MAX 40000
+#define VE_DEFAULT_GPU_FREQ_MIN 5000
+#define VE_DEFAULT_GPU_FREQ_MAX 5000
+
+
+#define CPU_CLOCK_SPEED_UNDEFINED (0)
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed -Retrieves the CPU clock speed
+ * @cpu_clock - the value of CPU clock speed in MHz
+ *
+ * Returns 0 on success, error code otherwise.
+ *
+ * The implementation is platform specific.
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	int err = 0;
+	u32 reg_val = 0;
+	u32 osc2_value = 0;
+	u32 pa_divide = 0;
+	u32 pb_divide = 0;
+	u32 pc_divide = 0;
+	void __iomem *syscfg_reg = NULL;
+	void __iomem *scc_reg = NULL;
+
+	if (CPU_CLOCK_SPEED_UNDEFINED != cpu_clock_speed) {
+		*cpu_clock = cpu_clock_speed;
+		return 0;
+	}
+
+	/* Init the value in case something goes wrong */
+	*cpu_clock = 0;
+
+	/* Map CPU register into virtual memory */
+	syscfg_reg = ioremap(MOTHERBOARD_SYS_CFG_START, 0x1000);
+	if (syscfg_reg == NULL) {
+		err = -EIO;
+		goto syscfg_reg_map_failed;
+	}
+
+	scc_reg = ioremap(CORETILE_EXPRESS_A9X4_SCC_START, 0x1000);
+	if (scc_reg == NULL) {
+		err = -EIO;
+		goto scc_reg_map_failed;
+	}
+
+	raw_spin_lock(&syscfg_lock);
+
+	/* Read SYS regs - OSC2 */
+	reg_val = readl(syscfg_reg + SYS_CFGCTRL_OFFSET);
+
+	/* Check if there is any other undergoing request */
+	if (reg_val & SYS_CFGCTRL_START_BIT_VALUE) {
+		err = -EBUSY;
+		goto ongoing_request;
+	}
+	/* Reset the CGFGSTAT reg */
+	writel(0, (syscfg_reg + SYS_CFGSTAT_OFFSET));
+
+	writel(SYS_CFGCTRL_START_BIT_VALUE | READ_REG_BIT_VALUE |
+			DCC_DEFAULT_BIT_VALUE |
+			SYS_CFG_OSC_FUNC_BIT_VALUE |
+			SITE_DEFAULT_BIT_VALUE |
+			BOARD_STACK_POS_DEFAULT_BIT_VALUE |
+			DEVICE_DEFAULT_BIT_VALUE,
+			(syscfg_reg + SYS_CFGCTRL_OFFSET));
+	/* Wait for the transaction to complete */
+	while (!(readl(syscfg_reg + SYS_CFGSTAT_OFFSET) &
+			SYS_CFG_COMPLETE_BIT_VALUE))
+		;
+	/* Read SYS_CFGSTAT Register to get the status of submitted
+	 * transaction */
+	reg_val = readl(syscfg_reg + SYS_CFGSTAT_OFFSET);
+
+	if (reg_val & SYS_CFG_ERROR_BIT_VALUE) {
+		/* Error while setting register */
+		err = -EIO;
+		goto set_reg_error;
+	}
+
+	osc2_value = readl(syscfg_reg + SYS_CFGDATA_OFFSET);
+	/* Read the SCC CFGRW0 register */
+	reg_val = readl(scc_reg);
+
+	/*
+	 * Select the appropriate feed:
+	 * CFGRW0[0] - CLKOB
+	 * CFGRW0[1] - CLKOC
+	 * CFGRW0[2] - FACLK (CLK)B FROM AXICLK PLL)
+	 */
+	/* Calculate the  FCLK */
+	if (IS_SINGLE_BIT_SET(reg_val, 0)) {
+		/* CFGRW0[0] - CLKOB */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[10:7] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PB_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 1)) {
+		/* CFGRW0[1] - CLKOC */
+		/* CFGRW0[6:3] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[14:11] */
+		pc_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				FCLK_PC_DIVIDE_BIT_SHIFT)) >>
+				FCLK_PC_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1);
+	} else if (IS_SINGLE_BIT_SET(reg_val, 2)) {
+		/* CFGRW0[2] - FACLK */
+		/* CFGRW0[18:15] */
+		pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PA_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PA_DIVIDE_BIT_SHIFT);
+		/* CFGRW0[22:19] */
+		pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+				AXICLK_PB_DIVIDE_BIT_SHIFT)) >>
+				AXICLK_PB_DIVIDE_BIT_SHIFT);
+		*cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+	} else {
+		err = -EIO;
+	}
+
+set_reg_error:
+ongoing_request:
+	raw_spin_unlock(&syscfg_lock);
+	*cpu_clock /= HZ_IN_MHZ;
+
+	if (!err)
+		cpu_clock_speed = *cpu_clock;
+
+	iounmap(scc_reg);
+
+scc_reg_map_failed:
+	iounmap(syscfg_reg);
+
+syscfg_reg_map_failed:
+
+	return err;
+}
+
+/**
+ * kbase_get_platform_logic_tile_type -  determines which LogicTile type
+ * is used by Versatile Express
+ *
+ * When platform_config build parameter is specified as vexpress, i.e.,
+ * platform_config=vexpress, GPU frequency may vary dependent on the
+ * particular platform. The GPU frequency depends on the LogicTile type.
+ *
+ * This function determines which LogicTile type is used by the platform by
+ * reading the HBI value of the daughterboard which holds the LogicTile:
+ *
+ * 0x217 HBI0217 Virtex-6
+ * 0x192 HBI0192 Virtex-5
+ * 0x247 HBI0247 Virtex-7
+ *
+ * Return: HBI value of the logic tile daughterboard, zero if not accessible
+ */
+static u32 kbase_get_platform_logic_tile_type(void)
+{
+	void __iomem *syscfg_reg = NULL;
+	u32 sys_procid1 = 0;
+
+	syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 + VE_SYS_PROC_ID1_OFFSET, 4);
+	if (NULL != syscfg_reg) {
+		sys_procid1 = readl(syscfg_reg);
+		iounmap(syscfg_reg);
+	}
+
+	return sys_procid1 & VE_LOGIC_TILE_HBI_MASK;
+}
+
+u32 kbase_get_platform_min_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MIN;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MIN;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MIN;
+	}
+}
+
+u32 kbase_get_platform_max_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MAX;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MAX;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to some default value */
+		return VE_DEFAULT_GPU_FREQ_MAX;
+	}
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
new file mode 100644
index 0000000..da86569
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+/**
+ * Get the minimum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_min_freq(void);
+
+/**
+ * Get the maximum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_max_freq(void);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
new file mode 100644
index 0000000..df87c74
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2013-2014, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
new file mode 100644
index 0000000..0efbf39
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 5000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 5000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000..5a69758
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -0,0 +1,66 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+			     .start = 0x2f010000,
+			     .end = 0x2f010000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
new file mode 100644
index 0000000..d9d5e90
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
new file mode 100644
index 0000000..dbdf21e
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -0,0 +1,75 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 10000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 10000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value:  NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000..5d8ec2d
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 75,
+	.mmu_irq_number = 76,
+	.gpu_irq_number = 77,
+	.io_memory_region = {
+			     .start = 0x2F000000,
+			     .end = 0x2F000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+	/* Nothing needed at this stage */
+	return 0;
+}
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
new file mode 100644
index 0000000..816dff4
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ					    (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START	(0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START		(0x10000000)
+#define SYS_CFGDATA_OFFSET				(0x000000A0)
+#define SYS_CFGCTRL_OFFSET				(0x000000A4)
+#define SYS_CFGSTAT_OFFSET				(0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE		  (1 << 31)
+#define READ_REG_BIT_VALUE				  (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE			  (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE		  (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE			  (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE	      (2 <<  0)
+#define SYS_CFG_COMPLETE_BIT_VALUE		  (1 <<  0)
+#define SYS_CFG_ERROR_BIT_VALUE			  (1 <<  1)
+
+#define FEED_REG_BIT_MASK				(0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT		(0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT		(0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT		(0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT		(0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT		(0x13)
+
+#define IS_SINGLE_BIT_SET(val, pos)		(val&(1<<pos))
+
+#define CPU_CLOCK_SPEED_UNDEFINED 0
+
+#define CPU_CLOCK_SPEED_6XV7 50
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed
+ * @brief  Retrieves the CPU clock speed.
+ *         The implementation is platform specific.
+ * @param[out]    cpu_clock - the value of CPU clock speed in MHz
+ * @return        0 on success, 1 otherwise
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	/* TODO: MIDBASE-2873 - Provide runtime detection of CPU clock freq for 6XV7 board */
+	*cpu_clock = CPU_CLOCK_SPEED_6XV7;
+
+	return 0;
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
new file mode 100644
index 0000000..23647cc
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+#endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/t83x/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
new file mode 100644
index 0000000..5fa9b39
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_ukk_os.h
+ * Types and definitions that are common for Linux OSs for the kernel side of the
+ * User-Kernel interface.
+ */
+
+#ifndef _UKK_OS_H_ /* Linux version */
+#define _UKK_OS_H_
+
+#include <linux/fs.h>
+
+/**
+ * @addtogroup uk_api User-Kernel Interface API
+ * @{
+ */
+
+/**
+ * @addtogroup uk_api_kernel UKK (Kernel side)
+ * @{
+ */
+
+/**
+ * Internal OS specific data structure associated with each UKK session. Part
+ * of a ukk_session object.
+ */
+typedef struct ukkp_session {
+	int dummy;     /**< No internal OS specific data at this time */
+} ukkp_session;
+
+/** @} end group uk_api_kernel */
+
+/** @} end group uk_api */
+
+#endif /* _UKK_OS_H__ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h b/t83x/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h
new file mode 100644
index 0000000..5dc2f3b
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _PROTECTED_MODE_SWITCH_H_
+#define _PROTECTED_MODE_SWITCH_H_
+
+struct protected_mode_device;
+
+/**
+ * struct protected_mode_ops - Callbacks for protected mode switch operations
+ *
+ * @protected_mode_enable:  Callback to enable protected mode for device
+ * @protected_mode_disable: Callback to disable protected mode for device
+ */
+struct protected_mode_ops {
+	/**
+	 * protected_mode_enable() - Enable protected mode on device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enable)(
+			struct protected_mode_device *protected_dev);
+
+	/**
+	 * protected_mode_disable() - Disable protected mode on device, and
+	 *                            reset device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_disable)(
+			struct protected_mode_device *protected_dev);
+};
+
+/**
+ * struct protected_mode_device - Device structure for protected mode devices
+ *
+ * @ops  - Callbacks associated with this device
+ * @data - Pointer to device private data
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct protected_mode_device {
+	struct protected_mode_ops ops;
+	void *data;
+};
+
+#endif /* _PROTECTED_MODE_SWITCH_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/sconscript b/t83x/kernel/drivers/gpu/arm/midgard/sconscript
new file mode 100644
index 0000000..f11933a
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/sconscript
@@ -0,0 +1,64 @@
+#
+# (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import sys
+Import('env')
+
+SConscript( 'tests/sconscript' )
+
+mock_test = 0
+
+# Source files required for kbase.
+kbase_src = [
+	Glob('*.c'),
+	Glob('backend/*/*.c'),
+	Glob('internal/*/*.c'),
+	Glob('ipa/*.c'),
+	Glob('platform/%s/*.c' % env['platform_config']),
+]
+
+if env['platform_config']=='juno_soc':
+	kbase_src += [Glob('platform/devicetree/*.c')]
+else:
+	kbase_src += [Glob('platform/%s/*.c' % env['platform_config'])]
+
+if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1':
+	kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')]
+	mock_test = 1
+
+make_args = env.kernel_get_config_defines(ret_list = True) + [
+	'PLATFORM=%s' % env['platform'],
+	'MALI_ERROR_INJECT_ON=%s' % env['error_inject'],
+	'MALI_KERNEL_TEST_API=%s' % env['debug'],
+	'MALI_UNIT_TEST=%s' % env['unit'],
+	'MALI_RELEASE_NAME=%s' % env['mali_release_name'],
+	'MALI_MOCK_TEST=%s' % mock_test,
+	'MALI_CUSTOMER_RELEASE=%s' % env['release'],
+	'MALI_COVERAGE=%s' % env['coverage'],
+]
+
+kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src,
+                              make_args = make_args)
+
+# need Module.symvers from ump.ko build
+if int(env['ump']) == 1:
+	env.Depends(kbase, '$STATIC_LIB_PATH/ump.ko')
+
+if 'smc_protected_mode_switcher' in env:
+	env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/smc_protected_mode_switcher.ko')
+
+env.KernelObjTarget('kbase', kbase)
+
+env.AppendUnique(BASE=['cutils_linked_list'])
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/Kbuild b/t83x/kernel/drivers/gpu/arm/midgard/tests/Kbuild
new file mode 100644
index 0000000..b4bed04
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/Kbuild
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+obj-$(CONFIG_MALI_KUTF) += kutf/
+obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/Kconfig b/t83x/kernel/drivers/gpu/arm/midgard/tests/Kconfig
new file mode 100644
index 0000000..da0515c
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/Kconfig
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+source "drivers/gpu/arm/midgard/tests/kutf/Kconfig"
+source "drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig"
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h b/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h
new file mode 100644
index 0000000..3667687
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KERNEL_UTF_HELPERS_H_
+#define _KERNEL_UTF_HELPERS_H_
+
+/* kutf_helpers.h
+ * Test helper functions for the kernel UTF test infrastructure.
+ *
+ * These functions provide methods for enqueuing/dequeuing lines of text sent
+ * by user space. They are used to implement the transfer of "userdata" from
+ * user space to kernel.
+ */
+
+#include <kutf/kutf_suite.h>
+
+/**
+ * kutf_helper_input_dequeue() - Dequeue a line sent by user space
+ * @context:    KUTF context
+ * @str_size:   Pointer to an integer to receive the size of the string
+ *
+ * If no line is available then this function will wait (interruptibly) until
+ * a line is available.
+ *
+ * Return: The line dequeued, ERR_PTR(-EINTR) if interrupted or NULL on end
+ * of data.
+ */
+char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size);
+
+/**
+ * kutf_helper_input_enqueue() - Enqueue a line sent by user space
+ * @context:   KUTF context
+ * @str:       The user space address of the line
+ * @size:      The length in bytes of the string
+ *
+ * This function will use copy_from_user to copy the string out of user space.
+ * The string need not be NULL-terminated (@size should not include the NULL
+ * termination).
+ *
+ * As a special case @str==NULL and @size==0 is valid to mark the end of input,
+ * but callers should use kutf_helper_input_enqueue_end_of_data() instead.
+ *
+ * Return: 0 on success, -EFAULT if the line cannot be copied from user space,
+ * -ENOMEM if out of memory.
+ */
+int kutf_helper_input_enqueue(struct kutf_context *context,
+		const char __user *str, size_t size);
+
+/**
+ * kutf_helper_input_enqueue_end_of_data() - Signal no more data is to be sent
+ * @context:    KUTF context
+ *
+ * After this function has been called, kutf_helper_input_dequeue() will always
+ * return NULL.
+ */
+void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context);
+
+#endif	/* _KERNEL_UTF_HELPERS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h b/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h
new file mode 100644
index 0000000..c7b5263
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h
@@ -0,0 +1,174 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KERNEL_UTF_HELPERS_USER_H_
+#define _KERNEL_UTF_HELPERS_USER_H_
+
+/* kutf_helpers.h
+ * Test helper functions for the kernel UTF test infrastructure, whose
+ * implementation mirrors that of similar functions for kutf-userside
+ */
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_helpers.h>
+
+
+#define KUTF_HELPER_MAX_VAL_NAME_LEN 255
+
+enum kutf_helper_valtype {
+	KUTF_HELPER_VALTYPE_INVALID,
+	KUTF_HELPER_VALTYPE_U64,
+	KUTF_HELPER_VALTYPE_STR,
+
+	KUTF_HELPER_VALTYPE_COUNT /* Must be last */
+};
+
+struct kutf_helper_named_val {
+	enum kutf_helper_valtype type;
+	char *val_name;
+	union {
+		u64 val_u64;
+		char *val_str;
+	} u;
+};
+
+/* Extra error values for certain helpers when we want to distinguish between
+ * Linux's own error values too.
+ *
+ * These can only be used on certain functions returning an int type that are
+ * documented as returning one of these potential values, they cannot be used
+ * from functions return a ptr type, since we can't decode it with PTR_ERR
+ *
+ * No negative values are used - Linux error codes should be used instead, and
+ * indicate a problem in accessing the data file itself (are generally
+ * unrecoverable)
+ *
+ * Positive values indicate correct access but invalid parsing (can be
+ * recovered from assuming data in the future is correct) */
+enum kutf_helper_err {
+	/* No error - must be zero */
+	KUTF_HELPER_ERR_NONE = 0,
+	/* Named value parsing encountered an invalid name */
+	KUTF_HELPER_ERR_INVALID_NAME,
+	/* Named value parsing of string or u64 type encountered extra
+	 * characters after the value (after the last digit for a u64 type or
+	 * after the string end delimiter for string type) */
+	KUTF_HELPER_ERR_CHARS_AFTER_VAL,
+	/* Named value parsing of string type couldn't find the string end
+	 * delimiter.
+	 *
+	 * This cannot be encountered when the NAME="value" message exceeds the
+	 * textbuf's maximum line length, because such messages are not checked
+	 * for an end string delimiter */
+	KUTF_HELPER_ERR_NO_END_DELIMITER,
+	/* Named value didn't parse as any of the known types */
+	KUTF_HELPER_ERR_INVALID_VALUE,
+};
+
+
+/* Send named NAME=value pair, u64 value
+ *
+ * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long
+ *
+ * Any failure will be logged on the suite's current test fixture
+ *
+ * Returns 0 on success, non-zero on failure
+ */
+int kutf_helper_send_named_u64(struct kutf_context *context,
+		const char *val_name, u64 val);
+
+/* Get the maximum length of a string that can be represented as a particular
+ * NAME="value" pair without string-value truncation in the kernel's buffer
+ *
+ * Given val_name and the kernel buffer's size, this can be used to determine
+ * the maximum length of a string that can be sent as val_name="value" pair
+ * without having the string value truncated. Any string longer than this will
+ * be truncated at some point during communication to this size.
+ *
+ * It is assumed that val_name is a valid name for
+ * kutf_helper_send_named_str(), and no checking will be made to
+ * ensure this.
+ *
+ * Returns the maximum string length that can be represented, or a negative
+ * value if the NAME="value" encoding itself wouldn't fit in kern_buf_sz
+ */
+int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz);
+
+/* Send named NAME="str" pair
+ *
+ * no escaping allowed in str. Any of the following characters will terminate
+ * the string: '"' '\\' '\n'
+ *
+ * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long
+ *
+ * Any failure will be logged on the suite's current test fixture
+ *
+ * Returns 0 on success, non-zero on failure */
+int kutf_helper_send_named_str(struct kutf_context *context,
+		const char *val_name, const char *val_str);
+
+/* Receive named NAME=value pair
+ *
+ * This can receive u64 and string values - check named_val->type
+ *
+ * If you are not planning on dynamic handling of the named value's name and
+ * type, then kutf_helper_receive_check_val() is more useful as a
+ * convenience function.
+ *
+ * String members of named_val will come from memory allocated on the fixture's mempool
+ *
+ * Returns 0 on success. Negative value on failure to receive from the 'run'
+ * file, positive value indicates an enum kutf_helper_err value for correct
+ * reception of data but invalid parsing */
+int kutf_helper_receive_named_val(
+		struct kutf_context *context,
+		struct kutf_helper_named_val *named_val);
+
+/* Receive and validate NAME=value pair
+ *
+ * As with kutf_helper_receive_named_val, but validate that the
+ * name and type are as expected, as a convenience for a common pattern found
+ * in tests.
+ *
+ * NOTE: this only returns an error value if there was actually a problem
+ * receiving data.
+ *
+ * NOTE: If the underlying data was received correctly, but:
+ * - isn't of the expected name
+ * - isn't the expected type
+ * - isn't correctly parsed for the type
+ * then the following happens:
+ * - failure result is recorded
+ * - named_val->type will be KUTF_HELPER_VALTYPE_INVALID
+ * - named_val->u will contain some default value that should be relatively
+ *   harmless for the test, including being writable in the case of string
+ *   values
+ * - return value will be 0 to indicate success
+ *
+ * The rationale behind this is that we'd prefer to continue the rest of the
+ * test with failures propagated, rather than hitting a timeout */
+int kutf_helper_receive_check_val(
+		struct kutf_helper_named_val *named_val,
+		struct kutf_context *context,
+		const char *expect_val_name,
+		enum kutf_helper_valtype expect_val_type);
+
+/* Output a named value to kmsg */
+void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val);
+
+
+#endif	/* _KERNEL_UTF_HELPERS_USER_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h b/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h
new file mode 100644
index 0000000..584c9dd
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KERNEL_UTF_MEM_H_
+#define _KERNEL_UTF_MEM_H_
+
+/* kutf_mem.h
+ * Functions for management of memory pools in the kernel.
+ *
+ * This module implements a memory pool allocator, allowing a test
+ * implementation to allocate linked allocations which can then be freed by a
+ * single free which releases all of the resources held by the entire pool.
+ *
+ * Note that it is not possible to free single resources within the pool once
+ * allocated.
+ */
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+/**
+ * struct kutf_mempool - the memory pool context management structure
+ * @head:	list head on which the allocations in this context are added to
+ * @lock:	mutex for concurrent allocation from multiple threads
+ *
+ */
+struct kutf_mempool {
+	struct list_head head;
+	struct mutex lock;
+};
+
+/**
+ * kutf_mempool_init() - Initialize a memory pool.
+ * @pool:	Memory pool structure to initialize, provided by the user
+ *
+ * Return:	zero on success
+ */
+int kutf_mempool_init(struct kutf_mempool *pool);
+
+/**
+ * kutf_mempool_alloc() - Allocate memory from a pool
+ * @pool:	Memory pool to allocate from
+ * @size:	Size of memory wanted in number of bytes
+ *
+ * Return:	Pointer to memory on success, NULL on failure.
+ */
+void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size);
+
+/**
+ * kutf_mempool_destroy() - Destroy a memory pool, freeing all memory within it.
+ * @pool:	The memory pool to free
+ */
+void kutf_mempool_destroy(struct kutf_mempool *pool);
+#endif	/* _KERNEL_UTF_MEM_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h b/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h
new file mode 100644
index 0000000..6787f71
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h
@@ -0,0 +1,176 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KERNEL_UTF_RESULTSET_H_
+#define _KERNEL_UTF_RESULTSET_H_
+
+/* kutf_resultset.h
+ * Functions and structures for handling test results and result sets.
+ *
+ * This section of the kernel UTF contains structures and functions used for the
+ * management of Results and Result Sets.
+ */
+
+/**
+ * enum kutf_result_status - Status values for a single Test error.
+ * @KUTF_RESULT_BENCHMARK:	Result is a meta-result containing benchmark
+ *                              results.
+ * @KUTF_RESULT_SKIP:		The test was skipped.
+ * @KUTF_RESULT_UNKNOWN:	The test has an unknown result.
+ * @KUTF_RESULT_PASS:		The test result passed.
+ * @KUTF_RESULT_DEBUG:		The test result passed, but raised a debug
+ *                              message.
+ * @KUTF_RESULT_INFO:		The test result passed, but raised
+ *                              an informative message.
+ * @KUTF_RESULT_WARN:		The test result passed, but raised a warning
+ *                              message.
+ * @KUTF_RESULT_FAIL:		The test result failed with a non-fatal error.
+ * @KUTF_RESULT_FATAL:		The test result failed with a fatal error.
+ * @KUTF_RESULT_ABORT:		The test result failed due to a non-UTF
+ *                              assertion failure.
+ * @KUTF_RESULT_USERDATA:	User data is ready to be read,
+ *                              this is not seen outside the kernel
+ * @KUTF_RESULT_USERDATA_WAIT:	Waiting for user data to be sent,
+ *                              this is not seen outside the kernel
+ * @KUTF_RESULT_TEST_FINISHED:	The test has finished, no more results will
+ *                              be produced. This is not seen outside kutf
+ */
+enum kutf_result_status {
+	KUTF_RESULT_BENCHMARK = -3,
+	KUTF_RESULT_SKIP    = -2,
+	KUTF_RESULT_UNKNOWN = -1,
+
+	KUTF_RESULT_PASS    = 0,
+	KUTF_RESULT_DEBUG   = 1,
+	KUTF_RESULT_INFO    = 2,
+	KUTF_RESULT_WARN    = 3,
+	KUTF_RESULT_FAIL    = 4,
+	KUTF_RESULT_FATAL   = 5,
+	KUTF_RESULT_ABORT   = 6,
+
+	KUTF_RESULT_USERDATA      = 7,
+	KUTF_RESULT_USERDATA_WAIT = 8,
+	KUTF_RESULT_TEST_FINISHED = 9
+};
+
+/* The maximum size of a kutf_result_status result when
+ * converted to a string
+ */
+#define KUTF_ERROR_MAX_NAME_SIZE 21
+
+#ifdef __KERNEL__
+
+#include <kutf/kutf_mem.h>
+#include <linux/wait.h>
+
+struct kutf_context;
+
+/**
+ * struct kutf_result - Represents a single test result.
+ * @node:	Next result in the list of results.
+ * @status:	The status summary (pass / warn / fail / etc).
+ * @message:	A more verbose status message.
+ */
+struct kutf_result {
+	struct list_head            node;
+	enum kutf_result_status     status;
+	const char                  *message;
+};
+
+/**
+ * KUTF_RESULT_SET_WAITING_FOR_INPUT - Test is waiting for user data
+ *
+ * This flag is set within a struct kutf_result_set whenever the test is blocked
+ * waiting for user data. Attempts to dequeue results when this flag is set
+ * will cause a dummy %KUTF_RESULT_USERDATA_WAIT result to be produced. This
+ * is used to output a warning message and end of file.
+ */
+#define KUTF_RESULT_SET_WAITING_FOR_INPUT 1
+
+/**
+ * struct kutf_result_set - Represents a set of results.
+ * @results:	List head of a struct kutf_result list for storing the results
+ * @waitq:	Wait queue signalled whenever new results are added.
+ * @flags:	Flags see %KUTF_RESULT_SET_WAITING_FOR_INPUT
+ */
+struct kutf_result_set {
+	struct list_head          results;
+	wait_queue_head_t         waitq;
+	int                       flags;
+};
+
+/**
+ * kutf_create_result_set() - Create a new result set
+ *                            to which results can be added.
+ *
+ * Return: The created result set.
+ */
+struct kutf_result_set *kutf_create_result_set(void);
+
+/**
+ * kutf_add_result() - Add a result to the end of an existing result set.
+ *
+ * @context:	The kutf context
+ * @status:	The result status to add.
+ * @message:	The result message to add.
+ *
+ * Return: 0 if the result is successfully added. -ENOMEM if allocation fails.
+ */
+int kutf_add_result(struct kutf_context *context,
+		enum kutf_result_status status, const char *message);
+
+/**
+ * kutf_remove_result() - Remove a result from the head of a result set.
+ * @set:	The result set.
+ *
+ * This function will block until there is a result to read. The wait is
+ * interruptible, so this function will return with an ERR_PTR if interrupted.
+ *
+ * Return: result or ERR_PTR if interrupted
+ */
+struct kutf_result *kutf_remove_result(
+		struct kutf_result_set *set);
+
+/**
+ * kutf_destroy_result_set() - Free a previously created result set.
+ *
+ * @results:	The result set whose resources to free.
+ */
+void kutf_destroy_result_set(struct kutf_result_set *results);
+
+/**
+ * kutf_set_waiting_for_input() - The test is waiting for userdata
+ *
+ * @set: The result set to update
+ *
+ * Causes the result set to always have results and return a fake
+ * %KUTF_RESULT_USERDATA_WAIT result.
+ */
+void kutf_set_waiting_for_input(struct kutf_result_set *set);
+
+/**
+ * kutf_clear_waiting_for_input() - The test is no longer waiting for userdata
+ *
+ * @set: The result set to update
+ *
+ * Cancels the effect of kutf_set_waiting_for_input()
+ */
+void kutf_clear_waiting_for_input(struct kutf_result_set *set);
+
+#endif	/* __KERNEL__ */
+
+#endif	/* _KERNEL_UTF_RESULTSET_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h b/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h
new file mode 100644
index 0000000..ca30e57
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h
@@ -0,0 +1,564 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KERNEL_UTF_SUITE_H_
+#define _KERNEL_UTF_SUITE_H_
+
+/* kutf_suite.h
+ * Functions for management of test suites.
+ *
+ * This collection of data structures, macros, and functions are used to
+ * create Test Suites, Tests within those Test Suites, and Fixture variants
+ * of each test.
+ */
+
+#include <linux/kref.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+
+#include <kutf/kutf_mem.h>
+#include <kutf/kutf_resultset.h>
+
+/* Arbitrary maximum size to prevent user space allocating too much kernel
+ * memory
+ */
+#define KUTF_MAX_LINE_LENGTH (1024u)
+
+/**
+ * Pseudo-flag indicating an absence of any specified test class. Note that
+ * tests should not be annotated with this constant as it is simply a zero
+ * value; tests without a more specific class must be marked with the flag
+ * KUTF_F_TEST_GENERIC.
+ */
+#define KUTF_F_TEST_NONE                ((unsigned int)(0))
+
+/**
+ * Class indicating this test is a smoke test.
+ * A given set of smoke tests should be quick to run, enabling rapid turn-around
+ * of "regress-on-commit" test runs.
+ */
+#define KUTF_F_TEST_SMOKETEST           ((unsigned int)(1 << 1))
+
+/**
+ * Class indicating this test is a performance test.
+ * These tests typically produce a performance metric, such as "time to run" or
+ * "frames per second",
+ */
+#define KUTF_F_TEST_PERFORMANCE         ((unsigned int)(1 << 2))
+
+/**
+ * Class indicating that this test is a deprecated test.
+ * These tests have typically been replaced by an alternative test which is
+ * more efficient, or has better coverage.
+ */
+#define KUTF_F_TEST_DEPRECATED          ((unsigned int)(1 << 3))
+
+/**
+ * Class indicating that this test is a known failure.
+ * These tests have typically been run and failed, but marking them as a known
+ * failure means it is easier to triage results.
+ *
+ * It is typically more convenient to triage known failures using the
+ * results database and web UI, as this means there is no need to modify the
+ * test code.
+ */
+#define KUTF_F_TEST_EXPECTED_FAILURE    ((unsigned int)(1 << 4))
+
+/**
+ * Class indicating that this test is a generic test, which is not a member of
+ * a more specific test class. Tests which are not created with a specific set
+ * of filter flags by the user are assigned this test class by default.
+ */
+#define KUTF_F_TEST_GENERIC             ((unsigned int)(1 << 5))
+
+/**
+ * Class indicating this test is a resource allocation failure test.
+ * A resource allocation failure test will test that an error code is
+ * correctly propagated when an allocation fails.
+ */
+#define KUTF_F_TEST_RESFAIL             ((unsigned int)(1 << 6))
+
+/**
+ * Additional flag indicating that this test is an expected failure when
+ * run in resource failure mode. These tests are never run when running
+ * the low resource mode.
+ */
+#define KUTF_F_TEST_EXPECTED_FAILURE_RF ((unsigned int)(1 << 7))
+
+/**
+ * Flag reserved for user-defined filter zero.
+ */
+#define KUTF_F_TEST_USER_0 ((unsigned int)(1 << 24))
+
+/**
+ * Flag reserved for user-defined filter one.
+ */
+#define KUTF_F_TEST_USER_1 ((unsigned int)(1 << 25))
+
+/**
+ * Flag reserved for user-defined filter two.
+ */
+#define KUTF_F_TEST_USER_2 ((unsigned int)(1 << 26))
+
+/**
+ * Flag reserved for user-defined filter three.
+ */
+#define KUTF_F_TEST_USER_3 ((unsigned int)(1 << 27))
+
+/**
+ * Flag reserved for user-defined filter four.
+ */
+#define KUTF_F_TEST_USER_4 ((unsigned int)(1 << 28))
+
+/**
+ * Flag reserved for user-defined filter five.
+ */
+#define KUTF_F_TEST_USER_5 ((unsigned int)(1 << 29))
+
+/**
+ * Flag reserved for user-defined filter six.
+ */
+#define KUTF_F_TEST_USER_6 ((unsigned int)(1 << 30))
+
+/**
+ * Flag reserved for user-defined filter seven.
+ */
+#define KUTF_F_TEST_USER_7 ((unsigned int)(1 << 31))
+
+/**
+ * Pseudo-flag indicating that all test classes should be executed.
+ */
+#define KUTF_F_TEST_ALL                 ((unsigned int)(0xFFFFFFFFU))
+
+/**
+ * union kutf_callback_data - Union used to store test callback data
+ * @ptr_value:		pointer to the location where test callback data
+ *                      are stored
+ * @u32_value:		a number which represents test callback data
+ */
+union kutf_callback_data {
+	void *ptr_value;
+	u32  u32_value;
+};
+
+/**
+ * struct kutf_userdata_line - A line of user data to be returned to the user
+ * @node:   struct list_head to link this into a list
+ * @str:    The line of user data to return to user space
+ * @size:   The number of bytes within @str
+ */
+struct kutf_userdata_line {
+	struct list_head node;
+	char *str;
+	size_t size;
+};
+
+/**
+ * KUTF_USERDATA_WARNING_OUTPUT - Flag specifying that a warning has been output
+ *
+ * If user space reads the "run" file while the test is waiting for user data,
+ * then the framework will output a warning message and set this flag within
+ * struct kutf_userdata. A subsequent read will then simply return an end of
+ * file condition rather than outputting the warning again. The upshot of this
+ * is that simply running 'cat' on a test which requires user data will produce
+ * the warning followed by 'cat' exiting due to EOF - which is much more user
+ * friendly than blocking indefinitely waiting for user data.
+ */
+#define KUTF_USERDATA_WARNING_OUTPUT  1
+
+/**
+ * struct kutf_userdata - Structure holding user data
+ * @flags:       See %KUTF_USERDATA_WARNING_OUTPUT
+ * @input_head:  List of struct kutf_userdata_line containing user data
+ *               to be read by the kernel space test.
+ * @input_waitq: Wait queue signalled when there is new user data to be
+ *               read by the kernel space test.
+ */
+struct kutf_userdata {
+	unsigned long flags;
+	struct list_head input_head;
+	wait_queue_head_t input_waitq;
+};
+
+/**
+ * struct kutf_context - Structure representing a kernel test context
+ * @kref:		Refcount for number of users of this context
+ * @suite:		Convenience pointer to the suite this context
+ *                      is running
+ * @test_fix:		The fixture that is being run in this context
+ * @fixture_pool:	The memory pool used for the duration of
+ *                      the fixture/text context.
+ * @fixture:		The user provided fixture structure.
+ * @fixture_index:	The index (id) of the current fixture.
+ * @fixture_name:	The name of the current fixture (or NULL if unnamed).
+ * @test_data:		Any user private data associated with this test
+ * @result_set:		All the results logged by this test context
+ * @status:		The status of the currently running fixture.
+ * @expected_status:	The expected status on exist of the currently
+ *                      running fixture.
+ * @work:		Work item to enqueue onto the work queue to run the test
+ * @userdata:		Structure containing the user data for the test to read
+ */
+struct kutf_context {
+	struct kref                     kref;
+	struct kutf_suite               *suite;
+	struct kutf_test_fixture        *test_fix;
+	struct kutf_mempool             fixture_pool;
+	void                            *fixture;
+	unsigned int                    fixture_index;
+	const char                      *fixture_name;
+	union kutf_callback_data        test_data;
+	struct kutf_result_set          *result_set;
+	enum kutf_result_status         status;
+	enum kutf_result_status         expected_status;
+
+	struct work_struct              work;
+	struct kutf_userdata            userdata;
+};
+
+/**
+ * struct kutf_suite - Structure representing a kernel test suite
+ * @app:			The application this suite belongs to.
+ * @name:			The name of this suite.
+ * @suite_data:			Any user private data associated with this
+ *                              suite.
+ * @create_fixture:		Function used to create a new fixture instance
+ * @remove_fixture:		Function used to destroy a new fixture instance
+ * @fixture_variants:		The number of variants (must be at least 1).
+ * @suite_default_flags:	Suite global filter flags which are set on
+ *                              all tests.
+ * @node:			List node for suite_list
+ * @dir:			The debugfs directory for this suite
+ * @test_list:			List head to store all the tests which are
+ *                              part of this suite
+ */
+struct kutf_suite {
+	struct kutf_application        *app;
+	const char                     *name;
+	union kutf_callback_data       suite_data;
+	void *(*create_fixture)(struct kutf_context *context);
+	void  (*remove_fixture)(struct kutf_context *context);
+	unsigned int                   fixture_variants;
+	unsigned int                   suite_default_flags;
+	struct list_head               node;
+	struct dentry                  *dir;
+	struct list_head               test_list;
+};
+
+/* ============================================================================
+	Application functions
+============================================================================ */
+
+/**
+ * kutf_create_application() - Create an in kernel test application.
+ * @name:	The name of the test application.
+ *
+ * Return: pointer to the kutf_application  on success or NULL
+ * on failure
+ */
+struct kutf_application *kutf_create_application(const char *name);
+
+/**
+ * kutf_destroy_application() - Destroy an in kernel test application.
+ *
+ * @app:	The test application to destroy.
+ */
+void kutf_destroy_application(struct kutf_application *app);
+
+/* ============================================================================
+	Suite functions
+============================================================================ */
+
+/**
+ * kutf_create_suite() - Create a kernel test suite.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *                      functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *                      is stored in the fixture pointer in the context for
+ *                      use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ *
+ * Suite names must be unique. Should two suites with the same name be
+ * registered with the same application then this function will fail, if they
+ * are registered with different applications then the function will not detect
+ * this and the call will succeed.
+ *
+ * Return: pointer to the created kutf_suite on success or NULL
+ * on failure
+ */
+struct kutf_suite *kutf_create_suite(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context));
+
+/**
+ * kutf_create_suite_with_filters() - Create a kernel test suite with user
+ *                                    defined default filters.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *                      functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *			is stored in the fixture pointer in the context for
+ *			use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ * @filters:		Filters to apply to a test if it doesn't provide its own
+ *
+ * Suite names must be unique. Should two suites with the same name be
+ * registered with the same application then this function will fail, if they
+ * are registered with different applications then the function will not detect
+ * this and the call will succeed.
+ *
+ * Return: pointer to the created kutf_suite on success or NULL on failure
+ */
+struct kutf_suite *kutf_create_suite_with_filters(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters);
+
+/**
+ * kutf_create_suite_with_filters_and_data() - Create a kernel test suite with
+ *                                             user defined default filters.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *			functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *			is stored in the fixture pointer in the context for
+ *			use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ * @filters:		Filters to apply to a test if it doesn't provide its own
+ * @suite_data:		Suite specific callback data, provided during the
+ *			running of the test in the kutf_context
+ *
+ * Return: pointer to the created kutf_suite on success or NULL
+ * on failure
+ */
+struct kutf_suite *kutf_create_suite_with_filters_and_data(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data suite_data);
+
+/**
+ * kutf_add_test() - Add a test to a kernel test suite.
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ *
+ * Note: As no filters are provided the test will use the suite filters instead
+ */
+void kutf_add_test(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context));
+
+/**
+ * kutf_add_test_with_filters() - Add a test to a kernel test suite with filters
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ * @filters:	A set of filtering flags, assigning test categories.
+ */
+void kutf_add_test_with_filters(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters);
+
+/**
+ * kutf_add_test_with_filters_and_data() - Add a test to a kernel test suite
+ *					   with filters.
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ * @filters:	A set of filtering flags, assigning test categories.
+ * @test_data:	Test specific callback data, provided during the
+ *		running of the test in the kutf_context
+ */
+void kutf_add_test_with_filters_and_data(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data test_data);
+
+
+/* ============================================================================
+	Test functions
+============================================================================ */
+/**
+ * kutf_test_log_result_external() - Log a result which has been created
+ *                                   externally into a in a standard form
+ *                                   recognized by the log parser.
+ * @context:	The test context the test is running in
+ * @message:	The message for this result
+ * @new_status:	The result status of this log message
+ */
+void kutf_test_log_result_external(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status);
+
+/**
+ * kutf_test_expect_abort() - Tell the kernel that you expect the current
+ *                            fixture to produce an abort.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_abort(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_fatal() - Tell the kernel that you expect the current
+ *                            fixture to produce a fatal error.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_fatal(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_fail() - Tell the kernel that you expect the current
+ *                           fixture to fail.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_fail(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_warn() - Tell the kernel that you expect the current
+ *                           fixture to produce a warning.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_warn(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_pass() - Tell the kernel that you expect the current
+ *                           fixture to pass.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_pass(struct kutf_context *context);
+
+/**
+ * kutf_test_skip() - Tell the kernel that the test should be skipped.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_skip(struct kutf_context *context);
+
+/**
+ * kutf_test_skip_msg() - Tell the kernel that this test has been skipped,
+ *                        supplying a reason string.
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the reason for the skip.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a prebaked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_skip_msg(struct kutf_context *context, const char *message);
+
+/**
+ * kutf_test_pass() - Tell the kernel that this test has passed.
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the reason for the pass.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_pass(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_debug() - Send a debug message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the debug information.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_debug(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_info() - Send an information message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the information message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_info(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_warn() - Send a warning message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the warning message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_warn(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_fail() - Tell the kernel that a test has failed
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the failure message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_fail(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_fatal() - Tell the kernel that a test has triggered a fatal error
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the fatal error message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_fatal(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_abort() - Tell the kernel that a test triggered an abort in the test
+ *
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_abort(struct kutf_context *context);
+
+#endif	/* _KERNEL_UTF_SUITE_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h b/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h
new file mode 100644
index 0000000..c458c1f
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h
@@ -0,0 +1,55 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KERNEL_UTF_UTILS_H_
+#define _KERNEL_UTF_UTILS_H_
+
+/* kutf_utils.h
+ * Utilities for the kernel UTF test infrastructure.
+ *
+ * This collection of library functions are provided for use by kernel UTF
+ * and users of kernel UTF which don't directly fit within the other
+ * code modules.
+ */
+
+#include <kutf/kutf_mem.h>
+
+/**
+ * Maximum size of the message strings within kernel UTF, messages longer then
+ * this will be truncated.
+ */
+#define KUTF_MAX_DSPRINTF_LEN	1024
+
+/**
+ * kutf_dsprintf() - dynamic sprintf
+ * @pool:	memory pool to allocate from
+ * @fmt:	The format string describing the string to document.
+ * @...		The parameters to feed in to the format string.
+ *
+ * This function implements sprintf which dynamically allocates memory to store
+ * the string. The library will free the memory containing the string when the
+ * result set is cleared or destroyed.
+ *
+ * Note The returned string may be truncated to fit an internal temporary
+ * buffer, which is KUTF_MAX_DSPRINTF_LEN bytes in length.
+ *
+ * Return: Returns pointer to allocated string, or NULL on error.
+ */
+const char *kutf_dsprintf(struct kutf_mempool *pool,
+		const char *fmt, ...);
+
+#endif	/* _KERNEL_UTF_UTILS_H_ */
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild
new file mode 100644
index 0000000..97f8005
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ccflags-y += -I$(src)/../include
+
+obj-$(CONFIG_MALI_KUTF) += kutf.o
+
+kutf-y := kutf_mem.o kutf_resultset.o kutf_suite.o kutf_utils.o kutf_helpers.o kutf_helpers_user.o
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig
new file mode 100644
index 0000000..6a87bdb
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig
@@ -0,0 +1,22 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+config MALI_KUTF
+ tristate "Mali Kernel Unit Test Framework"
+ default m
+ help
+   Enables MALI testing framework. To compile it as a module,
+   choose M here - this will generate a single module called kutf.
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile
new file mode 100644
index 0000000..010c92c
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile
@@ -0,0 +1,29 @@
+#
+# (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS=-I$(CURDIR)/../include modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c
new file mode 100644
index 0000000..bf887a5
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* Kernel UTF test helpers */
+#include <kutf/kutf_helpers.h>
+
+#include <linux/err.h>
+#include <linux/jiffies.h>
+#include <linux/sched.h>
+#include <linux/preempt.h>
+#include <linux/wait.h>
+#include <linux/uaccess.h>
+
+static DEFINE_SPINLOCK(kutf_input_lock);
+
+static bool pending_input(struct kutf_context *context)
+{
+	bool input_pending;
+
+	spin_lock(&kutf_input_lock);
+
+	input_pending = !list_empty(&context->userdata.input_head);
+
+	spin_unlock(&kutf_input_lock);
+
+	return input_pending;
+}
+
+char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size)
+{
+	struct kutf_userdata_line *line;
+
+	spin_lock(&kutf_input_lock);
+
+	while (list_empty(&context->userdata.input_head)) {
+		int err;
+
+		kutf_set_waiting_for_input(context->result_set);
+
+		spin_unlock(&kutf_input_lock);
+
+		err = wait_event_interruptible(context->userdata.input_waitq,
+				pending_input(context));
+
+		if (err)
+			return ERR_PTR(-EINTR);
+
+		spin_lock(&kutf_input_lock);
+	}
+
+	line = list_first_entry(&context->userdata.input_head,
+			struct kutf_userdata_line, node);
+	if (line->str) {
+		/*
+		 * Unless it is the end-of-input marker,
+		 * remove it from the list
+		 */
+		list_del(&line->node);
+	}
+
+	spin_unlock(&kutf_input_lock);
+
+	if (str_size)
+		*str_size = line->size;
+	return line->str;
+}
+
+int kutf_helper_input_enqueue(struct kutf_context *context,
+		const char __user *str, size_t size)
+{
+	struct kutf_userdata_line *line;
+
+	line = kutf_mempool_alloc(&context->fixture_pool,
+			sizeof(*line) + size + 1);
+	if (!line)
+		return -ENOMEM;
+	if (str) {
+		unsigned long bytes_not_copied;
+
+		line->size = size;
+		line->str = (void *)(line + 1);
+		bytes_not_copied = copy_from_user(line->str, str, size);
+		if (bytes_not_copied != 0)
+			return -EFAULT;
+		/* Zero terminate the string */
+		line->str[size] = '\0';
+	} else {
+		/* This is used to mark the end of input */
+		WARN_ON(size);
+		line->size = 0;
+		line->str = NULL;
+	}
+
+	spin_lock(&kutf_input_lock);
+
+	list_add_tail(&line->node, &context->userdata.input_head);
+
+	kutf_clear_waiting_for_input(context->result_set);
+
+	spin_unlock(&kutf_input_lock);
+
+	wake_up(&context->userdata.input_waitq);
+
+	return 0;
+}
+
+void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context)
+{
+	kutf_helper_input_enqueue(context, NULL, 0);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c
new file mode 100644
index 0000000..9e8ab99
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c
@@ -0,0 +1,462 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* Kernel UTF test helpers that mirror those for kutf-userside */
+#include <kutf/kutf_helpers_user.h>
+#include <kutf/kutf_helpers.h>
+#include <kutf/kutf_utils.h>
+
+#include <linux/err.h>
+#include <linux/slab.h>
+
+const char *valtype_names[] = {
+	"INVALID",
+	"U64",
+	"STR",
+};
+
+static const char *get_val_type_name(enum kutf_helper_valtype valtype)
+{
+	/* enums can be signed or unsigned (implementation dependant), so
+	 * enforce it to prevent:
+	 * a) "<0 comparison on unsigned type" warning - if we did both upper
+	 *    and lower bound check
+	 * b) incorrect range checking if it was a signed type - if we did
+	 *    upper bound check only */
+	unsigned int type_idx = (unsigned int)valtype;
+
+	if (type_idx >= (unsigned int)KUTF_HELPER_VALTYPE_COUNT)
+		type_idx = (unsigned int)KUTF_HELPER_VALTYPE_INVALID;
+
+	return valtype_names[type_idx];
+}
+
+/* Check up to str_len chars of val_str to see if it's a valid value name:
+ *
+ * - Has between 1 and KUTF_HELPER_MAX_VAL_NAME_LEN characters before the \0 terminator
+ * - And, each char is in the character set [A-Z0-9_] */
+static int validate_val_name(const char *val_str, int str_len)
+{
+	int i = 0;
+
+	for (i = 0; str_len && i <= KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'; ++i, --str_len) {
+		char val_chr = val_str[i];
+
+		if (val_chr >= 'A' && val_chr <= 'Z')
+			continue;
+		if (val_chr >= '0' && val_chr <= '9')
+			continue;
+		if (val_chr == '_')
+			continue;
+
+		/* Character not in the set [A-Z0-9_] - report error */
+		return 1;
+	}
+
+	/* Names of 0 length are not valid */
+	if (i == 0)
+		return 1;
+	/* Length greater than KUTF_HELPER_MAX_VAL_NAME_LEN not allowed */
+	if (i > KUTF_HELPER_MAX_VAL_NAME_LEN || (i == KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'))
+		return 1;
+
+	return 0;
+}
+
+/* Find the length of the valid part of the string when it will be in quotes
+ * e.g. "str"
+ *
+ * That is, before any '\\', '\n' or '"' characters. This is so we don't have
+ * to escape the string */
+static int find_quoted_string_valid_len(const char *str)
+{
+	char *ptr;
+	const char *check_chars = "\\\n\"";
+
+	ptr = strpbrk(str, check_chars);
+	if (ptr)
+		return (int)(ptr-str);
+
+	return (int)strlen(str);
+}
+
+static int kutf_helper_userdata_enqueue(struct kutf_context *context,
+		const char *str)
+{
+	char *str_copy;
+	size_t len;
+	int err;
+
+	len = strlen(str)+1;
+
+	str_copy = kutf_mempool_alloc(&context->fixture_pool, len);
+	if (!str_copy)
+		return -ENOMEM;
+
+	strcpy(str_copy, str);
+
+	err = kutf_add_result(context, KUTF_RESULT_USERDATA, str_copy);
+
+	return err;
+}
+
+#define MAX_U64_HEX_LEN 16
+/* (Name size) + ("=0x" size) + (64-bit hex value size) + (terminator) */
+#define NAMED_U64_VAL_BUF_SZ (KUTF_HELPER_MAX_VAL_NAME_LEN + 3 + MAX_U64_HEX_LEN + 1)
+
+int kutf_helper_send_named_u64(struct kutf_context *context,
+		const char *val_name, u64 val)
+{
+	int ret = 1;
+	char msgbuf[NAMED_U64_VAL_BUF_SZ];
+	const char *errmsg = NULL;
+
+	if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': Invalid value name", val_name);
+		goto out_err;
+	}
+
+	ret = snprintf(msgbuf, NAMED_U64_VAL_BUF_SZ, "%s=0x%llx", val_name, val);
+	if (ret >= NAMED_U64_VAL_BUF_SZ || ret < 0) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': snprintf() problem buffer size==%d ret=%d",
+				val_name, NAMED_U64_VAL_BUF_SZ, ret);
+		goto out_err;
+	}
+
+	ret = kutf_helper_userdata_enqueue(context, msgbuf);
+	if (ret) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': send returned %d",
+				val_name, ret);
+		goto out_err;
+	}
+
+	return ret;
+out_err:
+	kutf_test_fail(context, errmsg);
+	return ret;
+}
+EXPORT_SYMBOL(kutf_helper_send_named_u64);
+
+#define NAMED_VALUE_SEP "="
+#define NAMED_STR_START_DELIM NAMED_VALUE_SEP "\""
+#define NAMED_STR_END_DELIM "\""
+
+int kutf_helper_max_str_len_for_kern(const char *val_name,
+		int kern_buf_sz)
+{
+	const int val_name_len = strlen(val_name);
+	const int start_delim_len = strlen(NAMED_STR_START_DELIM);
+	const int end_delim_len = strlen(NAMED_STR_END_DELIM);
+	int max_msg_len = kern_buf_sz;
+	int max_str_len;
+
+	max_str_len = max_msg_len - val_name_len - start_delim_len -
+		end_delim_len;
+
+	return max_str_len;
+}
+EXPORT_SYMBOL(kutf_helper_max_str_len_for_kern);
+
+int kutf_helper_send_named_str(struct kutf_context *context,
+		const char *val_name,
+		const char *val_str)
+{
+	int val_str_len;
+	int str_buf_sz;
+	char *str_buf = NULL;
+	int ret = 1;
+	char *copy_ptr;
+	int val_name_len;
+	int start_delim_len = strlen(NAMED_STR_START_DELIM);
+	int end_delim_len = strlen(NAMED_STR_END_DELIM);
+	const char *errmsg = NULL;
+
+	if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': Invalid value name", val_name);
+		goto out_err;
+	}
+	val_name_len = strlen(val_name);
+
+	val_str_len = find_quoted_string_valid_len(val_str);
+
+	/* (name length) + ("=\"" length) + (val_str len) + ("\"" length) + terminator */
+	str_buf_sz = val_name_len + start_delim_len + val_str_len + end_delim_len + 1;
+
+	/* Using kmalloc() here instead of mempool since we know we need to free
+	 * before we return */
+	str_buf = kmalloc(str_buf_sz, GFP_KERNEL);
+	if (!str_buf) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send str value named '%s': kmalloc failed, str_buf_sz=%d",
+				val_name, str_buf_sz);
+		goto out_err;
+	}
+	copy_ptr = str_buf;
+
+	/* Manually copy each string component instead of snprintf because
+	 * val_str may need to end early, and less error path handling */
+
+	/* name */
+	memcpy(copy_ptr, val_name, val_name_len);
+	copy_ptr += val_name_len;
+
+	/* str start delimiter */
+	memcpy(copy_ptr, NAMED_STR_START_DELIM, start_delim_len);
+	copy_ptr += start_delim_len;
+
+	/* str value */
+	memcpy(copy_ptr, val_str, val_str_len);
+	copy_ptr += val_str_len;
+
+	/* str end delimiter */
+	memcpy(copy_ptr, NAMED_STR_END_DELIM, end_delim_len);
+	copy_ptr += end_delim_len;
+
+	/* Terminator */
+	*copy_ptr = '\0';
+
+	ret = kutf_helper_userdata_enqueue(context, str_buf);
+
+	if (ret) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send str value named '%s': send returned %d",
+				val_name, ret);
+		goto out_err;
+	}
+
+	kfree(str_buf);
+	return ret;
+
+out_err:
+	kutf_test_fail(context, errmsg);
+	kfree(str_buf);
+	return ret;
+}
+EXPORT_SYMBOL(kutf_helper_send_named_str);
+
+int kutf_helper_receive_named_val(
+		struct kutf_context *context,
+		struct kutf_helper_named_val *named_val)
+{
+	size_t recv_sz;
+	char *recv_str;
+	char *search_ptr;
+	char *name_str = NULL;
+	int name_len;
+	int strval_len;
+	enum kutf_helper_valtype type = KUTF_HELPER_VALTYPE_INVALID;
+	char *strval = NULL;
+	u64 u64val = 0;
+	int err = KUTF_HELPER_ERR_INVALID_VALUE;
+
+	recv_str = kutf_helper_input_dequeue(context, &recv_sz);
+	if (!recv_str)
+		return -EBUSY;
+	else if (IS_ERR(recv_str))
+		return PTR_ERR(recv_str);
+
+	/* Find the '=', grab the name and validate it */
+	search_ptr = strnchr(recv_str, recv_sz, NAMED_VALUE_SEP[0]);
+	if (search_ptr) {
+		name_len = search_ptr - recv_str;
+		if (!validate_val_name(recv_str, name_len)) {
+			/* no need to reallocate - just modify string in place */
+			name_str = recv_str;
+			name_str[name_len] = '\0';
+
+			/* Move until after the '=' */
+			recv_str += (name_len + 1);
+			recv_sz -= (name_len + 1);
+		}
+	}
+	if (!name_str) {
+		pr_err("Invalid name part for received string '%s'\n",
+				recv_str);
+		return KUTF_HELPER_ERR_INVALID_NAME;
+	}
+
+	/* detect value type */
+	if (*recv_str == NAMED_STR_START_DELIM[1]) {
+		/* string delimiter start*/
+		++recv_str;
+		--recv_sz;
+
+		/* Find end of string */
+		search_ptr = strnchr(recv_str, recv_sz, NAMED_STR_END_DELIM[0]);
+		if (search_ptr) {
+			strval_len = search_ptr - recv_str;
+			/* Validate the string to ensure it contains no quotes */
+			if (strval_len == find_quoted_string_valid_len(recv_str)) {
+				/* no need to reallocate - just modify string in place */
+				strval = recv_str;
+				strval[strval_len] = '\0';
+
+				/* Move until after the end delimiter */
+				recv_str += (strval_len + 1);
+				recv_sz -= (strval_len + 1);
+				type = KUTF_HELPER_VALTYPE_STR;
+			} else {
+				pr_err("String value contains invalid characters in rest of received string '%s'\n", recv_str);
+				err = KUTF_HELPER_ERR_CHARS_AFTER_VAL;
+			}
+		} else {
+			pr_err("End of string delimiter not found in rest of received string '%s'\n", recv_str);
+			err = KUTF_HELPER_ERR_NO_END_DELIMITER;
+		}
+	} else {
+		/* possibly a number value - strtoull will parse it */
+		err = kstrtoull(recv_str, 0, &u64val);
+		/* unlike userspace can't get an end ptr, but if kstrtoull()
+		 * reads characters after the number it'll report -EINVAL */
+		if (!err) {
+			int len_remain = strnlen(recv_str, recv_sz);
+
+			type = KUTF_HELPER_VALTYPE_U64;
+			recv_str += len_remain;
+			recv_sz -= len_remain;
+		} else {
+			/* special case: not a number, report as such */
+			pr_err("Rest of received string was not a numeric value or quoted string value: '%s'\n", recv_str);
+		}
+	}
+
+	if (type == KUTF_HELPER_VALTYPE_INVALID)
+		return err;
+
+	/* Any remaining characters - error */
+	if (strnlen(recv_str, recv_sz) != 0) {
+		pr_err("Characters remain after value of type %s: '%s'\n",
+				get_val_type_name(type), recv_str);
+		return KUTF_HELPER_ERR_CHARS_AFTER_VAL;
+	}
+
+	/* Success - write into the output structure */
+	switch (type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		named_val->u.val_u64 = u64val;
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		named_val->u.val_str = strval;
+		break;
+	default:
+		pr_err("Unreachable, fix kutf_helper_receive_named_val\n");
+		/* Coding error, report as though 'run' file failed */
+		return -EINVAL;
+	}
+
+	named_val->val_name = name_str;
+	named_val->type = type;
+
+	return KUTF_HELPER_ERR_NONE;
+}
+EXPORT_SYMBOL(kutf_helper_receive_named_val);
+
+#define DUMMY_MSG "<placeholder due to test fail>"
+int kutf_helper_receive_check_val(
+		struct kutf_helper_named_val *named_val,
+		struct kutf_context *context,
+		const char *expect_val_name,
+		enum kutf_helper_valtype expect_val_type)
+{
+	int err;
+
+	err = kutf_helper_receive_named_val(context, named_val);
+	if (err < 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to receive value named '%s'",
+				expect_val_name);
+		kutf_test_fail(context, msg);
+		return err;
+	} else if (err > 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Named-value parse error when expecting value named '%s'",
+				expect_val_name);
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+	if (strcmp(named_val->val_name, expect_val_name) != 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Expecting to receive value named '%s' but got '%s'",
+				expect_val_name, named_val->val_name);
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+
+	if (named_val->type != expect_val_type) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Expecting value named '%s' to be of type %s but got %s",
+				expect_val_name, get_val_type_name(expect_val_type),
+				get_val_type_name(named_val->type));
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+	return err;
+
+out_fail_and_fixup:
+	/* Produce a valid but incorrect value */
+	switch (expect_val_type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		named_val->u.val_u64 = 0ull;
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		{
+			char *str = kutf_mempool_alloc(&context->fixture_pool, sizeof(DUMMY_MSG));
+
+			if (!str)
+				return -1;
+
+			strcpy(str, DUMMY_MSG);
+			named_val->u.val_str = str;
+			break;
+		}
+	default:
+		break;
+	}
+
+	/* Indicate that this is invalid */
+	named_val->type = KUTF_HELPER_VALTYPE_INVALID;
+
+	/* But at least allow the caller to continue in the test with failures */
+	return 0;
+}
+EXPORT_SYMBOL(kutf_helper_receive_check_val);
+
+void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val)
+{
+	switch (named_val->type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		pr_warn("%s=0x%llx\n", named_val->val_name, named_val->u.val_u64);
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		pr_warn("%s=\"%s\"\n", named_val->val_name, named_val->u.val_str);
+		break;
+	case KUTF_HELPER_VALTYPE_INVALID:
+		pr_warn("%s is invalid\n", named_val->val_name);
+		break;
+	default:
+		pr_warn("%s has unknown type %d\n", named_val->val_name, named_val->type);
+		break;
+	}
+}
+EXPORT_SYMBOL(kutf_helper_output_named_val);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c
new file mode 100644
index 0000000..a75e15f
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* Kernel UTF memory management functions */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include <kutf/kutf_mem.h>
+
+
+/**
+ * struct kutf_alloc_entry - Structure representing an allocation.
+ * @node:	List node for use with kutf_mempool.
+ * @data:	Data area of the allocation
+ */
+struct kutf_alloc_entry {
+	struct list_head node;
+	u8 data[0];
+};
+
+int kutf_mempool_init(struct kutf_mempool *pool)
+{
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		return -1;
+	}
+
+	INIT_LIST_HEAD(&pool->head);
+	mutex_init(&pool->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(kutf_mempool_init);
+
+void kutf_mempool_destroy(struct kutf_mempool *pool)
+{
+	struct list_head *remove;
+	struct list_head *tmp;
+
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		return;
+	}
+
+	mutex_lock(&pool->lock);
+	list_for_each_safe(remove, tmp, &pool->head) {
+		struct kutf_alloc_entry *remove_alloc;
+
+		remove_alloc = list_entry(remove, struct kutf_alloc_entry, node);
+		list_del(&remove_alloc->node);
+		kfree(remove_alloc);
+	}
+	mutex_unlock(&pool->lock);
+
+}
+EXPORT_SYMBOL(kutf_mempool_destroy);
+
+void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size)
+{
+	struct kutf_alloc_entry *ret;
+
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		goto fail_pool;
+	}
+
+	mutex_lock(&pool->lock);
+
+	ret = kmalloc(sizeof(*ret) + size, GFP_KERNEL);
+	if (!ret) {
+		pr_err("Failed to allocate memory\n");
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&ret->node);
+	list_add(&ret->node, &pool->head);
+
+	mutex_unlock(&pool->lock);
+
+	return &ret->data[0];
+
+fail_alloc:
+	mutex_unlock(&pool->lock);
+fail_pool:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_mempool_alloc);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c
new file mode 100644
index 0000000..41645a4
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c
@@ -0,0 +1,159 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* Kernel UTF result management functions */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/err.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_resultset.h>
+
+/* Lock to protect all result structures */
+static DEFINE_SPINLOCK(kutf_result_lock);
+
+struct kutf_result_set *kutf_create_result_set(void)
+{
+	struct kutf_result_set *set;
+
+	set = kmalloc(sizeof(*set), GFP_KERNEL);
+	if (!set) {
+		pr_err("Failed to allocate resultset");
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&set->results);
+	init_waitqueue_head(&set->waitq);
+	set->flags = 0;
+
+	return set;
+
+fail_alloc:
+	return NULL;
+}
+
+int kutf_add_result(struct kutf_context *context,
+		enum kutf_result_status status,
+		const char *message)
+{
+	struct kutf_mempool *mempool = &context->fixture_pool;
+	struct kutf_result_set *set = context->result_set;
+	/* Create the new result */
+	struct kutf_result *new_result;
+
+	BUG_ON(set == NULL);
+
+	new_result = kutf_mempool_alloc(mempool, sizeof(*new_result));
+	if (!new_result) {
+		pr_err("Result allocation failed\n");
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&new_result->node);
+	new_result->status = status;
+	new_result->message = message;
+
+	spin_lock(&kutf_result_lock);
+
+	list_add_tail(&new_result->node, &set->results);
+
+	spin_unlock(&kutf_result_lock);
+
+	wake_up(&set->waitq);
+
+	return 0;
+}
+
+void kutf_destroy_result_set(struct kutf_result_set *set)
+{
+	if (!list_empty(&set->results))
+		pr_err("kutf_destroy_result_set: Unread results from test\n");
+
+	kfree(set);
+}
+
+static bool kutf_has_result(struct kutf_result_set *set)
+{
+	bool has_result;
+
+	spin_lock(&kutf_result_lock);
+	if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT)
+		/* Pretend there are results if waiting for input */
+		has_result = true;
+	else
+		has_result = !list_empty(&set->results);
+	spin_unlock(&kutf_result_lock);
+
+	return has_result;
+}
+
+struct kutf_result *kutf_remove_result(struct kutf_result_set *set)
+{
+	struct kutf_result *result = NULL;
+	int ret;
+
+	do {
+		ret = wait_event_interruptible(set->waitq,
+				kutf_has_result(set));
+
+		if (ret)
+			return ERR_PTR(ret);
+
+		spin_lock(&kutf_result_lock);
+
+		if (!list_empty(&set->results)) {
+			result = list_first_entry(&set->results,
+					struct kutf_result,
+					node);
+			list_del(&result->node);
+		} else if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) {
+			/* Return a fake result */
+			static struct kutf_result waiting = {
+				.status = KUTF_RESULT_USERDATA_WAIT
+			};
+			result = &waiting;
+		}
+		/* If result == NULL then there was a race with the event
+		 * being removed between the check in kutf_has_result and
+		 * the lock being obtained. In this case we retry
+		 */
+
+		spin_unlock(&kutf_result_lock);
+	} while (result == NULL);
+
+	return result;
+}
+
+void kutf_set_waiting_for_input(struct kutf_result_set *set)
+{
+	spin_lock(&kutf_result_lock);
+	set->flags |= KUTF_RESULT_SET_WAITING_FOR_INPUT;
+	spin_unlock(&kutf_result_lock);
+
+	wake_up(&set->waitq);
+}
+
+void kutf_clear_waiting_for_input(struct kutf_result_set *set)
+{
+	spin_lock(&kutf_result_lock);
+	set->flags &= ~KUTF_RESULT_SET_WAITING_FOR_INPUT;
+	spin_unlock(&kutf_result_lock);
+}
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c
new file mode 100644
index 0000000..4968f24
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c
@@ -0,0 +1,1220 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* Kernel UTF suite, test and fixture management including user to kernel
+ * interaction */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/atomic.h>
+#include <linux/sched.h>
+
+#include <generated/autoconf.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_resultset.h>
+#include <kutf/kutf_utils.h>
+#include <kutf/kutf_helpers.h>
+
+#if defined(CONFIG_DEBUG_FS)
+
+/**
+ * struct kutf_application - Structure which represents kutf application
+ * @name:	The name of this test application.
+ * @dir:	The debugfs directory for this test
+ * @suite_list:	List head to store all the suites which are part of this
+ *              application
+ */
+struct kutf_application {
+	const char         *name;
+	struct dentry      *dir;
+	struct list_head   suite_list;
+};
+
+/**
+ * struct kutf_test_function - Structure which represents kutf test function
+ * @suite:		Back reference to the suite this test function
+ *                      belongs to
+ * @filters:		Filters that apply to this test function
+ * @test_id:		Test ID
+ * @execute:		Function to run for this test
+ * @test_data:		Static data for this test
+ * @node:		List node for test_list
+ * @variant_list:	List head to store all the variants which can run on
+ *                      this function
+ * @dir:		debugfs directory for this test function
+ */
+struct kutf_test_function {
+	struct kutf_suite  *suite;
+	unsigned int       filters;
+	unsigned int       test_id;
+	void (*execute)(struct kutf_context *context);
+	union kutf_callback_data test_data;
+	struct list_head   node;
+	struct list_head   variant_list;
+	struct dentry      *dir;
+};
+
+/**
+ * struct kutf_test_fixture - Structure which holds information on the kutf
+ *                            test fixture
+ * @test_func:		Test function this fixture belongs to
+ * @fixture_index:	Index of this fixture
+ * @node:		List node for variant_list
+ * @dir:		debugfs directory for this test fixture
+ */
+struct kutf_test_fixture {
+	struct kutf_test_function *test_func;
+	unsigned int              fixture_index;
+	struct list_head          node;
+	struct dentry             *dir;
+};
+
+static struct dentry *base_dir;
+static struct workqueue_struct *kutf_workq;
+
+/**
+ * struct kutf_convert_table - Structure which keeps test results
+ * @result_name:	Status of the test result
+ * @result:		Status value for a single test
+ */
+struct kutf_convert_table {
+	char                    result_name[50];
+	enum kutf_result_status result;
+};
+
+struct kutf_convert_table kutf_convert[] = {
+#define ADD_UTF_RESULT(_name) \
+{ \
+	#_name, \
+	_name, \
+},
+ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK)
+ADD_UTF_RESULT(KUTF_RESULT_SKIP)
+ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN)
+ADD_UTF_RESULT(KUTF_RESULT_PASS)
+ADD_UTF_RESULT(KUTF_RESULT_DEBUG)
+ADD_UTF_RESULT(KUTF_RESULT_INFO)
+ADD_UTF_RESULT(KUTF_RESULT_WARN)
+ADD_UTF_RESULT(KUTF_RESULT_FAIL)
+ADD_UTF_RESULT(KUTF_RESULT_FATAL)
+ADD_UTF_RESULT(KUTF_RESULT_ABORT)
+};
+
+#define UTF_CONVERT_SIZE (ARRAY_SIZE(kutf_convert))
+
+/**
+ * kutf_create_context() - Create a test context in which a specific fixture
+ *                         of an application will be run and its results
+ *                         reported back to the user
+ * @test_fix:	Test fixture to be run.
+ *
+ * The context's refcount will be initialized to 1.
+ *
+ * Return: Returns the created test context on success or NULL on failure
+ */
+static struct kutf_context *kutf_create_context(
+		struct kutf_test_fixture *test_fix);
+
+/**
+ * kutf_destroy_context() - Destroy a previously created test context, only
+ *                          once its refcount has become zero
+ * @kref:	pointer to kref member within the context
+ *
+ * This should only be used via a kref_put() call on the context's kref member
+ */
+static void kutf_destroy_context(struct kref *kref);
+
+/**
+ * kutf_context_get() - increment refcount on a context
+ * @context:	the kutf context
+ *
+ * This must be used when the lifetime of the context might exceed that of the
+ * thread creating @context
+ */
+static void kutf_context_get(struct kutf_context *context);
+
+/**
+ * kutf_context_put() - decrement refcount on a context, destroying it when it
+ *                      reached zero
+ * @context:	the kutf context
+ *
+ * This must be used only after a corresponding kutf_context_get() call on
+ * @context, and the caller no longer needs access to @context.
+ */
+static void kutf_context_put(struct kutf_context *context);
+
+/**
+ * kutf_set_result() - Set the test result against the specified test context
+ * @context:	Test context
+ * @status:	Result status
+ */
+static void kutf_set_result(struct kutf_context *context,
+		enum kutf_result_status status);
+
+/**
+ * kutf_set_expected_result() - Set the expected test result for the specified
+ *                              test context
+ * @context:		Test context
+ * @expected_status:	Expected result status
+ */
+static void kutf_set_expected_result(struct kutf_context *context,
+		enum kutf_result_status expected_status);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0))
+/* Pre 3.4.0 kernels don't have the simple_open helper */
+
+/**
+ * simple_open() - Helper for file opening which stores the inode private data
+ *                 into the file private data
+ * @inode:	File entry representation
+ * @file:	A specific opening of the file
+ *
+ * Return: always 0; if inode private data do not exist, the file will not
+ *         be assigned private data
+ */
+static int simple_open(struct inode *inode, struct file *file)
+{
+	if (inode->i_private)
+		file->private_data = inode->i_private;
+	return 0;
+}
+#endif
+
+/**
+ * kutf_result_to_string() - Converts a KUTF result into a string
+ * @result_str:      Output result string
+ * @result:          Result status to convert
+ *
+ * Return: 1 if test result was successfully converted to string, 0 otherwise
+ */
+static int kutf_result_to_string(char **result_str,
+		enum kutf_result_status result)
+{
+	int i;
+	int ret = 0;
+
+	for (i = 0; i < UTF_CONVERT_SIZE; i++) {
+		if (result == kutf_convert[i].result) {
+			*result_str = kutf_convert[i].result_name;
+			ret = 1;
+		}
+	}
+	return ret;
+}
+
+/**
+ * kutf_debugfs_const_string_read() - Simple debugfs read callback which
+ *                                    returns a constant string
+ * @file:	Opened file to read from
+ * @buf:	User buffer to write the data into
+ * @len:	Amount of data to read
+ * @ppos:	Offset into file to read from
+ *
+ * Return: On success, the number of bytes read and offset @ppos advanced by
+ *         this number; on error, negative value
+ */
+static ssize_t kutf_debugfs_const_string_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	char *str = file->private_data;
+
+	return simple_read_from_buffer(buf, len, ppos, str, strlen(str));
+}
+
+static const struct file_operations kutf_debugfs_const_string_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = kutf_debugfs_const_string_read,
+	.llseek  = default_llseek,
+};
+
+/**
+ * kutf_add_explicit_result() - Check if an explicit result needs to be added
+ * @context:	KUTF test context
+ */
+static void kutf_add_explicit_result(struct kutf_context *context)
+{
+	switch (context->expected_status) {
+	case KUTF_RESULT_UNKNOWN:
+		if (context->status == KUTF_RESULT_UNKNOWN)
+			kutf_test_pass(context, "(implicit pass)");
+		break;
+
+	case KUTF_RESULT_WARN:
+		if (context->status == KUTF_RESULT_WARN)
+			kutf_test_pass(context,
+					"Pass (expected warn occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected warn missing)");
+		break;
+
+	case KUTF_RESULT_FAIL:
+		if (context->status == KUTF_RESULT_FAIL)
+			kutf_test_pass(context,
+					"Pass (expected fail occurred)");
+		else if (context->status != KUTF_RESULT_SKIP) {
+			/* Force the expected status so the fail gets logged */
+			context->expected_status = KUTF_RESULT_PASS;
+			kutf_test_fail(context,
+					"Fail (expected fail missing)");
+		}
+		break;
+
+	case KUTF_RESULT_FATAL:
+		if (context->status == KUTF_RESULT_FATAL)
+			kutf_test_pass(context,
+					"Pass (expected fatal occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected fatal missing)");
+		break;
+
+	case KUTF_RESULT_ABORT:
+		if (context->status == KUTF_RESULT_ABORT)
+			kutf_test_pass(context,
+					"Pass (expected abort occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected abort missing)");
+		break;
+	default:
+		break;
+	}
+}
+
+static void kutf_run_test(struct work_struct *data)
+{
+	struct kutf_context *test_context = container_of(data,
+			struct kutf_context, work);
+	struct kutf_suite *suite = test_context->suite;
+	struct kutf_test_function *test_func;
+
+	test_func = test_context->test_fix->test_func;
+
+	/*
+	 * Call the create fixture function if required before the
+	 * fixture is run
+	 */
+	if (suite->create_fixture)
+		test_context->fixture = suite->create_fixture(test_context);
+
+	/* Only run the test if the fixture was created (if required) */
+	if ((suite->create_fixture && test_context->fixture) ||
+			(!suite->create_fixture)) {
+		/* Run this fixture */
+		test_func->execute(test_context);
+
+		if (suite->remove_fixture)
+			suite->remove_fixture(test_context);
+
+		kutf_add_explicit_result(test_context);
+	}
+
+	kutf_add_result(test_context, KUTF_RESULT_TEST_FINISHED, NULL);
+
+	kutf_context_put(test_context);
+}
+
+/**
+ * kutf_debugfs_run_open() Debugfs open callback for the "run" entry.
+ * @inode:	inode of the opened file
+ * @file:	Opened file to read from
+ *
+ * This function creates a KUTF context and queues it onto a workqueue to be
+ * run asynchronously. The resulting file descriptor can be used to communicate
+ * userdata to the test and to read back the results of the test execution.
+ *
+ * Return: 0 on success
+ */
+static int kutf_debugfs_run_open(struct inode *inode, struct file *file)
+{
+	struct kutf_test_fixture *test_fix = inode->i_private;
+	struct kutf_context *test_context;
+	int err = 0;
+
+	test_context = kutf_create_context(test_fix);
+	if (!test_context) {
+		err = -ENOMEM;
+		goto finish;
+	}
+
+	file->private_data = test_context;
+
+	/* This reference is release by the kutf_run_test */
+	kutf_context_get(test_context);
+
+	queue_work(kutf_workq, &test_context->work);
+
+finish:
+	return err;
+}
+
+#define USERDATA_WARNING_MESSAGE "WARNING: This test requires userdata\n"
+
+/**
+ * kutf_debugfs_run_read() - Debugfs read callback for the "run" entry.
+ * @file:	Opened file to read from
+ * @buf:	User buffer to write the data into
+ * @len:	Amount of data to read
+ * @ppos:	Offset into file to read from
+ *
+ * This function emits the results of the test, blocking until they are
+ * available.
+ *
+ * If the test involves user data then this will also return user data records
+ * to user space. If the test is waiting for user data then this function will
+ * output a message (to make the likes of 'cat' display it), followed by
+ * returning 0 to mark the end of file.
+ *
+ * Results will be emitted one at a time, once all the results have been read
+ * 0 will be returned to indicate there is no more data.
+ *
+ * Return: Number of bytes read.
+ */
+static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct kutf_context *test_context = file->private_data;
+	struct kutf_result *res;
+	unsigned long bytes_not_copied;
+	ssize_t bytes_copied = 0;
+	char *kutf_str_ptr = NULL;
+	size_t kutf_str_len = 0;
+	size_t message_len = 0;
+	char separator = ':';
+	char terminator = '\n';
+
+	res = kutf_remove_result(test_context->result_set);
+
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	/*
+	 * Handle 'fake' results - these results are converted to another
+	 * form before being returned from the kernel
+	 */
+	switch (res->status) {
+	case KUTF_RESULT_TEST_FINISHED:
+		return 0;
+	case KUTF_RESULT_USERDATA_WAIT:
+		if (test_context->userdata.flags &
+				KUTF_USERDATA_WARNING_OUTPUT) {
+			/*
+			 * Warning message already output,
+			 * signal end-of-file
+			 */
+			return 0;
+		}
+
+		message_len = sizeof(USERDATA_WARNING_MESSAGE)-1;
+		if (message_len > len)
+			message_len = len;
+
+		bytes_not_copied = copy_to_user(buf,
+				USERDATA_WARNING_MESSAGE,
+				message_len);
+		if (bytes_not_copied != 0)
+			return -EFAULT;
+		test_context->userdata.flags |= KUTF_USERDATA_WARNING_OUTPUT;
+		return message_len;
+	case KUTF_RESULT_USERDATA:
+		message_len = strlen(res->message);
+		if (message_len > len-1) {
+			message_len = len-1;
+			pr_warn("User data truncated, read not long enough\n");
+		}
+		bytes_not_copied = copy_to_user(buf, res->message,
+				message_len);
+		if (bytes_not_copied != 0) {
+			pr_warn("Failed to copy data to user space buffer\n");
+			return -EFAULT;
+		}
+		/* Finally the terminator */
+		bytes_not_copied = copy_to_user(&buf[message_len],
+				&terminator, 1);
+		if (bytes_not_copied != 0) {
+			pr_warn("Failed to copy data to user space buffer\n");
+			return -EFAULT;
+		}
+		return message_len+1;
+	default:
+		/* Fall through - this is a test result */
+		break;
+	}
+
+	/* Note: This code assumes a result is read completely */
+	kutf_result_to_string(&kutf_str_ptr, res->status);
+	if (kutf_str_ptr)
+		kutf_str_len = strlen(kutf_str_ptr);
+
+	if (res->message)
+		message_len = strlen(res->message);
+
+	if ((kutf_str_len + 1 + message_len + 1) > len) {
+		pr_err("Not enough space in user buffer for a single result");
+		return 0;
+	}
+
+	/* First copy the result string */
+	if (kutf_str_ptr) {
+		bytes_not_copied = copy_to_user(&buf[0], kutf_str_ptr,
+						kutf_str_len);
+		bytes_copied += kutf_str_len - bytes_not_copied;
+		if (bytes_not_copied)
+			goto exit;
+	}
+
+	/* Then the separator */
+	bytes_not_copied = copy_to_user(&buf[bytes_copied],
+					&separator, 1);
+	bytes_copied += 1 - bytes_not_copied;
+	if (bytes_not_copied)
+		goto exit;
+
+	/* Finally Next copy the result string */
+	if (res->message) {
+		bytes_not_copied = copy_to_user(&buf[bytes_copied],
+						res->message, message_len);
+		bytes_copied += message_len - bytes_not_copied;
+		if (bytes_not_copied)
+			goto exit;
+	}
+
+	/* Finally the terminator */
+	bytes_not_copied = copy_to_user(&buf[bytes_copied],
+					&terminator, 1);
+	bytes_copied += 1 - bytes_not_copied;
+
+exit:
+	return bytes_copied;
+}
+
+/**
+ * kutf_debugfs_run_write() Debugfs write callback for the "run" entry.
+ * @file:	Opened file to write to
+ * @buf:	User buffer to read the data from
+ * @len:	Amount of data to write
+ * @ppos:	Offset into file to write to
+ *
+ * This function allows user and kernel to exchange extra data necessary for
+ * the test fixture.
+ *
+ * The data is added to the first struct kutf_context running the fixture
+ *
+ * Return: Number of bytes written
+ */
+static ssize_t kutf_debugfs_run_write(struct file *file,
+		const char __user *buf, size_t len, loff_t *ppos)
+{
+	int ret = 0;
+	struct kutf_context *test_context = file->private_data;
+
+	if (len > KUTF_MAX_LINE_LENGTH)
+		return -EINVAL;
+
+	ret = kutf_helper_input_enqueue(test_context, buf, len);
+	if (ret < 0)
+		return ret;
+
+	return len;
+}
+
+/**
+ * kutf_debugfs_run_release() - Debugfs release callback for the "run" entry.
+ * @inode:	File entry representation
+ * @file:	A specific opening of the file
+ *
+ * Release any resources that were created during the opening of the file
+ *
+ * Note that resources may not be released immediately, that might only happen
+ * later when other users of the kutf_context release their refcount.
+ *
+ * Return: 0 on success
+ */
+static int kutf_debugfs_run_release(struct inode *inode, struct file *file)
+{
+	struct kutf_context *test_context = file->private_data;
+
+	kutf_helper_input_enqueue_end_of_data(test_context);
+
+	kutf_context_put(test_context);
+	return 0;
+}
+
+static const struct file_operations kutf_debugfs_run_ops = {
+	.owner = THIS_MODULE,
+	.open = kutf_debugfs_run_open,
+	.read = kutf_debugfs_run_read,
+	.write = kutf_debugfs_run_write,
+	.release = kutf_debugfs_run_release,
+	.llseek  = default_llseek,
+};
+
+/**
+ * create_fixture_variant() - Creates a fixture variant for the specified
+ *                            test function and index and the debugfs entries
+ *                            that represent it.
+ * @test_func:		Test function
+ * @fixture_index:	Fixture index
+ *
+ * Return: 0 on success, negative value corresponding to error code in failure
+ */
+static int create_fixture_variant(struct kutf_test_function *test_func,
+		unsigned int fixture_index)
+{
+	struct kutf_test_fixture *test_fix;
+	char name[11];	/* Enough to print the MAX_UINT32 + the null terminator */
+	struct dentry *tmp;
+	int err;
+
+	test_fix = kmalloc(sizeof(*test_fix), GFP_KERNEL);
+	if (!test_fix) {
+		pr_err("Failed to create debugfs directory when adding fixture\n");
+		err = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	test_fix->test_func = test_func;
+	test_fix->fixture_index = fixture_index;
+
+	snprintf(name, sizeof(name), "%d", fixture_index);
+	test_fix->dir = debugfs_create_dir(name, test_func->dir);
+	if (!test_func->dir) {
+		pr_err("Failed to create debugfs directory when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_dir;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, test_fix->dir, "fixture\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_file;
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	tmp = debugfs_create_file_unsafe(
+#else
+	tmp = debugfs_create_file(
+#endif
+			"run", 0600, test_fix->dir,
+			test_fix,
+			&kutf_debugfs_run_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"run\" when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_file;
+	}
+
+	list_add(&test_fix->node, &test_func->variant_list);
+	return 0;
+
+fail_file:
+	debugfs_remove_recursive(test_fix->dir);
+fail_dir:
+	kfree(test_fix);
+fail_alloc:
+	return err;
+}
+
+/**
+ * kutf_remove_test_variant() - Destroy a previously created fixture variant.
+ * @test_fix:	Test fixture
+ */
+static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix)
+{
+	debugfs_remove_recursive(test_fix->dir);
+	kfree(test_fix);
+}
+
+void kutf_add_test_with_filters_and_data(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data test_data)
+{
+	struct kutf_test_function *test_func;
+	struct dentry *tmp;
+	unsigned int i;
+
+	test_func = kmalloc(sizeof(*test_func), GFP_KERNEL);
+	if (!test_func) {
+		pr_err("Failed to allocate memory when adding test %s\n", name);
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&test_func->variant_list);
+
+	test_func->dir = debugfs_create_dir(name, suite->dir);
+	if (!test_func->dir) {
+		pr_err("Failed to create debugfs directory when adding test %s\n", name);
+		goto fail_dir;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, test_func->dir, "test\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	test_func->filters = filters;
+	tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir,
+				 &test_func->filters);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	test_func->test_id = id;
+	tmp = debugfs_create_u32("test_id", S_IROTH, test_func->dir,
+				 &test_func->test_id);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	for (i = 0; i < suite->fixture_variants; i++) {
+		if (create_fixture_variant(test_func, i)) {
+			pr_err("Failed to create fixture %d when adding test %s\n", i, name);
+			goto fail_file;
+		}
+	}
+
+	test_func->suite = suite;
+	test_func->execute = execute;
+	test_func->test_data = test_data;
+
+	list_add(&test_func->node, &suite->test_list);
+	return;
+
+fail_file:
+	debugfs_remove_recursive(test_func->dir);
+fail_dir:
+	kfree(test_func);
+fail_alloc:
+	return;
+}
+EXPORT_SYMBOL(kutf_add_test_with_filters_and_data);
+
+void kutf_add_test_with_filters(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters)
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+
+	kutf_add_test_with_filters_and_data(suite,
+					    id,
+					    name,
+					    execute,
+					    suite->suite_default_flags,
+					    data);
+}
+EXPORT_SYMBOL(kutf_add_test_with_filters);
+
+void kutf_add_test(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context))
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+
+	kutf_add_test_with_filters_and_data(suite,
+					    id,
+					    name,
+					    execute,
+					    suite->suite_default_flags,
+					    data);
+}
+EXPORT_SYMBOL(kutf_add_test);
+
+/**
+ * kutf_remove_test(): Remove a previously added test function.
+ * @test_func: Test function
+ */
+static void kutf_remove_test(struct kutf_test_function *test_func)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &test_func->variant_list) {
+		struct kutf_test_fixture *test_fix;
+
+		test_fix = list_entry(pos, struct kutf_test_fixture, node);
+		kutf_remove_test_variant(test_fix);
+	}
+
+	list_del(&test_func->node);
+	debugfs_remove_recursive(test_func->dir);
+	kfree(test_func);
+}
+
+struct kutf_suite *kutf_create_suite_with_filters_and_data(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data suite_data)
+{
+	struct kutf_suite *suite;
+	struct dentry *tmp;
+
+	suite = kmalloc(sizeof(*suite), GFP_KERNEL);
+	if (!suite) {
+		pr_err("Failed to allocate memory when creating suite %s\n", name);
+		goto fail_kmalloc;
+	}
+
+	suite->dir = debugfs_create_dir(name, app->dir);
+	if (!suite->dir) {
+		pr_err("Failed to create debugfs directory when adding test %s\n", name);
+		goto fail_debugfs;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, suite->dir, "suite\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	INIT_LIST_HEAD(&suite->test_list);
+	suite->app = app;
+	suite->name = name;
+	suite->fixture_variants = fixture_count;
+	suite->create_fixture = create_fixture;
+	suite->remove_fixture = remove_fixture;
+	suite->suite_default_flags = filters;
+	suite->suite_data = suite_data;
+
+	list_add(&suite->node, &app->suite_list);
+
+	return suite;
+
+fail_file:
+	debugfs_remove_recursive(suite->dir);
+fail_debugfs:
+	kfree(suite);
+fail_kmalloc:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_create_suite_with_filters_and_data);
+
+struct kutf_suite *kutf_create_suite_with_filters(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters)
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+	return kutf_create_suite_with_filters_and_data(app,
+						       name,
+						       fixture_count,
+						       create_fixture,
+						       remove_fixture,
+						       filters,
+						       data);
+}
+EXPORT_SYMBOL(kutf_create_suite_with_filters);
+
+struct kutf_suite *kutf_create_suite(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context))
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+	return kutf_create_suite_with_filters_and_data(app,
+						       name,
+						       fixture_count,
+						       create_fixture,
+						       remove_fixture,
+						       KUTF_F_TEST_GENERIC,
+						       data);
+}
+EXPORT_SYMBOL(kutf_create_suite);
+
+/**
+ * kutf_destroy_suite() - Destroy a previously added test suite.
+ * @suite:	Test suite
+ */
+static void kutf_destroy_suite(struct kutf_suite *suite)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &suite->test_list) {
+		struct kutf_test_function *test_func;
+
+		test_func = list_entry(pos, struct kutf_test_function, node);
+		kutf_remove_test(test_func);
+	}
+
+	list_del(&suite->node);
+	debugfs_remove_recursive(suite->dir);
+	kfree(suite);
+}
+
+struct kutf_application *kutf_create_application(const char *name)
+{
+	struct kutf_application *app;
+	struct dentry *tmp;
+
+	app = kmalloc(sizeof(*app), GFP_KERNEL);
+	if (!app) {
+		pr_err("Failed to create allocate memory when creating application %s\n", name);
+		goto fail_kmalloc;
+	}
+
+	app->dir = debugfs_create_dir(name, base_dir);
+	if (!app->dir) {
+		pr_err("Failed to create debugfs direcotry when creating application %s\n", name);
+		goto fail_debugfs;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, app->dir, "application\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name);
+		goto fail_file;
+	}
+
+	INIT_LIST_HEAD(&app->suite_list);
+	app->name = name;
+
+	return app;
+
+fail_file:
+	debugfs_remove_recursive(app->dir);
+fail_debugfs:
+	kfree(app);
+fail_kmalloc:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_create_application);
+
+void kutf_destroy_application(struct kutf_application *app)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &app->suite_list) {
+		struct kutf_suite *suite;
+
+		suite = list_entry(pos, struct kutf_suite, node);
+		kutf_destroy_suite(suite);
+	}
+
+	debugfs_remove_recursive(app->dir);
+	kfree(app);
+}
+EXPORT_SYMBOL(kutf_destroy_application);
+
+static struct kutf_context *kutf_create_context(
+		struct kutf_test_fixture *test_fix)
+{
+	struct kutf_context *new_context;
+
+	new_context = kmalloc(sizeof(*new_context), GFP_KERNEL);
+	if (!new_context) {
+		pr_err("Failed to allocate test context");
+		goto fail_alloc;
+	}
+
+	new_context->result_set = kutf_create_result_set();
+	if (!new_context->result_set) {
+		pr_err("Failed to create result set");
+		goto fail_result_set;
+	}
+
+	new_context->test_fix = test_fix;
+	/* Save the pointer to the suite as the callbacks will require it */
+	new_context->suite = test_fix->test_func->suite;
+	new_context->status = KUTF_RESULT_UNKNOWN;
+	new_context->expected_status = KUTF_RESULT_UNKNOWN;
+
+	kutf_mempool_init(&new_context->fixture_pool);
+	new_context->fixture = NULL;
+	new_context->fixture_index = test_fix->fixture_index;
+	new_context->fixture_name = NULL;
+	new_context->test_data = test_fix->test_func->test_data;
+
+	new_context->userdata.flags = 0;
+	INIT_LIST_HEAD(&new_context->userdata.input_head);
+	init_waitqueue_head(&new_context->userdata.input_waitq);
+
+	INIT_WORK(&new_context->work, kutf_run_test);
+
+	kref_init(&new_context->kref);
+
+	return new_context;
+
+fail_result_set:
+	kfree(new_context);
+fail_alloc:
+	return NULL;
+}
+
+static void kutf_destroy_context(struct kref *kref)
+{
+	struct kutf_context *context;
+
+	context = container_of(kref, struct kutf_context, kref);
+	kutf_destroy_result_set(context->result_set);
+	kutf_mempool_destroy(&context->fixture_pool);
+	kfree(context);
+}
+
+static void kutf_context_get(struct kutf_context *context)
+{
+	kref_get(&context->kref);
+}
+
+static void kutf_context_put(struct kutf_context *context)
+{
+	kref_put(&context->kref, kutf_destroy_context);
+}
+
+
+static void kutf_set_result(struct kutf_context *context,
+		enum kutf_result_status status)
+{
+	context->status = status;
+}
+
+static void kutf_set_expected_result(struct kutf_context *context,
+		enum kutf_result_status expected_status)
+{
+	context->expected_status = expected_status;
+}
+
+/**
+ * kutf_test_log_result() - Log a result for the specified test context
+ * @context:	Test context
+ * @message:	Result string
+ * @new_status:	Result status
+ */
+static void kutf_test_log_result(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status)
+{
+	if (context->status < new_status)
+		context->status = new_status;
+
+	if (context->expected_status != new_status)
+		kutf_add_result(context, new_status, message);
+}
+
+void kutf_test_log_result_external(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status)
+{
+	kutf_test_log_result(context, message, new_status);
+}
+EXPORT_SYMBOL(kutf_test_log_result_external);
+
+void kutf_test_expect_abort(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_ABORT);
+}
+EXPORT_SYMBOL(kutf_test_expect_abort);
+
+void kutf_test_expect_fatal(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_FATAL);
+}
+EXPORT_SYMBOL(kutf_test_expect_fatal);
+
+void kutf_test_expect_fail(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_FAIL);
+}
+EXPORT_SYMBOL(kutf_test_expect_fail);
+
+void kutf_test_expect_warn(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_WARN);
+}
+EXPORT_SYMBOL(kutf_test_expect_warn);
+
+void kutf_test_expect_pass(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_PASS);
+}
+EXPORT_SYMBOL(kutf_test_expect_pass);
+
+void kutf_test_skip(struct kutf_context *context)
+{
+	kutf_set_result(context, KUTF_RESULT_SKIP);
+	kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN);
+
+	kutf_test_log_result(context, "Test skipped", KUTF_RESULT_SKIP);
+}
+EXPORT_SYMBOL(kutf_test_skip);
+
+void kutf_test_skip_msg(struct kutf_context *context, const char *message)
+{
+	kutf_set_result(context, KUTF_RESULT_SKIP);
+	kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN);
+
+	kutf_test_log_result(context, kutf_dsprintf(&context->fixture_pool,
+			     "Test skipped: %s", message), KUTF_RESULT_SKIP);
+	kutf_test_log_result(context, "!!!Test skipped!!!", KUTF_RESULT_SKIP);
+}
+EXPORT_SYMBOL(kutf_test_skip_msg);
+
+void kutf_test_debug(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_DEBUG);
+}
+EXPORT_SYMBOL(kutf_test_debug);
+
+void kutf_test_pass(struct kutf_context *context, char const *message)
+{
+	static const char explicit_message[] = "(explicit pass)";
+
+	if (!message)
+		message = explicit_message;
+
+	kutf_test_log_result(context, message, KUTF_RESULT_PASS);
+}
+EXPORT_SYMBOL(kutf_test_pass);
+
+void kutf_test_info(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_INFO);
+}
+EXPORT_SYMBOL(kutf_test_info);
+
+void kutf_test_warn(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_WARN);
+}
+EXPORT_SYMBOL(kutf_test_warn);
+
+void kutf_test_fail(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_FAIL);
+}
+EXPORT_SYMBOL(kutf_test_fail);
+
+void kutf_test_fatal(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_FATAL);
+}
+EXPORT_SYMBOL(kutf_test_fatal);
+
+void kutf_test_abort(struct kutf_context *context)
+{
+	kutf_test_log_result(context, "", KUTF_RESULT_ABORT);
+}
+EXPORT_SYMBOL(kutf_test_abort);
+
+/**
+ * init_kutf_core() - Module entry point.
+ *
+ * Create the base entry point in debugfs.
+ */
+static int __init init_kutf_core(void)
+{
+	kutf_workq = alloc_workqueue("kutf workq", WQ_UNBOUND, 1);
+	if (!kutf_workq)
+		return -ENOMEM;
+
+	base_dir = debugfs_create_dir("kutf_tests", NULL);
+	if (!base_dir) {
+		destroy_workqueue(kutf_workq);
+		kutf_workq = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * exit_kutf_core() - Module exit point.
+ *
+ * Remove the base entry point in debugfs.
+ */
+static void __exit exit_kutf_core(void)
+{
+	debugfs_remove_recursive(base_dir);
+
+	if (kutf_workq)
+		destroy_workqueue(kutf_workq);
+}
+
+#else	/* defined(CONFIG_DEBUG_FS) */
+
+/**
+ * init_kutf_core() - Module entry point.
+ *
+ * Stub for when build against a kernel without debugfs support
+ */
+static int __init init_kutf_core(void)
+{
+	pr_debug("KUTF requires a kernel with debug fs support");
+
+	return -ENODEV;
+}
+
+/**
+ * exit_kutf_core() - Module exit point.
+ *
+ * Stub for when build against a kernel without debugfs support
+ */
+static void __exit exit_kutf_core(void)
+{
+}
+#endif	/* defined(CONFIG_DEBUG_FS) */
+
+MODULE_LICENSE("GPL");
+
+module_init(init_kutf_core);
+module_exit(exit_kutf_core);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c
new file mode 100644
index 0000000..a429a2d
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* Kernel UTF utility functions */
+
+#include <linux/mutex.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+#include <kutf/kutf_utils.h>
+#include <kutf/kutf_mem.h>
+
+static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN];
+
+DEFINE_MUTEX(buffer_lock);
+
+const char *kutf_dsprintf(struct kutf_mempool *pool,
+		const char *fmt, ...)
+{
+	va_list args;
+	int len;
+	int size;
+	void *buffer;
+
+	mutex_lock(&buffer_lock);
+	va_start(args, fmt);
+	len = vsnprintf(tmp_buffer, sizeof(tmp_buffer), fmt, args);
+	va_end(args);
+
+	if (len < 0) {
+		pr_err("kutf_dsprintf: Bad format dsprintf format %s\n", fmt);
+		goto fail_format;
+	}
+
+	if (len >= sizeof(tmp_buffer)) {
+		pr_warn("kutf_dsprintf: Truncated dsprintf message %s\n", fmt);
+		size = sizeof(tmp_buffer);
+	} else {
+		size = len + 1;
+	}
+
+	buffer = kutf_mempool_alloc(pool, size);
+	if (!buffer)
+		goto fail_alloc;
+
+	memcpy(buffer, tmp_buffer, size);
+	mutex_unlock(&buffer_lock);
+
+	return buffer;
+
+fail_alloc:
+fail_format:
+	mutex_unlock(&buffer_lock);
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_dsprintf);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/sconscript b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/sconscript
new file mode 100644
index 0000000..d7f1124
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/kutf/sconscript
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2014-2016, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import('kutf_env')
+
+make_args = kutf_env.kernel_get_config_defines(ret_list = True)
+
+mod = kutf_env.BuildKernelModule('$STATIC_LIB_PATH/kutf.ko', Glob('*.c'), make_args = make_args)
+kutf_env.KernelObjTarget('kutf', mod)
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild b/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild
new file mode 100644
index 0000000..0cd9ceb
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android
+
+obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test.o
+
+mali_kutf_irq_test-y := mali_kutf_irq_test_main.o
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig b/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig
new file mode 100644
index 0000000..f4553d3
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+config MALI_IRQ_LATENCY
+ tristate "Mali GPU IRQ latency measurement"
+ depends on MALI_MIDGARD && MALI_DEBUG && MALI_KUTF
+ default m
+ help
+   This option will build a test module mali_kutf_irq_test that
+   can determine the latency of the Mali GPU IRQ on your system.
+   Choosing M here will generate a single module called mali_kutf_irq_test.
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile b/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
new file mode 100644
index 0000000..2ac4f97
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
@@ -0,0 +1,47 @@
+#
+# (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+TEST_CCFLAGS := \
+	-DMALI_DEBUG=$(MALI_DEBUG) \
+	-DMALI_BACKEND_KERNEL=$(MALI_BACKEND_KERNEL) \
+	-DMALI_NO_MALI=$(MALI_NO_MALI) \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_USE_UMP=$(MALI_USE_UMP) \
+	-DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	$(SCONS_CFLAGS) \
+	-I$(CURDIR)/../include \
+	-I$(CURDIR)/../../../../../../include \
+	-I$(CURDIR)/../../../ \
+	-I$(CURDIR)/../../ \
+	-I$(CURDIR)/../../backend/gpu \
+	-I$(CURDIR)/ \
+	-I$(srctree)/drivers/staging/android \
+	-I$(srctree)/include/linux
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS="$(TEST_CCFLAGS)" KBUILD_EXTRA_SYMBOLS="$(CURDIR)/../kutf/Module.symvers $(CURDIR)/../../Module.symvers" modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
new file mode 100644
index 0000000..c9cc444
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
@@ -0,0 +1,269 @@
+/*
+ *
+ * (C) COPYRIGHT 2016, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+
+#include "mali_kbase.h"
+#include <midgard/backend/gpu/mali_kbase_device_internal.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_utils.h>
+
+/*
+ * This file contains the code which is used for measuring interrupt latency
+ * of the Mali GPU IRQ. In particular, function mali_kutf_irq_latency() is
+ * used with this purpose and it is called within KUTF framework - a kernel
+ * unit test framework. The measured latency provided by this test should
+ * be representative for the latency of the Mali JOB/MMU IRQs as well.
+ */
+
+/* KUTF test application pointer for this test */
+struct kutf_application *irq_app;
+
+/**
+ * struct kutf_irq_fixture data - test fixture used by the test functions.
+ * @kbdev:	kbase device for the GPU.
+ *
+ */
+struct kutf_irq_fixture_data {
+	struct kbase_device *kbdev;
+};
+
+#define SEC_TO_NANO(s)	      ((s)*1000000000LL)
+
+/* ID for the GPU IRQ */
+#define GPU_IRQ_HANDLER 2
+
+#define NR_TEST_IRQS 1000000
+
+/* IRQ for the test to trigger. Currently MULTIPLE_GPU_FAULTS as we would not
+ * expect to see this in normal use (e.g., when Android is running). */
+#define TEST_IRQ MULTIPLE_GPU_FAULTS
+
+#define IRQ_TIMEOUT HZ
+
+/* Kernel API for setting irq throttle hook callback and irq time in us*/
+extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+		irq_handler_t custom_handler,
+		int irq_type);
+extern irqreturn_t kbase_gpu_irq_handler(int irq, void *data);
+
+static DECLARE_WAIT_QUEUE_HEAD(wait);
+static bool triggered;
+static u64 irq_time;
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+/**
+ * kbase_gpu_irq_custom_handler - Custom IRQ throttle handler
+ * @irq:  IRQ number
+ * @data: Data associated with this IRQ
+ *
+ * Return: state of the IRQ
+ */
+static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data)
+{
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+	if (val & TEST_IRQ) {
+		struct timespec tval;
+
+		getnstimeofday(&tval);
+		irq_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec);
+
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val,
+				NULL);
+
+		triggered = true;
+		wake_up(&wait);
+
+		return IRQ_HANDLED;
+	}
+
+	/* Trigger main irq handler */
+	return kbase_gpu_irq_handler(irq, data);
+}
+
+/**
+ * mali_kutf_irq_default_create_fixture() - Creates the fixture data required
+ *                                          for all the tests in the irq suite.
+ * @context:             KUTF context.
+ *
+ * Return: Fixture data created on success or NULL on failure
+ */
+static void *mali_kutf_irq_default_create_fixture(
+		struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data;
+
+	data = kutf_mempool_alloc(&context->fixture_pool,
+			sizeof(struct kutf_irq_fixture_data));
+
+	if (!data)
+		goto fail;
+
+	/* Acquire the kbase device */
+	data->kbdev = kbase_find_device(-1);
+	if (data->kbdev == NULL) {
+		kutf_test_fail(context, "Failed to find kbase device");
+		goto fail;
+	}
+
+	return data;
+
+fail:
+	return NULL;
+}
+
+/**
+ * mali_kutf_irq_default_remove_fixture() - Destroy fixture data previously
+ *                          created by mali_kutf_irq_default_create_fixture.
+ *
+ * @context:             KUTF context.
+ */
+static void mali_kutf_irq_default_remove_fixture(
+		struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data = context->fixture;
+	struct kbase_device *kbdev = data->kbdev;
+
+	kbase_release_device(kbdev);
+}
+
+/**
+ * mali_kutf_irq_latency() - measure GPU IRQ latency
+ * @context:		kutf context within which to perform the test
+ *
+ * The test triggers IRQs manually, and measures the
+ * time between triggering the IRQ and the IRQ handler being executed.
+ *
+ * This is not a traditional test, in that the pass/fail status has little
+ * meaning (other than indicating that the IRQ handler executed at all). Instead
+ * the results are in the latencies provided with the test result. There is no
+ * meaningful pass/fail result that can be obtained here, instead the latencies
+ * are provided for manual analysis only.
+ */
+static void mali_kutf_irq_latency(struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data = context->fixture;
+	struct kbase_device *kbdev = data->kbdev;
+	u64 min_time = U64_MAX, max_time = 0, average_time = 0;
+	int i;
+	bool test_failed = false;
+
+	/* Force GPU to be powered */
+	kbase_pm_context_active(kbdev);
+
+	kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler,
+			GPU_IRQ_HANDLER);
+
+	for (i = 0; i < NR_TEST_IRQS; i++) {
+		struct timespec tval;
+		u64 start_time;
+		int ret;
+
+		triggered = false;
+		getnstimeofday(&tval);
+		start_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec);
+
+		/* Trigger fake IRQ */
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+				TEST_IRQ, NULL);
+
+		ret = wait_event_timeout(wait, triggered != false, IRQ_TIMEOUT);
+
+		if (ret == 0) {
+			kutf_test_fail(context, "Timed out waiting for IRQ\n");
+			test_failed = true;
+			break;
+		}
+
+		if ((irq_time - start_time) < min_time)
+			min_time = irq_time - start_time;
+		if ((irq_time - start_time) > max_time)
+			max_time = irq_time - start_time;
+		average_time += irq_time - start_time;
+
+		udelay(10);
+	}
+
+	/* Go back to default handler */
+	kbase_set_custom_irq_handler(kbdev, NULL, GPU_IRQ_HANDLER);
+
+	kbase_pm_context_idle(kbdev);
+
+	if (!test_failed) {
+		const char *results;
+
+		do_div(average_time, NR_TEST_IRQS);
+		results = kutf_dsprintf(&context->fixture_pool,
+				"Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n",
+				min_time, max_time, average_time);
+		kutf_test_pass(context, results);
+	}
+}
+
+/**
+ * Module entry point for this test.
+ */
+int mali_kutf_irq_test_main_init(void)
+{
+	struct kutf_suite *suite;
+
+	irq_app = kutf_create_application("irq");
+
+	if (NULL == irq_app) {
+		pr_warn("Creation of test application failed!\n");
+		return -ENOMEM;
+	}
+
+	suite = kutf_create_suite(irq_app, "irq_default",
+			1, mali_kutf_irq_default_create_fixture,
+			mali_kutf_irq_default_remove_fixture);
+
+	if (NULL == suite) {
+		pr_warn("Creation of test suite failed!\n");
+		kutf_destroy_application(irq_app);
+		return -ENOMEM;
+	}
+
+	kutf_add_test(suite, 0x0, "irq_latency",
+			mali_kutf_irq_latency);
+	return 0;
+}
+
+/**
+ * Module exit point for this test.
+ */
+void mali_kutf_irq_test_main_exit(void)
+{
+	kutf_destroy_application(irq_app);
+}
+
+module_init(mali_kutf_irq_test_main_init);
+module_exit(mali_kutf_irq_test_main_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript b/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript
new file mode 100644
index 0000000..280fa98
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import os
+Import('env')
+
+src = [Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/*.c'), Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile')]
+
+if env.GetOption('clean') :
+	env.Execute(Action("make clean", '[CLEAN] mali_kutf_irq_test'))
+	cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [])
+	env.KernelObjTarget('mali_kutf_irq_test', cmd)
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make MALI_DEBUG=${debug} MALI_BACKEND_KERNEL=1 MALI_ERROR_INJECT_ON=${error_inject} MALI_NO_MALI=${no_mali} MALI_UNIT_TEST=${unit} MALI_USE_UMP=${ump} MALI_CUSTOMER_RELEASE=${release} %s && ( ( [ -f mali_kutf_irq_test.ko ] && cp mali_kutf_irq_test.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/mali_kutf_irq_test.ko)" % env.kernel_get_config_defines(), '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [makeAction])
+	env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/kutf.ko')
+	env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/mali_kbase.ko')
+	env.KernelObjTarget('mali_kutf_irq_test', cmd)
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/tests/sconscript b/t83x/kernel/drivers/gpu/arm/midgard/tests/sconscript
new file mode 100644
index 0000000..0458411
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/midgard/tests/sconscript
@@ -0,0 +1,38 @@
+#
+# (C) COPYRIGHT 2010-2011, 2013, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+Import ('env')
+
+kutf_env = env.Clone()
+kutf_env.Append(CPPPATH = '#kernel/drivers/gpu/arm/midgard/tests/include')
+Export('kutf_env')
+
+if Glob('internal/sconscript'):
+	SConscript('internal/sconscript')
+
+if kutf_env['debug'] == '1':
+	SConscript('kutf/sconscript')
+	SConscript('mali_kutf_irq_test/sconscript')
+
+	if Glob('kutf_test/sconscript'):
+		SConscript('kutf_test/sconscript')
+
+	if Glob('kutf_test_runner/sconscript'):
+		SConscript('kutf_test_runner/sconscript')
+
+if env['unit'] == '1':
+	SConscript('mali_kutf_ipa_test/sconscript')
+	SConscript('mali_kutf_ipa_unit_test/sconscript')
+	SConscript('mali_kutf_vinstr_test/sconscript')
diff --git a/t83x/kernel/drivers/gpu/arm/sconscript b/t83x/kernel/drivers/gpu/arm/sconscript
new file mode 100644
index 0000000..924511b
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/arm/sconscript
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+import glob
+
+
+SConscript('midgard/sconscript')
+
diff --git a/t83x/kernel/drivers/gpu/sconscript b/t83x/kernel/drivers/gpu/sconscript
new file mode 100644
index 0000000..b81fb4d
--- /dev/null
+++ b/t83x/kernel/drivers/gpu/sconscript
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2010-2013,2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+SConscript('arm/sconscript')
+
diff --git a/t83x/kernel/drivers/sconscript b/t83x/kernel/drivers/sconscript
new file mode 100644
index 0000000..87e231a
--- /dev/null
+++ b/t83x/kernel/drivers/sconscript
@@ -0,0 +1,22 @@
+#
+# (C) COPYRIGHT 2010-2013, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+if Glob('base/sconscript'):
+	SConscript('base/sconscript')
+
+if Glob('video/sconscript'):
+	SConscript('video/sconscript')
+
+SConscript('gpu/sconscript')
diff --git a/t83x/kernel/include/linux/dma-buf-test-exporter.h b/t83x/kernel/include/linux/dma-buf-test-exporter.h
new file mode 100644
index 0000000..98984ba
--- /dev/null
+++ b/t83x/kernel/include/linux/dma-buf-test-exporter.h
@@ -0,0 +1,78 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_
+#define _LINUX_DMA_BUF_TEST_EXPORTER_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+#define DMA_BUF_TE_VER_MAJOR 1
+#define DMA_BUF_TE_VER_MINOR 0
+#define DMA_BUF_TE_ENQ 0x642d7465
+#define DMA_BUF_TE_ACK 0x68692100
+
+struct dma_buf_te_ioctl_version
+{
+	int op;    /**< Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */
+	int major; /**< Major version */
+	int minor; /**< Minor version */
+};
+
+struct dma_buf_te_ioctl_alloc
+{
+	__u64 size; /* size of buffer to allocate, in pages */
+};
+
+struct dma_buf_te_ioctl_status
+{
+	/* in */
+	int fd; /* the dma_buf to query, only dma_buf objects exported by this driver is supported */
+	/* out */
+	int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */
+	int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */
+	int cpu_mappings;    /* number of cpu mappings (active 'mmap's) */
+};
+
+struct dma_buf_te_ioctl_set_failing
+{
+	/* in */
+	int fd; /* the dma_buf to set failure mode for, only dma_buf objects exported by this driver is supported */
+
+	/* zero = no fail injection, non-zero = inject failure */
+	int fail_attach;
+	int fail_map;
+	int fail_mmap;
+};
+
+struct dma_buf_te_ioctl_fill
+{
+	int fd;
+	unsigned int value;
+};
+
+#define DMA_BUF_TE_IOCTL_BASE 'E'
+/* Below all returning 0 if successful or -errcode except DMA_BUF_TE_ALLOC which will return fd or -errcode */
+#define DMA_BUF_TE_VERSION         _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version)
+#define DMA_BUF_TE_ALLOC           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_QUERY           _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status)
+#define DMA_BUF_TE_SET_FAILING     _IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing)
+#define DMA_BUF_TE_ALLOC_CONT      _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_FILL            _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill)
+
+#endif /* _LINUX_DMA_BUF_TEST_EXPORTER_H_ */
diff --git a/t83x/kernel/include/linux/protected_mode_switcher.h b/t83x/kernel/include/linux/protected_mode_switcher.h
new file mode 100644
index 0000000..5dc2f3b
--- /dev/null
+++ b/t83x/kernel/include/linux/protected_mode_switcher.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _PROTECTED_MODE_SWITCH_H_
+#define _PROTECTED_MODE_SWITCH_H_
+
+struct protected_mode_device;
+
+/**
+ * struct protected_mode_ops - Callbacks for protected mode switch operations
+ *
+ * @protected_mode_enable:  Callback to enable protected mode for device
+ * @protected_mode_disable: Callback to disable protected mode for device
+ */
+struct protected_mode_ops {
+	/**
+	 * protected_mode_enable() - Enable protected mode on device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enable)(
+			struct protected_mode_device *protected_dev);
+
+	/**
+	 * protected_mode_disable() - Disable protected mode on device, and
+	 *                            reset device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_disable)(
+			struct protected_mode_device *protected_dev);
+};
+
+/**
+ * struct protected_mode_device - Device structure for protected mode devices
+ *
+ * @ops  - Callbacks associated with this device
+ * @data - Pointer to device private data
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct protected_mode_device {
+	struct protected_mode_ops ops;
+	void *data;
+};
+
+#endif /* _PROTECTED_MODE_SWITCH_H_ */
diff --git a/t83x/kernel/include/linux/ump-common.h b/t83x/kernel/include/linux/ump-common.h
new file mode 100644
index 0000000..0015bda
--- /dev/null
+++ b/t83x/kernel/include/linux/ump-common.h
@@ -0,0 +1,252 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump-common.h
+ *
+ * This file contains some common enum values used both in both the user and kernel space side of UMP.
+ */
+
+#ifndef _UMP_COMMON_H_
+#define _UMP_COMMON_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#define UMP_UINT32_MAX (4294967295U)
+#define UMP_UINT64_MAX (18446744073709551615ULL)
+
+#ifdef __GNUC__
+	#define CHECK_RESULT        __attribute__((__warn_unused_result__))
+	#define INLINE	__inline__
+#else
+	#define CHECK_RESULT
+	#define INLINE __inline
+#endif
+
+#ifndef STATIC
+	#define STATIC static
+#endif
+
+/**
+ * Values to identify major and minor version of UMP
+ */
+#define UMP_VERSION_MAJOR 2
+#define UMP_VERSION_MINOR 0
+
+/**
+ * Typedef for a secure ID, a system wide identifier for UMP memory buffers.
+ */
+typedef int32_t ump_secure_id;
+
+/**
+ * Value to indicate an invalid secure Id.
+ */
+#define UMP_INVALID_SECURE_ID  ((ump_secure_id)0)
+
+/**
+ * UMP error codes.
+ */
+typedef enum
+{
+	UMP_OK    = 0, /**< indicates success */
+	UMP_ERROR = 1  /**< indicates failure */
+} ump_result;
+
+/**
+ * Allocation flag bits.
+ *
+ * ump_allocate accepts zero or more flags to specify the type of memory to allocate and how to expose it to devices.
+ *
+ * For each supported device there are 4 flags to control access permissions and give usage characteristic hints to optimize the allocation/mapping.
+ * They are;
+ * @li @a UMP_PROT_<device>_RD   read permission
+ * @li @a UMP_PROT_<device>_WR   write permission
+ * @li @a UMP_HINT_<device>_RD   read often
+ * @li @a UMP_HINT_<device>_WR   written often
+ *
+ * 5 devices are currently supported, with a device being the CPU itself.
+ * The other 4 devices will be mapped to real devices per SoC design.
+ * They are just named W,X,Y,Z by UMP as it has no knowledge of their real names/identifiers.
+ * As an example device W could be a camera device while device Z could be an ARM GPU device, leaving X and Y unused.
+ *
+ * 2 additional flags control the allocation;
+ * @li @a UMP_CONSTRAINT_PHYSICALLY_LINEAR   the allocation must be physical linear. Typical for devices without an MMU and no IOMMU to help it.
+ * @li @a UMP_PROT_SHAREABLE                 the allocation can be shared with other processes on the system. Without this flag the returned allocation won't be resolvable in other processes.
+ *
+ * All UMP allocation are growable unless they're @a UMP_PROT_SHAREABLE.
+ * The hint bits should be used to indicate the access pattern so the driver can select the most optimal memory type and cache settings based on the what the system supports.
+ */
+typedef enum
+{
+	/* Generic helpers */
+	UMP_PROT_DEVICE_RD = (1u << 0),
+	UMP_PROT_DEVICE_WR = (1u << 1),
+	UMP_HINT_DEVICE_RD = (1u << 2),
+	UMP_HINT_DEVICE_WR = (1u << 3),
+	UMP_DEVICE_MASK = 0xF,
+	UMP_DEVICE_CPU_SHIFT = 0,
+	UMP_DEVICE_W_SHIFT = 4,
+	UMP_DEVICE_X_SHIFT = 8,
+	UMP_DEVICE_Y_SHIFT = 12,
+	UMP_DEVICE_Z_SHIFT = 16,
+
+	/* CPU protection and hints. */
+	UMP_PROT_CPU_RD = (1u <<  0),
+	UMP_PROT_CPU_WR = (1u <<  1),
+	UMP_HINT_CPU_RD = (1u <<  2),
+	UMP_HINT_CPU_WR = (1u <<  3),
+
+	/* device W */
+	UMP_PROT_W_RD = (1u <<  4),
+	UMP_PROT_W_WR = (1u <<  5),
+	UMP_HINT_W_RD = (1u <<  6),
+	UMP_HINT_W_WR = (1u <<  7),
+
+	/* device X */
+	UMP_PROT_X_RD = (1u <<  8),
+	UMP_PROT_X_WR = (1u <<  9),
+	UMP_HINT_X_RD = (1u << 10),
+	UMP_HINT_X_WR = (1u << 11),
+	
+	/* device Y */
+	UMP_PROT_Y_RD = (1u << 12),
+	UMP_PROT_Y_WR = (1u << 13),
+	UMP_HINT_Y_RD = (1u << 14),
+	UMP_HINT_Y_WR = (1u << 15),
+
+	/* device Z */
+	UMP_PROT_Z_RD = (1u << 16),
+	UMP_PROT_Z_WR = (1u << 17),
+	UMP_HINT_Z_RD = (1u << 18),
+	UMP_HINT_Z_WR = (1u << 19),
+
+	/* 20-26 reserved for future use */
+	UMPP_ALLOCBITS_UNUSED = (0x7Fu << 20),
+	/** Allocations marked as @ UMP_CONSTRAINT_UNCACHED won't be mapped as cached by the cpu  */
+	UMP_CONSTRAINT_UNCACHED = (1u << 27),
+	/** Require 32-bit physically addressable memory */
+	UMP_CONSTRAINT_32BIT_ADDRESSABLE = (1u << 28),
+	/** For devices without an MMU and with no IOMMU assistance. */
+	UMP_CONSTRAINT_PHYSICALLY_LINEAR = (1u << 29),
+	/** Shareable must be set to allow the allocation to be used by other processes, the default is non-shared */
+	UMP_PROT_SHAREABLE = (1u << 30)
+	/* (1u << 31) should not be used to ensure compiler portability */
+} ump_allocation_bits;
+
+/**
+ * ump_allocation_bits combination argument type.
+ *
+ * Type used to pass zero or more bits from the @ref ump_allocation_bits enum
+ */
+typedef uint32_t ump_alloc_flags;
+
+
+/**
+ *  Default allocation flags for UMP v1 compatible allocations.
+ */
+#define UMP_V1_API_DEFAULT_ALLOCATION_FLAGS		UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | \
+												UMP_PROT_W_RD | UMP_PROT_W_WR |	\
+												UMP_PROT_X_RD | UMP_PROT_X_WR |	\
+												UMP_PROT_Y_RD | UMP_PROT_Y_WR |	\
+												UMP_PROT_Z_RD | UMP_PROT_Z_WR |	\
+												UMP_PROT_SHAREABLE |	\
+												UMP_CONSTRAINT_32BIT_ADDRESSABLE
+
+/**
+ * CPU cache sync operations.
+ *
+ * Cache synchronization operations to pass to @ref ump_cpu_msync_now
+ */
+enum
+{
+	/**
+	 * Cleans any dirty cache lines to main memory, but the data will still be available in the cache.
+	 * After a clean the contents of memory is considered to be "owned" by the device.
+	 * */
+	UMP_MSYNC_CLEAN = 1,
+
+	/** Cleans any dirty cache lines to main memory and Ejects all lines from the cache.
+	 * After an clean&invalidate the contents of memory is considered to be "owned" by the CPU.
+	 * Any subsequent access will fetch data from main memory.
+	 *
+	 * @note Due to CPUs doing speculative prefetching a UMP_MSYNC_CLEAN_AND_INVALIDATE must be done before and after interacting with hardware.
+	 * */
+	UMP_MSYNC_CLEAN_AND_INVALIDATE
+
+};
+
+typedef uint32_t ump_cpu_msync_op;
+
+/**
+ * Memory import types supported.
+ * If new import types are added they will appear here.
+ * They must be added before UMPP_EXTERNAL_MEM_COUNT and
+ * must be assigned an explicit sequantial number.
+ *
+ * @li UMP_EXTERNAL_MEM_TYPE_ION - Import an ION allocation
+ *                                 Takes a int* (pointer to a file descriptor)
+ *                                 Another ION reference is taken which is released on the final ump_release
+ */
+enum ump_external_memory_type
+{
+	UMPP_EXTERNAL_MEM_TYPE_UNUSED = 0, /* reserve type 0 */
+	UMP_EXTERNAL_MEM_TYPE_ION = 1,
+	UMPP_EXTERNAL_MEM_COUNT
+};
+
+/** @name UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Allocation constraints.
+ *
+ * Allocation flags to pass @ref ump_ref_drv_allocate
+ *
+ * UMP v1 API only.
+ */
+typedef enum
+{
+	/** the allocation is mapped as noncached. */
+	UMP_REF_DRV_CONSTRAINT_NONE = 0,
+	/** not supported. */
+	UMP_REF_DRV_CONSTRAINT_PHYSICALLY_LINEAR = 1,
+	/** the allocation is mapped as cached by the cpu. */
+	UMP_REF_DRV_CONSTRAINT_USE_CACHE = 4
+} ump_alloc_constraints;
+
+/* @} */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _UMP_COMMON_H_ */
diff --git a/t83x/kernel/include/linux/ump-import.h b/t83x/kernel/include/linux/ump-import.h
new file mode 100644
index 0000000..89ce727
--- /dev/null
+++ b/t83x/kernel/include/linux/ump-import.h
@@ -0,0 +1,99 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IMPORT_H_
+#define _UMP_IMPORT_H_
+
+#include <linux/ump.h>
+#include <linux/module.h>
+
+/**
+ * UMP import module info.
+ * Contains information about the Linux module providing the import module,
+ * used to block unloading of the Linux module while imported memory exists.
+ * Lists the functions implementing the UMP import functions.
+ */
+struct ump_import_handler
+{
+	/**
+	 * Linux module of the import handler
+	 */
+	struct module * linux_module;
+
+	/**
+	 * UMP session usage begin.
+	 *
+	 * Called when a UMP session first is bound to the handler.
+	 * Typically used to set up any import module specific per-session data.
+	 * The function returns a pointer to this data in the output pointer custom_session_data
+	 * which will be passed to \a session_end and \a import.
+	 *
+	 * Note: custom_session_data must be set to non-NULL if successful.
+	 * If no pointer is needed set it a magic value to validate instead.
+	 *
+	 * @param[out] custom_session_data Pointer to a generic pointer where any data can be stored
+	 * @return 0 on success, error code if the session could not be initiated.
+	 */
+	int (*session_begin)(void ** custom_session_data);
+
+	/**
+	 * UMP session usage end.
+	 *
+	 * Called when a UMP session is no longer using the handler.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 */
+	void (*session_end)(void * custom_session_data);
+
+	/**
+	 * Import request.
+	 *
+	 * Called when a client has asked to import a resource of the type the import module was installed for.
+	 * Only called if @a session_begin returned OK.
+	 *
+	 * The requested flags must be verified to be valid to apply to the imported memory.
+	 * If not valid return UMP_DD_INVALID_MEMORY_HANDLE.
+	 * If the flags are found to be valid call \a ump_dd_create_from_phys_blocks_64 to create a handle.
+	 *
+	 * @param[in] custom_session_data The value set by the session_begin handler
+	 * @param[in] phandle Pointer to the handle to import
+	 * @param     flags The requested UMPv2 flags to assign to the imported handle
+	 * @return UMP_DD_INVALID_MEMORY_HANDLE if the import failed, a valid ump handle on success
+	 */
+	ump_dd_handle (*import)(void * custom_session_data, void * phandle, ump_alloc_flags flags);
+};
+
+/**
+ * Import module registration.
+ * Registers a ump_import_handler structure for a memory type.
+ * @param     type    Type of the memory to register a handler for
+ * @param[in] handler Handler strcture to install
+ * @return 0 on success, a Linux error code on failure
+ */
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler);
+
+/**
+ * Import module deregistration.
+ * Uninstalls the handler for the given memory type.
+ * @param type Type of the memory to unregister the handler for
+ */
+void ump_import_module_unregister(enum ump_external_memory_type type);
+
+#endif /* _UMP_IMPORT_H_ */
diff --git a/t83x/kernel/include/linux/ump-ioctl.h b/t83x/kernel/include/linux/ump-ioctl.h
new file mode 100644
index 0000000..caf4c0b
--- /dev/null
+++ b/t83x/kernel/include/linux/ump-ioctl.h
@@ -0,0 +1,152 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IOCTL_H_
+#define _UMP_IOCTL_H_
+
+#include <linux/ump-common.h>
+
+/*
+ * The order and size of the members of these have been chosen so the structures look the same in 32-bit and 64-bit builds.
+ * If any changes are done build the ump_struct_size_checker test for 32-bit and 64-bit targets. Both must compile successfully to commit.
+ */
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union ump_pointer
+{
+	void * value; /**< client should store their pointers here */
+	uint32_t compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	uint64_t sizer; /**< Force 64-bit storage for all clients regardless */
+} ump_pointer;
+
+/**
+ * UMP allocation request.
+ * Used when performing ump_allocate
+ */
+typedef struct ump_k_allocate
+{
+	uint64_t size; /**< [in] Size in bytes to allocate */
+	ump_secure_id secure_id; /**< [out] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [in] Flags to use when allocating */
+} ump_k_allocate;
+
+/**
+ * UMP size query request.
+ * Used when performing ump_size_get
+ */
+typedef struct ump_k_sizequery
+{
+	uint64_t size; /**< [out] Size of allocation */
+	ump_secure_id secure_id; /**< [in] ID of allocation to query the size of */
+	uint32_t padding; /* don't remove */
+} ump_k_sizequery;
+
+/**
+ * UMP cache synchronization request.
+ * Used when performing ump_cpu_msync_now
+ */
+typedef struct ump_k_msync
+{
+	ump_pointer mapped_ptr; /**< [in] CPU VA to perform cache operation on */
+	ump_secure_id secure_id; /**< [in] ID of allocation to perform cache operation on */
+	ump_cpu_msync_op cache_operation; /**< [in] Cache operation to perform */
+	uint64_t size; /**< [in] Size in bytes of the range to synchronize */
+} ump_k_msync;
+
+/**
+ * UMP memory retain request.
+ * Used when performing ump_retain
+ */
+typedef struct ump_k_retain
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to retain a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_retain;
+
+/**
+ * UMP memory release request.
+ * Used when performing ump_release
+ */
+typedef struct ump_k_release
+{
+	ump_secure_id secure_id; /**< [in] ID of allocation to release a reference to */
+	uint32_t padding; /* don't remove */
+} ump_k_release;
+
+typedef struct ump_k_import
+{
+	ump_pointer phandle;                /**< [in]  Pointer to handle to import */
+	uint32_t type;                           /**< [in]  Type of handle to import */
+	ump_alloc_flags alloc_flags;        /**< [in]  Flags to assign to the imported memory */
+	ump_secure_id secure_id;            /**< [out] UMP ID representing the imported memory */
+	uint32_t padding;                        /* don't remove */
+} ump_k_import;
+
+/**
+ * UMP allocation flags request.
+ * Used when performing umpp_get_allocation_flags
+ *
+ * used only by v1 API
+ */
+typedef struct ump_k_allocation_flags
+{
+	ump_secure_id secure_id; /**< [in] Secure ID of allocation on success */
+	ump_alloc_flags alloc_flags; /**< [out] Flags to use when allocating */
+} ump_k_allocation_flags;
+
+#define UMP_CALL_MAX_SIZE 512
+/*
+ * Ioctl definitions
+ */
+
+/* Use '~' as magic number */
+
+#define UMP_IOC_MAGIC  '~'
+
+#define UMP_FUNC_ALLOCATE _IOWR(UMP_IOC_MAGIC,  1, ump_k_allocate)
+#define UMP_FUNC_SIZEQUERY _IOWR(UMP_IOC_MAGIC,  2, ump_k_sizequery)
+#define UMP_FUNC_MSYNC _IOWR(UMP_IOC_MAGIC,  3, ump_k_msync)
+#define UMP_FUNC_RETAIN _IOW(UMP_IOC_MAGIC,  4, ump_k_retain)
+#define UMP_FUNC_RELEASE _IOW(UMP_IOC_MAGIC,  5, ump_k_release)
+#define UMP_FUNC_ALLOCATION_FLAGS_GET _IOWR(UMP_IOC_MAGIC,  6, ump_k_allocation_flags)
+#define UMP_FUNC_IMPORT _IOWR(UMP_IOC_MAGIC, 7, ump_k_import)
+
+/*max ioctl sequential number*/
+#define UMP_IOC_MAXNR 7
+
+/* 15 bits for the UMP ID (allowing 32768 IDs) */
+#define UMP_LINUX_ID_BITS 15
+#define UMP_LINUX_ID_MASK ((1ULL << UMP_LINUX_ID_BITS) - 1ULL)
+
+/* 64-bit (really 52 bits) encoding: 15 bits for the ID, 37 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_64 37
+#define UMP_LINUX_OFFSET_MASK_64 ((1ULL << UMP_LINUX_OFFSET_BITS_64)-1)
+/* 32-bit encoding: 15 bits for the ID, 17 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_32 17
+#define UMP_LINUX_OFFSET_MASK_32 ((1ULL << UMP_LINUX_OFFSET_BITS_32)-1)
+
+#if __SIZEOF_LONG__ == 8
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_64
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_64
+#else
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_32
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_32
+#endif
+
+#endif /* _UMP_IOCTL_H_ */
diff --git a/t83x/kernel/include/linux/ump.h b/t83x/kernel/include/linux/ump.h
new file mode 100644
index 0000000..16f9c39
--- /dev/null
+++ b/t83x/kernel/include/linux/ump.h
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ *
+ * This file contains the kernel space part of the UMP API.
+ *
+ */
+
+#ifndef _UMP_KERNEL_INTERFACE_H_
+#define _UMP_KERNEL_INTERFACE_H_
+
+/**
+ * @addtogroup ump_api
+ * @{
+ */
+
+/** @defgroup ump_kernel_space_api UMP Kernel Space API
+ * @{ */
+
+/**
+ * External representation of a UMP handle in kernel space.
+ */
+typedef void * ump_dd_handle;
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+
+#include <linux/ump-common.h>
+
+#define UMP_KERNEL_API_EXPORT
+
+#if defined(__KERNEL__)
+#include <linux/ump-import.h>
+#else
+#include <ump/src/library/common/ump_user.h>
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_INVALID_MEMORY_HANDLE ((ump_dd_handle)0)
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block_64
+{
+	uint64_t addr; /**< The physical address of the block */
+	uint64_t size; /**< The length of the block, in bytes, typically page aligned */
+} ump_dd_physical_block_64;
+
+/**
+ * Security filter hook.
+ *
+ * Each allocation can have a security filter attached to it.@n
+ * The hook receives
+ * @li the secure ID
+ * @li a handle to the allocation
+ * @li  the callback_data argument provided to @ref ump_dd_allocate_64 or @ref ump_dd_create_from_phys_blocks_64
+ *
+ * The hook must return @a true to indicate that access to the handle is allowed or @n
+ * @a false to state that no access is permitted.@n
+ * This hook is guaranteed to be called in the context of the requesting process/address space.
+ *
+ * The arguments provided to the hook are;
+ * @li the secure ID
+ * @li handle to the allocation
+ * @li the callback_data set when registering the hook
+ *
+ * Return value;
+ * @li @a true to permit access
+ * @li @a false to deny access
+ */
+typedef bool (*ump_dd_security_filter)(ump_secure_id, ump_dd_handle, void *);
+
+/**
+ * Final release notify hook.
+ *
+ * Allocations can have a hook attached to them which is called when the last reference to the allocation is released.
+ * No reference count manipulation is allowed on the provided handle, just property querying (ID get, size get, phys block get).
+ * This is similar to finalizers in OO languages.
+ *
+ * The arguments provided to the hook are;
+ * * handle to the allocation
+ * * the callback_data set when registering the hook
+ */
+typedef void (*ump_dd_final_release_callback)(const ump_dd_handle, void *);
+
+/**
+ * Allocate a buffer.
+ * The lifetime of the allocation is controlled by a reference count.
+ * The reference count of the returned buffer is set to 1.
+ * The memory will be freed once the reference count reaches 0.
+ * Use @ref ump_dd_retain and @ref ump_dd_release to control the reference count.
+ * @param size Number of bytes to allocate. Will be padded up to a multiple of the page size.
+ * @param flags Bit-wise OR of zero or more of the allocation flag bits.
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a
+ * secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed,
+ * just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the new allocation, or @a UMP_DD_INVALID_MEMORY_HANDLE on allocation failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/**
+ * Allocation bits getter.
+ * Retrieves the allocation flags used when instantiating the given handle.
+ * Just a copy of the flag given to @ref ump_dd_allocate_64 and @ref ump_dd_create_from_phys_blocks_64
+ * @param mem The handle retrieve the bits for
+ * @return The allocation bits used to instantiate the allocation
+ */
+UMP_KERNEL_API_EXPORT ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem);
+
+
+/**
+ * Retrieves the secure ID for the specified UMP memory.
+ *
+ * This identifier is unique across the entire system, and uniquely identifies
+ * the specified UMP memory allocation. This identifier can later be used through the
+ * @ref ump_dd_from_secure_id or
+ * @ref ump_from_secure_id
+ * functions in order to access this UMP memory, for instance from another process (if shared of course).
+ * Unless the allocation was marked as shared the returned ID will only be resolvable in the same process as did the allocation.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE will result in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_secure_id_get
+ *
+ * @see ump_dd_from_secure_id
+ * @see ump_from_secure_id
+ * @see ump_secure_id_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the secure ID for the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem);
+
+#ifdef CONFIG_KDS
+/**
+ * Retrieve the KDS resource for the specified UMP memory.
+ *
+ * The KDS resource should be used to synchronize access to the UMP allocation.
+ * See the KDS API for how to do that.
+ *
+ * @param mem Handle to the UMP memory to query.
+ * @return Pointer to the KDS resource controlling access to the UMP memory.
+ */
+UMP_KERNEL_API_EXPORT struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem);
+#endif
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_release when there is no longer any
+ * use for the retrieved handle.
+ *
+ * If called on an non-shared allocation and this is a different process @a UMP_DD_INVALID_MEMORY_HANDLE will be returned.
+ *
+ * Calling on an @a UMP_INVALID_SECURE_ID will return @a UMP_DD_INVALID_MEMORY_HANDLE
+ *
+ * @note There is a user space equivalent function called @ref ump_from_secure_id
+ *
+ * @see ump_dd_release
+ * @see ump_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get function.
+ *
+ * @return @a UMP_DD_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ * This function will return a pointer to an array of @ref ump_dd_physical_block_64 in @a pArray and the number of array elements in @a pCount
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @param mem Handle to UMP memory.
+ * @param[out] pCount Pointer to where to store the number of items in the returned array
+ * @param[out] pArray Pointer to where to store a pointer to the physical blocks array
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray);
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT uint64_t ump_dd_size_get_64(const ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory allocation.
+ *
+ * The function @ref ump_dd_release must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note You are not required to call @ref ump_dd_retain
+ * for UMP handles returned from
+ * @ref ump_dd_from_secure_id,
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_retain
+ *
+ * @see ump_retain
+ *
+ * @param mem Handle to UMP memory.
+ * @return 0 indicates success, any other value indicates failure.
+ */
+UMP_KERNEL_API_EXPORT int ump_dd_retain(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function must be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * One can only call ump_release when matched with a successful ump_dd_retain, ump_dd_allocate_64 or ump_dd_from_secure_id
+ * If called on an @a UMP_DD_INVALID_MEMORY_HANDLE the function will early out.
+ *
+ * @note There is a user space equivalent function called @ref ump_release
+ *
+ * @see ump_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_release(ump_dd_handle mem);
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ * Once wrapped the memory acts just like a normal allocation coming from @ref ump_dd_allocate_64.
+ * The only exception is that the freed physical memory is not put into the pool of free memory, but instead considered returned to the caller once @a final_release_func returns.
+ * The blocks array will be copied, so no need to hold on to it after this function returns.
+ * @param[in] blocks Array of @ref ump_dd_physical_block_64
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ * @param flags Allocation flags to mark the handle with
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed, just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/** @name UMP v1 API
+ * Functions provided to support compatibility with UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_HANDLE_INVALID UMP_DD_INVALID_MEMORY_HANDLE
+
+/**
+ * UMP error codes for kernel space.
+ */
+typedef enum
+{
+	UMP_DD_SUCCESS, /**< indicates success */
+	UMP_DD_INVALID /**< indicates failure */
+} ump_dd_status_code;
+
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block
+{
+	unsigned long addr; /**< The physical address of the block */
+	unsigned long size; /**< The length of the block, typically page aligned */
+} ump_dd_physical_block;
+
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_reference_release "ump_dd_reference_release" when there is no longer any
+ * use for the retrieved handle.
+ *
+ * @note There is a user space equivalent function called @ref ump_handle_create_from_secure_id "ump_handle_create_from_secure_id"
+ *
+ * @see ump_dd_reference_release
+ * @see ump_handle_create_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get "ump_secure_id_get " function.
+ *
+ * @return UMP_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id);
+
+
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ *
+ * @param[in] blocks Array of @ref ump_dd_physical_block
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ *
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the number of physical blocks used by the specified UMP memory.
+ *
+ * This function retrieves the number of @ref ump_dd_physical_block "ump_dd_physical_block" structs needed
+ * to describe the physical memory layout of the given UMP memory. This can later be used when calling
+ * the functions @ref ump_dd_phys_blocks_get "ump_dd_phys_blocks_get" and
+ * @ref ump_dd_phys_block_get "ump_dd_phys_block_get".
+ *
+ * @see ump_dd_phys_blocks_get
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return The number of ump_dd_physical_block structs required to describe the physical memory layout of the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will fail if the num_blocks parameter is either to large or to small.
+ *
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param blocks An array of @ref ump_dd_physical_block "ump_dd_physical_block" structs that will receive the physical description.
+ * @param num_blocks The number of blocks to return in the blocks array. Use the function
+ *                   @ref ump_dd_phys_block_count_get "ump_dd_phys_block_count_get" first to determine the number of blocks required.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the physical memory block information for specified block for the specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will return UMP_DD_INVALID if the specified index is out of range.
+ *
+ * @see ump_dd_phys_blocks_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param index Which physical info block to retrieve.
+ * @param block Pointer to a @ref ump_dd_physical_block "ump_dd_physical_block" struct which will receive the requested information.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block);
+
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get "ump_size_get"
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory.
+ *
+ * This function adds an extra reference to the specified UMP memory. This function should
+ * be used every time a UMP memory handle is duplicated, that is, assigned to another ump_dd_handle
+ * variable. The function @ref ump_dd_reference_release "ump_dd_reference_release" must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * @note You are not required to call @ref ump_dd_reference_add "ump_dd_reference_add"
+ * for UMP handles returned from
+ * @ref ump_dd_handle_create_from_secure_id "ump_dd_handle_create_from_secure_id",
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_add "ump_reference_add"
+ *
+ * @see ump_reference_add
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function should be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_release "ump_reference_release"
+ *
+ * @see ump_reference_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle mem);
+
+/* @} */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/** @} */ /* end group ump_kernel_space_api */
+
+/** @} */ /* end group ump_api */
+
+#endif /* _UMP_KERNEL_INTERFACE_H_ */
diff --git a/t83x/kernel/license.txt b/t83x/kernel/license.txt
new file mode 100644
index 0000000..77c14bd
--- /dev/null
+++ b/t83x/kernel/license.txt
@@ -0,0 +1,198 @@
+GPLV2 LICENCE AGREEMENT FOR MALI GPUS LINUX KERNEL DEVICE DRIVERS SOURCE CODE
+
+THE USE OF THE SOFTWARE ACCOMPANYING THIS DOCUMENT IS EXPRESSLY SUBJECT TO THE TERMS OF THE GNU GENERAL PUBLIC LICENSE VERSION 2 AS PUBLISHED BY THE FREE SOFTWARE FOUNDATION AND SET OUT BELOW FOR REFERENCE (?GPL LICENCE?). ARM IS ONLY WILLING TO DISTRIBUTE THE SOFTWARE TO YOU ON CONDITION THAT YOU ACCEPT ALL OF THE TERMS IN THE GPL LICENCE PRIOR TO MODIFYING OR DISTRIBUTING THE SOFTWARE.
+
+
+
+Further for the period of three (3) years, ARM hereby offers to make available the source code of any part of the software program that is supplied as object code or in executable form.
+
+
+
+GPL Licence
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+Version 2, June 1991
+
+
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+
+
+
+Preamble
+
+
+
+The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too.
+
+
+
+When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things.
+
+
+
+To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it.
+
+
+
+For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
+
+
+
+We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
+
+
+
+Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations.
+
+
+
+Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all.
+
+
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+
+
+GNU GENERAL PUBLIC LICENSE
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+
+
+0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you".
+
+
+
+Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program).
+
+Whether that is true depends on what the Program does.
+
+
+
+1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty;
+
+and give any other recipients of the Program a copy of this License along with the Program.
+
+
+
+You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee.
+
+
+
+2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
+
+
+
+a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change.
+
+
+
+b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License.
+
+
+
+c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.)
+
+
+
+These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.
+
+
+
+Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program.
+
+
+
+In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.
+
+
+
+3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following:
+
+
+
+a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
+
+
+
+c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.)
+
+
+
+The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.
+
+
+
+If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code.
+
+
+
+4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.
+
+
+
+5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the
+
+Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it.
+
+
+
+6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein.
+
+You are not responsible for enforcing compliance by third parties to this License.
+
+
+
+7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program.
+
+
+
+If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances.
+
+
+
+It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.
+
+
+
+This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
+
+
+
+8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.
+
+
+
+9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+
+
+
+Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation.
+
+
+
+10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.
+
+
+
+NO WARRANTY
+
+
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+/end
+
diff --git a/t83x/kernel/sconscript b/t83x/kernel/sconscript
new file mode 100644
index 0000000..79d94b8
--- /dev/null
+++ b/t83x/kernel/sconscript
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2010-2014, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+Import('env')
+
+if env['backend'] == 'kernel' and int(env['kernel_modules']) == 1:
+	SConscript('drivers/sconscript')
diff --git a/ump/.version b/ump/.version
new file mode 100755
index 0000000..4175ceb
--- /dev/null
+++ b/ump/.version
@@ -0,0 +1 @@
+r7p0-00rel0
diff --git a/ump/Kbuild b/ump/Kbuild
new file mode 100755
index 0000000..72db5e2
--- /dev/null
+++ b/ump/Kbuild
@@ -0,0 +1,99 @@
+#
+# Copyright (C) 2010-2012 ARM Limited. All rights reserved.
+# 
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+# 
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+# Set default configuration to use, if Makefile didn't provide one.
+# Change this to use a different config.h
+TARGET_PLATFORM ?= aml-meson
+CONFIG ?= aml-meson-m400-1
+
+
+# Validate selected config
+ifneq ($(shell [ -d $(src)/arch-$(CONFIG) ] && [ -f  $(src)/arch-$(CONFIG)/config.h ] && echo "OK"), OK)
+$(warning Current directory is $(src))
+$(error No configuration found for config $(CONFIG). Check that arch-$(CONFIG)/config.h exists)
+else
+# Link arch to the selected arch-config directory
+$(shell [ -L $(src)/arch ] && rm $(src)/arch)
+$(shell ln -sf arch-$(CONFIG) $(src)/arch)
+$(shell touch $(src)/arch/config.h)
+endif
+
+UDD_FILE_PREFIX = ../mali/
+
+# Get subversion revision number, fall back to 0000 if no svn info is available
+SVN_INFO = (cd $(src); svn info 2>/dev/null)
+
+ifneq ($(shell $(SVN_INFO) 2>/dev/null),)
+# SVN detected
+SVN_REV := $(shell $(SVN_INFO) | grep '^Revision: '| sed -e 's/^Revision: //' 2>/dev/null)
+DRIVER_REV := $(MALI_RELEASE_NAME)-r$(SVN_REV)
+CHANGE_DATE := $(shell $(SVN_INFO) | grep '^Last Changed Date: ' | cut -d: -f2- | cut -b2-)
+CHANGED_REVISION := $(shell $(SVN_INFO) | grep '^Last Changed Rev: ' | cut -d: -f2- | cut -b2-)
+REPO_URL := $(shell $(SVN_INFO) | grep '^URL: ' | cut -d: -f2- | cut -b2-)
+else # SVN
+GIT_REV := $(shell cd $(src); git describe --always 2>/dev/null)
+ifneq ($(GIT_REV),)
+# Git detected
+DRIVER_REV := $(MALI_RELEASE_NAME)-$(GIT_REV)
+CHANGE_DATE := $(shell cd $(src); git log -1 --format="%ci")
+CHANGED_REVISION := $(GIT_REV)
+REPO_URL := $(shell cd $(src); git describe --all --always 2>/dev/null)
+else # Git
+# No Git or SVN detected
+DRIVER_REV := $(MALI_RELEASE_NAME)
+CHANGE_DATE := $(MALI_RELEASE_NAME)
+CHANGED_REVISION := $(MALI_RELEASE_NAME)
+endif
+endif
+ccflags-y += -DSVN_REV=$(SVN_REV)
+ccflags-y += -DSVN_REV_STRING=\"$(DRIVER_REV)\"
+
+ccflags-y += -I$(src) -I$(src)/common -I$(src)/linux -I$(src)/../mali/common -I$(src)/../mali/linux -I$(src)/include -I$(src)/../../ump/include/ump
+ccflags-y += -DMALI_STATE_TRACKING=0
+ccflags-y += -DMALI_ENABLE_CPU_CYCLES=0
+ccflags-$(CONFIG_UMP_DEBUG) += -DDEBUG
+
+# For customer releases the Linux Device Drivers will be provided as ARM proprietary and GPL releases:
+# The ARM proprietary product will only include the license/proprietary directory
+# The GPL product will only include the license/gpl directory
+
+ifeq ($(wildcard $(src)/linux/license/gpl/*),)
+ccflags-y += -I$(src)/linux/license/proprietary -I$(src)/../mali/linux/license/proprietary
+else
+ccflags-y += -I$(src)/linux/license/gpl -I$(src)/../mali/linux/license/gpl
+endif
+
+
+
+ump-y = common/ump_kernel_common.o \
+	common/ump_kernel_descriptor_mapping.o \
+	common/ump_kernel_api.o \
+	common/ump_kernel_ref_drv.o \
+	linux/ump_kernel_linux.o \
+	linux/ump_kernel_memory_backend_os.o \
+	linux/ump_kernel_memory_backend_dedicated.o \
+	linux/ump_memory_backend.o \
+	linux/ump_ukk_wrappers.o \
+	linux/ump_ukk_ref_wrappers.o \
+	linux/ump_osk_atomics.o \
+	linux/ump_osk_low_level_mem.o \
+	linux/ump_osk_misc.o \
+	linux/ump_kernel_random_mapping.o 
+
+ifneq ($(CONFIG_MALI400),y)	
+	$(UDD_FILE_PREFIX)linux/mali_osk_atomics.o \
+	$(UDD_FILE_PREFIX)linux/mali_osk_locks.o \
+	$(UDD_FILE_PREFIX)linux/mali_osk_memory.o \
+	$(UDD_FILE_PREFIX)linux/mali_osk_math.o \
+	$(UDD_FILE_PREFIX)linux/mali_osk_misc.o
+endif
+
+obj-$(CONFIG_UMP) := ump.o
+
diff --git a/ump/Kconfig b/ump/Kconfig
new file mode 100755
index 0000000..8a75854
--- /dev/null
+++ b/ump/Kconfig
@@ -0,0 +1,19 @@
+menu "Mali 400 UMP device driver"
+config UMP
+	tristate "UMP support"
+	depends on ARM
+	default n
+	---help---
+	  This enables support for the UMP memory allocation and sharing API.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called ump.
+
+config UMP_DEBUG
+	bool "Enable extra debug in UMP"
+	depends on UMP
+	default n
+	---help---
+	  This enabled extra debug checks and messages in UMP.
+
+endmenu
diff --git a/ump/Makefile b/ump/Makefile
new file mode 100755
index 0000000..161b006
--- /dev/null
+++ b/ump/Makefile
@@ -0,0 +1,75 @@
+#
+# Copyright (C) 2010-2012, 2014, 2016 ARM Limited. All rights reserved.
+# 
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+# 
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+# Skip this Makefile when included from modpost
+ifeq ($(KBUILD_EXTMOD),)
+
+TARGET_PLATFORM ?= aml-meson
+CONFIG ?= aml-meson-m400-1
+
+# For each arch check: CROSS_COMPILE , KDIR , CFLAGS += -DARCH
+
+export ARCH ?= arm
+BUILD ?= debug
+
+check_cc2 = \
+	$(shell if $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; \
+	then \
+		echo "$(2)"; \
+	else \
+		echo "$(3)"; \
+	fi ;)
+
+# Check that required parameters are supplied.
+ifeq ($(CONFIG),)
+CONFIG := default
+endif
+ifeq ($(CPU)$(KDIR),)
+$(error "KDIR or CPU must be specified.")
+endif
+
+# Get any user defined KDIR-<names> or maybe even a hardcoded KDIR
+-include KDIR_CONFIGURATION
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+ifeq ($(ARCH), arm)
+	# when compiling for ARM we're cross compiling
+export CROSS_COMPILE ?= $(call check_cc2, arm-linux-gnueabi-gcc, arm-linux-gnueabi-, arm-none-linux-gnueabi-)
+endif
+
+# look up KDIR based om CPU selection
+KDIR ?= $(KDIR-$(CPU))
+
+export CONFIG
+
+export CONFIG_UMP := m
+ifeq ($(BUILD),debug)
+export CONFIG_UMP_DEBUG := y
+else
+export CONFIG_UMP_DEBUG := n
+endif
+
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(CPU))
+endif
+
+all:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) modules
+
+kernelrelease:
+	$(MAKE) -C $(KDIR) kernelrelease
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
+	$(MAKE) -C $(KDIR) M=$(CURDIR)/../mali clean
+
+endif
diff --git a/ump/Makefile.common b/ump/Makefile.common
new file mode 100755
index 0000000..00153de
--- /dev/null
+++ b/ump/Makefile.common
@@ -0,0 +1,20 @@
+#
+# Copyright (C) 2010-2011, 2013, 2016 ARM Limited. All rights reserved.
+# 
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+# 
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+SRC = $(UMP_FILE_PREFIX)common/ump_kernel_common.c \
+	$(UMP_FILE_PREFIX)common/ump_kernel_descriptor_mapping.c \
+	$(UMP_FILE_PREFIX)common/ump_kernel_api.c \
+	$(UMP_FILE_PREFIX)common/ump_kernel_ref_drv.c
+
+# Get subversion revision number, fall back to 0000 if no svn info is available
+SVN_REV:=$(shell ((svnversion | grep -qv exported && echo -n 'Revision: ' && svnversion) || git svn info | sed -e 's/$$$$/M/' | grep '^Revision: ' || echo ${MALI_RELEASE_NAME}) 2>/dev/null | sed -e 's/^Revision: //')
+
+EXTRA_CFLAGS += -DSVN_REV=$(SVN_REV)
+EXTRA_CFLAGS += -DSVN_REV_STRING=\"$(SVN_REV)\"
diff --git a/ump/arch b/ump/arch
new file mode 120000
index 0000000..b2d568a
--- /dev/null
+++ b/ump/arch
@@ -0,0 +1 @@
+arch-aml-meson-m400-1
\ No newline at end of file
diff --git a/ump/arch-aml-meson-m400-1/config.h b/ump/arch-aml-meson-m400-1/config.h
new file mode 100644
index 0000000..a8d1991
--- /dev/null
+++ b/ump/arch-aml-meson-m400-1/config.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2010 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 256UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/ump/arch-pb-virtex5/config.h b/ump/arch-pb-virtex5/config.h
new file mode 100755
index 0000000..9160ce1
--- /dev/null
+++ b/ump/arch-pb-virtex5/config.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2010-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT          0
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT   0xE1000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 16UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/ump/common/ump_kernel_api.c b/ump/common/ump_kernel_api.c
new file mode 100755
index 0000000..2ad4ae5
--- /dev/null
+++ b/ump/common/ump_kernel_api.c
@@ -0,0 +1,455 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "ump_osk.h"
+#include "ump_uk_types.h"
+#include "ump_kernel_interface.h"
+#include "ump_kernel_common.h"
+#include "ump_kernel_random_mapping.h"
+
+
+
+/* ---------------- UMP kernel space API functions follows ---------------- */
+
+
+
+UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(ump_dd_handle memh)
+{
+	ump_dd_mem *mem = (ump_dd_mem *)memh;
+
+	DEBUG_ASSERT_POINTER(mem);
+
+	DBG_MSG(5, ("Returning secure ID. ID: %u\n", mem->secure_id));
+
+	return mem->secure_id;
+}
+
+
+
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id)
+{
+	ump_dd_mem *mem;
+
+	DBG_MSG(5, ("Getting handle from secure ID. ID: %u\n", secure_id));
+	mem = ump_random_mapping_get(device.secure_id_map, (int)secure_id);
+	if (NULL == mem) {
+		DBG_MSG(1, ("Secure ID not found. ID: %u\n", secure_id));
+		return UMP_DD_HANDLE_INVALID;
+	}
+
+	/* Keep the reference taken in ump_random_mapping_get() */
+
+	return (ump_dd_handle)mem;
+}
+
+
+
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle memh)
+{
+	ump_dd_mem *mem = (ump_dd_mem *) memh;
+
+	DEBUG_ASSERT_POINTER(mem);
+
+	return mem->nr_blocks;
+}
+
+
+
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle memh, ump_dd_physical_block *blocks, unsigned long num_blocks)
+{
+	ump_dd_mem *mem = (ump_dd_mem *)memh;
+
+	DEBUG_ASSERT_POINTER(mem);
+
+	if (blocks == NULL) {
+		DBG_MSG(1, ("NULL parameter in ump_dd_phys_blocks_get()\n"));
+		return UMP_DD_INVALID;
+	}
+
+	if (mem->nr_blocks != num_blocks) {
+		DBG_MSG(1, ("Specified number of blocks do not match actual number of blocks\n"));
+		return UMP_DD_INVALID;
+	}
+
+	DBG_MSG(5, ("Returning physical block information. ID: %u\n", mem->secure_id));
+
+	_mali_osk_memcpy(blocks, mem->block_array, sizeof(ump_dd_physical_block) * mem->nr_blocks);
+
+	return UMP_DD_SUCCESS;
+}
+
+
+
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle memh, unsigned long index, ump_dd_physical_block *block)
+{
+	ump_dd_mem *mem = (ump_dd_mem *)memh;
+
+	DEBUG_ASSERT_POINTER(mem);
+
+	if (block == NULL) {
+		DBG_MSG(1, ("NULL parameter in ump_dd_phys_block_get()\n"));
+		return UMP_DD_INVALID;
+	}
+
+	if (index >= mem->nr_blocks) {
+		DBG_MSG(5, ("Invalid index specified in ump_dd_phys_block_get()\n"));
+		return UMP_DD_INVALID;
+	}
+
+	DBG_MSG(5, ("Returning physical block information. ID: %u, index: %lu\n", mem->secure_id, index));
+
+	*block = mem->block_array[index];
+
+	return UMP_DD_SUCCESS;
+}
+
+
+
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle memh)
+{
+	ump_dd_mem *mem = (ump_dd_mem *)memh;
+
+	DEBUG_ASSERT_POINTER(mem);
+
+	DBG_MSG(5, ("Returning size. ID: %u, size: %lu\n", mem->secure_id, mem->size_bytes));
+
+	return mem->size_bytes;
+}
+
+
+
+UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle memh)
+{
+	ump_dd_mem *mem = (ump_dd_mem *)memh;
+	int new_ref;
+
+	DEBUG_ASSERT_POINTER(mem);
+
+	new_ref = _ump_osk_atomic_inc_and_read(&mem->ref_count);
+
+	DBG_MSG(5, ("Memory reference incremented. ID: %u, new value: %d\n", mem->secure_id, new_ref));
+}
+
+
+
+UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle memh)
+{
+	ump_dd_mem *mem = (ump_dd_mem *)memh;
+
+	DEBUG_ASSERT_POINTER(mem);
+
+	ump_random_mapping_put(mem);
+}
+
+
+
+/* --------------- Handling of user space requests follows --------------- */
+
+
+_mali_osk_errcode_t _ump_uku_get_api_version(_ump_uk_api_version_s *args)
+{
+	ump_session_data *session_data;
+
+	DEBUG_ASSERT_POINTER(args);
+	DEBUG_ASSERT_POINTER(args->ctx);
+
+	session_data = (ump_session_data *)args->ctx;
+
+	/* check compatability */
+	if (args->version == UMP_IOCTL_API_VERSION) {
+		DBG_MSG(3, ("API version set to newest %d (compatible)\n",
+			    GET_VERSION(args->version)));
+		args->compatible = 1;
+		session_data->api_version = args->version;
+	} else {
+		DBG_MSG(2, ("API version set to %d (incompatible with client version %d)\n",
+			    GET_VERSION(UMP_IOCTL_API_VERSION), GET_VERSION(args->version)));
+		args->compatible = 0;
+		args->version = UMP_IOCTL_API_VERSION; /* report our version */
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+_mali_osk_errcode_t _ump_ukk_release(_ump_uk_release_s *release_info)
+{
+	ump_session_memory_list_element *session_memory_element;
+	ump_session_memory_list_element *tmp;
+	ump_session_data *session_data;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_INVALID_FUNC;
+	int secure_id;
+
+	DEBUG_ASSERT_POINTER(release_info);
+	DEBUG_ASSERT_POINTER(release_info->ctx);
+
+	/* Retreive the session data */
+	session_data = (ump_session_data *)release_info->ctx;
+
+	/* If there are many items in the memory session list we
+	 * could be de-referencing this pointer a lot so keep a local copy
+	 */
+	secure_id = release_info->secure_id;
+
+	DBG_MSG(4, ("Releasing memory with IOCTL, ID: %u\n", secure_id));
+
+	/* Iterate through the memory list looking for the requested secure ID */
+	_mali_osk_mutex_wait(session_data->lock);
+	_MALI_OSK_LIST_FOREACHENTRY(session_memory_element, tmp, &session_data->list_head_session_memory_list, ump_session_memory_list_element, list) {
+		if (session_memory_element->mem->secure_id == secure_id) {
+			ump_dd_mem *release_mem;
+
+			release_mem = session_memory_element->mem;
+			_mali_osk_list_del(&session_memory_element->list);
+			ump_dd_reference_release(release_mem);
+			_mali_osk_free(session_memory_element);
+
+			ret = _MALI_OSK_ERR_OK;
+			break;
+		}
+	}
+
+	_mali_osk_mutex_signal(session_data->lock);
+	DBG_MSG_IF(1, _MALI_OSK_ERR_OK != ret, ("UMP memory with ID %u does not belong to this session.\n", secure_id));
+
+	DBG_MSG(4, ("_ump_ukk_release() returning 0x%x\n", ret));
+	return ret;
+}
+
+_mali_osk_errcode_t _ump_ukk_size_get(_ump_uk_size_get_s *user_interaction)
+{
+	ump_dd_mem *mem;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+
+	DEBUG_ASSERT_POINTER(user_interaction);
+
+	/* We lock the mappings so things don't get removed while we are looking for the memory */
+	mem = ump_random_mapping_get(device.secure_id_map, user_interaction->secure_id);
+	if (NULL != mem) {
+		user_interaction->size = mem->size_bytes;
+		DBG_MSG(4, ("Returning size. ID: %u, size: %lu ",
+			    (ump_secure_id)user_interaction->secure_id,
+			    (unsigned long)user_interaction->size));
+		ump_random_mapping_put(mem);
+		ret = _MALI_OSK_ERR_OK;
+	} else {
+		user_interaction->size = 0;
+		DBG_MSG(1, ("Failed to look up mapping in ump_ioctl_size_get(). ID: %u\n",
+			    (ump_secure_id)user_interaction->secure_id));
+	}
+
+	return ret;
+}
+
+
+
+void _ump_ukk_msync(_ump_uk_msync_s *args)
+{
+	ump_dd_mem *mem = NULL;
+	void *virtual = NULL;
+	u32 size = 0;
+	u32 offset = 0;
+
+	mem = ump_random_mapping_get(device.secure_id_map, (int)args->secure_id);
+	if (NULL == mem) {
+		DBG_MSG(1, ("Failed to look up mapping in _ump_ukk_msync(). ID: %u\n",
+			    (ump_secure_id)args->secure_id));
+		return;
+	}
+
+	/* Returns the cache settings back to Userspace */
+	args->is_cached = mem->is_cached;
+
+	/* If this flag is the only one set, we should not do the actual flush, only the readout */
+	if (_UMP_UK_MSYNC_READOUT_CACHE_ENABLED == args->op) {
+		DBG_MSG(3, ("_ump_ukk_msync READOUT  ID: %u Enabled: %d\n", (ump_secure_id)args->secure_id, mem->is_cached));
+		goto msync_release_and_return;
+	}
+
+	/* Nothing to do if the memory is not caches */
+	if (0 == mem->is_cached) {
+		DBG_MSG(3, ("_ump_ukk_msync IGNORING ID: %u Enabled: %d  OP: %d\n", (ump_secure_id)args->secure_id, mem->is_cached, args->op));
+		goto msync_release_and_return;
+	}
+	DBG_MSG(3, ("UMP[%02u] _ump_ukk_msync  Flush  OP: %d Address: 0x%08x Mapping: 0x%08x\n",
+		    (ump_secure_id)args->secure_id, args->op, args->address, args->mapping));
+
+	if (args->address) {
+		virtual = (void *)((u32)args->address);
+		offset = (u32)((args->address) - (args->mapping));
+	} else {
+		/* Flush entire mapping when no address is specified. */
+		virtual = args->mapping;
+	}
+	if (args->size) {
+		size = args->size;
+	} else {
+		/* Flush entire mapping when no size is specified. */
+		size = mem->size_bytes - offset;
+	}
+
+	if ((offset + size) > mem->size_bytes) {
+		DBG_MSG(1, ("Trying to flush more than the entire UMP allocation: offset: %u + size: %u > %u\n", offset, size, mem->size_bytes));
+		goto msync_release_and_return;
+	}
+
+	/* The actual cache flush - Implemented for each OS*/
+	_ump_osk_msync(mem, virtual, offset, size, args->op, NULL);
+
+msync_release_and_return:
+	ump_random_mapping_put(mem);
+	return;
+}
+
+void _ump_ukk_cache_operations_control(_ump_uk_cache_operations_control_s *args)
+{
+	ump_session_data *session_data;
+	ump_uk_cache_op_control op;
+
+	DEBUG_ASSERT_POINTER(args);
+	DEBUG_ASSERT_POINTER(args->ctx);
+
+	op = args->op;
+	session_data = (ump_session_data *)args->ctx;
+
+	_mali_osk_mutex_wait(session_data->lock);
+	if (op == _UMP_UK_CACHE_OP_START) {
+		session_data->cache_operations_ongoing++;
+		DBG_MSG(4, ("Cache ops start\n"));
+		if (session_data->cache_operations_ongoing != 1) {
+			DBG_MSG(2, ("UMP: Number of simultanious cache control ops: %d\n", session_data->cache_operations_ongoing));
+		}
+	} else if (op == _UMP_UK_CACHE_OP_FINISH) {
+		DBG_MSG(4, ("Cache ops finish\n"));
+		session_data->cache_operations_ongoing--;
+#if 0
+		if (session_data->has_pending_level1_cache_flush) {
+			/* This function will set has_pending_level1_cache_flush=0 */
+			_ump_osk_msync(NULL, NULL, 0, 0, _UMP_UK_MSYNC_FLUSH_L1, session_data);
+		}
+#endif
+
+		/* to be on the safe side: always flush l1 cache when cache operations are done */
+		_ump_osk_msync(NULL, NULL, 0, 0, _UMP_UK_MSYNC_FLUSH_L1, session_data);
+		DBG_MSG(4, ("Cache ops finish end\n"));
+	} else {
+		DBG_MSG(1, ("Illegal call to %s at line %d\n", __FUNCTION__, __LINE__));
+	}
+	_mali_osk_mutex_signal(session_data->lock);
+
+}
+
+void _ump_ukk_switch_hw_usage(_ump_uk_switch_hw_usage_s *args)
+{
+	ump_dd_mem *mem = NULL;
+	ump_uk_user old_user;
+	ump_uk_msync_op cache_op = _UMP_UK_MSYNC_CLEAN_AND_INVALIDATE;
+	ump_session_data *session_data;
+
+	DEBUG_ASSERT_POINTER(args);
+	DEBUG_ASSERT_POINTER(args->ctx);
+
+	session_data = (ump_session_data *)args->ctx;
+
+	mem = ump_random_mapping_get(device.secure_id_map, (int)args->secure_id);
+	if (NULL == mem) {
+		DBG_MSG(1, ("Failed to look up mapping in _ump_ukk_switch_hw_usage(). ID: %u\n",
+			    (ump_secure_id)args->secure_id));
+		return;
+	}
+
+	old_user = mem->hw_device;
+	mem->hw_device = args->new_user;
+
+	DBG_MSG(3, ("UMP[%02u] Switch usage  Start  New: %s  Prev: %s.\n",
+		    (ump_secure_id)args->secure_id,
+		    args->new_user ? "MALI" : "CPU",
+		    old_user ? "MALI" : "CPU"));
+
+	if (!mem->is_cached) {
+		DBG_MSG(3, ("UMP[%02u] Changing owner of uncached memory. Cache flushing not needed.\n",
+			    (ump_secure_id)args->secure_id));
+		goto out;
+	}
+
+	if (old_user == args->new_user) {
+		DBG_MSG(4, ("UMP[%02u] Setting the new_user equal to previous for. Cache flushing not needed.\n",
+			    (ump_secure_id)args->secure_id));
+		goto out;
+	}
+	if (
+		/* Previous AND new is both different from CPU */
+		(old_user != _UMP_UK_USED_BY_CPU) && (args->new_user != _UMP_UK_USED_BY_CPU)
+	) {
+		DBG_MSG(4, ("UMP[%02u] Previous and new user is not CPU. Cache flushing not needed.\n",
+			    (ump_secure_id)args->secure_id));
+		goto out;
+	}
+
+	if ((old_user != _UMP_UK_USED_BY_CPU) && (args->new_user == _UMP_UK_USED_BY_CPU)) {
+		cache_op = _UMP_UK_MSYNC_INVALIDATE;
+		DBG_MSG(4, ("UMP[%02u] Cache invalidation needed\n", (ump_secure_id)args->secure_id));
+#ifdef UMP_SKIP_INVALIDATION
+#error
+		DBG_MSG(4, ("UMP[%02u] Performing Cache invalidation SKIPPED\n", (ump_secure_id)args->secure_id));
+		goto out;
+#endif
+	}
+
+	/* Take lock to protect: session->cache_operations_ongoing and session->has_pending_level1_cache_flush */
+	_mali_osk_mutex_wait(session_data->lock);
+	/* Actual cache flush */
+	_ump_osk_msync(mem, NULL, 0, mem->size_bytes, cache_op, session_data);
+	_mali_osk_mutex_signal(session_data->lock);
+
+out:
+	ump_random_mapping_put(mem);
+	DBG_MSG(4, ("UMP[%02u] Switch usage  Finish\n", (ump_secure_id)args->secure_id));
+	return;
+}
+
+void _ump_ukk_lock(_ump_uk_lock_s *args)
+{
+	ump_dd_mem *mem = NULL;
+
+	mem = ump_random_mapping_get(device.secure_id_map, (int)args->secure_id);
+	if (NULL == mem) {
+		DBG_MSG(1, ("UMP[%02u] Failed to look up mapping in _ump_ukk_lock(). ID: %u\n",
+			    (ump_secure_id)args->secure_id));
+		return;
+	}
+
+	DBG_MSG(1, ("UMP[%02u] Lock. New lock flag: %d. Old Lock flag:\n", (u32)args->secure_id, (u32)args->lock_usage, (u32) mem->lock_usage));
+
+	mem->lock_usage = (ump_lock_usage) args->lock_usage;
+
+	ump_random_mapping_put(mem);
+}
+
+void _ump_ukk_unlock(_ump_uk_unlock_s *args)
+{
+	ump_dd_mem *mem = NULL;
+
+	mem = ump_random_mapping_get(device.secure_id_map, (int)args->secure_id);
+	if (NULL == mem) {
+		DBG_MSG(1, ("Failed to look up mapping in _ump_ukk_unlock(). ID: %u\n",
+			    (ump_secure_id)args->secure_id));
+		return;
+	}
+
+	DBG_MSG(1, ("UMP[%02u] Unlocking. Old Lock flag:\n",
+		    (u32)args->secure_id, (u32) mem->lock_usage));
+
+	mem->lock_usage = (ump_lock_usage) UMP_NOT_LOCKED;
+
+	ump_random_mapping_put(mem);
+}
diff --git a/ump/common/ump_kernel_common.c b/ump/common/ump_kernel_common.c
new file mode 100755
index 0000000..705435a
--- /dev/null
+++ b/ump/common/ump_kernel_common.c
@@ -0,0 +1,358 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_bitops.h"
+#include "mali_osk_list.h"
+#include "ump_osk.h"
+#include "ump_uk_types.h"
+#include "ump_ukk.h"
+#include "ump_kernel_common.h"
+#include "ump_kernel_descriptor_mapping.h"
+#include "ump_kernel_memory_backend.h"
+
+
+
+/**
+ * Define the initial and maximum size of number of secure_ids on the system
+ */
+#define UMP_SECURE_ID_TABLE_ENTRIES_INITIAL (128  )
+#define UMP_SECURE_ID_TABLE_ENTRIES_MAXIMUM (4096 )
+
+
+/**
+ * Define the initial and maximum size of the ump_session_data::cookies_map,
+ * which is a \ref ump_descriptor_mapping. This limits how many secure_ids
+ * may be mapped into a particular process using _ump_ukk_map_mem().
+ */
+
+#define UMP_COOKIES_PER_SESSION_INITIAL (UMP_SECURE_ID_TABLE_ENTRIES_INITIAL )
+#define UMP_COOKIES_PER_SESSION_MAXIMUM (UMP_SECURE_ID_TABLE_ENTRIES_MAXIMUM)
+
+struct ump_dev device;
+
+_mali_osk_errcode_t ump_kernel_constructor(void)
+{
+	_mali_osk_errcode_t err;
+
+	/* Perform OS Specific initialization */
+	err = _ump_osk_init();
+	if (_MALI_OSK_ERR_OK != err) {
+		MSG_ERR(("Failed to initiaze the UMP Device Driver"));
+		return err;
+	}
+
+	/* Init the global device */
+	_mali_osk_memset(&device, 0, sizeof(device));
+
+	/* Create the descriptor map, which will be used for mapping secure ID to ump_dd_mem structs */
+	device.secure_id_map = ump_random_mapping_create();
+	if (NULL == device.secure_id_map) {
+		MSG_ERR(("Failed to create secure id lookup table\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	/* Init memory backend */
+	device.backend = ump_memory_backend_create();
+	if (NULL == device.backend) {
+		MSG_ERR(("Failed to create memory backend\n"));
+		ump_random_mapping_destroy(device.secure_id_map);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void ump_kernel_destructor(void)
+{
+	DEBUG_ASSERT_POINTER(device.secure_id_map);
+
+	ump_random_mapping_destroy(device.secure_id_map);
+	device.secure_id_map = NULL;
+
+	device.backend->shutdown(device.backend);
+	device.backend = NULL;
+
+	ump_memory_backend_destroy();
+
+	_ump_osk_term();
+}
+
+/** Creates a new UMP session
+ */
+_mali_osk_errcode_t _ump_ukk_open(void **context)
+{
+	struct ump_session_data *session_data;
+
+	/* allocated struct to track this session */
+	session_data = (struct ump_session_data *)_mali_osk_malloc(sizeof(struct ump_session_data));
+	if (NULL == session_data) {
+		MSG_ERR(("Failed to allocate ump_session_data in ump_file_open()\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	session_data->lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_UNORDERED, 0);
+	if (NULL == session_data->lock) {
+		MSG_ERR(("Failed to initialize lock for ump_session_data in ump_file_open()\n"));
+		_mali_osk_free(session_data);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	session_data->cookies_map = ump_descriptor_mapping_create(
+					    UMP_COOKIES_PER_SESSION_INITIAL,
+					    UMP_COOKIES_PER_SESSION_MAXIMUM);
+
+	if (NULL == session_data->cookies_map) {
+		MSG_ERR(("Failed to create descriptor mapping for _ump_ukk_map_mem cookies\n"));
+
+		_mali_osk_mutex_term(session_data->lock);
+		_mali_osk_free(session_data);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	_MALI_OSK_INIT_LIST_HEAD(&session_data->list_head_session_memory_list);
+
+	_MALI_OSK_INIT_LIST_HEAD(&session_data->list_head_session_memory_mappings_list);
+
+	/* Since initial version of the UMP interface did not use the API_VERSION ioctl we have to assume
+	   that it is this version, and not the "latest" one: UMP_IOCTL_API_VERSION
+	   Current and later API versions would do an additional call to this IOCTL and update this variable
+	   to the correct one.*/
+	session_data->api_version = MAKE_VERSION_ID(1);
+
+	*context = (void *)session_data;
+
+	session_data->cache_operations_ongoing = 0 ;
+	session_data->has_pending_level1_cache_flush = 0;
+
+	DBG_MSG(2, ("New session opened\n"));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _ump_ukk_close(void **context)
+{
+	struct ump_session_data *session_data;
+	ump_session_memory_list_element *item;
+	ump_session_memory_list_element *tmp;
+
+	session_data = (struct ump_session_data *)*context;
+	if (NULL == session_data) {
+		MSG_ERR(("Session data is NULL in _ump_ukk_close()\n"));
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	/* Unmap any descriptors mapped in. */
+	if (0 == _mali_osk_list_empty(&session_data->list_head_session_memory_mappings_list)) {
+		ump_memory_allocation *descriptor;
+		ump_memory_allocation *temp;
+
+		DBG_MSG(1, ("Memory mappings found on session usage list during session termination\n"));
+
+		/* use the 'safe' list iterator, since freeing removes the active block from the list we're iterating */
+		_MALI_OSK_LIST_FOREACHENTRY(descriptor, temp, &session_data->list_head_session_memory_mappings_list, ump_memory_allocation, list) {
+			_ump_uk_unmap_mem_s unmap_args;
+			DBG_MSG(4, ("Freeing block with phys address 0x%x size 0x%x mapped in user space at 0x%x\n",
+				    descriptor->phys_addr, descriptor->size, descriptor->mapping));
+			unmap_args.ctx = (void *)session_data;
+			unmap_args.mapping = descriptor->mapping;
+			unmap_args.size = descriptor->size;
+			unmap_args._ukk_private = NULL; /* NOTE: unused */
+			unmap_args.cookie = descriptor->cookie;
+
+			/* NOTE: This modifies the list_head_session_memory_mappings_list */
+			_ump_ukk_unmap_mem(&unmap_args);
+		}
+	}
+
+	/* ASSERT that we really did free everything, because _ump_ukk_unmap_mem()
+	 * can fail silently. */
+	DEBUG_ASSERT(_mali_osk_list_empty(&session_data->list_head_session_memory_mappings_list));
+
+	_MALI_OSK_LIST_FOREACHENTRY(item, tmp, &session_data->list_head_session_memory_list, ump_session_memory_list_element, list) {
+		_mali_osk_list_del(&item->list);
+		DBG_MSG(2, ("Releasing UMP memory %u as part of file close\n", item->mem->secure_id));
+		ump_dd_reference_release(item->mem);
+		_mali_osk_free(item);
+	}
+
+	ump_descriptor_mapping_destroy(session_data->cookies_map);
+
+	_mali_osk_mutex_term(session_data->lock);
+	_mali_osk_free(session_data);
+
+	DBG_MSG(2, ("Session closed\n"));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _ump_ukk_map_mem(_ump_uk_map_mem_s *args)
+{
+	struct ump_session_data *session_data;
+	ump_memory_allocation *descriptor;   /* Describes current mapping of memory */
+	_mali_osk_errcode_t err;
+	unsigned long offset = 0;
+	unsigned long left;
+	ump_dd_handle handle;  /* The real UMP handle for this memory. Its real datatype is ump_dd_mem*  */
+	ump_dd_mem *mem;       /* The real UMP memory. It is equal to the handle, but with exposed struct */
+	u32 block;
+	int map_id;
+
+	session_data = (ump_session_data *)args->ctx;
+	if (NULL == session_data) {
+		MSG_ERR(("Session data is NULL in _ump_ukk_map_mem()\n"));
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	descriptor = (ump_memory_allocation *) _mali_osk_calloc(1, sizeof(ump_memory_allocation));
+	if (NULL == descriptor) {
+		MSG_ERR(("ump_ukk_map_mem: descriptor allocation failed\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	handle = ump_dd_handle_create_from_secure_id(args->secure_id);
+	if (UMP_DD_HANDLE_INVALID == handle) {
+		_mali_osk_free(descriptor);
+		DBG_MSG(1, ("Trying to map unknown secure ID %u\n", args->secure_id));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	mem = (ump_dd_mem *)handle;
+	DEBUG_ASSERT(mem);
+	if (mem->size_bytes != args->size) {
+		_mali_osk_free(descriptor);
+		ump_dd_reference_release(handle);
+		DBG_MSG(1, ("Trying to map too much or little. ID: %u, virtual size=%lu, UMP size: %lu\n", args->secure_id, args->size, mem->size_bytes));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	map_id = ump_descriptor_mapping_allocate_mapping(session_data->cookies_map, (void *) descriptor);
+
+	if (map_id < 0) {
+		_mali_osk_free(descriptor);
+		ump_dd_reference_release(handle);
+		DBG_MSG(1, ("ump_ukk_map_mem: unable to allocate a descriptor_mapping for return cookie\n"));
+
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	descriptor->size = args->size;
+	descriptor->handle = handle;
+	descriptor->phys_addr = args->phys_addr;
+	descriptor->process_mapping_info = args->_ukk_private;
+	descriptor->ump_session = session_data;
+	descriptor->cookie = (u32)map_id;
+
+	if (mem->is_cached) {
+		descriptor->is_cached = 1;
+		DBG_MSG(3, ("Mapping UMP secure_id: %d as cached.\n", args->secure_id));
+	} else {
+		descriptor->is_cached = 0;
+		DBG_MSG(3, ("Mapping UMP secure_id: %d  as Uncached.\n", args->secure_id));
+	}
+
+	_mali_osk_list_init(&descriptor->list);
+
+	err = _ump_osk_mem_mapregion_init(descriptor);
+	if (_MALI_OSK_ERR_OK != err) {
+		DBG_MSG(1, ("Failed to initialize memory mapping in _ump_ukk_map_mem(). ID: %u\n", args->secure_id));
+		ump_descriptor_mapping_free(session_data->cookies_map, map_id);
+		_mali_osk_free(descriptor);
+		ump_dd_reference_release(mem);
+		return err;
+	}
+
+	DBG_MSG(4, ("Mapping virtual to physical memory: ID: %u, size:%lu, first physical addr: 0x%08lx, number of regions: %lu\n",
+		    mem->secure_id,
+		    mem->size_bytes,
+		    ((NULL != mem->block_array) ? mem->block_array->addr : 0),
+		    mem->nr_blocks));
+
+	left = descriptor->size;
+	/* loop over all blocks and map them in */
+	for (block = 0; block < mem->nr_blocks; block++) {
+		unsigned long size_to_map;
+
+		if (left >  mem->block_array[block].size) {
+			size_to_map = mem->block_array[block].size;
+		} else {
+			size_to_map = left;
+		}
+
+		if (_MALI_OSK_ERR_OK != _ump_osk_mem_mapregion_map(descriptor, offset, (u32 *) & (mem->block_array[block].addr), size_to_map)) {
+			DBG_MSG(1, ("WARNING: _ump_ukk_map_mem failed to map memory into userspace\n"));
+			ump_descriptor_mapping_free(session_data->cookies_map, map_id);
+			ump_dd_reference_release(mem);
+			_ump_osk_mem_mapregion_term(descriptor);
+			_mali_osk_free(descriptor);
+			return _MALI_OSK_ERR_FAULT;
+		}
+		left -= size_to_map;
+		offset += size_to_map;
+	}
+
+	/* Add to the ump_memory_allocation tracking list */
+	_mali_osk_mutex_wait(session_data->lock);
+	_mali_osk_list_add(&descriptor->list, &session_data->list_head_session_memory_mappings_list);
+	_mali_osk_mutex_signal(session_data->lock);
+
+	args->mapping = descriptor->mapping;
+	args->cookie = descriptor->cookie;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _ump_ukk_unmap_mem(_ump_uk_unmap_mem_s *args)
+{
+	struct ump_session_data *session_data;
+	ump_memory_allocation *descriptor;
+	ump_dd_handle handle;
+
+	session_data = (ump_session_data *)args->ctx;
+
+	if (NULL == session_data) {
+		MSG_ERR(("Session data is NULL in _ump_ukk_map_mem()\n"));
+		return;
+	}
+
+	if (0 != ump_descriptor_mapping_get(session_data->cookies_map, (int)args->cookie, (void **)&descriptor)) {
+		MSG_ERR(("_ump_ukk_map_mem: cookie 0x%X not found for this session\n", args->cookie));
+		return;
+	}
+
+	DEBUG_ASSERT_POINTER(descriptor);
+
+	handle = descriptor->handle;
+	if (UMP_DD_HANDLE_INVALID == handle) {
+		DBG_MSG(1, ("WARNING: Trying to unmap unknown handle: UNKNOWN\n"));
+		return;
+	}
+
+	/* Remove the ump_memory_allocation from the list of tracked mappings */
+	_mali_osk_mutex_wait(session_data->lock);
+	_mali_osk_list_del(&descriptor->list);
+	_mali_osk_mutex_signal(session_data->lock);
+
+	ump_descriptor_mapping_free(session_data->cookies_map, (int)args->cookie);
+
+	ump_dd_reference_release(handle);
+
+	_ump_osk_mem_mapregion_term(descriptor);
+	_mali_osk_free(descriptor);
+}
+
+u32 _ump_ukk_report_memory_usage(void)
+{
+	if (device.backend->stat)
+		return device.backend->stat(device.backend);
+	else
+		return 0;
+}
diff --git a/ump/common/ump_kernel_common.h b/ump/common/ump_kernel_common.h
new file mode 100755
index 0000000..1969c97
--- /dev/null
+++ b/ump/common/ump_kernel_common.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __UMP_KERNEL_COMMON_H__
+#define __UMP_KERNEL_COMMON_H__
+
+#include "ump_kernel_types.h"
+#include "ump_kernel_interface.h"
+#include "ump_kernel_descriptor_mapping.h"
+#include "ump_kernel_random_mapping.h"
+#include "ump_kernel_memory_backend.h"
+
+
+#ifdef DEBUG
+extern int ump_debug_level;
+#define UMP_DEBUG_PRINT(args) _mali_osk_dbgmsg args
+#define UMP_DEBUG_CODE(args) args
+#define DBG_MSG(level,args)  do { /* args should be in brackets */ \
+		((level) <=  ump_debug_level)?\
+		UMP_DEBUG_PRINT(("UMP<" #level ">: ")), \
+		UMP_DEBUG_PRINT(args):0; \
+	} while (0)
+
+#define DBG_MSG_IF(level,condition,args) /* args should be in brackets */ \
+	if((condition)&&((level) <=  ump_debug_level)) {\
+		UMP_DEBUG_PRINT(("UMP<" #level ">: ")); \
+		UMP_DEBUG_PRINT(args); \
+	}
+
+#define DBG_MSG_ELSE(level,args) /* args should be in brackets */ \
+	else if((level) <=  ump_debug_level) { \
+		UMP_DEBUG_PRINT(("UMP<" #level ">: ")); \
+		UMP_DEBUG_PRINT(args); \
+	}
+
+#define DEBUG_ASSERT_POINTER(pointer) do  {if( (pointer)== NULL) MSG_ERR(("NULL pointer " #pointer)); } while(0)
+#define DEBUG_ASSERT(condition) do  {if(!(condition)) MSG_ERR(("ASSERT failed: " #condition)); } while(0)
+#else /* DEBUG */
+#define UMP_DEBUG_PRINT(args) do {} while(0)
+#define UMP_DEBUG_CODE(args)
+#define DBG_MSG(level,args) do {} while(0)
+#define DBG_MSG_IF(level,condition,args) do {} while(0)
+#define DBG_MSG_ELSE(level,args) do {} while(0)
+#define DEBUG_ASSERT(condition) do {} while(0)
+#define DEBUG_ASSERT_POINTER(pointer) do  {} while(0)
+#endif /* DEBUG */
+
+#define MSG_ERR(args) do{ /* args should be in brackets */ \
+		_mali_osk_dbgmsg("UMP: ERR: %s\n" ,__FILE__); \
+		_mali_osk_dbgmsg( "           %s()%4d\n", __FUNCTION__, __LINE__) ; \
+		_mali_osk_dbgmsg args ; \
+		_mali_osk_dbgmsg("\n"); \
+	} while(0)
+
+#define MSG(args) do{ /* args should be in brackets */ \
+		_mali_osk_dbgmsg("UMP: "); \
+		_mali_osk_dbgmsg args; \
+	} while (0)
+
+
+
+/*
+ * This struct is used to store per session data.
+ * A session is created when someone open() the device, and
+ * closed when someone close() it or the user space application terminates.
+ */
+typedef struct ump_session_data {
+	_mali_osk_list_t list_head_session_memory_list;  /**< List of ump allocations made by the process (elements are ump_session_memory_list_element) */
+	_mali_osk_list_t list_head_session_memory_mappings_list; /**< List of ump_memory_allocations mapped in */
+	int api_version;
+	_mali_osk_mutex_t *lock;
+	ump_descriptor_mapping *cookies_map;  /**< Secure mapping of cookies from _ump_ukk_map_mem() */
+	int cache_operations_ongoing;
+	int has_pending_level1_cache_flush;
+} ump_session_data;
+
+
+
+/*
+ * This struct is used to track the UMP memory references a session has.
+ * We need to track this in order to be able to clean up after user space processes
+ * which don't do it themself (e.g. due to a crash or premature termination).
+ */
+typedef struct ump_session_memory_list_element {
+	struct ump_dd_mem *mem;
+	_mali_osk_list_t list;
+} ump_session_memory_list_element;
+
+
+
+/*
+ * Device specific data, created when device driver is loaded, and then kept as the global variable device.
+ */
+typedef struct ump_dev {
+	ump_random_mapping *secure_id_map;
+	ump_memory_backend *backend;
+} ump_dev;
+
+
+
+extern int ump_debug_level;
+extern struct ump_dev device;
+
+_mali_osk_errcode_t ump_kernel_constructor(void);
+void ump_kernel_destructor(void);
+int ump_map_errcode(_mali_osk_errcode_t err);
+
+/**
+ * variables from user space cannot be dereferenced from kernel space; tagging them
+ * with __user allows the GCC compiler to generate a warning. Other compilers may
+ * not support this so we define it here as an empty macro if the compiler doesn't
+ * define it.
+ */
+#ifndef __user
+#define __user
+#endif
+
+#endif /* __UMP_KERNEL_COMMON_H__ */
diff --git a/ump/common/ump_kernel_descriptor_mapping.c b/ump/common/ump_kernel_descriptor_mapping.c
new file mode 100755
index 0000000..d22c049
--- /dev/null
+++ b/ump/common/ump_kernel_descriptor_mapping.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_bitops.h"
+#include "ump_kernel_common.h"
+#include "ump_kernel_descriptor_mapping.h"
+
+#define MALI_PAD_INT(x) (((x) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1))
+
+/**
+ * Allocate a descriptor table capable of holding 'count' mappings
+ * @param count Number of mappings in the table
+ * @return Pointer to a new table, NULL on error
+ */
+static ump_descriptor_table *descriptor_table_alloc(int count);
+
+/**
+ * Free a descriptor table
+ * @param table The table to free
+ */
+static void descriptor_table_free(ump_descriptor_table *table);
+
+ump_descriptor_mapping *ump_descriptor_mapping_create(int init_entries, int max_entries)
+{
+	ump_descriptor_mapping *map = _mali_osk_calloc(1, sizeof(ump_descriptor_mapping));
+
+	init_entries = MALI_PAD_INT(init_entries);
+	max_entries = MALI_PAD_INT(max_entries);
+
+	if (NULL != map) {
+		map->table = descriptor_table_alloc(init_entries);
+		if (NULL != map->table) {
+			map->lock = _mali_osk_mutex_rw_init(_MALI_OSK_LOCKFLAG_UNORDERED, 0);
+			if (NULL != map->lock) {
+				_mali_osk_set_nonatomic_bit(0, map->table->usage); /* reserve bit 0 to prevent NULL/zero logic to kick in */
+				map->max_nr_mappings_allowed = max_entries;
+				map->current_nr_mappings = init_entries;
+				return map;
+			}
+			descriptor_table_free(map->table);
+		}
+		_mali_osk_free(map);
+	}
+	return NULL;
+}
+
+void ump_descriptor_mapping_destroy(ump_descriptor_mapping *map)
+{
+	descriptor_table_free(map->table);
+	_mali_osk_mutex_rw_term(map->lock);
+	_mali_osk_free(map);
+}
+
+int ump_descriptor_mapping_allocate_mapping(ump_descriptor_mapping *map, void *target)
+{
+	int descriptor = -1;/*-EFAULT;*/
+	_mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RW);
+	descriptor = _mali_osk_find_first_zero_bit(map->table->usage, map->current_nr_mappings);
+	if (descriptor == map->current_nr_mappings) {
+		int nr_mappings_new;
+		/* no free descriptor, try to expand the table */
+		ump_descriptor_table *new_table;
+		ump_descriptor_table *old_table = map->table;
+		nr_mappings_new = map->current_nr_mappings * 2;
+
+		if (map->current_nr_mappings >= map->max_nr_mappings_allowed) {
+			descriptor = -1;
+			goto unlock_and_exit;
+		}
+
+		new_table = descriptor_table_alloc(nr_mappings_new);
+		if (NULL == new_table) {
+			descriptor = -1;
+			goto unlock_and_exit;
+		}
+
+		_mali_osk_memcpy(new_table->usage, old_table->usage, (sizeof(unsigned long)*map->current_nr_mappings) / BITS_PER_LONG);
+		_mali_osk_memcpy(new_table->mappings, old_table->mappings, map->current_nr_mappings * sizeof(void *));
+		map->table = new_table;
+		map->current_nr_mappings = nr_mappings_new;
+		descriptor_table_free(old_table);
+	}
+
+	/* we have found a valid descriptor, set the value and usage bit */
+	_mali_osk_set_nonatomic_bit(descriptor, map->table->usage);
+	map->table->mappings[descriptor] = target;
+
+unlock_and_exit:
+	_mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RW);
+	return descriptor;
+}
+
+int ump_descriptor_mapping_get(ump_descriptor_mapping *map, int descriptor, void **target)
+{
+	int result = -1;/*-EFAULT;*/
+	DEBUG_ASSERT(map);
+	_mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RO);
+	if ((descriptor > 0) && (descriptor < map->current_nr_mappings) && _mali_osk_test_bit(descriptor, map->table->usage)) {
+		*target = map->table->mappings[descriptor];
+		result = 0;
+	} else *target = NULL;
+	_mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RO);
+	return result;
+}
+
+int ump_descriptor_mapping_set(ump_descriptor_mapping *map, int descriptor, void *target)
+{
+	int result = -1;/*-EFAULT;*/
+	_mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RO);
+	if ((descriptor > 0) && (descriptor < map->current_nr_mappings) && _mali_osk_test_bit(descriptor, map->table->usage)) {
+		map->table->mappings[descriptor] = target;
+		result = 0;
+	}
+	_mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RO);
+	return result;
+}
+
+void ump_descriptor_mapping_free(ump_descriptor_mapping *map, int descriptor)
+{
+	_mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RW);
+	if ((descriptor > 0) && (descriptor < map->current_nr_mappings) && _mali_osk_test_bit(descriptor, map->table->usage)) {
+		map->table->mappings[descriptor] = NULL;
+		_mali_osk_clear_nonatomic_bit(descriptor, map->table->usage);
+	}
+	_mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RW);
+}
+
+static ump_descriptor_table *descriptor_table_alloc(int count)
+{
+	ump_descriptor_table *table;
+
+	table = _mali_osk_calloc(1, sizeof(ump_descriptor_table) + ((sizeof(unsigned long) * count) / BITS_PER_LONG) + (sizeof(void *) * count));
+
+	if (NULL != table) {
+		table->usage = (u32 *)((u8 *)table + sizeof(ump_descriptor_table));
+		table->mappings = (void **)((u8 *)table + sizeof(ump_descriptor_table) + ((sizeof(unsigned long) * count) / BITS_PER_LONG));
+	}
+
+	return table;
+}
+
+static void descriptor_table_free(ump_descriptor_table *table)
+{
+	_mali_osk_free(table);
+}
+
diff --git a/ump/common/ump_kernel_descriptor_mapping.h b/ump/common/ump_kernel_descriptor_mapping.h
new file mode 100755
index 0000000..bad215a
--- /dev/null
+++ b/ump/common/ump_kernel_descriptor_mapping.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_kernel_descriptor_mapping.h
+ */
+
+#ifndef __UMP_KERNEL_DESCRIPTOR_MAPPING_H__
+#define __UMP_KERNEL_DESCRIPTOR_MAPPING_H__
+
+#include "mali_osk.h"
+
+/**
+ * The actual descriptor mapping table, never directly accessed by clients
+ */
+typedef struct ump_descriptor_table {
+	u32 *usage;  /**< Pointer to bitpattern indicating if a descriptor is valid/used or not */
+	void **mappings; /**< Array of the pointers the descriptors map to */
+} ump_descriptor_table;
+
+/**
+ * The descriptor mapping object
+ * Provides a separate namespace where we can map an integer to a pointer
+ */
+typedef struct ump_descriptor_mapping {
+	_mali_osk_mutex_rw_t *lock; /**< Lock protecting access to the mapping object */
+	int max_nr_mappings_allowed; /**< Max number of mappings to support in this namespace */
+	int current_nr_mappings; /**< Current number of possible mappings */
+	ump_descriptor_table *table;  /**< Pointer to the current mapping table */
+} ump_descriptor_mapping;
+
+/**
+ * Create a descriptor mapping object
+ * Create a descriptor mapping capable of holding init_entries growable to max_entries
+ * @param init_entries Number of entries to preallocate memory for
+ * @param max_entries Number of entries to max support
+ * @return Pointer to a descriptor mapping object, NULL on failure
+ */
+ump_descriptor_mapping *ump_descriptor_mapping_create(int init_entries, int max_entries);
+
+/**
+ * Destroy a descriptor mapping object
+ * @param map The map to free
+ */
+void ump_descriptor_mapping_destroy(ump_descriptor_mapping *map);
+
+/**
+ * Allocate a new mapping entry (descriptor ID)
+ * Allocates a new entry in the map.
+ * @param map The map to allocate a new entry in
+ * @param target The value to map to
+ * @return The descriptor allocated, a negative value on error
+ */
+int ump_descriptor_mapping_allocate_mapping(ump_descriptor_mapping *map, void *target);
+
+/**
+ * Get the value mapped to by a descriptor ID
+ * @param map The map to lookup the descriptor id in
+ * @param descriptor The descriptor ID to lookup
+ * @param target Pointer to a pointer which will receive the stored value
+ * @return 0 on successful lookup, negative on error
+ */
+int ump_descriptor_mapping_get(ump_descriptor_mapping *map, int descriptor, void **target);
+
+/**
+ * Set the value mapped to by a descriptor ID
+ * @param map The map to lookup the descriptor id in
+ * @param descriptor The descriptor ID to lookup
+ * @param target Pointer to replace the current value with
+ * @return 0 on successful lookup, negative on error
+ */
+int ump_descriptor_mapping_set(ump_descriptor_mapping *map, int descriptor, void *target);
+
+/**
+ * Free the descriptor ID
+ * For the descriptor to be reused it has to be freed
+ * @param map The map to free the descriptor from
+ * @param descriptor The descriptor ID to free
+ */
+void ump_descriptor_mapping_free(ump_descriptor_mapping *map, int descriptor);
+
+#endif /* __UMP_KERNEL_DESCRIPTOR_MAPPING_H__ */
diff --git a/ump/common/ump_kernel_memory_backend.h b/ump/common/ump_kernel_memory_backend.h
new file mode 100755
index 0000000..1b4fbc5
--- /dev/null
+++ b/ump/common/ump_kernel_memory_backend.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_kernel_memory_mapping.h
+ */
+
+#ifndef __UMP_KERNEL_MEMORY_BACKEND_H__
+#define __UMP_KERNEL_MEMORY_BACKEND_H__
+
+#include "ump_kernel_interface.h"
+#include "ump_kernel_types.h"
+
+
+typedef struct ump_memory_allocation {
+	void                     *phys_addr;
+	void                     *mapping;
+	unsigned long             size;
+	ump_dd_handle             handle;
+	void                     *process_mapping_info;
+	u32                       cookie;               /**< necessary on some U/K interface implementations */
+	struct ump_session_data *ump_session;           /**< Session that this allocation belongs to */
+	_mali_osk_list_t          list;                 /**< List for linking together memory allocations into the session's memory head */
+	u32 is_cached;
+} ump_memory_allocation;
+
+typedef struct ump_memory_backend {
+	int (*allocate)(void *ctx, ump_dd_mem *descriptor);
+	void (*release)(void *ctx, ump_dd_mem *descriptor);
+	void (*shutdown)(struct ump_memory_backend *backend);
+	u32(*stat)(struct ump_memory_backend *backend);
+	int (*pre_allocate_physical_check)(void *ctx, u32 size);
+	u32(*adjust_to_mali_phys)(void *ctx, u32 cpu_phys);
+	void *ctx;
+} ump_memory_backend;
+
+ump_memory_backend *ump_memory_backend_create(void);
+void ump_memory_backend_destroy(void);
+
+#endif /*__UMP_KERNEL_MEMORY_BACKEND_H__ */
+
diff --git a/ump/common/ump_kernel_ref_drv.c b/ump/common/ump_kernel_ref_drv.c
new file mode 100755
index 0000000..39f32af
--- /dev/null
+++ b/ump/common/ump_kernel_ref_drv.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "ump_osk.h"
+#include "ump_uk_types.h"
+
+#include "ump_kernel_interface_ref_drv.h"
+#include "ump_kernel_common.h"
+#include "ump_kernel_descriptor_mapping.h"
+
+#define UMP_MINIMUM_SIZE         4096
+#define UMP_MINIMUM_SIZE_MASK    (~(UMP_MINIMUM_SIZE-1))
+#define UMP_SIZE_ALIGN(x)        (((x)+UMP_MINIMUM_SIZE-1)&UMP_MINIMUM_SIZE_MASK)
+#define UMP_ADDR_ALIGN_OFFSET(x) ((x)&(UMP_MINIMUM_SIZE-1))
+static void phys_blocks_release(void *ctx, struct ump_dd_mem *descriptor);
+
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block *blocks, unsigned long num_blocks)
+{
+	ump_dd_mem *mem;
+	unsigned long size_total = 0;
+	int ret;
+	u32 i;
+
+	/* Go through the input blocks and verify that they are sane */
+	for (i = 0; i < num_blocks; i++) {
+		unsigned long addr = blocks[i].addr;
+		unsigned long size = blocks[i].size;
+
+		DBG_MSG(5, ("Adding physical memory to new handle. Address: 0x%08lx, size: %lu\n", addr, size));
+		size_total += blocks[i].size;
+
+		if (0 != UMP_ADDR_ALIGN_OFFSET(addr)) {
+			MSG_ERR(("Trying to create UMP memory from unaligned physical address. Address: 0x%08lx\n", addr));
+			return UMP_DD_HANDLE_INVALID;
+		}
+
+		if (0 != UMP_ADDR_ALIGN_OFFSET(size)) {
+			MSG_ERR(("Trying to create UMP memory with unaligned size. Size: %lu\n", size));
+			return UMP_DD_HANDLE_INVALID;
+		}
+	}
+
+	/* Allocate the ump_dd_mem struct for this allocation */
+	mem = _mali_osk_malloc(sizeof(*mem));
+	if (NULL == mem) {
+		DBG_MSG(1, ("Could not allocate ump_dd_mem in ump_dd_handle_create_from_phys_blocks()\n"));
+		return UMP_DD_HANDLE_INVALID;
+	}
+
+	/* Now, make a copy of the block information supplied by the user */
+	mem->block_array = _mali_osk_malloc(sizeof(ump_dd_physical_block) * num_blocks);
+	if (NULL == mem->block_array) {
+		_mali_osk_free(mem);
+		DBG_MSG(1, ("Could not allocate a mem handle for function ump_dd_handle_create_from_phys_blocks().\n"));
+		return UMP_DD_HANDLE_INVALID;
+	}
+
+	_mali_osk_memcpy(mem->block_array, blocks, sizeof(ump_dd_physical_block) * num_blocks);
+
+	/* And setup the rest of the ump_dd_mem struct */
+	_mali_osk_atomic_init(&mem->ref_count, 1);
+	mem->size_bytes = size_total;
+	mem->nr_blocks = num_blocks;
+	mem->backend_info = NULL;
+	mem->ctx = NULL;
+	mem->release_func = phys_blocks_release;
+	/* For now UMP handles created by ump_dd_handle_create_from_phys_blocks() is forced to be Uncached */
+	mem->is_cached = 0;
+	mem->hw_device = _UMP_UK_USED_BY_CPU;
+	mem->lock_usage = UMP_NOT_LOCKED;
+
+	/* Find a secure ID for this allocation */
+	ret = ump_random_mapping_insert(device.secure_id_map, mem);
+	if (unlikely(ret)) {
+		_mali_osk_free(mem->block_array);
+		_mali_osk_free(mem);
+		DBG_MSG(1, ("Failed to allocate secure ID in ump_dd_handle_create_from_phys_blocks()\n"));
+		return UMP_DD_HANDLE_INVALID;
+	}
+
+	DBG_MSG(3, ("UMP memory created. ID: %u, size: %lu\n", mem->secure_id, mem->size_bytes));
+
+	return (ump_dd_handle)mem;
+}
+
+static void phys_blocks_release(void *ctx, struct ump_dd_mem *descriptor)
+{
+	_mali_osk_free(descriptor->block_array);
+	descriptor->block_array = NULL;
+}
+
+_mali_osk_errcode_t _ump_ukk_allocate(_ump_uk_allocate_s *user_interaction)
+{
+	ump_session_data *session_data = NULL;
+	ump_dd_mem *new_allocation = NULL;
+	ump_session_memory_list_element *session_memory_element = NULL;
+	int ret;
+
+	DEBUG_ASSERT_POINTER(user_interaction);
+	DEBUG_ASSERT_POINTER(user_interaction->ctx);
+
+	session_data = (ump_session_data *) user_interaction->ctx;
+
+	session_memory_element = _mali_osk_calloc(1, sizeof(ump_session_memory_list_element));
+	if (NULL == session_memory_element) {
+		DBG_MSG(1, ("Failed to allocate ump_session_memory_list_element in ump_ioctl_allocate()\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+
+	new_allocation = _mali_osk_calloc(1, sizeof(ump_dd_mem));
+	if (NULL == new_allocation) {
+		_mali_osk_free(session_memory_element);
+		DBG_MSG(1, ("Failed to allocate ump_dd_mem in _ump_ukk_allocate()\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	/* Initialize the part of the new_allocation that we know so for */
+	_mali_osk_atomic_init(&new_allocation->ref_count, 1);
+	if (0 == (UMP_REF_DRV_UK_CONSTRAINT_USE_CACHE & user_interaction->constraints))
+		new_allocation->is_cached = 0;
+	else new_allocation->is_cached = 1;
+
+	/* Special case a size of 0, we should try to emulate what malloc does
+	 * in this case, which is to return a valid pointer that must be freed,
+	 * but can't be dereferenced */
+	if (0 == user_interaction->size) {
+		/* Emulate by actually allocating the minimum block size */
+		user_interaction->size = 1;
+	}
+
+	/* Page align the size */
+	new_allocation->size_bytes = UMP_SIZE_ALIGN(user_interaction->size);
+	new_allocation->lock_usage = UMP_NOT_LOCKED;
+
+	/* Now, ask the active memory backend to do the actual memory allocation */
+	if (!device.backend->allocate(device.backend->ctx, new_allocation)) {
+		DBG_MSG(3, ("OOM: No more UMP memory left. Failed to allocate memory in ump_ioctl_allocate(). Size: %lu, requested size: %lu\n",
+			    new_allocation->size_bytes,
+			    (unsigned long)user_interaction->size));
+		_mali_osk_free(new_allocation);
+		_mali_osk_free(session_memory_element);
+		return _MALI_OSK_ERR_INVALID_FUNC;
+	}
+	new_allocation->hw_device = _UMP_UK_USED_BY_CPU;
+	new_allocation->ctx = device.backend->ctx;
+	new_allocation->release_func = device.backend->release;
+
+	/* Initialize the session_memory_element, and add it to the session object */
+	session_memory_element->mem = new_allocation;
+	_mali_osk_mutex_wait(session_data->lock);
+	_mali_osk_list_add(&(session_memory_element->list), &(session_data->list_head_session_memory_list));
+	_mali_osk_mutex_signal(session_data->lock);
+
+	/* Create a secure ID for this allocation */
+	ret = ump_random_mapping_insert(device.secure_id_map, new_allocation);
+	if (unlikely(ret)) {
+		new_allocation->release_func(new_allocation->ctx, new_allocation);
+		_mali_osk_free(session_memory_element);
+		_mali_osk_free(new_allocation);
+		DBG_MSG(1, ("Failed to allocate secure ID in ump_ioctl_allocate()\n"));
+		return _MALI_OSK_ERR_INVALID_FUNC;
+	}
+
+	user_interaction->secure_id = new_allocation->secure_id;
+	user_interaction->size = new_allocation->size_bytes;
+	DBG_MSG(3, ("UMP memory allocated. ID: %u, size: %lu\n",
+		    new_allocation->secure_id,
+		    new_allocation->size_bytes));
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/ump/common/ump_kernel_types.h b/ump/common/ump_kernel_types.h
new file mode 100755
index 0000000..a1c9bd3
--- /dev/null
+++ b/ump/common/ump_kernel_types.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __UMP_KERNEL_TYPES_H__
+#define __UMP_KERNEL_TYPES_H__
+
+#include "ump_kernel_interface.h"
+#include "mali_osk.h"
+
+#include <linux/rbtree.h>
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif
+
+typedef enum {
+	UMP_USED_BY_CPU = 0,
+	UMP_USED_BY_MALI = 1,
+	UMP_USED_BY_UNKNOWN_DEVICE = 100,
+} ump_hw_usage;
+
+typedef enum {
+	UMP_NOT_LOCKED = 0,
+	UMP_READ = 1,
+	UMP_READ_WRITE = 3,
+} ump_lock_usage;
+
+/*
+ * This struct is what is "behind" a ump_dd_handle
+ */
+typedef struct ump_dd_mem {
+	struct rb_node node;
+	ump_secure_id secure_id;
+	_mali_osk_atomic_t ref_count;
+	unsigned long size_bytes;
+	unsigned long nr_blocks;
+	ump_dd_physical_block *block_array;
+	void (*release_func)(void *ctx, struct ump_dd_mem *descriptor);
+	void *ctx;
+	void *backend_info;
+	int is_cached;
+	ump_hw_usage hw_device;
+	ump_lock_usage lock_usage;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	struct dma_buf_attachment *import_attach;
+	struct sg_table *sgt;
+#endif
+} ump_dd_mem;
+
+
+
+#endif /* __UMP_KERNEL_TYPES_H__ */
diff --git a/ump/common/ump_osk.h b/ump/common/ump_osk.h
new file mode 100755
index 0000000..bd1aef5
--- /dev/null
+++ b/ump/common/ump_osk.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_osk.h
+ * Defines the OS abstraction layer for the UMP kernel device driver (OSK)
+ */
+
+#ifndef __UMP_OSK_H__
+#define __UMP_OSK_H__
+
+#include <mali_osk.h>
+#include <ump_kernel_memory_backend.h>
+#include "ump_uk_types.h"
+#include "ump_kernel_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+_mali_osk_errcode_t _ump_osk_init(void);
+
+_mali_osk_errcode_t _ump_osk_term(void);
+
+int _ump_osk_atomic_inc_and_read(_mali_osk_atomic_t *atom);
+
+int _ump_osk_atomic_dec_and_read(_mali_osk_atomic_t *atom);
+
+_mali_osk_errcode_t _ump_osk_mem_mapregion_init(ump_memory_allocation *descriptor);
+
+_mali_osk_errcode_t _ump_osk_mem_mapregion_map(ump_memory_allocation *descriptor, u32 offset, u32 *phys_addr, unsigned long size);
+
+void _ump_osk_mem_mapregion_term(ump_memory_allocation *descriptor);
+
+void _ump_osk_msync(ump_dd_mem *mem, void *virt, u32 offset, u32 size, ump_uk_msync_op op, ump_session_data *session_data);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/ump/common/ump_uk_types.h b/ump/common/ump_uk_types.h
new file mode 100755
index 0000000..0e42a89
--- /dev/null
+++ b/ump/common/ump_uk_types.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_uk_types.h
+ * Defines the types and constants used in the user-kernel interface
+ */
+
+#ifndef __UMP_UK_TYPES_H__
+#define __UMP_UK_TYPES_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Helpers for API version handling */
+#define MAKE_VERSION_ID(x) (((x) << 16UL) | (x))
+#define IS_VERSION_ID(x) (((x) & 0xFFFF) == (((x) >> 16UL) & 0xFFFF))
+#define GET_VERSION(x) (((x) >> 16UL) & 0xFFFF)
+#define IS_API_MATCH(x, y) (IS_VERSION_ID((x)) && IS_VERSION_ID((y)) && (GET_VERSION((x)) == GET_VERSION((y))))
+
+/**
+ * API version define.
+ * Indicates the version of the kernel API
+ * The version is a 16bit integer incremented on each API change.
+ * The 16bit integer is stored twice in a 32bit integer
+ * So for version 1 the value would be 0x00010001
+ */
+#define UMP_IOCTL_API_VERSION MAKE_VERSION_ID(3)
+
+typedef enum
+{
+	_UMP_IOC_QUERY_API_VERSION = 1,
+	_UMP_IOC_ALLOCATE,
+	_UMP_IOC_RELEASE,
+	_UMP_IOC_SIZE_GET,
+	_UMP_IOC_MAP_MEM,    /* not used in Linux */
+	_UMP_IOC_UNMAP_MEM,  /* not used in Linux */
+	_UMP_IOC_MSYNC,
+	_UMP_IOC_CACHE_OPERATIONS_CONTROL,
+	_UMP_IOC_SWITCH_HW_USAGE,
+	_UMP_IOC_LOCK,
+	_UMP_IOC_UNLOCK,
+	_UMP_IOC_DMABUF_IMPORT,
+} _ump_uk_functions;
+
+typedef enum
+{
+	UMP_REF_DRV_UK_CONSTRAINT_NONE = 0,
+	UMP_REF_DRV_UK_CONSTRAINT_PHYSICALLY_LINEAR = 1,
+	UMP_REF_DRV_UK_CONSTRAINT_USE_CACHE = 4,
+} ump_uk_alloc_constraints;
+
+typedef enum
+{
+	_UMP_UK_MSYNC_CLEAN = 0,
+	_UMP_UK_MSYNC_CLEAN_AND_INVALIDATE = 1,
+	_UMP_UK_MSYNC_INVALIDATE = 2,
+	_UMP_UK_MSYNC_FLUSH_L1   = 3,
+	_UMP_UK_MSYNC_READOUT_CACHE_ENABLED = 128,
+} ump_uk_msync_op;
+
+typedef enum
+{
+	_UMP_UK_CACHE_OP_START = 0,
+	_UMP_UK_CACHE_OP_FINISH  = 1,
+} ump_uk_cache_op_control;
+
+typedef enum
+{
+	_UMP_UK_READ = 1,
+	_UMP_UK_READ_WRITE = 3,
+} ump_uk_lock_usage;
+
+typedef enum
+{
+	_UMP_UK_USED_BY_CPU = 0,
+	_UMP_UK_USED_BY_MALI = 1,
+	_UMP_UK_USED_BY_UNKNOWN_DEVICE = 100,
+} ump_uk_user;
+
+/**
+ * Get API version ([in,out] u32 api_version, [out] u32 compatible)
+ */
+typedef struct _ump_uk_api_version_s
+{
+	void *ctx;      /**< [in,out] user-kernel context (trashed on output) */
+	u32 version;    /**< Set to the user space version on entry, stores the device driver version on exit */
+	u32 compatible; /**< Non-null if the device is compatible with the client */
+} _ump_uk_api_version_s;
+
+/**
+ * ALLOCATE ([out] u32 secure_id, [in,out] u32 size,  [in] contraints)
+ */
+typedef struct _ump_uk_allocate_s
+{
+	void *ctx;                              /**< [in,out] user-kernel context (trashed on output) */
+	u32 secure_id;                          /**< Return value from DD to Userdriver */
+	u32 size;                               /**< Input and output. Requested size; input. Returned size; output */
+	ump_uk_alloc_constraints constraints;   /**< Only input to Devicedriver */
+} _ump_uk_allocate_s;
+
+/**
+ * SIZE_GET ([in] u32 secure_id, [out]size )
+ */
+typedef struct _ump_uk_size_get_s
+{
+	void *ctx;                              /**< [in,out] user-kernel context (trashed on output) */
+	u32 secure_id;                          /**< Input to DD */
+	u32 size;                               /**< Returned size; output */
+} _ump_uk_size_get_s;
+
+/**
+ * Release ([in] u32 secure_id)
+ */
+typedef struct _ump_uk_release_s
+{
+	void *ctx;                              /**< [in,out] user-kernel context (trashed on output) */
+	u32 secure_id;                          /**< Input to DD */
+} _ump_uk_release_s;
+
+typedef struct _ump_uk_map_mem_s
+{
+	void *ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	void *mapping;                  /**< [out] Returns user-space virtual address for the mapping */
+	void *phys_addr;                /**< [in] physical address */
+	unsigned long size;             /**< [in] size */
+	u32 secure_id;                  /**< [in] secure_id to assign to mapping */
+	void *_ukk_private;             /**< Only used inside linux port between kernel frontend and common part to store vma */
+	u32 cookie;
+	u32 is_cached;            /**< [in,out] caching of CPU mappings */
+} _ump_uk_map_mem_s;
+
+typedef struct _ump_uk_unmap_mem_s
+{
+	void *ctx;            /**< [in,out] user-kernel context (trashed on output) */
+	void *mapping;
+	u32 size;
+	void *_ukk_private;
+	u32 cookie;
+} _ump_uk_unmap_mem_s;
+
+typedef struct _ump_uk_msync_s
+{
+	void *ctx;            /**< [in,out] user-kernel context (trashed on output) */
+	void *mapping;        /**< [in] mapping addr */
+	void *address;        /**< [in] flush start addr */
+	u32 size;             /**< [in] size to flush */
+	ump_uk_msync_op op;   /**< [in] flush operation */
+	u32 cookie;           /**< [in] cookie stored with reference to the kernel mapping internals */
+	u32 secure_id;        /**< [in] secure_id that identifies the ump buffer */
+	u32 is_cached;        /**< [out] caching of CPU mappings */
+} _ump_uk_msync_s;
+
+typedef struct _ump_uk_cache_operations_control_s
+{
+	void *ctx;                   /**< [in,out] user-kernel context (trashed on output) */
+	ump_uk_cache_op_control op;  /**< [in] cache operations start/stop */
+} _ump_uk_cache_operations_control_s;
+
+
+typedef struct _ump_uk_switch_hw_usage_s
+{
+	void *ctx;            /**< [in,out] user-kernel context (trashed on output) */
+	u32 secure_id;        /**< [in] secure_id that identifies the ump buffer */
+	ump_uk_user new_user;         /**< [in] cookie stored with reference to the kernel mapping internals */
+
+} _ump_uk_switch_hw_usage_s;
+
+typedef struct _ump_uk_lock_s
+{
+	void *ctx;            /**< [in,out] user-kernel context (trashed on output) */
+	u32 secure_id;        /**< [in] secure_id that identifies the ump buffer */
+	ump_uk_lock_usage lock_usage;
+} _ump_uk_lock_s;
+
+typedef struct _ump_uk_unlock_s
+{
+	void *ctx;            /**< [in,out] user-kernel context (trashed on output) */
+	u32 secure_id;        /**< [in] secure_id that identifies the ump buffer */
+} _ump_uk_unlock_s;
+
+typedef struct _ump_uk_dmabuf_s
+{
+	void *ctx;            /**< [in,out] user-kernel context (trashed on output) */
+	int fd;               /**< [in] dmabuf_fd that identifies the dmabuf buffer */
+	size_t size;          /**< [in] size of the buffer */
+	u32 secure_id;        /**< [out] secure_id that identifies the ump buffer */
+} _ump_uk_dmabuf_s;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __UMP_UK_TYPES_H__ */
diff --git a/ump/common/ump_ukk.h b/ump/common/ump_ukk.h
new file mode 100755
index 0000000..b8b1624
--- /dev/null
+++ b/ump/common/ump_ukk.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_ukk.h
+ * Defines the kernel-side interface of the user-kernel interface
+ */
+
+#ifndef __UMP_UKK_H__
+#define __UMP_UKK_H__
+
+#include "mali_osk.h"
+#include "ump_uk_types.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+_mali_osk_errcode_t _ump_ukk_open(void **context);
+
+_mali_osk_errcode_t _ump_ukk_close(void **context);
+
+_mali_osk_errcode_t _ump_ukk_allocate(_ump_uk_allocate_s *user_interaction);
+
+_mali_osk_errcode_t _ump_ukk_release(_ump_uk_release_s *release_info);
+
+_mali_osk_errcode_t _ump_ukk_size_get(_ump_uk_size_get_s *user_interaction);
+
+_mali_osk_errcode_t _ump_ukk_map_mem(_ump_uk_map_mem_s *args);
+
+_mali_osk_errcode_t _ump_uku_get_api_version(_ump_uk_api_version_s *args);
+
+void _ump_ukk_unmap_mem(_ump_uk_unmap_mem_s *args);
+
+void _ump_ukk_msync(_ump_uk_msync_s *args);
+
+void _ump_ukk_cache_operations_control(_ump_uk_cache_operations_control_s *args);
+
+void _ump_ukk_switch_hw_usage(_ump_uk_switch_hw_usage_s *args);
+
+void _ump_ukk_lock(_ump_uk_lock_s *args);
+
+void _ump_ukk_unlock(_ump_uk_unlock_s *args);
+
+u32 _ump_ukk_report_memory_usage(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __UMP_UKK_H__ */
diff --git a/ump/include/ump/ump_kernel_interface.h b/ump/include/ump/ump_kernel_interface.h
new file mode 100755
index 0000000..3f47637
--- /dev/null
+++ b/ump/include/ump/ump_kernel_interface.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2010, 2013-2014 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_kernel_interface.h
+ *
+ * This file contains the kernel space part of the UMP API.
+ */
+
+#ifndef __UMP_KERNEL_INTERFACE_H__
+#define __UMP_KERNEL_INTERFACE_H__
+
+
+/** @defgroup ump_kernel_space_api UMP Kernel Space API
+ * @{ */
+
+
+#include "ump_kernel_platform.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * External representation of a UMP handle in kernel space.
+ */
+typedef void *ump_dd_handle;
+
+/**
+ * Typedef for a secure ID, a system wide identificator for UMP memory buffers.
+ */
+typedef unsigned int ump_secure_id;
+
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_HANDLE_INVALID ((ump_dd_handle)0)
+
+
+/**
+ * Value to indicate an invalid secure Id.
+ */
+#define UMP_INVALID_SECURE_ID ((ump_secure_id)-1)
+
+
+/**
+ * UMP error codes for kernel space.
+ */
+typedef enum
+{
+	UMP_DD_SUCCESS, /**< indicates success */
+	UMP_DD_INVALID, /**< indicates failure */
+} ump_dd_status_code;
+
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block
+{
+	unsigned long addr; /**< The physical address of the block */
+	unsigned long size; /**< The length of the block, typically page aligned */
+} ump_dd_physical_block;
+
+
+/**
+ * Retrieves the secure ID for the specified UMP memory.
+ *
+ * This identificator is unique across the entire system, and uniquely identifies
+ * the specified UMP memory. This identificator can later be used through the
+ * @ref ump_dd_handle_create_from_secure_id "ump_dd_handle_create_from_secure_id" or
+ * @ref ump_handle_create_from_secure_id "ump_handle_create_from_secure_id"
+ * functions in order to access this UMP memory, for instance from another process.
+ *
+ * @note There is a user space equivalent function called @ref ump_secure_id_get "ump_secure_id_get"
+ *
+ * @see ump_dd_handle_create_from_secure_id
+ * @see ump_handle_create_from_secure_id
+ * @see ump_secure_id_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the secure ID for the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(ump_dd_handle mem);
+
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_reference_release "ump_dd_reference_release" when there is no longer any
+ * use for the retrieved handle.
+ *
+ * @note There is a user space equivalent function called @ref ump_handle_create_from_secure_id "ump_handle_create_from_secure_id"
+ *
+ * @see ump_dd_reference_release
+ * @see ump_handle_create_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get "ump_secure_id_get " function.
+ *
+ * @return UMP_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id);
+
+
+/**
+ * Retrieves the number of physical blocks used by the specified UMP memory.
+ *
+ * This function retrieves the number of @ref ump_dd_physical_block "ump_dd_physical_block" structs needed
+ * to describe the physical memory layout of the given UMP memory. This can later be used when calling
+ * the functions @ref ump_dd_phys_blocks_get "ump_dd_phys_blocks_get" and
+ * @ref ump_dd_phys_block_get "ump_dd_phys_block_get".
+ *
+ * @see ump_dd_phys_blocks_get
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return The number of ump_dd_physical_block structs required to describe the physical memory layout of the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will fail if the num_blocks parameter is either to large or to small.
+ *
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param blocks An array of @ref ump_dd_physical_block "ump_dd_physical_block" structs that will receive the physical description.
+ * @param num_blocks The number of blocks to return in the blocks array. Use the function
+ *                   @ref ump_dd_phys_block_count_get "ump_dd_phys_block_count_get" first to determine the number of blocks required.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block *blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the physical memory block information for specified block for the specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will return UMP_DD_INVALID if the specified index is out of range.
+ *
+ * @see ump_dd_phys_blocks_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param index Which physical info block to retrieve.
+ * @param block Pointer to a @ref ump_dd_physical_block "ump_dd_physical_block" struct which will receive the requested information.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block *block);
+
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get "ump_size_get"
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory.
+ *
+ * This function adds an extra reference to the specified UMP memory. This function should
+ * be used every time a UMP memory handle is duplicated, that is, assigned to another ump_dd_handle
+ * variable. The function @ref ump_dd_reference_release "ump_dd_reference_release" must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * @note You are not required to call @ref ump_dd_reference_add "ump_dd_reference_add"
+ * for UMP handles returned from
+ * @ref ump_dd_handle_create_from_secure_id "ump_dd_handle_create_from_secure_id",
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_add "ump_reference_add"
+ *
+ * @see ump_reference_add
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function should be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_release "ump_reference_release"
+ *
+ * @see ump_reference_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle mem);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/** @} */ /* end group ump_kernel_space_api */
+
+
+#endif  /* __UMP_KERNEL_INTERFACE_H__ */
diff --git a/ump/include/ump/ump_kernel_interface_ref_drv.h b/ump/include/ump/ump_kernel_interface_ref_drv.h
new file mode 100755
index 0000000..75a78eb
--- /dev/null
+++ b/ump/include/ump/ump_kernel_interface_ref_drv.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2010, 2013-2014 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_kernel_interface.h
+ */
+
+#ifndef __UMP_KERNEL_INTERFACE_REF_DRV_H__
+#define __UMP_KERNEL_INTERFACE_REF_DRV_H__
+
+#include "ump_kernel_interface.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Turn specified physical memory into UMP memory. */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block *blocks, unsigned long num_blocks);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* __UMP_KERNEL_INTERFACE_REF_DRV_H__ */
diff --git a/ump/include/ump/ump_kernel_platform.h b/ump/include/ump/ump_kernel_platform.h
new file mode 100755
index 0000000..eca30c6
--- /dev/null
+++ b/ump/include/ump/ump_kernel_platform.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2010, 2014 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_kernel_platform.h
+ *
+ * This file should define UMP_KERNEL_API_EXPORT,
+ * which dictates how the UMP kernel API should be exported/imported.
+ * Modify this file, if needed, to match your platform setup.
+ */
+
+#ifndef __UMP_KERNEL_PLATFORM_H__
+#define __UMP_KERNEL_PLATFORM_H__
+
+/** @addtogroup ump_kernel_space_api
+ * @{ */
+
+/**
+ * A define which controls how UMP kernel space API functions are imported and exported.
+ * This define should be set by the implementor of the UMP API.
+ */
+
+#if defined(_WIN32)
+
+#if defined(UMP_BUILDING_UMP_LIBRARY)
+#define UMP_KERNEL_API_EXPORT __declspec(dllexport)
+#else
+#define UMP_KERNEL_API_EXPORT __declspec(dllimport)
+#endif
+
+#else
+
+#define UMP_KERNEL_API_EXPORT
+
+#endif
+
+
+/** @} */ /* end group ump_kernel_space_api */
+
+
+#endif /* __UMP_KERNEL_PLATFORM_H__ */
diff --git a/ump/linux/license/gpl/ump_kernel_license.h b/ump/linux/license/gpl/ump_kernel_license.h
new file mode 100755
index 0000000..c0a1b83
--- /dev/null
+++ b/ump/linux/license/gpl/ump_kernel_license.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2010, 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_kernel_license.h
+ * Defines for the macro MODULE_LICENSE.
+ */
+
+#ifndef __UMP_KERNEL_LICENSE_H__
+#define __UMP_KERNEL_LICENSE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define UMP_KERNEL_LINUX_LICENSE     "GPL"
+#define UMP_LICENSE_IS_GPL 1
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __UMP_KERNEL_LICENSE_H__ */
diff --git a/ump/linux/ump_ioctl.h b/ump/linux/ump_ioctl.h
new file mode 100755
index 0000000..b3fe7d3
--- /dev/null
+++ b/ump/linux/ump_ioctl.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2010-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ 
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library. 
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules 
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting 
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions 
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify 
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so. 
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef __UMP_IOCTL_H__
+#define __UMP_IOCTL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#include <ump_uk_types.h>
+
+#ifndef __user
+#define __user
+#endif
+
+
+/**
+ * @file UMP_ioctl.h
+ * This file describes the interface needed to use the Linux device driver.
+ * The interface is used by the userpace UMP driver.
+ */
+
+#define UMP_IOCTL_NR 0x90
+
+
+#define UMP_IOC_QUERY_API_VERSION _IOR(UMP_IOCTL_NR, _UMP_IOC_QUERY_API_VERSION, _ump_uk_api_version_s)
+#define UMP_IOC_ALLOCATE  _IOWR(UMP_IOCTL_NR,  _UMP_IOC_ALLOCATE,  _ump_uk_allocate_s)
+#define UMP_IOC_RELEASE  _IOR(UMP_IOCTL_NR,  _UMP_IOC_RELEASE,  _ump_uk_release_s)
+#define UMP_IOC_SIZE_GET  _IOWR(UMP_IOCTL_NR,  _UMP_IOC_SIZE_GET, _ump_uk_size_get_s)
+#define UMP_IOC_MSYNC     _IOW(UMP_IOCTL_NR,  _UMP_IOC_MSYNC, _ump_uk_msync_s)
+
+#define UMP_IOC_CACHE_OPERATIONS_CONTROL _IOW(UMP_IOCTL_NR,  _UMP_IOC_CACHE_OPERATIONS_CONTROL, _ump_uk_cache_operations_control_s)
+#define UMP_IOC_SWITCH_HW_USAGE   _IOW(UMP_IOCTL_NR,  _UMP_IOC_SWITCH_HW_USAGE, _ump_uk_switch_hw_usage_s)
+#define UMP_IOC_LOCK          _IOW(UMP_IOCTL_NR,  _UMP_IOC_LOCK, _ump_uk_lock_s)
+#define UMP_IOC_UNLOCK        _IOW(UMP_IOCTL_NR,  _UMP_IOC_UNLOCK, _ump_uk_unlock_s)
+
+#define UMP_IOC_DMABUF_IMPORT  _IOW(UMP_IOCTL_NR,  _UMP_IOC_DMABUF_IMPORT, _ump_uk_dmabuf_s)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __UMP_IOCTL_H__ */
diff --git a/ump/linux/ump_kernel_linux.c b/ump/linux/ump_kernel_linux.c
new file mode 100755
index 0000000..51ae953
--- /dev/null
+++ b/ump/linux/ump_kernel_linux.c
@@ -0,0 +1,449 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/fs.h>                /* file system operations */
+#include <linux/cdev.h>              /* character device definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/mm.h>                /* memory management functions and types */
+#include <asm/uaccess.h>             /* user space access */
+#include <asm/atomic.h>
+#include <linux/device.h>
+#include <linux/debugfs.h>
+
+#include "arch/config.h"             /* Configuration for current platform. The symlinc for arch is set by Makefile */
+#include "ump_ioctl.h"
+#include "ump_kernel_common.h"
+#include "ump_kernel_interface.h"
+#include "ump_kernel_interface_ref_drv.h"
+#include "ump_kernel_descriptor_mapping.h"
+#include "ump_kernel_memory_backend.h"
+#include "ump_kernel_memory_backend_os.h"
+#include "ump_kernel_memory_backend_dedicated.h"
+#include "ump_kernel_license.h"
+
+#include "ump_osk.h"
+#include "ump_ukk.h"
+#include "ump_uk_types.h"
+#include "ump_ukk_wrappers.h"
+#include "ump_ukk_ref_wrappers.h"
+
+
+/* Module parameter to control log level */
+int ump_debug_level = 2;
+module_param(ump_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(ump_debug_level, "Higher number, more dmesg output");
+
+/* By default the module uses any available major, but it's possible to set it at load time to a specific number */
+int ump_major = 0;
+module_param(ump_major, int, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_major, "Device major number");
+
+/* Name of the UMP device driver */
+static char ump_dev_name[] = "ump"; /* should be const, but the functions we call requires non-cost */
+
+
+#if UMP_LICENSE_IS_GPL
+static struct dentry *ump_debugfs_dir = NULL;
+#endif
+
+/*
+ * The data which we attached to each virtual memory mapping request we get.
+ * Each memory mapping has a reference to the UMP memory it maps.
+ * We release this reference when the last memory mapping is unmapped.
+ */
+typedef struct ump_vma_usage_tracker {
+	int references;
+	ump_dd_handle handle;
+} ump_vma_usage_tracker;
+
+struct ump_device {
+	struct cdev cdev;
+#if UMP_LICENSE_IS_GPL
+	struct class *ump_class;
+#endif
+};
+
+/* The global variable containing the global device data */
+static struct ump_device ump_device;
+struct device *ump_global_mdev = NULL;
+
+/* Forward declare static functions */
+static int ump_file_open(struct inode *inode, struct file *filp);
+static int ump_file_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long ump_file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int ump_file_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+static int ump_file_mmap(struct file *filp, struct vm_area_struct *vma);
+
+
+/* This variable defines the file operations this UMP device driver offer */
+static struct file_operations ump_fops = {
+	.owner   = THIS_MODULE,
+	.open    = ump_file_open,
+	.release = ump_file_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl   = ump_file_ioctl,
+#else
+	.ioctl   = ump_file_ioctl,
+#endif
+	.mmap    = ump_file_mmap
+};
+
+
+/* This function is called by Linux to initialize this module.
+ * All we do is initialize the UMP device driver.
+ */
+static int ump_initialize_module(void)
+{
+	_mali_osk_errcode_t err;
+
+	DBG_MSG(2, ("Inserting UMP device driver. Compiled: %s, time: %s\n", __DATE__, __TIME__));
+
+	err = ump_kernel_constructor();
+	if (_MALI_OSK_ERR_OK != err) {
+		MSG_ERR(("UMP device driver init failed\n"));
+		return ump_map_errcode(err);
+	}
+
+	MSG(("UMP device driver %s loaded\n", SVN_REV_STRING));
+	return 0;
+}
+
+
+
+/*
+ * This function is called by Linux to unload/terminate/exit/cleanup this module.
+ * All we do is terminate the UMP device driver.
+ */
+static void ump_cleanup_module(void)
+{
+	DBG_MSG(2, ("Unloading UMP device driver\n"));
+	ump_kernel_destructor();
+	DBG_MSG(2, ("Module unloaded\n"));
+}
+
+
+
+static ssize_t ump_memory_used_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 mem = _ump_ukk_report_memory_usage();
+
+	r = snprintf(buf, 64, "%u\n", mem);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static const struct file_operations ump_memory_usage_fops = {
+	.owner = THIS_MODULE,
+	.read = ump_memory_used_read,
+};
+
+/*
+ * Initialize the UMP device driver.
+ */
+int ump_kernel_device_initialize(void)
+{
+	int err;
+	dev_t dev = 0;
+#if UMP_LICENSE_IS_GPL
+	ump_debugfs_dir = debugfs_create_dir(ump_dev_name, NULL);
+	if (ERR_PTR(-ENODEV) == ump_debugfs_dir) {
+		ump_debugfs_dir = NULL;
+	} else {
+		debugfs_create_file("memory_usage", 0400, ump_debugfs_dir, NULL, &ump_memory_usage_fops);
+	}
+#endif
+
+	if (0 == ump_major) {
+		/* auto select a major */
+		err = alloc_chrdev_region(&dev, 0, 1, ump_dev_name);
+		ump_major = MAJOR(dev);
+	} else {
+		/* use load time defined major number */
+		dev = MKDEV(ump_major, 0);
+		err = register_chrdev_region(dev, 1, ump_dev_name);
+	}
+
+	if (0 == err) {
+		memset(&ump_device, 0, sizeof(ump_device));
+
+		/* initialize our char dev data */
+		cdev_init(&ump_device.cdev, &ump_fops);
+		ump_device.cdev.owner = THIS_MODULE;
+		ump_device.cdev.ops = &ump_fops;
+
+		/* register char dev with the kernel */
+		err = cdev_add(&ump_device.cdev, dev, 1/*count*/);
+		if (0 == err) {
+
+#if UMP_LICENSE_IS_GPL
+			ump_device.ump_class = class_create(THIS_MODULE, ump_dev_name);
+			if (IS_ERR(ump_device.ump_class)) {
+				err = PTR_ERR(ump_device.ump_class);
+			} else {
+				ump_global_mdev = device_create(ump_device.ump_class, NULL, dev, NULL, ump_dev_name);
+				if (!IS_ERR(ump_global_mdev)) {
+					return 0;
+				}
+
+				err = PTR_ERR(ump_global_mdev);
+			}
+			cdev_del(&ump_device.cdev);
+#else
+			return 0;
+#endif
+		}
+
+		unregister_chrdev_region(dev, 1);
+	}
+
+	return err;
+}
+
+
+
+/*
+ * Terminate the UMP device driver
+ */
+void ump_kernel_device_terminate(void)
+{
+	dev_t dev = MKDEV(ump_major, 0);
+
+#if UMP_LICENSE_IS_GPL
+	device_destroy(ump_device.ump_class, dev);
+	class_destroy(ump_device.ump_class);
+#endif
+
+	/* unregister char device */
+	cdev_del(&ump_device.cdev);
+
+	/* free major */
+	unregister_chrdev_region(dev, 1);
+
+#if UMP_LICENSE_IS_GPL
+	if (ump_debugfs_dir)
+		debugfs_remove_recursive(ump_debugfs_dir);
+#endif
+}
+
+/*
+ * Open a new session. User space has called open() on us.
+ */
+static int ump_file_open(struct inode *inode, struct file *filp)
+{
+	struct ump_session_data *session_data;
+	_mali_osk_errcode_t err;
+
+	/* input validation */
+	if (0 != MINOR(inode->i_rdev)) {
+		MSG_ERR(("Minor not zero in ump_file_open()\n"));
+		return -ENODEV;
+	}
+
+	/* Call the OS-Independent UMP Open function */
+	err = _ump_ukk_open((void **) &session_data);
+	if (_MALI_OSK_ERR_OK != err) {
+		MSG_ERR(("Ump failed to open a new session\n"));
+		return ump_map_errcode(err);
+	}
+
+	filp->private_data = (void *)session_data;
+	filp->f_pos = 0;
+
+	return 0; /* success */
+}
+
+
+
+/*
+ * Close a session. User space has called close() or crashed/terminated.
+ */
+static int ump_file_release(struct inode *inode, struct file *filp)
+{
+	_mali_osk_errcode_t err;
+
+	err = _ump_ukk_close((void **) &filp->private_data);
+	if (_MALI_OSK_ERR_OK != err) {
+		return ump_map_errcode(err);
+	}
+
+	return 0;  /* success */
+}
+
+
+
+/*
+ * Handle IOCTL requests.
+ */
+#ifdef HAVE_UNLOCKED_IOCTL
+static long ump_file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int ump_file_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	int err = -ENOTTY;
+	void __user *argument;
+	struct ump_session_data *session_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+	(void)inode; /* inode not used */
+#endif
+
+	session_data = (struct ump_session_data *)filp->private_data;
+	if (NULL == session_data) {
+		MSG_ERR(("No session data attached to file object\n"));
+		return -ENOTTY;
+	}
+
+	/* interpret the argument as a user pointer to something */
+	argument = (void __user *)arg;
+
+	switch (cmd) {
+	case UMP_IOC_QUERY_API_VERSION:
+		err = ump_get_api_version_wrapper((u32 __user *)argument, session_data);
+		break;
+
+	case UMP_IOC_ALLOCATE :
+		err = ump_allocate_wrapper((u32 __user *)argument, session_data);
+		break;
+
+	case UMP_IOC_RELEASE:
+		err = ump_release_wrapper((u32 __user *)argument, session_data);
+		break;
+
+	case UMP_IOC_SIZE_GET:
+		err = ump_size_get_wrapper((u32 __user *)argument, session_data);
+		break;
+
+	case UMP_IOC_MSYNC:
+		err = ump_msync_wrapper((u32 __user *)argument, session_data);
+		break;
+
+	case UMP_IOC_CACHE_OPERATIONS_CONTROL:
+		err = ump_cache_operations_control_wrapper((u32 __user *)argument, session_data);
+		break;
+
+	case UMP_IOC_SWITCH_HW_USAGE:
+		err = ump_switch_hw_usage_wrapper((u32 __user *)argument, session_data);
+		break;
+
+	case UMP_IOC_LOCK:
+		err = ump_lock_wrapper((u32 __user *)argument, session_data);
+		break;
+
+	case UMP_IOC_UNLOCK:
+		err = ump_unlock_wrapper((u32 __user *)argument, session_data);
+		break;
+
+	case UMP_IOC_DMABUF_IMPORT:
+#ifdef CONFIG_DMA_SHARED_BUFFER
+		err = ump_dmabuf_import_wrapper((u32 __user *)argument, session_data);
+#else
+		err = -EFAULT;
+		DBG_MSG(1, ("User space use dmabuf API, but kernel don't support DMA BUF\n"));
+#endif
+		break;
+
+	default:
+		DBG_MSG(1, ("No handler for IOCTL. cmd: 0x%08x, arg: 0x%08lx\n", cmd, arg));
+		err = -EFAULT;
+		break;
+	}
+
+	return err;
+}
+
+int ump_map_errcode(_mali_osk_errcode_t err)
+{
+	switch (err) {
+	case _MALI_OSK_ERR_OK :
+		return 0;
+	case _MALI_OSK_ERR_FAULT:
+		return -EFAULT;
+	case _MALI_OSK_ERR_INVALID_FUNC:
+		return -ENOTTY;
+	case _MALI_OSK_ERR_INVALID_ARGS:
+		return -EINVAL;
+	case _MALI_OSK_ERR_NOMEM:
+		return -ENOMEM;
+	case _MALI_OSK_ERR_TIMEOUT:
+		return -ETIMEDOUT;
+	case _MALI_OSK_ERR_RESTARTSYSCALL:
+		return -ERESTARTSYS;
+	case _MALI_OSK_ERR_ITEM_NOT_FOUND:
+		return -ENOENT;
+	default:
+		return -EFAULT;
+	}
+}
+
+/*
+ * Handle from OS to map specified virtual memory to specified UMP memory.
+ */
+static int ump_file_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	_ump_uk_map_mem_s args;
+	_mali_osk_errcode_t err;
+	struct ump_session_data *session_data;
+
+	/* Validate the session data */
+	session_data = (struct ump_session_data *)filp->private_data;
+	if (NULL == session_data) {
+		MSG_ERR(("mmap() called without any session data available\n"));
+		return -EFAULT;
+	}
+
+	/* Re-pack the arguments that mmap() packed for us */
+	args.ctx = session_data;
+	args.phys_addr = 0;
+	args.size = vma->vm_end - vma->vm_start;
+	args._ukk_private = vma;
+	args.secure_id = vma->vm_pgoff;
+
+	/* By setting this flag, during a process fork; the child process will not have the parent UMP mappings */
+	vma->vm_flags |= VM_DONTCOPY;
+
+	DBG_MSG(4, ("UMP vma->flags: %x\n", vma->vm_flags));
+
+	/* Call the common mmap handler */
+	err = _ump_ukk_map_mem(&args);
+	if (_MALI_OSK_ERR_OK != err) {
+		MSG_ERR(("_ump_ukk_map_mem() failed in function ump_file_mmap()"));
+		return ump_map_errcode(err);
+	}
+
+	return 0; /* success */
+}
+
+/* Export UMP kernel space API functions */
+EXPORT_SYMBOL(ump_dd_secure_id_get);
+EXPORT_SYMBOL(ump_dd_handle_create_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_block_count_get);
+EXPORT_SYMBOL(ump_dd_phys_block_get);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get);
+EXPORT_SYMBOL(ump_dd_size_get);
+EXPORT_SYMBOL(ump_dd_reference_add);
+EXPORT_SYMBOL(ump_dd_reference_release);
+
+/* Export our own extended kernel space allocator */
+EXPORT_SYMBOL(ump_dd_handle_create_from_phys_blocks);
+
+/* Setup init and exit functions for this module */
+module_init(ump_initialize_module);
+module_exit(ump_cleanup_module);
+
+/* And some module informatio */
+MODULE_LICENSE(UMP_KERNEL_LINUX_LICENSE);
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(SVN_REV_STRING);
diff --git a/ump/linux/ump_kernel_linux.h b/ump/linux/ump_kernel_linux.h
new file mode 100755
index 0000000..f7c0a96
--- /dev/null
+++ b/ump/linux/ump_kernel_linux.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2010-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __UMP_KERNEL_LINUX_H__
+#define __UMP_KERNEL_LINUX_H__
+
+int ump_kernel_device_initialize(void);
+void ump_kernel_device_terminate(void);
+
+
+#endif /* __UMP_KERNEL_H__ */
diff --git a/ump/linux/ump_kernel_memory_backend_dedicated.c b/ump/linux/ump_kernel_memory_backend_dedicated.c
new file mode 100755
index 0000000..517b46b
--- /dev/null
+++ b/ump/linux/ump_kernel_memory_backend_dedicated.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/* needed to detect kernel version specific code */
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
+#include <linux/semaphore.h>
+#else /* pre 2.6.26 the file was in the arch specific location */
+#include <asm/semaphore.h>
+#endif
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/atomic.h>
+#include <linux/vmalloc.h>
+#include "ump_kernel_common.h"
+#include "ump_kernel_memory_backend.h"
+
+
+
+#define UMP_BLOCK_SIZE (256UL * 1024UL)  /* 256kB, remember to keep the ()s */
+
+
+
+typedef struct block_info {
+	struct block_info *next;
+} block_info;
+
+
+
+typedef struct block_allocator {
+	struct semaphore mutex;
+	block_info *all_blocks;
+	block_info *first_free;
+	u32 base;
+	u32 num_blocks;
+	u32 num_free;
+} block_allocator;
+
+
+static void block_allocator_shutdown(ump_memory_backend *backend);
+static int block_allocator_allocate(void *ctx, ump_dd_mem *mem);
+static void block_allocator_release(void *ctx, ump_dd_mem *handle);
+static inline u32 get_phys(block_allocator *allocator, block_info *block);
+static u32 block_allocator_stat(struct ump_memory_backend *backend);
+
+
+
+/*
+ * Create dedicated memory backend
+ */
+ump_memory_backend *ump_block_allocator_create(u32 base_address, u32 size)
+{
+	ump_memory_backend *backend;
+	block_allocator *allocator;
+	u32 usable_size;
+	u32 num_blocks;
+
+	usable_size = (size + UMP_BLOCK_SIZE - 1) & ~(UMP_BLOCK_SIZE - 1);
+	num_blocks = usable_size / UMP_BLOCK_SIZE;
+
+	if (0 == usable_size) {
+		DBG_MSG(1, ("Memory block of size %u is unusable\n", size));
+		return NULL;
+	}
+
+	DBG_MSG(5, ("Creating dedicated UMP memory backend. Base address: 0x%08x, size: 0x%08x\n", base_address, size));
+	DBG_MSG(6, ("%u usable bytes which becomes %u blocks\n", usable_size, num_blocks));
+
+	backend = kzalloc(sizeof(ump_memory_backend), GFP_KERNEL);
+	if (NULL != backend) {
+		allocator = kmalloc(sizeof(block_allocator), GFP_KERNEL);
+		if (NULL != allocator) {
+			allocator->all_blocks = kmalloc(sizeof(block_info) * num_blocks, GFP_KERNEL);
+			if (NULL != allocator->all_blocks) {
+				int i;
+
+				allocator->first_free = NULL;
+				allocator->num_blocks = num_blocks;
+				allocator->num_free = num_blocks;
+				allocator->base = base_address;
+				sema_init(&allocator->mutex, 1);
+
+				for (i = 0; i < num_blocks; i++) {
+					allocator->all_blocks[i].next = allocator->first_free;
+					allocator->first_free = &allocator->all_blocks[i];
+				}
+
+				backend->ctx = allocator;
+				backend->allocate = block_allocator_allocate;
+				backend->release = block_allocator_release;
+				backend->shutdown = block_allocator_shutdown;
+				backend->stat = block_allocator_stat;
+				backend->pre_allocate_physical_check = NULL;
+				backend->adjust_to_mali_phys = NULL;
+
+				return backend;
+			}
+			kfree(allocator);
+		}
+		kfree(backend);
+	}
+
+	return NULL;
+}
+
+
+
+/*
+ * Destroy specified dedicated memory backend
+ */
+static void block_allocator_shutdown(ump_memory_backend *backend)
+{
+	block_allocator *allocator;
+
+	BUG_ON(!backend);
+	BUG_ON(!backend->ctx);
+
+	allocator = (block_allocator *)backend->ctx;
+
+	DBG_MSG_IF(1, allocator->num_free != allocator->num_blocks, ("%u blocks still in use during shutdown\n", allocator->num_blocks - allocator->num_free));
+
+	kfree(allocator->all_blocks);
+	kfree(allocator);
+	kfree(backend);
+}
+
+
+
+static int block_allocator_allocate(void *ctx, ump_dd_mem *mem)
+{
+	block_allocator *allocator;
+	u32 left;
+	block_info *last_allocated = NULL;
+	int i = 0;
+
+	BUG_ON(!ctx);
+	BUG_ON(!mem);
+
+	allocator = (block_allocator *)ctx;
+	left = mem->size_bytes;
+
+	BUG_ON(!left);
+	BUG_ON(!&allocator->mutex);
+
+	mem->nr_blocks = ((left + UMP_BLOCK_SIZE - 1) & ~(UMP_BLOCK_SIZE - 1)) / UMP_BLOCK_SIZE;
+	mem->block_array = (ump_dd_physical_block *)vmalloc(sizeof(ump_dd_physical_block) * mem->nr_blocks);
+	if (NULL == mem->block_array) {
+		MSG_ERR(("Failed to allocate block array\n"));
+		return 0;
+	}
+
+	if (down_interruptible(&allocator->mutex)) {
+		MSG_ERR(("Could not get mutex to do block_allocate\n"));
+		return 0;
+	}
+
+	mem->size_bytes = 0;
+
+	while ((left > 0) && (allocator->first_free)) {
+		block_info *block;
+
+		block = allocator->first_free;
+		allocator->first_free = allocator->first_free->next;
+		block->next = last_allocated;
+		last_allocated = block;
+		allocator->num_free--;
+
+		mem->block_array[i].addr = get_phys(allocator, block);
+		mem->block_array[i].size = UMP_BLOCK_SIZE;
+		mem->size_bytes += UMP_BLOCK_SIZE;
+
+		i++;
+
+		if (left < UMP_BLOCK_SIZE) left = 0;
+		else left -= UMP_BLOCK_SIZE;
+	}
+
+	if (left) {
+		block_info *block;
+		/* release all memory back to the pool */
+		while (last_allocated) {
+			block = last_allocated->next;
+			last_allocated->next = allocator->first_free;
+			allocator->first_free = last_allocated;
+			last_allocated = block;
+			allocator->num_free++;
+		}
+
+		vfree(mem->block_array);
+		mem->backend_info = NULL;
+		mem->block_array = NULL;
+
+		DBG_MSG(4, ("Could not find a mem-block for the allocation.\n"));
+		up(&allocator->mutex);
+
+		return 0;
+	}
+
+	mem->backend_info = last_allocated;
+
+	up(&allocator->mutex);
+	mem->is_cached = 0;
+
+	return 1;
+}
+
+
+
+static void block_allocator_release(void *ctx, ump_dd_mem *handle)
+{
+	block_allocator *allocator;
+	block_info *block, * next;
+
+	BUG_ON(!ctx);
+	BUG_ON(!handle);
+
+	allocator = (block_allocator *)ctx;
+	block = (block_info *)handle->backend_info;
+	BUG_ON(!block);
+
+	if (down_interruptible(&allocator->mutex)) {
+		MSG_ERR(("Allocator release: Failed to get mutex - memory leak\n"));
+		return;
+	}
+
+	while (block) {
+		next = block->next;
+
+		BUG_ON((block < allocator->all_blocks) || (block > (allocator->all_blocks + allocator->num_blocks)));
+
+		block->next = allocator->first_free;
+		allocator->first_free = block;
+		allocator->num_free++;
+
+		block = next;
+	}
+	DBG_MSG(3, ("%d blocks free after release call\n", allocator->num_free));
+	up(&allocator->mutex);
+
+	vfree(handle->block_array);
+	handle->block_array = NULL;
+}
+
+
+
+/*
+ * Helper function for calculating the physical base adderss of a memory block
+ */
+static inline u32 get_phys(block_allocator *allocator, block_info *block)
+{
+	return allocator->base + ((block - allocator->all_blocks) * UMP_BLOCK_SIZE);
+}
+
+static u32 block_allocator_stat(struct ump_memory_backend *backend)
+{
+	block_allocator *allocator;
+	BUG_ON(!backend);
+	allocator = (block_allocator *)backend->ctx;
+	BUG_ON(!allocator);
+
+	return (allocator->num_blocks - allocator->num_free) * UMP_BLOCK_SIZE;
+}
diff --git a/ump/linux/ump_kernel_memory_backend_dedicated.h b/ump/linux/ump_kernel_memory_backend_dedicated.h
new file mode 100755
index 0000000..8c1ea1e
--- /dev/null
+++ b/ump/linux/ump_kernel_memory_backend_dedicated.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010, 2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_kernel_memory_backend_dedicated.h
+ */
+
+#ifndef __UMP_KERNEL_MEMORY_BACKEND_DEDICATED_H__
+#define __UMP_KERNEL_MEMORY_BACKEND_DEDICATED_H__
+
+#include "ump_kernel_memory_backend.h"
+
+ump_memory_backend *ump_block_allocator_create(u32 base_address, u32 size);
+
+#endif /* __UMP_KERNEL_MEMORY_BACKEND_DEDICATED_H__ */
+
diff --git a/ump/linux/ump_kernel_memory_backend_os.c b/ump/linux/ump_kernel_memory_backend_os.c
new file mode 100755
index 0000000..f7791b0
--- /dev/null
+++ b/ump/linux/ump_kernel_memory_backend_os.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/* needed to detect kernel version specific code */
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
+#include <linux/semaphore.h>
+#else /* pre 2.6.26 the file was in the arch specific location */
+#include <asm/semaphore.h>
+#endif
+
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/atomic.h>
+#include <linux/vmalloc.h>
+#include <asm/cacheflush.h>
+#include "ump_kernel_common.h"
+#include "ump_kernel_memory_backend.h"
+
+
+
+typedef struct os_allocator {
+	struct semaphore mutex;
+	u32 num_pages_max;       /**< Maximum number of pages to allocate from the OS */
+	u32 num_pages_allocated; /**< Number of pages allocated from the OS */
+} os_allocator;
+
+
+
+static void os_free(void *ctx, ump_dd_mem *descriptor);
+static int os_allocate(void *ctx, ump_dd_mem *descriptor);
+static void os_memory_backend_destroy(ump_memory_backend *backend);
+static u32 os_stat(struct ump_memory_backend *backend);
+
+
+
+/*
+ * Create OS memory backend
+ */
+ump_memory_backend *ump_os_memory_backend_create(const int max_allocation)
+{
+	ump_memory_backend *backend;
+	os_allocator *info;
+
+	info = kmalloc(sizeof(os_allocator), GFP_KERNEL);
+	if (NULL == info) {
+		return NULL;
+	}
+
+	info->num_pages_max = max_allocation >> PAGE_SHIFT;
+	info->num_pages_allocated = 0;
+
+	sema_init(&info->mutex, 1);
+
+	backend = kmalloc(sizeof(ump_memory_backend), GFP_KERNEL);
+	if (NULL == backend) {
+		kfree(info);
+		return NULL;
+	}
+
+	backend->ctx = info;
+	backend->allocate = os_allocate;
+	backend->release = os_free;
+	backend->shutdown = os_memory_backend_destroy;
+	backend->stat = os_stat;
+	backend->pre_allocate_physical_check = NULL;
+	backend->adjust_to_mali_phys = NULL;
+
+	return backend;
+}
+
+
+
+/*
+ * Destroy specified OS memory backend
+ */
+static void os_memory_backend_destroy(ump_memory_backend *backend)
+{
+	os_allocator *info = (os_allocator *)backend->ctx;
+
+	DBG_MSG_IF(1, 0 != info->num_pages_allocated, ("%d pages still in use during shutdown\n", info->num_pages_allocated));
+
+	kfree(info);
+	kfree(backend);
+}
+
+
+
+/*
+ * Allocate UMP memory
+ */
+static int os_allocate(void *ctx, ump_dd_mem *descriptor)
+{
+	u32 left;
+	os_allocator *info;
+	int pages_allocated = 0;
+	int is_cached;
+
+	BUG_ON(!descriptor);
+	BUG_ON(!ctx);
+
+	info = (os_allocator *)ctx;
+	left = descriptor->size_bytes;
+	is_cached = descriptor->is_cached;
+
+	if (down_interruptible(&info->mutex)) {
+		DBG_MSG(1, ("Failed to get mutex in os_free\n"));
+		return 0; /* failure */
+	}
+
+	descriptor->backend_info = NULL;
+	descriptor->nr_blocks = ((left + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) >> PAGE_SHIFT;
+
+	DBG_MSG(5, ("Allocating page array. Size: %lu\n", descriptor->nr_blocks * sizeof(ump_dd_physical_block)));
+
+	descriptor->block_array = (ump_dd_physical_block *)vmalloc(sizeof(ump_dd_physical_block) * descriptor->nr_blocks);
+	if (NULL == descriptor->block_array) {
+		up(&info->mutex);
+		DBG_MSG(1, ("Block array could not be allocated\n"));
+		return 0; /* failure */
+	}
+
+	while (left > 0 && ((info->num_pages_allocated + pages_allocated) < info->num_pages_max)) {
+		struct page *new_page;
+
+		if (is_cached) {
+			new_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO | __GFP_REPEAT | __GFP_NOWARN);
+		} else {
+			new_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO | __GFP_REPEAT | __GFP_NOWARN | __GFP_COLD);
+		}
+		if (NULL == new_page) {
+			break;
+		}
+
+		/* Ensure page caches are flushed. */
+		if (is_cached) {
+			descriptor->block_array[pages_allocated].addr = page_to_phys(new_page);
+			descriptor->block_array[pages_allocated].size = PAGE_SIZE;
+		} else {
+			descriptor->block_array[pages_allocated].addr = dma_map_page(NULL, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+			descriptor->block_array[pages_allocated].size = PAGE_SIZE;
+		}
+
+		DBG_MSG(5, ("Allocated page 0x%08lx cached: %d\n", descriptor->block_array[pages_allocated].addr, is_cached));
+
+		if (left < PAGE_SIZE) {
+			left = 0;
+		} else {
+			left -= PAGE_SIZE;
+		}
+
+		pages_allocated++;
+	}
+
+	DBG_MSG(5, ("Alloce for ID:%2d got %d pages, cached: %d\n", descriptor->secure_id,  pages_allocated));
+
+	if (left) {
+		DBG_MSG(1, ("Failed to allocate needed pages\n"));
+
+		while (pages_allocated) {
+			pages_allocated--;
+			if (!is_cached) {
+				dma_unmap_page(NULL, descriptor->block_array[pages_allocated].addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+			}
+			__free_page(pfn_to_page(descriptor->block_array[pages_allocated].addr >> PAGE_SHIFT));
+		}
+
+		up(&info->mutex);
+
+		return 0; /* failure */
+	}
+
+	info->num_pages_allocated += pages_allocated;
+
+	DBG_MSG(6, ("%d out of %d pages now allocated\n", info->num_pages_allocated, info->num_pages_max));
+
+	up(&info->mutex);
+
+	return 1; /* success*/
+}
+
+
+/*
+ * Free specified UMP memory
+ */
+static void os_free(void *ctx, ump_dd_mem *descriptor)
+{
+	os_allocator *info;
+	int i;
+
+	BUG_ON(!ctx);
+	BUG_ON(!descriptor);
+
+	info = (os_allocator *)ctx;
+
+	BUG_ON(descriptor->nr_blocks > info->num_pages_allocated);
+
+	if (down_interruptible(&info->mutex)) {
+		DBG_MSG(1, ("Failed to get mutex in os_free\n"));
+		return;
+	}
+
+	DBG_MSG(5, ("Releasing %lu OS pages\n", descriptor->nr_blocks));
+
+	info->num_pages_allocated -= descriptor->nr_blocks;
+
+	up(&info->mutex);
+
+	for (i = 0; i < descriptor->nr_blocks; i++) {
+		DBG_MSG(6, ("Freeing physical page. Address: 0x%08lx\n", descriptor->block_array[i].addr));
+		if (! descriptor->is_cached) {
+			dma_unmap_page(NULL, descriptor->block_array[i].addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+		}
+		__free_page(pfn_to_page(descriptor->block_array[i].addr >> PAGE_SHIFT));
+	}
+
+	vfree(descriptor->block_array);
+}
+
+
+static u32 os_stat(struct ump_memory_backend *backend)
+{
+	os_allocator *info;
+	info = (os_allocator *)backend->ctx;
+	return info->num_pages_allocated * _MALI_OSK_MALI_PAGE_SIZE;
+}
diff --git a/ump/linux/ump_kernel_memory_backend_os.h b/ump/linux/ump_kernel_memory_backend_os.h
new file mode 100755
index 0000000..2d62f4d
--- /dev/null
+++ b/ump/linux/ump_kernel_memory_backend_os.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010, 2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_kernel_memory_backend_os.h
+ */
+
+#ifndef __UMP_KERNEL_MEMORY_BACKEND_OS_H__
+#define __UMP_KERNEL_MEMORY_BACKEND_OS_H__
+
+#include "ump_kernel_memory_backend.h"
+
+ump_memory_backend *ump_os_memory_backend_create(const int max_allocation);
+
+#endif /* __UMP_KERNEL_MEMORY_BACKEND_OS_H__ */
+
diff --git a/ump/linux/ump_kernel_random_mapping.c b/ump/linux/ump_kernel_random_mapping.c
new file mode 100755
index 0000000..f584878
--- /dev/null
+++ b/ump/linux/ump_kernel_random_mapping.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "ump_osk.h"
+#include "ump_kernel_common.h"
+#include "ump_kernel_types.h"
+#include "ump_kernel_random_mapping.h"
+
+#include <linux/random.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/jiffies.h>
+
+
+static ump_dd_mem *search(struct rb_root *root, int id)
+{
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		ump_dd_mem *e = container_of(node, ump_dd_mem, node);
+
+		if (id < e->secure_id) {
+			node = node->rb_left;
+		} else if (id > e->secure_id) {
+			node = node->rb_right;
+		} else {
+			return e;
+		}
+	}
+
+	return NULL;
+}
+
+static mali_bool insert(struct rb_root *root, int id, ump_dd_mem *mem)
+{
+	struct rb_node **new = &(root->rb_node);
+	struct rb_node *parent = NULL;
+
+	while (*new) {
+		ump_dd_mem *this = container_of(*new, ump_dd_mem, node);
+
+		parent = *new;
+		if (id < this->secure_id) {
+			new = &((*new)->rb_left);
+		} else if (id > this->secure_id) {
+			new = &((*new)->rb_right);
+		} else {
+			printk(KERN_ERR "UMP: ID already used %x\n", id);
+			return MALI_FALSE;
+		}
+	}
+
+	rb_link_node(&mem->node, parent, new);
+	rb_insert_color(&mem->node, root);
+
+	return MALI_TRUE;
+}
+
+
+ump_random_mapping *ump_random_mapping_create(void)
+{
+	ump_random_mapping *map = _mali_osk_calloc(1, sizeof(ump_random_mapping));
+
+	if (NULL == map)
+		return NULL;
+
+	map->lock = _mali_osk_mutex_rw_init(_MALI_OSK_LOCKFLAG_ORDERED,
+					    _MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP);
+	if (NULL != map->lock) {
+		map->root = RB_ROOT;
+#if UMP_RANDOM_MAP_DELAY
+		map->failed.count = 0;
+		map->failed.timestamp = jiffies;
+#endif
+		return map;
+	}
+	return NULL;
+}
+
+void ump_random_mapping_destroy(ump_random_mapping *map)
+{
+	_mali_osk_mutex_rw_term(map->lock);
+	_mali_osk_free(map);
+}
+
+int ump_random_mapping_insert(ump_random_mapping *map, ump_dd_mem *mem)
+{
+	_mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RW);
+
+	while (1) {
+		u32 id;
+
+		get_random_bytes(&id, sizeof(id));
+
+		/* Try a new random number if id happened to be the invalid
+		 * secure ID (-1). */
+		if (unlikely(id == UMP_INVALID_SECURE_ID))
+			continue;
+
+		/* Insert into the tree. If the id was already in use, get a
+		 * new random id and try again. */
+		if (insert(&map->root, id, mem)) {
+			mem->secure_id = id;
+			break;
+		}
+	}
+	_mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RW);
+
+	return 0;
+}
+
+ump_dd_mem *ump_random_mapping_get(ump_random_mapping *map, int id)
+{
+	ump_dd_mem *mem = NULL;
+#if UMP_RANDOM_MAP_DELAY
+	int do_delay = 0;
+#endif
+
+	DEBUG_ASSERT(map);
+
+	_mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RO);
+	mem = search(&map->root, id);
+
+	if (unlikely(NULL == mem)) {
+#if UMP_RANDOM_MAP_DELAY
+		map->failed.count++;
+
+		if (time_is_before_jiffies(map->failed.timestamp +
+					   UMP_FAILED_LOOKUP_DELAY * HZ)) {
+			/* If it is a long time since last failure, reset
+			 * the counter and skip the delay this time. */
+			map->failed.count = 0;
+		} else if (map->failed.count > UMP_FAILED_LOOKUPS_ALLOWED) {
+			do_delay = 1;
+		}
+
+		map->failed.timestamp = jiffies;
+#endif /* UMP_RANDOM_MAP_DELAY */
+	} else {
+		ump_dd_reference_add(mem);
+	}
+	_mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RO);
+
+#if UMP_RANDOM_MAP_DELAY
+	if (do_delay) {
+		/* Apply delay */
+		schedule_timeout_killable(UMP_FAILED_LOOKUP_DELAY);
+	}
+#endif /* UMP_RANDOM_MAP_DELAY */
+
+	return mem;
+}
+
+static ump_dd_mem *ump_random_mapping_remove_internal(ump_random_mapping *map, int id)
+{
+	ump_dd_mem *mem = NULL;
+
+	mem = search(&map->root, id);
+
+	if (mem) {
+		rb_erase(&mem->node, &map->root);
+	}
+
+	return mem;
+}
+
+void ump_random_mapping_put(ump_dd_mem *mem)
+{
+	int new_ref;
+
+	_mali_osk_mutex_rw_wait(device.secure_id_map->lock, _MALI_OSK_LOCKMODE_RW);
+
+	new_ref = _ump_osk_atomic_dec_and_read(&mem->ref_count);
+	DBG_MSG(5, ("Memory reference decremented. ID: %u, new value: %d\n",
+		    mem->secure_id, new_ref));
+
+	if (0 == new_ref) {
+		DBG_MSG(3, ("Final release of memory. ID: %u\n", mem->secure_id));
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+		if (mem->import_attach) {
+			struct dma_buf_attachment *attach = mem->import_attach;
+			struct dma_buf *dma_buf;
+
+			if (mem->sgt)
+				dma_buf_unmap_attachment(attach, mem->sgt,
+							 DMA_BIDIRECTIONAL);
+
+			dma_buf = attach->dmabuf;
+			dma_buf_detach(attach->dmabuf, attach);
+			dma_buf_put(dma_buf);
+
+		}
+#endif
+		ump_random_mapping_remove_internal(device.secure_id_map, mem->secure_id);
+
+		mem->release_func(mem->ctx, mem);
+		_mali_osk_free(mem);
+	}
+
+	_mali_osk_mutex_rw_signal(device.secure_id_map->lock, _MALI_OSK_LOCKMODE_RW);
+}
+
+ump_dd_mem *ump_random_mapping_remove(ump_random_mapping *map, int descriptor)
+{
+	ump_dd_mem *mem;
+
+	_mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RW);
+	mem = ump_random_mapping_remove_internal(map, descriptor);
+	_mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RW);
+
+	return mem;
+}
diff --git a/ump/linux/ump_kernel_random_mapping.h b/ump/linux/ump_kernel_random_mapping.h
new file mode 100755
index 0000000..67d4a86
--- /dev/null
+++ b/ump/linux/ump_kernel_random_mapping.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_kernel_random_mapping.h
+ */
+
+#ifndef __UMP_KERNEL_RANDOM_MAPPING_H__
+#define __UMP_KERNEL_RANDOM_MAPPING_H__
+
+#include "mali_osk.h"
+#include <linux/rbtree.h>
+
+#define UMP_RANDOM_MAP_DELAY 1
+#define UMP_FAILED_LOOKUP_DELAY 10 /* ms */
+#define UMP_FAILED_LOOKUPS_ALLOWED 10 /* number of allowed failed lookups */
+
+/**
+ * The random mapping object
+ * Provides a separate namespace where we can map an integer to a pointer
+ */
+typedef struct ump_random_mapping {
+	_mali_osk_mutex_rw_t *lock; /**< Lock protecting access to the mapping object */
+	struct rb_root root;
+#if UMP_RANDOM_MAP_DELAY
+	struct {
+		unsigned long count;
+		unsigned long timestamp;
+	} failed;
+#endif
+} ump_random_mapping;
+
+/**
+ * Create a random mapping object
+ * Create a random mapping capable of holding 2^20 entries
+ * @return Pointer to a random mapping object, NULL on failure
+ */
+ump_random_mapping *ump_random_mapping_create(void);
+
+/**
+ * Destroy a random mapping object
+ * @param map The map to free
+ */
+void ump_random_mapping_destroy(ump_random_mapping *map);
+
+/**
+ * Allocate a new mapping entry (random ID)
+ * Allocates a new entry in the map.
+ * @param map The map to allocate a new entry in
+ * @param target The value to map to
+ * @return The random allocated, a negative value on error
+ */
+int ump_random_mapping_insert(ump_random_mapping *map, ump_dd_mem *mem);
+
+/**
+ * Get the value mapped to by a random ID
+ *
+ * If the lookup fails, punish the calling thread by applying a delay.
+ *
+ * @param map The map to lookup the random id in
+ * @param id The ID to lookup
+ * @param target Pointer to a pointer which will receive the stored value
+ * @return ump_dd_mem pointer on successful lookup, NULL on error
+ */
+ump_dd_mem *ump_random_mapping_get(ump_random_mapping *map, int id);
+
+void ump_random_mapping_put(ump_dd_mem *mem);
+
+/**
+ * Free the random ID
+ * For the random to be reused it has to be freed
+ * @param map The map to free the random from
+ * @param id The ID to free
+ */
+ump_dd_mem *ump_random_mapping_remove(ump_random_mapping *map, int id);
+
+#endif /* __UMP_KERNEL_RANDOM_MAPPING_H__ */
diff --git a/ump/linux/ump_memory_backend.c b/ump/linux/ump_memory_backend.c
new file mode 100755
index 0000000..87b1419
--- /dev/null
+++ b/ump/linux/ump_memory_backend.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+
+#include "arch/config.h"             /* Configuration for current platform. The symlink for arch is set by Makefile */
+
+#include "ump_osk.h"
+#include "ump_kernel_common.h"
+#include "ump_kernel_memory_backend_os.h"
+#include "ump_kernel_memory_backend_dedicated.h"
+
+/* Configure which dynamic memory allocator to use */
+int ump_backend = ARCH_UMP_BACKEND_DEFAULT;
+module_param(ump_backend, int, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_backend, "0 = dedicated memory backend (default), 1 = OS memory backend");
+
+/* The base address of the memory block for the dedicated memory backend */
+unsigned int ump_memory_address = ARCH_UMP_MEMORY_ADDRESS_DEFAULT;
+module_param(ump_memory_address, uint, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_memory_address, "The physical address to map for the dedicated memory backend");
+
+/* The size of the memory block for the dedicated memory backend */
+unsigned int ump_memory_size = ARCH_UMP_MEMORY_SIZE_DEFAULT;
+module_param(ump_memory_size, uint, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_memory_size, "The size of fixed memory to map in the dedicated memory backend");
+
+ump_memory_backend *ump_memory_backend_create(void)
+{
+	ump_memory_backend *backend = NULL;
+
+	/* Create the dynamic memory allocator backend */
+	if (0 == ump_backend) {
+		DBG_MSG(2, ("Using dedicated memory backend\n"));
+
+		DBG_MSG(2, ("Requesting dedicated memory: 0x%08x, size: %u\n", ump_memory_address, ump_memory_size));
+		/* Ask the OS if we can use the specified physical memory */
+		if (NULL == request_mem_region(ump_memory_address, ump_memory_size, "UMP Memory")) {
+			MSG_ERR(("Failed to request memory region (0x%08X - 0x%08X). Is Mali DD already loaded?\n", ump_memory_address, ump_memory_address + ump_memory_size - 1));
+			return NULL;
+		}
+		backend = ump_block_allocator_create(ump_memory_address, ump_memory_size);
+	} else if (1 == ump_backend) {
+		DBG_MSG(2, ("Using OS memory backend, allocation limit: %d\n", ump_memory_size));
+		backend = ump_os_memory_backend_create(ump_memory_size);
+	}
+
+	return backend;
+}
+
+void ump_memory_backend_destroy(void)
+{
+	if (0 == ump_backend) {
+		DBG_MSG(2, ("Releasing dedicated memory: 0x%08x\n", ump_memory_address));
+		release_mem_region(ump_memory_address, ump_memory_size);
+	}
+}
diff --git a/ump/linux/ump_osk_atomics.c b/ump/linux/ump_osk_atomics.c
new file mode 100755
index 0000000..f9fc194
--- /dev/null
+++ b/ump/linux/ump_osk_atomics.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2010, 2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_osk_atomics.c
+ * Implementation of the OS abstraction layer for the UMP kernel device driver
+ */
+
+#include "ump_osk.h"
+#include <asm/atomic.h>
+
+int _ump_osk_atomic_dec_and_read(_mali_osk_atomic_t *atom)
+{
+	return atomic_dec_return((atomic_t *)&atom->u.val);
+}
+
+int _ump_osk_atomic_inc_and_read(_mali_osk_atomic_t *atom)
+{
+	return atomic_inc_return((atomic_t *)&atom->u.val);
+}
diff --git a/ump/linux/ump_osk_low_level_mem.c b/ump/linux/ump_osk_low_level_mem.c
new file mode 100755
index 0000000..ef35052
--- /dev/null
+++ b/ump/linux/ump_osk_low_level_mem.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_osk_memory.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+/* needed to detect kernel version specific code */
+#include <linux/version.h>
+
+#include "ump_osk.h"
+#include "ump_uk_types.h"
+#include "ump_ukk.h"
+#include "ump_kernel_common.h"
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/memory.h>
+#include <asm/uaccess.h>                        /* to verify pointers from user space */
+#include <asm/cacheflush.h>
+#include <linux/dma-mapping.h>
+
+typedef struct ump_vma_usage_tracker {
+	atomic_t references;
+	ump_memory_allocation *descriptor;
+} ump_vma_usage_tracker;
+
+static void ump_vma_open(struct vm_area_struct *vma);
+static void ump_vma_close(struct vm_area_struct *vma);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
+static int ump_cpu_page_fault_handler(struct vm_area_struct *vma, struct vm_fault *vmf);
+#else
+static unsigned long ump_cpu_page_fault_handler(struct vm_area_struct *vma, unsigned long address);
+#endif
+
+static struct vm_operations_struct ump_vm_ops = {
+	.open = ump_vma_open,
+	.close = ump_vma_close,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
+	.fault = ump_cpu_page_fault_handler
+#else
+	.nopfn = ump_cpu_page_fault_handler
+#endif
+};
+
+/*
+ * Page fault for VMA region
+ * This should never happen since we always map in the entire virtual memory range.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
+static int ump_cpu_page_fault_handler(struct vm_area_struct *vma, struct vm_fault *vmf)
+#else
+static unsigned long ump_cpu_page_fault_handler(struct vm_area_struct *vma, unsigned long address)
+#endif
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
+	void __user *address;
+	address = vmf->virtual_address;
+#endif
+	MSG_ERR(("Page-fault in UMP memory region caused by the CPU\n"));
+	MSG_ERR(("VMA: 0x%08lx, virtual address: 0x%08lx\n", (unsigned long)vma, address));
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
+	return VM_FAULT_SIGBUS;
+#else
+	return NOPFN_SIGBUS;
+#endif
+}
+
+static void ump_vma_open(struct vm_area_struct *vma)
+{
+	ump_vma_usage_tracker *vma_usage_tracker;
+	int new_val;
+
+	vma_usage_tracker = (ump_vma_usage_tracker *)vma->vm_private_data;
+	BUG_ON(NULL == vma_usage_tracker);
+
+	new_val = atomic_inc_return(&vma_usage_tracker->references);
+
+	DBG_MSG(4, ("VMA open, VMA reference count incremented. VMA: 0x%08lx, reference count: %d\n", (unsigned long)vma, new_val));
+}
+
+static void ump_vma_close(struct vm_area_struct *vma)
+{
+	ump_vma_usage_tracker *vma_usage_tracker;
+	_ump_uk_unmap_mem_s args;
+	int new_val;
+
+	vma_usage_tracker = (ump_vma_usage_tracker *)vma->vm_private_data;
+	BUG_ON(NULL == vma_usage_tracker);
+
+	new_val = atomic_dec_return(&vma_usage_tracker->references);
+
+	DBG_MSG(4, ("VMA close, VMA reference count decremented. VMA: 0x%08lx, reference count: %d\n", (unsigned long)vma, new_val));
+
+	if (0 == new_val) {
+		ump_memory_allocation *descriptor;
+
+		descriptor = vma_usage_tracker->descriptor;
+
+		args.ctx = descriptor->ump_session;
+		args.cookie = descriptor->cookie;
+		args.mapping = descriptor->mapping;
+		args.size = descriptor->size;
+
+		args._ukk_private = NULL; /** @note unused */
+
+		DBG_MSG(4, ("No more VMA references left, releasing UMP memory\n"));
+		_ump_ukk_unmap_mem(& args);
+
+		/* vma_usage_tracker is free()d by _ump_osk_mem_mapregion_term() */
+	}
+}
+
+_mali_osk_errcode_t _ump_osk_mem_mapregion_init(ump_memory_allocation *descriptor)
+{
+	ump_vma_usage_tracker *vma_usage_tracker;
+	struct vm_area_struct *vma;
+
+	if (NULL == descriptor) return _MALI_OSK_ERR_FAULT;
+
+	vma_usage_tracker = kmalloc(sizeof(ump_vma_usage_tracker), GFP_KERNEL);
+	if (NULL == vma_usage_tracker) {
+		DBG_MSG(1, ("Failed to allocate memory for ump_vma_usage_tracker in _mali_osk_mem_mapregion_init\n"));
+		return -_MALI_OSK_ERR_FAULT;
+	}
+
+	vma = (struct vm_area_struct *)descriptor->process_mapping_info;
+	if (NULL == vma) {
+		kfree(vma_usage_tracker);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	vma->vm_private_data = vma_usage_tracker;
+	vma->vm_flags |= VM_IO;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
+	vma->vm_flags |= VM_RESERVED;
+#else
+	vma->vm_flags |= VM_DONTDUMP;
+	vma->vm_flags |= VM_DONTEXPAND;
+	vma->vm_flags |= VM_PFNMAP;
+#endif
+
+
+	if (0 == descriptor->is_cached) {
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+	DBG_MSG(3, ("Mapping with page_prot: 0x%x\n", vma->vm_page_prot));
+
+	/* Setup the functions which handle further VMA handling */
+	vma->vm_ops = &ump_vm_ops;
+
+	/* Do the va range allocation - in this case, it was done earlier, so we copy in that information */
+	descriptor->mapping = (void __user *)vma->vm_start;
+
+	atomic_set(&vma_usage_tracker->references, 1); /*this can later be increased if process is forked, see ump_vma_open() */
+	vma_usage_tracker->descriptor = descriptor;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _ump_osk_mem_mapregion_term(ump_memory_allocation *descriptor)
+{
+	struct vm_area_struct *vma;
+	ump_vma_usage_tracker *vma_usage_tracker;
+
+	if (NULL == descriptor) return;
+
+	/* Linux does the right thing as part of munmap to remove the mapping
+	 * All that remains is that we remove the vma_usage_tracker setup in init() */
+	vma = (struct vm_area_struct *)descriptor->process_mapping_info;
+
+	vma_usage_tracker = vma->vm_private_data;
+
+	/* We only get called if mem_mapregion_init succeeded */
+	kfree(vma_usage_tracker);
+	return;
+}
+
+_mali_osk_errcode_t _ump_osk_mem_mapregion_map(ump_memory_allocation *descriptor, u32 offset, u32 *phys_addr, unsigned long size)
+{
+	struct vm_area_struct *vma;
+	_mali_osk_errcode_t retval;
+
+	if (NULL == descriptor) return _MALI_OSK_ERR_FAULT;
+
+	vma = (struct vm_area_struct *)descriptor->process_mapping_info;
+
+	if (NULL == vma) return _MALI_OSK_ERR_FAULT;
+
+	retval = remap_pfn_range(vma, ((u32)descriptor->mapping) + offset, (*phys_addr) >> PAGE_SHIFT, size, vma->vm_page_prot) ? _MALI_OSK_ERR_FAULT : _MALI_OSK_ERR_OK;;
+
+	DBG_MSG(4, ("Mapping virtual to physical memory. ID: %u, vma: 0x%08lx, virtual addr:0x%08lx, physical addr: 0x%08lx, size:%lu, prot:0x%x, vm_flags:0x%x RETVAL: 0x%x\n",
+		    ump_dd_secure_id_get(descriptor->handle),
+		    (unsigned long)vma,
+		    (unsigned long)(vma->vm_start + offset),
+		    (unsigned long)*phys_addr,
+		    size,
+		    (unsigned int)vma->vm_page_prot, vma->vm_flags, retval));
+
+	return retval;
+}
+
+static void level1_cache_flush_all(void)
+{
+	DBG_MSG(4, ("UMP[xx] Flushing complete L1 cache\n"));
+	__cpuc_flush_kern_all();
+}
+
+void _ump_osk_msync(ump_dd_mem *mem, void *virt, u32 offset, u32 size, ump_uk_msync_op op, ump_session_data *session_data)
+{
+	int i;
+
+	/* Flush L1 using virtual address, the entire range in one go.
+	 * Only flush if user space process has a valid write mapping on given address. */
+	if ((mem) && (virt != NULL) && (access_ok(VERIFY_WRITE, virt, size))) {
+		__cpuc_flush_dcache_area(virt, size);
+		DBG_MSG(3, ("UMP[%02u] Flushing CPU L1 Cache. CPU address: %x, size: %x\n", mem->secure_id, virt, size));
+	} else {
+		if (session_data) {
+			if (op == _UMP_UK_MSYNC_FLUSH_L1) {
+				DBG_MSG(4, ("UMP Pending L1 cache flushes: %d\n", session_data->has_pending_level1_cache_flush));
+				session_data->has_pending_level1_cache_flush = 0;
+				level1_cache_flush_all();
+				return;
+			} else {
+				if (session_data->cache_operations_ongoing) {
+					session_data->has_pending_level1_cache_flush++;
+					DBG_MSG(4, ("UMP[%02u] Defering the L1 flush. Nr pending:%d\n", mem->secure_id, session_data->has_pending_level1_cache_flush));
+				} else {
+					/* Flushing the L1 cache for each switch_user() if ump_cache_operations_control(START) is not called */
+					level1_cache_flush_all();
+				}
+			}
+		} else {
+			DBG_MSG(4, ("Unkown state %s %d\n", __FUNCTION__, __LINE__));
+			level1_cache_flush_all();
+		}
+	}
+
+	if (NULL == mem) return;
+
+	if (mem->size_bytes == size) {
+		DBG_MSG(3, ("UMP[%02u] Flushing CPU L2 Cache\n", mem->secure_id));
+	} else {
+		DBG_MSG(3, ("UMP[%02u] Flushing CPU L2 Cache. Blocks:%u, TotalSize:%u. FlushSize:%u Offset:0x%x FirstPaddr:0x%08x\n",
+			    mem->secure_id, mem->nr_blocks, mem->size_bytes, size, offset, mem->block_array[0].addr));
+	}
+
+
+	/* Flush L2 using physical addresses, block for block. */
+	for (i = 0 ; i < mem->nr_blocks; i++) {
+		u32 start_p, end_p;
+		ump_dd_physical_block *block;
+		block = &mem->block_array[i];
+
+		if (offset >= block->size) {
+			offset -= block->size;
+			continue;
+		}
+
+		if (offset) {
+			start_p = (u32)block->addr + offset;
+			/* We'll zero the offset later, after using it to calculate end_p. */
+		} else {
+			start_p = (u32)block->addr;
+		}
+
+		if (size < block->size - offset) {
+			end_p = start_p + size;
+			size = 0;
+		} else {
+			if (offset) {
+				end_p = start_p + (block->size - offset);
+				size -= block->size - offset;
+				offset = 0;
+			} else {
+				end_p = start_p + block->size;
+				size -= block->size;
+			}
+		}
+
+		switch (op) {
+		case _UMP_UK_MSYNC_CLEAN:
+			outer_clean_range(start_p, end_p);
+			break;
+		case _UMP_UK_MSYNC_CLEAN_AND_INVALIDATE:
+			outer_flush_range(start_p, end_p);
+			break;
+		case _UMP_UK_MSYNC_INVALIDATE:
+			outer_inv_range(start_p, end_p);
+			break;
+		default:
+			break;
+		}
+
+		if (0 == size) {
+			/* Nothing left to flush. */
+			break;
+		}
+	}
+
+	return;
+}
diff --git a/ump/linux/ump_osk_misc.c b/ump/linux/ump_osk_misc.c
new file mode 100755
index 0000000..ce0ab06
--- /dev/null
+++ b/ump/linux/ump_osk_misc.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_osk_misc.c
+ * Implementation of the OS abstraction layer for the UMP kernel device driver
+ */
+
+
+#include "ump_osk.h"
+
+#include <linux/kernel.h>
+#include "ump_kernel_linux.h"
+
+/* is called from ump_kernel_constructor in common code */
+_mali_osk_errcode_t _ump_osk_init(void)
+{
+	if (0 != ump_kernel_device_initialize()) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _ump_osk_term(void)
+{
+	ump_kernel_device_terminate();
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/ump/linux/ump_ukk_ref_wrappers.c b/ump/linux/ump_ukk_ref_wrappers.c
new file mode 100755
index 0000000..1ee1955
--- /dev/null
+++ b/ump/linux/ump_ukk_ref_wrappers.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_ukk_wrappers.c
+ * Defines the wrapper functions which turn Linux IOCTL calls into _ukk_ calls for the reference implementation
+ */
+
+
+#include <asm/uaccess.h>             /* user space access */
+
+#include "ump_osk.h"
+#include "ump_uk_types.h"
+#include "ump_ukk.h"
+#include "ump_kernel_common.h"
+#include <linux/scatterlist.h>
+#include "ump_kernel_interface_ref_drv.h"
+#include "mali_osk_list.h"
+
+extern struct device *ump_global_mdev;
+
+/*
+ * IOCTL operation; Allocate UMP memory
+ */
+int ump_allocate_wrapper(u32 __user *argument, struct ump_session_data   *session_data)
+{
+	_ump_uk_allocate_s user_interaction;
+	_mali_osk_errcode_t err;
+
+	/* Sanity check input parameters */
+	if (NULL == argument || NULL == session_data) {
+		MSG_ERR(("NULL parameter in ump_ioctl_allocate()\n"));
+		return -ENOTTY;
+	}
+
+	/* Copy the user space memory to kernel space (so we safely can read it) */
+	if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) {
+		MSG_ERR(("copy_from_user() in ump_ioctl_allocate()\n"));
+		return -EFAULT;
+	}
+
+	user_interaction.ctx = (void *) session_data;
+
+	err = _ump_ukk_allocate(&user_interaction);
+	if (_MALI_OSK_ERR_OK != err) {
+		DBG_MSG(1, ("_ump_ukk_allocate() failed in ump_ioctl_allocate()\n"));
+		return ump_map_errcode(err);
+	}
+	user_interaction.ctx = NULL;
+
+	if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) {
+		/* If the copy fails then we should release the memory. We can use the IOCTL release to accomplish this */
+		_ump_uk_release_s release_args;
+
+		MSG_ERR(("copy_to_user() failed in ump_ioctl_allocate()\n"));
+
+		release_args.ctx = (void *) session_data;
+		release_args.secure_id = user_interaction.secure_id;
+
+		err = _ump_ukk_release(&release_args);
+		if (_MALI_OSK_ERR_OK != err) {
+			MSG_ERR(("_ump_ukk_release() also failed when trying to release newly allocated memory in ump_ioctl_allocate()\n"));
+		}
+
+		return -EFAULT;
+	}
+
+	return 0; /* success */
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static ump_dd_handle get_ump_handle_from_dmabuf(struct ump_session_data *session_data,
+		struct dma_buf *dmabuf)
+{
+	ump_session_memory_list_element *session_mem, *tmp;
+	struct dma_buf_attachment *attach;
+	ump_dd_handle ump_handle;
+
+	DEBUG_ASSERT_POINTER(session_data);
+
+	_mali_osk_mutex_wait(session_data->lock);
+
+	_MALI_OSK_LIST_FOREACHENTRY(session_mem, tmp,
+				    &session_data->list_head_session_memory_list,
+				    ump_session_memory_list_element, list) {
+		if (session_mem->mem->import_attach) {
+			attach = session_mem->mem->import_attach;
+			if (attach->dmabuf == dmabuf) {
+				_mali_osk_mutex_signal(session_data->lock);
+				ump_handle = (ump_dd_handle)session_mem->mem;
+				ump_random_mapping_get(device.secure_id_map, ump_dd_secure_id_get(ump_handle));
+				return ump_handle;
+			}
+		}
+	}
+
+	_mali_osk_mutex_signal(session_data->lock);
+
+	return NULL;
+}
+
+int ump_dmabuf_import_wrapper(u32 __user *argument,
+			      struct ump_session_data  *session_data)
+{
+	ump_session_memory_list_element *session = NULL;
+	_ump_uk_dmabuf_s ump_dmabuf;
+	ump_dd_handle ump_handle;
+	ump_dd_physical_block *blocks = NULL;
+	struct dma_buf_attachment *attach = NULL;
+	struct dma_buf *dma_buf;
+	struct sg_table *sgt = NULL;
+	struct scatterlist *sgl;
+	unsigned int i = 0;
+	int ret = 0;
+
+	/* Sanity check input parameters */
+	if (!argument || !session_data) {
+		MSG_ERR(("NULL parameter.\n"));
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&ump_dmabuf, argument,
+			   sizeof(_ump_uk_dmabuf_s))) {
+		MSG_ERR(("copy_from_user() failed.\n"));
+		return -EFAULT;
+	}
+
+	dma_buf = dma_buf_get(ump_dmabuf.fd);
+	if (IS_ERR(dma_buf))
+		return PTR_ERR(dma_buf);
+
+	/*
+	 * if already imported then increase a refcount to the ump descriptor
+	 * and call dma_buf_put() and then go to found to return previous
+	 * ump secure id.
+	 */
+	ump_handle = get_ump_handle_from_dmabuf(session_data, dma_buf);
+	if (ump_handle) {
+		dma_buf_put(dma_buf);
+		goto found;
+	}
+
+	attach = dma_buf_attach(dma_buf, ump_global_mdev);
+	if (IS_ERR(attach)) {
+		ret = PTR_ERR(attach);
+		goto err_dma_buf_put;
+	}
+
+	sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+	if (IS_ERR(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto err_dma_buf_detach;
+	}
+
+	blocks = (ump_dd_physical_block *)_mali_osk_malloc(sizeof(ump_dd_physical_block) * sgt->nents);
+	if (!blocks) {
+		DBG_MSG(1, ("Failed to allocate blocks.\n"));
+		ret = -EFAULT;
+		goto err_dma_buf_unmap;
+	}
+	for_each_sg(sgt->sgl, sgl, sgt->nents, i) {
+		blocks[i].addr = sg_phys(sgl);
+		blocks[i].size = sg_dma_len(sgl);
+	}
+
+	/*
+	 * Initialize the session memory list element, and add it
+	 * to the session object
+	 */
+	session = _mali_osk_calloc(1, sizeof(*session));
+	if (!session) {
+		DBG_MSG(1, ("Failed to allocate session.\n"));
+		ret = -EFAULT;
+		goto err_free_block;
+	}
+
+	ump_handle = ump_dd_handle_create_from_phys_blocks(blocks, i);
+	if (UMP_DD_HANDLE_INVALID == ump_handle) {
+		DBG_MSG(1, ("Failed to create ump handle.\n"));
+		ret = -EFAULT;
+		goto err_free_session;
+	}
+
+	session->mem = (ump_dd_mem *)ump_handle;
+	session->mem->import_attach = attach;
+	session->mem->sgt = sgt;
+
+	_mali_osk_mutex_wait(session_data->lock);
+	_mali_osk_list_add(&(session->list),
+			   &(session_data->list_head_session_memory_list));
+	_mali_osk_mutex_signal(session_data->lock);
+
+	_mali_osk_free(blocks);
+
+found:
+	ump_dmabuf.ctx = (void *)session_data;
+	ump_dmabuf.secure_id = ump_dd_secure_id_get(ump_handle);
+	ump_dmabuf.size = ump_dd_size_get(ump_handle);
+
+	if (copy_to_user(argument, &ump_dmabuf,
+			 sizeof(_ump_uk_dmabuf_s))) {
+		MSG_ERR(("copy_to_user() failed.\n"));
+		ret =  -EFAULT;
+		goto err_release_ump_handle;
+	}
+
+	return ret;
+
+err_release_ump_handle:
+	ump_dd_reference_release(ump_handle);
+err_free_session:
+	_mali_osk_free(session);
+err_free_block:
+	_mali_osk_free(blocks);
+err_dma_buf_unmap:
+	dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL);
+err_dma_buf_detach:
+	dma_buf_detach(dma_buf, attach);
+err_dma_buf_put:
+	dma_buf_put(dma_buf);
+	return ret;
+}
+#endif
diff --git a/ump/linux/ump_ukk_ref_wrappers.h b/ump/linux/ump_ukk_ref_wrappers.h
new file mode 100755
index 0000000..b8b2cce
--- /dev/null
+++ b/ump/linux/ump_ukk_ref_wrappers.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_ukk_wrappers.h
+ * Defines the wrapper functions which turn Linux IOCTL calls into _ukk_ calls for the reference implementation
+ */
+
+#ifndef __UMP_UKK_REF_WRAPPERS_H__
+#define __UMP_UKK_REF_WRAPPERS_H__
+
+#include <linux/kernel.h>
+#include "ump_kernel_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+int ump_allocate_wrapper(u32 __user *argument, struct ump_session_data   *session_data);
+#ifdef CONFIG_DMA_SHARED_BUFFER
+int ump_dmabuf_import_wrapper(u32 __user *argument, struct ump_session_data  *session_data);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __UMP_UKK_REF_WRAPPERS_H__ */
diff --git a/ump/linux/ump_ukk_wrappers.c b/ump/linux/ump_ukk_wrappers.c
new file mode 100755
index 0000000..2a88f50
--- /dev/null
+++ b/ump/linux/ump_ukk_wrappers.c
@@ -0,0 +1,280 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_ukk_wrappers.c
+ * Defines the wrapper functions which turn Linux IOCTL calls into _ukk_ calls
+ */
+
+#include <asm/uaccess.h>             /* user space access */
+
+#include "ump_osk.h"
+#include "ump_uk_types.h"
+#include "ump_ukk.h"
+#include "ump_kernel_common.h"
+
+/*
+ * IOCTL operation; Negotiate version of IOCTL API
+ */
+int ump_get_api_version_wrapper(u32 __user *argument, struct ump_session_data *session_data)
+{
+	_ump_uk_api_version_s version_info;
+	_mali_osk_errcode_t err;
+
+	/* Sanity check input parameters */
+	if (NULL == argument || NULL == session_data) {
+		MSG_ERR(("NULL parameter in ump_ioctl_get_api_version()\n"));
+		return -ENOTTY;
+	}
+
+	/* Copy the user space memory to kernel space (so we safely can read it) */
+	if (0 != copy_from_user(&version_info, argument, sizeof(version_info))) {
+		MSG_ERR(("copy_from_user() in ump_ioctl_get_api_version()\n"));
+		return -EFAULT;
+	}
+
+	version_info.ctx = (void *) session_data;
+	err = _ump_uku_get_api_version(&version_info);
+	if (_MALI_OSK_ERR_OK != err) {
+		MSG_ERR(("_ump_uku_get_api_version() failed in ump_ioctl_get_api_version()\n"));
+		return ump_map_errcode(err);
+	}
+
+	version_info.ctx = NULL;
+
+	/* Copy ouput data back to user space */
+	if (0 != copy_to_user(argument, &version_info, sizeof(version_info))) {
+		MSG_ERR(("copy_to_user() failed in ump_ioctl_get_api_version()\n"));
+		return -EFAULT;
+	}
+
+	return 0; /* success */
+}
+
+
+/*
+ * IOCTL operation; Release reference to specified UMP memory.
+ */
+int ump_release_wrapper(u32 __user *argument, struct ump_session_data   *session_data)
+{
+	_ump_uk_release_s release_args;
+	_mali_osk_errcode_t err;
+
+	/* Sanity check input parameters */
+	if (NULL == session_data) {
+		MSG_ERR(("NULL parameter in ump_ioctl_release()\n"));
+		return -ENOTTY;
+	}
+
+	/* Copy the user space memory to kernel space (so we safely can read it) */
+	if (0 != copy_from_user(&release_args, argument, sizeof(release_args))) {
+		MSG_ERR(("copy_from_user() in ump_ioctl_get_api_version()\n"));
+		return -EFAULT;
+	}
+
+	release_args.ctx = (void *) session_data;
+	err = _ump_ukk_release(&release_args);
+	if (_MALI_OSK_ERR_OK != err) {
+		MSG_ERR(("_ump_ukk_release() failed in ump_ioctl_release()\n"));
+		return ump_map_errcode(err);
+	}
+
+
+	return 0; /* success */
+}
+
+/*
+ * IOCTL operation; Return size for specified UMP memory.
+ */
+int ump_size_get_wrapper(u32 __user *argument, struct ump_session_data   *session_data)
+{
+	_ump_uk_size_get_s user_interaction;
+	_mali_osk_errcode_t err;
+
+	/* Sanity check input parameters */
+	if (NULL == argument || NULL == session_data) {
+		MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n"));
+		return -ENOTTY;
+	}
+
+	if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) {
+		MSG_ERR(("copy_from_user() in ump_ioctl_size_get()\n"));
+		return -EFAULT;
+	}
+
+	user_interaction.ctx = (void *) session_data;
+	err = _ump_ukk_size_get(&user_interaction);
+	if (_MALI_OSK_ERR_OK != err) {
+		MSG_ERR(("_ump_ukk_size_get() failed in ump_ioctl_size_get()\n"));
+		return ump_map_errcode(err);
+	}
+
+	user_interaction.ctx = NULL;
+
+	if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) {
+		MSG_ERR(("copy_to_user() failed in ump_ioctl_size_get()\n"));
+		return -EFAULT;
+	}
+
+	return 0; /* success */
+}
+
+/*
+ * IOCTL operation; Do cache maintenance on specified UMP memory.
+ */
+int ump_msync_wrapper(u32 __user *argument, struct ump_session_data   *session_data)
+{
+	_ump_uk_msync_s user_interaction;
+
+	/* Sanity check input parameters */
+	if (NULL == argument || NULL == session_data) {
+		MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n"));
+		return -ENOTTY;
+	}
+
+	if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) {
+		MSG_ERR(("copy_from_user() in ump_ioctl_msync()\n"));
+		return -EFAULT;
+	}
+
+	user_interaction.ctx = (void *) session_data;
+
+	_ump_ukk_msync(&user_interaction);
+
+	user_interaction.ctx = NULL;
+
+	if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) {
+		MSG_ERR(("copy_to_user() failed in ump_ioctl_msync()\n"));
+		return -EFAULT;
+	}
+
+	return 0; /* success */
+}
+int ump_cache_operations_control_wrapper(u32 __user *argument, struct ump_session_data   *session_data)
+{
+	_ump_uk_cache_operations_control_s user_interaction;
+
+	/* Sanity check input parameters */
+	if (NULL == argument || NULL == session_data) {
+		MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n"));
+		return -ENOTTY;
+	}
+
+	if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) {
+		MSG_ERR(("copy_from_user() in ump_ioctl_cache_operations_control()\n"));
+		return -EFAULT;
+	}
+
+	user_interaction.ctx = (void *) session_data;
+
+	_ump_ukk_cache_operations_control((_ump_uk_cache_operations_control_s *) &user_interaction);
+
+	user_interaction.ctx = NULL;
+
+#if 0  /* No data to copy back */
+	if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) {
+		MSG_ERR(("copy_to_user() failed in ump_ioctl_cache_operations_control()\n"));
+		return -EFAULT;
+	}
+#endif
+	return 0; /* success */
+}
+
+int ump_switch_hw_usage_wrapper(u32 __user *argument, struct ump_session_data   *session_data)
+{
+	_ump_uk_switch_hw_usage_s user_interaction;
+
+	/* Sanity check input parameters */
+	if (NULL == argument || NULL == session_data) {
+		MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n"));
+		return -ENOTTY;
+	}
+
+	if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) {
+		MSG_ERR(("copy_from_user() in ump_ioctl_switch_hw_usage()\n"));
+		return -EFAULT;
+	}
+
+	user_interaction.ctx = (void *) session_data;
+
+	_ump_ukk_switch_hw_usage(&user_interaction);
+
+	user_interaction.ctx = NULL;
+
+#if 0  /* No data to copy back */
+	if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) {
+		MSG_ERR(("copy_to_user() failed in ump_ioctl_switch_hw_usage()\n"));
+		return -EFAULT;
+	}
+#endif
+	return 0; /* success */
+}
+
+int ump_lock_wrapper(u32 __user *argument, struct ump_session_data   *session_data)
+{
+	_ump_uk_lock_s user_interaction;
+
+	/* Sanity check input parameters */
+	if (NULL == argument || NULL == session_data) {
+		MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n"));
+		return -ENOTTY;
+	}
+
+	if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) {
+		MSG_ERR(("copy_from_user() in ump_ioctl_switch_hw_usage()\n"));
+		return -EFAULT;
+	}
+
+	user_interaction.ctx = (void *) session_data;
+
+	_ump_ukk_lock(&user_interaction);
+
+	user_interaction.ctx = NULL;
+
+#if 0  /* No data to copy back */
+	if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) {
+		MSG_ERR(("copy_to_user() failed in ump_ioctl_switch_hw_usage()\n"));
+		return -EFAULT;
+	}
+#endif
+
+	return 0; /* success */
+}
+
+int ump_unlock_wrapper(u32 __user *argument, struct ump_session_data   *session_data)
+{
+	_ump_uk_unlock_s user_interaction;
+
+	/* Sanity check input parameters */
+	if (NULL == argument || NULL == session_data) {
+		MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n"));
+		return -ENOTTY;
+	}
+
+	if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) {
+		MSG_ERR(("copy_from_user() in ump_ioctl_switch_hw_usage()\n"));
+		return -EFAULT;
+	}
+
+	user_interaction.ctx = (void *) session_data;
+
+	_ump_ukk_unlock(&user_interaction);
+
+	user_interaction.ctx = NULL;
+
+#if 0  /* No data to copy back */
+	if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) {
+		MSG_ERR(("copy_to_user() failed in ump_ioctl_switch_hw_usage()\n"));
+		return -EFAULT;
+	}
+#endif
+
+	return 0; /* success */
+}
diff --git a/ump/linux/ump_ukk_wrappers.h b/ump/linux/ump_ukk_wrappers.h
new file mode 100755
index 0000000..1adad5b
--- /dev/null
+++ b/ump/linux/ump_ukk_wrappers.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file ump_ukk_wrappers.h
+ * Defines the wrapper functions which turn Linux IOCTL calls into _ukk_ calls
+ */
+
+#ifndef __UMP_UKK_WRAPPERS_H__
+#define __UMP_UKK_WRAPPERS_H__
+
+#include <linux/kernel.h>
+#include "ump_kernel_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+
+int ump_get_api_version_wrapper(u32 __user *argument, struct ump_session_data *session_data);
+int ump_release_wrapper(u32 __user *argument, struct ump_session_data   *session_data);
+int ump_size_get_wrapper(u32 __user *argument, struct ump_session_data   *session_data);
+int ump_msync_wrapper(u32 __user *argument, struct ump_session_data   *session_data);
+int ump_cache_operations_control_wrapper(u32 __user *argument, struct ump_session_data   *session_data);
+int ump_switch_hw_usage_wrapper(u32 __user *argument, struct ump_session_data   *session_data);
+int ump_lock_wrapper(u32 __user *argument, struct ump_session_data   *session_data);
+int ump_unlock_wrapper(u32 __user *argument, struct ump_session_data   *session_data);
+
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+
+#endif /* __UMP_UKK_WRAPPERS_H__ */
diff --git a/ump/readme.txt b/ump/readme.txt
new file mode 100755
index 0000000..c238cf0
--- /dev/null
+++ b/ump/readme.txt
@@ -0,0 +1,28 @@
+Building the UMP Device Driver for Linux
+----------------------------------------
+
+Build the UMP Device Driver for Linux by running the following make command:
+
+KDIR=<kdir_path> CONFIG=<your_config> BUILD=<build_option> make
+
+where
+    kdir_path: Path to your Linux Kernel directory
+    your_config: Name of the sub-folder to find the required config.h file
+                 ("arch-" will be prepended)
+    build_option: debug or release. Debug is default.
+
+The config.h contains following configuration parameters:
+
+ARCH_UMP_BACKEND_DEFAULT
+    0 specifies the dedicated memory allocator.
+    1 specifies the OS memory allocator.
+ARCH_UMP_MEMORY_ADDRESS_DEFAULT
+    This is only required for the dedicated memory allocator, and specifies
+    the physical start address of the memory block reserved for UMP.
+ARCH_UMP_MEMORY_SIZE_DEFAULT
+    This specified the size of the memory block reserved for UMP, or the
+    maximum limit for allocations from the OS.
+
+The result will be a ump.ko file, which can be loaded into the Linux kernel
+by using the insmod command. The driver can also be built as a part of the
+kernel itself.
diff --git a/umplock/Makefile b/umplock/Makefile
new file mode 100755
index 0000000..d3dc0e3
--- /dev/null
+++ b/umplock/Makefile
@@ -0,0 +1,69 @@
+#
+# Copyright (C) 2012, 2016 ARM Limited. All rights reserved.
+# 
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+# 
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+
+ifneq ($(KERNELRELEASE),)
+# Inside the kernel build system
+
+EXTRA_CFLAGS += -I$(KBUILD_EXTMOD)
+
+SRC =	umplock_driver.c
+
+MODULE:=umplock.ko
+
+obj-m := $(MODULE:.ko=.o)
+$(MODULE:.ko=-y) := $(SRC:.c=.o)
+
+$(MODULE:.ko=-objs) := $(SRC:.c=.o) 
+
+else
+# Outside the kernel build system
+#
+#
+
+# Get any user defined KDIR-<names> or maybe even a hardcoded KDIR
+-include KDIR_CONFIGURATION
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+ifeq ($(ARCH), arm)
+	# when compiling for ARM we're cross compiling
+	export CROSS_COMPILE ?= arm-none-linux-gnueabi-
+	CONFIG ?= arm
+else
+	# Compiling for the host
+	CONFIG ?= $(shell uname -m)
+endif
+
+# default cpu to select
+CPU ?= $(shell uname -m)
+
+# look up KDIR based om CPU selection
+KDIR ?= $(KDIR-$(CPU))
+
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(CPU))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR)
+
+kernelrelease:
+	$(MAKE) -C $(KDIR) kernelrelease
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+endif
diff --git a/umplock/umplock_driver.c b/umplock/umplock_driver.c
new file mode 100755
index 0000000..ccd96d3
--- /dev/null
+++ b/umplock/umplock_driver.c
@@ -0,0 +1,618 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <asm/uaccess.h>
+#include "umplock_ioctl.h"
+#include <linux/sched.h>
+
+#define MAX_ITEMS 1024
+#define MAX_PIDS 128
+
+typedef struct lock_cmd_priv {
+	uint32_t msg[128];    /*ioctl args*/
+	u32 pid;              /*process id*/
+} _lock_cmd_priv;
+
+typedef struct lock_ref {
+	int ref_count;
+	u32 pid;
+	u32 down_count;
+} _lock_ref;
+
+typedef struct umplock_item {
+	u32 secure_id;
+	u32 id_ref_count;
+	u32 owner;
+	_lock_access_usage usage;
+	_lock_ref references[MAX_PIDS];
+	struct semaphore item_lock;
+} umplock_item;
+
+typedef struct umplock_device_private {
+	struct mutex item_list_lock;
+	atomic_t sessions;
+	umplock_item items[MAX_ITEMS];
+	u32 pids[MAX_PIDS];
+} umplock_device_private;
+
+struct umplock_device {
+	struct cdev cdev;
+	struct class *umplock_class;
+};
+
+static struct umplock_device umplock_device;
+static umplock_device_private device;
+static dev_t umplock_dev;
+static char umplock_dev_name[] = "umplock";
+
+int umplock_debug_level = 0;
+module_param(umplock_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(umplock_debug_level, "set umplock_debug_level to print debug messages");
+
+#define PDEBUG(level, fmt, args...) do { if ((level) <= umplock_debug_level) printk(KERN_DEBUG "umplock: " fmt, ##args); } while (0)
+#define PERROR(fmt, args...) do { printk(KERN_ERR "umplock: " fmt, ##args); } while (0)
+
+int umplock_find_item(u32 secure_id)
+{
+	int i;
+	for (i = 0; i < MAX_ITEMS; i++) {
+		if (device.items[i].secure_id == secure_id) {
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+static int umplock_find_item_by_pid(_lock_cmd_priv *lock_cmd, int *item_slot, int *ref_slot)
+{
+	_lock_item_s *lock_item;
+	int i, j;
+
+	lock_item = (_lock_item_s *)&lock_cmd->msg;
+
+	i = umplock_find_item(lock_item->secure_id);
+
+	if (i < 0) {
+		return -1;
+	}
+
+	for (j = 0; j < MAX_PIDS; j++) {
+		if (device.items[i].references[j].pid == lock_cmd->pid) {
+			*item_slot = i;
+			*ref_slot = j;
+			return 0;
+		}
+	}
+	return -1 ;
+}
+
+static int umplock_find_client_valid(u32 pid)
+{
+	int i;
+
+	if (pid == 0) {
+		return -1;
+	}
+
+	for (i = 0; i < MAX_PIDS; i++) {
+		if (device.pids[i] == pid) {
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+static int do_umplock_create_locked(_lock_cmd_priv *lock_cmd)
+{
+	int i_index, ref_index;
+	int ret;
+	_lock_item_s *lock_item = (_lock_item_s *)&lock_cmd->msg;
+
+	i_index = ref_index = -1;
+
+	ret = umplock_find_client_valid(lock_cmd->pid);
+	if (ret < 0) {
+		/*lock request from an invalid client pid, do nothing*/
+		return -EINVAL;
+	}
+
+	ret = umplock_find_item_by_pid(lock_cmd, &i_index, &ref_index);
+	if (ret >= 0) {
+	} else if ((i_index = umplock_find_item(lock_item->secure_id)) >= 0) {
+		for (ref_index = 0; ref_index < MAX_PIDS; ref_index++) {
+			if (device.items[i_index].references[ref_index].pid == 0) {
+				break;
+			}
+		}
+		if (ref_index < MAX_PIDS) {
+			device.items[i_index].references[ref_index].pid = lock_cmd->pid;
+			device.items[i_index].references[ref_index].ref_count = 0;
+			device.items[i_index].references[ref_index].down_count = 0;
+		} else {
+			PERROR("whoops, item ran out of available reference slots\n");
+			return -EINVAL;
+
+		}
+	} else {
+		i_index = umplock_find_item(0);
+
+		if (i_index >= 0) {
+			device.items[i_index].secure_id = lock_item->secure_id;
+			device.items[i_index].id_ref_count = 0;
+			device.items[i_index].usage = lock_item->usage;
+			device.items[i_index].references[0].pid = lock_cmd->pid;
+			device.items[i_index].references[0].ref_count = 0;
+			device.items[i_index].references[0].down_count = 0;
+			sema_init(&device.items[i_index].item_lock, 1);
+		} else {
+			PERROR("whoops, ran out of available slots\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+/** IOCTLs **/
+
+static int do_umplock_create(_lock_cmd_priv *lock_cmd)
+{
+	return 0;
+}
+
+static int do_umplock_process(_lock_cmd_priv *lock_cmd)
+{
+	int ret, i_index, ref_index;
+	_lock_item_s *lock_item = (_lock_item_s *)&lock_cmd->msg;
+
+	mutex_lock(&device.item_list_lock);
+
+	if (0 == lock_item->secure_id) {
+		PERROR("IOCTL_UMPLOCK_PROCESS called with secure_id is 0, pid: %d\n", lock_cmd->pid);
+		mutex_unlock(&device.item_list_lock);
+		return -EINVAL;
+	}
+
+	ret = do_umplock_create_locked(lock_cmd);
+	if (ret < 0) {
+		mutex_unlock(&device.item_list_lock);
+		return -EINVAL;
+	}
+
+	ret = umplock_find_item_by_pid(lock_cmd, &i_index, &ref_index);
+	if (ret < 0) {
+		/*fail to find a item*/
+		PERROR("IOCTL_UMPLOCK_PROCESS called with invalid parameter, pid: %d\n", lock_cmd->pid);
+		mutex_unlock(&device.item_list_lock);
+		return -EINVAL;
+	}
+	device.items[i_index].references[ref_index].ref_count++;
+	device.items[i_index].id_ref_count++;
+	PDEBUG(1, "try to lock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count);
+
+	if (lock_cmd->pid == device.items[i_index].owner) {
+		PDEBUG(1, "already own the lock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count);
+		mutex_unlock(&device.item_list_lock);
+		return 0;
+	}
+
+	device.items[i_index].references[ref_index].down_count++;
+	mutex_unlock(&device.item_list_lock);
+	if (down_interruptible(&device.items[i_index].item_lock)) {
+		/*wait up without hold the umplock. restore previous state and return*/
+		mutex_lock(&device.item_list_lock);
+		device.items[i_index].references[ref_index].ref_count--;
+		device.items[i_index].id_ref_count--;
+		device.items[i_index].references[ref_index].down_count--;
+		if (0 == device.items[i_index].references[ref_index].ref_count) {
+			device.items[i_index].references[ref_index].pid = 0;
+			if (0 == device.items[i_index].id_ref_count) {
+				PDEBUG(1, "release item, pid: %d, secure_id: 0x%x\n", lock_cmd->pid, lock_item->secure_id);
+				device.items[i_index].secure_id = 0;
+			}
+		}
+
+		PERROR("failed lock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count);
+
+		mutex_unlock(&device.item_list_lock);
+		return -ERESTARTSYS;
+	}
+
+	mutex_lock(&device.item_list_lock);
+	PDEBUG(1, "got lock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count);
+	device.items[i_index].owner = lock_cmd->pid;
+	mutex_unlock(&device.item_list_lock);
+
+	return 0;
+}
+
+static int do_umplock_release(_lock_cmd_priv *lock_cmd)
+{
+	int ret, i_index, ref_index, call_up;
+	_lock_item_s *lock_item = (_lock_item_s *)&lock_cmd->msg;
+
+	mutex_lock(&device.item_list_lock);
+
+	if (0 == lock_item->secure_id) {
+		PERROR("IOCTL_UMPLOCK_RELEASE called with secure_id is 0, pid: %d\n", lock_cmd->pid);
+		mutex_unlock(&device.item_list_lock);
+		return -EINVAL;
+	}
+
+	ret = umplock_find_client_valid(lock_cmd->pid);
+	if (ret < 0) {
+		/*lock request from an invalid client pid, do nothing*/
+		mutex_unlock(&device.item_list_lock);
+		return -EPERM;
+	}
+
+	i_index = ref_index = -1;
+
+	ret = umplock_find_item_by_pid(lock_cmd, &i_index, &ref_index);
+	if (ret < 0) {
+		/*fail to find item*/
+		PERROR("IOCTL_UMPLOCK_RELEASE called with invalid parameter pid: %d, secid: 0x%x\n", lock_cmd->pid, lock_item->secure_id);
+		mutex_unlock(&device.item_list_lock);
+		return -EINVAL;
+	}
+
+	/* if the lock is not owned by this process */
+	if (lock_cmd->pid != device.items[i_index].owner) {
+		mutex_unlock(&device.item_list_lock);
+		return -EPERM;
+	}
+
+	/* if the ref_count is 0, that means nothing to unlock, just return */
+	if (0 == device.items[i_index].references[ref_index].ref_count) {
+		mutex_unlock(&device.item_list_lock);
+		return 0;
+	}
+
+	device.items[i_index].references[ref_index].ref_count--;
+	device.items[i_index].id_ref_count--;
+	PDEBUG(1, "unlock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count);
+
+	call_up = 0;
+	if (device.items[i_index].references[ref_index].down_count > 1) {
+		call_up = 1;
+		device.items[i_index].references[ref_index].down_count--;
+	}
+	if (0 == device.items[i_index].references[ref_index].ref_count) {
+		device.items[i_index].references[ref_index].pid = 0;
+		if (0 == device.items[i_index].id_ref_count) {
+			PDEBUG(1, "release item, pid: %d, secure_id: 0x%x\n", lock_cmd->pid, lock_item->secure_id);
+			device.items[i_index].secure_id = 0;
+		}
+		device.items[i_index].owner = 0;
+		call_up = 1;
+	}
+	if (call_up) {
+		PDEBUG(1, "call up, pid: %d, secure_id: 0x%x\n", lock_cmd->pid, lock_item->secure_id);
+		up(&device.items[i_index].item_lock);
+	}
+	mutex_unlock(&device.item_list_lock);
+
+	return 0;
+}
+
+static int do_umplock_zap(void)
+{
+	int i;
+
+	PDEBUG(1, "ZAP ALL ENTRIES!\n");
+
+	mutex_lock(&device.item_list_lock);
+
+	for (i = 0; i < MAX_ITEMS; i++) {
+		device.items[i].secure_id = 0;
+		memset(&device.items[i].references, 0, sizeof(_lock_ref) * MAX_PIDS);
+		sema_init(&device.items[i].item_lock, 1);
+	}
+
+	for (i = 0; i < MAX_PIDS; i++) {
+		device.pids[i] = 0;
+	}
+	mutex_unlock(&device.item_list_lock);
+
+	return 0;
+}
+
+static int do_umplock_dump(void)
+{
+	int i, j;
+
+	mutex_lock(&device.item_list_lock);
+	PERROR("dump all the items begin\n");
+	for (i = 0; i < MAX_ITEMS; i++) {
+		for (j = 0; j < MAX_PIDS; j++) {
+			if (device.items[i].secure_id != 0 && device.items[i].references[j].pid != 0) {
+				PERROR("item[%d]->secure_id=0x%x, owner=%d\t reference[%d].ref_count=%d.pid=%d\n",
+				       i,
+				       device.items[i].secure_id,
+				       device.items[i].owner,
+				       j,
+				       device.items[i].references[j].ref_count,
+				       device.items[i].references[j].pid);
+			}
+		}
+	}
+	PERROR("dump all the items end\n");
+	mutex_unlock(&device.item_list_lock);
+
+	return 0;
+}
+
+int do_umplock_client_add(_lock_cmd_priv *lock_cmd)
+{
+	int i;
+	mutex_lock(&device.item_list_lock);
+	for (i = 0; i < MAX_PIDS; i++) {
+		if (device.pids[i] == lock_cmd->pid) {
+			mutex_unlock(&device.item_list_lock);
+			return 0;
+		}
+	}
+	for (i = 0; i < MAX_PIDS; i++) {
+		if (device.pids[i] == 0) {
+			device.pids[i] = lock_cmd->pid;
+			break;
+		}
+	}
+	mutex_unlock(&device.item_list_lock);
+	if (i == MAX_PIDS) {
+		PERROR("Oops, Run out of client slots\n ");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int do_umplock_client_delete(_lock_cmd_priv *lock_cmd)
+{
+	int p_index = -1, i_index = -1, ref_index = -1;
+	int ret;
+	_lock_item_s *lock_item;
+	lock_item = (_lock_item_s *)&lock_cmd->msg;
+
+	mutex_lock(&device.item_list_lock);
+	p_index = umplock_find_client_valid(lock_cmd->pid);
+	/*lock item pid is not valid.*/
+	if (p_index < 0) {
+		mutex_unlock(&device.item_list_lock);
+		return 0;
+	}
+
+	/*walk through umplock item list and release reference attached to this client*/
+	for (i_index = 0; i_index < MAX_ITEMS; i_index++) {
+		lock_item->secure_id = device.items[i_index].secure_id;
+
+		/*find the item index and reference slot for the lock_item*/
+		ret = umplock_find_item_by_pid(lock_cmd, &i_index, &ref_index);
+
+		if (ret < 0) {
+			/*client has no reference on this umplock item, skip*/
+			continue;
+		}
+		while (device.items[i_index].references[ref_index].ref_count) {
+			/*release references on this client*/
+
+			PDEBUG(1, "delete client, pid: %d, ref_count: %d\n", lock_cmd->pid, device.items[i_index].references[ref_index].ref_count);
+
+			mutex_unlock(&device.item_list_lock);
+			do_umplock_release(lock_cmd);
+			mutex_lock(&device.item_list_lock);
+		}
+	}
+
+	/*remove the pid from umplock valid pid list*/
+	device.pids[p_index] = 0;
+	mutex_unlock(&device.item_list_lock);
+
+	return 0;
+}
+
+static long umplock_driver_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+	int ret;
+	uint32_t size = _IOC_SIZE(cmd);
+	_lock_cmd_priv lock_cmd ;
+
+	if (_IOC_TYPE(cmd) != LOCK_IOCTL_GROUP) {
+		return -ENOTTY;
+	}
+
+	if (_IOC_NR(cmd) >= LOCK_IOCTL_MAX_CMDS) {
+		return -ENOTTY;
+	}
+
+	switch (cmd) {
+	case LOCK_IOCTL_CREATE:
+		if (size != sizeof(_lock_item_s)) {
+			return -ENOTTY;
+		}
+
+		if (copy_from_user(&lock_cmd.msg, (void __user *)arg, size)) {
+			return -EFAULT;
+		}
+		lock_cmd.pid = (u32)current->tgid;
+		ret = do_umplock_create(&lock_cmd);
+		if (ret) {
+			return ret;
+		}
+		return 0;
+
+	case LOCK_IOCTL_PROCESS:
+		if (size != sizeof(_lock_item_s)) {
+			return -ENOTTY;
+		}
+
+		if (copy_from_user(&lock_cmd.msg, (void __user *)arg, size)) {
+			return -EFAULT;
+		}
+		lock_cmd.pid = (u32)current->tgid;
+		return do_umplock_process(&lock_cmd);
+
+	case LOCK_IOCTL_RELEASE:
+		if (size != sizeof(_lock_item_s)) {
+			return -ENOTTY;
+		}
+
+		if (copy_from_user(&lock_cmd.msg, (void __user *)arg, size)) {
+			return -EFAULT;
+		}
+		lock_cmd.pid = (u32)current->tgid;
+		ret = do_umplock_release(&lock_cmd);
+		if (ret) {
+			return ret;
+		}
+		return 0;
+
+	case LOCK_IOCTL_ZAP:
+		do_umplock_zap();
+		return 0;
+
+	case LOCK_IOCTL_DUMP:
+		do_umplock_dump();
+		return 0;
+	}
+
+	return -ENOIOCTLCMD;
+}
+
+static int umplock_driver_open(struct inode *inode, struct file *filp)
+{
+	_lock_cmd_priv lock_cmd;
+
+	atomic_inc(&device.sessions);
+	PDEBUG(1, "OPEN SESSION (%i references)\n", atomic_read(&device.sessions));
+
+	lock_cmd.pid = (u32)current->tgid;
+	do_umplock_client_add(&lock_cmd);
+
+	return 0;
+}
+
+static int umplock_driver_release(struct inode *inode, struct file *filp)
+{
+	int sessions = 0;
+	_lock_cmd_priv lock_cmd;
+
+	lock_cmd.pid = (u32)current->tgid;
+	do_umplock_client_delete(&lock_cmd);
+
+	mutex_lock(&device.item_list_lock);
+	atomic_dec(&device.sessions);
+	sessions = atomic_read(&device.sessions);
+	PDEBUG(1, "CLOSE SESSION (%i references)\n", sessions);
+	mutex_unlock(&device.item_list_lock);
+	if (sessions == 0) {
+		do_umplock_zap();
+	}
+
+	return 0;
+}
+
+static struct file_operations umplock_fops = {
+	.owner   = THIS_MODULE,
+	.open    = umplock_driver_open,
+	.release = umplock_driver_release,
+	.unlocked_ioctl = umplock_driver_ioctl,
+};
+
+int umplock_device_initialize(void)
+{
+	int err;
+
+	err = alloc_chrdev_region(&umplock_dev, 0, 1, umplock_dev_name);
+
+	if (0 == err) {
+		memset(&umplock_device, 0, sizeof(umplock_device));
+		cdev_init(&umplock_device.cdev, &umplock_fops);
+		umplock_device.cdev.owner = THIS_MODULE;
+		umplock_device.cdev.ops = &umplock_fops;
+
+		err = cdev_add(&umplock_device.cdev, umplock_dev, 1);
+		if (0 == err) {
+			umplock_device.umplock_class = class_create(THIS_MODULE, umplock_dev_name);
+			if (IS_ERR(umplock_device.umplock_class)) {
+				err = PTR_ERR(umplock_device.umplock_class);
+			} else {
+				struct device *mdev;
+				mdev = device_create(umplock_device.umplock_class, NULL, umplock_dev, NULL, umplock_dev_name);
+				if (!IS_ERR(mdev)) {
+					return 0; /* all ok */
+				}
+
+				err = PTR_ERR(mdev);
+				class_destroy(umplock_device.umplock_class);
+			}
+			cdev_del(&umplock_device.cdev);
+		}
+
+		unregister_chrdev_region(umplock_dev, 1);
+	} else {
+		PERROR("alloc chardev region failed\n");
+	}
+
+	return err;
+}
+
+void umplock_device_terminate(void)
+{
+	device_destroy(umplock_device.umplock_class, umplock_dev);
+	class_destroy(umplock_device.umplock_class);
+
+	cdev_del(&umplock_device.cdev);
+	unregister_chrdev_region(umplock_dev, 1);
+}
+
+static int __init umplock_initialize_module(void)
+{
+	PDEBUG(1, "Inserting UMP lock device driver. Compiled: %s, time: %s\n", __DATE__, __TIME__);
+
+	mutex_init(&device.item_list_lock);
+	if (umplock_device_initialize() != 0) {
+		PERROR("UMP lock device driver init failed\n");
+		return -ENOTTY;
+	}
+	memset(&device.items, 0, sizeof(umplock_item) * MAX_ITEMS);
+	memset(&device.pids, 0, sizeof(u32) * MAX_PIDS);
+	atomic_set(&device.sessions, 0);
+
+	PDEBUG(1, "UMP lock device driver loaded\n");
+
+	return 0;
+}
+
+static void __exit umplock_cleanup_module(void)
+{
+	PDEBUG(1, "unloading UMP lock module\n");
+
+	memset(&device.items, 0, sizeof(umplock_item) * MAX_ITEMS);
+	memset(&device.pids, 0, sizeof(u32) * MAX_PIDS);
+	umplock_device_terminate();
+	mutex_destroy(&device.item_list_lock);
+
+	PDEBUG(1, "UMP lock module unloaded\n");
+}
+
+module_init(umplock_initialize_module);
+module_exit(umplock_cleanup_module);
+
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_DESCRIPTION("ARM UMP locker");
diff --git a/umplock/umplock_ioctl.h b/umplock/umplock_ioctl.h
new file mode 100755
index 0000000..32462f3
--- /dev/null
+++ b/umplock/umplock_ioctl.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2012-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library. 
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules 
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting 
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions 
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify 
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so. 
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef __UMPLOCK_IOCTL_H__
+#define __UMPLOCK_IOCTL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#ifndef __user
+#define __user
+#endif
+
+
+/**
+ * @file umplock_ioctl.h
+ * This file describes the interface needed to use the Linux device driver.
+ * The interface is used by the userpace Mali DDK.
+ */
+
+typedef enum {
+	_LOCK_ACCESS_RENDERABLE = 1,
+	_LOCK_ACCESS_TEXTURE,
+	_LOCK_ACCESS_CPU_WRITE,
+	_LOCK_ACCESS_CPU_READ,
+} _lock_access_usage;
+
+typedef struct _lock_item_s {
+	unsigned int secure_id;
+	_lock_access_usage usage;
+} _lock_item_s;
+
+
+#define LOCK_IOCTL_GROUP 0x91
+
+#define _LOCK_IOCTL_CREATE_CMD  0   /* create kernel lock item        */
+#define _LOCK_IOCTL_PROCESS_CMD 1   /* process kernel lock item       */
+#define _LOCK_IOCTL_RELEASE_CMD 2   /* release kernel lock item       */
+#define _LOCK_IOCTL_ZAP_CMD     3   /* clean up all kernel lock items */
+#define _LOCK_IOCTL_DUMP_CMD    4   /* dump all the items */
+
+#define LOCK_IOCTL_MAX_CMDS     5
+
+#define LOCK_IOCTL_CREATE  _IOW( LOCK_IOCTL_GROUP, _LOCK_IOCTL_CREATE_CMD,  _lock_item_s )
+#define LOCK_IOCTL_PROCESS _IOW( LOCK_IOCTL_GROUP, _LOCK_IOCTL_PROCESS_CMD, _lock_item_s )
+#define LOCK_IOCTL_RELEASE _IOW( LOCK_IOCTL_GROUP, _LOCK_IOCTL_RELEASE_CMD, _lock_item_s )
+#define LOCK_IOCTL_ZAP     _IO ( LOCK_IOCTL_GROUP, _LOCK_IOCTL_ZAP_CMD )
+#define LOCK_IOCTL_DUMP    _IO ( LOCK_IOCTL_GROUP, _LOCK_IOCTL_DUMP_CMD )
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __UMPLOCK_IOCTL_H__ */
+
diff --git a/utgard/Makefile b/utgard/Makefile
new file mode 100644
index 0000000..e38ce50
--- /dev/null
+++ b/utgard/Makefile
@@ -0,0 +1,40 @@
+#$(warning src is $(src))
+#$(warning MAKEFILE_LIST is $(MAKEFILE_LIST))
+
+GPU_DRV_VERSION?=r7p0
+
+KDIR?=/home/jiyu.yang/source/br-wayland/output/meson8b_m201/build/linux/
+ARCH?=arm
+CROSS_COMPILE?=arm-linux-gnueabihf-
+MALI400?=m
+EXTRA_CFLAGS+="-DCONFIG_MALI400=m"
+UTGARD_MAKEFILE:=$(shell pwd)/$(lastword $(MAKEFILE_LIST))
+UTGARD_DIR=$(dir $(UTGARD_MAKEFILE))
+
+.ONESHELL:
+gpu:
+	echo "utgard makefile dir is $(UTGARD_MAKEFILE)"; \
+	echo "utgard dir is $(UTGARD_DIR)"; \
+	echo "gpu build"; \
+	echo "MAKE is $(MAKE)"; \
+	echo "MAKE1 is $(MAKE1)"; \
+	if [ ! -d $(GPU_DRV_VERSION) ]; then \
+		ln -s $(UTGARD_DIR)../mali $(GPU_DRV_VERSION); \
+	fi ;\
+	cd $(GPU_DRV_VERSION); \
+	if [ ! -d platform ]; then \
+		ln -s $(UTGARD_DIR)platform platform; \
+	fi ;\
+	$(MAKE) -C $(KDIR) M=`pwd` CONFIG_MALI400=$(MALI400) modules
+
+clean:
+	echo "clean this module"
+	cd $(UTGARD_DIR)
+	-cd $(GPU_DRV_VERSION)
+	find . -name "*.o" -o -name "*.cmd" -o -name "*~" | xargs rm -rf
+	find . -name "Module.symvers" -o -name "*.ko" -o -name "*.mod.c" \
+	-o -name "modules.order" -o -name "*pyc" -o -name __malidrv_build_info.c | xargs rm -f
+	-cd $(UTGARD_DIR)
+	find . -name "*.o" -o -name "*.cmd" -o -name "*~" | xargs rm -rf
+	find . -name "Module.symvers" -o -name "*.ko" -o -name "*.mod.c" \
+	-o -name "modules.order" -o -name "*pyc" -o -name __malidrv_build_info.c | xargs rm -f
diff --git a/utgard/platform/Kbuild.amlogic b/utgard/platform/Kbuild.amlogic
new file mode 100644
index 0000000..51a8d8d
--- /dev/null
+++ b/utgard/platform/Kbuild.amlogic
@@ -0,0 +1,159 @@
+ifeq ($(CONFIG_MALI_DVFS),y)
+    EXTRA_DEFINES += -DCONFIG_MALI_DVFS
+    USING_GPU_UTILIZATION=0
+    USING_DVFS=1
+else
+    USING_GPU_UTILIZATION=1
+    USING_DVFS=0
+endif
+
+ifeq ($(CONFIG_MALI400_DEBUG),y)
+	BUILD ?= debug
+else
+	BUILD ?= release
+	ldflags-y += --strip-debug
+endif
+
+#########
+TARGET_PLATFORM:=meson_bu
+ifeq ($(CONFIG_ARCH_MESON1),y)
+TARGET_PLATFORM:= meson_m400
+endif
+ifeq ($(CONFIG_ARCH_MESON3),y)
+TARGET_PLATFORM:= meson_m400
+endif
+ifeq ($(CONFIG_ARCH_MESON6),y)
+TARGET_PLATFORM:= meson_m400
+endif
+ifeq ($(CONFIG_ARCH_MESON6TV),y)
+TARGET_PLATFORM:= meson_m400
+endif
+
+ifeq ($(CONFIG_ARCH_MESON8),y)
+TARGET_PLATFORM:= meson_m450
+endif
+ifeq ($(CONFIG_ARCH_MESON6TVD),y)
+TARGET_PLATFORM:= meson_m450
+endif
+ifeq ($(CONFIG_ARCH_MESON8B),y)
+TARGET_PLATFORM:= meson_m450
+endif
+ifeq ($(CONFIG_ARCH_MESONG9TV),y)
+TARGET_PLATFORM:= meson_m450
+endif
+ifeq ($(CONFIG_ARCH_MESONG9BB),y)
+TARGET_PLATFORM:= meson_m450
+endif
+#########
+$(platform Kbuild.amlogic warning TARGET_PLATFORM is  $(TARGET_PLATFORM))
+ifeq ($(TARGET_PLATFORM),meson_m400)
+MALI_PLATFORM_FILES:= \
+	platform/meson_m400/mali_fix.c \
+	platform/meson_m400/mali_platform.c \
+	platform/meson_m400/platform_mx.c
+endif
+
+ifeq ($(TARGET_PLATFORM),meson_m450)
+EXTRA_DEFINES += -DCONFIG_MALI450=y
+MALI_PLATFORM_FILES:= \
+	platform/meson_m450/meson_main.c \
+	platform/meson_m450/mali_pm_device.c \
+	platform/meson_m450/mali_clock.c \
+	platform/meson_m450/mpgpu.c \
+	platform/meson_m450/scaling.c
+
+ifeq ($(CONFIG_ARCH_MESON),y)
+MALI_PLATFORM_FILES+= \
+	platform/meson_m450/platform_m8.c
+endif
+ifeq ($(CONFIG_ARCH_MESON8),y)
+MALI_PLATFORM_FILES+= \
+	platform/meson_m450/platform_m8.c
+endif
+ifeq ($(CONFIG_ARCH_MESON6TVD),y)
+MALI_PLATFORM_FILES+= \
+	platform/meson_m450/platform_m6tvd.c
+endif
+ifeq ($(CONFIG_ARCH_MESON8B),y)
+MALI_PLATFORM_FILES+= \
+	platform/meson_m450/platform_m8b.c
+endif
+ifeq ($(CONFIG_ARCH_MESONG9TV),y)
+MALI_PLATFORM_FILES+= \
+	platform/meson_m450/platform_m8.c
+endif
+ifeq ($(CONFIG_ARCH_MESONG9BB),y)
+MALI_PLATFORM_FILES+= \
+	platform/meson_m450/platform_m8b.c
+endif
+
+endif
+
+ifeq ($(TARGET_PLATFORM),meson_bu)
+ifndef CONFIG_MALI450
+EXTRA_DEFINES += -DCONFIG_MALI450=y
+endif
+
+ccflags-y += -DCONFIG_MALI_DT=y
+ccflags-y += -DMESON_CPU_TYPE=0x80
+ccflags-y += -DMESON_CPU_TYPE_MESON6=0x60
+ccflags-y += -DMESON_CPU_TYPE_MESON6TVD=0x75
+ccflags-y += -DMESON_CPU_TYPE_MESON8=0x80
+ccflags-y += -DMESON_CPU_TYPE_MESON8B=0x8B
+
+USE_GPPLL?=0
+ifdef CONFIG_AM_VIDEO
+    USE_GPPLL:=1
+endif
+
+ccflags-y += -DAMLOGIC_GPU_USE_GPPLL=$(USE_GPPLL)
+
+MALI_PLATFORM_FILES:= \
+	platform/meson_bu/mali_pm_device.c \
+	platform/meson_bu/meson_main2.c \
+	platform/meson_bu/mali_clock.c \
+	platform/meson_bu/mpgpu.c \
+	platform/meson_bu/platform_gx.c
+
+ifeq ($(CONFIG_MALI_DVFS),y)
+MALI_PLATFORM_FILES+= \
+    meson_bu/mali_dvfs.c
+else
+MALI_PLATFORM_FILES+= \
+	platform/meson_bu/scaling.c
+endif
+endif
+
+ifndef CONFIG_DMA_SHARED_BUFFER
+    ccflags-y += -DCONFIG_DMA_SHARED_BUFFER=y
+endif
+
+ifndef CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH
+    ccflags-y += -DCONFIG_MALI_DMA_BUF_MAP_ON_ATTACH=y
+endif
+
+ccflags-y += -I$(src)/platform/$(TARGET_PLATFORM)
+ccflags-y += -DMALI_FAKE_PLATFORM_DEVICE=1
+#$(warning r7p0/Kbuild.plat ccflags-y is $(ccflags-y))
+
+
+#####################################################
+ifeq (true,false)
+ifndef CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH
+    ccflags-y += -DCONFIG_MALI_DMA_BUF_MAP_ON_ATTACH=y
+endif
+
+ccflags-y += -DCONFIG_MALI_DT=y
+ccflags-y += -DMESON_CPU_TYPE=0x80
+ccflags-y += -DMESON_CPU_TYPE_MESON6=0x60
+ccflags-y += -DMESON_CPU_TYPE_MESON6TVD=0x75
+ccflags-y += -DMESON_CPU_TYPE_MESON8=0x80
+ccflags-y += -DMESON_CPU_TYPE_MESON8B=0x8B
+
+USE_GPPLL?=0
+ifdef CONFIG_AM_VIDEO
+    USE_GPPLL:=1
+endif
+
+ccflags-y += -DAMLOGIC_GPU_USE_GPPLL=$(USE_GPPLL)
+endif
diff --git a/utgard/platform/meson_bu/mali_clock.c b/utgard/platform/meson_bu/mali_clock.c
new file mode 100644
index 0000000..b208f3d
--- /dev/null
+++ b/utgard/platform/meson_bu/mali_clock.c
@@ -0,0 +1,680 @@
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include <linux/mali/mali_utgard.h>
+#include <linux/amlogic/cpu_version.h>
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+#ifndef AML_CLK_LOCK_ERROR
+#define AML_CLK_LOCK_ERROR 1
+#endif
+#define GXBBM_MAX_GPU_FREQ 700000000UL
+struct clk;
+static unsigned gpu_dbg_level = 0;
+module_param(gpu_dbg_level, uint, 0644);
+MODULE_PARM_DESC(gpu_dbg_level, "gpu debug level");
+
+#define gpu_dbg(level, fmt, arg...)			\
+	do {			\
+		if (gpu_dbg_level >= (level))		\
+		printk("gpu_debug"fmt , ## arg); 	\
+	} while (0)
+
+#define GPU_CLK_DBG(fmt, arg...)
+
+//disable print
+//#define _dev_info(...)
+
+//static DEFINE_SPINLOCK(lock);
+static mali_plat_info_t* pmali_plat = NULL;
+//static u32 mali_extr_backup = 0;
+//static u32 mali_extr_sample_backup = 0;
+struct timeval start;
+struct timeval end;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16))
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali_0_parent = dvfs_tbl->clkp_handle;
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+#ifdef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+#endif
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_set_parent(clk_mali_0, clk_mali_0_parent);
+
+	clk_prepare_enable(clk_mali_0);
+
+	clk_set_parent(clk_mali, clk_mali_0);
+
+#ifdef AML_CLK_LOCK_ERROR
+	clk_set_parent(clk_mali_1, clk_mali_0_parent);
+	clk_prepare_enable(clk_mali_1);
+#endif
+
+	GPU_CLK_DBG("%s:enable(%d), %s:enable(%d)\n",
+	  clk_mali_0->name,  clk_mali_0->enable_count,
+	  clk_mali_0_parent->name, clk_mali_0_parent->enable_count);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali_0 = pmali_plat->clk_mali_0;
+	struct clk *clk_mali_1 = pmali_plat->clk_mali_1;
+	struct clk *clk_mali_x   = NULL;
+	struct clk *clk_mali_x_parent = NULL;
+	struct clk *clk_mali_x_old = NULL;
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+	clk_mali_x_old  = clk_get_parent(clk_mali);
+
+	if (!clk_mali_x_old) {
+		printk("gpu: could not get clk_mali_x_old or clk_mali_x_old\n");
+		return 0;
+	}
+	if (clk_mali_x_old == clk_mali_0) {
+		clk_mali_x = clk_mali_1;
+	} else if (clk_mali_x_old == clk_mali_1) {
+		clk_mali_x = clk_mali_0;
+	} else {
+		printk("gpu: unmatched clk_mali_x_old\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG("idx=%d, clk_freq=%d\n", idx, dvfs_tbl->clk_freq);
+	clk_mali_x_parent = dvfs_tbl->clkp_handle;
+	if (!clk_mali_x_parent) {
+		printk("gpu: could not get clk_mali_x_parent\n");
+		return 0;
+	}
+
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x_parent, dvfs_tbl->clkp_freq);
+	GPU_CLK_DBG();
+	ret = clk_set_parent(clk_mali_x, clk_mali_x_parent);
+	GPU_CLK_DBG();
+	ret = clk_set_rate(clk_mali_x, dvfs_tbl->clk_freq);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	ret = clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG("new %s:enable(%d)\n", clk_mali_x->name,  clk_mali_x->enable_count);
+	do_gettimeofday(&start);
+	udelay(1);// delay 10ns
+	do_gettimeofday(&end);
+	ret = clk_set_parent(clk_mali, clk_mali_x);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	GPU_CLK_DBG("old %s:enable(%d)\n", clk_mali_x_old->name,  clk_mali_x_old->enable_count);
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+	struct clk *clk_mali = pmali_plat->clk_mali;
+	struct clk *clk_mali_x = NULL;
+
+	clk_mali_x = clk_get_parent(clk_mali);
+	GPU_CLK_DBG();
+#ifndef AML_CLK_LOCK_ERROR
+	clk_prepare_enable(clk_mali_x);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	struct mali_gpu_clk_item *clk_item;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+///////////
+	mpdata->clk_items = devm_kzalloc(&pdev->dev, sizeof(struct mali_gpu_clk_item) * length, GFP_KERNEL);
+	if (mpdata->clk_items == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_item table\n");
+		return -ENOMEM;
+	}
+	clk_item = mpdata->clk_items;
+//
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+#if 0
+#ifdef MESON_CPU_VERSION_OPS
+		if (is_meson_gxbbm_cpu()) {
+			if (dvfs_tbl->clk_freq >= GXBBM_MAX_GPU_FREQ)
+				continue;
+		}
+#endif
+#endif
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+										&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		}
+		dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+		if (IS_ERR(dvfs_tbl->clkp_handle)) {
+			dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+		clk_item->clock = dvfs_tbl->clk_freq / 1000000;
+		clk_item->vol = dvfs_tbl->voltage;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_item ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	if (mpdata->def_clock > mpdata->scale_info.maxclk)
+		mpdata->def_clock = mpdata->scale_info.maxclk;
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+	_dev_info(&pdev->dev, "clock dvfs table size is %d\n", mpdata->dvfs_table_size);
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "clk_mali");
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_mali_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_mali_1");
+	if (IS_ERR(mpdata->clk_mali) || IS_ERR(mpdata->clk_mali_0) || IS_ERR(mpdata->clk_mali_1)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+#else
+int mali_clock_init_clk_tree(struct platform_device* pdev)
+{
+	//mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock];
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+
+	return 0;
+}
+
+int mali_clock_init(mali_plat_info_t *pdev)
+{
+	*pdev = *pdev;
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+
+	ret = critical(param);
+
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	int ret = 0;
+	unsigned int idx = param;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx];
+
+	struct clk *clk_mali   = pmali_plat->clk_mali;
+	unsigned long time_use=0;
+
+
+	GPU_CLK_DBG();
+	do_gettimeofday(&start);
+	ret = clk_set_rate(clk_mali, dvfs_tbl->clk_freq);
+	do_gettimeofday(&end);
+	GPU_CLK_DBG();
+
+#ifndef AML_CLK_LOCK_ERROR
+	clk_disable_unprepare(clk_mali_x_old);
+#endif
+	time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+	GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use);
+
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	GPU_CLK_DBG();
+	clk_disable_unprepare(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+void enable_clock(void)
+{
+#ifndef AML_CLK_LOCK_ERROR
+	struct clk *clk_mali = pmali_plat->clk_mali;
+
+	clk_prepare_enable(clk_mali);
+#endif
+	GPU_CLK_DBG();
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__);
+}
+
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	struct mali_gpu_clk_item *clk_item;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk  is %d\n", mpdata->scale_info.minclk);
+
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "hiu io source  0x%p\n", mpdata->reg_base_aobus);
+
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	of_get_property(gpu_dn, "tbl", &length);
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length);
+
+	mpdata->dvfs_table = devm_kzalloc(&pdev->dev,
+								  sizeof(struct mali_dvfs_threshold_table)*length,
+								  GFP_KERNEL);
+	dvfs_tbl = mpdata->dvfs_table;
+	if (mpdata->dvfs_table == NULL) {
+		dev_err(&pdev->dev, "failed to alloc dvfs table\n");
+		return -ENOMEM;
+	}
+	mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL);
+	if (mpdata->clk_sample == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_sample table\n");
+		return -ENOMEM;
+	}
+	clk_sample = mpdata->clk_sample;
+///////////
+	mpdata->clk_items = devm_kzalloc(&pdev->dev, sizeof(struct mali_gpu_clk_item) * length, GFP_KERNEL);
+	if (mpdata->clk_items == NULL) {
+		dev_err(&pdev->dev, "failed to alloc clk_item table\n");
+		return -ENOMEM;
+	}
+	clk_item = mpdata->clk_items;
+//
+	of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) {
+		dvfs_clk_hdl = (phandle) u;
+		gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl);
+		ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_freq failed\n");
+		}
+
+		ret = of_property_read_string(gpu_clk_dn,"clk_parent",
+				&dvfs_tbl->clk_parent);
+		if (ret) {
+			dev_notice(&pdev->dev, "read clk_parent failed\n");
+		} else if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+			dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent);
+			if (IS_ERR(dvfs_tbl->clkp_handle)) {
+				dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent);
+			}
+			ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq);
+			if (ret) {
+				dev_notice(&pdev->dev, "read clk_parent freq failed\n");
+			}
+		}
+
+		ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage);
+		if (ret) {
+			dev_notice(&pdev->dev, "read voltage failed\n");
+		}
+		ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count);
+		if (ret) {
+			dev_notice(&pdev->dev, "read keep_count failed\n");
+		}
+		//downthreshold and upthreshold shall be u32
+		ret = of_property_read_u32_array(gpu_clk_dn,"threshold",
+		&dvfs_tbl->downthreshold, 2);
+		if (ret) {
+			dev_notice(&pdev->dev, "read threshold failed\n");
+		}
+		dvfs_tbl->freq_index = i;
+		clk_item->clock = dvfs_tbl->clk_freq / 1000000;
+		clk_item->vol = dvfs_tbl->voltage;
+
+		*clk_sample = dvfs_tbl->clk_freq / 1000000;
+
+		dvfs_tbl ++;
+		clk_item ++;
+		clk_sample ++;
+		i++;
+		mpdata->dvfs_table_size ++;
+	}
+
+	ret = of_property_read_u32(gpu_dn,"max_clk",
+			&mpdata->cfg_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2);
+		mpdata->cfg_clock = mpdata->dvfs_table_size-2;
+	}
+
+	mpdata->cfg_clock_bkup = mpdata->cfg_clock;
+	mpdata->maxclk_sysfs = mpdata->cfg_clock;
+	mpdata->scale_info.maxclk = mpdata->cfg_clock;
+	_dev_info(&pdev->dev, "max clk  is %d\n", mpdata->scale_info.maxclk);
+
+	ret = of_property_read_u32(gpu_dn,"turbo_clk",
+			&mpdata->turbo_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1);
+		mpdata->turbo_clock = mpdata->dvfs_table_size-1;
+	}
+	_dev_info(&pdev->dev, "turbo clk  is %d\n", mpdata->turbo_clock);
+
+	ret = of_property_read_u32(gpu_dn,"def_clk",
+			&mpdata->def_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->dvfs_table_size/2-1);
+		mpdata->def_clock = mpdata->dvfs_table_size/2 - 1;
+	}
+	if (mpdata->def_clock > mpdata->scale_info.maxclk)
+		mpdata->def_clock = mpdata->scale_info.maxclk;
+	_dev_info(&pdev->dev, "default clk  is %d\n", mpdata->def_clock);
+
+	dvfs_tbl = mpdata->dvfs_table;
+	clk_sample = mpdata->clk_sample;
+	for (i = 0; i< mpdata->dvfs_table_size; i++) {
+		_dev_info(&pdev->dev, "====================%d====================\n"
+		            "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n",
+					i,
+					dvfs_tbl->clk_freq, dvfs_tbl->clk_parent,
+					dvfs_tbl->voltage,  dvfs_tbl->keep_count,
+					dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample);
+		dvfs_tbl ++;
+		clk_sample ++;
+	}
+	_dev_info(&pdev->dev, "clock dvfs table size is %d\n", mpdata->dvfs_table_size);
+
+	mpdata->clk_mali = devm_clk_get(&pdev->dev, "gpu_mux");
+#if 0
+	mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_mali_0");
+	mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_mali_1");
+#endif
+	if (IS_ERR(mpdata->clk_mali)) {
+		dev_err(&pdev->dev, "failed to get clock pointer\n");
+		return -EFAULT;
+	}
+
+	pmali_plat = mpdata;
+	mpdata->pdev = pdev;
+	return 0;
+}
+
+#endif
diff --git a/utgard/platform/meson_bu/mali_clock.h b/utgard/platform/meson_bu/mali_clock.h
new file mode 100644
index 0000000..9b8b392
--- /dev/null
+++ b/utgard/platform/meson_bu/mali_clock.h
@@ -0,0 +1,37 @@
+#ifndef __MALI_CLOCK_H__
+#define __MALI_CLOCK_H__
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <linux/clk.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 29))
+#include <linux/amlogic/iomap.h>
+#endif
+
+#ifndef HHI_MALI_CLK_CNTL
+#define HHI_MALI_CLK_CNTL   0x6C
+#define mplt_read(r)        readl((pmali_plat->reg_base_hiubus) + ((r)<<2))
+#define mplt_write(r, v)    writel((v), ((pmali_plat->reg_base_hiubus) + ((r)<<2)))
+#define mplt_setbits(r, m)  mplt_write((r), (mplt_read(r) | (m)));
+#define mplt_clrbits(r, m)  mplt_write((r), (mplt_read(r) & (~(m))));
+#endif
+
+//extern int mali_clock_init(struct platform_device *dev);
+int mali_clock_init_clk_tree(struct platform_device *pdev);
+
+typedef int (*critical_t)(size_t param);
+int mali_clock_critical(critical_t critical, size_t param);
+
+int mali_clock_init(mali_plat_info_t*);
+int mali_clock_set(unsigned int index);
+void disable_clock(void);
+void enable_clock(void);
+u32 get_mali_freq(u32 idx);
+void set_str_src(u32 data);
+int mali_dt_info(struct platform_device *pdev,
+        struct mali_plat_info_t *mpdata);
+#endif
diff --git a/utgard/platform/meson_bu/mali_dvfs.c b/utgard/platform/meson_bu/mali_dvfs.c
new file mode 100644
index 0000000..fb4ebef
--- /dev/null
+++ b/utgard/platform/meson_bu/mali_dvfs.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.c
+ * Example core scaling policy.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/mali/mali_utgard.h>
+#include <mali_kernel_common.h>
+#include <mali_osk_profiling.h>
+#include <linux/time.h>
+
+//#include <linux/amlogic/amports/gp_pll.h>
+#include "meson_main2.h"
+
+
+static int currentStep;
+static int  scaling_mode = MALI_PP_FS_SCALING;
+//static int  scaling_mode = MALI_SCALING_DISABLE;
+//static int  scaling_mode = MALI_PP_SCALING;
+
+//static struct gp_pll_user_handle_s *gp_pll_user_gpu;
+//static int is_gp_pll_get;
+//static int is_gp_pll_put;
+
+static unsigned scaling_dbg_level = 0;
+module_param(scaling_dbg_level, uint, 0644);
+MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level");
+
+static mali_plat_info_t* pmali_plat = NULL;
+static struct workqueue_struct *mali_scaling_wq = NULL;
+//static DEFINE_SPINLOCK(lock);
+
+static int cur_gpu_clk_index = 0;
+static int exec_gpu_clk_index = 0;
+#define scalingdbg(level, fmt, arg...)				   \
+	do {											   \
+		if (scaling_dbg_level >= (level))			   \
+		printk(fmt , ## arg);						   \
+	} while (0)
+
+struct mali_gpu_clock meson_gpu_clk_info = {
+    .item = NULL,
+    .num_of_steps = 0,
+};
+
+u32 revise_set_clk(u32 val, u32 flush)
+{
+	u32 ret = 0;
+	return ret;
+}
+
+void get_mali_rt_clkpp(u32* clk, u32* pp)
+{
+}
+
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush)
+{
+	u32 ret = 0;
+	return ret;
+}
+
+void revise_mali_rt(void)
+{
+}
+
+static void do_scaling(struct work_struct *work)
+{
+    //unsigned long flags;
+    mali_plat_info_t *pinfo = container_of(work, struct mali_plat_info_t, wq_work);
+
+    *pinfo = *pinfo;
+	//mali_dev_pause();
+    //spin_lock_irqsave(&lock, flags);
+    mali_clock_set(exec_gpu_clk_index);
+    cur_gpu_clk_index = exec_gpu_clk_index;
+    //spin_unlock_irqrestore(&lock, flags);
+	//mali_dev_resume();
+}
+void flush_scaling_job(void)
+{
+    if (mali_scaling_wq == NULL) return;
+
+	flush_workqueue(mali_scaling_wq);
+    printk("%s, %d\n", __func__, __LINE__);
+}
+
+
+int mali_core_scaling_init(mali_plat_info_t *mali_plat)
+{
+	pmali_plat = mali_plat;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+	mali_scaling_wq = alloc_workqueue("gpu_scaling_wq", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+	mali_scaling_wq = create_workqueue("gpu_scaling_wq");
+#endif
+	INIT_WORK(&pmali_plat->wq_work, do_scaling);
+    if (mali_scaling_wq == NULL) printk("Unable to create gpu scaling workqueue\n");
+
+    meson_gpu_clk_info.num_of_steps = pmali_plat->scale_info.maxclk;
+
+	return 0;
+}
+
+void mali_core_scaling_term(void)
+{
+    flush_scaling_job();
+    destroy_workqueue(mali_scaling_wq);
+    mali_scaling_wq = NULL;
+}
+
+void mali_pp_scaling_update(struct mali_gpu_utilization_data *data)
+{
+}
+
+void mali_pp_fs_scaling_update(struct mali_gpu_utilization_data *data)
+{
+}
+
+u32 get_mali_schel_mode(void)
+{
+	return scaling_mode;
+}
+
+void set_mali_schel_mode(u32 mode)
+{
+    scaling_mode = mode;
+	if (scaling_mode == MALI_TURBO_MODE) {
+        printk ("turbo mode\n");
+		pmali_plat->limit_on = 0;
+        meson_gpu_clk_info.num_of_steps = pmali_plat->turbo_clock;
+	} else {
+        printk ("not turbo mode\n");
+		pmali_plat->limit_on = 1;
+        meson_gpu_clk_info.num_of_steps  = pmali_plat->scale_info.maxclk;
+	}
+
+    printk("total_enable_steps = %d\n", meson_gpu_clk_info.num_of_steps);
+}
+
+u32 get_current_frequency(void)
+{
+	return get_mali_freq(currentStep);
+}
+
+void mali_gpu_utilization_callback(struct mali_gpu_utilization_data *data)
+{
+}
+
+void mali_dev_restore(void)
+{
+    //TO add this
+	//mali_perf_set_num_pp_cores(num_cores_enabled);
+	if (pmali_plat && pmali_plat->pdev) {
+		mali_clock_init_clk_tree(pmali_plat->pdev);
+	} else {
+		printk("error: init clock failed, pmali_plat=%p, pmali_plat->pdev=%p\n",
+				pmali_plat, pmali_plat == NULL ? NULL: pmali_plat->pdev);
+	}
+}
+
+/* Function that platfrom report it's clock info which driver can set, needed when CONFIG_MALI_DVFS enabled */
+static void meson_platform_get_clock_info(struct mali_gpu_clock **data) {
+    if (pmali_plat) {
+        meson_gpu_clk_info.item = pmali_plat->clk_items;
+        meson_gpu_clk_info.num_of_steps = pmali_plat->scale_info.maxclk;
+        printk("get clock info\n");
+    } else {
+        printk("error pmali_plat is null");
+    }
+    *data = &meson_gpu_clk_info;
+}
+
+/* Function that get the current clock info, needed when CONFIG_MALI_DVFS enabled */
+static int meson_platform_get_freq(void) {
+    scalingdbg(1, "cur_gpu_clk_index =%d\n", cur_gpu_clk_index);
+    //dynamically changed the num of steps;
+    return  cur_gpu_clk_index;
+}
+
+/* Fuction that platform callback for freq setting, needed when CONFIG_MALI_DVFS enabled */
+static int meson_platform_set_freq(int setting_clock_step) {
+
+    if (exec_gpu_clk_index == setting_clock_step) {
+        return 0;
+    }
+
+    queue_work(mali_scaling_wq, &pmali_plat->wq_work);
+    exec_gpu_clk_index = setting_clock_step;
+    scalingdbg(1, "set cur_gpu_clk_index =%d\n", cur_gpu_clk_index);
+    return 0;
+}
+
+int mali_meson_get_gpu_data(struct mali_gpu_device_data *mgpu_data)
+{
+    mgpu_data->get_clock_info = meson_platform_get_clock_info,
+    mgpu_data->get_freq = meson_platform_get_freq,
+    mgpu_data->set_freq = meson_platform_set_freq,
+    mgpu_data->utilization_callback = NULL;
+    return 0;
+}
diff --git a/utgard/platform/meson_bu/mali_platform.h b/utgard/platform/meson_bu/mali_platform.h
new file mode 100644
index 0000000..41185d0
--- /dev/null
+++ b/utgard/platform/meson_bu/mali_platform.h
@@ -0,0 +1,15 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/kernel.h>
+#ifndef MALI_PLATFORM_H_
+#define MALI_PLATFORM_H_
+
+extern u32 mali_gp_reset_fail;
+extern u32 mali_core_timeout;
+
+#endif /* MALI_PLATFORM_H_ */
diff --git a/utgard/platform/meson_bu/mali_pm_device.c b/utgard/platform/meson_bu/mali_pm_device.c
new file mode 100644
index 0000000..6149031
--- /dev/null
+++ b/utgard/platform/meson_bu/mali_pm_device.c
@@ -0,0 +1,169 @@
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include "meson_main.h"
+#include <common/mali_kernel_common.h>
+#include <common/mali_osk_profiling.h>
+#include <common/mali_pmu.h>
+#include <linux/mali/mali_utgard.h>
+
+static int mali_os_suspend(struct device *device)
+{
+	int ret = 0;
+
+	MALI_DEBUG_PRINT(4, ("mali_os_suspend() called\n"));
+	ret = mali_deep_suspend(device);
+
+	return ret;
+}
+
+static int mali_os_resume(struct device *device)
+{
+	int ret = 0;
+
+	MALI_DEBUG_PRINT(4, ("mali_os_resume() called\n"));
+
+	ret = mali_deep_resume(device);
+
+	return ret;
+}
+
+static int mali_os_freeze(struct device *device)
+{
+	int ret = 0;
+
+	MALI_DEBUG_PRINT(4, ("mali_os_freeze() called\n"));
+
+	mali_dev_freeze();
+
+	if (NULL != device->driver &&
+	    NULL != device->driver->pm &&
+	    NULL != device->driver->pm->freeze)
+	{
+		/* Need to notify Mali driver about this event */
+		ret = device->driver->pm->freeze(device);
+	}
+
+	return ret;
+}
+//copy from r4p1 linux/mali_pmu_power_up_down.c
+#if MESON_CPU_TYPE >= MESON_CPU_TYPE_MESON8
+static int mali_pmu_powerup(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	MALI_DEBUG_PRINT(5, ("Mali PMU: Power up\n"));
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	if (NULL == pmu) {
+		return -ENXIO;
+	}
+
+	mali_pmu_power_up_all(pmu);
+
+	return 0;
+}
+#endif
+
+static int mali_os_thaw(struct device *device)
+{
+	int ret = 0;
+
+	MALI_DEBUG_PRINT(4, ("mali_os_thaw() called\n"));
+#if MESON_CPU_TYPE >= MESON_CPU_TYPE_MESON8
+	enable_clock();
+	mali_pmu_powerup();
+#endif
+
+	if (NULL != device->driver &&
+	    NULL != device->driver->pm &&
+	    NULL != device->driver->pm->thaw)
+	{
+		/* Need to notify Mali driver about this event */
+		ret = device->driver->pm->thaw(device);
+	}
+
+	return ret;
+}
+
+static int mali_os_restore(struct device *device)
+{
+	MALI_DEBUG_PRINT(4, ("mali_os_thaw() called\n"));
+#if MESON_CPU_TYPE >= MESON_CPU_TYPE_MESON8
+	mali_dev_restore();
+#endif
+	return mali_os_resume(device);
+}
+
+#ifdef CONFIG_PM_RUNTIME
+#if 0
+static int mali_runtime_suspend(struct device *device)
+{
+	int ret = 0;
+
+	MALI_DEBUG_PRINT(4, ("mali_runtime_suspend() called\n"));
+	ret = mali_light_suspend(device);
+
+	return ret;
+}
+
+static int mali_runtime_resume(struct device *device)
+{
+	int ret = 0;
+
+	MALI_DEBUG_PRINT(4, ("mali_run	time_resume() called\n"));
+	ret = mali_light_resume(device);
+
+	return ret;
+}
+
+static int mali_runtime_idle(struct device *device)
+{
+	MALI_DEBUG_PRINT(4, ("mali_runtime_idle() called\n"));
+
+	if (NULL != device->driver &&
+	    NULL != device->driver->pm &&
+	    NULL != device->driver->pm->runtime_idle)
+	{
+		/* Need to notify Mali driver about this event */
+		int ret = device->driver->pm->runtime_idle(device);
+		if (0 != ret)
+		{
+			return ret;
+		}
+	}
+
+	pm_runtime_suspend(device);
+
+	return 0;
+}
+#endif
+#endif
+
+static struct dev_pm_ops mali_gpu_device_type_pm_ops =
+{
+	.suspend = mali_os_suspend,
+	.resume = mali_os_resume,
+	.freeze = mali_os_freeze,
+	.thaw = mali_os_thaw,
+	.restore = mali_os_restore,
+#if 0//def CONFIG_PM_RUNTIME
+	.runtime_suspend = mali_runtime_suspend,
+	.runtime_resume = mali_runtime_resume,
+	.runtime_idle = mali_runtime_idle,
+#endif
+};
+
+struct device_type mali_pm_device =
+{
+	.pm = &mali_gpu_device_type_pm_ops,
+};
diff --git a/utgard/platform/meson_bu/mali_scaling.h b/utgard/platform/meson_bu/mali_scaling.h
new file mode 100644
index 0000000..a8ba4e4
--- /dev/null
+++ b/utgard/platform/meson_bu/mali_scaling.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.h
+ * Example core scaling policy.
+ */
+
+#ifndef __ARM_CORE_SCALING_H__
+#define __ARM_CORE_SCALING_H__
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+
+enum mali_scale_mode_t {
+	MALI_PP_SCALING = 0,
+	MALI_PP_FS_SCALING,
+	MALI_SCALING_DISABLE,
+	MALI_TURBO_MODE,
+	MALI_SCALING_MODE_MAX
+};
+
+typedef struct mali_dvfs_threshold_table {
+	uint32_t    freq_index;
+	uint32_t    voltage;
+	uint32_t    keep_count;
+	uint32_t    downthreshold;
+	uint32_t    upthreshold;
+    uint32_t    clk_freq;
+    const char  *clk_parent;
+    struct clk  *clkp_handle;
+    uint32_t    clkp_freq;
+} mali_dvfs_threshold_table;
+
+/**
+ *  restrictions on frequency and number of pp.
+ */
+typedef struct mali_scale_info_t {
+	u32 minpp;
+	u32 maxpp;
+	u32 minclk;
+	u32 maxclk;
+} mali_scale_info_t;
+
+/**
+ * Platform spesific data for meson chips.
+ */
+typedef struct mali_plat_info_t {
+	u32 cfg_pp; /* number of pp. */
+	u32 cfg_min_pp;
+	u32 turbo_clock; /* reserved clock src. */
+	u32 def_clock; /* gpu clock used most of time.*/
+	u32 cfg_clock; /* max clock could be used.*/
+	u32 cfg_clock_bkup; /* same as cfg_clock, for backup. */
+	u32 cfg_min_clock;
+
+	u32  sc_mpp; /* number of pp used most of time.*/
+	u32  bst_gpu; /* threshold for boosting gpu. */
+	u32  bst_pp; /* threshold for boosting PP. */
+
+	u32 *clk;
+	u32 *clk_sample;
+	u32 clk_len;
+	u32 have_switch; /* have clock gate switch or not. */
+
+	mali_dvfs_threshold_table *dvfs_table;
+    struct mali_gpu_clk_item *clk_items;
+	u32 dvfs_table_size;
+
+	mali_scale_info_t scale_info;
+	u32 maxclk_sysfs;
+	u32 maxpp_sysfs;
+
+	/* set upper limit of pp or frequency, for THERMAL thermal or band width saving.*/
+	u32 limit_on;
+
+	/* for boost up gpu by user. */
+	void (*plat_preheat)(void);
+
+    struct platform_device *pdev;
+    void __iomem *reg_base_hiubus;
+    void __iomem *reg_base_aobus;
+	struct work_struct wq_work;
+    struct clk *clk_mali;
+    struct clk *clk_mali_0;
+    struct clk *clk_mali_1;
+} mali_plat_info_t;
+mali_plat_info_t* get_mali_plat_data(void);
+
+/**
+ * Initialize core scaling policy.
+ *
+ * @note The core scaling policy will assume that all PP cores are on initially.
+ *
+ * @param num_pp_cores Total number of PP cores.
+ */
+int mali_core_scaling_init(mali_plat_info_t*);
+
+/**
+ * Terminate core scaling policy.
+ */
+void mali_core_scaling_term(void);
+
+/**
+ * cancel and flush scaling job queue.
+ */
+void flush_scaling_job(void);
+
+/* get current state(pp, clk). */
+void get_mali_rt_clkpp(u32* clk, u32* pp);
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush);
+void revise_mali_rt(void);
+/* get max gpu clk level of this chip*/
+int get_gpu_max_clk_level(void);
+
+/* get or set the scale mode. */
+u32 get_mali_schel_mode(void);
+void set_mali_schel_mode(u32 mode);
+
+/* for frequency reporter in DS-5 streamline. */
+u32 get_current_frequency(void);
+void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+extern int mali_pm_statue;
+#endif /* __ARM_CORE_SCALING_H__ */
diff --git a/utgard/platform/meson_bu/meson_main.h b/utgard/platform/meson_bu/meson_main.h
new file mode 100644
index 0000000..a67441f
--- /dev/null
+++ b/utgard/platform/meson_bu/meson_main.h
@@ -0,0 +1,37 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#ifndef MESON_MAIN_H_
+#define MESON_MAIN_H_
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+extern struct device_type mali_pm_device;
+extern int mali_pm_statue;
+
+u32 set_max_mali_freq(u32 idx);
+u32 get_max_mali_freq(void);
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev);
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev);
+int mali_meson_uninit(struct platform_device* ptr_plt_dev);
+int mali_light_suspend(struct device *device);
+int mali_light_resume(struct device *device);
+int mali_deep_suspend(struct device *device);
+int mali_deep_resume(struct device *device);
+
+#endif /* MESON_MAIN_H_ */
diff --git a/utgard/platform/meson_bu/meson_main2.c b/utgard/platform/meson_bu/meson_main2.c
new file mode 100644
index 0000000..8dd3dc4
--- /dev/null
+++ b/utgard/platform/meson_bu/meson_main2.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2010, 2012-2014 Amlogic Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ */
+
+/**
+ * @file mali_platform.c
+ * Platform specific Mali driver functions for:
+ * meson8m2 and the newer chip
+ */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#include <asm/io.h>
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include <linux/dma-mapping.h>
+#include <linux/moduleparam.h>
+
+#include "mali_executor.h"
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main2.h"
+
+int mali_pm_statue = 0;
+extern void mali_gpu_utilization_callback(struct mali_gpu_utilization_data *data);
+
+u32 mali_gp_reset_fail = 0;
+module_param(mali_gp_reset_fail, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_gp_reset_fail, "times of failed to reset GP");
+u32 mali_core_timeout  = 0;
+module_param(mali_core_timeout, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_core_timeout, "timeout of failed to reset GP");
+
+static struct mali_gpu_device_data mali_gpu_data = {
+
+#if defined(CONFIG_ARCH_REALVIEW)
+	.dedicated_mem_start = 0x80000000, /* Physical start address (use 0xD0000000 for old indirect setup) */
+	.dedicated_mem_size = 0x10000000, /* 256MB */
+#endif
+#if defined(CONFIG_ARM64)
+	.fb_start = 0x5f000000,
+	.fb_size = 0x91000000,
+#else
+	.fb_start = 0xe0000000,
+	.fb_size = 0x01000000,
+#endif
+	.control_interval = 200, /* 1000ms */
+};
+
+int mali_platform_device_init(struct platform_device *device)
+{
+	int err = -1;
+
+	err = mali_meson_init_start(device);
+	if (0 != err) printk("mali init failed\n");
+	err = mali_meson_get_gpu_data(&mali_gpu_data);
+	if (0 != err) printk("mali get gpu data failed\n");
+
+	err = platform_device_add_data(device, &mali_gpu_data, sizeof(mali_gpu_data));
+
+	if (0 == err) {
+		device->dev.type = &mali_pm_device; /* We should probably use the pm_domain instead of type on newer kernels */
+#ifdef CONFIG_PM_RUNTIME
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+		pm_runtime_set_autosuspend_delay(&device->dev, 1000);
+		pm_runtime_use_autosuspend(&device->dev);
+#endif
+		pm_runtime_enable(&device->dev);
+#endif
+		mali_meson_init_finish(device);
+	}
+
+	mali_gp_reset_fail = 0;
+	mali_core_timeout  = 0;
+
+	return err;
+}
+
+int mali_platform_device_deinit(struct platform_device *device)
+{
+	MALI_IGNORE(device);
+
+	printk("%s, %d\n", __FILE__, __LINE__);
+	MALI_DEBUG_PRINT(4, ("mali_platform_device_deinit() called\n"));
+
+
+	mali_meson_uninit(device);
+
+	return 0;
+}
+
+#if 0
+static int param_set_core_scaling(const char *val, const struct kernel_param *kp)
+{
+	int ret = param_set_int(val, kp);
+	printk("%s, %d\n", __FILE__, __LINE__);
+
+	if (1 == mali_core_scaling_enable) {
+		mali_core_scaling_sync(mali_executor_get_num_cores_enabled());
+	}
+	return ret;
+}
+
+static struct kernel_param_ops param_ops_core_scaling = {
+	.set = param_set_core_scaling,
+	.get = param_get_int,
+};
+
+module_param_cb(mali_core_scaling_enable, &param_ops_core_scaling, &mali_core_scaling_enable, 0644);
+MODULE_PARM_DESC(mali_core_scaling_enable, "1 means to enable core scaling policy, 0 means to disable core scaling policy");
+#endif
diff --git a/utgard/platform/meson_bu/meson_main2.h b/utgard/platform/meson_bu/meson_main2.h
new file mode 100644
index 0000000..5a65cb2
--- /dev/null
+++ b/utgard/platform/meson_bu/meson_main2.h
@@ -0,0 +1,39 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#ifndef MESON_MAIN_H_
+#define MESON_MAIN_H_
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#include <linux/mali/mali_utgard.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+extern struct device_type mali_pm_device;
+extern int mali_pm_statue;
+
+u32 set_max_mali_freq(u32 idx);
+u32 get_max_mali_freq(void);
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev);
+int mali_meson_get_gpu_data(struct mali_gpu_device_data *mgpu_data);
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev);
+int mali_meson_uninit(struct platform_device* ptr_plt_dev);
+int mali_light_suspend(struct device *device);
+int mali_light_resume(struct device *device);
+int mali_deep_suspend(struct device *device);
+int mali_deep_resume(struct device *device);
+
+#endif /* MESON_MAIN_H_ */
diff --git a/utgard/platform/meson_bu/mpgpu.c b/utgard/platform/meson_bu/mpgpu.c
new file mode 100644
index 0000000..b480109
--- /dev/null
+++ b/utgard/platform/meson_bu/mpgpu.c
@@ -0,0 +1,363 @@
+/*******************************************************************
+ *
+ *  Copyright C 2013 by Amlogic, Inc. All Rights Reserved.
+ *
+ *  Description:
+ *
+ *  Author: Amlogic Software
+ *  Created: 2010/4/1   19:46
+ *
+ *******************************************************************/
+/* Standard Linux headers */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/io.h>
+#include <plat/io.h>
+#include <asm/io.h>
+#endif
+
+#include <linux/mali/mali_utgard.h>
+#include <common/mali_kernel_common.h>
+#include <common/mali_pmu.h>
+//#include "mali_pp_scheduler.h"
+#include "meson_main.h"
+
+#if MESON_CPU_TYPE >= MESON_CPU_TYPE_MESON8
+static ssize_t domain_stat_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	unsigned int val;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+	return sprintf(buf, "%x\n", val>>4);
+	return 0;
+}
+
+#define PREHEAT_CMD "preheat"
+#define PLL2_CMD "mpl2"  /* mpl2 [11] or [0xxxxxxx] */
+#define SCMPP_CMD "scmpp"  /* scmpp [number of pp your want in most of time]. */
+#define BSTGPU_CMD "bstgpu"  /* bstgpu [0-256] */
+#define BSTPP_CMD "bstpp"  /* bstpp [0-256] */
+#define LIMIT_CMD "lmt"  /* lmt [0 or 1] */
+#define MAX_TOKEN 20
+#define FULL_UTILIZATION 256
+
+static ssize_t mpgpu_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	char *pstart, *cprt = NULL;
+	u32 val = 0;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	cprt = skip_spaces(buf);
+	pstart = strsep(&cprt," ");
+	if (strlen(pstart) < 1)
+		goto quit;
+
+	if (!strncmp(pstart, PREHEAT_CMD, MAX_TOKEN)) {
+		if (pmali_plat->plat_preheat) {
+			pmali_plat->plat_preheat();
+		}
+	} else if (!strncmp(pstart, PLL2_CMD, MAX_TOKEN)) {
+		int base = 10;
+		if ((strlen(cprt) > 2) && (cprt[0] == '0') &&
+				(cprt[1] == 'x' || cprt[1] == 'X'))
+			base = 16;
+		if (kstrtouint(cprt, base, &val) <0)
+			goto quit;
+		if (val < 11)
+			pmali_plat->cfg_clock = pmali_plat->cfg_clock_bkup;
+		else
+			pmali_plat->cfg_clock = pmali_plat->turbo_clock;
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		set_str_src(val);
+	} else if (!strncmp(pstart, SCMPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < pmali_plat->cfg_pp)) {
+			pmali_plat->sc_mpp = val;
+		}
+	} else if (!strncmp(pstart, BSTGPU_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_gpu = val;
+		}
+	} else if (!strncmp(pstart, BSTPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_pp = val;
+		}
+	} else if (!strncmp(pstart, LIMIT_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+
+		if (val < 2) {
+			pmali_plat->limit_on = val;
+			if (val == 0) {
+				pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+				pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+				revise_mali_rt();
+			}
+		}
+	}
+quit:
+	return count;
+}
+
+static ssize_t scale_mode_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_mali_schel_mode());
+}
+
+static ssize_t scale_mode_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	set_mali_schel_mode(val);
+
+	return count;
+}
+
+static ssize_t max_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxpp:%d, maxpp_sysfs:%d, total=%d\n",
+			pmali_plat->scale_info.maxpp, pmali_plat->maxpp_sysfs,
+			pmali_plat->cfg_pp);
+	return sprintf(buf, "%d\n", pmali_plat->cfg_pp);
+}
+
+static ssize_t max_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_pp) || (val < pinfo->minpp))
+		return -EINVAL;
+
+	pmali_plat->maxpp_sysfs = val;
+	pinfo->maxpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minpp);
+}
+
+static ssize_t min_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxpp) || (val < 1))
+		return -EINVAL;
+
+	pinfo->minpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t max_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxclk:%d, maxclk_sys:%d, max gpu level=%d\n",
+			pmali_plat->scale_info.maxclk, pmali_plat->maxclk_sysfs, get_gpu_max_clk_level());
+	return sprintf(buf, "%d\n", get_gpu_max_clk_level());
+}
+
+static ssize_t max_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_clock) || (val < pinfo->minclk))
+		return -EINVAL;
+
+	pmali_plat->maxclk_sysfs = val;
+	pinfo->maxclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minclk);
+}
+
+static ssize_t min_freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxclk))
+		return -EINVAL;
+
+	pinfo->minclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t freq_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_current_frequency());
+}
+
+static ssize_t freq_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(val, pp, 1);
+
+	return count;
+}
+
+static ssize_t current_pp_read(struct class *class,
+		struct class_attribute *attr, char *buf)
+{
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+	return sprintf(buf, "%d\n", pp);
+}
+
+static ssize_t current_pp_write(struct class *class,
+		struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+
+	get_mali_rt_clkpp(&clk, &pp);
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(clk, val, 1);
+
+	return count;
+}
+
+static struct class_attribute mali_class_attrs[] = {
+	__ATTR(domain_stat,	0644, domain_stat_read, NULL),
+	__ATTR(mpgpucmd,	0644, NULL,		mpgpu_write),
+	__ATTR(scale_mode,	0644, scale_mode_read,  scale_mode_write),
+	__ATTR(min_freq,	0644, min_freq_read,  	min_freq_write),
+	__ATTR(max_freq,	0644, max_freq_read,	max_freq_write),
+	__ATTR(min_pp,		0644, min_pp_read,	min_pp_write),
+	__ATTR(max_pp,		0644, max_pp_read,	max_pp_write),
+	__ATTR(cur_freq,	0644, freq_read,	freq_write),
+	__ATTR(cur_pp,		0644, current_pp_read,	current_pp_write),
+};
+
+static struct class mpgpu_class = {
+	.name = "mpgpu",
+};
+#endif
+
+int mpgpu_class_init(void)
+{
+#if MESON_CPU_TYPE >= MESON_CPU_TYPE_MESON8
+	int ret = 0;
+	int i;
+	int attr_num =  ARRAY_SIZE(mali_class_attrs);
+
+	ret = class_register(&mpgpu_class);
+	if (ret) {
+		printk(KERN_ERR "%s: class_register failed\n", __func__);
+		return ret;
+	}
+	for (i = 0; i< attr_num; i++) {
+		ret = class_create_file(&mpgpu_class, &mali_class_attrs[i]);
+		if (ret) {
+			printk(KERN_ERR "%d ST: class item failed to register\n", i);
+		}
+	}
+	return ret;
+#else
+	return 0;
+#endif
+}
+
+void  mpgpu_class_exit(void)
+{
+#if MESON_CPU_TYPE >= MESON_CPU_TYPE_MESON8
+	class_unregister(&mpgpu_class);
+#endif
+}
+
diff --git a/utgard/platform/meson_bu/platform_gx.c b/utgard/platform/meson_bu/platform_gx.c
new file mode 100644
index 0000000..79f513c
--- /dev/null
+++ b/utgard/platform/meson_bu/platform_gx.c
@@ -0,0 +1,391 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include <asm/io.h>
+#include <linux/mali/mali_utgard.h>
+#ifdef CONFIG_GPU_THERMAL
+#include <linux/gpu_cooling.h>
+#include <linux/gpucore_cooling.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/amlogic/aml_thermal_hw.h>
+#include <common/mali_ukk.h>
+#endif
+#endif
+#include <common/mali_kernel_common.h>
+#include <common/mali_osk_profiling.h>
+#include <common/mali_pmu.h>
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+#include "meson_main.h"
+#include "mali_executor.h"
+
+/*
+ *    For Meson 8 M2.
+ *
+ */
+static void mali_plat_preheat(void);
+static mali_plat_info_t mali_plat_data = {
+    .bst_gpu = 210, /* threshold for boosting gpu. */
+    .bst_pp = 160, /* threshold for boosting PP. */
+    .have_switch = 1,
+    .limit_on = 1,
+    .plat_preheat = mali_plat_preheat,
+};
+
+static void mali_plat_preheat(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    u32 pre_fs;
+    u32 clk, pp;
+
+    if (get_mali_schel_mode() != MALI_PP_FS_SCALING)
+        return;
+
+    get_mali_rt_clkpp(&clk, &pp);
+    pre_fs = mali_plat_data.def_clock + 1;
+    if (clk < pre_fs)
+        clk = pre_fs;
+    if (pp < mali_plat_data.sc_mpp)
+        pp = mali_plat_data.sc_mpp;
+    set_mali_rt_clkpp(clk, pp, 1);
+#endif
+}
+
+mali_plat_info_t* get_mali_plat_data(void) {
+    return &mali_plat_data;
+}
+
+int get_mali_freq_level(int freq)
+{
+    int i = 0, level = -1;
+    int mali_freq_num;
+
+    if (freq < 0)
+        return level;
+
+    mali_freq_num = mali_plat_data.dvfs_table_size - 1;
+    if (freq < mali_plat_data.clk_sample[0])
+        level = mali_freq_num-1;
+    else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1])
+        level = 0;
+    else {
+        for (i=0; i<mali_freq_num - 1 ;i++) {
+            if (freq >= mali_plat_data.clk_sample[i] && freq < mali_plat_data.clk_sample[i + 1]) {
+                level = i;
+                level = mali_freq_num-level - 1;
+                break;
+            }
+        }
+    }
+    return level;
+}
+
+unsigned int get_mali_max_level(void)
+{
+    return mali_plat_data.dvfs_table_size - 1;
+}
+
+int get_gpu_max_clk_level(void)
+{
+    return mali_plat_data.cfg_clock;
+}
+
+#ifdef CONFIG_GPU_THERMAL
+static void set_limit_mali_freq(u32 idx)
+{
+    if (mali_plat_data.limit_on == 0)
+        return;
+    if (idx > mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk)
+        return;
+    if (idx > mali_plat_data.maxclk_sysfs) {
+        printk("idx > max freq\n");
+        return;
+    }
+    mali_plat_data.scale_info.maxclk= idx;
+    revise_mali_rt();
+}
+
+static u32 get_limit_mali_freq(void)
+{
+    return mali_plat_data.scale_info.maxclk;
+}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 get_mali_utilization(void)
+{
+    return (_mali_ukk_utilization_pp() * 100) / 256;
+}
+#endif
+#endif
+
+#ifdef CONFIG_GPU_THERMAL
+static u32 set_limit_pp_num(u32 num)
+{
+    u32 ret = -1;
+    if (mali_plat_data.limit_on == 0)
+        goto quit;
+    if (num > mali_plat_data.cfg_pp ||
+            num < mali_plat_data.scale_info.minpp)
+        goto quit;
+
+    if (num > mali_plat_data.maxpp_sysfs) {
+        printk("pp > sysfs set pp\n");
+        goto quit;
+    }
+
+    mali_plat_data.scale_info.maxpp = num;
+    revise_mali_rt();
+    ret = 0;
+quit:
+    return ret;
+}
+#ifdef CONFIG_DEVFREQ_THERMAL
+static u32 mali_get_online_pp(void)
+{
+    unsigned int val;
+    mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+    val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+    if (val == 0x07)    /* No pp is working */
+        return 0;
+
+    return mali_executor_get_num_cores_enabled();
+}
+#endif
+#endif
+
+void mali_gpu_utilization_callback(struct mali_gpu_utilization_data *data);
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+    dev_set_drvdata(&ptr_plt_dev->dev, &mali_plat_data);
+    mali_dt_info(ptr_plt_dev, &mali_plat_data);
+    mali_clock_init_clk_tree(ptr_plt_dev);
+    return 0;
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+    if (mali_core_scaling_init(&mali_plat_data) < 0)
+        return -1;
+    return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+    mali_core_scaling_term();
+    return 0;
+}
+
+static int mali_cri_light_suspend(size_t param)
+{
+    int ret;
+    struct device *device;
+    struct mali_pmu_core *pmu;
+
+    ret = 0;
+    mali_pm_statue = 1;
+    device = (struct device *)param;
+    pmu = mali_pmu_get_global_pmu_core();
+
+    if (NULL != device->driver &&
+            NULL != device->driver->pm &&
+            NULL != device->driver->pm->runtime_suspend)
+    {
+        /* Need to notify Mali driver about this event */
+        ret = device->driver->pm->runtime_suspend(device);
+    }
+    mali_pmu_power_down_all(pmu);
+    return ret;
+}
+
+static int mali_cri_light_resume(size_t param)
+{
+    int ret;
+    struct device *device;
+    struct mali_pmu_core *pmu;
+
+    ret = 0;
+    device = (struct device *)param;
+    pmu = mali_pmu_get_global_pmu_core();
+
+    mali_pmu_power_up_all(pmu);
+    if (NULL != device->driver &&
+            NULL != device->driver->pm &&
+            NULL != device->driver->pm->runtime_resume)
+    {
+        /* Need to notify Mali driver about this event */
+        ret = device->driver->pm->runtime_resume(device);
+    }
+    mali_pm_statue = 0;
+    return ret;
+}
+
+static int mali_cri_deep_suspend(size_t param)
+{
+    int ret;
+    struct device *device;
+    struct mali_pmu_core *pmu;
+
+    ret = 0;
+    device = (struct device *)param;
+    pmu = mali_pmu_get_global_pmu_core();
+
+    if (NULL != device->driver &&
+            NULL != device->driver->pm &&
+            NULL != device->driver->pm->suspend)
+    {
+        /* Need to notify Mali driver about this event */
+        ret = device->driver->pm->suspend(device);
+    }
+    mali_pmu_power_down_all(pmu);
+    return ret;
+}
+
+static int mali_cri_deep_resume(size_t param)
+{
+    int ret;
+    struct device *device;
+    struct mali_pmu_core *pmu;
+
+    ret = 0;
+    device = (struct device *)param;
+    pmu = mali_pmu_get_global_pmu_core();
+
+    mali_pmu_power_up_all(pmu);
+    if (NULL != device->driver &&
+            NULL != device->driver->pm &&
+            NULL != device->driver->pm->resume)
+    {
+        /* Need to notify Mali driver about this event */
+        ret = device->driver->pm->resume(device);
+    }
+    return ret;
+
+}
+
+int mali_light_suspend(struct device *device)
+{
+    int ret = 0;
+#ifdef CONFIG_MALI400_PROFILING
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+            MALI_PROFILING_EVENT_CHANNEL_GPU |
+            MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+            0, 0, 0, 0, 0);
+#endif
+
+    flush_scaling_job();
+    /* clock scaling. Kasin..*/
+    ret = mali_clock_critical(mali_cri_light_suspend, (size_t)device);
+    disable_clock();
+    return ret;
+}
+
+int mali_light_resume(struct device *device)
+{
+    int ret = 0;
+    enable_clock();
+    ret = mali_clock_critical(mali_cri_light_resume, (size_t)device);
+#ifdef CONFIG_MALI400_PROFILING
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+            MALI_PROFILING_EVENT_CHANNEL_GPU |
+            MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+            get_current_frequency(), 0, 0, 0, 0);
+#endif
+    return ret;
+}
+
+int mali_deep_suspend(struct device *device)
+{
+    int ret = 0;
+
+    mali_pm_statue = 1;
+    flush_scaling_job();
+
+
+    /* clock scaling off. Kasin... */
+    ret = mali_clock_critical(mali_cri_deep_suspend, (size_t)device);
+    disable_clock();
+    return ret;
+}
+
+int mali_deep_resume(struct device *device)
+{
+    int ret = 0;
+
+    /* clock scaling up. Kasin.. */
+    enable_clock();
+    ret = mali_clock_critical(mali_cri_deep_resume, (size_t)device);
+    mali_pm_statue = 0;
+    return ret;
+
+}
+
+void mali_post_init(void)
+{
+#ifdef CONFIG_GPU_THERMAL
+    int err;
+    struct gpufreq_cooling_device *gcdev = NULL;
+    struct gpucore_cooling_device *gccdev = NULL;
+
+    gcdev = gpufreq_cooling_alloc();
+    register_gpu_freq_info(get_current_frequency);
+    if (IS_ERR(gcdev))
+        printk("malloc gpu cooling buffer error!!\n");
+    else if (!gcdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gcdev->get_gpu_freq_level = get_mali_freq_level;
+        gcdev->get_gpu_max_level = get_mali_max_level;
+        gcdev->set_gpu_freq_idx = set_limit_mali_freq;
+        gcdev->get_gpu_current_max_level = get_limit_mali_freq;
+#ifdef CONFIG_DEVFREQ_THERMAL
+        gcdev->get_gpu_freq = get_mali_freq;
+        gcdev->get_gpu_loading = get_mali_utilization;
+        gcdev->get_online_pp = mali_get_online_pp;
+#endif
+        err = gpufreq_cooling_register(gcdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gcdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu cooling register okay with err=%d\n",err);
+    }
+
+    gccdev = gpucore_cooling_alloc();
+    if (IS_ERR(gccdev))
+        printk("malloc gpu core cooling buffer error!!\n");
+    else if (!gccdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gccdev->max_gpu_core_num=mali_plat_data.cfg_pp;
+        gccdev->set_max_pp_num=set_limit_pp_num;
+        err = (int)gpucore_cooling_register(gccdev);
+#ifdef CONFIG_DEVFREQ_THERMAL
+        aml_thermal_min_update(gccdev->cool_dev);
+#endif
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu core cooling register okay with err=%d\n",err);
+    }
+#endif
+}
diff --git a/utgard/platform/meson_bu/scaling.c b/utgard/platform/meson_bu/scaling.c
new file mode 100644
index 0000000..489564f
--- /dev/null
+++ b/utgard/platform/meson_bu/scaling.c
@@ -0,0 +1,589 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.c
+ * Example core scaling policy.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/mali/mali_utgard.h>
+#include <mali_kernel_common.h>
+#include <mali_osk_profiling.h>
+
+#if AMLOGIC_GPU_USE_GPPLL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16)
+#include <linux/amlogic/amports/gp_pll.h>
+#else
+#include <linux/amlogic/media/clk/gp_pll.h>
+#endif
+#endif
+#define LOG_MALI_SCALING 1
+#include "meson_main2.h"
+#include "mali_clock.h"
+
+static int currentStep;
+#ifndef CONFIG_MALI_DVFS
+static int num_cores_enabled;
+static int lastStep;
+static struct work_struct wq_work;
+static mali_plat_info_t* pmali_plat = NULL;
+#endif
+static int  scaling_mode = MALI_PP_FS_SCALING;
+//static int  scaling_mode = MALI_SCALING_DISABLE;
+//static int  scaling_mode = MALI_PP_SCALING;
+
+#if AMLOGIC_GPU_USE_GPPLL
+static struct gp_pll_user_handle_s *gp_pll_user_gpu;
+static int is_gp_pll_get;
+static int is_gp_pll_put;
+#endif
+
+static unsigned scaling_dbg_level = 0;
+module_param(scaling_dbg_level, uint, 0644);
+MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level");
+
+#define scalingdbg(level, fmt, arg...)				   \
+	do {											   \
+		if (scaling_dbg_level >= (level))			   \
+		printk(fmt , ## arg);						   \
+	} while (0)
+
+#ifndef CONFIG_MALI_DVFS
+static inline void mali_clk_exected(void)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	uint32_t execStep = currentStep;
+	mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[currentStep];
+
+	//if (pdvfs[currentStep].freq_index == pdvfs[lastStep].freq_index) return;
+	if ((pdvfs[execStep].freq_index == pdvfs[lastStep].freq_index) ||
+		(pdvfs[execStep].clk_freq == pdvfs[lastStep].clk_freq)){
+		return;
+	}
+
+#if AMLOGIC_GPU_USE_GPPLL
+	if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) {
+		gp_pll_request(gp_pll_user_gpu);
+		if (!is_gp_pll_get) {
+			//printk("not get pll\n");
+			execStep = currentStep - 1;
+		}
+	} else {
+		//not get the gp pll, do need put
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+		gp_pll_release(gp_pll_user_gpu);
+	}
+#else
+	if ((0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) &&
+            !IS_ERR(dvfs_tbl->clkp_handle) &&
+            (0 != dvfs_tbl->clkp_freq)) {
+        clk_prepare_enable(dvfs_tbl->clkp_handle);
+        clk_set_rate(dvfs_tbl->clkp_handle, dvfs_tbl->clkp_freq);
+    }
+
+#endif
+
+	//mali_dev_pause();
+	mali_clock_set(pdvfs[execStep].freq_index);
+	//mali_dev_resume();
+#if AMLOGIC_GPU_USE_GPPLL==0
+	if ((0 == strcmp(pdvfs[lastStep].clk_parent,"gp0_pll")) &&
+		(0 != strcmp(pdvfs[execStep].clk_parent, "gp0_pll"))) {
+				clk_disable_unprepare(pdvfs[lastStep].clkp_handle);
+	}
+#endif
+	lastStep = execStep;
+#if AMLOGIC_GPU_USE_GPPLL
+	if (is_gp_pll_put) {
+		//printk("release gp0 pll\n");
+		gp_pll_release(gp_pll_user_gpu);
+		gp_pll_request(gp_pll_user_gpu);
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 0;
+	}
+#endif
+
+}
+#if AMLOGIC_GPU_USE_GPPLL
+static int gp_pll_user_cb_gpu(struct gp_pll_user_handle_s *user,
+		int event)
+{
+	if (event == GP_PLL_USER_EVENT_GRANT) {
+		//printk("granted\n");
+		is_gp_pll_get = 1;
+		is_gp_pll_put = 0;
+		schedule_work(&wq_work);
+	} else if (event == GP_PLL_USER_EVENT_YIELD) {
+		//printk("ask for yield\n");
+		is_gp_pll_get = 0;
+		is_gp_pll_put = 1;
+		schedule_work(&wq_work);
+	}
+
+	return 0;
+}
+#endif
+
+static void do_scaling(struct work_struct *work)
+{
+	mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+	int err = mali_perf_set_num_pp_cores(num_cores_enabled);
+	scalingdbg(1, "set pp cores to %d\n", num_cores_enabled);
+	MALI_DEBUG_ASSERT(0 == err);
+	MALI_IGNORE(err);
+	scalingdbg(1, "pdvfs[%d].freq_index=%d, pdvfs[%d].freq_index=%d\n",
+			currentStep, pdvfs[currentStep].freq_index,
+			lastStep, pdvfs[lastStep].freq_index);
+	mali_clk_exected();
+#ifdef CONFIG_MALI400_PROFILING
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+			MALI_PROFILING_EVENT_CHANNEL_GPU |
+			MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+			get_current_frequency(),
+			0,	0,	0,	0);
+#endif
+}
+#endif
+
+u32 revise_set_clk(u32 val, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+
+	pinfo = &pmali_plat->scale_info;
+
+	if (val < pinfo->minclk)
+		val = pinfo->minclk;
+	else if (val >  pinfo->maxclk)
+		val =  pinfo->maxclk;
+
+	if (val != currentStep) {
+		currentStep = val;
+		if (flush)
+			schedule_work(&wq_work);
+		else
+			ret = 1;
+	}
+#endif
+	return ret;
+}
+
+void get_mali_rt_clkpp(u32* clk, u32* pp)
+{
+#ifndef CONFIG_MALI_DVFS
+	*clk = currentStep;
+	*pp = num_cores_enabled;
+#endif
+}
+
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush)
+{
+	u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+	mali_scale_info_t* pinfo;
+	u32 flush_work = 0;
+
+	pinfo = &pmali_plat->scale_info;
+	if (clk < pinfo->minclk)
+		clk = pinfo->minclk;
+	else if (clk >  pinfo->maxclk)
+		clk =  pinfo->maxclk;
+
+	if (clk != currentStep) {
+		currentStep = clk;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+	if (pp < pinfo->minpp)
+		pp = pinfo->minpp;
+	else if (pp > pinfo->maxpp)
+		pp = pinfo->maxpp;
+
+	if (pp != num_cores_enabled) {
+		num_cores_enabled = pp;
+		if (flush)
+			flush_work++;
+		else
+			ret = 1;
+	}
+
+	if (flush_work)
+		schedule_work(&wq_work);
+#endif
+	return ret;
+}
+
+void revise_mali_rt(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	set_mali_rt_clkpp(currentStep, num_cores_enabled, 1);
+#endif
+}
+
+void flush_scaling_job(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	cancel_work_sync(&wq_work);
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 enable_one_core(void)
+{
+	scalingdbg(2, "meson:	 one more pp, curent has %d pp cores\n",  num_cores_enabled + 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled + 1, 0);
+}
+
+static u32 disable_one_core(void)
+{
+	scalingdbg(2, "meson: disable one pp, current has %d pp cores\n",  num_cores_enabled - 1);
+	return set_mali_rt_clkpp(currentStep, num_cores_enabled - 1, 0);
+}
+
+static u32 enable_max_num_cores(void)
+{
+	return set_mali_rt_clkpp(currentStep, pmali_plat->scale_info.maxpp, 0);
+}
+
+static u32 enable_pp_cores(u32 val)
+{
+	scalingdbg(2, "meson: enable %d pp cores\n", val);
+	return set_mali_rt_clkpp(currentStep, val, 0);
+}
+#endif
+
+int mali_core_scaling_init(mali_plat_info_t *mali_plat)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_plat == NULL) {
+		scalingdbg(2, " Mali platform data is NULL!!!\n");
+		return -1;
+	}
+
+	pmali_plat = mali_plat;
+	num_cores_enabled = pmali_plat->sc_mpp;
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_gpu = gp_pll_user_register("gpu", 1,
+		gp_pll_user_cb_gpu);
+	//not get the gp pll, do need put
+	is_gp_pll_get = 0;
+	is_gp_pll_put = 0;
+	if (gp_pll_user_gpu == NULL) printk("register gp pll user for gpu failed\n");
+#endif
+
+	currentStep = pmali_plat->def_clock;
+	lastStep = currentStep;
+	INIT_WORK(&wq_work, do_scaling);
+#endif
+	return 0;
+	/* NOTE: Mali is not fully initialized at this point. */
+}
+
+void mali_core_scaling_term(void)
+{
+#ifndef CONFIG_MALI_DVFS
+	flush_scheduled_work();
+#if AMLOGIC_GPU_USE_GPPLL
+	gp_pll_user_unregister(gp_pll_user_gpu);
+#endif
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 mali_threshold [] = {
+	102, /* 40% */
+	128, /* 50% */
+	230, /* 90% */
+};
+#endif
+
+void mali_pp_scaling_update(struct mali_gpu_utilization_data *data)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+
+	if (mali_threshold[2] < data->utilization_pp)
+		ret = enable_max_num_cores();
+	else if (mali_threshold[1]< data->utilization_pp)
+		ret = enable_one_core();
+	else if (0 < data->utilization_pp)
+		ret = disable_one_core();
+	if (ret == 1)
+		schedule_work(&wq_work);
+#endif
+}
+
+#if LOG_MALI_SCALING
+void trace_utilization(struct mali_gpu_utilization_data *data, u32 current_idx, u32 next,
+		u32 current_pp, u32 next_pp)
+{
+	char direction;
+	if (next > current_idx)
+		direction = '>';
+	else if ((current_idx > pmali_plat->scale_info.minpp) && (next < current_idx))
+		direction = '<';
+	else
+		direction = '~';
+
+	scalingdbg(2, "[SCALING]%c (%3d-->%3d)@%3d{%3d - %3d}. pp:(%d-->%d)\n",
+			direction,
+			get_mali_freq(current_idx),
+			get_mali_freq(next),
+			data->utilization_gpu,
+			pmali_plat->dvfs_table[current_idx].downthreshold,
+			pmali_plat->dvfs_table[current_idx].upthreshold,
+			current_pp, next_pp);
+}
+#endif
+
+#ifndef CONFIG_MALI_DVFS
+static int mali_stay_count = 0;
+static void mali_decide_next_status(struct mali_gpu_utilization_data *data, int* next_fs_idx,
+		int* pp_change_flag)
+{
+	u32 utilization, mali_up_limit, decided_fs_idx;
+	u32 ld_left, ld_right;
+	u32 ld_up, ld_down;
+	u32 change_mode;
+
+	*pp_change_flag = 0;
+	change_mode = 0;
+	utilization = data->utilization_gpu;
+
+	scalingdbg(5, "line(%d), scaling_mode=%d, MALI_TURBO_MODE=%d, turbo=%d, maxclk=%d\n",
+			__LINE__,  scaling_mode, MALI_TURBO_MODE,
+		    pmali_plat->turbo_clock, pmali_plat->scale_info.maxclk);
+
+	mali_up_limit = (scaling_mode ==  MALI_TURBO_MODE) ?
+		pmali_plat->turbo_clock : pmali_plat->scale_info.maxclk;
+	decided_fs_idx = currentStep;
+
+	ld_up = pmali_plat->dvfs_table[currentStep].upthreshold;
+	ld_down = pmali_plat->dvfs_table[currentStep].downthreshold;
+
+	scalingdbg(2, "utilization=%d,  ld_up=%d\n ", utilization,  ld_up);
+	if (utilization >= ld_up) { /* go up */
+
+		scalingdbg(2, "currentStep=%d,  mali_up_limit=%d\n ", currentStep, mali_up_limit);
+		if (currentStep < mali_up_limit) {
+			change_mode = 1;
+			if ((currentStep < pmali_plat->def_clock) && (utilization > pmali_plat->bst_gpu))
+				decided_fs_idx = pmali_plat->def_clock;
+			else
+				decided_fs_idx++;
+		}
+		if ((data->utilization_pp >= ld_up) &&
+				(num_cores_enabled < pmali_plat->scale_info.maxpp)) {
+			if ((num_cores_enabled < pmali_plat->sc_mpp) && (data->utilization_pp >= pmali_plat->bst_pp)) {
+				*pp_change_flag = 1;
+				change_mode = 1;
+			} else if (change_mode == 0) {
+				*pp_change_flag = 2;
+				change_mode = 1;
+			}
+		}
+#if LOG_MALI_SCALING
+		scalingdbg(2, "[nexting..] [LD:%d]-> FS[CRNT:%d LMT:%d NEXT:%d] PP[NUM:%d LMT:%d MD:%d][F:%d]\n",
+				data->utilization_pp, currentStep, mali_up_limit, decided_fs_idx,
+				num_cores_enabled, pmali_plat->scale_info.maxpp, *pp_change_flag, change_mode);
+#endif
+	} else if (utilization <= ld_down) { /* go down */
+		if (mali_stay_count > 0) {
+			*next_fs_idx = decided_fs_idx;
+			mali_stay_count--;
+			return;
+		}
+
+		if (num_cores_enabled > pmali_plat->sc_mpp) {
+			change_mode = 1;
+			if (data->utilization_pp <= ld_down) {
+				ld_left = data->utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		} else if (currentStep > pmali_plat->scale_info.minclk) {
+			change_mode = 1;
+		} else if (num_cores_enabled > 1) { /* decrease PPS */
+			if (data->utilization_pp <= ld_down) {
+				ld_left = data->utilization_pp * num_cores_enabled;
+				ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+					(num_cores_enabled - 1);
+				scalingdbg(2, "ld_left=%d, ld_right=%d\n", ld_left, ld_right);
+				if (ld_left < ld_right) {
+					change_mode = 2;
+				}
+			}
+		}
+
+		if (change_mode == 1) {
+			decided_fs_idx--;
+		} else if (change_mode == 2) { /* decrease PPS */
+			*pp_change_flag = -1;
+		}
+	}
+
+	if (decided_fs_idx < 0 ) {
+		printk("gpu debug, next index below 0\n");
+		decided_fs_idx = 0;
+	}
+	if (decided_fs_idx > pmali_plat->scale_info.maxclk) {
+		decided_fs_idx = pmali_plat->scale_info.maxclk;
+		printk("gpu debug, next index above max, set to %d\n", decided_fs_idx);
+	}
+
+	if (change_mode)
+		mali_stay_count = pmali_plat->dvfs_table[decided_fs_idx].keep_count;
+	*next_fs_idx = decided_fs_idx;
+}
+#endif
+
+void mali_pp_fs_scaling_update(struct mali_gpu_utilization_data *data)
+{
+#ifndef CONFIG_MALI_DVFS
+	int ret = 0;
+	int pp_change_flag = 0;
+	u32 next_idx = 0;
+
+#if LOG_MALI_SCALING
+	u32 last_pp = num_cores_enabled;
+#endif
+	mali_decide_next_status(data, &next_idx, &pp_change_flag);
+
+	if (pp_change_flag == 1)
+		ret = enable_pp_cores(pmali_plat->sc_mpp);
+	else if (pp_change_flag == 2)
+		ret = enable_one_core();
+	else if (pp_change_flag == -1) {
+		ret = disable_one_core();
+	}
+
+#if LOG_MALI_SCALING
+	if (pp_change_flag || (next_idx != currentStep))
+		trace_utilization(data, currentStep, next_idx, last_pp, num_cores_enabled);
+#endif
+
+	if (next_idx != currentStep) {
+		ret = 1;
+		currentStep = next_idx;
+	}
+
+	if (ret == 1)
+		schedule_work(&wq_work);
+#ifdef CONFIG_MALI400_PROFILING
+	else
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				MALI_PROFILING_EVENT_CHANNEL_GPU |
+				MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				get_current_frequency(),
+				0,	0,	0,	0);
+#endif
+#endif
+}
+
+u32 get_mali_schel_mode(void)
+{
+	return scaling_mode;
+}
+
+void set_mali_schel_mode(u32 mode)
+{
+#ifndef CONFIG_MALI_DVFS
+	MALI_DEBUG_ASSERT(mode < MALI_SCALING_MODE_MAX);
+	if (mode >= MALI_SCALING_MODE_MAX)
+		return;
+	scaling_mode = mode;
+
+	//disable thermal in turbo mode
+	if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->limit_on = 0;
+	} else {
+		pmali_plat->limit_on = 1;
+	}
+	/* set default performance range. */
+	pmali_plat->scale_info.minclk = pmali_plat->cfg_min_clock;
+	pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+	pmali_plat->scale_info.minpp = pmali_plat->cfg_min_pp;
+	pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+
+	/* set current status and tune max freq */
+	if (scaling_mode == MALI_PP_FS_SCALING) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_pp_cores(pmali_plat->sc_mpp);
+	} else if (scaling_mode == MALI_SCALING_DISABLE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		enable_max_num_cores();
+	} else if (scaling_mode == MALI_TURBO_MODE) {
+		pmali_plat->scale_info.maxclk = pmali_plat->turbo_clock;
+		enable_max_num_cores();
+	}
+	currentStep = pmali_plat->scale_info.maxclk;
+	schedule_work(&wq_work);
+#endif
+}
+
+u32 get_current_frequency(void)
+{
+	return get_mali_freq(currentStep);
+}
+
+void mali_gpu_utilization_callback(struct mali_gpu_utilization_data *data)
+{
+#ifndef CONFIG_MALI_DVFS
+	if (mali_pm_statue)
+		return;
+
+	switch (scaling_mode) {
+		case MALI_PP_FS_SCALING:
+			mali_pp_fs_scaling_update(data);
+			break;
+		case MALI_PP_SCALING:
+			mali_pp_scaling_update(data);
+			break;
+		default:
+			break;
+	}
+#endif
+}
+static u32 clk_cntl_save = 0;
+void mali_dev_freeze(void)
+{
+	clk_cntl_save = mplt_read(HHI_MALI_CLK_CNTL);
+}
+
+void mali_dev_restore(void)
+{
+
+	mplt_write(HHI_MALI_CLK_CNTL, clk_cntl_save);
+	if (pmali_plat && pmali_plat->pdev) {
+		mali_clock_init_clk_tree(pmali_plat->pdev);
+	} else {
+		printk("error: init clock failed, pmali_plat=%p, pmali_plat->pdev=%p\n",
+				pmali_plat, pmali_plat == NULL ? NULL: pmali_plat->pdev);
+	}
+}
+
+int mali_meson_get_gpu_data(struct mali_gpu_device_data *mgpu_data)
+{
+	mgpu_data->get_clock_info = NULL;
+	mgpu_data->get_freq = NULL;
+	mgpu_data->set_freq = NULL;
+	mgpu_data->utilization_callback = mali_gpu_utilization_callback;
+	return 0;
+}
diff --git a/utgard/platform/meson_m400/mali_fix.c b/utgard/platform/meson_m400/mali_fix.c
new file mode 100644
index 0000000..121ada7
--- /dev/null
+++ b/utgard/platform/meson_m400/mali_fix.c
@@ -0,0 +1,185 @@
+/*
+ * AMLOGIC Mali fix driver.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the named License,
+ * or any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Author:  Tim Yao <timyao@amlogic.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <asm/io.h>
+#include <linux/ioport.h>
+#include <linux/dma-mapping.h>
+
+#include <linux/module.h>
+
+#include <mach/am_regs.h>
+#include <mach/clock.h>
+#include <linux/io.h>
+#include <mach/io.h>
+#include <plat/io.h>
+#include <asm/io.h>
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_platform.h"
+#include "mali_fix.h"
+
+#define MALI_MM1_REG_ADDR 0xd0064000
+#define MALI_MMU_REGISTER_INT_STATUS     0x0008
+#define MALI_MM2_REG_ADDR 0xd0065000
+#define MALI_MMU_REGISTER_INT_STATUS     0x0008
+#define MALI_MM_REG_SIZE 0x1000
+
+#define READ_MALI_MMU1_REG(r) (ioread32(((u8*)mali_mm1_regs) + r))
+#define READ_MALI_MMU2_REG(r) (ioread32(((u8*)mali_mm2_regs) + r))
+
+extern int mali_PP0_int_cnt(void);
+extern int mali_PP1_int_cnt(void);
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+static ulong * mali_mm1_regs = NULL;
+static ulong * mali_mm2_regs = NULL;
+static struct timer_list timer;
+
+static u32 mali_pp1_int_count = 0;
+static u32 mali_pp2_int_count = 0;
+static u32 mali_pp1_mmu_int_count = 0;
+static u32 mali_pp2_mmu_int_count = 0;
+static u32 mali_mmu_int_process_state[2];
+
+static void timer_callback(ulong data)
+{
+	unsigned long mali_flags;
+
+	mali_pp1_int_count = mali_PP0_int_cnt();
+	mali_pp2_int_count = mali_PP1_int_cnt();
+
+	/* lock mali_clock_gating when access Mali registers */
+	mali_flags = mali_clock_gating_lock();
+
+	if (readl((u32 *)P_HHI_MALI_CLK_CNTL) & 0x100) {
+		/* polling for PP1 MMU interrupt */
+		if (mali_mmu_int_process_state[0] == MMU_INT_NONE) {
+			if (READ_MALI_MMU1_REG(MALI_MMU_REGISTER_INT_STATUS) != 0) {
+				mali_pp1_mmu_int_count++;
+				MALI_DEBUG_PRINT(3, ("Mali MMU: core0 page fault emit \n"));
+				mali_mmu_int_process_state[0] = MMU_INT_HIT;
+				__raw_writel(1, (volatile void *)P_ISA_TIMERC);
+			}
+		}
+
+		/* polling for PP2 MMU interrupt */
+		if (mali_mmu_int_process_state[1] == MMU_INT_NONE) {
+			if (READ_MALI_MMU2_REG(MALI_MMU_REGISTER_INT_STATUS) != 0) {
+				mali_pp2_mmu_int_count++;
+				MALI_DEBUG_PRINT(3, ("Mali MMU: core1 page fault emit \n"));
+				mali_mmu_int_process_state[1] = MMU_INT_HIT;
+				__raw_writel(1, (volatile void *)P_ISA_TIMERC);
+			}
+		}
+	}
+
+	mali_clock_gating_unlock(mali_flags);
+
+	timer.expires = jiffies + HZ/100;
+
+	add_timer(&timer);
+}
+
+void malifix_set_mmu_int_process_state(int index, int state)
+{
+	if (index < 2)
+		mali_mmu_int_process_state[index] = state;
+}
+
+int malifix_get_mmu_int_process_state(int index)
+{
+	if (index < 2)
+		return mali_mmu_int_process_state[index];
+	return 0;
+}
+#endif
+
+void malifix_init(void)
+{
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+	if (!mali_meson_is_revb())
+		return;
+
+	if ((mali_mm1_regs) && (mali_mm2_regs)) return;
+	mali_mmu_int_process_state[0] = 0;
+	mali_mmu_int_process_state[1] = 0;
+
+	/* set up Timer C as a 1uS one-shot timer */
+	aml_clr_reg32_mask(P_ISA_TIMER_MUX, (1<<18)|(1<<14)|(3<<4));
+	aml_set_reg32_mask(P_ISA_TIMER_MUX,	(1<<18)|(0<<14)|(0<<4));
+
+	setup_timer(&timer, timer_callback, 0);
+
+	mali_mm1_regs = (ulong *)ioremap_nocache(MALI_MM1_REG_ADDR, MALI_MM_REG_SIZE);
+	if (mali_mm1_regs)
+		printk("Mali pp1 MMU register mapped at %p...\n", mali_mm1_regs);
+
+	mali_mm2_regs = (ulong *)ioremap_nocache(MALI_MM2_REG_ADDR, MALI_MM_REG_SIZE);
+	if (mali_mm2_regs)
+		printk("Mali pp2 MMU register mapped at %p...\n", mali_mm2_regs);
+
+	if ((mali_mm1_regs != NULL) && (mali_mm2_regs != NULL))
+		mod_timer(&timer, jiffies + HZ/100);
+#endif
+}
+
+void malifix_exit(void)
+{
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+	if (!mali_meson_is_revb())
+		return;
+
+	del_timer(&timer);
+
+	if (mali_mm1_regs != NULL)
+		iounmap(mali_mm1_regs);
+	mali_mm1_regs = NULL;
+
+	if (mali_mm2_regs != NULL)
+		iounmap(mali_mm2_regs);
+	mali_mm2_regs = NULL;
+
+#endif
+	return;
+}
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+module_param(mali_pp1_int_count, uint, 0664);
+MODULE_PARM_DESC(mali_pp1_int_count, "Mali PP1 interrupt count\n");
+
+module_param(mali_pp2_int_count, uint, 0664);
+MODULE_PARM_DESC(mali_pp2_int_count, "Mali PP1 interrupt count\n");
+
+module_param(mali_pp1_mmu_int_count, uint, 0664);
+MODULE_PARM_DESC(mali_pp1_mmu_int_count, "Mali PP1 mmu interrupt count\n");
+
+module_param(mali_pp2_mmu_int_count, uint, 0664);
+MODULE_PARM_DESC(mali_pp2_mmu_int_count, "Mali PP2 mmu interrupt count\n");
+#endif
+
+MODULE_DESCRIPTION("AMLOGIC mali fix driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tim Yao <timyao@amlogic.com>");
diff --git a/utgard/platform/meson_m400/mali_fix.h b/utgard/platform/meson_m400/mali_fix.h
new file mode 100644
index 0000000..3c29161
--- /dev/null
+++ b/utgard/platform/meson_m400/mali_fix.h
@@ -0,0 +1,14 @@
+#ifndef MALI_FIX_H
+#define MALI_FIX_H
+
+#define MMU_INT_NONE 0
+#define MMU_INT_HIT  1
+#define MMU_INT_TOP  2
+#define MMU_INT_BOT  3
+
+extern void malifix_init(void);
+extern void malifix_exit(void);
+extern void malifix_set_mmu_int_process_state(int, int);
+extern int  malifix_get_mmu_int_process_state(int);
+extern int mali_meson_is_revb(void);
+#endif /* MALI_FIX_H */
diff --git a/utgard/platform/meson_m400/mali_platform.c b/utgard/platform/meson_m400/mali_platform.c
new file mode 100644
index 0000000..33c52f1
--- /dev/null
+++ b/utgard/platform/meson_m400/mali_platform.c
@@ -0,0 +1,315 @@
+/*
+ * @file mali_platform.c
+ * Platform specific Mali driver functions for meson platform
+ *
+ * Copyright (C) 2017 Amlogic, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/spinlock_types.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <mach/am_regs.h>
+#include <mach/clock.h>
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_platform.h"
+#include "mali_poweron_reg.h"
+#include "mali_fix.h"
+#include "mali_platform.h"
+
+static int last_power_mode = -1;
+static int mali_init_flag = 0;
+static const u32 poweron_data[] =
+{
+/* commands */
+/* 000 */ 0x00000040, 0x20400000, 0x00000300, 0x30040000,
+/* 010 */ 0x00000400, 0x400a0000, 0x0f000033, 0x10000042,
+/* 020 */ 0x00300c00, 0x10000040, 0x4c000001, 0x00000000,
+/* 030 */ 0x00000000, 0x60000000, 0x00000000, 0x00000000,
+/* 040 */ 0x00004000, 0x00002000, 0x00000210, 0x0000203f,
+/* 050 */ 0x00000220, 0x0000203f, 0x00000230, 0x0000203f,
+/* 060 */ 0x00000240, 0x0000203f, 0x00000250, 0x0000203f,
+/* 070 */ 0x00000260, 0x0000203f, 0x00000270, 0x0000203f,
+/* 080 */ 0x00000280, 0x0000203f, 0x00000290, 0x0000203f,
+/* 090 */ 0x000002a0, 0x0000203f, 0x000002b0, 0x0000203f,
+/* 0a0 */ 0x000002c0, 0x0000203f, 0x000002d0, 0x0000203f,
+/* 0b0 */ 0x000002e0, 0x0000203f, 0x000002f0, 0x0000203f,
+/* 0c0 */ 0x00002000, 0x00002000, 0x00002010, 0x0000203f,
+/* 0d0 */ 0x00002020, 0x0000203f, 0x00002030, 0x0000203f,
+/* 0e0 */ 0x00002040, 0x0000203f, 0x00002050, 0x0000203f,
+/* 0f0 */ 0x00002060, 0x0000203f, 0x00002070, 0x0000203f,
+/* 100 */ 0x00002080, 0x0000203f, 0x00002090, 0x0000203f,
+/* 110 */ 0x000020a0, 0x0000203f, 0x000020b0, 0x0000203f,
+/* 120 */ 0x000020c0, 0x0000203f, 0x000020d0, 0x0000203f,
+/* 130 */ 0x000020e0, 0x0000203f, 0x000020f0, 0x0000203f,
+/* 140 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 150 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 160 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 170 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 180 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 190 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 1a0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 1b0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 1c0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 1d0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 1e0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 1f0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 200 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 210 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 220 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 230 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 240 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 250 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 260 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 270 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 280 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 290 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 2a0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 2b0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 2c0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 2d0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 2e0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 2f0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* const */
+/* 300 */ 0x3f2a6400, 0xbf317600, 0x3e8d8e00, 0x00000000,
+/* 310 */ 0x3f2f7000, 0x3f36e200, 0x3e10c500, 0x00000000,
+/* 320 */ 0xbe974e00, 0x3dc35300, 0x3f735800, 0x00000000,
+/* 330 */ 0x00000000, 0x00000000, 0x00000000, 0x3f800000,
+/* 340 */ 0x42b00000, 0x42dc0000, 0x3f800000, 0x3f800000,
+/* 350 */ 0x42b00000, 0x42dc0000, 0x00000000, 0x00000000,
+/* 360 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 370 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 380 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 390 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 3a0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 3b0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 3c0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 3d0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 3e0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* 3f0 */ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+/* inst */
+/* 400 */ 0xad4ad6b5, 0x438002b5, 0x0007ffe0, 0x00001e00,
+/* 410 */ 0xad4ad694, 0x038002b5, 0x0087ffe0, 0x00005030,
+/* 420 */ 0xad4bda56, 0x038002b5, 0x0007ffe0, 0x00001c10,
+/* 430 */ 0xad4ad6b5, 0x038002b5, 0x4007fee0, 0x00001c00
+};
+
+static struct clk *mali_clk = NULL;
+
+#if MESON_CPU_TYPE <= MESON_CPU_TYPE_MESON6
+
+#define OFFSET_MMU_DTE          0
+#define OFFSET_MMU_PTE          4096
+#define OFFSET_MMU_VIRTUAL_ZERO 8192
+
+#define INT_MALI_GP_BITS     (1<<16)
+#define INT_MALI_PP_BITS     (1<<18)
+#define INT_MALI_PP_MMU_BITS (1<<19)
+#define INT_ALL              (0xffffffff)
+
+#define INT_MALI_PP_MMU_PAGE_FAULT (1<<0)
+
+#define MMU_FLAG_DTE_PRESENT            0x01
+#define MMU_FLAG_PTE_PAGE_PRESENT       0x01
+#define MMU_FLAG_PTE_RD_PERMISSION      0x02
+#define MMU_FLAG_PTE_WR_PERMISSION      0x04
+
+//static int mali_revb_flag = -1;
+static DEFINE_SPINLOCK(lock);
+extern int mali_revb_flag;
+int mali_meson_is_revb(void)
+{
+	printk("mail version=%d\n",mali_revb_flag);
+	if (mali_revb_flag == -1)
+		mali_revb_flag = 1;
+	else if (mali_revb_flag == 0)
+	    panic("rev-a! you should neet earlier version of mali_driver.!\n");
+
+    return mali_revb_flag;
+}
+
+static void mali_meson_poweron(int first_poweron)
+{
+	unsigned long flags;
+	u32 p, p_aligned;
+	dma_addr_t p_phy;
+	int i;
+	unsigned int_mask;
+
+	if (!first_poweron) {
+		if ((last_power_mode != -1) && (last_power_mode != MALI_POWER_MODE_DEEP_SLEEP)) {
+			 MALI_DEBUG_PRINT(3, ("Maybe your system not deep sleep now.......\n"));
+			//printk("Maybe your system not deep sleep now.......\n");
+			return;
+		}
+	}
+
+	MALI_DEBUG_PRINT(2, ("mali_meson_poweron: Mali APB bus accessing\n"));
+	if (READ_MALI_REG(MALI_PP_PP_VERSION) != MALI_PP_PP_VERSION_MAGIC) {
+	MALI_DEBUG_PRINT(3, ("mali_meson_poweron: Mali APB bus access failed\n"));
+	//printk("mali_meson_poweron: Mali APB bus access failed.");
+	return;
+	}
+	MALI_DEBUG_PRINT(2, ("..........accessing done.\n"));
+	if (READ_MALI_REG(MALI_MMU_DTE_ADDR) != 0) {
+		MALI_DEBUG_PRINT(3, ("mali_meson_poweron: Mali is not really powered off\n"));
+		//printk("mali_meson_poweron: Mali is not really powered off.");
+		return;
+	}
+
+	p = (u32)kcalloc(4096 * 4, 1, GFP_KERNEL);
+	if (!p) {
+		printk("mali_meson_poweron: NOMEM in meson_poweron\n");
+		return;
+	}
+
+	p_aligned = __ALIGN_MASK(p, 4096);
+
+	/* DTE */
+	*(u32 *)(p_aligned) = (virt_to_phys((void *)p_aligned) + OFFSET_MMU_PTE) | MMU_FLAG_DTE_PRESENT;
+	/* PTE */
+	for (i=0; i<1024; i++) {
+		*(u32 *)(p_aligned + OFFSET_MMU_PTE + i*4) =
+		    (virt_to_phys((void *)p_aligned) + OFFSET_MMU_VIRTUAL_ZERO + 4096 * i) |
+		    MMU_FLAG_PTE_PAGE_PRESENT |
+		    MMU_FLAG_PTE_RD_PERMISSION;
+	}
+
+	/* command & data */
+	memcpy((void *)(p_aligned + OFFSET_MMU_VIRTUAL_ZERO), poweron_data, 4096);
+
+	p_phy = dma_map_single(NULL, (void *)p_aligned, 4096 * 3, DMA_TO_DEVICE);
+
+	/* Set up Mali GP MMU */
+	WRITE_MALI_REG(MALI_MMU_DTE_ADDR, p_phy);
+	WRITE_MALI_REG(MALI_MMU_CMD, 0);
+
+	if ((READ_MALI_REG(MALI_MMU_STATUS) & 1) != 1)
+		printk("mali_meson_poweron: MMU enabling failed.\n");
+
+	/* Set up Mali command registers */
+	WRITE_MALI_REG(MALI_APB_GP_VSCL_START, 0);
+	WRITE_MALI_REG(MALI_APB_GP_VSCL_END, 0x38);
+
+	spin_lock_irqsave(&lock, flags);
+
+	int_mask = READ_MALI_REG(MALI_APB_GP_INT_MASK);
+	WRITE_MALI_REG(MALI_APB_GP_INT_CLEAR, 0x707bff);
+	WRITE_MALI_REG(MALI_APB_GP_INT_MASK, 0);
+
+	/* Start GP */
+	WRITE_MALI_REG(MALI_APB_GP_CMD, 1);
+
+	for (i = 0; i<100; i++)
+		udelay(500);
+
+	/* check Mali GP interrupt */
+	if (READ_MALI_REG(MALI_APB_GP_INT_RAWSTAT) & 0x707bff)
+		printk("mali_meson_poweron: Interrupt received.\n");
+	else
+		printk("mali_meson_poweron: No interrupt received.\n");
+
+	/* force reset GP */
+	WRITE_MALI_REG(MALI_APB_GP_CMD, 1 << 5);
+
+	/* stop MMU paging and reset */
+	WRITE_MALI_REG(MALI_MMU_CMD, 1);
+	WRITE_MALI_REG(MALI_MMU_CMD, 6);
+
+	for (i = 0; i<100; i++)
+		udelay(500);
+
+	WRITE_MALI_REG(MALI_APB_GP_INT_CLEAR, 0x3ff);
+	WRITE_MALI_REG(MALI_MMU_INT_CLEAR, INT_ALL);
+	WRITE_MALI_REG(MALI_MMU_INT_MASK, 0);
+
+	WRITE_MALI_REG(MALI_APB_GP_INT_CLEAR, 0x707bff);
+	WRITE_MALI_REG(MALI_APB_GP_INT_MASK, int_mask);
+
+	spin_unlock_irqrestore(&lock, flags);
+
+	dma_unmap_single(NULL, p_phy, 4096 * 3, DMA_TO_DEVICE);
+
+	kfree((void *)p);
+
+	/* Mali revision detection */
+	if (last_power_mode == -1)
+		mali_revb_flag = mali_meson_is_revb();
+}
+#else
+static void mali_meson_poweron(int first_poweron) {
+	return;
+}
+#endif /*MESON_CPU_TYPE <= MESON_CPU_TYPE_MESON6 */
+
+_mali_osk_errcode_t mali_platform_init(void)
+{
+	mali_clk = clk_get_sys("mali", "pll_fixed");
+
+	if (mali_clk ) {
+		if (!mali_init_flag) {
+			clk_set_rate(mali_clk, 333000000);
+			mali_clk->enable(mali_clk);
+			malifix_init();
+			mali_meson_poweron(1);
+			mali_init_flag = 1;
+		}
+		MALI_SUCCESS;
+	} else
+		panic("linux kernel should > 3.0\n");
+
+#if MESON_CPU_TYPE >= MESON_CPU_TYPE_MESON6
+    MALI_PRINT_ERROR(("Failed to lookup mali clock"));
+	MALI_ERROR(_MALI_OSK_ERR_FAULT);
+#else
+	MALI_SUCCESS;
+#endif /* CONFIG_ARCH_MESON6 */
+}
+
+_mali_osk_errcode_t mali_platform_deinit(void)
+{
+	mali_init_flag =0;
+	printk("MALI:mali_platform_deinit\n");
+	malifix_exit();
+
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t mali_platform_power_mode_change(mali_power_mode power_mode)
+{
+	MALI_DEBUG_PRINT(3, ( "mali_platform_power_mode_change power_mode=%d\n", power_mode));
+
+	switch (power_mode) {
+	case MALI_POWER_MODE_LIGHT_SLEEP:
+	case MALI_POWER_MODE_DEEP_SLEEP:
+		/* Turn off mali clock gating */
+		mali_clk->disable(mali_clk);
+		break;
+
+        case MALI_POWER_MODE_ON:
+		/* Turn on MALI clock gating */
+		mali_clk->enable(mali_clk);
+		mali_meson_poweron(0);
+		break;
+	}
+	last_power_mode = power_mode;
+	MALI_SUCCESS;
+}
+
diff --git a/utgard/platform/meson_m400/mali_platform.h b/utgard/platform/meson_m400/mali_platform.h
new file mode 100644
index 0000000..63f6118
--- /dev/null
+++ b/utgard/platform/meson_m400/mali_platform.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2010-2012 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_platform.h
+ * Platform specific Mali driver functions
+ */
+
+#ifndef __MALI_PLATFORM_H__
+#define __MALI_PLATFORM_H__
+
+#include "mali_osk.h"
+
+/** @brief description of power change reasons
+ */
+typedef enum mali_power_mode_tag
+{
+	MALI_POWER_MODE_ON,           /**< Power Mali on */
+	MALI_POWER_MODE_LIGHT_SLEEP,  /**< Mali has been idle for a short time, or runtime PM suspend */
+	MALI_POWER_MODE_DEEP_SLEEP,   /**< Mali has been idle for a long time, or OS suspend */
+} mali_power_mode;
+
+/** @brief Platform specific setup and initialisation of MALI
+ *
+ * This is called from the entrypoint of the driver to initialize the platform
+ *
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_platform_init(void);
+
+/** @brief Platform specific deinitialisation of MALI
+ *
+ * This is called on the exit of the driver to terminate the platform
+ *
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_platform_deinit(void);
+
+/** @brief Platform specific powerdown sequence of MALI
+ *
+ * Notification from the Mali device driver stating the new desired power mode.
+ * MALI_POWER_MODE_ON must be obeyed, while the other modes are optional.
+ * @param power_mode defines the power modes
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_platform_power_mode_change(mali_power_mode power_mode);
+
+
+/** @brief Platform specific handling of GPU utilization data
+ *
+ * When GPU utilization data is enabled, this function will be
+ * periodically called.
+ *
+ * @param utilization The workload utilization of the Mali GPU. 0 = no utilization, 256 = full utilization.
+ */
+void mali_gpu_utilization_handler(u32 utilization);
+
+/** @brief Setting the power domain of MALI
+ *
+ * This function sets the power domain of MALI if Linux run time power management is enabled
+ *
+ * @param dev Reference to struct platform_device (defined in linux) used by MALI GPU
+ */
+void set_mali_parent_power_domain(void* dev);
+
+#endif
diff --git a/utgard/platform/meson_m400/mali_poweron_reg.h b/utgard/platform/meson_m400/mali_poweron_reg.h
new file mode 100644
index 0000000..ee0a880
--- /dev/null
+++ b/utgard/platform/meson_m400/mali_poweron_reg.h
@@ -0,0 +1,76 @@
+/*
+ * @file mali_poweron_reg.h
+ * Platform specific Mali driver functions for meson platform
+ *
+ * Copyright (C) 2017 Amlogic, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef MALI_POWERON_REG_H
+#define MALI_POWERON_REG_H
+
+#define MALI_PP_PP_VERSION_MAGIC      0xCD070100UL
+
+#if defined(IO_APB2_BUS_PHY_BASE)
+#define WRITE_MALI_REG(reg, val) \
+    __raw_writel(val, (volatile void *)(reg - IO_APB2_BUS_PHY_BASE + IO_APB2_BUS_BASE))
+#define READ_MALI_REG(reg) \
+    __raw_readl((volatile void *)(reg - IO_APB2_BUS_PHY_BASE + IO_APB2_BUS_BASE))
+#else
+#define WRITE_MALI_REG(reg, val) \
+    __raw_writel(val, (volatile void *)(reg - IO_APB_BUS_PHY_BASE + IO_APB_BUS_BASE))
+#define READ_MALI_REG(reg) \
+    __raw_readl((volatile void *)(reg - IO_APB_BUS_PHY_BASE + IO_APB_BUS_BASE))
+#endif
+
+#define MALI_APB_GP_VSCL_START        0xd0060000
+#define MALI_APB_GP_VSCL_END          0xd0060004
+#define MALI_APB_GP_CMD               0xd0060020
+#define MALI_APB_GP_INT_RAWSTAT       0xd0060024
+#define MALI_APB_GP_INT_CLEAR         0xd0060028
+#define MALI_APB_GP_INT_MASK          0xd006002c
+#define MALI_APB_GP_INT_STAT          0xd0060030
+
+#define MALI_MMU_DTE_ADDR             0xd0063000
+#define MALI_MMU_STATUS               0xd0063004
+#define MALI_MMU_CMD                  0xd0063008
+#define MALI_MMU_RAW_STATUS           0xd0064014
+#define MALI_MMU_INT_CLEAR            0xd0064018
+#define MALI_MMU_INT_MASK             0xd006401c
+#define MALI_MMU_INT_STATUS           0xd0064020
+
+#define MALI_PP_MMU_DTE_ADDR          0xd0064000
+#define MALI_PP_MMU_STATUS            0xd0064004
+#define MALI_PP_MMU_CMD               0xd0064008
+#define MALI_PP_MMU_RAW_STATUS        0xd0064014
+#define MALI_PP_MMU_INT_CLEAR         0xd0064018
+#define MALI_PP_MMU_INT_MASK          0xd006401c
+#define MALI_PP_MMU_INT_STATUS        0xd0064020
+
+#define MALI_APB_PP_REND_LIST_ADDR    0xd0068000
+#define MALI_APB_PP_REND_RSW_BASE     0xd0068004
+#define MALI_APB_PP_REND_VERTEX_BASE  0xd0068008
+#define MALI_APB_PPSUBPIXEL_SPECIFIER 0xd0068048
+#define MALI_APB_WB0_SOURCE_SELECT    0xd0068100
+#define MALI_APB_WB0_TARGET_ADDR      0xd0068104
+#define MALI_APB_WB0_TARGET_SCANLINE_LENGTH 0xd0068114
+
+#define MALI_PP_PP_VERSION            0xd0069000
+#define MALI_PP_STATUS                0xd0069008
+#define MALI_PP_CTRL_MGMT             0xd006900C
+#define MALI_PP_INT_RAWSTAT           0xd0069020
+#define MALI_PP_INT_CLEAR             0xd0069024
+#define MALI_PP_INT_MASK              0xd0069028
+#define MALI_PP_INT_STAT              0xd006902C
+
+#endif /* MALI_POWERON_REG_H */
diff --git a/utgard/platform/meson_m400/platform_mx.c b/utgard/platform/meson_m400/platform_mx.c
new file mode 100644
index 0000000..993e51c
--- /dev/null
+++ b/utgard/platform/meson_m400/platform_mx.c
@@ -0,0 +1,240 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#include <asm/io.h>
+#include <linux/mali/mali_utgard.h>
+
+#include <common/mali_kernel_common.h>
+#include <common/mali_osk_profiling.h>
+#include <common/mali_pmu.h>
+
+#include "meson_main.h"
+#include "mali_fix.h"
+#include "mali_platform.h"
+
+/**
+ *    For Meson 6tvd.
+ *
+ */
+
+#if MESON_CPU_TYPE <= MESON_CPU_TYPE_MESON6TV
+
+u32 mali_dvfs_clk[1];
+u32 mali_dvfs_clk_sample[1];
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6TV
+#undef INT_MALI_GP
+#undef INT_MALI_GP_MMU
+#undef INT_MALI_PP
+#undef INT_MALI_PP2
+#undef INT_MALI_PP3
+#undef INT_MALI_PP4
+#undef INT_MALI_PP_MMU
+#undef INT_MALI_PP2_MMU
+#undef INT_MALI_PP3_MMU
+#undef INT_MALI_PP4_MMU
+
+#define INT_MALI_GP      (48+32)
+#define INT_MALI_GP_MMU  (49+32)
+#define INT_MALI_PP      (50+32)
+#define INT_MALI_PP2     (58+32)
+#define INT_MALI_PP3     (60+32)
+#define INT_MALI_PP4     (62+32)
+#define INT_MALI_PP_MMU  (51+32)
+#define INT_MALI_PP2_MMU (59+32)
+#define INT_MALI_PP3_MMU (61+32)
+#define INT_MALI_PP4_MMU (63+32)
+
+#ifndef CONFIG_MALI400_4_PP
+static struct resource meson_mali_resources[] =
+{
+	MALI_GPU_RESOURCES_MALI400_MP2(0xd0060000,
+			INT_MALI_GP, INT_MALI_GP_MMU,
+			INT_MALI_PP, INT_MALI_PP_MMU,
+			INT_MALI_PP2, INT_MALI_PP2_MMU)
+};
+#else
+static struct resource meson_mali_resources[] =
+{
+	MALI_GPU_RESOURCES_MALI400_MP4(0xd0060000,
+			INT_MALI_GP, INT_MALI_GP_MMU,
+			INT_MALI_PP, INT_MALI_PP_MMU,
+			INT_MALI_PP2, INT_MALI_PP2_MMU,
+			INT_MALI_PP3, INT_MALI_PP3_MMU,
+			INT_MALI_PP4, INT_MALI_PP4_MMU
+			)
+};
+#endif
+
+#elif MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+
+#undef INT_MALI_GP
+#undef INT_MALI_GP_MMU
+#undef INT_MALI_PP
+#undef INT_MALI_PP2
+#undef INT_MALI_PP_MMU
+#undef INT_MALI_PP2_MMU
+
+#define INT_MALI_GP      (48+32)
+#define INT_MALI_GP_MMU  (49+32)
+#define INT_MALI_PP      (50+32)
+#define INT_MALI_PP_MMU  (51+32)
+#define INT_MALI_PP2_MMU ( 6+32)
+
+static struct resource meson_mali_resources[] =
+{
+	MALI_GPU_RESOURCES_MALI400_MP2(0xd0060000,
+			INT_MALI_GP, INT_MALI_GP_MMU,
+			INT_MALI_PP, INT_MALI_PP2_MMU,
+			INT_MALI_PP_MMU, INT_MALI_PP2_MMU)
+};
+
+#else  /* MESON_CPU_TYPE == MESON_CPU_TYPE_MESON3 */
+
+#undef INT_MALI_GP
+#undef INT_MALI_GP_MMU
+#undef INT_MALI_PP
+#undef INT_MALI_PP_MMU
+
+#define INT_MALI_GP	48
+#define INT_MALI_GP_MMU 49
+#define INT_MALI_PP	50
+#define INT_MALI_PP_MMU 51
+
+static struct resource meson_mali_resources[] =
+{
+	MALI_GPU_RESOURCES_MALI400_MP1(0xd0060000,
+			INT_MALI_GP, INT_MALI_GP_MMU, INT_MALI_PP, INT_MALI_PP_MMU)
+};
+#endif /* MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6TV */
+
+void mali_gpu_utilization_callback(struct mali_gpu_utilization_data *data)
+{
+
+}
+
+mali_plat_info_t mali_plat_data = {
+
+};
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+	/* for mali platform data. */
+	struct mali_gpu_device_data* pdev = ptr_plt_dev->dev.platform_data;
+
+	pdev->control_interval = 1000;
+	pdev->utilization_callback = mali_gpu_utilization_callback;
+
+	/* for resource data. */
+	ptr_plt_dev->num_resources = ARRAY_SIZE(meson_mali_resources);
+	ptr_plt_dev->resource = meson_mali_resources;
+	return 0;
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+	mali_platform_init();
+	return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+	mali_platform_deinit();
+	return 0;
+}
+
+int mali_light_suspend(struct device *device)
+{
+	int ret = 0;
+
+	if (NULL != device->driver &&
+	    NULL != device->driver->pm &&
+	    NULL != device->driver->pm->runtime_suspend)
+	{
+		/* Need to notify Mali driver about this event */
+		ret = device->driver->pm->runtime_suspend(device);
+	}
+
+	mali_platform_power_mode_change(MALI_POWER_MODE_LIGHT_SLEEP);
+	return ret;
+}
+
+int mali_light_resume(struct device *device)
+{
+	int ret = 0;
+
+	mali_platform_power_mode_change(MALI_POWER_MODE_ON);
+
+	if (NULL != device->driver &&
+	    NULL != device->driver->pm &&
+	    NULL != device->driver->pm->runtime_resume)
+	{
+		/* Need to notify Mali driver about this event */
+		ret = device->driver->pm->runtime_resume(device);
+	}
+	return ret;
+}
+
+int mali_deep_suspend(struct device *device)
+{
+	int ret = 0;
+
+	if (NULL != device->driver &&
+	    NULL != device->driver->pm &&
+	    NULL != device->driver->pm->suspend)
+	{
+		/* Need to notify Mali driver about this event */
+		ret = device->driver->pm->suspend(device);
+	}
+
+	mali_platform_power_mode_change(MALI_POWER_MODE_DEEP_SLEEP);
+	return ret;
+}
+
+int mali_deep_resume(struct device *device)
+{
+	int ret = 0;
+
+	mali_platform_power_mode_change(MALI_POWER_MODE_ON);
+
+	if (NULL != device->driver &&
+	    NULL != device->driver->pm &&
+	    NULL != device->driver->pm->resume)
+	{
+		/* Need to notify Mali driver about this event */
+		ret = device->driver->pm->resume(device);
+	}
+	return ret;
+
+}
+
+void mali_core_scaling_term(void)
+{
+
+}
+
+int get_gpu_max_clk_level(void)
+{
+    return 0;
+}
+
+void mali_post_init(void)
+{
+}
+#endif /* MESON_CPU_TYPE <= MESON_CPU_TYPE_MESON6 */
diff --git a/utgard/platform/meson_m450/mali_clock.c b/utgard/platform/meson_m450/mali_clock.c
new file mode 100644
index 0000000..7fa0d29
--- /dev/null
+++ b/utgard/platform/meson_m450/mali_clock.c
@@ -0,0 +1,122 @@
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <linux/io.h>
+#include <mach/io.h>
+#include <plat/io.h>
+#include <asm/io.h>
+#include "meson_main.h"
+
+#if MESON_CPU_TYPE >= MESON_CPU_TYPE_MESON6TVD
+
+#define FCLK_MPLL2 (2 << 9)
+static DEFINE_SPINLOCK(lock);
+static mali_plat_info_t* pmali_plat = NULL;
+static u32 mali_extr_backup = 0;
+static u32 mali_extr_sample_backup = 0;
+
+int mali_clock_init(mali_plat_info_t* mali_plat)
+{
+	u32 def_clk_data;
+	if (mali_plat == NULL) {
+		printk(" Mali platform data is NULL!!!\n");
+		return -1;
+	}
+
+	pmali_plat = mali_plat;
+	if (pmali_plat->have_switch) {
+		def_clk_data = pmali_plat->clk[pmali_plat->def_clock];
+		writel(def_clk_data | (def_clk_data << 16), (u32*)P_HHI_MALI_CLK_CNTL);
+		setbits_le32((u32)P_HHI_MALI_CLK_CNTL, 1 << 24);
+		setbits_le32((u32)P_HHI_MALI_CLK_CNTL, 1 << 8);
+	} else {
+		mali_clock_set(pmali_plat->def_clock);
+	}
+
+	mali_extr_backup = pmali_plat->clk[pmali_plat->clk_len - 1];
+	mali_extr_sample_backup = pmali_plat->clk_sample[pmali_plat->clk_len - 1];
+	return 0;
+}
+
+int mali_clock_critical(critical_t critical, size_t param)
+{
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&lock, flags);
+	ret = critical(param);
+	spin_unlock_irqrestore(&lock, flags);
+	return ret;
+}
+
+static int critical_clock_set(size_t param)
+{
+	unsigned int idx = param;
+	if (pmali_plat->have_switch) {
+		u32 clk_value;
+		setbits_le32((u32)P_HHI_MALI_CLK_CNTL, 1 << 31);
+		clk_value = readl((u32 *)P_HHI_MALI_CLK_CNTL) & 0xffff0000;
+		clk_value = clk_value | pmali_plat->clk[idx] | (1 << 8);
+		writel(clk_value, (u32*)P_HHI_MALI_CLK_CNTL);
+		clrbits_le32((u32)P_HHI_MALI_CLK_CNTL, 1 << 31);
+	} else {
+		clrbits_le32((u32)P_HHI_MALI_CLK_CNTL, 1 << 8);
+		clrbits_le32((u32)P_HHI_MALI_CLK_CNTL, (0x7F | (0x7 << 9)));
+		writel(pmali_plat->clk[idx], (u32*)P_HHI_MALI_CLK_CNTL);
+		setbits_le32((u32)P_HHI_MALI_CLK_CNTL, 1 << 8);
+	}
+	return 0;
+}
+
+int mali_clock_set(unsigned int clock)
+{
+	return mali_clock_critical(critical_clock_set, (size_t)clock);
+}
+
+void disable_clock(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&lock, flags);
+	clrbits_le32((u32)P_HHI_MALI_CLK_CNTL, 1 << 8);
+	spin_unlock_irqrestore(&lock, flags);
+}
+
+void enable_clock(void)
+{
+	u32 ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&lock, flags);
+	setbits_le32((u32)P_HHI_MALI_CLK_CNTL, 1 << 8);
+	ret = readl((u32 *)P_HHI_MALI_CLK_CNTL) & (1 << 8);
+	spin_unlock_irqrestore(&lock, flags);
+}
+
+u32 get_mali_freq(u32 idx)
+{
+	if (!mali_pm_statue) {
+		return pmali_plat->clk_sample[idx];
+	} else {
+		return 0;
+	}
+}
+
+void set_str_src(u32 data)
+{
+	if (data == 11) {
+		writel(0x0004d000, (u32*)P_HHI_MPLL_CNTL9);
+	} else if (data > 11) {
+		writel(data, (u32*)P_HHI_MPLL_CNTL9);
+	}
+	if (data == 0) {
+		pmali_plat->clk[pmali_plat->clk_len - 1] = mali_extr_backup;
+		pmali_plat->clk_sample[pmali_plat->clk_len - 1] = mali_extr_sample_backup;
+	} else if (data > 10) {
+		pmali_plat->clk_sample[pmali_plat->clk_len - 1] = 600;
+		pmali_plat->clk[pmali_plat->clk_len - 1] = FCLK_MPLL2;
+	}
+}
+#endif
diff --git a/utgard/platform/meson_m450/mali_clock.h b/utgard/platform/meson_m450/mali_clock.h
new file mode 100644
index 0000000..53ccda0
--- /dev/null
+++ b/utgard/platform/meson_m450/mali_clock.h
@@ -0,0 +1,13 @@
+#ifndef _MALI_CLOCK_H_
+#define _MALI_CLOCK_H_
+
+typedef int (*critical_t)(size_t param);
+int mali_clock_critical(critical_t critical, size_t param);
+
+int mali_clock_init(mali_plat_info_t*);
+int mali_clock_set(unsigned int index);
+void disable_clock(void);
+void enable_clock(void);
+u32 get_mali_freq(u32 idx);
+void set_str_src(u32 data);
+#endif /* _MALI_CLOCK_H_ */
diff --git a/utgard/platform/meson_m450/mali_platform.h b/utgard/platform/meson_m450/mali_platform.h
new file mode 100644
index 0000000..41185d0
--- /dev/null
+++ b/utgard/platform/meson_m450/mali_platform.h
@@ -0,0 +1,15 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/kernel.h>
+#ifndef MALI_PLATFORM_H_
+#define MALI_PLATFORM_H_
+
+extern u32 mali_gp_reset_fail;
+extern u32 mali_core_timeout;
+
+#endif /* MALI_PLATFORM_H_ */
diff --git a/utgard/platform/meson_m450/mali_pm_device.c b/utgard/platform/meson_m450/mali_pm_device.c
new file mode 100644
index 0000000..4f9e0d0
--- /dev/null
+++ b/utgard/platform/meson_m450/mali_pm_device.c
@@ -0,0 +1,167 @@
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include "meson_main.h"
+#include <common/mali_kernel_common.h>
+#include <common/mali_osk_profiling.h>
+#include <common/mali_pmu.h>
+#include <linux/mali/mali_utgard.h>
+
+static int mali_os_suspend(struct device *device)
+{
+	int ret = 0;
+
+	MALI_DEBUG_PRINT(4, ("mali_os_suspend() called\n"));
+	ret = mali_deep_suspend(device);
+
+	return ret;
+}
+
+static int mali_os_resume(struct device *device)
+{
+	int ret = 0;
+
+	MALI_DEBUG_PRINT(4, ("mali_os_resume() called\n"));
+
+	ret = mali_deep_resume(device);
+
+	return ret;
+}
+
+static int mali_os_freeze(struct device *device)
+{
+	int ret = 0;
+
+	MALI_DEBUG_PRINT(4, ("mali_os_freeze() called\n"));
+
+	if (NULL != device->driver &&
+	    NULL != device->driver->pm &&
+	    NULL != device->driver->pm->freeze)
+	{
+		/* Need to notify Mali driver about this event */
+		ret = device->driver->pm->freeze(device);
+	}
+
+	return ret;
+}
+//copy from r4p1 linux/mali_pmu_power_up_down.c
+#if MESON_CPU_TYPE >= MESON_CPU_TYPE_MESON8
+static int mali_pmu_powerup(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	MALI_DEBUG_PRINT(5, ("Mali PMU: Power up\n"));
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	if (NULL == pmu) {
+		return -ENXIO;
+	}
+
+	mali_pmu_power_up_all(pmu);
+
+	return 0;
+}
+#endif
+
+static int mali_os_thaw(struct device *device)
+{
+	int ret = 0;
+
+	MALI_DEBUG_PRINT(4, ("mali_os_thaw() called\n"));
+#if MESON_CPU_TYPE >= MESON_CPU_TYPE_MESON8
+	enable_clock();
+	mali_pmu_powerup();
+#endif
+
+	if (NULL != device->driver &&
+	    NULL != device->driver->pm &&
+	    NULL != device->driver->pm->thaw)
+	{
+		/* Need to notify Mali driver about this event */
+		ret = device->driver->pm->thaw(device);
+	}
+
+	return ret;
+}
+
+static int mali_os_restore(struct device *device)
+{
+	MALI_DEBUG_PRINT(4, ("mali_os_thaw() called\n"));
+#if MESON_CPU_TYPE >= MESON_CPU_TYPE_MESON8
+	mali_dev_restore();
+#endif
+	return mali_os_resume(device);
+}
+
+#ifdef CONFIG_PM_RUNTIME
+#if 0
+static int mali_runtime_suspend(struct device *device)
+{
+	int ret = 0;
+
+	MALI_DEBUG_PRINT(4, ("mali_runtime_suspend() called\n"));
+	ret = mali_light_suspend(device);
+
+	return ret;
+}
+
+static int mali_runtime_resume(struct device *device)
+{
+	int ret = 0;
+
+	MALI_DEBUG_PRINT(4, ("mali_run	time_resume() called\n"));
+	ret = mali_light_resume(device);
+
+	return ret;
+}
+
+static int mali_runtime_idle(struct device *device)
+{
+	MALI_DEBUG_PRINT(4, ("mali_runtime_idle() called\n"));
+
+	if (NULL != device->driver &&
+	    NULL != device->driver->pm &&
+	    NULL != device->driver->pm->runtime_idle)
+	{
+		/* Need to notify Mali driver about this event */
+		int ret = device->driver->pm->runtime_idle(device);
+		if (0 != ret)
+		{
+			return ret;
+		}
+	}
+
+	pm_runtime_suspend(device);
+
+	return 0;
+}
+#endif
+#endif
+
+static struct dev_pm_ops mali_gpu_device_type_pm_ops =
+{
+	.suspend = mali_os_suspend,
+	.resume = mali_os_resume,
+	.freeze = mali_os_freeze,
+	.thaw = mali_os_thaw,
+	.restore = mali_os_restore,
+#if 0//def CONFIG_PM_RUNTIME
+	.runtime_suspend = mali_runtime_suspend,
+	.runtime_resume = mali_runtime_resume,
+	.runtime_idle = mali_runtime_idle,
+#endif
+};
+
+struct device_type mali_pm_device =
+{
+	.pm = &mali_gpu_device_type_pm_ops,
+};
diff --git a/utgard/platform/meson_m450/mali_scaling.h b/utgard/platform/meson_m450/mali_scaling.h
new file mode 100644
index 0000000..0d75f71
--- /dev/null
+++ b/utgard/platform/meson_m450/mali_scaling.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.h
+ * Example core scaling policy.
+ */
+
+#ifndef __ARM_CORE_SCALING_H__
+#define __ARM_CORE_SCALING_H__
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+
+enum mali_scale_mode_t {
+	MALI_PP_SCALING = 0,
+	MALI_PP_FS_SCALING,
+	MALI_SCALING_DISABLE,
+	MALI_TURBO_MODE,
+	MALI_SCALING_MODE_MAX
+};
+
+typedef struct mali_dvfs_threshold_table {
+	uint32_t    freq_index;
+	uint32_t    voltage;
+	uint32_t    keep_count;
+	uint32_t    downthreshold;
+	uint32_t	upthreshold;
+	uint32_t	clk_freq;
+	const char  *clk_parent;
+	struct clk  *clkp_handle;
+	uint32_t	clkp_freq;
+} mali_dvfs_threshold_table;
+
+/**
+ *  restrictions on frequency and number of pp.
+ */
+typedef struct mali_scale_info_t {
+	u32 minpp;
+	u32 maxpp;
+	u32 minclk;
+	u32 maxclk;
+} mali_scale_info_t;
+
+/**
+ * Platform spesific data for meson chips.
+ */
+typedef struct mali_plat_info_t {
+	u32 cfg_pp; /* number of pp. */
+	u32 cfg_min_pp;
+	u32 turbo_clock; /* reserved clock src. */
+	u32 def_clock; /* gpu clock used most of time.*/
+	u32 cfg_clock; /* max clock could be used.*/
+	u32 cfg_clock_bkup; /* same as cfg_clock, for backup. */
+	u32 cfg_min_clock;
+
+	u32  sc_mpp; /* number of pp used most of time.*/
+	u32  bst_gpu; /* threshold for boosting gpu. */
+	u32  bst_pp; /* threshold for boosting PP. */
+
+	u32 *clk;
+	u32 *clk_sample;
+	u32 clk_len;
+	u32 have_switch; /* have clock gate switch or not. */
+
+	mali_dvfs_threshold_table *dvfs_table;
+    //struct mali_gpu_clk_item *clk_items;
+	u32 dvfs_table_size;
+
+	mali_scale_info_t scale_info;
+	u32 maxclk_sysfs;
+	u32 maxpp_sysfs;
+
+	/* set upper limit of pp or frequency, for THERMAL thermal or band width saving.*/
+	u32 limit_on;
+
+	/* for boost up gpu by user. */
+	void (*plat_preheat)(void);
+
+#if 0
+    struct platform_device *pdev;
+    void __iomem *reg_base_hiubus;
+    void __iomem *reg_base_aobus;
+	struct work_struct wq_work;
+    struct clk *clk_mali;
+    struct clk *clk_mali_0;
+    struct clk *clk_mali_1;
+#endif
+} mali_plat_info_t;
+mali_plat_info_t* get_mali_plat_data(void);
+
+/**
+ * Initialize core scaling policy.
+ *
+ * @note The core scaling policy will assume that all PP cores are on initially.
+ *
+ * @param num_pp_cores Total number of PP cores.
+ */
+int mali_core_scaling_init(mali_plat_info_t*);
+
+/**
+ * Terminate core scaling policy.
+ */
+void mali_core_scaling_term(void);
+
+/**
+ * cancel and flush scaling job queue.
+ */
+void flush_scaling_job(void);
+
+/* get current state(pp, clk). */
+void get_mali_rt_clkpp(u32* clk, u32* pp);
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush);
+void revise_mali_rt(void);
+/* get max gpu clk level of this chip*/
+//int get_gpu_max_clk_level(void);
+
+/* get or set the scale mode. */
+u32 get_mali_schel_mode(void);
+void set_mali_schel_mode(u32 mode);
+
+/* for frequency reporter in DS-5 streamline. */
+u32 get_current_frequency(void);
+//void mali_dev_freeze(void);
+void mali_dev_restore(void);
+
+extern int mali_pm_statue;
+#endif /* __ARM_CORE_SCALING_H__ */
diff --git a/utgard/platform/meson_m450/meson_main.c b/utgard/platform/meson_m450/meson_main.c
new file mode 100644
index 0000000..9a550a6
--- /dev/null
+++ b/utgard/platform/meson_m450/meson_main.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2010, 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include <asm/io.h>
+
+#include "meson_main.h"
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include "common/mali_pmu.h"
+#include "common/mali_osk_profiling.h"
+
+int mali_pm_statue = 0;
+u32 mali_gp_reset_fail = 0;
+module_param(mali_gp_reset_fail, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_gp_reset_fail, "times of failed to reset GP");
+u32 mali_core_timeout = 0;
+module_param(mali_core_timeout, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_core_timeout, "times of failed to reset GP");
+
+static struct mali_gpu_device_data mali_gpu_data =
+{
+	.shared_mem_size = 1024 * 1024 * 1024,
+	.max_job_runtime = 60000, /* 60 seconds */
+	.pmu_switch_delay = 0xFFFF, /* do not have to be this high on FPGA, but it is good for testing to have a delay */
+#if defined(CONFIG_ARCH_MESON8B)||defined(CONFIG_ARCH_MESONG9BB)
+	.pmu_domain_config = {0x1, 0x2, 0x4, 0x0,
+						  0x0, 0x0, 0x0, 0x0,
+						  0x0, 0x1, 0x2, 0x0},
+#else
+	.pmu_domain_config = {0x1, 0x2, 0x4, 0x4,
+                          0x0, 0x8, 0x8, 0x8,
+                          0x0, 0x1, 0x2, 0x8},
+#endif
+};
+
+static void mali_platform_device_release(struct device *device);
+static struct platform_device mali_gpu_device =
+{
+	.name = MALI_GPU_NAME_UTGARD,
+	.id = 0,
+	.dev.release = mali_platform_device_release,
+	.dev.coherent_dma_mask = DMA_BIT_MASK(32),
+	.dev.platform_data = &mali_gpu_data,
+	.dev.type = &mali_pm_device, /* We should probably use the pm_domain instead of type on newer kernels */
+};
+
+int mali_pdev_pre_init(struct platform_device* ptr_plt_dev)
+{
+	MALI_DEBUG_PRINT(4, ("mali_platform_device_register() called\n"));
+	if (mali_gpu_data.shared_mem_size < 10) {
+		MALI_DEBUG_PRINT(2, ("mali os memory didn't configered, set to default(512M)\n"));
+		mali_gpu_data.shared_mem_size = 1024 * 1024 *1024;
+	}
+	return mali_meson_init_start(ptr_plt_dev);
+}
+
+void mali_pdev_post_init(struct platform_device* pdev)
+{
+	mali_gp_reset_fail = 0;
+	mali_core_timeout = 0;
+#ifdef CONFIG_PM_RUNTIME
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37))
+	pm_runtime_set_autosuspend_delay(&(pdev->dev), 1000);
+	pm_runtime_use_autosuspend(&(pdev->dev));
+#endif
+	pm_runtime_enable(&(pdev->dev));
+#endif
+	mali_meson_init_finish(pdev);
+}
+
+int mali_pdev_dts_init(struct platform_device* mali_gpu_device)
+{
+	struct device_node     *cfg_node = mali_gpu_device->dev.of_node;
+	struct device_node     *child;
+	u32 prop_value;
+	int err;
+
+	for_each_child_of_node(cfg_node, child) {
+		err = of_property_read_u32(child, "shared_memory", &prop_value);
+		if (err == 0) {
+			MALI_DEBUG_PRINT(2, ("shared_memory configurate  %d\n", prop_value));
+			mali_gpu_data.shared_mem_size = prop_value * 1024 * 1024;
+		}
+	}
+
+	err = mali_pdev_pre_init(mali_gpu_device);
+	if (err == 0)
+		mali_pdev_post_init(mali_gpu_device);
+	return err;
+}
+
+int mali_platform_device_register(void)
+{
+	int err = -1;
+	err = mali_pdev_pre_init(&mali_gpu_device);
+	if (err == 0) {
+		err = platform_device_register(&mali_gpu_device);
+		if (0 == err)
+			mali_pdev_post_init(&mali_gpu_device);
+	}
+	return err;
+}
+
+void mali_platform_device_unregister(void)
+{
+	MALI_DEBUG_PRINT(4, ("mali_platform_device_unregister() called\n"));
+	mali_core_scaling_term();
+	platform_device_unregister(&mali_gpu_device);
+	platform_device_put(&mali_gpu_device);
+}
+
+static void mali_platform_device_release(struct device *device)
+{
+	MALI_DEBUG_PRINT(4, ("mali_platform_device_release() called\n"));
+}
+
+
diff --git a/utgard/platform/meson_m450/meson_main.h b/utgard/platform/meson_m450/meson_main.h
new file mode 100644
index 0000000..a67441f
--- /dev/null
+++ b/utgard/platform/meson_m450/meson_main.h
@@ -0,0 +1,37 @@
+/*
+ * mali_platform.h
+ *
+ *  Created on: Nov 8, 2013
+ *      Author: amlogic
+ */
+
+#ifndef MESON_MAIN_H_
+#define MESON_MAIN_H_
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+
+#include "mali_scaling.h"
+#include "mali_clock.h"
+
+extern struct device_type mali_pm_device;
+extern int mali_pm_statue;
+
+u32 set_max_mali_freq(u32 idx);
+u32 get_max_mali_freq(void);
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev);
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev);
+int mali_meson_uninit(struct platform_device* ptr_plt_dev);
+int mali_light_suspend(struct device *device);
+int mali_light_resume(struct device *device);
+int mali_deep_suspend(struct device *device);
+int mali_deep_resume(struct device *device);
+
+#endif /* MESON_MAIN_H_ */
diff --git a/utgard/platform/meson_m450/mpgpu.c b/utgard/platform/meson_m450/mpgpu.c
new file mode 100644
index 0000000..cc5b604
--- /dev/null
+++ b/utgard/platform/meson_m450/mpgpu.c
@@ -0,0 +1,374 @@
+/*******************************************************************
+ *
+ *  Copyright C 2013 by Amlogic, Inc. All Rights Reserved.
+ *
+ *  Description:
+ *
+ *  Author: Amlogic Software
+ *  Created: 2010/4/1   19:46
+ *
+ *******************************************************************/
+/* Standard Linux headers */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/io.h>
+#include <plat/io.h>
+#include <asm/io.h>
+#endif
+
+#include <linux/mali/mali_utgard.h>
+#include <common/mali_kernel_common.h>
+#include <common/mali_pmu.h>
+//include "mali_pp_scheduler.h"
+#include "meson_main.h"
+
+#if MESON_CPU_TYPE >= MESON_CPU_TYPE_MESON8
+static ssize_t domain_stat_read(struct class *class,
+			struct class_attribute *attr, char *buf)
+{
+#if 0
+	unsigned int val;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff;
+	return sprintf(buf, "%x\n", val>>4);
+#else
+	return 0;
+#endif
+}
+
+#define PREHEAT_CMD "preheat"
+#define PLL2_CMD "mpl2"  /* mpl2 [11] or [0xxxxxxx] */
+#define SCMPP_CMD "scmpp"  /* scmpp [number of pp your want in most of time]. */
+#define BSTGPU_CMD "bstgpu"  /* bstgpu [0-256] */
+#define BSTPP_CMD "bstpp"  /* bstpp [0-256] */
+#define LIMIT_CMD "lmt"  /* lmt [0 or 1] */
+#define MAX_TOKEN 20
+#define FULL_UTILIZATION 256
+
+static ssize_t mpgpu_write(struct class *class,
+			struct class_attribute *attr, const char *buf, size_t count)
+{
+	char *pstart, *cprt = NULL;
+	u32 val = 0;
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+
+	cprt = skip_spaces(buf);
+	pstart = strsep(&cprt," ");
+	if (strlen(pstart) < 1)
+		goto quit;
+
+	if (!strncmp(pstart, PREHEAT_CMD, MAX_TOKEN)) {
+		if (pmali_plat->plat_preheat) {
+			pmali_plat->plat_preheat();
+		}
+	} else if (!strncmp(pstart, PLL2_CMD, MAX_TOKEN)) {
+		int base = 10;
+		if ((strlen(cprt) > 2) && (cprt[0] == '0') &&
+				(cprt[1] == 'x' || cprt[1] == 'X'))
+			base = 16;
+		if (kstrtouint(cprt, base, &val) <0)
+			goto quit;
+		if (val < 11)
+			pmali_plat->cfg_clock = pmali_plat->cfg_clock_bkup;
+		else
+			pmali_plat->cfg_clock = pmali_plat->turbo_clock;
+		pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+		set_str_src(val);
+	} else if (!strncmp(pstart, SCMPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < pmali_plat->cfg_pp)) {
+			pmali_plat->sc_mpp = val;
+		}
+	} else if (!strncmp(pstart, BSTGPU_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_gpu = val;
+		}
+	} else if (!strncmp(pstart, BSTPP_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+		if ((val > 0) && (val < FULL_UTILIZATION)) {
+			pmali_plat->bst_pp = val;
+		}
+	} else if (!strncmp(pstart, LIMIT_CMD, MAX_TOKEN)) {
+		if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL)
+			goto quit;
+
+		if (val < 2) {
+			pmali_plat->limit_on = val;
+			if (val == 0) {
+				pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+				pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+				revise_mali_rt();
+			}
+		}
+	}
+quit:
+	return count;
+}
+
+static ssize_t scale_mode_read(struct class *class,
+			struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_mali_schel_mode());
+}
+
+static ssize_t scale_mode_write(struct class *class,
+			struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	set_mali_schel_mode(val);
+
+	return count;
+}
+
+static ssize_t max_pp_read(struct class *class,
+			struct class_attribute *attr, char *buf)
+{
+#if 0
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxpp:%d, maxpp_sysfs:%d, total=%d\n",
+			pmali_plat->scale_info.maxpp, pmali_plat->maxpp_sysfs,
+			pmali_plat->cfg_pp);
+	return sprintf(buf, "%d\n", pmali_plat->cfg_pp);
+#else
+	return 0;
+#endif
+}
+
+static ssize_t max_pp_write(struct class *class,
+			struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_pp) || (val < pinfo->minpp))
+		return -EINVAL;
+
+	pmali_plat->maxpp_sysfs = val;
+	pinfo->maxpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_pp_read(struct class *class,
+			struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minpp);
+}
+
+static ssize_t min_pp_write(struct class *class,
+			struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxpp) || (val < 1))
+		return -EINVAL;
+
+	pinfo->minpp = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t max_freq_read(struct class *class,
+			struct class_attribute *attr, char *buf)
+{
+#if 0
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	printk("maxclk:%d, maxclk_sys:%d, max gpu level=%d\n",
+			pmali_plat->scale_info.maxclk, pmali_plat->maxclk_sysfs, get_gpu_max_clk_level());
+	return sprintf(buf, "%d\n", get_gpu_max_clk_level());
+#else
+	return 0;
+#endif
+}
+
+static ssize_t max_freq_write(struct class *class,
+			struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pmali_plat->cfg_clock) || (val < pinfo->minclk))
+		return -EINVAL;
+
+	pmali_plat->maxclk_sysfs = val;
+	pinfo->maxclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t min_freq_read(struct class *class,
+			struct class_attribute *attr, char *buf)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	return sprintf(buf, "%d\n", pmali_plat->scale_info.minclk);
+}
+
+static ssize_t min_freq_write(struct class *class,
+			struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	mali_plat_info_t* pmali_plat;
+	mali_scale_info_t* pinfo;
+
+	pmali_plat = get_mali_plat_data();
+	pinfo = &pmali_plat->scale_info;
+
+	ret = kstrtouint(buf, 10, &val);
+	if ((0 != ret) || (val > pinfo->maxclk))
+		return -EINVAL;
+
+	pinfo->minclk = val;
+	revise_mali_rt();
+
+	return count;
+}
+
+static ssize_t freq_read(struct class *class,
+			struct class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", get_current_frequency());
+}
+
+static ssize_t freq_write(struct class *class,
+			struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(val, pp, 1);
+
+	return count;
+}
+
+static ssize_t current_pp_read(struct class *class,
+			struct class_attribute *attr, char *buf)
+{
+	u32 clk, pp;
+	get_mali_rt_clkpp(&clk, &pp);
+	return sprintf(buf, "%d\n", pp);
+}
+
+static ssize_t current_pp_write(struct class *class,
+			struct class_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned int val;
+	u32 clk, pp;
+
+	get_mali_rt_clkpp(&clk, &pp);
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+	{
+		return -EINVAL;
+	}
+
+	ret = kstrtouint(buf, 10, &val);
+	if (0 != ret)
+		return -EINVAL;
+
+	set_mali_rt_clkpp(clk, val, 1);
+
+	return count;
+}
+
+static struct class_attribute mali_class_attrs[] = {
+	__ATTR(domain_stat,	0644, domain_stat_read, NULL),
+	__ATTR(mpgpucmd,	0644, NULL,		mpgpu_write),
+	__ATTR(scale_mode,	0644, scale_mode_read,  scale_mode_write),
+	__ATTR(min_freq,	0644, min_freq_read,  	min_freq_write),
+	__ATTR(max_freq,	0644, max_freq_read,	max_freq_write),
+	__ATTR(min_pp,		0644, min_pp_read,	min_pp_write),
+	__ATTR(max_pp,		0644, max_pp_read,	max_pp_write),
+	__ATTR(cur_freq,	0644, freq_read,	freq_write),
+	__ATTR(cur_pp,		0644, current_pp_read,	current_pp_write),
+};
+
+static struct class mpgpu_class = {
+	.name = "mpgpu",
+};
+#endif
+
+int mpgpu_class_init(void)
+{
+#if MESON_CPU_TYPE >= MESON_CPU_TYPE_MESON8
+	int ret = 0;
+	int i;
+	int attr_num =  ARRAY_SIZE(mali_class_attrs);
+
+	ret = class_register(&mpgpu_class);
+	if (ret) {
+		printk(KERN_ERR "%s: class_register failed\n", __func__);
+		return ret;
+	}
+	for (i = 0; i< attr_num; i++) {
+		ret = class_create_file(&mpgpu_class, &mali_class_attrs[i]);
+		if (ret) {
+			printk(KERN_ERR "%d ST: class item failed to register\n", i);
+		}
+	}
+	return ret;
+#else
+	return 0;
+#endif
+}
+
+void  mpgpu_class_exit(void)
+{
+#if MESON_CPU_TYPE >= MESON_CPU_TYPE_MESON8
+	class_unregister(&mpgpu_class);
+#endif
+}
+
diff --git a/utgard/platform/meson_m450/platform_m6tvd.c b/utgard/platform/meson_m450/platform_m6tvd.c
new file mode 100644
index 0000000..58b3090
--- /dev/null
+++ b/utgard/platform/meson_m450/platform_m6tvd.c
@@ -0,0 +1,197 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#include <asm/io.h>
+#include <linux/mali/mali_utgard.h>
+
+#include <common/mali_kernel_common.h>
+#include <common/mali_osk_profiling.h>
+#include <common/mali_pmu.h>
+
+#include "meson_main.h"
+
+/*
+ *    For Meson 8TVD.
+ *
+ */
+
+#define CFG_PP 2
+#define CFG_CLOCK 3
+#define CFG_MIN_PP 1
+#define CFG_MIN_CLOCK 0
+
+/* fclk is 2Ghz. */
+#define FCLK_DEV5 (7 << 9)		/*	400   Mhz  */
+#define FCLK_DEV3 (6 << 9)		/*	666   Mhz  */
+#define FCLK_DEV2 (5 << 9)		/*	1000  Mhz  */
+#define FCLK_DEV7 (4 << 9)		/*	285   Mhz  */
+
+u32 mali_dvfs_clk[] = {
+	FCLK_DEV7 | 9,     /* 100 Mhz */
+	FCLK_DEV2 | 4,     /* 200 Mhz */
+	FCLK_DEV3 | 1,     /* 333 Mhz */
+	FCLK_DEV5 | 0,     /* 400 Mhz */
+};
+
+u32 mali_dvfs_clk_sample[] = {
+	100,     /* 182.1 Mhz */
+	200,     /* 318.7 Mhz */
+	333,     /* 425 Mhz */
+	400,     /* 510 Mhz */
+};
+
+static mali_plat_info_t mali_plat_data = {
+	.cfg_pp = CFG_PP,  /* number of pp. */
+	.cfg_min_pp = CFG_MIN_PP,
+	.def_clock = CFG_CLOCK, /* gpu clock used most of time.*/
+	.cfg_clock = CFG_CLOCK, /* max gpu clock. */
+	.cfg_min_clock = CFG_MIN_CLOCK,
+
+	.clk = mali_dvfs_clk, /* clock source table. */
+	.clk_sample = mali_dvfs_clk_sample, /* freqency table for show. */
+	.clk_len = sizeof(mali_dvfs_clk) / sizeof(mali_dvfs_clk[0]),
+	.have_switch = 0,
+};
+
+#define MALI_USER_PP0	AM_IRQ4(31)
+
+static struct resource mali_gpu_resources[] =
+{
+MALI_GPU_RESOURCES_MALI450_MP2_PMU(0xC9140000, INT_MALI_GP, INT_MALI_GP_MMU,
+				MALI_USER_PP0, INT_MALI_PP_MMU,
+				INT_MALI_PP1, INT_MALI_PP_MMU1,
+				INT_MALI_PP)
+};
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+	ptr_plt_dev->num_resources = ARRAY_SIZE(mali_gpu_resources);
+	ptr_plt_dev->resource = mali_gpu_resources;
+	return mali_clock_init(&mali_plat_data);
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+	return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+	return 0;
+}
+
+static int mali_cri_pmu_on_off(size_t param)
+{
+	struct mali_pmu_core *pmu;
+
+	MALI_DEBUG_PRINT(4, ("mali_os_suspend() called\n"));
+	pmu = mali_pmu_get_global_pmu_core();
+	if (param == 0)
+		mali_pmu_power_down_all(pmu);
+	else
+		mali_pmu_power_up_all(pmu);
+	return 0;
+}
+
+int mali_light_suspend(struct device *device)
+{
+	int ret = 0;
+#ifdef CONFIG_MALI400_PROFILING
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					MALI_PROFILING_EVENT_CHANNEL_GPU |
+					MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+					0, 0,	0,	0,	0);
+#endif
+	if (NULL != device->driver &&
+	    NULL != device->driver->pm &&
+	    NULL != device->driver->pm->runtime_suspend)
+	{
+		/* Need to notify Mali driver about this event */
+		ret = device->driver->pm->runtime_suspend(device);
+	}
+
+	/* clock scaling. Kasin..*/
+	mali_clock_critical(mali_cri_pmu_on_off, 0);
+	disable_clock();
+	return ret;
+}
+
+int mali_light_resume(struct device *device)
+{
+	int ret = 0;
+	/* clock scaling. Kasin..*/
+	enable_clock();
+
+	mali_clock_critical(mali_cri_pmu_on_off, 1);
+#ifdef CONFIG_MALI400_PROFILING
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					MALI_PROFILING_EVENT_CHANNEL_GPU |
+					MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+					0, 0,	0,	0,	0);
+#endif
+
+	if (NULL != device->driver &&
+	    NULL != device->driver->pm &&
+	    NULL != device->driver->pm->runtime_resume)
+	{
+		/* Need to notify Mali driver about this event */
+		ret = device->driver->pm->runtime_resume(device);
+	}
+	return ret;
+}
+
+int mali_deep_suspend(struct device *device)
+{
+	int ret = 0;
+	//enable_clock();
+	//flush_scaling_job();
+	if (NULL != device->driver &&
+	    NULL != device->driver->pm &&
+	    NULL != device->driver->pm->suspend)
+	{
+		/* Need to notify Mali driver about this event */
+		ret = device->driver->pm->suspend(device);
+	}
+
+	/* clock scaling off. Kasin... */
+	mali_clock_critical(mali_cri_pmu_on_off, 0);
+	disable_clock();
+	return ret;
+}
+
+int mali_deep_resume(struct device *device)
+{
+	int ret = 0;
+	/* clock scaling up. Kasin.. */
+	enable_clock();
+	mali_clock_critical(mali_cri_pmu_on_off, 1);
+	if (NULL != device->driver &&
+	    NULL != device->driver->pm &&
+	    NULL != device->driver->pm->resume)
+	{
+		/* Need to notify Mali driver about this event */
+		ret = device->driver->pm->resume(device);
+	}
+	return ret;
+
+}
+
+void mali_post_init(void)
+{
+}
diff --git a/utgard/platform/meson_m450/platform_m8.c b/utgard/platform/meson_m450/platform_m8.c
new file mode 100644
index 0000000..66a4e25
--- /dev/null
+++ b/utgard/platform/meson_m450/platform_m8.c
@@ -0,0 +1,489 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#endif
+#include <asm/io.h>
+#include <linux/mali/mali_utgard.h>
+#ifdef CONFIG_GPU_THERMAL
+#include <linux/gpu_cooling.h>
+#include <linux/gpucore_cooling.h>
+#endif
+#include <common/mali_kernel_common.h>
+#include <common/mali_osk_profiling.h>
+#include <common/mali_pmu.h>
+
+#include "meson_main.h"
+
+/*
+ *    For Meson 8 M2.
+ *
+ */
+
+#define CFG_PP 6
+#define CFG_CLOCK 3
+#define CFG_MIN_PP 1
+#define CFG_MIN_CLOCK 0
+
+/* fclk is 2550Mhz. */
+#define FCLK_DEV3 (6 << 9)      /*  850   Mhz  */
+#define FCLK_DEV4 (5 << 9)      /*  637.5 Mhz  */
+#define FCLK_DEV5 (7 << 9)      /*  510   Mhz  */
+#define FCLK_DEV7 (4 << 9)      /*  364.3 Mhz  */
+
+static u32 mali_dvfs_clk[] = {
+    FCLK_DEV7 | 1,     /* 182.1 Mhz */
+    FCLK_DEV4 | 1,     /* 318.7 Mhz */
+    FCLK_DEV3 | 1,     /* 425 Mhz */
+    FCLK_DEV5 | 0,     /* 510 Mhz */
+    FCLK_DEV4 | 0,     /* 637.5 Mhz */
+};
+
+static u32 mali_dvfs_clk_sample[] = {
+    182,     /* 182.1 Mhz */
+    319,     /* 318.7 Mhz */
+    425,     /* 425 Mhz */
+    510,     /* 510 Mhz */
+    637,     /* 637.5 Mhz */
+};
+//////////////////////////////////////
+//for dvfs
+struct mali_gpu_clk_item  meson_gpu_clk[]  = {
+    {182,  1150},   /* 182.1 Mhz, 1150mV */
+    {319,  1150},   /* 318.7 Mhz */
+    {425,  1150},   /* 425 Mhz */
+    {510,  1150},   /* 510 Mhz */
+    {637,  1150},   /* 637.5 Mhz */
+};
+struct mali_gpu_clock meson_gpu_clk_info = {
+    .item = meson_gpu_clk,
+    .num_of_steps = ARRAY_SIZE(meson_gpu_clk),
+};
+static int cur_gpu_clk_index = 0;
+//////////////////////////////////////
+static mali_dvfs_threshold_table mali_dvfs_table[]={
+    { 0, 0, 3,  30,  80}, /* for 182.1  */
+    { 1, 1, 3,  40, 205}, /* for 318.7  */
+    { 2, 2, 3, 150, 215}, /* for 425.0  */
+    { 3, 3, 3, 170, 253}, /* for 510.0  */
+    { 4, 4, 3, 230, 255},  /* for 637.5  */
+    { 0, 0, 3,   0,   0}
+};
+
+static void mali_plat_preheat(void);
+static mali_plat_info_t mali_plat_data = {
+    .cfg_pp = CFG_PP,  /* number of pp. */
+    .cfg_min_pp = CFG_MIN_PP,
+    .turbo_clock = 4, /* reserved clock src. */
+    .def_clock = 2, /* gpu clock used most of time.*/
+    .cfg_clock = CFG_CLOCK, /* max gpu clock. */
+    .cfg_clock_bkup = CFG_CLOCK,
+    .cfg_min_clock = CFG_MIN_CLOCK,
+
+    .sc_mpp = 3, /* number of pp used most of time.*/
+    .bst_gpu = 210, /* threshold for boosting gpu. */
+    .bst_pp = 160, /* threshold for boosting PP. */
+
+    .clk = mali_dvfs_clk, /* clock source table. */
+    .clk_sample = mali_dvfs_clk_sample, /* freqency table for show. */
+    .clk_len = sizeof(mali_dvfs_clk) / sizeof(mali_dvfs_clk[0]),
+    .have_switch = 1,
+
+    .dvfs_table = mali_dvfs_table, /* DVFS table. */
+    .dvfs_table_size = sizeof(mali_dvfs_table) / sizeof(mali_dvfs_threshold_table),
+
+    .scale_info = {
+        CFG_MIN_PP, /* minpp */
+        CFG_PP, /* maxpp, should be same as cfg_pp */
+        CFG_MIN_CLOCK, /* minclk */
+        CFG_CLOCK, /* maxclk should be same as cfg_clock */
+    },
+
+    .limit_on = 1,
+    .plat_preheat = mali_plat_preheat,
+};
+
+static void mali_plat_preheat(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    u32 pre_fs;
+    u32 clk, pp;
+
+    if (get_mali_schel_mode() != MALI_PP_FS_SCALING)
+        return;
+
+    get_mali_rt_clkpp(&clk, &pp);
+    pre_fs = mali_plat_data.def_clock + 1;
+    if (clk < pre_fs)
+        clk = pre_fs;
+    if (pp < mali_plat_data.sc_mpp)
+        pp = mali_plat_data.sc_mpp;
+    set_mali_rt_clkpp(clk, pp, 1);
+#endif
+}
+
+mali_plat_info_t* get_mali_plat_data(void) {
+    return &mali_plat_data;
+}
+
+int get_mali_freq_level(int freq)
+{
+    int i = 0, level = -1;
+    int mali_freq_num;
+
+    if (freq < 0)
+        return level;
+
+    mali_freq_num = mali_plat_data.dvfs_table_size - 1;
+    if (freq < mali_plat_data.clk_sample[0])
+        level = mali_freq_num-1;
+    else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1])
+        level = 0;
+    else {
+        for (i=0; i<mali_freq_num - 1 ;i++) {
+            if (freq >= mali_plat_data.clk_sample[i] && freq < mali_plat_data.clk_sample[i + 1]) {
+                level = i;
+                level = mali_freq_num-level - 1;
+                break;
+            }
+        }
+    }
+    return level;
+}
+
+unsigned int get_mali_max_level(void)
+{
+    return mali_plat_data.dvfs_table_size - 1;
+}
+
+static struct resource mali_gpu_resources[] =
+{
+    MALI_GPU_RESOURCES_MALI450_MP6_PMU(IO_MALI_APB_PHY_BASE, INT_MALI_GP, INT_MALI_GP_MMU,
+            INT_MALI_PP0, INT_MALI_PP0_MMU,
+            INT_MALI_PP1, INT_MALI_PP1_MMU,
+            INT_MALI_PP2, INT_MALI_PP2_MMU,
+            INT_MALI_PP4, INT_MALI_PP4_MMU,
+            INT_MALI_PP5, INT_MALI_PP5_MMU,
+            INT_MALI_PP6, INT_MALI_PP6_MMU,
+            INT_MALI_PP)
+};
+
+#ifdef CONFIG_GPU_THERMAL
+static void set_limit_mali_freq(u32 idx)
+{
+    if (mali_plat_data.limit_on == 0)
+        return;
+    if (idx > mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk)
+        return;
+    mali_plat_data.scale_info.maxclk= idx;
+    revise_mali_rt();
+}
+
+static u32 get_limit_mali_freq(void)
+{
+    return mali_plat_data.scale_info.maxclk;
+}
+#endif
+
+#ifdef CONFIG_GPU_THERMAL
+static u32 set_limit_pp_num(u32 num)
+{
+    u32 ret = -1;
+    if (mali_plat_data.limit_on == 0)
+        goto quit;
+    if (num > mali_plat_data.cfg_pp ||
+            num < mali_plat_data.scale_info.minpp)
+        goto quit;
+    mali_plat_data.scale_info.maxpp = num;
+    revise_mali_rt();
+    ret = 0;
+quit:
+    return ret;
+}
+#endif
+
+void mali_gpu_utilization_callback(struct mali_gpu_utilization_data *data);
+
+#if 0
+struct mali_gpu_clk_item {
+    unsigned int clock; /* unit(MHz) */
+    unsigned int vol;
+};
+
+struct mali_gpu_clock {
+    struct mali_gpu_clk_item *item;
+    unsigned int num_of_steps;
+};
+#endif
+
+/* Function that platfrom report it's clock info which driver can set, needed when CONFIG_MALI_DVFS enabled */
+void meson_platform_get_clock_info(struct mali_gpu_clock **data) {
+    *data = &meson_gpu_clk_info;
+}
+
+/* Function that get the current clock info, needed when CONFIG_MALI_DVFS enabled */
+int meson_platform_get_freq(void) {
+    printk("get cur_gpu_clk_index =%d\n", cur_gpu_clk_index);
+    return  cur_gpu_clk_index;
+}
+
+/* Fuction that platform callback for freq setting, needed when CONFIG_MALI_DVFS enabled */
+int meson_platform_set_freq(int setting_clock_step) {
+
+    if (cur_gpu_clk_index == setting_clock_step) {
+        return 0;
+    }
+
+    mali_clock_set(setting_clock_step);
+
+    cur_gpu_clk_index = setting_clock_step;
+    printk("set cur_gpu_clk_index =%d\n", cur_gpu_clk_index);
+
+    return 0;
+}
+
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+    struct mali_gpu_device_data* pdev = ptr_plt_dev->dev.platform_data;
+
+    /* chip mark detect. */
+#ifdef IS_MESON_M8_CPU
+    if (IS_MESON_M8_CPU) {
+        mali_plat_data.have_switch = 0;
+    }
+#endif
+
+    /* for resource data. */
+    ptr_plt_dev->num_resources = ARRAY_SIZE(mali_gpu_resources);
+    ptr_plt_dev->resource = mali_gpu_resources;
+
+    /*for dvfs*/
+#ifndef CONFIG_MALI_DVFS
+    /* for mali platform data. */
+    pdev->control_interval = 300;
+    pdev->utilization_callback = mali_gpu_utilization_callback;
+#else
+    pdev->get_clock_info = meson_platform_get_clock_info;
+    pdev->get_freq = meson_platform_get_freq;
+    pdev->set_freq = meson_platform_set_freq;
+#endif
+
+    return mali_clock_init(&mali_plat_data);
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+#ifndef CONFIG_MALI_DVFS
+    if (mali_core_scaling_init(&mali_plat_data) < 0)
+        return -1;
+#else
+    printk("disable meson own dvfs\n");
+#endif
+    return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+    return 0;
+}
+
+static int mali_cri_light_suspend(size_t param)
+{
+    int ret;
+    struct device *device;
+    struct mali_pmu_core *pmu;
+
+    ret = 0;
+    mali_pm_statue = 1;
+    device = (struct device *)param;
+    pmu = mali_pmu_get_global_pmu_core();
+
+    if (NULL != device->driver &&
+            NULL != device->driver->pm &&
+            NULL != device->driver->pm->runtime_suspend)
+    {
+        /* Need to notify Mali driver about this event */
+        ret = device->driver->pm->runtime_suspend(device);
+    }
+    mali_pmu_power_down_all(pmu);
+    return ret;
+}
+
+static int mali_cri_light_resume(size_t param)
+{
+    int ret;
+    struct device *device;
+    struct mali_pmu_core *pmu;
+
+    ret = 0;
+    device = (struct device *)param;
+    pmu = mali_pmu_get_global_pmu_core();
+
+    mali_pmu_power_up_all(pmu);
+    if (NULL != device->driver &&
+            NULL != device->driver->pm &&
+            NULL != device->driver->pm->runtime_resume)
+    {
+        /* Need to notify Mali driver about this event */
+        ret = device->driver->pm->runtime_resume(device);
+    }
+    mali_pm_statue = 0;
+    return ret;
+}
+
+static int mali_cri_deep_suspend(size_t param)
+{
+    int ret;
+    struct device *device;
+    struct mali_pmu_core *pmu;
+
+    ret = 0;
+    device = (struct device *)param;
+    pmu = mali_pmu_get_global_pmu_core();
+
+    if (NULL != device->driver &&
+            NULL != device->driver->pm &&
+            NULL != device->driver->pm->suspend)
+    {
+        /* Need to notify Mali driver about this event */
+        ret = device->driver->pm->suspend(device);
+    }
+    mali_pmu_power_down_all(pmu);
+    return ret;
+}
+
+static int mali_cri_deep_resume(size_t param)
+{
+    int ret;
+    struct device *device;
+    struct mali_pmu_core *pmu;
+
+    ret = 0;
+    device = (struct device *)param;
+    pmu = mali_pmu_get_global_pmu_core();
+
+    mali_pmu_power_up_all(pmu);
+    if (NULL != device->driver &&
+            NULL != device->driver->pm &&
+            NULL != device->driver->pm->resume)
+    {
+        /* Need to notify Mali driver about this event */
+        ret = device->driver->pm->resume(device);
+    }
+    return ret;
+
+}
+
+int mali_light_suspend(struct device *device)
+{
+    int ret = 0;
+#ifdef CONFIG_MALI400_PROFILING
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+            MALI_PROFILING_EVENT_CHANNEL_GPU |
+            MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+            0, 0, 0, 0, 0);
+#endif
+
+    /* clock scaling. Kasin..*/
+    ret = mali_clock_critical(mali_cri_light_suspend, (size_t)device);
+    disable_clock();
+    return ret;
+}
+
+int mali_light_resume(struct device *device)
+{
+    int ret = 0;
+    enable_clock();
+    ret = mali_clock_critical(mali_cri_light_resume, (size_t)device);
+#ifdef CONFIG_MALI400_PROFILING
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+            MALI_PROFILING_EVENT_CHANNEL_GPU |
+            MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+            get_current_frequency(), 0, 0, 0, 0);
+#endif
+    return ret;
+}
+
+int mali_deep_suspend(struct device *device)
+{
+    int ret = 0;
+
+    mali_pm_statue = 1;
+    enable_clock();
+#ifndef CONFIG_MALI_DVFS
+    flush_scaling_job();
+#endif
+
+    /* clock scaling off. Kasin... */
+    ret = mali_clock_critical(mali_cri_deep_suspend, (size_t)device);
+    disable_clock();
+    return ret;
+}
+
+int mali_deep_resume(struct device *device)
+{
+    int ret = 0;
+
+    /* clock scaling up. Kasin.. */
+    enable_clock();
+    ret = mali_clock_critical(mali_cri_deep_resume, (size_t)device);
+    mali_pm_statue = 0;
+    return ret;
+
+}
+
+void mali_post_init(void)
+{
+#ifdef CONFIG_GPU_THERMAL
+    int err;
+    struct gpufreq_cooling_device *gcdev = NULL;
+    struct gpucore_cooling_device *gccdev = NULL;
+
+    gcdev = gpufreq_cooling_alloc();
+    register_gpu_freq_info(get_current_frequency);
+    if (IS_ERR(gcdev))
+        printk("malloc gpu cooling buffer error!!\n");
+    else if (!gcdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gcdev->get_gpu_freq_level = get_mali_freq_level;
+        gcdev->get_gpu_max_level = get_mali_max_level;
+        gcdev->set_gpu_freq_idx = set_limit_mali_freq;
+        gcdev->get_gpu_current_max_level = get_limit_mali_freq;
+        err = gpufreq_cooling_register(gcdev);
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu cooling register okay with err=%d\n",err);
+    }
+
+    gccdev = gpucore_cooling_alloc();
+    if (IS_ERR(gccdev))
+        printk("malloc gpu core cooling buffer error!!\n");
+    else if (!gccdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gccdev->max_gpu_core_num=mali_plat_data.cfg_pp;
+        gccdev->set_max_pp_num=set_limit_pp_num;
+        err = (int)gpucore_cooling_register(gccdev);
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu core cooling register okay with err=%d\n",err);
+    }
+#endif
+}
diff --git a/utgard/platform/meson_m450/platform_m8b.c b/utgard/platform/meson_m450/platform_m8b.c
new file mode 100644
index 0000000..692aafe
--- /dev/null
+++ b/utgard/platform/meson_m450/platform_m8b.c
@@ -0,0 +1,472 @@
+/*
+ * platform.c
+ *
+ * clock source setting and resource config
+ *
+ *  Created on: Dec 4, 2013
+ *      Author: amlogic
+ */
+
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/module.h>            /* kernel module definitions */
+#include <linux/ioport.h>            /* request_mem_region */
+#include <linux/slab.h>
+#include <mach/register.h>
+#include <mach/irqs.h>
+#include <mach/io.h>
+#include <asm/io.h>
+#include <linux/mali/mali_utgard.h>
+#include <linux/gpu_cooling.h>
+#include <linux/gpucore_cooling.h>
+#include <common/mali_kernel_common.h>
+#include <common/mali_osk_profiling.h>
+#include <common/mali_pmu.h>
+
+#include "meson_main.h"
+
+/*
+ *    For Meson 8B.
+ *
+ */
+
+#define CFG_PP 2
+#define CFG_CLOCK 3
+#define CFG_MIN_PP 1
+#define CFG_MIN_CLOCK 0
+
+/* fclk is 2550Mhz. */
+#define FCLK_DEV3 (6 << 9)      /*  850   Mhz  */
+#define FCLK_DEV4 (5 << 9)      /*  637.5 Mhz  */
+#define FCLK_DEV5 (7 << 9)      /*  510   Mhz  */
+#define FCLK_DEV7 (4 << 9)      /*  364.3 Mhz  */
+
+static u32 mali_dvfs_clk[] = {
+    FCLK_DEV5 | 1,     /* 255 Mhz */
+    FCLK_DEV7 | 0,     /* 364 Mhz */
+    FCLK_DEV3 | 1,     /* 425 Mhz */
+    FCLK_DEV5 | 0,     /* 510 Mhz */
+    FCLK_DEV4 | 0,     /* 637.5 Mhz */
+};
+
+static u32 mali_dvfs_clk_sample[] = {
+    255,     /* 182.1 Mhz */
+    364,     /* 318.7 Mhz */
+    425,     /* 425 Mhz */
+    510,     /* 510 Mhz */
+    637,     /* 637.5 Mhz */
+};
+
+//////////////////////////////////////
+//for dvfs
+struct mali_gpu_clk_item  meson_gpu_clk[]  = {
+    {255,  1150},   /* 182.1 Mhz, 1150mV */
+    {364,  1150},   /* 318.7 Mhz */
+    {425,  1150},   /* 425 Mhz */
+    {510,  1150},   /* 510 Mhz */
+    {637,  1150},   /* 637.5 Mhz */
+};
+struct mali_gpu_clock meson_gpu_clk_info = {
+    .item = meson_gpu_clk,
+    .num_of_steps = ARRAY_SIZE(meson_gpu_clk),
+};
+static int cur_gpu_clk_index = 0;
+//////////////////////////////////////
+
+static mali_dvfs_threshold_table mali_dvfs_table[]={
+    { 0, 0, 5, 30 , 180}, /* for 255  */
+    { 1, 1, 5, 152, 205}, /* for 364  */
+    { 2, 2, 5, 180, 212}, /* for 425  */
+    { 3, 3, 5, 205, 236}, /* for 510  */
+    { 4, 4, 5, 230, 255}, /* for 637  */
+    { 0, 0, 5,   0,   0}
+};
+
+static void mali_plat_preheat(void);
+static mali_plat_info_t mali_plat_data = {
+    .cfg_pp = CFG_PP,  /* number of pp. */
+    .cfg_min_pp = CFG_MIN_PP,
+    .turbo_clock = 4, /* reserved clock src. */
+    .def_clock = 2, /* gpu clock used most of time.*/
+    .cfg_clock = CFG_CLOCK, /* max gpu clock. */
+    .cfg_clock_bkup = CFG_CLOCK,
+    .cfg_min_clock = CFG_MIN_CLOCK,
+
+    .sc_mpp = 2, /* number of pp used most of time.*/
+    .bst_gpu = 210, /* threshold for boosting gpu. */
+    .bst_pp = 160, /* threshold for boosting PP. */
+
+    .clk = mali_dvfs_clk, /* clock source table. */
+    .clk_sample = mali_dvfs_clk_sample, /* freqency table for show. */
+    .clk_len = sizeof(mali_dvfs_clk) / sizeof(mali_dvfs_clk[0]),
+    .have_switch = 1,
+
+    .dvfs_table = mali_dvfs_table, /* DVFS table. */
+    .dvfs_table_size = sizeof(mali_dvfs_table) / sizeof(mali_dvfs_threshold_table),
+
+    .scale_info = {
+        CFG_MIN_PP, /* minpp */
+        CFG_PP, /* maxpp, should be same as cfg_pp */
+        CFG_MIN_CLOCK, /* minclk */
+        CFG_CLOCK, /* maxclk should be same as cfg_clock */
+    },
+
+    .limit_on = 1,
+    .plat_preheat = mali_plat_preheat,
+};
+
+static void mali_plat_preheat(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    u32 pre_fs;
+    u32 clk, pp;
+
+    if (get_mali_schel_mode() != MALI_PP_FS_SCALING)
+        return;
+
+    get_mali_rt_clkpp(&clk, &pp);
+    pre_fs = mali_plat_data.def_clock + 1;
+    if (clk < pre_fs)
+        clk = pre_fs;
+    if (pp < mali_plat_data.sc_mpp)
+        pp = mali_plat_data.sc_mpp;
+    set_mali_rt_clkpp(clk, pp, 1);
+#endif
+}
+
+mali_plat_info_t* get_mali_plat_data(void) {
+    return &mali_plat_data;
+}
+
+int get_mali_freq_level(int freq)
+{
+    int i = 0, level = -1;
+    int mali_freq_num;
+
+    if (freq < 0)
+        return level;
+
+    mali_freq_num = mali_plat_data.dvfs_table_size - 1;
+    if (freq < mali_plat_data.clk_sample[0])
+        level = mali_freq_num-1;
+    else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1])
+        level = 0;
+    else {
+        for (i=0; i<mali_freq_num - 1 ;i++) {
+            if (freq >= mali_plat_data.clk_sample[i] && freq < mali_plat_data.clk_sample[i + 1]) {
+                level = i;
+                level = mali_freq_num-level - 1;
+                break;
+            }
+        }
+    }
+    return level;
+}
+
+unsigned int get_mali_max_level(void)
+{
+    return mali_plat_data.dvfs_table_size - 1;
+}
+
+static struct resource mali_gpu_resources[] =
+{
+    MALI_GPU_RESOURCES_MALI450_MP2_PMU(IO_MALI_APB_PHY_BASE, INT_MALI_GP, INT_MALI_GP_MMU,
+            INT_MALI_PP0, INT_MALI_PP0_MMU,
+            INT_MALI_PP1, INT_MALI_PP1_MMU,
+            INT_MALI_PP)
+};
+
+static void set_limit_mali_freq(u32 idx)
+{
+    if (mali_plat_data.limit_on == 0)
+        return;
+    if (idx > mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk)
+        return;
+    mali_plat_data.scale_info.maxclk= idx;
+
+    revise_mali_rt();
+}
+
+static u32 get_limit_mali_freq(void)
+{
+    return mali_plat_data.scale_info.maxclk;
+}
+
+static u32 set_limit_pp_num(u32 num)
+{
+    u32 ret = -1;
+    if (mali_plat_data.limit_on == 0)
+        goto quit;
+    if (num > mali_plat_data.cfg_pp ||
+            num < mali_plat_data.scale_info.minpp)
+        goto quit;
+    mali_plat_data.scale_info.maxpp = num;
+    revise_mali_rt();
+    ret = 0;
+quit:
+    return ret;
+}
+
+void mali_gpu_utilization_callback(struct mali_gpu_utilization_data *data);
+
+#if 0
+struct mali_gpu_clk_item {
+    unsigned int clock; /* unit(MHz) */
+    unsigned int vol;
+};
+
+struct mali_gpu_clock {
+    struct mali_gpu_clk_item *item;
+    unsigned int num_of_steps;
+};
+#endif
+
+/* Function that platfrom report it's clock info which driver can set, needed when CONFIG_MALI_DVFS enabled */
+void meson_platform_get_clock_info(struct mali_gpu_clock **data) {
+    *data = &meson_gpu_clk_info;
+}
+
+/* Function that get the current clock info, needed when CONFIG_MALI_DVFS enabled */
+int meson_platform_get_freq(void) {
+    printk("get cur_gpu_clk_index =%d\n", cur_gpu_clk_index);
+    return  cur_gpu_clk_index;
+}
+
+/* Fuction that platform callback for freq setting, needed when CONFIG_MALI_DVFS enabled */
+int meson_platform_set_freq(int setting_clock_step) {
+
+    if (cur_gpu_clk_index == setting_clock_step) {
+        return 0;
+    }
+
+    mali_clock_set(setting_clock_step);
+
+    cur_gpu_clk_index = setting_clock_step;
+    printk("set cur_gpu_clk_index =%d\n", cur_gpu_clk_index);
+
+    return 0;
+}
+int mali_meson_init_start(struct platform_device* ptr_plt_dev)
+{
+    struct mali_gpu_device_data* pdev = ptr_plt_dev->dev.platform_data;
+
+
+    /* for resource data. */
+    ptr_plt_dev->num_resources = ARRAY_SIZE(mali_gpu_resources);
+    ptr_plt_dev->resource = mali_gpu_resources;
+
+    /*for dvfs*/
+#ifndef CONFIG_MALI_DVFS
+    /* for mali platform data. */
+    pdev->control_interval = 200;
+    pdev->utilization_callback = mali_gpu_utilization_callback;
+#else
+    pdev->get_clock_info = meson_platform_get_clock_info;
+    pdev->get_freq = meson_platform_get_freq;
+    pdev->set_freq = meson_platform_set_freq;
+#endif
+
+    return mali_clock_init(&mali_plat_data);
+}
+
+int mali_meson_init_finish(struct platform_device* ptr_plt_dev)
+{
+#ifndef CONFIG_MALI_DVFS
+    if (mali_core_scaling_init(&mali_plat_data) < 0)
+        return -1;
+#endif
+    return 0;
+}
+
+int mali_meson_uninit(struct platform_device* ptr_plt_dev)
+{
+    return 0;
+}
+
+static int mali_cri_light_suspend(size_t param)
+{
+    int ret;
+    struct device *device;
+    struct mali_pmu_core *pmu;
+
+    ret = 0;
+    mali_pm_statue = 1;
+    device = (struct device *)param;
+    pmu = mali_pmu_get_global_pmu_core();
+
+    if (NULL != device->driver &&
+            NULL != device->driver->pm &&
+            NULL != device->driver->pm->runtime_suspend)
+    {
+        /* Need to notify Mali driver about this event */
+        ret = device->driver->pm->runtime_suspend(device);
+    }
+    mali_pmu_power_down_all(pmu);
+    return ret;
+}
+
+static int mali_cri_light_resume(size_t param)
+{
+    int ret;
+    struct device *device;
+    struct mali_pmu_core *pmu;
+
+    ret = 0;
+    device = (struct device *)param;
+    pmu = mali_pmu_get_global_pmu_core();
+
+    mali_pmu_power_up_all(pmu);
+    if (NULL != device->driver &&
+            NULL != device->driver->pm &&
+            NULL != device->driver->pm->runtime_resume)
+    {
+        /* Need to notify Mali driver about this event */
+        ret = device->driver->pm->runtime_resume(device);
+    }
+    mali_pm_statue = 0;
+    return ret;
+}
+
+static int mali_cri_deep_suspend(size_t param)
+{
+    int ret;
+    struct device *device;
+    struct mali_pmu_core *pmu;
+
+    ret = 0;
+    device = (struct device *)param;
+    pmu = mali_pmu_get_global_pmu_core();
+
+    if (NULL != device->driver &&
+            NULL != device->driver->pm &&
+            NULL != device->driver->pm->suspend)
+    {
+        /* Need to notify Mali driver about this event */
+        ret = device->driver->pm->suspend(device);
+    }
+    mali_pmu_power_down_all(pmu);
+    return ret;
+}
+
+static int mali_cri_deep_resume(size_t param)
+{
+    int ret;
+    struct device *device;
+    struct mali_pmu_core *pmu;
+
+    ret = 0;
+    device = (struct device *)param;
+    pmu = mali_pmu_get_global_pmu_core();
+
+    mali_pmu_power_up_all(pmu);
+    if (NULL != device->driver &&
+            NULL != device->driver->pm &&
+            NULL != device->driver->pm->resume)
+    {
+        /* Need to notify Mali driver about this event */
+        ret = device->driver->pm->resume(device);
+    }
+    return ret;
+
+}
+
+int mali_light_suspend(struct device *device)
+{
+    int ret = 0;
+#ifdef CONFIG_MALI400_PROFILING
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+            MALI_PROFILING_EVENT_CHANNEL_GPU |
+            MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+            0, 0, 0, 0, 0);
+#endif
+
+    /* clock scaling. Kasin..*/
+    ret = mali_clock_critical(mali_cri_light_suspend, (size_t)device);
+    disable_clock();
+    return ret;
+}
+
+int mali_light_resume(struct device *device)
+{
+    int ret = 0;
+    enable_clock();
+    ret = mali_clock_critical(mali_cri_light_resume, (size_t)device);
+#ifdef CONFIG_MALI400_PROFILING
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+            MALI_PROFILING_EVENT_CHANNEL_GPU |
+            MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+            get_current_frequency(), 0, 0, 0, 0);
+#endif
+    return ret;
+}
+
+int mali_deep_suspend(struct device *device)
+{
+    int ret = 0;
+    struct mali_pmu_core *pmu;
+
+    mali_pm_statue = 1;
+    pmu = mali_pmu_get_global_pmu_core();
+    enable_clock();
+#ifndef CONFIG_MALI_DVFS
+    flush_scaling_job();
+#endif
+
+    /* clock scaling off. Kasin... */
+    ret = mali_clock_critical(mali_cri_deep_suspend, (size_t)device);
+    disable_clock();
+    return ret;
+}
+
+int mali_deep_resume(struct device *device)
+{
+    int ret = 0;
+
+    /* clock scaling up. Kasin.. */
+    enable_clock();
+    ret = mali_clock_critical(mali_cri_deep_resume, (size_t)device);
+    mali_pm_statue = 0;
+    return ret;
+}
+
+void mali_post_init(void)
+{
+#ifdef CONFIG_GPU_THERMAL
+    int err;
+    struct gpufreq_cooling_device *gcdev = NULL;
+    struct gpucore_cooling_device *gccdev = NULL;
+
+    gcdev = gpufreq_cooling_alloc();
+    register_gpu_freq_info(get_current_frequency);
+    if (IS_ERR(gcdev))
+        printk("malloc gpu cooling buffer error!!\n");
+    else if (!gcdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gcdev->get_gpu_freq_level = get_mali_freq_level;
+        gcdev->get_gpu_max_level = get_mali_max_level;
+        gcdev->set_gpu_freq_idx = set_limit_mali_freq;
+        gcdev->get_gpu_current_max_level = get_limit_mali_freq;
+        err = gpufreq_cooling_register(gcdev);
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu cooling register okay with err=%d\n",err);
+    }
+
+    gccdev = gpucore_cooling_alloc();
+    if (IS_ERR(gccdev))
+        printk("malloc gpu core cooling buffer error!!\n");
+    else if (!gccdev)
+        printk("system does not enable thermal driver\n");
+    else {
+        gccdev->max_gpu_core_num=mali_plat_data.cfg_pp;
+        gccdev->set_max_pp_num=set_limit_pp_num;
+        err = (int)gpucore_cooling_register(gccdev);
+        if (err < 0)
+            printk("register GPU  cooling error\n");
+        printk("gpu core cooling register okay with err=%d\n",err);
+    }
+#endif
+}
diff --git a/utgard/platform/meson_m450/scaling.c b/utgard/platform/meson_m450/scaling.c
new file mode 100644
index 0000000..8a30346
--- /dev/null
+++ b/utgard/platform/meson_m450/scaling.c
@@ -0,0 +1,455 @@
+/*
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.c
+ * Example core scaling policy.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/mali/mali_utgard.h>
+#include <mali_kernel_common.h>
+#include <mali_osk_profiling.h>
+
+#include <meson_main.h>
+
+#define LOG_MALI_SCALING 0
+
+
+static int currentStep;
+#ifndef CONFIG_MALI_DVFS
+static int num_cores_enabled;
+static int lastStep;
+static struct work_struct wq_work;
+static mali_plat_info_t* pmali_plat = NULL;
+#endif
+static int  scaling_mode = MALI_PP_FS_SCALING;
+
+
+static unsigned scaling_dbg_level = 0;
+module_param(scaling_dbg_level, uint, 0644);
+MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level");
+
+#define scalingdbg(level, fmt, arg...)                      \
+    do {                                                    \
+        if (scaling_dbg_level >= (level))                   \
+        printk(fmt , ## arg);                           \
+    } while (0)
+
+#ifndef CONFIG_MALI_DVFS
+static void do_scaling(struct work_struct *work)
+{
+    mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+    int err = mali_perf_set_num_pp_cores(num_cores_enabled);
+    scalingdbg(1, "set pp cores to %d\n", num_cores_enabled);
+    MALI_DEBUG_ASSERT(0 == err);
+    MALI_IGNORE(err);
+    if (pdvfs[currentStep].freq_index != pdvfs[lastStep].freq_index) {
+        mali_dev_pause();
+        mali_clock_set(pdvfs[currentStep].freq_index);
+        mali_dev_resume();
+        lastStep = currentStep;
+    }
+#ifdef CONFIG_MALI400_PROFILING
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+            MALI_PROFILING_EVENT_CHANNEL_GPU |
+            MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+            get_current_frequency(),
+            0,	0,	0,	0);
+#endif
+}
+#endif
+
+u32 revise_set_clk(u32 val, u32 flush)
+{
+    u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+    mali_scale_info_t* pinfo;
+
+    pinfo = &pmali_plat->scale_info;
+
+    if (val < pinfo->minclk)
+        val = pinfo->minclk;
+    else if (val >  pinfo->maxclk)
+        val =  pinfo->maxclk;
+
+    if (val != currentStep) {
+        currentStep = val;
+        if (flush)
+            schedule_work(&wq_work);
+        else
+            ret = 1;
+    }
+#endif
+    return ret;
+}
+
+void get_mali_rt_clkpp(u32* clk, u32* pp)
+{
+#ifndef CONFIG_MALI_DVFS
+    *clk = currentStep;
+    *pp = num_cores_enabled;
+#endif
+}
+
+u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush)
+{
+    u32 ret = 0;
+#ifndef CONFIG_MALI_DVFS
+    mali_scale_info_t* pinfo;
+    u32 flush_work = 0;
+
+    pinfo = &pmali_plat->scale_info;
+    if (clk < pinfo->minclk)
+        clk = pinfo->minclk;
+    else if (clk >  pinfo->maxclk)
+        clk =  pinfo->maxclk;
+
+    if (clk != currentStep) {
+        currentStep = clk;
+        if (flush)
+            flush_work++;
+        else
+            ret = 1;
+    }
+    if (pp < pinfo->minpp)
+        pp = pinfo->minpp;
+    else if (pp > pinfo->maxpp)
+        pp = pinfo->maxpp;
+
+    if (pp != num_cores_enabled) {
+        num_cores_enabled = pp;
+        if (flush)
+            flush_work++;
+        else
+            ret = 1;
+    }
+
+    if (flush_work)
+        schedule_work(&wq_work);
+#endif
+    return ret;
+}
+
+void revise_mali_rt(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    set_mali_rt_clkpp(currentStep, num_cores_enabled, 1);
+#endif
+}
+
+void flush_scaling_job(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    cancel_work_sync(&wq_work);
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 enable_one_core(void)
+{
+    scalingdbg(2, "meson:     one more pp, curent has %d pp cores\n",  num_cores_enabled + 1);
+    return set_mali_rt_clkpp(currentStep, num_cores_enabled + 1, 0);
+}
+
+static u32 disable_one_core(void)
+{
+    scalingdbg(2, "meson: disable one pp, current has %d pp cores\n",  num_cores_enabled - 1);
+    return set_mali_rt_clkpp(currentStep, num_cores_enabled - 1, 0);
+}
+
+static u32 enable_max_num_cores(void)
+{
+    return set_mali_rt_clkpp(currentStep, pmali_plat->scale_info.maxpp, 0);
+}
+
+static u32 enable_pp_cores(u32 val)
+{
+    scalingdbg(2, "meson: enable %d pp cores\n", val);
+    return set_mali_rt_clkpp(currentStep, val, 0);
+}
+#endif
+
+int mali_core_scaling_init(mali_plat_info_t *mali_plat)
+{
+#ifndef CONFIG_MALI_DVFS
+    if (mali_plat == NULL) {
+        scalingdbg(2, " Mali platform data is NULL!!!\n");
+        return -1;
+    }
+
+    pmali_plat = mali_plat;
+    num_cores_enabled = pmali_plat->sc_mpp;
+
+    currentStep = pmali_plat->def_clock;
+    lastStep = currentStep;
+    INIT_WORK(&wq_work, do_scaling);
+#endif
+    return 0;
+    /* NOTE: Mali is not fully initialized at this point. */
+}
+
+void mali_core_scaling_term(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    flush_scheduled_work();
+#endif
+}
+
+#ifndef CONFIG_MALI_DVFS
+static u32 mali_threshold [] = {
+    102, /* 40% */
+    128, /* 50% */
+    230, /* 90% */
+};
+#endif
+
+void mali_pp_scaling_update(struct mali_gpu_utilization_data *data)
+{
+#ifndef CONFIG_MALI_DVFS
+    int ret = 0;
+
+    if (mali_threshold[2] < data->utilization_pp)
+        ret = enable_max_num_cores();
+    else if (mali_threshold[1]< data->utilization_pp)
+        ret = enable_one_core();
+    else if (0 < data->utilization_pp)
+        ret = disable_one_core();
+    if (ret == 1)
+        schedule_work(&wq_work);
+#endif
+}
+
+#if LOG_MALI_SCALING
+void trace_utilization(struct mali_gpu_utilization_data *data, u32 current_idx, u32 next,
+        u32 current_pp, u32 next_pp)
+{
+    char direction;
+    if (next > current_idx)
+        direction = '>';
+    else if ((current_idx > pmali_plat->scale_info.minpp) && (next < current_idx))
+        direction = '<';
+    else
+        direction = '~';
+
+    scalingdbg(2, "[SCALING]%c (%3d-->%3d)@%3d{%3d - %3d}. pp:(%d-->%d)\n",
+            direction,
+            get_mali_freq(current_idx),
+            get_mali_freq(next),
+            data->utilization_gpu,
+            pmali_plat->dvfs_table[current_idx].downthreshold,
+            pmali_plat->dvfs_table[current_idx].upthreshold,
+            current_pp, next_pp);
+}
+#endif
+
+#ifndef CONFIG_MALI_DVFS
+static int mali_stay_count = 0;
+static void mali_decide_next_status(struct mali_gpu_utilization_data *data, int* next_fs_idx,
+        int* pp_change_flag)
+{
+    u32 utilization, mali_up_limit, decided_fs_idx;
+    u32 ld_left, ld_right;
+    u32 ld_up, ld_down;
+    u32 change_mode;
+
+    *pp_change_flag = 0;
+    change_mode = 0;
+    utilization = data->utilization_gpu;
+
+    mali_up_limit = (scaling_mode ==  MALI_TURBO_MODE) ?
+        pmali_plat->turbo_clock : pmali_plat->scale_info.maxclk;
+    decided_fs_idx = currentStep;
+
+    ld_up = pmali_plat->dvfs_table[currentStep].upthreshold;
+    ld_down = pmali_plat->dvfs_table[currentStep].downthreshold;
+
+    scalingdbg(2, "utilization=%d,  ld_up=%d\n ", utilization,  ld_up);
+    if (utilization >= ld_up) { /* go up */
+
+        scalingdbg(2, "currentStep=%d,  mali_up_limit=%d\n ", currentStep, mali_up_limit);
+        if (currentStep < mali_up_limit) {
+            change_mode = 1;
+            if ((currentStep < pmali_plat->def_clock) && (utilization > pmali_plat->bst_gpu))
+                decided_fs_idx = pmali_plat->def_clock;
+            else
+                decided_fs_idx++;
+        }
+        if ((data->utilization_pp >= ld_up) &&
+                (num_cores_enabled < pmali_plat->scale_info.maxpp)) {
+            if ((num_cores_enabled < pmali_plat->sc_mpp) && (data->utilization_pp >= pmali_plat->bst_pp)) {
+                *pp_change_flag = 1;
+                change_mode = 1;
+            } else if (change_mode == 0) {
+                *pp_change_flag = 2;
+                change_mode = 1;
+            }
+        }
+#if LOG_MALI_SCALING
+        scalingdbg(2, "[nexting..] [LD:%d]-> FS[CRNT:%d LMT:%d NEXT:%d] PP[NUM:%d LMT:%d MD:%d][F:%d]\n",
+                data->utilization_pp, currentStep, mali_up_limit, decided_fs_idx,
+                num_cores_enabled, pmali_plat->scale_info.maxpp, *pp_change_flag, change_mode);
+#endif
+    } else if (utilization <= ld_down) { /* go down */
+        if (mali_stay_count > 0) {
+            *next_fs_idx = decided_fs_idx;
+            mali_stay_count--;
+            return;
+        }
+
+        if (num_cores_enabled > pmali_plat->sc_mpp) {
+            change_mode = 1;
+            if (data->utilization_pp <= ld_down) {
+                ld_left = data->utilization_pp * num_cores_enabled;
+                ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+                    (num_cores_enabled - 1);
+                if (ld_left < ld_right) {
+                    change_mode = 2;
+                }
+            }
+        } else if (currentStep > pmali_plat->scale_info.minclk) {
+            change_mode = 1;
+        } else if (num_cores_enabled > 1) { /* decrease PPS */
+            if (data->utilization_pp <= ld_down) {
+                ld_left = data->utilization_pp * num_cores_enabled;
+                ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) *
+                    (num_cores_enabled - 1);
+                scalingdbg(2, "ld_left=%d, ld_right=%d\n", ld_left, ld_right);
+                if (ld_left < ld_right) {
+                    change_mode = 2;
+                }
+            }
+        }
+
+        if (change_mode == 1) {
+            decided_fs_idx--;
+        } else if (change_mode == 2) { /* decrease PPS */
+            *pp_change_flag = -1;
+        }
+    }
+    if (change_mode)
+        mali_stay_count = pmali_plat->dvfs_table[decided_fs_idx].keep_count;
+    *next_fs_idx = decided_fs_idx;
+}
+#endif
+
+void mali_pp_fs_scaling_update(struct mali_gpu_utilization_data *data)
+{
+#ifndef CONFIG_MALI_DVFS
+    int ret = 0;
+    int pp_change_flag = 0;
+    u32 next_idx = 0;
+
+#if LOG_MALI_SCALING
+    u32 last_pp = num_cores_enabled;
+#endif
+    mali_decide_next_status(data, &next_idx, &pp_change_flag);
+
+    if (pp_change_flag == 1)
+        ret = enable_pp_cores(pmali_plat->sc_mpp);
+    else if (pp_change_flag == 2)
+        ret = enable_one_core();
+    else if (pp_change_flag == -1) {
+        ret = disable_one_core();
+    }
+
+#if LOG_MALI_SCALING
+    if (pp_change_flag || (next_idx != currentStep))
+        trace_utilization(data, currentStep, next_idx, last_pp, num_cores_enabled);
+#endif
+
+    if (next_idx != currentStep) {
+        ret = 1;
+        currentStep = next_idx;
+    }
+
+    if (ret == 1)
+        schedule_work(&wq_work);
+#ifdef CONFIG_MALI400_PROFILING
+    else
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+                get_current_frequency(),
+                0,	0,	0,	0);
+#endif
+#endif
+}
+
+u32 get_mali_schel_mode(void)
+{
+    return scaling_mode;
+}
+
+void set_mali_schel_mode(u32 mode)
+{
+#ifndef CONFIG_MALI_DVFS
+    MALI_DEBUG_ASSERT(mode < MALI_SCALING_MODE_MAX);
+    if (mode >= MALI_SCALING_MODE_MAX)
+        return;
+    scaling_mode = mode;
+
+    /* set default performance range. */
+    pmali_plat->scale_info.minclk = pmali_plat->cfg_min_clock;
+    pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+    pmali_plat->scale_info.minpp = pmali_plat->cfg_min_pp;
+    pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp;
+
+    /* set current status and tune max freq */
+    if (scaling_mode == MALI_PP_FS_SCALING) {
+        pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+        enable_pp_cores(pmali_plat->sc_mpp);
+    } else if (scaling_mode == MALI_SCALING_DISABLE) {
+        pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock;
+        enable_max_num_cores();
+    } else if (scaling_mode == MALI_TURBO_MODE) {
+        pmali_plat->scale_info.maxclk = pmali_plat->turbo_clock;
+        enable_max_num_cores();
+    }
+    currentStep = pmali_plat->scale_info.maxclk;
+    schedule_work(&wq_work);
+#endif
+}
+
+u32 get_current_frequency(void)
+{
+    return get_mali_freq(currentStep);
+}
+
+void mali_gpu_utilization_callback(struct mali_gpu_utilization_data *data)
+{
+#ifndef CONFIG_MALI_DVFS
+    if (mali_pm_statue)
+        return;
+
+    switch (scaling_mode) {
+        case MALI_PP_FS_SCALING:
+            mali_pp_fs_scaling_update(data);
+            break;
+        case MALI_PP_SCALING:
+            mali_pp_scaling_update(data);
+            break;
+        default:
+            break;
+    }
+#endif
+}
+
+void mali_dev_restore(void)
+{
+#ifndef CONFIG_MALI_DVFS
+    mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table;
+
+    //mali_perf_set_num_pp_cores(num_cores_enabled);
+    mali_clock_set(pdvfs[currentStep].freq_index);
+#endif
+}
diff --git a/utgard/r5p1/Kbuild b/utgard/r5p1/Kbuild
new file mode 100755
index 0000000..c969fc4
--- /dev/null
+++ b/utgard/r5p1/Kbuild
@@ -0,0 +1,229 @@
+#
+# Copyright (C) 2010-2011 ARM Limited. All rights reserved.
+# 
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+# 
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+# This file is called by the Linux build system.
+
+# set up defaults if not defined by the user
+include $(src)/platform/Kbuild.amlogic
+
+TIMESTAMP ?= default
+OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB ?= 16
+USING_GPU_UTILIZATION ?= 0
+PROFILING_SKIP_PP_JOBS ?= 0
+PROFILING_SKIP_PP_AND_GP_JOBS ?= 0
+MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP ?= 0
+MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED ?= 0
+MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS ?= 0
+MALI_UPPER_HALF_SCHEDULING ?= 1
+MALI_ENABLE_CPU_CYCLES ?= 0
+
+# For customer releases the Linux Device Drivers will be provided as ARM proprietary and GPL releases:
+# The ARM proprietary product will only include the license/proprietary directory
+# The GPL product will only include the license/gpl directory
+ifeq ($(wildcard $(src)/linux/license/gpl/*),)
+    ccflags-y += -I$(src)/linux/license/proprietary
+    ifeq ($(CONFIG_MALI400_PROFILING),y)
+        $(error Profiling is incompatible with non-GPL license)
+    endif
+    ifeq ($(CONFIG_PM_RUNTIME),y)
+        $(error Runtime PM is incompatible with non-GPL license)
+    endif
+    ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
+        $(error DMA-BUF is incompatible with non-GPL license)
+    endif
+    $(error Linux Device integration is incompatible with non-GPL license)
+else
+    ccflags-y += -I$(src)/linux/license/gpl
+endif
+
+ifeq ($(USING_GPU_UTILIZATION), 1)
+    ifeq ($(USING_DVFS), 1)
+        $(error USING_GPU_UTILIZATION conflict with USING_DVFS you can read the Integration Guide to choose which one do you need)
+    endif
+endif
+
+mali-y += \
+	linux/mali_osk_atomics.o \
+	linux/mali_osk_irq.o \
+	linux/mali_osk_wq.o \
+	linux/mali_osk_locks.o \
+	linux/mali_osk_wait_queue.o \
+	linux/mali_osk_low_level_mem.o \
+	linux/mali_osk_math.o \
+	linux/mali_osk_memory.o \
+	linux/mali_osk_misc.o \
+	linux/mali_osk_mali.o \
+	linux/mali_osk_notification.o \
+	linux/mali_osk_time.o \
+	linux/mali_osk_timers.o
+
+mali-y += linux/mali_memory.o linux/mali_memory_os_alloc.o
+mali-y += linux/mali_memory_external.o
+mali-y += linux/mali_memory_block_alloc.o
+
+mali-y += \
+	linux/mali_memory_manager.o \
+	linux/mali_memory_virtual.o \
+	linux/mali_memory_util.o
+
+mali-y += \
+	linux/mali_ukk_mem.o \
+	linux/mali_ukk_gp.o \
+	linux/mali_ukk_pp.o \
+	linux/mali_ukk_core.o \
+	linux/mali_ukk_soft_job.o \
+	linux/mali_ukk_timeline.o
+
+# Source files which always are included in a build
+mali-y += \
+	common/mali_kernel_core.o \
+	linux/mali_kernel_linux.o \
+	common/mali_session.o \
+	linux/mali_device_pause_resume.o \
+	common/mali_kernel_vsync.o \
+	linux/mali_ukk_vsync.o \
+	linux/mali_kernel_sysfs.o \
+	common/mali_mmu.o \
+	common/mali_mmu_page_directory.o \
+	common/mali_mem_validation.o \
+	common/mali_hw_core.o \
+	common/mali_gp.o \
+	common/mali_pp.o \
+	common/mali_pp_job.o \
+	common/mali_gp_job.o \
+	common/mali_soft_job.o \
+	common/mali_scheduler.o \
+	common/mali_executor.o \
+	common/mali_group.o \
+	common/mali_dlbu.o \
+	common/mali_broadcast.o \
+	common/mali_pm.o \
+	common/mali_pmu.o \
+	common/mali_user_settings_db.o \
+	common/mali_kernel_utilization.o \
+	common/mali_control_timer.o \
+	common/mali_l2_cache.o \
+	common/mali_timeline.o \
+	common/mali_timeline_fence_wait.o \
+	common/mali_timeline_sync_fence.o \
+	common/mali_spinlock_reentrant.o \
+	common/mali_pm_domain.o \
+	linux/mali_osk_pm.o \
+	linux/mali_pmu_power_up_down.o \
+	__malidrv_build_info.o
+
+ifneq ($(MALI_PLATFORM_FILES),)
+	mali-y += $(MALI_PLATFORM_FILES:.c=.o)
+endif
+
+ifneq ($(MALI_PLATFORM_FILES_ADD_PREFIX),)
+	mali-y += $(MALI_PLATFORM_FILES_ADD_PREFIX:.c=.o)
+endif
+
+mali-$(CONFIG_MALI400_PROFILING) += linux/mali_ukk_profiling.o
+mali-$(CONFIG_MALI400_PROFILING) += linux/mali_osk_profiling.o
+
+mali-$(CONFIG_MALI400_INTERNAL_PROFILING) += linux/mali_profiling_internal.o timestamp-$(TIMESTAMP)/mali_timestamp.o
+ccflags-$(CONFIG_MALI400_INTERNAL_PROFILING) += -I$(src)/timestamp-$(TIMESTAMP)
+
+mali-$(CONFIG_DMA_SHARED_BUFFER) += linux/mali_memory_dma_buf.o
+mali-$(CONFIG_SYNC) += linux/mali_sync.o
+ccflags-$(CONFIG_SYNC) += -Idrivers/staging/android
+
+mali-$(CONFIG_MALI400_UMP) += linux/mali_memory_ump.o
+
+mali-$(CONFIG_MALI_DVFS) += common/mali_dvfs_policy.o
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI400) := mali.o
+
+ccflags-y += $(EXTRA_DEFINES)
+
+# Set up our defines, which will be passed to gcc
+ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP)
+ccflags-y += -DMALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED=$(MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED)
+ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS)
+ifdef CONFIG_MALI400_DEBUG
+ccflags-y += -DMALI_STATE_TRACKING=1
+else
+ccflags-y += -DMALI_STATE_TRACKING=0
+endif
+ccflags-y += -DMALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB)
+ccflags-y += -DUSING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION)
+ccflags-y += -DMALI_ENABLE_CPU_CYCLES=$(MALI_ENABLE_CPU_CYCLES)
+
+ifeq ($(MALI_UPPER_HALF_SCHEDULING),1)
+	ccflags-y += -DMALI_UPPER_HALF_SCHEDULING
+endif
+
+ccflags-$(CONFIG_MALI400_UMP) += -I$(src)/../../ump/include/ump
+ccflags-$(CONFIG_MALI400_DEBUG) += -DDEBUG
+
+# Use our defines when compiling
+ccflags-y += -I$(src) -I$(src)/include -I$(src)/common -I$(src)/linux -I$(src)/platform
+
+# Get subversion revision number, fall back to only ${MALI_RELEASE_NAME} if no svn info is available
+MALI_RELEASE_NAME=$(shell cat $(src)/.version 2> /dev/null)
+
+SVN_INFO = (cd $(src); svn info 2>/dev/null)
+
+ifneq ($(shell $(SVN_INFO) 2>/dev/null),)
+# SVN detected
+SVN_REV := $(shell $(SVN_INFO) | grep '^Revision: '| sed -e 's/^Revision: //' 2>/dev/null)
+DRIVER_REV := $(MALI_RELEASE_NAME)-r$(SVN_REV)
+CHANGE_DATE := $(shell $(SVN_INFO) | grep '^Last Changed Date: ' | cut -d: -f2- | cut -b2-)
+CHANGED_REVISION := $(shell $(SVN_INFO) | grep '^Last Changed Rev: ' | cut -d: -f2- | cut -b2-)
+REPO_URL := $(shell $(SVN_INFO) | grep '^URL: ' | cut -d: -f2- | cut -b2-)
+
+else # SVN
+GIT_REV := $(shell cd $(src); git describe --always 2>/dev/null)
+ifneq ($(GIT_REV),)
+# Git detected
+DRIVER_REV := $(MALI_RELEASE_NAME)-$(GIT_REV)
+CHANGE_DATE := $(shell cd $(src); git log -1 --format="%ci")
+CHANGED_REVISION := $(GIT_REV)
+REPO_URL := $(shell cd $(src); git describe --all --always 2>/dev/null)
+
+else # Git
+# No Git or SVN detected
+DRIVER_REV := $(MALI_RELEASE_NAME)
+CHANGE_DATE := $(MALI_RELEASE_NAME)
+CHANGED_REVISION := $(MALI_RELEASE_NAME)
+endif
+endif
+
+ccflags-y += -DSVN_REV_STRING=\"$(DRIVER_REV)\"
+
+VERSION_STRINGS :=
+VERSION_STRINGS += API_VERSION=$(shell cd $(src); grep "\#define _MALI_API_VERSION" $(FILES_PREFIX)include/linux/mali/mali_utgard_uk_types.h | cut -d' ' -f 3 )
+VERSION_STRINGS += REPO_URL=$(REPO_URL)
+VERSION_STRINGS += REVISION=$(DRIVER_REV)
+VERSION_STRINGS += CHANGED_REVISION=$(CHANGED_REVISION)
+VERSION_STRINGS += CHANGE_DATE=$(CHANGE_DATE)
+VERSION_STRINGS += BUILD_DATE=$(shell date)
+ifdef CONFIG_MALI400_DEBUG
+VERSION_STRINGS += BUILD=debug
+else
+VERSION_STRINGS += BUILD=release
+endif
+VERSION_STRINGS += TARGET_PLATFORM=$(TARGET_PLATFORM)
+VERSION_STRINGS += MALI_PLATFORM=$(MALI_PLATFORM)
+VERSION_STRINGS += KDIR=$(KDIR)
+VERSION_STRINGS += OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB)
+VERSION_STRINGS += USING_UMP=$(CONFIG_MALI400_UMP)
+VERSION_STRINGS += USING_PROFILING=$(CONFIG_MALI400_PROFILING)
+VERSION_STRINGS += USING_INTERNAL_PROFILING=$(CONFIG_MALI400_INTERNAL_PROFILING)
+VERSION_STRINGS += USING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION)
+VERSION_STRINGS += USING_DVFS=$(CONFIG_MALI_DVFS)
+VERSION_STRINGS += MALI_UPPER_HALF_SCHEDULING=$(MALI_UPPER_HALF_SCHEDULING)
+
+# Create file with Mali driver configuration
+$(src)/__malidrv_build_info.c:
+	@echo 'const char *__malidrv_build_info(void) { return "malidrv: $(VERSION_STRINGS)";}' > $(src)/__malidrv_build_info.c
diff --git a/utgard/r5p1/Kconfig b/utgard/r5p1/Kconfig
new file mode 100755
index 0000000..09687d5
--- /dev/null
+++ b/utgard/r5p1/Kconfig
@@ -0,0 +1,119 @@
+menu "Mali GPU OpenGL device driver"
+config MALI400
+	tristate "Mali-300/400/450 support"
+	depends on ARM || ARM64
+	default m
+	select DMA_SHARED_BUFFER
+	---help---
+	  This enables support for the ARM Mali-300, Mali-400, and Mali-450
+	  GPUs.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called mali.
+
+config MALI400_DEBUG
+	bool "Enable debug in Mali driver"
+	depends on MALI400
+	default n
+	---help---
+	  This enabled extra debug checks and messages in the Mali driver.
+
+config MALI400_PROFILING_EXTRA_SUPPORT
+	bool "Select other items in kernel to support PROFILING."
+	depends on MALI400_PROFILING
+	select PROFILING
+	select FTRACE
+	select PERF_EVENTS
+	select ENABLE_DEFAULT_TRACERS
+	select DEBUG_MUTEXES
+	select HIGH_RES_TIMERS
+	select HW_PERF_EVENTS
+	select CPU_FREQ
+	select MALI400_DEBUG
+
+config MALI400_PROFILING
+	bool "Enable Mali profiling"
+	depends on MALI400
+	select TRACEPOINTS
+	default n
+	---help---
+	  This enables gator profiling of Mali GPU events.
+
+config MALI400_INTERNAL_PROFILING
+	bool "Enable internal Mali profiling API"
+	depends on MALI400_PROFILING
+	default n
+	---help---
+	  This enables the internal legacy Mali profiling API.
+
+config MALI400_UMP
+	bool "Enable UMP support"
+	depends on MALI400
+	---help---
+	  This enables support for the UMP memory sharing API in the Mali driver.
+
+config MALI_DVFS
+	bool "Enable Mali dynamically frequency change"
+	depends on MALI400
+	default n
+	---help---
+	  This enables support for dynamic change frequency of Mali with the goal of lowering power consumption.
+
+config MALI_DMA_BUF_MAP_ON_ATTACH
+	bool "Map dma-buf attachments on attach"
+	depends on MALI400 && DMA_SHARED_BUFFER
+	default y
+	---help---
+	  This makes the Mali driver map dma-buf attachments after doing
+	  attach. If this is not set the dma-buf attachments will be mapped for
+	  every time the GPU need to access the buffer.
+
+	  Mapping for each access can cause lower performance.
+
+config MALI_SHARED_INTERRUPTS
+	bool "Support for shared interrupts"
+	depends on MALI400
+	default n
+	---help---
+	  Adds functionality required to properly support shared interrupts.  Without this support,
+	  the device driver will fail during insmod if it detects shared interrupts.  This also
+	  works when the GPU is not using shared interrupts, but might have a slight performance
+	  impact.
+
+if ARCH_MESON6
+config	MESON6_GPU_EXTRA
+	bool "M6 fix"
+	depends on MALI400
+	default y
+	select MALI_SHARED_INTERRUPTS
+endif
+
+config MALI_PMU_PARALLEL_POWER_UP
+	bool "Power up Mali PMU domains in parallel"
+	depends on MALI400
+	default n
+	---help---
+	  This makes the Mali driver power up all PMU power domains in parallel, instead of
+	  powering up domains one by one, with a slight delay in between. Powering on all power
+	  domains at the same time may cause peak currents higher than what some systems can handle.
+	  These systems must not enable this option.
+
+config MALI_DT
+	bool "Using device tree to initialize module"
+	depends on MALI400 && OF
+	default n
+	---help---
+	  This enable the Mali driver to choose the device tree path to get platform resoures
+	  and disable the old config method. Mali driver could run on the platform which the
+	  device tree is enabled in kernel and corresponding hardware description is implemented
+	  properly in device DTS file.
+
+config MALI_QUIET
+	bool "Make Mali driver very quiet"
+	depends on MALI400 && !MALI400_DEBUG
+	default n
+	---help---
+	  This forces the Mali driver to never print any messages.
+
+	  If unsure, say N.
+endmenu
diff --git a/utgard/r5p1/Makefile b/utgard/r5p1/Makefile
new file mode 100755
index 0000000..a0bef30
--- /dev/null
+++ b/utgard/r5p1/Makefile
@@ -0,0 +1,184 @@
+#
+# Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+# 
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+# 
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+USE_UMPV2=0
+USING_PROFILING ?= 1
+USING_INTERNAL_PROFILING ?= 0
+USING_DVFS ?= 1
+MALI_HEATMAPS_ENABLED ?= 0
+MALI_DMA_BUF_MAP_ON_ATTACH ?= 1
+MALI_PMU_PARALLEL_POWER_UP ?= 0
+USING_DT ?= 0
+
+# The Makefile sets up "arch" based on the CONFIG, creates the version info
+# string and the __malidrv_build_info.c file, and then call the Linux build
+# system to actually build the driver. After that point the Kbuild file takes
+# over.
+
+# set up defaults if not defined by the user
+ARCH ?= arm
+
+OSKOS=linux
+FILES_PREFIX=
+
+check_cc2 = \
+	$(shell if $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; \
+	then \
+		echo "$(2)"; \
+	else \
+		echo "$(3)"; \
+	fi ;)
+
+# This conditional makefile exports the global definition ARM_INTERNAL_BUILD. Customer releases will not include arm_internal.mak
+-include ../../../arm_internal.mak
+
+# Give warning of old config parameters are used
+ifneq ($(CONFIG),)
+$(warning "You have specified the CONFIG variable which is no longer in used. Use TARGET_PLATFORM instead.")
+endif
+
+ifneq ($(CPU),)
+$(warning "You have specified the CPU variable which is no longer in used. Use TARGET_PLATFORM instead.")
+endif
+
+# Include the mapping between TARGET_PLATFORM and KDIR + MALI_PLATFORM
+-include MALI_CONFIGURATION
+export KDIR ?= $(KDIR-$(TARGET_PLATFORM))
+export MALI_PLATFORM ?= $(MALI_PLATFORM-$(TARGET_PLATFORM))
+
+ifneq ($(TARGET_PLATFORM),)
+ifeq ($(MALI_PLATFORM),)
+$(error "Invalid TARGET_PLATFORM: $(TARGET_PLATFORM)")
+endif
+endif
+
+# validate lookup result
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(TARGET_PLATFORM))
+endif
+
+ifeq ($(USING_GPU_UTILIZATION), 1)
+    ifeq ($(USING_DVFS), 1)
+        $(error USING_GPU_UTILIZATION conflict with USING_DVFS you can read the Integration Guide to choose which one do you need)
+    endif
+endif
+
+ifeq ($(USING_UMP),1)
+export CONFIG_MALI400_UMP=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_UMP=1
+ifeq ($(USE_UMPV2),1)
+UMP_SYMVERS_FILE ?= ../umpv2/Module.symvers
+else
+UMP_SYMVERS_FILE ?= ../ump/Module.symvers
+endif
+KBUILD_EXTRA_SYMBOLS = $(realpath $(UMP_SYMVERS_FILE))
+$(warning $(KBUILD_EXTRA_SYMBOLS))
+endif
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+include $(KDIR)/.config
+
+ifeq ($(ARCH), arm)
+# when compiling for ARM we're cross compiling
+export CROSS_COMPILE ?= $(call check_cc2, arm-linux-gnueabi-gcc, arm-linux-gnueabi-, arm-none-linux-gnueabi-)
+endif
+
+# report detected/selected settings
+ifdef ARM_INTERNAL_BUILD
+$(warning TARGET_PLATFORM $(TARGET_PLATFORM))
+$(warning KDIR $(KDIR))
+$(warning MALI_PLATFORM $(MALI_PLATFORM))
+endif
+
+# Set up build config
+export CONFIG_MALI400=m
+export CONFIG_MALI450=y
+
+export EXTRA_DEFINES += -DCONFIG_MALI400=1
+export EXTRA_DEFINES += -DCONFIG_MALI450=1
+
+ifneq ($(MALI_PLATFORM),)
+export EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1
+export MALI_PLATFORM_FILES = $(wildcard platform/$(MALI_PLATFORM)/*.c)
+endif
+
+ifeq ($(USING_PROFILING),1)
+ifeq ($(CONFIG_TRACEPOINTS),)
+$(warning CONFIG_TRACEPOINTS required for profiling)
+else
+export CONFIG_MALI400_PROFILING=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_PROFILING=1
+ifeq ($(USING_INTERNAL_PROFILING),1)
+export CONFIG_MALI400_INTERNAL_PROFILING=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_INTERNAL_PROFILING=1
+endif
+ifeq ($(MALI_HEATMAPS_ENABLED),1)
+export MALI_HEATMAPS_ENABLED=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_HEATMAPS_ENABLED
+endif
+endif
+endif
+
+ifeq ($(MALI_DMA_BUF_MAP_ON_ATTACH),1)
+export CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DMA_BUF_MAP_ON_ATTACH
+endif
+
+ifeq ($(MALI_SHARED_INTERRUPTS),1)
+export CONFIG_MALI_SHARED_INTERRUPTS=y
+export EXTRA_DEFINES += -DCONFIG_MALI_SHARED_INTERRUPTS
+endif
+
+ifeq ($(USING_DVFS),1)
+	xxxyyyy
+export CONFIG_MALI_DVFS=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DVFS
+endif
+
+ifeq ($(MALI_PMU_PARALLEL_POWER_UP),1)
+export CONFIG_MALI_PMU_PARALLEL_POWER_UP=y
+export EXTRA_DEFINES += -DCONFIG_MALI_PMU_PARALLEL_POWER_UP
+endif
+
+ifdef CONFIG_OF
+ifeq ($(USING_DT),1)
+export CONFIG_MALI_DT=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DT
+endif
+endif
+
+ifneq ($(BUILD),release)
+# Debug
+export CONFIG_MALI400_DEBUG=y
+else
+# Release
+ifeq ($(MALI_QUIET),1)
+export CONFIG_MALI_QUIET=y
+export EXTRA_DEFINES += -DCONFIG_MALI_QUIET
+endif
+endif
+
+ifeq ($(MALI_SKIP_JOBS),1)
+EXTRA_DEFINES += -DPROFILING_SKIP_PP_JOBS=1 -DPROFILING_SKIP_GP_JOBS=1
+endif
+
+all: $(UMP_SYMVERS_FILE)
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) modules
+	@rm $(FILES_PREFIX)__malidrv_build_info.c $(FILES_PREFIX)__malidrv_build_info.o
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+kernelrelease:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) kernelrelease
+
+export CONFIG KBUILD_EXTRA_SYMBOLS
diff --git a/utgard/r5p1/clean.sh b/utgard/r5p1/clean.sh
new file mode 100755
index 0000000..130fd73
--- /dev/null
+++ b/utgard/r5p1/clean.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# rm *.o, *.cmd, *~
+find . -name "*.o" -o -name "*.cmd" -o -name "*~" | xargs rm -rf
+rm	Module.symvers
+rm	mali.ko
+rm	mali.mod.c
+rm	modules.order
diff --git a/utgard/r5p1/common/mali_broadcast.c b/utgard/r5p1/common/mali_broadcast.c
new file mode 100755
index 0000000..136db61
--- /dev/null
+++ b/utgard/r5p1/common/mali_broadcast.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_broadcast.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+#define MALI_BROADCAST_REGISTER_SIZE      0x1000
+#define MALI_BROADCAST_REG_BROADCAST_MASK    0x0
+#define MALI_BROADCAST_REG_INTERRUPT_MASK    0x4
+
+struct mali_bcast_unit {
+	struct mali_hw_core hw_core;
+	u32 current_mask;
+};
+
+struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource)
+{
+	struct mali_bcast_unit *bcast_unit = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(resource);
+	MALI_DEBUG_PRINT(2, ("Broadcast: Creating Mali Broadcast unit: %s\n",
+			     resource->description));
+
+	bcast_unit = _mali_osk_malloc(sizeof(struct mali_bcast_unit));
+	if (NULL == bcast_unit) {
+		MALI_PRINT_ERROR(("Broadcast: Failed to allocate memory for Broadcast unit\n"));
+		return NULL;
+	}
+
+	if (_MALI_OSK_ERR_OK == mali_hw_core_create(&bcast_unit->hw_core,
+			resource, MALI_BROADCAST_REGISTER_SIZE)) {
+		bcast_unit->current_mask = 0;
+		mali_bcast_reset(bcast_unit);
+
+		return bcast_unit;
+	} else {
+		MALI_PRINT_ERROR(("Broadcast: Failed map broadcast unit\n"));
+	}
+
+	_mali_osk_free(bcast_unit);
+
+	return NULL;
+}
+
+void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	mali_hw_core_delete(&bcast_unit->hw_core);
+	_mali_osk_free(bcast_unit);
+}
+
+/* Call this function to add the @group's id into bcast mask
+ * Note: redundant calling this function with same @group
+ * doesn't make any difference as calling it once
+ */
+void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit,
+			  struct mali_group *group)
+{
+	u32 bcast_id;
+	u32 broadcast_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group));
+
+	broadcast_mask = bcast_unit->current_mask;
+
+	broadcast_mask |= (bcast_id); /* add PP core to broadcast */
+	broadcast_mask |= (bcast_id << 16); /* add MMU to broadcast */
+
+	/* store mask so we can restore on reset */
+	bcast_unit->current_mask = broadcast_mask;
+}
+
+/* Call this function to remove @group's id from bcast mask
+ * Note: redundant calling this function with same @group
+ * doesn't make any difference as calling it once
+ */
+void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit,
+			     struct mali_group *group)
+{
+	u32 bcast_id;
+	u32 broadcast_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group));
+
+	broadcast_mask = bcast_unit->current_mask;
+
+	broadcast_mask &= ~((bcast_id << 16) | bcast_id);
+
+	/* store mask so we can restore on reset */
+	bcast_unit->current_mask = broadcast_mask;
+}
+
+void mali_bcast_reset(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+
+	MALI_DEBUG_PRINT(4,
+			 ("Broadcast: setting mask 0x%08X + 0x%08X (reset)\n",
+			  bcast_unit->current_mask,
+			  bcast_unit->current_mask & 0xFF));
+
+	/* set broadcast mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_BROADCAST_MASK,
+				    bcast_unit->current_mask);
+
+	/* set IRQ override mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_INTERRUPT_MASK,
+				    bcast_unit->current_mask & 0xFF);
+}
+
+void mali_bcast_disable(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+
+	MALI_DEBUG_PRINT(4, ("Broadcast: setting mask 0x0 + 0x0 (disable)\n"));
+
+	/* set broadcast mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_BROADCAST_MASK,
+				    0x0);
+
+	/* set IRQ override mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_INTERRUPT_MASK,
+				    0x0);
+}
diff --git a/utgard/r5p1/common/mali_broadcast.h b/utgard/r5p1/common/mali_broadcast.h
new file mode 100755
index 0000000..efce441
--- /dev/null
+++ b/utgard/r5p1/common/mali_broadcast.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_BROADCAST_H__
+#define __MALI_BROADCAST_H__
+
+/*
+ *  Interface for the broadcast unit on Mali-450.
+ *
+ * - Represents up to 8 × (MMU + PP) pairs.
+ * - Supports dynamically changing which (MMU + PP) pairs receive the broadcast by
+ *   setting a mask.
+ */
+
+#include "mali_hw_core.h"
+#include "mali_group.h"
+
+struct mali_bcast_unit;
+
+struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource);
+void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit);
+
+/* Add a group to the list of (MMU + PP) pairs broadcasts go out to. */
+void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group);
+
+/* Remove a group to the list of (MMU + PP) pairs broadcasts go out to. */
+void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group);
+
+/* Re-set cached mask. This needs to be called after having been suspended. */
+void mali_bcast_reset(struct mali_bcast_unit *bcast_unit);
+
+/**
+ * Disable broadcast unit
+ *
+ * mali_bcast_enable must be called to re-enable the unit. Cores may not be
+ * added or removed when the unit is disabled.
+ */
+void mali_bcast_disable(struct mali_bcast_unit *bcast_unit);
+
+/**
+ * Re-enable broadcast unit
+ *
+ * This resets the masks to include the cores present when mali_bcast_disable was called.
+ */
+MALI_STATIC_INLINE void mali_bcast_enable(struct mali_bcast_unit *bcast_unit)
+{
+	mali_bcast_reset(bcast_unit);
+}
+
+#endif /* __MALI_BROADCAST_H__ */
diff --git a/utgard/r5p1/common/mali_control_timer.c b/utgard/r5p1/common/mali_control_timer.c
new file mode 100755
index 0000000..d0dd95a
--- /dev/null
+++ b/utgard/r5p1/common/mali_control_timer.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2010-2012, 2014-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_utilization.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_dvfs_policy.h"
+#include "mali_control_timer.h"
+
+static u64 period_start_time = 0;
+
+static _mali_osk_timer_t *mali_control_timer = NULL;
+static mali_bool timer_running = MALI_FALSE;
+
+static u32 mali_control_timeout = 1000;
+
+void mali_control_timer_add(u32 timeout)
+{
+	_mali_osk_timer_add(mali_control_timer, _mali_osk_time_mstoticks(timeout));
+}
+
+static void mali_control_timer_callback(void *arg)
+{
+	if (mali_utilization_enabled()) {
+		struct mali_gpu_utilization_data *util_data = NULL;
+		u64 time_period = 0;
+		mali_bool need_add_timer = MALI_TRUE;
+
+		/* Calculate gpu utilization */
+		util_data = mali_utilization_calculate(&period_start_time, &time_period, &need_add_timer);
+
+		if (util_data) {
+#if defined(CONFIG_MALI_DVFS)
+			mali_dvfs_policy_realize(util_data, time_period);
+#else
+			mali_utilization_platform_realize(util_data);
+#endif
+
+			if (MALI_TRUE == need_add_timer) {
+				mali_control_timer_add(mali_control_timeout);
+			}
+		}
+	}
+}
+
+/* Init a timer (for now it is used for GPU utilization and dvfs) */
+_mali_osk_errcode_t mali_control_timer_init(void)
+{
+	_mali_osk_device_data data;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		/* Use device specific settings (if defined) */
+		if (0 != data.control_interval) {
+			mali_control_timeout = data.control_interval;
+			MALI_DEBUG_PRINT(2, ("Mali GPU Timer: %u\n", mali_control_timeout));
+		}
+	}
+
+	mali_control_timer = _mali_osk_timer_init();
+	if (NULL == mali_control_timer) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+	_mali_osk_timer_setcallback(mali_control_timer, mali_control_timer_callback, NULL);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_control_timer_term(void)
+{
+	if (NULL != mali_control_timer) {
+		_mali_osk_timer_del(mali_control_timer);
+		timer_running = MALI_FALSE;
+		_mali_osk_timer_term(mali_control_timer);
+		mali_control_timer = NULL;
+	}
+}
+
+mali_bool mali_control_timer_resume(u64 time_now)
+{
+	mali_utilization_data_assert_locked();
+
+	if (timer_running != MALI_TRUE) {
+		timer_running = MALI_TRUE;
+
+		period_start_time = time_now;
+
+		mali_utilization_reset();
+
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+void mali_control_timer_pause(void)
+{
+	mali_utilization_data_assert_locked();
+	if (timer_running == MALI_TRUE) {
+		timer_running = MALI_FALSE;
+	}
+}
+
+void mali_control_timer_suspend(mali_bool suspend)
+{
+	mali_utilization_data_lock();
+
+	if (timer_running == MALI_TRUE) {
+		timer_running = MALI_FALSE;
+
+		mali_utilization_data_unlock();
+
+		if (suspend == MALI_TRUE) {
+			_mali_osk_timer_del(mali_control_timer);
+			mali_utilization_reset();
+		}
+	} else {
+		mali_utilization_data_unlock();
+	}
+}
diff --git a/utgard/r5p1/common/mali_control_timer.h b/utgard/r5p1/common/mali_control_timer.h
new file mode 100755
index 0000000..4f919ec
--- /dev/null
+++ b/utgard/r5p1/common/mali_control_timer.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2010-2012, 2014-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_CONTROL_TIMER_H__
+#define __MALI_CONTROL_TIMER_H__
+
+#include "mali_osk.h"
+
+_mali_osk_errcode_t mali_control_timer_init(void);
+
+void mali_control_timer_term(void);
+
+mali_bool mali_control_timer_resume(u64 time_now);
+
+void mali_control_timer_suspend(mali_bool suspend);
+void mali_control_timer_pause(void);
+
+void mali_control_timer_add(u32 timeout);
+
+#endif /* __MALI_CONTROL_TIMER_H__ */
+
diff --git a/utgard/r5p1/common/mali_dlbu.c b/utgard/r5p1/common/mali_dlbu.c
new file mode 100755
index 0000000..efe1ab3
--- /dev/null
+++ b/utgard/r5p1/common/mali_dlbu.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_dlbu.h"
+#include "mali_memory.h"
+#include "mali_pp.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "mali_hw_core.h"
+
+/**
+ * Size of DLBU registers in bytes
+ */
+#define MALI_DLBU_SIZE 0x400
+
+mali_dma_addr mali_dlbu_phys_addr = 0;
+static mali_io_address mali_dlbu_cpu_addr = NULL;
+
+/**
+ * DLBU register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_dlbu_register {
+	MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR = 0x0000, /**< Master tile list physical base address;
+                                                             31:12 Physical address to the page used for the DLBU
+                                                             0 DLBU enable - set this bit to 1 enables the AXI bus
+                                                             between PPs and L2s, setting to 0 disables the router and
+                                                             no further transactions are sent to DLBU */
+	MALI_DLBU_REGISTER_MASTER_TLLIST_VADDR     = 0x0004, /**< Master tile list virtual base address;
+                                                             31:12 Virtual address to the page used for the DLBU */
+	MALI_DLBU_REGISTER_TLLIST_VBASEADDR     = 0x0008, /**< Tile list virtual base address;
+                                                             31:12 Virtual address to the tile list. This address is used when
+                                                             calculating the call address sent to PP.*/
+	MALI_DLBU_REGISTER_FB_DIM                 = 0x000C, /**< Framebuffer dimension;
+                                                             23:16 Number of tiles in Y direction-1
+                                                             7:0 Number of tiles in X direction-1 */
+	MALI_DLBU_REGISTER_TLLIST_CONF       = 0x0010, /**< Tile list configuration;
+                                                             29:28 select the size of each allocated block: 0=128 bytes, 1=256, 2=512, 3=1024
+                                                             21:16 2^n number of tiles to be binned to one tile list in Y direction
+                                                             5:0 2^n number of tiles to be binned to one tile list in X direction */
+	MALI_DLBU_REGISTER_START_TILE_POS         = 0x0014, /**< Start tile positions;
+                                                             31:24 start position in Y direction for group 1
+                                                             23:16 start position in X direction for group 1
+                                                             15:8 start position in Y direction for group 0
+                                                             7:0 start position in X direction for group 0 */
+	MALI_DLBU_REGISTER_PP_ENABLE_MASK         = 0x0018, /**< PP enable mask;
+                                                             7 enable PP7 for load balancing
+                                                             6 enable PP6 for load balancing
+                                                             5 enable PP5 for load balancing
+                                                             4 enable PP4 for load balancing
+                                                             3 enable PP3 for load balancing
+                                                             2 enable PP2 for load balancing
+                                                             1 enable PP1 for load balancing
+                                                             0 enable PP0 for load balancing */
+} mali_dlbu_register;
+
+typedef enum {
+	PP0ENABLE = 0,
+	PP1ENABLE,
+	PP2ENABLE,
+	PP3ENABLE,
+	PP4ENABLE,
+	PP5ENABLE,
+	PP6ENABLE,
+	PP7ENABLE
+} mali_dlbu_pp_enable;
+
+struct mali_dlbu_core {
+	struct mali_hw_core     hw_core;           /**< Common for all HW cores */
+	u32                     pp_cores_mask;     /**< This is a mask for the PP cores whose operation will be controlled by LBU
+                                                      see MALI_DLBU_REGISTER_PP_ENABLE_MASK register */
+};
+
+_mali_osk_errcode_t mali_dlbu_initialize(void)
+{
+	MALI_DEBUG_PRINT(2, ("Mali DLBU: Initializing\n"));
+
+	if (_MALI_OSK_ERR_OK ==
+	    mali_mmu_get_table_page(&mali_dlbu_phys_addr,
+				    &mali_dlbu_cpu_addr)) {
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_dlbu_terminate(void)
+{
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: terminating\n"));
+
+	if (0 != mali_dlbu_phys_addr && 0 != mali_dlbu_cpu_addr) {
+		mali_mmu_release_table_page(mali_dlbu_phys_addr,
+					    mali_dlbu_cpu_addr);
+		mali_dlbu_phys_addr = 0;
+		mali_dlbu_cpu_addr = 0;
+	}
+}
+
+struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource)
+{
+	struct mali_dlbu_core *core = NULL;
+
+	MALI_DEBUG_PRINT(2, ("Mali DLBU: Creating Mali dynamic load balancing unit: %s\n", resource->description));
+
+	core = _mali_osk_malloc(sizeof(struct mali_dlbu_core));
+	if (NULL != core) {
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALI_DLBU_SIZE)) {
+			core->pp_cores_mask = 0;
+			if (_MALI_OSK_ERR_OK == mali_dlbu_reset(core)) {
+				return core;
+			}
+			MALI_PRINT_ERROR(("Failed to reset DLBU %s\n", core->hw_core.description));
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Mali DLBU: Failed to allocate memory for DLBU core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_dlbu_delete(struct mali_dlbu_core *dlbu)
+{
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	mali_hw_core_delete(&dlbu->hw_core);
+	_mali_osk_free(dlbu);
+}
+
+_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu)
+{
+	u32 dlbu_registers[7];
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT;
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+
+	MALI_DEBUG_PRINT(4, ("Mali DLBU: mali_dlbu_reset: %s\n", dlbu->hw_core.description));
+
+	dlbu_registers[0] = mali_dlbu_phys_addr | 1; /* bit 0 enables the whole core */
+	dlbu_registers[1] = MALI_DLBU_VIRT_ADDR;
+	dlbu_registers[2] = 0;
+	dlbu_registers[3] = 0;
+	dlbu_registers[4] = 0;
+	dlbu_registers[5] = 0;
+	dlbu_registers[6] = dlbu->pp_cores_mask;
+
+	/* write reset values to core registers */
+	mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR, dlbu_registers, 7);
+
+	err = _MALI_OSK_ERR_OK;
+
+	return err;
+}
+
+void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu)
+{
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+
+	mali_hw_core_register_write(&dlbu->hw_core, MALI_DLBU_REGISTER_PP_ENABLE_MASK, dlbu->pp_cores_mask);
+}
+
+void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group)
+{
+	struct mali_pp_core *pp_core;
+	u32 bcast_id;
+
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	pp_core = mali_group_get_pp_core(group);
+	bcast_id = mali_pp_core_get_bcast_id(pp_core);
+
+	dlbu->pp_cores_mask |= bcast_id;
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: Adding core[%d] New mask= 0x%02x\n", bcast_id , dlbu->pp_cores_mask));
+}
+
+/* Remove a group from the DLBU */
+void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group)
+{
+	struct mali_pp_core *pp_core;
+	u32 bcast_id;
+
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	pp_core = mali_group_get_pp_core(group);
+	bcast_id = mali_pp_core_get_bcast_id(pp_core);
+
+	dlbu->pp_cores_mask &= ~bcast_id;
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: Removing core[%d] New mask= 0x%02x\n", bcast_id, dlbu->pp_cores_mask));
+}
+
+/* Configure the DLBU for \a job. This needs to be done before the job is started on the groups in the DLBU. */
+void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job)
+{
+	u32 *registers;
+	MALI_DEBUG_ASSERT(job);
+	registers = mali_pp_job_get_dlbu_registers(job);
+	MALI_DEBUG_PRINT(4, ("Mali DLBU: Starting job\n"));
+
+	/* Writing 4 registers:
+	 * DLBU registers except the first two (written once at DLBU initialisation / reset) and the PP_ENABLE_MASK register */
+	mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_TLLIST_VBASEADDR, registers, 4);
+
+}
diff --git a/utgard/r5p1/common/mali_dlbu.h b/utgard/r5p1/common/mali_dlbu.h
new file mode 100755
index 0000000..6b06888
--- /dev/null
+++ b/utgard/r5p1/common/mali_dlbu.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_DLBU_H__
+#define __MALI_DLBU_H__
+
+#define MALI_DLBU_VIRT_ADDR 0xFFF00000 /* master tile virtual address fixed at this value and mapped into every session */
+
+#include "mali_osk.h"
+
+struct mali_pp_job;
+struct mali_group;
+struct mali_dlbu_core;
+
+extern mali_dma_addr mali_dlbu_phys_addr;
+
+_mali_osk_errcode_t mali_dlbu_initialize(void);
+void mali_dlbu_terminate(void);
+
+struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource);
+void mali_dlbu_delete(struct mali_dlbu_core *dlbu);
+
+_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu);
+
+void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group);
+void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group);
+
+/** @brief Called to update HW after DLBU state changed
+ *
+ * This function must be called after \a mali_dlbu_add_group or \a
+ * mali_dlbu_remove_group to write the updated mask to hardware, unless the
+ * same is accomplished by calling \a mali_dlbu_reset.
+ */
+void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu);
+
+void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job);
+
+#endif /* __MALI_DLBU_H__ */
diff --git a/utgard/r5p1/common/mali_dvfs_policy.c b/utgard/r5p1/common/mali_dvfs_policy.c
new file mode 100755
index 0000000..12ba069
--- /dev/null
+++ b/utgard/r5p1/common/mali_dvfs_policy.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2010-2012, 2014-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include "mali_scheduler.h"
+#include "mali_dvfs_policy.h"
+#include "mali_osk_mali.h"
+#include "mali_osk_profiling.h"
+
+#define CLOCK_TUNING_TIME_DEBUG 0
+
+#define MAX_PERFORMANCE_VALUE 256
+#define MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(percent) ((int) ((percent)*(MAX_PERFORMANCE_VALUE)/100.0 + 0.5))
+
+/** The max fps the same as display vsync default 60, can set by module insert parameter */
+int mali_max_system_fps = 60;
+/** A lower limit on their desired FPS default 58, can set by module insert parameter */
+int mali_desired_fps = 58;
+
+static int mali_fps_step1 = 0;
+static int mali_fps_step2 = 0;
+
+static int clock_step = -1;
+static int cur_clk_step = -1;
+static struct mali_gpu_clock *gpu_clk = NULL;
+
+/*Function prototype */
+static int (*mali_gpu_set_freq)(int) = NULL;
+static int (*mali_gpu_get_freq)(void) = NULL;
+
+static mali_bool mali_dvfs_enabled = MALI_FALSE;
+
+#define NUMBER_OF_NANOSECONDS_PER_SECOND  1000000000ULL
+static u32 calculate_window_render_fps(u64 time_period)
+{
+	u32 max_window_number;
+	u64 tmp;
+	u64 max = time_period;
+	u32 leading_zeroes;
+	u32 shift_val;
+	u32 time_period_shift;
+	u32 max_window_number_shift;
+	u32 ret_val;
+
+	max_window_number = mali_session_max_window_num();
+
+	/* To avoid float division, extend the dividend to ns unit */
+	tmp = (u64)max_window_number * NUMBER_OF_NANOSECONDS_PER_SECOND;
+	if (tmp > time_period) {
+		max = tmp;
+	}
+
+	/*
+	 * We may have 64-bit values, a dividend or a divisor or both
+	 * To avoid dependencies to a 64-bit divider, we shift down the two values
+	 * equally first.
+	 */
+	leading_zeroes = _mali_osk_clz((u32)(max >> 32));
+	shift_val = 32 - leading_zeroes;
+
+	time_period_shift = (u32)(time_period >> shift_val);
+	max_window_number_shift = (u32)(tmp >> shift_val);
+
+	ret_val = max_window_number_shift / time_period_shift;
+
+	return ret_val;
+}
+
+static bool mali_pickup_closest_avail_clock(int target_clock_mhz, mali_bool pick_clock_up)
+{
+	int i = 0;
+	bool clock_changed = false;
+
+	/* Round up the closest available frequency step for target_clock_hz */
+	for (i = 0; i < gpu_clk->num_of_steps; i++) {
+		/* Find the first item > target_clock_hz */
+		if (((int)(gpu_clk->item[i].clock) - target_clock_mhz) > 0) {
+			break;
+		}
+	}
+
+	/* If the target clock greater than the maximum clock just pick the maximum one*/
+	if (i == gpu_clk->num_of_steps) {
+		i = gpu_clk->num_of_steps - 1;
+	} else {
+		if ((!pick_clock_up) && (i > 0)) {
+			i = i - 1;
+		}
+	}
+
+	clock_step = i;
+	if (cur_clk_step != clock_step) {
+		clock_changed = true;
+	}
+
+	return clock_changed;
+}
+
+void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period)
+{
+	int under_perform_boundary_value = 0;
+	int over_perform_boundary_value = 0;
+	int current_fps = 0;
+	int current_gpu_util = 0;
+	bool clock_changed = false;
+#if CLOCK_TUNING_TIME_DEBUG
+	struct timeval start;
+	struct timeval stop;
+	unsigned int elapse_time;
+	do_gettimeofday(&start);
+#endif
+	u32 window_render_fps;
+
+	if (NULL == gpu_clk) {
+		MALI_DEBUG_PRINT(2, ("Enable DVFS but patform doesn't Support freq change. \n"));
+		return;
+	}
+
+	window_render_fps = calculate_window_render_fps(time_period);
+
+	current_fps = window_render_fps;
+	current_gpu_util = data->utilization_gpu;
+
+	/* Get the specific under_perform_boundary_value and over_perform_boundary_value */
+	if ((mali_desired_fps <= current_fps) && (current_fps < mali_max_system_fps)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(90);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70);
+	} else if ((mali_fps_step1 <= current_fps) && (current_fps < mali_desired_fps)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35);
+	} else if ((mali_fps_step2 <= current_fps) && (current_fps < mali_fps_step1)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(50);
+	} else {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35);
+	}
+
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: gpu util = %d \n", current_gpu_util));
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: under_perform = %d,  over_perform = %d \n", under_perform_boundary_value, over_perform_boundary_value));
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: render fps = %d,  pressure render fps = %d \n", current_fps, window_render_fps));
+
+	/* Get current clock value */
+	cur_clk_step = mali_gpu_get_freq();
+
+	/* Consider offscreen */
+	if (0 == current_fps) {
+		/* GP or PP under perform, need to give full power */
+		if (current_gpu_util > over_perform_boundary_value) {
+			if (cur_clk_step != gpu_clk->num_of_steps - 1) {
+				clock_changed = true;
+				clock_step = gpu_clk->num_of_steps - 1;
+			}
+		}
+
+		/* If GPU is idle, use lowest power */
+		if (0 == current_gpu_util) {
+			if (cur_clk_step != 0) {
+				clock_changed = true;
+				clock_step = 0;
+			}
+		}
+
+		goto real_setting;
+	}
+
+	/* 2. Calculate target clock if the GPU clock can be tuned */
+	if (-1 != cur_clk_step) {
+		int target_clk_mhz = -1;
+		mali_bool pick_clock_up = MALI_TRUE;
+
+		if (current_gpu_util > under_perform_boundary_value) {
+			/* when under perform, need to consider the fps part */
+			target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util * mali_desired_fps / under_perform_boundary_value / current_fps;
+			pick_clock_up = MALI_TRUE;
+		} else if (current_gpu_util < over_perform_boundary_value) {
+			/* when over perform, did't need to consider fps, system didn't want to reach desired fps */
+			target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util / under_perform_boundary_value;
+			pick_clock_up = MALI_FALSE;
+		}
+
+		if (-1 != target_clk_mhz) {
+			clock_changed = mali_pickup_closest_avail_clock(target_clk_mhz, pick_clock_up);
+		}
+	}
+
+real_setting:
+	if (clock_changed) {
+		mali_gpu_set_freq(clock_step);
+
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+					      gpu_clk->item[clock_step].clock,
+					      gpu_clk->item[clock_step].vol / 1000,
+					      0, 0, 0);
+	}
+
+#if CLOCK_TUNING_TIME_DEBUG
+	do_gettimeofday(&stop);
+
+	elapse_time = timeval_to_ns(&stop) - timeval_to_ns(&start);
+	MALI_DEBUG_PRINT(2, ("Using ARM power policy:  eclapse time = %d\n", elapse_time));
+#endif
+}
+
+_mali_osk_errcode_t mali_dvfs_policy_init(void)
+{
+	_mali_osk_device_data data;
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		if ((NULL != data.get_clock_info) && (NULL != data.set_freq) && (NULL != data.get_freq)) {
+			MALI_DEBUG_PRINT(2, ("Mali DVFS init: using arm dvfs policy \n"));
+
+
+			mali_fps_step1 = mali_max_system_fps / 3;
+			mali_fps_step2 = mali_max_system_fps / 5;
+
+			data.get_clock_info(&gpu_clk);
+
+			if (gpu_clk != NULL) {
+#ifdef DEBUG
+				int i;
+				for (i = 0; i < gpu_clk->num_of_steps; i++) {
+					MALI_DEBUG_PRINT(5, ("mali gpu clock info: step%d clock(%d)Hz,vol(%d) \n",
+							     i, gpu_clk->item[i].clock, gpu_clk->item[i].vol));
+				}
+#endif
+			} else {
+				MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform didn't define enough info for ddk to do DVFS \n"));
+			}
+
+			mali_gpu_get_freq = data.get_freq;
+			mali_gpu_set_freq = data.set_freq;
+
+			if ((NULL != gpu_clk) && (gpu_clk->num_of_steps > 0)
+			    && (NULL != mali_gpu_get_freq) && (NULL != mali_gpu_set_freq)) {
+				mali_dvfs_enabled = MALI_TRUE;
+			}
+		} else {
+			MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform function callback incomplete, need check mali_gpu_device_data in platform .\n"));
+		}
+	} else {
+		err = _MALI_OSK_ERR_FAULT;
+		MALI_DEBUG_PRINT(2, ("Mali DVFS init: get platform data error .\n"));
+	}
+
+	return err;
+}
+
+/*
+ * Always give full power when start a new period,
+ * if mali dvfs enabled, for performance consideration
+ */
+void mali_dvfs_policy_new_period(void)
+{
+	/* Always give full power when start a new period */
+	unsigned int cur_clk_step = 0;
+
+	cur_clk_step = mali_gpu_get_freq();
+
+	if (cur_clk_step != (gpu_clk->num_of_steps - 1)) {
+		mali_gpu_set_freq(gpu_clk->num_of_steps - 1);
+
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, gpu_clk->item[gpu_clk->num_of_steps - 1].clock,
+					      gpu_clk->item[gpu_clk->num_of_steps - 1].vol / 1000, 0, 0, 0);
+	}
+}
+
+mali_bool mali_dvfs_policy_enabled(void)
+{
+	return mali_dvfs_enabled;
+}
+
+#if defined(CONFIG_MALI400_PROFILING)
+void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item)
+{
+	if (mali_platform_device != NULL) {
+
+		struct mali_gpu_device_data *device_data = NULL;
+		device_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data;
+
+		if ((NULL != device_data->get_clock_info) && (NULL != device_data->get_freq)) {
+
+			int cur_clk_step = device_data->get_freq();
+			struct mali_gpu_clock *mali_gpu_clk = NULL;
+
+			device_data->get_clock_info(&mali_gpu_clk);
+			clk_item->clock = mali_gpu_clk->item[cur_clk_step].clock;
+			clk_item->vol = mali_gpu_clk->item[cur_clk_step].vol;
+		} else {
+			MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: platform function callback incomplete, need check mali_gpu_device_data in platform .\n"));
+		}
+	}
+}
+#endif
+
diff --git a/utgard/r5p1/common/mali_dvfs_policy.h b/utgard/r5p1/common/mali_dvfs_policy.h
new file mode 100755
index 0000000..55e4b35
--- /dev/null
+++ b/utgard/r5p1/common/mali_dvfs_policy.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2010-2012, 2014-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_DVFS_POLICY_H__
+#define __MALI_DVFS_POLICY_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period);
+
+_mali_osk_errcode_t mali_dvfs_policy_init(void);
+
+void mali_dvfs_policy_new_period(void);
+
+mali_bool mali_dvfs_policy_enabled(void);
+
+#if defined(CONFIG_MALI400_PROFILING)
+void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif/* __MALI_DVFS_POLICY_H__ */
diff --git a/utgard/r5p1/common/mali_executor.c b/utgard/r5p1/common/mali_executor.c
new file mode 100755
index 0000000..28bbc1d
--- /dev/null
+++ b/utgard/r5p1/common/mali_executor.c
@@ -0,0 +1,2513 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_executor.h"
+#include "mali_scheduler.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_pp.h"
+#include "mali_pp_job.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+#include "mali_timeline.h"
+#include "mali_osk_profiling.h"
+#include "mali_session.h"
+
+/*
+ * If dma_buf with map on demand is used, we defer job deletion and job queue
+ * if in atomic context, since both might sleep.
+ */
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_DELETE 1
+#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_QUEUE 1
+#endif /* !defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) */
+
+/*
+ * ---------- static type definitions (structs, enums, etc) ----------
+ */
+
+enum mali_executor_state_t {
+	EXEC_STATE_NOT_PRESENT, /* Virtual group on Mali-300/400 (do not use) */
+	EXEC_STATE_DISABLED,    /* Disabled by core scaling (do not use) */
+	EXEC_STATE_EMPTY,       /* No child groups for virtual group (do not use) */
+	EXEC_STATE_INACTIVE,    /* Can be used, but must be activate first */
+	EXEC_STATE_IDLE,        /* Active and ready to be used */
+	EXEC_STATE_WORKING,     /* Executing a job */
+};
+
+/*
+ * ---------- global variables (exported due to inline functions) ----------
+ */
+
+/* Lock for this module (protecting all HW access except L2 caches) */
+_mali_osk_spinlock_irq_t *mali_executor_lock_obj = NULL;
+
+mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX];
+
+/*
+ * ---------- static variables ----------
+ */
+
+/* Used to defer job scheduling */
+static _mali_osk_wq_work_t *executor_wq_high_pri = NULL;
+
+/* Store version from GP and PP (user space wants to know this) */
+static u32 pp_version = 0;
+static u32 gp_version = 0;
+
+/* List of physical PP groups which are disabled by some external source */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_disabled);
+static u32 group_list_disabled_count = 0;
+
+/* List of groups which can be used, but activate first */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_inactive);
+static u32 group_list_inactive_count = 0;
+
+/* List of groups which are active and ready to be used */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_idle);
+static u32 group_list_idle_count = 0;
+
+/* List of groups which are executing a job */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_working);
+static u32 group_list_working_count = 0;
+
+/* Virtual group (if any) */
+static struct mali_group *virtual_group = NULL;
+
+/* Virtual group state is tracked with a state variable instead of 4 lists */
+static enum mali_executor_state_t virtual_group_state = EXEC_STATE_NOT_PRESENT;
+
+/* GP group */
+static struct mali_group *gp_group = NULL;
+
+/* GP group state is tracked with a state variable instead of 4 lists */
+static enum mali_executor_state_t gp_group_state = EXEC_STATE_NOT_PRESENT;
+
+static u32 gp_returned_cookie = 0;
+
+/* Total number of physical PP cores present */
+static u32 num_physical_pp_cores_total = 0;
+
+/* Number of physical cores which are enabled */
+static u32 num_physical_pp_cores_enabled = 0;
+
+/* Enable or disable core scaling */
+static mali_bool core_scaling_enabled = MALI_TRUE;
+
+/* Variables to allow safe pausing of the scheduler */
+static _mali_osk_wait_queue_t *executor_working_wait_queue = NULL;
+static u32 pause_count = 0;
+
+/* PP cores haven't been enabled because of some pp cores haven't been disabled. */
+static int core_scaling_delay_up_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+
+/* Variables used to implement notify pp core changes to userspace when core scaling
+ * is finished in mali_executor_complete_group() function. */
+static _mali_osk_wq_work_t *executor_wq_notify_core_change = NULL;
+static _mali_osk_wait_queue_t *executor_notify_core_change_wait_queue = NULL;
+
+/*
+ * ---------- Forward declaration of static functions ----------
+ */
+static void mali_executor_lock(void);
+static void mali_executor_unlock(void);
+static mali_bool mali_executor_is_suspended(void *data);
+static mali_bool mali_executor_is_working(void);
+static void mali_executor_disable_empty_virtual(void);
+static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group);
+static mali_bool mali_executor_has_virtual_group(void);
+static mali_bool mali_executor_virtual_group_is_usable(void);
+static void mali_executor_schedule(void);
+static void mali_executor_wq_schedule(void *arg);
+static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job);
+static void mali_executor_complete_group(struct mali_group *group,
+		mali_bool success,
+		struct mali_gp_job **gp_job_done,
+		struct mali_pp_job **pp_job_done);
+static void mali_executor_change_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *old_list,
+		u32 *old_count,
+		_mali_osk_list_t *new_list,
+		u32 *new_count);
+static mali_bool mali_executor_group_is_in_state(struct mali_group *group,
+		enum mali_executor_state_t state);
+
+static void mali_executor_group_enable_internal(struct mali_group *group);
+static void mali_executor_group_disable_internal(struct mali_group *group);
+static void mali_executor_core_scale(unsigned int target_core_nr);
+static void mali_executor_core_scale_in_group_complete(struct mali_group *group);
+static void mali_executor_notify_core_change(u32 num_cores);
+static void mali_executor_wq_notify_core_change(void *arg);
+static void mali_executor_change_group_status_disabled(struct mali_group *group);
+static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group);
+static void mali_executor_set_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *new_list,
+		u32 *new_count);
+
+/*
+ * ---------- Actual implementation ----------
+ */
+
+_mali_osk_errcode_t mali_executor_initialize(void)
+{
+	mali_executor_lock_obj = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_EXECUTOR);
+	if (NULL == mali_executor_lock_obj) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_wq_high_pri = _mali_osk_wq_create_work_high_pri(mali_executor_wq_schedule, NULL);
+	if (NULL == executor_wq_high_pri) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_working_wait_queue = _mali_osk_wait_queue_init();
+	if (NULL == executor_working_wait_queue) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_wq_notify_core_change = _mali_osk_wq_create_work(mali_executor_wq_notify_core_change, NULL);
+	if (NULL == executor_wq_notify_core_change) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_notify_core_change_wait_queue = _mali_osk_wait_queue_init();
+	if (NULL == executor_notify_core_change_wait_queue) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_executor_terminate(void)
+{
+	if (NULL != executor_notify_core_change_wait_queue) {
+		_mali_osk_wait_queue_term(executor_notify_core_change_wait_queue);
+		executor_notify_core_change_wait_queue = NULL;
+	}
+
+	if (NULL != executor_wq_notify_core_change) {
+		_mali_osk_wq_delete_work(executor_wq_notify_core_change);
+		executor_wq_notify_core_change = NULL;
+	}
+
+	if (NULL != executor_working_wait_queue) {
+		_mali_osk_wait_queue_term(executor_working_wait_queue);
+		executor_working_wait_queue = NULL;
+	}
+
+	if (NULL != executor_wq_high_pri) {
+		_mali_osk_wq_delete_work(executor_wq_high_pri);
+		executor_wq_high_pri = NULL;
+	}
+
+	if (NULL != mali_executor_lock_obj) {
+		_mali_osk_spinlock_irq_term(mali_executor_lock_obj);
+		mali_executor_lock_obj = NULL;
+	}
+}
+
+void mali_executor_populate(void)
+{
+	u32 num_groups;
+	u32 i;
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	/* Do we have a virtual group? */
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (mali_group_is_virtual(group)) {
+			virtual_group = group;
+			virtual_group_state = EXEC_STATE_INACTIVE;
+			break;
+		}
+	}
+
+	/* Find all the available physical GP and PP cores */
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (NULL != group) {
+			struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+			struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+
+			if (!mali_group_is_virtual(group)) {
+				if (NULL != pp_core) {
+					if (0 == pp_version) {
+						/* Retrieve PP version from the first available PP core */
+						pp_version = mali_pp_core_get_version(pp_core);
+					}
+
+					if (NULL != virtual_group) {
+						mali_executor_lock();
+						mali_group_add_group(virtual_group, group);
+						mali_executor_unlock();
+					} else {
+						_mali_osk_list_add(&group->executor_list, &group_list_inactive);
+						group_list_inactive_count++;
+					}
+
+					num_physical_pp_cores_total++;
+				} else {
+					MALI_DEBUG_ASSERT_POINTER(gp_core);
+
+					if (0 == gp_version) {
+						/* Retrieve GP version */
+						gp_version = mali_gp_core_get_version(gp_core);
+					}
+
+					gp_group = group;
+					gp_group_state = EXEC_STATE_INACTIVE;
+				}
+
+			}
+		}
+	}
+
+	num_physical_pp_cores_enabled = num_physical_pp_cores_total;
+}
+
+void mali_executor_depopulate(void)
+{
+	struct mali_group *group;
+	struct mali_group *temp;
+
+	MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != gp_group_state);
+
+	if (NULL != gp_group) {
+		mali_group_delete(gp_group);
+		gp_group = NULL;
+	}
+
+	MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != virtual_group_state);
+
+	if (NULL != virtual_group) {
+		mali_group_delete(virtual_group);
+		virtual_group = NULL;
+	}
+
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&group_list_working));
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+}
+
+void mali_executor_suspend(void)
+{
+	mali_executor_lock();
+
+	/* Increment the pause_count so that no more jobs will be scheduled */
+	pause_count++;
+
+	mali_executor_unlock();
+
+	_mali_osk_wait_queue_wait_event(executor_working_wait_queue,
+					mali_executor_is_suspended, NULL);
+
+	/*
+	 * mali_executor_complete_XX() leaves jobs in idle state.
+	 * deactivate option is used when we are going to power down
+	 * the entire GPU (OS suspend) and want a consistent SW vs HW
+	 * state.
+	 */
+	mali_executor_lock();
+
+	mali_executor_deactivate_list_idle(MALI_TRUE);
+
+	/*
+	 * The following steps are used to deactive all of activated
+	 * (MALI_GROUP_STATE_ACTIVE) and activating (MALI_GROUP
+	 * _STAET_ACTIVATION_PENDING) groups, to make sure the variable
+	 * pd_mask_wanted is equal with 0. */
+	if (MALI_GROUP_STATE_INACTIVE != mali_group_get_state(gp_group)) {
+		gp_group_state = EXEC_STATE_INACTIVE;
+		mali_group_deactivate(gp_group);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		if (MALI_GROUP_STATE_INACTIVE
+		    != mali_group_get_state(virtual_group)) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+			mali_group_deactivate(virtual_group);
+		}
+	}
+
+	if (0 < group_list_inactive_count) {
+		struct mali_group *group;
+		struct mali_group *temp;
+
+		_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+					    &group_list_inactive,
+					    struct mali_group, executor_list) {
+			if (MALI_GROUP_STATE_ACTIVATION_PENDING
+			    == mali_group_get_state(group)) {
+				mali_group_deactivate(group);
+			}
+
+			/*
+			 * On mali-450 platform, we may have physical group in the group inactive
+			 * list, and its state is MALI_GROUP_STATE_ACTIVATION_PENDING, so we only
+			 * deactivate it is not enough, we still also need add it back to virtual group.
+			 * And now, virtual group must be in INACTIVE state, so it's safe to add
+			 * physical group to virtual group at this point.
+			 */
+			if (NULL != virtual_group) {
+				_mali_osk_list_delinit(&group->executor_list);
+				group_list_inactive_count--;
+
+				mali_group_add_group(virtual_group, group);
+			}
+		}
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_resume(void)
+{
+	mali_executor_lock();
+
+	/* Decrement pause_count to allow scheduling again (if it reaches 0) */
+	pause_count--;
+	if (0 == pause_count) {
+		mali_executor_schedule();
+	}
+
+	mali_executor_unlock();
+}
+
+u32 mali_executor_get_num_cores_total(void)
+{
+	return num_physical_pp_cores_total;
+}
+
+u32 mali_executor_get_num_cores_enabled(void)
+{
+	return num_physical_pp_cores_enabled;
+}
+
+struct mali_pp_core *mali_executor_get_virtual_pp(void)
+{
+	MALI_DEBUG_ASSERT_POINTER(virtual_group);
+	MALI_DEBUG_ASSERT_POINTER(virtual_group->pp_core);
+	return virtual_group->pp_core;
+}
+
+struct mali_group *mali_executor_get_virtual_group(void)
+{
+	return virtual_group;
+}
+
+void mali_executor_zap_all_active(struct mali_session_data *session)
+{
+	struct mali_group *group;
+	struct mali_group *temp;
+	mali_bool ret;
+
+	mali_executor_lock();
+
+	/*
+	 * This function is a bit complicated because
+	 * mali_group_zap_session() can fail. This only happens because the
+	 * group is in an unhandled page fault status.
+	 * We need to make sure this page fault is handled before we return,
+	 * so that we know every single outstanding MMU transactions have
+	 * completed. This will allow caller to safely remove physical pages
+	 * when we have returned.
+	 */
+
+	MALI_DEBUG_ASSERT(NULL != gp_group);
+	ret = mali_group_zap_session(gp_group, session);
+	if (MALI_FALSE == ret) {
+		struct mali_gp_job *gp_job = NULL;
+
+		mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL);
+
+		MALI_DEBUG_ASSERT_POINTER(gp_job);
+
+		/* GP job completed, make sure it is freed */
+		mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		ret = mali_group_zap_session(virtual_group, session);
+		if (MALI_FALSE == ret) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job);
+
+			if (NULL != pp_job) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_FALSE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working,
+				    struct mali_group, executor_list) {
+		ret = mali_group_zap_session(group, session);
+		if (MALI_FALSE == ret) {
+			ret = mali_group_zap_session(group, session);
+			if (MALI_FALSE == ret) {
+				struct mali_pp_job *pp_job = NULL;
+
+				mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job);
+
+				if (NULL != pp_job) {
+					/* PP job completed, free it */
+					mali_scheduler_complete_pp_job(pp_job,
+								       0, MALI_FALSE,
+								       MALI_TRUE);
+				}
+			}
+		}
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule)
+{
+	if (MALI_SCHEDULER_MASK_EMPTY != mask) {
+		if (MALI_TRUE == deferred_schedule) {
+			_mali_osk_wq_schedule_work_high_pri(executor_wq_high_pri);
+		} else {
+			/* Schedule from this thread*/
+			mali_executor_lock();
+			mali_executor_schedule();
+			mali_executor_unlock();
+		}
+	}
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+	mali_bool time_out = MALI_FALSE;
+
+	MALI_DEBUG_PRINT(4, ("Executor: GP interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+
+	if (mali_group_has_timed_out(group)) {
+		int_result = MALI_INTERRUPT_RESULT_ERROR;
+		time_out = MALI_TRUE;
+		MALI_PRINT(("Executor GP: Job %d Timeout on %s\n",
+			    mali_gp_job_get_id(group->gp_running_job),
+			    mali_group_core_description(group)));
+	} else {
+		int_result = mali_group_get_interrupt_result_gp(group);
+		if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#else
+	MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_NONE != int_result);
+#endif
+
+	mali_group_mask_all_interrupts_gp(group);
+
+	if (MALI_INTERRUPT_RESULT_SUCCESS_VS == int_result) {
+		if (mali_group_gp_is_active(group)) {
+			/* Only VS completed so far, while PLBU is still active */
+
+			/* Enable all but the current interrupt */
+			mali_group_enable_interrupts_gp(group, int_result);
+
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_OK;
+		}
+	} else if (MALI_INTERRUPT_RESULT_SUCCESS_PLBU == int_result) {
+		if (mali_group_gp_is_active(group)) {
+			/* Only PLBU completed so far, while VS is still active */
+
+			/* Enable all but the current interrupt */
+			mali_group_enable_interrupts_gp(group, int_result);
+
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_OK;
+		}
+	} else if (MALI_INTERRUPT_RESULT_OOM == int_result) {
+		struct mali_gp_job *job = mali_group_get_running_gp_job(group);
+
+		/* PLBU out of mem */
+		MALI_DEBUG_PRINT(3, ("Executor: PLBU needs more heap memory\n"));
+
+#if defined(CONFIG_MALI400_PROFILING)
+		/* Give group a chance to generate a SUSPEND event */
+		mali_group_oom(group);
+#endif
+
+		/*
+		 * no need to hold interrupt raised while
+		 * waiting for more memory.
+		 */
+		mali_executor_send_gp_oom_to_user(job);
+
+		mali_executor_unlock();
+
+		return _MALI_OSK_ERR_OK;
+	}
+
+	/* We should now have a real interrupt to handle */
+
+	MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n",
+			     mali_group_core_description(group),
+			     (MALI_INTERRUPT_RESULT_ERROR == int_result) ?
+			     "ERROR" : "success"));
+
+	if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) {
+		/* Don't bother to do processing of errors in upper half */
+		mali_executor_unlock();
+
+		if (MALI_FALSE == time_out) {
+			mali_group_schedule_bottom_half_gp(group);
+		}
+	} else {
+		struct mali_gp_job *job;
+		mali_bool success;
+
+		success = (int_result != MALI_INTERRUPT_RESULT_ERROR) ?
+			  MALI_TRUE : MALI_FALSE;
+
+		mali_executor_complete_group(group, success, &job, NULL);
+
+		mali_executor_unlock();
+
+		/* GP jobs always fully complete */
+		MALI_DEBUG_ASSERT(NULL != job);
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_gp_job(job, success,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+	mali_bool time_out = MALI_FALSE;
+
+	MALI_DEBUG_PRINT(4, ("Executor: PP interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (in_upper_half) {
+		if (mali_group_is_in_virtual(group)) {
+			/* Child groups should never handle PP interrupts */
+			MALI_DEBUG_ASSERT(!mali_group_has_timed_out(group));
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+	MALI_DEBUG_ASSERT(!mali_group_is_in_virtual(group));
+
+	if (mali_group_has_timed_out(group)) {
+		int_result = MALI_INTERRUPT_RESULT_ERROR;
+		time_out = MALI_TRUE;
+		MALI_PRINT(("Executor PP: Job %d Timeout on %s\n",
+			    mali_pp_job_get_id(group->pp_running_job),
+			    mali_group_core_description(group)));
+	} else {
+		int_result = mali_group_get_interrupt_result_pp(group);
+		if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	} else if (MALI_INTERRUPT_RESULT_SUCCESS == int_result) {
+		if (mali_group_is_virtual(group) && mali_group_pp_is_active(group)) {
+			/* Some child groups are still working, so nothing to do right now */
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+#else
+	MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_NONE != int_result);
+	if (!mali_group_has_timed_out(group)) {
+		MALI_DEBUG_ASSERT(!mali_group_pp_is_active(group));
+	}
+#endif
+
+	/* We should now have a real interrupt to handle */
+
+	MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n",
+			     mali_group_core_description(group),
+			     (MALI_INTERRUPT_RESULT_ERROR == int_result) ?
+			     "ERROR" : "success"));
+
+	if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) {
+		/* Don't bother to do processing of errors in upper half */
+		mali_group_mask_all_interrupts_pp(group);
+		mali_executor_unlock();
+
+		if (MALI_FALSE == time_out) {
+			mali_group_schedule_bottom_half_pp(group);
+		}
+	} else {
+		struct mali_pp_job *job = NULL;
+		mali_bool success;
+
+		success = (int_result == MALI_INTERRUPT_RESULT_SUCCESS) ?
+			  MALI_TRUE : MALI_FALSE;
+
+		mali_executor_complete_group(group, success, NULL, &job);
+
+		mali_executor_unlock();
+
+		if (NULL != job) {
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(job,
+						       num_physical_pp_cores_total,
+						       MALI_TRUE, MALI_TRUE);
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+
+	MALI_DEBUG_PRINT(4, ("Executor: MMU interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+
+	int_result = mali_group_get_interrupt_result_mmu(group);
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#else
+	MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_ERROR == int_result);
+#endif
+
+	/* We should now have a real interrupt to handle */
+
+	if (in_upper_half) {
+		/* Don't bother to do processing of errors in upper half */
+
+		struct mali_group *parent = group->parent_group;
+
+		mali_mmu_mask_all_interrupts(group->mmu);
+
+		mali_executor_unlock();
+
+		if (NULL == parent) {
+			mali_group_schedule_bottom_half_mmu(group);
+		} else {
+			mali_group_schedule_bottom_half_mmu(parent);
+		}
+
+	} else {
+		struct mali_gp_job *gp_job = NULL;
+		struct mali_pp_job *pp_job = NULL;
+
+#ifdef DEBUG
+
+		u32 fault_address = mali_mmu_get_page_fault_addr(group->mmu);
+		u32 status = mali_mmu_get_status(group->mmu);
+		MALI_DEBUG_PRINT(2, ("Executor: Mali page fault detected at 0x%x from bus id %d of type %s on %s\n",
+				     (void *)(uintptr_t)fault_address,
+				     (status >> 6) & 0x1F,
+				     (status & 32) ? "write" : "read",
+				     group->mmu->hw_core.description));
+		MALI_DEBUG_PRINT(3, ("Executor: MMU rawstat = 0x%08X, MMU status = 0x%08X\n",
+				     mali_mmu_get_rawstat(group->mmu), status));
+#endif
+
+		mali_executor_complete_group(group, MALI_FALSE, &gp_job, &pp_job);
+
+		mali_executor_unlock();
+
+		if (NULL != gp_job) {
+			MALI_DEBUG_ASSERT(NULL == pp_job);
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+						       MALI_TRUE, MALI_TRUE);
+		} else if (NULL != pp_job) {
+			MALI_DEBUG_ASSERT(NULL == gp_job);
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(pp_job,
+						       num_physical_pp_cores_total,
+						       MALI_TRUE, MALI_TRUE);
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups)
+{
+	u32 i;
+	mali_bool child_groups_activated = MALI_FALSE;
+	mali_bool do_schedule = MALI_FALSE;
+#if defined(DEBUG)
+	u32 num_activated = 0;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(groups);
+	MALI_DEBUG_ASSERT(0 < num_groups);
+
+	mali_executor_lock();
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups\n", num_groups));
+
+	for (i = 0; i < num_groups; i++) {
+		MALI_DEBUG_PRINT(3, ("Executor: powering up group %s\n",
+				     mali_group_core_description(groups[i])));
+
+		mali_group_power_up(groups[i]);
+
+		if ((MALI_GROUP_STATE_ACTIVATION_PENDING != mali_group_get_state(groups[i]) ||
+		     (MALI_TRUE != mali_executor_group_is_in_state(groups[i], EXEC_STATE_INACTIVE)))) {
+			/* nothing more to do for this group */
+			continue;
+		}
+
+		MALI_DEBUG_PRINT(3, ("Executor: activating group %s\n",
+				     mali_group_core_description(groups[i])));
+
+#if defined(DEBUG)
+		num_activated++;
+#endif
+
+		if (mali_group_is_in_virtual(groups[i])) {
+			/*
+			 * At least one child group of virtual group is powered on.
+			 */
+			child_groups_activated = MALI_TRUE;
+		} else if (MALI_FALSE == mali_group_is_virtual(groups[i])) {
+			/* Set gp and pp not in virtual to active. */
+			mali_group_set_active(groups[i]);
+		}
+
+		/* Move group from inactive to idle list */
+		if (groups[i] == gp_group) {
+			MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE ==
+					  gp_group_state);
+			gp_group_state = EXEC_STATE_IDLE;
+		} else if (MALI_FALSE == mali_group_is_in_virtual(groups[i])
+			   && MALI_FALSE == mali_group_is_virtual(groups[i])) {
+			MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_group_is_in_state(groups[i],
+					  EXEC_STATE_INACTIVE));
+
+			mali_executor_change_state_pp_physical(groups[i],
+							       &group_list_inactive,
+							       &group_list_inactive_count,
+							       &group_list_idle,
+							       &group_list_idle_count);
+		}
+
+		do_schedule = MALI_TRUE;
+	}
+
+	if (mali_executor_has_virtual_group() &&
+	    MALI_TRUE == child_groups_activated &&
+	    MALI_GROUP_STATE_ACTIVATION_PENDING ==
+	    mali_group_get_state(virtual_group)) {
+		/*
+		 * Try to active virtual group while it may be not sucessful every time,
+		 * because there is one situation that not all of child groups are powered on
+		 * in one time and virtual group is in activation pending state.
+		 */
+		if (mali_group_set_active(virtual_group)) {
+			/* Move group from inactive to idle */
+			MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE ==
+					  virtual_group_state);
+			virtual_group_state = EXEC_STATE_IDLE;
+
+			MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u  physical activated, 1 virtual activated.\n", num_groups, num_activated));
+		} else {
+			MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated));
+		}
+	} else {
+		MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated));
+	}
+
+	if (MALI_TRUE == do_schedule) {
+		/* Trigger a schedule */
+		mali_executor_schedule();
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_group_power_down(struct mali_group *groups[],
+				    u32 num_groups)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(groups);
+	MALI_DEBUG_ASSERT(0 < num_groups);
+
+	mali_executor_lock();
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups\n", num_groups));
+
+	for (i = 0; i < num_groups; i++) {
+		/* Groups must be either disabled or inactive. while for virtual group,
+		 * it maybe in empty state, because when we meet pm_runtime_suspend,
+		 * virtual group could be powered off, and before we acquire mali_executor_lock,
+		 * we must release mali_pm_state_lock, if there is a new physical job was queued,
+		 * all of physical groups in virtual group could be pulled out, so we only can
+		 * powered down an empty virtual group. Those physical groups will be powered
+		 * up in following pm_runtime_resume callback function.
+		 */
+		MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(groups[i],
+				  EXEC_STATE_DISABLED) ||
+				  mali_executor_group_is_in_state(groups[i],
+						  EXEC_STATE_INACTIVE) ||
+				  mali_executor_group_is_in_state(groups[i],
+						  EXEC_STATE_EMPTY));
+
+		MALI_DEBUG_PRINT(3, ("Executor: powering down group %s\n",
+				     mali_group_core_description(groups[i])));
+
+		mali_group_power_down(groups[i]);
+	}
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups completed\n", num_groups));
+
+	mali_executor_unlock();
+}
+
+void mali_executor_abort_session(struct mali_session_data *session)
+{
+	struct mali_group *group;
+	struct mali_group *tmp_group;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(session->is_aborting);
+
+	MALI_DEBUG_PRINT(3,
+			 ("Executor: Aborting all jobs from session 0x%08X.\n",
+			  session));
+
+	mali_executor_lock();
+
+	if (mali_group_get_session(gp_group) == session) {
+		if (EXEC_STATE_WORKING == gp_group_state) {
+			struct mali_gp_job *gp_job = NULL;
+
+			mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL);
+
+			MALI_DEBUG_ASSERT_POINTER(gp_job);
+
+			/* GP job completed, make sure it is freed */
+			mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+						       MALI_FALSE, MALI_TRUE);
+		} else {
+			/* Same session, but not working, so just clear it */
+			mali_group_clear_session(gp_group);
+		}
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		if (EXEC_STATE_WORKING == virtual_group_state
+		    && mali_group_get_session(virtual_group) == session) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job);
+
+			if (NULL != pp_job) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_FALSE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_working,
+				    struct mali_group, executor_list) {
+		if (mali_group_get_session(group) == session) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job);
+
+			if (NULL != pp_job) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_FALSE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_idle, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_inactive, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_disabled, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	mali_executor_unlock();
+}
+
+
+void mali_executor_core_scaling_enable(void)
+{
+	/* PS: Core scaling is by default enabled */
+	core_scaling_enabled = MALI_TRUE;
+}
+
+void mali_executor_core_scaling_disable(void)
+{
+	core_scaling_enabled = MALI_FALSE;
+}
+
+mali_bool mali_executor_core_scaling_is_enabled(void)
+{
+	return core_scaling_enabled;
+}
+
+void mali_executor_group_enable(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+
+	if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group))
+	    && (mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) {
+		mali_executor_group_enable_internal(group);
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+}
+
+/*
+ * If a physical group is inactive or idle, we should disable it immediately,
+ * if group is in virtual, and virtual group is idle, disable given physical group in it.
+ */
+void mali_executor_group_disable(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+
+	if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group))
+	    && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) {
+		mali_executor_group_disable_internal(group);
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+}
+
+mali_bool mali_executor_group_is_disabled(struct mali_group *group)
+{
+	/* NB: This function is not optimized for time critical usage */
+
+	mali_bool ret;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+	ret = mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED);
+	mali_executor_unlock();
+
+	return ret;
+}
+
+int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override)
+{
+	if (target_core_nr == num_physical_pp_cores_enabled) return 0;
+	if (MALI_FALSE == core_scaling_enabled && MALI_FALSE == override) return -EPERM;
+	if (target_core_nr > num_physical_pp_cores_total) return -EINVAL;
+	if (0 == target_core_nr) return -EINVAL;
+
+	mali_executor_core_scale(target_core_nr);
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+
+	return 0;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_executor_dump_state(char *buf, u32 size)
+{
+	int n = 0;
+	struct mali_group *group;
+	struct mali_group *temp;
+
+	mali_executor_lock();
+
+	switch (gp_group_state) {
+	case EXEC_STATE_INACTIVE:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state INACTIVE\n");
+		break;
+	case EXEC_STATE_IDLE:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state IDLE\n");
+		break;
+	case EXEC_STATE_WORKING:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state WORKING\n");
+		break;
+	default:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in unknown/illegal state %u\n",
+					gp_group_state);
+		break;
+	}
+
+	n += mali_group_dump_state(gp_group, buf + n, size - n);
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in WORKING state (count = %u):\n",
+				group_list_working_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in IDLE state (count = %u):\n",
+				group_list_idle_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in INACTIVE state (count = %u):\n",
+				group_list_inactive_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in DISABLED state (count = %u):\n",
+				group_list_disabled_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		switch (virtual_group_state) {
+		case EXEC_STATE_EMPTY:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state EMPTY\n");
+			break;
+		case EXEC_STATE_INACTIVE:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state INACTIVE\n");
+			break;
+		case EXEC_STATE_IDLE:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state IDLE\n");
+			break;
+		case EXEC_STATE_WORKING:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state WORKING\n");
+			break;
+		default:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in unknown/illegal state %u\n",
+						virtual_group_state);
+			break;
+		}
+
+		n += mali_group_dump_state(virtual_group, buf + n, size - n);
+	}
+
+	mali_executor_unlock();
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\n");
+
+	return n;
+}
+#endif
+
+_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->number_of_total_cores = num_physical_pp_cores_total;
+	args->number_of_enabled_cores = num_physical_pp_cores_enabled;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->version = pp_version;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->number_of_cores = 1;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->version = gp_version;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args)
+{
+	struct mali_session_data *session;
+	struct mali_gp_job *job;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (_MALIGP_JOB_RESUME_WITH_NEW_HEAP == args->code) {
+		_mali_osk_notification_t *new_notification = NULL;
+
+		new_notification = _mali_osk_notification_create(
+					   _MALI_NOTIFICATION_GP_STALLED,
+					   sizeof(_mali_uk_gp_job_suspended_s));
+
+		if (NULL != new_notification) {
+			MALI_DEBUG_PRINT(3, ("Executor: Resuming job %u with new heap; 0x%08X - 0x%08X\n",
+					     args->cookie, args->arguments[0], args->arguments[1]));
+
+			mali_executor_lock();
+
+			/* Resume the job in question if it is still running */
+			job = mali_group_get_running_gp_job(gp_group);
+			if (NULL != job &&
+			    args->cookie == mali_gp_job_get_id(job) &&
+			    session == mali_gp_job_get_session(job)) {
+				/*
+				 * Correct job is running, resume with new heap
+				 */
+
+				mali_gp_job_set_oom_notification(job,
+								 new_notification);
+
+				/* This will also re-enable interrupts */
+				mali_group_resume_gp_with_new_heap(gp_group,
+								   args->cookie,
+								   args->arguments[0],
+								   args->arguments[1]);
+
+				mali_executor_unlock();
+				return _MALI_OSK_ERR_OK;
+			} else {
+				MALI_PRINT_ERROR(("Executor: Unable to resume, GP job no longer running.\n"));
+
+				_mali_osk_notification_delete(new_notification);
+
+				mali_executor_unlock();
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_PRINT_ERROR(("Executor: Failed to allocate notification object. Will abort GP job.\n"));
+		}
+	} else {
+		MALI_DEBUG_PRINT(2, ("Executor: Aborting job %u, no new heap provided\n", args->cookie));
+	}
+
+	mali_executor_lock();
+
+	/* Abort the job in question if it is still running */
+	job = mali_group_get_running_gp_job(gp_group);
+	if (NULL != job &&
+	    args->cookie == mali_gp_job_get_id(job) &&
+	    session == mali_gp_job_get_session(job)) {
+		/* Correct job is still running */
+		struct mali_gp_job *job_done = NULL;
+
+		mali_executor_complete_group(gp_group, MALI_FALSE, &job_done, NULL);
+
+		/* The same job should have completed */
+		MALI_DEBUG_ASSERT(job_done == job);
+
+		/* GP job completed, make sure it is freed */
+		mali_scheduler_complete_gp_job(job_done, MALI_FALSE,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	mali_executor_unlock();
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+/*
+ * ---------- Implementation of static functions ----------
+ */
+
+static void mali_executor_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_executor_lock_obj);
+	MALI_DEBUG_PRINT(5, ("Executor: lock taken\n"));
+}
+
+static void mali_executor_unlock(void)
+{
+	MALI_DEBUG_PRINT(5, ("Executor: Releasing lock\n"));
+	_mali_osk_spinlock_irq_unlock(mali_executor_lock_obj);
+}
+
+static mali_bool mali_executor_is_suspended(void *data)
+{
+	mali_bool ret;
+
+	/* This callback does not use the data pointer. */
+	MALI_IGNORE(data);
+
+	mali_executor_lock();
+
+	ret = pause_count > 0 && !mali_executor_is_working();
+
+	mali_executor_unlock();
+
+	return ret;
+}
+
+static mali_bool mali_executor_is_working()
+{
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	return (0 != group_list_working_count ||
+		EXEC_STATE_WORKING == gp_group_state ||
+		EXEC_STATE_WORKING == virtual_group_state);
+}
+
+static void mali_executor_disable_empty_virtual(void)
+{
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_EMPTY);
+	MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_WORKING);
+
+	if (mali_group_is_empty(virtual_group)) {
+		virtual_group_state = EXEC_STATE_EMPTY;
+	}
+}
+
+static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group)
+{
+	mali_bool trigger_pm_update = MALI_FALSE;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	/* Only rejoining after job has completed (still active) */
+	MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE ==
+			  mali_group_get_state(group));
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_has_virtual_group());
+	MALI_DEBUG_ASSERT(MALI_FALSE == mali_group_is_virtual(group));
+
+	/* Make sure group and virtual group have same status */
+
+	if (MALI_GROUP_STATE_INACTIVE == mali_group_get_state(virtual_group)) {
+		if (mali_group_deactivate(group)) {
+			trigger_pm_update = MALI_TRUE;
+		}
+
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+		}
+	} else if (MALI_GROUP_STATE_ACTIVATION_PENDING ==
+		   mali_group_get_state(virtual_group)) {
+		/*
+		 * Activation is pending for virtual group, leave
+		 * this child group as active.
+		 */
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE ==
+				  mali_group_get_state(virtual_group));
+
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_IDLE;
+		}
+	}
+
+	/* Remove group from idle list */
+	MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group,
+			  EXEC_STATE_IDLE));
+	_mali_osk_list_delinit(&group->executor_list);
+	group_list_idle_count--;
+
+	/*
+	 * And finally rejoin the virtual group
+	 * group will start working on same job as virtual_group,
+	 * if virtual_group is working on a job
+	 */
+	mali_group_add_group(virtual_group, group);
+
+	return trigger_pm_update;
+}
+
+static mali_bool mali_executor_has_virtual_group(void)
+{
+#if defined(CONFIG_MALI450)
+	return (NULL != virtual_group) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif /* defined(CONFIG_MALI450) */
+}
+
+static mali_bool mali_executor_virtual_group_is_usable(void)
+{
+#if defined(CONFIG_MALI450)
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return ((EXEC_STATE_INACTIVE == virtual_group_state ||
+		EXEC_STATE_IDLE == virtual_group_state)&&(virtual_group->state != MALI_GROUP_STATE_ACTIVATION_PENDING)) ?
+	       MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif /* defined(CONFIG_MALI450) */
+}
+
+static mali_bool mali_executor_tackle_gp_bound(void)
+{
+	struct mali_pp_job *job;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	job = mali_scheduler_job_pp_physical_peek();
+
+	if (NULL != job && MALI_TRUE == mali_is_mali400()) {
+		if (0 < group_list_working_count &&
+		    mali_pp_job_is_large_and_unstarted(job)) {
+			return MALI_TRUE;
+		}
+	}
+
+	return MALI_FALSE;
+}
+
+/*
+ * This is where jobs are actually started.
+ */
+static void mali_executor_schedule(void)
+{
+	u32 i;
+	u32 num_physical_needed = 0;
+	u32 num_physical_to_process = 0;
+	mali_bool trigger_pm_update = MALI_FALSE;
+	mali_bool deactivate_idle_group = MALI_TRUE;
+
+	/* Physical groups + jobs to start in this function */
+	struct mali_group *groups_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	struct mali_pp_job *jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	u32 sub_jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	int num_jobs_to_start = 0;
+
+	/* Virtual job to start in this function */
+	struct mali_pp_job *virtual_job_to_start = NULL;
+
+	/* GP job to start in this function */
+	struct mali_gp_job *gp_job_to_start = NULL;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (pause_count > 0) {
+		/* Execution is suspended, don't schedule any jobs. */
+		return;
+	}
+
+	/* Lock needed in order to safely handle the job queues */
+	mali_scheduler_lock();
+
+	/* 1. Activate gp firstly if have gp job queued. */
+	if (EXEC_STATE_INACTIVE == gp_group_state &&
+	    0 < mali_scheduler_job_gp_count()) {
+
+		enum mali_group_state state =
+			mali_group_activate(gp_group);
+		if (MALI_GROUP_STATE_ACTIVE == state) {
+			/* Set GP group state to idle */
+			gp_group_state = EXEC_STATE_IDLE;
+		} else {
+			trigger_pm_update = MALI_TRUE;
+		}
+	}
+
+	/* 2. Prepare as many physical groups as needed/possible */
+
+	num_physical_needed = mali_scheduler_job_physical_head_count();
+
+	/* On mali-450 platform, we don't need to enter in this block frequently. */
+	if (0 < num_physical_needed) {
+
+		if (num_physical_needed <= group_list_idle_count) {
+			/* We have enough groups on idle list already */
+			num_physical_to_process = num_physical_needed;
+			num_physical_needed = 0;
+		} else {
+			/* We need to get a hold of some more groups */
+			num_physical_to_process = group_list_idle_count;
+			num_physical_needed -= group_list_idle_count;
+		}
+
+		if (0 < num_physical_needed) {
+
+			/* 2.1. Activate groups which are inactive */
+
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive,
+						    struct mali_group, executor_list) {
+				enum mali_group_state state =
+					mali_group_activate(group);
+				if (MALI_GROUP_STATE_ACTIVE == state) {
+					/* Move from inactive to idle */
+					mali_executor_change_state_pp_physical(group,
+									       &group_list_inactive,
+									       &group_list_inactive_count,
+									       &group_list_idle,
+									       &group_list_idle_count);
+					num_physical_to_process++;
+				} else {
+					trigger_pm_update = MALI_TRUE;
+				}
+
+				num_physical_needed--;
+				if (0 == num_physical_needed) {
+					/* We have activated all the groups we need */
+					break;
+				}
+			}
+		}
+
+		if (mali_executor_virtual_group_is_usable()) {
+
+			/*
+			 * 2.2. And finally, steal and activate groups
+			 * from virtual group if we need even more
+			 */
+			while (0 < num_physical_needed) {
+				struct mali_group *group;
+
+				group = mali_group_acquire_group(virtual_group);
+				if (NULL != group) {
+					enum mali_group_state state;
+
+					mali_executor_disable_empty_virtual();
+
+					state = mali_group_activate(group);
+					if (MALI_GROUP_STATE_ACTIVE == state) {
+						/* Group is ready, add to idle list */
+						_mali_osk_list_add(
+							&group->executor_list,
+							&group_list_idle);
+						group_list_idle_count++;
+						num_physical_to_process++;
+					} else {
+						/*
+						 * Group is not ready yet,
+						 * add to inactive list
+						 */
+						_mali_osk_list_add(
+							&group->executor_list,
+							&group_list_inactive);
+						group_list_inactive_count++;
+
+						trigger_pm_update = MALI_TRUE;
+					}
+					num_physical_needed--;
+				} else {
+					/*
+					 * We could not get enough groups
+					 * from the virtual group.
+					 */
+					break;
+				}
+			}
+		}
+
+		/* 2.3. Assign physical jobs to groups */
+
+		if (0 < num_physical_to_process) {
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle,
+						    struct mali_group, executor_list) {
+				struct mali_pp_job *job = NULL;
+				u32 sub_job = MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+
+				MALI_DEBUG_ASSERT(num_jobs_to_start <
+						  MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS);
+
+				MALI_DEBUG_ASSERT(0 <
+						  mali_scheduler_job_physical_head_count());
+
+				if (mali_executor_hint_is_enabled(
+					    MALI_EXECUTOR_HINT_GP_BOUND)) {
+					if (MALI_TRUE == mali_executor_tackle_gp_bound()) {
+						/*
+						* We're gp bound,
+						* don't start this right now.
+						*/
+						deactivate_idle_group = MALI_FALSE;
+						num_physical_to_process = 0;
+						break;
+					}
+				}
+
+				job = mali_scheduler_job_pp_physical_get(
+					      &sub_job);
+
+				MALI_DEBUG_ASSERT_POINTER(job);
+				MALI_DEBUG_ASSERT(sub_job <= MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS);
+
+				/* Put job + group on list of jobs to start later on */
+
+				groups_to_start[num_jobs_to_start] = group;
+				jobs_to_start[num_jobs_to_start] = job;
+				sub_jobs_to_start[num_jobs_to_start] = sub_job;
+				num_jobs_to_start++;
+
+				/* Move group from idle to working */
+				mali_executor_change_state_pp_physical(group,
+								       &group_list_idle,
+								       &group_list_idle_count,
+								       &group_list_working,
+								       &group_list_working_count);
+
+				num_physical_to_process--;
+				if (0 == num_physical_to_process) {
+					/* Got all we needed */
+					break;
+				}
+			}
+		}
+	}
+
+
+	/* 3. Deactivate idle pp group , must put deactive here before active vitual group
+	 *    for cover case first only has physical job in normal queue but group inactive,
+	 *    so delay the job start go to active group, when group activated,
+	 *    call scheduler again, but now if we get high queue virtual job,
+	 *    we will do nothing in schedule cause executor schedule stop
+	 */
+
+	if (MALI_TRUE == mali_executor_deactivate_list_idle(deactivate_idle_group
+			&& (!mali_timeline_has_physical_pp_job()))) {
+		trigger_pm_update = MALI_TRUE;
+	}
+
+	/* 4. Activate virtual group, if needed */
+
+	if (EXEC_STATE_INACTIVE == virtual_group_state &&
+	    0 < mali_scheduler_job_next_is_virtual()) {
+		enum mali_group_state state =
+			mali_group_activate(virtual_group);
+		if (MALI_GROUP_STATE_ACTIVE == state) {
+			/* Set virtual group state to idle */
+			virtual_group_state = EXEC_STATE_IDLE;
+		} else {
+			trigger_pm_update = MALI_TRUE;
+		}
+	}
+
+	/* 5. To power up group asap, we trigger pm update here. */
+
+	if (MALI_TRUE == trigger_pm_update) {
+		trigger_pm_update = MALI_FALSE;
+		mali_pm_update_async();
+	}
+
+	/* 6. Assign jobs to idle virtual group (or deactivate if no job) */
+
+	if (EXEC_STATE_IDLE == virtual_group_state) {
+		if (0 < mali_scheduler_job_next_is_virtual()) {
+			virtual_job_to_start =
+				mali_scheduler_job_pp_virtual_get();
+			virtual_group_state = EXEC_STATE_WORKING;
+		} else if (!mali_timeline_has_virtual_pp_job()) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+
+			if (mali_group_deactivate(virtual_group)) {
+				trigger_pm_update = MALI_TRUE;
+			}
+		}
+	}
+
+	/* 7. Assign job to idle GP group (or deactivate if no job) */
+
+	if (EXEC_STATE_IDLE == gp_group_state) {
+		if (0 < mali_scheduler_job_gp_count()) {
+			gp_job_to_start = mali_scheduler_job_gp_get();
+			gp_group_state = EXEC_STATE_WORKING;
+		} else if (!mali_timeline_has_gp_job()) {
+			gp_group_state = EXEC_STATE_INACTIVE;
+			if (mali_group_deactivate(gp_group)) {
+				trigger_pm_update = MALI_TRUE;
+			}
+		}
+	}
+
+	/* 8. We no longer need the schedule/queue lock */
+
+	mali_scheduler_unlock();
+
+	/* 9. start jobs */
+
+	if (NULL != virtual_job_to_start) {
+		MALI_DEBUG_ASSERT(!mali_group_pp_is_active(virtual_group));
+		mali_group_start_pp_job(virtual_group,
+					virtual_job_to_start, 0);
+	}
+
+	for (i = 0; i < num_jobs_to_start; i++) {
+		MALI_DEBUG_ASSERT(!mali_group_pp_is_active(
+					  groups_to_start[i]));
+		mali_group_start_pp_job(groups_to_start[i],
+					jobs_to_start[i],
+					sub_jobs_to_start[i]);
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(gp_group);
+
+	if (NULL != gp_job_to_start) {
+		MALI_DEBUG_ASSERT(!mali_group_gp_is_active(gp_group));
+		mali_group_start_gp_job(gp_group, gp_job_to_start);
+	}
+
+	/* 10. Trigger any pending PM updates */
+	if (MALI_TRUE == trigger_pm_update) {
+		mali_pm_update_async();
+	}
+}
+
+/* Handler for deferred schedule requests */
+static void mali_executor_wq_schedule(void *arg)
+{
+	MALI_IGNORE(arg);
+	mali_executor_lock();
+	mali_executor_schedule();
+	mali_executor_unlock();
+}
+
+static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job)
+{
+	_mali_uk_gp_job_suspended_s *jobres;
+	_mali_osk_notification_t *notification;
+
+	notification = mali_gp_job_get_oom_notification(job);
+
+	/*
+	 * Remember the id we send to user space, so we have something to
+	 * verify when we get a response
+	 */
+	gp_returned_cookie = mali_gp_job_get_id(job);
+
+	jobres = (_mali_uk_gp_job_suspended_s *)notification->result_buffer;
+	jobres->user_job_ptr = mali_gp_job_get_user_id(job);
+	jobres->cookie = gp_returned_cookie;
+
+	mali_session_send_notification(mali_gp_job_get_session(job),
+				       notification);
+}
+static struct mali_gp_job *mali_executor_complete_gp(struct mali_group *group,
+		mali_bool success)
+{
+	struct mali_gp_job *job;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* Extracts the needed HW status from core and reset */
+	job = mali_group_complete_gp(group, success);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Core is now ready to go into idle list */
+	gp_group_state = EXEC_STATE_IDLE;
+
+	/* This will potentially queue more GP and PP jobs */
+	mali_timeline_tracker_release(&job->tracker);
+
+	/* Signal PP job */
+	mali_gp_job_signal_pp_tracker(job, success);
+
+	return job;
+}
+
+static struct mali_pp_job *mali_executor_complete_pp(struct mali_group *group,
+		mali_bool success)
+{
+	struct mali_pp_job *job;
+	u32 sub_job;
+	mali_bool job_is_done;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* Extracts the needed HW status from core and reset */
+	job = mali_group_complete_pp(group, success, &sub_job);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Core is now ready to go into idle list */
+	if (mali_group_is_virtual(group)) {
+		virtual_group_state = EXEC_STATE_IDLE;
+	} else {
+		/* Move from working to idle state */
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_working,
+						       &group_list_working_count,
+						       &group_list_idle,
+						       &group_list_idle_count);
+	}
+
+	/* It is the executor module which owns the jobs themselves by now */
+	mali_pp_job_mark_sub_job_completed(job, success);
+	job_is_done = mali_pp_job_is_complete(job);
+
+	if (job_is_done) {
+		/* This will potentially queue more GP and PP jobs */
+		mali_timeline_tracker_release(&job->tracker);
+	}
+
+	return job;
+}
+
+static void mali_executor_complete_group(struct mali_group *group,
+		mali_bool success,
+		struct mali_gp_job **gp_job_done,
+		struct mali_pp_job **pp_job_done)
+{
+	struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+	struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+	struct mali_gp_job *gp_job = NULL;
+	struct mali_pp_job *pp_job = NULL;
+	mali_bool pp_job_is_done = MALI_TRUE;
+
+	if (NULL != gp_core) {
+		gp_job = mali_executor_complete_gp(group, success);
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(pp_core);
+		MALI_IGNORE(pp_core);
+		pp_job = mali_executor_complete_pp(group, success);
+
+		pp_job_is_done = mali_pp_job_is_complete(pp_job);
+	}
+
+	if (pause_count > 0) {
+		/* Execution has been suspended */
+
+		if (!mali_executor_is_working()) {
+			/* Last job completed, wake up sleepers */
+			_mali_osk_wait_queue_wake_up(
+				executor_working_wait_queue);
+		}
+	} else if (MALI_TRUE == mali_group_disable_requested(group)) {
+		mali_executor_core_scale_in_group_complete(group);
+
+		mali_executor_schedule();
+	} else {
+		/* try to schedule new jobs */
+		mali_executor_schedule();
+	}
+
+	if (NULL != gp_job) {
+		MALI_DEBUG_ASSERT_POINTER(gp_job_done);
+		*gp_job_done = gp_job;
+	} else if (pp_job_is_done) {
+		MALI_DEBUG_ASSERT_POINTER(pp_job);
+		MALI_DEBUG_ASSERT_POINTER(pp_job_done);
+		*pp_job_done = pp_job;
+	}
+}
+
+static void mali_executor_change_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *old_list,
+		u32 *old_count,
+		_mali_osk_list_t *new_list,
+		u32 *new_count)
+{
+	/*
+	 * It's a bit more complicated to change the state for the physical PP
+	 * groups since their state is determined by the list they are on.
+	 */
+#if defined(DEBUG)
+	mali_bool found = MALI_FALSE;
+	struct mali_group *group_iter;
+	struct mali_group *temp;
+	u32 old_counted = 0;
+	u32 new_counted = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(old_list);
+	MALI_DEBUG_ASSERT_POINTER(old_count);
+	MALI_DEBUG_ASSERT_POINTER(new_list);
+	MALI_DEBUG_ASSERT_POINTER(new_count);
+
+	/*
+	 * Verify that group is present on old list,
+	 * and that the count is correct
+	 */
+
+	_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, old_list,
+				    struct mali_group, executor_list) {
+		old_counted++;
+		if (group == group_iter) {
+			found = MALI_TRUE;
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, new_list,
+				    struct mali_group, executor_list) {
+		new_counted++;
+	}
+
+	if (MALI_FALSE == found) {
+		if (old_list == &group_list_idle) {
+			MALI_DEBUG_PRINT(1, (" old Group list is idle,"));
+		} else if (old_list == &group_list_inactive) {
+			MALI_DEBUG_PRINT(1, (" old Group list is inactive,"));
+		} else if (old_list == &group_list_working) {
+			MALI_DEBUG_PRINT(1, (" old Group list is working,"));
+		} else if (old_list == &group_list_disabled) {
+			MALI_DEBUG_PRINT(1, (" old Group list is disable,"));
+		}
+
+		if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_WORKING)) {
+			MALI_DEBUG_PRINT(1, (" group in working \n"));
+		} else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_INACTIVE)) {
+			MALI_DEBUG_PRINT(1, (" group in inactive \n"));
+		} else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_IDLE)) {
+			MALI_DEBUG_PRINT(1, (" group in idle \n"));
+		} else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)) {
+			MALI_DEBUG_PRINT(1, (" but group in disabled \n"));
+		}
+	}
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == found);
+	MALI_DEBUG_ASSERT(0 < (*old_count));
+	MALI_DEBUG_ASSERT((*old_count) == old_counted);
+	MALI_DEBUG_ASSERT((*new_count) == new_counted);
+#endif
+
+	_mali_osk_list_move(&group->executor_list, new_list);
+	(*old_count)--;
+	(*new_count)++;
+}
+
+static void mali_executor_set_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *new_list,
+		u32 *new_count)
+{
+	_mali_osk_list_add(&group->executor_list, new_list);
+	(*new_count)++;
+}
+
+static mali_bool mali_executor_group_is_in_state(struct mali_group *group,
+		enum mali_executor_state_t state)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (gp_group == group) {
+		if (gp_group_state == state) {
+			return MALI_TRUE;
+		}
+	} else if (virtual_group == group || mali_group_is_in_virtual(group)) {
+		if (virtual_group_state == state) {
+			return MALI_TRUE;
+		}
+	} else {
+		/* Physical PP group */
+		struct mali_group *group_iter;
+		struct mali_group *temp;
+		_mali_osk_list_t *list;
+
+		if (EXEC_STATE_DISABLED == state) {
+			list = &group_list_disabled;
+		} else if (EXEC_STATE_INACTIVE == state) {
+			list = &group_list_inactive;
+		} else if (EXEC_STATE_IDLE == state) {
+			list = &group_list_idle;
+		} else {
+			MALI_DEBUG_ASSERT(EXEC_STATE_WORKING == state);
+			list = &group_list_working;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, list,
+					    struct mali_group, executor_list) {
+			if (group_iter == group) {
+				return MALI_TRUE;
+			}
+		}
+	}
+
+	/* group not in correct state */
+	return MALI_FALSE;
+}
+
+static void mali_executor_group_enable_internal(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED));
+
+	/* Put into inactive state (== "lowest" enabled state) */
+	if (group == gp_group) {
+		MALI_DEBUG_ASSERT(EXEC_STATE_DISABLED == gp_group_state);
+		gp_group_state = EXEC_STATE_INACTIVE;
+	} else {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_disabled,
+						       &group_list_disabled_count,
+						       &group_list_inactive,
+						       &group_list_inactive_count);
+
+		++num_physical_pp_cores_enabled;
+		MALI_DEBUG_PRINT(4, ("Enabling group id %d \n", group->pp_core->core_id));
+	}
+
+	if (MALI_GROUP_STATE_ACTIVE == mali_group_activate(group)) {
+		MALI_DEBUG_ASSERT(MALI_TRUE == mali_group_power_is_on(group));
+
+		/* Move from inactive to idle */
+		if (group == gp_group) {
+			gp_group_state = EXEC_STATE_IDLE;
+		} else {
+			mali_executor_change_state_pp_physical(group,
+							       &group_list_inactive,
+							       &group_list_inactive_count,
+							       &group_list_idle,
+							       &group_list_idle_count);
+
+			if (mali_executor_has_virtual_group()) {
+				if (mali_executor_physical_rejoin_virtual(group)) {
+					mali_pm_update_async();
+				}
+			}
+		}
+	} else {
+		mali_pm_update_async();
+	}
+}
+
+static void mali_executor_group_disable_internal(struct mali_group *group)
+{
+	mali_bool working;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED));
+
+	working = mali_executor_group_is_in_state(group, EXEC_STATE_WORKING);
+	if (MALI_TRUE == working) {
+		/** Group to be disabled once it completes current work,
+		 * when virtual group completes, also check child groups for this flag */
+		mali_group_set_disable_request(group, MALI_TRUE);
+		return;
+	}
+
+	/* Put into disabled state */
+	if (group == gp_group) {
+		/* GP group */
+		MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != gp_group_state);
+		gp_group_state = EXEC_STATE_DISABLED;
+	} else {
+		if (mali_group_is_in_virtual(group)) {
+			/* A child group of virtual group. move the specific group from virtual group */
+			MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != virtual_group_state);
+
+			mali_executor_set_state_pp_physical(group,
+							    &group_list_disabled,
+							    &group_list_disabled_count);
+
+			mali_group_remove_group(virtual_group, group);
+			mali_executor_disable_empty_virtual();
+		} else {
+			mali_executor_change_group_status_disabled(group);
+		}
+
+		--num_physical_pp_cores_enabled;
+		MALI_DEBUG_PRINT(4, ("Disabling group id %d \n", group->pp_core->core_id));
+	}
+
+	if (MALI_GROUP_STATE_INACTIVE != group->state) {
+		if (MALI_TRUE == mali_group_deactivate(group)) {
+			mali_pm_update_async();
+		}
+	}
+}
+
+static void mali_executor_notify_core_change(u32 num_cores)
+{
+	mali_bool done = MALI_FALSE;
+
+	if (mali_is_mali450()) {
+		return;
+	}
+
+	/*
+	 * This function gets a bit complicated because we can't hold the session lock while
+	 * allocating notification objects.
+	 */
+	while (!done) {
+		u32 i;
+		u32 num_sessions_alloc;
+		u32 num_sessions_with_lock;
+		u32 used_notification_objects = 0;
+		_mali_osk_notification_t **notobjs;
+
+		/* Pre allocate the number of notifications objects we need right now (might change after lock has been taken) */
+		num_sessions_alloc = mali_session_get_count();
+		if (0 == num_sessions_alloc) {
+			/* No sessions to report to */
+			return;
+		}
+
+		notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc);
+		if (NULL == notobjs) {
+			MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n"));
+			/* there is probably no point in trying again, system must be really low on memory and probably unusable now anyway */
+			return;
+		}
+
+		for (i = 0; i < num_sessions_alloc; i++) {
+			notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_NUM_CORE_CHANGE, sizeof(_mali_uk_pp_num_cores_changed_s));
+			if (NULL != notobjs[i]) {
+				_mali_uk_pp_num_cores_changed_s *data = notobjs[i]->result_buffer;
+				data->number_of_enabled_cores = num_cores;
+			} else {
+				MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure %u)\n", i));
+			}
+		}
+
+		mali_session_lock();
+
+		/* number of sessions will not change while we hold the lock */
+		num_sessions_with_lock = mali_session_get_count();
+
+		if (num_sessions_alloc >= num_sessions_with_lock) {
+			/* We have allocated enough notification objects for all the sessions atm */
+			struct mali_session_data *session, *tmp;
+			MALI_SESSION_FOREACH(session, tmp, link) {
+				MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc);
+				if (NULL != notobjs[used_notification_objects]) {
+					mali_session_send_notification(session, notobjs[used_notification_objects]);
+					notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */
+				}
+				used_notification_objects++;
+			}
+			done = MALI_TRUE;
+		}
+
+		mali_session_unlock();
+
+		/* Delete any remaining/unused notification objects */
+		for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) {
+			if (NULL != notobjs[used_notification_objects]) {
+				_mali_osk_notification_delete(notobjs[used_notification_objects]);
+			}
+		}
+
+		_mali_osk_free(notobjs);
+	}
+}
+
+static mali_bool mali_executor_core_scaling_is_done(void *data)
+{
+	u32 i;
+	u32 num_groups;
+	mali_bool ret = MALI_TRUE;
+
+	MALI_IGNORE(data);
+
+	mali_executor_lock();
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (NULL != group) {
+			if (MALI_TRUE == group->disable_requested && NULL != mali_group_get_pp_core(group)) {
+				ret = MALI_FALSE;
+				break;
+			}
+		}
+	}
+	mali_executor_unlock();
+
+	return ret;
+}
+
+static void mali_executor_wq_notify_core_change(void *arg)
+{
+	MALI_IGNORE(arg);
+
+	if (mali_is_mali450()) {
+		return;
+	}
+
+	_mali_osk_wait_queue_wait_event(executor_notify_core_change_wait_queue,
+					mali_executor_core_scaling_is_done, NULL);
+
+	mali_executor_notify_core_change(num_physical_pp_cores_enabled);
+}
+
+/**
+ * Clear all disable request from the _last_ core scaling behavior.
+ */
+static void mali_executor_core_scaling_reset(void)
+{
+	u32 i;
+	u32 num_groups;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (NULL != group) {
+			group->disable_requested = MALI_FALSE;
+		}
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		core_scaling_delay_up_mask[i] = 0;
+	}
+}
+
+static void mali_executor_core_scale(unsigned int target_core_nr)
+{
+	int current_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	int target_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	int i;
+
+	MALI_DEBUG_ASSERT(0 < target_core_nr);
+	MALI_DEBUG_ASSERT(num_physical_pp_cores_total >= target_core_nr);
+
+	mali_executor_lock();
+
+	if (target_core_nr < num_physical_pp_cores_enabled) {
+		MALI_DEBUG_PRINT(2, ("Requesting %d cores: disabling %d cores\n", target_core_nr, num_physical_pp_cores_enabled - target_core_nr));
+	} else {
+		MALI_DEBUG_PRINT(2, ("Requesting %d cores: enabling %d cores\n", target_core_nr, target_core_nr - num_physical_pp_cores_enabled));
+	}
+
+	/* When a new core scaling request is comming,  we should remove the un-doing
+	 * part of the last core scaling request.  It's safe because we have only one
+	 * lock(executor lock) protection. */
+	mali_executor_core_scaling_reset();
+
+	mali_pm_get_best_power_cost_mask(num_physical_pp_cores_enabled, current_core_scaling_mask);
+	mali_pm_get_best_power_cost_mask(target_core_nr, target_core_scaling_mask);
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		target_core_scaling_mask[i] = target_core_scaling_mask[i] - current_core_scaling_mask[i];
+		MALI_DEBUG_PRINT(5, ("target_core_scaling_mask[%d] = %d\n", i, target_core_scaling_mask[i]));
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0 > target_core_scaling_mask[i]) {
+			struct mali_pm_domain *domain;
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(group) && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))
+					    && (!mali_group_is_virtual(group))) {
+						mali_executor_group_disable_internal(group);
+						target_core_scaling_mask[i]++;
+						if ((0 == target_core_scaling_mask[i])) {
+							break;
+						}
+
+					}
+				}
+			}
+		}
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		/**
+		 * Target_core_scaling_mask[i] is bigger than 0,
+		 * means we need to enable some pp cores in
+		 * this domain whose domain index is i.
+		 */
+		if (0 < target_core_scaling_mask[i]) {
+			struct mali_pm_domain *domain;
+
+			if (num_physical_pp_cores_enabled >= target_core_nr) {
+				break;
+			}
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(group) && mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)
+					    && (!mali_group_is_virtual(group))) {
+						mali_executor_group_enable_internal(group);
+						target_core_scaling_mask[i]--;
+
+						if ((0 == target_core_scaling_mask[i]) || num_physical_pp_cores_enabled == target_core_nr) {
+							break;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	/**
+	 * Here, we may still have some pp cores not been enabled because of some
+	 * pp cores need to be disabled are still in working state.
+	 */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0 < target_core_scaling_mask[i]) {
+			core_scaling_delay_up_mask[i] = target_core_scaling_mask[i];
+		}
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+}
+
+static void mali_executor_core_scale_in_group_complete(struct mali_group *group)
+{
+	int num_pp_cores_disabled = 0;
+	int num_pp_cores_to_enable = 0;
+	int i;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_group_disable_requested(group));
+
+	/* Disable child group of virtual group */
+	if (mali_group_is_virtual(group)) {
+		struct mali_group *child;
+		struct mali_group *temp;
+
+		_MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+			if (MALI_TRUE == mali_group_disable_requested(child)) {
+				mali_group_set_disable_request(child, MALI_FALSE);
+				mali_executor_group_disable_internal(child);
+				num_pp_cores_disabled++;
+			}
+		}
+		mali_group_set_disable_request(group, MALI_FALSE);
+	} else {
+		mali_executor_group_disable_internal(group);
+		mali_group_set_disable_request(group, MALI_FALSE);
+		if (NULL != mali_group_get_pp_core(group)) {
+			num_pp_cores_disabled++;
+		}
+	}
+
+	num_pp_cores_to_enable = num_pp_cores_disabled;
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0 < core_scaling_delay_up_mask[i]) {
+			struct mali_pm_domain *domain;
+
+			if (0 == num_pp_cores_to_enable) {
+				break;
+			}
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *disabled_group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(disabled_group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(disabled_group) && mali_executor_group_is_in_state(disabled_group, EXEC_STATE_DISABLED)) {
+						mali_executor_group_enable_internal(disabled_group);
+						core_scaling_delay_up_mask[i]--;
+						num_pp_cores_to_enable--;
+
+						if ((0 == core_scaling_delay_up_mask[i]) || 0 == num_pp_cores_to_enable) {
+							break;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	_mali_osk_wait_queue_wake_up(executor_notify_core_change_wait_queue);
+}
+
+static void mali_executor_change_group_status_disabled(struct mali_group *group)
+{
+	/* Physical PP group */
+	mali_bool idle;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	idle = mali_executor_group_is_in_state(group, EXEC_STATE_IDLE);
+	if (MALI_TRUE == idle) {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_idle,
+						       &group_list_idle_count,
+						       &group_list_disabled,
+						       &group_list_disabled_count);
+	} else {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_inactive,
+						       &group_list_inactive_count,
+						       &group_list_disabled,
+						       &group_list_disabled_count);
+	}
+}
+
+static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group)
+{
+	mali_bool trigger_pm_update = MALI_FALSE;
+
+	if (group_list_idle_count > 0) {
+		if (mali_executor_has_virtual_group()) {
+
+			/* Rejoin virtual group on Mali-450 */
+
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+						    &group_list_idle,
+						    struct mali_group, executor_list) {
+				if (mali_executor_physical_rejoin_virtual(
+					    group)) {
+					trigger_pm_update = MALI_TRUE;
+				}
+			}
+		} else if (deactivate_idle_group) {
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			/* Deactivate group on Mali-300/400 */
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+						    &group_list_idle,
+						    struct mali_group, executor_list) {
+				if (mali_group_deactivate(group)) {
+					trigger_pm_update = MALI_TRUE;
+				}
+
+				/* Move from idle to inactive */
+				mali_executor_change_state_pp_physical(group,
+								       &group_list_idle,
+								       &group_list_idle_count,
+								       &group_list_inactive,
+								       &group_list_inactive_count);
+			}
+		}
+	}
+
+	return trigger_pm_update;
+}
diff --git a/utgard/r5p1/common/mali_executor.h b/utgard/r5p1/common/mali_executor.h
new file mode 100755
index 0000000..4485405
--- /dev/null
+++ b/utgard/r5p1/common/mali_executor.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2012, 2014-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_EXECUTOR_H__
+#define __MALI_EXECUTOR_H__
+
+#include "mali_osk.h"
+#include "mali_scheduler_types.h"
+#include "mali_kernel_common.h"
+
+typedef enum {
+	MALI_EXECUTOR_HINT_GP_BOUND = 0
+#define MALI_EXECUTOR_HINT_MAX        1
+} mali_executor_hint;
+
+extern mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX];
+
+/* forward declare struct instead of using include */
+struct mali_session_data;
+struct mali_group;
+struct mali_pp_core;
+
+extern _mali_osk_spinlock_irq_t *mali_executor_lock_obj;
+
+#define MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj);
+
+_mali_osk_errcode_t mali_executor_initialize(void);
+void mali_executor_terminate(void);
+
+void mali_executor_populate(void);
+void mali_executor_depopulate(void);
+
+void mali_executor_suspend(void);
+void mali_executor_resume(void);
+
+u32 mali_executor_get_num_cores_total(void);
+u32 mali_executor_get_num_cores_enabled(void);
+struct mali_pp_core *mali_executor_get_virtual_pp(void);
+struct mali_group *mali_executor_get_virtual_group(void);
+
+void mali_executor_zap_all_active(struct mali_session_data *session);
+
+/**
+ * Schedule GP and PP according to bitmask.
+ *
+ * @param mask A scheduling bitmask.
+ * @param deferred_schedule MALI_TRUE if schedule should be deferred, MALI_FALSE if not.
+ */
+void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule);
+
+_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group, mali_bool in_upper_half);
+_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group, mali_bool in_upper_half);
+_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group, mali_bool in_upper_half);
+
+void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups);
+void mali_executor_group_power_down(struct mali_group *groups[], u32 num_groups);
+
+void mali_executor_abort_session(struct mali_session_data *session);
+
+void mali_executor_core_scaling_enable(void);
+void mali_executor_core_scaling_disable(void);
+mali_bool mali_executor_core_scaling_is_enabled(void);
+
+void mali_executor_group_enable(struct mali_group *group);
+void mali_executor_group_disable(struct mali_group *group);
+mali_bool mali_executor_group_is_disabled(struct mali_group *group);
+
+int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override);
+
+#if MALI_STATE_TRACKING
+u32 mali_executor_dump_state(char *buf, u32 size);
+#endif
+
+MALI_STATIC_INLINE void mali_executor_hint_enable(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	mali_executor_hints[hint] = MALI_TRUE;
+}
+
+MALI_STATIC_INLINE void mali_executor_hint_disable(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	mali_executor_hints[hint] = MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_executor_hint_is_enabled(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	return mali_executor_hints[hint];
+}
+
+#endif /* __MALI_EXECUTOR_H__ */
diff --git a/utgard/r5p1/common/mali_gp.c b/utgard/r5p1/common/mali_gp.c
new file mode 100755
index 0000000..3893cb0
--- /dev/null
+++ b/utgard/r5p1/common/mali_gp.c
@@ -0,0 +1,361 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_gp.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "regs/mali_gp_regs.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+
+#include <mali_platform.h>
+
+static struct mali_gp_core *mali_global_gp_core = NULL;
+
+/* Interrupt handlers */
+static void mali_gp_irq_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data);
+
+struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group)
+{
+	struct mali_gp_core *core = NULL;
+
+	MALI_DEBUG_ASSERT(NULL == mali_global_gp_core);
+	MALI_DEBUG_PRINT(2, ("Mali GP: Creating Mali GP core: %s\n", resource->description));
+
+	core = _mali_osk_malloc(sizeof(struct mali_gp_core));
+	if (NULL != core) {
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALIGP2_REGISTER_ADDRESS_SPACE_SIZE)) {
+			_mali_osk_errcode_t ret;
+
+			ret = mali_gp_reset(core);
+
+			if (_MALI_OSK_ERR_OK == ret) {
+				ret = mali_group_add_gp_core(group, core);
+				if (_MALI_OSK_ERR_OK == ret) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					core->irq = _mali_osk_irq_init(resource->irq,
+								       mali_group_upper_half_gp,
+								       group,
+								       mali_gp_irq_probe_trigger,
+								       mali_gp_irq_probe_ack,
+								       core,
+								       resource->description);
+					if (NULL != core->irq) {
+						MALI_DEBUG_PRINT(4, ("Mali GP: set global gp core from 0x%08X to 0x%08X\n", mali_global_gp_core, core));
+						mali_global_gp_core = core;
+
+						return core;
+					} else {
+						MALI_PRINT_ERROR(("Mali GP: Failed to setup interrupt handlers for GP core %s\n", core->hw_core.description));
+					}
+					mali_group_remove_gp_core(group);
+				} else {
+					MALI_PRINT_ERROR(("Mali GP: Failed to add core %s to group\n", core->hw_core.description));
+				}
+			}
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Failed to allocate memory for GP core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_gp_delete(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	_mali_osk_irq_term(core->irq);
+	mali_hw_core_delete(&core->hw_core);
+	mali_global_gp_core = NULL;
+	_mali_osk_free(core);
+}
+
+void mali_gp_stop_bus(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_STOP_BUS);
+}
+
+_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core)
+{
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Send the stop bus command. */
+	mali_gp_stop_bus(core);
+
+	/* Wait for bus to be stopped */
+	for (i = 0; i < MALI_REG_POLL_COUNT_SLOW; i++) {
+		if (mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS) & MALIGP2_REG_VAL_STATUS_BUS_STOPPED) {
+			break;
+		}
+	}
+
+	if (MALI_REG_POLL_COUNT_SLOW == i) {
+		MALI_PRINT_ERROR(("Mali GP: Failed to stop bus on %s\n", core->hw_core.description));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_gp_hard_reset(struct mali_gp_core *core)
+{
+	const u32 reset_wait_target_register = MALIGP2_REG_ADDR_MGMT_WRITE_BOUND_LOW;
+	const u32 reset_invalid_value = 0xC0FFE000;
+	const u32 reset_check_value = 0xC01A0000;
+	const u32 reset_default_value = 0;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+	MALI_DEBUG_PRINT(4, ("Mali GP: Hard reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_invalid_value);
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_RESET);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value);
+		if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) {
+			break;
+		}
+	}
+
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Mali GP: The hard reset loop didn't work, unable to recover\n"));
+	}
+
+	mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_default_value); /* set it back to the default */
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+
+}
+
+void mali_gp_reset_async(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	MALI_DEBUG_PRINT(4, ("Mali GP: Reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALI400GP_REG_VAL_IRQ_RESET_COMPLETED);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALI400GP_REG_VAL_CMD_SOFT_RESET);
+
+}
+
+_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core)
+{
+	int i;
+	u32 rawstat = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		rawstat = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+		if (rawstat & MALI400GP_REG_VAL_IRQ_RESET_COMPLETED) {
+			break;
+		}
+	}
+
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Mali GP: Failed to reset core %s, rawstat: 0x%08x\n",
+				  core->hw_core.description, rawstat));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core)
+{
+	mali_gp_reset_async(core);
+	return mali_gp_reset_wait(core);
+}
+
+void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job)
+{
+	u32 startcmd = 0;
+	u32 *frame_registers = mali_gp_job_get_frame_registers(job);
+	u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job);
+	u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job);
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	if (mali_gp_job_has_vs_job(job)) {
+		startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_VS;
+	}
+
+	if (mali_gp_job_has_plbu_job(job)) {
+		startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_PLBU;
+	}
+
+	MALI_DEBUG_ASSERT(0 != startcmd);
+
+	mali_hw_core_register_write_array_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR, frame_registers, MALIGP2_NUM_REGS_FRAME);
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0);
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE);
+	}
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1);
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE);
+	}
+
+	MALI_DEBUG_PRINT(3, ("Mali GP: Starting job (0x%08x) on core %s with command 0x%08X\n", job, core->hw_core.description, startcmd));
+
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC);
+
+	/* Barrier to make sure the previous register write is finished */
+	_mali_osk_write_mem_barrier();
+
+	/* This is the command that starts the core.
+	 *
+	 * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just
+	 * force core to assert the completion interrupt.
+	 */
+#if !defined(PROFILING_SKIP_GP_JOBS)
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, startcmd);
+#else
+	{
+		u32 bits = 0;
+
+		if (mali_gp_job_has_vs_job(job))
+			bits = MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST;
+		if (mali_gp_job_has_plbu_job(job))
+			bits |= MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST;
+
+		mali_hw_core_register_write_relaxed(&core->hw_core,
+						    MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, bits);
+	}
+#endif
+
+	/* Barrier to make sure the previous register write is finished */
+	_mali_osk_write_mem_barrier();
+}
+
+void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr)
+{
+	u32 irq_readout;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+
+	if (irq_readout & MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM | MALIGP2_REG_VAL_IRQ_HANG));
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED); /* re-enable interrupts */
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR, start_addr);
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR, end_addr);
+
+		MALI_DEBUG_PRINT(3, ("Mali GP: Resuming job\n"));
+
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC);
+		_mali_osk_write_mem_barrier();
+	}
+	/*
+	 * else: core has been reset between PLBU_OUT_OF_MEM interrupt and this new heap response.
+	 * A timeout or a page fault on Mali-200 PP core can cause this behaviour.
+	 */
+}
+
+u32 mali_gp_core_get_version(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VERSION);
+}
+
+struct mali_gp_core *mali_gp_get_global_gp_core(void)
+{
+	return mali_global_gp_core;
+}
+
+/* ------------- interrupt handling below ------------------ */
+static void mali_gp_irq_probe_trigger(void *data)
+{
+	struct mali_gp_core *core = (struct mali_gp_core *)data;
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, MALIGP2_REG_VAL_CMD_FORCE_HANG);
+	_mali_osk_mem_barrier();
+}
+
+static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data)
+{
+	struct mali_gp_core *core = (struct mali_gp_core *)data;
+	u32 irq_readout;
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT);
+	if (MALIGP2_REG_VAL_IRQ_FORCE_HANG & irq_readout) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_FORCE_HANG);
+		_mali_osk_mem_barrier();
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+/* ------ local helper functions below --------- */
+#if MALI_STATE_TRACKING
+u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\tGP: %s\n", core->hw_core.description);
+
+	return n;
+}
+#endif
+
+void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job)
+{
+	u32 val0 = 0;
+	u32 val1 = 0;
+	u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job);
+	u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job);
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		val0 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE);
+		mali_gp_job_set_perf_counter_value0(job, val0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C0, val0);
+#endif
+
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		val1 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE);
+		mali_gp_job_set_perf_counter_value1(job, val1);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C1, val1);
+#endif
+	}
+}
diff --git a/utgard/r5p1/common/mali_gp.h b/utgard/r5p1/common/mali_gp.h
new file mode 100755
index 0000000..8d5f69c
--- /dev/null
+++ b/utgard/r5p1/common/mali_gp.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GP_H__
+#define __MALI_GP_H__
+
+#include "mali_osk.h"
+#include "mali_gp_job.h"
+#include "mali_hw_core.h"
+#include "regs/mali_gp_regs.h"
+
+struct mali_group;
+
+/**
+ * Definition of the GP core struct
+ * Used to track a GP core in the system.
+ */
+struct mali_gp_core {
+	struct mali_hw_core  hw_core;           /**< Common for all HW cores */
+	_mali_osk_irq_t     *irq;               /**< IRQ handler */
+};
+
+_mali_osk_errcode_t mali_gp_initialize(void);
+void mali_gp_terminate(void);
+
+struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group);
+void mali_gp_delete(struct mali_gp_core *core);
+
+void mali_gp_stop_bus(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core);
+void mali_gp_reset_async(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core);
+void mali_gp_hard_reset(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core);
+
+void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job);
+void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr);
+
+u32 mali_gp_core_get_version(struct mali_gp_core *core);
+
+struct mali_gp_core *mali_gp_get_global_gp_core(void);
+
+#if MALI_STATE_TRACKING
+u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size);
+#endif
+
+void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job);
+
+MALI_STATIC_INLINE const char *mali_gp_core_description(struct mali_gp_core *core)
+{
+	return core->hw_core.description;
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_gp_get_interrupt_result(struct mali_gp_core *core)
+{
+	u32 stat_used = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT) &
+			MALIGP2_REG_VAL_IRQ_MASK_USED;
+
+	if (0 == stat_used) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	} else if ((MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST |
+		    MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST) == stat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS;
+	} else if (MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST == stat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS_VS;
+	} else if (MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST == stat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS_PLBU;
+	} else if (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM & stat_used) {
+		return MALI_INTERRUPT_RESULT_OOM;
+	}
+
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_get_rawstat(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core,
+					  MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+}
+
+MALI_STATIC_INLINE u32 mali_gp_is_active(struct mali_gp_core *core)
+{
+	u32 status = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS);
+	return (status & MALIGP2_REG_VAL_STATUS_MASK_ACTIVE) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_gp_mask_all_interrupts(struct mali_gp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_NONE);
+}
+
+MALI_STATIC_INLINE void mali_gp_enable_interrupts(struct mali_gp_core *core, enum mali_interrupt_result exceptions)
+{
+	/* Enable all interrupts, except those specified in exceptions */
+	u32 value;
+
+	if (MALI_INTERRUPT_RESULT_SUCCESS_VS == exceptions) {
+		/* Enable all used except VS complete */
+		value = MALIGP2_REG_VAL_IRQ_MASK_USED &
+			~MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST;
+	} else {
+		MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_SUCCESS_PLBU ==
+				  exceptions);
+		/* Enable all used except PLBU complete */
+		value = MALIGP2_REG_VAL_IRQ_MASK_USED &
+			~MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST;
+	}
+
+	mali_hw_core_register_write(&core->hw_core,
+				    MALIGP2_REG_ADDR_MGMT_INT_MASK,
+				    value);
+}
+
+MALI_STATIC_INLINE u32 mali_gp_read_plbu_alloc_start_addr(struct mali_gp_core *core)
+{
+	return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR);
+}
+
+#endif /* __MALI_GP_H__ */
diff --git a/utgard/r5p1/common/mali_gp_job.c b/utgard/r5p1/common/mali_gp_job.c
new file mode 100755
index 0000000..d6ef312
--- /dev/null
+++ b/utgard/r5p1/common/mali_gp_job.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_gp_job.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+
+static u32 gp_counter_src0 = MALI_HW_CORE_NO_COUNTER;      /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+static u32 gp_counter_src1 = MALI_HW_CORE_NO_COUNTER;           /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+
+struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker)
+{
+	struct mali_gp_job *job;
+	u32 perf_counter_flag;
+
+	job = _mali_osk_malloc(sizeof(struct mali_gp_job));
+	if (NULL != job) {
+		job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_FINISHED, sizeof(_mali_uk_gp_job_finished_s));
+		if (NULL == job->finished_notification) {
+			_mali_osk_free(job);
+			return NULL;
+		}
+
+		job->oom_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_STALLED, sizeof(_mali_uk_gp_job_suspended_s));
+		if (NULL == job->oom_notification) {
+			_mali_osk_notification_delete(job->finished_notification);
+			_mali_osk_free(job);
+			return NULL;
+		}
+
+		if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_gp_start_job_s))) {
+			_mali_osk_notification_delete(job->finished_notification);
+			_mali_osk_notification_delete(job->oom_notification);
+			_mali_osk_free(job);
+			return NULL;
+		}
+
+		perf_counter_flag = mali_gp_job_get_perf_counter_flag(job);
+
+		/* case when no counters came from user space
+		 * so pass the debugfs / DS-5 provided global ones to the job object */
+		if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) ||
+		      (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) {
+			mali_gp_job_set_perf_counter_src0(job, mali_gp_job_get_gp_counter_src0());
+			mali_gp_job_set_perf_counter_src1(job, mali_gp_job_get_gp_counter_src1());
+		}
+
+		_mali_osk_list_init(&job->list);
+		job->session = session;
+		job->id = id;
+		job->heap_current_addr = job->uargs.frame_registers[4];
+		job->perf_counter_value0 = 0;
+		job->perf_counter_value1 = 0;
+		job->pid = _mali_osk_get_pid();
+		job->tid = _mali_osk_get_tid();
+
+		job->pp_tracker = pp_tracker;
+		if (NULL != job->pp_tracker) {
+			/* Take a reference on PP job's tracker that will be released when the GP
+			   job is done. */
+			mali_timeline_system_tracker_get(session->timeline_system, pp_tracker);
+		}
+
+		mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_GP, NULL, job);
+		mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence));
+
+		return job;
+	}
+
+	return NULL;
+}
+
+void mali_gp_job_delete(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(NULL == job->pp_tracker);
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list));
+
+	/* de-allocate the pre-allocated oom notifications */
+	if (NULL != job->oom_notification) {
+		_mali_osk_notification_delete(job->oom_notification);
+		job->oom_notification = NULL;
+	}
+	if (NULL != job->finished_notification) {
+		_mali_osk_notification_delete(job->finished_notification);
+		job->finished_notification = NULL;
+	}
+
+	_mali_osk_free(job);
+}
+
+void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list)
+{
+	struct mali_gp_job *iter;
+	struct mali_gp_job *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Find position in list/queue where job should be added. */
+	_MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list,
+					    struct mali_gp_job, list) {
+
+		/* A span is used to handle job ID wrapping. */
+		bool job_is_after = (mali_gp_job_get_id(job) -
+				     mali_gp_job_get_id(iter)) <
+				    MALI_SCHEDULER_JOB_ID_SPAN;
+
+		if (job_is_after) {
+			break;
+		}
+	}
+
+	_mali_osk_list_add(&job->list, &iter->list);
+}
+
+u32 mali_gp_job_get_gp_counter_src0(void)
+{
+	return gp_counter_src0;
+}
+
+void mali_gp_job_set_gp_counter_src0(u32 counter)
+{
+	gp_counter_src0 = counter;
+}
+
+u32 mali_gp_job_get_gp_counter_src1(void)
+{
+	return gp_counter_src1;
+}
+
+void mali_gp_job_set_gp_counter_src1(u32 counter)
+{
+	gp_counter_src1 = counter;
+}
+
+mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (NULL != job->pp_tracker) {
+		schedule_mask |= mali_timeline_system_tracker_put(job->session->timeline_system, job->pp_tracker, MALI_FALSE == success);
+		job->pp_tracker = NULL;
+	}
+
+	return schedule_mask;
+}
diff --git a/utgard/r5p1/common/mali_gp_job.h b/utgard/r5p1/common/mali_gp_job.h
new file mode 100755
index 0000000..e4b0858
--- /dev/null
+++ b/utgard/r5p1/common/mali_gp_job.h
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GP_JOB_H__
+#define __MALI_GP_JOB_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_session.h"
+#include "mali_timeline.h"
+#include "mali_scheduler_types.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+#include "mali_timeline.h"
+
+/**
+ * This structure represents a GP job
+ *
+ * The GP job object itself is not protected by any single lock,
+ * but relies on other locks instead (scheduler, executor and timeline lock).
+ * Think of the job object as moving between these sub systems through-out
+ * its lifetime. Different part of the GP job struct is used by different
+ * subsystems. Accessor functions ensure that correct lock is taken.
+ * Do NOT access any data members directly from outside this module!
+ */
+struct mali_gp_job {
+	/*
+	 * These members are typically only set at creation,
+	 * and only read later on.
+	 * They do not require any lock protection.
+	 */
+	_mali_uk_gp_start_job_s uargs;                     /**< Arguments from user space */
+	struct mali_session_data *session;                 /**< Session which submitted this job */
+	u32 pid;                                           /**< Process ID of submitting process */
+	u32 tid;                                           /**< Thread ID of submitting thread */
+	u32 id;                                            /**< Identifier for this job in kernel space (sequential numbering) */
+	u32 cache_order;                                   /**< Cache order used for L2 cache flushing (sequential numbering) */
+	struct mali_timeline_tracker tracker;              /**< Timeline tracker for this job */
+	struct mali_timeline_tracker *pp_tracker;          /**< Pointer to Timeline tracker for PP job that depends on this job. */
+	_mali_osk_notification_t *finished_notification;   /**< Notification sent back to userspace on job complete */
+
+	/*
+	 * These members are used by the scheduler,
+	 * protected by scheduler lock
+	 */
+	_mali_osk_list_t list;                             /**< Used to link jobs together in the scheduler queue */
+
+	/*
+	 * These members are used by the executor and/or group,
+	 * protected by executor lock
+	 */
+	_mali_osk_notification_t *oom_notification;        /**< Notification sent back to userspace on OOM */
+
+	/*
+	 * Set by executor/group on job completion, read by scheduler when
+	 * returning job to user. Hold executor lock when setting,
+	 * no lock needed when reading
+	 */
+	u32 heap_current_addr;                             /**< Holds the current HEAP address when the job has completed */
+	u32 perf_counter_value0;                           /**< Value of performance counter 0 (to be returned to user space) */
+	u32 perf_counter_value1;                           /**< Value of performance counter 1 (to be returned to user space) */
+};
+
+struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker);
+void mali_gp_job_delete(struct mali_gp_job *job);
+
+u32 mali_gp_job_get_gp_counter_src0(void);
+void mali_gp_job_set_gp_counter_src0(u32 counter);
+u32 mali_gp_job_get_gp_counter_src1(void);
+void mali_gp_job_set_gp_counter_src1(u32 counter);
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->id;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_cache_order(struct mali_gp_job *job,
+		u32 cache_order)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	job->cache_order = cache_order;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_cache_order(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->cache_order;
+}
+
+MALI_STATIC_INLINE u64 mali_gp_job_get_user_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.user_job_ptr;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_frame_builder_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_builder_id;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_flush_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.flush_id;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_pid(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->pid;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_tid(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->tid;
+}
+
+MALI_STATIC_INLINE u32 *mali_gp_job_get_frame_registers(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_registers;
+}
+
+MALI_STATIC_INLINE struct mali_session_data *mali_gp_job_get_session(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->session;
+}
+
+MALI_STATIC_INLINE mali_bool mali_gp_job_has_vs_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.frame_registers[0] != job->uargs.frame_registers[1]) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_gp_job_has_plbu_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.frame_registers[2] != job->uargs.frame_registers[3]) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_current_heap_addr(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->heap_current_addr;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_current_heap_addr(struct mali_gp_job *job, u32 heap_addr)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->heap_current_addr = heap_addr;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_flag(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_flag;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src0(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_src0;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src1(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_src1;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value0(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value0;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value1(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value1;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src0(struct mali_gp_job *job, u32 src)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.perf_counter_src0 = src;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src1(struct mali_gp_job *job, u32 src)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.perf_counter_src1 = src;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value0(struct mali_gp_job *job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value0 = value;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value1(struct mali_gp_job *job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value1 = value;
+}
+
+void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list);
+
+MALI_STATIC_INLINE void mali_gp_job_list_move(struct mali_gp_job *job,
+		_mali_osk_list_t *list)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list));
+	_mali_osk_list_move(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_gp_job_list_remove(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->list);
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *
+mali_gp_job_get_finished_notification(struct mali_gp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->finished_notification);
+
+	notification = job->finished_notification;
+	job->finished_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *mali_gp_job_get_oom_notification(
+	struct mali_gp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job->oom_notification);
+
+	notification = job->oom_notification;
+	job->oom_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_oom_notification(
+	struct mali_gp_job *job,
+	_mali_osk_notification_t *notification)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(NULL == job->oom_notification);
+	job->oom_notification = notification;
+}
+
+MALI_STATIC_INLINE struct mali_timeline_tracker *mali_gp_job_get_tracker(
+	struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return &(job->tracker);
+}
+
+
+MALI_STATIC_INLINE u32 *mali_gp_job_get_timeline_point_ptr(
+	struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr;
+}
+
+
+/**
+ * Release reference on tracker for PP job that depends on this GP job.
+ *
+ * @note If GP job has a reference on tracker, this function MUST be called before the GP job is
+ * deleted.
+ *
+ * @param job GP job that is done.
+ * @param success MALI_TRUE if job completed successfully, MALI_FALSE if not.
+ * @return A scheduling bitmask indicating whether scheduling needs to be done.
+ */
+mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success);
+
+#endif /* __MALI_GP_JOB_H__ */
diff --git a/utgard/r5p1/common/mali_group.c b/utgard/r5p1/common/mali_group.c
new file mode 100755
index 0000000..0cbd861
--- /dev/null
+++ b/utgard/r5p1/common/mali_group.c
@@ -0,0 +1,1738 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+#include "mali_kernel_common.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_mmu.h"
+#include "mali_dlbu.h"
+#include "mali_broadcast.h"
+#include "mali_scheduler.h"
+#include "mali_osk_profiling.h"
+#include "mali_pm_domain.h"
+#include "mali_pm.h"
+#include "mali_executor.h"
+#include <mali_platform.h>
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+#include <linux/sched.h>
+#include <trace/events/gpu.h>
+#endif
+
+#define MALI_MAX_NUM_DOMAIN_REFS (MALI_MAX_NUMBER_OF_GROUPS * 2)
+
+#if defined(CONFIG_MALI400_PROFILING)
+static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num);
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+static struct mali_group *mali_global_groups[MALI_MAX_NUMBER_OF_GROUPS] = { NULL, };
+static u32 mali_global_num_groups = 0;
+
+/* SW timer for job execution */
+int mali_max_job_runtime = MALI_MAX_JOB_RUNTIME_DEFAULT;
+
+/* local helper functions */
+static void mali_group_bottom_half_mmu(void *data);
+static void mali_group_bottom_half_gp(void *data);
+static void mali_group_bottom_half_pp(void *data);
+static void mali_group_timeout(void *data);
+
+static void mali_group_reset_pp(struct mali_group *group);
+static void mali_group_reset_mmu(struct mali_group *group);
+
+static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session);
+static void mali_group_recovery_reset(struct mali_group *group);
+
+struct mali_group *mali_group_create(struct mali_l2_cache_core *core,
+        struct mali_dlbu_core *dlbu,
+        struct mali_bcast_unit *bcast,
+        u32 domain_index)
+{
+    struct mali_group *group = NULL;
+
+    if (mali_global_num_groups >= MALI_MAX_NUMBER_OF_GROUPS) {
+        MALI_PRINT_ERROR(("Mali group: Too many group objects created\n"));
+        return NULL;
+    }
+
+    group = _mali_osk_calloc(1, sizeof(struct mali_group));
+    if (NULL != group) {
+        group->timeout_timer = _mali_osk_timer_init();
+        if (NULL != group->timeout_timer) {
+            _mali_osk_timer_setcallback(group->timeout_timer, mali_group_timeout, (void *)group);
+
+            group->l2_cache_core[0] = core;
+            _mali_osk_list_init(&group->group_list);
+            _mali_osk_list_init(&group->executor_list);
+            _mali_osk_list_init(&group->pm_domain_list);
+            group->bcast_core = bcast;
+            group->dlbu_core = dlbu;
+
+            /* register this object as a part of the correct power domain */
+            if ((NULL != core) || (NULL != dlbu) || (NULL != bcast))
+                group->pm_domain = mali_pm_register_group(domain_index, group);
+
+            mali_global_groups[mali_global_num_groups] = group;
+            mali_global_num_groups++;
+
+            return group;
+        }
+        _mali_osk_free(group);
+    }
+
+    return NULL;
+}
+
+void mali_group_delete(struct mali_group *group)
+{
+    u32 i;
+
+    MALI_DEBUG_PRINT(4, ("Deleting group %s\n",
+                mali_group_core_description(group)));
+
+    MALI_DEBUG_ASSERT(NULL == group->parent_group);
+    MALI_DEBUG_ASSERT((MALI_GROUP_STATE_INACTIVE == group->state) || ((MALI_GROUP_STATE_ACTIVATION_PENDING == group->state)));
+
+    /* Delete the resources that this group owns */
+    if (NULL != group->gp_core) {
+        mali_gp_delete(group->gp_core);
+    }
+
+    if (NULL != group->pp_core) {
+        mali_pp_delete(group->pp_core);
+    }
+
+    if (NULL != group->mmu) {
+        mali_mmu_delete(group->mmu);
+    }
+
+    if (mali_group_is_virtual(group)) {
+        /* Remove all groups from virtual group */
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            child->parent_group = NULL;
+            mali_group_delete(child);
+        }
+
+        mali_dlbu_delete(group->dlbu_core);
+
+        if (NULL != group->bcast_core) {
+            mali_bcast_unit_delete(group->bcast_core);
+        }
+    }
+
+    for (i = 0; i < mali_global_num_groups; i++) {
+        if (mali_global_groups[i] == group) {
+            mali_global_groups[i] = NULL;
+            mali_global_num_groups--;
+
+            if (i != mali_global_num_groups) {
+                /* We removed a group from the middle of the array -- move the last
+                 * group to the current position to close the gap */
+                mali_global_groups[i] = mali_global_groups[mali_global_num_groups];
+                mali_global_groups[mali_global_num_groups] = NULL;
+            }
+
+            break;
+        }
+    }
+
+    if (NULL != group->timeout_timer) {
+        _mali_osk_timer_del(group->timeout_timer);
+        _mali_osk_timer_term(group->timeout_timer);
+    }
+
+    if (NULL != group->bottom_half_work_mmu) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_mmu);
+    }
+
+    if (NULL != group->bottom_half_work_gp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_gp);
+    }
+
+    if (NULL != group->bottom_half_work_pp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_pp);
+    }
+
+    _mali_osk_free(group);
+}
+
+_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group, struct mali_mmu_core *mmu_core)
+{
+    /* This group object now owns the MMU core object */
+    group->mmu = mmu_core;
+    group->bottom_half_work_mmu = _mali_osk_wq_create_work(mali_group_bottom_half_mmu, group);
+    if (NULL == group->bottom_half_work_mmu) {
+        return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_mmu_core(struct mali_group *group)
+{
+    /* This group object no longer owns the MMU core object */
+    group->mmu = NULL;
+    if (NULL != group->bottom_half_work_mmu) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_mmu);
+    }
+}
+
+_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group, struct mali_gp_core *gp_core)
+{
+    /* This group object now owns the GP core object */
+    group->gp_core = gp_core;
+    group->bottom_half_work_gp = _mali_osk_wq_create_work(mali_group_bottom_half_gp, group);
+    if (NULL == group->bottom_half_work_gp) {
+        return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_gp_core(struct mali_group *group)
+{
+    /* This group object no longer owns the GP core object */
+    group->gp_core = NULL;
+    if (NULL != group->bottom_half_work_gp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_gp);
+    }
+}
+
+_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group, struct mali_pp_core *pp_core)
+{
+    /* This group object now owns the PP core object */
+    group->pp_core = pp_core;
+    group->bottom_half_work_pp = _mali_osk_wq_create_work(mali_group_bottom_half_pp, group);
+    if (NULL == group->bottom_half_work_pp) {
+        return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_pp_core(struct mali_group *group)
+{
+    /* This group object no longer owns the PP core object */
+    group->pp_core = NULL;
+    if (NULL != group->bottom_half_work_pp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_pp);
+    }
+}
+
+enum mali_group_state mali_group_activate(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(4, ("Group: Activating group %s\n",
+                mali_group_core_description(group)));
+
+    if (MALI_GROUP_STATE_INACTIVE == group->state) {
+        /* Group is inactive, get PM refs in order to power up */
+
+        /*
+         * We'll take a maximum of 2 power domain references pr group,
+         * one for the group itself, and one for it's L2 cache.
+         */
+        struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS];
+        struct mali_group *groups[MALI_MAX_NUM_DOMAIN_REFS];
+        u32 num_domains = 0;
+        mali_bool all_groups_on;
+
+        /* Deal with child groups first */
+        if (mali_group_is_virtual(group)) {
+            /*
+             * The virtual group might have 0, 1 or 2 L2s in
+             * its l2_cache_core array, but we ignore these and
+             * let the child groups take the needed L2 cache ref
+             * on behalf of the virtual group.
+             * In other words; The L2 refs are taken in pair with
+             * the physical group which the L2 is attached to.
+             */
+            struct mali_group *child;
+            struct mali_group *temp;
+
+            /*
+             * Child group is inactive, get PM
+             * refs in order to power up.
+             */
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp,
+                    &group->group_list,
+                    struct mali_group, group_list) {
+                MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE
+                        == child->state);
+
+                child->state = MALI_GROUP_STATE_ACTIVATION_PENDING;
+
+                MALI_DEBUG_ASSERT_POINTER(
+                        child->pm_domain);
+                domains[num_domains] = child->pm_domain;
+                groups[num_domains] = child;
+                num_domains++;
+
+                /*
+                 * Take L2 domain ref for child group.
+                 */
+                MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS
+                        > num_domains);
+                domains[num_domains] = mali_l2_cache_get_pm_domain(
+                        child->l2_cache_core[0]);
+                groups[num_domains] = NULL;
+                MALI_DEBUG_ASSERT(NULL ==
+                        child->l2_cache_core[1]);
+                num_domains++;
+            }
+        } else {
+            /* Take L2 domain ref for physical groups. */
+            MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+                    num_domains);
+
+            domains[num_domains] = mali_l2_cache_get_pm_domain(
+                    group->l2_cache_core[0]);
+            groups[num_domains] = NULL;
+            MALI_DEBUG_ASSERT(NULL == group->l2_cache_core[1]);
+            num_domains++;
+        }
+
+        /* Do the group itself last (it's dependencies first) */
+
+        group->state = MALI_GROUP_STATE_ACTIVATION_PENDING;
+
+        MALI_DEBUG_ASSERT_POINTER(group->pm_domain);
+        domains[num_domains] = group->pm_domain;
+        groups[num_domains] = group;
+        num_domains++;
+
+        all_groups_on = mali_pm_get_domain_refs(domains, groups,
+                num_domains);
+
+        /*
+         * Complete activation for group, include
+         * virtual group or physical group.
+         */
+        if (MALI_TRUE == all_groups_on) {
+
+            mali_group_set_active(group);
+        }
+    } else if (MALI_GROUP_STATE_ACTIVE == group->state) {
+        /* Already active */
+        MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+    } else {
+        /*
+         * Activation already pending, group->power_is_on could
+         * be both true or false. We need to wait for power up
+         * notification anyway.
+         */
+        MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING
+                == group->state);
+    }
+
+    MALI_DEBUG_PRINT(4, ("Group: group %s activation result: %s\n",
+                mali_group_core_description(group),
+                MALI_GROUP_STATE_ACTIVE == group->state ?
+                "ACTIVE" : "PENDING"));
+
+    return group->state;
+}
+
+mali_bool mali_group_set_active(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING == group->state);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+
+    MALI_DEBUG_PRINT(4, ("Group: Activation completed for %s\n",
+                mali_group_core_description(group)));
+
+    if (mali_group_is_virtual(group)) {
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list,
+                struct mali_group, group_list) {
+            if (MALI_TRUE != child->power_is_on) {
+                return MALI_FALSE;
+            }
+
+            child->state = MALI_GROUP_STATE_ACTIVE;
+        }
+
+        mali_group_reset(group);
+    }
+
+    /* Go to ACTIVE state */
+    group->state = MALI_GROUP_STATE_ACTIVE;
+
+    return MALI_TRUE;
+}
+
+mali_bool mali_group_deactivate(struct mali_group *group)
+{
+    struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS];
+    u32 num_domains = 0;
+    mali_bool power_down = MALI_FALSE;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE != group->state);
+
+    MALI_DEBUG_PRINT(3, ("Group: Deactivating group %s\n",
+                mali_group_core_description(group)));
+
+    group->state = MALI_GROUP_STATE_INACTIVE;
+
+    MALI_DEBUG_ASSERT_POINTER(group->pm_domain);
+    domains[num_domains] = group->pm_domain;
+    num_domains++;
+
+    if (mali_group_is_virtual(group)) {
+        /* Release refs for all child groups */
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp,
+                &group->group_list,
+                struct mali_group, group_list) {
+            child->state = MALI_GROUP_STATE_INACTIVE;
+
+            MALI_DEBUG_ASSERT_POINTER(child->pm_domain);
+            domains[num_domains] = child->pm_domain;
+            num_domains++;
+
+            /* Release L2 cache domain for child groups */
+            MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+                    num_domains);
+            domains[num_domains] = mali_l2_cache_get_pm_domain(
+                    child->l2_cache_core[0]);
+            MALI_DEBUG_ASSERT(NULL == child->l2_cache_core[1]);
+            num_domains++;
+        }
+
+        /*
+         * Must do mali_group_power_down() steps right here for
+         * virtual group, because virtual group itself is likely to
+         * stay powered on, however child groups are now very likely
+         * to be powered off (and thus lose their state).
+         */
+
+        mali_group_clear_session(group);
+        /*
+         * Disable the broadcast unit (clear it's mask).
+         * This is needed in case the GPU isn't actually
+         * powered down at this point and groups are
+         * removed from an inactive virtual group.
+         * If not, then the broadcast unit will intercept
+         * their interrupts!
+         */
+        mali_bcast_disable(group->bcast_core);
+    } else {
+        /* Release L2 cache domain for physical groups */
+        MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+                num_domains);
+        domains[num_domains] = mali_l2_cache_get_pm_domain(
+                group->l2_cache_core[0]);
+        MALI_DEBUG_ASSERT(NULL == group->l2_cache_core[1]);
+        num_domains++;
+    }
+
+    power_down = mali_pm_put_domain_refs(domains, num_domains);
+
+    return power_down;
+}
+
+void mali_group_power_up(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Power up for %s\n",
+                mali_group_core_description(group)));
+
+    group->power_is_on = MALI_TRUE;
+
+    if (MALI_FALSE == mali_group_is_virtual(group)
+            && MALI_FALSE == mali_group_is_in_virtual(group)) {
+        mali_group_reset(group);
+    }
+
+    /*
+     * When we just acquire only one physical group form virt group,
+     * we should remove the bcast&dlbu mask from virt group and
+     * reset bcast and dlbu core, although part of pp cores in virt
+     * group maybe not be powered on.
+     */
+    if (MALI_TRUE == mali_group_is_virtual(group)) {
+        mali_bcast_reset(group->bcast_core);
+        mali_dlbu_update_mask(group->dlbu_core);
+    }
+}
+
+void mali_group_power_down(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Power down for %s\n",
+                mali_group_core_description(group)));
+
+    group->power_is_on = MALI_FALSE;
+
+    if (mali_group_is_virtual(group)) {
+        /*
+         * What we do for physical jobs in this function should
+         * already have been done in mali_group_deactivate()
+         * for virtual group.
+         */
+        MALI_DEBUG_ASSERT(NULL == group->session);
+    } else {
+        mali_group_clear_session(group);
+    }
+}
+
+MALI_DEBUG_CODE(static void mali_group_print_virtual(struct mali_group *vgroup)
+        {
+        u32 i;
+        struct mali_group *group;
+        struct mali_group *temp;
+
+        MALI_DEBUG_PRINT(4, ("Virtual group %s (%p)\n",
+                mali_group_core_description(vgroup),
+                vgroup));
+        MALI_DEBUG_PRINT(4, ("l2_cache_core[0] = %p, ref = %d\n", vgroup->l2_cache_core[0], vgroup->l2_cache_core_ref_count[0]));
+        MALI_DEBUG_PRINT(4, ("l2_cache_core[1] = %p, ref = %d\n", vgroup->l2_cache_core[1], vgroup->l2_cache_core_ref_count[1]));
+
+        i = 0;
+        _MALI_OSK_LIST_FOREACHENTRY(group, temp, &vgroup->group_list, struct mali_group, group_list) {
+        MALI_DEBUG_PRINT(4, ("[%d] %s (%p), l2_cache_core[0] = %p\n",
+                i, mali_group_core_description(group),
+                group, group->l2_cache_core[0]));
+        i++;
+        }
+        })
+
+/**
+ * @brief Add child group to virtual group parent
+ */
+void mali_group_add_group(struct mali_group *parent, struct mali_group *child)
+{
+    mali_bool found;
+    u32 i;
+
+    MALI_DEBUG_PRINT(3, ("Adding group %s to virtual group %s\n",
+                mali_group_core_description(child),
+                mali_group_core_description(parent)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+    MALI_DEBUG_ASSERT(!mali_group_is_virtual(child));
+    MALI_DEBUG_ASSERT(NULL == child->parent_group);
+
+    _mali_osk_list_addtail(&child->group_list, &parent->group_list);
+
+    child->parent_group = parent;
+
+    MALI_DEBUG_ASSERT_POINTER(child->l2_cache_core[0]);
+
+    MALI_DEBUG_PRINT(4, ("parent->l2_cache_core: [0] = %p, [1] = %p\n", parent->l2_cache_core[0], parent->l2_cache_core[1]));
+    MALI_DEBUG_PRINT(4, ("child->l2_cache_core: [0] = %p, [1] = %p\n", child->l2_cache_core[0], child->l2_cache_core[1]));
+
+    /* Keep track of the L2 cache cores of child groups */
+    found = MALI_FALSE;
+    for (i = 0; i < 2; i++) {
+        if (parent->l2_cache_core[i] == child->l2_cache_core[0]) {
+            MALI_DEBUG_ASSERT(parent->l2_cache_core_ref_count[i] > 0);
+            parent->l2_cache_core_ref_count[i]++;
+            found = MALI_TRUE;
+        }
+    }
+
+    if (!found) {
+        /* First time we see this L2 cache, add it to our list */
+        i = (NULL == parent->l2_cache_core[0]) ? 0 : 1;
+
+        MALI_DEBUG_PRINT(4, ("First time we see l2_cache %p. Adding to [%d] = %p\n", child->l2_cache_core[0], i, parent->l2_cache_core[i]));
+
+        MALI_DEBUG_ASSERT(NULL == parent->l2_cache_core[i]);
+
+        parent->l2_cache_core[i] = child->l2_cache_core[0];
+        parent->l2_cache_core_ref_count[i]++;
+    }
+
+    /* Update Broadcast Unit and DLBU */
+    mali_bcast_add_group(parent->bcast_core, child);
+    mali_dlbu_add_group(parent->dlbu_core, child);
+
+    if (MALI_TRUE == parent->power_is_on) {
+        mali_bcast_reset(parent->bcast_core);
+        mali_dlbu_update_mask(parent->dlbu_core);
+    }
+
+    if (MALI_TRUE == child->power_is_on) {
+        if (NULL == parent->session) {
+            if (NULL != child->session) {
+                /*
+                 * Parent has no session, so clear
+                 * child session as well.
+                 */
+                mali_mmu_activate_empty_page_directory(child->mmu);
+            }
+        } else {
+            if (parent->session == child->session) {
+                /* We already have same session as parent,
+                 * so a simple zap should be enough.
+                 */
+                mali_mmu_zap_tlb(child->mmu);
+            } else {
+                /*
+                 * Parent has a different session, so we must
+                 * switch to that sessions page table
+                 */
+                mali_mmu_activate_page_directory(child->mmu, mali_session_get_page_directory(parent->session));
+            }
+
+            /* It is the parent which keeps the session from now on */
+            child->session = NULL;
+        }
+    } else {
+        /* should have been cleared when child was powered down */
+        MALI_DEBUG_ASSERT(NULL == child->session);
+    }
+
+    /* Start job on child when parent is active */
+    if (NULL != parent->pp_running_job) {
+        struct mali_pp_job *job = parent->pp_running_job;
+
+        MALI_DEBUG_PRINT(3, ("Group %x joining running job %d on virtual group %x\n",
+                    child, mali_pp_job_get_id(job), parent));
+
+        /* Only allowed to add active child to an active parent */
+        MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == parent->state);
+        MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == child->state);
+
+        mali_pp_job_start(child->pp_core, job, mali_pp_core_get_id(child->pp_core), MALI_TRUE);
+
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+                mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+        trace_gpu_sched_switch(
+                mali_pp_core_description(group->pp_core),
+                sched_clock(), mali_pp_job_get_tid(job),
+                0, mali_pp_job_get_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+        trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+    }
+
+    MALI_DEBUG_CODE(mali_group_print_virtual(parent);)
+}
+
+/**
+ * @brief Remove child group from virtual group parent
+ */
+void mali_group_remove_group(struct mali_group *parent, struct mali_group *child)
+{
+    u32 i;
+
+    MALI_DEBUG_PRINT(3, ("Removing group %s from virtual group %s\n",
+                mali_group_core_description(child),
+                mali_group_core_description(parent)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+    MALI_DEBUG_ASSERT(!mali_group_is_virtual(child));
+    MALI_DEBUG_ASSERT(parent == child->parent_group);
+
+    /* Update Broadcast Unit and DLBU */
+    mali_bcast_remove_group(parent->bcast_core, child);
+    mali_dlbu_remove_group(parent->dlbu_core, child);
+
+    if (MALI_TRUE == parent->power_is_on) {
+        mali_bcast_reset(parent->bcast_core);
+        mali_dlbu_update_mask(parent->dlbu_core);
+    }
+
+    child->session = parent->session;
+    child->parent_group = NULL;
+
+    _mali_osk_list_delinit(&child->group_list);
+    if (_mali_osk_list_empty(&parent->group_list)) {
+        parent->session = NULL;
+    }
+
+    /* Keep track of the L2 cache cores of child groups */
+    i = (child->l2_cache_core[0] == parent->l2_cache_core[0]) ? 0 : 1;
+
+    MALI_DEBUG_ASSERT(child->l2_cache_core[0] == parent->l2_cache_core[i]);
+
+    parent->l2_cache_core_ref_count[i]--;
+    if (parent->l2_cache_core_ref_count[i] == 0) {
+        parent->l2_cache_core[i] = NULL;
+    }
+
+    MALI_DEBUG_CODE(mali_group_print_virtual(parent));
+}
+
+struct mali_group *mali_group_acquire_group(struct mali_group *parent)
+{
+    struct mali_group *child = NULL;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+
+    if (!_mali_osk_list_empty(&parent->group_list)) {
+        child = _MALI_OSK_LIST_ENTRY(parent->group_list.prev, struct mali_group, group_list);
+        mali_group_remove_group(parent, child);
+    }
+
+    if (NULL != child) {
+        if (MALI_GROUP_STATE_ACTIVE != parent->state
+                && MALI_TRUE == child->power_is_on) {
+            mali_group_reset(child);
+        }
+    }
+
+    return child;
+}
+
+void mali_group_reset(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(NULL == group->gp_running_job);
+    MALI_DEBUG_ASSERT(NULL == group->pp_running_job);
+    MALI_DEBUG_ASSERT(NULL == group->session);
+
+    MALI_DEBUG_PRINT(3, ("Group: reset of %s\n",
+                mali_group_core_description(group)));
+
+    if (NULL != group->dlbu_core) {
+        mali_dlbu_reset(group->dlbu_core);
+    }
+
+    if (NULL != group->bcast_core) {
+        mali_bcast_reset(group->bcast_core);
+    }
+
+    MALI_DEBUG_ASSERT(NULL != group->mmu);
+    mali_group_reset_mmu(group);
+
+    if (NULL != group->gp_core) {
+        MALI_DEBUG_ASSERT(NULL == group->pp_core);
+        mali_gp_reset(group->gp_core);
+    } else {
+        MALI_DEBUG_ASSERT(NULL != group->pp_core);
+        mali_group_reset_pp(group);
+    }
+}
+
+void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job)
+{
+    struct mali_session_data *session;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Starting GP job 0x%08X on group %s\n",
+                job,
+                mali_group_core_description(group)));
+
+    session = mali_gp_job_get_session(job);
+
+    MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]);
+    mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_gp_job_get_cache_order(job));
+
+    mali_group_activate_page_directory(group, session);
+
+    mali_gp_job_start(group->gp_core, job);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+            MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0) |
+            MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+            mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job), 0, 0, 0);
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+            mali_gp_job_get_pid(job), mali_gp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+    trace_mali_core_active(mali_gp_job_get_pid(job), 1 /* active */, 1 /* GP */,  0 /* core */,
+            mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+    if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+            (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+        mali_group_report_l2_cache_counters_per_core(group, 0);
+    }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+    trace_gpu_sched_switch(mali_gp_core_description(group->gp_core),
+            sched_clock(), mali_gp_job_get_tid(job),
+            0, mali_gp_job_get_id(job));
+#endif
+
+    group->gp_running_job = job;
+    group->is_working = MALI_TRUE;
+
+    /* Setup SW timer and record start time */
+    group->start_time = _mali_osk_time_tickcount();
+    _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime));
+
+    MALI_DEBUG_PRINT(4, ("Group: Started GP job 0x%08X on group %s at %u\n",
+                job,
+                mali_group_core_description(group),
+                group->start_time));
+}
+
+/* Used to set all the registers except frame renderer list address and fragment shader stack address
+ * It means the caller must set these two registers properly before calling this function
+ */
+void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job)
+{
+    struct mali_session_data *session;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Starting PP job 0x%08X part %u/%u on group %s\n",
+                job, sub_job + 1,
+                mali_pp_job_get_sub_job_count(job),
+                mali_group_core_description(group)));
+
+    session = mali_pp_job_get_session(job);
+
+    if (NULL != group->l2_cache_core[0]) {
+        mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_pp_job_get_cache_order(job));
+    }
+
+    if (NULL != group->l2_cache_core[1]) {
+        mali_l2_cache_invalidate_conditional(group->l2_cache_core[1], mali_pp_job_get_cache_order(job));
+    }
+
+    mali_group_activate_page_directory(group, session);
+
+    if (mali_group_is_virtual(group)) {
+        struct mali_group *child;
+        struct mali_group *temp;
+        u32 core_num = 0;
+
+        MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job));
+
+        /* Configure DLBU for the job */
+        mali_dlbu_config_job(group->dlbu_core, job);
+
+        /* Write stack address for each child group */
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            mali_pp_write_addr_stack(child->pp_core, job);
+            core_num++;
+        }
+
+        mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE);
+    } else {
+        mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE);
+    }
+
+    /* if the group is virtual, loop through physical groups which belong to this group
+     * and call profiling events for its cores as virtual */
+    if (MALI_TRUE == mali_group_is_virtual(group)) {
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+                    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                    MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+                    mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+            _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                    MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+                    mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+            trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+                    mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+        }
+
+#if defined(CONFIG_MALI400_PROFILING)
+        if (0 != group->l2_cache_core_ref_count[0]) {
+            if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                    (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+                mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+            }
+        }
+        if (0 != group->l2_cache_core_ref_count[1]) {
+            if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1])) &&
+                    (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]))) {
+                mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1]));
+            }
+        }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+    } else { /* group is physical - call profiling events for physical cores */
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL,
+                mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+        trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core),
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+        if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+            mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+        }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+    }
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+    trace_gpu_sched_switch(mali_pp_core_description(group->pp_core),
+            sched_clock(), mali_pp_job_get_tid(job),
+            0, mali_pp_job_get_id(job));
+#endif
+
+    group->pp_running_job = job;
+    group->pp_running_sub_job = sub_job;
+    group->is_working = MALI_TRUE;
+
+    /* Setup SW timer and record start time */
+    group->start_time = _mali_osk_time_tickcount();
+    _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime));
+
+    MALI_DEBUG_PRINT(4, ("Group: Started PP job 0x%08X part %u/%u on group %s at %u\n",
+                job, sub_job + 1,
+                mali_pp_job_get_sub_job_count(job),
+                mali_group_core_description(group),
+                group->start_time));
+
+}
+
+void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr)
+{
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]);
+    mali_l2_cache_invalidate(group->l2_cache_core[0]);
+
+    mali_mmu_zap_tlb_without_stall(group->mmu);
+
+    mali_gp_resume_with_new_heap(group->gp_core, start_addr, end_addr);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME |
+            MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+            0, 0, 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+    trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 1 /* active */, 1 /* GP */,  0 /* core */,
+            mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job));
+#endif
+}
+
+static void mali_group_reset_mmu(struct mali_group *group)
+{
+    struct mali_group *child;
+    struct mali_group *temp;
+    _mali_osk_errcode_t err;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    if (!mali_group_is_virtual(group)) {
+        /* This is a physical group or an idle virtual group -- simply wait for
+         * the reset to complete. */
+        err = mali_mmu_reset(group->mmu);
+        MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+    } else { /* virtual group */
+        /* Loop through all members of this virtual group and wait
+         * until they are done resetting.
+         */
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            err = mali_mmu_reset(child->mmu);
+            MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+        }
+    }
+}
+
+static void mali_group_reset_pp(struct mali_group *group)
+{
+    struct mali_group *child;
+    struct mali_group *temp;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    mali_pp_reset_async(group->pp_core);
+
+    if (!mali_group_is_virtual(group) || NULL == group->pp_running_job) {
+        /* This is a physical group or an idle virtual group -- simply wait for
+         * the reset to complete. */
+        mali_pp_reset_wait(group->pp_core);
+    } else {
+        /* Loop through all members of this virtual group and wait until they
+         * are done resetting.
+         */
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            mali_pp_reset_wait(child->pp_core);
+        }
+    }
+}
+
+struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job)
+{
+    struct mali_pp_job *pp_job_to_return;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_running_job);
+    MALI_DEBUG_ASSERT_POINTER(sub_job);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->is_working);
+
+    /* Stop/clear the timeout timer. */
+    _mali_osk_timer_del_async(group->timeout_timer);
+
+    if (NULL != group->pp_running_job) {
+
+        /* Deal with HW counters and profiling */
+
+        if (MALI_TRUE == mali_group_is_virtual(group)) {
+            struct mali_group *child;
+            struct mali_group *temp;
+
+            /* update performance counters from each physical pp core within this virtual group */
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+                mali_pp_update_performance_counters(group->pp_core, child->pp_core, group->pp_running_job, mali_pp_core_get_id(child->pp_core));
+            }
+
+#if defined(CONFIG_MALI400_PROFILING)
+            /* send profiling data per physical core */
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+                _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                        MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                        MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+                        mali_pp_job_get_perf_counter_value0(group->pp_running_job, mali_pp_core_get_id(child->pp_core)),
+                        mali_pp_job_get_perf_counter_value1(group->pp_running_job, mali_pp_core_get_id(child->pp_core)),
+                        mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8),
+                        0, 0);
+
+                trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job),
+                        0 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+                        mali_pp_job_get_frame_builder_id(group->pp_running_job),
+                        mali_pp_job_get_flush_id(group->pp_running_job));
+            }
+            if (0 != group->l2_cache_core_ref_count[0]) {
+                if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                        (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+                    mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+                }
+            }
+            if (0 != group->l2_cache_core_ref_count[1]) {
+                if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1])) &&
+                        (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]))) {
+                    mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1]));
+                }
+            }
+
+#endif
+        } else {
+            /* update performance counters for a physical group's pp core */
+            mali_pp_update_performance_counters(group->pp_core, group->pp_core, group->pp_running_job, group->pp_running_sub_job);
+
+#if defined(CONFIG_MALI400_PROFILING)
+            _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+                    MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL,
+                    mali_pp_job_get_perf_counter_value0(group->pp_running_job, group->pp_running_sub_job),
+                    mali_pp_job_get_perf_counter_value1(group->pp_running_job, group->pp_running_sub_job),
+                    mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8),
+                    0, 0);
+
+            trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job),
+                    0 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core),
+                    mali_pp_job_get_frame_builder_id(group->pp_running_job),
+                    mali_pp_job_get_flush_id(group->pp_running_job));
+
+            if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                    (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+                mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+            }
+#endif
+        }
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+        trace_gpu_sched_switch(
+                mali_gp_core_description(group->gp_core),
+                sched_clock(), 0, 0, 0);
+#endif
+
+    }
+
+    if (success) {
+        /* Only do soft reset for successful jobs, a full recovery
+         * reset will be done for failed jobs. */
+        mali_pp_reset_async(group->pp_core);
+    }
+
+    pp_job_to_return = group->pp_running_job;
+    group->pp_running_job = NULL;
+    group->is_working = MALI_FALSE;
+    *sub_job = group->pp_running_sub_job;
+
+    if (!success) {
+        MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n"));
+        mali_group_recovery_reset(group);
+    } else if (_MALI_OSK_ERR_OK != mali_pp_reset_wait(group->pp_core)) {
+        MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n"));
+        mali_group_recovery_reset(group);
+    }
+
+    return pp_job_to_return;
+}
+
+struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success)
+{
+    struct mali_gp_job *gp_job_to_return;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_running_job);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->is_working);
+
+    /* Stop/clear the timeout timer. */
+    _mali_osk_timer_del_async(group->timeout_timer);
+
+    if (NULL != group->gp_running_job) {
+        mali_gp_update_performance_counters(group->gp_core, group->gp_running_job);
+
+#if defined(CONFIG_MALI400_PROFILING)
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+                mali_gp_job_get_perf_counter_value0(group->gp_running_job),
+                mali_gp_job_get_perf_counter_value1(group->gp_running_job),
+                mali_gp_job_get_perf_counter_src0(group->gp_running_job) | (mali_gp_job_get_perf_counter_src1(group->gp_running_job) << 8),
+                0, 0);
+
+        if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0])))
+            mali_group_report_l2_cache_counters_per_core(group, 0);
+#endif
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+        trace_gpu_sched_switch(
+                mali_pp_core_description(group->pp_core),
+                sched_clock(), 0, 0, 0);
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+        trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 0 /* active */, 1 /* GP */,  0 /* core */,
+                mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job));
+#endif
+
+        mali_gp_job_set_current_heap_addr(group->gp_running_job,
+                mali_gp_read_plbu_alloc_start_addr(group->gp_core));
+    }
+
+    if (success) {
+        /* Only do soft reset for successful jobs, a full recovery
+         * reset will be done for failed jobs. */
+        mali_gp_reset_async(group->gp_core);
+    }
+
+    gp_job_to_return = group->gp_running_job;
+    group->gp_running_job = NULL;
+    group->is_working = MALI_FALSE;
+
+    if (!success) {
+        MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n"));
+        mali_group_recovery_reset(group);
+    } else if (_MALI_OSK_ERR_OK != mali_gp_reset_wait(group->gp_core)) {
+        MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n"));
+        mali_group_recovery_reset(group);
+    }
+
+    return gp_job_to_return;
+}
+
+struct mali_group *mali_group_get_glob_group(u32 index)
+{
+    if (mali_global_num_groups > index) {
+        return mali_global_groups[index];
+    }
+
+    return NULL;
+}
+
+u32 mali_group_get_glob_num_groups(void)
+{
+    return mali_global_num_groups;
+}
+
+static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session)
+{
+    MALI_DEBUG_PRINT(5, ("Mali group: Activating page directory 0x%08X from session 0x%08X on group %s\n",
+                mali_session_get_page_directory(session), session,
+                mali_group_core_description(group)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    if (group->session != session) {
+        /* Different session than last time, so we need to do some work */
+        MALI_DEBUG_PRINT(5, ("Mali group: Activate session: %08x previous: %08x on group %s\n",
+                    session, group->session,
+                    mali_group_core_description(group)));
+        mali_mmu_activate_page_directory(group->mmu, mali_session_get_page_directory(session));
+        group->session = session;
+    } else {
+        /* Same session as last time, so no work required */
+        MALI_DEBUG_PRINT(4, ("Mali group: Activate existing session 0x%08X on group %s\n",
+                    session->page_directory,
+                    mali_group_core_description(group)));
+        mali_mmu_zap_tlb_without_stall(group->mmu);
+    }
+}
+
+static void mali_group_recovery_reset(struct mali_group *group)
+{
+    _mali_osk_errcode_t err;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    /* Stop cores, bus stop */
+    if (NULL != group->pp_core) {
+        mali_pp_stop_bus(group->pp_core);
+    } else {
+        mali_gp_stop_bus(group->gp_core);
+    }
+
+    /* Flush MMU and clear page fault (if any) */
+    mali_mmu_activate_fault_flush_page_directory(group->mmu);
+    mali_mmu_page_fault_done(group->mmu);
+
+    /* Wait for cores to stop bus, then do a hard reset on them */
+    if (NULL != group->pp_core) {
+        if (mali_group_is_virtual(group)) {
+            struct mali_group *child, *temp;
+
+            /* Disable the broadcast unit while we do reset directly on the member cores. */
+            mali_bcast_disable(group->bcast_core);
+
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+                mali_pp_stop_bus_wait(child->pp_core);
+                mali_pp_hard_reset(child->pp_core);
+            }
+
+            mali_bcast_enable(group->bcast_core);
+        } else {
+            mali_pp_stop_bus_wait(group->pp_core);
+            mali_pp_hard_reset(group->pp_core);
+        }
+    } else {
+        mali_gp_stop_bus_wait(group->gp_core);
+        mali_gp_hard_reset(group->gp_core);
+    }
+
+    /* Reset MMU */
+    err = mali_mmu_reset(group->mmu);
+    MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+    MALI_IGNORE(err);
+
+    group->session = NULL;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size)
+{
+    int n = 0;
+    int i;
+    struct mali_group *child;
+    struct mali_group *temp;
+
+    if (mali_group_is_virtual(group)) {
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "Virtual PP Group: %p\n", group);
+    } else if (mali_group_is_in_virtual(group)) {
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "Child PP Group: %p\n", group);
+    } else if (NULL != group->pp_core) {
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "Physical PP Group: %p\n", group);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "GP Group: %p\n", group);
+    }
+
+    switch (group->state) {
+        case MALI_GROUP_STATE_INACTIVE:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: INACTIVE\n");
+            break;
+        case MALI_GROUP_STATE_ACTIVATION_PENDING:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: ACTIVATION_PENDING\n");
+            break;
+        case MALI_GROUP_STATE_ACTIVE:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: MALI_GROUP_STATE_ACTIVE\n");
+            break;
+        default:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: UNKNOWN (%d)\n", group->state);
+            MALI_DEBUG_ASSERT(0);
+            break;
+    }
+
+    n += _mali_osk_snprintf(buf + n, size - n,
+            "\tSW power: %s\n",
+            group->power_is_on ? "On" : "Off");
+
+    n += mali_pm_dump_state_domain(group->pm_domain, buf + n, size - n);
+
+    for (i = 0; i < 2; i++) {
+        if (NULL != group->l2_cache_core[i]) {
+            struct mali_pm_domain *domain;
+            domain = mali_l2_cache_get_pm_domain(
+                    group->l2_cache_core[i]);
+            n += mali_pm_dump_state_domain(domain,
+                    buf + n, size - n);
+        }
+    }
+
+    if (group->gp_core) {
+        n += mali_gp_dump_state(group->gp_core, buf + n, size - n);
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "\tGP running job: %p\n", group->gp_running_job);
+    }
+
+    if (group->pp_core) {
+        n += mali_pp_dump_state(group->pp_core, buf + n, size - n);
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "\tPP running job: %p, subjob %d \n",
+                group->pp_running_job,
+                group->pp_running_sub_job);
+    }
+
+    _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list,
+            struct mali_group, group_list) {
+        n += mali_group_dump_state(child, buf + n, size - n);
+    }
+
+    return n;
+}
+#endif
+
+/* Kasin added. */
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+#include <platform/meson_m400/mali_fix.h>
+#define INT_MALI_PP2_MMU ( 6+32)
+struct _mali_osk_irq_t_struct;
+u32 get_irqnum(struct _mali_osk_irq_t_struct* irq);
+#endif
+
+_mali_osk_errcode_t mali_group_upper_half_mmu(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_mmu_core *mmu = group->mmu;
+#endif
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (MALI_FALSE == group->power_is_on)
+        MALI_SUCCESS;
+    if (get_irqnum(mmu->irq) == INT_MALI_PP2_MMU)
+    {
+        if (group == NULL || group->pp_core == NULL)
+            MALI_SUCCESS;
+        if (group->pp_core->core_id == 0) {
+            if (malifix_get_mmu_int_process_state(0) == MMU_INT_HIT)
+                malifix_set_mmu_int_process_state(0, MMU_INT_TOP);
+            else
+                MALI_SUCCESS;
+        }
+        else if (group->pp_core->core_id == 1) {
+            if (malifix_get_mmu_int_process_state(1) == MMU_INT_HIT)
+                malifix_set_mmu_int_process_state(1, MMU_INT_TOP);
+            else
+                MALI_SUCCESS;
+        } else
+            MALI_SUCCESS;
+    }
+#endif
+
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+
+    ret = mali_executor_interrupt_mmu(group, MALI_TRUE);
+
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+
+    return ret;
+}
+
+static void mali_group_bottom_half_mmu(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_mmu_core *mmu = group->mmu;
+#endif
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+
+    mali_executor_interrupt_mmu(group, MALI_FALSE);
+
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (get_irqnum(mmu->irq) == INT_MALI_PP2_MMU)
+    {
+        if (group->pp_core->core_id == 0) {
+            if (malifix_get_mmu_int_process_state(0) == MMU_INT_TOP)
+                malifix_set_mmu_int_process_state(0, MMU_INT_NONE);
+        }
+        else if (group->pp_core->core_id == 1) {
+            if (malifix_get_mmu_int_process_state(1) == MMU_INT_TOP)
+                malifix_set_mmu_int_process_state(1, MMU_INT_NONE);
+        }
+    }
+#endif
+}
+
+_mali_osk_errcode_t mali_group_upper_half_gp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+
+    MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n",
+                mali_gp_get_rawstat(group->gp_core),
+                mali_group_core_description(group)));
+
+    ret = mali_executor_interrupt_gp(group, MALI_TRUE);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+
+    return ret;
+}
+
+static void mali_group_bottom_half_gp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+
+    mali_executor_interrupt_gp(group, MALI_FALSE);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+}
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+int PP0_int_cnt = 0;
+int mali_PP0_int_cnt(void)
+{
+    return PP0_int_cnt;
+}
+EXPORT_SYMBOL(mali_PP0_int_cnt);
+
+int PP1_int_cnt = 0;
+int mali_PP1_int_cnt(void)
+{
+    return PP1_int_cnt;
+}
+EXPORT_SYMBOL(mali_PP1_int_cnt);
+#endif
+
+_mali_osk_errcode_t mali_group_upper_half_pp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_pp_core *core = group->pp_core;
+#endif
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+
+    MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n",
+                mali_pp_get_rawstat(group->pp_core),
+                mali_group_core_description(group)));
+
+    ret = mali_executor_interrupt_pp(group, MALI_TRUE);
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (core->core_id == 0)
+        PP0_int_cnt++;
+    else if (core->core_id == 1)
+        PP1_int_cnt++;
+#endif
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+
+    return ret;
+}
+
+static void mali_group_bottom_half_pp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+
+    mali_executor_interrupt_pp(group, MALI_FALSE);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+}
+
+static void mali_group_timeout(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+    MALI_DEBUG_ASSERT_POINTER(group);
+
+    MALI_DEBUG_PRINT(2, ("Group: timeout handler for %s at %u\n",
+                mali_group_core_description(group),
+                _mali_osk_time_tickcount()));
+
+    if (mali_core_timeout < 65533)
+        mali_core_timeout++;
+    if (NULL != group->gp_core) {
+        mali_group_schedule_bottom_half_gp(group);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+        mali_group_schedule_bottom_half_pp(group);
+    }
+}
+
+mali_bool mali_group_zap_session(struct mali_group *group,
+        struct mali_session_data *session)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(session);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    if (group->session != session) {
+        /* not running from this session */
+        return MALI_TRUE; /* success */
+    }
+
+    if (group->is_working) {
+        /* The Zap also does the stall and disable_stall */
+        mali_bool zap_success = mali_mmu_zap_tlb(group->mmu);
+        return zap_success;
+    } else {
+        /* Just remove the session instead of zapping */
+        mali_group_clear_session(group);
+        return MALI_TRUE; /* success */
+    }
+}
+
+#if defined(CONFIG_MALI400_PROFILING)
+static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num)
+{
+    u32 source0 = 0;
+    u32 value0 = 0;
+    u32 source1 = 0;
+    u32 value1 = 0;
+    u32 profiling_channel = 0;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    switch (core_num) {
+        case 0:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS;
+            break;
+        case 1:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS;
+            break;
+        case 2:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS;
+            break;
+        default:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS;
+            break;
+    }
+
+    if (0 == core_num) {
+        mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+    }
+    if (1 == core_num) {
+        if (1 == mali_l2_cache_get_id(group->l2_cache_core[0])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+        } else if (1 == mali_l2_cache_get_id(group->l2_cache_core[1])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1);
+        }
+    }
+    if (2 == core_num) {
+        if (2 == mali_l2_cache_get_id(group->l2_cache_core[0])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+        } else if (2 == mali_l2_cache_get_id(group->l2_cache_core[1])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1);
+        }
+    }
+
+    _mali_osk_profiling_add_event(profiling_channel, source1 << 8 | source0, value0, value1, 0, 0);
+}
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
diff --git a/utgard/r5p1/common/mali_group.h b/utgard/r5p1/common/mali_group.h
new file mode 100755
index 0000000..9f9fd6b
--- /dev/null
+++ b/utgard/r5p1/common/mali_group.h
@@ -0,0 +1,457 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GROUP_H__
+#define __MALI_GROUP_H__
+
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_mmu.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_session.h"
+#include "mali_osk_profiling.h"
+
+/**
+ * @brief Default max runtime [ms] for a core job - used by timeout timers
+ */
+#define MALI_MAX_JOB_RUNTIME_DEFAULT 5000
+
+extern int mali_max_job_runtime;
+
+#define MALI_MAX_NUMBER_OF_GROUPS 10
+#define MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS 8
+
+enum mali_group_state {
+	MALI_GROUP_STATE_INACTIVE,
+	MALI_GROUP_STATE_ACTIVATION_PENDING,
+	MALI_GROUP_STATE_ACTIVE,
+};
+
+/**
+ * The structure represents a render group
+ * A render group is defined by all the cores that share the same Mali MMU
+ */
+
+struct mali_group {
+	struct mali_mmu_core        *mmu;
+	struct mali_session_data    *session;
+
+	enum mali_group_state        state;
+	mali_bool                    power_is_on;
+
+	mali_bool                    is_working;
+	unsigned long                start_time; /* in ticks */
+
+	struct mali_gp_core         *gp_core;
+	struct mali_gp_job          *gp_running_job;
+
+	struct mali_pp_core         *pp_core;
+	struct mali_pp_job          *pp_running_job;
+	u32                         pp_running_sub_job;
+
+	struct mali_pm_domain       *pm_domain;
+
+	struct mali_l2_cache_core   *l2_cache_core[2];
+	u32                         l2_cache_core_ref_count[2];
+
+	/* Parent virtual group (if any) */
+	struct mali_group           *parent_group;
+
+	struct mali_dlbu_core       *dlbu_core;
+	struct mali_bcast_unit      *bcast_core;
+
+	/* Used for working groups which needs to be disabled */
+	mali_bool                    disable_requested;
+
+	/* Used by group to link child groups (for virtual group) */
+	_mali_osk_list_t            group_list;
+
+	/* Used by executor module in order to link groups of same state */
+	_mali_osk_list_t            executor_list;
+
+	/* Used by PM domains to link groups of same domain */
+	_mali_osk_list_t             pm_domain_list;
+
+	_mali_osk_wq_work_t         *bottom_half_work_mmu;
+	_mali_osk_wq_work_t         *bottom_half_work_gp;
+	_mali_osk_wq_work_t         *bottom_half_work_pp;
+
+	_mali_osk_timer_t           *timeout_timer;
+};
+
+/** @brief Create a new Mali group object
+ *
+ * @return A pointer to a new group object
+ */
+struct mali_group *mali_group_create(struct mali_l2_cache_core *core,
+				     struct mali_dlbu_core *dlbu,
+				     struct mali_bcast_unit *bcast,
+				     u32 domain_index);
+
+void mali_group_delete(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group,
+		struct mali_mmu_core *mmu_core);
+void mali_group_remove_mmu_core(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group,
+		struct mali_gp_core *gp_core);
+void mali_group_remove_gp_core(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group,
+		struct mali_pp_core *pp_core);
+void mali_group_remove_pp_core(struct mali_group *group);
+
+MALI_STATIC_INLINE const char *mali_group_core_description(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	if (NULL != group->pp_core) {
+		return mali_pp_core_description(group->pp_core);
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+		return mali_gp_core_description(group->gp_core);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_is_virtual(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+#if defined(CONFIG_MALI450)
+	return (NULL != group->dlbu_core);
+#else
+	return MALI_FALSE;
+#endif
+}
+
+/** @brief Check if a group is a part of a virtual group or not
+ */
+MALI_STATIC_INLINE mali_bool mali_group_is_in_virtual(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+#if defined(CONFIG_MALI450)
+	return (NULL != group->parent_group) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif
+}
+
+/** @brief Reset group
+ *
+ * This function will reset the entire group,
+ * including all the cores present in the group.
+ *
+ * @param group Pointer to the group to reset
+ */
+void mali_group_reset(struct mali_group *group);
+
+MALI_STATIC_INLINE struct mali_session_data *mali_group_get_session(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	return group->session;
+}
+
+MALI_STATIC_INLINE void mali_group_clear_session(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (NULL != group->session) {
+		mali_mmu_activate_empty_page_directory(group->mmu);
+		group->session = NULL;
+	}
+}
+
+enum mali_group_state mali_group_activate(struct mali_group *group);
+
+/*
+ * Change state from ACTIVATION_PENDING to ACTIVE
+ * For virtual group, all childs need to be ACTIVE first
+ */
+mali_bool mali_group_set_active(struct mali_group *group);
+
+/*
+ * @return MALI_TRUE means one or more domains can now be powered off,
+ * and caller should call either mali_pm_update_async() or
+ * mali_pm_update_sync() in order to do so.
+ */
+mali_bool mali_group_deactivate(struct mali_group *group);
+
+MALI_STATIC_INLINE enum mali_group_state mali_group_get_state(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->state;
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_power_is_on(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->power_is_on;
+}
+
+void mali_group_power_up(struct mali_group *group);
+void mali_group_power_down(struct mali_group *group);
+
+MALI_STATIC_INLINE void mali_group_set_disable_request(
+	struct mali_group *group, mali_bool disable)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	group->disable_requested = disable;
+
+	/**
+	 * When one of child group's disable_requeset is set TRUE, then
+	 * the disable_request of parent group should also be set to TRUE.
+	 * While, the disable_request of parent group should only be set to FALSE
+	 * only when all of its child group's disable_request are set to FALSE.
+	 */
+	if (NULL != group->parent_group && MALI_TRUE == disable) {
+		group->parent_group->disable_requested = disable;
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_disable_requested(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->disable_requested;
+}
+
+/** @brief Virtual groups */
+void mali_group_add_group(struct mali_group *parent, struct mali_group *child);
+struct mali_group *mali_group_acquire_group(struct mali_group *parent);
+void mali_group_remove_group(struct mali_group *parent, struct mali_group *child);
+
+/** @brief Checks if the group is working.
+ */
+MALI_STATIC_INLINE mali_bool mali_group_is_working(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	if (mali_group_is_in_virtual(group)) {
+		struct mali_group *tmp_group = mali_executor_get_virtual_group();
+		return tmp_group->is_working;
+	}
+	return group->is_working;
+}
+
+MALI_STATIC_INLINE struct mali_gp_job *mali_group_get_running_gp_job(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->gp_running_job;
+}
+
+/** @brief Zap MMU TLB on all groups
+ *
+ * Zap TLB on group if \a session is active.
+ */
+mali_bool mali_group_zap_session(struct mali_group *group,
+				 struct mali_session_data *session);
+
+/** @brief Get pointer to GP core object
+ */
+MALI_STATIC_INLINE struct mali_gp_core *mali_group_get_gp_core(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->gp_core;
+}
+
+/** @brief Get pointer to PP core object
+ */
+MALI_STATIC_INLINE struct mali_pp_core *mali_group_get_pp_core(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->pp_core;
+}
+
+/** @brief Start GP job
+ */
+void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job);
+
+void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job);
+
+/** @brief Start virtual group Job on a virtual group
+*/
+void mali_group_start_job_on_virtual(struct mali_group *group, struct mali_pp_job *job, u32 first_subjob, u32 last_subjob);
+
+
+/** @brief Start a subjob from a particular on a specific PP group
+*/
+void mali_group_start_job_on_group(struct mali_group *group, struct mali_pp_job *job, u32 subjob);
+
+
+/** @brief remove all the unused groups in tmp_unused group  list, so that the group is in consistent status.
+ */
+void mali_group_non_dlbu_job_done_virtual(struct mali_group *group);
+
+
+/** @brief Resume GP job that suspended waiting for more heap memory
+ */
+void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr);
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_get_interrupt_result(group->gp_core);
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_get_interrupt_result(group->pp_core);
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_mmu(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->mmu);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_mmu_get_interrupt_result(group->mmu);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_gp_is_active(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_is_active(group->gp_core);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_pp_is_active(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_is_active(group->pp_core);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_has_timed_out(struct mali_group *group)
+{
+	unsigned long time_cost;
+	struct mali_group *tmp_group = group;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* if the group is in virtual need to use virtual_group's start time */
+	if (mali_group_is_in_virtual(group)) {
+		tmp_group = mali_executor_get_virtual_group();
+	}
+
+	time_cost = _mali_osk_time_tickcount() - tmp_group->start_time;
+	if (_mali_osk_time_mstoticks(mali_max_job_runtime) <= time_cost) {
+		/*
+		 * current tick is at or after timeout end time,
+		 * so this is a valid timeout
+		 */
+		return MALI_TRUE;
+	} else {
+		/*
+		 * Not a valid timeout. A HW interrupt probably beat
+		 * us to it, and the timer wasn't properly deleted
+		 * (async deletion used due to atomic context).
+		 */
+		return MALI_FALSE;
+	}
+}
+
+MALI_STATIC_INLINE void mali_group_mask_all_interrupts_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_mask_all_interrupts(group->gp_core);
+}
+
+MALI_STATIC_INLINE void mali_group_mask_all_interrupts_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_mask_all_interrupts(group->pp_core);
+}
+
+MALI_STATIC_INLINE void mali_group_enable_interrupts_gp(
+	struct mali_group *group,
+	enum mali_interrupt_result exceptions)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	mali_gp_enable_interrupts(group->gp_core, exceptions);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_gp);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_pp);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_mmu(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->mmu);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_mmu);
+}
+
+struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job);
+
+struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success);
+
+#if defined(CONFIG_MALI400_PROFILING)
+MALI_STATIC_INLINE void mali_group_oom(struct mali_group *group)
+{
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND |
+				      MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+				      0, 0, 0, 0, 0);
+}
+#endif
+
+struct mali_group *mali_group_get_glob_group(u32 index);
+u32 mali_group_get_glob_num_groups(void);
+
+u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size);
+
+
+_mali_osk_errcode_t mali_group_upper_half_mmu(void *data);
+_mali_osk_errcode_t mali_group_upper_half_gp(void *data);
+_mali_osk_errcode_t mali_group_upper_half_pp(void *data);
+
+MALI_STATIC_INLINE mali_bool mali_group_is_empty(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT(mali_group_is_virtual(group));
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return _mali_osk_list_empty(&group->group_list);
+}
+
+#endif /* __MALI_GROUP_H__ */
diff --git a/utgard/r5p1/common/mali_hw_core.c b/utgard/r5p1/common/mali_hw_core.c
new file mode 100755
index 0000000..c90cf38
--- /dev/null
+++ b/utgard/r5p1/common/mali_hw_core.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_hw_core.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_osk_mali.h"
+
+_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size)
+{
+	core->phys_addr = resource->base;
+	core->phys_offset = resource->base - _mali_osk_resource_base_address();
+	core->description = resource->description;
+	core->size = reg_size;
+
+	MALI_DEBUG_ASSERT(core->phys_offset < core->phys_addr);
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_mem_reqregion(core->phys_addr, core->size, core->description)) {
+		core->mapped_registers = _mali_osk_mem_mapioregion(core->phys_addr, core->size, core->description);
+		if (NULL != core->mapped_registers) {
+			return _MALI_OSK_ERR_OK;
+		} else {
+			MALI_PRINT_ERROR(("Failed to map memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr));
+		}
+		_mali_osk_mem_unreqregion(core->phys_addr, core->size);
+	} else {
+		MALI_PRINT_ERROR(("Failed to request memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr));
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_hw_core_delete(struct mali_hw_core *core)
+{
+	if (NULL != core->mapped_registers) {
+		_mali_osk_mem_unmapioregion(core->phys_addr, core->size, core->mapped_registers);
+		core->mapped_registers = NULL;
+	}
+	_mali_osk_mem_unreqregion(core->phys_addr, core->size);
+}
diff --git a/utgard/r5p1/common/mali_hw_core.h b/utgard/r5p1/common/mali_hw_core.h
new file mode 100755
index 0000000..9fbac23
--- /dev/null
+++ b/utgard/r5p1/common/mali_hw_core.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_HW_CORE_H__
+#define __MALI_HW_CORE_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/**
+ * The common parts for all Mali HW cores (GP, PP, MMU, L2 and PMU)
+ * This struct is embedded inside all core specific structs.
+ */
+struct mali_hw_core {
+	uintptr_t phys_addr;              /**< Physical address of the registers */
+	u32 phys_offset;                  /**< Offset from start of Mali to registers */
+	u32 size;                         /**< Size of registers */
+	mali_io_address mapped_registers; /**< Virtual mapping of the registers */
+	const char *description;          /**< Name of unit (as specified in device configuration) */
+};
+
+#define MALI_REG_POLL_COUNT_FAST 1000
+#define MALI_REG_POLL_COUNT_SLOW 1000000
+
+/*
+ * GP and PP core translate their int_stat/rawstat into one of these
+ */
+enum mali_interrupt_result {
+	MALI_INTERRUPT_RESULT_NONE,
+	MALI_INTERRUPT_RESULT_SUCCESS,
+	MALI_INTERRUPT_RESULT_SUCCESS_VS,
+	MALI_INTERRUPT_RESULT_SUCCESS_PLBU,
+	MALI_INTERRUPT_RESULT_OOM,
+	MALI_INTERRUPT_RESULT_ERROR
+};
+
+_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size);
+void mali_hw_core_delete(struct mali_hw_core *core);
+
+MALI_STATIC_INLINE u32 mali_hw_core_register_read(struct mali_hw_core *core, u32 relative_address)
+{
+	u32 read_val;
+	read_val = _mali_osk_mem_ioread32(core->mapped_registers, relative_address);
+	MALI_DEBUG_PRINT(6, ("register_read for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, read_val));
+	return read_val;
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed(struct mali_hw_core *core, u32 relative_address, u32 new_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	_mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val);
+}
+
+/* Conditionally write a register.
+ * The register will only be written if the new value is different from the old_value.
+ * If the new value is different, the old value will also be updated */
+MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 new_val, const u32 old_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	if (old_val != new_val) {
+		_mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val);
+	}
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write(struct mali_hw_core *core, u32 relative_address, u32 new_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	_mali_osk_mem_iowrite32(core->mapped_registers, relative_address, new_val);
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs)
+{
+	u32 i;
+	MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n",
+			     core->description, relative_address, nr_of_regs));
+
+	/* Do not use burst writes against the registers */
+	for (i = 0; i < nr_of_regs; i++) {
+		mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]);
+	}
+}
+
+/* Conditionally write a set of registers.
+ * The register will only be written if the new value is different from the old_value.
+ * If the new value is different, the old value will also be updated */
+MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs, const u32 *old_array)
+{
+	u32 i;
+	MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n",
+			     core->description, relative_address, nr_of_regs));
+
+	/* Do not use burst writes against the registers */
+	for (i = 0; i < nr_of_regs; i++) {
+		if (old_array[i] != write_array[i]) {
+			mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]);
+		}
+	}
+}
+
+#endif /* __MALI_HW_CORE_H__ */
diff --git a/utgard/r5p1/common/mali_kernel_common.h b/utgard/r5p1/common/mali_kernel_common.h
new file mode 100755
index 0000000..990cf3a
--- /dev/null
+++ b/utgard/r5p1/common/mali_kernel_common.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_COMMON_H__
+#define __MALI_KERNEL_COMMON_H__
+
+#include "mali_osk.h"
+
+/* Make sure debug is defined when it should be */
+#ifndef DEBUG
+#if defined(_DEBUG)
+#define DEBUG
+#endif
+#endif
+
+/* The file include several useful macros for error checking, debugging and printing.
+ * - MALI_PRINTF(...)           Do not use this function: Will be included in Release builds.
+ * - MALI_DEBUG_PRINT(nr, (X) ) Prints the second argument if nr<=MALI_DEBUG_LEVEL.
+ * - MALI_DEBUG_ERROR( (X) )    Prints an errortext, a source trace, and the given error message.
+ * - MALI_DEBUG_ASSERT(exp,(X)) If the asserted expr is false, the program will exit.
+ * - MALI_DEBUG_ASSERT_POINTER(pointer)  Triggers if the pointer is a zero pointer.
+ * - MALI_DEBUG_CODE( X )       The code inside the macro is only compiled in Debug builds.
+ *
+ * The (X) means that you must add an extra parenthesis around the argumentlist.
+ *
+ * The  printf function: MALI_PRINTF(...) is routed to _mali_osk_debugmsg
+ *
+ * Suggested range for the DEBUG-LEVEL is [1:6] where
+ * [1:2] Is messages with highest priority, indicate possible errors.
+ * [3:4] Is messages with medium priority, output important variables.
+ * [5:6] Is messages with low priority, used during extensive debugging.
+ */
+
+/**
+*  Fundamental error macro. Reports an error code. This is abstracted to allow us to
+*  easily switch to a different error reporting method if we want, and also to allow
+*  us to search for error returns easily.
+*
+*  Note no closing semicolon - this is supplied in typical usage:
+*
+*  MALI_ERROR(MALI_ERROR_OUT_OF_MEMORY);
+*/
+#define MALI_ERROR(error_code) return (error_code)
+
+/**
+ *  Basic error macro, to indicate success.
+ *  Note no closing semicolon - this is supplied in typical usage:
+ *
+ *  MALI_SUCCESS;
+ */
+#define MALI_SUCCESS MALI_ERROR(_MALI_OSK_ERR_OK)
+
+/**
+ *  Basic error macro. This checks whether the given condition is true, and if not returns
+ *  from this function with the supplied error code. This is a macro so that we can override it
+ *  for stress testing.
+ *
+ *  Note that this uses the do-while-0 wrapping to ensure that we don't get problems with dangling
+ *  else clauses. Note also no closing semicolon - this is supplied in typical usage:
+ *
+ *  MALI_CHECK((p!=NULL), ERROR_NO_OBJECT);
+ */
+#define MALI_CHECK(condition, error_code) do { if(!(condition)) MALI_ERROR(error_code); } while(0)
+
+/**
+ *  Error propagation macro. If the expression given is anything other than
+ *  _MALI_OSK_NO_ERROR, then the value is returned from the enclosing function
+ *  as an error code. This effectively acts as a guard clause, and propagates
+ *  error values up the call stack. This uses a temporary value to ensure that
+ *  the error expression is not evaluated twice.
+ *  If the counter for forcing a failure has been set using _mali_force_error,
+ *  this error will be returned without evaluating the expression in
+ *  MALI_CHECK_NO_ERROR
+ */
+#define MALI_CHECK_NO_ERROR(expression) \
+	do { _mali_osk_errcode_t _check_no_error_result=(expression); \
+		if(_check_no_error_result != _MALI_OSK_ERR_OK) \
+			MALI_ERROR(_check_no_error_result); \
+	} while(0)
+
+/**
+ *  Pointer check macro. Checks non-null pointer.
+ */
+#define MALI_CHECK_NON_NULL(pointer, error_code) MALI_CHECK( ((pointer)!=NULL), (error_code) )
+
+/**
+ *  Error macro with goto. This checks whether the given condition is true, and if not jumps
+ *  to the specified label using a goto. The label must therefore be local to the function in
+ *  which this macro appears. This is most usually used to execute some clean-up code before
+ *  exiting with a call to ERROR.
+ *
+ *  Like the other macros, this is a macro to allow us to override the condition if we wish,
+ *  e.g. to force an error during stress testing.
+ */
+#define MALI_CHECK_GOTO(condition, label) do { if(!(condition)) goto label; } while(0)
+
+/**
+ *  Explicitly ignore a parameter passed into a function, to suppress compiler warnings.
+ *  Should only be used with parameter names.
+ */
+#define MALI_IGNORE(x) x=x
+
+#if defined(CONFIG_MALI_QUIET)
+#define MALI_PRINTF(args)
+#else
+#define MALI_PRINTF(args) _mali_osk_dbgmsg args;
+#endif
+
+#define MALI_PRINT_ERROR(args) do{ \
+		MALI_PRINTF(("Mali: ERR: %s\n" ,__FILE__)); \
+		MALI_PRINTF(("           %s()%4d\n           ", __FUNCTION__, __LINE__)) ; \
+		MALI_PRINTF(args); \
+		MALI_PRINTF(("\n")); \
+	} while(0)
+
+#define MALI_PRINT(args) do{ \
+		MALI_PRINTF(("Mali: ")); \
+		MALI_PRINTF(args); \
+	} while (0)
+
+#ifdef DEBUG
+#ifndef mali_debug_level
+extern int mali_debug_level;
+#endif
+
+#define MALI_DEBUG_CODE(code) code
+#define MALI_DEBUG_PRINT(level, args)  do { \
+		if((level) <=  mali_debug_level)\
+		{MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); } \
+	} while (0)
+
+#define MALI_DEBUG_PRINT_ERROR(args) MALI_PRINT_ERROR(args)
+
+#define MALI_DEBUG_PRINT_IF(level,condition,args)  \
+	if((condition)&&((level) <=  mali_debug_level))\
+	{MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); }
+
+#define MALI_DEBUG_PRINT_ELSE(level, args)\
+	else if((level) <=  mali_debug_level)\
+	{ MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); }
+
+/**
+ * @note these variants of DEBUG ASSERTS will cause a debugger breakpoint
+ * to be entered (see _mali_osk_break() ). An alternative would be to call
+ * _mali_osk_abort(), on OSs that support it.
+ */
+#define MALI_DEBUG_PRINT_ASSERT(condition, args) do  {if( !(condition)) { MALI_PRINT_ERROR(args); _mali_osk_break(); } } while(0)
+#define MALI_DEBUG_ASSERT_POINTER(pointer) do  {if( (pointer)== NULL) {MALI_PRINT_ERROR(("NULL pointer " #pointer)); _mali_osk_break();} } while(0)
+#define MALI_DEBUG_ASSERT(condition) do  {if( !(condition)) {MALI_PRINT_ERROR(("ASSERT failed: " #condition )); _mali_osk_break();} } while(0)
+
+#else /* DEBUG */
+
+#define MALI_DEBUG_CODE(code)
+#define MALI_DEBUG_PRINT(string,args) do {} while(0)
+#define MALI_DEBUG_PRINT_ERROR(args) do {} while(0)
+#define MALI_DEBUG_PRINT_IF(level,condition,args) do {} while(0)
+#define MALI_DEBUG_PRINT_ELSE(level,condition,args) do {} while(0)
+#define MALI_DEBUG_PRINT_ASSERT(condition,args) do {} while(0)
+#define MALI_DEBUG_ASSERT_POINTER(pointer) do {} while(0)
+#define MALI_DEBUG_ASSERT(condition) do {} while(0)
+
+#endif /* DEBUG */
+
+/**
+ * variables from user space cannot be dereferenced from kernel space; tagging them
+ * with __user allows the GCC compiler to generate a warning. Other compilers may
+ * not support this so we define it here as an empty macro if the compiler doesn't
+ * define it.
+ */
+#ifndef __user
+#define __user
+#endif
+
+#endif /* __MALI_KERNEL_COMMON_H__ */
diff --git a/utgard/r5p1/common/mali_kernel_core.c b/utgard/r5p1/common/mali_kernel_core.c
new file mode 100755
index 0000000..4cc2ea2
--- /dev/null
+++ b/utgard/r5p1/common/mali_kernel_core.c
@@ -0,0 +1,1269 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_ukk.h"
+#include "mali_kernel_core.h"
+#include "mali_memory.h"
+#include "mali_mem_validation.h"
+#include "mali_mmu.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_dlbu.h"
+#include "mali_broadcast.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_executor.h"
+#include "mali_pp_job.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+#include "mali_pmu.h"
+#include "mali_scheduler.h"
+#include "mali_kernel_utilization.h"
+#include "mali_l2_cache.h"
+#include "mali_timeline.h"
+#include "mali_soft_job.h"
+#include "mali_pm_domain.h"
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include "mali_profiling_internal.h"
+#endif
+#include "mali_control_timer.h"
+#include "mali_dvfs_policy.h"
+
+#define MALI_SHARED_MEMORY_DEFAULT_SIZE 0xffffffff
+
+/* Mali GPU memory. Real values come from module parameter or from device specific data */
+unsigned int mali_dedicated_mem_start = 0;
+unsigned int mali_dedicated_mem_size = 0;
+
+/* Default shared memory size is set to 4G. */
+unsigned int mali_shared_mem_size = MALI_SHARED_MEMORY_DEFAULT_SIZE;
+
+/* Frame buffer memory to be accessible by Mali GPU */
+int mali_fb_start = 0;
+int mali_fb_size = 0;
+
+/* Mali max job runtime */
+extern int mali_max_job_runtime;
+
+/** Start profiling from module load? */
+int mali_boot_profiling = 0;
+
+/** Limits for the number of PP cores behind each L2 cache. */
+int mali_max_pp_cores_group_1 = 0xFF;
+int mali_max_pp_cores_group_2 = 0xFF;
+
+int mali_inited_pp_cores_group_1 = 0;
+int mali_inited_pp_cores_group_2 = 0;
+
+static _mali_product_id_t global_product_id = _MALI_PRODUCT_ID_UNKNOWN;
+static uintptr_t global_gpu_base_address = 0;
+static u32 global_gpu_major_version = 0;
+static u32 global_gpu_minor_version = 0;
+
+mali_bool mali_gpu_class_is_mali450 = MALI_FALSE;
+
+static _mali_osk_errcode_t mali_set_global_gpu_base_address(void)
+{
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+	global_gpu_base_address = _mali_osk_resource_base_address();
+	if (0 == global_gpu_base_address) {
+		err = _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	return err;
+}
+
+static u32 mali_get_bcast_id(_mali_osk_resource_t *resource_pp)
+{
+	switch (resource_pp->base - global_gpu_base_address) {
+	case 0x08000:
+	case 0x20000: /* fall-through for aliased mapping */
+		return 0x01;
+	case 0x0A000:
+	case 0x22000: /* fall-through for aliased mapping */
+		return 0x02;
+	case 0x0C000:
+	case 0x24000: /* fall-through for aliased mapping */
+		return 0x04;
+	case 0x0E000:
+	case 0x26000: /* fall-through for aliased mapping */
+		return 0x08;
+	case 0x28000:
+		return 0x10;
+	case 0x2A000:
+		return 0x20;
+	case 0x2C000:
+		return 0x40;
+	case 0x2E000:
+		return 0x80;
+	default:
+		return 0;
+	}
+}
+
+static _mali_osk_errcode_t mali_parse_product_info(void)
+{
+	_mali_osk_resource_t first_pp_resource;
+
+	/* Find the first PP core resource (again) */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_PP0, &first_pp_resource)) {
+		/* Create a dummy PP object for this core so that we can read the version register */
+		struct mali_group *group = mali_group_create(NULL, NULL, NULL, MALI_DOMAIN_INDEX_PP0);
+		if (NULL != group) {
+			struct mali_pp_core *pp_core = mali_pp_create(&first_pp_resource, group, MALI_FALSE, mali_get_bcast_id(&first_pp_resource));
+			if (NULL != pp_core) {
+				u32 pp_version;
+
+				pp_version = mali_pp_core_get_version(pp_core);
+
+				mali_group_delete(group);
+
+				global_gpu_major_version = (pp_version >> 8) & 0xFF;
+				global_gpu_minor_version = pp_version & 0xFF;
+
+				switch (pp_version >> 16) {
+				case MALI200_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI200;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-200 r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					MALI_PRINT_ERROR(("Mali-200 is not supported by this driver.\n"));
+					_mali_osk_abort();
+					break;
+				case MALI300_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI300;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-300 r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				case MALI400_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI400;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-400 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				case MALI450_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI450;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-450 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				default:
+					MALI_DEBUG_PRINT(2, ("Found unknown Mali GPU (r%up%u)\n", global_gpu_major_version, global_gpu_minor_version));
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				return _MALI_OSK_ERR_OK;
+			} else {
+				MALI_PRINT_ERROR(("Failed to create initial PP object\n"));
+			}
+		} else {
+			MALI_PRINT_ERROR(("Failed to create initial group object\n"));
+		}
+	} else {
+		MALI_PRINT_ERROR(("First PP core not specified in config file\n"));
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+static void mali_delete_groups(void)
+{
+	struct mali_group *group;
+
+	group = mali_group_get_glob_group(0);
+	while (NULL != group) {
+		mali_group_delete(group);
+		group = mali_group_get_glob_group(0);
+	}
+
+	MALI_DEBUG_ASSERT(0 == mali_group_get_glob_num_groups());
+}
+
+static void mali_delete_l2_cache_cores(void)
+{
+	struct mali_l2_cache_core *l2;
+
+	l2 = mali_l2_cache_core_get_glob_l2_core(0);
+	while (NULL != l2) {
+		mali_l2_cache_delete(l2);
+		l2 = mali_l2_cache_core_get_glob_l2_core(0);
+	}
+
+	MALI_DEBUG_ASSERT(0 == mali_l2_cache_core_get_glob_num_l2_cores());
+}
+
+static struct mali_l2_cache_core *mali_create_l2_cache_core(_mali_osk_resource_t *resource, u32 domain_index)
+{
+	struct mali_l2_cache_core *l2_cache = NULL;
+
+	if (NULL != resource) {
+
+		MALI_DEBUG_PRINT(3, ("Found L2 cache %s\n", resource->description));
+
+		l2_cache = mali_l2_cache_create(resource, domain_index);
+		if (NULL == l2_cache) {
+			MALI_PRINT_ERROR(("Failed to create L2 cache object\n"));
+			return NULL;
+		}
+	}
+	MALI_DEBUG_PRINT(3, ("Created L2 cache core object\n"));
+
+	return l2_cache;
+}
+
+static _mali_osk_errcode_t mali_parse_config_l2_cache(void)
+{
+	struct mali_l2_cache_core *l2_cache = NULL;
+
+	if (mali_is_mali400()) {
+		_mali_osk_resource_t l2_resource;
+		if (_MALI_OSK_ERR_OK != _mali_osk_resource_find(MALI400_OFFSET_L2_CACHE0, &l2_resource)) {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		l2_cache = mali_create_l2_cache_core(&l2_resource, MALI_DOMAIN_INDEX_L20);
+		if (NULL == l2_cache) {
+			return _MALI_OSK_ERR_FAULT;
+		}
+	} else if (mali_is_mali450()) {
+		/*
+		 * L2 for GP    at 0x10000
+		 * L2 for PP0-3 at 0x01000
+		 * L2 for PP4-7 at 0x11000 (optional)
+		 */
+
+		_mali_osk_resource_t l2_gp_resource;
+		_mali_osk_resource_t l2_pp_grp0_resource;
+		_mali_osk_resource_t l2_pp_grp1_resource;
+
+		/* Make cluster for GP's L2 */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE0, &l2_gp_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for GP\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_gp_resource, MALI_DOMAIN_INDEX_L20);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for GP in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		/* Find corresponding l2 domain */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE1, &l2_pp_grp0_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 0\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_pp_grp0_resource, MALI_DOMAIN_INDEX_L21);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for PP group 0 in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		/* Second PP core group is optional, don't fail if we don't find it */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE2, &l2_pp_grp1_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 1\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_pp_grp1_resource, MALI_DOMAIN_INDEX_L22);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static struct mali_group *mali_create_group(struct mali_l2_cache_core *cache,
+		_mali_osk_resource_t *resource_mmu,
+		_mali_osk_resource_t *resource_gp,
+		_mali_osk_resource_t *resource_pp,
+		u32 domain_index)
+{
+	struct mali_mmu_core *mmu;
+	struct mali_group *group;
+
+	MALI_DEBUG_PRINT(3, ("Starting new group for MMU %s\n", resource_mmu->description));
+
+	/* Create the group object */
+	group = mali_group_create(cache, NULL, NULL, domain_index);
+	if (NULL == group) {
+		MALI_PRINT_ERROR(("Failed to create group object for MMU %s\n", resource_mmu->description));
+		return NULL;
+	}
+
+	/* Create the MMU object inside group */
+	mmu = mali_mmu_create(resource_mmu, group, MALI_FALSE);
+	if (NULL == mmu) {
+		MALI_PRINT_ERROR(("Failed to create MMU object\n"));
+		mali_group_delete(group);
+		return NULL;
+	}
+
+	if (NULL != resource_gp) {
+		/* Create the GP core object inside this group */
+		struct mali_gp_core *gp_core = mali_gp_create(resource_gp, group);
+		if (NULL == gp_core) {
+			/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+			MALI_PRINT_ERROR(("Failed to create GP object\n"));
+			mali_group_delete(group);
+			return NULL;
+		}
+	}
+
+	if (NULL != resource_pp) {
+		struct mali_pp_core *pp_core;
+
+		/* Create the PP core object inside this group */
+		pp_core = mali_pp_create(resource_pp, group, MALI_FALSE, mali_get_bcast_id(resource_pp));
+		if (NULL == pp_core) {
+			/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+			MALI_PRINT_ERROR(("Failed to create PP object\n"));
+			mali_group_delete(group);
+			return NULL;
+		}
+	}
+
+	return group;
+}
+
+static _mali_osk_errcode_t mali_create_virtual_group(_mali_osk_resource_t *resource_mmu_pp_bcast,
+		_mali_osk_resource_t *resource_pp_bcast,
+		_mali_osk_resource_t *resource_dlbu,
+		_mali_osk_resource_t *resource_bcast)
+{
+	struct mali_mmu_core *mmu_pp_bcast_core;
+	struct mali_pp_core *pp_bcast_core;
+	struct mali_dlbu_core *dlbu_core;
+	struct mali_bcast_unit *bcast_core;
+	struct mali_group *group;
+
+	MALI_DEBUG_PRINT(2, ("Starting new virtual group for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description));
+
+	/* Create the DLBU core object */
+	dlbu_core = mali_dlbu_create(resource_dlbu);
+	if (NULL == dlbu_core) {
+		MALI_PRINT_ERROR(("Failed to create DLBU object \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the Broadcast unit core */
+	bcast_core = mali_bcast_unit_create(resource_bcast);
+	if (NULL == bcast_core) {
+		MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n"));
+		mali_dlbu_delete(dlbu_core);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the group object */
+#if defined(DEBUG)
+	/* Get a physical PP group to temporarily add to broadcast unit.  IRQ
+	 * verification needs a physical group in the broadcast unit to test
+	 * the broadcast unit interrupt line. */
+	{
+		struct mali_group *phys_group = NULL;
+		int i;
+		for (i = 0; i < mali_group_get_glob_num_groups(); i++) {
+			phys_group = mali_group_get_glob_group(i);
+			if (NULL != mali_group_get_pp_core(phys_group)) break;
+		}
+		MALI_DEBUG_ASSERT(NULL != mali_group_get_pp_core(phys_group));
+
+		/* Add the group temporarily to the broadcast, and update the
+		 * broadcast HW. Since the HW is not updated when removing the
+		 * group the IRQ check will work when the virtual PP is created
+		 * later.
+		 *
+		 * When the virtual group gets populated, the actually used
+		 * groups will be added to the broadcast unit and the HW will
+		 * be updated.
+		 */
+		mali_bcast_add_group(bcast_core, phys_group);
+		mali_bcast_reset(bcast_core);
+		mali_bcast_remove_group(bcast_core, phys_group);
+	}
+#endif /* DEBUG */
+	group = mali_group_create(NULL, dlbu_core, bcast_core, MALI_DOMAIN_INDEX_DUMMY);
+	if (NULL == group) {
+		MALI_PRINT_ERROR(("Failed to create group object for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description));
+		mali_bcast_unit_delete(bcast_core);
+		mali_dlbu_delete(dlbu_core);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the MMU object inside group */
+	mmu_pp_bcast_core = mali_mmu_create(resource_mmu_pp_bcast, group, MALI_TRUE);
+	if (NULL == mmu_pp_bcast_core) {
+		MALI_PRINT_ERROR(("Failed to create MMU PP broadcast object\n"));
+		mali_group_delete(group);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the PP core object inside this group */
+	pp_bcast_core = mali_pp_create(resource_pp_bcast, group, MALI_TRUE, 0);
+	if (NULL == pp_bcast_core) {
+		/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+		MALI_PRINT_ERROR(("Failed to create PP object\n"));
+		mali_group_delete(group);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_groups(void)
+{
+	struct mali_group *group;
+	int cluster_id_gp = 0;
+	int cluster_id_pp_grp0 = 0;
+	int cluster_id_pp_grp1 = 0;
+	int i;
+
+	_mali_osk_resource_t resource_gp;
+	_mali_osk_resource_t resource_gp_mmu;
+	_mali_osk_resource_t resource_pp[8];
+	_mali_osk_resource_t resource_pp_mmu[8];
+	_mali_osk_resource_t resource_pp_mmu_bcast;
+	_mali_osk_resource_t resource_pp_bcast;
+	_mali_osk_resource_t resource_dlbu;
+	_mali_osk_resource_t resource_bcast;
+	_mali_osk_errcode_t resource_gp_found;
+	_mali_osk_errcode_t resource_gp_mmu_found;
+	_mali_osk_errcode_t resource_pp_found[8];
+	_mali_osk_errcode_t resource_pp_mmu_found[8];
+	_mali_osk_errcode_t resource_pp_mmu_bcast_found;
+	_mali_osk_errcode_t resource_pp_bcast_found;
+	_mali_osk_errcode_t resource_dlbu_found;
+	_mali_osk_errcode_t resource_bcast_found;
+
+	if (!(mali_is_mali400() || mali_is_mali450())) {
+		/* No known HW core */
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (MALI_MAX_JOB_RUNTIME_DEFAULT == mali_max_job_runtime) {
+		/* Group settings are not overridden by module parameters, so use device settings */
+		_mali_osk_device_data data = { 0, };
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+			/* Use device specific settings (if defined) */
+			if (0 != data.max_job_runtime) {
+				mali_max_job_runtime = data.max_job_runtime;
+			}
+		}
+	}
+
+	if (mali_is_mali450()) {
+		/* Mali-450 have separate L2s for GP, and PP core group(s) */
+		cluster_id_pp_grp0 = 1;
+		cluster_id_pp_grp1 = 2;
+	}
+
+	resource_gp_found = _mali_osk_resource_find(MALI_OFFSET_GP, &resource_gp);
+	resource_gp_mmu_found = _mali_osk_resource_find(MALI_OFFSET_GP_MMU, &resource_gp_mmu);
+	resource_pp_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0, &(resource_pp[0]));
+	resource_pp_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1, &(resource_pp[1]));
+	resource_pp_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2, &(resource_pp[2]));
+	resource_pp_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3, &(resource_pp[3]));
+	resource_pp_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4, &(resource_pp[4]));
+	resource_pp_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5, &(resource_pp[5]));
+	resource_pp_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6, &(resource_pp[6]));
+	resource_pp_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7, &(resource_pp[7]));
+	resource_pp_mmu_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0_MMU, &(resource_pp_mmu[0]));
+	resource_pp_mmu_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1_MMU, &(resource_pp_mmu[1]));
+	resource_pp_mmu_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2_MMU, &(resource_pp_mmu[2]));
+	resource_pp_mmu_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3_MMU, &(resource_pp_mmu[3]));
+	resource_pp_mmu_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4_MMU, &(resource_pp_mmu[4]));
+	resource_pp_mmu_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5_MMU, &(resource_pp_mmu[5]));
+	resource_pp_mmu_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6_MMU, &(resource_pp_mmu[6]));
+	resource_pp_mmu_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7_MMU, &(resource_pp_mmu[7]));
+
+
+	if (mali_is_mali450()) {
+		resource_bcast_found = _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast);
+		resource_dlbu_found = _mali_osk_resource_find(MALI_OFFSET_DLBU, &resource_dlbu);
+		resource_pp_mmu_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST_MMU, &resource_pp_mmu_bcast);
+		resource_pp_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST, &resource_pp_bcast);
+
+		if (_MALI_OSK_ERR_OK != resource_bcast_found ||
+		    _MALI_OSK_ERR_OK != resource_dlbu_found ||
+		    _MALI_OSK_ERR_OK != resource_pp_mmu_bcast_found ||
+		    _MALI_OSK_ERR_OK != resource_pp_bcast_found) {
+			/* Missing mandatory core(s) for Mali-450 */
+			MALI_DEBUG_PRINT(2, ("Missing mandatory resources, Mali-450 needs DLBU, Broadcast unit, virtual PP core and virtual MMU\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK != resource_gp_found ||
+	    _MALI_OSK_ERR_OK != resource_gp_mmu_found ||
+	    _MALI_OSK_ERR_OK != resource_pp_found[0] ||
+	    _MALI_OSK_ERR_OK != resource_pp_mmu_found[0]) {
+		/* Missing mandatory core(s) */
+		MALI_DEBUG_PRINT(2, ("Missing mandatory resource, need at least one GP and one PP, both with a separate MMU\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT(1 <= mali_l2_cache_core_get_glob_num_l2_cores());
+	group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_gp), &resource_gp_mmu, &resource_gp, NULL, MALI_DOMAIN_INDEX_GP);
+	if (NULL == group) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create group for first (and mandatory) PP core */
+	MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= (cluster_id_pp_grp0 + 1)); /* >= 1 on Mali-300 and Mali-400, >= 2 on Mali-450 */
+	group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[0], NULL, &resource_pp[0], MALI_DOMAIN_INDEX_PP0);
+	if (NULL == group) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	mali_inited_pp_cores_group_1++;
+
+	/* Create groups for rest of the cores in the first PP core group */
+	for (i = 1; i < 4; i++) { /* First half of the PP cores belong to first core group */
+		if (mali_inited_pp_cores_group_1 < mali_max_pp_cores_group_1) {
+			if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) {
+				group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i);
+				if (NULL == group) {
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				mali_inited_pp_cores_group_1++;
+			}
+		}
+	}
+
+	/* Create groups for cores in the second PP core group */
+	for (i = 4; i < 8; i++) { /* Second half of the PP cores belong to second core group */
+		if (mali_inited_pp_cores_group_2 < mali_max_pp_cores_group_2) {
+			if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) {
+				MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= 2); /* Only Mali-450 have a second core group */
+				group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp1), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i);
+				if (NULL == group) {
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				mali_inited_pp_cores_group_2++;
+			}
+		}
+	}
+
+	if (mali_is_mali450()) {
+		_mali_osk_errcode_t err = mali_create_virtual_group(&resource_pp_mmu_bcast, &resource_pp_bcast, &resource_dlbu, &resource_bcast);
+		if (_MALI_OSK_ERR_OK != err) {
+			return err;
+		}
+	}
+
+	mali_max_pp_cores_group_1 = mali_inited_pp_cores_group_1;
+	mali_max_pp_cores_group_2 = mali_inited_pp_cores_group_2;
+	MALI_DEBUG_PRINT(2, ("%d+%d PP cores initialized\n", mali_inited_pp_cores_group_1, mali_inited_pp_cores_group_2));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_check_shared_interrupts(void)
+{
+#if !defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_TRUE == _mali_osk_shared_interrupts()) {
+		MALI_PRINT_ERROR(("Shared interrupts detected, but driver support is not enabled\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* !defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	/* It is OK to compile support for shared interrupts even if Mali is not using it. */
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_pmu(void)
+{
+	_mali_osk_resource_t resource_pmu;
+
+	MALI_DEBUG_ASSERT(0 != global_gpu_base_address);
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_PMU, &resource_pmu)) {
+		struct mali_pmu_core *pmu;
+
+		pmu = mali_pmu_create(&resource_pmu);
+		if (NULL == pmu) {
+			MALI_PRINT_ERROR(("Failed to create PMU\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	/* It's ok if the PMU doesn't exist */
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_memory(void)
+{
+	_mali_osk_device_data data = { 0, };
+	_mali_osk_errcode_t ret;
+
+	/* The priority of setting the value of mali_shared_mem_size,
+	 * mali_dedicated_mem_start and mali_dedicated_mem_size:
+	 * 1. module parameter;
+	 * 2. platform data;
+	 * 3. default value;
+	 **/
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		/* Memory settings are not overridden by module parameters, so use device settings */
+		if (0 == mali_dedicated_mem_start && 0 == mali_dedicated_mem_size) {
+			/* Use device specific settings (if defined) */
+			mali_dedicated_mem_start = data.dedicated_mem_start;
+			mali_dedicated_mem_size = data.dedicated_mem_size;
+		}
+
+		if (MALI_SHARED_MEMORY_DEFAULT_SIZE == mali_shared_mem_size &&
+		    0 != data.shared_mem_size) {
+			mali_shared_mem_size = data.shared_mem_size;
+		}
+	}
+
+	if (0 < mali_dedicated_mem_size && 0 != mali_dedicated_mem_start) {
+		MALI_DEBUG_PRINT(2, ("Mali memory settings (dedicated: 0x%08X@0x%08X)\n",
+				     mali_dedicated_mem_size, mali_dedicated_mem_start));
+
+		/* Dedicated memory */
+		ret = mali_memory_core_resource_dedicated_memory(mali_dedicated_mem_start, mali_dedicated_mem_size);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to register dedicated memory\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	if (0 < mali_shared_mem_size) {
+		MALI_DEBUG_PRINT(2, ("Mali memory settings (shared: 0x%08X)\n", mali_shared_mem_size));
+
+		/* Shared OS memory */
+		ret = mali_memory_core_resource_os_memory(mali_shared_mem_size);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to register shared OS memory\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	if (0 == mali_fb_start && 0 == mali_fb_size) {
+		/* Frame buffer settings are not overridden by module parameters, so use device settings */
+		_mali_osk_device_data data = { 0, };
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+			/* Use device specific settings (if defined) */
+			mali_fb_start = data.fb_start;
+			mali_fb_size = data.fb_size;
+		}
+
+		MALI_DEBUG_PRINT(2, ("Using device defined frame buffer settings (0x%08X@0x%08X)\n",
+				     mali_fb_size, mali_fb_start));
+	} else {
+		MALI_DEBUG_PRINT(2, ("Using module defined frame buffer settings (0x%08X@0x%08X)\n",
+				     mali_fb_size, mali_fb_start));
+	}
+
+	if (0 != mali_fb_size) {
+		/* Register frame buffer */
+		ret = mali_mem_validation_add_range(mali_fb_start, mali_fb_size);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to register frame buffer memory region\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_detect_gpu_class(void)
+{
+	if (_mali_osk_l2_resource_count() > 1) {
+		mali_gpu_class_is_mali450 = MALI_TRUE;
+	}
+}
+
+static _mali_osk_errcode_t mali_init_hw_reset(void)
+{
+#if defined(CONFIG_MALI450)
+	_mali_osk_resource_t resource_bcast;
+
+	/* Ensure broadcast unit is in a good state before we start creating
+	 * groups and cores.
+	 */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast)) {
+		struct mali_bcast_unit *bcast_core;
+
+		bcast_core = mali_bcast_unit_create(&resource_bcast);
+		if (NULL == bcast_core) {
+			MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+		mali_bcast_unit_delete(bcast_core);
+	}
+#endif /* CONFIG_MALI450 */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_initialize_subsystems(void)
+{
+	_mali_osk_errcode_t err;
+
+#ifdef CONFIG_MALI_DT
+	err = _mali_osk_resource_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+#endif
+
+	mali_pp_job_initialize();
+
+	mali_timeline_initialize();
+
+	err = mali_session_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+#if defined(CONFIG_MALI400_PROFILING)
+	err = _mali_osk_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE);
+	if (_MALI_OSK_ERR_OK != err) {
+		/* No biggie if we weren't able to initialize the profiling */
+		MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n"));
+	}
+#endif
+
+	err = mali_memory_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_executor_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_scheduler_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Configure memory early, needed by mali_mmu_initialize. */
+	err = mali_parse_config_memory();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_set_global_gpu_base_address();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Detect GPU class (uses L2 cache count) */
+	mali_detect_gpu_class();
+
+	err = mali_check_shared_interrupts();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the MALI PMU (will not touch HW!) */
+	err = mali_parse_config_pmu();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the power management module */
+	err = mali_pm_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Make sure the entire GPU stays on for the rest of this function */
+	mali_pm_init_begin();
+
+	/* Ensure HW is in a good state before starting to access cores. */
+	err = mali_init_hw_reset();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Detect which Mali GPU we are dealing with */
+	err = mali_parse_product_info();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* The global_product_id is now populated with the correct Mali GPU */
+
+	/* Start configuring the actual Mali hardware. */
+
+	err = mali_mmu_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	if (mali_is_mali450()) {
+		err = mali_dlbu_initialize();
+		if (_MALI_OSK_ERR_OK != err) {
+			mali_pm_init_end();
+			mali_terminate_subsystems();
+			return err;
+		}
+	}
+
+	err = mali_parse_config_l2_cache();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_parse_config_groups();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Move groups into executor */
+	mali_executor_populate();
+
+	/* Need call after all group has assigned a domain */
+	mali_pm_power_cost_setup();
+
+	/* Initialize the GPU timer */
+	err = mali_control_timer_init();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the GPU utilization tracking */
+	err = mali_utilization_init();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+#if defined(CONFIG_MALI_DVFS)
+	err = mali_dvfs_policy_init();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+#endif
+
+	/* Allowing the system to be turned off */
+	mali_pm_init_end();
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+void mali_terminate_subsystems(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	MALI_DEBUG_PRINT(2, ("terminate_subsystems() called\n"));
+
+	mali_utilization_term();
+	mali_control_timer_term();
+
+	mali_executor_depopulate();
+	mali_delete_groups(); /* Delete groups not added to executor */
+	mali_executor_terminate();
+
+	mali_scheduler_terminate();
+	mali_pp_job_terminate();
+	mali_delete_l2_cache_cores();
+	mali_mmu_terminate();
+
+	if (mali_is_mali450()) {
+		mali_dlbu_terminate();
+	}
+
+	mali_pm_terminate();
+
+	if (NULL != pmu) {
+		mali_pmu_delete(pmu);
+	}
+
+#if defined(CONFIG_MALI400_PROFILING)
+	_mali_osk_profiling_term();
+#endif
+
+	mali_memory_terminate();
+
+	mali_session_terminate();
+
+	mali_timeline_terminate();
+
+	global_gpu_base_address = 0;
+}
+
+_mali_product_id_t mali_kernel_core_get_product_id(void)
+{
+	return global_product_id;
+}
+
+u32 mali_kernel_core_get_gpu_major_version(void)
+{
+	return global_gpu_major_version;
+}
+
+u32 mali_kernel_core_get_gpu_minor_version(void)
+{
+	return global_gpu_minor_version;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	/* check compatability */
+	if (args->version == _MALI_UK_API_VERSION) {
+		args->compatible = 1;
+	} else {
+		args->compatible = 0;
+	}
+
+	args->version = _MALI_UK_API_VERSION; /* report our version */
+
+	/* success regardless of being compatible or not */
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	/* check compatability */
+	if (args->version == _MALI_UK_API_VERSION) {
+		args->compatible = 1;
+	} else {
+		args->compatible = 0;
+	}
+
+	args->version = _MALI_UK_API_VERSION; /* report our version */
+
+	/* success regardless of being compatible or not */
+	return _MALI_OSK_ERR_OK;;
+}
+
+_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args)
+{
+	_mali_osk_errcode_t err;
+	_mali_osk_notification_t *notification;
+	_mali_osk_notification_queue_t *queue;
+	struct mali_session_data *session;
+
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	queue = session->ioctl_queue;
+
+	/* if the queue does not exist we're currently shutting down */
+	if (NULL == queue) {
+		MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. Asking userspace to stop querying\n"));
+		args->type = _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS;
+		return _MALI_OSK_ERR_OK;;
+	}
+
+	/* receive a notification, might sleep */
+	err = _mali_osk_notification_queue_receive(queue, &notification);
+	if (_MALI_OSK_ERR_OK != err) {
+		MALI_ERROR(err); /* errcode returned, pass on to caller */
+	}
+
+	/* copy the buffer to the user */
+	args->type = (_mali_uk_notification_type)notification->notification_type;
+	_mali_osk_memcpy(&args->data, notification->result_buffer, notification->result_buffer_size);
+
+	/* finished with the notification */
+	_mali_osk_notification_delete(notification);
+
+	return _MALI_OSK_ERR_OK;; /* all ok */
+}
+
+_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args)
+{
+	_mali_osk_notification_t *notification;
+	_mali_osk_notification_queue_t *queue;
+	struct mali_session_data *session;
+
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	queue = session->ioctl_queue;
+
+	/* if the queue does not exist we're currently shutting down */
+	if (NULL == queue) {
+		MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. Asking userspace to stop querying\n"));
+		return _MALI_OSK_ERR_OK;;
+	}
+
+	notification = _mali_osk_notification_create(args->type, 0);
+	if (NULL == notification) {
+		MALI_PRINT_ERROR(("Failed to create notification object\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	_mali_osk_notification_queue_send(queue, notification);
+
+	return _MALI_OSK_ERR_OK;; /* all ok */
+}
+
+_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (!session->use_high_priority_job_queue) {
+		session->use_high_priority_job_queue = MALI_TRUE;
+		MALI_DEBUG_PRINT(2, ("Session 0x%08X with pid %d was granted higher priority.\n", session, _mali_osk_get_pid()));
+	}
+
+	return _MALI_OSK_ERR_OK;;
+}
+
+_mali_osk_errcode_t _mali_ukk_open(void **context)
+{
+	u32 i;
+	struct mali_session_data *session;
+
+	/* allocated struct to track this session */
+	session = (struct mali_session_data *)_mali_osk_calloc(1, sizeof(struct mali_session_data));
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_NOMEM);
+
+	MALI_DEBUG_PRINT(3, ("Session starting\n"));
+
+	/* create a response queue for this session */
+	session->ioctl_queue = _mali_osk_notification_queue_init();
+	if (NULL == session->ioctl_queue) {
+		_mali_osk_free(session);
+		MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+	}
+
+	session->page_directory = mali_mmu_pagedir_alloc();
+	if (NULL == session->page_directory) {
+		_mali_osk_notification_queue_term(session->ioctl_queue);
+		_mali_osk_free(session);
+		MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+	}
+
+	if (_MALI_OSK_ERR_OK != mali_mmu_pagedir_map(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE)) {
+		MALI_PRINT_ERROR(("Failed to map DLBU page into session\n"));
+		_mali_osk_notification_queue_term(session->ioctl_queue);
+		_mali_osk_free(session);
+		MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+	}
+
+	if (0 != mali_dlbu_phys_addr) {
+		mali_mmu_pagedir_update(session->page_directory, MALI_DLBU_VIRT_ADDR, mali_dlbu_phys_addr,
+					_MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+	}
+
+	if (_MALI_OSK_ERR_OK != mali_memory_session_begin(session)) {
+		mali_mmu_pagedir_free(session->page_directory);
+		_mali_osk_notification_queue_term(session->ioctl_queue);
+		_mali_osk_free(session);
+		MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+	}
+
+	/* Create soft system. */
+	session->soft_job_system = mali_soft_job_system_create(session);
+	if (NULL == session->soft_job_system) {
+		mali_memory_session_end(session);
+		mali_mmu_pagedir_free(session->page_directory);
+		_mali_osk_notification_queue_term(session->ioctl_queue);
+		_mali_osk_free(session);
+		MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+	}
+
+	/* Create timeline system. */
+	session->timeline_system = mali_timeline_system_create(session);
+	if (NULL == session->timeline_system) {
+		mali_soft_job_system_destroy(session->soft_job_system);
+		mali_memory_session_end(session);
+		mali_mmu_pagedir_free(session->page_directory);
+		_mali_osk_notification_queue_term(session->ioctl_queue);
+		_mali_osk_free(session);
+		MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+	}
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_init(&session->number_of_window_jobs, 0);
+#endif
+
+	session->use_high_priority_job_queue = MALI_FALSE;
+
+	/* Initialize list of PP jobs on this session. */
+	_MALI_OSK_INIT_LIST_HEAD(&session->pp_job_list);
+
+	/* Initialize the pp_job_fb_lookup_list array used to quickly lookup jobs from a given frame builder */
+	for (i = 0; i < MALI_PP_JOB_FB_LOOKUP_LIST_SIZE; ++i) {
+		_MALI_OSK_INIT_LIST_HEAD(&session->pp_job_fb_lookup_list[i]);
+	}
+
+	session->pid = _mali_osk_get_pid();
+	session->comm = _mali_osk_get_comm();
+	session->max_mali_mem_allocated = 0;
+	_mali_osk_memset(session->mali_mem_array, 0, sizeof(size_t) * MALI_MEM_TYPE_MAX);
+	*context = (void *)session;
+
+	/* Add session to the list of all sessions. */
+	mali_session_add(session);
+
+	MALI_DEBUG_PRINT(3, ("Session started\n"));
+	return _MALI_OSK_ERR_OK;;
+}
+
+#if defined(DEBUG)
+/* parameter used for debug */
+extern u32 num_pm_runtime_resume;
+extern u32 num_pm_updates;
+extern u32 num_pm_updates_up;
+extern u32 num_pm_updates_down;
+#endif
+
+_mali_osk_errcode_t _mali_ukk_close(void **context)
+{
+	struct mali_session_data *session;
+	MALI_CHECK_NON_NULL(context, _MALI_OSK_ERR_INVALID_ARGS);
+	session = (struct mali_session_data *)*context;
+
+	MALI_DEBUG_PRINT(3, ("Session ending\n"));
+
+	MALI_DEBUG_ASSERT_POINTER(session->soft_job_system);
+	MALI_DEBUG_ASSERT_POINTER(session->timeline_system);
+
+	/* Remove session from list of all sessions. */
+	mali_session_remove(session);
+
+	/* This flag is used to prevent queueing of jobs due to activation. */
+	session->is_aborting = MALI_TRUE;
+
+	/* Stop the soft job timer. */
+	mali_timeline_system_stop_timer(session->timeline_system);
+
+	/* Abort queued jobs */
+	mali_scheduler_abort_session(session);
+
+	/* Abort executing jobs */
+	mali_executor_abort_session(session);
+
+	/* Abort the soft job system. */
+	mali_soft_job_system_abort(session->soft_job_system);
+
+	/* Force execution of all pending bottom half processing for GP and PP. */
+	_mali_osk_wq_flush();
+
+	/* The session PP list should now be empty. */
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&session->pp_job_list));
+
+	/* At this point the GP and PP scheduler no longer has any jobs queued or running from this
+	 * session, and all soft jobs in the soft job system has been destroyed. */
+
+	/* Any trackers left in the timeline system are directly or indirectly waiting on external
+	 * sync fences.  Cancel all sync fence waiters to trigger activation of all remaining
+	 * trackers.  This call will sleep until all timelines are empty. */
+	mali_timeline_system_abort(session->timeline_system);
+
+	/* Flush pending work.
+	 * Needed to make sure all bottom half processing related to this
+	 * session has been completed, before we free internal data structures.
+	 */
+	_mali_osk_wq_flush();
+
+	/* Destroy timeline system. */
+	mali_timeline_system_destroy(session->timeline_system);
+	session->timeline_system = NULL;
+
+	/* Destroy soft system. */
+	mali_soft_job_system_destroy(session->soft_job_system);
+	session->soft_job_system = NULL;
+
+	MALI_DEBUG_CODE({
+		/* Check that the pp_job_fb_lookup_list array is empty. */
+		u32 i;
+		for (i = 0; i < MALI_PP_JOB_FB_LOOKUP_LIST_SIZE; ++i)
+		{
+			MALI_DEBUG_ASSERT(_mali_osk_list_empty(&session->pp_job_fb_lookup_list[i]));
+		}
+	});
+
+	/* Free remaining memory allocated to this session */
+	mali_memory_session_end(session);
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_term(&session->number_of_window_jobs);
+#endif
+
+	/* Free session data structures */
+	mali_mmu_pagedir_free(session->page_directory);
+	_mali_osk_notification_queue_term(session->ioctl_queue);
+	_mali_osk_free(session);
+
+	*context = NULL;
+
+	MALI_DEBUG_PRINT(3, ("Session has ended\n"));
+
+#if defined(DEBUG)
+	MALI_DEBUG_PRINT(3, ("Stats: # runtime resumes: %u\n", num_pm_runtime_resume));
+	MALI_DEBUG_PRINT(3, ("       # PM updates: .... %u (up %u, down %u)\n", num_pm_updates, num_pm_updates_up, num_pm_updates_down));
+
+	num_pm_runtime_resume = 0;
+	num_pm_updates = 0;
+	num_pm_updates_up = 0;
+	num_pm_updates_down = 0;
+#endif
+
+	return _MALI_OSK_ERR_OK;;
+}
+
+#if MALI_STATE_TRACKING
+u32 _mali_kernel_core_dump_state(char *buf, u32 size)
+{
+	int n = 0; /* Number of bytes written to buf */
+
+	n += mali_scheduler_dump_state(buf + n, size - n);
+	n += mali_executor_dump_state(buf + n, size - n);
+
+	return n;
+}
+#endif
diff --git a/utgard/r5p1/common/mali_kernel_core.h b/utgard/r5p1/common/mali_kernel_core.h
new file mode 100644
index 0000000..57d3cd2
--- /dev/null
+++ b/utgard/r5p1/common/mali_kernel_core.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_CORE_H__
+#define __MALI_KERNEL_CORE_H__
+
+#include "mali_osk.h"
+
+typedef enum {
+	_MALI_PRODUCT_ID_UNKNOWN,
+	_MALI_PRODUCT_ID_MALI200,
+	_MALI_PRODUCT_ID_MALI300,
+	_MALI_PRODUCT_ID_MALI400,
+	_MALI_PRODUCT_ID_MALI450,
+} _mali_product_id_t;
+
+extern mali_bool mali_gpu_class_is_mali450;
+
+_mali_osk_errcode_t mali_initialize_subsystems(void);
+
+void mali_terminate_subsystems(void);
+
+_mali_product_id_t mali_kernel_core_get_product_id(void);
+
+u32 mali_kernel_core_get_gpu_major_version(void);
+
+u32 mali_kernel_core_get_gpu_minor_version(void);
+
+u32 _mali_kernel_core_dump_state(char *buf, u32 size);
+
+MALI_STATIC_INLINE mali_bool mali_is_mali450(void)
+{
+#if defined(CONFIG_MALI450)
+	return mali_gpu_class_is_mali450;
+#else
+	return MALI_FALSE;
+#endif
+}
+
+MALI_STATIC_INLINE mali_bool mali_is_mali400(void)
+{
+#if !defined(CONFIG_MALI450)
+	return MALI_TRUE;
+#else
+	return !mali_gpu_class_is_mali450;
+#endif
+}
+
+#endif /* __MALI_KERNEL_CORE_H__ */
diff --git a/utgard/r5p1/common/mali_kernel_utilization.c b/utgard/r5p1/common/mali_kernel_utilization.c
new file mode 100755
index 0000000..63b9417
--- /dev/null
+++ b/utgard/r5p1/common/mali_kernel_utilization.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_utilization.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_scheduler.h"
+
+#include "mali_executor.h"
+#include "mali_dvfs_policy.h"
+#include "mali_control_timer.h"
+
+/* Thresholds for GP bound detection. */
+#define MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD 240
+#define MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD 250
+
+static _mali_osk_spinlock_irq_t *utilization_data_lock;
+
+static u32 num_running_gp_cores = 0;
+static u32 num_running_pp_cores = 0;
+
+static u64 work_start_time_gpu = 0;
+static u64 work_start_time_gp = 0;
+static u64 work_start_time_pp = 0;
+static u64 accumulated_work_time_gpu = 0;
+static u64 accumulated_work_time_gp = 0;
+static u64 accumulated_work_time_pp = 0;
+
+static u32 last_utilization_gpu = 0 ;
+static u32 last_utilization_gp = 0 ;
+static u32 last_utilization_pp = 0 ;
+
+void (*mali_utilization_callback)(struct mali_gpu_utilization_data *data) = NULL;
+
+/* Define the first timer control timer timeout in milliseconds */
+static u32 mali_control_first_timeout = 100;
+static struct mali_gpu_utilization_data mali_util_data = {0, };
+
+struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer)
+{
+	u64 time_now;
+	u32 leading_zeroes;
+	u32 shift_val;
+	u32 work_normalized_gpu;
+	u32 work_normalized_gp;
+	u32 work_normalized_pp;
+	u32 period_normalized;
+	u32 utilization_gpu;
+	u32 utilization_gp;
+	u32 utilization_pp;
+
+	mali_utilization_data_lock();
+
+	time_now = _mali_osk_time_get_ns();
+
+	*time_period = time_now - *start_time;
+
+	if (accumulated_work_time_gpu == 0 && work_start_time_gpu == 0) {
+		mali_control_timer_pause();
+		/*
+		 * No work done for this period
+		 * - No need to reschedule timer
+		 * - Report zero usage
+		 */
+		last_utilization_gpu = 0;
+		last_utilization_gp = 0;
+		last_utilization_pp = 0;
+
+		mali_util_data.utilization_gpu = last_utilization_gpu;
+		mali_util_data.utilization_gp = last_utilization_gp;
+		mali_util_data.utilization_pp = last_utilization_pp;
+
+		mali_utilization_data_unlock();
+
+		*need_add_timer = MALI_FALSE;
+
+		mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
+
+		MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
+		MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
+		MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));
+
+		return &mali_util_data;
+	}
+
+	/* If we are currently busy, update working period up to now */
+	if (work_start_time_gpu != 0) {
+		accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+		work_start_time_gpu = time_now;
+
+		/* GP and/or PP will also be busy if the GPU is busy at this point */
+
+		if (work_start_time_gp != 0) {
+			accumulated_work_time_gp += (time_now - work_start_time_gp);
+			work_start_time_gp = time_now;
+		}
+
+		if (work_start_time_pp != 0) {
+			accumulated_work_time_pp += (time_now - work_start_time_pp);
+			work_start_time_pp = time_now;
+		}
+	}
+
+	/*
+	 * We have two 64-bit values, a dividend and a divisor.
+	 * To avoid dependencies to a 64-bit divider, we shift down the two values
+	 * equally first.
+	 * We shift the dividend up and possibly the divisor down, making the result X in 256.
+	 */
+
+	/* Shift the 64-bit values down so they fit inside a 32-bit integer */
+	leading_zeroes = _mali_osk_clz((u32)(*time_period >> 32));
+	shift_val = 32 - leading_zeroes;
+	work_normalized_gpu = (u32)(accumulated_work_time_gpu >> shift_val);
+	work_normalized_gp = (u32)(accumulated_work_time_gp >> shift_val);
+	work_normalized_pp = (u32)(accumulated_work_time_pp >> shift_val);
+	period_normalized = (u32)(*time_period >> shift_val);
+
+	/*
+	 * Now, we should report the usage in parts of 256
+	 * this means we must shift up the dividend or down the divisor by 8
+	 * (we could do a combination, but we just use one for simplicity,
+	 * but the end result should be good enough anyway)
+	 */
+	if (period_normalized > 0x00FFFFFF) {
+		/* The divisor is so big that it is safe to shift it down */
+		period_normalized >>= 8;
+	} else {
+		/*
+		 * The divisor is so small that we can shift up the dividend, without loosing any data.
+		 * (dividend is always smaller than the divisor)
+		 */
+		work_normalized_gpu <<= 8;
+		work_normalized_gp <<= 8;
+		work_normalized_pp <<= 8;
+	}
+
+	utilization_gpu = work_normalized_gpu / period_normalized;
+	utilization_gp = work_normalized_gp / period_normalized;
+	utilization_pp = work_normalized_pp / period_normalized;
+
+	last_utilization_gpu = utilization_gpu;
+	last_utilization_gp = utilization_gp;
+	last_utilization_pp = utilization_pp;
+
+	if ((MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD < last_utilization_gp) &&
+	    (MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD > last_utilization_pp)) {
+		mali_executor_hint_enable(MALI_EXECUTOR_HINT_GP_BOUND);
+	} else {
+		mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
+	}
+
+	/* starting a new period */
+	accumulated_work_time_gpu = 0;
+	accumulated_work_time_gp = 0;
+	accumulated_work_time_pp = 0;
+
+	*start_time = time_now;
+
+	mali_util_data.utilization_gp = last_utilization_gp;
+	mali_util_data.utilization_gpu = last_utilization_gpu;
+	mali_util_data.utilization_pp = last_utilization_pp;
+
+	mali_utilization_data_unlock();
+
+	*need_add_timer = MALI_TRUE;
+
+	MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
+	MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
+	MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));
+
+	return &mali_util_data;
+}
+
+_mali_osk_errcode_t mali_utilization_init(void)
+{
+#if USING_GPU_UTILIZATION
+	_mali_osk_device_data data;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		if (NULL != data.utilization_callback) {
+			mali_utilization_callback = data.utilization_callback;
+			MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Utilization handler installed \n"));
+		}
+	}
+#endif /* defined(USING_GPU_UTILIZATION) */
+
+	if (NULL == mali_utilization_callback) {
+		MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: No platform utilization handler installed\n"));
+	}
+
+	utilization_data_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_UTILIZATION);
+	if (NULL == utilization_data_lock) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	num_running_gp_cores = 0;
+	num_running_pp_cores = 0;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_utilization_term(void)
+{
+	if (NULL != utilization_data_lock) {
+		_mali_osk_spinlock_irq_term(utilization_data_lock);
+	}
+}
+
+void mali_utilization_gp_start(void)
+{
+	mali_utilization_data_lock();
+
+	++num_running_gp_cores;
+	if (1 == num_running_gp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* First GP core started, consider GP busy from now and onwards */
+		work_start_time_gp = time_now;
+
+		if (0 == num_running_pp_cores) {
+			mali_bool is_resume = MALI_FALSE;
+			/*
+			 * There are no PP cores running, so this is also the point
+			 * at which we consider the GPU to be busy as well.
+			 */
+			work_start_time_gpu = time_now;
+
+			is_resume  = mali_control_timer_resume(time_now);
+
+			mali_utilization_data_unlock();
+
+			if (is_resume) {
+				/* Do some policy in new period for performance consideration */
+#if defined(CONFIG_MALI_DVFS)
+				/* Clear session->number_of_window_jobs, prepare parameter for dvfs */
+				mali_session_max_window_num();
+				if (0 == last_utilization_gpu) {
+					/*
+					 * for mali_dev_pause is called in set clock,
+					 * so each time we change clock, we will set clock to
+					 * highest step even if under down clock case,
+					 * it is not nessesary, so we only set the clock under
+					 * last time utilization equal 0, we stop the timer then
+					 * start the GPU again case
+					 */
+					mali_dvfs_policy_new_period();
+				}
+#endif
+				/*
+				 * First timeout using short interval for power consideration
+				 * because we give full power in the new period, but if the
+				 * job loading is light, finish in 10ms, the other time all keep
+				 * in high freq it will wast time.
+				 */
+				mali_control_timer_add(mali_control_first_timeout);
+			}
+		} else {
+			mali_utilization_data_unlock();
+		}
+
+	} else {
+		/* Nothing to do */
+		mali_utilization_data_unlock();
+	}
+}
+
+void mali_utilization_pp_start(void)
+{
+	mali_utilization_data_lock();
+
+	++num_running_pp_cores;
+	if (1 == num_running_pp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* First PP core started, consider PP busy from now and onwards */
+		work_start_time_pp = time_now;
+
+		if (0 == num_running_gp_cores) {
+			mali_bool is_resume = MALI_FALSE;
+			/*
+			 * There are no GP cores running, so this is also the point
+			 * at which we consider the GPU to be busy as well.
+			 */
+			work_start_time_gpu = time_now;
+
+			/* Start a new period if stoped */
+			is_resume = mali_control_timer_resume(time_now);
+
+			mali_utilization_data_unlock();
+
+			if (is_resume) {
+#if defined(CONFIG_MALI_DVFS)
+				/* Clear session->number_of_window_jobs, prepare parameter for dvfs */
+				mali_session_max_window_num();
+				if (0 == last_utilization_gpu) {
+					/*
+					 * for mali_dev_pause is called in set clock,
+					 * so each time we change clock, we will set clock to
+					 * highest step even if under down clock case,
+					 * it is not nessesary, so we only set the clock under
+					 * last time utilization equal 0, we stop the timer then
+					 * start the GPU again case
+					 */
+					mali_dvfs_policy_new_period();
+				}
+#endif
+
+				/*
+				 * First timeout using short interval for power consideration
+				 * because we give full power in the new period, but if the
+				 * job loading is light, finish in 10ms, the other time all keep
+				 * in high freq it will wast time.
+				 */
+				mali_control_timer_add(mali_control_first_timeout);
+			}
+		} else {
+			mali_utilization_data_unlock();
+		}
+	} else {
+		/* Nothing to do */
+		mali_utilization_data_unlock();
+	}
+}
+
+void mali_utilization_gp_end(void)
+{
+	mali_utilization_data_lock();
+
+	--num_running_gp_cores;
+	if (0 == num_running_gp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* Last GP core ended, consider GP idle from now and onwards */
+		accumulated_work_time_gp += (time_now - work_start_time_gp);
+		work_start_time_gp = 0;
+
+		if (0 == num_running_pp_cores) {
+			/*
+			 * There are no PP cores running, so this is also the point
+			 * at which we consider the GPU to be idle as well.
+			 */
+			accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+			work_start_time_gpu = 0;
+		}
+	}
+
+	mali_utilization_data_unlock();
+}
+
+void mali_utilization_pp_end(void)
+{
+	mali_utilization_data_lock();
+
+	--num_running_pp_cores;
+	if (0 == num_running_pp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* Last PP core ended, consider PP idle from now and onwards */
+		accumulated_work_time_pp += (time_now - work_start_time_pp);
+		work_start_time_pp = 0;
+
+		if (0 == num_running_gp_cores) {
+			/*
+			 * There are no GP cores running, so this is also the point
+			 * at which we consider the GPU to be idle as well.
+			 */
+			accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+			work_start_time_gpu = 0;
+		}
+	}
+
+	mali_utilization_data_unlock();
+}
+
+mali_bool mali_utilization_enabled(void)
+{
+#if defined(CONFIG_MALI_DVFS)
+	return mali_dvfs_policy_enabled();
+#else
+	return (NULL != mali_utilization_callback);
+#endif /* defined(CONFIG_MALI_DVFS) */
+}
+
+void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data)
+{
+	MALI_DEBUG_ASSERT_POINTER(mali_utilization_callback);
+
+	mali_utilization_callback(util_data);
+}
+
+void mali_utilization_reset(void)
+{
+	accumulated_work_time_gpu = 0;
+	accumulated_work_time_gp = 0;
+	accumulated_work_time_pp = 0;
+
+	last_utilization_gpu = 0;
+	last_utilization_gp = 0;
+	last_utilization_pp = 0;
+}
+
+void mali_utilization_data_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(utilization_data_lock);
+}
+
+void mali_utilization_data_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(utilization_data_lock);
+}
+
+void mali_utilization_data_assert_locked(void)
+{
+	MALI_DEBUG_ASSERT_LOCK_HELD(utilization_data_lock);
+}
+
+u32 _mali_ukk_utilization_gp_pp(void)
+{
+	return last_utilization_gpu;
+}
+
+u32 _mali_ukk_utilization_gp(void)
+{
+	return last_utilization_gp;
+}
+
+u32 _mali_ukk_utilization_pp(void)
+{
+	return last_utilization_pp;
+}
diff --git a/utgard/r5p1/common/mali_kernel_utilization.h b/utgard/r5p1/common/mali_kernel_utilization.h
new file mode 100755
index 0000000..3c20b19
--- /dev/null
+++ b/utgard/r5p1/common/mali_kernel_utilization.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_UTILIZATION_H__
+#define __MALI_KERNEL_UTILIZATION_H__
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_osk.h"
+
+/**
+ * Initialize/start the Mali GPU utilization metrics reporting.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_utilization_init(void);
+
+/**
+ * Terminate the Mali GPU utilization metrics reporting
+ */
+void mali_utilization_term(void);
+
+/**
+ * Check if Mali utilization is enabled
+ */
+mali_bool mali_utilization_enabled(void);
+
+/**
+ * Should be called when a job is about to execute a GP job
+ */
+void mali_utilization_gp_start(void);
+
+/**
+ * Should be called when a job has completed executing a GP job
+ */
+void mali_utilization_gp_end(void);
+
+/**
+ * Should be called when a job is about to execute a PP job
+ */
+void mali_utilization_pp_start(void);
+
+/**
+ * Should be called when a job has completed executing a PP job
+ */
+void mali_utilization_pp_end(void);
+
+/**
+ * Should be called to calcution the GPU utilization
+ */
+struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer);
+
+_mali_osk_spinlock_irq_t *mali_utilization_get_lock(void);
+
+void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data);
+
+void mali_utilization_data_lock(void);
+
+void mali_utilization_data_unlock(void);
+
+void mali_utilization_data_assert_locked(void);
+
+void mali_utilization_reset(void);
+
+
+#endif /* __MALI_KERNEL_UTILIZATION_H__ */
diff --git a/utgard/r5p1/common/mali_kernel_vsync.c b/utgard/r5p1/common/mali_kernel_vsync.c
new file mode 100755
index 0000000..2eed4c8
--- /dev/null
+++ b/utgard/r5p1/common/mali_kernel_vsync.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+
+#include "mali_osk_profiling.h"
+
+_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args)
+{
+	_mali_uk_vsync_event event = (_mali_uk_vsync_event)args->event;
+	MALI_IGNORE(event); /* event is not used for release code, and that is OK */
+
+	/*
+	 * Manually generate user space events in kernel space.
+	 * This saves user space from calling kernel space twice in this case.
+	 * We just need to remember to add pid and tid manually.
+	 */
+	if (event == _MALI_UK_VSYNC_EVENT_BEGIN_WAIT) {
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC,
+					      _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0);
+	}
+
+	if (event == _MALI_UK_VSYNC_EVENT_END_WAIT) {
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC,
+					      _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0);
+	}
+
+
+	MALI_DEBUG_PRINT(4, ("Received VSYNC event: %d\n", event));
+	MALI_SUCCESS;
+}
+
diff --git a/utgard/r5p1/common/mali_l2_cache.c b/utgard/r5p1/common/mali_l2_cache.c
new file mode 100755
index 0000000..ff53d6c
--- /dev/null
+++ b/utgard/r5p1/common/mali_l2_cache.c
@@ -0,0 +1,539 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_hw_core.h"
+#include "mali_scheduler.h"
+#include "mali_pm.h"
+#include "mali_pm_domain.h"
+
+/**
+ * Size of the Mali L2 cache registers in bytes
+ */
+#define MALI400_L2_CACHE_REGISTERS_SIZE 0x30
+
+/**
+ * Mali L2 cache register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_l2_cache_register {
+	MALI400_L2_CACHE_REGISTER_SIZE         = 0x0004,
+	MALI400_L2_CACHE_REGISTER_STATUS       = 0x0008,
+	/*unused                               = 0x000C */
+	MALI400_L2_CACHE_REGISTER_COMMAND      = 0x0010,
+	MALI400_L2_CACHE_REGISTER_CLEAR_PAGE   = 0x0014,
+	MALI400_L2_CACHE_REGISTER_MAX_READS    = 0x0018,
+	MALI400_L2_CACHE_REGISTER_ENABLE       = 0x001C,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0 = 0x0020,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0 = 0x0024,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1 = 0x0028,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1 = 0x002C,
+} mali_l2_cache_register;
+
+/**
+ * Mali L2 cache commands
+ * These are the commands that can be sent to the Mali L2 cache unit
+ */
+typedef enum mali_l2_cache_command {
+	MALI400_L2_CACHE_COMMAND_CLEAR_ALL = 0x01,
+} mali_l2_cache_command;
+
+/**
+ * Mali L2 cache commands
+ * These are the commands that can be sent to the Mali L2 cache unit
+ */
+typedef enum mali_l2_cache_enable {
+	MALI400_L2_CACHE_ENABLE_DEFAULT = 0x0, /* Default */
+	MALI400_L2_CACHE_ENABLE_ACCESS = 0x01,
+	MALI400_L2_CACHE_ENABLE_READ_ALLOCATE = 0x02,
+} mali_l2_cache_enable;
+
+/**
+ * Mali L2 cache status bits
+ */
+typedef enum mali_l2_cache_status {
+	MALI400_L2_CACHE_STATUS_COMMAND_BUSY = 0x01,
+	MALI400_L2_CACHE_STATUS_DATA_BUSY    = 0x02,
+} mali_l2_cache_status;
+
+#define MALI400_L2_MAX_READS_NOT_SET -1
+
+static struct mali_l2_cache_core *
+	mali_global_l2s[MALI_MAX_NUMBER_OF_L2_CACHE_CORES] = { NULL, };
+static u32 mali_global_num_l2s = 0;
+
+int mali_l2_max_reads = MALI400_L2_MAX_READS_NOT_SET;
+
+
+/* Local helper functions */
+
+static void mali_l2_cache_reset(struct mali_l2_cache_core *cache);
+
+static _mali_osk_errcode_t mali_l2_cache_send_command(
+	struct mali_l2_cache_core *cache, u32 reg, u32 val);
+
+static void mali_l2_cache_lock(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	_mali_osk_spinlock_irq_lock(cache->lock);
+}
+
+static void mali_l2_cache_unlock(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	_mali_osk_spinlock_irq_unlock(cache->lock);
+}
+
+/* Implementation of the L2 cache interface */
+
+struct mali_l2_cache_core *mali_l2_cache_create(
+	_mali_osk_resource_t *resource, u32 domain_index)
+{
+	struct mali_l2_cache_core *cache = NULL;
+#if defined(DEBUG)
+	u32 cache_size;
+#endif
+
+	MALI_DEBUG_PRINT(4, ("Mali L2 cache: Creating Mali L2 cache: %s\n",
+			     resource->description));
+
+	if (mali_global_num_l2s >= MALI_MAX_NUMBER_OF_L2_CACHE_CORES) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Too many L2 caches\n"));
+		return NULL;
+	}
+
+	cache = _mali_osk_malloc(sizeof(struct mali_l2_cache_core));
+	if (NULL == cache) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Failed to allocate memory for L2 cache core\n"));
+		return NULL;
+	}
+
+	cache->core_id =  mali_global_num_l2s;
+	cache->counter_src0 = MALI_HW_CORE_NO_COUNTER;
+	cache->counter_src1 = MALI_HW_CORE_NO_COUNTER;
+	cache->counter_value0_base = 0;
+	cache->counter_value1_base = 0;
+	cache->pm_domain = NULL;
+	cache->power_is_on = MALI_FALSE;
+	cache->last_invalidated_id = 0;
+
+	if (_MALI_OSK_ERR_OK != mali_hw_core_create(&cache->hw_core,
+			resource, MALI400_L2_CACHE_REGISTERS_SIZE)) {
+		_mali_osk_free(cache);
+		return NULL;
+	}
+
+#if defined(DEBUG)
+	cache_size = mali_hw_core_register_read(&cache->hw_core,
+						MALI400_L2_CACHE_REGISTER_SIZE);
+	MALI_DEBUG_PRINT(2, ("Mali L2 cache: Created %s: % 3uK, %u-way, % 2ubyte cache line, % 3ubit external bus\n",
+			     resource->description,
+			     1 << (((cache_size >> 16) & 0xff) - 10),
+			     1 << ((cache_size >> 8) & 0xff),
+			     1 << (cache_size & 0xff),
+			     1 << ((cache_size >> 24) & 0xff)));
+#endif
+
+	cache->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED,
+			_MALI_OSK_LOCK_ORDER_L2);
+	if (NULL == cache->lock) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Failed to create counter lock for L2 cache core %s\n",
+				  cache->hw_core.description));
+		mali_hw_core_delete(&cache->hw_core);
+		_mali_osk_free(cache);
+		return NULL;
+	}
+
+	/* register with correct power domain */
+	cache->pm_domain = mali_pm_register_l2_cache(
+				   domain_index, cache);
+
+	mali_global_l2s[mali_global_num_l2s] = cache;
+	mali_global_num_l2s++;
+
+	return cache;
+}
+
+void mali_l2_cache_delete(struct mali_l2_cache_core *cache)
+{
+	u32 i;
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		if (mali_global_l2s[i] != cache) {
+			continue;
+		}
+
+		mali_global_l2s[i] = NULL;
+		mali_global_num_l2s--;
+
+		if (i == mali_global_num_l2s) {
+			/* Removed last element, nothing more to do */
+			break;
+		}
+
+		/*
+		 * We removed a l2 cache from the middle of the array,
+		 * so move the last l2 cache to current position
+		 */
+		mali_global_l2s[i] = mali_global_l2s[mali_global_num_l2s];
+		mali_global_l2s[mali_global_num_l2s] = NULL;
+
+		/* All good */
+		break;
+	}
+
+	_mali_osk_spinlock_irq_term(cache->lock);
+	mali_hw_core_delete(&cache->hw_core);
+	_mali_osk_free(cache);
+}
+
+void mali_l2_cache_power_up(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	mali_l2_cache_lock(cache);
+
+	mali_l2_cache_reset(cache);
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == cache->power_is_on);
+	cache->power_is_on = MALI_TRUE;
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_power_down(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	mali_l2_cache_lock(cache);
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == cache->power_is_on);
+
+	/*
+	 * The HW counters will start from zero again when we resume,
+	 * but we should report counters as always increasing.
+	 * Take a copy of the HW values now in order to add this to
+	 * the values we report after being powered up.
+	 *
+	 * The physical power off of the L2 cache might be outside our
+	 * own control (e.g. runtime PM). That is why we must manually
+	 * set set the counter value to zero as well.
+	 */
+
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+		cache->counter_value0_base += mali_hw_core_register_read(
+						      &cache->hw_core,
+						      MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0);
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0, 0);
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		cache->counter_value1_base += mali_hw_core_register_read(
+						      &cache->hw_core,
+						      MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1);
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1, 0);
+	}
+
+
+	cache->power_is_on = MALI_FALSE;
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_core_set_counter_src(
+	struct mali_l2_cache_core *cache, u32 source_id, u32 counter)
+{
+	u32 reg_offset_src;
+	u32 reg_offset_val;
+
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT(source_id >= 0 && source_id <= 1);
+
+	mali_l2_cache_lock(cache);
+
+	if (0 == source_id) {
+		/* start counting from 0 */
+		cache->counter_value0_base = 0;
+		cache->counter_src0 = counter;
+		reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0;
+		reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0;
+	} else {
+		/* start counting from 0 */
+		cache->counter_value1_base = 0;
+		cache->counter_src1 = counter;
+		reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1;
+		reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1;
+	}
+
+	if (cache->power_is_on) {
+		u32 hw_src;
+
+		if (MALI_HW_CORE_NO_COUNTER != counter) {
+			hw_src = counter;
+		} else {
+			hw_src = 0; /* disable value for HW */
+		}
+
+		/* Set counter src */
+		mali_hw_core_register_write(&cache->hw_core,
+					    reg_offset_src, hw_src);
+
+		/* Make sure the HW starts counting from 0 again */
+		mali_hw_core_register_write(&cache->hw_core,
+					    reg_offset_val, 0);
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_core_get_counter_values(
+	struct mali_l2_cache_core *cache,
+	u32 *src0, u32 *value0, u32 *src1, u32 *value1)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT(NULL != src0);
+	MALI_DEBUG_ASSERT(NULL != value0);
+	MALI_DEBUG_ASSERT(NULL != src1);
+	MALI_DEBUG_ASSERT(NULL != value1);
+
+	mali_l2_cache_lock(cache);
+
+	*src0 = cache->counter_src0;
+	*src1 = cache->counter_src1;
+
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+		if (MALI_TRUE == cache->power_is_on) {
+			*value0 = mali_hw_core_register_read(&cache->hw_core,
+							     MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0);
+		} else {
+			*value0 = 0;
+		}
+
+		/* Add base offset value (in case we have been power off) */
+		*value0 += cache->counter_value0_base;
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		if (MALI_TRUE == cache->power_is_on) {
+			*value1 = mali_hw_core_register_read(&cache->hw_core,
+							     MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1);
+		} else {
+			*value1 = 0;
+		}
+
+		/* Add base offset value (in case we have been power off) */
+		*value1 += cache->counter_value1_base;
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index)
+{
+	if (mali_global_num_l2s > index) {
+		return mali_global_l2s[index];
+	}
+
+	return NULL;
+}
+
+u32 mali_l2_cache_core_get_glob_num_l2_cores(void)
+{
+	return mali_global_num_l2s;
+}
+
+void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	if (NULL == cache) {
+		return;
+	}
+
+	mali_l2_cache_lock(cache);
+
+	cache->last_invalidated_id = mali_scheduler_get_new_cache_order();
+	mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND,
+				   MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_invalidate_conditional(
+	struct mali_l2_cache_core *cache, u32 id)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	if (NULL == cache) {
+		return;
+	}
+
+	/*
+	 * If the last cache invalidation was done by a job with a higher id we
+	 * don't have to flush. Since user space will store jobs w/ their
+	 * corresponding memory in sequence (first job #0, then job #1, ...),
+	 * we don't have to flush for job n-1 if job n has already invalidated
+	 * the cache since we know for sure that job n-1's memory was already
+	 * written when job n was started.
+	 */
+
+	mali_l2_cache_lock(cache);
+
+	if (((s32)id) > ((s32)cache->last_invalidated_id)) {
+		/* Set latest invalidated id to current "point in time" */
+		cache->last_invalidated_id =
+			mali_scheduler_get_new_cache_order();
+		mali_l2_cache_send_command(cache,
+					   MALI400_L2_CACHE_REGISTER_COMMAND,
+					   MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_invalidate_all(void)
+{
+	u32 i;
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		struct mali_l2_cache_core *cache = mali_global_l2s[i];
+		_mali_osk_errcode_t ret;
+
+		MALI_DEBUG_ASSERT_POINTER(cache);
+
+		mali_l2_cache_lock(cache);
+
+		if (MALI_TRUE != cache->power_is_on) {
+			mali_l2_cache_unlock(cache);
+			continue;
+		}
+
+		cache->last_invalidated_id =
+			mali_scheduler_get_new_cache_order();
+
+		ret = mali_l2_cache_send_command(cache,
+						 MALI400_L2_CACHE_REGISTER_COMMAND,
+						 MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to invalidate cache\n"));
+		}
+
+		mali_l2_cache_unlock(cache);
+	}
+}
+
+void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages)
+{
+	u32 i;
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		struct mali_l2_cache_core *cache = mali_global_l2s[i];
+		u32 j;
+
+		MALI_DEBUG_ASSERT_POINTER(cache);
+
+		mali_l2_cache_lock(cache);
+
+		if (MALI_TRUE != cache->power_is_on) {
+			mali_l2_cache_unlock(cache);
+			continue;
+		}
+
+		for (j = 0; j < num_pages; j++) {
+			_mali_osk_errcode_t ret;
+
+			ret = mali_l2_cache_send_command(cache,
+							 MALI400_L2_CACHE_REGISTER_CLEAR_PAGE,
+							 pages[j]);
+			if (_MALI_OSK_ERR_OK != ret) {
+				MALI_PRINT_ERROR(("Failed to invalidate cache (page)\n"));
+			}
+		}
+
+		mali_l2_cache_unlock(cache);
+	}
+}
+
+/* -------- local helper functions below -------- */
+
+static void mali_l2_cache_reset(struct mali_l2_cache_core *cache)
+{
+    MALI_DEBUG_ASSERT_POINTER(cache);
+    MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock);
+
+    /* Kasin Added, skip off power domain. */
+    if (cache && cache->pm_domain && cache->pm_domain->power_is_on == MALI_FALSE) {
+        printk("===========%s, %d skip off power domain?\n", __FUNCTION__, __LINE__);
+    }
+
+
+    /* Invalidate cache (just to keep it in a known state at startup) */
+    mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND,
+            MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+
+    /* Enable cache */
+    mali_hw_core_register_write(&cache->hw_core,
+            MALI400_L2_CACHE_REGISTER_ENABLE,
+            (u32)MALI400_L2_CACHE_ENABLE_ACCESS |
+            (u32)MALI400_L2_CACHE_ENABLE_READ_ALLOCATE);
+
+	if (MALI400_L2_MAX_READS_NOT_SET != mali_l2_max_reads) {
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_MAX_READS,
+					    (u32)mali_l2_max_reads);
+	}
+
+	/* Restart any performance counters (if enabled) */
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0,
+					    cache->counter_src0);
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1,
+					    cache->counter_src1);
+	}
+}
+
+static _mali_osk_errcode_t mali_l2_cache_send_command(
+	struct mali_l2_cache_core *cache, u32 reg, u32 val)
+{
+	int i = 0;
+	const int loop_count = 100000;
+
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock);
+
+	/*
+	 * First, wait for L2 cache command handler to go idle.
+	 * (Commands received while processing another command will be ignored)
+	 */
+	for (i = 0; i < loop_count; i++) {
+		if (!(mali_hw_core_register_read(&cache->hw_core,
+						 MALI400_L2_CACHE_REGISTER_STATUS) &
+		      (u32)MALI400_L2_CACHE_STATUS_COMMAND_BUSY)) {
+			break;
+		}
+	}
+
+	if (i == loop_count) {
+		MALI_DEBUG_PRINT(1, ("Mali L2 cache: aborting wait for command interface to go idle\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* then issue the command */
+	mali_hw_core_register_write(&cache->hw_core, reg, val);
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r5p1/common/mali_l2_cache.h b/utgard/r5p1/common/mali_l2_cache.h
new file mode 100755
index 0000000..6dc8ec2
--- /dev/null
+++ b/utgard/r5p1/common/mali_l2_cache.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_L2_CACHE_H__
+#define __MALI_KERNEL_L2_CACHE_H__
+
+#include "mali_osk.h"
+#include "mali_hw_core.h"
+
+#define MALI_MAX_NUMBER_OF_L2_CACHE_CORES  3
+/* Maximum 1 GP and 4 PP for an L2 cache core (Mali-400 MP4) */
+#define MALI_MAX_NUMBER_OF_GROUPS_PER_L2_CACHE 5
+
+/**
+ * Definition of the L2 cache core struct
+ * Used to track a L2 cache unit in the system.
+ * Contains information about the mapping of the registers
+ */
+struct mali_l2_cache_core {
+	/* Common HW core functionality */
+	struct mali_hw_core hw_core;
+
+	/* Synchronize L2 cache access */
+	_mali_osk_spinlock_irq_t *lock;
+
+	/* Unique core ID */
+	u32 core_id;
+
+	/* The power domain this L2 cache belongs to */
+	struct mali_pm_domain *pm_domain;
+
+	/* MALI_TRUE if power is on for this L2 cache */
+	mali_bool power_is_on;
+
+	/* A "timestamp" to avoid unnecessary flushes */
+	u32 last_invalidated_id;
+
+	/* Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+	u32 counter_src0;
+
+	/* Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+	u32 counter_src1;
+
+	/*
+	 * Performance counter 0 value base/offset
+	 * (allows accumulative reporting even after power off)
+	 */
+	u32 counter_value0_base;
+
+	/*
+	 * Performance counter 0 value base/offset
+	 * (allows accumulative reporting even after power off)
+	 */
+	u32 counter_value1_base;
+
+	/* Used by PM domains to link L2 caches of same domain */
+	_mali_osk_list_t pm_domain_list;
+};
+
+_mali_osk_errcode_t mali_l2_cache_initialize(void);
+void mali_l2_cache_terminate(void);
+
+struct mali_l2_cache_core *mali_l2_cache_create(
+	_mali_osk_resource_t *resource, u32 domain_index);
+void mali_l2_cache_delete(struct mali_l2_cache_core *cache);
+
+MALI_STATIC_INLINE u32 mali_l2_cache_get_id(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->core_id;
+}
+
+MALI_STATIC_INLINE struct mali_pm_domain *mali_l2_cache_get_pm_domain(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->pm_domain;
+}
+
+void mali_l2_cache_power_up(struct mali_l2_cache_core *cache);
+void mali_l2_cache_power_down(struct mali_l2_cache_core *cache);
+
+void mali_l2_cache_core_set_counter_src(
+	struct mali_l2_cache_core *cache, u32 source_id, u32 counter);
+
+MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src0(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->counter_src0;
+}
+
+MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src1(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->counter_src1;
+}
+
+void mali_l2_cache_core_get_counter_values(
+	struct mali_l2_cache_core *cache,
+	u32 *src0, u32 *value0, u32 *src1, u32 *value1);
+
+struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index);
+u32 mali_l2_cache_core_get_glob_num_l2_cores(void);
+
+struct mali_group *mali_l2_cache_get_group(
+	struct mali_l2_cache_core *cache, u32 index);
+
+void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache);
+void mali_l2_cache_invalidate_conditional(
+	struct mali_l2_cache_core *cache, u32 id);
+
+void mali_l2_cache_invalidate_all(void);
+void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages);
+
+#endif /* __MALI_KERNEL_L2_CACHE_H__ */
diff --git a/utgard/r5p1/common/mali_mem_validation.c b/utgard/r5p1/common/mali_mem_validation.c
new file mode 100644
index 0000000..d9d3f5e
--- /dev/null
+++ b/utgard/r5p1/common/mali_mem_validation.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_mem_validation.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#define MALI_INVALID_MEM_ADDR 0xFFFFFFFF
+
+typedef struct {
+	u32 phys_base;        /**< Mali physical base of the memory, page aligned */
+	u32 size;             /**< size in bytes of the memory, multiple of page size */
+} _mali_mem_validation_t;
+
+static _mali_mem_validation_t mali_mem_validator = { MALI_INVALID_MEM_ADDR, MALI_INVALID_MEM_ADDR };
+
+_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size)
+{
+	/* Check that no other MEM_VALIDATION resources exist */
+	if (MALI_INVALID_MEM_ADDR != mali_mem_validator.phys_base) {
+		MALI_PRINT_ERROR(("Failed to add frame buffer memory; another range is already specified\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Check restrictions on page alignment */
+	if ((0 != (start & (~_MALI_OSK_CPU_PAGE_MASK))) ||
+	    (0 != (size & (~_MALI_OSK_CPU_PAGE_MASK)))) {
+		MALI_PRINT_ERROR(("Failed to add frame buffer memory; incorrect alignment\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	mali_mem_validator.phys_base = start;
+	mali_mem_validator.size = size;
+	MALI_DEBUG_PRINT(2, ("Memory Validator installed for Mali physical address base=0x%08X, size=0x%08X\n",
+			     mali_mem_validator.phys_base, mali_mem_validator.size));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size)
+{
+#if 0
+	if (phys_addr < (phys_addr + size)) { /* Don't allow overflow (or zero size) */
+		if ((0 == (phys_addr & (~_MALI_OSK_CPU_PAGE_MASK))) &&
+		    (0 == (size & (~_MALI_OSK_CPU_PAGE_MASK)))) {
+			if ((phys_addr          >= mali_mem_validator.phys_base) &&
+			    ((phys_addr + (size - 1)) >= mali_mem_validator.phys_base) &&
+			    (phys_addr          <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1))) &&
+			    ((phys_addr + (size - 1)) <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1)))) {
+				MALI_DEBUG_PRINT(3, ("Accepted range 0x%08X + size 0x%08X (= 0x%08X)\n", phys_addr, size, (phys_addr + size - 1)));
+				return _MALI_OSK_ERR_OK;
+			}
+		}
+	}
+
+	MALI_PRINT_ERROR(("MALI PHYSICAL RANGE VALIDATION ERROR: The range supplied was: phys_base=0x%08X, size=0x%08X\n", phys_addr, size));
+
+	return _MALI_OSK_ERR_FAULT;
+#else
+	return _MALI_OSK_ERR_OK;
+#endif
+}
diff --git a/utgard/r5p1/common/mali_mem_validation.h b/utgard/r5p1/common/mali_mem_validation.h
new file mode 100644
index 0000000..2677206
--- /dev/null
+++ b/utgard/r5p1/common/mali_mem_validation.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2011-2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEM_VALIDATION_H__
+#define __MALI_MEM_VALIDATION_H__
+
+#include "mali_osk.h"
+
+_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size);
+_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size);
+
+#endif /* __MALI_MEM_VALIDATION_H__ */
diff --git a/utgard/r5p1/common/mali_mmu.c b/utgard/r5p1/common/mali_mmu.c
new file mode 100755
index 0000000..fac1f7c
--- /dev/null
+++ b/utgard/r5p1/common/mali_mmu.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_ukk.h"
+
+#include "mali_mmu.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "mali_mmu_page_directory.h"
+
+/**
+ * Size of the MMU registers in bytes
+ */
+#define MALI_MMU_REGISTERS_SIZE 0x24
+
+/**
+ * MMU commands
+ * These are the commands that can be sent
+ * to the MMU unit.
+ */
+typedef enum mali_mmu_command {
+	MALI_MMU_COMMAND_ENABLE_PAGING = 0x00, /**< Enable paging (memory translation) */
+	MALI_MMU_COMMAND_DISABLE_PAGING = 0x01, /**< Disable paging (memory translation) */
+	MALI_MMU_COMMAND_ENABLE_STALL = 0x02, /**<  Enable stall on page fault */
+	MALI_MMU_COMMAND_DISABLE_STALL = 0x03, /**< Disable stall on page fault */
+	MALI_MMU_COMMAND_ZAP_CACHE = 0x04, /**< Zap the entire page table cache */
+	MALI_MMU_COMMAND_PAGE_FAULT_DONE = 0x05, /**< Page fault processed */
+	MALI_MMU_COMMAND_HARD_RESET = 0x06 /**< Reset the MMU back to power-on settings */
+} mali_mmu_command;
+
+static void mali_mmu_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_mmu_probe_ack(void *data);
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu);
+
+/* page fault queue flush helper pages
+ * note that the mapping pointers are currently unused outside of the initialization functions */
+static mali_dma_addr mali_page_fault_flush_page_directory = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_page_directory_mapping = NULL;
+static mali_dma_addr mali_page_fault_flush_page_table = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_page_table_mapping = NULL;
+static mali_dma_addr mali_page_fault_flush_data_page = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_data_page_mapping = NULL;
+
+/* an empty page directory (no address valid) which is active on any MMU not currently marked as in use */
+static mali_dma_addr mali_empty_page_directory_phys   = MALI_INVALID_PAGE;
+static mali_io_address mali_empty_page_directory_virt = NULL;
+
+
+_mali_osk_errcode_t mali_mmu_initialize(void)
+{
+	/* allocate the helper pages */
+	mali_empty_page_directory_phys = mali_allocate_empty_page(&mali_empty_page_directory_virt);
+	if (0 == mali_empty_page_directory_phys) {
+		MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate empty page directory.\n"));
+		mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	if (_MALI_OSK_ERR_OK != mali_create_fault_flush_pages(&mali_page_fault_flush_page_directory,
+			&mali_page_fault_flush_page_directory_mapping,
+			&mali_page_fault_flush_page_table,
+			&mali_page_fault_flush_page_table_mapping,
+			&mali_page_fault_flush_data_page,
+			&mali_page_fault_flush_data_page_mapping)) {
+		MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate fault flush pages\n"));
+		mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt);
+		mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+		mali_empty_page_directory_virt = NULL;
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mmu_terminate(void)
+{
+	MALI_DEBUG_PRINT(3, ("Mali MMU: terminating\n"));
+
+	/* Free global helper pages */
+	mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt);
+	mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+	mali_empty_page_directory_virt = NULL;
+
+	/* Free the page fault flush pages */
+	mali_destroy_fault_flush_pages(&mali_page_fault_flush_page_directory,
+				       &mali_page_fault_flush_page_directory_mapping,
+				       &mali_page_fault_flush_page_table,
+				       &mali_page_fault_flush_page_table_mapping,
+				       &mali_page_fault_flush_data_page,
+				       &mali_page_fault_flush_data_page_mapping);
+}
+
+struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual)
+{
+	struct mali_mmu_core *mmu = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(resource);
+
+	MALI_DEBUG_PRINT(2, ("Mali MMU: Creating Mali MMU: %s\n", resource->description));
+
+	mmu = _mali_osk_calloc(1, sizeof(struct mali_mmu_core));
+	if (NULL != mmu) {
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&mmu->hw_core, resource, MALI_MMU_REGISTERS_SIZE)) {
+			if (_MALI_OSK_ERR_OK == mali_group_add_mmu_core(group, mmu)) {
+				if (is_virtual) {
+					/* Skip reset and IRQ setup for virtual MMU */
+					return mmu;
+				}
+
+				if (_MALI_OSK_ERR_OK == mali_mmu_reset(mmu)) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					mmu->irq = _mali_osk_irq_init(resource->irq,
+								      mali_group_upper_half_mmu,
+								      group,
+								      mali_mmu_probe_trigger,
+								      mali_mmu_probe_ack,
+								      mmu,
+								      resource->description);
+					if (NULL != mmu->irq) {
+						return mmu;
+					} else {
+						MALI_PRINT_ERROR(("Mali MMU: Failed to setup interrupt handlers for MMU %s\n", mmu->hw_core.description));
+					}
+				}
+				mali_group_remove_mmu_core(group);
+			} else {
+				MALI_PRINT_ERROR(("Mali MMU: Failed to add core %s to group\n", mmu->hw_core.description));
+			}
+			mali_hw_core_delete(&mmu->hw_core);
+		}
+
+		_mali_osk_free(mmu);
+	} else {
+		MALI_PRINT_ERROR(("Failed to allocate memory for MMU\n"));
+	}
+
+	return NULL;
+}
+
+void mali_mmu_delete(struct mali_mmu_core *mmu)
+{
+	if (NULL != mmu->irq) {
+		_mali_osk_irq_term(mmu->irq);
+	}
+
+	mali_hw_core_delete(&mmu->hw_core);
+	_mali_osk_free(mmu);
+}
+
+static void mali_mmu_enable_paging(struct mali_mmu_core *mmu)
+{
+	int i;
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_PAGING);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS) & MALI_MMU_STATUS_BIT_PAGING_ENABLED) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Enable paging request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+	}
+}
+
+/**
+ * Issues the enable stall command to the MMU and waits for HW to complete the request
+ * @param mmu The MMU to enable paging for
+ * @return MALI_TRUE if HW stall was successfully engaged, otherwise MALI_FALSE (req timed out)
+ */
+static mali_bool mali_mmu_enable_stall(struct mali_mmu_core *mmu)
+{
+	int i;
+	u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+
+	if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+		MALI_DEBUG_PRINT(4, ("MMU stall is implicit when Paging is not enabled.\n"));
+		return MALI_TRUE;
+	}
+
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(3, ("Aborting MMU stall request since it is in pagefault state.\n"));
+		return MALI_FALSE;
+	}
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_STALL);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+		if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+			break;
+		}
+		if ((mmu_status & MALI_MMU_STATUS_BIT_STALL_ACTIVE) && (0 == (mmu_status & MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE))) {
+			break;
+		}
+		if (0 == (mmu_status & (MALI_MMU_STATUS_BIT_PAGING_ENABLED))) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_DEBUG_PRINT(2, ("Enable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+		return MALI_FALSE;
+	}
+
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(2, ("Aborting MMU stall request since it has a pagefault.\n"));
+		return MALI_FALSE;
+	}
+
+	return MALI_TRUE;
+}
+
+/**
+ * Issues the disable stall command to the MMU and waits for HW to complete the request
+ * @param mmu The MMU to enable paging for
+ */
+static void mali_mmu_disable_stall(struct mali_mmu_core *mmu)
+{
+	int i;
+	u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+
+	if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+		MALI_DEBUG_PRINT(3, ("MMU disable skipped since it was not enabled.\n"));
+		return;
+	}
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(2, ("Aborting MMU disable stall request since it is in pagefault state.\n"));
+		return;
+	}
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_DISABLE_STALL);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		u32 status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+		if (0 == (status & MALI_MMU_STATUS_BIT_STALL_ACTIVE)) {
+			break;
+		}
+		if (status &  MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+			break;
+		}
+		if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) MALI_DEBUG_PRINT(1, ("Disable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+}
+
+void mali_mmu_page_fault_done(struct mali_mmu_core *mmu)
+{
+	MALI_DEBUG_PRINT(4, ("Mali MMU: %s: Leaving page fault mode\n", mmu->hw_core.description));
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_PAGE_FAULT_DONE);
+}
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu)
+{
+	int i;
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, 0xCAFEBABE);
+	MALI_DEBUG_ASSERT(0xCAFEB000 == mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR));
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_HARD_RESET);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR) == 0) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Reset request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu)
+{
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT;
+	mali_bool stall_success;
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	stall_success = mali_mmu_enable_stall(mmu);
+	if (!stall_success) {
+		err = _MALI_OSK_ERR_BUSY;
+	}
+
+	MALI_DEBUG_PRINT(3, ("Mali MMU: mali_kernel_mmu_reset: %s\n", mmu->hw_core.description));
+
+	if (_MALI_OSK_ERR_OK == mali_mmu_raw_reset(mmu)) {
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+		/* no session is active, so just activate the empty page directory */
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, mali_empty_page_directory_phys);
+		mali_mmu_enable_paging(mmu);
+		err = _MALI_OSK_ERR_OK;
+	}
+	mali_mmu_disable_stall(mmu);
+
+	return err;
+}
+
+mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success = mali_mmu_enable_stall(mmu);
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+
+	if (MALI_FALSE == stall_success) {
+		/* False means that it is in Pagefault state. Not possible to disable_stall then */
+		return MALI_FALSE;
+	}
+
+	mali_mmu_disable_stall(mmu);
+	return MALI_TRUE;
+}
+
+void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+}
+
+
+void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_ZAP_ONE_LINE, MALI_MMU_PDE_ENTRY(mali_address));
+}
+
+static void mali_mmu_activate_address_space(struct mali_mmu_core *mmu, u32 page_directory)
+{
+	/* The MMU must be in stalled or page fault mode, for this writing to work */
+	MALI_DEBUG_ASSERT(0 != (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)
+				& (MALI_MMU_STATUS_BIT_STALL_ACTIVE | MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE)));
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, page_directory);
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+
+}
+
+void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir)
+{
+	mali_bool stall_success;
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	MALI_DEBUG_PRINT(5, ("Asked to activate page directory 0x%x on MMU %s\n", pagedir, mmu->hw_core.description));
+
+	stall_success = mali_mmu_enable_stall(mmu);
+	MALI_DEBUG_ASSERT(stall_success);
+	MALI_IGNORE(stall_success);
+	mali_mmu_activate_address_space(mmu, pagedir->page_directory);
+	mali_mmu_disable_stall(mmu);
+}
+
+void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success;
+
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+	MALI_DEBUG_PRINT(3, ("Activating the empty page directory on MMU %s\n", mmu->hw_core.description));
+
+	stall_success = mali_mmu_enable_stall(mmu);
+
+	/* This function can only be called when the core is idle, so it could not fail. */
+	MALI_DEBUG_ASSERT(stall_success);
+	MALI_IGNORE(stall_success);
+
+	mali_mmu_activate_address_space(mmu, mali_empty_page_directory_phys);
+	mali_mmu_disable_stall(mmu);
+}
+
+void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success;
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	MALI_DEBUG_PRINT(3, ("Activating the page fault flush page directory on MMU %s\n", mmu->hw_core.description));
+	stall_success = mali_mmu_enable_stall(mmu);
+	/* This function is expect to fail the stalling, since it might be in PageFault mode when it is called */
+	mali_mmu_activate_address_space(mmu, mali_page_fault_flush_page_directory);
+	if (MALI_TRUE == stall_success) mali_mmu_disable_stall(mmu);
+}
+
+/* Is called when we want the mmu to give an interrupt */
+static void mali_mmu_probe_trigger(void *data)
+{
+	struct mali_mmu_core *mmu = (struct mali_mmu_core *)data;
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+}
+
+/* Is called when the irq probe wants the mmu to acknowledge an interrupt from the hw */
+extern int mali_page_fault;
+static _mali_osk_errcode_t mali_mmu_probe_ack(void *data)
+{
+	struct mali_mmu_core *mmu = (struct mali_mmu_core *)data;
+	u32 int_stat;
+
+	int_stat = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS);
+
+	MALI_DEBUG_PRINT(2, ("mali_mmu_probe_irq_acknowledge: intstat 0x%x\n", int_stat));
+	if (int_stat & MALI_MMU_INTERRUPT_PAGE_FAULT) {
+		MALI_DEBUG_PRINT(2, ("Probe: Page fault detect: PASSED\n"));
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_PAGE_FAULT);
+	} else {
+		MALI_DEBUG_PRINT(1, ("Probe: Page fault detect: FAILED\n"));
+		mali_page_fault++;
+	}
+
+	if (int_stat & MALI_MMU_INTERRUPT_READ_BUS_ERROR) {
+		MALI_DEBUG_PRINT(2, ("Probe: Bus read error detect: PASSED\n"));
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+	} else {
+		MALI_DEBUG_PRINT(1, ("Probe: Bus read error detect: FAILED\n"));
+		mali_page_fault++;
+	}
+
+	if ((int_stat & (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) ==
+	    (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) {
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+#if 0
+void mali_mmu_print_state(struct mali_mmu_core *mmu)
+{
+	MALI_DEBUG_PRINT(2, ("MMU: State of %s is 0x%08x\n", mmu->hw_core.description, mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+}
+#endif
diff --git a/utgard/r5p1/common/mali_mmu.h b/utgard/r5p1/common/mali_mmu.h
new file mode 100755
index 0000000..101c968
--- /dev/null
+++ b/utgard/r5p1/common/mali_mmu.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MMU_H__
+#define __MALI_MMU_H__
+
+#include "mali_osk.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_hw_core.h"
+
+/* Forward declaration from mali_group.h */
+struct mali_group;
+
+/**
+ * MMU register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_mmu_register {
+	MALI_MMU_REGISTER_DTE_ADDR = 0x0000, /**< Current Page Directory Pointer */
+	MALI_MMU_REGISTER_STATUS = 0x0004, /**< Status of the MMU */
+	MALI_MMU_REGISTER_COMMAND = 0x0008, /**< Command register, used to control the MMU */
+	MALI_MMU_REGISTER_PAGE_FAULT_ADDR = 0x000C, /**< Logical address of the last page fault */
+	MALI_MMU_REGISTER_ZAP_ONE_LINE = 0x010, /**< Used to invalidate the mapping of a single page from the MMU */
+	MALI_MMU_REGISTER_INT_RAWSTAT = 0x0014, /**< Raw interrupt status, all interrupts visible */
+	MALI_MMU_REGISTER_INT_CLEAR = 0x0018, /**< Indicate to the MMU that the interrupt has been received */
+	MALI_MMU_REGISTER_INT_MASK = 0x001C, /**< Enable/disable types of interrupts */
+	MALI_MMU_REGISTER_INT_STATUS = 0x0020 /**< Interrupt status based on the mask */
+} mali_mmu_register;
+
+/**
+ * MMU interrupt register bits
+ * Each cause of the interrupt is reported
+ * through the (raw) interrupt status registers.
+ * Multiple interrupts can be pending, so multiple bits
+ * can be set at once.
+ */
+typedef enum mali_mmu_interrupt {
+	MALI_MMU_INTERRUPT_PAGE_FAULT = 0x01, /**< A page fault occured */
+	MALI_MMU_INTERRUPT_READ_BUS_ERROR = 0x02 /**< A bus read error occured */
+} mali_mmu_interrupt;
+
+typedef enum mali_mmu_status_bits {
+	MALI_MMU_STATUS_BIT_PAGING_ENABLED      = 1 << 0,
+	MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE   = 1 << 1,
+	MALI_MMU_STATUS_BIT_STALL_ACTIVE        = 1 << 2,
+	MALI_MMU_STATUS_BIT_IDLE                = 1 << 3,
+	MALI_MMU_STATUS_BIT_REPLAY_BUFFER_EMPTY = 1 << 4,
+	MALI_MMU_STATUS_BIT_PAGE_FAULT_IS_WRITE = 1 << 5,
+	MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE    = 1 << 31,
+} mali_mmu_status_bits;
+
+/**
+ * Definition of the MMU struct
+ * Used to track a MMU unit in the system.
+ * Contains information about the mapping of the registers
+ */
+struct mali_mmu_core {
+	struct mali_hw_core hw_core; /**< Common for all HW cores */
+	_mali_osk_irq_t *irq;        /**< IRQ handler */
+};
+
+_mali_osk_errcode_t mali_mmu_initialize(void);
+
+void mali_mmu_terminate(void);
+
+struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual);
+void mali_mmu_delete(struct mali_mmu_core *mmu);
+
+_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu);
+mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu);
+void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu);
+void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address);
+
+void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir);
+void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu);
+void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu);
+
+void mali_mmu_page_fault_done(struct mali_mmu_core *mmu);
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_mmu_get_interrupt_result(struct mali_mmu_core *mmu)
+{
+	u32 rawstat_used = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT);
+	if (0 == rawstat_used) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	}
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+
+MALI_STATIC_INLINE u32 mali_mmu_get_int_status(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_rawstat(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT);
+}
+
+MALI_STATIC_INLINE void mali_mmu_mask_all_interrupts(struct mali_mmu_core *mmu)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, 0);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_status(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_page_fault_addr(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_PAGE_FAULT_ADDR);
+}
+
+#endif /* __MALI_MMU_H__ */
diff --git a/utgard/r5p1/common/mali_mmu_page_directory.c b/utgard/r5p1/common/mali_mmu_page_directory.c
new file mode 100755
index 0000000..a4f4e58
--- /dev/null
+++ b/utgard/r5p1/common/mali_mmu_page_directory.c
@@ -0,0 +1,484 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_memory.h"
+#include "mali_l2_cache.h"
+
+static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data);
+
+u32 mali_allocate_empty_page(mali_io_address *virt_addr)
+{
+	_mali_osk_errcode_t err;
+	mali_io_address mapping;
+	mali_dma_addr address;
+
+	if (_MALI_OSK_ERR_OK != mali_mmu_get_table_page(&address, &mapping)) {
+		/* Allocation failed */
+		MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to get table page for empty pgdir\n"));
+		return 0;
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(mapping);
+
+	err = fill_page(mapping, 0);
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_mmu_release_table_page(address, mapping);
+		MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to zero page\n"));
+		return 0;
+	}
+
+	*virt_addr = mapping;
+	return address;
+}
+
+void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr)
+{
+	if (MALI_INVALID_PAGE != address) {
+		mali_mmu_release_table_page(address, virt_addr);
+	}
+}
+
+_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory,
+		mali_io_address *page_directory_mapping,
+		mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+		mali_dma_addr *data_page, mali_io_address *data_page_mapping)
+{
+	_mali_osk_errcode_t err;
+
+	err = mali_mmu_get_table_page(data_page, data_page_mapping);
+	if (_MALI_OSK_ERR_OK == err) {
+		err = mali_mmu_get_table_page(page_table, page_table_mapping);
+		if (_MALI_OSK_ERR_OK == err) {
+			err = mali_mmu_get_table_page(page_directory, page_directory_mapping);
+			if (_MALI_OSK_ERR_OK == err) {
+				fill_page(*data_page_mapping, 0);
+				fill_page(*page_table_mapping, *data_page | MALI_MMU_FLAGS_DEFAULT);
+				fill_page(*page_directory_mapping, *page_table | MALI_MMU_FLAGS_PRESENT);
+				MALI_SUCCESS;
+			}
+			mali_mmu_release_table_page(*page_table, *page_table_mapping);
+			*page_table = MALI_INVALID_PAGE;
+		}
+		mali_mmu_release_table_page(*data_page, *data_page_mapping);
+		*data_page = MALI_INVALID_PAGE;
+	}
+	return err;
+}
+
+void mali_destroy_fault_flush_pages(
+	mali_dma_addr *page_directory, mali_io_address *page_directory_mapping,
+	mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+	mali_dma_addr *data_page, mali_io_address *data_page_mapping)
+{
+	if (MALI_INVALID_PAGE != *page_directory) {
+		mali_mmu_release_table_page(*page_directory, *page_directory_mapping);
+		*page_directory = MALI_INVALID_PAGE;
+		*page_directory_mapping = NULL;
+	}
+
+	if (MALI_INVALID_PAGE != *page_table) {
+		mali_mmu_release_table_page(*page_table, *page_table_mapping);
+		*page_table = MALI_INVALID_PAGE;
+		*page_table_mapping = NULL;
+	}
+
+	if (MALI_INVALID_PAGE != *data_page) {
+		mali_mmu_release_table_page(*data_page, *data_page_mapping);
+		*data_page = MALI_INVALID_PAGE;
+		*data_page_mapping = NULL;
+	}
+}
+
+static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data)
+{
+	int i;
+	MALI_DEBUG_ASSERT_POINTER(mapping);
+
+	for (i = 0; i < MALI_MMU_PAGE_SIZE / 4; i++) {
+		_mali_osk_mem_iowrite32_relaxed(mapping, i * sizeof(u32), data);
+	}
+	_mali_osk_mem_barrier();
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size)
+{
+	const int first_pde = MALI_MMU_PDE_ENTRY(mali_address);
+	const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1);
+	_mali_osk_errcode_t err;
+	mali_io_address pde_mapping;
+	mali_dma_addr pde_phys;
+	int i;
+
+	if (last_pde < first_pde)
+		return _MALI_OSK_ERR_INVALID_ARGS;
+
+	for (i = first_pde; i <= last_pde; i++) {
+		if (0 == (_mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+						 i * sizeof(u32)) & MALI_MMU_FLAGS_PRESENT)) {
+			/* Page table not present */
+			MALI_DEBUG_ASSERT(0 == pagedir->page_entries_usage_count[i]);
+			MALI_DEBUG_ASSERT(NULL == pagedir->page_entries_mapped[i]);
+
+			err = mali_mmu_get_table_page(&pde_phys, &pde_mapping);
+			if (_MALI_OSK_ERR_OK != err) {
+				MALI_PRINT_ERROR(("Failed to allocate page table page.\n"));
+				return err;
+			}
+			pagedir->page_entries_mapped[i] = pde_mapping;
+
+			/* Update PDE, mark as present */
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32),
+							pde_phys | MALI_MMU_FLAGS_PRESENT);
+
+			MALI_DEBUG_ASSERT(0 == pagedir->page_entries_usage_count[i]);
+			pagedir->page_entries_usage_count[i] = 1;
+		} else {
+			pagedir->page_entries_usage_count[i]++;
+		}
+	}
+	_mali_osk_write_mem_barrier();
+
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE void mali_mmu_zero_pte(mali_io_address page_table, u32 mali_address, u32 size)
+{
+	int i;
+	const int first_pte = MALI_MMU_PTE_ENTRY(mali_address);
+	const int last_pte = MALI_MMU_PTE_ENTRY(mali_address + size - 1);
+
+	for (i = first_pte; i <= last_pte; i++) {
+		_mali_osk_mem_iowrite32_relaxed(page_table, i * sizeof(u32), 0);
+	}
+}
+
+static u32 mali_page_directory_get_phys_address(struct mali_page_directory *pagedir, u32 index)
+{
+	return (_mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+				       index * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK);
+}
+
+
+_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size)
+{
+	const int first_pde = MALI_MMU_PDE_ENTRY(mali_address);
+	const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1);
+	u32 left = size;
+	int i;
+	mali_bool pd_changed = MALI_FALSE;
+	u32 pages_to_invalidate[3]; /* hard-coded to 3: max two pages from the PT level plus max one page from PD level */
+	u32 num_pages_inv = 0;
+	mali_bool invalidate_all = MALI_FALSE; /* safety mechanism in case page_entries_usage_count is unreliable */
+
+	/* For all page directory entries in range. */
+	for (i = first_pde; i <= last_pde; i++) {
+		u32 size_in_pde, offset;
+
+		MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[i]);
+		MALI_DEBUG_ASSERT(0 != pagedir->page_entries_usage_count[i]);
+
+		/* Offset into page table, 0 if mali_address is 4MiB aligned */
+		offset = (mali_address & (MALI_MMU_VIRTUAL_PAGE_SIZE - 1));
+		if (left < MALI_MMU_VIRTUAL_PAGE_SIZE - offset) {
+			size_in_pde = left;
+		} else {
+			size_in_pde = MALI_MMU_VIRTUAL_PAGE_SIZE - offset;
+		}
+
+		pagedir->page_entries_usage_count[i]--;
+
+		/* If entire page table is unused, free it */
+		if (0 == pagedir->page_entries_usage_count[i]) {
+			u32 page_phys;
+			void *page_virt;
+			MALI_DEBUG_PRINT(4, ("Releasing page table as this is the last reference\n"));
+			/* last reference removed, no need to zero out each PTE  */
+
+			page_phys = MALI_MMU_ENTRY_ADDRESS(_mali_osk_mem_ioread32(pagedir->page_directory_mapped, i * sizeof(u32)));
+			page_virt = pagedir->page_entries_mapped[i];
+			pagedir->page_entries_mapped[i] = NULL;
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0);
+
+			mali_mmu_release_table_page(page_phys, page_virt);
+			pd_changed = MALI_TRUE;
+		} else {
+			MALI_DEBUG_ASSERT(num_pages_inv < 2);
+			if (num_pages_inv < 2) {
+				pages_to_invalidate[num_pages_inv] = mali_page_directory_get_phys_address(pagedir, i);
+				num_pages_inv++;
+			} else {
+				invalidate_all = MALI_TRUE;
+			}
+
+			/* If part of the page table is still in use, zero the relevant PTEs */
+			mali_mmu_zero_pte(pagedir->page_entries_mapped[i], mali_address, size_in_pde);
+		}
+
+		left -= size_in_pde;
+		mali_address += size_in_pde;
+	}
+	_mali_osk_write_mem_barrier();
+
+	/* L2 pages invalidation */
+	if (MALI_TRUE == pd_changed) {
+		MALI_DEBUG_ASSERT(num_pages_inv < 3);
+		if (num_pages_inv < 3) {
+			pages_to_invalidate[num_pages_inv] = pagedir->page_directory;
+			num_pages_inv++;
+		} else {
+			invalidate_all = MALI_TRUE;
+		}
+	}
+
+	if (invalidate_all) {
+		mali_l2_cache_invalidate_all();
+	} else {
+		mali_l2_cache_invalidate_all_pages(pages_to_invalidate, num_pages_inv);
+	}
+
+	MALI_SUCCESS;
+}
+
+struct mali_page_directory *mali_mmu_pagedir_alloc(void)
+{
+	struct mali_page_directory *pagedir;
+	_mali_osk_errcode_t err;
+	mali_dma_addr phys;
+
+	pagedir = _mali_osk_calloc(1, sizeof(struct mali_page_directory));
+	if (NULL == pagedir) {
+		return NULL;
+	}
+
+	err = mali_mmu_get_table_page(&phys, &pagedir->page_directory_mapped);
+	if (_MALI_OSK_ERR_OK != err) {
+		_mali_osk_free(pagedir);
+		return NULL;
+	}
+
+	pagedir->page_directory = (u32)phys;
+
+	/* Zero page directory */
+	fill_page(pagedir->page_directory_mapped, 0);
+
+	return pagedir;
+}
+
+void mali_mmu_pagedir_free(struct mali_page_directory *pagedir)
+{
+	const int num_page_table_entries = sizeof(pagedir->page_entries_mapped) / sizeof(pagedir->page_entries_mapped[0]);
+	int i;
+
+	/* Free referenced page tables and zero PDEs. */
+	for (i = 0; i < num_page_table_entries; i++) {
+		if (pagedir->page_directory_mapped && (_mali_osk_mem_ioread32(
+				pagedir->page_directory_mapped,
+				sizeof(u32)*i) & MALI_MMU_FLAGS_PRESENT)) {
+			mali_dma_addr phys = _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+					     i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK;
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0);
+			mali_mmu_release_table_page(phys, pagedir->page_entries_mapped[i]);
+		}
+	}
+	_mali_osk_write_mem_barrier();
+
+	/* Free the page directory page. */
+	mali_mmu_release_table_page(pagedir->page_directory, pagedir->page_directory_mapped);
+
+	_mali_osk_free(pagedir);
+}
+
+
+void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address,
+			     mali_dma_addr phys_address, u32 size, u32 permission_bits)
+{
+	u32 end_address = mali_address + size;
+	u32 mali_phys = (u32)phys_address;
+
+	/* Map physical pages into MMU page tables */
+	for (; mali_address < end_address; mali_address += MALI_MMU_PAGE_SIZE, mali_phys += MALI_MMU_PAGE_SIZE) {
+		MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)]);
+		_mali_osk_mem_iowrite32_relaxed(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)],
+						MALI_MMU_PTE_ENTRY(mali_address) * sizeof(u32),
+						mali_phys | permission_bits);
+	}
+}
+
+void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr)
+{
+#if defined(DEBUG)
+	u32 pde_index, pte_index;
+	u32 pde, pte;
+
+	pde_index = MALI_MMU_PDE_ENTRY(fault_addr);
+	pte_index = MALI_MMU_PTE_ENTRY(fault_addr);
+
+
+	pde = _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+				     pde_index * sizeof(u32));
+
+
+	if (pde & MALI_MMU_FLAGS_PRESENT) {
+		u32 pte_addr = MALI_MMU_ENTRY_ADDRESS(pde);
+
+		pte = _mali_osk_mem_ioread32(pagedir->page_entries_mapped[pde_index],
+					     pte_index * sizeof(u32));
+
+		MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table present: %08x\n"
+				     "\t\tPTE: %08x, page %08x is %s\n",
+				     fault_addr, pte_addr, pte,
+				     MALI_MMU_ENTRY_ADDRESS(pte),
+				     pte & MALI_MMU_FLAGS_DEFAULT ? "rw" : "not present"));
+	} else {
+		MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table not present: %08x\n",
+				     fault_addr, pde));
+	}
+#else
+	MALI_IGNORE(pagedir);
+	MALI_IGNORE(fault_addr);
+#endif
+}
+
+/* For instrumented */
+struct dump_info {
+	u32 buffer_left;
+	u32 register_writes_size;
+	u32 page_table_dump_size;
+	u32 *buffer;
+};
+
+static _mali_osk_errcode_t writereg(u32 where, u32 what, const char *comment, struct dump_info *info)
+{
+	if (NULL != info) {
+		info->register_writes_size += sizeof(u32) * 2; /* two 32-bit words */
+
+		if (NULL != info->buffer) {
+			/* check that we have enough space */
+			if (info->buffer_left < sizeof(u32) * 2) MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+			*info->buffer = where;
+			info->buffer++;
+
+			*info->buffer = what;
+			info->buffer++;
+
+			info->buffer_left -= sizeof(u32) * 2;
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t mali_mmu_dump_page(mali_io_address page, u32 phys_addr, struct dump_info *info)
+{
+	if (NULL != info) {
+		/* 4096 for the page and 4 bytes for the address */
+		const u32 page_size_in_elements = MALI_MMU_PAGE_SIZE / 4;
+		const u32 page_size_in_bytes = MALI_MMU_PAGE_SIZE;
+		const u32 dump_size_in_bytes = MALI_MMU_PAGE_SIZE + 4;
+
+		info->page_table_dump_size += dump_size_in_bytes;
+
+		if (NULL != info->buffer) {
+			if (info->buffer_left < dump_size_in_bytes) MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+			*info->buffer = phys_addr;
+			info->buffer++;
+
+			_mali_osk_memcpy(info->buffer, page, page_size_in_bytes);
+			info->buffer += page_size_in_elements;
+
+			info->buffer_left -= dump_size_in_bytes;
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t dump_mmu_page_table(struct mali_page_directory *pagedir, struct dump_info *info)
+{
+	MALI_DEBUG_ASSERT_POINTER(pagedir);
+	MALI_DEBUG_ASSERT_POINTER(info);
+
+	if (NULL != pagedir->page_directory_mapped) {
+		int i;
+
+		MALI_CHECK_NO_ERROR(
+			mali_mmu_dump_page(pagedir->page_directory_mapped, pagedir->page_directory, info)
+		);
+
+		for (i = 0; i < 1024; i++) {
+			if (NULL != pagedir->page_entries_mapped[i]) {
+				MALI_CHECK_NO_ERROR(
+					mali_mmu_dump_page(pagedir->page_entries_mapped[i],
+							   _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+									   i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK, info)
+				);
+			}
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t dump_mmu_registers(struct mali_page_directory *pagedir, struct dump_info *info)
+{
+	MALI_CHECK_NO_ERROR(writereg(0x00000000, pagedir->page_directory,
+				     "set the page directory address", info));
+	MALI_CHECK_NO_ERROR(writereg(0x00000008, 4, "zap???", info));
+	MALI_CHECK_NO_ERROR(writereg(0x00000008, 0, "enable paging", info));
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args)
+{
+	struct dump_info info = { 0, 0, 0, NULL };
+	struct mali_session_data *session_data;
+
+	session_data = (struct mali_session_data *)(uintptr_t)(args->ctx);
+	MALI_DEBUG_ASSERT_POINTER(session_data);
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info));
+	MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info));
+	args->size = info.register_writes_size + info.page_table_dump_size;
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args)
+{
+	struct dump_info info = { 0, 0, 0, NULL };
+	struct mali_session_data *session_data;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	session_data = (struct mali_session_data *)(uintptr_t)(args->ctx);
+	MALI_DEBUG_ASSERT_POINTER(session_data);
+
+	info.buffer_left = args->size;
+	info.buffer = (u32 *)(uintptr_t)args->buffer;
+
+	args->register_writes = (uintptr_t)info.buffer;
+	MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info));
+
+	args->page_table_dump = (uintptr_t)info.buffer;
+	MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info));
+
+	args->register_writes_size = info.register_writes_size;
+	args->page_table_dump_size = info.page_table_dump_size;
+
+	MALI_SUCCESS;
+}
diff --git a/utgard/r5p1/common/mali_mmu_page_directory.h b/utgard/r5p1/common/mali_mmu_page_directory.h
new file mode 100755
index 0000000..561fb60
--- /dev/null
+++ b/utgard/r5p1/common/mali_mmu_page_directory.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MMU_PAGE_DIRECTORY_H__
+#define __MALI_MMU_PAGE_DIRECTORY_H__
+
+#include "mali_osk.h"
+
+/**
+ * Size of an MMU page in bytes
+ */
+#define MALI_MMU_PAGE_SIZE 0x1000
+
+/*
+ * Size of the address space referenced by a page table page
+ */
+#define MALI_MMU_VIRTUAL_PAGE_SIZE 0x400000 /* 4 MiB */
+
+/**
+ * Page directory index from address
+ * Calculates the page directory index from the given address
+ */
+#define MALI_MMU_PDE_ENTRY(address) (((address)>>22) & 0x03FF)
+
+/**
+ * Page table index from address
+ * Calculates the page table index from the given address
+ */
+#define MALI_MMU_PTE_ENTRY(address) (((address)>>12) & 0x03FF)
+
+/**
+ * Extract the memory address from an PDE/PTE entry
+ */
+#define MALI_MMU_ENTRY_ADDRESS(value) ((value) & 0xFFFFFC00)
+
+#define MALI_INVALID_PAGE ((u32)(~0))
+
+/**
+ *
+ */
+typedef enum mali_mmu_entry_flags {
+	MALI_MMU_FLAGS_PRESENT = 0x01,
+	MALI_MMU_FLAGS_READ_PERMISSION = 0x02,
+	MALI_MMU_FLAGS_WRITE_PERMISSION = 0x04,
+	MALI_MMU_FLAGS_OVERRIDE_CACHE  = 0x8,
+	MALI_MMU_FLAGS_WRITE_CACHEABLE  = 0x10,
+	MALI_MMU_FLAGS_WRITE_ALLOCATE  = 0x20,
+	MALI_MMU_FLAGS_WRITE_BUFFERABLE  = 0x40,
+	MALI_MMU_FLAGS_READ_CACHEABLE  = 0x80,
+	MALI_MMU_FLAGS_READ_ALLOCATE  = 0x100,
+	MALI_MMU_FLAGS_MASK = 0x1FF,
+} mali_mmu_entry_flags;
+
+
+#define MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE ( \
+		MALI_MMU_FLAGS_PRESENT | \
+		MALI_MMU_FLAGS_READ_PERMISSION |  \
+		MALI_MMU_FLAGS_WRITE_PERMISSION | \
+		MALI_MMU_FLAGS_OVERRIDE_CACHE | \
+		MALI_MMU_FLAGS_WRITE_CACHEABLE | \
+		MALI_MMU_FLAGS_WRITE_BUFFERABLE | \
+		MALI_MMU_FLAGS_READ_CACHEABLE | \
+		MALI_MMU_FLAGS_READ_ALLOCATE )
+
+#define MALI_MMU_FLAGS_DEFAULT ( \
+				 MALI_MMU_FLAGS_PRESENT | \
+				 MALI_MMU_FLAGS_READ_PERMISSION |  \
+				 MALI_MMU_FLAGS_WRITE_PERMISSION )
+
+
+struct mali_page_directory {
+	u32 page_directory; /**< Physical address of the memory session's page directory */
+	mali_io_address page_directory_mapped; /**< Pointer to the mapped version of the page directory into the kernel's address space */
+
+	mali_io_address page_entries_mapped[1024]; /**< Pointers to the page tables which exists in the page directory mapped into the kernel's address space */
+	u32   page_entries_usage_count[1024]; /**< Tracks usage count of the page table pages, so they can be releases on the last reference */
+};
+
+/* Map Mali virtual address space (i.e. ensure page tables exist for the virtual range)  */
+_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size);
+_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size);
+
+/* Back virtual address space with actual pages. Assumes input is contiguous and 4k aligned. */
+void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address,
+			     mali_dma_addr phys_address, u32 size, u32 permission_bits);
+
+u32 mali_allocate_empty_page(mali_io_address *virtual);
+void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr);
+_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory,
+		mali_io_address *page_directory_mapping,
+		mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+		mali_dma_addr *data_page, mali_io_address *data_page_mapping);
+void mali_destroy_fault_flush_pages(
+	mali_dma_addr *page_directory, mali_io_address *page_directory_mapping,
+	mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+	mali_dma_addr *data_page, mali_io_address *data_page_mapping);
+
+struct mali_page_directory *mali_mmu_pagedir_alloc(void);
+void mali_mmu_pagedir_free(struct mali_page_directory *pagedir);
+
+void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr);
+
+#endif /* __MALI_MMU_PAGE_DIRECTORY_H__ */
diff --git a/utgard/r5p1/common/mali_osk.h b/utgard/r5p1/common/mali_osk.h
new file mode 100755
index 0000000..c48cfc0
--- /dev/null
+++ b/utgard/r5p1/common/mali_osk.h
@@ -0,0 +1,1345 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk.h
+ * Defines the OS abstraction layer for the kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_H__
+#define __MALI_OSK_H__
+
+#include "mali_osk_types.h"
+#include "mali_osk_specific.h"           /* include any per-os specifics */
+#include "mali_osk_locks.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs
+ *
+ * @{
+ */
+
+/** @addtogroup _mali_osk_lock OSK Mutual Exclusion Locks
+ * @{ */
+
+#ifdef DEBUG
+/** @brief Macro for asserting that the current thread holds a given lock
+ */
+#define MALI_DEBUG_ASSERT_LOCK_HELD(l) MALI_DEBUG_ASSERT(_mali_osk_lock_get_owner((_mali_osk_lock_debug_t *)l) == _mali_osk_get_tid());
+
+/** @brief returns a lock's owner (thread id) if debugging is enabled
+ */
+#else
+#define MALI_DEBUG_ASSERT_LOCK_HELD(l) do {} while(0)
+#endif
+
+/** @} */ /* end group _mali_osk_lock */
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Find the containing structure of another structure
+ *
+ * This is the reverse of the operation 'offsetof'. This means that the
+ * following condition is satisfied:
+ *
+ *   ptr == _MALI_OSK_CONTAINER_OF( &ptr->member, type, member )
+ *
+ * When ptr is of type 'type'.
+ *
+ * Its purpose it to recover a larger structure that has wrapped a smaller one.
+ *
+ * @note no type or memory checking occurs to ensure that a wrapper structure
+ * does in fact exist, and that it is being recovered with respect to the
+ * correct member.
+ *
+ * @param ptr the pointer to the member that is contained within the larger
+ * structure
+ * @param type the type of the structure that contains the member
+ * @param member the name of the member in the structure that ptr points to.
+ * @return a pointer to a \a type object which contains \a member, as pointed
+ * to by \a ptr.
+ */
+#define _MALI_OSK_CONTAINER_OF(ptr, type, member) \
+	((type *)( ((char *)ptr) - offsetof(type,member) ))
+
+/** @addtogroup _mali_osk_wq
+ * @{ */
+
+/** @brief Initialize work queues (for deferred work)
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_wq_init(void);
+
+/** @brief Terminate work queues (for deferred work)
+ */
+void _mali_osk_wq_term(void);
+
+/** @brief Create work in the work queue
+ *
+ * Creates a work object which can be scheduled in the work queue. When
+ * scheduled, \a handler will be called with \a data as the argument.
+ *
+ * Refer to \ref _mali_osk_wq_schedule_work() for details on how work
+ * is scheduled in the queue.
+ *
+ * The returned pointer must be freed with \ref _mali_osk_wq_delete_work()
+ * when no longer needed.
+ */
+_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief A high priority version of \a _mali_osk_wq_create_work()
+ *
+ * Creates a work object which can be scheduled in the high priority work queue.
+ *
+ * This is unfortunately needed to get low latency scheduling of the Mali cores.  Normally we would
+ * schedule the next job in hw_irq or tasklet, but often we can't since we need to synchronously map
+ * and unmap shared memory when a job is connected to external fences (timelines). And this requires
+ * taking a mutex.
+ *
+ * We do signal a lot of other (low priority) work also as part of the job being finished, and if we
+ * don't set this Mali scheduling thread as high priority, we see that the CPU scheduler often runs
+ * random things instead of starting the next GPU job when the GPU is idle.  So setting the gpu
+ * scheduler to high priority does give a visually more responsive system.
+ *
+ * Start the high priority work with: \a _mali_osk_wq_schedule_work_high_pri()
+ */
+_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief Delete a work object
+ *
+ * This will flush the work queue to ensure that the work handler will not
+ * be called after deletion.
+ */
+void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work);
+
+/** @brief Delete a work object
+ *
+ * This will NOT flush the work queue, so only call this if you are sure that the work handler will
+ * not be called after deletion.
+ */
+void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work);
+
+/** @brief Cause a queued, deferred call of the work handler
+ *
+ * _mali_osk_wq_schedule_work provides a mechanism for enqueuing deferred calls
+ * to the work handler. After calling \ref _mali_osk_wq_schedule_work(), the
+ * work handler will be scheduled to run at some point in the future.
+ *
+ * Typically this is called by the IRQ upper-half to defer further processing of
+ * IRQ-related work to the IRQ bottom-half handler. This is necessary for work
+ * that cannot be done in an IRQ context by the IRQ upper-half handler. Timer
+ * callbacks also use this mechanism, because they are treated as though they
+ * operate in an IRQ context. Refer to \ref _mali_osk_timer_t for more
+ * information.
+ *
+ * Code that operates in a kernel-process context (with no IRQ context
+ * restrictions) may also enqueue deferred calls to the IRQ bottom-half. The
+ * advantage over direct calling is that deferred calling allows the caller and
+ * IRQ bottom half to hold the same mutex, with a guarantee that they will not
+ * deadlock just by using this mechanism.
+ *
+ * _mali_osk_wq_schedule_work() places deferred call requests on a queue, to
+ * allow for more than one thread to make a deferred call. Therfore, if it is
+ * called 'K' times, then the IRQ bottom-half will be scheduled 'K' times too.
+ * 'K' is a number that is implementation-specific.
+ *
+ * _mali_osk_wq_schedule_work() is guaranteed to not block on:
+ * - enqueuing a deferred call request.
+ * - the completion of the work handler.
+ *
+ * This is to prevent deadlock. For example, if _mali_osk_wq_schedule_work()
+ * blocked, then it would cause a deadlock when the following two conditions
+ * hold:
+ * - The work handler callback (of type _mali_osk_wq_work_handler_t) locks
+ * a mutex
+ * - And, at the same time, the caller of _mali_osk_wq_schedule_work() also
+ * holds the same mutex
+ *
+ * @note care must be taken to not overflow the queue that
+ * _mali_osk_wq_schedule_work() operates on. Code must be structured to
+ * ensure that the number of requests made to the queue is bounded. Otherwise,
+ * work will be lost.
+ *
+ * The queue that _mali_osk_wq_schedule_work implements is a FIFO of N-writer,
+ * 1-reader type. The writers are the callers of _mali_osk_wq_schedule_work
+ * (all OSK-registered IRQ upper-half handlers in the system, watchdog timers,
+ * callers from a Kernel-process context). The reader is a single thread that
+ * handles all OSK-registered work.
+ *
+ * @param work a pointer to the _mali_osk_wq_work_t object corresponding to the
+ * work to begin processing.
+ */
+void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work);
+
+/** @brief Cause a queued, deferred call of the high priority work handler
+ *
+ * Function is the same as \a _mali_osk_wq_schedule_work() with the only
+ * difference that it runs in a high (real time) priority on the system.
+ *
+ * Should only be used as a substitue for doing the same work in interrupts.
+ *
+ * This is allowed to sleep, but the work should be small since it will block
+ * all other applications.
+*/
+void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work);
+
+/** @brief Flush the work queue
+ *
+ * This will flush the OSK work queue, ensuring all work in the queue has
+ * completed before returning.
+ *
+ * Since this blocks on the completion of work in the work-queue, the
+ * caller of this function \b must \b not hold any mutexes that are taken by
+ * any registered work handler. To do so may cause a deadlock.
+ *
+ */
+void _mali_osk_wq_flush(void);
+
+/** @brief Create work in the delayed work queue
+ *
+ * Creates a work object which can be scheduled in the work queue. When
+ * scheduled, a timer will be start and the \a handler will be called with
+ * \a data as the argument when timer out
+ *
+ * Refer to \ref _mali_osk_wq_delayed_schedule_work() for details on how work
+ * is scheduled in the queue.
+ *
+ * The returned pointer must be freed with \ref _mali_osk_wq_delayed_delete_work_nonflush()
+ * when no longer needed.
+ */
+_mali_osk_wq_delayed_work_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief Delete a work object
+ *
+ * This will NOT flush the work queue, so only call this if you are sure that the work handler will
+ * not be called after deletion.
+ */
+void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Cancel a delayed work without waiting for it to finish
+ *
+ * Note that the \a work callback function may still be running on return from
+ * _mali_osk_wq_delayed_cancel_work_async().
+ *
+ * @param work The delayed work to be cancelled
+ */
+void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Cancel a delayed work and wait for it to finish
+ *
+ * When this function returns, the \a work was either cancelled or it finished running.
+ *
+ * @param work The delayed work to be cancelled
+ */
+void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Put \a work task in global workqueue after delay
+ *
+ * After waiting for a given time this puts a job in the kernel-global
+ * workqueue.
+ *
+ * If \a work was already on a queue, this function will return without doing anything
+ *
+ * @param work job to be done
+ * @param delay number of jiffies to wait or 0 for immediate execution
+ */
+void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay);
+
+/** @} */ /* end group _mali_osk_wq */
+
+
+/** @addtogroup _mali_osk_irq
+ * @{ */
+
+/** @brief Initialize IRQ handling for a resource
+ *
+ * Registers an interrupt handler \a uhandler for the given IRQ number \a irqnum.
+ * \a data will be passed as argument to the handler when an interrupt occurs.
+ *
+ * If \a irqnum is -1, _mali_osk_irq_init will probe for the IRQ number using
+ * the supplied \a trigger_func and \a ack_func. These functions will also
+ * receive \a data as their argument.
+ *
+ * @param irqnum The IRQ number that the resource uses, as seen by the CPU.
+ * The value -1 has a special meaning which indicates the use of probing, and
+ * trigger_func and ack_func must be non-NULL.
+ * @param uhandler The interrupt handler, corresponding to a ISR handler for
+ * the resource
+ * @param int_data resource specific data, which will be passed to uhandler
+ * @param trigger_func Optional: a function to trigger the resource's irq, to
+ * probe for the interrupt. Use NULL if irqnum != -1.
+ * @param ack_func Optional: a function to acknowledge the resource's irq, to
+ * probe for the interrupt. Use NULL if irqnum != -1.
+ * @param probe_data resource-specific data, which will be passed to
+ * (if present) trigger_func and ack_func
+ * @param description textual description of the IRQ resource.
+ * @return on success, a pointer to a _mali_osk_irq_t object, which represents
+ * the IRQ handling on this resource. NULL on failure.
+ */
+_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description);
+
+/** @brief Terminate IRQ handling on a resource.
+ *
+ * This will disable the interrupt from the device, and then waits for any
+ * currently executing IRQ handlers to complete.
+ *
+ * @note If work is deferred to an IRQ bottom-half handler through
+ * \ref _mali_osk_wq_schedule_work(), be sure to flush any remaining work
+ * with \ref _mali_osk_wq_flush() or (implicitly) with \ref _mali_osk_wq_delete_work()
+ *
+ * @param irq a pointer to the _mali_osk_irq_t object corresponding to the
+ * resource whose IRQ handling is to be terminated.
+ */
+void _mali_osk_irq_term(_mali_osk_irq_t *irq);
+
+/** @} */ /* end group _mali_osk_irq */
+
+
+/** @addtogroup _mali_osk_atomic
+ * @{ */
+
+/** @brief Decrement an atomic counter
+ *
+ * @note It is an error to decrement the counter beyond -(1<<23)
+ *
+ * @param atom pointer to an atomic counter */
+void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom);
+
+/** @brief Decrement an atomic counter, return new value
+ *
+ * @param atom pointer to an atomic counter
+ * @return The new value, after decrement */
+u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom);
+
+/** @brief Increment an atomic counter
+ *
+ * @note It is an error to increment the counter beyond (1<<23)-1
+ *
+ * @param atom pointer to an atomic counter */
+void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom);
+
+/** @brief Increment an atomic counter, return new value
+ *
+ * @param atom pointer to an atomic counter */
+u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom);
+
+/** @brief Initialize an atomic counter
+ *
+ * @note the parameter required is a u32, and so signed integers should be
+ * cast to u32.
+ *
+ * @param atom pointer to an atomic counter
+ * @param val the value to initialize the atomic counter.
+ */
+void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val);
+
+/** @brief Read a value from an atomic counter
+ *
+ * This can only be safely used to determine the value of the counter when it
+ * is guaranteed that other threads will not be modifying the counter. This
+ * makes its usefulness limited.
+ *
+ * @param atom pointer to an atomic counter
+ */
+u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom);
+
+/** @brief Terminate an atomic counter
+ *
+ * @param atom pointer to an atomic counter
+ */
+void _mali_osk_atomic_term(_mali_osk_atomic_t *atom);
+
+/** @brief Assign a new val to atomic counter, and return the old atomic counter
+ *
+ * @param atom pointer to an atomic counter
+ * @param val the new value assign to the atomic counter
+ * @return the old value of the atomic counter
+ */
+u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val);
+/** @} */  /* end group _mali_osk_atomic */
+
+
+/** @defgroup _mali_osk_memory OSK Memory Allocation
+ * @{ */
+
+/** @brief Allocate zero-initialized memory.
+ *
+ * Returns a buffer capable of containing at least \a n elements of \a size
+ * bytes each. The buffer is initialized to zero.
+ *
+ * If there is a need for a bigger block of memory (16KB or bigger), then
+ * consider to use _mali_osk_vmalloc() instead, as this function might
+ * map down to a OS function with size limitations.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * @param n Number of elements to allocate
+ * @param size Size of each element
+ * @return On success, the zero-initialized buffer allocated. NULL on failure
+ */
+void *_mali_osk_calloc(u32 n, u32 size);
+
+/** @brief Allocate memory.
+ *
+ * Returns a buffer capable of containing at least \a size bytes. The
+ * contents of the buffer are undefined.
+ *
+ * If there is a need for a bigger block of memory (16KB or bigger), then
+ * consider to use _mali_osk_vmalloc() instead, as this function might
+ * map down to a OS function with size limitations.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * Remember to free memory using _mali_osk_free().
+ * @param size Number of bytes to allocate
+ * @return On success, the buffer allocated. NULL on failure.
+ */
+void *_mali_osk_malloc(u32 size);
+
+/** @brief Free memory.
+ *
+ * Reclaims the buffer pointed to by the parameter \a ptr for the system.
+ * All memory returned from _mali_osk_malloc() and _mali_osk_calloc()
+ * must be freed before the application exits. Otherwise,
+ * a memory leak will occur.
+ *
+ * Memory must be freed once. It is an error to free the same non-NULL pointer
+ * more than once.
+ *
+ * It is legal to free the NULL pointer.
+ *
+ * @param ptr Pointer to buffer to free
+ */
+void _mali_osk_free(void *ptr);
+
+/** @brief Allocate memory.
+ *
+ * Returns a buffer capable of containing at least \a size bytes. The
+ * contents of the buffer are undefined.
+ *
+ * This function is potentially slower than _mali_osk_malloc() and _mali_osk_calloc(),
+ * but do support bigger sizes.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * Remember to free memory using _mali_osk_free().
+ * @param size Number of bytes to allocate
+ * @return On success, the buffer allocated. NULL on failure.
+ */
+void *_mali_osk_valloc(u32 size);
+
+/** @brief Free memory.
+ *
+ * Reclaims the buffer pointed to by the parameter \a ptr for the system.
+ * All memory returned from _mali_osk_valloc() must be freed before the
+ * application exits. Otherwise a memory leak will occur.
+ *
+ * Memory must be freed once. It is an error to free the same non-NULL pointer
+ * more than once.
+ *
+ * It is legal to free the NULL pointer.
+ *
+ * @param ptr Pointer to buffer to free
+ */
+void _mali_osk_vfree(void *ptr);
+
+/** @brief Copies memory.
+ *
+ * Copies the \a len bytes from the buffer pointed by the parameter \a src
+ * directly to the buffer pointed by \a dst.
+ *
+ * It is an error for \a src to overlap \a dst anywhere in \a len bytes.
+ *
+ * @param dst Pointer to the destination array where the content is to be
+ * copied.
+ * @param src Pointer to the source of data to be copied.
+ * @param len Number of bytes to copy.
+ * @return \a dst is always passed through unmodified.
+ */
+void *_mali_osk_memcpy(void *dst, const void *src, u32 len);
+
+/** @brief Fills memory.
+ *
+ * Sets the first \a n bytes of the block of memory pointed to by \a s to
+ * the specified value
+ * @param s Pointer to the block of memory to fill.
+ * @param c Value to be set, passed as u32. Only the 8 Least Significant Bits (LSB)
+ * are used.
+ * @param n Number of bytes to be set to the value.
+ * @return \a s is always passed through unmodified
+ */
+void *_mali_osk_memset(void *s, u32 c, u32 n);
+/** @} */ /* end group _mali_osk_memory */
+
+
+/** @brief Checks the amount of memory allocated
+ *
+ * Checks that not more than \a max_allocated bytes are allocated.
+ *
+ * Some OS bring up an interactive out of memory dialogue when the
+ * system runs out of memory. This can stall non-interactive
+ * apps (e.g. automated test runs). This function can be used to
+ * not trigger the OOM dialogue by keeping allocations
+ * within a certain limit.
+ *
+ * @return MALI_TRUE when \a max_allocated bytes are not in use yet. MALI_FALSE
+ * when at least \a max_allocated bytes are in use.
+ */
+mali_bool _mali_osk_mem_check_allocated(u32 max_allocated);
+
+
+/** @addtogroup _mali_osk_low_level_memory
+ * @{ */
+
+/** @brief Issue a memory barrier
+ *
+ * This defines an arbitrary memory barrier operation, which forces an ordering constraint
+ * on memory read and write operations.
+ */
+void _mali_osk_mem_barrier(void);
+
+/** @brief Issue a write memory barrier
+ *
+ * This defines an write memory barrier operation which forces an ordering constraint
+ * on memory write operations.
+ */
+void _mali_osk_write_mem_barrier(void);
+
+/** @brief Map a physically contiguous region into kernel space
+ *
+ * This is primarily used for mapping in registers from resources, and Mali-MMU
+ * page tables. The mapping is only visable from kernel-space.
+ *
+ * Access has to go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32
+ *
+ * @param phys CPU-physical base address of the memory to map in. This must
+ * be aligned to the system's page size, which is assumed to be 4K.
+ * @param size the number of bytes of physically contiguous address space to
+ * map in
+ * @param description A textual description of the memory being mapped in.
+ * @return On success, a Mali IO address through which the mapped-in
+ * memory/registers can be accessed. NULL on failure.
+ */
+mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description);
+
+/** @brief Unmap a physically contiguous address range from kernel space.
+ *
+ * The address range should be one previously mapped in through
+ * _mali_osk_mem_mapioregion.
+ *
+ * It is a programming error to do (but not limited to) the following:
+ * - attempt an unmap twice
+ * - unmap only part of a range obtained through _mali_osk_mem_mapioregion
+ * - unmap more than the range obtained through  _mali_osk_mem_mapioregion
+ * - unmap an address range that was not successfully mapped using
+ * _mali_osk_mem_mapioregion
+ * - provide a mapping that does not map to phys.
+ *
+ * @param phys CPU-physical base address of the memory that was originally
+ * mapped in. This must be aligned to the system's page size, which is assumed
+ * to be 4K
+ * @param size The number of bytes that were originally mapped in.
+ * @param mapping The Mali IO address through which the mapping is
+ * accessed.
+ */
+void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address mapping);
+
+/** @brief Allocate and Map a physically contiguous region into kernel space
+ *
+ * This is used for allocating physically contiguous regions (such as Mali-MMU
+ * page tables) and mapping them into kernel space. The mapping is only
+ * visible from kernel-space.
+ *
+ * The alignment of the returned memory is guaranteed to be at least
+ * _MALI_OSK_CPU_PAGE_SIZE.
+ *
+ * Access must go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32
+ *
+ * @note This function is primarily to provide support for OSs that are
+ * incapable of separating the tasks 'allocate physically contiguous memory'
+ * and 'map it into kernel space'
+ *
+ * @param[out] phys CPU-physical base address of memory that was allocated.
+ * (*phys) will be guaranteed to be aligned to at least
+ * _MALI_OSK_CPU_PAGE_SIZE on success.
+ *
+ * @param[in] size the number of bytes of physically contiguous memory to
+ * allocate. This must be a multiple of _MALI_OSK_CPU_PAGE_SIZE.
+ *
+ * @return On success, a Mali IO address through which the mapped-in
+ * memory/registers can be accessed. NULL on failure, and (*phys) is unmodified.
+ */
+mali_io_address _mali_osk_mem_allocioregion(u32 *phys, u32 size);
+
+/** @brief Free a physically contiguous address range from kernel space.
+ *
+ * The address range should be one previously mapped in through
+ * _mali_osk_mem_allocioregion.
+ *
+ * It is a programming error to do (but not limited to) the following:
+ * - attempt a free twice on the same ioregion
+ * - free only part of a range obtained through _mali_osk_mem_allocioregion
+ * - free more than the range obtained through  _mali_osk_mem_allocioregion
+ * - free an address range that was not successfully mapped using
+ * _mali_osk_mem_allocioregion
+ * - provide a mapping that does not map to phys.
+ *
+ * @param phys CPU-physical base address of the memory that was originally
+ * mapped in, which was aligned to _MALI_OSK_CPU_PAGE_SIZE.
+ * @param size The number of bytes that were originally mapped in, which was
+ * a multiple of _MALI_OSK_CPU_PAGE_SIZE.
+ * @param mapping The Mali IO address through which the mapping is
+ * accessed.
+ */
+void _mali_osk_mem_freeioregion(u32 phys, u32 size, mali_io_address mapping);
+
+/** @brief Request a region of physically contiguous memory
+ *
+ * This is used to ensure exclusive access to a region of physically contigous
+ * memory.
+ *
+ * It is acceptable to implement this as a stub. However, it is then the job
+ * of the System Integrator to ensure that no other device driver will be using
+ * the physical address ranges used by Mali, while the Mali device driver is
+ * loaded.
+ *
+ * @param phys CPU-physical base address of the memory to request. This must
+ * be aligned to the system's page size, which is assumed to be 4K.
+ * @param size the number of bytes of physically contiguous address space to
+ * request.
+ * @param description A textual description of the memory being requested.
+ * @return _MALI_OSK_ERR_OK on success. Otherwise, a suitable
+ * _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description);
+
+/** @brief Un-request a region of physically contiguous memory
+ *
+ * This is used to release a regious of physically contiguous memory previously
+ * requested through _mali_osk_mem_reqregion, so that other device drivers may
+ * use it. This will be called at time of Mali device driver termination.
+ *
+ * It is a programming error to attempt to:
+ * - unrequest a region twice
+ * - unrequest only part of a range obtained through _mali_osk_mem_reqregion
+ * - unrequest more than the range obtained through  _mali_osk_mem_reqregion
+ * - unrequest an address range that was not successfully requested using
+ * _mali_osk_mem_reqregion
+ *
+ * @param phys CPU-physical base address of the memory to un-request. This must
+ * be aligned to the system's page size, which is assumed to be 4K
+ * @param size the number of bytes of physically contiguous address space to
+ * un-request.
+ */
+void _mali_osk_mem_unreqregion(uintptr_t phys, u32 size);
+
+/** @brief Read from a location currently mapped in through
+ * _mali_osk_mem_mapioregion
+ *
+ * This reads a 32-bit word from a 32-bit aligned location. It is a programming
+ * error to provide unaligned locations, or to read from memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to read from
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @return the 32-bit word from the specified location.
+ */
+u32 _mali_osk_mem_ioread32(volatile mali_io_address mapping, u32 offset);
+
+/** @brief Write to a location currently mapped in through
+ * _mali_osk_mem_mapioregion without memory barriers
+ *
+ * This write a 32-bit word to a 32-bit aligned location without using memory barrier.
+ * It is a programming error to provide unaligned locations, or to write to memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to write to
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @param val the 32-bit word to write.
+ */
+void _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val);
+
+/** @brief Write to a location currently mapped in through
+ * _mali_osk_mem_mapioregion with write memory barrier
+ *
+ * This write a 32-bit word to a 32-bit aligned location. It is a programming
+ * error to provide unaligned locations, or to write to memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to write to
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @param val the 32-bit word to write.
+ */
+void _mali_osk_mem_iowrite32(volatile mali_io_address mapping, u32 offset, u32 val);
+
+/** @brief Flush all CPU caches
+ *
+ * This should only be implemented if flushing of the cache is required for
+ * memory mapped in through _mali_osk_mem_mapregion.
+ */
+void _mali_osk_cache_flushall(void);
+
+/** @brief Flush any caches necessary for the CPU and MALI to have the same view of a range of uncached mapped memory
+ *
+ * This should only be implemented if your OS doesn't do a full cache flush (inner & outer)
+ * after allocating uncached mapped memory.
+ *
+ * Some OS do not perform a full cache flush (including all outer caches) for uncached mapped memory.
+ * They zero the memory through a cached mapping, then flush the inner caches but not the outer caches.
+ * This is required for MALI to have the correct view of the memory.
+ */
+void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size);
+
+/** @brief Safely copy as much data as possible from src to dest
+ *
+ * Do not crash if src or dest isn't available.
+ *
+ * @param dest Destination buffer (limited to user space mapped Mali memory)
+ * @param src Source buffer
+ * @param size Number of bytes to copy
+ * @return Number of bytes actually copied
+ */
+u32 _mali_osk_mem_write_safe(void *dest, const void *src, u32 size);
+
+/** @} */ /* end group _mali_osk_low_level_memory */
+
+
+/** @addtogroup _mali_osk_notification
+ *
+ * User space notification framework
+ *
+ * Communication with user space of asynchronous events is performed through a
+ * synchronous call to the \ref u_k_api.
+ *
+ * Since the events are asynchronous, the events have to be queued until a
+ * synchronous U/K API call can be made by user-space. A U/K API call might also
+ * be received before any event has happened. Therefore the notifications the
+ * different subsystems wants to send to user space has to be queued for later
+ * reception, or a U/K API call has to be blocked until an event has occured.
+ *
+ * Typical uses of notifications are after running of jobs on the hardware or
+ * when changes to the system is detected that needs to be relayed to user
+ * space.
+ *
+ * After an event has occured user space has to be notified using some kind of
+ * message. The notification framework supports sending messages to waiting
+ * threads or queueing of messages until a U/K API call is made.
+ *
+ * The notification queue is a FIFO. There are no restrictions on the numbers
+ * of readers or writers in the queue.
+ *
+ * A message contains what user space needs to identifiy how to handle an
+ * event. This includes a type field and a possible type specific payload.
+ *
+ * A notification to user space is represented by a
+ * \ref _mali_osk_notification_t object. A sender gets hold of such an object
+ * using _mali_osk_notification_create(). The buffer given by the
+ * _mali_osk_notification_t::result_buffer field in the object is used to store
+ * any type specific data. The other fields are internal to the queue system
+ * and should not be touched.
+ *
+ * @{ */
+
+/** @brief Create a notification object
+ *
+ * Returns a notification object which can be added to the queue of
+ * notifications pending for user space transfer.
+ *
+ * The implementation will initialize all members of the
+ * \ref _mali_osk_notification_t object. In particular, the
+ * _mali_osk_notification_t::result_buffer member will be initialized to point
+ * to \a size bytes of storage, and that storage will be suitably aligned for
+ * storage of any structure. That is, the created buffer meets the same
+ * requirements as _mali_osk_malloc().
+ *
+ * The notification object must be deleted when not in use. Use
+ * _mali_osk_notification_delete() for deleting it.
+ *
+ * @note You \b must \b not call _mali_osk_free() on a \ref _mali_osk_notification_t,
+ * object, or on a _mali_osk_notification_t::result_buffer. You must only use
+ * _mali_osk_notification_delete() to free the resources assocaited with a
+ * \ref _mali_osk_notification_t object.
+ *
+ * @param type The notification type
+ * @param size The size of the type specific buffer to send
+ * @return Pointer to a notification object with a suitable buffer, or NULL on error.
+ */
+_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size);
+
+/** @brief Delete a notification object
+ *
+ * This must be called to reclaim the resources of a notification object. This
+ * includes:
+ * - The _mali_osk_notification_t::result_buffer
+ * - The \ref _mali_osk_notification_t itself.
+ *
+ * A notification object \b must \b not be used after it has been deleted by
+ * _mali_osk_notification_delete().
+ *
+ * In addition, the notification object may not be deleted while it is in a
+ * queue. That is, if it has been placed on a queue with
+ * _mali_osk_notification_queue_send(), then it must not be deleted until
+ * it has been received by a call to _mali_osk_notification_queue_receive().
+ * Otherwise, the queue may be corrupted.
+ *
+ * @param object the notification object to delete.
+ */
+void _mali_osk_notification_delete(_mali_osk_notification_t *object);
+
+/** @brief Create a notification queue
+ *
+ * Creates a notification queue which can be used to queue messages for user
+ * delivery and get queued messages from
+ *
+ * The queue is a FIFO, and has no restrictions on the numbers of readers or
+ * writers.
+ *
+ * When the queue is no longer in use, it must be terminated with
+ * \ref _mali_osk_notification_queue_term(). Failure to do so will result in a
+ * memory leak.
+ *
+ * @return Pointer to a new notification queue or NULL on error.
+ */
+_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void);
+
+/** @brief Destroy a notification queue
+ *
+ * Destroys a notification queue and frees associated resources from the queue.
+ *
+ * A notification queue \b must \b not be destroyed in the following cases:
+ * - while there are \ref _mali_osk_notification_t objects in the queue.
+ * - while there are writers currently acting upon the queue. That is, while
+ * a thread is currently calling \ref _mali_osk_notification_queue_send() on
+ * the queue, or while a thread may call
+ * \ref _mali_osk_notification_queue_send() on the queue in the future.
+ * - while there are readers currently waiting upon the queue. That is, while
+ * a thread is currently calling \ref _mali_osk_notification_queue_receive() on
+ * the queue, or while a thread may call
+ * \ref _mali_osk_notification_queue_receive() on the queue in the future.
+ *
+ * Therefore, all \ref _mali_osk_notification_t objects must be flushed and
+ * deleted by the code that makes use of the notification queues, since only
+ * they know the structure of the _mali_osk_notification_t::result_buffer
+ * (even if it may only be a flat sturcture).
+ *
+ * @note Since the queue is a FIFO, the code using notification queues may
+ * create its own 'flush' type of notification, to assist in flushing the
+ * queue.
+ *
+ * Once the queue has been destroyed, it must not be used again.
+ *
+ * @param queue The queue to destroy
+ */
+void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue);
+
+/** @brief Schedule notification for delivery
+ *
+ * When a \ref _mali_osk_notification_t object has been created successfully
+ * and set up, it may be added to the queue of objects waiting for user space
+ * transfer.
+ *
+ * The sending will not block if the queue is full.
+ *
+ * A \ref _mali_osk_notification_t object \b must \b not be put on two different
+ * queues at the same time, or enqueued twice onto a single queue before
+ * reception. However, it is acceptable for it to be requeued \em after reception
+ * from a call to _mali_osk_notification_queue_receive(), even onto the same queue.
+ *
+ * Again, requeuing must also not enqueue onto two different queues at the same
+ * time, or enqueue onto the same queue twice before reception.
+ *
+ * @param queue The notification queue to add this notification to
+ * @param object The entry to add
+ */
+void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object);
+
+/** @brief Receive a notification from a queue
+ *
+ * Receives a single notification from the given queue.
+ *
+ * If no notifciations are ready the thread will sleep until one becomes ready.
+ * Therefore, notifications may not be received into an
+ * IRQ or 'atomic' context (that is, a context where sleeping is disallowed).
+ *
+ * @param queue The queue to receive from
+ * @param result Pointer to storage of a pointer of type
+ * \ref _mali_osk_notification_t*. \a result will be written to such that the
+ * expression \a (*result) will evaluate to a pointer to a valid
+ * \ref _mali_osk_notification_t object, or NULL if none were received.
+ * @return _MALI_OSK_ERR_OK on success. _MALI_OSK_ERR_RESTARTSYSCALL if the sleep was interrupted.
+ */
+_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result);
+
+/** @brief Dequeues a notification from a queue
+ *
+ * Receives a single notification from the given queue.
+ *
+ * If no notifciations are ready the function call will return an error code.
+ *
+ * @param queue The queue to receive from
+ * @param result Pointer to storage of a pointer of type
+ * \ref _mali_osk_notification_t*. \a result will be written to such that the
+ * expression \a (*result) will evaluate to a pointer to a valid
+ * \ref _mali_osk_notification_t object, or NULL if none were received.
+ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if queue was empty.
+ */
+_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result);
+
+/** @} */ /* end group _mali_osk_notification */
+
+
+/** @addtogroup _mali_osk_timer
+ *
+ * Timers use the OS's representation of time, which are 'ticks'. This is to
+ * prevent aliasing problems between the internal timer time, and the time
+ * asked for.
+ *
+ * @{ */
+
+/** @brief Initialize a timer
+ *
+ * Allocates resources for a new timer, and initializes them. This does not
+ * start the timer.
+ *
+ * @return a pointer to the allocated timer object, or NULL on failure.
+ */
+_mali_osk_timer_t *_mali_osk_timer_init(void);
+
+/** @brief Start a timer
+ *
+ * It is an error to start a timer without setting the callback via
+ * _mali_osk_timer_setcallback().
+ *
+ * It is an error to use this to start an already started timer.
+ *
+ * The timer will expire in \a ticks_to_expire ticks, at which point, the
+ * callback function will be invoked with the callback-specific data,
+ * as registered by _mali_osk_timer_setcallback().
+ *
+ * @param tim the timer to start
+ * @param ticks_to_expire the amount of time in ticks for the timer to run
+ * before triggering.
+ */
+void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire);
+
+/** @brief Modify a timer
+ *
+ * Set the relative time at which a timer will expire, and start it if it is
+ * stopped. If \a ticks_to_expire 0 the timer fires immediately.
+ *
+ * It is an error to modify a timer without setting the callback via
+ *  _mali_osk_timer_setcallback().
+ *
+ * The timer will expire at \a ticks_to_expire from the time of the call, at
+ * which point, the callback function will be invoked with the
+ * callback-specific data, as set by _mali_osk_timer_setcallback().
+ *
+ * @param tim the timer to modify, and start if necessary
+ * @param ticks_to_expire the \em absolute time in ticks at which this timer
+ * should trigger.
+ *
+ */
+void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire);
+
+/** @brief Stop a timer, and block on its completion.
+ *
+ * Stop the timer. When the function returns, it is guaranteed that the timer's
+ * callback will not be running on any CPU core.
+ *
+ * Since stoping the timer blocks on compeletion of the callback, the callback
+ * may not obtain any mutexes that the caller holds. Otherwise, a deadlock will
+ * occur.
+ *
+ * @note While the callback itself is guaranteed to not be running, work
+ * enqueued on the work-queue by the timer (with
+ * \ref _mali_osk_wq_schedule_work()) may still run. The timer callback and
+ * work handler must take this into account.
+ *
+ * It is legal to stop an already stopped timer.
+ *
+ * @param tim the timer to stop.
+ *
+ */
+void _mali_osk_timer_del(_mali_osk_timer_t *tim);
+
+/** @brief Stop a timer.
+ *
+ * Stop the timer. When the function returns, the timer's callback may still be
+ * running on any CPU core.
+ *
+ * It is legal to stop an already stopped timer.
+ *
+ * @param tim the timer to stop.
+ */
+void _mali_osk_timer_del_async(_mali_osk_timer_t *tim);
+
+/** @brief Check if timer is pending.
+ *
+ * Check if timer is active.
+ *
+ * @param tim the timer to check
+ * @return MALI_TRUE if time is active, MALI_FALSE if it is not active
+ */
+mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim);
+
+/** @brief Set a timer's callback parameters.
+ *
+ * This must be called at least once before a timer is started/modified.
+ *
+ * After a timer has been stopped or expires, the callback remains set. This
+ * means that restarting the timer will call the same function with the same
+ * parameters on expiry.
+ *
+ * @param tim the timer to set callback on.
+ * @param callback Function to call when timer expires
+ * @param data Function-specific data to supply to the function on expiry.
+ */
+void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data);
+
+/** @brief Terminate a timer, and deallocate resources.
+ *
+ * The timer must first be stopped by calling _mali_osk_timer_del().
+ *
+ * It is a programming error for _mali_osk_timer_term() to be called on:
+ * - timer that is currently running
+ * - a timer that is currently executing its callback.
+ *
+ * @param tim the timer to deallocate.
+ */
+void _mali_osk_timer_term(_mali_osk_timer_t *tim);
+/** @} */ /* end group _mali_osk_timer */
+
+
+/** @defgroup _mali_osk_time OSK Time functions
+ *
+ * \ref _mali_osk_time use the OS's representation of time, which are
+ * 'ticks'. This is to prevent aliasing problems between the internal timer
+ * time, and the time asked for.
+ *
+ * OS tick time is measured as a u32. The time stored in a u32 may either be
+ * an absolute time, or a time delta between two events. Whilst it is valid to
+ * use math opeartors to \em change the tick value represented as a u32, it
+ * is often only meaningful to do such operations on time deltas, rather than
+ * on absolute time. However, it is meaningful to add/subtract time deltas to
+ * absolute times.
+ *
+ * Conversion between tick time and milliseconds (ms) may not be loss-less,
+ * and are \em implementation \em depenedant.
+ *
+ * Code use OS time must take this into account, since:
+ * - a small OS time may (or may not) be rounded
+ * - a large time may (or may not) overflow
+ *
+ * @{ */
+
+/** @brief Return whether ticka occurs after or at the same time as  tickb
+ *
+ * Systems where ticks can wrap must handle that.
+ *
+ * @param ticka ticka
+ * @param tickb tickb
+ * @return MALI_TRUE if ticka represents a time that occurs at or after tickb.
+ */
+mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb);
+
+/** @brief Convert milliseconds to OS 'ticks'
+ *
+ * @param ms time interval in milliseconds
+ * @return the corresponding time interval in OS ticks.
+ */
+unsigned long _mali_osk_time_mstoticks(u32 ms);
+
+/** @brief Convert OS 'ticks' to milliseconds
+ *
+ * @param ticks time interval in OS ticks.
+ * @return the corresponding time interval in milliseconds
+ */
+u32 _mali_osk_time_tickstoms(unsigned long ticks);
+
+
+/** @brief Get the current time in OS 'ticks'.
+ * @return the current time in OS 'ticks'.
+ */
+unsigned long _mali_osk_time_tickcount(void);
+
+/** @brief Cause a microsecond delay
+ *
+ * The delay will have microsecond resolution, and is necessary for correct
+ * operation of the driver. At worst, the delay will be \b at least \a usecs
+ * microseconds, and so may be (significantly) more.
+ *
+ * This function may be implemented as a busy-wait, which is the most sensible
+ * implementation. On OSs where there are situations in which a thread must not
+ * sleep, this is definitely implemented as a busy-wait.
+ *
+ * @param usecs the number of microseconds to wait for.
+ */
+void _mali_osk_time_ubusydelay(u32 usecs);
+
+/** @brief Return time in nano seconds, since any given reference.
+ *
+ * @return Time in nano seconds
+ */
+u64 _mali_osk_time_get_ns(void);
+
+/** @brief Return time in nano seconds, since boot time.
+ *
+ * @return Time in nano seconds
+ */
+u64 _mali_osk_boot_time_get_ns(void);
+
+/** @} */ /* end group _mali_osk_time */
+
+/** @defgroup _mali_osk_math OSK Math
+ * @{ */
+
+/** @brief Count Leading Zeros (Little-endian)
+ *
+ * @note This function must be implemented to support the reference
+ * implementation of _mali_osk_find_first_zero_bit, as defined in
+ * mali_osk_bitops.h.
+ *
+ * @param val 32-bit words to count leading zeros on
+ * @return the number of leading zeros.
+ */
+u32 _mali_osk_clz(u32 val);
+
+/** @brief find last (most-significant) bit set
+ *
+ * @param val 32-bit words to count last bit set on
+ * @return last bit set.
+ */
+u32 _mali_osk_fls(u32 val);
+
+/** @} */ /* end group _mali_osk_math */
+
+/** @addtogroup _mali_osk_wait_queue OSK Wait Queue functionality
+ * @{ */
+
+/** @brief Initialize an empty Wait Queue */
+_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void);
+
+/** @brief Sleep if condition is false
+ *
+ * @param queue the queue to use
+ * @param condition function pointer to a boolean function
+ * @param data data parameter for condition function
+ *
+ * Put thread to sleep if the given \a condition function returns false. When
+ * being asked to wake up again, the condition will be re-checked and the
+ * thread only woken up if the condition is now true.
+ */
+void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data);
+
+/** @brief Sleep if condition is false
+ *
+ * @param queue the queue to use
+ * @param condition function pointer to a boolean function
+ * @param data data parameter for condition function
+ * @param timeout timeout in ms
+ *
+ * Put thread to sleep if the given \a condition function returns false. When
+ * being asked to wake up again, the condition will be re-checked and the
+ * thread only woken up if the condition is now true.  Will return if time
+ * exceeds timeout.
+ */
+void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout);
+
+/** @brief Wake up all threads in wait queue if their respective conditions are
+ * true
+ *
+ * @param queue the queue whose threads should be woken up
+ *
+ * Wake up all threads in wait queue \a queue whose condition is now true.
+ */
+void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue);
+
+/** @brief terminate a wait queue
+ *
+ * @param queue the queue to terminate.
+ */
+void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue);
+/** @} */ /* end group _mali_osk_wait_queue */
+
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Output a device driver debug message.
+ *
+ * The interpretation of \a fmt is the same as the \c format parameter in
+ * _mali_osu_vsnprintf().
+ *
+ * @param fmt a _mali_osu_vsnprintf() style format string
+ * @param ... a variable-number of parameters suitable for \a fmt
+ */
+void _mali_osk_dbgmsg(const char *fmt, ...);
+
+/** @brief Print fmt into buf.
+ *
+ * The interpretation of \a fmt is the same as the \c format parameter in
+ * _mali_osu_vsnprintf().
+ *
+ * @param buf a pointer to the result buffer
+ * @param size the total number of bytes allowed to write to \a buf
+ * @param fmt a _mali_osu_vsnprintf() style format string
+ * @param ... a variable-number of parameters suitable for \a fmt
+ * @return The number of bytes written to \a buf
+ */
+u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...);
+
+/** @brief Print fmt into print_ctx.
+ *
+ * The interpretation of \a fmt is the same as the \c format parameter in
+ * _mali_osu_vsnprintf().
+ *
+ * @param print_ctx a pointer to the result file buffer
+ * @param fmt a _mali_osu_vsnprintf() style format string
+ * @param ... a variable-number of parameters suitable for \a fmt
+ */
+void _mali_osk_ctxprintf(_mali_osk_print_ctx *print_ctx, const char *fmt, ...);
+
+/** @brief Abnormal process abort.
+ *
+ * Terminates the caller-process if this function is called.
+ *
+ * This function will be called from Debug assert-macros in mali_kernel_common.h.
+ *
+ * This function will never return - because to continue from a Debug assert
+ * could cause even more problems, and hinder debugging of the initial problem.
+ *
+ * This function is only used in Debug builds, and is not used in Release builds.
+ */
+void _mali_osk_abort(void);
+
+/** @brief Sets breakpoint at point where function is called.
+ *
+ * This function will be called from Debug assert-macros in mali_kernel_common.h,
+ * to assist in debugging. If debugging at this level is not required, then this
+ * function may be implemented as a stub.
+ *
+ * This function is only used in Debug builds, and is not used in Release builds.
+ */
+void _mali_osk_break(void);
+
+/** @brief Return an identificator for calling process.
+ *
+ * @return Identificator for calling process.
+ */
+u32 _mali_osk_get_pid(void);
+
+/** @brief Return an name for calling process.
+ *
+ * @return name for calling process.
+ */
+char *_mali_osk_get_comm(void);
+
+/** @brief Return an identificator for calling thread.
+ *
+ * @return Identificator for calling thread.
+ */
+u32 _mali_osk_get_tid(void);
+
+
+/** @brief Take a reference to the power manager system for the Mali device (synchronously).
+ *
+ * When function returns successfully, Mali is ON.
+ *
+ * @note Call \a _mali_osk_pm_dev_ref_put() to release this reference.
+ */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void);
+
+/** @brief Take a reference to the external power manager system for the Mali device (asynchronously).
+ *
+ * Mali might not yet be on after this function as returned.
+ * Please use \a _mali_osk_pm_dev_barrier() or \a _mali_osk_pm_dev_ref_get_sync()
+ * to wait for Mali to be powered on.
+ *
+ * @note Call \a _mali_osk_pm_dev_ref_dec() to release this reference.
+ */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void);
+
+/** @brief Release the reference to the external power manger system for the Mali device.
+ *
+ * When reference count reach zero, the cores can be off.
+ *
+ * @note This must be used to release references taken with
+ * \a _mali_osk_pm_dev_ref_get_sync() or \a _mali_osk_pm_dev_ref_get_sync().
+ */
+void _mali_osk_pm_dev_ref_put(void);
+
+/** @brief Block until pending PM operations are done
+ */
+void _mali_osk_pm_dev_barrier(void);
+
+/** @} */ /* end group  _mali_osk_miscellaneous */
+
+/** @} */ /* end group osuapi */
+
+/** @} */ /* end group uddapi */
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Check standard inlines */
+#ifndef MALI_STATIC_INLINE
+#error MALI_STATIC_INLINE not defined on your OS
+#endif
+
+#ifndef MALI_NON_STATIC_INLINE
+#error MALI_NON_STATIC_INLINE not defined on your OS
+#endif
+
+#endif /* __MALI_OSK_H__ */
diff --git a/utgard/r5p1/common/mali_osk_bitops.h b/utgard/r5p1/common/mali_osk_bitops.h
new file mode 100755
index 0000000..c1709f9
--- /dev/null
+++ b/utgard/r5p1/common/mali_osk_bitops.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2010, 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_bitops.h
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#ifndef __MALI_OSK_BITOPS_H__
+#define __MALI_OSK_BITOPS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+MALI_STATIC_INLINE void _mali_internal_clear_bit(u32 bit, u32 *addr)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	MALI_DEBUG_ASSERT(NULL != addr);
+
+	(*addr) &= ~(1 << bit);
+}
+
+MALI_STATIC_INLINE void _mali_internal_set_bit(u32 bit, u32 *addr)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	MALI_DEBUG_ASSERT(NULL != addr);
+
+	(*addr) |= (1 << bit);
+}
+
+MALI_STATIC_INLINE u32 _mali_internal_test_bit(u32 bit, u32 value)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	return value & (1 << bit);
+}
+
+MALI_STATIC_INLINE int _mali_internal_find_first_zero_bit(u32 value)
+{
+	u32 inverted;
+	u32 negated;
+	u32 isolated;
+	u32 leading_zeros;
+
+	/* Begin with xxx...x0yyy...y, where ys are 1, number of ys is in range  0..31 */
+	inverted = ~value; /* zzz...z1000...0 */
+	/* Using count_trailing_zeros on inverted value -
+	 * See ARM System Developers Guide for details of count_trailing_zeros */
+
+	/* Isolate the zero: it is preceeded by a run of 1s, so add 1 to it */
+	negated = (u32) - inverted ; /* -a == ~a + 1 (mod 2^n) for n-bit numbers */
+	/* negated = xxx...x1000...0 */
+
+	isolated = negated & inverted ; /* xxx...x1000...0 & zzz...z1000...0, zs are ~xs */
+	/* And so the first zero bit is in the same position as the 1 == number of 1s that preceeded it
+	 * Note that the output is zero if value was all 1s */
+
+	leading_zeros = _mali_osk_clz(isolated);
+
+	return 31 - leading_zeros;
+}
+
+
+/** @defgroup _mali_osk_bitops OSK Non-atomic Bit-operations
+ * @{ */
+
+/**
+ * These bit-operations do not work atomically, and so locks must be used if
+ * atomicity is required.
+ *
+ * Reference implementations for Little Endian are provided, and so it should
+ * not normally be necessary to re-implement these. Efficient bit-twiddling
+ * techniques are used where possible, implemented in portable C.
+ *
+ * Note that these reference implementations rely on _mali_osk_clz() being
+ * implemented.
+ */
+
+/** @brief Clear a bit in a sequence of 32-bit words
+ * @param nr bit number to clear, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ */
+MALI_STATIC_INLINE void _mali_osk_clear_nonatomic_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	_mali_internal_clear_bit(nr, addr);
+}
+
+/** @brief Set a bit in a sequence of 32-bit words
+ * @param nr bit number to set, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ */
+MALI_STATIC_INLINE void _mali_osk_set_nonatomic_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	_mali_internal_set_bit(nr, addr);
+}
+
+/** @brief Test a bit in a sequence of 32-bit words
+ * @param nr bit number to test, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ * @return zero if bit was clear, non-zero if set. Do not rely on the return
+ * value being related to the actual word under test.
+ */
+MALI_STATIC_INLINE u32 _mali_osk_test_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	return _mali_internal_test_bit(nr, *addr);
+}
+
+/* Return maxbit if not found */
+/** @brief Find the first zero bit in a sequence of 32-bit words
+ * @param addr starting point for search.
+ * @param maxbit the maximum number of bits to search
+ * @return the number of the first zero bit found, or maxbit if none were found
+ * in the specified range.
+ */
+MALI_STATIC_INLINE u32 _mali_osk_find_first_zero_bit(const u32 *addr, u32 maxbit)
+{
+	u32 total;
+
+	for (total = 0; total < maxbit; total += 32, ++addr) {
+		int result;
+		result = _mali_internal_find_first_zero_bit(*addr);
+
+		/* non-negative signifies the bit was found */
+		if (result >= 0) {
+			total += (u32)result;
+			break;
+		}
+	}
+
+	/* Now check if we reached maxbit or above */
+	if (total >= maxbit) {
+		total = maxbit;
+	}
+
+	return total; /* either the found bit nr, or maxbit if not found */
+}
+/** @} */ /* end group _mali_osk_bitops */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_BITOPS_H__ */
diff --git a/utgard/r5p1/common/mali_osk_list.h b/utgard/r5p1/common/mali_osk_list.h
new file mode 100755
index 0000000..22d2244
--- /dev/null
+++ b/utgard/r5p1/common/mali_osk_list.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_list.h
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#ifndef __MALI_OSK_LIST_H__
+#define __MALI_OSK_LIST_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+MALI_STATIC_INLINE void __mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *prev, _mali_osk_list_t *next)
+{
+	next->prev = new_entry;
+	new_entry->next = next;
+	new_entry->prev = prev;
+	prev->next = new_entry;
+}
+
+MALI_STATIC_INLINE void __mali_osk_list_del(_mali_osk_list_t *prev, _mali_osk_list_t *next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists
+ * @{ */
+
+/** Reference implementations of Doubly-linked Circular Lists are provided.
+ * There is often no need to re-implement these.
+ *
+ * @note The implementation may differ subtly from any lists the OS provides.
+ * For this reason, these lists should not be mixed with OS-specific lists
+ * inside the OSK/UKK implementation. */
+
+/** @brief Initialize a list to be a head of an empty list
+ * @param exp the list to initialize. */
+#define _MALI_OSK_INIT_LIST_HEAD(exp) _mali_osk_list_init(exp)
+
+/** @brief Define a list variable, which is uninitialized.
+ * @param exp the name of the variable that the list will be defined as. */
+#define _MALI_OSK_LIST_HEAD(exp) _mali_osk_list_t exp
+
+/** @brief Define a list variable, which is initialized.
+ * @param exp the name of the variable that the list will be defined as. */
+#define _MALI_OSK_LIST_HEAD_STATIC_INIT(exp) _mali_osk_list_t exp = { &exp, &exp }
+
+/** @brief Initialize a list element.
+ *
+ * All list elements must be initialized before use.
+ *
+ * Do not use on any list element that is present in a list without using
+ * _mali_osk_list_del first, otherwise this will break the list.
+ *
+ * @param list the list element to initialize
+ */
+MALI_STATIC_INLINE void _mali_osk_list_init(_mali_osk_list_t *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+/** @brief Insert a single list element after an entry in a list
+ *
+ * As an example, if this is inserted to the head of a list, then this becomes
+ * the first element of the list.
+ *
+ * Do not use to move list elements from one list to another, as it will break
+ * the originating list.
+ *
+ *
+ * @param newlist the list element to insert
+ * @param list the list in which to insert. The new element will be the next
+ * entry in this list
+ */
+MALI_STATIC_INLINE void _mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_add(new_entry, list, list->next);
+}
+
+/** @brief Insert a single list element before an entry in a list
+ *
+ * As an example, if this is inserted to the head of a list, then this becomes
+ * the last element of the list.
+ *
+ * Do not use to move list elements from one list to another, as it will break
+ * the originating list.
+ *
+ * @param newlist the list element to insert
+ * @param list the list in which to insert. The new element will be the previous
+ * entry in this list
+ */
+MALI_STATIC_INLINE void _mali_osk_list_addtail(_mali_osk_list_t *new_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_add(new_entry, list->prev, list);
+}
+
+/** @brief Remove a single element from a list
+ *
+ * The element will no longer be present in the list. The removed list element
+ * will be uninitialized, and so should not be traversed. It must be
+ * initialized before further use.
+ *
+ * @param list the list element to remove.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_del(_mali_osk_list_t *list)
+{
+	__mali_osk_list_del(list->prev, list->next);
+}
+
+/** @brief Remove a single element from a list, and re-initialize it
+ *
+ * The element will no longer be present in the list. The removed list element
+ * will initialized, and so can be used as normal.
+ *
+ * @param list the list element to remove and initialize.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_delinit(_mali_osk_list_t *list)
+{
+	__mali_osk_list_del(list->prev, list->next);
+	_mali_osk_list_init(list);
+}
+
+/** @brief Determine whether a list is empty.
+ *
+ * An empty list is one that contains a single element that points to itself.
+ *
+ * @param list the list to check.
+ * @return non-zero if the list is empty, and zero otherwise.
+ */
+MALI_STATIC_INLINE mali_bool _mali_osk_list_empty(_mali_osk_list_t *list)
+{
+	return list->next == list;
+}
+
+/** @brief Move a list element from one list to another.
+ *
+ * The list element must be initialized.
+ *
+ * As an example, moving a list item to the head of a new list causes this item
+ * to be the first element in the new list.
+ *
+ * @param move the list element to move
+ * @param list the new list into which the element will be inserted, as the next
+ * element in the list.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_move(_mali_osk_list_t *move_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_del(move_entry->prev, move_entry->next);
+	_mali_osk_list_add(move_entry, list);
+}
+
+/** @brief Move an entire list
+ *
+ * The list element must be initialized.
+ *
+ * Allows you to move a list from one list head to another list head
+ *
+ * @param old_list The existing list head
+ * @param new_list The new list head (must be an empty list)
+ */
+MALI_STATIC_INLINE void _mali_osk_list_move_list(_mali_osk_list_t *old_list, _mali_osk_list_t *new_list)
+{
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(new_list));
+	if (!_mali_osk_list_empty(old_list)) {
+		new_list->next = old_list->next;
+		new_list->prev = old_list->prev;
+		new_list->next->prev = new_list;
+		new_list->prev->next = new_list;
+		old_list->next = old_list;
+		old_list->prev = old_list;
+	}
+}
+
+/** @brief Find the containing structure of a list
+ *
+ * When traversing a list, this is used to recover the containing structure,
+ * given that is contains a _mali_osk_list_t member.
+ *
+ * Each list must be of structures of one type, and must link the same members
+ * together, otherwise it will not be possible to correctly recover the
+ * sturctures that the lists link.
+ *
+ * @note no type or memory checking occurs to ensure that a structure does in
+ * fact exist for the list entry, and that it is being recovered with respect
+ * to the correct list member.
+ *
+ * @param ptr the pointer to the _mali_osk_list_t member in this structure
+ * @param type the type of the structure that contains the member
+ * @param member the member of the structure that ptr points to.
+ * @return a pointer to a \a type object which contains the _mali_osk_list_t
+ * \a member, as pointed to by the _mali_osk_list_t \a *ptr.
+ */
+#define _MALI_OSK_LIST_ENTRY(ptr, type, member) \
+	_MALI_OSK_CONTAINER_OF(ptr, type, member)
+
+/** @brief Enumerate a list safely
+ *
+ * With this macro, lists can be enumerated in a 'safe' manner. That is,
+ * entries can be deleted from the list without causing an error during
+ * enumeration. To achieve this, a 'temporary' pointer is required, which must
+ * be provided to the macro.
+ *
+ * Use it like a 'for()', 'while()' or 'do()' construct, and so it must be
+ * followed by a statement or compound-statement which will be executed for
+ * each list entry.
+ *
+ * Upon loop completion, providing that an early out was not taken in the
+ * loop body, then it is guaranteed that ptr->member == list, even if the loop
+ * body never executed.
+ *
+ * @param ptr a pointer to an object of type 'type', which points to the
+ * structure that contains the currently enumerated list entry.
+ * @param tmp a pointer to an object of type 'type', which must not be used
+ * inside the list-execution statement.
+ * @param list a pointer to a _mali_osk_list_t, from which enumeration will
+ * begin
+ * @param type the type of the structure that contains the _mali_osk_list_t
+ * member that is part of the list to be enumerated.
+ * @param member the _mali_osk_list_t member of the structure that is part of
+ * the list to be enumerated.
+ */
+#define _MALI_OSK_LIST_FOREACHENTRY(ptr, tmp, list, type, member)         \
+	for (ptr = _MALI_OSK_LIST_ENTRY((list)->next, type, member),      \
+	     tmp = _MALI_OSK_LIST_ENTRY(ptr->member.next, type, member);  \
+	     &ptr->member != (list);                                      \
+	     ptr = tmp,                                                   \
+	     tmp = _MALI_OSK_LIST_ENTRY(tmp->member.next, type, member))
+
+/** @brief Enumerate a list in reverse order safely
+ *
+ * This macro is identical to @ref _MALI_OSK_LIST_FOREACHENTRY, except that
+ * entries are enumerated in reverse order.
+ *
+ * @param ptr a pointer to an object of type 'type', which points to the
+ * structure that contains the currently enumerated list entry.
+ * @param tmp a pointer to an object of type 'type', which must not be used
+ * inside the list-execution statement.
+ * @param list a pointer to a _mali_osk_list_t, from which enumeration will
+ * begin
+ * @param type the type of the structure that contains the _mali_osk_list_t
+ * member that is part of the list to be enumerated.
+ * @param member the _mali_osk_list_t member of the structure that is part of
+ * the list to be enumerated.
+ */
+#define _MALI_OSK_LIST_FOREACHENTRY_REVERSE(ptr, tmp, list, type, member) \
+	for (ptr = _MALI_OSK_LIST_ENTRY((list)->prev, type, member),      \
+	     tmp = _MALI_OSK_LIST_ENTRY(ptr->member.prev, type, member);  \
+	     &ptr->member != (list);                                      \
+	     ptr = tmp,                                                   \
+	     tmp = _MALI_OSK_LIST_ENTRY(tmp->member.prev, type, member))
+
+/** @} */ /* end group _mali_osk_list */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_LIST_H__ */
diff --git a/utgard/r5p1/common/mali_osk_mali.h b/utgard/r5p1/common/mali_osk_mali.h
new file mode 100755
index 0000000..9e4f11d
--- /dev/null
+++ b/utgard/r5p1/common/mali_osk_mali.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_mali.h
+ * Defines the OS abstraction layer which is specific for the Mali kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_MALI_H__
+#define __MALI_OSK_MALI_H__
+
+#include <linux/mali/mali_utgard.h>
+#include <mali_osk.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Struct with device specific configuration data
+ */
+typedef struct mali_gpu_device_data _mali_osk_device_data;
+
+#ifdef CONFIG_MALI_DT
+/** @brief Initialize those device resources when we use device tree
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_resource_initialize(void);
+#endif
+
+/** @brief Find Mali GPU HW resource
+ *
+ * @param addr Address of Mali GPU resource to find
+ * @param res Storage for resource information if resource is found.
+ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if resource is not found
+ */
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res);
+
+
+/** @brief Find Mali GPU HW base address
+ *
+ * @return 0 if resources are found, otherwise the Mali GPU component with lowest address.
+ */
+uintptr_t _mali_osk_resource_base_address(void);
+
+/** @brief Find the number of L2 cache cores.
+ *
+ * @return return the number of l2 cache cores we find in device resources.
+ */
+u32 _mali_osk_l2_resource_count(void);
+
+/** @brief Retrieve the Mali GPU specific data
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data);
+
+/** @brief Find the pmu domain config from device data.
+ *
+ * @param domain_config_array used to store pmu domain config found in device data.
+ * @param array_size is the size of array domain_config_array.
+ */
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size);
+
+/** @brief Get Mali PMU switch delay
+ *
+ *@return pmu switch delay if it is configured
+ */
+u32 _mali_osk_get_pmu_switch_delay(void);
+
+/** @brief Determines if Mali GPU has been configured with shared interrupts.
+ *
+ * @return MALI_TRUE if shared interrupts, MALI_FALSE if not.
+ */
+mali_bool _mali_osk_shared_interrupts(void);
+
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_MALI_H__ */
diff --git a/utgard/r5p1/common/mali_osk_profiling.h b/utgard/r5p1/common/mali_osk_profiling.h
new file mode 100755
index 0000000..e85c28b
--- /dev/null
+++ b/utgard/r5p1/common/mali_osk_profiling.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_OSK_PROFILING_H__
+#define __MALI_OSK_PROFILING_H__
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+
+#include "mali_linux_trace.h"
+#include "mali_profiling_events.h"
+#include "mali_profiling_gator_api.h"
+
+#define MALI_PROFILING_MAX_BUFFER_ENTRIES 1048576
+
+#define MALI_PROFILING_NO_HW_COUNTER = ((u32)-1)
+
+/** @defgroup _mali_osk_profiling External profiling connectivity
+ * @{ */
+
+/**
+ * Initialize the profiling module.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start);
+
+/*
+ * Terminate the profiling module.
+ */
+void _mali_osk_profiling_term(void);
+
+/**
+ * Start recording profiling data
+ *
+ * The specified limit will determine how large the capture buffer is.
+ * MALI_PROFILING_MAX_BUFFER_ENTRIES determines the maximum size allowed by the device driver.
+ *
+ * @param limit The desired maximum number of events to record on input, the actual maximum on output.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_start(u32 *limit);
+
+/**
+ * Add an profiling event
+ *
+ * @param event_id The event identificator.
+ * @param data0 First data parameter, depending on event_id specified.
+ * @param data1 Second data parameter, depending on event_id specified.
+ * @param data2 Third data parameter, depending on event_id specified.
+ * @param data3 Fourth data parameter, depending on event_id specified.
+ * @param data4 Fifth data parameter, depending on event_id specified.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+/* Call Linux tracepoint directly */
+#define _mali_osk_profiling_add_event(event_id, data0, data1, data2, data3, data4) trace_mali_timeline_event((event_id), (data0), (data1), (data2), (data3), (data4))
+
+/**
+ * Report a hardware counter event.
+ *
+ * @param counter_id The ID of the counter.
+ * @param value The value of the counter.
+ */
+
+/* Call Linux tracepoint directly */
+#define _mali_osk_profiling_report_hw_counter(counter_id, value) trace_mali_hw_counter(counter_id, value)
+
+/**
+ * Report SW counters
+ *
+ * @param counters array of counter values
+ */
+void _mali_osk_profiling_report_sw_counters(u32 *counters);
+
+/**
+ * Stop recording profiling data
+ *
+ * @param count Returns the number of recorded events.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_stop(u32 *count);
+
+/**
+ * Retrieves the number of events that can be retrieved
+ *
+ * @return The number of recorded events that can be retrieved.
+ */
+u32 _mali_osk_profiling_get_count(void);
+
+/**
+ * Retrieve an event
+ *
+ * @param index Event index (start with 0 and continue until this function fails to retrieve all events)
+ * @param timestamp The timestamp for the retrieved event will be stored here.
+ * @param event_id The event ID for the retrieved event will be stored here.
+ * @param data The 5 data values for the retrieved event will be stored here.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]);
+
+/**
+ * Clear the recorded buffer.
+ *
+ * This is needed in order to start another recording.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_clear(void);
+
+/**
+ * Checks if a recording of profiling data is in progress
+ *
+ * @return MALI_TRUE if recording of profiling data is in progress, MALI_FALSE if not
+ */
+mali_bool _mali_osk_profiling_is_recording(void);
+
+/**
+ * Checks if profiling data is available for retrival
+ *
+ * @return MALI_TRUE if profiling data is avaiable, MALI_FALSE if not
+ */
+mali_bool _mali_osk_profiling_have_recording(void);
+
+/** @} */ /* end group _mali_osk_profiling */
+
+#else /* defined(CONFIG_MALI400_PROFILING)  && defined(CONFIG_TRACEPOINTS) */
+
+/* Dummy add_event, for when profiling is disabled. */
+
+#define _mali_osk_profiling_add_event(event_id, data0, data1, data2, data3, data4)
+
+#endif /* defined(CONFIG_MALI400_PROFILING)  && defined(CONFIG_TRACEPOINTS) */
+
+#endif /* __MALI_OSK_PROFILING_H__ */
+
+
diff --git a/utgard/r5p1/common/mali_osk_types.h b/utgard/r5p1/common/mali_osk_types.h
new file mode 100755
index 0000000..5204d30
--- /dev/null
+++ b/utgard/r5p1/common/mali_osk_types.h
@@ -0,0 +1,456 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_types.h
+ * Defines types of the OS abstraction layer for the kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_TYPES_H__
+#define __MALI_OSK_TYPES_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs
+ *
+ * @{
+ */
+
+/** @defgroup _mali_osk_miscellaneous OSK Miscellaneous functions, constants and types
+ * @{ */
+
+/* Define integer types used by OSK. Note: these currently clash with Linux so we only define them if not defined already */
+#ifndef __KERNEL__
+typedef unsigned char      u8;
+typedef signed char        s8;
+typedef unsigned short     u16;
+typedef signed short       s16;
+typedef unsigned int       u32;
+typedef signed int         s32;
+typedef unsigned long long u64;
+#define BITS_PER_LONG (sizeof(long)*8)
+#else
+/* Ensure Linux types u32, etc. are defined */
+#include <linux/types.h>
+#endif
+
+/** @brief Mali Boolean type which uses MALI_TRUE and MALI_FALSE
+  */
+typedef unsigned long mali_bool;
+
+#ifndef MALI_TRUE
+#define MALI_TRUE ((mali_bool)1)
+#endif
+
+#ifndef MALI_FALSE
+#define MALI_FALSE ((mali_bool)0)
+#endif
+
+#define MALI_HW_CORE_NO_COUNTER     ((u32)-1)
+
+/**
+ * @brief OSK Error codes
+ *
+ * Each OS may use its own set of error codes, and may require that the
+ * User/Kernel interface take certain error code. This means that the common
+ * error codes need to be sufficiently rich to pass the correct error code
+ * thorugh from the OSK to U/K layer, across all OSs.
+ *
+ * The result is that some error codes will appear redundant on some OSs.
+ * Under all OSs, the OSK layer must translate native OS error codes to
+ * _mali_osk_errcode_t codes. Similarly, the U/K layer must translate from
+ * _mali_osk_errcode_t codes to native OS error codes.
+ */
+typedef enum {
+	_MALI_OSK_ERR_OK = 0, /**< Success. */
+	_MALI_OSK_ERR_FAULT = -1, /**< General non-success */
+	_MALI_OSK_ERR_INVALID_FUNC = -2, /**< Invalid function requested through User/Kernel interface (e.g. bad IOCTL number) */
+	_MALI_OSK_ERR_INVALID_ARGS = -3, /**< Invalid arguments passed through User/Kernel interface */
+	_MALI_OSK_ERR_NOMEM = -4, /**< Insufficient memory */
+	_MALI_OSK_ERR_TIMEOUT = -5, /**< Timeout occurred */
+	_MALI_OSK_ERR_RESTARTSYSCALL = -6, /**< Special: On certain OSs, must report when an interruptable mutex is interrupted. Ignore otherwise. */
+	_MALI_OSK_ERR_ITEM_NOT_FOUND = -7, /**< Table Lookup failed */
+	_MALI_OSK_ERR_BUSY = -8, /**< Device/operation is busy. Try again later */
+	_MALI_OSK_ERR_UNSUPPORTED = -9, /**< Optional part of the interface used, and is unsupported */
+} _mali_osk_errcode_t;
+
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_wq OSK work queues
+ * @{ */
+
+/** @brief Private type for work objects */
+typedef struct _mali_osk_wq_work_s _mali_osk_wq_work_t;
+typedef struct _mali_osk_wq_delayed_work_s _mali_osk_wq_delayed_work_t;
+
+/** @brief Work queue handler function
+ *
+ * This function type is called when the work is scheduled by the work queue,
+ * e.g. as an IRQ bottom-half handler.
+ *
+ * Refer to \ref _mali_osk_wq_schedule_work() for more information on the
+ * work-queue and work handlers.
+ *
+ * @param arg resource-specific data
+ */
+typedef void (*_mali_osk_wq_work_handler_t)(void *arg);
+
+/* @} */ /* end group _mali_osk_wq */
+
+/** @defgroup _mali_osk_irq OSK IRQ handling
+ * @{ */
+
+/** @brief Private type for IRQ handling objects */
+typedef struct _mali_osk_irq_t_struct _mali_osk_irq_t;
+
+/** @brief Optional function to trigger an irq from a resource
+ *
+ * This function is implemented by the common layer to allow probing of a resource's IRQ.
+ * @param arg resource-specific data */
+typedef void (*_mali_osk_irq_trigger_t)(void *arg);
+
+/** @brief Optional function to acknowledge an irq from a resource
+ *
+ * This function is implemented by the common layer to allow probing of a resource's IRQ.
+ * @param arg resource-specific data
+ * @return _MALI_OSK_ERR_OK if the IRQ was successful, or a suitable _mali_osk_errcode_t on failure. */
+typedef _mali_osk_errcode_t (*_mali_osk_irq_ack_t)(void *arg);
+
+/** @brief IRQ 'upper-half' handler callback.
+ *
+ * This function is implemented by the common layer to do the initial handling of a
+ * resource's IRQ. This maps on to the concept of an ISR that does the minimum
+ * work necessary before handing off to an IST.
+ *
+ * The communication of the resource-specific data from the ISR to the IST is
+ * handled by the OSK implementation.
+ *
+ * On most systems, the IRQ upper-half handler executes in IRQ context.
+ * Therefore, the system may have restrictions about what can be done in this
+ * context
+ *
+ * If an IRQ upper-half handler requires more work to be done than can be
+ * acheived in an IRQ context, then it may defer the work with
+ * _mali_osk_wq_schedule_work(). Refer to \ref _mali_osk_wq_create_work() for
+ * more information.
+ *
+ * @param arg resource-specific data
+ * @return _MALI_OSK_ERR_OK if the IRQ was correctly handled, or a suitable
+ * _mali_osk_errcode_t otherwise.
+ */
+typedef _mali_osk_errcode_t (*_mali_osk_irq_uhandler_t)(void *arg);
+
+
+/** @} */ /* end group _mali_osk_irq */
+
+
+/** @defgroup _mali_osk_atomic OSK Atomic counters
+ * @{ */
+
+/** @brief Public type of atomic counters
+ *
+ * This is public for allocation on stack. On systems that support it, this is just a single 32-bit value.
+ * On others, it could be encapsulating an object stored elsewhere.
+ *
+ * Regardless of implementation, the \ref _mali_osk_atomic functions \b must be used
+ * for all accesses to the variable's value, even if atomicity is not required.
+ * Do not access u.val or u.obj directly.
+ */
+typedef struct {
+	union {
+		u32 val;
+		void *obj;
+	} u;
+} _mali_osk_atomic_t;
+/** @} */ /* end group _mali_osk_atomic */
+
+
+/** @defgroup _mali_osk_lock OSK Mutual Exclusion Locks
+ * @{ */
+
+
+/** @brief OSK Mutual Exclusion Lock ordered list
+ *
+ * This lists the various types of locks in the system and is used to check
+ * that locks are taken in the correct order.
+ *
+ * - Holding more than one lock of the same order at the same time is not
+ *   allowed.
+ * - Taking a lock of a lower order than the highest-order lock currently held
+ *   is not allowed.
+ *
+ */
+typedef enum {
+	/*  ||    Locks    ||  */
+	/*  ||   must be   ||  */
+	/* _||_  taken in _||_ */
+	/* \  /    this   \  / */
+	/*  \/    order!   \/  */
+
+	_MALI_OSK_LOCK_ORDER_FIRST = 0,
+
+	_MALI_OSK_LOCK_ORDER_SESSIONS,
+	_MALI_OSK_LOCK_ORDER_MEM_SESSION,
+	_MALI_OSK_LOCK_ORDER_MEM_INFO,
+	_MALI_OSK_LOCK_ORDER_MEM_PT_CACHE,
+	_MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP,
+	_MALI_OSK_LOCK_ORDER_PM_EXECUTION,
+	_MALI_OSK_LOCK_ORDER_EXECUTOR,
+	_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM,
+	_MALI_OSK_LOCK_ORDER_SCHEDULER,
+	_MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED,
+	_MALI_OSK_LOCK_ORDER_PROFILING,
+	_MALI_OSK_LOCK_ORDER_L2,
+	_MALI_OSK_LOCK_ORDER_L2_COMMAND,
+	_MALI_OSK_LOCK_ORDER_UTILIZATION,
+	_MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS,
+	_MALI_OSK_LOCK_ORDER_PM_STATE,
+
+	_MALI_OSK_LOCK_ORDER_LAST,
+} _mali_osk_lock_order_t;
+
+
+/** @brief OSK Mutual Exclusion Lock flags type
+ *
+ * - Any lock can use the order parameter.
+ */
+typedef enum {
+	_MALI_OSK_LOCKFLAG_UNORDERED        = 0x1, /**< Indicate that the order of this lock should not be checked */
+	_MALI_OSK_LOCKFLAG_ORDERED          = 0x2,
+	/** @enum _mali_osk_lock_flags_t
+	 *
+	 * Flags from 0x10000--0x80000000 are RESERVED for User-mode */
+
+} _mali_osk_lock_flags_t;
+
+/** @brief Mutual Exclusion Lock Mode Optimization hint
+ *
+ * The lock mode is used to implement the read/write locking of locks when we call
+ * functions _mali_osk_mutex_rw_init/wait/signal/term/. In this case, the RO mode can
+ * be used to allow multiple concurrent readers, but no writers. The RW mode is used for
+ * writers, and so will wait for all readers to release the lock (if any present).
+ * Further readers and writers will wait until the writer releases the lock.
+ *
+ * The mode is purely an optimization hint: for example, it is permissible for
+ * all locks to behave in RW mode, regardless of that supplied.
+ *
+ * It is an error to attempt to use locks in anything other that RW mode when
+ * call functions _mali_osk_mutex_rw_wait/signal().
+ *
+ */
+typedef enum {
+	_MALI_OSK_LOCKMODE_UNDEF = -1,  /**< Undefined lock mode. For internal use only */
+	_MALI_OSK_LOCKMODE_RW    = 0x0, /**< Read-write mode, default. All readers and writers are mutually-exclusive */
+	_MALI_OSK_LOCKMODE_RO,          /**< Read-only mode, to support multiple concurrent readers, but mutual exclusion in the presence of writers. */
+	/** @enum _mali_osk_lock_mode_t
+	 *
+	 * Lock modes 0x40--0x7F are RESERVED for User-mode */
+} _mali_osk_lock_mode_t;
+
+/** @brief Private types for Mutual Exclusion lock objects */
+typedef struct _mali_osk_lock_debug_s _mali_osk_lock_debug_t;
+typedef struct _mali_osk_spinlock_s _mali_osk_spinlock_t;
+typedef struct _mali_osk_spinlock_irq_s _mali_osk_spinlock_irq_t;
+typedef struct _mali_osk_mutex_s _mali_osk_mutex_t;
+typedef struct _mali_osk_mutex_rw_s _mali_osk_mutex_rw_t;
+
+/** @} */ /* end group _mali_osk_lock */
+
+/** @defgroup _mali_osk_low_level_memory OSK Low-level Memory Operations
+ * @{ */
+
+/**
+ * @brief Private data type for use in IO accesses to/from devices.
+ *
+ * This represents some range that is accessible from the device. Examples
+ * include:
+ * - Device Registers, which could be readable and/or writeable.
+ * - Memory that the device has access to, for storing configuration structures.
+ *
+ * Access to this range must be made through the _mali_osk_mem_ioread32() and
+ * _mali_osk_mem_iowrite32() functions.
+ */
+typedef struct _mali_io_address *mali_io_address;
+
+/** @defgroup _MALI_OSK_CPU_PAGE CPU Physical page size macros.
+ *
+ * The order of the page size is supplied for
+ * ease of use by algorithms that might require it, since it is easier to know
+ * it ahead of time rather than calculating it.
+ *
+ * The Mali Page Mask macro masks off the lower bits of a physical address to
+ * give the start address of the page for that physical address.
+ *
+ * @note The Mali device driver code is designed for systems with 4KB page size.
+ * Changing these macros will not make the entire Mali device driver work with
+ * page sizes other than 4KB.
+ *
+ * @note The CPU Physical Page Size has been assumed to be the same as the Mali
+ * Physical Page Size.
+ *
+ * @{
+ */
+
+/** CPU Page Order, as log to base 2 of the Page size. @see _MALI_OSK_CPU_PAGE_SIZE */
+#define _MALI_OSK_CPU_PAGE_ORDER ((u32)12)
+/** CPU Page Size, in bytes.               */
+#define _MALI_OSK_CPU_PAGE_SIZE (((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER))
+/** CPU Page Mask, which masks off the offset within a page */
+#define _MALI_OSK_CPU_PAGE_MASK (~((((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER)) - ((u32)1)))
+/** @} */ /* end of group _MALI_OSK_CPU_PAGE */
+
+/** @defgroup _MALI_OSK_MALI_PAGE Mali Physical Page size macros
+ *
+ * Mali Physical page size macros. The order of the page size is supplied for
+ * ease of use by algorithms that might require it, since it is easier to know
+ * it ahead of time rather than calculating it.
+ *
+ * The Mali Page Mask macro masks off the lower bits of a physical address to
+ * give the start address of the page for that physical address.
+ *
+ * @note The Mali device driver code is designed for systems with 4KB page size.
+ * Changing these macros will not make the entire Mali device driver work with
+ * page sizes other than 4KB.
+ *
+ * @note The Mali Physical Page Size has been assumed to be the same as the CPU
+ * Physical Page Size.
+ *
+ * @{
+ */
+
+/** Mali Page Order, as log to base 2 of the Page size. @see _MALI_OSK_MALI_PAGE_SIZE */
+#define _MALI_OSK_MALI_PAGE_ORDER PAGE_SHIFT
+/** Mali Page Size, in bytes.               */
+#define _MALI_OSK_MALI_PAGE_SIZE PAGE_SIZE
+/** Mali Page Mask, which masks off the offset within a page */
+#define _MALI_OSK_MALI_PAGE_MASK PAGE_MASK
+/** @} */ /* end of group _MALI_OSK_MALI_PAGE*/
+
+/** @brief flags for mapping a user-accessible memory range
+ *
+ * Where a function with prefix '_mali_osk_mem_mapregion' accepts flags as one
+ * of the function parameters, it will use one of these. These allow per-page
+ * control over mappings. Compare with the mali_memory_allocation_flag type,
+ * which acts over an entire range
+ *
+ * These may be OR'd together with bitwise OR (|), but must be cast back into
+ * the type after OR'ing.
+ */
+typedef enum {
+	_MALI_OSK_MEM_MAPREGION_FLAG_OS_ALLOCATED_PHYSADDR = 0x1, /**< Physical address is OS Allocated */
+} _mali_osk_mem_mapregion_flags_t;
+/** @} */ /* end group _mali_osk_low_level_memory */
+
+/** @defgroup _mali_osk_notification OSK Notification Queues
+ * @{ */
+
+/** @brief Private type for notification queue objects */
+typedef struct _mali_osk_notification_queue_t_struct _mali_osk_notification_queue_t;
+
+/** @brief Public notification data object type */
+typedef struct _mali_osk_notification_t_struct {
+	u32 notification_type;   /**< The notification type */
+	u32 result_buffer_size; /**< Size of the result buffer to copy to user space */
+	void *result_buffer;    /**< Buffer containing any type specific data */
+} _mali_osk_notification_t;
+
+/** @} */ /* end group _mali_osk_notification */
+
+
+/** @defgroup _mali_osk_timer OSK Timer Callbacks
+ * @{ */
+
+/** @brief Function to call when a timer expires
+ *
+ * When a timer expires, this function is called. Note that on many systems,
+ * a timer callback will be executed in IRQ context. Therefore, restrictions
+ * may apply on what can be done inside the timer callback.
+ *
+ * If a timer requires more work to be done than can be acheived in an IRQ
+ * context, then it may defer the work with a work-queue. For example, it may
+ * use \ref _mali_osk_wq_schedule_work() to make use of a bottom-half handler
+ * to carry out the remaining work.
+ *
+ * Stopping the timer with \ref _mali_osk_timer_del() blocks on compeletion of
+ * the callback. Therefore, the callback may not obtain any mutexes also held
+ * by any callers of _mali_osk_timer_del(). Otherwise, a deadlock may occur.
+ *
+ * @param arg Function-specific data */
+typedef void (*_mali_osk_timer_callback_t)(void *arg);
+
+/** @brief Private type for Timer Callback Objects */
+typedef struct _mali_osk_timer_t_struct _mali_osk_timer_t;
+/** @} */ /* end group _mali_osk_timer */
+
+
+/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists
+ * @{ */
+
+/** @brief Public List objects.
+ *
+ * To use, add a _mali_osk_list_t member to the structure that may become part
+ * of a list. When traversing the _mali_osk_list_t objects, use the
+ * _MALI_OSK_CONTAINER_OF() macro to recover the structure from its
+ *_mali_osk_list_t member
+ *
+ * Each structure may have multiple _mali_osk_list_t members, so that the
+ * structure is part of multiple lists. When traversing lists, ensure that the
+ * correct _mali_osk_list_t member is used, because type-checking will be
+ * lost by the compiler.
+ */
+typedef struct _mali_osk_list_s {
+	struct _mali_osk_list_s *next;
+	struct _mali_osk_list_s *prev;
+} _mali_osk_list_t;
+/** @} */ /* end group _mali_osk_list */
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief resource description struct
+ *
+ * Platform independent representation of a Mali HW resource
+ */
+typedef struct _mali_osk_resource {
+	const char *description;        /**< short description of the resource */
+	uintptr_t base;                 /**< Physical base address of the resource, as seen by Mali resources. */
+	const char *irq_name;           /**< Name of irq belong to this resource */
+	u32 irq;                        /**< IRQ number delivered to the CPU, or -1 to tell the driver to probe for it (if possible) */
+} _mali_osk_resource_t;
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_wait_queue OSK Wait Queue functionality
+ * @{ */
+/** @brief Private type for wait queue objects */
+typedef struct _mali_osk_wait_queue_t_struct _mali_osk_wait_queue_t;
+/** @} */ /* end group _mali_osk_wait_queue */
+
+/** @} */ /* end group osuapi */
+
+/** @} */ /* end group uddapi */
+
+/** @brief Mali print ctx type which uses seq_file
+  */
+typedef struct seq_file _mali_osk_print_ctx;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_TYPES_H__ */
diff --git a/utgard/r5p1/common/mali_pm.c b/utgard/r5p1/common/mali_pm.c
new file mode 100755
index 0000000..85cdde0
--- /dev/null
+++ b/utgard/r5p1/common/mali_pm.c
@@ -0,0 +1,1339 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pm.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_scheduler.h"
+#include "mali_group.h"
+#include "mali_pm_domain.h"
+#include "mali_pmu.h"
+
+#include "mali_executor.h"
+#include "mali_control_timer.h"
+
+#if defined(DEBUG)
+u32 num_pm_runtime_resume = 0;
+u32 num_pm_updates = 0;
+u32 num_pm_updates_up = 0;
+u32 num_pm_updates_down = 0;
+#endif
+
+#define MALI_PM_DOMAIN_DUMMY_MASK (1 << MALI_DOMAIN_INDEX_DUMMY)
+
+/* lock protecting power state (including pm_domains) */
+static _mali_osk_spinlock_irq_t *pm_lock_state = NULL;
+
+/* the wanted domain mask (protected by pm_lock_state) */
+static u32 pd_mask_wanted = 0;
+
+/* used to deferring the actual power changes */
+static _mali_osk_wq_work_t *pm_work = NULL;
+
+/* lock protecting power change execution */
+static _mali_osk_mutex_t *pm_lock_exec = NULL;
+
+/* PMU domains which are actually powered on (protected by pm_lock_exec) */
+static u32 pmu_mask_current = 0;
+
+/*
+ * domains which marked as powered on (protected by pm_lock_exec)
+ * This can be different from pmu_mask_current right after GPU power on
+ * if the PMU domains default to powered up.
+ */
+static u32 pd_mask_current = 0;
+
+static u16 domain_config[MALI_MAX_NUMBER_OF_DOMAINS] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	1 << MALI_DOMAIN_INDEX_DUMMY
+};
+
+/* The relative core power cost */
+#define MALI_GP_COST 3
+#define MALI_PP_COST 6
+#define MALI_L2_COST 1
+
+/*
+ *We have MALI_MAX_NUMBER_OF_PP_PHYSICAL_CORES + 1 rows in this matrix
+ *because we mush store the mask of different pp cores: 0, 1, 2, 3, 4, 5, 6, 7, 8.
+ */
+static int mali_pm_domain_power_cost_result[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1][MALI_MAX_NUMBER_OF_DOMAINS];
+/*
+ * Keep track of runtime PM state, so that we know
+ * how to resume during OS resume.
+ */
+#ifdef CONFIG_PM_RUNTIME
+static mali_bool mali_pm_runtime_active = MALI_FALSE;
+#else
+/* when kernel don't enable PM_RUNTIME, set the flag always true,
+ * for GPU will not power off by runtime */
+static mali_bool mali_pm_runtime_active = MALI_TRUE;
+#endif
+
+static void mali_pm_state_lock(void);
+static void mali_pm_state_unlock(void);
+static _mali_osk_errcode_t mali_pm_create_pm_domains(void);
+static void mali_pm_set_pmu_domain_config(void);
+static u32 mali_pm_get_registered_cores_mask(void);
+static void mali_pm_update_sync_internal(void);
+static mali_bool mali_pm_common_suspend(void);
+static void mali_pm_update_work(void *data);
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask);
+const char *mali_pm_group_stats_to_string(void);
+#endif
+
+_mali_osk_errcode_t mali_pm_initialize(void)
+{
+	_mali_osk_errcode_t err;
+	struct mali_pmu_core *pmu;
+
+	pm_lock_state = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED,
+			_MALI_OSK_LOCK_ORDER_PM_STATE);
+	if (NULL == pm_lock_state) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pm_lock_exec = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED,
+					    _MALI_OSK_LOCK_ORDER_PM_STATE);
+	if (NULL == pm_lock_exec) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pm_work = _mali_osk_wq_create_work(mali_pm_update_work, NULL);
+	if (NULL == pm_work) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pmu = mali_pmu_get_global_pmu_core();
+	if (NULL != pmu) {
+		/*
+		 * We have a Mali PMU, set the correct domain
+		 * configuration (default or custom)
+		 */
+
+		u32 registered_cores_mask;
+
+		mali_pm_set_pmu_domain_config();
+
+		registered_cores_mask = mali_pm_get_registered_cores_mask();
+		mali_pmu_set_registered_cores_mask(pmu, registered_cores_mask);
+
+		MALI_DEBUG_ASSERT(0 == pd_mask_wanted);
+	}
+
+	/* Create all power domains needed (at least one dummy domain) */
+	err = mali_pm_create_pm_domains();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_terminate();
+		return err;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_pm_terminate(void)
+{
+	if (NULL != pm_work) {
+		_mali_osk_wq_delete_work(pm_work);
+		pm_work = NULL;
+	}
+
+	mali_pm_domain_terminate();
+
+	if (NULL != pm_lock_exec) {
+		_mali_osk_mutex_term(pm_lock_exec);
+		pm_lock_exec = NULL;
+	}
+
+	if (NULL != pm_lock_state) {
+		_mali_osk_spinlock_irq_term(pm_lock_state);
+		pm_lock_state = NULL;
+	}
+}
+
+struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index,
+		struct mali_l2_cache_core *l2_cache)
+{
+	struct mali_pm_domain *domain;
+
+	domain = mali_pm_domain_get_from_mask(domain_config[domain_index]);
+	if (NULL == domain) {
+		MALI_DEBUG_ASSERT(0 == domain_config[domain_index]);
+		domain = mali_pm_domain_get_from_index(
+				 MALI_DOMAIN_INDEX_DUMMY);
+		domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK;
+	} else {
+		MALI_DEBUG_ASSERT(0 != domain_config[domain_index]);
+	}
+
+	MALI_DEBUG_ASSERT(NULL != domain);
+
+	mali_pm_domain_add_l2_cache(domain, l2_cache);
+
+	return domain; /* return the actual domain this was registered in */
+}
+
+struct mali_pm_domain *mali_pm_register_group(u32 domain_index,
+		struct mali_group *group)
+{
+	struct mali_pm_domain *domain;
+
+	domain = mali_pm_domain_get_from_mask(domain_config[domain_index]);
+	if (NULL == domain) {
+		MALI_DEBUG_ASSERT(0 == domain_config[domain_index]);
+		domain = mali_pm_domain_get_from_index(
+				 MALI_DOMAIN_INDEX_DUMMY);
+		domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK;
+	} else {
+		MALI_DEBUG_ASSERT(0 != domain_config[domain_index]);
+	}
+
+	MALI_DEBUG_ASSERT(NULL != domain);
+
+	mali_pm_domain_add_group(domain, group);
+
+	return domain; /* return the actual domain this was registered in */
+}
+
+mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains,
+				  struct mali_group **groups,
+				  u32 num_domains)
+{
+	mali_bool ret = MALI_TRUE; /* Assume all is powered on instantly */
+	u32 i;
+
+	mali_pm_state_lock();
+
+	for (i = 0; i < num_domains; i++) {
+		MALI_DEBUG_ASSERT_POINTER(domains[i]);
+		pd_mask_wanted |= mali_pm_domain_ref_get(domains[i]);
+		if (MALI_FALSE == mali_pm_domain_power_is_on(domains[i])) {
+			/*
+			 * Tell caller that the corresponding group
+			 * was not already powered on.
+			 */
+			ret = MALI_FALSE;
+		} else {
+			/*
+			 * There is a time gap between we power on the domain and
+			 * set the power state of the corresponding groups to be on.
+			 */
+			if (NULL != groups[i] &&
+			    MALI_FALSE == mali_group_power_is_on(groups[i])) {
+				ret = MALI_FALSE;
+			}
+		}
+	}
+
+	MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (get refs)\n", pd_mask_wanted));
+
+	mali_pm_state_unlock();
+
+	return ret;
+}
+
+mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains,
+				  u32 num_domains)
+{
+	u32 mask = 0;
+	mali_bool ret;
+	u32 i;
+
+	mali_pm_state_lock();
+
+	for (i = 0; i < num_domains; i++) {
+		MALI_DEBUG_ASSERT_POINTER(domains[i]);
+		mask |= mali_pm_domain_ref_put(domains[i]);
+	}
+
+	if (0 == mask) {
+		/* return false, all domains should still stay on */
+		ret = MALI_FALSE;
+	} else {
+		/* Assert that we are dealing with a change */
+		MALI_DEBUG_ASSERT((pd_mask_wanted & mask) == mask);
+
+		/* Update our desired domain mask */
+		pd_mask_wanted &= ~mask;
+
+		/* return true; one or more domains can now be powered down */
+		ret = MALI_TRUE;
+	}
+
+	MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (put refs)\n", pd_mask_wanted));
+
+	mali_pm_state_unlock();
+
+	return ret;
+}
+
+void mali_pm_init_begin(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	_mali_osk_pm_dev_ref_get_sync();
+
+	/* Ensure all PMU domains are on */
+	if (NULL != pmu) {
+		mali_pmu_power_up_all(pmu);
+	}
+}
+
+void mali_pm_init_end(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	/* Ensure all PMU domains are off */
+	if (NULL != pmu) {
+		mali_pmu_power_down_all(pmu);
+	}
+
+	_mali_osk_pm_dev_ref_put();
+}
+
+void mali_pm_update_sync(void)
+{
+	mali_pm_exec_lock();
+
+	if (MALI_TRUE == mali_pm_runtime_active) {
+		/*
+		 * Only update if GPU is powered on.
+		 * Deactivation of the last group will result in both a
+		 * deferred runtime PM suspend operation and
+		 * deferred execution of this function.
+		 * mali_pm_runtime_active will be false if runtime PM
+		 * executed first and thus the GPU is now fully powered off.
+		 */
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pm_update_async(void)
+{
+	_mali_osk_wq_schedule_work(pm_work);
+}
+
+void mali_pm_os_suspend(mali_bool os_suspend)
+{
+	int ret;
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: OS suspend\n"));
+
+	/* Suspend execution of all jobs, and go to inactive state */
+	mali_executor_suspend();
+
+	if (os_suspend) {
+		mali_control_timer_suspend(MALI_TRUE);
+	}
+
+	mali_pm_exec_lock();
+
+	ret = mali_pm_common_suspend();
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == ret);
+	MALI_IGNORE(ret);
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pm_os_resume(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: OS resume\n"));
+
+	mali_pm_exec_lock();
+
+#if defined(DEBUG)
+	mali_pm_state_lock();
+
+	/* Assert that things are as we left them in os_suspend(). */
+	MALI_DEBUG_ASSERT(0 == pd_mask_wanted);
+	MALI_DEBUG_ASSERT(0 == pd_mask_current);
+	MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+	mali_pm_state_unlock();
+#endif
+
+	if (MALI_TRUE == mali_pm_runtime_active) {
+		/* Runtime PM was active, so reset PMU */
+		if (NULL != pmu) {
+			mali_pmu_reset(pmu);
+			pmu_mask_current = mali_pmu_get_mask(pmu);
+
+			MALI_DEBUG_PRINT(3, ("Mali PM: OS resume 0x%x \n", pmu_mask_current));
+		}
+
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+
+	/* Start executing jobs again */
+	mali_executor_resume();
+}
+
+mali_bool mali_pm_runtime_suspend(void)
+{
+	mali_bool ret;
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: Runtime suspend\n"));
+
+	mali_pm_exec_lock();
+
+	/*
+	 * Put SW state directly into "off" state, and do not bother to power
+	 * down each power domain, because entire GPU will be powered off
+	 * when we return.
+	 * For runtime PM suspend, in contrast to OS suspend, there is a race
+	 * between this function and the mali_pm_update_sync_internal(), which
+	 * is fine...
+	 */
+	ret = mali_pm_common_suspend();
+	if (MALI_TRUE == ret) {
+		mali_pm_runtime_active = MALI_FALSE;
+	} else {
+		/*
+		 * Process the "power up" instead,
+		 * which could have been "lost"
+		 */
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+
+	return ret;
+}
+
+void mali_pm_runtime_resume(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	mali_pm_exec_lock();
+
+	mali_pm_runtime_active = MALI_TRUE;
+
+#if defined(DEBUG)
+	++num_pm_runtime_resume;
+
+	mali_pm_state_lock();
+
+	/*
+	 * Assert that things are as we left them in runtime_suspend(),
+	 * except for pd_mask_wanted which normally will be the reason we
+	 * got here (job queued => domains wanted)
+	 */
+	MALI_DEBUG_ASSERT(0 == pd_mask_current);
+	MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+	mali_pm_state_unlock();
+#endif
+
+	if (NULL != pmu) {
+		mali_pmu_reset(pmu);
+		pmu_mask_current = mali_pmu_get_mask(pmu);
+		MALI_DEBUG_PRINT(3, ("Mali PM: Runtime resume 0x%x \n", pmu_mask_current));
+	}
+
+	/*
+	 * Normally we are resumed because a job has just been queued.
+	 * pd_mask_wanted should thus be != 0.
+	 * It is however possible for others to take a Mali Runtime PM ref
+	 * without having a job queued.
+	 * We should however always call mali_pm_update_sync_internal(),
+	 * because this will take care of any potential mismatch between
+	 * pmu_mask_current and pd_mask_current.
+	 */
+	mali_pm_update_sync_internal();
+
+	mali_pm_exec_unlock();
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain,
+			      char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tPower domain: id %u\n",
+				mali_pm_domain_get_id(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tMask: 0x%04x\n",
+				mali_pm_domain_get_mask(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tUse count: %u\n",
+				mali_pm_domain_get_use_count(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tCurrent power state: %s\n",
+				(mali_pm_domain_get_mask(domain) & pd_mask_current) ?
+				"On" : "Off");
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tWanted power state: %s\n",
+				(mali_pm_domain_get_mask(domain) & pd_mask_wanted) ?
+				"On" : "Off");
+
+	return n;
+}
+#endif
+
+static void mali_pm_state_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(pm_lock_state);
+}
+
+static void mali_pm_state_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(pm_lock_state);
+}
+
+void mali_pm_exec_lock(void)
+{
+	_mali_osk_mutex_wait(pm_lock_exec);
+}
+
+void mali_pm_exec_unlock(void)
+{
+	_mali_osk_mutex_signal(pm_lock_exec);
+}
+
+static void mali_pm_domain_power_up(u32 power_up_mask,
+				    struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS],
+				    u32 *num_groups_up,
+				    struct mali_l2_cache_core *l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES],
+				    u32 *num_l2_up)
+{
+	u32 domain_bit;
+	u32 notify_mask = power_up_mask;
+
+	MALI_DEBUG_ASSERT(0 != power_up_mask);
+	MALI_DEBUG_ASSERT_POINTER(groups_up);
+	MALI_DEBUG_ASSERT_POINTER(num_groups_up);
+	MALI_DEBUG_ASSERT(0 == *num_groups_up);
+	MALI_DEBUG_ASSERT_POINTER(l2_up);
+	MALI_DEBUG_ASSERT_POINTER(num_l2_up);
+	MALI_DEBUG_ASSERT(0 == *num_l2_up);
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state);
+
+	MALI_DEBUG_PRINT(5,
+			 ("PM update:      Powering up domains: . [%s]\n",
+			  mali_pm_mask_to_string(power_up_mask)));
+
+	pd_mask_current |= power_up_mask;
+
+	domain_bit = _mali_osk_fls(notify_mask);
+	while (0 != domain_bit) {
+		u32 domain_id = domain_bit - 1;
+		struct mali_pm_domain *domain =
+			mali_pm_domain_get_from_index(
+				domain_id);
+		struct mali_l2_cache_core *l2_cache;
+		struct mali_l2_cache_core *l2_cache_tmp;
+		struct mali_group *group;
+		struct mali_group *group_tmp;
+
+		/* Mark domain as powered up */
+		mali_pm_domain_set_power_on(domain, MALI_TRUE);
+
+		/*
+		 * Make a note of the L2 and/or group(s) to notify
+		 * (need to release the PM state lock before doing so)
+		 */
+
+		_MALI_OSK_LIST_FOREACHENTRY(l2_cache,
+					    l2_cache_tmp,
+					    mali_pm_domain_get_l2_cache_list(
+						    domain),
+					    struct mali_l2_cache_core,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_l2_up <
+					  MALI_MAX_NUMBER_OF_L2_CACHE_CORES);
+			l2_up[*num_l2_up] = l2_cache;
+			(*num_l2_up)++;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group,
+					    group_tmp,
+					    mali_pm_domain_get_group_list(domain),
+					    struct mali_group,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_groups_up <
+					  MALI_MAX_NUMBER_OF_GROUPS);
+			groups_up[*num_groups_up] = group;
+
+			(*num_groups_up)++;
+		}
+
+		/* Remove current bit and find next */
+		notify_mask &= ~(1 << (domain_id));
+		domain_bit = _mali_osk_fls(notify_mask);
+	}
+}
+static void mali_pm_domain_power_down(u32 power_down_mask,
+				      struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS],
+				      u32 *num_groups_down,
+				      struct mali_l2_cache_core *l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES],
+				      u32 *num_l2_down)
+{
+	u32 domain_bit;
+	u32 notify_mask = power_down_mask;
+
+	MALI_DEBUG_ASSERT(0 != power_down_mask);
+	MALI_DEBUG_ASSERT_POINTER(groups_down);
+	MALI_DEBUG_ASSERT_POINTER(num_groups_down);
+	MALI_DEBUG_ASSERT(0 == *num_groups_down);
+	MALI_DEBUG_ASSERT_POINTER(l2_down);
+	MALI_DEBUG_ASSERT_POINTER(num_l2_down);
+	MALI_DEBUG_ASSERT(0 == *num_l2_down);
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state);
+
+	MALI_DEBUG_PRINT(5,
+			 ("PM update:      Powering down domains: [%s]\n",
+			  mali_pm_mask_to_string(power_down_mask)));
+
+	pd_mask_current &= ~power_down_mask;
+
+	domain_bit = _mali_osk_fls(notify_mask);
+	while (0 != domain_bit) {
+		u32 domain_id = domain_bit - 1;
+		struct mali_pm_domain *domain =
+			mali_pm_domain_get_from_index(domain_id);
+		struct mali_l2_cache_core *l2_cache;
+		struct mali_l2_cache_core *l2_cache_tmp;
+		struct mali_group *group;
+		struct mali_group *group_tmp;
+
+		/* Mark domain as powered down */
+		mali_pm_domain_set_power_on(domain, MALI_FALSE);
+
+		/*
+		 * Make a note of the L2s and/or groups to notify
+		 * (need to release the PM state lock before doing so)
+		 */
+
+		_MALI_OSK_LIST_FOREACHENTRY(l2_cache,
+					    l2_cache_tmp,
+					    mali_pm_domain_get_l2_cache_list(domain),
+					    struct mali_l2_cache_core,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_l2_down <
+					  MALI_MAX_NUMBER_OF_L2_CACHE_CORES);
+			l2_down[*num_l2_down] = l2_cache;
+			(*num_l2_down)++;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group,
+					    group_tmp,
+					    mali_pm_domain_get_group_list(domain),
+					    struct mali_group,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_groups_down <
+					  MALI_MAX_NUMBER_OF_GROUPS);
+			groups_down[*num_groups_down] = group;
+			(*num_groups_down)++;
+		}
+
+		/* Remove current bit and find next */
+		notify_mask &= ~(1 << (domain_id));
+		domain_bit = _mali_osk_fls(notify_mask);
+	}
+}
+
+/*
+ * Execute pending power domain changes
+ * pm_lock_exec lock must be taken by caller.
+ */
+static void mali_pm_update_sync_internal(void)
+{
+	/*
+	 * This should only be called in non-atomic context
+	 * (normally as deferred work)
+	 *
+	 * Look at the pending power domain changes, and execute these.
+	 * Make sure group and schedulers are notified about changes.
+	 */
+
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	u32 power_down_mask;
+	u32 power_up_mask;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+
+#if defined(DEBUG)
+	++num_pm_updates;
+#endif
+
+	/* Hold PM state lock while we look at (and obey) the wanted state */
+	mali_pm_state_lock();
+
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Wanted domain mask: .. [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_wanted)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	/* Figure out which cores we need to power on */
+	power_up_mask = pd_mask_wanted &
+			(pd_mask_wanted ^ pd_mask_current);
+
+	if (0 != power_up_mask) {
+		u32 power_up_mask_pmu;
+		struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_up = 0;
+		struct mali_l2_cache_core *
+			l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_up = 0;
+		u32 i;
+
+#if defined(DEBUG)
+		++num_pm_updates_up;
+#endif
+
+		/*
+		 * Make sure dummy/global domain is always included when
+		 * powering up, since this is controlled by runtime PM,
+		 * and device power is on at this stage.
+		 */
+		power_up_mask |= MALI_PM_DOMAIN_DUMMY_MASK;
+
+		/* Power up only real PMU domains */
+		power_up_mask_pmu = power_up_mask & ~MALI_PM_DOMAIN_DUMMY_MASK;
+
+		/* But not those that happen to be powered on already */
+		power_up_mask_pmu &= (power_up_mask ^ pmu_mask_current) &
+				     power_up_mask;
+
+		if (0 != power_up_mask_pmu) {
+			MALI_DEBUG_ASSERT(NULL != pmu);
+			pmu_mask_current |= power_up_mask_pmu;
+			mali_pmu_power_up(pmu, power_up_mask_pmu);
+		}
+
+		/*
+		 * Put the domains themselves in power up state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_up(power_up_mask,
+					groups_up, &num_groups_up,
+					l2_up, &num_l2_up);
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/* Notify each L2 cache that we have be powered up */
+		for (i = 0; i < num_l2_up; i++) {
+			mali_l2_cache_power_up(l2_up[i]);
+		}
+
+		/*
+		 * Tell execution module about all the groups we have
+		 * powered up. Groups will be notified as a result of this.
+		 */
+		mali_executor_group_power_up(groups_up, num_groups_up);
+
+		/* Lock state again before checking for power down */
+		mali_pm_state_lock();
+	}
+
+	/* Figure out which cores we need to power off */
+	power_down_mask = pd_mask_current &
+			  (pd_mask_wanted ^ pd_mask_current);
+
+	/*
+	 * Never power down the dummy/global domain here. This is to be done
+	 * from a suspend request (since this domain is only physicall powered
+	 * down at that point)
+	 */
+	power_down_mask &= ~MALI_PM_DOMAIN_DUMMY_MASK;
+
+	if (0 != power_down_mask) {
+		u32 power_down_mask_pmu;
+		struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_down = 0;
+		struct mali_l2_cache_core *
+			l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_down = 0;
+		u32 i;
+
+#if defined(DEBUG)
+		++num_pm_updates_down;
+#endif
+
+		/*
+		 * Put the domains themselves in power down state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_down(power_down_mask,
+					  groups_down, &num_groups_down,
+					  l2_down, &num_l2_down);
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/*
+		 * Tell execution module about all the groups we will be
+		 * powering down. Groups will be notified as a result of this.
+		 */
+		if (0 < num_groups_down) {
+			mali_executor_group_power_down(groups_down, num_groups_down);
+		}
+
+		/* Notify each L2 cache that we will be powering down */
+		for (i = 0; i < num_l2_down; i++) {
+			mali_l2_cache_power_down(l2_down[i]);
+		}
+
+		/*
+		 * Power down only PMU domains which should not stay on
+		 * Some domains might for instance currently be incorrectly
+		 * powered up if default domain power state is all on.
+		 */
+		power_down_mask_pmu = pmu_mask_current & (~pd_mask_current);
+
+		if (0 != power_down_mask_pmu) {
+			MALI_DEBUG_ASSERT(NULL != pmu);
+			pmu_mask_current &= ~power_down_mask_pmu;
+			mali_pmu_power_down(pmu, power_down_mask_pmu);
+
+		}
+	} else {
+		/*
+		 * Power down only PMU domains which should not stay on
+		 * Some domains might for instance currently be incorrectly
+		 * powered up if default domain power state is all on.
+		 */
+		u32 power_down_mask_pmu;
+
+		/* No need for state lock since we'll only update PMU */
+		mali_pm_state_unlock();
+
+		power_down_mask_pmu = pmu_mask_current & (~pd_mask_current);
+
+		if (0 != power_down_mask_pmu) {
+			MALI_DEBUG_ASSERT(NULL != pmu);
+			pmu_mask_current &= ~power_down_mask_pmu;
+			mali_pmu_power_down(pmu, power_down_mask_pmu);
+		}
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM update post: Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update post: Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update post: Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+}
+
+static mali_bool mali_pm_common_suspend(void)
+{
+	mali_pm_state_lock();
+
+	if (0 != pd_mask_wanted) {
+		MALI_DEBUG_PRINT(5, ("PM: Aborting suspend operation\n\n\n"));
+		mali_pm_state_unlock();
+		return MALI_FALSE;
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Wanted domain mask: .. [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_wanted)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	if (0 != pd_mask_current) {
+		/*
+		 * We have still some domains powered on.
+		 * It is for instance very normal that at least the
+		 * dummy/global domain is marked as powered on at this point.
+		 * (because it is physically powered on until this function
+		 * returns)
+		 */
+
+		struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_down = 0;
+		struct mali_l2_cache_core *
+			l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_down = 0;
+		u32 i;
+
+		/*
+		 * Put the domains themselves in power down state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_down(pd_mask_current,
+					  groups_down,
+					  &num_groups_down,
+					  l2_down,
+					  &num_l2_down);
+
+		MALI_DEBUG_ASSERT(0 == pd_mask_current);
+		MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/*
+		 * Tell execution module about all the groups we will be
+		 * powering down. Groups will be notified as a result of this.
+		 */
+		if (0 < num_groups_down) {
+			mali_executor_group_power_down(groups_down, num_groups_down);
+		}
+
+		/* Notify each L2 cache that we will be powering down */
+		for (i = 0; i < num_l2_down; i++) {
+			mali_l2_cache_power_down(l2_down[i]);
+		}
+
+		pmu_mask_current = 0;
+	} else {
+		MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+		MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+		mali_pm_state_unlock();
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Current domain mask:  [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Current PMU mask: ... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Group power stats: .. <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	return MALI_TRUE;
+}
+
+static void mali_pm_update_work(void *data)
+{
+	MALI_IGNORE(data);
+	mali_pm_update_sync();
+}
+
+static _mali_osk_errcode_t mali_pm_create_pm_domains(void)
+{
+	int i;
+
+	/* Create all domains (including dummy domain) */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0x0 == domain_config[i]) continue;
+
+		if (NULL == mali_pm_domain_create(domain_config[i])) {
+			return _MALI_OSK_ERR_NOMEM;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_pm_set_default_pm_domain_config(void)
+{
+	MALI_DEBUG_ASSERT(0 != _mali_osk_resource_base_address());
+
+	/* GP core */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_GP, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_GP] = 0x01;
+	}
+
+	/* PP0 - PP3 core */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP0, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 2;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 1;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP1, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 3;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 2;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP2, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 4;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 2;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP3, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 5;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 2;
+		}
+	}
+
+	/* PP4 - PP7 */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP4, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP4] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP5, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP5] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP6, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP6] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP7, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP7] = 0x01 << 3;
+	}
+
+	/* L2gp/L2PP0/L2PP4 */
+	if (mali_is_mali400()) {
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI400_OFFSET_L2_CACHE0, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 1;
+		}
+	} else if (mali_is_mali450()) {
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE0, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 0;
+		}
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE1, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 1;
+		}
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE2, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L22] = 0x01 << 3;
+		}
+	}
+}
+
+static u32 mali_pm_get_registered_cores_mask(void)
+{
+	int i = 0;
+	u32 mask = 0;
+
+	for (i = 0; i < MALI_DOMAIN_INDEX_DUMMY; i++) {
+		mask |= domain_config[i];
+	}
+
+	return mask;
+}
+
+static void mali_pm_set_pmu_domain_config(void)
+{
+	int i = 0;
+
+	_mali_osk_device_data_pmu_config_get(domain_config, MALI_MAX_NUMBER_OF_DOMAINS - 1);
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) {
+		if (0 != domain_config[i]) {
+			MALI_DEBUG_PRINT(2, ("Using customer pmu config:\n"));
+			break;
+		}
+	}
+
+	if (MALI_MAX_NUMBER_OF_DOMAINS - 1 == i) {
+		MALI_DEBUG_PRINT(2, ("Using hw detect pmu config:\n"));
+		mali_pm_set_default_pm_domain_config();
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) {
+		if (domain_config[i]) {
+			MALI_DEBUG_PRINT(2, ("domain_config[%d] = 0x%x \n", i, domain_config[i]));
+		}
+	}
+	/* Can't override dummy domain mask */
+	domain_config[MALI_DOMAIN_INDEX_DUMMY] =
+		1 << MALI_DOMAIN_INDEX_DUMMY;
+}
+
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask)
+{
+	static char bit_str[MALI_MAX_NUMBER_OF_DOMAINS + 1];
+	int bit;
+	int str_pos = 0;
+
+	/* Must be protected by lock since we use shared string buffer */
+	if (NULL != pm_lock_exec) {
+		MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	}
+
+	for (bit = MALI_MAX_NUMBER_OF_DOMAINS - 1; bit >= 0; bit--) {
+		if (mask & (1 << bit)) {
+			bit_str[str_pos] = 'X';
+		} else {
+			bit_str[str_pos] = '-';
+		}
+		str_pos++;
+	}
+
+	bit_str[MALI_MAX_NUMBER_OF_DOMAINS] = '\0';
+
+	return bit_str;
+}
+
+const char *mali_pm_group_stats_to_string(void)
+{
+	static char bit_str[MALI_MAX_NUMBER_OF_GROUPS + 1];
+	u32 num_groups = mali_group_get_glob_num_groups();
+	u32 i;
+
+	/* Must be protected by lock since we use shared string buffer */
+	if (NULL != pm_lock_exec) {
+		MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	}
+
+	for (i = 0; i < num_groups && i < MALI_MAX_NUMBER_OF_GROUPS; i++) {
+		struct mali_group *group;
+
+		group = mali_group_get_glob_group(i);
+
+		if (MALI_TRUE == mali_group_power_is_on(group)) {
+			bit_str[i] = 'X';
+		} else {
+			bit_str[i] = '-';
+		}
+	}
+
+	bit_str[i] = '\0';
+
+	return bit_str;
+}
+#endif
+
+/*
+ * num_pp is the number of PP cores which will be powered on given this mask
+ * cost is the total power cost of cores which will be powered on given this mask
+ */
+static void mali_pm_stat_from_mask(u32 mask, u32 *num_pp, u32 *cost)
+{
+	u32 i;
+
+	/* loop through all cores */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (!(domain_config[i] & mask)) {
+			continue;
+		}
+
+		switch (i) {
+		case MALI_DOMAIN_INDEX_GP:
+			*cost += MALI_GP_COST;
+
+			break;
+		case MALI_DOMAIN_INDEX_PP0: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP1: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP2: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP3:
+			if (mali_is_mali400()) {
+				if ((domain_config[MALI_DOMAIN_INDEX_L20] & mask)
+				    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+					== domain_config[MALI_DOMAIN_INDEX_L20])) {
+					*num_pp += 1;
+				}
+			} else {
+				if ((domain_config[MALI_DOMAIN_INDEX_L21] & mask)
+				    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+					== domain_config[MALI_DOMAIN_INDEX_L21])) {
+					*num_pp += 1;
+				}
+			}
+
+			*cost += MALI_PP_COST;
+			break;
+		case MALI_DOMAIN_INDEX_PP4: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP5: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP6: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP7:
+			MALI_DEBUG_ASSERT(mali_is_mali450());
+
+			if ((domain_config[MALI_DOMAIN_INDEX_L22] & mask)
+			    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+				== domain_config[MALI_DOMAIN_INDEX_L22])) {
+				*num_pp += 1;
+			}
+
+			*cost += MALI_PP_COST;
+			break;
+		case MALI_DOMAIN_INDEX_L20: /* Fall through */
+		case MALI_DOMAIN_INDEX_L21: /* Fall through */
+		case MALI_DOMAIN_INDEX_L22:
+			*cost += MALI_L2_COST;
+
+			break;
+		}
+	}
+}
+
+void mali_pm_power_cost_setup(void)
+{
+	/*
+	 * Two parallel arrays which store the best domain mask and its cost
+	 * The index is the number of PP cores, E.g. Index 0 is for 1 PP option,
+	 * might have mask 0x2 and with cost of 1, lower cost is better
+	 */
+	u32 best_mask[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 };
+	u32 best_cost[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 };
+	/* Array cores_in_domain is used to store the total pp cores in each pm domain. */
+	u32 cores_in_domain[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	/* Domain_count is used to represent the max domain we have.*/
+	u32 max_domain_mask = 0;
+	u32 max_domain_id = 0;
+	u32 always_on_pp_cores = 0;
+
+	u32 num_pp, cost, mask;
+	u32 i, j , k;
+
+	/* Initialize statistics */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; i++) {
+		best_mask[i] = 0;
+		best_cost[i] = 0xFFFFFFFF; /* lower cost is better */
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1; i++) {
+		for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) {
+			mali_pm_domain_power_cost_result[i][j] = 0;
+		}
+	}
+
+	/* Caculate number of pp cores of a given domain config. */
+	for (i = MALI_DOMAIN_INDEX_PP0; i <= MALI_DOMAIN_INDEX_PP7; i++) {
+		if (0 < domain_config[i]) {
+			/* Get the max domain mask value used to caculate power cost
+			 * and we don't count in always on pp cores. */
+			if (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[i]
+			    && max_domain_mask < domain_config[i]) {
+				max_domain_mask = domain_config[i];
+			}
+
+			if (MALI_PM_DOMAIN_DUMMY_MASK == domain_config[i]) {
+				always_on_pp_cores++;
+			}
+		}
+	}
+	max_domain_id = _mali_osk_fls(max_domain_mask);
+
+	/*
+	 * Try all combinations of power domains and check how many PP cores
+	 * they have and their power cost.
+	 */
+	for (mask = 0; mask < (1 << max_domain_id); mask++) {
+		num_pp = 0;
+		cost = 0;
+
+		mali_pm_stat_from_mask(mask, &num_pp, &cost);
+
+		/* This mask is usable for all MP1 up to num_pp PP cores, check statistics for all */
+		for (i = 0; i < num_pp; i++) {
+			if (best_cost[i] >= cost) {
+				best_cost[i] = cost;
+				best_mask[i] = mask;
+			}
+		}
+	}
+
+	/*
+	 * If we want to enable x pp cores, if x is less than number of always_on pp cores,
+	 * all of pp cores we will enable must be always_on pp cores.
+	 */
+	for (i = 0; i < mali_executor_get_num_cores_total(); i++) {
+		if (i < always_on_pp_cores) {
+			mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1]
+				= i + 1;
+		} else {
+			mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1]
+				= always_on_pp_cores;
+		}
+	}
+
+	/* In this loop, variable i represent for the number of non-always on pp cores we want to enabled. */
+	for (i = 0; i < (mali_executor_get_num_cores_total() - always_on_pp_cores); i++) {
+		if (best_mask[i] == 0) {
+			/* This MP variant is not available */
+			continue;
+		}
+
+		for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) {
+			cores_in_domain[j] = 0;
+		}
+
+		for (j = MALI_DOMAIN_INDEX_PP0; j <= MALI_DOMAIN_INDEX_PP7; j++) {
+			if (0 < domain_config[j]
+			    && (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[i])) {
+				cores_in_domain[_mali_osk_fls(domain_config[j]) - 1]++;
+			}
+		}
+
+		/* In this loop, j represent for the number we have already enabled.*/
+		for (j = 0; j <= i;) {
+			/* j used to visit all of domain to get the number of pp cores remained in it. */
+			for (k = 0; k < max_domain_id; k++) {
+				/* If domain k in best_mask[i] is enabled and this domain has extra pp cores,
+				 * we know we must pick at least one pp core from this domain.
+				 * And then we move to next enabled pm domain. */
+				if ((best_mask[i] & (0x1 << k)) && (0 < cores_in_domain[k])) {
+					cores_in_domain[k]--;
+					mali_pm_domain_power_cost_result[always_on_pp_cores + i + 1][k]++;
+					j++;
+					if (j > i) {
+						break;
+					}
+				}
+			}
+		}
+	}
+}
+
+/*
+ * When we are doing core scaling,
+ * this function is called to return the best mask to
+ * achieve the best pp group power cost.
+ */
+void mali_pm_get_best_power_cost_mask(int num_requested, int *dst)
+{
+	MALI_DEBUG_ASSERT((mali_executor_get_num_cores_total() >= num_requested) && (0 <= num_requested));
+
+	_mali_osk_memcpy(dst, mali_pm_domain_power_cost_result[num_requested], MALI_MAX_NUMBER_OF_DOMAINS * sizeof(int));
+}
diff --git a/utgard/r5p1/common/mali_pm.h b/utgard/r5p1/common/mali_pm.h
new file mode 100755
index 0000000..ca4db24
--- /dev/null
+++ b/utgard/r5p1/common/mali_pm.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_H__
+#define __MALI_PM_H__
+
+#include "mali_osk.h"
+#include "mali_pm_domain.h"
+
+#define MALI_DOMAIN_INDEX_GP        0
+#define MALI_DOMAIN_INDEX_PP0       1
+#define MALI_DOMAIN_INDEX_PP1       2
+#define MALI_DOMAIN_INDEX_PP2       3
+#define MALI_DOMAIN_INDEX_PP3       4
+#define MALI_DOMAIN_INDEX_PP4       5
+#define MALI_DOMAIN_INDEX_PP5       6
+#define MALI_DOMAIN_INDEX_PP6       7
+#define MALI_DOMAIN_INDEX_PP7       8
+#define MALI_DOMAIN_INDEX_L20       9
+#define MALI_DOMAIN_INDEX_L21      10
+#define MALI_DOMAIN_INDEX_L22      11
+/*
+ * The dummy domain is used when there is no physical power domain
+ * (e.g. no PMU or always on cores)
+ */
+#define MALI_DOMAIN_INDEX_DUMMY    12
+#define MALI_MAX_NUMBER_OF_DOMAINS 13
+
+/**
+ * Initialize the Mali PM module
+ *
+ * PM module covers Mali PM core, PM domains and Mali PMU
+ */
+_mali_osk_errcode_t mali_pm_initialize(void);
+
+/**
+ * Terminate the Mali PM module
+ */
+void mali_pm_terminate(void);
+
+void mali_pm_exec_lock(void);
+void mali_pm_exec_unlock(void);
+
+
+struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index,
+		struct mali_l2_cache_core *l2_cache);
+struct mali_pm_domain *mali_pm_register_group(u32 domain_index,
+		struct mali_group *group);
+
+mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains,
+				  struct mali_group **groups,
+				  u32 num_domains);
+mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains,
+				  u32 num_domains);
+
+void mali_pm_init_begin(void);
+void mali_pm_init_end(void);
+
+void mali_pm_update_sync(void);
+void mali_pm_update_async(void);
+
+/* Callback functions for system power management */
+void mali_pm_os_suspend(mali_bool os_suspend);
+void mali_pm_os_resume(void);
+
+mali_bool mali_pm_runtime_suspend(void);
+void mali_pm_runtime_resume(void);
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain,
+			      char *buf, u32 size);
+#endif
+
+void mali_pm_power_cost_setup(void);
+
+void mali_pm_get_best_power_cost_mask(int num_requested, int *dst);
+
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask);
+#endif
+
+#endif /* __MALI_PM_H__ */
diff --git a/utgard/r5p1/common/mali_pm_domain.c b/utgard/r5p1/common/mali_pm_domain.c
new file mode 100755
index 0000000..dbf985e
--- /dev/null
+++ b/utgard/r5p1/common/mali_pm_domain.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_pm_domain.h"
+#include "mali_pmu.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+
+static struct mali_pm_domain *mali_pm_domains[MALI_MAX_NUMBER_OF_DOMAINS] =
+{ NULL, };
+
+void mali_pm_domain_initialize(void)
+{
+	/* Domains will be initialized/created on demand */
+}
+
+void mali_pm_domain_terminate(void)
+{
+	int i;
+
+	/* Delete all domains that has been created */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		mali_pm_domain_delete(mali_pm_domains[i]);
+		mali_pm_domains[i] = NULL;
+	}
+}
+
+struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask)
+{
+	struct mali_pm_domain *domain = NULL;
+	u32 domain_id = 0;
+
+	domain = mali_pm_domain_get_from_mask(pmu_mask);
+	if (NULL != domain) return domain;
+
+	MALI_DEBUG_PRINT(2,
+			 ("Mali PM domain: Creating Mali PM domain (mask=0x%08X)\n",
+			  pmu_mask));
+
+	domain = (struct mali_pm_domain *)_mali_osk_malloc(
+			 sizeof(struct mali_pm_domain));
+	if (NULL != domain) {
+		domain->power_is_on = MALI_FALSE;
+		domain->pmu_mask = pmu_mask;
+		domain->use_count = 0;
+		_mali_osk_list_init(&domain->group_list);
+		_mali_osk_list_init(&domain->l2_cache_list);
+
+		domain_id = _mali_osk_fls(pmu_mask) - 1;
+		/* Verify the domain_id */
+		MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > domain_id);
+		/* Verify that pmu_mask only one bit is set */
+		MALI_DEBUG_ASSERT((1 << domain_id) == pmu_mask);
+		mali_pm_domains[domain_id] = domain;
+
+		return domain;
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Unable to create PM domain\n"));
+	}
+
+	return NULL;
+}
+
+void mali_pm_domain_delete(struct mali_pm_domain *domain)
+{
+	if (NULL == domain) {
+		return;
+	}
+
+	_mali_osk_list_delinit(&domain->group_list);
+	_mali_osk_list_delinit(&domain->l2_cache_list);
+
+	_mali_osk_free(domain);
+}
+
+void mali_pm_domain_add_group(struct mali_pm_domain *domain,
+			      struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	/*
+	 * Use addtail because virtual group is created last and it needs
+	 * to be at the end of the list (in order to be activated after
+	 * all children.
+	 */
+	_mali_osk_list_addtail(&group->pm_domain_list, &domain->group_list);
+}
+
+void mali_pm_domain_add_l2_cache(struct mali_pm_domain *domain,
+				 struct mali_l2_cache_core *l2_cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT_POINTER(l2_cache);
+	_mali_osk_list_add(&l2_cache->pm_domain_list, &domain->l2_cache_list);
+}
+
+struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask)
+{
+	u32 id = 0;
+
+	if (0 == mask) {
+		return NULL;
+	}
+
+	id = _mali_osk_fls(mask) - 1;
+
+	MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+	/* Verify that pmu_mask only one bit is set */
+	MALI_DEBUG_ASSERT((1 << id) == mask);
+
+	return mali_pm_domains[id];
+}
+
+struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id)
+{
+	MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+
+	return mali_pm_domains[id];
+}
+
+u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+
+	if (0 == domain->use_count) {
+		_mali_osk_pm_dev_ref_get_async();
+	}
+
+	++domain->use_count;
+	MALI_DEBUG_PRINT(4, ("PM domain %p: ref_get, use_count => %u\n", domain, domain->use_count));
+
+	/* Return our mask so caller can check this against wanted mask */
+	return domain->pmu_mask;
+}
+
+u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+
+	--domain->use_count;
+	MALI_DEBUG_PRINT(4, ("PM domain %p: ref_put, use_count => %u\n", domain, domain->use_count));
+
+	if (0 == domain->use_count) {
+		_mali_osk_pm_dev_ref_put();
+	}
+
+	/*
+	 * Return the PMU mask which now could be be powered down
+	 * (the bit for this domain).
+	 * This is the responsibility of the caller (mali_pm)
+	 */
+	return (0 == domain->use_count ? domain->pmu_mask : 0);
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_domain_get_id(struct mali_pm_domain *domain)
+{
+	u32 id = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT(0 != domain->pmu_mask);
+
+	id = _mali_osk_fls(domain->pmu_mask) - 1;
+
+	MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+	/* Verify that pmu_mask only one bit is set */
+	MALI_DEBUG_ASSERT((1 << id) == domain->pmu_mask);
+	/* Verify that we have stored the domain at right id/index */
+	MALI_DEBUG_ASSERT(domain == mali_pm_domains[id]);
+
+	return id;
+}
+#endif
+
+#if defined(DEBUG)
+mali_bool mali_pm_domain_all_unused(void)
+{
+	int i;
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (NULL == mali_pm_domains[i]) {
+			/* Nothing to check */
+			continue;
+		}
+
+		if (MALI_TRUE == mali_pm_domains[i]->power_is_on) {
+			/* Not ready for suspend! */
+			return MALI_FALSE;
+		}
+
+		if (0 != mali_pm_domains[i]->use_count) {
+			/* Not ready for suspend! */
+			return MALI_FALSE;
+		}
+	}
+
+	return MALI_TRUE;
+}
+#endif
diff --git a/utgard/r5p1/common/mali_pm_domain.h b/utgard/r5p1/common/mali_pm_domain.h
new file mode 100755
index 0000000..aceb344
--- /dev/null
+++ b/utgard/r5p1/common/mali_pm_domain.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_DOMAIN_H__
+#define __MALI_PM_DOMAIN_H__
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+#include "mali_l2_cache.h"
+#include "mali_group.h"
+#include "mali_pmu.h"
+
+/* Instances are protected by PM state lock */
+struct mali_pm_domain {
+	mali_bool power_is_on;
+	s32 use_count;
+	u32 pmu_mask;
+
+	/* Zero or more groups can belong to this domain */
+	_mali_osk_list_t group_list;
+
+	/* Zero or more L2 caches can belong to this domain */
+	_mali_osk_list_t l2_cache_list;
+};
+
+
+void mali_pm_domain_initialize(void);
+void mali_pm_domain_terminate(void);
+
+struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask);
+void mali_pm_domain_delete(struct mali_pm_domain *domain);
+
+void mali_pm_domain_add_l2_cache(
+	struct mali_pm_domain *domain,
+	struct mali_l2_cache_core *l2_cache);
+void mali_pm_domain_add_group(struct mali_pm_domain *domain,
+			      struct mali_group *group);
+
+struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask);
+struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id);
+
+/* Ref counting */
+u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain);
+u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain);
+
+MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_group_list(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return &domain->group_list;
+}
+
+MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_l2_cache_list(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return &domain->l2_cache_list;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pm_domain_power_is_on(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->power_is_on;
+}
+
+MALI_STATIC_INLINE void mali_pm_domain_set_power_on(
+	struct mali_pm_domain *domain,
+	mali_bool power_is_on)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	domain->power_is_on = power_is_on;
+}
+
+MALI_STATIC_INLINE u32 mali_pm_domain_get_use_count(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->use_count;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_domain_get_id(struct mali_pm_domain *domain);
+
+MALI_STATIC_INLINE u32 mali_pm_domain_get_mask(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->pmu_mask;
+}
+#endif
+
+#if defined(DEBUG)
+mali_bool mali_pm_domain_all_unused(void);
+#endif
+
+#endif /* __MALI_PM_DOMAIN_H__ */
diff --git a/utgard/r5p1/common/mali_pmu.c b/utgard/r5p1/common/mali_pmu.c
new file mode 100755
index 0000000..f667b65
--- /dev/null
+++ b/utgard/r5p1/common/mali_pmu.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_pmu.c
+ * Mali driver functions for Mali 400 PMU hardware
+ */
+#include "mali_hw_core.h"
+#include "mali_pmu.h"
+#include "mali_pp.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_pm.h"
+#include "mali_osk_mali.h"
+
+struct mali_pmu_core *mali_global_pmu_core = NULL;
+
+static _mali_osk_errcode_t mali_pmu_wait_for_command_finish(
+	struct mali_pmu_core *pmu);
+
+struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource)
+{
+	struct mali_pmu_core *pmu;
+
+	MALI_DEBUG_ASSERT(NULL == mali_global_pmu_core);
+	MALI_DEBUG_PRINT(2, ("Mali PMU: Creating Mali PMU core\n"));
+
+	pmu = (struct mali_pmu_core *)_mali_osk_malloc(
+		      sizeof(struct mali_pmu_core));
+	if (NULL != pmu) {
+		pmu->registered_cores_mask = 0; /* to be set later */
+
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&pmu->hw_core,
+				resource, PMU_REGISTER_ADDRESS_SPACE_SIZE)) {
+
+			pmu->switch_delay = _mali_osk_get_pmu_switch_delay();
+
+			mali_global_pmu_core = pmu;
+
+			return pmu;
+		}
+		_mali_osk_free(pmu);
+	}
+
+	return NULL;
+}
+
+void mali_pmu_delete(struct mali_pmu_core *pmu)
+{
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu == mali_global_pmu_core);
+
+	MALI_DEBUG_PRINT(2, ("Mali PMU: Deleting Mali PMU core\n"));
+
+	mali_global_pmu_core = NULL;
+
+	mali_hw_core_delete(&pmu->hw_core);
+	_mali_osk_free(pmu);
+}
+
+void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask)
+{
+	pmu->registered_cores_mask = mask;
+}
+
+void mali_pmu_reset(struct mali_pmu_core *pmu)
+{
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	/* Setup the desired defaults */
+	mali_hw_core_register_write_relaxed(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_INT_MASK, 0);
+	mali_hw_core_register_write_relaxed(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_SW_DELAY, pmu->switch_delay);
+}
+
+void mali_pmu_power_up_all(struct mali_pmu_core *pmu)
+{
+	u32 stat;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	mali_pm_exec_lock();
+
+	mali_pmu_reset(pmu);
+
+	/* Now simply power up the domains which are marked as powered down */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	mali_pmu_power_up(pmu, stat);
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pmu_power_down_all(struct mali_pmu_core *pmu)
+{
+	u32 stat;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	mali_pm_exec_lock();
+
+	/* Now simply power down the domains which are marked as powered up */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	mali_pmu_power_down(pmu, (~stat) & pmu->registered_cores_mask);
+
+	mali_pm_exec_unlock();
+}
+
+_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask)
+{
+	u32 stat;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+	MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask);
+	MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core,
+				PMU_REG_ADDR_MGMT_INT_RAWSTAT) &
+				PMU_REG_VAL_IRQ));
+
+	MALI_DEBUG_PRINT(3,
+			 ("PMU power down: ...................... [%s]\n",
+			  mali_pm_mask_to_string(mask)));
+
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+
+	/*
+	 * Assert that we are not powering down domains which are already
+	 * powered down.
+	 */
+	MALI_DEBUG_ASSERT(0 == (stat & mask));
+
+	if (0 == mask || 0 == ((~stat) & mask)) return _MALI_OSK_ERR_OK;
+
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_POWER_DOWN, mask);
+
+	/*
+	 * Do not wait for interrupt on Mali-300/400 if all domains are
+	 * powered off by our power down command, because the HW will simply
+	 * not generate an interrupt in this case.
+	 */
+	if (mali_is_mali450() || pmu->registered_cores_mask != (mask | stat)) {
+		err = mali_pmu_wait_for_command_finish(pmu);
+		if (_MALI_OSK_ERR_OK != err) {
+			return err;
+		}
+	} else {
+		mali_hw_core_register_write(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ);
+	}
+
+#if defined(DEBUG)
+	/* Verify power status of domains after power down */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	MALI_DEBUG_ASSERT(mask == (stat & mask));
+#endif
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask)
+{
+	u32 stat;
+	_mali_osk_errcode_t err;
+#if !defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP)
+	u32 current_domain;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+	MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask);
+	MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core,
+				PMU_REG_ADDR_MGMT_INT_RAWSTAT) &
+				PMU_REG_VAL_IRQ));
+
+	MALI_DEBUG_PRINT(3,
+			 ("PMU power up: ........................ [%s]\n",
+			  mali_pm_mask_to_string(mask)));
+
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	stat &= pmu->registered_cores_mask;
+	if (0 == mask || 0 == (stat & mask)) return _MALI_OSK_ERR_OK;
+
+	/*
+	 * Assert that we are only powering up domains which are currently
+	 * powered down.
+	 */
+	MALI_DEBUG_ASSERT(mask == (stat & mask));
+
+#if defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP)
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_POWER_UP, mask);
+
+	err = mali_pmu_wait_for_command_finish(pmu);
+	if (_MALI_OSK_ERR_OK != err) {
+		return err;
+	}
+#else
+	for (current_domain = 1;
+	     current_domain <= pmu->registered_cores_mask;
+	     current_domain <<= 1) {
+		if (current_domain & mask & stat) {
+			mali_hw_core_register_write(&pmu->hw_core,
+						    PMU_REG_ADDR_MGMT_POWER_UP,
+						    current_domain);
+
+			err = mali_pmu_wait_for_command_finish(pmu);
+			if (_MALI_OSK_ERR_OK != err) {
+				return err;
+			}
+		}
+	}
+#endif
+
+#if defined(DEBUG)
+	/* Verify power status of domains after power up */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	MALI_DEBUG_ASSERT(0 == (stat & mask));
+#endif /* defined(DEBUG) */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_pmu_wait_for_command_finish(
+	struct mali_pmu_core *pmu)
+{
+	u32 rawstat;
+	u32 timeout = MALI_REG_POLL_COUNT_SLOW;
+
+	MALI_DEBUG_ASSERT(pmu);
+
+	/* Wait for the command to complete */
+	do {
+		rawstat = mali_hw_core_register_read(&pmu->hw_core,
+						     PMU_REG_ADDR_MGMT_INT_RAWSTAT);
+		--timeout;
+	} while (0 == (rawstat & PMU_REG_VAL_IRQ) && 0 < timeout);
+
+	MALI_DEBUG_ASSERT(0 < timeout);
+
+	if (0 == timeout) {
+		return _MALI_OSK_ERR_TIMEOUT;
+	}
+
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ);
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r5p1/common/mali_pmu.h b/utgard/r5p1/common/mali_pmu.h
new file mode 100755
index 0000000..5ca7879
--- /dev/null
+++ b/utgard/r5p1/common/mali_pmu.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_platform.h
+ * Platform specific Mali driver functions
+ */
+
+#ifndef __MALI_PMU_H__
+#define __MALI_PMU_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_hw_core.h"
+
+/** @brief MALI inbuilt PMU hardware info and PMU hardware has knowledge of cores power mask
+ */
+struct mali_pmu_core {
+	struct mali_hw_core hw_core;
+	u32 registered_cores_mask;
+	u32 switch_delay;
+};
+
+/** @brief Register layout for hardware PMU
+ */
+typedef enum {
+	PMU_REG_ADDR_MGMT_POWER_UP                  = 0x00,     /*< Power up register */
+	PMU_REG_ADDR_MGMT_POWER_DOWN                = 0x04,     /*< Power down register */
+	PMU_REG_ADDR_MGMT_STATUS                    = 0x08,     /*< Core sleep status register */
+	PMU_REG_ADDR_MGMT_INT_MASK                  = 0x0C,     /*< Interrupt mask register */
+	PMU_REG_ADDR_MGMT_INT_RAWSTAT               = 0x10,     /*< Interrupt raw status register */
+	PMU_REG_ADDR_MGMT_INT_CLEAR                 = 0x18,     /*< Interrupt clear register */
+	PMU_REG_ADDR_MGMT_SW_DELAY                  = 0x1C,     /*< Switch delay register */
+	PMU_REGISTER_ADDRESS_SPACE_SIZE             = 0x28,     /*< Size of register space */
+} pmu_reg_addr_mgmt_addr;
+
+#define PMU_REG_VAL_IRQ 1
+
+extern struct mali_pmu_core *mali_global_pmu_core;
+
+/** @brief Initialisation of MALI PMU
+ *
+ * This is called from entry point of the driver in order to create and intialize the PMU resource
+ *
+ * @param resource it will be a pointer to a PMU resource
+ * @param number_of_pp_cores Number of found PP resources in configuration
+ * @param number_of_l2_caches Number of found L2 cache resources in configuration
+ * @return The created PMU object, or NULL in case of failure.
+ */
+struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource);
+
+/** @brief It deallocates the PMU resource
+ *
+ * This is called on the exit of the driver to terminate the PMU resource
+ *
+ * @param pmu Pointer to PMU core object to delete
+ */
+void mali_pmu_delete(struct mali_pmu_core *pmu);
+
+/** @brief Set registered cores mask
+ *
+ * @param pmu Pointer to PMU core object
+ * @param mask All available/valid domain bits
+ */
+void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask);
+
+/** @brief Retrieves the Mali PMU core object (if any)
+ *
+ * @return The Mali PMU object, or NULL if no PMU exists.
+ */
+MALI_STATIC_INLINE struct mali_pmu_core *mali_pmu_get_global_pmu_core(void)
+{
+	return mali_global_pmu_core;
+}
+
+/** @brief Reset PMU core
+ *
+ * @param pmu Pointer to PMU core object to reset
+ */
+void mali_pmu_reset(struct mali_pmu_core *pmu);
+
+void mali_pmu_power_up_all(struct mali_pmu_core *pmu);
+
+void mali_pmu_power_down_all(struct mali_pmu_core *pmu);
+
+/** @brief Returns a mask of the currently powered up domains
+ *
+ * @param pmu Pointer to PMU core object
+ */
+MALI_STATIC_INLINE u32 mali_pmu_get_mask(struct mali_pmu_core *pmu)
+{
+	u32 stat = mali_hw_core_register_read(&pmu->hw_core, PMU_REG_ADDR_MGMT_STATUS);
+	return ((~stat) & pmu->registered_cores_mask);
+}
+
+/** @brief MALI GPU power down using MALI in-built PMU
+ *
+ * Called to power down the specified cores.
+ *
+ * @param pmu Pointer to PMU core object to power down
+ * @param mask Mask specifying which power domains to power down
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask);
+
+/** @brief MALI GPU power up using MALI in-built PMU
+ *
+ * Called to power up the specified cores.
+ *
+ * @param pmu Pointer to PMU core object to power up
+ * @param mask Mask specifying which power domains to power up
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask);
+
+#endif /* __MALI_PMU_H__ */
diff --git a/utgard/r5p1/common/mali_pp.c b/utgard/r5p1/common/mali_pp.c
new file mode 100755
index 0000000..b1fcdea
--- /dev/null
+++ b/utgard/r5p1/common/mali_pp.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pp_job.h"
+#include "mali_pp.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "regs/mali_200_regs.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+
+#include <mali_platform.h>
+
+/* Number of frame registers on Mali-200 */
+#define MALI_PP_MALI200_NUM_FRAME_REGISTERS ((0x04C/4)+1)
+/* Number of frame registers on Mali-300 and later */
+#define MALI_PP_MALI400_NUM_FRAME_REGISTERS ((0x058/4)+1)
+
+static struct mali_pp_core *mali_global_pp_cores[MALI_MAX_NUMBER_OF_PP_CORES] = { NULL };
+static u32 mali_global_num_pp_cores = 0;
+
+/* Interrupt handlers */
+static void mali_pp_irq_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data);
+
+struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id)
+{
+	struct mali_pp_core *core = NULL;
+
+	MALI_DEBUG_PRINT(2, ("Mali PP: Creating Mali PP core: %s\n", resource->description));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Base address of PP core: 0x%x\n", resource->base));
+
+	if (mali_global_num_pp_cores >= MALI_MAX_NUMBER_OF_PP_CORES) {
+		MALI_PRINT_ERROR(("Mali PP: Too many PP core objects created\n"));
+		return NULL;
+	}
+
+	core = _mali_osk_malloc(sizeof(struct mali_pp_core));
+	if (NULL != core) {
+		core->core_id = mali_global_num_pp_cores;
+		core->bcast_id = bcast_id;
+
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALI200_REG_SIZEOF_REGISTER_BANK)) {
+			_mali_osk_errcode_t ret;
+
+			if (!is_virtual) {
+				ret = mali_pp_reset(core);
+			} else {
+				ret = _MALI_OSK_ERR_OK;
+			}
+
+			if (_MALI_OSK_ERR_OK == ret) {
+				ret = mali_group_add_pp_core(group, core);
+				if (_MALI_OSK_ERR_OK == ret) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					MALI_DEBUG_ASSERT(!is_virtual || -1 != resource->irq);
+
+					core->irq = _mali_osk_irq_init(resource->irq,
+								       mali_group_upper_half_pp,
+								       group,
+								       mali_pp_irq_probe_trigger,
+								       mali_pp_irq_probe_ack,
+								       core,
+								       resource->description);
+					if (NULL != core->irq) {
+						mali_global_pp_cores[mali_global_num_pp_cores] = core;
+						mali_global_num_pp_cores++;
+
+						return core;
+					} else {
+						MALI_PRINT_ERROR(("Mali PP: Failed to setup interrupt handlers for PP core %s\n", core->hw_core.description));
+					}
+					mali_group_remove_pp_core(group);
+				} else {
+					MALI_PRINT_ERROR(("Mali PP: Failed to add core %s to group\n", core->hw_core.description));
+				}
+			}
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Mali PP: Failed to allocate memory for PP core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_pp_delete(struct mali_pp_core *core)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	_mali_osk_irq_term(core->irq);
+	mali_hw_core_delete(&core->hw_core);
+
+	/* Remove core from global list */
+	for (i = 0; i < mali_global_num_pp_cores; i++) {
+		if (mali_global_pp_cores[i] == core) {
+			mali_global_pp_cores[i] = NULL;
+			mali_global_num_pp_cores--;
+
+			if (i != mali_global_num_pp_cores) {
+				/* We removed a PP core from the middle of the array -- move the last
+				 * PP core to the current position to close the gap */
+				mali_global_pp_cores[i] = mali_global_pp_cores[mali_global_num_pp_cores];
+				mali_global_pp_cores[mali_global_num_pp_cores] = NULL;
+			}
+
+			break;
+		}
+	}
+
+	_mali_osk_free(core);
+}
+
+void mali_pp_stop_bus(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	/* Will only send the stop bus command, and not wait for it to complete */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_STOP_BUS);
+}
+
+_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core)
+{
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Send the stop bus command. */
+	mali_pp_stop_bus(core);
+
+	/* Wait for bus to be stopped */
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		if (mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS) & MALI200_REG_VAL_STATUS_BUS_STOPPED)
+			break;
+	}
+
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Mali PP: Failed to stop bus on %s. Status: 0x%08x\n", core->hw_core.description, mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/* Frame register reset values.
+ * Taken from the Mali400 TRM, 3.6. Pixel processor control register summary */
+static const u32 mali_frame_registers_reset_values[_MALI_PP_MAX_FRAME_REGISTERS] = {
+	0x0, /* Renderer List Address Register */
+	0x0, /* Renderer State Word Base Address Register */
+	0x0, /* Renderer Vertex Base Register */
+	0x2, /* Feature Enable Register */
+	0x0, /* Z Clear Value Register */
+	0x0, /* Stencil Clear Value Register */
+	0x0, /* ABGR Clear Value 0 Register */
+	0x0, /* ABGR Clear Value 1 Register */
+	0x0, /* ABGR Clear Value 2 Register */
+	0x0, /* ABGR Clear Value 3 Register */
+	0x0, /* Bounding Box Left Right Register */
+	0x0, /* Bounding Box Bottom Register */
+	0x0, /* FS Stack Address Register */
+	0x0, /* FS Stack Size and Initial Value Register */
+	0x0, /* Reserved */
+	0x0, /* Reserved */
+	0x0, /* Origin Offset X Register */
+	0x0, /* Origin Offset Y Register */
+	0x75, /* Subpixel Specifier Register */
+	0x0, /* Tiebreak mode Register */
+	0x0, /* Polygon List Format Register */
+	0x0, /* Scaling Register */
+	0x0 /* Tilebuffer configuration Register */
+};
+
+/* WBx register reset values */
+static const u32 mali_wb_registers_reset_values[_MALI_PP_MAX_WB_REGISTERS] = {
+	0x0, /* WBx Source Select Register */
+	0x0, /* WBx Target Address Register */
+	0x0, /* WBx Target Pixel Format Register */
+	0x0, /* WBx Target AA Format Register */
+	0x0, /* WBx Target Layout */
+	0x0, /* WBx Target Scanline Length */
+	0x0, /* WBx Target Flags Register */
+	0x0, /* WBx MRT Enable Register */
+	0x0, /* WBx MRT Offset Register */
+	0x0, /* WBx Global Test Enable Register */
+	0x0, /* WBx Global Test Reference Value Register */
+	0x0  /* WBx Global Test Compare Function Register */
+};
+
+/* Performance Counter 0 Enable Register reset value */
+static const u32 mali_perf_cnt_enable_reset_value = 0;
+
+extern int pp_hardware_reset;
+_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core)
+{
+	/* Bus must be stopped before calling this function */
+	const u32 reset_invalid_value = 0xC0FFE000;
+	const u32 reset_check_value = 0xC01A0000;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+	MALI_DEBUG_PRINT(2, ("Mali PP: Hard reset of core %s\n", core->hw_core.description));
+	pp_hardware_reset ++;
+
+	/* Set register to a bogus value. The register will be used to detect when reset is complete */
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_WRITE_BOUNDARY_LOW, reset_invalid_value);
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE);
+
+	/* Force core to reset */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET);
+
+	/* Wait for reset to be complete */
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_WRITE_BOUNDARY_LOW, reset_check_value);
+		if (reset_check_value == mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_WRITE_BOUNDARY_LOW)) {
+			break;
+		}
+	}
+
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Mali PP: The hard reset loop didn't work, unable to recover\n"));
+	}
+
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_WRITE_BOUNDARY_LOW, 0x00000000); /* set it back to the default */
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_pp_reset_async(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	MALI_DEBUG_PRINT(4, ("Mali PP: Reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET);
+}
+
+_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core)
+{
+	int i;
+	u32 rawstat = 0;
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		u32 status =  mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS);
+		if (!(status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE)) {
+			rawstat = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT);
+			if (rawstat == MALI400PP_REG_VAL_IRQ_RESET_COMPLETED) {
+				break;
+			}
+		}
+	}
+
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Mali PP: Failed to reset core %s, rawstat: 0x%08x\n",
+				  core->hw_core.description, rawstat));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core)
+{
+	mali_pp_reset_async(core);
+	return mali_pp_reset_wait(core);
+}
+
+void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual)
+{
+	u32 relative_address;
+	u32 start_index;
+	u32 nr_of_regs;
+	u32 *frame_registers = mali_pp_job_get_frame_registers(job);
+	u32 *wb0_registers = mali_pp_job_get_wb0_registers(job);
+	u32 *wb1_registers = mali_pp_job_get_wb1_registers(job);
+	u32 *wb2_registers = mali_pp_job_get_wb2_registers(job);
+	u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, sub_job);
+	u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, sub_job);
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Write frame registers */
+
+	/*
+	 * There are two frame registers which are different for each sub job:
+	 * 1. The Renderer List Address Register (MALI200_REG_ADDR_FRAME)
+	 * 2. The FS Stack Address Register (MALI200_REG_ADDR_STACK)
+	 */
+	mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_FRAME, mali_pp_job_get_addr_frame(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_FRAME / sizeof(u32)]);
+
+	/* For virtual jobs, the stack address shouldn't be broadcast but written individually */
+	if (!mali_pp_job_is_virtual(job) || restart_virtual) {
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_STACK, mali_pp_job_get_addr_stack(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_STACK / sizeof(u32)]);
+	}
+
+	/* Write registers between MALI200_REG_ADDR_FRAME and MALI200_REG_ADDR_STACK */
+	relative_address = MALI200_REG_ADDR_RSW;
+	start_index = MALI200_REG_ADDR_RSW / sizeof(u32);
+	nr_of_regs = (MALI200_REG_ADDR_STACK - MALI200_REG_ADDR_RSW) / sizeof(u32);
+
+	mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core,
+			relative_address, &frame_registers[start_index],
+			nr_of_regs, &mali_frame_registers_reset_values[start_index]);
+
+	/* MALI200_REG_ADDR_STACK_SIZE */
+	relative_address = MALI200_REG_ADDR_STACK_SIZE;
+	start_index = MALI200_REG_ADDR_STACK_SIZE / sizeof(u32);
+
+	mali_hw_core_register_write_relaxed_conditional(&core->hw_core,
+			relative_address, frame_registers[start_index],
+			mali_frame_registers_reset_values[start_index]);
+
+	/* Skip 2 reserved registers */
+
+	/* Write remaining registers */
+	relative_address = MALI200_REG_ADDR_ORIGIN_OFFSET_X;
+	start_index = MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32);
+	nr_of_regs = MALI_PP_MALI400_NUM_FRAME_REGISTERS - MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32);
+
+	mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core,
+			relative_address, &frame_registers[start_index],
+			nr_of_regs, &mali_frame_registers_reset_values[start_index]);
+
+	/* Write WBx registers */
+	if (wb0_registers[0]) { /* M200_WB0_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB0, wb0_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (wb1_registers[0]) { /* M200_WB1_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB1, wb1_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (wb2_registers[0]) { /* M200_WB2_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB2, wb2_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0);
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value);
+	}
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1);
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value);
+	}
+
+#ifdef CONFIG_MALI400_HEATMAPS_ENABLED
+	if (job->uargs.perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERFMON_CONTR, ((job->uargs.tilesx & 0x3FF) << 16) | 1);
+		mali_hw_core_register_write_relaxed(&core->hw_core,  MALI200_REG_ADDR_MGMT_PERFMON_BASE, job->uargs.heatmap_mem & 0xFFFFFFF8);
+	}
+#endif /* CONFIG_MALI400_HEATMAPS_ENABLED */
+
+	MALI_DEBUG_PRINT(3, ("Mali PP: Starting job 0x%08X part %u/%u on PP core %s\n", job, sub_job + 1, mali_pp_job_get_sub_job_count(job), core->hw_core.description));
+
+	/* Adding barrier to make sure all rester writes are finished */
+	_mali_osk_write_mem_barrier();
+
+	/* This is the command that starts the core.
+	 *
+	 * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just
+	 * force core to assert the completion interrupt.
+	 */
+#if !defined(PROFILING_SKIP_PP_JOBS)
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_START_RENDERING);
+#else
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_END_OF_FRAME);
+#endif
+
+	/* Adding barrier to make sure previous rester writes is finished */
+	_mali_osk_write_mem_barrier();
+}
+
+u32 mali_pp_core_get_version(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION);
+}
+
+struct mali_pp_core *mali_pp_get_global_pp_core(u32 index)
+{
+	if (mali_global_num_pp_cores > index) {
+		return mali_global_pp_cores[index];
+	}
+
+	return NULL;
+}
+
+u32 mali_pp_get_glob_num_pp_cores(void)
+{
+	return mali_global_num_pp_cores;
+}
+
+/* ------------- interrupt handling below ------------------ */
+static void mali_pp_irq_probe_trigger(void *data)
+{
+	struct mali_pp_core *core = (struct mali_pp_core *)data;
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_FORCE_HANG);
+	_mali_osk_mem_barrier();
+}
+
+static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data)
+{
+	struct mali_pp_core *core = (struct mali_pp_core *)data;
+	u32 irq_readout;
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS);
+	if (MALI200_REG_VAL_IRQ_FORCE_HANG & irq_readout) {
+		mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_FORCE_HANG);
+		_mali_osk_mem_barrier();
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+#if 0
+static void mali_pp_print_registers(struct mali_pp_core *core)
+{
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_VERSION = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_RAWSTAT = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_MASK = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE)));
+}
+#endif
+
+#if 0
+void mali_pp_print_state(struct mali_pp_core *core)
+{
+	MALI_DEBUG_PRINT(2, ("Mali PP: State: 0x%08x\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+}
+#endif
+
+void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob)
+{
+	u32 val0 = 0;
+	u32 val1 = 0;
+	u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, subjob);
+	u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, subjob);
+#if defined(CONFIG_MALI400_PROFILING)
+	int counter_index = COUNTER_FP_0_C0 + (2 * child->core_id);
+#endif
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		val0 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE);
+		mali_pp_job_set_perf_counter_value0(job, subjob, val0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(counter_index, val0);
+#endif
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		val1 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE);
+		mali_pp_job_set_perf_counter_value1(job, subjob, val1);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(counter_index + 1, val1);
+#endif
+	}
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\tPP #%d: %s\n", core->core_id, core->hw_core.description);
+
+	return n;
+}
+#endif
diff --git a/utgard/r5p1/common/mali_pp.h b/utgard/r5p1/common/mali_pp.h
new file mode 100755
index 0000000..45712a3
--- /dev/null
+++ b/utgard/r5p1/common/mali_pp.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PP_H__
+#define __MALI_PP_H__
+
+#include "mali_osk.h"
+#include "mali_pp_job.h"
+#include "mali_hw_core.h"
+
+struct mali_group;
+
+#define MALI_MAX_NUMBER_OF_PP_CORES        9
+
+/**
+ * Definition of the PP core struct
+ * Used to track a PP core in the system.
+ */
+struct mali_pp_core {
+	struct mali_hw_core  hw_core;           /**< Common for all HW cores */
+	_mali_osk_irq_t     *irq;               /**< IRQ handler */
+	u32                  core_id;           /**< Unique core ID */
+	u32                  bcast_id;          /**< The "flag" value used by the Mali-450 broadcast and DLBU unit */
+};
+
+_mali_osk_errcode_t mali_pp_initialize(void);
+void mali_pp_terminate(void);
+
+struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id);
+void mali_pp_delete(struct mali_pp_core *core);
+
+void mali_pp_stop_bus(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core);
+void mali_pp_reset_async(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core);
+
+void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual);
+
+u32 mali_pp_core_get_version(struct mali_pp_core *core);
+
+MALI_STATIC_INLINE u32 mali_pp_core_get_id(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return core->core_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_core_get_bcast_id(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return core->bcast_id;
+}
+
+struct mali_pp_core *mali_pp_get_global_pp_core(u32 index);
+u32 mali_pp_get_glob_num_pp_cores(void);
+
+/* Debug */
+u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size);
+
+/**
+ * Put instrumented HW counters from the core(s) to the job object (if enabled)
+ *
+ * parent and child is always the same, except for virtual jobs on Mali-450.
+ * In this case, the counters will be enabled on the virtual core (parent),
+ * but values need to be read from the child cores.
+ *
+ * @param parent The core used to see if the counters was enabled
+ * @param child The core to actually read the values from
+ * @job Job object to update with counter values (if enabled)
+ * @subjob Which subjob the counters are applicable for (core ID for virtual jobs)
+ */
+void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob);
+
+MALI_STATIC_INLINE const char *mali_pp_core_description(struct mali_pp_core *core)
+{
+	return core->hw_core.description;
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_pp_get_interrupt_result(struct mali_pp_core *core)
+{
+	u32 rawstat_used = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT) &
+			   MALI200_REG_VAL_IRQ_MASK_USED;
+	if (0 == rawstat_used) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	} else if (MALI200_REG_VAL_IRQ_END_OF_FRAME == rawstat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS;
+	}
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_get_rawstat(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core,
+					  MALI200_REG_ADDR_MGMT_INT_RAWSTAT);
+}
+
+
+MALI_STATIC_INLINE u32 mali_pp_is_active(struct mali_pp_core *core)
+{
+	u32 status = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS);
+	return (status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_pp_mask_all_interrupts(struct mali_pp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE);
+}
+
+MALI_STATIC_INLINE void mali_pp_enable_interrupts(struct mali_pp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+}
+
+MALI_STATIC_INLINE void mali_pp_write_addr_renderer_list(struct mali_pp_core *core,
+		struct mali_pp_job *job, u32 subjob)
+{
+	u32 addr = mali_pp_job_get_addr_frame(job, subjob);
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_FRAME, addr);
+}
+
+
+MALI_STATIC_INLINE void mali_pp_write_addr_stack(struct mali_pp_core *core, struct mali_pp_job *job)
+{
+	u32 addr = mali_pp_job_get_addr_stack(job, core->core_id);
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_STACK, addr);
+}
+
+#endif /* __MALI_PP_H__ */
diff --git a/utgard/r5p1/common/mali_pp_job.c b/utgard/r5p1/common/mali_pp_job.c
new file mode 100755
index 0000000..86ca935
--- /dev/null
+++ b/utgard/r5p1/common/mali_pp_job.c
@@ -0,0 +1,286 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pp.h"
+#include "mali_pp_job.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_executor.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#include "linux/mali_memory_dma_buf.h"
+#endif
+
+static u32 pp_counter_src0 = MALI_HW_CORE_NO_COUNTER;   /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+static u32 pp_counter_src1 = MALI_HW_CORE_NO_COUNTER;   /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+static _mali_osk_atomic_t pp_counter_per_sub_job_count; /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */
+static u32 pp_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER };
+static u32 pp_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER };
+
+void mali_pp_job_initialize(void)
+{
+	_mali_osk_atomic_init(&pp_counter_per_sub_job_count, 0);
+}
+
+void mali_pp_job_terminate(void)
+{
+	_mali_osk_atomic_term(&pp_counter_per_sub_job_count);
+}
+
+struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session,
+				       _mali_uk_pp_start_job_s __user *uargs, u32 id)
+{
+	struct mali_pp_job *job;
+	u32 perf_counter_flag;
+
+	job = _mali_osk_calloc(1, sizeof(struct mali_pp_job));
+	if (NULL != job) {
+		u32 num_memory_cookies = 0;
+		if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_pp_start_job_s))) {
+			goto fail;
+		}
+
+		if (job->uargs.num_cores > _MALI_PP_MAX_SUB_JOBS) {
+			MALI_PRINT_ERROR(("Mali PP job: Too many sub jobs specified in job object\n"));
+			goto fail;
+		}
+
+		if (!mali_pp_job_use_no_notification(job)) {
+			job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_FINISHED, sizeof(_mali_uk_pp_job_finished_s));
+			if (NULL == job->finished_notification) goto fail;
+		}
+
+		perf_counter_flag = mali_pp_job_get_perf_counter_flag(job);
+
+		/* case when no counters came from user space
+		 * so pass the debugfs / DS-5 provided global ones to the job object */
+		if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) ||
+		      (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) {
+			u32 sub_job_count = _mali_osk_atomic_read(&pp_counter_per_sub_job_count);
+
+			/* These counters apply for all virtual jobs, and where no per sub job counter is specified */
+			job->uargs.perf_counter_src0 = pp_counter_src0;
+			job->uargs.perf_counter_src1 = pp_counter_src1;
+
+			/* We only copy the per sub job array if it is enabled with at least one counter */
+			if (0 < sub_job_count) {
+				job->perf_counter_per_sub_job_count = sub_job_count;
+				_mali_osk_memcpy(job->perf_counter_per_sub_job_src0, pp_counter_per_sub_job_src0, sizeof(pp_counter_per_sub_job_src0));
+				_mali_osk_memcpy(job->perf_counter_per_sub_job_src1, pp_counter_per_sub_job_src1, sizeof(pp_counter_per_sub_job_src1));
+			}
+		}
+
+		_mali_osk_list_init(&job->list);
+		job->session = session;
+		job->id = id;
+
+		job->sub_jobs_num = job->uargs.num_cores ? job->uargs.num_cores : 1;
+		job->pid = _mali_osk_get_pid();
+		job->tid = _mali_osk_get_tid();
+
+		_mali_osk_atomic_init(&job->sub_jobs_completed, 0);
+		_mali_osk_atomic_init(&job->sub_job_errors, 0);
+		num_memory_cookies = job->uargs.num_memory_cookies;
+		if (num_memory_cookies !=  0) {
+			u32 size;
+			u32 __user *memory_cookies = (u32 __user *)(uintptr_t)job->uargs.memory_cookies;
+
+			if (num_memory_cookies > session->allocation_mgr.mali_allocation_nr) {
+				MALI_PRINT_ERROR(("Mali PP job: Too many memory cookies specified in job object\n"));
+				goto fail;
+			}
+
+			size = sizeof(*memory_cookies) * num_memory_cookies;
+
+			job->memory_cookies = _mali_osk_malloc(size);
+			if (NULL == job->memory_cookies) {
+				MALI_PRINT_ERROR(("Mali PP job: Failed to allocate %d bytes of memory cookies!\n", size));
+				goto fail;
+			}
+
+			if (0 != _mali_osk_copy_from_user(job->memory_cookies, memory_cookies, size)) {
+				MALI_PRINT_ERROR(("Mali PP job: Failed to copy %d bytes of memory cookies from user!\n", size));
+				goto fail;
+			}
+		}
+
+		if (_MALI_OSK_ERR_OK != mali_pp_job_check(job)) {
+			/* Not a valid job. */
+			goto fail;
+		}
+
+		mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_PP, NULL, job);
+		mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence));
+
+		return job;
+	}
+
+fail:
+	if (NULL != job) {
+		mali_pp_job_delete(job);
+	}
+
+	return NULL;
+}
+
+void mali_pp_job_delete(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list));
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->session_fb_lookup_list));
+
+	if (NULL != job->finished_notification) {
+		_mali_osk_notification_delete(job->finished_notification);
+	}
+
+	if (NULL != job->memory_cookies) {
+		_mali_osk_free(job->memory_cookies);
+	}
+
+	_mali_osk_atomic_term(&job->sub_jobs_completed);
+	_mali_osk_atomic_term(&job->sub_job_errors);
+
+	_mali_osk_free(job);
+}
+
+void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list)
+{
+	struct mali_pp_job *iter;
+	struct mali_pp_job *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Find position in list/queue where job should be added. */
+	_MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list,
+					    struct mali_pp_job, list) {
+		/* job should be started after iter if iter is in progress. */
+		if (0 < iter->sub_jobs_started) {
+			break;
+		}
+
+		/*
+		 * job should be started after iter if it has a higher
+		 * job id. A span is used to handle job id wrapping.
+		 */
+		if ((mali_pp_job_get_id(job) -
+		     mali_pp_job_get_id(iter)) <
+		    MALI_SCHEDULER_JOB_ID_SPAN) {
+			break;
+		}
+	}
+
+	_mali_osk_list_add(&job->list, &iter->list);
+}
+
+
+u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job)
+{
+	/* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */
+	if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) {
+		return job->uargs.perf_counter_src0;
+	}
+
+	/* Use per sub job counter if enabled... */
+	if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src0[sub_job]) {
+		return job->perf_counter_per_sub_job_src0[sub_job];
+	}
+
+	/* ...else default to global job counter */
+	return job->uargs.perf_counter_src0;
+}
+
+u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job)
+{
+	/* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */
+	if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) {
+		/* Virtual jobs always use the global job counter */
+		return job->uargs.perf_counter_src1;
+	}
+
+	/* Use per sub job counter if enabled... */
+	if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src1[sub_job]) {
+		return job->perf_counter_per_sub_job_src1[sub_job];
+	}
+
+	/* ...else default to global job counter */
+	return job->uargs.perf_counter_src1;
+}
+
+void mali_pp_job_set_pp_counter_global_src0(u32 counter)
+{
+	pp_counter_src0 = counter;
+}
+
+void mali_pp_job_set_pp_counter_global_src1(u32 counter)
+{
+	pp_counter_src1 = counter;
+}
+
+void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+
+	if (MALI_HW_CORE_NO_COUNTER == pp_counter_per_sub_job_src0[sub_job]) {
+		/* increment count since existing counter was disabled */
+		_mali_osk_atomic_inc(&pp_counter_per_sub_job_count);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == counter) {
+		/* decrement count since new counter is disabled */
+		_mali_osk_atomic_dec(&pp_counter_per_sub_job_count);
+	}
+
+	/* PS: A change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will inc and dec, result will be 0 change */
+
+	pp_counter_per_sub_job_src0[sub_job] = counter;
+}
+
+void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+
+	if (MALI_HW_CORE_NO_COUNTER == pp_counter_per_sub_job_src1[sub_job]) {
+		/* increment count since existing counter was disabled */
+		_mali_osk_atomic_inc(&pp_counter_per_sub_job_count);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == counter) {
+		/* decrement count since new counter is disabled */
+		_mali_osk_atomic_dec(&pp_counter_per_sub_job_count);
+	}
+
+	/* PS: A change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will inc and dec, result will be 0 change */
+
+	pp_counter_per_sub_job_src1[sub_job] = counter;
+}
+
+u32 mali_pp_job_get_pp_counter_global_src0(void)
+{
+	return pp_counter_src0;
+}
+
+u32 mali_pp_job_get_pp_counter_global_src1(void)
+{
+	return pp_counter_src1;
+}
+
+u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+	return pp_counter_per_sub_job_src0[sub_job];
+}
+
+u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+	return pp_counter_per_sub_job_src1[sub_job];
+}
diff --git a/utgard/r5p1/common/mali_pp_job.h b/utgard/r5p1/common/mali_pp_job.h
new file mode 100755
index 0000000..bdd61c5
--- /dev/null
+++ b/utgard/r5p1/common/mali_pp_job.h
@@ -0,0 +1,564 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PP_JOB_H__
+#define __MALI_PP_JOB_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "regs/mali_200_regs.h"
+#include "mali_kernel_core.h"
+#include "mali_dlbu.h"
+#include "mali_timeline.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#include "linux/mali_memory_dma_buf.h"
+#endif
+
+/**
+ * This structure represents a PP job, including all sub jobs.
+ *
+ * The PP job object itself is not protected by any single lock,
+ * but relies on other locks instead (scheduler, executor and timeline lock).
+ * Think of the job object as moving between these sub systems through-out
+ * its lifetime. Different part of the PP job struct is used by different
+ * subsystems. Accessor functions ensure that correct lock is taken.
+ * Do NOT access any data members directly from outside this module!
+ */
+struct mali_pp_job {
+	/*
+	 * These members are typically only set at creation,
+	 * and only read later on.
+	 * They do not require any lock protection.
+	 */
+	_mali_uk_pp_start_job_s uargs;                     /**< Arguments from user space */
+	struct mali_session_data *session;                 /**< Session which submitted this job */
+	u32 pid;                                           /**< Process ID of submitting process */
+	u32 tid;                                           /**< Thread ID of submitting thread */
+	u32 id;                                            /**< Identifier for this job in kernel space (sequential numbering) */
+	u32 cache_order;                                   /**< Cache order used for L2 cache flushing (sequential numbering) */
+	struct mali_timeline_tracker tracker;              /**< Timeline tracker for this job */
+	_mali_osk_notification_t *finished_notification;   /**< Notification sent back to userspace on job complete */
+	u32 perf_counter_per_sub_job_count;                /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */
+	u32 perf_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src0 */
+	u32 perf_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src1 */
+	u32 sub_jobs_num;                                  /**< Number of subjobs; set to 1 for Mali-450 if DLBU is used, otherwise equals number of PP cores */
+
+	/*
+	 * These members are used by both scheduler and executor.
+	 * They are "protected" by atomic operations.
+	 */
+	_mali_osk_atomic_t sub_jobs_completed;                            /**< Number of completed sub-jobs in this superjob */
+	_mali_osk_atomic_t sub_job_errors;                                /**< Bitfield with errors (errors for each single sub-job is or'ed together) */
+
+	/*
+	 * These members are used by scheduler, but only when no one else
+	 * knows about this job object but the working function.
+	 * No lock is thus needed for these.
+	 */
+	u32 *memory_cookies;                               /**< Memory cookies attached to job */
+
+	/*
+	 * These members are used by the scheduler,
+	 * protected by scheduler lock
+	 */
+	_mali_osk_list_t list;                             /**< Used to link jobs together in the scheduler queue */
+	_mali_osk_list_t session_fb_lookup_list;           /**< Used to link jobs together from the same frame builder in the session */
+	u32 sub_jobs_started;                              /**< Total number of sub-jobs started (always started in ascending order) */
+
+	/*
+	 * Set by executor/group on job completion, read by scheduler when
+	 * returning job to user. Hold executor lock when setting,
+	 * no lock needed when reading
+	 */
+	u32 perf_counter_value0[_MALI_PP_MAX_SUB_JOBS];    /**< Value of performance counter 0 (to be returned to user space), one for each sub job */
+	u32 perf_counter_value1[_MALI_PP_MAX_SUB_JOBS];    /**< Value of performance counter 1 (to be returned to user space), one for each sub job */
+};
+
+void mali_pp_job_initialize(void);
+void mali_pp_job_terminate(void);
+
+struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session, _mali_uk_pp_start_job_s *uargs, u32 id);
+void mali_pp_job_delete(struct mali_pp_job *job);
+
+u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job);
+u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job);
+
+void mali_pp_job_set_pp_counter_global_src0(u32 counter);
+void mali_pp_job_set_pp_counter_global_src1(u32 counter);
+void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter);
+void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter);
+
+u32 mali_pp_job_get_pp_counter_global_src0(void);
+u32 mali_pp_job_get_pp_counter_global_src1(void);
+u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job);
+u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job);
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->id;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_cache_order(struct mali_pp_job *job,
+		u32 cache_order)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	job->cache_order = cache_order;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_cache_order(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->cache_order;
+}
+
+MALI_STATIC_INLINE u64 mali_pp_job_get_user_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.user_job_ptr;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_frame_builder_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_builder_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_flush_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.flush_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_pid(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->pid;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_tid(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->tid;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_frame_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_dlbu_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.dlbu_registers;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_virtual(struct mali_pp_job *job)
+{
+#if defined(CONFIG_MALI450)
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (0 == job->uargs.num_cores) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_addr_frame(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (mali_pp_job_is_virtual(job)) {
+		return MALI_DLBU_VIRT_ADDR;
+	} else if (0 == sub_job) {
+		return job->uargs.frame_registers[MALI200_REG_ADDR_FRAME / sizeof(u32)];
+	} else if (sub_job < _MALI_PP_MAX_SUB_JOBS) {
+		return job->uargs.frame_registers_addr_frame[sub_job - 1];
+	}
+
+	return 0;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_addr_stack(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (0 == sub_job) {
+		return job->uargs.frame_registers[MALI200_REG_ADDR_STACK / sizeof(u32)];
+	} else if (sub_job < _MALI_PP_MAX_SUB_JOBS) {
+		return job->uargs.frame_registers_addr_stack[sub_job - 1];
+	}
+
+	return 0;
+}
+
+void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list);
+
+MALI_STATIC_INLINE void mali_pp_job_list_addtail(struct mali_pp_job *job,
+		_mali_osk_list_t *list)
+{
+	_mali_osk_list_addtail(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_list_move(struct mali_pp_job *job,
+		_mali_osk_list_t *list)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list));
+	_mali_osk_list_move(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_list_remove(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->list);
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb0_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb0_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb1_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb1_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb2_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb2_registers;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb0_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb1_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb2_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb0(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb1(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb2(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_all_writeback_unit_disabled(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] ||
+	    job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] ||
+	    job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT]
+	   ) {
+		/* At least one output unit active */
+		return MALI_FALSE;
+	}
+
+	/* All outputs are disabled - we can abort the job */
+	return MALI_TRUE;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_fb_lookup_add(struct mali_pp_job *job)
+{
+	u32 fb_lookup_id;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	fb_lookup_id = MALI_PP_JOB_FB_LOOKUP_LIST_MASK & job->uargs.frame_builder_id;
+
+	MALI_DEBUG_ASSERT(MALI_PP_JOB_FB_LOOKUP_LIST_SIZE > fb_lookup_id);
+
+	_mali_osk_list_addtail(&job->session_fb_lookup_list,
+			       &job->session->pp_job_fb_lookup_list[fb_lookup_id]);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_fb_lookup_remove(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->session_fb_lookup_list);
+}
+
+MALI_STATIC_INLINE struct mali_session_data *mali_pp_job_get_session(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->session;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_has_started_sub_jobs(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return (0 < job->sub_jobs_started) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_has_unstarted_sub_jobs(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return (job->sub_jobs_started < job->sub_jobs_num) ? MALI_TRUE : MALI_FALSE;
+}
+
+/* Function used when we are terminating a session with jobs. Return TRUE if it has a rendering job.
+   Makes sure that no new subjobs are started. */
+MALI_STATIC_INLINE void mali_pp_job_mark_unstarted_failed(struct mali_pp_job *job)
+{
+	u32 jobs_remaining;
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	jobs_remaining = job->sub_jobs_num - job->sub_jobs_started;
+	job->sub_jobs_started += jobs_remaining;
+
+	/* Not the most optimal way, but this is only used in error cases */
+	for (i = 0; i < jobs_remaining; i++) {
+		_mali_osk_atomic_inc(&job->sub_jobs_completed);
+		_mali_osk_atomic_inc(&job->sub_job_errors);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_complete(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->sub_jobs_num ==
+		_mali_osk_atomic_read(&job->sub_jobs_completed)) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_first_unstarted_sub_job(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return job->sub_jobs_started;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_sub_job_count(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->sub_jobs_num;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_unstarted_sub_job_count(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(job->sub_jobs_num >= job->sub_jobs_started);
+	return (job->sub_jobs_num - job->sub_jobs_started);
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_num_memory_cookies(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.num_memory_cookies;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_memory_cookie(
+	struct mali_pp_job *job, u32 index)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(index < job->uargs.num_memory_cookies);
+	MALI_DEBUG_ASSERT_POINTER(job->memory_cookies);
+	return job->memory_cookies[index];
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_needs_dma_buf_mapping(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (0 < job->uargs.num_memory_cookies) {
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_started(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Assert that we are marking the "first unstarted sub job" as started */
+	MALI_DEBUG_ASSERT(job->sub_jobs_started == sub_job);
+
+	job->sub_jobs_started++;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_completed(struct mali_pp_job *job, mali_bool success)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_atomic_inc(&job->sub_jobs_completed);
+	if (MALI_FALSE == success) {
+		_mali_osk_atomic_inc(&job->sub_job_errors);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_was_success(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	if (0 == _mali_osk_atomic_read(&job->sub_job_errors)) {
+		return MALI_TRUE;
+	}
+	return MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_use_no_notification(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.flags & _MALI_PP_JOB_FLAG_NO_NOTIFICATION) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_pilot_job(struct mali_pp_job *job)
+{
+	/*
+	 * A pilot job is currently identified as jobs which
+	 * require no callback notification.
+	 */
+	return mali_pp_job_use_no_notification(job);
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *
+mali_pp_job_get_finished_notification(struct mali_pp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->finished_notification);
+
+	notification = job->finished_notification;
+	job->finished_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_window_surface(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.flags & _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE)
+	       ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_flag(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_flag;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value0(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value0[sub_job];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value1(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value1[sub_job];
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value0(struct mali_pp_job *job, u32 sub_job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value0[sub_job] = value;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value1(struct mali_pp_job *job, u32 sub_job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value1[sub_job] = value;
+}
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_pp_job_check(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	if (mali_pp_job_is_virtual(job) && job->sub_jobs_num != 1) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/**
+ * Returns MALI_TRUE if this job has more than two sub jobs and all sub jobs are unstarted.
+ *
+ * @param job Job to check.
+ * @return MALI_TRUE if job has more than two sub jobs and all sub jobs are unstarted, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_large_and_unstarted(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!mali_pp_job_is_virtual(job));
+
+	return (0 == job->sub_jobs_started && 2 < job->sub_jobs_num);
+}
+
+/**
+ * Get PP job's Timeline tracker.
+ *
+ * @param job PP job.
+ * @return Pointer to Timeline tracker for the job.
+ */
+MALI_STATIC_INLINE struct mali_timeline_tracker *mali_pp_job_get_tracker(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return &(job->tracker);
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_timeline_point_ptr(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr;
+}
+
+
+#endif /* __MALI_PP_JOB_H__ */
diff --git a/utgard/r5p1/common/mali_scheduler.c b/utgard/r5p1/common/mali_scheduler.c
new file mode 100755
index 0000000..b07f78a
--- /dev/null
+++ b/utgard/r5p1/common/mali_scheduler.c
@@ -0,0 +1,1294 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_scheduler.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_profiling.h"
+#include "mali_kernel_utilization.h"
+#include "mali_timeline.h"
+#include "mali_gp_job.h"
+#include "mali_pp_job.h"
+#include "mali_executor.h"
+#include "mali_group.h"
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#endif
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+#include <linux/sched.h>
+#include <trace/events/gpu.h>
+#endif
+/*
+ * ---------- static defines/constants ----------
+ */
+
+/*
+ * If dma_buf with map on demand is used, we defer job deletion and job queue
+ * if in atomic context, since both might sleep.
+ */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#define MALI_SCHEDULER_USE_DEFERRED_PP_JOB_DELETE 1
+#define MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE 1
+#endif
+#endif
+
+/*
+ * ---------- global variables (exported due to inline functions) ----------
+ */
+
+/* Lock protecting this module */
+_mali_osk_spinlock_irq_t *mali_scheduler_lock_obj = NULL;
+
+/* Queue of jobs to be executed on the GP group */
+struct mali_scheduler_job_queue job_queue_gp;
+
+/* Queue of PP jobs */
+struct mali_scheduler_job_queue job_queue_pp;
+
+_mali_osk_atomic_t mali_job_id_autonumber;
+_mali_osk_atomic_t mali_job_cache_order_autonumber;
+/*
+ * ---------- static variables ----------
+ */
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_DELETE)
+static _mali_osk_wq_work_t *scheduler_wq_pp_job_delete = NULL;
+static _mali_osk_spinlock_irq_t *scheduler_pp_job_delete_lock = NULL;
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_deletion_queue);
+#endif
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+static _mali_osk_wq_work_t *scheduler_wq_pp_job_queue = NULL;
+static _mali_osk_spinlock_irq_t *scheduler_pp_job_queue_lock = NULL;
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_queue_list);
+#endif
+
+/*
+ * ---------- Forward declaration of static functions ----------
+ */
+
+static mali_timeline_point mali_scheduler_submit_gp_job(
+	struct mali_session_data *session, struct mali_gp_job *job);
+static mali_timeline_point mali_scheduler_submit_pp_job(
+	struct mali_session_data *session, struct mali_pp_job *job);
+
+static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job);
+static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job);
+
+static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job,
+		mali_bool success);
+static void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job,
+		u32 num_cores_in_virtual);
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_DELETE)
+static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job);
+static void mali_scheduler_do_pp_job_delete(void *arg);
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_DELETE) */
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job);
+static void mali_scheduler_do_pp_job_queue(void *arg);
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+/*
+ * ---------- Actual implementation ----------
+ */
+
+_mali_osk_errcode_t mali_scheduler_initialize(void)
+{
+	_mali_osk_atomic_init(&mali_job_id_autonumber, 0);
+	_mali_osk_atomic_init(&mali_job_cache_order_autonumber, 0);
+
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.normal_pri);
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.high_pri);
+	job_queue_gp.depth = 0;
+
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.normal_pri);
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.high_pri);
+	job_queue_pp.depth = 0;
+
+	mali_scheduler_lock_obj = _mali_osk_spinlock_irq_init(
+					  _MALI_OSK_LOCKFLAG_ORDERED,
+					  _MALI_OSK_LOCK_ORDER_SCHEDULER);
+	if (NULL == mali_scheduler_lock_obj) {
+		mali_scheduler_terminate();
+	}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_DELETE)
+	scheduler_wq_pp_job_delete = _mali_osk_wq_create_work(
+					     mali_scheduler_do_pp_job_delete, NULL);
+	if (NULL == scheduler_wq_pp_job_delete) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	scheduler_pp_job_delete_lock = _mali_osk_spinlock_irq_init(
+					       _MALI_OSK_LOCKFLAG_ORDERED,
+					       _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED);
+	if (NULL == scheduler_pp_job_delete_lock) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_DELETE) */
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	scheduler_wq_pp_job_queue = _mali_osk_wq_create_work(
+					    mali_scheduler_do_pp_job_queue, NULL);
+	if (NULL == scheduler_wq_pp_job_queue) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	scheduler_pp_job_queue_lock = _mali_osk_spinlock_irq_init(
+					      _MALI_OSK_LOCKFLAG_ORDERED,
+					      _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED);
+	if (NULL == scheduler_pp_job_queue_lock) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_scheduler_terminate(void)
+{
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	if (NULL != scheduler_pp_job_queue_lock) {
+		_mali_osk_spinlock_irq_term(scheduler_pp_job_queue_lock);
+		scheduler_pp_job_queue_lock = NULL;
+	}
+
+	if (NULL != scheduler_wq_pp_job_queue) {
+		_mali_osk_wq_delete_work(scheduler_wq_pp_job_queue);
+		scheduler_wq_pp_job_queue = NULL;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_DELETE)
+	if (NULL != scheduler_pp_job_delete_lock) {
+		_mali_osk_spinlock_irq_term(scheduler_pp_job_delete_lock);
+		scheduler_pp_job_delete_lock = NULL;
+	}
+
+	if (NULL != scheduler_wq_pp_job_delete) {
+		_mali_osk_wq_delete_work(scheduler_wq_pp_job_delete);
+		scheduler_wq_pp_job_delete = NULL;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_DELETE) */
+
+	if (NULL != mali_scheduler_lock_obj) {
+		_mali_osk_spinlock_irq_term(mali_scheduler_lock_obj);
+		mali_scheduler_lock_obj = NULL;
+	}
+
+	_mali_osk_atomic_term(&mali_job_cache_order_autonumber);
+	_mali_osk_atomic_term(&mali_job_id_autonumber);
+}
+
+u32 mali_scheduler_job_physical_head_count(void)
+{
+	/*
+	 * Count how many physical sub jobs are present from the head of queue
+	 * until the first virtual job is present.
+	 * Early out when we have reached maximum number of PP cores (8)
+	 */
+	u32 count = 0;
+	struct mali_pp_job *job;
+	struct mali_pp_job *temp;
+
+	/* Check for partially started normal pri jobs */
+	if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+					   struct mali_pp_job, list);
+
+		MALI_DEBUG_ASSERT_POINTER(job);
+
+		if (MALI_TRUE == mali_pp_job_has_started_sub_jobs(job)) {
+			/*
+			 * Remember; virtual jobs can't be queued and started
+			 * at the same time, so this must be a physical job
+			 */
+			count += mali_pp_job_unstarted_sub_job_count(job);
+			if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) {
+				return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.high_pri,
+				    struct mali_pp_job, list) {
+		if (MALI_FALSE == mali_pp_job_is_virtual(job)) {
+			count += mali_pp_job_unstarted_sub_job_count(job);
+			if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) {
+				return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+			}
+		} else {
+			/* Came across a virtual job, so stop counting */
+			return count;
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.normal_pri,
+				    struct mali_pp_job, list) {
+		if (MALI_FALSE == mali_pp_job_is_virtual(job)) {
+			/* any partially started is already counted */
+			if (MALI_FALSE == mali_pp_job_has_started_sub_jobs(job)) {
+				count += mali_pp_job_unstarted_sub_job_count(job);
+				if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <=
+				    count) {
+					return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+				}
+			}
+		} else {
+			/* Came across a virtual job, so stop counting */
+			return count;
+		}
+	}
+
+	return count;
+}
+
+mali_bool mali_scheduler_job_next_is_virtual(void)
+{
+	struct mali_pp_job *job;
+
+	job = mali_scheduler_job_pp_virtual_peek();
+	if (NULL != job) {
+		MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job));
+
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+struct mali_gp_job *mali_scheduler_job_gp_get(void)
+{
+	_mali_osk_list_t *queue;
+	struct mali_gp_job *job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+	MALI_DEBUG_ASSERT(0 < job_queue_gp.depth);
+
+	if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) {
+		queue = &job_queue_gp.high_pri;
+	} else {
+		queue = &job_queue_gp.normal_pri;
+		MALI_DEBUG_ASSERT(!_mali_osk_list_empty(queue));
+	}
+
+	job = _MALI_OSK_LIST_ENTRY(queue->next, struct mali_gp_job, list);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	mali_gp_job_list_remove(job);
+	job_queue_gp.depth--;
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void)
+{
+	struct mali_pp_job *job = NULL;
+	struct mali_pp_job *tmp_job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	/*
+	 * For PP jobs we favour partially started jobs in normal
+	 * priority queue over unstarted jobs in high priority queue
+	 */
+
+	if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+					       struct mali_pp_job, list);
+		MALI_DEBUG_ASSERT(NULL != tmp_job);
+
+		if (MALI_FALSE == mali_pp_job_is_virtual(tmp_job)) {
+			job = tmp_job;
+		}
+	}
+
+	if (NULL == job ||
+	    MALI_FALSE == mali_pp_job_has_started_sub_jobs(job)) {
+		/*
+		 * There isn't a partially started job in normal queue, so
+		 * look in high priority queue.
+		 */
+		if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+			MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+			tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next,
+						       struct mali_pp_job, list);
+			MALI_DEBUG_ASSERT(NULL != tmp_job);
+
+			if (MALI_FALSE == mali_pp_job_is_virtual(tmp_job)) {
+				job = tmp_job;
+			}
+		}
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void)
+{
+	struct mali_pp_job *job = NULL;
+	struct mali_pp_job *tmp_job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next,
+					       struct mali_pp_job, list);
+
+		if (MALI_TRUE == mali_pp_job_is_virtual(tmp_job)) {
+			job = tmp_job;
+		}
+	}
+
+	if (NULL == job) {
+		if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+			MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+			tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+						       struct mali_pp_job, list);
+
+			if (MALI_TRUE == mali_pp_job_is_virtual(tmp_job)) {
+				job = tmp_job;
+			}
+		}
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job)
+{
+	struct mali_pp_job *job = mali_scheduler_job_pp_physical_peek();
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == mali_pp_job_is_virtual(job));
+
+	if (NULL != job) {
+		*sub_job = mali_pp_job_get_first_unstarted_sub_job(job);
+
+		mali_pp_job_mark_sub_job_started(job, *sub_job);
+		if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(job)) {
+			/* Remove from queue when last sub job has been retrieved */
+			mali_pp_job_list_remove(job);
+		}
+
+		job_queue_pp.depth--;
+
+		/*
+		 * Job about to start so it is no longer be
+		 * possible to discard WB
+		 */
+		mali_pp_job_fb_lookup_remove(job);
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void)
+{
+	struct mali_pp_job *job = mali_scheduler_job_pp_virtual_peek();
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_pp_job_is_virtual(job));
+
+	if (NULL != job) {
+		MALI_DEBUG_ASSERT(0 ==
+				  mali_pp_job_get_first_unstarted_sub_job(job));
+		MALI_DEBUG_ASSERT(1 ==
+				  mali_pp_job_get_sub_job_count(job));
+
+		mali_pp_job_mark_sub_job_started(job, 0);
+
+		mali_pp_job_list_remove(job);
+
+		job_queue_pp.depth--;
+
+		/*
+		 * Job about to start so it is no longer be
+		 * possible to discard WB
+		 */
+		mali_pp_job_fb_lookup_remove(job);
+	}
+
+	return job;
+}
+
+mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Timeline activation for job %u (0x%08X).\n",
+			     mali_gp_job_get_id(job), job));
+
+	mali_scheduler_lock();
+
+	if (!mali_scheduler_queue_gp_job(job)) {
+		/* Failed to enqueue job, release job (with error) */
+
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_gp_job_get_tracker(job));
+		mali_gp_job_signal_pp_tracker(job, MALI_FALSE);
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_gp_job(job, MALI_FALSE,
+					       MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	mali_scheduler_unlock();
+
+	return MALI_SCHEDULER_MASK_GP;
+}
+
+mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Timeline activation for job %u (0x%08X).\n",
+			     mali_pp_job_get_id(job), job));
+
+	if (MALI_TRUE == mali_timeline_tracker_activation_error(
+		    mali_pp_job_get_tracker(job))) {
+		MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Job %u (0x%08X) activated with error, aborting.\n",
+				     mali_pp_job_get_id(job), job));
+
+		mali_scheduler_lock();
+		mali_pp_job_fb_lookup_remove(job);
+		mali_pp_job_mark_unstarted_failed(job);
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(job));
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	if (mali_pp_job_needs_dma_buf_mapping(job)) {
+		mali_scheduler_deferred_pp_job_queue(job);
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+	mali_scheduler_lock();
+
+	if (!mali_scheduler_queue_pp_job(job)) {
+		/* Failed to enqueue job, release job (with error) */
+		mali_pp_job_fb_lookup_remove(job);
+		mali_pp_job_mark_unstarted_failed(job);
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(job));
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	mali_scheduler_unlock();
+	return MALI_SCHEDULER_MASK_PP;
+}
+
+void mali_scheduler_complete_gp_job(struct mali_gp_job *job,
+				    mali_bool success,
+				    mali_bool user_notification,
+				    mali_bool dequeued)
+{
+	if (user_notification) {
+		mali_scheduler_return_gp_job_to_user(job, success);
+	}
+
+	if (dequeued) {
+		_mali_osk_pm_dev_ref_put();
+
+		if (mali_utilization_enabled()) {
+			mali_utilization_gp_end();
+		}
+	}
+
+	mali_gp_job_delete(job);
+}
+
+void mali_scheduler_complete_pp_job(struct mali_pp_job *job,
+				    u32 num_cores_in_virtual,
+				    mali_bool user_notification,
+				    mali_bool dequeued)
+{
+	if (user_notification) {
+		mali_scheduler_return_pp_job_to_user(job,
+						     num_cores_in_virtual);
+	}
+
+	if (dequeued) {
+#if defined(CONFIG_MALI_DVFS)
+		if (mali_pp_job_is_window_surface(job)) {
+			struct mali_session_data *session;
+			session = mali_pp_job_get_session(job);
+			mali_session_inc_num_window_jobs(session);
+		}
+#endif
+
+		_mali_osk_pm_dev_ref_put();
+
+		if (mali_utilization_enabled()) {
+			mali_utilization_pp_end();
+		}
+	}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_DELETE)
+	/*
+	 * The deletion of the job object (releasing sync refs etc)
+	 * must be done in a different context
+	 */
+	mali_scheduler_deferred_pp_job_delete(job);
+#else
+	/* no use cases need this in this configuration */
+	mali_pp_job_delete(job);
+#endif
+}
+
+void mali_scheduler_abort_session(struct mali_session_data *session)
+{
+	struct mali_gp_job *gp_job;
+	struct mali_gp_job *gp_tmp;
+	struct mali_pp_job *pp_job;
+	struct mali_pp_job *pp_tmp;
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_gp);
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_pp);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali scheduler: Aborting all queued jobs from session 0x%08X.\n",
+			     session));
+
+	mali_scheduler_lock();
+
+	/* Remove from GP normal priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.normal_pri,
+				    struct mali_gp_job, list) {
+		if (mali_gp_job_get_session(gp_job) == session) {
+			mali_gp_job_list_move(gp_job, &removed_jobs_gp);
+			job_queue_gp.depth--;
+		}
+	}
+
+	/* Remove from GP high priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.high_pri,
+				    struct mali_gp_job, list) {
+		if (mali_gp_job_get_session(gp_job) == session) {
+			mali_gp_job_list_move(gp_job, &removed_jobs_gp);
+			job_queue_gp.depth--;
+		}
+	}
+
+	/* Remove from PP normal priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp,
+				    &job_queue_pp.normal_pri,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_get_session(pp_job) == session) {
+			mali_pp_job_fb_lookup_remove(pp_job);
+
+			job_queue_pp.depth -=
+				mali_pp_job_unstarted_sub_job_count(
+					pp_job);
+			mali_pp_job_mark_unstarted_failed(pp_job);
+
+			if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(pp_job)) {
+				if (mali_pp_job_is_complete(pp_job)) {
+					mali_pp_job_list_move(pp_job,
+							&removed_jobs_pp);
+				} else {
+					mali_pp_job_list_remove(pp_job);
+				}
+			}
+		}
+	}
+
+	/* Remove from PP high priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp,
+				    &job_queue_pp.high_pri,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_get_session(pp_job) == session) {
+			mali_pp_job_fb_lookup_remove(pp_job);
+
+			job_queue_pp.depth -=
+				mali_pp_job_unstarted_sub_job_count(
+					pp_job);
+			mali_pp_job_mark_unstarted_failed(pp_job);
+
+			if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(pp_job)) {
+				if (mali_pp_job_is_complete(pp_job)) {
+					mali_pp_job_list_move(pp_job,
+							&removed_jobs_pp);
+				} else {
+					mali_pp_job_list_remove(pp_job);
+				}
+			}
+		}
+	}
+
+	/*
+	 * Release scheduler lock so we can release trackers
+	 * (which will potentially queue new jobs)
+	 */
+	mali_scheduler_unlock();
+
+	/* Release and complete all (non-running) found GP jobs  */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &removed_jobs_gp,
+				    struct mali_gp_job, list) {
+		mali_timeline_tracker_release(mali_gp_job_get_tracker(gp_job));
+		mali_gp_job_signal_pp_tracker(gp_job, MALI_FALSE);
+		_mali_osk_list_delinit(&gp_job->list);
+		mali_scheduler_complete_gp_job(gp_job,
+					       MALI_FALSE, MALI_FALSE, MALI_TRUE);
+	}
+
+	/* Release and complete non-running PP jobs */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp, &removed_jobs_pp,
+				    struct mali_pp_job, list) {
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(pp_job));
+		_mali_osk_list_delinit(&pp_job->list);
+		mali_scheduler_complete_pp_job(pp_job, 0,
+					       MALI_FALSE, MALI_TRUE);
+	}
+}
+
+_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx,
+		_mali_uk_gp_start_job_s *uargs)
+{
+	struct mali_session_data *session;
+	struct mali_gp_job *job;
+	mali_timeline_point point;
+	u32 __user *point_ptr = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)ctx;
+
+	job = mali_gp_job_create(session, uargs, mali_scheduler_get_new_id(),
+				 NULL);
+	if (NULL == job) {
+		MALI_PRINT_ERROR(("Failed to create GP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_gp_job_get_timeline_point_ptr(job);
+
+	point = mali_scheduler_submit_gp_job(session, job);
+
+	if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+		/*
+		 * Let user space know that something failed
+		 * after the job was started.
+		 */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx,
+		_mali_uk_pp_start_job_s *uargs)
+{
+	struct mali_session_data *session;
+	struct mali_pp_job *job;
+	mali_timeline_point point;
+	u32 __user *point_ptr = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)ctx;
+
+	job = mali_pp_job_create(session, uargs, mali_scheduler_get_new_id());
+	if (NULL == job) {
+		MALI_PRINT_ERROR(("Failed to create PP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(job);
+
+	point = mali_scheduler_submit_pp_job(session, job);
+	job = NULL;
+
+	if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+		/*
+		 * Let user space know that something failed
+		 * after the job was started.
+		 */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx,
+		_mali_uk_pp_and_gp_start_job_s *uargs)
+{
+	struct mali_session_data *session;
+	_mali_uk_pp_and_gp_start_job_s kargs;
+	struct mali_pp_job *pp_job;
+	struct mali_gp_job *gp_job;
+	u32 __user *point_ptr = NULL;
+	mali_timeline_point point;
+	_mali_uk_pp_start_job_s __user *pp_args;
+	_mali_uk_gp_start_job_s __user *gp_args;
+
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+
+	session = (struct mali_session_data *) ctx;
+
+	if (0 != _mali_osk_copy_from_user(&kargs, uargs,
+					  sizeof(_mali_uk_pp_and_gp_start_job_s))) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	pp_args = (_mali_uk_pp_start_job_s __user *)(uintptr_t)kargs.pp_args;
+	gp_args = (_mali_uk_gp_start_job_s __user *)(uintptr_t)kargs.gp_args;
+
+	pp_job = mali_pp_job_create(session, pp_args,
+				    mali_scheduler_get_new_id());
+	if (NULL == pp_job) {
+		MALI_PRINT_ERROR(("Failed to create PP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	gp_job = mali_gp_job_create(session, gp_args,
+				    mali_scheduler_get_new_id(),
+				    mali_pp_job_get_tracker(pp_job));
+	if (NULL == gp_job) {
+		MALI_PRINT_ERROR(("Failed to create GP job.\n"));
+		mali_pp_job_delete(pp_job);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(pp_job);
+
+	/* Submit GP job. */
+	mali_scheduler_submit_gp_job(session, gp_job);
+	gp_job = NULL;
+
+	/* Submit PP job. */
+	point = mali_scheduler_submit_pp_job(session, pp_job);
+	pp_job = NULL;
+
+	if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+		/*
+		 * Let user space know that something failed
+		 * after the jobs were started.
+		 */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args)
+{
+	struct mali_session_data *session;
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+	u32 fb_lookup_id;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	fb_lookup_id = args->fb_id & MALI_PP_JOB_FB_LOOKUP_LIST_MASK;
+
+	mali_scheduler_lock();
+
+	/* Iterate over all jobs for given frame builder_id. */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp,
+				    &session->pp_job_fb_lookup_list[fb_lookup_id],
+				    struct mali_pp_job, session_fb_lookup_list) {
+		MALI_DEBUG_CODE(u32 disable_mask = 0);
+
+		if (mali_pp_job_get_frame_builder_id(job) !=
+		    (u32) args->fb_id) {
+			MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Disable WB mismatching FB.\n"));
+			continue;
+		}
+
+		MALI_DEBUG_CODE(disable_mask |= 0xD << (4 * 3));
+
+		if (mali_pp_job_get_wb0_source_addr(job) == args->wb0_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x1 << (4 * 1));
+			mali_pp_job_disable_wb0(job);
+		}
+
+		if (mali_pp_job_get_wb1_source_addr(job) == args->wb1_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x2 << (4 * 2));
+			mali_pp_job_disable_wb1(job);
+		}
+
+		if (mali_pp_job_get_wb2_source_addr(job) == args->wb2_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x3 << (4 * 3));
+			mali_pp_job_disable_wb2(job);
+		}
+		MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Disable WB: 0x%X.\n",
+				     disable_mask));
+	}
+
+	mali_scheduler_unlock();
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_scheduler_dump_state(char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "GP queues\n");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tQueue depth: %u\n", job_queue_gp.depth);
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tNormal priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_gp.normal_pri) ?
+				"empty" : "not empty");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tHigh priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_gp.high_pri) ?
+				"empty" : "not empty");
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"PP queues\n");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tQueue depth: %u\n", job_queue_pp.depth);
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tNormal priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_pp.normal_pri)
+				? "empty" : "not empty");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tHigh priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_pp.high_pri)
+				? "empty" : "not empty");
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\n");
+
+	return n;
+}
+#endif
+
+/*
+ * ---------- Implementation of static functions ----------
+ */
+
+static mali_timeline_point mali_scheduler_submit_gp_job(
+	struct mali_session_data *session, struct mali_gp_job *job)
+{
+	mali_timeline_point point;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Add job to Timeline system. */
+	point = mali_timeline_system_add_tracker(session->timeline_system,
+			mali_gp_job_get_tracker(job), MALI_TIMELINE_GP);
+
+	return point;
+}
+
+static mali_timeline_point mali_scheduler_submit_pp_job(
+	struct mali_session_data *session, struct mali_pp_job *job)
+{
+	mali_timeline_point point;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	mali_scheduler_lock();
+	/*
+	 * Adding job to the lookup list used to quickly discard
+	 * writeback units of queued jobs.
+	 */
+	mali_pp_job_fb_lookup_add(job);
+	mali_scheduler_unlock();
+
+	/* Add job to Timeline system. */
+	point = mali_timeline_system_add_tracker(session->timeline_system,
+			mali_pp_job_get_tracker(job), MALI_TIMELINE_PP);
+
+	return point;
+}
+
+static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job)
+{
+	struct mali_session_data *session;
+	_mali_osk_list_t *queue;
+
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_gp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (unlikely(session->is_aborting)) {
+		MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Job %u (0x%08X) queued while session is aborting.\n",
+				     mali_gp_job_get_id(job), job));
+		return MALI_FALSE; /* job not queued */
+	}
+
+	mali_gp_job_set_cache_order(job, mali_scheduler_get_new_cache_order());
+
+	/* Determine which queue the job should be added to. */
+	if (session->use_high_priority_job_queue) {
+		queue = &job_queue_gp.high_pri;
+	} else {
+		queue = &job_queue_gp.normal_pri;
+	}
+
+	job_queue_gp.depth += 1;
+
+	/* Add job to queue (mali_gp_job_queue_add find correct place). */
+	mali_gp_job_list_add(job, queue);
+
+	/*
+	 * We hold a PM reference for every job we hold queued (and running)
+	 * It is important that we take this reference after job has been
+	 * added the the queue so that any runtime resume could schedule this
+	 * job right there and then.
+	 */
+	_mali_osk_pm_dev_ref_get_async();
+
+	if (mali_utilization_enabled()) {
+		/*
+		 * We cheat a little bit by counting the GP as busy from the
+		 * time a GP job is queued. This will be fine because we only
+		 * loose the tiny idle gap between jobs, but we will instead
+		 * get less utilization work to do (less locks taken)
+		 */
+		mali_utilization_gp_start();
+	}
+
+	/* Add profiling events for job enqueued */
+	_mali_osk_profiling_add_event(
+		MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE,
+		mali_gp_job_get_pid(job),
+		mali_gp_job_get_tid(job),
+		mali_gp_job_get_frame_builder_id(job),
+		mali_gp_job_get_flush_id(job),
+		0);
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+	trace_gpu_job_enqueue(mali_gp_job_get_tid(job),
+			      mali_gp_job_get_id(job), "GP");
+#endif
+
+	MALI_DEBUG_PRINT(3, ("Mali GP scheduler: Job %u (0x%08X) queued\n",
+			     mali_gp_job_get_id(job), job));
+
+	return MALI_TRUE; /* job queued */
+}
+
+static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job)
+{
+	struct mali_session_data *session;
+	_mali_osk_list_t *queue = NULL;
+
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_pp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (unlikely(session->is_aborting)) {
+		MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while session is aborting.\n",
+				     mali_pp_job_get_id(job), job));
+		return MALI_FALSE; /* job not queued */
+	}
+
+	mali_pp_job_set_cache_order(job, mali_scheduler_get_new_cache_order());
+
+	if (session->use_high_priority_job_queue) {
+		queue = &job_queue_pp.high_pri;
+	} else {
+		queue = &job_queue_pp.normal_pri;
+	}
+
+	job_queue_pp.depth +=
+		mali_pp_job_get_sub_job_count(job);
+
+	/* Add job to queue (mali_gp_job_queue_add find correct place). */
+	mali_pp_job_list_add(job, queue);
+
+	/*
+	 * We hold a PM reference for every job we hold queued (and running)
+	 * It is important that we take this reference after job has been
+	 * added the the queue so that any runtime resume could schedule this
+	 * job right there and then.
+	 */
+	_mali_osk_pm_dev_ref_get_async();
+
+	if (mali_utilization_enabled()) {
+		/*
+		 * We cheat a little bit by counting the PP as busy from the
+		 * time a PP job is queued. This will be fine because we only
+		 * loose the tiny idle gap between jobs, but we will instead
+		 * get less utilization work to do (less locks taken)
+		 */
+		mali_utilization_pp_start();
+	}
+
+	/* Add profiling events for job enqueued */
+
+	_mali_osk_profiling_add_event(
+		MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE,
+		mali_pp_job_get_pid(job),
+		mali_pp_job_get_tid(job),
+		mali_pp_job_get_frame_builder_id(job),
+		mali_pp_job_get_flush_id(job),
+		0);
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+	trace_gpu_job_enqueue(mali_pp_job_get_tid(job),
+			      mali_pp_job_get_id(job), "PP");
+#endif
+
+	MALI_DEBUG_PRINT(3, ("Mali PP scheduler: %s job %u (0x%08X) with %u parts queued.\n",
+			     mali_pp_job_is_virtual(job)
+			     ? "Virtual" : "Physical",
+			     mali_pp_job_get_id(job), job,
+			     mali_pp_job_get_sub_job_count(job)));
+
+	return MALI_TRUE; /* job queued */
+}
+
+static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job,
+		mali_bool success)
+{
+	_mali_uk_gp_job_finished_s *jobres;
+	struct mali_session_data *session;
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_gp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	notification = mali_gp_job_get_finished_notification(job);
+	MALI_DEBUG_ASSERT_POINTER(notification);
+
+	jobres = notification->result_buffer;
+	MALI_DEBUG_ASSERT_POINTER(jobres);
+
+	jobres->user_job_ptr = mali_gp_job_get_user_id(job);
+	if (MALI_TRUE == success) {
+		jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS;
+	} else {
+		jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR;
+	}
+	jobres->heap_current_addr = mali_gp_job_get_current_heap_addr(job);
+	jobres->perf_counter0 = mali_gp_job_get_perf_counter_value0(job);
+	jobres->perf_counter1 = mali_gp_job_get_perf_counter_value1(job);
+
+	mali_session_send_notification(session, notification);
+}
+
+static void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job,
+		u32 num_cores_in_virtual)
+{
+	u32 i;
+	u32 num_counters_to_copy;
+	_mali_uk_pp_job_finished_s *jobres;
+	struct mali_session_data *session;
+	_mali_osk_notification_t *notification;
+
+	if (MALI_TRUE == mali_pp_job_use_no_notification(job)) {
+		return;
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_pp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	notification = mali_pp_job_get_finished_notification(job);
+	MALI_DEBUG_ASSERT_POINTER(notification);
+
+	jobres = notification->result_buffer;
+	MALI_DEBUG_ASSERT_POINTER(jobres);
+
+	jobres->user_job_ptr = mali_pp_job_get_user_id(job);
+	if (MALI_TRUE == mali_pp_job_was_success(job)) {
+		jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS;
+	} else {
+		jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR;
+	}
+
+	if (mali_pp_job_is_virtual(job)) {
+		num_counters_to_copy = num_cores_in_virtual;
+	} else {
+		num_counters_to_copy = mali_pp_job_get_sub_job_count(job);
+	}
+
+	for (i = 0; i < num_counters_to_copy; i++) {
+		jobres->perf_counter0[i] =
+			mali_pp_job_get_perf_counter_value0(job, i);
+		jobres->perf_counter1[i] =
+			mali_pp_job_get_perf_counter_value1(job, i);
+		jobres->perf_counter_src0 =
+			mali_pp_job_get_pp_counter_global_src0();
+		jobres->perf_counter_src1 =
+			mali_pp_job_get_pp_counter_global_src1();
+	}
+
+	mali_session_send_notification(session, notification);
+}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_DELETE)
+
+static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock);
+	mali_pp_job_list_addtail(job, &scheduler_pp_job_deletion_queue);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock);
+
+	_mali_osk_wq_schedule_work(scheduler_wq_pp_job_delete);
+}
+
+static void mali_scheduler_do_pp_job_delete(void *arg)
+{
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(list);
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+
+	MALI_IGNORE(arg);
+
+	/*
+	 * Quickly "unhook" the jobs pending to be deleted, so we can release
+	 * the lock before we start deleting the job objects
+	 * (without any locks held)
+	 */
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock);
+	_mali_osk_list_move_list(&scheduler_pp_job_deletion_queue, &list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+
+		_mali_osk_list_delinit(&job->list);
+		mali_pp_job_delete(job); /* delete the job object itself */
+	}
+}
+
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_DELETE) */
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+
+static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock);
+	mali_pp_job_list_addtail(job, &scheduler_pp_job_queue_list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock);
+
+	_mali_osk_wq_schedule_work(scheduler_wq_pp_job_queue);
+}
+
+static void mali_scheduler_do_pp_job_queue(void *arg)
+{
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(list);
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_IGNORE(arg);
+
+	/*
+	 * Quickly "unhook" the jobs pending to be queued, so we can release
+	 * the lock before we start queueing the job objects
+	 * (without any locks held)
+	 */
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock);
+	_mali_osk_list_move_list(&scheduler_pp_job_queue_list, &list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock);
+
+	/* First loop through all jobs and do the pre-work (no locks needed) */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_needs_dma_buf_mapping(job)) {
+			/*
+			 * This operation could fail, but we continue anyway,
+			 * because the worst that could happen is that this
+			 * job will fail due to a Mali page fault.
+			 */
+			mali_dma_buf_map_job(job);
+		}
+	}
+
+	mali_scheduler_lock();
+
+	/* Then loop through all jobs again to queue them (lock needed) */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+
+		/* Remove from scheduler_pp_job_queue_list before queueing */
+		mali_pp_job_list_remove(job);
+
+		if (mali_scheduler_queue_pp_job(job)) {
+			/* Job queued successfully */
+			schedule_mask |= MALI_SCHEDULER_MASK_PP;
+		} else {
+			/* Failed to enqueue job, release job (with error) */
+			mali_pp_job_fb_lookup_remove(job);
+			mali_pp_job_mark_unstarted_failed(job);
+
+			/* unlock scheduler in this uncommon case */
+			mali_scheduler_unlock();
+
+			schedule_mask |= mali_timeline_tracker_release(
+						 mali_pp_job_get_tracker(job));
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(job, 0, MALI_TRUE,
+						       MALI_FALSE);
+
+			mali_scheduler_lock();
+		}
+	}
+
+	mali_scheduler_unlock();
+
+	/* Trigger scheduling of jobs */
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
diff --git a/utgard/r5p1/common/mali_scheduler.h b/utgard/r5p1/common/mali_scheduler.h
new file mode 100755
index 0000000..47632b5
--- /dev/null
+++ b/utgard/r5p1/common/mali_scheduler.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SCHEDULER_H__
+#define __MALI_SCHEDULER_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_scheduler_types.h"
+#include "mali_session.h"
+
+struct mali_scheduler_job_queue {
+	_MALI_OSK_LIST_HEAD(normal_pri); /* Queued jobs with normal priority */
+	_MALI_OSK_LIST_HEAD(high_pri);   /* Queued jobs with high priority */
+	u32 depth;                       /* Depth of combined queues. */
+};
+
+extern _mali_osk_spinlock_irq_t *mali_scheduler_lock_obj;
+
+/* Queue of jobs to be executed on the GP group */
+extern struct mali_scheduler_job_queue job_queue_gp;
+
+/* Queue of PP jobs */
+extern struct mali_scheduler_job_queue job_queue_pp;
+
+extern _mali_osk_atomic_t mali_job_id_autonumber;
+extern _mali_osk_atomic_t mali_job_cache_order_autonumber;
+
+#define MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+_mali_osk_errcode_t mali_scheduler_initialize(void);
+void mali_scheduler_terminate(void);
+
+MALI_STATIC_INLINE void mali_scheduler_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_scheduler_lock_obj);
+	MALI_DEBUG_PRINT(5, ("Mali scheduler: scheduler lock taken.\n"));
+}
+
+MALI_STATIC_INLINE void mali_scheduler_unlock(void)
+{
+	MALI_DEBUG_PRINT(5, ("Mali scheduler: Releasing scheduler lock.\n"));
+	_mali_osk_spinlock_irq_unlock(mali_scheduler_lock_obj);
+}
+
+MALI_STATIC_INLINE u32 mali_scheduler_job_gp_count(void)
+{
+	return job_queue_gp.depth;
+}
+
+u32 mali_scheduler_job_physical_head_count(void);
+
+mali_bool mali_scheduler_job_next_is_virtual(void);
+
+struct mali_gp_job *mali_scheduler_job_gp_get(void);
+struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void);
+struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void);
+struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job);
+struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void);
+
+MALI_STATIC_INLINE u32 mali_scheduler_get_new_id(void)
+{
+	return _mali_osk_atomic_inc_return(&mali_job_id_autonumber);
+}
+
+MALI_STATIC_INLINE u32 mali_scheduler_get_new_cache_order(void)
+{
+	return _mali_osk_atomic_inc_return(&mali_job_cache_order_autonumber);
+}
+
+/**
+ * @brief Used by the Timeline system to queue a GP job.
+ *
+ * @note @ref mali_executor_schedule_from_mask() should be called if this
+ * function returns non-zero.
+ *
+ * @param job The GP job that is being activated.
+ *
+ * @return A scheduling bitmask that can be used to decide if scheduling is
+ * necessary after this call.
+ */
+mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job);
+
+/**
+ * @brief Used by the Timeline system to queue a PP job.
+ *
+ * @note @ref mali_executor_schedule_from_mask() should be called if this
+ * function returns non-zero.
+ *
+ * @param job The PP job that is being activated.
+ *
+ * @return A scheduling bitmask that can be used to decide if scheduling is
+ * necessary after this call.
+ */
+mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job);
+
+void mali_scheduler_complete_gp_job(struct mali_gp_job *job,
+				    mali_bool success,
+				    mali_bool user_notification,
+				    mali_bool dequeued);
+
+void mali_scheduler_complete_pp_job(struct mali_pp_job *job,
+				    u32 num_cores_in_virtual,
+				    mali_bool user_notification,
+				    mali_bool dequeued);
+
+void mali_scheduler_abort_session(struct mali_session_data *session);
+
+#if MALI_STATE_TRACKING
+u32 mali_scheduler_dump_state(char *buf, u32 size);
+#endif
+
+#endif /* __MALI_SCHEDULER_H__ */
diff --git a/utgard/r5p1/common/mali_scheduler_types.h b/utgard/r5p1/common/mali_scheduler_types.h
new file mode 100755
index 0000000..f862961
--- /dev/null
+++ b/utgard/r5p1/common/mali_scheduler_types.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SCHEDULER_TYPES_H__
+#define __MALI_SCHEDULER_TYPES_H__
+
+#include "mali_osk.h"
+
+#define MALI_SCHEDULER_JOB_ID_SPAN 65535
+
+/**
+ * Bitmask used for defered scheduling of subsystems.
+ */
+typedef u32 mali_scheduler_mask;
+
+#define MALI_SCHEDULER_MASK_GP (1<<0)
+#define MALI_SCHEDULER_MASK_PP (1<<1)
+
+#define MALI_SCHEDULER_MASK_EMPTY 0
+#define MALI_SCHEDULER_MASK_ALL (MALI_SCHEDULER_MASK_GP | MALI_SCHEDULER_MASK_PP)
+
+#endif /* __MALI_SCHEDULER_TYPES_H__ */
diff --git a/utgard/r5p1/common/mali_session.c b/utgard/r5p1/common/mali_session.c
new file mode 100755
index 0000000..5c06c3e
--- /dev/null
+++ b/utgard/r5p1/common/mali_session.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/sched.h>
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_session.h"
+#include "mali_ukk.h"
+
+_MALI_OSK_LIST_HEAD(mali_sessions);
+static u32 mali_session_count = 0;
+
+_mali_osk_spinlock_irq_t *mali_sessions_lock = NULL;
+
+_mali_osk_errcode_t mali_session_initialize(void)
+{
+	_MALI_OSK_INIT_LIST_HEAD(&mali_sessions);
+
+	mali_sessions_lock = _mali_osk_spinlock_irq_init(
+				     _MALI_OSK_LOCKFLAG_ORDERED,
+				     _MALI_OSK_LOCK_ORDER_SESSIONS);
+	if (NULL == mali_sessions_lock) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_session_terminate(void)
+{
+	if (NULL != mali_sessions_lock) {
+		_mali_osk_spinlock_irq_term(mali_sessions_lock);
+		mali_sessions_lock = NULL;
+	}
+}
+
+void mali_session_add(struct mali_session_data *session)
+{
+	mali_session_lock();
+	_mali_osk_list_add(&session->link, &mali_sessions);
+	mali_session_count++;
+	mali_session_unlock();
+}
+
+void mali_session_remove(struct mali_session_data *session)
+{
+	mali_session_lock();
+	_mali_osk_list_delinit(&session->link);
+	mali_session_count--;
+	mali_session_unlock();
+}
+
+u32 mali_session_get_count(void)
+{
+	return mali_session_count;
+}
+
+/*
+ * Get the max completed window jobs from all active session,
+ * which will be used in window render frame per sec calculate
+ */
+#if defined(CONFIG_MALI_DVFS)
+u32 mali_session_max_window_num(void)
+{
+	struct mali_session_data *session, *tmp;
+	u32 max_window_num = 0;
+	u32 tmp_number = 0;
+
+	mali_session_lock();
+
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		tmp_number = _mali_osk_atomic_xchg(
+				     &session->number_of_window_jobs, 0);
+		if (max_window_num < tmp_number) {
+			max_window_num = tmp_number;
+		}
+	}
+
+	mali_session_unlock();
+
+	return max_window_num;
+}
+#endif
+void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx)
+{
+	struct mali_session_data *session, *tmp;
+	char task_comm[TASK_COMM_LEN];
+	char task_default[] = "not found";
+	struct task_struct *ttask;
+	u32 mali_mem_usage;
+	u32 total_mali_mem_size;
+
+	MALI_DEBUG_ASSERT_POINTER(print_ctx);
+	mali_session_lock();
+
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		ttask = pid_task(find_vpid(session->pid), PIDTYPE_PID);
+		//get_task_comm(task_comm, ttask);
+		if (ttask)
+			strncpy(task_comm, ttask->comm, sizeof(ttask->comm));
+		else
+			strncpy(task_comm, task_default, sizeof(task_default));
+		_mali_osk_ctxprintf(print_ctx, "  %-25s  %-10u %-25s %-10u  %-15u  %-15u  %-10u  %-10u\n",
+				    session->comm, session->pid,  task_comm,
+				    session->mali_mem_array[MALI_MEM_OS] + session->mali_mem_array[MALI_MEM_BLOCK], session->max_mali_mem_allocated,
+				    session->mali_mem_array[MALI_MEM_EXTERNAL], session->mali_mem_array[MALI_MEM_UMP],
+				    session->mali_mem_array[MALI_MEM_DMA_BUF]);
+	}
+	mali_session_unlock();
+	mali_mem_usage  = _mali_ukk_report_memory_usage();
+	total_mali_mem_size = _mali_ukk_report_total_memory_size();
+	_mali_osk_ctxprintf(print_ctx, "Mali mem usage: %u\nMali mem limit: %u\n", mali_mem_usage, total_mali_mem_size);
+}
diff --git a/utgard/r5p1/common/mali_session.h b/utgard/r5p1/common/mali_session.h
new file mode 100755
index 0000000..9fad5cf
--- /dev/null
+++ b/utgard/r5p1/common/mali_session.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SESSION_H__
+#define __MALI_SESSION_H__
+
+#include "mali_mmu_page_directory.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_memory_types.h"
+#include "mali_memory_manager.h"
+
+struct mali_timeline_system;
+struct mali_soft_system;
+
+/* Number of frame builder job lists per session. */
+#define MALI_PP_JOB_FB_LOOKUP_LIST_SIZE 16
+#define MALI_PP_JOB_FB_LOOKUP_LIST_MASK (MALI_PP_JOB_FB_LOOKUP_LIST_SIZE - 1)
+
+struct mali_session_data {
+	_mali_osk_notification_queue_t *ioctl_queue;
+
+	_mali_osk_mutex_t *memory_lock; /**< Lock protecting the vm manipulation */
+#if 0
+	_mali_osk_list_t memory_head; /**< Track all the memory allocated in this session, for freeing on abnormal termination */
+#endif
+	struct mali_page_directory *page_directory; /**< MMU page directory for this session */
+
+	_MALI_OSK_LIST_HEAD(link); /**< Link for list of all sessions */
+	_MALI_OSK_LIST_HEAD(pp_job_list); /**< List of all PP jobs on this session */
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_t number_of_window_jobs; /**< Record the window jobs completed on this session in a period */
+#endif
+
+	_mali_osk_list_t pp_job_fb_lookup_list[MALI_PP_JOB_FB_LOOKUP_LIST_SIZE]; /**< List of PP job lists per frame builder id.  Used to link jobs from same frame builder. */
+
+	struct mali_soft_job_system *soft_job_system; /**< Soft job system for this session. */
+	struct mali_timeline_system *timeline_system; /**< Timeline system for this session. */
+
+	mali_bool is_aborting; /**< MALI_TRUE if the session is aborting, MALI_FALSE if not. */
+	mali_bool use_high_priority_job_queue; /**< If MALI_TRUE, jobs added from this session will use the high priority job queues. */
+	u32 pid;
+	char *comm;
+	size_t mali_mem_array[MALI_MEM_TYPE_MAX]; /**< The array to record all mali mem types' usage for this session. */
+	size_t max_mali_mem_allocated; /**< The past max mali memory usage for this session. */
+	/* Added for new memroy system */
+	struct mali_allocation_manager allocation_mgr;
+};
+
+_mali_osk_errcode_t mali_session_initialize(void);
+void mali_session_terminate(void);
+
+/* List of all sessions. Actual list head in mali_kernel_core.c */
+extern _mali_osk_list_t mali_sessions;
+/* Lock to protect modification and access to the mali_sessions list */
+extern _mali_osk_spinlock_irq_t *mali_sessions_lock;
+
+MALI_STATIC_INLINE void mali_session_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_sessions_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(mali_sessions_lock);
+}
+
+void mali_session_add(struct mali_session_data *session);
+void mali_session_remove(struct mali_session_data *session);
+u32 mali_session_get_count(void);
+
+#define MALI_SESSION_FOREACH(session, tmp, link) \
+	_MALI_OSK_LIST_FOREACHENTRY(session, tmp, &mali_sessions, struct mali_session_data, link)
+
+MALI_STATIC_INLINE struct mali_page_directory *mali_session_get_page_directory(struct mali_session_data *session)
+{
+	return session->page_directory;
+}
+
+MALI_STATIC_INLINE void mali_session_memory_lock(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_mutex_wait(session->memory_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_memory_unlock(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_mutex_signal(session->memory_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_send_notification(struct mali_session_data *session, _mali_osk_notification_t *object)
+{
+	_mali_osk_notification_queue_send(session->ioctl_queue, object);
+}
+
+#if defined(CONFIG_MALI_DVFS)
+
+MALI_STATIC_INLINE void mali_session_inc_num_window_jobs(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_atomic_inc(&session->number_of_window_jobs);
+}
+
+/*
+ * Get the max completed window jobs from all active session,
+ * which will be used in  window render frame per sec calculate
+ */
+u32 mali_session_max_window_num(void);
+
+#endif
+
+void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx);
+
+#endif /* __MALI_SESSION_H__ */
diff --git a/utgard/r5p1/common/mali_soft_job.c b/utgard/r5p1/common/mali_soft_job.c
new file mode 100755
index 0000000..36ac982
--- /dev/null
+++ b/utgard/r5p1/common/mali_soft_job.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_soft_job.h"
+#include "mali_osk.h"
+#include "mali_timeline.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+
+MALI_STATIC_INLINE void mali_soft_job_system_lock(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	_mali_osk_spinlock_irq_lock(system->lock);
+	MALI_DEBUG_PRINT(5, ("Mali Soft Job: soft system %p lock taken\n", system));
+	MALI_DEBUG_ASSERT(0 == system->lock_owner);
+	MALI_DEBUG_CODE(system->lock_owner = _mali_osk_get_tid());
+}
+
+MALI_STATIC_INLINE void mali_soft_job_system_unlock(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_PRINT(5, ("Mali Soft Job: releasing soft system %p lock\n", system));
+	MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner);
+	MALI_DEBUG_CODE(system->lock_owner = 0);
+	_mali_osk_spinlock_irq_unlock(system->lock);
+}
+
+#if defined(DEBUG)
+MALI_STATIC_INLINE void mali_soft_job_system_assert_locked(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner);
+}
+#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system) mali_soft_job_system_assert_locked(system)
+#else
+#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system)
+#endif /* defined(DEBUG) */
+
+struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session)
+{
+	struct mali_soft_job_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	system = (struct mali_soft_job_system *) _mali_osk_calloc(1, sizeof(struct mali_soft_job_system));
+	if (NULL == system) {
+		return NULL;
+	}
+
+	system->session = session;
+
+	system->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_SCHEDULER);
+	if (NULL == system->lock) {
+		mali_soft_job_system_destroy(system);
+		return NULL;
+	}
+	system->lock_owner = 0;
+	system->last_job_id = 0;
+
+	_MALI_OSK_INIT_LIST_HEAD(&(system->jobs_used));
+
+	return system;
+}
+
+void mali_soft_job_system_destroy(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	/* All jobs should be free at this point. */
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&(system->jobs_used)));
+
+	if (NULL != system) {
+		if (NULL != system->lock) {
+			_mali_osk_spinlock_irq_term(system->lock);
+		}
+		_mali_osk_free(system);
+	}
+}
+
+static void mali_soft_job_system_free_job(struct mali_soft_job_system *system, struct mali_soft_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_soft_job_system_lock(job->system);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_INVALID_ID != job->id);
+	MALI_DEBUG_ASSERT(system == job->system);
+
+	_mali_osk_list_del(&(job->system_list));
+
+	mali_soft_job_system_unlock(job->system);
+
+	_mali_osk_free(job);
+}
+
+MALI_STATIC_INLINE struct mali_soft_job *mali_soft_job_system_lookup_job(struct mali_soft_job_system *system, u32 job_id)
+{
+	struct mali_soft_job *job, *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) {
+		if (job->id == job_id)
+			return job;
+	}
+
+	return NULL;
+}
+
+void mali_soft_job_destroy(struct mali_soft_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: destroying soft job %u (0x%08X)\n", job->id, job));
+
+	if (NULL != job) {
+		if (0 < _mali_osk_atomic_dec_return(&job->refcount)) return;
+
+		_mali_osk_atomic_term(&job->refcount);
+
+		if (NULL != job->activated_notification) {
+			_mali_osk_notification_delete(job->activated_notification);
+			job->activated_notification = NULL;
+		}
+
+		mali_soft_job_system_free_job(job->system, job);
+	}
+}
+
+struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job)
+{
+	struct mali_soft_job *job;
+	_mali_osk_notification_t *notification = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT((MALI_SOFT_JOB_TYPE_USER_SIGNALED == type) ||
+			  (MALI_SOFT_JOB_TYPE_SELF_SIGNALED == type));
+
+	notification = _mali_osk_notification_create(_MALI_NOTIFICATION_SOFT_ACTIVATED, sizeof(_mali_uk_soft_job_activated_s));
+	if (unlikely(NULL == notification)) {
+		MALI_PRINT_ERROR(("Mali Soft Job: failed to allocate notification"));
+		return NULL;
+	}
+
+	job = _mali_osk_malloc(sizeof(struct mali_soft_job));
+	if (unlikely(NULL == job)) {
+		MALI_DEBUG_PRINT(2, ("Mali Soft Job: system alloc job failed. \n"));
+		return NULL;
+	}
+
+	mali_soft_job_system_lock(system);
+
+	job->system = system;
+	job->id = system->last_job_id++;
+	job->state = MALI_SOFT_JOB_STATE_ALLOCATED;
+
+	_mali_osk_list_add(&(job->system_list), &(system->jobs_used));
+
+	job->type = type;
+	job->user_job = user_job;
+	job->activated = MALI_FALSE;
+
+	job->activated_notification = notification;
+
+	_mali_osk_atomic_init(&job->refcount, 1);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_ALLOCATED == job->state);
+	MALI_DEBUG_ASSERT(system == job->system);
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_INVALID_ID != job->id);
+
+	mali_soft_job_system_unlock(system);
+
+	return job;
+}
+
+mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence)
+{
+	mali_timeline_point point;
+	struct mali_soft_job_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	system = job->system;
+
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT_POINTER(system->session->timeline_system);
+
+	mali_soft_job_system_lock(system);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_ALLOCATED == job->state);
+	job->state = MALI_SOFT_JOB_STATE_STARTED;
+
+	mali_soft_job_system_unlock(system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: starting soft job %u (0x%08X)\n", job->id, job));
+
+	mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_SOFT, fence, job);
+	point = mali_timeline_system_add_tracker(system->session->timeline_system, &job->tracker, MALI_TIMELINE_SOFT);
+
+	return point;
+}
+
+static mali_bool mali_soft_job_is_activated(void *data)
+{
+	struct mali_soft_job *job;
+
+	job = (struct mali_soft_job *) data;
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	return job->activated;
+}
+
+_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id)
+{
+	struct mali_soft_job *job;
+	struct mali_timeline_system *timeline_system;
+	mali_scheduler_mask schedule_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_soft_job_system_lock(system);
+
+	job = mali_soft_job_system_lookup_job(system, job_id);
+
+	if ((NULL == job) || (MALI_SOFT_JOB_TYPE_USER_SIGNALED != job->type)
+	    || !(MALI_SOFT_JOB_STATE_STARTED == job->state || MALI_SOFT_JOB_STATE_TIMED_OUT == job->state)) {
+		mali_soft_job_system_unlock(system);
+		MALI_PRINT_ERROR(("Mali Soft Job: invalid soft job id %u", job_id));
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	if (MALI_SOFT_JOB_STATE_TIMED_OUT == job->state) {
+		job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+		mali_soft_job_system_unlock(system);
+
+		MALI_DEBUG_ASSERT(MALI_TRUE == job->activated);
+		MALI_DEBUG_PRINT(4, ("Mali Soft Job: soft job %u (0x%08X) was timed out\n", job->id, job));
+		mali_soft_job_destroy(job);
+
+		return _MALI_OSK_ERR_TIMEOUT;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+	job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+	mali_soft_job_system_unlock(system);
+
+	/* Since the job now is in signaled state, timeouts from the timeline system will be
+	 * ignored, and it is not possible to signal this job again. */
+
+	timeline_system = system->session->timeline_system;
+	MALI_DEBUG_ASSERT_POINTER(timeline_system);
+
+	/* Wait until activated. */
+	_mali_osk_wait_queue_wait_event(timeline_system->wait_queue, mali_soft_job_is_activated, (void *) job);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: signaling soft job %u (0x%08X)\n", job->id, job));
+
+	schedule_mask = mali_timeline_tracker_release(&job->tracker);
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+
+	mali_soft_job_destroy(job);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_soft_job_send_activated_notification(struct mali_soft_job *job)
+{
+	if (NULL != job->activated_notification) {
+		_mali_uk_soft_job_activated_s *res = job->activated_notification->result_buffer;
+		res->user_job = job->user_job;
+		mali_session_send_notification(job->system->session, job->activated_notification);
+	}
+	job->activated_notification = NULL;
+}
+
+mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	MALI_DEBUG_ASSERT_POINTER(job->system->session);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline activation for soft job %u (0x%08X).\n", job->id, job));
+
+	mali_soft_job_system_lock(job->system);
+
+	if (unlikely(job->system->session->is_aborting)) {
+		MALI_DEBUG_PRINT(3, ("Mali Soft Job: Soft job %u (0x%08X) activated while session is aborting.\n", job->id, job));
+
+		mali_soft_job_system_unlock(job->system);
+
+		/* Since we are in shutdown, we can ignore the scheduling bitmask. */
+		mali_timeline_tracker_release(&job->tracker);
+		mali_soft_job_destroy(job);
+		return schedule_mask;
+	}
+
+	/* Send activated notification. */
+	mali_soft_job_send_activated_notification(job);
+
+	/* Wake up sleeping signaler. */
+	job->activated = MALI_TRUE;
+
+	/* If job type is self signaled, release tracker, move soft job to free list, and scheduler at once */
+	if (MALI_SOFT_JOB_TYPE_SELF_SIGNALED == job->type) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+		job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+		mali_soft_job_system_unlock(job->system);
+
+		schedule_mask |= mali_timeline_tracker_release(&job->tracker);
+
+		mali_soft_job_destroy(job);
+	} else {
+		_mali_osk_wait_queue_wake_up(job->tracker.system->wait_queue);
+
+		mali_soft_job_system_unlock(job->system);
+	}
+
+	return schedule_mask;
+}
+
+mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	MALI_DEBUG_ASSERT_POINTER(job->system->session);
+	MALI_DEBUG_ASSERT(MALI_TRUE == job->activated);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline timeout for soft job %u (0x%08X).\n", job->id, job));
+
+	mali_soft_job_system_lock(job->system);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED  == job->state ||
+			  MALI_SOFT_JOB_STATE_SIGNALED == job->state);
+
+	if (unlikely(job->system->session->is_aborting)) {
+		/* The session is aborting.  This job will be released and destroyed by @ref
+		 * mali_soft_job_system_abort(). */
+		mali_soft_job_system_unlock(job->system);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	if (MALI_SOFT_JOB_STATE_STARTED != job->state) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED == job->state);
+
+		/* The job is about to be signaled, ignore timeout. */
+		MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeout on soft job %u (0x%08X) in signaled state.\n", job->id, job));
+		mali_soft_job_system_unlock(job->system);
+		return schedule_mask;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+	job->state = MALI_SOFT_JOB_STATE_TIMED_OUT;
+	_mali_osk_atomic_inc(&job->refcount);
+
+	mali_soft_job_system_unlock(job->system);
+
+	schedule_mask = mali_timeline_tracker_release(&job->tracker);
+
+	mali_soft_job_destroy(job);
+
+	return schedule_mask;
+}
+
+void mali_soft_job_system_abort(struct mali_soft_job_system *system)
+{
+	struct mali_soft_job *job, *tmp;
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(jobs);
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting soft job system for session 0x%08X.\n", system->session));
+
+	mali_soft_job_system_lock(system);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED   == job->state ||
+				  MALI_SOFT_JOB_STATE_TIMED_OUT == job->state);
+
+		if (MALI_SOFT_JOB_STATE_STARTED == job->state) {
+			/* If the job has been activated, we have to release the tracker and destroy
+			 * the job.  If not, the tracker will be released and the job destroyed when
+			 * it is activated. */
+			if (MALI_TRUE == job->activated) {
+				MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting unsignaled soft job %u (0x%08X).\n", job->id, job));
+
+				job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+				_mali_osk_list_move(&job->system_list, &jobs);
+			}
+		} else if (MALI_SOFT_JOB_STATE_TIMED_OUT == job->state) {
+			MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting timed out soft job %u (0x%08X).\n", job->id, job));
+
+			/* We need to destroy this soft job. */
+			_mali_osk_list_move(&job->system_list, &jobs);
+		}
+	}
+
+	mali_soft_job_system_unlock(system);
+
+	/* Release and destroy jobs. */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &jobs, struct mali_soft_job, system_list) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED  == job->state ||
+				  MALI_SOFT_JOB_STATE_TIMED_OUT == job->state);
+
+		if (MALI_SOFT_JOB_STATE_SIGNALED == job->state) {
+			mali_timeline_tracker_release(&job->tracker);
+		}
+
+		/* Move job back to used list before destroying. */
+		_mali_osk_list_move(&job->system_list, &system->jobs_used);
+
+		mali_soft_job_destroy(job);
+	}
+}
diff --git a/utgard/r5p1/common/mali_soft_job.h b/utgard/r5p1/common/mali_soft_job.h
new file mode 100755
index 0000000..f35394e
--- /dev/null
+++ b/utgard/r5p1/common/mali_soft_job.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SOFT_JOB_H__
+#define __MALI_SOFT_JOB_H__
+
+#include "mali_osk.h"
+
+#include "mali_timeline.h"
+
+struct mali_timeline_fence;
+struct mali_session_data;
+struct mali_soft_job;
+struct mali_soft_job_system;
+
+/**
+ * Soft job types.
+ *
+ * Soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED will only complete after activation if either
+ * they are signaled by user-space (@ref mali_soft_job_system_signaled_job) or if they are timed out
+ * by the Timeline system.
+ * Soft jobs of type MALI_SOFT_JOB_TYPE_SELF_SIGNALED will release job resource automatically
+ * in kernel when the job is activated.
+ */
+typedef enum mali_soft_job_type {
+	MALI_SOFT_JOB_TYPE_SELF_SIGNALED,
+	MALI_SOFT_JOB_TYPE_USER_SIGNALED,
+} mali_soft_job_type;
+
+/**
+ * Soft job state.
+ *
+ * mali_soft_job_system_start_job a job will first be allocated.The job's state set to MALI_SOFT_JOB_STATE_ALLOCATED.
+ * Once the job is added to the timeline system, the state changes to MALI_SOFT_JOB_STATE_STARTED.
+ *
+ * For soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED the state is changed to
+ * MALI_SOFT_JOB_STATE_SIGNALED when @ref mali_soft_job_system_signal_job is called and the soft
+ * job's state is MALI_SOFT_JOB_STATE_STARTED or MALI_SOFT_JOB_STATE_TIMED_OUT.
+ *
+ * If a soft job of type MALI_SOFT_JOB_TYPE_USER_SIGNALED is timed out before being signaled, the
+ * state is changed to MALI_SOFT_JOB_STATE_TIMED_OUT.  This can only happen to soft jobs in state
+ * MALI_SOFT_JOB_STATE_STARTED.
+ *
+ */
+typedef enum mali_soft_job_state {
+	MALI_SOFT_JOB_STATE_ALLOCATED,
+	MALI_SOFT_JOB_STATE_STARTED,
+	MALI_SOFT_JOB_STATE_SIGNALED,
+	MALI_SOFT_JOB_STATE_TIMED_OUT,
+} mali_soft_job_state;
+
+#define MALI_SOFT_JOB_INVALID_ID ((u32) -1)
+
+/**
+ * Soft job struct.
+ *
+ * Soft job can be used to represent any kind of CPU work done in kernel-space.
+ */
+typedef struct mali_soft_job {
+	mali_soft_job_type            type;                   /**< Soft job type.  Must be one of MALI_SOFT_JOB_TYPE_*. */
+	u64                           user_job;               /**< Identifier for soft job in user space. */
+	_mali_osk_atomic_t            refcount;               /**< Soft jobs are reference counted to prevent premature deletion. */
+	struct mali_timeline_tracker  tracker;                /**< Timeline tracker for soft job. */
+	mali_bool                     activated;              /**< MALI_TRUE if the job has been activated, MALI_FALSE if not. */
+	_mali_osk_notification_t     *activated_notification; /**< Pre-allocated notification object for ACTIVATED_NOTIFICATION. */
+
+	/* Protected by soft job system lock. */
+	u32                           id;                     /**< Used by user-space to find corresponding soft job in kernel-space. */
+	mali_soft_job_state           state;                  /**< State of soft job, must be one of MALI_SOFT_JOB_STATE_*. */
+	struct mali_soft_job_system  *system;                 /**< The soft job system this job is in. */
+	_mali_osk_list_t              system_list;            /**< List element used by soft job system. */
+} mali_soft_job;
+
+/**
+ * Per-session soft job system.
+ *
+ * The soft job system is used to manage all soft jobs that belongs to a session.
+ */
+typedef struct mali_soft_job_system {
+	struct mali_session_data *session;                    /**< The session this soft job system belongs to. */
+	_MALI_OSK_LIST_HEAD(jobs_used);                       /**< List of all allocated soft jobs. */
+
+	_mali_osk_spinlock_irq_t *lock;                       /**< Lock used to protect soft job system and its soft jobs. */
+	u32 lock_owner;                                       /**< Contains tid of thread that locked the system or 0, if not locked. */
+	u32 last_job_id;                                      /**< Recored the last job id protected by lock. */
+} mali_soft_job_system;
+
+/**
+ * Create a soft job system.
+ *
+ * @param session The session this soft job system will belong to.
+ * @return The new soft job system, or NULL if unsuccessful.
+ */
+struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session);
+
+/**
+ * Destroy a soft job system.
+ *
+ * @note The soft job must not have any started or activated jobs.  Call @ref
+ * mali_soft_job_system_abort first.
+ *
+ * @param system The soft job system we are destroying.
+ */
+void mali_soft_job_system_destroy(struct mali_soft_job_system *system);
+
+/**
+ * Create a soft job.
+ *
+ * @param system Soft job system to create soft job from.
+ * @param type Type of the soft job.
+ * @param user_job Identifier for soft job in user space.
+ * @return New soft job if successful, NULL if not.
+ */
+struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job);
+
+/**
+ * Destroy soft job.
+ *
+ * @param job Soft job to destroy.
+ */
+void mali_soft_job_destroy(struct mali_soft_job *job);
+
+/**
+ * Start a soft job.
+ *
+ * The soft job will be added to the Timeline system which will then activate it after all
+ * dependencies have been resolved.
+ *
+ * Create soft jobs with @ref mali_soft_job_create before starting them.
+ *
+ * @param job Soft job to start.
+ * @param fence Fence representing dependencies for this soft job.
+ * @return Point on soft job timeline.
+ */
+mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence);
+
+/**
+ * Use by user-space to signal that a soft job has completed.
+ *
+ * @note Only valid for soft jobs with type MALI_SOFT_JOB_TYPE_USER_SIGNALED.
+ *
+ * @note The soft job must be in state MALI_SOFT_JOB_STATE_STARTED for the signal to be successful.
+ *
+ * @note If the soft job was signaled successfully, or it received a time out, the soft job will be
+ * destroyed after this call and should no longer be used.
+ *
+ * @note This function will block until the soft job has been activated.
+ *
+ * @param system The soft job system the job was started in.
+ * @param job_id ID of soft job we are signaling.
+ *
+ * @return _MALI_OSK_ERR_ITEM_NOT_FOUND if the soft job ID was invalid, _MALI_OSK_ERR_TIMEOUT if the
+ * soft job was timed out or _MALI_OSK_ERR_OK if we successfully signaled the soft job.
+ */
+_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id);
+
+/**
+ * Used by the Timeline system to activate a soft job.
+ *
+ * @param job The soft job that is being activated.
+ * @return A scheduling bitmask.
+ */
+mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job);
+
+/**
+ * Used by the Timeline system to timeout a soft job.
+ *
+ * A soft job is timed out if it completes or is signaled later than MALI_TIMELINE_TIMEOUT_HZ after
+ * activation.
+ *
+ * @param job The soft job that is being timed out.
+ * @return A scheduling bitmask.
+ */
+mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job);
+
+/**
+ * Used to cleanup activated soft jobs in the soft job system on session abort.
+ *
+ * @param system The soft job system that is being aborted.
+ */
+void mali_soft_job_system_abort(struct mali_soft_job_system *system);
+
+#endif /* __MALI_SOFT_JOB_H__ */
diff --git a/utgard/r5p1/common/mali_spinlock_reentrant.c b/utgard/r5p1/common/mali_spinlock_reentrant.c
new file mode 100755
index 0000000..178abaf
--- /dev/null
+++ b/utgard/r5p1/common/mali_spinlock_reentrant.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_spinlock_reentrant.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order)
+{
+	struct mali_spinlock_reentrant *spinlock;
+
+	spinlock = _mali_osk_calloc(1, sizeof(struct mali_spinlock_reentrant));
+	if (NULL == spinlock) {
+		return NULL;
+	}
+
+	spinlock->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, lock_order);
+	if (NULL == spinlock->lock) {
+		mali_spinlock_reentrant_term(spinlock);
+		return NULL;
+	}
+
+	return spinlock;
+}
+
+void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT(0 == spinlock->counter && 0 == spinlock->owner);
+
+	if (NULL != spinlock->lock) {
+		_mali_osk_spinlock_irq_term(spinlock->lock);
+	}
+
+	_mali_osk_free(spinlock);
+}
+
+void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	MALI_DEBUG_ASSERT(0 != tid);
+
+	MALI_DEBUG_PRINT(5, ("%s ^\n", __FUNCTION__));
+
+	if (tid != spinlock->owner) {
+		_mali_osk_spinlock_irq_lock(spinlock->lock);
+		MALI_DEBUG_ASSERT(0 == spinlock->owner && 0 == spinlock->counter);
+		spinlock->owner = tid;
+	}
+
+	MALI_DEBUG_PRINT(5, ("%s v\n", __FUNCTION__));
+
+	++spinlock->counter;
+}
+
+void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	MALI_DEBUG_ASSERT(0 != tid && tid == spinlock->owner);
+
+	--spinlock->counter;
+	if (0 == spinlock->counter) {
+		spinlock->owner = 0;
+		MALI_DEBUG_PRINT(5, ("%s release last\n", __FUNCTION__));
+		_mali_osk_spinlock_irq_unlock(spinlock->lock);
+	}
+}
diff --git a/utgard/r5p1/common/mali_spinlock_reentrant.h b/utgard/r5p1/common/mali_spinlock_reentrant.h
new file mode 100755
index 0000000..6a62df8
--- /dev/null
+++ b/utgard/r5p1/common/mali_spinlock_reentrant.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SPINLOCK_REENTRANT_H__
+#define __MALI_SPINLOCK_REENTRANT_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/**
+ * Reentrant spinlock.
+ */
+struct mali_spinlock_reentrant {
+	_mali_osk_spinlock_irq_t *lock;
+	u32               owner;
+	u32               counter;
+};
+
+/**
+ * Create a new reentrant spinlock.
+ *
+ * @param lock_order Lock order.
+ * @return New reentrant spinlock.
+ */
+struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order);
+
+/**
+ * Terminate reentrant spinlock and free any associated resources.
+ *
+ * @param spinlock Reentrant spinlock to terminate.
+ */
+void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock);
+
+/**
+ * Wait for reentrant spinlock to be signaled.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ */
+void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid);
+
+/**
+ * Signal reentrant spinlock.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ */
+void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid);
+
+/**
+ * Check if thread is holding reentrant spinlock.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ * @return MALI_TRUE if thread is holding spinlock, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_spinlock_reentrant_is_held(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	return (tid == spinlock->owner && 0 < spinlock->counter);
+}
+
+#endif /* __MALI_SPINLOCK_REENTRANT_H__ */
diff --git a/utgard/r5p1/common/mali_timeline.c b/utgard/r5p1/common/mali_timeline.c
new file mode 100755
index 0000000..f18aaa6
--- /dev/null
+++ b/utgard/r5p1/common/mali_timeline.c
@@ -0,0 +1,1518 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timeline.h"
+#include "mali_kernel_common.h"
+#include "mali_scheduler.h"
+#include "mali_soft_job.h"
+#include "mali_timeline_fence_wait.h"
+#include "mali_timeline_sync_fence.h"
+#include "mali_executor.h"
+#include "mali_pp_job.h"
+
+#define MALI_TIMELINE_SYSTEM_LOCKED(system) (mali_spinlock_reentrant_is_held((system)->spinlock, _mali_osk_get_tid()))
+
+/*
+ * Following three elements are used to record how many
+ * gp, physical pp or virtual pp jobs are delayed in the whole
+ * timeline system, we can use these three value to decide
+ * if need to deactivate idle group.
+ */
+_mali_osk_atomic_t gp_tracker_count;
+_mali_osk_atomic_t phy_pp_tracker_count;
+_mali_osk_atomic_t virt_pp_tracker_count;
+
+static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *waiter);
+
+#if defined(CONFIG_SYNC)
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+
+struct mali_deferred_fence_put_entry {
+	struct hlist_node list;
+	struct sync_fence *fence;
+};
+
+static HLIST_HEAD(mali_timeline_sync_fence_to_free_list);
+static DEFINE_SPINLOCK(mali_timeline_sync_fence_to_free_lock);
+
+static void put_sync_fences(struct work_struct *ignore)
+{
+	struct hlist_head list;
+	struct hlist_node *tmp, *pos;
+	unsigned long flags;
+	struct mali_deferred_fence_put_entry *o;
+
+	spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags);
+	hlist_move_list(&mali_timeline_sync_fence_to_free_list, &list);
+	spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags);
+
+	hlist_for_each_entry_safe(o, pos, tmp, &list, list) {
+		sync_fence_put(o->fence);
+		kfree(o);
+	}
+}
+
+static DECLARE_DELAYED_WORK(delayed_sync_fence_put, put_sync_fences);
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */
+
+/* Callback that is called when a sync fence a tracker is waiting on is signaled. */
+static void mali_timeline_sync_fence_callback(struct sync_fence *sync_fence, struct sync_fence_waiter *sync_fence_waiter)
+{
+	struct mali_timeline_system  *system;
+	struct mali_timeline_waiter  *waiter;
+	struct mali_timeline_tracker *tracker;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+	mali_bool is_aborting = MALI_FALSE;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	int fence_status = sync_fence->status;
+#else
+	int fence_status = atomic_read(&sync_fence->status);
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_waiter);
+
+	tracker = _MALI_OSK_CONTAINER_OF(sync_fence_waiter, struct mali_timeline_tracker, sync_fence_waiter);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	system = tracker->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	is_aborting = system->session->is_aborting;
+	if (!is_aborting && (0 > fence_status)) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, fence_status));
+		tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT;
+	}
+
+	waiter = tracker->waiter_sync;
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+
+	tracker->sync_fence = NULL;
+	tracker->fence.sync_fd = -1;
+
+	schedule_mask |= mali_timeline_system_release_waiter(system, waiter);
+
+	/* If aborting, wake up sleepers that are waiting for sync fence callbacks to complete. */
+	if (is_aborting) {
+		_mali_osk_wait_queue_wake_up(system->wait_queue);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	/*
+	 * Older versions of Linux, before 3.5, doesn't support fput() in interrupt
+	 * context. For those older kernels, allocate a list object and put the
+	 * fence object on that and defer the call to sync_fence_put() to a workqueue.
+	 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
+	{
+		struct mali_deferred_fence_put_entry *obj;
+
+		obj = kzalloc(sizeof(struct mali_deferred_fence_put_entry), GFP_ATOMIC);
+		if (obj) {
+			unsigned long flags;
+			mali_bool schedule = MALI_FALSE;
+
+			obj->fence = sync_fence;
+
+			spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags);
+			if (hlist_empty(&mali_timeline_sync_fence_to_free_list))
+				schedule = MALI_TRUE;
+			hlist_add_head(&obj->list, &mali_timeline_sync_fence_to_free_list);
+			spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags);
+
+			if (schedule)
+				schedule_delayed_work(&delayed_sync_fence_put, 0);
+		}
+	}
+#else
+	sync_fence_put(sync_fence);
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */
+
+	if (!is_aborting) {
+		mali_executor_schedule_from_mask(schedule_mask, MALI_TRUE);
+	}
+}
+#endif /* defined(CONFIG_SYNC) */
+
+static mali_scheduler_mask mali_timeline_tracker_time_out(struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_SOFT == tracker->type);
+
+	return mali_soft_job_system_timeout_job((struct mali_soft_job *) tracker->job);
+}
+
+static void mali_timeline_timer_callback(void *data)
+{
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *tracker;
+	struct mali_timeline *timeline;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+
+	timeline = (struct mali_timeline *) data;
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	if (!system->timer_enabled) {
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+		return;
+	}
+
+	tracker = timeline->tracker_tail;
+	timeline->timer_active = MALI_FALSE;
+
+	if (NULL != tracker && MALI_TRUE == tracker->timer_active) {
+		/* This is likely the delayed work that has been schedule out before cancelled. */
+		if (MALI_TIMELINE_TIMEOUT_HZ > (_mali_osk_time_tickcount() - tracker->os_tick_activate)) {
+			mali_spinlock_reentrant_signal(system->spinlock, tid);
+			return;
+		}
+
+		schedule_mask = mali_timeline_tracker_time_out(tracker);
+		tracker->timer_active = MALI_FALSE;
+	} else {
+		MALI_PRINT_ERROR(("Mali Timeline: Soft job timer callback without a waiting tracker.\n"));
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+void mali_timeline_system_stop_timer(struct mali_timeline_system *system)
+{
+	u32 i;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+	system->timer_enabled = MALI_FALSE;
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (NULL != timeline->delayed_work) {
+			_mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work);
+			timeline->timer_active = MALI_FALSE;
+		}
+	}
+}
+
+static void mali_timeline_destroy(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	if (NULL != timeline) {
+		/* Assert that the timeline object has been properly cleaned up before destroying it. */
+		MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next);
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_head);
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+		MALI_DEBUG_ASSERT(NULL == timeline->waiter_head);
+		MALI_DEBUG_ASSERT(NULL == timeline->waiter_tail);
+		MALI_DEBUG_ASSERT(NULL != timeline->system);
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_MAX > timeline->id);
+
+#if defined(CONFIG_SYNC)
+		if (NULL != timeline->sync_tl) {
+			sync_timeline_destroy(timeline->sync_tl);
+		}
+#endif /* defined(CONFIG_SYNC) */
+
+		if (NULL != timeline->delayed_work) {
+			_mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work);
+			_mali_osk_wq_delayed_delete_work_nonflush(timeline->delayed_work);
+		}
+
+		_mali_osk_free(timeline);
+	}
+}
+
+static struct mali_timeline *mali_timeline_create(struct mali_timeline_system *system, enum mali_timeline_id id)
+{
+	struct mali_timeline *timeline;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(id < MALI_TIMELINE_MAX);
+
+	timeline = (struct mali_timeline *) _mali_osk_calloc(1, sizeof(struct mali_timeline));
+	if (NULL == timeline) {
+		return NULL;
+	}
+
+	/* Initially the timeline is empty. */
+#if defined(MALI_TIMELINE_DEBUG_START_POINT)
+	/* Start the timeline a bit before wrapping when debugging. */
+	timeline->point_next = UINT_MAX - MALI_TIMELINE_MAX_POINT_SPAN - 128;
+#else
+	timeline->point_next = 1;
+#endif
+	timeline->point_oldest = timeline->point_next;
+
+	/* The tracker and waiter lists will initially be empty. */
+
+	timeline->system = system;
+	timeline->id = id;
+
+	timeline->delayed_work = _mali_osk_wq_delayed_create_work(mali_timeline_timer_callback, timeline);
+	if (NULL == timeline->delayed_work) {
+		mali_timeline_destroy(timeline);
+		return NULL;
+	}
+
+	timeline->timer_active = MALI_FALSE;
+
+#if defined(CONFIG_SYNC)
+	{
+		char timeline_name[32];
+
+		switch (id) {
+		case MALI_TIMELINE_GP:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-gp", _mali_osk_get_pid());
+			break;
+		case MALI_TIMELINE_PP:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-pp", _mali_osk_get_pid());
+			break;
+		case MALI_TIMELINE_SOFT:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-soft", _mali_osk_get_pid());
+			break;
+		default:
+			MALI_PRINT_ERROR(("Mali Timeline: Invalid timeline id %d\n", id));
+			mali_timeline_destroy(timeline);
+			return NULL;
+		}
+
+		timeline->sync_tl = mali_sync_timeline_create(timeline, timeline_name);
+		if (NULL == timeline->sync_tl) {
+			mali_timeline_destroy(timeline);
+			return NULL;
+		}
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	return timeline;
+}
+
+static void mali_timeline_insert_tracker(struct mali_timeline *timeline, struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	if (mali_timeline_is_full(timeline)) {
+		/* Don't add tracker if timeline is full. */
+		tracker->point = MALI_TIMELINE_NO_POINT;
+		return;
+	}
+
+	tracker->timeline = timeline;
+	tracker->point    = timeline->point_next;
+
+	/* Find next available point. */
+	timeline->point_next++;
+	if (MALI_TIMELINE_NO_POINT == timeline->point_next) {
+		timeline->point_next++;
+	}
+
+	MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+
+	if (MALI_TIMELINE_TRACKER_GP == tracker->type) {
+		_mali_osk_atomic_inc(&gp_tracker_count);
+	} else if (MALI_TIMELINE_TRACKER_PP == tracker->type) {
+		if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) {
+			_mali_osk_atomic_inc(&virt_pp_tracker_count);
+		} else {
+			_mali_osk_atomic_inc(&phy_pp_tracker_count);
+		}
+	}
+
+	/* Add tracker as new head on timeline's tracker list. */
+	if (NULL == timeline->tracker_head) {
+		/* Tracker list is empty. */
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+
+		timeline->tracker_tail = tracker;
+
+		MALI_DEBUG_ASSERT(NULL == tracker->timeline_next);
+		MALI_DEBUG_ASSERT(NULL == tracker->timeline_prev);
+	} else {
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_head->timeline_next);
+
+		tracker->timeline_prev = timeline->tracker_head;
+		timeline->tracker_head->timeline_next = tracker;
+
+		MALI_DEBUG_ASSERT(NULL == tracker->timeline_next);
+	}
+	timeline->tracker_head = tracker;
+
+	MALI_DEBUG_ASSERT(NULL == timeline->tracker_head->timeline_next);
+	MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail->timeline_prev);
+}
+
+/* Inserting the waiter object into the given timeline */
+static void mali_timeline_insert_waiter(struct mali_timeline *timeline, struct mali_timeline_waiter *waiter_new)
+{
+	struct mali_timeline_waiter *waiter_prev;
+	struct mali_timeline_waiter *waiter_next;
+
+	/* Waiter time must be between timeline head and tail, and there must
+	 * be less than MALI_TIMELINE_MAX_POINT_SPAN elements between */
+	MALI_DEBUG_ASSERT((waiter_new->point - timeline->point_oldest) < MALI_TIMELINE_MAX_POINT_SPAN);
+	MALI_DEBUG_ASSERT((-waiter_new->point + timeline->point_next) < MALI_TIMELINE_MAX_POINT_SPAN);
+
+	/* Finding out where to put this waiter, in the linked waiter list of the given timeline **/
+	waiter_prev = timeline->waiter_head; /* Insert new after  waiter_prev */
+	waiter_next = NULL;                  /* Insert new before waiter_next */
+
+	/* Iterating backwards from head (newest) to tail (oldest) until we
+	 * find the correct spot to insert the new waiter */
+	while (waiter_prev && mali_timeline_point_after(waiter_prev->point, waiter_new->point)) {
+		waiter_next = waiter_prev;
+		waiter_prev = waiter_prev->timeline_prev;
+	}
+
+	if (NULL == waiter_prev && NULL == waiter_next) {
+		/* list is empty */
+		timeline->waiter_head = waiter_new;
+		timeline->waiter_tail = waiter_new;
+	} else if (NULL == waiter_next) {
+		/* insert at head */
+		waiter_new->timeline_prev = timeline->waiter_head;
+		timeline->waiter_head->timeline_next = waiter_new;
+		timeline->waiter_head = waiter_new;
+	} else if (NULL == waiter_prev) {
+		/* insert at tail */
+		waiter_new->timeline_next = timeline->waiter_tail;
+		timeline->waiter_tail->timeline_prev = waiter_new;
+		timeline->waiter_tail = waiter_new;
+	} else {
+		/* insert between */
+		waiter_new->timeline_next = waiter_next;
+		waiter_new->timeline_prev = waiter_prev;
+		waiter_next->timeline_prev = waiter_new;
+		waiter_prev->timeline_next = waiter_new;
+	}
+}
+
+static void mali_timeline_update_delayed_work(struct mali_timeline *timeline)
+{
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *oldest_tracker;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SOFT == timeline->id);
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Timer is disabled, early out. */
+	if (!system->timer_enabled) return;
+
+	oldest_tracker = timeline->tracker_tail;
+	if (NULL != oldest_tracker && 0 == oldest_tracker->trigger_ref_count) {
+		if (MALI_FALSE == oldest_tracker->timer_active) {
+			if (MALI_TRUE == timeline->timer_active) {
+				_mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work);
+			}
+			_mali_osk_wq_delayed_schedule_work(timeline->delayed_work, MALI_TIMELINE_TIMEOUT_HZ);
+			oldest_tracker->timer_active = MALI_TRUE;
+			timeline->timer_active = MALI_TRUE;
+		}
+	} else if (MALI_TRUE == timeline->timer_active) {
+		_mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work);
+		timeline->timer_active = MALI_FALSE;
+	}
+}
+
+static mali_scheduler_mask mali_timeline_update_oldest_point(struct mali_timeline *timeline)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	MALI_DEBUG_CODE({
+		struct mali_timeline_system *system = timeline->system;
+		MALI_DEBUG_ASSERT_POINTER(system);
+
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+	});
+
+	if (NULL != timeline->tracker_tail) {
+		/* Set oldest point to oldest tracker's point */
+		timeline->point_oldest = timeline->tracker_tail->point;
+	} else {
+		/* No trackers, mark point list as empty */
+		timeline->point_oldest = timeline->point_next;
+	}
+
+	/* Release all waiters no longer on the timeline's point list.
+	 * Releasing a waiter can trigger this function to be called again, so
+	 * we do not store any pointers on stack. */
+	while (NULL != timeline->waiter_tail) {
+		u32 waiter_time_relative;
+		u32 time_head_relative;
+		struct mali_timeline_waiter *waiter = timeline->waiter_tail;
+
+		time_head_relative = timeline->point_next - timeline->point_oldest;
+		waiter_time_relative = waiter->point - timeline->point_oldest;
+
+		if (waiter_time_relative < time_head_relative) {
+			/* This and all following waiters are on the point list, so we are done. */
+			break;
+		}
+
+		/* Remove waiter from timeline's waiter list. */
+		if (NULL != waiter->timeline_next) {
+			waiter->timeline_next->timeline_prev = NULL;
+		} else {
+			/* This was the last waiter */
+			timeline->waiter_head = NULL;
+		}
+		timeline->waiter_tail = waiter->timeline_next;
+
+		/* Release waiter.  This could activate a tracker, if this was
+		 * the last waiter for the tracker. */
+		schedule_mask |= mali_timeline_system_release_waiter(timeline->system, waiter);
+	}
+
+	return schedule_mask;
+}
+
+void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker,
+				mali_timeline_tracker_type type,
+				struct mali_timeline_fence *fence,
+				void *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAX > type);
+
+	/* Zero out all tracker members. */
+	_mali_osk_memset(tracker, 0, sizeof(*tracker));
+
+	tracker->type = type;
+	tracker->job = job;
+	tracker->trigger_ref_count = 1;  /* Prevents any callback from trigging while adding it */
+	tracker->os_tick_create = _mali_osk_time_tickcount();
+	MALI_DEBUG_CODE(tracker->magic = MALI_TIMELINE_TRACKER_MAGIC);
+
+	tracker->activation_error = MALI_TIMELINE_ACTIVATION_ERROR_NONE;
+
+	/* Copy fence. */
+	if (NULL != fence) {
+		_mali_osk_memcpy(&tracker->fence, fence, sizeof(struct mali_timeline_fence));
+	}
+}
+
+mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker)
+{
+	struct mali_timeline *timeline;
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *tracker_next, *tracker_prev;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+
+	/* Upon entry a group lock will be held, but not a scheduler lock. */
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+	/* Tracker should have been triggered */
+	MALI_DEBUG_ASSERT(0 == tracker->trigger_ref_count);
+
+	/* All waiters should have been released at this point */
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_head);
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+
+	MALI_DEBUG_PRINT(3, ("Mali Timeline: releasing tracker for job 0x%08X\n", tracker->job));
+
+	timeline = tracker->timeline;
+	if (NULL == timeline) {
+		/* Tracker was not on a timeline, there is nothing to release. */
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Tracker should still be on timeline */
+	MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+	MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, tracker->point));
+
+	/* Tracker is no longer valid. */
+	MALI_DEBUG_CODE(tracker->magic = 0);
+
+	tracker_next = tracker->timeline_next;
+	tracker_prev = tracker->timeline_prev;
+	tracker->timeline_next = NULL;
+	tracker->timeline_prev = NULL;
+
+	/* Removing tracker from timeline's tracker list */
+	if (NULL == tracker_next) {
+		/* This tracker was the head */
+		timeline->tracker_head = tracker_prev;
+	} else {
+		tracker_next->timeline_prev = tracker_prev;
+	}
+
+	if (NULL == tracker_prev) {
+		/* This tracker was the tail */
+		timeline->tracker_tail = tracker_next;
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+		/* Update the timeline's oldest time and release any waiters */
+		schedule_mask |= mali_timeline_update_oldest_point(timeline);
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+	} else {
+		tracker_prev->timeline_next = tracker_next;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Update delayed work only when it is the soft job timeline */
+	if (MALI_TIMELINE_SOFT == tracker->timeline->id) {
+		mali_timeline_update_delayed_work(tracker->timeline);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return schedule_mask;
+}
+
+void mali_timeline_system_release_waiter_list(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *tail,
+		struct mali_timeline_waiter *head)
+{
+	struct mali_timeline_waiter    *waiter = NULL;
+	struct mali_timeline_waiter    *next = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(head);
+	MALI_DEBUG_ASSERT_POINTER(tail);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	head->tracker_next = system->waiter_empty_list;
+	system->waiter_empty_list = tail;
+
+	waiter = system->waiter_empty_list;
+	while (NULL != waiter) {
+		next = waiter->tracker_next;
+		_mali_osk_free(waiter);
+		waiter = next;
+	}
+	system->waiter_empty_list = NULL;
+}
+
+static mali_scheduler_mask mali_timeline_tracker_activate(struct mali_timeline_tracker *tracker)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	struct mali_timeline_system *system;
+	struct mali_timeline *timeline;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+	system = tracker->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	tracker->os_tick_activate = _mali_osk_time_tickcount();
+
+	if (NULL != tracker->waiter_head) {
+		mali_timeline_system_release_waiter_list(system, tracker->waiter_tail, tracker->waiter_head);
+		tracker->waiter_head = NULL;
+		tracker->waiter_tail = NULL;
+	}
+
+	switch (tracker->type) {
+	case MALI_TIMELINE_TRACKER_GP:
+		schedule_mask = mali_scheduler_activate_gp_job((struct mali_gp_job *) tracker->job);
+
+		_mali_osk_atomic_dec(&gp_tracker_count);
+		break;
+	case MALI_TIMELINE_TRACKER_PP:
+		if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) {
+			_mali_osk_atomic_dec(&virt_pp_tracker_count);
+		} else {
+			_mali_osk_atomic_dec(&phy_pp_tracker_count);
+		}
+		schedule_mask = mali_scheduler_activate_pp_job((struct mali_pp_job *) tracker->job);
+		break;
+	case MALI_TIMELINE_TRACKER_SOFT:
+		timeline = tracker->timeline;
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		schedule_mask |= mali_soft_job_system_activate_job((struct mali_soft_job *) tracker->job);
+
+		/* Start a soft timer to make sure the soft job be released in a limited time */
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+		mali_timeline_update_delayed_work(timeline);
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+		break;
+	case MALI_TIMELINE_TRACKER_WAIT:
+		mali_timeline_fence_wait_activate((struct mali_timeline_fence_wait_tracker *) tracker->job);
+		break;
+	case MALI_TIMELINE_TRACKER_SYNC:
+#if defined(CONFIG_SYNC)
+		mali_timeline_sync_fence_activate((struct mali_timeline_sync_fence_tracker *) tracker->job);
+#else
+		MALI_PRINT_ERROR(("Mali Timeline: sync tracker not supported\n", tracker->type));
+#endif /* defined(CONFIG_SYNC) */
+		break;
+	default:
+		MALI_PRINT_ERROR(("Mali Timeline - Illegal tracker type: %d\n", tracker->type));
+		break;
+	}
+
+	return schedule_mask;
+}
+
+void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker)
+{
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+	tracker->trigger_ref_count++;
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+}
+
+mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error)
+{
+	u32 tid = _mali_osk_get_tid();
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+	tracker->trigger_ref_count--;
+
+	tracker->activation_error |= activation_error;
+
+	if (0 == tracker->trigger_ref_count) {
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+		tracker = NULL;
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return schedule_mask;
+}
+
+void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+	MALI_DEBUG_ASSERT_POINTER(uk_fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		fence->points[i] = uk_fence->points[i];
+	}
+
+	fence->sync_fd = uk_fence->sync_fd;
+}
+
+struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session)
+{
+	u32 i;
+	struct mali_timeline_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: creating timeline system\n"));
+
+	system = (struct mali_timeline_system *) _mali_osk_calloc(1, sizeof(struct mali_timeline_system));
+	if (NULL == system) {
+		return NULL;
+	}
+
+	system->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM);
+	if (NULL == system->spinlock) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		system->timelines[i] = mali_timeline_create(system, (enum mali_timeline_id)i);
+		if (NULL == system->timelines[i]) {
+			mali_timeline_system_destroy(system);
+			return NULL;
+		}
+	}
+
+#if defined(CONFIG_SYNC)
+	system->signaled_sync_tl = mali_sync_timeline_create(NULL, "mali-always-signaled");
+	if (NULL == system->signaled_sync_tl) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	system->waiter_empty_list = NULL;
+	system->session = session;
+	system->timer_enabled = MALI_TRUE;
+
+	system->wait_queue = _mali_osk_wait_queue_init();
+	if (NULL == system->wait_queue) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+
+	return system;
+}
+
+#if defined(CONFIG_SYNC)
+
+/**
+ * Check if there are any trackers left on timeline.
+ *
+ * Used as a wait queue conditional.
+ *
+ * @param data Timeline.
+ * @return MALI_TRUE if there are no trackers on timeline, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_has_no_trackers(void *data)
+{
+	struct mali_timeline *timeline = (struct mali_timeline *) data;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	return mali_timeline_is_empty(timeline);
+}
+
+/**
+ * Cancel sync fence waiters waited upon by trackers on all timelines.
+ *
+ * Will return after all timelines have no trackers left.
+ *
+ * @param system Timeline system.
+ */
+static void mali_timeline_cancel_sync_fence_waiters(struct mali_timeline_system *system)
+{
+	u32 i;
+	u32 tid = _mali_osk_get_tid();
+	struct mali_timeline_tracker *tracker, *tracker_next;
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(tracker_list);
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Cancel sync fence waiters. */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		tracker_next = timeline->tracker_tail;
+		while (NULL != tracker_next) {
+			tracker = tracker_next;
+			tracker_next = tracker->timeline_next;
+
+			if (NULL == tracker->sync_fence) continue;
+
+			MALI_DEBUG_PRINT(3, ("Mali Timeline: Cancelling sync fence wait for tracker 0x%08X.\n", tracker));
+
+			/* Cancel sync fence waiter. */
+			if (0 == sync_fence_cancel_async(tracker->sync_fence, &tracker->sync_fence_waiter)) {
+				/* Callback was not called, move tracker to local list. */
+				_mali_osk_list_add(&tracker->sync_fence_cancel_list, &tracker_list);
+			}
+		}
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	/* Manually call sync fence callback in order to release waiter and trigger activation of tracker. */
+	_MALI_OSK_LIST_FOREACHENTRY(tracker, tracker_next, &tracker_list, struct mali_timeline_tracker, sync_fence_cancel_list) {
+		mali_timeline_sync_fence_callback(tracker->sync_fence, &tracker->sync_fence_waiter);
+	}
+
+	/* Sleep until all sync fence callbacks are done and all timelines are empty. */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		_mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_has_no_trackers, (void *) timeline);
+	}
+}
+
+#endif /* defined(CONFIG_SYNC) */
+
+void mali_timeline_system_abort(struct mali_timeline_system *system)
+{
+	MALI_DEBUG_CODE(u32 tid = _mali_osk_get_tid(););
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali Timeline: Aborting timeline system for session 0x%08X.\n", system->session));
+
+#if defined(CONFIG_SYNC)
+	mali_timeline_cancel_sync_fence_waiters(system);
+#endif /* defined(CONFIG_SYNC) */
+
+	/* Should not be any waiters or trackers left at this point. */
+	MALI_DEBUG_CODE({
+		u32 i;
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+		for (i = 0; i < MALI_TIMELINE_MAX; ++i)
+		{
+			struct mali_timeline *timeline = system->timelines[i];
+			MALI_DEBUG_ASSERT_POINTER(timeline);
+			MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next);
+			MALI_DEBUG_ASSERT(NULL == timeline->tracker_head);
+			MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+			MALI_DEBUG_ASSERT(NULL == timeline->waiter_head);
+			MALI_DEBUG_ASSERT(NULL == timeline->waiter_tail);
+		}
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+	});
+}
+
+void mali_timeline_system_destroy(struct mali_timeline_system *system)
+{
+	u32 i;
+	struct mali_timeline_waiter *waiter, *next;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: destroying timeline system\n"));
+
+	if (NULL != system) {
+		/* There should be no waiters left on this queue. */
+		if (NULL != system->wait_queue) {
+			_mali_osk_wait_queue_term(system->wait_queue);
+			system->wait_queue = NULL;
+		}
+
+		/* Free all waiters in empty list */
+		waiter = system->waiter_empty_list;
+		while (NULL != waiter) {
+			next = waiter->tracker_next;
+			_mali_osk_free(waiter);
+			waiter = next;
+		}
+
+#if defined(CONFIG_SYNC)
+		if (NULL != system->signaled_sync_tl) {
+			sync_timeline_destroy(system->signaled_sync_tl);
+		}
+#endif /* defined(CONFIG_SYNC) */
+
+		for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+			if (NULL != system->timelines[i]) {
+				mali_timeline_destroy(system->timelines[i]);
+			}
+		}
+		if (NULL != system->spinlock) {
+			mali_spinlock_reentrant_term(system->spinlock);
+		}
+
+		_mali_osk_free(system);
+	}
+}
+
+/**
+ * Find how many waiters are needed for a given fence.
+ *
+ * @param fence The fence to check.
+ * @return Number of waiters needed for fence.
+ */
+static u32 mali_timeline_fence_num_waiters(struct mali_timeline_fence *fence)
+{
+	u32 i, num_waiters = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		if (MALI_TIMELINE_NO_POINT != fence->points[i]) {
+			++num_waiters;
+		}
+	}
+
+#if defined(CONFIG_SYNC)
+	if (-1 != fence->sync_fd) ++num_waiters;
+#endif /* defined(CONFIG_SYNC) */
+
+	return num_waiters;
+}
+
+static struct mali_timeline_waiter *mali_timeline_system_get_zeroed_waiter(struct mali_timeline_system *system)
+{
+	struct mali_timeline_waiter *waiter;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	waiter = system->waiter_empty_list;
+	if (NULL != waiter) {
+		/* Remove waiter from empty list and zero it */
+		system->waiter_empty_list = waiter->tracker_next;
+		_mali_osk_memset(waiter, 0, sizeof(*waiter));
+	}
+
+	/* Return NULL if list was empty. */
+	return waiter;
+}
+
+static void mali_timeline_system_allocate_waiters(struct mali_timeline_system *system,
+		struct mali_timeline_waiter **tail,
+		struct mali_timeline_waiter **head,
+		int max_num_waiters)
+{
+	u32 i, tid = _mali_osk_get_tid();
+	mali_bool do_alloc;
+	struct mali_timeline_waiter *waiter;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(tail);
+	MALI_DEBUG_ASSERT_POINTER(head);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	*head = *tail = NULL;
+	do_alloc = MALI_FALSE;
+	i = 0;
+	while (i < max_num_waiters) {
+		if (MALI_FALSE == do_alloc) {
+			waiter = mali_timeline_system_get_zeroed_waiter(system);
+			if (NULL == waiter) {
+				do_alloc = MALI_TRUE;
+				mali_spinlock_reentrant_signal(system->spinlock, tid);
+				continue;
+			}
+		} else {
+			waiter = _mali_osk_calloc(1, sizeof(struct mali_timeline_waiter));
+			if (NULL == waiter) break;
+		}
+		++i;
+		if (NULL == *tail) {
+			*tail = waiter;
+			*head = waiter;
+		} else {
+			(*head)->tracker_next = waiter;
+			*head = waiter;
+		}
+	}
+	if (MALI_TRUE == do_alloc) {
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+	}
+}
+
+/**
+ * Create waiters for the given tracker. The tracker is activated when all waiters are release.
+ *
+ * @note Tracker can potentially be activated before this function returns.
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker we will create waiters for.
+ * @param waiter_tail List of pre-allocated waiters.
+ * @param waiter_head List of pre-allocated waiters.
+ */
+static void mali_timeline_system_create_waiters_and_unlock(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		struct mali_timeline_waiter *waiter_tail,
+		struct mali_timeline_waiter *waiter_head)
+{
+	int i;
+	u32 tid = _mali_osk_get_tid();
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+#if defined(CONFIG_SYNC)
+	struct sync_fence *sync_fence = NULL;
+#endif /* defined(CONFIG_SYNC) */
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_head);
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+	MALI_DEBUG_ASSERT(NULL != tracker->job);
+
+	/* Creating waiter object for all the timelines the fence is put on. Inserting this waiter
+	 * into the timelines sorted list of waiters */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		mali_timeline_point point;
+		struct mali_timeline *timeline;
+		struct mali_timeline_waiter *waiter;
+
+		/* Get point on current timeline from tracker's fence. */
+		point = tracker->fence.points[i];
+
+		if (likely(MALI_TIMELINE_NO_POINT == point)) {
+			/* Fence contains no point on this timeline so we don't need a waiter. */
+			continue;
+		}
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (unlikely(!mali_timeline_is_point_valid(timeline, point))) {
+			MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n",
+					  point, timeline->point_oldest, timeline->point_next));
+			continue;
+		}
+
+		if (likely(mali_timeline_is_point_released(timeline, point))) {
+			/* Tracker representing the point has been released so we don't need a
+			 * waiter. */
+			continue;
+		}
+
+		/* The point is on timeline. */
+		MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, point));
+
+		/* Get a new zeroed waiter object. */
+		if (likely(NULL != waiter_tail)) {
+			waiter = waiter_tail;
+			waiter_tail = waiter_tail->tracker_next;
+		} else {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+			continue;
+		}
+
+		/* Yanking the trigger ref count of the tracker. */
+		tracker->trigger_ref_count++;
+
+		waiter->point   = point;
+		waiter->tracker = tracker;
+
+		/* Insert waiter on tracker's singly-linked waiter list. */
+		if (NULL == tracker->waiter_head) {
+			/* list is empty */
+			MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+			tracker->waiter_tail = waiter;
+		} else {
+			tracker->waiter_head->tracker_next = waiter;
+		}
+		tracker->waiter_head = waiter;
+
+		/* Add waiter to timeline. */
+		mali_timeline_insert_waiter(timeline, waiter);
+	}
+#if defined(CONFIG_SYNC)
+	if (-1 != tracker->fence.sync_fd) {
+		int ret;
+		struct mali_timeline_waiter *waiter;
+
+		sync_fence = sync_fence_fdget(tracker->fence.sync_fd);
+		if (unlikely(NULL == sync_fence)) {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", tracker->fence.sync_fd));
+			goto exit;
+		}
+
+		/* Check if we have a zeroed waiter object available. */
+		if (unlikely(NULL == waiter_tail)) {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+			goto exit;
+		}
+
+		/* Start asynchronous wait that will release waiter when the fence is signaled. */
+		sync_fence_waiter_init(&tracker->sync_fence_waiter, mali_timeline_sync_fence_callback);
+		ret = sync_fence_wait_async(sync_fence, &tracker->sync_fence_waiter);
+		if (1 == ret) {
+			/* Fence already signaled, no waiter needed. */
+			tracker->fence.sync_fd = -1;
+			goto exit;
+		} else if (0 != ret) {
+			MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, ret));
+			tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT;
+			goto exit;
+		}
+
+		/* Grab new zeroed waiter object. */
+		waiter = waiter_tail;
+		waiter_tail = waiter_tail->tracker_next;
+
+		/* Increase the trigger ref count of the tracker. */
+		tracker->trigger_ref_count++;
+
+		waiter->point   = MALI_TIMELINE_NO_POINT;
+		waiter->tracker = tracker;
+
+		/* Insert waiter on tracker's singly-linked waiter list. */
+		if (NULL == tracker->waiter_head) {
+			/* list is empty */
+			MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+			tracker->waiter_tail = waiter;
+		} else {
+			tracker->waiter_head->tracker_next = waiter;
+		}
+		tracker->waiter_head = waiter;
+
+		/* Also store waiter in separate field for easy access by sync callback. */
+		tracker->waiter_sync = waiter;
+
+		/* Store the sync fence in tracker so we can retrieve in abort session, if needed. */
+		tracker->sync_fence = sync_fence;
+
+		sync_fence = NULL;
+	}
+exit:
+#endif /* defined(CONFIG_SYNC) */
+
+	if (NULL != waiter_tail) {
+		mali_timeline_system_release_waiter_list(system, waiter_tail, waiter_head);
+	}
+
+	/* Release the initial trigger ref count. */
+	tracker->trigger_ref_count--;
+
+	/* If there were no waiters added to this tracker we activate immediately. */
+	if (0 == tracker->trigger_ref_count) {
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+#if defined(CONFIG_SYNC)
+	if (NULL != sync_fence) {
+		sync_fence_put(sync_fence);
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		enum mali_timeline_id timeline_id)
+{
+	int num_waiters = 0;
+	struct mali_timeline_waiter *waiter_tail, *waiter_head;
+	u32 tid = _mali_osk_get_tid();
+	mali_timeline_point point = MALI_TIMELINE_NO_POINT;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == system->session->is_aborting);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAX > tracker->type);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: adding tracker for job %p, timeline: %d\n", tracker->job, timeline_id));
+
+	MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+	tracker->system = system;
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	num_waiters = mali_timeline_fence_num_waiters(&tracker->fence);
+
+	/* Allocate waiters. */
+	mali_timeline_system_allocate_waiters(system, &waiter_tail, &waiter_head, num_waiters);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Add tracker to timeline.  This will allocate a point for the tracker on the timeline. If
+	 * timeline ID is MALI_TIMELINE_NONE the tracker will NOT be added to a timeline and the
+	 * point will be MALI_TIMELINE_NO_POINT.
+	 *
+	 * NOTE: the tracker can fail to be added if the timeline is full.  If this happens, the
+	 * point will be MALI_TIMELINE_NO_POINT. */
+	MALI_DEBUG_ASSERT(timeline_id < MALI_TIMELINE_MAX || timeline_id == MALI_TIMELINE_NONE);
+	if (likely(timeline_id < MALI_TIMELINE_MAX)) {
+		struct mali_timeline *timeline = system->timelines[timeline_id];
+		mali_timeline_insert_tracker(timeline, tracker);
+		MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+	}
+
+	point = tracker->point;
+
+	/* Create waiters for tracker based on supplied fence.  Each waiter will increase the
+	 * trigger ref count. */
+	mali_timeline_system_create_waiters_and_unlock(system, tracker, waiter_tail, waiter_head);
+	tracker = NULL;
+
+	/* At this point the tracker object might have been freed so we should no longer
+	 * access it. */
+
+
+	/* The tracker will always be activated after calling add_tracker, even if NO_POINT is
+	 * returned. */
+	return point;
+}
+
+static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *waiter)
+{
+	struct mali_timeline_tracker *tracker;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	tracker = waiter->tracker;
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	/* At this point the waiter has been removed from the timeline's waiter list, but it is
+	 * still on the tracker's waiter list.  All of the tracker's waiters will be released when
+	 * the tracker is activated. */
+
+	waiter->point   = MALI_TIMELINE_NO_POINT;
+	waiter->tracker = NULL;
+
+	tracker->trigger_ref_count--;
+	if (0 == tracker->trigger_ref_count) {
+		/* This was the last waiter; activate tracker */
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+		tracker = NULL;
+	}
+
+	return schedule_mask;
+}
+
+mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system,
+		enum mali_timeline_id timeline_id)
+{
+	mali_timeline_point point;
+	struct mali_timeline *timeline;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	if (MALI_TIMELINE_MAX <= timeline_id) {
+		return MALI_TIMELINE_NO_POINT;
+	}
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	timeline = system->timelines[timeline_id];
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	point = MALI_TIMELINE_NO_POINT;
+	if (timeline->point_oldest != timeline->point_next) {
+		point = timeline->point_next - 1;
+		if (MALI_TIMELINE_NO_POINT == point) point--;
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return point;
+}
+
+void mali_timeline_initialize(void)
+{
+	_mali_osk_atomic_init(&gp_tracker_count, 0);
+	_mali_osk_atomic_init(&phy_pp_tracker_count, 0);
+	_mali_osk_atomic_init(&virt_pp_tracker_count, 0);
+}
+
+void mali_timeline_terminate(void)
+{
+	_mali_osk_atomic_term(&gp_tracker_count);
+	_mali_osk_atomic_term(&phy_pp_tracker_count);
+	_mali_osk_atomic_term(&virt_pp_tracker_count);
+}
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+
+static mali_bool is_waiting_on_timeline(struct mali_timeline_tracker *tracker, enum mali_timeline_id id)
+{
+	struct mali_timeline *timeline;
+	struct mali_timeline_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT_POINTER(tracker->timeline);
+	timeline = tracker->timeline;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline->system);
+	system = timeline->system;
+
+	if (MALI_TIMELINE_MAX > id) {
+		if (MALI_TIMELINE_NO_POINT != tracker->fence.points[id]) {
+			return mali_timeline_is_point_on(system->timelines[id], tracker->fence.points[id]);
+		} else {
+			return MALI_FALSE;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_NONE == id);
+		return MALI_FALSE;
+	}
+}
+
+static const char *timeline_id_to_string(enum mali_timeline_id id)
+{
+	switch (id) {
+	case MALI_TIMELINE_GP:
+		return "GP";
+	case MALI_TIMELINE_PP:
+		return "PP";
+	case MALI_TIMELINE_SOFT:
+		return "SOFT";
+	default:
+		return "NONE";
+	}
+}
+
+static const char *timeline_tracker_type_to_string(enum mali_timeline_tracker_type type)
+{
+	switch (type) {
+	case MALI_TIMELINE_TRACKER_GP:
+		return "GP";
+	case MALI_TIMELINE_TRACKER_PP:
+		return "PP";
+	case MALI_TIMELINE_TRACKER_SOFT:
+		return "SOFT";
+	case MALI_TIMELINE_TRACKER_WAIT:
+		return "WAIT";
+	case MALI_TIMELINE_TRACKER_SYNC:
+		return "SYNC";
+	default:
+		return "INVALID";
+	}
+}
+
+mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker)
+{
+	struct mali_timeline *timeline = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	timeline = tracker->timeline;
+
+	if (0 != tracker->trigger_ref_count) {
+		return MALI_TIMELINE_TS_WAITING;
+	}
+
+	if (timeline && (timeline->tracker_tail == tracker || NULL != tracker->timeline_prev)) {
+		return MALI_TIMELINE_TS_ACTIVE;
+	}
+
+	if (timeline && (MALI_TIMELINE_NO_POINT == tracker->point)) {
+		return MALI_TIMELINE_TS_INIT;
+	}
+
+	return MALI_TIMELINE_TS_FINISH;
+}
+
+void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx)
+{
+	const char *tracker_state = "IWAF";
+	char state_char = 'I';
+	char tracker_type[32] = {0};
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker));
+	_mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type));
+
+#if defined(CONFIG_SYNC)
+	if (0 != tracker->trigger_ref_count) {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)]  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+				    tracker->fence.sync_fd, tracker->sync_fence, tracker->job);
+	} else {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c  fd:%d  fence:(0x%08X)  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char,
+				    tracker->fence.sync_fd, tracker->sync_fence, tracker->job);
+	}
+#else
+	if (0 != tracker->trigger_ref_count) {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)]  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+				    tracker->job);
+	} else {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char,
+				    tracker->job);
+	}
+#endif
+}
+
+void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx)
+{
+	struct mali_timeline_tracker *tracker = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	tracker = timeline->tracker_tail;
+	while (NULL != tracker) {
+		mali_timeline_debug_print_tracker(tracker, print_ctx);
+		tracker = tracker->timeline_next;
+	}
+}
+
+void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx)
+{
+	int i;
+	int num_printed = 0;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Print all timelines */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (NULL == timeline->tracker_head) continue;
+
+		_mali_osk_ctxprintf(print_ctx, "TL: Timeline %s:\n",
+				    timeline_id_to_string((enum mali_timeline_id)i));
+
+		mali_timeline_debug_print_timeline(timeline, print_ctx);
+		num_printed++;
+	}
+
+	if (0 == num_printed) {
+		_mali_osk_ctxprintf(print_ctx, "TL: All timelines empty\n");
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+}
+
+#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */
diff --git a/utgard/r5p1/common/mali_timeline.h b/utgard/r5p1/common/mali_timeline.h
new file mode 100755
index 0000000..1ad308f
--- /dev/null
+++ b/utgard/r5p1/common/mali_timeline.h
@@ -0,0 +1,527 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMELINE_H__
+#define __MALI_TIMELINE_H__
+
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "mali_spinlock_reentrant.h"
+#include "mali_sync.h"
+#include "mali_scheduler_types.h"
+
+/**
+ * Soft job timeout.
+ *
+ * Soft jobs have to be signaled as complete after activation.  Normally this is done by user space,
+ * but in order to guarantee that every soft job is completed, we also have a timer.
+ */
+#define MALI_TIMELINE_TIMEOUT_HZ ((unsigned long) (HZ * 3 / 2)) /* 1500 ms. */
+
+/**
+ * Timeline type.
+ */
+typedef enum mali_timeline_id {
+	MALI_TIMELINE_GP   = MALI_UK_TIMELINE_GP,   /**< GP job timeline. */
+	MALI_TIMELINE_PP   = MALI_UK_TIMELINE_PP,   /**< PP job timeline. */
+	MALI_TIMELINE_SOFT = MALI_UK_TIMELINE_SOFT, /**< Soft job timeline. */
+	MALI_TIMELINE_MAX  = MALI_UK_TIMELINE_MAX
+} mali_timeline_id;
+
+/**
+ * Used by trackers that should not be added to a timeline (@ref mali_timeline_system_add_tracker).
+ */
+#define MALI_TIMELINE_NONE MALI_TIMELINE_MAX
+
+/**
+ * Tracker type.
+ */
+typedef enum mali_timeline_tracker_type {
+	MALI_TIMELINE_TRACKER_GP   = 0, /**< Tracker used by GP jobs. */
+	MALI_TIMELINE_TRACKER_PP   = 1, /**< Tracker used by PP jobs. */
+	MALI_TIMELINE_TRACKER_SOFT = 2, /**< Tracker used by soft jobs. */
+	MALI_TIMELINE_TRACKER_WAIT = 3, /**< Tracker used for fence wait. */
+	MALI_TIMELINE_TRACKER_SYNC = 4, /**< Tracker used for sync fence. */
+	MALI_TIMELINE_TRACKER_MAX  = 5,
+} mali_timeline_tracker_type;
+
+/**
+ * Tracker activation error.
+ */
+typedef u32 mali_timeline_activation_error;
+#define MALI_TIMELINE_ACTIVATION_ERROR_NONE      0
+#define MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT  (1<<1)
+#define MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT (1<<0)
+
+/**
+ * Type used to represent a point on a timeline.
+ */
+typedef u32 mali_timeline_point;
+
+/**
+ * Used to represent that no point on a timeline.
+ */
+#define MALI_TIMELINE_NO_POINT ((mali_timeline_point) 0)
+
+/**
+ * The maximum span of points on a timeline.  A timeline will be considered full if the difference
+ * between the oldest and newest points is equal or larger to this value.
+ */
+#define MALI_TIMELINE_MAX_POINT_SPAN 65536
+
+/**
+ * Magic value used to assert on validity of trackers.
+ */
+#define MALI_TIMELINE_TRACKER_MAGIC 0xabcdabcd
+
+struct mali_timeline;
+struct mali_timeline_waiter;
+struct mali_timeline_tracker;
+
+/**
+ * Timeline fence.
+ */
+struct mali_timeline_fence {
+	mali_timeline_point points[MALI_TIMELINE_MAX]; /**< For each timeline, a point or MALI_TIMELINE_NO_POINT. */
+	s32                 sync_fd;                   /**< A file descriptor representing a sync fence, or -1. */
+};
+
+/**
+ * Timeline system.
+ *
+ * The Timeline system has a set of timelines associated with a session.
+ */
+struct mali_timeline_system {
+	struct mali_spinlock_reentrant *spinlock;   /**< Spin lock protecting the timeline system */
+	struct mali_timeline           *timelines[MALI_TIMELINE_MAX]; /**< The timelines in this system */
+
+	/* Single-linked list of unused waiter objects.  Uses the tracker_next field in tracker. */
+	struct mali_timeline_waiter    *waiter_empty_list;
+
+	struct mali_session_data       *session;    /**< Session that owns this system. */
+
+	mali_bool                       timer_enabled; /**< Set to MALI_TRUE if soft job timer should be enabled, MALI_FALSE if not. */
+
+	_mali_osk_wait_queue_t         *wait_queue; /**< Wait queue. */
+
+#if defined(CONFIG_SYNC)
+	struct sync_timeline           *signaled_sync_tl; /**< Special sync timeline used to create pre-signaled sync fences */
+#endif /* defined(CONFIG_SYNC) */
+};
+
+/**
+ * Timeline.  Each Timeline system will have MALI_TIMELINE_MAX timelines.
+ */
+struct mali_timeline {
+	mali_timeline_point           point_next;   /**< The next available point. */
+	mali_timeline_point           point_oldest; /**< The oldest point not released. */
+
+	/* Double-linked list of trackers.  Sorted in ascending order by tracker->time_number with
+	 * tail pointing to the tracker with the oldest time. */
+	struct mali_timeline_tracker *tracker_head;
+	struct mali_timeline_tracker *tracker_tail;
+
+	/* Double-linked list of waiters.  Sorted in ascending order by waiter->time_number_wait
+	 * with tail pointing to the waiter with oldest wait time. */
+	struct mali_timeline_waiter  *waiter_head;
+	struct mali_timeline_waiter  *waiter_tail;
+
+	struct mali_timeline_system  *system;       /**< Timeline system this timeline belongs to. */
+	enum mali_timeline_id         id;           /**< Timeline type. */
+
+#if defined(CONFIG_SYNC)
+	struct sync_timeline         *sync_tl;      /**< Sync timeline that corresponds to this timeline. */
+#endif /* defined(CONFIG_SYNC) */
+
+	/* The following fields are used to time out soft job trackers. */
+	_mali_osk_wq_delayed_work_t  *delayed_work;
+	mali_bool                     timer_active;
+};
+
+/**
+ * Timeline waiter.
+ */
+struct mali_timeline_waiter {
+	mali_timeline_point           point;         /**< Point on timeline we are waiting for to be released. */
+	struct mali_timeline_tracker *tracker;       /**< Tracker that is waiting. */
+
+	struct mali_timeline_waiter  *timeline_next; /**< Next waiter on timeline's waiter list. */
+	struct mali_timeline_waiter  *timeline_prev; /**< Previous waiter on timeline's waiter list. */
+
+	struct mali_timeline_waiter  *tracker_next;  /**< Next waiter on tracker's waiter list. */
+};
+
+/**
+ * Timeline tracker.
+ */
+struct mali_timeline_tracker {
+	MALI_DEBUG_CODE(u32            magic); /**< Should always be MALI_TIMELINE_TRACKER_MAGIC for a valid tracker. */
+
+	mali_timeline_point            point; /**< Point on timeline for this tracker */
+
+	struct mali_timeline_tracker  *timeline_next; /**< Next tracker on timeline's tracker list */
+	struct mali_timeline_tracker  *timeline_prev; /**< Previous tracker on timeline's tracker list */
+
+	u32                            trigger_ref_count; /**< When zero tracker will be activated */
+	mali_timeline_activation_error activation_error;  /**< Activation error. */
+	struct mali_timeline_fence     fence;             /**< Fence used to create this tracker */
+
+	/* Single-linked list of waiters.  Sorted in order of insertions with
+	 * tail pointing to first waiter. */
+	struct mali_timeline_waiter   *waiter_head;
+	struct mali_timeline_waiter   *waiter_tail;
+
+#if defined(CONFIG_SYNC)
+	/* These are only used if the tracker is waiting on a sync fence. */
+	struct mali_timeline_waiter   *waiter_sync; /**< A direct pointer to timeline waiter representing sync fence. */
+	struct sync_fence_waiter       sync_fence_waiter; /**< Used to connect sync fence and tracker in sync fence wait callback. */
+	struct sync_fence             *sync_fence;   /**< The sync fence this tracker is waiting on. */
+	_mali_osk_list_t               sync_fence_cancel_list; /**< List node used to cancel sync fence waiters. */
+#endif /* defined(CONFIG_SYNC) */
+
+	struct mali_timeline_system   *system;       /**< Timeline system. */
+	struct mali_timeline          *timeline;     /**< Timeline, or NULL if not on a timeline. */
+	enum mali_timeline_tracker_type type;        /**< Type of tracker. */
+	void                          *job;          /**< Owner of tracker. */
+
+	/* The following fields are used to time out soft job trackers. */
+	unsigned long                 os_tick_create;
+	unsigned long                 os_tick_activate;
+	mali_bool                     timer_active;
+};
+
+extern _mali_osk_atomic_t gp_tracker_count;
+extern _mali_osk_atomic_t phy_pp_tracker_count;
+extern _mali_osk_atomic_t virt_pp_tracker_count;
+
+/**
+ * What follows is a set of functions to check the state of a timeline and to determine where on a
+ * timeline a given point is.  Most of these checks will translate the timeline so the oldest point
+ * on the timeline is aligned with zero.  Remember that all of these calculation are done on
+ * unsigned integers.
+ *
+ * The following example illustrates the three different states a point can be in.  The timeline has
+ * been translated to put the oldest point at zero:
+ *
+ *
+ *
+ *                               [ point is in forbidden zone ]
+ *                                          64k wide
+ *                                MALI_TIMELINE_MAX_POINT_SPAN
+ *
+ *    [ point is on timeline     )                            ( point is released ]
+ *
+ *    0--------------------------##############################--------------------2^32 - 1
+ *    ^                          ^
+ *    \                          |
+ *     oldest point on timeline  |
+ *                               \
+ *                                next point on timeline
+ */
+
+/**
+ * Compare two timeline points
+ *
+ * Returns true if a is after b, false if a is before or equal to b.
+ *
+ * This funcion ignores MALI_TIMELINE_MAX_POINT_SPAN. Wrapping is supported and
+ * the result will be correct if the points is less then UINT_MAX/2 apart.
+ *
+ * @param a Point on timeline
+ * @param b Point on timeline
+ * @return MALI_TRUE if a is after b
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_point_after(mali_timeline_point a, mali_timeline_point b)
+{
+	return 0 > ((s32)b) - ((s32)a);
+}
+
+/**
+ * Check if a point is on timeline.  A point is on a timeline if it is greater than, or equal to,
+ * the oldest point, and less than the next point.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point is on timeline, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_on(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+	return (point - timeline->point_oldest) < (timeline->point_next - timeline->point_oldest);
+}
+
+/**
+ * Check if a point has been released.  A point is released if it is older than the oldest point on
+ * the timeline, newer than the next point, and also not in the forbidden zone.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point has been release, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_released(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	mali_timeline_point point_normalized;
+	mali_timeline_point next_normalized;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+	point_normalized = point - timeline->point_oldest;
+	next_normalized = timeline->point_next - timeline->point_oldest;
+
+	return point_normalized > (next_normalized + MALI_TIMELINE_MAX_POINT_SPAN);
+}
+
+/**
+ * Check if a point is valid.  A point is valid if is on the timeline or has been released.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point is valid, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_valid(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return mali_timeline_is_point_on(timeline, point) || mali_timeline_is_point_released(timeline, point);
+}
+
+/**
+ * Check if timeline is empty (has no points on it).  A timeline is empty if next == oldest.
+ *
+ * @param timeline Timeline.
+ * @return MALI_TRUE if timeline is empty, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_empty(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return timeline->point_next == timeline->point_oldest;
+}
+
+/**
+ * Check if timeline is full.  A valid timeline cannot span more than 64k points (@ref
+ * MALI_TIMELINE_MAX_POINT_SPAN).
+ *
+ * @param timeline Timeline.
+ * @return MALI_TRUE if timeline is full, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_full(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return MALI_TIMELINE_MAX_POINT_SPAN <= (timeline->point_next - timeline->point_oldest);
+}
+
+/**
+ * Create a new timeline system.
+ *
+ * @param session The session this timeline system will belong to.
+ * @return New timeline system.
+ */
+struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session);
+
+/**
+ * Abort timeline system.
+ *
+ * This will release all pending waiters in the timeline system causing all trackers to be
+ * activated.
+ *
+ * @param system Timeline system to abort all jobs from.
+ */
+void mali_timeline_system_abort(struct mali_timeline_system *system);
+
+/**
+ * Destroy an empty timeline system.
+ *
+ * @note @ref mali_timeline_system_abort() should be called prior to this function.
+ *
+ * @param system Timeline system to destroy.
+ */
+void mali_timeline_system_destroy(struct mali_timeline_system *system);
+
+/**
+ * Stop the soft job timer.
+ *
+ * @param system Timeline system
+ */
+void mali_timeline_system_stop_timer(struct mali_timeline_system *system);
+
+/**
+ * Add a tracker to a timeline system and optionally also on a timeline.
+ *
+ * Once added to the timeline system, the tracker is guaranteed to be activated.  The tracker can be
+ * activated before this function returns.  Thus, it is also possible that the tracker is released
+ * before this function returns, depending on the tracker type.
+ *
+ * @note Tracker must be initialized (@ref mali_timeline_tracker_init) before being added to the
+ * timeline system.
+ *
+ * @param system Timeline system the tracker will be added to.
+ * @param tracker The tracker to be added.
+ * @param timeline_id Id of the timeline the tracker will be added to, or
+ *                    MALI_TIMELINE_NONE if it should not be added on a timeline.
+ * @return Point on timeline identifying this tracker, or MALI_TIMELINE_NO_POINT if not on timeline.
+ */
+mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		enum mali_timeline_id timeline_id);
+
+/**
+ * Get latest point on timeline.
+ *
+ * @param system Timeline system.
+ * @param timeline_id Id of timeline to get latest point from.
+ * @return Latest point on timeline, or MALI_TIMELINE_NO_POINT if the timeline is empty.
+ */
+mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system,
+		enum mali_timeline_id timeline_id);
+
+/**
+ * Initialize tracker.
+ *
+ * Must be called before tracker is added to timeline system (@ref mali_timeline_system_add_tracker).
+ *
+ * @param tracker Tracker to initialize.
+ * @param type Type of tracker.
+ * @param fence Fence used to set up dependencies for tracker.
+ * @param job Pointer to job struct this tracker is associated with.
+ */
+void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker,
+				mali_timeline_tracker_type type,
+				struct mali_timeline_fence *fence,
+				void *job);
+
+/**
+ * Grab trigger ref count on tracker.
+ *
+ * This will prevent tracker from being activated until the trigger ref count reaches zero.
+ *
+ * @note Tracker must have been initialized (@ref mali_timeline_tracker_init).
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker.
+ */
+void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker);
+
+/**
+ * Release trigger ref count on tracker.
+ *
+ * If the trigger ref count reaches zero, the tracker will be activated.
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker.
+ * @param activation_error Error bitmask if activated with error, or MALI_TIMELINE_ACTIVATION_ERROR_NONE if no error.
+ * @return Scheduling bitmask.
+ */
+mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error);
+
+/**
+ * Release a tracker from the timeline system.
+ *
+ * This is used to signal that the job being tracker is finished, either due to normal circumstances
+ * (job complete/abort) or due to a timeout.
+ *
+ * We may need to schedule some subsystems after a tracker has been released and the returned
+ * bitmask will tell us if it is necessary.  If the return value is non-zero, this value needs to be
+ * sent as an input parameter to @ref mali_scheduler_schedule_from_mask() to do the scheduling.
+ *
+ * @note Tracker must have been activated before being released.
+ * @warning Not calling @ref mali_scheduler_schedule_from_mask() after releasing a tracker can lead
+ * to a deadlock.
+ *
+ * @param tracker Tracker being released.
+ * @return Scheduling bitmask.
+ */
+mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker);
+
+MALI_STATIC_INLINE mali_bool mali_timeline_tracker_activation_error(
+	struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	return (MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT &
+		tracker->activation_error) ? MALI_TRUE : MALI_FALSE;
+}
+
+/**
+ * Copy data from a UK fence to a Timeline fence.
+ *
+ * @param fence Timeline fence.
+ * @param uk_fence UK fence.
+ */
+void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence);
+
+void mali_timeline_initialize(void);
+
+void mali_timeline_terminate(void);
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_gp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&gp_tracker_count);
+}
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_physical_pp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&phy_pp_tracker_count);
+}
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_virtual_pp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&virt_pp_tracker_count);
+}
+
+#if defined(DEBUG)
+#define MALI_TIMELINE_DEBUG_FUNCTIONS
+#endif /* DEBUG */
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+
+/**
+ * Tracker state.  Used for debug printing.
+ */
+typedef enum mali_timeline_tracker_state {
+	MALI_TIMELINE_TS_INIT    = 0,
+	MALI_TIMELINE_TS_WAITING = 1,
+	MALI_TIMELINE_TS_ACTIVE  = 2,
+	MALI_TIMELINE_TS_FINISH  = 3,
+} mali_timeline_tracker_state;
+
+/**
+ * Get tracker state.
+ *
+ * @param tracker Tracker to check.
+ * @return State of tracker.
+ */
+mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker);
+
+/**
+ * Print debug information about tracker.
+ *
+ * @param tracker Tracker to print.
+ */
+void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx);
+
+/**
+ * Print debug information about timeline.
+ *
+ * @param timeline Timeline to print.
+ */
+void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx);
+
+/**
+ * Print debug information about timeline system.
+ *
+ * @param system Timeline system to print.
+ */
+void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx);
+
+#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */
+
+#endif /* __MALI_TIMELINE_H__ */
diff --git a/utgard/r5p1/common/mali_timeline_fence_wait.c b/utgard/r5p1/common/mali_timeline_fence_wait.c
new file mode 100755
index 0000000..3c58928
--- /dev/null
+++ b/utgard/r5p1/common/mali_timeline_fence_wait.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timeline_fence_wait.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_spinlock_reentrant.h"
+
+/**
+ * Allocate a fence waiter tracker.
+ *
+ * @return New fence waiter if successful, NULL if not.
+ */
+static struct mali_timeline_fence_wait_tracker *mali_timeline_fence_wait_tracker_alloc(void)
+{
+	return (struct mali_timeline_fence_wait_tracker *) _mali_osk_calloc(1, sizeof(struct mali_timeline_fence_wait_tracker));
+}
+
+/**
+ * Free fence waiter tracker.
+ *
+ * @param wait Fence wait tracker to free.
+ */
+static void mali_timeline_fence_wait_tracker_free(struct mali_timeline_fence_wait_tracker *wait)
+{
+	MALI_DEBUG_ASSERT_POINTER(wait);
+	_mali_osk_atomic_term(&wait->refcount);
+	_mali_osk_free(wait);
+}
+
+/**
+ * Check if fence wait tracker has been activated.  Used as a wait queue condition.
+ *
+ * @param data Fence waiter.
+ * @return MALI_TRUE if tracker has been activated, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_fence_wait_tracker_is_activated(void *data)
+{
+	struct mali_timeline_fence_wait_tracker *wait;
+
+	wait = (struct mali_timeline_fence_wait_tracker *) data;
+	MALI_DEBUG_ASSERT_POINTER(wait);
+
+	return wait->activated;
+}
+
+/**
+ * Check if fence has been signaled.
+ *
+ * @param system Timeline system.
+ * @param fence Timeline fence.
+ * @return MALI_TRUE if fence is signaled, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_fence_wait_check_status(struct mali_timeline_system *system, struct mali_timeline_fence *fence)
+{
+	int i;
+	u32 tid = _mali_osk_get_tid();
+	mali_bool ret = MALI_TRUE;
+#if defined(CONFIG_SYNC)
+	struct sync_fence *sync_fence = NULL;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline;
+		mali_timeline_point   point;
+
+		point = fence->points[i];
+
+		if (likely(MALI_TIMELINE_NO_POINT == point)) {
+			/* Fence contains no point on this timeline. */
+			continue;
+		}
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (unlikely(!mali_timeline_is_point_valid(timeline, point))) {
+			MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n", point, timeline->point_oldest, timeline->point_next));
+		}
+
+		if (!mali_timeline_is_point_released(timeline, point)) {
+			ret = MALI_FALSE;
+			goto exit;
+		}
+	}
+
+#if defined(CONFIG_SYNC)
+	if (-1 != fence->sync_fd) {
+		sync_fence = sync_fence_fdget(fence->sync_fd);
+		if (likely(NULL != sync_fence)) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+			if (0 == sync_fence->status) {
+#else
+			if (0 == atomic_read(&sync_fence->status)) {
+#endif
+				ret = MALI_FALSE;
+			}
+		} else {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", fence->sync_fd));
+		}
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+exit:
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+#if defined(CONFIG_SYNC)
+	if (NULL != sync_fence) {
+		sync_fence_put(sync_fence);
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	return ret;
+}
+
+mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout)
+{
+	struct mali_timeline_fence_wait_tracker *wait;
+	mali_timeline_point point;
+	mali_bool ret;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: wait on fence\n"));
+
+	if (MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY == timeout) {
+		return mali_timeline_fence_wait_check_status(system, fence);
+	}
+
+	wait = mali_timeline_fence_wait_tracker_alloc();
+	if (unlikely(NULL == wait)) {
+		MALI_PRINT_ERROR(("Mali Timeline: failed to allocate data for fence wait\n"));
+		return MALI_FALSE;
+	}
+
+	wait->activated = MALI_FALSE;
+	wait->system = system;
+
+	/* Initialize refcount to two references.  The reference first will be released by this
+	 * function after the wait is over.  The second reference will be released when the tracker
+	 * is activated. */
+	_mali_osk_atomic_init(&wait->refcount, 2);
+
+	/* Add tracker to timeline system, but not to a timeline. */
+	mali_timeline_tracker_init(&wait->tracker, MALI_TIMELINE_TRACKER_WAIT, fence, wait);
+	point = mali_timeline_system_add_tracker(system, &wait->tracker, MALI_TIMELINE_NONE);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT == point);
+	MALI_IGNORE(point);
+
+	/* Wait for the tracker to be activated or time out. */
+	if (MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER == timeout) {
+		_mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait);
+	} else {
+		_mali_osk_wait_queue_wait_event_timeout(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait, timeout);
+	}
+
+	ret = wait->activated;
+
+	if (0 == _mali_osk_atomic_dec_return(&wait->refcount)) {
+		mali_timeline_fence_wait_tracker_free(wait);
+	}
+
+	return ret;
+}
+
+void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *wait)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(wait);
+	MALI_DEBUG_ASSERT_POINTER(wait->system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for fence wait tracker\n"));
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == wait->activated);
+	wait->activated = MALI_TRUE;
+
+	_mali_osk_wait_queue_wake_up(wait->system->wait_queue);
+
+	/* Nothing can wait on this tracker, so nothing to schedule after release. */
+	schedule_mask = mali_timeline_tracker_release(&wait->tracker);
+	MALI_DEBUG_ASSERT(MALI_SCHEDULER_MASK_EMPTY == schedule_mask);
+	MALI_IGNORE(schedule_mask);
+
+	if (0 == _mali_osk_atomic_dec_return(&wait->refcount)) {
+		mali_timeline_fence_wait_tracker_free(wait);
+	}
+}
diff --git a/utgard/r5p1/common/mali_timeline_fence_wait.h b/utgard/r5p1/common/mali_timeline_fence_wait.h
new file mode 100755
index 0000000..f5440ab
--- /dev/null
+++ b/utgard/r5p1/common/mali_timeline_fence_wait.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_timeline_fence_wait.h
+ *
+ * This file contains functions used to wait until a Timeline fence is signaled.
+ */
+
+#ifndef __MALI_TIMELINE_FENCE_WAIT_H__
+#define __MALI_TIMELINE_FENCE_WAIT_H__
+
+#include "mali_osk.h"
+#include "mali_timeline.h"
+
+/**
+ * If used as the timeout argument in @ref mali_timeline_fence_wait, a timer is not used and the
+ * function only returns when the fence is signaled.
+ */
+#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER ((u32) -1)
+
+/**
+ * If used as the timeout argument in @ref mali_timeline_fence_wait, the function will return
+ * immediately with the current state of the fence.
+ */
+#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY 0
+
+/**
+ * Fence wait tracker.
+ *
+ * The fence wait tracker is added to the Timeline system with the fence we are waiting on as a
+ * dependency.  We will then perform a blocking wait, possibly with a timeout, until the tracker is
+ * activated, which happens when the fence is signaled.
+ */
+struct mali_timeline_fence_wait_tracker {
+	mali_bool activated;                  /**< MALI_TRUE if the tracker has been activated, MALI_FALSE if not. */
+	_mali_osk_atomic_t refcount;          /**< Reference count. */
+	struct mali_timeline_system *system;  /**< Timeline system. */
+	struct mali_timeline_tracker tracker; /**< Timeline tracker. */
+};
+
+/**
+ * Wait for a fence to be signaled, or timeout is reached.
+ *
+ * @param system Timeline system.
+ * @param fence Fence to wait on.
+ * @param timeout Timeout in ms, or MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER or
+ * MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY.
+ * @return MALI_TRUE if signaled, MALI_FALSE if timed out.
+ */
+mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout);
+
+/**
+ * Used by the Timeline system to activate a fence wait tracker.
+ *
+ * @param fence_wait_tracker Fence waiter tracker.
+ */
+void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *fence_wait_tracker);
+
+#endif /* __MALI_TIMELINE_FENCE_WAIT_H__ */
diff --git a/utgard/r5p1/common/mali_timeline_sync_fence.c b/utgard/r5p1/common/mali_timeline_sync_fence.c
new file mode 100755
index 0000000..73843f0
--- /dev/null
+++ b/utgard/r5p1/common/mali_timeline_sync_fence.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timeline_sync_fence.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_sync.h"
+
+#if defined(CONFIG_SYNC)
+
+/**
+ * Creates a sync fence tracker and a sync fence.  Adds sync fence tracker to Timeline system and
+ * returns sync fence.  The sync fence will be signaled when the sync fence tracker is activated.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return Sync fence that will be signaled when tracker is activated.
+ */
+static struct sync_fence *mali_timeline_sync_fence_create_and_add_tracker(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	struct mali_timeline_sync_fence_tracker *sync_fence_tracker;
+	struct sync_fence                       *sync_fence;
+	struct mali_timeline_fence               fence;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+	/* Allocate sync fence tracker. */
+	sync_fence_tracker = _mali_osk_calloc(1, sizeof(struct mali_timeline_sync_fence_tracker));
+	if (NULL == sync_fence_tracker) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_fence_tracker allocation failed\n"));
+		return NULL;
+	}
+
+	/* Create sync flag. */
+	MALI_DEBUG_ASSERT_POINTER(timeline->sync_tl);
+	sync_fence_tracker->flag = mali_sync_flag_create(timeline->sync_tl, point);
+	if (NULL == sync_fence_tracker->flag) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_flag creation failed\n"));
+		_mali_osk_free(sync_fence_tracker);
+		return NULL;
+	}
+
+	/* Create sync fence from sync flag. */
+	sync_fence = mali_sync_flag_create_fence(sync_fence_tracker->flag);
+	if (NULL == sync_fence) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_fence creation failed\n"));
+		mali_sync_flag_put(sync_fence_tracker->flag);
+		_mali_osk_free(sync_fence_tracker);
+		return NULL;
+	}
+
+	/* Setup fence for tracker. */
+	_mali_osk_memset(&fence, 0, sizeof(struct mali_timeline_fence));
+	fence.sync_fd = -1;
+	fence.points[timeline->id] = point;
+
+	/* Finally, add the tracker to Timeline system. */
+	mali_timeline_tracker_init(&sync_fence_tracker->tracker, MALI_TIMELINE_TRACKER_SYNC, &fence, sync_fence_tracker);
+	point = mali_timeline_system_add_tracker(timeline->system, &sync_fence_tracker->tracker, MALI_TIMELINE_NONE);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT == point);
+
+	return sync_fence;
+}
+
+s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence)
+{
+	u32 i;
+	struct sync_fence *sync_fence_acc = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline;
+		struct sync_fence *sync_fence;
+
+		if (MALI_TIMELINE_NO_POINT == fence->points[i]) continue;
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		sync_fence = mali_timeline_sync_fence_create_and_add_tracker(timeline, fence->points[i]);
+		if (NULL == sync_fence) goto error;
+
+		if (NULL != sync_fence_acc) {
+			/* Merge sync fences. */
+			sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence);
+			if (NULL == sync_fence_acc) goto error;
+		} else {
+			/* This was the first sync fence created. */
+			sync_fence_acc = sync_fence;
+		}
+	}
+
+	if (-1 != fence->sync_fd) {
+		struct sync_fence *sync_fence;
+
+		sync_fence = sync_fence_fdget(fence->sync_fd);
+		if (NULL == sync_fence) goto error;
+
+		if (NULL != sync_fence_acc) {
+			sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence);
+			if (NULL == sync_fence_acc) goto error;
+		} else {
+			sync_fence_acc = sync_fence;
+		}
+	}
+
+	if (NULL == sync_fence_acc) {
+		MALI_DEBUG_ASSERT_POINTER(system->signaled_sync_tl);
+
+		/* There was nothing to wait on, so return an already signaled fence. */
+
+		sync_fence_acc = mali_sync_timeline_create_signaled_fence(system->signaled_sync_tl);
+		if (NULL == sync_fence_acc) goto error;
+	}
+
+	/* Return file descriptor for the accumulated sync fence. */
+	return mali_sync_fence_fd_alloc(sync_fence_acc);
+
+error:
+	if (NULL != sync_fence_acc) {
+		sync_fence_put(sync_fence_acc);
+	}
+
+	return -1;
+}
+
+void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker->flag);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for sync fence tracker\n"));
+
+	/* Signal flag and release reference. */
+	mali_sync_flag_signal(sync_fence_tracker->flag, 0);
+	mali_sync_flag_put(sync_fence_tracker->flag);
+
+	/* Nothing can wait on this tracker, so nothing to schedule after release. */
+	schedule_mask = mali_timeline_tracker_release(&sync_fence_tracker->tracker);
+	MALI_DEBUG_ASSERT(MALI_SCHEDULER_MASK_EMPTY == schedule_mask);
+
+	_mali_osk_free(sync_fence_tracker);
+}
+
+#endif /* defined(CONFIG_SYNC) */
diff --git a/utgard/r5p1/common/mali_timeline_sync_fence.h b/utgard/r5p1/common/mali_timeline_sync_fence.h
new file mode 100755
index 0000000..29a3822
--- /dev/null
+++ b/utgard/r5p1/common/mali_timeline_sync_fence.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_timeline_sync_fence.h
+ *
+ * This file contains code related to creating sync fences from timeline fences.
+ */
+
+#ifndef __MALI_TIMELINE_SYNC_FENCE_H__
+#define __MALI_TIMELINE_SYNC_FENCE_H__
+
+#include "mali_timeline.h"
+
+#if defined(CONFIG_SYNC)
+
+/**
+ * Sync fence tracker.
+ */
+struct mali_timeline_sync_fence_tracker {
+	struct mali_sync_flag        *flag;    /**< Sync flag used to connect tracker and sync fence. */
+	struct mali_timeline_tracker  tracker; /**< Timeline tracker. */
+};
+
+/**
+ * Create a sync fence that will be signaled when @ref fence is signaled.
+ *
+ * @param system Timeline system.
+ * @param fence Fence to create sync fence from.
+ * @return File descriptor for new sync fence, or -1 on error.
+ */
+s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence);
+
+/**
+ * Used by the Timeline system to activate a sync fence tracker.
+ *
+ * @param sync_fence_tracker Sync fence tracker.
+ *
+ */
+void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker);
+
+#endif /* defined(CONFIG_SYNC) */
+
+#endif /* __MALI_TIMELINE_SYNC_FENCE_H__ */
diff --git a/utgard/r5p1/common/mali_ukk.h b/utgard/r5p1/common/mali_ukk.h
new file mode 100755
index 0000000..fdfce57
--- /dev/null
+++ b/utgard/r5p1/common/mali_ukk.h
@@ -0,0 +1,541 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_ukk.h
+ * Defines the kernel-side interface of the user-kernel interface
+ */
+
+#ifndef __MALI_UKK_H__
+#define __MALI_UKK_H__
+
+#include "mali_osk.h"
+#include "mali_uk_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs
+ *
+ * - The _mali_uk functions are an abstraction of the interface to the device
+ * driver. On certain OSs, this would be implemented via the IOCTL interface.
+ * On other OSs, it could be via extension of some Device Driver Class, or
+ * direct function call for Bare metal/RTOSs.
+ * - It is important to note that:
+ *   -  The Device Driver has implemented the _mali_ukk set of functions
+ *   -  The Base Driver calls the corresponding set of _mali_uku functions.
+ * - What requires porting is solely the calling mechanism from User-side to
+ * Kernel-side, and propagating back the results.
+ * - Each U/K function is associated with a (group, number) pair from
+ * \ref _mali_uk_functions to make it possible for a common function in the
+ * Base Driver and Device Driver to route User/Kernel calls from/to the
+ * correct _mali_uk function. For example, in an IOCTL system, the IOCTL number
+ * would be formed based on the group and number assigned to the _mali_uk
+ * function, as listed in \ref _mali_uk_functions. On the user-side, each
+ * _mali_uku function would just make an IOCTL with the IOCTL-code being an
+ * encoded form of the (group, number) pair. On the kernel-side, the Device
+ * Driver's IOCTL handler decodes the IOCTL-code back into a (group, number)
+ * pair, and uses this to determine which corresponding _mali_ukk should be
+ * called.
+ *   - Refer to \ref _mali_uk_functions for more information about this
+ * (group, number) pairing.
+ * - In a system where there is no distinction between user and kernel-side,
+ * the U/K interface may be implemented as:@code
+ * MALI_STATIC_INLINE _mali_osk_errcode_t _mali_uku_examplefunction( _mali_uk_examplefunction_s *args )
+ * {
+ *     return mali_ukk_examplefunction( args );
+ * }
+ * @endcode
+ * - Therefore, all U/K calls behave \em as \em though they were direct
+ * function calls (but the \b implementation \em need \em not be a direct
+ * function calls)
+ *
+ * @note Naming the _mali_uk functions the same on both User and Kernel sides
+ * on non-RTOS systems causes debugging issues when setting breakpoints. In
+ * this case, it is not clear which function the breakpoint is put on.
+ * Therefore the _mali_uk functions in user space are prefixed with \c _mali_uku
+ * and in kernel space with \c _mali_ukk. The naming for the argument
+ * structures is unaffected.
+ *
+ * - The _mali_uk functions are synchronous.
+ * - Arguments to the _mali_uk functions are passed in a structure. The only
+ * parameter passed to the _mali_uk functions is a pointer to this structure.
+ * This first member of this structure, ctx, is a pointer to a context returned
+ * by _mali_uku_open(). For example:@code
+ * typedef struct
+ * {
+ *     void *ctx;
+ *     u32 number_of_cores;
+ * } _mali_uk_get_gp_number_of_cores_s;
+ * @endcode
+ *
+ * - Each _mali_uk function has its own argument structure named after the
+ *  function. The argument is distinguished by the _s suffix.
+ * - The argument types are defined by the base driver and user-kernel
+ *  interface.
+ * - All _mali_uk functions return a standard \ref _mali_osk_errcode_t.
+ * - Only arguments of type input or input/output need be initialized before
+ * calling a _mali_uk function.
+ * - Arguments of type output and input/output are only valid when the
+ * _mali_uk function returns \ref _MALI_OSK_ERR_OK.
+ * - The \c ctx member is always invalid after it has been used by a
+ * _mali_uk function, except for the context management functions
+ *
+ *
+ * \b Interface \b restrictions
+ *
+ * The requirements of the interface mean that an implementation of the
+ * User-kernel interface may do no 'real' work. For example, the following are
+ * illegal in the User-kernel implementation:
+ * - Calling functions necessary for operation on all systems,  which would
+ * not otherwise get called on RTOS systems.
+ *     - For example, a  U/K interface that calls multiple _mali_ukk functions
+ * during one particular U/K call. This could not be achieved by the same code
+ * which uses direct function calls for the U/K interface.
+ * -  Writing in values to the args members, when otherwise these members would
+ * not hold a useful value for a direct function call U/K interface.
+ *     - For example, U/K interface implementation that take NULL members in
+ * their arguments structure from the user side, but those members are
+ * replaced with non-NULL values in the kernel-side of the U/K interface
+ * implementation. A scratch area for writing data is one such example. In this
+ * case, a direct function call U/K interface would segfault, because no code
+ * would be present to replace the NULL pointer with a meaningful pointer.
+ *     - Note that we discourage the case where the U/K implementation changes
+ * a NULL argument member to non-NULL, and then the Device Driver code (outside
+ * of the U/K layer) re-checks this member for NULL, and corrects it when
+ * necessary. Whilst such code works even on direct function call U/K
+ * intefaces, it reduces the testing coverage of the Device Driver code. This
+ * is because we have no way of testing the NULL == value path on an OS
+ * implementation.
+ *
+ * A number of allowable examples exist where U/K interfaces do 'real' work:
+ * - The 'pointer switching' technique for \ref _mali_ukk_get_system_info
+ *     - In this case, without the pointer switching on direct function call
+ * U/K interface, the Device Driver code still sees the same thing: a pointer
+ * to which it can write memory. This is because such a system has no
+ * distinction between a user and kernel pointer.
+ * - Writing an OS-specific value into the ukk_private member for
+ * _mali_ukk_mem_mmap().
+ *     - In this case, this value is passed around by Device Driver code, but
+ * its actual value is never checked. Device Driver code simply passes it from
+ * the U/K layer to the OSK layer, where it can be acted upon. In this case,
+ * \em some OS implementations of the U/K (_mali_ukk_mem_mmap()) and OSK
+ * (_mali_osk_mem_mapregion_init()) functions will collaborate on the
+ *  meaning of ukk_private member. On other OSs, it may be unused by both
+ * U/K and OSK layers
+ *     - Therefore, on error inside the U/K interface implementation itself,
+ * it will be as though the _mali_ukk function itself had failed, and cleaned
+ * up after itself.
+ *     - Compare this to a direct function call U/K implementation, where all
+ * error cleanup is handled by the _mali_ukk function itself. The direct
+ * function call U/K interface implementation is automatically atomic.
+ *
+ * The last example highlights a consequence of all U/K interface
+ * implementations: they must be atomic with respect to the Device Driver code.
+ * And therefore, should Device Driver code succeed but the U/K implementation
+ * fail afterwards (but before return to user-space), then the U/K
+ * implementation must cause appropriate cleanup actions to preserve the
+ * atomicity of the interface.
+ *
+ * @{
+ */
+
+
+/** @defgroup _mali_uk_context U/K Context management
+ *
+ * These functions allow for initialisation of the user-kernel interface once per process.
+ *
+ * Generally the context will store the OS specific object to communicate with the kernel device driver and further
+ * state information required by the specific implementation. The context is shareable among all threads in the caller process.
+ *
+ * On IOCTL systems, this is likely to be a file descriptor as a result of opening the kernel device driver.
+ *
+ * On a bare-metal/RTOS system with no distinction between kernel and
+ * user-space, the U/K interface simply calls the _mali_ukk variant of the
+ * function by direct function call. In this case, the context returned is the
+ * mali_session_data from _mali_ukk_open().
+ *
+ * The kernel side implementations of the U/K interface expect the first member of the argument structure to
+ * be the context created by _mali_uku_open(). On some OS implementations, the meaning of this context
+ * will be different between user-side and kernel-side. In which case, the kernel-side will need to replace this context
+ * with the kernel-side equivalent, because user-side will not have access to kernel-side data. The context parameter
+ * in the argument structure therefore has to be of type input/output.
+ *
+ * It should be noted that the caller cannot reuse the \c ctx member of U/K
+ * argument structure after a U/K call, because it may be overwritten. Instead,
+ * the context handle must always be stored  elsewhere, and copied into
+ * the appropriate U/K argument structure for each user-side call to
+ * the U/K interface. This is not usually a problem, since U/K argument
+ * structures are usually placed on the stack.
+ *
+ * @{ */
+
+/** @brief Begin a new Mali Device Driver session
+ *
+ * This is used to obtain a per-process context handle for all future U/K calls.
+ *
+ * @param context pointer to storage to return a (void*)context handle.
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_open(void **context);
+
+/** @brief End a Mali Device Driver session
+ *
+ * This should be called when the process no longer requires use of the Mali Device Driver.
+ *
+ * The context handle must not be used after it has been closed.
+ *
+ * @param context pointer to a stored (void*)context handle.
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_close(void **context);
+
+/** @} */ /* end group _mali_uk_context */
+
+
+/** @addtogroup _mali_uk_core U/K Core
+ *
+ * The core functions provide the following functionality:
+ * - verify that the user and kernel API are compatible
+ * - retrieve information about the cores and memory banks in the system
+ * - wait for the result of jobs started on a core
+ *
+ * @{ */
+
+/** @brief Waits for a job notification.
+ *
+ * Sleeps until notified or a timeout occurs. Returns information about the notification.
+ *
+ * @param args see _mali_uk_wait_for_notification_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args);
+
+/** @brief Post a notification to the notification queue of this application.
+ *
+ * @param args see _mali_uk_post_notification_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args);
+
+/** @brief Verifies if the user and kernel side of this API are compatible.
+ *
+ * This function is obsolete, but kept to allow old, incompatible user space
+ * clients to robustly detect the incompatibility.
+ *
+ * @param args see _mali_uk_get_api_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args);
+
+/** @brief Verifies if the user and kernel side of this API are compatible.
+ *
+ * @param args see _mali_uk_get_api_version_v2_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args);
+
+/** @brief Get the user space settings applicable for calling process.
+ *
+ * @param args see _mali_uk_get_user_settings_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args);
+
+/** @brief Get a user space setting applicable for calling process.
+ *
+ * @param args see _mali_uk_get_user_setting_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args);
+
+/* @brief Grant or deny high priority scheduling for this session.
+ *
+ * @param args see _mali_uk_request_high_priority_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args);
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @addtogroup _mali_uk_memory U/K Memory
+ *
+ * The memory functions provide functionality with and without a Mali-MMU present.
+ *
+ * For Mali-MMU based systems, the following functionality is provided:
+ * - Initialize and terminate MALI virtual address space
+ * - Allocate/deallocate physical memory to a MALI virtual address range and map into/unmap from the
+ * current process address space
+ * - Map/unmap external physical memory into the MALI virtual address range
+ *
+ * For Mali-nonMMU based systems:
+ * - Allocate/deallocate MALI memory
+ *
+ * @{ */
+
+/** @brief Map Mali Memory into the current user process
+ *
+ * Maps Mali memory into the current user process in a generic way.
+ *
+ * This function is to be used for Mali-MMU mode. The function is available in both Mali-MMU and Mali-nonMMU modes,
+ * but should not be called by a user process in Mali-nonMMU mode.
+ *
+ * The implementation and operation of _mali_ukk_mem_mmap() is dependant on whether the driver is built for Mali-MMU
+ * or Mali-nonMMU:
+ * - In the nonMMU case, _mali_ukk_mem_mmap() requires a physical address to be specified. For this reason, an OS U/K
+ * implementation should not allow this to be called from user-space. In any case, nonMMU implementations are
+ * inherently insecure, and so the overall impact is minimal. Mali-MMU mode should be used if security is desired.
+ * - In the MMU case, _mali_ukk_mem_mmap() the _mali_uk_mem_mmap_s::phys_addr
+ * member is used for the \em Mali-virtual address desired for the mapping. The
+ * implementation of _mali_ukk_mem_mmap() will allocate both the CPU-virtual
+ * and CPU-physical addresses, and can cope with mapping a contiguous virtual
+ * address range to a sequence of non-contiguous physical pages. In this case,
+ * the CPU-physical addresses are not communicated back to the user-side, as
+ * they are unnecsessary; the \em Mali-virtual address range must be used for
+ * programming Mali structures.
+ *
+ * In the second (MMU) case, _mali_ukk_mem_mmap() handles management of
+ * CPU-virtual and CPU-physical ranges, but the \em caller must manage the
+ * \em Mali-virtual address range from the user-side.
+ *
+ * @note Mali-virtual address ranges are entirely separate between processes.
+ * It is not possible for a process to accidentally corrupt another process'
+ * \em Mali-virtual address space.
+ *
+ * @param args see _mali_uk_mem_mmap_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_mmap(_mali_uk_mem_mmap_s *args);
+
+/** @brief Unmap Mali Memory from the current user process
+ *
+ * Unmaps Mali memory from the current user process in a generic way. This only operates on Mali memory supplied
+ * from _mali_ukk_mem_mmap().
+ *
+ * @param args see _mali_uk_mem_munmap_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_munmap(_mali_uk_mem_munmap_s *args);
+
+/** @brief Determine the buffer size necessary for an MMU page table dump.
+ * @param args see _mali_uk_query_mmu_page_table_dump_size_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args);
+/** @brief Dump MMU Page tables.
+ * @param args see _mali_uk_dump_mmu_page_table_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args);
+
+/** @brief Write user data to specified Mali memory without causing segfaults.
+ * @param args see _mali_uk_mem_write_safe_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args);
+
+/** @} */ /* end group _mali_uk_memory */
+
+
+/** @addtogroup _mali_uk_pp U/K Fragment Processor
+ *
+ * The Fragment Processor (aka PP (Pixel Processor)) functions provide the following functionality:
+ * - retrieving version of the fragment processors
+ * - determine number of fragment processors
+ * - starting a job on a fragment processor
+ *
+ * @{ */
+
+/** @brief Issue a request to start a new job on a Fragment Processor.
+ *
+ * If the request fails args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can
+ * try to start the job again.
+ *
+ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job
+ * which the hardware hasn't actually started processing yet. In this case the new job will be started instead and the
+ * existing one returned, otherwise the new job is started and the status field args->status is set to
+ * _MALI_UK_START_JOB_STARTED.
+ *
+ * Job completion can be awaited with _mali_ukk_wait_for_notification().
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_pp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx, _mali_uk_pp_start_job_s *uargs);
+
+/**
+ * @brief Issue a request to start new jobs on both Vertex Processor and Fragment Processor.
+ *
+ * @note Will call into @ref _mali_ukk_pp_start_job and @ref _mali_ukk_gp_start_job.
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_pp_and_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx, _mali_uk_pp_and_gp_start_job_s *uargs);
+
+/** @brief Returns the number of Fragment Processors in the system
+ *
+ * @param args see _mali_uk_get_pp_number_of_cores_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args);
+
+/** @brief Returns the version that all Fragment Processor cores are compatible with.
+ *
+ * This function may only be called when _mali_ukk_get_pp_number_of_cores() indicated at least one Fragment
+ * Processor core is available.
+ *
+ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args);
+
+/** @brief Disable Write-back unit(s) on specified job
+ *
+ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h"
+ */
+void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args);
+
+
+/** @} */ /* end group _mali_uk_pp */
+
+
+/** @addtogroup _mali_uk_gp U/K Vertex Processor
+ *
+ * The Vertex Processor (aka GP (Geometry Processor)) functions provide the following functionality:
+ * - retrieving version of the Vertex Processors
+ * - determine number of Vertex Processors available
+ * - starting a job on a Vertex Processor
+ *
+ * @{ */
+
+/** @brief Issue a request to start a new job on a Vertex Processor.
+ *
+ * If the request fails args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can
+ * try to start the job again.
+ *
+ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job
+ * which the hardware hasn't actually started processing yet. In this case the new job will be started and the
+ * existing one returned, otherwise the new job is started and the status field args->status is set to
+ * _MALI_UK_START_JOB_STARTED.
+ *
+ * Job completion can be awaited with _mali_ukk_wait_for_notification().
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx, _mali_uk_gp_start_job_s *uargs);
+
+/** @brief Returns the number of Vertex Processors in the system.
+ *
+ * @param args see _mali_uk_get_gp_number_of_cores_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args);
+
+/** @brief Returns the version that all Vertex Processor cores are compatible with.
+ *
+ * This function may only be called when _mali_uk_get_gp_number_of_cores() indicated at least one Vertex
+ * Processor core is available.
+ *
+ * @param args see _mali_uk_get_gp_core_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args);
+
+/** @brief Resume or abort suspended Vertex Processor jobs.
+ *
+ * After receiving notification that a Vertex Processor job was suspended from
+ * _mali_ukk_wait_for_notification() you can use this function to resume or abort the job.
+ *
+ * @param args see _mali_uk_gp_suspend_response_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args);
+
+/** @} */ /* end group _mali_uk_gp */
+
+#if defined(CONFIG_MALI400_PROFILING)
+/** @addtogroup _mali_uk_profiling U/K Timeline profiling module
+ * @{ */
+
+/** @brief Add event to profiling buffer.
+ *
+ * @param args see _mali_uk_profiling_add_event_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args);
+
+/** @brief Return the total memory usage
+ *
+ * @param args see _mali_uk_profiling_memory_usage_get_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_memory_usage_get(_mali_uk_profiling_memory_usage_get_s *args);
+
+/** @} */ /* end group _mali_uk_profiling */
+#endif
+
+/** @addtogroup _mali_uk_vsync U/K VSYNC reporting module
+ * @{ */
+
+/** @brief Report events related to vsync.
+ *
+ * @note Events should be reported when starting to wait for vsync and when the
+ * waiting is finished. This information can then be used in kernel space to
+ * complement the GPU utilization metric.
+ *
+ * @param args see _mali_uk_vsync_event_report_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args);
+
+/** @} */ /* end group _mali_uk_vsync */
+
+/** @addtogroup _mali_sw_counters_report U/K Software counter reporting
+ * @{ */
+
+/** @brief Report software counters.
+ *
+ * @param args see _mali_uk_sw_counters_report_s in "mali_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args);
+
+/** @} */ /* end group _mali_sw_counters_report */
+
+/** @} */ /* end group u_k_api */
+
+/** @} */ /* end group uddapi */
+
+u32 _mali_ukk_report_memory_usage(void);
+
+u32 _mali_ukk_report_total_memory_size(void);
+
+u32 _mali_ukk_utilization_gp_pp(void);
+
+u32 _mali_ukk_utilization_gp(void);
+
+u32 _mali_ukk_utilization_pp(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UKK_H__ */
diff --git a/utgard/r5p1/common/mali_user_settings_db.c b/utgard/r5p1/common/mali_user_settings_db.c
new file mode 100755
index 0000000..54e1580
--- /dev/null
+++ b/utgard/r5p1/common/mali_user_settings_db.c
@@ -0,0 +1,147 @@
+/**
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_user_settings_db.h"
+#include "mali_session.h"
+
+static u32 mali_user_settings[_MALI_UK_USER_SETTING_MAX];
+const char *_mali_uk_user_setting_descriptions[] = _MALI_UK_USER_SETTING_DESCRIPTIONS;
+
+static void mali_user_settings_notify(_mali_uk_user_setting_t setting, u32 value)
+{
+	mali_bool done = MALI_FALSE;
+
+	/*
+	 * This function gets a bit complicated because we can't hold the session lock while
+	 * allocating notification objects.
+	 */
+
+	while (!done) {
+		u32 i;
+		u32 num_sessions_alloc;
+		u32 num_sessions_with_lock;
+		u32 used_notification_objects = 0;
+		_mali_osk_notification_t **notobjs;
+
+		/* Pre allocate the number of notifications objects we need right now (might change after lock has been taken) */
+		num_sessions_alloc = mali_session_get_count();
+		if (0 == num_sessions_alloc) {
+			/* No sessions to report to */
+			return;
+		}
+
+		notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc);
+		if (NULL == notobjs) {
+			MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n"));
+			return;
+		}
+
+		for (i = 0; i < num_sessions_alloc; i++) {
+			notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_SETTINGS_CHANGED,
+					sizeof(_mali_uk_settings_changed_s));
+			if (NULL != notobjs[i]) {
+				_mali_uk_settings_changed_s *data;
+				data = notobjs[i]->result_buffer;
+
+				data->setting = setting;
+				data->value = value;
+			} else {
+				MALI_PRINT_ERROR(("Failed to notify user space session about setting change (alloc failure %u)\n", i));
+			}
+		}
+
+		mali_session_lock();
+
+		/* number of sessions will not change while we hold the lock */
+		num_sessions_with_lock = mali_session_get_count();
+
+		if (num_sessions_alloc >= num_sessions_with_lock) {
+			/* We have allocated enough notification objects for all the sessions atm */
+			struct mali_session_data *session, *tmp;
+			MALI_SESSION_FOREACH(session, tmp, link) {
+				MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc);
+				if (NULL != notobjs[used_notification_objects]) {
+					mali_session_send_notification(session, notobjs[used_notification_objects]);
+					notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */
+				}
+				used_notification_objects++;
+			}
+			done = MALI_TRUE;
+		}
+
+		mali_session_unlock();
+
+		/* Delete any remaining/unused notification objects */
+		for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) {
+			if (NULL != notobjs[used_notification_objects]) {
+				_mali_osk_notification_delete(notobjs[used_notification_objects]);
+			}
+		}
+
+		_mali_osk_free(notobjs);
+	}
+}
+
+void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value)
+{
+	mali_bool notify = MALI_FALSE;
+
+	if (setting >= _MALI_UK_USER_SETTING_MAX) {
+		MALI_DEBUG_PRINT_ERROR(("Invalid user setting %ud\n"));
+		return;
+	}
+
+	if (mali_user_settings[setting] != value) {
+		notify = MALI_TRUE;
+	}
+
+	mali_user_settings[setting] = value;
+
+	if (notify) {
+		mali_user_settings_notify(setting, value);
+	}
+}
+
+u32 mali_get_user_setting(_mali_uk_user_setting_t setting)
+{
+	if (setting >= _MALI_UK_USER_SETTING_MAX) {
+		return 0;
+	}
+
+	return mali_user_settings[setting];
+}
+
+_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args)
+{
+	_mali_uk_user_setting_t setting;
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	setting = args->setting;
+
+	if (_MALI_UK_USER_SETTING_MAX > setting) {
+		args->value = mali_user_settings[setting];
+		return _MALI_OSK_ERR_OK;
+	} else {
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+}
+
+_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	_mali_osk_memcpy(args->settings, mali_user_settings, sizeof(mali_user_settings));
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r5p1/common/mali_user_settings_db.h b/utgard/r5p1/common/mali_user_settings_db.h
new file mode 100755
index 0000000..0732c3e
--- /dev/null
+++ b/utgard/r5p1/common/mali_user_settings_db.h
@@ -0,0 +1,39 @@
+/**
+ * Copyright (C) 2012-2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_USER_SETTINGS_DB_H__
+#define __MALI_USER_SETTINGS_DB_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+
+/** @brief Set Mali user setting in DB
+ *
+ * Update the DB with a new value for \a setting. If the value is different from theprevious set value running sessions will be notified of the change.
+ *
+ * @param setting the setting to be changed
+ * @param value the new value to set
+ */
+void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value);
+
+/** @brief Get current Mali user setting value from DB
+ *
+ * @param setting the setting to extract
+ * @return the value of the selected setting
+ */
+u32 mali_get_user_setting(_mali_uk_user_setting_t setting);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  /* __MALI_KERNEL_USER_SETTING__ */
diff --git a/utgard/r5p1/include/linux/mali/mali_utgard.h b/utgard/r5p1/include/linux/mali/mali_utgard.h
new file mode 100755
index 0000000..a3f7ac4
--- /dev/null
+++ b/utgard/r5p1/include/linux/mali/mali_utgard.h
@@ -0,0 +1,447 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_utgard.h
+ * Defines types and interface exposed by the Mali Utgard device driver
+ */
+
+#ifndef __MALI_UTGARD_H__
+#define __MALI_UTGARD_H__
+
+#include "mali_osk_types.h"
+
+#define MALI_GPU_NAME_UTGARD "mali-utgard"
+
+
+#define MALI_OFFSET_GP                    0x00000
+#define MALI_OFFSET_GP_MMU                0x03000
+
+#define MALI_OFFSET_PP0                   0x08000
+#define MALI_OFFSET_PP0_MMU               0x04000
+#define MALI_OFFSET_PP1                   0x0A000
+#define MALI_OFFSET_PP1_MMU               0x05000
+#define MALI_OFFSET_PP2                   0x0C000
+#define MALI_OFFSET_PP2_MMU               0x06000
+#define MALI_OFFSET_PP3                   0x0E000
+#define MALI_OFFSET_PP3_MMU               0x07000
+
+#define MALI_OFFSET_PP4                   0x28000
+#define MALI_OFFSET_PP4_MMU               0x1C000
+#define MALI_OFFSET_PP5                   0x2A000
+#define MALI_OFFSET_PP5_MMU               0x1D000
+#define MALI_OFFSET_PP6                   0x2C000
+#define MALI_OFFSET_PP6_MMU               0x1E000
+#define MALI_OFFSET_PP7                   0x2E000
+#define MALI_OFFSET_PP7_MMU               0x1F000
+
+#define MALI_OFFSET_L2_RESOURCE0          0x01000
+#define MALI_OFFSET_L2_RESOURCE1          0x10000
+#define MALI_OFFSET_L2_RESOURCE2          0x11000
+
+#define MALI400_OFFSET_L2_CACHE0          MALI_OFFSET_L2_RESOURCE0
+#define MALI450_OFFSET_L2_CACHE0          MALI_OFFSET_L2_RESOURCE1
+#define MALI450_OFFSET_L2_CACHE1          MALI_OFFSET_L2_RESOURCE0
+#define MALI450_OFFSET_L2_CACHE2          MALI_OFFSET_L2_RESOURCE2
+
+#define MALI_OFFSET_BCAST                 0x13000
+#define MALI_OFFSET_DLBU                  0x14000
+
+#define MALI_OFFSET_PP_BCAST              0x16000
+#define MALI_OFFSET_PP_BCAST_MMU          0x15000
+
+#define MALI_OFFSET_PMU                   0x02000
+#define MALI_OFFSET_DMA                   0x12000
+
+/* Mali-300 */
+
+#define MALI_GPU_RESOURCES_MALI300(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI300_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq)
+
+/* Mali-400 */
+
+#define MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+	/* Mali-450 */
+#define MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI450_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP4, pp3_irq, base_addr + MALI_OFFSET_PP4_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP5, pp4_irq, base_addr + MALI_OFFSET_PP5_MMU, pp4_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP6, pp5_irq, base_addr + MALI_OFFSET_PP6_MMU, pp5_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP6_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP4, pp4_irq, base_addr + MALI_OFFSET_PP4_MMU, pp4_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP5, pp5_irq, base_addr + MALI_OFFSET_PP5_MMU, pp5_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(6, base_addr + MALI_OFFSET_PP6, pp6_irq, base_addr + MALI_OFFSET_PP6_MMU, pp6_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(7, base_addr + MALI_OFFSET_PP7, pp7_irq, base_addr + MALI_OFFSET_PP7_MMU, pp7_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP8_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCE_L2(addr) \
+	{ \
+		.name = "Mali_L2", \
+			.flags = IORESOURCE_MEM, \
+				 .start = addr, \
+					  .end   = addr + 0x200, \
+	},
+
+#define MALI_GPU_RESOURCE_GP(gp_addr, gp_irq) \
+	{ \
+		.name = "Mali_GP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_addr, \
+					  .end =   gp_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_irq, \
+					  .end   = gp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_GP_WITH_MMU(gp_addr, gp_irq, gp_mmu_addr, gp_mmu_irq) \
+	{ \
+		.name = "Mali_GP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_addr, \
+					  .end =   gp_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_irq, \
+					  .end   = gp_irq, \
+	}, \
+	{ \
+		.name = "Mali_GP_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_mmu_addr, \
+					  .end =   gp_mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_mmu_irq, \
+					  .end =   gp_mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_PP(pp_addr, pp_irq) \
+	{ \
+		.name = "Mali_PP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_PP_WITH_MMU(id, pp_addr, pp_irq, pp_mmu_addr, pp_mmu_irq) \
+	{ \
+		.name = "Mali_PP" #id, \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_mmu_addr, \
+					  .end =   pp_mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_mmu_irq, \
+					  .end =   pp_mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_MMU(mmu_addr, mmu_irq) \
+	{ \
+		.name = "Mali_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = mmu_addr, \
+					  .end =   mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = mmu_irq, \
+					  .end =   mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_PMU(pmu_addr) \
+	{ \
+		.name = "Mali_PMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pmu_addr, \
+					  .end =   pmu_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_DMA(dma_addr) \
+	{ \
+		.name = "Mali_DMA", \
+			.flags = IORESOURCE_MEM, \
+				 .start = dma_addr, \
+					  .end = dma_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_DLBU(dlbu_addr) \
+	{ \
+		.name = "Mali_DLBU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = dlbu_addr, \
+					  .end = dlbu_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_BCAST(bcast_addr) \
+	{ \
+		.name = "Mali_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = bcast_addr, \
+					  .end = bcast_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_PP_BCAST(pp_addr, pp_irq) \
+	{ \
+		.name = "Mali_PP_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP_Broadcast_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_PP_MMU_BCAST(pp_mmu_bcast_addr) \
+	{ \
+		.name = "Mali_PP_MMU_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_mmu_bcast_addr, \
+					  .end = pp_mmu_bcast_addr + 0x100, \
+	},
+
+	struct mali_gpu_utilization_data {
+		unsigned int utilization_gpu; /* Utilization for GP and all PP cores combined, 0 = no utilization, 256 = full utilization */
+		unsigned int utilization_gp;  /* Utilization for GP core only, 0 = no utilization, 256 = full utilization */
+		unsigned int utilization_pp;  /* Utilization for all PP cores combined, 0 = no utilization, 256 = full utilization */
+	};
+
+	struct mali_gpu_clk_item {
+		unsigned int clock; /* unit(MHz) */
+		unsigned int vol;
+	};
+
+	struct mali_gpu_clock {
+		struct mali_gpu_clk_item *item;
+		unsigned int num_of_steps;
+	};
+
+	struct mali_gpu_device_data {
+		/* Shared GPU memory */
+		unsigned long shared_mem_size;
+
+		/*
+		 * Mali PMU switch delay.
+		 * Only needed if the power gates are connected to the PMU in a high fanout
+		 * network. This value is the number of Mali clock cycles it takes to
+		 * enable the power gates and turn on the power mesh.
+		 * This value will have no effect if a daisy chain implementation is used.
+		 */
+		u32 pmu_switch_delay;
+
+		/* Mali Dynamic power domain configuration in sequence from 0-11
+		 *  GP  PP0 PP1  PP2  PP3  PP4  PP5  PP6  PP7, L2$0 L2$1 L2$2
+		 */
+		u16 pmu_domain_config[12];
+
+		/* Dedicated GPU memory range (physical). */
+		unsigned long dedicated_mem_start;
+		unsigned long dedicated_mem_size;
+
+		/* Frame buffer memory to be accessible by Mali GPU (physical) */
+		unsigned long fb_start;
+		unsigned long fb_size;
+
+		/* Max runtime [ms] for jobs */
+		int max_job_runtime;
+
+		/* Report GPU utilization and related control in this interval (specified in ms) */
+		unsigned long control_interval;
+
+		/* Function that will receive periodic GPU utilization numbers */
+		void (*utilization_callback)(struct mali_gpu_utilization_data *data);
+
+		/* Fuction that platform callback for freq setting, needed when CONFIG_MALI_DVFS enabled */
+		int (*set_freq)(int setting_clock_step);
+		/* Function that platfrom report it's clock info which driver can set, needed when CONFIG_MALI_DVFS enabled */
+		void (*get_clock_info)(struct mali_gpu_clock **data);
+		/* Function that get the current clock info, needed when CONFIG_MALI_DVFS enabled */
+		int (*get_freq)(void);
+	};
+
+	/**
+	 * Pause the scheduling and power state changes of Mali device driver.
+	 * mali_dev_resume() must always be called as soon as possible after this function
+	 * in order to resume normal operation of the Mali driver.
+	 */
+	void mali_dev_pause(void);
+
+	/**
+	 * Resume scheduling and allow power changes in Mali device driver.
+	 * This must always be called after mali_dev_pause().
+	 */
+	void mali_dev_resume(void);
+
+	/** @brief Set the desired number of PP cores to use.
+	 *
+	 * The internal Mali PMU will be used, if present, to physically power off the PP cores.
+	 *
+	 * @param num_cores The number of desired cores
+	 * @return 0 on success, otherwise error. -EINVAL means an invalid number of cores was specified.
+	 */
+	int mali_perf_set_num_pp_cores(unsigned int num_cores);
+
+#endif
diff --git a/utgard/r5p1/include/linux/mali/mali_utgard_counters.h b/utgard/r5p1/include/linux/mali/mali_utgard_counters.h
new file mode 100755
index 0000000..06097fa
--- /dev/null
+++ b/utgard/r5p1/include/linux/mali/mali_utgard_counters.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (C) 2010-2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _MALI_UTGARD_COUNTERS_H_
+#define _MALI_UTGARD_COUNTERS_H_
+
+typedef struct {
+	void *unused;
+} mali_cinstr_counter_info;
+
+typedef enum {
+	MALI_CINSTR_COUNTER_SOURCE_EGL      =     0,
+	MALI_CINSTR_COUNTER_SOURCE_OPENGLES =  1000,
+	MALI_CINSTR_COUNTER_SOURCE_OPENVG   =  2000,
+	MALI_CINSTR_COUNTER_SOURCE_GP       =  3000,
+	MALI_CINSTR_COUNTER_SOURCE_PP       =  4000,
+} cinstr_counter_source;
+
+#define MALI_CINSTR_EGL_FIRST_COUNTER MALI_CINSTR_COUNTER_SOURCE_EGL
+#define MALI_CINSTR_EGL_LAST_COUNTER (MALI_CINSTR_COUNTER_SOURCE_EGL + 999)
+
+#define MALI_CINSTR_GLES_FIRST_COUNTER MALI_CINSTR_COUNTER_SOURCE_OPENGLES
+#define MALI_CINSTR_GLES_LAST_COUNTER (MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 999)
+
+#define MALI_CINSTR_VG_FIRST_COUNTER MALI_CINSTR_COUNTER_SOURCE_OPENVG
+#define MALI_CINSTR_VG_LAST_COUNTER (MALI_CINSTR_COUNTER_SOURCE_OPENVG + 999)
+
+#define MALI_CINSTR_GP_FIRST_COUNTER MALI_CINSTR_COUNTER_SOURCE_GP
+#define MALI_CINSTR_GP_LAST_COUNTER (MALI_CINSTR_COUNTER_SOURCE_GP + 999)
+
+#define MALI_CINSTR_PP_FIRST_COUNTER MALI_CINSTR_COUNTER_SOURCE_PP
+#define MALI_CINSTR_PP_LAST_COUNTER (MALI_CINSTR_COUNTER_SOURCE_PP + 999)
+
+
+typedef enum {
+	/* EGL counters */
+
+	MALI_CINSTR_EGL_BLIT_TIME                                            = MALI_CINSTR_COUNTER_SOURCE_EGL + 0,
+
+	/* Last counter in the EGL set */
+	MALI_CINSTR_EGL_MAX_COUNTER                                           = MALI_CINSTR_COUNTER_SOURCE_EGL + 1,
+
+	/* GLES counters */
+
+	MALI_CINSTR_GLES_DRAW_ELEMENTS_CALLS                                 = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 0,
+	MALI_CINSTR_GLES_DRAW_ELEMENTS_NUM_INDICES                           = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 1,
+	MALI_CINSTR_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED                       = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 2,
+	MALI_CINSTR_GLES_DRAW_ARRAYS_CALLS                                   = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 3,
+	MALI_CINSTR_GLES_DRAW_ARRAYS_NUM_TRANSFORMED                         = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 4,
+	MALI_CINSTR_GLES_DRAW_POINTS                                         = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 5,
+	MALI_CINSTR_GLES_DRAW_LINES                                          = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 6,
+	MALI_CINSTR_GLES_DRAW_LINE_LOOP                                      = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 7,
+	MALI_CINSTR_GLES_DRAW_LINE_STRIP                                     = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 8,
+	MALI_CINSTR_GLES_DRAW_TRIANGLES                                      = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 9,
+	MALI_CINSTR_GLES_DRAW_TRIANGLE_STRIP                                 = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 10,
+	MALI_CINSTR_GLES_DRAW_TRIANGLE_FAN                                   = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 11,
+	MALI_CINSTR_GLES_NON_VBO_DATA_COPY_TIME                              = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 12,
+	MALI_CINSTR_GLES_UNIFORM_BYTES_COPIED_TO_MALI                        = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 13,
+	MALI_CINSTR_GLES_UPLOAD_TEXTURE_TIME                                 = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 14,
+	MALI_CINSTR_GLES_UPLOAD_VBO_TIME                                     = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 15,
+	MALI_CINSTR_GLES_NUM_FLUSHES                                         = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 16,
+	MALI_CINSTR_GLES_NUM_VSHADERS_GENERATED                              = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 17,
+	MALI_CINSTR_GLES_NUM_FSHADERS_GENERATED                              = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 18,
+	MALI_CINSTR_GLES_VSHADER_GEN_TIME                                    = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 19,
+	MALI_CINSTR_GLES_FSHADER_GEN_TIME                                    = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 20,
+	MALI_CINSTR_GLES_INPUT_TRIANGLES                                     = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 21,
+	MALI_CINSTR_GLES_VXCACHE_HIT                                         = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 22,
+	MALI_CINSTR_GLES_VXCACHE_MISS                                        = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 23,
+	MALI_CINSTR_GLES_VXCACHE_COLLISION                                   = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 24,
+	MALI_CINSTR_GLES_CULLED_TRIANGLES                                    = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 25,
+	MALI_CINSTR_GLES_CULLED_LINES                                        = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 26,
+	MALI_CINSTR_GLES_BACKFACE_TRIANGLES                                  = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 27,
+	MALI_CINSTR_GLES_GBCLIP_TRIANGLES                                    = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 28,
+	MALI_CINSTR_GLES_GBCLIP_LINES                                        = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 29,
+	MALI_CINSTR_GLES_TRIANGLES_DRAWN                                     = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 30,
+	MALI_CINSTR_GLES_DRAWCALL_TIME                                       = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 31,
+	MALI_CINSTR_GLES_TRIANGLES_COUNT                                     = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 32,
+	MALI_CINSTR_GLES_INDEPENDENT_TRIANGLES_COUNT                         = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 33,
+	MALI_CINSTR_GLES_STRIP_TRIANGLES_COUNT                               = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 34,
+	MALI_CINSTR_GLES_FAN_TRIANGLES_COUNT                                 = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 35,
+	MALI_CINSTR_GLES_LINES_COUNT                                         = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 36,
+	MALI_CINSTR_GLES_INDEPENDENT_LINES_COUNT                             = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 37,
+	MALI_CINSTR_GLES_STRIP_LINES_COUNT                                   = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 38,
+	MALI_CINSTR_GLES_LOOP_LINES_COUNT                                    = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 39,
+	MALI_CINSTR_GLES_POINTS_COUNT                                        = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 40,
+
+	/* Last counter in the GLES set */
+	MALI_CINSTR_GLES_MAX_COUNTER                                         = MALI_CINSTR_COUNTER_SOURCE_OPENGLES + 41,
+
+	/* OpenVG counters */
+
+	MALI_CINSTR_VG_MASK_COUNTER                                          = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 0,
+	MALI_CINSTR_VG_CLEAR_COUNTER                                         = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 1,
+	MALI_CINSTR_VG_APPEND_PATH_COUNTER                                   = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 2,
+	MALI_CINSTR_VG_APPEND_PATH_DATA_COUNTER                              = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 3,
+	MALI_CINSTR_VG_MODIFY_PATH_COORDS_COUNTER                            = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 4,
+	MALI_CINSTR_VG_TRANSFORM_PATH_COUNTER                                = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 5,
+	MALI_CINSTR_VG_INTERPOLATE_PATH_COUNTER                              = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 6,
+	MALI_CINSTR_VG_PATH_LENGTH_COUNTER                                   = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 7,
+	MALI_CINSTR_VG_POINT_ALONG_PATH_COUNTER                              = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 8,
+	MALI_CINSTR_VG_PATH_BOUNDS_COUNTER                                   = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 9,
+	MALI_CINSTR_VG_PATH_TRANSFORMED_BOUNDS_COUNTER                       = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 10,
+	MALI_CINSTR_VG_DRAW_PATH_COUNTER                                     = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 11,
+	MALI_CINSTR_VG_CLEAR_IMAGE_COUNTER                                   = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 12,
+	MALI_CINSTR_VG_IMAGE_SUB_DATA_COUNTER                                = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 13,
+	MALI_CINSTR_VG_GET_IMAGE_SUB_DATA_COUNTER                            = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 14,
+	MALI_CINSTR_VG_COPY_IMAGE_COUNTER                                    = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 15,
+	MALI_CINSTR_VG_DRAW_IMAGE_COUNTER                                    = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 16,
+	MALI_CINSTR_VG_SET_PIXELS_COUNTER                                    = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 17,
+	MALI_CINSTR_VG_WRITE_PIXELS_COUNTER                                  = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 18,
+	MALI_CINSTR_VG_GET_PIXELS_COUNTER                                    = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 19,
+	MALI_CINSTR_VG_READ_PIXELS_COUNTER                                   = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 20,
+	MALI_CINSTR_VG_COPY_PIXELS_COUNTER                                   = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 21,
+	MALI_CINSTR_VG_COLOR_MATRIX_COUNTER                                  = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 22,
+	MALI_CINSTR_VG_CONVOLVE_COUNTER                                      = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 23,
+	MALI_CINSTR_VG_SEPARABLE_CONVOLVE_COUNTER                            = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 24,
+	MALI_CINSTR_VG_GAUSSIAN_BLUR_COUNTER                                 = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 25,
+	MALI_CINSTR_VG_LOOKUP_COUNTER                                        = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 26,
+	MALI_CINSTR_VG_LOOKUP_SINGLE_COUNTER                                 = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 27,
+	MALI_CINSTR_VG_CONTEXT_CREATE_COUNTER                                = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 28,
+	MALI_CINSTR_VG_STROKED_CUBICS_COUNTER                                = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 29,
+	MALI_CINSTR_VG_STROKED_QUADS_COUNTER                                 = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 30,
+	MALI_CINSTR_VG_STROKED_ARCS_COUNTER                                  = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 31,
+	MALI_CINSTR_VG_STROKED_LINES_COUNTER                                 = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 32,
+	MALI_CINSTR_VG_FILLED_CUBICS_COUNTER                                 = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 33,
+	MALI_CINSTR_VG_FILLED_QUADS_COUNTER                                  = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 34,
+	MALI_CINSTR_VG_FILLED_ARCS_COUNTER                                   = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 35,
+	MALI_CINSTR_VG_FILLED_LINES_COUNTER                                  = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 36,
+	MALI_CINSTR_VG_DRAW_PATH_CALLS_COUNTER                               = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 37,
+	MALI_CINSTR_VG_TRIANGLES_COUNTER                                     = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 38,
+	MALI_CINSTR_VG_VERTICES_COUNTER                                      = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 39,
+	MALI_CINSTR_VG_INDICES_COUNTER                                       = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 40,
+	MALI_CINSTR_VG_FILLED_PATHS_COUNTER                                  = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 41,
+	MALI_CINSTR_VG_STROKED_PATHS_COUNTER                                 = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 42,
+	MALI_CINSTR_VG_FILL_EXTRACT_COUNTER                                  = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 43,
+	MALI_CINSTR_VG_DRAW_FILLED_PATH_COUNTER                              = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 44,
+	MALI_CINSTR_VG_STROKE_EXTRACT_COUNTER                                = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 45,
+	MALI_CINSTR_VG_DRAW_STROKED_PATH_COUNTER                             = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 46,
+	MALI_CINSTR_VG_DRAW_PAINT_COUNTER                                    = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 47,
+	MALI_CINSTR_VG_DATA_STRUCTURES_COUNTER                               = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 48,
+	MALI_CINSTR_VG_MEM_PATH_COUNTER                                      = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 49,
+	MALI_CINSTR_VG_RSW_COUNTER                                           = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 50,
+
+	/* Last counter in the VG set */
+	MALI_CINSTR_VG_MAX_COUNTER                                           = MALI_CINSTR_COUNTER_SOURCE_OPENVG + 51,
+
+	/* Mali GP counters */
+
+	MALI_CINSTR_GP_DEPRECATED_0                                          = MALI_CINSTR_COUNTER_SOURCE_GP + 0,
+	MALI_CINSTR_GP_ACTIVE_CYCLES_GP                                      = MALI_CINSTR_COUNTER_SOURCE_GP + 1,
+	MALI_CINSTR_GP_ACTIVE_CYCLES_VERTEX_SHADER                           = MALI_CINSTR_COUNTER_SOURCE_GP + 2,
+	MALI_CINSTR_GP_ACTIVE_CYCLES_VERTEX_STORER                           = MALI_CINSTR_COUNTER_SOURCE_GP + 3,
+	MALI_CINSTR_GP_ACTIVE_CYCLES_VERTEX_LOADER                           = MALI_CINSTR_COUNTER_SOURCE_GP + 4,
+	MALI_CINSTR_GP_CYCLES_VERTEX_LOADER_WAITING_FOR_VERTEX_SHADER        = MALI_CINSTR_COUNTER_SOURCE_GP + 5,
+	MALI_CINSTR_GP_NUMBER_OF_WORDS_READ                                  = MALI_CINSTR_COUNTER_SOURCE_GP + 6,
+	MALI_CINSTR_GP_NUMBER_OF_WORDS_WRITTEN                               = MALI_CINSTR_COUNTER_SOURCE_GP + 7,
+	MALI_CINSTR_GP_NUMBER_OF_READ_BURSTS                                 = MALI_CINSTR_COUNTER_SOURCE_GP + 8,
+	MALI_CINSTR_GP_NUMBER_OF_WRITE_BURSTS                                = MALI_CINSTR_COUNTER_SOURCE_GP + 9,
+	MALI_CINSTR_GP_NUMBER_OF_VERTICES_PROCESSED                          = MALI_CINSTR_COUNTER_SOURCE_GP + 10,
+	MALI_CINSTR_GP_NUMBER_OF_VERTICES_FETCHED                            = MALI_CINSTR_COUNTER_SOURCE_GP + 11,
+	MALI_CINSTR_GP_NUMBER_OF_PRIMITIVES_FETCHED                          = MALI_CINSTR_COUNTER_SOURCE_GP + 12,
+	MALI_CINSTR_GP_RESERVED_13                                           = MALI_CINSTR_COUNTER_SOURCE_GP + 13,
+	MALI_CINSTR_GP_NUMBER_OF_BACKFACE_CULLINGS_DONE                      = MALI_CINSTR_COUNTER_SOURCE_GP + 14,
+	MALI_CINSTR_GP_NUMBER_OF_COMMANDS_WRITTEN_TO_TILES                   = MALI_CINSTR_COUNTER_SOURCE_GP + 15,
+	MALI_CINSTR_GP_NUMBER_OF_MEMORY_BLOCKS_ALLOCATED                     = MALI_CINSTR_COUNTER_SOURCE_GP + 16,
+	MALI_CINSTR_GP_RESERVED_17                                           = MALI_CINSTR_COUNTER_SOURCE_GP + 17,
+	MALI_CINSTR_GP_RESERVED_18                                           = MALI_CINSTR_COUNTER_SOURCE_GP + 18,
+	MALI_CINSTR_GP_NUMBER_OF_VERTEX_LOADER_CACHE_MISSES                  = MALI_CINSTR_COUNTER_SOURCE_GP + 19,
+	MALI_CINSTR_GP_RESERVED_20                                           = MALI_CINSTR_COUNTER_SOURCE_GP + 20,
+	MALI_CINSTR_GP_RESERVED_21                                           = MALI_CINSTR_COUNTER_SOURCE_GP + 21,
+	MALI_CINSTR_GP_ACTIVE_CYCLES_VERTEX_SHADER_COMMAND_PROCESSOR         = MALI_CINSTR_COUNTER_SOURCE_GP + 22,
+	MALI_CINSTR_GP_ACTIVE_CYCLES_PLBU_COMMAND_PROCESSOR                  = MALI_CINSTR_COUNTER_SOURCE_GP + 23,
+	MALI_CINSTR_GP_ACTIVE_CYCLES_PLBU_LIST_WRITER                        = MALI_CINSTR_COUNTER_SOURCE_GP + 24,
+	MALI_CINSTR_GP_ACTIVE_CYCLES_THROUGH_THE_PREPARE_LIST_COMMANDS       = MALI_CINSTR_COUNTER_SOURCE_GP + 25,
+	MALI_CINSTR_GP_RESERVED_26                                           = MALI_CINSTR_COUNTER_SOURCE_GP + 26,
+	MALI_CINSTR_GP_ACTIVE_CYCLES_PRIMITIVE_ASSEMBLY                      = MALI_CINSTR_COUNTER_SOURCE_GP + 27,
+	MALI_CINSTR_GP_ACTIVE_CYCLES_PLBU_VERTEX_FETCHER                     = MALI_CINSTR_COUNTER_SOURCE_GP + 28,
+	MALI_CINSTR_GP_RESERVED_29                                           = MALI_CINSTR_COUNTER_SOURCE_GP + 29,
+	MALI_CINSTR_GP_ACTIVE_CYCLES_BOUNDINGBOX_AND_COMMAND_GENERATOR       = MALI_CINSTR_COUNTER_SOURCE_GP + 30,
+	MALI_CINSTR_GP_RESERVED_31                                           = MALI_CINSTR_COUNTER_SOURCE_GP + 31,
+	MALI_CINSTR_GP_ACTIVE_CYCLES_SCISSOR_TILE_ITERATOR                   = MALI_CINSTR_COUNTER_SOURCE_GP + 32,
+	MALI_CINSTR_GP_ACTIVE_CYCLES_PLBU_TILE_ITERATOR                      = MALI_CINSTR_COUNTER_SOURCE_GP + 33,
+	MALI_CINSTR_GP_JOB_COUNT                                             = MALI_CINSTR_COUNTER_SOURCE_GP + 900,
+
+	/* Mali PP counters */
+
+	MALI_CINSTR_PP_ACTIVE_CLOCK_CYCLES_COUNT                             = MALI_CINSTR_COUNTER_SOURCE_PP + 0,
+	MALI_CINSTR_PP_TOTAL_CLOCK_CYCLES_COUNT_REMOVED                      = MALI_CINSTR_COUNTER_SOURCE_PP + 1,
+	MALI_CINSTR_PP_TOTAL_BUS_READS                                       = MALI_CINSTR_COUNTER_SOURCE_PP + 2,
+	MALI_CINSTR_PP_TOTAL_BUS_WRITES                                      = MALI_CINSTR_COUNTER_SOURCE_PP + 3,
+	MALI_CINSTR_PP_BUS_READ_REQUEST_CYCLES_COUNT                         = MALI_CINSTR_COUNTER_SOURCE_PP + 4,
+	MALI_CINSTR_PP_BUS_WRITE_REQUEST_CYCLES_COUNT                        = MALI_CINSTR_COUNTER_SOURCE_PP + 5,
+	MALI_CINSTR_PP_BUS_READ_TRANSACTIONS_COUNT                           = MALI_CINSTR_COUNTER_SOURCE_PP + 6,
+	MALI_CINSTR_PP_BUS_WRITE_TRANSACTIONS_COUNT                          = MALI_CINSTR_COUNTER_SOURCE_PP + 7,
+	MALI_CINSTR_PP_RESERVED_08                                           = MALI_CINSTR_COUNTER_SOURCE_PP + 8,
+	MALI_CINSTR_PP_TILE_WRITEBACK_WRITES                                 = MALI_CINSTR_COUNTER_SOURCE_PP + 9,
+	MALI_CINSTR_PP_STORE_UNIT_WRITES                                     = MALI_CINSTR_COUNTER_SOURCE_PP + 10,
+	MALI_CINSTR_PP_RESERVED_11                                           = MALI_CINSTR_COUNTER_SOURCE_PP + 11,
+	MALI_CINSTR_PP_PALETTE_CACHE_READS                                   = MALI_CINSTR_COUNTER_SOURCE_PP + 12,
+	MALI_CINSTR_PP_TEXTURE_CACHE_UNCOMPRESSED_READS                      = MALI_CINSTR_COUNTER_SOURCE_PP + 13,
+	MALI_CINSTR_PP_POLYGON_LIST_READS                                    = MALI_CINSTR_COUNTER_SOURCE_PP + 14,
+	MALI_CINSTR_PP_RSW_READS                                             = MALI_CINSTR_COUNTER_SOURCE_PP + 15,
+	MALI_CINSTR_PP_VERTEX_CACHE_READS                                    = MALI_CINSTR_COUNTER_SOURCE_PP + 16,
+	MALI_CINSTR_PP_UNIFORM_REMAPPING_READS                               = MALI_CINSTR_COUNTER_SOURCE_PP + 17,
+	MALI_CINSTR_PP_PROGRAM_CACHE_READS                                   = MALI_CINSTR_COUNTER_SOURCE_PP + 18,
+	MALI_CINSTR_PP_VARYING_READS                                         = MALI_CINSTR_COUNTER_SOURCE_PP + 19,
+	MALI_CINSTR_PP_TEXTURE_DESCRIPTORS_READS                             = MALI_CINSTR_COUNTER_SOURCE_PP + 20,
+	MALI_CINSTR_PP_TEXTURE_DESCRIPTORS_REMAPPING_READS                   = MALI_CINSTR_COUNTER_SOURCE_PP + 21,
+	MALI_CINSTR_PP_TEXTURE_CACHE_COMPRESSED_READS                        = MALI_CINSTR_COUNTER_SOURCE_PP + 22,
+	MALI_CINSTR_PP_LOAD_UNIT_READS                                       = MALI_CINSTR_COUNTER_SOURCE_PP + 23,
+	MALI_CINSTR_PP_POLYGON_COUNT                                         = MALI_CINSTR_COUNTER_SOURCE_PP + 24,
+	MALI_CINSTR_PP_PIXEL_RECTANGLE_COUNT                                 = MALI_CINSTR_COUNTER_SOURCE_PP + 25,
+	MALI_CINSTR_PP_LINES_COUNT                                           = MALI_CINSTR_COUNTER_SOURCE_PP + 26,
+	MALI_CINSTR_PP_POINTS_COUNT                                          = MALI_CINSTR_COUNTER_SOURCE_PP + 27,
+	MALI_CINSTR_PP_STALL_CYCLES_POLYGON_LIST_READER                      = MALI_CINSTR_COUNTER_SOURCE_PP + 28,
+	MALI_CINSTR_PP_STALL_CYCLES_TRIANGLE_SETUP                           = MALI_CINSTR_COUNTER_SOURCE_PP + 29,
+	MALI_CINSTR_PP_QUAD_RASTERIZED_COUNT                                 = MALI_CINSTR_COUNTER_SOURCE_PP + 30,
+	MALI_CINSTR_PP_FRAGMENT_RASTERIZED_COUNT                             = MALI_CINSTR_COUNTER_SOURCE_PP + 31,
+	MALI_CINSTR_PP_FRAGMENT_REJECTED_FRAGMENT_KILL_COUNT                 = MALI_CINSTR_COUNTER_SOURCE_PP + 32,
+	MALI_CINSTR_PP_FRAGMENT_REJECTED_FWD_FRAGMENT_KILL_COUNT             = MALI_CINSTR_COUNTER_SOURCE_PP + 33,
+	MALI_CINSTR_PP_FRAGMENT_PASSED_ZSTENCIL_COUNT                        = MALI_CINSTR_COUNTER_SOURCE_PP + 34,
+	MALI_CINSTR_PP_PATCHES_REJECTED_EARLY_Z_STENCIL_COUNT                = MALI_CINSTR_COUNTER_SOURCE_PP + 35,
+	MALI_CINSTR_PP_PATCHES_EVALUATED                                     = MALI_CINSTR_COUNTER_SOURCE_PP + 36,
+	MALI_CINSTR_PP_INSTRUCTION_COMPLETED_COUNT                           = MALI_CINSTR_COUNTER_SOURCE_PP + 37,
+	MALI_CINSTR_PP_INSTRUCTION_FAILED_RENDEZVOUS_COUNT                   = MALI_CINSTR_COUNTER_SOURCE_PP + 38,
+	MALI_CINSTR_PP_INSTRUCTION_FAILED_VARYING_MISS_COUNT                 = MALI_CINSTR_COUNTER_SOURCE_PP + 39,
+	MALI_CINSTR_PP_INSTRUCTION_FAILED_TEXTURE_MISS_COUNT                 = MALI_CINSTR_COUNTER_SOURCE_PP + 40,
+	MALI_CINSTR_PP_INSTRUCTION_FAILED_LOAD_MISS_COUNT                    = MALI_CINSTR_COUNTER_SOURCE_PP + 41,
+	MALI_CINSTR_PP_INSTRUCTION_FAILED_TILE_READ_MISS_COUNT               = MALI_CINSTR_COUNTER_SOURCE_PP + 42,
+	MALI_CINSTR_PP_INSTRUCTION_FAILED_STORE_MISS_COUNT                   = MALI_CINSTR_COUNTER_SOURCE_PP + 43,
+	MALI_CINSTR_PP_RENDEZVOUS_BREAKAGE_COUNT                             = MALI_CINSTR_COUNTER_SOURCE_PP + 44,
+	MALI_CINSTR_PP_PIPELINE_BUBBLES_CYCLE_COUNT                          = MALI_CINSTR_COUNTER_SOURCE_PP + 45,
+	MALI_CINSTR_PP_TEXTURE_MAPPER_MULTIPASS_COUNT                        = MALI_CINSTR_COUNTER_SOURCE_PP + 46,
+	MALI_CINSTR_PP_TEXTURE_MAPPER_CYCLE_COUNT                            = MALI_CINSTR_COUNTER_SOURCE_PP + 47,
+	MALI_CINSTR_PP_VERTEX_CACHE_HIT_COUNT                                = MALI_CINSTR_COUNTER_SOURCE_PP + 48,
+	MALI_CINSTR_PP_VERTEX_CACHE_MISS_COUNT                               = MALI_CINSTR_COUNTER_SOURCE_PP + 49,
+	MALI_CINSTR_PP_VARYING_CACHE_HIT_COUNT                               = MALI_CINSTR_COUNTER_SOURCE_PP + 50,
+	MALI_CINSTR_PP_VARYING_CACHE_MISS_COUNT                              = MALI_CINSTR_COUNTER_SOURCE_PP + 51,
+	MALI_CINSTR_PP_VARYING_CACHE_CONFLICT_MISS_COUNT                     = MALI_CINSTR_COUNTER_SOURCE_PP + 52,
+	MALI_CINSTR_PP_TEXTURE_CACHE_HIT_COUNT                               = MALI_CINSTR_COUNTER_SOURCE_PP + 53,
+	MALI_CINSTR_PP_TEXTURE_CACHE_MISS_COUNT                              = MALI_CINSTR_COUNTER_SOURCE_PP + 54,
+	MALI_CINSTR_PP_TEXTURE_CACHE_CONFLICT_MISS_COUNT                     = MALI_CINSTR_COUNTER_SOURCE_PP + 55,
+	MALI_CINSTR_PP_PALETTE_CACHE_HIT_COUNT                               = MALI_CINSTR_COUNTER_SOURCE_PP + 56, /* Mali 200 only */
+	MALI_CINSTR_PP_PALETTE_CACHE_MISS_COUNT                              = MALI_CINSTR_COUNTER_SOURCE_PP + 57, /* Mali 200 only */
+	MALI_CINSTR_PP_COMPRESSED_TEXTURE_CACHE_HIT_COUNT                    = MALI_CINSTR_COUNTER_SOURCE_PP + 56, /* Mali 400 class only */
+	MALI_CINSTR_PP_COMPRESSED_TEXTURE_CACHE_MISS_COUNT                   = MALI_CINSTR_COUNTER_SOURCE_PP + 57, /* Mali 400 class only */
+	MALI_CINSTR_PP_LOAD_STORE_CACHE_HIT_COUNT                            = MALI_CINSTR_COUNTER_SOURCE_PP + 58,
+	MALI_CINSTR_PP_LOAD_STORE_CACHE_MISS_COUNT                           = MALI_CINSTR_COUNTER_SOURCE_PP + 59,
+	MALI_CINSTR_PP_PROGRAM_CACHE_HIT_COUNT                               = MALI_CINSTR_COUNTER_SOURCE_PP + 60,
+	MALI_CINSTR_PP_PROGRAM_CACHE_MISS_COUNT                              = MALI_CINSTR_COUNTER_SOURCE_PP + 61,
+	MALI_CINSTR_PP_JOB_COUNT                                             = MALI_CINSTR_COUNTER_SOURCE_PP + 900,
+} cinstr_counters_m200_t;
+
+#endif /*_MALI_UTGARD_COUNTERS_H_*/
diff --git a/utgard/r5p1/include/linux/mali/mali_utgard_ioctl.h b/utgard/r5p1/include/linux/mali/mali_utgard_ioctl.h
new file mode 100755
index 0000000..6bb675f
--- /dev/null
+++ b/utgard/r5p1/include/linux/mali/mali_utgard_ioctl.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_UTGARD_IOCTL_H__
+#define __MALI_UTGARD_IOCTL_H__
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <linux/fs.h>       /* file system operations */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file mali_kernel_ioctl.h
+ * Interface to the Linux device driver.
+ * This file describes the interface needed to use the Linux device driver.
+ * Its interface is designed to used by the HAL implementation through a thin arch layer.
+ */
+
+/**
+ * ioctl commands
+ */
+
+#define MALI_IOC_BASE           0x82
+#define MALI_IOC_CORE_BASE      (_MALI_UK_CORE_SUBSYSTEM      + MALI_IOC_BASE)
+#define MALI_IOC_MEMORY_BASE    (_MALI_UK_MEMORY_SUBSYSTEM    + MALI_IOC_BASE)
+#define MALI_IOC_PP_BASE        (_MALI_UK_PP_SUBSYSTEM        + MALI_IOC_BASE)
+#define MALI_IOC_GP_BASE        (_MALI_UK_GP_SUBSYSTEM        + MALI_IOC_BASE)
+#define MALI_IOC_PROFILING_BASE (_MALI_UK_PROFILING_SUBSYSTEM + MALI_IOC_BASE)
+#define MALI_IOC_VSYNC_BASE     (_MALI_UK_VSYNC_SUBSYSTEM + MALI_IOC_BASE)
+
+#define MALI_IOC_WAIT_FOR_NOTIFICATION      _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_WAIT_FOR_NOTIFICATION, _mali_uk_wait_for_notification_s)
+#define MALI_IOC_GET_API_VERSION            _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, u32)
+#define MALI_IOC_GET_API_VERSION_V2         _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, _mali_uk_get_api_version_v2_s)
+#define MALI_IOC_POST_NOTIFICATION          _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_POST_NOTIFICATION, _mali_uk_post_notification_s)
+#define MALI_IOC_GET_USER_SETTING           _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTING, _mali_uk_get_user_setting_s)
+#define MALI_IOC_GET_USER_SETTINGS          _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTINGS, _mali_uk_get_user_settings_s)
+#define MALI_IOC_REQUEST_HIGH_PRIORITY      _IOW (MALI_IOC_CORE_BASE, _MALI_UK_REQUEST_HIGH_PRIORITY, _mali_uk_request_high_priority_s)
+#define MALI_IOC_TIMELINE_GET_LATEST_POINT  _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_GET_LATEST_POINT, _mali_uk_timeline_get_latest_point_s)
+#define MALI_IOC_TIMELINE_WAIT              _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_WAIT, _mali_uk_timeline_wait_s)
+#define MALI_IOC_TIMELINE_CREATE_SYNC_FENCE _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_CREATE_SYNC_FENCE, _mali_uk_timeline_create_sync_fence_s)
+#define MALI_IOC_SOFT_JOB_START             _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_START, _mali_uk_soft_job_start_s)
+#define MALI_IOC_SOFT_JOB_SIGNAL            _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_SIGNAL, _mali_uk_soft_job_signal_s)
+
+#define MALI_IOC_MEM_ALLOC                  _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_ALLOC_MEM, _mali_uk_alloc_mem_s)
+#define MALI_IOC_MEM_FREE                   _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_FREE_MEM, _mali_uk_free_mem_s)
+#define MALI_IOC_MEM_BIND                   _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_BIND_MEM, _mali_uk_bind_mem_s)
+#define MALI_IOC_MEM_UNBIND                 _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_UNBIND_MEM, _mali_uk_unbind_mem_s)
+#define MALI_IOC_MEM_DMA_BUF_GET_SIZE       _IOR(MALI_IOC_MEMORY_BASE, _MALI_UK_DMA_BUF_GET_SIZE, _mali_uk_dma_buf_get_size_s)
+#define MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE _IOR (MALI_IOC_MEMORY_BASE, _MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, _mali_uk_query_mmu_page_table_dump_size_s)
+#define MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE    _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_DUMP_MMU_PAGE_TABLE, _mali_uk_dump_mmu_page_table_s)
+#define MALI_IOC_MEM_WRITE_SAFE             _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_MEM_WRITE_SAFE, _mali_uk_mem_write_safe_s)
+
+#define MALI_IOC_PP_START_JOB               _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_START_JOB, _mali_uk_pp_start_job_s)
+#define MALI_IOC_PP_AND_GP_START_JOB        _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_AND_GP_START_JOB, _mali_uk_pp_and_gp_start_job_s)
+#define MALI_IOC_PP_NUMBER_OF_CORES_GET     _IOR (MALI_IOC_PP_BASE, _MALI_UK_GET_PP_NUMBER_OF_CORES, _mali_uk_get_pp_number_of_cores_s)
+#define MALI_IOC_PP_CORE_VERSION_GET        _IOR (MALI_IOC_PP_BASE, _MALI_UK_GET_PP_CORE_VERSION, _mali_uk_get_pp_core_version_s)
+#define MALI_IOC_PP_DISABLE_WB              _IOW (MALI_IOC_PP_BASE, _MALI_UK_PP_DISABLE_WB, _mali_uk_pp_disable_wb_s)
+
+#define MALI_IOC_GP2_START_JOB              _IOWR(MALI_IOC_GP_BASE, _MALI_UK_GP_START_JOB, _mali_uk_gp_start_job_s)
+#define MALI_IOC_GP2_NUMBER_OF_CORES_GET    _IOR (MALI_IOC_GP_BASE, _MALI_UK_GET_GP_NUMBER_OF_CORES, _mali_uk_get_gp_number_of_cores_s)
+#define MALI_IOC_GP2_CORE_VERSION_GET       _IOR (MALI_IOC_GP_BASE, _MALI_UK_GET_GP_CORE_VERSION, _mali_uk_get_gp_core_version_s)
+#define MALI_IOC_GP2_SUSPEND_RESPONSE       _IOW (MALI_IOC_GP_BASE, _MALI_UK_GP_SUSPEND_RESPONSE,_mali_uk_gp_suspend_response_s)
+
+#define MALI_IOC_PROFILING_ADD_EVENT        _IOWR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_ADD_EVENT, _mali_uk_profiling_add_event_s)
+#define MALI_IOC_PROFILING_REPORT_SW_COUNTERS  _IOW (MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_REPORT_SW_COUNTERS, _mali_uk_sw_counters_report_s)
+#define MALI_IOC_PROFILING_MEMORY_USAGE_GET _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_MEMORY_USAGE_GET, _mali_uk_profiling_memory_usage_get_s)
+
+#define MALI_IOC_VSYNC_EVENT_REPORT         _IOW (MALI_IOC_VSYNC_BASE, _MALI_UK_VSYNC_EVENT_REPORT, _mali_uk_vsync_event_report_s)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_IOCTL_H__ */
diff --git a/utgard/r5p1/include/linux/mali/mali_utgard_profiling_events.h b/utgard/r5p1/include/linux/mali/mali_utgard_profiling_events.h
new file mode 100755
index 0000000..6eff721
--- /dev/null
+++ b/utgard/r5p1/include/linux/mali/mali_utgard_profiling_events.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _MALI_UTGARD_PROFILING_EVENTS_H_
+#define _MALI_UTGARD_PROFILING_EVENTS_H_
+
+/*
+ * The event ID is a 32 bit value consisting of different fields
+ * reserved, 4 bits, for future use
+ * event type, 4 bits, cinstr_profiling_event_type_t
+ * event channel, 8 bits, the source of the event.
+ * event data, 16 bit field, data depending on event type
+ */
+
+/**
+ * Specifies what kind of event this is
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_TYPE_SINGLE  = 0 << 24,
+	MALI_PROFILING_EVENT_TYPE_START   = 1 << 24,
+	MALI_PROFILING_EVENT_TYPE_STOP    = 2 << 24,
+	MALI_PROFILING_EVENT_TYPE_SUSPEND = 3 << 24,
+	MALI_PROFILING_EVENT_TYPE_RESUME  = 4 << 24,
+} cinstr_profiling_event_type_t;
+
+
+/**
+ * Secifies the channel/source of the event
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_CHANNEL_SOFTWARE =  0 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_GP0      =  1 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP0      =  5 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP1      =  6 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP2      =  7 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP3      =  8 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP4      =  9 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP5      = 10 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP6      = 11 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP7      = 12 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_GPU      = 21 << 16,
+} cinstr_profiling_event_channel_t;
+
+
+#define MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(num) (((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) + (num)) << 16)
+#define MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(num) (((MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) + (num)) << 16)
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from software channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_NONE                  = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_NEW_FRAME         = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_FLUSH                 = 2,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SWAP_BUFFERS      = 3,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_FB_EVENT              = 4,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE            = 5,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE            = 6,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_READBACK              = 7,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_WRITEBACK             = 8,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_ENTER_API_FUNC        = 10,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC        = 11,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_DISCARD_ATTACHMENTS   = 13,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_TRY_LOCK          = 53,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_LOCK              = 54,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_UNLOCK            = 55,
+	MALI_PROFILING_EVENT_REASON_SINGLE_LOCK_CONTENDED           = 56,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_MALI_FENCE_DUP    = 57,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SET_PP_JOB_FENCE  = 58,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_WAIT_SYNC         = 59,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_FENCE_SYNC = 60,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_NATIVE_FENCE_SYNC = 61,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FENCE_FLUSH       = 62,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FLUSH_SERVER_WAITS = 63,
+} cinstr_profiling_event_reason_single_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel
+ * to inform whether the core is physical or virtual
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL  = 0,
+	MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL   = 1,
+} cinstr_profiling_event_reason_start_stop_hw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel
+ */
+typedef enum {
+	/*MALI_PROFILING_EVENT_REASON_START_STOP_SW_NONE            = 0,*/
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_MALI            = 1,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_CALLBACK_THREAD = 2,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_WORKER_THREAD   = 3,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF     = 4,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF      = 5,
+} cinstr_profiling_event_reason_start_stop_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SUSPEND/RESUME is used from software channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_NONE                     =  0, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PIPELINE_FULL            =  1, /* NOT used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC                    = 26, /* used in some build configurations */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_WAIT           = 27, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_SYNC           = 28, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_FILTER_CLEANUP   = 29, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_TEXTURE          = 30, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_MIPLEVEL       = 31, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_READPIXELS     = 32, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SWAP_IMMEDIATE  = 33, /* NOT used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_QUEUE_BUFFER         = 34, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_DEQUEUE_BUFFER       = 35, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_UMP_LOCK                 = 36, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_GLOBAL_LOCK          = 37, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_SWAP                 = 38, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_MALI_EGL_IMAGE_SYNC_WAIT = 39, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GP_JOB_HANDLING          = 40, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PP_JOB_HANDLING          = 41, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_MERGE     = 42, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_DUP       = 43,
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_FLUSH_SERVER_WAITS   = 44,
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SYNC            = 45, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_JOBS_WAIT             = 46, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOFRAMES_WAIT         = 47, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOJOBS_WAIT           = 48, /* USED */
+} cinstr_profiling_event_reason_suspend_resume_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from a HW channel (GPx+PPx)
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_NONE          = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_INTERRUPT     = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH         = 2,
+} cinstr_profiling_event_reason_single_hw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from the GPU channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_NONE              = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE  = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS      = 2,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS      = 3,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS      = 4,
+} cinstr_profiling_event_reason_single_gpu_t;
+
+/**
+ * These values are applicable for the 3rd data parameter when
+ * the type MALI_PROFILING_EVENT_TYPE_START is used from the software channel
+ * with the MALI_PROFILING_EVENT_REASON_START_STOP_BOTTOM_HALF reason.
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_DATA_CORE_GP0             =  1,
+	MALI_PROFILING_EVENT_DATA_CORE_PP0             =  5,
+	MALI_PROFILING_EVENT_DATA_CORE_PP1             =  6,
+	MALI_PROFILING_EVENT_DATA_CORE_PP2             =  7,
+	MALI_PROFILING_EVENT_DATA_CORE_PP3             =  8,
+	MALI_PROFILING_EVENT_DATA_CORE_PP4             =  9,
+	MALI_PROFILING_EVENT_DATA_CORE_PP5             = 10,
+	MALI_PROFILING_EVENT_DATA_CORE_PP6             = 11,
+	MALI_PROFILING_EVENT_DATA_CORE_PP7             = 12,
+	MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU         = 22, /* GP0 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU         = 26, /* PP0 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP1_MMU         = 27, /* PP1 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP2_MMU         = 28, /* PP2 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP3_MMU         = 29, /* PP3 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP4_MMU         = 30, /* PP4 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP5_MMU         = 31, /* PP5 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP6_MMU         = 32, /* PP6 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP7_MMU         = 33, /* PP7 + 21 */
+
+} cinstr_profiling_event_data_core_t;
+
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0 + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0 + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU + (num))
+
+
+#endif /*_MALI_UTGARD_PROFILING_EVENTS_H_*/
diff --git a/utgard/r5p1/include/linux/mali/mali_utgard_profiling_gator_api.h b/utgard/r5p1/include/linux/mali/mali_utgard_profiling_gator_api.h
new file mode 100755
index 0000000..76f03fc
--- /dev/null
+++ b/utgard/r5p1/include/linux/mali/mali_utgard_profiling_gator_api.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_UTGARD_PROFILING_GATOR_API_H__
+#define __MALI_UTGARD_PROFILING_GATOR_API_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MALI_PROFILING_API_VERSION 4
+
+#define MAX_NUM_L2_CACHE_CORES 3
+#define MAX_NUM_FP_CORES 8
+#define MAX_NUM_VP_CORES 1
+
+/** The list of events supported by the Mali DDK. */
+typedef enum {
+	/* Vertex processor activity */
+	ACTIVITY_VP_0 = 0,
+
+	/* Fragment processor activity */
+	ACTIVITY_FP_0,
+	ACTIVITY_FP_1,
+	ACTIVITY_FP_2,
+	ACTIVITY_FP_3,
+	ACTIVITY_FP_4,
+	ACTIVITY_FP_5,
+	ACTIVITY_FP_6,
+	ACTIVITY_FP_7,
+
+	/* L2 cache counters */
+	COUNTER_L2_0_C0,
+	COUNTER_L2_0_C1,
+	COUNTER_L2_1_C0,
+	COUNTER_L2_1_C1,
+	COUNTER_L2_2_C0,
+	COUNTER_L2_2_C1,
+
+	/* Vertex processor counters */
+	COUNTER_VP_0_C0,
+	COUNTER_VP_0_C1,
+
+	/* Fragment processor counters */
+	COUNTER_FP_0_C0,
+	COUNTER_FP_0_C1,
+	COUNTER_FP_1_C0,
+	COUNTER_FP_1_C1,
+	COUNTER_FP_2_C0,
+	COUNTER_FP_2_C1,
+	COUNTER_FP_3_C0,
+	COUNTER_FP_3_C1,
+	COUNTER_FP_4_C0,
+	COUNTER_FP_4_C1,
+	COUNTER_FP_5_C0,
+	COUNTER_FP_5_C1,
+	COUNTER_FP_6_C0,
+	COUNTER_FP_6_C1,
+	COUNTER_FP_7_C0,
+	COUNTER_FP_7_C1,
+
+	/*
+	 * If more hardware counters are added, the _mali_osk_hw_counter_table
+	 * below should also be updated.
+	 */
+
+	/* EGL software counters */
+	COUNTER_EGL_BLIT_TIME,
+
+	/* GLES software counters */
+	COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+	COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+	COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+	COUNTER_GLES_DRAW_ARRAYS_CALLS,
+	COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+	COUNTER_GLES_DRAW_POINTS,
+	COUNTER_GLES_DRAW_LINES,
+	COUNTER_GLES_DRAW_LINE_LOOP,
+	COUNTER_GLES_DRAW_LINE_STRIP,
+	COUNTER_GLES_DRAW_TRIANGLES,
+	COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+	COUNTER_GLES_DRAW_TRIANGLE_FAN,
+	COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+	COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+	COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+	COUNTER_GLES_UPLOAD_VBO_TIME,
+	COUNTER_GLES_NUM_FLUSHES,
+	COUNTER_GLES_NUM_VSHADERS_GENERATED,
+	COUNTER_GLES_NUM_FSHADERS_GENERATED,
+	COUNTER_GLES_VSHADER_GEN_TIME,
+	COUNTER_GLES_FSHADER_GEN_TIME,
+	COUNTER_GLES_INPUT_TRIANGLES,
+	COUNTER_GLES_VXCACHE_HIT,
+	COUNTER_GLES_VXCACHE_MISS,
+	COUNTER_GLES_VXCACHE_COLLISION,
+	COUNTER_GLES_CULLED_TRIANGLES,
+	COUNTER_GLES_CULLED_LINES,
+	COUNTER_GLES_BACKFACE_TRIANGLES,
+	COUNTER_GLES_GBCLIP_TRIANGLES,
+	COUNTER_GLES_GBCLIP_LINES,
+	COUNTER_GLES_TRIANGLES_DRAWN,
+	COUNTER_GLES_DRAWCALL_TIME,
+	COUNTER_GLES_TRIANGLES_COUNT,
+	COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+	COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+	COUNTER_GLES_FAN_TRIANGLES_COUNT,
+	COUNTER_GLES_LINES_COUNT,
+	COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+	COUNTER_GLES_STRIP_LINES_COUNT,
+	COUNTER_GLES_LOOP_LINES_COUNT,
+
+	/* Framebuffer capture pseudo-counter */
+	COUNTER_FILMSTRIP,
+
+	NUMBER_OF_EVENTS
+} _mali_osk_counter_id;
+
+#define FIRST_ACTIVITY_EVENT    ACTIVITY_VP_0
+#define LAST_ACTIVITY_EVENT     ACTIVITY_FP_7
+
+#define FIRST_HW_COUNTER        COUNTER_L2_0_C0
+#define LAST_HW_COUNTER         COUNTER_FP_7_C1
+
+#define FIRST_SW_COUNTER        COUNTER_EGL_BLIT_TIME
+#define LAST_SW_COUNTER         COUNTER_GLES_LOOP_LINES_COUNT
+
+#define FIRST_SPECIAL_COUNTER   COUNTER_FILMSTRIP
+#define LAST_SPECIAL_COUNTER    COUNTER_FILMSTRIP
+
+/**
+ * Structure to pass performance counter data of a Mali core
+ */
+typedef struct _mali_profiling_core_counters {
+	u32 source0;
+	u32 value0;
+	u32 source1;
+	u32 value1;
+} _mali_profiling_core_counters;
+
+/**
+ * Structure to pass performance counter data of Mali L2 cache cores
+ */
+typedef struct _mali_profiling_l2_counter_values {
+	struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+} _mali_profiling_l2_counter_values;
+
+/**
+ * Structure to pass data defining Mali instance in use:
+ *
+ * mali_product_id - Mali product id
+ * mali_version_major - Mali version major number
+ * mali_version_minor - Mali version minor number
+ * num_of_l2_cores - number of L2 cache cores
+ * num_of_fp_cores - number of fragment processor cores
+ * num_of_vp_cores - number of vertex processor cores
+ */
+typedef struct _mali_profiling_mali_version {
+	u32 mali_product_id;
+	u32 mali_version_major;
+	u32 mali_version_minor;
+	u32 num_of_l2_cores;
+	u32 num_of_fp_cores;
+	u32 num_of_vp_cores;
+} _mali_profiling_mali_version;
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting.
+ * We cannot use the enums in mali_uk_types.h because they are unknown inside gator.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+
+void _mali_profiling_control(u32 action, u32 value);
+
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values);
+
+int _mali_profiling_set_event(u32 counter_id, s32 event_id);
+
+u32 _mali_profiling_get_api_version(void);
+
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_PROFILING_GATOR_API_H__ */
diff --git a/utgard/r5p1/include/linux/mali/mali_utgard_uk_types.h b/utgard/r5p1/include/linux/mali/mali_utgard_uk_types.h
new file mode 100755
index 0000000..61b77ff
--- /dev/null
+++ b/utgard/r5p1/include/linux/mali/mali_utgard_uk_types.h
@@ -0,0 +1,1037 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_uk_types.h
+ * Defines the types and constants used in the user-kernel interface
+ */
+
+#ifndef __MALI_UTGARD_UK_TYPES_H__
+#define __MALI_UTGARD_UK_TYPES_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Iteration functions depend on these values being consecutive. */
+#define MALI_UK_TIMELINE_GP   0
+#define MALI_UK_TIMELINE_PP   1
+#define MALI_UK_TIMELINE_SOFT 2
+#define MALI_UK_TIMELINE_MAX  3
+
+typedef struct {
+	u32 points[MALI_UK_TIMELINE_MAX];
+	s32 sync_fd;
+} _mali_uk_fence_t;
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs
+ *
+ * @{
+ */
+
+/** @defgroup _mali_uk_core U/K Core
+ * @{ */
+
+/** Definition of subsystem numbers, to assist in creating a unique identifier
+ * for each U/K call.
+ *
+ * @see _mali_uk_functions */
+typedef enum {
+	_MALI_UK_CORE_SUBSYSTEM,      /**< Core Group of U/K calls */
+	_MALI_UK_MEMORY_SUBSYSTEM,    /**< Memory Group of U/K calls */
+	_MALI_UK_PP_SUBSYSTEM,        /**< Fragment Processor Group of U/K calls */
+	_MALI_UK_GP_SUBSYSTEM,        /**< Vertex Processor Group of U/K calls */
+	_MALI_UK_PROFILING_SUBSYSTEM, /**< Profiling Group of U/K calls */
+	_MALI_UK_VSYNC_SUBSYSTEM,     /**< VSYNC Group of U/K calls */
+} _mali_uk_subsystem_t;
+
+/** Within a function group each function has its unique sequence number
+ * to assist in creating a unique identifier for each U/K call.
+ *
+ * An ordered pair of numbers selected from
+ * ( \ref _mali_uk_subsystem_t,\ref  _mali_uk_functions) will uniquely identify the
+ * U/K call across all groups of functions, and all functions. */
+typedef enum {
+	/** Core functions */
+
+	_MALI_UK_OPEN                    = 0, /**< _mali_ukk_open() */
+	_MALI_UK_CLOSE,                       /**< _mali_ukk_close() */
+	_MALI_UK_WAIT_FOR_NOTIFICATION,       /**< _mali_ukk_wait_for_notification() */
+	_MALI_UK_GET_API_VERSION,             /**< _mali_ukk_get_api_version() */
+	_MALI_UK_POST_NOTIFICATION,           /**< _mali_ukk_post_notification() */
+	_MALI_UK_GET_USER_SETTING,            /**< _mali_ukk_get_user_setting() *//**< [out] */
+	_MALI_UK_GET_USER_SETTINGS,           /**< _mali_ukk_get_user_settings() *//**< [out] */
+	_MALI_UK_REQUEST_HIGH_PRIORITY,       /**< _mali_ukk_request_high_priority() */
+	_MALI_UK_TIMELINE_GET_LATEST_POINT,   /**< _mali_ukk_timeline_get_latest_point() */
+	_MALI_UK_TIMELINE_WAIT,               /**< _mali_ukk_timeline_wait() */
+	_MALI_UK_TIMELINE_CREATE_SYNC_FENCE,  /**< _mali_ukk_timeline_create_sync_fence() */
+	_MALI_UK_SOFT_JOB_START,              /**< _mali_ukk_soft_job_start() */
+	_MALI_UK_SOFT_JOB_SIGNAL,             /**< _mali_ukk_soft_job_signal() */
+
+	/** Memory functions */
+
+	_MALI_UK_ALLOC_MEM                = 0,   /**< _mali_ukk_init_mem() */
+	_MALI_UK_FREE_MEM,                       /**< _mali_ukk_term_mem() */
+	_MALI_UK_BIND_MEM,                       /**< _mali_ukk_mem_mmap() */
+	_MALI_UK_UNBIND_MEM,                     /**< _mali_ukk_mem_munmap() */
+	_MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, /**< _mali_ukk_mem_get_mmu_page_table_dump_size() */
+	_MALI_UK_DUMP_MMU_PAGE_TABLE,            /**< _mali_ukk_mem_dump_mmu_page_table() */
+	_MALI_UK_DMA_BUF_GET_SIZE,               /**< _mali_ukk_dma_buf_get_size() */
+	_MALI_UK_MEM_WRITE_SAFE,                 /**< _mali_uku_mem_write_safe() */
+
+	/** Common functions for each core */
+
+	_MALI_UK_START_JOB           = 0,     /**< Start a Fragment/Vertex Processor Job on a core */
+	_MALI_UK_GET_NUMBER_OF_CORES,         /**< Get the number of Fragment/Vertex Processor cores */
+	_MALI_UK_GET_CORE_VERSION,            /**< Get the Fragment/Vertex Processor version compatible with all cores */
+
+	/** Fragment Processor Functions  */
+
+	_MALI_UK_PP_START_JOB            = _MALI_UK_START_JOB,            /**< _mali_ukk_pp_start_job() */
+	_MALI_UK_GET_PP_NUMBER_OF_CORES  = _MALI_UK_GET_NUMBER_OF_CORES,  /**< _mali_ukk_get_pp_number_of_cores() */
+	_MALI_UK_GET_PP_CORE_VERSION     = _MALI_UK_GET_CORE_VERSION,     /**< _mali_ukk_get_pp_core_version() */
+	_MALI_UK_PP_DISABLE_WB,                                           /**< _mali_ukk_pp_job_disable_wb() */
+	_MALI_UK_PP_AND_GP_START_JOB,                                     /**< _mali_ukk_pp_and_gp_start_job() */
+
+	/** Vertex Processor Functions  */
+
+	_MALI_UK_GP_START_JOB            = _MALI_UK_START_JOB,            /**< _mali_ukk_gp_start_job() */
+	_MALI_UK_GET_GP_NUMBER_OF_CORES  = _MALI_UK_GET_NUMBER_OF_CORES,  /**< _mali_ukk_get_gp_number_of_cores() */
+	_MALI_UK_GET_GP_CORE_VERSION     = _MALI_UK_GET_CORE_VERSION,     /**< _mali_ukk_get_gp_core_version() */
+	_MALI_UK_GP_SUSPEND_RESPONSE,                                     /**< _mali_ukk_gp_suspend_response() */
+
+	/** Profiling functions */
+
+	_MALI_UK_PROFILING_ADD_EVENT     = 0, /**< __mali_uku_profiling_add_event() */
+	_MALI_UK_PROFILING_REPORT_SW_COUNTERS,/**< __mali_uku_profiling_report_sw_counters() */
+	_MALI_UK_PROFILING_MEMORY_USAGE_GET,  /**< __mali_uku_profiling_memory_usage_get() */
+
+	/** VSYNC reporting fuctions */
+	_MALI_UK_VSYNC_EVENT_REPORT      = 0, /**< _mali_ukk_vsync_event_report() */
+} _mali_uk_functions;
+
+/** @defgroup _mali_uk_getsysteminfo U/K Get System Info
+ * @{ */
+
+/**
+ * Type definition for the core version number.
+ * Used when returning the version number read from a core
+ *
+ * Its format is that of the 32-bit Version register for a particular core.
+ * Refer to the "Mali200 and MaliGP2 3D Graphics Processor Technical Reference
+ * Manual", ARM DDI 0415C, for more information.
+ */
+typedef u32 _mali_core_version;
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @defgroup _mali_uk_gp U/K Vertex Processor
+ * @{ */
+
+/** @defgroup _mali_uk_gp_suspend_response_s Vertex Processor Suspend Response
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_gp_suspend_response()
+ *
+ * When _mali_wait_for_notification() receives notification that a
+ * Vertex Processor job was suspended, you need to send a response to indicate
+ * what needs to happen with this job. You can either abort or resume the job.
+ *
+ * - set @c code to indicate response code. This is either @c _MALIGP_JOB_ABORT or
+ * @c _MALIGP_JOB_RESUME_WITH_NEW_HEAP to indicate you will provide a new heap
+ * for the job that will resolve the out of memory condition for the job.
+ * - copy the @c cookie value from the @c _mali_uk_gp_job_suspended_s notification;
+ * this is an identifier for the suspended job
+ * - set @c arguments[0] and @c arguments[1] to zero if you abort the job. If
+ * you resume it, @c argument[0] should specify the Mali start address for the new
+ * heap and @c argument[1] the Mali end address of the heap.
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ *
+ */
+typedef enum _maligp_job_suspended_response_code {
+	_MALIGP_JOB_ABORT,                  /**< Abort the Vertex Processor job */
+	_MALIGP_JOB_RESUME_WITH_NEW_HEAP    /**< Resume the Vertex Processor job with a new heap */
+} _maligp_job_suspended_response_code;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 cookie;                     /**< [in] cookie from the _mali_uk_gp_job_suspended_s notification */
+	_maligp_job_suspended_response_code code; /**< [in] abort or resume response code, see \ref _maligp_job_suspended_response_code */
+	u32 arguments[2];               /**< [in] 0 when aborting a job. When resuming a job, the Mali start and end address for a new heap to resume the job with */
+} _mali_uk_gp_suspend_response_s;
+
+/** @} */ /* end group _mali_uk_gp_suspend_response_s */
+
+/** @defgroup _mali_uk_gpstartjob_s Vertex Processor Start Job
+ * @{ */
+
+/** @brief Status indicating the result of the execution of a Vertex or Fragment processor job  */
+typedef enum {
+	_MALI_UK_JOB_STATUS_END_SUCCESS         = 1 << (16 + 0),
+	_MALI_UK_JOB_STATUS_END_OOM             = 1 << (16 + 1),
+	_MALI_UK_JOB_STATUS_END_ABORT           = 1 << (16 + 2),
+	_MALI_UK_JOB_STATUS_END_TIMEOUT_SW      = 1 << (16 + 3),
+	_MALI_UK_JOB_STATUS_END_HANG            = 1 << (16 + 4),
+	_MALI_UK_JOB_STATUS_END_SEG_FAULT       = 1 << (16 + 5),
+	_MALI_UK_JOB_STATUS_END_ILLEGAL_JOB     = 1 << (16 + 6),
+	_MALI_UK_JOB_STATUS_END_UNKNOWN_ERR     = 1 << (16 + 7),
+	_MALI_UK_JOB_STATUS_END_SHUTDOWN        = 1 << (16 + 8),
+	_MALI_UK_JOB_STATUS_END_SYSTEM_UNUSABLE = 1 << (16 + 9)
+} _mali_uk_job_status;
+
+#define MALIGP2_NUM_REGS_FRAME (6)
+
+/** @brief Arguments for _mali_ukk_gp_start_job()
+ *
+ * To start a Vertex Processor job
+ * - associate the request with a reference to a @c mali_gp_job_info by setting
+ * user_job_ptr to the address of the @c mali_gp_job_info of the job.
+ * - set @c priority to the priority of the @c mali_gp_job_info
+ * - specify a timeout for the job by setting @c watchdog_msecs to the number of
+ * milliseconds the job is allowed to run. Specifying a value of 0 selects the
+ * default timeout in use by the device driver.
+ * - copy the frame registers from the @c mali_gp_job_info into @c frame_registers.
+ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero
+ * for a non-instrumented build. For an instrumented build you can use up
+ * to two performance counters. Set the corresponding bit in @c perf_counter_flag
+ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify
+ * the source of what needs to get counted (e.g. number of vertex loader
+ * cache hits). For source id values, see ARM DDI0415A, Table 3-60.
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ *
+ * When @c _mali_ukk_gp_start_job() returns @c _MALI_OSK_ERR_OK, status contains the
+ * result of the request (see \ref _mali_uk_start_job_status). If the job could
+ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be
+ * tried again.
+ *
+ * After the job has started, @c _mali_wait_for_notification() will be notified
+ * that the job finished or got suspended. It may get suspended due to
+ * resource shortage. If it finished (see _mali_ukk_wait_for_notification())
+ * the notification will contain a @c _mali_uk_gp_job_finished_s result. If
+ * it got suspended the notification will contain a @c _mali_uk_gp_job_suspended_s
+ * result.
+ *
+ * The @c _mali_uk_gp_job_finished_s contains the job status (see \ref _mali_uk_job_status),
+ * the number of milliseconds the job took to render, and values of core registers
+ * when the job finished (irq status, performance counters, renderer list
+ * address). A job has finished succesfully when its status is
+ * @c _MALI_UK_JOB_STATUS_FINISHED. If the hardware detected a timeout while rendering
+ * the job, or software detected the job is taking more than watchdog_msecs to
+ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_HANG.
+ * If the hardware detected a bus error while accessing memory associated with the
+ * job, status will indicate @c _MALI_UK_JOB_STATUS_SEG_FAULT.
+ * status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to
+ * stop the job but the job didn't start on the hardware yet, e.g. when the
+ * driver shutdown.
+ *
+ * In case the job got suspended, @c _mali_uk_gp_job_suspended_s contains
+ * the @c user_job_ptr identifier used to start the job with, the @c reason
+ * why the job stalled (see \ref _maligp_job_suspended_reason) and a @c cookie
+ * to identify the core on which the job stalled.  This @c cookie will be needed
+ * when responding to this nofication by means of _mali_ukk_gp_suspend_response().
+ * (see _mali_ukk_gp_suspend_response()). The response is either to abort or
+ * resume the job. If the job got suspended due to an out of memory condition
+ * you may be able to resolve this by providing more memory and resuming the job.
+ *
+ */
+typedef struct {
+	u64 ctx;                          /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job_ptr;                   /**< [in] identifier for the job in user space, a @c mali_gp_job_info* */
+	u32 priority;                       /**< [in] job priority. A lower number means higher priority */
+	u32 frame_registers[MALIGP2_NUM_REGS_FRAME]; /**< [in] core specific registers associated with this job */
+	u32 perf_counter_flag;              /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */
+	u32 perf_counter_src0;              /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */
+	u32 perf_counter_src1;              /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */
+	u32 frame_builder_id;               /**< [in] id of the originating frame builder */
+	u32 flush_id;                       /**< [in] flush id within the originating frame builder */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u64 timeline_point_ptr;            /**< [in,out] pointer to u32: location where point on gp timeline for this job will be written */
+} _mali_uk_gp_start_job_s;
+
+#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE (1<<0) /**< Enable performance counter SRC0 for a job */
+#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE (1<<1) /**< Enable performance counter SRC1 for a job */
+#define _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE (1<<2) /**< Enable per tile (aka heatmap) generation with for a job (using the enabled counter sources) */
+
+/** @} */ /* end group _mali_uk_gpstartjob_s */
+
+typedef struct {
+	u64 user_job_ptr;               /**< [out] identifier for the job in user space */
+	_mali_uk_job_status status;     /**< [out] status of finished job */
+	u32 heap_current_addr;          /**< [out] value of the GP PLB PL heap start address register */
+	u32 perf_counter0;              /**< [out] value of performance counter 0 (see ARM DDI0415A) */
+	u32 perf_counter1;              /**< [out] value of performance counter 1 (see ARM DDI0415A) */
+} _mali_uk_gp_job_finished_s;
+
+typedef struct {
+	u64 user_job_ptr;                    /**< [out] identifier for the job in user space */
+	u32 cookie;                          /**< [out] identifier for the core in kernel space on which the job stalled */
+} _mali_uk_gp_job_suspended_s;
+
+/** @} */ /* end group _mali_uk_gp */
+
+
+/** @defgroup _mali_uk_pp U/K Fragment Processor
+ * @{ */
+
+#define _MALI_PP_MAX_SUB_JOBS 8
+
+#define _MALI_PP_MAX_FRAME_REGISTERS ((0x058/4)+1)
+
+#define _MALI_PP_MAX_WB_REGISTERS ((0x02C/4)+1)
+
+#define _MALI_DLBU_MAX_REGISTERS 4
+
+/** Flag for _mali_uk_pp_start_job_s */
+#define _MALI_PP_JOB_FLAG_NO_NOTIFICATION (1<<0)
+#define _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE (1<<1)
+
+/** @defgroup _mali_uk_ppstartjob_s Fragment Processor Start Job
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_pp_start_job()
+ *
+ * To start a Fragment Processor job
+ * - associate the request with a reference to a mali_pp_job by setting
+ * @c user_job_ptr to the address of the @c mali_pp_job of the job.
+ * - set @c priority to the priority of the mali_pp_job
+ * - specify a timeout for the job by setting @c watchdog_msecs to the number of
+ * milliseconds the job is allowed to run. Specifying a value of 0 selects the
+ * default timeout in use by the device driver.
+ * - copy the frame registers from the @c mali_pp_job into @c frame_registers.
+ * For MALI200 you also need to copy the write back 0,1 and 2 registers.
+ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero
+ * for a non-instrumented build. For an instrumented build you can use up
+ * to two performance counters. Set the corresponding bit in @c perf_counter_flag
+ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify
+ * the source of what needs to get counted (e.g. number of vertex loader
+ * cache hits). For source id values, see ARM DDI0415A, Table 3-60.
+ * - pass in the user-kernel context in @c ctx that was returned from _mali_ukk_open()
+ *
+ * When _mali_ukk_pp_start_job() returns @c _MALI_OSK_ERR_OK, @c status contains the
+ * result of the request (see \ref _mali_uk_start_job_status). If the job could
+ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be
+ * tried again.
+ *
+ * After the job has started, _mali_wait_for_notification() will be notified
+ * when the job finished. The notification will contain a
+ * @c _mali_uk_pp_job_finished_s result. It contains the @c user_job_ptr
+ * identifier used to start the job with, the job @c status (see \ref _mali_uk_job_status),
+ * the number of milliseconds the job took to render, and values of core registers
+ * when the job finished (irq status, performance counters, renderer list
+ * address). A job has finished succesfully when its status is
+ * @c _MALI_UK_JOB_STATUS_FINISHED. If the hardware detected a timeout while rendering
+ * the job, or software detected the job is taking more than @c watchdog_msecs to
+ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_HANG.
+ * If the hardware detected a bus error while accessing memory associated with the
+ * job, status will indicate @c _MALI_UK_JOB_STATUS_SEG_FAULT.
+ * status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to
+ * stop the job but the job didn't start on the hardware yet, e.g. when the
+ * driver shutdown.
+ *
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job_ptr;               /**< [in] identifier for the job in user space */
+	u32 priority;                   /**< [in] job priority. A lower number means higher priority */
+	u32 frame_registers[_MALI_PP_MAX_FRAME_REGISTERS];         /**< [in] core specific registers associated with first sub job, see ARM DDI0415A */
+	u32 frame_registers_addr_frame[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_FRAME registers for sub job 1-7 */
+	u32 frame_registers_addr_stack[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_STACK registers for sub job 1-7 */
+	u32 wb0_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 wb1_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 wb2_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 dlbu_registers[_MALI_DLBU_MAX_REGISTERS]; /**< [in] Dynamic load balancing unit registers */
+	u32 num_cores;                      /**< [in] Number of cores to set up (valid range: 1-8(M450) or 4(M400)) */
+	u32 perf_counter_flag;              /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */
+	u32 perf_counter_src0;              /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */
+	u32 perf_counter_src1;              /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */
+	u32 frame_builder_id;               /**< [in] id of the originating frame builder */
+	u32 flush_id;                       /**< [in] flush id within the originating frame builder */
+	u32 flags;                          /**< [in] See _MALI_PP_JOB_FLAG_* for a list of avaiable flags */
+	u32 tilesx;                         /**< [in] number of tiles in the x direction (needed for heatmap generation */
+	u32 tilesy;                         /**< [in] number of tiles in y direction (needed for reading the heatmap memory) */
+	u32 heatmap_mem;                    /**< [in] memory address to store counter values per tile (aka heatmap) */
+	u32 num_memory_cookies;             /**< [in] number of memory cookies attached to job */
+	u64 memory_cookies;               /**< [in] pointer to array of u32 memory cookies attached to job */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u64 timeline_point_ptr;           /**< [in,out] pointer to location of u32 where point on pp timeline for this job will be written */
+} _mali_uk_pp_start_job_s;
+
+typedef struct {
+	u64 ctx;       /**< [in,out] user-kernel context (trashed on output) */
+	u64 gp_args;   /**< [in,out] GP uk arguments (see _mali_uk_gp_start_job_s) */
+	u64 pp_args;   /**< [in,out] PP uk arguments (see _mali_uk_pp_start_job_s) */
+} _mali_uk_pp_and_gp_start_job_s;
+
+/** @} */ /* end group _mali_uk_ppstartjob_s */
+
+typedef struct {
+	u64 user_job_ptr;                          /**< [out] identifier for the job in user space */
+	_mali_uk_job_status status;                /**< [out] status of finished job */
+	u32 perf_counter0[_MALI_PP_MAX_SUB_JOBS];  /**< [out] value of perfomance counter 0 (see ARM DDI0415A), one for each sub job */
+	u32 perf_counter1[_MALI_PP_MAX_SUB_JOBS];  /**< [out] value of perfomance counter 1 (see ARM DDI0415A), one for each sub job */
+	u32 perf_counter_src0;
+	u32 perf_counter_src1;
+} _mali_uk_pp_job_finished_s;
+
+typedef struct {
+	u32 number_of_enabled_cores;               /**< [out] the new number of enabled cores */
+} _mali_uk_pp_num_cores_changed_s;
+
+
+
+/**
+ * Flags to indicate write-back units
+ */
+typedef enum {
+	_MALI_UK_PP_JOB_WB0 = 1,
+	_MALI_UK_PP_JOB_WB1 = 2,
+	_MALI_UK_PP_JOB_WB2 = 4,
+} _mali_uk_pp_job_wbx_flag;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 fb_id;                      /**< [in] Frame builder ID of job to disable WB units for */
+	u32 wb0_memory;
+	u32 wb1_memory;
+	u32 wb2_memory;
+} _mali_uk_pp_disable_wb_s;
+
+
+/** @} */ /* end group _mali_uk_pp */
+
+/** @defgroup _mali_uk_soft_job U/K Soft Job
+ * @{ */
+
+typedef struct {
+	u64 ctx;                            /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job;                       /**< [in] identifier for the job in user space */
+	u64 job_id_ptr;                     /**< [in,out] pointer to location of u32 where job id will be written */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u32 point;                          /**< [out] point on soft timeline for this job */
+	u32 type;                           /**< [in] type of soft job */
+} _mali_uk_soft_job_start_s;
+
+typedef struct {
+	u64 user_job;                       /**< [out] identifier for the job in user space */
+} _mali_uk_soft_job_activated_s;
+
+typedef struct {
+	u64 ctx;                          /**< [in,out] user-kernel context (trashed on output) */
+	u32 job_id;                         /**< [in] id for soft job */
+} _mali_uk_soft_job_signal_s;
+
+/** @} */ /* end group _mali_uk_soft_job */
+
+/** @addtogroup _mali_uk_core U/K Core
+ * @{ */
+
+/** @defgroup _mali_uk_waitfornotification_s Wait For Notification
+ * @{ */
+
+/** @brief Notification type encodings
+ *
+ * Each Notification type is an ordered pair of (subsystem,id), and is unique.
+ *
+ * The encoding of subsystem,id into a 32-bit word is:
+ * encoding = (( subsystem << _MALI_NOTIFICATION_SUBSYSTEM_SHIFT ) & _MALI_NOTIFICATION_SUBSYSTEM_MASK)
+ *            | (( id <<  _MALI_NOTIFICATION_ID_SHIFT ) & _MALI_NOTIFICATION_ID_MASK)
+ *
+ * @see _mali_uk_wait_for_notification_s
+ */
+typedef enum {
+	/** core notifications */
+
+	_MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x20,
+	_MALI_NOTIFICATION_APPLICATION_QUIT = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x40,
+	_MALI_NOTIFICATION_SETTINGS_CHANGED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x80,
+	_MALI_NOTIFICATION_SOFT_ACTIVATED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x100,
+
+	/** Fragment Processor notifications */
+
+	_MALI_NOTIFICATION_PP_FINISHED = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x10,
+	_MALI_NOTIFICATION_PP_NUM_CORE_CHANGE = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x20,
+
+	/** Vertex Processor notifications */
+
+	_MALI_NOTIFICATION_GP_FINISHED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x10,
+	_MALI_NOTIFICATION_GP_STALLED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x20,
+
+} _mali_uk_notification_type;
+
+/** to assist in splitting up 32-bit notification value in subsystem and id value */
+#define _MALI_NOTIFICATION_SUBSYSTEM_MASK 0xFFFF0000
+#define _MALI_NOTIFICATION_SUBSYSTEM_SHIFT 16
+#define _MALI_NOTIFICATION_ID_MASK 0x0000FFFF
+#define _MALI_NOTIFICATION_ID_SHIFT 0
+
+
+/** @brief Enumeration of possible settings which match mali_setting_t in user space
+ *
+ *
+ */
+typedef enum {
+	_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE = 0,
+	_MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_DEPTHBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_STENCILBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_PER_TILE_COUNTERS_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_COMPOSITOR,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_WINDOW,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_OTHER,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR,
+	_MALI_UK_USER_SETTING_SW_COUNTER_ENABLED,
+	_MALI_UK_USER_SETTING_MAX,
+} _mali_uk_user_setting_t;
+
+/* See mali_user_settings_db.c */
+extern const char *_mali_uk_user_setting_descriptions[];
+#define _MALI_UK_USER_SETTING_DESCRIPTIONS \
+	{                                           \
+		"sw_events_enable",                 \
+		"colorbuffer_capture_enable",       \
+		"depthbuffer_capture_enable",       \
+		"stencilbuffer_capture_enable",     \
+		"per_tile_counters_enable",         \
+		"buffer_capture_compositor",        \
+		"buffer_capture_window",            \
+		"buffer_capture_other",             \
+		"buffer_capture_n_frames",          \
+		"buffer_capture_resize_factor",     \
+		"sw_counters_enable",               \
+	};
+
+/** @brief struct to hold the value to a particular setting as seen in the kernel space
+ */
+typedef struct {
+	_mali_uk_user_setting_t setting;
+	u32 value;
+} _mali_uk_settings_changed_s;
+
+/** @brief Arguments for _mali_ukk_wait_for_notification()
+ *
+ * On successful return from _mali_ukk_wait_for_notification(), the members of
+ * this structure will indicate the reason for notification.
+ *
+ * Specifically, the source of the notification can be identified by the
+ * subsystem and id fields of the mali_uk_notification_type in the code.type
+ * member. The type member is encoded in a way to divide up the types into a
+ * subsystem field, and a per-subsystem ID field. See
+ * _mali_uk_notification_type for more information.
+ *
+ * Interpreting the data union member depends on the notification type:
+ *
+ * - type == _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS
+ *     - The kernel side is shutting down. No further
+ * _mali_uk_wait_for_notification() calls should be made.
+ *     - In this case, the value of the data union member is undefined.
+ *     - This is used to indicate to the user space client that it should close
+ * the connection to the Mali Device Driver.
+ * - type == _MALI_NOTIFICATION_PP_FINISHED
+ *    - The notification data is of type _mali_uk_pp_job_finished_s. It contains the user_job_ptr
+ * identifier used to start the job with, the job status, the number of milliseconds the job took to render,
+ * and values of core registers when the job finished (irq status, performance counters, renderer list
+ * address).
+ *    - A job has finished succesfully when its status member is _MALI_UK_JOB_STATUS_FINISHED.
+ *    - If the hardware detected a timeout while rendering the job, or software detected the job is
+ * taking more than watchdog_msecs (see _mali_ukk_pp_start_job()) to complete, the status member will
+ * indicate _MALI_UK_JOB_STATUS_HANG.
+ *    - If the hardware detected a bus error while accessing memory associated with the job, status will
+ * indicate _MALI_UK_JOB_STATUS_SEG_FAULT.
+ *    - Status will indicate MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to stop the job but the job
+ * didn't start the hardware yet, e.g. when the driver closes.
+ * - type == _MALI_NOTIFICATION_GP_FINISHED
+ *     - The notification data is of type _mali_uk_gp_job_finished_s. The notification is similar to that of
+ * type == _MALI_NOTIFICATION_PP_FINISHED, except that several other GP core register values are returned.
+ * The status values have the same meaning for type == _MALI_NOTIFICATION_PP_FINISHED.
+ * - type == _MALI_NOTIFICATION_GP_STALLED
+ *     - The nofication data is of type _mali_uk_gp_job_suspended_s. It contains the user_job_ptr
+ * identifier used to start the job with, the reason why the job stalled and a cookie to identify the core on
+ * which the job stalled.
+ *     - The reason member of gp_job_suspended is set to _MALIGP_JOB_SUSPENDED_OUT_OF_MEMORY
+ * when the polygon list builder unit has run out of memory.
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_notification_type type; /**< [out] Type of notification available */
+	union {
+		_mali_uk_gp_job_suspended_s gp_job_suspended;/**< [out] Notification data for _MALI_NOTIFICATION_GP_STALLED notification type */
+		_mali_uk_gp_job_finished_s  gp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_GP_FINISHED notification type */
+		_mali_uk_pp_job_finished_s  pp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_PP_FINISHED notification type */
+		_mali_uk_settings_changed_s setting_changed;/**< [out] Notification data for _MALI_NOTIFICAATION_SETTINGS_CHANGED notification type */
+		_mali_uk_soft_job_activated_s soft_job_activated; /**< [out] Notification data for _MALI_NOTIFICATION_SOFT_ACTIVATED notification type */
+	} data;
+} _mali_uk_wait_for_notification_s;
+
+/** @brief Arguments for _mali_ukk_post_notification()
+ *
+ * Posts the specified notification to the notification queue for this application.
+ * This is used to send a quit message to the callback thread.
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_notification_type type; /**< [in] Type of notification to post */
+} _mali_uk_post_notification_s;
+
+/** @} */ /* end group _mali_uk_waitfornotification_s */
+
+/** @defgroup _mali_uk_getapiversion_s Get API Version
+ * @{ */
+
+/** helpers for Device Driver API version handling */
+
+/** @brief Encode a version ID from a 16-bit input
+ *
+ * @note the input is assumed to be 16 bits. It must not exceed 16 bits. */
+#define _MAKE_VERSION_ID(x) (((x) << 16UL) | (x))
+
+/** @brief Check whether a 32-bit value is likely to be Device Driver API
+ * version ID. */
+#define _IS_VERSION_ID(x) (((x) & 0xFFFF) == (((x) >> 16UL) & 0xFFFF))
+
+/** @brief Decode a 16-bit version number from a 32-bit Device Driver API version
+ * ID */
+#define _GET_VERSION(x) (((x) >> 16UL) & 0xFFFF)
+
+/** @brief Determine whether two 32-bit encoded version IDs match */
+#define _IS_API_MATCH(x, y) (IS_VERSION_ID((x)) && IS_VERSION_ID((y)) && (GET_VERSION((x)) == GET_VERSION((y))))
+
+/**
+ * API version define.
+ * Indicates the version of the kernel API
+ * The version is a 16bit integer incremented on each API change.
+ * The 16bit integer is stored twice in a 32bit integer
+ * For example, for version 1 the value would be 0x00010001
+ */
+#define _MALI_API_VERSION 800
+#define _MALI_UK_API_VERSION _MAKE_VERSION_ID(_MALI_API_VERSION)
+
+/**
+ * The API version is a 16-bit integer stored in both the lower and upper 16-bits
+ * of a 32-bit value. The 16-bit API version value is incremented on each API
+ * change. Version 1 would be 0x00010001. Used in _mali_uk_get_api_version_s.
+ */
+typedef u32 _mali_uk_api_version;
+
+/** @brief Arguments for _mali_uk_get_api_version()
+ *
+ * The user-side interface version must be written into the version member,
+ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of
+ * the kernel-side interface.
+ *
+ * On successful return, the version member will be the API version of the
+ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version
+ * of the API.
+ *
+ * The compatible member must be checked to see if the version of the user-side
+ * interface is compatible with the kernel-side interface, since future versions
+ * of the interface may be backwards compatible.
+ */
+typedef struct {
+	u32 ctx;                        /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_api_version version;   /**< [in,out] API version of user-side interface. */
+	int compatible;                 /**< [out] @c 1 when @version is compatible, @c 0 otherwise */
+} _mali_uk_get_api_version_s;
+
+/** @brief Arguments for _mali_uk_get_api_version_v2()
+ *
+ * The user-side interface version must be written into the version member,
+ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of
+ * the kernel-side interface.
+ *
+ * On successful return, the version member will be the API version of the
+ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version
+ * of the API.
+ *
+ * The compatible member must be checked to see if the version of the user-side
+ * interface is compatible with the kernel-side interface, since future versions
+ * of the interface may be backwards compatible.
+ */
+typedef struct {
+	u64 ctx;                        /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_api_version version;   /**< [in,out] API version of user-side interface. */
+	int compatible;                 /**< [out] @c 1 when @version is compatible, @c 0 otherwise */
+} _mali_uk_get_api_version_v2_s;
+
+/** @} */ /* end group _mali_uk_getapiversion_s */
+
+/** @defgroup _mali_uk_get_user_settings_s Get user space settings */
+
+/** @brief struct to keep the matching values of the user space settings within certain context
+ *
+ * Each member of the settings array corresponds to a matching setting in the user space and its value is the value
+ * of that particular setting.
+ *
+ * All settings are given reference to the context pointed to by the ctx pointer.
+ *
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	u32 settings[_MALI_UK_USER_SETTING_MAX]; /**< [out] The values for all settings */
+} _mali_uk_get_user_settings_s;
+
+/** @brief struct to hold the value of a particular setting from the user space within a given context
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_user_setting_t setting; /**< [in] setting to get */
+	u32 value;                       /**< [out] value of setting */
+} _mali_uk_get_user_setting_s;
+
+/** @brief Arguments for _mali_ukk_request_high_priority() */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+} _mali_uk_request_high_priority_s;
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @defgroup _mali_uk_memory U/K Memory
+ * @{ */
+
+#define _MALI_MEMORY_ALLOCATE_RESIZEABLE  (1<<4) /* BUFFER can trim dow/grow*/
+#define _MALI_MEMORY_ALLOCATE_NO_BIND_GPU (1<<5) /*Not map to GPU when allocate, must call bind later*/
+
+typedef struct {
+	u64 ctx;                                          /**< [in,out] user-kernel context (trashed on output) */
+	u32 gpu_vaddr;                            /**< [in] GPU virtual address */
+	u32 vsize;                                        /**< [in] vitrual size of the allocation */
+	u32 psize;                                        /**< [in] physical size of the allocation */
+	u32 flags;
+	u64 backend_handle;          /**< [out] backend handle */
+	struct {
+		/* buffer types*/
+		/* CPU read/write info*/
+	} buffer_info;
+} _mali_uk_alloc_mem_s;
+
+
+typedef struct {
+	u64 ctx;                                          /**< [in,out] user-kernel context (trashed on output) */
+	u32 gpu_vaddr;                /**< [in] use as handle to free allocation */
+} _mali_uk_free_mem_s;
+
+
+#define _MALI_MEMORY_BIND_BACKEND_UMP             (1<<8)
+#define _MALI_MEMORY_BIND_BACKEND_DMA_BUF         (1<<9)
+#define _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY     (1<<10)
+#define _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY (1<<11)
+#define _MALI_MEMORY_BIND_BACKEND_EXT_COW         (1<<12)
+#define _MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION (1<<13)
+
+
+#define _MALI_MEMORY_BIND_BACKEND_MASK (_MALI_MEMORY_BIND_BACKEND_UMP| \
+					_MALI_MEMORY_BIND_BACKEND_DMA_BUF |\
+					_MALI_MEMORY_BIND_BACKEND_MALI_MEMORY |\
+					_MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY |\
+					_MALI_MEMORY_BIND_BACKEND_EXT_COW |\
+					_MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION)
+
+
+#define _MALI_MEMORY_GPU_READ_ALLOCATE            (1<<16)
+
+
+typedef struct {
+	u64 ctx;                                          /**< [in,out] user-kernel context (trashed on output) */
+	u32 vaddr;                    /**< [in] mali address to map the physical memory to */
+	u32 size;                                         /**< [in] size */
+	u32 flags;                    /**< [in] see_MALI_MEMORY_BIND_BACKEND_* */
+	u32 padding;                              /** padding for 32/64 struct alignment */
+	union {
+		struct {
+			u32 secure_id;                                  /**< [in] secure id */
+			u32 rights;                                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                                              /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_ump;
+		struct {
+			u32 mem_fd;                     /**< [in] Memory descriptor */
+			u32 rights;                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_dma_buf;
+		struct {
+			/**/
+		} bind_mali_memory;
+		struct {
+			u32 phys_addr;                  /**< [in] physical address */
+			u32 rights;                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_ext_memory;
+	} mem_union;
+} _mali_uk_bind_mem_s;
+
+typedef struct {
+	u64 ctx;                                          /**< [in,out] user-kernel context (trashed on output) */
+	u32 flags;                    /**< [in] see_MALI_MEMORY_BIND_BACKEND_* */
+	u32 vaddr;                   /**<  [in] identifier for mapped memory object in kernel space  */
+} _mali_uk_unbind_mem_s;
+
+typedef struct {
+	u64 ctx;                                                                /**< [in,out] user-kernel context (trashed on output) */
+	u32 target_handle;                                              /**< [in] handle of allocation need to do COW */
+	u32 range_start;                                                /**< [in] re allocate range start offset, offset from the start of allocation */
+	u32 size;                                                               /**< [in] re allocate size*/
+	u32 vaddr;                                                              /**< [in] mali address for the new allocaiton */
+	u32 backend_handle;                                             /**< [out] backend handle */
+} _mali_uk_cow_mem_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 mem_fd;                     /**< [in] Memory descriptor */
+	u32 size;                       /**< [out] size */
+} _mali_uk_dma_buf_get_size_s;
+
+/** Flag for _mali_uk_map_external_mem_s, _mali_uk_attach_ump_mem_s and _mali_uk_attach_dma_buf_s */
+#define _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE (1<<0)
+#if 0
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 phys_addr;                  /**< [in] physical address */
+	u32 size;                       /**< [in] size */
+	u32 mali_address;               /**< [in] mali address to map the physical memory to */
+	u32 rights;                     /**< [in] rights necessary for accessing memory */
+	u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+	u32 cookie;                     /**< [out] identifier for mapped memory object in kernel space  */
+} _mali_uk_map_external_mem_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 cookie;                     /**< [out] identifier for mapped memory object in kernel space  */
+} _mali_uk_unmap_external_mem_s;
+#endif
+
+/**
+ * @brief Arguments for _mali_uk[uk]_mem_write_safe()
+ */
+typedef struct {
+	u64 ctx;  /**< [in,out] user-kernel context (trashed on output) */
+	u64 src;  /**< [in] Pointer to source data */
+	u64 dest; /**< [in] Destination Mali buffer */
+	u32 size;   /**< [in,out] Number of bytes to write/copy on input, number of bytes actually written/copied on output */
+} _mali_uk_mem_write_safe_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 size;                       /**< [out] size of MMU page table information (registers + page tables) */
+} _mali_uk_query_mmu_page_table_dump_size_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 size;                       /**< [in] size of buffer to receive mmu page table information */
+	u64 buffer;                   /**< [in,out] buffer to receive mmu page table information */
+	u32 register_writes_size;       /**< [out] size of MMU register dump */
+	u64 register_writes;           /**< [out] pointer within buffer where MMU register dump is stored */
+	u32 page_table_dump_size;       /**< [out] size of MMU page table dump */
+	u64 page_table_dump;           /**< [out] pointer within buffer where MMU page table dump is stored */
+} _mali_uk_dump_mmu_page_table_s;
+
+/** @} */ /* end group _mali_uk_memory */
+
+
+/** @addtogroup _mali_uk_pp U/K Fragment Processor
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_get_pp_number_of_cores()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_pp_number_of_cores(), @c number_of_cores
+ * will contain the number of Fragment Processor cores in the system.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 number_of_total_cores;      /**< [out] Total number of Fragment Processor cores in the system */
+	u32 number_of_enabled_cores;    /**< [out] Number of enabled Fragment Processor cores */
+} _mali_uk_get_pp_number_of_cores_s;
+
+/** @brief Arguments for _mali_ukk_get_pp_core_version()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_pp_core_version(), @c version contains
+ * the version that all Fragment Processor cores are compatible with.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_core_version version;     /**< [out] version returned from core, see \ref _mali_core_version  */
+	u32 padding;
+} _mali_uk_get_pp_core_version_s;
+
+/** @} */ /* end group _mali_uk_pp */
+
+
+/** @addtogroup _mali_uk_gp U/K Vertex Processor
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_get_gp_number_of_cores()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_gp_number_of_cores(), @c number_of_cores
+ * will contain the number of Vertex Processor cores in the system.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 number_of_cores;            /**< [out] number of Vertex Processor cores in the system */
+} _mali_uk_get_gp_number_of_cores_s;
+
+/** @brief Arguments for _mali_ukk_get_gp_core_version()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_gp_core_version(), @c version contains
+ * the version that all Vertex Processor cores are compatible with.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_core_version version;     /**< [out] version returned from core, see \ref _mali_core_version */
+} _mali_uk_get_gp_core_version_s;
+
+/** @} */ /* end group _mali_uk_gp */
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 event_id;                   /**< [in] event id to register (see  enum mali_profiling_events for values) */
+	u32 data[5];                    /**< [in] event specific data */
+} _mali_uk_profiling_add_event_s;
+
+typedef struct {
+	u64 ctx;                     /**< [in,out] user-kernel context (trashed on output) */
+	u32 memory_usage;              /**< [out] total memory usage */
+} _mali_uk_profiling_memory_usage_get_s;
+
+
+/** @addtogroup _mali_uk_memory U/K Memory
+ * @{ */
+
+/** @brief Arguments to _mali_ukk_mem_mmap()
+ *
+ * Use of the phys_addr member depends on whether the driver is compiled for
+ * Mali-MMU or nonMMU:
+ * - in the nonMMU case, this is the physical address of the memory as seen by
+ * the CPU (which may be a constant offset from that used by Mali)
+ * - in the MMU case, this is the Mali Virtual base address of the memory to
+ * allocate, and the particular physical pages used to back the memory are
+ * entirely determined by _mali_ukk_mem_mmap(). The details of the physical pages
+ * are not reported to user-space for security reasons.
+ *
+ * The cookie member must be stored for use later when freeing the memory by
+ * calling _mali_ukk_mem_munmap(). In the Mali-MMU case, the cookie is secure.
+ *
+ * The ukk_private word must be set to zero when calling from user-space. On
+ * Kernel-side, the  OS implementation of the U/K interface can use it to
+ * communicate data to the OS implementation of the OSK layer. In particular,
+ * _mali_ukk_get_big_block() directly calls _mali_ukk_mem_mmap directly, and
+ * will communicate its own ukk_private word through the ukk_private member
+ * here. The common code itself will not inspect or modify the ukk_private
+ * word, and so it may be safely used for whatever purposes necessary to
+ * integrate Mali Memory handling into the OS.
+ *
+ * The uku_private member is currently reserved for use by the user-side
+ * implementation of the U/K interface. Its value must be zero.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	void *mapping;                  /**< [out] Returns user-space virtual address for the mapping */
+	u32 size;                       /**< [in] Size of the requested mapping */
+	u32 phys_addr;                  /**< [in] Physical address - could be offset, depending on caller+callee convention */
+} _mali_uk_mem_mmap_s;
+
+/** @brief Arguments to _mali_ukk_mem_munmap()
+ *
+ * The cookie and mapping members must be that returned from the same previous
+ * call to _mali_ukk_mem_mmap(). The size member must correspond to cookie
+ * and mapping - that is, it must be the value originally supplied to a call to
+ * _mali_ukk_mem_mmap that returned the values of mapping and cookie.
+ *
+ * An error will be returned if an attempt is made to unmap only part of the
+ * originally obtained range, or to unmap more than was originally obtained.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	void *mapping;                  /**< [in] The mapping returned from mmap call */
+	u32 size;                       /**< [in] The size passed to mmap call */
+} _mali_uk_mem_munmap_s;
+/** @} */ /* end group _mali_uk_memory */
+
+/** @defgroup _mali_uk_vsync U/K VSYNC Wait Reporting Module
+ * @{ */
+
+/** @brief VSYNC events
+ *
+ * These events are reported when DDK starts to wait for vsync and when the
+ * vsync has occured and the DDK can continue on the next frame.
+ */
+typedef enum _mali_uk_vsync_event {
+	_MALI_UK_VSYNC_EVENT_BEGIN_WAIT = 0,
+	_MALI_UK_VSYNC_EVENT_END_WAIT
+} _mali_uk_vsync_event;
+
+/** @brief Arguments to _mali_ukk_vsync_event()
+ *
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_vsync_event event;     /**< [in] VSYNCH event type */
+} _mali_uk_vsync_event_report_s;
+
+/** @} */ /* end group _mali_uk_vsync */
+
+/** @defgroup _mali_uk_sw_counters_report U/K Software Counter Reporting
+ * @{ */
+
+/** @brief Software counter values
+ *
+ * Values recorded for each of the software counters during a single renderpass.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u64 counters;                  /**< [in] The array of u32 counter values */
+	u32 num_counters;              /**< [in] The number of elements in counters array */
+} _mali_uk_sw_counters_report_s;
+
+/** @} */ /* end group _mali_uk_sw_counters_report */
+
+/** @defgroup _mali_uk_timeline U/K Mali Timeline
+ * @{ */
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 timeline;                   /**< [in] timeline id */
+	u32 point;                      /**< [out] latest point on timeline */
+} _mali_uk_timeline_get_latest_point_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_fence_t fence;         /**< [in] fence */
+	u32 timeout;                    /**< [in] timeout (0 for no wait, -1 for blocking) */
+	u32 status;                     /**< [out] status of fence (1 if signaled, 0 if timeout) */
+} _mali_uk_timeline_wait_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_fence_t fence;         /**< [in] mali fence to create linux sync fence from */
+	s32 sync_fd;                    /**< [out] file descriptor for new linux sync fence */
+} _mali_uk_timeline_create_sync_fence_s;
+
+/** @} */ /* end group _mali_uk_timeline */
+
+/** @} */ /* end group u_k_api */
+
+/** @} */ /* end group uddapi */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_UK_TYPES_H__ */
diff --git a/utgard/r5p1/linux/license/gpl/mali_kernel_license.h b/utgard/r5p1/linux/license/gpl/mali_kernel_license.h
new file mode 100755
index 0000000..c88c992
--- /dev/null
+++ b/utgard/r5p1/linux/license/gpl/mali_kernel_license.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2010, 2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_kernel_license.h
+ * Defines for the macro MODULE_LICENSE.
+ */
+
+#ifndef __MALI_KERNEL_LICENSE_H__
+#define __MALI_KERNEL_LICENSE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MALI_KERNEL_LINUX_LICENSE     "GPL"
+#define MALI_LICENSE_IS_GPL 1
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LICENSE_H__ */
diff --git a/utgard/r5p1/linux/mali_device_pause_resume.c b/utgard/r5p1/linux/mali_device_pause_resume.c
new file mode 100755
index 0000000..37076a2
--- /dev/null
+++ b/utgard/r5p1/linux/mali_device_pause_resume.c
@@ -0,0 +1,36 @@
+/**
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_device_pause_resume.c
+ * Implementation of the Mali pause/resume functionality
+ */
+
+#include <linux/module.h>
+#include <linux/mali/mali_utgard.h>
+#include "mali_pm.h"
+
+void mali_dev_pause(void)
+{
+	/*
+	 * Deactive all groups to prevent hardware being touched
+	 * during the period of mali device pausing
+	 */
+	mali_pm_os_suspend(MALI_FALSE);
+}
+
+EXPORT_SYMBOL(mali_dev_pause);
+
+void mali_dev_resume(void)
+{
+	mali_pm_os_resume();
+}
+
+EXPORT_SYMBOL(mali_dev_resume);
diff --git a/utgard/r5p1/linux/mali_kernel_linux.c b/utgard/r5p1/linux/mali_kernel_linux.c
new file mode 100755
index 0000000..b5dede6
--- /dev/null
+++ b/utgard/r5p1/linux/mali_kernel_linux.c
@@ -0,0 +1,922 @@
+/**
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_kernel_linux.c
+ * Implementation of the Linux device driver entrypoints
+ */
+#include <linux/module.h>   /* kernel module definitions */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/cdev.h>     /* character device definitions */
+#include <linux/mm.h>       /* memory manager definitions */
+#include <linux/mali/mali_utgard_ioctl.h>
+#include <linux/version.h>
+#include <linux/device.h>
+#include "mali_kernel_license.h"
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/bug.h>
+#include <linux/of.h>
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_core.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_ukk.h"
+#include "mali_ukk_wrappers.h"
+#include "mali_kernel_sysfs.h"
+#include "mali_pm.h"
+#include "mali_kernel_license.h"
+#include "mali_memory.h"
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_manager.h"
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include "mali_profiling_internal.h"
+#endif
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+#include "mali_osk_profiling.h"
+#include "mali_dvfs_policy.h"
+static int is_first_resume = 1;
+/*Store the clk and vol for boot/insmod and mali_resume*/
+static struct mali_gpu_clk_item mali_gpu_clk[2];
+#endif
+
+/* Streamline support for the Mali driver */
+#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_MALI400_PROFILING)
+/* Ask Linux to create the tracepoints */
+#define CREATE_TRACE_POINTS
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_hw_counter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counters);
+#endif /* CONFIG_TRACEPOINTS */
+
+/* from the __malidrv_build_info.c file that is generated during build */
+extern const char *__malidrv_build_info(void);
+extern void mali_post_init(void);
+extern int mali_pdev_dts_init(struct platform_device* mali_gpu_device);
+extern int mpgpu_class_init(void);
+extern void mpgpu_class_exit(void);
+
+int mali_page_fault = 0;
+module_param(mali_page_fault, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_page_fault, "mali_page_fault");
+
+int pp_hardware_reset = 0;
+module_param(pp_hardware_reset, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(pp_hardware_reset, "mali_hardware_reset");
+/* Module parameter to control log level */
+int mali_debug_level = 2;
+module_param(mali_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_debug_level, "Higher number, more dmesg output");
+
+extern int mali_max_job_runtime;
+module_param(mali_max_job_runtime, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_job_runtime, "Maximum allowed job runtime in msecs.\nJobs will be killed after this no matter what");
+
+extern int mali_l2_max_reads;
+module_param(mali_l2_max_reads, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_l2_max_reads, "Maximum reads for Mali L2 cache");
+
+extern unsigned int mali_dedicated_mem_start;
+module_param(mali_dedicated_mem_start, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_dedicated_mem_start, "Physical start address of dedicated Mali GPU memory.");
+
+extern unsigned int mali_dedicated_mem_size;
+module_param(mali_dedicated_mem_size, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_dedicated_mem_size, "Size of dedicated Mali GPU memory.");
+
+extern unsigned int mali_shared_mem_size;
+module_param(mali_shared_mem_size, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_shared_mem_size, "Size of shared Mali GPU memory.");
+
+#if defined(CONFIG_MALI400_PROFILING)
+extern int mali_boot_profiling;
+module_param(mali_boot_profiling, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_boot_profiling, "Start profiling as a part of Mali driver initialization");
+#endif
+
+extern int mali_max_pp_cores_group_1;
+module_param(mali_max_pp_cores_group_1, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_pp_cores_group_1, "Limit the number of PP cores to use from first PP group.");
+
+extern int mali_max_pp_cores_group_2;
+module_param(mali_max_pp_cores_group_2, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_pp_cores_group_2, "Limit the number of PP cores to use from second PP group (Mali-450 only).");
+
+#if defined(CONFIG_MALI_DVFS)
+/** the max fps the same as display vsync default 60, can set by module insert parameter */
+extern int mali_max_system_fps;
+module_param(mali_max_system_fps, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_system_fps, "Max system fps the same as display VSYNC.");
+
+/** a lower limit on their desired FPS default 58, can set by module insert parameter*/
+extern int mali_desired_fps;
+module_param(mali_desired_fps, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_desired_fps, "A bit lower than max_system_fps which user desired fps");
+#endif
+
+#if MALI_ENABLE_CPU_CYCLES
+#include <linux/cpumask.h>
+#include <linux/timer.h>
+#include <asm/smp.h>
+static struct timer_list mali_init_cpu_clock_timers[8];
+static u32 mali_cpu_clock_last_value[8] = {0,};
+#endif
+
+/* Export symbols from common code: mali_user_settings.c */
+#include "mali_user_settings_db.h"
+EXPORT_SYMBOL(mali_set_user_setting);
+EXPORT_SYMBOL(mali_get_user_setting);
+
+static char mali_dev_name[] = "mali"; /* should be const, but the functions we call requires non-cost */
+
+/* This driver only supports one Mali device, and this variable stores this single platform device */
+struct platform_device *mali_platform_device = NULL;
+
+/* This driver only supports one Mali device, and this variable stores the exposed misc device (/dev/mali) */
+static struct miscdevice mali_miscdevice = { 0, };
+
+static int mali_miscdevice_register(struct platform_device *pdev);
+static void mali_miscdevice_unregister(void);
+
+static int mali_open(struct inode *inode, struct file *filp);
+static int mali_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int mali_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static int mali_probe(struct platform_device *pdev);
+static int mali_remove(struct platform_device *pdev);
+
+static int mali_driver_suspend_scheduler(struct device *dev);
+static int mali_driver_resume_scheduler(struct device *dev);
+
+#ifdef CONFIG_PM_RUNTIME
+static int mali_driver_runtime_suspend(struct device *dev);
+static int mali_driver_runtime_resume(struct device *dev);
+static int mali_driver_runtime_idle(struct device *dev);
+#endif
+
+#if defined(MALI_FAKE_PLATFORM_DEVICE)
+#if defined(CONFIG_MALI_DT)
+extern int mali_platform_device_init(struct platform_device *device);
+extern int mali_platform_device_deinit(struct platform_device *device);
+#else
+extern int mali_platform_device_register(void);
+extern int mali_platform_device_unregister(void);
+#endif
+#endif
+
+/* Linux power management operations provided by the Mali device driver */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29))
+struct pm_ext_ops mali_dev_ext_pm_ops = {
+	.base =
+	{
+		.suspend = mali_driver_suspend_scheduler,
+		.resume = mali_driver_resume_scheduler,
+		.freeze = mali_driver_suspend_scheduler,
+		.thaw =   mali_driver_resume_scheduler,
+	},
+};
+#else
+static const struct dev_pm_ops mali_dev_pm_ops = {
+#ifdef CONFIG_PM_RUNTIME
+	.runtime_suspend = mali_driver_runtime_suspend,
+	.runtime_resume = mali_driver_runtime_resume,
+	.runtime_idle = mali_driver_runtime_idle,
+#endif
+	.suspend = mali_driver_suspend_scheduler,
+	.resume = mali_driver_resume_scheduler,
+	.freeze = mali_driver_suspend_scheduler,
+	.thaw = mali_driver_resume_scheduler,
+	.poweroff = mali_driver_suspend_scheduler,
+};
+#endif
+
+#ifdef CONFIG_MALI_DT
+static struct of_device_id base_dt_ids[] = {
+	{.compatible = "arm,mali-300"},
+	{.compatible = "arm,mali-400"},
+	{.compatible = "arm,mali-450"},
+	{.compatible = "arm,mali-utgard"},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, base_dt_ids);
+#endif
+
+/* The Mali device driver struct */
+static struct platform_driver mali_platform_driver = {
+	.probe  = mali_probe,
+	.remove = mali_remove,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29))
+	.pm = &mali_dev_ext_pm_ops,
+#endif
+	.driver =
+	{
+		.name   = MALI_GPU_NAME_UTGARD,
+		.owner  = THIS_MODULE,
+		.bus = &platform_bus_type,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29))
+		.pm = &mali_dev_pm_ops,
+#endif
+#ifdef CONFIG_MALI_DT
+		.of_match_table = of_match_ptr(base_dt_ids),
+#endif
+	},
+};
+
+/* Linux misc device operations (/dev/mali) */
+struct file_operations mali_fops = {
+	.owner = THIS_MODULE,
+	.open = mali_open,
+	.release = mali_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl = mali_ioctl,
+#else
+	.ioctl = mali_ioctl,
+#endif
+	.compat_ioctl = mali_ioctl,
+	.mmap = mali_mmap
+};
+
+#if MALI_ENABLE_CPU_CYCLES
+void mali_init_cpu_time_counters(int reset, int enable_divide_by_64)
+{
+	/* The CPU assembly reference used is: ARM Architecture Reference Manual ARMv7-AR C.b */
+	u32 write_value;
+
+	/* See B4.1.116 PMCNTENSET, Performance Monitors Count Enable Set register, VMSA */
+	/* setting p15 c9 c12 1 to 0x8000000f==CPU_CYCLE_ENABLE |EVENT_3_ENABLE|EVENT_2_ENABLE|EVENT_1_ENABLE|EVENT_0_ENABLE */
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f));
+
+
+	/* See B4.1.117 PMCR, Performance Monitors Control Register. Writing to p15, c9, c12, 0 */
+	write_value = 1 << 0; /* Bit 0 set. Enable counters */
+	if (reset) {
+		write_value |= 1 << 1; /* Reset event counters */
+		write_value |= 1 << 2; /* Reset cycle counter  */
+	}
+	if (enable_divide_by_64) {
+		write_value |= 1 << 3; /* Enable the Clock divider by 64 */
+	}
+	write_value |= 1 << 4; /* Export enable. Not needed */
+	asm volatile("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(write_value));
+
+	/* PMOVSR Overflow Flag Status Register - Clear Clock and Event overflows */
+	asm volatile("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x8000000f));
+
+
+	/* See B4.1.124 PMUSERENR - setting p15 c9 c14 to 1" */
+	/* User mode access to the Performance Monitors enabled. */
+	/* Lets User space read cpu clock cycles */
+	asm volatile("mcr p15, 0, %0, c9, c14, 0" :: "r"(1));
+}
+
+/** A timer function that configures the cycle clock counter on current CPU.
+ * The function \a mali_init_cpu_time_counters_on_all_cpus sets up this
+ * function to trigger on all Cpus during module load.
+ */
+static void mali_init_cpu_clock_timer_func(unsigned long data)
+{
+	int reset_counters, enable_divide_clock_counter_by_64;
+	int current_cpu = raw_smp_processor_id();
+	unsigned int sample0;
+	unsigned int sample1;
+
+	MALI_IGNORE(data);
+
+	reset_counters = 1;
+	enable_divide_clock_counter_by_64 = 0;
+	mali_init_cpu_time_counters(reset_counters, enable_divide_clock_counter_by_64);
+
+	sample0 = mali_get_cpu_cyclecount();
+	sample1 = mali_get_cpu_cyclecount();
+
+	MALI_DEBUG_PRINT(3, ("Init Cpu %d cycle counter- First two samples: %08x %08x \n", current_cpu, sample0, sample1));
+}
+
+/** A timer functions for storing current time on all cpus.
+ * Used for checking if the clocks have similar values or if they are drifting.
+ */
+static void mali_print_cpu_clock_timer_func(unsigned long data)
+{
+	int current_cpu = raw_smp_processor_id();
+	unsigned int sample0;
+
+	MALI_IGNORE(data);
+	sample0 = mali_get_cpu_cyclecount();
+	if (current_cpu < 8) {
+		mali_cpu_clock_last_value[current_cpu] = sample0;
+	}
+}
+
+/** Init the performance registers on all CPUs to count clock cycles.
+ * For init \a print_only should be 0.
+ * If \a print_only is 1, it will intead print the current clock value of all CPUs.
+ */
+void mali_init_cpu_time_counters_on_all_cpus(int print_only)
+{
+	int i = 0;
+	int cpu_number;
+	int jiffies_trigger;
+	int jiffies_wait;
+
+	jiffies_wait = msecs_to_jiffies(20);
+	jiffies_trigger = jiffies + jiffies_wait;
+
+	for (i = 0 ; i < 8 ; i++) {
+		init_timer(&mali_init_cpu_clock_timers[i]);
+		if (print_only) mali_init_cpu_clock_timers[i].function = mali_print_cpu_clock_timer_func;
+		else            mali_init_cpu_clock_timers[i].function = mali_init_cpu_clock_timer_func;
+		mali_init_cpu_clock_timers[i].expires = jiffies_trigger ;
+	}
+	cpu_number = cpumask_first(cpu_online_mask);
+	for (i = 0 ; i < 8 ; i++) {
+		int next_cpu;
+		add_timer_on(&mali_init_cpu_clock_timers[i], cpu_number);
+		next_cpu = cpumask_next(cpu_number, cpu_online_mask);
+		if (next_cpu >= nr_cpu_ids) break;
+		cpu_number = next_cpu;
+	}
+
+	while (jiffies_wait) jiffies_wait = schedule_timeout_uninterruptible(jiffies_wait);
+
+	for (i = 0 ; i < 8 ; i++) {
+		del_timer_sync(&mali_init_cpu_clock_timers[i]);
+	}
+
+	if (print_only) {
+		if ((0 == mali_cpu_clock_last_value[2]) && (0 == mali_cpu_clock_last_value[3])) {
+			/* Diff can be printed if we want to check if the clocks are in sync
+			int diff = mali_cpu_clock_last_value[0] - mali_cpu_clock_last_value[1];*/
+			MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1]));
+		} else {
+			MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1], mali_cpu_clock_last_value[2], mali_cpu_clock_last_value[3]));
+		}
+	}
+}
+#endif
+
+int mali_module_init(void)
+{
+	int err = 0;
+
+	MALI_DEBUG_PRINT(2, ("Inserting Mali v%d device driver. \n", _MALI_API_VERSION));
+	//MALI_DEBUG_PRINT(2, ("Compiled: %s, time: %s.\n", __DATE__, __TIME__));
+	MALI_DEBUG_PRINT(2, ("Driver revision: %s\n", SVN_REV_STRING));
+
+#if MALI_ENABLE_CPU_CYCLES
+	mali_init_cpu_time_counters_on_all_cpus(0);
+	MALI_DEBUG_PRINT(2, ("CPU cycle counter setup complete\n"));
+	/* Printing the current cpu counters */
+	mali_init_cpu_time_counters_on_all_cpus(1);
+#endif
+
+	/* Initialize module wide settings */
+#ifdef MALI_FAKE_PLATFORM_DEVICE
+#ifndef CONFIG_MALI_DT
+	MALI_DEBUG_PRINT(2, ("mali_module_init() registering device\n"));
+	err = mali_platform_device_register();
+	if (0 != err) {
+		return err;
+	}
+#endif
+#endif
+
+	MALI_DEBUG_PRINT(2, ("mali_module_init() registering driver\n"));
+
+	err = platform_driver_register(&mali_platform_driver);
+
+	if (0 != err) {
+		MALI_DEBUG_PRINT(2, ("mali_module_init() Failed to register driver (%d)\n", err));
+#ifdef MALI_FAKE_PLATFORM_DEVICE
+#ifndef CONFIG_MALI_DT
+		mali_platform_device_unregister();
+#endif
+#endif
+		mali_platform_device = NULL;
+		return err;
+	}
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+	err = _mali_internal_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE);
+	if (0 != err) {
+		/* No biggie if we wheren't able to initialize the profiling */
+		MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n"));
+	}
+#endif
+
+	/* Tracing the current frequency and voltage from boot/insmod*/
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item(),to record current clk info.*/
+	mali_get_current_gpu_clk_item(&mali_gpu_clk[0]);
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[0].clock,
+				      mali_gpu_clk[0].vol / 1000,
+				      0, 0, 0);
+#endif
+
+	MALI_PRINT(("Mali device driver loaded\n"));
+
+	mpgpu_class_init();
+
+	return 0; /* Success */
+}
+
+void mali_module_exit(void)
+{
+	MALI_DEBUG_PRINT(2, ("Unloading Mali v%d device driver.\n", _MALI_API_VERSION));
+
+	MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering driver\n"));
+
+	platform_driver_unregister(&mali_platform_driver);
+
+#if defined(MALI_FAKE_PLATFORM_DEVICE)
+#ifndef CONFIG_MALI_DT
+	MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering device\n"));
+	mali_platform_device_unregister();
+#endif
+#endif
+
+	/* Tracing the current frequency and voltage from rmmod*/
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      0,
+				      0,
+				      0, 0, 0);
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+	_mali_internal_profiling_term();
+#endif
+	mpgpu_class_exit();
+
+	MALI_PRINT(("Mali device driver unloaded\n"));
+}
+
+static int mali_probe(struct platform_device *pdev)
+{
+	int err;
+
+	MALI_DEBUG_PRINT(2, ("mali_probe(): Called for platform device %s\n", pdev->name));
+
+	if (NULL != mali_platform_device) {
+		/* Already connected to a device, return error */
+		MALI_PRINT_ERROR(("mali_probe(): The Mali driver is already connected with a Mali device."));
+		return -EEXIST;
+	}
+
+	mali_platform_device = pdev;
+
+#ifdef CONFIG_MALI_DT
+	/* If we use DT to initialize our DDK, we have to prepare somethings. */
+	err = mali_platform_device_init(mali_platform_device);
+	if (0 != err) {
+		MALI_PRINT_ERROR(("mali_probe(): Failed to initialize platform device."));
+		return -EFAULT;
+	}
+#endif
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_wq_init()) {
+		/* Initialize the Mali GPU HW specified by pdev */
+		if (_MALI_OSK_ERR_OK == mali_initialize_subsystems()) {
+			/* Register a misc device (so we are accessible from user space) */
+			err = mali_miscdevice_register(pdev);
+			if (0 == err) {
+				/* Setup sysfs entries */
+				err = mali_sysfs_register(mali_dev_name);
+
+				if (0 == err) {
+					mali_post_init();
+					MALI_DEBUG_PRINT(2, ("mali_probe(): Successfully initialized driver for platform device %s\n", pdev->name));
+
+					return 0;
+				} else {
+					MALI_PRINT_ERROR(("mali_probe(): failed to register sysfs entries"));
+				}
+				mali_miscdevice_unregister();
+			} else {
+				MALI_PRINT_ERROR(("mali_probe(): failed to register Mali misc device."));
+			}
+			mali_terminate_subsystems();
+		} else {
+			MALI_PRINT_ERROR(("mali_probe(): Failed to initialize Mali device driver."));
+		}
+		_mali_osk_wq_term();
+	}
+
+	mali_platform_device = NULL;
+	return -EFAULT;
+}
+
+static int mali_remove(struct platform_device *pdev)
+{
+	MALI_DEBUG_PRINT(2, ("mali_remove() called for platform device %s\n", pdev->name));
+	mali_sysfs_unregister();
+	mali_miscdevice_unregister();
+	mali_terminate_subsystems();
+	_mali_osk_wq_term();
+#ifdef CONFIG_MALI_DT
+	mali_platform_device_deinit(mali_platform_device);
+#endif
+	mali_platform_device = NULL;
+	return 0;
+}
+
+static int mali_miscdevice_register(struct platform_device *pdev)
+{
+	int err;
+
+	mali_miscdevice.minor = MISC_DYNAMIC_MINOR;
+	mali_miscdevice.name = mali_dev_name;
+	mali_miscdevice.fops = &mali_fops;
+	mali_miscdevice.parent = get_device(&pdev->dev);
+
+	err = misc_register(&mali_miscdevice);
+	if (0 != err) {
+		MALI_PRINT_ERROR(("Failed to register misc device, misc_register() returned %d\n", err));
+	}
+
+	return err;
+}
+
+static void mali_miscdevice_unregister(void)
+{
+	misc_deregister(&mali_miscdevice);
+}
+
+static int mali_driver_suspend_scheduler(struct device *dev)
+{
+	mali_pm_os_suspend(MALI_TRUE);
+	/* Tracing the frequency and voltage after mali is suspended */
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      0,
+				      0,
+				      0, 0, 0);
+	return 0;
+}
+
+static int mali_driver_resume_scheduler(struct device *dev)
+{
+	/* Tracing the frequency and voltage after mali is resumed */
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item() once,to record current clk info.*/
+	if (is_first_resume == 1) {
+		mali_get_current_gpu_clk_item(&mali_gpu_clk[1]);
+		is_first_resume = 0;
+	}
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[1].clock,
+				      mali_gpu_clk[1].vol / 1000,
+				      0, 0, 0);
+#endif
+	mali_pm_os_resume();
+	return 0;
+}
+
+#ifdef CONFIG_PM_RUNTIME
+static int mali_driver_runtime_suspend(struct device *dev)
+{
+	if (MALI_TRUE == mali_pm_runtime_suspend()) {
+		/* Tracing the frequency and voltage after mali is suspended */
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+					      0,
+					      0,
+					      0, 0, 0);
+
+		return 0;
+	} else {
+		return -EBUSY;
+	}
+}
+
+static int mali_driver_runtime_resume(struct device *dev)
+{
+	/* Tracing the frequency and voltage after mali is resumed */
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item() once,to record current clk info.*/
+	if (is_first_resume == 1) {
+		mali_get_current_gpu_clk_item(&mali_gpu_clk[1]);
+		is_first_resume = 0;
+	}
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[1].clock,
+				      mali_gpu_clk[1].vol / 1000,
+				      0, 0, 0);
+#endif
+
+	mali_pm_runtime_resume();
+	return 0;
+}
+
+static int mali_driver_runtime_idle(struct device *dev)
+{
+	/* Nothing to do */
+	return 0;
+}
+#endif
+
+static int mali_open(struct inode *inode, struct file *filp)
+{
+	struct mali_session_data *session_data;
+	_mali_osk_errcode_t err;
+
+	/* input validation */
+	if (mali_miscdevice.minor != iminor(inode)) {
+		MALI_PRINT_ERROR(("mali_open() Minor does not match\n"));
+		return -ENODEV;
+	}
+
+	/* allocated struct to track this session */
+	err = _mali_ukk_open((void **)&session_data);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	/* initialize file pointer */
+	filp->f_pos = 0;
+
+	/* link in our session data */
+	filp->private_data = (void *)session_data;
+
+	return 0;
+}
+
+static int mali_release(struct inode *inode, struct file *filp)
+{
+	_mali_osk_errcode_t err;
+
+	/* input validation */
+	if (mali_miscdevice.minor != iminor(inode)) {
+		MALI_PRINT_ERROR(("mali_release() Minor does not match\n"));
+		return -ENODEV;
+	}
+
+	err = _mali_ukk_close((void **)&filp->private_data);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int map_errcode(_mali_osk_errcode_t err)
+{
+	switch (err) {
+	case _MALI_OSK_ERR_OK :
+		return 0;
+	case _MALI_OSK_ERR_FAULT:
+		return -EFAULT;
+	case _MALI_OSK_ERR_INVALID_FUNC:
+		return -ENOTTY;
+	case _MALI_OSK_ERR_INVALID_ARGS:
+		return -EINVAL;
+	case _MALI_OSK_ERR_NOMEM:
+		return -ENOMEM;
+	case _MALI_OSK_ERR_TIMEOUT:
+		return -ETIMEDOUT;
+	case _MALI_OSK_ERR_RESTARTSYSCALL:
+		return -ERESTARTSYS;
+	case _MALI_OSK_ERR_ITEM_NOT_FOUND:
+		return -ENOENT;
+	default:
+		return -EFAULT;
+	}
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int mali_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	int err;
+	struct mali_session_data *session_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+	/* inode not used */
+	(void)inode;
+#endif
+
+	MALI_DEBUG_PRINT(7, ("Ioctl received 0x%08X 0x%08lX\n", cmd, arg));
+
+	session_data = (struct mali_session_data *)filp->private_data;
+	if (NULL == session_data) {
+		MALI_DEBUG_PRINT(7, ("filp->private_data was NULL\n"));
+		return -ENOTTY;
+	}
+
+	if (NULL == (void *)arg) {
+		MALI_DEBUG_PRINT(7, ("arg was NULL\n"));
+		return -ENOTTY;
+	}
+
+	switch (cmd) {
+	case MALI_IOC_WAIT_FOR_NOTIFICATION:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_wait_for_notification_s), sizeof(u64)));
+		err = wait_for_notification_wrapper(session_data, (_mali_uk_wait_for_notification_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_API_VERSION_V2:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_api_version_v2_s), sizeof(u64)));
+		err = get_api_version_v2_wrapper(session_data, (_mali_uk_get_api_version_v2_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_API_VERSION:
+		err = get_api_version_wrapper(session_data, (_mali_uk_get_api_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_POST_NOTIFICATION:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_post_notification_s), sizeof(u64)));
+		err = post_notification_wrapper(session_data, (_mali_uk_post_notification_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_USER_SETTINGS:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_user_settings_s), sizeof(u64)));
+		err = get_user_settings_wrapper(session_data, (_mali_uk_get_user_settings_s __user *)arg);
+		break;
+
+	case MALI_IOC_REQUEST_HIGH_PRIORITY:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_request_high_priority_s), sizeof(u64)));
+		err = request_high_priority_wrapper(session_data, (_mali_uk_request_high_priority_s __user *)arg);
+		break;
+
+#if defined(CONFIG_MALI400_PROFILING)
+	case MALI_IOC_PROFILING_ADD_EVENT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_add_event_s), sizeof(u64)));
+		err = profiling_add_event_wrapper(session_data, (_mali_uk_profiling_add_event_s __user *)arg);
+		break;
+
+	case MALI_IOC_PROFILING_REPORT_SW_COUNTERS:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_sw_counters_report_s), sizeof(u64)));
+		err = profiling_report_sw_counters_wrapper(session_data, (_mali_uk_sw_counters_report_s __user *)arg);
+		break;
+
+
+	case MALI_IOC_PROFILING_MEMORY_USAGE_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_memory_usage_get_s), sizeof(u64)));
+		err = profiling_memory_usage_get_wrapper(session_data, (_mali_uk_profiling_memory_usage_get_s __user *)arg);
+		break;
+
+#else
+
+	case MALI_IOC_PROFILING_ADD_EVENT:          /* FALL-THROUGH */
+	case MALI_IOC_PROFILING_REPORT_SW_COUNTERS: /* FALL-THROUGH */
+	case MALI_IOC_PROFILING_MEMORY_USAGE_GET:   /* FALL-THROUGH */
+		MALI_DEBUG_PRINT(2, ("Profiling not supported\n"));
+		err = -ENOTTY;
+		break;
+
+#endif
+
+	case MALI_IOC_MEM_ALLOC:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_alloc_mem_s), sizeof(u64)));
+		err = mem_alloc_wrapper(session_data, (_mali_uk_alloc_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_FREE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_free_mem_s), sizeof(u64)));
+		err = mem_free_wrapper(session_data, (_mali_uk_free_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_BIND:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_bind_mem_s), sizeof(u64)));
+		err = mem_bind_wrapper(session_data, (_mali_uk_bind_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_UNBIND:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_unbind_mem_s), sizeof(u64)));
+		err = mem_unbind_wrapper(session_data, (_mali_uk_unbind_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_WRITE_SAFE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_write_safe_s), sizeof(u64)));
+		err = mem_write_safe_wrapper(session_data, (_mali_uk_mem_write_safe_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_query_mmu_page_table_dump_size_s), sizeof(u64)));
+		err = mem_query_mmu_page_table_dump_size_wrapper(session_data, (_mali_uk_query_mmu_page_table_dump_size_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dump_mmu_page_table_s), sizeof(u64)));
+		err = mem_dump_mmu_page_table_wrapper(session_data, (_mali_uk_dump_mmu_page_table_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_DMA_BUF_GET_SIZE:
+#ifdef CONFIG_DMA_SHARED_BUFFER
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dma_buf_get_size_s), sizeof(u64)));
+		err = mali_dma_buf_get_size(session_data, (_mali_uk_dma_buf_get_size_s __user *)arg);
+#else
+		MALI_DEBUG_PRINT(2, ("DMA-BUF not supported\n"));
+		err = -ENOTTY;
+#endif
+		break;
+
+	case MALI_IOC_PP_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_start_job_s), sizeof(u64)));
+		err = pp_start_job_wrapper(session_data, (_mali_uk_pp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_AND_GP_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_and_gp_start_job_s), sizeof(u64)));
+		err = pp_and_gp_start_job_wrapper(session_data, (_mali_uk_pp_and_gp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_NUMBER_OF_CORES_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_number_of_cores_s), sizeof(u64)));
+		err = pp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_pp_number_of_cores_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_CORE_VERSION_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_core_version_s), sizeof(u64)));
+		err = pp_get_core_version_wrapper(session_data, (_mali_uk_get_pp_core_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_DISABLE_WB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_disable_wb_s), sizeof(u64)));
+		err = pp_disable_wb_wrapper(session_data, (_mali_uk_pp_disable_wb_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_start_job_s), sizeof(u64)));
+		err = gp_start_job_wrapper(session_data, (_mali_uk_gp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_NUMBER_OF_CORES_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_number_of_cores_s), sizeof(u64)));
+		err = gp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_gp_number_of_cores_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_CORE_VERSION_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_core_version_s), sizeof(u64)));
+		err = gp_get_core_version_wrapper(session_data, (_mali_uk_get_gp_core_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_SUSPEND_RESPONSE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_suspend_response_s), sizeof(u64)));
+		err = gp_suspend_response_wrapper(session_data, (_mali_uk_gp_suspend_response_s __user *)arg);
+		break;
+
+	case MALI_IOC_VSYNC_EVENT_REPORT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_vsync_event_report_s), sizeof(u64)));
+		err = vsync_event_report_wrapper(session_data, (_mali_uk_vsync_event_report_s __user *)arg);
+		break;
+
+	case MALI_IOC_TIMELINE_GET_LATEST_POINT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_get_latest_point_s), sizeof(u64)));
+		err = timeline_get_latest_point_wrapper(session_data, (_mali_uk_timeline_get_latest_point_s __user *)arg);
+		break;
+	case MALI_IOC_TIMELINE_WAIT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_wait_s), sizeof(u64)));
+		err = timeline_wait_wrapper(session_data, (_mali_uk_timeline_wait_s __user *)arg);
+		break;
+	case MALI_IOC_TIMELINE_CREATE_SYNC_FENCE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_create_sync_fence_s), sizeof(u64)));
+		err = timeline_create_sync_fence_wrapper(session_data, (_mali_uk_timeline_create_sync_fence_s __user *)arg);
+		break;
+	case MALI_IOC_SOFT_JOB_START:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_start_s), sizeof(u64)));
+		err = soft_job_start_wrapper(session_data, (_mali_uk_soft_job_start_s __user *)arg);
+		break;
+	case MALI_IOC_SOFT_JOB_SIGNAL:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_signal_s), sizeof(u64)));
+		err = soft_job_signal_wrapper(session_data, (_mali_uk_soft_job_signal_s __user *)arg);
+		break;
+
+	default:
+		MALI_DEBUG_PRINT(2, ("No handler for ioctl 0x%08X 0x%08lX\n", cmd, arg));
+		err = -ENOTTY;
+	};
+
+	return err;
+}
+
+
+module_init(mali_module_init);
+module_exit(mali_module_exit);
+
+MODULE_LICENSE(MALI_KERNEL_LINUX_LICENSE);
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(SVN_REV_STRING);
diff --git a/utgard/r5p1/linux/mali_kernel_linux.h b/utgard/r5p1/linux/mali_kernel_linux.h
new file mode 100755
index 0000000..f604271
--- /dev/null
+++ b/utgard/r5p1/linux/mali_kernel_linux.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_LINUX_H__
+#define __MALI_KERNEL_LINUX_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/cdev.h>     /* character device definitions */
+#include <linux/idr.h>
+#include <linux/rbtree.h>
+#include "mali_kernel_license.h"
+#include "mali_osk_types.h"
+
+extern struct platform_device *mali_platform_device;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LINUX_H__ */
diff --git a/utgard/r5p1/linux/mali_kernel_sysfs.c b/utgard/r5p1/linux/mali_kernel_sysfs.c
new file mode 100755
index 0000000..ecde82d
--- /dev/null
+++ b/utgard/r5p1/linux/mali_kernel_sysfs.c
@@ -0,0 +1,1400 @@
+/**
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+
+/**
+ * @file mali_kernel_sysfs.c
+ * Implementation of some sysfs data exports
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include "mali_kernel_license.h"
+#include "mali_kernel_common.h"
+#include "mali_ukk.h"
+
+#if MALI_LICENSE_IS_GPL
+
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_sysfs.h"
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include <linux/slab.h>
+#include "mali_osk_profiling.h"
+#endif
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_pm.h"
+#include "mali_pmu.h"
+#include "mali_group.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_l2_cache.h"
+#include "mali_hw_core.h"
+#include "mali_kernel_core.h"
+#include "mali_user_settings_db.h"
+#include "mali_profiling_internal.h"
+#include "mali_gp_job.h"
+#include "mali_pp_job.h"
+#include "mali_executor.h"
+
+#define PRIVATE_DATA_COUNTER_MAKE_GP(src) (src)
+#define PRIVATE_DATA_COUNTER_MAKE_PP(src) ((1 << 24) | src)
+#define PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(src, sub_job) ((1 << 24) | (1 << 16) | (sub_job << 8) | src)
+#define PRIVATE_DATA_COUNTER_IS_PP(a) ((((a) >> 24) & 0xFF) ? MALI_TRUE : MALI_FALSE)
+#define PRIVATE_DATA_COUNTER_GET_SRC(a) (a & 0xFF)
+#define PRIVATE_DATA_COUNTER_IS_SUB_JOB(a) ((((a) >> 16) & 0xFF) ? MALI_TRUE : MALI_FALSE)
+#define PRIVATE_DATA_COUNTER_GET_SUB_JOB(a) (((a) >> 8) & 0xFF)
+
+#define POWER_BUFFER_SIZE 3
+
+static struct dentry *mali_debugfs_dir = NULL;
+
+typedef enum {
+	_MALI_DEVICE_SUSPEND,
+	_MALI_DEVICE_RESUME,
+	_MALI_DEVICE_DVFS_PAUSE,
+	_MALI_DEVICE_DVFS_RESUME,
+	_MALI_MAX_EVENTS
+} _mali_device_debug_power_events;
+
+static const char *const mali_power_events[_MALI_MAX_EVENTS] = {
+	[_MALI_DEVICE_SUSPEND] = "suspend",
+	[_MALI_DEVICE_RESUME] = "resume",
+	[_MALI_DEVICE_DVFS_PAUSE] = "dvfs_pause",
+	[_MALI_DEVICE_DVFS_RESUME] = "dvfs_resume",
+};
+
+static mali_bool power_always_on_enabled = MALI_FALSE;
+
+static int open_copy_private_data(struct inode *inode, struct file *filp)
+{
+	filp->private_data = inode->i_private;
+	return 0;
+}
+
+static ssize_t group_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	struct mali_group *group;
+
+	group = (struct mali_group *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	r = snprintf(buffer, 64, "%u\n",
+		     mali_executor_group_is_disabled(group) ? 0 : 1);
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static ssize_t group_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	unsigned long val;
+	struct mali_group *group;
+
+	group = (struct mali_group *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	r = kstrtoul(&buffer[0], 10, &val);
+	if (0 != r) {
+		return -EINVAL;
+	}
+
+	switch (val) {
+	case 1:
+		mali_executor_group_enable(group);
+		break;
+	case 0:
+		mali_executor_group_disable(group);
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static const struct file_operations group_enabled_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read = group_enabled_read,
+	.write = group_enabled_write,
+};
+
+static ssize_t hw_core_base_addr_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	struct mali_hw_core *hw_core;
+
+	hw_core = (struct mali_hw_core *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(hw_core);
+
+	r = snprintf(buffer, 64, "0x%lX\n", hw_core->phys_addr);
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations hw_core_base_addr_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read = hw_core_base_addr_read,
+};
+
+static ssize_t profiling_counter_src_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data);
+	u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data);
+	mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data);
+	u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data);
+	char buf[64];
+	int r;
+	u32 val;
+
+	if (MALI_TRUE == is_pp) {
+		/* PP counter */
+		if (MALI_TRUE == is_sub_job) {
+			/* Get counter for a particular sub job */
+			if (0 == src_id) {
+				val = mali_pp_job_get_pp_counter_sub_job_src0(sub_job);
+			} else {
+				val = mali_pp_job_get_pp_counter_sub_job_src1(sub_job);
+			}
+		} else {
+			/* Get default counter for all PP sub jobs */
+			if (0 == src_id) {
+				val = mali_pp_job_get_pp_counter_global_src0();
+			} else {
+				val = mali_pp_job_get_pp_counter_global_src1();
+			}
+		}
+	} else {
+		/* GP counter */
+		if (0 == src_id) {
+			val = mali_gp_job_get_gp_counter_src0();
+		} else {
+			val = mali_gp_job_get_gp_counter_src1();
+		}
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == val) {
+		r = snprintf(buf, 64, "-1\n");
+	} else {
+		r = snprintf(buf, 64, "%u\n", val);
+	}
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t profiling_counter_src_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data);
+	u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data);
+	mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data);
+	u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data);
+	char buf[64];
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	if (MALI_TRUE == is_pp) {
+		/* PP counter */
+		if (MALI_TRUE == is_sub_job) {
+			/* Set counter for a particular sub job */
+			if (0 == src_id) {
+				mali_pp_job_set_pp_counter_sub_job_src0(sub_job, (u32)val);
+			} else {
+				mali_pp_job_set_pp_counter_sub_job_src1(sub_job, (u32)val);
+			}
+		} else {
+			/* Set default counter for all PP sub jobs */
+			if (0 == src_id) {
+				mali_pp_job_set_pp_counter_global_src0((u32)val);
+			} else {
+				mali_pp_job_set_pp_counter_global_src1((u32)val);
+			}
+		}
+	} else {
+		/* GP counter */
+		if (0 == src_id) {
+			mali_gp_job_set_gp_counter_src0((u32)val);
+		} else {
+			mali_gp_job_set_gp_counter_src1((u32)val);
+		}
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static const struct file_operations profiling_counter_src_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = profiling_counter_src_read,
+	.write = profiling_counter_src_write,
+};
+
+static ssize_t l2_l2x_counter_srcx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	int r;
+	u32 val;
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+
+	if (0 == src_id) {
+		val = mali_l2_cache_core_get_counter_src0(l2_core);
+	} else {
+		val = mali_l2_cache_core_get_counter_src1(l2_core);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == val) {
+		r = snprintf(buf, 64, "-1\n");
+	} else {
+		r = snprintf(buf, 64, "%u\n", val);
+	}
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t l2_l2x_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+	char buf[64];
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	mali_l2_cache_core_set_counter_src(l2_core, src_id, (u32)val);
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t l2_all_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	long val;
+	int ret;
+	u32 l2_id;
+	struct mali_l2_cache_core *l2_cache;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	l2_id = 0;
+	l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+	while (NULL != l2_cache) {
+		mali_l2_cache_core_set_counter_src(l2_cache, src_id, (u32)val);
+
+		/* try next L2 */
+		l2_id++;
+		l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t l2_l2x_counter_src0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_src1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 1);
+}
+
+static ssize_t l2_l2x_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 1);
+}
+
+static ssize_t l2_all_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_all_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 1);
+}
+
+static const struct file_operations l2_l2x_counter_src0_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_src0_read,
+	.write = l2_l2x_counter_src0_write,
+};
+
+static const struct file_operations l2_l2x_counter_src1_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_src1_read,
+	.write = l2_l2x_counter_src1_write,
+};
+
+static const struct file_operations l2_all_counter_src0_fops = {
+	.owner = THIS_MODULE,
+	.write = l2_all_counter_src0_write,
+};
+
+static const struct file_operations l2_all_counter_src1_fops = {
+	.owner = THIS_MODULE,
+	.write = l2_all_counter_src1_write,
+};
+
+static ssize_t l2_l2x_counter_valx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	int r;
+	u32 src0 = 0;
+	u32 val0 = 0;
+	u32 src1 = 0;
+	u32 val1 = 0;
+	u32 val = -1;
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+
+	mali_l2_cache_core_get_counter_values(l2_core, &src0, &val0, &src1, &val1);
+
+	if (0 == src_id) {
+		if (MALI_HW_CORE_NO_COUNTER != val0) {
+			val = val0;
+		}
+	} else {
+		if (MALI_HW_CORE_NO_COUNTER != val1) {
+			val = val1;
+		}
+	}
+
+	r = snprintf(buf, 64, "%u\n", val);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t l2_l2x_counter_val0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_val1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 1);
+}
+
+static const struct file_operations l2_l2x_counter_val0_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_val0_read,
+};
+
+static const struct file_operations l2_l2x_counter_val1_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_val1_read,
+};
+
+static ssize_t power_always_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+	char buf[32];
+
+	cnt = min(cnt, sizeof(buf) - 1);
+	if (copy_from_user(buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+	buf[cnt] = '\0';
+
+	ret = kstrtoul(buf, 10, &val);
+	if (0 != ret) {
+		return ret;
+	}
+
+	/* Update setting (not exactly thread safe) */
+	if (1 == val && MALI_FALSE == power_always_on_enabled) {
+		power_always_on_enabled = MALI_TRUE;
+		_mali_osk_pm_dev_ref_get_sync();
+	} else if (0 == val && MALI_TRUE == power_always_on_enabled) {
+		power_always_on_enabled = MALI_FALSE;
+		_mali_osk_pm_dev_ref_put();
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t power_always_on_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	if (MALI_TRUE == power_always_on_enabled) {
+		return simple_read_from_buffer(ubuf, cnt, ppos, "1\n", 2);
+	} else {
+		return simple_read_from_buffer(ubuf, cnt, ppos, "0\n", 2);
+	}
+}
+
+static const struct file_operations power_always_on_fops = {
+	.owner = THIS_MODULE,
+	.read  = power_always_on_read,
+	.write = power_always_on_write,
+};
+
+static ssize_t power_power_events_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_SUSPEND], strlen(mali_power_events[_MALI_DEVICE_SUSPEND]) - 1)) {
+		mali_pm_os_suspend(MALI_TRUE);
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_RESUME], strlen(mali_power_events[_MALI_DEVICE_RESUME]) - 1)) {
+		mali_pm_os_resume();
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_DVFS_PAUSE], strlen(mali_power_events[_MALI_DEVICE_DVFS_PAUSE]) - 1)) {
+		mali_dev_pause();
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_DVFS_RESUME], strlen(mali_power_events[_MALI_DEVICE_DVFS_RESUME]) - 1)) {
+		mali_dev_resume();
+	}
+	*ppos += cnt;
+	return cnt;
+}
+
+static loff_t power_power_events_seek(struct file *file, loff_t offset, int orig)
+{
+	file->f_pos = offset;
+	return 0;
+}
+
+static const struct file_operations power_power_events_fops = {
+	.owner = THIS_MODULE,
+	.write = power_power_events_write,
+	.llseek = power_power_events_seek,
+};
+
+#if MALI_STATE_TRACKING
+static int mali_seq_internal_state_show(struct seq_file *seq_file, void *v)
+{
+	u32 len = 0;
+	u32 size;
+	char *buf;
+
+	size = seq_get_buf(seq_file, &buf);
+
+	if (!size) {
+		return -ENOMEM;
+	}
+
+	/* Create the internal state dump. */
+	len  = snprintf(buf + len, size - len, "Mali device driver %s\n", SVN_REV_STRING);
+	len += snprintf(buf + len, size - len, "License: %s\n\n", MALI_KERNEL_LINUX_LICENSE);
+
+	len += _mali_kernel_core_dump_state(buf + len, size - len);
+
+	seq_commit(seq_file, len);
+
+	return 0;
+}
+
+static int mali_seq_internal_state_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mali_seq_internal_state_show, NULL);
+}
+
+static const struct file_operations mali_seq_internal_state_fops = {
+	.owner = THIS_MODULE,
+	.open = mali_seq_internal_state_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+#endif /* MALI_STATE_TRACKING */
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+static ssize_t profiling_record_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	int r;
+
+	r = snprintf(buf, 64, "%u\n", _mali_internal_profiling_is_recording() ? 1 : 0);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t profiling_record_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	unsigned long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val != 0) {
+		u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* This can be made configurable at a later stage if we need to */
+
+		/* check if we are already recording */
+		if (MALI_TRUE == _mali_internal_profiling_is_recording()) {
+			MALI_DEBUG_PRINT(3, ("Recording of profiling events already in progress\n"));
+			return -EFAULT;
+		}
+
+		/* check if we need to clear out an old recording first */
+		if (MALI_TRUE == _mali_internal_profiling_have_recording()) {
+			if (_MALI_OSK_ERR_OK != _mali_internal_profiling_clear()) {
+				MALI_DEBUG_PRINT(3, ("Failed to clear existing recording of profiling events\n"));
+				return -EFAULT;
+			}
+		}
+
+		/* start recording profiling data */
+		if (_MALI_OSK_ERR_OK != _mali_internal_profiling_start(&limit)) {
+			MALI_DEBUG_PRINT(3, ("Failed to start recording of profiling events\n"));
+			return -EFAULT;
+		}
+
+		MALI_DEBUG_PRINT(3, ("Profiling recording started (max %u events)\n", limit));
+	} else {
+		/* stop recording profiling data */
+		u32 count = 0;
+		if (_MALI_OSK_ERR_OK != _mali_internal_profiling_stop(&count)) {
+			MALI_DEBUG_PRINT(2, ("Failed to stop recording of profiling events\n"));
+			return -EFAULT;
+		}
+
+		MALI_DEBUG_PRINT(2, ("Profiling recording stopped (recorded %u events)\n", count));
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static const struct file_operations profiling_record_fops = {
+	.owner = THIS_MODULE,
+	.read  = profiling_record_read,
+	.write = profiling_record_write,
+};
+
+static void *profiling_events_start(struct seq_file *s, loff_t *pos)
+{
+	loff_t *spos;
+
+	/* check if we have data avaiable */
+	if (MALI_TRUE != _mali_internal_profiling_have_recording()) {
+		return NULL;
+	}
+
+	spos = kmalloc(sizeof(loff_t), GFP_KERNEL);
+	if (NULL == spos) {
+		return NULL;
+	}
+
+	*spos = *pos;
+	return spos;
+}
+
+static void *profiling_events_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	loff_t *spos = v;
+
+	/* check if we have data avaiable */
+	if (MALI_TRUE != _mali_internal_profiling_have_recording()) {
+		return NULL;
+	}
+
+	/* check if the next entry actually is avaiable */
+	if (_mali_internal_profiling_get_count() <= (u32)(*spos + 1)) {
+		return NULL;
+	}
+
+	*pos = ++*spos;
+	return spos;
+}
+
+static void profiling_events_stop(struct seq_file *s, void *v)
+{
+	kfree(v);
+}
+
+static int profiling_events_show(struct seq_file *seq_file, void *v)
+{
+	loff_t *spos = v;
+	u32 index;
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+
+	index = (u32) * spos;
+
+	/* Retrieve all events */
+	if (_MALI_OSK_ERR_OK == _mali_internal_profiling_get_event(index, &timestamp, &event_id, data)) {
+		seq_printf(seq_file, "%llu %u %u %u %u %u %u\n", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]);
+		return 0;
+	}
+
+	return 0;
+}
+
+static int profiling_events_show_human_readable(struct seq_file *seq_file, void *v)
+{
+#define MALI_EVENT_ID_IS_HW(event_id) (((event_id & 0x00FF0000) >= MALI_PROFILING_EVENT_CHANNEL_GP0) && ((event_id & 0x00FF0000) <= MALI_PROFILING_EVENT_CHANNEL_PP7))
+
+	static u64 start_time = 0;
+	loff_t *spos = v;
+	u32 index;
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+
+	index = (u32) * spos;
+
+	/* Retrieve all events */
+	if (_MALI_OSK_ERR_OK == _mali_internal_profiling_get_event(index, &timestamp, &event_id, data)) {
+		seq_printf(seq_file, "%llu %u %u %u %u %u %u # ", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]);
+
+		if (0 == index) {
+			start_time = timestamp;
+		}
+
+		seq_printf(seq_file, "[%06u] ", index);
+
+		switch (event_id & 0x0F000000) {
+		case MALI_PROFILING_EVENT_TYPE_SINGLE:
+			seq_printf(seq_file, "SINGLE | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_START:
+			seq_printf(seq_file, "START | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_STOP:
+			seq_printf(seq_file, "STOP | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_SUSPEND:
+			seq_printf(seq_file, "SUSPEND | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_RESUME:
+			seq_printf(seq_file, "RESUME | ");
+			break;
+		default:
+			seq_printf(seq_file, "0x%01X | ", (event_id & 0x0F000000) >> 24);
+			break;
+		}
+
+		switch (event_id & 0x00FF0000) {
+		case MALI_PROFILING_EVENT_CHANNEL_SOFTWARE:
+			seq_printf(seq_file, "SW | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_GP0:
+			seq_printf(seq_file, "GP0 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP0:
+			seq_printf(seq_file, "PP0 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP1:
+			seq_printf(seq_file, "PP1 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP2:
+			seq_printf(seq_file, "PP2 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP3:
+			seq_printf(seq_file, "PP3 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP4:
+			seq_printf(seq_file, "PP4 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP5:
+			seq_printf(seq_file, "PP5 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP6:
+			seq_printf(seq_file, "PP6 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP7:
+			seq_printf(seq_file, "PP7 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_GPU:
+			seq_printf(seq_file, "GPU | ");
+			break;
+		default:
+			seq_printf(seq_file, "0x%02X | ", (event_id & 0x00FF0000) >> 16);
+			break;
+		}
+
+		if (MALI_EVENT_ID_IS_HW(event_id)) {
+			if (((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_START) || ((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_STOP)) {
+				switch (event_id & 0x0000FFFF) {
+				case MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL:
+					seq_printf(seq_file, "PHYSICAL | ");
+					break;
+				case MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL:
+					seq_printf(seq_file, "VIRTUAL | ");
+					break;
+				default:
+					seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+					break;
+				}
+			} else {
+				seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+			}
+		} else {
+			seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+		}
+
+		seq_printf(seq_file, "T0 + 0x%016llX\n", timestamp - start_time);
+
+		return 0;
+	}
+
+	return 0;
+}
+
+static const struct seq_operations profiling_events_seq_ops = {
+	.start = profiling_events_start,
+	.next  = profiling_events_next,
+	.stop  = profiling_events_stop,
+	.show  = profiling_events_show
+};
+
+static int profiling_events_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &profiling_events_seq_ops);
+}
+
+static const struct file_operations profiling_events_fops = {
+	.owner = THIS_MODULE,
+	.open = profiling_events_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+static const struct seq_operations profiling_events_human_readable_seq_ops = {
+	.start = profiling_events_start,
+	.next  = profiling_events_next,
+	.stop  = profiling_events_stop,
+	.show  = profiling_events_show_human_readable
+};
+
+static int profiling_events_human_readable_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &profiling_events_human_readable_seq_ops);
+}
+
+static const struct file_operations profiling_events_human_readable_fops = {
+	.owner = THIS_MODULE,
+	.open = profiling_events_human_readable_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+static int memory_debugfs_show(struct seq_file *s, void *private_data)
+{
+	seq_printf(s, "  %-25s  %-10s %-25s %-10s  %-15s  %-15s  %-10s  %-10s\n"\
+		   "=======================================================================================================================================\n",
+		   "Name (:bytes)", "pid", "pid-name", "mali_mem", "max_mali_mem",
+		   "external_mem", "ump_mem", "dma_mem");
+	mali_session_memory_tracking(s);
+	return 0;
+}
+
+static int memory_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, memory_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations memory_usage_fops = {
+	.owner = THIS_MODULE,
+	.open = memory_debugfs_open,
+	.read  = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static ssize_t utilization_gp_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_gp_pp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t utilization_gp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_gp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t utilization_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_pp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+
+static const struct file_operations utilization_gp_pp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_gp_pp_read,
+};
+
+static const struct file_operations utilization_gp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_gp_read,
+};
+
+static const struct file_operations utilization_pp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_pp_read,
+};
+
+static ssize_t user_settings_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+	_mali_uk_user_setting_t setting;
+	char buf[32];
+
+	cnt = min(cnt, sizeof(buf) - 1);
+	if (copy_from_user(buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+	buf[cnt] = '\0';
+
+	ret = kstrtoul(buf, 10, &val);
+	if (0 != ret) {
+		return ret;
+	}
+
+	/* Update setting */
+	setting = (_mali_uk_user_setting_t)(filp->private_data);
+	mali_set_user_setting(setting, val);
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t user_settings_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 value;
+	_mali_uk_user_setting_t setting;
+
+	setting = (_mali_uk_user_setting_t)(filp->private_data);
+	value = mali_get_user_setting(setting);
+
+	r = snprintf(buf, 64, "%u\n", value);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static const struct file_operations user_settings_fops = {
+	.owner = THIS_MODULE,
+	.open = open_copy_private_data,
+	.read = user_settings_read,
+	.write = user_settings_write,
+};
+
+static int mali_sysfs_user_settings_register(void)
+{
+	struct dentry *mali_user_settings_dir = debugfs_create_dir("userspace_settings", mali_debugfs_dir);
+
+	if (mali_user_settings_dir != NULL) {
+		long i;
+		for (i = 0; i < _MALI_UK_USER_SETTING_MAX; i++) {
+			debugfs_create_file(_mali_uk_user_setting_descriptions[i],
+					    0600, mali_user_settings_dir, (void *)i,
+					    &user_settings_fops);
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t pp_num_cores_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int ret;
+	char buffer[32];
+	unsigned long val;
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	ret = kstrtoul(&buffer[0], 10, &val);
+	if (0 != ret) {
+		return -EINVAL;
+	}
+
+	ret = mali_executor_set_perf_level(val, MALI_TRUE); /* override even if core scaling is disabled */
+	if (ret) {
+		return ret;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static ssize_t pp_num_cores_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+
+	r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_enabled());
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations pp_num_cores_enabled_fops = {
+	.owner = THIS_MODULE,
+	.write = pp_num_cores_enabled_write,
+	.read = pp_num_cores_enabled_read,
+	.llseek = default_llseek,
+};
+
+static ssize_t pp_num_cores_total_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+
+	r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_total());
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations pp_num_cores_total_fops = {
+	.owner = THIS_MODULE,
+	.read = pp_num_cores_total_read,
+};
+
+static ssize_t pp_core_scaling_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int ret;
+	char buffer[32];
+	unsigned long val;
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	ret = kstrtoul(&buffer[0], 10, &val);
+	if (0 != ret) {
+		return -EINVAL;
+	}
+
+	switch (val) {
+	case 1:
+		mali_executor_core_scaling_enable();
+		break;
+	case 0:
+		mali_executor_core_scaling_disable();
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static ssize_t pp_core_scaling_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	return simple_read_from_buffer(buf, count, offp, mali_executor_core_scaling_is_enabled() ? "1\n" : "0\n", 2);
+}
+static const struct file_operations pp_core_scaling_enabled_fops = {
+	.owner = THIS_MODULE,
+	.write = pp_core_scaling_enabled_write,
+	.read = pp_core_scaling_enabled_read,
+	.llseek = default_llseek,
+};
+
+static ssize_t version_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r = 0;
+	char buffer[64];
+
+	switch (mali_kernel_core_get_product_id()) {
+	case _MALI_PRODUCT_ID_MALI200:
+		r = snprintf(buffer, 64, "Mali-200\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI300:
+		r = snprintf(buffer, 64, "Mali-300\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI400:
+		r = snprintf(buffer, 64, "Mali-400 MP\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI450:
+		r = snprintf(buffer, 64, "Mali-450 MP\n");
+		break;
+	case _MALI_PRODUCT_ID_UNKNOWN:
+		return -EINVAL;
+		break;
+	};
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations version_fops = {
+	.owner = THIS_MODULE,
+	.read = version_read,
+};
+
+#if defined(DEBUG)
+static int timeline_debugfs_show(struct seq_file *s, void *private_data)
+{
+	struct mali_session_data *session, *tmp;
+	u32 session_seq = 1;
+
+	seq_printf(s, "timeline system info: \n=================\n\n");
+
+	mali_session_lock();
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		seq_printf(s, "session %d <%p> start:\n", session_seq, session);
+		mali_timeline_debug_print_system(session->timeline_system, s);
+		seq_printf(s, "session %d end\n\n\n", session_seq++);
+	}
+	mali_session_unlock();
+
+	return 0;
+}
+
+static int timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, timeline_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations timeline_dump_fops = {
+	.owner = THIS_MODULE,
+	.open = timeline_debugfs_open,
+	.read  = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release
+};
+#endif
+
+int mali_sysfs_register(const char *mali_dev_name)
+{
+	mali_debugfs_dir = debugfs_create_dir(mali_dev_name, NULL);
+	if (ERR_PTR(-ENODEV) == mali_debugfs_dir) {
+		/* Debugfs not supported. */
+		mali_debugfs_dir = NULL;
+	} else {
+		if (NULL != mali_debugfs_dir) {
+			/* Debugfs directory created successfully; create files now */
+			struct dentry *mali_power_dir;
+			struct dentry *mali_gp_dir;
+			struct dentry *mali_pp_dir;
+			struct dentry *mali_l2_dir;
+			struct dentry *mali_profiling_dir;
+
+			debugfs_create_file("version", 0400, mali_debugfs_dir, NULL, &version_fops);
+
+			mali_power_dir = debugfs_create_dir("power", mali_debugfs_dir);
+			if (mali_power_dir != NULL) {
+				debugfs_create_file("always_on", 0600, mali_power_dir, NULL, &power_always_on_fops);
+				debugfs_create_file("power_events", 0200, mali_power_dir, NULL, &power_power_events_fops);
+			}
+
+			mali_gp_dir = debugfs_create_dir("gp", mali_debugfs_dir);
+			if (mali_gp_dir != NULL) {
+				u32 num_groups;
+				long i;
+
+				num_groups = mali_group_get_glob_num_groups();
+				for (i = 0; i < num_groups; i++) {
+					struct mali_group *group = mali_group_get_glob_group(i);
+
+					struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+					if (NULL != gp_core) {
+						struct dentry *mali_gp_gpx_dir;
+						mali_gp_gpx_dir = debugfs_create_dir("gp0", mali_gp_dir);
+						if (NULL != mali_gp_gpx_dir) {
+							debugfs_create_file("base_addr", 0400, mali_gp_gpx_dir, &gp_core->hw_core, &hw_core_base_addr_fops);
+							debugfs_create_file("enabled", 0600, mali_gp_gpx_dir, group, &group_enabled_fops);
+						}
+						break; /* no need to look for any other GP cores */
+					}
+
+				}
+			}
+
+			mali_pp_dir = debugfs_create_dir("pp", mali_debugfs_dir);
+			if (mali_pp_dir != NULL) {
+				u32 num_groups;
+				long i;
+
+				debugfs_create_file("num_cores_total", 0400, mali_pp_dir, NULL, &pp_num_cores_total_fops);
+				debugfs_create_file("num_cores_enabled", 0600, mali_pp_dir, NULL, &pp_num_cores_enabled_fops);
+				debugfs_create_file("core_scaling_enabled", 0600, mali_pp_dir, NULL, &pp_core_scaling_enabled_fops);
+
+				num_groups = mali_group_get_glob_num_groups();
+				for (i = 0; i < num_groups; i++) {
+					struct mali_group *group = mali_group_get_glob_group(i);
+
+					struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+					if (NULL != pp_core) {
+						char buf[16];
+						struct dentry *mali_pp_ppx_dir;
+						_mali_osk_snprintf(buf, sizeof(buf), "pp%u", mali_pp_core_get_id(pp_core));
+						mali_pp_ppx_dir = debugfs_create_dir(buf, mali_pp_dir);
+						if (NULL != mali_pp_ppx_dir) {
+							debugfs_create_file("base_addr", 0400, mali_pp_ppx_dir, &pp_core->hw_core, &hw_core_base_addr_fops);
+							if (!mali_group_is_virtual(group)) {
+								debugfs_create_file("enabled", 0600, mali_pp_ppx_dir, group, &group_enabled_fops);
+							}
+						}
+					}
+				}
+			}
+
+			mali_l2_dir = debugfs_create_dir("l2", mali_debugfs_dir);
+			if (mali_l2_dir != NULL) {
+				struct dentry *mali_l2_all_dir;
+				u32 l2_id;
+				struct mali_l2_cache_core *l2_cache;
+
+				mali_l2_all_dir = debugfs_create_dir("all", mali_l2_dir);
+				if (mali_l2_all_dir != NULL) {
+					debugfs_create_file("counter_src0", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src0_fops);
+					debugfs_create_file("counter_src1", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src1_fops);
+				}
+
+				l2_id = 0;
+				l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+				while (NULL != l2_cache) {
+					char buf[16];
+					struct dentry *mali_l2_l2x_dir;
+					_mali_osk_snprintf(buf, sizeof(buf), "l2%u", l2_id);
+					mali_l2_l2x_dir = debugfs_create_dir(buf, mali_l2_dir);
+					if (NULL != mali_l2_l2x_dir) {
+						debugfs_create_file("counter_src0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src0_fops);
+						debugfs_create_file("counter_src1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src1_fops);
+						debugfs_create_file("counter_val0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val0_fops);
+						debugfs_create_file("counter_val1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val1_fops);
+						debugfs_create_file("base_addr", 0400, mali_l2_l2x_dir, &l2_cache->hw_core, &hw_core_base_addr_fops);
+					}
+
+					/* try next L2 */
+					l2_id++;
+					l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+				}
+			}
+
+			debugfs_create_file("gpu_memory", 0444, mali_debugfs_dir, NULL, &memory_usage_fops);
+
+			debugfs_create_file("utilization_gp_pp", 0400, mali_debugfs_dir, NULL, &utilization_gp_pp_fops);
+			debugfs_create_file("utilization_gp", 0400, mali_debugfs_dir, NULL, &utilization_gp_fops);
+			debugfs_create_file("utilization_pp", 0400, mali_debugfs_dir, NULL, &utilization_pp_fops);
+
+			mali_profiling_dir = debugfs_create_dir("profiling", mali_debugfs_dir);
+			if (mali_profiling_dir != NULL) {
+				u32 max_sub_jobs;
+				long i;
+				struct dentry *mali_profiling_gp_dir;
+				struct dentry *mali_profiling_pp_dir;
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+				struct dentry *mali_profiling_proc_dir;
+#endif
+				/*
+				 * Create directory where we can set GP HW counters.
+				 */
+				mali_profiling_gp_dir = debugfs_create_dir("gp", mali_profiling_dir);
+				if (mali_profiling_gp_dir != NULL) {
+					debugfs_create_file("counter_src0", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(0), &profiling_counter_src_fops);
+					debugfs_create_file("counter_src1", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(1), &profiling_counter_src_fops);
+				}
+
+				/*
+				 * Create directory where we can set PP HW counters.
+				 * Possible override with specific HW counters for a particular sub job
+				 * (Disable core scaling before using the override!)
+				 */
+				mali_profiling_pp_dir = debugfs_create_dir("pp", mali_profiling_dir);
+				if (mali_profiling_pp_dir != NULL) {
+					debugfs_create_file("counter_src0", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(0), &profiling_counter_src_fops);
+					debugfs_create_file("counter_src1", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(1), &profiling_counter_src_fops);
+				}
+
+				max_sub_jobs = mali_executor_get_num_cores_total();
+				for (i = 0; i < max_sub_jobs; i++) {
+					char buf[16];
+					struct dentry *mali_profiling_pp_x_dir;
+					_mali_osk_snprintf(buf, sizeof(buf), "%u", i);
+					mali_profiling_pp_x_dir = debugfs_create_dir(buf, mali_profiling_pp_dir);
+					if (NULL != mali_profiling_pp_x_dir) {
+						debugfs_create_file("counter_src0",
+								    0600, mali_profiling_pp_x_dir,
+								    (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(0, i),
+								    &profiling_counter_src_fops);
+						debugfs_create_file("counter_src1",
+								    0600, mali_profiling_pp_x_dir,
+								    (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(1, i),
+								    &profiling_counter_src_fops);
+					}
+				}
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+				mali_profiling_proc_dir = debugfs_create_dir("proc", mali_profiling_dir);
+				if (mali_profiling_proc_dir != NULL) {
+					struct dentry *mali_profiling_proc_default_dir = debugfs_create_dir("default", mali_profiling_proc_dir);
+					if (mali_profiling_proc_default_dir != NULL) {
+						debugfs_create_file("enable", 0600, mali_profiling_proc_default_dir, (void *)_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, &user_settings_fops);
+					}
+				}
+				debugfs_create_file("record", 0600, mali_profiling_dir, NULL, &profiling_record_fops);
+				debugfs_create_file("events", 0400, mali_profiling_dir, NULL, &profiling_events_fops);
+				debugfs_create_file("events_human_readable", 0400, mali_profiling_dir, NULL, &profiling_events_human_readable_fops);
+#endif
+			}
+
+#if MALI_STATE_TRACKING
+			debugfs_create_file("state_dump", 0400, mali_debugfs_dir, NULL, &mali_seq_internal_state_fops);
+#endif
+
+#if defined(DEBUG)
+			debugfs_create_file("timeline_dump", 0400, mali_debugfs_dir, NULL, &timeline_dump_fops);
+#endif
+			if (mali_sysfs_user_settings_register()) {
+				/* Failed to create the debugfs entries for the user settings DB. */
+				MALI_DEBUG_PRINT(2, ("Failed to create user setting debugfs files. Ignoring...\n"));
+			}
+		}
+	}
+
+	/* Success! */
+	return 0;
+}
+
+int mali_sysfs_unregister(void)
+{
+	if (NULL != mali_debugfs_dir) {
+		debugfs_remove_recursive(mali_debugfs_dir);
+	}
+	return 0;
+}
+
+#else /* MALI_LICENSE_IS_GPL */
+
+/* Dummy implementations for non-GPL */
+
+int mali_sysfs_register(struct mali_dev *device, dev_t dev, const char *mali_dev_name)
+{
+	return 0;
+}
+
+int mali_sysfs_unregister(void)
+{
+	return 0;
+}
+
+#endif /* MALI_LICENSE_IS_GPL */
diff --git a/utgard/r5p1/linux/mali_kernel_sysfs.h b/utgard/r5p1/linux/mali_kernel_sysfs.h
new file mode 100755
index 0000000..a36a0ce
--- /dev/null
+++ b/utgard/r5p1/linux/mali_kernel_sysfs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2011-2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_SYSFS_H__
+#define __MALI_KERNEL_SYSFS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/device.h>
+
+#define MALI_PROC_DIR "driver/mali"
+
+int mali_sysfs_register(const char *mali_dev_name);
+int mali_sysfs_unregister(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LINUX_H__ */
diff --git a/utgard/r5p1/linux/mali_linux_trace.h b/utgard/r5p1/linux/mali_linux_trace.h
new file mode 100755
index 0000000..c6cd2bf
--- /dev/null
+++ b/utgard/r5p1/linux/mali_linux_trace.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#if !defined (MALI_LINUX_TRACE_H) || defined (TRACE_HEADER_MULTI_READ)
+#define MALI_LINUX_TRACE_H
+
+#include <linux/types.h>
+
+#include <linux/stringify.h>
+#include <linux/tracepoint.h>
+
+#undef  TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_SYSTEM_STRING __stringfy(TRACE_SYSTEM)
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+/**
+ * Define the tracepoint used to communicate the status of a GPU. Called
+ * when a GPU turns on or turns off.
+ *
+ * @param event_id The type of the event. This parameter is a bitfield
+ *  encoding the type of the event.
+ *
+ * @param d0 First data parameter.
+ * @param d1 Second data parameter.
+ * @param d2 Third data parameter.
+ * @param d3 Fourth data parameter.
+ * @param d4 Fifth data parameter.
+ */
+TRACE_EVENT(mali_timeline_event,
+
+	    TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1,
+		     unsigned int d2, unsigned int d3, unsigned int d4),
+
+	    TP_ARGS(event_id, d0, d1, d2, d3, d4),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, event_id)
+		    __field(unsigned int, d0)
+		    __field(unsigned int, d1)
+		    __field(unsigned int, d2)
+		    __field(unsigned int, d3)
+		    __field(unsigned int, d4)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->event_id = event_id;
+		    __entry->d0 = d0;
+		    __entry->d1 = d1;
+		    __entry->d2 = d2;
+		    __entry->d3 = d3;
+		    __entry->d4 = d4;
+	    ),
+
+	    TP_printk("event=%d", __entry->event_id)
+	   );
+
+/**
+ * Define a tracepoint used to regsiter the value of a hardware counter.
+ * Hardware counters belonging to the vertex or fragment processor are
+ * reported via this tracepoint each frame, whilst L2 cache hardware
+ * counters are reported continuously.
+ *
+ * @param counter_id The counter ID.
+ * @param value The value of the counter.
+ */
+TRACE_EVENT(mali_hw_counter,
+
+	    TP_PROTO(unsigned int counter_id, unsigned int value),
+
+	    TP_ARGS(counter_id, value),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, counter_id)
+		    __field(unsigned int, value)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->counter_id = counter_id;
+	    ),
+
+	    TP_printk("event %d = %d", __entry->counter_id, __entry->value)
+	   );
+
+/**
+ * Define a tracepoint used to send a bundle of software counters.
+ *
+ * @param counters The bundle of counters.
+ */
+TRACE_EVENT(mali_sw_counters,
+
+	    TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters),
+
+	    TP_ARGS(pid, tid, surface_id, counters),
+
+	    TP_STRUCT__entry(
+		    __field(pid_t, pid)
+		    __field(pid_t, tid)
+		    __field(void *, surface_id)
+		    __field(unsigned int *, counters)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->pid = pid;
+		    __entry->tid = tid;
+		    __entry->surface_id = surface_id;
+		    __entry->counters = counters;
+	    ),
+
+	    TP_printk("counters were %s", __entry->counters == NULL ? "NULL" : "not NULL")
+	   );
+
+/**
+ * Define a tracepoint used to gather core activity for systrace
+ * @param pid The process id for which the core activity originates from
+ * @param active If the core is active (1) or not (0)
+ * @param core_type The type of core active, either GP (1) or PP (0)
+ * @param core_id The core id that is active for the core_type
+ * @param frame_builder_id The frame builder id associated with this core activity
+ * @param flush_id The flush id associated with this core activity
+ */
+TRACE_EVENT(mali_core_active,
+
+	    TP_PROTO(pid_t pid, unsigned int active, unsigned int core_type, unsigned int core_id, unsigned int frame_builder_id, unsigned int flush_id),
+
+	    TP_ARGS(pid, active, core_type, core_id, frame_builder_id, flush_id),
+
+	    TP_STRUCT__entry(
+		    __field(pid_t, pid)
+		    __field(unsigned int, active)
+		    __field(unsigned int, core_type)
+		    __field(unsigned int, core_id)
+		    __field(unsigned int, frame_builder_id)
+		    __field(unsigned int, flush_id)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->pid = pid;
+		    __entry->active = active;
+		    __entry->core_type = core_type;
+		    __entry->core_id = core_id;
+		    __entry->frame_builder_id = frame_builder_id;
+		    __entry->flush_id = flush_id;
+	    ),
+
+	    TP_printk("%s|%d|%s%i:%x|%d", __entry->active ? "S" : "F", __entry->pid, __entry->core_type ? "GP" : "PP", __entry->core_id, __entry->flush_id, __entry->frame_builder_id)
+	   );
+
+#endif /* MALI_LINUX_TRACE_H */
+
+/* This part must exist outside the header guard. */
+#include <trace/define_trace.h>
+
diff --git a/utgard/r5p1/linux/mali_memory.c b/utgard/r5p1/linux/mali_memory.c
new file mode 100755
index 0000000..f370094
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory.c
@@ -0,0 +1,268 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/platform_device.h>
+#include <linux/idr.h>
+
+#include "mali_osk.h"
+#include "mali_executor.h"
+
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_util.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_manager.h"
+
+extern unsigned int mali_dedicated_mem_size;
+extern unsigned int mali_shared_mem_size;
+
+/* session->memory_lock must be held when calling this function */
+static void mali_mem_vma_open(struct vm_area_struct *vma)
+{
+	mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+	MALI_DEBUG_PRINT(4, ("Open called on vma %p\n", vma));
+
+	/* If need to share the allocation, add ref_count here */
+	mali_allocation_ref(alloc);
+	return;
+}
+static void mali_mem_vma_close(struct vm_area_struct *vma)
+{
+	/* If need to share the allocation, unref ref_count here */
+	mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+
+	mali_allocation_unref(&alloc);
+	vma->vm_private_data = NULL;
+}
+
+static int mali_mem_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	/* Not support yet */
+	MALI_DEBUG_ASSERT(0);
+	return 0;
+}
+
+static struct vm_operations_struct mali_kernel_vm_ops = {
+	.open = mali_mem_vma_open,
+	.close = mali_mem_vma_close,
+	.fault = mali_mem_vma_fault,
+};
+
+
+/** @ map mali allocation to CPU address
+*
+* Supported backend types:
+* --MALI_MEM_OS
+* -- need to add COW?
+ *Not supported backend types:
+* -_MALI_MEMORY_BIND_BACKEND_UMP
+* -_MALI_MEMORY_BIND_BACKEND_DMA_BUF
+* -_MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY
+*
+*/
+int mali_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct mali_session_data *session;
+	mali_mem_allocation *mali_alloc = NULL;
+	u32 mali_addr = vma->vm_pgoff << PAGE_SHIFT;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	int ret;
+
+	session = (struct mali_session_data *)filp->private_data;
+	if (NULL == session) {
+		MALI_PRINT_ERROR(("mmap called without any session data available\n"));
+		return -EFAULT;
+	}
+
+	MALI_DEBUG_PRINT(4, ("MMap() handler: start=0x%08X, phys=0x%08X, size=0x%08X vma->flags 0x%08x\n",
+			     (unsigned int)vma->vm_start, (unsigned int)(vma->vm_pgoff << PAGE_SHIFT),
+			     (unsigned int)(vma->vm_end - vma->vm_start), vma->vm_flags));
+
+	/* Set some bits which indicate that, the memory is IO memory, meaning
+	 * that no paging is to be performed and the memory should not be
+	 * included in crash dumps. And that the memory is reserved, meaning
+	 * that it's present and can never be paged out (see also previous
+	 * entry)
+	 */
+	vma->vm_flags |= VM_IO;
+	vma->vm_flags |= VM_DONTCOPY;
+	vma->vm_flags |= VM_PFNMAP;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+	vma->vm_flags |= VM_RESERVED;
+#else
+	vma->vm_flags |= VM_DONTDUMP;
+	vma->vm_flags |= VM_DONTEXPAND;
+#endif
+
+	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	vma->vm_ops = &mali_kernel_vm_ops;
+	/* Operations used on any memory system */
+
+	/* find mali allocation structure by vaddress*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+	if (likely(mali_vma_node)) {
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start);
+		if (unlikely(mali_addr != mali_vma_node->vm_node.start)) {
+			/* only allow to use start address for mmap */
+			return -EFAULT;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(NULL == mali_vma_node);
+		return -EFAULT;
+	}
+
+	mali_alloc->cpu_mapping.addr = (void __user *)vma->vm_start;
+
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	if (!(mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle))) {
+		MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n"));
+		mutex_unlock(&mali_idr_mutex);
+		return -EFAULT;
+	}
+	mutex_unlock(&mali_idr_mutex);
+
+	if (mem_bkend->type == MALI_MEM_OS) {
+		ret = mali_mem_os_cpu_map(&mem_bkend->os_mem, vma);
+	} else if (mem_bkend->type == MALI_MEM_BLOCK) {
+		ret = mali_mem_block_cpu_map(mem_bkend, vma);
+	} else {
+		/* Not support yet*/
+		MALI_DEBUG_ASSERT(0);
+		ret = -EFAULT;
+	}
+
+	if (ret != 0)
+		return -EFAULT;
+
+	MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == mali_alloc->magic);
+
+	vma->vm_private_data = (void *)mali_alloc;
+	mali_allocation_ref(mali_alloc);
+
+	return 0;
+}
+
+_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor)
+{
+	u32 size = descriptor->psize;
+	struct mali_session_data *session = descriptor->session;
+
+	MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == descriptor->magic);
+
+	/* Map dma-buf into this session's page tables */
+
+	if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		size += MALI_MMU_PAGE_SIZE;
+	}
+
+	return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start, size);
+}
+
+
+void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags)
+{
+	if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		size += MALI_MMU_PAGE_SIZE;
+	}
+
+	/* Umap and flush L2 */
+	mali_mmu_pagedir_unmap(session->page_directory, vaddr, size);
+	mali_executor_zap_all_active(session);
+}
+
+u32 _mali_ukk_report_memory_usage(void)
+{
+	u32 sum = 0;
+
+	if (MALI_TRUE == mali_memory_have_dedicated_memory()) {
+		sum += mali_mem_block_allocator_stat();
+	}
+
+	sum += mali_mem_os_stat();
+
+	return sum;
+}
+
+u32 _mali_ukk_report_total_memory_size(void)
+{
+	return mali_dedicated_mem_size + mali_shared_mem_size;
+}
+
+
+/**
+ * Per-session memory descriptor mapping table sizes
+ */
+#define MALI_MEM_DESCRIPTORS_INIT 64
+#define MALI_MEM_DESCRIPTORS_MAX 65536
+
+_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session_data)
+{
+	MALI_DEBUG_PRINT(5, ("Memory session begin\n"));
+
+	session_data->memory_lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED,
+				    _MALI_OSK_LOCK_ORDER_MEM_SESSION);
+
+	if (NULL == session_data->memory_lock) {
+		_mali_osk_free(session_data);
+		MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	}
+
+	mali_memory_manager_init(&session_data->allocation_mgr);
+
+	MALI_DEBUG_PRINT(5, ("MMU session begin: success\n"));
+	MALI_SUCCESS;
+}
+
+void mali_memory_session_end(struct mali_session_data *session)
+{
+	MALI_DEBUG_PRINT(3, ("MMU session end\n"));
+
+	if (NULL == session) {
+		MALI_DEBUG_PRINT(1, ("No session data found during session end\n"));
+		return;
+	}
+	/* free allocation */
+	mali_free_session_allocations(session);
+	/* do some check in unint*/
+	mali_memory_manager_uninit(&session->allocation_mgr);
+
+	/* Free the lock */
+	_mali_osk_mutex_term(session->memory_lock);
+
+	return;
+}
+
+_mali_osk_errcode_t mali_memory_initialize(void)
+{
+	idr_init(&mali_backend_idr);
+	mutex_init(&mali_idr_mutex);
+	return mali_mem_os_init();
+}
+
+void mali_memory_terminate(void)
+{
+	mali_mem_os_term();
+	if (mali_memory_have_dedicated_memory()) {
+		mali_mem_block_allocator_destroy();
+	}
+}
+
+
diff --git a/utgard/r5p1/linux/mali_memory.h b/utgard/r5p1/linux/mali_memory.h
new file mode 100755
index 0000000..9ad047c
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_H__
+#define __MALI_MEMORY_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include <linux/list.h>
+#include <linux/mm.h>
+
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+
+_mali_osk_errcode_t mali_memory_initialize(void);
+void mali_memory_terminate(void);
+
+/** @brief Allocate a page table page
+ *
+ * Allocate a page for use as a page directory or page table. The page is
+ * mapped into kernel space.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise an error code
+ * @param table_page GPU pointer to the allocated page
+ * @param mapping CPU pointer to the mapping of the allocated page
+ */
+MALI_STATIC_INLINE _mali_osk_errcode_t
+mali_mmu_get_table_page(mali_dma_addr *table_page, mali_io_address *mapping)
+{
+	return mali_mem_os_get_table_page(table_page, mapping);
+}
+
+/** @brief Release a page table page
+ *
+ * Release a page table page allocated through \a mali_mmu_get_table_page
+ *
+ * @param pa the GPU address of the page to release
+ */
+MALI_STATIC_INLINE void
+mali_mmu_release_table_page(mali_dma_addr phys, void *virt)
+{
+	mali_mem_os_release_table_page(phys, virt);
+}
+
+/** @brief mmap function
+ *
+ * mmap syscalls on the Mali device node will end up here.
+ *
+ * This function allocates Mali memory and maps it on CPU and Mali.
+ */
+int mali_mmap(struct file *filp, struct vm_area_struct *vma);
+
+/** @brief Start a new memory session
+ *
+ * Called when a process opens the Mali device node.
+ *
+ * @param session Pointer to session to initialize
+ */
+_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session);
+
+/** @brief Close a memory session
+ *
+ * Called when a process closes the Mali device node.
+ *
+ * Memory allocated by the session will be freed
+ *
+ * @param session Pointer to the session to terminate
+ */
+void mali_memory_session_end(struct mali_session_data *session);
+
+/** @brief Prepare Mali page tables for mapping
+ *
+ * This function will prepare the Mali page tables for mapping the memory
+ * described by \a descriptor.
+ *
+ * Page tables will be reference counted and allocated, if not yet present.
+ *
+ * @param descriptor Pointer to the memory descriptor to the mapping
+ */
+_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor);
+
+/** @brief Free Mali page tables for mapping
+ *
+ * This function will unmap pages from Mali memory and free the page tables
+ * that are now unused.
+ *
+ * The updated pages in the Mali L2 cache will be invalidated, and the MMU TLBs will be zapped if necessary.
+ *
+ * @param descriptor Pointer to the memory descriptor to unmap
+ */
+void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags);
+
+
+/** @brief Parse resource and prepare the OS memory allocator
+ *
+ * @param size Maximum size to allocate for Mali GPU.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size);
+
+/** @brief Parse resource and prepare the dedicated memory allocator
+ *
+ * @param start Physical start address of dedicated Mali GPU memory.
+ * @param size Size of dedicated Mali GPU memory.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size);
+
+
+#endif /* __MALI_MEMORY_H__ */
diff --git a/utgard/r5p1/linux/mali_memory_block_alloc.c b/utgard/r5p1/linux/mali_memory_block_alloc.c
new file mode 100755
index 0000000..22a7fdf
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_block_alloc.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_memory.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_osk.h"
+#include <linux/mutex.h>
+
+
+struct mali_block_node *_mali_block_node_allocate(mali_page_node_type type)
+{
+	mali_block_node *block_node = NULL;
+
+	block_node = kzalloc(sizeof(mali_block_node), GFP_KERNEL);
+	MALI_DEBUG_ASSERT(NULL != block_node);
+
+	if (block_node) {
+		block_node->type = type;
+		INIT_LIST_HEAD(&block_node->list);
+	}
+
+	return block_node;
+}
+
+void _mali_block_node_ref(struct mali_block_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_BLOCK) {
+		mali_mem_block_add_ref(node);
+	} else
+		MALI_DEBUG_ASSERT(0);
+}
+
+void _mali_block_node_unref(struct mali_block_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_BLOCK) {
+		mali_mem_block_dec_ref(node);
+	} else
+		MALI_DEBUG_ASSERT(0);
+}
+
+
+
+void _mali_block_node_add_block_item(struct mali_block_node *node, mali_block_item *item)
+{
+	MALI_DEBUG_ASSERT(MALI_PAGE_NODE_BLOCK == node->type);
+	node->blk_it = item;
+}
+
+
+int _mali_block_node_get_ref_count(struct mali_block_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_BLOCK) {
+		return mali_mem_block_get_ref_count(node);
+	} else {
+		MALI_DEBUG_ASSERT(0);
+	}
+	return -1;
+}
+
+
+dma_addr_t _mali_block_node_get_phy_addr(struct mali_block_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_BLOCK) {
+		return _mali_blk_item_get_phy_addr(node->blk_it);
+	} else {
+		MALI_DEBUG_ASSERT(0);
+	}
+	return 0;
+}
+
+
+unsigned long _mali_block_node_get_pfn(struct mali_block_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_BLOCK) {
+		/* get phy addr for BLOCK page*/
+		return _mali_blk_item_get_pfn(node->blk_it);
+	} else {
+		MALI_DEBUG_ASSERT(0);
+	}
+	return 0;
+}
+
+
+static mali_block_allocator *mali_mem_block_gobal_allocator = NULL;
+
+unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item)
+{
+	return (item->phy_addr & ~(MALI_BLOCK_REF_MASK));
+}
+
+
+unsigned long _mali_blk_item_get_pfn(mali_block_item *item)
+{
+	return (item->phy_addr / MALI_BLOCK_SIZE);
+}
+
+
+u32 mali_mem_block_get_ref_count(mali_block_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	return (node->blk_it->phy_addr & MALI_BLOCK_REF_MASK);
+}
+
+
+/* Increase the refence count
+* It not atomic, so it need to get sp_lock before call this function
+*/
+
+u32 mali_mem_block_add_ref(mali_block_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) < MALI_BLOCK_MAX_REF_COUNT);
+	return (node->blk_it->phy_addr++ & MALI_BLOCK_REF_MASK);
+}
+
+/* Decase the refence count
+* It not atomic, so it need to get sp_lock before call this function
+*/
+u32 mali_mem_block_dec_ref(mali_block_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) > 0);
+	return (node->blk_it->phy_addr-- & MALI_BLOCK_REF_MASK);
+}
+
+
+static mali_block_allocator *mali_mem_block_allocator_create(u32 base_address, u32 size)
+{
+	mali_block_allocator *info;
+	u32 usable_size;
+	u32 num_blocks;
+	mali_block_node *m_node;
+	mali_block_item *mali_blk_items = NULL;
+	int i = 0;
+
+	usable_size = size & ~(MALI_BLOCK_SIZE - 1);
+	MALI_DEBUG_PRINT(3, ("Mali block allocator create for region starting at 0x%08X length 0x%08X\n", base_address, size));
+	MALI_DEBUG_PRINT(4, ("%d usable bytes\n", usable_size));
+	num_blocks = usable_size / MALI_BLOCK_SIZE;
+	MALI_DEBUG_PRINT(4, ("which becomes %d blocks\n", num_blocks));
+
+	if (usable_size == 0) {
+		MALI_DEBUG_PRINT(1, ("Memory block of size %d is unusable\n", size));
+		return NULL;
+	}
+
+	info = _mali_osk_calloc(1, sizeof(mali_block_allocator));
+	if (NULL != info) {
+		INIT_LIST_HEAD(&info->free);
+		spin_lock_init(&info->sp_lock);
+		info->total_num = num_blocks;
+		mali_blk_items = _mali_osk_calloc(1, sizeof(mali_block_item) * num_blocks);
+
+		if (mali_blk_items) {
+			info->items = mali_blk_items;
+			/* add blocks(4k size) to free list*/
+			for (i = 0 ; i < num_blocks ; i++) {
+				/* add block information*/
+				mali_blk_items[i].phy_addr = base_address + (i * MALI_BLOCK_SIZE);
+				/* add  to free list */
+				m_node = _mali_block_node_allocate(MALI_PAGE_NODE_BLOCK);
+				if (m_node == NULL)
+					goto fail;
+				_mali_block_node_add_block_item(m_node, &(mali_blk_items[i]));
+				list_add_tail(&m_node->list, &info->free);
+				atomic_add(1, &info->free_num);
+			}
+			return info;
+		}
+	}
+fail:
+	mali_mem_block_allocator_destroy();
+	return NULL;
+}
+
+void mali_mem_block_allocator_destroy(void)
+{
+	struct mali_block_node *m_page, *m_tmp;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	MALI_DEBUG_ASSERT_POINTER(info);
+	MALI_DEBUG_PRINT(4, ("Memory block destroy !\n"));
+
+	if (NULL == info)
+		return;
+
+	list_for_each_entry_safe(m_page, m_tmp , &info->free, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		list_del(&m_page->list);
+		kfree(m_page);
+	}
+
+	_mali_osk_free(info->items);
+	_mali_osk_free(info);
+}
+
+void mali_mem_block_release(mali_mem_backend *mem_bkend)
+{
+	mali_mem_allocation *alloc = mem_bkend->mali_allocation;;
+	MALI_DEBUG_PRINT(4, ("BLOCK Mem: Release size = 0x%x\n", mem_bkend->size));
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK);
+
+	/* Unmap the memory from the mali virtual address space. */
+	mali_mem_block_mali_unmap(alloc);
+	mali_mem_block_free(&mem_bkend->block_mem);
+}
+
+
+int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size)
+{
+	struct mali_block_node *m_page, *m_tmp;
+	size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	MALI_DEBUG_ASSERT_POINTER(info);
+
+	MALI_DEBUG_PRINT(4, ("BLOCK Mem: Allocate size = 0x%x\n", size));
+	/*do some init */
+	INIT_LIST_HEAD(&block_mem->pfns);
+
+	spin_lock(&info->sp_lock);
+	/*check if have enough space*/
+	if (atomic_read(&info->free_num) > page_count) {
+		list_for_each_entry_safe(m_page, m_tmp , &info->free, list) {
+			if (page_count > 0) {
+				MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+				MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(m_page) == 0);
+				list_move(&m_page->list, &block_mem->pfns);
+				block_mem->count++;
+				atomic_dec(&info->free_num);
+				_mali_block_node_ref(m_page);
+			} else {
+				break;
+			}
+			page_count--;
+		}
+	} else {
+		/* can't allocate from BLOCK memory*/
+		spin_unlock(&info->sp_lock);
+		return -1;
+	}
+
+	spin_unlock(&info->sp_lock);
+	return 0;
+}
+
+void mali_mem_block_free(mali_mem_block_mem *block_mem)
+{
+	mali_mem_block_free_list(&block_mem->pfns);
+	MALI_DEBUG_PRINT(4, ("BLOCK Mem free : size = 0x%x\n", block_mem->count * _MALI_OSK_MALI_PAGE_SIZE));
+	block_mem->count = 0;
+	MALI_DEBUG_ASSERT(list_empty(&block_mem->pfns));
+}
+
+
+void mali_mem_block_free_list(struct list_head *list)
+{
+	struct mali_block_node *m_page, *m_tmp;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+
+	if (info) {
+		spin_lock(&info->sp_lock);
+		list_for_each_entry_safe(m_page, m_tmp , list, list) {
+			mali_mem_block_free_node(m_page);
+		}
+		spin_unlock(&info->sp_lock);
+	}
+}
+
+/* free the node,*/
+void mali_mem_block_free_node(struct mali_block_node *node)
+{
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+
+	/* only handle BLOCK node */
+	if (node->type == MALI_PAGE_NODE_BLOCK && info) {
+		/*Need to make this atomic?*/
+		if (1 == _mali_block_node_get_ref_count(node)) {
+			/*Move to free list*/
+			_mali_block_node_unref(node);
+			list_move_tail(&node->list, &info->free);
+			atomic_add(1, &info->free_num);
+		} else {
+			_mali_block_node_unref(node);
+			list_del(&node->list);
+			kfree(node);
+		}
+	}
+}
+
+
+int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+	struct mali_page_directory *pagedir = session->page_directory;
+	struct mali_block_node *m_page;
+	dma_addr_t phys;
+	u32 virt = vaddr;
+	u32 prop = props;
+
+	list_for_each_entry(m_page, &block_mem->pfns, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		phys = _mali_block_node_get_phy_addr(m_page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+		/* Verify that the "physical" address is 32-bit and
+		 * usable for Mali, when on a system with bus addresses
+		 * wider than 32-bit. */
+		MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+		mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+		virt += MALI_MMU_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+void mali_mem_block_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	session->mali_mem_array[alloc->type] -= alloc->psize;
+	mali_session_memory_unlock(session);
+}
+
+
+int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+	int ret;
+	mali_mem_block_mem *block_mem = &mem_bkend->block_mem;
+	unsigned long addr = vma->vm_start;
+	struct mali_block_node *m_page;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK);
+
+	list_for_each_entry(m_page, &block_mem->pfns, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		ret = vm_insert_pfn(vma, addr, _mali_block_node_get_pfn(m_page));
+
+		if (unlikely(0 != ret)) {
+			return -EFAULT;
+		}
+		addr += _MALI_OSK_MALI_PAGE_SIZE;
+
+	}
+
+	return 0;
+}
+
+
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size)
+{
+	mali_block_allocator *allocator;
+
+	/* Do the low level linux operation first */
+
+	/* Request ownership of the memory */
+	if (_MALI_OSK_ERR_OK != _mali_osk_mem_reqregion(start, size, "Dedicated Mali GPU memory")) {
+		MALI_DEBUG_PRINT(1, ("Failed to request memory region for frame buffer (0x%08X - 0x%08X)\n", start, start + size - 1));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create generic block allocator object to handle it */
+	allocator = mali_mem_block_allocator_create(start, size);
+
+	if (NULL == allocator) {
+		MALI_DEBUG_PRINT(1, ("Memory bank registration failed\n"));
+		_mali_osk_mem_unreqregion(start, size);
+		MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	}
+
+	mali_mem_block_gobal_allocator = (mali_block_allocator *)allocator;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+mali_bool mali_memory_have_dedicated_memory(void)
+{
+	return mali_mem_block_gobal_allocator ? MALI_TRUE : MALI_FALSE;
+}
+
+u32 mali_mem_block_allocator_stat(void)
+{
+	mali_block_allocator *allocator = mali_mem_block_gobal_allocator;
+	MALI_DEBUG_ASSERT_POINTER(allocator);
+
+	return (allocator->total_num - atomic_read(&allocator->free_num)) * _MALI_OSK_MALI_PAGE_SIZE;
+}
diff --git a/utgard/r5p1/linux/mali_memory_block_alloc.h b/utgard/r5p1/linux/mali_memory_block_alloc.h
new file mode 100755
index 0000000..65282a6
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_block_alloc.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2010, 2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_BLOCK_ALLOCATOR_H__
+#define __MALI_BLOCK_ALLOCATOR_H__
+
+#include "mali_session.h"
+#include "mali_memory.h"
+#include <linux/spinlock.h>
+
+#include "mali_memory_types.h"
+
+#define MALI_BLOCK_SIZE (PAGE_SIZE)  /* 4 kB, manage BLOCK memory as page size */
+#define MALI_BLOCK_REF_MASK (0xFFF)
+#define MALI_BLOCK_MAX_REF_COUNT (0xFFF)
+
+
+
+typedef struct mali_block_allocator {
+	/*
+	* In free list, each node's ref_count is 0,
+	* ref_count added when allocated or referenced in COW
+	*/
+	mali_block_item *items; /* information for each block item*/
+	struct list_head free; /*free list of mali_memory_node*/
+	spinlock_t sp_lock; /*lock for reference count & free list opertion*/
+	u32 total_num; /* Number of total pages*/
+	atomic_t free_num; /*number of free pages*/
+} mali_block_allocator;
+
+unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item);
+unsigned long _mali_blk_item_get_pfn(mali_block_item *item);
+u32 mali_mem_block_get_ref_count(mali_block_node *node);
+u32 mali_mem_block_add_ref(mali_block_node *node);
+u32 mali_mem_block_dec_ref(mali_block_node *node);
+void mali_mem_block_release(mali_mem_backend *mem_bkend);
+int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size);
+int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+void mali_mem_block_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size);
+mali_bool mali_memory_have_dedicated_memory(void);
+void mali_mem_block_free(mali_mem_block_mem *block_mem);
+void mali_mem_block_free_list(struct list_head *list);
+void mali_mem_block_free_node(struct mali_block_node *node);
+void mali_mem_block_allocator_destroy(void);
+u32 mali_mem_block_allocator_stat(void);
+
+#endif /* __MALI_BLOCK_ALLOCATOR_H__ */
diff --git a/utgard/r5p1/linux/mali_memory_dma_buf.c b/utgard/r5p1/linux/mali_memory_dma_buf.c
new file mode 100755
index 0000000..83890b7
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_dma_buf.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/fs.h>      /* file system operations */
+#include <asm/uaccess.h>        /* user space access */
+#include <linux/dma-buf.h>
+#include <linux/scatterlist.h>
+#include <linux/rbtree.h>
+#include <linux/platform_device.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_linux.h"
+
+#include "mali_memory.h"
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_virtual.h"
+#include "mali_pp_job.h"
+
+static void mali_dma_buf_unmap(mali_mem_allocation *alloc, struct mali_dma_buf_attachment *mem);
+
+void mali_mem_dma_buf_release(mali_mem_backend *mem_backend)
+{
+	struct mali_dma_buf_attachment *mem;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_DMA_BUF == mem_backend->type);
+
+	mem = mem_backend->dma_buf.attachment;
+	MALI_DEBUG_ASSERT_POINTER(mem);
+	MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	MALI_DEBUG_PRINT(3, ("Mali DMA-buf: release attachment %p\n", mem));
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+	MALI_DEBUG_ASSERT_POINTER(mem_backend->mali_allocation);
+	/* We mapped implicitly on attach, so we need to unmap on release */
+	mali_dma_buf_unmap(mem_backend->mali_allocation, mem);
+#endif
+	/* Wait for buffer to become unmapped */
+	wait_event(mem->wait_queue, !mem->is_mapped);
+	MALI_DEBUG_ASSERT(!mem->is_mapped);
+
+	dma_buf_detach(mem->buf, mem->attachment);
+	dma_buf_put(mem->buf);
+
+	_mali_osk_free(mem);
+}
+
+/*
+ * Map DMA buf attachment \a mem into \a session at virtual address \a virt.
+ */
+static int mali_dma_buf_map(mali_mem_backend *mem_backend)
+{
+	mali_mem_allocation *alloc;
+	struct mali_dma_buf_attachment *mem;
+	struct  mali_session_data *session;
+	struct mali_page_directory *pagedir;
+	_mali_osk_errcode_t err;
+	struct scatterlist *sg;
+	u32 virt, flags;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	mem = mem_backend->dma_buf.attachment;
+	MALI_DEBUG_ASSERT_POINTER(mem);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(mem->session == session);
+
+	virt = alloc->mali_vma_node.vm_node.start;
+	flags = alloc->flags;
+
+	mali_session_memory_lock(session);
+	mem->map_ref++;
+
+	MALI_DEBUG_PRINT(5, ("Mali DMA-buf: map attachment %p, new map_ref = %d\n", mem, mem->map_ref));
+
+	if (1 == mem->map_ref) {
+
+		/* First reference taken, so we need to map the dma buf */
+		MALI_DEBUG_ASSERT(!mem->is_mapped);
+
+		mem->sgt = dma_buf_map_attachment(mem->attachment, DMA_BIDIRECTIONAL);
+		if (IS_ERR_OR_NULL(mem->sgt)) {
+			MALI_DEBUG_PRINT_ERROR(("Failed to map dma-buf attachment\n"));
+			mem->map_ref--;
+			mali_session_memory_unlock(session);
+			return -EFAULT;
+		}
+
+		err = mali_mem_mali_map_prepare(alloc);
+		if (_MALI_OSK_ERR_OK != err) {
+			MALI_DEBUG_PRINT(1, ("Mapping of DMA memory failed\n"));
+			mem->map_ref--;
+			mali_session_memory_unlock(session);
+			return -ENOMEM;
+		}
+
+		pagedir = mali_session_get_page_directory(session);
+		MALI_DEBUG_ASSERT_POINTER(pagedir);
+
+		for_each_sg(mem->sgt->sgl, sg, mem->sgt->nents, i) {
+			u32 size = sg_dma_len(sg);
+			dma_addr_t phys = sg_dma_address(sg);
+
+			/* sg must be page aligned. */
+			MALI_DEBUG_ASSERT(0 == size % MALI_MMU_PAGE_SIZE);
+			MALI_DEBUG_ASSERT(0 == (phys & ~(uintptr_t)0xFFFFFFFF));
+
+			mali_mmu_pagedir_update(pagedir, virt, phys, size, MALI_MMU_FLAGS_DEFAULT);
+
+			virt += size;
+		}
+
+		if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+			u32 guard_phys;
+			MALI_DEBUG_PRINT(7, ("Mapping in extra guard page\n"));
+
+			guard_phys = sg_dma_address(mem->sgt->sgl);
+			mali_mmu_pagedir_update(pagedir, virt, guard_phys, MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+		}
+
+		mem->is_mapped = MALI_TRUE;
+		session->mali_mem_array[mem_backend->type] += mem_backend->size;
+		mali_session_memory_unlock(session);
+		/* Wake up any thread waiting for buffer to become mapped */
+		wake_up_all(&mem->wait_queue);
+	} else {
+		MALI_DEBUG_ASSERT(mem->is_mapped);
+		mali_session_memory_unlock(session);
+	}
+
+	return 0;
+}
+
+static void mali_dma_buf_unmap(mali_mem_allocation *alloc, struct mali_dma_buf_attachment *mem)
+{
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(mem);
+	MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	MALI_DEBUG_ASSERT_POINTER(alloc->session);
+
+	mali_session_memory_lock(alloc->session);
+	mem->map_ref--;
+
+	MALI_DEBUG_PRINT(5, ("Mali DMA-buf: unmap attachment %p, new map_ref = %d\n", mem, mem->map_ref));
+
+	if (0 == mem->map_ref) {
+		dma_buf_unmap_attachment(mem->attachment, mem->sgt, DMA_BIDIRECTIONAL);
+		if (MALI_TRUE == mem->is_mapped) {
+			mali_mem_mali_map_free(alloc->session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+					       alloc->flags);
+		}
+		mem->is_mapped = MALI_FALSE;
+		alloc->session->mali_mem_array[alloc->type] -= alloc->psize;
+	}
+	mali_session_memory_unlock(alloc->session);
+	/* Wake up any thread waiting for buffer to become unmapped */
+	wake_up_all(&mem->wait_queue);
+}
+
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+int mali_dma_buf_map_job(struct mali_pp_job *job)
+{
+	struct mali_dma_buf_attachment *mem;
+	_mali_osk_errcode_t err;
+	int i;
+	int ret = 0;
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	for (i = 0; i < num_memory_cookies; i++) {
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+		MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(NULL != mali_alloc);
+		if (MALI_MEM_DMA_BUF != mali_alloc->type) {
+			continue;
+		}
+
+		/* Get backend memory & Map on CPU */
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+		mem = mem_bkend->dma_buf.attachment;
+
+		MALI_DEBUG_ASSERT_POINTER(mem);
+		MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job));
+
+		err = mali_dma_buf_map(mem_bkend);
+		if (0 != err) {
+			MALI_DEBUG_PRINT_ERROR(("Mali DMA-buf: Failed to map dma-buf for mali address %x\n", mali_addr));
+			ret = -EFAULT;
+			continue;
+		}
+	}
+	return ret;
+}
+
+void mali_dma_buf_unmap_job(struct mali_pp_job *job)
+{
+	struct mali_dma_buf_attachment *mem;
+	int i;
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	for (i = 0; i < num_memory_cookies; i++) {
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+		MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(NULL != mali_alloc);
+		if (MALI_MEM_DMA_BUF != mali_alloc->type) {
+			continue;
+		}
+
+		/* Get backend memory & Map on CPU */
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+		mem = mem_bkend->dma_buf.attachment;
+
+		MALI_DEBUG_ASSERT_POINTER(mem);
+		MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job));
+		mali_dma_buf_unmap(mem_bkend->mali_allocation, mem);
+	}
+}
+#endif /* !CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH */
+
+int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *user_arg)
+{
+	_mali_uk_dma_buf_get_size_s args;
+	int fd;
+	struct dma_buf *buf;
+
+	/* get call arguments from user space. copy_from_user returns how many bytes which where NOT copied */
+	if (0 != copy_from_user(&args, (void __user *)user_arg, sizeof(_mali_uk_dma_buf_get_size_s))) {
+		return -EFAULT;
+	}
+
+	/* Do DMA-BUF stuff */
+	fd = args.mem_fd;
+
+	buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(buf)) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to get dma-buf from fd: %d\n", fd));
+		return PTR_RET(buf);
+	}
+
+	if (0 != put_user(buf->size, &user_arg->size)) {
+		dma_buf_put(buf);
+		return -EFAULT;
+	}
+
+	dma_buf_put(buf);
+
+	return 0;
+}
+
+_mali_osk_errcode_t mali_memory_bind_dma_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		int fd, u32 flags)
+{
+	struct dma_buf *buf;
+	struct mali_dma_buf_attachment *dma_mem;
+	struct  mali_session_data *session = alloc->session;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	/* get dma buffer */
+	buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(buf)) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Currently, mapping of the full buffer are supported. */
+	if (alloc->psize != buf->size) {
+		goto failed_alloc_mem;
+	}
+
+	dma_mem = _mali_osk_calloc(1, sizeof(struct mali_dma_buf_attachment));
+	if (NULL == dma_mem) {
+		goto failed_alloc_mem;
+	}
+
+	dma_mem->buf = buf;
+	dma_mem->session = session;
+	dma_mem->map_ref = 0;
+	init_waitqueue_head(&dma_mem->wait_queue);
+
+	dma_mem->attachment = dma_buf_attach(dma_mem->buf, &mali_platform_device->dev);
+	if (NULL == dma_mem->attachment) {
+		goto failed_dma_attach;
+	}
+
+	mem_backend->dma_buf.attachment = dma_mem;
+
+	alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP;
+	if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+	/* Map memory into session's Mali virtual address space. */
+	if (0 != mali_dma_buf_map(mem_backend)) {
+		goto Failed_dma_map;
+	}
+#endif
+
+	return _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+Failed_dma_map:
+	mali_dma_buf_unmap(alloc, dma_mem);
+#endif
+	/* Wait for buffer to become unmapped */
+	wait_event(dma_mem->wait_queue, !dma_mem->is_mapped);
+	MALI_DEBUG_ASSERT(!dma_mem->is_mapped);
+	dma_buf_detach(dma_mem->buf, dma_mem->attachment);
+failed_dma_attach:
+	_mali_osk_free(dma_mem);
+failed_alloc_mem:
+	dma_buf_put(buf);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+void mali_memory_unbind_dma_buf(mali_mem_backend *mem_backend)
+{
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	mali_mem_dma_buf_release(mem_backend);
+}
+
+
diff --git a/utgard/r5p1/linux/mali_memory_dma_buf.h b/utgard/r5p1/linux/mali_memory_dma_buf.h
new file mode 100755
index 0000000..e8682ee
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_dma_buf.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_DMA_BUF_H__
+#define __MALI_MEMORY_DMA_BUF_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_memory.h"
+
+struct mali_pp_job;
+
+struct mali_dma_buf_attachment;
+struct mali_dma_buf_attachment {
+	struct dma_buf *buf;
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct mali_session_data *session;
+	int map_ref;
+	struct mutex map_lock;
+	mali_bool is_mapped;
+	wait_queue_head_t wait_queue;
+};
+
+int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *arg);
+
+void mali_mem_dma_buf_release(mali_mem_backend *mem_backend);
+
+_mali_osk_errcode_t mali_memory_bind_dma_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		int fd, u32 flags);
+
+void mali_memory_unbind_dma_buf(mali_mem_backend *mem_backend);
+
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+int mali_dma_buf_map_job(struct mali_pp_job *job);
+void mali_dma_buf_unmap_job(struct mali_pp_job *job);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MEMORY_DMA_BUF_H__ */
diff --git a/utgard/r5p1/linux/mali_memory_external.c b/utgard/r5p1/linux/mali_memory_external.c
new file mode 100755
index 0000000..ff2a5ae
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_external.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_memory.h"
+#include "mali_mem_validation.h"
+#include "mali_uk_types.h"
+
+void mali_mem_external_release(mali_mem_backend *mem_backend)
+{
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT(MALI_MEM_EXTERNAL == mem_backend->type);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	session->mali_mem_array[mem_backend->type] -= mem_backend->size;
+	mali_session_memory_unlock(session);
+}
+
+_mali_osk_errcode_t mali_memory_bind_ext_mem(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		u32 phys_addr,
+		u32 flag)
+{
+	struct mali_session_data *session;
+	_mali_osk_errcode_t err;
+	u32 virt, phys, size;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	size = alloc->psize;
+	session = (struct mali_session_data *)(uintptr_t)alloc->session;
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+
+	/* check arguments */
+	/* NULL might be a valid Mali address */
+	if (!size) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	/* size must be a multiple of the system page size */
+	if (size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	/* Validate the mali physical range */
+	if (_MALI_OSK_ERR_OK != mali_mem_validation_check(phys_addr, size)) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (flag & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+	mali_session_memory_lock(session);
+
+	virt = alloc->mali_vma_node.vm_node.start;
+	phys = phys_addr;
+
+	err = mali_mem_mali_map_prepare(alloc);
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_session_memory_unlock(session);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	mali_mmu_pagedir_update(session->page_directory, virt, phys, size, MALI_MMU_FLAGS_DEFAULT);
+
+	if (alloc->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		mali_mmu_pagedir_update(session->page_directory, virt + size, phys, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+	}
+	MALI_DEBUG_PRINT(3,
+			 ("Requested to map physical memory 0x%x-0x%x into virtual memory 0x%x\n",
+			  phys_addr, (phys_addr + size - 1),
+			  virt));
+	session->mali_mem_array[mem_backend->type] += mem_backend->size;
+	mali_session_memory_unlock(session);
+
+	MALI_SUCCESS;
+}
+
+
+void mali_memory_unbind_ext_mem(mali_mem_backend *mem_backend)
+{
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	mali_mem_external_release(mem_backend);
+}
+
diff --git a/utgard/r5p1/linux/mali_memory_external.h b/utgard/r5p1/linux/mali_memory_external.h
new file mode 100644
index 0000000..9e9a9e9
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_external.h
@@ -0,0 +1,34 @@
+
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_EXTERNAL_H__
+#define __MALI_MEMORY_EXTERNAL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+_mali_osk_errcode_t mali_memory_bind_ext_mem(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		u32 phys_addr,
+		u32 flag);
+
+void mali_memory_unbind_ext_mem(mali_mem_backend *mem_backend);
+
+void mali_mem_external_release(mali_mem_backend *mem_backend);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/utgard/r5p1/linux/mali_memory_manager.c b/utgard/r5p1/linux/mali_memory_manager.c
new file mode 100644
index 0000000..510c82e
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_manager.c
@@ -0,0 +1,543 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/platform_device.h>
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif
+#include <linux/idr.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#endif
+#if defined(CONFIG_MALI400_UMP)
+#include "mali_memory_ump.h"
+#endif
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_util.h"
+#include "mali_memory_external.h"
+#include "mali_memory_block_alloc.h"
+
+#define MALI_S32_MAX 0x7fffffff
+
+/*
+* New memory system interface
+*/
+
+/*inti idr for backend memory */
+struct idr mali_backend_idr;
+struct mutex mali_idr_mutex;
+
+/* init allocation manager */
+int mali_memory_manager_init(struct mali_allocation_manager *mgr)
+{
+	/* init Locks */
+	rwlock_init(&mgr->vm_lock);
+	mutex_init(&mgr->list_mutex);
+
+	/* init link */
+	INIT_LIST_HEAD(&mgr->head);
+
+	/* init RB tree */
+	mgr->allocation_mgr_rb = RB_ROOT;
+	mgr->mali_allocation_nr = 0;
+	return 0;
+}
+
+/* deinit allocation manager
+* Do some check for debug
+*/
+void mali_memory_manager_uninit(struct mali_allocation_manager *mgr)
+{
+	/* check RB tree is empty */
+	MALI_DEBUG_ASSERT(((void *)(mgr->allocation_mgr_rb.rb_node) == (void *)rb_last(&mgr->allocation_mgr_rb)));
+	/* check allocation List */
+	MALI_DEBUG_ASSERT(list_empty(&mgr->head));
+}
+
+/* Prepare memory descriptor */
+static mali_mem_allocation *mali_mem_allocation_struct_create(struct mali_session_data *session)
+{
+	mali_mem_allocation *mali_allocation;
+
+	/* Allocate memory */
+	mali_allocation = (mali_mem_allocation *)kzalloc(sizeof(mali_mem_allocation), GFP_KERNEL);
+	if (NULL == mali_allocation) {
+		MALI_DEBUG_PRINT(1, ("mali_mem_allocation_struct_create: descriptor was NULL\n"));
+		return NULL;
+	}
+
+	MALI_DEBUG_CODE(mali_allocation->magic = MALI_MEM_ALLOCATION_VALID_MAGIC);
+
+	/* do init */
+	mali_allocation->flags = 0;
+	mali_allocation->session = session;
+
+	INIT_LIST_HEAD(&mali_allocation->list);
+	kref_init(&mali_allocation->ref);
+
+	/**
+	*add to session list
+	*/
+	mutex_lock(&session->allocation_mgr.list_mutex);
+	list_add_tail(&mali_allocation->list, &session->allocation_mgr.head);
+	session->allocation_mgr.mali_allocation_nr++;
+	mutex_unlock(&session->allocation_mgr.list_mutex);
+
+	return mali_allocation;
+}
+
+
+void  mali_mem_allocation_struct_destory(mali_mem_allocation *alloc)
+{
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(alloc->session);
+	mutex_lock(&alloc->session->allocation_mgr.list_mutex);
+	list_del(&alloc->list);
+	alloc->session->allocation_mgr.mali_allocation_nr--;
+	mutex_unlock(&alloc->session->allocation_mgr.list_mutex);
+
+	kfree(alloc);
+}
+
+int mali_mem_backend_struct_create(mali_mem_backend **backend, u32 psize)
+{
+	mali_mem_backend *mem_backend = NULL;
+	s32 ret = -ENOSPC;
+	s32 index = -1;
+	*backend = (mali_mem_backend *)kzalloc(sizeof(mali_mem_backend), GFP_KERNEL);
+	if (NULL == *backend) {
+		MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_create: backend descriptor was NULL\n"));
+		return -1;
+	}
+	mem_backend = *backend;
+	mem_backend->size = psize;
+	/* link backend with id */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
+again:
+	if (!idr_pre_get(&mali_backend_idr, GFP_KERNEL)) {
+		kfree(mem_backend);
+		return -ENOMEM;
+	}
+	mutex_lock(&mali_idr_mutex);
+	ret = idr_get_new_above(&mali_backend_idr, mem_backend, 1, &index);
+	mutex_unlock(&mali_idr_mutex);
+
+	if (-ENOSPC == ret) {
+		kfree(mem_backend);
+		return -ENOSPC;
+	}
+	if (-EAGAIN == ret)
+		goto again;
+#else
+	mutex_lock(&mali_idr_mutex);
+	ret = idr_alloc(&mali_backend_idr, mem_backend, 1, MALI_S32_MAX, GFP_KERNEL);
+	mutex_unlock(&mali_idr_mutex);
+	index = ret;
+	if (ret < 0) {
+		MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_create: Can't allocate idr for backend! \n"));
+		kfree(mem_backend);
+		return -ENOSPC;
+	}
+#endif
+	return index;
+}
+
+static void mali_mem_backend_struct_destory(mali_mem_backend **backend, s32 backend_handle)
+{
+	mali_mem_backend *mem_backend = *backend;
+
+	mutex_lock(&mali_idr_mutex);
+	idr_remove(&mali_backend_idr, backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	kfree(mem_backend);
+	*backend = NULL;
+}
+
+
+/* Set GPU MMU properties */
+static void _mali_memory_gpu_map_property_set(u32 *properties, u32 flags)
+{
+
+	if (_MALI_MEMORY_GPU_READ_ALLOCATE & flags) {
+		*properties = MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE;
+	} else {
+		*properties = MALI_MMU_FLAGS_DEFAULT;
+	}
+}
+
+
+/**
+*  function@_mali_ukk_mem_allocate - allocate mali memory
+*/
+_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args)
+{
+	struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	int retval = 0;
+	mali_mem_allocation *mali_allocation = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+
+	MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_allocate, vaddr=0x%x, size =0x%x! \n", args->gpu_vaddr, args->psize));
+
+	/* Check if the address is allocated
+	*  Can we trust User mode?
+	*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, args->gpu_vaddr, 0);
+	if (unlikely(mali_vma_node)) {
+		/* Not support yet */
+		MALI_DEBUG_ASSERT(0);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/**
+	*create mali memory allocation
+	*/
+	mali_allocation = mali_mem_allocation_struct_create(session);
+
+	if (mali_allocation == NULL) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_allocate: Failed to create allocation struct! \n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	mali_allocation->psize = args->psize;
+	mali_allocation->vsize = args->vsize;
+
+	/* check if have dedicated memory */
+	if (MALI_TRUE == mali_memory_have_dedicated_memory()) {
+		mali_allocation->type = MALI_MEM_BLOCK;
+	} else {
+		mali_allocation->type = MALI_MEM_OS;
+	}
+
+	/**
+	*add allocation node to RB tree for index
+	*/
+	mali_allocation->mali_vma_node.vm_node.start = args->gpu_vaddr;
+	mali_allocation->mali_vma_node.vm_node.size = args->vsize;
+
+	mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	/* check if need to allocate backend */
+	if (mali_allocation->psize == 0)
+		return _MALI_OSK_ERR_OK;
+
+	/**
+	*allocate physical backend & pages
+	*/
+	if (likely(mali_allocation->psize > 0)) {
+		mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, args->psize);
+		if (mali_allocation->backend_handle < 0) {
+			ret = _MALI_OSK_ERR_NOMEM;
+			MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0! \n"));
+			goto failed_alloc_backend;
+		}
+
+		mem_backend->mali_allocation = mali_allocation;
+		mem_backend->type = mali_allocation->type;
+
+		if (mem_backend->type == MALI_MEM_OS) {
+			retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size);
+		} else if (mem_backend->type == MALI_MEM_BLOCK) {
+			/* try to allocated from BLOCK memory first, then try OS memory if failed.*/
+			if (mali_mem_block_alloc(&mem_backend->block_mem, mem_backend->size)) {
+				retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size);
+				mem_backend->type = MALI_MEM_OS;
+				mali_allocation->type = MALI_MEM_OS;
+			}
+		} else {
+			/* ONLY support mem_os type */
+			MALI_DEBUG_ASSERT(0);
+		}
+
+		if (retval) {
+			ret = _MALI_OSK_ERR_NOMEM;
+			MALI_DEBUG_PRINT(1, (" can't allocate enough pages! \n"));
+			goto failed_alloc_pages;
+		}
+	}
+
+	/**
+	*map to GPU side
+	*/
+	mali_allocation->mali_mapping.addr = args->gpu_vaddr;
+
+	/* set gpu mmu propery */
+	_mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags);
+
+	if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) {
+		_mali_osk_mutex_wait(session->memory_lock);
+		/* Map on Mali */
+		ret = mali_mem_mali_map_prepare(mali_allocation);
+		if (0 != ret) {
+			MALI_DEBUG_PRINT(1, (" prepare map fail! \n"));
+			goto failed_gpu_map;
+		}
+		/* only support os memory type now */
+		if (mem_backend->type == MALI_MEM_OS) {
+			mali_mem_os_mali_map(mem_backend, args->gpu_vaddr,
+					     mali_allocation->mali_mapping.properties);
+		} else if (mem_backend->type == MALI_MEM_BLOCK) {
+			mali_mem_block_mali_map(&mem_backend->block_mem, session, args->gpu_vaddr,
+						mali_allocation->mali_mapping.properties);
+		} else {
+			/* Not support yet */
+			MALI_DEBUG_ASSERT(0);
+		}
+		session->mali_mem_array[mem_backend->type] += mem_backend->size;
+		if (session->mali_mem_array[MALI_MEM_OS] + session->mali_mem_array[MALI_MEM_BLOCK] > session->max_mali_mem_allocated) {
+			session->max_mali_mem_allocated = session->mali_mem_array[MALI_MEM_OS] + session->mali_mem_array[MALI_MEM_BLOCK];
+		}
+		_mali_osk_mutex_signal(session->memory_lock);
+	}
+
+	return _MALI_OSK_ERR_OK;
+
+failed_gpu_map:
+	_mali_osk_mutex_signal(session->memory_lock);
+	if (mem_backend->type == MALI_MEM_OS) {
+		mali_mem_os_free(&mem_backend->os_mem);
+	} else {
+		mali_mem_block_free(&mem_backend->block_mem);
+	}
+failed_alloc_pages:
+	mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+failed_alloc_backend:
+
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_allocation);
+
+	return ret;
+}
+
+
+_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args)
+{
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	u32 vaddr = args->gpu_vaddr;
+	mali_mem_allocation *mali_alloc = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+
+	/* find mali allocation structure by vaddress*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, vaddr, 0);
+
+	MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+	mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+
+	if (mali_alloc)
+		/* check ref_count */
+		mali_allocation_unref(&mali_alloc);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/**
+* Function _mali_ukk_mem_bind -- bind a external memory to a new GPU address
+* It will allocate a new mem allocation and bind external memory to it.
+* Supported backend type are:
+* _MALI_MEMORY_BIND_BACKEND_UMP
+* _MALI_MEMORY_BIND_BACKEND_DMA_BUF
+* _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY
+* CPU access is not supported yet
+*/
+_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args)
+{
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_allocation *mali_allocation = NULL;
+	MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_bind, vaddr=0x%x, size =0x%x! \n", args->vaddr, args->size));
+
+	/**
+	* allocate mali allocation.
+	*/
+	mali_allocation = mali_mem_allocation_struct_create(session);
+
+	if (mali_allocation == NULL) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	mali_allocation->psize = args->size;
+	mali_allocation->vsize = args->size;
+	mali_allocation->mali_mapping.addr = args->vaddr;
+
+	/* add allocation node to RB tree for index  */
+	mali_allocation->mali_vma_node.vm_node.start = args->vaddr;
+	mali_allocation->mali_vma_node.vm_node.size = args->size;
+	mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	/* allocate backend*/
+	if (mali_allocation->psize > 0) {
+		mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize);
+		if (mali_allocation->backend_handle < 0) {
+			goto Failed_alloc_backend;
+		}
+
+	} else {
+		goto Failed_alloc_backend;
+	}
+
+	mem_backend->size = mali_allocation->psize;
+	mem_backend->mali_allocation = mali_allocation;
+
+	switch (args->flags & _MALI_MEMORY_BIND_BACKEND_MASK) {
+	case  _MALI_MEMORY_BIND_BACKEND_UMP:
+#if defined(CONFIG_MALI400_UMP)
+		mali_allocation->type = MALI_MEM_UMP;
+		mem_backend->type = MALI_MEM_UMP;
+		ret = mali_memory_bind_ump_buf(mali_allocation, mem_backend,
+					       args->mem_union.bind_ump.secure_id, args->mem_union.bind_ump.flags);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Bind ump buf failed\n"));
+			goto  Failed_bind_backend;
+		}
+#else
+		MALI_DEBUG_PRINT(1, ("UMP not supported\n"));
+		goto Failed_bind_backend;
+#endif
+		break;
+	case  _MALI_MEMORY_BIND_BACKEND_DMA_BUF:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		mali_allocation->type = MALI_MEM_DMA_BUF;
+		mem_backend->type = MALI_MEM_DMA_BUF;
+		ret = mali_memory_bind_dma_buf(mali_allocation, mem_backend,
+					       args->mem_union.bind_dma_buf.mem_fd, args->mem_union.bind_dma_buf.flags);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Bind dma buf failed\n"));
+			goto Failed_bind_backend;
+		}
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported\n"));
+		goto Failed_bind_backend;
+#endif
+		break;
+	case _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY:
+		/* not allowed */
+		MALI_DEBUG_ASSERT(0);
+		break;
+
+	case _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY:
+		mali_allocation->type = MALI_MEM_EXTERNAL;
+		mem_backend->type = MALI_MEM_EXTERNAL;
+		ret = mali_memory_bind_ext_mem(mali_allocation, mem_backend, args->mem_union.bind_ext_memory.phys_addr,
+					       args->mem_union.bind_ext_memory.flags);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Bind external buf failed\n"));
+			goto Failed_bind_backend;
+		}
+		break;
+
+	case _MALI_MEMORY_BIND_BACKEND_EXT_COW:
+		/* not allowed */
+		MALI_DEBUG_ASSERT(0);
+		break;
+
+	default:
+		MALI_DEBUG_ASSERT(0);
+		break;
+	}
+	return _MALI_OSK_ERR_OK;
+
+
+Failed_bind_backend:
+	mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+
+Failed_alloc_backend:
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_allocation);
+
+	MALI_DEBUG_PRINT(1, (" _mali_ukk_mem_bind, return ERROR! \n"));
+	return ret;
+}
+
+
+/*
+* Function _mali_ukk_mem_unbind -- unbind a external memory to a new GPU address
+* This function unbind the backend memory and free the allocation
+* no ref_count for this type of memory
+*/
+_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args)
+{
+	/**/
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_allocation *mali_allocation = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+	u32 mali_addr = args->vaddr;
+	MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_unbind, vaddr=0x%x! \n", args->vaddr));
+
+	/* find the allocation by vaddr */
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+	if (likely(mali_vma_node)) {
+		MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start);
+		mali_allocation = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+	} else {
+		MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+		/* Not support yet */
+		MALI_DEBUG_ASSERT(0);
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	if (NULL != mali_allocation)
+		/* check ref_count */
+		mali_allocation_unref(&mali_allocation);
+	return _MALI_OSK_ERR_OK;
+}
+
+
+
+/*
+* Function _mali_ukk_mem_cow --  COW for an allocation
+* This function allocate new pages for  a range (range, range+size) of allocation
+*  And Map it(keep use the not in range pages from target allocation ) to an GPU vaddr
+*/
+_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+
+	/* create new alloction if needed */
+
+	/* Get the target allocation and it's backend*/
+
+	/* allocate new pages from os mem for modified range */
+
+
+	/* fill the COW backend, all pages for this allocation
+	*  including the new page for modified range and pages not modified in old allocation.
+	* Do Add ref to pages from target allocation
+	*/
+
+
+	/* map it to GPU side */
+	return ret;
+}
+
+/**
+*  attach a backend to an exist mali allocation
+*/
+
+
+/**
+*  deattach a backend from an exist mali allocation
+*/
+
diff --git a/utgard/r5p1/linux/mali_memory_manager.h b/utgard/r5p1/linux/mali_memory_manager.h
new file mode 100644
index 0000000..e44c032
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_manager.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_MANAGER_H__
+#define __MALI_MEMORY_MANAGER_H__
+
+#include "mali_osk.h"
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_uk_types.h"
+
+struct mali_allocation_manager {
+	rwlock_t vm_lock;
+	struct rb_root allocation_mgr_rb;
+	struct list_head head;
+	struct mutex list_mutex;
+	u32 mali_allocation_nr;
+};
+
+extern struct idr mali_backend_idr;
+extern struct mutex mali_idr_mutex;
+
+int mali_memory_manager_init(struct mali_allocation_manager *mgr);
+void mali_memory_manager_uninit(struct mali_allocation_manager *mgr);
+
+void  mali_mem_allocation_struct_destory(mali_mem_allocation *alloc);
+
+_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args);
+
+
+#endif
+
diff --git a/utgard/r5p1/linux/mali_memory_os_alloc.c b/utgard/r5p1/linux/mali_memory_os_alloc.c
new file mode 100755
index 0000000..66523fd
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_os_alloc.c
@@ -0,0 +1,665 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/version.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+
+#include "mali_osk.h"
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_kernel_linux.h"
+
+/* Minimum size of allocator page pool */
+#define MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB * 256)
+#define MALI_OS_MEMORY_POOL_TRIM_JIFFIES (10 * CONFIG_HZ) /* Default to 10s */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+static unsigned long mali_dma_attrs;
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+/* Write combine dma_attrs */
+static DEFINE_DMA_ATTRS(dma_attrs_wc);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask);
+#else
+static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask);
+#endif
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc);
+#else
+static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc);
+static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc);
+#endif
+#endif
+static void mali_mem_os_trim_pool(struct work_struct *work);
+static void mali_mem_os_free_page(struct mali_page_node *m_page);
+
+
+static struct mali_mem_os_allocator {
+	spinlock_t pool_lock;
+	struct list_head pool_pages;
+	size_t pool_count;
+
+	atomic_t allocated_pages;
+	size_t allocation_limit;
+
+	struct shrinker shrinker;
+	struct delayed_work timed_shrinker;
+	struct workqueue_struct *wq;
+} mali_mem_os_allocator = {
+	.pool_lock = __SPIN_LOCK_UNLOCKED(pool_lock),
+	.pool_pages = LIST_HEAD_INIT(mali_mem_os_allocator.pool_pages),
+	.pool_count = 0,
+
+	.allocated_pages = ATOMIC_INIT(0),
+	.allocation_limit = 0,
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	.shrinker.shrink = mali_mem_os_shrink,
+#else
+	.shrinker.count_objects = mali_mem_os_shrink_count,
+	.shrinker.scan_objects = mali_mem_os_shrink,
+#endif
+	.shrinker.seeks = DEFAULT_SEEKS,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	.timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool, TIMER_DEFERRABLE),
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)
+	.timed_shrinker = __DEFERRED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool),
+#else
+	.timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool),
+#endif
+};
+
+void mali_mem_os_free(mali_mem_os_mem *os_mem)
+{
+	LIST_HEAD(pages);
+
+	atomic_sub(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+
+	/* Put pages on pool. */
+	list_cut_position(&pages, &os_mem->pages, os_mem->pages.prev);
+
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+
+	list_splice(&pages, &mali_mem_os_allocator.pool_pages);
+	mali_mem_os_allocator.pool_count += os_mem->count;
+
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+		MALI_DEBUG_PRINT(5, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count));
+		queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES);
+	}
+}
+
+/**
+* free memory without put it into page pool
+
+void mali_mem_os_free_not_pooled(mali_mem_os_mem *os_mem)
+{
+
+}
+*/
+
+int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size)
+{
+	struct page *new_page;
+	LIST_HEAD(pages_list);
+	size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE;
+	size_t remaining = page_count;
+	struct mali_page_node *m_page, *m_tmp;
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(os_mem);
+
+	if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) {
+		MALI_DEBUG_PRINT(2, ("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n",
+				     size,
+				     atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE,
+				     mali_mem_os_allocator.allocation_limit));
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&os_mem->pages);
+	os_mem->count = page_count;
+
+	/* Grab pages from pool. */
+	{
+		size_t pool_pages;
+		spin_lock(&mali_mem_os_allocator.pool_lock);
+		pool_pages = min(remaining, mali_mem_os_allocator.pool_count);
+		for (i = pool_pages; i > 0; i--) {
+			BUG_ON(list_empty(&mali_mem_os_allocator.pool_pages));
+			list_move(mali_mem_os_allocator.pool_pages.next, &pages_list);
+		}
+		mali_mem_os_allocator.pool_count -= pool_pages;
+		remaining -= pool_pages;
+		spin_unlock(&mali_mem_os_allocator.pool_lock);
+	}
+
+	/* Process pages from pool. */
+	i = 0;
+	list_for_each_entry_safe(m_page, m_tmp, &pages_list, list) {
+		BUG_ON(NULL == m_page);
+
+		list_move_tail(&m_page->list, &os_mem->pages);
+	}
+
+	/* Allocate new pages, if needed. */
+	for (i = 0; i < remaining; i++) {
+		dma_addr_t dma_addr;
+		struct mali_page_node *new_page_node = NULL;
+		gfp_t flags = __GFP_ZERO | __GFP_NORETRY | __GFP_NOWARN | __GFP_COLD;
+		int err;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE)
+		flags |= GFP_HIGHUSER;
+#else
+		/* After 3.15.0 kernel use ZONE_DMA replace ZONE_DMA32 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0)
+		flags |= GFP_DMA32;
+#else
+		flags |= GFP_DMA;
+#endif
+#endif
+
+		new_page = alloc_page(flags);
+
+		if (unlikely(NULL == new_page)) {
+			/* Calculate the number of pages actually allocated, and free them. */
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(os_mem);
+			return -ENOMEM;
+		}
+
+		/* Ensure page is flushed from CPU caches. */
+		dma_addr = dma_map_page(&mali_platform_device->dev, new_page,
+					0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+		err = dma_mapping_error(&mali_platform_device->dev, dma_addr);
+		if (unlikely(err)) {
+			MALI_DEBUG_PRINT_ERROR(("OS Mem: Failed to DMA map page %p: %u",
+						new_page, err));
+			__free_page(new_page);
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(os_mem);
+			return -EFAULT;
+		}
+
+		/* Store page phys addr */
+		SetPagePrivate(new_page);
+		set_page_private(new_page, dma_addr);
+
+		new_page_node = kmalloc(sizeof(mali_page_node), GFP_KERNEL);
+		if (unlikely(NULL == new_page_node)) {
+			MALI_PRINT_ERROR(("OS Mem: Can't allocate mali_page node! \n"));
+			dma_unmap_page(&mali_platform_device->dev, page_private(new_page),
+				       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+			ClearPagePrivate(new_page);
+			__free_page(new_page);
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(os_mem);
+			return -EFAULT;
+		}
+		new_page_node->page = new_page;
+		INIT_LIST_HEAD(&new_page_node->list);
+
+		list_add_tail(&new_page_node->list, &os_mem->pages);
+	}
+
+	atomic_add(page_count, &mali_mem_os_allocator.allocated_pages);
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES > mali_mem_os_allocator.pool_count) {
+		MALI_DEBUG_PRINT(4, ("OS Mem: Stopping pool trim timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count));
+		cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker);
+	}
+
+	return 0;
+}
+
+
+void mali_mem_os_mali_map(mali_mem_backend *mem_bkend, u32 vaddr, u32 props)
+{
+	struct mali_session_data *session;
+	struct mali_page_directory *pagedir;
+	struct mali_page_node *m_page;
+	u32 virt = vaddr;
+	u32 prop = props;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend->mali_allocation);
+
+	session = mem_bkend->mali_allocation->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	pagedir = session->page_directory;
+
+	list_for_each_entry(m_page, &mem_bkend->os_mem.pages, list) {
+		dma_addr_t phys = page_private(m_page->page);
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+		/* Verify that the "physical" address is 32-bit and
+		 * usable for Mali, when on a system with bus addresses
+		 * wider than 32-bit. */
+		MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+
+		mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+		virt += MALI_MMU_PAGE_SIZE;
+	}
+}
+
+
+static void mali_mem_os_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	session->mali_mem_array[alloc->type] -= alloc->psize;
+	mali_session_memory_unlock(session);
+}
+
+int mali_mem_os_cpu_map(mali_mem_os_mem *os_mem, struct vm_area_struct *vma)
+{
+	struct mali_page_node *m_page;
+	struct page *page;
+	int ret;
+	unsigned long addr = vma->vm_start;
+
+	list_for_each_entry(m_page, &os_mem->pages, list) {
+		/* We should use vm_insert_page, but it does a dcache
+		 * flush which makes it way slower than remap_pfn_range or vm_insert_pfn.
+		ret = vm_insert_page(vma, addr, page);
+		*/
+		page = m_page->page;
+		ret = vm_insert_pfn(vma, addr, page_to_pfn(page));
+
+		if (unlikely(0 != ret)) {
+			return -EFAULT;
+		}
+		addr += _MALI_OSK_MALI_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+void mali_mem_os_release(mali_mem_backend *mem_bkend)
+{
+
+	mali_mem_allocation *alloc;
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	MALI_DEBUG_ASSERT(MALI_MEM_OS == mem_bkend->type);
+
+	alloc = mem_bkend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	/* Unmap the memory from the mali virtual address space. */
+	mali_mem_os_mali_unmap(alloc);
+
+	/* Free pages */
+	mali_mem_os_free(&mem_bkend->os_mem);
+}
+
+
+#define MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE 128
+static struct {
+	struct {
+		mali_dma_addr phys;
+		mali_io_address mapping;
+	} page[MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE];
+	size_t count;
+	spinlock_t lock;
+} mali_mem_page_table_page_pool = {
+	.count = 0,
+	.lock = __SPIN_LOCK_UNLOCKED(pool_lock),
+};
+
+_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_NOMEM;
+	dma_addr_t tmp_phys;
+
+	spin_lock(&mali_mem_page_table_page_pool.lock);
+	if (0 < mali_mem_page_table_page_pool.count) {
+		u32 i = --mali_mem_page_table_page_pool.count;
+		*phys = mali_mem_page_table_page_pool.page[i].phys;
+		*mapping = mali_mem_page_table_page_pool.page[i].mapping;
+
+		ret = _MALI_OSK_ERR_OK;
+	}
+	spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+	if (_MALI_OSK_ERR_OK != ret) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+		*mapping = dma_alloc_attrs(&mali_platform_device->dev,
+					   _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys,
+					   GFP_KERNEL, mali_dma_attrs);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		*mapping = dma_alloc_attrs(&mali_platform_device->dev,
+					   _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys,
+					   GFP_KERNEL, &dma_attrs_wc);
+#else
+		*mapping = dma_alloc_writecombine(&mali_platform_device->dev,
+						  _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys, GFP_KERNEL);
+#endif
+		if (NULL != *mapping) {
+			ret = _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+			/* Verify that the "physical" address is 32-bit and
+			 * usable for Mali, when on a system with bus addresses
+			 * wider than 32-bit. */
+			MALI_DEBUG_ASSERT(0 == (tmp_phys >> 32));
+#endif
+
+			*phys = (mali_dma_addr)tmp_phys;
+		}
+	}
+
+	return ret;
+}
+
+void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt)
+{
+	spin_lock(&mali_mem_page_table_page_pool.lock);
+	if (MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE > mali_mem_page_table_page_pool.count) {
+		u32 i = mali_mem_page_table_page_pool.count;
+		mali_mem_page_table_page_pool.page[i].phys = phys;
+		mali_mem_page_table_page_pool.page[i].mapping = virt;
+
+		++mali_mem_page_table_page_pool.count;
+
+		spin_unlock(&mali_mem_page_table_page_pool.lock);
+	} else {
+		spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+		dma_free_attrs(&mali_platform_device->dev,
+			       _MALI_OSK_MALI_PAGE_SIZE, virt, phys,
+			       mali_dma_attrs);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		dma_free_attrs(&mali_platform_device->dev,
+			       _MALI_OSK_MALI_PAGE_SIZE, virt, phys,
+			       &dma_attrs_wc);
+#else
+		dma_free_writecombine(&mali_platform_device->dev,
+				      _MALI_OSK_MALI_PAGE_SIZE, virt, phys);
+#endif
+	}
+}
+
+static void mali_mem_os_free_page(struct mali_page_node *m_page)
+{
+	struct page *page = m_page->page;
+	BUG_ON(page_count(page) != 1);
+
+	dma_unmap_page(&mali_platform_device->dev, page_private(page),
+		       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+	ClearPagePrivate(page);
+
+	__free_page(page);
+	kfree(m_page);
+}
+
+/* The maximum number of page table pool pages to free in one go. */
+#define MALI_MEM_OS_CHUNK_TO_FREE 64UL
+
+/* Free a certain number of pages from the page table page pool.
+ * The pool lock must be held when calling the function, and the lock will be
+ * released before returning.
+ */
+static void mali_mem_os_page_table_pool_free(size_t nr_to_free)
+{
+	mali_dma_addr phys_arr[MALI_MEM_OS_CHUNK_TO_FREE];
+	void *virt_arr[MALI_MEM_OS_CHUNK_TO_FREE];
+	u32 i;
+
+	MALI_DEBUG_ASSERT(nr_to_free <= MALI_MEM_OS_CHUNK_TO_FREE);
+
+	/* Remove nr_to_free pages from the pool and store them locally on stack. */
+	for (i = 0; i < nr_to_free; i++) {
+		u32 pool_index = mali_mem_page_table_page_pool.count - i - 1;
+
+		phys_arr[i] = mali_mem_page_table_page_pool.page[pool_index].phys;
+		virt_arr[i] = mali_mem_page_table_page_pool.page[pool_index].mapping;
+	}
+
+	mali_mem_page_table_page_pool.count -= nr_to_free;
+
+	spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+	/* After releasing the spinlock: free the pages we removed from the pool. */
+	for (i = 0; i < nr_to_free; i++) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+		dma_free_attrs(&mali_platform_device->dev, _MALI_OSK_MALI_PAGE_SIZE,
+			       virt_arr[i], (dma_addr_t)phys_arr[i], mali_dma_attrs);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		dma_free_attrs(&mali_platform_device->dev, _MALI_OSK_MALI_PAGE_SIZE,
+			       virt_arr[i], (dma_addr_t)phys_arr[i], &dma_attrs_wc);
+#else
+		dma_free_writecombine(&mali_platform_device->dev,
+				      _MALI_OSK_MALI_PAGE_SIZE,
+				      virt_arr[i], (dma_addr_t)phys_arr[i]);
+#endif
+	}
+}
+
+static void mali_mem_os_trim_page_table_page_pool(void)
+{
+	size_t nr_to_free = 0;
+	size_t nr_to_keep;
+
+	/* Keep 2 page table pages for each 1024 pages in the page cache. */
+	nr_to_keep = mali_mem_os_allocator.pool_count / 512;
+	/* And a minimum of eight pages, to accomodate new sessions. */
+	nr_to_keep += 8;
+
+	if (0 == spin_trylock(&mali_mem_page_table_page_pool.lock)) return;
+
+	if (nr_to_keep < mali_mem_page_table_page_pool.count) {
+		nr_to_free = mali_mem_page_table_page_pool.count - nr_to_keep;
+		nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, nr_to_free);
+	}
+
+	/* Pool lock will be released by the callee. */
+	mali_mem_os_page_table_pool_free(nr_to_free);
+}
+
+static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	return mali_mem_os_allocator.pool_count;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask)
+#else
+static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask)
+#endif /* Linux < 2.6.35 */
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc)
+#else
+static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc)
+#endif /* Linux < 3.12.0 */
+#endif /* Linux < 3.0.0 */
+{
+	struct mali_page_node *m_page, *m_tmp;
+	unsigned long flags;
+	struct list_head *le, pages;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+	int nr = nr_to_scan;
+#else
+	int nr = sc->nr_to_scan;
+#endif
+
+	if (0 == nr) {
+		return mali_mem_os_shrink_count(shrinker, sc);
+	}
+
+	if (0 == spin_trylock_irqsave(&mali_mem_os_allocator.pool_lock, flags)) {
+		/* Not able to lock. */
+		return -1;
+	}
+
+	if (0 == mali_mem_os_allocator.pool_count) {
+		/* No pages availble */
+		spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags);
+		return 0;
+	}
+
+	/* Release from general page pool */
+	nr = min((size_t)nr, mali_mem_os_allocator.pool_count);
+	mali_mem_os_allocator.pool_count -= nr;
+	list_for_each(le, &mali_mem_os_allocator.pool_pages) {
+		--nr;
+		if (0 == nr) break;
+	}
+	list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le);
+	spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags);
+
+	list_for_each_entry_safe(m_page, m_tmp, &pages, list) {
+		mali_mem_os_free_page(m_page);
+	}
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES > mali_mem_os_allocator.pool_count) {
+		/* Pools are empty, stop timer */
+		MALI_DEBUG_PRINT(5, ("Stopping timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count));
+		cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker);
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	return mali_mem_os_shrink_count(shrinker, sc);
+#else
+	return nr;
+#endif
+}
+
+static void mali_mem_os_trim_pool(struct work_struct *data)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	struct list_head *le;
+	LIST_HEAD(pages);
+	size_t nr_to_free;
+
+	MALI_IGNORE(data);
+
+	MALI_DEBUG_PRINT(3, ("OS Mem: Trimming pool %u\n", mali_mem_os_allocator.pool_count));
+
+	/* Release from general page pool */
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+		size_t count = mali_mem_os_allocator.pool_count - MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES;
+		const size_t min_to_free = min(64, MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES);
+
+		/* Free half the pages on the pool above the static limit. Or 64 pages, 256KB. */
+		nr_to_free = max(count / 2, min_to_free);
+
+		mali_mem_os_allocator.pool_count -= nr_to_free;
+		list_for_each(le, &mali_mem_os_allocator.pool_pages) {
+			--nr_to_free;
+			if (0 == nr_to_free) break;
+		}
+		list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le);
+	}
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	list_for_each_entry_safe(m_page, m_tmp, &pages, list) {
+		mali_mem_os_free_page(m_page);
+	}
+
+	/* Release some pages from page table page pool */
+	mali_mem_os_trim_page_table_page_pool();
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+		MALI_DEBUG_PRINT(4, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count));
+		queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES);
+	}
+}
+
+_mali_osk_errcode_t mali_mem_os_init(void)
+{
+	mali_mem_os_allocator.wq = alloc_workqueue("mali-mem", WQ_UNBOUND, 1);
+	if (NULL == mali_mem_os_allocator.wq) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	mali_dma_attrs = DMA_ATTR_WRITE_COMBINE;
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &dma_attrs_wc);
+#endif
+
+	register_shrinker(&mali_mem_os_allocator.shrinker);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_os_term(void)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	unregister_shrinker(&mali_mem_os_allocator.shrinker);
+	cancel_delayed_work_sync(&mali_mem_os_allocator.timed_shrinker);
+
+	if (NULL != mali_mem_os_allocator.wq) {
+		destroy_workqueue(mali_mem_os_allocator.wq);
+		mali_mem_os_allocator.wq = NULL;
+	}
+
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+	list_for_each_entry_safe(m_page, m_tmp, &mali_mem_os_allocator.pool_pages, list) {
+		mali_mem_os_free_page(m_page);
+
+		--mali_mem_os_allocator.pool_count;
+	}
+	BUG_ON(mali_mem_os_allocator.pool_count);
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	/* Release from page table page pool */
+	do {
+		u32 nr_to_free;
+
+		spin_lock(&mali_mem_page_table_page_pool.lock);
+
+		nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, mali_mem_page_table_page_pool.count);
+
+		/* Pool lock will be released by the callee. */
+		mali_mem_os_page_table_pool_free(nr_to_free);
+	} while (0 != mali_mem_page_table_page_pool.count);
+}
+
+_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size)
+{
+	mali_mem_os_allocator.allocation_limit = size;
+
+	MALI_SUCCESS;
+}
+
+u32 mali_mem_os_stat(void)
+{
+	return atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE;
+}
diff --git a/utgard/r5p1/linux/mali_memory_os_alloc.h b/utgard/r5p1/linux/mali_memory_os_alloc.h
new file mode 100755
index 0000000..572b067
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_os_alloc.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_OS_ALLOC_H__
+#define __MALI_MEMORY_OS_ALLOC_H__
+
+#include "mali_osk.h"
+#include "mali_memory_types.h"
+
+/** @brief Release Mali OS memory
+ *
+ * The session memory_lock must be held when calling this function.
+ *
+ * @param mem_bkend Pointer to the mali_mem_backend to release
+ */
+void mali_mem_os_release(mali_mem_backend *mem_bkend);
+
+_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping);
+
+void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt);
+
+_mali_osk_errcode_t mali_mem_os_init(void);
+void mali_mem_os_term(void);
+u32 mali_mem_os_stat(void);
+
+int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size);
+void mali_mem_os_free(mali_mem_os_mem *os_mem);
+void mali_mem_os_mali_map(mali_mem_backend *mem_bkend, u32 vaddr, u32 props);
+int mali_mem_os_cpu_map(mali_mem_os_mem *os_mem, struct vm_area_struct *vma);
+
+
+#endif /* __MALI_MEMORY_OS_ALLOC_H__ */
diff --git a/utgard/r5p1/linux/mali_memory_types.h b/utgard/r5p1/linux/mali_memory_types.h
new file mode 100755
index 0000000..5376f77
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_types.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_TYPES_H__
+#define __MALI_MEMORY_TYPES_H__
+
+#if defined(CONFIG_MALI400_UMP)
+#include "ump_kernel_interface.h"
+#endif
+
+typedef u32 mali_address_t;
+
+typedef enum mali_mem_type {
+	MALI_MEM_OS,
+	MALI_MEM_EXTERNAL,
+	MALI_MEM_DMA_BUF,
+	MALI_MEM_UMP,
+	MALI_MEM_BLOCK,
+	MALI_MEM_TYPE_MAX,
+} mali_mem_type;
+
+typedef struct mali_block_item {
+	/* for block type, the block_phy is alway page size align
+	* so use low 12bit used for ref_cout.
+	*/
+	unsigned long phy_addr;
+} mali_block_item;
+
+typedef enum mali_page_node_type {
+	MALI_PAGE_NODE_OS,
+	MALI_PAGE_NODE_BLOCK,
+} mali_page_node_type;
+
+typedef struct mali_block_node {
+	struct list_head list;
+	mali_block_item *blk_it; /*pointer to block item*/
+	u32 type;
+} mali_block_node;
+
+typedef struct mali_page_node {
+	struct list_head list;
+	struct page *page;
+} mali_page_node;
+
+typedef struct mali_mem_os_mem {
+	struct list_head pages;
+	u32 count;
+} mali_mem_os_mem;
+
+typedef struct mali_mem_dma_buf {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+	struct mali_dma_buf_attachment *attachment;
+#endif
+} mali_mem_dma_buf;
+
+typedef struct mali_mem_external {
+	dma_addr_t phys;
+	u32 size;
+} mali_mem_external;
+
+typedef struct mali_mem_ump {
+#if defined(CONFIG_MALI400_UMP)
+	ump_dd_handle handle;
+#endif
+} mali_mem_ump;
+
+typedef struct block_allocator_allocation {
+	/* The list will be released in reverse order */
+	struct block_info *last_allocated;
+	u32 mapping_length;
+	struct block_allocator *info;
+} block_allocator_allocation;
+
+typedef struct mali_mem_block_mem {
+	struct list_head pfns;
+	u32 count;
+} mali_mem_block_mem;
+
+typedef struct mali_mem_virt_mali_mapping {
+	mali_address_t addr; /* Virtual Mali address */
+	u32 properties;      /* MMU Permissions + cache, must match MMU HW */
+} mali_mem_virt_mali_mapping;
+
+typedef struct mali_mem_virt_cpu_mapping {
+	void __user *addr;
+	u32 ref;
+} mali_mem_virt_cpu_mapping;
+
+#define MALI_MEM_ALLOCATION_VALID_MAGIC 0xdeda110c
+#define MALI_MEM_ALLOCATION_FREED_MAGIC 0x10101010
+
+typedef struct mali_mm_node {
+	/* MALI GPU vaddr start, use u32 for mmu only support 32bit address*/
+	uint32_t start; /* GPU vaddr */
+	uint32_t size;  /* GPU allocation virtual size */
+	unsigned allocated : 1;
+} mali_mm_node;
+
+typedef struct mali_vma_node {
+	struct mali_mm_node vm_node;
+	struct rb_node vm_rb;
+} mali_vma_node;
+
+
+typedef struct mali_mem_allocation {
+	MALI_DEBUG_CODE(u32 magic);
+	mali_mem_type type;                /**< Type of memory */
+	int id;                            /**< ID in the descriptor map for this allocation */
+
+	u32 size;                          /**< Size of the allocation */
+	u32 flags;                         /**< Flags for this allocation */
+
+	struct mali_session_data *session; /**< Pointer to session that owns the allocation */
+
+	/* Union selected by type. */
+	union {
+		mali_mem_os_mem os_mem;       /**< MALI_MEM_OS */
+		mali_mem_external ext_mem;    /**< MALI_MEM_EXTERNAL */
+		mali_mem_dma_buf dma_buf;     /**< MALI_MEM_DMA_BUF */
+		mali_mem_ump ump_mem;         /**< MALI_MEM_UMP */
+		mali_mem_block_mem block_mem; /**< MALI_MEM_BLOCK */
+	};
+
+	mali_mem_virt_cpu_mapping cpu_mapping; /**< CPU mapping */
+	mali_mem_virt_mali_mapping mali_mapping; /**< Mali mapping */
+
+	/* add for new memory system */
+	struct mali_vma_node mali_vma_node;
+	u32 vsize; /* virtual size*/
+	u32 psize; /* physical backend memory size*/
+	struct list_head list;
+	s32 backend_handle; /* idr for mem_backend */
+	struct kref ref;
+} mali_mem_allocation;
+
+
+/* COW backend memory type */
+typedef struct mali_mem_cow {
+	struct list_head pages;  /**< all pages for this cow backend allocation,
+                                                                including new allocated pages for modified range*/
+	u32 count;               /**< number of pages */
+} mali_mem_cow;
+
+typedef struct mali_mem_backend {
+	mali_mem_type type;                /**< Type of backend memory */
+	u32 flags;                         /**< Flags for this allocation */
+	u32 size;
+	/* Union selected by type. */
+	union {
+		mali_mem_os_mem os_mem;       /**< MALI_MEM_OS */
+		mali_mem_external ext_mem;    /**< MALI_MEM_EXTERNAL */
+		mali_mem_dma_buf dma_buf;     /**< MALI_MEM_DMA_BUF */
+		mali_mem_ump ump_mem;         /**< MALI_MEM_UMP */
+		mali_mem_block_mem block_mem; /**< MALI_MEM_BLOCK */
+		mali_mem_cow cow_mem;
+	};
+	mali_mem_allocation *mali_allocation;
+} mali_mem_backend;
+
+#define MALI_MEM_FLAG_MALI_GUARD_PAGE (1 << 0)
+#define MALI_MEM_FLAG_DONT_CPU_MAP    (1 << 1)
+
+#endif /* __MALI_MEMORY_TYPES__ */
diff --git a/utgard/r5p1/linux/mali_memory_ump.c b/utgard/r5p1/linux/mali_memory_ump.c
new file mode 100755
index 0000000..207ac91
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_ump.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory.h"
+#include "ump_kernel_interface.h"
+
+static int mali_mem_ump_map(mali_mem_backend *mem_backend)
+{
+	ump_dd_handle ump_mem;
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	u32 nr_blocks;
+	u32 i;
+	ump_dd_physical_block *ump_blocks;
+	struct mali_page_directory *pagedir;
+	u32 offset = 0;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	ump_mem = mem_backend->ump_mem.handle;
+	MALI_DEBUG_ASSERT(UMP_DD_HANDLE_INVALID != ump_mem);
+
+	nr_blocks = ump_dd_phys_block_count_get(ump_mem);
+	if (nr_blocks == 0) {
+		MALI_DEBUG_PRINT(1, ("No block count\n"));
+		return -EINVAL;
+	}
+
+	ump_blocks = _mali_osk_malloc(sizeof(*ump_blocks) * nr_blocks);
+	if (NULL == ump_blocks) {
+		return -ENOMEM;
+	}
+
+	if (UMP_DD_INVALID == ump_dd_phys_blocks_get(ump_mem, ump_blocks, nr_blocks)) {
+		_mali_osk_free(ump_blocks);
+		return -EFAULT;
+	}
+
+	pagedir = session->page_directory;
+
+	mali_session_memory_lock(session);
+
+	err = mali_mem_mali_map_prepare(alloc);
+	if (_MALI_OSK_ERR_OK != err) {
+		MALI_DEBUG_PRINT(1, ("Mapping of UMP memory failed\n"));
+
+		_mali_osk_free(ump_blocks);
+		mali_session_memory_unlock(session);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_blocks; ++i) {
+		u32 virt = alloc->mali_vma_node.vm_node.start + offset;
+
+		MALI_DEBUG_PRINT(7, ("Mapping in 0x%08x size %d\n", ump_blocks[i].addr , ump_blocks[i].size));
+
+		mali_mmu_pagedir_update(pagedir, virt, ump_blocks[i].addr,
+					ump_blocks[i].size, MALI_MMU_FLAGS_DEFAULT);
+
+		offset += ump_blocks[i].size;
+	}
+
+	if (alloc->flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		u32 virt = alloc->mali_vma_node.vm_node.start + offset;
+
+		/* Map in an extra virtual guard page at the end of the VMA */
+		MALI_DEBUG_PRINT(6, ("Mapping in extra guard page\n"));
+
+		mali_mmu_pagedir_update(pagedir, virt, ump_blocks[0].addr, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+
+		offset += _MALI_OSK_MALI_PAGE_SIZE;
+	}
+	session->mali_mem_array[mem_backend->type] += mem_backend->size;
+	mali_session_memory_unlock(session);
+	_mali_osk_free(ump_blocks);
+	return 0;
+}
+
+static void mali_mem_ump_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+
+	session->mali_mem_array[alloc->type] -= alloc->psize;
+	mali_session_memory_unlock(session);
+}
+
+int mali_memory_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32  secure_id, u32 flags)
+{
+	ump_dd_handle ump_mem;
+	int ret;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+
+	MALI_DEBUG_PRINT(3,
+			 ("Requested to map ump memory with secure id %d into virtual memory 0x%08X, size 0x%08X\n",
+			  secure_id, alloc->mali_vma_node.vm_node.start, alloc->mali_vma_node.vm_node.size));
+
+	ump_mem = ump_dd_handle_create_from_secure_id(secure_id);
+	if (UMP_DD_HANDLE_INVALID == ump_mem) MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP;
+	if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+	mem_backend->ump_mem.handle = ump_mem;
+
+	ret = mali_mem_ump_map(mem_backend);
+	if (0 != ret) {
+		ump_dd_reference_release(ump_mem);
+		return _MALI_OSK_ERR_FAULT;
+	}
+	MALI_DEBUG_PRINT(3, ("Returning from UMP bind\n"));
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_ump_release(mali_mem_backend *mem_backend)
+{
+	ump_dd_handle ump_mem;
+	mali_mem_allocation *alloc;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+	ump_mem = mem_backend->ump_mem.handle;
+	MALI_DEBUG_ASSERT(UMP_DD_HANDLE_INVALID != ump_mem);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	mali_mem_ump_unmap(alloc);
+	ump_dd_reference_release(ump_mem);
+}
+
+int mali_memory_unbind_ump_buf(mali_mem_backend *mem_backend)
+{
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	mali_mem_ump_release(mem_backend);
+	return _MALI_OSK_ERR_OK;
+}
+
diff --git a/utgard/r5p1/linux/mali_memory_ump.h b/utgard/r5p1/linux/mali_memory_ump.h
new file mode 100644
index 0000000..0f4aa19
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_ump.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_UMP_BUF_H__
+#define __MALI_MEMORY_UMP_BUF_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_memory.h"
+
+int mali_memory_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32  secure_id, u32 flags);
+int mali_memory_unbind_ump_buf(mali_mem_backend *mem_backend);
+void mali_mem_ump_release(mali_mem_backend *mem_backend);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MEMORY_DMA_BUF_H__ */
diff --git a/utgard/r5p1/linux/mali_memory_util.c b/utgard/r5p1/linux/mali_memory_util.c
new file mode 100644
index 0000000..5947caf
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_util.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#endif
+#if defined(CONFIG_MALI400_UMP)
+#include "mali_memory_ump.h"
+#endif
+#include "mali_memory_external.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_block_alloc.h"
+
+/**
+*function @_mali_free_allocation_mem - free a memory allocation
+* support backend type:
+* MALI_MEM_OS
+* maybe COW later?
+*/
+static void _mali_free_allocation_mem(struct kref *kref)
+{
+	mali_mem_backend *mem_bkend = NULL;
+
+	mali_mem_allocation *mali_alloc = container_of(kref, struct mali_mem_allocation, ref);
+
+	struct mali_session_data *session = mali_alloc->session;
+	MALI_DEBUG_PRINT(4, (" _mali_free_allocation_mem, psize =0x%x! \n", mali_alloc->psize));
+	if (0 == mali_alloc->psize)
+		goto out;
+
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+
+	MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+	switch (mem_bkend->type) {
+	case MALI_MEM_OS:
+		mali_mem_os_release(mem_bkend);
+		break;
+	case MALI_MEM_UMP:
+#if defined(CONFIG_MALI400_UMP)
+		mali_mem_ump_release(mem_bkend);
+#else
+		MALI_DEBUG_PRINT(2, ("DMA not supported\n"));
+#endif
+		break;
+	case MALI_MEM_DMA_BUF:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		mali_mem_dma_buf_release(mem_bkend);
+#else
+		MALI_DEBUG_PRINT(2, ("DMA not supported\n"));
+#endif
+		break;
+	case MALI_MEM_EXTERNAL:
+		mali_mem_external_release(mem_bkend);
+		break;
+	case MALI_MEM_BLOCK:
+		mali_mem_block_release(mem_bkend);
+		break;
+	default:
+		MALI_DEBUG_PRINT(1, ("mem type %d is not in the mali_mem_type enum.\n", mem_bkend->type));
+		break;
+	}
+
+	/* remove backend memory idex */
+	mutex_lock(&mali_idr_mutex);
+	idr_remove(&mali_backend_idr, mali_alloc->backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	kfree(mem_bkend);
+out:
+	/* remove memory allocation  */
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_alloc->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_alloc);
+
+}
+
+
+
+/**
+*  ref_count for allocation
+*/
+void mali_allocation_unref(struct mali_mem_allocation **alloc)
+{
+	mali_mem_allocation *mali_alloc = *alloc;
+	*alloc = NULL;
+	kref_put(&mali_alloc->ref, _mali_free_allocation_mem);
+}
+
+void mali_allocation_ref(struct mali_mem_allocation *alloc)
+{
+	kref_get(&alloc->ref);
+}
+
+void mali_free_session_allocations(struct mali_session_data *session)
+{
+
+	struct mali_mem_allocation *entry, *next;
+
+	MALI_DEBUG_PRINT(4, (" mali_free_session_allocations! \n"));
+
+	list_for_each_entry_safe(entry, next, &session->allocation_mgr.head, list) {
+		mali_allocation_unref(&entry);
+	}
+}
+
diff --git a/utgard/r5p1/linux/mali_memory_util.h b/utgard/r5p1/linux/mali_memory_util.h
new file mode 100644
index 0000000..419e05f
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_util.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+
+void mali_allocation_unref(struct mali_mem_allocation **alloc);
+
+void mali_allocation_ref(struct mali_mem_allocation *alloc);
+
+void mali_free_session_allocations(struct mali_session_data *session);
+
diff --git a/utgard/r5p1/linux/mali_memory_virtual.c b/utgard/r5p1/linux/mali_memory_virtual.c
new file mode 100644
index 0000000..535feff
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_virtual.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+
+
+/**
+*internal helper to link node into the rb-tree
+*/
+static inline void _mali_vma_offset_add_rb(struct mali_allocation_manager *mgr,
+		struct mali_vma_node *node)
+{
+	struct rb_node **iter = &mgr->allocation_mgr_rb.rb_node;
+	struct rb_node *parent = NULL;
+	struct mali_vma_node *iter_node;
+
+	while (likely(*iter)) {
+		parent = *iter;
+		iter_node = rb_entry(*iter, struct mali_vma_node, vm_rb);
+
+		if (node->vm_node.start < iter_node->vm_node.start)
+			iter = &(*iter)->rb_left;
+		else if (node->vm_node.start > iter_node->vm_node.start)
+			iter = &(*iter)->rb_right;
+		else
+			/* Not support yet */
+			MALI_DEBUG_ASSERT(0);
+	}
+
+	rb_link_node(&node->vm_rb, parent, iter);
+	rb_insert_color(&node->vm_rb, &mgr->allocation_mgr_rb);
+}
+
+/**
+ * mali_vma_offset_add() - Add offset node to RB Tree
+ */
+int mali_vma_offset_add(struct mali_allocation_manager *mgr,
+			struct mali_vma_node *node)
+{
+	int ret = 0;
+	write_lock(&mgr->vm_lock);
+
+	if (node->vm_node.allocated) {
+		goto out;
+	}
+
+	_mali_vma_offset_add_rb(mgr, node);
+	/* set to allocated */
+	node->vm_node.allocated = 1;
+
+out:
+	write_unlock(&mgr->vm_lock);
+	return ret;
+}
+
+/**
+ * mali_vma_offset_remove() - Remove offset node from RB tree
+ */
+void mali_vma_offset_remove(struct mali_allocation_manager *mgr,
+			    struct mali_vma_node *node)
+{
+	write_lock(&mgr->vm_lock);
+
+	if (node->vm_node.allocated) {
+		rb_erase(&node->vm_rb, &mgr->allocation_mgr_rb);
+		memset(&node->vm_node, 0, sizeof(node->vm_node));
+	}
+	write_unlock(&mgr->vm_lock);
+}
+
+/**
+* mali_vma_offset_search - Search the node in RB tree
+*/
+struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr,
+		unsigned long start, unsigned long pages)
+{
+	struct mali_vma_node *node, *best;
+	struct rb_node *iter;
+	unsigned long offset;
+	read_lock(&mgr->vm_lock);
+
+	iter = mgr->allocation_mgr_rb.rb_node;
+	best = NULL;
+
+	while (likely(iter)) {
+		node = rb_entry(iter, struct mali_vma_node, vm_rb);
+		offset = node->vm_node.start;
+		if (start >= offset) {
+			iter = iter->rb_right;
+			best = node;
+			if (start == offset)
+				break;
+		} else {
+			iter = iter->rb_left;
+		}
+	}
+
+	if (best) {
+		offset = best->vm_node.start + best->vm_node.size;
+		if (offset <= start + pages)
+			best = NULL;
+	}
+	read_unlock(&mgr->vm_lock);
+
+	return best;
+}
+
diff --git a/utgard/r5p1/linux/mali_memory_virtual.h b/utgard/r5p1/linux/mali_memory_virtual.h
new file mode 100644
index 0000000..1f64c5e
--- /dev/null
+++ b/utgard/r5p1/linux/mali_memory_virtual.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __MALI_GPU_VMEM_H__
+#define __MALI_GPU_VMEM_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_manager.h"
+
+
+
+int mali_vma_offset_add(struct mali_allocation_manager *mgr,
+			struct mali_vma_node *node);
+
+void mali_vma_offset_remove(struct mali_allocation_manager *mgr,
+			    struct mali_vma_node *node);
+
+struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr,
+		unsigned long start,    unsigned long pages);
+
+#endif
diff --git a/utgard/r5p1/linux/mali_osk_atomics.c b/utgard/r5p1/linux/mali_osk_atomics.c
new file mode 100755
index 0000000..ba630e2
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_atomics.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2010, 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_atomics.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <asm/atomic.h>
+#include "mali_kernel_common.h"
+
+void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom)
+{
+	atomic_dec((atomic_t *)&atom->u.val);
+}
+
+u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom)
+{
+	return atomic_dec_return((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom)
+{
+	atomic_inc((atomic_t *)&atom->u.val);
+}
+
+u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom)
+{
+	return atomic_inc_return((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val)
+{
+	MALI_DEBUG_ASSERT_POINTER(atom);
+	atomic_set((atomic_t *)&atom->u.val, val);
+}
+
+u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom)
+{
+	return atomic_read((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_term(_mali_osk_atomic_t *atom)
+{
+	MALI_IGNORE(atom);
+}
+
+u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val)
+{
+	return atomic_xchg((atomic_t *)&atom->u.val, val);
+}
diff --git a/utgard/r5p1/linux/mali_osk_irq.c b/utgard/r5p1/linux/mali_osk_irq.c
new file mode 100755
index 0000000..6876b6d
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_irq.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_irq.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/types.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+#include <linux/slab.h>	/* For memory allocation */
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+typedef struct _mali_osk_irq_t_struct {
+	u32 irqnum;
+	void *data;
+	_mali_osk_irq_uhandler_t uhandler;
+} mali_osk_irq_object_t;
+
+typedef irqreturn_t (*irq_handler_func_t)(int, void *, struct pt_regs *);
+static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id);   /* , struct pt_regs *regs*/
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+u32 get_irqnum(struct _mali_osk_irq_t_struct* irq)
+{
+	if (irq)
+		return irq->irqnum;
+	else
+		return 0;
+}
+#endif
+
+#if defined(DEBUG)
+
+struct test_interrupt_data {
+	_mali_osk_irq_ack_t ack_func;
+	void *probe_data;
+	mali_bool interrupt_received;
+	wait_queue_head_t wq;
+};
+
+static irqreturn_t test_interrupt_upper_half(int port_name, void *dev_id)
+{
+	irqreturn_t ret = IRQ_NONE;
+	struct test_interrupt_data *data = (struct test_interrupt_data *)dev_id;
+
+	if (_MALI_OSK_ERR_OK == data->ack_func(data->probe_data)) {
+		data->interrupt_received = MALI_TRUE;
+		wake_up(&data->wq);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static _mali_osk_errcode_t test_interrupt(u32 irqnum,
+		_mali_osk_irq_trigger_t trigger_func,
+		_mali_osk_irq_ack_t ack_func,
+		void *probe_data,
+		const char *description)
+{
+	unsigned long irq_flags = 0;
+	struct test_interrupt_data data = {
+		.ack_func = ack_func,
+		.probe_data = probe_data,
+		.interrupt_received = MALI_FALSE,
+	};
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	irq_flags |= IRQF_SHARED;
+#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	if (0 != request_irq(irqnum, test_interrupt_upper_half, irq_flags, description, &data)) {
+		MALI_DEBUG_PRINT(2, ("Unable to install test IRQ handler for core '%s'\n", description));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	init_waitqueue_head(&data.wq);
+
+	trigger_func(probe_data);
+	wait_event_timeout(data.wq, data.interrupt_received, 100);
+
+	free_irq(irqnum, &data);
+
+	if (data.interrupt_received) {
+		MALI_DEBUG_PRINT(3, ("%s: Interrupt test OK\n", description));
+		return _MALI_OSK_ERR_OK;
+	} else {
+		MALI_PRINT_ERROR(("%s: Failed interrupt test on %u\n", description, irqnum));
+		return _MALI_OSK_ERR_FAULT;
+	}
+}
+
+#endif /* defined(DEBUG) */
+
+_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description)
+{
+	mali_osk_irq_object_t *irq_object;
+	unsigned long irq_flags = 0;
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	irq_flags |= IRQF_SHARED;
+#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	irq_object = kmalloc(sizeof(mali_osk_irq_object_t), GFP_KERNEL);
+	if (NULL == irq_object) {
+		return NULL;
+	}
+
+	if (-1 == irqnum) {
+		/* Probe for IRQ */
+		if ((NULL != trigger_func) && (NULL != ack_func)) {
+			unsigned long probe_count = 3;
+			_mali_osk_errcode_t err;
+			int irq;
+
+			MALI_DEBUG_PRINT(2, ("Probing for irq\n"));
+
+			do {
+				unsigned long mask;
+
+				mask = probe_irq_on();
+				trigger_func(probe_data);
+
+				_mali_osk_time_ubusydelay(5);
+
+				irq = probe_irq_off(mask);
+				err = ack_func(probe_data);
+			} while (irq < 0 && (err == _MALI_OSK_ERR_OK) && probe_count--);
+
+			if (irq < 0 || (_MALI_OSK_ERR_OK != err)) irqnum = -1;
+			else irqnum = irq;
+		} else irqnum = -1; /* no probe functions, fault */
+
+		if (-1 != irqnum) {
+			/* found an irq */
+			MALI_DEBUG_PRINT(2, ("Found irq %d\n", irqnum));
+		} else {
+			MALI_DEBUG_PRINT(2, ("Probe for irq failed\n"));
+		}
+	}
+
+	irq_object->irqnum = irqnum;
+	irq_object->uhandler = uhandler;
+	irq_object->data = int_data;
+
+	if (-1 == irqnum) {
+		MALI_DEBUG_PRINT(2, ("No IRQ for core '%s' found during probe\n", description));
+		kfree(irq_object);
+		return NULL;
+	}
+
+#if defined(DEBUG)
+	/* Verify that the configured interrupt settings are working */
+	if (_MALI_OSK_ERR_OK != test_interrupt(irqnum, trigger_func, ack_func, probe_data, description)) {
+		MALI_DEBUG_PRINT(2, ("Test of IRQ handler for core '%s' failed\n", description));
+		kfree(irq_object);
+		return NULL;
+	}
+#endif
+
+	if (0 != request_irq(irqnum, irq_handler_upper_half, irq_flags, description, irq_object)) {
+		MALI_DEBUG_PRINT(2, ("Unable to install IRQ handler for core '%s'\n", description));
+		kfree(irq_object);
+		return NULL;
+	}
+
+	return irq_object;
+}
+
+void _mali_osk_irq_term(_mali_osk_irq_t *irq)
+{
+	mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)irq;
+	free_irq(irq_object->irqnum, irq_object);
+	kfree(irq_object);
+}
+
+
+/** This function is called directly in interrupt context from the OS just after
+ * the CPU get the hw-irq from mali, or other devices on the same IRQ-channel.
+ * It is registered one of these function for each mali core. When an interrupt
+ * arrives this function will be called equal times as registered mali cores.
+ * That means that we only check one mali core in one function call, and the
+ * core we check for each turn is given by the \a dev_id variable.
+ * If we detect an pending interrupt on the given core, we mask the interrupt
+ * out by settging the core's IRQ_MASK register to zero.
+ * Then we schedule the mali_core_irq_handler_bottom_half to run as high priority
+ * work queue job.
+ */
+static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id)   /* , struct pt_regs *regs*/
+{
+	irqreturn_t ret = IRQ_NONE;
+	mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)dev_id;
+
+	if (_MALI_OSK_ERR_OK == irq_object->uhandler(irq_object->data)) {
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
diff --git a/utgard/r5p1/linux/mali_osk_locks.c b/utgard/r5p1/linux/mali_osk_locks.c
new file mode 100755
index 0000000..50c0a9d
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_locks.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_locks.c
+ * Implemenation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk_locks.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+
+#ifdef DEBUG
+#ifdef LOCK_ORDER_CHECKING
+static DEFINE_SPINLOCK(lock_tracking_lock);
+static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid);
+static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid);
+static const char *const lock_order_to_string(_mali_osk_lock_order_t order);
+#endif /* LOCK_ORDER_CHECKING */
+
+void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+{
+	checker->orig_flags = flags;
+	checker->owner = 0;
+
+#ifdef LOCK_ORDER_CHECKING
+	checker->order = order;
+	checker->next = NULL;
+#endif
+}
+
+void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker)
+{
+	checker->owner = _mali_osk_get_tid();
+
+#ifdef LOCK_ORDER_CHECKING
+	if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) {
+		if (!add_lock_to_log_and_check(checker, _mali_osk_get_tid())) {
+			printk(KERN_ERR "%d: ERROR lock %p taken while holding a lock of a higher order.\n",
+			       _mali_osk_get_tid(), checker);
+			dump_stack();
+		}
+	}
+#endif
+}
+
+void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker)
+{
+
+#ifdef LOCK_ORDER_CHECKING
+	if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) {
+		remove_lock_from_log(checker, _mali_osk_get_tid());
+	}
+#endif
+	checker->owner = 0;
+}
+
+
+#ifdef LOCK_ORDER_CHECKING
+/* Lock order checking
+ * -------------------
+ *
+ * To assure that lock ordering scheme defined by _mali_osk_lock_order_t is strictly adhered to, the
+ * following function will, together with a linked list and some extra members in _mali_osk_lock_debug_s,
+ * make sure that a lock that is taken has a higher order than the current highest-order lock a
+ * thread holds.
+ *
+ * This is done in the following manner:
+ * - A linked list keeps track of locks held by a thread.
+ * - A `next' pointer is added to each lock. This is used to chain the locks together.
+ * - When taking a lock, the `add_lock_to_log_and_check' makes sure that taking
+ *   the given lock is legal. It will follow the linked list  to find the last
+ *   lock taken by this thread. If the last lock's order was lower than the
+ *   lock that is to be taken, it appends the new lock to the list and returns
+ *   true, if not, it return false. This return value is assert()'ed on in
+ *   _mali_osk_lock_wait().
+ */
+
+static struct _mali_osk_lock_debug_s *lock_lookup_list;
+
+static void dump_lock_tracking_list(void)
+{
+	struct _mali_osk_lock_debug_s *l;
+	u32 n = 1;
+
+	/* print list for debugging purposes */
+	l = lock_lookup_list;
+
+	while (NULL != l) {
+		printk(" [lock: %p, tid_owner: %d, order: %d] ->", l, l->owner, l->order);
+		l = l->next;
+		MALI_DEBUG_ASSERT(n++ < 100);
+	}
+	printk(" NULL\n");
+}
+
+static int tracking_list_length(void)
+{
+	struct _mali_osk_lock_debug_s *l;
+	u32 n = 0;
+	l = lock_lookup_list;
+
+	while (NULL != l) {
+		l = l->next;
+		n++;
+		MALI_DEBUG_ASSERT(n < 100);
+	}
+	return n;
+}
+
+static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid)
+{
+	mali_bool ret = MALI_FALSE;
+	_mali_osk_lock_order_t highest_order_for_tid = _MALI_OSK_LOCK_ORDER_FIRST;
+	struct _mali_osk_lock_debug_s *highest_order_lock = (struct _mali_osk_lock_debug_s *)0xbeefbabe;
+	struct _mali_osk_lock_debug_s *l;
+	unsigned long local_lock_flag;
+	u32 len;
+
+	spin_lock_irqsave(&lock_tracking_lock, local_lock_flag);
+	len = tracking_list_length();
+
+	l  = lock_lookup_list;
+	if (NULL == l) { /* This is the first lock taken by this thread -- record and return true */
+		lock_lookup_list = lock;
+		spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+		return MALI_TRUE;
+	} else {
+		/* Traverse the locks taken and find the lock of the highest order.
+		 * Since several threads may hold locks, each lock's owner must be
+		 * checked so that locks not owned by this thread can be ignored. */
+		for (;;) {
+			MALI_DEBUG_ASSERT_POINTER(l);
+			if (tid == l->owner && l->order >= highest_order_for_tid) {
+				highest_order_for_tid = l->order;
+				highest_order_lock = l;
+			}
+
+			if (NULL != l->next) {
+				l = l->next;
+			} else {
+				break;
+			}
+		}
+
+		l->next = lock;
+		l->next = NULL;
+	}
+
+	/* We have now found the highest order lock currently held by this thread and can see if it is
+	 * legal to take the requested lock. */
+	ret = highest_order_for_tid < lock->order;
+
+	if (!ret) {
+		printk(KERN_ERR "Took lock of order %d (%s) while holding lock of order %d (%s)\n",
+		       lock->order, lock_order_to_string(lock->order),
+		       highest_order_for_tid, lock_order_to_string(highest_order_for_tid));
+		dump_lock_tracking_list();
+	}
+
+	if (len + 1 != tracking_list_length()) {
+		printk(KERN_ERR "************ lock: %p\n", lock);
+		printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length());
+		dump_lock_tracking_list();
+		MALI_DEBUG_ASSERT_POINTER(NULL);
+	}
+
+	spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+	return ret;
+}
+
+static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid)
+{
+	struct _mali_osk_lock_debug_s *curr;
+	struct _mali_osk_lock_debug_s *prev = NULL;
+	unsigned long local_lock_flag;
+	u32 len;
+	u32 n = 0;
+
+	spin_lock_irqsave(&lock_tracking_lock, local_lock_flag);
+	len = tracking_list_length();
+	curr = lock_lookup_list;
+
+	if (NULL == curr) {
+		printk(KERN_ERR "Error: Lock tracking list was empty on call to remove_lock_from_log\n");
+		dump_lock_tracking_list();
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(curr);
+
+
+	while (lock != curr) {
+		prev = curr;
+
+		MALI_DEBUG_ASSERT_POINTER(curr);
+		curr = curr->next;
+		MALI_DEBUG_ASSERT(n++ < 100);
+	}
+
+	if (NULL == prev) {
+		lock_lookup_list = curr->next;
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(curr);
+		MALI_DEBUG_ASSERT_POINTER(prev);
+		prev->next = curr->next;
+	}
+
+	lock->next = NULL;
+
+	if (len - 1 != tracking_list_length()) {
+		printk(KERN_ERR "************ lock: %p\n", lock);
+		printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length());
+		dump_lock_tracking_list();
+		MALI_DEBUG_ASSERT_POINTER(NULL);
+	}
+
+	spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+}
+
+static const char *const lock_order_to_string(_mali_osk_lock_order_t order)
+{
+	switch (order) {
+	case _MALI_OSK_LOCK_ORDER_SESSIONS:
+		return "_MALI_OSK_LOCK_ORDER_SESSIONS";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_SESSION:
+		return "_MALI_OSK_LOCK_ORDER_MEM_SESSION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_INFO:
+		return "_MALI_OSK_LOCK_ORDER_MEM_INFO";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_PT_CACHE:
+		return "_MALI_OSK_LOCK_ORDER_MEM_PT_CACHE";
+		break;
+	case _MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP:
+		return "_MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PM_EXECUTION:
+		return "_MALI_OSK_LOCK_ORDER_PM_EXECUTION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_EXECUTOR:
+		return "_MALI_OSK_LOCK_ORDER_EXECUTOR";
+		break;
+	case _MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM:
+		return "_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SCHEDULER:
+		return "_MALI_OSK_LOCK_ORDER_SCHEDULER";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED:
+		return "_MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED";
+		break;
+	case _MALI_OSK_LOCK_ORDER_DMA_COMMAND:
+		return "_MALI_OSK_LOCK_ORDER_DMA_COMMAND";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PROFILING:
+		return "_MALI_OSK_LOCK_ORDER_PROFILING";
+		break;
+	case _MALI_OSK_LOCK_ORDER_L2:
+		return "_MALI_OSK_LOCK_ORDER_L2";
+		break;
+	case _MALI_OSK_LOCK_ORDER_L2_COMMAND:
+		return "_MALI_OSK_LOCK_ORDER_L2_COMMAND";
+		break;
+	case _MALI_OSK_LOCK_ORDER_UTILIZATION:
+		return "_MALI_OSK_LOCK_ORDER_UTILIZATION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS:
+		return "_MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PM_STATE:
+		return "_MALI_OSK_LOCK_ORDER_PM_STATE";
+		break;
+	default:
+		return "<UNKNOWN_LOCK_ORDER>";
+	}
+}
+#endif /* LOCK_ORDER_CHECKING */
+#endif /* DEBUG */
diff --git a/utgard/r5p1/linux/mali_osk_locks.h b/utgard/r5p1/linux/mali_osk_locks.h
new file mode 100755
index 0000000..aa32a81
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_locks.h
@@ -0,0 +1,326 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_locks.h
+ * Defines OS abstraction of lock and mutex
+ */
+#ifndef _MALI_OSK_LOCKS_H
+#define _MALI_OSK_LOCKS_H
+
+#include <linux/spinlock.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+
+#include <linux/slab.h>
+
+#include "mali_osk_types.h"
+
+#ifdef _cplusplus
+extern "C" {
+#endif
+
+	/* When DEBUG is enabled, this struct will be used to track owner, mode and order checking */
+#ifdef DEBUG
+	struct _mali_osk_lock_debug_s {
+		u32 owner;
+		_mali_osk_lock_flags_t orig_flags;
+		_mali_osk_lock_order_t order;
+		struct _mali_osk_lock_debug_s *next;
+	};
+#endif
+
+	/* Anstraction of spinlock_t */
+	struct _mali_osk_spinlock_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+		spinlock_t spinlock;
+	};
+
+	/* Abstration of spinlock_t and lock flag which is used to store register's state before locking */
+	struct _mali_osk_spinlock_irq_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+
+		spinlock_t spinlock;
+		unsigned long flags;
+	};
+
+	/* Abstraction of rw_semaphore in OS */
+	struct _mali_osk_mutex_rw_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+		_mali_osk_lock_mode_t mode;
+#endif
+
+		struct rw_semaphore rw_sema;
+	};
+
+	/* Mutex and mutex_interruptible functions share the same osk mutex struct */
+	struct _mali_osk_mutex_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+		struct mutex mutex;
+	};
+
+#ifdef DEBUG
+	/** @brief _mali_osk_locks_debug_init/add/remove() functions are declared when DEBUG is enabled and
+	 * defined in file mali_osk_locks.c. When LOCK_ORDER_CHECKING is enabled, calling these functions when we
+	 * init/lock/unlock a lock/mutex, we could track lock order of a given tid. */
+	void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order);
+	void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker);
+	void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker);
+
+	/** @brief This function can return a given lock's owner when DEBUG     is enabled. */
+	static inline u32 _mali_osk_lock_get_owner(struct _mali_osk_lock_debug_s *lock)
+	{
+		return lock->owner;
+	}
+#else
+#define _mali_osk_locks_debug_init(x, y, z) do {} while (0)
+#define _mali_osk_locks_debug_add(x) do {} while (0)
+#define _mali_osk_locks_debug_remove(x) do {} while (0)
+#endif
+
+	/** @brief Before use _mali_osk_spin_lock, init function should be used to allocate memory and initial spinlock*/
+	static inline _mali_osk_spinlock_t *_mali_osk_spinlock_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_spinlock_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_spinlock_t), GFP_KERNEL);
+		if (NULL == lock) {
+			return NULL;
+		}
+		spin_lock_init(&lock->spinlock);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief Lock a spinlock */
+	static inline void  _mali_osk_spinlock_lock(_mali_osk_spinlock_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		spin_lock(&lock->spinlock);
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock a spinlock */
+	static inline void _mali_osk_spinlock_unlock(_mali_osk_spinlock_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		spin_unlock(&lock->spinlock);
+	}
+
+	/** @brief Free a memory block which the argument lock pointed to and its type must be
+	 * _mali_osk_spinlock_t *. */
+	static inline void _mali_osk_spinlock_term(_mali_osk_spinlock_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Before _mali_osk_spinlock_irq_lock/unlock/term() is called, init function should be
+	 * called to initial spinlock and flags in struct _mali_osk_spinlock_irq_t. */
+	static inline _mali_osk_spinlock_irq_t *_mali_osk_spinlock_irq_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_spinlock_irq_t *lock = NULL;
+		lock = kmalloc(sizeof(_mali_osk_spinlock_irq_t), GFP_KERNEL);
+
+		if (NULL == lock) {
+			return NULL;
+		}
+
+		lock->flags = 0;
+		spin_lock_init(&lock->spinlock);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief Lock spinlock and save the register's state */
+	static inline void _mali_osk_spinlock_irq_lock(_mali_osk_spinlock_irq_t *lock)
+	{
+		unsigned long tmp_flags;
+
+		BUG_ON(NULL == lock);
+		spin_lock_irqsave(&lock->spinlock, tmp_flags);
+		lock->flags = tmp_flags;
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock spinlock with saved register's state */
+	static inline void _mali_osk_spinlock_irq_unlock(_mali_osk_spinlock_irq_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		spin_unlock_irqrestore(&lock->spinlock, lock->flags);
+	}
+
+	/** @brief Destroy a given memory block which lock pointed to, and the lock type must be
+	 * _mali_osk_spinlock_irq_t *. */
+	static inline void _mali_osk_spinlock_irq_term(_mali_osk_spinlock_irq_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Before _mali_osk_mutex_rw_wait/signal/term() is called, we should call
+	 * _mali_osk_mutex_rw_init() to kmalloc a memory block and initial part of elements in it. */
+	static inline _mali_osk_mutex_rw_t *_mali_osk_mutex_rw_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_mutex_rw_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_mutex_rw_t), GFP_KERNEL);
+
+		if (NULL == lock) {
+			return NULL;
+		}
+
+		init_rwsem(&lock->rw_sema);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief When call _mali_osk_mutex_rw_wait/signal() functions, the second argument mode
+	 * should be assigned with value _MALI_OSK_LOCKMODE_RO or _MALI_OSK_LOCKMODE_RW */
+	static inline void _mali_osk_mutex_rw_wait(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode)
+	{
+		BUG_ON(NULL == lock);
+		BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode));
+
+		if (mode == _MALI_OSK_LOCKMODE_RO) {
+			down_read(&lock->rw_sema);
+		} else {
+			down_write(&lock->rw_sema);
+		}
+
+#ifdef DEBUG
+		if (mode == _MALI_OSK_LOCKMODE_RW) {
+			lock->mode = mode;
+		} else { /* mode == _MALI_OSK_LOCKMODE_RO */
+			lock->mode = mode;
+		}
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+#endif
+	}
+
+	/** @brief Up lock->rw_sema with up_read/write() accordinf argument mode's value. */
+	static inline void  _mali_osk_mutex_rw_signal(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode)
+	{
+		BUG_ON(NULL == lock);
+		BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode));
+#ifdef DEBUG
+		/* make sure the thread releasing the lock actually was the owner */
+		if (mode == _MALI_OSK_LOCKMODE_RW) {
+			_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+			/* This lock now has no owner */
+			lock->checker.owner = 0;
+		}
+#endif
+
+		if (mode == _MALI_OSK_LOCKMODE_RO) {
+			up_read(&lock->rw_sema);
+		} else {
+			up_write(&lock->rw_sema);
+		}
+	}
+
+	/** @brief Free a given memory block which lock pointed to and its type must be
+	 * _mali_sok_mutex_rw_t *. */
+	static inline void _mali_osk_mutex_rw_term(_mali_osk_mutex_rw_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Mutex & mutex_interruptible share the same init and term function, because they have the
+	 * same osk mutex struct, and the difference between them is which locking function they use */
+	static inline _mali_osk_mutex_t *_mali_osk_mutex_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_mutex_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_mutex_t), GFP_KERNEL);
+
+		if (NULL == lock) {
+			return NULL;
+		}
+		mutex_init(&lock->mutex);
+
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief  Lock the lock->mutex with mutex_lock_interruptible function */
+	static inline _mali_osk_errcode_t _mali_osk_mutex_wait_interruptible(_mali_osk_mutex_t *lock)
+	{
+		_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+		BUG_ON(NULL == lock);
+
+		if (mutex_lock_interruptible(&lock->mutex)) {
+			printk(KERN_WARNING "Mali: Can not lock mutex\n");
+			err = _MALI_OSK_ERR_RESTARTSYSCALL;
+		}
+
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+		return err;
+	}
+
+	/** @brief Unlock the lock->mutex which is locked with mutex_lock_interruptible() function. */
+	static inline void _mali_osk_mutex_signal_interruptible(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		mutex_unlock(&lock->mutex);
+	}
+
+	/** @brief Lock the lock->mutex just with mutex_lock() function which could not be interruptted. */
+	static inline void _mali_osk_mutex_wait(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		mutex_lock(&lock->mutex);
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock the lock->mutex which is locked with mutex_lock() function. */
+	static inline void _mali_osk_mutex_signal(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		mutex_unlock(&lock->mutex);
+	}
+
+	/** @brief Free a given memory block which lock point. */
+	static inline void _mali_osk_mutex_term(_mali_osk_mutex_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+#ifdef _cplusplus
+}
+#endif
+
+#endif
diff --git a/utgard/r5p1/linux/mali_osk_low_level_mem.c b/utgard/r5p1/linux/mali_osk_low_level_mem.c
new file mode 100755
index 0000000..c14e872
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_low_level_mem.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_low_level_mem.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <asm/io.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+
+void _mali_osk_mem_barrier(void)
+{
+	mb();
+}
+
+void _mali_osk_write_mem_barrier(void)
+{
+	wmb();
+}
+
+mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description)
+{
+	return (mali_io_address)ioremap_nocache(phys, size);
+}
+
+void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address virt)
+{
+	iounmap((void *)virt);
+}
+
+_mali_osk_errcode_t inline _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description)
+{
+#if MALI_LICENSE_IS_GPL
+	return _MALI_OSK_ERR_OK; /* GPL driver gets the mem region for the resources registered automatically */
+#else
+	return ((NULL == request_mem_region(phys, size, description)) ? _MALI_OSK_ERR_NOMEM : _MALI_OSK_ERR_OK);
+#endif
+}
+
+void inline _mali_osk_mem_unreqregion(uintptr_t phys, u32 size)
+{
+#if !MALI_LICENSE_IS_GPL
+	release_mem_region(phys, size);
+#endif
+}
+
+void inline _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val)
+{
+	__raw_writel(cpu_to_le32(val), ((u8 *)addr) + offset);
+}
+
+u32 inline _mali_osk_mem_ioread32(volatile mali_io_address addr, u32 offset)
+{
+	return ioread32(((u8 *)addr) + offset);
+}
+
+void inline _mali_osk_mem_iowrite32(volatile mali_io_address addr, u32 offset, u32 val)
+{
+	iowrite32(val, ((u8 *)addr) + offset);
+}
+
+void _mali_osk_cache_flushall(void)
+{
+	/** @note Cached memory is not currently supported in this implementation */
+}
+
+void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size)
+{
+	_mali_osk_write_mem_barrier();
+}
+
+u32 _mali_osk_mem_write_safe(void __user *dest, const void __user *src, u32 size)
+{
+#define MALI_MEM_SAFE_COPY_BLOCK_SIZE 4096
+	u32 retval = 0;
+	void *temp_buf;
+
+	temp_buf = kmalloc(MALI_MEM_SAFE_COPY_BLOCK_SIZE, GFP_KERNEL);
+	if (NULL != temp_buf) {
+		u32 bytes_left_to_copy = size;
+		u32 i;
+		for (i = 0; i < size; i += MALI_MEM_SAFE_COPY_BLOCK_SIZE) {
+			u32 size_to_copy;
+			u32 size_copied;
+			u32 bytes_left;
+
+			if (bytes_left_to_copy > MALI_MEM_SAFE_COPY_BLOCK_SIZE) {
+				size_to_copy = MALI_MEM_SAFE_COPY_BLOCK_SIZE;
+			} else {
+				size_to_copy = bytes_left_to_copy;
+			}
+
+			bytes_left = copy_from_user(temp_buf, ((char *)src) + i, size_to_copy);
+			size_copied = size_to_copy - bytes_left;
+
+			bytes_left = copy_to_user(((char *)dest) + i, temp_buf, size_copied);
+			size_copied -= bytes_left;
+
+			bytes_left_to_copy -= size_copied;
+			retval += size_copied;
+
+			if (size_copied != size_to_copy) {
+				break; /* Early out, we was not able to copy this entire block */
+			}
+		}
+
+		kfree(temp_buf);
+	}
+
+	return retval;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args)
+{
+	void __user *src;
+	void __user *dst;
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (NULL == session) {
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	src = (void __user *)(uintptr_t)args->src;
+	dst = (void __user *)(uintptr_t)args->dest;
+
+	/* Return number of bytes actually copied */
+	args->size = _mali_osk_mem_write_safe(dst, src, args->size);
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r5p1/linux/mali_osk_mali.c b/utgard/r5p1/linux/mali_osk_mali.c
new file mode 100755
index 0000000..f5b72e6
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_mali.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_mali.c
+ * Implementation of the OS abstraction layer which is specific for the Mali kernel device driver
+ */
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <linux/platform_device.h>
+#include <linux/mali/mali_utgard.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h" /* MALI_xxx macros */
+#include "mali_osk.h"           /* kernel side OS functions */
+#include "mali_kernel_linux.h"
+
+
+#ifdef CONFIG_MALI_DT
+
+#define MALI_OSK_INVALID_RESOURCE_ADDRESS 0xFFFFFFFF
+
+/**
+ * Define the max number of resource we could have.
+ */
+#define MALI_OSK_MAX_RESOURCE_NUMBER 27
+
+/**
+ * Define the max number of resource with interrupts, and they are
+ * the first 20 elements in array mali_osk_resource_bank.
+ */
+#define MALI_OSK_RESOURCE_WITH_IRQ_NUMBER 20
+
+/**
+ * pp core start and end location in mali_osk_resource_bank array.
+ */
+#define MALI_OSK_RESOURCE_PP_LOCATION_START 2
+#define MALI_OSK_RESOURCE_PP_LOCATION_END 17
+
+/**
+ * L2 cache start and end location in mali_osk_resource_bank array.
+ */
+#define MALI_OSK_RESOURCE_L2_LOCATION_START 20
+#define MALI_OSK_RESOURCE_l2_LOCATION_END 22
+
+static _mali_osk_resource_t mali_osk_resource_bank[MALI_OSK_MAX_RESOURCE_NUMBER] = {
+	{.description = "Mali_GP", .base = MALI_OFFSET_GP, .irq_name = "IRQGP",},
+	{.description = "Mali_GP_MMU", .base = MALI_OFFSET_GP_MMU, .irq_name = "IRQGPMMU",},
+	{.description = "Mali_PP0", .base = MALI_OFFSET_PP0, .irq_name = "IRQPP0",},
+	{.description = "Mali_PP0_MMU", .base = MALI_OFFSET_PP0_MMU, .irq_name = "IRQPPMMU0",},
+	{.description = "Mali_PP1", .base = MALI_OFFSET_PP1, .irq_name = "IRQPP1",},
+	{.description = "Mali_PP1_MMU", .base = MALI_OFFSET_PP1_MMU, .irq_name = "IRQPPMMU1",},
+	{.description = "Mali_PP2", .base = MALI_OFFSET_PP2, .irq_name = "IRQPP2",},
+	{.description = "Mali_PP2_MMU", .base = MALI_OFFSET_PP2_MMU, .irq_name = "IRQPPMMU2",},
+	{.description = "Mali_PP3", .base = MALI_OFFSET_PP3, .irq_name = "IRQPP3",},
+	{.description = "Mali_PP3_MMU", .base = MALI_OFFSET_PP3_MMU, .irq_name = "IRQPPMMU3",},
+	{.description = "Mali_PP4", .base = MALI_OFFSET_PP4, .irq_name = "IRQPP4",},
+	{.description = "Mali_PP4_MMU", .base = MALI_OFFSET_PP4_MMU, .irq_name = "IRQPPMMU4",},
+	{.description = "Mali_PP5", .base = MALI_OFFSET_PP5, .irq_name = "IRQPP5",},
+	{.description = "Mali_PP5_MMU", .base = MALI_OFFSET_PP5_MMU, .irq_name = "IRQPPMMU5",},
+	{.description = "Mali_PP6", .base = MALI_OFFSET_PP6, .irq_name = "IRQPP6",},
+	{.description = "Mali_PP6_MMU", .base = MALI_OFFSET_PP6_MMU, .irq_name = "IRQPPMMU6",},
+	{.description = "Mali_PP7", .base = MALI_OFFSET_PP7, .irq_name = "IRQPP7",},
+	{.description = "Mali_PP7_MMU", .base = MALI_OFFSET_PP7_MMU, .irq_name = "IRQPPMMU",},
+	{.description = "Mali_PP_Broadcast", .base = MALI_OFFSET_PP_BCAST, .irq_name = "IRQPP",},
+	{.description = "Mali_PMU", .base = MALI_OFFSET_PMU, .irq_name = "IRQPMU",},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE0,},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE1,},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE2,},
+	{.description = "Mali_PP_MMU_Broadcast", .base = MALI_OFFSET_PP_BCAST_MMU,},
+	{.description = "Mali_Broadcast", .base = MALI_OFFSET_BCAST,},
+	{.description = "Mali_DLBU", .base = MALI_OFFSET_DLBU,},
+	{.description = "Mali_DMA", .base = MALI_OFFSET_DMA,},
+};
+
+_mali_osk_errcode_t _mali_osk_resource_initialize(void)
+{
+	mali_bool mali_is_450 = MALI_FALSE;
+	int i, pp_core_num = 0, l2_core_num = 0;
+	struct resource *res;
+
+	for (i = 0; i < MALI_OSK_RESOURCE_WITH_IRQ_NUMBER; i++) {
+		res = platform_get_resource_byname(mali_platform_device, IORESOURCE_IRQ, mali_osk_resource_bank[i].irq_name);
+		if (res) {
+			mali_osk_resource_bank[i].irq = res->start;
+			if (0 == strncmp("Mali_PP_Broadcast", mali_osk_resource_bank[i].description,
+					 strlen(mali_osk_resource_bank[i].description))) {
+				mali_is_450 = MALI_TRUE;
+			}
+		} else {
+			mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+		}
+	}
+
+	for (i = MALI_OSK_RESOURCE_PP_LOCATION_START; i <= MALI_OSK_RESOURCE_PP_LOCATION_END; i++) {
+		if (MALI_OSK_INVALID_RESOURCE_ADDRESS != mali_osk_resource_bank[i].base) {
+			pp_core_num++;
+		}
+	}
+
+	/* We have to divide by 2, because we caculate twice for only one pp(pp_core and pp_mmu_core). */
+	if (0 != pp_core_num % 2) {
+		MALI_DEBUG_PRINT(2, ("The value of pp core number isn't normal."));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pp_core_num /= 2;
+
+	/**
+	 * we can caculate the number of l2 cache core according the number of pp core number
+	 * and device type(mali400/mali450).
+	 */
+	if (mali_is_450 && 4 < pp_core_num) {
+		l2_core_num = 3;
+	} else if (mali_is_450 && 4 >= pp_core_num) {
+		l2_core_num = 2;
+	} else {
+		l2_core_num = 1;
+	}
+
+	for (i = MALI_OSK_RESOURCE_l2_LOCATION_END; i > MALI_OSK_RESOURCE_L2_LOCATION_START + l2_core_num - 1; i--) {
+		mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+	}
+
+	/* If device is not mali-450 type, we have to remove related resource from resource bank. */
+	if (!mali_is_450) {
+		for (i = MALI_OSK_RESOURCE_l2_LOCATION_END + 1; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) {
+			mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res)
+{
+	int i;
+
+	if (NULL == mali_platform_device) {
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	/* Traverse all of resources in resources bank to find the matching one. */
+	for (i = 0; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) {
+		if (mali_osk_resource_bank[i].base == addr) {
+			if (NULL != res) {
+				res->base = addr + _mali_osk_resource_base_address();
+				res->description = mali_osk_resource_bank[i].description;
+				res->irq = mali_osk_resource_bank[i].irq;
+			}
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+uintptr_t _mali_osk_resource_base_address(void)
+{
+	struct resource *reg_res = NULL;
+	uintptr_t ret = 0;
+
+	reg_res = platform_get_resource(mali_platform_device, IORESOURCE_MEM, 0);
+
+	if (NULL != reg_res) {
+		ret = reg_res->start;
+	}
+
+	return ret;
+}
+
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	MALI_DEBUG_PRINT(2, ("Get pmu config from device tree configuration.\n"));
+
+	MALI_DEBUG_ASSERT(NULL != node);
+
+	if (!of_get_property(node, "pmu_domain_config", &length)) {
+		return;
+	}
+
+	if (array_size != length / sizeof(u32)) {
+		MALI_PRINT_ERROR(("Wrong pmu domain config in device tree."));
+		return;
+	}
+
+	of_property_for_each_u32(node, "pmu_domain_config", prop, p, u) {
+		domain_config_array[i] = (u16)u;
+		i++;
+	}
+
+	return;
+}
+
+u32 _mali_osk_get_pmu_switch_delay(void)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+	u32 switch_delay;
+
+	MALI_DEBUG_ASSERT(NULL != node);
+
+	if (0 == of_property_read_u32(node, "pmu_switch_delay", &switch_delay)) {
+		return switch_delay;
+	} else {
+		MALI_DEBUG_PRINT(2, ("Couldn't find pmu_switch_delay in device tree configuration.\n"));
+	}
+
+	return 0;
+}
+
+#else /* CONFIG_MALI_DT */
+
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res)
+{
+	int i;
+	uintptr_t phys_addr;
+
+	if (NULL == mali_platform_device) {
+		/* Not connected to a device */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	phys_addr = addr + _mali_osk_resource_base_address();
+	for (i = 0; i < mali_platform_device->num_resources; i++) {
+		if (IORESOURCE_MEM == resource_type(&(mali_platform_device->resource[i])) &&
+		    mali_platform_device->resource[i].start == phys_addr) {
+			if (NULL != res) {
+				res->base = phys_addr;
+				res->description = mali_platform_device->resource[i].name;
+
+				/* Any (optional) IRQ resource belonging to this resource will follow */
+				if ((i + 1) < mali_platform_device->num_resources &&
+				    IORESOURCE_IRQ == resource_type(&(mali_platform_device->resource[i + 1]))) {
+					res->irq = mali_platform_device->resource[i + 1].start;
+				} else {
+					res->irq = -1;
+				}
+			}
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+uintptr_t _mali_osk_resource_base_address(void)
+{
+	uintptr_t lowest_addr = (uintptr_t)(0 - 1);
+	uintptr_t ret = 0;
+
+	if (NULL != mali_platform_device) {
+		int i;
+		for (i = 0; i < mali_platform_device->num_resources; i++) {
+			if (mali_platform_device->resource[i].flags & IORESOURCE_MEM &&
+			    mali_platform_device->resource[i].start < lowest_addr) {
+				lowest_addr = mali_platform_device->resource[i].start;
+				ret = lowest_addr;
+			}
+		}
+	}
+
+	return ret;
+}
+
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size)
+{
+	_mali_osk_device_data data = { 0, };
+
+	MALI_DEBUG_PRINT(2, ("Get pmu config from platform device data.\n"));
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		/* Copy the custom customer power domain config */
+		_mali_osk_memcpy(domain_config_array, data.pmu_domain_config, sizeof(data.pmu_domain_config));
+	}
+
+	return;
+}
+
+u32 _mali_osk_get_pmu_switch_delay(void)
+{
+	_mali_osk_errcode_t err;
+	_mali_osk_device_data data = { 0, };
+
+	err = _mali_osk_device_data_get(&data);
+
+	if (_MALI_OSK_ERR_OK == err) {
+		return data.pmu_switch_delay;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_MALI_DT */
+
+_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(data);
+
+	if (NULL != mali_platform_device) {
+		struct mali_gpu_device_data *os_data = NULL;
+
+		os_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data;
+		if (NULL != os_data) {
+			/* Copy data from OS dependant struct to Mali neutral struct (identical!) */
+			BUILD_BUG_ON(sizeof(*os_data) != sizeof(*data));
+			_mali_osk_memcpy(data, os_data, sizeof(*os_data));
+
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+u32 _mali_osk_l2_resource_count(void)
+{
+	u32 l2_core_num = 0;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_L2_RESOURCE0, NULL))
+		l2_core_num++;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_L2_RESOURCE1, NULL))
+		l2_core_num++;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_L2_RESOURCE2, NULL))
+		l2_core_num++;
+
+	MALI_DEBUG_ASSERT(0 < l2_core_num);
+
+	return l2_core_num;
+}
+
+mali_bool _mali_osk_shared_interrupts(void)
+{
+	u32 irqs[128];
+	u32 i, j, irq, num_irqs_found = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	MALI_DEBUG_ASSERT(128 >= mali_platform_device->num_resources);
+
+	for (i = 0; i < mali_platform_device->num_resources; i++) {
+		if (IORESOURCE_IRQ & mali_platform_device->resource[i].flags) {
+			irq = mali_platform_device->resource[i].start;
+
+			for (j = 0; j < num_irqs_found; ++j) {
+				if (irq == irqs[j]) {
+					return MALI_TRUE;
+				}
+			}
+
+			irqs[num_irqs_found++] = irq;
+		}
+	}
+
+	return MALI_FALSE;
+}
diff --git a/utgard/r5p1/linux/mali_osk_math.c b/utgard/r5p1/linux/mali_osk_math.c
new file mode 100755
index 0000000..085ce76
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_math.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2010, 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_math.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/bitops.h>
+
+u32 _mali_osk_clz(u32 input)
+{
+	return 32 - fls(input);
+}
+
+u32 _mali_osk_fls(u32 input)
+{
+	return fls(input);
+}
diff --git a/utgard/r5p1/linux/mali_osk_memory.c b/utgard/r5p1/linux/mali_osk_memory.c
new file mode 100755
index 0000000..390e613
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_memory.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_memory.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+void inline *_mali_osk_calloc(u32 n, u32 size)
+{
+	return kcalloc(n, size, GFP_KERNEL);
+}
+
+void inline *_mali_osk_malloc(u32 size)
+{
+	return kmalloc(size, GFP_KERNEL);
+}
+
+void inline _mali_osk_free(void *ptr)
+{
+	kfree(ptr);
+}
+
+void inline *_mali_osk_valloc(u32 size)
+{
+	return vmalloc(size);
+}
+
+void inline _mali_osk_vfree(void *ptr)
+{
+	vfree(ptr);
+}
+
+void inline *_mali_osk_memcpy(void *dst, const void *src, u32  len)
+{
+	return memcpy(dst, src, len);
+}
+
+void inline *_mali_osk_memset(void *s, u32 c, u32 n)
+{
+	return memset(s, c, n);
+}
+
+mali_bool _mali_osk_mem_check_allocated(u32 max_allocated)
+{
+	/* No need to prevent an out-of-memory dialogue appearing on Linux,
+	 * so we always return MALI_TRUE.
+	 */
+	return MALI_TRUE;
+}
diff --git a/utgard/r5p1/linux/mali_osk_misc.c b/utgard/r5p1/linux/mali_osk_misc.c
new file mode 100755
index 0000000..0a619e3
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_misc.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_misc.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include "mali_osk.h"
+
+#if !defined(CONFIG_MALI_QUIET)
+void _mali_osk_dbgmsg(const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	vprintk(fmt, args);
+	va_end(args);
+}
+#endif /* !defined(CONFIG_MALI_QUIET) */
+
+u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...)
+{
+	int res;
+	va_list args;
+	va_start(args, fmt);
+
+	res = vscnprintf(buf, (size_t)size, fmt, args);
+
+	va_end(args);
+	return res;
+}
+
+void _mali_osk_ctxprintf(_mali_osk_print_ctx *print_ctx, const char *fmt, ...)
+{
+	va_list args;
+	char buf[512];
+
+	va_start(args, fmt);
+	vscnprintf(buf, 512, fmt, args);
+	seq_printf(print_ctx, buf);
+	va_end(args);
+}
+
+void _mali_osk_abort(void)
+{
+	/* make a simple fault by dereferencing a NULL pointer */
+	dump_stack();
+	*(int *)0 = 0;
+}
+
+void _mali_osk_break(void)
+{
+	_mali_osk_abort();
+}
+
+u32 _mali_osk_get_pid(void)
+{
+	/* Thread group ID is the process ID on Linux */
+	return (u32)current->tgid;
+}
+
+char *_mali_osk_get_comm(void)
+{
+	return (char *)current->comm;
+}
+
+
+u32 _mali_osk_get_tid(void)
+{
+	/* pid is actually identifying the thread on Linux */
+	u32 tid = current->pid;
+
+	/* If the pid is 0 the core was idle.  Instead of returning 0 we return a special number
+	 * identifying which core we are on. */
+	if (0 == tid) {
+		tid = -(1 + raw_smp_processor_id());
+	}
+
+	return tid;
+}
diff --git a/utgard/r5p1/linux/mali_osk_notification.c b/utgard/r5p1/linux/mali_osk_notification.c
new file mode 100755
index 0000000..e66fe83
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_notification.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_notification.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+/**
+ * Declaration of the notification queue object type
+ * Contains a linked list of notification pending delivery to user space.
+ * It also contains a wait queue of exclusive waiters blocked in the ioctl
+ * When a new notification is posted a single thread is resumed.
+ */
+struct _mali_osk_notification_queue_t_struct {
+	spinlock_t mutex; /**< Mutex protecting the list */
+	wait_queue_head_t receive_queue; /**< Threads waiting for new entries to the queue */
+	struct list_head head; /**< List of notifications waiting to be picked up */
+};
+
+typedef struct _mali_osk_notification_wrapper_t_struct {
+	struct list_head list;           /**< Internal linked list variable */
+	_mali_osk_notification_t data;   /**< Notification data */
+} _mali_osk_notification_wrapper_t;
+
+_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void)
+{
+	_mali_osk_notification_queue_t         *result;
+
+	result = (_mali_osk_notification_queue_t *)kmalloc(sizeof(_mali_osk_notification_queue_t), GFP_KERNEL);
+	if (NULL == result) return NULL;
+
+	spin_lock_init(&result->mutex);
+	init_waitqueue_head(&result->receive_queue);
+	INIT_LIST_HEAD(&result->head);
+
+	return result;
+}
+
+_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size)
+{
+	/* OPT Recycling of notification objects */
+	_mali_osk_notification_wrapper_t *notification;
+
+	notification = (_mali_osk_notification_wrapper_t *)kmalloc(sizeof(_mali_osk_notification_wrapper_t) + size,
+			GFP_KERNEL | __GFP_HIGH | __GFP_REPEAT);
+	if (NULL == notification) {
+		MALI_DEBUG_PRINT(1, ("Failed to create a notification object\n"));
+		return NULL;
+	}
+
+	/* Init the list */
+	INIT_LIST_HEAD(&notification->list);
+
+	if (0 != size) {
+		notification->data.result_buffer = ((u8 *)notification) + sizeof(_mali_osk_notification_wrapper_t);
+	} else {
+		notification->data.result_buffer = NULL;
+	}
+
+	/* set up the non-allocating fields */
+	notification->data.notification_type = type;
+	notification->data.result_buffer_size = size;
+
+	/* all ok */
+	return &(notification->data);
+}
+
+void _mali_osk_notification_delete(_mali_osk_notification_t *object)
+{
+	_mali_osk_notification_wrapper_t *notification;
+	MALI_DEBUG_ASSERT_POINTER(object);
+
+	notification = container_of(object, _mali_osk_notification_wrapper_t, data);
+
+	/* Free the container */
+	kfree(notification);
+}
+
+void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue)
+{
+	_mali_osk_notification_t *result;
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	while (_MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, &result)) {
+		_mali_osk_notification_delete(result);
+	}
+
+	/* not much to do, just free the memory */
+	kfree(queue);
+}
+void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object)
+{
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	unsigned long irq_flags;
+#endif
+
+	_mali_osk_notification_wrapper_t *notification;
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_ASSERT_POINTER(object);
+
+	notification = container_of(object, _mali_osk_notification_wrapper_t, data);
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_lock_irqsave(&queue->mutex, irq_flags);
+#else
+	spin_lock(&queue->mutex);
+#endif
+
+	list_add_tail(&notification->list, &queue->head);
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_unlock_irqrestore(&queue->mutex, irq_flags);
+#else
+	spin_unlock(&queue->mutex);
+#endif
+
+	/* and wake up one possible exclusive waiter */
+	wake_up(&queue->receive_queue);
+}
+
+_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result)
+{
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	unsigned long irq_flags;
+#endif
+
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	_mali_osk_notification_wrapper_t *wrapper_object;
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_lock_irqsave(&queue->mutex, irq_flags);
+#else
+	spin_lock(&queue->mutex);
+#endif
+
+	if (!list_empty(&queue->head)) {
+		wrapper_object = list_entry(queue->head.next, _mali_osk_notification_wrapper_t, list);
+		*result = &(wrapper_object->data);
+		list_del_init(&wrapper_object->list);
+		ret = _MALI_OSK_ERR_OK;
+	}
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_unlock_irqrestore(&queue->mutex, irq_flags);
+#else
+	spin_unlock(&queue->mutex);
+#endif
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result)
+{
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_ASSERT_POINTER(result);
+
+	/* default result */
+	*result = NULL;
+
+	if (wait_event_interruptible(queue->receive_queue,
+				     _MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, result))) {
+		return _MALI_OSK_ERR_RESTARTSYSCALL;
+	}
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
diff --git a/utgard/r5p1/linux/mali_osk_pm.c b/utgard/r5p1/linux/mali_osk_pm.c
new file mode 100755
index 0000000..21180d3
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_pm.c
@@ -0,0 +1,83 @@
+/**
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_pm.c
+ * Implementation of the callback functions from common power management
+ */
+
+#include <linux/sched.h>
+
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_PM_RUNTIME */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_linux.h"
+
+/* Can NOT run in atomic context */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	int err;
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	err = pm_runtime_get_sync(&(mali_platform_device->dev));
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+#endif
+	if (0 > err) {
+		MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get_sync() returned error code %d\n", err));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+	return _MALI_OSK_ERR_OK;
+}
+
+/* Can run in atomic context */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	int err;
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	err = pm_runtime_get(&(mali_platform_device->dev));
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+#endif
+	if (0 > err && -EINPROGRESS != err) {
+		MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get() returned error code %d\n", err));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/* Can run in atomic context */
+void _mali_osk_pm_dev_ref_put(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+	pm_runtime_put_autosuspend(&(mali_platform_device->dev));
+#else
+	pm_runtime_put(&(mali_platform_device->dev));
+#endif
+#endif
+}
+
+void _mali_osk_pm_dev_barrier(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	pm_runtime_barrier(&(mali_platform_device->dev));
+#endif
+}
diff --git a/utgard/r5p1/linux/mali_osk_profiling.c b/utgard/r5p1/linux/mali_osk_profiling.c
new file mode 100755
index 0000000..a9b22e9
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_profiling.c
@@ -0,0 +1,251 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/module.h>
+
+#include <mali_profiling_gator_api.h>
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_osk_profiling.h"
+#include "mali_linux_trace.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_l2_cache.h"
+#include "mali_user_settings_db.h"
+#include "mali_executor.h"
+
+_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start)
+{
+	if (MALI_TRUE == auto_start) {
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE);
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_profiling_term(void)
+{
+	/* Nothing to do */
+}
+
+void _mali_osk_profiling_report_sw_counters(u32 *counters)
+{
+	trace_mali_sw_counters(_mali_osk_get_pid(), _mali_osk_get_tid(), NULL, counters);
+}
+
+void _mali_osk_profiling_memory_usage_get(u32 *memory_usage)
+{
+	*memory_usage = _mali_ukk_report_memory_usage();
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args)
+{
+	/* Always add process and thread identificator in the first two data elements for events from user space */
+	_mali_osk_profiling_add_event(args->event_id, _mali_osk_get_pid(), _mali_osk_get_tid(), args->data[2], args->data[3], args->data[4]);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args)
+{
+	u32 *counters = (u32 *)(uintptr_t)args->counters;
+
+	_mali_osk_profiling_report_sw_counters(counters);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_memory_usage_get(_mali_uk_profiling_memory_usage_get_s *args)
+{
+	_mali_osk_profiling_memory_usage_get(&args->memory_usage);
+	return _MALI_OSK_ERR_OK;
+}
+
+/**
+ * Called by gator.ko to set HW counters
+ *
+ * @param counter_id The counter ID.
+ * @param event_id Event ID that the counter should count (HW counter value from TRM).
+ *
+ * @return 1 on success, 0 on failure.
+ */
+int _mali_profiling_set_event(u32 counter_id, s32 event_id)
+{
+	if (COUNTER_VP_0_C0 == counter_id) {
+		mali_gp_job_set_gp_counter_src0(event_id);
+	} else if (COUNTER_VP_0_C1 == counter_id) {
+		mali_gp_job_set_gp_counter_src1(event_id);
+	} else if (COUNTER_FP_0_C0 <= counter_id && COUNTER_FP_7_C1 >= counter_id) {
+		/*
+		 * Two compatibility notes for this function:
+		 *
+		 * 1) Previously the DDK allowed per core counters.
+		 *
+		 *    This did not make much sense on Mali-450 with the "virtual PP core" concept,
+		 *    so this option was removed, and only the same pair of HW counters was allowed on all cores,
+		 *    beginning with r3p2 release.
+		 *
+		 *    Starting with r4p0, it is now possible to set different HW counters for the different sub jobs.
+		 *    This should be almost the same, since sub job 0 is designed to run on core 0,
+		 *    sub job 1 on core 1, and so on.
+		 *
+		 *    The scheduling of PP sub jobs is not predictable, and this often led to situations where core 0 ran 2
+		 *    sub jobs, while for instance core 1 ran zero. Having the counters set per sub job would thus increase
+		 *    the predictability of the returned data (as you would be guaranteed data for all the selected HW counters).
+		 *
+		 *    PS: Core scaling needs to be disabled in order to use this reliably (goes for both solutions).
+		 *
+		 *    The framework/#defines with Gator still indicates that the counter is for a particular core,
+		 *    but this is internally used as a sub job ID instead (no translation needed).
+		 *
+		 *  2) Global/default vs per sub job counters
+		 *
+		 *     Releases before r3p2 had only per PP core counters.
+		 *     r3p2 releases had only one set of default/global counters which applied to all PP cores
+		 *     Starting with r4p0, we have both a set of default/global counters,
+		 *     and individual counters per sub job (equal to per core).
+		 *
+		 *     To keep compatibility with Gator/DS-5/streamline, the following scheme is used:
+		 *
+		 *     r3p2 release; only counters set for core 0 is handled,
+		 *     this is applied as the default/global set of counters, and will thus affect all cores.
+		 *
+		 *     r4p0 release; counters set for core 0 is applied as both the global/default set of counters,
+		 *     and counters for sub job 0.
+		 *     Counters set for core 1-7 is only applied for the corresponding sub job.
+		 *
+		 *     This should allow the DS-5/Streamline GUI to have a simple mode where it only allows setting the
+		 *     values for core 0, and thus this will be applied to all PP sub jobs/cores.
+		 *     Advanced mode will also be supported, where individual pairs of HW counters can be selected.
+		 *
+		 *     The GUI will (until it is updated) still refer to cores instead of sub jobs, but this is probably
+		 *     something we can live with!
+		 *
+		 *     Mali-450 note: Each job is not divided into a deterministic number of sub jobs, as the HW DLBU
+		 *     automatically distributes the load between whatever number of cores is available at this particular time.
+		 *     A normal PP job on Mali-450 is thus considered a single (virtual) job, and it will thus only be possible
+		 *     to use a single pair of HW counters (even if the job ran on multiple PP cores).
+		 *     In other words, only the global/default pair of PP HW counters will be used for normal Mali-450 jobs.
+		 */
+		u32 sub_job = (counter_id - COUNTER_FP_0_C0) >> 1;
+		u32 counter_src = (counter_id - COUNTER_FP_0_C0) & 1;
+		if (0 == counter_src) {
+			mali_pp_job_set_pp_counter_sub_job_src0(sub_job, event_id);
+			if (0 == sub_job) {
+				mali_pp_job_set_pp_counter_global_src0(event_id);
+			}
+		} else {
+			mali_pp_job_set_pp_counter_sub_job_src1(sub_job, event_id);
+			if (0 == sub_job) {
+				mali_pp_job_set_pp_counter_global_src1(event_id);
+			}
+		}
+	} else if (COUNTER_L2_0_C0 <= counter_id && COUNTER_L2_2_C1 >= counter_id) {
+		u32 core_id = (counter_id - COUNTER_L2_0_C0) >> 1;
+		struct mali_l2_cache_core *l2_cache_core = mali_l2_cache_core_get_glob_l2_core(core_id);
+
+		if (NULL != l2_cache_core) {
+			u32 counter_src = (counter_id - COUNTER_L2_0_C0) & 1;
+			mali_l2_cache_core_set_counter_src(l2_cache_core,
+							   counter_src, event_id);
+		}
+	} else {
+		return 0; /* Failure, unknown event */
+	}
+
+	return 1; /* success */
+}
+
+/**
+ * Called by gator.ko to retrieve the L2 cache counter values for all L2 cache cores.
+ * The L2 cache counters are unique in that they are polled by gator, rather than being
+ * transmitted via the tracepoint mechanism.
+ *
+ * @param values Pointer to a _mali_profiling_l2_counter_values structure where
+ *               the counter sources and values will be output
+ * @return 0 if all went well; otherwise, return the mask with the bits set for the powered off cores
+ */
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values)
+{
+	u32 l2_cores_num = mali_l2_cache_core_get_glob_num_l2_cores();
+	u32 i;
+
+	MALI_DEBUG_ASSERT(l2_cores_num <= 3);
+
+	for (i = 0; i < l2_cores_num; i++) {
+		struct mali_l2_cache_core *l2_cache = mali_l2_cache_core_get_glob_l2_core(i);
+
+		if (NULL == l2_cache) {
+			continue;
+		}
+
+		mali_l2_cache_core_get_counter_values(l2_cache,
+						      &values->cores[i].source0,
+						      &values->cores[i].value0,
+						      &values->cores[i].source1,
+						      &values->cores[i].value1);
+	}
+
+	return 0;
+}
+
+/**
+ * Called by gator to control the production of profiling information at runtime.
+ */
+void _mali_profiling_control(u32 action, u32 value)
+{
+	switch (action) {
+	case FBDUMP_CONTROL_ENABLE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED, (value == 0 ? MALI_FALSE : MALI_TRUE));
+		break;
+	case FBDUMP_CONTROL_RATE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES, value);
+		break;
+	case SW_COUNTER_ENABLE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_COUNTER_ENABLED, value);
+		break;
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR, value);
+		break;
+	default:
+		break;  /* Ignore unimplemented actions */
+	}
+}
+
+/**
+ * Called by gator to get mali api version.
+ */
+u32 _mali_profiling_get_api_version(void)
+{
+	return MALI_PROFILING_API_VERSION;
+}
+
+/**
+* Called by gator to get the data about Mali instance in use:
+* product id, version, number of cores
+*/
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values)
+{
+	values->mali_product_id = (u32)mali_kernel_core_get_product_id();
+	values->mali_version_major = mali_kernel_core_get_gpu_major_version();
+	values->mali_version_minor = mali_kernel_core_get_gpu_minor_version();
+	values->num_of_l2_cores = mali_l2_cache_core_get_glob_num_l2_cores();
+	values->num_of_fp_cores = mali_executor_get_num_cores_total();
+	values->num_of_vp_cores = 1;
+}
+
+
+EXPORT_SYMBOL(_mali_profiling_set_event);
+EXPORT_SYMBOL(_mali_profiling_get_l2_counters);
+EXPORT_SYMBOL(_mali_profiling_control);
+EXPORT_SYMBOL(_mali_profiling_get_api_version);
+EXPORT_SYMBOL(_mali_profiling_get_mali_version);
diff --git a/utgard/r5p1/linux/mali_osk_specific.h b/utgard/r5p1/linux/mali_osk_specific.h
new file mode 100755
index 0000000..db034a5
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_specific.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_specific.h
+ * Defines per-OS Kernel level specifics, such as unusual workarounds for
+ * certain OSs.
+ */
+
+#ifndef __MALI_OSK_SPECIFIC_H__
+#define __MALI_OSK_SPECIFIC_H__
+
+#include <asm/uaccess.h>
+#include <linux/platform_device.h>
+#include <linux/gfp.h>
+#include <linux/hardirq.h>
+
+
+#include "mali_osk_types.h"
+#include "mali_kernel_linux.h"
+
+#define MALI_STATIC_INLINE static inline
+#define MALI_NON_STATIC_INLINE inline
+
+typedef struct dma_pool *mali_dma_pool;
+
+typedef u32 mali_dma_addr;
+
+#if MALI_ENABLE_CPU_CYCLES
+/* Reads out the clock cycle performance counter of the current cpu.
+   It is useful for cost-free (2 cycle) measuring of the time spent
+   in a code path. Sample before and after, the diff number of cycles.
+   When the CPU is idle it will not increase this clock counter.
+   It means that the counter is accurate if only spin-locks are used,
+   but mutexes may lead to too low values since the cpu might "idle"
+   waiting for the mutex to become available.
+   The clock source is configured on the CPU during mali module load,
+   but will not give useful output after a CPU has been power cycled.
+   It is therefore important to configure the system to not turn of
+   the cpu cores when using this functionallity.*/
+static inline unsigned int mali_get_cpu_cyclecount(void)
+{
+	unsigned int value;
+	/* Reading the CCNT Register - CPU clock counter */
+	asm volatile("MRC p15, 0, %0, c9, c13, 0\t\n": "=r"(value));
+	return value;
+}
+
+void mali_init_cpu_time_counters(int reset, int enable_divide_by_64);
+#endif
+
+
+MALI_STATIC_INLINE u32 _mali_osk_copy_from_user(void *to, void *from, u32 n)
+{
+	return (u32)copy_from_user(to, from, (unsigned long)n);
+}
+
+MALI_STATIC_INLINE mali_bool _mali_osk_in_atomic(void)
+{
+	return in_atomic();
+}
+
+#define _mali_osk_put_user(x, ptr) put_user(x, ptr)
+
+#endif /* __MALI_OSK_SPECIFIC_H__ */
diff --git a/utgard/r5p1/linux/mali_osk_time.c b/utgard/r5p1/linux/mali_osk_time.c
new file mode 100755
index 0000000..4deaa10
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_time.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2010, 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_time.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <asm/delay.h>
+
+mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb)
+{
+	return time_after_eq(ticka, tickb) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+unsigned long _mali_osk_time_mstoticks(u32 ms)
+{
+	return msecs_to_jiffies(ms);
+}
+
+u32 _mali_osk_time_tickstoms(unsigned long ticks)
+{
+	return jiffies_to_msecs(ticks);
+}
+
+unsigned long _mali_osk_time_tickcount(void)
+{
+	return jiffies;
+}
+
+void _mali_osk_time_ubusydelay(u32 usecs)
+{
+	udelay(usecs);
+}
+
+u64 _mali_osk_time_get_ns(void)
+{
+	struct timespec tsval;
+	getnstimeofday(&tsval);
+	return (u64)timespec_to_ns(&tsval);
+}
+
+u64 _mali_osk_boot_time_get_ns(void)
+{
+	struct timespec tsval;
+	get_monotonic_boottime(&tsval);
+	return (u64)timespec_to_ns(&tsval);
+}
diff --git a/utgard/r5p1/linux/mali_osk_timers.c b/utgard/r5p1/linux/mali_osk_timers.c
new file mode 100755
index 0000000..6bbaee7
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_timers.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_timers.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct _mali_osk_timer_t_struct {
+	struct timer_list timer;
+};
+
+typedef void (*timer_timeout_function_t)(unsigned long);
+
+_mali_osk_timer_t *_mali_osk_timer_init(void)
+{
+	_mali_osk_timer_t *t = (_mali_osk_timer_t *)kmalloc(sizeof(_mali_osk_timer_t), GFP_KERNEL);
+	if (NULL != t) init_timer(&t->timer);
+	return t;
+}
+
+void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	tim->timer.expires = jiffies + ticks_to_expire;
+	add_timer(&(tim->timer));
+}
+
+void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	mod_timer(&(tim->timer), jiffies + ticks_to_expire);
+}
+
+void _mali_osk_timer_del(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	del_timer_sync(&(tim->timer));
+}
+
+void _mali_osk_timer_del_async(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	del_timer(&(tim->timer));
+}
+
+mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	return 1 == timer_pending(&(tim->timer));
+}
+
+void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	tim->timer.data = (unsigned long)data;
+	tim->timer.function = (timer_timeout_function_t)callback;
+}
+
+void _mali_osk_timer_term(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	kfree(tim);
+}
diff --git a/utgard/r5p1/linux/mali_osk_wait_queue.c b/utgard/r5p1/linux/mali_osk_wait_queue.c
new file mode 100755
index 0000000..15d5ce2
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_wait_queue.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_wait_queue.c
+ * Implemenation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct _mali_osk_wait_queue_t_struct {
+	wait_queue_head_t wait_queue;
+};
+
+_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void)
+{
+	_mali_osk_wait_queue_t *ret = NULL;
+
+	ret = kmalloc(sizeof(_mali_osk_wait_queue_t), GFP_KERNEL);
+
+	if (NULL == ret) {
+		return ret;
+	}
+
+	init_waitqueue_head(&ret->wait_queue);
+	MALI_DEBUG_ASSERT(!waitqueue_active(&ret->wait_queue));
+
+	return ret;
+}
+
+void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue));
+	wait_event(queue->wait_queue, condition(data));
+}
+
+void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue));
+	wait_event_timeout(queue->wait_queue, condition(data), _mali_osk_time_mstoticks(timeout));
+}
+
+void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	/* if queue is empty, don't attempt to wake up its elements */
+	if (!waitqueue_active(&queue->wait_queue)) return;
+
+	MALI_DEBUG_PRINT(6, ("Waking up elements in wait queue %p ....\n", queue));
+
+	wake_up_all(&queue->wait_queue);
+
+	MALI_DEBUG_PRINT(6, ("... elements in wait queue %p woken up\n", queue));
+}
+
+void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue)
+{
+	/* Parameter validation  */
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	/* Linux requires no explicit termination of wait queues */
+	kfree(queue);
+}
diff --git a/utgard/r5p1/linux/mali_osk_wq.c b/utgard/r5p1/linux/mali_osk_wq.c
new file mode 100755
index 0000000..2c34c91
--- /dev/null
+++ b/utgard/r5p1/linux/mali_osk_wq.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_wq.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/slab.h> /* For memory allocation */
+#include <linux/workqueue.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_license.h"
+#include "mali_kernel_linux.h"
+
+typedef struct _mali_osk_wq_work_s {
+	_mali_osk_wq_work_handler_t handler;
+	void *data;
+	mali_bool high_pri;
+	struct work_struct work_handle;
+} mali_osk_wq_work_object_t;
+
+typedef struct _mali_osk_wq_delayed_work_s {
+	_mali_osk_wq_work_handler_t handler;
+	void *data;
+	struct delayed_work work;
+} mali_osk_wq_delayed_work_object_t;
+
+#if MALI_LICENSE_IS_GPL
+static struct workqueue_struct *mali_wq_normal = NULL;
+static struct workqueue_struct *mali_wq_high = NULL;
+#endif
+
+static void _mali_osk_wq_work_func(struct work_struct *work);
+
+_mali_osk_errcode_t _mali_osk_wq_init(void)
+{
+#if MALI_LICENSE_IS_GPL
+	MALI_DEBUG_ASSERT(NULL == mali_wq_normal);
+	MALI_DEBUG_ASSERT(NULL == mali_wq_high);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+	mali_wq_normal = alloc_workqueue("mali", WQ_UNBOUND, 0);
+	mali_wq_high = alloc_workqueue("mali_high_pri", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+	mali_wq_normal = create_workqueue("mali");
+	mali_wq_high = create_workqueue("mali_high_pri");
+#endif
+	if (NULL == mali_wq_normal || NULL == mali_wq_high) {
+		MALI_PRINT_ERROR(("Unable to create Mali workqueues\n"));
+
+		if (mali_wq_normal) destroy_workqueue(mali_wq_normal);
+		if (mali_wq_high)   destroy_workqueue(mali_wq_high);
+
+		mali_wq_normal = NULL;
+		mali_wq_high   = NULL;
+
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* MALI_LICENSE_IS_GPL */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_wq_flush(void)
+{
+#if MALI_LICENSE_IS_GPL
+	flush_workqueue(mali_wq_high);
+	flush_workqueue(mali_wq_normal);
+#else
+	flush_scheduled_work();
+#endif
+}
+
+void _mali_osk_wq_term(void)
+{
+#if MALI_LICENSE_IS_GPL
+	MALI_DEBUG_ASSERT(NULL != mali_wq_normal);
+	MALI_DEBUG_ASSERT(NULL != mali_wq_high);
+
+	flush_workqueue(mali_wq_normal);
+	destroy_workqueue(mali_wq_normal);
+
+	flush_workqueue(mali_wq_high);
+	destroy_workqueue(mali_wq_high);
+
+	mali_wq_normal = NULL;
+	mali_wq_high   = NULL;
+#else
+	flush_scheduled_work();
+#endif
+}
+
+_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL);
+
+	if (NULL == work) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+	work->high_pri = MALI_FALSE;
+
+	INIT_WORK(&work->work_handle, _mali_osk_wq_work_func);
+
+	return work;
+}
+
+_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL);
+
+	if (NULL == work) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+	work->high_pri = MALI_TRUE;
+
+	INIT_WORK(&work->work_handle, _mali_osk_wq_work_func);
+
+	return work;
+}
+
+void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+	_mali_osk_wq_flush();
+	kfree(work_object);
+}
+
+void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+	kfree(work_object);
+}
+
+void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+#if MALI_LICENSE_IS_GPL
+	queue_work(mali_wq_normal, &work_object->work_handle);
+#else
+	schedule_work(&work_object->work_handle);
+#endif
+}
+
+void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+#if MALI_LICENSE_IS_GPL
+	queue_work(mali_wq_high, &work_object->work_handle);
+#else
+	schedule_work(&work_object->work_handle);
+#endif
+}
+
+static void _mali_osk_wq_work_func(struct work_struct *work)
+{
+	mali_osk_wq_work_object_t *work_object;
+
+	work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_work_object_t, work_handle);
+
+#if MALI_LICENSE_IS_GPL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+	/* We want highest Dynamic priority of the thread so that the Jobs depending
+	** on this thread could be scheduled in time. Without this, this thread might
+	** sometimes need to wait for some threads in user mode to finish its round-robin
+	** time, causing *bubble* in the Mali pipeline. Thanks to the new implementation
+	** of high-priority workqueue in new kernel, this only happens in older kernel.
+	*/
+	if (MALI_TRUE == work_object->high_pri) {
+		set_user_nice(current, -19);
+	}
+#endif
+#endif /* MALI_LICENSE_IS_GPL */
+
+	work_object->handler(work_object->data);
+}
+
+static void _mali_osk_wq_delayed_work_func(struct work_struct *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object;
+
+	work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_delayed_work_object_t, work.work);
+	work_object->handler(work_object->data);
+}
+
+mali_osk_wq_delayed_work_object_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_delayed_work_object_t *work = kmalloc(sizeof(mali_osk_wq_delayed_work_object_t), GFP_KERNEL);
+
+	if (NULL == work) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+
+	INIT_DELAYED_WORK(&work->work, _mali_osk_wq_delayed_work_func);
+
+	return work;
+}
+
+void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+	kfree(work_object);
+}
+
+void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+	cancel_delayed_work(&work_object->work);
+}
+
+void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+	cancel_delayed_work_sync(&work_object->work);
+}
+
+void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+
+#if MALI_LICENSE_IS_GPL
+	queue_delayed_work(mali_wq_normal, &work_object->work, delay);
+#else
+	schedule_delayed_work(&work_object->work, delay);
+#endif
+
+}
diff --git a/utgard/r5p1/linux/mali_pmu_power_up_down.c b/utgard/r5p1/linux/mali_pmu_power_up_down.c
new file mode 100755
index 0000000..7414a3e
--- /dev/null
+++ b/utgard/r5p1/linux/mali_pmu_power_up_down.c
@@ -0,0 +1,27 @@
+/**
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_pmu_power_up_down.c
+ */
+
+#include <linux/module.h>
+#include "mali_executor.h"
+
+int mali_perf_set_num_pp_cores(unsigned int num_cores)
+{
+#ifndef CONFIG_MALI_DVFS
+	return mali_executor_set_perf_level(num_cores, MALI_TRUE);
+#else
+	return mali_executor_set_perf_level(num_cores, MALI_FALSE);
+#endif
+}
+
+EXPORT_SYMBOL(mali_perf_set_num_pp_cores);
diff --git a/utgard/r5p1/linux/mali_profiling_events.h b/utgard/r5p1/linux/mali_profiling_events.h
new file mode 100755
index 0000000..0b90e8c
--- /dev/null
+++ b/utgard/r5p1/linux/mali_profiling_events.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_EVENTS_H__
+#define __MALI_PROFILING_EVENTS_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_profiling_events.h>
+
+#endif /* __MALI_PROFILING_EVENTS_H__ */
diff --git a/utgard/r5p1/linux/mali_profiling_gator_api.h b/utgard/r5p1/linux/mali_profiling_gator_api.h
new file mode 100755
index 0000000..c98d127
--- /dev/null
+++ b/utgard/r5p1/linux/mali_profiling_gator_api.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012-2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_GATOR_API_H__
+#define __MALI_PROFILING_GATOR_API_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_profiling_gator_api.h>
+
+#endif /* __MALI_PROFILING_GATOR_API_H__ */
diff --git a/utgard/r5p1/linux/mali_profiling_internal.c b/utgard/r5p1/linux/mali_profiling_internal.c
new file mode 100755
index 0000000..1e460dc
--- /dev/null
+++ b/utgard/r5p1/linux/mali_profiling_internal.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_ukk.h"
+#include "mali_timestamp.h"
+#include "mali_osk_profiling.h"
+#include "mali_user_settings_db.h"
+#include "mali_profiling_internal.h"
+
+typedef struct mali_profiling_entry {
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+} mali_profiling_entry;
+
+typedef enum mali_profiling_state {
+	MALI_PROFILING_STATE_UNINITIALIZED,
+	MALI_PROFILING_STATE_IDLE,
+	MALI_PROFILING_STATE_RUNNING,
+	MALI_PROFILING_STATE_RETURN,
+} mali_profiling_state;
+
+static _mali_osk_mutex_t *lock = NULL;
+static mali_profiling_state prof_state = MALI_PROFILING_STATE_UNINITIALIZED;
+static mali_profiling_entry *profile_entries = NULL;
+static _mali_osk_atomic_t profile_insert_index;
+static u32 profile_mask = 0;
+
+static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4);
+
+void probe_mali_timeline_event(void *data, TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, unsigned
+			       int d2, unsigned int d3, unsigned int d4))
+{
+	add_event(event_id, d0, d1, d2, d3, d4);
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_init(mali_bool auto_start)
+{
+	profile_entries = NULL;
+	profile_mask = 0;
+	_mali_osk_atomic_init(&profile_insert_index, 0);
+
+	lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_PROFILING);
+	if (NULL == lock) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	prof_state = MALI_PROFILING_STATE_IDLE;
+
+	if (MALI_TRUE == auto_start) {
+		u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* Use maximum buffer size */
+
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE);
+		if (_MALI_OSK_ERR_OK != _mali_internal_profiling_start(&limit)) {
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_internal_profiling_term(void)
+{
+	u32 count;
+
+	/* Ensure profiling is stopped */
+	_mali_internal_profiling_stop(&count);
+
+	prof_state = MALI_PROFILING_STATE_UNINITIALIZED;
+
+	if (NULL != profile_entries) {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	if (NULL != lock) {
+		_mali_osk_mutex_term(lock);
+		lock = NULL;
+	}
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_start(u32 *limit)
+{
+	_mali_osk_errcode_t ret;
+	mali_profiling_entry *new_profile_entries;
+
+	_mali_osk_mutex_wait(lock);
+
+	if (MALI_PROFILING_STATE_RUNNING == prof_state) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_BUSY;
+	}
+
+	new_profile_entries = _mali_osk_valloc(*limit * sizeof(mali_profiling_entry));
+
+	if (NULL == new_profile_entries) {
+		_mali_osk_vfree(new_profile_entries);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	if (MALI_PROFILING_MAX_BUFFER_ENTRIES < *limit) {
+		*limit = MALI_PROFILING_MAX_BUFFER_ENTRIES;
+	}
+
+	profile_mask = 1;
+	while (profile_mask <= *limit) {
+		profile_mask <<= 1;
+	}
+	profile_mask >>= 1;
+
+	*limit = profile_mask;
+
+	profile_mask--; /* turns the power of two into a mask of one less */
+
+	if (MALI_PROFILING_STATE_IDLE != prof_state) {
+		_mali_osk_mutex_signal(lock);
+		_mali_osk_vfree(new_profile_entries);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	profile_entries = new_profile_entries;
+
+	ret = _mali_timestamp_reset();
+
+	if (_MALI_OSK_ERR_OK == ret) {
+		prof_state = MALI_PROFILING_STATE_RUNNING;
+	} else {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	register_trace_mali_timeline_event(probe_mali_timeline_event, NULL);
+
+	_mali_osk_mutex_signal(lock);
+	return ret;
+}
+
+static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4)
+{
+	u32 cur_index = (_mali_osk_atomic_inc_return(&profile_insert_index) - 1) & profile_mask;
+
+	profile_entries[cur_index].timestamp = _mali_timestamp_get();
+	profile_entries[cur_index].event_id = event_id;
+	profile_entries[cur_index].data[0] = data0;
+	profile_entries[cur_index].data[1] = data1;
+	profile_entries[cur_index].data[2] = data2;
+	profile_entries[cur_index].data[3] = data3;
+	profile_entries[cur_index].data[4] = data4;
+
+	/* If event is "leave API function", add current memory usage to the event
+	 * as data point 4.  This is used in timeline profiling to indicate how
+	 * much memory was used when leaving a function. */
+	if (event_id == (MALI_PROFILING_EVENT_TYPE_SINGLE | MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC)) {
+		profile_entries[cur_index].data[4] = _mali_ukk_report_memory_usage();
+	}
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_stop(u32 *count)
+{
+	_mali_osk_mutex_wait(lock);
+
+	if (MALI_PROFILING_STATE_RUNNING != prof_state) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	/* go into return state (user to retreive events), no more events will be added after this */
+	prof_state = MALI_PROFILING_STATE_RETURN;
+
+	unregister_trace_mali_timeline_event(probe_mali_timeline_event, NULL);
+
+	_mali_osk_mutex_signal(lock);
+
+	tracepoint_synchronize_unregister();
+
+	*count = _mali_osk_atomic_read(&profile_insert_index);
+	if (*count > profile_mask) *count = profile_mask;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+u32 _mali_internal_profiling_get_count(void)
+{
+	u32 retval = 0;
+
+	_mali_osk_mutex_wait(lock);
+	if (MALI_PROFILING_STATE_RETURN == prof_state) {
+		retval = _mali_osk_atomic_read(&profile_insert_index);
+		if (retval > profile_mask) retval = profile_mask;
+	}
+	_mali_osk_mutex_signal(lock);
+
+	return retval;
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5])
+{
+	u32 raw_index = _mali_osk_atomic_read(&profile_insert_index);
+
+	_mali_osk_mutex_wait(lock);
+
+	if (index < profile_mask) {
+		if ((raw_index & ~profile_mask) != 0) {
+			index += raw_index;
+			index &= profile_mask;
+		}
+
+		if (prof_state != MALI_PROFILING_STATE_RETURN) {
+			_mali_osk_mutex_signal(lock);
+			return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+		}
+
+		if (index >= raw_index) {
+			_mali_osk_mutex_signal(lock);
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		*timestamp = profile_entries[index].timestamp;
+		*event_id = profile_entries[index].event_id;
+		data[0] = profile_entries[index].data[0];
+		data[1] = profile_entries[index].data[1];
+		data[2] = profile_entries[index].data[2];
+		data[3] = profile_entries[index].data[3];
+		data[4] = profile_entries[index].data[4];
+	} else {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	_mali_osk_mutex_signal(lock);
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_clear(void)
+{
+	_mali_osk_mutex_wait(lock);
+
+	if (MALI_PROFILING_STATE_RETURN != prof_state) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	prof_state = MALI_PROFILING_STATE_IDLE;
+	profile_mask = 0;
+	_mali_osk_atomic_init(&profile_insert_index, 0);
+
+	if (NULL != profile_entries) {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	_mali_osk_mutex_signal(lock);
+	return _MALI_OSK_ERR_OK;
+}
+
+mali_bool _mali_internal_profiling_is_recording(void)
+{
+	return prof_state == MALI_PROFILING_STATE_RUNNING ? MALI_TRUE : MALI_FALSE;
+}
+
+mali_bool _mali_internal_profiling_have_recording(void)
+{
+	return prof_state == MALI_PROFILING_STATE_RETURN ? MALI_TRUE : MALI_FALSE;
+}
diff --git a/utgard/r5p1/linux/mali_profiling_internal.h b/utgard/r5p1/linux/mali_profiling_internal.h
new file mode 100755
index 0000000..1c6f4da
--- /dev/null
+++ b/utgard/r5p1/linux/mali_profiling_internal.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_INTERNAL_H__
+#define __MALI_PROFILING_INTERNAL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_osk.h"
+
+int _mali_internal_profiling_init(mali_bool auto_start);
+void _mali_internal_profiling_term(void);
+
+mali_bool _mali_internal_profiling_is_recording(void);
+mali_bool _mali_internal_profiling_have_recording(void);
+_mali_osk_errcode_t _mali_internal_profiling_clear(void);
+_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]);
+u32 _mali_internal_profiling_get_count(void);
+int _mali_internal_profiling_stop(u32 *count);
+int _mali_internal_profiling_start(u32 *limit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_PROFILING_INTERNAL_H__ */
diff --git a/utgard/r5p1/linux/mali_sync.c b/utgard/r5p1/linux/mali_sync.c
new file mode 100755
index 0000000..652a5c1
--- /dev/null
+++ b/utgard/r5p1/linux/mali_sync.c
@@ -0,0 +1,385 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_sync.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_timeline.h"
+
+#include <linux/file.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <asm-generic/fcntl.h>
+
+struct mali_sync_pt {
+	struct sync_pt         sync_pt;
+	struct mali_sync_flag *flag;
+	struct sync_timeline *sync_tl;  /**< Sync timeline this pt is connected to. */
+};
+
+/**
+ * The sync flag is used to connect sync fences to the Mali Timeline system.  Sync fences can be
+ * created from a sync flag, and when the flag is signaled, the sync fences will also be signaled.
+ */
+struct mali_sync_flag {
+	struct sync_timeline *sync_tl;  /**< Sync timeline this flag is connected to. */
+	u32                   point;    /**< Point on timeline. */
+	int                   status;   /**< 0 if unsignaled, 1 if signaled without error or negative if signaled with error. */
+	struct kref           refcount; /**< Reference count. */
+};
+
+/**
+ * Mali sync timeline is used to connect mali timeline to sync_timeline.
+ * When fence timeout can print more detailed mali timeline system info.
+ */
+struct mali_sync_timeline_container {
+	struct sync_timeline sync_timeline;
+	struct mali_timeline *timeline;
+};
+
+MALI_STATIC_INLINE struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, sync_pt);
+}
+
+MALI_STATIC_INLINE struct mali_sync_timeline_container *to_mali_sync_tl_container(struct sync_timeline *sync_tl)
+{
+	return container_of(sync_tl, struct mali_sync_timeline_container, sync_timeline);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt, *new_mpt;
+	struct sync_pt *new_pt;
+
+	if (NULL == pt) {
+		dump_stack();
+		return NULL;
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(pt);
+	mpt = to_mali_sync_pt(pt);
+
+	new_pt = sync_pt_create(mpt->sync_tl, sizeof(struct mali_sync_pt));
+	if (NULL == new_pt) return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+
+	mali_sync_flag_get(mpt->flag);
+	new_mpt->flag = mpt->flag;
+	new_mpt->sync_tl = mpt->sync_tl;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(pt);
+	mpt = to_mali_sync_pt(pt);
+
+	MALI_DEBUG_ASSERT_POINTER(mpt->flag);
+
+	return mpt->flag->status;
+}
+
+static int timeline_compare(struct sync_pt *pta, struct sync_pt *ptb)
+{
+	struct mali_sync_pt *mpta;
+	struct mali_sync_pt *mptb;
+	u32 a, b;
+
+	MALI_DEBUG_ASSERT_POINTER(pta);
+	MALI_DEBUG_ASSERT_POINTER(ptb);
+	mpta = to_mali_sync_pt(pta);
+	mptb = to_mali_sync_pt(ptb);
+
+	MALI_DEBUG_ASSERT_POINTER(mpta->flag);
+	MALI_DEBUG_ASSERT_POINTER(mptb->flag);
+
+	a = mpta->flag->point;
+	b = mptb->flag->point;
+
+	if (a == b) return 0;
+
+	return ((b - a) < (a - b) ? -1 : 1);
+}
+
+static void timeline_free_pt(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(pt);
+	mpt = to_mali_sync_pt(pt);
+
+	mali_sync_flag_put(mpt->flag);
+}
+
+static void timeline_release(struct sync_timeline *sync_timeline)
+{
+	module_put(THIS_MODULE);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+static void timeline_print_pt(struct seq_file *s, struct sync_pt *sync_pt)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(s);
+	MALI_DEBUG_ASSERT_POINTER(sync_pt);
+
+	mpt = to_mali_sync_pt(sync_pt);
+
+	/* It is possible this sync point is just under construct,
+	 * make sure the flag is valid before accessing it
+	*/
+	if (mpt->flag) {
+		seq_printf(s, "%u", mpt->flag->point);
+	} else {
+		seq_printf(s, "uninitialized");
+	}
+}
+
+#else
+static void timeline_pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(str);
+	MALI_DEBUG_ASSERT_POINTER(pt);
+
+	mpt = to_mali_sync_pt(pt);
+
+	/* It is possible this sync point is just under construct,
+	 * make sure the flag is valid before accessing it
+	*/
+	if (mpt->flag) {
+		_mali_osk_snprintf(str, size, "%u", mpt->flag->point);
+	} else {
+		_mali_osk_snprintf(str, size, "uninitialized");
+	}
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str, int size)
+{
+	struct mali_sync_timeline_container *mali_sync_tl;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT_POINTER(str);
+
+	mali_sync_tl = to_mali_sync_tl_container(timeline);
+
+	MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+	if (NULL != mali_sync_tl->timeline) {
+		_mali_osk_snprintf(str, size, "oldest (%u)  next (%u)\n", mali_sync_tl->timeline->point_oldest,
+				   mali_sync_tl->timeline->point_next);
+	}
+}
+#endif
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name    = "Mali",
+	.dup            = timeline_dup,
+	.has_signaled   = timeline_has_signaled,
+	.compare        = timeline_compare,
+	.free_pt        = timeline_free_pt,
+	.release_obj    = timeline_release,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	.print_pt       = timeline_print_pt,
+#else
+	.pt_value_str = timeline_pt_value_str,
+	.timeline_value_str = timeline_value_str,
+#endif
+};
+
+struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name)
+{
+	struct sync_timeline *sync_tl;
+	struct mali_sync_timeline_container *mali_sync_tl;
+
+	sync_tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline_container), name);
+	if (NULL == sync_tl) return NULL;
+
+	mali_sync_tl = to_mali_sync_tl_container(sync_tl);
+	mali_sync_tl->timeline = timeline;
+
+	/* Grab a reference on the module to ensure the callbacks are present
+	 * as long some timeline exists. The reference is released when the
+	 * timeline is freed.
+	 * Since this function is called from a ioctl on an open file we know
+	 * we already have a reference, so using __module_get is safe. */
+	__module_get(THIS_MODULE);
+
+	return sync_tl;
+}
+
+mali_bool mali_sync_timeline_is_ours(struct sync_timeline *sync_tl)
+{
+	MALI_DEBUG_ASSERT_POINTER(sync_tl);
+	return (sync_tl->ops == &mali_timeline_ops) ? MALI_TRUE : MALI_FALSE;
+}
+
+s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence)
+{
+	s32 fd = -1;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
+	fd = get_unused_fd();
+#else
+	fd = get_unused_fd_flags(O_CLOEXEC);
+#endif
+	if (fd < 0) {
+		sync_fence_put(sync_fence);
+		return -1;
+	}
+	sync_fence_install(sync_fence, fd);
+
+	return fd;
+}
+
+struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2)
+{
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence1);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence1);
+
+	sync_fence = sync_fence_merge("mali_merge_fence", sync_fence1, sync_fence2);
+	sync_fence_put(sync_fence1);
+	sync_fence_put(sync_fence2);
+
+	return sync_fence;
+}
+
+struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl)
+{
+	struct mali_sync_flag *flag;
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_tl);
+
+	flag = mali_sync_flag_create(sync_tl, 0);
+	if (NULL == flag) return NULL;
+
+	sync_fence = mali_sync_flag_create_fence(flag);
+
+	mali_sync_flag_signal(flag, 0);
+	mali_sync_flag_put(flag);
+
+	return sync_fence;
+}
+
+struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, mali_timeline_point point)
+{
+	struct mali_sync_flag *flag;
+
+	if (NULL == sync_tl) return NULL;
+
+	flag = _mali_osk_calloc(1, sizeof(*flag));
+	if (NULL == flag) return NULL;
+
+	flag->sync_tl = sync_tl;
+	flag->point = point;
+
+	flag->status = 0;
+	kref_init(&flag->refcount);
+
+	return flag;
+}
+
+void mali_sync_flag_get(struct mali_sync_flag *flag)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	kref_get(&flag->refcount);
+}
+
+/**
+ * Free sync flag.
+ *
+ * @param ref kref object embedded in sync flag that should be freed.
+ */
+static void mali_sync_flag_free(struct kref *ref)
+{
+	struct mali_sync_flag *flag;
+
+	MALI_DEBUG_ASSERT_POINTER(ref);
+	flag = container_of(ref, struct mali_sync_flag, refcount);
+
+	_mali_osk_free(flag);
+}
+
+void mali_sync_flag_put(struct mali_sync_flag *flag)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	kref_put(&flag->refcount, mali_sync_flag_free);
+}
+
+void mali_sync_flag_signal(struct mali_sync_flag *flag, int error)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+
+	MALI_DEBUG_ASSERT(0 == flag->status);
+	flag->status = (0 > error) ? error : 1;
+
+	_mali_osk_write_mem_barrier();
+
+	sync_timeline_signal(flag->sync_tl);
+}
+
+/**
+ * Create a sync point attached to given sync flag.
+ *
+ * @note Sync points must be triggered in *exactly* the same order as they are created.
+ *
+ * @param flag Sync flag.
+ * @return New sync point if successful, NULL if not.
+ */
+static struct sync_pt *mali_sync_flag_create_pt(struct mali_sync_flag *flag)
+{
+	struct sync_pt *pt;
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	MALI_DEBUG_ASSERT_POINTER(flag->sync_tl);
+
+	pt = sync_pt_create(flag->sync_tl, sizeof(struct mali_sync_pt));
+	if (NULL == pt) return NULL;
+
+	mali_sync_flag_get(flag);
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->flag = flag;
+	mpt->sync_tl = flag->sync_tl;
+
+	return pt;
+}
+
+struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag)
+{
+	struct sync_pt    *sync_pt;
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	MALI_DEBUG_ASSERT_POINTER(flag->sync_tl);
+
+	sync_pt = mali_sync_flag_create_pt(flag);
+	if (NULL == sync_pt) return NULL;
+
+	sync_fence = sync_fence_create("mali_flag_fence", sync_pt);
+	if (NULL == sync_fence) {
+		sync_pt_free(sync_pt);
+		return NULL;
+	}
+
+	return sync_fence;
+}
diff --git a/utgard/r5p1/linux/mali_sync.h b/utgard/r5p1/linux/mali_sync.h
new file mode 100755
index 0000000..61c7536
--- /dev/null
+++ b/utgard/r5p1/linux/mali_sync.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_sync.h
+ *
+ * Mali interface for Linux sync objects.
+ */
+
+#ifndef _MALI_SYNC_H_
+#define _MALI_SYNC_H_
+
+#if defined(CONFIG_SYNC)
+
+#include <linux/seq_file.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
+#include <linux/sync.h>
+#else
+#include <sync.h>
+#endif
+
+
+#include "mali_osk.h"
+
+struct mali_sync_flag;
+struct mali_timeline;
+
+/**
+ * Create a sync timeline.
+ *
+ * @param name Name of the sync timeline.
+ * @return The new sync timeline if successful, NULL if not.
+ */
+struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name);
+
+/**
+ * Check if sync timeline belongs to Mali.
+ *
+ * @param sync_tl Sync timeline to check.
+ * @return MALI_TRUE if sync timeline belongs to Mali, MALI_FALSE if not.
+ */
+mali_bool mali_sync_timeline_is_ours(struct sync_timeline *sync_tl);
+
+/**
+ * Creates a file descriptor representing the sync fence.  Will release sync fence if allocation of
+ * file descriptor fails.
+ *
+ * @param sync_fence Sync fence.
+ * @return File descriptor representing sync fence if successful, or -1 if not.
+ */
+s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence);
+
+/**
+ * Merges two sync fences.  Both input sync fences will be released.
+ *
+ * @param sync_fence1 First sync fence.
+ * @param sync_fence2 Second sync fence.
+ * @return New sync fence that is the result of the merger if successful, or NULL if not.
+ */
+struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2);
+
+/**
+ * Create a sync fence that is already signaled.
+ *
+ * @param tl Sync timeline.
+ * @return New signaled sync fence if successful, NULL if not.
+ */
+struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl);
+
+/**
+ * Create a sync flag.
+ *
+ * @param sync_tl Sync timeline.
+ * @param point Point on Mali timeline.
+ * @return New sync flag if successful, NULL if not.
+ */
+struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, u32 point);
+
+/**
+ * Grab sync flag reference.
+ *
+ * @param flag Sync flag.
+ */
+void mali_sync_flag_get(struct mali_sync_flag *flag);
+
+/**
+ * Release sync flag reference.  If this was the last reference, the sync flag will be freed.
+ *
+ * @param flag Sync flag.
+ */
+void mali_sync_flag_put(struct mali_sync_flag *flag);
+
+/**
+ * Signal sync flag.  All sync fences created from this flag will be signaled.
+ *
+ * @param flag Sync flag to signal.
+ * @param error Negative error code, or 0 if no error.
+ */
+void mali_sync_flag_signal(struct mali_sync_flag *flag, int error);
+
+/**
+ * Create a sync fence attached to given sync flag.
+ *
+ * @param flag Sync flag.
+ * @return New sync fence if successful, NULL if not.
+ */
+struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag);
+
+#endif /* defined(CONFIG_SYNC) */
+
+#endif /* _MALI_SYNC_H_ */
diff --git a/utgard/r5p1/linux/mali_uk_types.h b/utgard/r5p1/linux/mali_uk_types.h
new file mode 100755
index 0000000..1884cdb
--- /dev/null
+++ b/utgard/r5p1/linux/mali_uk_types.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_UK_TYPES_H__
+#define __MALI_UK_TYPES_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_uk_types.h>
+
+#endif /* __MALI_UK_TYPES_H__ */
diff --git a/utgard/r5p1/linux/mali_ukk_core.c b/utgard/r5p1/linux/mali_ukk_core.c
new file mode 100755
index 0000000..215cd36
--- /dev/null
+++ b/utgard/r5p1/linux/mali_ukk_core.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/slab.h>     /* memort allocation functions */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs)
+{
+	_mali_uk_get_api_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != get_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_api_version(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+	if (0 != put_user(kargs.compatible, &uargs->compatible)) return -EFAULT;
+
+	return 0;
+}
+
+int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs)
+{
+	_mali_uk_get_api_version_v2_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != get_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_api_version_v2(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+	if (0 != put_user(kargs.compatible, &uargs->compatible)) return -EFAULT;
+
+	return 0;
+}
+
+int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs)
+{
+	_mali_uk_wait_for_notification_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_wait_for_notification(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (_MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS != kargs.type) {
+		kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+		if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_wait_for_notification_s))) return -EFAULT;
+	} else {
+		if (0 != put_user(kargs.type, &uargs->type)) return -EFAULT;
+	}
+
+	return 0;
+}
+
+int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs)
+{
+	_mali_uk_post_notification_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	if (0 != get_user(kargs.type, &uargs->type)) {
+		return -EFAULT;
+	}
+
+	err = _mali_ukk_post_notification(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs)
+{
+	_mali_uk_get_user_settings_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_user_settings(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = 0; /* prevent kernel address to be returned to user space */
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_user_settings_s))) return -EFAULT;
+
+	return 0;
+}
+
+int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs)
+{
+	_mali_uk_request_high_priority_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_request_high_priority(&kargs);
+
+	kargs.ctx = 0;
+
+	return map_errcode(err);
+}
diff --git a/utgard/r5p1/linux/mali_ukk_gp.c b/utgard/r5p1/linux/mali_ukk_gp.c
new file mode 100755
index 0000000..d4144c0
--- /dev/null
+++ b/utgard/r5p1/linux/mali_ukk_gp.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_gp_start_job(session_data, uargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs)
+{
+	_mali_uk_get_gp_core_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err =  _mali_ukk_get_gp_core_version(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	/* no known transactions to roll-back */
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	return 0;
+}
+
+int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs)
+{
+	_mali_uk_gp_suspend_response_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_gp_suspend_response_s))) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_gp_suspend_response(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.cookie, &uargs->cookie)) return -EFAULT;
+
+	/* no known transactions to roll-back */
+	return 0;
+}
+
+int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs)
+{
+	_mali_uk_get_gp_number_of_cores_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_gp_number_of_cores(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	/* no known transactions to roll-back */
+
+	if (0 != put_user(kargs.number_of_cores, &uargs->number_of_cores)) return -EFAULT;
+
+	return 0;
+}
diff --git a/utgard/r5p1/linux/mali_ukk_mem.c b/utgard/r5p1/linux/mali_ukk_mem.c
new file mode 100755
index 0000000..68d140d
--- /dev/null
+++ b/utgard/r5p1/linux/mali_ukk_mem.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs)
+{
+	_mali_uk_alloc_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_alloc_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_allocate(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.backend_handle, &uargs->backend_handle)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs)
+{
+	_mali_uk_free_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_free_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_free(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs)
+{
+	_mali_uk_bind_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_bind_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_bind(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs)
+{
+	_mali_uk_unbind_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_unbind_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_unbind(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+
+int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs)
+{
+	_mali_uk_mem_write_safe_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_write_safe_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	/* Check if we can access the buffers */
+	if (!access_ok(VERIFY_WRITE, kargs.dest, kargs.size)
+	    || !access_ok(VERIFY_READ, kargs.src, kargs.size)) {
+		return -EINVAL;
+	}
+
+	/* Check if size wraps */
+	if ((kargs.size + kargs.dest) <= kargs.dest
+	    || (kargs.size + kargs.src) <= kargs.src) {
+		return -EINVAL;
+	}
+
+	err = _mali_ukk_mem_write_safe(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.size, &uargs->size)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+
+
+int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs)
+{
+	_mali_uk_query_mmu_page_table_dump_size_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_query_mmu_page_table_dump_size(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.size, &uargs->size)) return -EFAULT;
+
+	return 0;
+}
+
+int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs)
+{
+	_mali_uk_dump_mmu_page_table_s kargs;
+	_mali_osk_errcode_t err;
+	void __user *user_buffer;
+	void *buffer = NULL;
+	int rc = -EFAULT;
+
+	/* validate input */
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	/* the session_data pointer was validated by caller */
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_dump_mmu_page_table_s)))
+		goto err_exit;
+
+	user_buffer = (void __user *)(uintptr_t)kargs.buffer;
+	if (!access_ok(VERIFY_WRITE, user_buffer, kargs.size))
+		goto err_exit;
+
+	/* allocate temporary buffer (kernel side) to store mmu page table info */
+	if (kargs.size <= 0)
+		return -EINVAL;
+	/* Allow at most 8MiB buffers, this is more than enough to dump a fully
+	 * populated page table. */
+	if (kargs.size > SZ_8M)
+		return -EINVAL;
+
+	buffer = (void *)(uintptr_t)_mali_osk_valloc(kargs.size);
+	if (NULL == buffer) {
+		rc = -ENOMEM;
+		goto err_exit;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	kargs.buffer = (uintptr_t)buffer;
+	err = _mali_ukk_dump_mmu_page_table(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		rc = map_errcode(err);
+		goto err_exit;
+	}
+
+	/* copy mmu page table info back to user space and update pointers */
+	if (0 != copy_to_user(user_buffer, buffer, kargs.size))
+		goto err_exit;
+
+	kargs.register_writes = kargs.register_writes -
+				(uintptr_t)buffer + (uintptr_t)user_buffer;
+	kargs.page_table_dump = kargs.page_table_dump -
+				(uintptr_t)buffer + (uintptr_t)user_buffer;
+
+	if (0 != copy_to_user(uargs, &kargs, sizeof(kargs)))
+		goto err_exit;
+
+	rc = 0;
+
+err_exit:
+	if (buffer) _mali_osk_vfree(buffer);
+	return rc;
+}
diff --git a/utgard/r5p1/linux/mali_ukk_pp.c b/utgard/r5p1/linux/mali_ukk_pp.c
new file mode 100755
index 0000000..4c1c381
--- /dev/null
+++ b/utgard/r5p1/linux/mali_ukk_pp.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_pp_start_job(session_data, uargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the jobs were started successfully, 0 is returned.  If there was an error, but the
+	 * jobs were started, we return -ENOENT.  For anything else returned, the jobs were not
+	 * started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_pp_and_gp_start_job(session_data, uargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs)
+{
+	_mali_uk_get_pp_number_of_cores_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_get_pp_number_of_cores(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_pp_number_of_cores_s))) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs)
+{
+	_mali_uk_get_pp_core_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_pp_core_version(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	return 0;
+}
+
+int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs)
+{
+	_mali_uk_pp_disable_wb_s kargs;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_pp_disable_wb_s))) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	_mali_ukk_pp_job_disable_wb(&kargs);
+
+	return 0;
+}
diff --git a/utgard/r5p1/linux/mali_ukk_profiling.c b/utgard/r5p1/linux/mali_ukk_profiling.c
new file mode 100755
index 0000000..62ed1d6
--- /dev/null
+++ b/utgard/r5p1/linux/mali_ukk_profiling.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+#include <linux/slab.h>
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs)
+{
+	_mali_uk_profiling_add_event_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_add_event_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_profiling_add_event(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int profiling_memory_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+	_mali_uk_profiling_memory_usage_get_s kargs;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_profiling_memory_usage_get(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_memory_usage_get_s))) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs)
+{
+	_mali_uk_sw_counters_report_s kargs;
+	_mali_osk_errcode_t err;
+	u32 *counter_buffer;
+	u32 __user *counters;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_sw_counters_report_s))) {
+		return -EFAULT;
+	}
+
+	/* make sure that kargs.num_counters is [at least somewhat] sane */
+	if (kargs.num_counters > 10000) {
+		MALI_DEBUG_PRINT(1, ("User space attempted to allocate too many counters.\n"));
+		return -EINVAL;
+	}
+
+	counter_buffer = (u32 *)kmalloc(sizeof(u32) * kargs.num_counters, GFP_KERNEL);
+	if (NULL == counter_buffer) {
+		return -ENOMEM;
+	}
+
+	counters = (u32 *)(uintptr_t)kargs.counters;
+
+	if (0 != copy_from_user(counter_buffer, counters, sizeof(u32) * kargs.num_counters)) {
+		kfree(counter_buffer);
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	kargs.counters = (uintptr_t)counter_buffer;
+
+	err = _mali_ukk_sw_counters_report(&kargs);
+
+	kfree(counter_buffer);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
diff --git a/utgard/r5p1/linux/mali_ukk_soft_job.c b/utgard/r5p1/linux/mali_ukk_soft_job.c
new file mode 100755
index 0000000..11c7006
--- /dev/null
+++ b/utgard/r5p1/linux/mali_ukk_soft_job.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+#include "mali_soft_job.h"
+#include "mali_timeline.h"
+
+int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs)
+{
+	_mali_uk_soft_job_start_s kargs;
+	u32 type, point;
+	u64 user_job;
+	struct mali_timeline_fence fence;
+	struct mali_soft_job *job = NULL;
+	u32 __user *job_id_ptr = NULL;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session, -EINVAL);
+
+	MALI_DEBUG_ASSERT_POINTER(session->soft_job_system);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(kargs))) {
+		return -EFAULT;
+	}
+
+	type = kargs.type;
+	user_job = kargs.user_job;
+	job_id_ptr = (u32 __user *)(uintptr_t)kargs.job_id_ptr;
+
+	mali_timeline_fence_copy_uk_fence(&fence, &kargs.fence);
+
+	if ((MALI_SOFT_JOB_TYPE_USER_SIGNALED != type) && (MALI_SOFT_JOB_TYPE_SELF_SIGNALED != type)) {
+		MALI_DEBUG_PRINT_ERROR(("Invalid soft job type specified\n"));
+		return -EINVAL;
+	}
+
+	/* Create soft job. */
+	job = mali_soft_job_create(session->soft_job_system, (enum mali_soft_job_type)type, user_job);
+	if (unlikely(NULL == job)) {
+		return map_errcode(_MALI_OSK_ERR_NOMEM);
+	}
+
+	/* Write job id back to user space. */
+	if (0 != put_user(job->id, job_id_ptr)) {
+		MALI_PRINT_ERROR(("Mali Soft Job: failed to put job id"));
+		mali_soft_job_destroy(job);
+		return map_errcode(_MALI_OSK_ERR_NOMEM);
+	}
+
+	/* Start soft job. */
+	point = mali_soft_job_start(job, &fence);
+
+	if (0 != put_user(point, &uargs->point)) {
+		/* Let user space know that something failed after the job was started. */
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs)
+{
+	u32 job_id;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != get_user(job_id, &uargs->job_id)) return -EFAULT;
+
+	err = mali_soft_job_system_signal_job(session->soft_job_system, job_id);
+
+	return map_errcode(err);
+}
diff --git a/utgard/r5p1/linux/mali_ukk_timeline.c b/utgard/r5p1/linux/mali_ukk_timeline.c
new file mode 100755
index 0000000..484d404
--- /dev/null
+++ b/utgard/r5p1/linux/mali_ukk_timeline.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+#include "mali_timeline.h"
+#include "mali_timeline_fence_wait.h"
+#include "mali_timeline_sync_fence.h"
+
+int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs)
+{
+	u32 val;
+	mali_timeline_id timeline;
+	mali_timeline_point point;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != get_user(val, &uargs->timeline)) return -EFAULT;
+
+	if (MALI_UK_TIMELINE_MAX <= val) {
+		return -EINVAL;
+	}
+
+	timeline = (mali_timeline_id)val;
+
+	point = mali_timeline_system_get_latest_point(session->timeline_system, timeline);
+
+	if (0 != put_user(point, &uargs->point)) return -EFAULT;
+
+	return 0;
+}
+
+int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs)
+{
+	u32 timeout, status;
+	mali_bool ret;
+	_mali_uk_fence_t uk_fence;
+	struct mali_timeline_fence fence;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t))) return -EFAULT;
+	if (0 != get_user(timeout, &uargs->timeout)) return -EFAULT;
+
+	mali_timeline_fence_copy_uk_fence(&fence, &uk_fence);
+
+	ret = mali_timeline_fence_wait(session->timeline_system, &fence, timeout);
+	status = (MALI_TRUE == ret ? 1 : 0);
+
+	if (0 != put_user(status, &uargs->status)) return -EFAULT;
+
+	return 0;
+}
+
+int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs)
+{
+	s32 sync_fd = -1;
+	_mali_uk_fence_t uk_fence;
+	struct mali_timeline_fence fence;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t))) return -EFAULT;
+	mali_timeline_fence_copy_uk_fence(&fence, &uk_fence);
+
+#if defined(CONFIG_SYNC)
+	sync_fd = mali_timeline_sync_fence_create(session->timeline_system, &fence);
+#else
+	sync_fd = -1;
+#endif /* defined(CONFIG_SYNC) */
+
+	if (0 != put_user(sync_fd, &uargs->sync_fd)) return -EFAULT;
+
+	return 0;
+}
diff --git a/utgard/r5p1/linux/mali_ukk_vsync.c b/utgard/r5p1/linux/mali_ukk_vsync.c
new file mode 100755
index 0000000..487c247
--- /dev/null
+++ b/utgard/r5p1/linux/mali_ukk_vsync.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+
+int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs)
+{
+	_mali_uk_vsync_event_report_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_vsync_event_report_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_vsync_event_report(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
diff --git a/utgard/r5p1/linux/mali_ukk_wrappers.h b/utgard/r5p1/linux/mali_ukk_wrappers.h
new file mode 100755
index 0000000..9b03e43
--- /dev/null
+++ b/utgard/r5p1/linux/mali_ukk_wrappers.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_ukk_wrappers.h
+ * Defines the wrapper functions for each user-kernel function
+ */
+
+#ifndef __MALI_UKK_WRAPPERS_H__
+#define __MALI_UKK_WRAPPERS_H__
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs);
+int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs);
+int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs);
+int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs);
+int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs);
+int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs);
+
+int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs);
+int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs);
+int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs);
+int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs);
+
+int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs);
+int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs);
+int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs);
+
+int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs);
+int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs);
+int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs);
+int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs);
+int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs);
+int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs);
+int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs);
+int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs);
+int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs);
+int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs);
+int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs);
+int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs);
+int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs);
+int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs);
+
+int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs);
+int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs);
+int profiling_memory_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs);
+
+int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs);
+
+
+int map_errcode(_mali_osk_errcode_t err);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UKK_WRAPPERS_H__ */
diff --git a/utgard/r5p1/readme.txt b/utgard/r5p1/readme.txt
new file mode 100755
index 0000000..6785ac9
--- /dev/null
+++ b/utgard/r5p1/readme.txt
@@ -0,0 +1,28 @@
+Building the Mali Device Driver for Linux
+-----------------------------------------
+
+Build the Mali Device Driver for Linux by running the following make command:
+
+KDIR=<kdir_path> USING_UMP=<ump_option> BUILD=<build_option> make
+
+where
+    kdir_path: Path to your Linux Kernel directory
+    ump_option: 1 = Enable UMP support(*)
+                0 = disable UMP support
+    build_option: debug = debug build of driver
+                  release = release build of driver
+
+(*)  For newer Linux Kernels, the Module.symvers file for the UMP device driver
+     must be available. The UMP_SYMVERS_FILE variable in the Makefile should
+     point to this file. This file is generated when the UMP driver is built.
+
+The result will be a mali.ko file, which can be loaded into the Linux kernel
+by using the insmod command.
+
+Use of UMP is not recommended. The dma-buf API in the Linux kernel has
+replaced UMP. The Mali Device Driver will be built with dma-buf support if the
+kernel config includes enabled dma-buf.
+
+The kernel needs to be provided with a platform_device struct for the Mali GPU
+device. See the mali_utgard.h header file for how to set up the Mali GPU
+resources.
diff --git a/utgard/r5p1/regs/mali_200_regs.h b/utgard/r5p1/regs/mali_200_regs.h
new file mode 100755
index 0000000..5c072fb
--- /dev/null
+++ b/utgard/r5p1/regs/mali_200_regs.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _MALI200_REGS_H_
+#define _MALI200_REGS_H_
+
+/**
+ *  Enum for management register addresses.
+ */
+enum mali200_mgmt_reg {
+	MALI200_REG_ADDR_MGMT_VERSION                              = 0x1000,
+	MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR               = 0x1004,
+	MALI200_REG_ADDR_MGMT_STATUS                               = 0x1008,
+	MALI200_REG_ADDR_MGMT_CTRL_MGMT                            = 0x100c,
+
+	MALI200_REG_ADDR_MGMT_INT_RAWSTAT                          = 0x1020,
+	MALI200_REG_ADDR_MGMT_INT_CLEAR                            = 0x1024,
+	MALI200_REG_ADDR_MGMT_INT_MASK                             = 0x1028,
+	MALI200_REG_ADDR_MGMT_INT_STATUS                           = 0x102c,
+
+	MALI200_REG_ADDR_MGMT_WRITE_BOUNDARY_LOW                   = 0x1044,
+
+	MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS                     = 0x1050,
+
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE                    = 0x1080,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC                       = 0x1084,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE                     = 0x108c,
+
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE                    = 0x10a0,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC                       = 0x10a4,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE                     = 0x10ac,
+
+	MALI200_REG_ADDR_MGMT_PERFMON_CONTR                        = 0x10b0,
+	MALI200_REG_ADDR_MGMT_PERFMON_BASE                         = 0x10b4,
+
+	MALI200_REG_SIZEOF_REGISTER_BANK                           = 0x10f0
+
+};
+
+#define MALI200_REG_VAL_PERF_CNT_ENABLE 1
+
+enum mali200_mgmt_ctrl_mgmt {
+	MALI200_REG_VAL_CTRL_MGMT_STOP_BUS         = (1 << 0),
+	MALI200_REG_VAL_CTRL_MGMT_FLUSH_CACHES     = (1 << 3),
+	MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET      = (1 << 5),
+	MALI200_REG_VAL_CTRL_MGMT_START_RENDERING  = (1 << 6),
+	MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET     = (1 << 7), /* Only valid for Mali-300 and later */
+};
+
+enum mali200_mgmt_irq {
+	MALI200_REG_VAL_IRQ_END_OF_FRAME          = (1 << 0),
+	MALI200_REG_VAL_IRQ_END_OF_TILE           = (1 << 1),
+	MALI200_REG_VAL_IRQ_HANG                  = (1 << 2),
+	MALI200_REG_VAL_IRQ_FORCE_HANG            = (1 << 3),
+	MALI200_REG_VAL_IRQ_BUS_ERROR             = (1 << 4),
+	MALI200_REG_VAL_IRQ_BUS_STOP              = (1 << 5),
+	MALI200_REG_VAL_IRQ_CNT_0_LIMIT           = (1 << 6),
+	MALI200_REG_VAL_IRQ_CNT_1_LIMIT           = (1 << 7),
+	MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR  = (1 << 8),
+	MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND = (1 << 9),
+	MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW  = (1 << 10),
+	MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW   = (1 << 11),
+	MALI400PP_REG_VAL_IRQ_RESET_COMPLETED       = (1 << 12),
+};
+
+#define MALI200_REG_VAL_IRQ_MASK_ALL  ((enum mali200_mgmt_irq) (\
+				       MALI200_REG_VAL_IRQ_END_OF_FRAME                           |\
+				       MALI200_REG_VAL_IRQ_END_OF_TILE                            |\
+				       MALI200_REG_VAL_IRQ_HANG                                   |\
+				       MALI200_REG_VAL_IRQ_FORCE_HANG                             |\
+				       MALI200_REG_VAL_IRQ_BUS_ERROR                              |\
+				       MALI200_REG_VAL_IRQ_BUS_STOP                               |\
+				       MALI200_REG_VAL_IRQ_CNT_0_LIMIT                            |\
+				       MALI200_REG_VAL_IRQ_CNT_1_LIMIT                            |\
+				       MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR                   |\
+				       MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND                  |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW                   |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW                    |\
+				       MALI400PP_REG_VAL_IRQ_RESET_COMPLETED))
+
+#define MALI200_REG_VAL_IRQ_MASK_USED ((enum mali200_mgmt_irq) (\
+				       MALI200_REG_VAL_IRQ_END_OF_FRAME                           |\
+				       MALI200_REG_VAL_IRQ_FORCE_HANG                             |\
+				       MALI200_REG_VAL_IRQ_BUS_ERROR                              |\
+				       MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR                   |\
+				       MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND                  |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW                   |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW))
+
+#define MALI200_REG_VAL_IRQ_MASK_NONE ((enum mali200_mgmt_irq)(0))
+
+enum mali200_mgmt_status {
+	MALI200_REG_VAL_STATUS_RENDERING_ACTIVE     = (1 << 0),
+	MALI200_REG_VAL_STATUS_BUS_STOPPED          = (1 << 4),
+};
+
+enum mali200_render_unit {
+	MALI200_REG_ADDR_FRAME = 0x0000,
+	MALI200_REG_ADDR_RSW   = 0x0004,
+	MALI200_REG_ADDR_STACK = 0x0030,
+	MALI200_REG_ADDR_STACK_SIZE = 0x0034,
+	MALI200_REG_ADDR_ORIGIN_OFFSET_X  = 0x0040
+};
+
+enum mali200_wb_unit {
+	MALI200_REG_ADDR_WB0 = 0x0100,
+	MALI200_REG_ADDR_WB1 = 0x0200,
+	MALI200_REG_ADDR_WB2 = 0x0300
+};
+
+enum mali200_wb_unit_regs {
+	MALI200_REG_ADDR_WB_SOURCE_SELECT = 0x0000,
+	MALI200_REG_ADDR_WB_SOURCE_ADDR   = 0x0004,
+};
+
+/* This should be in the top 16 bit of the version register of Mali PP */
+#define MALI200_PP_PRODUCT_ID 0xC807
+#define MALI300_PP_PRODUCT_ID 0xCE07
+#define MALI400_PP_PRODUCT_ID 0xCD07
+#define MALI450_PP_PRODUCT_ID 0xCF07
+
+
+#endif /* _MALI200_REGS_H_ */
diff --git a/utgard/r5p1/regs/mali_gp_regs.h b/utgard/r5p1/regs/mali_gp_regs.h
new file mode 100755
index 0000000..c5e6116
--- /dev/null
+++ b/utgard/r5p1/regs/mali_gp_regs.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _MALIGP2_CONROL_REGS_H_
+#define _MALIGP2_CONROL_REGS_H_
+
+/**
+ * These are the different geometry processor control registers.
+ * Their usage is to control and monitor the operation of the
+ * Vertex Shader and the Polygon List Builder in the geometry processor.
+ * Addresses are in 32-bit word relative sizes.
+ * @see [P0081] "Geometry Processor Data Structures" for details
+ */
+
+typedef enum {
+	MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR           = 0x00,
+	MALIGP2_REG_ADDR_MGMT_VSCL_END_ADDR             = 0x04,
+	MALIGP2_REG_ADDR_MGMT_PLBUCL_START_ADDR         = 0x08,
+	MALIGP2_REG_ADDR_MGMT_PLBUCL_END_ADDR           = 0x0c,
+	MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR     = 0x10,
+	MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR       = 0x14,
+	MALIGP2_REG_ADDR_MGMT_CMD                       = 0x20,
+	MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT               = 0x24,
+	MALIGP2_REG_ADDR_MGMT_INT_CLEAR                 = 0x28,
+	MALIGP2_REG_ADDR_MGMT_INT_MASK                  = 0x2C,
+	MALIGP2_REG_ADDR_MGMT_INT_STAT                  = 0x30,
+	MALIGP2_REG_ADDR_MGMT_WRITE_BOUND_LOW           = 0x34,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE         = 0x3C,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE         = 0x40,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC            = 0x44,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC            = 0x48,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE          = 0x4C,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE          = 0x50,
+	MALIGP2_REG_ADDR_MGMT_STATUS                    = 0x68,
+	MALIGP2_REG_ADDR_MGMT_VERSION                   = 0x6C,
+	MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR_READ      = 0x80,
+	MALIGP2_REG_ADDR_MGMT_PLBCL_START_ADDR_READ     = 0x84,
+	MALIGP2_CONTR_AXI_BUS_ERROR_STAT                = 0x94,
+	MALIGP2_REGISTER_ADDRESS_SPACE_SIZE             = 0x98,
+} maligp_reg_addr_mgmt_addr;
+
+#define MALIGP2_REG_VAL_PERF_CNT_ENABLE 1
+
+/**
+ * Commands to geometry processor.
+ *  @see MALIGP2_CTRL_REG_CMD
+ */
+typedef enum {
+	MALIGP2_REG_VAL_CMD_START_VS                    = (1 << 0),
+	MALIGP2_REG_VAL_CMD_START_PLBU                  = (1 << 1),
+	MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC   = (1 << 4),
+	MALIGP2_REG_VAL_CMD_RESET                               = (1 << 5),
+	MALIGP2_REG_VAL_CMD_FORCE_HANG                  = (1 << 6),
+	MALIGP2_REG_VAL_CMD_STOP_BUS                    = (1 << 9),
+	MALI400GP_REG_VAL_CMD_SOFT_RESET                = (1 << 10), /* only valid for Mali-300 and later */
+} mgp_contr_reg_val_cmd;
+
+
+/**  @defgroup MALIGP2_IRQ
+ * Interrupt status of geometry processor.
+ *  @see MALIGP2_CTRL_REG_INT_RAWSTAT, MALIGP2_REG_ADDR_MGMT_INT_CLEAR,
+ *       MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_ADDR_MGMT_INT_STAT
+ * @{
+ */
+#define MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      (1 << 0)
+#define MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    (1 << 1)
+#define MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     (1 << 2)
+#define MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ          (1 << 3)
+#define MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ        (1 << 4)
+#define MALIGP2_REG_VAL_IRQ_HANG                (1 << 5)
+#define MALIGP2_REG_VAL_IRQ_FORCE_HANG          (1 << 6)
+#define MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT    (1 << 7)
+#define MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT    (1 << 8)
+#define MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     (1 << 9)
+#define MALIGP2_REG_VAL_IRQ_SYNC_ERROR          (1 << 10)
+#define MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       (1 << 11)
+#define MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED     (1 << 12)
+#define MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      (1 << 13)
+#define MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     (1 << 14)
+#define MALI400GP_REG_VAL_IRQ_RESET_COMPLETED     (1 << 19)
+#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW (1 << 20)
+#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  (1 << 21)
+#define MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS  (1 << 22)
+
+/* Mask defining all IRQs in Mali GP */
+#define MALIGP2_REG_VAL_IRQ_MASK_ALL \
+	(\
+	 MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     | \
+	 MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ          | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ        | \
+	 MALIGP2_REG_VAL_IRQ_HANG                | \
+	 MALIGP2_REG_VAL_IRQ_FORCE_HANG          | \
+	 MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT    | \
+	 MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT    | \
+	 MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     | \
+	 MALIGP2_REG_VAL_IRQ_SYNC_ERROR          | \
+	 MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       | \
+	 MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED     | \
+	 MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      | \
+	 MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     | \
+	 MALI400GP_REG_VAL_IRQ_RESET_COMPLETED     | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  | \
+	 MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+/* Mask defining the IRQs in Mali GP which we use */
+#define MALIGP2_REG_VAL_IRQ_MASK_USED \
+	(\
+	 MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     | \
+	 MALIGP2_REG_VAL_IRQ_FORCE_HANG          | \
+	 MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     | \
+	 MALIGP2_REG_VAL_IRQ_SYNC_ERROR          | \
+	 MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       | \
+	 MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      | \
+	 MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  | \
+	 MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+/* Mask defining non IRQs on MaliGP2*/
+#define MALIGP2_REG_VAL_IRQ_MASK_NONE 0
+
+/** }@ defgroup MALIGP2_IRQ*/
+
+/** @defgroup MALIGP2_STATUS
+ * The different Status values to the geometry processor.
+ *  @see MALIGP2_CTRL_REG_STATUS
+ * @{
+ */
+#define MALIGP2_REG_VAL_STATUS_VS_ACTIVE         0x0002
+#define MALIGP2_REG_VAL_STATUS_BUS_STOPPED       0x0004
+#define MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE       0x0008
+#define MALIGP2_REG_VAL_STATUS_BUS_ERROR         0x0040
+#define MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR   0x0100
+/** }@ defgroup MALIGP2_STATUS*/
+
+#define MALIGP2_REG_VAL_STATUS_MASK_ACTIVE (\
+		MALIGP2_REG_VAL_STATUS_VS_ACTIVE|\
+		MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE)
+
+
+#define MALIGP2_REG_VAL_STATUS_MASK_ERROR (\
+		MALIGP2_REG_VAL_STATUS_BUS_ERROR |\
+		MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR )
+
+/* This should be in the top 16 bit of the version register of gp.*/
+#define MALI200_GP_PRODUCT_ID 0xA07
+#define MALI300_GP_PRODUCT_ID 0xC07
+#define MALI400_GP_PRODUCT_ID 0xB07
+#define MALI450_GP_PRODUCT_ID 0xD07
+
+/**
+ * The different sources for instrumented on the geometry processor.
+ *  @see MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC
+ */
+
+enum MALIGP2_cont_reg_perf_cnt_src {
+	MALIGP2_REG_VAL_PERF_CNT1_SRC_NUMBER_OF_VERTICES_PROCESSED = 0x0a,
+};
+
+#endif
diff --git a/utgard/r5p1/timestamp-arm11-cc/mali_timestamp.c b/utgard/r5p1/timestamp-arm11-cc/mali_timestamp.c
new file mode 100755
index 0000000..a486e2f
--- /dev/null
+++ b/utgard/r5p1/timestamp-arm11-cc/mali_timestamp.c
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2010-2011, 2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timestamp.h"
+
+/* This file is intentionally left empty, as all functions are inlined in mali_profiling_sampler.h */
diff --git a/utgard/r5p1/timestamp-arm11-cc/mali_timestamp.h b/utgard/r5p1/timestamp-arm11-cc/mali_timestamp.h
new file mode 100755
index 0000000..65f3ab2
--- /dev/null
+++ b/utgard/r5p1/timestamp-arm11-cc/mali_timestamp.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMESTAMP_H__
+#define __MALI_TIMESTAMP_H__
+
+#include "mali_osk.h"
+
+MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void)
+{
+	/*
+	 * reset counters and overflow flags
+	 */
+
+	u32 mask = (1 << 0) | /* enable all three counters */
+		   (0 << 1) | /* reset both Count Registers to 0x0 */
+		   (1 << 2) | /* reset the Cycle Counter Register to 0x0 */
+		   (0 << 3) | /* 1 = Cycle Counter Register counts every 64th processor clock cycle */
+		   (0 << 4) | /* Count Register 0 interrupt enable */
+		   (0 << 5) | /* Count Register 1 interrupt enable */
+		   (0 << 6) | /* Cycle Counter interrupt enable */
+		   (0 << 8) | /* Count Register 0 overflow flag (clear or write, flag on read) */
+		   (0 << 9) | /* Count Register 1 overflow flag (clear or write, flag on read) */
+		   (1 << 10); /* Cycle Counter Register overflow flag (clear or write, flag on read) */
+
+	__asm__ __volatile__("MCR    p15, 0, %0, c15, c12, 0" : : "r"(mask));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE u64 _mali_timestamp_get(void)
+{
+	u32 result;
+
+	/* this is for the clock cycles */
+	__asm__ __volatile__("MRC    p15, 0, %0, c15, c12, 1" : "=r"(result));
+
+	return (u64)result;
+}
+
+#endif /* __MALI_TIMESTAMP_H__ */
diff --git a/utgard/r5p1/timestamp-default/mali_timestamp.c b/utgard/r5p1/timestamp-default/mali_timestamp.c
new file mode 100755
index 0000000..a486e2f
--- /dev/null
+++ b/utgard/r5p1/timestamp-default/mali_timestamp.c
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2010-2011, 2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timestamp.h"
+
+/* This file is intentionally left empty, as all functions are inlined in mali_profiling_sampler.h */
diff --git a/utgard/r5p1/timestamp-default/mali_timestamp.h b/utgard/r5p1/timestamp-default/mali_timestamp.h
new file mode 100755
index 0000000..8ba4706
--- /dev/null
+++ b/utgard/r5p1/timestamp-default/mali_timestamp.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMESTAMP_H__
+#define __MALI_TIMESTAMP_H__
+
+#include "mali_osk.h"
+
+MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void)
+{
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE u64 _mali_timestamp_get(void)
+{
+	return _mali_osk_boot_time_get_ns();
+}
+
+#endif /* __MALI_TIMESTAMP_H__ */
diff --git a/utgard/r6p1/.version b/utgard/r6p1/.version
new file mode 100644
index 0000000..3b2c330
--- /dev/null
+++ b/utgard/r6p1/.version
@@ -0,0 +1 @@
+r6p1-01rel0
diff --git a/utgard/r6p1/Kbuild b/utgard/r6p1/Kbuild
new file mode 100755
index 0000000..66b14af
--- /dev/null
+++ b/utgard/r6p1/Kbuild
@@ -0,0 +1,258 @@
+#
+# Copyright (C) 2010-2011 ARM Limited. All rights reserved.
+# 
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+# 
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+# This file is called by the Linux build system.
+
+# set up defaults if not defined by the user
+include $(src)/platform/Kbuild.amlogic
+
+TIMESTAMP ?= default
+OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB ?= 16
+USING_GPU_UTILIZATION ?= 0
+PROFILING_SKIP_PP_JOBS ?= 0
+PROFILING_SKIP_PP_AND_GP_JOBS ?= 0
+MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP ?= 0
+MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED ?= 0
+MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS ?= 0
+MALI_UPPER_HALF_SCHEDULING ?= 1
+MALI_ENABLE_CPU_CYCLES ?= 0
+
+# For customer releases the Linux Device Drivers will be provided as ARM proprietary and GPL releases:
+# The ARM proprietary product will only include the license/proprietary directory
+# The GPL product will only include the license/gpl directory
+ifeq ($(wildcard $(src)/linux/license/gpl/*),)
+    ccflags-y += -I$(src)/linux/license/proprietary
+    ifeq ($(CONFIG_MALI400_PROFILING),y)
+        $(error Profiling is incompatible with non-GPL license)
+    endif
+    ifeq ($(CONFIG_PM_RUNTIME),y)
+        $(error Runtime PM is incompatible with non-GPL license)
+    endif
+    ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
+        $(error DMA-BUF is incompatible with non-GPL license)
+    endif
+    $(error Linux Device integration is incompatible with non-GPL license)
+else
+    ccflags-y += -I$(src)/linux/license/gpl
+endif
+
+ifeq ($(USING_GPU_UTILIZATION), 1)
+    ifeq ($(USING_DVFS), 1)
+        $(error USING_GPU_UTILIZATION conflict with USING_DVFS you can read the Integration Guide to choose which one do you need)
+    endif
+endif
+
+ifeq ($(MALI_PLATFORM_FILES),)
+ifeq ($(CONFIG_ARCH_EXYNOS4),y)
+EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1
+export MALI_PLATFORM=exynos4
+export MALI_PLATFORM_FILES_BUILDIN = $(notdir $(wildcard $(src)/platform/$(MALI_PLATFORM)/*.c))
+export MALI_PLATFORM_FILES_ADD_PREFIX = $(addprefix platform/$(MALI_PLATFORM)/,$(MALI_PLATFORM_FILES_BUILDIN)) 
+endif
+endif
+
+mali-y += \
+	linux/mali_osk_atomics.o \
+	linux/mali_osk_irq.o \
+	linux/mali_osk_wq.o \
+	linux/mali_osk_locks.o \
+	linux/mali_osk_wait_queue.o \
+	linux/mali_osk_low_level_mem.o \
+	linux/mali_osk_math.o \
+	linux/mali_osk_memory.o \
+	linux/mali_osk_misc.o \
+	linux/mali_osk_mali.o \
+	linux/mali_osk_notification.o \
+	linux/mali_osk_time.o \
+	linux/mali_osk_timers.o \
+	linux/mali_osk_bitmap.o
+
+mali-y += linux/mali_memory.o linux/mali_memory_os_alloc.o
+mali-y += linux/mali_memory_external.o
+mali-y += linux/mali_memory_block_alloc.o
+mali-y += linux/mali_memory_swap_alloc.o
+
+mali-y += \
+	linux/mali_memory_manager.o \
+	linux/mali_memory_virtual.o \
+	linux/mali_memory_util.o \
+	linux/mali_memory_cow.o \
+	linux/mali_memory_defer_bind.o
+
+mali-y += \
+	linux/mali_ukk_mem.o \
+	linux/mali_ukk_gp.o \
+	linux/mali_ukk_pp.o \
+	linux/mali_ukk_core.o \
+	linux/mali_ukk_soft_job.o \
+	linux/mali_ukk_timeline.o
+
+mali-$(CONFIG_MALI_DEVFREQ) += \
+	linux/mali_devfreq.o \
+	common/mali_pm_metrics.o
+
+# Source files which always are included in a build
+mali-y += \
+	common/mali_kernel_core.o \
+	linux/mali_kernel_linux.o \
+	common/mali_session.o \
+	linux/mali_device_pause_resume.o \
+	common/mali_kernel_vsync.o \
+	linux/mali_ukk_vsync.o \
+	linux/mali_kernel_sysfs.o \
+	common/mali_mmu.o \
+	common/mali_mmu_page_directory.o \
+	common/mali_mem_validation.o \
+	common/mali_hw_core.o \
+	common/mali_gp.o \
+	common/mali_pp.o \
+	common/mali_pp_job.o \
+	common/mali_gp_job.o \
+	common/mali_soft_job.o \
+	common/mali_scheduler.o \
+	common/mali_executor.o \
+	common/mali_group.o \
+	common/mali_dlbu.o \
+	common/mali_broadcast.o \
+	common/mali_pm.o \
+	common/mali_pmu.o \
+	common/mali_user_settings_db.o \
+	common/mali_kernel_utilization.o \
+	common/mali_control_timer.o \
+	common/mali_l2_cache.o \
+	common/mali_timeline.o \
+	common/mali_timeline_fence_wait.o \
+	common/mali_timeline_sync_fence.o \
+	common/mali_spinlock_reentrant.o \
+	common/mali_pm_domain.o \
+	linux/mali_osk_pm.o \
+	linux/mali_pmu_power_up_down.o \
+	__malidrv_build_info.o
+
+ifneq ($(wildcard $(src)/linux/mali_slp_global_lock.c),)
+	mali-y += linux/mali_slp_global_lock.o
+endif
+
+ifneq ($(MALI_PLATFORM_FILES),)
+	mali-y += $(MALI_PLATFORM_FILES:.c=.o)
+endif
+
+ifneq ($(MALI_PLATFORM_FILES_ADD_PREFIX),)
+	mali-y += $(MALI_PLATFORM_FILES_ADD_PREFIX:.c=.o)
+endif
+
+mali-$(CONFIG_MALI400_PROFILING) += linux/mali_ukk_profiling.o
+mali-$(CONFIG_MALI400_PROFILING) += linux/mali_osk_profiling.o
+
+mali-$(CONFIG_MALI400_INTERNAL_PROFILING) += linux/mali_profiling_internal.o timestamp-$(TIMESTAMP)/mali_timestamp.o
+ccflags-$(CONFIG_MALI400_INTERNAL_PROFILING) += -I$(src)/timestamp-$(TIMESTAMP)
+
+mali-$(CONFIG_DMA_SHARED_BUFFER) += linux/mali_memory_dma_buf.o
+mali-$(CONFIG_DMA_SHARED_BUFFER) += linux/mali_memory_secure.o
+mali-$(CONFIG_SYNC) += linux/mali_sync.o
+mali-$(CONFIG_MALI_DMA_BUF_FENCE) += linux/mali_dma_fence.o
+ccflags-$(CONFIG_SYNC) += -Idrivers/staging/android
+
+mali-$(CONFIG_MALI400_UMP) += linux/mali_memory_ump.o
+
+mali-$(CONFIG_MALI_DVFS) += common/mali_dvfs_policy.o
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI400) := mali.o
+
+ccflags-y += $(EXTRA_DEFINES)
+
+# Set up our defines, which will be passed to gcc
+ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP)
+ccflags-y += -DMALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED=$(MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED)
+ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS)
+ifdef CONFIG_MALI400_DEBUG
+ccflags-y += -DMALI_STATE_TRACKING=1
+else
+ccflags-y += -DMALI_STATE_TRACKING=0
+endif
+ccflags-y += -DMALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB)
+ccflags-y += -DUSING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION)
+ccflags-y += -DMALI_ENABLE_CPU_CYCLES=$(MALI_ENABLE_CPU_CYCLES)
+
+ifeq ($(MALI_UPPER_HALF_SCHEDULING),1)
+	ccflags-y += -DMALI_UPPER_HALF_SCHEDULING
+endif
+
+#build-in include path is different
+ifeq ($(MALI_PLATFORM_FILES),)
+ccflags-$(CONFIG_MALI400_UMP) += -I$(src)/../ump/include/
+else
+ccflags-$(CONFIG_MALI400_UMP) += -I$(src)/../../ump/include/ump
+endif
+ccflags-$(CONFIG_MALI400_DEBUG) += -DDEBUG
+
+# Use our defines when compiling
+ccflags-y += -I$(src) -I$(src)/include -I$(src)/common -I$(src)/linux -I$(src)/platform
+
+# Get subversion revision number, fall back to only ${MALI_RELEASE_NAME} if no svn info is available
+MALI_RELEASE_NAME=$(shell cat $(src)/.version 2> /dev/null)
+
+SVN_INFO = (cd $(src); svn info 2>/dev/null)
+
+ifneq ($(shell $(SVN_INFO) 2>/dev/null),)
+# SVN detected
+SVN_REV := $(shell $(SVN_INFO) | grep '^Revision: '| sed -e 's/^Revision: //' 2>/dev/null)
+DRIVER_REV := $(MALI_RELEASE_NAME)-r$(SVN_REV)
+CHANGE_DATE := $(shell $(SVN_INFO) | grep '^Last Changed Date: ' | cut -d: -f2- | cut -b2-)
+CHANGED_REVISION := $(shell $(SVN_INFO) | grep '^Last Changed Rev: ' | cut -d: -f2- | cut -b2-)
+REPO_URL := $(shell $(SVN_INFO) | grep '^URL: ' | cut -d: -f2- | cut -b2-)
+
+else # SVN
+GIT_REV := $(shell cd $(src); git describe --always 2>/dev/null)
+ifneq ($(GIT_REV),)
+# Git detected
+DRIVER_REV := $(MALI_RELEASE_NAME)-$(GIT_REV)
+CHANGE_DATE := $(shell cd $(src); git log -1 --format="%ci")
+CHANGED_REVISION := $(GIT_REV)
+REPO_URL := $(shell cd $(src); git describe --all --always 2>/dev/null)
+
+else # Git
+# No Git or SVN detected
+DRIVER_REV := $(MALI_RELEASE_NAME)
+CHANGE_DATE := $(MALI_RELEASE_NAME)
+CHANGED_REVISION := $(MALI_RELEASE_NAME)
+endif
+endif
+
+ccflags-y += -DSVN_REV_STRING=\"$(DRIVER_REV)\"
+
+VERSION_STRINGS :=
+VERSION_STRINGS += API_VERSION=$(shell cd $(src); grep "\#define _MALI_API_VERSION" $(FILES_PREFIX)include/linux/mali/mali_utgard_uk_types.h | cut -d' ' -f 3 )
+VERSION_STRINGS += REPO_URL=$(REPO_URL)
+VERSION_STRINGS += REVISION=$(DRIVER_REV)
+VERSION_STRINGS += CHANGED_REVISION=$(CHANGED_REVISION)
+VERSION_STRINGS += CHANGE_DATE=$(CHANGE_DATE)
+VERSION_STRINGS += BUILD_DATE=$(shell date)
+ifdef CONFIG_MALI400_DEBUG
+VERSION_STRINGS += BUILD=debug
+else
+VERSION_STRINGS += BUILD=release
+endif
+VERSION_STRINGS += TARGET_PLATFORM=$(TARGET_PLATFORM)
+VERSION_STRINGS += MALI_PLATFORM=$(MALI_PLATFORM)
+VERSION_STRINGS += KDIR=$(KDIR)
+VERSION_STRINGS += OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB)
+VERSION_STRINGS += USING_UMP=$(CONFIG_MALI400_UMP)
+VERSION_STRINGS += USING_PROFILING=$(CONFIG_MALI400_PROFILING)
+VERSION_STRINGS += USING_INTERNAL_PROFILING=$(CONFIG_MALI400_INTERNAL_PROFILING)
+VERSION_STRINGS += USING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION)
+VERSION_STRINGS += USING_DVFS=$(CONFIG_MALI_DVFS)
+VERSION_STRINGS += USING_DMA_BUF_FENCE = $(CONFIG_MALI_DMA_BUF_FENCE)
+VERSION_STRINGS += MALI_UPPER_HALF_SCHEDULING=$(MALI_UPPER_HALF_SCHEDULING)
+
+# Create file with Mali driver configuration
+$(src)/__malidrv_build_info.c:
+	@echo 'const char *__malidrv_build_info(void) { return "malidrv: $(VERSION_STRINGS)";}' > $(src)/__malidrv_build_info.c
diff --git a/utgard/r6p1/Kconfig b/utgard/r6p1/Kconfig
new file mode 100755
index 0000000..fbf08de
--- /dev/null
+++ b/utgard/r6p1/Kconfig
@@ -0,0 +1,136 @@
+menu "Mali GPU OpenGL device driver"
+config MALI400
+	tristate "Mali-300/400/450 support"
+	depends on ARM || ARM64
+	default m
+	select DMA_SHARED_BUFFER
+	---help---
+	  This enables support for the ARM Mali-300, Mali-400, and Mali-450
+	  GPUs.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called mali.
+
+config MALI470
+	bool "Enable Mali-470 support"
+	depends on MALI400
+	---help---
+	  This enables support for Mali-470 specific features.
+
+config MALI400_DEBUG
+	bool "Enable debug in Mali driver"
+	depends on MALI400
+	default n
+	---help---
+	  This enabled extra debug checks and messages in the Mali driver.
+
+config MALI400_PROFILING_EXTRA_SUPPORT
+	bool "Select other items in kernel to support PROFILING."
+	depends on MALI400_PROFILING
+	select PROFILING
+	select FTRACE
+	select PERF_EVENTS
+	select ENABLE_DEFAULT_TRACERS
+	select DEBUG_MUTEXES
+	select HIGH_RES_TIMERS
+	select HW_PERF_EVENTS
+	select CPU_FREQ
+	select MALI400_DEBUG
+
+config MALI400_PROFILING
+	bool "Enable Mali profiling"
+	depends on MALI400
+	select TRACEPOINTS
+	default n
+	---help---
+	  This enables gator profiling of Mali GPU events.
+
+config MALI400_INTERNAL_PROFILING
+	bool "Enable internal Mali profiling API"
+	depends on MALI400_PROFILING
+	default n
+	---help---
+	  This enables the internal legacy Mali profiling API.
+
+config MALI400_UMP
+	bool "Enable UMP support"
+	depends on MALI400
+	---help---
+	  This enables support for the UMP memory sharing API in the Mali driver.
+
+config MALI_DVFS
+	bool "Enable Mali dynamically frequency change"
+	depends on MALI400 && !MALI_DEVFREQ
+	default n
+	---help---
+	  This enables support for dynamic change frequency of Mali with the goal of lowering power consumption.
+
+config MALI_DMA_BUF_MAP_ON_ATTACH
+	bool "Map dma-buf attachments on attach"
+	depends on MALI400 && DMA_SHARED_BUFFER
+	default y
+	---help---
+	  This makes the Mali driver map dma-buf attachments after doing
+	  attach. If this is not set the dma-buf attachments will be mapped for
+	  every time the GPU need to access the buffer.
+
+	  Mapping for each access can cause lower performance.
+
+config MALI_SHARED_INTERRUPTS
+	bool "Support for shared interrupts"
+	depends on MALI400
+	default n
+	---help---
+	  Adds functionality required to properly support shared interrupts.  Without this support,
+	  the device driver will fail during insmod if it detects shared interrupts.  This also
+	  works when the GPU is not using shared interrupts, but might have a slight performance
+	  impact.
+
+if ARCH_MESON6
+config	MESON6_GPU_EXTRA
+	bool "M6 fix"
+	depends on MALI400
+	default y
+	select MALI_SHARED_INTERRUPTS
+endif
+
+config MALI_PMU_PARALLEL_POWER_UP
+	bool "Power up Mali PMU domains in parallel"
+	depends on MALI400
+	default n
+	---help---
+	  This makes the Mali driver power up all PMU power domains in parallel, instead of
+	  powering up domains one by one, with a slight delay in between. Powering on all power
+	  domains at the same time may cause peak currents higher than what some systems can handle.
+	  These systems must not enable this option.
+
+config MALI_DT
+	bool "Using device tree to initialize module"
+	depends on MALI400 && OF
+	default n
+	---help---
+	  This enable the Mali driver to choose the device tree path to get platform resoures
+	  and disable the old config method. Mali driver could run on the platform which the
+	  device tree is enabled in kernel and corresponding hardware description is implemented
+	  properly in device DTS file.
+
+config MALI_DEVFREQ
+	bool "Using devfreq to tuning frequency"
+	depends on MALI400 && PM_DEVFREQ
+	default n
+	---help---
+	Support devfreq for Mali.
+
+	Using the devfreq framework and, by default, the simpleondemand
+	governor, the frequency of Mali will be dynamically selected from the
+	available OPPs.
+
+config MALI_QUIET
+	bool "Make Mali driver very quiet"
+	depends on MALI400 && !MALI400_DEBUG
+	default n
+	---help---
+	  This forces the Mali driver to never print any messages.
+
+	  If unsure, say N.
+endmenu
diff --git a/utgard/r6p1/Makefile b/utgard/r6p1/Makefile
new file mode 100755
index 0000000..31fb99e
--- /dev/null
+++ b/utgard/r6p1/Makefile
@@ -0,0 +1,201 @@
+#
+# Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+# 
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+# 
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+USE_UMPV2=0
+USING_PROFILING ?= 1
+USING_INTERNAL_PROFILING ?= 0
+USING_DVFS ?= 1
+MALI_HEATMAPS_ENABLED ?= 0
+MALI_DMA_BUF_MAP_ON_ATTACH ?= 1
+MALI_PMU_PARALLEL_POWER_UP ?= 0
+USING_DT ?= 0
+MALI_MEM_SWAP_TRACKING ?= 0
+USING_DEVFREQ ?= 0
+
+# The Makefile sets up "arch" based on the CONFIG, creates the version info
+# string and the __malidrv_build_info.c file, and then call the Linux build
+# system to actually build the driver. After that point the Kbuild file takes
+# over.
+
+# set up defaults if not defined by the user
+ARCH ?= arm
+
+OSKOS=linux
+FILES_PREFIX=
+
+check_cc2 = \
+	$(shell if $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; \
+	then \
+		echo "$(2)"; \
+	else \
+		echo "$(3)"; \
+	fi ;)
+
+# This conditional makefile exports the global definition ARM_INTERNAL_BUILD. Customer releases will not include arm_internal.mak
+-include ../../../arm_internal.mak
+
+# Give warning of old config parameters are used
+ifneq ($(CONFIG),)
+$(warning "You have specified the CONFIG variable which is no longer in used. Use TARGET_PLATFORM instead.")
+endif
+
+ifneq ($(CPU),)
+$(warning "You have specified the CPU variable which is no longer in used. Use TARGET_PLATFORM instead.")
+endif
+
+# Include the mapping between TARGET_PLATFORM and KDIR + MALI_PLATFORM
+-include MALI_CONFIGURATION
+export KDIR ?= $(KDIR-$(TARGET_PLATFORM))
+export MALI_PLATFORM ?= $(MALI_PLATFORM-$(TARGET_PLATFORM))
+
+ifneq ($(TARGET_PLATFORM),)
+ifeq ($(MALI_PLATFORM),)
+$(error "Invalid TARGET_PLATFORM: $(TARGET_PLATFORM)")
+endif
+endif
+
+# validate lookup result
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(TARGET_PLATFORM))
+endif
+
+ifeq ($(USING_GPU_UTILIZATION), 1)
+    ifeq ($(USING_DVFS), 1)
+        $(error USING_GPU_UTILIZATION conflict with USING_DVFS you can read the Integration Guide to choose which one do you need)
+    endif
+endif
+
+ifeq ($(USING_UMP),1)
+export CONFIG_MALI400_UMP=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_UMP=1
+ifeq ($(USE_UMPV2),1)
+UMP_SYMVERS_FILE ?= ../umpv2/Module.symvers
+else
+UMP_SYMVERS_FILE ?= ../ump/Module.symvers
+endif
+KBUILD_EXTRA_SYMBOLS = $(realpath $(UMP_SYMVERS_FILE))
+$(warning $(KBUILD_EXTRA_SYMBOLS))
+endif
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+include $(KDIR)/.config
+
+ifeq ($(ARCH), arm)
+# when compiling for ARM we're cross compiling
+export CROSS_COMPILE ?= $(call check_cc2, arm-linux-gnueabi-gcc, arm-linux-gnueabi-, arm-none-linux-gnueabi-)
+endif
+
+# report detected/selected settings
+ifdef ARM_INTERNAL_BUILD
+$(warning TARGET_PLATFORM $(TARGET_PLATFORM))
+$(warning KDIR $(KDIR))
+$(warning MALI_PLATFORM $(MALI_PLATFORM))
+endif
+
+# Set up build config
+export CONFIG_MALI400=m
+export CONFIG_MALI450=y
+export CONFIG_MALI470=y
+
+export EXTRA_DEFINES += -DCONFIG_MALI400=1
+export EXTRA_DEFINES += -DCONFIG_MALI450=1
+export EXTRA_DEFINES += -DCONFIG_MALI470=1
+
+ifneq ($(MALI_PLATFORM),)
+export EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1
+export MALI_PLATFORM_FILES = $(wildcard platform/$(MALI_PLATFORM)/*.c)
+endif
+
+ifeq ($(USING_PROFILING),1)
+ifeq ($(CONFIG_TRACEPOINTS),)
+$(warning CONFIG_TRACEPOINTS required for profiling)
+else
+export CONFIG_MALI400_PROFILING=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_PROFILING=1
+ifeq ($(USING_INTERNAL_PROFILING),1)
+export CONFIG_MALI400_INTERNAL_PROFILING=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_INTERNAL_PROFILING=1
+endif
+ifeq ($(MALI_HEATMAPS_ENABLED),1)
+export MALI_HEATMAPS_ENABLED=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_HEATMAPS_ENABLED
+endif
+endif
+endif
+
+ifeq ($(MALI_DMA_BUF_MAP_ON_ATTACH),1)
+export CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DMA_BUF_MAP_ON_ATTACH
+endif
+
+ifeq ($(MALI_SHARED_INTERRUPTS),1)
+export CONFIG_MALI_SHARED_INTERRUPTS=y
+export EXTRA_DEFINES += -DCONFIG_MALI_SHARED_INTERRUPTS
+endif
+
+ifeq ($(USING_DVFS),1)
+	xxxyyyy
+export CONFIG_MALI_DVFS=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DVFS
+endif
+
+ifeq ($(MALI_PMU_PARALLEL_POWER_UP),1)
+export CONFIG_MALI_PMU_PARALLEL_POWER_UP=y
+export EXTRA_DEFINES += -DCONFIG_MALI_PMU_PARALLEL_POWER_UP
+endif
+
+ifdef CONFIG_OF
+ifeq ($(USING_DT),1)
+export CONFIG_MALI_DT=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DT
+endif
+endif
+
+ifeq ($(USING_DEVFREQ), 1)
+ifdef CONFIG_PM_DEVFREQ
+export CONFIG_MALI_DEVFREQ=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DEVFREQ=1
+else
+$(warning "You want to support DEVFREQ but kernel didn't support DEVFREQ.")
+endif
+endif
+
+ifneq ($(BUILD),release)
+# Debug
+export CONFIG_MALI400_DEBUG=y
+else
+# Release
+ifeq ($(MALI_QUIET),1)
+export CONFIG_MALI_QUIET=y
+export EXTRA_DEFINES += -DCONFIG_MALI_QUIET
+endif
+endif
+
+ifeq ($(MALI_SKIP_JOBS),1)
+EXTRA_DEFINES += -DPROFILING_SKIP_PP_JOBS=1 -DPROFILING_SKIP_GP_JOBS=1
+endif
+
+ifeq ($(MALI_MEM_SWAP_TRACKING),1)
+EXTRA_DEFINES += -DMALI_MEM_SWAP_TRACKING=1
+endif
+
+all: $(UMP_SYMVERS_FILE)
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) modules
+	@rm $(FILES_PREFIX)__malidrv_build_info.c $(FILES_PREFIX)__malidrv_build_info.o
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+kernelrelease:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) kernelrelease
+
+export CONFIG KBUILD_EXTRA_SYMBOLS
diff --git a/utgard/r6p1/clean.sh b/utgard/r6p1/clean.sh
new file mode 100755
index 0000000..130fd73
--- /dev/null
+++ b/utgard/r6p1/clean.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# rm *.o, *.cmd, *~
+find . -name "*.o" -o -name "*.cmd" -o -name "*~" | xargs rm -rf
+rm	Module.symvers
+rm	mali.ko
+rm	mali.mod.c
+rm	modules.order
diff --git a/utgard/r6p1/common/mali_broadcast.c b/utgard/r6p1/common/mali_broadcast.c
new file mode 100755
index 0000000..4c4b2bc
--- /dev/null
+++ b/utgard/r6p1/common/mali_broadcast.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_broadcast.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+#define MALI_BROADCAST_REGISTER_SIZE      0x1000
+#define MALI_BROADCAST_REG_BROADCAST_MASK    0x0
+#define MALI_BROADCAST_REG_INTERRUPT_MASK    0x4
+
+struct mali_bcast_unit {
+	struct mali_hw_core hw_core;
+	u32 current_mask;
+};
+
+struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource)
+{
+	struct mali_bcast_unit *bcast_unit = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(resource);
+	MALI_DEBUG_PRINT(2, ("Broadcast: Creating Mali Broadcast unit: %s\n",
+			     resource->description));
+
+	bcast_unit = _mali_osk_malloc(sizeof(struct mali_bcast_unit));
+	if (NULL == bcast_unit) {
+		MALI_PRINT_ERROR(("Broadcast: Failed to allocate memory for Broadcast unit\n"));
+		return NULL;
+	}
+
+	if (_MALI_OSK_ERR_OK == mali_hw_core_create(&bcast_unit->hw_core,
+			resource, MALI_BROADCAST_REGISTER_SIZE)) {
+		bcast_unit->current_mask = 0;
+		mali_bcast_reset(bcast_unit);
+
+		return bcast_unit;
+	} else {
+		MALI_PRINT_ERROR(("Broadcast: Failed map broadcast unit\n"));
+	}
+
+	_mali_osk_free(bcast_unit);
+
+	return NULL;
+}
+
+void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	mali_hw_core_delete(&bcast_unit->hw_core);
+	_mali_osk_free(bcast_unit);
+}
+
+/* Call this function to add the @group's id into bcast mask
+ * Note: redundant calling this function with same @group
+ * doesn't make any difference as calling it once
+ */
+void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit,
+			  struct mali_group *group)
+{
+	u32 bcast_id;
+	u32 broadcast_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group));
+
+	broadcast_mask = bcast_unit->current_mask;
+
+	broadcast_mask |= (bcast_id); /* add PP core to broadcast */
+	broadcast_mask |= (bcast_id << 16); /* add MMU to broadcast */
+
+	/* store mask so we can restore on reset */
+	bcast_unit->current_mask = broadcast_mask;
+}
+
+/* Call this function to remove @group's id from bcast mask
+ * Note: redundant calling this function with same @group
+ * doesn't make any difference as calling it once
+ */
+void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit,
+			     struct mali_group *group)
+{
+	u32 bcast_id;
+	u32 broadcast_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group));
+
+	broadcast_mask = bcast_unit->current_mask;
+
+	broadcast_mask &= ~((bcast_id << 16) | bcast_id);
+
+	/* store mask so we can restore on reset */
+	bcast_unit->current_mask = broadcast_mask;
+}
+
+void mali_bcast_reset(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+
+	MALI_DEBUG_PRINT(4,
+			 ("Broadcast: setting mask 0x%08X + 0x%08X (reset)\n",
+			  bcast_unit->current_mask,
+			  bcast_unit->current_mask & 0xFF));
+
+	/* set broadcast mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_BROADCAST_MASK,
+				    bcast_unit->current_mask);
+
+	/* set IRQ override mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_INTERRUPT_MASK,
+				    bcast_unit->current_mask & 0xFF);
+}
+
+void mali_bcast_disable(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+
+	MALI_DEBUG_PRINT(4, ("Broadcast: setting mask 0x0 + 0x0 (disable)\n"));
+
+	/* set broadcast mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_BROADCAST_MASK,
+				    0x0);
+
+	/* set IRQ override mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_INTERRUPT_MASK,
+				    0x0);
+}
diff --git a/utgard/r6p1/common/mali_broadcast.h b/utgard/r6p1/common/mali_broadcast.h
new file mode 100755
index 0000000..e12e8a2
--- /dev/null
+++ b/utgard/r6p1/common/mali_broadcast.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_BROADCAST_H__
+#define __MALI_BROADCAST_H__
+
+/*
+ *  Interface for the broadcast unit on Mali-450.
+ *
+ * - Represents up to 8 × (MMU + PP) pairs.
+ * - Supports dynamically changing which (MMU + PP) pairs receive the broadcast by
+ *   setting a mask.
+ */
+
+#include "mali_hw_core.h"
+#include "mali_group.h"
+
+struct mali_bcast_unit;
+
+struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource);
+void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit);
+
+/* Add a group to the list of (MMU + PP) pairs broadcasts go out to. */
+void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group);
+
+/* Remove a group to the list of (MMU + PP) pairs broadcasts go out to. */
+void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group);
+
+/* Re-set cached mask. This needs to be called after having been suspended. */
+void mali_bcast_reset(struct mali_bcast_unit *bcast_unit);
+
+/**
+ * Disable broadcast unit
+ *
+ * mali_bcast_enable must be called to re-enable the unit. Cores may not be
+ * added or removed when the unit is disabled.
+ */
+void mali_bcast_disable(struct mali_bcast_unit *bcast_unit);
+
+/**
+ * Re-enable broadcast unit
+ *
+ * This resets the masks to include the cores present when mali_bcast_disable was called.
+ */
+MALI_STATIC_INLINE void mali_bcast_enable(struct mali_bcast_unit *bcast_unit)
+{
+	mali_bcast_reset(bcast_unit);
+}
+
+#endif /* __MALI_BROADCAST_H__ */
diff --git a/utgard/r6p1/common/mali_control_timer.c b/utgard/r6p1/common/mali_control_timer.c
new file mode 100755
index 0000000..fc6ceb4
--- /dev/null
+++ b/utgard/r6p1/common/mali_control_timer.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2010-2012, 2014-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_utilization.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_dvfs_policy.h"
+#include "mali_control_timer.h"
+
+static u64 period_start_time = 0;
+
+static _mali_osk_timer_t *mali_control_timer = NULL;
+static mali_bool timer_running = MALI_FALSE;
+
+static u32 mali_control_timeout = 1000;
+
+void mali_control_timer_add(u32 timeout)
+{
+	_mali_osk_timer_add(mali_control_timer, _mali_osk_time_mstoticks(timeout));
+}
+
+static void mali_control_timer_callback(void *arg)
+{
+	if (mali_utilization_enabled()) {
+		struct mali_gpu_utilization_data *util_data = NULL;
+		u64 time_period = 0;
+		mali_bool need_add_timer = MALI_TRUE;
+
+		/* Calculate gpu utilization */
+		util_data = mali_utilization_calculate(&period_start_time, &time_period, &need_add_timer);
+
+		if (util_data) {
+#if defined(CONFIG_MALI_DVFS)
+			mali_dvfs_policy_realize(util_data, time_period);
+#else
+			mali_utilization_platform_realize(util_data);
+#endif
+
+			if (MALI_TRUE == need_add_timer) {
+				mali_control_timer_add(mali_control_timeout);
+			}
+		}
+	}
+}
+
+/* Init a timer (for now it is used for GPU utilization and dvfs) */
+_mali_osk_errcode_t mali_control_timer_init(void)
+{
+	_mali_osk_device_data data;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		/* Use device specific settings (if defined) */
+		if (0 != data.control_interval) {
+			mali_control_timeout = data.control_interval;
+			MALI_DEBUG_PRINT(2, ("Mali GPU Timer: %u\n", mali_control_timeout));
+		}
+	}
+
+	mali_control_timer = _mali_osk_timer_init();
+	if (NULL == mali_control_timer) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+	_mali_osk_timer_setcallback(mali_control_timer, mali_control_timer_callback, NULL);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_control_timer_term(void)
+{
+	if (NULL != mali_control_timer) {
+		_mali_osk_timer_del(mali_control_timer);
+		timer_running = MALI_FALSE;
+		_mali_osk_timer_term(mali_control_timer);
+		mali_control_timer = NULL;
+	}
+}
+
+mali_bool mali_control_timer_resume(u64 time_now)
+{
+	mali_utilization_data_assert_locked();
+
+	if (timer_running != MALI_TRUE) {
+		timer_running = MALI_TRUE;
+
+		period_start_time = time_now;
+
+		mali_utilization_reset();
+
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+void mali_control_timer_pause(void)
+{
+	mali_utilization_data_assert_locked();
+	if (timer_running == MALI_TRUE) {
+		timer_running = MALI_FALSE;
+	}
+}
+
+void mali_control_timer_suspend(mali_bool suspend)
+{
+	mali_utilization_data_lock();
+
+	if (timer_running == MALI_TRUE) {
+		timer_running = MALI_FALSE;
+
+		mali_utilization_data_unlock();
+
+		if (suspend == MALI_TRUE) {
+			_mali_osk_timer_del(mali_control_timer);
+			mali_utilization_reset();
+		}
+	} else {
+		mali_utilization_data_unlock();
+	}
+}
diff --git a/utgard/r6p1/common/mali_control_timer.h b/utgard/r6p1/common/mali_control_timer.h
new file mode 100755
index 0000000..1265390
--- /dev/null
+++ b/utgard/r6p1/common/mali_control_timer.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2010-2012, 2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_CONTROL_TIMER_H__
+#define __MALI_CONTROL_TIMER_H__
+
+#include "mali_osk.h"
+
+_mali_osk_errcode_t mali_control_timer_init(void);
+
+void mali_control_timer_term(void);
+
+mali_bool mali_control_timer_resume(u64 time_now);
+
+void mali_control_timer_suspend(mali_bool suspend);
+void mali_control_timer_pause(void);
+
+void mali_control_timer_add(u32 timeout);
+
+#endif /* __MALI_CONTROL_TIMER_H__ */
+
diff --git a/utgard/r6p1/common/mali_dlbu.c b/utgard/r6p1/common/mali_dlbu.c
new file mode 100755
index 0000000..4f2a121
--- /dev/null
+++ b/utgard/r6p1/common/mali_dlbu.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_dlbu.h"
+#include "mali_memory.h"
+#include "mali_pp.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "mali_hw_core.h"
+
+/**
+ * Size of DLBU registers in bytes
+ */
+#define MALI_DLBU_SIZE 0x400
+
+mali_dma_addr mali_dlbu_phys_addr = 0;
+static mali_io_address mali_dlbu_cpu_addr = NULL;
+
+/**
+ * DLBU register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_dlbu_register {
+	MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR = 0x0000, /**< Master tile list physical base address;
+                                                             31:12 Physical address to the page used for the DLBU
+                                                             0 DLBU enable - set this bit to 1 enables the AXI bus
+                                                             between PPs and L2s, setting to 0 disables the router and
+                                                             no further transactions are sent to DLBU */
+	MALI_DLBU_REGISTER_MASTER_TLLIST_VADDR     = 0x0004, /**< Master tile list virtual base address;
+                                                             31:12 Virtual address to the page used for the DLBU */
+	MALI_DLBU_REGISTER_TLLIST_VBASEADDR     = 0x0008, /**< Tile list virtual base address;
+                                                             31:12 Virtual address to the tile list. This address is used when
+                                                             calculating the call address sent to PP.*/
+	MALI_DLBU_REGISTER_FB_DIM                 = 0x000C, /**< Framebuffer dimension;
+                                                             23:16 Number of tiles in Y direction-1
+                                                             7:0 Number of tiles in X direction-1 */
+	MALI_DLBU_REGISTER_TLLIST_CONF       = 0x0010, /**< Tile list configuration;
+                                                             29:28 select the size of each allocated block: 0=128 bytes, 1=256, 2=512, 3=1024
+                                                             21:16 2^n number of tiles to be binned to one tile list in Y direction
+                                                             5:0 2^n number of tiles to be binned to one tile list in X direction */
+	MALI_DLBU_REGISTER_START_TILE_POS         = 0x0014, /**< Start tile positions;
+                                                             31:24 start position in Y direction for group 1
+                                                             23:16 start position in X direction for group 1
+                                                             15:8 start position in Y direction for group 0
+                                                             7:0 start position in X direction for group 0 */
+	MALI_DLBU_REGISTER_PP_ENABLE_MASK         = 0x0018, /**< PP enable mask;
+                                                             7 enable PP7 for load balancing
+                                                             6 enable PP6 for load balancing
+                                                             5 enable PP5 for load balancing
+                                                             4 enable PP4 for load balancing
+                                                             3 enable PP3 for load balancing
+                                                             2 enable PP2 for load balancing
+                                                             1 enable PP1 for load balancing
+                                                             0 enable PP0 for load balancing */
+} mali_dlbu_register;
+
+typedef enum {
+	PP0ENABLE = 0,
+	PP1ENABLE,
+	PP2ENABLE,
+	PP3ENABLE,
+	PP4ENABLE,
+	PP5ENABLE,
+	PP6ENABLE,
+	PP7ENABLE
+} mali_dlbu_pp_enable;
+
+struct mali_dlbu_core {
+	struct mali_hw_core     hw_core;           /**< Common for all HW cores */
+	u32                     pp_cores_mask;     /**< This is a mask for the PP cores whose operation will be controlled by LBU
+                                                      see MALI_DLBU_REGISTER_PP_ENABLE_MASK register */
+};
+
+_mali_osk_errcode_t mali_dlbu_initialize(void)
+{
+	MALI_DEBUG_PRINT(2, ("Mali DLBU: Initializing\n"));
+
+	if (_MALI_OSK_ERR_OK ==
+	    mali_mmu_get_table_page(&mali_dlbu_phys_addr,
+				    &mali_dlbu_cpu_addr)) {
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_dlbu_terminate(void)
+{
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: terminating\n"));
+
+	if (0 != mali_dlbu_phys_addr && 0 != mali_dlbu_cpu_addr) {
+		mali_mmu_release_table_page(mali_dlbu_phys_addr,
+					    mali_dlbu_cpu_addr);
+		mali_dlbu_phys_addr = 0;
+		mali_dlbu_cpu_addr = 0;
+	}
+}
+
+struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource)
+{
+	struct mali_dlbu_core *core = NULL;
+
+	MALI_DEBUG_PRINT(2, ("Mali DLBU: Creating Mali dynamic load balancing unit: %s\n", resource->description));
+
+	core = _mali_osk_malloc(sizeof(struct mali_dlbu_core));
+	if (NULL != core) {
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALI_DLBU_SIZE)) {
+			core->pp_cores_mask = 0;
+			if (_MALI_OSK_ERR_OK == mali_dlbu_reset(core)) {
+				return core;
+			}
+			MALI_PRINT_ERROR(("Failed to reset DLBU %s\n", core->hw_core.description));
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Mali DLBU: Failed to allocate memory for DLBU core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_dlbu_delete(struct mali_dlbu_core *dlbu)
+{
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	mali_hw_core_delete(&dlbu->hw_core);
+	_mali_osk_free(dlbu);
+}
+
+_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu)
+{
+	u32 dlbu_registers[7];
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT;
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+
+	MALI_DEBUG_PRINT(4, ("Mali DLBU: mali_dlbu_reset: %s\n", dlbu->hw_core.description));
+
+	dlbu_registers[0] = mali_dlbu_phys_addr | 1; /* bit 0 enables the whole core */
+	dlbu_registers[1] = MALI_DLBU_VIRT_ADDR;
+	dlbu_registers[2] = 0;
+	dlbu_registers[3] = 0;
+	dlbu_registers[4] = 0;
+	dlbu_registers[5] = 0;
+	dlbu_registers[6] = dlbu->pp_cores_mask;
+
+	/* write reset values to core registers */
+	mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR, dlbu_registers, 7);
+
+	err = _MALI_OSK_ERR_OK;
+
+	return err;
+}
+
+void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu)
+{
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+
+	mali_hw_core_register_write(&dlbu->hw_core, MALI_DLBU_REGISTER_PP_ENABLE_MASK, dlbu->pp_cores_mask);
+}
+
+void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group)
+{
+	struct mali_pp_core *pp_core;
+	u32 bcast_id;
+
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	pp_core = mali_group_get_pp_core(group);
+	bcast_id = mali_pp_core_get_bcast_id(pp_core);
+
+	dlbu->pp_cores_mask |= bcast_id;
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: Adding core[%d] New mask= 0x%02x\n", bcast_id , dlbu->pp_cores_mask));
+}
+
+/* Remove a group from the DLBU */
+void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group)
+{
+	struct mali_pp_core *pp_core;
+	u32 bcast_id;
+
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	pp_core = mali_group_get_pp_core(group);
+	bcast_id = mali_pp_core_get_bcast_id(pp_core);
+
+	dlbu->pp_cores_mask &= ~bcast_id;
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: Removing core[%d] New mask= 0x%02x\n", bcast_id, dlbu->pp_cores_mask));
+}
+
+/* Configure the DLBU for \a job. This needs to be done before the job is started on the groups in the DLBU. */
+void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job)
+{
+	u32 *registers;
+	MALI_DEBUG_ASSERT(job);
+	registers = mali_pp_job_get_dlbu_registers(job);
+	MALI_DEBUG_PRINT(4, ("Mali DLBU: Starting job\n"));
+
+	/* Writing 4 registers:
+	 * DLBU registers except the first two (written once at DLBU initialisation / reset) and the PP_ENABLE_MASK register */
+	mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_TLLIST_VBASEADDR, registers, 4);
+
+}
diff --git a/utgard/r6p1/common/mali_dlbu.h b/utgard/r6p1/common/mali_dlbu.h
new file mode 100755
index 0000000..c031f11
--- /dev/null
+++ b/utgard/r6p1/common/mali_dlbu.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_DLBU_H__
+#define __MALI_DLBU_H__
+
+#define MALI_DLBU_VIRT_ADDR 0xFFF00000 /* master tile virtual address fixed at this value and mapped into every session */
+
+#include "mali_osk.h"
+
+struct mali_pp_job;
+struct mali_group;
+struct mali_dlbu_core;
+
+extern mali_dma_addr mali_dlbu_phys_addr;
+
+_mali_osk_errcode_t mali_dlbu_initialize(void);
+void mali_dlbu_terminate(void);
+
+struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource);
+void mali_dlbu_delete(struct mali_dlbu_core *dlbu);
+
+_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu);
+
+void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group);
+void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group);
+
+/** @brief Called to update HW after DLBU state changed
+ *
+ * This function must be called after \a mali_dlbu_add_group or \a
+ * mali_dlbu_remove_group to write the updated mask to hardware, unless the
+ * same is accomplished by calling \a mali_dlbu_reset.
+ */
+void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu);
+
+void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job);
+
+#endif /* __MALI_DLBU_H__ */
diff --git a/utgard/r6p1/common/mali_dvfs_policy.c b/utgard/r6p1/common/mali_dvfs_policy.c
new file mode 100755
index 0000000..1094f9d
--- /dev/null
+++ b/utgard/r6p1/common/mali_dvfs_policy.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2010-2012, 2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include "mali_scheduler.h"
+#include "mali_dvfs_policy.h"
+#include "mali_osk_mali.h"
+#include "mali_osk_profiling.h"
+
+#define CLOCK_TUNING_TIME_DEBUG 0
+
+#define MAX_PERFORMANCE_VALUE 256
+#define MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(percent) ((int) ((percent)*(MAX_PERFORMANCE_VALUE)/100.0 + 0.5))
+
+/** The max fps the same as display vsync default 60, can set by module insert parameter */
+int mali_max_system_fps = 60;
+/** A lower limit on their desired FPS default 58, can set by module insert parameter */
+int mali_desired_fps = 58;
+
+static int mali_fps_step1 = 0;
+static int mali_fps_step2 = 0;
+
+static int clock_step = -1;
+static int cur_clk_step = -1;
+static struct mali_gpu_clock *gpu_clk = NULL;
+
+/*Function prototype */
+static int (*mali_gpu_set_freq)(int) = NULL;
+static int (*mali_gpu_get_freq)(void) = NULL;
+
+static mali_bool mali_dvfs_enabled = MALI_FALSE;
+
+#define NUMBER_OF_NANOSECONDS_PER_SECOND  1000000000ULL
+static u32 calculate_window_render_fps(u64 time_period)
+{
+	u32 max_window_number;
+	u64 tmp;
+	u64 max = time_period;
+	u32 leading_zeroes;
+	u32 shift_val;
+	u32 time_period_shift;
+	u32 max_window_number_shift;
+	u32 ret_val;
+
+	max_window_number = mali_session_max_window_num();
+
+	/* To avoid float division, extend the dividend to ns unit */
+	tmp = (u64)max_window_number * NUMBER_OF_NANOSECONDS_PER_SECOND;
+	if (tmp > time_period) {
+		max = tmp;
+	}
+
+	/*
+	 * We may have 64-bit values, a dividend or a divisor or both
+	 * To avoid dependencies to a 64-bit divider, we shift down the two values
+	 * equally first.
+	 */
+	leading_zeroes = _mali_osk_clz((u32)(max >> 32));
+	shift_val = 32 - leading_zeroes;
+
+	time_period_shift = (u32)(time_period >> shift_val);
+	max_window_number_shift = (u32)(tmp >> shift_val);
+
+	ret_val = max_window_number_shift / time_period_shift;
+
+	return ret_val;
+}
+
+static bool mali_pickup_closest_avail_clock(int target_clock_mhz, mali_bool pick_clock_up)
+{
+	int i = 0;
+	bool clock_changed = false;
+
+	/* Round up the closest available frequency step for target_clock_hz */
+	for (i = 0; i < gpu_clk->num_of_steps; i++) {
+		/* Find the first item > target_clock_hz */
+		if (((int)(gpu_clk->item[i].clock) - target_clock_mhz) > 0) {
+			break;
+		}
+	}
+
+	/* If the target clock greater than the maximum clock just pick the maximum one*/
+	if (i == gpu_clk->num_of_steps) {
+		i = gpu_clk->num_of_steps - 1;
+	} else {
+		if ((!pick_clock_up) && (i > 0)) {
+			i = i - 1;
+		}
+	}
+
+	clock_step = i;
+	if (cur_clk_step != clock_step) {
+		clock_changed = true;
+	}
+
+	return clock_changed;
+}
+
+void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period)
+{
+	int under_perform_boundary_value = 0;
+	int over_perform_boundary_value = 0;
+	int current_fps = 0;
+	int current_gpu_util = 0;
+	bool clock_changed = false;
+#if CLOCK_TUNING_TIME_DEBUG
+	struct timeval start;
+	struct timeval stop;
+	unsigned int elapse_time;
+	do_gettimeofday(&start);
+#endif
+	u32 window_render_fps;
+
+	if (NULL == gpu_clk) {
+		MALI_DEBUG_PRINT(2, ("Enable DVFS but patform doesn't Support freq change. \n"));
+		return;
+	}
+
+	window_render_fps = calculate_window_render_fps(time_period);
+
+	current_fps = window_render_fps;
+	current_gpu_util = data->utilization_gpu;
+
+	/* Get the specific under_perform_boundary_value and over_perform_boundary_value */
+	if ((mali_desired_fps <= current_fps) && (current_fps < mali_max_system_fps)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(90);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70);
+	} else if ((mali_fps_step1 <= current_fps) && (current_fps < mali_desired_fps)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35);
+	} else if ((mali_fps_step2 <= current_fps) && (current_fps < mali_fps_step1)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(50);
+	} else {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35);
+	}
+
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: gpu util = %d \n", current_gpu_util));
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: under_perform = %d,  over_perform = %d \n", under_perform_boundary_value, over_perform_boundary_value));
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: render fps = %d,  pressure render fps = %d \n", current_fps, window_render_fps));
+
+	/* Get current clock value */
+	cur_clk_step = mali_gpu_get_freq();
+
+	/* Consider offscreen */
+	if (0 == current_fps) {
+		/* GP or PP under perform, need to give full power */
+		if (current_gpu_util > over_perform_boundary_value) {
+			if (cur_clk_step != gpu_clk->num_of_steps - 1) {
+				clock_changed = true;
+				clock_step = gpu_clk->num_of_steps - 1;
+			}
+		}
+
+		/* If GPU is idle, use lowest power */
+		if (0 == current_gpu_util) {
+			if (cur_clk_step != 0) {
+				clock_changed = true;
+				clock_step = 0;
+			}
+		}
+
+		goto real_setting;
+	}
+
+	/* 2. Calculate target clock if the GPU clock can be tuned */
+	if (-1 != cur_clk_step) {
+		int target_clk_mhz = -1;
+		mali_bool pick_clock_up = MALI_TRUE;
+
+		if (current_gpu_util > under_perform_boundary_value) {
+			/* when under perform, need to consider the fps part */
+			target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util * mali_desired_fps / under_perform_boundary_value / current_fps;
+			pick_clock_up = MALI_TRUE;
+		} else if (current_gpu_util < over_perform_boundary_value) {
+			/* when over perform, did't need to consider fps, system didn't want to reach desired fps */
+			target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util / under_perform_boundary_value;
+			pick_clock_up = MALI_FALSE;
+		}
+
+		if (-1 != target_clk_mhz) {
+			clock_changed = mali_pickup_closest_avail_clock(target_clk_mhz, pick_clock_up);
+		}
+	}
+
+real_setting:
+	if (clock_changed) {
+		mali_gpu_set_freq(clock_step);
+
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+					      gpu_clk->item[clock_step].clock,
+					      gpu_clk->item[clock_step].vol / 1000,
+					      0, 0, 0);
+	}
+
+#if CLOCK_TUNING_TIME_DEBUG
+	do_gettimeofday(&stop);
+
+	elapse_time = timeval_to_ns(&stop) - timeval_to_ns(&start);
+	MALI_DEBUG_PRINT(2, ("Using ARM power policy:  eclapse time = %d\n", elapse_time));
+#endif
+}
+
+_mali_osk_errcode_t mali_dvfs_policy_init(void)
+{
+	_mali_osk_device_data data;
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		if ((NULL != data.get_clock_info) && (NULL != data.set_freq) && (NULL != data.get_freq)) {
+			MALI_DEBUG_PRINT(2, ("Mali DVFS init: using arm dvfs policy \n"));
+
+
+			mali_fps_step1 = mali_max_system_fps / 3;
+			mali_fps_step2 = mali_max_system_fps / 5;
+
+			data.get_clock_info(&gpu_clk);
+
+			if (gpu_clk != NULL) {
+#ifdef DEBUG
+				int i;
+				for (i = 0; i < gpu_clk->num_of_steps; i++) {
+					MALI_DEBUG_PRINT(5, ("mali gpu clock info: step%d clock(%d)Hz,vol(%d) \n",
+							     i, gpu_clk->item[i].clock, gpu_clk->item[i].vol));
+				}
+#endif
+			} else {
+				MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform didn't define enough info for ddk to do DVFS \n"));
+			}
+
+			mali_gpu_get_freq = data.get_freq;
+			mali_gpu_set_freq = data.set_freq;
+
+			if ((NULL != gpu_clk) && (gpu_clk->num_of_steps > 0)
+			    && (NULL != mali_gpu_get_freq) && (NULL != mali_gpu_set_freq)) {
+				mali_dvfs_enabled = MALI_TRUE;
+			}
+		} else {
+			MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform function callback incomplete, need check mali_gpu_device_data in platform .\n"));
+		}
+	} else {
+		err = _MALI_OSK_ERR_FAULT;
+		MALI_DEBUG_PRINT(2, ("Mali DVFS init: get platform data error .\n"));
+	}
+
+	return err;
+}
+
+/*
+ * Always give full power when start a new period,
+ * if mali dvfs enabled, for performance consideration
+ */
+void mali_dvfs_policy_new_period(void)
+{
+	/* Always give full power when start a new period */
+	unsigned int cur_clk_step = 0;
+
+	cur_clk_step = mali_gpu_get_freq();
+
+	if (cur_clk_step != (gpu_clk->num_of_steps - 1)) {
+		mali_gpu_set_freq(gpu_clk->num_of_steps - 1);
+
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, gpu_clk->item[gpu_clk->num_of_steps - 1].clock,
+					      gpu_clk->item[gpu_clk->num_of_steps - 1].vol / 1000, 0, 0, 0);
+	}
+}
+
+mali_bool mali_dvfs_policy_enabled(void)
+{
+	return mali_dvfs_enabled;
+}
+
+#if defined(CONFIG_MALI400_PROFILING)
+void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item)
+{
+	if (mali_platform_device != NULL) {
+
+		struct mali_gpu_device_data *device_data = NULL;
+		device_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data;
+
+		if ((NULL != device_data->get_clock_info) && (NULL != device_data->get_freq)) {
+
+			int cur_clk_step = device_data->get_freq();
+			struct mali_gpu_clock *mali_gpu_clk = NULL;
+
+			device_data->get_clock_info(&mali_gpu_clk);
+			clk_item->clock = mali_gpu_clk->item[cur_clk_step].clock;
+			clk_item->vol = mali_gpu_clk->item[cur_clk_step].vol;
+		} else {
+			MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: platform function callback incomplete, need check mali_gpu_device_data in platform .\n"));
+		}
+	}
+}
+#endif
+
diff --git a/utgard/r6p1/common/mali_dvfs_policy.h b/utgard/r6p1/common/mali_dvfs_policy.h
new file mode 100755
index 0000000..1770426
--- /dev/null
+++ b/utgard/r6p1/common/mali_dvfs_policy.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2010-2012, 2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_DVFS_POLICY_H__
+#define __MALI_DVFS_POLICY_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period);
+
+_mali_osk_errcode_t mali_dvfs_policy_init(void);
+
+void mali_dvfs_policy_new_period(void);
+
+mali_bool mali_dvfs_policy_enabled(void);
+
+#if defined(CONFIG_MALI400_PROFILING)
+void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif/* __MALI_DVFS_POLICY_H__ */
diff --git a/utgard/r6p1/common/mali_executor.c b/utgard/r6p1/common/mali_executor.c
new file mode 100755
index 0000000..76bf701
--- /dev/null
+++ b/utgard/r6p1/common/mali_executor.c
@@ -0,0 +1,2692 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_executor.h"
+#include "mali_scheduler.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_pp.h"
+#include "mali_pp_job.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+#include "mali_timeline.h"
+#include "mali_osk_profiling.h"
+#include "mali_session.h"
+#include "mali_osk_mali.h"
+
+/*
+ * If dma_buf with map on demand is used, we defer job deletion and job queue
+ * if in atomic context, since both might sleep.
+ */
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_DELETE 1
+#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_QUEUE 1
+#endif /* !defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) */
+
+/*
+ * ---------- static type definitions (structs, enums, etc) ----------
+ */
+
+enum mali_executor_state_t {
+	EXEC_STATE_NOT_PRESENT, /* Virtual group on Mali-300/400 (do not use) */
+	EXEC_STATE_DISABLED,    /* Disabled by core scaling (do not use) */
+	EXEC_STATE_EMPTY,       /* No child groups for virtual group (do not use) */
+	EXEC_STATE_INACTIVE,    /* Can be used, but must be activate first */
+	EXEC_STATE_IDLE,        /* Active and ready to be used */
+	EXEC_STATE_WORKING,     /* Executing a job */
+};
+
+/*
+ * ---------- global variables (exported due to inline functions) ----------
+ */
+
+/* Lock for this module (protecting all HW access except L2 caches) */
+_mali_osk_spinlock_irq_t *mali_executor_lock_obj = NULL;
+
+mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX];
+
+/*
+ * ---------- static variables ----------
+ */
+
+/* Used to defer job scheduling */
+static _mali_osk_wq_work_t *executor_wq_high_pri = NULL;
+
+/* Store version from GP and PP (user space wants to know this) */
+static u32 pp_version = 0;
+static u32 gp_version = 0;
+
+/* List of physical PP groups which are disabled by some external source */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_disabled);
+static u32 group_list_disabled_count = 0;
+
+/* List of groups which can be used, but activate first */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_inactive);
+static u32 group_list_inactive_count = 0;
+
+/* List of groups which are active and ready to be used */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_idle);
+static u32 group_list_idle_count = 0;
+
+/* List of groups which are executing a job */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_working);
+static u32 group_list_working_count = 0;
+
+/* Virtual group (if any) */
+static struct mali_group *virtual_group = NULL;
+
+/* Virtual group state is tracked with a state variable instead of 4 lists */
+static enum mali_executor_state_t virtual_group_state = EXEC_STATE_NOT_PRESENT;
+
+/* GP group */
+static struct mali_group *gp_group = NULL;
+
+/* GP group state is tracked with a state variable instead of 4 lists */
+static enum mali_executor_state_t gp_group_state = EXEC_STATE_NOT_PRESENT;
+
+static u32 gp_returned_cookie = 0;
+
+/* Total number of physical PP cores present */
+static u32 num_physical_pp_cores_total = 0;
+
+/* Number of physical cores which are enabled */
+static u32 num_physical_pp_cores_enabled = 0;
+
+/* Enable or disable core scaling */
+static mali_bool core_scaling_enabled = MALI_TRUE;
+
+/* Variables to allow safe pausing of the scheduler */
+static _mali_osk_wait_queue_t *executor_working_wait_queue = NULL;
+static u32 pause_count = 0;
+
+/* PP cores haven't been enabled because of some pp cores haven't been disabled. */
+static int core_scaling_delay_up_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+
+/* Variables used to implement notify pp core changes to userspace when core scaling
+ * is finished in mali_executor_complete_group() function. */
+static _mali_osk_wq_work_t *executor_wq_notify_core_change = NULL;
+static _mali_osk_wait_queue_t *executor_notify_core_change_wait_queue = NULL;
+
+/*
+ * ---------- Forward declaration of static functions ----------
+ */
+static mali_bool mali_executor_is_suspended(void *data);
+static mali_bool mali_executor_is_working(void);
+static void mali_executor_disable_empty_virtual(void);
+static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group);
+static mali_bool mali_executor_has_virtual_group(void);
+static mali_bool mali_executor_virtual_group_is_usable(void);
+static void mali_executor_schedule(void);
+static void mali_executor_wq_schedule(void *arg);
+static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job);
+static void mali_executor_complete_group(struct mali_group *group,
+		mali_bool success,
+		struct mali_gp_job **gp_job_done,
+		struct mali_pp_job **pp_job_done);
+static void mali_executor_change_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *old_list,
+		u32 *old_count,
+		_mali_osk_list_t *new_list,
+		u32 *new_count);
+static mali_bool mali_executor_group_is_in_state(struct mali_group *group,
+		enum mali_executor_state_t state);
+
+static void mali_executor_group_enable_internal(struct mali_group *group);
+static void mali_executor_group_disable_internal(struct mali_group *group);
+static void mali_executor_core_scale(unsigned int target_core_nr);
+static void mali_executor_core_scale_in_group_complete(struct mali_group *group);
+static void mali_executor_notify_core_change(u32 num_cores);
+static void mali_executor_wq_notify_core_change(void *arg);
+static void mali_executor_change_group_status_disabled(struct mali_group *group);
+static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group);
+static void mali_executor_set_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *new_list,
+		u32 *new_count);
+
+/*
+ * ---------- Actual implementation ----------
+ */
+
+_mali_osk_errcode_t mali_executor_initialize(void)
+{
+	mali_executor_lock_obj = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_EXECUTOR);
+	if (NULL == mali_executor_lock_obj) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_wq_high_pri = _mali_osk_wq_create_work_high_pri(mali_executor_wq_schedule, NULL);
+	if (NULL == executor_wq_high_pri) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_working_wait_queue = _mali_osk_wait_queue_init();
+	if (NULL == executor_working_wait_queue) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_wq_notify_core_change = _mali_osk_wq_create_work(mali_executor_wq_notify_core_change, NULL);
+	if (NULL == executor_wq_notify_core_change) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_notify_core_change_wait_queue = _mali_osk_wait_queue_init();
+	if (NULL == executor_notify_core_change_wait_queue) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_executor_terminate(void)
+{
+	if (NULL != executor_notify_core_change_wait_queue) {
+		_mali_osk_wait_queue_term(executor_notify_core_change_wait_queue);
+		executor_notify_core_change_wait_queue = NULL;
+	}
+
+	if (NULL != executor_wq_notify_core_change) {
+		_mali_osk_wq_delete_work(executor_wq_notify_core_change);
+		executor_wq_notify_core_change = NULL;
+	}
+
+	if (NULL != executor_working_wait_queue) {
+		_mali_osk_wait_queue_term(executor_working_wait_queue);
+		executor_working_wait_queue = NULL;
+	}
+
+	if (NULL != executor_wq_high_pri) {
+		_mali_osk_wq_delete_work(executor_wq_high_pri);
+		executor_wq_high_pri = NULL;
+	}
+
+	if (NULL != mali_executor_lock_obj) {
+		_mali_osk_spinlock_irq_term(mali_executor_lock_obj);
+		mali_executor_lock_obj = NULL;
+	}
+}
+
+void mali_executor_populate(void)
+{
+	u32 num_groups;
+	u32 i;
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	/* Do we have a virtual group? */
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (mali_group_is_virtual(group)) {
+			virtual_group = group;
+			virtual_group_state = EXEC_STATE_INACTIVE;
+			break;
+		}
+	}
+
+	/* Find all the available physical GP and PP cores */
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (NULL != group) {
+			struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+			struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+
+			if (!mali_group_is_virtual(group)) {
+				if (NULL != pp_core) {
+					if (0 == pp_version) {
+						/* Retrieve PP version from the first available PP core */
+						pp_version = mali_pp_core_get_version(pp_core);
+					}
+
+					if (NULL != virtual_group) {
+						mali_executor_lock();
+						mali_group_add_group(virtual_group, group);
+						mali_executor_unlock();
+					} else {
+						_mali_osk_list_add(&group->executor_list, &group_list_inactive);
+						group_list_inactive_count++;
+					}
+
+					num_physical_pp_cores_total++;
+				} else {
+					MALI_DEBUG_ASSERT_POINTER(gp_core);
+
+					if (0 == gp_version) {
+						/* Retrieve GP version */
+						gp_version = mali_gp_core_get_version(gp_core);
+					}
+
+					gp_group = group;
+					gp_group_state = EXEC_STATE_INACTIVE;
+				}
+
+			}
+		}
+	}
+
+	num_physical_pp_cores_enabled = num_physical_pp_cores_total;
+}
+
+void mali_executor_depopulate(void)
+{
+	struct mali_group *group;
+	struct mali_group *temp;
+
+	MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != gp_group_state);
+
+	if (NULL != gp_group) {
+		mali_group_delete(gp_group);
+		gp_group = NULL;
+	}
+
+	MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != virtual_group_state);
+
+	if (NULL != virtual_group) {
+		mali_group_delete(virtual_group);
+		virtual_group = NULL;
+	}
+
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&group_list_working));
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+}
+
+void mali_executor_suspend(void)
+{
+	mali_executor_lock();
+
+	/* Increment the pause_count so that no more jobs will be scheduled */
+	pause_count++;
+
+	mali_executor_unlock();
+
+	_mali_osk_wait_queue_wait_event(executor_working_wait_queue,
+					mali_executor_is_suspended, NULL);
+
+	/*
+	 * mali_executor_complete_XX() leaves jobs in idle state.
+	 * deactivate option is used when we are going to power down
+	 * the entire GPU (OS suspend) and want a consistent SW vs HW
+	 * state.
+	 */
+	mali_executor_lock();
+
+	mali_executor_deactivate_list_idle(MALI_TRUE);
+
+	/*
+	 * The following steps are used to deactive all of activated
+	 * (MALI_GROUP_STATE_ACTIVE) and activating (MALI_GROUP
+	 * _STAET_ACTIVATION_PENDING) groups, to make sure the variable
+	 * pd_mask_wanted is equal with 0. */
+	if (MALI_GROUP_STATE_INACTIVE != mali_group_get_state(gp_group)) {
+		gp_group_state = EXEC_STATE_INACTIVE;
+		mali_group_deactivate(gp_group);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		if (MALI_GROUP_STATE_INACTIVE
+		    != mali_group_get_state(virtual_group)) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+			mali_group_deactivate(virtual_group);
+		}
+	}
+
+	if (0 < group_list_inactive_count) {
+		struct mali_group *group;
+		struct mali_group *temp;
+
+		_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+					    &group_list_inactive,
+					    struct mali_group, executor_list) {
+			if (MALI_GROUP_STATE_ACTIVATION_PENDING
+			    == mali_group_get_state(group)) {
+				mali_group_deactivate(group);
+			}
+
+			/*
+			 * On mali-450 platform, we may have physical group in the group inactive
+			 * list, and its state is MALI_GROUP_STATE_ACTIVATION_PENDING, so we only
+			 * deactivate it is not enough, we still also need add it back to virtual group.
+			 * And now, virtual group must be in INACTIVE state, so it's safe to add
+			 * physical group to virtual group at this point.
+			 */
+			if (NULL != virtual_group) {
+				_mali_osk_list_delinit(&group->executor_list);
+				group_list_inactive_count--;
+
+				mali_group_add_group(virtual_group, group);
+			}
+		}
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_resume(void)
+{
+	mali_executor_lock();
+
+	/* Decrement pause_count to allow scheduling again (if it reaches 0) */
+	pause_count--;
+	if (0 == pause_count) {
+		mali_executor_schedule();
+	}
+
+	mali_executor_unlock();
+}
+
+u32 mali_executor_get_num_cores_total(void)
+{
+	return num_physical_pp_cores_total;
+}
+
+u32 mali_executor_get_num_cores_enabled(void)
+{
+	return num_physical_pp_cores_enabled;
+}
+
+struct mali_pp_core *mali_executor_get_virtual_pp(void)
+{
+	MALI_DEBUG_ASSERT_POINTER(virtual_group);
+	MALI_DEBUG_ASSERT_POINTER(virtual_group->pp_core);
+	return virtual_group->pp_core;
+}
+
+struct mali_group *mali_executor_get_virtual_group(void)
+{
+	return virtual_group;
+}
+
+void mali_executor_zap_all_active(struct mali_session_data *session)
+{
+	struct mali_group *group;
+	struct mali_group *temp;
+	mali_bool ret;
+
+	mali_executor_lock();
+
+	/*
+	 * This function is a bit complicated because
+	 * mali_group_zap_session() can fail. This only happens because the
+	 * group is in an unhandled page fault status.
+	 * We need to make sure this page fault is handled before we return,
+	 * so that we know every single outstanding MMU transactions have
+	 * completed. This will allow caller to safely remove physical pages
+	 * when we have returned.
+	 */
+
+	MALI_DEBUG_ASSERT(NULL != gp_group);
+	ret = mali_group_zap_session(gp_group, session);
+	if (MALI_FALSE == ret) {
+		struct mali_gp_job *gp_job = NULL;
+
+		mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL);
+
+		MALI_DEBUG_ASSERT_POINTER(gp_job);
+
+		/* GP job completed, make sure it is freed */
+		mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		ret = mali_group_zap_session(virtual_group, session);
+		if (MALI_FALSE == ret) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job);
+
+			if (NULL != pp_job) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_FALSE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working,
+				    struct mali_group, executor_list) {
+		ret = mali_group_zap_session(group, session);
+		if (MALI_FALSE == ret) {
+			ret = mali_group_zap_session(group, session);
+			if (MALI_FALSE == ret) {
+				struct mali_pp_job *pp_job = NULL;
+
+				mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job);
+
+				if (NULL != pp_job) {
+					/* PP job completed, free it */
+					mali_scheduler_complete_pp_job(pp_job,
+								       0, MALI_FALSE,
+								       MALI_TRUE);
+				}
+			}
+		}
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule)
+{
+	if (MALI_SCHEDULER_MASK_EMPTY != mask) {
+		if (MALI_TRUE == deferred_schedule) {
+			_mali_osk_wq_schedule_work_high_pri(executor_wq_high_pri);
+		} else {
+			/* Schedule from this thread*/
+			mali_executor_lock();
+			mali_executor_schedule();
+			mali_executor_unlock();
+		}
+	}
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+	mali_bool time_out = MALI_FALSE;
+
+	MALI_DEBUG_PRINT(4, ("Executor: GP interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+
+	if (mali_group_has_timed_out(group)) {
+		int_result = MALI_INTERRUPT_RESULT_ERROR;
+		time_out = MALI_TRUE;
+		MALI_PRINT(("Executor GP: Job %d Timeout on %s\n",
+			    mali_gp_job_get_id(group->gp_running_job),
+			    mali_group_core_description(group)));
+	} else {
+		int_result = mali_group_get_interrupt_result_gp(group);
+		if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#else
+	MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_NONE != int_result);
+#endif
+
+	mali_group_mask_all_interrupts_gp(group);
+
+	if (MALI_INTERRUPT_RESULT_SUCCESS_VS == int_result) {
+		if (mali_group_gp_is_active(group)) {
+			/* Only VS completed so far, while PLBU is still active */
+
+			/* Enable all but the current interrupt */
+			mali_group_enable_interrupts_gp(group, int_result);
+
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_OK;
+		}
+	} else if (MALI_INTERRUPT_RESULT_SUCCESS_PLBU == int_result) {
+		if (mali_group_gp_is_active(group)) {
+			/* Only PLBU completed so far, while VS is still active */
+
+			/* Enable all but the current interrupt */
+			mali_group_enable_interrupts_gp(group, int_result);
+
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_OK;
+		}
+	} else if (MALI_INTERRUPT_RESULT_OOM == int_result) {
+		struct mali_gp_job *job = mali_group_get_running_gp_job(group);
+
+		/* PLBU out of mem */
+		MALI_DEBUG_PRINT(3, ("Executor: PLBU needs more heap memory\n"));
+
+#if defined(CONFIG_MALI400_PROFILING)
+		/* Give group a chance to generate a SUSPEND event */
+		mali_group_oom(group);
+#endif
+
+		/*
+		 * no need to hold interrupt raised while
+		 * waiting for more memory.
+		 */
+		mali_executor_send_gp_oom_to_user(job);
+
+		mali_executor_unlock();
+
+		return _MALI_OSK_ERR_OK;
+	}
+
+	/* We should now have a real interrupt to handle */
+
+	MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n",
+			     mali_group_core_description(group),
+			     (MALI_INTERRUPT_RESULT_ERROR == int_result) ?
+			     "ERROR" : "success"));
+
+	if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) {
+		/* Don't bother to do processing of errors in upper half */
+		mali_executor_unlock();
+
+		if (MALI_FALSE == time_out) {
+			mali_group_schedule_bottom_half_gp(group);
+		}
+	} else {
+		struct mali_gp_job *job;
+		mali_bool success;
+
+		if (MALI_TRUE == time_out) {
+			mali_group_dump_status(group);
+		}
+
+		success = (int_result != MALI_INTERRUPT_RESULT_ERROR) ?
+			  MALI_TRUE : MALI_FALSE;
+
+		mali_executor_complete_group(group, success, &job, NULL);
+
+		mali_executor_unlock();
+
+		/* GP jobs always fully complete */
+		MALI_DEBUG_ASSERT(NULL != job);
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_gp_job(job, success,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+	mali_bool time_out = MALI_FALSE;
+
+	MALI_DEBUG_PRINT(4, ("Executor: PP interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (in_upper_half) {
+		if (mali_group_is_in_virtual(group)) {
+			/* Child groups should never handle PP interrupts */
+			MALI_DEBUG_ASSERT(!mali_group_has_timed_out(group));
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+	MALI_DEBUG_ASSERT(!mali_group_is_in_virtual(group));
+
+	if (mali_group_has_timed_out(group)) {
+		int_result = MALI_INTERRUPT_RESULT_ERROR;
+		time_out = MALI_TRUE;
+		MALI_PRINT(("Executor PP: Job %d Timeout on %s\n",
+			    mali_pp_job_get_id(group->pp_running_job),
+			    mali_group_core_description(group)));
+	} else {
+		int_result = mali_group_get_interrupt_result_pp(group);
+		if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	} else if (MALI_INTERRUPT_RESULT_SUCCESS == int_result) {
+		if (mali_group_is_virtual(group) && mali_group_pp_is_active(group)) {
+			/* Some child groups are still working, so nothing to do right now */
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+#else
+	MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_NONE != int_result);
+#endif
+
+	/* We should now have a real interrupt to handle */
+
+	MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n",
+			     mali_group_core_description(group),
+			     (MALI_INTERRUPT_RESULT_ERROR == int_result) ?
+			     "ERROR" : "success"));
+
+	if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) {
+		/* Don't bother to do processing of errors in upper half */
+		mali_group_mask_all_interrupts_pp(group);
+		mali_executor_unlock();
+
+		if (MALI_FALSE == time_out) {
+			mali_group_schedule_bottom_half_pp(group);
+		}
+	} else {
+		struct mali_pp_job *job = NULL;
+		mali_bool success;
+
+		if (MALI_TRUE == time_out) {
+			mali_group_dump_status(group);
+		}
+
+		success = (int_result == MALI_INTERRUPT_RESULT_SUCCESS) ?
+			  MALI_TRUE : MALI_FALSE;
+
+		mali_executor_complete_group(group, success, NULL, &job);
+
+		mali_executor_unlock();
+
+		if (NULL != job) {
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(job,
+						       num_physical_pp_cores_total,
+						       MALI_TRUE, MALI_TRUE);
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+
+	MALI_DEBUG_PRINT(4, ("Executor: MMU interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+
+	int_result = mali_group_get_interrupt_result_mmu(group);
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#else
+	MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_ERROR == int_result);
+#endif
+
+	/* We should now have a real interrupt to handle */
+
+	if (in_upper_half) {
+		/* Don't bother to do processing of errors in upper half */
+
+		struct mali_group *parent = group->parent_group;
+
+		mali_mmu_mask_all_interrupts(group->mmu);
+
+		mali_executor_unlock();
+
+		if (NULL == parent) {
+			mali_group_schedule_bottom_half_mmu(group);
+		} else {
+			mali_group_schedule_bottom_half_mmu(parent);
+		}
+
+	} else {
+		struct mali_gp_job *gp_job = NULL;
+		struct mali_pp_job *pp_job = NULL;
+
+#ifdef DEBUG
+
+		u32 fault_address = mali_mmu_get_page_fault_addr(group->mmu);
+		u32 status = mali_mmu_get_status(group->mmu);
+		MALI_DEBUG_PRINT(2, ("Executor: Mali page fault detected at 0x%x from bus id %d of type %s on %s\n",
+				     (void *)(uintptr_t)fault_address,
+				     (status >> 6) & 0x1F,
+				     (status & 32) ? "write" : "read",
+				     group->mmu->hw_core.description));
+		MALI_DEBUG_PRINT(3, ("Executor: MMU rawstat = 0x%08X, MMU status = 0x%08X\n",
+				     mali_mmu_get_rawstat(group->mmu), status));
+		mali_mmu_pagedir_diag(mali_session_get_page_directory(group->session), fault_address);
+#endif
+
+		mali_executor_complete_group(group, MALI_FALSE, &gp_job, &pp_job);
+
+		mali_executor_unlock();
+
+		if (NULL != gp_job) {
+			MALI_DEBUG_ASSERT(NULL == pp_job);
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+						       MALI_TRUE, MALI_TRUE);
+		} else if (NULL != pp_job) {
+			MALI_DEBUG_ASSERT(NULL == gp_job);
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(pp_job,
+						       num_physical_pp_cores_total,
+						       MALI_TRUE, MALI_TRUE);
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups)
+{
+	u32 i;
+	mali_bool child_groups_activated = MALI_FALSE;
+	mali_bool do_schedule = MALI_FALSE;
+#if defined(DEBUG)
+	u32 num_activated = 0;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(groups);
+	MALI_DEBUG_ASSERT(0 < num_groups);
+
+	mali_executor_lock();
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups\n", num_groups));
+
+	for (i = 0; i < num_groups; i++) {
+		MALI_DEBUG_PRINT(3, ("Executor: powering up group %s\n",
+				     mali_group_core_description(groups[i])));
+
+		mali_group_power_up(groups[i]);
+
+		if ((MALI_GROUP_STATE_ACTIVATION_PENDING != mali_group_get_state(groups[i]) ||
+		     (MALI_TRUE != mali_executor_group_is_in_state(groups[i], EXEC_STATE_INACTIVE)))) {
+			/* nothing more to do for this group */
+			continue;
+		}
+
+		MALI_DEBUG_PRINT(3, ("Executor: activating group %s\n",
+				     mali_group_core_description(groups[i])));
+
+#if defined(DEBUG)
+		num_activated++;
+#endif
+
+		if (mali_group_is_in_virtual(groups[i])) {
+			/*
+			 * At least one child group of virtual group is powered on.
+			 */
+			child_groups_activated = MALI_TRUE;
+		} else if (MALI_FALSE == mali_group_is_virtual(groups[i])) {
+			/* Set gp and pp not in virtual to active. */
+			mali_group_set_active(groups[i]);
+		}
+
+		/* Move group from inactive to idle list */
+		if (groups[i] == gp_group) {
+			MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE ==
+					  gp_group_state);
+			gp_group_state = EXEC_STATE_IDLE;
+		} else if (MALI_FALSE == mali_group_is_in_virtual(groups[i])
+			   && MALI_FALSE == mali_group_is_virtual(groups[i])) {
+			MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_group_is_in_state(groups[i],
+					  EXEC_STATE_INACTIVE));
+
+			mali_executor_change_state_pp_physical(groups[i],
+							       &group_list_inactive,
+							       &group_list_inactive_count,
+							       &group_list_idle,
+							       &group_list_idle_count);
+		}
+
+		do_schedule = MALI_TRUE;
+	}
+
+	if (mali_executor_has_virtual_group() &&
+	    MALI_TRUE == child_groups_activated &&
+	    MALI_GROUP_STATE_ACTIVATION_PENDING ==
+	    mali_group_get_state(virtual_group)) {
+		/*
+		 * Try to active virtual group while it may be not sucessful every time,
+		 * because there is one situation that not all of child groups are powered on
+		 * in one time and virtual group is in activation pending state.
+		 */
+		if (mali_group_set_active(virtual_group)) {
+			/* Move group from inactive to idle */
+			MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE ==
+					  virtual_group_state);
+			virtual_group_state = EXEC_STATE_IDLE;
+
+			MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u  physical activated, 1 virtual activated.\n", num_groups, num_activated));
+		} else {
+			MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated));
+		}
+	} else {
+		MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated));
+	}
+
+	if (MALI_TRUE == do_schedule) {
+		/* Trigger a schedule */
+		mali_executor_schedule();
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_group_power_down(struct mali_group *groups[],
+				    u32 num_groups)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(groups);
+	MALI_DEBUG_ASSERT(0 < num_groups);
+
+	mali_executor_lock();
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups\n", num_groups));
+
+	for (i = 0; i < num_groups; i++) {
+		/* Groups must be either disabled or inactive. while for virtual group,
+		 * it maybe in empty state, because when we meet pm_runtime_suspend,
+		 * virtual group could be powered off, and before we acquire mali_executor_lock,
+		 * we must release mali_pm_state_lock, if there is a new physical job was queued,
+		 * all of physical groups in virtual group could be pulled out, so we only can
+		 * powered down an empty virtual group. Those physical groups will be powered
+		 * up in following pm_runtime_resume callback function.
+		 */
+		MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(groups[i],
+				  EXEC_STATE_DISABLED) ||
+				  mali_executor_group_is_in_state(groups[i],
+						  EXEC_STATE_INACTIVE) ||
+				  mali_executor_group_is_in_state(groups[i],
+						  EXEC_STATE_EMPTY));
+
+		MALI_DEBUG_PRINT(3, ("Executor: powering down group %s\n",
+				     mali_group_core_description(groups[i])));
+
+		mali_group_power_down(groups[i]);
+	}
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups completed\n", num_groups));
+
+	mali_executor_unlock();
+}
+
+void mali_executor_abort_session(struct mali_session_data *session)
+{
+	struct mali_group *group;
+	struct mali_group *tmp_group;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(session->is_aborting);
+
+	MALI_DEBUG_PRINT(3,
+			 ("Executor: Aborting all jobs from session 0x%08X.\n",
+			  session));
+
+	mali_executor_lock();
+
+	if (mali_group_get_session(gp_group) == session) {
+		if (EXEC_STATE_WORKING == gp_group_state) {
+			struct mali_gp_job *gp_job = NULL;
+
+			mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL);
+
+			MALI_DEBUG_ASSERT_POINTER(gp_job);
+
+			/* GP job completed, make sure it is freed */
+			mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+						       MALI_FALSE, MALI_TRUE);
+		} else {
+			/* Same session, but not working, so just clear it */
+			mali_group_clear_session(gp_group);
+		}
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		if (EXEC_STATE_WORKING == virtual_group_state
+		    && mali_group_get_session(virtual_group) == session) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job);
+
+			if (NULL != pp_job) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_FALSE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_working,
+				    struct mali_group, executor_list) {
+		if (mali_group_get_session(group) == session) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job);
+
+			if (NULL != pp_job) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_FALSE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_idle, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_inactive, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_disabled, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	mali_executor_unlock();
+}
+
+
+void mali_executor_core_scaling_enable(void)
+{
+	/* PS: Core scaling is by default enabled */
+	core_scaling_enabled = MALI_TRUE;
+}
+
+void mali_executor_core_scaling_disable(void)
+{
+	core_scaling_enabled = MALI_FALSE;
+}
+
+mali_bool mali_executor_core_scaling_is_enabled(void)
+{
+	return core_scaling_enabled;
+}
+
+void mali_executor_group_enable(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+
+	if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group))
+	    && (mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) {
+		mali_executor_group_enable_internal(group);
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+}
+
+/*
+ * If a physical group is inactive or idle, we should disable it immediately,
+ * if group is in virtual, and virtual group is idle, disable given physical group in it.
+ */
+void mali_executor_group_disable(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+
+	if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group))
+	    && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) {
+		mali_executor_group_disable_internal(group);
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+}
+
+mali_bool mali_executor_group_is_disabled(struct mali_group *group)
+{
+	/* NB: This function is not optimized for time critical usage */
+
+	mali_bool ret;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+	ret = mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED);
+	mali_executor_unlock();
+
+	return ret;
+}
+
+int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override)
+{
+	if (target_core_nr == num_physical_pp_cores_enabled) return 0;
+	if (MALI_FALSE == core_scaling_enabled && MALI_FALSE == override) return -EPERM;
+	if (target_core_nr > num_physical_pp_cores_total) return -EINVAL;
+	if (0 == target_core_nr) return -EINVAL;
+
+	mali_executor_core_scale(target_core_nr);
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+
+	return 0;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_executor_dump_state(char *buf, u32 size)
+{
+	int n = 0;
+	struct mali_group *group;
+	struct mali_group *temp;
+
+	mali_executor_lock();
+
+	switch (gp_group_state) {
+	case EXEC_STATE_INACTIVE:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state INACTIVE\n");
+		break;
+	case EXEC_STATE_IDLE:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state IDLE\n");
+		break;
+	case EXEC_STATE_WORKING:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state WORKING\n");
+		break;
+	default:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in unknown/illegal state %u\n",
+					gp_group_state);
+		break;
+	}
+
+	n += mali_group_dump_state(gp_group, buf + n, size - n);
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in WORKING state (count = %u):\n",
+				group_list_working_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in IDLE state (count = %u):\n",
+				group_list_idle_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in INACTIVE state (count = %u):\n",
+				group_list_inactive_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in DISABLED state (count = %u):\n",
+				group_list_disabled_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		switch (virtual_group_state) {
+		case EXEC_STATE_EMPTY:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state EMPTY\n");
+			break;
+		case EXEC_STATE_INACTIVE:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state INACTIVE\n");
+			break;
+		case EXEC_STATE_IDLE:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state IDLE\n");
+			break;
+		case EXEC_STATE_WORKING:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state WORKING\n");
+			break;
+		default:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in unknown/illegal state %u\n",
+						virtual_group_state);
+			break;
+		}
+
+		n += mali_group_dump_state(virtual_group, buf + n, size - n);
+	}
+
+	mali_executor_unlock();
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\n");
+
+	return n;
+}
+#endif
+
+_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->number_of_total_cores = num_physical_pp_cores_total;
+	args->number_of_enabled_cores = num_physical_pp_cores_enabled;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->version = pp_version;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->number_of_cores = 1;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->version = gp_version;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args)
+{
+	struct mali_session_data *session;
+	struct mali_gp_job *job;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (_MALIGP_JOB_RESUME_WITH_NEW_HEAP == args->code) {
+		_mali_osk_notification_t *new_notification = NULL;
+
+		new_notification = _mali_osk_notification_create(
+					   _MALI_NOTIFICATION_GP_STALLED,
+					   sizeof(_mali_uk_gp_job_suspended_s));
+
+		if (NULL != new_notification) {
+			MALI_DEBUG_PRINT(3, ("Executor: Resuming job %u with new heap; 0x%08X - 0x%08X\n",
+					     args->cookie, args->arguments[0], args->arguments[1]));
+
+			mali_executor_lock();
+
+			/* Resume the job in question if it is still running */
+			job = mali_group_get_running_gp_job(gp_group);
+			if (NULL != job &&
+			    args->cookie == mali_gp_job_get_id(job) &&
+			    session == mali_gp_job_get_session(job)) {
+				/*
+				 * Correct job is running, resume with new heap
+				 */
+
+				mali_gp_job_set_oom_notification(job,
+								 new_notification);
+
+				/* This will also re-enable interrupts */
+				mali_group_resume_gp_with_new_heap(gp_group,
+								   args->cookie,
+								   args->arguments[0],
+								   args->arguments[1]);
+
+				mali_executor_unlock();
+				return _MALI_OSK_ERR_OK;
+			} else {
+				MALI_PRINT_ERROR(("Executor: Unable to resume, GP job no longer running.\n"));
+
+				_mali_osk_notification_delete(new_notification);
+
+				mali_executor_unlock();
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_PRINT_ERROR(("Executor: Failed to allocate notification object. Will abort GP job.\n"));
+		}
+	} else {
+		MALI_DEBUG_PRINT(2, ("Executor: Aborting job %u, no new heap provided\n", args->cookie));
+	}
+
+	mali_executor_lock();
+
+	/* Abort the job in question if it is still running */
+	job = mali_group_get_running_gp_job(gp_group);
+	if (NULL != job &&
+	    args->cookie == mali_gp_job_get_id(job) &&
+	    session == mali_gp_job_get_session(job)) {
+		/* Correct job is still running */
+		struct mali_gp_job *job_done = NULL;
+
+		mali_executor_complete_group(gp_group, MALI_FALSE, &job_done, NULL);
+
+		/* The same job should have completed */
+		MALI_DEBUG_ASSERT(job_done == job);
+
+		/* GP job completed, make sure it is freed */
+		mali_scheduler_complete_gp_job(job_done, MALI_FALSE,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	mali_executor_unlock();
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+/*
+ * ---------- Implementation of static functions ----------
+ */
+
+void mali_executor_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_executor_lock_obj);
+	MALI_DEBUG_PRINT(5, ("Executor: lock taken\n"));
+}
+
+void mali_executor_unlock(void)
+{
+	MALI_DEBUG_PRINT(5, ("Executor: Releasing lock\n"));
+	_mali_osk_spinlock_irq_unlock(mali_executor_lock_obj);
+}
+
+static mali_bool mali_executor_is_suspended(void *data)
+{
+	mali_bool ret;
+
+	/* This callback does not use the data pointer. */
+	MALI_IGNORE(data);
+
+	mali_executor_lock();
+
+	ret = pause_count > 0 && !mali_executor_is_working();
+
+	mali_executor_unlock();
+
+	return ret;
+}
+
+static mali_bool mali_executor_is_working()
+{
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	return (0 != group_list_working_count ||
+		EXEC_STATE_WORKING == gp_group_state ||
+		EXEC_STATE_WORKING == virtual_group_state);
+}
+
+static void mali_executor_disable_empty_virtual(void)
+{
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_EMPTY);
+	MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_WORKING);
+
+	if (mali_group_is_empty(virtual_group)) {
+		virtual_group_state = EXEC_STATE_EMPTY;
+	}
+}
+
+static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group)
+{
+	mali_bool trigger_pm_update = MALI_FALSE;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	/* Only rejoining after job has completed (still active) */
+	MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE ==
+			  mali_group_get_state(group));
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_has_virtual_group());
+	MALI_DEBUG_ASSERT(MALI_FALSE == mali_group_is_virtual(group));
+
+	/* Make sure group and virtual group have same status */
+
+	if (MALI_GROUP_STATE_INACTIVE == mali_group_get_state(virtual_group)) {
+		if (mali_group_deactivate(group)) {
+			trigger_pm_update = MALI_TRUE;
+		}
+
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+		}
+	} else if (MALI_GROUP_STATE_ACTIVATION_PENDING ==
+		   mali_group_get_state(virtual_group)) {
+		/*
+		 * Activation is pending for virtual group, leave
+		 * this child group as active.
+		 */
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE ==
+				  mali_group_get_state(virtual_group));
+
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_IDLE;
+		}
+	}
+
+	/* Remove group from idle list */
+	MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group,
+			  EXEC_STATE_IDLE));
+	_mali_osk_list_delinit(&group->executor_list);
+	group_list_idle_count--;
+
+	/*
+	 * And finally rejoin the virtual group
+	 * group will start working on same job as virtual_group,
+	 * if virtual_group is working on a job
+	 */
+	mali_group_add_group(virtual_group, group);
+
+	return trigger_pm_update;
+}
+
+static mali_bool mali_executor_has_virtual_group(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	return (NULL != virtual_group) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+}
+
+static mali_bool mali_executor_virtual_group_is_usable(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return ((EXEC_STATE_INACTIVE == virtual_group_state ||
+		EXEC_STATE_IDLE == virtual_group_state) && (virtual_group->state != MALI_GROUP_STATE_ACTIVATION_PENDING)) ?
+	       MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+}
+
+static mali_bool mali_executor_tackle_gp_bound(void)
+{
+	struct mali_pp_job *job;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	job = mali_scheduler_job_pp_physical_peek();
+
+	if (NULL != job && MALI_TRUE == mali_is_mali400()) {
+		if (0 < group_list_working_count &&
+		    mali_pp_job_is_large_and_unstarted(job)) {
+			return MALI_TRUE;
+		}
+	}
+
+	return MALI_FALSE;
+}
+
+static mali_bool mali_executor_schedule_is_early_out(mali_bool *gpu_secure_mode_is_needed)
+{
+	struct mali_pp_job *next_pp_job_to_start = NULL;
+	struct mali_group *group;
+	struct mali_group *tmp_group;
+	struct mali_pp_job *physical_pp_job_working = NULL;
+	struct mali_pp_job *virtual_pp_job_working = NULL;
+	mali_bool gpu_working_in_protected_mode = MALI_FALSE;
+	mali_bool gpu_working_in_non_protected_mode = MALI_FALSE;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	*gpu_secure_mode_is_needed = MALI_FALSE;
+
+	/* Check if the gpu secure mode is supported, exit if not.*/
+	if (MALI_FALSE == _mali_osk_gpu_secure_mode_is_supported()) {
+		return MALI_FALSE;
+	}
+
+	/* Check if need to set gpu secure mode for the next pp job,
+	 * get the next pp job that will be scheduled  if exist.
+	 */
+	next_pp_job_to_start = mali_scheduler_job_pp_next();
+
+	/* Check current pp physical/virtual running job is protected job or not if exist.*/
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_working,
+				    struct mali_group, executor_list) {
+		physical_pp_job_working = group->pp_running_job;
+		break;
+	}
+
+	if (EXEC_STATE_WORKING == virtual_group_state) {
+		virtual_pp_job_working = virtual_group->pp_running_job;
+	}
+
+	if (NULL != physical_pp_job_working) {
+		if (MALI_TRUE == mali_pp_job_is_protected_job(physical_pp_job_working)) {
+			gpu_working_in_protected_mode = MALI_TRUE;
+		} else {
+			gpu_working_in_non_protected_mode = MALI_TRUE;
+		}
+	} else if (NULL != virtual_pp_job_working) {
+		if (MALI_TRUE == mali_pp_job_is_protected_job(virtual_pp_job_working)) {
+			gpu_working_in_protected_mode = MALI_TRUE;
+		} else {
+			gpu_working_in_non_protected_mode = MALI_TRUE;
+		}
+	} else if (EXEC_STATE_WORKING == gp_group_state) {
+		gpu_working_in_non_protected_mode = MALI_TRUE;
+	}
+
+	/* If the next pp job is the protected pp job.*/
+	if ((NULL != next_pp_job_to_start) && MALI_TRUE == mali_pp_job_is_protected_job(next_pp_job_to_start)) {
+		/* if gp is working or any non-protected pp job is working now, unable to schedule protected pp job. */
+		if (MALI_TRUE == gpu_working_in_non_protected_mode)
+			return MALI_TRUE;
+
+		*gpu_secure_mode_is_needed = MALI_TRUE;
+		return MALI_FALSE;
+
+	}
+
+	if (MALI_TRUE == gpu_working_in_protected_mode) {
+		/* Unable to schedule non-protected pp job/gp job if exist protected pp running jobs*/
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+/*
+ * This is where jobs are actually started.
+ */
+static void mali_executor_schedule(void)
+{
+	u32 i;
+	u32 num_physical_needed = 0;
+	u32 num_physical_to_process = 0;
+	mali_bool trigger_pm_update = MALI_FALSE;
+	mali_bool deactivate_idle_group = MALI_TRUE;
+	mali_bool gpu_secure_mode_is_needed = MALI_FALSE;
+
+	/* Physical groups + jobs to start in this function */
+	struct mali_group *groups_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	struct mali_pp_job *jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	u32 sub_jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	int num_jobs_to_start = 0;
+
+	/* Virtual job to start in this function */
+	struct mali_pp_job *virtual_job_to_start = NULL;
+
+	/* GP job to start in this function */
+	struct mali_gp_job *gp_job_to_start = NULL;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (pause_count > 0) {
+		/* Execution is suspended, don't schedule any jobs. */
+		return;
+	}
+
+	/* Lock needed in order to safely handle the job queues */
+	mali_scheduler_lock();
+
+	/* 1. Check the schedule if need to early out. */
+	if (MALI_TRUE == mali_executor_schedule_is_early_out(&gpu_secure_mode_is_needed)) {
+		mali_scheduler_unlock();
+		return;
+	}
+
+	/* 2. Activate gp firstly if have gp job queued. */
+	if ((EXEC_STATE_INACTIVE == gp_group_state)
+	    && (0 < mali_scheduler_job_gp_count())
+	    && (gpu_secure_mode_is_needed == MALI_FALSE)) {
+
+		enum mali_group_state state =
+			mali_group_activate(gp_group);
+		if (MALI_GROUP_STATE_ACTIVE == state) {
+			/* Set GP group state to idle */
+			gp_group_state = EXEC_STATE_IDLE;
+		} else {
+			trigger_pm_update = MALI_TRUE;
+		}
+	}
+
+	/* 3. Prepare as many physical groups as needed/possible */
+
+	num_physical_needed = mali_scheduler_job_physical_head_count(gpu_secure_mode_is_needed);
+
+	/* On mali-450 platform, we don't need to enter in this block frequently. */
+	if (0 < num_physical_needed) {
+
+		if (num_physical_needed <= group_list_idle_count) {
+			/* We have enough groups on idle list already */
+			num_physical_to_process = num_physical_needed;
+			num_physical_needed = 0;
+		} else {
+			/* We need to get a hold of some more groups */
+			num_physical_to_process = group_list_idle_count;
+			num_physical_needed -= group_list_idle_count;
+		}
+
+		if (0 < num_physical_needed) {
+
+			/* 3.1. Activate groups which are inactive */
+
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive,
+						    struct mali_group, executor_list) {
+				enum mali_group_state state =
+					mali_group_activate(group);
+				if (MALI_GROUP_STATE_ACTIVE == state) {
+					/* Move from inactive to idle */
+					mali_executor_change_state_pp_physical(group,
+									       &group_list_inactive,
+									       &group_list_inactive_count,
+									       &group_list_idle,
+									       &group_list_idle_count);
+					num_physical_to_process++;
+				} else {
+					trigger_pm_update = MALI_TRUE;
+				}
+
+				num_physical_needed--;
+				if (0 == num_physical_needed) {
+					/* We have activated all the groups we need */
+					break;
+				}
+			}
+		}
+
+		if (mali_executor_virtual_group_is_usable()) {
+
+			/*
+			 * 3.2. And finally, steal and activate groups
+			 * from virtual group if we need even more
+			 */
+			while (0 < num_physical_needed) {
+				struct mali_group *group;
+
+				group = mali_group_acquire_group(virtual_group);
+				if (NULL != group) {
+					enum mali_group_state state;
+
+					mali_executor_disable_empty_virtual();
+
+					state = mali_group_activate(group);
+					if (MALI_GROUP_STATE_ACTIVE == state) {
+						/* Group is ready, add to idle list */
+						_mali_osk_list_add(
+							&group->executor_list,
+							&group_list_idle);
+						group_list_idle_count++;
+						num_physical_to_process++;
+					} else {
+						/*
+						 * Group is not ready yet,
+						 * add to inactive list
+						 */
+						_mali_osk_list_add(
+							&group->executor_list,
+							&group_list_inactive);
+						group_list_inactive_count++;
+
+						trigger_pm_update = MALI_TRUE;
+					}
+					num_physical_needed--;
+				} else {
+					/*
+					 * We could not get enough groups
+					 * from the virtual group.
+					 */
+					break;
+				}
+			}
+		}
+
+		/* 3.3. Assign physical jobs to groups */
+
+		if (0 < num_physical_to_process) {
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle,
+						    struct mali_group, executor_list) {
+				struct mali_pp_job *job = NULL;
+				u32 sub_job = MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+
+				MALI_DEBUG_ASSERT(num_jobs_to_start <
+						  MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS);
+
+				MALI_DEBUG_ASSERT(0 <
+						  mali_scheduler_job_physical_head_count(gpu_secure_mode_is_needed));
+
+				/* If the next pp job is non-protected, check if gp bound now. */
+				if ((MALI_FALSE == gpu_secure_mode_is_needed)
+				    && (mali_executor_hint_is_enabled(MALI_EXECUTOR_HINT_GP_BOUND))
+				    && (MALI_TRUE == mali_executor_tackle_gp_bound())) {
+					/*
+					* We're gp bound,
+					* don't start this right now.
+					*/
+					deactivate_idle_group = MALI_FALSE;
+					num_physical_to_process = 0;
+					break;
+				}
+
+				job = mali_scheduler_job_pp_physical_get(
+					      &sub_job);
+
+				if (MALI_FALSE == gpu_secure_mode_is_needed) {
+					MALI_DEBUG_ASSERT(MALI_FALSE == mali_pp_job_is_protected_job(job));
+				} else {
+					MALI_DEBUG_ASSERT(MALI_TRUE == mali_pp_job_is_protected_job(job));
+				}
+
+				MALI_DEBUG_ASSERT_POINTER(job);
+				MALI_DEBUG_ASSERT(sub_job <= MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS);
+
+				/* Put job + group on list of jobs to start later on */
+
+				groups_to_start[num_jobs_to_start] = group;
+				jobs_to_start[num_jobs_to_start] = job;
+				sub_jobs_to_start[num_jobs_to_start] = sub_job;
+				num_jobs_to_start++;
+
+				/* Move group from idle to working */
+				mali_executor_change_state_pp_physical(group,
+								       &group_list_idle,
+								       &group_list_idle_count,
+								       &group_list_working,
+								       &group_list_working_count);
+
+				num_physical_to_process--;
+				if (0 == num_physical_to_process) {
+					/* Got all we needed */
+					break;
+				}
+			}
+		}
+	}
+
+	/* 4. Deactivate idle pp group , must put deactive here before active vitual group
+	 *    for cover case first only has physical job in normal queue but group inactive,
+	 *    so delay the job start go to active group, when group activated,
+	 *    call scheduler again, but now if we get high queue virtual job,
+	 *    we will do nothing in schedule cause executor schedule stop
+	 */
+
+	if (MALI_TRUE == mali_executor_deactivate_list_idle(deactivate_idle_group
+			&& (!mali_timeline_has_physical_pp_job()))) {
+		trigger_pm_update = MALI_TRUE;
+	}
+
+	/* 5. Activate virtual group, if needed */
+	if (EXEC_STATE_INACTIVE == virtual_group_state &&
+	    MALI_TRUE ==  mali_scheduler_job_next_is_virtual()) {
+		struct mali_pp_job *virtual_job = mali_scheduler_job_pp_virtual_peek();
+		if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == mali_pp_job_is_protected_job(virtual_job))
+		    || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == mali_pp_job_is_protected_job(virtual_job))) {
+			enum mali_group_state state =
+				mali_group_activate(virtual_group);
+			if (MALI_GROUP_STATE_ACTIVE == state) {
+				/* Set virtual group state to idle */
+				virtual_group_state = EXEC_STATE_IDLE;
+			} else {
+				trigger_pm_update = MALI_TRUE;
+			}
+		}
+	}
+
+	/* 6. To power up group asap, we trigger pm update here. */
+
+	if (MALI_TRUE == trigger_pm_update) {
+		trigger_pm_update = MALI_FALSE;
+		mali_pm_update_async();
+	}
+
+	/* 7. Assign jobs to idle virtual group (or deactivate if no job) */
+
+	if (EXEC_STATE_IDLE == virtual_group_state) {
+		if (MALI_TRUE == mali_scheduler_job_next_is_virtual()) {
+			struct mali_pp_job *virtual_job = mali_scheduler_job_pp_virtual_peek();
+			if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == mali_pp_job_is_protected_job(virtual_job))
+			    || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == mali_pp_job_is_protected_job(virtual_job))) {
+				virtual_job_to_start =
+					mali_scheduler_job_pp_virtual_get();
+				virtual_group_state = EXEC_STATE_WORKING;
+			}
+		} else if (!mali_timeline_has_virtual_pp_job()) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+
+			if (mali_group_deactivate(virtual_group)) {
+				trigger_pm_update = MALI_TRUE;
+			}
+		}
+	}
+
+	/* 8. Assign job to idle GP group (or deactivate if no job) */
+
+	if (EXEC_STATE_IDLE == gp_group_state && MALI_FALSE == gpu_secure_mode_is_needed) {
+		if (0 < mali_scheduler_job_gp_count()) {
+			gp_job_to_start = mali_scheduler_job_gp_get();
+			gp_group_state = EXEC_STATE_WORKING;
+		} else if (!mali_timeline_has_gp_job()) {
+			gp_group_state = EXEC_STATE_INACTIVE;
+			if (mali_group_deactivate(gp_group)) {
+				trigger_pm_update = MALI_TRUE;
+			}
+		}
+	}
+
+	/* 9. We no longer need the schedule/queue lock */
+
+	mali_scheduler_unlock();
+
+	/* 10. start jobs */
+	if (NULL != virtual_job_to_start) {
+		MALI_DEBUG_ASSERT(!mali_group_pp_is_active(virtual_group));
+		mali_group_start_pp_job(virtual_group,
+					virtual_job_to_start, 0);
+	}
+
+	for (i = 0; i < num_jobs_to_start; i++) {
+		MALI_DEBUG_ASSERT(!mali_group_pp_is_active(
+					  groups_to_start[i]));
+		mali_group_start_pp_job(groups_to_start[i],
+					jobs_to_start[i],
+					sub_jobs_to_start[i]);
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(gp_group);
+
+	if (NULL != gp_job_to_start) {
+		MALI_DEBUG_ASSERT(!mali_group_gp_is_active(gp_group));
+		mali_group_start_gp_job(gp_group, gp_job_to_start);
+	}
+
+	/* 11. Trigger any pending PM updates */
+	if (MALI_TRUE == trigger_pm_update) {
+		mali_pm_update_async();
+	}
+}
+
+/* Handler for deferred schedule requests */
+static void mali_executor_wq_schedule(void *arg)
+{
+	MALI_IGNORE(arg);
+	mali_executor_lock();
+	mali_executor_schedule();
+	mali_executor_unlock();
+}
+
+static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job)
+{
+	_mali_uk_gp_job_suspended_s *jobres;
+	_mali_osk_notification_t *notification;
+
+	notification = mali_gp_job_get_oom_notification(job);
+
+	/*
+	 * Remember the id we send to user space, so we have something to
+	 * verify when we get a response
+	 */
+	gp_returned_cookie = mali_gp_job_get_id(job);
+
+	jobres = (_mali_uk_gp_job_suspended_s *)notification->result_buffer;
+	jobres->user_job_ptr = mali_gp_job_get_user_id(job);
+	jobres->cookie = gp_returned_cookie;
+
+	mali_session_send_notification(mali_gp_job_get_session(job),
+				       notification);
+}
+static struct mali_gp_job *mali_executor_complete_gp(struct mali_group *group,
+		mali_bool success)
+{
+	struct mali_gp_job *job;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* Extracts the needed HW status from core and reset */
+	job = mali_group_complete_gp(group, success);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Core is now ready to go into idle list */
+	gp_group_state = EXEC_STATE_IDLE;
+
+	/* This will potentially queue more GP and PP jobs */
+	mali_timeline_tracker_release(&job->tracker);
+
+	/* Signal PP job */
+	mali_gp_job_signal_pp_tracker(job, success);
+
+	return job;
+}
+
+static struct mali_pp_job *mali_executor_complete_pp(struct mali_group *group,
+		mali_bool success)
+{
+	struct mali_pp_job *job;
+	u32 sub_job;
+	mali_bool job_is_done;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* Extracts the needed HW status from core and reset */
+	job = mali_group_complete_pp(group, success, &sub_job);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Core is now ready to go into idle list */
+	if (mali_group_is_virtual(group)) {
+		virtual_group_state = EXEC_STATE_IDLE;
+	} else {
+		/* Move from working to idle state */
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_working,
+						       &group_list_working_count,
+						       &group_list_idle,
+						       &group_list_idle_count);
+	}
+
+	/* It is the executor module which owns the jobs themselves by now */
+	mali_pp_job_mark_sub_job_completed(job, success);
+	job_is_done = mali_pp_job_is_complete(job);
+
+	if (job_is_done) {
+		/* This will potentially queue more GP and PP jobs */
+		mali_timeline_tracker_release(&job->tracker);
+	}
+
+	return job;
+}
+
+static void mali_executor_complete_group(struct mali_group *group,
+		mali_bool success,
+		struct mali_gp_job **gp_job_done,
+		struct mali_pp_job **pp_job_done)
+{
+	struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+	struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+	struct mali_gp_job *gp_job = NULL;
+	struct mali_pp_job *pp_job = NULL;
+	mali_bool pp_job_is_done = MALI_TRUE;
+
+	if (NULL != gp_core) {
+		gp_job = mali_executor_complete_gp(group, success);
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(pp_core);
+		MALI_IGNORE(pp_core);
+		pp_job = mali_executor_complete_pp(group, success);
+
+		pp_job_is_done = mali_pp_job_is_complete(pp_job);
+	}
+
+	if (pause_count > 0) {
+		/* Execution has been suspended */
+
+		if (!mali_executor_is_working()) {
+			/* Last job completed, wake up sleepers */
+			_mali_osk_wait_queue_wake_up(
+				executor_working_wait_queue);
+		}
+	} else if (MALI_TRUE == mali_group_disable_requested(group)) {
+		mali_executor_core_scale_in_group_complete(group);
+
+		mali_executor_schedule();
+	} else {
+		/* try to schedule new jobs */
+		mali_executor_schedule();
+	}
+
+	if (NULL != gp_job) {
+		MALI_DEBUG_ASSERT_POINTER(gp_job_done);
+		*gp_job_done = gp_job;
+	} else if (pp_job_is_done) {
+		MALI_DEBUG_ASSERT_POINTER(pp_job);
+		MALI_DEBUG_ASSERT_POINTER(pp_job_done);
+		*pp_job_done = pp_job;
+	}
+}
+
+static void mali_executor_change_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *old_list,
+		u32 *old_count,
+		_mali_osk_list_t *new_list,
+		u32 *new_count)
+{
+	/*
+	 * It's a bit more complicated to change the state for the physical PP
+	 * groups since their state is determined by the list they are on.
+	 */
+#if defined(DEBUG)
+	mali_bool found = MALI_FALSE;
+	struct mali_group *group_iter;
+	struct mali_group *temp;
+	u32 old_counted = 0;
+	u32 new_counted = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(old_list);
+	MALI_DEBUG_ASSERT_POINTER(old_count);
+	MALI_DEBUG_ASSERT_POINTER(new_list);
+	MALI_DEBUG_ASSERT_POINTER(new_count);
+
+	/*
+	 * Verify that group is present on old list,
+	 * and that the count is correct
+	 */
+
+	_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, old_list,
+				    struct mali_group, executor_list) {
+		old_counted++;
+		if (group == group_iter) {
+			found = MALI_TRUE;
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, new_list,
+				    struct mali_group, executor_list) {
+		new_counted++;
+	}
+
+	if (MALI_FALSE == found) {
+		if (old_list == &group_list_idle) {
+			MALI_DEBUG_PRINT(1, (" old Group list is idle,"));
+		} else if (old_list == &group_list_inactive) {
+			MALI_DEBUG_PRINT(1, (" old Group list is inactive,"));
+		} else if (old_list == &group_list_working) {
+			MALI_DEBUG_PRINT(1, (" old Group list is working,"));
+		} else if (old_list == &group_list_disabled) {
+			MALI_DEBUG_PRINT(1, (" old Group list is disable,"));
+		}
+
+		if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_WORKING)) {
+			MALI_DEBUG_PRINT(1, (" group in working \n"));
+		} else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_INACTIVE)) {
+			MALI_DEBUG_PRINT(1, (" group in inactive \n"));
+		} else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_IDLE)) {
+			MALI_DEBUG_PRINT(1, (" group in idle \n"));
+		} else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)) {
+			MALI_DEBUG_PRINT(1, (" but group in disabled \n"));
+		}
+	}
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == found);
+	MALI_DEBUG_ASSERT(0 < (*old_count));
+	MALI_DEBUG_ASSERT((*old_count) == old_counted);
+	MALI_DEBUG_ASSERT((*new_count) == new_counted);
+#endif
+
+	_mali_osk_list_move(&group->executor_list, new_list);
+	(*old_count)--;
+	(*new_count)++;
+}
+
+static void mali_executor_set_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *new_list,
+		u32 *new_count)
+{
+	_mali_osk_list_add(&group->executor_list, new_list);
+	(*new_count)++;
+}
+
+static mali_bool mali_executor_group_is_in_state(struct mali_group *group,
+		enum mali_executor_state_t state)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (gp_group == group) {
+		if (gp_group_state == state) {
+			return MALI_TRUE;
+		}
+	} else if (virtual_group == group || mali_group_is_in_virtual(group)) {
+		if (virtual_group_state == state) {
+			return MALI_TRUE;
+		}
+	} else {
+		/* Physical PP group */
+		struct mali_group *group_iter;
+		struct mali_group *temp;
+		_mali_osk_list_t *list;
+
+		if (EXEC_STATE_DISABLED == state) {
+			list = &group_list_disabled;
+		} else if (EXEC_STATE_INACTIVE == state) {
+			list = &group_list_inactive;
+		} else if (EXEC_STATE_IDLE == state) {
+			list = &group_list_idle;
+		} else {
+			MALI_DEBUG_ASSERT(EXEC_STATE_WORKING == state);
+			list = &group_list_working;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, list,
+					    struct mali_group, executor_list) {
+			if (group_iter == group) {
+				return MALI_TRUE;
+			}
+		}
+	}
+
+	/* group not in correct state */
+	return MALI_FALSE;
+}
+
+static void mali_executor_group_enable_internal(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED));
+
+	/* Put into inactive state (== "lowest" enabled state) */
+	if (group == gp_group) {
+		MALI_DEBUG_ASSERT(EXEC_STATE_DISABLED == gp_group_state);
+		gp_group_state = EXEC_STATE_INACTIVE;
+	} else {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_disabled,
+						       &group_list_disabled_count,
+						       &group_list_inactive,
+						       &group_list_inactive_count);
+
+		++num_physical_pp_cores_enabled;
+		MALI_DEBUG_PRINT(4, ("Enabling group id %d \n", group->pp_core->core_id));
+	}
+
+	if (MALI_GROUP_STATE_ACTIVE == mali_group_activate(group)) {
+		MALI_DEBUG_ASSERT(MALI_TRUE == mali_group_power_is_on(group));
+
+		/* Move from inactive to idle */
+		if (group == gp_group) {
+			gp_group_state = EXEC_STATE_IDLE;
+		} else {
+			mali_executor_change_state_pp_physical(group,
+							       &group_list_inactive,
+							       &group_list_inactive_count,
+							       &group_list_idle,
+							       &group_list_idle_count);
+
+			if (mali_executor_has_virtual_group()) {
+				if (mali_executor_physical_rejoin_virtual(group)) {
+					mali_pm_update_async();
+				}
+			}
+		}
+	} else {
+		mali_pm_update_async();
+	}
+}
+
+static void mali_executor_group_disable_internal(struct mali_group *group)
+{
+	mali_bool working;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED));
+
+	working = mali_executor_group_is_in_state(group, EXEC_STATE_WORKING);
+	if (MALI_TRUE == working) {
+		/** Group to be disabled once it completes current work,
+		 * when virtual group completes, also check child groups for this flag */
+		mali_group_set_disable_request(group, MALI_TRUE);
+		return;
+	}
+
+	/* Put into disabled state */
+	if (group == gp_group) {
+		/* GP group */
+		MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != gp_group_state);
+		gp_group_state = EXEC_STATE_DISABLED;
+	} else {
+		if (mali_group_is_in_virtual(group)) {
+			/* A child group of virtual group. move the specific group from virtual group */
+			MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != virtual_group_state);
+
+			mali_executor_set_state_pp_physical(group,
+							    &group_list_disabled,
+							    &group_list_disabled_count);
+
+			mali_group_remove_group(virtual_group, group);
+			mali_executor_disable_empty_virtual();
+		} else {
+			mali_executor_change_group_status_disabled(group);
+		}
+
+		--num_physical_pp_cores_enabled;
+		MALI_DEBUG_PRINT(4, ("Disabling group id %d \n", group->pp_core->core_id));
+	}
+
+	if (MALI_GROUP_STATE_INACTIVE != group->state) {
+		if (MALI_TRUE == mali_group_deactivate(group)) {
+			mali_pm_update_async();
+		}
+	}
+}
+
+static void mali_executor_notify_core_change(u32 num_cores)
+{
+	mali_bool done = MALI_FALSE;
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		return;
+	}
+
+	/*
+	 * This function gets a bit complicated because we can't hold the session lock while
+	 * allocating notification objects.
+	 */
+	while (!done) {
+		u32 i;
+		u32 num_sessions_alloc;
+		u32 num_sessions_with_lock;
+		u32 used_notification_objects = 0;
+		_mali_osk_notification_t **notobjs;
+
+		/* Pre allocate the number of notifications objects we need right now (might change after lock has been taken) */
+		num_sessions_alloc = mali_session_get_count();
+		if (0 == num_sessions_alloc) {
+			/* No sessions to report to */
+			return;
+		}
+
+		notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc);
+		if (NULL == notobjs) {
+			MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n"));
+			/* there is probably no point in trying again, system must be really low on memory and probably unusable now anyway */
+			return;
+		}
+
+		for (i = 0; i < num_sessions_alloc; i++) {
+			notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_NUM_CORE_CHANGE, sizeof(_mali_uk_pp_num_cores_changed_s));
+			if (NULL != notobjs[i]) {
+				_mali_uk_pp_num_cores_changed_s *data = notobjs[i]->result_buffer;
+				data->number_of_enabled_cores = num_cores;
+			} else {
+				MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure %u)\n", i));
+			}
+		}
+
+		mali_session_lock();
+
+		/* number of sessions will not change while we hold the lock */
+		num_sessions_with_lock = mali_session_get_count();
+
+		if (num_sessions_alloc >= num_sessions_with_lock) {
+			/* We have allocated enough notification objects for all the sessions atm */
+			struct mali_session_data *session, *tmp;
+			MALI_SESSION_FOREACH(session, tmp, link) {
+				MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc);
+				if (NULL != notobjs[used_notification_objects]) {
+					mali_session_send_notification(session, notobjs[used_notification_objects]);
+					notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */
+				}
+				used_notification_objects++;
+			}
+			done = MALI_TRUE;
+		}
+
+		mali_session_unlock();
+
+		/* Delete any remaining/unused notification objects */
+		for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) {
+			if (NULL != notobjs[used_notification_objects]) {
+				_mali_osk_notification_delete(notobjs[used_notification_objects]);
+			}
+		}
+
+		_mali_osk_free(notobjs);
+	}
+}
+
+static mali_bool mali_executor_core_scaling_is_done(void *data)
+{
+	u32 i;
+	u32 num_groups;
+	mali_bool ret = MALI_TRUE;
+
+	MALI_IGNORE(data);
+
+	mali_executor_lock();
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (NULL != group) {
+			if (MALI_TRUE == group->disable_requested && NULL != mali_group_get_pp_core(group)) {
+				ret = MALI_FALSE;
+				break;
+			}
+		}
+	}
+	mali_executor_unlock();
+
+	return ret;
+}
+
+static void mali_executor_wq_notify_core_change(void *arg)
+{
+	MALI_IGNORE(arg);
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		return;
+	}
+
+	_mali_osk_wait_queue_wait_event(executor_notify_core_change_wait_queue,
+					mali_executor_core_scaling_is_done, NULL);
+
+	mali_executor_notify_core_change(num_physical_pp_cores_enabled);
+}
+
+/**
+ * Clear all disable request from the _last_ core scaling behavior.
+ */
+static void mali_executor_core_scaling_reset(void)
+{
+	u32 i;
+	u32 num_groups;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (NULL != group) {
+			group->disable_requested = MALI_FALSE;
+		}
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		core_scaling_delay_up_mask[i] = 0;
+	}
+}
+
+static void mali_executor_core_scale(unsigned int target_core_nr)
+{
+	int current_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	int target_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	int i;
+
+	MALI_DEBUG_ASSERT(0 < target_core_nr);
+	MALI_DEBUG_ASSERT(num_physical_pp_cores_total >= target_core_nr);
+
+	mali_executor_lock();
+
+	if (target_core_nr < num_physical_pp_cores_enabled) {
+		MALI_DEBUG_PRINT(2, ("Requesting %d cores: disabling %d cores\n", target_core_nr, num_physical_pp_cores_enabled - target_core_nr));
+	} else {
+		MALI_DEBUG_PRINT(2, ("Requesting %d cores: enabling %d cores\n", target_core_nr, target_core_nr - num_physical_pp_cores_enabled));
+	}
+
+	/* When a new core scaling request is comming,  we should remove the un-doing
+	 * part of the last core scaling request.  It's safe because we have only one
+	 * lock(executor lock) protection. */
+	mali_executor_core_scaling_reset();
+
+	mali_pm_get_best_power_cost_mask(num_physical_pp_cores_enabled, current_core_scaling_mask);
+	mali_pm_get_best_power_cost_mask(target_core_nr, target_core_scaling_mask);
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		target_core_scaling_mask[i] = target_core_scaling_mask[i] - current_core_scaling_mask[i];
+		MALI_DEBUG_PRINT(5, ("target_core_scaling_mask[%d] = %d\n", i, target_core_scaling_mask[i]));
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0 > target_core_scaling_mask[i]) {
+			struct mali_pm_domain *domain;
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(group) && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))
+					    && (!mali_group_is_virtual(group))) {
+						mali_executor_group_disable_internal(group);
+						target_core_scaling_mask[i]++;
+						if ((0 == target_core_scaling_mask[i])) {
+							break;
+						}
+
+					}
+				}
+			}
+		}
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		/**
+		 * Target_core_scaling_mask[i] is bigger than 0,
+		 * means we need to enable some pp cores in
+		 * this domain whose domain index is i.
+		 */
+		if (0 < target_core_scaling_mask[i]) {
+			struct mali_pm_domain *domain;
+
+			if (num_physical_pp_cores_enabled >= target_core_nr) {
+				break;
+			}
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(group) && mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)
+					    && (!mali_group_is_virtual(group))) {
+						mali_executor_group_enable_internal(group);
+						target_core_scaling_mask[i]--;
+
+						if ((0 == target_core_scaling_mask[i]) || num_physical_pp_cores_enabled == target_core_nr) {
+							break;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	/**
+	 * Here, we may still have some pp cores not been enabled because of some
+	 * pp cores need to be disabled are still in working state.
+	 */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0 < target_core_scaling_mask[i]) {
+			core_scaling_delay_up_mask[i] = target_core_scaling_mask[i];
+		}
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+}
+
+static void mali_executor_core_scale_in_group_complete(struct mali_group *group)
+{
+	int num_pp_cores_disabled = 0;
+	int num_pp_cores_to_enable = 0;
+	int i;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_group_disable_requested(group));
+
+	/* Disable child group of virtual group */
+	if (mali_group_is_virtual(group)) {
+		struct mali_group *child;
+		struct mali_group *temp;
+
+		_MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+			if (MALI_TRUE == mali_group_disable_requested(child)) {
+				mali_group_set_disable_request(child, MALI_FALSE);
+				mali_executor_group_disable_internal(child);
+				num_pp_cores_disabled++;
+			}
+		}
+		mali_group_set_disable_request(group, MALI_FALSE);
+	} else {
+		mali_executor_group_disable_internal(group);
+		mali_group_set_disable_request(group, MALI_FALSE);
+		if (NULL != mali_group_get_pp_core(group)) {
+			num_pp_cores_disabled++;
+		}
+	}
+
+	num_pp_cores_to_enable = num_pp_cores_disabled;
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0 < core_scaling_delay_up_mask[i]) {
+			struct mali_pm_domain *domain;
+
+			if (0 == num_pp_cores_to_enable) {
+				break;
+			}
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *disabled_group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(disabled_group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(disabled_group) && mali_executor_group_is_in_state(disabled_group, EXEC_STATE_DISABLED)) {
+						mali_executor_group_enable_internal(disabled_group);
+						core_scaling_delay_up_mask[i]--;
+						num_pp_cores_to_enable--;
+
+						if ((0 == core_scaling_delay_up_mask[i]) || 0 == num_pp_cores_to_enable) {
+							break;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	_mali_osk_wait_queue_wake_up(executor_notify_core_change_wait_queue);
+}
+
+static void mali_executor_change_group_status_disabled(struct mali_group *group)
+{
+	/* Physical PP group */
+	mali_bool idle;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	idle = mali_executor_group_is_in_state(group, EXEC_STATE_IDLE);
+	if (MALI_TRUE == idle) {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_idle,
+						       &group_list_idle_count,
+						       &group_list_disabled,
+						       &group_list_disabled_count);
+	} else {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_inactive,
+						       &group_list_inactive_count,
+						       &group_list_disabled,
+						       &group_list_disabled_count);
+	}
+}
+
+static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group)
+{
+	mali_bool trigger_pm_update = MALI_FALSE;
+
+	if (group_list_idle_count > 0) {
+		if (mali_executor_has_virtual_group()) {
+
+			/* Rejoin virtual group on Mali-450 */
+
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+						    &group_list_idle,
+						    struct mali_group, executor_list) {
+				if (mali_executor_physical_rejoin_virtual(
+					    group)) {
+					trigger_pm_update = MALI_TRUE;
+				}
+			}
+		} else if (deactivate_idle_group) {
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			/* Deactivate group on Mali-300/400 */
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+						    &group_list_idle,
+						    struct mali_group, executor_list) {
+				if (mali_group_deactivate(group)) {
+					trigger_pm_update = MALI_TRUE;
+				}
+
+				/* Move from idle to inactive */
+				mali_executor_change_state_pp_physical(group,
+								       &group_list_idle,
+								       &group_list_idle_count,
+								       &group_list_inactive,
+								       &group_list_inactive_count);
+			}
+		}
+	}
+
+	return trigger_pm_update;
+}
+
+void mali_executor_running_status_print(void)
+{
+	struct mali_group *group = NULL;
+	struct mali_group *temp = NULL;
+
+	MALI_PRINT(("GP running job: %p\n", gp_group->gp_running_job));
+	if ((gp_group->gp_core) && (gp_group->is_working)) {
+		mali_group_dump_status(gp_group);
+	}
+	MALI_PRINT(("Physical PP groups in WORKING state (count = %u):\n", group_list_working_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) {
+		MALI_PRINT(("PP running job: %p, subjob %d \n", group->pp_running_job, group->pp_running_sub_job));
+		mali_group_dump_status(group);
+	}
+	MALI_PRINT(("Physical PP groups in INACTIVE state (count = %u):\n", group_list_inactive_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+		MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+		MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+	}
+	MALI_PRINT(("Physical PP groups in IDLE state (count = %u):\n", group_list_idle_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+		MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+		MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+	}
+	MALI_PRINT(("Physical PP groups in DISABLED state (count = %u):\n", group_list_disabled_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+		MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+		MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		MALI_PRINT(("Virtual group running job: %p\n", virtual_group->pp_running_job));
+		MALI_PRINT(("Virtual group status: %d\n", virtual_group_state));
+		MALI_PRINT(("Virtual group->status: %d\n", virtual_group->state));
+		MALI_PRINT(("\tSW power: %s\n", virtual_group->power_is_on ? "On" : "Off"));
+		_MALI_OSK_LIST_FOREACHENTRY(group, temp, &virtual_group->group_list,
+					    struct mali_group, group_list) {
+			int i = 0;
+			MALI_PRINT(("\tchild group(%s) running job: %p\n", group->pp_core->hw_core.description, group->pp_running_job));
+			MALI_PRINT(("\tchild group(%s)->status: %d\n", group->pp_core->hw_core.description, group->state));
+			MALI_PRINT(("\tchild group(%s) SW power: %s\n", group->pp_core->hw_core.description, group->power_is_on ? "On" : "Off"));
+#if MALI_STATE_TRACKING
+			if (group->pm_domain) {
+				MALI_PRINT(("\tPower domain: id %u\n", mali_pm_domain_get_id(group->pm_domain)));
+				MALI_PRINT(("\tMask:0x%04x \n", mali_pm_domain_get_mask(group->pm_domain)));
+				MALI_PRINT(("\tUse-count:%u \n", mali_pm_domain_get_use_count(group->pm_domain)));
+				MALI_PRINT(("\tCurrent power status:%s \n", (mali_pm_domain_get_mask(group->pm_domain)& mali_pm_get_current_mask()) ? "On" : "Off"));
+				MALI_PRINT(("\tWanted  power status:%s \n", (mali_pm_domain_get_mask(group->pm_domain)& mali_pm_get_wanted_mask()) ? "On" : "Off"));
+			}
+#endif
+
+			for (i = 0; i < 2; i++) {
+				if (NULL != group->l2_cache_core[i]) {
+					struct mali_pm_domain *domain;
+					domain = mali_l2_cache_get_pm_domain(group->l2_cache_core[i]);
+					MALI_PRINT(("\t L2(index %d) group SW power: %s\n", i, group->l2_cache_core[i]->power_is_on ? "On" : "Off"));
+#if MALI_STATE_TRACKING
+					if (domain) {
+						MALI_PRINT(("\tL2 Power domain: id %u\n", mali_pm_domain_get_id(domain)));
+						MALI_PRINT(("\tL2 Mask:0x%04x \n", mali_pm_domain_get_mask(domain)));
+						MALI_PRINT(("\tL2 Use-count:%u \n", mali_pm_domain_get_use_count(domain)));
+						MALI_PRINT(("\tL2 Current power status:%s \n", (mali_pm_domain_get_mask(domain) & mali_pm_get_current_mask()) ? "On" : "Off"));
+						MALI_PRINT(("\tL2 Wanted  power status:%s \n", (mali_pm_domain_get_mask(domain) & mali_pm_get_wanted_mask()) ? "On" : "Off"));
+					}
+#endif
+				}
+			}
+		}
+		if (EXEC_STATE_WORKING == virtual_group_state) {
+			mali_group_dump_status(virtual_group);
+		}
+	}
+}
+
+void mali_executor_status_dump(void)
+{
+	mali_executor_lock();
+	mali_scheduler_lock();
+
+	/* print schedule queue status */
+	mali_scheduler_gp_pp_job_queue_print();
+
+	mali_scheduler_unlock();
+	mali_executor_unlock();
+}
diff --git a/utgard/r6p1/common/mali_executor.h b/utgard/r6p1/common/mali_executor.h
new file mode 100755
index 0000000..927a384
--- /dev/null
+++ b/utgard/r6p1/common/mali_executor.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2012, 2014-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_EXECUTOR_H__
+#define __MALI_EXECUTOR_H__
+
+#include "mali_osk.h"
+#include "mali_scheduler_types.h"
+#include "mali_kernel_common.h"
+
+typedef enum {
+	MALI_EXECUTOR_HINT_GP_BOUND = 0
+#define MALI_EXECUTOR_HINT_MAX        1
+} mali_executor_hint;
+
+extern mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX];
+
+/* forward declare struct instead of using include */
+struct mali_session_data;
+struct mali_group;
+struct mali_pp_core;
+
+extern _mali_osk_spinlock_irq_t *mali_executor_lock_obj;
+
+#define MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj);
+
+_mali_osk_errcode_t mali_executor_initialize(void);
+void mali_executor_terminate(void);
+
+void mali_executor_populate(void);
+void mali_executor_depopulate(void);
+
+void mali_executor_suspend(void);
+void mali_executor_resume(void);
+
+u32 mali_executor_get_num_cores_total(void);
+u32 mali_executor_get_num_cores_enabled(void);
+struct mali_pp_core *mali_executor_get_virtual_pp(void);
+struct mali_group *mali_executor_get_virtual_group(void);
+
+void mali_executor_zap_all_active(struct mali_session_data *session);
+
+/**
+ * Schedule GP and PP according to bitmask.
+ *
+ * @param mask A scheduling bitmask.
+ * @param deferred_schedule MALI_TRUE if schedule should be deferred, MALI_FALSE if not.
+ */
+void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule);
+
+_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group, mali_bool in_upper_half);
+_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group, mali_bool in_upper_half);
+_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group, mali_bool in_upper_half);
+void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups);
+void mali_executor_group_power_down(struct mali_group *groups[], u32 num_groups);
+
+void mali_executor_abort_session(struct mali_session_data *session);
+
+void mali_executor_core_scaling_enable(void);
+void mali_executor_core_scaling_disable(void);
+mali_bool mali_executor_core_scaling_is_enabled(void);
+
+void mali_executor_group_enable(struct mali_group *group);
+void mali_executor_group_disable(struct mali_group *group);
+mali_bool mali_executor_group_is_disabled(struct mali_group *group);
+
+int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override);
+
+#if MALI_STATE_TRACKING
+u32 mali_executor_dump_state(char *buf, u32 size);
+#endif
+
+MALI_STATIC_INLINE void mali_executor_hint_enable(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	mali_executor_hints[hint] = MALI_TRUE;
+}
+
+MALI_STATIC_INLINE void mali_executor_hint_disable(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	mali_executor_hints[hint] = MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_executor_hint_is_enabled(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	return mali_executor_hints[hint];
+}
+
+void mali_executor_running_status_print(void);
+void mali_executor_status_dump(void);
+void mali_executor_lock(void);
+void mali_executor_unlock(void);
+#endif /* __MALI_EXECUTOR_H__ */
diff --git a/utgard/r6p1/common/mali_gp.c b/utgard/r6p1/common/mali_gp.c
new file mode 100755
index 0000000..d8d4919
--- /dev/null
+++ b/utgard/r6p1/common/mali_gp.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_gp.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "regs/mali_gp_regs.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+
+#include <mali_platform.h>
+
+static struct mali_gp_core *mali_global_gp_core = NULL;
+
+/* Interrupt handlers */
+static void mali_gp_irq_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data);
+
+struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group)
+{
+	struct mali_gp_core *core = NULL;
+
+	MALI_DEBUG_ASSERT(NULL == mali_global_gp_core);
+	MALI_DEBUG_PRINT(2, ("Mali GP: Creating Mali GP core: %s\n", resource->description));
+
+	core = _mali_osk_malloc(sizeof(struct mali_gp_core));
+	if (NULL != core) {
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALIGP2_REGISTER_ADDRESS_SPACE_SIZE)) {
+			_mali_osk_errcode_t ret;
+
+			ret = mali_gp_reset(core);
+
+			if (_MALI_OSK_ERR_OK == ret) {
+				ret = mali_group_add_gp_core(group, core);
+				if (_MALI_OSK_ERR_OK == ret) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					core->irq = _mali_osk_irq_init(resource->irq,
+								       mali_group_upper_half_gp,
+								       group,
+								       mali_gp_irq_probe_trigger,
+								       mali_gp_irq_probe_ack,
+								       core,
+								       resource->description);
+					if (NULL != core->irq) {
+						MALI_DEBUG_PRINT(4, ("Mali GP: set global gp core from 0x%08X to 0x%08X\n", mali_global_gp_core, core));
+						mali_global_gp_core = core;
+
+						return core;
+					} else {
+						MALI_PRINT_ERROR(("Mali GP: Failed to setup interrupt handlers for GP core %s\n", core->hw_core.description));
+					}
+					mali_group_remove_gp_core(group);
+				} else {
+					MALI_PRINT_ERROR(("Mali GP: Failed to add core %s to group\n", core->hw_core.description));
+				}
+			}
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Failed to allocate memory for GP core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_gp_delete(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	_mali_osk_irq_term(core->irq);
+	mali_hw_core_delete(&core->hw_core);
+	mali_global_gp_core = NULL;
+	_mali_osk_free(core);
+}
+
+void mali_gp_stop_bus(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_STOP_BUS);
+}
+
+_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core)
+{
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Send the stop bus command. */
+	mali_gp_stop_bus(core);
+
+	/* Wait for bus to be stopped */
+	for (i = 0; i < MALI_REG_POLL_COUNT_SLOW; i++) {
+		if (mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS) & MALIGP2_REG_VAL_STATUS_BUS_STOPPED) {
+			break;
+		}
+	}
+
+	if (MALI_REG_POLL_COUNT_SLOW == i) {
+		MALI_PRINT_ERROR(("Mali GP: Failed to stop bus on %s\n", core->hw_core.description));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_gp_hard_reset(struct mali_gp_core *core)
+{
+	const u32 reset_wait_target_register = MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_LIMIT;
+	const u32 reset_invalid_value = 0xC0FFE000;
+	const u32 reset_check_value = 0xC01A0000;
+	const u32 reset_default_value = 0;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+	MALI_DEBUG_PRINT(4, ("Mali GP: Hard reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_invalid_value);
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_RESET);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value);
+		if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) {
+			break;
+		}
+	}
+
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Mali GP: The hard reset loop didn't work, unable to recover\n"));
+	}
+
+	mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_default_value); /* set it back to the default */
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+
+}
+
+void mali_gp_reset_async(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	MALI_DEBUG_PRINT(4, ("Mali GP: Reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALI400GP_REG_VAL_IRQ_RESET_COMPLETED);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALI400GP_REG_VAL_CMD_SOFT_RESET);
+
+}
+
+_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core)
+{
+	int i;
+	u32 rawstat = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		rawstat = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+		if (rawstat & MALI400GP_REG_VAL_IRQ_RESET_COMPLETED) {
+			break;
+		}
+	}
+
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Mali GP: Failed to reset core %s, rawstat: 0x%08x\n",
+				  core->hw_core.description, rawstat));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core)
+{
+	mali_gp_reset_async(core);
+	return mali_gp_reset_wait(core);
+}
+
+void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job)
+{
+	u32 startcmd = 0;
+	u32 *frame_registers = mali_gp_job_get_frame_registers(job);
+	u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job);
+	u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job);
+
+	/* Disable gpu secure mode. */
+	if (MALI_TRUE == _mali_osk_gpu_secure_mode_is_enabled()) {
+		_mali_osk_gpu_secure_mode_disable();
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	if (mali_gp_job_has_vs_job(job)) {
+		startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_VS;
+	}
+
+	if (mali_gp_job_has_plbu_job(job)) {
+		startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_PLBU;
+	}
+
+	MALI_DEBUG_ASSERT(0 != startcmd);
+
+	mali_hw_core_register_write_array_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR, frame_registers, MALIGP2_NUM_REGS_FRAME);
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0);
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE);
+	}
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1);
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE);
+	}
+
+	MALI_DEBUG_PRINT(3, ("Mali GP: Starting job (0x%08x) on core %s with command 0x%08X\n", job, core->hw_core.description, startcmd));
+
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC);
+
+	/* Barrier to make sure the previous register write is finished */
+	_mali_osk_write_mem_barrier();
+
+	/* This is the command that starts the core.
+	 *
+	 * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just
+	 * force core to assert the completion interrupt.
+	 */
+#if !defined(PROFILING_SKIP_GP_JOBS)
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, startcmd);
+#else
+	{
+		u32 bits = 0;
+
+		if (mali_gp_job_has_vs_job(job))
+			bits = MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST;
+		if (mali_gp_job_has_plbu_job(job))
+			bits |= MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST;
+
+		mali_hw_core_register_write_relaxed(&core->hw_core,
+						    MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, bits);
+	}
+#endif
+
+	/* Barrier to make sure the previous register write is finished */
+	_mali_osk_write_mem_barrier();
+}
+
+void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr)
+{
+	u32 irq_readout;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+
+	if (irq_readout & MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM | MALIGP2_REG_VAL_IRQ_HANG));
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED); /* re-enable interrupts */
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR, start_addr);
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR, end_addr);
+
+		MALI_DEBUG_PRINT(3, ("Mali GP: Resuming job\n"));
+
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC);
+		_mali_osk_write_mem_barrier();
+	}
+	/*
+	 * else: core has been reset between PLBU_OUT_OF_MEM interrupt and this new heap response.
+	 * A timeout or a page fault on Mali-200 PP core can cause this behaviour.
+	 */
+}
+
+u32 mali_gp_core_get_version(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VERSION);
+}
+
+struct mali_gp_core *mali_gp_get_global_gp_core(void)
+{
+	return mali_global_gp_core;
+}
+
+/* ------------- interrupt handling below ------------------ */
+static void mali_gp_irq_probe_trigger(void *data)
+{
+	struct mali_gp_core *core = (struct mali_gp_core *)data;
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR);
+	_mali_osk_mem_barrier();
+}
+
+static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data)
+{
+	struct mali_gp_core *core = (struct mali_gp_core *)data;
+	u32 irq_readout;
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT);
+	if (MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR & irq_readout) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR);
+		_mali_osk_mem_barrier();
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+/* ------ local helper functions below --------- */
+#if MALI_STATE_TRACKING
+u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\tGP: %s\n", core->hw_core.description);
+
+	return n;
+}
+#endif
+
+void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job)
+{
+	u32 val0 = 0;
+	u32 val1 = 0;
+	u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job);
+	u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job);
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		val0 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE);
+		mali_gp_job_set_perf_counter_value0(job, val0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C0, val0);
+		_mali_osk_profiling_record_global_counters(COUNTER_VP_0_C0, val0);
+#endif
+
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		val1 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE);
+		mali_gp_job_set_perf_counter_value1(job, val1);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C1, val1);
+		_mali_osk_profiling_record_global_counters(COUNTER_VP_0_C1, val1);
+#endif
+	}
+}
diff --git a/utgard/r6p1/common/mali_gp.h b/utgard/r6p1/common/mali_gp.h
new file mode 100755
index 0000000..ecbe70e
--- /dev/null
+++ b/utgard/r6p1/common/mali_gp.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GP_H__
+#define __MALI_GP_H__
+
+#include "mali_osk.h"
+#include "mali_gp_job.h"
+#include "mali_hw_core.h"
+#include "regs/mali_gp_regs.h"
+
+struct mali_group;
+
+/**
+ * Definition of the GP core struct
+ * Used to track a GP core in the system.
+ */
+struct mali_gp_core {
+	struct mali_hw_core  hw_core;           /**< Common for all HW cores */
+	_mali_osk_irq_t     *irq;               /**< IRQ handler */
+};
+
+_mali_osk_errcode_t mali_gp_initialize(void);
+void mali_gp_terminate(void);
+
+struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group);
+void mali_gp_delete(struct mali_gp_core *core);
+
+void mali_gp_stop_bus(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core);
+void mali_gp_reset_async(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core);
+void mali_gp_hard_reset(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core);
+
+void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job);
+void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr);
+
+u32 mali_gp_core_get_version(struct mali_gp_core *core);
+
+struct mali_gp_core *mali_gp_get_global_gp_core(void);
+
+#if MALI_STATE_TRACKING
+u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size);
+#endif
+
+void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job);
+
+MALI_STATIC_INLINE const char *mali_gp_core_description(struct mali_gp_core *core)
+{
+	return core->hw_core.description;
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_gp_get_interrupt_result(struct mali_gp_core *core)
+{
+	u32 stat_used = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT) &
+			MALIGP2_REG_VAL_IRQ_MASK_USED;
+
+	if (0 == stat_used) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	} else if ((MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST |
+		    MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST) == stat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS;
+	} else if (MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST == stat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS_VS;
+	} else if (MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST == stat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS_PLBU;
+	} else if (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM & stat_used) {
+		return MALI_INTERRUPT_RESULT_OOM;
+	}
+
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_get_rawstat(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core,
+					  MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+}
+
+MALI_STATIC_INLINE u32 mali_gp_is_active(struct mali_gp_core *core)
+{
+	u32 status = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS);
+	return (status & MALIGP2_REG_VAL_STATUS_MASK_ACTIVE) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_gp_mask_all_interrupts(struct mali_gp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_NONE);
+}
+
+MALI_STATIC_INLINE void mali_gp_enable_interrupts(struct mali_gp_core *core, enum mali_interrupt_result exceptions)
+{
+	/* Enable all interrupts, except those specified in exceptions */
+	u32 value;
+
+	if (MALI_INTERRUPT_RESULT_SUCCESS_VS == exceptions) {
+		/* Enable all used except VS complete */
+		value = MALIGP2_REG_VAL_IRQ_MASK_USED &
+			~MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST;
+	} else {
+		MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_SUCCESS_PLBU ==
+				  exceptions);
+		/* Enable all used except PLBU complete */
+		value = MALIGP2_REG_VAL_IRQ_MASK_USED &
+			~MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST;
+	}
+
+	mali_hw_core_register_write(&core->hw_core,
+				    MALIGP2_REG_ADDR_MGMT_INT_MASK,
+				    value);
+}
+
+MALI_STATIC_INLINE u32 mali_gp_read_plbu_alloc_start_addr(struct mali_gp_core *core)
+{
+	return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR);
+}
+
+#endif /* __MALI_GP_H__ */
diff --git a/utgard/r6p1/common/mali_gp_job.c b/utgard/r6p1/common/mali_gp_job.c
new file mode 100755
index 0000000..8dd19cb
--- /dev/null
+++ b/utgard/r6p1/common/mali_gp_job.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_gp_job.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_defer_bind.h"
+
+static u32 gp_counter_src0 = MALI_HW_CORE_NO_COUNTER;      /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+static u32 gp_counter_src1 = MALI_HW_CORE_NO_COUNTER;           /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+static void _mali_gp_del_varying_allocations(struct mali_gp_job *job);
+
+
+static int _mali_gp_add_varying_allocations(struct mali_session_data *session,
+		struct mali_gp_job *job,
+		u32 *alloc,
+		u32 num)
+{
+	int i = 0;
+	struct mali_gp_allocation_node *alloc_node;
+	mali_mem_allocation *mali_alloc = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+
+	for (i = 0 ; i < num ; i++) {
+		MALI_DEBUG_ASSERT(alloc[i]);
+		alloc_node = _mali_osk_calloc(1, sizeof(struct mali_gp_allocation_node));
+		if (alloc_node) {
+			INIT_LIST_HEAD(&alloc_node->node);
+			/* find mali allocation structure by vaddress*/
+			mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, alloc[i], 0);
+
+			if (likely(mali_vma_node)) {
+				mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+				MALI_DEBUG_ASSERT(alloc[i] == mali_vma_node->vm_node.start);
+			} else {
+				MALI_DEBUG_PRINT(1, ("ERROE!_mali_gp_add_varying_allocations,can't find allocation %d by address =0x%x, num=%d\n", i, alloc[i], num));
+				_mali_osk_free(alloc_node);
+				goto fail;
+			}
+			alloc_node->alloc = mali_alloc;
+			/* add to gp job varying alloc list*/
+			list_move(&alloc_node->node, &job->varying_alloc);
+		} else
+			goto fail;
+	}
+
+	return 0;
+fail:
+	MALI_DEBUG_PRINT(1, ("ERROE!_mali_gp_add_varying_allocations,failed to alloc memory!\n"));
+	_mali_gp_del_varying_allocations(job);
+	return -1;
+}
+
+
+static void _mali_gp_del_varying_allocations(struct mali_gp_job *job)
+{
+	struct mali_gp_allocation_node *alloc_node, *tmp_node;
+
+	list_for_each_entry_safe(alloc_node, tmp_node, &job->varying_alloc, node) {
+		list_del(&alloc_node->node);
+		kfree(alloc_node);
+	}
+	INIT_LIST_HEAD(&job->varying_alloc);
+}
+
+struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker)
+{
+	struct mali_gp_job *job;
+	u32 perf_counter_flag;
+	u32 __user *memory_list = NULL;
+	struct mali_gp_allocation_node *alloc_node, *tmp_node;
+
+	job = _mali_osk_calloc(1, sizeof(struct mali_gp_job));
+	if (NULL != job) {
+		job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_FINISHED, sizeof(_mali_uk_gp_job_finished_s));
+		if (NULL == job->finished_notification) {
+			goto fail3;
+		}
+
+		job->oom_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_STALLED, sizeof(_mali_uk_gp_job_suspended_s));
+		if (NULL == job->oom_notification) {
+			goto fail2;
+		}
+
+		if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_gp_start_job_s))) {
+			goto fail1;
+		}
+
+		perf_counter_flag = mali_gp_job_get_perf_counter_flag(job);
+
+		/* case when no counters came from user space
+		 * so pass the debugfs / DS-5 provided global ones to the job object */
+		if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) ||
+		      (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) {
+			mali_gp_job_set_perf_counter_src0(job, mali_gp_job_get_gp_counter_src0());
+			mali_gp_job_set_perf_counter_src1(job, mali_gp_job_get_gp_counter_src1());
+		}
+
+		_mali_osk_list_init(&job->list);
+		job->session = session;
+		job->id = id;
+		job->heap_current_addr = job->uargs.frame_registers[4];
+		job->perf_counter_value0 = 0;
+		job->perf_counter_value1 = 0;
+		job->pid = _mali_osk_get_pid();
+		job->tid = _mali_osk_get_tid();
+
+
+		INIT_LIST_HEAD(&job->varying_alloc);
+		INIT_LIST_HEAD(&job->vary_todo);
+		job->dmem = NULL;
+
+		if (job->uargs.deferred_mem_num > session->allocation_mgr.mali_allocation_num) {
+			MALI_PRINT_ERROR(("Mali GP job: The number of  varying buffer to defer bind  is invalid !\n"));
+			goto fail1;
+		}
+
+		/* add varying allocation list*/
+		if (job->uargs.deferred_mem_num > 0) {
+			/* copy varying list from user space*/
+			job->varying_list = _mali_osk_calloc(1, sizeof(u32) * job->uargs.deferred_mem_num);
+			if (!job->varying_list) {
+				MALI_PRINT_ERROR(("Mali GP job: allocate varying_list failed varying_alloc_num = %d !\n", job->uargs.deferred_mem_num));
+				goto fail1;
+			}
+
+			memory_list = (u32 __user *)(uintptr_t)uargs->deferred_mem_list;
+
+			if (0 != _mali_osk_copy_from_user(job->varying_list, memory_list, sizeof(u32) * job->uargs.deferred_mem_num)) {
+				MALI_PRINT_ERROR(("Mali GP job: Failed to copy varying list from user space!\n"));
+				goto fail;
+			}
+
+			if (unlikely(_mali_gp_add_varying_allocations(session, job, job->varying_list,
+					job->uargs.deferred_mem_num))) {
+				MALI_PRINT_ERROR(("Mali GP job: _mali_gp_add_varying_allocations failed!\n"));
+				goto fail;
+			}
+
+			/* do preparetion for each allocation */
+			list_for_each_entry_safe(alloc_node, tmp_node, &job->varying_alloc, node) {
+				if (unlikely(_MALI_OSK_ERR_OK != mali_mem_defer_bind_allocation_prepare(alloc_node->alloc, &job->vary_todo, &job->required_varying_memsize))) {
+					MALI_PRINT_ERROR(("Mali GP job: mali_mem_defer_bind_allocation_prepare failed!\n"));
+					goto fail;
+				}
+			}
+
+			_mali_gp_del_varying_allocations(job);
+
+			/* bind varying here, to avoid memory latency issue. */
+			{
+				struct mali_defer_mem_block dmem_block;
+
+				INIT_LIST_HEAD(&dmem_block.free_pages);
+				atomic_set(&dmem_block.num_free_pages, 0);
+
+				if (mali_mem_prepare_mem_for_job(job, &dmem_block)) {
+					MALI_PRINT_ERROR(("Mali GP job: mali_mem_prepare_mem_for_job failed!\n"));
+					goto fail;
+				}
+				if (_MALI_OSK_ERR_OK != mali_mem_defer_bind(job, &dmem_block)) {
+					MALI_PRINT_ERROR(("gp job create, mali_mem_defer_bind failed! GP %x fail!", job));
+					goto fail;
+				}
+			}
+
+			if (uargs->varying_memsize > MALI_UK_BIG_VARYING_SIZE) {
+				job->big_job = 1;
+			}
+		}
+		job->pp_tracker = pp_tracker;
+		if (NULL != job->pp_tracker) {
+			/* Take a reference on PP job's tracker that will be released when the GP
+			   job is done. */
+			mali_timeline_system_tracker_get(session->timeline_system, pp_tracker);
+		}
+
+		mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_GP, NULL, job);
+		mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence));
+
+		return job;
+	} else {
+		MALI_PRINT_ERROR(("Mali GP job: _mali_osk_calloc failed!\n"));
+		return NULL;
+	}
+
+
+fail:
+	_mali_osk_free(job->varying_list);
+	/* Handle allocate fail here, free all varying node */
+	{
+		struct mali_backend_bind_list *bkn, *bkn_tmp;
+		list_for_each_entry_safe(bkn, bkn_tmp , &job->vary_todo, node) {
+			list_del(&bkn->node);
+			_mali_osk_free(bkn);
+		}
+	}
+fail1:
+	_mali_osk_notification_delete(job->oom_notification);
+fail2:
+	_mali_osk_notification_delete(job->finished_notification);
+fail3:
+	_mali_osk_free(job);
+	return NULL;
+}
+
+void mali_gp_job_delete(struct mali_gp_job *job)
+{
+	struct mali_backend_bind_list *bkn, *bkn_tmp;
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(NULL == job->pp_tracker);
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list));
+	_mali_osk_free(job->varying_list);
+
+	/* Handle allocate fail here, free all varying node */
+	list_for_each_entry_safe(bkn, bkn_tmp , &job->vary_todo, node) {
+		list_del(&bkn->node);
+		_mali_osk_free(bkn);
+	}
+
+	mali_mem_defer_dmem_free(job);
+
+	/* de-allocate the pre-allocated oom notifications */
+	if (NULL != job->oom_notification) {
+		_mali_osk_notification_delete(job->oom_notification);
+		job->oom_notification = NULL;
+	}
+	if (NULL != job->finished_notification) {
+		_mali_osk_notification_delete(job->finished_notification);
+		job->finished_notification = NULL;
+	}
+
+	_mali_osk_free(job);
+}
+
+void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list)
+{
+	struct mali_gp_job *iter;
+	struct mali_gp_job *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Find position in list/queue where job should be added. */
+	_MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list,
+					    struct mali_gp_job, list) {
+
+		/* A span is used to handle job ID wrapping. */
+		bool job_is_after = (mali_gp_job_get_id(job) -
+				     mali_gp_job_get_id(iter)) <
+				    MALI_SCHEDULER_JOB_ID_SPAN;
+
+		if (job_is_after) {
+			break;
+		}
+	}
+
+	_mali_osk_list_add(&job->list, &iter->list);
+}
+
+u32 mali_gp_job_get_gp_counter_src0(void)
+{
+	return gp_counter_src0;
+}
+
+void mali_gp_job_set_gp_counter_src0(u32 counter)
+{
+	gp_counter_src0 = counter;
+}
+
+u32 mali_gp_job_get_gp_counter_src1(void)
+{
+	return gp_counter_src1;
+}
+
+void mali_gp_job_set_gp_counter_src1(u32 counter)
+{
+	gp_counter_src1 = counter;
+}
+
+mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (NULL != job->pp_tracker) {
+		schedule_mask |= mali_timeline_system_tracker_put(job->session->timeline_system, job->pp_tracker, MALI_FALSE == success);
+		job->pp_tracker = NULL;
+	}
+
+	return schedule_mask;
+}
diff --git a/utgard/r6p1/common/mali_gp_job.h b/utgard/r6p1/common/mali_gp_job.h
new file mode 100755
index 0000000..fbf8ee8
--- /dev/null
+++ b/utgard/r6p1/common/mali_gp_job.h
@@ -0,0 +1,324 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GP_JOB_H__
+#define __MALI_GP_JOB_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_session.h"
+#include "mali_timeline.h"
+#include "mali_scheduler_types.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+#include "mali_timeline.h"
+
+struct mali_defer_mem;
+/**
+ * This structure represents a GP job
+ *
+ * The GP job object itself is not protected by any single lock,
+ * but relies on other locks instead (scheduler, executor and timeline lock).
+ * Think of the job object as moving between these sub systems through-out
+ * its lifetime. Different part of the GP job struct is used by different
+ * subsystems. Accessor functions ensure that correct lock is taken.
+ * Do NOT access any data members directly from outside this module!
+ */
+struct mali_gp_job {
+	/*
+	 * These members are typically only set at creation,
+	 * and only read later on.
+	 * They do not require any lock protection.
+	 */
+	_mali_uk_gp_start_job_s uargs;                     /**< Arguments from user space */
+	struct mali_session_data *session;                 /**< Session which submitted this job */
+	u32 pid;                                           /**< Process ID of submitting process */
+	u32 tid;                                           /**< Thread ID of submitting thread */
+	u32 id;                                            /**< Identifier for this job in kernel space (sequential numbering) */
+	u32 cache_order;                                   /**< Cache order used for L2 cache flushing (sequential numbering) */
+	struct mali_timeline_tracker tracker;              /**< Timeline tracker for this job */
+	struct mali_timeline_tracker *pp_tracker;          /**< Pointer to Timeline tracker for PP job that depends on this job. */
+	_mali_osk_notification_t *finished_notification;   /**< Notification sent back to userspace on job complete */
+
+	/*
+	 * These members are used by the scheduler,
+	 * protected by scheduler lock
+	 */
+	_mali_osk_list_t list;                             /**< Used to link jobs together in the scheduler queue */
+
+	/*
+	 * These members are used by the executor and/or group,
+	 * protected by executor lock
+	 */
+	_mali_osk_notification_t *oom_notification;        /**< Notification sent back to userspace on OOM */
+
+	/*
+	 * Set by executor/group on job completion, read by scheduler when
+	 * returning job to user. Hold executor lock when setting,
+	 * no lock needed when reading
+	 */
+	u32 heap_current_addr;                             /**< Holds the current HEAP address when the job has completed */
+	u32 perf_counter_value0;                           /**< Value of performance counter 0 (to be returned to user space) */
+	u32 perf_counter_value1;                           /**< Value of performance counter 1 (to be returned to user space) */
+	struct mali_defer_mem *dmem;                                          /** < used for defer bind to store dmem info */
+	struct list_head varying_alloc;                    /**< hold the list of varying allocations */
+	u32 bind_flag;                                     /** < flag for deferbind*/
+	u32 *varying_list;                                 /**< varying memory list need to to defer bind*/
+	struct list_head vary_todo;                        /**< list of backend list need to do defer bind*/
+	u32 required_varying_memsize;                      /** < size of varying memory to reallocate*/
+	u32 big_job;                                       /** < if the gp job have large varying output and may take long time*/
+};
+
+#define MALI_DEFER_BIND_MEMORY_PREPARED (0x1 << 0)
+#define MALI_DEFER_BIND_MEMORY_BINDED (0x1 << 2)
+
+struct mali_gp_allocation_node {
+	struct list_head node;
+	mali_mem_allocation *alloc;
+};
+
+struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker);
+void mali_gp_job_delete(struct mali_gp_job *job);
+
+u32 mali_gp_job_get_gp_counter_src0(void);
+void mali_gp_job_set_gp_counter_src0(u32 counter);
+u32 mali_gp_job_get_gp_counter_src1(void);
+void mali_gp_job_set_gp_counter_src1(u32 counter);
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->id;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_cache_order(struct mali_gp_job *job,
+		u32 cache_order)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	job->cache_order = cache_order;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_cache_order(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->cache_order;
+}
+
+MALI_STATIC_INLINE u64 mali_gp_job_get_user_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.user_job_ptr;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_frame_builder_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_builder_id;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_flush_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.flush_id;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_pid(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->pid;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_tid(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->tid;
+}
+
+MALI_STATIC_INLINE u32 *mali_gp_job_get_frame_registers(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_registers;
+}
+
+MALI_STATIC_INLINE struct mali_session_data *mali_gp_job_get_session(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->session;
+}
+
+MALI_STATIC_INLINE mali_bool mali_gp_job_has_vs_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.frame_registers[0] != job->uargs.frame_registers[1]) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_gp_job_has_plbu_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.frame_registers[2] != job->uargs.frame_registers[3]) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_current_heap_addr(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->heap_current_addr;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_current_heap_addr(struct mali_gp_job *job, u32 heap_addr)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->heap_current_addr = heap_addr;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_flag(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_flag;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src0(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_src0;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src1(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_src1;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value0(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value0;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value1(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value1;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src0(struct mali_gp_job *job, u32 src)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.perf_counter_src0 = src;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src1(struct mali_gp_job *job, u32 src)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.perf_counter_src1 = src;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value0(struct mali_gp_job *job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value0 = value;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value1(struct mali_gp_job *job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value1 = value;
+}
+
+void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list);
+
+MALI_STATIC_INLINE void mali_gp_job_list_move(struct mali_gp_job *job,
+		_mali_osk_list_t *list)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list));
+	_mali_osk_list_move(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_gp_job_list_remove(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->list);
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *
+mali_gp_job_get_finished_notification(struct mali_gp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->finished_notification);
+
+	notification = job->finished_notification;
+	job->finished_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *mali_gp_job_get_oom_notification(
+	struct mali_gp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job->oom_notification);
+
+	notification = job->oom_notification;
+	job->oom_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_oom_notification(
+	struct mali_gp_job *job,
+	_mali_osk_notification_t *notification)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(NULL == job->oom_notification);
+	job->oom_notification = notification;
+}
+
+MALI_STATIC_INLINE struct mali_timeline_tracker *mali_gp_job_get_tracker(
+	struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return &(job->tracker);
+}
+
+
+MALI_STATIC_INLINE u32 *mali_gp_job_get_timeline_point_ptr(
+	struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr;
+}
+
+
+/**
+ * Release reference on tracker for PP job that depends on this GP job.
+ *
+ * @note If GP job has a reference on tracker, this function MUST be called before the GP job is
+ * deleted.
+ *
+ * @param job GP job that is done.
+ * @param success MALI_TRUE if job completed successfully, MALI_FALSE if not.
+ * @return A scheduling bitmask indicating whether scheduling needs to be done.
+ */
+mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success);
+
+#endif /* __MALI_GP_JOB_H__ */
diff --git a/utgard/r6p1/common/mali_group.c b/utgard/r6p1/common/mali_group.c
new file mode 100755
index 0000000..df03519
--- /dev/null
+++ b/utgard/r6p1/common/mali_group.c
@@ -0,0 +1,1908 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+#include "mali_kernel_common.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_mmu.h"
+#include "mali_dlbu.h"
+#include "mali_broadcast.h"
+#include "mali_scheduler.h"
+#include "mali_osk_profiling.h"
+#include "mali_pm_domain.h"
+#include "mali_pm.h"
+#include "mali_executor.h"
+#include <mali_platform.h>
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+#include <linux/sched.h>
+#include <trace/events/gpu.h>
+#endif
+
+#define MALI_MAX_NUM_DOMAIN_REFS (MALI_MAX_NUMBER_OF_GROUPS * 2)
+
+#if defined(CONFIG_MALI400_PROFILING)
+static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num);
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+static struct mali_group *mali_global_groups[MALI_MAX_NUMBER_OF_GROUPS] = { NULL, };
+static u32 mali_global_num_groups = 0;
+
+/* SW timer for job execution */
+int mali_max_job_runtime = MALI_MAX_JOB_RUNTIME_DEFAULT;
+
+/* local helper functions */
+static void mali_group_bottom_half_mmu(void *data);
+static void mali_group_bottom_half_gp(void *data);
+static void mali_group_bottom_half_pp(void *data);
+static void mali_group_timeout(void *data);
+static void mali_group_reset_pp(struct mali_group *group);
+static void mali_group_reset_mmu(struct mali_group *group);
+
+static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session);
+static void mali_group_recovery_reset(struct mali_group *group);
+
+struct mali_group *mali_group_create(struct mali_l2_cache_core *core,
+        struct mali_dlbu_core *dlbu,
+        struct mali_bcast_unit *bcast,
+        u32 domain_index)
+{
+    struct mali_group *group = NULL;
+
+    if (mali_global_num_groups >= MALI_MAX_NUMBER_OF_GROUPS) {
+        MALI_PRINT_ERROR(("Mali group: Too many group objects created\n"));
+        return NULL;
+    }
+
+    group = _mali_osk_calloc(1, sizeof(struct mali_group));
+    if (NULL != group) {
+        group->timeout_timer = _mali_osk_timer_init();
+        if (NULL != group->timeout_timer) {
+            _mali_osk_timer_setcallback(group->timeout_timer, mali_group_timeout, (void *)group);
+
+            group->l2_cache_core[0] = core;
+            _mali_osk_list_init(&group->group_list);
+            _mali_osk_list_init(&group->executor_list);
+            _mali_osk_list_init(&group->pm_domain_list);
+            group->bcast_core = bcast;
+            group->dlbu_core = dlbu;
+
+            /* register this object as a part of the correct power domain */
+            if ((NULL != core) || (NULL != dlbu) || (NULL != bcast))
+                group->pm_domain = mali_pm_register_group(domain_index, group);
+
+            mali_global_groups[mali_global_num_groups] = group;
+            mali_global_num_groups++;
+
+            return group;
+        }
+        _mali_osk_free(group);
+    }
+
+    return NULL;
+}
+
+void mali_group_delete(struct mali_group *group)
+{
+    u32 i;
+
+    MALI_DEBUG_PRINT(4, ("Deleting group %s\n",
+                mali_group_core_description(group)));
+
+    MALI_DEBUG_ASSERT(NULL == group->parent_group);
+    MALI_DEBUG_ASSERT((MALI_GROUP_STATE_INACTIVE == group->state) || ((MALI_GROUP_STATE_ACTIVATION_PENDING == group->state)));
+
+    /* Delete the resources that this group owns */
+    if (NULL != group->gp_core) {
+        mali_gp_delete(group->gp_core);
+    }
+
+    if (NULL != group->pp_core) {
+        mali_pp_delete(group->pp_core);
+    }
+
+    if (NULL != group->mmu) {
+        mali_mmu_delete(group->mmu);
+    }
+
+    if (mali_group_is_virtual(group)) {
+        /* Remove all groups from virtual group */
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            child->parent_group = NULL;
+            mali_group_delete(child);
+        }
+
+        mali_dlbu_delete(group->dlbu_core);
+
+        if (NULL != group->bcast_core) {
+            mali_bcast_unit_delete(group->bcast_core);
+        }
+    }
+
+    for (i = 0; i < mali_global_num_groups; i++) {
+        if (mali_global_groups[i] == group) {
+            mali_global_groups[i] = NULL;
+            mali_global_num_groups--;
+
+            if (i != mali_global_num_groups) {
+                /* We removed a group from the middle of the array -- move the last
+                 * group to the current position to close the gap */
+                mali_global_groups[i] = mali_global_groups[mali_global_num_groups];
+                mali_global_groups[mali_global_num_groups] = NULL;
+            }
+
+            break;
+        }
+    }
+
+    if (NULL != group->timeout_timer) {
+        _mali_osk_timer_del(group->timeout_timer);
+        _mali_osk_timer_term(group->timeout_timer);
+    }
+
+    if (NULL != group->bottom_half_work_mmu) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_mmu);
+    }
+
+    if (NULL != group->bottom_half_work_gp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_gp);
+    }
+
+    if (NULL != group->bottom_half_work_pp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_pp);
+    }
+
+    _mali_osk_free(group);
+}
+
+_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group, struct mali_mmu_core *mmu_core)
+{
+    /* This group object now owns the MMU core object */
+    group->mmu = mmu_core;
+    group->bottom_half_work_mmu = _mali_osk_wq_create_work(mali_group_bottom_half_mmu, group);
+    if (NULL == group->bottom_half_work_mmu) {
+        return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_mmu_core(struct mali_group *group)
+{
+    /* This group object no longer owns the MMU core object */
+    group->mmu = NULL;
+    if (NULL != group->bottom_half_work_mmu) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_mmu);
+    }
+}
+
+_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group, struct mali_gp_core *gp_core)
+{
+    /* This group object now owns the GP core object */
+    group->gp_core = gp_core;
+    group->bottom_half_work_gp = _mali_osk_wq_create_work(mali_group_bottom_half_gp, group);
+    if (NULL == group->bottom_half_work_gp) {
+        return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_gp_core(struct mali_group *group)
+{
+    /* This group object no longer owns the GP core object */
+    group->gp_core = NULL;
+    if (NULL != group->bottom_half_work_gp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_gp);
+    }
+}
+
+_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group, struct mali_pp_core *pp_core)
+{
+    /* This group object now owns the PP core object */
+    group->pp_core = pp_core;
+    group->bottom_half_work_pp = _mali_osk_wq_create_work(mali_group_bottom_half_pp, group);
+    if (NULL == group->bottom_half_work_pp) {
+        return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_pp_core(struct mali_group *group)
+{
+    /* This group object no longer owns the PP core object */
+    group->pp_core = NULL;
+    if (NULL != group->bottom_half_work_pp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_pp);
+    }
+}
+
+enum mali_group_state mali_group_activate(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(4, ("Group: Activating group %s\n",
+                mali_group_core_description(group)));
+
+    if (MALI_GROUP_STATE_INACTIVE == group->state) {
+        /* Group is inactive, get PM refs in order to power up */
+
+        /*
+         * We'll take a maximum of 2 power domain references pr group,
+         * one for the group itself, and one for it's L2 cache.
+         */
+        struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS];
+        struct mali_group *groups[MALI_MAX_NUM_DOMAIN_REFS];
+        u32 num_domains = 0;
+        mali_bool all_groups_on;
+
+        /* Deal with child groups first */
+        if (mali_group_is_virtual(group)) {
+            /*
+             * The virtual group might have 0, 1 or 2 L2s in
+             * its l2_cache_core array, but we ignore these and
+             * let the child groups take the needed L2 cache ref
+             * on behalf of the virtual group.
+             * In other words; The L2 refs are taken in pair with
+             * the physical group which the L2 is attached to.
+             */
+            struct mali_group *child;
+            struct mali_group *temp;
+
+            /*
+             * Child group is inactive, get PM
+             * refs in order to power up.
+             */
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp,
+                    &group->group_list,
+                    struct mali_group, group_list) {
+                MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE
+                        == child->state);
+
+                child->state = MALI_GROUP_STATE_ACTIVATION_PENDING;
+
+                MALI_DEBUG_ASSERT_POINTER(
+                        child->pm_domain);
+                domains[num_domains] = child->pm_domain;
+                groups[num_domains] = child;
+                num_domains++;
+
+                /*
+                 * Take L2 domain ref for child group.
+                 */
+                MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS
+                        > num_domains);
+                domains[num_domains] = mali_l2_cache_get_pm_domain(
+                        child->l2_cache_core[0]);
+                groups[num_domains] = NULL;
+                MALI_DEBUG_ASSERT(NULL ==
+                        child->l2_cache_core[1]);
+                num_domains++;
+            }
+        } else {
+            /* Take L2 domain ref for physical groups. */
+            MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+                    num_domains);
+
+            domains[num_domains] = mali_l2_cache_get_pm_domain(
+                    group->l2_cache_core[0]);
+            groups[num_domains] = NULL;
+            MALI_DEBUG_ASSERT(NULL == group->l2_cache_core[1]);
+            num_domains++;
+        }
+
+        /* Do the group itself last (it's dependencies first) */
+
+        group->state = MALI_GROUP_STATE_ACTIVATION_PENDING;
+
+        MALI_DEBUG_ASSERT_POINTER(group->pm_domain);
+        domains[num_domains] = group->pm_domain;
+        groups[num_domains] = group;
+        num_domains++;
+
+        all_groups_on = mali_pm_get_domain_refs(domains, groups,
+                num_domains);
+
+        /*
+         * Complete activation for group, include
+         * virtual group or physical group.
+         */
+        if (MALI_TRUE == all_groups_on) {
+
+            mali_group_set_active(group);
+        }
+    } else if (MALI_GROUP_STATE_ACTIVE == group->state) {
+        /* Already active */
+        MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+    } else {
+        /*
+         * Activation already pending, group->power_is_on could
+         * be both true or false. We need to wait for power up
+         * notification anyway.
+         */
+        MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING
+                == group->state);
+    }
+
+    MALI_DEBUG_PRINT(4, ("Group: group %s activation result: %s\n",
+                mali_group_core_description(group),
+                MALI_GROUP_STATE_ACTIVE == group->state ?
+                "ACTIVE" : "PENDING"));
+
+    return group->state;
+}
+
+mali_bool mali_group_set_active(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING == group->state);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+
+    MALI_DEBUG_PRINT(4, ("Group: Activation completed for %s\n",
+                mali_group_core_description(group)));
+
+    if (mali_group_is_virtual(group)) {
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list,
+                struct mali_group, group_list) {
+            if (MALI_TRUE != child->power_is_on) {
+                return MALI_FALSE;
+            }
+
+            child->state = MALI_GROUP_STATE_ACTIVE;
+        }
+
+        mali_group_reset(group);
+    }
+
+    /* Go to ACTIVE state */
+    group->state = MALI_GROUP_STATE_ACTIVE;
+
+    return MALI_TRUE;
+}
+
+mali_bool mali_group_deactivate(struct mali_group *group)
+{
+    struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS];
+    u32 num_domains = 0;
+    mali_bool power_down = MALI_FALSE;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE != group->state);
+
+    MALI_DEBUG_PRINT(3, ("Group: Deactivating group %s\n",
+                mali_group_core_description(group)));
+
+    group->state = MALI_GROUP_STATE_INACTIVE;
+
+    MALI_DEBUG_ASSERT_POINTER(group->pm_domain);
+    domains[num_domains] = group->pm_domain;
+    num_domains++;
+
+    if (mali_group_is_virtual(group)) {
+        /* Release refs for all child groups */
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp,
+                &group->group_list,
+                struct mali_group, group_list) {
+            child->state = MALI_GROUP_STATE_INACTIVE;
+
+            MALI_DEBUG_ASSERT_POINTER(child->pm_domain);
+            domains[num_domains] = child->pm_domain;
+            num_domains++;
+
+            /* Release L2 cache domain for child groups */
+            MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+                    num_domains);
+            domains[num_domains] = mali_l2_cache_get_pm_domain(
+                    child->l2_cache_core[0]);
+            MALI_DEBUG_ASSERT(NULL == child->l2_cache_core[1]);
+            num_domains++;
+        }
+
+        /*
+         * Must do mali_group_power_down() steps right here for
+         * virtual group, because virtual group itself is likely to
+         * stay powered on, however child groups are now very likely
+         * to be powered off (and thus lose their state).
+         */
+
+        mali_group_clear_session(group);
+        /*
+         * Disable the broadcast unit (clear it's mask).
+         * This is needed in case the GPU isn't actually
+         * powered down at this point and groups are
+         * removed from an inactive virtual group.
+         * If not, then the broadcast unit will intercept
+         * their interrupts!
+         */
+        mali_bcast_disable(group->bcast_core);
+    } else {
+        /* Release L2 cache domain for physical groups */
+        MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+                num_domains);
+        domains[num_domains] = mali_l2_cache_get_pm_domain(
+                group->l2_cache_core[0]);
+        MALI_DEBUG_ASSERT(NULL == group->l2_cache_core[1]);
+        num_domains++;
+    }
+
+    power_down = mali_pm_put_domain_refs(domains, num_domains);
+
+    return power_down;
+}
+
+void mali_group_power_up(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Power up for %s\n",
+                mali_group_core_description(group)));
+
+    group->power_is_on = MALI_TRUE;
+
+    if (MALI_FALSE == mali_group_is_virtual(group)
+            && MALI_FALSE == mali_group_is_in_virtual(group)) {
+        mali_group_reset(group);
+    }
+
+    /*
+     * When we just acquire only one physical group form virt group,
+     * we should remove the bcast&dlbu mask from virt group and
+     * reset bcast and dlbu core, although part of pp cores in virt
+     * group maybe not be powered on.
+     */
+    if (MALI_TRUE == mali_group_is_virtual(group)) {
+        mali_bcast_reset(group->bcast_core);
+        mali_dlbu_update_mask(group->dlbu_core);
+    }
+}
+
+void mali_group_power_down(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Power down for %s\n",
+                mali_group_core_description(group)));
+
+    group->power_is_on = MALI_FALSE;
+
+    if (mali_group_is_virtual(group)) {
+        /*
+         * What we do for physical jobs in this function should
+         * already have been done in mali_group_deactivate()
+         * for virtual group.
+         */
+        MALI_DEBUG_ASSERT(NULL == group->session);
+    } else {
+        mali_group_clear_session(group);
+    }
+}
+
+MALI_DEBUG_CODE(static void mali_group_print_virtual(struct mali_group *vgroup)
+        {
+        u32 i;
+        struct mali_group *group;
+        struct mali_group *temp;
+
+        MALI_DEBUG_PRINT(4, ("Virtual group %s (%p)\n",
+                mali_group_core_description(vgroup),
+                vgroup));
+        MALI_DEBUG_PRINT(4, ("l2_cache_core[0] = %p, ref = %d\n", vgroup->l2_cache_core[0], vgroup->l2_cache_core_ref_count[0]));
+        MALI_DEBUG_PRINT(4, ("l2_cache_core[1] = %p, ref = %d\n", vgroup->l2_cache_core[1], vgroup->l2_cache_core_ref_count[1]));
+
+        i = 0;
+        _MALI_OSK_LIST_FOREACHENTRY(group, temp, &vgroup->group_list, struct mali_group, group_list) {
+        MALI_DEBUG_PRINT(4, ("[%d] %s (%p), l2_cache_core[0] = %p\n",
+                i, mali_group_core_description(group),
+                group, group->l2_cache_core[0]));
+        i++;
+        }
+        })
+
+static void mali_group_dump_core_status(struct mali_group *group)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT(NULL != group->gp_core || (NULL != group->pp_core && !mali_group_is_virtual(group)));
+
+	if (NULL != group->gp_core) {
+		MALI_PRINT(("Dump Group %s\n", group->gp_core->hw_core.description));
+
+		for (i = 0; i < 0xA8; i += 0x10) {
+			MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->gp_core->hw_core, i),
+				    mali_hw_core_register_read(&group->gp_core->hw_core, i + 4),
+				    mali_hw_core_register_read(&group->gp_core->hw_core, i + 8),
+				    mali_hw_core_register_read(&group->gp_core->hw_core, i + 12)));
+		}
+
+
+	} else {
+		MALI_PRINT(("Dump Group %s\n", group->pp_core->hw_core.description));
+
+		for (i = 0; i < 0x5c; i += 0x10) {
+			MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->pp_core->hw_core, i),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 4),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 8),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 12)));
+		}
+
+		/* Ignore some minor registers */
+		for (i = 0x1000; i < 0x1068; i += 0x10) {
+			MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->pp_core->hw_core, i),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 4),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 8),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 12)));
+		}
+	}
+
+	MALI_PRINT(("Dump Group MMU\n"));
+	for (i = 0; i < 0x24; i += 0x10) {
+		MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->mmu->hw_core, i),
+			    mali_hw_core_register_read(&group->mmu->hw_core, i + 4),
+			    mali_hw_core_register_read(&group->mmu->hw_core, i + 8),
+			    mali_hw_core_register_read(&group->mmu->hw_core, i + 12)));
+	}
+}
+
+
+/**
+ * @Dump group status
+ */
+void mali_group_dump_status(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	if (mali_group_is_virtual(group)) {
+		struct mali_group *group_c;
+		struct mali_group *temp;
+		_MALI_OSK_LIST_FOREACHENTRY(group_c, temp, &group->group_list, struct mali_group, group_list) {
+			mali_group_dump_core_status(group_c);
+		}
+	} else {
+		mali_group_dump_core_status(group);
+	}
+}
+
+/**
+ * @brief Add child group to virtual group parent
+ */
+void mali_group_add_group(struct mali_group *parent, struct mali_group *child)
+{
+    mali_bool found;
+    u32 i;
+
+    MALI_DEBUG_PRINT(3, ("Adding group %s to virtual group %s\n",
+                mali_group_core_description(child),
+                mali_group_core_description(parent)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+    MALI_DEBUG_ASSERT(!mali_group_is_virtual(child));
+    MALI_DEBUG_ASSERT(NULL == child->parent_group);
+
+    _mali_osk_list_addtail(&child->group_list, &parent->group_list);
+
+    child->parent_group = parent;
+
+    MALI_DEBUG_ASSERT_POINTER(child->l2_cache_core[0]);
+
+    MALI_DEBUG_PRINT(4, ("parent->l2_cache_core: [0] = %p, [1] = %p\n", parent->l2_cache_core[0], parent->l2_cache_core[1]));
+    MALI_DEBUG_PRINT(4, ("child->l2_cache_core: [0] = %p, [1] = %p\n", child->l2_cache_core[0], child->l2_cache_core[1]));
+
+    /* Keep track of the L2 cache cores of child groups */
+    found = MALI_FALSE;
+    for (i = 0; i < 2; i++) {
+        if (parent->l2_cache_core[i] == child->l2_cache_core[0]) {
+            MALI_DEBUG_ASSERT(parent->l2_cache_core_ref_count[i] > 0);
+            parent->l2_cache_core_ref_count[i]++;
+            found = MALI_TRUE;
+        }
+    }
+
+    if (!found) {
+        /* First time we see this L2 cache, add it to our list */
+        i = (NULL == parent->l2_cache_core[0]) ? 0 : 1;
+
+        MALI_DEBUG_PRINT(4, ("First time we see l2_cache %p. Adding to [%d] = %p\n", child->l2_cache_core[0], i, parent->l2_cache_core[i]));
+
+        MALI_DEBUG_ASSERT(NULL == parent->l2_cache_core[i]);
+
+        parent->l2_cache_core[i] = child->l2_cache_core[0];
+        parent->l2_cache_core_ref_count[i]++;
+    }
+
+    /* Update Broadcast Unit and DLBU */
+    mali_bcast_add_group(parent->bcast_core, child);
+    mali_dlbu_add_group(parent->dlbu_core, child);
+
+    if (MALI_TRUE == parent->power_is_on) {
+        mali_bcast_reset(parent->bcast_core);
+        mali_dlbu_update_mask(parent->dlbu_core);
+    }
+
+    if (MALI_TRUE == child->power_is_on) {
+        if (NULL == parent->session) {
+            if (NULL != child->session) {
+                /*
+                 * Parent has no session, so clear
+                 * child session as well.
+                 */
+                mali_mmu_activate_empty_page_directory(child->mmu);
+            }
+        } else {
+            if (parent->session == child->session) {
+                /* We already have same session as parent,
+                 * so a simple zap should be enough.
+                 */
+                mali_mmu_zap_tlb(child->mmu);
+            } else {
+                /*
+                 * Parent has a different session, so we must
+                 * switch to that sessions page table
+                 */
+                mali_mmu_activate_page_directory(child->mmu, mali_session_get_page_directory(parent->session));
+            }
+
+            /* It is the parent which keeps the session from now on */
+            child->session = NULL;
+        }
+    } else {
+        /* should have been cleared when child was powered down */
+        MALI_DEBUG_ASSERT(NULL == child->session);
+    }
+
+    /* Start job on child when parent is active */
+    if (NULL != parent->pp_running_job) {
+        struct mali_pp_job *job = parent->pp_running_job;
+
+        MALI_DEBUG_PRINT(3, ("Group %x joining running job %d on virtual group %x\n",
+                    child, mali_pp_job_get_id(job), parent));
+
+        /* Only allowed to add active child to an active parent */
+        MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == parent->state);
+        MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == child->state);
+
+        mali_pp_job_start(child->pp_core, job, mali_pp_core_get_id(child->pp_core), MALI_TRUE);
+
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+                mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+        trace_gpu_sched_switch(
+                mali_pp_core_description(group->pp_core),
+                sched_clock(), mali_pp_job_get_tid(job),
+                0, mali_pp_job_get_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+        trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+    }
+
+    MALI_DEBUG_CODE(mali_group_print_virtual(parent);)
+}
+
+/**
+ * @brief Remove child group from virtual group parent
+ */
+void mali_group_remove_group(struct mali_group *parent, struct mali_group *child)
+{
+    u32 i;
+
+    MALI_DEBUG_PRINT(3, ("Removing group %s from virtual group %s\n",
+                mali_group_core_description(child),
+                mali_group_core_description(parent)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+    MALI_DEBUG_ASSERT(!mali_group_is_virtual(child));
+    MALI_DEBUG_ASSERT(parent == child->parent_group);
+
+    /* Update Broadcast Unit and DLBU */
+    mali_bcast_remove_group(parent->bcast_core, child);
+    mali_dlbu_remove_group(parent->dlbu_core, child);
+
+    if (MALI_TRUE == parent->power_is_on) {
+        mali_bcast_reset(parent->bcast_core);
+        mali_dlbu_update_mask(parent->dlbu_core);
+    }
+
+    child->session = parent->session;
+    child->parent_group = NULL;
+
+    _mali_osk_list_delinit(&child->group_list);
+    if (_mali_osk_list_empty(&parent->group_list)) {
+        parent->session = NULL;
+    }
+
+    /* Keep track of the L2 cache cores of child groups */
+    i = (child->l2_cache_core[0] == parent->l2_cache_core[0]) ? 0 : 1;
+
+    MALI_DEBUG_ASSERT(child->l2_cache_core[0] == parent->l2_cache_core[i]);
+
+    parent->l2_cache_core_ref_count[i]--;
+    if (parent->l2_cache_core_ref_count[i] == 0) {
+        parent->l2_cache_core[i] = NULL;
+    }
+
+    MALI_DEBUG_CODE(mali_group_print_virtual(parent));
+}
+
+struct mali_group *mali_group_acquire_group(struct mali_group *parent)
+{
+    struct mali_group *child = NULL;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+
+    if (!_mali_osk_list_empty(&parent->group_list)) {
+        child = _MALI_OSK_LIST_ENTRY(parent->group_list.prev, struct mali_group, group_list);
+        mali_group_remove_group(parent, child);
+    }
+
+    if (NULL != child) {
+        if (MALI_GROUP_STATE_ACTIVE != parent->state
+                && MALI_TRUE == child->power_is_on) {
+            mali_group_reset(child);
+        }
+    }
+
+    return child;
+}
+
+void mali_group_reset(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(NULL == group->gp_running_job);
+    MALI_DEBUG_ASSERT(NULL == group->pp_running_job);
+    MALI_DEBUG_ASSERT(NULL == group->session);
+
+    MALI_DEBUG_PRINT(3, ("Group: reset of %s\n",
+                mali_group_core_description(group)));
+
+    if (NULL != group->dlbu_core) {
+        mali_dlbu_reset(group->dlbu_core);
+    }
+
+    if (NULL != group->bcast_core) {
+        mali_bcast_reset(group->bcast_core);
+    }
+
+    MALI_DEBUG_ASSERT(NULL != group->mmu);
+    mali_group_reset_mmu(group);
+
+    if (NULL != group->gp_core) {
+        MALI_DEBUG_ASSERT(NULL == group->pp_core);
+        mali_gp_reset(group->gp_core);
+    } else {
+        MALI_DEBUG_ASSERT(NULL != group->pp_core);
+        mali_group_reset_pp(group);
+    }
+}
+
+void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job)
+{
+    struct mali_session_data *session;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Starting GP job 0x%08X on group %s\n",
+                job,
+                mali_group_core_description(group)));
+
+    session = mali_gp_job_get_session(job);
+
+    MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]);
+    mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_gp_job_get_cache_order(job));
+
+    mali_group_activate_page_directory(group, session);
+
+    mali_gp_job_start(group->gp_core, job);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+            MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0) |
+            MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+            mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job), 0, 0, 0);
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+            mali_gp_job_get_pid(job), mali_gp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+    trace_mali_core_active(mali_gp_job_get_pid(job), 1 /* active */, 1 /* GP */,  0 /* core */,
+            mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+    if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+            (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+        mali_group_report_l2_cache_counters_per_core(group, 0);
+    }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+    trace_gpu_sched_switch(mali_gp_core_description(group->gp_core),
+            sched_clock(), mali_gp_job_get_tid(job),
+            0, mali_gp_job_get_id(job));
+#endif
+
+    group->gp_running_job = job;
+    group->is_working = MALI_TRUE;
+
+    /* Setup SW timer and record start time */
+    group->start_time = _mali_osk_time_tickcount();
+    _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime));
+
+    MALI_DEBUG_PRINT(4, ("Group: Started GP job 0x%08X on group %s at %u\n",
+                job,
+                mali_group_core_description(group),
+                group->start_time));
+}
+
+/* Used to set all the registers except frame renderer list address and fragment shader stack address
+ * It means the caller must set these two registers properly before calling this function
+ */
+void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job)
+{
+    struct mali_session_data *session;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Starting PP job 0x%08X part %u/%u on group %s\n",
+                job, sub_job + 1,
+                mali_pp_job_get_sub_job_count(job),
+                mali_group_core_description(group)));
+
+    session = mali_pp_job_get_session(job);
+
+    if (NULL != group->l2_cache_core[0]) {
+        mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_pp_job_get_cache_order(job));
+    }
+
+    if (NULL != group->l2_cache_core[1]) {
+        mali_l2_cache_invalidate_conditional(group->l2_cache_core[1], mali_pp_job_get_cache_order(job));
+    }
+
+    mali_group_activate_page_directory(group, session);
+
+    if (mali_group_is_virtual(group)) {
+        struct mali_group *child;
+        struct mali_group *temp;
+        u32 core_num = 0;
+
+        MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job));
+
+        /* Configure DLBU for the job */
+        mali_dlbu_config_job(group->dlbu_core, job);
+
+        /* Write stack address for each child group */
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            mali_pp_write_addr_stack(child->pp_core, job);
+            core_num++;
+        }
+
+        mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE);
+    } else {
+        mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE);
+    }
+
+    /* if the group is virtual, loop through physical groups which belong to this group
+     * and call profiling events for its cores as virtual */
+    if (MALI_TRUE == mali_group_is_virtual(group)) {
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+                    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                    MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+                    mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+            _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                    MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+                    mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+            trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+                    mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+        }
+
+#if defined(CONFIG_MALI400_PROFILING)
+        if (0 != group->l2_cache_core_ref_count[0]) {
+            if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                    (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+                mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+            }
+        }
+        if (0 != group->l2_cache_core_ref_count[1]) {
+            if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1])) &&
+                    (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]))) {
+                mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1]));
+            }
+        }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+    } else { /* group is physical - call profiling events for physical cores */
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL,
+                mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+        trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core),
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+        if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+            mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+        }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+    }
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+    trace_gpu_sched_switch(mali_pp_core_description(group->pp_core),
+            sched_clock(), mali_pp_job_get_tid(job),
+            0, mali_pp_job_get_id(job));
+#endif
+
+    group->pp_running_job = job;
+    group->pp_running_sub_job = sub_job;
+    group->is_working = MALI_TRUE;
+
+    /* Setup SW timer and record start time */
+    group->start_time = _mali_osk_time_tickcount();
+    _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime));
+
+    MALI_DEBUG_PRINT(4, ("Group: Started PP job 0x%08X part %u/%u on group %s at %u\n",
+                job, sub_job + 1,
+                mali_pp_job_get_sub_job_count(job),
+                mali_group_core_description(group),
+                group->start_time));
+
+}
+
+void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr)
+{
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]);
+    mali_l2_cache_invalidate(group->l2_cache_core[0]);
+
+    mali_mmu_zap_tlb_without_stall(group->mmu);
+
+    mali_gp_resume_with_new_heap(group->gp_core, start_addr, end_addr);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME |
+            MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+            0, 0, 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+    trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 1 /* active */, 1 /* GP */,  0 /* core */,
+            mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job));
+#endif
+}
+
+static void mali_group_reset_mmu(struct mali_group *group)
+{
+    struct mali_group *child;
+    struct mali_group *temp;
+    _mali_osk_errcode_t err;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    if (!mali_group_is_virtual(group)) {
+        /* This is a physical group or an idle virtual group -- simply wait for
+         * the reset to complete. */
+        err = mali_mmu_reset(group->mmu);
+        MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+    } else { /* virtual group */
+        /* Loop through all members of this virtual group and wait
+         * until they are done resetting.
+         */
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            err = mali_mmu_reset(child->mmu);
+            MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+        }
+    }
+}
+
+static void mali_group_reset_pp(struct mali_group *group)
+{
+    struct mali_group *child;
+    struct mali_group *temp;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    mali_pp_reset_async(group->pp_core);
+
+    if (!mali_group_is_virtual(group) || NULL == group->pp_running_job) {
+        /* This is a physical group or an idle virtual group -- simply wait for
+         * the reset to complete. */
+        mali_pp_reset_wait(group->pp_core);
+    } else {
+        /* Loop through all members of this virtual group and wait until they
+         * are done resetting.
+         */
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            mali_pp_reset_wait(child->pp_core);
+        }
+    }
+}
+
+struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job)
+{
+    struct mali_pp_job *pp_job_to_return;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_running_job);
+    MALI_DEBUG_ASSERT_POINTER(sub_job);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->is_working);
+
+    /* Stop/clear the timeout timer. */
+    _mali_osk_timer_del_async(group->timeout_timer);
+
+    if (NULL != group->pp_running_job) {
+
+        /* Deal with HW counters and profiling */
+
+        if (MALI_TRUE == mali_group_is_virtual(group)) {
+            struct mali_group *child;
+            struct mali_group *temp;
+
+            /* update performance counters from each physical pp core within this virtual group */
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+                mali_pp_update_performance_counters(group->pp_core, child->pp_core, group->pp_running_job, mali_pp_core_get_id(child->pp_core));
+            }
+
+#if defined(CONFIG_MALI400_PROFILING)
+            /* send profiling data per physical core */
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+                _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                        MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                        MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+                        mali_pp_job_get_perf_counter_value0(group->pp_running_job, mali_pp_core_get_id(child->pp_core)),
+                        mali_pp_job_get_perf_counter_value1(group->pp_running_job, mali_pp_core_get_id(child->pp_core)),
+                        mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8),
+                        0, 0);
+
+                trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job),
+                        0 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+                        mali_pp_job_get_frame_builder_id(group->pp_running_job),
+                        mali_pp_job_get_flush_id(group->pp_running_job));
+            }
+            if (0 != group->l2_cache_core_ref_count[0]) {
+                if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                        (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+                    mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+                }
+            }
+            if (0 != group->l2_cache_core_ref_count[1]) {
+                if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1])) &&
+                        (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]))) {
+                    mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1]));
+                }
+            }
+
+#endif
+        } else {
+            /* update performance counters for a physical group's pp core */
+            mali_pp_update_performance_counters(group->pp_core, group->pp_core, group->pp_running_job, group->pp_running_sub_job);
+
+#if defined(CONFIG_MALI400_PROFILING)
+            _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+                    MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL,
+                    mali_pp_job_get_perf_counter_value0(group->pp_running_job, group->pp_running_sub_job),
+                    mali_pp_job_get_perf_counter_value1(group->pp_running_job, group->pp_running_sub_job),
+                    mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8),
+                    0, 0);
+
+            trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job),
+                    0 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core),
+                    mali_pp_job_get_frame_builder_id(group->pp_running_job),
+                    mali_pp_job_get_flush_id(group->pp_running_job));
+
+            if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                    (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+                mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+            }
+#endif
+        }
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+        trace_gpu_sched_switch(
+                mali_gp_core_description(group->gp_core),
+                sched_clock(), 0, 0, 0);
+#endif
+
+    }
+
+    if (success) {
+        /* Only do soft reset for successful jobs, a full recovery
+         * reset will be done for failed jobs. */
+        mali_pp_reset_async(group->pp_core);
+    }
+
+    pp_job_to_return = group->pp_running_job;
+    group->pp_running_job = NULL;
+    group->is_working = MALI_FALSE;
+    *sub_job = group->pp_running_sub_job;
+
+    if (!success) {
+        MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n"));
+        mali_group_recovery_reset(group);
+    } else if (_MALI_OSK_ERR_OK != mali_pp_reset_wait(group->pp_core)) {
+        MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n"));
+        mali_group_recovery_reset(group);
+    }
+
+    return pp_job_to_return;
+}
+
+struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success)
+{
+    struct mali_gp_job *gp_job_to_return;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_running_job);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->is_working);
+
+    /* Stop/clear the timeout timer. */
+    _mali_osk_timer_del_async(group->timeout_timer);
+
+    if (NULL != group->gp_running_job) {
+        mali_gp_update_performance_counters(group->gp_core, group->gp_running_job);
+
+#if defined(CONFIG_MALI400_PROFILING)
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+                mali_gp_job_get_perf_counter_value0(group->gp_running_job),
+                mali_gp_job_get_perf_counter_value1(group->gp_running_job),
+                mali_gp_job_get_perf_counter_src0(group->gp_running_job) | (mali_gp_job_get_perf_counter_src1(group->gp_running_job) << 8),
+                0, 0);
+
+        if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0])))
+            mali_group_report_l2_cache_counters_per_core(group, 0);
+#endif
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+        trace_gpu_sched_switch(
+                mali_pp_core_description(group->pp_core),
+                sched_clock(), 0, 0, 0);
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+        trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 0 /* active */, 1 /* GP */,  0 /* core */,
+                mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job));
+#endif
+
+        mali_gp_job_set_current_heap_addr(group->gp_running_job,
+                mali_gp_read_plbu_alloc_start_addr(group->gp_core));
+    }
+
+    if (success) {
+        /* Only do soft reset for successful jobs, a full recovery
+         * reset will be done for failed jobs. */
+        mali_gp_reset_async(group->gp_core);
+    }
+
+    gp_job_to_return = group->gp_running_job;
+    group->gp_running_job = NULL;
+    group->is_working = MALI_FALSE;
+
+    if (!success) {
+        MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n"));
+        mali_group_recovery_reset(group);
+    } else if (_MALI_OSK_ERR_OK != mali_gp_reset_wait(group->gp_core)) {
+        MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n"));
+        mali_group_recovery_reset(group);
+    }
+
+    return gp_job_to_return;
+}
+
+struct mali_group *mali_group_get_glob_group(u32 index)
+{
+    if (mali_global_num_groups > index) {
+        return mali_global_groups[index];
+    }
+
+    return NULL;
+}
+
+u32 mali_group_get_glob_num_groups(void)
+{
+    return mali_global_num_groups;
+}
+
+static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session)
+{
+    MALI_DEBUG_PRINT(5, ("Mali group: Activating page directory 0x%08X from session 0x%08X on group %s\n",
+                mali_session_get_page_directory(session), session,
+                mali_group_core_description(group)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    if (group->session != session) {
+        /* Different session than last time, so we need to do some work */
+        MALI_DEBUG_PRINT(5, ("Mali group: Activate session: %08x previous: %08x on group %s\n",
+                    session, group->session,
+                    mali_group_core_description(group)));
+        mali_mmu_activate_page_directory(group->mmu, mali_session_get_page_directory(session));
+        group->session = session;
+    } else {
+        /* Same session as last time, so no work required */
+        MALI_DEBUG_PRINT(4, ("Mali group: Activate existing session 0x%08X on group %s\n",
+                    session->page_directory,
+                    mali_group_core_description(group)));
+        mali_mmu_zap_tlb_without_stall(group->mmu);
+    }
+}
+
+static void mali_group_recovery_reset(struct mali_group *group)
+{
+    _mali_osk_errcode_t err;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    /* Stop cores, bus stop */
+    if (NULL != group->pp_core) {
+        mali_pp_stop_bus(group->pp_core);
+    } else {
+        mali_gp_stop_bus(group->gp_core);
+    }
+
+    /* Flush MMU and clear page fault (if any) */
+    mali_mmu_activate_fault_flush_page_directory(group->mmu);
+    mali_mmu_page_fault_done(group->mmu);
+
+    /* Wait for cores to stop bus, then do a hard reset on them */
+    if (NULL != group->pp_core) {
+        if (mali_group_is_virtual(group)) {
+            struct mali_group *child, *temp;
+
+            /* Disable the broadcast unit while we do reset directly on the member cores. */
+            mali_bcast_disable(group->bcast_core);
+
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+                mali_pp_stop_bus_wait(child->pp_core);
+                mali_pp_hard_reset(child->pp_core);
+            }
+
+            mali_bcast_enable(group->bcast_core);
+        } else {
+            mali_pp_stop_bus_wait(group->pp_core);
+            mali_pp_hard_reset(group->pp_core);
+        }
+    } else {
+        mali_gp_stop_bus_wait(group->gp_core);
+        mali_gp_hard_reset(group->gp_core);
+    }
+
+    /* Reset MMU */
+    err = mali_mmu_reset(group->mmu);
+    MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+    MALI_IGNORE(err);
+
+    group->session = NULL;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size)
+{
+    int n = 0;
+    int i;
+    struct mali_group *child;
+    struct mali_group *temp;
+
+    if (mali_group_is_virtual(group)) {
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "Virtual PP Group: %p\n", group);
+    } else if (mali_group_is_in_virtual(group)) {
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "Child PP Group: %p\n", group);
+    } else if (NULL != group->pp_core) {
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "Physical PP Group: %p\n", group);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "GP Group: %p\n", group);
+    }
+
+    switch (group->state) {
+        case MALI_GROUP_STATE_INACTIVE:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: INACTIVE\n");
+            break;
+        case MALI_GROUP_STATE_ACTIVATION_PENDING:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: ACTIVATION_PENDING\n");
+            break;
+        case MALI_GROUP_STATE_ACTIVE:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: MALI_GROUP_STATE_ACTIVE\n");
+            break;
+        default:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: UNKNOWN (%d)\n", group->state);
+            MALI_DEBUG_ASSERT(0);
+            break;
+    }
+
+    n += _mali_osk_snprintf(buf + n, size - n,
+            "\tSW power: %s\n",
+            group->power_is_on ? "On" : "Off");
+
+    n += mali_pm_dump_state_domain(group->pm_domain, buf + n, size - n);
+
+    for (i = 0; i < 2; i++) {
+        if (NULL != group->l2_cache_core[i]) {
+            struct mali_pm_domain *domain;
+            domain = mali_l2_cache_get_pm_domain(
+                    group->l2_cache_core[i]);
+            n += mali_pm_dump_state_domain(domain,
+                    buf + n, size - n);
+        }
+    }
+
+    if (group->gp_core) {
+        n += mali_gp_dump_state(group->gp_core, buf + n, size - n);
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "\tGP running job: %p\n", group->gp_running_job);
+    }
+
+    if (group->pp_core) {
+        n += mali_pp_dump_state(group->pp_core, buf + n, size - n);
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "\tPP running job: %p, subjob %d \n",
+                group->pp_running_job,
+                group->pp_running_sub_job);
+    }
+
+    _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list,
+            struct mali_group, group_list) {
+        n += mali_group_dump_state(child, buf + n, size - n);
+    }
+
+    return n;
+}
+#endif
+
+/* Kasin added. */
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+#include <platform/meson_m400/mali_fix.h>
+#define INT_MALI_PP2_MMU ( 6+32)
+struct _mali_osk_irq_t_struct;
+u32 get_irqnum(struct _mali_osk_irq_t_struct* irq);
+#endif
+
+_mali_osk_errcode_t mali_group_upper_half_mmu(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_mmu_core *mmu = group->mmu;
+#endif
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (MALI_FALSE == group->power_is_on)
+        MALI_SUCCESS;
+    if (get_irqnum(mmu->irq) == INT_MALI_PP2_MMU)
+    {
+        if (group == NULL || group->pp_core == NULL)
+            MALI_SUCCESS;
+        if (group->pp_core->core_id == 0) {
+            if (malifix_get_mmu_int_process_state(0) == MMU_INT_HIT)
+                malifix_set_mmu_int_process_state(0, MMU_INT_TOP);
+            else
+                MALI_SUCCESS;
+        }
+        else if (group->pp_core->core_id == 1) {
+            if (malifix_get_mmu_int_process_state(1) == MMU_INT_HIT)
+                malifix_set_mmu_int_process_state(1, MMU_INT_TOP);
+            else
+                MALI_SUCCESS;
+        } else
+            MALI_SUCCESS;
+    }
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+
+    ret = mali_executor_interrupt_mmu(group, MALI_TRUE);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) {
+		/* group complete and on job shedule on it, it already power off */
+		if (NULL != group->gp_core) {
+			_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+						      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+						      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+						      0, 0, /* No pid and tid for interrupt handler */
+						      MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+						      0xFFFFFFFF, 0);
+		} else {
+			_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+						      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+						      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+						      0, 0, /* No pid and tid for interrupt handler */
+						      MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+							      mali_pp_core_get_id(group->pp_core)),
+						      0xFFFFFFFF, 0);
+		}
+
+		mali_executor_unlock();
+		return ret;
+	}
+#endif
+
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+
+    return ret;
+}
+
+static void mali_group_bottom_half_mmu(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_mmu_core *mmu = group->mmu;
+#endif
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+
+    mali_executor_interrupt_mmu(group, MALI_FALSE);
+
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (get_irqnum(mmu->irq) == INT_MALI_PP2_MMU)
+    {
+        if (group->pp_core->core_id == 0) {
+            if (malifix_get_mmu_int_process_state(0) == MMU_INT_TOP)
+                malifix_set_mmu_int_process_state(0, MMU_INT_NONE);
+        }
+        else if (group->pp_core->core_id == 1) {
+            if (malifix_get_mmu_int_process_state(1) == MMU_INT_TOP)
+                malifix_set_mmu_int_process_state(1, MMU_INT_NONE);
+        }
+    }
+#endif
+}
+
+_mali_osk_errcode_t mali_group_upper_half_gp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) {
+		/* group complete and on job shedule on it, it already power off */
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+					      0, 0, /* No pid and tid for interrupt handler */
+					      MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+					      0xFFFFFFFF, 0);
+		mali_executor_unlock();
+		return ret;
+	}
+#endif
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+
+    MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n",
+                mali_gp_get_rawstat(group->gp_core),
+                mali_group_core_description(group)));
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+    ret = mali_executor_interrupt_gp(group, MALI_TRUE);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+    return ret;
+}
+
+static void mali_group_bottom_half_gp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+
+    mali_executor_interrupt_gp(group, MALI_FALSE);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+}
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+int PP0_int_cnt = 0;
+int mali_PP0_int_cnt(void)
+{
+    return PP0_int_cnt;
+}
+EXPORT_SYMBOL(mali_PP0_int_cnt);
+
+int PP1_int_cnt = 0;
+int mali_PP1_int_cnt(void)
+{
+    return PP1_int_cnt;
+}
+EXPORT_SYMBOL(mali_PP1_int_cnt);
+#endif
+
+_mali_osk_errcode_t mali_group_upper_half_pp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_pp_core *core = group->pp_core;
+#endif
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) {
+		/* group complete and on job shedule on it, it already power off */
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+					      0, 0, /* No pid and tid for interrupt handler */
+					      MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+						      mali_pp_core_get_id(group->pp_core)),
+					      0xFFFFFFFF, 0);
+		mali_executor_unlock();
+		return ret;
+	}
+#endif
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+
+    MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n",
+                mali_pp_get_rawstat(group->pp_core),
+                mali_group_core_description(group)));
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+
+    ret = mali_executor_interrupt_pp(group, MALI_TRUE);
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (core->core_id == 0)
+        PP0_int_cnt++;
+    else if (core->core_id == 1)
+        PP1_int_cnt++;
+#endif
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+    return ret;
+}
+
+static void mali_group_bottom_half_pp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+
+    mali_executor_interrupt_pp(group, MALI_FALSE);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+}
+
+static void mali_group_timeout(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+    MALI_DEBUG_ASSERT_POINTER(group);
+
+    MALI_DEBUG_PRINT(2, ("Group: timeout handler for %s at %u\n",
+                mali_group_core_description(group),
+                _mali_osk_time_tickcount()));
+
+    if (mali_core_timeout < 65533)
+        mali_core_timeout++;
+    if (NULL != group->gp_core) {
+        mali_group_schedule_bottom_half_gp(group);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+        mali_group_schedule_bottom_half_pp(group);
+    }
+}
+
+mali_bool mali_group_zap_session(struct mali_group *group,
+        struct mali_session_data *session)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(session);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    if (group->session != session) {
+        /* not running from this session */
+        return MALI_TRUE; /* success */
+    }
+
+    if (group->is_working) {
+        /* The Zap also does the stall and disable_stall */
+        mali_bool zap_success = mali_mmu_zap_tlb(group->mmu);
+        return zap_success;
+    } else {
+        /* Just remove the session instead of zapping */
+        mali_group_clear_session(group);
+        return MALI_TRUE; /* success */
+    }
+}
+
+#if defined(CONFIG_MALI400_PROFILING)
+static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num)
+{
+    u32 source0 = 0;
+    u32 value0 = 0;
+    u32 source1 = 0;
+    u32 value1 = 0;
+    u32 profiling_channel = 0;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    switch (core_num) {
+        case 0:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS;
+            break;
+        case 1:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS;
+            break;
+        case 2:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS;
+            break;
+        default:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS;
+            break;
+    }
+
+    if (0 == core_num) {
+        mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+    }
+    if (1 == core_num) {
+        if (1 == mali_l2_cache_get_id(group->l2_cache_core[0])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+        } else if (1 == mali_l2_cache_get_id(group->l2_cache_core[1])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1);
+        }
+    }
+    if (2 == core_num) {
+        if (2 == mali_l2_cache_get_id(group->l2_cache_core[0])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+        } else if (2 == mali_l2_cache_get_id(group->l2_cache_core[1])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1);
+        }
+    }
+
+    _mali_osk_profiling_add_event(profiling_channel, source1 << 8 | source0, value0, value1, 0, 0);
+}
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
diff --git a/utgard/r6p1/common/mali_group.h b/utgard/r6p1/common/mali_group.h
new file mode 100755
index 0000000..f2c0313
--- /dev/null
+++ b/utgard/r6p1/common/mali_group.h
@@ -0,0 +1,460 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GROUP_H__
+#define __MALI_GROUP_H__
+
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_mmu.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_session.h"
+#include "mali_osk_profiling.h"
+
+/**
+ * @brief Default max runtime [ms] for a core job - used by timeout timers
+ */
+#define MALI_MAX_JOB_RUNTIME_DEFAULT 5000
+
+extern int mali_max_job_runtime;
+
+#define MALI_MAX_NUMBER_OF_GROUPS 10
+#define MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS 8
+
+enum mali_group_state {
+	MALI_GROUP_STATE_INACTIVE,
+	MALI_GROUP_STATE_ACTIVATION_PENDING,
+	MALI_GROUP_STATE_ACTIVE,
+};
+
+/**
+ * The structure represents a render group
+ * A render group is defined by all the cores that share the same Mali MMU
+ */
+
+struct mali_group {
+	struct mali_mmu_core        *mmu;
+	struct mali_session_data    *session;
+
+	enum mali_group_state        state;
+	mali_bool                    power_is_on;
+
+	mali_bool                    is_working;
+	unsigned long                start_time; /* in ticks */
+
+	struct mali_gp_core         *gp_core;
+	struct mali_gp_job          *gp_running_job;
+
+	struct mali_pp_core         *pp_core;
+	struct mali_pp_job          *pp_running_job;
+	u32                         pp_running_sub_job;
+
+	struct mali_pm_domain       *pm_domain;
+
+	struct mali_l2_cache_core   *l2_cache_core[2];
+	u32                         l2_cache_core_ref_count[2];
+
+	/* Parent virtual group (if any) */
+	struct mali_group           *parent_group;
+
+	struct mali_dlbu_core       *dlbu_core;
+	struct mali_bcast_unit      *bcast_core;
+
+	/* Used for working groups which needs to be disabled */
+	mali_bool                    disable_requested;
+
+	/* Used by group to link child groups (for virtual group) */
+	_mali_osk_list_t            group_list;
+
+	/* Used by executor module in order to link groups of same state */
+	_mali_osk_list_t            executor_list;
+
+	/* Used by PM domains to link groups of same domain */
+	_mali_osk_list_t             pm_domain_list;
+
+	_mali_osk_wq_work_t         *bottom_half_work_mmu;
+	_mali_osk_wq_work_t         *bottom_half_work_gp;
+	_mali_osk_wq_work_t         *bottom_half_work_pp;
+
+	_mali_osk_timer_t           *timeout_timer;
+};
+
+/** @brief Create a new Mali group object
+ *
+ * @return A pointer to a new group object
+ */
+struct mali_group *mali_group_create(struct mali_l2_cache_core *core,
+				     struct mali_dlbu_core *dlbu,
+				     struct mali_bcast_unit *bcast,
+				     u32 domain_index);
+
+void mali_group_dump_status(struct mali_group *group);
+
+void mali_group_delete(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group,
+		struct mali_mmu_core *mmu_core);
+void mali_group_remove_mmu_core(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group,
+		struct mali_gp_core *gp_core);
+void mali_group_remove_gp_core(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group,
+		struct mali_pp_core *pp_core);
+void mali_group_remove_pp_core(struct mali_group *group);
+
+MALI_STATIC_INLINE const char *mali_group_core_description(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	if (NULL != group->pp_core) {
+		return mali_pp_core_description(group->pp_core);
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+		return mali_gp_core_description(group->gp_core);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_is_virtual(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	return (NULL != group->dlbu_core);
+#else
+	return MALI_FALSE;
+#endif
+}
+
+/** @brief Check if a group is a part of a virtual group or not
+ */
+MALI_STATIC_INLINE mali_bool mali_group_is_in_virtual(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	return (NULL != group->parent_group) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif
+}
+
+/** @brief Reset group
+ *
+ * This function will reset the entire group,
+ * including all the cores present in the group.
+ *
+ * @param group Pointer to the group to reset
+ */
+void mali_group_reset(struct mali_group *group);
+
+MALI_STATIC_INLINE struct mali_session_data *mali_group_get_session(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	return group->session;
+}
+
+MALI_STATIC_INLINE void mali_group_clear_session(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (NULL != group->session) {
+		mali_mmu_activate_empty_page_directory(group->mmu);
+		group->session = NULL;
+	}
+}
+
+enum mali_group_state mali_group_activate(struct mali_group *group);
+
+/*
+ * Change state from ACTIVATION_PENDING to ACTIVE
+ * For virtual group, all childs need to be ACTIVE first
+ */
+mali_bool mali_group_set_active(struct mali_group *group);
+
+/*
+ * @return MALI_TRUE means one or more domains can now be powered off,
+ * and caller should call either mali_pm_update_async() or
+ * mali_pm_update_sync() in order to do so.
+ */
+mali_bool mali_group_deactivate(struct mali_group *group);
+
+MALI_STATIC_INLINE enum mali_group_state mali_group_get_state(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->state;
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_power_is_on(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->power_is_on;
+}
+
+void mali_group_power_up(struct mali_group *group);
+void mali_group_power_down(struct mali_group *group);
+
+MALI_STATIC_INLINE void mali_group_set_disable_request(
+	struct mali_group *group, mali_bool disable)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	group->disable_requested = disable;
+
+	/**
+	 * When one of child group's disable_requeset is set TRUE, then
+	 * the disable_request of parent group should also be set to TRUE.
+	 * While, the disable_request of parent group should only be set to FALSE
+	 * only when all of its child group's disable_request are set to FALSE.
+	 */
+	if (NULL != group->parent_group && MALI_TRUE == disable) {
+		group->parent_group->disable_requested = disable;
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_disable_requested(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->disable_requested;
+}
+
+/** @brief Virtual groups */
+void mali_group_add_group(struct mali_group *parent, struct mali_group *child);
+struct mali_group *mali_group_acquire_group(struct mali_group *parent);
+void mali_group_remove_group(struct mali_group *parent, struct mali_group *child);
+
+/** @brief Checks if the group is working.
+ */
+MALI_STATIC_INLINE mali_bool mali_group_is_working(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	if (mali_group_is_in_virtual(group)) {
+		struct mali_group *tmp_group = mali_executor_get_virtual_group();
+		return tmp_group->is_working;
+	}
+	return group->is_working;
+}
+
+MALI_STATIC_INLINE struct mali_gp_job *mali_group_get_running_gp_job(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->gp_running_job;
+}
+
+/** @brief Zap MMU TLB on all groups
+ *
+ * Zap TLB on group if \a session is active.
+ */
+mali_bool mali_group_zap_session(struct mali_group *group,
+				 struct mali_session_data *session);
+
+/** @brief Get pointer to GP core object
+ */
+MALI_STATIC_INLINE struct mali_gp_core *mali_group_get_gp_core(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->gp_core;
+}
+
+/** @brief Get pointer to PP core object
+ */
+MALI_STATIC_INLINE struct mali_pp_core *mali_group_get_pp_core(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->pp_core;
+}
+
+/** @brief Start GP job
+ */
+void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job);
+
+void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job);
+
+/** @brief Start virtual group Job on a virtual group
+*/
+void mali_group_start_job_on_virtual(struct mali_group *group, struct mali_pp_job *job, u32 first_subjob, u32 last_subjob);
+
+
+/** @brief Start a subjob from a particular on a specific PP group
+*/
+void mali_group_start_job_on_group(struct mali_group *group, struct mali_pp_job *job, u32 subjob);
+
+
+/** @brief remove all the unused groups in tmp_unused group  list, so that the group is in consistent status.
+ */
+void mali_group_non_dlbu_job_done_virtual(struct mali_group *group);
+
+
+/** @brief Resume GP job that suspended waiting for more heap memory
+ */
+void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr);
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_get_interrupt_result(group->gp_core);
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_get_interrupt_result(group->pp_core);
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_mmu(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->mmu);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_mmu_get_interrupt_result(group->mmu);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_gp_is_active(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_is_active(group->gp_core);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_pp_is_active(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_is_active(group->pp_core);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_has_timed_out(struct mali_group *group)
+{
+	unsigned long time_cost;
+	struct mali_group *tmp_group = group;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* if the group is in virtual need to use virtual_group's start time */
+	if (mali_group_is_in_virtual(group)) {
+		tmp_group = mali_executor_get_virtual_group();
+	}
+
+	time_cost = _mali_osk_time_tickcount() - tmp_group->start_time;
+	if (_mali_osk_time_mstoticks(mali_max_job_runtime) <= time_cost) {
+		/*
+		 * current tick is at or after timeout end time,
+		 * so this is a valid timeout
+		 */
+		return MALI_TRUE;
+	} else {
+		/*
+		 * Not a valid timeout. A HW interrupt probably beat
+		 * us to it, and the timer wasn't properly deleted
+		 * (async deletion used due to atomic context).
+		 */
+		return MALI_FALSE;
+	}
+}
+
+MALI_STATIC_INLINE void mali_group_mask_all_interrupts_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_mask_all_interrupts(group->gp_core);
+}
+
+MALI_STATIC_INLINE void mali_group_mask_all_interrupts_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_mask_all_interrupts(group->pp_core);
+}
+
+MALI_STATIC_INLINE void mali_group_enable_interrupts_gp(
+	struct mali_group *group,
+	enum mali_interrupt_result exceptions)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	mali_gp_enable_interrupts(group->gp_core, exceptions);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_gp);
+}
+
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_pp);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_mmu(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->mmu);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_mmu);
+}
+
+struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job);
+
+struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success);
+
+#if defined(CONFIG_MALI400_PROFILING)
+MALI_STATIC_INLINE void mali_group_oom(struct mali_group *group)
+{
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND |
+				      MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+				      0, 0, 0, 0, 0);
+}
+#endif
+
+struct mali_group *mali_group_get_glob_group(u32 index);
+u32 mali_group_get_glob_num_groups(void);
+
+u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size);
+
+
+_mali_osk_errcode_t mali_group_upper_half_mmu(void *data);
+_mali_osk_errcode_t mali_group_upper_half_gp(void *data);
+_mali_osk_errcode_t mali_group_upper_half_pp(void *data);
+
+MALI_STATIC_INLINE mali_bool mali_group_is_empty(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT(mali_group_is_virtual(group));
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return _mali_osk_list_empty(&group->group_list);
+}
+
+#endif /* __MALI_GROUP_H__ */
diff --git a/utgard/r6p1/common/mali_hw_core.c b/utgard/r6p1/common/mali_hw_core.c
new file mode 100755
index 0000000..dd10cfe
--- /dev/null
+++ b/utgard/r6p1/common/mali_hw_core.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_hw_core.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_osk_mali.h"
+
+_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size)
+{
+	core->phys_addr = resource->base;
+	core->phys_offset = resource->base - _mali_osk_resource_base_address();
+	core->description = resource->description;
+	core->size = reg_size;
+
+	MALI_DEBUG_ASSERT(core->phys_offset < core->phys_addr);
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_mem_reqregion(core->phys_addr, core->size, core->description)) {
+		core->mapped_registers = _mali_osk_mem_mapioregion(core->phys_addr, core->size, core->description);
+		if (NULL != core->mapped_registers) {
+			return _MALI_OSK_ERR_OK;
+		} else {
+			MALI_PRINT_ERROR(("Failed to map memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr));
+		}
+		_mali_osk_mem_unreqregion(core->phys_addr, core->size);
+	} else {
+		MALI_PRINT_ERROR(("Failed to request memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr));
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_hw_core_delete(struct mali_hw_core *core)
+{
+	if (NULL != core->mapped_registers) {
+		_mali_osk_mem_unmapioregion(core->phys_addr, core->size, core->mapped_registers);
+		core->mapped_registers = NULL;
+	}
+	_mali_osk_mem_unreqregion(core->phys_addr, core->size);
+}
diff --git a/utgard/r6p1/common/mali_hw_core.h b/utgard/r6p1/common/mali_hw_core.h
new file mode 100755
index 0000000..e435725
--- /dev/null
+++ b/utgard/r6p1/common/mali_hw_core.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_HW_CORE_H__
+#define __MALI_HW_CORE_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/**
+ * The common parts for all Mali HW cores (GP, PP, MMU, L2 and PMU)
+ * This struct is embedded inside all core specific structs.
+ */
+struct mali_hw_core {
+	uintptr_t phys_addr;              /**< Physical address of the registers */
+	u32 phys_offset;                  /**< Offset from start of Mali to registers */
+	u32 size;                         /**< Size of registers */
+	mali_io_address mapped_registers; /**< Virtual mapping of the registers */
+	const char *description;          /**< Name of unit (as specified in device configuration) */
+};
+
+#define MALI_REG_POLL_COUNT_FAST 1000000
+#define MALI_REG_POLL_COUNT_SLOW 1000000
+
+/*
+ * GP and PP core translate their int_stat/rawstat into one of these
+ */
+enum mali_interrupt_result {
+	MALI_INTERRUPT_RESULT_NONE,
+	MALI_INTERRUPT_RESULT_SUCCESS,
+	MALI_INTERRUPT_RESULT_SUCCESS_VS,
+	MALI_INTERRUPT_RESULT_SUCCESS_PLBU,
+	MALI_INTERRUPT_RESULT_OOM,
+	MALI_INTERRUPT_RESULT_ERROR
+};
+
+_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size);
+void mali_hw_core_delete(struct mali_hw_core *core);
+
+MALI_STATIC_INLINE u32 mali_hw_core_register_read(struct mali_hw_core *core, u32 relative_address)
+{
+	u32 read_val;
+	read_val = _mali_osk_mem_ioread32(core->mapped_registers, relative_address);
+	MALI_DEBUG_PRINT(6, ("register_read for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, read_val));
+	return read_val;
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed(struct mali_hw_core *core, u32 relative_address, u32 new_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	_mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val);
+}
+
+/* Conditionally write a register.
+ * The register will only be written if the new value is different from the old_value.
+ * If the new value is different, the old value will also be updated */
+MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 new_val, const u32 old_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	if (old_val != new_val) {
+		_mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val);
+	}
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write(struct mali_hw_core *core, u32 relative_address, u32 new_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	_mali_osk_mem_iowrite32(core->mapped_registers, relative_address, new_val);
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs)
+{
+	u32 i;
+	MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n",
+			     core->description, relative_address, nr_of_regs));
+
+	/* Do not use burst writes against the registers */
+	for (i = 0; i < nr_of_regs; i++) {
+		mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]);
+	}
+}
+
+/* Conditionally write a set of registers.
+ * The register will only be written if the new value is different from the old_value.
+ * If the new value is different, the old value will also be updated */
+MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs, const u32 *old_array)
+{
+	u32 i;
+	MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n",
+			     core->description, relative_address, nr_of_regs));
+
+	/* Do not use burst writes against the registers */
+	for (i = 0; i < nr_of_regs; i++) {
+		if (old_array[i] != write_array[i]) {
+			mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]);
+		}
+	}
+}
+
+#endif /* __MALI_HW_CORE_H__ */
diff --git a/utgard/r6p1/common/mali_kernel_common.h b/utgard/r6p1/common/mali_kernel_common.h
new file mode 100755
index 0000000..9bae265
--- /dev/null
+++ b/utgard/r6p1/common/mali_kernel_common.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_COMMON_H__
+#define __MALI_KERNEL_COMMON_H__
+
+#include "mali_osk.h"
+
+/* Make sure debug is defined when it should be */
+#ifndef DEBUG
+#if defined(_DEBUG)
+#define DEBUG
+#endif
+#endif
+
+/* The file include several useful macros for error checking, debugging and printing.
+ * - MALI_PRINTF(...)           Do not use this function: Will be included in Release builds.
+ * - MALI_DEBUG_PRINT(nr, (X) ) Prints the second argument if nr<=MALI_DEBUG_LEVEL.
+ * - MALI_DEBUG_ERROR( (X) )    Prints an errortext, a source trace, and the given error message.
+ * - MALI_DEBUG_ASSERT(exp,(X)) If the asserted expr is false, the program will exit.
+ * - MALI_DEBUG_ASSERT_POINTER(pointer)  Triggers if the pointer is a zero pointer.
+ * - MALI_DEBUG_CODE( X )       The code inside the macro is only compiled in Debug builds.
+ *
+ * The (X) means that you must add an extra parenthesis around the argumentlist.
+ *
+ * The  printf function: MALI_PRINTF(...) is routed to _mali_osk_debugmsg
+ *
+ * Suggested range for the DEBUG-LEVEL is [1:6] where
+ * [1:2] Is messages with highest priority, indicate possible errors.
+ * [3:4] Is messages with medium priority, output important variables.
+ * [5:6] Is messages with low priority, used during extensive debugging.
+ */
+
+/**
+*  Fundamental error macro. Reports an error code. This is abstracted to allow us to
+*  easily switch to a different error reporting method if we want, and also to allow
+*  us to search for error returns easily.
+*
+*  Note no closing semicolon - this is supplied in typical usage:
+*
+*  MALI_ERROR(MALI_ERROR_OUT_OF_MEMORY);
+*/
+#define MALI_ERROR(error_code) return (error_code)
+
+/**
+ *  Basic error macro, to indicate success.
+ *  Note no closing semicolon - this is supplied in typical usage:
+ *
+ *  MALI_SUCCESS;
+ */
+#define MALI_SUCCESS MALI_ERROR(_MALI_OSK_ERR_OK)
+
+/**
+ *  Basic error macro. This checks whether the given condition is true, and if not returns
+ *  from this function with the supplied error code. This is a macro so that we can override it
+ *  for stress testing.
+ *
+ *  Note that this uses the do-while-0 wrapping to ensure that we don't get problems with dangling
+ *  else clauses. Note also no closing semicolon - this is supplied in typical usage:
+ *
+ *  MALI_CHECK((p!=NULL), ERROR_NO_OBJECT);
+ */
+#define MALI_CHECK(condition, error_code) do { if(!(condition)) MALI_ERROR(error_code); } while(0)
+
+/**
+ *  Error propagation macro. If the expression given is anything other than
+ *  _MALI_OSK_NO_ERROR, then the value is returned from the enclosing function
+ *  as an error code. This effectively acts as a guard clause, and propagates
+ *  error values up the call stack. This uses a temporary value to ensure that
+ *  the error expression is not evaluated twice.
+ *  If the counter for forcing a failure has been set using _mali_force_error,
+ *  this error will be returned without evaluating the expression in
+ *  MALI_CHECK_NO_ERROR
+ */
+#define MALI_CHECK_NO_ERROR(expression) \
+	do { _mali_osk_errcode_t _check_no_error_result=(expression); \
+		if(_check_no_error_result != _MALI_OSK_ERR_OK) \
+			MALI_ERROR(_check_no_error_result); \
+	} while(0)
+
+/**
+ *  Pointer check macro. Checks non-null pointer.
+ */
+#define MALI_CHECK_NON_NULL(pointer, error_code) MALI_CHECK( ((pointer)!=NULL), (error_code) )
+
+/**
+ *  Error macro with goto. This checks whether the given condition is true, and if not jumps
+ *  to the specified label using a goto. The label must therefore be local to the function in
+ *  which this macro appears. This is most usually used to execute some clean-up code before
+ *  exiting with a call to ERROR.
+ *
+ *  Like the other macros, this is a macro to allow us to override the condition if we wish,
+ *  e.g. to force an error during stress testing.
+ */
+#define MALI_CHECK_GOTO(condition, label) do { if(!(condition)) goto label; } while(0)
+
+/**
+ *  Explicitly ignore a parameter passed into a function, to suppress compiler warnings.
+ *  Should only be used with parameter names.
+ */
+#define MALI_IGNORE(x) x=x
+
+#if defined(CONFIG_MALI_QUIET)
+#define MALI_PRINTF(args)
+#else
+#define MALI_PRINTF(args) _mali_osk_dbgmsg args;
+#endif
+
+#define MALI_PRINT_ERROR(args) do{ \
+		MALI_PRINTF(("Mali: ERR: %s\n" ,__FILE__)); \
+		MALI_PRINTF(("           %s()%4d\n           ", __FUNCTION__, __LINE__)) ; \
+		MALI_PRINTF(args); \
+		MALI_PRINTF(("\n")); \
+	} while(0)
+
+#define MALI_PRINT(args) do{ \
+		MALI_PRINTF(("Mali: ")); \
+		MALI_PRINTF(args); \
+	} while (0)
+
+#ifdef DEBUG
+#ifndef mali_debug_level
+extern int mali_debug_level;
+#endif
+
+#define MALI_DEBUG_CODE(code) code
+#define MALI_DEBUG_PRINT(level, args)  do { \
+		if((level) <=  mali_debug_level)\
+		{MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); } \
+	} while (0)
+
+#define MALI_DEBUG_PRINT_ERROR(args) MALI_PRINT_ERROR(args)
+
+#define MALI_DEBUG_PRINT_IF(level,condition,args)  \
+	if((condition)&&((level) <=  mali_debug_level))\
+	{MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); }
+
+#define MALI_DEBUG_PRINT_ELSE(level, args)\
+	else if((level) <=  mali_debug_level)\
+	{ MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); }
+
+/**
+ * @note these variants of DEBUG ASSERTS will cause a debugger breakpoint
+ * to be entered (see _mali_osk_break() ). An alternative would be to call
+ * _mali_osk_abort(), on OSs that support it.
+ */
+#define MALI_DEBUG_PRINT_ASSERT(condition, args) do  {if( !(condition)) { MALI_PRINT_ERROR(args); _mali_osk_break(); } } while(0)
+#define MALI_DEBUG_ASSERT_POINTER(pointer) do  {if( (pointer)== NULL) {MALI_PRINT_ERROR(("NULL pointer " #pointer)); _mali_osk_break();} } while(0)
+#define MALI_DEBUG_ASSERT(condition) do  {if( !(condition)) {MALI_PRINT_ERROR(("ASSERT failed: " #condition )); _mali_osk_break();} } while(0)
+
+#else /* DEBUG */
+
+#define MALI_DEBUG_CODE(code)
+#define MALI_DEBUG_PRINT(string,args) do {} while(0)
+#define MALI_DEBUG_PRINT_ERROR(args) do {} while(0)
+#define MALI_DEBUG_PRINT_IF(level,condition,args) do {} while(0)
+#define MALI_DEBUG_PRINT_ELSE(level,condition,args) do {} while(0)
+#define MALI_DEBUG_PRINT_ASSERT(condition,args) do {} while(0)
+#define MALI_DEBUG_ASSERT_POINTER(pointer) do {} while(0)
+#define MALI_DEBUG_ASSERT(condition) do {} while(0)
+
+#endif /* DEBUG */
+
+/**
+ * variables from user space cannot be dereferenced from kernel space; tagging them
+ * with __user allows the GCC compiler to generate a warning. Other compilers may
+ * not support this so we define it here as an empty macro if the compiler doesn't
+ * define it.
+ */
+#ifndef __user
+#define __user
+#endif
+
+#endif /* __MALI_KERNEL_COMMON_H__ */
diff --git a/utgard/r6p1/common/mali_kernel_core.c b/utgard/r6p1/common/mali_kernel_core.c
new file mode 100755
index 0000000..6a62c92
--- /dev/null
+++ b/utgard/r6p1/common/mali_kernel_core.c
@@ -0,0 +1,1319 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_ukk.h"
+#include "mali_kernel_core.h"
+#include "mali_memory.h"
+#include "mali_mem_validation.h"
+#include "mali_mmu.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_dlbu.h"
+#include "mali_broadcast.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_executor.h"
+#include "mali_pp_job.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+#include "mali_pmu.h"
+#include "mali_scheduler.h"
+#include "mali_kernel_utilization.h"
+#include "mali_l2_cache.h"
+#include "mali_timeline.h"
+#include "mali_soft_job.h"
+#include "mali_pm_domain.h"
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include "mali_profiling_internal.h"
+#endif
+#include "mali_control_timer.h"
+#include "mali_dvfs_policy.h"
+#include <linux/sched.h>
+
+#define MALI_SHARED_MEMORY_DEFAULT_SIZE 0xffffffff
+
+/* Mali GPU memory. Real values come from module parameter or from device specific data */
+unsigned int mali_dedicated_mem_start = 0;
+unsigned int mali_dedicated_mem_size = 0;
+
+/* Default shared memory size is set to 4G. */
+unsigned int mali_shared_mem_size = MALI_SHARED_MEMORY_DEFAULT_SIZE;
+
+/* Frame buffer memory to be accessible by Mali GPU */
+int mali_fb_start = 0;
+int mali_fb_size = 0;
+
+/* Mali max job runtime */
+extern int mali_max_job_runtime;
+
+/** Start profiling from module load? */
+int mali_boot_profiling = 0;
+
+/** Limits for the number of PP cores behind each L2 cache. */
+int mali_max_pp_cores_group_1 = 0xFF;
+int mali_max_pp_cores_group_2 = 0xFF;
+
+int mali_inited_pp_cores_group_1 = 0;
+int mali_inited_pp_cores_group_2 = 0;
+
+static _mali_product_id_t global_product_id = _MALI_PRODUCT_ID_UNKNOWN;
+static uintptr_t global_gpu_base_address = 0;
+static u32 global_gpu_major_version = 0;
+static u32 global_gpu_minor_version = 0;
+
+mali_bool mali_gpu_class_is_mali450 = MALI_FALSE;
+mali_bool mali_gpu_class_is_mali470 = MALI_FALSE;
+
+static _mali_osk_errcode_t mali_set_global_gpu_base_address(void)
+{
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+	global_gpu_base_address = _mali_osk_resource_base_address();
+	if (0 == global_gpu_base_address) {
+		err = _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	return err;
+}
+
+static u32 mali_get_bcast_id(_mali_osk_resource_t *resource_pp)
+{
+	switch (resource_pp->base - global_gpu_base_address) {
+	case 0x08000:
+	case 0x20000: /* fall-through for aliased mapping */
+		return 0x01;
+	case 0x0A000:
+	case 0x22000: /* fall-through for aliased mapping */
+		return 0x02;
+	case 0x0C000:
+	case 0x24000: /* fall-through for aliased mapping */
+		return 0x04;
+	case 0x0E000:
+	case 0x26000: /* fall-through for aliased mapping */
+		return 0x08;
+	case 0x28000:
+		return 0x10;
+	case 0x2A000:
+		return 0x20;
+	case 0x2C000:
+		return 0x40;
+	case 0x2E000:
+		return 0x80;
+	default:
+		return 0;
+	}
+}
+
+static _mali_osk_errcode_t mali_parse_product_info(void)
+{
+	_mali_osk_resource_t first_pp_resource;
+
+	/* Find the first PP core resource (again) */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_PP0, &first_pp_resource)) {
+		/* Create a dummy PP object for this core so that we can read the version register */
+		struct mali_group *group = mali_group_create(NULL, NULL, NULL, MALI_DOMAIN_INDEX_PP0);
+		if (NULL != group) {
+			struct mali_pp_core *pp_core = mali_pp_create(&first_pp_resource, group, MALI_FALSE, mali_get_bcast_id(&first_pp_resource));
+			if (NULL != pp_core) {
+				u32 pp_version;
+
+				pp_version = mali_pp_core_get_version(pp_core);
+
+				mali_group_delete(group);
+
+				global_gpu_major_version = (pp_version >> 8) & 0xFF;
+				global_gpu_minor_version = pp_version & 0xFF;
+
+				switch (pp_version >> 16) {
+				case MALI200_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI200;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-200 r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					MALI_PRINT_ERROR(("Mali-200 is not supported by this driver.\n"));
+					_mali_osk_abort();
+					break;
+				case MALI300_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI300;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-300 r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				case MALI400_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI400;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-400 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				case MALI450_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI450;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-450 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				case MALI470_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI470;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-470 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				default:
+					MALI_DEBUG_PRINT(2, ("Found unknown Mali GPU (r%up%u)\n", global_gpu_major_version, global_gpu_minor_version));
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				return _MALI_OSK_ERR_OK;
+			} else {
+				MALI_PRINT_ERROR(("Failed to create initial PP object\n"));
+			}
+		} else {
+			MALI_PRINT_ERROR(("Failed to create initial group object\n"));
+		}
+	} else {
+		MALI_PRINT_ERROR(("First PP core not specified in config file\n"));
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+static void mali_delete_groups(void)
+{
+	struct mali_group *group;
+
+	group = mali_group_get_glob_group(0);
+	while (NULL != group) {
+		mali_group_delete(group);
+		group = mali_group_get_glob_group(0);
+	}
+
+	MALI_DEBUG_ASSERT(0 == mali_group_get_glob_num_groups());
+}
+
+static void mali_delete_l2_cache_cores(void)
+{
+	struct mali_l2_cache_core *l2;
+
+	l2 = mali_l2_cache_core_get_glob_l2_core(0);
+	while (NULL != l2) {
+		mali_l2_cache_delete(l2);
+		l2 = mali_l2_cache_core_get_glob_l2_core(0);
+	}
+
+	MALI_DEBUG_ASSERT(0 == mali_l2_cache_core_get_glob_num_l2_cores());
+}
+
+static struct mali_l2_cache_core *mali_create_l2_cache_core(_mali_osk_resource_t *resource, u32 domain_index)
+{
+	struct mali_l2_cache_core *l2_cache = NULL;
+
+	if (NULL != resource) {
+
+		MALI_DEBUG_PRINT(3, ("Found L2 cache %s\n", resource->description));
+
+		l2_cache = mali_l2_cache_create(resource, domain_index);
+		if (NULL == l2_cache) {
+			MALI_PRINT_ERROR(("Failed to create L2 cache object\n"));
+			return NULL;
+		}
+	}
+	MALI_DEBUG_PRINT(3, ("Created L2 cache core object\n"));
+
+	return l2_cache;
+}
+
+static _mali_osk_errcode_t mali_parse_config_l2_cache(void)
+{
+	struct mali_l2_cache_core *l2_cache = NULL;
+
+	if (mali_is_mali400()) {
+		_mali_osk_resource_t l2_resource;
+		if (_MALI_OSK_ERR_OK != _mali_osk_resource_find(MALI400_OFFSET_L2_CACHE0, &l2_resource)) {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		l2_cache = mali_create_l2_cache_core(&l2_resource, MALI_DOMAIN_INDEX_L20);
+		if (NULL == l2_cache) {
+			return _MALI_OSK_ERR_FAULT;
+		}
+	} else if (mali_is_mali450()) {
+		/*
+		 * L2 for GP    at 0x10000
+		 * L2 for PP0-3 at 0x01000
+		 * L2 for PP4-7 at 0x11000 (optional)
+		 */
+
+		_mali_osk_resource_t l2_gp_resource;
+		_mali_osk_resource_t l2_pp_grp0_resource;
+		_mali_osk_resource_t l2_pp_grp1_resource;
+
+		/* Make cluster for GP's L2 */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE0, &l2_gp_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for GP\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_gp_resource, MALI_DOMAIN_INDEX_L20);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for GP in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		/* Find corresponding l2 domain */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE1, &l2_pp_grp0_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 0\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_pp_grp0_resource, MALI_DOMAIN_INDEX_L21);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for PP group 0 in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		/* Second PP core group is optional, don't fail if we don't find it */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE2, &l2_pp_grp1_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 1\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_pp_grp1_resource, MALI_DOMAIN_INDEX_L22);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		}
+	} else if (mali_is_mali470()) {
+		_mali_osk_resource_t l2c1_resource;
+
+		/* Make cluster for L2C1 */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI470_OFFSET_L2_CACHE1, &l2c1_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-470 L2 cache 1\n"));
+			l2_cache = mali_create_l2_cache_core(&l2c1_resource, MALI_DOMAIN_INDEX_L21);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for L2C1\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static struct mali_group *mali_create_group(struct mali_l2_cache_core *cache,
+		_mali_osk_resource_t *resource_mmu,
+		_mali_osk_resource_t *resource_gp,
+		_mali_osk_resource_t *resource_pp,
+		u32 domain_index)
+{
+	struct mali_mmu_core *mmu;
+	struct mali_group *group;
+
+	MALI_DEBUG_PRINT(3, ("Starting new group for MMU %s\n", resource_mmu->description));
+
+	/* Create the group object */
+	group = mali_group_create(cache, NULL, NULL, domain_index);
+	if (NULL == group) {
+		MALI_PRINT_ERROR(("Failed to create group object for MMU %s\n", resource_mmu->description));
+		return NULL;
+	}
+
+	/* Create the MMU object inside group */
+	mmu = mali_mmu_create(resource_mmu, group, MALI_FALSE);
+	if (NULL == mmu) {
+		MALI_PRINT_ERROR(("Failed to create MMU object\n"));
+		mali_group_delete(group);
+		return NULL;
+	}
+
+	if (NULL != resource_gp) {
+		/* Create the GP core object inside this group */
+		struct mali_gp_core *gp_core = mali_gp_create(resource_gp, group);
+		if (NULL == gp_core) {
+			/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+			MALI_PRINT_ERROR(("Failed to create GP object\n"));
+			mali_group_delete(group);
+			return NULL;
+		}
+	}
+
+	if (NULL != resource_pp) {
+		struct mali_pp_core *pp_core;
+
+		/* Create the PP core object inside this group */
+		pp_core = mali_pp_create(resource_pp, group, MALI_FALSE, mali_get_bcast_id(resource_pp));
+		if (NULL == pp_core) {
+			/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+			MALI_PRINT_ERROR(("Failed to create PP object\n"));
+			mali_group_delete(group);
+			return NULL;
+		}
+	}
+
+	return group;
+}
+
+static _mali_osk_errcode_t mali_create_virtual_group(_mali_osk_resource_t *resource_mmu_pp_bcast,
+		_mali_osk_resource_t *resource_pp_bcast,
+		_mali_osk_resource_t *resource_dlbu,
+		_mali_osk_resource_t *resource_bcast)
+{
+	struct mali_mmu_core *mmu_pp_bcast_core;
+	struct mali_pp_core *pp_bcast_core;
+	struct mali_dlbu_core *dlbu_core;
+	struct mali_bcast_unit *bcast_core;
+	struct mali_group *group;
+
+	MALI_DEBUG_PRINT(2, ("Starting new virtual group for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description));
+
+	/* Create the DLBU core object */
+	dlbu_core = mali_dlbu_create(resource_dlbu);
+	if (NULL == dlbu_core) {
+		MALI_PRINT_ERROR(("Failed to create DLBU object \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the Broadcast unit core */
+	bcast_core = mali_bcast_unit_create(resource_bcast);
+	if (NULL == bcast_core) {
+		MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n"));
+		mali_dlbu_delete(dlbu_core);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the group object */
+#if defined(DEBUG)
+	/* Get a physical PP group to temporarily add to broadcast unit.  IRQ
+	 * verification needs a physical group in the broadcast unit to test
+	 * the broadcast unit interrupt line. */
+	{
+		struct mali_group *phys_group = NULL;
+		int i;
+		for (i = 0; i < mali_group_get_glob_num_groups(); i++) {
+			phys_group = mali_group_get_glob_group(i);
+			if (NULL != mali_group_get_pp_core(phys_group)) break;
+		}
+		MALI_DEBUG_ASSERT(NULL != mali_group_get_pp_core(phys_group));
+
+		/* Add the group temporarily to the broadcast, and update the
+		 * broadcast HW. Since the HW is not updated when removing the
+		 * group the IRQ check will work when the virtual PP is created
+		 * later.
+		 *
+		 * When the virtual group gets populated, the actually used
+		 * groups will be added to the broadcast unit and the HW will
+		 * be updated.
+		 */
+		mali_bcast_add_group(bcast_core, phys_group);
+		mali_bcast_reset(bcast_core);
+		mali_bcast_remove_group(bcast_core, phys_group);
+	}
+#endif /* DEBUG */
+	group = mali_group_create(NULL, dlbu_core, bcast_core, MALI_DOMAIN_INDEX_DUMMY);
+	if (NULL == group) {
+		MALI_PRINT_ERROR(("Failed to create group object for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description));
+		mali_bcast_unit_delete(bcast_core);
+		mali_dlbu_delete(dlbu_core);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the MMU object inside group */
+	mmu_pp_bcast_core = mali_mmu_create(resource_mmu_pp_bcast, group, MALI_TRUE);
+	if (NULL == mmu_pp_bcast_core) {
+		MALI_PRINT_ERROR(("Failed to create MMU PP broadcast object\n"));
+		mali_group_delete(group);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the PP core object inside this group */
+	pp_bcast_core = mali_pp_create(resource_pp_bcast, group, MALI_TRUE, 0);
+	if (NULL == pp_bcast_core) {
+		/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+		MALI_PRINT_ERROR(("Failed to create PP object\n"));
+		mali_group_delete(group);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_groups(void)
+{
+	struct mali_group *group;
+	int cluster_id_gp = 0;
+	int cluster_id_pp_grp0 = 0;
+	int cluster_id_pp_grp1 = 0;
+	int i;
+
+	_mali_osk_resource_t resource_gp;
+	_mali_osk_resource_t resource_gp_mmu;
+	_mali_osk_resource_t resource_pp[8];
+	_mali_osk_resource_t resource_pp_mmu[8];
+	_mali_osk_resource_t resource_pp_mmu_bcast;
+	_mali_osk_resource_t resource_pp_bcast;
+	_mali_osk_resource_t resource_dlbu;
+	_mali_osk_resource_t resource_bcast;
+	_mali_osk_errcode_t resource_gp_found;
+	_mali_osk_errcode_t resource_gp_mmu_found;
+	_mali_osk_errcode_t resource_pp_found[8];
+	_mali_osk_errcode_t resource_pp_mmu_found[8];
+	_mali_osk_errcode_t resource_pp_mmu_bcast_found;
+	_mali_osk_errcode_t resource_pp_bcast_found;
+	_mali_osk_errcode_t resource_dlbu_found;
+	_mali_osk_errcode_t resource_bcast_found;
+
+	if (!(mali_is_mali400() || mali_is_mali450() || mali_is_mali470())) {
+		/* No known HW core */
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (MALI_MAX_JOB_RUNTIME_DEFAULT == mali_max_job_runtime) {
+		/* Group settings are not overridden by module parameters, so use device settings */
+		_mali_osk_device_data data = { 0, };
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+			/* Use device specific settings (if defined) */
+			if (0 != data.max_job_runtime) {
+				mali_max_job_runtime = data.max_job_runtime;
+			}
+		}
+	}
+
+	if (mali_is_mali450()) {
+		/* Mali-450 have separate L2s for GP, and PP core group(s) */
+		cluster_id_pp_grp0 = 1;
+		cluster_id_pp_grp1 = 2;
+	}
+
+	resource_gp_found = _mali_osk_resource_find(MALI_OFFSET_GP, &resource_gp);
+	resource_gp_mmu_found = _mali_osk_resource_find(MALI_OFFSET_GP_MMU, &resource_gp_mmu);
+	resource_pp_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0, &(resource_pp[0]));
+	resource_pp_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1, &(resource_pp[1]));
+	resource_pp_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2, &(resource_pp[2]));
+	resource_pp_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3, &(resource_pp[3]));
+	resource_pp_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4, &(resource_pp[4]));
+	resource_pp_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5, &(resource_pp[5]));
+	resource_pp_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6, &(resource_pp[6]));
+	resource_pp_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7, &(resource_pp[7]));
+	resource_pp_mmu_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0_MMU, &(resource_pp_mmu[0]));
+	resource_pp_mmu_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1_MMU, &(resource_pp_mmu[1]));
+	resource_pp_mmu_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2_MMU, &(resource_pp_mmu[2]));
+	resource_pp_mmu_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3_MMU, &(resource_pp_mmu[3]));
+	resource_pp_mmu_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4_MMU, &(resource_pp_mmu[4]));
+	resource_pp_mmu_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5_MMU, &(resource_pp_mmu[5]));
+	resource_pp_mmu_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6_MMU, &(resource_pp_mmu[6]));
+	resource_pp_mmu_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7_MMU, &(resource_pp_mmu[7]));
+
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		resource_bcast_found = _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast);
+		resource_dlbu_found = _mali_osk_resource_find(MALI_OFFSET_DLBU, &resource_dlbu);
+		resource_pp_mmu_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST_MMU, &resource_pp_mmu_bcast);
+		resource_pp_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST, &resource_pp_bcast);
+
+		if (_MALI_OSK_ERR_OK != resource_bcast_found ||
+		    _MALI_OSK_ERR_OK != resource_dlbu_found ||
+		    _MALI_OSK_ERR_OK != resource_pp_mmu_bcast_found ||
+		    _MALI_OSK_ERR_OK != resource_pp_bcast_found) {
+			/* Missing mandatory core(s) for Mali-450 or Mali-470 */
+			MALI_DEBUG_PRINT(2, ("Missing mandatory resources, Mali-450 needs DLBU, Broadcast unit, virtual PP core and virtual MMU\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK != resource_gp_found ||
+	    _MALI_OSK_ERR_OK != resource_gp_mmu_found ||
+	    _MALI_OSK_ERR_OK != resource_pp_found[0] ||
+	    _MALI_OSK_ERR_OK != resource_pp_mmu_found[0]) {
+		/* Missing mandatory core(s) */
+		MALI_DEBUG_PRINT(2, ("Missing mandatory resource, need at least one GP and one PP, both with a separate MMU\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT(1 <= mali_l2_cache_core_get_glob_num_l2_cores());
+	group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_gp), &resource_gp_mmu, &resource_gp, NULL, MALI_DOMAIN_INDEX_GP);
+	if (NULL == group) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create group for first (and mandatory) PP core */
+	MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= (cluster_id_pp_grp0 + 1)); /* >= 1 on Mali-300 and Mali-400, >= 2 on Mali-450 */
+	group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[0], NULL, &resource_pp[0], MALI_DOMAIN_INDEX_PP0);
+	if (NULL == group) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	mali_inited_pp_cores_group_1++;
+
+	/* Create groups for rest of the cores in the first PP core group */
+	for (i = 1; i < 4; i++) { /* First half of the PP cores belong to first core group */
+		if (mali_inited_pp_cores_group_1 < mali_max_pp_cores_group_1) {
+			if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) {
+				group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i);
+				if (NULL == group) {
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				mali_inited_pp_cores_group_1++;
+			}
+		}
+	}
+
+	/* Create groups for cores in the second PP core group */
+	for (i = 4; i < 8; i++) { /* Second half of the PP cores belong to second core group */
+		if (mali_inited_pp_cores_group_2 < mali_max_pp_cores_group_2) {
+			if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) {
+				MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= 2); /* Only Mali-450 have a second core group */
+				group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp1), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i);
+				if (NULL == group) {
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				mali_inited_pp_cores_group_2++;
+			}
+		}
+	}
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		_mali_osk_errcode_t err = mali_create_virtual_group(&resource_pp_mmu_bcast, &resource_pp_bcast, &resource_dlbu, &resource_bcast);
+		if (_MALI_OSK_ERR_OK != err) {
+			return err;
+		}
+	}
+
+	mali_max_pp_cores_group_1 = mali_inited_pp_cores_group_1;
+	mali_max_pp_cores_group_2 = mali_inited_pp_cores_group_2;
+	MALI_DEBUG_PRINT(2, ("%d+%d PP cores initialized\n", mali_inited_pp_cores_group_1, mali_inited_pp_cores_group_2));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_check_shared_interrupts(void)
+{
+#if !defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_TRUE == _mali_osk_shared_interrupts()) {
+		MALI_PRINT_ERROR(("Shared interrupts detected, but driver support is not enabled\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* !defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	/* It is OK to compile support for shared interrupts even if Mali is not using it. */
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_pmu(void)
+{
+	_mali_osk_resource_t resource_pmu;
+
+	MALI_DEBUG_ASSERT(0 != global_gpu_base_address);
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_PMU, &resource_pmu)) {
+		struct mali_pmu_core *pmu;
+
+		pmu = mali_pmu_create(&resource_pmu);
+		if (NULL == pmu) {
+			MALI_PRINT_ERROR(("Failed to create PMU\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	/* It's ok if the PMU doesn't exist */
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_memory(void)
+{
+	_mali_osk_device_data data = { 0, };
+	_mali_osk_errcode_t ret;
+
+	/* The priority of setting the value of mali_shared_mem_size,
+	 * mali_dedicated_mem_start and mali_dedicated_mem_size:
+	 * 1. module parameter;
+	 * 2. platform data;
+	 * 3. default value;
+	 **/
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		/* Memory settings are not overridden by module parameters, so use device settings */
+		if (0 == mali_dedicated_mem_start && 0 == mali_dedicated_mem_size) {
+			/* Use device specific settings (if defined) */
+			mali_dedicated_mem_start = data.dedicated_mem_start;
+			mali_dedicated_mem_size = data.dedicated_mem_size;
+		}
+
+		if (MALI_SHARED_MEMORY_DEFAULT_SIZE == mali_shared_mem_size &&
+		    0 != data.shared_mem_size) {
+			mali_shared_mem_size = data.shared_mem_size;
+		}
+	}
+
+	if (0 < mali_dedicated_mem_size && 0 != mali_dedicated_mem_start) {
+		MALI_DEBUG_PRINT(2, ("Mali memory settings (dedicated: 0x%08X@0x%08X)\n",
+				     mali_dedicated_mem_size, mali_dedicated_mem_start));
+
+		/* Dedicated memory */
+		ret = mali_memory_core_resource_dedicated_memory(mali_dedicated_mem_start, mali_dedicated_mem_size);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to register dedicated memory\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	if (0 < mali_shared_mem_size) {
+		MALI_DEBUG_PRINT(2, ("Mali memory settings (shared: 0x%08X)\n", mali_shared_mem_size));
+
+		/* Shared OS memory */
+		ret = mali_memory_core_resource_os_memory(mali_shared_mem_size);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to register shared OS memory\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	if (0 == mali_fb_start && 0 == mali_fb_size) {
+		/* Frame buffer settings are not overridden by module parameters, so use device settings */
+		_mali_osk_device_data data = { 0, };
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+			/* Use device specific settings (if defined) */
+			mali_fb_start = data.fb_start;
+			mali_fb_size = data.fb_size;
+		}
+
+		MALI_DEBUG_PRINT(2, ("Using device defined frame buffer settings (0x%08X@0x%08X)\n",
+				     mali_fb_size, mali_fb_start));
+	} else {
+		MALI_DEBUG_PRINT(2, ("Using module defined frame buffer settings (0x%08X@0x%08X)\n",
+				     mali_fb_size, mali_fb_start));
+	}
+
+	if (0 != mali_fb_size) {
+		/* Register frame buffer */
+		ret = mali_mem_validation_add_range(mali_fb_start, mali_fb_size);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to register frame buffer memory region\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_detect_gpu_class(void)
+{
+	if (_mali_osk_identify_gpu_resource() == 0x450)
+		mali_gpu_class_is_mali450 = MALI_TRUE;
+
+	if (_mali_osk_identify_gpu_resource() == 0x470)
+		mali_gpu_class_is_mali470 = MALI_TRUE;
+}
+
+static _mali_osk_errcode_t mali_init_hw_reset(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	_mali_osk_resource_t resource_bcast;
+
+	/* Ensure broadcast unit is in a good state before we start creating
+	 * groups and cores.
+	 */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast)) {
+		struct mali_bcast_unit *bcast_core;
+
+		bcast_core = mali_bcast_unit_create(&resource_bcast);
+		if (NULL == bcast_core) {
+			MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+		mali_bcast_unit_delete(bcast_core);
+	}
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_initialize_subsystems(void)
+{
+	_mali_osk_errcode_t err;
+
+#ifdef CONFIG_MALI_DT
+	err = _mali_osk_resource_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+#endif
+
+	mali_pp_job_initialize();
+
+	mali_timeline_initialize();
+
+	err = mali_session_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/*Try to init gpu secure mode */
+	_mali_osk_gpu_secure_mode_init();
+
+#if defined(CONFIG_MALI400_PROFILING)
+	err = _mali_osk_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE);
+	if (_MALI_OSK_ERR_OK != err) {
+		/* No biggie if we weren't able to initialize the profiling */
+		MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n"));
+	}
+#endif
+
+	err = mali_memory_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_executor_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_scheduler_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Configure memory early, needed by mali_mmu_initialize. */
+	err = mali_parse_config_memory();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_set_global_gpu_base_address();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Detect GPU class (uses L2 cache count) */
+	mali_detect_gpu_class();
+
+	err = mali_check_shared_interrupts();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the MALI PMU (will not touch HW!) */
+	err = mali_parse_config_pmu();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the power management module */
+	err = mali_pm_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Make sure the entire GPU stays on for the rest of this function */
+	mali_pm_init_begin();
+
+	/* Ensure HW is in a good state before starting to access cores. */
+	err = mali_init_hw_reset();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Detect which Mali GPU we are dealing with */
+	err = mali_parse_product_info();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* The global_product_id is now populated with the correct Mali GPU */
+
+	/* Start configuring the actual Mali hardware. */
+
+	err = mali_mmu_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		err = mali_dlbu_initialize();
+		if (_MALI_OSK_ERR_OK != err) {
+			mali_pm_init_end();
+			mali_terminate_subsystems();
+			return err;
+		}
+	}
+
+	err = mali_parse_config_l2_cache();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_parse_config_groups();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Move groups into executor */
+	mali_executor_populate();
+
+	/* Need call after all group has assigned a domain */
+	mali_pm_power_cost_setup();
+
+	/* Initialize the GPU timer */
+	err = mali_control_timer_init();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the GPU utilization tracking */
+	err = mali_utilization_init();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+#if defined(CONFIG_MALI_DVFS)
+	err = mali_dvfs_policy_init();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+#endif
+
+	/* Allowing the system to be turned off */
+	mali_pm_init_end();
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+void mali_terminate_subsystems(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	MALI_DEBUG_PRINT(2, ("terminate_subsystems() called\n"));
+
+	mali_utilization_term();
+	mali_control_timer_term();
+
+	mali_executor_depopulate();
+	mali_delete_groups(); /* Delete groups not added to executor */
+	mali_executor_terminate();
+
+	mali_scheduler_terminate();
+	mali_pp_job_terminate();
+	mali_delete_l2_cache_cores();
+	mali_mmu_terminate();
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		mali_dlbu_terminate();
+	}
+
+	mali_pm_terminate();
+
+	if (NULL != pmu) {
+		mali_pmu_delete(pmu);
+	}
+
+#if defined(CONFIG_MALI400_PROFILING)
+	_mali_osk_profiling_term();
+#endif
+
+	_mali_osk_gpu_secure_mode_deinit();
+
+	mali_memory_terminate();
+
+	mali_session_terminate();
+
+	mali_timeline_terminate();
+
+	global_gpu_base_address = 0;
+}
+
+_mali_product_id_t mali_kernel_core_get_product_id(void)
+{
+	return global_product_id;
+}
+
+u32 mali_kernel_core_get_gpu_major_version(void)
+{
+	return global_gpu_major_version;
+}
+
+u32 mali_kernel_core_get_gpu_minor_version(void)
+{
+	return global_gpu_minor_version;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	/* check compatability */
+	if (args->version == _MALI_UK_API_VERSION) {
+		args->compatible = 1;
+	} else {
+		args->compatible = 0;
+	}
+
+	args->version = _MALI_UK_API_VERSION; /* report our version */
+
+	/* success regardless of being compatible or not */
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	/* check compatability */
+	if (args->version == _MALI_UK_API_VERSION) {
+		args->compatible = 1;
+	} else {
+		args->compatible = 0;
+	}
+
+	args->version = _MALI_UK_API_VERSION; /* report our version */
+
+	/* success regardless of being compatible or not */
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args)
+{
+	_mali_osk_errcode_t err;
+	_mali_osk_notification_t *notification;
+	_mali_osk_notification_queue_t *queue;
+	struct mali_session_data *session;
+
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	queue = session->ioctl_queue;
+
+	/* if the queue does not exist we're currently shutting down */
+	if (NULL == queue) {
+		MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. Asking userspace to stop querying\n"));
+		args->type = _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS;
+		return _MALI_OSK_ERR_OK;
+	}
+
+	/* receive a notification, might sleep */
+	err = _mali_osk_notification_queue_receive(queue, &notification);
+	if (_MALI_OSK_ERR_OK != err) {
+		MALI_ERROR(err); /* errcode returned, pass on to caller */
+	}
+
+	/* copy the buffer to the user */
+	args->type = (_mali_uk_notification_type)notification->notification_type;
+	_mali_osk_memcpy(&args->data, notification->result_buffer, notification->result_buffer_size);
+
+	/* finished with the notification */
+	_mali_osk_notification_delete(notification);
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args)
+{
+	_mali_osk_notification_t *notification;
+	_mali_osk_notification_queue_t *queue;
+	struct mali_session_data *session;
+
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	queue = session->ioctl_queue;
+
+	/* if the queue does not exist we're currently shutting down */
+	if (NULL == queue) {
+		MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. Asking userspace to stop querying\n"));
+		return _MALI_OSK_ERR_OK;
+	}
+
+	notification = _mali_osk_notification_create(args->type, 0);
+	if (NULL == notification) {
+		MALI_PRINT_ERROR(("Failed to create notification object\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	_mali_osk_notification_queue_send(queue, notification);
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+_mali_osk_errcode_t _mali_ukk_pending_submit(_mali_uk_pending_submit_s *args)
+{
+	wait_queue_head_t *queue;
+
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	queue = mali_session_get_wait_queue();
+
+	/* check pending big job number, might sleep if larger than MAX allowed number */
+	if (wait_event_interruptible(*queue, MALI_MAX_PENDING_BIG_JOB > mali_scheduler_job_gp_big_job_count())) {
+		return _MALI_OSK_ERR_RESTARTSYSCALL;
+	}
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+
+_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (!session->use_high_priority_job_queue) {
+		session->use_high_priority_job_queue = MALI_TRUE;
+		MALI_DEBUG_PRINT(2, ("Session 0x%08X with pid %d was granted higher priority.\n", session, _mali_osk_get_pid()));
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_open(void **context)
+{
+	u32 i;
+	struct mali_session_data *session;
+
+	/* allocated struct to track this session */
+	session = (struct mali_session_data *)_mali_osk_calloc(1, sizeof(struct mali_session_data));
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_NOMEM);
+
+	MALI_DEBUG_PRINT(3, ("Session starting\n"));
+
+	/* create a response queue for this session */
+	session->ioctl_queue = _mali_osk_notification_queue_init();
+	if (NULL == session->ioctl_queue) {
+		goto err;
+	}
+
+	session->page_directory = mali_mmu_pagedir_alloc();
+	if (NULL == session->page_directory) {
+		goto err_mmu;
+	}
+
+	if (_MALI_OSK_ERR_OK != mali_mmu_pagedir_map(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE)) {
+		MALI_PRINT_ERROR(("Failed to map DLBU page into session\n"));
+		goto err_mmu;
+	}
+
+	if (0 != mali_dlbu_phys_addr) {
+		mali_mmu_pagedir_update(session->page_directory, MALI_DLBU_VIRT_ADDR, mali_dlbu_phys_addr,
+					_MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+	}
+
+	if (_MALI_OSK_ERR_OK != mali_memory_session_begin(session)) {
+		goto err_session;
+	}
+
+	/* Create soft system. */
+	session->soft_job_system = mali_soft_job_system_create(session);
+	if (NULL == session->soft_job_system) {
+		goto err_soft;
+	}
+
+	/* Create timeline system. */
+	session->timeline_system = mali_timeline_system_create(session);
+	if (NULL == session->timeline_system) {
+		goto err_time_line;
+	}
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_init(&session->number_of_window_jobs, 0);
+#endif
+
+	session->use_high_priority_job_queue = MALI_FALSE;
+
+	/* Initialize list of PP jobs on this session. */
+	_MALI_OSK_INIT_LIST_HEAD(&session->pp_job_list);
+
+	/* Initialize the pp_job_fb_lookup_list array used to quickly lookup jobs from a given frame builder */
+	for (i = 0; i < MALI_PP_JOB_FB_LOOKUP_LIST_SIZE; ++i) {
+		_MALI_OSK_INIT_LIST_HEAD(&session->pp_job_fb_lookup_list[i]);
+	}
+
+	session->pid = _mali_osk_get_pid();
+	session->comm = _mali_osk_get_comm();
+	session->max_mali_mem_allocated_size = 0;
+	for (i = 0; i < MALI_MEM_TYPE_MAX; i ++) {
+		atomic_set(&session->mali_mem_array[i], 0);
+	}
+	atomic_set(&session->mali_mem_allocated_pages, 0);
+	*context = (void *)session;
+
+	/* Add session to the list of all sessions. */
+	mali_session_add(session);
+
+	MALI_DEBUG_PRINT(3, ("Session started\n"));
+	return _MALI_OSK_ERR_OK;
+
+err_time_line:
+	mali_soft_job_system_destroy(session->soft_job_system);
+err_soft:
+	mali_memory_session_end(session);
+err_session:
+	mali_mmu_pagedir_free(session->page_directory);
+err_mmu:
+	_mali_osk_notification_queue_term(session->ioctl_queue);
+err:
+	_mali_osk_free(session);
+	MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+}
+
+#if defined(DEBUG)
+/* parameter used for debug */
+extern u32 num_pm_runtime_resume;
+extern u32 num_pm_updates;
+extern u32 num_pm_updates_up;
+extern u32 num_pm_updates_down;
+#endif
+
+_mali_osk_errcode_t _mali_ukk_close(void **context)
+{
+	struct mali_session_data *session;
+	MALI_CHECK_NON_NULL(context, _MALI_OSK_ERR_INVALID_ARGS);
+	session = (struct mali_session_data *)*context;
+
+	MALI_DEBUG_PRINT(3, ("Session ending\n"));
+
+	MALI_DEBUG_ASSERT_POINTER(session->soft_job_system);
+	MALI_DEBUG_ASSERT_POINTER(session->timeline_system);
+
+	/* Remove session from list of all sessions. */
+	mali_session_remove(session);
+
+	/* This flag is used to prevent queueing of jobs due to activation. */
+	session->is_aborting = MALI_TRUE;
+
+	/* Stop the soft job timer. */
+	mali_timeline_system_stop_timer(session->timeline_system);
+
+	/* Abort queued jobs */
+	mali_scheduler_abort_session(session);
+
+	/* Abort executing jobs */
+	mali_executor_abort_session(session);
+
+	/* Abort the soft job system. */
+	mali_soft_job_system_abort(session->soft_job_system);
+
+	/* Force execution of all pending bottom half processing for GP and PP. */
+	_mali_osk_wq_flush();
+
+	/* The session PP list should now be empty. */
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&session->pp_job_list));
+
+	/* At this point the GP and PP scheduler no longer has any jobs queued or running from this
+	 * session, and all soft jobs in the soft job system has been destroyed. */
+
+	/* Any trackers left in the timeline system are directly or indirectly waiting on external
+	 * sync fences.  Cancel all sync fence waiters to trigger activation of all remaining
+	 * trackers.  This call will sleep until all timelines are empty. */
+	mali_timeline_system_abort(session->timeline_system);
+
+	/* Flush pending work.
+	 * Needed to make sure all bottom half processing related to this
+	 * session has been completed, before we free internal data structures.
+	 */
+	_mali_osk_wq_flush();
+
+	/* Destroy timeline system. */
+	mali_timeline_system_destroy(session->timeline_system);
+	session->timeline_system = NULL;
+
+	/* Destroy soft system. */
+	mali_soft_job_system_destroy(session->soft_job_system);
+	session->soft_job_system = NULL;
+
+	MALI_DEBUG_CODE({
+		/* Check that the pp_job_fb_lookup_list array is empty. */
+		u32 i;
+		for (i = 0; i < MALI_PP_JOB_FB_LOOKUP_LIST_SIZE; ++i)
+		{
+			MALI_DEBUG_ASSERT(_mali_osk_list_empty(&session->pp_job_fb_lookup_list[i]));
+		}
+	});
+
+	/* Free remaining memory allocated to this session */
+	mali_memory_session_end(session);
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_term(&session->number_of_window_jobs);
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+	_mali_osk_profiling_stop_sampling(session->pid);
+#endif
+
+	/* Free session data structures */
+	mali_mmu_pagedir_unmap(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE);
+	mali_mmu_pagedir_free(session->page_directory);
+	_mali_osk_notification_queue_term(session->ioctl_queue);
+	_mali_osk_free(session);
+
+	*context = NULL;
+
+	MALI_DEBUG_PRINT(3, ("Session has ended\n"));
+
+#if defined(DEBUG)
+	MALI_DEBUG_PRINT(3, ("Stats: # runtime resumes: %u\n", num_pm_runtime_resume));
+	MALI_DEBUG_PRINT(3, ("       # PM updates: .... %u (up %u, down %u)\n", num_pm_updates, num_pm_updates_up, num_pm_updates_down));
+
+	num_pm_runtime_resume = 0;
+	num_pm_updates = 0;
+	num_pm_updates_up = 0;
+	num_pm_updates_down = 0;
+#endif
+
+	return _MALI_OSK_ERR_OK;;
+}
+
+#if MALI_STATE_TRACKING
+u32 _mali_kernel_core_dump_state(char *buf, u32 size)
+{
+	int n = 0; /* Number of bytes written to buf */
+
+	n += mali_scheduler_dump_state(buf + n, size - n);
+	n += mali_executor_dump_state(buf + n, size - n);
+
+	return n;
+}
+#endif
diff --git a/utgard/r6p1/common/mali_kernel_core.h b/utgard/r6p1/common/mali_kernel_core.h
new file mode 100644
index 0000000..cf6af32
--- /dev/null
+++ b/utgard/r6p1/common/mali_kernel_core.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_CORE_H__
+#define __MALI_KERNEL_CORE_H__
+
+#include "mali_osk.h"
+
+typedef enum {
+	_MALI_PRODUCT_ID_UNKNOWN,
+	_MALI_PRODUCT_ID_MALI200,
+	_MALI_PRODUCT_ID_MALI300,
+	_MALI_PRODUCT_ID_MALI400,
+	_MALI_PRODUCT_ID_MALI450,
+	_MALI_PRODUCT_ID_MALI470,
+} _mali_product_id_t;
+
+extern mali_bool mali_gpu_class_is_mali450;
+extern mali_bool mali_gpu_class_is_mali470;
+
+_mali_osk_errcode_t mali_initialize_subsystems(void);
+
+void mali_terminate_subsystems(void);
+
+_mali_product_id_t mali_kernel_core_get_product_id(void);
+
+u32 mali_kernel_core_get_gpu_major_version(void);
+
+u32 mali_kernel_core_get_gpu_minor_version(void);
+
+u32 _mali_kernel_core_dump_state(char *buf, u32 size);
+
+MALI_STATIC_INLINE mali_bool mali_is_mali470(void)
+{
+	return mali_gpu_class_is_mali470;
+}
+
+MALI_STATIC_INLINE mali_bool mali_is_mali450(void)
+{
+	return mali_gpu_class_is_mali450;
+}
+
+MALI_STATIC_INLINE mali_bool mali_is_mali400(void)
+{
+	if (mali_gpu_class_is_mali450 || mali_gpu_class_is_mali470)
+		return MALI_FALSE;
+
+	return MALI_TRUE;
+}
+#endif /* __MALI_KERNEL_CORE_H__ */
diff --git a/utgard/r6p1/common/mali_kernel_utilization.c b/utgard/r6p1/common/mali_kernel_utilization.c
new file mode 100755
index 0000000..ca7ebea
--- /dev/null
+++ b/utgard/r6p1/common/mali_kernel_utilization.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_utilization.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_scheduler.h"
+
+#include "mali_executor.h"
+#include "mali_dvfs_policy.h"
+#include "mali_control_timer.h"
+
+/* Thresholds for GP bound detection. */
+#define MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD 240
+#define MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD 250
+
+static _mali_osk_spinlock_irq_t *utilization_data_lock;
+
+static u32 num_running_gp_cores = 0;
+static u32 num_running_pp_cores = 0;
+
+static u64 work_start_time_gpu = 0;
+static u64 work_start_time_gp = 0;
+static u64 work_start_time_pp = 0;
+static u64 accumulated_work_time_gpu = 0;
+static u64 accumulated_work_time_gp = 0;
+static u64 accumulated_work_time_pp = 0;
+
+static u32 last_utilization_gpu = 0 ;
+static u32 last_utilization_gp = 0 ;
+static u32 last_utilization_pp = 0 ;
+
+void (*mali_utilization_callback)(struct mali_gpu_utilization_data *data) = NULL;
+
+/* Define the first timer control timer timeout in milliseconds */
+static u32 mali_control_first_timeout = 100;
+static struct mali_gpu_utilization_data mali_util_data = {0, };
+
+struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer)
+{
+	u64 time_now;
+	u32 leading_zeroes;
+	u32 shift_val;
+	u32 work_normalized_gpu;
+	u32 work_normalized_gp;
+	u32 work_normalized_pp;
+	u32 period_normalized;
+	u32 utilization_gpu;
+	u32 utilization_gp;
+	u32 utilization_pp;
+
+	mali_utilization_data_lock();
+
+	time_now = _mali_osk_time_get_ns();
+
+	*time_period = time_now - *start_time;
+
+	if (accumulated_work_time_gpu == 0 && work_start_time_gpu == 0) {
+		mali_control_timer_pause();
+		/*
+		 * No work done for this period
+		 * - No need to reschedule timer
+		 * - Report zero usage
+		 */
+		last_utilization_gpu = 0;
+		last_utilization_gp = 0;
+		last_utilization_pp = 0;
+
+		mali_util_data.utilization_gpu = last_utilization_gpu;
+		mali_util_data.utilization_gp = last_utilization_gp;
+		mali_util_data.utilization_pp = last_utilization_pp;
+
+		mali_utilization_data_unlock();
+
+		*need_add_timer = MALI_FALSE;
+
+		mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
+
+		MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
+		MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
+		MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));
+
+		return &mali_util_data;
+	}
+
+	/* If we are currently busy, update working period up to now */
+	if (work_start_time_gpu != 0) {
+		accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+		work_start_time_gpu = time_now;
+
+		/* GP and/or PP will also be busy if the GPU is busy at this point */
+
+		if (work_start_time_gp != 0) {
+			accumulated_work_time_gp += (time_now - work_start_time_gp);
+			work_start_time_gp = time_now;
+		}
+
+		if (work_start_time_pp != 0) {
+			accumulated_work_time_pp += (time_now - work_start_time_pp);
+			work_start_time_pp = time_now;
+		}
+	}
+
+	/*
+	 * We have two 64-bit values, a dividend and a divisor.
+	 * To avoid dependencies to a 64-bit divider, we shift down the two values
+	 * equally first.
+	 * We shift the dividend up and possibly the divisor down, making the result X in 256.
+	 */
+
+	/* Shift the 64-bit values down so they fit inside a 32-bit integer */
+	leading_zeroes = _mali_osk_clz((u32)(*time_period >> 32));
+	shift_val = 32 - leading_zeroes;
+	work_normalized_gpu = (u32)(accumulated_work_time_gpu >> shift_val);
+	work_normalized_gp = (u32)(accumulated_work_time_gp >> shift_val);
+	work_normalized_pp = (u32)(accumulated_work_time_pp >> shift_val);
+	period_normalized = (u32)(*time_period >> shift_val);
+
+	/*
+	 * Now, we should report the usage in parts of 256
+	 * this means we must shift up the dividend or down the divisor by 8
+	 * (we could do a combination, but we just use one for simplicity,
+	 * but the end result should be good enough anyway)
+	 */
+	if (period_normalized > 0x00FFFFFF) {
+		/* The divisor is so big that it is safe to shift it down */
+		period_normalized >>= 8;
+	} else {
+		/*
+		 * The divisor is so small that we can shift up the dividend, without loosing any data.
+		 * (dividend is always smaller than the divisor)
+		 */
+		work_normalized_gpu <<= 8;
+		work_normalized_gp <<= 8;
+		work_normalized_pp <<= 8;
+	}
+
+	utilization_gpu = work_normalized_gpu / period_normalized;
+	utilization_gp = work_normalized_gp / period_normalized;
+	utilization_pp = work_normalized_pp / period_normalized;
+
+	last_utilization_gpu = utilization_gpu;
+	last_utilization_gp = utilization_gp;
+	last_utilization_pp = utilization_pp;
+
+	if ((MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD < last_utilization_gp) &&
+	    (MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD > last_utilization_pp)) {
+		mali_executor_hint_enable(MALI_EXECUTOR_HINT_GP_BOUND);
+	} else {
+		mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
+	}
+
+	/* starting a new period */
+	accumulated_work_time_gpu = 0;
+	accumulated_work_time_gp = 0;
+	accumulated_work_time_pp = 0;
+
+	*start_time = time_now;
+
+	mali_util_data.utilization_gp = last_utilization_gp;
+	mali_util_data.utilization_gpu = last_utilization_gpu;
+	mali_util_data.utilization_pp = last_utilization_pp;
+
+	mali_utilization_data_unlock();
+
+	*need_add_timer = MALI_TRUE;
+
+	MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
+	MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
+	MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));
+
+	return &mali_util_data;
+}
+
+_mali_osk_errcode_t mali_utilization_init(void)
+{
+#if USING_GPU_UTILIZATION
+	_mali_osk_device_data data;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		if (NULL != data.utilization_callback) {
+			mali_utilization_callback = data.utilization_callback;
+			MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Utilization handler installed \n"));
+		}
+	}
+#endif /* defined(USING_GPU_UTILIZATION) */
+
+	if (NULL == mali_utilization_callback) {
+		MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: No platform utilization handler installed\n"));
+	}
+
+	utilization_data_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_UTILIZATION);
+	if (NULL == utilization_data_lock) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	num_running_gp_cores = 0;
+	num_running_pp_cores = 0;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_utilization_term(void)
+{
+	if (NULL != utilization_data_lock) {
+		_mali_osk_spinlock_irq_term(utilization_data_lock);
+	}
+}
+
+void mali_utilization_gp_start(void)
+{
+	mali_utilization_data_lock();
+
+	++num_running_gp_cores;
+	if (1 == num_running_gp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* First GP core started, consider GP busy from now and onwards */
+		work_start_time_gp = time_now;
+
+		if (0 == num_running_pp_cores) {
+			mali_bool is_resume = MALI_FALSE;
+			/*
+			 * There are no PP cores running, so this is also the point
+			 * at which we consider the GPU to be busy as well.
+			 */
+			work_start_time_gpu = time_now;
+
+			is_resume  = mali_control_timer_resume(time_now);
+
+			mali_utilization_data_unlock();
+
+			if (is_resume) {
+				/* Do some policy in new period for performance consideration */
+#if defined(CONFIG_MALI_DVFS)
+				/* Clear session->number_of_window_jobs, prepare parameter for dvfs */
+				mali_session_max_window_num();
+				if (0 == last_utilization_gpu) {
+					/*
+					 * for mali_dev_pause is called in set clock,
+					 * so each time we change clock, we will set clock to
+					 * highest step even if under down clock case,
+					 * it is not nessesary, so we only set the clock under
+					 * last time utilization equal 0, we stop the timer then
+					 * start the GPU again case
+					 */
+					mali_dvfs_policy_new_period();
+				}
+#endif
+				/*
+				 * First timeout using short interval for power consideration
+				 * because we give full power in the new period, but if the
+				 * job loading is light, finish in 10ms, the other time all keep
+				 * in high freq it will wast time.
+				 */
+				mali_control_timer_add(mali_control_first_timeout);
+			}
+		} else {
+			mali_utilization_data_unlock();
+		}
+
+	} else {
+		/* Nothing to do */
+		mali_utilization_data_unlock();
+	}
+}
+
+void mali_utilization_pp_start(void)
+{
+	mali_utilization_data_lock();
+
+	++num_running_pp_cores;
+	if (1 == num_running_pp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* First PP core started, consider PP busy from now and onwards */
+		work_start_time_pp = time_now;
+
+		if (0 == num_running_gp_cores) {
+			mali_bool is_resume = MALI_FALSE;
+			/*
+			 * There are no GP cores running, so this is also the point
+			 * at which we consider the GPU to be busy as well.
+			 */
+			work_start_time_gpu = time_now;
+
+			/* Start a new period if stoped */
+			is_resume = mali_control_timer_resume(time_now);
+
+			mali_utilization_data_unlock();
+
+			if (is_resume) {
+#if defined(CONFIG_MALI_DVFS)
+				/* Clear session->number_of_window_jobs, prepare parameter for dvfs */
+				mali_session_max_window_num();
+				if (0 == last_utilization_gpu) {
+					/*
+					 * for mali_dev_pause is called in set clock,
+					 * so each time we change clock, we will set clock to
+					 * highest step even if under down clock case,
+					 * it is not nessesary, so we only set the clock under
+					 * last time utilization equal 0, we stop the timer then
+					 * start the GPU again case
+					 */
+					mali_dvfs_policy_new_period();
+				}
+#endif
+
+				/*
+				 * First timeout using short interval for power consideration
+				 * because we give full power in the new period, but if the
+				 * job loading is light, finish in 10ms, the other time all keep
+				 * in high freq it will wast time.
+				 */
+				mali_control_timer_add(mali_control_first_timeout);
+			}
+		} else {
+			mali_utilization_data_unlock();
+		}
+	} else {
+		/* Nothing to do */
+		mali_utilization_data_unlock();
+	}
+}
+
+void mali_utilization_gp_end(void)
+{
+	mali_utilization_data_lock();
+
+	--num_running_gp_cores;
+	if (0 == num_running_gp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* Last GP core ended, consider GP idle from now and onwards */
+		accumulated_work_time_gp += (time_now - work_start_time_gp);
+		work_start_time_gp = 0;
+
+		if (0 == num_running_pp_cores) {
+			/*
+			 * There are no PP cores running, so this is also the point
+			 * at which we consider the GPU to be idle as well.
+			 */
+			accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+			work_start_time_gpu = 0;
+		}
+	}
+
+	mali_utilization_data_unlock();
+}
+
+void mali_utilization_pp_end(void)
+{
+	mali_utilization_data_lock();
+
+	--num_running_pp_cores;
+	if (0 == num_running_pp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* Last PP core ended, consider PP idle from now and onwards */
+		accumulated_work_time_pp += (time_now - work_start_time_pp);
+		work_start_time_pp = 0;
+
+		if (0 == num_running_gp_cores) {
+			/*
+			 * There are no GP cores running, so this is also the point
+			 * at which we consider the GPU to be idle as well.
+			 */
+			accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+			work_start_time_gpu = 0;
+		}
+	}
+
+	mali_utilization_data_unlock();
+}
+
+mali_bool mali_utilization_enabled(void)
+{
+#if defined(CONFIG_MALI_DVFS)
+	return mali_dvfs_policy_enabled();
+#else
+	return (NULL != mali_utilization_callback);
+#endif /* defined(CONFIG_MALI_DVFS) */
+}
+
+void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data)
+{
+	MALI_DEBUG_ASSERT_POINTER(mali_utilization_callback);
+
+	mali_utilization_callback(util_data);
+}
+
+void mali_utilization_reset(void)
+{
+	accumulated_work_time_gpu = 0;
+	accumulated_work_time_gp = 0;
+	accumulated_work_time_pp = 0;
+
+	last_utilization_gpu = 0;
+	last_utilization_gp = 0;
+	last_utilization_pp = 0;
+}
+
+void mali_utilization_data_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(utilization_data_lock);
+}
+
+void mali_utilization_data_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(utilization_data_lock);
+}
+
+void mali_utilization_data_assert_locked(void)
+{
+	MALI_DEBUG_ASSERT_LOCK_HELD(utilization_data_lock);
+}
+
+u32 _mali_ukk_utilization_gp_pp(void)
+{
+	return last_utilization_gpu;
+}
+
+u32 _mali_ukk_utilization_gp(void)
+{
+	return last_utilization_gp;
+}
+
+u32 _mali_ukk_utilization_pp(void)
+{
+	return last_utilization_pp;
+}
diff --git a/utgard/r6p1/common/mali_kernel_utilization.h b/utgard/r6p1/common/mali_kernel_utilization.h
new file mode 100755
index 0000000..5206225
--- /dev/null
+++ b/utgard/r6p1/common/mali_kernel_utilization.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_UTILIZATION_H__
+#define __MALI_KERNEL_UTILIZATION_H__
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_osk.h"
+
+/**
+ * Initialize/start the Mali GPU utilization metrics reporting.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_utilization_init(void);
+
+/**
+ * Terminate the Mali GPU utilization metrics reporting
+ */
+void mali_utilization_term(void);
+
+/**
+ * Check if Mali utilization is enabled
+ */
+mali_bool mali_utilization_enabled(void);
+
+/**
+ * Should be called when a job is about to execute a GP job
+ */
+void mali_utilization_gp_start(void);
+
+/**
+ * Should be called when a job has completed executing a GP job
+ */
+void mali_utilization_gp_end(void);
+
+/**
+ * Should be called when a job is about to execute a PP job
+ */
+void mali_utilization_pp_start(void);
+
+/**
+ * Should be called when a job has completed executing a PP job
+ */
+void mali_utilization_pp_end(void);
+
+/**
+ * Should be called to calcution the GPU utilization
+ */
+struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer);
+
+_mali_osk_spinlock_irq_t *mali_utilization_get_lock(void);
+
+void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data);
+
+void mali_utilization_data_lock(void);
+
+void mali_utilization_data_unlock(void);
+
+void mali_utilization_data_assert_locked(void);
+
+void mali_utilization_reset(void);
+
+
+#endif /* __MALI_KERNEL_UTILIZATION_H__ */
diff --git a/utgard/r6p1/common/mali_kernel_vsync.c b/utgard/r6p1/common/mali_kernel_vsync.c
new file mode 100755
index 0000000..3b2b108
--- /dev/null
+++ b/utgard/r6p1/common/mali_kernel_vsync.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+
+#include "mali_osk_profiling.h"
+
+_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args)
+{
+	_mali_uk_vsync_event event = (_mali_uk_vsync_event)args->event;
+	MALI_IGNORE(event); /* event is not used for release code, and that is OK */
+
+	/*
+	 * Manually generate user space events in kernel space.
+	 * This saves user space from calling kernel space twice in this case.
+	 * We just need to remember to add pid and tid manually.
+	 */
+	if (event == _MALI_UK_VSYNC_EVENT_BEGIN_WAIT) {
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC,
+					      _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0);
+	}
+
+	if (event == _MALI_UK_VSYNC_EVENT_END_WAIT) {
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC,
+					      _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0);
+	}
+
+
+	MALI_DEBUG_PRINT(4, ("Received VSYNC event: %d\n", event));
+	MALI_SUCCESS;
+}
+
diff --git a/utgard/r6p1/common/mali_l2_cache.c b/utgard/r6p1/common/mali_l2_cache.c
new file mode 100755
index 0000000..60c3957
--- /dev/null
+++ b/utgard/r6p1/common/mali_l2_cache.c
@@ -0,0 +1,540 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_hw_core.h"
+#include "mali_scheduler.h"
+#include "mali_pm.h"
+#include "mali_pm_domain.h"
+
+/**
+ * Size of the Mali L2 cache registers in bytes
+ */
+#define MALI400_L2_CACHE_REGISTERS_SIZE 0x30
+
+/**
+ * Mali L2 cache register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_l2_cache_register {
+	MALI400_L2_CACHE_REGISTER_SIZE         = 0x0004,
+	MALI400_L2_CACHE_REGISTER_STATUS       = 0x0008,
+	/*unused                               = 0x000C */
+	MALI400_L2_CACHE_REGISTER_COMMAND      = 0x0010,
+	MALI400_L2_CACHE_REGISTER_CLEAR_PAGE   = 0x0014,
+	MALI400_L2_CACHE_REGISTER_MAX_READS    = 0x0018,
+	MALI400_L2_CACHE_REGISTER_ENABLE       = 0x001C,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0 = 0x0020,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0 = 0x0024,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1 = 0x0028,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1 = 0x002C,
+} mali_l2_cache_register;
+
+/**
+ * Mali L2 cache commands
+ * These are the commands that can be sent to the Mali L2 cache unit
+ */
+typedef enum mali_l2_cache_command {
+	MALI400_L2_CACHE_COMMAND_CLEAR_ALL = 0x01,
+} mali_l2_cache_command;
+
+/**
+ * Mali L2 cache commands
+ * These are the commands that can be sent to the Mali L2 cache unit
+ */
+typedef enum mali_l2_cache_enable {
+	MALI400_L2_CACHE_ENABLE_DEFAULT = 0x0, /* Default */
+	MALI400_L2_CACHE_ENABLE_ACCESS = 0x01,
+	MALI400_L2_CACHE_ENABLE_READ_ALLOCATE = 0x02,
+} mali_l2_cache_enable;
+
+/**
+ * Mali L2 cache status bits
+ */
+typedef enum mali_l2_cache_status {
+	MALI400_L2_CACHE_STATUS_COMMAND_BUSY = 0x01,
+	MALI400_L2_CACHE_STATUS_DATA_BUSY    = 0x02,
+} mali_l2_cache_status;
+
+#define MALI400_L2_MAX_READS_NOT_SET -1
+
+static struct mali_l2_cache_core *
+	mali_global_l2s[MALI_MAX_NUMBER_OF_L2_CACHE_CORES] = { NULL, };
+static u32 mali_global_num_l2s = 0;
+
+int mali_l2_max_reads = MALI400_L2_MAX_READS_NOT_SET;
+
+
+/* Local helper functions */
+
+static void mali_l2_cache_reset(struct mali_l2_cache_core *cache);
+
+static _mali_osk_errcode_t mali_l2_cache_send_command(
+	struct mali_l2_cache_core *cache, u32 reg, u32 val);
+
+static void mali_l2_cache_lock(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	_mali_osk_spinlock_irq_lock(cache->lock);
+}
+
+static void mali_l2_cache_unlock(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	_mali_osk_spinlock_irq_unlock(cache->lock);
+}
+
+/* Implementation of the L2 cache interface */
+
+struct mali_l2_cache_core *mali_l2_cache_create(
+	_mali_osk_resource_t *resource, u32 domain_index)
+{
+	struct mali_l2_cache_core *cache = NULL;
+#if defined(DEBUG)
+	u32 cache_size;
+#endif
+
+	MALI_DEBUG_PRINT(4, ("Mali L2 cache: Creating Mali L2 cache: %s\n",
+			     resource->description));
+
+	if (mali_global_num_l2s >= MALI_MAX_NUMBER_OF_L2_CACHE_CORES) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Too many L2 caches\n"));
+		return NULL;
+	}
+
+	cache = _mali_osk_malloc(sizeof(struct mali_l2_cache_core));
+	if (NULL == cache) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Failed to allocate memory for L2 cache core\n"));
+		return NULL;
+	}
+
+	cache->core_id =  mali_global_num_l2s;
+	cache->counter_src0 = MALI_HW_CORE_NO_COUNTER;
+	cache->counter_src1 = MALI_HW_CORE_NO_COUNTER;
+	cache->counter_value0_base = 0;
+	cache->counter_value1_base = 0;
+	cache->pm_domain = NULL;
+	cache->power_is_on = MALI_FALSE;
+	cache->last_invalidated_id = 0;
+
+	if (_MALI_OSK_ERR_OK != mali_hw_core_create(&cache->hw_core,
+			resource, MALI400_L2_CACHE_REGISTERS_SIZE)) {
+		_mali_osk_free(cache);
+		return NULL;
+	}
+
+#if defined(DEBUG)
+	cache_size = mali_hw_core_register_read(&cache->hw_core,
+						MALI400_L2_CACHE_REGISTER_SIZE);
+	MALI_DEBUG_PRINT(2, ("Mali L2 cache: Created %s: % 3uK, %u-way, % 2ubyte cache line, % 3ubit external bus\n",
+			     resource->description,
+			     1 << (((cache_size >> 16) & 0xff) - 10),
+			     1 << ((cache_size >> 8) & 0xff),
+			     1 << (cache_size & 0xff),
+			     1 << ((cache_size >> 24) & 0xff)));
+#endif
+
+	cache->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED,
+			_MALI_OSK_LOCK_ORDER_L2);
+	if (NULL == cache->lock) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Failed to create counter lock for L2 cache core %s\n",
+				  cache->hw_core.description));
+		mali_hw_core_delete(&cache->hw_core);
+		_mali_osk_free(cache);
+		return NULL;
+	}
+
+	/* register with correct power domain */
+	cache->pm_domain = mali_pm_register_l2_cache(
+				   domain_index, cache);
+
+	mali_global_l2s[mali_global_num_l2s] = cache;
+	mali_global_num_l2s++;
+
+	return cache;
+}
+
+void mali_l2_cache_delete(struct mali_l2_cache_core *cache)
+{
+	u32 i;
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		if (mali_global_l2s[i] != cache) {
+			continue;
+		}
+
+		mali_global_l2s[i] = NULL;
+		mali_global_num_l2s--;
+
+		if (i == mali_global_num_l2s) {
+			/* Removed last element, nothing more to do */
+			break;
+		}
+
+		/*
+		 * We removed a l2 cache from the middle of the array,
+		 * so move the last l2 cache to current position
+		 */
+		mali_global_l2s[i] = mali_global_l2s[mali_global_num_l2s];
+		mali_global_l2s[mali_global_num_l2s] = NULL;
+
+		/* All good */
+		break;
+	}
+
+	_mali_osk_spinlock_irq_term(cache->lock);
+	mali_hw_core_delete(&cache->hw_core);
+	_mali_osk_free(cache);
+}
+
+void mali_l2_cache_power_up(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	mali_l2_cache_lock(cache);
+
+	mali_l2_cache_reset(cache);
+
+	if ((1 << MALI_DOMAIN_INDEX_DUMMY) != cache->pm_domain->pmu_mask)
+		MALI_DEBUG_ASSERT(MALI_FALSE == cache->power_is_on);
+	cache->power_is_on = MALI_TRUE;
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_power_down(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	mali_l2_cache_lock(cache);
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == cache->power_is_on);
+
+	/*
+	 * The HW counters will start from zero again when we resume,
+	 * but we should report counters as always increasing.
+	 * Take a copy of the HW values now in order to add this to
+	 * the values we report after being powered up.
+	 *
+	 * The physical power off of the L2 cache might be outside our
+	 * own control (e.g. runtime PM). That is why we must manually
+	 * set set the counter value to zero as well.
+	 */
+
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+		cache->counter_value0_base += mali_hw_core_register_read(
+						      &cache->hw_core,
+						      MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0);
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0, 0);
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		cache->counter_value1_base += mali_hw_core_register_read(
+						      &cache->hw_core,
+						      MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1);
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1, 0);
+	}
+
+
+	cache->power_is_on = MALI_FALSE;
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_core_set_counter_src(
+	struct mali_l2_cache_core *cache, u32 source_id, u32 counter)
+{
+	u32 reg_offset_src;
+	u32 reg_offset_val;
+
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT(source_id >= 0 && source_id <= 1);
+
+	mali_l2_cache_lock(cache);
+
+	if (0 == source_id) {
+		/* start counting from 0 */
+		cache->counter_value0_base = 0;
+		cache->counter_src0 = counter;
+		reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0;
+		reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0;
+	} else {
+		/* start counting from 0 */
+		cache->counter_value1_base = 0;
+		cache->counter_src1 = counter;
+		reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1;
+		reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1;
+	}
+
+	if (cache->power_is_on) {
+		u32 hw_src;
+
+		if (MALI_HW_CORE_NO_COUNTER != counter) {
+			hw_src = counter;
+		} else {
+			hw_src = 0; /* disable value for HW */
+		}
+
+		/* Set counter src */
+		mali_hw_core_register_write(&cache->hw_core,
+					    reg_offset_src, hw_src);
+
+		/* Make sure the HW starts counting from 0 again */
+		mali_hw_core_register_write(&cache->hw_core,
+					    reg_offset_val, 0);
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_core_get_counter_values(
+	struct mali_l2_cache_core *cache,
+	u32 *src0, u32 *value0, u32 *src1, u32 *value1)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT(NULL != src0);
+	MALI_DEBUG_ASSERT(NULL != value0);
+	MALI_DEBUG_ASSERT(NULL != src1);
+	MALI_DEBUG_ASSERT(NULL != value1);
+
+	mali_l2_cache_lock(cache);
+
+	*src0 = cache->counter_src0;
+	*src1 = cache->counter_src1;
+
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+		if (MALI_TRUE == cache->power_is_on) {
+			*value0 = mali_hw_core_register_read(&cache->hw_core,
+							     MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0);
+		} else {
+			*value0 = 0;
+		}
+
+		/* Add base offset value (in case we have been power off) */
+		*value0 += cache->counter_value0_base;
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		if (MALI_TRUE == cache->power_is_on) {
+			*value1 = mali_hw_core_register_read(&cache->hw_core,
+							     MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1);
+		} else {
+			*value1 = 0;
+		}
+
+		/* Add base offset value (in case we have been power off) */
+		*value1 += cache->counter_value1_base;
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index)
+{
+	if (mali_global_num_l2s > index) {
+		return mali_global_l2s[index];
+	}
+
+	return NULL;
+}
+
+u32 mali_l2_cache_core_get_glob_num_l2_cores(void)
+{
+	return mali_global_num_l2s;
+}
+
+void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	if (NULL == cache) {
+		return;
+	}
+
+	mali_l2_cache_lock(cache);
+
+	cache->last_invalidated_id = mali_scheduler_get_new_cache_order();
+	mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND,
+				   MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_invalidate_conditional(
+	struct mali_l2_cache_core *cache, u32 id)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	if (NULL == cache) {
+		return;
+	}
+
+	/*
+	 * If the last cache invalidation was done by a job with a higher id we
+	 * don't have to flush. Since user space will store jobs w/ their
+	 * corresponding memory in sequence (first job #0, then job #1, ...),
+	 * we don't have to flush for job n-1 if job n has already invalidated
+	 * the cache since we know for sure that job n-1's memory was already
+	 * written when job n was started.
+	 */
+
+	mali_l2_cache_lock(cache);
+
+	if (((s32)id) > ((s32)cache->last_invalidated_id)) {
+		/* Set latest invalidated id to current "point in time" */
+		cache->last_invalidated_id =
+			mali_scheduler_get_new_cache_order();
+		mali_l2_cache_send_command(cache,
+					   MALI400_L2_CACHE_REGISTER_COMMAND,
+					   MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_invalidate_all(void)
+{
+	u32 i;
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		struct mali_l2_cache_core *cache = mali_global_l2s[i];
+		_mali_osk_errcode_t ret;
+
+		MALI_DEBUG_ASSERT_POINTER(cache);
+
+		mali_l2_cache_lock(cache);
+
+		if (MALI_TRUE != cache->power_is_on) {
+			mali_l2_cache_unlock(cache);
+			continue;
+		}
+
+		cache->last_invalidated_id =
+			mali_scheduler_get_new_cache_order();
+
+		ret = mali_l2_cache_send_command(cache,
+						 MALI400_L2_CACHE_REGISTER_COMMAND,
+						 MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to invalidate cache\n"));
+		}
+
+		mali_l2_cache_unlock(cache);
+	}
+}
+
+void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages)
+{
+	u32 i;
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		struct mali_l2_cache_core *cache = mali_global_l2s[i];
+		u32 j;
+
+		MALI_DEBUG_ASSERT_POINTER(cache);
+
+		mali_l2_cache_lock(cache);
+
+		if (MALI_TRUE != cache->power_is_on) {
+			mali_l2_cache_unlock(cache);
+			continue;
+		}
+
+		for (j = 0; j < num_pages; j++) {
+			_mali_osk_errcode_t ret;
+
+			ret = mali_l2_cache_send_command(cache,
+							 MALI400_L2_CACHE_REGISTER_CLEAR_PAGE,
+							 pages[j]);
+			if (_MALI_OSK_ERR_OK != ret) {
+				MALI_PRINT_ERROR(("Failed to invalidate cache (page)\n"));
+			}
+		}
+
+		mali_l2_cache_unlock(cache);
+	}
+}
+
+/* -------- local helper functions below -------- */
+
+static void mali_l2_cache_reset(struct mali_l2_cache_core *cache)
+{
+    MALI_DEBUG_ASSERT_POINTER(cache);
+    MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock);
+
+    /* Kasin Added, skip off power domain. */
+    if (cache && cache->pm_domain && cache->pm_domain->power_is_on == MALI_FALSE) {
+        printk("===========%s, %d skip off power domain?\n", __FUNCTION__, __LINE__);
+    }
+
+
+    /* Invalidate cache (just to keep it in a known state at startup) */
+    mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND,
+            MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+
+    /* Enable cache */
+    mali_hw_core_register_write(&cache->hw_core,
+            MALI400_L2_CACHE_REGISTER_ENABLE,
+            (u32)MALI400_L2_CACHE_ENABLE_ACCESS |
+            (u32)MALI400_L2_CACHE_ENABLE_READ_ALLOCATE);
+
+	if (MALI400_L2_MAX_READS_NOT_SET != mali_l2_max_reads) {
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_MAX_READS,
+					    (u32)mali_l2_max_reads);
+	}
+
+	/* Restart any performance counters (if enabled) */
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0,
+					    cache->counter_src0);
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1,
+					    cache->counter_src1);
+	}
+}
+
+static _mali_osk_errcode_t mali_l2_cache_send_command(
+	struct mali_l2_cache_core *cache, u32 reg, u32 val)
+{
+	int i = 0;
+	const int loop_count = 100000;
+
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock);
+
+	/*
+	 * First, wait for L2 cache command handler to go idle.
+	 * (Commands received while processing another command will be ignored)
+	 */
+	for (i = 0; i < loop_count; i++) {
+		if (!(mali_hw_core_register_read(&cache->hw_core,
+						 MALI400_L2_CACHE_REGISTER_STATUS) &
+		      (u32)MALI400_L2_CACHE_STATUS_COMMAND_BUSY)) {
+			break;
+		}
+	}
+
+	if (i == loop_count) {
+		MALI_DEBUG_PRINT(1, ("Mali L2 cache: aborting wait for command interface to go idle\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* then issue the command */
+	mali_hw_core_register_write(&cache->hw_core, reg, val);
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r6p1/common/mali_l2_cache.h b/utgard/r6p1/common/mali_l2_cache.h
new file mode 100755
index 0000000..de92ad6
--- /dev/null
+++ b/utgard/r6p1/common/mali_l2_cache.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_L2_CACHE_H__
+#define __MALI_KERNEL_L2_CACHE_H__
+
+#include "mali_osk.h"
+#include "mali_hw_core.h"
+
+#define MALI_MAX_NUMBER_OF_L2_CACHE_CORES  3
+/* Maximum 1 GP and 4 PP for an L2 cache core (Mali-400 MP4) */
+#define MALI_MAX_NUMBER_OF_GROUPS_PER_L2_CACHE 5
+
+/**
+ * Definition of the L2 cache core struct
+ * Used to track a L2 cache unit in the system.
+ * Contains information about the mapping of the registers
+ */
+struct mali_l2_cache_core {
+	/* Common HW core functionality */
+	struct mali_hw_core hw_core;
+
+	/* Synchronize L2 cache access */
+	_mali_osk_spinlock_irq_t *lock;
+
+	/* Unique core ID */
+	u32 core_id;
+
+	/* The power domain this L2 cache belongs to */
+	struct mali_pm_domain *pm_domain;
+
+	/* MALI_TRUE if power is on for this L2 cache */
+	mali_bool power_is_on;
+
+	/* A "timestamp" to avoid unnecessary flushes */
+	u32 last_invalidated_id;
+
+	/* Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+	u32 counter_src0;
+
+	/* Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+	u32 counter_src1;
+
+	/*
+	 * Performance counter 0 value base/offset
+	 * (allows accumulative reporting even after power off)
+	 */
+	u32 counter_value0_base;
+
+	/*
+	 * Performance counter 0 value base/offset
+	 * (allows accumulative reporting even after power off)
+	 */
+	u32 counter_value1_base;
+
+	/* Used by PM domains to link L2 caches of same domain */
+	_mali_osk_list_t pm_domain_list;
+};
+
+_mali_osk_errcode_t mali_l2_cache_initialize(void);
+void mali_l2_cache_terminate(void);
+
+struct mali_l2_cache_core *mali_l2_cache_create(
+	_mali_osk_resource_t *resource, u32 domain_index);
+void mali_l2_cache_delete(struct mali_l2_cache_core *cache);
+
+MALI_STATIC_INLINE u32 mali_l2_cache_get_id(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->core_id;
+}
+
+MALI_STATIC_INLINE struct mali_pm_domain *mali_l2_cache_get_pm_domain(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->pm_domain;
+}
+
+void mali_l2_cache_power_up(struct mali_l2_cache_core *cache);
+void mali_l2_cache_power_down(struct mali_l2_cache_core *cache);
+
+void mali_l2_cache_core_set_counter_src(
+	struct mali_l2_cache_core *cache, u32 source_id, u32 counter);
+
+MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src0(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->counter_src0;
+}
+
+MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src1(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->counter_src1;
+}
+
+void mali_l2_cache_core_get_counter_values(
+	struct mali_l2_cache_core *cache,
+	u32 *src0, u32 *value0, u32 *src1, u32 *value1);
+
+struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index);
+u32 mali_l2_cache_core_get_glob_num_l2_cores(void);
+
+struct mali_group *mali_l2_cache_get_group(
+	struct mali_l2_cache_core *cache, u32 index);
+
+void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache);
+void mali_l2_cache_invalidate_conditional(
+	struct mali_l2_cache_core *cache, u32 id);
+
+void mali_l2_cache_invalidate_all(void);
+void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages);
+
+#endif /* __MALI_KERNEL_L2_CACHE_H__ */
diff --git a/utgard/r6p1/common/mali_mem_validation.c b/utgard/r6p1/common/mali_mem_validation.c
new file mode 100644
index 0000000..dddfb58
--- /dev/null
+++ b/utgard/r6p1/common/mali_mem_validation.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_mem_validation.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#define MALI_INVALID_MEM_ADDR 0xFFFFFFFF
+
+typedef struct {
+	u32 phys_base;        /**< Mali physical base of the memory, page aligned */
+	u32 size;             /**< size in bytes of the memory, multiple of page size */
+} _mali_mem_validation_t;
+
+static _mali_mem_validation_t mali_mem_validator = { MALI_INVALID_MEM_ADDR, MALI_INVALID_MEM_ADDR };
+
+_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size)
+{
+	/* Check that no other MEM_VALIDATION resources exist */
+	if (MALI_INVALID_MEM_ADDR != mali_mem_validator.phys_base) {
+		MALI_PRINT_ERROR(("Failed to add frame buffer memory; another range is already specified\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Check restrictions on page alignment */
+	if ((0 != (start & (~_MALI_OSK_CPU_PAGE_MASK))) ||
+	    (0 != (size & (~_MALI_OSK_CPU_PAGE_MASK)))) {
+		MALI_PRINT_ERROR(("Failed to add frame buffer memory; incorrect alignment\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	mali_mem_validator.phys_base = start;
+	mali_mem_validator.size = size;
+	MALI_DEBUG_PRINT(2, ("Memory Validator installed for Mali physical address base=0x%08X, size=0x%08X\n",
+			     mali_mem_validator.phys_base, mali_mem_validator.size));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size)
+{
+#if 0
+	if (phys_addr < (phys_addr + size)) { /* Don't allow overflow (or zero size) */
+		if ((0 == (phys_addr & (~_MALI_OSK_CPU_PAGE_MASK))) &&
+		    (0 == (size & (~_MALI_OSK_CPU_PAGE_MASK)))) {
+			if ((phys_addr          >= mali_mem_validator.phys_base) &&
+			    ((phys_addr + (size - 1)) >= mali_mem_validator.phys_base) &&
+			    (phys_addr          <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1))) &&
+			    ((phys_addr + (size - 1)) <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1)))) {
+				MALI_DEBUG_PRINT(3, ("Accepted range 0x%08X + size 0x%08X (= 0x%08X)\n", phys_addr, size, (phys_addr + size - 1)));
+				return _MALI_OSK_ERR_OK;
+			}
+		}
+	}
+
+	MALI_PRINT_ERROR(("MALI PHYSICAL RANGE VALIDATION ERROR: The range supplied was: phys_base=0x%08X, size=0x%08X\n", phys_addr, size));
+
+	return _MALI_OSK_ERR_FAULT;
+#else
+	return _MALI_OSK_ERR_OK;
+#endif
+}
diff --git a/utgard/r6p1/common/mali_mem_validation.h b/utgard/r6p1/common/mali_mem_validation.h
new file mode 100644
index 0000000..f7e6cb1
--- /dev/null
+++ b/utgard/r6p1/common/mali_mem_validation.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2011-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEM_VALIDATION_H__
+#define __MALI_MEM_VALIDATION_H__
+
+#include "mali_osk.h"
+
+_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size);
+_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size);
+
+#endif /* __MALI_MEM_VALIDATION_H__ */
diff --git a/utgard/r6p1/common/mali_mmu.c b/utgard/r6p1/common/mali_mmu.c
new file mode 100755
index 0000000..c08ce1f
--- /dev/null
+++ b/utgard/r6p1/common/mali_mmu.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_ukk.h"
+
+#include "mali_mmu.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "mali_mmu_page_directory.h"
+
+/**
+ * Size of the MMU registers in bytes
+ */
+#define MALI_MMU_REGISTERS_SIZE 0x24
+
+/**
+ * MMU commands
+ * These are the commands that can be sent
+ * to the MMU unit.
+ */
+typedef enum mali_mmu_command {
+	MALI_MMU_COMMAND_ENABLE_PAGING = 0x00, /**< Enable paging (memory translation) */
+	MALI_MMU_COMMAND_DISABLE_PAGING = 0x01, /**< Disable paging (memory translation) */
+	MALI_MMU_COMMAND_ENABLE_STALL = 0x02, /**<  Enable stall on page fault */
+	MALI_MMU_COMMAND_DISABLE_STALL = 0x03, /**< Disable stall on page fault */
+	MALI_MMU_COMMAND_ZAP_CACHE = 0x04, /**< Zap the entire page table cache */
+	MALI_MMU_COMMAND_PAGE_FAULT_DONE = 0x05, /**< Page fault processed */
+	MALI_MMU_COMMAND_HARD_RESET = 0x06 /**< Reset the MMU back to power-on settings */
+} mali_mmu_command;
+
+static void mali_mmu_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_mmu_probe_ack(void *data);
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu);
+
+/* page fault queue flush helper pages
+ * note that the mapping pointers are currently unused outside of the initialization functions */
+static mali_dma_addr mali_page_fault_flush_page_directory = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_page_directory_mapping = NULL;
+static mali_dma_addr mali_page_fault_flush_page_table = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_page_table_mapping = NULL;
+static mali_dma_addr mali_page_fault_flush_data_page = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_data_page_mapping = NULL;
+
+/* an empty page directory (no address valid) which is active on any MMU not currently marked as in use */
+static mali_dma_addr mali_empty_page_directory_phys   = MALI_INVALID_PAGE;
+static mali_io_address mali_empty_page_directory_virt = NULL;
+
+
+_mali_osk_errcode_t mali_mmu_initialize(void)
+{
+	/* allocate the helper pages */
+	mali_empty_page_directory_phys = mali_allocate_empty_page(&mali_empty_page_directory_virt);
+	if (0 == mali_empty_page_directory_phys) {
+		MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate empty page directory.\n"));
+		mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	if (_MALI_OSK_ERR_OK != mali_create_fault_flush_pages(&mali_page_fault_flush_page_directory,
+			&mali_page_fault_flush_page_directory_mapping,
+			&mali_page_fault_flush_page_table,
+			&mali_page_fault_flush_page_table_mapping,
+			&mali_page_fault_flush_data_page,
+			&mali_page_fault_flush_data_page_mapping)) {
+		MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate fault flush pages\n"));
+		mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt);
+		mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+		mali_empty_page_directory_virt = NULL;
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mmu_terminate(void)
+{
+	MALI_DEBUG_PRINT(3, ("Mali MMU: terminating\n"));
+
+	/* Free global helper pages */
+	mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt);
+	mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+	mali_empty_page_directory_virt = NULL;
+
+	/* Free the page fault flush pages */
+	mali_destroy_fault_flush_pages(&mali_page_fault_flush_page_directory,
+				       &mali_page_fault_flush_page_directory_mapping,
+				       &mali_page_fault_flush_page_table,
+				       &mali_page_fault_flush_page_table_mapping,
+				       &mali_page_fault_flush_data_page,
+				       &mali_page_fault_flush_data_page_mapping);
+}
+
+struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual)
+{
+	struct mali_mmu_core *mmu = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(resource);
+
+	MALI_DEBUG_PRINT(2, ("Mali MMU: Creating Mali MMU: %s\n", resource->description));
+
+	mmu = _mali_osk_calloc(1, sizeof(struct mali_mmu_core));
+	if (NULL != mmu) {
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&mmu->hw_core, resource, MALI_MMU_REGISTERS_SIZE)) {
+			if (_MALI_OSK_ERR_OK == mali_group_add_mmu_core(group, mmu)) {
+				if (is_virtual) {
+					/* Skip reset and IRQ setup for virtual MMU */
+					return mmu;
+				}
+
+				if (_MALI_OSK_ERR_OK == mali_mmu_reset(mmu)) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					mmu->irq = _mali_osk_irq_init(resource->irq,
+								      mali_group_upper_half_mmu,
+								      group,
+								      mali_mmu_probe_trigger,
+								      mali_mmu_probe_ack,
+								      mmu,
+								      resource->description);
+					if (NULL != mmu->irq) {
+						return mmu;
+					} else {
+						MALI_PRINT_ERROR(("Mali MMU: Failed to setup interrupt handlers for MMU %s\n", mmu->hw_core.description));
+					}
+				}
+				mali_group_remove_mmu_core(group);
+			} else {
+				MALI_PRINT_ERROR(("Mali MMU: Failed to add core %s to group\n", mmu->hw_core.description));
+			}
+			mali_hw_core_delete(&mmu->hw_core);
+		}
+
+		_mali_osk_free(mmu);
+	} else {
+		MALI_PRINT_ERROR(("Failed to allocate memory for MMU\n"));
+	}
+
+	return NULL;
+}
+
+void mali_mmu_delete(struct mali_mmu_core *mmu)
+{
+	if (NULL != mmu->irq) {
+		_mali_osk_irq_term(mmu->irq);
+	}
+
+	mali_hw_core_delete(&mmu->hw_core);
+	_mali_osk_free(mmu);
+}
+
+static void mali_mmu_enable_paging(struct mali_mmu_core *mmu)
+{
+	int i;
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_PAGING);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS) & MALI_MMU_STATUS_BIT_PAGING_ENABLED) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Enable paging request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+	}
+}
+
+/**
+ * Issues the enable stall command to the MMU and waits for HW to complete the request
+ * @param mmu The MMU to enable paging for
+ * @return MALI_TRUE if HW stall was successfully engaged, otherwise MALI_FALSE (req timed out)
+ */
+static mali_bool mali_mmu_enable_stall(struct mali_mmu_core *mmu)
+{
+	int i;
+	u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+
+	if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+		MALI_DEBUG_PRINT(4, ("MMU stall is implicit when Paging is not enabled.\n"));
+		return MALI_TRUE;
+	}
+
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(3, ("Aborting MMU stall request since it is in pagefault state.\n"));
+		return MALI_FALSE;
+	}
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_STALL);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+		if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+			break;
+		}
+		if ((mmu_status & MALI_MMU_STATUS_BIT_STALL_ACTIVE) && (0 == (mmu_status & MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE))) {
+			break;
+		}
+		if (0 == (mmu_status & (MALI_MMU_STATUS_BIT_PAGING_ENABLED))) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_DEBUG_PRINT(2, ("Enable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+		return MALI_FALSE;
+	}
+
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(2, ("Aborting MMU stall request since it has a pagefault.\n"));
+		return MALI_FALSE;
+	}
+
+	return MALI_TRUE;
+}
+
+/**
+ * Issues the disable stall command to the MMU and waits for HW to complete the request
+ * @param mmu The MMU to enable paging for
+ */
+static void mali_mmu_disable_stall(struct mali_mmu_core *mmu)
+{
+	int i;
+	u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+
+	if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+		MALI_DEBUG_PRINT(3, ("MMU disable skipped since it was not enabled.\n"));
+		return;
+	}
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(2, ("Aborting MMU disable stall request since it is in pagefault state.\n"));
+		return;
+	}
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_DISABLE_STALL);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		u32 status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+		if (0 == (status & MALI_MMU_STATUS_BIT_STALL_ACTIVE)) {
+			break;
+		}
+		if (status &  MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+			break;
+		}
+		if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) MALI_DEBUG_PRINT(1, ("Disable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+}
+
+void mali_mmu_page_fault_done(struct mali_mmu_core *mmu)
+{
+	MALI_DEBUG_PRINT(4, ("Mali MMU: %s: Leaving page fault mode\n", mmu->hw_core.description));
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_PAGE_FAULT_DONE);
+}
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu)
+{
+	int i;
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, 0xCAFEBABE);
+	MALI_DEBUG_ASSERT(0xCAFEB000 == mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR));
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_HARD_RESET);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR) == 0) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Reset request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu)
+{
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT;
+	mali_bool stall_success;
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	stall_success = mali_mmu_enable_stall(mmu);
+	if (!stall_success) {
+		err = _MALI_OSK_ERR_BUSY;
+	}
+
+	MALI_DEBUG_PRINT(3, ("Mali MMU: mali_kernel_mmu_reset: %s\n", mmu->hw_core.description));
+
+	if (_MALI_OSK_ERR_OK == mali_mmu_raw_reset(mmu)) {
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+		/* no session is active, so just activate the empty page directory */
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, mali_empty_page_directory_phys);
+		mali_mmu_enable_paging(mmu);
+		err = _MALI_OSK_ERR_OK;
+	}
+	mali_mmu_disable_stall(mmu);
+
+	return err;
+}
+
+mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success = mali_mmu_enable_stall(mmu);
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+
+	if (MALI_FALSE == stall_success) {
+		/* False means that it is in Pagefault state. Not possible to disable_stall then */
+		return MALI_FALSE;
+	}
+
+	mali_mmu_disable_stall(mmu);
+	return MALI_TRUE;
+}
+
+void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+}
+
+
+void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_ZAP_ONE_LINE, MALI_MMU_PDE_ENTRY(mali_address));
+}
+
+static void mali_mmu_activate_address_space(struct mali_mmu_core *mmu, u32 page_directory)
+{
+	/* The MMU must be in stalled or page fault mode, for this writing to work */
+	MALI_DEBUG_ASSERT(0 != (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)
+				& (MALI_MMU_STATUS_BIT_STALL_ACTIVE | MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE)));
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, page_directory);
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+
+}
+
+void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir)
+{
+	mali_bool stall_success;
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	MALI_DEBUG_PRINT(5, ("Asked to activate page directory 0x%x on MMU %s\n", pagedir, mmu->hw_core.description));
+
+	stall_success = mali_mmu_enable_stall(mmu);
+	MALI_DEBUG_ASSERT(stall_success);
+	MALI_IGNORE(stall_success);
+	mali_mmu_activate_address_space(mmu, pagedir->page_directory);
+	mali_mmu_disable_stall(mmu);
+}
+
+void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success;
+
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+	MALI_DEBUG_PRINT(3, ("Activating the empty page directory on MMU %s\n", mmu->hw_core.description));
+
+	stall_success = mali_mmu_enable_stall(mmu);
+
+	/* This function can only be called when the core is idle, so it could not fail. */
+	MALI_DEBUG_ASSERT(stall_success);
+	MALI_IGNORE(stall_success);
+
+	mali_mmu_activate_address_space(mmu, mali_empty_page_directory_phys);
+	mali_mmu_disable_stall(mmu);
+}
+
+void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success;
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	MALI_DEBUG_PRINT(3, ("Activating the page fault flush page directory on MMU %s\n", mmu->hw_core.description));
+	stall_success = mali_mmu_enable_stall(mmu);
+	/* This function is expect to fail the stalling, since it might be in PageFault mode when it is called */
+	mali_mmu_activate_address_space(mmu, mali_page_fault_flush_page_directory);
+	if (MALI_TRUE == stall_success) mali_mmu_disable_stall(mmu);
+}
+
+/* Is called when we want the mmu to give an interrupt */
+static void mali_mmu_probe_trigger(void *data)
+{
+	struct mali_mmu_core *mmu = (struct mali_mmu_core *)data;
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+}
+
+/* Is called when the irq probe wants the mmu to acknowledge an interrupt from the hw */
+extern int mali_page_fault;
+static _mali_osk_errcode_t mali_mmu_probe_ack(void *data)
+{
+	struct mali_mmu_core *mmu = (struct mali_mmu_core *)data;
+	u32 int_stat;
+
+	int_stat = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS);
+
+	MALI_DEBUG_PRINT(2, ("mali_mmu_probe_irq_acknowledge: intstat 0x%x\n", int_stat));
+	if (int_stat & MALI_MMU_INTERRUPT_PAGE_FAULT) {
+		MALI_DEBUG_PRINT(2, ("Probe: Page fault detect: PASSED\n"));
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_PAGE_FAULT);
+	} else {
+		MALI_DEBUG_PRINT(1, ("Probe: Page fault detect: FAILED\n"));
+		mali_page_fault++;
+	}
+
+	if (int_stat & MALI_MMU_INTERRUPT_READ_BUS_ERROR) {
+		MALI_DEBUG_PRINT(2, ("Probe: Bus read error detect: PASSED\n"));
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+	} else {
+		MALI_DEBUG_PRINT(1, ("Probe: Bus read error detect: FAILED\n"));
+		mali_page_fault++;
+	}
+
+	if ((int_stat & (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) ==
+	    (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) {
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+#if 0
+void mali_mmu_print_state(struct mali_mmu_core *mmu)
+{
+	MALI_DEBUG_PRINT(2, ("MMU: State of %s is 0x%08x\n", mmu->hw_core.description, mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+}
+#endif
diff --git a/utgard/r6p1/common/mali_mmu.h b/utgard/r6p1/common/mali_mmu.h
new file mode 100755
index 0000000..398b209
--- /dev/null
+++ b/utgard/r6p1/common/mali_mmu.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MMU_H__
+#define __MALI_MMU_H__
+
+#include "mali_osk.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_hw_core.h"
+
+/* Forward declaration from mali_group.h */
+struct mali_group;
+
+/**
+ * MMU register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_mmu_register {
+	MALI_MMU_REGISTER_DTE_ADDR = 0x0000, /**< Current Page Directory Pointer */
+	MALI_MMU_REGISTER_STATUS = 0x0004, /**< Status of the MMU */
+	MALI_MMU_REGISTER_COMMAND = 0x0008, /**< Command register, used to control the MMU */
+	MALI_MMU_REGISTER_PAGE_FAULT_ADDR = 0x000C, /**< Logical address of the last page fault */
+	MALI_MMU_REGISTER_ZAP_ONE_LINE = 0x010, /**< Used to invalidate the mapping of a single page from the MMU */
+	MALI_MMU_REGISTER_INT_RAWSTAT = 0x0014, /**< Raw interrupt status, all interrupts visible */
+	MALI_MMU_REGISTER_INT_CLEAR = 0x0018, /**< Indicate to the MMU that the interrupt has been received */
+	MALI_MMU_REGISTER_INT_MASK = 0x001C, /**< Enable/disable types of interrupts */
+	MALI_MMU_REGISTER_INT_STATUS = 0x0020 /**< Interrupt status based on the mask */
+} mali_mmu_register;
+
+/**
+ * MMU interrupt register bits
+ * Each cause of the interrupt is reported
+ * through the (raw) interrupt status registers.
+ * Multiple interrupts can be pending, so multiple bits
+ * can be set at once.
+ */
+typedef enum mali_mmu_interrupt {
+	MALI_MMU_INTERRUPT_PAGE_FAULT = 0x01, /**< A page fault occured */
+	MALI_MMU_INTERRUPT_READ_BUS_ERROR = 0x02 /**< A bus read error occured */
+} mali_mmu_interrupt;
+
+typedef enum mali_mmu_status_bits {
+	MALI_MMU_STATUS_BIT_PAGING_ENABLED      = 1 << 0,
+	MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE   = 1 << 1,
+	MALI_MMU_STATUS_BIT_STALL_ACTIVE        = 1 << 2,
+	MALI_MMU_STATUS_BIT_IDLE                = 1 << 3,
+	MALI_MMU_STATUS_BIT_REPLAY_BUFFER_EMPTY = 1 << 4,
+	MALI_MMU_STATUS_BIT_PAGE_FAULT_IS_WRITE = 1 << 5,
+	MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE    = 1 << 31,
+} mali_mmu_status_bits;
+
+/**
+ * Definition of the MMU struct
+ * Used to track a MMU unit in the system.
+ * Contains information about the mapping of the registers
+ */
+struct mali_mmu_core {
+	struct mali_hw_core hw_core; /**< Common for all HW cores */
+	_mali_osk_irq_t *irq;        /**< IRQ handler */
+};
+
+_mali_osk_errcode_t mali_mmu_initialize(void);
+
+void mali_mmu_terminate(void);
+
+struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual);
+void mali_mmu_delete(struct mali_mmu_core *mmu);
+
+_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu);
+mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu);
+void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu);
+void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address);
+
+void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir);
+void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu);
+void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu);
+
+void mali_mmu_page_fault_done(struct mali_mmu_core *mmu);
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_mmu_get_interrupt_result(struct mali_mmu_core *mmu)
+{
+	u32 rawstat_used = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT);
+	if (0 == rawstat_used) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	}
+
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+
+MALI_STATIC_INLINE u32 mali_mmu_get_int_status(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_rawstat(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT);
+}
+
+MALI_STATIC_INLINE void mali_mmu_mask_all_interrupts(struct mali_mmu_core *mmu)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, 0);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_status(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_page_fault_addr(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_PAGE_FAULT_ADDR);
+}
+
+#endif /* __MALI_MMU_H__ */
diff --git a/utgard/r6p1/common/mali_mmu_page_directory.c b/utgard/r6p1/common/mali_mmu_page_directory.c
new file mode 100755
index 0000000..74d8cc8
--- /dev/null
+++ b/utgard/r6p1/common/mali_mmu_page_directory.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_memory.h"
+#include "mali_l2_cache.h"
+
+static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data);
+
+u32 mali_allocate_empty_page(mali_io_address *virt_addr)
+{
+	_mali_osk_errcode_t err;
+	mali_io_address mapping;
+	mali_dma_addr address;
+
+	if (_MALI_OSK_ERR_OK != mali_mmu_get_table_page(&address, &mapping)) {
+		/* Allocation failed */
+		MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to get table page for empty pgdir\n"));
+		return 0;
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(mapping);
+
+	err = fill_page(mapping, 0);
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_mmu_release_table_page(address, mapping);
+		MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to zero page\n"));
+		return 0;
+	}
+
+	*virt_addr = mapping;
+	return address;
+}
+
+void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr)
+{
+	if (MALI_INVALID_PAGE != address) {
+		mali_mmu_release_table_page(address, virt_addr);
+	}
+}
+
+_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory,
+		mali_io_address *page_directory_mapping,
+		mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+		mali_dma_addr *data_page, mali_io_address *data_page_mapping)
+{
+	_mali_osk_errcode_t err;
+
+	err = mali_mmu_get_table_page(data_page, data_page_mapping);
+	if (_MALI_OSK_ERR_OK == err) {
+		err = mali_mmu_get_table_page(page_table, page_table_mapping);
+		if (_MALI_OSK_ERR_OK == err) {
+			err = mali_mmu_get_table_page(page_directory, page_directory_mapping);
+			if (_MALI_OSK_ERR_OK == err) {
+				fill_page(*data_page_mapping, 0);
+				fill_page(*page_table_mapping, *data_page | MALI_MMU_FLAGS_DEFAULT);
+				fill_page(*page_directory_mapping, *page_table | MALI_MMU_FLAGS_PRESENT);
+				MALI_SUCCESS;
+			}
+			mali_mmu_release_table_page(*page_table, *page_table_mapping);
+			*page_table = MALI_INVALID_PAGE;
+		}
+		mali_mmu_release_table_page(*data_page, *data_page_mapping);
+		*data_page = MALI_INVALID_PAGE;
+	}
+	return err;
+}
+
+void mali_destroy_fault_flush_pages(
+	mali_dma_addr *page_directory, mali_io_address *page_directory_mapping,
+	mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+	mali_dma_addr *data_page, mali_io_address *data_page_mapping)
+{
+	if (MALI_INVALID_PAGE != *page_directory) {
+		mali_mmu_release_table_page(*page_directory, *page_directory_mapping);
+		*page_directory = MALI_INVALID_PAGE;
+		*page_directory_mapping = NULL;
+	}
+
+	if (MALI_INVALID_PAGE != *page_table) {
+		mali_mmu_release_table_page(*page_table, *page_table_mapping);
+		*page_table = MALI_INVALID_PAGE;
+		*page_table_mapping = NULL;
+	}
+
+	if (MALI_INVALID_PAGE != *data_page) {
+		mali_mmu_release_table_page(*data_page, *data_page_mapping);
+		*data_page = MALI_INVALID_PAGE;
+		*data_page_mapping = NULL;
+	}
+}
+
+static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data)
+{
+	int i;
+	MALI_DEBUG_ASSERT_POINTER(mapping);
+
+	for (i = 0; i < MALI_MMU_PAGE_SIZE / 4; i++) {
+		_mali_osk_mem_iowrite32_relaxed(mapping, i * sizeof(u32), data);
+	}
+	_mali_osk_mem_barrier();
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size)
+{
+	const int first_pde = MALI_MMU_PDE_ENTRY(mali_address);
+	const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1);
+	_mali_osk_errcode_t err;
+	mali_io_address pde_mapping;
+	mali_dma_addr pde_phys;
+	int i, page_count;
+	u32 start_address;
+	if (last_pde < first_pde)
+		return _MALI_OSK_ERR_INVALID_ARGS;
+
+	for (i = first_pde; i <= last_pde; i++) {
+		if (0 == (_mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+						 i * sizeof(u32)) & MALI_MMU_FLAGS_PRESENT)) {
+			/* Page table not present */
+			MALI_DEBUG_ASSERT(0 == pagedir->page_entries_usage_count[i]);
+			MALI_DEBUG_ASSERT(NULL == pagedir->page_entries_mapped[i]);
+
+			err = mali_mmu_get_table_page(&pde_phys, &pde_mapping);
+			if (_MALI_OSK_ERR_OK != err) {
+				MALI_PRINT_ERROR(("Failed to allocate page table page.\n"));
+				return err;
+			}
+			pagedir->page_entries_mapped[i] = pde_mapping;
+
+			/* Update PDE, mark as present */
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32),
+							pde_phys | MALI_MMU_FLAGS_PRESENT);
+
+			MALI_DEBUG_ASSERT(0 == pagedir->page_entries_usage_count[i]);
+		}
+
+		if (first_pde == last_pde) {
+			pagedir->page_entries_usage_count[i] += size / MALI_MMU_PAGE_SIZE;
+		} else if (i == first_pde) {
+			start_address = i * MALI_MMU_VIRTUAL_PAGE_SIZE;
+			page_count = (start_address + MALI_MMU_VIRTUAL_PAGE_SIZE - mali_address) / MALI_MMU_PAGE_SIZE;
+			pagedir->page_entries_usage_count[i] += page_count;
+		} else if (i == last_pde) {
+			start_address = i * MALI_MMU_VIRTUAL_PAGE_SIZE;
+			page_count = (mali_address + size - start_address) / MALI_MMU_PAGE_SIZE;
+			pagedir->page_entries_usage_count[i] += page_count;
+		} else {
+			pagedir->page_entries_usage_count[i] = 1024;
+		}
+	}
+	_mali_osk_write_mem_barrier();
+
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE void mali_mmu_zero_pte(mali_io_address page_table, u32 mali_address, u32 size)
+{
+	int i;
+	const int first_pte = MALI_MMU_PTE_ENTRY(mali_address);
+	const int last_pte = MALI_MMU_PTE_ENTRY(mali_address + size - 1);
+
+	for (i = first_pte; i <= last_pte; i++) {
+		_mali_osk_mem_iowrite32_relaxed(page_table, i * sizeof(u32), 0);
+	}
+}
+
+static u32 mali_page_directory_get_phys_address(struct mali_page_directory *pagedir, u32 index)
+{
+	return (_mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+				       index * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK);
+}
+
+
+_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size)
+{
+	const int first_pde = MALI_MMU_PDE_ENTRY(mali_address);
+	const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1);
+	u32 left = size;
+	int i;
+	mali_bool pd_changed = MALI_FALSE;
+	u32 pages_to_invalidate[3]; /* hard-coded to 3: max two pages from the PT level plus max one page from PD level */
+	u32 num_pages_inv = 0;
+	mali_bool invalidate_all = MALI_FALSE; /* safety mechanism in case page_entries_usage_count is unreliable */
+
+	/* For all page directory entries in range. */
+	for (i = first_pde; i <= last_pde; i++) {
+		u32 size_in_pde, offset;
+
+		MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[i]);
+		MALI_DEBUG_ASSERT(0 != pagedir->page_entries_usage_count[i]);
+
+		/* Offset into page table, 0 if mali_address is 4MiB aligned */
+		offset = (mali_address & (MALI_MMU_VIRTUAL_PAGE_SIZE - 1));
+		if (left < MALI_MMU_VIRTUAL_PAGE_SIZE - offset) {
+			size_in_pde = left;
+		} else {
+			size_in_pde = MALI_MMU_VIRTUAL_PAGE_SIZE - offset;
+		}
+
+		pagedir->page_entries_usage_count[i] -= size_in_pde / MALI_MMU_PAGE_SIZE;
+
+		/* If entire page table is unused, free it */
+		if (0 == pagedir->page_entries_usage_count[i]) {
+			u32 page_phys;
+			void *page_virt;
+			MALI_DEBUG_PRINT(4, ("Releasing page table as this is the last reference\n"));
+			/* last reference removed, no need to zero out each PTE  */
+
+			page_phys = MALI_MMU_ENTRY_ADDRESS(_mali_osk_mem_ioread32(pagedir->page_directory_mapped, i * sizeof(u32)));
+			page_virt = pagedir->page_entries_mapped[i];
+			pagedir->page_entries_mapped[i] = NULL;
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0);
+
+			mali_mmu_release_table_page(page_phys, page_virt);
+			pd_changed = MALI_TRUE;
+		} else {
+			MALI_DEBUG_ASSERT(num_pages_inv < 2);
+			if (num_pages_inv < 2) {
+				pages_to_invalidate[num_pages_inv] = mali_page_directory_get_phys_address(pagedir, i);
+				num_pages_inv++;
+			} else {
+				invalidate_all = MALI_TRUE;
+			}
+
+			/* If part of the page table is still in use, zero the relevant PTEs */
+			mali_mmu_zero_pte(pagedir->page_entries_mapped[i], mali_address, size_in_pde);
+		}
+
+		left -= size_in_pde;
+		mali_address += size_in_pde;
+	}
+	_mali_osk_write_mem_barrier();
+
+	/* L2 pages invalidation */
+	if (MALI_TRUE == pd_changed) {
+		MALI_DEBUG_ASSERT(num_pages_inv < 3);
+		if (num_pages_inv < 3) {
+			pages_to_invalidate[num_pages_inv] = pagedir->page_directory;
+			num_pages_inv++;
+		} else {
+			invalidate_all = MALI_TRUE;
+		}
+	}
+
+	if (invalidate_all) {
+		mali_l2_cache_invalidate_all();
+	} else {
+		mali_l2_cache_invalidate_all_pages(pages_to_invalidate, num_pages_inv);
+	}
+
+	MALI_SUCCESS;
+}
+
+struct mali_page_directory *mali_mmu_pagedir_alloc(void)
+{
+	struct mali_page_directory *pagedir;
+	_mali_osk_errcode_t err;
+	mali_dma_addr phys;
+
+	pagedir = _mali_osk_calloc(1, sizeof(struct mali_page_directory));
+	if (NULL == pagedir) {
+		return NULL;
+	}
+
+	err = mali_mmu_get_table_page(&phys, &pagedir->page_directory_mapped);
+	if (_MALI_OSK_ERR_OK != err) {
+		_mali_osk_free(pagedir);
+		return NULL;
+	}
+
+	pagedir->page_directory = (u32)phys;
+
+	/* Zero page directory */
+	fill_page(pagedir->page_directory_mapped, 0);
+
+	return pagedir;
+}
+
+void mali_mmu_pagedir_free(struct mali_page_directory *pagedir)
+{
+	const int num_page_table_entries = sizeof(pagedir->page_entries_mapped) / sizeof(pagedir->page_entries_mapped[0]);
+	int i;
+
+	/* Free referenced page tables and zero PDEs. */
+	for (i = 0; i < num_page_table_entries; i++) {
+		if (pagedir->page_directory_mapped && (_mali_osk_mem_ioread32(
+				pagedir->page_directory_mapped,
+				sizeof(u32)*i) & MALI_MMU_FLAGS_PRESENT)) {
+			mali_dma_addr phys = _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+					     i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK;
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0);
+			mali_mmu_release_table_page(phys, pagedir->page_entries_mapped[i]);
+		}
+	}
+	_mali_osk_write_mem_barrier();
+
+	/* Free the page directory page. */
+	mali_mmu_release_table_page(pagedir->page_directory, pagedir->page_directory_mapped);
+
+	_mali_osk_free(pagedir);
+}
+
+
+void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address,
+			     mali_dma_addr phys_address, u32 size, u32 permission_bits)
+{
+	u32 end_address = mali_address + size;
+	u32 mali_phys = (u32)phys_address;
+
+	/* Map physical pages into MMU page tables */
+	for (; mali_address < end_address; mali_address += MALI_MMU_PAGE_SIZE, mali_phys += MALI_MMU_PAGE_SIZE) {
+		MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)]);
+		_mali_osk_mem_iowrite32_relaxed(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)],
+						MALI_MMU_PTE_ENTRY(mali_address) * sizeof(u32),
+						mali_phys | permission_bits);
+	}
+}
+
+void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr)
+{
+#if defined(DEBUG)
+	u32 pde_index, pte_index;
+	u32 pde, pte;
+
+	pde_index = MALI_MMU_PDE_ENTRY(fault_addr);
+	pte_index = MALI_MMU_PTE_ENTRY(fault_addr);
+
+
+	pde = _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+				     pde_index * sizeof(u32));
+
+
+	if (pde & MALI_MMU_FLAGS_PRESENT) {
+		u32 pte_addr = MALI_MMU_ENTRY_ADDRESS(pde);
+
+		pte = _mali_osk_mem_ioread32(pagedir->page_entries_mapped[pde_index],
+					     pte_index * sizeof(u32));
+
+		MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table present: %08x\n"
+				     "\t\tPTE: %08x, page %08x is %s\n",
+				     fault_addr, pte_addr, pte,
+				     MALI_MMU_ENTRY_ADDRESS(pte),
+				     pte & MALI_MMU_FLAGS_DEFAULT ? "rw" : "not present"));
+	} else {
+		MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table not present: %08x\n",
+				     fault_addr, pde));
+	}
+#else
+	MALI_IGNORE(pagedir);
+	MALI_IGNORE(fault_addr);
+#endif
+}
+
+/* For instrumented */
+struct dump_info {
+	u32 buffer_left;
+	u32 register_writes_size;
+	u32 page_table_dump_size;
+	u32 *buffer;
+};
+
+static _mali_osk_errcode_t writereg(u32 where, u32 what, const char *comment, struct dump_info *info)
+{
+	if (NULL != info) {
+		info->register_writes_size += sizeof(u32) * 2; /* two 32-bit words */
+
+		if (NULL != info->buffer) {
+			/* check that we have enough space */
+			if (info->buffer_left < sizeof(u32) * 2) MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+			*info->buffer = where;
+			info->buffer++;
+
+			*info->buffer = what;
+			info->buffer++;
+
+			info->buffer_left -= sizeof(u32) * 2;
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t mali_mmu_dump_page(mali_io_address page, u32 phys_addr, struct dump_info *info)
+{
+	if (NULL != info) {
+		/* 4096 for the page and 4 bytes for the address */
+		const u32 page_size_in_elements = MALI_MMU_PAGE_SIZE / 4;
+		const u32 page_size_in_bytes = MALI_MMU_PAGE_SIZE;
+		const u32 dump_size_in_bytes = MALI_MMU_PAGE_SIZE + 4;
+
+		info->page_table_dump_size += dump_size_in_bytes;
+
+		if (NULL != info->buffer) {
+			if (info->buffer_left < dump_size_in_bytes) MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+			*info->buffer = phys_addr;
+			info->buffer++;
+
+			_mali_osk_memcpy(info->buffer, page, page_size_in_bytes);
+			info->buffer += page_size_in_elements;
+
+			info->buffer_left -= dump_size_in_bytes;
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t dump_mmu_page_table(struct mali_page_directory *pagedir, struct dump_info *info)
+{
+	MALI_DEBUG_ASSERT_POINTER(pagedir);
+	MALI_DEBUG_ASSERT_POINTER(info);
+
+	if (NULL != pagedir->page_directory_mapped) {
+		int i;
+
+		MALI_CHECK_NO_ERROR(
+			mali_mmu_dump_page(pagedir->page_directory_mapped, pagedir->page_directory, info)
+		);
+
+		for (i = 0; i < 1024; i++) {
+			if (NULL != pagedir->page_entries_mapped[i]) {
+				MALI_CHECK_NO_ERROR(
+					mali_mmu_dump_page(pagedir->page_entries_mapped[i],
+							   _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+									   i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK, info)
+				);
+			}
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t dump_mmu_registers(struct mali_page_directory *pagedir, struct dump_info *info)
+{
+	MALI_CHECK_NO_ERROR(writereg(0x00000000, pagedir->page_directory,
+				     "set the page directory address", info));
+	MALI_CHECK_NO_ERROR(writereg(0x00000008, 4, "zap???", info));
+	MALI_CHECK_NO_ERROR(writereg(0x00000008, 0, "enable paging", info));
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args)
+{
+	struct dump_info info = { 0, 0, 0, NULL };
+	struct mali_session_data *session_data;
+
+	session_data = (struct mali_session_data *)(uintptr_t)(args->ctx);
+	MALI_DEBUG_ASSERT_POINTER(session_data);
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info));
+	MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info));
+	args->size = info.register_writes_size + info.page_table_dump_size;
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args)
+{
+	struct dump_info info = { 0, 0, 0, NULL };
+	struct mali_session_data *session_data;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	session_data = (struct mali_session_data *)(uintptr_t)(args->ctx);
+	MALI_DEBUG_ASSERT_POINTER(session_data);
+
+	info.buffer_left = args->size;
+	info.buffer = (u32 *)(uintptr_t)args->buffer;
+
+	args->register_writes = (uintptr_t)info.buffer;
+	MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info));
+
+	args->page_table_dump = (uintptr_t)info.buffer;
+	MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info));
+
+	args->register_writes_size = info.register_writes_size;
+	args->page_table_dump_size = info.page_table_dump_size;
+
+	MALI_SUCCESS;
+}
diff --git a/utgard/r6p1/common/mali_mmu_page_directory.h b/utgard/r6p1/common/mali_mmu_page_directory.h
new file mode 100755
index 0000000..4fc1058
--- /dev/null
+++ b/utgard/r6p1/common/mali_mmu_page_directory.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MMU_PAGE_DIRECTORY_H__
+#define __MALI_MMU_PAGE_DIRECTORY_H__
+
+#include "mali_osk.h"
+
+/**
+ * Size of an MMU page in bytes
+ */
+#define MALI_MMU_PAGE_SIZE 0x1000
+
+/*
+ * Size of the address space referenced by a page table page
+ */
+#define MALI_MMU_VIRTUAL_PAGE_SIZE 0x400000 /* 4 MiB */
+
+/**
+ * Page directory index from address
+ * Calculates the page directory index from the given address
+ */
+#define MALI_MMU_PDE_ENTRY(address) (((address)>>22) & 0x03FF)
+
+/**
+ * Page table index from address
+ * Calculates the page table index from the given address
+ */
+#define MALI_MMU_PTE_ENTRY(address) (((address)>>12) & 0x03FF)
+
+/**
+ * Extract the memory address from an PDE/PTE entry
+ */
+#define MALI_MMU_ENTRY_ADDRESS(value) ((value) & 0xFFFFFC00)
+
+#define MALI_INVALID_PAGE ((u32)(~0))
+
+/**
+ *
+ */
+typedef enum mali_mmu_entry_flags {
+	MALI_MMU_FLAGS_PRESENT = 0x01,
+	MALI_MMU_FLAGS_READ_PERMISSION = 0x02,
+	MALI_MMU_FLAGS_WRITE_PERMISSION = 0x04,
+	MALI_MMU_FLAGS_OVERRIDE_CACHE  = 0x8,
+	MALI_MMU_FLAGS_WRITE_CACHEABLE  = 0x10,
+	MALI_MMU_FLAGS_WRITE_ALLOCATE  = 0x20,
+	MALI_MMU_FLAGS_WRITE_BUFFERABLE  = 0x40,
+	MALI_MMU_FLAGS_READ_CACHEABLE  = 0x80,
+	MALI_MMU_FLAGS_READ_ALLOCATE  = 0x100,
+	MALI_MMU_FLAGS_MASK = 0x1FF,
+} mali_mmu_entry_flags;
+
+
+#define MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE ( \
+		MALI_MMU_FLAGS_PRESENT | \
+		MALI_MMU_FLAGS_READ_PERMISSION |  \
+		MALI_MMU_FLAGS_WRITE_PERMISSION | \
+		MALI_MMU_FLAGS_OVERRIDE_CACHE | \
+		MALI_MMU_FLAGS_WRITE_CACHEABLE | \
+		MALI_MMU_FLAGS_WRITE_BUFFERABLE | \
+		MALI_MMU_FLAGS_READ_CACHEABLE | \
+		MALI_MMU_FLAGS_READ_ALLOCATE )
+
+#define MALI_MMU_FLAGS_DEFAULT ( \
+				 MALI_MMU_FLAGS_PRESENT | \
+				 MALI_MMU_FLAGS_READ_PERMISSION |  \
+				 MALI_MMU_FLAGS_WRITE_PERMISSION )
+
+
+struct mali_page_directory {
+	u32 page_directory; /**< Physical address of the memory session's page directory */
+	mali_io_address page_directory_mapped; /**< Pointer to the mapped version of the page directory into the kernel's address space */
+
+	mali_io_address page_entries_mapped[1024]; /**< Pointers to the page tables which exists in the page directory mapped into the kernel's address space */
+	u32   page_entries_usage_count[1024]; /**< Tracks usage count of the page table pages, so they can be releases on the last reference */
+};
+
+/* Map Mali virtual address space (i.e. ensure page tables exist for the virtual range)  */
+_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size);
+_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size);
+
+/* Back virtual address space with actual pages. Assumes input is contiguous and 4k aligned. */
+void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address,
+			     mali_dma_addr phys_address, u32 size, u32 permission_bits);
+
+u32 mali_allocate_empty_page(mali_io_address *virtual);
+void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr);
+_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory,
+		mali_io_address *page_directory_mapping,
+		mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+		mali_dma_addr *data_page, mali_io_address *data_page_mapping);
+void mali_destroy_fault_flush_pages(
+	mali_dma_addr *page_directory, mali_io_address *page_directory_mapping,
+	mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+	mali_dma_addr *data_page, mali_io_address *data_page_mapping);
+
+struct mali_page_directory *mali_mmu_pagedir_alloc(void);
+void mali_mmu_pagedir_free(struct mali_page_directory *pagedir);
+
+void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr);
+
+#endif /* __MALI_MMU_PAGE_DIRECTORY_H__ */
diff --git a/utgard/r6p1/common/mali_osk.h b/utgard/r6p1/common/mali_osk.h
new file mode 100755
index 0000000..5c20fc3
--- /dev/null
+++ b/utgard/r6p1/common/mali_osk.h
@@ -0,0 +1,1389 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk.h
+ * Defines the OS abstraction layer for the kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_H__
+#define __MALI_OSK_H__
+
+#include <linux/seq_file.h>
+#include "mali_osk_types.h"
+#include "mali_osk_specific.h"           /* include any per-os specifics */
+#include "mali_osk_locks.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs
+ *
+ * @{
+ */
+
+/** @addtogroup _mali_osk_lock OSK Mutual Exclusion Locks
+ * @{ */
+
+#ifdef DEBUG
+/** @brief Macro for asserting that the current thread holds a given lock
+ */
+#define MALI_DEBUG_ASSERT_LOCK_HELD(l) MALI_DEBUG_ASSERT(_mali_osk_lock_get_owner((_mali_osk_lock_debug_t *)l) == _mali_osk_get_tid());
+
+/** @brief returns a lock's owner (thread id) if debugging is enabled
+ */
+#else
+#define MALI_DEBUG_ASSERT_LOCK_HELD(l) do {} while(0)
+#endif
+
+#define _mali_osk_ctxprintf     seq_printf
+
+/** @} */ /* end group _mali_osk_lock */
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Find the containing structure of another structure
+ *
+ * This is the reverse of the operation 'offsetof'. This means that the
+ * following condition is satisfied:
+ *
+ *   ptr == _MALI_OSK_CONTAINER_OF( &ptr->member, type, member )
+ *
+ * When ptr is of type 'type'.
+ *
+ * Its purpose it to recover a larger structure that has wrapped a smaller one.
+ *
+ * @note no type or memory checking occurs to ensure that a wrapper structure
+ * does in fact exist, and that it is being recovered with respect to the
+ * correct member.
+ *
+ * @param ptr the pointer to the member that is contained within the larger
+ * structure
+ * @param type the type of the structure that contains the member
+ * @param member the name of the member in the structure that ptr points to.
+ * @return a pointer to a \a type object which contains \a member, as pointed
+ * to by \a ptr.
+ */
+#define _MALI_OSK_CONTAINER_OF(ptr, type, member) \
+	((type *)( ((char *)ptr) - offsetof(type,member) ))
+
+/** @addtogroup _mali_osk_wq
+ * @{ */
+
+/** @brief Initialize work queues (for deferred work)
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_wq_init(void);
+
+/** @brief Terminate work queues (for deferred work)
+ */
+void _mali_osk_wq_term(void);
+
+/** @brief Create work in the work queue
+ *
+ * Creates a work object which can be scheduled in the work queue. When
+ * scheduled, \a handler will be called with \a data as the argument.
+ *
+ * Refer to \ref _mali_osk_wq_schedule_work() for details on how work
+ * is scheduled in the queue.
+ *
+ * The returned pointer must be freed with \ref _mali_osk_wq_delete_work()
+ * when no longer needed.
+ */
+_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief A high priority version of \a _mali_osk_wq_create_work()
+ *
+ * Creates a work object which can be scheduled in the high priority work queue.
+ *
+ * This is unfortunately needed to get low latency scheduling of the Mali cores.  Normally we would
+ * schedule the next job in hw_irq or tasklet, but often we can't since we need to synchronously map
+ * and unmap shared memory when a job is connected to external fences (timelines). And this requires
+ * taking a mutex.
+ *
+ * We do signal a lot of other (low priority) work also as part of the job being finished, and if we
+ * don't set this Mali scheduling thread as high priority, we see that the CPU scheduler often runs
+ * random things instead of starting the next GPU job when the GPU is idle.  So setting the gpu
+ * scheduler to high priority does give a visually more responsive system.
+ *
+ * Start the high priority work with: \a _mali_osk_wq_schedule_work_high_pri()
+ */
+_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief Delete a work object
+ *
+ * This will flush the work queue to ensure that the work handler will not
+ * be called after deletion.
+ */
+void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work);
+
+/** @brief Delete a work object
+ *
+ * This will NOT flush the work queue, so only call this if you are sure that the work handler will
+ * not be called after deletion.
+ */
+void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work);
+
+/** @brief Cause a queued, deferred call of the work handler
+ *
+ * _mali_osk_wq_schedule_work provides a mechanism for enqueuing deferred calls
+ * to the work handler. After calling \ref _mali_osk_wq_schedule_work(), the
+ * work handler will be scheduled to run at some point in the future.
+ *
+ * Typically this is called by the IRQ upper-half to defer further processing of
+ * IRQ-related work to the IRQ bottom-half handler. This is necessary for work
+ * that cannot be done in an IRQ context by the IRQ upper-half handler. Timer
+ * callbacks also use this mechanism, because they are treated as though they
+ * operate in an IRQ context. Refer to \ref _mali_osk_timer_t for more
+ * information.
+ *
+ * Code that operates in a kernel-process context (with no IRQ context
+ * restrictions) may also enqueue deferred calls to the IRQ bottom-half. The
+ * advantage over direct calling is that deferred calling allows the caller and
+ * IRQ bottom half to hold the same mutex, with a guarantee that they will not
+ * deadlock just by using this mechanism.
+ *
+ * _mali_osk_wq_schedule_work() places deferred call requests on a queue, to
+ * allow for more than one thread to make a deferred call. Therfore, if it is
+ * called 'K' times, then the IRQ bottom-half will be scheduled 'K' times too.
+ * 'K' is a number that is implementation-specific.
+ *
+ * _mali_osk_wq_schedule_work() is guaranteed to not block on:
+ * - enqueuing a deferred call request.
+ * - the completion of the work handler.
+ *
+ * This is to prevent deadlock. For example, if _mali_osk_wq_schedule_work()
+ * blocked, then it would cause a deadlock when the following two conditions
+ * hold:
+ * - The work handler callback (of type _mali_osk_wq_work_handler_t) locks
+ * a mutex
+ * - And, at the same time, the caller of _mali_osk_wq_schedule_work() also
+ * holds the same mutex
+ *
+ * @note care must be taken to not overflow the queue that
+ * _mali_osk_wq_schedule_work() operates on. Code must be structured to
+ * ensure that the number of requests made to the queue is bounded. Otherwise,
+ * work will be lost.
+ *
+ * The queue that _mali_osk_wq_schedule_work implements is a FIFO of N-writer,
+ * 1-reader type. The writers are the callers of _mali_osk_wq_schedule_work
+ * (all OSK-registered IRQ upper-half handlers in the system, watchdog timers,
+ * callers from a Kernel-process context). The reader is a single thread that
+ * handles all OSK-registered work.
+ *
+ * @param work a pointer to the _mali_osk_wq_work_t object corresponding to the
+ * work to begin processing.
+ */
+void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work);
+
+/** @brief Cause a queued, deferred call of the high priority work handler
+ *
+ * Function is the same as \a _mali_osk_wq_schedule_work() with the only
+ * difference that it runs in a high (real time) priority on the system.
+ *
+ * Should only be used as a substitue for doing the same work in interrupts.
+ *
+ * This is allowed to sleep, but the work should be small since it will block
+ * all other applications.
+*/
+void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work);
+
+/** @brief Flush the work queue
+ *
+ * This will flush the OSK work queue, ensuring all work in the queue has
+ * completed before returning.
+ *
+ * Since this blocks on the completion of work in the work-queue, the
+ * caller of this function \b must \b not hold any mutexes that are taken by
+ * any registered work handler. To do so may cause a deadlock.
+ *
+ */
+void _mali_osk_wq_flush(void);
+
+/** @brief Create work in the delayed work queue
+ *
+ * Creates a work object which can be scheduled in the work queue. When
+ * scheduled, a timer will be start and the \a handler will be called with
+ * \a data as the argument when timer out
+ *
+ * Refer to \ref _mali_osk_wq_delayed_schedule_work() for details on how work
+ * is scheduled in the queue.
+ *
+ * The returned pointer must be freed with \ref _mali_osk_wq_delayed_delete_work_nonflush()
+ * when no longer needed.
+ */
+_mali_osk_wq_delayed_work_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief Delete a work object
+ *
+ * This will NOT flush the work queue, so only call this if you are sure that the work handler will
+ * not be called after deletion.
+ */
+void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Cancel a delayed work without waiting for it to finish
+ *
+ * Note that the \a work callback function may still be running on return from
+ * _mali_osk_wq_delayed_cancel_work_async().
+ *
+ * @param work The delayed work to be cancelled
+ */
+void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Cancel a delayed work and wait for it to finish
+ *
+ * When this function returns, the \a work was either cancelled or it finished running.
+ *
+ * @param work The delayed work to be cancelled
+ */
+void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Put \a work task in global workqueue after delay
+ *
+ * After waiting for a given time this puts a job in the kernel-global
+ * workqueue.
+ *
+ * If \a work was already on a queue, this function will return without doing anything
+ *
+ * @param work job to be done
+ * @param delay number of jiffies to wait or 0 for immediate execution
+ */
+void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay);
+
+/** @} */ /* end group _mali_osk_wq */
+
+
+/** @addtogroup _mali_osk_irq
+ * @{ */
+
+/** @brief Initialize IRQ handling for a resource
+ *
+ * Registers an interrupt handler \a uhandler for the given IRQ number \a irqnum.
+ * \a data will be passed as argument to the handler when an interrupt occurs.
+ *
+ * If \a irqnum is -1, _mali_osk_irq_init will probe for the IRQ number using
+ * the supplied \a trigger_func and \a ack_func. These functions will also
+ * receive \a data as their argument.
+ *
+ * @param irqnum The IRQ number that the resource uses, as seen by the CPU.
+ * The value -1 has a special meaning which indicates the use of probing, and
+ * trigger_func and ack_func must be non-NULL.
+ * @param uhandler The interrupt handler, corresponding to a ISR handler for
+ * the resource
+ * @param int_data resource specific data, which will be passed to uhandler
+ * @param trigger_func Optional: a function to trigger the resource's irq, to
+ * probe for the interrupt. Use NULL if irqnum != -1.
+ * @param ack_func Optional: a function to acknowledge the resource's irq, to
+ * probe for the interrupt. Use NULL if irqnum != -1.
+ * @param probe_data resource-specific data, which will be passed to
+ * (if present) trigger_func and ack_func
+ * @param description textual description of the IRQ resource.
+ * @return on success, a pointer to a _mali_osk_irq_t object, which represents
+ * the IRQ handling on this resource. NULL on failure.
+ */
+_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description);
+
+/** @brief Terminate IRQ handling on a resource.
+ *
+ * This will disable the interrupt from the device, and then waits for any
+ * currently executing IRQ handlers to complete.
+ *
+ * @note If work is deferred to an IRQ bottom-half handler through
+ * \ref _mali_osk_wq_schedule_work(), be sure to flush any remaining work
+ * with \ref _mali_osk_wq_flush() or (implicitly) with \ref _mali_osk_wq_delete_work()
+ *
+ * @param irq a pointer to the _mali_osk_irq_t object corresponding to the
+ * resource whose IRQ handling is to be terminated.
+ */
+void _mali_osk_irq_term(_mali_osk_irq_t *irq);
+
+/** @} */ /* end group _mali_osk_irq */
+
+
+/** @addtogroup _mali_osk_atomic
+ * @{ */
+
+/** @brief Decrement an atomic counter
+ *
+ * @note It is an error to decrement the counter beyond -(1<<23)
+ *
+ * @param atom pointer to an atomic counter */
+void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom);
+
+/** @brief Decrement an atomic counter, return new value
+ *
+ * @param atom pointer to an atomic counter
+ * @return The new value, after decrement */
+u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom);
+
+/** @brief Increment an atomic counter
+ *
+ * @note It is an error to increment the counter beyond (1<<23)-1
+ *
+ * @param atom pointer to an atomic counter */
+void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom);
+
+/** @brief Increment an atomic counter, return new value
+ *
+ * @param atom pointer to an atomic counter */
+u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom);
+
+/** @brief Initialize an atomic counter
+ *
+ * @note the parameter required is a u32, and so signed integers should be
+ * cast to u32.
+ *
+ * @param atom pointer to an atomic counter
+ * @param val the value to initialize the atomic counter.
+ */
+void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val);
+
+/** @brief Read a value from an atomic counter
+ *
+ * This can only be safely used to determine the value of the counter when it
+ * is guaranteed that other threads will not be modifying the counter. This
+ * makes its usefulness limited.
+ *
+ * @param atom pointer to an atomic counter
+ */
+u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom);
+
+/** @brief Terminate an atomic counter
+ *
+ * @param atom pointer to an atomic counter
+ */
+void _mali_osk_atomic_term(_mali_osk_atomic_t *atom);
+
+/** @brief Assign a new val to atomic counter, and return the old atomic counter
+ *
+ * @param atom pointer to an atomic counter
+ * @param val the new value assign to the atomic counter
+ * @return the old value of the atomic counter
+ */
+u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val);
+/** @} */  /* end group _mali_osk_atomic */
+
+
+/** @defgroup _mali_osk_memory OSK Memory Allocation
+ * @{ */
+
+/** @brief Allocate zero-initialized memory.
+ *
+ * Returns a buffer capable of containing at least \a n elements of \a size
+ * bytes each. The buffer is initialized to zero.
+ *
+ * If there is a need for a bigger block of memory (16KB or bigger), then
+ * consider to use _mali_osk_vmalloc() instead, as this function might
+ * map down to a OS function with size limitations.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * @param n Number of elements to allocate
+ * @param size Size of each element
+ * @return On success, the zero-initialized buffer allocated. NULL on failure
+ */
+void *_mali_osk_calloc(u32 n, u32 size);
+
+/** @brief Allocate memory.
+ *
+ * Returns a buffer capable of containing at least \a size bytes. The
+ * contents of the buffer are undefined.
+ *
+ * If there is a need for a bigger block of memory (16KB or bigger), then
+ * consider to use _mali_osk_vmalloc() instead, as this function might
+ * map down to a OS function with size limitations.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * Remember to free memory using _mali_osk_free().
+ * @param size Number of bytes to allocate
+ * @return On success, the buffer allocated. NULL on failure.
+ */
+void *_mali_osk_malloc(u32 size);
+
+/** @brief Free memory.
+ *
+ * Reclaims the buffer pointed to by the parameter \a ptr for the system.
+ * All memory returned from _mali_osk_malloc() and _mali_osk_calloc()
+ * must be freed before the application exits. Otherwise,
+ * a memory leak will occur.
+ *
+ * Memory must be freed once. It is an error to free the same non-NULL pointer
+ * more than once.
+ *
+ * It is legal to free the NULL pointer.
+ *
+ * @param ptr Pointer to buffer to free
+ */
+void _mali_osk_free(void *ptr);
+
+/** @brief Allocate memory.
+ *
+ * Returns a buffer capable of containing at least \a size bytes. The
+ * contents of the buffer are undefined.
+ *
+ * This function is potentially slower than _mali_osk_malloc() and _mali_osk_calloc(),
+ * but do support bigger sizes.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * Remember to free memory using _mali_osk_free().
+ * @param size Number of bytes to allocate
+ * @return On success, the buffer allocated. NULL on failure.
+ */
+void *_mali_osk_valloc(u32 size);
+
+/** @brief Free memory.
+ *
+ * Reclaims the buffer pointed to by the parameter \a ptr for the system.
+ * All memory returned from _mali_osk_valloc() must be freed before the
+ * application exits. Otherwise a memory leak will occur.
+ *
+ * Memory must be freed once. It is an error to free the same non-NULL pointer
+ * more than once.
+ *
+ * It is legal to free the NULL pointer.
+ *
+ * @param ptr Pointer to buffer to free
+ */
+void _mali_osk_vfree(void *ptr);
+
+/** @brief Copies memory.
+ *
+ * Copies the \a len bytes from the buffer pointed by the parameter \a src
+ * directly to the buffer pointed by \a dst.
+ *
+ * It is an error for \a src to overlap \a dst anywhere in \a len bytes.
+ *
+ * @param dst Pointer to the destination array where the content is to be
+ * copied.
+ * @param src Pointer to the source of data to be copied.
+ * @param len Number of bytes to copy.
+ * @return \a dst is always passed through unmodified.
+ */
+void *_mali_osk_memcpy(void *dst, const void *src, u32 len);
+
+/** @brief Fills memory.
+ *
+ * Sets the first \a n bytes of the block of memory pointed to by \a s to
+ * the specified value
+ * @param s Pointer to the block of memory to fill.
+ * @param c Value to be set, passed as u32. Only the 8 Least Significant Bits (LSB)
+ * are used.
+ * @param n Number of bytes to be set to the value.
+ * @return \a s is always passed through unmodified
+ */
+void *_mali_osk_memset(void *s, u32 c, u32 n);
+/** @} */ /* end group _mali_osk_memory */
+
+
+/** @brief Checks the amount of memory allocated
+ *
+ * Checks that not more than \a max_allocated bytes are allocated.
+ *
+ * Some OS bring up an interactive out of memory dialogue when the
+ * system runs out of memory. This can stall non-interactive
+ * apps (e.g. automated test runs). This function can be used to
+ * not trigger the OOM dialogue by keeping allocations
+ * within a certain limit.
+ *
+ * @return MALI_TRUE when \a max_allocated bytes are not in use yet. MALI_FALSE
+ * when at least \a max_allocated bytes are in use.
+ */
+mali_bool _mali_osk_mem_check_allocated(u32 max_allocated);
+
+
+/** @addtogroup _mali_osk_low_level_memory
+ * @{ */
+
+/** @brief Issue a memory barrier
+ *
+ * This defines an arbitrary memory barrier operation, which forces an ordering constraint
+ * on memory read and write operations.
+ */
+void _mali_osk_mem_barrier(void);
+
+/** @brief Issue a write memory barrier
+ *
+ * This defines an write memory barrier operation which forces an ordering constraint
+ * on memory write operations.
+ */
+void _mali_osk_write_mem_barrier(void);
+
+/** @brief Map a physically contiguous region into kernel space
+ *
+ * This is primarily used for mapping in registers from resources, and Mali-MMU
+ * page tables. The mapping is only visable from kernel-space.
+ *
+ * Access has to go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32
+ *
+ * @param phys CPU-physical base address of the memory to map in. This must
+ * be aligned to the system's page size, which is assumed to be 4K.
+ * @param size the number of bytes of physically contiguous address space to
+ * map in
+ * @param description A textual description of the memory being mapped in.
+ * @return On success, a Mali IO address through which the mapped-in
+ * memory/registers can be accessed. NULL on failure.
+ */
+mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description);
+
+/** @brief Unmap a physically contiguous address range from kernel space.
+ *
+ * The address range should be one previously mapped in through
+ * _mali_osk_mem_mapioregion.
+ *
+ * It is a programming error to do (but not limited to) the following:
+ * - attempt an unmap twice
+ * - unmap only part of a range obtained through _mali_osk_mem_mapioregion
+ * - unmap more than the range obtained through  _mali_osk_mem_mapioregion
+ * - unmap an address range that was not successfully mapped using
+ * _mali_osk_mem_mapioregion
+ * - provide a mapping that does not map to phys.
+ *
+ * @param phys CPU-physical base address of the memory that was originally
+ * mapped in. This must be aligned to the system's page size, which is assumed
+ * to be 4K
+ * @param size The number of bytes that were originally mapped in.
+ * @param mapping The Mali IO address through which the mapping is
+ * accessed.
+ */
+void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address mapping);
+
+/** @brief Allocate and Map a physically contiguous region into kernel space
+ *
+ * This is used for allocating physically contiguous regions (such as Mali-MMU
+ * page tables) and mapping them into kernel space. The mapping is only
+ * visible from kernel-space.
+ *
+ * The alignment of the returned memory is guaranteed to be at least
+ * _MALI_OSK_CPU_PAGE_SIZE.
+ *
+ * Access must go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32
+ *
+ * @note This function is primarily to provide support for OSs that are
+ * incapable of separating the tasks 'allocate physically contiguous memory'
+ * and 'map it into kernel space'
+ *
+ * @param[out] phys CPU-physical base address of memory that was allocated.
+ * (*phys) will be guaranteed to be aligned to at least
+ * _MALI_OSK_CPU_PAGE_SIZE on success.
+ *
+ * @param[in] size the number of bytes of physically contiguous memory to
+ * allocate. This must be a multiple of _MALI_OSK_CPU_PAGE_SIZE.
+ *
+ * @return On success, a Mali IO address through which the mapped-in
+ * memory/registers can be accessed. NULL on failure, and (*phys) is unmodified.
+ */
+mali_io_address _mali_osk_mem_allocioregion(u32 *phys, u32 size);
+
+/** @brief Free a physically contiguous address range from kernel space.
+ *
+ * The address range should be one previously mapped in through
+ * _mali_osk_mem_allocioregion.
+ *
+ * It is a programming error to do (but not limited to) the following:
+ * - attempt a free twice on the same ioregion
+ * - free only part of a range obtained through _mali_osk_mem_allocioregion
+ * - free more than the range obtained through  _mali_osk_mem_allocioregion
+ * - free an address range that was not successfully mapped using
+ * _mali_osk_mem_allocioregion
+ * - provide a mapping that does not map to phys.
+ *
+ * @param phys CPU-physical base address of the memory that was originally
+ * mapped in, which was aligned to _MALI_OSK_CPU_PAGE_SIZE.
+ * @param size The number of bytes that were originally mapped in, which was
+ * a multiple of _MALI_OSK_CPU_PAGE_SIZE.
+ * @param mapping The Mali IO address through which the mapping is
+ * accessed.
+ */
+void _mali_osk_mem_freeioregion(u32 phys, u32 size, mali_io_address mapping);
+
+/** @brief Request a region of physically contiguous memory
+ *
+ * This is used to ensure exclusive access to a region of physically contigous
+ * memory.
+ *
+ * It is acceptable to implement this as a stub. However, it is then the job
+ * of the System Integrator to ensure that no other device driver will be using
+ * the physical address ranges used by Mali, while the Mali device driver is
+ * loaded.
+ *
+ * @param phys CPU-physical base address of the memory to request. This must
+ * be aligned to the system's page size, which is assumed to be 4K.
+ * @param size the number of bytes of physically contiguous address space to
+ * request.
+ * @param description A textual description of the memory being requested.
+ * @return _MALI_OSK_ERR_OK on success. Otherwise, a suitable
+ * _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description);
+
+/** @brief Un-request a region of physically contiguous memory
+ *
+ * This is used to release a regious of physically contiguous memory previously
+ * requested through _mali_osk_mem_reqregion, so that other device drivers may
+ * use it. This will be called at time of Mali device driver termination.
+ *
+ * It is a programming error to attempt to:
+ * - unrequest a region twice
+ * - unrequest only part of a range obtained through _mali_osk_mem_reqregion
+ * - unrequest more than the range obtained through  _mali_osk_mem_reqregion
+ * - unrequest an address range that was not successfully requested using
+ * _mali_osk_mem_reqregion
+ *
+ * @param phys CPU-physical base address of the memory to un-request. This must
+ * be aligned to the system's page size, which is assumed to be 4K
+ * @param size the number of bytes of physically contiguous address space to
+ * un-request.
+ */
+void _mali_osk_mem_unreqregion(uintptr_t phys, u32 size);
+
+/** @brief Read from a location currently mapped in through
+ * _mali_osk_mem_mapioregion
+ *
+ * This reads a 32-bit word from a 32-bit aligned location. It is a programming
+ * error to provide unaligned locations, or to read from memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to read from
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @return the 32-bit word from the specified location.
+ */
+u32 _mali_osk_mem_ioread32(volatile mali_io_address mapping, u32 offset);
+
+/** @brief Write to a location currently mapped in through
+ * _mali_osk_mem_mapioregion without memory barriers
+ *
+ * This write a 32-bit word to a 32-bit aligned location without using memory barrier.
+ * It is a programming error to provide unaligned locations, or to write to memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to write to
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @param val the 32-bit word to write.
+ */
+void _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val);
+
+/** @brief Write to a location currently mapped in through
+ * _mali_osk_mem_mapioregion with write memory barrier
+ *
+ * This write a 32-bit word to a 32-bit aligned location. It is a programming
+ * error to provide unaligned locations, or to write to memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to write to
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @param val the 32-bit word to write.
+ */
+void _mali_osk_mem_iowrite32(volatile mali_io_address mapping, u32 offset, u32 val);
+
+/** @brief Flush all CPU caches
+ *
+ * This should only be implemented if flushing of the cache is required for
+ * memory mapped in through _mali_osk_mem_mapregion.
+ */
+void _mali_osk_cache_flushall(void);
+
+/** @brief Flush any caches necessary for the CPU and MALI to have the same view of a range of uncached mapped memory
+ *
+ * This should only be implemented if your OS doesn't do a full cache flush (inner & outer)
+ * after allocating uncached mapped memory.
+ *
+ * Some OS do not perform a full cache flush (including all outer caches) for uncached mapped memory.
+ * They zero the memory through a cached mapping, then flush the inner caches but not the outer caches.
+ * This is required for MALI to have the correct view of the memory.
+ */
+void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size);
+
+/** @brief Safely copy as much data as possible from src to dest
+ *
+ * Do not crash if src or dest isn't available.
+ *
+ * @param dest Destination buffer (limited to user space mapped Mali memory)
+ * @param src Source buffer
+ * @param size Number of bytes to copy
+ * @return Number of bytes actually copied
+ */
+u32 _mali_osk_mem_write_safe(void *dest, const void *src, u32 size);
+
+/** @} */ /* end group _mali_osk_low_level_memory */
+
+
+/** @addtogroup _mali_osk_notification
+ *
+ * User space notification framework
+ *
+ * Communication with user space of asynchronous events is performed through a
+ * synchronous call to the \ref u_k_api.
+ *
+ * Since the events are asynchronous, the events have to be queued until a
+ * synchronous U/K API call can be made by user-space. A U/K API call might also
+ * be received before any event has happened. Therefore the notifications the
+ * different subsystems wants to send to user space has to be queued for later
+ * reception, or a U/K API call has to be blocked until an event has occured.
+ *
+ * Typical uses of notifications are after running of jobs on the hardware or
+ * when changes to the system is detected that needs to be relayed to user
+ * space.
+ *
+ * After an event has occured user space has to be notified using some kind of
+ * message. The notification framework supports sending messages to waiting
+ * threads or queueing of messages until a U/K API call is made.
+ *
+ * The notification queue is a FIFO. There are no restrictions on the numbers
+ * of readers or writers in the queue.
+ *
+ * A message contains what user space needs to identifiy how to handle an
+ * event. This includes a type field and a possible type specific payload.
+ *
+ * A notification to user space is represented by a
+ * \ref _mali_osk_notification_t object. A sender gets hold of such an object
+ * using _mali_osk_notification_create(). The buffer given by the
+ * _mali_osk_notification_t::result_buffer field in the object is used to store
+ * any type specific data. The other fields are internal to the queue system
+ * and should not be touched.
+ *
+ * @{ */
+
+/** @brief Create a notification object
+ *
+ * Returns a notification object which can be added to the queue of
+ * notifications pending for user space transfer.
+ *
+ * The implementation will initialize all members of the
+ * \ref _mali_osk_notification_t object. In particular, the
+ * _mali_osk_notification_t::result_buffer member will be initialized to point
+ * to \a size bytes of storage, and that storage will be suitably aligned for
+ * storage of any structure. That is, the created buffer meets the same
+ * requirements as _mali_osk_malloc().
+ *
+ * The notification object must be deleted when not in use. Use
+ * _mali_osk_notification_delete() for deleting it.
+ *
+ * @note You \b must \b not call _mali_osk_free() on a \ref _mali_osk_notification_t,
+ * object, or on a _mali_osk_notification_t::result_buffer. You must only use
+ * _mali_osk_notification_delete() to free the resources assocaited with a
+ * \ref _mali_osk_notification_t object.
+ *
+ * @param type The notification type
+ * @param size The size of the type specific buffer to send
+ * @return Pointer to a notification object with a suitable buffer, or NULL on error.
+ */
+_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size);
+
+/** @brief Delete a notification object
+ *
+ * This must be called to reclaim the resources of a notification object. This
+ * includes:
+ * - The _mali_osk_notification_t::result_buffer
+ * - The \ref _mali_osk_notification_t itself.
+ *
+ * A notification object \b must \b not be used after it has been deleted by
+ * _mali_osk_notification_delete().
+ *
+ * In addition, the notification object may not be deleted while it is in a
+ * queue. That is, if it has been placed on a queue with
+ * _mali_osk_notification_queue_send(), then it must not be deleted until
+ * it has been received by a call to _mali_osk_notification_queue_receive().
+ * Otherwise, the queue may be corrupted.
+ *
+ * @param object the notification object to delete.
+ */
+void _mali_osk_notification_delete(_mali_osk_notification_t *object);
+
+/** @brief Create a notification queue
+ *
+ * Creates a notification queue which can be used to queue messages for user
+ * delivery and get queued messages from
+ *
+ * The queue is a FIFO, and has no restrictions on the numbers of readers or
+ * writers.
+ *
+ * When the queue is no longer in use, it must be terminated with
+ * \ref _mali_osk_notification_queue_term(). Failure to do so will result in a
+ * memory leak.
+ *
+ * @return Pointer to a new notification queue or NULL on error.
+ */
+_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void);
+
+/** @brief Destroy a notification queue
+ *
+ * Destroys a notification queue and frees associated resources from the queue.
+ *
+ * A notification queue \b must \b not be destroyed in the following cases:
+ * - while there are \ref _mali_osk_notification_t objects in the queue.
+ * - while there are writers currently acting upon the queue. That is, while
+ * a thread is currently calling \ref _mali_osk_notification_queue_send() on
+ * the queue, or while a thread may call
+ * \ref _mali_osk_notification_queue_send() on the queue in the future.
+ * - while there are readers currently waiting upon the queue. That is, while
+ * a thread is currently calling \ref _mali_osk_notification_queue_receive() on
+ * the queue, or while a thread may call
+ * \ref _mali_osk_notification_queue_receive() on the queue in the future.
+ *
+ * Therefore, all \ref _mali_osk_notification_t objects must be flushed and
+ * deleted by the code that makes use of the notification queues, since only
+ * they know the structure of the _mali_osk_notification_t::result_buffer
+ * (even if it may only be a flat sturcture).
+ *
+ * @note Since the queue is a FIFO, the code using notification queues may
+ * create its own 'flush' type of notification, to assist in flushing the
+ * queue.
+ *
+ * Once the queue has been destroyed, it must not be used again.
+ *
+ * @param queue The queue to destroy
+ */
+void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue);
+
+/** @brief Schedule notification for delivery
+ *
+ * When a \ref _mali_osk_notification_t object has been created successfully
+ * and set up, it may be added to the queue of objects waiting for user space
+ * transfer.
+ *
+ * The sending will not block if the queue is full.
+ *
+ * A \ref _mali_osk_notification_t object \b must \b not be put on two different
+ * queues at the same time, or enqueued twice onto a single queue before
+ * reception. However, it is acceptable for it to be requeued \em after reception
+ * from a call to _mali_osk_notification_queue_receive(), even onto the same queue.
+ *
+ * Again, requeuing must also not enqueue onto two different queues at the same
+ * time, or enqueue onto the same queue twice before reception.
+ *
+ * @param queue The notification queue to add this notification to
+ * @param object The entry to add
+ */
+void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object);
+
+/** @brief Receive a notification from a queue
+ *
+ * Receives a single notification from the given queue.
+ *
+ * If no notifciations are ready the thread will sleep until one becomes ready.
+ * Therefore, notifications may not be received into an
+ * IRQ or 'atomic' context (that is, a context where sleeping is disallowed).
+ *
+ * @param queue The queue to receive from
+ * @param result Pointer to storage of a pointer of type
+ * \ref _mali_osk_notification_t*. \a result will be written to such that the
+ * expression \a (*result) will evaluate to a pointer to a valid
+ * \ref _mali_osk_notification_t object, or NULL if none were received.
+ * @return _MALI_OSK_ERR_OK on success. _MALI_OSK_ERR_RESTARTSYSCALL if the sleep was interrupted.
+ */
+_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result);
+
+/** @brief Dequeues a notification from a queue
+ *
+ * Receives a single notification from the given queue.
+ *
+ * If no notifciations are ready the function call will return an error code.
+ *
+ * @param queue The queue to receive from
+ * @param result Pointer to storage of a pointer of type
+ * \ref _mali_osk_notification_t*. \a result will be written to such that the
+ * expression \a (*result) will evaluate to a pointer to a valid
+ * \ref _mali_osk_notification_t object, or NULL if none were received.
+ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if queue was empty.
+ */
+_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result);
+
+/** @} */ /* end group _mali_osk_notification */
+
+
+/** @addtogroup _mali_osk_timer
+ *
+ * Timers use the OS's representation of time, which are 'ticks'. This is to
+ * prevent aliasing problems between the internal timer time, and the time
+ * asked for.
+ *
+ * @{ */
+
+/** @brief Initialize a timer
+ *
+ * Allocates resources for a new timer, and initializes them. This does not
+ * start the timer.
+ *
+ * @return a pointer to the allocated timer object, or NULL on failure.
+ */
+_mali_osk_timer_t *_mali_osk_timer_init(void);
+
+/** @brief Start a timer
+ *
+ * It is an error to start a timer without setting the callback via
+ * _mali_osk_timer_setcallback().
+ *
+ * It is an error to use this to start an already started timer.
+ *
+ * The timer will expire in \a ticks_to_expire ticks, at which point, the
+ * callback function will be invoked with the callback-specific data,
+ * as registered by _mali_osk_timer_setcallback().
+ *
+ * @param tim the timer to start
+ * @param ticks_to_expire the amount of time in ticks for the timer to run
+ * before triggering.
+ */
+void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire);
+
+/** @brief Modify a timer
+ *
+ * Set the relative time at which a timer will expire, and start it if it is
+ * stopped. If \a ticks_to_expire 0 the timer fires immediately.
+ *
+ * It is an error to modify a timer without setting the callback via
+ *  _mali_osk_timer_setcallback().
+ *
+ * The timer will expire at \a ticks_to_expire from the time of the call, at
+ * which point, the callback function will be invoked with the
+ * callback-specific data, as set by _mali_osk_timer_setcallback().
+ *
+ * @param tim the timer to modify, and start if necessary
+ * @param ticks_to_expire the \em absolute time in ticks at which this timer
+ * should trigger.
+ *
+ */
+void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire);
+
+/** @brief Stop a timer, and block on its completion.
+ *
+ * Stop the timer. When the function returns, it is guaranteed that the timer's
+ * callback will not be running on any CPU core.
+ *
+ * Since stoping the timer blocks on compeletion of the callback, the callback
+ * may not obtain any mutexes that the caller holds. Otherwise, a deadlock will
+ * occur.
+ *
+ * @note While the callback itself is guaranteed to not be running, work
+ * enqueued on the work-queue by the timer (with
+ * \ref _mali_osk_wq_schedule_work()) may still run. The timer callback and
+ * work handler must take this into account.
+ *
+ * It is legal to stop an already stopped timer.
+ *
+ * @param tim the timer to stop.
+ *
+ */
+void _mali_osk_timer_del(_mali_osk_timer_t *tim);
+
+/** @brief Stop a timer.
+ *
+ * Stop the timer. When the function returns, the timer's callback may still be
+ * running on any CPU core.
+ *
+ * It is legal to stop an already stopped timer.
+ *
+ * @param tim the timer to stop.
+ */
+void _mali_osk_timer_del_async(_mali_osk_timer_t *tim);
+
+/** @brief Check if timer is pending.
+ *
+ * Check if timer is active.
+ *
+ * @param tim the timer to check
+ * @return MALI_TRUE if time is active, MALI_FALSE if it is not active
+ */
+mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim);
+
+/** @brief Set a timer's callback parameters.
+ *
+ * This must be called at least once before a timer is started/modified.
+ *
+ * After a timer has been stopped or expires, the callback remains set. This
+ * means that restarting the timer will call the same function with the same
+ * parameters on expiry.
+ *
+ * @param tim the timer to set callback on.
+ * @param callback Function to call when timer expires
+ * @param data Function-specific data to supply to the function on expiry.
+ */
+void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data);
+
+/** @brief Terminate a timer, and deallocate resources.
+ *
+ * The timer must first be stopped by calling _mali_osk_timer_del().
+ *
+ * It is a programming error for _mali_osk_timer_term() to be called on:
+ * - timer that is currently running
+ * - a timer that is currently executing its callback.
+ *
+ * @param tim the timer to deallocate.
+ */
+void _mali_osk_timer_term(_mali_osk_timer_t *tim);
+/** @} */ /* end group _mali_osk_timer */
+
+
+/** @defgroup _mali_osk_time OSK Time functions
+ *
+ * \ref _mali_osk_time use the OS's representation of time, which are
+ * 'ticks'. This is to prevent aliasing problems between the internal timer
+ * time, and the time asked for.
+ *
+ * OS tick time is measured as a u32. The time stored in a u32 may either be
+ * an absolute time, or a time delta between two events. Whilst it is valid to
+ * use math opeartors to \em change the tick value represented as a u32, it
+ * is often only meaningful to do such operations on time deltas, rather than
+ * on absolute time. However, it is meaningful to add/subtract time deltas to
+ * absolute times.
+ *
+ * Conversion between tick time and milliseconds (ms) may not be loss-less,
+ * and are \em implementation \em depenedant.
+ *
+ * Code use OS time must take this into account, since:
+ * - a small OS time may (or may not) be rounded
+ * - a large time may (or may not) overflow
+ *
+ * @{ */
+
+/** @brief Return whether ticka occurs after or at the same time as  tickb
+ *
+ * Systems where ticks can wrap must handle that.
+ *
+ * @param ticka ticka
+ * @param tickb tickb
+ * @return MALI_TRUE if ticka represents a time that occurs at or after tickb.
+ */
+mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb);
+
+/** @brief Convert milliseconds to OS 'ticks'
+ *
+ * @param ms time interval in milliseconds
+ * @return the corresponding time interval in OS ticks.
+ */
+unsigned long _mali_osk_time_mstoticks(u32 ms);
+
+/** @brief Convert OS 'ticks' to milliseconds
+ *
+ * @param ticks time interval in OS ticks.
+ * @return the corresponding time interval in milliseconds
+ */
+u32 _mali_osk_time_tickstoms(unsigned long ticks);
+
+
+/** @brief Get the current time in OS 'ticks'.
+ * @return the current time in OS 'ticks'.
+ */
+unsigned long _mali_osk_time_tickcount(void);
+
+/** @brief Cause a microsecond delay
+ *
+ * The delay will have microsecond resolution, and is necessary for correct
+ * operation of the driver. At worst, the delay will be \b at least \a usecs
+ * microseconds, and so may be (significantly) more.
+ *
+ * This function may be implemented as a busy-wait, which is the most sensible
+ * implementation. On OSs where there are situations in which a thread must not
+ * sleep, this is definitely implemented as a busy-wait.
+ *
+ * @param usecs the number of microseconds to wait for.
+ */
+void _mali_osk_time_ubusydelay(u32 usecs);
+
+/** @brief Return time in nano seconds, since any given reference.
+ *
+ * @return Time in nano seconds
+ */
+u64 _mali_osk_time_get_ns(void);
+
+/** @brief Return time in nano seconds, since boot time.
+ *
+ * @return Time in nano seconds
+ */
+u64 _mali_osk_boot_time_get_ns(void);
+
+/** @} */ /* end group _mali_osk_time */
+
+/** @defgroup _mali_osk_math OSK Math
+ * @{ */
+
+/** @brief Count Leading Zeros (Little-endian)
+ *
+ * @note This function must be implemented to support the reference
+ * implementation of _mali_osk_find_first_zero_bit, as defined in
+ * mali_osk_bitops.h.
+ *
+ * @param val 32-bit words to count leading zeros on
+ * @return the number of leading zeros.
+ */
+u32 _mali_osk_clz(u32 val);
+
+/** @brief find last (most-significant) bit set
+ *
+ * @param val 32-bit words to count last bit set on
+ * @return last bit set.
+ */
+u32 _mali_osk_fls(u32 val);
+
+/** @} */ /* end group _mali_osk_math */
+
+/** @addtogroup _mali_osk_wait_queue OSK Wait Queue functionality
+ * @{ */
+
+/** @brief Initialize an empty Wait Queue */
+_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void);
+
+/** @brief Sleep if condition is false
+ *
+ * @param queue the queue to use
+ * @param condition function pointer to a boolean function
+ * @param data data parameter for condition function
+ *
+ * Put thread to sleep if the given \a condition function returns false. When
+ * being asked to wake up again, the condition will be re-checked and the
+ * thread only woken up if the condition is now true.
+ */
+void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data);
+
+/** @brief Sleep if condition is false
+ *
+ * @param queue the queue to use
+ * @param condition function pointer to a boolean function
+ * @param data data parameter for condition function
+ * @param timeout timeout in ms
+ *
+ * Put thread to sleep if the given \a condition function returns false. When
+ * being asked to wake up again, the condition will be re-checked and the
+ * thread only woken up if the condition is now true.  Will return if time
+ * exceeds timeout.
+ */
+void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout);
+
+/** @brief Wake up all threads in wait queue if their respective conditions are
+ * true
+ *
+ * @param queue the queue whose threads should be woken up
+ *
+ * Wake up all threads in wait queue \a queue whose condition is now true.
+ */
+void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue);
+
+/** @brief terminate a wait queue
+ *
+ * @param queue the queue to terminate.
+ */
+void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue);
+/** @} */ /* end group _mali_osk_wait_queue */
+
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Output a device driver debug message.
+ *
+ * The interpretation of \a fmt is the same as the \c format parameter in
+ * _mali_osu_vsnprintf().
+ *
+ * @param fmt a _mali_osu_vsnprintf() style format string
+ * @param ... a variable-number of parameters suitable for \a fmt
+ */
+void _mali_osk_dbgmsg(const char *fmt, ...);
+
+/** @brief Print fmt into buf.
+ *
+ * The interpretation of \a fmt is the same as the \c format parameter in
+ * _mali_osu_vsnprintf().
+ *
+ * @param buf a pointer to the result buffer
+ * @param size the total number of bytes allowed to write to \a buf
+ * @param fmt a _mali_osu_vsnprintf() style format string
+ * @param ... a variable-number of parameters suitable for \a fmt
+ * @return The number of bytes written to \a buf
+ */
+u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...);
+
+/** @brief Abnormal process abort.
+ *
+ * Terminates the caller-process if this function is called.
+ *
+ * This function will be called from Debug assert-macros in mali_kernel_common.h.
+ *
+ * This function will never return - because to continue from a Debug assert
+ * could cause even more problems, and hinder debugging of the initial problem.
+ *
+ * This function is only used in Debug builds, and is not used in Release builds.
+ */
+void _mali_osk_abort(void);
+
+/** @brief Sets breakpoint at point where function is called.
+ *
+ * This function will be called from Debug assert-macros in mali_kernel_common.h,
+ * to assist in debugging. If debugging at this level is not required, then this
+ * function may be implemented as a stub.
+ *
+ * This function is only used in Debug builds, and is not used in Release builds.
+ */
+void _mali_osk_break(void);
+
+/** @brief Return an identificator for calling process.
+ *
+ * @return Identificator for calling process.
+ */
+u32 _mali_osk_get_pid(void);
+
+/** @brief Return an name for calling process.
+ *
+ * @return name for calling process.
+ */
+char *_mali_osk_get_comm(void);
+
+/** @brief Return an identificator for calling thread.
+ *
+ * @return Identificator for calling thread.
+ */
+u32 _mali_osk_get_tid(void);
+
+
+/** @brief Take a reference to the power manager system for the Mali device (synchronously).
+ *
+ * When function returns successfully, Mali is ON.
+ *
+ * @note Call \a _mali_osk_pm_dev_ref_put() to release this reference.
+ */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void);
+
+/** @brief Take a reference to the external power manager system for the Mali device (asynchronously).
+ *
+ * Mali might not yet be on after this function as returned.
+ * Please use \a _mali_osk_pm_dev_barrier() or \a _mali_osk_pm_dev_ref_get_sync()
+ * to wait for Mali to be powered on.
+ *
+ * @note Call \a _mali_osk_pm_dev_ref_dec() to release this reference.
+ */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void);
+
+/** @brief Release the reference to the external power manger system for the Mali device.
+ *
+ * When reference count reach zero, the cores can be off.
+ *
+ * @note This must be used to release references taken with
+ * \a _mali_osk_pm_dev_ref_get_sync() or \a _mali_osk_pm_dev_ref_get_sync().
+ */
+void _mali_osk_pm_dev_ref_put(void);
+
+/** @brief Block until pending PM operations are done
+ */
+void _mali_osk_pm_dev_barrier(void);
+
+/** @} */ /* end group  _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_bitmap OSK Bitmap
+ * @{ */
+
+/** @brief Allocate a unique number from the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @return An unique existence in the bitmap object.
+ */
+u32 _mali_osk_bitmap_alloc(struct _mali_osk_bitmap *bitmap);
+
+/** @brief Free a interger to the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @param obj An number allocated from bitmap object.
+ */
+void _mali_osk_bitmap_free(struct _mali_osk_bitmap *bitmap, u32 obj);
+
+/** @brief Allocate continuous number from the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @return start number of the continuous number block.
+ */
+u32 _mali_osk_bitmap_alloc_range(struct _mali_osk_bitmap *bitmap, int cnt);
+
+/** @brief Free a block of continuous number block to the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @param obj Start number.
+ * @param cnt The size of the continuous number block.
+ */
+void _mali_osk_bitmap_free_range(struct _mali_osk_bitmap *bitmap, u32 obj, int cnt);
+
+/** @brief Available count could be used to allocate in the given bitmap object.
+ *
+ */
+u32 _mali_osk_bitmap_avail(struct _mali_osk_bitmap *bitmap);
+
+/** @brief Initialize an bitmap object..
+ *
+ * @param bitmap An poiter of uninitialized bitmap object.
+ * @param num Size of thei bitmap object and decide the memory size allocated.
+ * @param reserve start number used to allocate.
+ */
+int _mali_osk_bitmap_init(struct _mali_osk_bitmap *bitmap, u32 num, u32 reserve);
+
+/** @brief Free the given bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ */
+void _mali_osk_bitmap_term(struct _mali_osk_bitmap *bitmap);
+/** @} */ /* end group  _mali_osk_bitmap */
+
+/** @} */ /* end group osuapi */
+
+/** @} */ /* end group uddapi */
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Check standard inlines */
+#ifndef MALI_STATIC_INLINE
+#error MALI_STATIC_INLINE not defined on your OS
+#endif
+
+#ifndef MALI_NON_STATIC_INLINE
+#error MALI_NON_STATIC_INLINE not defined on your OS
+#endif
+
+#endif /* __MALI_OSK_H__ */
diff --git a/utgard/r6p1/common/mali_osk_bitops.h b/utgard/r6p1/common/mali_osk_bitops.h
new file mode 100755
index 0000000..61e0a91
--- /dev/null
+++ b/utgard/r6p1/common/mali_osk_bitops.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_bitops.h
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#ifndef __MALI_OSK_BITOPS_H__
+#define __MALI_OSK_BITOPS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+MALI_STATIC_INLINE void _mali_internal_clear_bit(u32 bit, u32 *addr)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	MALI_DEBUG_ASSERT(NULL != addr);
+
+	(*addr) &= ~(1 << bit);
+}
+
+MALI_STATIC_INLINE void _mali_internal_set_bit(u32 bit, u32 *addr)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	MALI_DEBUG_ASSERT(NULL != addr);
+
+	(*addr) |= (1 << bit);
+}
+
+MALI_STATIC_INLINE u32 _mali_internal_test_bit(u32 bit, u32 value)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	return value & (1 << bit);
+}
+
+MALI_STATIC_INLINE int _mali_internal_find_first_zero_bit(u32 value)
+{
+	u32 inverted;
+	u32 negated;
+	u32 isolated;
+	u32 leading_zeros;
+
+	/* Begin with xxx...x0yyy...y, where ys are 1, number of ys is in range  0..31 */
+	inverted = ~value; /* zzz...z1000...0 */
+	/* Using count_trailing_zeros on inverted value -
+	 * See ARM System Developers Guide for details of count_trailing_zeros */
+
+	/* Isolate the zero: it is preceeded by a run of 1s, so add 1 to it */
+	negated = (u32) - inverted ; /* -a == ~a + 1 (mod 2^n) for n-bit numbers */
+	/* negated = xxx...x1000...0 */
+
+	isolated = negated & inverted ; /* xxx...x1000...0 & zzz...z1000...0, zs are ~xs */
+	/* And so the first zero bit is in the same position as the 1 == number of 1s that preceeded it
+	 * Note that the output is zero if value was all 1s */
+
+	leading_zeros = _mali_osk_clz(isolated);
+
+	return 31 - leading_zeros;
+}
+
+
+/** @defgroup _mali_osk_bitops OSK Non-atomic Bit-operations
+ * @{ */
+
+/**
+ * These bit-operations do not work atomically, and so locks must be used if
+ * atomicity is required.
+ *
+ * Reference implementations for Little Endian are provided, and so it should
+ * not normally be necessary to re-implement these. Efficient bit-twiddling
+ * techniques are used where possible, implemented in portable C.
+ *
+ * Note that these reference implementations rely on _mali_osk_clz() being
+ * implemented.
+ */
+
+/** @brief Clear a bit in a sequence of 32-bit words
+ * @param nr bit number to clear, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ */
+MALI_STATIC_INLINE void _mali_osk_clear_nonatomic_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	_mali_internal_clear_bit(nr, addr);
+}
+
+/** @brief Set a bit in a sequence of 32-bit words
+ * @param nr bit number to set, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ */
+MALI_STATIC_INLINE void _mali_osk_set_nonatomic_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	_mali_internal_set_bit(nr, addr);
+}
+
+/** @brief Test a bit in a sequence of 32-bit words
+ * @param nr bit number to test, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ * @return zero if bit was clear, non-zero if set. Do not rely on the return
+ * value being related to the actual word under test.
+ */
+MALI_STATIC_INLINE u32 _mali_osk_test_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	return _mali_internal_test_bit(nr, *addr);
+}
+
+/* Return maxbit if not found */
+/** @brief Find the first zero bit in a sequence of 32-bit words
+ * @param addr starting point for search.
+ * @param maxbit the maximum number of bits to search
+ * @return the number of the first zero bit found, or maxbit if none were found
+ * in the specified range.
+ */
+MALI_STATIC_INLINE u32 _mali_osk_find_first_zero_bit(const u32 *addr, u32 maxbit)
+{
+	u32 total;
+
+	for (total = 0; total < maxbit; total += 32, ++addr) {
+		int result;
+		result = _mali_internal_find_first_zero_bit(*addr);
+
+		/* non-negative signifies the bit was found */
+		if (result >= 0) {
+			total += (u32)result;
+			break;
+		}
+	}
+
+	/* Now check if we reached maxbit or above */
+	if (total >= maxbit) {
+		total = maxbit;
+	}
+
+	return total; /* either the found bit nr, or maxbit if not found */
+}
+/** @} */ /* end group _mali_osk_bitops */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_BITOPS_H__ */
diff --git a/utgard/r6p1/common/mali_osk_list.h b/utgard/r6p1/common/mali_osk_list.h
new file mode 100755
index 0000000..a41f5ab
--- /dev/null
+++ b/utgard/r6p1/common/mali_osk_list.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_list.h
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#ifndef __MALI_OSK_LIST_H__
+#define __MALI_OSK_LIST_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+MALI_STATIC_INLINE void __mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *prev, _mali_osk_list_t *next)
+{
+	next->prev = new_entry;
+	new_entry->next = next;
+	new_entry->prev = prev;
+	prev->next = new_entry;
+}
+
+MALI_STATIC_INLINE void __mali_osk_list_del(_mali_osk_list_t *prev, _mali_osk_list_t *next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists
+ * @{ */
+
+/** Reference implementations of Doubly-linked Circular Lists are provided.
+ * There is often no need to re-implement these.
+ *
+ * @note The implementation may differ subtly from any lists the OS provides.
+ * For this reason, these lists should not be mixed with OS-specific lists
+ * inside the OSK/UKK implementation. */
+
+/** @brief Initialize a list to be a head of an empty list
+ * @param exp the list to initialize. */
+#define _MALI_OSK_INIT_LIST_HEAD(exp) _mali_osk_list_init(exp)
+
+/** @brief Define a list variable, which is uninitialized.
+ * @param exp the name of the variable that the list will be defined as. */
+#define _MALI_OSK_LIST_HEAD(exp) _mali_osk_list_t exp
+
+/** @brief Define a list variable, which is initialized.
+ * @param exp the name of the variable that the list will be defined as. */
+#define _MALI_OSK_LIST_HEAD_STATIC_INIT(exp) _mali_osk_list_t exp = { &exp, &exp }
+
+/** @brief Initialize a list element.
+ *
+ * All list elements must be initialized before use.
+ *
+ * Do not use on any list element that is present in a list without using
+ * _mali_osk_list_del first, otherwise this will break the list.
+ *
+ * @param list the list element to initialize
+ */
+MALI_STATIC_INLINE void _mali_osk_list_init(_mali_osk_list_t *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+/** @brief Insert a single list element after an entry in a list
+ *
+ * As an example, if this is inserted to the head of a list, then this becomes
+ * the first element of the list.
+ *
+ * Do not use to move list elements from one list to another, as it will break
+ * the originating list.
+ *
+ *
+ * @param newlist the list element to insert
+ * @param list the list in which to insert. The new element will be the next
+ * entry in this list
+ */
+MALI_STATIC_INLINE void _mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_add(new_entry, list, list->next);
+}
+
+/** @brief Insert a single list element before an entry in a list
+ *
+ * As an example, if this is inserted to the head of a list, then this becomes
+ * the last element of the list.
+ *
+ * Do not use to move list elements from one list to another, as it will break
+ * the originating list.
+ *
+ * @param newlist the list element to insert
+ * @param list the list in which to insert. The new element will be the previous
+ * entry in this list
+ */
+MALI_STATIC_INLINE void _mali_osk_list_addtail(_mali_osk_list_t *new_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_add(new_entry, list->prev, list);
+}
+
+/** @brief Remove a single element from a list
+ *
+ * The element will no longer be present in the list. The removed list element
+ * will be uninitialized, and so should not be traversed. It must be
+ * initialized before further use.
+ *
+ * @param list the list element to remove.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_del(_mali_osk_list_t *list)
+{
+	__mali_osk_list_del(list->prev, list->next);
+}
+
+/** @brief Remove a single element from a list, and re-initialize it
+ *
+ * The element will no longer be present in the list. The removed list element
+ * will initialized, and so can be used as normal.
+ *
+ * @param list the list element to remove and initialize.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_delinit(_mali_osk_list_t *list)
+{
+	__mali_osk_list_del(list->prev, list->next);
+	_mali_osk_list_init(list);
+}
+
+/** @brief Determine whether a list is empty.
+ *
+ * An empty list is one that contains a single element that points to itself.
+ *
+ * @param list the list to check.
+ * @return non-zero if the list is empty, and zero otherwise.
+ */
+MALI_STATIC_INLINE mali_bool _mali_osk_list_empty(_mali_osk_list_t *list)
+{
+	return list->next == list;
+}
+
+/** @brief Move a list element from one list to another.
+ *
+ * The list element must be initialized.
+ *
+ * As an example, moving a list item to the head of a new list causes this item
+ * to be the first element in the new list.
+ *
+ * @param move the list element to move
+ * @param list the new list into which the element will be inserted, as the next
+ * element in the list.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_move(_mali_osk_list_t *move_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_del(move_entry->prev, move_entry->next);
+	_mali_osk_list_add(move_entry, list);
+}
+
+/** @brief Move an entire list
+ *
+ * The list element must be initialized.
+ *
+ * Allows you to move a list from one list head to another list head
+ *
+ * @param old_list The existing list head
+ * @param new_list The new list head (must be an empty list)
+ */
+MALI_STATIC_INLINE void _mali_osk_list_move_list(_mali_osk_list_t *old_list, _mali_osk_list_t *new_list)
+{
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(new_list));
+	if (!_mali_osk_list_empty(old_list)) {
+		new_list->next = old_list->next;
+		new_list->prev = old_list->prev;
+		new_list->next->prev = new_list;
+		new_list->prev->next = new_list;
+		old_list->next = old_list;
+		old_list->prev = old_list;
+	}
+}
+
+/** @brief Find the containing structure of a list
+ *
+ * When traversing a list, this is used to recover the containing structure,
+ * given that is contains a _mali_osk_list_t member.
+ *
+ * Each list must be of structures of one type, and must link the same members
+ * together, otherwise it will not be possible to correctly recover the
+ * sturctures that the lists link.
+ *
+ * @note no type or memory checking occurs to ensure that a structure does in
+ * fact exist for the list entry, and that it is being recovered with respect
+ * to the correct list member.
+ *
+ * @param ptr the pointer to the _mali_osk_list_t member in this structure
+ * @param type the type of the structure that contains the member
+ * @param member the member of the structure that ptr points to.
+ * @return a pointer to a \a type object which contains the _mali_osk_list_t
+ * \a member, as pointed to by the _mali_osk_list_t \a *ptr.
+ */
+#define _MALI_OSK_LIST_ENTRY(ptr, type, member) \
+	_MALI_OSK_CONTAINER_OF(ptr, type, member)
+
+/** @brief Enumerate a list safely
+ *
+ * With this macro, lists can be enumerated in a 'safe' manner. That is,
+ * entries can be deleted from the list without causing an error during
+ * enumeration. To achieve this, a 'temporary' pointer is required, which must
+ * be provided to the macro.
+ *
+ * Use it like a 'for()', 'while()' or 'do()' construct, and so it must be
+ * followed by a statement or compound-statement which will be executed for
+ * each list entry.
+ *
+ * Upon loop completion, providing that an early out was not taken in the
+ * loop body, then it is guaranteed that ptr->member == list, even if the loop
+ * body never executed.
+ *
+ * @param ptr a pointer to an object of type 'type', which points to the
+ * structure that contains the currently enumerated list entry.
+ * @param tmp a pointer to an object of type 'type', which must not be used
+ * inside the list-execution statement.
+ * @param list a pointer to a _mali_osk_list_t, from which enumeration will
+ * begin
+ * @param type the type of the structure that contains the _mali_osk_list_t
+ * member that is part of the list to be enumerated.
+ * @param member the _mali_osk_list_t member of the structure that is part of
+ * the list to be enumerated.
+ */
+#define _MALI_OSK_LIST_FOREACHENTRY(ptr, tmp, list, type, member)         \
+	for (ptr = _MALI_OSK_LIST_ENTRY((list)->next, type, member),      \
+	     tmp = _MALI_OSK_LIST_ENTRY(ptr->member.next, type, member);  \
+	     &ptr->member != (list);                                      \
+	     ptr = tmp,                                                   \
+	     tmp = _MALI_OSK_LIST_ENTRY(tmp->member.next, type, member))
+
+/** @brief Enumerate a list in reverse order safely
+ *
+ * This macro is identical to @ref _MALI_OSK_LIST_FOREACHENTRY, except that
+ * entries are enumerated in reverse order.
+ *
+ * @param ptr a pointer to an object of type 'type', which points to the
+ * structure that contains the currently enumerated list entry.
+ * @param tmp a pointer to an object of type 'type', which must not be used
+ * inside the list-execution statement.
+ * @param list a pointer to a _mali_osk_list_t, from which enumeration will
+ * begin
+ * @param type the type of the structure that contains the _mali_osk_list_t
+ * member that is part of the list to be enumerated.
+ * @param member the _mali_osk_list_t member of the structure that is part of
+ * the list to be enumerated.
+ */
+#define _MALI_OSK_LIST_FOREACHENTRY_REVERSE(ptr, tmp, list, type, member) \
+	for (ptr = _MALI_OSK_LIST_ENTRY((list)->prev, type, member),      \
+	     tmp = _MALI_OSK_LIST_ENTRY(ptr->member.prev, type, member);  \
+	     &ptr->member != (list);                                      \
+	     ptr = tmp,                                                   \
+	     tmp = _MALI_OSK_LIST_ENTRY(tmp->member.prev, type, member))
+
+/** @} */ /* end group _mali_osk_list */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_LIST_H__ */
diff --git a/utgard/r6p1/common/mali_osk_mali.h b/utgard/r6p1/common/mali_osk_mali.h
new file mode 100755
index 0000000..0d6906a
--- /dev/null
+++ b/utgard/r6p1/common/mali_osk_mali.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_mali.h
+ * Defines the OS abstraction layer which is specific for the Mali kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_MALI_H__
+#define __MALI_OSK_MALI_H__
+
+#include <linux/mali/mali_utgard.h>
+#include <mali_osk.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef CONFIG_MALI_DEVFREQ
+struct mali_device {
+	struct device *dev;
+#ifdef CONFIG_HAVE_CLK
+	struct clk *clock;
+#endif
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulator;
+#endif
+#ifdef CONFIG_PM_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freq;
+	unsigned long current_voltage;
+#ifdef CONFIG_DEVFREQ_THERMAL
+	struct thermal_cooling_device *devfreq_cooling;
+#endif
+#endif
+	struct mali_pm_metrics_data mali_metrics;
+};
+#endif
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Struct with device specific configuration data
+ */
+typedef struct mali_gpu_device_data _mali_osk_device_data;
+
+#ifdef CONFIG_MALI_DT
+/** @brief Initialize those device resources when we use device tree
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_resource_initialize(void);
+#endif
+
+/** @brief Find Mali GPU HW resource
+ *
+ * @param addr Address of Mali GPU resource to find
+ * @param res Storage for resource information if resource is found.
+ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if resource is not found
+ */
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res);
+
+
+/** @brief Find Mali GPU HW base address
+ *
+ * @return 0 if resources are found, otherwise the Mali GPU component with lowest address.
+ */
+uintptr_t _mali_osk_resource_base_address(void);
+
+/** @brief Find the specific GPU resource.
+ *
+ * @return value
+ * 0x400 if Mali 400 specific GPU resource identified
+ * 0x450 if Mali 450 specific GPU resource identified
+ * 0x470 if Mali 470 specific GPU resource identified
+ *
+ */
+u32 _mali_osk_identify_gpu_resource(void);
+
+/** @brief Retrieve the Mali GPU specific data
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data);
+
+/** @brief Find the pmu domain config from device data.
+ *
+ * @param domain_config_array used to store pmu domain config found in device data.
+ * @param array_size is the size of array domain_config_array.
+ */
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size);
+
+/** @brief Get Mali PMU switch delay
+ *
+ *@return pmu switch delay if it is configured
+ */
+u32 _mali_osk_get_pmu_switch_delay(void);
+
+/** @brief Determines if Mali GPU has been configured with shared interrupts.
+ *
+ * @return MALI_TRUE if shared interrupts, MALI_FALSE if not.
+ */
+mali_bool _mali_osk_shared_interrupts(void);
+
+/** @brief Initialize the gpu secure mode.
+ * The gpu secure mode will initially be in a disabled state.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_init(void);
+
+/** @brief Deinitialize the gpu secure mode.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_deinit(void);
+
+/** @brief Enable the gpu secure mode.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_enable(void);
+
+/** @brief Disable the gpu secure mode.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_disable(void);
+
+/** @brief Check if the gpu secure mode has been enabled.
+ * @return MALI_TRUE if enabled, otherwise MALI_FALSE.
+ */
+mali_bool _mali_osk_gpu_secure_mode_is_enabled(void);
+
+/** @brief Check if the gpu secure mode is supported.
+ * @return MALI_TRUE if supported, otherwise MALI_FALSE.
+ */
+mali_bool _mali_osk_gpu_secure_mode_is_supported(void);
+
+
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_MALI_H__ */
diff --git a/utgard/r6p1/common/mali_osk_profiling.h b/utgard/r6p1/common/mali_osk_profiling.h
new file mode 100755
index 0000000..eca6ad4
--- /dev/null
+++ b/utgard/r6p1/common/mali_osk_profiling.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_OSK_PROFILING_H__
+#define __MALI_OSK_PROFILING_H__
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+
+#include "mali_linux_trace.h"
+#include "mali_profiling_events.h"
+#include "mali_profiling_gator_api.h"
+
+#define MALI_PROFILING_MAX_BUFFER_ENTRIES 1048576
+
+#define MALI_PROFILING_NO_HW_COUNTER = ((u32)-1)
+
+/** @defgroup _mali_osk_profiling External profiling connectivity
+ * @{ */
+
+/**
+ * Initialize the profiling module.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start);
+
+/*
+ * Terminate the profiling module.
+ */
+void _mali_osk_profiling_term(void);
+
+/**
+ * Stop the profile sampling operation.
+ */
+void _mali_osk_profiling_stop_sampling(u32 pid);
+
+/**
+ * Start recording profiling data
+ *
+ * The specified limit will determine how large the capture buffer is.
+ * MALI_PROFILING_MAX_BUFFER_ENTRIES determines the maximum size allowed by the device driver.
+ *
+ * @param limit The desired maximum number of events to record on input, the actual maximum on output.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_start(u32 *limit);
+
+/**
+ * Add an profiling event
+ *
+ * @param event_id The event identificator.
+ * @param data0 First data parameter, depending on event_id specified.
+ * @param data1 Second data parameter, depending on event_id specified.
+ * @param data2 Third data parameter, depending on event_id specified.
+ * @param data3 Fourth data parameter, depending on event_id specified.
+ * @param data4 Fifth data parameter, depending on event_id specified.
+ */
+void    _mali_osk_profiling_add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4);
+
+/**
+ * Report a hardware counter event.
+ *
+ * @param counter_id The ID of the counter.
+ * @param value The value of the counter.
+ */
+
+/* Call Linux tracepoint directly */
+#define _mali_osk_profiling_report_hw_counter(counter_id, value) trace_mali_hw_counter(counter_id, value)
+
+/**
+ * Report SW counters
+ *
+ * @param counters array of counter values
+ */
+void _mali_osk_profiling_report_sw_counters(u32 *counters);
+
+void _mali_osk_profiling_record_global_counters(int counter_id, u32 value);
+
+/**
+ * Stop recording profiling data
+ *
+ * @param count Returns the number of recorded events.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_stop(u32 *count);
+
+/**
+ * Retrieves the number of events that can be retrieved
+ *
+ * @return The number of recorded events that can be retrieved.
+ */
+u32 _mali_osk_profiling_get_count(void);
+
+/**
+ * Retrieve an event
+ *
+ * @param index Event index (start with 0 and continue until this function fails to retrieve all events)
+ * @param timestamp The timestamp for the retrieved event will be stored here.
+ * @param event_id The event ID for the retrieved event will be stored here.
+ * @param data The 5 data values for the retrieved event will be stored here.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]);
+
+/**
+ * Clear the recorded buffer.
+ *
+ * This is needed in order to start another recording.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_clear(void);
+
+/**
+ * Checks if a recording of profiling data is in progress
+ *
+ * @return MALI_TRUE if recording of profiling data is in progress, MALI_FALSE if not
+ */
+mali_bool _mali_osk_profiling_is_recording(void);
+
+/**
+ * Checks if profiling data is available for retrival
+ *
+ * @return MALI_TRUE if profiling data is avaiable, MALI_FALSE if not
+ */
+mali_bool _mali_osk_profiling_have_recording(void);
+
+/** @} */ /* end group _mali_osk_profiling */
+
+#else /* defined(CONFIG_MALI400_PROFILING)  && defined(CONFIG_TRACEPOINTS) */
+
+/* Dummy add_event, for when profiling is disabled. */
+
+#define _mali_osk_profiling_add_event(event_id, data0, data1, data2, data3, data4)
+
+#endif /* defined(CONFIG_MALI400_PROFILING)  && defined(CONFIG_TRACEPOINTS) */
+
+#endif /* __MALI_OSK_PROFILING_H__ */
+
+
diff --git a/utgard/r6p1/common/mali_osk_types.h b/utgard/r6p1/common/mali_osk_types.h
new file mode 100755
index 0000000..6e9a133
--- /dev/null
+++ b/utgard/r6p1/common/mali_osk_types.h
@@ -0,0 +1,471 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_types.h
+ * Defines types of the OS abstraction layer for the kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_TYPES_H__
+#define __MALI_OSK_TYPES_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs
+ *
+ * @{
+ */
+
+/** @defgroup _mali_osk_miscellaneous OSK Miscellaneous functions, constants and types
+ * @{ */
+
+/* Define integer types used by OSK. Note: these currently clash with Linux so we only define them if not defined already */
+#ifndef __KERNEL__
+typedef unsigned char      u8;
+typedef signed char        s8;
+typedef unsigned short     u16;
+typedef signed short       s16;
+typedef unsigned int       u32;
+typedef signed int         s32;
+typedef unsigned long long u64;
+#define BITS_PER_LONG (sizeof(long)*8)
+#else
+/* Ensure Linux types u32, etc. are defined */
+#include <linux/types.h>
+#endif
+
+/** @brief Mali Boolean type which uses MALI_TRUE and MALI_FALSE
+  */
+typedef unsigned long mali_bool;
+
+#ifndef MALI_TRUE
+#define MALI_TRUE ((mali_bool)1)
+#endif
+
+#ifndef MALI_FALSE
+#define MALI_FALSE ((mali_bool)0)
+#endif
+
+#define MALI_HW_CORE_NO_COUNTER     ((u32)-1)
+
+
+#define MALI_S32_MAX 0x7fffffff
+
+/**
+ * @brief OSK Error codes
+ *
+ * Each OS may use its own set of error codes, and may require that the
+ * User/Kernel interface take certain error code. This means that the common
+ * error codes need to be sufficiently rich to pass the correct error code
+ * thorugh from the OSK to U/K layer, across all OSs.
+ *
+ * The result is that some error codes will appear redundant on some OSs.
+ * Under all OSs, the OSK layer must translate native OS error codes to
+ * _mali_osk_errcode_t codes. Similarly, the U/K layer must translate from
+ * _mali_osk_errcode_t codes to native OS error codes.
+ */
+typedef enum {
+	_MALI_OSK_ERR_OK = 0, /**< Success. */
+	_MALI_OSK_ERR_FAULT = -1, /**< General non-success */
+	_MALI_OSK_ERR_INVALID_FUNC = -2, /**< Invalid function requested through User/Kernel interface (e.g. bad IOCTL number) */
+	_MALI_OSK_ERR_INVALID_ARGS = -3, /**< Invalid arguments passed through User/Kernel interface */
+	_MALI_OSK_ERR_NOMEM = -4, /**< Insufficient memory */
+	_MALI_OSK_ERR_TIMEOUT = -5, /**< Timeout occurred */
+	_MALI_OSK_ERR_RESTARTSYSCALL = -6, /**< Special: On certain OSs, must report when an interruptable mutex is interrupted. Ignore otherwise. */
+	_MALI_OSK_ERR_ITEM_NOT_FOUND = -7, /**< Table Lookup failed */
+	_MALI_OSK_ERR_BUSY = -8, /**< Device/operation is busy. Try again later */
+	_MALI_OSK_ERR_UNSUPPORTED = -9, /**< Optional part of the interface used, and is unsupported */
+} _mali_osk_errcode_t;
+
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_wq OSK work queues
+ * @{ */
+
+/** @brief Private type for work objects */
+typedef struct _mali_osk_wq_work_s _mali_osk_wq_work_t;
+typedef struct _mali_osk_wq_delayed_work_s _mali_osk_wq_delayed_work_t;
+
+/** @brief Work queue handler function
+ *
+ * This function type is called when the work is scheduled by the work queue,
+ * e.g. as an IRQ bottom-half handler.
+ *
+ * Refer to \ref _mali_osk_wq_schedule_work() for more information on the
+ * work-queue and work handlers.
+ *
+ * @param arg resource-specific data
+ */
+typedef void (*_mali_osk_wq_work_handler_t)(void *arg);
+
+/* @} */ /* end group _mali_osk_wq */
+
+/** @defgroup _mali_osk_irq OSK IRQ handling
+ * @{ */
+
+/** @brief Private type for IRQ handling objects */
+typedef struct _mali_osk_irq_t_struct _mali_osk_irq_t;
+
+/** @brief Optional function to trigger an irq from a resource
+ *
+ * This function is implemented by the common layer to allow probing of a resource's IRQ.
+ * @param arg resource-specific data */
+typedef void (*_mali_osk_irq_trigger_t)(void *arg);
+
+/** @brief Optional function to acknowledge an irq from a resource
+ *
+ * This function is implemented by the common layer to allow probing of a resource's IRQ.
+ * @param arg resource-specific data
+ * @return _MALI_OSK_ERR_OK if the IRQ was successful, or a suitable _mali_osk_errcode_t on failure. */
+typedef _mali_osk_errcode_t (*_mali_osk_irq_ack_t)(void *arg);
+
+/** @brief IRQ 'upper-half' handler callback.
+ *
+ * This function is implemented by the common layer to do the initial handling of a
+ * resource's IRQ. This maps on to the concept of an ISR that does the minimum
+ * work necessary before handing off to an IST.
+ *
+ * The communication of the resource-specific data from the ISR to the IST is
+ * handled by the OSK implementation.
+ *
+ * On most systems, the IRQ upper-half handler executes in IRQ context.
+ * Therefore, the system may have restrictions about what can be done in this
+ * context
+ *
+ * If an IRQ upper-half handler requires more work to be done than can be
+ * acheived in an IRQ context, then it may defer the work with
+ * _mali_osk_wq_schedule_work(). Refer to \ref _mali_osk_wq_create_work() for
+ * more information.
+ *
+ * @param arg resource-specific data
+ * @return _MALI_OSK_ERR_OK if the IRQ was correctly handled, or a suitable
+ * _mali_osk_errcode_t otherwise.
+ */
+typedef _mali_osk_errcode_t (*_mali_osk_irq_uhandler_t)(void *arg);
+
+
+/** @} */ /* end group _mali_osk_irq */
+
+
+/** @defgroup _mali_osk_atomic OSK Atomic counters
+ * @{ */
+
+/** @brief Public type of atomic counters
+ *
+ * This is public for allocation on stack. On systems that support it, this is just a single 32-bit value.
+ * On others, it could be encapsulating an object stored elsewhere.
+ *
+ * Regardless of implementation, the \ref _mali_osk_atomic functions \b must be used
+ * for all accesses to the variable's value, even if atomicity is not required.
+ * Do not access u.val or u.obj directly.
+ */
+typedef struct {
+	union {
+		u32 val;
+		void *obj;
+	} u;
+} _mali_osk_atomic_t;
+/** @} */ /* end group _mali_osk_atomic */
+
+
+/** @defgroup _mali_osk_lock OSK Mutual Exclusion Locks
+ * @{ */
+
+
+/** @brief OSK Mutual Exclusion Lock ordered list
+ *
+ * This lists the various types of locks in the system and is used to check
+ * that locks are taken in the correct order.
+ *
+ * - Holding more than one lock of the same order at the same time is not
+ *   allowed.
+ * - Taking a lock of a lower order than the highest-order lock currently held
+ *   is not allowed.
+ *
+ */
+typedef enum {
+	/*  ||    Locks    ||  */
+	/*  ||   must be   ||  */
+	/* _||_  taken in _||_ */
+	/* \  /    this   \  / */
+	/*  \/    order!   \/  */
+
+	_MALI_OSK_LOCK_ORDER_FIRST = 0,
+
+	_MALI_OSK_LOCK_ORDER_SESSIONS,
+	_MALI_OSK_LOCK_ORDER_MEM_SESSION,
+	_MALI_OSK_LOCK_ORDER_MEM_INFO,
+	_MALI_OSK_LOCK_ORDER_MEM_PT_CACHE,
+	_MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP,
+	_MALI_OSK_LOCK_ORDER_PM_EXECUTION,
+	_MALI_OSK_LOCK_ORDER_EXECUTOR,
+	_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM,
+	_MALI_OSK_LOCK_ORDER_SCHEDULER,
+	_MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED,
+	_MALI_OSK_LOCK_ORDER_PROFILING,
+	_MALI_OSK_LOCK_ORDER_L2,
+	_MALI_OSK_LOCK_ORDER_L2_COMMAND,
+	_MALI_OSK_LOCK_ORDER_UTILIZATION,
+	_MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS,
+	_MALI_OSK_LOCK_ORDER_PM_STATE,
+
+	_MALI_OSK_LOCK_ORDER_LAST,
+} _mali_osk_lock_order_t;
+
+
+/** @brief OSK Mutual Exclusion Lock flags type
+ *
+ * - Any lock can use the order parameter.
+ */
+typedef enum {
+	_MALI_OSK_LOCKFLAG_UNORDERED        = 0x1, /**< Indicate that the order of this lock should not be checked */
+	_MALI_OSK_LOCKFLAG_ORDERED          = 0x2,
+	/** @enum _mali_osk_lock_flags_t
+	 *
+	 * Flags from 0x10000--0x80000000 are RESERVED for User-mode */
+
+} _mali_osk_lock_flags_t;
+
+/** @brief Mutual Exclusion Lock Mode Optimization hint
+ *
+ * The lock mode is used to implement the read/write locking of locks when we call
+ * functions _mali_osk_mutex_rw_init/wait/signal/term/. In this case, the RO mode can
+ * be used to allow multiple concurrent readers, but no writers. The RW mode is used for
+ * writers, and so will wait for all readers to release the lock (if any present).
+ * Further readers and writers will wait until the writer releases the lock.
+ *
+ * The mode is purely an optimization hint: for example, it is permissible for
+ * all locks to behave in RW mode, regardless of that supplied.
+ *
+ * It is an error to attempt to use locks in anything other that RW mode when
+ * call functions _mali_osk_mutex_rw_wait/signal().
+ *
+ */
+typedef enum {
+	_MALI_OSK_LOCKMODE_UNDEF = -1,  /**< Undefined lock mode. For internal use only */
+	_MALI_OSK_LOCKMODE_RW    = 0x0, /**< Read-write mode, default. All readers and writers are mutually-exclusive */
+	_MALI_OSK_LOCKMODE_RO,          /**< Read-only mode, to support multiple concurrent readers, but mutual exclusion in the presence of writers. */
+	/** @enum _mali_osk_lock_mode_t
+	 *
+	 * Lock modes 0x40--0x7F are RESERVED for User-mode */
+} _mali_osk_lock_mode_t;
+
+/** @brief Private types for Mutual Exclusion lock objects */
+typedef struct _mali_osk_lock_debug_s _mali_osk_lock_debug_t;
+typedef struct _mali_osk_spinlock_s _mali_osk_spinlock_t;
+typedef struct _mali_osk_spinlock_irq_s _mali_osk_spinlock_irq_t;
+typedef struct _mali_osk_mutex_s _mali_osk_mutex_t;
+typedef struct _mali_osk_mutex_rw_s _mali_osk_mutex_rw_t;
+
+/** @} */ /* end group _mali_osk_lock */
+
+/** @defgroup _mali_osk_low_level_memory OSK Low-level Memory Operations
+ * @{ */
+
+/**
+ * @brief Private data type for use in IO accesses to/from devices.
+ *
+ * This represents some range that is accessible from the device. Examples
+ * include:
+ * - Device Registers, which could be readable and/or writeable.
+ * - Memory that the device has access to, for storing configuration structures.
+ *
+ * Access to this range must be made through the _mali_osk_mem_ioread32() and
+ * _mali_osk_mem_iowrite32() functions.
+ */
+typedef struct _mali_io_address *mali_io_address;
+
+/** @defgroup _MALI_OSK_CPU_PAGE CPU Physical page size macros.
+ *
+ * The order of the page size is supplied for
+ * ease of use by algorithms that might require it, since it is easier to know
+ * it ahead of time rather than calculating it.
+ *
+ * The Mali Page Mask macro masks off the lower bits of a physical address to
+ * give the start address of the page for that physical address.
+ *
+ * @note The Mali device driver code is designed for systems with 4KB page size.
+ * Changing these macros will not make the entire Mali device driver work with
+ * page sizes other than 4KB.
+ *
+ * @note The CPU Physical Page Size has been assumed to be the same as the Mali
+ * Physical Page Size.
+ *
+ * @{
+ */
+
+/** CPU Page Order, as log to base 2 of the Page size. @see _MALI_OSK_CPU_PAGE_SIZE */
+#define _MALI_OSK_CPU_PAGE_ORDER ((u32)12)
+/** CPU Page Size, in bytes.               */
+#define _MALI_OSK_CPU_PAGE_SIZE (((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER))
+/** CPU Page Mask, which masks off the offset within a page */
+#define _MALI_OSK_CPU_PAGE_MASK (~((((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER)) - ((u32)1)))
+/** @} */ /* end of group _MALI_OSK_CPU_PAGE */
+
+/** @defgroup _MALI_OSK_MALI_PAGE Mali Physical Page size macros
+ *
+ * Mali Physical page size macros. The order of the page size is supplied for
+ * ease of use by algorithms that might require it, since it is easier to know
+ * it ahead of time rather than calculating it.
+ *
+ * The Mali Page Mask macro masks off the lower bits of a physical address to
+ * give the start address of the page for that physical address.
+ *
+ * @note The Mali device driver code is designed for systems with 4KB page size.
+ * Changing these macros will not make the entire Mali device driver work with
+ * page sizes other than 4KB.
+ *
+ * @note The Mali Physical Page Size has been assumed to be the same as the CPU
+ * Physical Page Size.
+ *
+ * @{
+ */
+
+/** Mali Page Order, as log to base 2 of the Page size. @see _MALI_OSK_MALI_PAGE_SIZE */
+#define _MALI_OSK_MALI_PAGE_ORDER PAGE_SHIFT
+/** Mali Page Size, in bytes.               */
+#define _MALI_OSK_MALI_PAGE_SIZE PAGE_SIZE
+/** Mali Page Mask, which masks off the offset within a page */
+#define _MALI_OSK_MALI_PAGE_MASK PAGE_MASK
+/** @} */ /* end of group _MALI_OSK_MALI_PAGE*/
+
+/** @brief flags for mapping a user-accessible memory range
+ *
+ * Where a function with prefix '_mali_osk_mem_mapregion' accepts flags as one
+ * of the function parameters, it will use one of these. These allow per-page
+ * control over mappings. Compare with the mali_memory_allocation_flag type,
+ * which acts over an entire range
+ *
+ * These may be OR'd together with bitwise OR (|), but must be cast back into
+ * the type after OR'ing.
+ */
+typedef enum {
+	_MALI_OSK_MEM_MAPREGION_FLAG_OS_ALLOCATED_PHYSADDR = 0x1, /**< Physical address is OS Allocated */
+} _mali_osk_mem_mapregion_flags_t;
+/** @} */ /* end group _mali_osk_low_level_memory */
+
+/** @defgroup _mali_osk_notification OSK Notification Queues
+ * @{ */
+
+/** @brief Private type for notification queue objects */
+typedef struct _mali_osk_notification_queue_t_struct _mali_osk_notification_queue_t;
+
+/** @brief Public notification data object type */
+typedef struct _mali_osk_notification_t_struct {
+	u32 notification_type;   /**< The notification type */
+	u32 result_buffer_size; /**< Size of the result buffer to copy to user space */
+	void *result_buffer;    /**< Buffer containing any type specific data */
+} _mali_osk_notification_t;
+
+/** @} */ /* end group _mali_osk_notification */
+
+
+/** @defgroup _mali_osk_timer OSK Timer Callbacks
+ * @{ */
+
+/** @brief Function to call when a timer expires
+ *
+ * When a timer expires, this function is called. Note that on many systems,
+ * a timer callback will be executed in IRQ context. Therefore, restrictions
+ * may apply on what can be done inside the timer callback.
+ *
+ * If a timer requires more work to be done than can be acheived in an IRQ
+ * context, then it may defer the work with a work-queue. For example, it may
+ * use \ref _mali_osk_wq_schedule_work() to make use of a bottom-half handler
+ * to carry out the remaining work.
+ *
+ * Stopping the timer with \ref _mali_osk_timer_del() blocks on compeletion of
+ * the callback. Therefore, the callback may not obtain any mutexes also held
+ * by any callers of _mali_osk_timer_del(). Otherwise, a deadlock may occur.
+ *
+ * @param arg Function-specific data */
+typedef void (*_mali_osk_timer_callback_t)(void *arg);
+
+/** @brief Private type for Timer Callback Objects */
+typedef struct _mali_osk_timer_t_struct _mali_osk_timer_t;
+/** @} */ /* end group _mali_osk_timer */
+
+
+/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists
+ * @{ */
+
+/** @brief Public List objects.
+ *
+ * To use, add a _mali_osk_list_t member to the structure that may become part
+ * of a list. When traversing the _mali_osk_list_t objects, use the
+ * _MALI_OSK_CONTAINER_OF() macro to recover the structure from its
+ *_mali_osk_list_t member
+ *
+ * Each structure may have multiple _mali_osk_list_t members, so that the
+ * structure is part of multiple lists. When traversing lists, ensure that the
+ * correct _mali_osk_list_t member is used, because type-checking will be
+ * lost by the compiler.
+ */
+typedef struct _mali_osk_list_s {
+	struct _mali_osk_list_s *next;
+	struct _mali_osk_list_s *prev;
+} _mali_osk_list_t;
+/** @} */ /* end group _mali_osk_list */
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief resource description struct
+ *
+ * Platform independent representation of a Mali HW resource
+ */
+typedef struct _mali_osk_resource {
+	const char *description;        /**< short description of the resource */
+	uintptr_t base;                 /**< Physical base address of the resource, as seen by Mali resources. */
+	const char *irq_name;           /**< Name of irq belong to this resource */
+	u32 irq;                        /**< IRQ number delivered to the CPU, or -1 to tell the driver to probe for it (if possible) */
+} _mali_osk_resource_t;
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_wait_queue OSK Wait Queue functionality
+ * @{ */
+/** @brief Private type for wait queue objects */
+typedef struct _mali_osk_wait_queue_t_struct _mali_osk_wait_queue_t;
+/** @} */ /* end group _mali_osk_wait_queue */
+
+/** @} */ /* end group osuapi */
+
+/** @} */ /* end group uddapi */
+
+/** @brief Mali print ctx type which uses seq_file
+  */
+typedef struct seq_file _mali_osk_print_ctx;
+
+#define _MALI_OSK_BITMAP_INVALIDATE_INDEX -1
+
+typedef struct _mali_osk_bitmap {
+	u32         reserve;
+	u32         last;
+	u32         max;
+	u32         avail;
+	_mali_osk_spinlock_t   *lock;
+	unsigned long          *table;
+} _mali_osk_bitmap_t;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_TYPES_H__ */
diff --git a/utgard/r6p1/common/mali_pm.c b/utgard/r6p1/common/mali_pm.c
new file mode 100755
index 0000000..1ef03a6
--- /dev/null
+++ b/utgard/r6p1/common/mali_pm.c
@@ -0,0 +1,1362 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pm.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_scheduler.h"
+#include "mali_group.h"
+#include "mali_pm_domain.h"
+#include "mali_pmu.h"
+
+#include "mali_executor.h"
+#include "mali_control_timer.h"
+
+#if defined(DEBUG)
+u32 num_pm_runtime_resume = 0;
+u32 num_pm_updates = 0;
+u32 num_pm_updates_up = 0;
+u32 num_pm_updates_down = 0;
+#endif
+
+#define MALI_PM_DOMAIN_DUMMY_MASK (1 << MALI_DOMAIN_INDEX_DUMMY)
+
+/* lock protecting power state (including pm_domains) */
+static _mali_osk_spinlock_irq_t *pm_lock_state = NULL;
+
+/* the wanted domain mask (protected by pm_lock_state) */
+static u32 pd_mask_wanted = 0;
+
+/* used to deferring the actual power changes */
+static _mali_osk_wq_work_t *pm_work = NULL;
+
+/* lock protecting power change execution */
+static _mali_osk_mutex_t *pm_lock_exec = NULL;
+
+/* PMU domains which are actually powered on (protected by pm_lock_exec) */
+static u32 pmu_mask_current = 0;
+
+/*
+ * domains which marked as powered on (protected by pm_lock_exec)
+ * This can be different from pmu_mask_current right after GPU power on
+ * if the PMU domains default to powered up.
+ */
+static u32 pd_mask_current = 0;
+
+static u16 domain_config[MALI_MAX_NUMBER_OF_DOMAINS] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	1 << MALI_DOMAIN_INDEX_DUMMY
+};
+
+/* The relative core power cost */
+#define MALI_GP_COST 3
+#define MALI_PP_COST 6
+#define MALI_L2_COST 1
+
+/*
+ *We have MALI_MAX_NUMBER_OF_PP_PHYSICAL_CORES + 1 rows in this matrix
+ *because we mush store the mask of different pp cores: 0, 1, 2, 3, 4, 5, 6, 7, 8.
+ */
+static int mali_pm_domain_power_cost_result[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1][MALI_MAX_NUMBER_OF_DOMAINS];
+/*
+ * Keep track of runtime PM state, so that we know
+ * how to resume during OS resume.
+ */
+#ifdef CONFIG_PM_RUNTIME
+static mali_bool mali_pm_runtime_active = MALI_FALSE;
+#else
+/* when kernel don't enable PM_RUNTIME, set the flag always true,
+ * for GPU will not power off by runtime */
+static mali_bool mali_pm_runtime_active = MALI_TRUE;
+#endif
+
+static void mali_pm_state_lock(void);
+static void mali_pm_state_unlock(void);
+static _mali_osk_errcode_t mali_pm_create_pm_domains(void);
+static void mali_pm_set_pmu_domain_config(void);
+static u32 mali_pm_get_registered_cores_mask(void);
+static void mali_pm_update_sync_internal(void);
+static mali_bool mali_pm_common_suspend(void);
+static void mali_pm_update_work(void *data);
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask);
+const char *mali_pm_group_stats_to_string(void);
+#endif
+
+_mali_osk_errcode_t mali_pm_initialize(void)
+{
+	_mali_osk_errcode_t err;
+	struct mali_pmu_core *pmu;
+
+	pm_lock_state = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED,
+			_MALI_OSK_LOCK_ORDER_PM_STATE);
+	if (NULL == pm_lock_state) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pm_lock_exec = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED,
+					    _MALI_OSK_LOCK_ORDER_PM_STATE);
+	if (NULL == pm_lock_exec) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pm_work = _mali_osk_wq_create_work(mali_pm_update_work, NULL);
+	if (NULL == pm_work) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pmu = mali_pmu_get_global_pmu_core();
+	if (NULL != pmu) {
+		/*
+		 * We have a Mali PMU, set the correct domain
+		 * configuration (default or custom)
+		 */
+
+		u32 registered_cores_mask;
+
+		mali_pm_set_pmu_domain_config();
+
+		registered_cores_mask = mali_pm_get_registered_cores_mask();
+		mali_pmu_set_registered_cores_mask(pmu, registered_cores_mask);
+
+		MALI_DEBUG_ASSERT(0 == pd_mask_wanted);
+	}
+
+	/* Create all power domains needed (at least one dummy domain) */
+	err = mali_pm_create_pm_domains();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_terminate();
+		return err;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_pm_terminate(void)
+{
+	if (NULL != pm_work) {
+		_mali_osk_wq_delete_work(pm_work);
+		pm_work = NULL;
+	}
+
+	mali_pm_domain_terminate();
+
+	if (NULL != pm_lock_exec) {
+		_mali_osk_mutex_term(pm_lock_exec);
+		pm_lock_exec = NULL;
+	}
+
+	if (NULL != pm_lock_state) {
+		_mali_osk_spinlock_irq_term(pm_lock_state);
+		pm_lock_state = NULL;
+	}
+}
+
+struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index,
+		struct mali_l2_cache_core *l2_cache)
+{
+	struct mali_pm_domain *domain;
+
+	domain = mali_pm_domain_get_from_mask(domain_config[domain_index]);
+	if (NULL == domain) {
+		MALI_DEBUG_ASSERT(0 == domain_config[domain_index]);
+		domain = mali_pm_domain_get_from_index(
+				 MALI_DOMAIN_INDEX_DUMMY);
+		domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK;
+	} else {
+		MALI_DEBUG_ASSERT(0 != domain_config[domain_index]);
+	}
+
+	MALI_DEBUG_ASSERT(NULL != domain);
+
+	mali_pm_domain_add_l2_cache(domain, l2_cache);
+
+	return domain; /* return the actual domain this was registered in */
+}
+
+struct mali_pm_domain *mali_pm_register_group(u32 domain_index,
+		struct mali_group *group)
+{
+	struct mali_pm_domain *domain;
+
+	domain = mali_pm_domain_get_from_mask(domain_config[domain_index]);
+	if (NULL == domain) {
+		MALI_DEBUG_ASSERT(0 == domain_config[domain_index]);
+		domain = mali_pm_domain_get_from_index(
+				 MALI_DOMAIN_INDEX_DUMMY);
+		domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK;
+	} else {
+		MALI_DEBUG_ASSERT(0 != domain_config[domain_index]);
+	}
+
+	MALI_DEBUG_ASSERT(NULL != domain);
+
+	mali_pm_domain_add_group(domain, group);
+
+	return domain; /* return the actual domain this was registered in */
+}
+
+mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains,
+				  struct mali_group **groups,
+				  u32 num_domains)
+{
+	mali_bool ret = MALI_TRUE; /* Assume all is powered on instantly */
+	u32 i;
+
+	mali_pm_state_lock();
+
+	for (i = 0; i < num_domains; i++) {
+		MALI_DEBUG_ASSERT_POINTER(domains[i]);
+		pd_mask_wanted |= mali_pm_domain_ref_get(domains[i]);
+		if (MALI_FALSE == mali_pm_domain_power_is_on(domains[i])) {
+			/*
+			 * Tell caller that the corresponding group
+			 * was not already powered on.
+			 */
+			ret = MALI_FALSE;
+		} else {
+			/*
+			 * There is a time gap between we power on the domain and
+			 * set the power state of the corresponding groups to be on.
+			 */
+			if (NULL != groups[i] &&
+			    MALI_FALSE == mali_group_power_is_on(groups[i])) {
+				ret = MALI_FALSE;
+			}
+		}
+	}
+
+	MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (get refs)\n", pd_mask_wanted));
+
+	mali_pm_state_unlock();
+
+	return ret;
+}
+
+mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains,
+				  u32 num_domains)
+{
+	u32 mask = 0;
+	mali_bool ret;
+	u32 i;
+
+	mali_pm_state_lock();
+
+	for (i = 0; i < num_domains; i++) {
+		MALI_DEBUG_ASSERT_POINTER(domains[i]);
+		mask |= mali_pm_domain_ref_put(domains[i]);
+	}
+
+	if (0 == mask) {
+		/* return false, all domains should still stay on */
+		ret = MALI_FALSE;
+	} else {
+		/* Assert that we are dealing with a change */
+		MALI_DEBUG_ASSERT((pd_mask_wanted & mask) == mask);
+
+		/* Update our desired domain mask */
+		pd_mask_wanted &= ~mask;
+
+		/* return true; one or more domains can now be powered down */
+		ret = MALI_TRUE;
+	}
+
+	MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (put refs)\n", pd_mask_wanted));
+
+	mali_pm_state_unlock();
+
+	return ret;
+}
+
+void mali_pm_init_begin(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	_mali_osk_pm_dev_ref_get_sync();
+
+	/* Ensure all PMU domains are on */
+	if (NULL != pmu) {
+		mali_pmu_power_up_all(pmu);
+	}
+}
+
+void mali_pm_init_end(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	/* Ensure all PMU domains are off */
+	if (NULL != pmu) {
+		mali_pmu_power_down_all(pmu);
+	}
+
+	_mali_osk_pm_dev_ref_put();
+}
+
+void mali_pm_update_sync(void)
+{
+	mali_pm_exec_lock();
+
+	if (MALI_TRUE == mali_pm_runtime_active) {
+		/*
+		 * Only update if GPU is powered on.
+		 * Deactivation of the last group will result in both a
+		 * deferred runtime PM suspend operation and
+		 * deferred execution of this function.
+		 * mali_pm_runtime_active will be false if runtime PM
+		 * executed first and thus the GPU is now fully powered off.
+		 */
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pm_update_async(void)
+{
+	_mali_osk_wq_schedule_work(pm_work);
+}
+
+void mali_pm_os_suspend(mali_bool os_suspend)
+{
+	int ret;
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: OS suspend\n"));
+
+	/* Suspend execution of all jobs, and go to inactive state */
+	mali_executor_suspend();
+
+	if (os_suspend) {
+		mali_control_timer_suspend(MALI_TRUE);
+	}
+
+	mali_pm_exec_lock();
+
+	ret = mali_pm_common_suspend();
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == ret);
+	MALI_IGNORE(ret);
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pm_os_resume(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: OS resume\n"));
+
+	mali_pm_exec_lock();
+
+#if defined(DEBUG)
+	mali_pm_state_lock();
+
+	/* Assert that things are as we left them in os_suspend(). */
+	MALI_DEBUG_ASSERT(0 == pd_mask_wanted);
+	MALI_DEBUG_ASSERT(0 == pd_mask_current);
+	MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+	mali_pm_state_unlock();
+#endif
+
+	if (MALI_TRUE == mali_pm_runtime_active) {
+		/* Runtime PM was active, so reset PMU */
+		if (NULL != pmu) {
+			mali_pmu_reset(pmu);
+			pmu_mask_current = mali_pmu_get_mask(pmu);
+
+			MALI_DEBUG_PRINT(3, ("Mali PM: OS resume 0x%x \n", pmu_mask_current));
+		}
+
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+
+	/* Start executing jobs again */
+	mali_executor_resume();
+}
+
+mali_bool mali_pm_runtime_suspend(void)
+{
+	mali_bool ret;
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: Runtime suspend\n"));
+
+	mali_pm_exec_lock();
+
+	/*
+	 * Put SW state directly into "off" state, and do not bother to power
+	 * down each power domain, because entire GPU will be powered off
+	 * when we return.
+	 * For runtime PM suspend, in contrast to OS suspend, there is a race
+	 * between this function and the mali_pm_update_sync_internal(), which
+	 * is fine...
+	 */
+	ret = mali_pm_common_suspend();
+	if (MALI_TRUE == ret) {
+		mali_pm_runtime_active = MALI_FALSE;
+	} else {
+		/*
+		 * Process the "power up" instead,
+		 * which could have been "lost"
+		 */
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+
+	return ret;
+}
+
+void mali_pm_runtime_resume(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	mali_pm_exec_lock();
+
+	mali_pm_runtime_active = MALI_TRUE;
+
+#if defined(DEBUG)
+	++num_pm_runtime_resume;
+
+	mali_pm_state_lock();
+
+	/*
+	 * Assert that things are as we left them in runtime_suspend(),
+	 * except for pd_mask_wanted which normally will be the reason we
+	 * got here (job queued => domains wanted)
+	 */
+	MALI_DEBUG_ASSERT(0 == pd_mask_current);
+	MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+	mali_pm_state_unlock();
+#endif
+
+	if (NULL != pmu) {
+		mali_pmu_reset(pmu);
+		pmu_mask_current = mali_pmu_get_mask(pmu);
+		MALI_DEBUG_PRINT(3, ("Mali PM: Runtime resume 0x%x \n", pmu_mask_current));
+	}
+
+	/*
+	 * Normally we are resumed because a job has just been queued.
+	 * pd_mask_wanted should thus be != 0.
+	 * It is however possible for others to take a Mali Runtime PM ref
+	 * without having a job queued.
+	 * We should however always call mali_pm_update_sync_internal(),
+	 * because this will take care of any potential mismatch between
+	 * pmu_mask_current and pd_mask_current.
+	 */
+	mali_pm_update_sync_internal();
+
+	mali_pm_exec_unlock();
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain,
+			      char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tPower domain: id %u\n",
+				mali_pm_domain_get_id(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tMask: 0x%04x\n",
+				mali_pm_domain_get_mask(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tUse count: %u\n",
+				mali_pm_domain_get_use_count(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tCurrent power state: %s\n",
+				(mali_pm_domain_get_mask(domain) & pd_mask_current) ?
+				"On" : "Off");
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tWanted power state: %s\n",
+				(mali_pm_domain_get_mask(domain) & pd_mask_wanted) ?
+				"On" : "Off");
+
+	return n;
+}
+#endif
+
+static void mali_pm_state_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(pm_lock_state);
+}
+
+static void mali_pm_state_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(pm_lock_state);
+}
+
+void mali_pm_exec_lock(void)
+{
+	_mali_osk_mutex_wait(pm_lock_exec);
+}
+
+void mali_pm_exec_unlock(void)
+{
+	_mali_osk_mutex_signal(pm_lock_exec);
+}
+
+static void mali_pm_domain_power_up(u32 power_up_mask,
+				    struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS],
+				    u32 *num_groups_up,
+				    struct mali_l2_cache_core *l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES],
+				    u32 *num_l2_up)
+{
+	u32 domain_bit;
+	u32 notify_mask = power_up_mask;
+
+	MALI_DEBUG_ASSERT(0 != power_up_mask);
+	MALI_DEBUG_ASSERT_POINTER(groups_up);
+	MALI_DEBUG_ASSERT_POINTER(num_groups_up);
+	MALI_DEBUG_ASSERT(0 == *num_groups_up);
+	MALI_DEBUG_ASSERT_POINTER(l2_up);
+	MALI_DEBUG_ASSERT_POINTER(num_l2_up);
+	MALI_DEBUG_ASSERT(0 == *num_l2_up);
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state);
+
+	MALI_DEBUG_PRINT(5,
+			 ("PM update:      Powering up domains: . [%s]\n",
+			  mali_pm_mask_to_string(power_up_mask)));
+
+	pd_mask_current |= power_up_mask;
+
+	domain_bit = _mali_osk_fls(notify_mask);
+	while (0 != domain_bit) {
+		u32 domain_id = domain_bit - 1;
+		struct mali_pm_domain *domain =
+			mali_pm_domain_get_from_index(
+				domain_id);
+		struct mali_l2_cache_core *l2_cache;
+		struct mali_l2_cache_core *l2_cache_tmp;
+		struct mali_group *group;
+		struct mali_group *group_tmp;
+
+		/* Mark domain as powered up */
+		mali_pm_domain_set_power_on(domain, MALI_TRUE);
+
+		/*
+		 * Make a note of the L2 and/or group(s) to notify
+		 * (need to release the PM state lock before doing so)
+		 */
+
+		_MALI_OSK_LIST_FOREACHENTRY(l2_cache,
+					    l2_cache_tmp,
+					    mali_pm_domain_get_l2_cache_list(
+						    domain),
+					    struct mali_l2_cache_core,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_l2_up <
+					  MALI_MAX_NUMBER_OF_L2_CACHE_CORES);
+			l2_up[*num_l2_up] = l2_cache;
+			(*num_l2_up)++;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group,
+					    group_tmp,
+					    mali_pm_domain_get_group_list(domain),
+					    struct mali_group,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_groups_up <
+					  MALI_MAX_NUMBER_OF_GROUPS);
+			groups_up[*num_groups_up] = group;
+
+			(*num_groups_up)++;
+		}
+
+		/* Remove current bit and find next */
+		notify_mask &= ~(1 << (domain_id));
+		domain_bit = _mali_osk_fls(notify_mask);
+	}
+}
+static void mali_pm_domain_power_down(u32 power_down_mask,
+				      struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS],
+				      u32 *num_groups_down,
+				      struct mali_l2_cache_core *l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES],
+				      u32 *num_l2_down)
+{
+	u32 domain_bit;
+	u32 notify_mask = power_down_mask;
+
+	MALI_DEBUG_ASSERT(0 != power_down_mask);
+	MALI_DEBUG_ASSERT_POINTER(groups_down);
+	MALI_DEBUG_ASSERT_POINTER(num_groups_down);
+	MALI_DEBUG_ASSERT(0 == *num_groups_down);
+	MALI_DEBUG_ASSERT_POINTER(l2_down);
+	MALI_DEBUG_ASSERT_POINTER(num_l2_down);
+	MALI_DEBUG_ASSERT(0 == *num_l2_down);
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state);
+
+	MALI_DEBUG_PRINT(5,
+			 ("PM update:      Powering down domains: [%s]\n",
+			  mali_pm_mask_to_string(power_down_mask)));
+
+	pd_mask_current &= ~power_down_mask;
+
+	domain_bit = _mali_osk_fls(notify_mask);
+	while (0 != domain_bit) {
+		u32 domain_id = domain_bit - 1;
+		struct mali_pm_domain *domain =
+			mali_pm_domain_get_from_index(domain_id);
+		struct mali_l2_cache_core *l2_cache;
+		struct mali_l2_cache_core *l2_cache_tmp;
+		struct mali_group *group;
+		struct mali_group *group_tmp;
+
+		/* Mark domain as powered down */
+		mali_pm_domain_set_power_on(domain, MALI_FALSE);
+
+		/*
+		 * Make a note of the L2s and/or groups to notify
+		 * (need to release the PM state lock before doing so)
+		 */
+
+		_MALI_OSK_LIST_FOREACHENTRY(l2_cache,
+					    l2_cache_tmp,
+					    mali_pm_domain_get_l2_cache_list(domain),
+					    struct mali_l2_cache_core,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_l2_down <
+					  MALI_MAX_NUMBER_OF_L2_CACHE_CORES);
+			l2_down[*num_l2_down] = l2_cache;
+			(*num_l2_down)++;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group,
+					    group_tmp,
+					    mali_pm_domain_get_group_list(domain),
+					    struct mali_group,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_groups_down <
+					  MALI_MAX_NUMBER_OF_GROUPS);
+			groups_down[*num_groups_down] = group;
+			(*num_groups_down)++;
+		}
+
+		/* Remove current bit and find next */
+		notify_mask &= ~(1 << (domain_id));
+		domain_bit = _mali_osk_fls(notify_mask);
+	}
+}
+
+/*
+ * Execute pending power domain changes
+ * pm_lock_exec lock must be taken by caller.
+ */
+static void mali_pm_update_sync_internal(void)
+{
+	/*
+	 * This should only be called in non-atomic context
+	 * (normally as deferred work)
+	 *
+	 * Look at the pending power domain changes, and execute these.
+	 * Make sure group and schedulers are notified about changes.
+	 */
+
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	u32 power_down_mask;
+	u32 power_up_mask;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+
+#if defined(DEBUG)
+	++num_pm_updates;
+#endif
+
+	/* Hold PM state lock while we look at (and obey) the wanted state */
+	mali_pm_state_lock();
+
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Wanted domain mask: .. [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_wanted)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	/* Figure out which cores we need to power on */
+	power_up_mask = pd_mask_wanted &
+			(pd_mask_wanted ^ pd_mask_current);
+
+	if (0 != power_up_mask) {
+		u32 power_up_mask_pmu;
+		struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_up = 0;
+		struct mali_l2_cache_core *
+			l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_up = 0;
+		u32 i;
+
+#if defined(DEBUG)
+		++num_pm_updates_up;
+#endif
+
+		/*
+		 * Make sure dummy/global domain is always included when
+		 * powering up, since this is controlled by runtime PM,
+		 * and device power is on at this stage.
+		 */
+		power_up_mask |= MALI_PM_DOMAIN_DUMMY_MASK;
+
+		/* Power up only real PMU domains */
+		power_up_mask_pmu = power_up_mask & ~MALI_PM_DOMAIN_DUMMY_MASK;
+
+		/* But not those that happen to be powered on already */
+		power_up_mask_pmu &= (power_up_mask ^ pmu_mask_current) &
+				     power_up_mask;
+
+		if (0 != power_up_mask_pmu) {
+			MALI_DEBUG_ASSERT(NULL != pmu);
+			pmu_mask_current |= power_up_mask_pmu;
+			mali_pmu_power_up(pmu, power_up_mask_pmu);
+		}
+
+		/*
+		 * Put the domains themselves in power up state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_up(power_up_mask,
+					groups_up, &num_groups_up,
+					l2_up, &num_l2_up);
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/* Notify each L2 cache that we have be powered up */
+		for (i = 0; i < num_l2_up; i++) {
+			mali_l2_cache_power_up(l2_up[i]);
+		}
+
+		/*
+		 * Tell execution module about all the groups we have
+		 * powered up. Groups will be notified as a result of this.
+		 */
+		mali_executor_group_power_up(groups_up, num_groups_up);
+
+		/* Lock state again before checking for power down */
+		mali_pm_state_lock();
+	}
+
+	/* Figure out which cores we need to power off */
+	power_down_mask = pd_mask_current &
+			  (pd_mask_wanted ^ pd_mask_current);
+
+	/*
+	 * Never power down the dummy/global domain here. This is to be done
+	 * from a suspend request (since this domain is only physicall powered
+	 * down at that point)
+	 */
+	power_down_mask &= ~MALI_PM_DOMAIN_DUMMY_MASK;
+
+	if (0 != power_down_mask) {
+		u32 power_down_mask_pmu;
+		struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_down = 0;
+		struct mali_l2_cache_core *
+			l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_down = 0;
+		u32 i;
+
+#if defined(DEBUG)
+		++num_pm_updates_down;
+#endif
+
+		/*
+		 * Put the domains themselves in power down state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_down(power_down_mask,
+					  groups_down, &num_groups_down,
+					  l2_down, &num_l2_down);
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/*
+		 * Tell execution module about all the groups we will be
+		 * powering down. Groups will be notified as a result of this.
+		 */
+		if (0 < num_groups_down) {
+			mali_executor_group_power_down(groups_down, num_groups_down);
+		}
+
+		/* Notify each L2 cache that we will be powering down */
+		for (i = 0; i < num_l2_down; i++) {
+			mali_l2_cache_power_down(l2_down[i]);
+		}
+
+		/*
+		 * Power down only PMU domains which should not stay on
+		 * Some domains might for instance currently be incorrectly
+		 * powered up if default domain power state is all on.
+		 */
+		power_down_mask_pmu = pmu_mask_current & (~pd_mask_current);
+
+		if (0 != power_down_mask_pmu) {
+			MALI_DEBUG_ASSERT(NULL != pmu);
+			pmu_mask_current &= ~power_down_mask_pmu;
+			mali_pmu_power_down(pmu, power_down_mask_pmu);
+
+		}
+	} else {
+		/*
+		 * Power down only PMU domains which should not stay on
+		 * Some domains might for instance currently be incorrectly
+		 * powered up if default domain power state is all on.
+		 */
+		u32 power_down_mask_pmu;
+
+		/* No need for state lock since we'll only update PMU */
+		mali_pm_state_unlock();
+
+		power_down_mask_pmu = pmu_mask_current & (~pd_mask_current);
+
+		if (0 != power_down_mask_pmu) {
+			MALI_DEBUG_ASSERT(NULL != pmu);
+			pmu_mask_current &= ~power_down_mask_pmu;
+			mali_pmu_power_down(pmu, power_down_mask_pmu);
+		}
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM update post: Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update post: Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update post: Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+}
+
+static mali_bool mali_pm_common_suspend(void)
+{
+	mali_pm_state_lock();
+
+	if (0 != pd_mask_wanted) {
+		MALI_DEBUG_PRINT(5, ("PM: Aborting suspend operation\n\n\n"));
+		mali_pm_state_unlock();
+		return MALI_FALSE;
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Wanted domain mask: .. [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_wanted)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	if (0 != pd_mask_current) {
+		/*
+		 * We have still some domains powered on.
+		 * It is for instance very normal that at least the
+		 * dummy/global domain is marked as powered on at this point.
+		 * (because it is physically powered on until this function
+		 * returns)
+		 */
+
+		struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_down = 0;
+		struct mali_l2_cache_core *
+			l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_down = 0;
+		u32 i;
+
+		/*
+		 * Put the domains themselves in power down state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_down(pd_mask_current,
+					  groups_down,
+					  &num_groups_down,
+					  l2_down,
+					  &num_l2_down);
+
+		MALI_DEBUG_ASSERT(0 == pd_mask_current);
+		MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/*
+		 * Tell execution module about all the groups we will be
+		 * powering down. Groups will be notified as a result of this.
+		 */
+		if (0 < num_groups_down) {
+			mali_executor_group_power_down(groups_down, num_groups_down);
+		}
+
+		/* Notify each L2 cache that we will be powering down */
+		for (i = 0; i < num_l2_down; i++) {
+			mali_l2_cache_power_down(l2_down[i]);
+		}
+
+		pmu_mask_current = 0;
+	} else {
+		MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+		MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+		mali_pm_state_unlock();
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Current domain mask:  [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Current PMU mask: ... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Group power stats: .. <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	return MALI_TRUE;
+}
+
+static void mali_pm_update_work(void *data)
+{
+	MALI_IGNORE(data);
+	mali_pm_update_sync();
+}
+
+static _mali_osk_errcode_t mali_pm_create_pm_domains(void)
+{
+	int i;
+
+	/* Create all domains (including dummy domain) */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0x0 == domain_config[i]) continue;
+
+		if (NULL == mali_pm_domain_create(domain_config[i])) {
+			return _MALI_OSK_ERR_NOMEM;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_pm_set_default_pm_domain_config(void)
+{
+	MALI_DEBUG_ASSERT(0 != _mali_osk_resource_base_address());
+
+	/* GP core */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_GP, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_GP] = 0x01;
+	}
+
+	/* PP0 - PP3 core */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP0, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 2;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 1;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 0;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP1, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 3;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 2;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 1;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP2, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 4;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 2;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 1;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP3, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 5;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 2;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 1;
+		}
+	}
+
+	/* PP4 - PP7 */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP4, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP4] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP5, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP5] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP6, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP6] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP7, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP7] = 0x01 << 3;
+	}
+
+	/* L2gp/L2PP0/L2PP4 */
+	if (mali_is_mali400()) {
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI400_OFFSET_L2_CACHE0, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 1;
+		}
+	} else if (mali_is_mali450()) {
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE0, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 0;
+		}
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE1, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 1;
+		}
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE2, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L22] = 0x01 << 3;
+		}
+	} else if (mali_is_mali470()) {
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI470_OFFSET_L2_CACHE1, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 0;
+		}
+	}
+}
+
+static u32 mali_pm_get_registered_cores_mask(void)
+{
+	int i = 0;
+	u32 mask = 0;
+
+	for (i = 0; i < MALI_DOMAIN_INDEX_DUMMY; i++) {
+		mask |= domain_config[i];
+	}
+
+	return mask;
+}
+
+static void mali_pm_set_pmu_domain_config(void)
+{
+	int i = 0;
+
+	_mali_osk_device_data_pmu_config_get(domain_config, MALI_MAX_NUMBER_OF_DOMAINS - 1);
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) {
+		if (0 != domain_config[i]) {
+			MALI_DEBUG_PRINT(2, ("Using customer pmu config:\n"));
+			break;
+		}
+	}
+
+	if (MALI_MAX_NUMBER_OF_DOMAINS - 1 == i) {
+		MALI_DEBUG_PRINT(2, ("Using hw detect pmu config:\n"));
+		mali_pm_set_default_pm_domain_config();
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) {
+		if (domain_config[i]) {
+			MALI_DEBUG_PRINT(2, ("domain_config[%d] = 0x%x \n", i, domain_config[i]));
+		}
+	}
+	/* Can't override dummy domain mask */
+	domain_config[MALI_DOMAIN_INDEX_DUMMY] =
+		1 << MALI_DOMAIN_INDEX_DUMMY;
+}
+
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask)
+{
+	static char bit_str[MALI_MAX_NUMBER_OF_DOMAINS + 1];
+	int bit;
+	int str_pos = 0;
+
+	/* Must be protected by lock since we use shared string buffer */
+	if (NULL != pm_lock_exec) {
+		MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	}
+
+	for (bit = MALI_MAX_NUMBER_OF_DOMAINS - 1; bit >= 0; bit--) {
+		if (mask & (1 << bit)) {
+			bit_str[str_pos] = 'X';
+		} else {
+			bit_str[str_pos] = '-';
+		}
+		str_pos++;
+	}
+
+	bit_str[MALI_MAX_NUMBER_OF_DOMAINS] = '\0';
+
+	return bit_str;
+}
+
+const char *mali_pm_group_stats_to_string(void)
+{
+	static char bit_str[MALI_MAX_NUMBER_OF_GROUPS + 1];
+	u32 num_groups = mali_group_get_glob_num_groups();
+	u32 i;
+
+	/* Must be protected by lock since we use shared string buffer */
+	if (NULL != pm_lock_exec) {
+		MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	}
+
+	for (i = 0; i < num_groups && i < MALI_MAX_NUMBER_OF_GROUPS; i++) {
+		struct mali_group *group;
+
+		group = mali_group_get_glob_group(i);
+
+		if (MALI_TRUE == mali_group_power_is_on(group)) {
+			bit_str[i] = 'X';
+		} else {
+			bit_str[i] = '-';
+		}
+	}
+
+	bit_str[i] = '\0';
+
+	return bit_str;
+}
+#endif
+
+/*
+ * num_pp is the number of PP cores which will be powered on given this mask
+ * cost is the total power cost of cores which will be powered on given this mask
+ */
+static void mali_pm_stat_from_mask(u32 mask, u32 *num_pp, u32 *cost)
+{
+	u32 i;
+
+	/* loop through all cores */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (!(domain_config[i] & mask)) {
+			continue;
+		}
+
+		switch (i) {
+		case MALI_DOMAIN_INDEX_GP:
+			*cost += MALI_GP_COST;
+
+			break;
+		case MALI_DOMAIN_INDEX_PP0: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP1: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP2: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP3:
+			if (mali_is_mali400()) {
+				if ((domain_config[MALI_DOMAIN_INDEX_L20] & mask)
+				    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+					== domain_config[MALI_DOMAIN_INDEX_L20])) {
+					*num_pp += 1;
+				}
+			} else {
+				if ((domain_config[MALI_DOMAIN_INDEX_L21] & mask)
+				    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+					== domain_config[MALI_DOMAIN_INDEX_L21])) {
+					*num_pp += 1;
+				}
+			}
+
+			*cost += MALI_PP_COST;
+			break;
+		case MALI_DOMAIN_INDEX_PP4: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP5: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP6: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP7:
+			MALI_DEBUG_ASSERT(mali_is_mali450());
+
+			if ((domain_config[MALI_DOMAIN_INDEX_L22] & mask)
+			    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+				== domain_config[MALI_DOMAIN_INDEX_L22])) {
+				*num_pp += 1;
+			}
+
+			*cost += MALI_PP_COST;
+			break;
+		case MALI_DOMAIN_INDEX_L20: /* Fall through */
+		case MALI_DOMAIN_INDEX_L21: /* Fall through */
+		case MALI_DOMAIN_INDEX_L22:
+			*cost += MALI_L2_COST;
+
+			break;
+		}
+	}
+}
+
+void mali_pm_power_cost_setup(void)
+{
+	/*
+	 * Two parallel arrays which store the best domain mask and its cost
+	 * The index is the number of PP cores, E.g. Index 0 is for 1 PP option,
+	 * might have mask 0x2 and with cost of 1, lower cost is better
+	 */
+	u32 best_mask[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 };
+	u32 best_cost[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 };
+	/* Array cores_in_domain is used to store the total pp cores in each pm domain. */
+	u32 cores_in_domain[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	/* Domain_count is used to represent the max domain we have.*/
+	u32 max_domain_mask = 0;
+	u32 max_domain_id = 0;
+	u32 always_on_pp_cores = 0;
+
+	u32 num_pp, cost, mask;
+	u32 i, j , k;
+
+	/* Initialize statistics */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; i++) {
+		best_mask[i] = 0;
+		best_cost[i] = 0xFFFFFFFF; /* lower cost is better */
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1; i++) {
+		for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) {
+			mali_pm_domain_power_cost_result[i][j] = 0;
+		}
+	}
+
+	/* Caculate number of pp cores of a given domain config. */
+	for (i = MALI_DOMAIN_INDEX_PP0; i <= MALI_DOMAIN_INDEX_PP7; i++) {
+		if (0 < domain_config[i]) {
+			/* Get the max domain mask value used to caculate power cost
+			 * and we don't count in always on pp cores. */
+			if (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[i]
+			    && max_domain_mask < domain_config[i]) {
+				max_domain_mask = domain_config[i];
+			}
+
+			if (MALI_PM_DOMAIN_DUMMY_MASK == domain_config[i]) {
+				always_on_pp_cores++;
+			}
+		}
+	}
+	max_domain_id = _mali_osk_fls(max_domain_mask);
+
+	/*
+	 * Try all combinations of power domains and check how many PP cores
+	 * they have and their power cost.
+	 */
+	for (mask = 0; mask < (1 << max_domain_id); mask++) {
+		num_pp = 0;
+		cost = 0;
+
+		mali_pm_stat_from_mask(mask, &num_pp, &cost);
+
+		/* This mask is usable for all MP1 up to num_pp PP cores, check statistics for all */
+		for (i = 0; i < num_pp; i++) {
+			if (best_cost[i] >= cost) {
+				best_cost[i] = cost;
+				best_mask[i] = mask;
+			}
+		}
+	}
+
+	/*
+	 * If we want to enable x pp cores, if x is less than number of always_on pp cores,
+	 * all of pp cores we will enable must be always_on pp cores.
+	 */
+	for (i = 0; i < mali_executor_get_num_cores_total(); i++) {
+		if (i < always_on_pp_cores) {
+			mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1]
+				= i + 1;
+		} else {
+			mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1]
+				= always_on_pp_cores;
+		}
+	}
+
+	/* In this loop, variable i represent for the number of non-always on pp cores we want to enabled. */
+	for (i = 0; i < (mali_executor_get_num_cores_total() - always_on_pp_cores); i++) {
+		if (best_mask[i] == 0) {
+			/* This MP variant is not available */
+			continue;
+		}
+
+		for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) {
+			cores_in_domain[j] = 0;
+		}
+
+		for (j = MALI_DOMAIN_INDEX_PP0; j <= MALI_DOMAIN_INDEX_PP7; j++) {
+			if (0 < domain_config[j]
+			    && (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[i])) {
+				cores_in_domain[_mali_osk_fls(domain_config[j]) - 1]++;
+			}
+		}
+
+		/* In this loop, j represent for the number we have already enabled.*/
+		for (j = 0; j <= i;) {
+			/* j used to visit all of domain to get the number of pp cores remained in it. */
+			for (k = 0; k < max_domain_id; k++) {
+				/* If domain k in best_mask[i] is enabled and this domain has extra pp cores,
+				 * we know we must pick at least one pp core from this domain.
+				 * And then we move to next enabled pm domain. */
+				if ((best_mask[i] & (0x1 << k)) && (0 < cores_in_domain[k])) {
+					cores_in_domain[k]--;
+					mali_pm_domain_power_cost_result[always_on_pp_cores + i + 1][k]++;
+					j++;
+					if (j > i) {
+						break;
+					}
+				}
+			}
+		}
+	}
+}
+
+/*
+ * When we are doing core scaling,
+ * this function is called to return the best mask to
+ * achieve the best pp group power cost.
+ */
+void mali_pm_get_best_power_cost_mask(int num_requested, int *dst)
+{
+	MALI_DEBUG_ASSERT((mali_executor_get_num_cores_total() >= num_requested) && (0 <= num_requested));
+
+	_mali_osk_memcpy(dst, mali_pm_domain_power_cost_result[num_requested], MALI_MAX_NUMBER_OF_DOMAINS * sizeof(int));
+}
+
+u32 mali_pm_get_current_mask(void)
+{
+	return pd_mask_current;
+}
+
+u32 mali_pm_get_wanted_mask(void)
+{
+	return pd_mask_wanted;
+}
diff --git a/utgard/r6p1/common/mali_pm.h b/utgard/r6p1/common/mali_pm.h
new file mode 100755
index 0000000..3c11977
--- /dev/null
+++ b/utgard/r6p1/common/mali_pm.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_H__
+#define __MALI_PM_H__
+
+#include "mali_osk.h"
+#include "mali_pm_domain.h"
+
+#define MALI_DOMAIN_INDEX_GP        0
+#define MALI_DOMAIN_INDEX_PP0       1
+#define MALI_DOMAIN_INDEX_PP1       2
+#define MALI_DOMAIN_INDEX_PP2       3
+#define MALI_DOMAIN_INDEX_PP3       4
+#define MALI_DOMAIN_INDEX_PP4       5
+#define MALI_DOMAIN_INDEX_PP5       6
+#define MALI_DOMAIN_INDEX_PP6       7
+#define MALI_DOMAIN_INDEX_PP7       8
+#define MALI_DOMAIN_INDEX_L20       9
+#define MALI_DOMAIN_INDEX_L21      10
+#define MALI_DOMAIN_INDEX_L22      11
+/*
+ * The dummy domain is used when there is no physical power domain
+ * (e.g. no PMU or always on cores)
+ */
+#define MALI_DOMAIN_INDEX_DUMMY    12
+#define MALI_MAX_NUMBER_OF_DOMAINS 13
+
+/**
+ * Initialize the Mali PM module
+ *
+ * PM module covers Mali PM core, PM domains and Mali PMU
+ */
+_mali_osk_errcode_t mali_pm_initialize(void);
+
+/**
+ * Terminate the Mali PM module
+ */
+void mali_pm_terminate(void);
+
+void mali_pm_exec_lock(void);
+void mali_pm_exec_unlock(void);
+
+
+struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index,
+		struct mali_l2_cache_core *l2_cache);
+struct mali_pm_domain *mali_pm_register_group(u32 domain_index,
+		struct mali_group *group);
+
+mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains,
+				  struct mali_group **groups,
+				  u32 num_domains);
+mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains,
+				  u32 num_domains);
+
+void mali_pm_init_begin(void);
+void mali_pm_init_end(void);
+
+void mali_pm_update_sync(void);
+void mali_pm_update_async(void);
+
+/* Callback functions for system power management */
+void mali_pm_os_suspend(mali_bool os_suspend);
+void mali_pm_os_resume(void);
+
+mali_bool mali_pm_runtime_suspend(void);
+void mali_pm_runtime_resume(void);
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain,
+			      char *buf, u32 size);
+#endif
+
+void mali_pm_power_cost_setup(void);
+
+void mali_pm_get_best_power_cost_mask(int num_requested, int *dst);
+
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask);
+#endif
+
+u32 mali_pm_get_current_mask(void);
+u32 mali_pm_get_wanted_mask(void);
+#endif /* __MALI_PM_H__ */
diff --git a/utgard/r6p1/common/mali_pm_domain.c b/utgard/r6p1/common/mali_pm_domain.c
new file mode 100755
index 0000000..9db8488
--- /dev/null
+++ b/utgard/r6p1/common/mali_pm_domain.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_pm_domain.h"
+#include "mali_pmu.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+
+static struct mali_pm_domain *mali_pm_domains[MALI_MAX_NUMBER_OF_DOMAINS] =
+{ NULL, };
+
+void mali_pm_domain_initialize(void)
+{
+	/* Domains will be initialized/created on demand */
+}
+
+void mali_pm_domain_terminate(void)
+{
+	int i;
+
+	/* Delete all domains that has been created */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		mali_pm_domain_delete(mali_pm_domains[i]);
+		mali_pm_domains[i] = NULL;
+	}
+}
+
+struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask)
+{
+	struct mali_pm_domain *domain = NULL;
+	u32 domain_id = 0;
+
+	domain = mali_pm_domain_get_from_mask(pmu_mask);
+	if (NULL != domain) return domain;
+
+	MALI_DEBUG_PRINT(2,
+			 ("Mali PM domain: Creating Mali PM domain (mask=0x%08X)\n",
+			  pmu_mask));
+
+	domain = (struct mali_pm_domain *)_mali_osk_malloc(
+			 sizeof(struct mali_pm_domain));
+	if (NULL != domain) {
+		domain->power_is_on = MALI_FALSE;
+		domain->pmu_mask = pmu_mask;
+		domain->use_count = 0;
+		_mali_osk_list_init(&domain->group_list);
+		_mali_osk_list_init(&domain->l2_cache_list);
+
+		domain_id = _mali_osk_fls(pmu_mask) - 1;
+		/* Verify the domain_id */
+		MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > domain_id);
+		/* Verify that pmu_mask only one bit is set */
+		MALI_DEBUG_ASSERT((1 << domain_id) == pmu_mask);
+		mali_pm_domains[domain_id] = domain;
+
+		return domain;
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Unable to create PM domain\n"));
+	}
+
+	return NULL;
+}
+
+void mali_pm_domain_delete(struct mali_pm_domain *domain)
+{
+	if (NULL == domain) {
+		return;
+	}
+
+	_mali_osk_list_delinit(&domain->group_list);
+	_mali_osk_list_delinit(&domain->l2_cache_list);
+
+	_mali_osk_free(domain);
+}
+
+void mali_pm_domain_add_group(struct mali_pm_domain *domain,
+			      struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	/*
+	 * Use addtail because virtual group is created last and it needs
+	 * to be at the end of the list (in order to be activated after
+	 * all children.
+	 */
+	_mali_osk_list_addtail(&group->pm_domain_list, &domain->group_list);
+}
+
+void mali_pm_domain_add_l2_cache(struct mali_pm_domain *domain,
+				 struct mali_l2_cache_core *l2_cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT_POINTER(l2_cache);
+	_mali_osk_list_add(&l2_cache->pm_domain_list, &domain->l2_cache_list);
+}
+
+struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask)
+{
+	u32 id = 0;
+
+	if (0 == mask) {
+		return NULL;
+	}
+
+	id = _mali_osk_fls(mask) - 1;
+
+	MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+	/* Verify that pmu_mask only one bit is set */
+	MALI_DEBUG_ASSERT((1 << id) == mask);
+
+	return mali_pm_domains[id];
+}
+
+struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id)
+{
+	MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+
+	return mali_pm_domains[id];
+}
+
+u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+
+	if (0 == domain->use_count) {
+		_mali_osk_pm_dev_ref_get_async();
+	}
+
+	++domain->use_count;
+	MALI_DEBUG_PRINT(4, ("PM domain %p: ref_get, use_count => %u\n", domain, domain->use_count));
+
+	/* Return our mask so caller can check this against wanted mask */
+	return domain->pmu_mask;
+}
+
+u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+
+	--domain->use_count;
+	MALI_DEBUG_PRINT(4, ("PM domain %p: ref_put, use_count => %u\n", domain, domain->use_count));
+
+	if (0 == domain->use_count) {
+		_mali_osk_pm_dev_ref_put();
+	}
+
+	/*
+	 * Return the PMU mask which now could be be powered down
+	 * (the bit for this domain).
+	 * This is the responsibility of the caller (mali_pm)
+	 */
+	return (0 == domain->use_count ? domain->pmu_mask : 0);
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_domain_get_id(struct mali_pm_domain *domain)
+{
+	u32 id = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT(0 != domain->pmu_mask);
+
+	id = _mali_osk_fls(domain->pmu_mask) - 1;
+
+	MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+	/* Verify that pmu_mask only one bit is set */
+	MALI_DEBUG_ASSERT((1 << id) == domain->pmu_mask);
+	/* Verify that we have stored the domain at right id/index */
+	MALI_DEBUG_ASSERT(domain == mali_pm_domains[id]);
+
+	return id;
+}
+#endif
+
+#if defined(DEBUG)
+mali_bool mali_pm_domain_all_unused(void)
+{
+	int i;
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (NULL == mali_pm_domains[i]) {
+			/* Nothing to check */
+			continue;
+		}
+
+		if (MALI_TRUE == mali_pm_domains[i]->power_is_on) {
+			/* Not ready for suspend! */
+			return MALI_FALSE;
+		}
+
+		if (0 != mali_pm_domains[i]->use_count) {
+			/* Not ready for suspend! */
+			return MALI_FALSE;
+		}
+	}
+
+	return MALI_TRUE;
+}
+#endif
diff --git a/utgard/r6p1/common/mali_pm_domain.h b/utgard/r6p1/common/mali_pm_domain.h
new file mode 100755
index 0000000..2ac8c0d
--- /dev/null
+++ b/utgard/r6p1/common/mali_pm_domain.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_DOMAIN_H__
+#define __MALI_PM_DOMAIN_H__
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+#include "mali_l2_cache.h"
+#include "mali_group.h"
+#include "mali_pmu.h"
+
+/* Instances are protected by PM state lock */
+struct mali_pm_domain {
+	mali_bool power_is_on;
+	s32 use_count;
+	u32 pmu_mask;
+
+	/* Zero or more groups can belong to this domain */
+	_mali_osk_list_t group_list;
+
+	/* Zero or more L2 caches can belong to this domain */
+	_mali_osk_list_t l2_cache_list;
+};
+
+
+void mali_pm_domain_initialize(void);
+void mali_pm_domain_terminate(void);
+
+struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask);
+void mali_pm_domain_delete(struct mali_pm_domain *domain);
+
+void mali_pm_domain_add_l2_cache(
+	struct mali_pm_domain *domain,
+	struct mali_l2_cache_core *l2_cache);
+void mali_pm_domain_add_group(struct mali_pm_domain *domain,
+			      struct mali_group *group);
+
+struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask);
+struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id);
+
+/* Ref counting */
+u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain);
+u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain);
+
+MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_group_list(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return &domain->group_list;
+}
+
+MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_l2_cache_list(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return &domain->l2_cache_list;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pm_domain_power_is_on(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->power_is_on;
+}
+
+MALI_STATIC_INLINE void mali_pm_domain_set_power_on(
+	struct mali_pm_domain *domain,
+	mali_bool power_is_on)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	domain->power_is_on = power_is_on;
+}
+
+MALI_STATIC_INLINE u32 mali_pm_domain_get_use_count(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->use_count;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_domain_get_id(struct mali_pm_domain *domain);
+
+MALI_STATIC_INLINE u32 mali_pm_domain_get_mask(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->pmu_mask;
+}
+#endif
+
+#if defined(DEBUG)
+mali_bool mali_pm_domain_all_unused(void);
+#endif
+
+#endif /* __MALI_PM_DOMAIN_H__ */
diff --git a/utgard/r6p1/common/mali_pm_metrics.c b/utgard/r6p1/common/mali_pm_metrics.c
new file mode 100644
index 0000000..981ec81
--- /dev/null
+++ b/utgard/r6p1/common/mali_pm_metrics.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "mali_pm_metrics.h"
+#include "mali_osk_locks.h"
+#include "mali_osk_mali.h"
+#include <linux/ktime.h>
+
+#define MALI_PM_TIME_SHIFT 0
+#define MALI_UTILIZATION_MAX_PERIOD 80000000/* ns = 100ms */
+
+_mali_osk_errcode_t mali_pm_metrics_init(struct mali_device *mdev)
+{
+	int i = 0;
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	mdev->mali_metrics.time_period_start = ktime_get();
+	mdev->mali_metrics.time_period_start_gp = mdev->mali_metrics.time_period_start;
+	mdev->mali_metrics.time_period_start_pp = mdev->mali_metrics.time_period_start;
+
+	mdev->mali_metrics.time_busy = 0;
+	mdev->mali_metrics.time_idle = 0;
+	mdev->mali_metrics.prev_busy = 0;
+	mdev->mali_metrics.prev_idle = 0;
+	mdev->mali_metrics.num_running_gp_cores = 0;
+	mdev->mali_metrics.num_running_pp_cores = 0;
+	mdev->mali_metrics.time_busy_gp = 0;
+	mdev->mali_metrics.time_idle_gp = 0;
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; i++) {
+		mdev->mali_metrics.time_busy_pp[i] = 0;
+		mdev->mali_metrics.time_idle_pp[i] = 0;
+	}
+	mdev->mali_metrics.gpu_active = MALI_FALSE;
+
+	mdev->mali_metrics.lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST);
+	if (NULL == mdev->mali_metrics.lock) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_pm_metrics_term(struct mali_device *mdev)
+{
+	_mali_osk_spinlock_irq_term(mdev->mali_metrics.lock);
+}
+
+/*caller needs to hold mdev->mali_metrics.lock before calling this function*/
+void mali_pm_record_job_status(struct mali_device *mdev)
+{
+	ktime_t now;
+	ktime_t diff;
+	u64 ns_time;
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	now = ktime_get();
+	diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+
+	ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+	mdev->mali_metrics.time_busy += ns_time;
+	mdev->mali_metrics.time_period_start = now;
+}
+
+void mali_pm_record_gpu_idle(mali_bool is_gp)
+{
+	ktime_t now;
+	ktime_t diff;
+	u64 ns_time;
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+	now = ktime_get();
+
+	if (MALI_TRUE == is_gp) {
+		--mdev->mali_metrics.num_running_gp_cores;
+		if (0 == mdev->mali_metrics.num_running_gp_cores) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_gp);
+			ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_busy_gp += ns_time;
+			mdev->mali_metrics.time_period_start_gp = now;
+
+			if (0 == mdev->mali_metrics.num_running_pp_cores) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_busy += ns_time;
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_FALSE;
+			}
+		}
+	} else {
+		--mdev->mali_metrics.num_running_pp_cores;
+		if (0 == mdev->mali_metrics.num_running_pp_cores) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_pp);
+			ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_busy_pp[0] += ns_time;
+			mdev->mali_metrics.time_period_start_pp = now;
+
+			if (0 == mdev->mali_metrics.num_running_gp_cores) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_busy += ns_time;
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_FALSE;
+			}
+		}
+	}
+
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_record_gpu_active(mali_bool is_gp)
+{
+	ktime_t now;
+	ktime_t diff;
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+	now = ktime_get();
+
+	if (MALI_TRUE == is_gp) {
+		mdev->mali_metrics.num_running_gp_cores++;
+		if (1 == mdev->mali_metrics.num_running_gp_cores) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_gp);
+			mdev->mali_metrics.time_idle_gp += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_period_start_gp = now;
+			if (0 == mdev->mali_metrics.num_running_pp_cores) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_FALSE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_TRUE;
+			}
+		} else {
+			MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+		}
+	} else {
+		mdev->mali_metrics.num_running_pp_cores++;
+		if (1 == mdev->mali_metrics.num_running_pp_cores) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_pp);
+			mdev->mali_metrics.time_idle_pp[0] += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_period_start_pp = now;
+			if (0 == mdev->mali_metrics.num_running_gp_cores) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_FALSE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_TRUE;
+			}
+		} else {
+			MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+		}
+	}
+
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+
+/*caller needs to hold mdev->mali_metrics.lock before calling this function*/
+static void mali_pm_get_dvfs_utilisation_calc(struct mali_device *mdev, ktime_t now)
+{
+	ktime_t diff;
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+
+	if (mdev->mali_metrics.gpu_active) {
+		mdev->mali_metrics.time_busy += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+	} else {
+		mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+	}
+}
+
+/* Caller needs to hold mdev->mali_metrics.lock before calling this function. */
+static void mali_pm_reset_dvfs_utilisation_unlocked(struct mali_device *mdev, ktime_t now)
+{
+	/* Store previous value */
+	mdev->mali_metrics.prev_idle = mdev->mali_metrics.time_idle;
+	mdev->mali_metrics.prev_busy = mdev->mali_metrics.time_busy;
+
+	/* Reset current values */
+	mdev->mali_metrics.time_period_start = now;
+	mdev->mali_metrics.time_period_start_gp = now;
+	mdev->mali_metrics.time_period_start_pp = now;
+	mdev->mali_metrics.time_idle = 0;
+	mdev->mali_metrics.time_busy = 0;
+
+	mdev->mali_metrics.time_busy_gp = 0;
+	mdev->mali_metrics.time_idle_gp = 0;
+	mdev->mali_metrics.time_busy_pp[0] = 0;
+	mdev->mali_metrics.time_idle_pp[0] = 0;
+}
+
+void mali_pm_reset_dvfs_utilisation(struct mali_device *mdev)
+{
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+	mali_pm_reset_dvfs_utilisation_unlocked(mdev, ktime_get());
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_get_dvfs_utilisation(struct mali_device *mdev,
+				  unsigned long *total_out, unsigned long *busy_out)
+{
+	ktime_t now = ktime_get();
+	u64 busy = 0;
+	u64 total = 0;
+
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+
+	mali_pm_get_dvfs_utilisation_calc(mdev, now);
+
+	busy = mdev->mali_metrics.time_busy;
+	total = busy + mdev->mali_metrics.time_idle;
+
+	/* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
+	 * 100ms) */
+	if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+		mali_pm_reset_dvfs_utilisation_unlocked(mdev, now);
+	} else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+		total += mdev->mali_metrics.prev_idle +
+			 mdev->mali_metrics.prev_busy;
+		busy += mdev->mali_metrics.prev_busy;
+	}
+
+	*total_out = (unsigned long)total;
+	*busy_out = (unsigned long)busy;
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_metrics_spin_lock(void)
+{
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_metrics_spin_unlock(void)
+{
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
diff --git a/utgard/r6p1/common/mali_pm_metrics.h b/utgard/r6p1/common/mali_pm_metrics.h
new file mode 100644
index 0000000..256f448
--- /dev/null
+++ b/utgard/r6p1/common/mali_pm_metrics.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_METRICS_H__
+#define __MALI_PM_METRICS_H__
+
+#ifdef CONFIG_MALI_DEVFREQ
+#include "mali_osk_locks.h"
+#include "mali_group.h"
+
+struct mali_device;
+
+/**
+ * Metrics data collected for use by the power management framework.
+ */
+struct mali_pm_metrics_data {
+	ktime_t time_period_start;
+	u64 time_busy;
+	u64 time_idle;
+	u64 prev_busy;
+	u64 prev_idle;
+	u32 num_running_gp_cores;
+	u32 num_running_pp_cores;
+	ktime_t time_period_start_gp;
+	u64 time_busy_gp;
+	u64 time_idle_gp;
+	ktime_t time_period_start_pp;
+	u64 time_busy_pp[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	u64 time_idle_pp[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	mali_bool gpu_active;
+	_mali_osk_spinlock_irq_t *lock;
+};
+
+/**
+ * Initialize/start the Mali GPU pm_metrics metrics reporting.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_pm_metrics_init(struct mali_device *mdev);
+
+/**
+ * Terminate the Mali GPU pm_metrics metrics reporting
+ */
+void mali_pm_metrics_term(struct mali_device *mdev);
+
+/**
+ * Should be called when a job is about to execute a GPU job
+ */
+void mali_pm_record_gpu_active(mali_bool is_gp);
+
+/**
+ * Should be called when a job is finished
+ */
+void mali_pm_record_gpu_idle(mali_bool is_gp);
+
+void mali_pm_reset_dvfs_utilisation(struct mali_device *mdev);
+
+void mali_pm_get_dvfs_utilisation(struct mali_device *mdev, unsigned long *total_out, unsigned long *busy_out);
+
+void mali_pm_metrics_spin_lock(void);
+
+void mali_pm_metrics_spin_unlock(void);
+#else
+void mali_pm_record_gpu_idle(mali_bool is_gp) {}
+void mali_pm_record_gpu_active(mali_bool is_gp) {}
+#endif
+#endif /* __MALI_PM_METRICS_H__ */
diff --git a/utgard/r6p1/common/mali_pmu.c b/utgard/r6p1/common/mali_pmu.c
new file mode 100755
index 0000000..bf0d413
--- /dev/null
+++ b/utgard/r6p1/common/mali_pmu.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_pmu.c
+ * Mali driver functions for Mali 400 PMU hardware
+ */
+#include "mali_hw_core.h"
+#include "mali_pmu.h"
+#include "mali_pp.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_pm.h"
+#include "mali_osk_mali.h"
+
+struct mali_pmu_core *mali_global_pmu_core = NULL;
+
+static _mali_osk_errcode_t mali_pmu_wait_for_command_finish(
+	struct mali_pmu_core *pmu);
+
+struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource)
+{
+	struct mali_pmu_core *pmu;
+
+	MALI_DEBUG_ASSERT(NULL == mali_global_pmu_core);
+	MALI_DEBUG_PRINT(2, ("Mali PMU: Creating Mali PMU core\n"));
+
+	pmu = (struct mali_pmu_core *)_mali_osk_malloc(
+		      sizeof(struct mali_pmu_core));
+	if (NULL != pmu) {
+		pmu->registered_cores_mask = 0; /* to be set later */
+
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&pmu->hw_core,
+				resource, PMU_REGISTER_ADDRESS_SPACE_SIZE)) {
+
+			pmu->switch_delay = _mali_osk_get_pmu_switch_delay();
+
+			mali_global_pmu_core = pmu;
+
+			return pmu;
+		}
+		_mali_osk_free(pmu);
+	}
+
+	return NULL;
+}
+
+void mali_pmu_delete(struct mali_pmu_core *pmu)
+{
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu == mali_global_pmu_core);
+
+	MALI_DEBUG_PRINT(2, ("Mali PMU: Deleting Mali PMU core\n"));
+
+	mali_global_pmu_core = NULL;
+
+	mali_hw_core_delete(&pmu->hw_core);
+	_mali_osk_free(pmu);
+}
+
+void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask)
+{
+	pmu->registered_cores_mask = mask;
+}
+
+void mali_pmu_reset(struct mali_pmu_core *pmu)
+{
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	/* Setup the desired defaults */
+	mali_hw_core_register_write_relaxed(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_INT_MASK, 0);
+	mali_hw_core_register_write_relaxed(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_SW_DELAY, pmu->switch_delay);
+}
+
+void mali_pmu_power_up_all(struct mali_pmu_core *pmu)
+{
+	u32 stat;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	mali_pm_exec_lock();
+
+	mali_pmu_reset(pmu);
+
+	/* Now simply power up the domains which are marked as powered down */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	mali_pmu_power_up(pmu, stat);
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pmu_power_down_all(struct mali_pmu_core *pmu)
+{
+	u32 stat;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	mali_pm_exec_lock();
+
+	/* Now simply power down the domains which are marked as powered up */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	mali_pmu_power_down(pmu, (~stat) & pmu->registered_cores_mask);
+
+	mali_pm_exec_unlock();
+}
+
+_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask)
+{
+	u32 stat;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+	MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask);
+	MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core,
+				PMU_REG_ADDR_MGMT_INT_RAWSTAT) &
+				PMU_REG_VAL_IRQ));
+
+	MALI_DEBUG_PRINT(3,
+			 ("PMU power down: ...................... [%s]\n",
+			  mali_pm_mask_to_string(mask)));
+
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+
+	/*
+	 * Assert that we are not powering down domains which are already
+	 * powered down.
+	 */
+	MALI_DEBUG_ASSERT(0 == (stat & mask));
+
+	mask  &= ~(0x1 << MALI_DOMAIN_INDEX_DUMMY);
+
+	if (0 == mask || 0 == ((~stat) & mask)) return _MALI_OSK_ERR_OK;
+
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_POWER_DOWN, mask);
+
+	/*
+	 * Do not wait for interrupt on Mali-300/400 if all domains are
+	 * powered off by our power down command, because the HW will simply
+	 * not generate an interrupt in this case.
+	 */
+	if (mali_is_mali450() || mali_is_mali470() || pmu->registered_cores_mask != (mask | stat)) {
+		err = mali_pmu_wait_for_command_finish(pmu);
+		if (_MALI_OSK_ERR_OK != err) {
+			return err;
+		}
+	} else {
+		mali_hw_core_register_write(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ);
+	}
+
+#if defined(DEBUG)
+	/* Verify power status of domains after power down */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	MALI_DEBUG_ASSERT(mask == (stat & mask));
+#endif
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask)
+{
+	u32 stat;
+	_mali_osk_errcode_t err;
+#if !defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP)
+	u32 current_domain;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+	MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask);
+	MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core,
+				PMU_REG_ADDR_MGMT_INT_RAWSTAT) &
+				PMU_REG_VAL_IRQ));
+
+	MALI_DEBUG_PRINT(3,
+			 ("PMU power up: ........................ [%s]\n",
+			  mali_pm_mask_to_string(mask)));
+
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	stat &= pmu->registered_cores_mask;
+
+	mask  &= ~(0x1 << MALI_DOMAIN_INDEX_DUMMY);
+	if (0 == mask || 0 == (stat & mask)) return _MALI_OSK_ERR_OK;
+
+	/*
+	 * Assert that we are only powering up domains which are currently
+	 * powered down.
+	 */
+	MALI_DEBUG_ASSERT(mask == (stat & mask));
+
+#if defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP)
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_POWER_UP, mask);
+
+	err = mali_pmu_wait_for_command_finish(pmu);
+	if (_MALI_OSK_ERR_OK != err) {
+		return err;
+	}
+#else
+	for (current_domain = 1;
+	     current_domain <= pmu->registered_cores_mask;
+	     current_domain <<= 1) {
+		if (current_domain & mask & stat) {
+			mali_hw_core_register_write(&pmu->hw_core,
+						    PMU_REG_ADDR_MGMT_POWER_UP,
+						    current_domain);
+
+			err = mali_pmu_wait_for_command_finish(pmu);
+			if (_MALI_OSK_ERR_OK != err) {
+				return err;
+			}
+		}
+	}
+#endif
+
+#if defined(DEBUG)
+	/* Verify power status of domains after power up */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	MALI_DEBUG_ASSERT(0 == (stat & mask));
+#endif /* defined(DEBUG) */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_pmu_wait_for_command_finish(
+	struct mali_pmu_core *pmu)
+{
+	u32 rawstat;
+	u32 timeout = MALI_REG_POLL_COUNT_SLOW;
+
+	MALI_DEBUG_ASSERT(pmu);
+
+	/* Wait for the command to complete */
+	do {
+		rawstat = mali_hw_core_register_read(&pmu->hw_core,
+						     PMU_REG_ADDR_MGMT_INT_RAWSTAT);
+		--timeout;
+	} while (0 == (rawstat & PMU_REG_VAL_IRQ) && 0 < timeout);
+
+	MALI_DEBUG_ASSERT(0 < timeout);
+
+	if (0 == timeout) {
+		return _MALI_OSK_ERR_TIMEOUT;
+	}
+
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ);
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r6p1/common/mali_pmu.h b/utgard/r6p1/common/mali_pmu.h
new file mode 100755
index 0000000..36ff58e
--- /dev/null
+++ b/utgard/r6p1/common/mali_pmu.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_platform.h
+ * Platform specific Mali driver functions
+ */
+
+#ifndef __MALI_PMU_H__
+#define __MALI_PMU_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_hw_core.h"
+
+/** @brief MALI inbuilt PMU hardware info and PMU hardware has knowledge of cores power mask
+ */
+struct mali_pmu_core {
+	struct mali_hw_core hw_core;
+	u32 registered_cores_mask;
+	u32 switch_delay;
+};
+
+/** @brief Register layout for hardware PMU
+ */
+typedef enum {
+	PMU_REG_ADDR_MGMT_POWER_UP                  = 0x00,     /*< Power up register */
+	PMU_REG_ADDR_MGMT_POWER_DOWN                = 0x04,     /*< Power down register */
+	PMU_REG_ADDR_MGMT_STATUS                    = 0x08,     /*< Core sleep status register */
+	PMU_REG_ADDR_MGMT_INT_MASK                  = 0x0C,     /*< Interrupt mask register */
+	PMU_REG_ADDR_MGMT_INT_RAWSTAT               = 0x10,     /*< Interrupt raw status register */
+	PMU_REG_ADDR_MGMT_INT_CLEAR                 = 0x18,     /*< Interrupt clear register */
+	PMU_REG_ADDR_MGMT_SW_DELAY                  = 0x1C,     /*< Switch delay register */
+	PMU_REGISTER_ADDRESS_SPACE_SIZE             = 0x28,     /*< Size of register space */
+} pmu_reg_addr_mgmt_addr;
+
+#define PMU_REG_VAL_IRQ 1
+
+extern struct mali_pmu_core *mali_global_pmu_core;
+
+/** @brief Initialisation of MALI PMU
+ *
+ * This is called from entry point of the driver in order to create and intialize the PMU resource
+ *
+ * @param resource it will be a pointer to a PMU resource
+ * @param number_of_pp_cores Number of found PP resources in configuration
+ * @param number_of_l2_caches Number of found L2 cache resources in configuration
+ * @return The created PMU object, or NULL in case of failure.
+ */
+struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource);
+
+/** @brief It deallocates the PMU resource
+ *
+ * This is called on the exit of the driver to terminate the PMU resource
+ *
+ * @param pmu Pointer to PMU core object to delete
+ */
+void mali_pmu_delete(struct mali_pmu_core *pmu);
+
+/** @brief Set registered cores mask
+ *
+ * @param pmu Pointer to PMU core object
+ * @param mask All available/valid domain bits
+ */
+void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask);
+
+/** @brief Retrieves the Mali PMU core object (if any)
+ *
+ * @return The Mali PMU object, or NULL if no PMU exists.
+ */
+MALI_STATIC_INLINE struct mali_pmu_core *mali_pmu_get_global_pmu_core(void)
+{
+	return mali_global_pmu_core;
+}
+
+/** @brief Reset PMU core
+ *
+ * @param pmu Pointer to PMU core object to reset
+ */
+void mali_pmu_reset(struct mali_pmu_core *pmu);
+
+void mali_pmu_power_up_all(struct mali_pmu_core *pmu);
+
+void mali_pmu_power_down_all(struct mali_pmu_core *pmu);
+
+/** @brief Returns a mask of the currently powered up domains
+ *
+ * @param pmu Pointer to PMU core object
+ */
+MALI_STATIC_INLINE u32 mali_pmu_get_mask(struct mali_pmu_core *pmu)
+{
+	u32 stat = mali_hw_core_register_read(&pmu->hw_core, PMU_REG_ADDR_MGMT_STATUS);
+	return ((~stat) & pmu->registered_cores_mask);
+}
+
+/** @brief MALI GPU power down using MALI in-built PMU
+ *
+ * Called to power down the specified cores.
+ *
+ * @param pmu Pointer to PMU core object to power down
+ * @param mask Mask specifying which power domains to power down
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask);
+
+/** @brief MALI GPU power up using MALI in-built PMU
+ *
+ * Called to power up the specified cores.
+ *
+ * @param pmu Pointer to PMU core object to power up
+ * @param mask Mask specifying which power domains to power up
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask);
+
+#endif /* __MALI_PMU_H__ */
diff --git a/utgard/r6p1/common/mali_pp.c b/utgard/r6p1/common/mali_pp.c
new file mode 100755
index 0000000..1d9c111
--- /dev/null
+++ b/utgard/r6p1/common/mali_pp.c
@@ -0,0 +1,516 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pp_job.h"
+#include "mali_pp.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "regs/mali_200_regs.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#include "mali_osk_mali.h"
+
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+
+#include <mali_platform.h>
+
+/* Number of frame registers on Mali-200 */
+#define MALI_PP_MALI200_NUM_FRAME_REGISTERS ((0x04C/4)+1)
+/* Number of frame registers on Mali-300 and later */
+#define MALI_PP_MALI400_NUM_FRAME_REGISTERS ((0x058/4)+1)
+
+static struct mali_pp_core *mali_global_pp_cores[MALI_MAX_NUMBER_OF_PP_CORES] = { NULL };
+static u32 mali_global_num_pp_cores = 0;
+
+/* Interrupt handlers */
+static void mali_pp_irq_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data);
+
+struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id)
+{
+	struct mali_pp_core *core = NULL;
+
+	MALI_DEBUG_PRINT(2, ("Mali PP: Creating Mali PP core: %s\n", resource->description));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Base address of PP core: 0x%x\n", resource->base));
+
+	if (mali_global_num_pp_cores >= MALI_MAX_NUMBER_OF_PP_CORES) {
+		MALI_PRINT_ERROR(("Mali PP: Too many PP core objects created\n"));
+		return NULL;
+	}
+
+	core = _mali_osk_calloc(1, sizeof(struct mali_pp_core));
+	if (NULL != core) {
+		core->core_id = mali_global_num_pp_cores;
+		core->bcast_id = bcast_id;
+
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALI200_REG_SIZEOF_REGISTER_BANK)) {
+			_mali_osk_errcode_t ret;
+
+			if (!is_virtual) {
+				ret = mali_pp_reset(core);
+			} else {
+				ret = _MALI_OSK_ERR_OK;
+			}
+
+			if (_MALI_OSK_ERR_OK == ret) {
+				ret = mali_group_add_pp_core(group, core);
+				if (_MALI_OSK_ERR_OK == ret) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					MALI_DEBUG_ASSERT(!is_virtual || -1 != resource->irq);
+
+					core->irq = _mali_osk_irq_init(resource->irq,
+								       mali_group_upper_half_pp,
+								       group,
+								       mali_pp_irq_probe_trigger,
+								       mali_pp_irq_probe_ack,
+								       core,
+								       resource->description);
+					if (NULL != core->irq) {
+						mali_global_pp_cores[mali_global_num_pp_cores] = core;
+						mali_global_num_pp_cores++;
+
+						return core;
+					} else {
+						MALI_PRINT_ERROR(("Mali PP: Failed to setup interrupt handlers for PP core %s\n", core->hw_core.description));
+					}
+					mali_group_remove_pp_core(group);
+				} else {
+					MALI_PRINT_ERROR(("Mali PP: Failed to add core %s to group\n", core->hw_core.description));
+				}
+			}
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Mali PP: Failed to allocate memory for PP core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_pp_delete(struct mali_pp_core *core)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	_mali_osk_irq_term(core->irq);
+	mali_hw_core_delete(&core->hw_core);
+
+	/* Remove core from global list */
+	for (i = 0; i < mali_global_num_pp_cores; i++) {
+		if (mali_global_pp_cores[i] == core) {
+			mali_global_pp_cores[i] = NULL;
+			mali_global_num_pp_cores--;
+
+			if (i != mali_global_num_pp_cores) {
+				/* We removed a PP core from the middle of the array -- move the last
+				 * PP core to the current position to close the gap */
+				mali_global_pp_cores[i] = mali_global_pp_cores[mali_global_num_pp_cores];
+				mali_global_pp_cores[mali_global_num_pp_cores] = NULL;
+			}
+
+			break;
+		}
+	}
+
+	_mali_osk_free(core);
+}
+
+void mali_pp_stop_bus(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	/* Will only send the stop bus command, and not wait for it to complete */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_STOP_BUS);
+}
+
+_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core)
+{
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Send the stop bus command. */
+	mali_pp_stop_bus(core);
+
+	/* Wait for bus to be stopped */
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		if (mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS) & MALI200_REG_VAL_STATUS_BUS_STOPPED)
+			break;
+	}
+
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Mali PP: Failed to stop bus on %s. Status: 0x%08x\n", core->hw_core.description, mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/* Frame register reset values.
+ * Taken from the Mali400 TRM, 3.6. Pixel processor control register summary */
+static const u32 mali_frame_registers_reset_values[_MALI_PP_MAX_FRAME_REGISTERS] = {
+	0x0, /* Renderer List Address Register */
+	0x0, /* Renderer State Word Base Address Register */
+	0x0, /* Renderer Vertex Base Register */
+	0x2, /* Feature Enable Register */
+	0x0, /* Z Clear Value Register */
+	0x0, /* Stencil Clear Value Register */
+	0x0, /* ABGR Clear Value 0 Register */
+	0x0, /* ABGR Clear Value 1 Register */
+	0x0, /* ABGR Clear Value 2 Register */
+	0x0, /* ABGR Clear Value 3 Register */
+	0x0, /* Bounding Box Left Right Register */
+	0x0, /* Bounding Box Bottom Register */
+	0x0, /* FS Stack Address Register */
+	0x0, /* FS Stack Size and Initial Value Register */
+	0x0, /* Reserved */
+	0x0, /* Reserved */
+	0x0, /* Origin Offset X Register */
+	0x0, /* Origin Offset Y Register */
+	0x75, /* Subpixel Specifier Register */
+	0x0, /* Tiebreak mode Register */
+	0x0, /* Polygon List Format Register */
+	0x0, /* Scaling Register */
+	0x0 /* Tilebuffer configuration Register */
+};
+
+/* WBx register reset values */
+static const u32 mali_wb_registers_reset_values[_MALI_PP_MAX_WB_REGISTERS] = {
+	0x0, /* WBx Source Select Register */
+	0x0, /* WBx Target Address Register */
+	0x0, /* WBx Target Pixel Format Register */
+	0x0, /* WBx Target AA Format Register */
+	0x0, /* WBx Target Layout */
+	0x0, /* WBx Target Scanline Length */
+	0x0, /* WBx Target Flags Register */
+	0x0, /* WBx MRT Enable Register */
+	0x0, /* WBx MRT Offset Register */
+	0x0, /* WBx Global Test Enable Register */
+	0x0, /* WBx Global Test Reference Value Register */
+	0x0  /* WBx Global Test Compare Function Register */
+};
+
+/* Performance Counter 0 Enable Register reset value */
+static const u32 mali_perf_cnt_enable_reset_value = 0;
+
+extern int pp_hardware_reset;
+_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core)
+{
+	/* Bus must be stopped before calling this function */
+	const u32 reset_wait_target_register = MALI200_REG_ADDR_MGMT_PERF_CNT_0_LIMIT;
+	const u32 reset_invalid_value = 0xC0FFE000;
+	const u32 reset_check_value = 0xC01A0000;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+	MALI_DEBUG_PRINT(2, ("Mali PP: Hard reset of core %s\n", core->hw_core.description));
+	pp_hardware_reset ++;
+
+	/* Set register to a bogus value. The register will be used to detect when reset is complete */
+	mali_hw_core_register_write_relaxed(&core->hw_core, reset_wait_target_register, reset_invalid_value);
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE);
+
+	/* Force core to reset */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET);
+
+	/* Wait for reset to be complete */
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value);
+		if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) {
+			break;
+		}
+	}
+
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Mali PP: The hard reset loop didn't work, unable to recover\n"));
+	}
+
+	mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, 0x00000000); /* set it back to the default */
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_pp_reset_async(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	MALI_DEBUG_PRINT(4, ("Mali PP: Reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET);
+}
+
+_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core)
+{
+	int i;
+	u32 rawstat = 0;
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		u32 status =  mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS);
+		if (!(status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE)) {
+			rawstat = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT);
+			if (rawstat == MALI400PP_REG_VAL_IRQ_RESET_COMPLETED) {
+				break;
+			}
+		}
+	}
+
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Mali PP: Failed to reset core %s, rawstat: 0x%08x\n",
+				  core->hw_core.description, rawstat));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core)
+{
+	mali_pp_reset_async(core);
+	return mali_pp_reset_wait(core);
+}
+
+void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual)
+{
+	u32 relative_address;
+	u32 start_index;
+	u32 nr_of_regs;
+	u32 *frame_registers = mali_pp_job_get_frame_registers(job);
+	u32 *wb0_registers = mali_pp_job_get_wb0_registers(job);
+	u32 *wb1_registers = mali_pp_job_get_wb1_registers(job);
+	u32 *wb2_registers = mali_pp_job_get_wb2_registers(job);
+	u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, sub_job);
+	u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, sub_job);
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Change gpu secure mode if needed. */
+	if (MALI_TRUE == mali_pp_job_is_protected_job(job) && MALI_FALSE == _mali_osk_gpu_secure_mode_is_enabled()) {
+		_mali_osk_gpu_secure_mode_enable();
+	} else if (MALI_FALSE == mali_pp_job_is_protected_job(job) && MALI_TRUE == _mali_osk_gpu_secure_mode_is_enabled()) {
+		_mali_osk_gpu_secure_mode_disable();
+	}
+
+	/* Write frame registers */
+
+	/*
+	 * There are two frame registers which are different for each sub job:
+	 * 1. The Renderer List Address Register (MALI200_REG_ADDR_FRAME)
+	 * 2. The FS Stack Address Register (MALI200_REG_ADDR_STACK)
+	 */
+	mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_FRAME, mali_pp_job_get_addr_frame(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_FRAME / sizeof(u32)]);
+
+	/* For virtual jobs, the stack address shouldn't be broadcast but written individually */
+	if (!mali_pp_job_is_virtual(job) || restart_virtual) {
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_STACK, mali_pp_job_get_addr_stack(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_STACK / sizeof(u32)]);
+	}
+
+	/* Write registers between MALI200_REG_ADDR_FRAME and MALI200_REG_ADDR_STACK */
+	relative_address = MALI200_REG_ADDR_RSW;
+	start_index = MALI200_REG_ADDR_RSW / sizeof(u32);
+	nr_of_regs = (MALI200_REG_ADDR_STACK - MALI200_REG_ADDR_RSW) / sizeof(u32);
+
+	mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core,
+			relative_address, &frame_registers[start_index],
+			nr_of_regs, &mali_frame_registers_reset_values[start_index]);
+
+	/* MALI200_REG_ADDR_STACK_SIZE */
+	relative_address = MALI200_REG_ADDR_STACK_SIZE;
+	start_index = MALI200_REG_ADDR_STACK_SIZE / sizeof(u32);
+
+	mali_hw_core_register_write_relaxed_conditional(&core->hw_core,
+			relative_address, frame_registers[start_index],
+			mali_frame_registers_reset_values[start_index]);
+
+	/* Skip 2 reserved registers */
+
+	/* Write remaining registers */
+	relative_address = MALI200_REG_ADDR_ORIGIN_OFFSET_X;
+	start_index = MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32);
+	nr_of_regs = MALI_PP_MALI400_NUM_FRAME_REGISTERS - MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32);
+
+	mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core,
+			relative_address, &frame_registers[start_index],
+			nr_of_regs, &mali_frame_registers_reset_values[start_index]);
+
+	/* Write WBx registers */
+	if (wb0_registers[0]) { /* M200_WB0_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB0, wb0_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (wb1_registers[0]) { /* M200_WB1_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB1, wb1_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (wb2_registers[0]) { /* M200_WB2_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB2, wb2_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0);
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value);
+	}
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1);
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value);
+	}
+
+#ifdef CONFIG_MALI400_HEATMAPS_ENABLED
+	if (job->uargs.perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERFMON_CONTR, ((job->uargs.tilesx & 0x3FF) << 16) | 1);
+		mali_hw_core_register_write_relaxed(&core->hw_core,  MALI200_REG_ADDR_MGMT_PERFMON_BASE, job->uargs.heatmap_mem & 0xFFFFFFF8);
+	}
+#endif /* CONFIG_MALI400_HEATMAPS_ENABLED */
+
+	MALI_DEBUG_PRINT(3, ("Mali PP: Starting job 0x%08X part %u/%u on PP core %s\n", job, sub_job + 1, mali_pp_job_get_sub_job_count(job), core->hw_core.description));
+
+	/* Adding barrier to make sure all rester writes are finished */
+	_mali_osk_write_mem_barrier();
+
+	/* This is the command that starts the core.
+	 *
+	 * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just
+	 * force core to assert the completion interrupt.
+	 */
+#if !defined(PROFILING_SKIP_PP_JOBS)
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_START_RENDERING);
+#else
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_END_OF_FRAME);
+#endif
+
+	/* Adding barrier to make sure previous rester writes is finished */
+	_mali_osk_write_mem_barrier();
+}
+
+u32 mali_pp_core_get_version(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION);
+}
+
+struct mali_pp_core *mali_pp_get_global_pp_core(u32 index)
+{
+	if (mali_global_num_pp_cores > index) {
+		return mali_global_pp_cores[index];
+	}
+
+	return NULL;
+}
+
+u32 mali_pp_get_glob_num_pp_cores(void)
+{
+	return mali_global_num_pp_cores;
+}
+
+/* ------------- interrupt handling below ------------------ */
+static void mali_pp_irq_probe_trigger(void *data)
+{
+	struct mali_pp_core *core = (struct mali_pp_core *)data;
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_BUS_ERROR);
+	_mali_osk_mem_barrier();
+}
+
+static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data)
+{
+	struct mali_pp_core *core = (struct mali_pp_core *)data;
+	u32 irq_readout;
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS);
+	if (MALI200_REG_VAL_IRQ_BUS_ERROR & irq_readout) {
+		mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_BUS_ERROR);
+		_mali_osk_mem_barrier();
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+#if 0
+static void mali_pp_print_registers(struct mali_pp_core *core)
+{
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_VERSION = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_RAWSTAT = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_MASK = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE)));
+}
+#endif
+
+#if 0
+void mali_pp_print_state(struct mali_pp_core *core)
+{
+	MALI_DEBUG_PRINT(2, ("Mali PP: State: 0x%08x\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+}
+#endif
+
+void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob)
+{
+	u32 val0 = 0;
+	u32 val1 = 0;
+	u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, subjob);
+	u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, subjob);
+#if defined(CONFIG_MALI400_PROFILING)
+	int counter_index = COUNTER_FP_0_C0 + (2 * child->core_id);
+#endif
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		val0 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE);
+		mali_pp_job_set_perf_counter_value0(job, subjob, val0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(counter_index, val0);
+		_mali_osk_profiling_record_global_counters(counter_index, val0);
+#endif
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		val1 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE);
+		mali_pp_job_set_perf_counter_value1(job, subjob, val1);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(counter_index + 1, val1);
+		_mali_osk_profiling_record_global_counters(counter_index + 1, val1);
+#endif
+	}
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\tPP #%d: %s\n", core->core_id, core->hw_core.description);
+
+	return n;
+}
+#endif
diff --git a/utgard/r6p1/common/mali_pp.h b/utgard/r6p1/common/mali_pp.h
new file mode 100755
index 0000000..76f5bd0
--- /dev/null
+++ b/utgard/r6p1/common/mali_pp.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PP_H__
+#define __MALI_PP_H__
+
+#include "mali_osk.h"
+#include "mali_pp_job.h"
+#include "mali_hw_core.h"
+
+struct mali_group;
+
+#define MALI_MAX_NUMBER_OF_PP_CORES        9
+
+/**
+ * Definition of the PP core struct
+ * Used to track a PP core in the system.
+ */
+struct mali_pp_core {
+	struct mali_hw_core  hw_core;           /**< Common for all HW cores */
+	_mali_osk_irq_t     *irq;               /**< IRQ handler */
+	u32                  core_id;           /**< Unique core ID */
+	u32                  bcast_id;          /**< The "flag" value used by the Mali-450 broadcast and DLBU unit */
+};
+
+_mali_osk_errcode_t mali_pp_initialize(void);
+void mali_pp_terminate(void);
+
+struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id);
+void mali_pp_delete(struct mali_pp_core *core);
+
+void mali_pp_stop_bus(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core);
+void mali_pp_reset_async(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core);
+
+void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual);
+
+u32 mali_pp_core_get_version(struct mali_pp_core *core);
+
+MALI_STATIC_INLINE u32 mali_pp_core_get_id(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return core->core_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_core_get_bcast_id(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return core->bcast_id;
+}
+
+struct mali_pp_core *mali_pp_get_global_pp_core(u32 index);
+u32 mali_pp_get_glob_num_pp_cores(void);
+
+/* Debug */
+u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size);
+
+/**
+ * Put instrumented HW counters from the core(s) to the job object (if enabled)
+ *
+ * parent and child is always the same, except for virtual jobs on Mali-450.
+ * In this case, the counters will be enabled on the virtual core (parent),
+ * but values need to be read from the child cores.
+ *
+ * @param parent The core used to see if the counters was enabled
+ * @param child The core to actually read the values from
+ * @job Job object to update with counter values (if enabled)
+ * @subjob Which subjob the counters are applicable for (core ID for virtual jobs)
+ */
+void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob);
+
+MALI_STATIC_INLINE const char *mali_pp_core_description(struct mali_pp_core *core)
+{
+	return core->hw_core.description;
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_pp_get_interrupt_result(struct mali_pp_core *core)
+{
+	u32 rawstat_used = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT) &
+			   MALI200_REG_VAL_IRQ_MASK_USED;
+	if (0 == rawstat_used) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	} else if (MALI200_REG_VAL_IRQ_END_OF_FRAME == rawstat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS;
+	}
+
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_get_rawstat(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core,
+					  MALI200_REG_ADDR_MGMT_INT_RAWSTAT);
+}
+
+
+MALI_STATIC_INLINE u32 mali_pp_is_active(struct mali_pp_core *core)
+{
+	u32 status = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS);
+	return (status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_pp_mask_all_interrupts(struct mali_pp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE);
+}
+
+MALI_STATIC_INLINE void mali_pp_enable_interrupts(struct mali_pp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+}
+
+MALI_STATIC_INLINE void mali_pp_write_addr_renderer_list(struct mali_pp_core *core,
+		struct mali_pp_job *job, u32 subjob)
+{
+	u32 addr = mali_pp_job_get_addr_frame(job, subjob);
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_FRAME, addr);
+}
+
+
+MALI_STATIC_INLINE void mali_pp_write_addr_stack(struct mali_pp_core *core, struct mali_pp_job *job)
+{
+	u32 addr = mali_pp_job_get_addr_stack(job, core->core_id);
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_STACK, addr);
+}
+
+#endif /* __MALI_PP_H__ */
diff --git a/utgard/r6p1/common/mali_pp_job.c b/utgard/r6p1/common/mali_pp_job.c
new file mode 100755
index 0000000..27349c9
--- /dev/null
+++ b/utgard/r6p1/common/mali_pp_job.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pp.h"
+#include "mali_pp_job.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_executor.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#include "linux/mali_memory_dma_buf.h"
+#endif
+#include "mali_memory_swap_alloc.h"
+#include "mali_scheduler.h"
+
+static u32 pp_counter_src0 = MALI_HW_CORE_NO_COUNTER;   /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+static u32 pp_counter_src1 = MALI_HW_CORE_NO_COUNTER;   /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+static _mali_osk_atomic_t pp_counter_per_sub_job_count; /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */
+static u32 pp_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER };
+static u32 pp_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER };
+
+void mali_pp_job_initialize(void)
+{
+	_mali_osk_atomic_init(&pp_counter_per_sub_job_count, 0);
+}
+
+void mali_pp_job_terminate(void)
+{
+	_mali_osk_atomic_term(&pp_counter_per_sub_job_count);
+}
+
+struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session,
+				       _mali_uk_pp_start_job_s __user *uargs, u32 id)
+{
+	struct mali_pp_job *job;
+	u32 perf_counter_flag;
+
+	job = _mali_osk_calloc(1, sizeof(struct mali_pp_job));
+	if (NULL != job) {
+
+		_mali_osk_list_init(&job->list);
+		_mali_osk_list_init(&job->session_fb_lookup_list);
+
+		if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_pp_start_job_s))) {
+			goto fail;
+		}
+
+		if (job->uargs.num_cores > _MALI_PP_MAX_SUB_JOBS) {
+			MALI_PRINT_ERROR(("Mali PP job: Too many sub jobs specified in job object\n"));
+			goto fail;
+		}
+
+		if (!mali_pp_job_use_no_notification(job)) {
+			job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_FINISHED, sizeof(_mali_uk_pp_job_finished_s));
+			if (NULL == job->finished_notification) goto fail;
+		}
+
+		perf_counter_flag = mali_pp_job_get_perf_counter_flag(job);
+
+		/* case when no counters came from user space
+		 * so pass the debugfs / DS-5 provided global ones to the job object */
+		if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) ||
+		      (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) {
+			u32 sub_job_count = _mali_osk_atomic_read(&pp_counter_per_sub_job_count);
+
+			/* These counters apply for all virtual jobs, and where no per sub job counter is specified */
+			job->uargs.perf_counter_src0 = pp_counter_src0;
+			job->uargs.perf_counter_src1 = pp_counter_src1;
+
+			/* We only copy the per sub job array if it is enabled with at least one counter */
+			if (0 < sub_job_count) {
+				job->perf_counter_per_sub_job_count = sub_job_count;
+				_mali_osk_memcpy(job->perf_counter_per_sub_job_src0, pp_counter_per_sub_job_src0, sizeof(pp_counter_per_sub_job_src0));
+				_mali_osk_memcpy(job->perf_counter_per_sub_job_src1, pp_counter_per_sub_job_src1, sizeof(pp_counter_per_sub_job_src1));
+			}
+		}
+
+		job->session = session;
+		job->id = id;
+
+		job->sub_jobs_num = job->uargs.num_cores ? job->uargs.num_cores : 1;
+		job->pid = _mali_osk_get_pid();
+		job->tid = _mali_osk_get_tid();
+
+		_mali_osk_atomic_init(&job->sub_jobs_completed, 0);
+		_mali_osk_atomic_init(&job->sub_job_errors, 0);
+		job->swap_status = MALI_NO_SWAP_IN;
+		job->user_notification = MALI_FALSE;
+		job->num_pp_cores_in_virtual = 0;
+
+		if (job->uargs.num_memory_cookies > session->allocation_mgr.mali_allocation_num) {
+			MALI_PRINT_ERROR(("Mali PP job: The number of memory cookies is invalid !\n"));
+			goto fail;
+		}
+
+		if (job->uargs.num_memory_cookies > 0) {
+			u32 size;
+			u32 __user *memory_cookies = (u32 __user *)(uintptr_t)job->uargs.memory_cookies;
+
+			size = sizeof(*memory_cookies) * (job->uargs.num_memory_cookies);
+
+			job->memory_cookies = _mali_osk_malloc(size);
+			if (NULL == job->memory_cookies) {
+				MALI_PRINT_ERROR(("Mali PP job: Failed to allocate %d bytes of memory cookies!\n", size));
+				goto fail;
+			}
+
+			if (0 != _mali_osk_copy_from_user(job->memory_cookies, memory_cookies, size)) {
+				MALI_PRINT_ERROR(("Mali PP job: Failed to copy %d bytes of memory cookies from user!\n", size));
+				goto fail;
+			}
+		}
+
+		if (_MALI_OSK_ERR_OK != mali_pp_job_check(job)) {
+			/* Not a valid job. */
+			goto fail;
+		}
+
+		mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_PP, NULL, job);
+		mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence));
+
+		mali_mem_swap_in_pages(job);
+
+		return job;
+	}
+
+fail:
+	if (NULL != job) {
+		mali_pp_job_delete(job);
+	}
+
+	return NULL;
+}
+
+void mali_pp_job_delete(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list));
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->session_fb_lookup_list));
+
+	if (NULL != job->memory_cookies) {
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+		/* Unmap buffers attached to job */
+		mali_dma_buf_unmap_job(job);
+#endif
+		if (MALI_NO_SWAP_IN != job->swap_status) {
+			mali_mem_swap_out_pages(job);
+		}
+
+		_mali_osk_free(job->memory_cookies);
+	}
+
+	if (job->user_notification) {
+		mali_scheduler_return_pp_job_to_user(job,
+						     job->num_pp_cores_in_virtual);
+	}
+
+	if (NULL != job->finished_notification) {
+		_mali_osk_notification_delete(job->finished_notification);
+	}
+
+	_mali_osk_atomic_term(&job->sub_jobs_completed);
+	_mali_osk_atomic_term(&job->sub_job_errors);
+
+	_mali_osk_free(job);
+}
+
+void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list)
+{
+	struct mali_pp_job *iter;
+	struct mali_pp_job *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Find position in list/queue where job should be added. */
+	_MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list,
+					    struct mali_pp_job, list) {
+		/* job should be started after iter if iter is in progress. */
+		if (0 < iter->sub_jobs_started) {
+			break;
+		}
+
+		/*
+		 * job should be started after iter if it has a higher
+		 * job id. A span is used to handle job id wrapping.
+		 */
+		if ((mali_pp_job_get_id(job) -
+		     mali_pp_job_get_id(iter)) <
+		    MALI_SCHEDULER_JOB_ID_SPAN) {
+			break;
+		}
+	}
+
+	_mali_osk_list_add(&job->list, &iter->list);
+}
+
+
+u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job)
+{
+	/* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */
+	if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) {
+		return job->uargs.perf_counter_src0;
+	}
+
+	/* Use per sub job counter if enabled... */
+	if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src0[sub_job]) {
+		return job->perf_counter_per_sub_job_src0[sub_job];
+	}
+
+	/* ...else default to global job counter */
+	return job->uargs.perf_counter_src0;
+}
+
+u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job)
+{
+	/* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */
+	if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) {
+		/* Virtual jobs always use the global job counter */
+		return job->uargs.perf_counter_src1;
+	}
+
+	/* Use per sub job counter if enabled... */
+	if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src1[sub_job]) {
+		return job->perf_counter_per_sub_job_src1[sub_job];
+	}
+
+	/* ...else default to global job counter */
+	return job->uargs.perf_counter_src1;
+}
+
+void mali_pp_job_set_pp_counter_global_src0(u32 counter)
+{
+	pp_counter_src0 = counter;
+}
+
+void mali_pp_job_set_pp_counter_global_src1(u32 counter)
+{
+	pp_counter_src1 = counter;
+}
+
+void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+
+	if (MALI_HW_CORE_NO_COUNTER == pp_counter_per_sub_job_src0[sub_job]) {
+		/* increment count since existing counter was disabled */
+		_mali_osk_atomic_inc(&pp_counter_per_sub_job_count);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == counter) {
+		/* decrement count since new counter is disabled */
+		_mali_osk_atomic_dec(&pp_counter_per_sub_job_count);
+	}
+
+	/* PS: A change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will inc and dec, result will be 0 change */
+
+	pp_counter_per_sub_job_src0[sub_job] = counter;
+}
+
+void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+
+	if (MALI_HW_CORE_NO_COUNTER == pp_counter_per_sub_job_src1[sub_job]) {
+		/* increment count since existing counter was disabled */
+		_mali_osk_atomic_inc(&pp_counter_per_sub_job_count);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == counter) {
+		/* decrement count since new counter is disabled */
+		_mali_osk_atomic_dec(&pp_counter_per_sub_job_count);
+	}
+
+	/* PS: A change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will inc and dec, result will be 0 change */
+
+	pp_counter_per_sub_job_src1[sub_job] = counter;
+}
+
+u32 mali_pp_job_get_pp_counter_global_src0(void)
+{
+	return pp_counter_src0;
+}
+
+u32 mali_pp_job_get_pp_counter_global_src1(void)
+{
+	return pp_counter_src1;
+}
+
+u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+	return pp_counter_per_sub_job_src0[sub_job];
+}
+
+u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+	return pp_counter_per_sub_job_src1[sub_job];
+}
diff --git a/utgard/r6p1/common/mali_pp_job.h b/utgard/r6p1/common/mali_pp_job.h
new file mode 100755
index 0000000..d033fa3
--- /dev/null
+++ b/utgard/r6p1/common/mali_pp_job.h
@@ -0,0 +1,581 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PP_JOB_H__
+#define __MALI_PP_JOB_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "regs/mali_200_regs.h"
+#include "mali_kernel_core.h"
+#include "mali_dlbu.h"
+#include "mali_timeline.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#include "linux/mali_memory_dma_buf.h"
+#endif
+
+typedef enum pp_job_status {
+	MALI_NO_SWAP_IN,
+	MALI_SWAP_IN_FAIL,
+	MALI_SWAP_IN_SUCC,
+} pp_job_status;
+
+/**
+ * This structure represents a PP job, including all sub jobs.
+ *
+ * The PP job object itself is not protected by any single lock,
+ * but relies on other locks instead (scheduler, executor and timeline lock).
+ * Think of the job object as moving between these sub systems through-out
+ * its lifetime. Different part of the PP job struct is used by different
+ * subsystems. Accessor functions ensure that correct lock is taken.
+ * Do NOT access any data members directly from outside this module!
+ */
+struct mali_pp_job {
+	/*
+	 * These members are typically only set at creation,
+	 * and only read later on.
+	 * They do not require any lock protection.
+	 */
+	_mali_uk_pp_start_job_s uargs;                     /**< Arguments from user space */
+	struct mali_session_data *session;                 /**< Session which submitted this job */
+	u32 pid;                                           /**< Process ID of submitting process */
+	u32 tid;                                           /**< Thread ID of submitting thread */
+	u32 id;                                            /**< Identifier for this job in kernel space (sequential numbering) */
+	u32 cache_order;                                   /**< Cache order used for L2 cache flushing (sequential numbering) */
+	struct mali_timeline_tracker tracker;              /**< Timeline tracker for this job */
+	_mali_osk_notification_t *finished_notification;   /**< Notification sent back to userspace on job complete */
+	u32 perf_counter_per_sub_job_count;                /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */
+	u32 perf_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src0 */
+	u32 perf_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src1 */
+	u32 sub_jobs_num;                                  /**< Number of subjobs; set to 1 for Mali-450 if DLBU is used, otherwise equals number of PP cores */
+
+	pp_job_status swap_status;                         /**< Used to track each PP job swap status, if fail, we need to drop them in scheduler part */
+	mali_bool user_notification;                       /**< When we deferred delete PP job, we need to judge if we need to send job finish notification to user space */
+	u32 num_pp_cores_in_virtual;                       /**< How many PP cores we have when job finished */
+
+	/*
+	 * These members are used by both scheduler and executor.
+	 * They are "protected" by atomic operations.
+	 */
+	_mali_osk_atomic_t sub_jobs_completed;                            /**< Number of completed sub-jobs in this superjob */
+	_mali_osk_atomic_t sub_job_errors;                                /**< Bitfield with errors (errors for each single sub-job is or'ed together) */
+
+	/*
+	 * These members are used by scheduler, but only when no one else
+	 * knows about this job object but the working function.
+	 * No lock is thus needed for these.
+	 */
+	u32 *memory_cookies;                               /**< Memory cookies attached to job */
+
+	/*
+	 * These members are used by the scheduler,
+	 * protected by scheduler lock
+	 */
+	_mali_osk_list_t list;                             /**< Used to link jobs together in the scheduler queue */
+	_mali_osk_list_t session_fb_lookup_list;           /**< Used to link jobs together from the same frame builder in the session */
+	u32 sub_jobs_started;                              /**< Total number of sub-jobs started (always started in ascending order) */
+
+	/*
+	 * Set by executor/group on job completion, read by scheduler when
+	 * returning job to user. Hold executor lock when setting,
+	 * no lock needed when reading
+	 */
+	u32 perf_counter_value0[_MALI_PP_MAX_SUB_JOBS];    /**< Value of performance counter 0 (to be returned to user space), one for each sub job */
+	u32 perf_counter_value1[_MALI_PP_MAX_SUB_JOBS];    /**< Value of performance counter 1 (to be returned to user space), one for each sub job */
+};
+
+void mali_pp_job_initialize(void);
+void mali_pp_job_terminate(void);
+
+struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session, _mali_uk_pp_start_job_s *uargs, u32 id);
+void mali_pp_job_delete(struct mali_pp_job *job);
+
+u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job);
+u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job);
+
+void mali_pp_job_set_pp_counter_global_src0(u32 counter);
+void mali_pp_job_set_pp_counter_global_src1(u32 counter);
+void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter);
+void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter);
+
+u32 mali_pp_job_get_pp_counter_global_src0(void);
+u32 mali_pp_job_get_pp_counter_global_src1(void);
+u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job);
+u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job);
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->id;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_cache_order(struct mali_pp_job *job,
+		u32 cache_order)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	job->cache_order = cache_order;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_cache_order(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->cache_order;
+}
+
+MALI_STATIC_INLINE u64 mali_pp_job_get_user_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.user_job_ptr;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_frame_builder_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_builder_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_flush_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.flush_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_pid(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->pid;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_tid(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->tid;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_frame_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_dlbu_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.dlbu_registers;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_virtual(struct mali_pp_job *job)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (0 == job->uargs.num_cores) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_addr_frame(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (mali_pp_job_is_virtual(job)) {
+		return MALI_DLBU_VIRT_ADDR;
+	} else if (0 == sub_job) {
+		return job->uargs.frame_registers[MALI200_REG_ADDR_FRAME / sizeof(u32)];
+	} else if (sub_job < _MALI_PP_MAX_SUB_JOBS) {
+		return job->uargs.frame_registers_addr_frame[sub_job - 1];
+	}
+
+	return 0;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_addr_stack(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (0 == sub_job) {
+		return job->uargs.frame_registers[MALI200_REG_ADDR_STACK / sizeof(u32)];
+	} else if (sub_job < _MALI_PP_MAX_SUB_JOBS) {
+		return job->uargs.frame_registers_addr_stack[sub_job - 1];
+	}
+
+	return 0;
+}
+
+void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list);
+
+MALI_STATIC_INLINE void mali_pp_job_list_addtail(struct mali_pp_job *job,
+		_mali_osk_list_t *list)
+{
+	_mali_osk_list_addtail(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_list_move(struct mali_pp_job *job,
+		_mali_osk_list_t *list)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list));
+	_mali_osk_list_move(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_list_remove(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->list);
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb0_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb0_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb1_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb1_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb2_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb2_registers;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb0_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb1_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb2_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb0(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb1(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb2(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_all_writeback_unit_disabled(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] ||
+	    job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] ||
+	    job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT]
+	   ) {
+		/* At least one output unit active */
+		return MALI_FALSE;
+	}
+
+	/* All outputs are disabled - we can abort the job */
+	return MALI_TRUE;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_fb_lookup_add(struct mali_pp_job *job)
+{
+	u32 fb_lookup_id;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	fb_lookup_id = MALI_PP_JOB_FB_LOOKUP_LIST_MASK & job->uargs.frame_builder_id;
+
+	MALI_DEBUG_ASSERT(MALI_PP_JOB_FB_LOOKUP_LIST_SIZE > fb_lookup_id);
+
+	_mali_osk_list_addtail(&job->session_fb_lookup_list,
+			       &job->session->pp_job_fb_lookup_list[fb_lookup_id]);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_fb_lookup_remove(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->session_fb_lookup_list);
+}
+
+MALI_STATIC_INLINE struct mali_session_data *mali_pp_job_get_session(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->session;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_has_started_sub_jobs(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return (0 < job->sub_jobs_started) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_has_unstarted_sub_jobs(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return (job->sub_jobs_started < job->sub_jobs_num) ? MALI_TRUE : MALI_FALSE;
+}
+
+/* Function used when we are terminating a session with jobs. Return TRUE if it has a rendering job.
+   Makes sure that no new subjobs are started. */
+MALI_STATIC_INLINE void mali_pp_job_mark_unstarted_failed(struct mali_pp_job *job)
+{
+	u32 jobs_remaining;
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	jobs_remaining = job->sub_jobs_num - job->sub_jobs_started;
+	job->sub_jobs_started += jobs_remaining;
+
+	/* Not the most optimal way, but this is only used in error cases */
+	for (i = 0; i < jobs_remaining; i++) {
+		_mali_osk_atomic_inc(&job->sub_jobs_completed);
+		_mali_osk_atomic_inc(&job->sub_job_errors);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_complete(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->sub_jobs_num ==
+		_mali_osk_atomic_read(&job->sub_jobs_completed)) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_first_unstarted_sub_job(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return job->sub_jobs_started;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_sub_job_count(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->sub_jobs_num;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_unstarted_sub_job_count(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(job->sub_jobs_num >= job->sub_jobs_started);
+	return (job->sub_jobs_num - job->sub_jobs_started);
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_num_memory_cookies(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.num_memory_cookies;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_memory_cookie(
+	struct mali_pp_job *job, u32 index)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(index < job->uargs.num_memory_cookies);
+	MALI_DEBUG_ASSERT_POINTER(job->memory_cookies);
+	return job->memory_cookies[index];
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_needs_dma_buf_mapping(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (0 < job->uargs.num_memory_cookies) {
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_started(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Assert that we are marking the "first unstarted sub job" as started */
+	MALI_DEBUG_ASSERT(job->sub_jobs_started == sub_job);
+
+	job->sub_jobs_started++;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_completed(struct mali_pp_job *job, mali_bool success)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_atomic_inc(&job->sub_jobs_completed);
+	if (MALI_FALSE == success) {
+		_mali_osk_atomic_inc(&job->sub_job_errors);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_was_success(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	if (0 == _mali_osk_atomic_read(&job->sub_job_errors)) {
+		return MALI_TRUE;
+	}
+	return MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_use_no_notification(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.flags & _MALI_PP_JOB_FLAG_NO_NOTIFICATION) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_pilot_job(struct mali_pp_job *job)
+{
+	/*
+	 * A pilot job is currently identified as jobs which
+	 * require no callback notification.
+	 */
+	return mali_pp_job_use_no_notification(job);
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *
+mali_pp_job_get_finished_notification(struct mali_pp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->finished_notification);
+
+	notification = job->finished_notification;
+	job->finished_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_window_surface(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.flags & _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE)
+	       ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_protected_job(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.flags & _MALI_PP_JOB_FLAG_PROTECTED)
+	       ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_flag(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_flag;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value0(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value0[sub_job];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value1(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value1[sub_job];
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value0(struct mali_pp_job *job, u32 sub_job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value0[sub_job] = value;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value1(struct mali_pp_job *job, u32 sub_job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value1[sub_job] = value;
+}
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_pp_job_check(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	if (mali_pp_job_is_virtual(job) && job->sub_jobs_num != 1) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/**
+ * Returns MALI_TRUE if this job has more than two sub jobs and all sub jobs are unstarted.
+ *
+ * @param job Job to check.
+ * @return MALI_TRUE if job has more than two sub jobs and all sub jobs are unstarted, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_large_and_unstarted(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!mali_pp_job_is_virtual(job));
+
+	return (0 == job->sub_jobs_started && 2 < job->sub_jobs_num);
+}
+
+/**
+ * Get PP job's Timeline tracker.
+ *
+ * @param job PP job.
+ * @return Pointer to Timeline tracker for the job.
+ */
+MALI_STATIC_INLINE struct mali_timeline_tracker *mali_pp_job_get_tracker(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return &(job->tracker);
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_timeline_point_ptr(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr;
+}
+
+
+#endif /* __MALI_PP_JOB_H__ */
diff --git a/utgard/r6p1/common/mali_scheduler.c b/utgard/r6p1/common/mali_scheduler.c
new file mode 100755
index 0000000..a4136ce
--- /dev/null
+++ b/utgard/r6p1/common/mali_scheduler.c
@@ -0,0 +1,1399 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_scheduler.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_profiling.h"
+#include "mali_kernel_utilization.h"
+#include "mali_timeline.h"
+#include "mali_gp_job.h"
+#include "mali_pp_job.h"
+#include "mali_executor.h"
+#include "mali_group.h"
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include "mali_pm_metrics.h"
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#endif
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+#include <linux/sched.h>
+#include <trace/events/gpu.h>
+#endif
+/*
+ * ---------- static defines/constants ----------
+ */
+
+/*
+ * If dma_buf with map on demand is used, we defer job queue
+ * if in atomic context, since both might sleep.
+ */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#define MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE 1
+#endif
+#endif
+
+
+/*
+ * ---------- global variables (exported due to inline functions) ----------
+ */
+
+/* Lock protecting this module */
+_mali_osk_spinlock_irq_t *mali_scheduler_lock_obj = NULL;
+
+/* Queue of jobs to be executed on the GP group */
+struct mali_scheduler_job_queue job_queue_gp;
+
+/* Queue of PP jobs */
+struct mali_scheduler_job_queue job_queue_pp;
+
+_mali_osk_atomic_t mali_job_id_autonumber;
+_mali_osk_atomic_t mali_job_cache_order_autonumber;
+/*
+ * ---------- static variables ----------
+ */
+
+_mali_osk_wq_work_t *scheduler_wq_pp_job_delete = NULL;
+_mali_osk_spinlock_irq_t *scheduler_pp_job_delete_lock = NULL;
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_deletion_queue);
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+static _mali_osk_wq_work_t *scheduler_wq_pp_job_queue = NULL;
+static _mali_osk_spinlock_irq_t *scheduler_pp_job_queue_lock = NULL;
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_queue_list);
+#endif
+
+/*
+ * ---------- Forward declaration of static functions ----------
+ */
+
+static mali_timeline_point mali_scheduler_submit_gp_job(
+	struct mali_session_data *session, struct mali_gp_job *job);
+static mali_timeline_point mali_scheduler_submit_pp_job(
+	struct mali_session_data *session, struct mali_pp_job *job);
+
+static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job);
+static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job);
+
+static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job,
+		mali_bool success);
+
+static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job);
+void mali_scheduler_do_pp_job_delete(void *arg);
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job);
+static void mali_scheduler_do_pp_job_queue(void *arg);
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+/*
+ * ---------- Actual implementation ----------
+ */
+
+_mali_osk_errcode_t mali_scheduler_initialize(void)
+{
+	_mali_osk_atomic_init(&mali_job_id_autonumber, 0);
+	_mali_osk_atomic_init(&mali_job_cache_order_autonumber, 0);
+
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.normal_pri);
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.high_pri);
+	job_queue_gp.depth = 0;
+	job_queue_gp.big_job_num = 0;
+
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.normal_pri);
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.high_pri);
+	job_queue_pp.depth = 0;
+	job_queue_pp.big_job_num = 0;
+
+	mali_scheduler_lock_obj = _mali_osk_spinlock_irq_init(
+					  _MALI_OSK_LOCKFLAG_ORDERED,
+					  _MALI_OSK_LOCK_ORDER_SCHEDULER);
+	if (NULL == mali_scheduler_lock_obj) {
+		mali_scheduler_terminate();
+	}
+
+	scheduler_wq_pp_job_delete = _mali_osk_wq_create_work(
+					     mali_scheduler_do_pp_job_delete, NULL);
+	if (NULL == scheduler_wq_pp_job_delete) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	scheduler_pp_job_delete_lock = _mali_osk_spinlock_irq_init(
+					       _MALI_OSK_LOCKFLAG_ORDERED,
+					       _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED);
+	if (NULL == scheduler_pp_job_delete_lock) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	scheduler_wq_pp_job_queue = _mali_osk_wq_create_work(
+					    mali_scheduler_do_pp_job_queue, NULL);
+	if (NULL == scheduler_wq_pp_job_queue) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	scheduler_pp_job_queue_lock = _mali_osk_spinlock_irq_init(
+					      _MALI_OSK_LOCKFLAG_ORDERED,
+					      _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED);
+	if (NULL == scheduler_pp_job_queue_lock) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_scheduler_terminate(void)
+{
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	if (NULL != scheduler_pp_job_queue_lock) {
+		_mali_osk_spinlock_irq_term(scheduler_pp_job_queue_lock);
+		scheduler_pp_job_queue_lock = NULL;
+	}
+
+	if (NULL != scheduler_wq_pp_job_queue) {
+		_mali_osk_wq_delete_work(scheduler_wq_pp_job_queue);
+		scheduler_wq_pp_job_queue = NULL;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+	if (NULL != scheduler_pp_job_delete_lock) {
+		_mali_osk_spinlock_irq_term(scheduler_pp_job_delete_lock);
+		scheduler_pp_job_delete_lock = NULL;
+	}
+
+	if (NULL != scheduler_wq_pp_job_delete) {
+		_mali_osk_wq_delete_work(scheduler_wq_pp_job_delete);
+		scheduler_wq_pp_job_delete = NULL;
+	}
+
+	if (NULL != mali_scheduler_lock_obj) {
+		_mali_osk_spinlock_irq_term(mali_scheduler_lock_obj);
+		mali_scheduler_lock_obj = NULL;
+	}
+
+	_mali_osk_atomic_term(&mali_job_cache_order_autonumber);
+	_mali_osk_atomic_term(&mali_job_id_autonumber);
+}
+
+u32 mali_scheduler_job_physical_head_count(mali_bool gpu_mode_is_secure)
+{
+	/*
+	 * Count how many physical sub jobs are present from the head of queue
+	 * until the first virtual job is present.
+	 * Early out when we have reached maximum number of PP cores (8)
+	 */
+	u32 count = 0;
+	struct mali_pp_job *job;
+	struct mali_pp_job *temp;
+
+	/* Check for partially started normal pri jobs */
+	if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+					   struct mali_pp_job, list);
+
+		MALI_DEBUG_ASSERT_POINTER(job);
+
+		if (MALI_TRUE == mali_pp_job_has_started_sub_jobs(job)) {
+			/*
+			 * Remember; virtual jobs can't be queued and started
+			 * at the same time, so this must be a physical job
+			 */
+			if ((MALI_FALSE  == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job))
+			    || (MALI_TRUE  == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job))) {
+
+				count += mali_pp_job_unstarted_sub_job_count(job);
+				if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) {
+					return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+				}
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.high_pri,
+				    struct mali_pp_job, list) {
+		if ((MALI_FALSE == mali_pp_job_is_virtual(job))
+		    && ((MALI_FALSE  == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job))
+			|| (MALI_TRUE  == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job)))) {
+
+			count += mali_pp_job_unstarted_sub_job_count(job);
+			if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) {
+				return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+			}
+		} else {
+			/* Came across a virtual job, so stop counting */
+			return count;
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.normal_pri,
+				    struct mali_pp_job, list) {
+		if ((MALI_FALSE == mali_pp_job_is_virtual(job))
+		    && (MALI_FALSE == mali_pp_job_has_started_sub_jobs(job))
+		    && ((MALI_FALSE  == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job))
+			|| (MALI_TRUE  == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job)))) {
+
+			count += mali_pp_job_unstarted_sub_job_count(job);
+			if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) {
+				return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+			}
+		} else {
+			/* Came across a virtual job, so stop counting */
+			return count;
+		}
+	}
+	return count;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_next(void)
+{
+	struct mali_pp_job *job;
+	struct mali_pp_job *temp;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	/* Check for partially started normal pri jobs */
+	if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+					   struct mali_pp_job, list);
+
+		MALI_DEBUG_ASSERT_POINTER(job);
+
+		if (MALI_TRUE == mali_pp_job_has_started_sub_jobs(job)) {
+			return job;
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.high_pri,
+				    struct mali_pp_job, list) {
+		return job;
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.normal_pri,
+				    struct mali_pp_job, list) {
+		return job;
+	}
+
+	return NULL;
+}
+
+mali_bool mali_scheduler_job_next_is_virtual(void)
+{
+	struct mali_pp_job *job;
+
+	job = mali_scheduler_job_pp_virtual_peek();
+	if (NULL != job) {
+		MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job));
+
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+struct mali_gp_job *mali_scheduler_job_gp_get(void)
+{
+	_mali_osk_list_t *queue;
+	struct mali_gp_job *job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+	MALI_DEBUG_ASSERT(0 < job_queue_gp.depth);
+	MALI_DEBUG_ASSERT(job_queue_gp.big_job_num <= job_queue_gp.depth);
+
+	if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) {
+		queue = &job_queue_gp.high_pri;
+	} else {
+		queue = &job_queue_gp.normal_pri;
+		MALI_DEBUG_ASSERT(!_mali_osk_list_empty(queue));
+	}
+
+	job = _MALI_OSK_LIST_ENTRY(queue->next, struct mali_gp_job, list);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	mali_gp_job_list_remove(job);
+	job_queue_gp.depth--;
+	if (job->big_job) {
+		job_queue_gp.big_job_num --;
+		if (job_queue_gp.big_job_num < MALI_MAX_PENDING_BIG_JOB) {
+			/* wake up process */
+			wait_queue_head_t *queue = mali_session_get_wait_queue();
+			wake_up(queue);
+		}
+	}
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void)
+{
+	struct mali_pp_job *job = NULL;
+	struct mali_pp_job *tmp_job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	/*
+	 * For PP jobs we favour partially started jobs in normal
+	 * priority queue over unstarted jobs in high priority queue
+	 */
+
+	if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+					       struct mali_pp_job, list);
+		MALI_DEBUG_ASSERT(NULL != tmp_job);
+
+		if (MALI_FALSE == mali_pp_job_is_virtual(tmp_job)) {
+			job = tmp_job;
+		}
+	}
+
+	if (NULL == job ||
+	    MALI_FALSE == mali_pp_job_has_started_sub_jobs(job)) {
+		/*
+		 * There isn't a partially started job in normal queue, so
+		 * look in high priority queue.
+		 */
+		if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+			MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+			tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next,
+						       struct mali_pp_job, list);
+			MALI_DEBUG_ASSERT(NULL != tmp_job);
+
+			if (MALI_FALSE == mali_pp_job_is_virtual(tmp_job)) {
+				job = tmp_job;
+			}
+		}
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void)
+{
+	struct mali_pp_job *job = NULL;
+	struct mali_pp_job *tmp_job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next,
+					       struct mali_pp_job, list);
+
+		if (MALI_TRUE == mali_pp_job_is_virtual(tmp_job)) {
+			job = tmp_job;
+		}
+	}
+
+	if (NULL == job) {
+		if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+			MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+			tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+						       struct mali_pp_job, list);
+
+			if (MALI_TRUE == mali_pp_job_is_virtual(tmp_job)) {
+				job = tmp_job;
+			}
+		}
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job)
+{
+	struct mali_pp_job *job = mali_scheduler_job_pp_physical_peek();
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == mali_pp_job_is_virtual(job));
+
+	if (NULL != job) {
+		*sub_job = mali_pp_job_get_first_unstarted_sub_job(job);
+
+		mali_pp_job_mark_sub_job_started(job, *sub_job);
+		if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(job)) {
+			/* Remove from queue when last sub job has been retrieved */
+			mali_pp_job_list_remove(job);
+		}
+
+		job_queue_pp.depth--;
+
+		/*
+		 * Job about to start so it is no longer be
+		 * possible to discard WB
+		 */
+		mali_pp_job_fb_lookup_remove(job);
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void)
+{
+	struct mali_pp_job *job = mali_scheduler_job_pp_virtual_peek();
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_pp_job_is_virtual(job));
+
+	if (NULL != job) {
+		MALI_DEBUG_ASSERT(0 ==
+				  mali_pp_job_get_first_unstarted_sub_job(job));
+		MALI_DEBUG_ASSERT(1 ==
+				  mali_pp_job_get_sub_job_count(job));
+
+		mali_pp_job_mark_sub_job_started(job, 0);
+
+		mali_pp_job_list_remove(job);
+
+		job_queue_pp.depth--;
+
+		/*
+		 * Job about to start so it is no longer be
+		 * possible to discard WB
+		 */
+		mali_pp_job_fb_lookup_remove(job);
+	}
+
+	return job;
+}
+
+mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Timeline activation for job %u (0x%08X).\n",
+			     mali_gp_job_get_id(job), job));
+
+	mali_scheduler_lock();
+
+	if (!mali_scheduler_queue_gp_job(job)) {
+		/* Failed to enqueue job, release job (with error) */
+
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_gp_job_get_tracker(job));
+		mali_gp_job_signal_pp_tracker(job, MALI_FALSE);
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_gp_job(job, MALI_FALSE,
+					       MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	mali_scheduler_unlock();
+
+	return MALI_SCHEDULER_MASK_GP;
+}
+
+mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Timeline activation for job %u (0x%08X).\n",
+			     mali_pp_job_get_id(job), job));
+
+	if (MALI_TRUE == mali_timeline_tracker_activation_error(
+		    mali_pp_job_get_tracker(job))) {
+		MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Job %u (0x%08X) activated with error, aborting.\n",
+				     mali_pp_job_get_id(job), job));
+
+		mali_scheduler_lock();
+		mali_pp_job_fb_lookup_remove(job);
+		mali_pp_job_mark_unstarted_failed(job);
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(job));
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	if (mali_pp_job_needs_dma_buf_mapping(job)) {
+		mali_scheduler_deferred_pp_job_queue(job);
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+	mali_scheduler_lock();
+
+	if (!mali_scheduler_queue_pp_job(job)) {
+		/* Failed to enqueue job, release job (with error) */
+		mali_pp_job_fb_lookup_remove(job);
+		mali_pp_job_mark_unstarted_failed(job);
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(job));
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	mali_scheduler_unlock();
+	return MALI_SCHEDULER_MASK_PP;
+}
+
+void mali_scheduler_complete_gp_job(struct mali_gp_job *job,
+				    mali_bool success,
+				    mali_bool user_notification,
+				    mali_bool dequeued)
+{
+	if (user_notification) {
+		mali_scheduler_return_gp_job_to_user(job, success);
+	}
+
+	if (dequeued) {
+		_mali_osk_pm_dev_ref_put();
+
+		if (mali_utilization_enabled()) {
+			mali_utilization_gp_end();
+		}
+		mali_pm_record_gpu_idle(MALI_TRUE);
+	}
+
+	mali_gp_job_delete(job);
+}
+
+void mali_scheduler_complete_pp_job(struct mali_pp_job *job,
+				    u32 num_cores_in_virtual,
+				    mali_bool user_notification,
+				    mali_bool dequeued)
+{
+	job->user_notification = user_notification;
+	job->num_pp_cores_in_virtual = num_cores_in_virtual;
+
+	if (dequeued) {
+#if defined(CONFIG_MALI_DVFS)
+		if (mali_pp_job_is_window_surface(job)) {
+			struct mali_session_data *session;
+			session = mali_pp_job_get_session(job);
+			mali_session_inc_num_window_jobs(session);
+		}
+#endif
+
+		_mali_osk_pm_dev_ref_put();
+
+		if (mali_utilization_enabled()) {
+			mali_utilization_pp_end();
+		}
+		mali_pm_record_gpu_idle(MALI_FALSE);
+	}
+
+	/* With ZRAM feature enabled, all pp jobs will be force to use deferred delete. */
+	mali_scheduler_deferred_pp_job_delete(job);
+}
+
+void mali_scheduler_abort_session(struct mali_session_data *session)
+{
+	struct mali_gp_job *gp_job;
+	struct mali_gp_job *gp_tmp;
+	struct mali_pp_job *pp_job;
+	struct mali_pp_job *pp_tmp;
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_gp);
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_pp);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali scheduler: Aborting all queued jobs from session 0x%08X.\n",
+			     session));
+
+	mali_scheduler_lock();
+
+	/* Remove from GP normal priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.normal_pri,
+				    struct mali_gp_job, list) {
+		if (mali_gp_job_get_session(gp_job) == session) {
+			mali_gp_job_list_move(gp_job, &removed_jobs_gp);
+			job_queue_gp.depth--;
+			job_queue_gp.big_job_num -= gp_job->big_job ? 1 : 0;
+		}
+	}
+
+	/* Remove from GP high priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.high_pri,
+				    struct mali_gp_job, list) {
+		if (mali_gp_job_get_session(gp_job) == session) {
+			mali_gp_job_list_move(gp_job, &removed_jobs_gp);
+			job_queue_gp.depth--;
+			job_queue_gp.big_job_num -= gp_job->big_job ? 1 : 0;
+		}
+	}
+
+	/* Remove from PP normal priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp,
+				    &job_queue_pp.normal_pri,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_get_session(pp_job) == session) {
+			mali_pp_job_fb_lookup_remove(pp_job);
+
+			job_queue_pp.depth -=
+				mali_pp_job_unstarted_sub_job_count(
+					pp_job);
+			mali_pp_job_mark_unstarted_failed(pp_job);
+
+			if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(pp_job)) {
+				if (mali_pp_job_is_complete(pp_job)) {
+					mali_pp_job_list_move(pp_job,
+							      &removed_jobs_pp);
+				} else {
+					mali_pp_job_list_remove(pp_job);
+				}
+			}
+		}
+	}
+
+	/* Remove from PP high priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp,
+				    &job_queue_pp.high_pri,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_get_session(pp_job) == session) {
+			mali_pp_job_fb_lookup_remove(pp_job);
+
+			job_queue_pp.depth -=
+				mali_pp_job_unstarted_sub_job_count(
+					pp_job);
+			mali_pp_job_mark_unstarted_failed(pp_job);
+
+			if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(pp_job)) {
+				if (mali_pp_job_is_complete(pp_job)) {
+					mali_pp_job_list_move(pp_job,
+							      &removed_jobs_pp);
+				} else {
+					mali_pp_job_list_remove(pp_job);
+				}
+			}
+		}
+	}
+
+	/*
+	 * Release scheduler lock so we can release trackers
+	 * (which will potentially queue new jobs)
+	 */
+	mali_scheduler_unlock();
+
+	/* Release and complete all (non-running) found GP jobs  */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &removed_jobs_gp,
+				    struct mali_gp_job, list) {
+		mali_timeline_tracker_release(mali_gp_job_get_tracker(gp_job));
+		mali_gp_job_signal_pp_tracker(gp_job, MALI_FALSE);
+		_mali_osk_list_delinit(&gp_job->list);
+		mali_scheduler_complete_gp_job(gp_job,
+					       MALI_FALSE, MALI_FALSE, MALI_TRUE);
+	}
+
+	/* Release and complete non-running PP jobs */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp, &removed_jobs_pp,
+				    struct mali_pp_job, list) {
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(pp_job));
+		_mali_osk_list_delinit(&pp_job->list);
+		mali_scheduler_complete_pp_job(pp_job, 0,
+					       MALI_FALSE, MALI_TRUE);
+	}
+}
+
+_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx,
+		_mali_uk_gp_start_job_s *uargs)
+{
+	struct mali_session_data *session;
+	struct mali_gp_job *job;
+	mali_timeline_point point;
+	u32 __user *point_ptr = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)ctx;
+
+	job = mali_gp_job_create(session, uargs, mali_scheduler_get_new_id(),
+				 NULL);
+	if (NULL == job) {
+		MALI_PRINT_ERROR(("Failed to create GP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_gp_job_get_timeline_point_ptr(job);
+
+	point = mali_scheduler_submit_gp_job(session, job);
+
+	if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+		/*
+		 * Let user space know that something failed
+		 * after the job was started.
+		 */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx,
+		_mali_uk_pp_start_job_s *uargs)
+{
+	struct mali_session_data *session;
+	struct mali_pp_job *job;
+	mali_timeline_point point;
+	u32 __user *point_ptr = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)ctx;
+
+	job = mali_pp_job_create(session, uargs, mali_scheduler_get_new_id());
+	if (NULL == job) {
+		MALI_PRINT_ERROR(("Failed to create PP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(job);
+
+	point = mali_scheduler_submit_pp_job(session, job);
+	job = NULL;
+
+	if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+		/*
+		 * Let user space know that something failed
+		 * after the job was started.
+		 */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx,
+		_mali_uk_pp_and_gp_start_job_s *uargs)
+{
+	struct mali_session_data *session;
+	_mali_uk_pp_and_gp_start_job_s kargs;
+	struct mali_pp_job *pp_job;
+	struct mali_gp_job *gp_job;
+	u32 __user *point_ptr = NULL;
+	mali_timeline_point point;
+	_mali_uk_pp_start_job_s __user *pp_args;
+	_mali_uk_gp_start_job_s __user *gp_args;
+
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+
+	session = (struct mali_session_data *) ctx;
+
+	if (0 != _mali_osk_copy_from_user(&kargs, uargs,
+					  sizeof(_mali_uk_pp_and_gp_start_job_s))) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	pp_args = (_mali_uk_pp_start_job_s __user *)(uintptr_t)kargs.pp_args;
+	gp_args = (_mali_uk_gp_start_job_s __user *)(uintptr_t)kargs.gp_args;
+
+	pp_job = mali_pp_job_create(session, pp_args,
+				    mali_scheduler_get_new_id());
+	if (NULL == pp_job) {
+		MALI_PRINT_ERROR(("Failed to create PP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	gp_job = mali_gp_job_create(session, gp_args,
+				    mali_scheduler_get_new_id(),
+				    mali_pp_job_get_tracker(pp_job));
+	if (NULL == gp_job) {
+		MALI_PRINT_ERROR(("Failed to create GP job.\n"));
+		mali_pp_job_delete(pp_job);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(pp_job);
+
+	/* Submit GP job. */
+	mali_scheduler_submit_gp_job(session, gp_job);
+	gp_job = NULL;
+
+	/* Submit PP job. */
+	point = mali_scheduler_submit_pp_job(session, pp_job);
+	pp_job = NULL;
+
+	if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+		/*
+		 * Let user space know that something failed
+		 * after the jobs were started.
+		 */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args)
+{
+	struct mali_session_data *session;
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+	u32 fb_lookup_id;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	fb_lookup_id = args->fb_id & MALI_PP_JOB_FB_LOOKUP_LIST_MASK;
+
+	mali_scheduler_lock();
+
+	/* Iterate over all jobs for given frame builder_id. */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp,
+				    &session->pp_job_fb_lookup_list[fb_lookup_id],
+				    struct mali_pp_job, session_fb_lookup_list) {
+		MALI_DEBUG_CODE(u32 disable_mask = 0);
+
+		if (mali_pp_job_get_frame_builder_id(job) !=
+		    (u32) args->fb_id) {
+			MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Disable WB mismatching FB.\n"));
+			continue;
+		}
+
+		MALI_DEBUG_CODE(disable_mask |= 0xD << (4 * 3));
+
+		if (mali_pp_job_get_wb0_source_addr(job) == args->wb0_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x1 << (4 * 1));
+			mali_pp_job_disable_wb0(job);
+		}
+
+		if (mali_pp_job_get_wb1_source_addr(job) == args->wb1_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x2 << (4 * 2));
+			mali_pp_job_disable_wb1(job);
+		}
+
+		if (mali_pp_job_get_wb2_source_addr(job) == args->wb2_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x3 << (4 * 3));
+			mali_pp_job_disable_wb2(job);
+		}
+		MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Disable WB: 0x%X.\n",
+				     disable_mask));
+	}
+
+	mali_scheduler_unlock();
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_scheduler_dump_state(char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "GP queues\n");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tQueue depth: %u\n", job_queue_gp.depth);
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tNormal priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_gp.normal_pri) ?
+				"empty" : "not empty");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tHigh priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_gp.high_pri) ?
+				"empty" : "not empty");
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"PP queues\n");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tQueue depth: %u\n", job_queue_pp.depth);
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tNormal priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_pp.normal_pri)
+				? "empty" : "not empty");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tHigh priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_pp.high_pri)
+				? "empty" : "not empty");
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\n");
+
+	return n;
+}
+#endif
+
+/*
+ * ---------- Implementation of static functions ----------
+ */
+
+static mali_timeline_point mali_scheduler_submit_gp_job(
+	struct mali_session_data *session, struct mali_gp_job *job)
+{
+	mali_timeline_point point;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Add job to Timeline system. */
+	point = mali_timeline_system_add_tracker(session->timeline_system,
+			mali_gp_job_get_tracker(job), MALI_TIMELINE_GP);
+
+	return point;
+}
+
+static mali_timeline_point mali_scheduler_submit_pp_job(
+	struct mali_session_data *session, struct mali_pp_job *job)
+{
+	mali_timeline_point point;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	mali_scheduler_lock();
+	/*
+	 * Adding job to the lookup list used to quickly discard
+	 * writeback units of queued jobs.
+	 */
+	mali_pp_job_fb_lookup_add(job);
+	mali_scheduler_unlock();
+
+	/* Add job to Timeline system. */
+	point = mali_timeline_system_add_tracker(session->timeline_system,
+			mali_pp_job_get_tracker(job), MALI_TIMELINE_PP);
+
+	return point;
+}
+
+static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job)
+{
+	struct mali_session_data *session;
+	_mali_osk_list_t *queue;
+
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_gp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (unlikely(session->is_aborting)) {
+		MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Job %u (0x%08X) queued while session is aborting.\n",
+				     mali_gp_job_get_id(job), job));
+		return MALI_FALSE; /* job not queued */
+	}
+
+	mali_gp_job_set_cache_order(job, mali_scheduler_get_new_cache_order());
+
+	/* Determine which queue the job should be added to. */
+	if (session->use_high_priority_job_queue) {
+		queue = &job_queue_gp.high_pri;
+	} else {
+		queue = &job_queue_gp.normal_pri;
+	}
+
+	job_queue_gp.depth += 1;
+	job_queue_gp.big_job_num += (job->big_job) ? 1 : 0;
+
+	/* Add job to queue (mali_gp_job_queue_add find correct place). */
+	mali_gp_job_list_add(job, queue);
+
+	/*
+	 * We hold a PM reference for every job we hold queued (and running)
+	 * It is important that we take this reference after job has been
+	 * added the the queue so that any runtime resume could schedule this
+	 * job right there and then.
+	 */
+	_mali_osk_pm_dev_ref_get_async();
+
+	if (mali_utilization_enabled()) {
+		/*
+		 * We cheat a little bit by counting the GP as busy from the
+		 * time a GP job is queued. This will be fine because we only
+		 * loose the tiny idle gap between jobs, but we will instead
+		 * get less utilization work to do (less locks taken)
+		 */
+		mali_utilization_gp_start();
+	}
+
+	mali_pm_record_gpu_active(MALI_TRUE);
+
+	/* Add profiling events for job enqueued */
+	_mali_osk_profiling_add_event(
+		MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE,
+		mali_gp_job_get_pid(job),
+		mali_gp_job_get_tid(job),
+		mali_gp_job_get_frame_builder_id(job),
+		mali_gp_job_get_flush_id(job),
+		0);
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+	trace_gpu_job_enqueue(mali_gp_job_get_tid(job),
+			      mali_gp_job_get_id(job), "GP");
+#endif
+
+	MALI_DEBUG_PRINT(3, ("Mali GP scheduler: Job %u (0x%08X) queued\n",
+			     mali_gp_job_get_id(job), job));
+
+	return MALI_TRUE; /* job queued */
+}
+
+static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job)
+{
+	struct mali_session_data *session;
+	_mali_osk_list_t *queue = NULL;
+
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_pp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (unlikely(session->is_aborting)) {
+		MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while session is aborting.\n",
+				     mali_pp_job_get_id(job), job));
+		return MALI_FALSE; /* job not queued */
+	} else if (unlikely(MALI_SWAP_IN_FAIL == job->swap_status)) {
+		MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while swap in failed.\n",
+				     mali_pp_job_get_id(job), job));
+		return MALI_FALSE;
+	}
+
+	mali_pp_job_set_cache_order(job, mali_scheduler_get_new_cache_order());
+
+	if (session->use_high_priority_job_queue) {
+		queue = &job_queue_pp.high_pri;
+	} else {
+		queue = &job_queue_pp.normal_pri;
+	}
+
+	job_queue_pp.depth +=
+		mali_pp_job_get_sub_job_count(job);
+
+	/* Add job to queue (mali_gp_job_queue_add find correct place). */
+	mali_pp_job_list_add(job, queue);
+
+	/*
+	 * We hold a PM reference for every job we hold queued (and running)
+	 * It is important that we take this reference after job has been
+	 * added the the queue so that any runtime resume could schedule this
+	 * job right there and then.
+	 */
+	_mali_osk_pm_dev_ref_get_async();
+
+	if (mali_utilization_enabled()) {
+		/*
+		 * We cheat a little bit by counting the PP as busy from the
+		 * time a PP job is queued. This will be fine because we only
+		 * loose the tiny idle gap between jobs, but we will instead
+		 * get less utilization work to do (less locks taken)
+		 */
+		mali_utilization_pp_start();
+	}
+
+	mali_pm_record_gpu_active(MALI_FALSE);
+
+	/* Add profiling events for job enqueued */
+	_mali_osk_profiling_add_event(
+		MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE,
+		mali_pp_job_get_pid(job),
+		mali_pp_job_get_tid(job),
+		mali_pp_job_get_frame_builder_id(job),
+		mali_pp_job_get_flush_id(job),
+		0);
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+	trace_gpu_job_enqueue(mali_pp_job_get_tid(job),
+			      mali_pp_job_get_id(job), "PP");
+#endif
+
+	MALI_DEBUG_PRINT(3, ("Mali PP scheduler: %s job %u (0x%08X) with %u parts queued.\n",
+			     mali_pp_job_is_virtual(job)
+			     ? "Virtual" : "Physical",
+			     mali_pp_job_get_id(job), job,
+			     mali_pp_job_get_sub_job_count(job)));
+
+	return MALI_TRUE; /* job queued */
+}
+
+static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job,
+		mali_bool success)
+{
+	_mali_uk_gp_job_finished_s *jobres;
+	struct mali_session_data *session;
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_gp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	notification = mali_gp_job_get_finished_notification(job);
+	MALI_DEBUG_ASSERT_POINTER(notification);
+
+	jobres = notification->result_buffer;
+	MALI_DEBUG_ASSERT_POINTER(jobres);
+
+	jobres->pending_big_job_num = mali_scheduler_job_gp_big_job_count();
+
+	jobres->user_job_ptr = mali_gp_job_get_user_id(job);
+	if (MALI_TRUE == success) {
+		jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS;
+	} else {
+		jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR;
+	}
+	jobres->heap_current_addr = mali_gp_job_get_current_heap_addr(job);
+	jobres->perf_counter0 = mali_gp_job_get_perf_counter_value0(job);
+	jobres->perf_counter1 = mali_gp_job_get_perf_counter_value1(job);
+
+	mali_session_send_notification(session, notification);
+}
+
+void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job,
+		u32 num_cores_in_virtual)
+{
+	u32 i;
+	u32 num_counters_to_copy;
+	_mali_uk_pp_job_finished_s *jobres;
+	struct mali_session_data *session;
+	_mali_osk_notification_t *notification;
+
+	if (MALI_TRUE == mali_pp_job_use_no_notification(job)) {
+		return;
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_pp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	notification = mali_pp_job_get_finished_notification(job);
+	MALI_DEBUG_ASSERT_POINTER(notification);
+
+	jobres = notification->result_buffer;
+	MALI_DEBUG_ASSERT_POINTER(jobres);
+
+	jobres->user_job_ptr = mali_pp_job_get_user_id(job);
+	if (MALI_TRUE == mali_pp_job_was_success(job)) {
+		jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS;
+	} else {
+		jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR;
+	}
+
+	if (mali_pp_job_is_virtual(job)) {
+		num_counters_to_copy = num_cores_in_virtual;
+	} else {
+		num_counters_to_copy = mali_pp_job_get_sub_job_count(job);
+	}
+
+	for (i = 0; i < num_counters_to_copy; i++) {
+		jobres->perf_counter0[i] =
+			mali_pp_job_get_perf_counter_value0(job, i);
+		jobres->perf_counter1[i] =
+			mali_pp_job_get_perf_counter_value1(job, i);
+		jobres->perf_counter_src0 =
+			mali_pp_job_get_pp_counter_global_src0();
+		jobres->perf_counter_src1 =
+			mali_pp_job_get_pp_counter_global_src1();
+	}
+
+	mali_session_send_notification(session, notification);
+}
+
+static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock);
+	mali_pp_job_list_addtail(job, &scheduler_pp_job_deletion_queue);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock);
+
+	_mali_osk_wq_schedule_work(scheduler_wq_pp_job_delete);
+}
+
+void mali_scheduler_do_pp_job_delete(void *arg)
+{
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(list);
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+
+	MALI_IGNORE(arg);
+
+	/*
+	 * Quickly "unhook" the jobs pending to be deleted, so we can release
+	 * the lock before we start deleting the job objects
+	 * (without any locks held)
+	 */
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock);
+	_mali_osk_list_move_list(&scheduler_pp_job_deletion_queue, &list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+		_mali_osk_list_delinit(&job->list);
+
+		mali_pp_job_delete(job); /* delete the job object itself */
+	}
+}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+
+static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock);
+	mali_pp_job_list_addtail(job, &scheduler_pp_job_queue_list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock);
+
+	_mali_osk_wq_schedule_work(scheduler_wq_pp_job_queue);
+}
+
+static void mali_scheduler_do_pp_job_queue(void *arg)
+{
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(list);
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_IGNORE(arg);
+
+	/*
+	 * Quickly "unhook" the jobs pending to be queued, so we can release
+	 * the lock before we start queueing the job objects
+	 * (without any locks held)
+	 */
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock);
+	_mali_osk_list_move_list(&scheduler_pp_job_queue_list, &list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock);
+
+	/* First loop through all jobs and do the pre-work (no locks needed) */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_needs_dma_buf_mapping(job)) {
+			/*
+			 * This operation could fail, but we continue anyway,
+			 * because the worst that could happen is that this
+			 * job will fail due to a Mali page fault.
+			 */
+			mali_dma_buf_map_job(job);
+		}
+	}
+
+	mali_scheduler_lock();
+
+	/* Then loop through all jobs again to queue them (lock needed) */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+
+		/* Remove from scheduler_pp_job_queue_list before queueing */
+		mali_pp_job_list_remove(job);
+
+		if (mali_scheduler_queue_pp_job(job)) {
+			/* Job queued successfully */
+			schedule_mask |= MALI_SCHEDULER_MASK_PP;
+		} else {
+			/* Failed to enqueue job, release job (with error) */
+			mali_pp_job_fb_lookup_remove(job);
+			mali_pp_job_mark_unstarted_failed(job);
+
+			/* unlock scheduler in this uncommon case */
+			mali_scheduler_unlock();
+
+			schedule_mask |= mali_timeline_tracker_release(
+						 mali_pp_job_get_tracker(job));
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(job, 0, MALI_TRUE,
+						       MALI_FALSE);
+
+			mali_scheduler_lock();
+		}
+	}
+
+	mali_scheduler_unlock();
+
+	/* Trigger scheduling of jobs */
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+void mali_scheduler_gp_pp_job_queue_print(void)
+{
+	struct mali_gp_job *gp_job = NULL;
+	struct mali_gp_job *tmp_gp_job = NULL;
+	struct mali_pp_job *pp_job = NULL;
+	struct mali_pp_job *tmp_pp_job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj);
+
+	/* dump job queup status */
+	if ((0 == job_queue_gp.depth) && (0 == job_queue_pp.depth)) {
+		MALI_PRINT(("No GP&PP job in the job queue.\n"));
+		return;
+	}
+
+	MALI_PRINT(("Total (%d) GP job in the job queue.\n", job_queue_gp.depth));
+	if (job_queue_gp.depth > 0) {
+		if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(gp_job, tmp_gp_job, &job_queue_gp.high_pri,
+						    struct mali_gp_job, list) {
+				MALI_PRINT(("GP job(%p) id = %d tid = %d pid = %d in the gp job high_pri queue\n", gp_job, gp_job->id, gp_job->tid, gp_job->pid));
+			}
+		}
+
+		if (!_mali_osk_list_empty(&job_queue_gp.normal_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(gp_job, tmp_gp_job, &job_queue_gp.normal_pri,
+						    struct mali_gp_job, list) {
+				MALI_PRINT(("GP job(%p) id = %d tid = %d pid = %d in the gp job normal_pri queue\n", gp_job, gp_job->id, gp_job->tid, gp_job->pid));
+			}
+		}
+	}
+
+	MALI_PRINT(("Total (%d) PP job in the job queue.\n", job_queue_pp.depth));
+	if (job_queue_pp.depth > 0) {
+		if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(pp_job, tmp_pp_job, &job_queue_pp.high_pri,
+						    struct mali_pp_job, list) {
+				if (mali_pp_job_is_virtual(pp_job)) {
+					MALI_PRINT(("PP Virtual job(%p) id = %d tid = %d pid = %d in the pp job high_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				} else {
+					MALI_PRINT(("PP Physical job(%p) id = %d tid = %d pid = %d in the pp job high_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				}
+			}
+		}
+
+		if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(pp_job, tmp_pp_job, &job_queue_pp.normal_pri,
+						    struct mali_pp_job, list) {
+				if (mali_pp_job_is_virtual(pp_job)) {
+					MALI_PRINT(("PP Virtual job(%p) id = %d tid = %d pid = %d in the pp job normal_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				} else {
+					MALI_PRINT(("PP Physical job(%p) id = %d tid = %d pid = %d in the pp job normal_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				}
+			}
+		}
+	}
+
+	/* dump group running job status */
+	mali_executor_running_status_print();
+}
diff --git a/utgard/r6p1/common/mali_scheduler.h b/utgard/r6p1/common/mali_scheduler.h
new file mode 100755
index 0000000..0ab9177
--- /dev/null
+++ b/utgard/r6p1/common/mali_scheduler.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SCHEDULER_H__
+#define __MALI_SCHEDULER_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_scheduler_types.h"
+#include "mali_session.h"
+
+struct mali_scheduler_job_queue {
+	_MALI_OSK_LIST_HEAD(normal_pri); /* Queued jobs with normal priority */
+	_MALI_OSK_LIST_HEAD(high_pri);   /* Queued jobs with high priority */
+	u32 depth;                       /* Depth of combined queues. */
+	u32 big_job_num;
+};
+
+extern _mali_osk_spinlock_irq_t *mali_scheduler_lock_obj;
+
+/* Queue of jobs to be executed on the GP group */
+extern struct mali_scheduler_job_queue job_queue_gp;
+
+/* Queue of PP jobs */
+extern struct mali_scheduler_job_queue job_queue_pp;
+
+extern _mali_osk_atomic_t mali_job_id_autonumber;
+extern _mali_osk_atomic_t mali_job_cache_order_autonumber;
+
+#define MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+_mali_osk_errcode_t mali_scheduler_initialize(void);
+void mali_scheduler_terminate(void);
+
+MALI_STATIC_INLINE void mali_scheduler_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_scheduler_lock_obj);
+	MALI_DEBUG_PRINT(5, ("Mali scheduler: scheduler lock taken.\n"));
+}
+
+MALI_STATIC_INLINE void mali_scheduler_unlock(void)
+{
+	MALI_DEBUG_PRINT(5, ("Mali scheduler: Releasing scheduler lock.\n"));
+	_mali_osk_spinlock_irq_unlock(mali_scheduler_lock_obj);
+}
+
+MALI_STATIC_INLINE u32 mali_scheduler_job_gp_count(void)
+{
+	return job_queue_gp.depth;
+}
+MALI_STATIC_INLINE u32 mali_scheduler_job_gp_big_job_count(void)
+{
+	return job_queue_gp.big_job_num;
+}
+
+u32 mali_scheduler_job_physical_head_count(mali_bool gpu_mode_is_secure);
+
+mali_bool mali_scheduler_job_next_is_virtual(void);
+struct mali_pp_job *mali_scheduler_job_pp_next(void);
+
+struct mali_gp_job *mali_scheduler_job_gp_get(void);
+struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void);
+struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void);
+struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job);
+struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void);
+
+MALI_STATIC_INLINE u32 mali_scheduler_get_new_id(void)
+{
+	return _mali_osk_atomic_inc_return(&mali_job_id_autonumber);
+}
+
+MALI_STATIC_INLINE u32 mali_scheduler_get_new_cache_order(void)
+{
+	return _mali_osk_atomic_inc_return(&mali_job_cache_order_autonumber);
+}
+
+/**
+ * @brief Used by the Timeline system to queue a GP job.
+ *
+ * @note @ref mali_executor_schedule_from_mask() should be called if this
+ * function returns non-zero.
+ *
+ * @param job The GP job that is being activated.
+ *
+ * @return A scheduling bitmask that can be used to decide if scheduling is
+ * necessary after this call.
+ */
+mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job);
+
+/**
+ * @brief Used by the Timeline system to queue a PP job.
+ *
+ * @note @ref mali_executor_schedule_from_mask() should be called if this
+ * function returns non-zero.
+ *
+ * @param job The PP job that is being activated.
+ *
+ * @return A scheduling bitmask that can be used to decide if scheduling is
+ * necessary after this call.
+ */
+mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job);
+
+void mali_scheduler_complete_gp_job(struct mali_gp_job *job,
+				    mali_bool success,
+				    mali_bool user_notification,
+				    mali_bool dequeued);
+
+void mali_scheduler_complete_pp_job(struct mali_pp_job *job,
+				    u32 num_cores_in_virtual,
+				    mali_bool user_notification,
+				    mali_bool dequeued);
+
+void mali_scheduler_abort_session(struct mali_session_data *session);
+
+void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job,
+		u32 num_cores_in_virtual);
+
+#if MALI_STATE_TRACKING
+u32 mali_scheduler_dump_state(char *buf, u32 size);
+#endif
+
+void mali_scheduler_gp_pp_job_queue_print(void);
+
+#endif /* __MALI_SCHEDULER_H__ */
diff --git a/utgard/r6p1/common/mali_scheduler_types.h b/utgard/r6p1/common/mali_scheduler_types.h
new file mode 100755
index 0000000..0819c59
--- /dev/null
+++ b/utgard/r6p1/common/mali_scheduler_types.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SCHEDULER_TYPES_H__
+#define __MALI_SCHEDULER_TYPES_H__
+
+#include "mali_osk.h"
+
+#define MALI_SCHEDULER_JOB_ID_SPAN 65535
+
+/**
+ * Bitmask used for defered scheduling of subsystems.
+ */
+typedef u32 mali_scheduler_mask;
+
+#define MALI_SCHEDULER_MASK_GP (1<<0)
+#define MALI_SCHEDULER_MASK_PP (1<<1)
+
+#define MALI_SCHEDULER_MASK_EMPTY 0
+#define MALI_SCHEDULER_MASK_ALL (MALI_SCHEDULER_MASK_GP | MALI_SCHEDULER_MASK_PP)
+
+#endif /* __MALI_SCHEDULER_TYPES_H__ */
diff --git a/utgard/r6p1/common/mali_session.c b/utgard/r6p1/common/mali_session.c
new file mode 100755
index 0000000..9c5a64a
--- /dev/null
+++ b/utgard/r6p1/common/mali_session.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/sched.h>
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_session.h"
+#include "mali_ukk.h"
+#ifdef MALI_MEM_SWAP_TRACKING
+#include "mali_memory_swap_alloc.h"
+#endif
+
+_MALI_OSK_LIST_HEAD(mali_sessions);
+static u32 mali_session_count = 0;
+
+_mali_osk_spinlock_irq_t *mali_sessions_lock = NULL;
+wait_queue_head_t pending_queue;
+
+_mali_osk_errcode_t mali_session_initialize(void)
+{
+	_MALI_OSK_INIT_LIST_HEAD(&mali_sessions);
+	/* init wait queue for big varying job */
+	init_waitqueue_head(&pending_queue);
+
+	mali_sessions_lock = _mali_osk_spinlock_irq_init(
+				     _MALI_OSK_LOCKFLAG_ORDERED,
+				     _MALI_OSK_LOCK_ORDER_SESSIONS);
+	if (NULL == mali_sessions_lock) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_session_terminate(void)
+{
+	if (NULL != mali_sessions_lock) {
+		_mali_osk_spinlock_irq_term(mali_sessions_lock);
+		mali_sessions_lock = NULL;
+	}
+}
+
+void mali_session_add(struct mali_session_data *session)
+{
+	mali_session_lock();
+	_mali_osk_list_add(&session->link, &mali_sessions);
+	mali_session_count++;
+	mali_session_unlock();
+}
+
+void mali_session_remove(struct mali_session_data *session)
+{
+	mali_session_lock();
+	_mali_osk_list_delinit(&session->link);
+	mali_session_count--;
+	mali_session_unlock();
+}
+
+u32 mali_session_get_count(void)
+{
+	return mali_session_count;
+}
+
+wait_queue_head_t *mali_session_get_wait_queue(void)
+{
+	return &pending_queue;
+}
+
+/*
+ * Get the max completed window jobs from all active session,
+ * which will be used in window render frame per sec calculate
+ */
+#if defined(CONFIG_MALI_DVFS)
+u32 mali_session_max_window_num(void)
+{
+	struct mali_session_data *session, *tmp;
+	u32 max_window_num = 0;
+	u32 tmp_number = 0;
+
+	mali_session_lock();
+
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		tmp_number = _mali_osk_atomic_xchg(
+				     &session->number_of_window_jobs, 0);
+		if (max_window_num < tmp_number) {
+			max_window_num = tmp_number;
+		}
+	}
+
+	mali_session_unlock();
+
+	return max_window_num;
+}
+#endif
+void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx)
+{
+	struct mali_session_data *session, *tmp;
+	char task_comm[TASK_COMM_LEN];
+	char task_default[] = "not found";
+	struct task_struct *ttask;
+	u32 mali_mem_usage;
+	u32 total_mali_mem_size;
+#ifdef MALI_MEM_SWAP_TRACKING
+	u32 swap_pool_size;
+	u32 swap_unlock_size;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(print_ctx);
+	mali_session_lock();
+
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		ttask = pid_task(find_vpid(session->pid), PIDTYPE_PID);
+		if (ttask)
+			strncpy(task_comm, ttask->comm, sizeof(ttask->comm));
+		else
+			strncpy(task_comm, task_default, sizeof(task_default));
+#ifdef MALI_MEM_SWAP_TRACKING
+		_mali_osk_ctxprintf(print_ctx, "  %-25s  %-10u %-25s %-10u  %-15u  %-15u  %-10u  %-10u %-10u\n",
+				    session->comm, session->pid, task_comm,
+				    (unsigned int)((atomic_read(&session->mali_mem_allocated_pages)) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)session->max_mali_mem_allocated_size,
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_EXTERNAL])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_UMP])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_DMA_BUF])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_SWAP])) * _MALI_OSK_MALI_PAGE_SIZE)
+				   );
+#else
+		_mali_osk_ctxprintf(print_ctx, "  %-25s  %-10u %-25s %-10u  %-15u  %-15u  %-10u  %-10u\n",
+				    session->comm, session->pid, task_comm,
+				    (unsigned int)((atomic_read(&session->mali_mem_allocated_pages)) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)session->max_mali_mem_allocated_size,
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_EXTERNAL])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_UMP])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_DMA_BUF])) * _MALI_OSK_MALI_PAGE_SIZE)
+				   );
+
+#endif
+	}
+	mali_session_unlock();
+	mali_mem_usage  = _mali_ukk_report_memory_usage();
+	total_mali_mem_size = _mali_ukk_report_total_memory_size();
+	_mali_osk_ctxprintf(print_ctx, "Mali mem usage: %u\nMali mem limit: %u\n", mali_mem_usage, total_mali_mem_size);
+#ifdef MALI_MEM_SWAP_TRACKING
+	mali_mem_swap_tracking(&swap_pool_size, &swap_unlock_size);
+	_mali_osk_ctxprintf(print_ctx, "Mali swap mem pool : %u\nMali swap mem unlock: %u\n", swap_pool_size, swap_unlock_size);
+#endif
+}
diff --git a/utgard/r6p1/common/mali_session.h b/utgard/r6p1/common/mali_session.h
new file mode 100755
index 0000000..66d95c3
--- /dev/null
+++ b/utgard/r6p1/common/mali_session.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SESSION_H__
+#define __MALI_SESSION_H__
+
+#include "mali_mmu_page_directory.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_memory_types.h"
+#include "mali_memory_manager.h"
+
+struct mali_timeline_system;
+struct mali_soft_system;
+
+/* Number of frame builder job lists per session. */
+#define MALI_PP_JOB_FB_LOOKUP_LIST_SIZE 16
+#define MALI_PP_JOB_FB_LOOKUP_LIST_MASK (MALI_PP_JOB_FB_LOOKUP_LIST_SIZE - 1)
+/*Max pending big job allowed in kernel*/
+#define MALI_MAX_PENDING_BIG_JOB (2)
+
+struct mali_session_data {
+	_mali_osk_notification_queue_t *ioctl_queue;
+
+	_mali_osk_mutex_t *memory_lock; /**< Lock protecting the vm manipulation */
+	_mali_osk_mutex_t *cow_lock; /** < Lock protecting the cow memory free manipulation */
+#if 0
+	_mali_osk_list_t memory_head; /**< Track all the memory allocated in this session, for freeing on abnormal termination */
+#endif
+	struct mali_page_directory *page_directory; /**< MMU page directory for this session */
+
+	_MALI_OSK_LIST_HEAD(link); /**< Link for list of all sessions */
+	_MALI_OSK_LIST_HEAD(pp_job_list); /**< List of all PP jobs on this session */
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_t number_of_window_jobs; /**< Record the window jobs completed on this session in a period */
+#endif
+
+	_mali_osk_list_t pp_job_fb_lookup_list[MALI_PP_JOB_FB_LOOKUP_LIST_SIZE]; /**< List of PP job lists per frame builder id.  Used to link jobs from same frame builder. */
+
+	struct mali_soft_job_system *soft_job_system; /**< Soft job system for this session. */
+	struct mali_timeline_system *timeline_system; /**< Timeline system for this session. */
+
+	mali_bool is_aborting; /**< MALI_TRUE if the session is aborting, MALI_FALSE if not. */
+	mali_bool use_high_priority_job_queue; /**< If MALI_TRUE, jobs added from this session will use the high priority job queues. */
+	u32 pid;
+	char *comm;
+	atomic_t mali_mem_array[MALI_MEM_TYPE_MAX]; /**< The array to record mem types' usage for this session. */
+	atomic_t mali_mem_allocated_pages; /** The current allocated mali memory pages, which include mali os memory and mali dedicated memory.*/
+	size_t max_mali_mem_allocated_size; /**< The past max mali memory allocated size, which include mali os memory and mali dedicated memory. */
+	/* Added for new memroy system */
+	struct mali_allocation_manager allocation_mgr;
+};
+
+_mali_osk_errcode_t mali_session_initialize(void);
+void mali_session_terminate(void);
+
+/* List of all sessions. Actual list head in mali_kernel_core.c */
+extern _mali_osk_list_t mali_sessions;
+/* Lock to protect modification and access to the mali_sessions list */
+extern _mali_osk_spinlock_irq_t *mali_sessions_lock;
+
+MALI_STATIC_INLINE void mali_session_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_sessions_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(mali_sessions_lock);
+}
+
+void mali_session_add(struct mali_session_data *session);
+void mali_session_remove(struct mali_session_data *session);
+u32 mali_session_get_count(void);
+wait_queue_head_t *mali_session_get_wait_queue(void);
+
+#define MALI_SESSION_FOREACH(session, tmp, link) \
+	_MALI_OSK_LIST_FOREACHENTRY(session, tmp, &mali_sessions, struct mali_session_data, link)
+
+MALI_STATIC_INLINE struct mali_page_directory *mali_session_get_page_directory(struct mali_session_data *session)
+{
+	return session->page_directory;
+}
+
+MALI_STATIC_INLINE void mali_session_memory_lock(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_mutex_wait(session->memory_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_memory_unlock(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_mutex_signal(session->memory_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_send_notification(struct mali_session_data *session, _mali_osk_notification_t *object)
+{
+	_mali_osk_notification_queue_send(session->ioctl_queue, object);
+}
+
+#if defined(CONFIG_MALI_DVFS)
+
+MALI_STATIC_INLINE void mali_session_inc_num_window_jobs(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_atomic_inc(&session->number_of_window_jobs);
+}
+
+/*
+ * Get the max completed window jobs from all active session,
+ * which will be used in  window render frame per sec calculate
+ */
+u32 mali_session_max_window_num(void);
+
+#endif
+
+void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx);
+
+#endif /* __MALI_SESSION_H__ */
diff --git a/utgard/r6p1/common/mali_soft_job.c b/utgard/r6p1/common/mali_soft_job.c
new file mode 100755
index 0000000..c76d1fe
--- /dev/null
+++ b/utgard/r6p1/common/mali_soft_job.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_soft_job.h"
+#include "mali_osk.h"
+#include "mali_timeline.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+
+MALI_STATIC_INLINE void mali_soft_job_system_lock(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	_mali_osk_spinlock_irq_lock(system->lock);
+	MALI_DEBUG_PRINT(5, ("Mali Soft Job: soft system %p lock taken\n", system));
+	MALI_DEBUG_ASSERT(0 == system->lock_owner);
+	MALI_DEBUG_CODE(system->lock_owner = _mali_osk_get_tid());
+}
+
+MALI_STATIC_INLINE void mali_soft_job_system_unlock(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_PRINT(5, ("Mali Soft Job: releasing soft system %p lock\n", system));
+	MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner);
+	MALI_DEBUG_CODE(system->lock_owner = 0);
+	_mali_osk_spinlock_irq_unlock(system->lock);
+}
+
+#if defined(DEBUG)
+MALI_STATIC_INLINE void mali_soft_job_system_assert_locked(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner);
+}
+#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system) mali_soft_job_system_assert_locked(system)
+#else
+#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system)
+#endif /* defined(DEBUG) */
+
+struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session)
+{
+	struct mali_soft_job_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	system = (struct mali_soft_job_system *) _mali_osk_calloc(1, sizeof(struct mali_soft_job_system));
+	if (NULL == system) {
+		return NULL;
+	}
+
+	system->session = session;
+
+	system->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_SCHEDULER);
+	if (NULL == system->lock) {
+		mali_soft_job_system_destroy(system);
+		return NULL;
+	}
+	system->lock_owner = 0;
+	system->last_job_id = 0;
+
+	_MALI_OSK_INIT_LIST_HEAD(&(system->jobs_used));
+
+	return system;
+}
+
+void mali_soft_job_system_destroy(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	/* All jobs should be free at this point. */
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&(system->jobs_used)));
+
+	if (NULL != system) {
+		if (NULL != system->lock) {
+			_mali_osk_spinlock_irq_term(system->lock);
+		}
+		_mali_osk_free(system);
+	}
+}
+
+static void mali_soft_job_system_free_job(struct mali_soft_job_system *system, struct mali_soft_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_soft_job_system_lock(job->system);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_INVALID_ID != job->id);
+	MALI_DEBUG_ASSERT(system == job->system);
+
+	_mali_osk_list_del(&(job->system_list));
+
+	mali_soft_job_system_unlock(job->system);
+
+	_mali_osk_free(job);
+}
+
+MALI_STATIC_INLINE struct mali_soft_job *mali_soft_job_system_lookup_job(struct mali_soft_job_system *system, u32 job_id)
+{
+	struct mali_soft_job *job, *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) {
+		if (job->id == job_id)
+			return job;
+	}
+
+	return NULL;
+}
+
+void mali_soft_job_destroy(struct mali_soft_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: destroying soft job %u (0x%08X)\n", job->id, job));
+
+	if (NULL != job) {
+		if (0 < _mali_osk_atomic_dec_return(&job->refcount)) return;
+
+		_mali_osk_atomic_term(&job->refcount);
+
+		if (NULL != job->activated_notification) {
+			_mali_osk_notification_delete(job->activated_notification);
+			job->activated_notification = NULL;
+		}
+
+		mali_soft_job_system_free_job(job->system, job);
+	}
+}
+
+struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job)
+{
+	struct mali_soft_job *job;
+	_mali_osk_notification_t *notification = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT((MALI_SOFT_JOB_TYPE_USER_SIGNALED == type) ||
+			  (MALI_SOFT_JOB_TYPE_SELF_SIGNALED == type));
+
+	notification = _mali_osk_notification_create(_MALI_NOTIFICATION_SOFT_ACTIVATED, sizeof(_mali_uk_soft_job_activated_s));
+	if (unlikely(NULL == notification)) {
+		MALI_PRINT_ERROR(("Mali Soft Job: failed to allocate notification"));
+		return NULL;
+	}
+
+	job = _mali_osk_malloc(sizeof(struct mali_soft_job));
+	if (unlikely(NULL == job)) {
+		MALI_DEBUG_PRINT(2, ("Mali Soft Job: system alloc job failed. \n"));
+		return NULL;
+	}
+
+	mali_soft_job_system_lock(system);
+
+	job->system = system;
+	job->id = system->last_job_id++;
+	job->state = MALI_SOFT_JOB_STATE_ALLOCATED;
+
+	_mali_osk_list_add(&(job->system_list), &(system->jobs_used));
+
+	job->type = type;
+	job->user_job = user_job;
+	job->activated = MALI_FALSE;
+
+	job->activated_notification = notification;
+
+	_mali_osk_atomic_init(&job->refcount, 1);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_ALLOCATED == job->state);
+	MALI_DEBUG_ASSERT(system == job->system);
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_INVALID_ID != job->id);
+
+	mali_soft_job_system_unlock(system);
+
+	return job;
+}
+
+mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence)
+{
+	mali_timeline_point point;
+	struct mali_soft_job_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	system = job->system;
+
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT_POINTER(system->session->timeline_system);
+
+	mali_soft_job_system_lock(system);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_ALLOCATED == job->state);
+	job->state = MALI_SOFT_JOB_STATE_STARTED;
+
+	mali_soft_job_system_unlock(system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: starting soft job %u (0x%08X)\n", job->id, job));
+
+	mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_SOFT, fence, job);
+	point = mali_timeline_system_add_tracker(system->session->timeline_system, &job->tracker, MALI_TIMELINE_SOFT);
+
+	return point;
+}
+
+static mali_bool mali_soft_job_is_activated(void *data)
+{
+	struct mali_soft_job *job;
+
+	job = (struct mali_soft_job *) data;
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	return job->activated;
+}
+
+_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id)
+{
+	struct mali_soft_job *job;
+	struct mali_timeline_system *timeline_system;
+	mali_scheduler_mask schedule_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_soft_job_system_lock(system);
+
+	job = mali_soft_job_system_lookup_job(system, job_id);
+
+	if ((NULL == job) || (MALI_SOFT_JOB_TYPE_USER_SIGNALED != job->type)
+	    || !(MALI_SOFT_JOB_STATE_STARTED == job->state || MALI_SOFT_JOB_STATE_TIMED_OUT == job->state)) {
+		mali_soft_job_system_unlock(system);
+		MALI_PRINT_ERROR(("Mali Soft Job: invalid soft job id %u", job_id));
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	if (MALI_SOFT_JOB_STATE_TIMED_OUT == job->state) {
+		job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+		mali_soft_job_system_unlock(system);
+
+		MALI_DEBUG_ASSERT(MALI_TRUE == job->activated);
+		MALI_DEBUG_PRINT(4, ("Mali Soft Job: soft job %u (0x%08X) was timed out\n", job->id, job));
+		mali_soft_job_destroy(job);
+
+		return _MALI_OSK_ERR_TIMEOUT;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+	job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+	mali_soft_job_system_unlock(system);
+
+	/* Since the job now is in signaled state, timeouts from the timeline system will be
+	 * ignored, and it is not possible to signal this job again. */
+
+	timeline_system = system->session->timeline_system;
+	MALI_DEBUG_ASSERT_POINTER(timeline_system);
+
+	/* Wait until activated. */
+	_mali_osk_wait_queue_wait_event(timeline_system->wait_queue, mali_soft_job_is_activated, (void *) job);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: signaling soft job %u (0x%08X)\n", job->id, job));
+
+	schedule_mask = mali_timeline_tracker_release(&job->tracker);
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+
+	mali_soft_job_destroy(job);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_soft_job_send_activated_notification(struct mali_soft_job *job)
+{
+	if (NULL != job->activated_notification) {
+		_mali_uk_soft_job_activated_s *res = job->activated_notification->result_buffer;
+		res->user_job = job->user_job;
+		mali_session_send_notification(job->system->session, job->activated_notification);
+	}
+	job->activated_notification = NULL;
+}
+
+mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	MALI_DEBUG_ASSERT_POINTER(job->system->session);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline activation for soft job %u (0x%08X).\n", job->id, job));
+
+	mali_soft_job_system_lock(job->system);
+
+	if (unlikely(job->system->session->is_aborting)) {
+		MALI_DEBUG_PRINT(3, ("Mali Soft Job: Soft job %u (0x%08X) activated while session is aborting.\n", job->id, job));
+
+		mali_soft_job_system_unlock(job->system);
+
+		/* Since we are in shutdown, we can ignore the scheduling bitmask. */
+		mali_timeline_tracker_release(&job->tracker);
+		mali_soft_job_destroy(job);
+		return schedule_mask;
+	}
+
+	/* Send activated notification. */
+	mali_soft_job_send_activated_notification(job);
+
+	/* Wake up sleeping signaler. */
+	job->activated = MALI_TRUE;
+
+	/* If job type is self signaled, release tracker, move soft job to free list, and scheduler at once */
+	if (MALI_SOFT_JOB_TYPE_SELF_SIGNALED == job->type) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+		job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+		mali_soft_job_system_unlock(job->system);
+
+		schedule_mask |= mali_timeline_tracker_release(&job->tracker);
+
+		mali_soft_job_destroy(job);
+	} else {
+		_mali_osk_wait_queue_wake_up(job->tracker.system->wait_queue);
+
+		mali_soft_job_system_unlock(job->system);
+	}
+
+	return schedule_mask;
+}
+
+mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	MALI_DEBUG_ASSERT_POINTER(job->system->session);
+	MALI_DEBUG_ASSERT(MALI_TRUE == job->activated);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline timeout for soft job %u (0x%08X).\n", job->id, job));
+
+	mali_soft_job_system_lock(job->system);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED  == job->state ||
+			  MALI_SOFT_JOB_STATE_SIGNALED == job->state);
+
+	if (unlikely(job->system->session->is_aborting)) {
+		/* The session is aborting.  This job will be released and destroyed by @ref
+		 * mali_soft_job_system_abort(). */
+		mali_soft_job_system_unlock(job->system);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	if (MALI_SOFT_JOB_STATE_STARTED != job->state) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED == job->state);
+
+		/* The job is about to be signaled, ignore timeout. */
+		MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeout on soft job %u (0x%08X) in signaled state.\n", job->id, job));
+		mali_soft_job_system_unlock(job->system);
+		return schedule_mask;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+	job->state = MALI_SOFT_JOB_STATE_TIMED_OUT;
+	_mali_osk_atomic_inc(&job->refcount);
+
+	mali_soft_job_system_unlock(job->system);
+
+	schedule_mask = mali_timeline_tracker_release(&job->tracker);
+
+	mali_soft_job_destroy(job);
+
+	return schedule_mask;
+}
+
+void mali_soft_job_system_abort(struct mali_soft_job_system *system)
+{
+	struct mali_soft_job *job, *tmp;
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(jobs);
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting soft job system for session 0x%08X.\n", system->session));
+
+	mali_soft_job_system_lock(system);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED   == job->state ||
+				  MALI_SOFT_JOB_STATE_TIMED_OUT == job->state);
+
+		if (MALI_SOFT_JOB_STATE_STARTED == job->state) {
+			/* If the job has been activated, we have to release the tracker and destroy
+			 * the job.  If not, the tracker will be released and the job destroyed when
+			 * it is activated. */
+			if (MALI_TRUE == job->activated) {
+				MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting unsignaled soft job %u (0x%08X).\n", job->id, job));
+
+				job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+				_mali_osk_list_move(&job->system_list, &jobs);
+			}
+		} else if (MALI_SOFT_JOB_STATE_TIMED_OUT == job->state) {
+			MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting timed out soft job %u (0x%08X).\n", job->id, job));
+
+			/* We need to destroy this soft job. */
+			_mali_osk_list_move(&job->system_list, &jobs);
+		}
+	}
+
+	mali_soft_job_system_unlock(system);
+
+	/* Release and destroy jobs. */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &jobs, struct mali_soft_job, system_list) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED  == job->state ||
+				  MALI_SOFT_JOB_STATE_TIMED_OUT == job->state);
+
+		if (MALI_SOFT_JOB_STATE_SIGNALED == job->state) {
+			mali_timeline_tracker_release(&job->tracker);
+		}
+
+		/* Move job back to used list before destroying. */
+		_mali_osk_list_move(&job->system_list, &system->jobs_used);
+
+		mali_soft_job_destroy(job);
+	}
+}
diff --git a/utgard/r6p1/common/mali_soft_job.h b/utgard/r6p1/common/mali_soft_job.h
new file mode 100755
index 0000000..4dd0589
--- /dev/null
+++ b/utgard/r6p1/common/mali_soft_job.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SOFT_JOB_H__
+#define __MALI_SOFT_JOB_H__
+
+#include "mali_osk.h"
+
+#include "mali_timeline.h"
+
+struct mali_timeline_fence;
+struct mali_session_data;
+struct mali_soft_job;
+struct mali_soft_job_system;
+
+/**
+ * Soft job types.
+ *
+ * Soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED will only complete after activation if either
+ * they are signaled by user-space (@ref mali_soft_job_system_signaled_job) or if they are timed out
+ * by the Timeline system.
+ * Soft jobs of type MALI_SOFT_JOB_TYPE_SELF_SIGNALED will release job resource automatically
+ * in kernel when the job is activated.
+ */
+typedef enum mali_soft_job_type {
+	MALI_SOFT_JOB_TYPE_SELF_SIGNALED,
+	MALI_SOFT_JOB_TYPE_USER_SIGNALED,
+} mali_soft_job_type;
+
+/**
+ * Soft job state.
+ *
+ * mali_soft_job_system_start_job a job will first be allocated.The job's state set to MALI_SOFT_JOB_STATE_ALLOCATED.
+ * Once the job is added to the timeline system, the state changes to MALI_SOFT_JOB_STATE_STARTED.
+ *
+ * For soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED the state is changed to
+ * MALI_SOFT_JOB_STATE_SIGNALED when @ref mali_soft_job_system_signal_job is called and the soft
+ * job's state is MALI_SOFT_JOB_STATE_STARTED or MALI_SOFT_JOB_STATE_TIMED_OUT.
+ *
+ * If a soft job of type MALI_SOFT_JOB_TYPE_USER_SIGNALED is timed out before being signaled, the
+ * state is changed to MALI_SOFT_JOB_STATE_TIMED_OUT.  This can only happen to soft jobs in state
+ * MALI_SOFT_JOB_STATE_STARTED.
+ *
+ */
+typedef enum mali_soft_job_state {
+	MALI_SOFT_JOB_STATE_ALLOCATED,
+	MALI_SOFT_JOB_STATE_STARTED,
+	MALI_SOFT_JOB_STATE_SIGNALED,
+	MALI_SOFT_JOB_STATE_TIMED_OUT,
+} mali_soft_job_state;
+
+#define MALI_SOFT_JOB_INVALID_ID ((u32) -1)
+
+/**
+ * Soft job struct.
+ *
+ * Soft job can be used to represent any kind of CPU work done in kernel-space.
+ */
+typedef struct mali_soft_job {
+	mali_soft_job_type            type;                   /**< Soft job type.  Must be one of MALI_SOFT_JOB_TYPE_*. */
+	u64                           user_job;               /**< Identifier for soft job in user space. */
+	_mali_osk_atomic_t            refcount;               /**< Soft jobs are reference counted to prevent premature deletion. */
+	struct mali_timeline_tracker  tracker;                /**< Timeline tracker for soft job. */
+	mali_bool                     activated;              /**< MALI_TRUE if the job has been activated, MALI_FALSE if not. */
+	_mali_osk_notification_t     *activated_notification; /**< Pre-allocated notification object for ACTIVATED_NOTIFICATION. */
+
+	/* Protected by soft job system lock. */
+	u32                           id;                     /**< Used by user-space to find corresponding soft job in kernel-space. */
+	mali_soft_job_state           state;                  /**< State of soft job, must be one of MALI_SOFT_JOB_STATE_*. */
+	struct mali_soft_job_system  *system;                 /**< The soft job system this job is in. */
+	_mali_osk_list_t              system_list;            /**< List element used by soft job system. */
+} mali_soft_job;
+
+/**
+ * Per-session soft job system.
+ *
+ * The soft job system is used to manage all soft jobs that belongs to a session.
+ */
+typedef struct mali_soft_job_system {
+	struct mali_session_data *session;                    /**< The session this soft job system belongs to. */
+	_MALI_OSK_LIST_HEAD(jobs_used);                       /**< List of all allocated soft jobs. */
+
+	_mali_osk_spinlock_irq_t *lock;                       /**< Lock used to protect soft job system and its soft jobs. */
+	u32 lock_owner;                                       /**< Contains tid of thread that locked the system or 0, if not locked. */
+	u32 last_job_id;                                      /**< Recored the last job id protected by lock. */
+} mali_soft_job_system;
+
+/**
+ * Create a soft job system.
+ *
+ * @param session The session this soft job system will belong to.
+ * @return The new soft job system, or NULL if unsuccessful.
+ */
+struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session);
+
+/**
+ * Destroy a soft job system.
+ *
+ * @note The soft job must not have any started or activated jobs.  Call @ref
+ * mali_soft_job_system_abort first.
+ *
+ * @param system The soft job system we are destroying.
+ */
+void mali_soft_job_system_destroy(struct mali_soft_job_system *system);
+
+/**
+ * Create a soft job.
+ *
+ * @param system Soft job system to create soft job from.
+ * @param type Type of the soft job.
+ * @param user_job Identifier for soft job in user space.
+ * @return New soft job if successful, NULL if not.
+ */
+struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job);
+
+/**
+ * Destroy soft job.
+ *
+ * @param job Soft job to destroy.
+ */
+void mali_soft_job_destroy(struct mali_soft_job *job);
+
+/**
+ * Start a soft job.
+ *
+ * The soft job will be added to the Timeline system which will then activate it after all
+ * dependencies have been resolved.
+ *
+ * Create soft jobs with @ref mali_soft_job_create before starting them.
+ *
+ * @param job Soft job to start.
+ * @param fence Fence representing dependencies for this soft job.
+ * @return Point on soft job timeline.
+ */
+mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence);
+
+/**
+ * Use by user-space to signal that a soft job has completed.
+ *
+ * @note Only valid for soft jobs with type MALI_SOFT_JOB_TYPE_USER_SIGNALED.
+ *
+ * @note The soft job must be in state MALI_SOFT_JOB_STATE_STARTED for the signal to be successful.
+ *
+ * @note If the soft job was signaled successfully, or it received a time out, the soft job will be
+ * destroyed after this call and should no longer be used.
+ *
+ * @note This function will block until the soft job has been activated.
+ *
+ * @param system The soft job system the job was started in.
+ * @param job_id ID of soft job we are signaling.
+ *
+ * @return _MALI_OSK_ERR_ITEM_NOT_FOUND if the soft job ID was invalid, _MALI_OSK_ERR_TIMEOUT if the
+ * soft job was timed out or _MALI_OSK_ERR_OK if we successfully signaled the soft job.
+ */
+_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id);
+
+/**
+ * Used by the Timeline system to activate a soft job.
+ *
+ * @param job The soft job that is being activated.
+ * @return A scheduling bitmask.
+ */
+mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job);
+
+/**
+ * Used by the Timeline system to timeout a soft job.
+ *
+ * A soft job is timed out if it completes or is signaled later than MALI_TIMELINE_TIMEOUT_HZ after
+ * activation.
+ *
+ * @param job The soft job that is being timed out.
+ * @return A scheduling bitmask.
+ */
+mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job);
+
+/**
+ * Used to cleanup activated soft jobs in the soft job system on session abort.
+ *
+ * @param system The soft job system that is being aborted.
+ */
+void mali_soft_job_system_abort(struct mali_soft_job_system *system);
+
+#endif /* __MALI_SOFT_JOB_H__ */
diff --git a/utgard/r6p1/common/mali_spinlock_reentrant.c b/utgard/r6p1/common/mali_spinlock_reentrant.c
new file mode 100755
index 0000000..28e2e17
--- /dev/null
+++ b/utgard/r6p1/common/mali_spinlock_reentrant.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_spinlock_reentrant.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order)
+{
+	struct mali_spinlock_reentrant *spinlock;
+
+	spinlock = _mali_osk_calloc(1, sizeof(struct mali_spinlock_reentrant));
+	if (NULL == spinlock) {
+		return NULL;
+	}
+
+	spinlock->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, lock_order);
+	if (NULL == spinlock->lock) {
+		mali_spinlock_reentrant_term(spinlock);
+		return NULL;
+	}
+
+	return spinlock;
+}
+
+void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT(0 == spinlock->counter && 0 == spinlock->owner);
+
+	if (NULL != spinlock->lock) {
+		_mali_osk_spinlock_irq_term(spinlock->lock);
+	}
+
+	_mali_osk_free(spinlock);
+}
+
+void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	MALI_DEBUG_ASSERT(0 != tid);
+
+	MALI_DEBUG_PRINT(5, ("%s ^\n", __FUNCTION__));
+
+	if (tid != spinlock->owner) {
+		_mali_osk_spinlock_irq_lock(spinlock->lock);
+		MALI_DEBUG_ASSERT(0 == spinlock->owner && 0 == spinlock->counter);
+		spinlock->owner = tid;
+	}
+
+	MALI_DEBUG_PRINT(5, ("%s v\n", __FUNCTION__));
+
+	++spinlock->counter;
+}
+
+void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	MALI_DEBUG_ASSERT(0 != tid && tid == spinlock->owner);
+
+	--spinlock->counter;
+	if (0 == spinlock->counter) {
+		spinlock->owner = 0;
+		MALI_DEBUG_PRINT(5, ("%s release last\n", __FUNCTION__));
+		_mali_osk_spinlock_irq_unlock(spinlock->lock);
+	}
+}
diff --git a/utgard/r6p1/common/mali_spinlock_reentrant.h b/utgard/r6p1/common/mali_spinlock_reentrant.h
new file mode 100755
index 0000000..793875a
--- /dev/null
+++ b/utgard/r6p1/common/mali_spinlock_reentrant.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SPINLOCK_REENTRANT_H__
+#define __MALI_SPINLOCK_REENTRANT_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/**
+ * Reentrant spinlock.
+ */
+struct mali_spinlock_reentrant {
+	_mali_osk_spinlock_irq_t *lock;
+	u32               owner;
+	u32               counter;
+};
+
+/**
+ * Create a new reentrant spinlock.
+ *
+ * @param lock_order Lock order.
+ * @return New reentrant spinlock.
+ */
+struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order);
+
+/**
+ * Terminate reentrant spinlock and free any associated resources.
+ *
+ * @param spinlock Reentrant spinlock to terminate.
+ */
+void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock);
+
+/**
+ * Wait for reentrant spinlock to be signaled.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ */
+void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid);
+
+/**
+ * Signal reentrant spinlock.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ */
+void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid);
+
+/**
+ * Check if thread is holding reentrant spinlock.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ * @return MALI_TRUE if thread is holding spinlock, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_spinlock_reentrant_is_held(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	return (tid == spinlock->owner && 0 < spinlock->counter);
+}
+
+#endif /* __MALI_SPINLOCK_REENTRANT_H__ */
diff --git a/utgard/r6p1/common/mali_timeline.c b/utgard/r6p1/common/mali_timeline.c
new file mode 100755
index 0000000..0da475f
--- /dev/null
+++ b/utgard/r6p1/common/mali_timeline.c
@@ -0,0 +1,1597 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timeline.h"
+#include "mali_kernel_common.h"
+#include "mali_scheduler.h"
+#include "mali_soft_job.h"
+#include "mali_timeline_fence_wait.h"
+#include "mali_timeline_sync_fence.h"
+#include "mali_executor.h"
+#include "mali_pp_job.h"
+
+#define MALI_TIMELINE_SYSTEM_LOCKED(system) (mali_spinlock_reentrant_is_held((system)->spinlock, _mali_osk_get_tid()))
+
+/*
+ * Following three elements are used to record how many
+ * gp, physical pp or virtual pp jobs are delayed in the whole
+ * timeline system, we can use these three value to decide
+ * if need to deactivate idle group.
+ */
+_mali_osk_atomic_t gp_tracker_count;
+_mali_osk_atomic_t phy_pp_tracker_count;
+_mali_osk_atomic_t virt_pp_tracker_count;
+
+static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *waiter);
+
+#if defined(CONFIG_SYNC)
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+
+struct mali_deferred_fence_put_entry {
+	struct hlist_node list;
+	struct sync_fence *fence;
+};
+
+static HLIST_HEAD(mali_timeline_sync_fence_to_free_list);
+static DEFINE_SPINLOCK(mali_timeline_sync_fence_to_free_lock);
+
+static void put_sync_fences(struct work_struct *ignore)
+{
+	struct hlist_head list;
+	struct hlist_node *tmp, *pos;
+	unsigned long flags;
+	struct mali_deferred_fence_put_entry *o;
+
+	spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags);
+	hlist_move_list(&mali_timeline_sync_fence_to_free_list, &list);
+	spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags);
+
+	hlist_for_each_entry_safe(o, pos, tmp, &list, list) {
+		sync_fence_put(o->fence);
+		kfree(o);
+	}
+}
+
+static DECLARE_DELAYED_WORK(delayed_sync_fence_put, put_sync_fences);
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */
+
+/* Callback that is called when a sync fence a tracker is waiting on is signaled. */
+static void mali_timeline_sync_fence_callback(struct sync_fence *sync_fence, struct sync_fence_waiter *sync_fence_waiter)
+{
+	struct mali_timeline_system  *system;
+	struct mali_timeline_waiter  *waiter;
+	struct mali_timeline_tracker *tracker;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+	mali_bool is_aborting = MALI_FALSE;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	int fence_status = sync_fence->status;
+#else
+	int fence_status = atomic_read(&sync_fence->status);
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_waiter);
+
+	tracker = _MALI_OSK_CONTAINER_OF(sync_fence_waiter, struct mali_timeline_tracker, sync_fence_waiter);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	system = tracker->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	is_aborting = system->session->is_aborting;
+	if (!is_aborting && (0 > fence_status)) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, fence_status));
+		tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT;
+	}
+
+	waiter = tracker->waiter_sync;
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+
+	tracker->sync_fence = NULL;
+	tracker->fence.sync_fd = -1;
+
+	schedule_mask |= mali_timeline_system_release_waiter(system, waiter);
+
+	/* If aborting, wake up sleepers that are waiting for sync fence callbacks to complete. */
+	if (is_aborting) {
+		_mali_osk_wait_queue_wake_up(system->wait_queue);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	/*
+	 * Older versions of Linux, before 3.5, doesn't support fput() in interrupt
+	 * context. For those older kernels, allocate a list object and put the
+	 * fence object on that and defer the call to sync_fence_put() to a workqueue.
+	 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
+	{
+		struct mali_deferred_fence_put_entry *obj;
+
+		obj = kzalloc(sizeof(struct mali_deferred_fence_put_entry), GFP_ATOMIC);
+		if (obj) {
+			unsigned long flags;
+			mali_bool schedule = MALI_FALSE;
+
+			obj->fence = sync_fence;
+
+			spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags);
+			if (hlist_empty(&mali_timeline_sync_fence_to_free_list))
+				schedule = MALI_TRUE;
+			hlist_add_head(&obj->list, &mali_timeline_sync_fence_to_free_list);
+			spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags);
+
+			if (schedule)
+				schedule_delayed_work(&delayed_sync_fence_put, 0);
+		}
+	}
+#else
+	sync_fence_put(sync_fence);
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */
+
+	if (!is_aborting) {
+		mali_executor_schedule_from_mask(schedule_mask, MALI_TRUE);
+	}
+}
+#endif /* defined(CONFIG_SYNC) */
+
+static mali_scheduler_mask mali_timeline_tracker_time_out(struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_SOFT == tracker->type);
+
+	return mali_soft_job_system_timeout_job((struct mali_soft_job *) tracker->job);
+}
+
+static void mali_timeline_timer_callback(void *data)
+{
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *tracker;
+	struct mali_timeline *timeline;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+
+	timeline = (struct mali_timeline *) data;
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	if (!system->timer_enabled) {
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+		return;
+	}
+
+	tracker = timeline->tracker_tail;
+	timeline->timer_active = MALI_FALSE;
+
+	if (NULL != tracker && MALI_TRUE == tracker->timer_active) {
+		/* This is likely the delayed work that has been schedule out before cancelled. */
+		if (MALI_TIMELINE_TIMEOUT_HZ > (_mali_osk_time_tickcount() - tracker->os_tick_activate)) {
+			mali_spinlock_reentrant_signal(system->spinlock, tid);
+			return;
+		}
+
+		schedule_mask = mali_timeline_tracker_time_out(tracker);
+		tracker->timer_active = MALI_FALSE;
+	} else {
+		MALI_PRINT_ERROR(("Mali Timeline: Soft job timer callback without a waiting tracker.\n"));
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+void mali_timeline_system_stop_timer(struct mali_timeline_system *system)
+{
+	u32 i;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+	system->timer_enabled = MALI_FALSE;
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (NULL != timeline->delayed_work) {
+			_mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work);
+			timeline->timer_active = MALI_FALSE;
+		}
+	}
+}
+
+static void mali_timeline_destroy(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	if (NULL != timeline) {
+		/* Assert that the timeline object has been properly cleaned up before destroying it. */
+		MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next);
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_head);
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+		MALI_DEBUG_ASSERT(NULL == timeline->waiter_head);
+		MALI_DEBUG_ASSERT(NULL == timeline->waiter_tail);
+		MALI_DEBUG_ASSERT(NULL != timeline->system);
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_MAX > timeline->id);
+
+		if (NULL != timeline->delayed_work) {
+			_mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work);
+			_mali_osk_wq_delayed_delete_work_nonflush(timeline->delayed_work);
+		}
+
+#if defined(CONFIG_SYNC)
+		if (NULL != timeline->sync_tl) {
+			sync_timeline_destroy(timeline->sync_tl);
+		}
+#endif /* defined(CONFIG_SYNC) */
+
+#ifndef CONFIG_SYNC
+		_mali_osk_free(timeline);
+#endif
+	}
+}
+
+static struct mali_timeline *mali_timeline_create(struct mali_timeline_system *system, enum mali_timeline_id id)
+{
+	struct mali_timeline *timeline;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(id < MALI_TIMELINE_MAX);
+
+	timeline = (struct mali_timeline *) _mali_osk_calloc(1, sizeof(struct mali_timeline));
+	if (NULL == timeline) {
+		return NULL;
+	}
+
+	/* Initially the timeline is empty. */
+#if defined(MALI_TIMELINE_DEBUG_START_POINT)
+	/* Start the timeline a bit before wrapping when debugging. */
+	timeline->point_next = UINT_MAX - MALI_TIMELINE_MAX_POINT_SPAN - 128;
+#else
+	timeline->point_next = 1;
+#endif
+	timeline->point_oldest = timeline->point_next;
+
+	/* The tracker and waiter lists will initially be empty. */
+
+	timeline->system = system;
+	timeline->id = id;
+
+	timeline->delayed_work = _mali_osk_wq_delayed_create_work(mali_timeline_timer_callback, timeline);
+	if (NULL == timeline->delayed_work) {
+		mali_timeline_destroy(timeline);
+		return NULL;
+	}
+
+	timeline->timer_active = MALI_FALSE;
+
+#if defined(CONFIG_SYNC)
+	{
+		char timeline_name[32];
+
+		switch (id) {
+		case MALI_TIMELINE_GP:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-gp", _mali_osk_get_pid());
+			break;
+		case MALI_TIMELINE_PP:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-pp", _mali_osk_get_pid());
+			break;
+		case MALI_TIMELINE_SOFT:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-soft", _mali_osk_get_pid());
+			break;
+		default:
+			MALI_PRINT_ERROR(("Mali Timeline: Invalid timeline id %d\n", id));
+			mali_timeline_destroy(timeline);
+			return NULL;
+		}
+
+		timeline->destroyed = MALI_FALSE;
+
+		timeline->sync_tl = mali_sync_timeline_create(timeline, timeline_name);
+		if (NULL == timeline->sync_tl) {
+			mali_timeline_destroy(timeline);
+			return NULL;
+		}
+
+		timeline->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM);
+		if (NULL == timeline->spinlock) {
+			mali_timeline_destroy(timeline);
+			return NULL;
+		}
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	return timeline;
+}
+
+static void mali_timeline_insert_tracker(struct mali_timeline *timeline, struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	if (mali_timeline_is_full(timeline)) {
+		/* Don't add tracker if timeline is full. */
+		tracker->point = MALI_TIMELINE_NO_POINT;
+		return;
+	}
+
+	tracker->timeline = timeline;
+	tracker->point    = timeline->point_next;
+
+	/* Find next available point. */
+	timeline->point_next++;
+	if (MALI_TIMELINE_NO_POINT == timeline->point_next) {
+		timeline->point_next++;
+	}
+
+	MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+
+	if (MALI_TIMELINE_TRACKER_GP == tracker->type) {
+		_mali_osk_atomic_inc(&gp_tracker_count);
+	} else if (MALI_TIMELINE_TRACKER_PP == tracker->type) {
+		if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) {
+			_mali_osk_atomic_inc(&virt_pp_tracker_count);
+		} else {
+			_mali_osk_atomic_inc(&phy_pp_tracker_count);
+		}
+	}
+
+	/* Add tracker as new head on timeline's tracker list. */
+	if (NULL == timeline->tracker_head) {
+		/* Tracker list is empty. */
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+
+		timeline->tracker_tail = tracker;
+
+		MALI_DEBUG_ASSERT(NULL == tracker->timeline_next);
+		MALI_DEBUG_ASSERT(NULL == tracker->timeline_prev);
+	} else {
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_head->timeline_next);
+
+		tracker->timeline_prev = timeline->tracker_head;
+		timeline->tracker_head->timeline_next = tracker;
+
+		MALI_DEBUG_ASSERT(NULL == tracker->timeline_next);
+	}
+	timeline->tracker_head = tracker;
+
+	MALI_DEBUG_ASSERT(NULL == timeline->tracker_head->timeline_next);
+	MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail->timeline_prev);
+}
+
+/* Inserting the waiter object into the given timeline */
+static void mali_timeline_insert_waiter(struct mali_timeline *timeline, struct mali_timeline_waiter *waiter_new)
+{
+	struct mali_timeline_waiter *waiter_prev;
+	struct mali_timeline_waiter *waiter_next;
+
+	/* Waiter time must be between timeline head and tail, and there must
+	 * be less than MALI_TIMELINE_MAX_POINT_SPAN elements between */
+	MALI_DEBUG_ASSERT((waiter_new->point - timeline->point_oldest) < MALI_TIMELINE_MAX_POINT_SPAN);
+	MALI_DEBUG_ASSERT((-waiter_new->point + timeline->point_next) < MALI_TIMELINE_MAX_POINT_SPAN);
+
+	/* Finding out where to put this waiter, in the linked waiter list of the given timeline **/
+	waiter_prev = timeline->waiter_head; /* Insert new after  waiter_prev */
+	waiter_next = NULL;                  /* Insert new before waiter_next */
+
+	/* Iterating backwards from head (newest) to tail (oldest) until we
+	 * find the correct spot to insert the new waiter */
+	while (waiter_prev && mali_timeline_point_after(waiter_prev->point, waiter_new->point)) {
+		waiter_next = waiter_prev;
+		waiter_prev = waiter_prev->timeline_prev;
+	}
+
+	if (NULL == waiter_prev && NULL == waiter_next) {
+		/* list is empty */
+		timeline->waiter_head = waiter_new;
+		timeline->waiter_tail = waiter_new;
+	} else if (NULL == waiter_next) {
+		/* insert at head */
+		waiter_new->timeline_prev = timeline->waiter_head;
+		timeline->waiter_head->timeline_next = waiter_new;
+		timeline->waiter_head = waiter_new;
+	} else if (NULL == waiter_prev) {
+		/* insert at tail */
+		waiter_new->timeline_next = timeline->waiter_tail;
+		timeline->waiter_tail->timeline_prev = waiter_new;
+		timeline->waiter_tail = waiter_new;
+	} else {
+		/* insert between */
+		waiter_new->timeline_next = waiter_next;
+		waiter_new->timeline_prev = waiter_prev;
+		waiter_next->timeline_prev = waiter_new;
+		waiter_prev->timeline_next = waiter_new;
+	}
+}
+
+static void mali_timeline_update_delayed_work(struct mali_timeline *timeline)
+{
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *oldest_tracker;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SOFT == timeline->id);
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Timer is disabled, early out. */
+	if (!system->timer_enabled) return;
+
+	oldest_tracker = timeline->tracker_tail;
+	if (NULL != oldest_tracker && 0 == oldest_tracker->trigger_ref_count) {
+		if (MALI_FALSE == oldest_tracker->timer_active) {
+			if (MALI_TRUE == timeline->timer_active) {
+				_mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work);
+			}
+			_mali_osk_wq_delayed_schedule_work(timeline->delayed_work, MALI_TIMELINE_TIMEOUT_HZ);
+			oldest_tracker->timer_active = MALI_TRUE;
+			timeline->timer_active = MALI_TRUE;
+		}
+	} else if (MALI_TRUE == timeline->timer_active) {
+		_mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work);
+		timeline->timer_active = MALI_FALSE;
+	}
+}
+
+static mali_scheduler_mask mali_timeline_update_oldest_point(struct mali_timeline *timeline)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	MALI_DEBUG_CODE({
+		struct mali_timeline_system *system = timeline->system;
+		MALI_DEBUG_ASSERT_POINTER(system);
+
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+	});
+
+	if (NULL != timeline->tracker_tail) {
+		/* Set oldest point to oldest tracker's point */
+		timeline->point_oldest = timeline->tracker_tail->point;
+	} else {
+		/* No trackers, mark point list as empty */
+		timeline->point_oldest = timeline->point_next;
+	}
+
+	/* Release all waiters no longer on the timeline's point list.
+	 * Releasing a waiter can trigger this function to be called again, so
+	 * we do not store any pointers on stack. */
+	while (NULL != timeline->waiter_tail) {
+		u32 waiter_time_relative;
+		u32 time_head_relative;
+		struct mali_timeline_waiter *waiter = timeline->waiter_tail;
+
+		time_head_relative = timeline->point_next - timeline->point_oldest;
+		waiter_time_relative = waiter->point - timeline->point_oldest;
+
+		if (waiter_time_relative < time_head_relative) {
+			/* This and all following waiters are on the point list, so we are done. */
+			break;
+		}
+
+		/* Remove waiter from timeline's waiter list. */
+		if (NULL != waiter->timeline_next) {
+			waiter->timeline_next->timeline_prev = NULL;
+		} else {
+			/* This was the last waiter */
+			timeline->waiter_head = NULL;
+		}
+		timeline->waiter_tail = waiter->timeline_next;
+
+		/* Release waiter.  This could activate a tracker, if this was
+		 * the last waiter for the tracker. */
+		schedule_mask |= mali_timeline_system_release_waiter(timeline->system, waiter);
+	}
+
+	return schedule_mask;
+}
+
+void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker,
+				mali_timeline_tracker_type type,
+				struct mali_timeline_fence *fence,
+				void *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAX > type);
+
+	/* Zero out all tracker members. */
+	_mali_osk_memset(tracker, 0, sizeof(*tracker));
+
+	tracker->type = type;
+	tracker->job = job;
+	tracker->trigger_ref_count = 1;  /* Prevents any callback from trigging while adding it */
+	tracker->os_tick_create = _mali_osk_time_tickcount();
+	MALI_DEBUG_CODE(tracker->magic = MALI_TIMELINE_TRACKER_MAGIC);
+
+	tracker->activation_error = MALI_TIMELINE_ACTIVATION_ERROR_NONE;
+
+	/* Copy fence. */
+	if (NULL != fence) {
+		_mali_osk_memcpy(&tracker->fence, fence, sizeof(struct mali_timeline_fence));
+	}
+}
+
+mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker)
+{
+	struct mali_timeline *timeline;
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *tracker_next, *tracker_prev;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+
+	/* Upon entry a group lock will be held, but not a scheduler lock. */
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+	/* Tracker should have been triggered */
+	MALI_DEBUG_ASSERT(0 == tracker->trigger_ref_count);
+
+	/* All waiters should have been released at this point */
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_head);
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+
+	MALI_DEBUG_PRINT(3, ("Mali Timeline: releasing tracker for job 0x%08X\n", tracker->job));
+
+	timeline = tracker->timeline;
+	if (NULL == timeline) {
+		/* Tracker was not on a timeline, there is nothing to release. */
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Tracker should still be on timeline */
+	MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+	MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, tracker->point));
+
+	/* Tracker is no longer valid. */
+	MALI_DEBUG_CODE(tracker->magic = 0);
+
+	tracker_next = tracker->timeline_next;
+	tracker_prev = tracker->timeline_prev;
+	tracker->timeline_next = NULL;
+	tracker->timeline_prev = NULL;
+
+	/* Removing tracker from timeline's tracker list */
+	if (NULL == tracker_next) {
+		/* This tracker was the head */
+		timeline->tracker_head = tracker_prev;
+	} else {
+		tracker_next->timeline_prev = tracker_prev;
+	}
+
+	if (NULL == tracker_prev) {
+		/* This tracker was the tail */
+		timeline->tracker_tail = tracker_next;
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+		/* Update the timeline's oldest time and release any waiters */
+		schedule_mask |= mali_timeline_update_oldest_point(timeline);
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+	} else {
+		tracker_prev->timeline_next = tracker_next;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Update delayed work only when it is the soft job timeline */
+	if (MALI_TIMELINE_SOFT == tracker->timeline->id) {
+		mali_timeline_update_delayed_work(tracker->timeline);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return schedule_mask;
+}
+
+void mali_timeline_system_release_waiter_list(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *tail,
+		struct mali_timeline_waiter *head)
+{
+	struct mali_timeline_waiter    *waiter = NULL;
+	struct mali_timeline_waiter    *next = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(head);
+	MALI_DEBUG_ASSERT_POINTER(tail);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	head->tracker_next = system->waiter_empty_list;
+	system->waiter_empty_list = tail;
+
+	waiter = system->waiter_empty_list;
+	while (NULL != waiter) {
+		next = waiter->tracker_next;
+		_mali_osk_free(waiter);
+		waiter = next;
+	}
+	system->waiter_empty_list = NULL;
+}
+
+static mali_scheduler_mask mali_timeline_tracker_activate(struct mali_timeline_tracker *tracker)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	struct mali_timeline_system *system;
+	struct mali_timeline *timeline;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+	system = tracker->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	tracker->os_tick_activate = _mali_osk_time_tickcount();
+
+	if (NULL != tracker->waiter_head) {
+		mali_timeline_system_release_waiter_list(system, tracker->waiter_tail, tracker->waiter_head);
+		tracker->waiter_head = NULL;
+		tracker->waiter_tail = NULL;
+	}
+
+	switch (tracker->type) {
+	case MALI_TIMELINE_TRACKER_GP:
+		schedule_mask = mali_scheduler_activate_gp_job((struct mali_gp_job *) tracker->job);
+
+		_mali_osk_atomic_dec(&gp_tracker_count);
+		break;
+	case MALI_TIMELINE_TRACKER_PP:
+		if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) {
+			_mali_osk_atomic_dec(&virt_pp_tracker_count);
+		} else {
+			_mali_osk_atomic_dec(&phy_pp_tracker_count);
+		}
+		schedule_mask = mali_scheduler_activate_pp_job((struct mali_pp_job *) tracker->job);
+		break;
+	case MALI_TIMELINE_TRACKER_SOFT:
+		timeline = tracker->timeline;
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		schedule_mask |= mali_soft_job_system_activate_job((struct mali_soft_job *) tracker->job);
+
+		/* Start a soft timer to make sure the soft job be released in a limited time */
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+		mali_timeline_update_delayed_work(timeline);
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+		break;
+	case MALI_TIMELINE_TRACKER_WAIT:
+		mali_timeline_fence_wait_activate((struct mali_timeline_fence_wait_tracker *) tracker->job);
+		break;
+	case MALI_TIMELINE_TRACKER_SYNC:
+#if defined(CONFIG_SYNC)
+		mali_timeline_sync_fence_activate((struct mali_timeline_sync_fence_tracker *) tracker->job);
+#else
+		MALI_PRINT_ERROR(("Mali Timeline: sync tracker not supported\n", tracker->type));
+#endif /* defined(CONFIG_SYNC) */
+		break;
+	default:
+		MALI_PRINT_ERROR(("Mali Timeline - Illegal tracker type: %d\n", tracker->type));
+		break;
+	}
+
+	return schedule_mask;
+}
+
+void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker)
+{
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+	tracker->trigger_ref_count++;
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+}
+
+mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error)
+{
+	u32 tid = _mali_osk_get_tid();
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+	tracker->trigger_ref_count--;
+
+	tracker->activation_error |= activation_error;
+
+	if (0 == tracker->trigger_ref_count) {
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+		tracker = NULL;
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return schedule_mask;
+}
+
+void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+	MALI_DEBUG_ASSERT_POINTER(uk_fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		fence->points[i] = uk_fence->points[i];
+	}
+
+	fence->sync_fd = uk_fence->sync_fd;
+}
+
+struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session)
+{
+	u32 i;
+	struct mali_timeline_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: creating timeline system\n"));
+
+	system = (struct mali_timeline_system *) _mali_osk_calloc(1, sizeof(struct mali_timeline_system));
+	if (NULL == system) {
+		return NULL;
+	}
+
+	system->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM);
+	if (NULL == system->spinlock) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		system->timelines[i] = mali_timeline_create(system, (enum mali_timeline_id)i);
+		if (NULL == system->timelines[i]) {
+			mali_timeline_system_destroy(system);
+			return NULL;
+		}
+	}
+
+#if defined(CONFIG_SYNC)
+	system->signaled_sync_tl = mali_sync_timeline_create(NULL, "mali-always-signaled");
+	if (NULL == system->signaled_sync_tl) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	system->waiter_empty_list = NULL;
+	system->session = session;
+	system->timer_enabled = MALI_TRUE;
+
+	system->wait_queue = _mali_osk_wait_queue_init();
+	if (NULL == system->wait_queue) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+
+	return system;
+}
+
+#if defined(CONFIG_SYNC)
+
+/**
+ * Check if there are any trackers left on timeline.
+ *
+ * Used as a wait queue conditional.
+ *
+ * @param data Timeline.
+ * @return MALI_TRUE if there are no trackers on timeline, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_has_no_trackers(void *data)
+{
+	struct mali_timeline *timeline = (struct mali_timeline *) data;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	return mali_timeline_is_empty(timeline);
+}
+
+/**
+ * Cancel sync fence waiters waited upon by trackers on all timelines.
+ *
+ * Will return after all timelines have no trackers left.
+ *
+ * @param system Timeline system.
+ */
+static void mali_timeline_cancel_sync_fence_waiters(struct mali_timeline_system *system)
+{
+	u32 i;
+	u32 tid = _mali_osk_get_tid();
+	struct mali_timeline_tracker *tracker, *tracker_next;
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(tracker_list);
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Cancel sync fence waiters. */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		tracker_next = timeline->tracker_tail;
+		while (NULL != tracker_next) {
+			tracker = tracker_next;
+			tracker_next = tracker->timeline_next;
+
+			if (NULL == tracker->sync_fence) continue;
+
+			MALI_DEBUG_PRINT(3, ("Mali Timeline: Cancelling sync fence wait for tracker 0x%08X.\n", tracker));
+
+			/* Cancel sync fence waiter. */
+			if (0 == sync_fence_cancel_async(tracker->sync_fence, &tracker->sync_fence_waiter)) {
+				/* Callback was not called, move tracker to local list. */
+				_mali_osk_list_add(&tracker->sync_fence_cancel_list, &tracker_list);
+			}
+		}
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	/* Manually call sync fence callback in order to release waiter and trigger activation of tracker. */
+	_MALI_OSK_LIST_FOREACHENTRY(tracker, tracker_next, &tracker_list, struct mali_timeline_tracker, sync_fence_cancel_list) {
+		mali_timeline_sync_fence_callback(tracker->sync_fence, &tracker->sync_fence_waiter);
+	}
+
+	/* Sleep until all sync fence callbacks are done and all timelines are empty. */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		_mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_has_no_trackers, (void *) timeline);
+	}
+}
+
+#endif /* defined(CONFIG_SYNC) */
+
+void mali_timeline_system_abort(struct mali_timeline_system *system)
+{
+	MALI_DEBUG_CODE(u32 tid = _mali_osk_get_tid(););
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali Timeline: Aborting timeline system for session 0x%08X.\n", system->session));
+
+#if defined(CONFIG_SYNC)
+	mali_timeline_cancel_sync_fence_waiters(system);
+#endif /* defined(CONFIG_SYNC) */
+
+	/* Should not be any waiters or trackers left at this point. */
+	MALI_DEBUG_CODE({
+		u32 i;
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+		for (i = 0; i < MALI_TIMELINE_MAX; ++i)
+		{
+			struct mali_timeline *timeline = system->timelines[i];
+			MALI_DEBUG_ASSERT_POINTER(timeline);
+			MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next);
+			MALI_DEBUG_ASSERT(NULL == timeline->tracker_head);
+			MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+			MALI_DEBUG_ASSERT(NULL == timeline->waiter_head);
+			MALI_DEBUG_ASSERT(NULL == timeline->waiter_tail);
+		}
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+	});
+}
+
+void mali_timeline_system_destroy(struct mali_timeline_system *system)
+{
+	u32 i;
+	struct mali_timeline_waiter *waiter, *next;
+#if defined(CONFIG_SYNC)
+	u32 tid = _mali_osk_get_tid();
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: destroying timeline system\n"));
+
+	if (NULL != system) {
+
+		/* There should be no waiters left on this queue. */
+		if (NULL != system->wait_queue) {
+			_mali_osk_wait_queue_term(system->wait_queue);
+			system->wait_queue = NULL;
+		}
+
+		/* Free all waiters in empty list */
+		waiter = system->waiter_empty_list;
+		while (NULL != waiter) {
+			next = waiter->tracker_next;
+			_mali_osk_free(waiter);
+			waiter = next;
+		}
+
+#if defined(CONFIG_SYNC)
+		if (NULL != system->signaled_sync_tl) {
+			sync_timeline_destroy(system->signaled_sync_tl);
+		}
+
+		for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+			if ((NULL != system->timelines[i]) && (NULL != system->timelines[i]->spinlock)) {
+				mali_spinlock_reentrant_wait(system->timelines[i]->spinlock, tid);
+				system->timelines[i]->destroyed = MALI_TRUE;
+				mali_spinlock_reentrant_signal(system->timelines[i]->spinlock, tid);
+			}
+		}
+#endif /* defined(CONFIG_SYNC) */
+
+		for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+			if (NULL != system->timelines[i]) {
+				mali_timeline_destroy(system->timelines[i]);
+			}
+		}
+
+		if (NULL != system->spinlock) {
+			mali_spinlock_reentrant_term(system->spinlock);
+		}
+
+		_mali_osk_free(system);
+	}
+}
+
+/**
+ * Find how many waiters are needed for a given fence.
+ *
+ * @param fence The fence to check.
+ * @return Number of waiters needed for fence.
+ */
+static u32 mali_timeline_fence_num_waiters(struct mali_timeline_fence *fence)
+{
+	u32 i, num_waiters = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		if (MALI_TIMELINE_NO_POINT != fence->points[i]) {
+			++num_waiters;
+		}
+	}
+
+#if defined(CONFIG_SYNC)
+	if (-1 != fence->sync_fd) ++num_waiters;
+#endif /* defined(CONFIG_SYNC) */
+
+	return num_waiters;
+}
+
+static struct mali_timeline_waiter *mali_timeline_system_get_zeroed_waiter(struct mali_timeline_system *system)
+{
+	struct mali_timeline_waiter *waiter;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	waiter = system->waiter_empty_list;
+	if (NULL != waiter) {
+		/* Remove waiter from empty list and zero it */
+		system->waiter_empty_list = waiter->tracker_next;
+		_mali_osk_memset(waiter, 0, sizeof(*waiter));
+	}
+
+	/* Return NULL if list was empty. */
+	return waiter;
+}
+
+static void mali_timeline_system_allocate_waiters(struct mali_timeline_system *system,
+		struct mali_timeline_waiter **tail,
+		struct mali_timeline_waiter **head,
+		int max_num_waiters)
+{
+	u32 i, tid = _mali_osk_get_tid();
+	mali_bool do_alloc;
+	struct mali_timeline_waiter *waiter;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(tail);
+	MALI_DEBUG_ASSERT_POINTER(head);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	*head = *tail = NULL;
+	do_alloc = MALI_FALSE;
+	i = 0;
+	while (i < max_num_waiters) {
+		if (MALI_FALSE == do_alloc) {
+			waiter = mali_timeline_system_get_zeroed_waiter(system);
+			if (NULL == waiter) {
+				do_alloc = MALI_TRUE;
+				mali_spinlock_reentrant_signal(system->spinlock, tid);
+				continue;
+			}
+		} else {
+			waiter = _mali_osk_calloc(1, sizeof(struct mali_timeline_waiter));
+			if (NULL == waiter) break;
+		}
+		++i;
+		if (NULL == *tail) {
+			*tail = waiter;
+			*head = waiter;
+		} else {
+			(*head)->tracker_next = waiter;
+			*head = waiter;
+		}
+	}
+	if (MALI_TRUE == do_alloc) {
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+	}
+}
+
+/**
+ * Create waiters for the given tracker. The tracker is activated when all waiters are release.
+ *
+ * @note Tracker can potentially be activated before this function returns.
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker we will create waiters for.
+ * @param waiter_tail List of pre-allocated waiters.
+ * @param waiter_head List of pre-allocated waiters.
+ */
+static void mali_timeline_system_create_waiters_and_unlock(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		struct mali_timeline_waiter *waiter_tail,
+		struct mali_timeline_waiter *waiter_head)
+{
+	int i;
+	u32 tid = _mali_osk_get_tid();
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+#if defined(CONFIG_SYNC)
+	struct sync_fence *sync_fence = NULL;
+#endif /* defined(CONFIG_SYNC) */
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_head);
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+	MALI_DEBUG_ASSERT(NULL != tracker->job);
+
+	/* Creating waiter object for all the timelines the fence is put on. Inserting this waiter
+	 * into the timelines sorted list of waiters */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		mali_timeline_point point;
+		struct mali_timeline *timeline;
+		struct mali_timeline_waiter *waiter;
+
+		/* Get point on current timeline from tracker's fence. */
+		point = tracker->fence.points[i];
+
+		if (likely(MALI_TIMELINE_NO_POINT == point)) {
+			/* Fence contains no point on this timeline so we don't need a waiter. */
+			continue;
+		}
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (unlikely(!mali_timeline_is_point_valid(timeline, point))) {
+			MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n",
+					  point, timeline->point_oldest, timeline->point_next));
+			continue;
+		}
+
+		if (likely(mali_timeline_is_point_released(timeline, point))) {
+			/* Tracker representing the point has been released so we don't need a
+			 * waiter. */
+			continue;
+		}
+
+		/* The point is on timeline. */
+		MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, point));
+
+		/* Get a new zeroed waiter object. */
+		if (likely(NULL != waiter_tail)) {
+			waiter = waiter_tail;
+			waiter_tail = waiter_tail->tracker_next;
+		} else {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+			continue;
+		}
+
+		/* Yanking the trigger ref count of the tracker. */
+		tracker->trigger_ref_count++;
+
+		waiter->point   = point;
+		waiter->tracker = tracker;
+
+		/* Insert waiter on tracker's singly-linked waiter list. */
+		if (NULL == tracker->waiter_head) {
+			/* list is empty */
+			MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+			tracker->waiter_tail = waiter;
+		} else {
+			tracker->waiter_head->tracker_next = waiter;
+		}
+		tracker->waiter_head = waiter;
+
+		/* Add waiter to timeline. */
+		mali_timeline_insert_waiter(timeline, waiter);
+	}
+#if defined(CONFIG_SYNC)
+	if (-1 != tracker->fence.sync_fd) {
+		int ret;
+		struct mali_timeline_waiter *waiter;
+
+		sync_fence = sync_fence_fdget(tracker->fence.sync_fd);
+		if (unlikely(NULL == sync_fence)) {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", tracker->fence.sync_fd));
+			goto exit;
+		}
+
+		/* Check if we have a zeroed waiter object available. */
+		if (unlikely(NULL == waiter_tail)) {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+			goto exit;
+		}
+
+		/* Start asynchronous wait that will release waiter when the fence is signaled. */
+		sync_fence_waiter_init(&tracker->sync_fence_waiter, mali_timeline_sync_fence_callback);
+		ret = sync_fence_wait_async(sync_fence, &tracker->sync_fence_waiter);
+		if (1 == ret) {
+			/* Fence already signaled, no waiter needed. */
+			tracker->fence.sync_fd = -1;
+			goto exit;
+		} else if (0 != ret) {
+			MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, ret));
+			tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT;
+			goto exit;
+		}
+
+		/* Grab new zeroed waiter object. */
+		waiter = waiter_tail;
+		waiter_tail = waiter_tail->tracker_next;
+
+		/* Increase the trigger ref count of the tracker. */
+		tracker->trigger_ref_count++;
+
+		waiter->point   = MALI_TIMELINE_NO_POINT;
+		waiter->tracker = tracker;
+
+		/* Insert waiter on tracker's singly-linked waiter list. */
+		if (NULL == tracker->waiter_head) {
+			/* list is empty */
+			MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+			tracker->waiter_tail = waiter;
+		} else {
+			tracker->waiter_head->tracker_next = waiter;
+		}
+		tracker->waiter_head = waiter;
+
+		/* Also store waiter in separate field for easy access by sync callback. */
+		tracker->waiter_sync = waiter;
+
+		/* Store the sync fence in tracker so we can retrieve in abort session, if needed. */
+		tracker->sync_fence = sync_fence;
+
+		sync_fence = NULL;
+	}
+exit:
+#endif /* defined(CONFIG_SYNC) */
+
+	if (NULL != waiter_tail) {
+		mali_timeline_system_release_waiter_list(system, waiter_tail, waiter_head);
+	}
+
+	/* Release the initial trigger ref count. */
+	tracker->trigger_ref_count--;
+
+	/* If there were no waiters added to this tracker we activate immediately. */
+	if (0 == tracker->trigger_ref_count) {
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+#if defined(CONFIG_SYNC)
+	if (NULL != sync_fence) {
+		sync_fence_put(sync_fence);
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		enum mali_timeline_id timeline_id)
+{
+	int num_waiters = 0;
+	struct mali_timeline_waiter *waiter_tail, *waiter_head;
+	u32 tid = _mali_osk_get_tid();
+	mali_timeline_point point = MALI_TIMELINE_NO_POINT;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == system->session->is_aborting);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAX > tracker->type);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: adding tracker for job %p, timeline: %d\n", tracker->job, timeline_id));
+
+	MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+	tracker->system = system;
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	num_waiters = mali_timeline_fence_num_waiters(&tracker->fence);
+
+	/* Allocate waiters. */
+	mali_timeline_system_allocate_waiters(system, &waiter_tail, &waiter_head, num_waiters);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Add tracker to timeline.  This will allocate a point for the tracker on the timeline. If
+	 * timeline ID is MALI_TIMELINE_NONE the tracker will NOT be added to a timeline and the
+	 * point will be MALI_TIMELINE_NO_POINT.
+	 *
+	 * NOTE: the tracker can fail to be added if the timeline is full.  If this happens, the
+	 * point will be MALI_TIMELINE_NO_POINT. */
+	MALI_DEBUG_ASSERT(timeline_id < MALI_TIMELINE_MAX || timeline_id == MALI_TIMELINE_NONE);
+	if (likely(timeline_id < MALI_TIMELINE_MAX)) {
+		struct mali_timeline *timeline = system->timelines[timeline_id];
+		mali_timeline_insert_tracker(timeline, tracker);
+		MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+	}
+
+	point = tracker->point;
+
+	/* Create waiters for tracker based on supplied fence.  Each waiter will increase the
+	 * trigger ref count. */
+	mali_timeline_system_create_waiters_and_unlock(system, tracker, waiter_tail, waiter_head);
+	tracker = NULL;
+
+	/* At this point the tracker object might have been freed so we should no longer
+	 * access it. */
+
+
+	/* The tracker will always be activated after calling add_tracker, even if NO_POINT is
+	 * returned. */
+	return point;
+}
+
+static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *waiter)
+{
+	struct mali_timeline_tracker *tracker;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	tracker = waiter->tracker;
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	/* At this point the waiter has been removed from the timeline's waiter list, but it is
+	 * still on the tracker's waiter list.  All of the tracker's waiters will be released when
+	 * the tracker is activated. */
+
+	waiter->point   = MALI_TIMELINE_NO_POINT;
+	waiter->tracker = NULL;
+
+	tracker->trigger_ref_count--;
+	if (0 == tracker->trigger_ref_count) {
+		/* This was the last waiter; activate tracker */
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+		tracker = NULL;
+	}
+
+	return schedule_mask;
+}
+
+mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system,
+		enum mali_timeline_id timeline_id)
+{
+	mali_timeline_point point;
+	struct mali_timeline *timeline;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	if (MALI_TIMELINE_MAX <= timeline_id) {
+		return MALI_TIMELINE_NO_POINT;
+	}
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	timeline = system->timelines[timeline_id];
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	point = MALI_TIMELINE_NO_POINT;
+	if (timeline->point_oldest != timeline->point_next) {
+		point = timeline->point_next - 1;
+		if (MALI_TIMELINE_NO_POINT == point) point--;
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return point;
+}
+
+void mali_timeline_initialize(void)
+{
+	_mali_osk_atomic_init(&gp_tracker_count, 0);
+	_mali_osk_atomic_init(&phy_pp_tracker_count, 0);
+	_mali_osk_atomic_init(&virt_pp_tracker_count, 0);
+}
+
+void mali_timeline_terminate(void)
+{
+	_mali_osk_atomic_term(&gp_tracker_count);
+	_mali_osk_atomic_term(&phy_pp_tracker_count);
+	_mali_osk_atomic_term(&virt_pp_tracker_count);
+}
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+
+static mali_bool is_waiting_on_timeline(struct mali_timeline_tracker *tracker, enum mali_timeline_id id)
+{
+	struct mali_timeline *timeline;
+	struct mali_timeline_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT_POINTER(tracker->timeline);
+	timeline = tracker->timeline;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline->system);
+	system = timeline->system;
+
+	if (MALI_TIMELINE_MAX > id) {
+		if (MALI_TIMELINE_NO_POINT != tracker->fence.points[id]) {
+			return mali_timeline_is_point_on(system->timelines[id], tracker->fence.points[id]);
+		} else {
+			return MALI_FALSE;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_NONE == id);
+		return MALI_FALSE;
+	}
+}
+
+static const char *timeline_id_to_string(enum mali_timeline_id id)
+{
+	switch (id) {
+	case MALI_TIMELINE_GP:
+		return "GP";
+	case MALI_TIMELINE_PP:
+		return "PP";
+	case MALI_TIMELINE_SOFT:
+		return "SOFT";
+	default:
+		return "NONE";
+	}
+}
+
+static const char *timeline_tracker_type_to_string(enum mali_timeline_tracker_type type)
+{
+	switch (type) {
+	case MALI_TIMELINE_TRACKER_GP:
+		return "GP";
+	case MALI_TIMELINE_TRACKER_PP:
+		return "PP";
+	case MALI_TIMELINE_TRACKER_SOFT:
+		return "SOFT";
+	case MALI_TIMELINE_TRACKER_WAIT:
+		return "WAIT";
+	case MALI_TIMELINE_TRACKER_SYNC:
+		return "SYNC";
+	default:
+		return "INVALID";
+	}
+}
+
+mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker)
+{
+	struct mali_timeline *timeline = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	timeline = tracker->timeline;
+
+	if (0 != tracker->trigger_ref_count) {
+		return MALI_TIMELINE_TS_WAITING;
+	}
+
+	if (timeline && (timeline->tracker_tail == tracker || NULL != tracker->timeline_prev)) {
+		return MALI_TIMELINE_TS_ACTIVE;
+	}
+
+	if (timeline && (MALI_TIMELINE_NO_POINT == tracker->point)) {
+		return MALI_TIMELINE_TS_INIT;
+	}
+
+	return MALI_TIMELINE_TS_FINISH;
+}
+
+void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx)
+{
+	const char *tracker_state = "IWAF";
+	char state_char = 'I';
+	char tracker_type[32] = {0};
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker));
+	_mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type));
+
+#if defined(CONFIG_SYNC)
+	if (0 != tracker->trigger_ref_count) {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)]  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+				    tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job));
+	} else {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c  fd:%d  fence:(0x%08X)  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char,
+				    tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job));
+	}
+#else
+	if (0 != tracker->trigger_ref_count) {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)]  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+				    (unsigned int)(uintptr_t)(tracker->job));
+	} else {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char,
+				    (unsigned int)(uintptr_t)(tracker->job));
+	}
+#endif
+}
+
+void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx)
+{
+	struct mali_timeline_tracker *tracker = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	tracker = timeline->tracker_tail;
+	while (NULL != tracker) {
+		mali_timeline_debug_print_tracker(tracker, print_ctx);
+		tracker = tracker->timeline_next;
+	}
+}
+
+#if !(LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0))
+void mali_timeline_debug_direct_print_tracker(struct mali_timeline_tracker *tracker)
+{
+	const char *tracker_state = "IWAF";
+	char state_char = 'I';
+	char tracker_type[32] = {0};
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker));
+	_mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type));
+
+#if defined(CONFIG_SYNC)
+	if (0 != tracker->trigger_ref_count) {
+		MALI_PRINT(("TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)]  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+			    tracker->fence.sync_fd, tracker->sync_fence, tracker->job));
+	} else {
+		MALI_PRINT(("TL:  %s %u %c  fd:%d  fence:(0x%08X)  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char,
+			    tracker->fence.sync_fd, tracker->sync_fence, tracker->job));
+	}
+#else
+	if (0 != tracker->trigger_ref_count) {
+		MALI_PRINT(("TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)]  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+			    tracker->job));
+	} else {
+		MALI_PRINT(("TL:  %s %u %c  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char,
+			    tracker->job));
+	}
+#endif
+}
+
+void mali_timeline_debug_direct_print_timeline(struct mali_timeline *timeline)
+{
+	struct mali_timeline_tracker *tracker = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	tracker = timeline->tracker_tail;
+	while (NULL != tracker) {
+		mali_timeline_debug_direct_print_tracker(tracker);
+		tracker = tracker->timeline_next;
+	}
+}
+
+#endif
+
+void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx)
+{
+	int i;
+	int num_printed = 0;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Print all timelines */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (NULL == timeline->tracker_head) continue;
+
+		_mali_osk_ctxprintf(print_ctx, "TL: Timeline %s:\n",
+				    timeline_id_to_string((enum mali_timeline_id)i));
+
+		mali_timeline_debug_print_timeline(timeline, print_ctx);
+		num_printed++;
+	}
+
+	if (0 == num_printed) {
+		_mali_osk_ctxprintf(print_ctx, "TL: All timelines empty\n");
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+}
+
+#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */
diff --git a/utgard/r6p1/common/mali_timeline.h b/utgard/r6p1/common/mali_timeline.h
new file mode 100755
index 0000000..268676a
--- /dev/null
+++ b/utgard/r6p1/common/mali_timeline.h
@@ -0,0 +1,535 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMELINE_H__
+#define __MALI_TIMELINE_H__
+
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "mali_spinlock_reentrant.h"
+#include "mali_sync.h"
+#include "mali_scheduler_types.h"
+#include <linux/version.h>
+
+/**
+ * Soft job timeout.
+ *
+ * Soft jobs have to be signaled as complete after activation.  Normally this is done by user space,
+ * but in order to guarantee that every soft job is completed, we also have a timer.
+ */
+#define MALI_TIMELINE_TIMEOUT_HZ ((unsigned long) (HZ * 3 / 2)) /* 1500 ms. */
+
+/**
+ * Timeline type.
+ */
+typedef enum mali_timeline_id {
+	MALI_TIMELINE_GP   = MALI_UK_TIMELINE_GP,   /**< GP job timeline. */
+	MALI_TIMELINE_PP   = MALI_UK_TIMELINE_PP,   /**< PP job timeline. */
+	MALI_TIMELINE_SOFT = MALI_UK_TIMELINE_SOFT, /**< Soft job timeline. */
+	MALI_TIMELINE_MAX  = MALI_UK_TIMELINE_MAX
+} mali_timeline_id;
+
+/**
+ * Used by trackers that should not be added to a timeline (@ref mali_timeline_system_add_tracker).
+ */
+#define MALI_TIMELINE_NONE MALI_TIMELINE_MAX
+
+/**
+ * Tracker type.
+ */
+typedef enum mali_timeline_tracker_type {
+	MALI_TIMELINE_TRACKER_GP   = 0, /**< Tracker used by GP jobs. */
+	MALI_TIMELINE_TRACKER_PP   = 1, /**< Tracker used by PP jobs. */
+	MALI_TIMELINE_TRACKER_SOFT = 2, /**< Tracker used by soft jobs. */
+	MALI_TIMELINE_TRACKER_WAIT = 3, /**< Tracker used for fence wait. */
+	MALI_TIMELINE_TRACKER_SYNC = 4, /**< Tracker used for sync fence. */
+	MALI_TIMELINE_TRACKER_MAX  = 5,
+} mali_timeline_tracker_type;
+
+/**
+ * Tracker activation error.
+ */
+typedef u32 mali_timeline_activation_error;
+#define MALI_TIMELINE_ACTIVATION_ERROR_NONE      0
+#define MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT  (1<<1)
+#define MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT (1<<0)
+
+/**
+ * Type used to represent a point on a timeline.
+ */
+typedef u32 mali_timeline_point;
+
+/**
+ * Used to represent that no point on a timeline.
+ */
+#define MALI_TIMELINE_NO_POINT ((mali_timeline_point) 0)
+
+/**
+ * The maximum span of points on a timeline.  A timeline will be considered full if the difference
+ * between the oldest and newest points is equal or larger to this value.
+ */
+#define MALI_TIMELINE_MAX_POINT_SPAN 65536
+
+/**
+ * Magic value used to assert on validity of trackers.
+ */
+#define MALI_TIMELINE_TRACKER_MAGIC 0xabcdabcd
+
+struct mali_timeline;
+struct mali_timeline_waiter;
+struct mali_timeline_tracker;
+
+/**
+ * Timeline fence.
+ */
+struct mali_timeline_fence {
+	mali_timeline_point points[MALI_TIMELINE_MAX]; /**< For each timeline, a point or MALI_TIMELINE_NO_POINT. */
+	s32                 sync_fd;                   /**< A file descriptor representing a sync fence, or -1. */
+};
+
+/**
+ * Timeline system.
+ *
+ * The Timeline system has a set of timelines associated with a session.
+ */
+struct mali_timeline_system {
+	struct mali_spinlock_reentrant *spinlock;   /**< Spin lock protecting the timeline system */
+	struct mali_timeline           *timelines[MALI_TIMELINE_MAX]; /**< The timelines in this system */
+
+	/* Single-linked list of unused waiter objects.  Uses the tracker_next field in tracker. */
+	struct mali_timeline_waiter    *waiter_empty_list;
+
+	struct mali_session_data       *session;    /**< Session that owns this system. */
+
+	mali_bool                       timer_enabled; /**< Set to MALI_TRUE if soft job timer should be enabled, MALI_FALSE if not. */
+
+	_mali_osk_wait_queue_t         *wait_queue; /**< Wait queue. */
+
+#if defined(CONFIG_SYNC)
+	struct sync_timeline           *signaled_sync_tl; /**< Special sync timeline used to create pre-signaled sync fences */
+#endif /* defined(CONFIG_SYNC) */
+};
+
+/**
+ * Timeline.  Each Timeline system will have MALI_TIMELINE_MAX timelines.
+ */
+struct mali_timeline {
+	mali_timeline_point           point_next;   /**< The next available point. */
+	mali_timeline_point           point_oldest; /**< The oldest point not released. */
+
+	/* Double-linked list of trackers.  Sorted in ascending order by tracker->time_number with
+	 * tail pointing to the tracker with the oldest time. */
+	struct mali_timeline_tracker *tracker_head;
+	struct mali_timeline_tracker *tracker_tail;
+
+	/* Double-linked list of waiters.  Sorted in ascending order by waiter->time_number_wait
+	 * with tail pointing to the waiter with oldest wait time. */
+	struct mali_timeline_waiter  *waiter_head;
+	struct mali_timeline_waiter  *waiter_tail;
+
+	struct mali_timeline_system  *system;       /**< Timeline system this timeline belongs to. */
+	enum mali_timeline_id         id;           /**< Timeline type. */
+
+#if defined(CONFIG_SYNC)
+	struct sync_timeline         *sync_tl;      /**< Sync timeline that corresponds to this timeline. */
+	mali_bool destroyed;
+	struct mali_spinlock_reentrant *spinlock;       /**< Spin lock protecting the timeline system */
+#endif /* defined(CONFIG_SYNC) */
+
+	/* The following fields are used to time out soft job trackers. */
+	_mali_osk_wq_delayed_work_t  *delayed_work;
+	mali_bool                     timer_active;
+};
+
+/**
+ * Timeline waiter.
+ */
+struct mali_timeline_waiter {
+	mali_timeline_point           point;         /**< Point on timeline we are waiting for to be released. */
+	struct mali_timeline_tracker *tracker;       /**< Tracker that is waiting. */
+
+	struct mali_timeline_waiter  *timeline_next; /**< Next waiter on timeline's waiter list. */
+	struct mali_timeline_waiter  *timeline_prev; /**< Previous waiter on timeline's waiter list. */
+
+	struct mali_timeline_waiter  *tracker_next;  /**< Next waiter on tracker's waiter list. */
+};
+
+/**
+ * Timeline tracker.
+ */
+struct mali_timeline_tracker {
+	MALI_DEBUG_CODE(u32            magic); /**< Should always be MALI_TIMELINE_TRACKER_MAGIC for a valid tracker. */
+
+	mali_timeline_point            point; /**< Point on timeline for this tracker */
+
+	struct mali_timeline_tracker  *timeline_next; /**< Next tracker on timeline's tracker list */
+	struct mali_timeline_tracker  *timeline_prev; /**< Previous tracker on timeline's tracker list */
+
+	u32                            trigger_ref_count; /**< When zero tracker will be activated */
+	mali_timeline_activation_error activation_error;  /**< Activation error. */
+	struct mali_timeline_fence     fence;             /**< Fence used to create this tracker */
+
+	/* Single-linked list of waiters.  Sorted in order of insertions with
+	 * tail pointing to first waiter. */
+	struct mali_timeline_waiter   *waiter_head;
+	struct mali_timeline_waiter   *waiter_tail;
+
+#if defined(CONFIG_SYNC)
+	/* These are only used if the tracker is waiting on a sync fence. */
+	struct mali_timeline_waiter   *waiter_sync; /**< A direct pointer to timeline waiter representing sync fence. */
+	struct sync_fence_waiter       sync_fence_waiter; /**< Used to connect sync fence and tracker in sync fence wait callback. */
+	struct sync_fence             *sync_fence;   /**< The sync fence this tracker is waiting on. */
+	_mali_osk_list_t               sync_fence_cancel_list; /**< List node used to cancel sync fence waiters. */
+#endif /* defined(CONFIG_SYNC) */
+
+	struct mali_timeline_system   *system;       /**< Timeline system. */
+	struct mali_timeline          *timeline;     /**< Timeline, or NULL if not on a timeline. */
+	enum mali_timeline_tracker_type type;        /**< Type of tracker. */
+	void                          *job;          /**< Owner of tracker. */
+
+	/* The following fields are used to time out soft job trackers. */
+	unsigned long                 os_tick_create;
+	unsigned long                 os_tick_activate;
+	mali_bool                     timer_active;
+};
+
+extern _mali_osk_atomic_t gp_tracker_count;
+extern _mali_osk_atomic_t phy_pp_tracker_count;
+extern _mali_osk_atomic_t virt_pp_tracker_count;
+
+/**
+ * What follows is a set of functions to check the state of a timeline and to determine where on a
+ * timeline a given point is.  Most of these checks will translate the timeline so the oldest point
+ * on the timeline is aligned with zero.  Remember that all of these calculation are done on
+ * unsigned integers.
+ *
+ * The following example illustrates the three different states a point can be in.  The timeline has
+ * been translated to put the oldest point at zero:
+ *
+ *
+ *
+ *                               [ point is in forbidden zone ]
+ *                                          64k wide
+ *                                MALI_TIMELINE_MAX_POINT_SPAN
+ *
+ *    [ point is on timeline     )                            ( point is released ]
+ *
+ *    0--------------------------##############################--------------------2^32 - 1
+ *    ^                          ^
+ *    \                          |
+ *     oldest point on timeline  |
+ *                               \
+ *                                next point on timeline
+ */
+
+/**
+ * Compare two timeline points
+ *
+ * Returns true if a is after b, false if a is before or equal to b.
+ *
+ * This funcion ignores MALI_TIMELINE_MAX_POINT_SPAN. Wrapping is supported and
+ * the result will be correct if the points is less then UINT_MAX/2 apart.
+ *
+ * @param a Point on timeline
+ * @param b Point on timeline
+ * @return MALI_TRUE if a is after b
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_point_after(mali_timeline_point a, mali_timeline_point b)
+{
+	return 0 > ((s32)b) - ((s32)a);
+}
+
+/**
+ * Check if a point is on timeline.  A point is on a timeline if it is greater than, or equal to,
+ * the oldest point, and less than the next point.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point is on timeline, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_on(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+	return (point - timeline->point_oldest) < (timeline->point_next - timeline->point_oldest);
+}
+
+/**
+ * Check if a point has been released.  A point is released if it is older than the oldest point on
+ * the timeline, newer than the next point, and also not in the forbidden zone.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point has been release, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_released(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	mali_timeline_point point_normalized;
+	mali_timeline_point next_normalized;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+	point_normalized = point - timeline->point_oldest;
+	next_normalized = timeline->point_next - timeline->point_oldest;
+
+	return point_normalized > (next_normalized + MALI_TIMELINE_MAX_POINT_SPAN);
+}
+
+/**
+ * Check if a point is valid.  A point is valid if is on the timeline or has been released.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point is valid, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_valid(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return mali_timeline_is_point_on(timeline, point) || mali_timeline_is_point_released(timeline, point);
+}
+
+/**
+ * Check if timeline is empty (has no points on it).  A timeline is empty if next == oldest.
+ *
+ * @param timeline Timeline.
+ * @return MALI_TRUE if timeline is empty, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_empty(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return timeline->point_next == timeline->point_oldest;
+}
+
+/**
+ * Check if timeline is full.  A valid timeline cannot span more than 64k points (@ref
+ * MALI_TIMELINE_MAX_POINT_SPAN).
+ *
+ * @param timeline Timeline.
+ * @return MALI_TRUE if timeline is full, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_full(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return MALI_TIMELINE_MAX_POINT_SPAN <= (timeline->point_next - timeline->point_oldest);
+}
+
+/**
+ * Create a new timeline system.
+ *
+ * @param session The session this timeline system will belong to.
+ * @return New timeline system.
+ */
+struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session);
+
+/**
+ * Abort timeline system.
+ *
+ * This will release all pending waiters in the timeline system causing all trackers to be
+ * activated.
+ *
+ * @param system Timeline system to abort all jobs from.
+ */
+void mali_timeline_system_abort(struct mali_timeline_system *system);
+
+/**
+ * Destroy an empty timeline system.
+ *
+ * @note @ref mali_timeline_system_abort() should be called prior to this function.
+ *
+ * @param system Timeline system to destroy.
+ */
+void mali_timeline_system_destroy(struct mali_timeline_system *system);
+
+/**
+ * Stop the soft job timer.
+ *
+ * @param system Timeline system
+ */
+void mali_timeline_system_stop_timer(struct mali_timeline_system *system);
+
+/**
+ * Add a tracker to a timeline system and optionally also on a timeline.
+ *
+ * Once added to the timeline system, the tracker is guaranteed to be activated.  The tracker can be
+ * activated before this function returns.  Thus, it is also possible that the tracker is released
+ * before this function returns, depending on the tracker type.
+ *
+ * @note Tracker must be initialized (@ref mali_timeline_tracker_init) before being added to the
+ * timeline system.
+ *
+ * @param system Timeline system the tracker will be added to.
+ * @param tracker The tracker to be added.
+ * @param timeline_id Id of the timeline the tracker will be added to, or
+ *                    MALI_TIMELINE_NONE if it should not be added on a timeline.
+ * @return Point on timeline identifying this tracker, or MALI_TIMELINE_NO_POINT if not on timeline.
+ */
+mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		enum mali_timeline_id timeline_id);
+
+/**
+ * Get latest point on timeline.
+ *
+ * @param system Timeline system.
+ * @param timeline_id Id of timeline to get latest point from.
+ * @return Latest point on timeline, or MALI_TIMELINE_NO_POINT if the timeline is empty.
+ */
+mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system,
+		enum mali_timeline_id timeline_id);
+
+/**
+ * Initialize tracker.
+ *
+ * Must be called before tracker is added to timeline system (@ref mali_timeline_system_add_tracker).
+ *
+ * @param tracker Tracker to initialize.
+ * @param type Type of tracker.
+ * @param fence Fence used to set up dependencies for tracker.
+ * @param job Pointer to job struct this tracker is associated with.
+ */
+void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker,
+				mali_timeline_tracker_type type,
+				struct mali_timeline_fence *fence,
+				void *job);
+
+/**
+ * Grab trigger ref count on tracker.
+ *
+ * This will prevent tracker from being activated until the trigger ref count reaches zero.
+ *
+ * @note Tracker must have been initialized (@ref mali_timeline_tracker_init).
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker.
+ */
+void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker);
+
+/**
+ * Release trigger ref count on tracker.
+ *
+ * If the trigger ref count reaches zero, the tracker will be activated.
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker.
+ * @param activation_error Error bitmask if activated with error, or MALI_TIMELINE_ACTIVATION_ERROR_NONE if no error.
+ * @return Scheduling bitmask.
+ */
+mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error);
+
+/**
+ * Release a tracker from the timeline system.
+ *
+ * This is used to signal that the job being tracker is finished, either due to normal circumstances
+ * (job complete/abort) or due to a timeout.
+ *
+ * We may need to schedule some subsystems after a tracker has been released and the returned
+ * bitmask will tell us if it is necessary.  If the return value is non-zero, this value needs to be
+ * sent as an input parameter to @ref mali_scheduler_schedule_from_mask() to do the scheduling.
+ *
+ * @note Tracker must have been activated before being released.
+ * @warning Not calling @ref mali_scheduler_schedule_from_mask() after releasing a tracker can lead
+ * to a deadlock.
+ *
+ * @param tracker Tracker being released.
+ * @return Scheduling bitmask.
+ */
+mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker);
+
+MALI_STATIC_INLINE mali_bool mali_timeline_tracker_activation_error(
+	struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	return (MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT &
+		tracker->activation_error) ? MALI_TRUE : MALI_FALSE;
+}
+
+/**
+ * Copy data from a UK fence to a Timeline fence.
+ *
+ * @param fence Timeline fence.
+ * @param uk_fence UK fence.
+ */
+void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence);
+
+void mali_timeline_initialize(void);
+
+void mali_timeline_terminate(void);
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_gp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&gp_tracker_count);
+}
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_physical_pp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&phy_pp_tracker_count);
+}
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_virtual_pp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&virt_pp_tracker_count);
+}
+
+#if defined(DEBUG)
+#define MALI_TIMELINE_DEBUG_FUNCTIONS
+#endif /* DEBUG */
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+
+/**
+ * Tracker state.  Used for debug printing.
+ */
+typedef enum mali_timeline_tracker_state {
+	MALI_TIMELINE_TS_INIT    = 0,
+	MALI_TIMELINE_TS_WAITING = 1,
+	MALI_TIMELINE_TS_ACTIVE  = 2,
+	MALI_TIMELINE_TS_FINISH  = 3,
+} mali_timeline_tracker_state;
+
+/**
+ * Get tracker state.
+ *
+ * @param tracker Tracker to check.
+ * @return State of tracker.
+ */
+mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker);
+
+/**
+ * Print debug information about tracker.
+ *
+ * @param tracker Tracker to print.
+ */
+void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx);
+
+/**
+ * Print debug information about timeline.
+ *
+ * @param timeline Timeline to print.
+ */
+void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx);
+
+#if !(LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0))
+void mali_timeline_debug_direct_print_tracker(struct mali_timeline_tracker *tracker);
+void mali_timeline_debug_direct_print_timeline(struct mali_timeline *timeline);
+#endif
+
+/**
+ * Print debug information about timeline system.
+ *
+ * @param system Timeline system to print.
+ */
+void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx);
+
+#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */
+
+#endif /* __MALI_TIMELINE_H__ */
diff --git a/utgard/r6p1/common/mali_timeline_fence_wait.c b/utgard/r6p1/common/mali_timeline_fence_wait.c
new file mode 100755
index 0000000..1f5d421
--- /dev/null
+++ b/utgard/r6p1/common/mali_timeline_fence_wait.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timeline_fence_wait.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_spinlock_reentrant.h"
+
+/**
+ * Allocate a fence waiter tracker.
+ *
+ * @return New fence waiter if successful, NULL if not.
+ */
+static struct mali_timeline_fence_wait_tracker *mali_timeline_fence_wait_tracker_alloc(void)
+{
+	return (struct mali_timeline_fence_wait_tracker *) _mali_osk_calloc(1, sizeof(struct mali_timeline_fence_wait_tracker));
+}
+
+/**
+ * Free fence waiter tracker.
+ *
+ * @param wait Fence wait tracker to free.
+ */
+static void mali_timeline_fence_wait_tracker_free(struct mali_timeline_fence_wait_tracker *wait)
+{
+	MALI_DEBUG_ASSERT_POINTER(wait);
+	_mali_osk_atomic_term(&wait->refcount);
+	_mali_osk_free(wait);
+}
+
+/**
+ * Check if fence wait tracker has been activated.  Used as a wait queue condition.
+ *
+ * @param data Fence waiter.
+ * @return MALI_TRUE if tracker has been activated, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_fence_wait_tracker_is_activated(void *data)
+{
+	struct mali_timeline_fence_wait_tracker *wait;
+
+	wait = (struct mali_timeline_fence_wait_tracker *) data;
+	MALI_DEBUG_ASSERT_POINTER(wait);
+
+	return wait->activated;
+}
+
+/**
+ * Check if fence has been signaled.
+ *
+ * @param system Timeline system.
+ * @param fence Timeline fence.
+ * @return MALI_TRUE if fence is signaled, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_fence_wait_check_status(struct mali_timeline_system *system, struct mali_timeline_fence *fence)
+{
+	int i;
+	u32 tid = _mali_osk_get_tid();
+	mali_bool ret = MALI_TRUE;
+#if defined(CONFIG_SYNC)
+	struct sync_fence *sync_fence = NULL;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline;
+		mali_timeline_point   point;
+
+		point = fence->points[i];
+
+		if (likely(MALI_TIMELINE_NO_POINT == point)) {
+			/* Fence contains no point on this timeline. */
+			continue;
+		}
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (unlikely(!mali_timeline_is_point_valid(timeline, point))) {
+			MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n", point, timeline->point_oldest, timeline->point_next));
+		}
+
+		if (!mali_timeline_is_point_released(timeline, point)) {
+			ret = MALI_FALSE;
+			goto exit;
+		}
+	}
+
+#if defined(CONFIG_SYNC)
+	if (-1 != fence->sync_fd) {
+		sync_fence = sync_fence_fdget(fence->sync_fd);
+		if (likely(NULL != sync_fence)) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+			if (0 == sync_fence->status) {
+#else
+			if (0 == atomic_read(&sync_fence->status)) {
+#endif
+				ret = MALI_FALSE;
+			}
+		} else {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", fence->sync_fd));
+		}
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+exit:
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+#if defined(CONFIG_SYNC)
+	if (NULL != sync_fence) {
+		sync_fence_put(sync_fence);
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	return ret;
+}
+
+mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout)
+{
+	struct mali_timeline_fence_wait_tracker *wait;
+	mali_timeline_point point;
+	mali_bool ret;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: wait on fence\n"));
+
+	if (MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY == timeout) {
+		return mali_timeline_fence_wait_check_status(system, fence);
+	}
+
+	wait = mali_timeline_fence_wait_tracker_alloc();
+	if (unlikely(NULL == wait)) {
+		MALI_PRINT_ERROR(("Mali Timeline: failed to allocate data for fence wait\n"));
+		return MALI_FALSE;
+	}
+
+	wait->activated = MALI_FALSE;
+	wait->system = system;
+
+	/* Initialize refcount to two references.  The reference first will be released by this
+	 * function after the wait is over.  The second reference will be released when the tracker
+	 * is activated. */
+	_mali_osk_atomic_init(&wait->refcount, 2);
+
+	/* Add tracker to timeline system, but not to a timeline. */
+	mali_timeline_tracker_init(&wait->tracker, MALI_TIMELINE_TRACKER_WAIT, fence, wait);
+	point = mali_timeline_system_add_tracker(system, &wait->tracker, MALI_TIMELINE_NONE);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT == point);
+	MALI_IGNORE(point);
+
+	/* Wait for the tracker to be activated or time out. */
+	if (MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER == timeout) {
+		_mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait);
+	} else {
+		_mali_osk_wait_queue_wait_event_timeout(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait, timeout);
+	}
+
+	ret = wait->activated;
+
+	if (0 == _mali_osk_atomic_dec_return(&wait->refcount)) {
+		mali_timeline_fence_wait_tracker_free(wait);
+	}
+
+	return ret;
+}
+
+void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *wait)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(wait);
+	MALI_DEBUG_ASSERT_POINTER(wait->system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for fence wait tracker\n"));
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == wait->activated);
+	wait->activated = MALI_TRUE;
+
+	_mali_osk_wait_queue_wake_up(wait->system->wait_queue);
+
+	/* Nothing can wait on this tracker, so nothing to schedule after release. */
+	schedule_mask = mali_timeline_tracker_release(&wait->tracker);
+	MALI_DEBUG_ASSERT(MALI_SCHEDULER_MASK_EMPTY == schedule_mask);
+	MALI_IGNORE(schedule_mask);
+
+	if (0 == _mali_osk_atomic_dec_return(&wait->refcount)) {
+		mali_timeline_fence_wait_tracker_free(wait);
+	}
+}
diff --git a/utgard/r6p1/common/mali_timeline_fence_wait.h b/utgard/r6p1/common/mali_timeline_fence_wait.h
new file mode 100755
index 0000000..348652b
--- /dev/null
+++ b/utgard/r6p1/common/mali_timeline_fence_wait.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_timeline_fence_wait.h
+ *
+ * This file contains functions used to wait until a Timeline fence is signaled.
+ */
+
+#ifndef __MALI_TIMELINE_FENCE_WAIT_H__
+#define __MALI_TIMELINE_FENCE_WAIT_H__
+
+#include "mali_osk.h"
+#include "mali_timeline.h"
+
+/**
+ * If used as the timeout argument in @ref mali_timeline_fence_wait, a timer is not used and the
+ * function only returns when the fence is signaled.
+ */
+#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER ((u32) -1)
+
+/**
+ * If used as the timeout argument in @ref mali_timeline_fence_wait, the function will return
+ * immediately with the current state of the fence.
+ */
+#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY 0
+
+/**
+ * Fence wait tracker.
+ *
+ * The fence wait tracker is added to the Timeline system with the fence we are waiting on as a
+ * dependency.  We will then perform a blocking wait, possibly with a timeout, until the tracker is
+ * activated, which happens when the fence is signaled.
+ */
+struct mali_timeline_fence_wait_tracker {
+	mali_bool activated;                  /**< MALI_TRUE if the tracker has been activated, MALI_FALSE if not. */
+	_mali_osk_atomic_t refcount;          /**< Reference count. */
+	struct mali_timeline_system *system;  /**< Timeline system. */
+	struct mali_timeline_tracker tracker; /**< Timeline tracker. */
+};
+
+/**
+ * Wait for a fence to be signaled, or timeout is reached.
+ *
+ * @param system Timeline system.
+ * @param fence Fence to wait on.
+ * @param timeout Timeout in ms, or MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER or
+ * MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY.
+ * @return MALI_TRUE if signaled, MALI_FALSE if timed out.
+ */
+mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout);
+
+/**
+ * Used by the Timeline system to activate a fence wait tracker.
+ *
+ * @param fence_wait_tracker Fence waiter tracker.
+ */
+void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *fence_wait_tracker);
+
+#endif /* __MALI_TIMELINE_FENCE_WAIT_H__ */
diff --git a/utgard/r6p1/common/mali_timeline_sync_fence.c b/utgard/r6p1/common/mali_timeline_sync_fence.c
new file mode 100755
index 0000000..a83252b
--- /dev/null
+++ b/utgard/r6p1/common/mali_timeline_sync_fence.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timeline_sync_fence.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_sync.h"
+
+#if defined(CONFIG_SYNC)
+
+/**
+ * Creates a sync fence tracker and a sync fence.  Adds sync fence tracker to Timeline system and
+ * returns sync fence.  The sync fence will be signaled when the sync fence tracker is activated.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return Sync fence that will be signaled when tracker is activated.
+ */
+static struct sync_fence *mali_timeline_sync_fence_create_and_add_tracker(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	struct mali_timeline_sync_fence_tracker *sync_fence_tracker;
+	struct sync_fence                       *sync_fence;
+	struct mali_timeline_fence               fence;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+	/* Allocate sync fence tracker. */
+	sync_fence_tracker = _mali_osk_calloc(1, sizeof(struct mali_timeline_sync_fence_tracker));
+	if (NULL == sync_fence_tracker) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_fence_tracker allocation failed\n"));
+		return NULL;
+	}
+
+	/* Create sync flag. */
+	MALI_DEBUG_ASSERT_POINTER(timeline->sync_tl);
+	sync_fence_tracker->flag = mali_sync_flag_create(timeline->sync_tl, point);
+	if (NULL == sync_fence_tracker->flag) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_flag creation failed\n"));
+		_mali_osk_free(sync_fence_tracker);
+		return NULL;
+	}
+
+	/* Create sync fence from sync flag. */
+	sync_fence = mali_sync_flag_create_fence(sync_fence_tracker->flag);
+	if (NULL == sync_fence) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_fence creation failed\n"));
+		mali_sync_flag_put(sync_fence_tracker->flag);
+		_mali_osk_free(sync_fence_tracker);
+		return NULL;
+	}
+
+	/* Setup fence for tracker. */
+	_mali_osk_memset(&fence, 0, sizeof(struct mali_timeline_fence));
+	fence.sync_fd = -1;
+	fence.points[timeline->id] = point;
+
+	/* Finally, add the tracker to Timeline system. */
+	mali_timeline_tracker_init(&sync_fence_tracker->tracker, MALI_TIMELINE_TRACKER_SYNC, &fence, sync_fence_tracker);
+	point = mali_timeline_system_add_tracker(timeline->system, &sync_fence_tracker->tracker, MALI_TIMELINE_NONE);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT == point);
+
+	return sync_fence;
+}
+
+s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence)
+{
+	u32 i;
+	struct sync_fence *sync_fence_acc = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline;
+		struct sync_fence *sync_fence;
+
+		if (MALI_TIMELINE_NO_POINT == fence->points[i]) continue;
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		sync_fence = mali_timeline_sync_fence_create_and_add_tracker(timeline, fence->points[i]);
+		if (NULL == sync_fence) goto error;
+
+		if (NULL != sync_fence_acc) {
+			/* Merge sync fences. */
+			sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence);
+			if (NULL == sync_fence_acc) goto error;
+		} else {
+			/* This was the first sync fence created. */
+			sync_fence_acc = sync_fence;
+		}
+	}
+
+	if (-1 != fence->sync_fd) {
+		struct sync_fence *sync_fence;
+
+		sync_fence = sync_fence_fdget(fence->sync_fd);
+		if (NULL == sync_fence) goto error;
+
+		if (NULL != sync_fence_acc) {
+			sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence);
+			if (NULL == sync_fence_acc) goto error;
+		} else {
+			sync_fence_acc = sync_fence;
+		}
+	}
+
+	if (NULL == sync_fence_acc) {
+		MALI_DEBUG_ASSERT_POINTER(system->signaled_sync_tl);
+
+		/* There was nothing to wait on, so return an already signaled fence. */
+
+		sync_fence_acc = mali_sync_timeline_create_signaled_fence(system->signaled_sync_tl);
+		if (NULL == sync_fence_acc) goto error;
+	}
+
+	/* Return file descriptor for the accumulated sync fence. */
+	return mali_sync_fence_fd_alloc(sync_fence_acc);
+
+error:
+	if (NULL != sync_fence_acc) {
+		sync_fence_put(sync_fence_acc);
+	}
+
+	return -1;
+}
+
+void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker->flag);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for sync fence tracker\n"));
+
+	/* Signal flag and release reference. */
+	mali_sync_flag_signal(sync_fence_tracker->flag, 0);
+	mali_sync_flag_put(sync_fence_tracker->flag);
+
+	/* Nothing can wait on this tracker, so nothing to schedule after release. */
+	schedule_mask = mali_timeline_tracker_release(&sync_fence_tracker->tracker);
+	MALI_DEBUG_ASSERT(MALI_SCHEDULER_MASK_EMPTY == schedule_mask);
+
+	_mali_osk_free(sync_fence_tracker);
+}
+
+#endif /* defined(CONFIG_SYNC) */
diff --git a/utgard/r6p1/common/mali_timeline_sync_fence.h b/utgard/r6p1/common/mali_timeline_sync_fence.h
new file mode 100755
index 0000000..6662b25
--- /dev/null
+++ b/utgard/r6p1/common/mali_timeline_sync_fence.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_timeline_sync_fence.h
+ *
+ * This file contains code related to creating sync fences from timeline fences.
+ */
+
+#ifndef __MALI_TIMELINE_SYNC_FENCE_H__
+#define __MALI_TIMELINE_SYNC_FENCE_H__
+
+#include "mali_timeline.h"
+
+#if defined(CONFIG_SYNC)
+
+/**
+ * Sync fence tracker.
+ */
+struct mali_timeline_sync_fence_tracker {
+	struct mali_sync_flag        *flag;    /**< Sync flag used to connect tracker and sync fence. */
+	struct mali_timeline_tracker  tracker; /**< Timeline tracker. */
+};
+
+/**
+ * Create a sync fence that will be signaled when @ref fence is signaled.
+ *
+ * @param system Timeline system.
+ * @param fence Fence to create sync fence from.
+ * @return File descriptor for new sync fence, or -1 on error.
+ */
+s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence);
+
+/**
+ * Used by the Timeline system to activate a sync fence tracker.
+ *
+ * @param sync_fence_tracker Sync fence tracker.
+ *
+ */
+void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker);
+
+#endif /* defined(CONFIG_SYNC) */
+
+#endif /* __MALI_TIMELINE_SYNC_FENCE_H__ */
diff --git a/utgard/r6p1/common/mali_ukk.h b/utgard/r6p1/common/mali_ukk.h
new file mode 100755
index 0000000..79829b9
--- /dev/null
+++ b/utgard/r6p1/common/mali_ukk.h
@@ -0,0 +1,551 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_ukk.h
+ * Defines the kernel-side interface of the user-kernel interface
+ */
+
+#ifndef __MALI_UKK_H__
+#define __MALI_UKK_H__
+
+#include "mali_osk.h"
+#include "mali_uk_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs
+ *
+ * - The _mali_uk functions are an abstraction of the interface to the device
+ * driver. On certain OSs, this would be implemented via the IOCTL interface.
+ * On other OSs, it could be via extension of some Device Driver Class, or
+ * direct function call for Bare metal/RTOSs.
+ * - It is important to note that:
+ *   -  The Device Driver has implemented the _mali_ukk set of functions
+ *   -  The Base Driver calls the corresponding set of _mali_uku functions.
+ * - What requires porting is solely the calling mechanism from User-side to
+ * Kernel-side, and propagating back the results.
+ * - Each U/K function is associated with a (group, number) pair from
+ * \ref _mali_uk_functions to make it possible for a common function in the
+ * Base Driver and Device Driver to route User/Kernel calls from/to the
+ * correct _mali_uk function. For example, in an IOCTL system, the IOCTL number
+ * would be formed based on the group and number assigned to the _mali_uk
+ * function, as listed in \ref _mali_uk_functions. On the user-side, each
+ * _mali_uku function would just make an IOCTL with the IOCTL-code being an
+ * encoded form of the (group, number) pair. On the kernel-side, the Device
+ * Driver's IOCTL handler decodes the IOCTL-code back into a (group, number)
+ * pair, and uses this to determine which corresponding _mali_ukk should be
+ * called.
+ *   - Refer to \ref _mali_uk_functions for more information about this
+ * (group, number) pairing.
+ * - In a system where there is no distinction between user and kernel-side,
+ * the U/K interface may be implemented as:@code
+ * MALI_STATIC_INLINE _mali_osk_errcode_t _mali_uku_examplefunction( _mali_uk_examplefunction_s *args )
+ * {
+ *     return mali_ukk_examplefunction( args );
+ * }
+ * @endcode
+ * - Therefore, all U/K calls behave \em as \em though they were direct
+ * function calls (but the \b implementation \em need \em not be a direct
+ * function calls)
+ *
+ * @note Naming the _mali_uk functions the same on both User and Kernel sides
+ * on non-RTOS systems causes debugging issues when setting breakpoints. In
+ * this case, it is not clear which function the breakpoint is put on.
+ * Therefore the _mali_uk functions in user space are prefixed with \c _mali_uku
+ * and in kernel space with \c _mali_ukk. The naming for the argument
+ * structures is unaffected.
+ *
+ * - The _mali_uk functions are synchronous.
+ * - Arguments to the _mali_uk functions are passed in a structure. The only
+ * parameter passed to the _mali_uk functions is a pointer to this structure.
+ * This first member of this structure, ctx, is a pointer to a context returned
+ * by _mali_uku_open(). For example:@code
+ * typedef struct
+ * {
+ *     void *ctx;
+ *     u32 number_of_cores;
+ * } _mali_uk_get_gp_number_of_cores_s;
+ * @endcode
+ *
+ * - Each _mali_uk function has its own argument structure named after the
+ *  function. The argument is distinguished by the _s suffix.
+ * - The argument types are defined by the base driver and user-kernel
+ *  interface.
+ * - All _mali_uk functions return a standard \ref _mali_osk_errcode_t.
+ * - Only arguments of type input or input/output need be initialized before
+ * calling a _mali_uk function.
+ * - Arguments of type output and input/output are only valid when the
+ * _mali_uk function returns \ref _MALI_OSK_ERR_OK.
+ * - The \c ctx member is always invalid after it has been used by a
+ * _mali_uk function, except for the context management functions
+ *
+ *
+ * \b Interface \b restrictions
+ *
+ * The requirements of the interface mean that an implementation of the
+ * User-kernel interface may do no 'real' work. For example, the following are
+ * illegal in the User-kernel implementation:
+ * - Calling functions necessary for operation on all systems,  which would
+ * not otherwise get called on RTOS systems.
+ *     - For example, a  U/K interface that calls multiple _mali_ukk functions
+ * during one particular U/K call. This could not be achieved by the same code
+ * which uses direct function calls for the U/K interface.
+ * -  Writing in values to the args members, when otherwise these members would
+ * not hold a useful value for a direct function call U/K interface.
+ *     - For example, U/K interface implementation that take NULL members in
+ * their arguments structure from the user side, but those members are
+ * replaced with non-NULL values in the kernel-side of the U/K interface
+ * implementation. A scratch area for writing data is one such example. In this
+ * case, a direct function call U/K interface would segfault, because no code
+ * would be present to replace the NULL pointer with a meaningful pointer.
+ *     - Note that we discourage the case where the U/K implementation changes
+ * a NULL argument member to non-NULL, and then the Device Driver code (outside
+ * of the U/K layer) re-checks this member for NULL, and corrects it when
+ * necessary. Whilst such code works even on direct function call U/K
+ * intefaces, it reduces the testing coverage of the Device Driver code. This
+ * is because we have no way of testing the NULL == value path on an OS
+ * implementation.
+ *
+ * A number of allowable examples exist where U/K interfaces do 'real' work:
+ * - The 'pointer switching' technique for \ref _mali_ukk_get_system_info
+ *     - In this case, without the pointer switching on direct function call
+ * U/K interface, the Device Driver code still sees the same thing: a pointer
+ * to which it can write memory. This is because such a system has no
+ * distinction between a user and kernel pointer.
+ * - Writing an OS-specific value into the ukk_private member for
+ * _mali_ukk_mem_mmap().
+ *     - In this case, this value is passed around by Device Driver code, but
+ * its actual value is never checked. Device Driver code simply passes it from
+ * the U/K layer to the OSK layer, where it can be acted upon. In this case,
+ * \em some OS implementations of the U/K (_mali_ukk_mem_mmap()) and OSK
+ * (_mali_osk_mem_mapregion_init()) functions will collaborate on the
+ *  meaning of ukk_private member. On other OSs, it may be unused by both
+ * U/K and OSK layers
+ *     - Therefore, on error inside the U/K interface implementation itself,
+ * it will be as though the _mali_ukk function itself had failed, and cleaned
+ * up after itself.
+ *     - Compare this to a direct function call U/K implementation, where all
+ * error cleanup is handled by the _mali_ukk function itself. The direct
+ * function call U/K interface implementation is automatically atomic.
+ *
+ * The last example highlights a consequence of all U/K interface
+ * implementations: they must be atomic with respect to the Device Driver code.
+ * And therefore, should Device Driver code succeed but the U/K implementation
+ * fail afterwards (but before return to user-space), then the U/K
+ * implementation must cause appropriate cleanup actions to preserve the
+ * atomicity of the interface.
+ *
+ * @{
+ */
+
+
+/** @defgroup _mali_uk_context U/K Context management
+ *
+ * These functions allow for initialisation of the user-kernel interface once per process.
+ *
+ * Generally the context will store the OS specific object to communicate with the kernel device driver and further
+ * state information required by the specific implementation. The context is shareable among all threads in the caller process.
+ *
+ * On IOCTL systems, this is likely to be a file descriptor as a result of opening the kernel device driver.
+ *
+ * On a bare-metal/RTOS system with no distinction between kernel and
+ * user-space, the U/K interface simply calls the _mali_ukk variant of the
+ * function by direct function call. In this case, the context returned is the
+ * mali_session_data from _mali_ukk_open().
+ *
+ * The kernel side implementations of the U/K interface expect the first member of the argument structure to
+ * be the context created by _mali_uku_open(). On some OS implementations, the meaning of this context
+ * will be different between user-side and kernel-side. In which case, the kernel-side will need to replace this context
+ * with the kernel-side equivalent, because user-side will not have access to kernel-side data. The context parameter
+ * in the argument structure therefore has to be of type input/output.
+ *
+ * It should be noted that the caller cannot reuse the \c ctx member of U/K
+ * argument structure after a U/K call, because it may be overwritten. Instead,
+ * the context handle must always be stored  elsewhere, and copied into
+ * the appropriate U/K argument structure for each user-side call to
+ * the U/K interface. This is not usually a problem, since U/K argument
+ * structures are usually placed on the stack.
+ *
+ * @{ */
+
+/** @brief Begin a new Mali Device Driver session
+ *
+ * This is used to obtain a per-process context handle for all future U/K calls.
+ *
+ * @param context pointer to storage to return a (void*)context handle.
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_open(void **context);
+
+/** @brief End a Mali Device Driver session
+ *
+ * This should be called when the process no longer requires use of the Mali Device Driver.
+ *
+ * The context handle must not be used after it has been closed.
+ *
+ * @param context pointer to a stored (void*)context handle.
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_close(void **context);
+
+/** @} */ /* end group _mali_uk_context */
+
+
+/** @addtogroup _mali_uk_core U/K Core
+ *
+ * The core functions provide the following functionality:
+ * - verify that the user and kernel API are compatible
+ * - retrieve information about the cores and memory banks in the system
+ * - wait for the result of jobs started on a core
+ *
+ * @{ */
+
+/** @brief Waits for a job notification.
+ *
+ * Sleeps until notified or a timeout occurs. Returns information about the notification.
+ *
+ * @param args see _mali_uk_wait_for_notification_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args);
+
+/** @brief Post a notification to the notification queue of this application.
+ *
+ * @param args see _mali_uk_post_notification_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args);
+
+/** @brief Verifies if the user and kernel side of this API are compatible.
+ *
+ * This function is obsolete, but kept to allow old, incompatible user space
+ * clients to robustly detect the incompatibility.
+ *
+ * @param args see _mali_uk_get_api_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args);
+
+/** @brief Verifies if the user and kernel side of this API are compatible.
+ *
+ * @param args see _mali_uk_get_api_version_v2_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args);
+
+/** @brief Get the user space settings applicable for calling process.
+ *
+ * @param args see _mali_uk_get_user_settings_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args);
+
+/** @brief Get a user space setting applicable for calling process.
+ *
+ * @param args see _mali_uk_get_user_setting_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args);
+
+/* @brief Grant or deny high priority scheduling for this session.
+ *
+ * @param args see _mali_uk_request_high_priority_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args);
+
+/** @brief Make process sleep if the pending big job in kernel  >= MALI_MAX_PENDING_BIG_JOB
+ *
+ */
+_mali_osk_errcode_t _mali_ukk_pending_submit(_mali_uk_pending_submit_s *args);
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @addtogroup _mali_uk_memory U/K Memory
+ *
+ * The memory functions provide functionality with and without a Mali-MMU present.
+ *
+ * For Mali-MMU based systems, the following functionality is provided:
+ * - Initialize and terminate MALI virtual address space
+ * - Allocate/deallocate physical memory to a MALI virtual address range and map into/unmap from the
+ * current process address space
+ * - Map/unmap external physical memory into the MALI virtual address range
+ *
+ * For Mali-nonMMU based systems:
+ * - Allocate/deallocate MALI memory
+ *
+ * @{ */
+
+/** @brief Map Mali Memory into the current user process
+ *
+ * Maps Mali memory into the current user process in a generic way.
+ *
+ * This function is to be used for Mali-MMU mode. The function is available in both Mali-MMU and Mali-nonMMU modes,
+ * but should not be called by a user process in Mali-nonMMU mode.
+ *
+ * The implementation and operation of _mali_ukk_mem_mmap() is dependant on whether the driver is built for Mali-MMU
+ * or Mali-nonMMU:
+ * - In the nonMMU case, _mali_ukk_mem_mmap() requires a physical address to be specified. For this reason, an OS U/K
+ * implementation should not allow this to be called from user-space. In any case, nonMMU implementations are
+ * inherently insecure, and so the overall impact is minimal. Mali-MMU mode should be used if security is desired.
+ * - In the MMU case, _mali_ukk_mem_mmap() the _mali_uk_mem_mmap_s::phys_addr
+ * member is used for the \em Mali-virtual address desired for the mapping. The
+ * implementation of _mali_ukk_mem_mmap() will allocate both the CPU-virtual
+ * and CPU-physical addresses, and can cope with mapping a contiguous virtual
+ * address range to a sequence of non-contiguous physical pages. In this case,
+ * the CPU-physical addresses are not communicated back to the user-side, as
+ * they are unnecsessary; the \em Mali-virtual address range must be used for
+ * programming Mali structures.
+ *
+ * In the second (MMU) case, _mali_ukk_mem_mmap() handles management of
+ * CPU-virtual and CPU-physical ranges, but the \em caller must manage the
+ * \em Mali-virtual address range from the user-side.
+ *
+ * @note Mali-virtual address ranges are entirely separate between processes.
+ * It is not possible for a process to accidentally corrupt another process'
+ * \em Mali-virtual address space.
+ *
+ * @param args see _mali_uk_mem_mmap_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_mmap(_mali_uk_mem_mmap_s *args);
+
+/** @brief Unmap Mali Memory from the current user process
+ *
+ * Unmaps Mali memory from the current user process in a generic way. This only operates on Mali memory supplied
+ * from _mali_ukk_mem_mmap().
+ *
+ * @param args see _mali_uk_mem_munmap_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_munmap(_mali_uk_mem_munmap_s *args);
+
+/** @brief Determine the buffer size necessary for an MMU page table dump.
+ * @param args see _mali_uk_query_mmu_page_table_dump_size_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args);
+/** @brief Dump MMU Page tables.
+ * @param args see _mali_uk_dump_mmu_page_table_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args);
+
+/** @brief Write user data to specified Mali memory without causing segfaults.
+ * @param args see _mali_uk_mem_write_safe_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args);
+
+/** @} */ /* end group _mali_uk_memory */
+
+
+/** @addtogroup _mali_uk_pp U/K Fragment Processor
+ *
+ * The Fragment Processor (aka PP (Pixel Processor)) functions provide the following functionality:
+ * - retrieving version of the fragment processors
+ * - determine number of fragment processors
+ * - starting a job on a fragment processor
+ *
+ * @{ */
+
+/** @brief Issue a request to start a new job on a Fragment Processor.
+ *
+ * If the request fails args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can
+ * try to start the job again.
+ *
+ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job
+ * which the hardware hasn't actually started processing yet. In this case the new job will be started instead and the
+ * existing one returned, otherwise the new job is started and the status field args->status is set to
+ * _MALI_UK_START_JOB_STARTED.
+ *
+ * Job completion can be awaited with _mali_ukk_wait_for_notification().
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_pp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx, _mali_uk_pp_start_job_s *uargs);
+
+/**
+ * @brief Issue a request to start new jobs on both Vertex Processor and Fragment Processor.
+ *
+ * @note Will call into @ref _mali_ukk_pp_start_job and @ref _mali_ukk_gp_start_job.
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_pp_and_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx, _mali_uk_pp_and_gp_start_job_s *uargs);
+
+/** @brief Returns the number of Fragment Processors in the system
+ *
+ * @param args see _mali_uk_get_pp_number_of_cores_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args);
+
+/** @brief Returns the version that all Fragment Processor cores are compatible with.
+ *
+ * This function may only be called when _mali_ukk_get_pp_number_of_cores() indicated at least one Fragment
+ * Processor core is available.
+ *
+ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args);
+
+/** @brief Disable Write-back unit(s) on specified job
+ *
+ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h"
+ */
+void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args);
+
+
+/** @} */ /* end group _mali_uk_pp */
+
+
+/** @addtogroup _mali_uk_gp U/K Vertex Processor
+ *
+ * The Vertex Processor (aka GP (Geometry Processor)) functions provide the following functionality:
+ * - retrieving version of the Vertex Processors
+ * - determine number of Vertex Processors available
+ * - starting a job on a Vertex Processor
+ *
+ * @{ */
+
+/** @brief Issue a request to start a new job on a Vertex Processor.
+ *
+ * If the request fails args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can
+ * try to start the job again.
+ *
+ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job
+ * which the hardware hasn't actually started processing yet. In this case the new job will be started and the
+ * existing one returned, otherwise the new job is started and the status field args->status is set to
+ * _MALI_UK_START_JOB_STARTED.
+ *
+ * Job completion can be awaited with _mali_ukk_wait_for_notification().
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx, _mali_uk_gp_start_job_s *uargs);
+
+/** @brief Returns the number of Vertex Processors in the system.
+ *
+ * @param args see _mali_uk_get_gp_number_of_cores_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args);
+
+/** @brief Returns the version that all Vertex Processor cores are compatible with.
+ *
+ * This function may only be called when _mali_uk_get_gp_number_of_cores() indicated at least one Vertex
+ * Processor core is available.
+ *
+ * @param args see _mali_uk_get_gp_core_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args);
+
+/** @brief Resume or abort suspended Vertex Processor jobs.
+ *
+ * After receiving notification that a Vertex Processor job was suspended from
+ * _mali_ukk_wait_for_notification() you can use this function to resume or abort the job.
+ *
+ * @param args see _mali_uk_gp_suspend_response_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args);
+
+/** @} */ /* end group _mali_uk_gp */
+
+#if defined(CONFIG_MALI400_PROFILING)
+/** @addtogroup _mali_uk_profiling U/K Timeline profiling module
+ * @{ */
+
+/** @brief Add event to profiling buffer.
+ *
+ * @param args see _mali_uk_profiling_add_event_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args);
+
+/** @brief Get profiling stream fd.
+ *
+ * @param args see _mali_uk_profiling_stream_fd_get_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_stream_fd_get(_mali_uk_profiling_stream_fd_get_s *args);
+
+/** @brief Profiling control set.
+ *
+ * @param args see _mali_uk_profiling_control_set_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_control_set(_mali_uk_profiling_control_set_s *args);
+
+/** @} */ /* end group _mali_uk_profiling */
+#endif
+
+/** @addtogroup _mali_uk_vsync U/K VSYNC reporting module
+ * @{ */
+
+/** @brief Report events related to vsync.
+ *
+ * @note Events should be reported when starting to wait for vsync and when the
+ * waiting is finished. This information can then be used in kernel space to
+ * complement the GPU utilization metric.
+ *
+ * @param args see _mali_uk_vsync_event_report_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args);
+
+/** @} */ /* end group _mali_uk_vsync */
+
+/** @addtogroup _mali_sw_counters_report U/K Software counter reporting
+ * @{ */
+
+/** @brief Report software counters.
+ *
+ * @param args see _mali_uk_sw_counters_report_s in "mali_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args);
+
+/** @} */ /* end group _mali_sw_counters_report */
+
+/** @} */ /* end group u_k_api */
+
+/** @} */ /* end group uddapi */
+
+u32 _mali_ukk_report_memory_usage(void);
+
+u32 _mali_ukk_report_total_memory_size(void);
+
+u32 _mali_ukk_utilization_gp_pp(void);
+
+u32 _mali_ukk_utilization_gp(void);
+
+u32 _mali_ukk_utilization_pp(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UKK_H__ */
diff --git a/utgard/r6p1/common/mali_user_settings_db.c b/utgard/r6p1/common/mali_user_settings_db.c
new file mode 100755
index 0000000..a1a613f
--- /dev/null
+++ b/utgard/r6p1/common/mali_user_settings_db.c
@@ -0,0 +1,147 @@
+/**
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_user_settings_db.h"
+#include "mali_session.h"
+
+static u32 mali_user_settings[_MALI_UK_USER_SETTING_MAX];
+const char *_mali_uk_user_setting_descriptions[] = _MALI_UK_USER_SETTING_DESCRIPTIONS;
+
+static void mali_user_settings_notify(_mali_uk_user_setting_t setting, u32 value)
+{
+	mali_bool done = MALI_FALSE;
+
+	/*
+	 * This function gets a bit complicated because we can't hold the session lock while
+	 * allocating notification objects.
+	 */
+
+	while (!done) {
+		u32 i;
+		u32 num_sessions_alloc;
+		u32 num_sessions_with_lock;
+		u32 used_notification_objects = 0;
+		_mali_osk_notification_t **notobjs;
+
+		/* Pre allocate the number of notifications objects we need right now (might change after lock has been taken) */
+		num_sessions_alloc = mali_session_get_count();
+		if (0 == num_sessions_alloc) {
+			/* No sessions to report to */
+			return;
+		}
+
+		notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc);
+		if (NULL == notobjs) {
+			MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n"));
+			return;
+		}
+
+		for (i = 0; i < num_sessions_alloc; i++) {
+			notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_SETTINGS_CHANGED,
+					sizeof(_mali_uk_settings_changed_s));
+			if (NULL != notobjs[i]) {
+				_mali_uk_settings_changed_s *data;
+				data = notobjs[i]->result_buffer;
+
+				data->setting = setting;
+				data->value = value;
+			} else {
+				MALI_PRINT_ERROR(("Failed to notify user space session about setting change (alloc failure %u)\n", i));
+			}
+		}
+
+		mali_session_lock();
+
+		/* number of sessions will not change while we hold the lock */
+		num_sessions_with_lock = mali_session_get_count();
+
+		if (num_sessions_alloc >= num_sessions_with_lock) {
+			/* We have allocated enough notification objects for all the sessions atm */
+			struct mali_session_data *session, *tmp;
+			MALI_SESSION_FOREACH(session, tmp, link) {
+				MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc);
+				if (NULL != notobjs[used_notification_objects]) {
+					mali_session_send_notification(session, notobjs[used_notification_objects]);
+					notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */
+				}
+				used_notification_objects++;
+			}
+			done = MALI_TRUE;
+		}
+
+		mali_session_unlock();
+
+		/* Delete any remaining/unused notification objects */
+		for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) {
+			if (NULL != notobjs[used_notification_objects]) {
+				_mali_osk_notification_delete(notobjs[used_notification_objects]);
+			}
+		}
+
+		_mali_osk_free(notobjs);
+	}
+}
+
+void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value)
+{
+	mali_bool notify = MALI_FALSE;
+
+	if (setting >= _MALI_UK_USER_SETTING_MAX) {
+		MALI_DEBUG_PRINT_ERROR(("Invalid user setting %ud\n"));
+		return;
+	}
+
+	if (mali_user_settings[setting] != value) {
+		notify = MALI_TRUE;
+	}
+
+	mali_user_settings[setting] = value;
+
+	if (notify) {
+		mali_user_settings_notify(setting, value);
+	}
+}
+
+u32 mali_get_user_setting(_mali_uk_user_setting_t setting)
+{
+	if (setting >= _MALI_UK_USER_SETTING_MAX) {
+		return 0;
+	}
+
+	return mali_user_settings[setting];
+}
+
+_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args)
+{
+	_mali_uk_user_setting_t setting;
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	setting = args->setting;
+
+	if (_MALI_UK_USER_SETTING_MAX > setting) {
+		args->value = mali_user_settings[setting];
+		return _MALI_OSK_ERR_OK;
+	} else {
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+}
+
+_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	_mali_osk_memcpy(args->settings, mali_user_settings, sizeof(mali_user_settings));
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r6p1/common/mali_user_settings_db.h b/utgard/r6p1/common/mali_user_settings_db.h
new file mode 100755
index 0000000..6828dc7
--- /dev/null
+++ b/utgard/r6p1/common/mali_user_settings_db.h
@@ -0,0 +1,39 @@
+/**
+ * Copyright (C) 2012-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_USER_SETTINGS_DB_H__
+#define __MALI_USER_SETTINGS_DB_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+
+/** @brief Set Mali user setting in DB
+ *
+ * Update the DB with a new value for \a setting. If the value is different from theprevious set value running sessions will be notified of the change.
+ *
+ * @param setting the setting to be changed
+ * @param value the new value to set
+ */
+void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value);
+
+/** @brief Get current Mali user setting value from DB
+ *
+ * @param setting the setting to extract
+ * @return the value of the selected setting
+ */
+u32 mali_get_user_setting(_mali_uk_user_setting_t setting);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  /* __MALI_KERNEL_USER_SETTING__ */
diff --git a/utgard/r6p1/include/linux/mali/mali_utgard.h b/utgard/r6p1/include/linux/mali/mali_utgard.h
new file mode 100755
index 0000000..a2416a5
--- /dev/null
+++ b/utgard/r6p1/include/linux/mali/mali_utgard.h
@@ -0,0 +1,526 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_utgard.h
+ * Defines types and interface exposed by the Mali Utgard device driver
+ */
+
+#ifndef __MALI_UTGARD_H__
+#define __MALI_UTGARD_H__
+
+#include "mali_osk_types.h"
+#ifdef CONFIG_MALI_DEVFREQ
+#include <linux/devfreq.h>
+#include "mali_pm_metrics.h"
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+#endif
+
+#define MALI_GPU_NAME_UTGARD "mali-utgard"
+
+
+#define MALI_OFFSET_GP                    0x00000
+#define MALI_OFFSET_GP_MMU                0x03000
+
+#define MALI_OFFSET_PP0                   0x08000
+#define MALI_OFFSET_PP0_MMU               0x04000
+#define MALI_OFFSET_PP1                   0x0A000
+#define MALI_OFFSET_PP1_MMU               0x05000
+#define MALI_OFFSET_PP2                   0x0C000
+#define MALI_OFFSET_PP2_MMU               0x06000
+#define MALI_OFFSET_PP3                   0x0E000
+#define MALI_OFFSET_PP3_MMU               0x07000
+
+#define MALI_OFFSET_PP4                   0x28000
+#define MALI_OFFSET_PP4_MMU               0x1C000
+#define MALI_OFFSET_PP5                   0x2A000
+#define MALI_OFFSET_PP5_MMU               0x1D000
+#define MALI_OFFSET_PP6                   0x2C000
+#define MALI_OFFSET_PP6_MMU               0x1E000
+#define MALI_OFFSET_PP7                   0x2E000
+#define MALI_OFFSET_PP7_MMU               0x1F000
+
+#define MALI_OFFSET_L2_RESOURCE0          0x01000
+#define MALI_OFFSET_L2_RESOURCE1          0x10000
+#define MALI_OFFSET_L2_RESOURCE2          0x11000
+
+#define MALI400_OFFSET_L2_CACHE0          MALI_OFFSET_L2_RESOURCE0
+#define MALI450_OFFSET_L2_CACHE0          MALI_OFFSET_L2_RESOURCE1
+#define MALI450_OFFSET_L2_CACHE1          MALI_OFFSET_L2_RESOURCE0
+#define MALI450_OFFSET_L2_CACHE2          MALI_OFFSET_L2_RESOURCE2
+#define MALI470_OFFSET_L2_CACHE1          MALI_OFFSET_L2_RESOURCE0
+
+#define MALI_OFFSET_BCAST                 0x13000
+#define MALI_OFFSET_DLBU                  0x14000
+
+#define MALI_OFFSET_PP_BCAST              0x16000
+#define MALI_OFFSET_PP_BCAST_MMU          0x15000
+
+#define MALI_OFFSET_PMU                   0x02000
+#define MALI_OFFSET_DMA                   0x12000
+
+/* Mali-300 */
+
+#define MALI_GPU_RESOURCES_MALI300(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI300_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq)
+
+/* Mali-400 */
+
+#define MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+	/* Mali-450 */
+#define MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI450_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP4, pp3_irq, base_addr + MALI_OFFSET_PP4_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP5, pp4_irq, base_addr + MALI_OFFSET_PP5_MMU, pp4_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP6, pp5_irq, base_addr + MALI_OFFSET_PP6_MMU, pp5_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP6_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP4, pp4_irq, base_addr + MALI_OFFSET_PP4_MMU, pp4_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP5, pp5_irq, base_addr + MALI_OFFSET_PP5_MMU, pp5_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(6, base_addr + MALI_OFFSET_PP6, pp6_irq, base_addr + MALI_OFFSET_PP6_MMU, pp6_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(7, base_addr + MALI_OFFSET_PP7, pp7_irq, base_addr + MALI_OFFSET_PP7_MMU, pp7_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP8_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+	/* Mali - 470 */
+#define MALI_GPU_RESOURCES_MALI470_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCE_L2(addr) \
+	{ \
+		.name = "Mali_L2", \
+			.flags = IORESOURCE_MEM, \
+				 .start = addr, \
+					  .end   = addr + 0x200, \
+	},
+
+#define MALI_GPU_RESOURCE_GP(gp_addr, gp_irq) \
+	{ \
+		.name = "Mali_GP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_addr, \
+					  .end =   gp_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_irq, \
+					  .end   = gp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_GP_WITH_MMU(gp_addr, gp_irq, gp_mmu_addr, gp_mmu_irq) \
+	{ \
+		.name = "Mali_GP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_addr, \
+					  .end =   gp_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_irq, \
+					  .end   = gp_irq, \
+	}, \
+	{ \
+		.name = "Mali_GP_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_mmu_addr, \
+					  .end =   gp_mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_mmu_irq, \
+					  .end =   gp_mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_PP(pp_addr, pp_irq) \
+	{ \
+		.name = "Mali_PP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_PP_WITH_MMU(id, pp_addr, pp_irq, pp_mmu_addr, pp_mmu_irq) \
+	{ \
+		.name = "Mali_PP" #id, \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_mmu_addr, \
+					  .end =   pp_mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_mmu_irq, \
+					  .end =   pp_mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_MMU(mmu_addr, mmu_irq) \
+	{ \
+		.name = "Mali_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = mmu_addr, \
+					  .end =   mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = mmu_irq, \
+					  .end =   mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_PMU(pmu_addr) \
+	{ \
+		.name = "Mali_PMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pmu_addr, \
+					  .end =   pmu_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_DMA(dma_addr) \
+	{ \
+		.name = "Mali_DMA", \
+			.flags = IORESOURCE_MEM, \
+				 .start = dma_addr, \
+					  .end = dma_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_DLBU(dlbu_addr) \
+	{ \
+		.name = "Mali_DLBU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = dlbu_addr, \
+					  .end = dlbu_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_BCAST(bcast_addr) \
+	{ \
+		.name = "Mali_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = bcast_addr, \
+					  .end = bcast_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_PP_BCAST(pp_addr, pp_irq) \
+	{ \
+		.name = "Mali_PP_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP_Broadcast_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_PP_MMU_BCAST(pp_mmu_bcast_addr) \
+	{ \
+		.name = "Mali_PP_MMU_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_mmu_bcast_addr, \
+					  .end = pp_mmu_bcast_addr + 0x100, \
+	},
+
+	struct mali_gpu_utilization_data {
+		unsigned int utilization_gpu; /* Utilization for GP and all PP cores combined, 0 = no utilization, 256 = full utilization */
+		unsigned int utilization_gp;  /* Utilization for GP core only, 0 = no utilization, 256 = full utilization */
+		unsigned int utilization_pp;  /* Utilization for all PP cores combined, 0 = no utilization, 256 = full utilization */
+	};
+
+	struct mali_gpu_clk_item {
+		unsigned int clock; /* unit(MHz) */
+		unsigned int vol;
+	};
+
+	struct mali_gpu_clock {
+		struct mali_gpu_clk_item *item;
+		unsigned int num_of_steps;
+	};
+
+	struct mali_gpu_device_data {
+		/* Shared GPU memory */
+		unsigned long shared_mem_size;
+
+		/*
+		 * Mali PMU switch delay.
+		 * Only needed if the power gates are connected to the PMU in a high fanout
+		 * network. This value is the number of Mali clock cycles it takes to
+		 * enable the power gates and turn on the power mesh.
+		 * This value will have no effect if a daisy chain implementation is used.
+		 */
+		u32 pmu_switch_delay;
+
+		/* Mali Dynamic power domain configuration in sequence from 0-11
+		 *  GP  PP0 PP1  PP2  PP3  PP4  PP5  PP6  PP7, L2$0 L2$1 L2$2
+		 */
+		u16 pmu_domain_config[12];
+
+		/* Dedicated GPU memory range (physical). */
+		unsigned long dedicated_mem_start;
+		unsigned long dedicated_mem_size;
+
+		/* Frame buffer memory to be accessible by Mali GPU (physical) */
+		unsigned long fb_start;
+		unsigned long fb_size;
+
+		/* Max runtime [ms] for jobs */
+		int max_job_runtime;
+
+		/* Report GPU utilization and related control in this interval (specified in ms) */
+		unsigned long control_interval;
+
+		/* Function that will receive periodic GPU utilization numbers */
+		void (*utilization_callback)(struct mali_gpu_utilization_data *data);
+
+		/* Fuction that platform callback for freq setting, needed when CONFIG_MALI_DVFS enabled */
+		int (*set_freq)(int setting_clock_step);
+		/* Function that platfrom report it's clock info which driver can set, needed when CONFIG_MALI_DVFS enabled */
+		void (*get_clock_info)(struct mali_gpu_clock **data);
+		/* Function that get the current clock info, needed when CONFIG_MALI_DVFS enabled */
+		int (*get_freq)(void);
+		/* Function that init the mali gpu secure mode */
+		int (*secure_mode_init)(void);
+		/* Function that deinit the mali gpu secure mode */
+		void (*secure_mode_deinit)(void);
+		/* Function that enable the mali gpu secure mode */
+		int (*secure_mode_enable)(void);
+		/* Function that disable the mali gpu secure mode */
+		int (*secure_mode_disable)(void);
+		/* ipa related interface customer need register */
+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
+		struct devfreq_cooling_power *gpu_cooling_ops;
+#endif
+	};
+
+	/**
+	 * Pause the scheduling and power state changes of Mali device driver.
+	 * mali_dev_resume() must always be called as soon as possible after this function
+	 * in order to resume normal operation of the Mali driver.
+	 */
+	void mali_dev_pause(void);
+
+	/**
+	 * Resume scheduling and allow power changes in Mali device driver.
+	 * This must always be called after mali_dev_pause().
+	 */
+	void mali_dev_resume(void);
+
+	/** @brief Set the desired number of PP cores to use.
+	 *
+	 * The internal Mali PMU will be used, if present, to physically power off the PP cores.
+	 *
+	 * @param num_cores The number of desired cores
+	 * @return 0 on success, otherwise error. -EINVAL means an invalid number of cores was specified.
+	 */
+	int mali_perf_set_num_pp_cores(unsigned int num_cores);
+
+#endif
diff --git a/utgard/r6p1/include/linux/mali/mali_utgard_ioctl.h b/utgard/r6p1/include/linux/mali/mali_utgard_ioctl.h
new file mode 100755
index 0000000..dfe152b
--- /dev/null
+++ b/utgard/r6p1/include/linux/mali/mali_utgard_ioctl.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library. 
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules 
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting 
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions 
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify 
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so. 
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef __MALI_UTGARD_IOCTL_H__
+#define __MALI_UTGARD_IOCTL_H__
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <linux/fs.h>       /* file system operations */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file mali_kernel_ioctl.h
+ * Interface to the Linux device driver.
+ * This file describes the interface needed to use the Linux device driver.
+ * Its interface is designed to used by the HAL implementation through a thin arch layer.
+ */
+
+/**
+ * ioctl commands
+ */
+
+#define MALI_IOC_BASE           0x82
+#define MALI_IOC_CORE_BASE      (_MALI_UK_CORE_SUBSYSTEM      + MALI_IOC_BASE)
+#define MALI_IOC_MEMORY_BASE    (_MALI_UK_MEMORY_SUBSYSTEM    + MALI_IOC_BASE)
+#define MALI_IOC_PP_BASE        (_MALI_UK_PP_SUBSYSTEM        + MALI_IOC_BASE)
+#define MALI_IOC_GP_BASE        (_MALI_UK_GP_SUBSYSTEM        + MALI_IOC_BASE)
+#define MALI_IOC_PROFILING_BASE (_MALI_UK_PROFILING_SUBSYSTEM + MALI_IOC_BASE)
+#define MALI_IOC_VSYNC_BASE     (_MALI_UK_VSYNC_SUBSYSTEM + MALI_IOC_BASE)
+
+#define MALI_IOC_WAIT_FOR_NOTIFICATION      _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_WAIT_FOR_NOTIFICATION, _mali_uk_wait_for_notification_s)
+#define MALI_IOC_GET_API_VERSION            _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, u32)
+#define MALI_IOC_GET_API_VERSION_V2         _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, _mali_uk_get_api_version_v2_s)
+#define MALI_IOC_POST_NOTIFICATION          _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_POST_NOTIFICATION, _mali_uk_post_notification_s)
+#define MALI_IOC_GET_USER_SETTING           _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTING, _mali_uk_get_user_setting_s)
+#define MALI_IOC_GET_USER_SETTINGS          _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTINGS, _mali_uk_get_user_settings_s)
+#define MALI_IOC_REQUEST_HIGH_PRIORITY      _IOW (MALI_IOC_CORE_BASE, _MALI_UK_REQUEST_HIGH_PRIORITY, _mali_uk_request_high_priority_s)
+#define MALI_IOC_TIMELINE_GET_LATEST_POINT  _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_GET_LATEST_POINT, _mali_uk_timeline_get_latest_point_s)
+#define MALI_IOC_TIMELINE_WAIT              _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_WAIT, _mali_uk_timeline_wait_s)
+#define MALI_IOC_TIMELINE_CREATE_SYNC_FENCE _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_CREATE_SYNC_FENCE, _mali_uk_timeline_create_sync_fence_s)
+#define MALI_IOC_SOFT_JOB_START             _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_START, _mali_uk_soft_job_start_s)
+#define MALI_IOC_SOFT_JOB_SIGNAL            _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_SIGNAL, _mali_uk_soft_job_signal_s)
+#define MALI_IOC_PENDING_SUBMIT             _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_PENDING_SUBMIT, _mali_uk_pending_submit_s)
+
+#define MALI_IOC_MEM_ALLOC                  _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_ALLOC_MEM, _mali_uk_alloc_mem_s)
+#define MALI_IOC_MEM_FREE                   _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_FREE_MEM, _mali_uk_free_mem_s)
+#define MALI_IOC_MEM_BIND                   _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_BIND_MEM, _mali_uk_bind_mem_s)
+#define MALI_IOC_MEM_UNBIND                 _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_UNBIND_MEM, _mali_uk_unbind_mem_s)
+#define MALI_IOC_MEM_COW                    _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_COW_MEM, _mali_uk_cow_mem_s)
+#define MALI_IOC_MEM_COW_MODIFY_RANGE       _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_COW_MODIFY_RANGE, _mali_uk_cow_modify_range_s)
+#define MALI_IOC_MEM_RESIZE                 _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_RESIZE_MEM, _mali_uk_mem_resize_s)
+#define MALI_IOC_MEM_DMA_BUF_GET_SIZE       _IOR(MALI_IOC_MEMORY_BASE, _MALI_UK_DMA_BUF_GET_SIZE, _mali_uk_dma_buf_get_size_s)
+#define MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE _IOR (MALI_IOC_MEMORY_BASE, _MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, _mali_uk_query_mmu_page_table_dump_size_s)
+#define MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE    _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_DUMP_MMU_PAGE_TABLE, _mali_uk_dump_mmu_page_table_s)
+#define MALI_IOC_MEM_WRITE_SAFE             _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_MEM_WRITE_SAFE, _mali_uk_mem_write_safe_s)
+
+#define MALI_IOC_PP_START_JOB               _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_START_JOB, _mali_uk_pp_start_job_s)
+#define MALI_IOC_PP_AND_GP_START_JOB        _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_AND_GP_START_JOB, _mali_uk_pp_and_gp_start_job_s)
+#define MALI_IOC_PP_NUMBER_OF_CORES_GET     _IOR (MALI_IOC_PP_BASE, _MALI_UK_GET_PP_NUMBER_OF_CORES, _mali_uk_get_pp_number_of_cores_s)
+#define MALI_IOC_PP_CORE_VERSION_GET        _IOR (MALI_IOC_PP_BASE, _MALI_UK_GET_PP_CORE_VERSION, _mali_uk_get_pp_core_version_s)
+#define MALI_IOC_PP_DISABLE_WB              _IOW (MALI_IOC_PP_BASE, _MALI_UK_PP_DISABLE_WB, _mali_uk_pp_disable_wb_s)
+
+#define MALI_IOC_GP2_START_JOB              _IOWR(MALI_IOC_GP_BASE, _MALI_UK_GP_START_JOB, _mali_uk_gp_start_job_s)
+#define MALI_IOC_GP2_NUMBER_OF_CORES_GET    _IOR (MALI_IOC_GP_BASE, _MALI_UK_GET_GP_NUMBER_OF_CORES, _mali_uk_get_gp_number_of_cores_s)
+#define MALI_IOC_GP2_CORE_VERSION_GET       _IOR (MALI_IOC_GP_BASE, _MALI_UK_GET_GP_CORE_VERSION, _mali_uk_get_gp_core_version_s)
+#define MALI_IOC_GP2_SUSPEND_RESPONSE       _IOW (MALI_IOC_GP_BASE, _MALI_UK_GP_SUSPEND_RESPONSE,_mali_uk_gp_suspend_response_s)
+
+#define MALI_IOC_PROFILING_ADD_EVENT        _IOWR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_ADD_EVENT, _mali_uk_profiling_add_event_s)
+#define MALI_IOC_PROFILING_REPORT_SW_COUNTERS  _IOW (MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_REPORT_SW_COUNTERS, _mali_uk_sw_counters_report_s)
+#define MALI_IOC_PROFILING_MEMORY_USAGE_GET _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_MEMORY_USAGE_GET, _mali_uk_profiling_memory_usage_get_s)
+#define MALI_IOC_PROFILING_STREAM_FD_GET        _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_STREAM_FD_GET, _mali_uk_profiling_stream_fd_get_s)
+#define MALI_IOC_PROILING_CONTROL_SET   _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_CONTROL_SET, _mali_uk_profiling_control_set_s)
+
+#define MALI_IOC_VSYNC_EVENT_REPORT         _IOW (MALI_IOC_VSYNC_BASE, _MALI_UK_VSYNC_EVENT_REPORT, _mali_uk_vsync_event_report_s)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_IOCTL_H__ */
diff --git a/utgard/r6p1/include/linux/mali/mali_utgard_profiling_events.h b/utgard/r6p1/include/linux/mali/mali_utgard_profiling_events.h
new file mode 100755
index 0000000..eea6dac
--- /dev/null
+++ b/utgard/r6p1/include/linux/mali/mali_utgard_profiling_events.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library. 
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules 
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting 
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions 
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify 
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so. 
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+
+#ifndef _MALI_UTGARD_PROFILING_EVENTS_H_
+#define _MALI_UTGARD_PROFILING_EVENTS_H_
+
+/*
+ * The event ID is a 32 bit value consisting of different fields
+ * reserved, 4 bits, for future use
+ * event type, 4 bits, cinstr_profiling_event_type_t
+ * event channel, 8 bits, the source of the event.
+ * event data, 16 bit field, data depending on event type
+ */
+
+/**
+ * Specifies what kind of event this is
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_TYPE_SINGLE  = 0 << 24,
+	MALI_PROFILING_EVENT_TYPE_START   = 1 << 24,
+	MALI_PROFILING_EVENT_TYPE_STOP    = 2 << 24,
+	MALI_PROFILING_EVENT_TYPE_SUSPEND = 3 << 24,
+	MALI_PROFILING_EVENT_TYPE_RESUME  = 4 << 24,
+} cinstr_profiling_event_type_t;
+
+
+/**
+ * Secifies the channel/source of the event
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_CHANNEL_SOFTWARE =  0 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_GP0      =  1 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP0      =  5 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP1      =  6 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP2      =  7 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP3      =  8 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP4      =  9 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP5      = 10 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP6      = 11 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP7      = 12 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_GPU      = 21 << 16,
+} cinstr_profiling_event_channel_t;
+
+
+#define MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(num) (((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) + (num)) << 16)
+#define MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(num) (((MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) + (num)) << 16)
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from software channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_NONE                  = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_NEW_FRAME         = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_FLUSH                 = 2,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SWAP_BUFFERS      = 3,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_FB_EVENT              = 4,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE            = 5,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE            = 6,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_READBACK              = 7,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_WRITEBACK             = 8,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_ENTER_API_FUNC        = 10,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC        = 11,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_DISCARD_ATTACHMENTS   = 13,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_TRY_LOCK          = 53,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_LOCK              = 54,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_UNLOCK            = 55,
+	MALI_PROFILING_EVENT_REASON_SINGLE_LOCK_CONTENDED           = 56,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_MALI_FENCE_DUP    = 57,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SET_PP_JOB_FENCE  = 58,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_WAIT_SYNC         = 59,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_FENCE_SYNC = 60,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_NATIVE_FENCE_SYNC = 61,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FENCE_FLUSH       = 62,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FLUSH_SERVER_WAITS = 63,
+} cinstr_profiling_event_reason_single_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel
+ * to inform whether the core is physical or virtual
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL  = 0,
+	MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL   = 1,
+} cinstr_profiling_event_reason_start_stop_hw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel
+ */
+typedef enum {
+	/*MALI_PROFILING_EVENT_REASON_START_STOP_SW_NONE            = 0,*/
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_MALI            = 1,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_CALLBACK_THREAD = 2,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_WORKER_THREAD   = 3,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF     = 4,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF      = 5,
+} cinstr_profiling_event_reason_start_stop_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SUSPEND/RESUME is used from software channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_NONE                     =  0, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PIPELINE_FULL            =  1, /* NOT used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC                    = 26, /* used in some build configurations */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_WAIT           = 27, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_SYNC           = 28, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_FILTER_CLEANUP   = 29, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_TEXTURE          = 30, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_MIPLEVEL       = 31, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_READPIXELS     = 32, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SWAP_IMMEDIATE  = 33, /* NOT used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_QUEUE_BUFFER         = 34, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_DEQUEUE_BUFFER       = 35, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_UMP_LOCK                 = 36, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_GLOBAL_LOCK          = 37, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_SWAP                 = 38, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_MALI_EGL_IMAGE_SYNC_WAIT = 39, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GP_JOB_HANDLING          = 40, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PP_JOB_HANDLING          = 41, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_MERGE     = 42, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_DUP       = 43,
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_FLUSH_SERVER_WAITS   = 44,
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SYNC            = 45, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_JOBS_WAIT             = 46, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOFRAMES_WAIT         = 47, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOJOBS_WAIT           = 48, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_SUBMIT_LIMITER_WAIT      = 49, /* USED */
+} cinstr_profiling_event_reason_suspend_resume_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from a HW channel (GPx+PPx)
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_NONE          = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_INTERRUPT     = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH         = 2,
+} cinstr_profiling_event_reason_single_hw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from the GPU channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_NONE              = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE  = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS      = 2,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS      = 3,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS      = 4,
+} cinstr_profiling_event_reason_single_gpu_t;
+
+/**
+ * These values are applicable for the 3rd data parameter when
+ * the type MALI_PROFILING_EVENT_TYPE_START is used from the software channel
+ * with the MALI_PROFILING_EVENT_REASON_START_STOP_BOTTOM_HALF reason.
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_DATA_CORE_GP0             =  1,
+	MALI_PROFILING_EVENT_DATA_CORE_PP0             =  5,
+	MALI_PROFILING_EVENT_DATA_CORE_PP1             =  6,
+	MALI_PROFILING_EVENT_DATA_CORE_PP2             =  7,
+	MALI_PROFILING_EVENT_DATA_CORE_PP3             =  8,
+	MALI_PROFILING_EVENT_DATA_CORE_PP4             =  9,
+	MALI_PROFILING_EVENT_DATA_CORE_PP5             = 10,
+	MALI_PROFILING_EVENT_DATA_CORE_PP6             = 11,
+	MALI_PROFILING_EVENT_DATA_CORE_PP7             = 12,
+	MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU         = 22, /* GP0 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU         = 26, /* PP0 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP1_MMU         = 27, /* PP1 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP2_MMU         = 28, /* PP2 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP3_MMU         = 29, /* PP3 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP4_MMU         = 30, /* PP4 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP5_MMU         = 31, /* PP5 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP6_MMU         = 32, /* PP6 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP7_MMU         = 33, /* PP7 + 21 */
+
+} cinstr_profiling_event_data_core_t;
+
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0 + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0 + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU + (num))
+
+
+#endif /*_MALI_UTGARD_PROFILING_EVENTS_H_*/
diff --git a/utgard/r6p1/include/linux/mali/mali_utgard_profiling_gator_api.h b/utgard/r6p1/include/linux/mali/mali_utgard_profiling_gator_api.h
new file mode 100755
index 0000000..c82d8b1
--- /dev/null
+++ b/utgard/r6p1/include/linux/mali/mali_utgard_profiling_gator_api.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2013, 2015-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library. 
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules 
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting 
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions 
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify 
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so. 
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef __MALI_UTGARD_PROFILING_GATOR_API_H__
+#define __MALI_UTGARD_PROFILING_GATOR_API_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MALI_PROFILING_API_VERSION 4
+
+#define MAX_NUM_L2_CACHE_CORES 3
+#define MAX_NUM_FP_CORES 8
+#define MAX_NUM_VP_CORES 1
+
+#define _MALI_SPCIAL_COUNTER_DESCRIPTIONS \
+	{                                           \
+		"Filmstrip_cnt0",                 \
+		"Frequency",       \
+		"Voltage",       \
+		"vertex",     \
+		"fragment",         \
+		"Total_alloc_pages",        \
+	};
+
+#define _MALI_MEM_COUTNER_DESCRIPTIONS \
+	{                                           \
+		"untyped_memory",                 \
+		"vertex_index_buffer",       \
+		"texture_buffer",       \
+		"varying_buffer",     \
+		"render_target",         \
+		"pbuffer_buffer",        \
+		"plbu_heap",            \
+		"pointer_array_buffer",             \
+		"slave_tilelist",          \
+		"untyped_gp_cmdlist",     \
+		"polygon_cmdlist",               \
+		"texture_descriptor",               \
+		"render_state_word",               \
+		"shader",               \
+		"stream_buffer",               \
+		"fragment_stack",               \
+		"uniform",               \
+		"untyped_frame_pool",               \
+		"untyped_surface",               \
+	};
+
+/** The list of events supported by the Mali DDK. */
+typedef enum {
+	/* Vertex processor activity */
+	ACTIVITY_VP_0 = 0,
+
+	/* Fragment processor activity */
+	ACTIVITY_FP_0,
+	ACTIVITY_FP_1,
+	ACTIVITY_FP_2,
+	ACTIVITY_FP_3,
+	ACTIVITY_FP_4,
+	ACTIVITY_FP_5,
+	ACTIVITY_FP_6,
+	ACTIVITY_FP_7,
+
+	/* L2 cache counters */
+	COUNTER_L2_0_C0,
+	COUNTER_L2_0_C1,
+	COUNTER_L2_1_C0,
+	COUNTER_L2_1_C1,
+	COUNTER_L2_2_C0,
+	COUNTER_L2_2_C1,
+
+	/* Vertex processor counters */
+	COUNTER_VP_0_C0,
+	COUNTER_VP_0_C1,
+
+	/* Fragment processor counters */
+	COUNTER_FP_0_C0,
+	COUNTER_FP_0_C1,
+	COUNTER_FP_1_C0,
+	COUNTER_FP_1_C1,
+	COUNTER_FP_2_C0,
+	COUNTER_FP_2_C1,
+	COUNTER_FP_3_C0,
+	COUNTER_FP_3_C1,
+	COUNTER_FP_4_C0,
+	COUNTER_FP_4_C1,
+	COUNTER_FP_5_C0,
+	COUNTER_FP_5_C1,
+	COUNTER_FP_6_C0,
+	COUNTER_FP_6_C1,
+	COUNTER_FP_7_C0,
+	COUNTER_FP_7_C1,
+
+	/*
+	 * If more hardware counters are added, the _mali_osk_hw_counter_table
+	 * below should also be updated.
+	 */
+
+	/* EGL software counters */
+	COUNTER_EGL_BLIT_TIME,
+
+	/* GLES software counters */
+	COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+	COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+	COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+	COUNTER_GLES_DRAW_ARRAYS_CALLS,
+	COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+	COUNTER_GLES_DRAW_POINTS,
+	COUNTER_GLES_DRAW_LINES,
+	COUNTER_GLES_DRAW_LINE_LOOP,
+	COUNTER_GLES_DRAW_LINE_STRIP,
+	COUNTER_GLES_DRAW_TRIANGLES,
+	COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+	COUNTER_GLES_DRAW_TRIANGLE_FAN,
+	COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+	COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+	COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+	COUNTER_GLES_UPLOAD_VBO_TIME,
+	COUNTER_GLES_NUM_FLUSHES,
+	COUNTER_GLES_NUM_VSHADERS_GENERATED,
+	COUNTER_GLES_NUM_FSHADERS_GENERATED,
+	COUNTER_GLES_VSHADER_GEN_TIME,
+	COUNTER_GLES_FSHADER_GEN_TIME,
+	COUNTER_GLES_INPUT_TRIANGLES,
+	COUNTER_GLES_VXCACHE_HIT,
+	COUNTER_GLES_VXCACHE_MISS,
+	COUNTER_GLES_VXCACHE_COLLISION,
+	COUNTER_GLES_CULLED_TRIANGLES,
+	COUNTER_GLES_CULLED_LINES,
+	COUNTER_GLES_BACKFACE_TRIANGLES,
+	COUNTER_GLES_GBCLIP_TRIANGLES,
+	COUNTER_GLES_GBCLIP_LINES,
+	COUNTER_GLES_TRIANGLES_DRAWN,
+	COUNTER_GLES_DRAWCALL_TIME,
+	COUNTER_GLES_TRIANGLES_COUNT,
+	COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+	COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+	COUNTER_GLES_FAN_TRIANGLES_COUNT,
+	COUNTER_GLES_LINES_COUNT,
+	COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+	COUNTER_GLES_STRIP_LINES_COUNT,
+	COUNTER_GLES_LOOP_LINES_COUNT,
+
+	/* Special counter */
+
+	/* Framebuffer capture pseudo-counter */
+	COUNTER_FILMSTRIP,
+	COUNTER_FREQUENCY,
+	COUNTER_VOLTAGE,
+	COUNTER_VP_ACTIVITY,
+	COUNTER_FP_ACTIVITY,
+	COUNTER_TOTAL_ALLOC_PAGES,
+
+	/* Memory usage counter */
+	COUNTER_MEM_UNTYPED,
+	COUNTER_MEM_VB_IB,
+	COUNTER_MEM_TEXTURE,
+	COUNTER_MEM_VARYING,
+	COUNTER_MEM_RT,
+	COUNTER_MEM_PBUFFER,
+	/* memory usages for gp command */
+	COUNTER_MEM_PLBU_HEAP,
+	COUNTER_MEM_POINTER_ARRAY,
+	COUNTER_MEM_SLAVE_TILELIST,
+	COUNTER_MEM_UNTYPE_GP_CMDLIST,
+	/* memory usages for polygon list command */
+	COUNTER_MEM_POLYGON_CMDLIST,
+	/* memory usages for pp command */
+	COUNTER_MEM_TD,
+	COUNTER_MEM_RSW,
+	/* other memory usages */
+	COUNTER_MEM_SHADER,
+	COUNTER_MEM_STREAMS,
+	COUNTER_MEM_FRAGMENT_STACK,
+	COUNTER_MEM_UNIFORM,
+	/* Special mem usage, which is used for mem pool allocation */
+	COUNTER_MEM_UNTYPE_MEM_POOL,
+	COUNTER_MEM_UNTYPE_SURFACE,
+
+	NUMBER_OF_EVENTS
+} _mali_osk_counter_id;
+
+#define FIRST_ACTIVITY_EVENT    ACTIVITY_VP_0
+#define LAST_ACTIVITY_EVENT     ACTIVITY_FP_7
+
+#define FIRST_HW_COUNTER        COUNTER_L2_0_C0
+#define LAST_HW_COUNTER         COUNTER_FP_7_C1
+
+#define FIRST_SW_COUNTER        COUNTER_EGL_BLIT_TIME
+#define LAST_SW_COUNTER         COUNTER_GLES_LOOP_LINES_COUNT
+
+#define FIRST_SPECIAL_COUNTER   COUNTER_FILMSTRIP
+#define LAST_SPECIAL_COUNTER    COUNTER_TOTAL_ALLOC_PAGES
+
+#define FIRST_MEM_COUNTER               COUNTER_MEM_UNTYPED
+#define LAST_MEM_COUNTER                COUNTER_MEM_UNTYPE_SURFACE
+
+#define MALI_PROFILING_MEM_COUNTERS_NUM (LAST_MEM_COUNTER - FIRST_MEM_COUNTER + 1)
+#define MALI_PROFILING_SPECIAL_COUNTERS_NUM     (LAST_SPECIAL_COUNTER - FIRST_SPECIAL_COUNTER + 1)
+#define MALI_PROFILING_SW_COUNTERS_NUM  (LAST_SW_COUNTER - FIRST_SW_COUNTER + 1)
+
+/**
+ * Define the stream header type for porfiling stream.
+ */
+#define  STREAM_HEADER_FRAMEBUFFER 0x05         /* The stream packet header type for framebuffer dumping. */
+#define STREAM_HEADER_COUNTER_VALUE  0x09       /* The stream packet header type for hw/sw/memory counter sampling. */
+#define STREAM_HEADER_CORE_ACTIVITY 0x0a                /* The stream packet header type for activity counter sampling. */
+#define STREAM_HEADER_SIZE      5
+
+/**
+ * Define the packet header type of profiling control packet.
+ */
+#define PACKET_HEADER_ERROR            0x80             /* The response packet header type if error. */
+#define PACKET_HEADER_ACK              0x81             /* The response packet header type if OK. */
+#define PACKET_HEADER_COUNTERS_REQUEST 0x82             /* The control packet header type to request counter information from ddk. */
+#define PACKET_HEADER_COUNTERS_ACK         0x83         /* The response packet header type to send out counter information. */
+#define PACKET_HEADER_COUNTERS_ENABLE  0x84             /* The control packet header type to enable counters. */
+#define PACKET_HEADER_START_CAPTURE_VALUE            0x85               /* The control packet header type to start capture values. */
+
+#define PACKET_HEADER_SIZE      5
+
+/**
+ * Structure to pass performance counter data of a Mali core
+ */
+typedef struct _mali_profiling_core_counters {
+	u32 source0;
+	u32 value0;
+	u32 source1;
+	u32 value1;
+} _mali_profiling_core_counters;
+
+/**
+ * Structure to pass performance counter data of Mali L2 cache cores
+ */
+typedef struct _mali_profiling_l2_counter_values {
+	struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+} _mali_profiling_l2_counter_values;
+
+/**
+ * Structure to pass data defining Mali instance in use:
+ *
+ * mali_product_id - Mali product id
+ * mali_version_major - Mali version major number
+ * mali_version_minor - Mali version minor number
+ * num_of_l2_cores - number of L2 cache cores
+ * num_of_fp_cores - number of fragment processor cores
+ * num_of_vp_cores - number of vertex processor cores
+ */
+typedef struct _mali_profiling_mali_version {
+	u32 mali_product_id;
+	u32 mali_version_major;
+	u32 mali_version_minor;
+	u32 num_of_l2_cores;
+	u32 num_of_fp_cores;
+	u32 num_of_vp_cores;
+} _mali_profiling_mali_version;
+
+/**
+ * Structure to define the mali profiling counter struct.
+ */
+typedef struct mali_profiling_counter {
+	char counter_name[40];
+	u32 counter_id;
+	u32 counter_event;
+	u32 prev_counter_value;
+	u32 current_counter_value;
+	u32 key;
+	int enabled;
+} mali_profiling_counter;
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting.
+ * We cannot use the enums in mali_uk_types.h because they are unknown inside gator.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define MEM_COUNTER_ENABLE (5)
+#define ANNOTATE_PROFILING_ENABLE (6)
+
+void _mali_profiling_control(u32 action, u32 value);
+
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values);
+
+int _mali_profiling_set_event(u32 counter_id, s32 event_id);
+
+u32 _mali_profiling_get_api_version(void);
+
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_PROFILING_GATOR_API_H__ */
diff --git a/utgard/r6p1/include/linux/mali/mali_utgard_uk_types.h b/utgard/r6p1/include/linux/mali/mali_utgard_uk_types.h
new file mode 100755
index 0000000..ef60764
--- /dev/null
+++ b/utgard/r6p1/include/linux/mali/mali_utgard_uk_types.h
@@ -0,0 +1,1107 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library. 
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules 
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting 
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions 
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify 
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so. 
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+/**
+ * @file mali_uk_types.h
+ * Defines the types and constants used in the user-kernel interface
+ */
+
+#ifndef __MALI_UTGARD_UK_TYPES_H__
+#define __MALI_UTGARD_UK_TYPES_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Iteration functions depend on these values being consecutive. */
+#define MALI_UK_TIMELINE_GP   0
+#define MALI_UK_TIMELINE_PP   1
+#define MALI_UK_TIMELINE_SOFT 2
+#define MALI_UK_TIMELINE_MAX  3
+
+#define MALI_UK_BIG_VARYING_SIZE  (1024*1024*2)
+
+typedef struct {
+	u32 points[MALI_UK_TIMELINE_MAX];
+	s32 sync_fd;
+} _mali_uk_fence_t;
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs
+ *
+ * @{
+ */
+
+/** @defgroup _mali_uk_core U/K Core
+ * @{ */
+
+/** Definition of subsystem numbers, to assist in creating a unique identifier
+ * for each U/K call.
+ *
+ * @see _mali_uk_functions */
+typedef enum {
+	_MALI_UK_CORE_SUBSYSTEM,      /**< Core Group of U/K calls */
+	_MALI_UK_MEMORY_SUBSYSTEM,    /**< Memory Group of U/K calls */
+	_MALI_UK_PP_SUBSYSTEM,        /**< Fragment Processor Group of U/K calls */
+	_MALI_UK_GP_SUBSYSTEM,        /**< Vertex Processor Group of U/K calls */
+	_MALI_UK_PROFILING_SUBSYSTEM, /**< Profiling Group of U/K calls */
+	_MALI_UK_VSYNC_SUBSYSTEM,     /**< VSYNC Group of U/K calls */
+} _mali_uk_subsystem_t;
+
+/** Within a function group each function has its unique sequence number
+ * to assist in creating a unique identifier for each U/K call.
+ *
+ * An ordered pair of numbers selected from
+ * ( \ref _mali_uk_subsystem_t,\ref  _mali_uk_functions) will uniquely identify the
+ * U/K call across all groups of functions, and all functions. */
+typedef enum {
+	/** Core functions */
+
+	_MALI_UK_OPEN                    = 0, /**< _mali_ukk_open() */
+	_MALI_UK_CLOSE,                       /**< _mali_ukk_close() */
+	_MALI_UK_WAIT_FOR_NOTIFICATION,       /**< _mali_ukk_wait_for_notification() */
+	_MALI_UK_GET_API_VERSION,             /**< _mali_ukk_get_api_version() */
+	_MALI_UK_POST_NOTIFICATION,           /**< _mali_ukk_post_notification() */
+	_MALI_UK_GET_USER_SETTING,            /**< _mali_ukk_get_user_setting() *//**< [out] */
+	_MALI_UK_GET_USER_SETTINGS,           /**< _mali_ukk_get_user_settings() *//**< [out] */
+	_MALI_UK_REQUEST_HIGH_PRIORITY,       /**< _mali_ukk_request_high_priority() */
+	_MALI_UK_TIMELINE_GET_LATEST_POINT,   /**< _mali_ukk_timeline_get_latest_point() */
+	_MALI_UK_TIMELINE_WAIT,               /**< _mali_ukk_timeline_wait() */
+	_MALI_UK_TIMELINE_CREATE_SYNC_FENCE,  /**< _mali_ukk_timeline_create_sync_fence() */
+	_MALI_UK_SOFT_JOB_START,              /**< _mali_ukk_soft_job_start() */
+	_MALI_UK_SOFT_JOB_SIGNAL,             /**< _mali_ukk_soft_job_signal() */
+	_MALI_UK_PENDING_SUBMIT,             /**< _mali_ukk_pending_submit() */
+
+	/** Memory functions */
+
+	_MALI_UK_ALLOC_MEM                = 0,   /**< _mali_ukk_alloc_mem() */
+	_MALI_UK_FREE_MEM,                       /**< _mali_ukk_free_mem() */
+	_MALI_UK_BIND_MEM,                       /**< _mali_ukk_mem_bind() */
+	_MALI_UK_UNBIND_MEM,                     /**< _mali_ukk_mem_unbind() */
+	_MALI_UK_COW_MEM,                        /**< _mali_ukk_mem_cow() */
+	_MALI_UK_COW_MODIFY_RANGE,               /**< _mali_ukk_mem_cow_modify_range() */
+	_MALI_UK_RESIZE_MEM,                     /**<._mali_ukk_mem_resize() */
+	_MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, /**< _mali_ukk_mem_get_mmu_page_table_dump_size() */
+	_MALI_UK_DUMP_MMU_PAGE_TABLE,            /**< _mali_ukk_mem_dump_mmu_page_table() */
+	_MALI_UK_DMA_BUF_GET_SIZE,               /**< _mali_ukk_dma_buf_get_size() */
+	_MALI_UK_MEM_WRITE_SAFE,                 /**< _mali_uku_mem_write_safe() */
+
+	/** Common functions for each core */
+
+	_MALI_UK_START_JOB           = 0,     /**< Start a Fragment/Vertex Processor Job on a core */
+	_MALI_UK_GET_NUMBER_OF_CORES,         /**< Get the number of Fragment/Vertex Processor cores */
+	_MALI_UK_GET_CORE_VERSION,            /**< Get the Fragment/Vertex Processor version compatible with all cores */
+
+	/** Fragment Processor Functions  */
+
+	_MALI_UK_PP_START_JOB            = _MALI_UK_START_JOB,            /**< _mali_ukk_pp_start_job() */
+	_MALI_UK_GET_PP_NUMBER_OF_CORES  = _MALI_UK_GET_NUMBER_OF_CORES,  /**< _mali_ukk_get_pp_number_of_cores() */
+	_MALI_UK_GET_PP_CORE_VERSION     = _MALI_UK_GET_CORE_VERSION,     /**< _mali_ukk_get_pp_core_version() */
+	_MALI_UK_PP_DISABLE_WB,                                           /**< _mali_ukk_pp_job_disable_wb() */
+	_MALI_UK_PP_AND_GP_START_JOB,                                     /**< _mali_ukk_pp_and_gp_start_job() */
+
+	/** Vertex Processor Functions  */
+
+	_MALI_UK_GP_START_JOB            = _MALI_UK_START_JOB,            /**< _mali_ukk_gp_start_job() */
+	_MALI_UK_GET_GP_NUMBER_OF_CORES  = _MALI_UK_GET_NUMBER_OF_CORES,  /**< _mali_ukk_get_gp_number_of_cores() */
+	_MALI_UK_GET_GP_CORE_VERSION     = _MALI_UK_GET_CORE_VERSION,     /**< _mali_ukk_get_gp_core_version() */
+	_MALI_UK_GP_SUSPEND_RESPONSE,                                     /**< _mali_ukk_gp_suspend_response() */
+
+	/** Profiling functions */
+
+	_MALI_UK_PROFILING_ADD_EVENT     = 0, /**< __mali_uku_profiling_add_event() */
+	_MALI_UK_PROFILING_REPORT_SW_COUNTERS,/**< __mali_uku_profiling_report_sw_counters() */
+	_MALI_UK_PROFILING_MEMORY_USAGE_GET,  /**< __mali_uku_profiling_memory_usage_get() */
+	_MALI_UK_PROFILING_STREAM_FD_GET, /** < __mali_uku_profiling_stream_fd_get() */
+	_MALI_UK_PROFILING_CONTROL_SET, /** < __mali_uku_profiling_control_set() */
+
+	/** VSYNC reporting fuctions */
+	_MALI_UK_VSYNC_EVENT_REPORT      = 0, /**< _mali_ukk_vsync_event_report() */
+} _mali_uk_functions;
+
+/** @defgroup _mali_uk_getsysteminfo U/K Get System Info
+ * @{ */
+
+/**
+ * Type definition for the core version number.
+ * Used when returning the version number read from a core
+ *
+ * Its format is that of the 32-bit Version register for a particular core.
+ * Refer to the "Mali200 and MaliGP2 3D Graphics Processor Technical Reference
+ * Manual", ARM DDI 0415C, for more information.
+ */
+typedef u32 _mali_core_version;
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @defgroup _mali_uk_gp U/K Vertex Processor
+ * @{ */
+
+/** @defgroup _mali_uk_gp_suspend_response_s Vertex Processor Suspend Response
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_gp_suspend_response()
+ *
+ * When _mali_wait_for_notification() receives notification that a
+ * Vertex Processor job was suspended, you need to send a response to indicate
+ * what needs to happen with this job. You can either abort or resume the job.
+ *
+ * - set @c code to indicate response code. This is either @c _MALIGP_JOB_ABORT or
+ * @c _MALIGP_JOB_RESUME_WITH_NEW_HEAP to indicate you will provide a new heap
+ * for the job that will resolve the out of memory condition for the job.
+ * - copy the @c cookie value from the @c _mali_uk_gp_job_suspended_s notification;
+ * this is an identifier for the suspended job
+ * - set @c arguments[0] and @c arguments[1] to zero if you abort the job. If
+ * you resume it, @c argument[0] should specify the Mali start address for the new
+ * heap and @c argument[1] the Mali end address of the heap.
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ *
+ */
+typedef enum _maligp_job_suspended_response_code {
+	_MALIGP_JOB_ABORT,                  /**< Abort the Vertex Processor job */
+	_MALIGP_JOB_RESUME_WITH_NEW_HEAP    /**< Resume the Vertex Processor job with a new heap */
+} _maligp_job_suspended_response_code;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 cookie;                     /**< [in] cookie from the _mali_uk_gp_job_suspended_s notification */
+	_maligp_job_suspended_response_code code; /**< [in] abort or resume response code, see \ref _maligp_job_suspended_response_code */
+	u32 arguments[2];               /**< [in] 0 when aborting a job. When resuming a job, the Mali start and end address for a new heap to resume the job with */
+} _mali_uk_gp_suspend_response_s;
+
+/** @} */ /* end group _mali_uk_gp_suspend_response_s */
+
+/** @defgroup _mali_uk_gpstartjob_s Vertex Processor Start Job
+ * @{ */
+
+/** @brief Status indicating the result of the execution of a Vertex or Fragment processor job  */
+typedef enum {
+	_MALI_UK_JOB_STATUS_END_SUCCESS         = 1 << (16 + 0),
+	_MALI_UK_JOB_STATUS_END_OOM             = 1 << (16 + 1),
+	_MALI_UK_JOB_STATUS_END_ABORT           = 1 << (16 + 2),
+	_MALI_UK_JOB_STATUS_END_TIMEOUT_SW      = 1 << (16 + 3),
+	_MALI_UK_JOB_STATUS_END_HANG            = 1 << (16 + 4),
+	_MALI_UK_JOB_STATUS_END_SEG_FAULT       = 1 << (16 + 5),
+	_MALI_UK_JOB_STATUS_END_ILLEGAL_JOB     = 1 << (16 + 6),
+	_MALI_UK_JOB_STATUS_END_UNKNOWN_ERR     = 1 << (16 + 7),
+	_MALI_UK_JOB_STATUS_END_SHUTDOWN        = 1 << (16 + 8),
+	_MALI_UK_JOB_STATUS_END_SYSTEM_UNUSABLE = 1 << (16 + 9)
+} _mali_uk_job_status;
+
+#define MALIGP2_NUM_REGS_FRAME (6)
+
+/** @brief Arguments for _mali_ukk_gp_start_job()
+ *
+ * To start a Vertex Processor job
+ * - associate the request with a reference to a @c mali_gp_job_info by setting
+ * user_job_ptr to the address of the @c mali_gp_job_info of the job.
+ * - set @c priority to the priority of the @c mali_gp_job_info
+ * - specify a timeout for the job by setting @c watchdog_msecs to the number of
+ * milliseconds the job is allowed to run. Specifying a value of 0 selects the
+ * default timeout in use by the device driver.
+ * - copy the frame registers from the @c mali_gp_job_info into @c frame_registers.
+ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero
+ * for a non-instrumented build. For an instrumented build you can use up
+ * to two performance counters. Set the corresponding bit in @c perf_counter_flag
+ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify
+ * the source of what needs to get counted (e.g. number of vertex loader
+ * cache hits). For source id values, see ARM DDI0415A, Table 3-60.
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ *
+ * When @c _mali_ukk_gp_start_job() returns @c _MALI_OSK_ERR_OK, status contains the
+ * result of the request (see \ref _mali_uk_start_job_status). If the job could
+ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be
+ * tried again.
+ *
+ * After the job has started, @c _mali_wait_for_notification() will be notified
+ * that the job finished or got suspended. It may get suspended due to
+ * resource shortage. If it finished (see _mali_ukk_wait_for_notification())
+ * the notification will contain a @c _mali_uk_gp_job_finished_s result. If
+ * it got suspended the notification will contain a @c _mali_uk_gp_job_suspended_s
+ * result.
+ *
+ * The @c _mali_uk_gp_job_finished_s contains the job status (see \ref _mali_uk_job_status),
+ * the number of milliseconds the job took to render, and values of core registers
+ * when the job finished (irq status, performance counters, renderer list
+ * address). A job has finished succesfully when its status is
+ * @c _MALI_UK_JOB_STATUS_FINISHED. If the hardware detected a timeout while rendering
+ * the job, or software detected the job is taking more than watchdog_msecs to
+ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_HANG.
+ * If the hardware detected a bus error while accessing memory associated with the
+ * job, status will indicate @c _MALI_UK_JOB_STATUS_SEG_FAULT.
+ * status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to
+ * stop the job but the job didn't start on the hardware yet, e.g. when the
+ * driver shutdown.
+ *
+ * In case the job got suspended, @c _mali_uk_gp_job_suspended_s contains
+ * the @c user_job_ptr identifier used to start the job with, the @c reason
+ * why the job stalled (see \ref _maligp_job_suspended_reason) and a @c cookie
+ * to identify the core on which the job stalled.  This @c cookie will be needed
+ * when responding to this nofication by means of _mali_ukk_gp_suspend_response().
+ * (see _mali_ukk_gp_suspend_response()). The response is either to abort or
+ * resume the job. If the job got suspended due to an out of memory condition
+ * you may be able to resolve this by providing more memory and resuming the job.
+ *
+ */
+typedef struct {
+	u64 ctx;                          /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job_ptr;                   /**< [in] identifier for the job in user space, a @c mali_gp_job_info* */
+	u32 priority;                       /**< [in] job priority. A lower number means higher priority */
+	u32 frame_registers[MALIGP2_NUM_REGS_FRAME]; /**< [in] core specific registers associated with this job */
+	u32 perf_counter_flag;              /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */
+	u32 perf_counter_src0;              /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */
+	u32 perf_counter_src1;              /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */
+	u32 frame_builder_id;               /**< [in] id of the originating frame builder */
+	u32 flush_id;                       /**< [in] flush id within the originating frame builder */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u64 timeline_point_ptr;            /**< [in,out] pointer to u32: location where point on gp timeline for this job will be written */
+	u32 varying_memsize;            /** < [in] size of varying memory to use deffer bind*/
+	u32 deferred_mem_num;
+	u64 deferred_mem_list;         /** < [in] memory hanlde list of varying buffer to use deffer bind */
+} _mali_uk_gp_start_job_s;
+
+#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE (1<<0) /**< Enable performance counter SRC0 for a job */
+#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE (1<<1) /**< Enable performance counter SRC1 for a job */
+#define _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE (1<<2) /**< Enable per tile (aka heatmap) generation with for a job (using the enabled counter sources) */
+
+/** @} */ /* end group _mali_uk_gpstartjob_s */
+
+typedef struct {
+	u64 user_job_ptr;               /**< [out] identifier for the job in user space */
+	_mali_uk_job_status status;     /**< [out] status of finished job */
+	u32 heap_current_addr;          /**< [out] value of the GP PLB PL heap start address register */
+	u32 perf_counter0;              /**< [out] value of performance counter 0 (see ARM DDI0415A) */
+	u32 perf_counter1;              /**< [out] value of performance counter 1 (see ARM DDI0415A) */
+	u32 pending_big_job_num;
+} _mali_uk_gp_job_finished_s;
+
+typedef struct {
+	u64 user_job_ptr;                    /**< [out] identifier for the job in user space */
+	u32 cookie;                          /**< [out] identifier for the core in kernel space on which the job stalled */
+} _mali_uk_gp_job_suspended_s;
+
+/** @} */ /* end group _mali_uk_gp */
+
+
+/** @defgroup _mali_uk_pp U/K Fragment Processor
+ * @{ */
+
+#define _MALI_PP_MAX_SUB_JOBS 8
+
+#define _MALI_PP_MAX_FRAME_REGISTERS ((0x058/4)+1)
+
+#define _MALI_PP_MAX_WB_REGISTERS ((0x02C/4)+1)
+
+#define _MALI_DLBU_MAX_REGISTERS 4
+
+/** Flag for _mali_uk_pp_start_job_s */
+#define _MALI_PP_JOB_FLAG_NO_NOTIFICATION (1<<0)
+#define _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE (1<<1)
+#define _MALI_PP_JOB_FLAG_PROTECTED (1<<2)
+
+/** @defgroup _mali_uk_ppstartjob_s Fragment Processor Start Job
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_pp_start_job()
+ *
+ * To start a Fragment Processor job
+ * - associate the request with a reference to a mali_pp_job by setting
+ * @c user_job_ptr to the address of the @c mali_pp_job of the job.
+ * - set @c priority to the priority of the mali_pp_job
+ * - specify a timeout for the job by setting @c watchdog_msecs to the number of
+ * milliseconds the job is allowed to run. Specifying a value of 0 selects the
+ * default timeout in use by the device driver.
+ * - copy the frame registers from the @c mali_pp_job into @c frame_registers.
+ * For MALI200 you also need to copy the write back 0,1 and 2 registers.
+ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero
+ * for a non-instrumented build. For an instrumented build you can use up
+ * to two performance counters. Set the corresponding bit in @c perf_counter_flag
+ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify
+ * the source of what needs to get counted (e.g. number of vertex loader
+ * cache hits). For source id values, see ARM DDI0415A, Table 3-60.
+ * - pass in the user-kernel context in @c ctx that was returned from _mali_ukk_open()
+ *
+ * When _mali_ukk_pp_start_job() returns @c _MALI_OSK_ERR_OK, @c status contains the
+ * result of the request (see \ref _mali_uk_start_job_status). If the job could
+ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be
+ * tried again.
+ *
+ * After the job has started, _mali_wait_for_notification() will be notified
+ * when the job finished. The notification will contain a
+ * @c _mali_uk_pp_job_finished_s result. It contains the @c user_job_ptr
+ * identifier used to start the job with, the job @c status (see \ref _mali_uk_job_status),
+ * the number of milliseconds the job took to render, and values of core registers
+ * when the job finished (irq status, performance counters, renderer list
+ * address). A job has finished succesfully when its status is
+ * @c _MALI_UK_JOB_STATUS_FINISHED. If the hardware detected a timeout while rendering
+ * the job, or software detected the job is taking more than @c watchdog_msecs to
+ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_HANG.
+ * If the hardware detected a bus error while accessing memory associated with the
+ * job, status will indicate @c _MALI_UK_JOB_STATUS_SEG_FAULT.
+ * status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to
+ * stop the job but the job didn't start on the hardware yet, e.g. when the
+ * driver shutdown.
+ *
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job_ptr;               /**< [in] identifier for the job in user space */
+	u32 priority;                   /**< [in] job priority. A lower number means higher priority */
+	u32 frame_registers[_MALI_PP_MAX_FRAME_REGISTERS];         /**< [in] core specific registers associated with first sub job, see ARM DDI0415A */
+	u32 frame_registers_addr_frame[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_FRAME registers for sub job 1-7 */
+	u32 frame_registers_addr_stack[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_STACK registers for sub job 1-7 */
+	u32 wb0_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 wb1_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 wb2_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 dlbu_registers[_MALI_DLBU_MAX_REGISTERS]; /**< [in] Dynamic load balancing unit registers */
+	u32 num_cores;                      /**< [in] Number of cores to set up (valid range: 1-8(M450) or 4(M400)) */
+	u32 perf_counter_flag;              /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */
+	u32 perf_counter_src0;              /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */
+	u32 perf_counter_src1;              /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */
+	u32 frame_builder_id;               /**< [in] id of the originating frame builder */
+	u32 flush_id;                       /**< [in] flush id within the originating frame builder */
+	u32 flags;                          /**< [in] See _MALI_PP_JOB_FLAG_* for a list of avaiable flags */
+	u32 tilesx;                         /**< [in] number of tiles in the x direction (needed for heatmap generation */
+	u32 tilesy;                         /**< [in] number of tiles in y direction (needed for reading the heatmap memory) */
+	u32 heatmap_mem;                    /**< [in] memory address to store counter values per tile (aka heatmap) */
+	u32 num_memory_cookies;             /**< [in] number of memory cookies attached to job */
+	u64 memory_cookies;               /**< [in] pointer to array of u32 memory cookies attached to job */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u64 timeline_point_ptr;           /**< [in,out] pointer to location of u32 where point on pp timeline for this job will be written */
+} _mali_uk_pp_start_job_s;
+
+typedef struct {
+	u64 ctx;       /**< [in,out] user-kernel context (trashed on output) */
+	u64 gp_args;   /**< [in,out] GP uk arguments (see _mali_uk_gp_start_job_s) */
+	u64 pp_args;   /**< [in,out] PP uk arguments (see _mali_uk_pp_start_job_s) */
+} _mali_uk_pp_and_gp_start_job_s;
+
+/** @} */ /* end group _mali_uk_ppstartjob_s */
+
+typedef struct {
+	u64 user_job_ptr;                          /**< [out] identifier for the job in user space */
+	_mali_uk_job_status status;                /**< [out] status of finished job */
+	u32 perf_counter0[_MALI_PP_MAX_SUB_JOBS];  /**< [out] value of perfomance counter 0 (see ARM DDI0415A), one for each sub job */
+	u32 perf_counter1[_MALI_PP_MAX_SUB_JOBS];  /**< [out] value of perfomance counter 1 (see ARM DDI0415A), one for each sub job */
+	u32 perf_counter_src0;
+	u32 perf_counter_src1;
+} _mali_uk_pp_job_finished_s;
+
+typedef struct {
+	u32 number_of_enabled_cores;               /**< [out] the new number of enabled cores */
+} _mali_uk_pp_num_cores_changed_s;
+
+
+
+/**
+ * Flags to indicate write-back units
+ */
+typedef enum {
+	_MALI_UK_PP_JOB_WB0 = 1,
+	_MALI_UK_PP_JOB_WB1 = 2,
+	_MALI_UK_PP_JOB_WB2 = 4,
+} _mali_uk_pp_job_wbx_flag;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 fb_id;                      /**< [in] Frame builder ID of job to disable WB units for */
+	u32 wb0_memory;
+	u32 wb1_memory;
+	u32 wb2_memory;
+} _mali_uk_pp_disable_wb_s;
+
+
+/** @} */ /* end group _mali_uk_pp */
+
+/** @defgroup _mali_uk_soft_job U/K Soft Job
+ * @{ */
+
+typedef struct {
+	u64 ctx;                            /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job;                       /**< [in] identifier for the job in user space */
+	u64 job_id_ptr;                     /**< [in,out] pointer to location of u32 where job id will be written */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u32 point;                          /**< [out] point on soft timeline for this job */
+	u32 type;                           /**< [in] type of soft job */
+} _mali_uk_soft_job_start_s;
+
+typedef struct {
+	u64 user_job;                       /**< [out] identifier for the job in user space */
+} _mali_uk_soft_job_activated_s;
+
+typedef struct {
+	u64 ctx;                          /**< [in,out] user-kernel context (trashed on output) */
+	u32 job_id;                         /**< [in] id for soft job */
+} _mali_uk_soft_job_signal_s;
+
+/** @} */ /* end group _mali_uk_soft_job */
+
+typedef struct {
+	u32 counter_id;
+	u32 key;
+	int enable;
+} _mali_uk_annotate_profiling_mem_counter_s;
+
+typedef struct {
+	u32 sampling_rate;
+	int enable;
+} _mali_uk_annotate_profiling_enable_s;
+
+
+/** @addtogroup _mali_uk_core U/K Core
+ * @{ */
+
+/** @defgroup _mali_uk_waitfornotification_s Wait For Notification
+ * @{ */
+
+/** @brief Notification type encodings
+ *
+ * Each Notification type is an ordered pair of (subsystem,id), and is unique.
+ *
+ * The encoding of subsystem,id into a 32-bit word is:
+ * encoding = (( subsystem << _MALI_NOTIFICATION_SUBSYSTEM_SHIFT ) & _MALI_NOTIFICATION_SUBSYSTEM_MASK)
+ *            | (( id <<  _MALI_NOTIFICATION_ID_SHIFT ) & _MALI_NOTIFICATION_ID_MASK)
+ *
+ * @see _mali_uk_wait_for_notification_s
+ */
+typedef enum {
+	/** core notifications */
+
+	_MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x20,
+	_MALI_NOTIFICATION_APPLICATION_QUIT = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x40,
+	_MALI_NOTIFICATION_SETTINGS_CHANGED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x80,
+	_MALI_NOTIFICATION_SOFT_ACTIVATED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x100,
+
+	/** Fragment Processor notifications */
+
+	_MALI_NOTIFICATION_PP_FINISHED = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x10,
+	_MALI_NOTIFICATION_PP_NUM_CORE_CHANGE = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x20,
+
+	/** Vertex Processor notifications */
+
+	_MALI_NOTIFICATION_GP_FINISHED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x10,
+	_MALI_NOTIFICATION_GP_STALLED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x20,
+
+	/** Profiling notifications */
+	_MALI_NOTIFICATION_ANNOTATE_PROFILING_MEM_COUNTER = (_MALI_UK_PROFILING_SUBSYSTEM << 16) | 0x10,
+	_MALI_NOTIFICATION_ANNOTATE_PROFILING_ENABLE = (_MALI_UK_PROFILING_SUBSYSTEM << 16) | 0x20,
+} _mali_uk_notification_type;
+
+/** to assist in splitting up 32-bit notification value in subsystem and id value */
+#define _MALI_NOTIFICATION_SUBSYSTEM_MASK 0xFFFF0000
+#define _MALI_NOTIFICATION_SUBSYSTEM_SHIFT 16
+#define _MALI_NOTIFICATION_ID_MASK 0x0000FFFF
+#define _MALI_NOTIFICATION_ID_SHIFT 0
+
+
+/** @brief Enumeration of possible settings which match mali_setting_t in user space
+ *
+ *
+ */
+typedef enum {
+	_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE = 0,
+	_MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_DEPTHBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_STENCILBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_PER_TILE_COUNTERS_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_COMPOSITOR,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_WINDOW,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_OTHER,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR,
+	_MALI_UK_USER_SETTING_SW_COUNTER_ENABLED,
+	_MALI_UK_USER_SETTING_MAX,
+} _mali_uk_user_setting_t;
+
+/* See mali_user_settings_db.c */
+extern const char *_mali_uk_user_setting_descriptions[];
+#define _MALI_UK_USER_SETTING_DESCRIPTIONS \
+	{                                           \
+		"sw_events_enable",                 \
+		"colorbuffer_capture_enable",       \
+		"depthbuffer_capture_enable",       \
+		"stencilbuffer_capture_enable",     \
+		"per_tile_counters_enable",         \
+		"buffer_capture_compositor",        \
+		"buffer_capture_window",            \
+		"buffer_capture_other",             \
+		"buffer_capture_n_frames",          \
+		"buffer_capture_resize_factor",     \
+		"sw_counters_enable",               \
+	};
+
+/** @brief struct to hold the value to a particular setting as seen in the kernel space
+ */
+typedef struct {
+	_mali_uk_user_setting_t setting;
+	u32 value;
+} _mali_uk_settings_changed_s;
+
+/** @brief Arguments for _mali_ukk_wait_for_notification()
+ *
+ * On successful return from _mali_ukk_wait_for_notification(), the members of
+ * this structure will indicate the reason for notification.
+ *
+ * Specifically, the source of the notification can be identified by the
+ * subsystem and id fields of the mali_uk_notification_type in the code.type
+ * member. The type member is encoded in a way to divide up the types into a
+ * subsystem field, and a per-subsystem ID field. See
+ * _mali_uk_notification_type for more information.
+ *
+ * Interpreting the data union member depends on the notification type:
+ *
+ * - type == _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS
+ *     - The kernel side is shutting down. No further
+ * _mali_uk_wait_for_notification() calls should be made.
+ *     - In this case, the value of the data union member is undefined.
+ *     - This is used to indicate to the user space client that it should close
+ * the connection to the Mali Device Driver.
+ * - type == _MALI_NOTIFICATION_PP_FINISHED
+ *    - The notification data is of type _mali_uk_pp_job_finished_s. It contains the user_job_ptr
+ * identifier used to start the job with, the job status, the number of milliseconds the job took to render,
+ * and values of core registers when the job finished (irq status, performance counters, renderer list
+ * address).
+ *    - A job has finished succesfully when its status member is _MALI_UK_JOB_STATUS_FINISHED.
+ *    - If the hardware detected a timeout while rendering the job, or software detected the job is
+ * taking more than watchdog_msecs (see _mali_ukk_pp_start_job()) to complete, the status member will
+ * indicate _MALI_UK_JOB_STATUS_HANG.
+ *    - If the hardware detected a bus error while accessing memory associated with the job, status will
+ * indicate _MALI_UK_JOB_STATUS_SEG_FAULT.
+ *    - Status will indicate MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to stop the job but the job
+ * didn't start the hardware yet, e.g. when the driver closes.
+ * - type == _MALI_NOTIFICATION_GP_FINISHED
+ *     - The notification data is of type _mali_uk_gp_job_finished_s. The notification is similar to that of
+ * type == _MALI_NOTIFICATION_PP_FINISHED, except that several other GP core register values are returned.
+ * The status values have the same meaning for type == _MALI_NOTIFICATION_PP_FINISHED.
+ * - type == _MALI_NOTIFICATION_GP_STALLED
+ *     - The nofication data is of type _mali_uk_gp_job_suspended_s. It contains the user_job_ptr
+ * identifier used to start the job with, the reason why the job stalled and a cookie to identify the core on
+ * which the job stalled.
+ *     - The reason member of gp_job_suspended is set to _MALIGP_JOB_SUSPENDED_OUT_OF_MEMORY
+ * when the polygon list builder unit has run out of memory.
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_notification_type type; /**< [out] Type of notification available */
+	union {
+		_mali_uk_gp_job_suspended_s gp_job_suspended;/**< [out] Notification data for _MALI_NOTIFICATION_GP_STALLED notification type */
+		_mali_uk_gp_job_finished_s  gp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_GP_FINISHED notification type */
+		_mali_uk_pp_job_finished_s  pp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_PP_FINISHED notification type */
+		_mali_uk_settings_changed_s setting_changed;/**< [out] Notification data for _MALI_NOTIFICAATION_SETTINGS_CHANGED notification type */
+		_mali_uk_soft_job_activated_s soft_job_activated; /**< [out] Notification data for _MALI_NOTIFICATION_SOFT_ACTIVATED notification type */
+		_mali_uk_annotate_profiling_mem_counter_s profiling_mem_counter;
+		_mali_uk_annotate_profiling_enable_s profiling_enable;
+	} data;
+} _mali_uk_wait_for_notification_s;
+
+/** @brief Arguments for _mali_ukk_post_notification()
+ *
+ * Posts the specified notification to the notification queue for this application.
+ * This is used to send a quit message to the callback thread.
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_notification_type type; /**< [in] Type of notification to post */
+} _mali_uk_post_notification_s;
+
+/** @} */ /* end group _mali_uk_waitfornotification_s */
+
+/** @defgroup _mali_uk_getapiversion_s Get API Version
+ * @{ */
+
+/** helpers for Device Driver API version handling */
+
+/** @brief Encode a version ID from a 16-bit input
+ *
+ * @note the input is assumed to be 16 bits. It must not exceed 16 bits. */
+#define _MAKE_VERSION_ID(x) (((x) << 16UL) | (x))
+
+/** @brief Check whether a 32-bit value is likely to be Device Driver API
+ * version ID. */
+#define _IS_VERSION_ID(x) (((x) & 0xFFFF) == (((x) >> 16UL) & 0xFFFF))
+
+/** @brief Decode a 16-bit version number from a 32-bit Device Driver API version
+ * ID */
+#define _GET_VERSION(x) (((x) >> 16UL) & 0xFFFF)
+
+/** @brief Determine whether two 32-bit encoded version IDs match */
+#define _IS_API_MATCH(x, y) (IS_VERSION_ID((x)) && IS_VERSION_ID((y)) && (GET_VERSION((x)) == GET_VERSION((y))))
+
+/**
+ * API version define.
+ * Indicates the version of the kernel API
+ * The version is a 16bit integer incremented on each API change.
+ * The 16bit integer is stored twice in a 32bit integer
+ * For example, for version 1 the value would be 0x00010001
+ */
+#define _MALI_API_VERSION 900
+#define _MALI_UK_API_VERSION _MAKE_VERSION_ID(_MALI_API_VERSION)
+
+/**
+ * The API version is a 16-bit integer stored in both the lower and upper 16-bits
+ * of a 32-bit value. The 16-bit API version value is incremented on each API
+ * change. Version 1 would be 0x00010001. Used in _mali_uk_get_api_version_s.
+ */
+typedef u32 _mali_uk_api_version;
+
+/** @brief Arguments for _mali_uk_get_api_version()
+ *
+ * The user-side interface version must be written into the version member,
+ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of
+ * the kernel-side interface.
+ *
+ * On successful return, the version member will be the API version of the
+ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version
+ * of the API.
+ *
+ * The compatible member must be checked to see if the version of the user-side
+ * interface is compatible with the kernel-side interface, since future versions
+ * of the interface may be backwards compatible.
+ */
+typedef struct {
+	u32 ctx;                        /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_api_version version;   /**< [in,out] API version of user-side interface. */
+	int compatible;                 /**< [out] @c 1 when @version is compatible, @c 0 otherwise */
+} _mali_uk_get_api_version_s;
+
+/** @brief Arguments for _mali_uk_get_api_version_v2()
+ *
+ * The user-side interface version must be written into the version member,
+ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of
+ * the kernel-side interface.
+ *
+ * On successful return, the version member will be the API version of the
+ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version
+ * of the API.
+ *
+ * The compatible member must be checked to see if the version of the user-side
+ * interface is compatible with the kernel-side interface, since future versions
+ * of the interface may be backwards compatible.
+ */
+typedef struct {
+	u64 ctx;                        /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_api_version version;   /**< [in,out] API version of user-side interface. */
+	int compatible;                 /**< [out] @c 1 when @version is compatible, @c 0 otherwise */
+} _mali_uk_get_api_version_v2_s;
+
+/** @} */ /* end group _mali_uk_getapiversion_s */
+
+/** @defgroup _mali_uk_get_user_settings_s Get user space settings */
+
+/** @brief struct to keep the matching values of the user space settings within certain context
+ *
+ * Each member of the settings array corresponds to a matching setting in the user space and its value is the value
+ * of that particular setting.
+ *
+ * All settings are given reference to the context pointed to by the ctx pointer.
+ *
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	u32 settings[_MALI_UK_USER_SETTING_MAX]; /**< [out] The values for all settings */
+} _mali_uk_get_user_settings_s;
+
+/** @brief struct to hold the value of a particular setting from the user space within a given context
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_user_setting_t setting; /**< [in] setting to get */
+	u32 value;                       /**< [out] value of setting */
+} _mali_uk_get_user_setting_s;
+
+/** @brief Arguments for _mali_ukk_request_high_priority() */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+} _mali_uk_request_high_priority_s;
+
+/** @brief Arguments for _mali_ukk_pending_submit() */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+} _mali_uk_pending_submit_s;
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @defgroup _mali_uk_memory U/K Memory
+ * @{ */
+
+#define _MALI_MEMORY_ALLOCATE_RESIZEABLE  (1<<4) /* BUFFER can trim dow/grow*/
+#define _MALI_MEMORY_ALLOCATE_NO_BIND_GPU (1<<5) /*Not map to GPU when allocate, must call bind later*/
+#define _MALI_MEMORY_ALLOCATE_SWAPPABLE   (1<<6) /* Allocate swappale memory. */
+#define _MALI_MEMORY_ALLOCATE_DEFER_BIND (1<<7) /*Not map to GPU when allocate, must call bind later*/
+#define _MALI_MEMORY_ALLOCATE_SECURE (1<<8) /* Allocate secure memory. */
+
+
+typedef struct {
+	u64 ctx;                                          /**< [in,out] user-kernel context (trashed on output) */
+	u32 gpu_vaddr;                                    /**< [in] GPU virtual address */
+	u32 vsize;                                        /**< [in] vitrual size of the allocation */
+	u32 psize;                                        /**< [in] physical size of the allocation */
+	u32 flags;
+	u64 backend_handle;                               /**< [out] backend handle */
+	s32 secure_shared_fd;                           /** < [in] the mem handle for secure mem */
+	struct {
+		/* buffer types*/
+		/* CPU read/write info*/
+	} buffer_info;
+} _mali_uk_alloc_mem_s;
+
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 gpu_vaddr;                /**< [in] use as handle to free allocation */
+	u32 free_pages_nr;      /** < [out] record the number of free pages */
+} _mali_uk_free_mem_s;
+
+
+#define _MALI_MEMORY_BIND_BACKEND_UMP             (1<<8)
+#define _MALI_MEMORY_BIND_BACKEND_DMA_BUF         (1<<9)
+#define _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY     (1<<10)
+#define _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY (1<<11)
+#define _MALI_MEMORY_BIND_BACKEND_EXT_COW         (1<<12)
+#define _MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION (1<<13)
+
+
+#define _MALI_MEMORY_BIND_BACKEND_MASK (_MALI_MEMORY_BIND_BACKEND_UMP| \
+					_MALI_MEMORY_BIND_BACKEND_DMA_BUF |\
+					_MALI_MEMORY_BIND_BACKEND_MALI_MEMORY |\
+					_MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY |\
+					_MALI_MEMORY_BIND_BACKEND_EXT_COW |\
+					_MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION)
+
+
+#define _MALI_MEMORY_GPU_READ_ALLOCATE            (1<<16)
+
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 vaddr;                                      /**< [in] mali address to map the physical memory to */
+	u32 size;                                       /**< [in] size */
+	u32 flags;                                      /**< [in] see_MALI_MEMORY_BIND_BACKEND_* */
+	u32 padding;                                    /** padding for 32/64 struct alignment */
+	union {
+		struct {
+			u32 secure_id;                  /**< [in] secure id */
+			u32 rights;                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_ump;
+		struct {
+			u32 mem_fd;                     /**< [in] Memory descriptor */
+			u32 rights;                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_dma_buf;
+		struct {
+			/**/
+		} bind_mali_memory;
+		struct {
+			u32 phys_addr;                  /**< [in] physical address */
+			u32 rights;                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_ext_memory;
+	} mem_union;
+} _mali_uk_bind_mem_s;
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 flags;                                      /**< [in] see_MALI_MEMORY_BIND_BACKEND_* */
+	u32 vaddr;                                      /**<  [in] identifier for mapped memory object in kernel space  */
+} _mali_uk_unbind_mem_s;
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 target_handle;                              /**< [in] handle of allocation need to do COW */
+	u32 target_offset;                              /**< [in] offset in target allocation to do COW(for support COW  a memory allocated from memory_bank, PAGE_SIZE align)*/
+	u32 target_size;                                /**< [in] size of target allocation to do COW (for support memory bank, PAGE_SIZE align)(in byte) */
+	u32 range_start;                                /**< [in] re allocate range start offset, offset from the start of allocation (PAGE_SIZE align)*/
+	u32 range_size;                                 /**< [in] re allocate size (PAGE_SIZE align)*/
+	u32 vaddr;                                      /**< [in] mali address for the new allocaiton */
+	u32 backend_handle;                             /**< [out] backend handle */
+	u32 flags;
+} _mali_uk_cow_mem_s;
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 range_start;                                /**< [in] re allocate range start offset, offset from the start of allocation */
+	u32 size;                                       /**< [in] re allocate size*/
+	u32 vaddr;                                      /**< [in] mali address for the new allocaiton */
+	s32 change_pages_nr;                            /**< [out] record the page number change for cow operation */
+} _mali_uk_cow_modify_range_s;
+
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 mem_fd;                     /**< [in] Memory descriptor */
+	u32 size;                       /**< [out] size */
+} _mali_uk_dma_buf_get_size_s;
+
+/** Flag for _mali_uk_map_external_mem_s, _mali_uk_attach_ump_mem_s and _mali_uk_attach_dma_buf_s */
+#define _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE (1<<0)
+
+
+typedef struct {
+	u64 ctx;                                /**< [in,out] user-kernel context (trashed on output) */
+	u64 vaddr;                              /* the buffer to do resize*/
+	u32 psize;                              /* wanted physical size of this memory */
+} _mali_uk_mem_resize_s;
+
+/**
+ * @brief Arguments for _mali_uk[uk]_mem_write_safe()
+ */
+typedef struct {
+	u64 ctx;  /**< [in,out] user-kernel context (trashed on output) */
+	u64 src;  /**< [in] Pointer to source data */
+	u64 dest; /**< [in] Destination Mali buffer */
+	u32 size;   /**< [in,out] Number of bytes to write/copy on input, number of bytes actually written/copied on output */
+} _mali_uk_mem_write_safe_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 size;                       /**< [out] size of MMU page table information (registers + page tables) */
+} _mali_uk_query_mmu_page_table_dump_size_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 size;                       /**< [in] size of buffer to receive mmu page table information */
+	u64 buffer;                   /**< [in,out] buffer to receive mmu page table information */
+	u32 register_writes_size;       /**< [out] size of MMU register dump */
+	u64 register_writes;           /**< [out] pointer within buffer where MMU register dump is stored */
+	u32 page_table_dump_size;       /**< [out] size of MMU page table dump */
+	u64 page_table_dump;           /**< [out] pointer within buffer where MMU page table dump is stored */
+} _mali_uk_dump_mmu_page_table_s;
+
+/** @} */ /* end group _mali_uk_memory */
+
+
+/** @addtogroup _mali_uk_pp U/K Fragment Processor
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_get_pp_number_of_cores()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_pp_number_of_cores(), @c number_of_cores
+ * will contain the number of Fragment Processor cores in the system.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 number_of_total_cores;      /**< [out] Total number of Fragment Processor cores in the system */
+	u32 number_of_enabled_cores;    /**< [out] Number of enabled Fragment Processor cores */
+} _mali_uk_get_pp_number_of_cores_s;
+
+/** @brief Arguments for _mali_ukk_get_pp_core_version()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_pp_core_version(), @c version contains
+ * the version that all Fragment Processor cores are compatible with.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_core_version version;     /**< [out] version returned from core, see \ref _mali_core_version  */
+	u32 padding;
+} _mali_uk_get_pp_core_version_s;
+
+/** @} */ /* end group _mali_uk_pp */
+
+
+/** @addtogroup _mali_uk_gp U/K Vertex Processor
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_get_gp_number_of_cores()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_gp_number_of_cores(), @c number_of_cores
+ * will contain the number of Vertex Processor cores in the system.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 number_of_cores;            /**< [out] number of Vertex Processor cores in the system */
+} _mali_uk_get_gp_number_of_cores_s;
+
+/** @brief Arguments for _mali_ukk_get_gp_core_version()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_gp_core_version(), @c version contains
+ * the version that all Vertex Processor cores are compatible with.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_core_version version;     /**< [out] version returned from core, see \ref _mali_core_version */
+} _mali_uk_get_gp_core_version_s;
+
+/** @} */ /* end group _mali_uk_gp */
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 event_id;                   /**< [in] event id to register (see  enum mali_profiling_events for values) */
+	u32 data[5];                    /**< [in] event specific data */
+} _mali_uk_profiling_add_event_s;
+
+typedef struct {
+	u64 ctx;                     /**< [in,out] user-kernel context (trashed on output) */
+	u32 memory_usage;              /**< [out] total memory usage */
+	u32 vaddr;                                      /**< [in] mali address for the cow allocaiton */
+	s32 change_pages_nr;            /**< [out] record the page number change for cow operation */
+} _mali_uk_profiling_memory_usage_get_s;
+
+
+/** @addtogroup _mali_uk_memory U/K Memory
+ * @{ */
+
+/** @brief Arguments to _mali_ukk_mem_mmap()
+ *
+ * Use of the phys_addr member depends on whether the driver is compiled for
+ * Mali-MMU or nonMMU:
+ * - in the nonMMU case, this is the physical address of the memory as seen by
+ * the CPU (which may be a constant offset from that used by Mali)
+ * - in the MMU case, this is the Mali Virtual base address of the memory to
+ * allocate, and the particular physical pages used to back the memory are
+ * entirely determined by _mali_ukk_mem_mmap(). The details of the physical pages
+ * are not reported to user-space for security reasons.
+ *
+ * The cookie member must be stored for use later when freeing the memory by
+ * calling _mali_ukk_mem_munmap(). In the Mali-MMU case, the cookie is secure.
+ *
+ * The ukk_private word must be set to zero when calling from user-space. On
+ * Kernel-side, the  OS implementation of the U/K interface can use it to
+ * communicate data to the OS implementation of the OSK layer. In particular,
+ * _mali_ukk_get_big_block() directly calls _mali_ukk_mem_mmap directly, and
+ * will communicate its own ukk_private word through the ukk_private member
+ * here. The common code itself will not inspect or modify the ukk_private
+ * word, and so it may be safely used for whatever purposes necessary to
+ * integrate Mali Memory handling into the OS.
+ *
+ * The uku_private member is currently reserved for use by the user-side
+ * implementation of the U/K interface. Its value must be zero.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	void *mapping;                  /**< [out] Returns user-space virtual address for the mapping */
+	u32 size;                       /**< [in] Size of the requested mapping */
+	u32 phys_addr;                  /**< [in] Physical address - could be offset, depending on caller+callee convention */
+	mali_bool writeable;
+} _mali_uk_mem_mmap_s;
+
+/** @brief Arguments to _mali_ukk_mem_munmap()
+ *
+ * The cookie and mapping members must be that returned from the same previous
+ * call to _mali_ukk_mem_mmap(). The size member must correspond to cookie
+ * and mapping - that is, it must be the value originally supplied to a call to
+ * _mali_ukk_mem_mmap that returned the values of mapping and cookie.
+ *
+ * An error will be returned if an attempt is made to unmap only part of the
+ * originally obtained range, or to unmap more than was originally obtained.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	void *mapping;                  /**< [in] The mapping returned from mmap call */
+	u32 size;                       /**< [in] The size passed to mmap call */
+} _mali_uk_mem_munmap_s;
+/** @} */ /* end group _mali_uk_memory */
+
+/** @defgroup _mali_uk_vsync U/K VSYNC Wait Reporting Module
+ * @{ */
+
+/** @brief VSYNC events
+ *
+ * These events are reported when DDK starts to wait for vsync and when the
+ * vsync has occured and the DDK can continue on the next frame.
+ */
+typedef enum _mali_uk_vsync_event {
+	_MALI_UK_VSYNC_EVENT_BEGIN_WAIT = 0,
+	_MALI_UK_VSYNC_EVENT_END_WAIT
+} _mali_uk_vsync_event;
+
+/** @brief Arguments to _mali_ukk_vsync_event()
+ *
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_vsync_event event;     /**< [in] VSYNCH event type */
+} _mali_uk_vsync_event_report_s;
+
+/** @} */ /* end group _mali_uk_vsync */
+
+/** @defgroup _mali_uk_sw_counters_report U/K Software Counter Reporting
+ * @{ */
+
+/** @brief Software counter values
+ *
+ * Values recorded for each of the software counters during a single renderpass.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u64 counters;                  /**< [in] The array of u32 counter values */
+	u32 num_counters;              /**< [in] The number of elements in counters array */
+} _mali_uk_sw_counters_report_s;
+
+/** @} */ /* end group _mali_uk_sw_counters_report */
+
+/** @defgroup _mali_uk_timeline U/K Mali Timeline
+ * @{ */
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 timeline;                   /**< [in] timeline id */
+	u32 point;                      /**< [out] latest point on timeline */
+} _mali_uk_timeline_get_latest_point_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_fence_t fence;         /**< [in] fence */
+	u32 timeout;                    /**< [in] timeout (0 for no wait, -1 for blocking) */
+	u32 status;                     /**< [out] status of fence (1 if signaled, 0 if timeout) */
+} _mali_uk_timeline_wait_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_fence_t fence;         /**< [in] mali fence to create linux sync fence from */
+	s32 sync_fd;                    /**< [out] file descriptor for new linux sync fence */
+} _mali_uk_timeline_create_sync_fence_s;
+
+/** @} */ /* end group _mali_uk_timeline */
+
+/** @} */ /* end group u_k_api */
+
+/** @} */ /* end group uddapi */
+
+typedef struct {
+	u64 ctx;                 /**< [in,out] user-kernel context (trashed on output) */
+	s32 stream_fd;   /**< [in] The profiling kernel base stream fd handle */
+} _mali_uk_profiling_stream_fd_get_s;
+
+typedef struct {
+	u64 ctx;        /**< [in,out] user-kernel context (trashed on output) */
+	u64 control_packet_data; /**< [in] the control packet data for control settings */
+	u32 control_packet_size;  /**< [in] The control packet size */
+	u64 response_packet_data; /** < [out] The response packet data */
+	u32 response_packet_size; /** < [in,out] The response packet data */
+} _mali_uk_profiling_control_set_s;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_UK_TYPES_H__ */
diff --git a/utgard/r6p1/linux/license/gpl/mali_kernel_license.h b/utgard/r6p1/linux/license/gpl/mali_kernel_license.h
new file mode 100755
index 0000000..264555d
--- /dev/null
+++ b/utgard/r6p1/linux/license/gpl/mali_kernel_license.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2010, 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_kernel_license.h
+ * Defines for the macro MODULE_LICENSE.
+ */
+
+#ifndef __MALI_KERNEL_LICENSE_H__
+#define __MALI_KERNEL_LICENSE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MALI_KERNEL_LINUX_LICENSE     "GPL"
+#define MALI_LICENSE_IS_GPL 1
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LICENSE_H__ */
diff --git a/utgard/r6p1/linux/mali_devfreq.c b/utgard/r6p1/linux/mali_devfreq.c
new file mode 100644
index 0000000..0b0ba14
--- /dev/null
+++ b/utgard/r6p1/linux/mali_devfreq.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#include <linux/regulator/consumer.h>
+#include <linux/regulator/driver.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#endif /* Linux >= 3.13 */
+
+#include "mali_pm_metrics.h"
+
+static int
+mali_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+	if (IS_ERR_OR_NULL(opp)) {
+		MALI_PRINT_ERROR(("Failed to get opp (%ld)\n", PTR_ERR(opp)));
+		return PTR_ERR(opp);
+	}
+
+	MALI_DEBUG_PRINT(2, ("mali_devfreq_target:set_freq = %lld flags = 0x%x\n", freq, flags));
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (mdev->current_freq == freq) {
+		*target_freq = freq;
+		mali_pm_reset_dvfs_utilisation(mdev);
+		return 0;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (mdev->regulator && mdev->current_voltage != voltage
+	    && mdev->current_freq < freq) {
+		err = regulator_set_voltage(mdev->regulator, voltage, voltage);
+		if (err) {
+			MALI_PRINT_ERROR(("Failed to increase voltage (%d)\n", err));
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(mdev->clock, freq);
+	if (err) {
+		MALI_PRINT_ERROR(("Failed to set clock %lu (target %lu)\n", freq, *target_freq));
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (mdev->regulator && mdev->current_voltage != voltage
+	    && mdev->current_freq > freq) {
+		err = regulator_set_voltage(mdev->regulator, voltage, voltage);
+		if (err) {
+			MALI_PRINT_ERROR(("Failed to decrease voltage (%d)\n", err));
+			return err;
+		}
+	}
+#endif
+
+	*target_freq = freq;
+	mdev->current_voltage = voltage;
+	mdev->current_freq = freq;
+
+	mali_pm_reset_dvfs_utilisation(mdev);
+
+	return err;
+}
+
+static int
+mali_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	*freq = mdev->current_freq;
+
+	MALI_DEBUG_PRINT(2, ("mali_devfreq_cur_freq: freq = %d \n", *freq));
+	return 0;
+}
+
+static int
+mali_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	stat->current_frequency = mdev->current_freq;
+
+	mali_pm_get_dvfs_utilisation(mdev,
+				     &stat->total_time, &stat->busy_time);
+
+	stat->private_data = NULL;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	memcpy(&mdev->devfreq->last_status, stat, sizeof(*stat));
+#endif
+
+	return 0;
+}
+
+/* setup platform specific opp in platform.c*/
+int __weak setup_opps(void)
+{
+	return 0;
+}
+
+/* term platform specific opp in platform.c*/
+int __weak term_opps(struct device *dev)
+{
+	return 0;
+}
+
+static int mali_devfreq_init_freq_table(struct mali_device *mdev,
+					struct devfreq_dev_profile *dp)
+{
+	int err, count;
+	int i = 0;
+	unsigned long freq = 0;
+	struct dev_pm_opp *opp;
+
+	err = setup_opps();
+	if (err)
+		return err;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(mdev->dev);
+	if (count < 0) {
+		rcu_read_unlock();
+		return count;
+	}
+	rcu_read_unlock();
+
+	MALI_DEBUG_PRINT(2, ("mali devfreq table count %d\n", count));
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				       GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	for (i = 0; i < count; i++, freq++) {
+		opp = dev_pm_opp_find_freq_ceil(mdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+		MALI_DEBUG_PRINT(2, ("mali devfreq table array[%d] = %d\n", i, freq));
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		MALI_PRINT_ERROR(("Unable to enumerate all OPPs (%d!=%d)\n",
+				  count, i));
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void mali_devfreq_term_freq_table(struct mali_device *mdev)
+{
+	struct devfreq_dev_profile *dp = mdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+	term_opps(mdev->dev);
+}
+
+static void mali_devfreq_exit(struct device *dev)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	mali_devfreq_term_freq_table(mdev);
+}
+
+int mali_devfreq_init(struct mali_device *mdev)
+{
+#ifdef CONFIG_DEVFREQ_THERMAL
+	struct devfreq_cooling_power *callbacks = NULL;
+	_mali_osk_device_data data;
+#endif
+	struct devfreq_dev_profile *dp;
+	int err;
+
+	MALI_DEBUG_PRINT(2, ("Init Mali devfreq\n"));
+
+	if (!mdev->clock)
+		return -ENODEV;
+
+	mdev->current_freq = clk_get_rate(mdev->clock);
+
+	dp = &mdev->devfreq_profile;
+
+	dp->initial_freq = mdev->current_freq;
+	dp->polling_ms = 100;
+	dp->target = mali_devfreq_target;
+	dp->get_dev_status = mali_devfreq_status;
+	dp->get_cur_freq = mali_devfreq_cur_freq;
+	dp->exit = mali_devfreq_exit;
+
+	if (mali_devfreq_init_freq_table(mdev, dp))
+		return -EFAULT;
+
+	mdev->devfreq = devfreq_add_device(mdev->dev, dp,
+					   "simple_ondemand", NULL);
+	if (IS_ERR(mdev->devfreq)) {
+		mali_devfreq_term_freq_table(mdev);
+		return PTR_ERR(mdev->devfreq);
+	}
+
+	err = devfreq_register_opp_notifier(mdev->dev, mdev->devfreq);
+	if (err) {
+		MALI_PRINT_ERROR(("Failed to register OPP notifier (%d)\n", err));
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	/* Initilization last_status it will be used when first power allocate called */
+	mdev->devfreq->last_status.current_frequency = mdev->current_freq;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		if (NULL != data.gpu_cooling_ops) {
+			callbacks = data.gpu_cooling_ops;
+			MALI_DEBUG_PRINT(2, ("Mali GPU Thermal: Callback handler installed \n"));
+		}
+	}
+
+	if (callbacks) {
+		mdev->devfreq_cooling = of_devfreq_cooling_register_power(
+						mdev->dev->of_node,
+						mdev->devfreq,
+						callbacks);
+		if (IS_ERR_OR_NULL(mdev->devfreq_cooling)) {
+			err = PTR_ERR(mdev->devfreq_cooling);
+			MALI_PRINT_ERROR(("Failed to register cooling device (%d)\n", err));
+			goto cooling_failed;
+		} else {
+			MALI_DEBUG_PRINT(2, ("Mali GPU Thermal Cooling installed \n"));
+		}
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(mdev->dev, mdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	err = devfreq_remove_device(mdev->devfreq);
+	if (err)
+		MALI_PRINT_ERROR(("Failed to terminate devfreq (%d)\n", err));
+	else
+		mdev->devfreq = NULL;
+
+	return err;
+}
+
+void mali_devfreq_term(struct mali_device *mdev)
+{
+	int err;
+
+	MALI_DEBUG_PRINT(2, ("Term Mali devfreq\n"));
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	devfreq_cooling_unregister(mdev->devfreq_cooling);
+#endif
+
+	devfreq_unregister_opp_notifier(mdev->dev, mdev->devfreq);
+
+	err = devfreq_remove_device(mdev->devfreq);
+	if (err)
+		MALI_PRINT_ERROR(("Failed to terminate devfreq (%d)\n", err));
+	else
+		mdev->devfreq = NULL;
+}
diff --git a/utgard/r6p1/linux/mali_devfreq.h b/utgard/r6p1/linux/mali_devfreq.h
new file mode 100644
index 0000000..faf84f2
--- /dev/null
+++ b/utgard/r6p1/linux/mali_devfreq.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef _MALI_DEVFREQ_H_
+#define _MALI_DEVFREQ_H_
+
+int mali_devfreq_init(struct mali_device *mdev);
+
+void mali_devfreq_term(struct mali_device *mdev);
+
+#endif
diff --git a/utgard/r6p1/linux/mali_device_pause_resume.c b/utgard/r6p1/linux/mali_device_pause_resume.c
new file mode 100755
index 0000000..cb2f702
--- /dev/null
+++ b/utgard/r6p1/linux/mali_device_pause_resume.c
@@ -0,0 +1,36 @@
+/**
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_device_pause_resume.c
+ * Implementation of the Mali pause/resume functionality
+ */
+
+#include <linux/module.h>
+#include <linux/mali/mali_utgard.h>
+#include "mali_pm.h"
+
+void mali_dev_pause(void)
+{
+	/*
+	 * Deactive all groups to prevent hardware being touched
+	 * during the period of mali device pausing
+	 */
+	mali_pm_os_suspend(MALI_FALSE);
+}
+
+EXPORT_SYMBOL(mali_dev_pause);
+
+void mali_dev_resume(void)
+{
+	mali_pm_os_resume();
+}
+
+EXPORT_SYMBOL(mali_dev_resume);
diff --git a/utgard/r6p1/linux/mali_kernel_linux.c b/utgard/r6p1/linux/mali_kernel_linux.c
new file mode 100755
index 0000000..70dab67
--- /dev/null
+++ b/utgard/r6p1/linux/mali_kernel_linux.c
@@ -0,0 +1,1149 @@
+/**
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_kernel_linux.c
+ * Implementation of the Linux device driver entrypoints
+ */
+#include <linux/module.h>   /* kernel module definitions */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/cdev.h>     /* character device definitions */
+#include <linux/mm.h>       /* memory manager definitions */
+#include <linux/mali/mali_utgard_ioctl.h>
+#include <linux/version.h>
+#include <linux/device.h>
+#include "mali_kernel_license.h"
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/bug.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_core.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_ukk.h"
+#include "mali_ukk_wrappers.h"
+#include "mali_kernel_sysfs.h"
+#include "mali_pm.h"
+#include "mali_kernel_license.h"
+#include "mali_memory.h"
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_swap_alloc.h"
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include "mali_profiling_internal.h"
+#endif
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+#include "mali_osk_profiling.h"
+#include "mali_dvfs_policy.h"
+
+static int is_first_resume = 1;
+/*Store the clk and vol for boot/insmod and mali_resume*/
+static struct mali_gpu_clk_item mali_gpu_clk[2];
+#endif
+
+/* Streamline support for the Mali driver */
+#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_MALI400_PROFILING)
+/* Ask Linux to create the tracepoints */
+#define CREATE_TRACE_POINTS
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_hw_counter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counters);
+#endif /* CONFIG_TRACEPOINTS */
+
+#ifdef CONFIG_MALI_DEVFREQ
+#include "mali_devfreq.h"
+#include "mali_osk_mali.h"
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
+#include <linux/pm_opp.h>
+#else
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif /* Linux >= 3.13*/
+#define dev_pm_opp_of_add_table of_init_opp_table
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
+#define dev_pm_opp_of_remove_table of_free_opp_table
+#endif /* Linux >= 3.19 */
+#endif /* Linux >= 4.4.0 */
+#endif
+
+/* from the __malidrv_build_info.c file that is generated during build */
+extern const char *__malidrv_build_info(void);
+extern void mali_post_init(void);
+extern int mali_pdev_dts_init(struct platform_device* mali_gpu_device);
+extern int mpgpu_class_init(void);
+extern void mpgpu_class_exit(void);
+
+int mali_page_fault = 0;
+module_param(mali_page_fault, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_page_fault, "mali_page_fault");
+
+int pp_hardware_reset = 0;
+module_param(pp_hardware_reset, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(pp_hardware_reset, "mali_hardware_reset");
+/* Module parameter to control log level */
+int mali_debug_level = 2;
+module_param(mali_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_debug_level, "Higher number, more dmesg output");
+
+extern int mali_max_job_runtime;
+module_param(mali_max_job_runtime, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_job_runtime, "Maximum allowed job runtime in msecs.\nJobs will be killed after this no matter what");
+
+extern int mali_l2_max_reads;
+module_param(mali_l2_max_reads, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_l2_max_reads, "Maximum reads for Mali L2 cache");
+
+extern unsigned int mali_dedicated_mem_start;
+module_param(mali_dedicated_mem_start, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_dedicated_mem_start, "Physical start address of dedicated Mali GPU memory.");
+
+extern unsigned int mali_dedicated_mem_size;
+module_param(mali_dedicated_mem_size, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_dedicated_mem_size, "Size of dedicated Mali GPU memory.");
+
+extern unsigned int mali_shared_mem_size;
+module_param(mali_shared_mem_size, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_shared_mem_size, "Size of shared Mali GPU memory.");
+
+#if defined(CONFIG_MALI400_PROFILING)
+extern int mali_boot_profiling;
+module_param(mali_boot_profiling, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_boot_profiling, "Start profiling as a part of Mali driver initialization");
+#endif
+
+extern int mali_max_pp_cores_group_1;
+module_param(mali_max_pp_cores_group_1, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_pp_cores_group_1, "Limit the number of PP cores to use from first PP group.");
+
+extern int mali_max_pp_cores_group_2;
+module_param(mali_max_pp_cores_group_2, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_pp_cores_group_2, "Limit the number of PP cores to use from second PP group (Mali-450 only).");
+
+extern unsigned int mali_mem_swap_out_threshold_value;
+module_param(mali_mem_swap_out_threshold_value, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_mem_swap_out_threshold_value, "Threshold value used to limit how much swappable memory cached in Mali driver.");
+
+#if defined(CONFIG_MALI_DVFS)
+/** the max fps the same as display vsync default 60, can set by module insert parameter */
+extern int mali_max_system_fps;
+module_param(mali_max_system_fps, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_system_fps, "Max system fps the same as display VSYNC.");
+
+/** a lower limit on their desired FPS default 58, can set by module insert parameter*/
+extern int mali_desired_fps;
+module_param(mali_desired_fps, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_desired_fps, "A bit lower than max_system_fps which user desired fps");
+#endif
+
+#if MALI_ENABLE_CPU_CYCLES
+#include <linux/cpumask.h>
+#include <linux/timer.h>
+#include <asm/smp.h>
+static struct timer_list mali_init_cpu_clock_timers[8];
+static u32 mali_cpu_clock_last_value[8] = {0,};
+#endif
+
+/* Export symbols from common code: mali_user_settings.c */
+#include "mali_user_settings_db.h"
+EXPORT_SYMBOL(mali_set_user_setting);
+EXPORT_SYMBOL(mali_get_user_setting);
+
+static char mali_dev_name[] = "mali"; /* should be const, but the functions we call requires non-cost */
+
+/* This driver only supports one Mali device, and this variable stores this single platform device */
+struct platform_device *mali_platform_device = NULL;
+
+/* This driver only supports one Mali device, and this variable stores the exposed misc device (/dev/mali) */
+static struct miscdevice mali_miscdevice = { 0, };
+
+static int mali_miscdevice_register(struct platform_device *pdev);
+static void mali_miscdevice_unregister(void);
+
+static int mali_open(struct inode *inode, struct file *filp);
+static int mali_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int mali_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static int mali_probe(struct platform_device *pdev);
+static int mali_remove(struct platform_device *pdev);
+
+static int mali_driver_suspend_scheduler(struct device *dev);
+static int mali_driver_resume_scheduler(struct device *dev);
+
+#ifdef CONFIG_PM_RUNTIME
+static int mali_driver_runtime_suspend(struct device *dev);
+static int mali_driver_runtime_resume(struct device *dev);
+static int mali_driver_runtime_idle(struct device *dev);
+#endif
+
+#if defined(MALI_FAKE_PLATFORM_DEVICE)
+#if defined(CONFIG_MALI_DT)
+extern int mali_platform_device_init(struct platform_device *device);
+extern int mali_platform_device_deinit(struct platform_device *device);
+#else
+extern int mali_platform_device_register(void);
+extern int mali_platform_device_unregister(void);
+#endif
+#endif
+
+/* Linux power management operations provided by the Mali device driver */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29))
+struct pm_ext_ops mali_dev_ext_pm_ops = {
+	.base =
+	{
+		.suspend = mali_driver_suspend_scheduler,
+		.resume = mali_driver_resume_scheduler,
+		.freeze = mali_driver_suspend_scheduler,
+		.thaw =   mali_driver_resume_scheduler,
+	},
+};
+#else
+static const struct dev_pm_ops mali_dev_pm_ops = {
+#ifdef CONFIG_PM_RUNTIME
+	.runtime_suspend = mali_driver_runtime_suspend,
+	.runtime_resume = mali_driver_runtime_resume,
+	.runtime_idle = mali_driver_runtime_idle,
+#endif
+	.suspend = mali_driver_suspend_scheduler,
+	.resume = mali_driver_resume_scheduler,
+	.freeze = mali_driver_suspend_scheduler,
+	.thaw = mali_driver_resume_scheduler,
+	.poweroff = mali_driver_suspend_scheduler,
+};
+#endif
+
+#ifdef CONFIG_MALI_DT
+static struct of_device_id base_dt_ids[] = {
+	{.compatible = "arm,mali-300"},
+	{.compatible = "arm,mali-400"},
+	{.compatible = "arm,mali-450"},
+	{.compatible = "arm,mali-470"},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, base_dt_ids);
+#endif
+
+/* The Mali device driver struct */
+static struct platform_driver mali_platform_driver = {
+	.probe  = mali_probe,
+	.remove = mali_remove,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29))
+	.pm = &mali_dev_ext_pm_ops,
+#endif
+	.driver =
+	{
+		.name   = MALI_GPU_NAME_UTGARD,
+		.owner  = THIS_MODULE,
+		.bus = &platform_bus_type,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29))
+		.pm = &mali_dev_pm_ops,
+#endif
+#ifdef CONFIG_MALI_DT
+		.of_match_table = of_match_ptr(base_dt_ids),
+#endif
+	},
+};
+
+/* Linux misc device operations (/dev/mali) */
+struct file_operations mali_fops = {
+	.owner = THIS_MODULE,
+	.open = mali_open,
+	.release = mali_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl = mali_ioctl,
+#else
+	.ioctl = mali_ioctl,
+#endif
+	.compat_ioctl = mali_ioctl,
+	.mmap = mali_mmap
+};
+
+#if MALI_ENABLE_CPU_CYCLES
+void mali_init_cpu_time_counters(int reset, int enable_divide_by_64)
+{
+	/* The CPU assembly reference used is: ARM Architecture Reference Manual ARMv7-AR C.b */
+	u32 write_value;
+
+	/* See B4.1.116 PMCNTENSET, Performance Monitors Count Enable Set register, VMSA */
+	/* setting p15 c9 c12 1 to 0x8000000f==CPU_CYCLE_ENABLE |EVENT_3_ENABLE|EVENT_2_ENABLE|EVENT_1_ENABLE|EVENT_0_ENABLE */
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f));
+
+
+	/* See B4.1.117 PMCR, Performance Monitors Control Register. Writing to p15, c9, c12, 0 */
+	write_value = 1 << 0; /* Bit 0 set. Enable counters */
+	if (reset) {
+		write_value |= 1 << 1; /* Reset event counters */
+		write_value |= 1 << 2; /* Reset cycle counter  */
+	}
+	if (enable_divide_by_64) {
+		write_value |= 1 << 3; /* Enable the Clock divider by 64 */
+	}
+	write_value |= 1 << 4; /* Export enable. Not needed */
+	asm volatile("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(write_value));
+
+	/* PMOVSR Overflow Flag Status Register - Clear Clock and Event overflows */
+	asm volatile("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x8000000f));
+
+
+	/* See B4.1.124 PMUSERENR - setting p15 c9 c14 to 1" */
+	/* User mode access to the Performance Monitors enabled. */
+	/* Lets User space read cpu clock cycles */
+	asm volatile("mcr p15, 0, %0, c9, c14, 0" :: "r"(1));
+}
+
+/** A timer function that configures the cycle clock counter on current CPU.
+ * The function \a mali_init_cpu_time_counters_on_all_cpus sets up this
+ * function to trigger on all Cpus during module load.
+ */
+static void mali_init_cpu_clock_timer_func(unsigned long data)
+{
+	int reset_counters, enable_divide_clock_counter_by_64;
+	int current_cpu = raw_smp_processor_id();
+	unsigned int sample0;
+	unsigned int sample1;
+
+	MALI_IGNORE(data);
+
+	reset_counters = 1;
+	enable_divide_clock_counter_by_64 = 0;
+	mali_init_cpu_time_counters(reset_counters, enable_divide_clock_counter_by_64);
+
+	sample0 = mali_get_cpu_cyclecount();
+	sample1 = mali_get_cpu_cyclecount();
+
+	MALI_DEBUG_PRINT(3, ("Init Cpu %d cycle counter- First two samples: %08x %08x \n", current_cpu, sample0, sample1));
+}
+
+/** A timer functions for storing current time on all cpus.
+ * Used for checking if the clocks have similar values or if they are drifting.
+ */
+static void mali_print_cpu_clock_timer_func(unsigned long data)
+{
+	int current_cpu = raw_smp_processor_id();
+	unsigned int sample0;
+
+	MALI_IGNORE(data);
+	sample0 = mali_get_cpu_cyclecount();
+	if (current_cpu < 8) {
+		mali_cpu_clock_last_value[current_cpu] = sample0;
+	}
+}
+
+/** Init the performance registers on all CPUs to count clock cycles.
+ * For init \a print_only should be 0.
+ * If \a print_only is 1, it will intead print the current clock value of all CPUs.
+ */
+void mali_init_cpu_time_counters_on_all_cpus(int print_only)
+{
+	int i = 0;
+	int cpu_number;
+	int jiffies_trigger;
+	int jiffies_wait;
+
+	jiffies_wait = msecs_to_jiffies(20);
+	jiffies_trigger = jiffies + jiffies_wait;
+
+	for (i = 0 ; i < 8 ; i++) {
+		init_timer(&mali_init_cpu_clock_timers[i]);
+		if (print_only) mali_init_cpu_clock_timers[i].function = mali_print_cpu_clock_timer_func;
+		else            mali_init_cpu_clock_timers[i].function = mali_init_cpu_clock_timer_func;
+		mali_init_cpu_clock_timers[i].expires = jiffies_trigger ;
+	}
+	cpu_number = cpumask_first(cpu_online_mask);
+	for (i = 0 ; i < 8 ; i++) {
+		int next_cpu;
+		add_timer_on(&mali_init_cpu_clock_timers[i], cpu_number);
+		next_cpu = cpumask_next(cpu_number, cpu_online_mask);
+		if (next_cpu >= nr_cpu_ids) break;
+		cpu_number = next_cpu;
+	}
+
+	while (jiffies_wait) jiffies_wait = schedule_timeout_uninterruptible(jiffies_wait);
+
+	for (i = 0 ; i < 8 ; i++) {
+		del_timer_sync(&mali_init_cpu_clock_timers[i]);
+	}
+
+	if (print_only) {
+		if ((0 == mali_cpu_clock_last_value[2]) && (0 == mali_cpu_clock_last_value[3])) {
+			/* Diff can be printed if we want to check if the clocks are in sync
+			int diff = mali_cpu_clock_last_value[0] - mali_cpu_clock_last_value[1];*/
+			MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1]));
+		} else {
+			MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1], mali_cpu_clock_last_value[2], mali_cpu_clock_last_value[3]));
+		}
+	}
+}
+#endif
+
+int mali_module_init(void)
+{
+	int err = 0;
+
+	MALI_DEBUG_PRINT(2, ("Inserting Mali v%d device driver. \n", _MALI_API_VERSION));
+	//MALI_DEBUG_PRINT(2, ("Compiled: %s, time: %s.\n", __DATE__, __TIME__));
+	MALI_DEBUG_PRINT(2, ("Driver revision: %s\n", SVN_REV_STRING));
+
+#if MALI_ENABLE_CPU_CYCLES
+	mali_init_cpu_time_counters_on_all_cpus(0);
+	MALI_DEBUG_PRINT(2, ("CPU cycle counter setup complete\n"));
+	/* Printing the current cpu counters */
+	mali_init_cpu_time_counters_on_all_cpus(1);
+#endif
+
+	/* Initialize module wide settings */
+#ifdef MALI_FAKE_PLATFORM_DEVICE
+#ifndef CONFIG_MALI_DT
+	MALI_DEBUG_PRINT(2, ("mali_module_init() registering device\n"));
+	err = mali_platform_device_register();
+	if (0 != err) {
+		return err;
+	}
+#endif
+#endif
+
+	MALI_DEBUG_PRINT(2, ("mali_module_init() registering driver\n"));
+
+	err = platform_driver_register(&mali_platform_driver);
+
+	if (0 != err) {
+		MALI_DEBUG_PRINT(2, ("mali_module_init() Failed to register driver (%d)\n", err));
+#ifdef MALI_FAKE_PLATFORM_DEVICE
+#ifndef CONFIG_MALI_DT
+		mali_platform_device_unregister();
+#endif
+#endif
+		mali_platform_device = NULL;
+		return err;
+	}
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+	err = _mali_internal_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE);
+	if (0 != err) {
+		/* No biggie if we wheren't able to initialize the profiling */
+		MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n"));
+	}
+#endif
+
+	/* Tracing the current frequency and voltage from boot/insmod*/
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item(),to record current clk info.*/
+	mali_get_current_gpu_clk_item(&mali_gpu_clk[0]);
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[0].clock,
+				      mali_gpu_clk[0].vol / 1000,
+				      0, 0, 0);
+#endif
+
+	MALI_PRINT(("Mali device driver loaded\n"));
+
+	mpgpu_class_init();
+
+	return 0; /* Success */
+}
+
+void mali_module_exit(void)
+{
+	MALI_DEBUG_PRINT(2, ("Unloading Mali v%d device driver.\n", _MALI_API_VERSION));
+
+	MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering driver\n"));
+
+	platform_driver_unregister(&mali_platform_driver);
+
+#if defined(MALI_FAKE_PLATFORM_DEVICE)
+#ifndef CONFIG_MALI_DT
+	MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering device\n"));
+	mali_platform_device_unregister();
+#endif
+#endif
+
+	/* Tracing the current frequency and voltage from rmmod*/
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      0,
+				      0,
+				      0, 0, 0);
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+	_mali_internal_profiling_term();
+#endif
+	mpgpu_class_exit();
+
+	MALI_PRINT(("Mali device driver unloaded\n"));
+}
+
+#ifdef CONFIG_MALI_DEVFREQ
+struct mali_device *mali_device_alloc(void)
+{
+	return kzalloc(sizeof(struct mali_device), GFP_KERNEL);
+}
+
+void mali_device_free(struct mali_device *mdev)
+{
+	kfree(mdev);
+}
+#endif
+
+static int mali_probe(struct platform_device *pdev)
+{
+	int err;
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev;
+#endif
+
+	MALI_DEBUG_PRINT(2, ("mali_probe(): Called for platform device %s\n", pdev->name));
+
+	if (NULL != mali_platform_device) {
+		/* Already connected to a device, return error */
+		MALI_PRINT_ERROR(("mali_probe(): The Mali driver is already connected with a Mali device."));
+		return -EEXIST;
+	}
+
+	mali_platform_device = pdev;
+
+#ifdef CONFIG_MALI_DT
+	/* If we use DT to initialize our DDK, we have to prepare somethings. */
+	err = mali_platform_device_init(mali_platform_device);
+	if (0 != err) {
+		MALI_PRINT_ERROR(("mali_probe(): Failed to initialize platform device."));
+		mali_platform_device = NULL;
+		return -EFAULT;
+	}
+#endif
+
+#ifdef CONFIG_MALI_DEVFREQ
+	mdev = mali_device_alloc();
+	if (!mdev) {
+		MALI_PRINT_ERROR(("Can't allocate mali device private data\n"));
+		return -ENOMEM;
+	}
+
+	mdev->dev = &pdev->dev;
+	dev_set_drvdata(mdev->dev, mdev);
+
+	/*Initilization clock and regulator*/
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_REGULATOR)
+	mdev->regulator = regulator_get_optional(mdev->dev, "mali");
+	if (IS_ERR_OR_NULL(mdev->regulator)) {
+		MALI_DEBUG_PRINT(2, ("Continuing without Mali regulator control\n"));
+		mdev->regulator = NULL;
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+	if (dev_pm_opp_of_add_table(mdev->dev) < 0)
+		MALI_DEBUG_PRINT(3, ("OPP table not found\n"));
+#endif
+
+	/* Need to name the gpu clock "clk_mali" in the device tree */
+	mdev->clock = clk_get(mdev->dev, "clk_mali");
+	if (IS_ERR_OR_NULL(mdev->clock)) {
+		MALI_DEBUG_PRINT(2, ("Continuing without Mali clock control\n"));
+		mdev->clock = NULL;
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare_enable(mdev->clock);
+		if (err) {
+			MALI_PRINT_ERROR(("Failed to prepare and enable clock (%d)\n", err));
+			goto clock_prepare_failed;
+		}
+	}
+
+	/* initilize pm metrics related */
+	if (mali_pm_metrics_init(mdev) < 0) {
+		MALI_DEBUG_PRINT(2, ("mali pm metrics init failed\n"));
+		goto pm_metrics_init_failed;
+	}
+
+	if (mali_devfreq_init(mdev) < 0) {
+		MALI_DEBUG_PRINT(2, ("mali devfreq init failed\n"));
+		goto devfreq_init_failed;
+	}
+#endif
+
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_wq_init()) {
+		/* Initialize the Mali GPU HW specified by pdev */
+		if (_MALI_OSK_ERR_OK == mali_initialize_subsystems()) {
+			/* Register a misc device (so we are accessible from user space) */
+			err = mali_miscdevice_register(pdev);
+			if (0 == err) {
+				/* Setup sysfs entries */
+				err = mali_sysfs_register(mali_dev_name);
+
+				if (0 == err) {
+					mali_post_init();
+					MALI_DEBUG_PRINT(2, ("mali_probe(): Successfully initialized driver for platform device %s\n", pdev->name));
+
+					return 0;
+				} else {
+					MALI_PRINT_ERROR(("mali_probe(): failed to register sysfs entries"));
+				}
+				mali_miscdevice_unregister();
+			} else {
+				MALI_PRINT_ERROR(("mali_probe(): failed to register Mali misc device."));
+			}
+			mali_terminate_subsystems();
+		} else {
+			MALI_PRINT_ERROR(("mali_probe(): Failed to initialize Mali device driver."));
+		}
+		_mali_osk_wq_term();
+	}
+
+#ifdef CONFIG_MALI_DEVFREQ
+	mali_devfreq_term(mdev);
+devfreq_init_failed:
+	mali_pm_metrics_term(mdev);
+pm_metrics_init_failed:
+	clk_disable_unprepare(mdev->clock);
+clock_prepare_failed:
+	clk_put(mdev->clock);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_PM_OPP)
+	dev_pm_opp_of_remove_table(mdev->dev);
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_REGULATOR)
+	regulator_put(mdev->regulator);
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+	mali_device_free(mdev);
+#endif
+
+#ifdef CONFIG_MALI_DT
+	mali_platform_device_deinit(mali_platform_device);
+#endif
+	mali_platform_device = NULL;
+	return -EFAULT;
+}
+
+static int mali_remove(struct platform_device *pdev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(&pdev->dev);
+#endif
+
+	MALI_DEBUG_PRINT(2, ("mali_remove() called for platform device %s\n", pdev->name));
+	mali_sysfs_unregister();
+	mali_miscdevice_unregister();
+	mali_terminate_subsystems();
+	_mali_osk_wq_term();
+
+#ifdef CONFIG_MALI_DEVFREQ
+	mali_devfreq_term(mdev);
+
+	mali_pm_metrics_term(mdev);
+
+	if (mdev->clock) {
+		clk_disable_unprepare(mdev->clock);
+		clk_put(mdev->clock);
+		mdev->clock = NULL;
+	}
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_PM_OPP)
+	dev_pm_opp_of_remove_table(mdev->dev);
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_REGULATOR)
+	regulator_put(mdev->regulator);
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+	mali_device_free(mdev);
+#endif
+
+#ifdef CONFIG_MALI_DT
+	mali_platform_device_deinit(mali_platform_device);
+#endif
+	mali_platform_device = NULL;
+	return 0;
+}
+
+static int mali_miscdevice_register(struct platform_device *pdev)
+{
+	int err;
+
+	mali_miscdevice.minor = MISC_DYNAMIC_MINOR;
+	mali_miscdevice.name = mali_dev_name;
+	mali_miscdevice.fops = &mali_fops;
+	mali_miscdevice.parent = get_device(&pdev->dev);
+
+	err = misc_register(&mali_miscdevice);
+	if (0 != err) {
+		MALI_PRINT_ERROR(("Failed to register misc device, misc_register() returned %d\n", err));
+	}
+
+	return err;
+}
+
+static void mali_miscdevice_unregister(void)
+{
+	misc_deregister(&mali_miscdevice);
+}
+
+static int mali_driver_suspend_scheduler(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+                (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(mdev->devfreq);
+#endif
+
+	mali_pm_os_suspend(MALI_TRUE);
+	/* Tracing the frequency and voltage after mali is suspended */
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      0,
+				      0,
+				      0, 0, 0);
+	return 0;
+}
+
+static int mali_driver_resume_scheduler(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+	/* Tracing the frequency and voltage after mali is resumed */
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item() once,to record current clk info.*/
+	if (is_first_resume == 1) {
+		mali_get_current_gpu_clk_item(&mali_gpu_clk[1]);
+		is_first_resume = 0;
+	}
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[1].clock,
+				      mali_gpu_clk[1].vol / 1000,
+				      0, 0, 0);
+#endif
+	mali_pm_os_resume();
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+                (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(mdev->devfreq);
+#endif
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_RUNTIME
+static int mali_driver_runtime_suspend(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+	if (MALI_TRUE == mali_pm_runtime_suspend()) {
+		/* Tracing the frequency and voltage after mali is suspended */
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+					      0,
+					      0,
+					      0, 0, 0);
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+                (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+		MALI_DEBUG_PRINT(4, ("devfreq_suspend_device: stop devfreq monitor\n"));
+		devfreq_suspend_device(mdev->devfreq);
+#endif
+
+		return 0;
+	} else {
+		return -EBUSY;
+	}
+}
+
+static int mali_driver_runtime_resume(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+	/* Tracing the frequency and voltage after mali is resumed */
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item() once,to record current clk info.*/
+	if (is_first_resume == 1) {
+		mali_get_current_gpu_clk_item(&mali_gpu_clk[1]);
+		is_first_resume = 0;
+	}
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[1].clock,
+				      mali_gpu_clk[1].vol / 1000,
+				      0, 0, 0);
+#endif
+
+	mali_pm_runtime_resume();
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+                (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	MALI_DEBUG_PRINT(4, ("devfreq_resume_device: start devfreq monitor\n"));
+	devfreq_resume_device(mdev->devfreq);
+#endif
+	return 0;
+}
+
+static int mali_driver_runtime_idle(struct device *dev)
+{
+	/* Nothing to do */
+	return 0;
+}
+#endif
+
+static int mali_open(struct inode *inode, struct file *filp)
+{
+	struct mali_session_data *session_data;
+	_mali_osk_errcode_t err;
+
+	/* input validation */
+	if (mali_miscdevice.minor != iminor(inode)) {
+		MALI_PRINT_ERROR(("mali_open() Minor does not match\n"));
+		return -ENODEV;
+	}
+
+	/* allocated struct to track this session */
+	err = _mali_ukk_open((void **)&session_data);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	/* initialize file pointer */
+	filp->f_pos = 0;
+
+	/* link in our session data */
+	filp->private_data = (void *)session_data;
+
+	filp->f_mapping = mali_mem_swap_get_global_swap_file()->f_mapping;
+
+	return 0;
+}
+
+static int mali_release(struct inode *inode, struct file *filp)
+{
+	_mali_osk_errcode_t err;
+
+	/* input validation */
+	if (mali_miscdevice.minor != iminor(inode)) {
+		MALI_PRINT_ERROR(("mali_release() Minor does not match\n"));
+		return -ENODEV;
+	}
+
+	err = _mali_ukk_close((void **)&filp->private_data);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int map_errcode(_mali_osk_errcode_t err)
+{
+	switch (err) {
+	case _MALI_OSK_ERR_OK :
+		return 0;
+	case _MALI_OSK_ERR_FAULT:
+		return -EFAULT;
+	case _MALI_OSK_ERR_INVALID_FUNC:
+		return -ENOTTY;
+	case _MALI_OSK_ERR_INVALID_ARGS:
+		return -EINVAL;
+	case _MALI_OSK_ERR_NOMEM:
+		return -ENOMEM;
+	case _MALI_OSK_ERR_TIMEOUT:
+		return -ETIMEDOUT;
+	case _MALI_OSK_ERR_RESTARTSYSCALL:
+		return -ERESTARTSYS;
+	case _MALI_OSK_ERR_ITEM_NOT_FOUND:
+		return -ENOENT;
+	default:
+		return -EFAULT;
+	}
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int mali_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	int err;
+	struct mali_session_data *session_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+	/* inode not used */
+	(void)inode;
+#endif
+
+	MALI_DEBUG_PRINT(7, ("Ioctl received 0x%08X 0x%08lX\n", cmd, arg));
+
+	session_data = (struct mali_session_data *)filp->private_data;
+	if (NULL == session_data) {
+		MALI_DEBUG_PRINT(7, ("filp->private_data was NULL\n"));
+		return -ENOTTY;
+	}
+
+	if (NULL == (void *)arg) {
+		MALI_DEBUG_PRINT(7, ("arg was NULL\n"));
+		return -ENOTTY;
+	}
+
+	switch (cmd) {
+	case MALI_IOC_WAIT_FOR_NOTIFICATION:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_wait_for_notification_s), sizeof(u64)));
+		err = wait_for_notification_wrapper(session_data, (_mali_uk_wait_for_notification_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_API_VERSION_V2:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_api_version_v2_s), sizeof(u64)));
+		err = get_api_version_v2_wrapper(session_data, (_mali_uk_get_api_version_v2_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_API_VERSION:
+		err = get_api_version_wrapper(session_data, (_mali_uk_get_api_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_POST_NOTIFICATION:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_post_notification_s), sizeof(u64)));
+		err = post_notification_wrapper(session_data, (_mali_uk_post_notification_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_USER_SETTINGS:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_user_settings_s), sizeof(u64)));
+		err = get_user_settings_wrapper(session_data, (_mali_uk_get_user_settings_s __user *)arg);
+		break;
+
+	case MALI_IOC_REQUEST_HIGH_PRIORITY:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_request_high_priority_s), sizeof(u64)));
+		err = request_high_priority_wrapper(session_data, (_mali_uk_request_high_priority_s __user *)arg);
+		break;
+
+	case MALI_IOC_PENDING_SUBMIT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pending_submit_s), sizeof(u64)));
+		err = pending_submit_wrapper(session_data, (_mali_uk_pending_submit_s __user *)arg);
+		break;
+
+#if defined(CONFIG_MALI400_PROFILING)
+	case MALI_IOC_PROFILING_ADD_EVENT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_add_event_s), sizeof(u64)));
+		err = profiling_add_event_wrapper(session_data, (_mali_uk_profiling_add_event_s __user *)arg);
+		break;
+
+	case MALI_IOC_PROFILING_REPORT_SW_COUNTERS:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_sw_counters_report_s), sizeof(u64)));
+		err = profiling_report_sw_counters_wrapper(session_data, (_mali_uk_sw_counters_report_s __user *)arg);
+		break;
+
+	case MALI_IOC_PROFILING_STREAM_FD_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_stream_fd_get_s), sizeof(u64)));
+		err = profiling_get_stream_fd_wrapper(session_data, (_mali_uk_profiling_stream_fd_get_s __user *)arg);
+		break;
+
+	case MALI_IOC_PROILING_CONTROL_SET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_control_set_s), sizeof(u64)));
+		err = profiling_control_set_wrapper(session_data, (_mali_uk_profiling_control_set_s __user *)arg);
+		break;
+#else
+
+	case MALI_IOC_PROFILING_ADD_EVENT:          /* FALL-THROUGH */
+	case MALI_IOC_PROFILING_REPORT_SW_COUNTERS: /* FALL-THROUGH */
+		MALI_DEBUG_PRINT(2, ("Profiling not supported\n"));
+		err = -ENOTTY;
+		break;
+#endif
+
+	case MALI_IOC_PROFILING_MEMORY_USAGE_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_memory_usage_get_s), sizeof(u64)));
+		err = mem_usage_get_wrapper(session_data, (_mali_uk_profiling_memory_usage_get_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_ALLOC:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_alloc_mem_s), sizeof(u64)));
+		err = mem_alloc_wrapper(session_data, (_mali_uk_alloc_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_FREE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_free_mem_s), sizeof(u64)));
+		err = mem_free_wrapper(session_data, (_mali_uk_free_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_BIND:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_bind_mem_s), sizeof(u64)));
+		err = mem_bind_wrapper(session_data, (_mali_uk_bind_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_UNBIND:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_unbind_mem_s), sizeof(u64)));
+		err = mem_unbind_wrapper(session_data, (_mali_uk_unbind_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_COW:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_cow_mem_s), sizeof(u64)));
+		err = mem_cow_wrapper(session_data, (_mali_uk_cow_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_COW_MODIFY_RANGE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_cow_modify_range_s), sizeof(u64)));
+		err = mem_cow_modify_range_wrapper(session_data, (_mali_uk_cow_modify_range_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_RESIZE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_resize_s), sizeof(u64)));
+		err = mem_resize_mem_wrapper(session_data, (_mali_uk_mem_resize_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_WRITE_SAFE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_write_safe_s), sizeof(u64)));
+		err = mem_write_safe_wrapper(session_data, (_mali_uk_mem_write_safe_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_query_mmu_page_table_dump_size_s), sizeof(u64)));
+		err = mem_query_mmu_page_table_dump_size_wrapper(session_data, (_mali_uk_query_mmu_page_table_dump_size_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dump_mmu_page_table_s), sizeof(u64)));
+		err = mem_dump_mmu_page_table_wrapper(session_data, (_mali_uk_dump_mmu_page_table_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_DMA_BUF_GET_SIZE:
+#ifdef CONFIG_DMA_SHARED_BUFFER
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dma_buf_get_size_s), sizeof(u64)));
+		err = mali_dma_buf_get_size(session_data, (_mali_uk_dma_buf_get_size_s __user *)arg);
+#else
+		MALI_DEBUG_PRINT(2, ("DMA-BUF not supported\n"));
+		err = -ENOTTY;
+#endif
+		break;
+
+	case MALI_IOC_PP_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_start_job_s), sizeof(u64)));
+		err = pp_start_job_wrapper(session_data, (_mali_uk_pp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_AND_GP_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_and_gp_start_job_s), sizeof(u64)));
+		err = pp_and_gp_start_job_wrapper(session_data, (_mali_uk_pp_and_gp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_NUMBER_OF_CORES_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_number_of_cores_s), sizeof(u64)));
+		err = pp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_pp_number_of_cores_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_CORE_VERSION_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_core_version_s), sizeof(u64)));
+		err = pp_get_core_version_wrapper(session_data, (_mali_uk_get_pp_core_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_DISABLE_WB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_disable_wb_s), sizeof(u64)));
+		err = pp_disable_wb_wrapper(session_data, (_mali_uk_pp_disable_wb_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_start_job_s), sizeof(u64)));
+		err = gp_start_job_wrapper(session_data, (_mali_uk_gp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_NUMBER_OF_CORES_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_number_of_cores_s), sizeof(u64)));
+		err = gp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_gp_number_of_cores_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_CORE_VERSION_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_core_version_s), sizeof(u64)));
+		err = gp_get_core_version_wrapper(session_data, (_mali_uk_get_gp_core_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_SUSPEND_RESPONSE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_suspend_response_s), sizeof(u64)));
+		err = gp_suspend_response_wrapper(session_data, (_mali_uk_gp_suspend_response_s __user *)arg);
+		break;
+
+	case MALI_IOC_VSYNC_EVENT_REPORT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_vsync_event_report_s), sizeof(u64)));
+		err = vsync_event_report_wrapper(session_data, (_mali_uk_vsync_event_report_s __user *)arg);
+		break;
+
+	case MALI_IOC_TIMELINE_GET_LATEST_POINT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_get_latest_point_s), sizeof(u64)));
+		err = timeline_get_latest_point_wrapper(session_data, (_mali_uk_timeline_get_latest_point_s __user *)arg);
+		break;
+	case MALI_IOC_TIMELINE_WAIT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_wait_s), sizeof(u64)));
+		err = timeline_wait_wrapper(session_data, (_mali_uk_timeline_wait_s __user *)arg);
+		break;
+	case MALI_IOC_TIMELINE_CREATE_SYNC_FENCE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_create_sync_fence_s), sizeof(u64)));
+		err = timeline_create_sync_fence_wrapper(session_data, (_mali_uk_timeline_create_sync_fence_s __user *)arg);
+		break;
+	case MALI_IOC_SOFT_JOB_START:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_start_s), sizeof(u64)));
+		err = soft_job_start_wrapper(session_data, (_mali_uk_soft_job_start_s __user *)arg);
+		break;
+	case MALI_IOC_SOFT_JOB_SIGNAL:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_signal_s), sizeof(u64)));
+		err = soft_job_signal_wrapper(session_data, (_mali_uk_soft_job_signal_s __user *)arg);
+		break;
+
+	default:
+		MALI_DEBUG_PRINT(2, ("No handler for ioctl 0x%08X 0x%08lX\n", cmd, arg));
+		err = -ENOTTY;
+	};
+
+	return err;
+}
+
+
+module_init(mali_module_init);
+module_exit(mali_module_exit);
+
+MODULE_LICENSE(MALI_KERNEL_LINUX_LICENSE);
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(SVN_REV_STRING);
diff --git a/utgard/r6p1/linux/mali_kernel_linux.h b/utgard/r6p1/linux/mali_kernel_linux.h
new file mode 100755
index 0000000..e87879a
--- /dev/null
+++ b/utgard/r6p1/linux/mali_kernel_linux.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_LINUX_H__
+#define __MALI_KERNEL_LINUX_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/cdev.h>     /* character device definitions */
+#include <linux/idr.h>
+#include <linux/rbtree.h>
+#include "mali_kernel_license.h"
+#include "mali_osk_types.h"
+#include <linux/version.h>
+
+extern struct platform_device *mali_platform_device;
+
+/* After 3.19.0 kenrel droped CONFIG_PM_RUNTIME define,define by ourself */
+#if defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
+#define CONFIG_PM_RUNTIME 1
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LINUX_H__ */
diff --git a/utgard/r6p1/linux/mali_kernel_sysfs.c b/utgard/r6p1/linux/mali_kernel_sysfs.c
new file mode 100755
index 0000000..9e00977
--- /dev/null
+++ b/utgard/r6p1/linux/mali_kernel_sysfs.c
@@ -0,0 +1,1410 @@
+/**
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+
+/**
+ * @file mali_kernel_sysfs.c
+ * Implementation of some sysfs data exports
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include "mali_kernel_license.h"
+#include "mali_kernel_common.h"
+#include "mali_ukk.h"
+
+#if MALI_LICENSE_IS_GPL
+
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_sysfs.h"
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include <linux/slab.h>
+#include "mali_osk_profiling.h"
+#endif
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_pm.h"
+#include "mali_pmu.h"
+#include "mali_group.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_l2_cache.h"
+#include "mali_hw_core.h"
+#include "mali_kernel_core.h"
+#include "mali_user_settings_db.h"
+#include "mali_profiling_internal.h"
+#include "mali_gp_job.h"
+#include "mali_pp_job.h"
+#include "mali_executor.h"
+
+#define PRIVATE_DATA_COUNTER_MAKE_GP(src) (src)
+#define PRIVATE_DATA_COUNTER_MAKE_PP(src) ((1 << 24) | src)
+#define PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(src, sub_job) ((1 << 24) | (1 << 16) | (sub_job << 8) | src)
+#define PRIVATE_DATA_COUNTER_IS_PP(a) ((((a) >> 24) & 0xFF) ? MALI_TRUE : MALI_FALSE)
+#define PRIVATE_DATA_COUNTER_GET_SRC(a) (a & 0xFF)
+#define PRIVATE_DATA_COUNTER_IS_SUB_JOB(a) ((((a) >> 16) & 0xFF) ? MALI_TRUE : MALI_FALSE)
+#define PRIVATE_DATA_COUNTER_GET_SUB_JOB(a) (((a) >> 8) & 0xFF)
+
+#define POWER_BUFFER_SIZE 3
+
+static struct dentry *mali_debugfs_dir = NULL;
+
+typedef enum {
+	_MALI_DEVICE_SUSPEND,
+	_MALI_DEVICE_RESUME,
+	_MALI_DEVICE_DVFS_PAUSE,
+	_MALI_DEVICE_DVFS_RESUME,
+	_MALI_MAX_EVENTS
+} _mali_device_debug_power_events;
+
+static const char *const mali_power_events[_MALI_MAX_EVENTS] = {
+	[_MALI_DEVICE_SUSPEND] = "suspend",
+	[_MALI_DEVICE_RESUME] = "resume",
+	[_MALI_DEVICE_DVFS_PAUSE] = "dvfs_pause",
+	[_MALI_DEVICE_DVFS_RESUME] = "dvfs_resume",
+};
+
+static mali_bool power_always_on_enabled = MALI_FALSE;
+
+static int open_copy_private_data(struct inode *inode, struct file *filp)
+{
+	filp->private_data = inode->i_private;
+	return 0;
+}
+
+static ssize_t group_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	struct mali_group *group;
+
+	group = (struct mali_group *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	r = snprintf(buffer, 64, "%u\n",
+		     mali_executor_group_is_disabled(group) ? 0 : 1);
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static ssize_t group_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	unsigned long val;
+	struct mali_group *group;
+
+	group = (struct mali_group *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	r = kstrtoul(&buffer[0], 10, &val);
+	if (0 != r) {
+		return -EINVAL;
+	}
+
+	switch (val) {
+	case 1:
+		mali_executor_group_enable(group);
+		break;
+	case 0:
+		mali_executor_group_disable(group);
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static const struct file_operations group_enabled_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read = group_enabled_read,
+	.write = group_enabled_write,
+};
+
+static ssize_t hw_core_base_addr_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	struct mali_hw_core *hw_core;
+
+	hw_core = (struct mali_hw_core *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(hw_core);
+
+	r = snprintf(buffer, 64, "0x%lX\n", hw_core->phys_addr);
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations hw_core_base_addr_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read = hw_core_base_addr_read,
+};
+
+static ssize_t profiling_counter_src_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data);
+	u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data);
+	mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data);
+	u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data);
+	char buf[64];
+	int r;
+	u32 val;
+
+	if (MALI_TRUE == is_pp) {
+		/* PP counter */
+		if (MALI_TRUE == is_sub_job) {
+			/* Get counter for a particular sub job */
+			if (0 == src_id) {
+				val = mali_pp_job_get_pp_counter_sub_job_src0(sub_job);
+			} else {
+				val = mali_pp_job_get_pp_counter_sub_job_src1(sub_job);
+			}
+		} else {
+			/* Get default counter for all PP sub jobs */
+			if (0 == src_id) {
+				val = mali_pp_job_get_pp_counter_global_src0();
+			} else {
+				val = mali_pp_job_get_pp_counter_global_src1();
+			}
+		}
+	} else {
+		/* GP counter */
+		if (0 == src_id) {
+			val = mali_gp_job_get_gp_counter_src0();
+		} else {
+			val = mali_gp_job_get_gp_counter_src1();
+		}
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == val) {
+		r = snprintf(buf, 64, "-1\n");
+	} else {
+		r = snprintf(buf, 64, "%u\n", val);
+	}
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t profiling_counter_src_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data);
+	u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data);
+	mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data);
+	u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data);
+	char buf[64];
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	if (MALI_TRUE == is_pp) {
+		/* PP counter */
+		if (MALI_TRUE == is_sub_job) {
+			/* Set counter for a particular sub job */
+			if (0 == src_id) {
+				mali_pp_job_set_pp_counter_sub_job_src0(sub_job, (u32)val);
+			} else {
+				mali_pp_job_set_pp_counter_sub_job_src1(sub_job, (u32)val);
+			}
+		} else {
+			/* Set default counter for all PP sub jobs */
+			if (0 == src_id) {
+				mali_pp_job_set_pp_counter_global_src0((u32)val);
+			} else {
+				mali_pp_job_set_pp_counter_global_src1((u32)val);
+			}
+		}
+	} else {
+		/* GP counter */
+		if (0 == src_id) {
+			mali_gp_job_set_gp_counter_src0((u32)val);
+		} else {
+			mali_gp_job_set_gp_counter_src1((u32)val);
+		}
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static const struct file_operations profiling_counter_src_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = profiling_counter_src_read,
+	.write = profiling_counter_src_write,
+};
+
+static ssize_t l2_l2x_counter_srcx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	int r;
+	u32 val;
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+
+	if (0 == src_id) {
+		val = mali_l2_cache_core_get_counter_src0(l2_core);
+	} else {
+		val = mali_l2_cache_core_get_counter_src1(l2_core);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == val) {
+		r = snprintf(buf, 64, "-1\n");
+	} else {
+		r = snprintf(buf, 64, "%u\n", val);
+	}
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t l2_l2x_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+	char buf[64];
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	mali_l2_cache_core_set_counter_src(l2_core, src_id, (u32)val);
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t l2_all_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	long val;
+	int ret;
+	u32 l2_id;
+	struct mali_l2_cache_core *l2_cache;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	l2_id = 0;
+	l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+	while (NULL != l2_cache) {
+		mali_l2_cache_core_set_counter_src(l2_cache, src_id, (u32)val);
+
+		/* try next L2 */
+		l2_id++;
+		l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t l2_l2x_counter_src0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_src1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 1);
+}
+
+static ssize_t l2_l2x_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 1);
+}
+
+static ssize_t l2_all_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_all_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 1);
+}
+
+static const struct file_operations l2_l2x_counter_src0_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_src0_read,
+	.write = l2_l2x_counter_src0_write,
+};
+
+static const struct file_operations l2_l2x_counter_src1_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_src1_read,
+	.write = l2_l2x_counter_src1_write,
+};
+
+static const struct file_operations l2_all_counter_src0_fops = {
+	.owner = THIS_MODULE,
+	.write = l2_all_counter_src0_write,
+};
+
+static const struct file_operations l2_all_counter_src1_fops = {
+	.owner = THIS_MODULE,
+	.write = l2_all_counter_src1_write,
+};
+
+static ssize_t l2_l2x_counter_valx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	int r;
+	u32 src0 = 0;
+	u32 val0 = 0;
+	u32 src1 = 0;
+	u32 val1 = 0;
+	u32 val = -1;
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+
+	mali_l2_cache_core_get_counter_values(l2_core, &src0, &val0, &src1, &val1);
+
+	if (0 == src_id) {
+		if (MALI_HW_CORE_NO_COUNTER != val0) {
+			val = val0;
+		}
+	} else {
+		if (MALI_HW_CORE_NO_COUNTER != val1) {
+			val = val1;
+		}
+	}
+
+	r = snprintf(buf, 64, "%u\n", val);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t l2_l2x_counter_val0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_val1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 1);
+}
+
+static const struct file_operations l2_l2x_counter_val0_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_val0_read,
+};
+
+static const struct file_operations l2_l2x_counter_val1_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_val1_read,
+};
+
+static ssize_t power_always_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+	char buf[32];
+
+	cnt = min(cnt, sizeof(buf) - 1);
+	if (copy_from_user(buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+	buf[cnt] = '\0';
+
+	ret = kstrtoul(buf, 10, &val);
+	if (0 != ret) {
+		return ret;
+	}
+
+	/* Update setting (not exactly thread safe) */
+	if (1 == val && MALI_FALSE == power_always_on_enabled) {
+		power_always_on_enabled = MALI_TRUE;
+		_mali_osk_pm_dev_ref_get_sync();
+	} else if (0 == val && MALI_TRUE == power_always_on_enabled) {
+		power_always_on_enabled = MALI_FALSE;
+		_mali_osk_pm_dev_ref_put();
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t power_always_on_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	if (MALI_TRUE == power_always_on_enabled) {
+		return simple_read_from_buffer(ubuf, cnt, ppos, "1\n", 2);
+	} else {
+		return simple_read_from_buffer(ubuf, cnt, ppos, "0\n", 2);
+	}
+}
+
+static const struct file_operations power_always_on_fops = {
+	.owner = THIS_MODULE,
+	.read  = power_always_on_read,
+	.write = power_always_on_write,
+};
+
+static ssize_t power_power_events_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_SUSPEND], strlen(mali_power_events[_MALI_DEVICE_SUSPEND]) - 1)) {
+		mali_pm_os_suspend(MALI_TRUE);
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_RESUME], strlen(mali_power_events[_MALI_DEVICE_RESUME]) - 1)) {
+		mali_pm_os_resume();
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_DVFS_PAUSE], strlen(mali_power_events[_MALI_DEVICE_DVFS_PAUSE]) - 1)) {
+		mali_dev_pause();
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_DVFS_RESUME], strlen(mali_power_events[_MALI_DEVICE_DVFS_RESUME]) - 1)) {
+		mali_dev_resume();
+	}
+	*ppos += cnt;
+	return cnt;
+}
+
+static loff_t power_power_events_seek(struct file *file, loff_t offset, int orig)
+{
+	file->f_pos = offset;
+	return 0;
+}
+
+static const struct file_operations power_power_events_fops = {
+	.owner = THIS_MODULE,
+	.write = power_power_events_write,
+	.llseek = power_power_events_seek,
+};
+
+#if MALI_STATE_TRACKING
+static int mali_seq_internal_state_show(struct seq_file *seq_file, void *v)
+{
+	u32 len = 0;
+	u32 size;
+	char *buf;
+
+	size = seq_get_buf(seq_file, &buf);
+
+	if (!size) {
+		return -ENOMEM;
+	}
+
+	/* Create the internal state dump. */
+	len  = snprintf(buf + len, size - len, "Mali device driver %s\n", SVN_REV_STRING);
+	len += snprintf(buf + len, size - len, "License: %s\n\n", MALI_KERNEL_LINUX_LICENSE);
+
+	len += _mali_kernel_core_dump_state(buf + len, size - len);
+
+	seq_commit(seq_file, len);
+
+	return 0;
+}
+
+static int mali_seq_internal_state_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mali_seq_internal_state_show, NULL);
+}
+
+static const struct file_operations mali_seq_internal_state_fops = {
+	.owner = THIS_MODULE,
+	.open = mali_seq_internal_state_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+#endif /* MALI_STATE_TRACKING */
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+static ssize_t profiling_record_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	int r;
+
+	r = snprintf(buf, 64, "%u\n", _mali_internal_profiling_is_recording() ? 1 : 0);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t profiling_record_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	unsigned long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val != 0) {
+		u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* This can be made configurable at a later stage if we need to */
+
+		/* check if we are already recording */
+		if (MALI_TRUE == _mali_internal_profiling_is_recording()) {
+			MALI_DEBUG_PRINT(3, ("Recording of profiling events already in progress\n"));
+			return -EFAULT;
+		}
+
+		/* check if we need to clear out an old recording first */
+		if (MALI_TRUE == _mali_internal_profiling_have_recording()) {
+			if (_MALI_OSK_ERR_OK != _mali_internal_profiling_clear()) {
+				MALI_DEBUG_PRINT(3, ("Failed to clear existing recording of profiling events\n"));
+				return -EFAULT;
+			}
+		}
+
+		/* start recording profiling data */
+		if (_MALI_OSK_ERR_OK != _mali_internal_profiling_start(&limit)) {
+			MALI_DEBUG_PRINT(3, ("Failed to start recording of profiling events\n"));
+			return -EFAULT;
+		}
+
+		MALI_DEBUG_PRINT(3, ("Profiling recording started (max %u events)\n", limit));
+	} else {
+		/* stop recording profiling data */
+		u32 count = 0;
+		if (_MALI_OSK_ERR_OK != _mali_internal_profiling_stop(&count)) {
+			MALI_DEBUG_PRINT(2, ("Failed to stop recording of profiling events\n"));
+			return -EFAULT;
+		}
+
+		MALI_DEBUG_PRINT(2, ("Profiling recording stopped (recorded %u events)\n", count));
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static const struct file_operations profiling_record_fops = {
+	.owner = THIS_MODULE,
+	.read  = profiling_record_read,
+	.write = profiling_record_write,
+};
+
+static void *profiling_events_start(struct seq_file *s, loff_t *pos)
+{
+	loff_t *spos;
+
+	/* check if we have data avaiable */
+	if (MALI_TRUE != _mali_internal_profiling_have_recording()) {
+		return NULL;
+	}
+
+	spos = kmalloc(sizeof(loff_t), GFP_KERNEL);
+	if (NULL == spos) {
+		return NULL;
+	}
+
+	*spos = *pos;
+	return spos;
+}
+
+static void *profiling_events_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	loff_t *spos = v;
+
+	/* check if we have data avaiable */
+	if (MALI_TRUE != _mali_internal_profiling_have_recording()) {
+		return NULL;
+	}
+
+	/* check if the next entry actually is avaiable */
+	if (_mali_internal_profiling_get_count() <= (u32)(*spos + 1)) {
+		return NULL;
+	}
+
+	*pos = ++*spos;
+	return spos;
+}
+
+static void profiling_events_stop(struct seq_file *s, void *v)
+{
+	kfree(v);
+}
+
+static int profiling_events_show(struct seq_file *seq_file, void *v)
+{
+	loff_t *spos = v;
+	u32 index;
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+
+	index = (u32) * spos;
+
+	/* Retrieve all events */
+	if (_MALI_OSK_ERR_OK == _mali_internal_profiling_get_event(index, &timestamp, &event_id, data)) {
+		seq_printf(seq_file, "%llu %u %u %u %u %u %u\n", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]);
+		return 0;
+	}
+
+	return 0;
+}
+
+static int profiling_events_show_human_readable(struct seq_file *seq_file, void *v)
+{
+#define MALI_EVENT_ID_IS_HW(event_id) (((event_id & 0x00FF0000) >= MALI_PROFILING_EVENT_CHANNEL_GP0) && ((event_id & 0x00FF0000) <= MALI_PROFILING_EVENT_CHANNEL_PP7))
+
+	static u64 start_time = 0;
+	loff_t *spos = v;
+	u32 index;
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+
+	index = (u32) * spos;
+
+	/* Retrieve all events */
+	if (_MALI_OSK_ERR_OK == _mali_internal_profiling_get_event(index, &timestamp, &event_id, data)) {
+		seq_printf(seq_file, "%llu %u %u %u %u %u %u # ", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]);
+
+		if (0 == index) {
+			start_time = timestamp;
+		}
+
+		seq_printf(seq_file, "[%06u] ", index);
+
+		switch (event_id & 0x0F000000) {
+		case MALI_PROFILING_EVENT_TYPE_SINGLE:
+			seq_printf(seq_file, "SINGLE | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_START:
+			seq_printf(seq_file, "START | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_STOP:
+			seq_printf(seq_file, "STOP | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_SUSPEND:
+			seq_printf(seq_file, "SUSPEND | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_RESUME:
+			seq_printf(seq_file, "RESUME | ");
+			break;
+		default:
+			seq_printf(seq_file, "0x%01X | ", (event_id & 0x0F000000) >> 24);
+			break;
+		}
+
+		switch (event_id & 0x00FF0000) {
+		case MALI_PROFILING_EVENT_CHANNEL_SOFTWARE:
+			seq_printf(seq_file, "SW | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_GP0:
+			seq_printf(seq_file, "GP0 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP0:
+			seq_printf(seq_file, "PP0 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP1:
+			seq_printf(seq_file, "PP1 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP2:
+			seq_printf(seq_file, "PP2 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP3:
+			seq_printf(seq_file, "PP3 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP4:
+			seq_printf(seq_file, "PP4 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP5:
+			seq_printf(seq_file, "PP5 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP6:
+			seq_printf(seq_file, "PP6 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP7:
+			seq_printf(seq_file, "PP7 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_GPU:
+			seq_printf(seq_file, "GPU | ");
+			break;
+		default:
+			seq_printf(seq_file, "0x%02X | ", (event_id & 0x00FF0000) >> 16);
+			break;
+		}
+
+		if (MALI_EVENT_ID_IS_HW(event_id)) {
+			if (((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_START) || ((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_STOP)) {
+				switch (event_id & 0x0000FFFF) {
+				case MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL:
+					seq_printf(seq_file, "PHYSICAL | ");
+					break;
+				case MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL:
+					seq_printf(seq_file, "VIRTUAL | ");
+					break;
+				default:
+					seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+					break;
+				}
+			} else {
+				seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+			}
+		} else {
+			seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+		}
+
+		seq_printf(seq_file, "T0 + 0x%016llX\n", timestamp - start_time);
+
+		return 0;
+	}
+
+	return 0;
+}
+
+static const struct seq_operations profiling_events_seq_ops = {
+	.start = profiling_events_start,
+	.next  = profiling_events_next,
+	.stop  = profiling_events_stop,
+	.show  = profiling_events_show
+};
+
+static int profiling_events_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &profiling_events_seq_ops);
+}
+
+static const struct file_operations profiling_events_fops = {
+	.owner = THIS_MODULE,
+	.open = profiling_events_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+static const struct seq_operations profiling_events_human_readable_seq_ops = {
+	.start = profiling_events_start,
+	.next  = profiling_events_next,
+	.stop  = profiling_events_stop,
+	.show  = profiling_events_show_human_readable
+};
+
+static int profiling_events_human_readable_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &profiling_events_human_readable_seq_ops);
+}
+
+static const struct file_operations profiling_events_human_readable_fops = {
+	.owner = THIS_MODULE,
+	.open = profiling_events_human_readable_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+static int memory_debugfs_show(struct seq_file *s, void *private_data)
+{
+#ifdef MALI_MEM_SWAP_TRACKING
+	seq_printf(s, "  %-25s  %-10s %-25s %-10s  %-15s  %-15s  %-10s  %-10s %-10s\n"\
+		   "=======================================================================================================================================\n",
+		   "Name (:bytes)", "pid", "pid-name", "mali_mem", "max_mali_mem",
+		   "external_mem", "ump_mem", "dma_mem", "swap_mem");
+#else
+	seq_printf(s, "  %-25s  %-10s %-25s %-10s  %-15s  %-15s  %-10s  %-10s\n"\
+		   "=======================================================================================================================================\n",
+		   "Name (:bytes)", "pid", "pid-name", "mali_mem", "max_mali_mem",
+		   "external_mem", "ump_mem", "dma_mem");
+#endif
+	mali_session_memory_tracking(s);
+	return 0;
+}
+
+static int memory_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, memory_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations memory_usage_fops = {
+	.owner = THIS_MODULE,
+	.open = memory_debugfs_open,
+	.read  = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static ssize_t utilization_gp_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_gp_pp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t utilization_gp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_gp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t utilization_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_pp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+
+static const struct file_operations utilization_gp_pp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_gp_pp_read,
+};
+
+static const struct file_operations utilization_gp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_gp_read,
+};
+
+static const struct file_operations utilization_pp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_pp_read,
+};
+
+static ssize_t user_settings_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+	_mali_uk_user_setting_t setting;
+	char buf[32];
+
+	cnt = min(cnt, sizeof(buf) - 1);
+	if (copy_from_user(buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+	buf[cnt] = '\0';
+
+	ret = kstrtoul(buf, 10, &val);
+	if (0 != ret) {
+		return ret;
+	}
+
+	/* Update setting */
+	setting = (_mali_uk_user_setting_t)(filp->private_data);
+	mali_set_user_setting(setting, val);
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t user_settings_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 value;
+	_mali_uk_user_setting_t setting;
+
+	setting = (_mali_uk_user_setting_t)(filp->private_data);
+	value = mali_get_user_setting(setting);
+
+	r = snprintf(buf, 64, "%u\n", value);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static const struct file_operations user_settings_fops = {
+	.owner = THIS_MODULE,
+	.open = open_copy_private_data,
+	.read = user_settings_read,
+	.write = user_settings_write,
+};
+
+static int mali_sysfs_user_settings_register(void)
+{
+	struct dentry *mali_user_settings_dir = debugfs_create_dir("userspace_settings", mali_debugfs_dir);
+
+	if (mali_user_settings_dir != NULL) {
+		long i;
+		for (i = 0; i < _MALI_UK_USER_SETTING_MAX; i++) {
+			debugfs_create_file(_mali_uk_user_setting_descriptions[i],
+					    0600, mali_user_settings_dir, (void *)i,
+					    &user_settings_fops);
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t pp_num_cores_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int ret;
+	char buffer[32];
+	unsigned long val;
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	ret = kstrtoul(&buffer[0], 10, &val);
+	if (0 != ret) {
+		return -EINVAL;
+	}
+
+	ret = mali_executor_set_perf_level(val, MALI_TRUE); /* override even if core scaling is disabled */
+	if (ret) {
+		return ret;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static ssize_t pp_num_cores_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+
+	r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_enabled());
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations pp_num_cores_enabled_fops = {
+	.owner = THIS_MODULE,
+	.write = pp_num_cores_enabled_write,
+	.read = pp_num_cores_enabled_read,
+	.llseek = default_llseek,
+};
+
+static ssize_t pp_num_cores_total_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+
+	r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_total());
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations pp_num_cores_total_fops = {
+	.owner = THIS_MODULE,
+	.read = pp_num_cores_total_read,
+};
+
+static ssize_t pp_core_scaling_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int ret;
+	char buffer[32];
+	unsigned long val;
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	ret = kstrtoul(&buffer[0], 10, &val);
+	if (0 != ret) {
+		return -EINVAL;
+	}
+
+	switch (val) {
+	case 1:
+		mali_executor_core_scaling_enable();
+		break;
+	case 0:
+		mali_executor_core_scaling_disable();
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static ssize_t pp_core_scaling_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	return simple_read_from_buffer(buf, count, offp, mali_executor_core_scaling_is_enabled() ? "1\n" : "0\n", 2);
+}
+static const struct file_operations pp_core_scaling_enabled_fops = {
+	.owner = THIS_MODULE,
+	.write = pp_core_scaling_enabled_write,
+	.read = pp_core_scaling_enabled_read,
+	.llseek = default_llseek,
+};
+
+static ssize_t version_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r = 0;
+	char buffer[64];
+
+	switch (mali_kernel_core_get_product_id()) {
+	case _MALI_PRODUCT_ID_MALI200:
+		r = snprintf(buffer, 64, "Mali-200\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI300:
+		r = snprintf(buffer, 64, "Mali-300\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI400:
+		r = snprintf(buffer, 64, "Mali-400 MP\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI450:
+		r = snprintf(buffer, 64, "Mali-450 MP\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI470:
+		r = snprintf(buffer, 64, "Mali-470 MP\n");
+		break;
+	case _MALI_PRODUCT_ID_UNKNOWN:
+		return -EINVAL;
+		break;
+	};
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations version_fops = {
+	.owner = THIS_MODULE,
+	.read = version_read,
+};
+
+#if defined(DEBUG)
+static int timeline_debugfs_show(struct seq_file *s, void *private_data)
+{
+	struct mali_session_data *session, *tmp;
+	u32 session_seq = 1;
+
+	seq_printf(s, "timeline system info: \n=================\n\n");
+
+	mali_session_lock();
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		seq_printf(s, "session %d <%p> start:\n", session_seq, session);
+		mali_timeline_debug_print_system(session->timeline_system, s);
+		seq_printf(s, "session %d end\n\n\n", session_seq++);
+	}
+	mali_session_unlock();
+
+	return 0;
+}
+
+static int timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, timeline_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations timeline_dump_fops = {
+	.owner = THIS_MODULE,
+	.open = timeline_debugfs_open,
+	.read  = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release
+};
+#endif
+
+int mali_sysfs_register(const char *mali_dev_name)
+{
+	mali_debugfs_dir = debugfs_create_dir(mali_dev_name, NULL);
+	if (ERR_PTR(-ENODEV) == mali_debugfs_dir) {
+		/* Debugfs not supported. */
+		mali_debugfs_dir = NULL;
+	} else {
+		if (NULL != mali_debugfs_dir) {
+			/* Debugfs directory created successfully; create files now */
+			struct dentry *mali_power_dir;
+			struct dentry *mali_gp_dir;
+			struct dentry *mali_pp_dir;
+			struct dentry *mali_l2_dir;
+			struct dentry *mali_profiling_dir;
+
+			debugfs_create_file("version", 0400, mali_debugfs_dir, NULL, &version_fops);
+
+			mali_power_dir = debugfs_create_dir("power", mali_debugfs_dir);
+			if (mali_power_dir != NULL) {
+				debugfs_create_file("always_on", 0600, mali_power_dir, NULL, &power_always_on_fops);
+				debugfs_create_file("power_events", 0200, mali_power_dir, NULL, &power_power_events_fops);
+			}
+
+			mali_gp_dir = debugfs_create_dir("gp", mali_debugfs_dir);
+			if (mali_gp_dir != NULL) {
+				u32 num_groups;
+				long i;
+
+				num_groups = mali_group_get_glob_num_groups();
+				for (i = 0; i < num_groups; i++) {
+					struct mali_group *group = mali_group_get_glob_group(i);
+
+					struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+					if (NULL != gp_core) {
+						struct dentry *mali_gp_gpx_dir;
+						mali_gp_gpx_dir = debugfs_create_dir("gp0", mali_gp_dir);
+						if (NULL != mali_gp_gpx_dir) {
+							debugfs_create_file("base_addr", 0400, mali_gp_gpx_dir, &gp_core->hw_core, &hw_core_base_addr_fops);
+							debugfs_create_file("enabled", 0600, mali_gp_gpx_dir, group, &group_enabled_fops);
+						}
+						break; /* no need to look for any other GP cores */
+					}
+
+				}
+			}
+
+			mali_pp_dir = debugfs_create_dir("pp", mali_debugfs_dir);
+			if (mali_pp_dir != NULL) {
+				u32 num_groups;
+				long i;
+
+				debugfs_create_file("num_cores_total", 0400, mali_pp_dir, NULL, &pp_num_cores_total_fops);
+				debugfs_create_file("num_cores_enabled", 0600, mali_pp_dir, NULL, &pp_num_cores_enabled_fops);
+				debugfs_create_file("core_scaling_enabled", 0600, mali_pp_dir, NULL, &pp_core_scaling_enabled_fops);
+
+				num_groups = mali_group_get_glob_num_groups();
+				for (i = 0; i < num_groups; i++) {
+					struct mali_group *group = mali_group_get_glob_group(i);
+
+					struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+					if (NULL != pp_core) {
+						char buf[16];
+						struct dentry *mali_pp_ppx_dir;
+						_mali_osk_snprintf(buf, sizeof(buf), "pp%u", mali_pp_core_get_id(pp_core));
+						mali_pp_ppx_dir = debugfs_create_dir(buf, mali_pp_dir);
+						if (NULL != mali_pp_ppx_dir) {
+							debugfs_create_file("base_addr", 0400, mali_pp_ppx_dir, &pp_core->hw_core, &hw_core_base_addr_fops);
+							if (!mali_group_is_virtual(group)) {
+								debugfs_create_file("enabled", 0600, mali_pp_ppx_dir, group, &group_enabled_fops);
+							}
+						}
+					}
+				}
+			}
+
+			mali_l2_dir = debugfs_create_dir("l2", mali_debugfs_dir);
+			if (mali_l2_dir != NULL) {
+				struct dentry *mali_l2_all_dir;
+				u32 l2_id;
+				struct mali_l2_cache_core *l2_cache;
+
+				mali_l2_all_dir = debugfs_create_dir("all", mali_l2_dir);
+				if (mali_l2_all_dir != NULL) {
+					debugfs_create_file("counter_src0", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src0_fops);
+					debugfs_create_file("counter_src1", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src1_fops);
+				}
+
+				l2_id = 0;
+				l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+				while (NULL != l2_cache) {
+					char buf[16];
+					struct dentry *mali_l2_l2x_dir;
+					_mali_osk_snprintf(buf, sizeof(buf), "l2%u", l2_id);
+					mali_l2_l2x_dir = debugfs_create_dir(buf, mali_l2_dir);
+					if (NULL != mali_l2_l2x_dir) {
+						debugfs_create_file("counter_src0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src0_fops);
+						debugfs_create_file("counter_src1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src1_fops);
+						debugfs_create_file("counter_val0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val0_fops);
+						debugfs_create_file("counter_val1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val1_fops);
+						debugfs_create_file("base_addr", 0400, mali_l2_l2x_dir, &l2_cache->hw_core, &hw_core_base_addr_fops);
+					}
+
+					/* try next L2 */
+					l2_id++;
+					l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+				}
+			}
+
+			debugfs_create_file("gpu_memory", 0444, mali_debugfs_dir, NULL, &memory_usage_fops);
+
+			debugfs_create_file("utilization_gp_pp", 0400, mali_debugfs_dir, NULL, &utilization_gp_pp_fops);
+			debugfs_create_file("utilization_gp", 0400, mali_debugfs_dir, NULL, &utilization_gp_fops);
+			debugfs_create_file("utilization_pp", 0400, mali_debugfs_dir, NULL, &utilization_pp_fops);
+
+			mali_profiling_dir = debugfs_create_dir("profiling", mali_debugfs_dir);
+			if (mali_profiling_dir != NULL) {
+				u32 max_sub_jobs;
+				long i;
+				struct dentry *mali_profiling_gp_dir;
+				struct dentry *mali_profiling_pp_dir;
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+				struct dentry *mali_profiling_proc_dir;
+#endif
+				/*
+				 * Create directory where we can set GP HW counters.
+				 */
+				mali_profiling_gp_dir = debugfs_create_dir("gp", mali_profiling_dir);
+				if (mali_profiling_gp_dir != NULL) {
+					debugfs_create_file("counter_src0", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(0), &profiling_counter_src_fops);
+					debugfs_create_file("counter_src1", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(1), &profiling_counter_src_fops);
+				}
+
+				/*
+				 * Create directory where we can set PP HW counters.
+				 * Possible override with specific HW counters for a particular sub job
+				 * (Disable core scaling before using the override!)
+				 */
+				mali_profiling_pp_dir = debugfs_create_dir("pp", mali_profiling_dir);
+				if (mali_profiling_pp_dir != NULL) {
+					debugfs_create_file("counter_src0", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(0), &profiling_counter_src_fops);
+					debugfs_create_file("counter_src1", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(1), &profiling_counter_src_fops);
+				}
+
+				max_sub_jobs = mali_executor_get_num_cores_total();
+				for (i = 0; i < max_sub_jobs; i++) {
+					char buf[16];
+					struct dentry *mali_profiling_pp_x_dir;
+					_mali_osk_snprintf(buf, sizeof(buf), "%u", i);
+					mali_profiling_pp_x_dir = debugfs_create_dir(buf, mali_profiling_pp_dir);
+					if (NULL != mali_profiling_pp_x_dir) {
+						debugfs_create_file("counter_src0",
+								    0600, mali_profiling_pp_x_dir,
+								    (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(0, i),
+								    &profiling_counter_src_fops);
+						debugfs_create_file("counter_src1",
+								    0600, mali_profiling_pp_x_dir,
+								    (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(1, i),
+								    &profiling_counter_src_fops);
+					}
+				}
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+				mali_profiling_proc_dir = debugfs_create_dir("proc", mali_profiling_dir);
+				if (mali_profiling_proc_dir != NULL) {
+					struct dentry *mali_profiling_proc_default_dir = debugfs_create_dir("default", mali_profiling_proc_dir);
+					if (mali_profiling_proc_default_dir != NULL) {
+						debugfs_create_file("enable", 0600, mali_profiling_proc_default_dir, (void *)_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, &user_settings_fops);
+					}
+				}
+				debugfs_create_file("record", 0600, mali_profiling_dir, NULL, &profiling_record_fops);
+				debugfs_create_file("events", 0400, mali_profiling_dir, NULL, &profiling_events_fops);
+				debugfs_create_file("events_human_readable", 0400, mali_profiling_dir, NULL, &profiling_events_human_readable_fops);
+#endif
+			}
+
+#if MALI_STATE_TRACKING
+			debugfs_create_file("state_dump", 0400, mali_debugfs_dir, NULL, &mali_seq_internal_state_fops);
+#endif
+
+#if defined(DEBUG)
+			debugfs_create_file("timeline_dump", 0400, mali_debugfs_dir, NULL, &timeline_dump_fops);
+#endif
+			if (mali_sysfs_user_settings_register()) {
+				/* Failed to create the debugfs entries for the user settings DB. */
+				MALI_DEBUG_PRINT(2, ("Failed to create user setting debugfs files. Ignoring...\n"));
+			}
+		}
+	}
+
+	/* Success! */
+	return 0;
+}
+
+int mali_sysfs_unregister(void)
+{
+	if (NULL != mali_debugfs_dir) {
+		debugfs_remove_recursive(mali_debugfs_dir);
+	}
+	return 0;
+}
+
+#else /* MALI_LICENSE_IS_GPL */
+
+/* Dummy implementations for non-GPL */
+
+int mali_sysfs_register(struct mali_dev *device, dev_t dev, const char *mali_dev_name)
+{
+	return 0;
+}
+
+int mali_sysfs_unregister(void)
+{
+	return 0;
+}
+
+#endif /* MALI_LICENSE_IS_GPL */
diff --git a/utgard/r6p1/linux/mali_kernel_sysfs.h b/utgard/r6p1/linux/mali_kernel_sysfs.h
new file mode 100755
index 0000000..9dad7f2
--- /dev/null
+++ b/utgard/r6p1/linux/mali_kernel_sysfs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2011-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_SYSFS_H__
+#define __MALI_KERNEL_SYSFS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/device.h>
+
+#define MALI_PROC_DIR "driver/mali"
+
+int mali_sysfs_register(const char *mali_dev_name);
+int mali_sysfs_unregister(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LINUX_H__ */
diff --git a/utgard/r6p1/linux/mali_linux_trace.h b/utgard/r6p1/linux/mali_linux_trace.h
new file mode 100755
index 0000000..2c91ddc
--- /dev/null
+++ b/utgard/r6p1/linux/mali_linux_trace.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#if !defined (MALI_LINUX_TRACE_H) || defined (TRACE_HEADER_MULTI_READ)
+#define MALI_LINUX_TRACE_H
+
+#include <linux/types.h>
+
+#include <linux/stringify.h>
+#include <linux/tracepoint.h>
+
+#undef  TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_SYSTEM_STRING __stringfy(TRACE_SYSTEM)
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+/**
+ * Define the tracepoint used to communicate the status of a GPU. Called
+ * when a GPU turns on or turns off.
+ *
+ * @param event_id The type of the event. This parameter is a bitfield
+ *  encoding the type of the event.
+ *
+ * @param d0 First data parameter.
+ * @param d1 Second data parameter.
+ * @param d2 Third data parameter.
+ * @param d3 Fourth data parameter.
+ * @param d4 Fifth data parameter.
+ */
+TRACE_EVENT(mali_timeline_event,
+
+	    TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1,
+		     unsigned int d2, unsigned int d3, unsigned int d4),
+
+	    TP_ARGS(event_id, d0, d1, d2, d3, d4),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, event_id)
+		    __field(unsigned int, d0)
+		    __field(unsigned int, d1)
+		    __field(unsigned int, d2)
+		    __field(unsigned int, d3)
+		    __field(unsigned int, d4)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->event_id = event_id;
+		    __entry->d0 = d0;
+		    __entry->d1 = d1;
+		    __entry->d2 = d2;
+		    __entry->d3 = d3;
+		    __entry->d4 = d4;
+	    ),
+
+	    TP_printk("event=%d", __entry->event_id)
+	   );
+
+/**
+ * Define a tracepoint used to regsiter the value of a hardware counter.
+ * Hardware counters belonging to the vertex or fragment processor are
+ * reported via this tracepoint each frame, whilst L2 cache hardware
+ * counters are reported continuously.
+ *
+ * @param counter_id The counter ID.
+ * @param value The value of the counter.
+ */
+TRACE_EVENT(mali_hw_counter,
+
+	    TP_PROTO(unsigned int counter_id, unsigned int value),
+
+	    TP_ARGS(counter_id, value),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, counter_id)
+		    __field(unsigned int, value)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->counter_id = counter_id;
+	    ),
+
+	    TP_printk("event %d = %d", __entry->counter_id, __entry->value)
+	   );
+
+/**
+ * Define a tracepoint used to send a bundle of software counters.
+ *
+ * @param counters The bundle of counters.
+ */
+TRACE_EVENT(mali_sw_counters,
+
+	    TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters),
+
+	    TP_ARGS(pid, tid, surface_id, counters),
+
+	    TP_STRUCT__entry(
+		    __field(pid_t, pid)
+		    __field(pid_t, tid)
+		    __field(void *, surface_id)
+		    __field(unsigned int *, counters)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->pid = pid;
+		    __entry->tid = tid;
+		    __entry->surface_id = surface_id;
+		    __entry->counters = counters;
+	    ),
+
+	    TP_printk("counters were %s", __entry->counters == NULL ? "NULL" : "not NULL")
+	   );
+
+/**
+ * Define a tracepoint used to gather core activity for systrace
+ * @param pid The process id for which the core activity originates from
+ * @param active If the core is active (1) or not (0)
+ * @param core_type The type of core active, either GP (1) or PP (0)
+ * @param core_id The core id that is active for the core_type
+ * @param frame_builder_id The frame builder id associated with this core activity
+ * @param flush_id The flush id associated with this core activity
+ */
+TRACE_EVENT(mali_core_active,
+
+	    TP_PROTO(pid_t pid, unsigned int active, unsigned int core_type, unsigned int core_id, unsigned int frame_builder_id, unsigned int flush_id),
+
+	    TP_ARGS(pid, active, core_type, core_id, frame_builder_id, flush_id),
+
+	    TP_STRUCT__entry(
+		    __field(pid_t, pid)
+		    __field(unsigned int, active)
+		    __field(unsigned int, core_type)
+		    __field(unsigned int, core_id)
+		    __field(unsigned int, frame_builder_id)
+		    __field(unsigned int, flush_id)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->pid = pid;
+		    __entry->active = active;
+		    __entry->core_type = core_type;
+		    __entry->core_id = core_id;
+		    __entry->frame_builder_id = frame_builder_id;
+		    __entry->flush_id = flush_id;
+	    ),
+
+	    TP_printk("%s|%d|%s%i:%x|%d", __entry->active ? "S" : "F", __entry->pid, __entry->core_type ? "GP" : "PP", __entry->core_id, __entry->flush_id, __entry->frame_builder_id)
+	   );
+
+#endif /* MALI_LINUX_TRACE_H */
+
+/* This part must exist outside the header guard. */
+#include <trace/define_trace.h>
+
diff --git a/utgard/r6p1/linux/mali_memory.c b/utgard/r6p1/linux/mali_memory.c
new file mode 100755
index 0000000..95f04ed
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory.c
@@ -0,0 +1,531 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/platform_device.h>
+#include <linux/idr.h>
+
+#include "mali_osk.h"
+#include "mali_executor.h"
+
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_util.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_swap_alloc.h"
+#include "mali_memory_defer_bind.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_secure.h"
+#endif
+
+extern unsigned int mali_dedicated_mem_size;
+extern unsigned int mali_shared_mem_size;
+
+#define MALI_VM_NUM_FAULT_PREFETCH (0x8)
+
+static void mali_mem_vma_open(struct vm_area_struct *vma)
+{
+	mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+	MALI_DEBUG_PRINT(4, ("Open called on vma %p\n", vma));
+
+	/* If need to share the allocation, add ref_count here */
+	mali_allocation_ref(alloc);
+	return;
+}
+static void mali_mem_vma_close(struct vm_area_struct *vma)
+{
+	/* If need to share the allocation, unref ref_count here */
+	mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+
+	mali_allocation_unref(&alloc);
+	vma->vm_private_data = NULL;
+}
+
+static int mali_mem_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+	mali_mem_backend *mem_bkend = NULL;
+	int ret;
+	int prefetch_num = MALI_VM_NUM_FAULT_PREFETCH;
+
+	unsigned long address = (unsigned long)vmf->virtual_address;
+	MALI_DEBUG_ASSERT(alloc->backend_handle);
+	MALI_DEBUG_ASSERT((unsigned long)alloc->cpu_mapping.addr <= address);
+
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	if (!(mem_bkend = idr_find(&mali_backend_idr, alloc->backend_handle))) {
+		MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n"));
+		mutex_unlock(&mali_idr_mutex);
+		return VM_FAULT_SIGBUS;
+	}
+	mutex_unlock(&mali_idr_mutex);
+	MALI_DEBUG_ASSERT(mem_bkend->type == alloc->type);
+
+	if ((mem_bkend->type == MALI_MEM_COW && (MALI_MEM_BACKEND_FLAG_SWAP_COWED !=
+			(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) &&
+	    (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE)) {
+		/*check if use page fault to do COW*/
+		MALI_DEBUG_PRINT(4, ("mali_vma_fault: do cow allocate on demand!, address=0x%x\n", address));
+		mutex_lock(&mem_bkend->mutex);
+		ret = mali_mem_cow_allocate_on_demand(mem_bkend,
+						      (address - vma->vm_start) / PAGE_SIZE);
+		mutex_unlock(&mem_bkend->mutex);
+
+		if (ret != _MALI_OSK_ERR_OK) {
+			return VM_FAULT_OOM;
+		}
+		prefetch_num = 1;
+
+		/* handle COW modified range cpu mapping
+		 we zap the mapping in cow_modify_range, it will trigger page fault
+		 when CPU access it, so here we map it to CPU*/
+		mutex_lock(&mem_bkend->mutex);
+		ret = mali_mem_cow_cpu_map_pages_locked(mem_bkend, vma, address, prefetch_num);
+		mutex_unlock(&mem_bkend->mutex);
+
+		if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+			return VM_FAULT_SIGBUS;
+		}
+	} else if ((mem_bkend->type == MALI_MEM_SWAP) ||
+		   (mem_bkend->type == MALI_MEM_COW && (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+		u32 offset_in_bkend = (address - vma->vm_start) / PAGE_SIZE;
+		int ret = _MALI_OSK_ERR_OK;
+
+		mutex_lock(&mem_bkend->mutex);
+		if (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE) {
+			ret = mali_mem_swap_cow_page_on_demand(mem_bkend, offset_in_bkend, &vmf->page);
+		} else {
+			ret = mali_mem_swap_allocate_page_on_demand(mem_bkend, offset_in_bkend, &vmf->page);
+		}
+		mutex_unlock(&mem_bkend->mutex);
+
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(2, ("Mali swap memory page fault process failed, address=0x%x\n", address));
+			return VM_FAULT_OOM;
+		} else {
+			return VM_FAULT_LOCKED;
+		}
+	} else {
+		MALI_PRINT_ERROR(("Mali vma fault! It never happen, indicating some logic errors in caller.\n"));
+		/*NOT support yet or OOM*/
+		return VM_FAULT_OOM;
+	}
+	return VM_FAULT_NOPAGE;
+}
+
+static struct vm_operations_struct mali_kernel_vm_ops = {
+	.open = mali_mem_vma_open,
+	.close = mali_mem_vma_close,
+	.fault = mali_mem_vma_fault,
+};
+
+
+/** @ map mali allocation to CPU address
+*
+* Supported backend types:
+* --MALI_MEM_OS
+* -- need to add COW?
+ *Not supported backend types:
+* -_MALI_MEMORY_BIND_BACKEND_UMP
+* -_MALI_MEMORY_BIND_BACKEND_DMA_BUF
+* -_MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY
+*
+*/
+int mali_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct mali_session_data *session;
+	mali_mem_allocation *mali_alloc = NULL;
+	u32 mali_addr = vma->vm_pgoff << PAGE_SHIFT;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	int ret = -EFAULT;
+
+	session = (struct mali_session_data *)filp->private_data;
+	if (NULL == session) {
+		MALI_PRINT_ERROR(("mmap called without any session data available\n"));
+		return -EFAULT;
+	}
+
+	MALI_DEBUG_PRINT(4, ("MMap() handler: start=0x%08X, phys=0x%08X, size=0x%08X vma->flags 0x%08x\n",
+			     (unsigned int)vma->vm_start, (unsigned int)(vma->vm_pgoff << PAGE_SHIFT),
+			     (unsigned int)(vma->vm_end - vma->vm_start), vma->vm_flags));
+
+	/* Operations used on any memory system */
+	/* do not need to anything in vm open/close now */
+
+	/* find mali allocation structure by vaddress*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+	if (likely(mali_vma_node)) {
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start);
+		if (unlikely(mali_addr != mali_vma_node->vm_node.start)) {
+			/* only allow to use start address for mmap */
+			MALI_DEBUG_PRINT(1, ("mali_addr != mali_vma_node->vm_node.start\n"));
+			return -EFAULT;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(NULL == mali_vma_node);
+		return -EFAULT;
+	}
+
+	mali_alloc->cpu_mapping.addr = (void __user *)vma->vm_start;
+
+	if (mali_alloc->flags & _MALI_MEMORY_ALLOCATE_DEFER_BIND) {
+		MALI_DEBUG_PRINT(1, ("ERROR : trying to access varying memory by CPU!\n"));
+		return -EFAULT;
+	}
+
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	if (!(mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle))) {
+		MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n"));
+		mutex_unlock(&mali_idr_mutex);
+		return -EFAULT;
+	}
+	mutex_unlock(&mali_idr_mutex);
+
+	if (!(MALI_MEM_SWAP == mali_alloc->type ||
+	      (MALI_MEM_COW == mali_alloc->type && (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)))) {
+		/* Set some bits which indicate that, the memory is IO memory, meaning
+		 * that no paging is to be performed and the memory should not be
+		 * included in crash dumps. And that the memory is reserved, meaning
+		 * that it's present and can never be paged out (see also previous
+		 * entry)
+		 */
+		vma->vm_flags |= VM_IO;
+		vma->vm_flags |= VM_DONTCOPY;
+		vma->vm_flags |= VM_PFNMAP;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+		vma->vm_flags |= VM_RESERVED;
+#else
+		vma->vm_flags |= VM_DONTDUMP;
+		vma->vm_flags |= VM_DONTEXPAND;
+#endif
+	} else if (MALI_MEM_SWAP == mali_alloc->type) {
+		vma->vm_pgoff = mem_bkend->start_idx;
+	}
+
+	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	vma->vm_ops = &mali_kernel_vm_ops;
+
+	mali_alloc->cpu_mapping.addr = (void __user *)vma->vm_start;
+
+	/* If it's a copy-on-write mapping, map to read only */
+	if (!(vma->vm_flags & VM_WRITE)) {
+		MALI_DEBUG_PRINT(4, ("mmap allocation with read only !\n"));
+		/* add VM_WRITE for do_page_fault will check this when a write fault */
+		vma->vm_flags |= VM_WRITE | VM_READ;
+		vma->vm_page_prot = PAGE_READONLY;
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE;
+		goto out;
+	}
+
+	if (mem_bkend->type == MALI_MEM_OS) {
+		ret = mali_mem_os_cpu_map(mem_bkend, vma);
+	} else if (mem_bkend->type == MALI_MEM_COW &&
+		   (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+		ret = mali_mem_cow_cpu_map(mem_bkend, vma);
+	} else if (mem_bkend->type == MALI_MEM_BLOCK) {
+		ret = mali_mem_block_cpu_map(mem_bkend, vma);
+	} else if ((mem_bkend->type == MALI_MEM_SWAP) || (mem_bkend->type == MALI_MEM_COW &&
+			(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)))) {
+		/*For swappable memory, CPU page table will be created by page fault handler. */
+		ret = 0;
+	} else if (mem_bkend->type == MALI_MEM_SECURE) {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		ret = mali_mem_secure_cpu_map(mem_bkend, vma);
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory\n"));
+		return -EFAULT;
+#endif
+	} else {
+		/* Not support yet*/
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of backend memory! \n"));
+		return -EFAULT;
+	}
+
+	if (ret != 0) {
+		MALI_DEBUG_PRINT(1, ("ret != 0\n"));
+		return -EFAULT;
+	}
+out:
+	MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == mali_alloc->magic);
+
+	vma->vm_private_data = (void *)mali_alloc;
+	mali_alloc->cpu_mapping.vma = vma;
+
+	mali_allocation_ref(mali_alloc);
+
+	return 0;
+}
+
+_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor)
+{
+	u32 size = descriptor->psize;
+	struct mali_session_data *session = descriptor->session;
+
+	MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == descriptor->magic);
+
+	/* Map dma-buf into this session's page tables */
+
+	if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		size += MALI_MMU_PAGE_SIZE;
+	}
+
+	return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start, size);
+}
+
+_mali_osk_errcode_t mali_mem_mali_map_resize(mali_mem_allocation *descriptor, u32 new_size)
+{
+	u32 old_size = descriptor->psize;
+	struct mali_session_data *session = descriptor->session;
+
+	MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == descriptor->magic);
+
+	if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		new_size  += MALI_MMU_PAGE_SIZE;
+	}
+
+	if (new_size > old_size) {
+		MALI_DEBUG_ASSERT(new_size <= descriptor->mali_vma_node.vm_node.size);
+		return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start + old_size, new_size - old_size);
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags)
+{
+	if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		size += MALI_MMU_PAGE_SIZE;
+	}
+
+	/* Umap and flush L2 */
+	mali_mmu_pagedir_unmap(session->page_directory, vaddr, size);
+	mali_executor_zap_all_active(session);
+}
+
+u32 _mali_ukk_report_memory_usage(void)
+{
+	u32 sum = 0;
+
+	if (MALI_TRUE == mali_memory_have_dedicated_memory()) {
+		sum += mali_mem_block_allocator_stat();
+	}
+
+	sum += mali_mem_os_stat();
+
+	return sum;
+}
+
+u32 _mali_ukk_report_total_memory_size(void)
+{
+	return mali_dedicated_mem_size + mali_shared_mem_size;
+}
+
+
+/**
+ * Per-session memory descriptor mapping table sizes
+ */
+#define MALI_MEM_DESCRIPTORS_INIT 64
+#define MALI_MEM_DESCRIPTORS_MAX 65536
+
+_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session_data)
+{
+	MALI_DEBUG_PRINT(5, ("Memory session begin\n"));
+
+	session_data->memory_lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED,
+				    _MALI_OSK_LOCK_ORDER_MEM_SESSION);
+
+	if (NULL == session_data->memory_lock) {
+		MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	}
+
+	session_data->cow_lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_UNORDERED, 0);
+	if (NULL == session_data->cow_lock) {
+		_mali_osk_mutex_term(session_data->memory_lock);
+		_mali_osk_free(session_data);
+		MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	}
+
+	mali_memory_manager_init(&session_data->allocation_mgr);
+
+	MALI_DEBUG_PRINT(5, ("MMU session begin: success\n"));
+	MALI_SUCCESS;
+}
+
+void mali_memory_session_end(struct mali_session_data *session)
+{
+	MALI_DEBUG_PRINT(3, ("MMU session end\n"));
+
+	if (NULL == session) {
+		MALI_DEBUG_PRINT(1, ("No session data found during session end\n"));
+		return;
+	}
+	/* free allocation */
+	mali_free_session_allocations(session);
+	/* do some check in unint*/
+	mali_memory_manager_uninit(&session->allocation_mgr);
+
+	/* Free the lock */
+	_mali_osk_mutex_term(session->memory_lock);
+	_mali_osk_mutex_term(session->cow_lock);
+	return;
+}
+
+_mali_osk_errcode_t mali_memory_initialize(void)
+{
+	_mali_osk_errcode_t err;
+
+	idr_init(&mali_backend_idr);
+	mutex_init(&mali_idr_mutex);
+
+	err = mali_mem_swap_init();
+	if (err != _MALI_OSK_ERR_OK) {
+		return err;
+	}
+	err = mali_mem_os_init();
+	if (_MALI_OSK_ERR_OK == err) {
+		err = mali_mem_defer_bind_manager_init();
+	}
+
+	return err;
+}
+
+void mali_memory_terminate(void)
+{
+	mali_mem_swap_term();
+	mali_mem_defer_bind_manager_destory();
+	mali_mem_os_term();
+	if (mali_memory_have_dedicated_memory()) {
+		mali_mem_block_allocator_destroy();
+	}
+}
+
+
+struct mali_page_node *_mali_page_node_allocate(mali_page_node_type type)
+{
+	mali_page_node *page_node = NULL;
+
+	page_node = kzalloc(sizeof(mali_page_node), GFP_KERNEL);
+	MALI_DEBUG_ASSERT(NULL != page_node);
+
+	if (page_node) {
+		page_node->type = type;
+		INIT_LIST_HEAD(&page_node->list);
+	}
+
+	return page_node;
+}
+
+void _mali_page_node_ref(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		/* add ref to this page */
+		get_page(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		mali_mem_block_add_ref(node);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		atomic_inc(&node->swap_it->ref_count);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+}
+
+void _mali_page_node_unref(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		/* unref to this page */
+		put_page(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		mali_mem_block_dec_ref(node);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+}
+
+
+void _mali_page_node_add_page(struct mali_page_node *node, struct page *page)
+{
+	MALI_DEBUG_ASSERT(MALI_PAGE_NODE_OS == node->type);
+	node->page = page;
+}
+
+
+void _mali_page_node_add_swap_item(struct mali_page_node *node, struct mali_swap_item *item)
+{
+	MALI_DEBUG_ASSERT(MALI_PAGE_NODE_SWAP == node->type);
+	node->swap_it = item;
+}
+
+void _mali_page_node_add_block_item(struct mali_page_node *node, mali_block_item *item)
+{
+	MALI_DEBUG_ASSERT(MALI_PAGE_NODE_BLOCK == node->type);
+	node->blk_it = item;
+}
+
+
+int _mali_page_node_get_ref_count(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		/* get ref count of this page */
+		return page_count(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		return mali_mem_block_get_ref_count(node);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return atomic_read(&node->swap_it->ref_count);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+	return -1;
+}
+
+
+dma_addr_t _mali_page_node_get_dma_addr(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		return page_private(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		return _mali_blk_item_get_phy_addr(node->blk_it);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return node->swap_it->dma_addr;
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+	return 0;
+}
+
+
+unsigned long _mali_page_node_get_pfn(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		return page_to_pfn(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		/* get phy addr for BLOCK page*/
+		return _mali_blk_item_get_pfn(node->blk_it);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return page_to_pfn(node->swap_it->page);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+	return 0;
+}
+
+
diff --git a/utgard/r6p1/linux/mali_memory.h b/utgard/r6p1/linux/mali_memory.h
new file mode 100755
index 0000000..e5e4f66
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_H__
+#define __MALI_MEMORY_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include <linux/list.h>
+#include <linux/mm.h>
+
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+
+_mali_osk_errcode_t mali_memory_initialize(void);
+void mali_memory_terminate(void);
+
+/** @brief Allocate a page table page
+ *
+ * Allocate a page for use as a page directory or page table. The page is
+ * mapped into kernel space.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise an error code
+ * @param table_page GPU pointer to the allocated page
+ * @param mapping CPU pointer to the mapping of the allocated page
+ */
+MALI_STATIC_INLINE _mali_osk_errcode_t
+mali_mmu_get_table_page(mali_dma_addr *table_page, mali_io_address *mapping)
+{
+	return mali_mem_os_get_table_page(table_page, mapping);
+}
+
+/** @brief Release a page table page
+ *
+ * Release a page table page allocated through \a mali_mmu_get_table_page
+ *
+ * @param pa the GPU address of the page to release
+ */
+MALI_STATIC_INLINE void
+mali_mmu_release_table_page(mali_dma_addr phys, void *virt)
+{
+	mali_mem_os_release_table_page(phys, virt);
+}
+
+/** @brief mmap function
+ *
+ * mmap syscalls on the Mali device node will end up here.
+ *
+ * This function allocates Mali memory and maps it on CPU and Mali.
+ */
+int mali_mmap(struct file *filp, struct vm_area_struct *vma);
+
+/** @brief Start a new memory session
+ *
+ * Called when a process opens the Mali device node.
+ *
+ * @param session Pointer to session to initialize
+ */
+_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session);
+
+/** @brief Close a memory session
+ *
+ * Called when a process closes the Mali device node.
+ *
+ * Memory allocated by the session will be freed
+ *
+ * @param session Pointer to the session to terminate
+ */
+void mali_memory_session_end(struct mali_session_data *session);
+
+/** @brief Prepare Mali page tables for mapping
+ *
+ * This function will prepare the Mali page tables for mapping the memory
+ * described by \a descriptor.
+ *
+ * Page tables will be reference counted and allocated, if not yet present.
+ *
+ * @param descriptor Pointer to the memory descriptor to the mapping
+ */
+_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor);
+
+/** @brief Resize Mali page tables for mapping
+ *
+ * This function will Resize the Mali page tables for mapping the memory
+ * described by \a descriptor.
+ *
+ * Page tables will be reference counted and allocated, if not yet present.
+ *
+ * @param descriptor Pointer to the memory descriptor to the mapping
+ * @param new_size The new size of descriptor
+ */
+_mali_osk_errcode_t mali_mem_mali_map_resize(mali_mem_allocation *descriptor, u32 new_size);
+
+/** @brief Free Mali page tables for mapping
+ *
+ * This function will unmap pages from Mali memory and free the page tables
+ * that are now unused.
+ *
+ * The updated pages in the Mali L2 cache will be invalidated, and the MMU TLBs will be zapped if necessary.
+ *
+ * @param descriptor Pointer to the memory descriptor to unmap
+ */
+void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags);
+
+/** @brief Parse resource and prepare the OS memory allocator
+ *
+ * @param size Maximum size to allocate for Mali GPU.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size);
+
+/** @brief Parse resource and prepare the dedicated memory allocator
+ *
+ * @param start Physical start address of dedicated Mali GPU memory.
+ * @param size Size of dedicated Mali GPU memory.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size);
+
+
+struct mali_page_node *_mali_page_node_allocate(mali_page_node_type type);
+
+void _mali_page_node_ref(struct mali_page_node *node);
+void _mali_page_node_unref(struct mali_page_node *node);
+void _mali_page_node_add_page(struct mali_page_node *node, struct page *page);
+
+void _mali_page_node_add_block_item(struct mali_page_node *node, mali_block_item *item);
+
+void _mali_page_node_add_swap_item(struct mali_page_node *node, struct mali_swap_item *item);
+
+int _mali_page_node_get_ref_count(struct mali_page_node *node);
+dma_addr_t _mali_page_node_get_dma_addr(struct mali_page_node *node);
+unsigned long _mali_page_node_get_pfn(struct mali_page_node *node);
+
+#endif /* __MALI_MEMORY_H__ */
diff --git a/utgard/r6p1/linux/mali_memory_block_alloc.c b/utgard/r6p1/linux/mali_memory_block_alloc.c
new file mode 100755
index 0000000..af3dd4d
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_block_alloc.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_memory.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_osk.h"
+#include <linux/mutex.h>
+
+
+static mali_block_allocator *mali_mem_block_gobal_allocator = NULL;
+
+unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item)
+{
+	return (item->phy_addr & ~(MALI_BLOCK_REF_MASK));
+}
+
+
+unsigned long _mali_blk_item_get_pfn(mali_block_item *item)
+{
+	return (item->phy_addr / MALI_BLOCK_SIZE);
+}
+
+
+u32 mali_mem_block_get_ref_count(mali_page_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	return (node->blk_it->phy_addr & MALI_BLOCK_REF_MASK);
+}
+
+
+/* Increase the refence count
+* It not atomic, so it need to get sp_lock before call this function
+*/
+
+u32 mali_mem_block_add_ref(mali_page_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) < MALI_BLOCK_MAX_REF_COUNT);
+	return (node->blk_it->phy_addr++ & MALI_BLOCK_REF_MASK);
+}
+
+/* Decase the refence count
+* It not atomic, so it need to get sp_lock before call this function
+*/
+u32 mali_mem_block_dec_ref(mali_page_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) > 0);
+	return (node->blk_it->phy_addr-- & MALI_BLOCK_REF_MASK);
+}
+
+
+static mali_block_allocator *mali_mem_block_allocator_create(u32 base_address, u32 size)
+{
+	mali_block_allocator *info;
+	u32 usable_size;
+	u32 num_blocks;
+	mali_page_node *m_node;
+	mali_block_item *mali_blk_items = NULL;
+	int i = 0;
+
+	usable_size = size & ~(MALI_BLOCK_SIZE - 1);
+	MALI_DEBUG_PRINT(3, ("Mali block allocator create for region starting at 0x%08X length 0x%08X\n", base_address, size));
+	MALI_DEBUG_PRINT(4, ("%d usable bytes\n", usable_size));
+	num_blocks = usable_size / MALI_BLOCK_SIZE;
+	MALI_DEBUG_PRINT(4, ("which becomes %d blocks\n", num_blocks));
+
+	if (usable_size == 0) {
+		MALI_DEBUG_PRINT(1, ("Memory block of size %d is unusable\n", size));
+		return NULL;
+	}
+
+	info = _mali_osk_calloc(1, sizeof(mali_block_allocator));
+	if (NULL != info) {
+		INIT_LIST_HEAD(&info->free);
+		spin_lock_init(&info->sp_lock);
+		info->total_num = num_blocks;
+		mali_blk_items = _mali_osk_calloc(1, sizeof(mali_block_item) * num_blocks);
+
+		if (mali_blk_items) {
+			info->items = mali_blk_items;
+			/* add blocks(4k size) to free list*/
+			for (i = 0 ; i < num_blocks ; i++) {
+				/* add block information*/
+				mali_blk_items[i].phy_addr = base_address + (i * MALI_BLOCK_SIZE);
+				/* add  to free list */
+				m_node = _mali_page_node_allocate(MALI_PAGE_NODE_BLOCK);
+				if (m_node == NULL)
+					goto fail;
+				_mali_page_node_add_block_item(m_node, &(mali_blk_items[i]));
+				list_add_tail(&m_node->list, &info->free);
+				atomic_add(1, &info->free_num);
+			}
+			return info;
+		}
+	}
+fail:
+	mali_mem_block_allocator_destroy();
+	return NULL;
+}
+
+void mali_mem_block_allocator_destroy(void)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	MALI_DEBUG_ASSERT_POINTER(info);
+	MALI_DEBUG_PRINT(4, ("Memory block destroy !\n"));
+
+	if (NULL == info)
+		return;
+
+	list_for_each_entry_safe(m_page, m_tmp , &info->free, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		list_del(&m_page->list);
+		kfree(m_page);
+	}
+
+	_mali_osk_free(info->items);
+	_mali_osk_free(info);
+}
+
+u32 mali_mem_block_release(mali_mem_backend *mem_bkend)
+{
+	mali_mem_allocation *alloc = mem_bkend->mali_allocation;
+	u32 free_pages_nr = 0;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK);
+
+	/* Unmap the memory from the mali virtual address space. */
+	mali_mem_block_mali_unmap(alloc);
+	mutex_lock(&mem_bkend->mutex);
+	free_pages_nr = mali_mem_block_free(&mem_bkend->block_mem);
+	mutex_unlock(&mem_bkend->mutex);
+	return free_pages_nr;
+}
+
+
+int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	MALI_DEBUG_ASSERT_POINTER(info);
+
+	MALI_DEBUG_PRINT(4, ("BLOCK Mem: Allocate size = 0x%x\n", size));
+	/*do some init */
+	INIT_LIST_HEAD(&block_mem->pfns);
+
+	spin_lock(&info->sp_lock);
+	/*check if have enough space*/
+	if (atomic_read(&info->free_num) > page_count) {
+		list_for_each_entry_safe(m_page, m_tmp , &info->free, list) {
+			if (page_count > 0) {
+				MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+				MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(m_page) == 0);
+				list_move(&m_page->list, &block_mem->pfns);
+				block_mem->count++;
+				atomic_dec(&info->free_num);
+				_mali_page_node_ref(m_page);
+			} else {
+				break;
+			}
+			page_count--;
+		}
+	} else {
+		/* can't allocate from BLOCK memory*/
+		spin_unlock(&info->sp_lock);
+		return -1;
+	}
+
+	spin_unlock(&info->sp_lock);
+	return 0;
+}
+
+u32 mali_mem_block_free(mali_mem_block_mem *block_mem)
+{
+	u32 free_pages_nr = 0;
+
+	free_pages_nr = mali_mem_block_free_list(&block_mem->pfns);
+	MALI_DEBUG_PRINT(4, ("BLOCK Mem free : allocated size = 0x%x, free size = 0x%x\n", block_mem->count * _MALI_OSK_MALI_PAGE_SIZE,
+			     free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+	block_mem->count = 0;
+	MALI_DEBUG_ASSERT(list_empty(&block_mem->pfns));
+
+	return free_pages_nr;
+}
+
+
+u32 mali_mem_block_free_list(struct list_head *list)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	u32 free_pages_nr = 0;
+
+	if (info) {
+		spin_lock(&info->sp_lock);
+		list_for_each_entry_safe(m_page, m_tmp , list, list) {
+			if (1 == _mali_page_node_get_ref_count(m_page)) {
+				free_pages_nr++;
+			}
+			mali_mem_block_free_node(m_page);
+		}
+		spin_unlock(&info->sp_lock);
+	}
+	return free_pages_nr;
+}
+
+/* free the node,*/
+void mali_mem_block_free_node(struct mali_page_node *node)
+{
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+
+	/* only handle BLOCK node */
+	if (node->type == MALI_PAGE_NODE_BLOCK && info) {
+		/*Need to make this atomic?*/
+		if (1 == _mali_page_node_get_ref_count(node)) {
+			/*Move to free list*/
+			_mali_page_node_unref(node);
+			list_move_tail(&node->list, &info->free);
+			atomic_add(1, &info->free_num);
+		} else {
+			_mali_page_node_unref(node);
+			list_del(&node->list);
+			kfree(node);
+		}
+	}
+}
+
+/* unref the node, but not free it */
+_mali_osk_errcode_t mali_mem_block_unref_node(struct mali_page_node *node)
+{
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	mali_page_node *new_node;
+
+	/* only handle BLOCK node */
+	if (node->type == MALI_PAGE_NODE_BLOCK && info) {
+		/*Need to make this atomic?*/
+		if (1 == _mali_page_node_get_ref_count(node)) {
+			/* allocate a  new node, Add to free list, keep the old node*/
+			_mali_page_node_unref(node);
+			new_node = _mali_page_node_allocate(MALI_PAGE_NODE_BLOCK);
+			if (new_node) {
+				memcpy(new_node, node, sizeof(mali_page_node));
+				list_add(&new_node->list, &info->free);
+				atomic_add(1, &info->free_num);
+			} else
+				return _MALI_OSK_ERR_FAULT;
+
+		} else {
+			_mali_page_node_unref(node);
+		}
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+
+int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+	struct mali_page_directory *pagedir = session->page_directory;
+	struct mali_page_node *m_page;
+	dma_addr_t phys;
+	u32 virt = vaddr;
+	u32 prop = props;
+
+	list_for_each_entry(m_page, &block_mem->pfns, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		phys = _mali_page_node_get_dma_addr(m_page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+		/* Verify that the "physical" address is 32-bit and
+		 * usable for Mali, when on a system with bus addresses
+		 * wider than 32-bit. */
+		MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+		mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+		virt += MALI_MMU_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+void mali_mem_block_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+
+int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+	int ret;
+	mali_mem_block_mem *block_mem = &mem_bkend->block_mem;
+	unsigned long addr = vma->vm_start;
+	struct mali_page_node *m_page;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK);
+
+	list_for_each_entry(m_page, &block_mem->pfns, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		ret = vm_insert_pfn(vma, addr, _mali_page_node_get_pfn(m_page));
+
+		if (unlikely(0 != ret)) {
+			return -EFAULT;
+		}
+		addr += _MALI_OSK_MALI_PAGE_SIZE;
+
+	}
+
+	return 0;
+}
+
+
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size)
+{
+	mali_block_allocator *allocator;
+
+	/* Do the low level linux operation first */
+
+	/* Request ownership of the memory */
+	if (_MALI_OSK_ERR_OK != _mali_osk_mem_reqregion(start, size, "Dedicated Mali GPU memory")) {
+		MALI_DEBUG_PRINT(1, ("Failed to request memory region for frame buffer (0x%08X - 0x%08X)\n", start, start + size - 1));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create generic block allocator object to handle it */
+	allocator = mali_mem_block_allocator_create(start, size);
+
+	if (NULL == allocator) {
+		MALI_DEBUG_PRINT(1, ("Memory bank registration failed\n"));
+		_mali_osk_mem_unreqregion(start, size);
+		MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	}
+
+	mali_mem_block_gobal_allocator = (mali_block_allocator *)allocator;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+mali_bool mali_memory_have_dedicated_memory(void)
+{
+	return mali_mem_block_gobal_allocator ? MALI_TRUE : MALI_FALSE;
+}
+
+u32 mali_mem_block_allocator_stat(void)
+{
+	mali_block_allocator *allocator = mali_mem_block_gobal_allocator;
+	MALI_DEBUG_ASSERT_POINTER(allocator);
+
+
+	return (allocator->total_num - atomic_read(&allocator->free_num)) * _MALI_OSK_MALI_PAGE_SIZE;
+}
diff --git a/utgard/r6p1/linux/mali_memory_block_alloc.h b/utgard/r6p1/linux/mali_memory_block_alloc.h
new file mode 100755
index 0000000..3e11ef2
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_block_alloc.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2010, 2013, 2015-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_BLOCK_ALLOCATOR_H__
+#define __MALI_BLOCK_ALLOCATOR_H__
+
+#include "mali_session.h"
+#include "mali_memory.h"
+#include <linux/spinlock.h>
+
+#include "mali_memory_types.h"
+
+#define MALI_BLOCK_SIZE (PAGE_SIZE)  /* 4 kB, manage BLOCK memory as page size */
+#define MALI_BLOCK_REF_MASK (0xFFF)
+#define MALI_BLOCK_MAX_REF_COUNT (0xFFF)
+
+
+
+typedef struct mali_block_allocator {
+	/*
+	* In free list, each node's ref_count is 0,
+	* ref_count added when allocated or referenced in COW
+	*/
+	mali_block_item *items; /* information for each block item*/
+	struct list_head free; /*free list of mali_memory_node*/
+	spinlock_t sp_lock; /*lock for reference count & free list opertion*/
+	u32 total_num; /* Number of total pages*/
+	atomic_t free_num; /*number of free pages*/
+} mali_block_allocator;
+
+unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item);
+unsigned long _mali_blk_item_get_pfn(mali_block_item *item);
+u32 mali_mem_block_get_ref_count(mali_page_node *node);
+u32 mali_mem_block_add_ref(mali_page_node *node);
+u32 mali_mem_block_dec_ref(mali_page_node *node);
+u32 mali_mem_block_release(mali_mem_backend *mem_bkend);
+int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size);
+int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+void mali_mem_block_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size);
+mali_bool mali_memory_have_dedicated_memory(void);
+u32 mali_mem_block_free(mali_mem_block_mem *block_mem);
+u32 mali_mem_block_free_list(struct list_head *list);
+void mali_mem_block_free_node(struct mali_page_node *node);
+void mali_mem_block_allocator_destroy(void);
+_mali_osk_errcode_t mali_mem_block_unref_node(struct mali_page_node *node);
+u32 mali_mem_block_allocator_stat(void);
+
+#endif /* __MALI_BLOCK_ALLOCATOR_H__ */
diff --git a/utgard/r6p1/linux/mali_memory_cow.c b/utgard/r6p1/linux/mali_memory_cow.c
new file mode 100644
index 0000000..0a11f4b
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_cow.c
@@ -0,0 +1,776 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#ifdef CONFIG_ARM
+#include <asm/outercache.h>
+#endif
+#include <asm/dma-mapping.h>
+
+#include "mali_memory.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_swap_alloc.h"
+
+/**
+* allocate pages for COW backend and flush cache
+*/
+static struct page *mali_mem_cow_alloc_page(void)
+
+{
+	mali_mem_os_mem os_mem;
+	struct mali_page_node *node;
+	struct page *new_page;
+
+	int ret = 0;
+	/* allocate pages from os mem */
+	ret = mali_mem_os_alloc_pages(&os_mem, _MALI_OSK_MALI_PAGE_SIZE);
+
+	if (ret) {
+		return NULL;
+	}
+
+	MALI_DEBUG_ASSERT(1 == os_mem.count);
+
+	node = _MALI_OSK_CONTAINER_OF(os_mem.pages.next, struct mali_page_node, list);
+	new_page = node->page;
+	node->page = NULL;
+	list_del(&node->list);
+	kfree(node);
+
+	return new_page;
+}
+
+
+static struct list_head *_mali_memory_cow_get_node_list(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size)
+{
+	MALI_DEBUG_ASSERT(MALI_MEM_OS == target_bk->type || MALI_MEM_COW == target_bk->type ||
+			  MALI_MEM_BLOCK == target_bk->type || MALI_MEM_SWAP == target_bk->type);
+
+	if (MALI_MEM_OS == target_bk->type) {
+		MALI_DEBUG_ASSERT(&target_bk->os_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->os_mem.count);
+		return &target_bk->os_mem.pages;
+	} else if (MALI_MEM_COW == target_bk->type) {
+		MALI_DEBUG_ASSERT(&target_bk->cow_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->cow_mem.count);
+		return  &target_bk->cow_mem.pages;
+	} else if (MALI_MEM_BLOCK == target_bk->type) {
+		MALI_DEBUG_ASSERT(&target_bk->block_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->block_mem.count);
+		return  &target_bk->block_mem.pfns;
+	} else if (MALI_MEM_SWAP == target_bk->type) {
+		MALI_DEBUG_ASSERT(&target_bk->swap_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->swap_mem.count);
+		return  &target_bk->swap_mem.pages;
+	}
+
+	return NULL;
+}
+
+/**
+* Do COW for os memory - support do COW for memory from bank memory
+* The range_start/size can be zero, which means it will call cow_modify_range
+* latter.
+* This function allocate new pages for COW backend from os mem for a modified range
+* It will keep the page which not in the modified range and Add ref to it
+*
+* @target_bk - target allocation's backend(the allocation need to do COW)
+* @target_offset - the offset in target allocation to do COW(for support COW  a memory allocated from memory_bank, 4K align)
+* @target_size - size of target allocation to do COW (for support memory bank)
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range
+*/
+_mali_osk_errcode_t mali_memory_cow_os_memory(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size,
+		mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size)
+{
+	mali_mem_cow *cow = &backend->cow_mem;
+	struct mali_page_node *m_page, *m_tmp, *page_node;
+	int target_page = 0;
+	struct page *new_page;
+	struct list_head *pages = NULL;
+
+	pages = _mali_memory_cow_get_node_list(target_bk, target_offset, target_size);
+
+	if (NULL == pages) {
+		MALI_DEBUG_PRINT_ERROR(("No memory page  need to cow ! \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT(0 == cow->count);
+
+	INIT_LIST_HEAD(&cow->pages);
+	mutex_lock(&target_bk->mutex);
+	list_for_each_entry_safe(m_page, m_tmp, pages, list) {
+		/* add page from (target_offset,target_offset+size) to cow backend */
+		if ((target_page >= target_offset / _MALI_OSK_MALI_PAGE_SIZE) &&
+		    (target_page < ((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE))) {
+
+			/* allocate a new page node, alway use OS memory for COW */
+			page_node = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+
+			if (NULL == page_node) {
+				mutex_unlock(&target_bk->mutex);
+				goto error;
+			}
+
+			INIT_LIST_HEAD(&page_node->list);
+
+			/* check if in the modified range*/
+			if ((cow->count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+			    (cow->count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+				/* need to allocate a new page */
+				/* To simplify the case, All COW memory is allocated from os memory ?*/
+				new_page = mali_mem_cow_alloc_page();
+
+				if (NULL == new_page) {
+					kfree(page_node);
+					mutex_unlock(&target_bk->mutex);
+					goto error;
+				}
+
+				_mali_page_node_add_page(page_node, new_page);
+			} else {
+				/*Add Block memory case*/
+				if (m_page->type != MALI_PAGE_NODE_BLOCK) {
+					_mali_page_node_add_page(page_node, m_page->page);
+				} else {
+					page_node->type = MALI_PAGE_NODE_BLOCK;
+					_mali_page_node_add_block_item(page_node, m_page->blk_it);
+				}
+
+				/* add ref to this page */
+				_mali_page_node_ref(m_page);
+			}
+
+			/* add it to COW backend page list */
+			list_add_tail(&page_node->list, &cow->pages);
+			cow->count++;
+		}
+		target_page++;
+	}
+	mutex_unlock(&target_bk->mutex);
+	return _MALI_OSK_ERR_OK;
+error:
+	mali_mem_cow_release(backend, MALI_FALSE);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+_mali_osk_errcode_t mali_memory_cow_swap_memory(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size,
+		mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size)
+{
+	mali_mem_cow *cow = &backend->cow_mem;
+	struct mali_page_node *m_page, *m_tmp, *page_node;
+	int target_page = 0;
+	struct mali_swap_item *swap_item;
+	struct list_head *pages = NULL;
+
+	pages = _mali_memory_cow_get_node_list(target_bk, target_offset, target_size);
+	if (NULL == pages) {
+		MALI_DEBUG_PRINT_ERROR(("No swap memory page need to cow ! \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT(0 == cow->count);
+
+	INIT_LIST_HEAD(&cow->pages);
+	mutex_lock(&target_bk->mutex);
+
+	backend->flags |= MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN;
+
+	list_for_each_entry_safe(m_page, m_tmp, pages, list) {
+		/* add page from (target_offset,target_offset+size) to cow backend */
+		if ((target_page >= target_offset / _MALI_OSK_MALI_PAGE_SIZE) &&
+		    (target_page < ((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE))) {
+
+			/* allocate a new page node, use swap memory for COW memory swap cowed flag. */
+			page_node = _mali_page_node_allocate(MALI_PAGE_NODE_SWAP);
+
+			if (NULL == page_node) {
+				mutex_unlock(&target_bk->mutex);
+				goto error;
+			}
+
+			/* check if in the modified range*/
+			if ((cow->count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+			    (cow->count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+				/* need to allocate a new page */
+				/* To simplify the case, All COW memory is allocated from os memory ?*/
+				swap_item = mali_mem_swap_alloc_swap_item();
+
+				if (NULL == swap_item) {
+					kfree(page_node);
+					mutex_unlock(&target_bk->mutex);
+					goto error;
+				}
+
+				swap_item->idx = mali_mem_swap_idx_alloc();
+
+				if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == swap_item->idx) {
+					MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW.\n"));
+					kfree(page_node);
+					kfree(swap_item);
+					mutex_unlock(&target_bk->mutex);
+					goto error;
+				}
+
+				_mali_page_node_add_swap_item(page_node, swap_item);
+			} else {
+				_mali_page_node_add_swap_item(page_node, m_page->swap_it);
+
+				/* add ref to this page */
+				_mali_page_node_ref(m_page);
+			}
+
+			list_add_tail(&page_node->list, &cow->pages);
+			cow->count++;
+		}
+		target_page++;
+	}
+	mutex_unlock(&target_bk->mutex);
+
+	return _MALI_OSK_ERR_OK;
+error:
+	mali_mem_swap_release(backend, MALI_FALSE);
+	return _MALI_OSK_ERR_FAULT;
+
+}
+
+
+_mali_osk_errcode_t _mali_mem_put_page_node(mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		return mali_mem_os_put_page(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		return mali_mem_block_unref_node(node);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return _mali_mem_swap_put_page_node(node);
+	} else
+		MALI_DEBUG_ASSERT(0);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+/**
+* Modify a range of a exist COW backend
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range(in byte)
+*/
+_mali_osk_errcode_t mali_memory_cow_modify_range(mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size)
+{
+	mali_mem_allocation *alloc = NULL;
+	struct mali_session_data *session;
+	mali_mem_cow *cow = &backend->cow_mem;
+	struct mali_page_node *m_page, *m_tmp;
+	LIST_HEAD(pages);
+	struct page *new_page;
+	u32 count = 0;
+	s32 change_pages_nr = 0;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+
+	if (range_start % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (range_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	alloc = backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == backend->type);
+	MALI_DEBUG_ASSERT(((range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE) <= cow->count);
+
+	mutex_lock(&backend->mutex);
+
+	/* free pages*/
+	list_for_each_entry_safe(m_page, m_tmp, &cow->pages, list) {
+
+		/* check if in the modified range*/
+		if ((count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+		    (count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+			if (MALI_PAGE_NODE_SWAP != m_page->type) {
+				new_page = mali_mem_cow_alloc_page();
+
+				if (NULL == new_page) {
+					goto error;
+				}
+				if (1 != _mali_page_node_get_ref_count(m_page))
+					change_pages_nr++;
+				/* unref old page*/
+				_mali_osk_mutex_wait(session->cow_lock);
+				if (_mali_mem_put_page_node(m_page)) {
+					__free_page(new_page);
+					_mali_osk_mutex_signal(session->cow_lock);
+					goto error;
+				}
+				_mali_osk_mutex_signal(session->cow_lock);
+				/* add new page*/
+				/* always use OS for COW*/
+				m_page->type = MALI_PAGE_NODE_OS;
+				_mali_page_node_add_page(m_page, new_page);
+			} else {
+				struct mali_swap_item *swap_item;
+
+				swap_item = mali_mem_swap_alloc_swap_item();
+
+				if (NULL == swap_item) {
+					goto error;
+				}
+
+				swap_item->idx = mali_mem_swap_idx_alloc();
+
+				if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == swap_item->idx) {
+					MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW modify range.\n"));
+					kfree(swap_item);
+					goto error;
+				}
+
+				if (1 != _mali_page_node_get_ref_count(m_page)) {
+					change_pages_nr++;
+				}
+
+				if (_mali_mem_put_page_node(m_page)) {
+					mali_mem_swap_free_swap_item(swap_item);
+					goto error;
+				}
+
+				_mali_page_node_add_swap_item(m_page, swap_item);
+			}
+		}
+		count++;
+	}
+	cow->change_pages_nr  = change_pages_nr;
+
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == alloc->type);
+
+	/* ZAP cpu mapping(modified range), and do cpu mapping here if need */
+	if (NULL != alloc->cpu_mapping.vma) {
+		MALI_DEBUG_ASSERT(0 != alloc->backend_handle);
+		MALI_DEBUG_ASSERT(NULL != alloc->cpu_mapping.vma);
+		MALI_DEBUG_ASSERT(alloc->cpu_mapping.vma->vm_end - alloc->cpu_mapping.vma->vm_start >= range_size);
+
+		if (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+			zap_vma_ptes(alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size);
+
+			ret = mali_mem_cow_cpu_map_pages_locked(backend, alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start  + range_start, range_size / _MALI_OSK_MALI_PAGE_SIZE);
+
+			if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+				MALI_DEBUG_PRINT(2, ("mali_memory_cow_modify_range: cpu mapping failed !\n"));
+				ret =  _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			/* used to trigger page fault for swappable cowed memory. */
+			alloc->cpu_mapping.vma->vm_flags |= VM_PFNMAP;
+			alloc->cpu_mapping.vma->vm_flags |= VM_MIXEDMAP;
+
+			zap_vma_ptes(alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size);
+			/* delete this flag to let swappble is ummapped regard to stauct page not page frame. */
+			alloc->cpu_mapping.vma->vm_flags &= ~VM_PFNMAP;
+			alloc->cpu_mapping.vma->vm_flags &= ~VM_MIXEDMAP;
+		}
+	}
+
+error:
+	mutex_unlock(&backend->mutex);
+	return ret;
+
+}
+
+
+/**
+* Allocate pages for COW backend
+* @alloc  -allocation for COW allocation
+* @target_bk - target allocation's backend(the allocation need to do COW)
+* @target_offset - the offset in target allocation to do COW(for support COW  a memory allocated from memory_bank, 4K align)
+* @target_size - size of target allocation to do COW (for support memory bank)(in byte)
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range(in byte)
+*/
+_mali_osk_errcode_t mali_memory_do_cow(mali_mem_backend *target_bk,
+				       u32 target_offset,
+				       u32 target_size,
+				       mali_mem_backend *backend,
+				       u32 range_start,
+				       u32 range_size)
+{
+	struct mali_session_data *session = backend->mali_allocation->session;
+
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+
+	/* size & offset must be a multiple of the system page size */
+	if (target_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (range_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (target_offset % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (range_start % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	/* check backend type */
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == backend->type);
+
+	switch (target_bk->type) {
+	case MALI_MEM_OS:
+	case MALI_MEM_BLOCK:
+		return mali_memory_cow_os_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		break;
+	case MALI_MEM_COW:
+		if (backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED) {
+			return mali_memory_cow_swap_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		} else {
+			return mali_memory_cow_os_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		}
+		break;
+	case MALI_MEM_SWAP:
+		return mali_memory_cow_swap_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		break;
+	case MALI_MEM_EXTERNAL:
+		/*NOT support yet*/
+		MALI_DEBUG_PRINT_ERROR(("External physical memory not supported ! \n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	case MALI_MEM_DMA_BUF:
+		/*NOT support yet*/
+		MALI_DEBUG_PRINT_ERROR(("DMA buffer not supported ! \n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	case MALI_MEM_UMP:
+		/*NOT support yet*/
+		MALI_DEBUG_PRINT_ERROR(("UMP buffer not supported ! \n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	default:
+		/*Not support yet*/
+		MALI_DEBUG_PRINT_ERROR(("Invalid memory type not supported ! \n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/**
+* Map COW backend memory to mali
+* Support OS/BLOCK for mali_page_node
+*/
+int mali_mem_cow_mali_map(mali_mem_backend *mem_bkend, u32 range_start, u32 range_size)
+{
+	mali_mem_allocation *cow_alloc;
+	struct mali_page_node *m_page;
+	struct mali_session_data *session;
+	struct mali_page_directory *pagedir;
+	u32 virt, start;
+
+	cow_alloc = mem_bkend->mali_allocation;
+	virt = cow_alloc->mali_vma_node.vm_node.start;
+	start = virt;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+	MALI_DEBUG_ASSERT_POINTER(cow_alloc);
+
+	session = cow_alloc->session;
+	pagedir = session->page_directory;
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+	list_for_each_entry(m_page, &mem_bkend->cow_mem.pages, list) {
+		if ((virt - start >= range_start) && (virt - start < range_start + range_size)) {
+			dma_addr_t phys = _mali_page_node_get_dma_addr(m_page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+			MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+			mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys,
+						MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+		}
+		virt += MALI_MMU_PAGE_SIZE;
+	}
+	return 0;
+}
+
+/**
+* Map COW backend to cpu
+* support OS/BLOCK memory
+*/
+int mali_mem_cow_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+	mali_mem_cow *cow = &mem_bkend->cow_mem;
+	struct mali_page_node *m_page;
+	int ret;
+	unsigned long addr = vma->vm_start;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+
+	list_for_each_entry(m_page, &cow->pages, list) {
+		/* We should use vm_insert_page, but it does a dcache
+		 * flush which makes it way slower than remap_pfn_range or vm_insert_pfn.
+		ret = vm_insert_page(vma, addr, page);
+		*/
+		ret = vm_insert_pfn(vma, addr, _mali_page_node_get_pfn(m_page));
+
+		if (unlikely(0 != ret)) {
+			return ret;
+		}
+		addr += _MALI_OSK_MALI_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+/**
+* Map some pages(COW backend) to CPU vma@vaddr
+*@ mem_bkend - COW backend
+*@ vma
+*@ vaddr -start CPU vaddr mapped to
+*@ num - max number of pages to map to CPU vaddr
+*/
+_mali_osk_errcode_t mali_mem_cow_cpu_map_pages_locked(mali_mem_backend *mem_bkend,
+		struct vm_area_struct *vma,
+		unsigned long vaddr,
+		int num)
+{
+	mali_mem_cow *cow = &mem_bkend->cow_mem;
+	struct mali_page_node *m_page;
+	int ret;
+	int offset;
+	int count ;
+	unsigned long vstart = vma->vm_start;
+	count = 0;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+	MALI_DEBUG_ASSERT(0 == vaddr % _MALI_OSK_MALI_PAGE_SIZE);
+	MALI_DEBUG_ASSERT(0 == vstart % _MALI_OSK_MALI_PAGE_SIZE);
+	offset = (vaddr - vstart) / _MALI_OSK_MALI_PAGE_SIZE;
+
+	list_for_each_entry(m_page, &cow->pages, list) {
+		if ((count >= offset) && (count < offset + num)) {
+			ret = vm_insert_pfn(vma, vaddr, _mali_page_node_get_pfn(m_page));
+
+			if (unlikely(0 != ret)) {
+				if (count == offset) {
+					return _MALI_OSK_ERR_FAULT;
+				} else {
+					/* ret is EBUSY when page isn't in modify range, but now it's OK*/
+					return _MALI_OSK_ERR_OK;
+				}
+			}
+			vaddr += _MALI_OSK_MALI_PAGE_SIZE;
+		}
+		count++;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/**
+* Release COW backend memory
+* free it directly(put_page--unref page), not put into pool
+*/
+u32 mali_mem_cow_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped)
+{
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	u32 free_pages_nr = 0;
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+	alloc = mem_bkend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (MALI_MEM_BACKEND_FLAG_SWAP_COWED & mem_bkend->flags)) {
+		/* Unmap the memory from the mali virtual address space. */
+		if (MALI_TRUE == is_mali_mapped)
+			mali_mem_os_mali_unmap(alloc);
+		/* free cow backend list*/
+		_mali_osk_mutex_wait(session->cow_lock);
+		free_pages_nr = mali_mem_os_free(&mem_bkend->cow_mem.pages, mem_bkend->cow_mem.count, MALI_TRUE);
+		_mali_osk_mutex_signal(session->cow_lock);
+
+		free_pages_nr += mali_mem_block_free_list(&mem_bkend->cow_mem.pages);
+
+		MALI_DEBUG_ASSERT(list_empty(&mem_bkend->cow_mem.pages));
+	} else {
+		free_pages_nr = mali_mem_swap_release(mem_bkend, is_mali_mapped);
+	}
+
+
+	MALI_DEBUG_PRINT(4, ("COW Mem free : allocated size = 0x%x, free size = 0x%x\n", mem_bkend->cow_mem.count * _MALI_OSK_MALI_PAGE_SIZE,
+			     free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+
+	mem_bkend->cow_mem.count = 0;
+	return free_pages_nr;
+}
+
+
+/* Dst node could os node or swap node. */
+void _mali_mem_cow_copy_page(mali_page_node *src_node, mali_page_node *dst_node)
+{
+	void *dst, *src;
+	struct page *dst_page;
+	dma_addr_t dma_addr;
+
+	MALI_DEBUG_ASSERT(src_node != NULL);
+	MALI_DEBUG_ASSERT(dst_node != NULL);
+	MALI_DEBUG_ASSERT(dst_node->type == MALI_PAGE_NODE_OS
+			  || dst_node->type == MALI_PAGE_NODE_SWAP);
+
+	if (dst_node->type == MALI_PAGE_NODE_OS) {
+		dst_page = dst_node->page;
+	} else {
+		dst_page = dst_node->swap_it->page;
+	}
+
+	dma_unmap_page(&mali_platform_device->dev, _mali_page_node_get_dma_addr(dst_node),
+		       _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+	/* map it , and copy the content*/
+	dst = kmap_atomic(dst_page);
+
+	if (src_node->type == MALI_PAGE_NODE_OS ||
+	    src_node->type == MALI_PAGE_NODE_SWAP) {
+		struct page *src_page;
+
+		if (src_node->type == MALI_PAGE_NODE_OS) {
+			src_page = src_node->page;
+		} else {
+			src_page = src_node->swap_it->page;
+		}
+
+		/* Clear and invaliate cache */
+		/* In ARM architecture, speculative read may pull stale data into L1 cache
+		 * for kernel linear mapping page table. DMA_BIDIRECTIONAL could
+		 * invalidate the L1 cache so that following read get the latest data
+		*/
+		dma_unmap_page(&mali_platform_device->dev, _mali_page_node_get_dma_addr(src_node),
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+		src = kmap_atomic(src_page);
+		memcpy(dst, src , _MALI_OSK_MALI_PAGE_SIZE);
+		kunmap_atomic(src);
+		dma_addr = dma_map_page(&mali_platform_device->dev, src_page,
+					0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+		if (src_node->type == MALI_PAGE_NODE_SWAP) {
+			src_node->swap_it->dma_addr = dma_addr;
+		}
+	} else if (src_node->type == MALI_PAGE_NODE_BLOCK) {
+		/*
+		* use ioremap to map src for BLOCK memory
+		*/
+		src = ioremap_nocache(_mali_page_node_get_dma_addr(src_node), _MALI_OSK_MALI_PAGE_SIZE);
+		memcpy(dst, src , _MALI_OSK_MALI_PAGE_SIZE);
+		iounmap(src);
+	}
+	kunmap_atomic(dst);
+	dma_addr = dma_map_page(&mali_platform_device->dev, dst_page,
+				0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+	if (dst_node->type == MALI_PAGE_NODE_SWAP) {
+		dst_node->swap_it->dma_addr = dma_addr;
+	}
+}
+
+
+/*
+* allocate page on demand when CPU access it,
+* THis used in page fault handler
+*/
+_mali_osk_errcode_t mali_mem_cow_allocate_on_demand(mali_mem_backend *mem_bkend, u32 offset_page)
+{
+	struct page *new_page = NULL;
+	struct mali_page_node *new_node = NULL;
+	int i = 0;
+	struct mali_page_node *m_page, *found_node = NULL;
+	struct  mali_session_data *session = NULL;
+	mali_mem_cow *cow = &mem_bkend->cow_mem;
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+	MALI_DEBUG_ASSERT(offset_page < mem_bkend->size / _MALI_OSK_MALI_PAGE_SIZE);
+	MALI_DEBUG_PRINT(4, ("mali_mem_cow_allocate_on_demand !, offset_page =0x%x\n", offset_page));
+
+	/* allocate new page here */
+	new_page = mali_mem_cow_alloc_page();
+	if (!new_page)
+		return _MALI_OSK_ERR_NOMEM;
+
+	new_node = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+	if (!new_node) {
+		__free_page(new_page);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	/* find the page in backend*/
+	list_for_each_entry(m_page, &cow->pages, list) {
+		if (i == offset_page) {
+			found_node = m_page;
+			break;
+		}
+		i++;
+	}
+	MALI_DEBUG_ASSERT(found_node);
+	if (NULL == found_node) {
+		__free_page(new_page);
+		kfree(new_node);
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	_mali_page_node_add_page(new_node, new_page);
+
+	/* Copy the src page's content to new page */
+	_mali_mem_cow_copy_page(found_node, new_node);
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend->mali_allocation);
+	session = mem_bkend->mali_allocation->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	if (1 != _mali_page_node_get_ref_count(found_node)) {
+		atomic_add(1, &session->mali_mem_allocated_pages);
+		if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+			session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+		mem_bkend->cow_mem.change_pages_nr++;
+	}
+
+	_mali_osk_mutex_wait(session->cow_lock);
+	if (_mali_mem_put_page_node(found_node)) {
+		__free_page(new_page);
+		kfree(new_node);
+		_mali_osk_mutex_signal(session->cow_lock);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	_mali_osk_mutex_signal(session->cow_lock);
+
+	list_replace(&found_node->list, &new_node->list);
+
+	kfree(found_node);
+
+	/* map to GPU side*/
+	_mali_osk_mutex_wait(session->memory_lock);
+	mali_mem_cow_mali_map(mem_bkend, offset_page * _MALI_OSK_MALI_PAGE_SIZE, _MALI_OSK_MALI_PAGE_SIZE);
+	_mali_osk_mutex_signal(session->memory_lock);
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r6p1/linux/mali_memory_cow.h b/utgard/r6p1/linux/mali_memory_cow.h
new file mode 100644
index 0000000..9b9e834
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_cow.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_COW_H__
+#define __MALI_MEMORY_COW_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+#include "mali_memory_types.h"
+
+int mali_mem_cow_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+_mali_osk_errcode_t mali_mem_cow_cpu_map_pages_locked(mali_mem_backend *mem_bkend,
+		struct vm_area_struct *vma,
+		unsigned long vaddr,
+		int num);
+
+_mali_osk_errcode_t mali_memory_do_cow(mali_mem_backend *target_bk,
+				       u32 target_offset,
+				       u32 target_size,
+				       mali_mem_backend *backend,
+				       u32 range_start,
+				       u32 range_size);
+
+_mali_osk_errcode_t mali_memory_cow_modify_range(mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size);
+
+_mali_osk_errcode_t mali_memory_cow_os_memory(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size,
+		mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size);
+
+void _mali_mem_cow_copy_page(mali_page_node *src_node, mali_page_node *dst_node);
+
+int mali_mem_cow_mali_map(mali_mem_backend *mem_bkend, u32 range_start, u32 range_size);
+u32 mali_mem_cow_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped);
+_mali_osk_errcode_t mali_mem_cow_allocate_on_demand(mali_mem_backend *mem_bkend, u32 offset_page);
+#endif
+
diff --git a/utgard/r6p1/linux/mali_memory_defer_bind.c b/utgard/r6p1/linux/mali_memory_defer_bind.c
new file mode 100644
index 0000000..6d84558
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_defer_bind.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#ifdef CONFIG_ARM
+#include <asm/outercache.h>
+#endif
+#include <asm/dma-mapping.h>
+
+#include "mali_memory.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory_defer_bind.h"
+#include "mali_executor.h"
+#include "mali_osk.h"
+#include "mali_scheduler.h"
+#include "mali_gp_job.h"
+
+mali_defer_bind_manager *mali_dmem_man = NULL;
+
+static u32 mali_dmem_get_gp_varying_size(struct mali_gp_job *gp_job)
+{
+	return gp_job->required_varying_memsize / _MALI_OSK_MALI_PAGE_SIZE;
+}
+
+_mali_osk_errcode_t mali_mem_defer_bind_manager_init(void)
+{
+	mali_dmem_man = _mali_osk_calloc(1, sizeof(struct mali_defer_bind_manager));
+	if (!mali_dmem_man)
+		return _MALI_OSK_ERR_NOMEM;
+
+	atomic_set(&mali_dmem_man->num_used_pages, 0);
+	atomic_set(&mali_dmem_man->num_dmem, 0);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+void mali_mem_defer_bind_manager_destory(void)
+{
+	if (mali_dmem_man) {
+		MALI_DEBUG_ASSERT(0 == atomic_read(&mali_dmem_man->num_dmem));
+		kfree(mali_dmem_man);
+	}
+	mali_dmem_man = NULL;
+}
+
+
+/*allocate pages from OS memory*/
+_mali_osk_errcode_t mali_mem_defer_alloc_mem(u32 require, struct mali_session_data *session, mali_defer_mem_block *dblock)
+{
+	int retval = 0;
+	u32 num_pages = require;
+	mali_mem_os_mem os_mem;
+
+	retval = mali_mem_os_alloc_pages(&os_mem, num_pages * _MALI_OSK_MALI_PAGE_SIZE);
+
+	/* add to free pages list */
+	if (0 == retval) {
+		MALI_DEBUG_PRINT(4, ("mali_mem_defer_alloc_mem ,,*** pages allocate = 0x%x \n", num_pages));
+		list_splice(&os_mem.pages, &dblock->free_pages);
+		atomic_add(os_mem.count, &dblock->num_free_pages);
+		atomic_add(os_mem.count, &session->mali_mem_allocated_pages);
+		if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+			session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+		return _MALI_OSK_ERR_OK;
+	} else
+		return _MALI_OSK_ERR_FAULT;
+}
+
+_mali_osk_errcode_t mali_mem_prepare_mem_for_job(struct mali_gp_job *next_gp_job, mali_defer_mem_block *dblock)
+{
+	u32 require_page;
+
+	if (!next_gp_job)
+		return _MALI_OSK_ERR_FAULT;
+
+	require_page = mali_dmem_get_gp_varying_size(next_gp_job);
+
+	MALI_DEBUG_PRINT(4, ("mali_mem_defer_prepare_mem_work, require alloc page 0x%x\n",
+			     require_page));
+	/* allocate more pages from OS */
+	if (_MALI_OSK_ERR_OK != mali_mem_defer_alloc_mem(require_page, next_gp_job->session, dblock)) {
+		MALI_DEBUG_PRINT(1, ("ERROR##mali_mem_defer_prepare_mem_work, allocate page failed!!"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	next_gp_job->bind_flag = MALI_DEFER_BIND_MEMORY_PREPARED;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/* do preparetion for allocation before defer bind */
+_mali_osk_errcode_t mali_mem_defer_bind_allocation_prepare(mali_mem_allocation *alloc, struct list_head *list, u32 *required_varying_memsize)
+{
+	mali_mem_backend *mem_bkend = NULL;
+	struct mali_backend_bind_list *bk_list = _mali_osk_calloc(1, sizeof(struct mali_backend_bind_list));
+	if (NULL == bk_list)
+		return _MALI_OSK_ERR_FAULT;
+
+	INIT_LIST_HEAD(&bk_list->node);
+	/* Get backend memory */
+	mutex_lock(&mali_idr_mutex);
+	if (!(mem_bkend = idr_find(&mali_backend_idr, alloc->backend_handle))) {
+		MALI_DEBUG_PRINT(1, ("Can't find memory backend in defer bind!\n"));
+		mutex_unlock(&mali_idr_mutex);
+		_mali_osk_free(bk_list);
+		return _MALI_OSK_ERR_FAULT;
+	}
+	mutex_unlock(&mali_idr_mutex);
+
+	/* If the mem backend has already been bound, no need to bind again.*/
+	if (mem_bkend->os_mem.count > 0) {
+		_mali_osk_free(bk_list);
+		return _MALI_OSK_ERR_OK;
+	}
+
+	MALI_DEBUG_PRINT(4, ("bind_allocation_prepare:: allocation =%x vaddr=0x%x!\n", alloc, alloc->mali_vma_node.vm_node.start));
+
+	INIT_LIST_HEAD(&mem_bkend->os_mem.pages);
+
+	bk_list->bkend = mem_bkend;
+	bk_list->vaddr = alloc->mali_vma_node.vm_node.start;
+	bk_list->session = alloc->session;
+	bk_list->page_num = mem_bkend->size / _MALI_OSK_MALI_PAGE_SIZE;
+	*required_varying_memsize +=  mem_bkend->size;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+
+	/* add to job to do list */
+	list_add(&bk_list->node, list);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+
+/* bind phyiscal memory to allocation
+This function will be called in IRQ handler*/
+static _mali_osk_errcode_t mali_mem_defer_bind_allocation(struct mali_backend_bind_list *bk_node,
+		struct list_head *pages)
+{
+	struct mali_session_data *session = bk_node->session;
+	mali_mem_backend *mem_bkend = bk_node->bkend;
+	MALI_DEBUG_PRINT(4, ("mali_mem_defer_bind_allocation, bind bkend = %x page num=0x%x vaddr=%x session=%x\n", mem_bkend, bk_node->page_num, bk_node->vaddr, session));
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+	list_splice(pages, &mem_bkend->os_mem.pages);
+	mem_bkend->os_mem.count = bk_node->page_num;
+
+	if (mem_bkend->type == MALI_MEM_OS) {
+		mali_mem_os_mali_map(&mem_bkend->os_mem, session, bk_node->vaddr, 0,
+				     mem_bkend->os_mem.count, MALI_MMU_FLAGS_DEFAULT);
+	}
+	smp_wmb();
+	bk_node->flag = MALI_DEFER_BIND_MEMORY_BINDED;
+	mem_bkend->flags &= ~MALI_MEM_BACKEND_FLAG_NOT_BINDED;
+	mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_BINDED;
+	return _MALI_OSK_ERR_OK;
+}
+
+
+static struct list_head *mali_mem_defer_get_free_page_list(u32 count, struct list_head *pages, mali_defer_mem_block *dblock)
+{
+	int i = 0;
+	struct mali_page_node *m_page, *m_tmp;
+
+	if (atomic_read(&dblock->num_free_pages) < count) {
+		return NULL;
+	} else {
+		list_for_each_entry_safe(m_page, m_tmp, &dblock->free_pages, list) {
+			if (i < count) {
+				list_move_tail(&m_page->list, pages);
+			} else {
+				break;
+			}
+			i++;
+		}
+		MALI_DEBUG_ASSERT(i == count);
+		atomic_sub(count, &dblock->num_free_pages);
+		return pages;
+	}
+}
+
+
+/* called in job start IOCTL to bind physical memory for each allocations
+@ bk_list backend list to do defer bind
+@ pages page list to do this bind
+@ count number of pages
+*/
+_mali_osk_errcode_t mali_mem_defer_bind(struct mali_gp_job *gp,
+					struct mali_defer_mem_block *dmem_block)
+{
+	struct mali_defer_mem *dmem = NULL;
+	struct mali_backend_bind_list *bkn, *bkn_tmp;
+	LIST_HEAD(pages);
+
+	if (gp->required_varying_memsize != (atomic_read(&dmem_block->num_free_pages) * _MALI_OSK_MALI_PAGE_SIZE)) {
+		MALI_DEBUG_PRINT_ERROR(("#BIND:  The memsize of varying buffer not match to the pagesize of the dmem_block!!## \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_PRINT(4, ("#BIND: GP job=%x## \n", gp));
+	dmem = (mali_defer_mem *)_mali_osk_calloc(1, sizeof(struct mali_defer_mem));
+	if (dmem) {
+		INIT_LIST_HEAD(&dmem->node);
+		gp->dmem = dmem;
+	} else {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	atomic_add(1, &mali_dmem_man->num_dmem);
+	/* for each bk_list backend, do bind */
+	list_for_each_entry_safe(bkn, bkn_tmp , &gp->vary_todo, node) {
+		INIT_LIST_HEAD(&pages);
+		if (likely(mali_mem_defer_get_free_page_list(bkn->page_num, &pages, dmem_block))) {
+			list_del(&bkn->node);
+			mali_mem_defer_bind_allocation(bkn, &pages);
+			_mali_osk_free(bkn);
+		} else {
+			/* not enough memory will not happen */
+			MALI_DEBUG_PRINT_ERROR(("#BIND: NOT enough memory when binded !!## \n"));
+			_mali_osk_free(gp->dmem);
+			return _MALI_OSK_ERR_NOMEM;
+		}
+	}
+
+	if (!list_empty(&gp->vary_todo)) {
+		MALI_DEBUG_PRINT_ERROR(("#BIND:  The deferbind backend list isn't empty !!## \n"));
+		_mali_osk_free(gp->dmem);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	dmem->flag = MALI_DEFER_BIND_MEMORY_BINDED;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_defer_dmem_free(struct mali_gp_job *gp)
+{
+	if (gp->dmem) {
+		atomic_dec(&mali_dmem_man->num_dmem);
+		_mali_osk_free(gp->dmem);
+	}
+}
+
diff --git a/utgard/r6p1/linux/mali_memory_defer_bind.h b/utgard/r6p1/linux/mali_memory_defer_bind.h
new file mode 100644
index 0000000..98f6beb
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_defer_bind.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __MALI_MEMORY_DEFER_BIND_H_
+#define __MALI_MEMORY_DEFER_BIND_H_
+
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_uk_types.h"
+
+struct mali_gp_job;
+
+typedef struct mali_defer_mem {
+	struct list_head node;   /*dlist node in bind manager */
+	u32 flag;
+} mali_defer_mem;
+
+
+typedef struct mali_defer_mem_block {
+	struct list_head free_pages; /* page pool */
+	atomic_t num_free_pages;
+} mali_defer_mem_block;
+
+/* varying memory list need to bind */
+typedef struct mali_backend_bind_list {
+	struct list_head node;
+	struct mali_mem_backend *bkend;
+	u32 vaddr;
+	u32 page_num;
+	struct mali_session_data *session;
+	u32 flag;
+} mali_backend_bind_lists;
+
+
+typedef struct mali_defer_bind_manager {
+	atomic_t num_used_pages;
+	atomic_t num_dmem;
+} mali_defer_bind_manager;
+
+_mali_osk_errcode_t mali_mem_defer_bind_manager_init(void);
+void mali_mem_defer_bind_manager_destory(void);
+_mali_osk_errcode_t mali_mem_defer_bind(struct mali_gp_job *gp, struct mali_defer_mem_block *dmem_block);
+_mali_osk_errcode_t mali_mem_defer_bind_allocation_prepare(mali_mem_allocation *alloc, struct list_head *list,  u32 *required_varying_memsize);
+_mali_osk_errcode_t mali_mem_prepare_mem_for_job(struct mali_gp_job *next_gp_job, mali_defer_mem_block *dblock);
+void mali_mem_defer_dmem_free(struct mali_gp_job *gp);
+
+#endif
diff --git a/utgard/r6p1/linux/mali_memory_dma_buf.c b/utgard/r6p1/linux/mali_memory_dma_buf.c
new file mode 100755
index 0000000..f0d1e07
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_dma_buf.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/fs.h>      /* file system operations */
+#include <asm/uaccess.h>        /* user space access */
+#include <linux/dma-buf.h>
+#include <linux/scatterlist.h>
+#include <linux/rbtree.h>
+#include <linux/platform_device.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_linux.h"
+
+#include "mali_memory.h"
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_virtual.h"
+#include "mali_pp_job.h"
+
+/*
+ * Map DMA buf attachment \a mem into \a session at virtual address \a virt.
+ */
+static int mali_dma_buf_map(mali_mem_backend *mem_backend)
+{
+	mali_mem_allocation *alloc;
+	struct mali_dma_buf_attachment *mem;
+	struct  mali_session_data *session;
+	struct mali_page_directory *pagedir;
+	_mali_osk_errcode_t err;
+	struct scatterlist *sg;
+	u32 virt, flags;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	mem = mem_backend->dma_buf.attachment;
+	MALI_DEBUG_ASSERT_POINTER(mem);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(mem->session == session);
+
+	virt = alloc->mali_vma_node.vm_node.start;
+	flags = alloc->flags;
+
+	mali_session_memory_lock(session);
+	mem->map_ref++;
+
+	MALI_DEBUG_PRINT(5, ("Mali DMA-buf: map attachment %p, new map_ref = %d\n", mem, mem->map_ref));
+
+	if (1 == mem->map_ref) {
+
+		/* First reference taken, so we need to map the dma buf */
+		MALI_DEBUG_ASSERT(!mem->is_mapped);
+
+		mem->sgt = dma_buf_map_attachment(mem->attachment, DMA_BIDIRECTIONAL);
+		if (IS_ERR_OR_NULL(mem->sgt)) {
+			MALI_DEBUG_PRINT_ERROR(("Failed to map dma-buf attachment\n"));
+			mem->map_ref--;
+			mali_session_memory_unlock(session);
+			return -EFAULT;
+		}
+
+		err = mali_mem_mali_map_prepare(alloc);
+		if (_MALI_OSK_ERR_OK != err) {
+			MALI_DEBUG_PRINT(1, ("Mapping of DMA memory failed\n"));
+			mem->map_ref--;
+			mali_session_memory_unlock(session);
+			return -ENOMEM;
+		}
+
+		pagedir = mali_session_get_page_directory(session);
+		MALI_DEBUG_ASSERT_POINTER(pagedir);
+
+		for_each_sg(mem->sgt->sgl, sg, mem->sgt->nents, i) {
+			u32 size = sg_dma_len(sg);
+			dma_addr_t phys = sg_dma_address(sg);
+
+			/* sg must be page aligned. */
+			MALI_DEBUG_ASSERT(0 == size % MALI_MMU_PAGE_SIZE);
+			MALI_DEBUG_ASSERT(0 == (phys & ~(uintptr_t)0xFFFFFFFF));
+
+			mali_mmu_pagedir_update(pagedir, virt, phys, size, MALI_MMU_FLAGS_DEFAULT);
+
+			virt += size;
+		}
+
+		if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+			u32 guard_phys;
+			MALI_DEBUG_PRINT(7, ("Mapping in extra guard page\n"));
+
+			guard_phys = sg_dma_address(mem->sgt->sgl);
+			mali_mmu_pagedir_update(pagedir, virt, guard_phys, MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+		}
+
+		mem->is_mapped = MALI_TRUE;
+		mali_session_memory_unlock(session);
+		/* Wake up any thread waiting for buffer to become mapped */
+		wake_up_all(&mem->wait_queue);
+	} else {
+		MALI_DEBUG_ASSERT(mem->is_mapped);
+		mali_session_memory_unlock(session);
+	}
+
+	return 0;
+}
+
+static void mali_dma_buf_unmap(mali_mem_allocation *alloc, struct mali_dma_buf_attachment *mem)
+{
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(mem);
+	MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	MALI_DEBUG_ASSERT_POINTER(alloc->session);
+
+	mali_session_memory_lock(alloc->session);
+	mem->map_ref--;
+
+	MALI_DEBUG_PRINT(5, ("Mali DMA-buf: unmap attachment %p, new map_ref = %d\n", mem, mem->map_ref));
+
+	if (0 == mem->map_ref) {
+		dma_buf_unmap_attachment(mem->attachment, mem->sgt, DMA_BIDIRECTIONAL);
+		if (MALI_TRUE == mem->is_mapped) {
+			mali_mem_mali_map_free(alloc->session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+					       alloc->flags);
+		}
+		mem->is_mapped = MALI_FALSE;
+	}
+	mali_session_memory_unlock(alloc->session);
+	/* Wake up any thread waiting for buffer to become unmapped */
+	wake_up_all(&mem->wait_queue);
+}
+
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+int mali_dma_buf_map_job(struct mali_pp_job *job)
+{
+	struct mali_dma_buf_attachment *mem;
+	_mali_osk_errcode_t err;
+	int i;
+	int ret = 0;
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	for (i = 0; i < num_memory_cookies; i++) {
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+		MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(NULL != mali_alloc);
+		if (MALI_MEM_DMA_BUF != mali_alloc->type) {
+			continue;
+		}
+
+		/* Get backend memory & Map on CPU */
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+		mem = mem_bkend->dma_buf.attachment;
+
+		MALI_DEBUG_ASSERT_POINTER(mem);
+		MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job));
+
+		err = mali_dma_buf_map(mem_bkend);
+		if (0 != err) {
+			MALI_DEBUG_PRINT_ERROR(("Mali DMA-buf: Failed to map dma-buf for mali address %x\n", mali_addr));
+			ret = -EFAULT;
+			continue;
+		}
+	}
+	return ret;
+}
+
+void mali_dma_buf_unmap_job(struct mali_pp_job *job)
+{
+	struct mali_dma_buf_attachment *mem;
+	int i;
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	for (i = 0; i < num_memory_cookies; i++) {
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+		MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(NULL != mali_alloc);
+		if (MALI_MEM_DMA_BUF != mali_alloc->type) {
+			continue;
+		}
+
+		/* Get backend memory & Map on CPU */
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+		mem = mem_bkend->dma_buf.attachment;
+
+		MALI_DEBUG_ASSERT_POINTER(mem);
+		MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job));
+		mali_dma_buf_unmap(mem_bkend->mali_allocation, mem);
+	}
+}
+#endif /* !CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH */
+
+int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *user_arg)
+{
+	_mali_uk_dma_buf_get_size_s args;
+	int fd;
+	struct dma_buf *buf;
+
+	/* get call arguments from user space. copy_from_user returns how many bytes which where NOT copied */
+	if (0 != copy_from_user(&args, (void __user *)user_arg, sizeof(_mali_uk_dma_buf_get_size_s))) {
+		return -EFAULT;
+	}
+
+	/* Do DMA-BUF stuff */
+	fd = args.mem_fd;
+
+	buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(buf)) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to get dma-buf from fd: %d\n", fd));
+		return PTR_RET(buf);
+	}
+
+	if (0 != put_user(buf->size, &user_arg->size)) {
+		dma_buf_put(buf);
+		return -EFAULT;
+	}
+
+	dma_buf_put(buf);
+
+	return 0;
+}
+
+_mali_osk_errcode_t mali_mem_bind_dma_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		int fd, u32 flags)
+{
+	struct dma_buf *buf;
+	struct mali_dma_buf_attachment *dma_mem;
+	struct  mali_session_data *session = alloc->session;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	/* get dma buffer */
+	buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(buf)) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Currently, mapping of the full buffer are supported. */
+	if (alloc->psize != buf->size) {
+		goto failed_alloc_mem;
+	}
+
+	dma_mem = _mali_osk_calloc(1, sizeof(struct mali_dma_buf_attachment));
+	if (NULL == dma_mem) {
+		goto failed_alloc_mem;
+	}
+
+	dma_mem->buf = buf;
+	dma_mem->session = session;
+	dma_mem->map_ref = 0;
+	init_waitqueue_head(&dma_mem->wait_queue);
+
+	dma_mem->attachment = dma_buf_attach(dma_mem->buf, &mali_platform_device->dev);
+	if (NULL == dma_mem->attachment) {
+		goto failed_dma_attach;
+	}
+
+	mem_backend->dma_buf.attachment = dma_mem;
+
+	alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP;
+	if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+	/* Map memory into session's Mali virtual address space. */
+	if (0 != mali_dma_buf_map(mem_backend)) {
+		goto Failed_dma_map;
+	}
+#endif
+
+	return _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+Failed_dma_map:
+	mali_dma_buf_unmap(alloc, dma_mem);
+#endif
+	/* Wait for buffer to become unmapped */
+	wait_event(dma_mem->wait_queue, !dma_mem->is_mapped);
+	MALI_DEBUG_ASSERT(!dma_mem->is_mapped);
+	dma_buf_detach(dma_mem->buf, dma_mem->attachment);
+failed_dma_attach:
+	_mali_osk_free(dma_mem);
+failed_alloc_mem:
+	dma_buf_put(buf);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_mem_unbind_dma_buf(mali_mem_backend *mem_backend)
+{
+	struct mali_dma_buf_attachment *mem;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_DMA_BUF == mem_backend->type);
+
+	mem = mem_backend->dma_buf.attachment;
+	MALI_DEBUG_ASSERT_POINTER(mem);
+	MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	MALI_DEBUG_PRINT(3, ("Mali DMA-buf: release attachment %p\n", mem));
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+	MALI_DEBUG_ASSERT_POINTER(mem_backend->mali_allocation);
+	/* We mapped implicitly on attach, so we need to unmap on release */
+	mali_dma_buf_unmap(mem_backend->mali_allocation, mem);
+#endif
+	/* Wait for buffer to become unmapped */
+	wait_event(mem->wait_queue, !mem->is_mapped);
+	MALI_DEBUG_ASSERT(!mem->is_mapped);
+
+	dma_buf_detach(mem->buf, mem->attachment);
+	dma_buf_put(mem->buf);
+
+	_mali_osk_free(mem);
+}
diff --git a/utgard/r6p1/linux/mali_memory_dma_buf.h b/utgard/r6p1/linux/mali_memory_dma_buf.h
new file mode 100755
index 0000000..37b6f6b
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_dma_buf.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_DMA_BUF_H__
+#define __MALI_MEMORY_DMA_BUF_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_memory.h"
+
+struct mali_pp_job;
+
+struct mali_dma_buf_attachment;
+struct mali_dma_buf_attachment {
+	struct dma_buf *buf;
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct mali_session_data *session;
+	int map_ref;
+	struct mutex map_lock;
+	mali_bool is_mapped;
+	wait_queue_head_t wait_queue;
+};
+
+int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *arg);
+
+void mali_mem_unbind_dma_buf(mali_mem_backend *mem_backend);
+
+_mali_osk_errcode_t mali_mem_bind_dma_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		int fd, u32 flags);
+
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+int mali_dma_buf_map_job(struct mali_pp_job *job);
+void mali_dma_buf_unmap_job(struct mali_pp_job *job);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MEMORY_DMA_BUF_H__ */
diff --git a/utgard/r6p1/linux/mali_memory_external.c b/utgard/r6p1/linux/mali_memory_external.c
new file mode 100755
index 0000000..c0097f6
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_external.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_memory.h"
+#include "mali_mem_validation.h"
+#include "mali_uk_types.h"
+
+void mali_mem_unbind_ext_buf(mali_mem_backend *mem_backend)
+{
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT(MALI_MEM_EXTERNAL == mem_backend->type);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+_mali_osk_errcode_t mali_mem_bind_ext_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		u32 phys_addr,
+		u32 flag)
+{
+	struct mali_session_data *session;
+	_mali_osk_errcode_t err;
+	u32 virt, phys, size;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	size = alloc->psize;
+	session = (struct mali_session_data *)(uintptr_t)alloc->session;
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+
+	/* check arguments */
+	/* NULL might be a valid Mali address */
+	if (!size) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	/* size must be a multiple of the system page size */
+	if (size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	/* Validate the mali physical range */
+	if (_MALI_OSK_ERR_OK != mali_mem_validation_check(phys_addr, size)) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (flag & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+	mali_session_memory_lock(session);
+
+	virt = alloc->mali_vma_node.vm_node.start;
+	phys = phys_addr;
+
+	err = mali_mem_mali_map_prepare(alloc);
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_session_memory_unlock(session);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	mali_mmu_pagedir_update(session->page_directory, virt, phys, size, MALI_MMU_FLAGS_DEFAULT);
+
+	if (alloc->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		mali_mmu_pagedir_update(session->page_directory, virt + size, phys, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+	}
+	MALI_DEBUG_PRINT(3,
+			 ("Requested to map physical memory 0x%x-0x%x into virtual memory 0x%x\n",
+			  phys_addr, (phys_addr + size - 1),
+			  virt));
+	mali_session_memory_unlock(session);
+
+	MALI_SUCCESS;
+}
+
diff --git a/utgard/r6p1/linux/mali_memory_external.h b/utgard/r6p1/linux/mali_memory_external.h
new file mode 100644
index 0000000..800ac2a
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_external.h
@@ -0,0 +1,29 @@
+
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_EXTERNAL_H__
+#define __MALI_MEMORY_EXTERNAL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+_mali_osk_errcode_t mali_mem_bind_ext_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		u32 phys_addr,
+		u32 flag);
+void mali_mem_unbind_ext_buf(mali_mem_backend *mem_backend);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/utgard/r6p1/linux/mali_memory_manager.c b/utgard/r6p1/linux/mali_memory_manager.c
new file mode 100644
index 0000000..9001da0
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_manager.c
@@ -0,0 +1,993 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+
+#include <linux/platform_device.h>
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif
+#include <linux/idr.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_secure.h"
+#endif
+#if defined(CONFIG_MALI400_UMP)
+#include "mali_memory_ump.h"
+#endif
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_util.h"
+#include "mali_memory_external.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_ukk.h"
+#include "mali_memory_swap_alloc.h"
+
+/*
+* New memory system interface
+*/
+
+/*inti idr for backend memory */
+struct idr mali_backend_idr;
+struct mutex mali_idr_mutex;
+
+/* init allocation manager */
+int mali_memory_manager_init(struct mali_allocation_manager *mgr)
+{
+	/* init Locks */
+	rwlock_init(&mgr->vm_lock);
+	mutex_init(&mgr->list_mutex);
+
+	/* init link */
+	INIT_LIST_HEAD(&mgr->head);
+
+	/* init RB tree */
+	mgr->allocation_mgr_rb = RB_ROOT;
+	mgr->mali_allocation_num = 0;
+	return 0;
+}
+
+/* Deinit allocation manager
+* Do some check for debug
+*/
+void mali_memory_manager_uninit(struct mali_allocation_manager *mgr)
+{
+	/* check RB tree is empty */
+	MALI_DEBUG_ASSERT(((void *)(mgr->allocation_mgr_rb.rb_node) == (void *)rb_last(&mgr->allocation_mgr_rb)));
+	/* check allocation List */
+	MALI_DEBUG_ASSERT(list_empty(&mgr->head));
+}
+
+/* Prepare memory descriptor */
+static mali_mem_allocation *mali_mem_allocation_struct_create(struct mali_session_data *session)
+{
+	mali_mem_allocation *mali_allocation;
+
+	/* Allocate memory */
+	mali_allocation = (mali_mem_allocation *)kzalloc(sizeof(mali_mem_allocation), GFP_KERNEL);
+	if (NULL == mali_allocation) {
+		MALI_DEBUG_PRINT(1, ("mali_mem_allocation_struct_create: descriptor was NULL\n"));
+		return NULL;
+	}
+
+	MALI_DEBUG_CODE(mali_allocation->magic = MALI_MEM_ALLOCATION_VALID_MAGIC);
+
+	/* do init */
+	mali_allocation->flags = 0;
+	mali_allocation->session = session;
+
+	INIT_LIST_HEAD(&mali_allocation->list);
+	_mali_osk_atomic_init(&mali_allocation->mem_alloc_refcount, 1);
+
+	/**
+	*add to session list
+	*/
+	mutex_lock(&session->allocation_mgr.list_mutex);
+	list_add_tail(&mali_allocation->list, &session->allocation_mgr.head);
+	session->allocation_mgr.mali_allocation_num++;
+	mutex_unlock(&session->allocation_mgr.list_mutex);
+
+	return mali_allocation;
+}
+
+void  mali_mem_allocation_struct_destory(mali_mem_allocation *alloc)
+{
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(alloc->session);
+	mutex_lock(&alloc->session->allocation_mgr.list_mutex);
+	list_del(&alloc->list);
+	alloc->session->allocation_mgr.mali_allocation_num--;
+	mutex_unlock(&alloc->session->allocation_mgr.list_mutex);
+
+	kfree(alloc);
+}
+
+int mali_mem_backend_struct_create(mali_mem_backend **backend, u32 psize)
+{
+	mali_mem_backend *mem_backend = NULL;
+	s32 ret = -ENOSPC;
+	s32 index = -1;
+	*backend = (mali_mem_backend *)kzalloc(sizeof(mali_mem_backend), GFP_KERNEL);
+	if (NULL == *backend) {
+		MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_create: backend descriptor was NULL\n"));
+		return -1;
+	}
+	mem_backend = *backend;
+	mem_backend->size = psize;
+	mutex_init(&mem_backend->mutex);
+	INIT_LIST_HEAD(&mem_backend->list);
+	mem_backend->using_count = 0;
+
+
+	/* link backend with id */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
+again:
+	if (!idr_pre_get(&mali_backend_idr, GFP_KERNEL)) {
+		kfree(mem_backend);
+		return -ENOMEM;
+	}
+	mutex_lock(&mali_idr_mutex);
+	ret = idr_get_new_above(&mali_backend_idr, mem_backend, 1, &index);
+	mutex_unlock(&mali_idr_mutex);
+
+	if (-ENOSPC == ret) {
+		kfree(mem_backend);
+		return -ENOSPC;
+	}
+	if (-EAGAIN == ret)
+		goto again;
+#else
+	mutex_lock(&mali_idr_mutex);
+	ret = idr_alloc(&mali_backend_idr, mem_backend, 1, MALI_S32_MAX, GFP_KERNEL);
+	mutex_unlock(&mali_idr_mutex);
+	index = ret;
+	if (ret < 0) {
+		MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_create: Can't allocate idr for backend! \n"));
+		kfree(mem_backend);
+		return -ENOSPC;
+	}
+#endif
+	return index;
+}
+
+
+static void mali_mem_backend_struct_destory(mali_mem_backend **backend, s32 backend_handle)
+{
+	mali_mem_backend *mem_backend = *backend;
+
+	mutex_lock(&mali_idr_mutex);
+	idr_remove(&mali_backend_idr, backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	kfree(mem_backend);
+	*backend = NULL;
+}
+
+mali_mem_backend *mali_mem_backend_struct_search(struct mali_session_data *session, u32 mali_address)
+{
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_address, 0);
+	if (NULL == mali_vma_node)  {
+		MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_search:vma node was NULL\n"));
+		return NULL;
+	}
+	mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	MALI_DEBUG_ASSERT(NULL != mem_bkend);
+	return mem_bkend;
+}
+
+static _mali_osk_errcode_t mali_mem_resize(struct mali_session_data *session, mali_mem_backend *mem_backend, u32 physical_size)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	int retval = 0;
+	mali_mem_allocation *mali_allocation = NULL;
+	mali_mem_os_mem tmp_os_mem;
+	s32 change_page_count;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_PRINT(4, (" mali_mem_resize_memory called! \n"));
+	MALI_DEBUG_ASSERT(0 == physical_size %  MALI_MMU_PAGE_SIZE);
+
+	mali_allocation = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(mali_allocation);
+
+	MALI_DEBUG_ASSERT(MALI_MEM_FLAG_CAN_RESIZE & mali_allocation->flags);
+	MALI_DEBUG_ASSERT(MALI_MEM_OS == mali_allocation->type);
+
+	mutex_lock(&mem_backend->mutex);
+
+	/* Do resize*/
+	if (physical_size > mem_backend->size) {
+		u32 add_size = physical_size - mem_backend->size;
+
+		MALI_DEBUG_ASSERT(0 == add_size %  MALI_MMU_PAGE_SIZE);
+
+		/* Allocate new pages from os mem */
+		retval = mali_mem_os_alloc_pages(&tmp_os_mem, add_size);
+
+		if (retval) {
+			if (-ENOMEM == retval) {
+				ret = _MALI_OSK_ERR_NOMEM;
+			} else {
+				ret = _MALI_OSK_ERR_FAULT;
+			}
+			MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory allocation failed !\n"));
+			goto failed_alloc_memory;
+		}
+
+		MALI_DEBUG_ASSERT(tmp_os_mem.count == add_size / MALI_MMU_PAGE_SIZE);
+
+		/* Resize the memory of the backend */
+		ret = mali_mem_os_resize_pages(&tmp_os_mem, &mem_backend->os_mem, 0, tmp_os_mem.count);
+
+		if (ret) {
+			MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory	resizing failed !\n"));
+			goto failed_resize_pages;
+		}
+
+		/*Resize cpu mapping */
+		if (NULL != mali_allocation->cpu_mapping.vma) {
+			ret = mali_mem_os_resize_cpu_map_locked(mem_backend, mali_allocation->cpu_mapping.vma, mali_allocation->cpu_mapping.vma->vm_start  + mem_backend->size, add_size);
+			if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+				MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: cpu mapping failed !\n"));
+				goto  failed_cpu_map;
+			}
+		}
+
+		/* Resize mali mapping */
+		_mali_osk_mutex_wait(session->memory_lock);
+		ret = mali_mem_mali_map_resize(mali_allocation, physical_size);
+
+		if (ret) {
+			MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_resize: mali map resize fail !\n"));
+			goto failed_gpu_map;
+		}
+
+		ret = mali_mem_os_mali_map(&mem_backend->os_mem, session, mali_allocation->mali_vma_node.vm_node.start,
+					   mali_allocation->psize / MALI_MMU_PAGE_SIZE, add_size / MALI_MMU_PAGE_SIZE, mali_allocation->mali_mapping.properties);
+		if (ret) {
+			MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: mali mapping failed !\n"));
+			goto failed_gpu_map;
+		}
+
+		_mali_osk_mutex_signal(session->memory_lock);
+	} else {
+		u32 dec_size, page_count;
+		u32 vaddr = 0;
+		INIT_LIST_HEAD(&tmp_os_mem.pages);
+		tmp_os_mem.count = 0;
+
+		dec_size = mem_backend->size - physical_size;
+		MALI_DEBUG_ASSERT(0 == dec_size %  MALI_MMU_PAGE_SIZE);
+
+		page_count = dec_size / MALI_MMU_PAGE_SIZE;
+		vaddr = mali_allocation->mali_vma_node.vm_node.start + physical_size;
+
+		/* Resize the memory of the backend */
+		ret = mali_mem_os_resize_pages(&mem_backend->os_mem, &tmp_os_mem, physical_size / MALI_MMU_PAGE_SIZE, page_count);
+
+		if (ret) {
+			MALI_DEBUG_PRINT(4, ("_mali_ukk_mem_resize: mali map resize failed!\n"));
+			goto failed_resize_pages;
+		}
+
+		/* Resize mali map */
+		_mali_osk_mutex_wait(session->memory_lock);
+		mali_mem_mali_map_free(session, dec_size, vaddr, mali_allocation->flags);
+		_mali_osk_mutex_signal(session->memory_lock);
+
+		/* Zap cpu mapping */
+		if (0 != mali_allocation->cpu_mapping.addr) {
+			MALI_DEBUG_ASSERT(NULL != mali_allocation->cpu_mapping.vma);
+			zap_vma_ptes(mali_allocation->cpu_mapping.vma, mali_allocation->cpu_mapping.vma->vm_start + physical_size, dec_size);
+		}
+
+		/* Free those extra pages */
+		mali_mem_os_free(&tmp_os_mem.pages, tmp_os_mem.count, MALI_FALSE);
+	}
+
+	/* Resize memory allocation and memory backend */
+	change_page_count = (s32)(physical_size - mem_backend->size) / MALI_MMU_PAGE_SIZE;
+	mali_allocation->psize = physical_size;
+	mem_backend->size = physical_size;
+	mutex_unlock(&mem_backend->mutex);
+
+	if (change_page_count > 0) {
+		atomic_add(change_page_count, &session->mali_mem_allocated_pages);
+		if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+			session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+
+	} else {
+		atomic_sub((s32)(-change_page_count), &session->mali_mem_allocated_pages);
+	}
+
+	return _MALI_OSK_ERR_OK;
+
+failed_gpu_map:
+	_mali_osk_mutex_signal(session->memory_lock);
+failed_cpu_map:
+	if (physical_size > mem_backend->size) {
+		mali_mem_os_resize_pages(&mem_backend->os_mem, &tmp_os_mem, mem_backend->size / MALI_MMU_PAGE_SIZE,
+					 (physical_size - mem_backend->size) / MALI_MMU_PAGE_SIZE);
+	} else {
+		mali_mem_os_resize_pages(&tmp_os_mem, &mem_backend->os_mem, 0, tmp_os_mem.count);
+	}
+failed_resize_pages:
+	if (0 != tmp_os_mem.count)
+		mali_mem_os_free(&tmp_os_mem.pages, tmp_os_mem.count, MALI_FALSE);
+failed_alloc_memory:
+
+	mutex_unlock(&mem_backend->mutex);
+	return ret;
+}
+
+
+/* Set GPU MMU properties */
+static void _mali_memory_gpu_map_property_set(u32 *properties, u32 flags)
+{
+	if (_MALI_MEMORY_GPU_READ_ALLOCATE & flags) {
+		*properties = MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE;
+	} else {
+		*properties = MALI_MMU_FLAGS_DEFAULT;
+	}
+}
+
+_mali_osk_errcode_t mali_mem_add_mem_size(struct mali_session_data *session, u32 mali_addr, u32 add_size)
+{
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_allocation *mali_allocation = NULL;
+	u32 new_physical_size;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(0 == add_size %  MALI_MMU_PAGE_SIZE);
+
+	/* Get the memory backend that need to be resize. */
+	mem_backend = mali_mem_backend_struct_search(session, mali_addr);
+
+	if (NULL == mem_backend)  {
+		MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory backend = NULL!\n"));
+		return ret;
+	}
+
+	mali_allocation = mem_backend->mali_allocation;
+
+	MALI_DEBUG_ASSERT_POINTER(mali_allocation);
+
+	new_physical_size = add_size + mem_backend->size;
+
+	if (new_physical_size > (mali_allocation->mali_vma_node.vm_node.size))
+		return ret;
+
+	MALI_DEBUG_ASSERT(new_physical_size != mem_backend->size);
+
+	ret = mali_mem_resize(session, mem_backend, new_physical_size);
+
+	return ret;
+}
+
+/**
+*  function@_mali_ukk_mem_allocate - allocate mali memory
+*/
+_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args)
+{
+	struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	int retval = 0;
+	mali_mem_allocation *mali_allocation = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+
+	MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_allocate, vaddr=0x%x, size =0x%x! \n", args->gpu_vaddr, args->psize));
+
+	/* Check if the address is allocated
+	*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, args->gpu_vaddr, 0);
+
+	if (unlikely(mali_vma_node)) {
+		MALI_DEBUG_PRINT_ERROR(("The mali virtual address has already been used ! \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+	/**
+	*create mali memory allocation
+	*/
+
+	mali_allocation = mali_mem_allocation_struct_create(session);
+
+	if (mali_allocation == NULL) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_allocate: Failed to create allocation struct! \n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	mali_allocation->psize = args->psize;
+	mali_allocation->vsize = args->vsize;
+
+	/* MALI_MEM_OS if need to support mem resize,
+	 * or MALI_MEM_BLOCK if have dedicated memory,
+	 * or MALI_MEM_OS,
+	 * or MALI_MEM_SWAP.
+	 */
+	if (args->flags & _MALI_MEMORY_ALLOCATE_SWAPPABLE) {
+		mali_allocation->type = MALI_MEM_SWAP;
+	} else if (args->flags & _MALI_MEMORY_ALLOCATE_RESIZEABLE) {
+		mali_allocation->type = MALI_MEM_OS;
+		mali_allocation->flags |= MALI_MEM_FLAG_CAN_RESIZE;
+	} else if (args->flags & _MALI_MEMORY_ALLOCATE_SECURE) {
+		mali_allocation->type = MALI_MEM_SECURE;
+	} else if (MALI_TRUE == mali_memory_have_dedicated_memory()) {
+		mali_allocation->type = MALI_MEM_BLOCK;
+	} else {
+		mali_allocation->type = MALI_MEM_OS;
+	}
+
+	/**
+	*add allocation node to RB tree for index
+	*/
+	mali_allocation->mali_vma_node.vm_node.start = args->gpu_vaddr;
+	mali_allocation->mali_vma_node.vm_node.size = args->vsize;
+
+	mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, args->psize);
+	if (mali_allocation->backend_handle < 0) {
+		ret = _MALI_OSK_ERR_NOMEM;
+		MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0! \n"));
+		goto failed_alloc_backend;
+	}
+
+
+	mem_backend->mali_allocation = mali_allocation;
+	mem_backend->type = mali_allocation->type;
+
+	mali_allocation->mali_mapping.addr = args->gpu_vaddr;
+
+	/* set gpu mmu propery */
+	_mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags);
+	/* do prepare for MALI mapping */
+	if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) {
+		_mali_osk_mutex_wait(session->memory_lock);
+
+		ret = mali_mem_mali_map_prepare(mali_allocation);
+		if (0 != ret) {
+			_mali_osk_mutex_signal(session->memory_lock);
+			goto failed_prepare_map;
+		}
+		_mali_osk_mutex_signal(session->memory_lock);
+	}
+
+	if (mali_allocation->psize == 0) {
+		mem_backend->os_mem.count = 0;
+		INIT_LIST_HEAD(&mem_backend->os_mem.pages);
+		goto done;
+	}
+
+	if (args->flags & _MALI_MEMORY_ALLOCATE_DEFER_BIND) {
+		mali_allocation->flags |= _MALI_MEMORY_ALLOCATE_DEFER_BIND;
+		mem_backend->flags |= MALI_MEM_BACKEND_FLAG_NOT_BINDED;
+		/* init for defer bind backend*/
+		mem_backend->os_mem.count = 0;
+		INIT_LIST_HEAD(&mem_backend->os_mem.pages);
+
+		goto done;
+	}
+
+	if (likely(mali_allocation->psize > 0)) {
+
+		if (MALI_MEM_SECURE == mem_backend->type) {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+			ret = mali_mem_secure_attach_dma_buf(&mem_backend->secure_mem, mem_backend->size, args->secure_shared_fd);
+			if (_MALI_OSK_ERR_OK != ret) {
+				MALI_DEBUG_PRINT(1, ("Failed to attach dma buf for secure memory! \n"));
+				goto failed_alloc_pages;
+			}
+#else
+			ret = _MALI_OSK_ERR_UNSUPPORTED;
+			MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory! \n"));
+			goto failed_alloc_pages;
+#endif
+		} else {
+
+			/**
+			*allocate physical memory
+			*/
+			if (mem_backend->type == MALI_MEM_OS) {
+				retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size);
+			} else if (mem_backend->type == MALI_MEM_BLOCK) {
+				/* try to allocated from BLOCK memory first, then try OS memory if failed.*/
+				if (mali_mem_block_alloc(&mem_backend->block_mem, mem_backend->size)) {
+					retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size);
+					mem_backend->type = MALI_MEM_OS;
+					mali_allocation->type = MALI_MEM_OS;
+				}
+			} else if (MALI_MEM_SWAP == mem_backend->type) {
+				retval = mali_mem_swap_alloc_pages(&mem_backend->swap_mem, mali_allocation->mali_vma_node.vm_node.size, &mem_backend->start_idx);
+			}  else {
+				/* ONLY support mem_os type */
+				MALI_DEBUG_ASSERT(0);
+			}
+
+			if (retval) {
+				ret = _MALI_OSK_ERR_NOMEM;
+				MALI_DEBUG_PRINT(1, (" can't allocate enough pages! \n"));
+				goto failed_alloc_pages;
+			}
+		}
+	}
+
+	/**
+	*map to GPU side
+	*/
+	if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) {
+		_mali_osk_mutex_wait(session->memory_lock);
+		/* Map on Mali */
+
+		if (mem_backend->type == MALI_MEM_OS) {
+			ret = mali_mem_os_mali_map(&mem_backend->os_mem, session, args->gpu_vaddr, 0,
+						   mem_backend->size / MALI_MMU_PAGE_SIZE, mali_allocation->mali_mapping.properties);
+
+		} else if (mem_backend->type == MALI_MEM_BLOCK) {
+			mali_mem_block_mali_map(&mem_backend->block_mem, session, args->gpu_vaddr,
+						mali_allocation->mali_mapping.properties);
+		} else if (mem_backend->type == MALI_MEM_SWAP) {
+			ret = mali_mem_swap_mali_map(&mem_backend->swap_mem, session, args->gpu_vaddr,
+						     mali_allocation->mali_mapping.properties);
+		} else if (mem_backend->type == MALI_MEM_SECURE) {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+			ret = mali_mem_secure_mali_map(&mem_backend->secure_mem, session, args->gpu_vaddr, mali_allocation->mali_mapping.properties);
+#endif
+		} else { /* unsupport type */
+			MALI_DEBUG_ASSERT(0);
+		}
+
+		_mali_osk_mutex_signal(session->memory_lock);
+	}
+done:
+	if (MALI_MEM_OS == mem_backend->type) {
+		atomic_add(mem_backend->os_mem.count, &session->mali_mem_allocated_pages);
+	} else if (MALI_MEM_BLOCK == mem_backend->type) {
+		atomic_add(mem_backend->block_mem.count, &session->mali_mem_allocated_pages);
+	} else if (MALI_MEM_SECURE == mem_backend->type) {
+		atomic_add(mem_backend->secure_mem.count, &session->mali_mem_allocated_pages);
+	} else {
+		MALI_DEBUG_ASSERT(MALI_MEM_SWAP == mem_backend->type);
+		atomic_add(mem_backend->swap_mem.count, &session->mali_mem_allocated_pages);
+		atomic_add(mem_backend->swap_mem.count, &session->mali_mem_array[mem_backend->type]);
+	}
+
+	if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+		session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+	}
+	return _MALI_OSK_ERR_OK;
+
+failed_alloc_pages:
+	mali_mem_mali_map_free(session, mali_allocation->psize, mali_allocation->mali_vma_node.vm_node.start, mali_allocation->flags);
+failed_prepare_map:
+	mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+failed_alloc_backend:
+
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_allocation);
+
+	return ret;
+}
+
+
+_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args)
+{
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	u32 vaddr = args->gpu_vaddr;
+	mali_mem_allocation *mali_alloc = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+
+	/* find mali allocation structure by vaddress*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, vaddr, 0);
+	if (NULL == mali_vma_node) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_free: invalid addr: 0x%x\n", vaddr));
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+	MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+	mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+
+	if (mali_alloc)
+		/* check ref_count */
+		args->free_pages_nr = mali_allocation_unref(&mali_alloc);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/**
+* Function _mali_ukk_mem_bind -- bind a external memory to a new GPU address
+* It will allocate a new mem allocation and bind external memory to it.
+* Supported backend type are:
+* _MALI_MEMORY_BIND_BACKEND_UMP
+* _MALI_MEMORY_BIND_BACKEND_DMA_BUF
+* _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY
+* CPU access is not supported yet
+*/
+_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args)
+{
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_allocation *mali_allocation = NULL;
+	MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_bind, vaddr=0x%x, size =0x%x! \n", args->vaddr, args->size));
+
+	/**
+	* allocate mali allocation.
+	*/
+	mali_allocation = mali_mem_allocation_struct_create(session);
+
+	if (mali_allocation == NULL) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	mali_allocation->psize = args->size;
+	mali_allocation->vsize = args->size;
+	mali_allocation->mali_mapping.addr = args->vaddr;
+
+	/* add allocation node to RB tree for index  */
+	mali_allocation->mali_vma_node.vm_node.start = args->vaddr;
+	mali_allocation->mali_vma_node.vm_node.size = args->size;
+	mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	/* allocate backend*/
+	if (mali_allocation->psize > 0) {
+		mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize);
+		if (mali_allocation->backend_handle < 0) {
+			goto Failed_alloc_backend;
+		}
+
+	} else {
+		goto Failed_alloc_backend;
+	}
+
+	mem_backend->size = mali_allocation->psize;
+	mem_backend->mali_allocation = mali_allocation;
+
+	switch (args->flags & _MALI_MEMORY_BIND_BACKEND_MASK) {
+	case  _MALI_MEMORY_BIND_BACKEND_UMP:
+#if defined(CONFIG_MALI400_UMP)
+		mali_allocation->type = MALI_MEM_UMP;
+		mem_backend->type = MALI_MEM_UMP;
+		ret = mali_mem_bind_ump_buf(mali_allocation, mem_backend,
+					    args->mem_union.bind_ump.secure_id, args->mem_union.bind_ump.flags);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Bind ump buf failed\n"));
+			goto  Failed_bind_backend;
+		}
+#else
+		MALI_DEBUG_PRINT(1, ("UMP not supported\n"));
+		goto Failed_bind_backend;
+#endif
+		break;
+	case  _MALI_MEMORY_BIND_BACKEND_DMA_BUF:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		mali_allocation->type = MALI_MEM_DMA_BUF;
+		mem_backend->type = MALI_MEM_DMA_BUF;
+		ret = mali_mem_bind_dma_buf(mali_allocation, mem_backend,
+					    args->mem_union.bind_dma_buf.mem_fd, args->mem_union.bind_dma_buf.flags);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Bind dma buf failed\n"));
+			goto Failed_bind_backend;
+		}
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported\n"));
+		goto Failed_bind_backend;
+#endif
+		break;
+	case _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY:
+		/* not allowed */
+		MALI_DEBUG_PRINT_ERROR(("Mali internal memory type not supported !\n"));
+		goto Failed_bind_backend;
+		break;
+
+	case _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY:
+		mali_allocation->type = MALI_MEM_EXTERNAL;
+		mem_backend->type = MALI_MEM_EXTERNAL;
+		ret = mali_mem_bind_ext_buf(mali_allocation, mem_backend, args->mem_union.bind_ext_memory.phys_addr,
+					    args->mem_union.bind_ext_memory.flags);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Bind external buf failed\n"));
+			goto Failed_bind_backend;
+		}
+		break;
+
+	case _MALI_MEMORY_BIND_BACKEND_EXT_COW:
+		/* not allowed */
+		MALI_DEBUG_PRINT_ERROR(("External cow memory  type not supported !\n"));
+		goto Failed_bind_backend;
+		break;
+
+	default:
+		MALI_DEBUG_PRINT_ERROR(("Invalid memory type  not supported !\n"));
+		goto Failed_bind_backend;
+		break;
+	}
+	MALI_DEBUG_ASSERT(0 == mem_backend->size % MALI_MMU_PAGE_SIZE);
+	atomic_add(mem_backend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_backend->type]);
+	return _MALI_OSK_ERR_OK;
+
+Failed_bind_backend:
+	mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+
+Failed_alloc_backend:
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_allocation);
+
+	MALI_DEBUG_PRINT(1, (" _mali_ukk_mem_bind, return ERROR! \n"));
+	return ret;
+}
+
+
+/*
+* Function _mali_ukk_mem_unbind -- unbind a external memory to a new GPU address
+* This function unbind the backend memory and free the allocation
+* no ref_count for this type of memory
+*/
+_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args)
+{
+	/**/
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_allocation *mali_allocation = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+	u32 mali_addr = args->vaddr;
+	MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_unbind, vaddr=0x%x! \n", args->vaddr));
+
+	/* find the allocation by vaddr */
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+	if (likely(mali_vma_node)) {
+		MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start);
+		mali_allocation = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+	} else {
+		MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	if (NULL != mali_allocation)
+		/* check ref_count */
+		mali_allocation_unref(&mali_allocation);
+	return _MALI_OSK_ERR_OK;
+}
+
+/*
+* Function _mali_ukk_mem_cow --  COW for an allocation
+* This function allocate new pages for  a range (range, range+size) of allocation
+*  And Map it(keep use the not in range pages from target allocation ) to an GPU vaddr
+*/
+_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_backend *target_backend = NULL;
+	mali_mem_backend *mem_backend = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_allocation = NULL;
+
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	/* Get the target backend for cow */
+	target_backend = mali_mem_backend_struct_search(session, args->target_handle);
+
+	if (NULL == target_backend || 0 == target_backend->size) {
+		MALI_DEBUG_ASSERT_POINTER(target_backend);
+		MALI_DEBUG_ASSERT(0 != target_backend->size);
+		return ret;
+	}
+
+	/*Cow not support resized mem */
+	MALI_DEBUG_ASSERT(MALI_MEM_FLAG_CAN_RESIZE != (MALI_MEM_FLAG_CAN_RESIZE & target_backend->mali_allocation->flags));
+
+	/* Check if the new mali address is allocated */
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, args->vaddr, 0);
+
+	if (unlikely(mali_vma_node)) {
+		MALI_DEBUG_PRINT_ERROR(("The mali virtual address has already been used ! \n"));
+		return ret;
+	}
+
+	/* create new alloction for COW*/
+	mali_allocation = mali_mem_allocation_struct_create(session);
+	if (mali_allocation == NULL) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_cow: Failed to create allocation struct!\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	mali_allocation->psize = args->target_size;
+	mali_allocation->vsize = args->target_size;
+	mali_allocation->type = MALI_MEM_COW;
+
+	/*add allocation node to RB tree for index*/
+	mali_allocation->mali_vma_node.vm_node.start = args->vaddr;
+	mali_allocation->mali_vma_node.vm_node.size = mali_allocation->vsize;
+	mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	/* create new backend for COW memory */
+	mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize);
+	if (mali_allocation->backend_handle < 0) {
+		ret = _MALI_OSK_ERR_NOMEM;
+		MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0! \n"));
+		goto failed_alloc_backend;
+	}
+	mem_backend->mali_allocation = mali_allocation;
+	mem_backend->type = mali_allocation->type;
+
+	if (target_backend->type == MALI_MEM_SWAP ||
+	    (MALI_MEM_COW == target_backend->type && (MALI_MEM_BACKEND_FLAG_SWAP_COWED & target_backend->flags))) {
+		mem_backend->flags |= MALI_MEM_BACKEND_FLAG_SWAP_COWED;
+		/**
+		 *     CoWed swap backends couldn't be mapped as non-linear vma, because if one
+		 * vma is set with flag VM_NONLINEAR, the vma->vm_private_data will be used by kernel,
+		 * while in mali driver, we use this variable to store the pointer of mali_allocation, so there
+		 * is a conflict.
+		 *     To resolve this problem, we have to do some fake things, we reserved about 64MB
+		 * space from index 0, there isn't really page's index will be set from 0 to (64MB>>PAGE_SHIFT_NUM),
+		 * and all of CoWed swap memory backends' start_idx will be assigned with 0, and these
+		 * backends will be mapped as linear and will add to priority tree of global swap file, while
+		 * these vmas will never be found by using normal page->index, these pages in those vma
+		 * also couldn't be swapped out.
+		 */
+		mem_backend->start_idx = 0;
+	}
+
+	/* Add the target backend's cow count, also allocate new pages for COW backend from os mem
+	*for a modified range and keep the page which not in the modified range and Add ref to it
+	*/
+	MALI_DEBUG_PRINT(3, ("Cow mapping: target_addr: 0x%x;  cow_addr: 0x%x,  size: %u\n", target_backend->mali_allocation->mali_vma_node.vm_node.start,
+			     mali_allocation->mali_vma_node.vm_node.start, mali_allocation->mali_vma_node.vm_node.size));
+
+	ret = mali_memory_do_cow(target_backend, args->target_offset, args->target_size, mem_backend, args->range_start, args->range_size);
+	if (_MALI_OSK_ERR_OK != ret) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_cow: Failed to cow!\n"));
+		goto failed_do_cow;
+	}
+
+	/**
+	*map to GPU side
+	*/
+	mali_allocation->mali_mapping.addr = args->vaddr;
+	/* set gpu mmu propery */
+	_mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags);
+
+	_mali_osk_mutex_wait(session->memory_lock);
+	/* Map on Mali */
+	ret = mali_mem_mali_map_prepare(mali_allocation);
+	if (0 != ret) {
+		MALI_DEBUG_PRINT(1, (" prepare map fail! \n"));
+		goto failed_gpu_map;
+	}
+
+	if (!(mem_backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+		mali_mem_cow_mali_map(mem_backend, 0, mem_backend->size);
+	}
+
+	_mali_osk_mutex_signal(session->memory_lock);
+
+	mutex_lock(&target_backend->mutex);
+	target_backend->flags |= MALI_MEM_BACKEND_FLAG_COWED;
+	mutex_unlock(&target_backend->mutex);
+
+	atomic_add(args->range_size / MALI_MMU_PAGE_SIZE, &session->mali_mem_allocated_pages);
+	if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+		session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+	}
+	return _MALI_OSK_ERR_OK;
+
+failed_gpu_map:
+	_mali_osk_mutex_signal(session->memory_lock);
+	mali_mem_cow_release(mem_backend, MALI_FALSE);
+	mem_backend->cow_mem.count = 0;
+failed_do_cow:
+	mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+failed_alloc_backend:
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_allocation);
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_cow_modify_range(_mali_uk_cow_modify_range_s *args)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_backend *mem_backend = NULL;
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_cow_modify_range called! \n"));
+	/* Get the backend that need to be modified. */
+	mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+
+	if (NULL == mem_backend || 0 == mem_backend->size) {
+		MALI_DEBUG_ASSERT_POINTER(mem_backend);
+		MALI_DEBUG_ASSERT(0 != mem_backend->size);
+		return ret;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_MEM_COW  == mem_backend->type);
+
+	ret =  mali_memory_cow_modify_range(mem_backend, args->range_start, args->size);
+	args->change_pages_nr = mem_backend->cow_mem.change_pages_nr;
+	if (_MALI_OSK_ERR_OK != ret)
+		return  ret;
+	_mali_osk_mutex_wait(session->memory_lock);
+	if (!(mem_backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+		mali_mem_cow_mali_map(mem_backend, args->range_start, args->size);
+	}
+	_mali_osk_mutex_signal(session->memory_lock);
+
+	atomic_add(args->change_pages_nr, &session->mali_mem_allocated_pages);
+	if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+		session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+_mali_osk_errcode_t _mali_ukk_mem_resize(_mali_uk_mem_resize_s *args)
+{
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_PRINT(4, (" mali_mem_resize_memory called! \n"));
+	MALI_DEBUG_ASSERT(0 == args->psize %  MALI_MMU_PAGE_SIZE);
+
+	/* Get the memory backend that need to be resize. */
+	mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+
+	if (NULL == mem_backend)  {
+		MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory backend = NULL!\n"));
+		return ret;
+	}
+
+	MALI_DEBUG_ASSERT(args->psize != mem_backend->size);
+
+	ret = mali_mem_resize(session, mem_backend, args->psize);
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_usage_get(_mali_uk_profiling_memory_usage_get_s *args)
+{
+	args->memory_usage = _mali_ukk_report_memory_usage();
+	if (0 != args->vaddr) {
+		mali_mem_backend *mem_backend = NULL;
+		struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+		/* Get the backend that need to be modified. */
+		mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+		if (NULL == mem_backend) {
+			MALI_DEBUG_ASSERT_POINTER(mem_backend);
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		if (MALI_MEM_COW == mem_backend->type)
+			args->change_pages_nr = mem_backend->cow_mem.change_pages_nr;
+	}
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r6p1/linux/mali_memory_manager.h b/utgard/r6p1/linux/mali_memory_manager.h
new file mode 100644
index 0000000..caccd9e
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_manager.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_MANAGER_H__
+#define __MALI_MEMORY_MANAGER_H__
+
+#include "mali_osk.h"
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_uk_types.h"
+
+struct mali_allocation_manager {
+	rwlock_t vm_lock;
+	struct rb_root allocation_mgr_rb;
+	struct list_head head;
+	struct mutex list_mutex;
+	u32 mali_allocation_num;
+};
+
+extern struct idr mali_backend_idr;
+extern struct mutex mali_idr_mutex;
+
+int mali_memory_manager_init(struct mali_allocation_manager *mgr);
+void mali_memory_manager_uninit(struct mali_allocation_manager *mgr);
+
+void  mali_mem_allocation_struct_destory(mali_mem_allocation *alloc);
+
+_mali_osk_errcode_t mali_mem_add_mem_size(struct mali_session_data *session, u32 mali_addr, u32 add_size);
+mali_mem_backend *mali_mem_backend_struct_search(struct mali_session_data *session, u32 mali_address);
+_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_cow_modify_range(_mali_uk_cow_modify_range_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_usage_get(_mali_uk_profiling_memory_usage_get_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_resize(_mali_uk_mem_resize_s *args);
+
+
+#endif
+
diff --git a/utgard/r6p1/linux/mali_memory_os_alloc.c b/utgard/r6p1/linux/mali_memory_os_alloc.c
new file mode 100755
index 0000000..3c4dc3c
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_os_alloc.c
@@ -0,0 +1,827 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/version.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+
+#include "mali_osk.h"
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_kernel_linux.h"
+
+/* Minimum size of allocator page pool */
+#define MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB * 256)
+#define MALI_OS_MEMORY_POOL_TRIM_JIFFIES (10 * CONFIG_HZ) /* Default to 10s */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+static unsigned long mali_dma_attrs;
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+/* Write combine dma_attrs */
+static DEFINE_DMA_ATTRS(dma_attrs_wc);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask);
+#else
+static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask);
+#endif
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc);
+#else
+static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc);
+static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc);
+#endif
+#endif
+static void mali_mem_os_trim_pool(struct work_struct *work);
+
+struct mali_mem_os_allocator mali_mem_os_allocator = {
+	.pool_lock = __SPIN_LOCK_UNLOCKED(pool_lock),
+	.pool_pages = LIST_HEAD_INIT(mali_mem_os_allocator.pool_pages),
+	.pool_count = 0,
+
+	.allocated_pages = ATOMIC_INIT(0),
+	.allocation_limit = 0,
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	.shrinker.shrink = mali_mem_os_shrink,
+#else
+	.shrinker.count_objects = mali_mem_os_shrink_count,
+	.shrinker.scan_objects = mali_mem_os_shrink,
+#endif
+	.shrinker.seeks = DEFAULT_SEEKS,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	.timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool, TIMER_DEFERRABLE),
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)
+	.timed_shrinker = __DEFERRED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool),
+#else
+	.timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool),
+#endif
+};
+
+u32 mali_mem_os_free(struct list_head *os_pages, u32 pages_count, mali_bool cow_flag)
+{
+	LIST_HEAD(pages);
+	struct mali_page_node *m_page, *m_tmp;
+	u32 free_pages_nr = 0;
+
+	if (MALI_TRUE == cow_flag) {
+		list_for_each_entry_safe(m_page, m_tmp, os_pages, list) {
+			/*only handle OS node here */
+			if (m_page->type == MALI_PAGE_NODE_OS) {
+				if (1 == _mali_page_node_get_ref_count(m_page)) {
+					list_move(&m_page->list, &pages);
+					atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+					free_pages_nr ++;
+				} else {
+					_mali_page_node_unref(m_page);
+					m_page->page = NULL;
+					list_del(&m_page->list);
+					kfree(m_page);
+				}
+			}
+		}
+	} else {
+		list_cut_position(&pages, os_pages, os_pages->prev);
+		atomic_sub(pages_count, &mali_mem_os_allocator.allocated_pages);
+		free_pages_nr = pages_count;
+	}
+
+	/* Put pages on pool. */
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+	list_splice(&pages, &mali_mem_os_allocator.pool_pages);
+	mali_mem_os_allocator.pool_count += free_pages_nr;
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+		MALI_DEBUG_PRINT(5, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count));
+		queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES);
+	}
+	return free_pages_nr;
+}
+
+/**
+* put page without put it into page pool
+*/
+_mali_osk_errcode_t mali_mem_os_put_page(struct page *page)
+{
+	MALI_DEBUG_ASSERT_POINTER(page);
+	if (1 == page_count(page)) {
+		atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+		dma_unmap_page(&mali_platform_device->dev, page_private(page),
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+		ClearPagePrivate(page);
+	}
+	put_page(page);
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mem_os_resize_pages(mali_mem_os_mem *mem_from, mali_mem_os_mem *mem_to, u32 start_page, u32 page_count)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	u32 i = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_from);
+	MALI_DEBUG_ASSERT_POINTER(mem_to);
+
+	if (mem_from->count < start_page + page_count) {
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	list_for_each_entry_safe(m_page, m_tmp, &mem_from->pages, list) {
+		if (i >= start_page && i < start_page + page_count) {
+			list_move_tail(&m_page->list, &mem_to->pages);
+			mem_from->count--;
+			mem_to->count++;
+		}
+		i++;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size)
+{
+	struct page *new_page;
+	LIST_HEAD(pages_list);
+	size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE;
+	size_t remaining = page_count;
+	struct mali_page_node *m_page, *m_tmp;
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(os_mem);
+
+	if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) {
+		MALI_DEBUG_PRINT(2, ("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n",
+				     size,
+				     atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE,
+				     mali_mem_os_allocator.allocation_limit));
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&os_mem->pages);
+	os_mem->count = page_count;
+
+	/* Grab pages from pool. */
+	{
+		size_t pool_pages;
+		spin_lock(&mali_mem_os_allocator.pool_lock);
+		pool_pages = min(remaining, mali_mem_os_allocator.pool_count);
+		for (i = pool_pages; i > 0; i--) {
+			BUG_ON(list_empty(&mali_mem_os_allocator.pool_pages));
+			list_move(mali_mem_os_allocator.pool_pages.next, &pages_list);
+		}
+		mali_mem_os_allocator.pool_count -= pool_pages;
+		remaining -= pool_pages;
+		spin_unlock(&mali_mem_os_allocator.pool_lock);
+	}
+
+	/* Process pages from pool. */
+	i = 0;
+	list_for_each_entry_safe(m_page, m_tmp, &pages_list, list) {
+		BUG_ON(NULL == m_page);
+
+		list_move_tail(&m_page->list, &os_mem->pages);
+	}
+
+	/* Allocate new pages, if needed. */
+	for (i = 0; i < remaining; i++) {
+		dma_addr_t dma_addr;
+		gfp_t flags = __GFP_ZERO | __GFP_NORETRY | __GFP_NOWARN | __GFP_COLD;
+		int err;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE)
+		flags |= GFP_HIGHUSER;
+#else
+#ifdef CONFIG_ZONE_DMA32
+		flags |= GFP_DMA32;
+#else
+#ifdef CONFIG_ZONE_DMA
+		flags |= GFP_DMA;
+#else
+		/* arm64 utgard only work on < 4G, but the kernel
+		 * didn't provide method to allocte memory < 4G
+		 */
+		MALI_DEBUG_ASSERT(0);
+#endif
+#endif
+#endif
+
+		new_page = alloc_page(flags);
+
+		if (unlikely(NULL == new_page)) {
+			/* Calculate the number of pages actually allocated, and free them. */
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+			return -ENOMEM;
+		}
+
+		/* Ensure page is flushed from CPU caches. */
+		dma_addr = dma_map_page(&mali_platform_device->dev, new_page,
+					0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+		err = dma_mapping_error(&mali_platform_device->dev, dma_addr);
+		if (unlikely(err)) {
+			MALI_DEBUG_PRINT_ERROR(("OS Mem: Failed to DMA map page %p: %u",
+						new_page, err));
+			__free_page(new_page);
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+			return -EFAULT;
+		}
+
+		/* Store page phys addr */
+		SetPagePrivate(new_page);
+		set_page_private(new_page, dma_addr);
+
+		m_page = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+		if (unlikely(NULL == m_page)) {
+			MALI_PRINT_ERROR(("OS Mem: Can't allocate mali_page node! \n"));
+			dma_unmap_page(&mali_platform_device->dev, page_private(new_page),
+				       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+			ClearPagePrivate(new_page);
+			__free_page(new_page);
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+			return -EFAULT;
+		}
+		m_page->page = new_page;
+
+		list_add_tail(&m_page->list, &os_mem->pages);
+	}
+
+	atomic_add(page_count, &mali_mem_os_allocator.allocated_pages);
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES > mali_mem_os_allocator.pool_count) {
+		MALI_DEBUG_PRINT(4, ("OS Mem: Stopping pool trim timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count));
+		cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker);
+	}
+
+	return 0;
+}
+
+
+_mali_osk_errcode_t mali_mem_os_mali_map(mali_mem_os_mem *os_mem, struct mali_session_data *session, u32 vaddr, u32 start_page, u32 mapping_pgae_num, u32 props)
+{
+	struct mali_page_directory *pagedir = session->page_directory;
+	struct mali_page_node *m_page;
+	u32 virt;
+	u32 prop = props;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(os_mem);
+
+	MALI_DEBUG_ASSERT(start_page <= os_mem->count);
+	MALI_DEBUG_ASSERT((start_page + mapping_pgae_num) <= os_mem->count);
+
+	if ((start_page + mapping_pgae_num) == os_mem->count) {
+
+		virt = vaddr + MALI_MMU_PAGE_SIZE * (start_page + mapping_pgae_num);
+
+		list_for_each_entry_reverse(m_page, &os_mem->pages, list) {
+
+			virt -= MALI_MMU_PAGE_SIZE;
+			if (mapping_pgae_num > 0) {
+				dma_addr_t phys = page_private(m_page->page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+				/* Verify that the "physical" address is 32-bit and
+				* usable for Mali, when on a system with bus addresses
+				* wider than 32-bit. */
+				MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+				mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+			} else {
+				break;
+			}
+			mapping_pgae_num--;
+		}
+
+	} else {
+		u32 i = 0;
+		virt = vaddr;
+		list_for_each_entry(m_page, &os_mem->pages, list) {
+
+			if (i >= start_page) {
+				dma_addr_t phys = page_private(m_page->page);
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+				/* Verify that the "physical" address is 32-bit and
+				* usable for Mali, when on a system with bus addresses
+				* wider than 32-bit. */
+				MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+				mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+			}
+			i++;
+			virt += MALI_MMU_PAGE_SIZE;
+		}
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+
+void mali_mem_os_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+int mali_mem_os_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+	mali_mem_os_mem *os_mem = &mem_bkend->os_mem;
+	struct mali_page_node *m_page;
+	struct page *page;
+	int ret;
+	unsigned long addr = vma->vm_start;
+	MALI_DEBUG_ASSERT(MALI_MEM_OS == mem_bkend->type);
+
+	list_for_each_entry(m_page, &os_mem->pages, list) {
+		/* We should use vm_insert_page, but it does a dcache
+		 * flush which makes it way slower than remap_pfn_range or vm_insert_pfn.
+		ret = vm_insert_page(vma, addr, page);
+		*/
+		page = m_page->page;
+		ret = vm_insert_pfn(vma, addr, page_to_pfn(page));
+
+		if (unlikely(0 != ret)) {
+			return -EFAULT;
+		}
+		addr += _MALI_OSK_MALI_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+_mali_osk_errcode_t mali_mem_os_resize_cpu_map_locked(mali_mem_backend *mem_bkend, struct vm_area_struct *vma, unsigned long start_vaddr, u32 mappig_size)
+{
+	mali_mem_os_mem *os_mem = &mem_bkend->os_mem;
+	struct mali_page_node *m_page;
+	int ret;
+	int offset;
+	int mapping_page_num;
+	int count ;
+
+	unsigned long vstart = vma->vm_start;
+	count = 0;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+	MALI_DEBUG_ASSERT(0 == start_vaddr % _MALI_OSK_MALI_PAGE_SIZE);
+	MALI_DEBUG_ASSERT(0 == vstart % _MALI_OSK_MALI_PAGE_SIZE);
+	offset = (start_vaddr - vstart) / _MALI_OSK_MALI_PAGE_SIZE;
+	MALI_DEBUG_ASSERT(offset <= os_mem->count);
+	mapping_page_num = mappig_size / _MALI_OSK_MALI_PAGE_SIZE;
+	MALI_DEBUG_ASSERT((offset + mapping_page_num) <= os_mem->count);
+
+	if ((offset + mapping_page_num) == os_mem->count) {
+
+		unsigned long vm_end = start_vaddr + mappig_size;
+
+		list_for_each_entry_reverse(m_page, &os_mem->pages, list) {
+
+			vm_end -= _MALI_OSK_MALI_PAGE_SIZE;
+			if (mapping_page_num > 0) {
+				ret = vm_insert_pfn(vma, vm_end, page_to_pfn(m_page->page));
+
+				if (unlikely(0 != ret)) {
+					/*will return -EBUSY If the page has already been mapped into table, but it's OK*/
+					if (-EBUSY == ret) {
+						break;
+					} else {
+						MALI_DEBUG_PRINT(1, ("OS Mem: mali_mem_os_resize_cpu_map_locked failed, ret = %d, offset is %d,page_count is %d\n",
+								     ret,  offset + mapping_page_num, os_mem->count));
+					}
+					return _MALI_OSK_ERR_FAULT;
+				}
+			} else {
+				break;
+			}
+			mapping_page_num--;
+
+		}
+	} else {
+
+		list_for_each_entry(m_page, &os_mem->pages, list) {
+			if (count >= offset) {
+
+				ret = vm_insert_pfn(vma, vstart, page_to_pfn(m_page->page));
+
+				if (unlikely(0 != ret)) {
+					/*will return -EBUSY If the page has already been mapped into table, but it's OK*/
+					if (-EBUSY == ret) {
+						break;
+					} else {
+						MALI_DEBUG_PRINT(1, ("OS Mem: mali_mem_os_resize_cpu_map_locked failed, ret = %d, count is %d, offset is %d,page_count is %d\n",
+								     ret, count, offset, os_mem->count));
+					}
+					return _MALI_OSK_ERR_FAULT;
+				}
+			}
+			count++;
+			vstart += _MALI_OSK_MALI_PAGE_SIZE;
+		}
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+u32 mali_mem_os_release(mali_mem_backend *mem_bkend)
+{
+
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	u32 free_pages_nr = 0;
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	MALI_DEBUG_ASSERT(MALI_MEM_OS == mem_bkend->type);
+
+	alloc = mem_bkend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	/* Unmap the memory from the mali virtual address space. */
+	mali_mem_os_mali_unmap(alloc);
+	mutex_lock(&mem_bkend->mutex);
+	/* Free pages */
+	if (MALI_MEM_BACKEND_FLAG_COWED & mem_bkend->flags) {
+		/* Lock to avoid the free race condition for the cow shared memory page node. */
+		_mali_osk_mutex_wait(session->cow_lock);
+		free_pages_nr = mali_mem_os_free(&mem_bkend->os_mem.pages, mem_bkend->os_mem.count, MALI_TRUE);
+		_mali_osk_mutex_signal(session->cow_lock);
+	} else {
+		free_pages_nr = mali_mem_os_free(&mem_bkend->os_mem.pages, mem_bkend->os_mem.count, MALI_FALSE);
+	}
+	mutex_unlock(&mem_bkend->mutex);
+
+	MALI_DEBUG_PRINT(4, ("OS Mem free : allocated size = 0x%x, free size = 0x%x\n", mem_bkend->os_mem.count * _MALI_OSK_MALI_PAGE_SIZE,
+			     free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+
+	mem_bkend->os_mem.count = 0;
+	return free_pages_nr;
+}
+
+
+#define MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE 128
+static struct {
+	struct {
+		mali_dma_addr phys;
+		mali_io_address mapping;
+	} page[MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE];
+	size_t count;
+	spinlock_t lock;
+} mali_mem_page_table_page_pool = {
+	.count = 0,
+	.lock = __SPIN_LOCK_UNLOCKED(pool_lock),
+};
+
+_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_NOMEM;
+	dma_addr_t tmp_phys;
+
+	spin_lock(&mali_mem_page_table_page_pool.lock);
+	if (0 < mali_mem_page_table_page_pool.count) {
+		u32 i = --mali_mem_page_table_page_pool.count;
+		*phys = mali_mem_page_table_page_pool.page[i].phys;
+		*mapping = mali_mem_page_table_page_pool.page[i].mapping;
+
+		ret = _MALI_OSK_ERR_OK;
+	}
+	spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+	if (_MALI_OSK_ERR_OK != ret) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+		*mapping = dma_alloc_attrs(&mali_platform_device->dev,
+					   _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys,
+					   GFP_KERNEL, mali_dma_attrs);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		*mapping = dma_alloc_attrs(&mali_platform_device->dev,
+					   _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys,
+					   GFP_KERNEL, &dma_attrs_wc);
+#else
+		*mapping = dma_alloc_writecombine(&mali_platform_device->dev,
+						  _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys, GFP_KERNEL);
+#endif
+		if (NULL != *mapping) {
+			ret = _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+			/* Verify that the "physical" address is 32-bit and
+			 * usable for Mali, when on a system with bus addresses
+			 * wider than 32-bit. */
+			MALI_DEBUG_ASSERT(0 == (tmp_phys >> 32));
+#endif
+
+			*phys = (mali_dma_addr)tmp_phys;
+		}
+	}
+
+	return ret;
+}
+
+void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt)
+{
+	spin_lock(&mali_mem_page_table_page_pool.lock);
+	if (MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE > mali_mem_page_table_page_pool.count) {
+		u32 i = mali_mem_page_table_page_pool.count;
+		mali_mem_page_table_page_pool.page[i].phys = phys;
+		mali_mem_page_table_page_pool.page[i].mapping = virt;
+
+		++mali_mem_page_table_page_pool.count;
+
+		spin_unlock(&mali_mem_page_table_page_pool.lock);
+	} else {
+		spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+		dma_free_attrs(&mali_platform_device->dev,
+			       _MALI_OSK_MALI_PAGE_SIZE, virt, phys,
+			       mali_dma_attrs);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		dma_free_attrs(&mali_platform_device->dev,
+			       _MALI_OSK_MALI_PAGE_SIZE, virt, phys,
+			       &dma_attrs_wc);
+#else
+		dma_free_writecombine(&mali_platform_device->dev,
+				      _MALI_OSK_MALI_PAGE_SIZE, virt, phys);
+#endif
+	}
+}
+
+void mali_mem_os_free_page_node(struct mali_page_node *m_page)
+{
+	struct page *page = m_page->page;
+	MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_OS);
+
+	if (1  == page_count(page)) {
+		dma_unmap_page(&mali_platform_device->dev, page_private(page),
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+		ClearPagePrivate(page);
+	}
+	__free_page(page);
+	m_page->page = NULL;
+	list_del(&m_page->list);
+	kfree(m_page);
+}
+
+/* The maximum number of page table pool pages to free in one go. */
+#define MALI_MEM_OS_CHUNK_TO_FREE 64UL
+
+/* Free a certain number of pages from the page table page pool.
+ * The pool lock must be held when calling the function, and the lock will be
+ * released before returning.
+ */
+static void mali_mem_os_page_table_pool_free(size_t nr_to_free)
+{
+	mali_dma_addr phys_arr[MALI_MEM_OS_CHUNK_TO_FREE];
+	void *virt_arr[MALI_MEM_OS_CHUNK_TO_FREE];
+	u32 i;
+
+	MALI_DEBUG_ASSERT(nr_to_free <= MALI_MEM_OS_CHUNK_TO_FREE);
+
+	/* Remove nr_to_free pages from the pool and store them locally on stack. */
+	for (i = 0; i < nr_to_free; i++) {
+		u32 pool_index = mali_mem_page_table_page_pool.count - i - 1;
+
+		phys_arr[i] = mali_mem_page_table_page_pool.page[pool_index].phys;
+		virt_arr[i] = mali_mem_page_table_page_pool.page[pool_index].mapping;
+	}
+
+	mali_mem_page_table_page_pool.count -= nr_to_free;
+
+	spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+	/* After releasing the spinlock: free the pages we removed from the pool. */
+	for (i = 0; i < nr_to_free; i++) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+		dma_free_attrs(&mali_platform_device->dev, _MALI_OSK_MALI_PAGE_SIZE,
+			       virt_arr[i], (dma_addr_t)phys_arr[i], mali_dma_attrs);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		dma_free_attrs(&mali_platform_device->dev, _MALI_OSK_MALI_PAGE_SIZE,
+			       virt_arr[i], (dma_addr_t)phys_arr[i], &dma_attrs_wc);
+#else
+		dma_free_writecombine(&mali_platform_device->dev,
+				      _MALI_OSK_MALI_PAGE_SIZE,
+				      virt_arr[i], (dma_addr_t)phys_arr[i]);
+#endif
+	}
+}
+
+static void mali_mem_os_trim_page_table_page_pool(void)
+{
+	size_t nr_to_free = 0;
+	size_t nr_to_keep;
+
+	/* Keep 2 page table pages for each 1024 pages in the page cache. */
+	nr_to_keep = mali_mem_os_allocator.pool_count / 512;
+	/* And a minimum of eight pages, to accomodate new sessions. */
+	nr_to_keep += 8;
+
+	if (0 == spin_trylock(&mali_mem_page_table_page_pool.lock)) return;
+
+	if (nr_to_keep < mali_mem_page_table_page_pool.count) {
+		nr_to_free = mali_mem_page_table_page_pool.count - nr_to_keep;
+		nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, nr_to_free);
+	}
+
+	/* Pool lock will be released by the callee. */
+	mali_mem_os_page_table_pool_free(nr_to_free);
+}
+
+static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	return mali_mem_os_allocator.pool_count;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask)
+#else
+static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask)
+#endif /* Linux < 2.6.35 */
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc)
+#else
+static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc)
+#endif /* Linux < 3.12.0 */
+#endif /* Linux < 3.0.0 */
+{
+	struct mali_page_node *m_page, *m_tmp;
+	unsigned long flags;
+	struct list_head *le, pages;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+	int nr = nr_to_scan;
+#else
+	int nr = sc->nr_to_scan;
+#endif
+
+	if (0 == nr) {
+		return mali_mem_os_shrink_count(shrinker, sc);
+	}
+
+	if (0 == spin_trylock_irqsave(&mali_mem_os_allocator.pool_lock, flags)) {
+		/* Not able to lock. */
+		return -1;
+	}
+
+	if (0 == mali_mem_os_allocator.pool_count) {
+		/* No pages availble */
+		spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags);
+		return 0;
+	}
+
+	/* Release from general page pool */
+	nr = min((size_t)nr, mali_mem_os_allocator.pool_count);
+	mali_mem_os_allocator.pool_count -= nr;
+	list_for_each(le, &mali_mem_os_allocator.pool_pages) {
+		--nr;
+		if (0 == nr) break;
+	}
+	list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le);
+	spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags);
+
+	list_for_each_entry_safe(m_page, m_tmp, &pages, list) {
+		mali_mem_os_free_page_node(m_page);
+	}
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES > mali_mem_os_allocator.pool_count) {
+		/* Pools are empty, stop timer */
+		MALI_DEBUG_PRINT(5, ("Stopping timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count));
+		cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker);
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	return mali_mem_os_shrink_count(shrinker, sc);
+#else
+	return nr;
+#endif
+}
+
+static void mali_mem_os_trim_pool(struct work_struct *data)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	struct list_head *le;
+	LIST_HEAD(pages);
+	size_t nr_to_free;
+
+	MALI_IGNORE(data);
+
+	MALI_DEBUG_PRINT(3, ("OS Mem: Trimming pool %u\n", mali_mem_os_allocator.pool_count));
+
+	/* Release from general page pool */
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+		size_t count = mali_mem_os_allocator.pool_count - MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES;
+		const size_t min_to_free = min(64, MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES);
+
+		/* Free half the pages on the pool above the static limit. Or 64 pages, 256KB. */
+		nr_to_free = max(count / 2, min_to_free);
+
+		mali_mem_os_allocator.pool_count -= nr_to_free;
+		list_for_each(le, &mali_mem_os_allocator.pool_pages) {
+			--nr_to_free;
+			if (0 == nr_to_free) break;
+		}
+		list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le);
+	}
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	list_for_each_entry_safe(m_page, m_tmp, &pages, list) {
+		mali_mem_os_free_page_node(m_page);
+	}
+
+	/* Release some pages from page table page pool */
+	mali_mem_os_trim_page_table_page_pool();
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+		MALI_DEBUG_PRINT(4, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count));
+		queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES);
+	}
+}
+
+_mali_osk_errcode_t mali_mem_os_init(void)
+{
+	mali_mem_os_allocator.wq = alloc_workqueue("mali-mem", WQ_UNBOUND, 1);
+	if (NULL == mali_mem_os_allocator.wq) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	mali_dma_attrs = DMA_ATTR_WRITE_COMBINE;
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &dma_attrs_wc);
+#endif
+
+	register_shrinker(&mali_mem_os_allocator.shrinker);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_os_term(void)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	unregister_shrinker(&mali_mem_os_allocator.shrinker);
+	cancel_delayed_work_sync(&mali_mem_os_allocator.timed_shrinker);
+
+	if (NULL != mali_mem_os_allocator.wq) {
+		destroy_workqueue(mali_mem_os_allocator.wq);
+		mali_mem_os_allocator.wq = NULL;
+	}
+
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+	list_for_each_entry_safe(m_page, m_tmp, &mali_mem_os_allocator.pool_pages, list) {
+		mali_mem_os_free_page_node(m_page);
+
+		--mali_mem_os_allocator.pool_count;
+	}
+	BUG_ON(mali_mem_os_allocator.pool_count);
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	/* Release from page table page pool */
+	do {
+		u32 nr_to_free;
+
+		spin_lock(&mali_mem_page_table_page_pool.lock);
+
+		nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, mali_mem_page_table_page_pool.count);
+
+		/* Pool lock will be released by the callee. */
+		mali_mem_os_page_table_pool_free(nr_to_free);
+	} while (0 != mali_mem_page_table_page_pool.count);
+}
+
+_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size)
+{
+	mali_mem_os_allocator.allocation_limit = size;
+
+	MALI_SUCCESS;
+}
+
+u32 mali_mem_os_stat(void)
+{
+	return atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE;
+}
diff --git a/utgard/r6p1/linux/mali_memory_os_alloc.h b/utgard/r6p1/linux/mali_memory_os_alloc.h
new file mode 100755
index 0000000..b92fffe
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_os_alloc.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_OS_ALLOC_H__
+#define __MALI_MEMORY_OS_ALLOC_H__
+
+#include "mali_osk.h"
+#include "mali_memory_types.h"
+
+
+/** @brief Release Mali OS memory
+ *
+ * The session memory_lock must be held when calling this function.
+ *
+ * @param mem_bkend Pointer to the mali_mem_backend to release
+ */
+u32 mali_mem_os_release(mali_mem_backend *mem_bkend);
+
+_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping);
+
+void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt);
+
+_mali_osk_errcode_t mali_mem_os_init(void);
+
+void mali_mem_os_term(void);
+
+u32 mali_mem_os_stat(void);
+
+void mali_mem_os_free_page_node(struct mali_page_node *m_page);
+
+int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size);
+
+u32 mali_mem_os_free(struct list_head *os_pages, u32 pages_count, mali_bool cow_flag);
+
+_mali_osk_errcode_t mali_mem_os_put_page(struct page *page);
+
+_mali_osk_errcode_t mali_mem_os_resize_pages(mali_mem_os_mem *mem_from, mali_mem_os_mem *mem_to, u32 start_page, u32 page_count);
+
+_mali_osk_errcode_t mali_mem_os_mali_map(mali_mem_os_mem *os_mem, struct mali_session_data *session, u32 vaddr, u32 start_page, u32 mapping_pgae_num, u32 props);
+
+void mali_mem_os_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_os_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+
+_mali_osk_errcode_t mali_mem_os_resize_cpu_map_locked(mali_mem_backend *mem_bkend, struct vm_area_struct *vma, unsigned long start_vaddr, u32 mappig_size);
+
+#endif /* __MALI_MEMORY_OS_ALLOC_H__ */
diff --git a/utgard/r6p1/linux/mali_memory_secure.c b/utgard/r6p1/linux/mali_memory_secure.c
new file mode 100644
index 0000000..7856ae6
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_secure.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_memory.h"
+#include "mali_memory_secure.h"
+#include "mali_osk.h"
+#include <linux/mutex.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-buf.h>
+
+_mali_osk_errcode_t mali_mem_secure_attach_dma_buf(mali_mem_secure *secure_mem, u32 size, int mem_fd)
+{
+	struct dma_buf *buf;
+	MALI_DEBUG_ASSERT_POINTER(secure_mem);
+
+	/* get dma buffer */
+	buf = dma_buf_get(mem_fd);
+	if (IS_ERR_OR_NULL(buf)) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to get dma buf!\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (size != buf->size) {
+		MALI_DEBUG_PRINT_ERROR(("The secure mem size not match to the dma buf size!\n"));
+		goto failed_alloc_mem;
+	}
+
+	secure_mem->buf =  buf;
+	secure_mem->attachment = dma_buf_attach(secure_mem->buf, &mali_platform_device->dev);
+	if (NULL == secure_mem->attachment) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to get dma buf attachment!\n"));
+		goto failed_dma_attach;
+	}
+
+	secure_mem->sgt = dma_buf_map_attachment(secure_mem->attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(secure_mem->sgt)) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to map dma buf attachment\n"));
+		goto  failed_dma_map;
+	}
+
+	secure_mem->count = size / MALI_MMU_PAGE_SIZE;
+
+	return _MALI_OSK_ERR_OK;
+
+failed_dma_map:
+	dma_buf_detach(secure_mem->buf, secure_mem->attachment);
+failed_dma_attach:
+failed_alloc_mem:
+	dma_buf_put(buf);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+_mali_osk_errcode_t mali_mem_secure_mali_map(mali_mem_secure *secure_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+	struct mali_page_directory *pagedir;
+	struct scatterlist *sg;
+	u32 virt = vaddr;
+	u32 prop = props;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(secure_mem);
+	MALI_DEBUG_ASSERT_POINTER(secure_mem->sgt);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	pagedir = session->page_directory;
+
+	for_each_sg(secure_mem->sgt->sgl, sg, secure_mem->sgt->nents, i) {
+		u32 size = sg_dma_len(sg);
+		dma_addr_t phys = sg_dma_address(sg);
+
+		/* sg must be page aligned. */
+		MALI_DEBUG_ASSERT(0 == size % MALI_MMU_PAGE_SIZE);
+		MALI_DEBUG_ASSERT(0 == (phys & ~(uintptr_t)0xFFFFFFFF));
+
+		mali_mmu_pagedir_update(pagedir, virt, phys, size, prop);
+
+		MALI_DEBUG_PRINT(3, ("The secure mem physical address: 0x%x gpu virtual address: 0x%x! \n", phys, virt));
+		virt += size;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_secure_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+
+int mali_mem_secure_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+
+	int ret = 0;
+	struct scatterlist *sg;
+	mali_mem_secure *secure_mem = &mem_bkend->secure_mem;
+	unsigned long addr = vma->vm_start;
+	int i;
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_SECURE);
+
+	for_each_sg(secure_mem->sgt->sgl, sg, secure_mem->sgt->nents, i) {
+		phys_addr_t phys;
+		dma_addr_t dev_addr;
+		u32 size, j;
+		dev_addr = sg_dma_address(sg);
+#if defined(CONFIG_ARM64) ||LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+		phys =  dma_to_phys(&mali_platform_device->dev, dev_addr);
+#else
+		phys = page_to_phys(pfn_to_page(dma_to_pfn(&mali_platform_device->dev, dev_addr)));
+#endif
+		size = sg_dma_len(sg);
+		MALI_DEBUG_ASSERT(0 == size % _MALI_OSK_MALI_PAGE_SIZE);
+
+		for (j = 0; j < size / _MALI_OSK_MALI_PAGE_SIZE; j++) {
+			ret = vm_insert_pfn(vma, addr, PFN_DOWN(phys));
+
+			if (unlikely(0 != ret)) {
+				return -EFAULT;
+			}
+			addr += _MALI_OSK_MALI_PAGE_SIZE;
+			phys += _MALI_OSK_MALI_PAGE_SIZE;
+
+			MALI_DEBUG_PRINT(3, ("The secure mem physical address: 0x%x , cpu virtual address: 0x%x! \n", phys, addr));
+		}
+	}
+	return ret;
+}
+
+u32 mali_mem_secure_release(mali_mem_backend *mem_bkend)
+{
+	struct mali_mem_secure *mem;
+	mali_mem_allocation *alloc = mem_bkend->mali_allocation;
+	u32 free_pages_nr = 0;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_SECURE);
+
+	mem = &mem_bkend->secure_mem;
+	MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	MALI_DEBUG_ASSERT_POINTER(mem->sgt);
+	/* Unmap the memory from the mali virtual address space. */
+	mali_mem_secure_mali_unmap(alloc);
+	mutex_lock(&mem_bkend->mutex);
+	dma_buf_unmap_attachment(mem->attachment, mem->sgt, DMA_BIDIRECTIONAL);
+	dma_buf_detach(mem->buf, mem->attachment);
+	dma_buf_put(mem->buf);
+	mutex_unlock(&mem_bkend->mutex);
+
+	free_pages_nr = mem->count;
+
+	return free_pages_nr;
+}
+
+
diff --git a/utgard/r6p1/linux/mali_memory_secure.h b/utgard/r6p1/linux/mali_memory_secure.h
new file mode 100644
index 0000000..64f40d4
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_secure.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2010, 2013, 2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_SECURE_H__
+#define __MALI_MEMORY_SECURE_H__
+
+#include "mali_session.h"
+#include "mali_memory.h"
+#include <linux/spinlock.h>
+
+#include "mali_memory_types.h"
+
+_mali_osk_errcode_t mali_mem_secure_attach_dma_buf(mali_mem_secure *secure_mem, u32 size, int mem_fd);
+
+_mali_osk_errcode_t mali_mem_secure_mali_map(mali_mem_secure *secure_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+
+void mali_mem_secure_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_secure_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+
+u32 mali_mem_secure_release(mali_mem_backend *mem_bkend);
+
+#endif /* __MALI_MEMORY_SECURE_H__ */
diff --git a/utgard/r6p1/linux/mali_memory_swap_alloc.c b/utgard/r6p1/linux/mali_memory_swap_alloc.c
new file mode 100644
index 0000000..132bad4
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_swap_alloc.c
@@ -0,0 +1,950 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/idr.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+#include <linux/shmem_fs.h>
+#include <linux/file.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_memory.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_cow.h"
+#include "mali_ukk.h"
+#include "mali_kernel_utilization.h"
+#include "mali_memory_swap_alloc.h"
+
+
+static struct _mali_osk_bitmap idx_mgr;
+static struct file *global_swap_file;
+static struct address_space *global_swap_space;
+static _mali_osk_wq_work_t *mali_mem_swap_out_workq = NULL;
+static u32 mem_backend_swapped_pool_size;
+#ifdef MALI_MEM_SWAP_TRACKING
+static u32 mem_backend_swapped_unlock_size;
+#endif
+/* Lock order: mem_backend_swapped_pool_lock  > each memory backend's mutex lock.
+ * This lock used to protect mem_backend_swapped_pool_size and mem_backend_swapped_pool. */
+static struct mutex mem_backend_swapped_pool_lock;
+static struct list_head mem_backend_swapped_pool;
+
+extern struct mali_mem_os_allocator mali_mem_os_allocator;
+
+#define MALI_SWAP_LOW_MEM_DEFAULT_VALUE (60*1024*1024)
+#define MALI_SWAP_INVALIDATE_MALI_ADDRESS (0)               /* Used to mark the given memory cookie is invalidate. */
+#define MALI_SWAP_GLOBAL_SWAP_FILE_SIZE (0xFFFFFFFF)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX ((MALI_SWAP_GLOBAL_SWAP_FILE_SIZE) >> PAGE_SHIFT)
+#else
+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX ((MALI_SWAP_GLOBAL_SWAP_FILE_SIZE) >> PAGE_CACHE_SHIFT)
+#endif
+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE (1 << 15) /* Reserved for CoW nonlinear swap backend memory, the space size is 128MB. */
+
+unsigned int mali_mem_swap_out_threshold_value = MALI_SWAP_LOW_MEM_DEFAULT_VALUE;
+
+/**
+ * We have two situations to do shrinking things, one is we met low GPU utilization which shows GPU needn't touch too
+ * swappable backends in short time, and the other one is we add new swappable backends, the total pool size exceed
+ * the threshold value of the swapped pool size.
+ */
+typedef enum {
+	MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION = 100,
+	MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS = 257,
+} _mali_mem_swap_pool_shrink_type_t;
+
+static void mali_mem_swap_swapped_bkend_pool_check_for_low_utilization(void *arg);
+
+_mali_osk_errcode_t mali_mem_swap_init(void)
+{
+	gfp_t flags = __GFP_NORETRY | __GFP_NOWARN;
+
+	if (_MALI_OSK_ERR_OK != _mali_osk_bitmap_init(&idx_mgr, MALI_SWAP_GLOBAL_SWAP_FILE_INDEX, MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE)) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	global_swap_file = shmem_file_setup("mali_swap", MALI_SWAP_GLOBAL_SWAP_FILE_SIZE, VM_NORESERVE);
+	if (IS_ERR(global_swap_file)) {
+		_mali_osk_bitmap_term(&idx_mgr);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	global_swap_space = global_swap_file->f_path.dentry->d_inode->i_mapping;
+
+	mali_mem_swap_out_workq = _mali_osk_wq_create_work(mali_mem_swap_swapped_bkend_pool_check_for_low_utilization, NULL);
+	if (NULL == mali_mem_swap_out_workq) {
+		_mali_osk_bitmap_term(&idx_mgr);
+		fput(global_swap_file);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE)
+	flags |= GFP_HIGHUSER;
+#else
+#ifdef CONFIG_ZONE_DMA32
+	flags |= GFP_DMA32;
+#else
+#ifdef CONFIG_ZONE_DMA
+	flags |= GFP_DMA;
+#else
+	/* arm64 utgard only work on < 4G, but the kernel
+	 * didn't provide method to allocte memory < 4G
+	 */
+	MALI_DEBUG_ASSERT(0);
+#endif
+#endif
+#endif
+
+	/* When we use shmem_read_mapping_page to allocate/swap-in, it will
+	 * use these flags to allocate new page if need.*/
+	mapping_set_gfp_mask(global_swap_space, flags);
+
+	mem_backend_swapped_pool_size = 0;
+#ifdef MALI_MEM_SWAP_TRACKING
+	mem_backend_swapped_unlock_size = 0;
+#endif
+	mutex_init(&mem_backend_swapped_pool_lock);
+	INIT_LIST_HEAD(&mem_backend_swapped_pool);
+
+	MALI_DEBUG_PRINT(2, ("Mali SWAP: Swap out threshold vaule is %uM\n", mali_mem_swap_out_threshold_value >> 20));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_swap_term(void)
+{
+	_mali_osk_bitmap_term(&idx_mgr);
+
+	fput(global_swap_file);
+
+	_mali_osk_wq_delete_work(mali_mem_swap_out_workq);
+
+	MALI_DEBUG_ASSERT(list_empty(&mem_backend_swapped_pool));
+	MALI_DEBUG_ASSERT(0 == mem_backend_swapped_pool_size);
+
+	return;
+}
+
+struct file *mali_mem_swap_get_global_swap_file(void)
+{
+	return  global_swap_file;
+}
+
+/* Judge if swappable backend in swapped pool. */
+static mali_bool mali_memory_swap_backend_in_swapped_pool(mali_mem_backend *mem_bkend)
+{
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+
+	return !list_empty(&mem_bkend->list);
+}
+
+void mali_memory_swap_list_backend_delete(mali_mem_backend *mem_bkend)
+{
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+
+	mutex_lock(&mem_backend_swapped_pool_lock);
+	mutex_lock(&mem_bkend->mutex);
+
+	if (MALI_FALSE == mali_memory_swap_backend_in_swapped_pool(mem_bkend)) {
+		mutex_unlock(&mem_bkend->mutex);
+		mutex_unlock(&mem_backend_swapped_pool_lock);
+		return;
+	}
+
+	MALI_DEBUG_ASSERT(!list_empty(&mem_bkend->list));
+
+	list_del_init(&mem_bkend->list);
+
+	mutex_unlock(&mem_bkend->mutex);
+
+	mem_backend_swapped_pool_size -= mem_bkend->size;
+
+	mutex_unlock(&mem_backend_swapped_pool_lock);
+}
+
+static void mali_mem_swap_out_page_node(mali_page_node *page_node)
+{
+	MALI_DEBUG_ASSERT(page_node);
+
+	dma_unmap_page(&mali_platform_device->dev, page_node->swap_it->dma_addr,
+		       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+	set_page_dirty(page_node->swap_it->page);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
+	put_page(page_node->swap_it->page);
+#else
+	page_cache_release(page_node->swap_it->page);
+#endif
+}
+
+void mali_mem_swap_unlock_single_mem_backend(mali_mem_backend *mem_bkend)
+{
+	mali_page_node *m_page;
+
+	MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_bkend->mutex));
+
+	if (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN)) {
+		return;
+	}
+
+	mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN;
+
+	list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+		mali_mem_swap_out_page_node(m_page);
+	}
+
+	return;
+}
+
+static void mali_mem_swap_unlock_partial_locked_mem_backend(mali_mem_backend *mem_bkend, mali_page_node *page_node)
+{
+	mali_page_node *m_page;
+
+	MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_bkend->mutex));
+
+	list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+		if (m_page == page_node) {
+			break;
+		}
+		mali_mem_swap_out_page_node(m_page);
+	}
+}
+
+static void mali_mem_swap_swapped_bkend_pool_shrink(_mali_mem_swap_pool_shrink_type_t shrink_type)
+{
+	mali_mem_backend *bkend, *tmp_bkend;
+	long system_free_size;
+	u32 last_gpu_utilization, gpu_utilization_threshold_value, temp_swap_out_threshold_value;
+
+	MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_backend_swapped_pool_lock));
+
+	if (MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION == shrink_type) {
+		/**
+		 * When we met that system memory is very low and Mali locked swappable memory size is less than
+		 * threshold value, and at the same time, GPU load is very low and don't need high performance,
+		 * at this condition, we can unlock more swap memory backend from swapped backends pool.
+		 */
+		gpu_utilization_threshold_value = MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION;
+		temp_swap_out_threshold_value = (mali_mem_swap_out_threshold_value >> 2);
+	} else {
+		/* When we add swappable memory backends to swapped pool, we need to think that we couldn't
+		* hold too much swappable backends in Mali driver, and also we need considering performance.
+		* So there is a balance for swapping out memory backend, we should follow the following conditions:
+		* 1. Total memory size in global mem backend swapped pool is more than the defined threshold value.
+		* 2. System level free memory size is less than the defined threshold value.
+		* 3. Please note that GPU utilization problem isn't considered in this condition.
+		*/
+		gpu_utilization_threshold_value = MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS;
+		temp_swap_out_threshold_value = mali_mem_swap_out_threshold_value;
+	}
+
+	/* Get system free pages number. */
+	system_free_size = global_page_state(NR_FREE_PAGES) * PAGE_SIZE;
+	last_gpu_utilization = _mali_ukk_utilization_gp_pp();
+
+	if ((last_gpu_utilization < gpu_utilization_threshold_value)
+	    && (system_free_size < mali_mem_swap_out_threshold_value)
+	    && (mem_backend_swapped_pool_size > temp_swap_out_threshold_value)) {
+		list_for_each_entry_safe(bkend, tmp_bkend, &mem_backend_swapped_pool, list) {
+			if (mem_backend_swapped_pool_size <= temp_swap_out_threshold_value) {
+				break;
+			}
+
+			mutex_lock(&bkend->mutex);
+
+			/* check if backend is in use. */
+			if (0 < bkend->using_count) {
+				mutex_unlock(&bkend->mutex);
+				continue;
+			}
+
+			mali_mem_swap_unlock_single_mem_backend(bkend);
+			list_del_init(&bkend->list);
+			mem_backend_swapped_pool_size -= bkend->size;
+#ifdef MALI_MEM_SWAP_TRACKING
+			mem_backend_swapped_unlock_size += bkend->size;
+#endif
+			mutex_unlock(&bkend->mutex);
+		}
+	}
+
+	return;
+}
+
+static void mali_mem_swap_swapped_bkend_pool_check_for_low_utilization(void *arg)
+{
+	MALI_IGNORE(arg);
+
+	mutex_lock(&mem_backend_swapped_pool_lock);
+
+	mali_mem_swap_swapped_bkend_pool_shrink(MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION);
+
+	mutex_unlock(&mem_backend_swapped_pool_lock);
+}
+
+/**
+ * After PP job finished, we add all of swappable memory backend used by this PP
+ * job to the tail of the global swapped pool, and if the total size of swappable memory is more than threshold
+ * value, we also need to shrink the swapped pool start from the head of the list.
+ */
+void mali_memory_swap_list_backend_add(mali_mem_backend *mem_bkend)
+{
+	mutex_lock(&mem_backend_swapped_pool_lock);
+	mutex_lock(&mem_bkend->mutex);
+
+	if (mali_memory_swap_backend_in_swapped_pool(mem_bkend)) {
+		MALI_DEBUG_ASSERT(!list_empty(&mem_bkend->list));
+
+		list_del_init(&mem_bkend->list);
+		list_add_tail(&mem_bkend->list, &mem_backend_swapped_pool);
+		mutex_unlock(&mem_bkend->mutex);
+		mutex_unlock(&mem_backend_swapped_pool_lock);
+		return;
+	}
+
+	list_add_tail(&mem_bkend->list, &mem_backend_swapped_pool);
+
+	mutex_unlock(&mem_bkend->mutex);
+	mem_backend_swapped_pool_size += mem_bkend->size;
+
+	mali_mem_swap_swapped_bkend_pool_shrink(MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS);
+
+	mutex_unlock(&mem_backend_swapped_pool_lock);
+	return;
+}
+
+
+u32 mali_mem_swap_idx_alloc(void)
+{
+	return _mali_osk_bitmap_alloc(&idx_mgr);
+}
+
+void mali_mem_swap_idx_free(u32 idx)
+{
+	_mali_osk_bitmap_free(&idx_mgr, idx);
+}
+
+static u32 mali_mem_swap_idx_range_alloc(u32 count)
+{
+	u32 index;
+
+	index = _mali_osk_bitmap_alloc_range(&idx_mgr, count);
+
+	return index;
+}
+
+static void mali_mem_swap_idx_range_free(u32 idx, int num)
+{
+	_mali_osk_bitmap_free_range(&idx_mgr, idx, num);
+}
+
+struct mali_swap_item *mali_mem_swap_alloc_swap_item(void)
+{
+	mali_swap_item *swap_item;
+
+	swap_item = kzalloc(sizeof(mali_swap_item), GFP_KERNEL);
+
+	if (NULL == swap_item) {
+		return NULL;
+	}
+
+	atomic_set(&swap_item->ref_count, 1);
+	swap_item->page = NULL;
+	atomic_add(1, &mali_mem_os_allocator.allocated_pages);
+
+	return swap_item;
+}
+
+void mali_mem_swap_free_swap_item(mali_swap_item *swap_item)
+{
+	struct inode *file_node;
+	long long start, end;
+
+	/* If this swap item is shared, we just reduce the reference counter. */
+	if (0 == atomic_dec_return(&swap_item->ref_count)) {
+		file_node = global_swap_file->f_path.dentry->d_inode;
+		start = swap_item->idx;
+		start = start << 12;
+		end = start + PAGE_SIZE;
+
+		shmem_truncate_range(file_node, start, (end - 1));
+
+		mali_mem_swap_idx_free(swap_item->idx);
+
+		atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+
+		kfree(swap_item);
+	}
+}
+
+/* Used to allocate new swap item for new memory allocation and cow page for write. */
+struct mali_page_node *_mali_mem_swap_page_node_allocate(void)
+{
+	struct mali_page_node *m_page;
+
+	m_page = _mali_page_node_allocate(MALI_PAGE_NODE_SWAP);
+
+	if (NULL == m_page) {
+		return NULL;
+	}
+
+	m_page->swap_it = mali_mem_swap_alloc_swap_item();
+
+	if (NULL == m_page->swap_it) {
+		kfree(m_page);
+		return NULL;
+	}
+
+	return m_page;
+}
+
+_mali_osk_errcode_t _mali_mem_swap_put_page_node(struct mali_page_node *m_page)
+{
+
+	mali_mem_swap_free_swap_item(m_page->swap_it);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_mem_swap_page_node_free(struct mali_page_node *m_page)
+{
+	_mali_mem_swap_put_page_node(m_page);
+
+	kfree(m_page);
+
+	return;
+}
+
+u32 mali_mem_swap_free(mali_mem_swap *swap_mem)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(swap_mem);
+
+	list_for_each_entry_safe(m_page, m_tmp, &swap_mem->pages, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_SWAP);
+
+		/* free the page node and release the swap item, if the ref count is 1,
+		 * then need also free the swap item. */
+		list_del(&m_page->list);
+		if (1 == _mali_page_node_get_ref_count(m_page)) {
+			free_pages_nr++;
+		}
+
+		_mali_mem_swap_page_node_free(m_page);
+	}
+
+	return free_pages_nr;
+}
+
+static u32 mali_mem_swap_cow_free(mali_mem_cow *cow_mem)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(cow_mem);
+
+	list_for_each_entry_safe(m_page, m_tmp, &cow_mem->pages, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_SWAP);
+
+		/* free the page node and release the swap item, if the ref count is 1,
+		 * then need also free the swap item. */
+		list_del(&m_page->list);
+		if (1 == _mali_page_node_get_ref_count(m_page)) {
+			free_pages_nr++;
+		}
+
+		_mali_mem_swap_page_node_free(m_page);
+	}
+
+	return free_pages_nr;
+}
+
+u32 mali_mem_swap_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped)
+{
+	mali_mem_allocation *alloc;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	alloc = mem_bkend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	if (is_mali_mapped) {
+		mali_mem_swap_mali_unmap(alloc);
+	}
+
+	mali_memory_swap_list_backend_delete(mem_bkend);
+
+	mutex_lock(&mem_bkend->mutex);
+	/* To make sure the given memory backend was unlocked from Mali side,
+	 * and then free this memory block. */
+	mali_mem_swap_unlock_single_mem_backend(mem_bkend);
+	mutex_unlock(&mem_bkend->mutex);
+
+	if (MALI_MEM_SWAP == mem_bkend->type) {
+		free_pages_nr = mali_mem_swap_free(&mem_bkend->swap_mem);
+	} else {
+		free_pages_nr = mali_mem_swap_cow_free(&mem_bkend->cow_mem);
+	}
+
+	return free_pages_nr;
+}
+
+mali_bool mali_mem_swap_in_page_node(struct mali_page_node *page_node)
+{
+	MALI_DEBUG_ASSERT(NULL != page_node);
+
+	page_node->swap_it->page = shmem_read_mapping_page(global_swap_space, page_node->swap_it->idx);
+
+	if (IS_ERR(page_node->swap_it->page)) {
+		MALI_DEBUG_PRINT_ERROR(("SWAP Mem: failed to swap in page with index: %d.\n", page_node->swap_it->idx));
+		return MALI_FALSE;
+	}
+
+	/* Ensure page is flushed from CPU caches. */
+	page_node->swap_it->dma_addr = dma_map_page(&mali_platform_device->dev, page_node->swap_it->page,
+				       0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+	return MALI_TRUE;
+}
+
+int mali_mem_swap_alloc_pages(mali_mem_swap *swap_mem, u32 size, u32 *bkend_idx)
+{
+	size_t page_count = PAGE_ALIGN(size) / PAGE_SIZE;
+	struct mali_page_node *m_page;
+	long system_free_size;
+	u32 i, index;
+	mali_bool ret;
+
+	MALI_DEBUG_ASSERT(NULL != swap_mem);
+	MALI_DEBUG_ASSERT(NULL != bkend_idx);
+	MALI_DEBUG_ASSERT(page_count <= MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE);
+
+	if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) {
+		MALI_DEBUG_PRINT(2, ("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n",
+				     size,
+				     atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE,
+				     mali_mem_os_allocator.allocation_limit));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	INIT_LIST_HEAD(&swap_mem->pages);
+	swap_mem->count = page_count;
+	index = mali_mem_swap_idx_range_alloc(page_count);
+
+	if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == index) {
+		MALI_PRINT_ERROR(("Mali Swap: Failed to allocate continuous index for swappable Mali memory."));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	for (i = 0; i < page_count; i++) {
+		m_page = _mali_mem_swap_page_node_allocate();
+
+		if (NULL == m_page) {
+			MALI_DEBUG_PRINT_ERROR(("SWAP Mem: Failed to allocate mali page node."));
+			swap_mem->count = i;
+
+			mali_mem_swap_free(swap_mem);
+			mali_mem_swap_idx_range_free(index + i, page_count - i);
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		m_page->swap_it->idx = index + i;
+
+		ret = mali_mem_swap_in_page_node(m_page);
+
+		if (MALI_FALSE == ret) {
+			MALI_DEBUG_PRINT_ERROR(("SWAP Mem: Allocate new page from SHMEM file failed."));
+			_mali_mem_swap_page_node_free(m_page);
+			mali_mem_swap_idx_range_free(index + i + 1, page_count - i - 1);
+
+			swap_mem->count = i;
+			mali_mem_swap_free(swap_mem);
+			return _MALI_OSK_ERR_NOMEM;
+		}
+
+		list_add_tail(&m_page->list, &swap_mem->pages);
+	}
+
+	system_free_size = global_page_state(NR_FREE_PAGES) * PAGE_SIZE;
+
+	if ((system_free_size < mali_mem_swap_out_threshold_value)
+	    && (mem_backend_swapped_pool_size > (mali_mem_swap_out_threshold_value >> 2))
+	    && mali_utilization_enabled()) {
+		_mali_osk_wq_schedule_work(mali_mem_swap_out_workq);
+	}
+
+	*bkend_idx = index;
+	return 0;
+}
+
+void mali_mem_swap_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+
+/* Insert these pages from shmem to mali page table*/
+_mali_osk_errcode_t mali_mem_swap_mali_map(mali_mem_swap *swap_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+	struct mali_page_directory *pagedir = session->page_directory;
+	struct mali_page_node *m_page;
+	dma_addr_t phys;
+	u32 virt = vaddr;
+	u32 prop = props;
+
+	list_for_each_entry(m_page, &swap_mem->pages, list) {
+		MALI_DEBUG_ASSERT(NULL != m_page->swap_it->page);
+		phys = m_page->swap_it->dma_addr;
+
+		mali_mmu_pagedir_update(pagedir, virt, phys, MALI_MMU_PAGE_SIZE, prop);
+		virt += MALI_MMU_PAGE_SIZE;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_in_pages(struct mali_pp_job *job)
+{
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	struct mali_page_node *m_page;
+	mali_bool swap_in_success = MALI_TRUE;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	for (i = 0; i < num_memory_cookies; i++) {
+
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+		if (NULL == mali_vma_node) {
+			job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+			swap_in_success = MALI_FALSE;
+			MALI_PRINT_ERROR(("SWAP Mem: failed to find mali_vma_node through Mali address: 0x%08x.\n", mali_addr));
+			continue;
+		}
+
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(NULL != mali_alloc);
+
+		if (MALI_MEM_SWAP != mali_alloc->type &&
+		    MALI_MEM_COW != mali_alloc->type) {
+			continue;
+		}
+
+		/* Get backend memory & Map on GPU */
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+		/* We neednot hold backend's lock here, race safe.*/
+		if ((MALI_MEM_COW == mem_bkend->type) &&
+		    (!(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+			continue;
+		}
+
+		mutex_lock(&mem_bkend->mutex);
+
+		/* When swap_in_success is MALI_FALSE, it means this job has memory backend that could not be swapped in,
+		 * and it will be aborted in mali scheduler, so here, we just mark those memory cookies which
+		 * should not be swapped out when delete job to invalide */
+		if (MALI_FALSE == swap_in_success) {
+			job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+
+		/* Before swap in, checking if this memory backend has been swapped in by the latest flushed jobs. */
+		++mem_bkend->using_count;
+
+		if (1 < mem_bkend->using_count) {
+			MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN != (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags));
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+
+		if (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN != (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags)) {
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+
+
+		list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+			if (MALI_FALSE == mali_mem_swap_in_page_node(m_page)) {
+				/* Don't have enough memory to swap in page, so release pages have already been swapped
+				 * in and then mark this pp job to be fail. */
+				mali_mem_swap_unlock_partial_locked_mem_backend(mem_bkend, m_page);
+				swap_in_success = MALI_FALSE;
+				break;
+			}
+		}
+
+		if (swap_in_success) {
+#ifdef MALI_MEM_SWAP_TRACKING
+			mem_backend_swapped_unlock_size -= mem_bkend->size;
+#endif
+			_mali_osk_mutex_wait(session->memory_lock);
+			mali_mem_swap_mali_map(&mem_bkend->swap_mem, session, mali_alloc->mali_mapping.addr, mali_alloc->mali_mapping.properties);
+			_mali_osk_mutex_signal(session->memory_lock);
+
+			/* Remove the unlock flag from mem backend flags, mark this backend has been swapped in. */
+			mem_bkend->flags &= ~(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN);
+			mutex_unlock(&mem_bkend->mutex);
+		} else {
+			--mem_bkend->using_count;
+			/* Marking that this backend is not swapped in, need not to be processed anymore. */
+			job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+			mutex_unlock(&mem_bkend->mutex);
+		}
+	}
+
+	job->swap_status = swap_in_success ? MALI_SWAP_IN_SUCC : MALI_SWAP_IN_FAIL;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_out_pages(struct mali_pp_job *job)
+{
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+
+	for (i = 0; i < num_memory_cookies; i++) {
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+
+		if (MALI_SWAP_INVALIDATE_MALI_ADDRESS == mali_addr) {
+			continue;
+		}
+
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+
+		if (NULL == mali_vma_node) {
+			MALI_PRINT_ERROR(("SWAP Mem: failed to find mali_vma_node through Mali address: 0x%08x.\n", mali_addr));
+			continue;
+		}
+
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(NULL != mali_alloc);
+
+		if (MALI_MEM_SWAP != mali_alloc->type &&
+		    MALI_MEM_COW != mali_alloc->type) {
+			continue;
+		}
+
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+		/* We neednot hold backend's lock here, race safe.*/
+		if ((MALI_MEM_COW == mem_bkend->type) &&
+		    (!(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+			continue;
+		}
+
+		mutex_lock(&mem_bkend->mutex);
+
+		MALI_DEBUG_ASSERT(0 < mem_bkend->using_count);
+
+		/* Reducing the using_count of mem backend means less pp job are using this memory backend,
+		 * if this count get to zero, it means no pp job is using it now, could put it to swap out list. */
+		--mem_bkend->using_count;
+
+		if (0 < mem_bkend->using_count) {
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+		mutex_unlock(&mem_bkend->mutex);
+
+		mali_memory_swap_list_backend_add(mem_bkend);
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_allocate_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep)
+{
+	struct mali_page_node *m_page, *found_node = NULL;
+	struct page *found_page;
+	mali_mem_swap *swap = NULL;
+	mali_mem_cow *cow = NULL;
+	dma_addr_t dma_addr;
+	u32 i = 0;
+
+	if (MALI_MEM_SWAP == mem_bkend->type) {
+		swap = &mem_bkend->swap_mem;
+		list_for_each_entry(m_page, &swap->pages, list) {
+			if (i == offset) {
+				found_node = m_page;
+				break;
+			}
+			i++;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+		MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (MALI_MEM_BACKEND_FLAG_SWAP_COWED & mem_bkend->flags));
+
+		cow = &mem_bkend->cow_mem;
+		list_for_each_entry(m_page, &cow->pages, list) {
+			if (i == offset) {
+				found_node = m_page;
+				break;
+			}
+			i++;
+		}
+	}
+
+	if (NULL == found_node) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	found_page = shmem_read_mapping_page(global_swap_space, found_node->swap_it->idx);
+
+	if (!IS_ERR(found_page)) {
+		lock_page(found_page);
+		dma_addr = dma_map_page(&mali_platform_device->dev, found_page,
+					0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+		dma_unmap_page(&mali_platform_device->dev, dma_addr,
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+		*pagep = found_page;
+	} else {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_cow_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep)
+{
+	struct mali_page_node *m_page, *found_node = NULL, *new_node = NULL;
+	mali_mem_cow *cow = NULL;
+	u32 i = 0;
+
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+	MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED));
+	MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN == (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags));
+	MALI_DEBUG_ASSERT(!mali_memory_swap_backend_in_swapped_pool(mem_bkend));
+
+	cow = &mem_bkend->cow_mem;
+	list_for_each_entry(m_page, &cow->pages, list) {
+		if (i == offset) {
+			found_node = m_page;
+			break;
+		}
+		i++;
+	}
+
+	if (NULL == found_node) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	new_node = _mali_mem_swap_page_node_allocate();
+
+	if (NULL == new_node) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	new_node->swap_it->idx = mali_mem_swap_idx_alloc();
+
+	if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == new_node->swap_it->idx) {
+		MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW on demand.\n"));
+		kfree(new_node->swap_it);
+		kfree(new_node);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (MALI_FALSE == mali_mem_swap_in_page_node(new_node)) {
+		_mali_mem_swap_page_node_free(new_node);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* swap in found node for copy in kernel. */
+	if (MALI_FALSE == mali_mem_swap_in_page_node(found_node)) {
+		mali_mem_swap_out_page_node(new_node);
+		_mali_mem_swap_page_node_free(new_node);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	_mali_mem_cow_copy_page(found_node, new_node);
+
+	list_replace(&found_node->list, &new_node->list);
+
+	if (1 != _mali_page_node_get_ref_count(found_node)) {
+		atomic_add(1, &mem_bkend->mali_allocation->session->mali_mem_allocated_pages);
+		if (atomic_read(&mem_bkend->mali_allocation->session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > mem_bkend->mali_allocation->session->max_mali_mem_allocated_size) {
+			mem_bkend->mali_allocation->session->max_mali_mem_allocated_size = atomic_read(&mem_bkend->mali_allocation->session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+		mem_bkend->cow_mem.change_pages_nr++;
+	}
+
+	mali_mem_swap_out_page_node(found_node);
+	_mali_mem_swap_page_node_free(found_node);
+
+	/* When swap in the new page node, we have called dma_map_page for this page.\n */
+	dma_unmap_page(&mali_platform_device->dev, new_node->swap_it->dma_addr,
+		       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+	lock_page(new_node->swap_it->page);
+
+	*pagep = new_node->swap_it->page;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+#ifdef MALI_MEM_SWAP_TRACKING
+void mali_mem_swap_tracking(u32 *swap_pool_size, u32 *unlock_size)
+{
+	*swap_pool_size = mem_backend_swapped_pool_size;
+	*unlock_size =  mem_backend_swapped_unlock_size;
+}
+#endif
diff --git a/utgard/r6p1/linux/mali_memory_swap_alloc.h b/utgard/r6p1/linux/mali_memory_swap_alloc.h
new file mode 100644
index 0000000..3624d71
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_swap_alloc.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_SWAP_ALLOC_H__
+#define __MALI_MEMORY_SWAP_ALLOC_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include "mali_memory_types.h"
+#include "mali_pp_job.h"
+
+/**
+ * Initialize memory swapping module.
+ */
+_mali_osk_errcode_t mali_mem_swap_init(void);
+
+void mali_mem_swap_term(void);
+
+/**
+ * Return global share memory file to other modules.
+ */
+struct file *mali_mem_swap_get_global_swap_file(void);
+
+/**
+ * Unlock the given memory backend and pages in it could be swapped out by kernel.
+ */
+void mali_mem_swap_unlock_single_mem_backend(mali_mem_backend *mem_bkend);
+
+/**
+ * Remove the given memory backend from global swap list.
+ */
+void mali_memory_swap_list_backend_delete(mali_mem_backend *mem_bkend);
+
+/**
+ * Add the given memory backend to global swap list.
+ */
+void mali_memory_swap_list_backend_add(mali_mem_backend *mem_bkend);
+
+/**
+ * Allocate 1 index from bitmap used as page index in global swap file.
+ */
+u32 mali_mem_swap_idx_alloc(void);
+
+void mali_mem_swap_idx_free(u32 idx);
+
+/**
+ * Allocate a new swap item without page index.
+ */
+struct mali_swap_item *mali_mem_swap_alloc_swap_item(void);
+
+/**
+ * Free a swap item, truncate the corresponding space in page cache and free index of page.
+ */
+void mali_mem_swap_free_swap_item(mali_swap_item *swap_item);
+
+/**
+ * Allocate a page node with swap item.
+ */
+struct mali_page_node *_mali_mem_swap_page_node_allocate(void);
+
+/**
+ * Reduce the reference count of given page node and if return 0, just free this page node.
+ */
+_mali_osk_errcode_t _mali_mem_swap_put_page_node(struct mali_page_node *m_page);
+
+void _mali_mem_swap_page_node_free(struct mali_page_node *m_page);
+
+/**
+ * Free a swappable memory backend.
+ */
+u32 mali_mem_swap_free(mali_mem_swap *swap_mem);
+
+/**
+ * Ummap and free.
+ */
+u32 mali_mem_swap_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped);
+
+/**
+ * Read in a page from global swap file with the pre-allcated page index.
+ */
+mali_bool mali_mem_swap_in_page_node(struct mali_page_node *page_node);
+
+int mali_mem_swap_alloc_pages(mali_mem_swap *swap_mem, u32 size, u32 *bkend_idx);
+
+_mali_osk_errcode_t mali_mem_swap_mali_map(mali_mem_swap *swap_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+
+void mali_mem_swap_mali_unmap(mali_mem_allocation *alloc);
+
+/**
+ * When pp job created, we need swap in all of memory backend needed by this pp job.
+ */
+int mali_mem_swap_in_pages(struct mali_pp_job *job);
+
+/**
+ * Put all of memory backends used this pp job to the global swap list.
+ */
+int mali_mem_swap_out_pages(struct mali_pp_job *job);
+
+/**
+ * This will be called in page fault to process CPU read&write.
+ */
+int mali_mem_swap_allocate_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep) ;
+
+/**
+ * Used to process cow on demand for swappable memory backend.
+ */
+int mali_mem_swap_cow_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep);
+
+#ifdef MALI_MEM_SWAP_TRACKING
+void mali_mem_swap_tracking(u32 *swap_pool_size, u32 *unlock_size);
+#endif
+#endif /* __MALI_MEMORY_SWAP_ALLOC_H__ */
+
diff --git a/utgard/r6p1/linux/mali_memory_types.h b/utgard/r6p1/linux/mali_memory_types.h
new file mode 100755
index 0000000..f3a825d
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_types.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_TYPES_H__
+#define __MALI_MEMORY_TYPES_H__
+
+#include <linux/mm.h>
+
+#if defined(CONFIG_MALI400_UMP)
+#include "ump_kernel_interface.h"
+#endif
+
+typedef u32 mali_address_t;
+
+typedef enum mali_mem_type {
+	MALI_MEM_OS,
+	MALI_MEM_EXTERNAL,
+	MALI_MEM_SWAP,
+	MALI_MEM_DMA_BUF,
+	MALI_MEM_UMP,
+	MALI_MEM_BLOCK,
+	MALI_MEM_COW,
+	MALI_MEM_SECURE,
+	MALI_MEM_TYPE_MAX,
+} mali_mem_type;
+
+typedef struct mali_block_item {
+	/* for block type, the block_phy is alway page size align
+	* so use low 12bit used for ref_cout.
+	*/
+	unsigned long phy_addr;
+} mali_block_item;
+
+/**
+ * idx is used to locate the given page in the address space of swap file.
+ * ref_count is used to mark how many memory backends are using this item.
+ */
+typedef struct mali_swap_item {
+	u32 idx;
+	atomic_t ref_count;
+	struct page *page;
+	dma_addr_t dma_addr;
+} mali_swap_item;
+
+typedef enum mali_page_node_type {
+	MALI_PAGE_NODE_OS,
+	MALI_PAGE_NODE_BLOCK,
+	MALI_PAGE_NODE_SWAP,
+} mali_page_node_type;
+
+typedef struct mali_page_node {
+	struct list_head list;
+	union {
+		struct page *page;
+		mali_block_item *blk_it; /*pointer to block item*/
+		mali_swap_item *swap_it;
+	};
+
+	u32 type;
+} mali_page_node;
+
+typedef struct mali_mem_os_mem {
+	struct list_head pages;
+	u32 count;
+} mali_mem_os_mem;
+
+typedef struct mali_mem_dma_buf {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+	struct mali_dma_buf_attachment *attachment;
+#endif
+} mali_mem_dma_buf;
+
+typedef struct mali_mem_external {
+	dma_addr_t phys;
+	u32 size;
+} mali_mem_external;
+
+typedef struct mali_mem_ump {
+#if defined(CONFIG_MALI400_UMP)
+	ump_dd_handle handle;
+#endif
+} mali_mem_ump;
+
+typedef struct block_allocator_allocation {
+	/* The list will be released in reverse order */
+	struct block_info *last_allocated;
+	u32 mapping_length;
+	struct block_allocator *info;
+} block_allocator_allocation;
+
+typedef struct mali_mem_block_mem {
+	struct list_head pfns;
+	u32 count;
+} mali_mem_block_mem;
+
+typedef struct mali_mem_virt_mali_mapping {
+	mali_address_t addr; /* Virtual Mali address */
+	u32 properties;      /* MMU Permissions + cache, must match MMU HW */
+} mali_mem_virt_mali_mapping;
+
+typedef struct mali_mem_virt_cpu_mapping {
+	void __user *addr;
+	struct vm_area_struct *vma;
+} mali_mem_virt_cpu_mapping;
+
+#define MALI_MEM_ALLOCATION_VALID_MAGIC 0xdeda110c
+#define MALI_MEM_ALLOCATION_FREED_MAGIC 0x10101010
+
+typedef struct mali_mm_node {
+	/* MALI GPU vaddr start, use u32 for mmu only support 32bit address*/
+	uint32_t start; /* GPU vaddr */
+	uint32_t size;  /* GPU allocation virtual size */
+	unsigned allocated : 1;
+} mali_mm_node;
+
+typedef struct mali_vma_node {
+	struct mali_mm_node vm_node;
+	struct rb_node vm_rb;
+} mali_vma_node;
+
+
+typedef struct mali_mem_allocation {
+	MALI_DEBUG_CODE(u32 magic);
+	mali_mem_type type;                /**< Type of memory */
+	u32 flags;                         /**< Flags for this allocation */
+
+	struct mali_session_data *session; /**< Pointer to session that owns the allocation */
+
+	mali_mem_virt_cpu_mapping cpu_mapping; /**< CPU mapping */
+	mali_mem_virt_mali_mapping mali_mapping; /**< Mali mapping */
+
+	/* add for new memory system */
+	struct mali_vma_node mali_vma_node;
+	u32 vsize; /* virtual size*/
+	u32 psize; /* physical backend memory size*/
+	struct list_head list;
+	s32 backend_handle; /* idr for mem_backend */
+	_mali_osk_atomic_t mem_alloc_refcount;
+} mali_mem_allocation;
+
+struct mali_mem_os_allocator {
+	spinlock_t pool_lock;
+	struct list_head pool_pages;
+	size_t pool_count;
+
+	atomic_t allocated_pages;
+	size_t allocation_limit;
+
+	struct shrinker shrinker;
+	struct delayed_work timed_shrinker;
+	struct workqueue_struct *wq;
+};
+
+/* COW backend memory type */
+typedef struct mali_mem_cow {
+	struct list_head pages;  /**< all pages for this cow backend allocation,
+                                                                including new allocated pages for modified range*/
+	u32 count;               /**< number of pages */
+	s32 change_pages_nr;
+} mali_mem_cow;
+
+typedef struct mali_mem_swap {
+	struct list_head pages;
+	u32 count;
+} mali_mem_swap;
+
+typedef struct mali_mem_secure {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+	struct dma_buf *buf;
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+#endif
+	u32 count;
+} mali_mem_secure;
+
+#define MALI_MEM_BACKEND_FLAG_COWED                   (0x1)  /* COW has happen on this backend */
+#define MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE        (0x2)  /* This is an COW backend, mapped as not allowed cpu to write */
+#define MALI_MEM_BACKEND_FLAG_SWAP_COWED              (0x4)  /* Mark the given backend is cowed from swappable memory. */
+/* Mark this backend is not swapped_in in MALI driver, and before using it,
+ * we should swap it in and set up corresponding page table. */
+#define MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN            (0x8)
+#define MALI_MEM_BACKEND_FLAG_NOT_BINDED              (0x1 << 5) /* this backend it not back with physical memory, used for defer bind */
+#define MALI_MEM_BACKEND_FLAG_BINDED              (0x1 << 6) /* this backend it back with physical memory, used for defer bind */
+
+typedef struct mali_mem_backend {
+	mali_mem_type type;                /**< Type of backend memory */
+	u32 flags;                         /**< Flags for this allocation */
+	u32 size;
+	/* Union selected by type. */
+	union {
+		mali_mem_os_mem os_mem;       /**< MALI_MEM_OS */
+		mali_mem_external ext_mem;    /**< MALI_MEM_EXTERNAL */
+		mali_mem_dma_buf dma_buf;     /**< MALI_MEM_DMA_BUF */
+		mali_mem_ump ump_mem;         /**< MALI_MEM_UMP */
+		mali_mem_block_mem block_mem; /**< MALI_MEM_BLOCK */
+		mali_mem_cow cow_mem;
+		mali_mem_swap swap_mem;
+		mali_mem_secure secure_mem;
+	};
+	mali_mem_allocation *mali_allocation;
+	struct mutex mutex;
+	mali_mem_type cow_type;
+
+	struct list_head list;           /**< Used to link swappable memory backend to the global swappable list */
+	int using_count;                 /**< Mark how many PP jobs are using this memory backend */
+	u32 start_idx;                   /**< If the correspondign vma of this backend is linear, this value will be used to set vma->vm_pgoff */
+} mali_mem_backend;
+
+#define MALI_MEM_FLAG_MALI_GUARD_PAGE (_MALI_MAP_EXTERNAL_MAP_GUARD_PAGE)
+#define MALI_MEM_FLAG_DONT_CPU_MAP    (1 << 1)
+#define MALI_MEM_FLAG_CAN_RESIZE  (_MALI_MEMORY_ALLOCATE_RESIZEABLE)
+#endif /* __MALI_MEMORY_TYPES__ */
diff --git a/utgard/r6p1/linux/mali_memory_ump.c b/utgard/r6p1/linux/mali_memory_ump.c
new file mode 100755
index 0000000..72bef08
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_ump.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory.h"
+#include "ump_kernel_interface.h"
+
+static int mali_mem_ump_map(mali_mem_backend *mem_backend)
+{
+	ump_dd_handle ump_mem;
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	u32 nr_blocks;
+	u32 i;
+	ump_dd_physical_block *ump_blocks;
+	struct mali_page_directory *pagedir;
+	u32 offset = 0;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	ump_mem = mem_backend->ump_mem.handle;
+	MALI_DEBUG_ASSERT(UMP_DD_HANDLE_INVALID != ump_mem);
+
+	nr_blocks = ump_dd_phys_block_count_get(ump_mem);
+	if (nr_blocks == 0) {
+		MALI_DEBUG_PRINT(1, ("No block count\n"));
+		return -EINVAL;
+	}
+
+	ump_blocks = _mali_osk_malloc(sizeof(*ump_blocks) * nr_blocks);
+	if (NULL == ump_blocks) {
+		return -ENOMEM;
+	}
+
+	if (UMP_DD_INVALID == ump_dd_phys_blocks_get(ump_mem, ump_blocks, nr_blocks)) {
+		_mali_osk_free(ump_blocks);
+		return -EFAULT;
+	}
+
+	pagedir = session->page_directory;
+
+	mali_session_memory_lock(session);
+
+	err = mali_mem_mali_map_prepare(alloc);
+	if (_MALI_OSK_ERR_OK != err) {
+		MALI_DEBUG_PRINT(1, ("Mapping of UMP memory failed\n"));
+
+		_mali_osk_free(ump_blocks);
+		mali_session_memory_unlock(session);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_blocks; ++i) {
+		u32 virt = alloc->mali_vma_node.vm_node.start + offset;
+
+		MALI_DEBUG_PRINT(7, ("Mapping in 0x%08x size %d\n", ump_blocks[i].addr , ump_blocks[i].size));
+
+		mali_mmu_pagedir_update(pagedir, virt, ump_blocks[i].addr,
+					ump_blocks[i].size, MALI_MMU_FLAGS_DEFAULT);
+
+		offset += ump_blocks[i].size;
+	}
+
+	if (alloc->flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		u32 virt = alloc->mali_vma_node.vm_node.start + offset;
+
+		/* Map in an extra virtual guard page at the end of the VMA */
+		MALI_DEBUG_PRINT(6, ("Mapping in extra guard page\n"));
+
+		mali_mmu_pagedir_update(pagedir, virt, ump_blocks[0].addr, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+
+		offset += _MALI_OSK_MALI_PAGE_SIZE;
+	}
+	mali_session_memory_unlock(session);
+	_mali_osk_free(ump_blocks);
+	return 0;
+}
+
+static void mali_mem_ump_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+int mali_mem_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32  secure_id, u32 flags)
+{
+	ump_dd_handle ump_mem;
+	int ret;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+
+	MALI_DEBUG_PRINT(3,
+			 ("Requested to map ump memory with secure id %d into virtual memory 0x%08X, size 0x%08X\n",
+			  secure_id, alloc->mali_vma_node.vm_node.start, alloc->mali_vma_node.vm_node.size));
+
+	ump_mem = ump_dd_handle_create_from_secure_id(secure_id);
+	if (UMP_DD_HANDLE_INVALID == ump_mem) MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP;
+	if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+	mem_backend->ump_mem.handle = ump_mem;
+
+	ret = mali_mem_ump_map(mem_backend);
+	if (0 != ret) {
+		ump_dd_reference_release(ump_mem);
+		return _MALI_OSK_ERR_FAULT;
+	}
+	MALI_DEBUG_PRINT(3, ("Returning from UMP bind\n"));
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_unbind_ump_buf(mali_mem_backend *mem_backend)
+{
+	ump_dd_handle ump_mem;
+	mali_mem_allocation *alloc;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+	ump_mem = mem_backend->ump_mem.handle;
+	MALI_DEBUG_ASSERT(UMP_DD_HANDLE_INVALID != ump_mem);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	mali_mem_ump_unmap(alloc);
+	ump_dd_reference_release(ump_mem);
+}
+
diff --git a/utgard/r6p1/linux/mali_memory_ump.h b/utgard/r6p1/linux/mali_memory_ump.h
new file mode 100644
index 0000000..23f59ae
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_ump.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_UMP_BUF_H__
+#define __MALI_MEMORY_UMP_BUF_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_memory.h"
+
+int mali_mem_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32  secure_id, u32 flags);
+void mali_mem_unbind_ump_buf(mali_mem_backend *mem_backend);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MEMORY_DMA_BUF_H__ */
diff --git a/utgard/r6p1/linux/mali_memory_util.c b/utgard/r6p1/linux/mali_memory_util.c
new file mode 100644
index 0000000..8092d57
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_util.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_secure.h"
+#endif
+#if defined(CONFIG_MALI400_UMP)
+#include "mali_memory_ump.h"
+#endif
+#include "mali_memory_external.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_swap_alloc.h"
+
+
+
+/**
+*function @_mali_free_allocation_mem - free a memory allocation
+*/
+static u32 _mali_free_allocation_mem(mali_mem_allocation *mali_alloc)
+{
+	mali_mem_backend *mem_bkend = NULL;
+	u32 free_pages_nr = 0;
+
+	struct mali_session_data *session = mali_alloc->session;
+	MALI_DEBUG_PRINT(4, (" _mali_free_allocation_mem, psize =0x%x! \n", mali_alloc->psize));
+	if (0 == mali_alloc->psize)
+		goto out;
+
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+	switch (mem_bkend->type) {
+	case MALI_MEM_OS:
+		free_pages_nr = mali_mem_os_release(mem_bkend);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		break;
+	case MALI_MEM_UMP:
+#if defined(CONFIG_MALI400_UMP)
+		mali_mem_unbind_ump_buf(mem_bkend);
+		atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+#else
+		MALI_DEBUG_PRINT(1, ("UMP not supported\n"));
+#endif
+		break;
+	case MALI_MEM_DMA_BUF:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		mali_mem_unbind_dma_buf(mem_bkend);
+		atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported\n"));
+#endif
+		break;
+	case MALI_MEM_EXTERNAL:
+		mali_mem_unbind_ext_buf(mem_bkend);
+		atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+		break;
+
+	case MALI_MEM_BLOCK:
+		free_pages_nr = mali_mem_block_release(mem_bkend);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		break;
+
+	case MALI_MEM_COW:
+		if (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED) {
+			free_pages_nr = mali_mem_swap_release(mem_bkend, MALI_TRUE);
+		} else {
+			free_pages_nr = mali_mem_cow_release(mem_bkend, MALI_TRUE);
+		}
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		break;
+	case MALI_MEM_SWAP:
+		free_pages_nr = mali_mem_swap_release(mem_bkend, MALI_TRUE);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		atomic_sub(free_pages_nr, &session->mali_mem_array[mem_bkend->type]);
+		break;
+	case MALI_MEM_SECURE:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		free_pages_nr = mali_mem_secure_release(mem_bkend);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory\n"));
+#endif
+		break;
+	default:
+		MALI_DEBUG_PRINT(1, ("mem type %d is not in the mali_mem_type enum.\n", mem_bkend->type));
+		break;
+	}
+
+	/*Remove backend memory idex */
+	mutex_lock(&mali_idr_mutex);
+	idr_remove(&mali_backend_idr, mali_alloc->backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	kfree(mem_bkend);
+out:
+	/* remove memory allocation  */
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_alloc->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_alloc);
+	return free_pages_nr;
+}
+
+/**
+*  ref_count for allocation
+*/
+u32 mali_allocation_unref(struct mali_mem_allocation **alloc)
+{
+	u32 free_pages_nr = 0;
+	mali_mem_allocation *mali_alloc = *alloc;
+	*alloc = NULL;
+	if (0 == _mali_osk_atomic_dec_return(&mali_alloc->mem_alloc_refcount)) {
+		free_pages_nr = _mali_free_allocation_mem(mali_alloc);
+	}
+	return free_pages_nr;
+}
+
+void mali_allocation_ref(struct mali_mem_allocation *alloc)
+{
+	_mali_osk_atomic_inc(&alloc->mem_alloc_refcount);
+}
+
+void mali_free_session_allocations(struct mali_session_data *session)
+{
+	struct mali_mem_allocation *entry, *next;
+
+	MALI_DEBUG_PRINT(4, (" mali_free_session_allocations! \n"));
+
+	list_for_each_entry_safe(entry, next, &session->allocation_mgr.head, list) {
+		mali_allocation_unref(&entry);
+	}
+}
diff --git a/utgard/r6p1/linux/mali_memory_util.h b/utgard/r6p1/linux/mali_memory_util.h
new file mode 100644
index 0000000..0a23b77
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_util.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+
+#ifndef __MALI_MEMORY_UTIL_H__
+#define __MALI_MEMORY_UTIL_H__
+u32 mali_allocation_unref(struct mali_mem_allocation **alloc);
+
+void mali_allocation_ref(struct mali_mem_allocation *alloc);
+
+void mali_free_session_allocations(struct mali_session_data *session);
+
+#endif
diff --git a/utgard/r6p1/linux/mali_memory_virtual.c b/utgard/r6p1/linux/mali_memory_virtual.c
new file mode 100644
index 0000000..a217e43
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_virtual.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+
+
+/**
+*internal helper to link node into the rb-tree
+*/
+static inline void _mali_vma_offset_add_rb(struct mali_allocation_manager *mgr,
+		struct mali_vma_node *node)
+{
+	struct rb_node **iter = &mgr->allocation_mgr_rb.rb_node;
+	struct rb_node *parent = NULL;
+	struct mali_vma_node *iter_node;
+
+	while (likely(*iter)) {
+		parent = *iter;
+		iter_node = rb_entry(*iter, struct mali_vma_node, vm_rb);
+
+		if (node->vm_node.start < iter_node->vm_node.start)
+			iter = &(*iter)->rb_left;
+		else if (node->vm_node.start > iter_node->vm_node.start)
+			iter = &(*iter)->rb_right;
+		else
+			MALI_DEBUG_ASSERT(0);
+	}
+
+	rb_link_node(&node->vm_rb, parent, iter);
+	rb_insert_color(&node->vm_rb, &mgr->allocation_mgr_rb);
+}
+
+/**
+ * mali_vma_offset_add() - Add offset node to RB Tree
+ */
+int mali_vma_offset_add(struct mali_allocation_manager *mgr,
+			struct mali_vma_node *node)
+{
+	int ret = 0;
+	write_lock(&mgr->vm_lock);
+
+	if (node->vm_node.allocated) {
+		goto out;
+	}
+
+	_mali_vma_offset_add_rb(mgr, node);
+	/* set to allocated */
+	node->vm_node.allocated = 1;
+
+out:
+	write_unlock(&mgr->vm_lock);
+	return ret;
+}
+
+/**
+ * mali_vma_offset_remove() - Remove offset node from RB tree
+ */
+void mali_vma_offset_remove(struct mali_allocation_manager *mgr,
+			    struct mali_vma_node *node)
+{
+	write_lock(&mgr->vm_lock);
+
+	if (node->vm_node.allocated) {
+		rb_erase(&node->vm_rb, &mgr->allocation_mgr_rb);
+		memset(&node->vm_node, 0, sizeof(node->vm_node));
+	}
+	write_unlock(&mgr->vm_lock);
+}
+
+/**
+* mali_vma_offset_search - Search the node in RB tree
+*/
+struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr,
+		unsigned long start, unsigned long pages)
+{
+	struct mali_vma_node *node, *best;
+	struct rb_node *iter;
+	unsigned long offset;
+	read_lock(&mgr->vm_lock);
+
+	iter = mgr->allocation_mgr_rb.rb_node;
+	best = NULL;
+
+	while (likely(iter)) {
+		node = rb_entry(iter, struct mali_vma_node, vm_rb);
+		offset = node->vm_node.start;
+		if (start >= offset) {
+			iter = iter->rb_right;
+			best = node;
+			if (start == offset)
+				break;
+		} else {
+			iter = iter->rb_left;
+		}
+	}
+
+	if (best) {
+		offset = best->vm_node.start + best->vm_node.size;
+		if (offset <= start + pages)
+			best = NULL;
+	}
+	read_unlock(&mgr->vm_lock);
+
+	return best;
+}
+
diff --git a/utgard/r6p1/linux/mali_memory_virtual.h b/utgard/r6p1/linux/mali_memory_virtual.h
new file mode 100644
index 0000000..2a70cbb
--- /dev/null
+++ b/utgard/r6p1/linux/mali_memory_virtual.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __MALI_GPU_VMEM_H__
+#define __MALI_GPU_VMEM_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_manager.h"
+
+
+
+int mali_vma_offset_add(struct mali_allocation_manager *mgr,
+			struct mali_vma_node *node);
+
+void mali_vma_offset_remove(struct mali_allocation_manager *mgr,
+			    struct mali_vma_node *node);
+
+struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr,
+		unsigned long start,    unsigned long pages);
+
+#endif
diff --git a/utgard/r6p1/linux/mali_osk_atomics.c b/utgard/r6p1/linux/mali_osk_atomics.c
new file mode 100755
index 0000000..03f4421
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_atomics.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_atomics.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <asm/atomic.h>
+#include "mali_kernel_common.h"
+
+void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom)
+{
+	atomic_dec((atomic_t *)&atom->u.val);
+}
+
+u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom)
+{
+	return atomic_dec_return((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom)
+{
+	atomic_inc((atomic_t *)&atom->u.val);
+}
+
+u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom)
+{
+	return atomic_inc_return((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val)
+{
+	MALI_DEBUG_ASSERT_POINTER(atom);
+	atomic_set((atomic_t *)&atom->u.val, val);
+}
+
+u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom)
+{
+	return atomic_read((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_term(_mali_osk_atomic_t *atom)
+{
+	MALI_IGNORE(atom);
+}
+
+u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val)
+{
+	return atomic_xchg((atomic_t *)&atom->u.val, val);
+}
diff --git a/utgard/r6p1/linux/mali_osk_bitmap.c b/utgard/r6p1/linux/mali_osk_bitmap.c
new file mode 100644
index 0000000..67cc7e4
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_bitmap.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2010, 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_bitmap.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/bitmap.h>
+#include <linux/vmalloc.h>
+#include "common/mali_kernel_common.h"
+#include "mali_osk_types.h"
+#include "mali_osk.h"
+
+u32 _mali_osk_bitmap_alloc(struct _mali_osk_bitmap *bitmap)
+{
+	u32 obj;
+
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	_mali_osk_spinlock_lock(bitmap->lock);
+
+	obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->reserve);
+
+	if (obj < bitmap->max) {
+		set_bit(obj, bitmap->table);
+	} else {
+		obj = -1;
+	}
+
+	if (obj != -1)
+		--bitmap->avail;
+	_mali_osk_spinlock_unlock(bitmap->lock);
+
+	return obj;
+}
+
+void _mali_osk_bitmap_free(struct _mali_osk_bitmap *bitmap, u32 obj)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	_mali_osk_bitmap_free_range(bitmap, obj, 1);
+}
+
+u32 _mali_osk_bitmap_alloc_range(struct _mali_osk_bitmap *bitmap, int cnt)
+{
+	u32 obj;
+
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	if (0 >= cnt) {
+		return -1;
+	}
+
+	if (1 == cnt) {
+		return _mali_osk_bitmap_alloc(bitmap);
+	}
+
+	_mali_osk_spinlock_lock(bitmap->lock);
+	obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max,
+					 bitmap->last, cnt, 0);
+
+	if (obj >= bitmap->max) {
+		obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max,
+						 bitmap->reserve, cnt, 0);
+	}
+
+	if (obj < bitmap->max) {
+		bitmap_set(bitmap->table, obj, cnt);
+
+		bitmap->last = (obj + cnt);
+		if (bitmap->last >= bitmap->max) {
+			bitmap->last = bitmap->reserve;
+		}
+	} else {
+		obj = -1;
+	}
+
+	if (obj != -1) {
+		bitmap->avail -= cnt;
+	}
+
+	_mali_osk_spinlock_unlock(bitmap->lock);
+
+	return obj;
+}
+
+u32 _mali_osk_bitmap_avail(struct _mali_osk_bitmap *bitmap)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	return bitmap->avail;
+}
+
+void _mali_osk_bitmap_free_range(struct _mali_osk_bitmap *bitmap, u32 obj, int cnt)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	_mali_osk_spinlock_lock(bitmap->lock);
+	bitmap_clear(bitmap->table, obj, cnt);
+	bitmap->last = min(bitmap->last, obj);
+
+	bitmap->avail += cnt;
+	_mali_osk_spinlock_unlock(bitmap->lock);
+}
+
+int _mali_osk_bitmap_init(struct _mali_osk_bitmap *bitmap, u32 num, u32 reserve)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+	MALI_DEBUG_ASSERT(reserve <= num);
+
+	bitmap->reserve = reserve;
+	bitmap->last = reserve;
+	bitmap->max  = num;
+	bitmap->avail = num - reserve;
+	bitmap->lock = _mali_osk_spinlock_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST);
+	if (!bitmap->lock) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) *
+				sizeof(long), GFP_KERNEL);
+	if (!bitmap->table) {
+		_mali_osk_spinlock_term(bitmap->lock);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_bitmap_term(struct _mali_osk_bitmap *bitmap)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	if (NULL != bitmap->lock) {
+		_mali_osk_spinlock_term(bitmap->lock);
+	}
+
+	if (NULL != bitmap->table) {
+		kfree(bitmap->table);
+	}
+}
+
diff --git a/utgard/r6p1/linux/mali_osk_irq.c b/utgard/r6p1/linux/mali_osk_irq.c
new file mode 100755
index 0000000..960ed81
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_irq.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_irq.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/types.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+#include <linux/slab.h>	/* For memory allocation */
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+typedef struct _mali_osk_irq_t_struct {
+	u32 irqnum;
+	void *data;
+	_mali_osk_irq_uhandler_t uhandler;
+} mali_osk_irq_object_t;
+
+typedef irqreturn_t (*irq_handler_func_t)(int, void *, struct pt_regs *);
+static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id);   /* , struct pt_regs *regs*/
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+u32 get_irqnum(struct _mali_osk_irq_t_struct* irq)
+{
+	if (irq)
+		return irq->irqnum;
+	else
+		return 0;
+}
+#endif
+
+#if defined(DEBUG)
+
+struct test_interrupt_data {
+	_mali_osk_irq_ack_t ack_func;
+	void *probe_data;
+	mali_bool interrupt_received;
+	wait_queue_head_t wq;
+};
+
+static irqreturn_t test_interrupt_upper_half(int port_name, void *dev_id)
+{
+	irqreturn_t ret = IRQ_NONE;
+	struct test_interrupt_data *data = (struct test_interrupt_data *)dev_id;
+
+	if (_MALI_OSK_ERR_OK == data->ack_func(data->probe_data)) {
+		data->interrupt_received = MALI_TRUE;
+		wake_up(&data->wq);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static _mali_osk_errcode_t test_interrupt(u32 irqnum,
+		_mali_osk_irq_trigger_t trigger_func,
+		_mali_osk_irq_ack_t ack_func,
+		void *probe_data,
+		const char *description)
+{
+	unsigned long irq_flags = 0;
+	struct test_interrupt_data data = {
+		.ack_func = ack_func,
+		.probe_data = probe_data,
+		.interrupt_received = MALI_FALSE,
+	};
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	irq_flags |= IRQF_SHARED;
+#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	if (0 != request_irq(irqnum, test_interrupt_upper_half, irq_flags, description, &data)) {
+		MALI_DEBUG_PRINT(2, ("Unable to install test IRQ handler for core '%s'\n", description));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	init_waitqueue_head(&data.wq);
+
+	trigger_func(probe_data);
+	wait_event_timeout(data.wq, data.interrupt_received, 100);
+
+	free_irq(irqnum, &data);
+
+	if (data.interrupt_received) {
+		MALI_DEBUG_PRINT(3, ("%s: Interrupt test OK\n", description));
+		return _MALI_OSK_ERR_OK;
+	} else {
+		MALI_PRINT_ERROR(("%s: Failed interrupt test on %u\n", description, irqnum));
+		return _MALI_OSK_ERR_FAULT;
+	}
+}
+
+#endif /* defined(DEBUG) */
+
+_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description)
+{
+	mali_osk_irq_object_t *irq_object;
+	unsigned long irq_flags = 0;
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	irq_flags |= IRQF_SHARED;
+#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	irq_object = kmalloc(sizeof(mali_osk_irq_object_t), GFP_KERNEL);
+	if (NULL == irq_object) {
+		return NULL;
+	}
+
+	if (-1 == irqnum) {
+		/* Probe for IRQ */
+		if ((NULL != trigger_func) && (NULL != ack_func)) {
+			unsigned long probe_count = 3;
+			_mali_osk_errcode_t err;
+			int irq;
+
+			MALI_DEBUG_PRINT(2, ("Probing for irq\n"));
+
+			do {
+				unsigned long mask;
+
+				mask = probe_irq_on();
+				trigger_func(probe_data);
+
+				_mali_osk_time_ubusydelay(5);
+
+				irq = probe_irq_off(mask);
+				err = ack_func(probe_data);
+			} while (irq < 0 && (err == _MALI_OSK_ERR_OK) && probe_count--);
+
+			if (irq < 0 || (_MALI_OSK_ERR_OK != err)) irqnum = -1;
+			else irqnum = irq;
+		} else irqnum = -1; /* no probe functions, fault */
+
+		if (-1 != irqnum) {
+			/* found an irq */
+			MALI_DEBUG_PRINT(2, ("Found irq %d\n", irqnum));
+		} else {
+			MALI_DEBUG_PRINT(2, ("Probe for irq failed\n"));
+		}
+	}
+
+	irq_object->irqnum = irqnum;
+	irq_object->uhandler = uhandler;
+	irq_object->data = int_data;
+
+	if (-1 == irqnum) {
+		MALI_DEBUG_PRINT(2, ("No IRQ for core '%s' found during probe\n", description));
+		kfree(irq_object);
+		return NULL;
+	}
+
+#if defined(DEBUG)
+	/* Verify that the configured interrupt settings are working */
+	if (_MALI_OSK_ERR_OK != test_interrupt(irqnum, trigger_func, ack_func, probe_data, description)) {
+		MALI_DEBUG_PRINT(2, ("Test of IRQ(%d) handler for core '%s' failed\n", irqnum, description));
+		kfree(irq_object);
+		return NULL;
+	}
+#endif
+
+	if (0 != request_irq(irqnum, irq_handler_upper_half, irq_flags, description, irq_object)) {
+		MALI_DEBUG_PRINT(2, ("Unable to install IRQ handler for core '%s'\n", description));
+		kfree(irq_object);
+		return NULL;
+	}
+
+	return irq_object;
+}
+
+void _mali_osk_irq_term(_mali_osk_irq_t *irq)
+{
+	mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)irq;
+	free_irq(irq_object->irqnum, irq_object);
+	kfree(irq_object);
+}
+
+
+/** This function is called directly in interrupt context from the OS just after
+ * the CPU get the hw-irq from mali, or other devices on the same IRQ-channel.
+ * It is registered one of these function for each mali core. When an interrupt
+ * arrives this function will be called equal times as registered mali cores.
+ * That means that we only check one mali core in one function call, and the
+ * core we check for each turn is given by the \a dev_id variable.
+ * If we detect an pending interrupt on the given core, we mask the interrupt
+ * out by settging the core's IRQ_MASK register to zero.
+ * Then we schedule the mali_core_irq_handler_bottom_half to run as high priority
+ * work queue job.
+ */
+static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id)   /* , struct pt_regs *regs*/
+{
+	irqreturn_t ret = IRQ_NONE;
+	mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)dev_id;
+
+	if (_MALI_OSK_ERR_OK == irq_object->uhandler(irq_object->data)) {
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
diff --git a/utgard/r6p1/linux/mali_osk_locks.c b/utgard/r6p1/linux/mali_osk_locks.c
new file mode 100755
index 0000000..4fa9398
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_locks.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_locks.c
+ * Implemenation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk_locks.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+
+#ifdef DEBUG
+#ifdef LOCK_ORDER_CHECKING
+static DEFINE_SPINLOCK(lock_tracking_lock);
+static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid);
+static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid);
+static const char *const lock_order_to_string(_mali_osk_lock_order_t order);
+#endif /* LOCK_ORDER_CHECKING */
+
+void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+{
+	checker->orig_flags = flags;
+	checker->owner = 0;
+
+#ifdef LOCK_ORDER_CHECKING
+	checker->order = order;
+	checker->next = NULL;
+#endif
+}
+
+void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker)
+{
+	checker->owner = _mali_osk_get_tid();
+
+#ifdef LOCK_ORDER_CHECKING
+	if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) {
+		if (!add_lock_to_log_and_check(checker, _mali_osk_get_tid())) {
+			printk(KERN_ERR "%d: ERROR lock %p taken while holding a lock of a higher order.\n",
+			       _mali_osk_get_tid(), checker);
+			dump_stack();
+		}
+	}
+#endif
+}
+
+void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker)
+{
+
+#ifdef LOCK_ORDER_CHECKING
+	if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) {
+		remove_lock_from_log(checker, _mali_osk_get_tid());
+	}
+#endif
+	checker->owner = 0;
+}
+
+
+#ifdef LOCK_ORDER_CHECKING
+/* Lock order checking
+ * -------------------
+ *
+ * To assure that lock ordering scheme defined by _mali_osk_lock_order_t is strictly adhered to, the
+ * following function will, together with a linked list and some extra members in _mali_osk_lock_debug_s,
+ * make sure that a lock that is taken has a higher order than the current highest-order lock a
+ * thread holds.
+ *
+ * This is done in the following manner:
+ * - A linked list keeps track of locks held by a thread.
+ * - A `next' pointer is added to each lock. This is used to chain the locks together.
+ * - When taking a lock, the `add_lock_to_log_and_check' makes sure that taking
+ *   the given lock is legal. It will follow the linked list  to find the last
+ *   lock taken by this thread. If the last lock's order was lower than the
+ *   lock that is to be taken, it appends the new lock to the list and returns
+ *   true, if not, it return false. This return value is assert()'ed on in
+ *   _mali_osk_lock_wait().
+ */
+
+static struct _mali_osk_lock_debug_s *lock_lookup_list;
+
+static void dump_lock_tracking_list(void)
+{
+	struct _mali_osk_lock_debug_s *l;
+	u32 n = 1;
+
+	/* print list for debugging purposes */
+	l = lock_lookup_list;
+
+	while (NULL != l) {
+		printk(" [lock: %p, tid_owner: %d, order: %d] ->", l, l->owner, l->order);
+		l = l->next;
+		MALI_DEBUG_ASSERT(n++ < 100);
+	}
+	printk(" NULL\n");
+}
+
+static int tracking_list_length(void)
+{
+	struct _mali_osk_lock_debug_s *l;
+	u32 n = 0;
+	l = lock_lookup_list;
+
+	while (NULL != l) {
+		l = l->next;
+		n++;
+		MALI_DEBUG_ASSERT(n < 100);
+	}
+	return n;
+}
+
+static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid)
+{
+	mali_bool ret = MALI_FALSE;
+	_mali_osk_lock_order_t highest_order_for_tid = _MALI_OSK_LOCK_ORDER_FIRST;
+	struct _mali_osk_lock_debug_s *highest_order_lock = (struct _mali_osk_lock_debug_s *)0xbeefbabe;
+	struct _mali_osk_lock_debug_s *l;
+	unsigned long local_lock_flag;
+	u32 len;
+
+	spin_lock_irqsave(&lock_tracking_lock, local_lock_flag);
+	len = tracking_list_length();
+
+	l  = lock_lookup_list;
+	if (NULL == l) { /* This is the first lock taken by this thread -- record and return true */
+		lock_lookup_list = lock;
+		spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+		return MALI_TRUE;
+	} else {
+		/* Traverse the locks taken and find the lock of the highest order.
+		 * Since several threads may hold locks, each lock's owner must be
+		 * checked so that locks not owned by this thread can be ignored. */
+		for (;;) {
+			MALI_DEBUG_ASSERT_POINTER(l);
+			if (tid == l->owner && l->order >= highest_order_for_tid) {
+				highest_order_for_tid = l->order;
+				highest_order_lock = l;
+			}
+
+			if (NULL != l->next) {
+				l = l->next;
+			} else {
+				break;
+			}
+		}
+
+		l->next = lock;
+		l->next = NULL;
+	}
+
+	/* We have now found the highest order lock currently held by this thread and can see if it is
+	 * legal to take the requested lock. */
+	ret = highest_order_for_tid < lock->order;
+
+	if (!ret) {
+		printk(KERN_ERR "Took lock of order %d (%s) while holding lock of order %d (%s)\n",
+		       lock->order, lock_order_to_string(lock->order),
+		       highest_order_for_tid, lock_order_to_string(highest_order_for_tid));
+		dump_lock_tracking_list();
+	}
+
+	if (len + 1 != tracking_list_length()) {
+		printk(KERN_ERR "************ lock: %p\n", lock);
+		printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length());
+		dump_lock_tracking_list();
+		MALI_DEBUG_ASSERT_POINTER(NULL);
+	}
+
+	spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+	return ret;
+}
+
+static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid)
+{
+	struct _mali_osk_lock_debug_s *curr;
+	struct _mali_osk_lock_debug_s *prev = NULL;
+	unsigned long local_lock_flag;
+	u32 len;
+	u32 n = 0;
+
+	spin_lock_irqsave(&lock_tracking_lock, local_lock_flag);
+	len = tracking_list_length();
+	curr = lock_lookup_list;
+
+	if (NULL == curr) {
+		printk(KERN_ERR "Error: Lock tracking list was empty on call to remove_lock_from_log\n");
+		dump_lock_tracking_list();
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(curr);
+
+
+	while (lock != curr) {
+		prev = curr;
+
+		MALI_DEBUG_ASSERT_POINTER(curr);
+		curr = curr->next;
+		MALI_DEBUG_ASSERT(n++ < 100);
+	}
+
+	if (NULL == prev) {
+		lock_lookup_list = curr->next;
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(curr);
+		MALI_DEBUG_ASSERT_POINTER(prev);
+		prev->next = curr->next;
+	}
+
+	lock->next = NULL;
+
+	if (len - 1 != tracking_list_length()) {
+		printk(KERN_ERR "************ lock: %p\n", lock);
+		printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length());
+		dump_lock_tracking_list();
+		MALI_DEBUG_ASSERT_POINTER(NULL);
+	}
+
+	spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+}
+
+static const char *const lock_order_to_string(_mali_osk_lock_order_t order)
+{
+	switch (order) {
+	case _MALI_OSK_LOCK_ORDER_SESSIONS:
+		return "_MALI_OSK_LOCK_ORDER_SESSIONS";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_SESSION:
+		return "_MALI_OSK_LOCK_ORDER_MEM_SESSION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_INFO:
+		return "_MALI_OSK_LOCK_ORDER_MEM_INFO";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_PT_CACHE:
+		return "_MALI_OSK_LOCK_ORDER_MEM_PT_CACHE";
+		break;
+	case _MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP:
+		return "_MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PM_EXECUTION:
+		return "_MALI_OSK_LOCK_ORDER_PM_EXECUTION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_EXECUTOR:
+		return "_MALI_OSK_LOCK_ORDER_EXECUTOR";
+		break;
+	case _MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM:
+		return "_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SCHEDULER:
+		return "_MALI_OSK_LOCK_ORDER_SCHEDULER";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED:
+		return "_MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED";
+		break;
+	case _MALI_OSK_LOCK_ORDER_DMA_COMMAND:
+		return "_MALI_OSK_LOCK_ORDER_DMA_COMMAND";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PROFILING:
+		return "_MALI_OSK_LOCK_ORDER_PROFILING";
+		break;
+	case _MALI_OSK_LOCK_ORDER_L2:
+		return "_MALI_OSK_LOCK_ORDER_L2";
+		break;
+	case _MALI_OSK_LOCK_ORDER_L2_COMMAND:
+		return "_MALI_OSK_LOCK_ORDER_L2_COMMAND";
+		break;
+	case _MALI_OSK_LOCK_ORDER_UTILIZATION:
+		return "_MALI_OSK_LOCK_ORDER_UTILIZATION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS:
+		return "_MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PM_STATE:
+		return "_MALI_OSK_LOCK_ORDER_PM_STATE";
+		break;
+	default:
+		return "<UNKNOWN_LOCK_ORDER>";
+	}
+}
+#endif /* LOCK_ORDER_CHECKING */
+#endif /* DEBUG */
diff --git a/utgard/r6p1/linux/mali_osk_locks.h b/utgard/r6p1/linux/mali_osk_locks.h
new file mode 100755
index 0000000..a05586f
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_locks.h
@@ -0,0 +1,326 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_locks.h
+ * Defines OS abstraction of lock and mutex
+ */
+#ifndef _MALI_OSK_LOCKS_H
+#define _MALI_OSK_LOCKS_H
+
+#include <linux/spinlock.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+
+#include <linux/slab.h>
+
+#include "mali_osk_types.h"
+
+#ifdef _cplusplus
+extern "C" {
+#endif
+
+	/* When DEBUG is enabled, this struct will be used to track owner, mode and order checking */
+#ifdef DEBUG
+	struct _mali_osk_lock_debug_s {
+		u32 owner;
+		_mali_osk_lock_flags_t orig_flags;
+		_mali_osk_lock_order_t order;
+		struct _mali_osk_lock_debug_s *next;
+	};
+#endif
+
+	/* Anstraction of spinlock_t */
+	struct _mali_osk_spinlock_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+		spinlock_t spinlock;
+	};
+
+	/* Abstration of spinlock_t and lock flag which is used to store register's state before locking */
+	struct _mali_osk_spinlock_irq_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+
+		spinlock_t spinlock;
+		unsigned long flags;
+	};
+
+	/* Abstraction of rw_semaphore in OS */
+	struct _mali_osk_mutex_rw_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+		_mali_osk_lock_mode_t mode;
+#endif
+
+		struct rw_semaphore rw_sema;
+	};
+
+	/* Mutex and mutex_interruptible functions share the same osk mutex struct */
+	struct _mali_osk_mutex_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+		struct mutex mutex;
+	};
+
+#ifdef DEBUG
+	/** @brief _mali_osk_locks_debug_init/add/remove() functions are declared when DEBUG is enabled and
+	 * defined in file mali_osk_locks.c. When LOCK_ORDER_CHECKING is enabled, calling these functions when we
+	 * init/lock/unlock a lock/mutex, we could track lock order of a given tid. */
+	void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order);
+	void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker);
+	void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker);
+
+	/** @brief This function can return a given lock's owner when DEBUG     is enabled. */
+	static inline u32 _mali_osk_lock_get_owner(struct _mali_osk_lock_debug_s *lock)
+	{
+		return lock->owner;
+	}
+#else
+#define _mali_osk_locks_debug_init(x, y, z) do {} while (0)
+#define _mali_osk_locks_debug_add(x) do {} while (0)
+#define _mali_osk_locks_debug_remove(x) do {} while (0)
+#endif
+
+	/** @brief Before use _mali_osk_spin_lock, init function should be used to allocate memory and initial spinlock*/
+	static inline _mali_osk_spinlock_t *_mali_osk_spinlock_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_spinlock_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_spinlock_t), GFP_KERNEL);
+		if (NULL == lock) {
+			return NULL;
+		}
+		spin_lock_init(&lock->spinlock);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief Lock a spinlock */
+	static inline void  _mali_osk_spinlock_lock(_mali_osk_spinlock_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		spin_lock(&lock->spinlock);
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock a spinlock */
+	static inline void _mali_osk_spinlock_unlock(_mali_osk_spinlock_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		spin_unlock(&lock->spinlock);
+	}
+
+	/** @brief Free a memory block which the argument lock pointed to and its type must be
+	 * _mali_osk_spinlock_t *. */
+	static inline void _mali_osk_spinlock_term(_mali_osk_spinlock_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Before _mali_osk_spinlock_irq_lock/unlock/term() is called, init function should be
+	 * called to initial spinlock and flags in struct _mali_osk_spinlock_irq_t. */
+	static inline _mali_osk_spinlock_irq_t *_mali_osk_spinlock_irq_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_spinlock_irq_t *lock = NULL;
+		lock = kmalloc(sizeof(_mali_osk_spinlock_irq_t), GFP_KERNEL);
+
+		if (NULL == lock) {
+			return NULL;
+		}
+
+		lock->flags = 0;
+		spin_lock_init(&lock->spinlock);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief Lock spinlock and save the register's state */
+	static inline void _mali_osk_spinlock_irq_lock(_mali_osk_spinlock_irq_t *lock)
+	{
+		unsigned long tmp_flags;
+
+		BUG_ON(NULL == lock);
+		spin_lock_irqsave(&lock->spinlock, tmp_flags);
+		lock->flags = tmp_flags;
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock spinlock with saved register's state */
+	static inline void _mali_osk_spinlock_irq_unlock(_mali_osk_spinlock_irq_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		spin_unlock_irqrestore(&lock->spinlock, lock->flags);
+	}
+
+	/** @brief Destroy a given memory block which lock pointed to, and the lock type must be
+	 * _mali_osk_spinlock_irq_t *. */
+	static inline void _mali_osk_spinlock_irq_term(_mali_osk_spinlock_irq_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Before _mali_osk_mutex_rw_wait/signal/term() is called, we should call
+	 * _mali_osk_mutex_rw_init() to kmalloc a memory block and initial part of elements in it. */
+	static inline _mali_osk_mutex_rw_t *_mali_osk_mutex_rw_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_mutex_rw_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_mutex_rw_t), GFP_KERNEL);
+
+		if (NULL == lock) {
+			return NULL;
+		}
+
+		init_rwsem(&lock->rw_sema);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief When call _mali_osk_mutex_rw_wait/signal() functions, the second argument mode
+	 * should be assigned with value _MALI_OSK_LOCKMODE_RO or _MALI_OSK_LOCKMODE_RW */
+	static inline void _mali_osk_mutex_rw_wait(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode)
+	{
+		BUG_ON(NULL == lock);
+		BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode));
+
+		if (mode == _MALI_OSK_LOCKMODE_RO) {
+			down_read(&lock->rw_sema);
+		} else {
+			down_write(&lock->rw_sema);
+		}
+
+#ifdef DEBUG
+		if (mode == _MALI_OSK_LOCKMODE_RW) {
+			lock->mode = mode;
+		} else { /* mode == _MALI_OSK_LOCKMODE_RO */
+			lock->mode = mode;
+		}
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+#endif
+	}
+
+	/** @brief Up lock->rw_sema with up_read/write() accordinf argument mode's value. */
+	static inline void  _mali_osk_mutex_rw_signal(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode)
+	{
+		BUG_ON(NULL == lock);
+		BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode));
+#ifdef DEBUG
+		/* make sure the thread releasing the lock actually was the owner */
+		if (mode == _MALI_OSK_LOCKMODE_RW) {
+			_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+			/* This lock now has no owner */
+			lock->checker.owner = 0;
+		}
+#endif
+
+		if (mode == _MALI_OSK_LOCKMODE_RO) {
+			up_read(&lock->rw_sema);
+		} else {
+			up_write(&lock->rw_sema);
+		}
+	}
+
+	/** @brief Free a given memory block which lock pointed to and its type must be
+	 * _mali_sok_mutex_rw_t *. */
+	static inline void _mali_osk_mutex_rw_term(_mali_osk_mutex_rw_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Mutex & mutex_interruptible share the same init and term function, because they have the
+	 * same osk mutex struct, and the difference between them is which locking function they use */
+	static inline _mali_osk_mutex_t *_mali_osk_mutex_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_mutex_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_mutex_t), GFP_KERNEL);
+
+		if (NULL == lock) {
+			return NULL;
+		}
+		mutex_init(&lock->mutex);
+
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief  Lock the lock->mutex with mutex_lock_interruptible function */
+	static inline _mali_osk_errcode_t _mali_osk_mutex_wait_interruptible(_mali_osk_mutex_t *lock)
+	{
+		_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+		BUG_ON(NULL == lock);
+
+		if (mutex_lock_interruptible(&lock->mutex)) {
+			printk(KERN_WARNING "Mali: Can not lock mutex\n");
+			err = _MALI_OSK_ERR_RESTARTSYSCALL;
+		}
+
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+		return err;
+	}
+
+	/** @brief Unlock the lock->mutex which is locked with mutex_lock_interruptible() function. */
+	static inline void _mali_osk_mutex_signal_interruptible(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		mutex_unlock(&lock->mutex);
+	}
+
+	/** @brief Lock the lock->mutex just with mutex_lock() function which could not be interruptted. */
+	static inline void _mali_osk_mutex_wait(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		mutex_lock(&lock->mutex);
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock the lock->mutex which is locked with mutex_lock() function. */
+	static inline void _mali_osk_mutex_signal(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		mutex_unlock(&lock->mutex);
+	}
+
+	/** @brief Free a given memory block which lock point. */
+	static inline void _mali_osk_mutex_term(_mali_osk_mutex_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+#ifdef _cplusplus
+}
+#endif
+
+#endif
diff --git a/utgard/r6p1/linux/mali_osk_low_level_mem.c b/utgard/r6p1/linux/mali_osk_low_level_mem.c
new file mode 100755
index 0000000..bc713a1
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_low_level_mem.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_low_level_mem.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <asm/io.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+
+void _mali_osk_mem_barrier(void)
+{
+	mb();
+}
+
+void _mali_osk_write_mem_barrier(void)
+{
+	wmb();
+}
+
+mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description)
+{
+	return (mali_io_address)ioremap_nocache(phys, size);
+}
+
+void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address virt)
+{
+	iounmap((void *)virt);
+}
+
+_mali_osk_errcode_t inline _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description)
+{
+#if MALI_LICENSE_IS_GPL
+	return _MALI_OSK_ERR_OK; /* GPL driver gets the mem region for the resources registered automatically */
+#else
+	return ((NULL == request_mem_region(phys, size, description)) ? _MALI_OSK_ERR_NOMEM : _MALI_OSK_ERR_OK);
+#endif
+}
+
+void inline _mali_osk_mem_unreqregion(uintptr_t phys, u32 size)
+{
+#if !MALI_LICENSE_IS_GPL
+	release_mem_region(phys, size);
+#endif
+}
+
+void inline _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val)
+{
+	__raw_writel(cpu_to_le32(val), ((u8 *)addr) + offset);
+}
+
+u32 inline _mali_osk_mem_ioread32(volatile mali_io_address addr, u32 offset)
+{
+	return ioread32(((u8 *)addr) + offset);
+}
+
+void inline _mali_osk_mem_iowrite32(volatile mali_io_address addr, u32 offset, u32 val)
+{
+	iowrite32(val, ((u8 *)addr) + offset);
+}
+
+void _mali_osk_cache_flushall(void)
+{
+	/** @note Cached memory is not currently supported in this implementation */
+}
+
+void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size)
+{
+	_mali_osk_write_mem_barrier();
+}
+
+u32 _mali_osk_mem_write_safe(void __user *dest, const void __user *src, u32 size)
+{
+#define MALI_MEM_SAFE_COPY_BLOCK_SIZE 4096
+	u32 retval = 0;
+	void *temp_buf;
+
+	temp_buf = kmalloc(MALI_MEM_SAFE_COPY_BLOCK_SIZE, GFP_KERNEL);
+	if (NULL != temp_buf) {
+		u32 bytes_left_to_copy = size;
+		u32 i;
+		for (i = 0; i < size; i += MALI_MEM_SAFE_COPY_BLOCK_SIZE) {
+			u32 size_to_copy;
+			u32 size_copied;
+			u32 bytes_left;
+
+			if (bytes_left_to_copy > MALI_MEM_SAFE_COPY_BLOCK_SIZE) {
+				size_to_copy = MALI_MEM_SAFE_COPY_BLOCK_SIZE;
+			} else {
+				size_to_copy = bytes_left_to_copy;
+			}
+
+			bytes_left = copy_from_user(temp_buf, ((char *)src) + i, size_to_copy);
+			size_copied = size_to_copy - bytes_left;
+
+			bytes_left = copy_to_user(((char *)dest) + i, temp_buf, size_copied);
+			size_copied -= bytes_left;
+
+			bytes_left_to_copy -= size_copied;
+			retval += size_copied;
+
+			if (size_copied != size_to_copy) {
+				break; /* Early out, we was not able to copy this entire block */
+			}
+		}
+
+		kfree(temp_buf);
+	}
+
+	return retval;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args)
+{
+	void __user *src;
+	void __user *dst;
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (NULL == session) {
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	src = (void __user *)(uintptr_t)args->src;
+	dst = (void __user *)(uintptr_t)args->dest;
+
+	/* Return number of bytes actually copied */
+	args->size = _mali_osk_mem_write_safe(dst, src, args->size);
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r6p1/linux/mali_osk_mali.c b/utgard/r6p1/linux/mali_osk_mali.c
new file mode 100755
index 0000000..23b2c17
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_mali.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_mali.c
+ * Implementation of the OS abstraction layer which is specific for the Mali kernel device driver
+ */
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <linux/platform_device.h>
+#include <linux/mali/mali_utgard.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h" /* MALI_xxx macros */
+#include "mali_osk.h"           /* kernel side OS functions */
+#include "mali_kernel_linux.h"
+
+static mali_bool mali_secure_mode_enabled = MALI_FALSE;
+static mali_bool mali_secure_mode_supported = MALI_FALSE;
+
+/* Function that init the mali gpu secure mode */
+void (*mali_secure_mode_deinit)(void) = NULL;
+/* Function that enable the mali gpu secure mode */
+int (*mali_secure_mode_enable)(void) = NULL;
+/* Function that disable the mali gpu secure mode */
+int (*mali_secure_mode_disable)(void) = NULL;
+
+#ifdef CONFIG_MALI_DT
+
+#define MALI_OSK_INVALID_RESOURCE_ADDRESS 0xFFFFFFFF
+
+/**
+ * Define the max number of resource we could have.
+ */
+#define MALI_OSK_MAX_RESOURCE_NUMBER 27
+
+/**
+ * Define the max number of resource with interrupts, and they are
+ * the first 20 elements in array mali_osk_resource_bank.
+ */
+#define MALI_OSK_RESOURCE_WITH_IRQ_NUMBER 20
+
+/**
+ * pp core start and end location in mali_osk_resource_bank array.
+ */
+#define MALI_OSK_RESOURCE_PP_LOCATION_START 2
+#define MALI_OSK_RESOURCE_PP_LOCATION_END 17
+
+/**
+ * L2 cache start and end location in mali_osk_resource_bank array.
+ */
+#define MALI_OSK_RESOURCE_L2_LOCATION_START 20
+#define MALI_OSK_RESOURCE_l2_LOCATION_END 22
+
+/**
+ * DMA unit location.
+ */
+#define MALI_OSK_RESOURCE_DMA_LOCATION 26
+
+static _mali_osk_resource_t mali_osk_resource_bank[MALI_OSK_MAX_RESOURCE_NUMBER] = {
+	{.description = "Mali_GP", .base = MALI_OFFSET_GP, .irq_name = "IRQGP",},
+	{.description = "Mali_GP_MMU", .base = MALI_OFFSET_GP_MMU, .irq_name = "IRQGPMMU",},
+	{.description = "Mali_PP0", .base = MALI_OFFSET_PP0, .irq_name = "IRQPP0",},
+	{.description = "Mali_PP0_MMU", .base = MALI_OFFSET_PP0_MMU, .irq_name = "IRQPPMMU0",},
+	{.description = "Mali_PP1", .base = MALI_OFFSET_PP1, .irq_name = "IRQPP1",},
+	{.description = "Mali_PP1_MMU", .base = MALI_OFFSET_PP1_MMU, .irq_name = "IRQPPMMU1",},
+	{.description = "Mali_PP2", .base = MALI_OFFSET_PP2, .irq_name = "IRQPP2",},
+	{.description = "Mali_PP2_MMU", .base = MALI_OFFSET_PP2_MMU, .irq_name = "IRQPPMMU2",},
+	{.description = "Mali_PP3", .base = MALI_OFFSET_PP3, .irq_name = "IRQPP3",},
+	{.description = "Mali_PP3_MMU", .base = MALI_OFFSET_PP3_MMU, .irq_name = "IRQPPMMU3",},
+	{.description = "Mali_PP4", .base = MALI_OFFSET_PP4, .irq_name = "IRQPP4",},
+	{.description = "Mali_PP4_MMU", .base = MALI_OFFSET_PP4_MMU, .irq_name = "IRQPPMMU4",},
+	{.description = "Mali_PP5", .base = MALI_OFFSET_PP5, .irq_name = "IRQPP5",},
+	{.description = "Mali_PP5_MMU", .base = MALI_OFFSET_PP5_MMU, .irq_name = "IRQPPMMU5",},
+	{.description = "Mali_PP6", .base = MALI_OFFSET_PP6, .irq_name = "IRQPP6",},
+	{.description = "Mali_PP6_MMU", .base = MALI_OFFSET_PP6_MMU, .irq_name = "IRQPPMMU6",},
+	{.description = "Mali_PP7", .base = MALI_OFFSET_PP7, .irq_name = "IRQPP7",},
+	{.description = "Mali_PP7_MMU", .base = MALI_OFFSET_PP7_MMU, .irq_name = "IRQPPMMU",},
+	{.description = "Mali_PP_Broadcast", .base = MALI_OFFSET_PP_BCAST, .irq_name = "IRQPP",},
+	{.description = "Mali_PMU", .base = MALI_OFFSET_PMU, .irq_name = "IRQPMU",},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE0,},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE1,},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE2,},
+	{.description = "Mali_PP_MMU_Broadcast", .base = MALI_OFFSET_PP_BCAST_MMU,},
+	{.description = "Mali_Broadcast", .base = MALI_OFFSET_BCAST,},
+	{.description = "Mali_DLBU", .base = MALI_OFFSET_DLBU,},
+	{.description = "Mali_DMA", .base = MALI_OFFSET_DMA,},
+};
+
+static int _mali_osk_get_compatible_name(const char **out_string)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+
+	MALI_DEBUG_ASSERT(NULL != node);
+
+	return of_property_read_string(node, "compatible", out_string);
+}
+
+_mali_osk_errcode_t _mali_osk_resource_initialize(void)
+{
+	mali_bool mali_is_450 = MALI_FALSE, mali_is_470 = MALI_FALSE;
+	int i, pp_core_num = 0, l2_core_num = 0;
+	struct resource *res;
+	const char *compatible_name = NULL;
+
+	if (0 == _mali_osk_get_compatible_name(&compatible_name)) {
+		if (0 == strncmp(compatible_name, "arm,mali-450", strlen("arm,mali-450"))) {
+			mali_is_450 = MALI_TRUE;
+			MALI_DEBUG_PRINT(2, ("mali-450 device tree detected."));
+		} else if (0 == strncmp(compatible_name, "arm,mali-470", strlen("arm,mali-470"))) {
+			mali_is_470 = MALI_TRUE;
+			MALI_DEBUG_PRINT(2, ("mali-470 device tree detected."));
+		}
+	}
+
+	for (i = 0; i < MALI_OSK_RESOURCE_WITH_IRQ_NUMBER; i++) {
+		res = platform_get_resource_byname(mali_platform_device, IORESOURCE_IRQ, mali_osk_resource_bank[i].irq_name);
+		if (res) {
+			mali_osk_resource_bank[i].irq = res->start;
+		} else {
+			mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+		}
+	}
+
+	for (i = MALI_OSK_RESOURCE_PP_LOCATION_START; i <= MALI_OSK_RESOURCE_PP_LOCATION_END; i++) {
+		if (MALI_OSK_INVALID_RESOURCE_ADDRESS != mali_osk_resource_bank[i].base) {
+			pp_core_num++;
+		}
+	}
+
+	/* We have to divide by 2, because we caculate twice for only one pp(pp_core and pp_mmu_core). */
+	if (0 != pp_core_num % 2) {
+		MALI_DEBUG_PRINT(2, ("The value of pp core number isn't normal."));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pp_core_num /= 2;
+
+	/**
+	 * we can caculate the number of l2 cache core according the number of pp core number
+	 * and device type(mali400/mali450/mali470).
+	 */
+	l2_core_num = 1;
+	if (mali_is_450) {
+		if (pp_core_num > 4) {
+			l2_core_num = 3;
+		} else if (pp_core_num <= 4) {
+			l2_core_num = 2;
+		}
+	}
+
+	for (i = MALI_OSK_RESOURCE_l2_LOCATION_END; i > MALI_OSK_RESOURCE_L2_LOCATION_START + l2_core_num - 1; i--) {
+		mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+	}
+
+	/* If device is not mali-450 type, we have to remove related resource from resource bank. */
+	if (!(mali_is_450 || mali_is_470)) {
+		for (i = MALI_OSK_RESOURCE_l2_LOCATION_END + 1; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) {
+			mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+		}
+	}
+
+	if (mali_is_470)
+		mali_osk_resource_bank[MALI_OSK_RESOURCE_DMA_LOCATION].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res)
+{
+	int i;
+
+	if (NULL == mali_platform_device) {
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	/* Traverse all of resources in resources bank to find the matching one. */
+	for (i = 0; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) {
+		if (mali_osk_resource_bank[i].base == addr) {
+			if (NULL != res) {
+				res->base = addr + _mali_osk_resource_base_address();
+				res->description = mali_osk_resource_bank[i].description;
+				res->irq = mali_osk_resource_bank[i].irq;
+			}
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+uintptr_t _mali_osk_resource_base_address(void)
+{
+	struct resource *reg_res = NULL;
+	uintptr_t ret = 0;
+
+	reg_res = platform_get_resource(mali_platform_device, IORESOURCE_MEM, 0);
+
+	if (NULL != reg_res) {
+		ret = reg_res->start;
+	}
+
+	return ret;
+}
+
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	MALI_DEBUG_PRINT(2, ("Get pmu config from device tree configuration.\n"));
+
+	MALI_DEBUG_ASSERT(NULL != node);
+
+	if (!of_get_property(node, "pmu_domain_config", &length)) {
+		return;
+	}
+
+	if (array_size != length / sizeof(u32)) {
+		MALI_PRINT_ERROR(("Wrong pmu domain config in device tree."));
+		return;
+	}
+
+	of_property_for_each_u32(node, "pmu_domain_config", prop, p, u) {
+		domain_config_array[i] = (u16)u;
+		i++;
+	}
+
+	return;
+}
+
+u32 _mali_osk_get_pmu_switch_delay(void)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+	u32 switch_delay;
+
+	MALI_DEBUG_ASSERT(NULL != node);
+
+	if (0 == of_property_read_u32(node, "pmu_switch_delay", &switch_delay)) {
+		return switch_delay;
+	} else {
+		MALI_DEBUG_PRINT(2, ("Couldn't find pmu_switch_delay in device tree configuration.\n"));
+	}
+
+	return 0;
+}
+
+#else /* CONFIG_MALI_DT */
+
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res)
+{
+	int i;
+	uintptr_t phys_addr;
+
+	if (NULL == mali_platform_device) {
+		/* Not connected to a device */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	phys_addr = addr + _mali_osk_resource_base_address();
+	for (i = 0; i < mali_platform_device->num_resources; i++) {
+		if (IORESOURCE_MEM == resource_type(&(mali_platform_device->resource[i])) &&
+		    mali_platform_device->resource[i].start == phys_addr) {
+			if (NULL != res) {
+				res->base = phys_addr;
+				res->description = mali_platform_device->resource[i].name;
+
+				/* Any (optional) IRQ resource belonging to this resource will follow */
+				if ((i + 1) < mali_platform_device->num_resources &&
+				    IORESOURCE_IRQ == resource_type(&(mali_platform_device->resource[i + 1]))) {
+					res->irq = mali_platform_device->resource[i + 1].start;
+				} else {
+					res->irq = -1;
+				}
+			}
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+uintptr_t _mali_osk_resource_base_address(void)
+{
+	uintptr_t lowest_addr = (uintptr_t)(0 - 1);
+	uintptr_t ret = 0;
+
+	if (NULL != mali_platform_device) {
+		int i;
+		for (i = 0; i < mali_platform_device->num_resources; i++) {
+			if (mali_platform_device->resource[i].flags & IORESOURCE_MEM &&
+			    mali_platform_device->resource[i].start < lowest_addr) {
+				lowest_addr = mali_platform_device->resource[i].start;
+				ret = lowest_addr;
+			}
+		}
+	}
+
+	return ret;
+}
+
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size)
+{
+	_mali_osk_device_data data = { 0, };
+
+	MALI_DEBUG_PRINT(2, ("Get pmu config from platform device data.\n"));
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		/* Copy the custom customer power domain config */
+		_mali_osk_memcpy(domain_config_array, data.pmu_domain_config, sizeof(data.pmu_domain_config));
+	}
+
+	return;
+}
+
+u32 _mali_osk_get_pmu_switch_delay(void)
+{
+	_mali_osk_errcode_t err;
+	_mali_osk_device_data data = { 0, };
+
+	err = _mali_osk_device_data_get(&data);
+
+	if (_MALI_OSK_ERR_OK == err) {
+		return data.pmu_switch_delay;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_MALI_DT */
+
+_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(data);
+
+	if (NULL != mali_platform_device) {
+		struct mali_gpu_device_data *os_data = NULL;
+
+		os_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data;
+		if (NULL != os_data) {
+			/* Copy data from OS dependant struct to Mali neutral struct (identical!) */
+			BUILD_BUG_ON(sizeof(*os_data) != sizeof(*data));
+			_mali_osk_memcpy(data, os_data, sizeof(*os_data));
+
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+u32 _mali_osk_identify_gpu_resource(void)
+{
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_L2_RESOURCE1, NULL))
+		/* Mali 450 */
+		return 0x450;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_DLBU, NULL))
+		/* Mali 470 */
+		return 0x470;
+
+	/* Mali 400 */
+	return 0x400;
+}
+
+mali_bool _mali_osk_shared_interrupts(void)
+{
+	u32 irqs[128];
+	u32 i, j, irq, num_irqs_found = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	MALI_DEBUG_ASSERT(128 >= mali_platform_device->num_resources);
+
+	for (i = 0; i < mali_platform_device->num_resources; i++) {
+		if (IORESOURCE_IRQ & mali_platform_device->resource[i].flags) {
+			irq = mali_platform_device->resource[i].start;
+
+			for (j = 0; j < num_irqs_found; ++j) {
+				if (irq == irqs[j]) {
+					return MALI_TRUE;
+				}
+			}
+
+			irqs[num_irqs_found++] = irq;
+		}
+	}
+
+	return MALI_FALSE;
+}
+
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_init(void)
+{
+	_mali_osk_device_data data = { 0, };
+
+	if (_MALI_OSK_ERR_OK ==  _mali_osk_device_data_get(&data)) {
+		if ((NULL != data.secure_mode_init) && (NULL != data.secure_mode_deinit)
+		    && (NULL != data.secure_mode_enable) && (NULL != data.secure_mode_disable)) {
+			int err = data.secure_mode_init();
+			if (err) {
+				MALI_DEBUG_PRINT(1, ("Failed to init gpu secure mode.\n"));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			mali_secure_mode_deinit = data.secure_mode_deinit;
+			mali_secure_mode_enable = data.secure_mode_enable;
+			mali_secure_mode_disable = data.secure_mode_disable;
+
+			mali_secure_mode_supported = MALI_TRUE;
+			mali_secure_mode_enabled = MALI_FALSE;
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+	MALI_DEBUG_PRINT(3, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+
+}
+
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_deinit(void)
+{
+	if (NULL !=  mali_secure_mode_deinit) {
+		mali_secure_mode_deinit();
+		mali_secure_mode_enabled = MALI_FALSE;
+		mali_secure_mode_supported = MALI_FALSE;
+		return _MALI_OSK_ERR_OK;
+	}
+	MALI_DEBUG_PRINT(3, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+
+}
+
+
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_enable(void)
+{
+	/* the mali executor lock must be held before enter this function. */
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == mali_secure_mode_enabled);
+
+	if (NULL !=  mali_secure_mode_enable) {
+		if (mali_secure_mode_enable()) {
+			MALI_DEBUG_PRINT(1, ("Failed to enable gpu secure mode.\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+		mali_secure_mode_enabled = MALI_TRUE;
+		return _MALI_OSK_ERR_OK;
+	}
+	MALI_DEBUG_PRINT(1, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+}
+
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_disable(void)
+{
+	/* the mali executor lock must be held before enter this function. */
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_secure_mode_enabled);
+
+	if (NULL != mali_secure_mode_disable) {
+		if (mali_secure_mode_disable()) {
+			MALI_DEBUG_PRINT(1, ("Failed to disable gpu secure mode.\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+		mali_secure_mode_enabled = MALI_FALSE;
+
+		return _MALI_OSK_ERR_OK;
+
+	}
+	MALI_DEBUG_PRINT(1, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+
+}
+
+mali_bool _mali_osk_gpu_secure_mode_is_enabled(void)
+{
+	return mali_secure_mode_enabled;
+}
+
+mali_bool _mali_osk_gpu_secure_mode_is_supported(void)
+{
+	return mali_secure_mode_supported;
+}
+
+
diff --git a/utgard/r6p1/linux/mali_osk_math.c b/utgard/r6p1/linux/mali_osk_math.c
new file mode 100755
index 0000000..3f06723
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_math.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_math.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/bitops.h>
+
+u32 _mali_osk_clz(u32 input)
+{
+	return 32 - fls(input);
+}
+
+u32 _mali_osk_fls(u32 input)
+{
+	return fls(input);
+}
diff --git a/utgard/r6p1/linux/mali_osk_memory.c b/utgard/r6p1/linux/mali_osk_memory.c
new file mode 100755
index 0000000..ad5494d
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_memory.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_memory.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+void inline *_mali_osk_calloc(u32 n, u32 size)
+{
+	return kcalloc(n, size, GFP_KERNEL);
+}
+
+void inline *_mali_osk_malloc(u32 size)
+{
+	return kmalloc(size, GFP_KERNEL);
+}
+
+void inline _mali_osk_free(void *ptr)
+{
+	kfree(ptr);
+}
+
+void inline *_mali_osk_valloc(u32 size)
+{
+	return vmalloc(size);
+}
+
+void inline _mali_osk_vfree(void *ptr)
+{
+	vfree(ptr);
+}
+
+void inline *_mali_osk_memcpy(void *dst, const void *src, u32  len)
+{
+	return memcpy(dst, src, len);
+}
+
+void inline *_mali_osk_memset(void *s, u32 c, u32 n)
+{
+	return memset(s, c, n);
+}
+
+mali_bool _mali_osk_mem_check_allocated(u32 max_allocated)
+{
+	/* No need to prevent an out-of-memory dialogue appearing on Linux,
+	 * so we always return MALI_TRUE.
+	 */
+	return MALI_TRUE;
+}
diff --git a/utgard/r6p1/linux/mali_osk_misc.c b/utgard/r6p1/linux/mali_osk_misc.c
new file mode 100755
index 0000000..7dda283
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_misc.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_misc.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include "mali_osk.h"
+
+#if !defined(CONFIG_MALI_QUIET)
+void _mali_osk_dbgmsg(const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	vprintk(fmt, args);
+	va_end(args);
+}
+#endif /* !defined(CONFIG_MALI_QUIET) */
+
+u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...)
+{
+	int res;
+	va_list args;
+	va_start(args, fmt);
+
+	res = vscnprintf(buf, (size_t)size, fmt, args);
+
+	va_end(args);
+	return res;
+}
+
+void _mali_osk_abort(void)
+{
+	/* make a simple fault by dereferencing a NULL pointer */
+	dump_stack();
+	*(int *)0 = 0;
+}
+
+void _mali_osk_break(void)
+{
+	_mali_osk_abort();
+}
+
+u32 _mali_osk_get_pid(void)
+{
+	/* Thread group ID is the process ID on Linux */
+	return (u32)current->tgid;
+}
+
+char *_mali_osk_get_comm(void)
+{
+	return (char *)current->comm;
+}
+
+
+u32 _mali_osk_get_tid(void)
+{
+	/* pid is actually identifying the thread on Linux */
+	u32 tid = current->pid;
+
+	/* If the pid is 0 the core was idle.  Instead of returning 0 we return a special number
+	 * identifying which core we are on. */
+	if (0 == tid) {
+		tid = -(1 + raw_smp_processor_id());
+	}
+
+	return tid;
+}
diff --git a/utgard/r6p1/linux/mali_osk_notification.c b/utgard/r6p1/linux/mali_osk_notification.c
new file mode 100755
index 0000000..5867841
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_notification.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_notification.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+/**
+ * Declaration of the notification queue object type
+ * Contains a linked list of notification pending delivery to user space.
+ * It also contains a wait queue of exclusive waiters blocked in the ioctl
+ * When a new notification is posted a single thread is resumed.
+ */
+struct _mali_osk_notification_queue_t_struct {
+	spinlock_t mutex; /**< Mutex protecting the list */
+	wait_queue_head_t receive_queue; /**< Threads waiting for new entries to the queue */
+	struct list_head head; /**< List of notifications waiting to be picked up */
+};
+
+typedef struct _mali_osk_notification_wrapper_t_struct {
+	struct list_head list;           /**< Internal linked list variable */
+	_mali_osk_notification_t data;   /**< Notification data */
+} _mali_osk_notification_wrapper_t;
+
+_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void)
+{
+	_mali_osk_notification_queue_t         *result;
+
+	result = (_mali_osk_notification_queue_t *)kmalloc(sizeof(_mali_osk_notification_queue_t), GFP_KERNEL);
+	if (NULL == result) return NULL;
+
+	spin_lock_init(&result->mutex);
+	init_waitqueue_head(&result->receive_queue);
+	INIT_LIST_HEAD(&result->head);
+
+	return result;
+}
+
+_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size)
+{
+	/* OPT Recycling of notification objects */
+	_mali_osk_notification_wrapper_t *notification;
+
+	notification = (_mali_osk_notification_wrapper_t *)kmalloc(sizeof(_mali_osk_notification_wrapper_t) + size,
+			GFP_KERNEL | __GFP_HIGH | __GFP_REPEAT);
+	if (NULL == notification) {
+		MALI_DEBUG_PRINT(1, ("Failed to create a notification object\n"));
+		return NULL;
+	}
+
+	/* Init the list */
+	INIT_LIST_HEAD(&notification->list);
+
+	if (0 != size) {
+		notification->data.result_buffer = ((u8 *)notification) + sizeof(_mali_osk_notification_wrapper_t);
+	} else {
+		notification->data.result_buffer = NULL;
+	}
+
+	/* set up the non-allocating fields */
+	notification->data.notification_type = type;
+	notification->data.result_buffer_size = size;
+
+	/* all ok */
+	return &(notification->data);
+}
+
+void _mali_osk_notification_delete(_mali_osk_notification_t *object)
+{
+	_mali_osk_notification_wrapper_t *notification;
+	MALI_DEBUG_ASSERT_POINTER(object);
+
+	notification = container_of(object, _mali_osk_notification_wrapper_t, data);
+
+	/* Free the container */
+	kfree(notification);
+}
+
+void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue)
+{
+	_mali_osk_notification_t *result;
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	while (_MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, &result)) {
+		_mali_osk_notification_delete(result);
+	}
+
+	/* not much to do, just free the memory */
+	kfree(queue);
+}
+void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object)
+{
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	unsigned long irq_flags;
+#endif
+
+	_mali_osk_notification_wrapper_t *notification;
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_ASSERT_POINTER(object);
+
+	notification = container_of(object, _mali_osk_notification_wrapper_t, data);
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_lock_irqsave(&queue->mutex, irq_flags);
+#else
+	spin_lock(&queue->mutex);
+#endif
+
+	list_add_tail(&notification->list, &queue->head);
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_unlock_irqrestore(&queue->mutex, irq_flags);
+#else
+	spin_unlock(&queue->mutex);
+#endif
+
+	/* and wake up one possible exclusive waiter */
+	wake_up(&queue->receive_queue);
+}
+
+_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result)
+{
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	unsigned long irq_flags;
+#endif
+
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	_mali_osk_notification_wrapper_t *wrapper_object;
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_lock_irqsave(&queue->mutex, irq_flags);
+#else
+	spin_lock(&queue->mutex);
+#endif
+
+	if (!list_empty(&queue->head)) {
+		wrapper_object = list_entry(queue->head.next, _mali_osk_notification_wrapper_t, list);
+		*result = &(wrapper_object->data);
+		list_del_init(&wrapper_object->list);
+		ret = _MALI_OSK_ERR_OK;
+	}
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_unlock_irqrestore(&queue->mutex, irq_flags);
+#else
+	spin_unlock(&queue->mutex);
+#endif
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result)
+{
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_ASSERT_POINTER(result);
+
+	/* default result */
+	*result = NULL;
+
+	if (wait_event_interruptible(queue->receive_queue,
+				     _MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, result))) {
+		return _MALI_OSK_ERR_RESTARTSYSCALL;
+	}
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
diff --git a/utgard/r6p1/linux/mali_osk_pm.c b/utgard/r6p1/linux/mali_osk_pm.c
new file mode 100755
index 0000000..860ce92
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_pm.c
@@ -0,0 +1,83 @@
+/**
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_pm.c
+ * Implementation of the callback functions from common power management
+ */
+
+#include <linux/sched.h>
+
+#include "mali_kernel_linux.h"
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_PM_RUNTIME */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/* Can NOT run in atomic context */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	int err;
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	err = pm_runtime_get_sync(&(mali_platform_device->dev));
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+#endif
+	if (0 > err) {
+		MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get_sync() returned error code %d\n", err));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+	return _MALI_OSK_ERR_OK;
+}
+
+/* Can run in atomic context */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	int err;
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	err = pm_runtime_get(&(mali_platform_device->dev));
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+#endif
+	if (0 > err && -EINPROGRESS != err) {
+		MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get() returned error code %d\n", err));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/* Can run in atomic context */
+void _mali_osk_pm_dev_ref_put(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+	pm_runtime_put_autosuspend(&(mali_platform_device->dev));
+#else
+	pm_runtime_put(&(mali_platform_device->dev));
+#endif
+#endif
+}
+
+void _mali_osk_pm_dev_barrier(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	pm_runtime_barrier(&(mali_platform_device->dev));
+#endif
+}
diff --git a/utgard/r6p1/linux/mali_osk_profiling.c b/utgard/r6p1/linux/mali_osk_profiling.c
new file mode 100755
index 0000000..35d1abc
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_profiling.c
@@ -0,0 +1,1282 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/hrtimer.h>
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/sched.h>
+
+#include <mali_profiling_gator_api.h>
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_osk_profiling.h"
+#include "mali_linux_trace.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_l2_cache.h"
+#include "mali_user_settings_db.h"
+#include "mali_executor.h"
+#include "mali_memory_manager.h"
+
+#define MALI_PROFILING_STREAM_DATA_DEFAULT_SIZE 100
+#define MALI_PROFILING_STREAM_HOLD_TIME 1000000         /*1 ms */
+
+#define MALI_PROFILING_STREAM_BUFFER_SIZE       (1 << 12)
+#define MALI_PROFILING_STREAM_BUFFER_NUM        100
+
+/**
+ * Define the mali profiling stream struct.
+ */
+typedef struct mali_profiling_stream {
+	u8 data[MALI_PROFILING_STREAM_BUFFER_SIZE];
+	u32 used_size;
+	struct list_head list;
+} mali_profiling_stream;
+
+typedef struct mali_profiling_stream_list {
+	spinlock_t spin_lock;
+	struct list_head free_list;
+	struct list_head queue_list;
+} mali_profiling_stream_list;
+
+static const char mali_name[] = "4xx";
+static const char utgard_setup_version[] = "ANNOTATE_SETUP 1\n";
+
+static u32 profiling_sample_rate = 0;
+static u32 first_sw_counter_index = 0;
+
+static mali_bool l2_cache_counter_if_enabled = MALI_FALSE;
+static u32 num_counters_enabled = 0;
+static u32 mem_counters_enabled = 0;
+
+static _mali_osk_atomic_t stream_fd_if_used;
+
+static wait_queue_head_t stream_fd_wait_queue;
+static mali_profiling_counter *global_mali_profiling_counters = NULL;
+static u32 num_global_mali_profiling_counters = 0;
+
+static mali_profiling_stream_list *global_mali_stream_list = NULL;
+static mali_profiling_stream *mali_counter_stream = NULL;
+static mali_profiling_stream *mali_core_activity_stream = NULL;
+static u64 mali_core_activity_stream_dequeue_time = 0;
+static spinlock_t mali_activity_lock;
+static u32 mali_activity_cores_num =  0;
+static struct hrtimer profiling_sampling_timer;
+
+const char *_mali_mem_counter_descriptions[] = _MALI_MEM_COUTNER_DESCRIPTIONS;
+const char *_mali_special_counter_descriptions[] = _MALI_SPCIAL_COUNTER_DESCRIPTIONS;
+
+static u32 current_profiling_pid = 0;
+
+static void _mali_profiling_stream_list_destory(mali_profiling_stream_list *profiling_stream_list)
+{
+	mali_profiling_stream *profiling_stream, *tmp_profiling_stream;
+	MALI_DEBUG_ASSERT_POINTER(profiling_stream_list);
+
+	list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &profiling_stream_list->free_list, list) {
+		list_del(&profiling_stream->list);
+		kfree(profiling_stream);
+	}
+
+	list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &profiling_stream_list->queue_list, list) {
+		list_del(&profiling_stream->list);
+		kfree(profiling_stream);
+	}
+
+	kfree(profiling_stream_list);
+}
+
+static void _mali_profiling_global_stream_list_free(void)
+{
+	mali_profiling_stream *profiling_stream, *tmp_profiling_stream;
+	unsigned long irq_flags;
+
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+	list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &global_mali_stream_list->queue_list, list) {
+		profiling_stream->used_size = 0;
+		list_move(&profiling_stream->list, &global_mali_stream_list->free_list);
+	}
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+}
+
+static _mali_osk_errcode_t _mali_profiling_global_stream_list_dequeue(struct list_head *stream_list, mali_profiling_stream **new_mali_profiling_stream)
+{
+	unsigned long irq_flags;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	MALI_DEBUG_ASSERT_POINTER(stream_list);
+
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+
+	if (!list_empty(stream_list)) {
+		*new_mali_profiling_stream = list_entry(stream_list->next, mali_profiling_stream, list);
+		list_del_init(&(*new_mali_profiling_stream)->list);
+	} else {
+		ret = _MALI_OSK_ERR_NOMEM;
+	}
+
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+
+	return ret;
+}
+
+static void _mali_profiling_global_stream_list_queue(struct list_head *stream_list, mali_profiling_stream *current_mali_profiling_stream)
+{
+	unsigned long irq_flags;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	MALI_DEBUG_ASSERT_POINTER(stream_list);
+
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+	list_add_tail(&current_mali_profiling_stream->list, stream_list);
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+}
+
+static mali_bool _mali_profiling_global_stream_queue_list_if_empty(void)
+{
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	return list_empty(&global_mali_stream_list->queue_list);
+}
+
+static u32 _mali_profiling_global_stream_queue_list_next_size(void)
+{
+	unsigned long irq_flags;
+	u32 size = 0;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+	if (!list_empty(&global_mali_stream_list->queue_list)) {
+		mali_profiling_stream *next_mali_profiling_stream =
+			list_entry(global_mali_stream_list->queue_list.next, mali_profiling_stream, list);
+		size = next_mali_profiling_stream->used_size;
+	}
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+	return size;
+}
+
+/* The mali profiling stream file operations functions. */
+static ssize_t _mali_profiling_stream_read(
+	struct file *filp,
+	char __user *buffer,
+	size_t      size,
+	loff_t      *f_pos);
+
+static unsigned int  _mali_profiling_stream_poll(struct file *filp, poll_table *wait);
+
+static int  _mali_profiling_stream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations mali_profiling_stream_fops = {
+	.release = _mali_profiling_stream_release,
+	.read    = _mali_profiling_stream_read,
+	.poll    = _mali_profiling_stream_poll,
+};
+
+static ssize_t _mali_profiling_stream_read(
+	struct file *filp,
+	char __user *buffer,
+	size_t      size,
+	loff_t      *f_pos)
+{
+	u32 copy_len = 0;
+	mali_profiling_stream *current_mali_profiling_stream;
+	u32 used_size;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+	while (!_mali_profiling_global_stream_queue_list_if_empty()) {
+		used_size = _mali_profiling_global_stream_queue_list_next_size();
+		if (used_size <= ((u32)size - copy_len)) {
+			current_mali_profiling_stream = NULL;
+			_mali_profiling_global_stream_list_dequeue(&global_mali_stream_list->queue_list,
+					&current_mali_profiling_stream);
+			MALI_DEBUG_ASSERT_POINTER(current_mali_profiling_stream);
+			if (copy_to_user(&buffer[copy_len], current_mali_profiling_stream->data, current_mali_profiling_stream->used_size)) {
+				current_mali_profiling_stream->used_size = 0;
+				_mali_profiling_global_stream_list_queue(&global_mali_stream_list->free_list, current_mali_profiling_stream);
+				return -EFAULT;
+			}
+			copy_len += current_mali_profiling_stream->used_size;
+			current_mali_profiling_stream->used_size = 0;
+			_mali_profiling_global_stream_list_queue(&global_mali_stream_list->free_list, current_mali_profiling_stream);
+		} else {
+			break;
+		}
+	}
+	return (ssize_t)copy_len;
+}
+
+static unsigned int  _mali_profiling_stream_poll(struct file *filp, poll_table *wait)
+{
+	poll_wait(filp, &stream_fd_wait_queue, wait);
+	if (!_mali_profiling_global_stream_queue_list_if_empty())
+		return POLLIN;
+	return 0;
+}
+
+static int  _mali_profiling_stream_release(struct inode *inode, struct file *filp)
+{
+	_mali_osk_atomic_init(&stream_fd_if_used, 0);
+	return 0;
+}
+
+/* The funs for control packet and stream data.*/
+static void _mali_profiling_set_packet_size(unsigned char *const buf, const u32 size)
+{
+	u32 i;
+
+	for (i = 0; i < sizeof(size); ++i)
+		buf[i] = (size >> 8 * i) & 0xFF;
+}
+
+static u32 _mali_profiling_get_packet_size(unsigned char *const buf)
+{
+	u32 i;
+	u32 size = 0;
+	for (i = 0; i < sizeof(size); ++i)
+		size |= (u32)buf[i] << 8 * i;
+	return size;
+}
+
+static u32 _mali_profiling_read_packet_int(unsigned char *const buf, u32 *const pos, u32 const packet_size)
+{
+	u64 int_value = 0;
+	u8 shift = 0;
+	u8 byte_value = ~0;
+
+	while ((byte_value & 0x80) != 0) {
+		if ((*pos) >= packet_size)
+			return -1;
+		byte_value = buf[*pos];
+		*pos += 1;
+		int_value |= (u32)(byte_value & 0x7f) << shift;
+		shift += 7;
+	}
+
+	if (shift < 8 * sizeof(int_value) && (byte_value & 0x40) != 0) {
+		int_value |= -(1 << shift);
+	}
+
+	return int_value;
+}
+
+static u32 _mali_profiling_pack_int(u8 *const buf, u32 const buf_size, u32 const pos, s32 value)
+{
+	u32 add_bytes = 0;
+	int more = 1;
+	while (more) {
+		/* low order 7 bits of val */
+		char byte_value = value & 0x7f;
+		value >>= 7;
+
+		if ((value == 0 && (byte_value & 0x40) == 0) || (value == -1 && (byte_value & 0x40) != 0)) {
+			more = 0;
+		} else {
+			byte_value |= 0x80;
+		}
+
+		if ((pos + add_bytes) >= buf_size)
+			return 0;
+		buf[pos + add_bytes] = byte_value;
+		add_bytes++;
+	}
+
+	return add_bytes;
+}
+
+static int _mali_profiling_pack_long(uint8_t *const buf, u32 const buf_size, u32 const pos, s64 val)
+{
+	int add_bytes = 0;
+	int more = 1;
+	while (more) {
+		/* low order 7 bits of x */
+		char byte_value = val & 0x7f;
+		val >>= 7;
+
+		if ((val == 0 && (byte_value & 0x40) == 0) || (val == -1 && (byte_value & 0x40) != 0)) {
+			more = 0;
+		} else {
+			byte_value |= 0x80;
+		}
+
+		MALI_DEBUG_ASSERT((pos + add_bytes) < buf_size);
+		buf[pos + add_bytes] = byte_value;
+		add_bytes++;
+	}
+
+	return add_bytes;
+}
+
+static void _mali_profiling_stream_add_counter(mali_profiling_stream *profiling_stream, s64 current_time, u32 key, u32 counter_value)
+{
+	u32 add_size = STREAM_HEADER_SIZE;
+	MALI_DEBUG_ASSERT_POINTER(profiling_stream);
+	MALI_DEBUG_ASSERT((profiling_stream->used_size) < MALI_PROFILING_STREAM_BUFFER_SIZE);
+
+	profiling_stream->data[profiling_stream->used_size] = STREAM_HEADER_COUNTER_VALUE;
+
+	add_size += _mali_profiling_pack_long(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					      profiling_stream->used_size + add_size, current_time);
+	add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					     profiling_stream->used_size + add_size, (s32)0);
+	add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					     profiling_stream->used_size + add_size, (s32)key);
+	add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					     profiling_stream->used_size + add_size, (s32)counter_value);
+
+	_mali_profiling_set_packet_size(profiling_stream->data + profiling_stream->used_size + 1,
+					add_size - STREAM_HEADER_SIZE);
+
+	profiling_stream->used_size += add_size;
+}
+
+/* The callback function for sampling timer.*/
+static enum hrtimer_restart  _mali_profiling_sampling_counters(struct hrtimer *timer)
+{
+	u32 counter_index;
+	s64 current_time;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_profiling_counters);
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+	MALI_DEBUG_ASSERT(NULL == mali_counter_stream);
+	if (_MALI_OSK_ERR_OK == _mali_profiling_global_stream_list_dequeue(
+		    &global_mali_stream_list->free_list, &mali_counter_stream)) {
+
+		MALI_DEBUG_ASSERT_POINTER(mali_counter_stream);
+		MALI_DEBUG_ASSERT(0 == mali_counter_stream->used_size);
+
+		/* Capture l2 cache counter values if enabled */
+		if (MALI_TRUE == l2_cache_counter_if_enabled) {
+			int i, j = 0;
+			_mali_profiling_l2_counter_values l2_counters_values;
+			_mali_profiling_get_l2_counters(&l2_counters_values);
+
+			for (i  = COUNTER_L2_0_C0; i <= COUNTER_L2_2_C1; i++) {
+				if (0 == (j % 2))
+					_mali_osk_profiling_record_global_counters(i, l2_counters_values.cores[j / 2].value0);
+				else
+					_mali_osk_profiling_record_global_counters(i, l2_counters_values.cores[j / 2].value1);
+				j++;
+			}
+		}
+
+		current_time = (s64)_mali_osk_boot_time_get_ns();
+
+		/* Add all enabled counter values into stream */
+		for (counter_index = 0; counter_index < num_global_mali_profiling_counters; counter_index++) {
+			/* No need to sample these couners here. */
+			if (global_mali_profiling_counters[counter_index].enabled) {
+				if ((global_mali_profiling_counters[counter_index].counter_id >= FIRST_MEM_COUNTER &&
+				     global_mali_profiling_counters[counter_index].counter_id <= LAST_MEM_COUNTER)
+				    || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_VP_ACTIVITY)
+				    || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_FP_ACTIVITY)
+				    || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_FILMSTRIP)) {
+
+					continue;
+				}
+
+				if (global_mali_profiling_counters[counter_index].counter_id >= COUNTER_L2_0_C0 &&
+				    global_mali_profiling_counters[counter_index].counter_id <= COUNTER_L2_2_C1) {
+
+					u32 prev_val = global_mali_profiling_counters[counter_index].prev_counter_value;
+
+					_mali_profiling_stream_add_counter(mali_counter_stream, current_time, global_mali_profiling_counters[counter_index].key,
+									   global_mali_profiling_counters[counter_index].current_counter_value - prev_val);
+
+					prev_val = global_mali_profiling_counters[counter_index].current_counter_value;
+
+					global_mali_profiling_counters[counter_index].prev_counter_value = prev_val;
+				} else {
+
+					if (global_mali_profiling_counters[counter_index].counter_id == COUNTER_TOTAL_ALLOC_PAGES) {
+						u32 total_alloc_mem = _mali_ukk_report_memory_usage();
+						global_mali_profiling_counters[counter_index].current_counter_value = total_alloc_mem / _MALI_OSK_MALI_PAGE_SIZE;
+					}
+					_mali_profiling_stream_add_counter(mali_counter_stream, current_time, global_mali_profiling_counters[counter_index].key,
+									   global_mali_profiling_counters[counter_index].current_counter_value);
+					if (global_mali_profiling_counters[counter_index].counter_id < FIRST_SPECIAL_COUNTER)
+						global_mali_profiling_counters[counter_index].current_counter_value = 0;
+				}
+			}
+		}
+		_mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_counter_stream);
+		mali_counter_stream = NULL;
+	} else {
+		MALI_DEBUG_PRINT(1, ("Not enough mali profiling stream buffer!\n"));
+	}
+
+	wake_up_interruptible(&stream_fd_wait_queue);
+
+	/*Enable the sampling timer again*/
+	if (0 != num_counters_enabled && 0 != profiling_sample_rate) {
+		hrtimer_forward_now(&profiling_sampling_timer, ns_to_ktime(profiling_sample_rate));
+		return HRTIMER_RESTART;
+	}
+	return HRTIMER_NORESTART;
+}
+
+static void _mali_profiling_sampling_core_activity_switch(int counter_id, int core, u32 activity, u32 pid)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&mali_activity_lock, irq_flags);
+	if (activity == 0)
+		mali_activity_cores_num--;
+	else
+		mali_activity_cores_num++;
+	spin_unlock_irqrestore(&mali_activity_lock, irq_flags);
+
+	if (NULL != global_mali_profiling_counters) {
+		int i ;
+		for (i = 0; i < num_global_mali_profiling_counters; i++) {
+			if (counter_id == global_mali_profiling_counters[i].counter_id && global_mali_profiling_counters[i].enabled) {
+				u64 current_time = _mali_osk_boot_time_get_ns();
+				u32 add_size = STREAM_HEADER_SIZE;
+
+				if (NULL != mali_core_activity_stream) {
+					if ((mali_core_activity_stream_dequeue_time +  MALI_PROFILING_STREAM_HOLD_TIME < current_time) ||
+					    (MALI_PROFILING_STREAM_DATA_DEFAULT_SIZE > MALI_PROFILING_STREAM_BUFFER_SIZE
+					     - mali_core_activity_stream->used_size)) {
+						_mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_core_activity_stream);
+						mali_core_activity_stream = NULL;
+						wake_up_interruptible(&stream_fd_wait_queue);
+					}
+				}
+
+				if (NULL == mali_core_activity_stream) {
+					if (_MALI_OSK_ERR_OK == _mali_profiling_global_stream_list_dequeue(
+						    &global_mali_stream_list->free_list, &mali_core_activity_stream)) {
+						mali_core_activity_stream_dequeue_time = current_time;
+					} else {
+						MALI_DEBUG_PRINT(1, ("Not enough mali profiling stream buffer!\n"));
+						wake_up_interruptible(&stream_fd_wait_queue);
+						break;
+					}
+
+				}
+
+				mali_core_activity_stream->data[mali_core_activity_stream->used_size] = STREAM_HEADER_CORE_ACTIVITY;
+
+				add_size += _mali_profiling_pack_long(mali_core_activity_stream->data,
+								      MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, (s64)current_time);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, core);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, (s32)global_mali_profiling_counters[i].key);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, activity);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, pid);
+
+				_mali_profiling_set_packet_size(mali_core_activity_stream->data + mali_core_activity_stream->used_size + 1,
+								add_size - STREAM_HEADER_SIZE);
+
+				mali_core_activity_stream->used_size += add_size;
+
+				if (0 == mali_activity_cores_num) {
+					_mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_core_activity_stream);
+					mali_core_activity_stream = NULL;
+					wake_up_interruptible(&stream_fd_wait_queue);
+				}
+
+				break;
+			}
+		}
+	}
+}
+
+static mali_bool _mali_profiling_global_counters_init(void)
+{
+	int core_id, counter_index, counter_number, counter_id;
+	u32 num_l2_cache_cores;
+	u32 num_pp_cores;
+	u32 num_gp_cores = 1;
+
+	MALI_DEBUG_ASSERT(NULL == global_mali_profiling_counters);
+	num_pp_cores = mali_pp_get_glob_num_pp_cores();
+	num_l2_cache_cores =    mali_l2_cache_core_get_glob_num_l2_cores();
+
+	num_global_mali_profiling_counters = 3 * (num_gp_cores + num_pp_cores) + 2 * num_l2_cache_cores
+					     + MALI_PROFILING_SW_COUNTERS_NUM
+					     + MALI_PROFILING_SPECIAL_COUNTERS_NUM
+					     + MALI_PROFILING_MEM_COUNTERS_NUM;
+	global_mali_profiling_counters = _mali_osk_calloc(num_global_mali_profiling_counters, sizeof(mali_profiling_counter));
+
+	if (NULL == global_mali_profiling_counters)
+		return MALI_FALSE;
+
+	counter_index = 0;
+	/*Vertex processor counters */
+	for (core_id = 0; core_id < num_gp_cores; core_id ++) {
+		global_mali_profiling_counters[counter_index].counter_id = ACTIVITY_VP_0 + core_id;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_VP_%d_active", mali_name, core_id);
+
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			counter_index++;
+			global_mali_profiling_counters[counter_index].counter_id = COUNTER_VP_0_C0 + (2 * core_id) + counter_number;
+			_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+					   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_VP_%d_cnt%d", mali_name, core_id, counter_number);
+		}
+	}
+
+	/* Fragment processors' counters */
+	for (core_id = 0; core_id < num_pp_cores; core_id++) {
+		counter_index++;
+		global_mali_profiling_counters[counter_index].counter_id = ACTIVITY_FP_0 + core_id;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_FP_%d_active", mali_name, core_id);
+
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			counter_index++;
+			global_mali_profiling_counters[counter_index].counter_id = COUNTER_FP_0_C0 + (2 * core_id) + counter_number;
+			_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+					   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_FP_%d_cnt%d", mali_name, core_id, counter_number);
+		}
+	}
+
+	/* L2 Cache counters */
+	for (core_id = 0; core_id < num_l2_cache_cores; core_id++) {
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			counter_index++;
+			global_mali_profiling_counters[counter_index].counter_id = COUNTER_L2_0_C0 + (2 * core_id) + counter_number;
+			_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+					   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_L2_%d_cnt%d", mali_name, core_id, counter_number);
+		}
+	}
+
+	/* Now set up the software counter entries */
+	for (counter_id = FIRST_SW_COUNTER; counter_id <= LAST_SW_COUNTER; counter_id++) {
+		counter_index++;
+
+		if (0 == first_sw_counter_index)
+			first_sw_counter_index = counter_index;
+
+		global_mali_profiling_counters[counter_index].counter_id = counter_id;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_SW_%d", mali_name, counter_id - FIRST_SW_COUNTER);
+	}
+
+	/* Now set up the special counter entries */
+	for (counter_id = FIRST_SPECIAL_COUNTER; counter_id <= LAST_SPECIAL_COUNTER; counter_id++) {
+
+		counter_index++;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_%s",
+				   mali_name, _mali_special_counter_descriptions[counter_id - FIRST_SPECIAL_COUNTER]);
+
+		global_mali_profiling_counters[counter_index].counter_id = counter_id;
+	}
+
+	/* Now set up the mem counter entries*/
+	for (counter_id = FIRST_MEM_COUNTER; counter_id <= LAST_MEM_COUNTER; counter_id++) {
+
+		counter_index++;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_%s",
+				   mali_name, _mali_mem_counter_descriptions[counter_id - FIRST_MEM_COUNTER]);
+
+		global_mali_profiling_counters[counter_index].counter_id = counter_id;
+	}
+
+	MALI_DEBUG_ASSERT((counter_index + 1) == num_global_mali_profiling_counters);
+
+	return MALI_TRUE;
+}
+
+void _mali_profiling_notification_mem_counter(struct mali_session_data *session, u32 counter_id, u32 key, int enable)
+{
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (NULL != session) {
+		_mali_osk_notification_t *notification;
+		_mali_osk_notification_queue_t *queue;
+
+		queue = session->ioctl_queue;
+		MALI_DEBUG_ASSERT(NULL != queue);
+
+		notification = _mali_osk_notification_create(_MALI_NOTIFICATION_ANNOTATE_PROFILING_MEM_COUNTER,
+				sizeof(_mali_uk_annotate_profiling_mem_counter_s));
+
+		if (NULL != notification) {
+			_mali_uk_annotate_profiling_mem_counter_s *data = notification->result_buffer;
+			data->counter_id = counter_id;
+			data->key = key;
+			data->enable = enable;
+
+			_mali_osk_notification_queue_send(queue, notification);
+		} else {
+			MALI_PRINT_ERROR(("Failed to create notification object!\n"));
+		}
+	} else {
+		MALI_PRINT_ERROR(("Failed to find the right session!\n"));
+	}
+}
+
+void _mali_profiling_notification_enable(struct mali_session_data *session, u32 sampling_rate, int enable)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (NULL != session) {
+		_mali_osk_notification_t *notification;
+		_mali_osk_notification_queue_t *queue;
+
+		queue = session->ioctl_queue;
+		MALI_DEBUG_ASSERT(NULL != queue);
+
+		notification = _mali_osk_notification_create(_MALI_NOTIFICATION_ANNOTATE_PROFILING_ENABLE,
+				sizeof(_mali_uk_annotate_profiling_enable_s));
+
+		if (NULL != notification) {
+			_mali_uk_annotate_profiling_enable_s *data = notification->result_buffer;
+			data->sampling_rate = sampling_rate;
+			data->enable = enable;
+
+			_mali_osk_notification_queue_send(queue, notification);
+		} else {
+			MALI_PRINT_ERROR(("Failed to create notification object!\n"));
+		}
+	} else {
+		MALI_PRINT_ERROR(("Failed to find the right session!\n"));
+	}
+}
+
+
+_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start)
+{
+	int i;
+	mali_profiling_stream *new_mali_profiling_stream = NULL;
+	mali_profiling_stream_list *new_mali_profiling_stream_list = NULL;
+	if (MALI_TRUE == auto_start) {
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE);
+	}
+
+	/*Init the global_mali_stream_list*/
+	MALI_DEBUG_ASSERT(NULL == global_mali_stream_list);
+	new_mali_profiling_stream_list = (mali_profiling_stream_list *)kmalloc(sizeof(mali_profiling_stream_list), GFP_KERNEL);
+
+	if (NULL == new_mali_profiling_stream_list) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	spin_lock_init(&new_mali_profiling_stream_list->spin_lock);
+	INIT_LIST_HEAD(&new_mali_profiling_stream_list->free_list);
+	INIT_LIST_HEAD(&new_mali_profiling_stream_list->queue_list);
+
+	spin_lock_init(&mali_activity_lock);
+	mali_activity_cores_num =  0;
+
+	for (i = 0; i < MALI_PROFILING_STREAM_BUFFER_NUM; i++) {
+		new_mali_profiling_stream = (mali_profiling_stream *)kmalloc(sizeof(mali_profiling_stream), GFP_KERNEL);
+		if (NULL == new_mali_profiling_stream) {
+			_mali_profiling_stream_list_destory(new_mali_profiling_stream_list);
+			return _MALI_OSK_ERR_NOMEM;
+		}
+
+		INIT_LIST_HEAD(&new_mali_profiling_stream->list);
+		new_mali_profiling_stream->used_size = 0;
+		list_add_tail(&new_mali_profiling_stream->list, &new_mali_profiling_stream_list->free_list);
+
+	}
+
+	_mali_osk_atomic_init(&stream_fd_if_used, 0);
+	init_waitqueue_head(&stream_fd_wait_queue);
+
+	hrtimer_init(&profiling_sampling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+	profiling_sampling_timer.function = _mali_profiling_sampling_counters;
+
+	global_mali_stream_list = new_mali_profiling_stream_list;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_profiling_term(void)
+{
+	if (0 != profiling_sample_rate) {
+		hrtimer_cancel(&profiling_sampling_timer);
+		profiling_sample_rate = 0;
+	}
+	_mali_osk_atomic_term(&stream_fd_if_used);
+
+	if (NULL != global_mali_profiling_counters) {
+		_mali_osk_free(global_mali_profiling_counters);
+		global_mali_profiling_counters = NULL;
+		num_global_mali_profiling_counters = 0;
+	}
+
+	if (NULL != global_mali_stream_list) {
+		_mali_profiling_stream_list_destory(global_mali_stream_list);
+		global_mali_stream_list = NULL;
+	}
+
+}
+
+void _mali_osk_profiling_stop_sampling(u32 pid)
+{
+	if (pid == current_profiling_pid) {
+
+		int i;
+		/* Reset all counter states when closing connection.*/
+		for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+			_mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, MALI_HW_CORE_NO_COUNTER);
+			global_mali_profiling_counters[i].enabled = 0;
+			global_mali_profiling_counters[i].prev_counter_value = 0;
+			global_mali_profiling_counters[i].current_counter_value = 0;
+		}
+		l2_cache_counter_if_enabled = MALI_FALSE;
+		num_counters_enabled = 0;
+		mem_counters_enabled = 0;
+		_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 0);
+		_mali_profiling_control(SW_COUNTER_ENABLE, 0);
+		/* Delete sampling timer when closing connection. */
+		if (0 != profiling_sample_rate) {
+			hrtimer_cancel(&profiling_sampling_timer);
+			profiling_sample_rate = 0;
+		}
+		current_profiling_pid = 0;
+	}
+}
+
+void    _mali_osk_profiling_add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4)
+{
+	/*Record the freq & volt to global_mali_profiling_counters here. */
+	if (0 != profiling_sample_rate) {
+		u32 channel;
+		u32 state;
+		channel = (event_id >> 16) & 0xFF;
+		state = ((event_id >> 24) & 0xF) << 24;
+
+		switch (state) {
+		case MALI_PROFILING_EVENT_TYPE_SINGLE:
+			if ((MALI_PROFILING_EVENT_CHANNEL_GPU >> 16) == channel) {
+				u32 reason = (event_id & 0xFFFF);
+				if (MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE == reason) {
+					_mali_osk_profiling_record_global_counters(COUNTER_FREQUENCY, data0);
+					_mali_osk_profiling_record_global_counters(COUNTER_VOLTAGE, data1);
+				}
+			}
+			break;
+		case MALI_PROFILING_EVENT_TYPE_START:
+			if ((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) == channel) {
+				_mali_profiling_sampling_core_activity_switch(COUNTER_VP_ACTIVITY, 0, 1, data1);
+			} else if (channel >= (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) &&
+				   (MALI_PROFILING_EVENT_CHANNEL_PP7 >> 16) >= channel) {
+				u32 core_id = channel - (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16);
+				_mali_profiling_sampling_core_activity_switch(COUNTER_FP_ACTIVITY, core_id, 1, data1);
+			}
+			break;
+		case MALI_PROFILING_EVENT_TYPE_STOP:
+			if ((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) == channel) {
+				_mali_profiling_sampling_core_activity_switch(COUNTER_VP_ACTIVITY, 0, 0, 0);
+			} else if (channel >= (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) &&
+				   (MALI_PROFILING_EVENT_CHANNEL_PP7 >> 16) >= channel) {
+				u32 core_id = channel - (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16);
+				_mali_profiling_sampling_core_activity_switch(COUNTER_FP_ACTIVITY, core_id, 0, 0);
+			}
+			break;
+		default:
+			break;
+		}
+	}
+	trace_mali_timeline_event(event_id, data0, data1, data2, data3, data4);
+}
+
+void _mali_osk_profiling_report_sw_counters(u32 *counters)
+{
+	trace_mali_sw_counters(_mali_osk_get_pid(), _mali_osk_get_tid(), NULL, counters);
+}
+
+void _mali_osk_profiling_record_global_counters(int counter_id, u32 value)
+{
+	if (NULL != global_mali_profiling_counters) {
+		int i ;
+		for (i = 0; i < num_global_mali_profiling_counters; i++) {
+			if (counter_id == global_mali_profiling_counters[i].counter_id && global_mali_profiling_counters[i].enabled) {
+				global_mali_profiling_counters[i].current_counter_value = value;
+				break;
+			}
+		}
+	}
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args)
+{
+	/* Always add process and thread identificator in the first two data elements for events from user space */
+	_mali_osk_profiling_add_event(args->event_id, _mali_osk_get_pid(), _mali_osk_get_tid(), args->data[2], args->data[3], args->data[4]);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args)
+{
+	u32 *counters = (u32 *)(uintptr_t)args->counters;
+
+	_mali_osk_profiling_report_sw_counters(counters);
+
+	if (NULL != global_mali_profiling_counters) {
+		int i;
+		for (i = 0; i < MALI_PROFILING_SW_COUNTERS_NUM; i ++) {
+			if (global_mali_profiling_counters[first_sw_counter_index + i].enabled) {
+				global_mali_profiling_counters[first_sw_counter_index + i].current_counter_value = *(counters + i);
+			}
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_stream_fd_get(_mali_uk_profiling_stream_fd_get_s *args)
+{
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (1 == _mali_osk_atomic_inc_return(&stream_fd_if_used)) {
+
+		s32 fd = anon_inode_getfd("[mali_profiling_stream]", &mali_profiling_stream_fops,
+					  session,
+					  O_RDONLY | O_CLOEXEC);
+
+		args->stream_fd = fd;
+		if (0 > fd) {
+			_mali_osk_atomic_dec(&stream_fd_if_used);
+			return _MALI_OSK_ERR_FAULT;
+		}
+		args->stream_fd = fd;
+	} else {
+		_mali_osk_atomic_dec(&stream_fd_if_used);
+		args->stream_fd = -1;
+		return _MALI_OSK_ERR_BUSY;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_control_set(_mali_uk_profiling_control_set_s *args)
+{
+	u32 control_packet_size;
+	u32 output_buffer_size;
+
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (NULL == global_mali_profiling_counters && MALI_FALSE == _mali_profiling_global_counters_init()) {
+		MALI_PRINT_ERROR(("Failed to create global_mali_profiling_counters.\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	control_packet_size = args->control_packet_size;
+	output_buffer_size = args->response_packet_size;
+
+	if (0 != control_packet_size) {
+		u8 control_type;
+		u8 *control_packet_data;
+		u8 *response_packet_data;
+		u32 version_length = sizeof(utgard_setup_version) - 1;
+
+		control_packet_data = (u8 *)(uintptr_t)args->control_packet_data;
+		MALI_DEBUG_ASSERT_POINTER(control_packet_data);
+		response_packet_data = (u8 *)(uintptr_t)args->response_packet_data;
+		MALI_DEBUG_ASSERT_POINTER(response_packet_data);
+
+		/*Decide if need to ignore Utgard setup version.*/
+		if (control_packet_size >= version_length) {
+			if (0 == memcmp(control_packet_data, utgard_setup_version, version_length)) {
+				if (control_packet_size == version_length) {
+					args->response_packet_size = 0;
+					return _MALI_OSK_ERR_OK;
+				} else {
+					control_packet_data += version_length;
+					control_packet_size -= version_length;
+				}
+			}
+		}
+
+		current_profiling_pid = _mali_osk_get_pid();
+
+		control_type = control_packet_data[0];
+		switch (control_type) {
+		case PACKET_HEADER_COUNTERS_REQUEST: {
+			int i;
+
+			if (PACKET_HEADER_SIZE > control_packet_size ||
+			    control_packet_size !=  _mali_profiling_get_packet_size(control_packet_data + 1)) {
+				MALI_PRINT_ERROR(("Wrong control packet  size, type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			/* Send supported counters */
+			if (PACKET_HEADER_SIZE > output_buffer_size)
+				return _MALI_OSK_ERR_FAULT;
+
+			*response_packet_data = PACKET_HEADER_COUNTERS_ACK;
+			args->response_packet_size = PACKET_HEADER_SIZE;
+
+			for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+				u32 name_size = strlen(global_mali_profiling_counters[i].counter_name);
+
+				if ((args->response_packet_size + name_size + 1) > output_buffer_size) {
+					MALI_PRINT_ERROR(("Response packet data is too large..\n"));
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				memcpy(response_packet_data + args->response_packet_size,
+				       global_mali_profiling_counters[i].counter_name, name_size + 1);
+
+				args->response_packet_size += (name_size + 1);
+
+				if (global_mali_profiling_counters[i].counter_id == COUNTER_VP_ACTIVITY) {
+					args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+								      output_buffer_size, args->response_packet_size, (s32)1);
+				} else if (global_mali_profiling_counters[i].counter_id == COUNTER_FP_ACTIVITY) {
+					args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+								      output_buffer_size, args->response_packet_size, (s32)mali_pp_get_glob_num_pp_cores());
+				} else {
+					args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+								      output_buffer_size, args->response_packet_size, (s32) - 1);
+				}
+			}
+
+			_mali_profiling_set_packet_size(response_packet_data + 1, args->response_packet_size);
+			break;
+		}
+
+		case PACKET_HEADER_COUNTERS_ENABLE: {
+			int i;
+			u32 request_pos = PACKET_HEADER_SIZE;
+			mali_bool sw_counter_if_enabled = MALI_FALSE;
+
+			if (PACKET_HEADER_SIZE > control_packet_size ||
+			    control_packet_size !=  _mali_profiling_get_packet_size(control_packet_data + 1)) {
+				MALI_PRINT_ERROR(("Wrong control packet  size , type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			/* Init all counter states before enable requested counters.*/
+			for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+				_mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, MALI_HW_CORE_NO_COUNTER);
+				global_mali_profiling_counters[i].enabled = 0;
+				global_mali_profiling_counters[i].prev_counter_value = 0;
+				global_mali_profiling_counters[i].current_counter_value = 0;
+
+				if (global_mali_profiling_counters[i].counter_id >= FIRST_MEM_COUNTER &&
+				    global_mali_profiling_counters[i].counter_id <= LAST_MEM_COUNTER) {
+					_mali_profiling_notification_mem_counter(session, global_mali_profiling_counters[i].counter_id, 0, 0);
+				}
+			}
+
+			l2_cache_counter_if_enabled = MALI_FALSE;
+			num_counters_enabled = 0;
+			mem_counters_enabled = 0;
+			_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 0);
+			_mali_profiling_control(SW_COUNTER_ENABLE, 0);
+			_mali_profiling_notification_enable(session, 0, 0);
+
+			/* Enable requested counters */
+			while (request_pos < control_packet_size) {
+				u32 begin = request_pos;
+				u32 event;
+				u32 key;
+
+				/* Check the counter name which should be ended with null */
+				while (request_pos < control_packet_size && control_packet_data[request_pos] != '\0') {
+					++request_pos;
+				}
+
+				if (request_pos >= control_packet_size)
+					return _MALI_OSK_ERR_FAULT;
+
+				++request_pos;
+				event = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+				key = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+
+				for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+					u32 name_size = strlen((char *)(control_packet_data + begin));
+
+					if (strncmp(global_mali_profiling_counters[i].counter_name, (char *)(control_packet_data + begin), name_size) == 0) {
+						if (!sw_counter_if_enabled && (FIRST_SW_COUNTER <= global_mali_profiling_counters[i].counter_id
+									       && global_mali_profiling_counters[i].counter_id <= LAST_SW_COUNTER)) {
+							sw_counter_if_enabled = MALI_TRUE;
+							_mali_profiling_control(SW_COUNTER_ENABLE, 1);
+						}
+
+						if (COUNTER_FILMSTRIP == global_mali_profiling_counters[i].counter_id) {
+							_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 1);
+							_mali_profiling_control(FBDUMP_CONTROL_RATE, event & 0xff);
+							_mali_profiling_control(FBDUMP_CONTROL_RESIZE_FACTOR, (event >> 8) & 0xff);
+						}
+
+						if (global_mali_profiling_counters[i].counter_id >= FIRST_MEM_COUNTER &&
+						    global_mali_profiling_counters[i].counter_id <= LAST_MEM_COUNTER) {
+							_mali_profiling_notification_mem_counter(session, global_mali_profiling_counters[i].counter_id,
+									key, 1);
+							mem_counters_enabled++;
+						}
+
+						global_mali_profiling_counters[i].counter_event = event;
+						global_mali_profiling_counters[i].key = key;
+						global_mali_profiling_counters[i].enabled = 1;
+
+						_mali_profiling_set_event(global_mali_profiling_counters[i].counter_id,
+									  global_mali_profiling_counters[i].counter_event);
+						num_counters_enabled++;
+						break;
+					}
+				}
+
+				if (i == num_global_mali_profiling_counters) {
+					MALI_PRINT_ERROR(("Counter name does not match for type %u.\n", control_type));
+					return _MALI_OSK_ERR_FAULT;
+				}
+			}
+
+			if (PACKET_HEADER_SIZE <= output_buffer_size) {
+				*response_packet_data = PACKET_HEADER_ACK;
+				_mali_profiling_set_packet_size(response_packet_data + 1, PACKET_HEADER_SIZE);
+				args->response_packet_size = PACKET_HEADER_SIZE;
+			} else {
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			break;
+		}
+
+		case PACKET_HEADER_START_CAPTURE_VALUE: {
+			u32 live_rate;
+			u32 request_pos = PACKET_HEADER_SIZE;
+
+			if (PACKET_HEADER_SIZE > control_packet_size ||
+			    control_packet_size !=  _mali_profiling_get_packet_size(control_packet_data + 1)) {
+				MALI_PRINT_ERROR(("Wrong control packet  size , type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			/* Read samping rate in nanoseconds and live rate, start capture.*/
+			profiling_sample_rate =  _mali_profiling_read_packet_int(control_packet_data,
+						 &request_pos, control_packet_size);
+
+			live_rate = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+
+			if (PACKET_HEADER_SIZE <= output_buffer_size) {
+				*response_packet_data = PACKET_HEADER_ACK;
+				_mali_profiling_set_packet_size(response_packet_data + 1, PACKET_HEADER_SIZE);
+				args->response_packet_size = PACKET_HEADER_SIZE;
+			} else {
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			if (0 != num_counters_enabled && 0 != profiling_sample_rate) {
+				_mali_profiling_global_stream_list_free();
+				if (mem_counters_enabled > 0) {
+					_mali_profiling_notification_enable(session, profiling_sample_rate, 1);
+				}
+				hrtimer_start(&profiling_sampling_timer,
+					      ktime_set(profiling_sample_rate / 1000000000, profiling_sample_rate % 1000000000),
+					      HRTIMER_MODE_REL_PINNED);
+			}
+
+			break;
+		}
+		default:
+			MALI_PRINT_ERROR(("Unsupported  profiling packet header type %u.\n", control_type));
+			args->response_packet_size  = 0;
+			return _MALI_OSK_ERR_FAULT;
+		}
+	} else {
+		_mali_osk_profiling_stop_sampling(current_profiling_pid);
+		_mali_profiling_notification_enable(session, 0, 0);
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+/**
+ * Called by gator.ko to set HW counters
+ *
+ * @param counter_id The counter ID.
+ * @param event_id Event ID that the counter should count (HW counter value from TRM).
+ *
+ * @return 1 on success, 0 on failure.
+ */
+int _mali_profiling_set_event(u32 counter_id, s32 event_id)
+{
+	if (COUNTER_VP_0_C0 == counter_id) {
+		mali_gp_job_set_gp_counter_src0(event_id);
+	} else if (COUNTER_VP_0_C1 == counter_id) {
+		mali_gp_job_set_gp_counter_src1(event_id);
+	} else if (COUNTER_FP_0_C0 <= counter_id && COUNTER_FP_7_C1 >= counter_id) {
+		/*
+		 * Two compatibility notes for this function:
+		 *
+		 * 1) Previously the DDK allowed per core counters.
+		 *
+		 *    This did not make much sense on Mali-450 with the "virtual PP core" concept,
+		 *    so this option was removed, and only the same pair of HW counters was allowed on all cores,
+		 *    beginning with r3p2 release.
+		 *
+		 *    Starting with r4p0, it is now possible to set different HW counters for the different sub jobs.
+		 *    This should be almost the same, since sub job 0 is designed to run on core 0,
+		 *    sub job 1 on core 1, and so on.
+		 *
+		 *    The scheduling of PP sub jobs is not predictable, and this often led to situations where core 0 ran 2
+		 *    sub jobs, while for instance core 1 ran zero. Having the counters set per sub job would thus increase
+		 *    the predictability of the returned data (as you would be guaranteed data for all the selected HW counters).
+		 *
+		 *    PS: Core scaling needs to be disabled in order to use this reliably (goes for both solutions).
+		 *
+		 *    The framework/#defines with Gator still indicates that the counter is for a particular core,
+		 *    but this is internally used as a sub job ID instead (no translation needed).
+		 *
+		 *  2) Global/default vs per sub job counters
+		 *
+		 *     Releases before r3p2 had only per PP core counters.
+		 *     r3p2 releases had only one set of default/global counters which applied to all PP cores
+		 *     Starting with r4p0, we have both a set of default/global counters,
+		 *     and individual counters per sub job (equal to per core).
+		 *
+		 *     To keep compatibility with Gator/DS-5/streamline, the following scheme is used:
+		 *
+		 *     r3p2 release; only counters set for core 0 is handled,
+		 *     this is applied as the default/global set of counters, and will thus affect all cores.
+		 *
+		 *     r4p0 release; counters set for core 0 is applied as both the global/default set of counters,
+		 *     and counters for sub job 0.
+		 *     Counters set for core 1-7 is only applied for the corresponding sub job.
+		 *
+		 *     This should allow the DS-5/Streamline GUI to have a simple mode where it only allows setting the
+		 *     values for core 0, and thus this will be applied to all PP sub jobs/cores.
+		 *     Advanced mode will also be supported, where individual pairs of HW counters can be selected.
+		 *
+		 *     The GUI will (until it is updated) still refer to cores instead of sub jobs, but this is probably
+		 *     something we can live with!
+		 *
+		 *     Mali-450 note: Each job is not divided into a deterministic number of sub jobs, as the HW DLBU
+		 *     automatically distributes the load between whatever number of cores is available at this particular time.
+		 *     A normal PP job on Mali-450 is thus considered a single (virtual) job, and it will thus only be possible
+		 *     to use a single pair of HW counters (even if the job ran on multiple PP cores).
+		 *     In other words, only the global/default pair of PP HW counters will be used for normal Mali-450 jobs.
+		 */
+		u32 sub_job = (counter_id - COUNTER_FP_0_C0) >> 1;
+		u32 counter_src = (counter_id - COUNTER_FP_0_C0) & 1;
+		if (0 == counter_src) {
+			mali_pp_job_set_pp_counter_sub_job_src0(sub_job, event_id);
+			if (0 == sub_job) {
+				mali_pp_job_set_pp_counter_global_src0(event_id);
+			}
+		} else {
+			mali_pp_job_set_pp_counter_sub_job_src1(sub_job, event_id);
+			if (0 == sub_job) {
+				mali_pp_job_set_pp_counter_global_src1(event_id);
+			}
+		}
+	} else if (COUNTER_L2_0_C0 <= counter_id && COUNTER_L2_2_C1 >= counter_id) {
+		u32 core_id = (counter_id - COUNTER_L2_0_C0) >> 1;
+		struct mali_l2_cache_core *l2_cache_core = mali_l2_cache_core_get_glob_l2_core(core_id);
+
+		if (NULL != l2_cache_core) {
+			u32 counter_src = (counter_id - COUNTER_L2_0_C0) & 1;
+			mali_l2_cache_core_set_counter_src(l2_cache_core,
+							   counter_src, event_id);
+			l2_cache_counter_if_enabled = MALI_TRUE;
+		}
+	} else {
+		return 0; /* Failure, unknown event */
+	}
+
+	return 1; /* success */
+}
+
+/**
+ * Called by gator.ko to retrieve the L2 cache counter values for all L2 cache cores.
+ * The L2 cache counters are unique in that they are polled by gator, rather than being
+ * transmitted via the tracepoint mechanism.
+ *
+ * @param values Pointer to a _mali_profiling_l2_counter_values structure where
+ *               the counter sources and values will be output
+ * @return 0 if all went well; otherwise, return the mask with the bits set for the powered off cores
+ */
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values)
+{
+	u32 l2_cores_num = mali_l2_cache_core_get_glob_num_l2_cores();
+	u32 i;
+
+	MALI_DEBUG_ASSERT(l2_cores_num <= 3);
+
+	for (i = 0; i < l2_cores_num; i++) {
+		struct mali_l2_cache_core *l2_cache = mali_l2_cache_core_get_glob_l2_core(i);
+
+		if (NULL == l2_cache) {
+			continue;
+		}
+
+		mali_l2_cache_core_get_counter_values(l2_cache,
+						      &values->cores[i].source0,
+						      &values->cores[i].value0,
+						      &values->cores[i].source1,
+						      &values->cores[i].value1);
+	}
+
+	return 0;
+}
+
+/**
+ * Called by gator to control the production of profiling information at runtime.
+ */
+void _mali_profiling_control(u32 action, u32 value)
+{
+	switch (action) {
+	case FBDUMP_CONTROL_ENABLE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED, (value == 0 ? MALI_FALSE : MALI_TRUE));
+		break;
+	case FBDUMP_CONTROL_RATE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES, value);
+		break;
+	case SW_COUNTER_ENABLE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_COUNTER_ENABLED, value);
+		break;
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR, value);
+		break;
+	default:
+		break;  /* Ignore unimplemented actions */
+	}
+}
+
+/**
+ * Called by gator to get mali api version.
+ */
+u32 _mali_profiling_get_api_version(void)
+{
+	return MALI_PROFILING_API_VERSION;
+}
+
+/**
+* Called by gator to get the data about Mali instance in use:
+* product id, version, number of cores
+*/
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values)
+{
+	values->mali_product_id = (u32)mali_kernel_core_get_product_id();
+	values->mali_version_major = mali_kernel_core_get_gpu_major_version();
+	values->mali_version_minor = mali_kernel_core_get_gpu_minor_version();
+	values->num_of_l2_cores = mali_l2_cache_core_get_glob_num_l2_cores();
+	values->num_of_fp_cores = mali_executor_get_num_cores_total();
+	values->num_of_vp_cores = 1;
+}
+
+
+EXPORT_SYMBOL(_mali_profiling_set_event);
+EXPORT_SYMBOL(_mali_profiling_get_l2_counters);
+EXPORT_SYMBOL(_mali_profiling_control);
+EXPORT_SYMBOL(_mali_profiling_get_api_version);
+EXPORT_SYMBOL(_mali_profiling_get_mali_version);
diff --git a/utgard/r6p1/linux/mali_osk_specific.h b/utgard/r6p1/linux/mali_osk_specific.h
new file mode 100755
index 0000000..adcca29
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_specific.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_specific.h
+ * Defines per-OS Kernel level specifics, such as unusual workarounds for
+ * certain OSs.
+ */
+
+#ifndef __MALI_OSK_SPECIFIC_H__
+#define __MALI_OSK_SPECIFIC_H__
+
+#include <asm/uaccess.h>
+#include <linux/platform_device.h>
+#include <linux/gfp.h>
+#include <linux/hardirq.h>
+
+
+#include "mali_osk_types.h"
+#include "mali_kernel_linux.h"
+
+#define MALI_STATIC_INLINE static inline
+#define MALI_NON_STATIC_INLINE inline
+
+typedef struct dma_pool *mali_dma_pool;
+
+typedef u32 mali_dma_addr;
+
+#if MALI_ENABLE_CPU_CYCLES
+/* Reads out the clock cycle performance counter of the current cpu.
+   It is useful for cost-free (2 cycle) measuring of the time spent
+   in a code path. Sample before and after, the diff number of cycles.
+   When the CPU is idle it will not increase this clock counter.
+   It means that the counter is accurate if only spin-locks are used,
+   but mutexes may lead to too low values since the cpu might "idle"
+   waiting for the mutex to become available.
+   The clock source is configured on the CPU during mali module load,
+   but will not give useful output after a CPU has been power cycled.
+   It is therefore important to configure the system to not turn of
+   the cpu cores when using this functionallity.*/
+static inline unsigned int mali_get_cpu_cyclecount(void)
+{
+	unsigned int value;
+	/* Reading the CCNT Register - CPU clock counter */
+	asm volatile("MRC p15, 0, %0, c9, c13, 0\t\n": "=r"(value));
+	return value;
+}
+
+void mali_init_cpu_time_counters(int reset, int enable_divide_by_64);
+#endif
+
+
+MALI_STATIC_INLINE u32 _mali_osk_copy_from_user(void *to, void *from, u32 n)
+{
+	return (u32)copy_from_user(to, from, (unsigned long)n);
+}
+
+MALI_STATIC_INLINE mali_bool _mali_osk_in_atomic(void)
+{
+	return in_atomic();
+}
+
+#define _mali_osk_put_user(x, ptr) put_user(x, ptr)
+
+#endif /* __MALI_OSK_SPECIFIC_H__ */
diff --git a/utgard/r6p1/linux/mali_osk_time.c b/utgard/r6p1/linux/mali_osk_time.c
new file mode 100755
index 0000000..76876b6
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_time.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_time.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <asm/delay.h>
+
+mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb)
+{
+	return time_after_eq(ticka, tickb) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+unsigned long _mali_osk_time_mstoticks(u32 ms)
+{
+	return msecs_to_jiffies(ms);
+}
+
+u32 _mali_osk_time_tickstoms(unsigned long ticks)
+{
+	return jiffies_to_msecs(ticks);
+}
+
+unsigned long _mali_osk_time_tickcount(void)
+{
+	return jiffies;
+}
+
+void _mali_osk_time_ubusydelay(u32 usecs)
+{
+	udelay(usecs);
+}
+
+u64 _mali_osk_time_get_ns(void)
+{
+	struct timespec tsval;
+	getnstimeofday(&tsval);
+	return (u64)timespec_to_ns(&tsval);
+}
+
+u64 _mali_osk_boot_time_get_ns(void)
+{
+	struct timespec tsval;
+	get_monotonic_boottime(&tsval);
+	return (u64)timespec_to_ns(&tsval);
+}
diff --git a/utgard/r6p1/linux/mali_osk_timers.c b/utgard/r6p1/linux/mali_osk_timers.c
new file mode 100755
index 0000000..8ada2da
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_timers.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_timers.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct _mali_osk_timer_t_struct {
+	struct timer_list timer;
+};
+
+typedef void (*timer_timeout_function_t)(unsigned long);
+
+_mali_osk_timer_t *_mali_osk_timer_init(void)
+{
+	_mali_osk_timer_t *t = (_mali_osk_timer_t *)kmalloc(sizeof(_mali_osk_timer_t), GFP_KERNEL);
+	if (NULL != t) init_timer(&t->timer);
+	return t;
+}
+
+void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	tim->timer.expires = jiffies + ticks_to_expire;
+	add_timer(&(tim->timer));
+}
+
+void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	mod_timer(&(tim->timer), jiffies + ticks_to_expire);
+}
+
+void _mali_osk_timer_del(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	del_timer_sync(&(tim->timer));
+}
+
+void _mali_osk_timer_del_async(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	del_timer(&(tim->timer));
+}
+
+mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	return 1 == timer_pending(&(tim->timer));
+}
+
+void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	tim->timer.data = (unsigned long)data;
+	tim->timer.function = (timer_timeout_function_t)callback;
+}
+
+void _mali_osk_timer_term(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	kfree(tim);
+}
diff --git a/utgard/r6p1/linux/mali_osk_wait_queue.c b/utgard/r6p1/linux/mali_osk_wait_queue.c
new file mode 100755
index 0000000..caa3abe
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_wait_queue.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_wait_queue.c
+ * Implemenation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct _mali_osk_wait_queue_t_struct {
+	wait_queue_head_t wait_queue;
+};
+
+_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void)
+{
+	_mali_osk_wait_queue_t *ret = NULL;
+
+	ret = kmalloc(sizeof(_mali_osk_wait_queue_t), GFP_KERNEL);
+
+	if (NULL == ret) {
+		return ret;
+	}
+
+	init_waitqueue_head(&ret->wait_queue);
+	MALI_DEBUG_ASSERT(!waitqueue_active(&ret->wait_queue));
+
+	return ret;
+}
+
+void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue));
+	wait_event(queue->wait_queue, condition(data));
+}
+
+void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue));
+	wait_event_timeout(queue->wait_queue, condition(data), _mali_osk_time_mstoticks(timeout));
+}
+
+void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	/* if queue is empty, don't attempt to wake up its elements */
+	if (!waitqueue_active(&queue->wait_queue)) return;
+
+	MALI_DEBUG_PRINT(6, ("Waking up elements in wait queue %p ....\n", queue));
+
+	wake_up_all(&queue->wait_queue);
+
+	MALI_DEBUG_PRINT(6, ("... elements in wait queue %p woken up\n", queue));
+}
+
+void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue)
+{
+	/* Parameter validation  */
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	/* Linux requires no explicit termination of wait queues */
+	kfree(queue);
+}
diff --git a/utgard/r6p1/linux/mali_osk_wq.c b/utgard/r6p1/linux/mali_osk_wq.c
new file mode 100755
index 0000000..06afa04
--- /dev/null
+++ b/utgard/r6p1/linux/mali_osk_wq.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_wq.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/slab.h> /* For memory allocation */
+#include <linux/workqueue.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_license.h"
+#include "mali_kernel_linux.h"
+
+typedef struct _mali_osk_wq_work_s {
+	_mali_osk_wq_work_handler_t handler;
+	void *data;
+	mali_bool high_pri;
+	struct work_struct work_handle;
+} mali_osk_wq_work_object_t;
+
+typedef struct _mali_osk_wq_delayed_work_s {
+	_mali_osk_wq_work_handler_t handler;
+	void *data;
+	struct delayed_work work;
+} mali_osk_wq_delayed_work_object_t;
+
+#if MALI_LICENSE_IS_GPL
+static struct workqueue_struct *mali_wq_normal = NULL;
+static struct workqueue_struct *mali_wq_high = NULL;
+#endif
+
+static void _mali_osk_wq_work_func(struct work_struct *work);
+
+_mali_osk_errcode_t _mali_osk_wq_init(void)
+{
+#if MALI_LICENSE_IS_GPL
+	MALI_DEBUG_ASSERT(NULL == mali_wq_normal);
+	MALI_DEBUG_ASSERT(NULL == mali_wq_high);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+	mali_wq_normal = alloc_workqueue("mali", WQ_UNBOUND, 0);
+	mali_wq_high = alloc_workqueue("mali_high_pri", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+	mali_wq_normal = create_workqueue("mali");
+	mali_wq_high = create_workqueue("mali_high_pri");
+#endif
+	if (NULL == mali_wq_normal || NULL == mali_wq_high) {
+		MALI_PRINT_ERROR(("Unable to create Mali workqueues\n"));
+
+		if (mali_wq_normal) destroy_workqueue(mali_wq_normal);
+		if (mali_wq_high)   destroy_workqueue(mali_wq_high);
+
+		mali_wq_normal = NULL;
+		mali_wq_high   = NULL;
+
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* MALI_LICENSE_IS_GPL */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_wq_flush(void)
+{
+#if MALI_LICENSE_IS_GPL
+	flush_workqueue(mali_wq_high);
+	flush_workqueue(mali_wq_normal);
+#else
+	flush_scheduled_work();
+#endif
+}
+
+void _mali_osk_wq_term(void)
+{
+#if MALI_LICENSE_IS_GPL
+	MALI_DEBUG_ASSERT(NULL != mali_wq_normal);
+	MALI_DEBUG_ASSERT(NULL != mali_wq_high);
+
+	flush_workqueue(mali_wq_normal);
+	destroy_workqueue(mali_wq_normal);
+
+	flush_workqueue(mali_wq_high);
+	destroy_workqueue(mali_wq_high);
+
+	mali_wq_normal = NULL;
+	mali_wq_high   = NULL;
+#else
+	flush_scheduled_work();
+#endif
+}
+
+_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL);
+
+	if (NULL == work) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+	work->high_pri = MALI_FALSE;
+
+	INIT_WORK(&work->work_handle, _mali_osk_wq_work_func);
+
+	return work;
+}
+
+_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL);
+
+	if (NULL == work) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+	work->high_pri = MALI_TRUE;
+
+	INIT_WORK(&work->work_handle, _mali_osk_wq_work_func);
+
+	return work;
+}
+
+void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+	_mali_osk_wq_flush();
+	kfree(work_object);
+}
+
+void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+	kfree(work_object);
+}
+
+void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+#if MALI_LICENSE_IS_GPL
+	queue_work(mali_wq_normal, &work_object->work_handle);
+#else
+	schedule_work(&work_object->work_handle);
+#endif
+}
+
+void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+#if MALI_LICENSE_IS_GPL
+	queue_work(mali_wq_high, &work_object->work_handle);
+#else
+	schedule_work(&work_object->work_handle);
+#endif
+}
+
+static void _mali_osk_wq_work_func(struct work_struct *work)
+{
+	mali_osk_wq_work_object_t *work_object;
+
+	work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_work_object_t, work_handle);
+
+#if MALI_LICENSE_IS_GPL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+	/* We want highest Dynamic priority of the thread so that the Jobs depending
+	** on this thread could be scheduled in time. Without this, this thread might
+	** sometimes need to wait for some threads in user mode to finish its round-robin
+	** time, causing *bubble* in the Mali pipeline. Thanks to the new implementation
+	** of high-priority workqueue in new kernel, this only happens in older kernel.
+	*/
+	if (MALI_TRUE == work_object->high_pri) {
+		set_user_nice(current, -19);
+	}
+#endif
+#endif /* MALI_LICENSE_IS_GPL */
+
+	work_object->handler(work_object->data);
+}
+
+static void _mali_osk_wq_delayed_work_func(struct work_struct *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object;
+
+	work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_delayed_work_object_t, work.work);
+	work_object->handler(work_object->data);
+}
+
+mali_osk_wq_delayed_work_object_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_delayed_work_object_t *work = kmalloc(sizeof(mali_osk_wq_delayed_work_object_t), GFP_KERNEL);
+
+	if (NULL == work) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+
+	INIT_DELAYED_WORK(&work->work, _mali_osk_wq_delayed_work_func);
+
+	return work;
+}
+
+void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+	kfree(work_object);
+}
+
+void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+	cancel_delayed_work(&work_object->work);
+}
+
+void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+	cancel_delayed_work_sync(&work_object->work);
+}
+
+void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+
+#if MALI_LICENSE_IS_GPL
+	queue_delayed_work(mali_wq_normal, &work_object->work, delay);
+#else
+	schedule_delayed_work(&work_object->work, delay);
+#endif
+
+}
diff --git a/utgard/r6p1/linux/mali_pmu_power_up_down.c b/utgard/r6p1/linux/mali_pmu_power_up_down.c
new file mode 100755
index 0000000..6a6c9f8
--- /dev/null
+++ b/utgard/r6p1/linux/mali_pmu_power_up_down.c
@@ -0,0 +1,27 @@
+/**
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_pmu_power_up_down.c
+ */
+
+#include <linux/module.h>
+#include "mali_executor.h"
+
+int mali_perf_set_num_pp_cores(unsigned int num_cores)
+{
+#ifndef CONFIG_MALI_DVFS
+	return mali_executor_set_perf_level(num_cores, MALI_TRUE);
+#else
+	return mali_executor_set_perf_level(num_cores, MALI_FALSE);
+#endif
+}
+
+EXPORT_SYMBOL(mali_perf_set_num_pp_cores);
diff --git a/utgard/r6p1/linux/mali_profiling_events.h b/utgard/r6p1/linux/mali_profiling_events.h
new file mode 100755
index 0000000..5e51095
--- /dev/null
+++ b/utgard/r6p1/linux/mali_profiling_events.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_EVENTS_H__
+#define __MALI_PROFILING_EVENTS_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_profiling_events.h>
+
+#endif /* __MALI_PROFILING_EVENTS_H__ */
diff --git a/utgard/r6p1/linux/mali_profiling_gator_api.h b/utgard/r6p1/linux/mali_profiling_gator_api.h
new file mode 100755
index 0000000..e371971
--- /dev/null
+++ b/utgard/r6p1/linux/mali_profiling_gator_api.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_GATOR_API_H__
+#define __MALI_PROFILING_GATOR_API_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_profiling_gator_api.h>
+
+#endif /* __MALI_PROFILING_GATOR_API_H__ */
diff --git a/utgard/r6p1/linux/mali_profiling_internal.c b/utgard/r6p1/linux/mali_profiling_internal.c
new file mode 100755
index 0000000..918caa0
--- /dev/null
+++ b/utgard/r6p1/linux/mali_profiling_internal.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_ukk.h"
+#include "mali_timestamp.h"
+#include "mali_osk_profiling.h"
+#include "mali_user_settings_db.h"
+#include "mali_profiling_internal.h"
+
+typedef struct mali_profiling_entry {
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+} mali_profiling_entry;
+
+typedef enum mali_profiling_state {
+	MALI_PROFILING_STATE_UNINITIALIZED,
+	MALI_PROFILING_STATE_IDLE,
+	MALI_PROFILING_STATE_RUNNING,
+	MALI_PROFILING_STATE_RETURN,
+} mali_profiling_state;
+
+static _mali_osk_mutex_t *lock = NULL;
+static mali_profiling_state prof_state = MALI_PROFILING_STATE_UNINITIALIZED;
+static mali_profiling_entry *profile_entries = NULL;
+static _mali_osk_atomic_t profile_insert_index;
+static u32 profile_mask = 0;
+
+static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4);
+
+void probe_mali_timeline_event(void *data, TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, unsigned
+			       int d2, unsigned int d3, unsigned int d4))
+{
+	add_event(event_id, d0, d1, d2, d3, d4);
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_init(mali_bool auto_start)
+{
+	profile_entries = NULL;
+	profile_mask = 0;
+	_mali_osk_atomic_init(&profile_insert_index, 0);
+
+	lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_PROFILING);
+	if (NULL == lock) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	prof_state = MALI_PROFILING_STATE_IDLE;
+
+	if (MALI_TRUE == auto_start) {
+		u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* Use maximum buffer size */
+
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE);
+		if (_MALI_OSK_ERR_OK != _mali_internal_profiling_start(&limit)) {
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_internal_profiling_term(void)
+{
+	u32 count;
+
+	/* Ensure profiling is stopped */
+	_mali_internal_profiling_stop(&count);
+
+	prof_state = MALI_PROFILING_STATE_UNINITIALIZED;
+
+	if (NULL != profile_entries) {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	if (NULL != lock) {
+		_mali_osk_mutex_term(lock);
+		lock = NULL;
+	}
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_start(u32 *limit)
+{
+	_mali_osk_errcode_t ret;
+	mali_profiling_entry *new_profile_entries;
+
+	_mali_osk_mutex_wait(lock);
+
+	if (MALI_PROFILING_STATE_RUNNING == prof_state) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_BUSY;
+	}
+
+	new_profile_entries = _mali_osk_valloc(*limit * sizeof(mali_profiling_entry));
+
+	if (NULL == new_profile_entries) {
+		_mali_osk_mutex_signal(lock);
+		_mali_osk_vfree(new_profile_entries);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	if (MALI_PROFILING_MAX_BUFFER_ENTRIES < *limit) {
+		*limit = MALI_PROFILING_MAX_BUFFER_ENTRIES;
+	}
+
+	profile_mask = 1;
+	while (profile_mask <= *limit) {
+		profile_mask <<= 1;
+	}
+	profile_mask >>= 1;
+
+	*limit = profile_mask;
+
+	profile_mask--; /* turns the power of two into a mask of one less */
+
+	if (MALI_PROFILING_STATE_IDLE != prof_state) {
+		_mali_osk_mutex_signal(lock);
+		_mali_osk_vfree(new_profile_entries);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	profile_entries = new_profile_entries;
+
+	ret = _mali_timestamp_reset();
+
+	if (_MALI_OSK_ERR_OK == ret) {
+		prof_state = MALI_PROFILING_STATE_RUNNING;
+	} else {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	register_trace_mali_timeline_event(probe_mali_timeline_event, NULL);
+
+	_mali_osk_mutex_signal(lock);
+	return ret;
+}
+
+static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4)
+{
+	u32 cur_index = (_mali_osk_atomic_inc_return(&profile_insert_index) - 1) & profile_mask;
+
+	profile_entries[cur_index].timestamp = _mali_timestamp_get();
+	profile_entries[cur_index].event_id = event_id;
+	profile_entries[cur_index].data[0] = data0;
+	profile_entries[cur_index].data[1] = data1;
+	profile_entries[cur_index].data[2] = data2;
+	profile_entries[cur_index].data[3] = data3;
+	profile_entries[cur_index].data[4] = data4;
+
+	/* If event is "leave API function", add current memory usage to the event
+	 * as data point 4.  This is used in timeline profiling to indicate how
+	 * much memory was used when leaving a function. */
+	if (event_id == (MALI_PROFILING_EVENT_TYPE_SINGLE | MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC)) {
+		profile_entries[cur_index].data[4] = _mali_ukk_report_memory_usage();
+	}
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_stop(u32 *count)
+{
+	_mali_osk_mutex_wait(lock);
+
+	if (MALI_PROFILING_STATE_RUNNING != prof_state) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	/* go into return state (user to retreive events), no more events will be added after this */
+	prof_state = MALI_PROFILING_STATE_RETURN;
+
+	unregister_trace_mali_timeline_event(probe_mali_timeline_event, NULL);
+
+	_mali_osk_mutex_signal(lock);
+
+	tracepoint_synchronize_unregister();
+
+	*count = _mali_osk_atomic_read(&profile_insert_index);
+	if (*count > profile_mask) *count = profile_mask;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+u32 _mali_internal_profiling_get_count(void)
+{
+	u32 retval = 0;
+
+	_mali_osk_mutex_wait(lock);
+	if (MALI_PROFILING_STATE_RETURN == prof_state) {
+		retval = _mali_osk_atomic_read(&profile_insert_index);
+		if (retval > profile_mask) retval = profile_mask;
+	}
+	_mali_osk_mutex_signal(lock);
+
+	return retval;
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5])
+{
+	u32 raw_index = _mali_osk_atomic_read(&profile_insert_index);
+
+	_mali_osk_mutex_wait(lock);
+
+	if (index < profile_mask) {
+		if ((raw_index & ~profile_mask) != 0) {
+			index += raw_index;
+			index &= profile_mask;
+		}
+
+		if (prof_state != MALI_PROFILING_STATE_RETURN) {
+			_mali_osk_mutex_signal(lock);
+			return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+		}
+
+		if (index >= raw_index) {
+			_mali_osk_mutex_signal(lock);
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		*timestamp = profile_entries[index].timestamp;
+		*event_id = profile_entries[index].event_id;
+		data[0] = profile_entries[index].data[0];
+		data[1] = profile_entries[index].data[1];
+		data[2] = profile_entries[index].data[2];
+		data[3] = profile_entries[index].data[3];
+		data[4] = profile_entries[index].data[4];
+	} else {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	_mali_osk_mutex_signal(lock);
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_clear(void)
+{
+	_mali_osk_mutex_wait(lock);
+
+	if (MALI_PROFILING_STATE_RETURN != prof_state) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	prof_state = MALI_PROFILING_STATE_IDLE;
+	profile_mask = 0;
+	_mali_osk_atomic_init(&profile_insert_index, 0);
+
+	if (NULL != profile_entries) {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	_mali_osk_mutex_signal(lock);
+	return _MALI_OSK_ERR_OK;
+}
+
+mali_bool _mali_internal_profiling_is_recording(void)
+{
+	return prof_state == MALI_PROFILING_STATE_RUNNING ? MALI_TRUE : MALI_FALSE;
+}
+
+mali_bool _mali_internal_profiling_have_recording(void)
+{
+	return prof_state == MALI_PROFILING_STATE_RETURN ? MALI_TRUE : MALI_FALSE;
+}
diff --git a/utgard/r6p1/linux/mali_profiling_internal.h b/utgard/r6p1/linux/mali_profiling_internal.h
new file mode 100755
index 0000000..6e05ffd
--- /dev/null
+++ b/utgard/r6p1/linux/mali_profiling_internal.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_INTERNAL_H__
+#define __MALI_PROFILING_INTERNAL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_osk.h"
+
+int _mali_internal_profiling_init(mali_bool auto_start);
+void _mali_internal_profiling_term(void);
+
+mali_bool _mali_internal_profiling_is_recording(void);
+mali_bool _mali_internal_profiling_have_recording(void);
+_mali_osk_errcode_t _mali_internal_profiling_clear(void);
+_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]);
+u32 _mali_internal_profiling_get_count(void);
+int _mali_internal_profiling_stop(u32 *count);
+int _mali_internal_profiling_start(u32 *limit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_PROFILING_INTERNAL_H__ */
diff --git a/utgard/r6p1/linux/mali_sync.c b/utgard/r6p1/linux/mali_sync.c
new file mode 100755
index 0000000..d8dd21d
--- /dev/null
+++ b/utgard/r6p1/linux/mali_sync.c
@@ -0,0 +1,456 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_sync.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_timeline.h"
+#include "mali_executor.h"
+
+#include <linux/file.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <asm-generic/fcntl.h>
+
+struct mali_sync_pt {
+	struct sync_pt         sync_pt;
+	struct mali_sync_flag *flag;
+	struct sync_timeline *sync_tl;  /**< Sync timeline this pt is connected to. */
+};
+
+/**
+ * The sync flag is used to connect sync fences to the Mali Timeline system.  Sync fences can be
+ * created from a sync flag, and when the flag is signaled, the sync fences will also be signaled.
+ */
+struct mali_sync_flag {
+	struct sync_timeline *sync_tl;  /**< Sync timeline this flag is connected to. */
+	u32                   point;    /**< Point on timeline. */
+	int                   status;   /**< 0 if unsignaled, 1 if signaled without error or negative if signaled with error. */
+	struct kref           refcount; /**< Reference count. */
+};
+
+/**
+ * Mali sync timeline is used to connect mali timeline to sync_timeline.
+ * When fence timeout can print more detailed mali timeline system info.
+ */
+struct mali_sync_timeline_container {
+	struct sync_timeline sync_timeline;
+	struct mali_timeline *timeline;
+};
+
+MALI_STATIC_INLINE struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, sync_pt);
+}
+
+MALI_STATIC_INLINE struct mali_sync_timeline_container *to_mali_sync_tl_container(struct sync_timeline *sync_tl)
+{
+	return container_of(sync_tl, struct mali_sync_timeline_container, sync_timeline);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt, *new_mpt;
+	struct sync_pt *new_pt;
+
+	if (NULL == pt) {
+		dump_stack();
+		return NULL;
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(pt);
+	mpt = to_mali_sync_pt(pt);
+
+	new_pt = sync_pt_create(mpt->sync_tl, sizeof(struct mali_sync_pt));
+	if (NULL == new_pt) return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+
+	mali_sync_flag_get(mpt->flag);
+	new_mpt->flag = mpt->flag;
+	new_mpt->sync_tl = mpt->sync_tl;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(pt);
+	mpt = to_mali_sync_pt(pt);
+
+	MALI_DEBUG_ASSERT_POINTER(mpt->flag);
+
+	return mpt->flag->status;
+}
+
+static int timeline_compare(struct sync_pt *pta, struct sync_pt *ptb)
+{
+	struct mali_sync_pt *mpta;
+	struct mali_sync_pt *mptb;
+	u32 a, b;
+
+	MALI_DEBUG_ASSERT_POINTER(pta);
+	MALI_DEBUG_ASSERT_POINTER(ptb);
+	mpta = to_mali_sync_pt(pta);
+	mptb = to_mali_sync_pt(ptb);
+
+	MALI_DEBUG_ASSERT_POINTER(mpta->flag);
+	MALI_DEBUG_ASSERT_POINTER(mptb->flag);
+
+	a = mpta->flag->point;
+	b = mptb->flag->point;
+
+	if (a == b) return 0;
+
+	return ((b - a) < (a - b) ? -1 : 1);
+}
+
+static void timeline_free_pt(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(pt);
+	mpt = to_mali_sync_pt(pt);
+
+	mali_sync_flag_put(mpt->flag);
+}
+
+static void timeline_release(struct sync_timeline *sync_timeline)
+{
+	struct mali_sync_timeline_container *mali_sync_tl = NULL;
+	struct mali_timeline *mali_tl = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_timeline);
+
+	mali_sync_tl = to_mali_sync_tl_container(sync_timeline);
+	MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+	mali_tl = mali_sync_tl->timeline;
+
+	/* always signaled timeline didn't have mali container */
+	if (mali_tl) {
+		if (NULL != mali_tl->spinlock) {
+			mali_spinlock_reentrant_term(mali_tl->spinlock);
+		}
+		_mali_osk_free(mali_tl);
+	}
+
+	module_put(THIS_MODULE);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+static void timeline_print_pt(struct seq_file *s, struct sync_pt *sync_pt)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(s);
+	MALI_DEBUG_ASSERT_POINTER(sync_pt);
+
+	mpt = to_mali_sync_pt(sync_pt);
+
+	/* It is possible this sync point is just under construct,
+	 * make sure the flag is valid before accessing it
+	*/
+	if (mpt->flag) {
+		seq_printf(s, "%u", mpt->flag->point);
+	} else {
+		seq_printf(s, "uninitialized");
+	}
+}
+
+static void timeline_print_obj(struct seq_file *s, struct sync_timeline *sync_tl)
+{
+	struct mali_sync_timeline_container *mali_sync_tl = NULL;
+	struct mali_timeline *mali_tl = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_tl);
+
+	mali_sync_tl = to_mali_sync_tl_container(sync_tl);
+	MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+	mali_tl = mali_sync_tl->timeline;
+
+	if (NULL != mali_tl) {
+		seq_printf(s, "oldest (%u) ", mali_tl->point_oldest);
+		seq_printf(s, "next (%u)", mali_tl->point_next);
+		seq_printf(s, "\n");
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+		{
+			u32 tid = _mali_osk_get_tid();
+			struct mali_timeline_system *system = mali_tl->system;
+
+			mali_spinlock_reentrant_wait(mali_tl->spinlock, tid);
+			if (!mali_tl->destroyed) {
+				mali_spinlock_reentrant_wait(system->spinlock, tid);
+				mali_timeline_debug_print_timeline(mali_tl, s);
+				mali_spinlock_reentrant_signal(system->spinlock, tid);
+			}
+			mali_spinlock_reentrant_signal(mali_tl->spinlock, tid);
+
+			/* dump job queue status and group running status */
+			mali_executor_status_dump();
+		}
+#endif
+	}
+}
+#else
+static void timeline_pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(str);
+	MALI_DEBUG_ASSERT_POINTER(pt);
+
+	mpt = to_mali_sync_pt(pt);
+
+	/* It is possible this sync point is just under construct,
+	 * make sure the flag is valid before accessing it
+	*/
+	if (mpt->flag) {
+		_mali_osk_snprintf(str, size, "%u", mpt->flag->point);
+	} else {
+		_mali_osk_snprintf(str, size, "uninitialized");
+	}
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str, int size)
+{
+	struct mali_sync_timeline_container *mali_sync_tl = NULL;
+	struct mali_timeline *mali_tl = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	mali_sync_tl = to_mali_sync_tl_container(timeline);
+	MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+	mali_tl = mali_sync_tl->timeline;
+
+	if (NULL != mali_tl) {
+		_mali_osk_snprintf(str, size, "oldest (%u) ", mali_tl->point_oldest);
+		_mali_osk_snprintf(str, size, "next (%u)", mali_tl->point_next);
+		_mali_osk_snprintf(str, size, "\n");
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+		{
+			u32 tid = _mali_osk_get_tid();
+			struct mali_timeline_system *system = mali_tl->system;
+
+			mali_spinlock_reentrant_wait(mali_tl->spinlock, tid);
+			if (!mali_tl->destroyed) {
+				mali_spinlock_reentrant_wait(system->spinlock, tid);
+				mali_timeline_debug_direct_print_timeline(mali_tl);
+				mali_spinlock_reentrant_signal(system->spinlock, tid);
+			}
+			mali_spinlock_reentrant_signal(mali_tl->spinlock, tid);
+
+			/* dump job queue status and group running status */
+			mali_executor_status_dump();
+		}
+#endif
+	}
+}
+#endif
+
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name    = "Mali",
+	.dup            = timeline_dup,
+	.has_signaled   = timeline_has_signaled,
+	.compare        = timeline_compare,
+	.free_pt        = timeline_free_pt,
+	.release_obj    = timeline_release,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	.print_pt       = timeline_print_pt,
+	.print_obj      = timeline_print_obj,
+#else
+	.pt_value_str = timeline_pt_value_str,
+	.timeline_value_str = timeline_value_str,
+#endif
+};
+
+struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name)
+{
+	struct sync_timeline *sync_tl;
+	struct mali_sync_timeline_container *mali_sync_tl;
+
+	sync_tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline_container), name);
+	if (NULL == sync_tl) return NULL;
+
+	mali_sync_tl = to_mali_sync_tl_container(sync_tl);
+	mali_sync_tl->timeline = timeline;
+
+	/* Grab a reference on the module to ensure the callbacks are present
+	 * as long some timeline exists. The reference is released when the
+	 * timeline is freed.
+	 * Since this function is called from a ioctl on an open file we know
+	 * we already have a reference, so using __module_get is safe. */
+	__module_get(THIS_MODULE);
+
+	return sync_tl;
+}
+
+s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence)
+{
+	s32 fd = -1;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
+	fd = get_unused_fd();
+#else
+	fd = get_unused_fd_flags(O_CLOEXEC);
+#endif
+	if (fd < 0) {
+		sync_fence_put(sync_fence);
+		return -1;
+	}
+	sync_fence_install(sync_fence, fd);
+
+	return fd;
+}
+
+struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2)
+{
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence1);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence1);
+
+	sync_fence = sync_fence_merge("mali_merge_fence", sync_fence1, sync_fence2);
+	sync_fence_put(sync_fence1);
+	sync_fence_put(sync_fence2);
+
+	return sync_fence;
+}
+
+struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl)
+{
+	struct mali_sync_flag *flag;
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_tl);
+
+	flag = mali_sync_flag_create(sync_tl, 0);
+	if (NULL == flag) return NULL;
+
+	sync_fence = mali_sync_flag_create_fence(flag);
+
+	mali_sync_flag_signal(flag, 0);
+	mali_sync_flag_put(flag);
+
+	return sync_fence;
+}
+
+struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, mali_timeline_point point)
+{
+	struct mali_sync_flag *flag;
+
+	if (NULL == sync_tl) return NULL;
+
+	flag = _mali_osk_calloc(1, sizeof(*flag));
+	if (NULL == flag) return NULL;
+
+	flag->sync_tl = sync_tl;
+	flag->point = point;
+
+	flag->status = 0;
+	kref_init(&flag->refcount);
+
+	return flag;
+}
+
+void mali_sync_flag_get(struct mali_sync_flag *flag)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	kref_get(&flag->refcount);
+}
+
+/**
+ * Free sync flag.
+ *
+ * @param ref kref object embedded in sync flag that should be freed.
+ */
+static void mali_sync_flag_free(struct kref *ref)
+{
+	struct mali_sync_flag *flag;
+
+	MALI_DEBUG_ASSERT_POINTER(ref);
+	flag = container_of(ref, struct mali_sync_flag, refcount);
+
+	_mali_osk_free(flag);
+}
+
+void mali_sync_flag_put(struct mali_sync_flag *flag)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	kref_put(&flag->refcount, mali_sync_flag_free);
+}
+
+void mali_sync_flag_signal(struct mali_sync_flag *flag, int error)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+
+	MALI_DEBUG_ASSERT(0 == flag->status);
+	flag->status = (0 > error) ? error : 1;
+
+	_mali_osk_write_mem_barrier();
+
+	sync_timeline_signal(flag->sync_tl);
+}
+
+/**
+ * Create a sync point attached to given sync flag.
+ *
+ * @note Sync points must be triggered in *exactly* the same order as they are created.
+ *
+ * @param flag Sync flag.
+ * @return New sync point if successful, NULL if not.
+ */
+static struct sync_pt *mali_sync_flag_create_pt(struct mali_sync_flag *flag)
+{
+	struct sync_pt *pt;
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	MALI_DEBUG_ASSERT_POINTER(flag->sync_tl);
+
+	pt = sync_pt_create(flag->sync_tl, sizeof(struct mali_sync_pt));
+	if (NULL == pt) return NULL;
+
+	mali_sync_flag_get(flag);
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->flag = flag;
+	mpt->sync_tl = flag->sync_tl;
+
+	return pt;
+}
+
+struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag)
+{
+	struct sync_pt    *sync_pt;
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	MALI_DEBUG_ASSERT_POINTER(flag->sync_tl);
+
+	sync_pt = mali_sync_flag_create_pt(flag);
+	if (NULL == sync_pt) return NULL;
+
+	sync_fence = sync_fence_create("mali_flag_fence", sync_pt);
+	if (NULL == sync_fence) {
+		sync_pt_free(sync_pt);
+		return NULL;
+	}
+
+	return sync_fence;
+}
diff --git a/utgard/r6p1/linux/mali_sync.h b/utgard/r6p1/linux/mali_sync.h
new file mode 100755
index 0000000..79efb8e
--- /dev/null
+++ b/utgard/r6p1/linux/mali_sync.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_sync.h
+ *
+ * Mali interface for Linux sync objects.
+ */
+
+#ifndef _MALI_SYNC_H_
+#define _MALI_SYNC_H_
+
+#if defined(CONFIG_SYNC)
+
+#include <linux/seq_file.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
+#include <linux/sync.h>
+#else
+#include <sync.h>
+#endif
+
+
+#include "mali_osk.h"
+
+struct mali_sync_flag;
+struct mali_timeline;
+
+/**
+ * Create a sync timeline.
+ *
+ * @param name Name of the sync timeline.
+ * @return The new sync timeline if successful, NULL if not.
+ */
+struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name);
+
+/**
+ * Creates a file descriptor representing the sync fence.  Will release sync fence if allocation of
+ * file descriptor fails.
+ *
+ * @param sync_fence Sync fence.
+ * @return File descriptor representing sync fence if successful, or -1 if not.
+ */
+s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence);
+
+/**
+ * Merges two sync fences.  Both input sync fences will be released.
+ *
+ * @param sync_fence1 First sync fence.
+ * @param sync_fence2 Second sync fence.
+ * @return New sync fence that is the result of the merger if successful, or NULL if not.
+ */
+struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2);
+
+/**
+ * Create a sync fence that is already signaled.
+ *
+ * @param tl Sync timeline.
+ * @return New signaled sync fence if successful, NULL if not.
+ */
+struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl);
+
+/**
+ * Create a sync flag.
+ *
+ * @param sync_tl Sync timeline.
+ * @param point Point on Mali timeline.
+ * @return New sync flag if successful, NULL if not.
+ */
+struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, u32 point);
+
+/**
+ * Grab sync flag reference.
+ *
+ * @param flag Sync flag.
+ */
+void mali_sync_flag_get(struct mali_sync_flag *flag);
+
+/**
+ * Release sync flag reference.  If this was the last reference, the sync flag will be freed.
+ *
+ * @param flag Sync flag.
+ */
+void mali_sync_flag_put(struct mali_sync_flag *flag);
+
+/**
+ * Signal sync flag.  All sync fences created from this flag will be signaled.
+ *
+ * @param flag Sync flag to signal.
+ * @param error Negative error code, or 0 if no error.
+ */
+void mali_sync_flag_signal(struct mali_sync_flag *flag, int error);
+
+/**
+ * Create a sync fence attached to given sync flag.
+ *
+ * @param flag Sync flag.
+ * @return New sync fence if successful, NULL if not.
+ */
+struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag);
+
+#endif /* defined(CONFIG_SYNC) */
+
+#endif /* _MALI_SYNC_H_ */
diff --git a/utgard/r6p1/linux/mali_uk_types.h b/utgard/r6p1/linux/mali_uk_types.h
new file mode 100755
index 0000000..00ba28b
--- /dev/null
+++ b/utgard/r6p1/linux/mali_uk_types.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_UK_TYPES_H__
+#define __MALI_UK_TYPES_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_uk_types.h>
+
+#endif /* __MALI_UK_TYPES_H__ */
diff --git a/utgard/r6p1/linux/mali_ukk_core.c b/utgard/r6p1/linux/mali_ukk_core.c
new file mode 100755
index 0000000..a6c43b7
--- /dev/null
+++ b/utgard/r6p1/linux/mali_ukk_core.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/slab.h>     /* memort allocation functions */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs)
+{
+	_mali_uk_get_api_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != get_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_api_version(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+	if (0 != put_user(kargs.compatible, &uargs->compatible)) return -EFAULT;
+
+	return 0;
+}
+
+int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs)
+{
+	_mali_uk_get_api_version_v2_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != get_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_api_version_v2(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+	if (0 != put_user(kargs.compatible, &uargs->compatible)) return -EFAULT;
+
+	return 0;
+}
+
+int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs)
+{
+	_mali_uk_wait_for_notification_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_wait_for_notification(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (_MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS != kargs.type) {
+		kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+		if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_wait_for_notification_s))) return -EFAULT;
+	} else {
+		if (0 != put_user(kargs.type, &uargs->type)) return -EFAULT;
+	}
+
+	return 0;
+}
+
+int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs)
+{
+	_mali_uk_post_notification_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	if (0 != get_user(kargs.type, &uargs->type)) {
+		return -EFAULT;
+	}
+
+	err = _mali_ukk_post_notification(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs)
+{
+	_mali_uk_get_user_settings_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_user_settings(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = 0; /* prevent kernel address to be returned to user space */
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_user_settings_s))) return -EFAULT;
+
+	return 0;
+}
+
+int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs)
+{
+	_mali_uk_request_high_priority_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_request_high_priority(&kargs);
+
+	kargs.ctx = 0;
+
+	return map_errcode(err);
+}
+
+int pending_submit_wrapper(struct mali_session_data *session_data, _mali_uk_pending_submit_s __user *uargs)
+{
+	_mali_uk_pending_submit_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_pending_submit(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
diff --git a/utgard/r6p1/linux/mali_ukk_gp.c b/utgard/r6p1/linux/mali_ukk_gp.c
new file mode 100755
index 0000000..d7c5d2f
--- /dev/null
+++ b/utgard/r6p1/linux/mali_ukk_gp.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_gp_start_job(session_data, uargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs)
+{
+	_mali_uk_get_gp_core_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err =  _mali_ukk_get_gp_core_version(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	/* no known transactions to roll-back */
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	return 0;
+}
+
+int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs)
+{
+	_mali_uk_gp_suspend_response_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_gp_suspend_response_s))) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_gp_suspend_response(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.cookie, &uargs->cookie)) return -EFAULT;
+
+	/* no known transactions to roll-back */
+	return 0;
+}
+
+int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs)
+{
+	_mali_uk_get_gp_number_of_cores_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_gp_number_of_cores(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	/* no known transactions to roll-back */
+
+	if (0 != put_user(kargs.number_of_cores, &uargs->number_of_cores)) return -EFAULT;
+
+	return 0;
+}
diff --git a/utgard/r6p1/linux/mali_ukk_mem.c b/utgard/r6p1/linux/mali_ukk_mem.c
new file mode 100755
index 0000000..3dfe345
--- /dev/null
+++ b/utgard/r6p1/linux/mali_ukk_mem.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs)
+{
+	_mali_uk_alloc_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_alloc_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_allocate(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.backend_handle, &uargs->backend_handle)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs)
+{
+	_mali_uk_free_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_free_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_free(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.free_pages_nr, &uargs->free_pages_nr)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs)
+{
+	_mali_uk_bind_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_bind_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_bind(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs)
+{
+	_mali_uk_unbind_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_unbind_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_unbind(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+
+int mem_cow_wrapper(struct mali_session_data *session_data, _mali_uk_cow_mem_s __user *uargs)
+{
+	_mali_uk_cow_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_cow_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_cow(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.backend_handle, &uargs->backend_handle)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int mem_cow_modify_range_wrapper(struct mali_session_data *session_data, _mali_uk_cow_modify_range_s __user *uargs)
+{
+	_mali_uk_cow_modify_range_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_cow_modify_range_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_cow_modify_range(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.change_pages_nr, &uargs->change_pages_nr)) {
+		return -EFAULT;
+	}
+	return 0;
+}
+
+
+int mem_resize_mem_wrapper(struct mali_session_data *session_data, _mali_uk_mem_resize_s __user *uargs)
+{
+	_mali_uk_mem_resize_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_resize_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_resize(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs)
+{
+	_mali_uk_mem_write_safe_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_write_safe_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	/* Check if we can access the buffers */
+	if (!access_ok(VERIFY_WRITE, kargs.dest, kargs.size)
+	    || !access_ok(VERIFY_READ, kargs.src, kargs.size)) {
+		return -EINVAL;
+	}
+
+	/* Check if size wraps */
+	if ((kargs.size + kargs.dest) <= kargs.dest
+	    || (kargs.size + kargs.src) <= kargs.src) {
+		return -EINVAL;
+	}
+
+	err = _mali_ukk_mem_write_safe(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.size, &uargs->size)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+
+
+int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs)
+{
+	_mali_uk_query_mmu_page_table_dump_size_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_query_mmu_page_table_dump_size(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.size, &uargs->size)) return -EFAULT;
+
+	return 0;
+}
+
+int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs)
+{
+	_mali_uk_dump_mmu_page_table_s kargs;
+	_mali_osk_errcode_t err;
+	void __user *user_buffer;
+	void *buffer = NULL;
+	int rc = -EFAULT;
+
+	/* validate input */
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	/* the session_data pointer was validated by caller */
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_dump_mmu_page_table_s)))
+		goto err_exit;
+
+	user_buffer = (void __user *)(uintptr_t)kargs.buffer;
+	if (!access_ok(VERIFY_WRITE, user_buffer, kargs.size))
+		goto err_exit;
+
+	/* allocate temporary buffer (kernel side) to store mmu page table info */
+	if (kargs.size <= 0)
+		return -EINVAL;
+	/* Allow at most 8MiB buffers, this is more than enough to dump a fully
+	 * populated page table. */
+	if (kargs.size > SZ_8M)
+		return -EINVAL;
+
+	buffer = (void *)(uintptr_t)_mali_osk_valloc(kargs.size);
+	if (NULL == buffer) {
+		rc = -ENOMEM;
+		goto err_exit;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	kargs.buffer = (uintptr_t)buffer;
+	err = _mali_ukk_dump_mmu_page_table(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		rc = map_errcode(err);
+		goto err_exit;
+	}
+
+	/* copy mmu page table info back to user space and update pointers */
+	if (0 != copy_to_user(user_buffer, buffer, kargs.size))
+		goto err_exit;
+
+	kargs.register_writes = kargs.register_writes -
+				(uintptr_t)buffer + (uintptr_t)user_buffer;
+	kargs.page_table_dump = kargs.page_table_dump -
+				(uintptr_t)buffer + (uintptr_t)user_buffer;
+
+	if (0 != copy_to_user(uargs, &kargs, sizeof(kargs)))
+		goto err_exit;
+
+	rc = 0;
+
+err_exit:
+	if (buffer) _mali_osk_vfree(buffer);
+	return rc;
+}
+
+int mem_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+	_mali_uk_profiling_memory_usage_get_s kargs;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_memory_usage_get_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_mem_usage_get(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_memory_usage_get_s))) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
diff --git a/utgard/r6p1/linux/mali_ukk_pp.c b/utgard/r6p1/linux/mali_ukk_pp.c
new file mode 100755
index 0000000..2c59bc7
--- /dev/null
+++ b/utgard/r6p1/linux/mali_ukk_pp.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_pp_start_job(session_data, uargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the jobs were started successfully, 0 is returned.  If there was an error, but the
+	 * jobs were started, we return -ENOENT.  For anything else returned, the jobs were not
+	 * started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_pp_and_gp_start_job(session_data, uargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs)
+{
+	_mali_uk_get_pp_number_of_cores_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_get_pp_number_of_cores(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_pp_number_of_cores_s))) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs)
+{
+	_mali_uk_get_pp_core_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_pp_core_version(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	return 0;
+}
+
+int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs)
+{
+	_mali_uk_pp_disable_wb_s kargs;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_pp_disable_wb_s))) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	_mali_ukk_pp_job_disable_wb(&kargs);
+
+	return 0;
+}
diff --git a/utgard/r6p1/linux/mali_ukk_profiling.c b/utgard/r6p1/linux/mali_ukk_profiling.c
new file mode 100755
index 0000000..2487bf3
--- /dev/null
+++ b/utgard/r6p1/linux/mali_ukk_profiling.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+#include <linux/slab.h>
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs)
+{
+	_mali_uk_profiling_add_event_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_add_event_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_profiling_add_event(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs)
+{
+	_mali_uk_sw_counters_report_s kargs;
+	_mali_osk_errcode_t err;
+	u32 *counter_buffer;
+	u32 __user *counters;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_sw_counters_report_s))) {
+		return -EFAULT;
+	}
+
+	/* make sure that kargs.num_counters is [at least somewhat] sane */
+	if (kargs.num_counters > 10000) {
+		MALI_DEBUG_PRINT(1, ("User space attempted to allocate too many counters.\n"));
+		return -EINVAL;
+	}
+
+	counter_buffer = (u32 *)kmalloc(sizeof(u32) * kargs.num_counters, GFP_KERNEL);
+	if (NULL == counter_buffer) {
+		return -ENOMEM;
+	}
+
+	counters = (u32 *)(uintptr_t)kargs.counters;
+
+	if (0 != copy_from_user(counter_buffer, counters, sizeof(u32) * kargs.num_counters)) {
+		kfree(counter_buffer);
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	kargs.counters = (uintptr_t)counter_buffer;
+
+	err = _mali_ukk_sw_counters_report(&kargs);
+
+	kfree(counter_buffer);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int profiling_get_stream_fd_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_stream_fd_get_s __user *uargs)
+{
+	_mali_uk_profiling_stream_fd_get_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_stream_fd_get_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_profiling_stream_fd_get(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_stream_fd_get_s))) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int profiling_control_set_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_control_set_s __user *uargs)
+{
+	_mali_uk_profiling_control_set_s kargs;
+	_mali_osk_errcode_t err;
+	u8 *kernel_control_data = NULL;
+	u8 *kernel_response_data = NULL;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != get_user(kargs.control_packet_size, &uargs->control_packet_size)) return -EFAULT;
+	if (0 != get_user(kargs.response_packet_size, &uargs->response_packet_size)) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+
+
+	/* Sanity check about the size */
+	if (kargs.control_packet_size > PAGE_SIZE || kargs.response_packet_size > PAGE_SIZE)
+		return -EINVAL;
+
+	if (0 !=  kargs.control_packet_size) {
+
+		if (0 == kargs.response_packet_size)
+			return -EINVAL;
+
+		kernel_control_data = _mali_osk_calloc(1, kargs.control_packet_size);
+		if (NULL == kernel_control_data) {
+			return -ENOMEM;
+		}
+
+		kernel_response_data = _mali_osk_calloc(1, kargs.response_packet_size);
+		if (NULL == kernel_response_data) {
+			_mali_osk_free(kernel_control_data);
+			return -ENOMEM;
+		}
+
+		kargs.control_packet_data = (uintptr_t)kernel_control_data;
+		kargs.response_packet_data = (uintptr_t)kernel_response_data;
+
+		if (0 != copy_from_user((void *)(uintptr_t)kernel_control_data, (void *)(uintptr_t)uargs->control_packet_data, kargs.control_packet_size)) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return -EFAULT;
+		}
+
+		err = _mali_ukk_profiling_control_set(&kargs);
+		if (_MALI_OSK_ERR_OK != err) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return map_errcode(err);
+		}
+
+		if (0 != kargs.response_packet_size && 0 != copy_to_user(((void *)(uintptr_t)uargs->response_packet_data), ((void *)(uintptr_t)kargs.response_packet_data), kargs.response_packet_size)) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return -EFAULT;
+		}
+
+		if (0 != put_user(kargs.response_packet_size, &uargs->response_packet_size)) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return -EFAULT;
+		}
+
+		_mali_osk_free(kernel_control_data);
+		_mali_osk_free(kernel_response_data);
+	} else {
+
+		err = _mali_ukk_profiling_control_set(&kargs);
+		if (_MALI_OSK_ERR_OK != err) {
+			return map_errcode(err);
+		}
+
+	}
+	return 0;
+}
diff --git a/utgard/r6p1/linux/mali_ukk_soft_job.c b/utgard/r6p1/linux/mali_ukk_soft_job.c
new file mode 100755
index 0000000..beeb753
--- /dev/null
+++ b/utgard/r6p1/linux/mali_ukk_soft_job.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+#include "mali_soft_job.h"
+#include "mali_timeline.h"
+
+int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs)
+{
+	_mali_uk_soft_job_start_s kargs;
+	u32 type, point;
+	u64 user_job;
+	struct mali_timeline_fence fence;
+	struct mali_soft_job *job = NULL;
+	u32 __user *job_id_ptr = NULL;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session, -EINVAL);
+
+	MALI_DEBUG_ASSERT_POINTER(session->soft_job_system);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(kargs))) {
+		return -EFAULT;
+	}
+
+	type = kargs.type;
+	user_job = kargs.user_job;
+	job_id_ptr = (u32 __user *)(uintptr_t)kargs.job_id_ptr;
+
+	mali_timeline_fence_copy_uk_fence(&fence, &kargs.fence);
+
+	if ((MALI_SOFT_JOB_TYPE_USER_SIGNALED != type) && (MALI_SOFT_JOB_TYPE_SELF_SIGNALED != type)) {
+		MALI_DEBUG_PRINT_ERROR(("Invalid soft job type specified\n"));
+		return -EINVAL;
+	}
+
+	/* Create soft job. */
+	job = mali_soft_job_create(session->soft_job_system, (enum mali_soft_job_type)type, user_job);
+	if (unlikely(NULL == job)) {
+		return map_errcode(_MALI_OSK_ERR_NOMEM);
+	}
+
+	/* Write job id back to user space. */
+	if (0 != put_user(job->id, job_id_ptr)) {
+		MALI_PRINT_ERROR(("Mali Soft Job: failed to put job id"));
+		mali_soft_job_destroy(job);
+		return map_errcode(_MALI_OSK_ERR_NOMEM);
+	}
+
+	/* Start soft job. */
+	point = mali_soft_job_start(job, &fence);
+
+	if (0 != put_user(point, &uargs->point)) {
+		/* Let user space know that something failed after the job was started. */
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs)
+{
+	u32 job_id;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != get_user(job_id, &uargs->job_id)) return -EFAULT;
+
+	err = mali_soft_job_system_signal_job(session->soft_job_system, job_id);
+
+	return map_errcode(err);
+}
diff --git a/utgard/r6p1/linux/mali_ukk_timeline.c b/utgard/r6p1/linux/mali_ukk_timeline.c
new file mode 100755
index 0000000..f32d8b0
--- /dev/null
+++ b/utgard/r6p1/linux/mali_ukk_timeline.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+#include "mali_timeline.h"
+#include "mali_timeline_fence_wait.h"
+#include "mali_timeline_sync_fence.h"
+
+int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs)
+{
+	u32 val;
+	mali_timeline_id timeline;
+	mali_timeline_point point;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != get_user(val, &uargs->timeline)) return -EFAULT;
+
+	if (MALI_UK_TIMELINE_MAX <= val) {
+		return -EINVAL;
+	}
+
+	timeline = (mali_timeline_id)val;
+
+	point = mali_timeline_system_get_latest_point(session->timeline_system, timeline);
+
+	if (0 != put_user(point, &uargs->point)) return -EFAULT;
+
+	return 0;
+}
+
+int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs)
+{
+	u32 timeout, status;
+	mali_bool ret;
+	_mali_uk_fence_t uk_fence;
+	struct mali_timeline_fence fence;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t))) return -EFAULT;
+	if (0 != get_user(timeout, &uargs->timeout)) return -EFAULT;
+
+	mali_timeline_fence_copy_uk_fence(&fence, &uk_fence);
+
+	ret = mali_timeline_fence_wait(session->timeline_system, &fence, timeout);
+	status = (MALI_TRUE == ret ? 1 : 0);
+
+	if (0 != put_user(status, &uargs->status)) return -EFAULT;
+
+	return 0;
+}
+
+int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs)
+{
+	s32 sync_fd = -1;
+	_mali_uk_fence_t uk_fence;
+	struct mali_timeline_fence fence;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t))) return -EFAULT;
+	mali_timeline_fence_copy_uk_fence(&fence, &uk_fence);
+
+#if defined(CONFIG_SYNC)
+	sync_fd = mali_timeline_sync_fence_create(session->timeline_system, &fence);
+#else
+	sync_fd = -1;
+#endif /* defined(CONFIG_SYNC) */
+
+	if (0 != put_user(sync_fd, &uargs->sync_fd)) return -EFAULT;
+
+	return 0;
+}
diff --git a/utgard/r6p1/linux/mali_ukk_vsync.c b/utgard/r6p1/linux/mali_ukk_vsync.c
new file mode 100755
index 0000000..5b54fb9
--- /dev/null
+++ b/utgard/r6p1/linux/mali_ukk_vsync.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+
+int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs)
+{
+	_mali_uk_vsync_event_report_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_vsync_event_report_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_vsync_event_report(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
diff --git a/utgard/r6p1/linux/mali_ukk_wrappers.h b/utgard/r6p1/linux/mali_ukk_wrappers.h
new file mode 100755
index 0000000..7e59346
--- /dev/null
+++ b/utgard/r6p1/linux/mali_ukk_wrappers.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_ukk_wrappers.h
+ * Defines the wrapper functions for each user-kernel function
+ */
+
+#ifndef __MALI_UKK_WRAPPERS_H__
+#define __MALI_UKK_WRAPPERS_H__
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs);
+int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs);
+int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs);
+int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs);
+int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs);
+int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs);
+int pending_submit_wrapper(struct mali_session_data *session_data, _mali_uk_pending_submit_s __user *uargs);
+
+int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs);
+int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs);
+int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs);
+int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs);
+int mem_cow_wrapper(struct mali_session_data *session_data, _mali_uk_cow_mem_s __user *uargs);
+int mem_cow_modify_range_wrapper(struct mali_session_data *session_data, _mali_uk_cow_modify_range_s __user *uargs);
+int mem_resize_mem_wrapper(struct mali_session_data *session_data, _mali_uk_mem_resize_s __user *uargs);
+int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs);
+int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs);
+int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs);
+int mem_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs);
+
+int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs);
+int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs);
+int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs);
+int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs);
+int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs);
+int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs);
+int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs);
+int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs);
+int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs);
+int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs);
+int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs);
+int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs);
+int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs);
+int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs);
+
+int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs);
+int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs);
+int profiling_get_stream_fd_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_stream_fd_get_s __user *uargs);
+int profiling_control_set_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_control_set_s __user *uargs);
+
+int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs);
+
+
+int map_errcode(_mali_osk_errcode_t err);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UKK_WRAPPERS_H__ */
diff --git a/utgard/r6p1/readme.txt b/utgard/r6p1/readme.txt
new file mode 100755
index 0000000..6785ac9
--- /dev/null
+++ b/utgard/r6p1/readme.txt
@@ -0,0 +1,28 @@
+Building the Mali Device Driver for Linux
+-----------------------------------------
+
+Build the Mali Device Driver for Linux by running the following make command:
+
+KDIR=<kdir_path> USING_UMP=<ump_option> BUILD=<build_option> make
+
+where
+    kdir_path: Path to your Linux Kernel directory
+    ump_option: 1 = Enable UMP support(*)
+                0 = disable UMP support
+    build_option: debug = debug build of driver
+                  release = release build of driver
+
+(*)  For newer Linux Kernels, the Module.symvers file for the UMP device driver
+     must be available. The UMP_SYMVERS_FILE variable in the Makefile should
+     point to this file. This file is generated when the UMP driver is built.
+
+The result will be a mali.ko file, which can be loaded into the Linux kernel
+by using the insmod command.
+
+Use of UMP is not recommended. The dma-buf API in the Linux kernel has
+replaced UMP. The Mali Device Driver will be built with dma-buf support if the
+kernel config includes enabled dma-buf.
+
+The kernel needs to be provided with a platform_device struct for the Mali GPU
+device. See the mali_utgard.h header file for how to set up the Mali GPU
+resources.
diff --git a/utgard/r6p1/regs/mali_200_regs.h b/utgard/r6p1/regs/mali_200_regs.h
new file mode 100755
index 0000000..c904ad2
--- /dev/null
+++ b/utgard/r6p1/regs/mali_200_regs.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2010, 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _MALI200_REGS_H_
+#define _MALI200_REGS_H_
+
+/**
+ *  Enum for management register addresses.
+ */
+enum mali200_mgmt_reg {
+	MALI200_REG_ADDR_MGMT_VERSION                              = 0x1000,
+	MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR               = 0x1004,
+	MALI200_REG_ADDR_MGMT_STATUS                               = 0x1008,
+	MALI200_REG_ADDR_MGMT_CTRL_MGMT                            = 0x100c,
+
+	MALI200_REG_ADDR_MGMT_INT_RAWSTAT                          = 0x1020,
+	MALI200_REG_ADDR_MGMT_INT_CLEAR                            = 0x1024,
+	MALI200_REG_ADDR_MGMT_INT_MASK                             = 0x1028,
+	MALI200_REG_ADDR_MGMT_INT_STATUS                           = 0x102c,
+
+	MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS                     = 0x1050,
+
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE                    = 0x1080,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC                       = 0x1084,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_LIMIT                     = 0x1088,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE                     = 0x108c,
+
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE                    = 0x10a0,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC                       = 0x10a4,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE                     = 0x10ac,
+
+	MALI200_REG_ADDR_MGMT_PERFMON_CONTR                        = 0x10b0,
+	MALI200_REG_ADDR_MGMT_PERFMON_BASE                         = 0x10b4,
+
+	MALI200_REG_SIZEOF_REGISTER_BANK                           = 0x10f0
+
+};
+
+#define MALI200_REG_VAL_PERF_CNT_ENABLE 1
+
+enum mali200_mgmt_ctrl_mgmt {
+	MALI200_REG_VAL_CTRL_MGMT_STOP_BUS         = (1 << 0),
+	MALI200_REG_VAL_CTRL_MGMT_FLUSH_CACHES     = (1 << 3),
+	MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET      = (1 << 5),
+	MALI200_REG_VAL_CTRL_MGMT_START_RENDERING  = (1 << 6),
+	MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET     = (1 << 7), /* Only valid for Mali-300 and later */
+};
+
+enum mali200_mgmt_irq {
+	MALI200_REG_VAL_IRQ_END_OF_FRAME          = (1 << 0),
+	MALI200_REG_VAL_IRQ_END_OF_TILE           = (1 << 1),
+	MALI200_REG_VAL_IRQ_HANG                  = (1 << 2),
+	MALI200_REG_VAL_IRQ_FORCE_HANG            = (1 << 3),
+	MALI200_REG_VAL_IRQ_BUS_ERROR             = (1 << 4),
+	MALI200_REG_VAL_IRQ_BUS_STOP              = (1 << 5),
+	MALI200_REG_VAL_IRQ_CNT_0_LIMIT           = (1 << 6),
+	MALI200_REG_VAL_IRQ_CNT_1_LIMIT           = (1 << 7),
+	MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR  = (1 << 8),
+	MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND = (1 << 9),
+	MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW  = (1 << 10),
+	MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW   = (1 << 11),
+	MALI400PP_REG_VAL_IRQ_RESET_COMPLETED       = (1 << 12),
+};
+
+#define MALI200_REG_VAL_IRQ_MASK_ALL  ((enum mali200_mgmt_irq) (\
+				       MALI200_REG_VAL_IRQ_END_OF_FRAME                           |\
+				       MALI200_REG_VAL_IRQ_END_OF_TILE                            |\
+				       MALI200_REG_VAL_IRQ_HANG                                   |\
+				       MALI200_REG_VAL_IRQ_FORCE_HANG                             |\
+				       MALI200_REG_VAL_IRQ_BUS_ERROR                              |\
+				       MALI200_REG_VAL_IRQ_BUS_STOP                               |\
+				       MALI200_REG_VAL_IRQ_CNT_0_LIMIT                            |\
+				       MALI200_REG_VAL_IRQ_CNT_1_LIMIT                            |\
+				       MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR                   |\
+				       MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND                  |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW                   |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW                    |\
+				       MALI400PP_REG_VAL_IRQ_RESET_COMPLETED))
+
+#define MALI200_REG_VAL_IRQ_MASK_USED ((enum mali200_mgmt_irq) (\
+				       MALI200_REG_VAL_IRQ_END_OF_FRAME                           |\
+				       MALI200_REG_VAL_IRQ_FORCE_HANG                             |\
+				       MALI200_REG_VAL_IRQ_BUS_ERROR                              |\
+				       MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR                   |\
+				       MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND                  |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW                   |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW))
+
+#define MALI200_REG_VAL_IRQ_MASK_NONE ((enum mali200_mgmt_irq)(0))
+
+enum mali200_mgmt_status {
+	MALI200_REG_VAL_STATUS_RENDERING_ACTIVE     = (1 << 0),
+	MALI200_REG_VAL_STATUS_BUS_STOPPED          = (1 << 4),
+};
+
+enum mali200_render_unit {
+	MALI200_REG_ADDR_FRAME = 0x0000,
+	MALI200_REG_ADDR_RSW   = 0x0004,
+	MALI200_REG_ADDR_STACK = 0x0030,
+	MALI200_REG_ADDR_STACK_SIZE = 0x0034,
+	MALI200_REG_ADDR_ORIGIN_OFFSET_X  = 0x0040
+};
+
+enum mali200_wb_unit {
+	MALI200_REG_ADDR_WB0 = 0x0100,
+	MALI200_REG_ADDR_WB1 = 0x0200,
+	MALI200_REG_ADDR_WB2 = 0x0300
+};
+
+enum mali200_wb_unit_regs {
+	MALI200_REG_ADDR_WB_SOURCE_SELECT = 0x0000,
+	MALI200_REG_ADDR_WB_SOURCE_ADDR   = 0x0004,
+};
+
+/* This should be in the top 16 bit of the version register of Mali PP */
+#define MALI200_PP_PRODUCT_ID 0xC807
+#define MALI300_PP_PRODUCT_ID 0xCE07
+#define MALI400_PP_PRODUCT_ID 0xCD07
+#define MALI450_PP_PRODUCT_ID 0xCF07
+#define MALI470_PP_PRODUCT_ID 0xCF08
+
+
+
+#endif /* _MALI200_REGS_H_ */
diff --git a/utgard/r6p1/regs/mali_gp_regs.h b/utgard/r6p1/regs/mali_gp_regs.h
new file mode 100755
index 0000000..435953d
--- /dev/null
+++ b/utgard/r6p1/regs/mali_gp_regs.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2010, 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _MALIGP2_CONROL_REGS_H_
+#define _MALIGP2_CONROL_REGS_H_
+
+/**
+ * These are the different geometry processor control registers.
+ * Their usage is to control and monitor the operation of the
+ * Vertex Shader and the Polygon List Builder in the geometry processor.
+ * Addresses are in 32-bit word relative sizes.
+ * @see [P0081] "Geometry Processor Data Structures" for details
+ */
+
+typedef enum {
+	MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR           = 0x00,
+	MALIGP2_REG_ADDR_MGMT_VSCL_END_ADDR             = 0x04,
+	MALIGP2_REG_ADDR_MGMT_PLBUCL_START_ADDR         = 0x08,
+	MALIGP2_REG_ADDR_MGMT_PLBUCL_END_ADDR           = 0x0c,
+	MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR     = 0x10,
+	MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR       = 0x14,
+	MALIGP2_REG_ADDR_MGMT_CMD                       = 0x20,
+	MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT               = 0x24,
+	MALIGP2_REG_ADDR_MGMT_INT_CLEAR                 = 0x28,
+	MALIGP2_REG_ADDR_MGMT_INT_MASK                  = 0x2C,
+	MALIGP2_REG_ADDR_MGMT_INT_STAT                  = 0x30,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE         = 0x3C,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE         = 0x40,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC            = 0x44,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC            = 0x48,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE          = 0x4C,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE          = 0x50,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_LIMIT          = 0x54,
+	MALIGP2_REG_ADDR_MGMT_STATUS                    = 0x68,
+	MALIGP2_REG_ADDR_MGMT_VERSION                   = 0x6C,
+	MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR_READ      = 0x80,
+	MALIGP2_REG_ADDR_MGMT_PLBCL_START_ADDR_READ     = 0x84,
+	MALIGP2_CONTR_AXI_BUS_ERROR_STAT                = 0x94,
+	MALIGP2_REGISTER_ADDRESS_SPACE_SIZE             = 0x98,
+} maligp_reg_addr_mgmt_addr;
+
+#define MALIGP2_REG_VAL_PERF_CNT_ENABLE 1
+
+/**
+ * Commands to geometry processor.
+ *  @see MALIGP2_CTRL_REG_CMD
+ */
+typedef enum {
+	MALIGP2_REG_VAL_CMD_START_VS                    = (1 << 0),
+	MALIGP2_REG_VAL_CMD_START_PLBU                  = (1 << 1),
+	MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC   = (1 << 4),
+	MALIGP2_REG_VAL_CMD_RESET                               = (1 << 5),
+	MALIGP2_REG_VAL_CMD_FORCE_HANG                  = (1 << 6),
+	MALIGP2_REG_VAL_CMD_STOP_BUS                    = (1 << 9),
+	MALI400GP_REG_VAL_CMD_SOFT_RESET                = (1 << 10), /* only valid for Mali-300 and later */
+} mgp_contr_reg_val_cmd;
+
+
+/**  @defgroup MALIGP2_IRQ
+ * Interrupt status of geometry processor.
+ *  @see MALIGP2_CTRL_REG_INT_RAWSTAT, MALIGP2_REG_ADDR_MGMT_INT_CLEAR,
+ *       MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_ADDR_MGMT_INT_STAT
+ * @{
+ */
+#define MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      (1 << 0)
+#define MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    (1 << 1)
+#define MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     (1 << 2)
+#define MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ          (1 << 3)
+#define MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ        (1 << 4)
+#define MALIGP2_REG_VAL_IRQ_HANG                (1 << 5)
+#define MALIGP2_REG_VAL_IRQ_FORCE_HANG          (1 << 6)
+#define MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT    (1 << 7)
+#define MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT    (1 << 8)
+#define MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     (1 << 9)
+#define MALIGP2_REG_VAL_IRQ_SYNC_ERROR          (1 << 10)
+#define MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       (1 << 11)
+#define MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED     (1 << 12)
+#define MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      (1 << 13)
+#define MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     (1 << 14)
+#define MALI400GP_REG_VAL_IRQ_RESET_COMPLETED     (1 << 19)
+#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW (1 << 20)
+#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  (1 << 21)
+#define MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS  (1 << 22)
+
+/* Mask defining all IRQs in Mali GP */
+#define MALIGP2_REG_VAL_IRQ_MASK_ALL \
+	(\
+	 MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     | \
+	 MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ          | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ        | \
+	 MALIGP2_REG_VAL_IRQ_HANG                | \
+	 MALIGP2_REG_VAL_IRQ_FORCE_HANG          | \
+	 MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT    | \
+	 MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT    | \
+	 MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     | \
+	 MALIGP2_REG_VAL_IRQ_SYNC_ERROR          | \
+	 MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       | \
+	 MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED     | \
+	 MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      | \
+	 MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     | \
+	 MALI400GP_REG_VAL_IRQ_RESET_COMPLETED     | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  | \
+	 MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+/* Mask defining the IRQs in Mali GP which we use */
+#define MALIGP2_REG_VAL_IRQ_MASK_USED \
+	(\
+	 MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     | \
+	 MALIGP2_REG_VAL_IRQ_FORCE_HANG          | \
+	 MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     | \
+	 MALIGP2_REG_VAL_IRQ_SYNC_ERROR          | \
+	 MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       | \
+	 MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      | \
+	 MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  | \
+	 MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+/* Mask defining non IRQs on MaliGP2*/
+#define MALIGP2_REG_VAL_IRQ_MASK_NONE 0
+
+/** }@ defgroup MALIGP2_IRQ*/
+
+/** @defgroup MALIGP2_STATUS
+ * The different Status values to the geometry processor.
+ *  @see MALIGP2_CTRL_REG_STATUS
+ * @{
+ */
+#define MALIGP2_REG_VAL_STATUS_VS_ACTIVE         0x0002
+#define MALIGP2_REG_VAL_STATUS_BUS_STOPPED       0x0004
+#define MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE       0x0008
+#define MALIGP2_REG_VAL_STATUS_BUS_ERROR         0x0040
+#define MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR   0x0100
+/** }@ defgroup MALIGP2_STATUS*/
+
+#define MALIGP2_REG_VAL_STATUS_MASK_ACTIVE (\
+		MALIGP2_REG_VAL_STATUS_VS_ACTIVE|\
+		MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE)
+
+
+#define MALIGP2_REG_VAL_STATUS_MASK_ERROR (\
+		MALIGP2_REG_VAL_STATUS_BUS_ERROR |\
+		MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR )
+
+/* This should be in the top 16 bit of the version register of gp.*/
+#define MALI200_GP_PRODUCT_ID 0xA07
+#define MALI300_GP_PRODUCT_ID 0xC07
+#define MALI400_GP_PRODUCT_ID 0xB07
+#define MALI450_GP_PRODUCT_ID 0xD07
+
+/**
+ * The different sources for instrumented on the geometry processor.
+ *  @see MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC
+ */
+
+enum MALIGP2_cont_reg_perf_cnt_src {
+	MALIGP2_REG_VAL_PERF_CNT1_SRC_NUMBER_OF_VERTICES_PROCESSED = 0x0a,
+};
+
+#endif
diff --git a/utgard/r6p1/timestamp-arm11-cc/mali_timestamp.c b/utgard/r6p1/timestamp-arm11-cc/mali_timestamp.c
new file mode 100755
index 0000000..f7f7baf
--- /dev/null
+++ b/utgard/r6p1/timestamp-arm11-cc/mali_timestamp.c
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2010-2011, 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timestamp.h"
+
+/* This file is intentionally left empty, as all functions are inlined in mali_profiling_sampler.h */
diff --git a/utgard/r6p1/timestamp-arm11-cc/mali_timestamp.h b/utgard/r6p1/timestamp-arm11-cc/mali_timestamp.h
new file mode 100755
index 0000000..757f643
--- /dev/null
+++ b/utgard/r6p1/timestamp-arm11-cc/mali_timestamp.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMESTAMP_H__
+#define __MALI_TIMESTAMP_H__
+
+#include "mali_osk.h"
+
+MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void)
+{
+	/*
+	 * reset counters and overflow flags
+	 */
+
+	u32 mask = (1 << 0) | /* enable all three counters */
+		   (0 << 1) | /* reset both Count Registers to 0x0 */
+		   (1 << 2) | /* reset the Cycle Counter Register to 0x0 */
+		   (0 << 3) | /* 1 = Cycle Counter Register counts every 64th processor clock cycle */
+		   (0 << 4) | /* Count Register 0 interrupt enable */
+		   (0 << 5) | /* Count Register 1 interrupt enable */
+		   (0 << 6) | /* Cycle Counter interrupt enable */
+		   (0 << 8) | /* Count Register 0 overflow flag (clear or write, flag on read) */
+		   (0 << 9) | /* Count Register 1 overflow flag (clear or write, flag on read) */
+		   (1 << 10); /* Cycle Counter Register overflow flag (clear or write, flag on read) */
+
+	__asm__ __volatile__("MCR    p15, 0, %0, c15, c12, 0" : : "r"(mask));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE u64 _mali_timestamp_get(void)
+{
+	u32 result;
+
+	/* this is for the clock cycles */
+	__asm__ __volatile__("MRC    p15, 0, %0, c15, c12, 1" : "=r"(result));
+
+	return (u64)result;
+}
+
+#endif /* __MALI_TIMESTAMP_H__ */
diff --git a/utgard/r6p1/timestamp-default/mali_timestamp.c b/utgard/r6p1/timestamp-default/mali_timestamp.c
new file mode 100755
index 0000000..f7f7baf
--- /dev/null
+++ b/utgard/r6p1/timestamp-default/mali_timestamp.c
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2010-2011, 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timestamp.h"
+
+/* This file is intentionally left empty, as all functions are inlined in mali_profiling_sampler.h */
diff --git a/utgard/r6p1/timestamp-default/mali_timestamp.h b/utgard/r6p1/timestamp-default/mali_timestamp.h
new file mode 100755
index 0000000..21700fe
--- /dev/null
+++ b/utgard/r6p1/timestamp-default/mali_timestamp.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMESTAMP_H__
+#define __MALI_TIMESTAMP_H__
+
+#include "mali_osk.h"
+
+MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void)
+{
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE u64 _mali_timestamp_get(void)
+{
+	return _mali_osk_boot_time_get_ns();
+}
+
+#endif /* __MALI_TIMESTAMP_H__ */
diff --git a/utgard/r6p2/.version b/utgard/r6p2/.version
new file mode 100644
index 0000000..98aa613
--- /dev/null
+++ b/utgard/r6p2/.version
@@ -0,0 +1 @@
+r6p2-01rel0
diff --git a/utgard/r6p2/Kbuild b/utgard/r6p2/Kbuild
new file mode 100755
index 0000000..66b14af
--- /dev/null
+++ b/utgard/r6p2/Kbuild
@@ -0,0 +1,258 @@
+#
+# Copyright (C) 2010-2011 ARM Limited. All rights reserved.
+# 
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+# 
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+# This file is called by the Linux build system.
+
+# set up defaults if not defined by the user
+include $(src)/platform/Kbuild.amlogic
+
+TIMESTAMP ?= default
+OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB ?= 16
+USING_GPU_UTILIZATION ?= 0
+PROFILING_SKIP_PP_JOBS ?= 0
+PROFILING_SKIP_PP_AND_GP_JOBS ?= 0
+MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP ?= 0
+MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED ?= 0
+MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS ?= 0
+MALI_UPPER_HALF_SCHEDULING ?= 1
+MALI_ENABLE_CPU_CYCLES ?= 0
+
+# For customer releases the Linux Device Drivers will be provided as ARM proprietary and GPL releases:
+# The ARM proprietary product will only include the license/proprietary directory
+# The GPL product will only include the license/gpl directory
+ifeq ($(wildcard $(src)/linux/license/gpl/*),)
+    ccflags-y += -I$(src)/linux/license/proprietary
+    ifeq ($(CONFIG_MALI400_PROFILING),y)
+        $(error Profiling is incompatible with non-GPL license)
+    endif
+    ifeq ($(CONFIG_PM_RUNTIME),y)
+        $(error Runtime PM is incompatible with non-GPL license)
+    endif
+    ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
+        $(error DMA-BUF is incompatible with non-GPL license)
+    endif
+    $(error Linux Device integration is incompatible with non-GPL license)
+else
+    ccflags-y += -I$(src)/linux/license/gpl
+endif
+
+ifeq ($(USING_GPU_UTILIZATION), 1)
+    ifeq ($(USING_DVFS), 1)
+        $(error USING_GPU_UTILIZATION conflict with USING_DVFS you can read the Integration Guide to choose which one do you need)
+    endif
+endif
+
+ifeq ($(MALI_PLATFORM_FILES),)
+ifeq ($(CONFIG_ARCH_EXYNOS4),y)
+EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1
+export MALI_PLATFORM=exynos4
+export MALI_PLATFORM_FILES_BUILDIN = $(notdir $(wildcard $(src)/platform/$(MALI_PLATFORM)/*.c))
+export MALI_PLATFORM_FILES_ADD_PREFIX = $(addprefix platform/$(MALI_PLATFORM)/,$(MALI_PLATFORM_FILES_BUILDIN)) 
+endif
+endif
+
+mali-y += \
+	linux/mali_osk_atomics.o \
+	linux/mali_osk_irq.o \
+	linux/mali_osk_wq.o \
+	linux/mali_osk_locks.o \
+	linux/mali_osk_wait_queue.o \
+	linux/mali_osk_low_level_mem.o \
+	linux/mali_osk_math.o \
+	linux/mali_osk_memory.o \
+	linux/mali_osk_misc.o \
+	linux/mali_osk_mali.o \
+	linux/mali_osk_notification.o \
+	linux/mali_osk_time.o \
+	linux/mali_osk_timers.o \
+	linux/mali_osk_bitmap.o
+
+mali-y += linux/mali_memory.o linux/mali_memory_os_alloc.o
+mali-y += linux/mali_memory_external.o
+mali-y += linux/mali_memory_block_alloc.o
+mali-y += linux/mali_memory_swap_alloc.o
+
+mali-y += \
+	linux/mali_memory_manager.o \
+	linux/mali_memory_virtual.o \
+	linux/mali_memory_util.o \
+	linux/mali_memory_cow.o \
+	linux/mali_memory_defer_bind.o
+
+mali-y += \
+	linux/mali_ukk_mem.o \
+	linux/mali_ukk_gp.o \
+	linux/mali_ukk_pp.o \
+	linux/mali_ukk_core.o \
+	linux/mali_ukk_soft_job.o \
+	linux/mali_ukk_timeline.o
+
+mali-$(CONFIG_MALI_DEVFREQ) += \
+	linux/mali_devfreq.o \
+	common/mali_pm_metrics.o
+
+# Source files which always are included in a build
+mali-y += \
+	common/mali_kernel_core.o \
+	linux/mali_kernel_linux.o \
+	common/mali_session.o \
+	linux/mali_device_pause_resume.o \
+	common/mali_kernel_vsync.o \
+	linux/mali_ukk_vsync.o \
+	linux/mali_kernel_sysfs.o \
+	common/mali_mmu.o \
+	common/mali_mmu_page_directory.o \
+	common/mali_mem_validation.o \
+	common/mali_hw_core.o \
+	common/mali_gp.o \
+	common/mali_pp.o \
+	common/mali_pp_job.o \
+	common/mali_gp_job.o \
+	common/mali_soft_job.o \
+	common/mali_scheduler.o \
+	common/mali_executor.o \
+	common/mali_group.o \
+	common/mali_dlbu.o \
+	common/mali_broadcast.o \
+	common/mali_pm.o \
+	common/mali_pmu.o \
+	common/mali_user_settings_db.o \
+	common/mali_kernel_utilization.o \
+	common/mali_control_timer.o \
+	common/mali_l2_cache.o \
+	common/mali_timeline.o \
+	common/mali_timeline_fence_wait.o \
+	common/mali_timeline_sync_fence.o \
+	common/mali_spinlock_reentrant.o \
+	common/mali_pm_domain.o \
+	linux/mali_osk_pm.o \
+	linux/mali_pmu_power_up_down.o \
+	__malidrv_build_info.o
+
+ifneq ($(wildcard $(src)/linux/mali_slp_global_lock.c),)
+	mali-y += linux/mali_slp_global_lock.o
+endif
+
+ifneq ($(MALI_PLATFORM_FILES),)
+	mali-y += $(MALI_PLATFORM_FILES:.c=.o)
+endif
+
+ifneq ($(MALI_PLATFORM_FILES_ADD_PREFIX),)
+	mali-y += $(MALI_PLATFORM_FILES_ADD_PREFIX:.c=.o)
+endif
+
+mali-$(CONFIG_MALI400_PROFILING) += linux/mali_ukk_profiling.o
+mali-$(CONFIG_MALI400_PROFILING) += linux/mali_osk_profiling.o
+
+mali-$(CONFIG_MALI400_INTERNAL_PROFILING) += linux/mali_profiling_internal.o timestamp-$(TIMESTAMP)/mali_timestamp.o
+ccflags-$(CONFIG_MALI400_INTERNAL_PROFILING) += -I$(src)/timestamp-$(TIMESTAMP)
+
+mali-$(CONFIG_DMA_SHARED_BUFFER) += linux/mali_memory_dma_buf.o
+mali-$(CONFIG_DMA_SHARED_BUFFER) += linux/mali_memory_secure.o
+mali-$(CONFIG_SYNC) += linux/mali_sync.o
+mali-$(CONFIG_MALI_DMA_BUF_FENCE) += linux/mali_dma_fence.o
+ccflags-$(CONFIG_SYNC) += -Idrivers/staging/android
+
+mali-$(CONFIG_MALI400_UMP) += linux/mali_memory_ump.o
+
+mali-$(CONFIG_MALI_DVFS) += common/mali_dvfs_policy.o
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI400) := mali.o
+
+ccflags-y += $(EXTRA_DEFINES)
+
+# Set up our defines, which will be passed to gcc
+ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP)
+ccflags-y += -DMALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED=$(MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED)
+ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS)
+ifdef CONFIG_MALI400_DEBUG
+ccflags-y += -DMALI_STATE_TRACKING=1
+else
+ccflags-y += -DMALI_STATE_TRACKING=0
+endif
+ccflags-y += -DMALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB)
+ccflags-y += -DUSING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION)
+ccflags-y += -DMALI_ENABLE_CPU_CYCLES=$(MALI_ENABLE_CPU_CYCLES)
+
+ifeq ($(MALI_UPPER_HALF_SCHEDULING),1)
+	ccflags-y += -DMALI_UPPER_HALF_SCHEDULING
+endif
+
+#build-in include path is different
+ifeq ($(MALI_PLATFORM_FILES),)
+ccflags-$(CONFIG_MALI400_UMP) += -I$(src)/../ump/include/
+else
+ccflags-$(CONFIG_MALI400_UMP) += -I$(src)/../../ump/include/ump
+endif
+ccflags-$(CONFIG_MALI400_DEBUG) += -DDEBUG
+
+# Use our defines when compiling
+ccflags-y += -I$(src) -I$(src)/include -I$(src)/common -I$(src)/linux -I$(src)/platform
+
+# Get subversion revision number, fall back to only ${MALI_RELEASE_NAME} if no svn info is available
+MALI_RELEASE_NAME=$(shell cat $(src)/.version 2> /dev/null)
+
+SVN_INFO = (cd $(src); svn info 2>/dev/null)
+
+ifneq ($(shell $(SVN_INFO) 2>/dev/null),)
+# SVN detected
+SVN_REV := $(shell $(SVN_INFO) | grep '^Revision: '| sed -e 's/^Revision: //' 2>/dev/null)
+DRIVER_REV := $(MALI_RELEASE_NAME)-r$(SVN_REV)
+CHANGE_DATE := $(shell $(SVN_INFO) | grep '^Last Changed Date: ' | cut -d: -f2- | cut -b2-)
+CHANGED_REVISION := $(shell $(SVN_INFO) | grep '^Last Changed Rev: ' | cut -d: -f2- | cut -b2-)
+REPO_URL := $(shell $(SVN_INFO) | grep '^URL: ' | cut -d: -f2- | cut -b2-)
+
+else # SVN
+GIT_REV := $(shell cd $(src); git describe --always 2>/dev/null)
+ifneq ($(GIT_REV),)
+# Git detected
+DRIVER_REV := $(MALI_RELEASE_NAME)-$(GIT_REV)
+CHANGE_DATE := $(shell cd $(src); git log -1 --format="%ci")
+CHANGED_REVISION := $(GIT_REV)
+REPO_URL := $(shell cd $(src); git describe --all --always 2>/dev/null)
+
+else # Git
+# No Git or SVN detected
+DRIVER_REV := $(MALI_RELEASE_NAME)
+CHANGE_DATE := $(MALI_RELEASE_NAME)
+CHANGED_REVISION := $(MALI_RELEASE_NAME)
+endif
+endif
+
+ccflags-y += -DSVN_REV_STRING=\"$(DRIVER_REV)\"
+
+VERSION_STRINGS :=
+VERSION_STRINGS += API_VERSION=$(shell cd $(src); grep "\#define _MALI_API_VERSION" $(FILES_PREFIX)include/linux/mali/mali_utgard_uk_types.h | cut -d' ' -f 3 )
+VERSION_STRINGS += REPO_URL=$(REPO_URL)
+VERSION_STRINGS += REVISION=$(DRIVER_REV)
+VERSION_STRINGS += CHANGED_REVISION=$(CHANGED_REVISION)
+VERSION_STRINGS += CHANGE_DATE=$(CHANGE_DATE)
+VERSION_STRINGS += BUILD_DATE=$(shell date)
+ifdef CONFIG_MALI400_DEBUG
+VERSION_STRINGS += BUILD=debug
+else
+VERSION_STRINGS += BUILD=release
+endif
+VERSION_STRINGS += TARGET_PLATFORM=$(TARGET_PLATFORM)
+VERSION_STRINGS += MALI_PLATFORM=$(MALI_PLATFORM)
+VERSION_STRINGS += KDIR=$(KDIR)
+VERSION_STRINGS += OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB)
+VERSION_STRINGS += USING_UMP=$(CONFIG_MALI400_UMP)
+VERSION_STRINGS += USING_PROFILING=$(CONFIG_MALI400_PROFILING)
+VERSION_STRINGS += USING_INTERNAL_PROFILING=$(CONFIG_MALI400_INTERNAL_PROFILING)
+VERSION_STRINGS += USING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION)
+VERSION_STRINGS += USING_DVFS=$(CONFIG_MALI_DVFS)
+VERSION_STRINGS += USING_DMA_BUF_FENCE = $(CONFIG_MALI_DMA_BUF_FENCE)
+VERSION_STRINGS += MALI_UPPER_HALF_SCHEDULING=$(MALI_UPPER_HALF_SCHEDULING)
+
+# Create file with Mali driver configuration
+$(src)/__malidrv_build_info.c:
+	@echo 'const char *__malidrv_build_info(void) { return "malidrv: $(VERSION_STRINGS)";}' > $(src)/__malidrv_build_info.c
diff --git a/utgard/r6p2/Kconfig b/utgard/r6p2/Kconfig
new file mode 100755
index 0000000..fbf08de
--- /dev/null
+++ b/utgard/r6p2/Kconfig
@@ -0,0 +1,136 @@
+menu "Mali GPU OpenGL device driver"
+config MALI400
+	tristate "Mali-300/400/450 support"
+	depends on ARM || ARM64
+	default m
+	select DMA_SHARED_BUFFER
+	---help---
+	  This enables support for the ARM Mali-300, Mali-400, and Mali-450
+	  GPUs.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called mali.
+
+config MALI470
+	bool "Enable Mali-470 support"
+	depends on MALI400
+	---help---
+	  This enables support for Mali-470 specific features.
+
+config MALI400_DEBUG
+	bool "Enable debug in Mali driver"
+	depends on MALI400
+	default n
+	---help---
+	  This enabled extra debug checks and messages in the Mali driver.
+
+config MALI400_PROFILING_EXTRA_SUPPORT
+	bool "Select other items in kernel to support PROFILING."
+	depends on MALI400_PROFILING
+	select PROFILING
+	select FTRACE
+	select PERF_EVENTS
+	select ENABLE_DEFAULT_TRACERS
+	select DEBUG_MUTEXES
+	select HIGH_RES_TIMERS
+	select HW_PERF_EVENTS
+	select CPU_FREQ
+	select MALI400_DEBUG
+
+config MALI400_PROFILING
+	bool "Enable Mali profiling"
+	depends on MALI400
+	select TRACEPOINTS
+	default n
+	---help---
+	  This enables gator profiling of Mali GPU events.
+
+config MALI400_INTERNAL_PROFILING
+	bool "Enable internal Mali profiling API"
+	depends on MALI400_PROFILING
+	default n
+	---help---
+	  This enables the internal legacy Mali profiling API.
+
+config MALI400_UMP
+	bool "Enable UMP support"
+	depends on MALI400
+	---help---
+	  This enables support for the UMP memory sharing API in the Mali driver.
+
+config MALI_DVFS
+	bool "Enable Mali dynamically frequency change"
+	depends on MALI400 && !MALI_DEVFREQ
+	default n
+	---help---
+	  This enables support for dynamic change frequency of Mali with the goal of lowering power consumption.
+
+config MALI_DMA_BUF_MAP_ON_ATTACH
+	bool "Map dma-buf attachments on attach"
+	depends on MALI400 && DMA_SHARED_BUFFER
+	default y
+	---help---
+	  This makes the Mali driver map dma-buf attachments after doing
+	  attach. If this is not set the dma-buf attachments will be mapped for
+	  every time the GPU need to access the buffer.
+
+	  Mapping for each access can cause lower performance.
+
+config MALI_SHARED_INTERRUPTS
+	bool "Support for shared interrupts"
+	depends on MALI400
+	default n
+	---help---
+	  Adds functionality required to properly support shared interrupts.  Without this support,
+	  the device driver will fail during insmod if it detects shared interrupts.  This also
+	  works when the GPU is not using shared interrupts, but might have a slight performance
+	  impact.
+
+if ARCH_MESON6
+config	MESON6_GPU_EXTRA
+	bool "M6 fix"
+	depends on MALI400
+	default y
+	select MALI_SHARED_INTERRUPTS
+endif
+
+config MALI_PMU_PARALLEL_POWER_UP
+	bool "Power up Mali PMU domains in parallel"
+	depends on MALI400
+	default n
+	---help---
+	  This makes the Mali driver power up all PMU power domains in parallel, instead of
+	  powering up domains one by one, with a slight delay in between. Powering on all power
+	  domains at the same time may cause peak currents higher than what some systems can handle.
+	  These systems must not enable this option.
+
+config MALI_DT
+	bool "Using device tree to initialize module"
+	depends on MALI400 && OF
+	default n
+	---help---
+	  This enable the Mali driver to choose the device tree path to get platform resoures
+	  and disable the old config method. Mali driver could run on the platform which the
+	  device tree is enabled in kernel and corresponding hardware description is implemented
+	  properly in device DTS file.
+
+config MALI_DEVFREQ
+	bool "Using devfreq to tuning frequency"
+	depends on MALI400 && PM_DEVFREQ
+	default n
+	---help---
+	Support devfreq for Mali.
+
+	Using the devfreq framework and, by default, the simpleondemand
+	governor, the frequency of Mali will be dynamically selected from the
+	available OPPs.
+
+config MALI_QUIET
+	bool "Make Mali driver very quiet"
+	depends on MALI400 && !MALI400_DEBUG
+	default n
+	---help---
+	  This forces the Mali driver to never print any messages.
+
+	  If unsure, say N.
+endmenu
diff --git a/utgard/r6p2/Makefile b/utgard/r6p2/Makefile
new file mode 100755
index 0000000..0a293e0
--- /dev/null
+++ b/utgard/r6p2/Makefile
@@ -0,0 +1,207 @@
+#
+# Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+# 
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+# 
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+USE_UMPV2=0
+USING_PROFILING ?= 1
+USING_INTERNAL_PROFILING ?= 0
+USING_DVFS ?= 1
+USING_DMA_BUF_FENCE ?= 0
+MALI_HEATMAPS_ENABLED ?= 0
+MALI_DMA_BUF_MAP_ON_ATTACH ?= 1
+MALI_PMU_PARALLEL_POWER_UP ?= 0
+USING_DT ?= 0
+MALI_MEM_SWAP_TRACKING ?= 0
+USING_DEVFREQ ?= 0
+
+# The Makefile sets up "arch" based on the CONFIG, creates the version info
+# string and the __malidrv_build_info.c file, and then call the Linux build
+# system to actually build the driver. After that point the Kbuild file takes
+# over.
+
+# set up defaults if not defined by the user
+ARCH ?= arm
+
+OSKOS=linux
+FILES_PREFIX=
+
+check_cc2 = \
+	$(shell if $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; \
+	then \
+		echo "$(2)"; \
+	else \
+		echo "$(3)"; \
+	fi ;)
+
+# This conditional makefile exports the global definition ARM_INTERNAL_BUILD. Customer releases will not include arm_internal.mak
+-include ../../../arm_internal.mak
+
+# Give warning of old config parameters are used
+ifneq ($(CONFIG),)
+$(warning "You have specified the CONFIG variable which is no longer in used. Use TARGET_PLATFORM instead.")
+endif
+
+ifneq ($(CPU),)
+$(warning "You have specified the CPU variable which is no longer in used. Use TARGET_PLATFORM instead.")
+endif
+
+# Include the mapping between TARGET_PLATFORM and KDIR + MALI_PLATFORM
+-include MALI_CONFIGURATION
+export KDIR ?= $(KDIR-$(TARGET_PLATFORM))
+export MALI_PLATFORM ?= $(MALI_PLATFORM-$(TARGET_PLATFORM))
+
+ifneq ($(TARGET_PLATFORM),)
+ifeq ($(MALI_PLATFORM),)
+$(error "Invalid TARGET_PLATFORM: $(TARGET_PLATFORM)")
+endif
+endif
+
+# validate lookup result
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(TARGET_PLATFORM))
+endif
+
+ifeq ($(USING_GPU_UTILIZATION), 1)
+    ifeq ($(USING_DVFS), 1)
+        $(error USING_GPU_UTILIZATION conflict with USING_DVFS you can read the Integration Guide to choose which one do you need)
+    endif
+endif
+
+ifeq ($(USING_UMP),1)
+export CONFIG_MALI400_UMP=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_UMP=1
+ifeq ($(USE_UMPV2),1)
+UMP_SYMVERS_FILE ?= ../umpv2/Module.symvers
+else
+UMP_SYMVERS_FILE ?= ../ump/Module.symvers
+endif
+KBUILD_EXTRA_SYMBOLS = $(realpath $(UMP_SYMVERS_FILE))
+$(warning $(KBUILD_EXTRA_SYMBOLS))
+endif
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+include $(KDIR)/.config
+
+ifeq ($(ARCH), arm)
+# when compiling for ARM we're cross compiling
+export CROSS_COMPILE ?= $(call check_cc2, arm-linux-gnueabi-gcc, arm-linux-gnueabi-, arm-none-linux-gnueabi-)
+endif
+
+# report detected/selected settings
+ifdef ARM_INTERNAL_BUILD
+$(warning TARGET_PLATFORM $(TARGET_PLATFORM))
+$(warning KDIR $(KDIR))
+$(warning MALI_PLATFORM $(MALI_PLATFORM))
+endif
+
+# Set up build config
+export CONFIG_MALI400=m
+export CONFIG_MALI450=y
+export CONFIG_MALI470=y
+
+export EXTRA_DEFINES += -DCONFIG_MALI400=1
+export EXTRA_DEFINES += -DCONFIG_MALI450=1
+export EXTRA_DEFINES += -DCONFIG_MALI470=1
+
+ifneq ($(MALI_PLATFORM),)
+export EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1
+export MALI_PLATFORM_FILES = $(wildcard platform/$(MALI_PLATFORM)/*.c)
+endif
+
+ifeq ($(USING_PROFILING),1)
+ifeq ($(CONFIG_TRACEPOINTS),)
+$(warning CONFIG_TRACEPOINTS required for profiling)
+else
+export CONFIG_MALI400_PROFILING=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_PROFILING=1
+ifeq ($(USING_INTERNAL_PROFILING),1)
+export CONFIG_MALI400_INTERNAL_PROFILING=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_INTERNAL_PROFILING=1
+endif
+ifeq ($(MALI_HEATMAPS_ENABLED),1)
+export MALI_HEATMAPS_ENABLED=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_HEATMAPS_ENABLED
+endif
+endif
+endif
+
+ifeq ($(MALI_DMA_BUF_MAP_ON_ATTACH),1)
+export CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DMA_BUF_MAP_ON_ATTACH
+endif
+
+ifeq ($(MALI_SHARED_INTERRUPTS),1)
+export CONFIG_MALI_SHARED_INTERRUPTS=y
+export EXTRA_DEFINES += -DCONFIG_MALI_SHARED_INTERRUPTS
+endif
+
+ifeq ($(USING_DVFS),1)
+	xxxyyyy
+export CONFIG_MALI_DVFS=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DVFS
+endif
+
+ifeq ($(USING_DMA_BUF_FENCE),1)
+export CONFIG_MALI_DMA_BUF_FENCE=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DMA_BUF_FENCE
+endif
+
+ifeq ($(MALI_PMU_PARALLEL_POWER_UP),1)
+export CONFIG_MALI_PMU_PARALLEL_POWER_UP=y
+export EXTRA_DEFINES += -DCONFIG_MALI_PMU_PARALLEL_POWER_UP
+endif
+
+ifdef CONFIG_OF
+ifeq ($(USING_DT),1)
+export CONFIG_MALI_DT=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DT
+endif
+endif
+
+ifeq ($(USING_DEVFREQ), 1)
+ifdef CONFIG_PM_DEVFREQ
+export CONFIG_MALI_DEVFREQ=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DEVFREQ=1
+else
+$(warning "You want to support DEVFREQ but kernel didn't support DEVFREQ.")
+endif
+endif
+
+ifneq ($(BUILD),release)
+# Debug
+export CONFIG_MALI400_DEBUG=y
+else
+# Release
+ifeq ($(MALI_QUIET),1)
+export CONFIG_MALI_QUIET=y
+export EXTRA_DEFINES += -DCONFIG_MALI_QUIET
+endif
+endif
+
+ifeq ($(MALI_SKIP_JOBS),1)
+EXTRA_DEFINES += -DPROFILING_SKIP_PP_JOBS=1 -DPROFILING_SKIP_GP_JOBS=1
+endif
+
+ifeq ($(MALI_MEM_SWAP_TRACKING),1)
+EXTRA_DEFINES += -DMALI_MEM_SWAP_TRACKING=1
+endif
+
+all: $(UMP_SYMVERS_FILE)
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) modules
+	@rm $(FILES_PREFIX)__malidrv_build_info.c $(FILES_PREFIX)__malidrv_build_info.o
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+kernelrelease:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) kernelrelease
+
+export CONFIG KBUILD_EXTRA_SYMBOLS
diff --git a/utgard/r6p2/clean.sh b/utgard/r6p2/clean.sh
new file mode 100755
index 0000000..130fd73
--- /dev/null
+++ b/utgard/r6p2/clean.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# rm *.o, *.cmd, *~
+find . -name "*.o" -o -name "*.cmd" -o -name "*~" | xargs rm -rf
+rm	Module.symvers
+rm	mali.ko
+rm	mali.mod.c
+rm	modules.order
diff --git a/utgard/r6p2/common/mali_broadcast.c b/utgard/r6p2/common/mali_broadcast.c
new file mode 100755
index 0000000..4c4b2bc
--- /dev/null
+++ b/utgard/r6p2/common/mali_broadcast.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_broadcast.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+#define MALI_BROADCAST_REGISTER_SIZE      0x1000
+#define MALI_BROADCAST_REG_BROADCAST_MASK    0x0
+#define MALI_BROADCAST_REG_INTERRUPT_MASK    0x4
+
+struct mali_bcast_unit {
+	struct mali_hw_core hw_core;
+	u32 current_mask;
+};
+
+struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource)
+{
+	struct mali_bcast_unit *bcast_unit = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(resource);
+	MALI_DEBUG_PRINT(2, ("Broadcast: Creating Mali Broadcast unit: %s\n",
+			     resource->description));
+
+	bcast_unit = _mali_osk_malloc(sizeof(struct mali_bcast_unit));
+	if (NULL == bcast_unit) {
+		MALI_PRINT_ERROR(("Broadcast: Failed to allocate memory for Broadcast unit\n"));
+		return NULL;
+	}
+
+	if (_MALI_OSK_ERR_OK == mali_hw_core_create(&bcast_unit->hw_core,
+			resource, MALI_BROADCAST_REGISTER_SIZE)) {
+		bcast_unit->current_mask = 0;
+		mali_bcast_reset(bcast_unit);
+
+		return bcast_unit;
+	} else {
+		MALI_PRINT_ERROR(("Broadcast: Failed map broadcast unit\n"));
+	}
+
+	_mali_osk_free(bcast_unit);
+
+	return NULL;
+}
+
+void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	mali_hw_core_delete(&bcast_unit->hw_core);
+	_mali_osk_free(bcast_unit);
+}
+
+/* Call this function to add the @group's id into bcast mask
+ * Note: redundant calling this function with same @group
+ * doesn't make any difference as calling it once
+ */
+void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit,
+			  struct mali_group *group)
+{
+	u32 bcast_id;
+	u32 broadcast_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group));
+
+	broadcast_mask = bcast_unit->current_mask;
+
+	broadcast_mask |= (bcast_id); /* add PP core to broadcast */
+	broadcast_mask |= (bcast_id << 16); /* add MMU to broadcast */
+
+	/* store mask so we can restore on reset */
+	bcast_unit->current_mask = broadcast_mask;
+}
+
+/* Call this function to remove @group's id from bcast mask
+ * Note: redundant calling this function with same @group
+ * doesn't make any difference as calling it once
+ */
+void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit,
+			     struct mali_group *group)
+{
+	u32 bcast_id;
+	u32 broadcast_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group));
+
+	broadcast_mask = bcast_unit->current_mask;
+
+	broadcast_mask &= ~((bcast_id << 16) | bcast_id);
+
+	/* store mask so we can restore on reset */
+	bcast_unit->current_mask = broadcast_mask;
+}
+
+void mali_bcast_reset(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+
+	MALI_DEBUG_PRINT(4,
+			 ("Broadcast: setting mask 0x%08X + 0x%08X (reset)\n",
+			  bcast_unit->current_mask,
+			  bcast_unit->current_mask & 0xFF));
+
+	/* set broadcast mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_BROADCAST_MASK,
+				    bcast_unit->current_mask);
+
+	/* set IRQ override mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_INTERRUPT_MASK,
+				    bcast_unit->current_mask & 0xFF);
+}
+
+void mali_bcast_disable(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+
+	MALI_DEBUG_PRINT(4, ("Broadcast: setting mask 0x0 + 0x0 (disable)\n"));
+
+	/* set broadcast mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_BROADCAST_MASK,
+				    0x0);
+
+	/* set IRQ override mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_INTERRUPT_MASK,
+				    0x0);
+}
diff --git a/utgard/r6p2/common/mali_broadcast.h b/utgard/r6p2/common/mali_broadcast.h
new file mode 100755
index 0000000..e12e8a2
--- /dev/null
+++ b/utgard/r6p2/common/mali_broadcast.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_BROADCAST_H__
+#define __MALI_BROADCAST_H__
+
+/*
+ *  Interface for the broadcast unit on Mali-450.
+ *
+ * - Represents up to 8 × (MMU + PP) pairs.
+ * - Supports dynamically changing which (MMU + PP) pairs receive the broadcast by
+ *   setting a mask.
+ */
+
+#include "mali_hw_core.h"
+#include "mali_group.h"
+
+struct mali_bcast_unit;
+
+struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource);
+void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit);
+
+/* Add a group to the list of (MMU + PP) pairs broadcasts go out to. */
+void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group);
+
+/* Remove a group to the list of (MMU + PP) pairs broadcasts go out to. */
+void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group);
+
+/* Re-set cached mask. This needs to be called after having been suspended. */
+void mali_bcast_reset(struct mali_bcast_unit *bcast_unit);
+
+/**
+ * Disable broadcast unit
+ *
+ * mali_bcast_enable must be called to re-enable the unit. Cores may not be
+ * added or removed when the unit is disabled.
+ */
+void mali_bcast_disable(struct mali_bcast_unit *bcast_unit);
+
+/**
+ * Re-enable broadcast unit
+ *
+ * This resets the masks to include the cores present when mali_bcast_disable was called.
+ */
+MALI_STATIC_INLINE void mali_bcast_enable(struct mali_bcast_unit *bcast_unit)
+{
+	mali_bcast_reset(bcast_unit);
+}
+
+#endif /* __MALI_BROADCAST_H__ */
diff --git a/utgard/r6p2/common/mali_control_timer.c b/utgard/r6p2/common/mali_control_timer.c
new file mode 100755
index 0000000..fc6ceb4
--- /dev/null
+++ b/utgard/r6p2/common/mali_control_timer.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2010-2012, 2014-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_utilization.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_dvfs_policy.h"
+#include "mali_control_timer.h"
+
+static u64 period_start_time = 0;
+
+static _mali_osk_timer_t *mali_control_timer = NULL;
+static mali_bool timer_running = MALI_FALSE;
+
+static u32 mali_control_timeout = 1000;
+
+void mali_control_timer_add(u32 timeout)
+{
+	_mali_osk_timer_add(mali_control_timer, _mali_osk_time_mstoticks(timeout));
+}
+
+static void mali_control_timer_callback(void *arg)
+{
+	if (mali_utilization_enabled()) {
+		struct mali_gpu_utilization_data *util_data = NULL;
+		u64 time_period = 0;
+		mali_bool need_add_timer = MALI_TRUE;
+
+		/* Calculate gpu utilization */
+		util_data = mali_utilization_calculate(&period_start_time, &time_period, &need_add_timer);
+
+		if (util_data) {
+#if defined(CONFIG_MALI_DVFS)
+			mali_dvfs_policy_realize(util_data, time_period);
+#else
+			mali_utilization_platform_realize(util_data);
+#endif
+
+			if (MALI_TRUE == need_add_timer) {
+				mali_control_timer_add(mali_control_timeout);
+			}
+		}
+	}
+}
+
+/* Init a timer (for now it is used for GPU utilization and dvfs) */
+_mali_osk_errcode_t mali_control_timer_init(void)
+{
+	_mali_osk_device_data data;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		/* Use device specific settings (if defined) */
+		if (0 != data.control_interval) {
+			mali_control_timeout = data.control_interval;
+			MALI_DEBUG_PRINT(2, ("Mali GPU Timer: %u\n", mali_control_timeout));
+		}
+	}
+
+	mali_control_timer = _mali_osk_timer_init();
+	if (NULL == mali_control_timer) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+	_mali_osk_timer_setcallback(mali_control_timer, mali_control_timer_callback, NULL);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_control_timer_term(void)
+{
+	if (NULL != mali_control_timer) {
+		_mali_osk_timer_del(mali_control_timer);
+		timer_running = MALI_FALSE;
+		_mali_osk_timer_term(mali_control_timer);
+		mali_control_timer = NULL;
+	}
+}
+
+mali_bool mali_control_timer_resume(u64 time_now)
+{
+	mali_utilization_data_assert_locked();
+
+	if (timer_running != MALI_TRUE) {
+		timer_running = MALI_TRUE;
+
+		period_start_time = time_now;
+
+		mali_utilization_reset();
+
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+void mali_control_timer_pause(void)
+{
+	mali_utilization_data_assert_locked();
+	if (timer_running == MALI_TRUE) {
+		timer_running = MALI_FALSE;
+	}
+}
+
+void mali_control_timer_suspend(mali_bool suspend)
+{
+	mali_utilization_data_lock();
+
+	if (timer_running == MALI_TRUE) {
+		timer_running = MALI_FALSE;
+
+		mali_utilization_data_unlock();
+
+		if (suspend == MALI_TRUE) {
+			_mali_osk_timer_del(mali_control_timer);
+			mali_utilization_reset();
+		}
+	} else {
+		mali_utilization_data_unlock();
+	}
+}
diff --git a/utgard/r6p2/common/mali_control_timer.h b/utgard/r6p2/common/mali_control_timer.h
new file mode 100755
index 0000000..1265390
--- /dev/null
+++ b/utgard/r6p2/common/mali_control_timer.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2010-2012, 2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_CONTROL_TIMER_H__
+#define __MALI_CONTROL_TIMER_H__
+
+#include "mali_osk.h"
+
+_mali_osk_errcode_t mali_control_timer_init(void);
+
+void mali_control_timer_term(void);
+
+mali_bool mali_control_timer_resume(u64 time_now);
+
+void mali_control_timer_suspend(mali_bool suspend);
+void mali_control_timer_pause(void);
+
+void mali_control_timer_add(u32 timeout);
+
+#endif /* __MALI_CONTROL_TIMER_H__ */
+
diff --git a/utgard/r6p2/common/mali_dlbu.c b/utgard/r6p2/common/mali_dlbu.c
new file mode 100755
index 0000000..4f2a121
--- /dev/null
+++ b/utgard/r6p2/common/mali_dlbu.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_dlbu.h"
+#include "mali_memory.h"
+#include "mali_pp.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "mali_hw_core.h"
+
+/**
+ * Size of DLBU registers in bytes
+ */
+#define MALI_DLBU_SIZE 0x400
+
+mali_dma_addr mali_dlbu_phys_addr = 0;
+static mali_io_address mali_dlbu_cpu_addr = NULL;
+
+/**
+ * DLBU register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_dlbu_register {
+	MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR = 0x0000, /**< Master tile list physical base address;
+                                                             31:12 Physical address to the page used for the DLBU
+                                                             0 DLBU enable - set this bit to 1 enables the AXI bus
+                                                             between PPs and L2s, setting to 0 disables the router and
+                                                             no further transactions are sent to DLBU */
+	MALI_DLBU_REGISTER_MASTER_TLLIST_VADDR     = 0x0004, /**< Master tile list virtual base address;
+                                                             31:12 Virtual address to the page used for the DLBU */
+	MALI_DLBU_REGISTER_TLLIST_VBASEADDR     = 0x0008, /**< Tile list virtual base address;
+                                                             31:12 Virtual address to the tile list. This address is used when
+                                                             calculating the call address sent to PP.*/
+	MALI_DLBU_REGISTER_FB_DIM                 = 0x000C, /**< Framebuffer dimension;
+                                                             23:16 Number of tiles in Y direction-1
+                                                             7:0 Number of tiles in X direction-1 */
+	MALI_DLBU_REGISTER_TLLIST_CONF       = 0x0010, /**< Tile list configuration;
+                                                             29:28 select the size of each allocated block: 0=128 bytes, 1=256, 2=512, 3=1024
+                                                             21:16 2^n number of tiles to be binned to one tile list in Y direction
+                                                             5:0 2^n number of tiles to be binned to one tile list in X direction */
+	MALI_DLBU_REGISTER_START_TILE_POS         = 0x0014, /**< Start tile positions;
+                                                             31:24 start position in Y direction for group 1
+                                                             23:16 start position in X direction for group 1
+                                                             15:8 start position in Y direction for group 0
+                                                             7:0 start position in X direction for group 0 */
+	MALI_DLBU_REGISTER_PP_ENABLE_MASK         = 0x0018, /**< PP enable mask;
+                                                             7 enable PP7 for load balancing
+                                                             6 enable PP6 for load balancing
+                                                             5 enable PP5 for load balancing
+                                                             4 enable PP4 for load balancing
+                                                             3 enable PP3 for load balancing
+                                                             2 enable PP2 for load balancing
+                                                             1 enable PP1 for load balancing
+                                                             0 enable PP0 for load balancing */
+} mali_dlbu_register;
+
+typedef enum {
+	PP0ENABLE = 0,
+	PP1ENABLE,
+	PP2ENABLE,
+	PP3ENABLE,
+	PP4ENABLE,
+	PP5ENABLE,
+	PP6ENABLE,
+	PP7ENABLE
+} mali_dlbu_pp_enable;
+
+struct mali_dlbu_core {
+	struct mali_hw_core     hw_core;           /**< Common for all HW cores */
+	u32                     pp_cores_mask;     /**< This is a mask for the PP cores whose operation will be controlled by LBU
+                                                      see MALI_DLBU_REGISTER_PP_ENABLE_MASK register */
+};
+
+_mali_osk_errcode_t mali_dlbu_initialize(void)
+{
+	MALI_DEBUG_PRINT(2, ("Mali DLBU: Initializing\n"));
+
+	if (_MALI_OSK_ERR_OK ==
+	    mali_mmu_get_table_page(&mali_dlbu_phys_addr,
+				    &mali_dlbu_cpu_addr)) {
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_dlbu_terminate(void)
+{
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: terminating\n"));
+
+	if (0 != mali_dlbu_phys_addr && 0 != mali_dlbu_cpu_addr) {
+		mali_mmu_release_table_page(mali_dlbu_phys_addr,
+					    mali_dlbu_cpu_addr);
+		mali_dlbu_phys_addr = 0;
+		mali_dlbu_cpu_addr = 0;
+	}
+}
+
+struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource)
+{
+	struct mali_dlbu_core *core = NULL;
+
+	MALI_DEBUG_PRINT(2, ("Mali DLBU: Creating Mali dynamic load balancing unit: %s\n", resource->description));
+
+	core = _mali_osk_malloc(sizeof(struct mali_dlbu_core));
+	if (NULL != core) {
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALI_DLBU_SIZE)) {
+			core->pp_cores_mask = 0;
+			if (_MALI_OSK_ERR_OK == mali_dlbu_reset(core)) {
+				return core;
+			}
+			MALI_PRINT_ERROR(("Failed to reset DLBU %s\n", core->hw_core.description));
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Mali DLBU: Failed to allocate memory for DLBU core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_dlbu_delete(struct mali_dlbu_core *dlbu)
+{
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	mali_hw_core_delete(&dlbu->hw_core);
+	_mali_osk_free(dlbu);
+}
+
+_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu)
+{
+	u32 dlbu_registers[7];
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT;
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+
+	MALI_DEBUG_PRINT(4, ("Mali DLBU: mali_dlbu_reset: %s\n", dlbu->hw_core.description));
+
+	dlbu_registers[0] = mali_dlbu_phys_addr | 1; /* bit 0 enables the whole core */
+	dlbu_registers[1] = MALI_DLBU_VIRT_ADDR;
+	dlbu_registers[2] = 0;
+	dlbu_registers[3] = 0;
+	dlbu_registers[4] = 0;
+	dlbu_registers[5] = 0;
+	dlbu_registers[6] = dlbu->pp_cores_mask;
+
+	/* write reset values to core registers */
+	mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR, dlbu_registers, 7);
+
+	err = _MALI_OSK_ERR_OK;
+
+	return err;
+}
+
+void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu)
+{
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+
+	mali_hw_core_register_write(&dlbu->hw_core, MALI_DLBU_REGISTER_PP_ENABLE_MASK, dlbu->pp_cores_mask);
+}
+
+void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group)
+{
+	struct mali_pp_core *pp_core;
+	u32 bcast_id;
+
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	pp_core = mali_group_get_pp_core(group);
+	bcast_id = mali_pp_core_get_bcast_id(pp_core);
+
+	dlbu->pp_cores_mask |= bcast_id;
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: Adding core[%d] New mask= 0x%02x\n", bcast_id , dlbu->pp_cores_mask));
+}
+
+/* Remove a group from the DLBU */
+void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group)
+{
+	struct mali_pp_core *pp_core;
+	u32 bcast_id;
+
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	pp_core = mali_group_get_pp_core(group);
+	bcast_id = mali_pp_core_get_bcast_id(pp_core);
+
+	dlbu->pp_cores_mask &= ~bcast_id;
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: Removing core[%d] New mask= 0x%02x\n", bcast_id, dlbu->pp_cores_mask));
+}
+
+/* Configure the DLBU for \a job. This needs to be done before the job is started on the groups in the DLBU. */
+void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job)
+{
+	u32 *registers;
+	MALI_DEBUG_ASSERT(job);
+	registers = mali_pp_job_get_dlbu_registers(job);
+	MALI_DEBUG_PRINT(4, ("Mali DLBU: Starting job\n"));
+
+	/* Writing 4 registers:
+	 * DLBU registers except the first two (written once at DLBU initialisation / reset) and the PP_ENABLE_MASK register */
+	mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_TLLIST_VBASEADDR, registers, 4);
+
+}
diff --git a/utgard/r6p2/common/mali_dlbu.h b/utgard/r6p2/common/mali_dlbu.h
new file mode 100755
index 0000000..c031f11
--- /dev/null
+++ b/utgard/r6p2/common/mali_dlbu.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_DLBU_H__
+#define __MALI_DLBU_H__
+
+#define MALI_DLBU_VIRT_ADDR 0xFFF00000 /* master tile virtual address fixed at this value and mapped into every session */
+
+#include "mali_osk.h"
+
+struct mali_pp_job;
+struct mali_group;
+struct mali_dlbu_core;
+
+extern mali_dma_addr mali_dlbu_phys_addr;
+
+_mali_osk_errcode_t mali_dlbu_initialize(void);
+void mali_dlbu_terminate(void);
+
+struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource);
+void mali_dlbu_delete(struct mali_dlbu_core *dlbu);
+
+_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu);
+
+void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group);
+void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group);
+
+/** @brief Called to update HW after DLBU state changed
+ *
+ * This function must be called after \a mali_dlbu_add_group or \a
+ * mali_dlbu_remove_group to write the updated mask to hardware, unless the
+ * same is accomplished by calling \a mali_dlbu_reset.
+ */
+void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu);
+
+void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job);
+
+#endif /* __MALI_DLBU_H__ */
diff --git a/utgard/r6p2/common/mali_dvfs_policy.c b/utgard/r6p2/common/mali_dvfs_policy.c
new file mode 100755
index 0000000..1094f9d
--- /dev/null
+++ b/utgard/r6p2/common/mali_dvfs_policy.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2010-2012, 2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include "mali_scheduler.h"
+#include "mali_dvfs_policy.h"
+#include "mali_osk_mali.h"
+#include "mali_osk_profiling.h"
+
+#define CLOCK_TUNING_TIME_DEBUG 0
+
+#define MAX_PERFORMANCE_VALUE 256
+#define MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(percent) ((int) ((percent)*(MAX_PERFORMANCE_VALUE)/100.0 + 0.5))
+
+/** The max fps the same as display vsync default 60, can set by module insert parameter */
+int mali_max_system_fps = 60;
+/** A lower limit on their desired FPS default 58, can set by module insert parameter */
+int mali_desired_fps = 58;
+
+static int mali_fps_step1 = 0;
+static int mali_fps_step2 = 0;
+
+static int clock_step = -1;
+static int cur_clk_step = -1;
+static struct mali_gpu_clock *gpu_clk = NULL;
+
+/*Function prototype */
+static int (*mali_gpu_set_freq)(int) = NULL;
+static int (*mali_gpu_get_freq)(void) = NULL;
+
+static mali_bool mali_dvfs_enabled = MALI_FALSE;
+
+#define NUMBER_OF_NANOSECONDS_PER_SECOND  1000000000ULL
+static u32 calculate_window_render_fps(u64 time_period)
+{
+	u32 max_window_number;
+	u64 tmp;
+	u64 max = time_period;
+	u32 leading_zeroes;
+	u32 shift_val;
+	u32 time_period_shift;
+	u32 max_window_number_shift;
+	u32 ret_val;
+
+	max_window_number = mali_session_max_window_num();
+
+	/* To avoid float division, extend the dividend to ns unit */
+	tmp = (u64)max_window_number * NUMBER_OF_NANOSECONDS_PER_SECOND;
+	if (tmp > time_period) {
+		max = tmp;
+	}
+
+	/*
+	 * We may have 64-bit values, a dividend or a divisor or both
+	 * To avoid dependencies to a 64-bit divider, we shift down the two values
+	 * equally first.
+	 */
+	leading_zeroes = _mali_osk_clz((u32)(max >> 32));
+	shift_val = 32 - leading_zeroes;
+
+	time_period_shift = (u32)(time_period >> shift_val);
+	max_window_number_shift = (u32)(tmp >> shift_val);
+
+	ret_val = max_window_number_shift / time_period_shift;
+
+	return ret_val;
+}
+
+static bool mali_pickup_closest_avail_clock(int target_clock_mhz, mali_bool pick_clock_up)
+{
+	int i = 0;
+	bool clock_changed = false;
+
+	/* Round up the closest available frequency step for target_clock_hz */
+	for (i = 0; i < gpu_clk->num_of_steps; i++) {
+		/* Find the first item > target_clock_hz */
+		if (((int)(gpu_clk->item[i].clock) - target_clock_mhz) > 0) {
+			break;
+		}
+	}
+
+	/* If the target clock greater than the maximum clock just pick the maximum one*/
+	if (i == gpu_clk->num_of_steps) {
+		i = gpu_clk->num_of_steps - 1;
+	} else {
+		if ((!pick_clock_up) && (i > 0)) {
+			i = i - 1;
+		}
+	}
+
+	clock_step = i;
+	if (cur_clk_step != clock_step) {
+		clock_changed = true;
+	}
+
+	return clock_changed;
+}
+
+void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period)
+{
+	int under_perform_boundary_value = 0;
+	int over_perform_boundary_value = 0;
+	int current_fps = 0;
+	int current_gpu_util = 0;
+	bool clock_changed = false;
+#if CLOCK_TUNING_TIME_DEBUG
+	struct timeval start;
+	struct timeval stop;
+	unsigned int elapse_time;
+	do_gettimeofday(&start);
+#endif
+	u32 window_render_fps;
+
+	if (NULL == gpu_clk) {
+		MALI_DEBUG_PRINT(2, ("Enable DVFS but patform doesn't Support freq change. \n"));
+		return;
+	}
+
+	window_render_fps = calculate_window_render_fps(time_period);
+
+	current_fps = window_render_fps;
+	current_gpu_util = data->utilization_gpu;
+
+	/* Get the specific under_perform_boundary_value and over_perform_boundary_value */
+	if ((mali_desired_fps <= current_fps) && (current_fps < mali_max_system_fps)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(90);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70);
+	} else if ((mali_fps_step1 <= current_fps) && (current_fps < mali_desired_fps)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35);
+	} else if ((mali_fps_step2 <= current_fps) && (current_fps < mali_fps_step1)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(50);
+	} else {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35);
+	}
+
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: gpu util = %d \n", current_gpu_util));
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: under_perform = %d,  over_perform = %d \n", under_perform_boundary_value, over_perform_boundary_value));
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: render fps = %d,  pressure render fps = %d \n", current_fps, window_render_fps));
+
+	/* Get current clock value */
+	cur_clk_step = mali_gpu_get_freq();
+
+	/* Consider offscreen */
+	if (0 == current_fps) {
+		/* GP or PP under perform, need to give full power */
+		if (current_gpu_util > over_perform_boundary_value) {
+			if (cur_clk_step != gpu_clk->num_of_steps - 1) {
+				clock_changed = true;
+				clock_step = gpu_clk->num_of_steps - 1;
+			}
+		}
+
+		/* If GPU is idle, use lowest power */
+		if (0 == current_gpu_util) {
+			if (cur_clk_step != 0) {
+				clock_changed = true;
+				clock_step = 0;
+			}
+		}
+
+		goto real_setting;
+	}
+
+	/* 2. Calculate target clock if the GPU clock can be tuned */
+	if (-1 != cur_clk_step) {
+		int target_clk_mhz = -1;
+		mali_bool pick_clock_up = MALI_TRUE;
+
+		if (current_gpu_util > under_perform_boundary_value) {
+			/* when under perform, need to consider the fps part */
+			target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util * mali_desired_fps / under_perform_boundary_value / current_fps;
+			pick_clock_up = MALI_TRUE;
+		} else if (current_gpu_util < over_perform_boundary_value) {
+			/* when over perform, did't need to consider fps, system didn't want to reach desired fps */
+			target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util / under_perform_boundary_value;
+			pick_clock_up = MALI_FALSE;
+		}
+
+		if (-1 != target_clk_mhz) {
+			clock_changed = mali_pickup_closest_avail_clock(target_clk_mhz, pick_clock_up);
+		}
+	}
+
+real_setting:
+	if (clock_changed) {
+		mali_gpu_set_freq(clock_step);
+
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+					      gpu_clk->item[clock_step].clock,
+					      gpu_clk->item[clock_step].vol / 1000,
+					      0, 0, 0);
+	}
+
+#if CLOCK_TUNING_TIME_DEBUG
+	do_gettimeofday(&stop);
+
+	elapse_time = timeval_to_ns(&stop) - timeval_to_ns(&start);
+	MALI_DEBUG_PRINT(2, ("Using ARM power policy:  eclapse time = %d\n", elapse_time));
+#endif
+}
+
+_mali_osk_errcode_t mali_dvfs_policy_init(void)
+{
+	_mali_osk_device_data data;
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		if ((NULL != data.get_clock_info) && (NULL != data.set_freq) && (NULL != data.get_freq)) {
+			MALI_DEBUG_PRINT(2, ("Mali DVFS init: using arm dvfs policy \n"));
+
+
+			mali_fps_step1 = mali_max_system_fps / 3;
+			mali_fps_step2 = mali_max_system_fps / 5;
+
+			data.get_clock_info(&gpu_clk);
+
+			if (gpu_clk != NULL) {
+#ifdef DEBUG
+				int i;
+				for (i = 0; i < gpu_clk->num_of_steps; i++) {
+					MALI_DEBUG_PRINT(5, ("mali gpu clock info: step%d clock(%d)Hz,vol(%d) \n",
+							     i, gpu_clk->item[i].clock, gpu_clk->item[i].vol));
+				}
+#endif
+			} else {
+				MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform didn't define enough info for ddk to do DVFS \n"));
+			}
+
+			mali_gpu_get_freq = data.get_freq;
+			mali_gpu_set_freq = data.set_freq;
+
+			if ((NULL != gpu_clk) && (gpu_clk->num_of_steps > 0)
+			    && (NULL != mali_gpu_get_freq) && (NULL != mali_gpu_set_freq)) {
+				mali_dvfs_enabled = MALI_TRUE;
+			}
+		} else {
+			MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform function callback incomplete, need check mali_gpu_device_data in platform .\n"));
+		}
+	} else {
+		err = _MALI_OSK_ERR_FAULT;
+		MALI_DEBUG_PRINT(2, ("Mali DVFS init: get platform data error .\n"));
+	}
+
+	return err;
+}
+
+/*
+ * Always give full power when start a new period,
+ * if mali dvfs enabled, for performance consideration
+ */
+void mali_dvfs_policy_new_period(void)
+{
+	/* Always give full power when start a new period */
+	unsigned int cur_clk_step = 0;
+
+	cur_clk_step = mali_gpu_get_freq();
+
+	if (cur_clk_step != (gpu_clk->num_of_steps - 1)) {
+		mali_gpu_set_freq(gpu_clk->num_of_steps - 1);
+
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, gpu_clk->item[gpu_clk->num_of_steps - 1].clock,
+					      gpu_clk->item[gpu_clk->num_of_steps - 1].vol / 1000, 0, 0, 0);
+	}
+}
+
+mali_bool mali_dvfs_policy_enabled(void)
+{
+	return mali_dvfs_enabled;
+}
+
+#if defined(CONFIG_MALI400_PROFILING)
+void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item)
+{
+	if (mali_platform_device != NULL) {
+
+		struct mali_gpu_device_data *device_data = NULL;
+		device_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data;
+
+		if ((NULL != device_data->get_clock_info) && (NULL != device_data->get_freq)) {
+
+			int cur_clk_step = device_data->get_freq();
+			struct mali_gpu_clock *mali_gpu_clk = NULL;
+
+			device_data->get_clock_info(&mali_gpu_clk);
+			clk_item->clock = mali_gpu_clk->item[cur_clk_step].clock;
+			clk_item->vol = mali_gpu_clk->item[cur_clk_step].vol;
+		} else {
+			MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: platform function callback incomplete, need check mali_gpu_device_data in platform .\n"));
+		}
+	}
+}
+#endif
+
diff --git a/utgard/r6p2/common/mali_dvfs_policy.h b/utgard/r6p2/common/mali_dvfs_policy.h
new file mode 100755
index 0000000..1770426
--- /dev/null
+++ b/utgard/r6p2/common/mali_dvfs_policy.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2010-2012, 2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_DVFS_POLICY_H__
+#define __MALI_DVFS_POLICY_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period);
+
+_mali_osk_errcode_t mali_dvfs_policy_init(void);
+
+void mali_dvfs_policy_new_period(void);
+
+mali_bool mali_dvfs_policy_enabled(void);
+
+#if defined(CONFIG_MALI400_PROFILING)
+void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif/* __MALI_DVFS_POLICY_H__ */
diff --git a/utgard/r6p2/common/mali_executor.c b/utgard/r6p2/common/mali_executor.c
new file mode 100755
index 0000000..f17e443
--- /dev/null
+++ b/utgard/r6p2/common/mali_executor.c
@@ -0,0 +1,2698 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_executor.h"
+#include "mali_scheduler.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_pp.h"
+#include "mali_pp_job.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+#include "mali_timeline.h"
+#include "mali_osk_profiling.h"
+#include "mali_session.h"
+#include "mali_osk_mali.h"
+
+/*
+ * If dma_buf with map on demand is used, we defer job deletion and job queue
+ * if in atomic context, since both might sleep.
+ */
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_DELETE 1
+#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_QUEUE 1
+#endif /* !defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) */
+
+/*
+ * ---------- static type definitions (structs, enums, etc) ----------
+ */
+
+enum mali_executor_state_t {
+	EXEC_STATE_NOT_PRESENT, /* Virtual group on Mali-300/400 (do not use) */
+	EXEC_STATE_DISABLED,    /* Disabled by core scaling (do not use) */
+	EXEC_STATE_EMPTY,       /* No child groups for virtual group (do not use) */
+	EXEC_STATE_INACTIVE,    /* Can be used, but must be activate first */
+	EXEC_STATE_IDLE,        /* Active and ready to be used */
+	EXEC_STATE_WORKING,     /* Executing a job */
+};
+
+/*
+ * ---------- global variables (exported due to inline functions) ----------
+ */
+
+/* Lock for this module (protecting all HW access except L2 caches) */
+_mali_osk_spinlock_irq_t *mali_executor_lock_obj = NULL;
+
+mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX];
+
+/*
+ * ---------- static variables ----------
+ */
+
+/* Used to defer job scheduling */
+static _mali_osk_wq_work_t *executor_wq_high_pri = NULL;
+
+/* Store version from GP and PP (user space wants to know this) */
+static u32 pp_version = 0;
+static u32 gp_version = 0;
+
+/* List of physical PP groups which are disabled by some external source */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_disabled);
+static u32 group_list_disabled_count = 0;
+
+/* List of groups which can be used, but activate first */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_inactive);
+static u32 group_list_inactive_count = 0;
+
+/* List of groups which are active and ready to be used */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_idle);
+static u32 group_list_idle_count = 0;
+
+/* List of groups which are executing a job */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_working);
+static u32 group_list_working_count = 0;
+
+/* Virtual group (if any) */
+static struct mali_group *virtual_group = NULL;
+
+/* Virtual group state is tracked with a state variable instead of 4 lists */
+static enum mali_executor_state_t virtual_group_state = EXEC_STATE_NOT_PRESENT;
+
+/* GP group */
+static struct mali_group *gp_group = NULL;
+
+/* GP group state is tracked with a state variable instead of 4 lists */
+static enum mali_executor_state_t gp_group_state = EXEC_STATE_NOT_PRESENT;
+
+static u32 gp_returned_cookie = 0;
+
+/* Total number of physical PP cores present */
+static u32 num_physical_pp_cores_total = 0;
+
+/* Number of physical cores which are enabled */
+static u32 num_physical_pp_cores_enabled = 0;
+
+/* Enable or disable core scaling */
+static mali_bool core_scaling_enabled = MALI_TRUE;
+
+/* Variables to allow safe pausing of the scheduler */
+static _mali_osk_wait_queue_t *executor_working_wait_queue = NULL;
+static u32 pause_count = 0;
+
+/* PP cores haven't been enabled because of some pp cores haven't been disabled. */
+static int core_scaling_delay_up_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+
+/* Variables used to implement notify pp core changes to userspace when core scaling
+ * is finished in mali_executor_complete_group() function. */
+static _mali_osk_wq_work_t *executor_wq_notify_core_change = NULL;
+static _mali_osk_wait_queue_t *executor_notify_core_change_wait_queue = NULL;
+
+/*
+ * ---------- Forward declaration of static functions ----------
+ */
+static mali_bool mali_executor_is_suspended(void *data);
+static mali_bool mali_executor_is_working(void);
+static void mali_executor_disable_empty_virtual(void);
+static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group);
+static mali_bool mali_executor_has_virtual_group(void);
+static mali_bool mali_executor_virtual_group_is_usable(void);
+static void mali_executor_schedule(void);
+static void mali_executor_wq_schedule(void *arg);
+static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job);
+static void mali_executor_complete_group(struct mali_group *group,
+		mali_bool success,
+		struct mali_gp_job **gp_job_done,
+		struct mali_pp_job **pp_job_done);
+static void mali_executor_change_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *old_list,
+		u32 *old_count,
+		_mali_osk_list_t *new_list,
+		u32 *new_count);
+static mali_bool mali_executor_group_is_in_state(struct mali_group *group,
+		enum mali_executor_state_t state);
+
+static void mali_executor_group_enable_internal(struct mali_group *group);
+static void mali_executor_group_disable_internal(struct mali_group *group);
+static void mali_executor_core_scale(unsigned int target_core_nr);
+static void mali_executor_core_scale_in_group_complete(struct mali_group *group);
+static void mali_executor_notify_core_change(u32 num_cores);
+static void mali_executor_wq_notify_core_change(void *arg);
+static void mali_executor_change_group_status_disabled(struct mali_group *group);
+static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group);
+static void mali_executor_set_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *new_list,
+		u32 *new_count);
+
+/*
+ * ---------- Actual implementation ----------
+ */
+
+_mali_osk_errcode_t mali_executor_initialize(void)
+{
+	mali_executor_lock_obj = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_EXECUTOR);
+	if (NULL == mali_executor_lock_obj) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_wq_high_pri = _mali_osk_wq_create_work_high_pri(mali_executor_wq_schedule, NULL);
+	if (NULL == executor_wq_high_pri) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_working_wait_queue = _mali_osk_wait_queue_init();
+	if (NULL == executor_working_wait_queue) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_wq_notify_core_change = _mali_osk_wq_create_work(mali_executor_wq_notify_core_change, NULL);
+	if (NULL == executor_wq_notify_core_change) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_notify_core_change_wait_queue = _mali_osk_wait_queue_init();
+	if (NULL == executor_notify_core_change_wait_queue) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_executor_terminate(void)
+{
+	if (NULL != executor_notify_core_change_wait_queue) {
+		_mali_osk_wait_queue_term(executor_notify_core_change_wait_queue);
+		executor_notify_core_change_wait_queue = NULL;
+	}
+
+	if (NULL != executor_wq_notify_core_change) {
+		_mali_osk_wq_delete_work(executor_wq_notify_core_change);
+		executor_wq_notify_core_change = NULL;
+	}
+
+	if (NULL != executor_working_wait_queue) {
+		_mali_osk_wait_queue_term(executor_working_wait_queue);
+		executor_working_wait_queue = NULL;
+	}
+
+	if (NULL != executor_wq_high_pri) {
+		_mali_osk_wq_delete_work(executor_wq_high_pri);
+		executor_wq_high_pri = NULL;
+	}
+
+	if (NULL != mali_executor_lock_obj) {
+		_mali_osk_spinlock_irq_term(mali_executor_lock_obj);
+		mali_executor_lock_obj = NULL;
+	}
+}
+
+void mali_executor_populate(void)
+{
+	u32 num_groups;
+	u32 i;
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	/* Do we have a virtual group? */
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (mali_group_is_virtual(group)) {
+			virtual_group = group;
+			virtual_group_state = EXEC_STATE_INACTIVE;
+			break;
+		}
+	}
+
+	/* Find all the available physical GP and PP cores */
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (NULL != group) {
+			struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+			struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+
+			if (!mali_group_is_virtual(group)) {
+				if (NULL != pp_core) {
+					if (0 == pp_version) {
+						/* Retrieve PP version from the first available PP core */
+						pp_version = mali_pp_core_get_version(pp_core);
+					}
+
+					if (NULL != virtual_group) {
+						mali_executor_lock();
+						mali_group_add_group(virtual_group, group);
+						mali_executor_unlock();
+					} else {
+						_mali_osk_list_add(&group->executor_list, &group_list_inactive);
+						group_list_inactive_count++;
+					}
+
+					num_physical_pp_cores_total++;
+				} else {
+					MALI_DEBUG_ASSERT_POINTER(gp_core);
+
+					if (0 == gp_version) {
+						/* Retrieve GP version */
+						gp_version = mali_gp_core_get_version(gp_core);
+					}
+
+					gp_group = group;
+					gp_group_state = EXEC_STATE_INACTIVE;
+				}
+
+			}
+		}
+	}
+
+	num_physical_pp_cores_enabled = num_physical_pp_cores_total;
+}
+
+void mali_executor_depopulate(void)
+{
+	struct mali_group *group;
+	struct mali_group *temp;
+
+	MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != gp_group_state);
+
+	if (NULL != gp_group) {
+		mali_group_delete(gp_group);
+		gp_group = NULL;
+	}
+
+	MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != virtual_group_state);
+
+	if (NULL != virtual_group) {
+		mali_group_delete(virtual_group);
+		virtual_group = NULL;
+	}
+
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&group_list_working));
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+}
+
+void mali_executor_suspend(void)
+{
+	mali_executor_lock();
+
+	/* Increment the pause_count so that no more jobs will be scheduled */
+	pause_count++;
+
+	mali_executor_unlock();
+
+	_mali_osk_wait_queue_wait_event(executor_working_wait_queue,
+					mali_executor_is_suspended, NULL);
+
+	/*
+	 * mali_executor_complete_XX() leaves jobs in idle state.
+	 * deactivate option is used when we are going to power down
+	 * the entire GPU (OS suspend) and want a consistent SW vs HW
+	 * state.
+	 */
+	mali_executor_lock();
+
+	mali_executor_deactivate_list_idle(MALI_TRUE);
+
+	/*
+	 * The following steps are used to deactive all of activated
+	 * (MALI_GROUP_STATE_ACTIVE) and activating (MALI_GROUP
+	 * _STAET_ACTIVATION_PENDING) groups, to make sure the variable
+	 * pd_mask_wanted is equal with 0. */
+	if (MALI_GROUP_STATE_INACTIVE != mali_group_get_state(gp_group)) {
+		gp_group_state = EXEC_STATE_INACTIVE;
+		mali_group_deactivate(gp_group);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		if (MALI_GROUP_STATE_INACTIVE
+		    != mali_group_get_state(virtual_group)) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+			mali_group_deactivate(virtual_group);
+		}
+	}
+
+	if (0 < group_list_inactive_count) {
+		struct mali_group *group;
+		struct mali_group *temp;
+
+		_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+					    &group_list_inactive,
+					    struct mali_group, executor_list) {
+			if (MALI_GROUP_STATE_ACTIVATION_PENDING
+			    == mali_group_get_state(group)) {
+				mali_group_deactivate(group);
+			}
+
+			/*
+			 * On mali-450 platform, we may have physical group in the group inactive
+			 * list, and its state is MALI_GROUP_STATE_ACTIVATION_PENDING, so we only
+			 * deactivate it is not enough, we still also need add it back to virtual group.
+			 * And now, virtual group must be in INACTIVE state, so it's safe to add
+			 * physical group to virtual group at this point.
+			 */
+			if (NULL != virtual_group) {
+				_mali_osk_list_delinit(&group->executor_list);
+				group_list_inactive_count--;
+
+				mali_group_add_group(virtual_group, group);
+			}
+		}
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_resume(void)
+{
+	mali_executor_lock();
+
+	/* Decrement pause_count to allow scheduling again (if it reaches 0) */
+	pause_count--;
+	if (0 == pause_count) {
+		mali_executor_schedule();
+	}
+
+	mali_executor_unlock();
+}
+
+u32 mali_executor_get_num_cores_total(void)
+{
+	return num_physical_pp_cores_total;
+}
+
+u32 mali_executor_get_num_cores_enabled(void)
+{
+	return num_physical_pp_cores_enabled;
+}
+
+struct mali_pp_core *mali_executor_get_virtual_pp(void)
+{
+	MALI_DEBUG_ASSERT_POINTER(virtual_group);
+	MALI_DEBUG_ASSERT_POINTER(virtual_group->pp_core);
+	return virtual_group->pp_core;
+}
+
+struct mali_group *mali_executor_get_virtual_group(void)
+{
+	return virtual_group;
+}
+
+void mali_executor_zap_all_active(struct mali_session_data *session)
+{
+	struct mali_group *group;
+	struct mali_group *temp;
+	mali_bool ret;
+
+	mali_executor_lock();
+
+	/*
+	 * This function is a bit complicated because
+	 * mali_group_zap_session() can fail. This only happens because the
+	 * group is in an unhandled page fault status.
+	 * We need to make sure this page fault is handled before we return,
+	 * so that we know every single outstanding MMU transactions have
+	 * completed. This will allow caller to safely remove physical pages
+	 * when we have returned.
+	 */
+
+	MALI_DEBUG_ASSERT(NULL != gp_group);
+	ret = mali_group_zap_session(gp_group, session);
+	if (MALI_FALSE == ret) {
+		struct mali_gp_job *gp_job = NULL;
+
+		mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL);
+
+		MALI_DEBUG_ASSERT_POINTER(gp_job);
+
+		/* GP job completed, make sure it is freed */
+		mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		ret = mali_group_zap_session(virtual_group, session);
+		if (MALI_FALSE == ret) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job);
+
+			if (NULL != pp_job) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_FALSE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working,
+				    struct mali_group, executor_list) {
+		ret = mali_group_zap_session(group, session);
+		if (MALI_FALSE == ret) {
+			ret = mali_group_zap_session(group, session);
+			if (MALI_FALSE == ret) {
+				struct mali_pp_job *pp_job = NULL;
+
+				mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job);
+
+				if (NULL != pp_job) {
+					/* PP job completed, free it */
+					mali_scheduler_complete_pp_job(pp_job,
+								       0, MALI_FALSE,
+								       MALI_TRUE);
+				}
+			}
+		}
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule)
+{
+	if (MALI_SCHEDULER_MASK_EMPTY != mask) {
+		if (MALI_TRUE == deferred_schedule) {
+			_mali_osk_wq_schedule_work_high_pri(executor_wq_high_pri);
+		} else {
+			/* Schedule from this thread*/
+			mali_executor_lock();
+			mali_executor_schedule();
+			mali_executor_unlock();
+		}
+	}
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+	mali_bool time_out = MALI_FALSE;
+
+	MALI_DEBUG_PRINT(4, ("Executor: GP interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+
+	if (mali_group_has_timed_out(group)) {
+		int_result = MALI_INTERRUPT_RESULT_ERROR;
+		time_out = MALI_TRUE;
+		MALI_PRINT(("Executor GP: Job %d Timeout on %s\n",
+			    mali_gp_job_get_id(group->gp_running_job),
+			    mali_group_core_description(group)));
+	} else {
+		int_result = mali_group_get_interrupt_result_gp(group);
+		if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#else
+	MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_NONE != int_result);
+#endif
+
+	mali_group_mask_all_interrupts_gp(group);
+
+	if (MALI_INTERRUPT_RESULT_SUCCESS_VS == int_result) {
+		if (mali_group_gp_is_active(group)) {
+			/* Only VS completed so far, while PLBU is still active */
+
+			/* Enable all but the current interrupt */
+			mali_group_enable_interrupts_gp(group, int_result);
+
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_OK;
+		}
+	} else if (MALI_INTERRUPT_RESULT_SUCCESS_PLBU == int_result) {
+		if (mali_group_gp_is_active(group)) {
+			/* Only PLBU completed so far, while VS is still active */
+
+			/* Enable all but the current interrupt */
+			mali_group_enable_interrupts_gp(group, int_result);
+
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_OK;
+		}
+	} else if (MALI_INTERRUPT_RESULT_OOM == int_result) {
+		struct mali_gp_job *job = mali_group_get_running_gp_job(group);
+
+		/* PLBU out of mem */
+		MALI_DEBUG_PRINT(3, ("Executor: PLBU needs more heap memory\n"));
+
+#if defined(CONFIG_MALI400_PROFILING)
+		/* Give group a chance to generate a SUSPEND event */
+		mali_group_oom(group);
+#endif
+
+		/*
+		 * no need to hold interrupt raised while
+		 * waiting for more memory.
+		 */
+		mali_executor_send_gp_oom_to_user(job);
+
+		mali_executor_unlock();
+
+		return _MALI_OSK_ERR_OK;
+	}
+
+	/* We should now have a real interrupt to handle */
+
+	MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n",
+			     mali_group_core_description(group),
+			     (MALI_INTERRUPT_RESULT_ERROR == int_result) ?
+			     "ERROR" : "success"));
+
+	if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) {
+		/* Don't bother to do processing of errors in upper half */
+		mali_executor_unlock();
+
+		if (MALI_FALSE == time_out) {
+			mali_group_schedule_bottom_half_gp(group);
+		}
+	} else {
+		struct mali_gp_job *job;
+		mali_bool success;
+
+		if (MALI_TRUE == time_out) {
+			mali_group_dump_status(group);
+		}
+
+		success = (int_result != MALI_INTERRUPT_RESULT_ERROR) ?
+			  MALI_TRUE : MALI_FALSE;
+
+		mali_executor_complete_group(group, success, &job, NULL);
+
+		mali_executor_unlock();
+
+		/* GP jobs always fully complete */
+		MALI_DEBUG_ASSERT(NULL != job);
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_gp_job(job, success,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+	mali_bool time_out = MALI_FALSE;
+
+	MALI_DEBUG_PRINT(4, ("Executor: PP interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (in_upper_half) {
+		if (mali_group_is_in_virtual(group)) {
+			/* Child groups should never handle PP interrupts */
+			MALI_DEBUG_ASSERT(!mali_group_has_timed_out(group));
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+	MALI_DEBUG_ASSERT(!mali_group_is_in_virtual(group));
+
+	if (mali_group_has_timed_out(group)) {
+		int_result = MALI_INTERRUPT_RESULT_ERROR;
+		time_out = MALI_TRUE;
+		MALI_PRINT(("Executor PP: Job %d Timeout on %s\n",
+			    mali_pp_job_get_id(group->pp_running_job),
+			    mali_group_core_description(group)));
+	} else {
+		int_result = mali_group_get_interrupt_result_pp(group);
+		if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	} else if (MALI_INTERRUPT_RESULT_SUCCESS == int_result) {
+		if (mali_group_is_virtual(group) && mali_group_pp_is_active(group)) {
+			/* Some child groups are still working, so nothing to do right now */
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+#else
+	MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_NONE != int_result);
+#endif
+
+	/* We should now have a real interrupt to handle */
+
+	MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n",
+			     mali_group_core_description(group),
+			     (MALI_INTERRUPT_RESULT_ERROR == int_result) ?
+			     "ERROR" : "success"));
+
+	if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) {
+		/* Don't bother to do processing of errors in upper half */
+		mali_group_mask_all_interrupts_pp(group);
+		mali_executor_unlock();
+
+		if (MALI_FALSE == time_out) {
+			mali_group_schedule_bottom_half_pp(group);
+		}
+	} else {
+		struct mali_pp_job *job = NULL;
+		mali_bool success;
+
+		if (MALI_TRUE == time_out) {
+			mali_group_dump_status(group);
+		}
+
+		success = (int_result == MALI_INTERRUPT_RESULT_SUCCESS) ?
+			  MALI_TRUE : MALI_FALSE;
+
+		mali_executor_complete_group(group, success, NULL, &job);
+
+		mali_executor_unlock();
+
+		if (NULL != job) {
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(job,
+						       num_physical_pp_cores_total,
+						       MALI_TRUE, MALI_TRUE);
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+
+	MALI_DEBUG_PRINT(4, ("Executor: MMU interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+
+	int_result = mali_group_get_interrupt_result_mmu(group);
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#else
+	MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_ERROR == int_result);
+#endif
+
+	/* We should now have a real interrupt to handle */
+
+	if (in_upper_half) {
+		/* Don't bother to do processing of errors in upper half */
+
+		struct mali_group *parent = group->parent_group;
+
+		mali_mmu_mask_all_interrupts(group->mmu);
+
+		mali_executor_unlock();
+
+		if (NULL == parent) {
+			mali_group_schedule_bottom_half_mmu(group);
+		} else {
+			mali_group_schedule_bottom_half_mmu(parent);
+		}
+
+	} else {
+		struct mali_gp_job *gp_job = NULL;
+		struct mali_pp_job *pp_job = NULL;
+
+#ifdef DEBUG
+
+		u32 fault_address = mali_mmu_get_page_fault_addr(group->mmu);
+		u32 status = mali_mmu_get_status(group->mmu);
+		MALI_DEBUG_PRINT(2, ("Executor: Mali page fault detected at 0x%x from bus id %d of type %s on %s\n",
+				     (void *)(uintptr_t)fault_address,
+				     (status >> 6) & 0x1F,
+				     (status & 32) ? "write" : "read",
+				     group->mmu->hw_core.description));
+		MALI_DEBUG_PRINT(3, ("Executor: MMU rawstat = 0x%08X, MMU status = 0x%08X\n",
+				     mali_mmu_get_rawstat(group->mmu), status));
+		mali_mmu_pagedir_diag(mali_session_get_page_directory(group->session), fault_address);
+#endif
+
+		mali_executor_complete_group(group, MALI_FALSE, &gp_job, &pp_job);
+
+		mali_executor_unlock();
+
+		if (NULL != gp_job) {
+			MALI_DEBUG_ASSERT(NULL == pp_job);
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+						       MALI_TRUE, MALI_TRUE);
+		} else if (NULL != pp_job) {
+			MALI_DEBUG_ASSERT(NULL == gp_job);
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(pp_job,
+						       num_physical_pp_cores_total,
+						       MALI_TRUE, MALI_TRUE);
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups)
+{
+	u32 i;
+	mali_bool child_groups_activated = MALI_FALSE;
+	mali_bool do_schedule = MALI_FALSE;
+#if defined(DEBUG)
+	u32 num_activated = 0;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(groups);
+	MALI_DEBUG_ASSERT(0 < num_groups);
+
+	mali_executor_lock();
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups\n", num_groups));
+
+	for (i = 0; i < num_groups; i++) {
+		MALI_DEBUG_PRINT(3, ("Executor: powering up group %s\n",
+				     mali_group_core_description(groups[i])));
+
+		mali_group_power_up(groups[i]);
+
+		if ((MALI_GROUP_STATE_ACTIVATION_PENDING != mali_group_get_state(groups[i]) ||
+		     (MALI_TRUE != mali_executor_group_is_in_state(groups[i], EXEC_STATE_INACTIVE)))) {
+			/* nothing more to do for this group */
+			continue;
+		}
+
+		MALI_DEBUG_PRINT(3, ("Executor: activating group %s\n",
+				     mali_group_core_description(groups[i])));
+
+#if defined(DEBUG)
+		num_activated++;
+#endif
+
+		if (mali_group_is_in_virtual(groups[i])) {
+			/*
+			 * At least one child group of virtual group is powered on.
+			 */
+			child_groups_activated = MALI_TRUE;
+		} else if (MALI_FALSE == mali_group_is_virtual(groups[i])) {
+			/* Set gp and pp not in virtual to active. */
+			mali_group_set_active(groups[i]);
+		}
+
+		/* Move group from inactive to idle list */
+		if (groups[i] == gp_group) {
+			MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE ==
+					  gp_group_state);
+			gp_group_state = EXEC_STATE_IDLE;
+		} else if (MALI_FALSE == mali_group_is_in_virtual(groups[i])
+			   && MALI_FALSE == mali_group_is_virtual(groups[i])) {
+			MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_group_is_in_state(groups[i],
+					  EXEC_STATE_INACTIVE));
+
+			mali_executor_change_state_pp_physical(groups[i],
+							       &group_list_inactive,
+							       &group_list_inactive_count,
+							       &group_list_idle,
+							       &group_list_idle_count);
+		}
+
+		do_schedule = MALI_TRUE;
+	}
+
+	if (mali_executor_has_virtual_group() &&
+	    MALI_TRUE == child_groups_activated &&
+	    MALI_GROUP_STATE_ACTIVATION_PENDING ==
+	    mali_group_get_state(virtual_group)) {
+		/*
+		 * Try to active virtual group while it may be not sucessful every time,
+		 * because there is one situation that not all of child groups are powered on
+		 * in one time and virtual group is in activation pending state.
+		 */
+		if (mali_group_set_active(virtual_group)) {
+			/* Move group from inactive to idle */
+			MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE ==
+					  virtual_group_state);
+			virtual_group_state = EXEC_STATE_IDLE;
+
+			MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u  physical activated, 1 virtual activated.\n", num_groups, num_activated));
+		} else {
+			MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated));
+		}
+	} else {
+		MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated));
+	}
+
+	if (MALI_TRUE == do_schedule) {
+		/* Trigger a schedule */
+		mali_executor_schedule();
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_group_power_down(struct mali_group *groups[],
+				    u32 num_groups)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(groups);
+	MALI_DEBUG_ASSERT(0 < num_groups);
+
+	mali_executor_lock();
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups\n", num_groups));
+
+	for (i = 0; i < num_groups; i++) {
+		/* Groups must be either disabled or inactive. while for virtual group,
+		 * it maybe in empty state, because when we meet pm_runtime_suspend,
+		 * virtual group could be powered off, and before we acquire mali_executor_lock,
+		 * we must release mali_pm_state_lock, if there is a new physical job was queued,
+		 * all of physical groups in virtual group could be pulled out, so we only can
+		 * powered down an empty virtual group. Those physical groups will be powered
+		 * up in following pm_runtime_resume callback function.
+		 */
+		MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(groups[i],
+				  EXEC_STATE_DISABLED) ||
+				  mali_executor_group_is_in_state(groups[i],
+						  EXEC_STATE_INACTIVE) ||
+				  mali_executor_group_is_in_state(groups[i],
+						  EXEC_STATE_EMPTY));
+
+		MALI_DEBUG_PRINT(3, ("Executor: powering down group %s\n",
+				     mali_group_core_description(groups[i])));
+
+		mali_group_power_down(groups[i]);
+	}
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups completed\n", num_groups));
+
+	mali_executor_unlock();
+}
+
+void mali_executor_abort_session(struct mali_session_data *session)
+{
+	struct mali_group *group;
+	struct mali_group *tmp_group;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(session->is_aborting);
+
+	MALI_DEBUG_PRINT(3,
+			 ("Executor: Aborting all jobs from session 0x%08X.\n",
+			  session));
+
+	mali_executor_lock();
+
+	if (mali_group_get_session(gp_group) == session) {
+		if (EXEC_STATE_WORKING == gp_group_state) {
+			struct mali_gp_job *gp_job = NULL;
+
+			mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL);
+
+			MALI_DEBUG_ASSERT_POINTER(gp_job);
+
+			/* GP job completed, make sure it is freed */
+			mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+						       MALI_FALSE, MALI_TRUE);
+		} else {
+			/* Same session, but not working, so just clear it */
+			mali_group_clear_session(gp_group);
+		}
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		if (EXEC_STATE_WORKING == virtual_group_state
+		    && mali_group_get_session(virtual_group) == session) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job);
+
+			if (NULL != pp_job) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_FALSE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_working,
+				    struct mali_group, executor_list) {
+		if (mali_group_get_session(group) == session) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job);
+
+			if (NULL != pp_job) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_FALSE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_idle, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_inactive, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_disabled, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	mali_executor_unlock();
+}
+
+
+void mali_executor_core_scaling_enable(void)
+{
+	/* PS: Core scaling is by default enabled */
+	core_scaling_enabled = MALI_TRUE;
+}
+
+void mali_executor_core_scaling_disable(void)
+{
+	core_scaling_enabled = MALI_FALSE;
+}
+
+mali_bool mali_executor_core_scaling_is_enabled(void)
+{
+	return core_scaling_enabled;
+}
+
+void mali_executor_group_enable(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+
+	if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group))
+	    && (mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) {
+		mali_executor_group_enable_internal(group);
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+}
+
+/*
+ * If a physical group is inactive or idle, we should disable it immediately,
+ * if group is in virtual, and virtual group is idle, disable given physical group in it.
+ */
+void mali_executor_group_disable(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+
+	if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group))
+	    && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) {
+		mali_executor_group_disable_internal(group);
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+}
+
+mali_bool mali_executor_group_is_disabled(struct mali_group *group)
+{
+	/* NB: This function is not optimized for time critical usage */
+
+	mali_bool ret;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+	ret = mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED);
+	mali_executor_unlock();
+
+	return ret;
+}
+
+int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override)
+{
+	if (target_core_nr == num_physical_pp_cores_enabled) return 0;
+	if (MALI_FALSE == core_scaling_enabled && MALI_FALSE == override) return -EPERM;
+	if (target_core_nr > num_physical_pp_cores_total) return -EINVAL;
+	if (0 == target_core_nr) return -EINVAL;
+
+	mali_executor_core_scale(target_core_nr);
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+
+	return 0;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_executor_dump_state(char *buf, u32 size)
+{
+	int n = 0;
+	struct mali_group *group;
+	struct mali_group *temp;
+
+	mali_executor_lock();
+
+	switch (gp_group_state) {
+	case EXEC_STATE_INACTIVE:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state INACTIVE\n");
+		break;
+	case EXEC_STATE_IDLE:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state IDLE\n");
+		break;
+	case EXEC_STATE_WORKING:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state WORKING\n");
+		break;
+	default:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in unknown/illegal state %u\n",
+					gp_group_state);
+		break;
+	}
+
+	n += mali_group_dump_state(gp_group, buf + n, size - n);
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in WORKING state (count = %u):\n",
+				group_list_working_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in IDLE state (count = %u):\n",
+				group_list_idle_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in INACTIVE state (count = %u):\n",
+				group_list_inactive_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in DISABLED state (count = %u):\n",
+				group_list_disabled_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		switch (virtual_group_state) {
+		case EXEC_STATE_EMPTY:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state EMPTY\n");
+			break;
+		case EXEC_STATE_INACTIVE:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state INACTIVE\n");
+			break;
+		case EXEC_STATE_IDLE:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state IDLE\n");
+			break;
+		case EXEC_STATE_WORKING:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state WORKING\n");
+			break;
+		default:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in unknown/illegal state %u\n",
+						virtual_group_state);
+			break;
+		}
+
+		n += mali_group_dump_state(virtual_group, buf + n, size - n);
+	}
+
+	mali_executor_unlock();
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\n");
+
+	return n;
+}
+#endif
+
+_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->number_of_total_cores = num_physical_pp_cores_total;
+	args->number_of_enabled_cores = num_physical_pp_cores_enabled;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->version = pp_version;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->number_of_cores = 1;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->version = gp_version;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args)
+{
+	struct mali_session_data *session;
+	struct mali_gp_job *job;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (_MALIGP_JOB_RESUME_WITH_NEW_HEAP == args->code) {
+		_mali_osk_notification_t *new_notification = NULL;
+
+		new_notification = _mali_osk_notification_create(
+					   _MALI_NOTIFICATION_GP_STALLED,
+					   sizeof(_mali_uk_gp_job_suspended_s));
+
+		if (NULL != new_notification) {
+			MALI_DEBUG_PRINT(3, ("Executor: Resuming job %u with new heap; 0x%08X - 0x%08X\n",
+					     args->cookie, args->arguments[0], args->arguments[1]));
+
+			mali_executor_lock();
+
+			/* Resume the job in question if it is still running */
+			job = mali_group_get_running_gp_job(gp_group);
+			if (NULL != job &&
+			    args->cookie == mali_gp_job_get_id(job) &&
+			    session == mali_gp_job_get_session(job)) {
+				/*
+				 * Correct job is running, resume with new heap
+				 */
+
+				mali_gp_job_set_oom_notification(job,
+								 new_notification);
+
+				/* This will also re-enable interrupts */
+				mali_group_resume_gp_with_new_heap(gp_group,
+								   args->cookie,
+								   args->arguments[0],
+								   args->arguments[1]);
+
+				mali_executor_unlock();
+				return _MALI_OSK_ERR_OK;
+			} else {
+				MALI_DEBUG_PRINT(2, ("Executor: Unable to resume  gp job becasue gp time out or any other unexpected reason!\n"));
+
+				_mali_osk_notification_delete(new_notification);
+
+				mali_executor_unlock();
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_PRINT_ERROR(("Executor: Failed to allocate notification object. Will abort GP job.\n"));
+		}
+	} else {
+		MALI_DEBUG_PRINT(2, ("Executor: Aborting job %u, no new heap provided\n", args->cookie));
+	}
+
+	mali_executor_lock();
+
+	/* Abort the job in question if it is still running */
+	job = mali_group_get_running_gp_job(gp_group);
+	if (NULL != job &&
+	    args->cookie == mali_gp_job_get_id(job) &&
+	    session == mali_gp_job_get_session(job)) {
+		/* Correct job is still running */
+		struct mali_gp_job *job_done = NULL;
+
+		mali_executor_complete_group(gp_group, MALI_FALSE, &job_done, NULL);
+
+		/* The same job should have completed */
+		MALI_DEBUG_ASSERT(job_done == job);
+
+		/* GP job completed, make sure it is freed */
+		mali_scheduler_complete_gp_job(job_done, MALI_FALSE,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	mali_executor_unlock();
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+/*
+ * ---------- Implementation of static functions ----------
+ */
+
+void mali_executor_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_executor_lock_obj);
+	MALI_DEBUG_PRINT(5, ("Executor: lock taken\n"));
+}
+
+void mali_executor_unlock(void)
+{
+	MALI_DEBUG_PRINT(5, ("Executor: Releasing lock\n"));
+	_mali_osk_spinlock_irq_unlock(mali_executor_lock_obj);
+}
+
+static mali_bool mali_executor_is_suspended(void *data)
+{
+	mali_bool ret;
+
+	/* This callback does not use the data pointer. */
+	MALI_IGNORE(data);
+
+	mali_executor_lock();
+
+	ret = pause_count > 0 && !mali_executor_is_working();
+
+	mali_executor_unlock();
+
+	return ret;
+}
+
+static mali_bool mali_executor_is_working()
+{
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	return (0 != group_list_working_count ||
+		EXEC_STATE_WORKING == gp_group_state ||
+		EXEC_STATE_WORKING == virtual_group_state);
+}
+
+static void mali_executor_disable_empty_virtual(void)
+{
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_EMPTY);
+	MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_WORKING);
+
+	if (mali_group_is_empty(virtual_group)) {
+		virtual_group_state = EXEC_STATE_EMPTY;
+	}
+}
+
+static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group)
+{
+	mali_bool trigger_pm_update = MALI_FALSE;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	/* Only rejoining after job has completed (still active) */
+	MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE ==
+			  mali_group_get_state(group));
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_has_virtual_group());
+	MALI_DEBUG_ASSERT(MALI_FALSE == mali_group_is_virtual(group));
+
+	/* Make sure group and virtual group have same status */
+
+	if (MALI_GROUP_STATE_INACTIVE == mali_group_get_state(virtual_group)) {
+		if (mali_group_deactivate(group)) {
+			trigger_pm_update = MALI_TRUE;
+		}
+
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+		}
+	} else if (MALI_GROUP_STATE_ACTIVATION_PENDING ==
+		   mali_group_get_state(virtual_group)) {
+		/*
+		 * Activation is pending for virtual group, leave
+		 * this child group as active.
+		 */
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE ==
+				  mali_group_get_state(virtual_group));
+
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_IDLE;
+		}
+	}
+
+	/* Remove group from idle list */
+	MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group,
+			  EXEC_STATE_IDLE));
+	_mali_osk_list_delinit(&group->executor_list);
+	group_list_idle_count--;
+
+	/*
+	 * And finally rejoin the virtual group
+	 * group will start working on same job as virtual_group,
+	 * if virtual_group is working on a job
+	 */
+	mali_group_add_group(virtual_group, group);
+
+	return trigger_pm_update;
+}
+
+static mali_bool mali_executor_has_virtual_group(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	return (NULL != virtual_group) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+}
+
+static mali_bool mali_executor_virtual_group_is_usable(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return ((EXEC_STATE_INACTIVE == virtual_group_state ||
+		EXEC_STATE_IDLE == virtual_group_state) && (virtual_group->state != MALI_GROUP_STATE_ACTIVATION_PENDING)) ?
+	       MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+}
+
+static mali_bool mali_executor_tackle_gp_bound(void)
+{
+	struct mali_pp_job *job;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	job = mali_scheduler_job_pp_physical_peek();
+
+	if (NULL != job && MALI_TRUE == mali_is_mali400()) {
+		if (0 < group_list_working_count &&
+		    mali_pp_job_is_large_and_unstarted(job)) {
+			return MALI_TRUE;
+		}
+	}
+
+	return MALI_FALSE;
+}
+
+static mali_bool mali_executor_schedule_is_early_out(mali_bool *gpu_secure_mode_is_needed)
+{
+	struct mali_pp_job *next_pp_job_to_start = NULL;
+	struct mali_group *group;
+	struct mali_group *tmp_group;
+	struct mali_pp_job *physical_pp_job_working = NULL;
+	struct mali_pp_job *virtual_pp_job_working = NULL;
+	mali_bool gpu_working_in_protected_mode = MALI_FALSE;
+	mali_bool gpu_working_in_non_protected_mode = MALI_FALSE;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	*gpu_secure_mode_is_needed = MALI_FALSE;
+
+	/* Check if the gpu secure mode is supported, exit if not.*/
+	if (MALI_FALSE == _mali_osk_gpu_secure_mode_is_supported()) {
+		return MALI_FALSE;
+	}
+
+	/* Check if need to set gpu secure mode for the next pp job,
+	 * get the next pp job that will be scheduled  if exist.
+	 */
+	next_pp_job_to_start = mali_scheduler_job_pp_next();
+
+	/* Check current pp physical/virtual running job is protected job or not if exist.*/
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_working,
+				    struct mali_group, executor_list) {
+		physical_pp_job_working = group->pp_running_job;
+		break;
+	}
+
+	if (EXEC_STATE_WORKING == virtual_group_state) {
+		virtual_pp_job_working = virtual_group->pp_running_job;
+	}
+
+	if (NULL != physical_pp_job_working) {
+		if (MALI_TRUE == mali_pp_job_is_protected_job(physical_pp_job_working)) {
+			gpu_working_in_protected_mode = MALI_TRUE;
+		} else {
+			gpu_working_in_non_protected_mode = MALI_TRUE;
+		}
+	} else if (NULL != virtual_pp_job_working) {
+		if (MALI_TRUE == mali_pp_job_is_protected_job(virtual_pp_job_working)) {
+			gpu_working_in_protected_mode = MALI_TRUE;
+		} else {
+			gpu_working_in_non_protected_mode = MALI_TRUE;
+		}
+	} else if (EXEC_STATE_WORKING == gp_group_state) {
+		gpu_working_in_non_protected_mode = MALI_TRUE;
+	}
+
+	/* If the next pp job is the protected pp job.*/
+	if ((NULL != next_pp_job_to_start) && MALI_TRUE == mali_pp_job_is_protected_job(next_pp_job_to_start)) {
+		/* if gp is working or any non-protected pp job is working now, unable to schedule protected pp job. */
+		if (MALI_TRUE == gpu_working_in_non_protected_mode)
+			return MALI_TRUE;
+
+		*gpu_secure_mode_is_needed = MALI_TRUE;
+		return MALI_FALSE;
+
+	}
+
+	if (MALI_TRUE == gpu_working_in_protected_mode) {
+		/* Unable to schedule non-protected pp job/gp job if exist protected pp running jobs*/
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+/*
+ * This is where jobs are actually started.
+ */
+static void mali_executor_schedule(void)
+{
+	u32 i;
+	u32 num_physical_needed = 0;
+	u32 num_physical_to_process = 0;
+	mali_bool trigger_pm_update = MALI_FALSE;
+	mali_bool deactivate_idle_group = MALI_TRUE;
+	mali_bool gpu_secure_mode_is_needed = MALI_FALSE;
+	mali_bool is_gpu_secure_mode = MALI_FALSE;
+	/* Physical groups + jobs to start in this function */
+	struct mali_group *groups_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	struct mali_pp_job *jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	u32 sub_jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	int num_jobs_to_start = 0;
+
+	/* Virtual job to start in this function */
+	struct mali_pp_job *virtual_job_to_start = NULL;
+
+	/* GP job to start in this function */
+	struct mali_gp_job *gp_job_to_start = NULL;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (pause_count > 0) {
+		/* Execution is suspended, don't schedule any jobs. */
+		return;
+	}
+
+	/* Lock needed in order to safely handle the job queues */
+	mali_scheduler_lock();
+
+	/* 1. Check the schedule if need to early out. */
+	if (MALI_TRUE == mali_executor_schedule_is_early_out(&gpu_secure_mode_is_needed)) {
+		mali_scheduler_unlock();
+		return;
+	}
+
+	/* 2. Activate gp firstly if have gp job queued. */
+	if ((EXEC_STATE_INACTIVE == gp_group_state)
+	    && (0 < mali_scheduler_job_gp_count())
+	    && (gpu_secure_mode_is_needed == MALI_FALSE)) {
+
+		enum mali_group_state state =
+			mali_group_activate(gp_group);
+		if (MALI_GROUP_STATE_ACTIVE == state) {
+			/* Set GP group state to idle */
+			gp_group_state = EXEC_STATE_IDLE;
+		} else {
+			trigger_pm_update = MALI_TRUE;
+		}
+	}
+
+	/* 3. Prepare as many physical groups as needed/possible */
+
+	num_physical_needed = mali_scheduler_job_physical_head_count(gpu_secure_mode_is_needed);
+
+	/* On mali-450 platform, we don't need to enter in this block frequently. */
+	if (0 < num_physical_needed) {
+
+		if (num_physical_needed <= group_list_idle_count) {
+			/* We have enough groups on idle list already */
+			num_physical_to_process = num_physical_needed;
+			num_physical_needed = 0;
+		} else {
+			/* We need to get a hold of some more groups */
+			num_physical_to_process = group_list_idle_count;
+			num_physical_needed -= group_list_idle_count;
+		}
+
+		if (0 < num_physical_needed) {
+
+			/* 3.1. Activate groups which are inactive */
+
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive,
+						    struct mali_group, executor_list) {
+				enum mali_group_state state =
+					mali_group_activate(group);
+				if (MALI_GROUP_STATE_ACTIVE == state) {
+					/* Move from inactive to idle */
+					mali_executor_change_state_pp_physical(group,
+									       &group_list_inactive,
+									       &group_list_inactive_count,
+									       &group_list_idle,
+									       &group_list_idle_count);
+					num_physical_to_process++;
+				} else {
+					trigger_pm_update = MALI_TRUE;
+				}
+
+				num_physical_needed--;
+				if (0 == num_physical_needed) {
+					/* We have activated all the groups we need */
+					break;
+				}
+			}
+		}
+
+		if (mali_executor_virtual_group_is_usable()) {
+
+			/*
+			 * 3.2. And finally, steal and activate groups
+			 * from virtual group if we need even more
+			 */
+			while (0 < num_physical_needed) {
+				struct mali_group *group;
+
+				group = mali_group_acquire_group(virtual_group);
+				if (NULL != group) {
+					enum mali_group_state state;
+
+					mali_executor_disable_empty_virtual();
+
+					state = mali_group_activate(group);
+					if (MALI_GROUP_STATE_ACTIVE == state) {
+						/* Group is ready, add to idle list */
+						_mali_osk_list_add(
+							&group->executor_list,
+							&group_list_idle);
+						group_list_idle_count++;
+						num_physical_to_process++;
+					} else {
+						/*
+						 * Group is not ready yet,
+						 * add to inactive list
+						 */
+						_mali_osk_list_add(
+							&group->executor_list,
+							&group_list_inactive);
+						group_list_inactive_count++;
+
+						trigger_pm_update = MALI_TRUE;
+					}
+					num_physical_needed--;
+				} else {
+					/*
+					 * We could not get enough groups
+					 * from the virtual group.
+					 */
+					break;
+				}
+			}
+		}
+
+		/* 3.3. Assign physical jobs to groups */
+
+		if (0 < num_physical_to_process) {
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle,
+						    struct mali_group, executor_list) {
+				struct mali_pp_job *job = NULL;
+				u32 sub_job = MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+
+				MALI_DEBUG_ASSERT(num_jobs_to_start <
+						  MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS);
+
+				MALI_DEBUG_ASSERT(0 <
+						  mali_scheduler_job_physical_head_count(gpu_secure_mode_is_needed));
+
+				/* If the next pp job is non-protected, check if gp bound now. */
+				if ((MALI_FALSE == gpu_secure_mode_is_needed)
+				    && (mali_executor_hint_is_enabled(MALI_EXECUTOR_HINT_GP_BOUND))
+				    && (MALI_TRUE == mali_executor_tackle_gp_bound())) {
+					/*
+					* We're gp bound,
+					* don't start this right now.
+					*/
+					deactivate_idle_group = MALI_FALSE;
+					num_physical_to_process = 0;
+					break;
+				}
+
+				job = mali_scheduler_job_pp_physical_get(
+					      &sub_job);
+
+				if (MALI_FALSE == gpu_secure_mode_is_needed) {
+					MALI_DEBUG_ASSERT(MALI_FALSE == mali_pp_job_is_protected_job(job));
+				} else {
+					MALI_DEBUG_ASSERT(MALI_TRUE == mali_pp_job_is_protected_job(job));
+				}
+
+				MALI_DEBUG_ASSERT_POINTER(job);
+				MALI_DEBUG_ASSERT(sub_job <= MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS);
+
+				/* Put job + group on list of jobs to start later on */
+
+				groups_to_start[num_jobs_to_start] = group;
+				jobs_to_start[num_jobs_to_start] = job;
+				sub_jobs_to_start[num_jobs_to_start] = sub_job;
+				num_jobs_to_start++;
+
+				/* Move group from idle to working */
+				mali_executor_change_state_pp_physical(group,
+								       &group_list_idle,
+								       &group_list_idle_count,
+								       &group_list_working,
+								       &group_list_working_count);
+
+				num_physical_to_process--;
+				if (0 == num_physical_to_process) {
+					/* Got all we needed */
+					break;
+				}
+			}
+		}
+	}
+
+	/* 4. Deactivate idle pp group , must put deactive here before active vitual group
+	 *    for cover case first only has physical job in normal queue but group inactive,
+	 *    so delay the job start go to active group, when group activated,
+	 *    call scheduler again, but now if we get high queue virtual job,
+	 *    we will do nothing in schedule cause executor schedule stop
+	 */
+
+	if (MALI_TRUE == mali_executor_deactivate_list_idle(deactivate_idle_group
+			&& (!mali_timeline_has_physical_pp_job()))) {
+		trigger_pm_update = MALI_TRUE;
+	}
+
+	/* 5. Activate virtual group, if needed */
+	if (EXEC_STATE_INACTIVE == virtual_group_state &&
+	    MALI_TRUE ==  mali_scheduler_job_next_is_virtual()) {
+		struct mali_pp_job *virtual_job = mali_scheduler_job_pp_virtual_peek();
+		if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == mali_pp_job_is_protected_job(virtual_job))
+		    || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == mali_pp_job_is_protected_job(virtual_job))) {
+			enum mali_group_state state =
+				mali_group_activate(virtual_group);
+			if (MALI_GROUP_STATE_ACTIVE == state) {
+				/* Set virtual group state to idle */
+				virtual_group_state = EXEC_STATE_IDLE;
+			} else {
+				trigger_pm_update = MALI_TRUE;
+			}
+		}
+	}
+
+	/* 6. To power up group asap,  trigger pm update only when no need to swith the gpu mode. */
+
+	is_gpu_secure_mode = _mali_osk_gpu_secure_mode_is_enabled();
+
+	if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == is_gpu_secure_mode)
+		||(MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == is_gpu_secure_mode))
+	{
+		if (MALI_TRUE == trigger_pm_update) {
+			trigger_pm_update = MALI_FALSE;
+			mali_pm_update_async();
+		}
+	}
+
+	/* 7. Assign jobs to idle virtual group (or deactivate if no job) */
+
+	if (EXEC_STATE_IDLE == virtual_group_state) {
+		if (MALI_TRUE == mali_scheduler_job_next_is_virtual()) {
+			struct mali_pp_job *virtual_job = mali_scheduler_job_pp_virtual_peek();
+			if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == mali_pp_job_is_protected_job(virtual_job))
+			    || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == mali_pp_job_is_protected_job(virtual_job))) {
+				virtual_job_to_start =
+					mali_scheduler_job_pp_virtual_get();
+				virtual_group_state = EXEC_STATE_WORKING;
+			}
+		} else if (!mali_timeline_has_virtual_pp_job()) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+
+			if (mali_group_deactivate(virtual_group)) {
+				trigger_pm_update = MALI_TRUE;
+			}
+		}
+	}
+
+	/* 8. Assign job to idle GP group (or deactivate if no job) */
+
+	if (EXEC_STATE_IDLE == gp_group_state && MALI_FALSE == gpu_secure_mode_is_needed) {
+		if (0 < mali_scheduler_job_gp_count()) {
+			gp_job_to_start = mali_scheduler_job_gp_get();
+			gp_group_state = EXEC_STATE_WORKING;
+		} else if (!mali_timeline_has_gp_job()) {
+			gp_group_state = EXEC_STATE_INACTIVE;
+			if (mali_group_deactivate(gp_group)) {
+				trigger_pm_update = MALI_TRUE;
+			}
+		}
+	}
+
+	/* 9. We no longer need the schedule/queue lock */
+
+	mali_scheduler_unlock();
+
+	/* 10. start jobs */
+	if (NULL != virtual_job_to_start) {
+		MALI_DEBUG_ASSERT(!mali_group_pp_is_active(virtual_group));
+		mali_group_start_pp_job(virtual_group,
+					virtual_job_to_start, 0, is_gpu_secure_mode);
+	}
+
+	for (i = 0; i < num_jobs_to_start; i++) {
+		MALI_DEBUG_ASSERT(!mali_group_pp_is_active(
+					  groups_to_start[i]));
+		mali_group_start_pp_job(groups_to_start[i],
+					jobs_to_start[i],
+					sub_jobs_to_start[i], is_gpu_secure_mode);
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(gp_group);
+
+	if (NULL != gp_job_to_start) {
+		MALI_DEBUG_ASSERT(!mali_group_gp_is_active(gp_group));
+		mali_group_start_gp_job(gp_group, gp_job_to_start, is_gpu_secure_mode);
+	}
+
+	/* 11. Trigger any pending PM updates */
+	if (MALI_TRUE == trigger_pm_update) {
+		mali_pm_update_async();
+	}
+}
+
+/* Handler for deferred schedule requests */
+static void mali_executor_wq_schedule(void *arg)
+{
+	MALI_IGNORE(arg);
+	mali_executor_lock();
+	mali_executor_schedule();
+	mali_executor_unlock();
+}
+
+static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job)
+{
+	_mali_uk_gp_job_suspended_s *jobres;
+	_mali_osk_notification_t *notification;
+
+	notification = mali_gp_job_get_oom_notification(job);
+
+	/*
+	 * Remember the id we send to user space, so we have something to
+	 * verify when we get a response
+	 */
+	gp_returned_cookie = mali_gp_job_get_id(job);
+
+	jobres = (_mali_uk_gp_job_suspended_s *)notification->result_buffer;
+	jobres->user_job_ptr = mali_gp_job_get_user_id(job);
+	jobres->cookie = gp_returned_cookie;
+
+	mali_session_send_notification(mali_gp_job_get_session(job),
+				       notification);
+}
+static struct mali_gp_job *mali_executor_complete_gp(struct mali_group *group,
+		mali_bool success)
+{
+	struct mali_gp_job *job;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* Extracts the needed HW status from core and reset */
+	job = mali_group_complete_gp(group, success);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Core is now ready to go into idle list */
+	gp_group_state = EXEC_STATE_IDLE;
+
+	/* This will potentially queue more GP and PP jobs */
+	mali_timeline_tracker_release(&job->tracker);
+
+	/* Signal PP job */
+	mali_gp_job_signal_pp_tracker(job, success);
+
+	return job;
+}
+
+static struct mali_pp_job *mali_executor_complete_pp(struct mali_group *group,
+		mali_bool success)
+{
+	struct mali_pp_job *job;
+	u32 sub_job;
+	mali_bool job_is_done;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* Extracts the needed HW status from core and reset */
+	job = mali_group_complete_pp(group, success, &sub_job);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Core is now ready to go into idle list */
+	if (mali_group_is_virtual(group)) {
+		virtual_group_state = EXEC_STATE_IDLE;
+	} else {
+		/* Move from working to idle state */
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_working,
+						       &group_list_working_count,
+						       &group_list_idle,
+						       &group_list_idle_count);
+	}
+
+	/* It is the executor module which owns the jobs themselves by now */
+	mali_pp_job_mark_sub_job_completed(job, success);
+	job_is_done = mali_pp_job_is_complete(job);
+
+	if (job_is_done) {
+		/* This will potentially queue more GP and PP jobs */
+		mali_timeline_tracker_release(&job->tracker);
+	}
+
+	return job;
+}
+
+static void mali_executor_complete_group(struct mali_group *group,
+		mali_bool success,
+		struct mali_gp_job **gp_job_done,
+		struct mali_pp_job **pp_job_done)
+{
+	struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+	struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+	struct mali_gp_job *gp_job = NULL;
+	struct mali_pp_job *pp_job = NULL;
+	mali_bool pp_job_is_done = MALI_TRUE;
+
+	if (NULL != gp_core) {
+		gp_job = mali_executor_complete_gp(group, success);
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(pp_core);
+		MALI_IGNORE(pp_core);
+		pp_job = mali_executor_complete_pp(group, success);
+
+		pp_job_is_done = mali_pp_job_is_complete(pp_job);
+	}
+
+	if (pause_count > 0) {
+		/* Execution has been suspended */
+
+		if (!mali_executor_is_working()) {
+			/* Last job completed, wake up sleepers */
+			_mali_osk_wait_queue_wake_up(
+				executor_working_wait_queue);
+		}
+	} else if (MALI_TRUE == mali_group_disable_requested(group)) {
+		mali_executor_core_scale_in_group_complete(group);
+
+		mali_executor_schedule();
+	} else {
+		/* try to schedule new jobs */
+		mali_executor_schedule();
+	}
+
+	if (NULL != gp_job) {
+		MALI_DEBUG_ASSERT_POINTER(gp_job_done);
+		*gp_job_done = gp_job;
+	} else if (pp_job_is_done) {
+		MALI_DEBUG_ASSERT_POINTER(pp_job);
+		MALI_DEBUG_ASSERT_POINTER(pp_job_done);
+		*pp_job_done = pp_job;
+	}
+}
+
+static void mali_executor_change_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *old_list,
+		u32 *old_count,
+		_mali_osk_list_t *new_list,
+		u32 *new_count)
+{
+	/*
+	 * It's a bit more complicated to change the state for the physical PP
+	 * groups since their state is determined by the list they are on.
+	 */
+#if defined(DEBUG)
+	mali_bool found = MALI_FALSE;
+	struct mali_group *group_iter;
+	struct mali_group *temp;
+	u32 old_counted = 0;
+	u32 new_counted = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(old_list);
+	MALI_DEBUG_ASSERT_POINTER(old_count);
+	MALI_DEBUG_ASSERT_POINTER(new_list);
+	MALI_DEBUG_ASSERT_POINTER(new_count);
+
+	/*
+	 * Verify that group is present on old list,
+	 * and that the count is correct
+	 */
+
+	_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, old_list,
+				    struct mali_group, executor_list) {
+		old_counted++;
+		if (group == group_iter) {
+			found = MALI_TRUE;
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, new_list,
+				    struct mali_group, executor_list) {
+		new_counted++;
+	}
+
+	if (MALI_FALSE == found) {
+		if (old_list == &group_list_idle) {
+			MALI_DEBUG_PRINT(1, (" old Group list is idle,"));
+		} else if (old_list == &group_list_inactive) {
+			MALI_DEBUG_PRINT(1, (" old Group list is inactive,"));
+		} else if (old_list == &group_list_working) {
+			MALI_DEBUG_PRINT(1, (" old Group list is working,"));
+		} else if (old_list == &group_list_disabled) {
+			MALI_DEBUG_PRINT(1, (" old Group list is disable,"));
+		}
+
+		if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_WORKING)) {
+			MALI_DEBUG_PRINT(1, (" group in working \n"));
+		} else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_INACTIVE)) {
+			MALI_DEBUG_PRINT(1, (" group in inactive \n"));
+		} else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_IDLE)) {
+			MALI_DEBUG_PRINT(1, (" group in idle \n"));
+		} else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)) {
+			MALI_DEBUG_PRINT(1, (" but group in disabled \n"));
+		}
+	}
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == found);
+	MALI_DEBUG_ASSERT(0 < (*old_count));
+	MALI_DEBUG_ASSERT((*old_count) == old_counted);
+	MALI_DEBUG_ASSERT((*new_count) == new_counted);
+#endif
+
+	_mali_osk_list_move(&group->executor_list, new_list);
+	(*old_count)--;
+	(*new_count)++;
+}
+
+static void mali_executor_set_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *new_list,
+		u32 *new_count)
+{
+	_mali_osk_list_add(&group->executor_list, new_list);
+	(*new_count)++;
+}
+
+static mali_bool mali_executor_group_is_in_state(struct mali_group *group,
+		enum mali_executor_state_t state)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (gp_group == group) {
+		if (gp_group_state == state) {
+			return MALI_TRUE;
+		}
+	} else if (virtual_group == group || mali_group_is_in_virtual(group)) {
+		if (virtual_group_state == state) {
+			return MALI_TRUE;
+		}
+	} else {
+		/* Physical PP group */
+		struct mali_group *group_iter;
+		struct mali_group *temp;
+		_mali_osk_list_t *list;
+
+		if (EXEC_STATE_DISABLED == state) {
+			list = &group_list_disabled;
+		} else if (EXEC_STATE_INACTIVE == state) {
+			list = &group_list_inactive;
+		} else if (EXEC_STATE_IDLE == state) {
+			list = &group_list_idle;
+		} else {
+			MALI_DEBUG_ASSERT(EXEC_STATE_WORKING == state);
+			list = &group_list_working;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, list,
+					    struct mali_group, executor_list) {
+			if (group_iter == group) {
+				return MALI_TRUE;
+			}
+		}
+	}
+
+	/* group not in correct state */
+	return MALI_FALSE;
+}
+
+static void mali_executor_group_enable_internal(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED));
+
+	/* Put into inactive state (== "lowest" enabled state) */
+	if (group == gp_group) {
+		MALI_DEBUG_ASSERT(EXEC_STATE_DISABLED == gp_group_state);
+		gp_group_state = EXEC_STATE_INACTIVE;
+	} else {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_disabled,
+						       &group_list_disabled_count,
+						       &group_list_inactive,
+						       &group_list_inactive_count);
+
+		++num_physical_pp_cores_enabled;
+		MALI_DEBUG_PRINT(4, ("Enabling group id %d \n", group->pp_core->core_id));
+	}
+
+	if (MALI_GROUP_STATE_ACTIVE == mali_group_activate(group)) {
+		MALI_DEBUG_ASSERT(MALI_TRUE == mali_group_power_is_on(group));
+
+		/* Move from inactive to idle */
+		if (group == gp_group) {
+			gp_group_state = EXEC_STATE_IDLE;
+		} else {
+			mali_executor_change_state_pp_physical(group,
+							       &group_list_inactive,
+							       &group_list_inactive_count,
+							       &group_list_idle,
+							       &group_list_idle_count);
+
+			if (mali_executor_has_virtual_group()) {
+				if (mali_executor_physical_rejoin_virtual(group)) {
+					mali_pm_update_async();
+				}
+			}
+		}
+	} else {
+		mali_pm_update_async();
+	}
+}
+
+static void mali_executor_group_disable_internal(struct mali_group *group)
+{
+	mali_bool working;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED));
+
+	working = mali_executor_group_is_in_state(group, EXEC_STATE_WORKING);
+	if (MALI_TRUE == working) {
+		/** Group to be disabled once it completes current work,
+		 * when virtual group completes, also check child groups for this flag */
+		mali_group_set_disable_request(group, MALI_TRUE);
+		return;
+	}
+
+	/* Put into disabled state */
+	if (group == gp_group) {
+		/* GP group */
+		MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != gp_group_state);
+		gp_group_state = EXEC_STATE_DISABLED;
+	} else {
+		if (mali_group_is_in_virtual(group)) {
+			/* A child group of virtual group. move the specific group from virtual group */
+			MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != virtual_group_state);
+
+			mali_executor_set_state_pp_physical(group,
+							    &group_list_disabled,
+							    &group_list_disabled_count);
+
+			mali_group_remove_group(virtual_group, group);
+			mali_executor_disable_empty_virtual();
+		} else {
+			mali_executor_change_group_status_disabled(group);
+		}
+
+		--num_physical_pp_cores_enabled;
+		MALI_DEBUG_PRINT(4, ("Disabling group id %d \n", group->pp_core->core_id));
+	}
+
+	if (MALI_GROUP_STATE_INACTIVE != group->state) {
+		if (MALI_TRUE == mali_group_deactivate(group)) {
+			mali_pm_update_async();
+		}
+	}
+}
+
+static void mali_executor_notify_core_change(u32 num_cores)
+{
+	mali_bool done = MALI_FALSE;
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		return;
+	}
+
+	/*
+	 * This function gets a bit complicated because we can't hold the session lock while
+	 * allocating notification objects.
+	 */
+	while (!done) {
+		u32 i;
+		u32 num_sessions_alloc;
+		u32 num_sessions_with_lock;
+		u32 used_notification_objects = 0;
+		_mali_osk_notification_t **notobjs;
+
+		/* Pre allocate the number of notifications objects we need right now (might change after lock has been taken) */
+		num_sessions_alloc = mali_session_get_count();
+		if (0 == num_sessions_alloc) {
+			/* No sessions to report to */
+			return;
+		}
+
+		notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc);
+		if (NULL == notobjs) {
+			MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n"));
+			/* there is probably no point in trying again, system must be really low on memory and probably unusable now anyway */
+			return;
+		}
+
+		for (i = 0; i < num_sessions_alloc; i++) {
+			notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_NUM_CORE_CHANGE, sizeof(_mali_uk_pp_num_cores_changed_s));
+			if (NULL != notobjs[i]) {
+				_mali_uk_pp_num_cores_changed_s *data = notobjs[i]->result_buffer;
+				data->number_of_enabled_cores = num_cores;
+			} else {
+				MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure %u)\n", i));
+			}
+		}
+
+		mali_session_lock();
+
+		/* number of sessions will not change while we hold the lock */
+		num_sessions_with_lock = mali_session_get_count();
+
+		if (num_sessions_alloc >= num_sessions_with_lock) {
+			/* We have allocated enough notification objects for all the sessions atm */
+			struct mali_session_data *session, *tmp;
+			MALI_SESSION_FOREACH(session, tmp, link) {
+				MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc);
+				if (NULL != notobjs[used_notification_objects]) {
+					mali_session_send_notification(session, notobjs[used_notification_objects]);
+					notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */
+				}
+				used_notification_objects++;
+			}
+			done = MALI_TRUE;
+		}
+
+		mali_session_unlock();
+
+		/* Delete any remaining/unused notification objects */
+		for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) {
+			if (NULL != notobjs[used_notification_objects]) {
+				_mali_osk_notification_delete(notobjs[used_notification_objects]);
+			}
+		}
+
+		_mali_osk_free(notobjs);
+	}
+}
+
+static mali_bool mali_executor_core_scaling_is_done(void *data)
+{
+	u32 i;
+	u32 num_groups;
+	mali_bool ret = MALI_TRUE;
+
+	MALI_IGNORE(data);
+
+	mali_executor_lock();
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (NULL != group) {
+			if (MALI_TRUE == group->disable_requested && NULL != mali_group_get_pp_core(group)) {
+				ret = MALI_FALSE;
+				break;
+			}
+		}
+	}
+	mali_executor_unlock();
+
+	return ret;
+}
+
+static void mali_executor_wq_notify_core_change(void *arg)
+{
+	MALI_IGNORE(arg);
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		return;
+	}
+
+	_mali_osk_wait_queue_wait_event(executor_notify_core_change_wait_queue,
+					mali_executor_core_scaling_is_done, NULL);
+
+	mali_executor_notify_core_change(num_physical_pp_cores_enabled);
+}
+
+/**
+ * Clear all disable request from the _last_ core scaling behavior.
+ */
+static void mali_executor_core_scaling_reset(void)
+{
+	u32 i;
+	u32 num_groups;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (NULL != group) {
+			group->disable_requested = MALI_FALSE;
+		}
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		core_scaling_delay_up_mask[i] = 0;
+	}
+}
+
+static void mali_executor_core_scale(unsigned int target_core_nr)
+{
+	int current_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	int target_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	int i;
+
+	MALI_DEBUG_ASSERT(0 < target_core_nr);
+	MALI_DEBUG_ASSERT(num_physical_pp_cores_total >= target_core_nr);
+
+	mali_executor_lock();
+
+	if (target_core_nr < num_physical_pp_cores_enabled) {
+		MALI_DEBUG_PRINT(2, ("Requesting %d cores: disabling %d cores\n", target_core_nr, num_physical_pp_cores_enabled - target_core_nr));
+	} else {
+		MALI_DEBUG_PRINT(2, ("Requesting %d cores: enabling %d cores\n", target_core_nr, target_core_nr - num_physical_pp_cores_enabled));
+	}
+
+	/* When a new core scaling request is comming,  we should remove the un-doing
+	 * part of the last core scaling request.  It's safe because we have only one
+	 * lock(executor lock) protection. */
+	mali_executor_core_scaling_reset();
+
+	mali_pm_get_best_power_cost_mask(num_physical_pp_cores_enabled, current_core_scaling_mask);
+	mali_pm_get_best_power_cost_mask(target_core_nr, target_core_scaling_mask);
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		target_core_scaling_mask[i] = target_core_scaling_mask[i] - current_core_scaling_mask[i];
+		MALI_DEBUG_PRINT(5, ("target_core_scaling_mask[%d] = %d\n", i, target_core_scaling_mask[i]));
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0 > target_core_scaling_mask[i]) {
+			struct mali_pm_domain *domain;
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(group) && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))
+					    && (!mali_group_is_virtual(group))) {
+						mali_executor_group_disable_internal(group);
+						target_core_scaling_mask[i]++;
+						if ((0 == target_core_scaling_mask[i])) {
+							break;
+						}
+
+					}
+				}
+			}
+		}
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		/**
+		 * Target_core_scaling_mask[i] is bigger than 0,
+		 * means we need to enable some pp cores in
+		 * this domain whose domain index is i.
+		 */
+		if (0 < target_core_scaling_mask[i]) {
+			struct mali_pm_domain *domain;
+
+			if (num_physical_pp_cores_enabled >= target_core_nr) {
+				break;
+			}
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(group) && mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)
+					    && (!mali_group_is_virtual(group))) {
+						mali_executor_group_enable_internal(group);
+						target_core_scaling_mask[i]--;
+
+						if ((0 == target_core_scaling_mask[i]) || num_physical_pp_cores_enabled == target_core_nr) {
+							break;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	/**
+	 * Here, we may still have some pp cores not been enabled because of some
+	 * pp cores need to be disabled are still in working state.
+	 */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0 < target_core_scaling_mask[i]) {
+			core_scaling_delay_up_mask[i] = target_core_scaling_mask[i];
+		}
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+}
+
+static void mali_executor_core_scale_in_group_complete(struct mali_group *group)
+{
+	int num_pp_cores_disabled = 0;
+	int num_pp_cores_to_enable = 0;
+	int i;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_group_disable_requested(group));
+
+	/* Disable child group of virtual group */
+	if (mali_group_is_virtual(group)) {
+		struct mali_group *child;
+		struct mali_group *temp;
+
+		_MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+			if (MALI_TRUE == mali_group_disable_requested(child)) {
+				mali_group_set_disable_request(child, MALI_FALSE);
+				mali_executor_group_disable_internal(child);
+				num_pp_cores_disabled++;
+			}
+		}
+		mali_group_set_disable_request(group, MALI_FALSE);
+	} else {
+		mali_executor_group_disable_internal(group);
+		mali_group_set_disable_request(group, MALI_FALSE);
+		if (NULL != mali_group_get_pp_core(group)) {
+			num_pp_cores_disabled++;
+		}
+	}
+
+	num_pp_cores_to_enable = num_pp_cores_disabled;
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0 < core_scaling_delay_up_mask[i]) {
+			struct mali_pm_domain *domain;
+
+			if (0 == num_pp_cores_to_enable) {
+				break;
+			}
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *disabled_group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(disabled_group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(disabled_group) && mali_executor_group_is_in_state(disabled_group, EXEC_STATE_DISABLED)) {
+						mali_executor_group_enable_internal(disabled_group);
+						core_scaling_delay_up_mask[i]--;
+						num_pp_cores_to_enable--;
+
+						if ((0 == core_scaling_delay_up_mask[i]) || 0 == num_pp_cores_to_enable) {
+							break;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	_mali_osk_wait_queue_wake_up(executor_notify_core_change_wait_queue);
+}
+
+static void mali_executor_change_group_status_disabled(struct mali_group *group)
+{
+	/* Physical PP group */
+	mali_bool idle;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	idle = mali_executor_group_is_in_state(group, EXEC_STATE_IDLE);
+	if (MALI_TRUE == idle) {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_idle,
+						       &group_list_idle_count,
+						       &group_list_disabled,
+						       &group_list_disabled_count);
+	} else {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_inactive,
+						       &group_list_inactive_count,
+						       &group_list_disabled,
+						       &group_list_disabled_count);
+	}
+}
+
+static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group)
+{
+	mali_bool trigger_pm_update = MALI_FALSE;
+
+	if (group_list_idle_count > 0) {
+		if (mali_executor_has_virtual_group()) {
+
+			/* Rejoin virtual group on Mali-450 */
+
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+						    &group_list_idle,
+						    struct mali_group, executor_list) {
+				if (mali_executor_physical_rejoin_virtual(
+					    group)) {
+					trigger_pm_update = MALI_TRUE;
+				}
+			}
+		} else if (deactivate_idle_group) {
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			/* Deactivate group on Mali-300/400 */
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+						    &group_list_idle,
+						    struct mali_group, executor_list) {
+				if (mali_group_deactivate(group)) {
+					trigger_pm_update = MALI_TRUE;
+				}
+
+				/* Move from idle to inactive */
+				mali_executor_change_state_pp_physical(group,
+								       &group_list_idle,
+								       &group_list_idle_count,
+								       &group_list_inactive,
+								       &group_list_inactive_count);
+			}
+		}
+	}
+
+	return trigger_pm_update;
+}
+
+void mali_executor_running_status_print(void)
+{
+	struct mali_group *group = NULL;
+	struct mali_group *temp = NULL;
+
+	MALI_PRINT(("GP running job: %p\n", gp_group->gp_running_job));
+	if ((gp_group->gp_core) && (gp_group->is_working)) {
+		mali_group_dump_status(gp_group);
+	}
+	MALI_PRINT(("Physical PP groups in WORKING state (count = %u):\n", group_list_working_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) {
+		MALI_PRINT(("PP running job: %p, subjob %d \n", group->pp_running_job, group->pp_running_sub_job));
+		mali_group_dump_status(group);
+	}
+	MALI_PRINT(("Physical PP groups in INACTIVE state (count = %u):\n", group_list_inactive_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+		MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+		MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+	}
+	MALI_PRINT(("Physical PP groups in IDLE state (count = %u):\n", group_list_idle_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+		MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+		MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+	}
+	MALI_PRINT(("Physical PP groups in DISABLED state (count = %u):\n", group_list_disabled_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+		MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+		MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		MALI_PRINT(("Virtual group running job: %p\n", virtual_group->pp_running_job));
+		MALI_PRINT(("Virtual group status: %d\n", virtual_group_state));
+		MALI_PRINT(("Virtual group->status: %d\n", virtual_group->state));
+		MALI_PRINT(("\tSW power: %s\n", virtual_group->power_is_on ? "On" : "Off"));
+		_MALI_OSK_LIST_FOREACHENTRY(group, temp, &virtual_group->group_list,
+					    struct mali_group, group_list) {
+			int i = 0;
+			MALI_PRINT(("\tchild group(%s) running job: %p\n", group->pp_core->hw_core.description, group->pp_running_job));
+			MALI_PRINT(("\tchild group(%s)->status: %d\n", group->pp_core->hw_core.description, group->state));
+			MALI_PRINT(("\tchild group(%s) SW power: %s\n", group->pp_core->hw_core.description, group->power_is_on ? "On" : "Off"));
+#if MALI_STATE_TRACKING
+			if (group->pm_domain) {
+				MALI_PRINT(("\tPower domain: id %u\n", mali_pm_domain_get_id(group->pm_domain)));
+				MALI_PRINT(("\tMask:0x%04x \n", mali_pm_domain_get_mask(group->pm_domain)));
+				MALI_PRINT(("\tUse-count:%u \n", mali_pm_domain_get_use_count(group->pm_domain)));
+				MALI_PRINT(("\tCurrent power status:%s \n", (mali_pm_domain_get_mask(group->pm_domain)& mali_pm_get_current_mask()) ? "On" : "Off"));
+				MALI_PRINT(("\tWanted  power status:%s \n", (mali_pm_domain_get_mask(group->pm_domain)& mali_pm_get_wanted_mask()) ? "On" : "Off"));
+			}
+#endif
+
+			for (i = 0; i < 2; i++) {
+				if (NULL != group->l2_cache_core[i]) {
+					struct mali_pm_domain *domain;
+					domain = mali_l2_cache_get_pm_domain(group->l2_cache_core[i]);
+					MALI_PRINT(("\t L2(index %d) group SW power: %s\n", i, group->l2_cache_core[i]->power_is_on ? "On" : "Off"));
+#if MALI_STATE_TRACKING
+					if (domain) {
+						MALI_PRINT(("\tL2 Power domain: id %u\n", mali_pm_domain_get_id(domain)));
+						MALI_PRINT(("\tL2 Mask:0x%04x \n", mali_pm_domain_get_mask(domain)));
+						MALI_PRINT(("\tL2 Use-count:%u \n", mali_pm_domain_get_use_count(domain)));
+						MALI_PRINT(("\tL2 Current power status:%s \n", (mali_pm_domain_get_mask(domain) & mali_pm_get_current_mask()) ? "On" : "Off"));
+						MALI_PRINT(("\tL2 Wanted  power status:%s \n", (mali_pm_domain_get_mask(domain) & mali_pm_get_wanted_mask()) ? "On" : "Off"));
+					}
+#endif
+				}
+			}
+		}
+		if (EXEC_STATE_WORKING == virtual_group_state) {
+			mali_group_dump_status(virtual_group);
+		}
+	}
+}
+
+void mali_executor_status_dump(void)
+{
+	mali_executor_lock();
+	mali_scheduler_lock();
+
+	/* print schedule queue status */
+	mali_scheduler_gp_pp_job_queue_print();
+
+	mali_scheduler_unlock();
+	mali_executor_unlock();
+}
diff --git a/utgard/r6p2/common/mali_executor.h b/utgard/r6p2/common/mali_executor.h
new file mode 100755
index 0000000..1d69dc3
--- /dev/null
+++ b/utgard/r6p2/common/mali_executor.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2012, 2014-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_EXECUTOR_H__
+#define __MALI_EXECUTOR_H__
+
+#include "mali_osk.h"
+#include "mali_scheduler_types.h"
+#include "mali_kernel_common.h"
+
+typedef enum {
+	MALI_EXECUTOR_HINT_GP_BOUND = 0
+#define MALI_EXECUTOR_HINT_MAX        1
+} mali_executor_hint;
+
+extern mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX];
+
+/* forward declare struct instead of using include */
+struct mali_session_data;
+struct mali_group;
+struct mali_pp_core;
+
+extern _mali_osk_spinlock_irq_t *mali_executor_lock_obj;
+
+#define MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj);
+
+_mali_osk_errcode_t mali_executor_initialize(void);
+void mali_executor_terminate(void);
+
+void mali_executor_populate(void);
+void mali_executor_depopulate(void);
+
+void mali_executor_suspend(void);
+void mali_executor_resume(void);
+
+u32 mali_executor_get_num_cores_total(void);
+u32 mali_executor_get_num_cores_enabled(void);
+struct mali_pp_core *mali_executor_get_virtual_pp(void);
+struct mali_group *mali_executor_get_virtual_group(void);
+
+void mali_executor_zap_all_active(struct mali_session_data *session);
+
+/**
+ * Schedule GP and PP according to bitmask.
+ *
+ * @param mask A scheduling bitmask.
+ * @param deferred_schedule MALI_TRUE if schedule should be deferred, MALI_FALSE if not.
+ */
+void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule);
+
+_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group, mali_bool in_upper_half);
+_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group, mali_bool in_upper_half);
+_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group, mali_bool in_upper_half);
+void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups);
+void mali_executor_group_power_down(struct mali_group *groups[], u32 num_groups);
+
+void mali_executor_abort_session(struct mali_session_data *session);
+
+void mali_executor_core_scaling_enable(void);
+void mali_executor_core_scaling_disable(void);
+mali_bool mali_executor_core_scaling_is_enabled(void);
+
+void mali_executor_group_enable(struct mali_group *group);
+void mali_executor_group_disable(struct mali_group *group);
+mali_bool mali_executor_group_is_disabled(struct mali_group *group);
+
+int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override);
+
+#if MALI_STATE_TRACKING
+u32 mali_executor_dump_state(char *buf, u32 size);
+#endif
+
+MALI_STATIC_INLINE void mali_executor_hint_enable(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	mali_executor_hints[hint] = MALI_TRUE;
+}
+
+MALI_STATIC_INLINE void mali_executor_hint_disable(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	mali_executor_hints[hint] = MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_executor_hint_is_enabled(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	return mali_executor_hints[hint];
+}
+
+void mali_executor_running_status_print(void);
+void mali_executor_status_dump(void);
+void mali_executor_lock(void);
+void mali_executor_unlock(void);
+#endif /* __MALI_EXECUTOR_H__ */
diff --git a/utgard/r6p2/common/mali_gp.c b/utgard/r6p2/common/mali_gp.c
new file mode 100755
index 0000000..60da8ac
--- /dev/null
+++ b/utgard/r6p2/common/mali_gp.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_gp.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "regs/mali_gp_regs.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+
+#include <mali_platform.h>
+
+static struct mali_gp_core *mali_global_gp_core = NULL;
+
+/* Interrupt handlers */
+static void mali_gp_irq_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data);
+
+struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group)
+{
+	struct mali_gp_core *core = NULL;
+
+	MALI_DEBUG_ASSERT(NULL == mali_global_gp_core);
+	MALI_DEBUG_PRINT(2, ("Mali GP: Creating Mali GP core: %s\n", resource->description));
+
+	core = _mali_osk_malloc(sizeof(struct mali_gp_core));
+	if (NULL != core) {
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALIGP2_REGISTER_ADDRESS_SPACE_SIZE)) {
+			_mali_osk_errcode_t ret;
+
+			ret = mali_gp_reset(core);
+
+			if (_MALI_OSK_ERR_OK == ret) {
+				ret = mali_group_add_gp_core(group, core);
+				if (_MALI_OSK_ERR_OK == ret) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					core->irq = _mali_osk_irq_init(resource->irq,
+								       mali_group_upper_half_gp,
+								       group,
+								       mali_gp_irq_probe_trigger,
+								       mali_gp_irq_probe_ack,
+								       core,
+								       resource->description);
+					if (NULL != core->irq) {
+						MALI_DEBUG_PRINT(4, ("Mali GP: set global gp core from 0x%08X to 0x%08X\n", mali_global_gp_core, core));
+						mali_global_gp_core = core;
+
+						return core;
+					} else {
+						MALI_PRINT_ERROR(("Mali GP: Failed to setup interrupt handlers for GP core %s\n", core->hw_core.description));
+					}
+					mali_group_remove_gp_core(group);
+				} else {
+					MALI_PRINT_ERROR(("Mali GP: Failed to add core %s to group\n", core->hw_core.description));
+				}
+			}
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Failed to allocate memory for GP core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_gp_delete(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	_mali_osk_irq_term(core->irq);
+	mali_hw_core_delete(&core->hw_core);
+	mali_global_gp_core = NULL;
+	_mali_osk_free(core);
+}
+
+void mali_gp_stop_bus(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_STOP_BUS);
+}
+
+_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core)
+{
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Send the stop bus command. */
+	mali_gp_stop_bus(core);
+
+	/* Wait for bus to be stopped */
+	for (i = 0; i < MALI_REG_POLL_COUNT_SLOW; i++) {
+		if (mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS) & MALIGP2_REG_VAL_STATUS_BUS_STOPPED) {
+			break;
+		}
+	}
+
+	if (MALI_REG_POLL_COUNT_SLOW == i) {
+		MALI_PRINT_ERROR(("Mali GP: Failed to stop bus on %s\n", core->hw_core.description));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_gp_hard_reset(struct mali_gp_core *core)
+{
+	const u32 reset_wait_target_register = MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_LIMIT;
+	const u32 reset_invalid_value = 0xC0FFE000;
+	const u32 reset_check_value = 0xC01A0000;
+	const u32 reset_default_value = 0;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+	MALI_DEBUG_PRINT(4, ("Mali GP: Hard reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_invalid_value);
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_RESET);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value);
+		if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) {
+			break;
+		}
+	}
+
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Mali GP: The hard reset loop didn't work, unable to recover\n"));
+	}
+
+	mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_default_value); /* set it back to the default */
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+
+}
+
+void mali_gp_reset_async(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	MALI_DEBUG_PRINT(4, ("Mali GP: Reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALI400GP_REG_VAL_IRQ_RESET_COMPLETED);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALI400GP_REG_VAL_CMD_SOFT_RESET);
+
+}
+
+_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core)
+{
+	int i;
+	u32 rawstat = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		rawstat = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+		if (rawstat & MALI400GP_REG_VAL_IRQ_RESET_COMPLETED) {
+			break;
+		}
+	}
+
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Mali GP: Failed to reset core %s, rawstat: 0x%08x\n",
+				  core->hw_core.description, rawstat));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core)
+{
+	mali_gp_reset_async(core);
+	return mali_gp_reset_wait(core);
+}
+
+void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job)
+{
+	u32 startcmd = 0;
+	u32 *frame_registers = mali_gp_job_get_frame_registers(job);
+	u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job);
+	u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job);
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	if (mali_gp_job_has_vs_job(job)) {
+		startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_VS;
+	}
+
+	if (mali_gp_job_has_plbu_job(job)) {
+		startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_PLBU;
+	}
+
+	MALI_DEBUG_ASSERT(0 != startcmd);
+
+	mali_hw_core_register_write_array_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR, frame_registers, MALIGP2_NUM_REGS_FRAME);
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0);
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE);
+	}
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1);
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE);
+	}
+
+	MALI_DEBUG_PRINT(3, ("Mali GP: Starting job (0x%08x) on core %s with command 0x%08X\n", job, core->hw_core.description, startcmd));
+
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC);
+
+	/* Barrier to make sure the previous register write is finished */
+	_mali_osk_write_mem_barrier();
+
+	/* This is the command that starts the core.
+	 *
+	 * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just
+	 * force core to assert the completion interrupt.
+	 */
+#if !defined(PROFILING_SKIP_GP_JOBS)
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, startcmd);
+#else
+	{
+		u32 bits = 0;
+
+		if (mali_gp_job_has_vs_job(job))
+			bits = MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST;
+		if (mali_gp_job_has_plbu_job(job))
+			bits |= MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST;
+
+		mali_hw_core_register_write_relaxed(&core->hw_core,
+						    MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, bits);
+	}
+#endif
+
+	/* Barrier to make sure the previous register write is finished */
+	_mali_osk_write_mem_barrier();
+}
+
+void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr)
+{
+	u32 irq_readout;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+
+	if (irq_readout & MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM | MALIGP2_REG_VAL_IRQ_HANG));
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED); /* re-enable interrupts */
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR, start_addr);
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR, end_addr);
+
+		MALI_DEBUG_PRINT(3, ("Mali GP: Resuming job\n"));
+
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC);
+		_mali_osk_write_mem_barrier();
+	}
+	/*
+	 * else: core has been reset between PLBU_OUT_OF_MEM interrupt and this new heap response.
+	 * A timeout or a page fault on Mali-200 PP core can cause this behaviour.
+	 */
+}
+
+u32 mali_gp_core_get_version(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VERSION);
+}
+
+struct mali_gp_core *mali_gp_get_global_gp_core(void)
+{
+	return mali_global_gp_core;
+}
+
+/* ------------- interrupt handling below ------------------ */
+static void mali_gp_irq_probe_trigger(void *data)
+{
+	struct mali_gp_core *core = (struct mali_gp_core *)data;
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR);
+	_mali_osk_mem_barrier();
+}
+
+static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data)
+{
+	struct mali_gp_core *core = (struct mali_gp_core *)data;
+	u32 irq_readout;
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT);
+	if (MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR & irq_readout) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR);
+		_mali_osk_mem_barrier();
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+/* ------ local helper functions below --------- */
+#if MALI_STATE_TRACKING
+u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\tGP: %s\n", core->hw_core.description);
+
+	return n;
+}
+#endif
+
+void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job)
+{
+	u32 val0 = 0;
+	u32 val1 = 0;
+	u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job);
+	u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job);
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		val0 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE);
+		mali_gp_job_set_perf_counter_value0(job, val0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C0, val0);
+		_mali_osk_profiling_record_global_counters(COUNTER_VP_0_C0, val0);
+#endif
+
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		val1 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE);
+		mali_gp_job_set_perf_counter_value1(job, val1);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C1, val1);
+		_mali_osk_profiling_record_global_counters(COUNTER_VP_0_C1, val1);
+#endif
+	}
+}
diff --git a/utgard/r6p2/common/mali_gp.h b/utgard/r6p2/common/mali_gp.h
new file mode 100755
index 0000000..ecbe70e
--- /dev/null
+++ b/utgard/r6p2/common/mali_gp.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GP_H__
+#define __MALI_GP_H__
+
+#include "mali_osk.h"
+#include "mali_gp_job.h"
+#include "mali_hw_core.h"
+#include "regs/mali_gp_regs.h"
+
+struct mali_group;
+
+/**
+ * Definition of the GP core struct
+ * Used to track a GP core in the system.
+ */
+struct mali_gp_core {
+	struct mali_hw_core  hw_core;           /**< Common for all HW cores */
+	_mali_osk_irq_t     *irq;               /**< IRQ handler */
+};
+
+_mali_osk_errcode_t mali_gp_initialize(void);
+void mali_gp_terminate(void);
+
+struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group);
+void mali_gp_delete(struct mali_gp_core *core);
+
+void mali_gp_stop_bus(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core);
+void mali_gp_reset_async(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core);
+void mali_gp_hard_reset(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core);
+
+void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job);
+void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr);
+
+u32 mali_gp_core_get_version(struct mali_gp_core *core);
+
+struct mali_gp_core *mali_gp_get_global_gp_core(void);
+
+#if MALI_STATE_TRACKING
+u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size);
+#endif
+
+void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job);
+
+MALI_STATIC_INLINE const char *mali_gp_core_description(struct mali_gp_core *core)
+{
+	return core->hw_core.description;
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_gp_get_interrupt_result(struct mali_gp_core *core)
+{
+	u32 stat_used = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT) &
+			MALIGP2_REG_VAL_IRQ_MASK_USED;
+
+	if (0 == stat_used) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	} else if ((MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST |
+		    MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST) == stat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS;
+	} else if (MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST == stat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS_VS;
+	} else if (MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST == stat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS_PLBU;
+	} else if (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM & stat_used) {
+		return MALI_INTERRUPT_RESULT_OOM;
+	}
+
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_get_rawstat(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core,
+					  MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+}
+
+MALI_STATIC_INLINE u32 mali_gp_is_active(struct mali_gp_core *core)
+{
+	u32 status = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS);
+	return (status & MALIGP2_REG_VAL_STATUS_MASK_ACTIVE) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_gp_mask_all_interrupts(struct mali_gp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_NONE);
+}
+
+MALI_STATIC_INLINE void mali_gp_enable_interrupts(struct mali_gp_core *core, enum mali_interrupt_result exceptions)
+{
+	/* Enable all interrupts, except those specified in exceptions */
+	u32 value;
+
+	if (MALI_INTERRUPT_RESULT_SUCCESS_VS == exceptions) {
+		/* Enable all used except VS complete */
+		value = MALIGP2_REG_VAL_IRQ_MASK_USED &
+			~MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST;
+	} else {
+		MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_SUCCESS_PLBU ==
+				  exceptions);
+		/* Enable all used except PLBU complete */
+		value = MALIGP2_REG_VAL_IRQ_MASK_USED &
+			~MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST;
+	}
+
+	mali_hw_core_register_write(&core->hw_core,
+				    MALIGP2_REG_ADDR_MGMT_INT_MASK,
+				    value);
+}
+
+MALI_STATIC_INLINE u32 mali_gp_read_plbu_alloc_start_addr(struct mali_gp_core *core)
+{
+	return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR);
+}
+
+#endif /* __MALI_GP_H__ */
diff --git a/utgard/r6p2/common/mali_gp_job.c b/utgard/r6p2/common/mali_gp_job.c
new file mode 100755
index 0000000..8dd19cb
--- /dev/null
+++ b/utgard/r6p2/common/mali_gp_job.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_gp_job.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_defer_bind.h"
+
+static u32 gp_counter_src0 = MALI_HW_CORE_NO_COUNTER;      /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+static u32 gp_counter_src1 = MALI_HW_CORE_NO_COUNTER;           /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+static void _mali_gp_del_varying_allocations(struct mali_gp_job *job);
+
+
+static int _mali_gp_add_varying_allocations(struct mali_session_data *session,
+		struct mali_gp_job *job,
+		u32 *alloc,
+		u32 num)
+{
+	int i = 0;
+	struct mali_gp_allocation_node *alloc_node;
+	mali_mem_allocation *mali_alloc = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+
+	for (i = 0 ; i < num ; i++) {
+		MALI_DEBUG_ASSERT(alloc[i]);
+		alloc_node = _mali_osk_calloc(1, sizeof(struct mali_gp_allocation_node));
+		if (alloc_node) {
+			INIT_LIST_HEAD(&alloc_node->node);
+			/* find mali allocation structure by vaddress*/
+			mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, alloc[i], 0);
+
+			if (likely(mali_vma_node)) {
+				mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+				MALI_DEBUG_ASSERT(alloc[i] == mali_vma_node->vm_node.start);
+			} else {
+				MALI_DEBUG_PRINT(1, ("ERROE!_mali_gp_add_varying_allocations,can't find allocation %d by address =0x%x, num=%d\n", i, alloc[i], num));
+				_mali_osk_free(alloc_node);
+				goto fail;
+			}
+			alloc_node->alloc = mali_alloc;
+			/* add to gp job varying alloc list*/
+			list_move(&alloc_node->node, &job->varying_alloc);
+		} else
+			goto fail;
+	}
+
+	return 0;
+fail:
+	MALI_DEBUG_PRINT(1, ("ERROE!_mali_gp_add_varying_allocations,failed to alloc memory!\n"));
+	_mali_gp_del_varying_allocations(job);
+	return -1;
+}
+
+
+static void _mali_gp_del_varying_allocations(struct mali_gp_job *job)
+{
+	struct mali_gp_allocation_node *alloc_node, *tmp_node;
+
+	list_for_each_entry_safe(alloc_node, tmp_node, &job->varying_alloc, node) {
+		list_del(&alloc_node->node);
+		kfree(alloc_node);
+	}
+	INIT_LIST_HEAD(&job->varying_alloc);
+}
+
+struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker)
+{
+	struct mali_gp_job *job;
+	u32 perf_counter_flag;
+	u32 __user *memory_list = NULL;
+	struct mali_gp_allocation_node *alloc_node, *tmp_node;
+
+	job = _mali_osk_calloc(1, sizeof(struct mali_gp_job));
+	if (NULL != job) {
+		job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_FINISHED, sizeof(_mali_uk_gp_job_finished_s));
+		if (NULL == job->finished_notification) {
+			goto fail3;
+		}
+
+		job->oom_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_STALLED, sizeof(_mali_uk_gp_job_suspended_s));
+		if (NULL == job->oom_notification) {
+			goto fail2;
+		}
+
+		if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_gp_start_job_s))) {
+			goto fail1;
+		}
+
+		perf_counter_flag = mali_gp_job_get_perf_counter_flag(job);
+
+		/* case when no counters came from user space
+		 * so pass the debugfs / DS-5 provided global ones to the job object */
+		if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) ||
+		      (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) {
+			mali_gp_job_set_perf_counter_src0(job, mali_gp_job_get_gp_counter_src0());
+			mali_gp_job_set_perf_counter_src1(job, mali_gp_job_get_gp_counter_src1());
+		}
+
+		_mali_osk_list_init(&job->list);
+		job->session = session;
+		job->id = id;
+		job->heap_current_addr = job->uargs.frame_registers[4];
+		job->perf_counter_value0 = 0;
+		job->perf_counter_value1 = 0;
+		job->pid = _mali_osk_get_pid();
+		job->tid = _mali_osk_get_tid();
+
+
+		INIT_LIST_HEAD(&job->varying_alloc);
+		INIT_LIST_HEAD(&job->vary_todo);
+		job->dmem = NULL;
+
+		if (job->uargs.deferred_mem_num > session->allocation_mgr.mali_allocation_num) {
+			MALI_PRINT_ERROR(("Mali GP job: The number of  varying buffer to defer bind  is invalid !\n"));
+			goto fail1;
+		}
+
+		/* add varying allocation list*/
+		if (job->uargs.deferred_mem_num > 0) {
+			/* copy varying list from user space*/
+			job->varying_list = _mali_osk_calloc(1, sizeof(u32) * job->uargs.deferred_mem_num);
+			if (!job->varying_list) {
+				MALI_PRINT_ERROR(("Mali GP job: allocate varying_list failed varying_alloc_num = %d !\n", job->uargs.deferred_mem_num));
+				goto fail1;
+			}
+
+			memory_list = (u32 __user *)(uintptr_t)uargs->deferred_mem_list;
+
+			if (0 != _mali_osk_copy_from_user(job->varying_list, memory_list, sizeof(u32) * job->uargs.deferred_mem_num)) {
+				MALI_PRINT_ERROR(("Mali GP job: Failed to copy varying list from user space!\n"));
+				goto fail;
+			}
+
+			if (unlikely(_mali_gp_add_varying_allocations(session, job, job->varying_list,
+					job->uargs.deferred_mem_num))) {
+				MALI_PRINT_ERROR(("Mali GP job: _mali_gp_add_varying_allocations failed!\n"));
+				goto fail;
+			}
+
+			/* do preparetion for each allocation */
+			list_for_each_entry_safe(alloc_node, tmp_node, &job->varying_alloc, node) {
+				if (unlikely(_MALI_OSK_ERR_OK != mali_mem_defer_bind_allocation_prepare(alloc_node->alloc, &job->vary_todo, &job->required_varying_memsize))) {
+					MALI_PRINT_ERROR(("Mali GP job: mali_mem_defer_bind_allocation_prepare failed!\n"));
+					goto fail;
+				}
+			}
+
+			_mali_gp_del_varying_allocations(job);
+
+			/* bind varying here, to avoid memory latency issue. */
+			{
+				struct mali_defer_mem_block dmem_block;
+
+				INIT_LIST_HEAD(&dmem_block.free_pages);
+				atomic_set(&dmem_block.num_free_pages, 0);
+
+				if (mali_mem_prepare_mem_for_job(job, &dmem_block)) {
+					MALI_PRINT_ERROR(("Mali GP job: mali_mem_prepare_mem_for_job failed!\n"));
+					goto fail;
+				}
+				if (_MALI_OSK_ERR_OK != mali_mem_defer_bind(job, &dmem_block)) {
+					MALI_PRINT_ERROR(("gp job create, mali_mem_defer_bind failed! GP %x fail!", job));
+					goto fail;
+				}
+			}
+
+			if (uargs->varying_memsize > MALI_UK_BIG_VARYING_SIZE) {
+				job->big_job = 1;
+			}
+		}
+		job->pp_tracker = pp_tracker;
+		if (NULL != job->pp_tracker) {
+			/* Take a reference on PP job's tracker that will be released when the GP
+			   job is done. */
+			mali_timeline_system_tracker_get(session->timeline_system, pp_tracker);
+		}
+
+		mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_GP, NULL, job);
+		mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence));
+
+		return job;
+	} else {
+		MALI_PRINT_ERROR(("Mali GP job: _mali_osk_calloc failed!\n"));
+		return NULL;
+	}
+
+
+fail:
+	_mali_osk_free(job->varying_list);
+	/* Handle allocate fail here, free all varying node */
+	{
+		struct mali_backend_bind_list *bkn, *bkn_tmp;
+		list_for_each_entry_safe(bkn, bkn_tmp , &job->vary_todo, node) {
+			list_del(&bkn->node);
+			_mali_osk_free(bkn);
+		}
+	}
+fail1:
+	_mali_osk_notification_delete(job->oom_notification);
+fail2:
+	_mali_osk_notification_delete(job->finished_notification);
+fail3:
+	_mali_osk_free(job);
+	return NULL;
+}
+
+void mali_gp_job_delete(struct mali_gp_job *job)
+{
+	struct mali_backend_bind_list *bkn, *bkn_tmp;
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(NULL == job->pp_tracker);
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list));
+	_mali_osk_free(job->varying_list);
+
+	/* Handle allocate fail here, free all varying node */
+	list_for_each_entry_safe(bkn, bkn_tmp , &job->vary_todo, node) {
+		list_del(&bkn->node);
+		_mali_osk_free(bkn);
+	}
+
+	mali_mem_defer_dmem_free(job);
+
+	/* de-allocate the pre-allocated oom notifications */
+	if (NULL != job->oom_notification) {
+		_mali_osk_notification_delete(job->oom_notification);
+		job->oom_notification = NULL;
+	}
+	if (NULL != job->finished_notification) {
+		_mali_osk_notification_delete(job->finished_notification);
+		job->finished_notification = NULL;
+	}
+
+	_mali_osk_free(job);
+}
+
+void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list)
+{
+	struct mali_gp_job *iter;
+	struct mali_gp_job *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Find position in list/queue where job should be added. */
+	_MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list,
+					    struct mali_gp_job, list) {
+
+		/* A span is used to handle job ID wrapping. */
+		bool job_is_after = (mali_gp_job_get_id(job) -
+				     mali_gp_job_get_id(iter)) <
+				    MALI_SCHEDULER_JOB_ID_SPAN;
+
+		if (job_is_after) {
+			break;
+		}
+	}
+
+	_mali_osk_list_add(&job->list, &iter->list);
+}
+
+u32 mali_gp_job_get_gp_counter_src0(void)
+{
+	return gp_counter_src0;
+}
+
+void mali_gp_job_set_gp_counter_src0(u32 counter)
+{
+	gp_counter_src0 = counter;
+}
+
+u32 mali_gp_job_get_gp_counter_src1(void)
+{
+	return gp_counter_src1;
+}
+
+void mali_gp_job_set_gp_counter_src1(u32 counter)
+{
+	gp_counter_src1 = counter;
+}
+
+mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (NULL != job->pp_tracker) {
+		schedule_mask |= mali_timeline_system_tracker_put(job->session->timeline_system, job->pp_tracker, MALI_FALSE == success);
+		job->pp_tracker = NULL;
+	}
+
+	return schedule_mask;
+}
diff --git a/utgard/r6p2/common/mali_gp_job.h b/utgard/r6p2/common/mali_gp_job.h
new file mode 100755
index 0000000..6a67543
--- /dev/null
+++ b/utgard/r6p2/common/mali_gp_job.h
@@ -0,0 +1,324 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GP_JOB_H__
+#define __MALI_GP_JOB_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_session.h"
+#include "mali_timeline.h"
+#include "mali_scheduler_types.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+#include "mali_timeline.h"
+
+struct mali_defer_mem;
+/**
+ * This structure represents a GP job
+ *
+ * The GP job object itself is not protected by any single lock,
+ * but relies on other locks instead (scheduler, executor and timeline lock).
+ * Think of the job object as moving between these sub systems through-out
+ * its lifetime. Different part of the GP job struct is used by different
+ * subsystems. Accessor functions ensure that correct lock is taken.
+ * Do NOT access any data members directly from outside this module!
+ */
+struct mali_gp_job {
+	/*
+	 * These members are typically only set at creation,
+	 * and only read later on.
+	 * They do not require any lock protection.
+	 */
+	_mali_uk_gp_start_job_s uargs;                     /**< Arguments from user space */
+	struct mali_session_data *session;                 /**< Session which submitted this job */
+	u32 pid;                                           /**< Process ID of submitting process */
+	u32 tid;                                           /**< Thread ID of submitting thread */
+	u32 id;                                            /**< Identifier for this job in kernel space (sequential numbering) */
+	u32 cache_order;                                   /**< Cache order used for L2 cache flushing (sequential numbering) */
+	struct mali_timeline_tracker tracker;              /**< Timeline tracker for this job */
+	struct mali_timeline_tracker *pp_tracker;          /**< Pointer to Timeline tracker for PP job that depends on this job. */
+	_mali_osk_notification_t *finished_notification;   /**< Notification sent back to userspace on job complete */
+
+	/*
+	 * These members are used by the scheduler,
+	 * protected by scheduler lock
+	 */
+	_mali_osk_list_t list;                             /**< Used to link jobs together in the scheduler queue */
+
+	/*
+	 * These members are used by the executor and/or group,
+	 * protected by executor lock
+	 */
+	_mali_osk_notification_t *oom_notification;        /**< Notification sent back to userspace on OOM */
+
+	/*
+	 * Set by executor/group on job completion, read by scheduler when
+	 * returning job to user. Hold executor lock when setting,
+	 * no lock needed when reading
+	 */
+	u32 heap_current_addr;                             /**< Holds the current HEAP address when the job has completed */
+	u32 perf_counter_value0;                           /**< Value of performance counter 0 (to be returned to user space) */
+	u32 perf_counter_value1;                           /**< Value of performance counter 1 (to be returned to user space) */
+	struct mali_defer_mem *dmem;                                          /** < used for defer bind to store dmem info */
+	struct list_head varying_alloc;                    /**< hold the list of varying allocations */
+	u32 bind_flag;                                     /** < flag for deferbind*/
+	u32 *varying_list;                                 /**< varying memory list need to to defer bind*/
+	struct list_head vary_todo;                        /**< list of backend list need to do defer bind*/
+	u32 required_varying_memsize;                      /** < size of varying memory to reallocate*/
+	u32 big_job;                                       /** < if the gp job have large varying output and may take long time*/
+};
+
+#define MALI_DEFER_BIND_MEMORY_PREPARED (0x1 << 0)
+#define MALI_DEFER_BIND_MEMORY_BINDED (0x1 << 2)
+
+struct mali_gp_allocation_node {
+	struct list_head node;
+	mali_mem_allocation *alloc;
+};
+
+struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker);
+void mali_gp_job_delete(struct mali_gp_job *job);
+
+u32 mali_gp_job_get_gp_counter_src0(void);
+void mali_gp_job_set_gp_counter_src0(u32 counter);
+u32 mali_gp_job_get_gp_counter_src1(void);
+void mali_gp_job_set_gp_counter_src1(u32 counter);
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->id;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_cache_order(struct mali_gp_job *job,
+		u32 cache_order)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	job->cache_order = cache_order;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_cache_order(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->cache_order;
+}
+
+MALI_STATIC_INLINE u64 mali_gp_job_get_user_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.user_job_ptr;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_frame_builder_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_builder_id;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_flush_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.flush_id;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_pid(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->pid;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_tid(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->tid;
+}
+
+MALI_STATIC_INLINE u32 *mali_gp_job_get_frame_registers(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_registers;
+}
+
+MALI_STATIC_INLINE struct mali_session_data *mali_gp_job_get_session(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->session;
+}
+
+MALI_STATIC_INLINE mali_bool mali_gp_job_has_vs_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.frame_registers[0] != job->uargs.frame_registers[1]) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_gp_job_has_plbu_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.frame_registers[2] != job->uargs.frame_registers[3]) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_current_heap_addr(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->heap_current_addr;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_current_heap_addr(struct mali_gp_job *job, u32 heap_addr)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->heap_current_addr = heap_addr;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_flag(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_flag;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src0(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_src0;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src1(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_src1;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value0(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value0;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value1(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value1;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src0(struct mali_gp_job *job, u32 src)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.perf_counter_src0 = src;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src1(struct mali_gp_job *job, u32 src)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.perf_counter_src1 = src;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value0(struct mali_gp_job *job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value0 = value;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value1(struct mali_gp_job *job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value1 = value;
+}
+
+void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list);
+
+MALI_STATIC_INLINE void mali_gp_job_list_move(struct mali_gp_job *job,
+		_mali_osk_list_t *list)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list));
+	_mali_osk_list_move(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_gp_job_list_remove(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->list);
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *
+mali_gp_job_get_finished_notification(struct mali_gp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->finished_notification);
+
+	notification = job->finished_notification;
+	job->finished_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *mali_gp_job_get_oom_notification(
+	struct mali_gp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job->oom_notification);
+
+	notification = job->oom_notification;
+	job->oom_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_oom_notification(
+	struct mali_gp_job *job,
+	_mali_osk_notification_t *notification)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(NULL == job->oom_notification);
+	job->oom_notification = notification;
+}
+
+MALI_STATIC_INLINE struct mali_timeline_tracker *mali_gp_job_get_tracker(
+	struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return &(job->tracker);
+}
+
+
+MALI_STATIC_INLINE u32 *mali_gp_job_get_timeline_point_ptr(
+	struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr;
+}
+
+
+/**
+ * Release reference on tracker for PP job that depends on this GP job.
+ *
+ * @note If GP job has a reference on tracker, this function MUST be called before the GP job is
+ * deleted.
+ *
+ * @param job GP job that is done.
+ * @param success MALI_TRUE if job completed successfully, MALI_FALSE if not.
+ * @return A scheduling bitmask indicating whether scheduling needs to be done.
+ */
+mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success);
+
+#endif /* __MALI_GP_JOB_H__ */
diff --git a/utgard/r6p2/common/mali_group.c b/utgard/r6p2/common/mali_group.c
new file mode 100755
index 0000000..61ed7cf
--- /dev/null
+++ b/utgard/r6p2/common/mali_group.c
@@ -0,0 +1,1949 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+#include "mali_kernel_common.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_mmu.h"
+#include "mali_dlbu.h"
+#include "mali_broadcast.h"
+#include "mali_scheduler.h"
+#include "mali_osk_profiling.h"
+#include "mali_osk_mali.h"
+#include "mali_pm_domain.h"
+#include "mali_pm.h"
+#include "mali_executor.h"
+#include <mali_platform.h>
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+#include <linux/sched.h>
+#include <trace/events/gpu.h>
+#endif
+
+#define MALI_MAX_NUM_DOMAIN_REFS (MALI_MAX_NUMBER_OF_GROUPS * 2)
+
+#if defined(CONFIG_MALI400_PROFILING)
+static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num);
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+static struct mali_group *mali_global_groups[MALI_MAX_NUMBER_OF_GROUPS] = { NULL, };
+static u32 mali_global_num_groups = 0;
+
+/* SW timer for job execution */
+int mali_max_job_runtime = MALI_MAX_JOB_RUNTIME_DEFAULT;
+
+/* local helper functions */
+static void mali_group_bottom_half_mmu(void *data);
+static void mali_group_bottom_half_gp(void *data);
+static void mali_group_bottom_half_pp(void *data);
+static void mali_group_timeout(void *data);
+static void mali_group_reset_pp(struct mali_group *group);
+static void mali_group_reset_mmu(struct mali_group *group);
+
+static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session, mali_bool is_reload);
+static void mali_group_recovery_reset(struct mali_group *group);
+
+struct mali_group *mali_group_create(struct mali_l2_cache_core *core,
+        struct mali_dlbu_core *dlbu,
+        struct mali_bcast_unit *bcast,
+        u32 domain_index)
+{
+    struct mali_group *group = NULL;
+
+    if (mali_global_num_groups >= MALI_MAX_NUMBER_OF_GROUPS) {
+        MALI_PRINT_ERROR(("Mali group: Too many group objects created\n"));
+        return NULL;
+    }
+
+    group = _mali_osk_calloc(1, sizeof(struct mali_group));
+    if (NULL != group) {
+        group->timeout_timer = _mali_osk_timer_init();
+        if (NULL != group->timeout_timer) {
+            _mali_osk_timer_setcallback(group->timeout_timer, mali_group_timeout, (void *)group);
+
+            group->l2_cache_core[0] = core;
+            _mali_osk_list_init(&group->group_list);
+            _mali_osk_list_init(&group->executor_list);
+            _mali_osk_list_init(&group->pm_domain_list);
+            group->bcast_core = bcast;
+            group->dlbu_core = dlbu;
+
+            /* register this object as a part of the correct power domain */
+            if ((NULL != core) || (NULL != dlbu) || (NULL != bcast))
+                group->pm_domain = mali_pm_register_group(domain_index, group);
+
+            mali_global_groups[mali_global_num_groups] = group;
+            mali_global_num_groups++;
+
+            return group;
+        }
+        _mali_osk_free(group);
+    }
+
+    return NULL;
+}
+
+void mali_group_delete(struct mali_group *group)
+{
+    u32 i;
+
+    MALI_DEBUG_PRINT(4, ("Deleting group %s\n",
+                mali_group_core_description(group)));
+
+    MALI_DEBUG_ASSERT(NULL == group->parent_group);
+    MALI_DEBUG_ASSERT((MALI_GROUP_STATE_INACTIVE == group->state) || ((MALI_GROUP_STATE_ACTIVATION_PENDING == group->state)));
+
+    /* Delete the resources that this group owns */
+    if (NULL != group->gp_core) {
+        mali_gp_delete(group->gp_core);
+    }
+
+    if (NULL != group->pp_core) {
+        mali_pp_delete(group->pp_core);
+    }
+
+    if (NULL != group->mmu) {
+        mali_mmu_delete(group->mmu);
+    }
+
+    if (mali_group_is_virtual(group)) {
+        /* Remove all groups from virtual group */
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            child->parent_group = NULL;
+            mali_group_delete(child);
+        }
+
+        mali_dlbu_delete(group->dlbu_core);
+
+        if (NULL != group->bcast_core) {
+            mali_bcast_unit_delete(group->bcast_core);
+        }
+    }
+
+    for (i = 0; i < mali_global_num_groups; i++) {
+        if (mali_global_groups[i] == group) {
+            mali_global_groups[i] = NULL;
+            mali_global_num_groups--;
+
+            if (i != mali_global_num_groups) {
+                /* We removed a group from the middle of the array -- move the last
+                 * group to the current position to close the gap */
+                mali_global_groups[i] = mali_global_groups[mali_global_num_groups];
+                mali_global_groups[mali_global_num_groups] = NULL;
+            }
+
+            break;
+        }
+    }
+
+    if (NULL != group->timeout_timer) {
+        _mali_osk_timer_del(group->timeout_timer);
+        _mali_osk_timer_term(group->timeout_timer);
+    }
+
+    if (NULL != group->bottom_half_work_mmu) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_mmu);
+    }
+
+    if (NULL != group->bottom_half_work_gp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_gp);
+    }
+
+    if (NULL != group->bottom_half_work_pp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_pp);
+    }
+
+    _mali_osk_free(group);
+}
+
+_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group, struct mali_mmu_core *mmu_core)
+{
+    /* This group object now owns the MMU core object */
+    group->mmu = mmu_core;
+    group->bottom_half_work_mmu = _mali_osk_wq_create_work(mali_group_bottom_half_mmu, group);
+    if (NULL == group->bottom_half_work_mmu) {
+        return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_mmu_core(struct mali_group *group)
+{
+    /* This group object no longer owns the MMU core object */
+    group->mmu = NULL;
+    if (NULL != group->bottom_half_work_mmu) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_mmu);
+    }
+}
+
+_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group, struct mali_gp_core *gp_core)
+{
+    /* This group object now owns the GP core object */
+    group->gp_core = gp_core;
+    group->bottom_half_work_gp = _mali_osk_wq_create_work(mali_group_bottom_half_gp, group);
+    if (NULL == group->bottom_half_work_gp) {
+        return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_gp_core(struct mali_group *group)
+{
+    /* This group object no longer owns the GP core object */
+    group->gp_core = NULL;
+    if (NULL != group->bottom_half_work_gp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_gp);
+    }
+}
+
+_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group, struct mali_pp_core *pp_core)
+{
+    /* This group object now owns the PP core object */
+    group->pp_core = pp_core;
+    group->bottom_half_work_pp = _mali_osk_wq_create_work(mali_group_bottom_half_pp, group);
+    if (NULL == group->bottom_half_work_pp) {
+        return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_pp_core(struct mali_group *group)
+{
+    /* This group object no longer owns the PP core object */
+    group->pp_core = NULL;
+    if (NULL != group->bottom_half_work_pp) {
+        _mali_osk_wq_delete_work(group->bottom_half_work_pp);
+    }
+}
+
+enum mali_group_state mali_group_activate(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(4, ("Group: Activating group %s\n",
+                mali_group_core_description(group)));
+
+    if (MALI_GROUP_STATE_INACTIVE == group->state) {
+        /* Group is inactive, get PM refs in order to power up */
+
+        /*
+         * We'll take a maximum of 2 power domain references pr group,
+         * one for the group itself, and one for it's L2 cache.
+         */
+        struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS];
+        struct mali_group *groups[MALI_MAX_NUM_DOMAIN_REFS];
+        u32 num_domains = 0;
+        mali_bool all_groups_on;
+
+        /* Deal with child groups first */
+        if (mali_group_is_virtual(group)) {
+            /*
+             * The virtual group might have 0, 1 or 2 L2s in
+             * its l2_cache_core array, but we ignore these and
+             * let the child groups take the needed L2 cache ref
+             * on behalf of the virtual group.
+             * In other words; The L2 refs are taken in pair with
+             * the physical group which the L2 is attached to.
+             */
+            struct mali_group *child;
+            struct mali_group *temp;
+
+            /*
+             * Child group is inactive, get PM
+             * refs in order to power up.
+             */
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp,
+                    &group->group_list,
+                    struct mali_group, group_list) {
+                MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE
+                        == child->state);
+
+                child->state = MALI_GROUP_STATE_ACTIVATION_PENDING;
+
+                MALI_DEBUG_ASSERT_POINTER(
+                        child->pm_domain);
+                domains[num_domains] = child->pm_domain;
+                groups[num_domains] = child;
+                num_domains++;
+
+                /*
+                 * Take L2 domain ref for child group.
+                 */
+                MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS
+                        > num_domains);
+                domains[num_domains] = mali_l2_cache_get_pm_domain(
+                        child->l2_cache_core[0]);
+                groups[num_domains] = NULL;
+                MALI_DEBUG_ASSERT(NULL ==
+                        child->l2_cache_core[1]);
+                num_domains++;
+            }
+        } else {
+            /* Take L2 domain ref for physical groups. */
+            MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+                    num_domains);
+
+            domains[num_domains] = mali_l2_cache_get_pm_domain(
+                    group->l2_cache_core[0]);
+            groups[num_domains] = NULL;
+            MALI_DEBUG_ASSERT(NULL == group->l2_cache_core[1]);
+            num_domains++;
+        }
+
+        /* Do the group itself last (it's dependencies first) */
+
+        group->state = MALI_GROUP_STATE_ACTIVATION_PENDING;
+
+        MALI_DEBUG_ASSERT_POINTER(group->pm_domain);
+        domains[num_domains] = group->pm_domain;
+        groups[num_domains] = group;
+        num_domains++;
+
+        all_groups_on = mali_pm_get_domain_refs(domains, groups,
+                num_domains);
+
+        /*
+         * Complete activation for group, include
+         * virtual group or physical group.
+         */
+        if (MALI_TRUE == all_groups_on) {
+
+            mali_group_set_active(group);
+        }
+    } else if (MALI_GROUP_STATE_ACTIVE == group->state) {
+        /* Already active */
+        MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+    } else {
+        /*
+         * Activation already pending, group->power_is_on could
+         * be both true or false. We need to wait for power up
+         * notification anyway.
+         */
+        MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING
+                == group->state);
+    }
+
+    MALI_DEBUG_PRINT(4, ("Group: group %s activation result: %s\n",
+                mali_group_core_description(group),
+                MALI_GROUP_STATE_ACTIVE == group->state ?
+                "ACTIVE" : "PENDING"));
+
+    return group->state;
+}
+
+mali_bool mali_group_set_active(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING == group->state);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+
+    MALI_DEBUG_PRINT(4, ("Group: Activation completed for %s\n",
+                mali_group_core_description(group)));
+
+    if (mali_group_is_virtual(group)) {
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list,
+                struct mali_group, group_list) {
+            if (MALI_TRUE != child->power_is_on) {
+                return MALI_FALSE;
+            }
+
+            child->state = MALI_GROUP_STATE_ACTIVE;
+        }
+
+        mali_group_reset(group);
+    }
+
+    /* Go to ACTIVE state */
+    group->state = MALI_GROUP_STATE_ACTIVE;
+
+    return MALI_TRUE;
+}
+
+mali_bool mali_group_deactivate(struct mali_group *group)
+{
+    struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS];
+    u32 num_domains = 0;
+    mali_bool power_down = MALI_FALSE;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE != group->state);
+
+    MALI_DEBUG_PRINT(3, ("Group: Deactivating group %s\n",
+                mali_group_core_description(group)));
+
+    group->state = MALI_GROUP_STATE_INACTIVE;
+
+    MALI_DEBUG_ASSERT_POINTER(group->pm_domain);
+    domains[num_domains] = group->pm_domain;
+    num_domains++;
+
+    if (mali_group_is_virtual(group)) {
+        /* Release refs for all child groups */
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp,
+                &group->group_list,
+                struct mali_group, group_list) {
+            child->state = MALI_GROUP_STATE_INACTIVE;
+
+            MALI_DEBUG_ASSERT_POINTER(child->pm_domain);
+            domains[num_domains] = child->pm_domain;
+            num_domains++;
+
+            /* Release L2 cache domain for child groups */
+            MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+                    num_domains);
+            domains[num_domains] = mali_l2_cache_get_pm_domain(
+                    child->l2_cache_core[0]);
+            MALI_DEBUG_ASSERT(NULL == child->l2_cache_core[1]);
+            num_domains++;
+        }
+
+        /*
+         * Must do mali_group_power_down() steps right here for
+         * virtual group, because virtual group itself is likely to
+         * stay powered on, however child groups are now very likely
+         * to be powered off (and thus lose their state).
+         */
+
+        mali_group_clear_session(group);
+        /*
+         * Disable the broadcast unit (clear it's mask).
+         * This is needed in case the GPU isn't actually
+         * powered down at this point and groups are
+         * removed from an inactive virtual group.
+         * If not, then the broadcast unit will intercept
+         * their interrupts!
+         */
+        mali_bcast_disable(group->bcast_core);
+    } else {
+        /* Release L2 cache domain for physical groups */
+        MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+                num_domains);
+        domains[num_domains] = mali_l2_cache_get_pm_domain(
+                group->l2_cache_core[0]);
+        MALI_DEBUG_ASSERT(NULL == group->l2_cache_core[1]);
+        num_domains++;
+    }
+
+    power_down = mali_pm_put_domain_refs(domains, num_domains);
+
+    return power_down;
+}
+
+void mali_group_power_up(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Power up for %s\n",
+                mali_group_core_description(group)));
+
+    group->power_is_on = MALI_TRUE;
+
+    if (MALI_FALSE == mali_group_is_virtual(group)
+            && MALI_FALSE == mali_group_is_in_virtual(group)) {
+        mali_group_reset(group);
+    }
+
+    /*
+     * When we just acquire only one physical group form virt group,
+     * we should remove the bcast&dlbu mask from virt group and
+     * reset bcast and dlbu core, although part of pp cores in virt
+     * group maybe not be powered on.
+     */
+    if (MALI_TRUE == mali_group_is_virtual(group)) {
+        mali_bcast_reset(group->bcast_core);
+        mali_dlbu_update_mask(group->dlbu_core);
+    }
+}
+
+void mali_group_power_down(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Power down for %s\n",
+                mali_group_core_description(group)));
+
+    group->power_is_on = MALI_FALSE;
+
+    if (mali_group_is_virtual(group)) {
+        /*
+         * What we do for physical jobs in this function should
+         * already have been done in mali_group_deactivate()
+         * for virtual group.
+         */
+        MALI_DEBUG_ASSERT(NULL == group->session);
+    } else {
+        mali_group_clear_session(group);
+    }
+}
+
+MALI_DEBUG_CODE(static void mali_group_print_virtual(struct mali_group *vgroup)
+        {
+        u32 i;
+        struct mali_group *group;
+        struct mali_group *temp;
+
+        MALI_DEBUG_PRINT(4, ("Virtual group %s (%p)\n",
+                mali_group_core_description(vgroup),
+                vgroup));
+        MALI_DEBUG_PRINT(4, ("l2_cache_core[0] = %p, ref = %d\n", vgroup->l2_cache_core[0], vgroup->l2_cache_core_ref_count[0]));
+        MALI_DEBUG_PRINT(4, ("l2_cache_core[1] = %p, ref = %d\n", vgroup->l2_cache_core[1], vgroup->l2_cache_core_ref_count[1]));
+
+        i = 0;
+        _MALI_OSK_LIST_FOREACHENTRY(group, temp, &vgroup->group_list, struct mali_group, group_list) {
+        MALI_DEBUG_PRINT(4, ("[%d] %s (%p), l2_cache_core[0] = %p\n",
+                i, mali_group_core_description(group),
+                group, group->l2_cache_core[0]));
+        i++;
+        }
+        })
+
+static void mali_group_dump_core_status(struct mali_group *group)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT(NULL != group->gp_core || (NULL != group->pp_core && !mali_group_is_virtual(group)));
+
+	if (NULL != group->gp_core) {
+		MALI_PRINT(("Dump Group %s\n", group->gp_core->hw_core.description));
+
+		for (i = 0; i < 0xA8; i += 0x10) {
+			MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->gp_core->hw_core, i),
+				    mali_hw_core_register_read(&group->gp_core->hw_core, i + 4),
+				    mali_hw_core_register_read(&group->gp_core->hw_core, i + 8),
+				    mali_hw_core_register_read(&group->gp_core->hw_core, i + 12)));
+		}
+
+
+	} else {
+		MALI_PRINT(("Dump Group %s\n", group->pp_core->hw_core.description));
+
+		for (i = 0; i < 0x5c; i += 0x10) {
+			MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->pp_core->hw_core, i),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 4),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 8),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 12)));
+		}
+
+		/* Ignore some minor registers */
+		for (i = 0x1000; i < 0x1068; i += 0x10) {
+			MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->pp_core->hw_core, i),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 4),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 8),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 12)));
+		}
+	}
+
+	MALI_PRINT(("Dump Group MMU\n"));
+	for (i = 0; i < 0x24; i += 0x10) {
+		MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->mmu->hw_core, i),
+			    mali_hw_core_register_read(&group->mmu->hw_core, i + 4),
+			    mali_hw_core_register_read(&group->mmu->hw_core, i + 8),
+			    mali_hw_core_register_read(&group->mmu->hw_core, i + 12)));
+	}
+}
+
+
+/**
+ * @Dump group status
+ */
+void mali_group_dump_status(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	if (mali_group_is_virtual(group)) {
+		struct mali_group *group_c;
+		struct mali_group *temp;
+		_MALI_OSK_LIST_FOREACHENTRY(group_c, temp, &group->group_list, struct mali_group, group_list) {
+			mali_group_dump_core_status(group_c);
+		}
+	} else {
+		mali_group_dump_core_status(group);
+	}
+}
+
+/**
+ * @brief Add child group to virtual group parent
+ */
+void mali_group_add_group(struct mali_group *parent, struct mali_group *child)
+{
+    mali_bool found;
+    u32 i;
+
+    MALI_DEBUG_PRINT(3, ("Adding group %s to virtual group %s\n",
+                mali_group_core_description(child),
+                mali_group_core_description(parent)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+    MALI_DEBUG_ASSERT(!mali_group_is_virtual(child));
+    MALI_DEBUG_ASSERT(NULL == child->parent_group);
+
+    _mali_osk_list_addtail(&child->group_list, &parent->group_list);
+
+    child->parent_group = parent;
+
+    MALI_DEBUG_ASSERT_POINTER(child->l2_cache_core[0]);
+
+    MALI_DEBUG_PRINT(4, ("parent->l2_cache_core: [0] = %p, [1] = %p\n", parent->l2_cache_core[0], parent->l2_cache_core[1]));
+    MALI_DEBUG_PRINT(4, ("child->l2_cache_core: [0] = %p, [1] = %p\n", child->l2_cache_core[0], child->l2_cache_core[1]));
+
+    /* Keep track of the L2 cache cores of child groups */
+    found = MALI_FALSE;
+    for (i = 0; i < 2; i++) {
+        if (parent->l2_cache_core[i] == child->l2_cache_core[0]) {
+            MALI_DEBUG_ASSERT(parent->l2_cache_core_ref_count[i] > 0);
+            parent->l2_cache_core_ref_count[i]++;
+            found = MALI_TRUE;
+        }
+    }
+
+    if (!found) {
+        /* First time we see this L2 cache, add it to our list */
+        i = (NULL == parent->l2_cache_core[0]) ? 0 : 1;
+
+        MALI_DEBUG_PRINT(4, ("First time we see l2_cache %p. Adding to [%d] = %p\n", child->l2_cache_core[0], i, parent->l2_cache_core[i]));
+
+        MALI_DEBUG_ASSERT(NULL == parent->l2_cache_core[i]);
+
+        parent->l2_cache_core[i] = child->l2_cache_core[0];
+        parent->l2_cache_core_ref_count[i]++;
+    }
+
+    /* Update Broadcast Unit and DLBU */
+    mali_bcast_add_group(parent->bcast_core, child);
+    mali_dlbu_add_group(parent->dlbu_core, child);
+
+    if (MALI_TRUE == parent->power_is_on) {
+        mali_bcast_reset(parent->bcast_core);
+        mali_dlbu_update_mask(parent->dlbu_core);
+    }
+
+    if (MALI_TRUE == child->power_is_on) {
+        if (NULL == parent->session) {
+            if (NULL != child->session) {
+                /*
+                 * Parent has no session, so clear
+                 * child session as well.
+                 */
+                mali_mmu_activate_empty_page_directory(child->mmu);
+            }
+        } else {
+            if (parent->session == child->session) {
+                /* We already have same session as parent,
+                 * so a simple zap should be enough.
+                 */
+                mali_mmu_zap_tlb(child->mmu);
+            } else {
+                /*
+                 * Parent has a different session, so we must
+                 * switch to that sessions page table
+                 */
+                mali_mmu_activate_page_directory(child->mmu, mali_session_get_page_directory(parent->session));
+            }
+
+            /* It is the parent which keeps the session from now on */
+            child->session = NULL;
+        }
+    } else {
+        /* should have been cleared when child was powered down */
+        MALI_DEBUG_ASSERT(NULL == child->session);
+    }
+
+    /* Start job on child when parent is active */
+    if (NULL != parent->pp_running_job) {
+        struct mali_pp_job *job = parent->pp_running_job;
+
+        MALI_DEBUG_PRINT(3, ("Group %x joining running job %d on virtual group %x\n",
+                    child, mali_pp_job_get_id(job), parent));
+
+        /* Only allowed to add active child to an active parent */
+        MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == parent->state);
+        MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == child->state);
+
+        mali_pp_job_start(child->pp_core, job, mali_pp_core_get_id(child->pp_core), MALI_TRUE);
+
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+                mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+        trace_gpu_sched_switch(
+                mali_pp_core_description(group->pp_core),
+                sched_clock(), mali_pp_job_get_tid(job),
+                0, mali_pp_job_get_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+        trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+    }
+
+    MALI_DEBUG_CODE(mali_group_print_virtual(parent);)
+}
+
+/**
+ * @brief Remove child group from virtual group parent
+ */
+void mali_group_remove_group(struct mali_group *parent, struct mali_group *child)
+{
+    u32 i;
+
+    MALI_DEBUG_PRINT(3, ("Removing group %s from virtual group %s\n",
+                mali_group_core_description(child),
+                mali_group_core_description(parent)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+    MALI_DEBUG_ASSERT(!mali_group_is_virtual(child));
+    MALI_DEBUG_ASSERT(parent == child->parent_group);
+
+    /* Update Broadcast Unit and DLBU */
+    mali_bcast_remove_group(parent->bcast_core, child);
+    mali_dlbu_remove_group(parent->dlbu_core, child);
+
+    if (MALI_TRUE == parent->power_is_on) {
+        mali_bcast_reset(parent->bcast_core);
+        mali_dlbu_update_mask(parent->dlbu_core);
+    }
+
+    child->session = parent->session;
+    child->parent_group = NULL;
+
+    _mali_osk_list_delinit(&child->group_list);
+    if (_mali_osk_list_empty(&parent->group_list)) {
+        parent->session = NULL;
+    }
+
+    /* Keep track of the L2 cache cores of child groups */
+    i = (child->l2_cache_core[0] == parent->l2_cache_core[0]) ? 0 : 1;
+
+    MALI_DEBUG_ASSERT(child->l2_cache_core[0] == parent->l2_cache_core[i]);
+
+    parent->l2_cache_core_ref_count[i]--;
+    if (parent->l2_cache_core_ref_count[i] == 0) {
+        parent->l2_cache_core[i] = NULL;
+    }
+
+    MALI_DEBUG_CODE(mali_group_print_virtual(parent));
+}
+
+struct mali_group *mali_group_acquire_group(struct mali_group *parent)
+{
+    struct mali_group *child = NULL;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+
+    if (!_mali_osk_list_empty(&parent->group_list)) {
+        child = _MALI_OSK_LIST_ENTRY(parent->group_list.prev, struct mali_group, group_list);
+        mali_group_remove_group(parent, child);
+    }
+
+    if (NULL != child) {
+        if (MALI_GROUP_STATE_ACTIVE != parent->state
+                && MALI_TRUE == child->power_is_on) {
+            mali_group_reset(child);
+        }
+    }
+
+    return child;
+}
+
+void mali_group_reset(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(NULL == group->gp_running_job);
+    MALI_DEBUG_ASSERT(NULL == group->pp_running_job);
+    MALI_DEBUG_ASSERT(NULL == group->session);
+
+    MALI_DEBUG_PRINT(3, ("Group: reset of %s\n",
+                mali_group_core_description(group)));
+
+    if (NULL != group->dlbu_core) {
+        mali_dlbu_reset(group->dlbu_core);
+    }
+
+    if (NULL != group->bcast_core) {
+        mali_bcast_reset(group->bcast_core);
+    }
+
+    MALI_DEBUG_ASSERT(NULL != group->mmu);
+    mali_group_reset_mmu(group);
+
+    if (NULL != group->gp_core) {
+        MALI_DEBUG_ASSERT(NULL == group->pp_core);
+        mali_gp_reset(group->gp_core);
+    } else {
+        MALI_DEBUG_ASSERT(NULL != group->pp_core);
+        mali_group_reset_pp(group);
+    }
+}
+
+void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job, mali_bool gpu_secure_mode_pre_enabled)
+{
+    struct mali_session_data *session;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Starting GP job 0x%08X on group %s\n",
+                job,
+                mali_group_core_description(group)));
+
+    session = mali_gp_job_get_session(job);
+
+    MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]);
+    mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_gp_job_get_cache_order(job));
+
+	/* Reset GPU and disable gpu secure mode if needed. */
+	if (MALI_TRUE == _mali_osk_gpu_secure_mode_is_enabled()) {
+		struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+		_mali_osk_gpu_reset_and_secure_mode_disable();
+		/* Need to disable the pmu interrupt mask register */
+		if (NULL != pmu) {
+			mali_pmu_reset(pmu);
+		}
+	}
+
+	/* Reload mmu page table if needed */
+	if (MALI_TRUE == gpu_secure_mode_pre_enabled) {
+		mali_group_reset(group);
+		mali_group_activate_page_directory(group, session, MALI_TRUE);
+	} else {
+		mali_group_activate_page_directory(group, session, MALI_FALSE);
+	}
+
+    mali_gp_job_start(group->gp_core, job);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+            MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0) |
+            MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+            mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job), 0, 0, 0);
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+            mali_gp_job_get_pid(job), mali_gp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+    trace_mali_core_active(mali_gp_job_get_pid(job), 1 /* active */, 1 /* GP */,  0 /* core */,
+            mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+    if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+            (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+        mali_group_report_l2_cache_counters_per_core(group, 0);
+    }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+    trace_gpu_sched_switch(mali_gp_core_description(group->gp_core),
+            sched_clock(), mali_gp_job_get_tid(job),
+            0, mali_gp_job_get_id(job));
+#endif
+
+    group->gp_running_job = job;
+    group->is_working = MALI_TRUE;
+
+    /* Setup SW timer and record start time */
+    group->start_time = _mali_osk_time_tickcount();
+    _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime));
+
+    MALI_DEBUG_PRINT(4, ("Group: Started GP job 0x%08X on group %s at %u\n",
+                job,
+                mali_group_core_description(group),
+                group->start_time));
+}
+
+/* Used to set all the registers except frame renderer list address and fragment shader stack address
+ * It means the caller must set these two registers properly before calling this function
+ */
+void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job, mali_bool gpu_secure_mode_pre_enabled)
+{
+    struct mali_session_data *session;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Starting PP job 0x%08X part %u/%u on group %s\n",
+                job, sub_job + 1,
+                mali_pp_job_get_sub_job_count(job),
+                mali_group_core_description(group)));
+
+    session = mali_pp_job_get_session(job);
+
+    if (NULL != group->l2_cache_core[0]) {
+        mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_pp_job_get_cache_order(job));
+    }
+
+    if (NULL != group->l2_cache_core[1]) {
+        mali_l2_cache_invalidate_conditional(group->l2_cache_core[1], mali_pp_job_get_cache_order(job));
+    }
+
+	/* Reset GPU and change gpu secure mode if needed. */
+	if (MALI_TRUE == mali_pp_job_is_protected_job(job) && MALI_FALSE == _mali_osk_gpu_secure_mode_is_enabled()) {
+		struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+		_mali_osk_gpu_reset_and_secure_mode_enable();
+		/* Need to disable the pmu interrupt mask register */
+		if (NULL != pmu) {
+			mali_pmu_reset(pmu);
+		}
+	} else if (MALI_FALSE == mali_pp_job_is_protected_job(job) && MALI_TRUE == _mali_osk_gpu_secure_mode_is_enabled()) {
+		struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+		_mali_osk_gpu_reset_and_secure_mode_disable();
+		/* Need to disable the pmu interrupt mask register */
+		if (NULL != pmu) {
+			mali_pmu_reset(pmu);
+		}
+	}
+
+	/* Reload the mmu page table if needed */
+	if ((MALI_TRUE == mali_pp_job_is_protected_job(job) && MALI_FALSE == gpu_secure_mode_pre_enabled)
+		||(MALI_FALSE == mali_pp_job_is_protected_job(job) && MALI_TRUE == gpu_secure_mode_pre_enabled)) {
+		mali_group_reset(group);
+		mali_group_activate_page_directory(group, session, MALI_TRUE);
+	} else {
+		mali_group_activate_page_directory(group, session, MALI_FALSE);
+	}
+
+    if (mali_group_is_virtual(group)) {
+        struct mali_group *child;
+        struct mali_group *temp;
+        u32 core_num = 0;
+
+        MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job));
+
+        /* Configure DLBU for the job */
+        mali_dlbu_config_job(group->dlbu_core, job);
+
+        /* Write stack address for each child group */
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            mali_pp_write_addr_stack(child->pp_core, job);
+            core_num++;
+        }
+
+        mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE);
+    } else {
+        mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE);
+    }
+
+    /* if the group is virtual, loop through physical groups which belong to this group
+     * and call profiling events for its cores as virtual */
+    if (MALI_TRUE == mali_group_is_virtual(group)) {
+        struct mali_group *child;
+        struct mali_group *temp;
+
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+                    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                    MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+                    mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+            _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                    MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+                    mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+            trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+                    mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+        }
+
+#if defined(CONFIG_MALI400_PROFILING)
+        if (0 != group->l2_cache_core_ref_count[0]) {
+            if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                    (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+                mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+            }
+        }
+        if (0 != group->l2_cache_core_ref_count[1]) {
+            if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1])) &&
+                    (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]))) {
+                mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1]));
+            }
+        }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+    } else { /* group is physical - call profiling events for physical cores */
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+                MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL,
+                mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+        trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core),
+                mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+        if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+            mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+        }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+    }
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+    trace_gpu_sched_switch(mali_pp_core_description(group->pp_core),
+            sched_clock(), mali_pp_job_get_tid(job),
+            0, mali_pp_job_get_id(job));
+#endif
+
+    group->pp_running_job = job;
+    group->pp_running_sub_job = sub_job;
+    group->is_working = MALI_TRUE;
+
+    /* Setup SW timer and record start time */
+    group->start_time = _mali_osk_time_tickcount();
+    _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime));
+
+    MALI_DEBUG_PRINT(4, ("Group: Started PP job 0x%08X part %u/%u on group %s at %u\n",
+                job, sub_job + 1,
+                mali_pp_job_get_sub_job_count(job),
+                mali_group_core_description(group),
+                group->start_time));
+
+}
+
+void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr)
+{
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]);
+    mali_l2_cache_invalidate(group->l2_cache_core[0]);
+
+    mali_mmu_zap_tlb_without_stall(group->mmu);
+
+    mali_gp_resume_with_new_heap(group->gp_core, start_addr, end_addr);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME |
+            MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+            0, 0, 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+    trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 1 /* active */, 1 /* GP */,  0 /* core */,
+            mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job));
+#endif
+}
+
+static void mali_group_reset_mmu(struct mali_group *group)
+{
+    struct mali_group *child;
+    struct mali_group *temp;
+    _mali_osk_errcode_t err;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    if (!mali_group_is_virtual(group)) {
+        /* This is a physical group or an idle virtual group -- simply wait for
+         * the reset to complete. */
+        err = mali_mmu_reset(group->mmu);
+        MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+    } else { /* virtual group */
+        /* Loop through all members of this virtual group and wait
+         * until they are done resetting.
+         */
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            err = mali_mmu_reset(child->mmu);
+            MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+        }
+    }
+}
+
+static void mali_group_reset_pp(struct mali_group *group)
+{
+    struct mali_group *child;
+    struct mali_group *temp;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    mali_pp_reset_async(group->pp_core);
+
+    if (!mali_group_is_virtual(group) || NULL == group->pp_running_job) {
+        /* This is a physical group or an idle virtual group -- simply wait for
+         * the reset to complete. */
+        mali_pp_reset_wait(group->pp_core);
+    } else {
+        /* Loop through all members of this virtual group and wait until they
+         * are done resetting.
+         */
+        _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+            mali_pp_reset_wait(child->pp_core);
+        }
+    }
+}
+
+struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job)
+{
+    struct mali_pp_job *pp_job_to_return;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_running_job);
+    MALI_DEBUG_ASSERT_POINTER(sub_job);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->is_working);
+
+    /* Stop/clear the timeout timer. */
+    _mali_osk_timer_del_async(group->timeout_timer);
+
+    if (NULL != group->pp_running_job) {
+
+        /* Deal with HW counters and profiling */
+
+        if (MALI_TRUE == mali_group_is_virtual(group)) {
+            struct mali_group *child;
+            struct mali_group *temp;
+
+            /* update performance counters from each physical pp core within this virtual group */
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+                mali_pp_update_performance_counters(group->pp_core, child->pp_core, group->pp_running_job, mali_pp_core_get_id(child->pp_core));
+            }
+
+#if defined(CONFIG_MALI400_PROFILING)
+            /* send profiling data per physical core */
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+                _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                        MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+                        MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+                        mali_pp_job_get_perf_counter_value0(group->pp_running_job, mali_pp_core_get_id(child->pp_core)),
+                        mali_pp_job_get_perf_counter_value1(group->pp_running_job, mali_pp_core_get_id(child->pp_core)),
+                        mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8),
+                        0, 0);
+
+                trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job),
+                        0 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+                        mali_pp_job_get_frame_builder_id(group->pp_running_job),
+                        mali_pp_job_get_flush_id(group->pp_running_job));
+            }
+            if (0 != group->l2_cache_core_ref_count[0]) {
+                if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                        (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+                    mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+                }
+            }
+            if (0 != group->l2_cache_core_ref_count[1]) {
+                if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1])) &&
+                        (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]))) {
+                    mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1]));
+                }
+            }
+
+#endif
+        } else {
+            /* update performance counters for a physical group's pp core */
+            mali_pp_update_performance_counters(group->pp_core, group->pp_core, group->pp_running_job, group->pp_running_sub_job);
+
+#if defined(CONFIG_MALI400_PROFILING)
+            _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+                    MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL,
+                    mali_pp_job_get_perf_counter_value0(group->pp_running_job, group->pp_running_sub_job),
+                    mali_pp_job_get_perf_counter_value1(group->pp_running_job, group->pp_running_sub_job),
+                    mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8),
+                    0, 0);
+
+            trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job),
+                    0 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core),
+                    mali_pp_job_get_frame_builder_id(group->pp_running_job),
+                    mali_pp_job_get_flush_id(group->pp_running_job));
+
+            if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                    (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+                mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+            }
+#endif
+        }
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+        trace_gpu_sched_switch(
+                mali_gp_core_description(group->gp_core),
+                sched_clock(), 0, 0, 0);
+#endif
+
+    }
+
+    if (success) {
+        /* Only do soft reset for successful jobs, a full recovery
+         * reset will be done for failed jobs. */
+        mali_pp_reset_async(group->pp_core);
+    }
+
+    pp_job_to_return = group->pp_running_job;
+    group->pp_running_job = NULL;
+    group->is_working = MALI_FALSE;
+    *sub_job = group->pp_running_sub_job;
+
+    if (!success) {
+        MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n"));
+        mali_group_recovery_reset(group);
+    } else if (_MALI_OSK_ERR_OK != mali_pp_reset_wait(group->pp_core)) {
+        MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n"));
+        mali_group_recovery_reset(group);
+    }
+
+    return pp_job_to_return;
+}
+
+struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success)
+{
+    struct mali_gp_job *gp_job_to_return;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_running_job);
+    MALI_DEBUG_ASSERT(MALI_TRUE == group->is_working);
+
+    /* Stop/clear the timeout timer. */
+    _mali_osk_timer_del_async(group->timeout_timer);
+
+    if (NULL != group->gp_running_job) {
+        mali_gp_update_performance_counters(group->gp_core, group->gp_running_job);
+
+#if defined(CONFIG_MALI400_PROFILING)
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+                mali_gp_job_get_perf_counter_value0(group->gp_running_job),
+                mali_gp_job_get_perf_counter_value1(group->gp_running_job),
+                mali_gp_job_get_perf_counter_src0(group->gp_running_job) | (mali_gp_job_get_perf_counter_src1(group->gp_running_job) << 8),
+                0, 0);
+
+        if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+                (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0])))
+            mali_group_report_l2_cache_counters_per_core(group, 0);
+#endif
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+        trace_gpu_sched_switch(
+                mali_pp_core_description(group->pp_core),
+                sched_clock(), 0, 0, 0);
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+        trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 0 /* active */, 1 /* GP */,  0 /* core */,
+                mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job));
+#endif
+
+        mali_gp_job_set_current_heap_addr(group->gp_running_job,
+                mali_gp_read_plbu_alloc_start_addr(group->gp_core));
+    }
+
+    if (success) {
+        /* Only do soft reset for successful jobs, a full recovery
+         * reset will be done for failed jobs. */
+        mali_gp_reset_async(group->gp_core);
+    }
+
+    gp_job_to_return = group->gp_running_job;
+    group->gp_running_job = NULL;
+    group->is_working = MALI_FALSE;
+
+    if (!success) {
+        MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n"));
+        mali_group_recovery_reset(group);
+    } else if (_MALI_OSK_ERR_OK != mali_gp_reset_wait(group->gp_core)) {
+        MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n"));
+        mali_group_recovery_reset(group);
+    }
+
+    return gp_job_to_return;
+}
+
+struct mali_group *mali_group_get_glob_group(u32 index)
+{
+    if (mali_global_num_groups > index) {
+        return mali_global_groups[index];
+    }
+
+    return NULL;
+}
+
+u32 mali_group_get_glob_num_groups(void)
+{
+    return mali_global_num_groups;
+}
+
+static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session, mali_bool is_reload)
+{
+    MALI_DEBUG_PRINT(5, ("Mali group: Activating page directory 0x%08X from session 0x%08X on group %s\n",
+                mali_session_get_page_directory(session), session,
+                mali_group_core_description(group)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (group->session != session ||MALI_TRUE == is_reload) {
+		/* Different session than last time, so we need to do some work */
+		MALI_DEBUG_PRINT(5, ("Mali group: Activate session: %08x previous: %08x on group %s\n",
+				     session, group->session,
+				     mali_group_core_description(group)));
+		mali_mmu_activate_page_directory(group->mmu, mali_session_get_page_directory(session));
+		group->session = session;
+	} else {
+		/* Same session as last time, so no work required */
+		MALI_DEBUG_PRINT(4, ("Mali group: Activate existing session 0x%08X on group %s\n",
+				     session->page_directory,
+				     mali_group_core_description(group)));
+		mali_mmu_zap_tlb_without_stall(group->mmu);
+	}
+}
+
+static void mali_group_recovery_reset(struct mali_group *group)
+{
+    _mali_osk_errcode_t err;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    /* Stop cores, bus stop */
+    if (NULL != group->pp_core) {
+        mali_pp_stop_bus(group->pp_core);
+    } else {
+        mali_gp_stop_bus(group->gp_core);
+    }
+
+    /* Flush MMU and clear page fault (if any) */
+    mali_mmu_activate_fault_flush_page_directory(group->mmu);
+    mali_mmu_page_fault_done(group->mmu);
+
+    /* Wait for cores to stop bus, then do a hard reset on them */
+    if (NULL != group->pp_core) {
+        if (mali_group_is_virtual(group)) {
+            struct mali_group *child, *temp;
+
+            /* Disable the broadcast unit while we do reset directly on the member cores. */
+            mali_bcast_disable(group->bcast_core);
+
+            _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+                mali_pp_stop_bus_wait(child->pp_core);
+                mali_pp_hard_reset(child->pp_core);
+            }
+
+            mali_bcast_enable(group->bcast_core);
+        } else {
+            mali_pp_stop_bus_wait(group->pp_core);
+            mali_pp_hard_reset(group->pp_core);
+        }
+    } else {
+        mali_gp_stop_bus_wait(group->gp_core);
+        mali_gp_hard_reset(group->gp_core);
+    }
+
+    /* Reset MMU */
+    err = mali_mmu_reset(group->mmu);
+    MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+    MALI_IGNORE(err);
+
+    group->session = NULL;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size)
+{
+    int n = 0;
+    int i;
+    struct mali_group *child;
+    struct mali_group *temp;
+
+    if (mali_group_is_virtual(group)) {
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "Virtual PP Group: %p\n", group);
+    } else if (mali_group_is_in_virtual(group)) {
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "Child PP Group: %p\n", group);
+    } else if (NULL != group->pp_core) {
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "Physical PP Group: %p\n", group);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "GP Group: %p\n", group);
+    }
+
+    switch (group->state) {
+        case MALI_GROUP_STATE_INACTIVE:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: INACTIVE\n");
+            break;
+        case MALI_GROUP_STATE_ACTIVATION_PENDING:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: ACTIVATION_PENDING\n");
+            break;
+        case MALI_GROUP_STATE_ACTIVE:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: MALI_GROUP_STATE_ACTIVE\n");
+            break;
+        default:
+            n += _mali_osk_snprintf(buf + n, size - n,
+                    "\tstate: UNKNOWN (%d)\n", group->state);
+            MALI_DEBUG_ASSERT(0);
+            break;
+    }
+
+    n += _mali_osk_snprintf(buf + n, size - n,
+            "\tSW power: %s\n",
+            group->power_is_on ? "On" : "Off");
+
+    n += mali_pm_dump_state_domain(group->pm_domain, buf + n, size - n);
+
+    for (i = 0; i < 2; i++) {
+        if (NULL != group->l2_cache_core[i]) {
+            struct mali_pm_domain *domain;
+            domain = mali_l2_cache_get_pm_domain(
+                    group->l2_cache_core[i]);
+            n += mali_pm_dump_state_domain(domain,
+                    buf + n, size - n);
+        }
+    }
+
+    if (group->gp_core) {
+        n += mali_gp_dump_state(group->gp_core, buf + n, size - n);
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "\tGP running job: %p\n", group->gp_running_job);
+    }
+
+    if (group->pp_core) {
+        n += mali_pp_dump_state(group->pp_core, buf + n, size - n);
+        n += _mali_osk_snprintf(buf + n, size - n,
+                "\tPP running job: %p, subjob %d \n",
+                group->pp_running_job,
+                group->pp_running_sub_job);
+    }
+
+    _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list,
+            struct mali_group, group_list) {
+        n += mali_group_dump_state(child, buf + n, size - n);
+    }
+
+    return n;
+}
+#endif
+
+/* Kasin added. */
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+#include <platform/meson_m400/mali_fix.h>
+#define INT_MALI_PP2_MMU ( 6+32)
+struct _mali_osk_irq_t_struct;
+u32 get_irqnum(struct _mali_osk_irq_t_struct* irq);
+#endif
+
+_mali_osk_errcode_t mali_group_upper_half_mmu(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_mmu_core *mmu = group->mmu;
+#endif
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (MALI_FALSE == group->power_is_on)
+        MALI_SUCCESS;
+    if (get_irqnum(mmu->irq) == INT_MALI_PP2_MMU)
+    {
+        if (group == NULL || group->pp_core == NULL)
+            MALI_SUCCESS;
+        if (group->pp_core->core_id == 0) {
+            if (malifix_get_mmu_int_process_state(0) == MMU_INT_HIT)
+                malifix_set_mmu_int_process_state(0, MMU_INT_TOP);
+            else
+                MALI_SUCCESS;
+        }
+        else if (group->pp_core->core_id == 1) {
+            if (malifix_get_mmu_int_process_state(1) == MMU_INT_HIT)
+                malifix_set_mmu_int_process_state(1, MMU_INT_TOP);
+            else
+                MALI_SUCCESS;
+        } else
+            MALI_SUCCESS;
+    }
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+
+    ret = mali_executor_interrupt_mmu(group, MALI_TRUE);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) {
+		/* group complete and on job shedule on it, it already power off */
+		if (NULL != group->gp_core) {
+			_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+						      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+						      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+						      0, 0, /* No pid and tid for interrupt handler */
+						      MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+						      0xFFFFFFFF, 0);
+		} else {
+			_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+						      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+						      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+						      0, 0, /* No pid and tid for interrupt handler */
+						      MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+							      mali_pp_core_get_id(group->pp_core)),
+						      0xFFFFFFFF, 0);
+		}
+
+		mali_executor_unlock();
+		return ret;
+	}
+#endif
+
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+                0, 0, /* No pid and tid for interrupt handler */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+
+    return ret;
+}
+
+static void mali_group_bottom_half_mmu(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_mmu_core *mmu = group->mmu;
+#endif
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+
+    mali_executor_interrupt_mmu(group, MALI_FALSE);
+
+    if (NULL != group->gp_core) {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+        _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+                MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+                MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+                0, _mali_osk_get_tid(), /* pid and tid */
+                MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+                    mali_pp_core_get_id(group->pp_core)),
+                mali_mmu_get_rawstat(group->mmu), 0);
+    }
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (get_irqnum(mmu->irq) == INT_MALI_PP2_MMU)
+    {
+        if (group->pp_core->core_id == 0) {
+            if (malifix_get_mmu_int_process_state(0) == MMU_INT_TOP)
+                malifix_set_mmu_int_process_state(0, MMU_INT_NONE);
+        }
+        else if (group->pp_core->core_id == 1) {
+            if (malifix_get_mmu_int_process_state(1) == MMU_INT_TOP)
+                malifix_set_mmu_int_process_state(1, MMU_INT_NONE);
+        }
+    }
+#endif
+}
+
+_mali_osk_errcode_t mali_group_upper_half_gp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+
+    MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n",
+                mali_gp_get_rawstat(group->gp_core),
+                mali_group_core_description(group)));
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+    ret = mali_executor_interrupt_gp(group, MALI_TRUE);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) {
+		/* group complete and on job shedule on it, it already power off */
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+					      0, 0, /* No pid and tid for interrupt handler */
+					      MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+					      0xFFFFFFFF, 0);
+		mali_executor_unlock();
+		return ret;
+	}
+#endif
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+    return ret;
+}
+
+static void mali_group_bottom_half_gp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+
+    mali_executor_interrupt_gp(group, MALI_FALSE);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+            mali_gp_get_rawstat(group->gp_core), 0);
+}
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+int PP0_int_cnt = 0;
+int mali_PP0_int_cnt(void)
+{
+    return PP0_int_cnt;
+}
+EXPORT_SYMBOL(mali_PP0_int_cnt);
+
+int PP1_int_cnt = 0;
+int mali_PP1_int_cnt(void)
+{
+    return PP1_int_cnt;
+}
+EXPORT_SYMBOL(mali_PP1_int_cnt);
+#endif
+
+_mali_osk_errcode_t mali_group_upper_half_pp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_pp_core *core = group->pp_core;
+#endif
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+
+    MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n",
+                mali_pp_get_rawstat(group->pp_core),
+                mali_group_core_description(group)));
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+
+    ret = mali_executor_interrupt_pp(group, MALI_TRUE);
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (core->core_id == 0)
+        PP0_int_cnt++;
+    else if (core->core_id == 1)
+        PP1_int_cnt++;
+#endif
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) {
+		/* group complete and on job shedule on it, it already power off */
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+					      0, 0, /* No pid and tid for interrupt handler */
+					      MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+						      mali_pp_core_get_id(group->pp_core)),
+					      0xFFFFFFFF, 0);
+		mali_executor_unlock();
+		return ret;
+	}
+#endif
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+            0, 0, /* No pid and tid for interrupt handler */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+    return ret;
+}
+
+static void mali_group_bottom_half_pp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+
+    mali_executor_interrupt_pp(group, MALI_FALSE);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+            MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+            MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+            0, _mali_osk_get_tid(), /* pid and tid */
+            MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+                mali_pp_core_get_id(group->pp_core)),
+            mali_pp_get_rawstat(group->pp_core), 0);
+}
+
+static void mali_group_timeout(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+    MALI_DEBUG_ASSERT_POINTER(group);
+
+    MALI_DEBUG_PRINT(2, ("Group: timeout handler for %s at %u\n",
+                mali_group_core_description(group),
+                _mali_osk_time_tickcount()));
+
+    if (mali_core_timeout < 65533)
+        mali_core_timeout++;
+    if (NULL != group->gp_core) {
+        mali_group_schedule_bottom_half_gp(group);
+    } else {
+        MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+        mali_group_schedule_bottom_half_pp(group);
+    }
+}
+
+mali_bool mali_group_zap_session(struct mali_group *group,
+        struct mali_session_data *session)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(session);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    if (group->session != session) {
+        /* not running from this session */
+        return MALI_TRUE; /* success */
+    }
+
+    if (group->is_working) {
+        /* The Zap also does the stall and disable_stall */
+        mali_bool zap_success = mali_mmu_zap_tlb(group->mmu);
+        return zap_success;
+    } else {
+        /* Just remove the session instead of zapping */
+        mali_group_clear_session(group);
+        return MALI_TRUE; /* success */
+    }
+}
+
+#if defined(CONFIG_MALI400_PROFILING)
+static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num)
+{
+    u32 source0 = 0;
+    u32 value0 = 0;
+    u32 source1 = 0;
+    u32 value1 = 0;
+    u32 profiling_channel = 0;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    switch (core_num) {
+        case 0:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS;
+            break;
+        case 1:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS;
+            break;
+        case 2:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS;
+            break;
+        default:
+            profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+                MALI_PROFILING_EVENT_CHANNEL_GPU |
+                MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS;
+            break;
+    }
+
+    if (0 == core_num) {
+        mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+    }
+    if (1 == core_num) {
+        if (1 == mali_l2_cache_get_id(group->l2_cache_core[0])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+        } else if (1 == mali_l2_cache_get_id(group->l2_cache_core[1])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1);
+        }
+    }
+    if (2 == core_num) {
+        if (2 == mali_l2_cache_get_id(group->l2_cache_core[0])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+        } else if (2 == mali_l2_cache_get_id(group->l2_cache_core[1])) {
+            mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1);
+        }
+    }
+
+    _mali_osk_profiling_add_event(profiling_channel, source1 << 8 | source0, value0, value1, 0, 0);
+}
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
diff --git a/utgard/r6p2/common/mali_group.h b/utgard/r6p2/common/mali_group.h
new file mode 100755
index 0000000..afe966f
--- /dev/null
+++ b/utgard/r6p2/common/mali_group.h
@@ -0,0 +1,460 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GROUP_H__
+#define __MALI_GROUP_H__
+
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_mmu.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_session.h"
+#include "mali_osk_profiling.h"
+
+/**
+ * @brief Default max runtime [ms] for a core job - used by timeout timers
+ */
+#define MALI_MAX_JOB_RUNTIME_DEFAULT 5000
+
+extern int mali_max_job_runtime;
+
+#define MALI_MAX_NUMBER_OF_GROUPS 10
+#define MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS 8
+
+enum mali_group_state {
+	MALI_GROUP_STATE_INACTIVE,
+	MALI_GROUP_STATE_ACTIVATION_PENDING,
+	MALI_GROUP_STATE_ACTIVE,
+};
+
+/**
+ * The structure represents a render group
+ * A render group is defined by all the cores that share the same Mali MMU
+ */
+
+struct mali_group {
+	struct mali_mmu_core        *mmu;
+	struct mali_session_data    *session;
+
+	enum mali_group_state        state;
+	mali_bool                    power_is_on;
+
+	mali_bool                    is_working;
+	unsigned long                start_time; /* in ticks */
+
+	struct mali_gp_core         *gp_core;
+	struct mali_gp_job          *gp_running_job;
+
+	struct mali_pp_core         *pp_core;
+	struct mali_pp_job          *pp_running_job;
+	u32                         pp_running_sub_job;
+
+	struct mali_pm_domain       *pm_domain;
+
+	struct mali_l2_cache_core   *l2_cache_core[2];
+	u32                         l2_cache_core_ref_count[2];
+
+	/* Parent virtual group (if any) */
+	struct mali_group           *parent_group;
+
+	struct mali_dlbu_core       *dlbu_core;
+	struct mali_bcast_unit      *bcast_core;
+
+	/* Used for working groups which needs to be disabled */
+	mali_bool                    disable_requested;
+
+	/* Used by group to link child groups (for virtual group) */
+	_mali_osk_list_t            group_list;
+
+	/* Used by executor module in order to link groups of same state */
+	_mali_osk_list_t            executor_list;
+
+	/* Used by PM domains to link groups of same domain */
+	_mali_osk_list_t             pm_domain_list;
+
+	_mali_osk_wq_work_t         *bottom_half_work_mmu;
+	_mali_osk_wq_work_t         *bottom_half_work_gp;
+	_mali_osk_wq_work_t         *bottom_half_work_pp;
+
+	_mali_osk_timer_t           *timeout_timer;
+};
+
+/** @brief Create a new Mali group object
+ *
+ * @return A pointer to a new group object
+ */
+struct mali_group *mali_group_create(struct mali_l2_cache_core *core,
+				     struct mali_dlbu_core *dlbu,
+				     struct mali_bcast_unit *bcast,
+				     u32 domain_index);
+
+void mali_group_dump_status(struct mali_group *group);
+
+void mali_group_delete(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group,
+		struct mali_mmu_core *mmu_core);
+void mali_group_remove_mmu_core(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group,
+		struct mali_gp_core *gp_core);
+void mali_group_remove_gp_core(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group,
+		struct mali_pp_core *pp_core);
+void mali_group_remove_pp_core(struct mali_group *group);
+
+MALI_STATIC_INLINE const char *mali_group_core_description(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	if (NULL != group->pp_core) {
+		return mali_pp_core_description(group->pp_core);
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+		return mali_gp_core_description(group->gp_core);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_is_virtual(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	return (NULL != group->dlbu_core);
+#else
+	return MALI_FALSE;
+#endif
+}
+
+/** @brief Check if a group is a part of a virtual group or not
+ */
+MALI_STATIC_INLINE mali_bool mali_group_is_in_virtual(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	return (NULL != group->parent_group) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif
+}
+
+/** @brief Reset group
+ *
+ * This function will reset the entire group,
+ * including all the cores present in the group.
+ *
+ * @param group Pointer to the group to reset
+ */
+void mali_group_reset(struct mali_group *group);
+
+MALI_STATIC_INLINE struct mali_session_data *mali_group_get_session(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	return group->session;
+}
+
+MALI_STATIC_INLINE void mali_group_clear_session(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (NULL != group->session) {
+		mali_mmu_activate_empty_page_directory(group->mmu);
+		group->session = NULL;
+	}
+}
+
+enum mali_group_state mali_group_activate(struct mali_group *group);
+
+/*
+ * Change state from ACTIVATION_PENDING to ACTIVE
+ * For virtual group, all childs need to be ACTIVE first
+ */
+mali_bool mali_group_set_active(struct mali_group *group);
+
+/*
+ * @return MALI_TRUE means one or more domains can now be powered off,
+ * and caller should call either mali_pm_update_async() or
+ * mali_pm_update_sync() in order to do so.
+ */
+mali_bool mali_group_deactivate(struct mali_group *group);
+
+MALI_STATIC_INLINE enum mali_group_state mali_group_get_state(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->state;
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_power_is_on(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->power_is_on;
+}
+
+void mali_group_power_up(struct mali_group *group);
+void mali_group_power_down(struct mali_group *group);
+
+MALI_STATIC_INLINE void mali_group_set_disable_request(
+	struct mali_group *group, mali_bool disable)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	group->disable_requested = disable;
+
+	/**
+	 * When one of child group's disable_requeset is set TRUE, then
+	 * the disable_request of parent group should also be set to TRUE.
+	 * While, the disable_request of parent group should only be set to FALSE
+	 * only when all of its child group's disable_request are set to FALSE.
+	 */
+	if (NULL != group->parent_group && MALI_TRUE == disable) {
+		group->parent_group->disable_requested = disable;
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_disable_requested(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->disable_requested;
+}
+
+/** @brief Virtual groups */
+void mali_group_add_group(struct mali_group *parent, struct mali_group *child);
+struct mali_group *mali_group_acquire_group(struct mali_group *parent);
+void mali_group_remove_group(struct mali_group *parent, struct mali_group *child);
+
+/** @brief Checks if the group is working.
+ */
+MALI_STATIC_INLINE mali_bool mali_group_is_working(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	if (mali_group_is_in_virtual(group)) {
+		struct mali_group *tmp_group = mali_executor_get_virtual_group();
+		return tmp_group->is_working;
+	}
+	return group->is_working;
+}
+
+MALI_STATIC_INLINE struct mali_gp_job *mali_group_get_running_gp_job(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->gp_running_job;
+}
+
+/** @brief Zap MMU TLB on all groups
+ *
+ * Zap TLB on group if \a session is active.
+ */
+mali_bool mali_group_zap_session(struct mali_group *group,
+				 struct mali_session_data *session);
+
+/** @brief Get pointer to GP core object
+ */
+MALI_STATIC_INLINE struct mali_gp_core *mali_group_get_gp_core(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->gp_core;
+}
+
+/** @brief Get pointer to PP core object
+ */
+MALI_STATIC_INLINE struct mali_pp_core *mali_group_get_pp_core(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->pp_core;
+}
+
+/** @brief Start GP job
+ */
+void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job, mali_bool gpu_secure_mode_pre_enabled);
+
+void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job, mali_bool gpu_secure_mode_pre_enabled);
+
+/** @brief Start virtual group Job on a virtual group
+*/
+void mali_group_start_job_on_virtual(struct mali_group *group, struct mali_pp_job *job, u32 first_subjob, u32 last_subjob);
+
+
+/** @brief Start a subjob from a particular on a specific PP group
+*/
+void mali_group_start_job_on_group(struct mali_group *group, struct mali_pp_job *job, u32 subjob);
+
+
+/** @brief remove all the unused groups in tmp_unused group  list, so that the group is in consistent status.
+ */
+void mali_group_non_dlbu_job_done_virtual(struct mali_group *group);
+
+
+/** @brief Resume GP job that suspended waiting for more heap memory
+ */
+void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr);
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_get_interrupt_result(group->gp_core);
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_get_interrupt_result(group->pp_core);
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_mmu(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->mmu);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_mmu_get_interrupt_result(group->mmu);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_gp_is_active(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_is_active(group->gp_core);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_pp_is_active(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_is_active(group->pp_core);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_has_timed_out(struct mali_group *group)
+{
+	unsigned long time_cost;
+	struct mali_group *tmp_group = group;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* if the group is in virtual need to use virtual_group's start time */
+	if (mali_group_is_in_virtual(group)) {
+		tmp_group = mali_executor_get_virtual_group();
+	}
+
+	time_cost = _mali_osk_time_tickcount() - tmp_group->start_time;
+	if (_mali_osk_time_mstoticks(mali_max_job_runtime) <= time_cost) {
+		/*
+		 * current tick is at or after timeout end time,
+		 * so this is a valid timeout
+		 */
+		return MALI_TRUE;
+	} else {
+		/*
+		 * Not a valid timeout. A HW interrupt probably beat
+		 * us to it, and the timer wasn't properly deleted
+		 * (async deletion used due to atomic context).
+		 */
+		return MALI_FALSE;
+	}
+}
+
+MALI_STATIC_INLINE void mali_group_mask_all_interrupts_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_mask_all_interrupts(group->gp_core);
+}
+
+MALI_STATIC_INLINE void mali_group_mask_all_interrupts_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_mask_all_interrupts(group->pp_core);
+}
+
+MALI_STATIC_INLINE void mali_group_enable_interrupts_gp(
+	struct mali_group *group,
+	enum mali_interrupt_result exceptions)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	mali_gp_enable_interrupts(group->gp_core, exceptions);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_gp);
+}
+
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_pp);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_mmu(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->mmu);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_mmu);
+}
+
+struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job);
+
+struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success);
+
+#if defined(CONFIG_MALI400_PROFILING)
+MALI_STATIC_INLINE void mali_group_oom(struct mali_group *group)
+{
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND |
+				      MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+				      0, 0, 0, 0, 0);
+}
+#endif
+
+struct mali_group *mali_group_get_glob_group(u32 index);
+u32 mali_group_get_glob_num_groups(void);
+
+u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size);
+
+
+_mali_osk_errcode_t mali_group_upper_half_mmu(void *data);
+_mali_osk_errcode_t mali_group_upper_half_gp(void *data);
+_mali_osk_errcode_t mali_group_upper_half_pp(void *data);
+
+MALI_STATIC_INLINE mali_bool mali_group_is_empty(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT(mali_group_is_virtual(group));
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return _mali_osk_list_empty(&group->group_list);
+}
+
+#endif /* __MALI_GROUP_H__ */
diff --git a/utgard/r6p2/common/mali_hw_core.c b/utgard/r6p2/common/mali_hw_core.c
new file mode 100755
index 0000000..dd10cfe
--- /dev/null
+++ b/utgard/r6p2/common/mali_hw_core.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_hw_core.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_osk_mali.h"
+
+_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size)
+{
+	core->phys_addr = resource->base;
+	core->phys_offset = resource->base - _mali_osk_resource_base_address();
+	core->description = resource->description;
+	core->size = reg_size;
+
+	MALI_DEBUG_ASSERT(core->phys_offset < core->phys_addr);
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_mem_reqregion(core->phys_addr, core->size, core->description)) {
+		core->mapped_registers = _mali_osk_mem_mapioregion(core->phys_addr, core->size, core->description);
+		if (NULL != core->mapped_registers) {
+			return _MALI_OSK_ERR_OK;
+		} else {
+			MALI_PRINT_ERROR(("Failed to map memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr));
+		}
+		_mali_osk_mem_unreqregion(core->phys_addr, core->size);
+	} else {
+		MALI_PRINT_ERROR(("Failed to request memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr));
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_hw_core_delete(struct mali_hw_core *core)
+{
+	if (NULL != core->mapped_registers) {
+		_mali_osk_mem_unmapioregion(core->phys_addr, core->size, core->mapped_registers);
+		core->mapped_registers = NULL;
+	}
+	_mali_osk_mem_unreqregion(core->phys_addr, core->size);
+}
diff --git a/utgard/r6p2/common/mali_hw_core.h b/utgard/r6p2/common/mali_hw_core.h
new file mode 100755
index 0000000..e435725
--- /dev/null
+++ b/utgard/r6p2/common/mali_hw_core.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_HW_CORE_H__
+#define __MALI_HW_CORE_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/**
+ * The common parts for all Mali HW cores (GP, PP, MMU, L2 and PMU)
+ * This struct is embedded inside all core specific structs.
+ */
+struct mali_hw_core {
+	uintptr_t phys_addr;              /**< Physical address of the registers */
+	u32 phys_offset;                  /**< Offset from start of Mali to registers */
+	u32 size;                         /**< Size of registers */
+	mali_io_address mapped_registers; /**< Virtual mapping of the registers */
+	const char *description;          /**< Name of unit (as specified in device configuration) */
+};
+
+#define MALI_REG_POLL_COUNT_FAST 1000000
+#define MALI_REG_POLL_COUNT_SLOW 1000000
+
+/*
+ * GP and PP core translate their int_stat/rawstat into one of these
+ */
+enum mali_interrupt_result {
+	MALI_INTERRUPT_RESULT_NONE,
+	MALI_INTERRUPT_RESULT_SUCCESS,
+	MALI_INTERRUPT_RESULT_SUCCESS_VS,
+	MALI_INTERRUPT_RESULT_SUCCESS_PLBU,
+	MALI_INTERRUPT_RESULT_OOM,
+	MALI_INTERRUPT_RESULT_ERROR
+};
+
+_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size);
+void mali_hw_core_delete(struct mali_hw_core *core);
+
+MALI_STATIC_INLINE u32 mali_hw_core_register_read(struct mali_hw_core *core, u32 relative_address)
+{
+	u32 read_val;
+	read_val = _mali_osk_mem_ioread32(core->mapped_registers, relative_address);
+	MALI_DEBUG_PRINT(6, ("register_read for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, read_val));
+	return read_val;
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed(struct mali_hw_core *core, u32 relative_address, u32 new_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	_mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val);
+}
+
+/* Conditionally write a register.
+ * The register will only be written if the new value is different from the old_value.
+ * If the new value is different, the old value will also be updated */
+MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 new_val, const u32 old_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	if (old_val != new_val) {
+		_mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val);
+	}
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write(struct mali_hw_core *core, u32 relative_address, u32 new_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	_mali_osk_mem_iowrite32(core->mapped_registers, relative_address, new_val);
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs)
+{
+	u32 i;
+	MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n",
+			     core->description, relative_address, nr_of_regs));
+
+	/* Do not use burst writes against the registers */
+	for (i = 0; i < nr_of_regs; i++) {
+		mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]);
+	}
+}
+
+/* Conditionally write a set of registers.
+ * The register will only be written if the new value is different from the old_value.
+ * If the new value is different, the old value will also be updated */
+MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs, const u32 *old_array)
+{
+	u32 i;
+	MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n",
+			     core->description, relative_address, nr_of_regs));
+
+	/* Do not use burst writes against the registers */
+	for (i = 0; i < nr_of_regs; i++) {
+		if (old_array[i] != write_array[i]) {
+			mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]);
+		}
+	}
+}
+
+#endif /* __MALI_HW_CORE_H__ */
diff --git a/utgard/r6p2/common/mali_kernel_common.h b/utgard/r6p2/common/mali_kernel_common.h
new file mode 100755
index 0000000..9bae265
--- /dev/null
+++ b/utgard/r6p2/common/mali_kernel_common.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_COMMON_H__
+#define __MALI_KERNEL_COMMON_H__
+
+#include "mali_osk.h"
+
+/* Make sure debug is defined when it should be */
+#ifndef DEBUG
+#if defined(_DEBUG)
+#define DEBUG
+#endif
+#endif
+
+/* The file include several useful macros for error checking, debugging and printing.
+ * - MALI_PRINTF(...)           Do not use this function: Will be included in Release builds.
+ * - MALI_DEBUG_PRINT(nr, (X) ) Prints the second argument if nr<=MALI_DEBUG_LEVEL.
+ * - MALI_DEBUG_ERROR( (X) )    Prints an errortext, a source trace, and the given error message.
+ * - MALI_DEBUG_ASSERT(exp,(X)) If the asserted expr is false, the program will exit.
+ * - MALI_DEBUG_ASSERT_POINTER(pointer)  Triggers if the pointer is a zero pointer.
+ * - MALI_DEBUG_CODE( X )       The code inside the macro is only compiled in Debug builds.
+ *
+ * The (X) means that you must add an extra parenthesis around the argumentlist.
+ *
+ * The  printf function: MALI_PRINTF(...) is routed to _mali_osk_debugmsg
+ *
+ * Suggested range for the DEBUG-LEVEL is [1:6] where
+ * [1:2] Is messages with highest priority, indicate possible errors.
+ * [3:4] Is messages with medium priority, output important variables.
+ * [5:6] Is messages with low priority, used during extensive debugging.
+ */
+
+/**
+*  Fundamental error macro. Reports an error code. This is abstracted to allow us to
+*  easily switch to a different error reporting method if we want, and also to allow
+*  us to search for error returns easily.
+*
+*  Note no closing semicolon - this is supplied in typical usage:
+*
+*  MALI_ERROR(MALI_ERROR_OUT_OF_MEMORY);
+*/
+#define MALI_ERROR(error_code) return (error_code)
+
+/**
+ *  Basic error macro, to indicate success.
+ *  Note no closing semicolon - this is supplied in typical usage:
+ *
+ *  MALI_SUCCESS;
+ */
+#define MALI_SUCCESS MALI_ERROR(_MALI_OSK_ERR_OK)
+
+/**
+ *  Basic error macro. This checks whether the given condition is true, and if not returns
+ *  from this function with the supplied error code. This is a macro so that we can override it
+ *  for stress testing.
+ *
+ *  Note that this uses the do-while-0 wrapping to ensure that we don't get problems with dangling
+ *  else clauses. Note also no closing semicolon - this is supplied in typical usage:
+ *
+ *  MALI_CHECK((p!=NULL), ERROR_NO_OBJECT);
+ */
+#define MALI_CHECK(condition, error_code) do { if(!(condition)) MALI_ERROR(error_code); } while(0)
+
+/**
+ *  Error propagation macro. If the expression given is anything other than
+ *  _MALI_OSK_NO_ERROR, then the value is returned from the enclosing function
+ *  as an error code. This effectively acts as a guard clause, and propagates
+ *  error values up the call stack. This uses a temporary value to ensure that
+ *  the error expression is not evaluated twice.
+ *  If the counter for forcing a failure has been set using _mali_force_error,
+ *  this error will be returned without evaluating the expression in
+ *  MALI_CHECK_NO_ERROR
+ */
+#define MALI_CHECK_NO_ERROR(expression) \
+	do { _mali_osk_errcode_t _check_no_error_result=(expression); \
+		if(_check_no_error_result != _MALI_OSK_ERR_OK) \
+			MALI_ERROR(_check_no_error_result); \
+	} while(0)
+
+/**
+ *  Pointer check macro. Checks non-null pointer.
+ */
+#define MALI_CHECK_NON_NULL(pointer, error_code) MALI_CHECK( ((pointer)!=NULL), (error_code) )
+
+/**
+ *  Error macro with goto. This checks whether the given condition is true, and if not jumps
+ *  to the specified label using a goto. The label must therefore be local to the function in
+ *  which this macro appears. This is most usually used to execute some clean-up code before
+ *  exiting with a call to ERROR.
+ *
+ *  Like the other macros, this is a macro to allow us to override the condition if we wish,
+ *  e.g. to force an error during stress testing.
+ */
+#define MALI_CHECK_GOTO(condition, label) do { if(!(condition)) goto label; } while(0)
+
+/**
+ *  Explicitly ignore a parameter passed into a function, to suppress compiler warnings.
+ *  Should only be used with parameter names.
+ */
+#define MALI_IGNORE(x) x=x
+
+#if defined(CONFIG_MALI_QUIET)
+#define MALI_PRINTF(args)
+#else
+#define MALI_PRINTF(args) _mali_osk_dbgmsg args;
+#endif
+
+#define MALI_PRINT_ERROR(args) do{ \
+		MALI_PRINTF(("Mali: ERR: %s\n" ,__FILE__)); \
+		MALI_PRINTF(("           %s()%4d\n           ", __FUNCTION__, __LINE__)) ; \
+		MALI_PRINTF(args); \
+		MALI_PRINTF(("\n")); \
+	} while(0)
+
+#define MALI_PRINT(args) do{ \
+		MALI_PRINTF(("Mali: ")); \
+		MALI_PRINTF(args); \
+	} while (0)
+
+#ifdef DEBUG
+#ifndef mali_debug_level
+extern int mali_debug_level;
+#endif
+
+#define MALI_DEBUG_CODE(code) code
+#define MALI_DEBUG_PRINT(level, args)  do { \
+		if((level) <=  mali_debug_level)\
+		{MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); } \
+	} while (0)
+
+#define MALI_DEBUG_PRINT_ERROR(args) MALI_PRINT_ERROR(args)
+
+#define MALI_DEBUG_PRINT_IF(level,condition,args)  \
+	if((condition)&&((level) <=  mali_debug_level))\
+	{MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); }
+
+#define MALI_DEBUG_PRINT_ELSE(level, args)\
+	else if((level) <=  mali_debug_level)\
+	{ MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); }
+
+/**
+ * @note these variants of DEBUG ASSERTS will cause a debugger breakpoint
+ * to be entered (see _mali_osk_break() ). An alternative would be to call
+ * _mali_osk_abort(), on OSs that support it.
+ */
+#define MALI_DEBUG_PRINT_ASSERT(condition, args) do  {if( !(condition)) { MALI_PRINT_ERROR(args); _mali_osk_break(); } } while(0)
+#define MALI_DEBUG_ASSERT_POINTER(pointer) do  {if( (pointer)== NULL) {MALI_PRINT_ERROR(("NULL pointer " #pointer)); _mali_osk_break();} } while(0)
+#define MALI_DEBUG_ASSERT(condition) do  {if( !(condition)) {MALI_PRINT_ERROR(("ASSERT failed: " #condition )); _mali_osk_break();} } while(0)
+
+#else /* DEBUG */
+
+#define MALI_DEBUG_CODE(code)
+#define MALI_DEBUG_PRINT(string,args) do {} while(0)
+#define MALI_DEBUG_PRINT_ERROR(args) do {} while(0)
+#define MALI_DEBUG_PRINT_IF(level,condition,args) do {} while(0)
+#define MALI_DEBUG_PRINT_ELSE(level,condition,args) do {} while(0)
+#define MALI_DEBUG_PRINT_ASSERT(condition,args) do {} while(0)
+#define MALI_DEBUG_ASSERT_POINTER(pointer) do {} while(0)
+#define MALI_DEBUG_ASSERT(condition) do {} while(0)
+
+#endif /* DEBUG */
+
+/**
+ * variables from user space cannot be dereferenced from kernel space; tagging them
+ * with __user allows the GCC compiler to generate a warning. Other compilers may
+ * not support this so we define it here as an empty macro if the compiler doesn't
+ * define it.
+ */
+#ifndef __user
+#define __user
+#endif
+
+#endif /* __MALI_KERNEL_COMMON_H__ */
diff --git a/utgard/r6p2/common/mali_kernel_core.c b/utgard/r6p2/common/mali_kernel_core.c
new file mode 100755
index 0000000..c16b444
--- /dev/null
+++ b/utgard/r6p2/common/mali_kernel_core.c
@@ -0,0 +1,1334 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_ukk.h"
+#include "mali_kernel_core.h"
+#include "mali_memory.h"
+#include "mali_mem_validation.h"
+#include "mali_mmu.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_dlbu.h"
+#include "mali_broadcast.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_executor.h"
+#include "mali_pp_job.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+#include "mali_pmu.h"
+#include "mali_scheduler.h"
+#include "mali_kernel_utilization.h"
+#include "mali_l2_cache.h"
+#include "mali_timeline.h"
+#include "mali_soft_job.h"
+#include "mali_pm_domain.h"
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include "mali_profiling_internal.h"
+#endif
+#include "mali_control_timer.h"
+#include "mali_dvfs_policy.h"
+#include <linux/sched.h>
+#include <linux/atomic.h>
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+#include <linux/fence.h>
+#endif
+
+#define MALI_SHARED_MEMORY_DEFAULT_SIZE 0xffffffff
+
+/* Mali GPU memory. Real values come from module parameter or from device specific data */
+unsigned int mali_dedicated_mem_start = 0;
+unsigned int mali_dedicated_mem_size = 0;
+
+/* Default shared memory size is set to 4G. */
+unsigned int mali_shared_mem_size = MALI_SHARED_MEMORY_DEFAULT_SIZE;
+
+/* Frame buffer memory to be accessible by Mali GPU */
+int mali_fb_start = 0;
+int mali_fb_size = 0;
+
+/* Mali max job runtime */
+extern int mali_max_job_runtime;
+
+/** Start profiling from module load? */
+int mali_boot_profiling = 0;
+
+/** Limits for the number of PP cores behind each L2 cache. */
+int mali_max_pp_cores_group_1 = 0xFF;
+int mali_max_pp_cores_group_2 = 0xFF;
+
+int mali_inited_pp_cores_group_1 = 0;
+int mali_inited_pp_cores_group_2 = 0;
+
+static _mali_product_id_t global_product_id = _MALI_PRODUCT_ID_UNKNOWN;
+static uintptr_t global_gpu_base_address = 0;
+static u32 global_gpu_major_version = 0;
+static u32 global_gpu_minor_version = 0;
+
+mali_bool mali_gpu_class_is_mali450 = MALI_FALSE;
+mali_bool mali_gpu_class_is_mali470 = MALI_FALSE;
+
+static _mali_osk_errcode_t mali_set_global_gpu_base_address(void)
+{
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+	global_gpu_base_address = _mali_osk_resource_base_address();
+	if (0 == global_gpu_base_address) {
+		err = _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	return err;
+}
+
+static u32 mali_get_bcast_id(_mali_osk_resource_t *resource_pp)
+{
+	switch (resource_pp->base - global_gpu_base_address) {
+	case 0x08000:
+	case 0x20000: /* fall-through for aliased mapping */
+		return 0x01;
+	case 0x0A000:
+	case 0x22000: /* fall-through for aliased mapping */
+		return 0x02;
+	case 0x0C000:
+	case 0x24000: /* fall-through for aliased mapping */
+		return 0x04;
+	case 0x0E000:
+	case 0x26000: /* fall-through for aliased mapping */
+		return 0x08;
+	case 0x28000:
+		return 0x10;
+	case 0x2A000:
+		return 0x20;
+	case 0x2C000:
+		return 0x40;
+	case 0x2E000:
+		return 0x80;
+	default:
+		return 0;
+	}
+}
+
+static _mali_osk_errcode_t mali_parse_product_info(void)
+{
+	_mali_osk_resource_t first_pp_resource;
+
+	/* Find the first PP core resource (again) */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_PP0, &first_pp_resource)) {
+		/* Create a dummy PP object for this core so that we can read the version register */
+		struct mali_group *group = mali_group_create(NULL, NULL, NULL, MALI_DOMAIN_INDEX_PP0);
+		if (NULL != group) {
+			struct mali_pp_core *pp_core = mali_pp_create(&first_pp_resource, group, MALI_FALSE, mali_get_bcast_id(&first_pp_resource));
+			if (NULL != pp_core) {
+				u32 pp_version;
+
+				pp_version = mali_pp_core_get_version(pp_core);
+
+				mali_group_delete(group);
+
+				global_gpu_major_version = (pp_version >> 8) & 0xFF;
+				global_gpu_minor_version = pp_version & 0xFF;
+
+				switch (pp_version >> 16) {
+				case MALI200_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI200;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-200 r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					MALI_PRINT_ERROR(("Mali-200 is not supported by this driver.\n"));
+					_mali_osk_abort();
+					break;
+				case MALI300_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI300;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-300 r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				case MALI400_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI400;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-400 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				case MALI450_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI450;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-450 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				case MALI470_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI470;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-470 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				default:
+					MALI_DEBUG_PRINT(2, ("Found unknown Mali GPU (r%up%u)\n", global_gpu_major_version, global_gpu_minor_version));
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				return _MALI_OSK_ERR_OK;
+			} else {
+				MALI_PRINT_ERROR(("Failed to create initial PP object\n"));
+			}
+		} else {
+			MALI_PRINT_ERROR(("Failed to create initial group object\n"));
+		}
+	} else {
+		MALI_PRINT_ERROR(("First PP core not specified in config file\n"));
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+static void mali_delete_groups(void)
+{
+	struct mali_group *group;
+
+	group = mali_group_get_glob_group(0);
+	while (NULL != group) {
+		mali_group_delete(group);
+		group = mali_group_get_glob_group(0);
+	}
+
+	MALI_DEBUG_ASSERT(0 == mali_group_get_glob_num_groups());
+}
+
+static void mali_delete_l2_cache_cores(void)
+{
+	struct mali_l2_cache_core *l2;
+
+	l2 = mali_l2_cache_core_get_glob_l2_core(0);
+	while (NULL != l2) {
+		mali_l2_cache_delete(l2);
+		l2 = mali_l2_cache_core_get_glob_l2_core(0);
+	}
+
+	MALI_DEBUG_ASSERT(0 == mali_l2_cache_core_get_glob_num_l2_cores());
+}
+
+static struct mali_l2_cache_core *mali_create_l2_cache_core(_mali_osk_resource_t *resource, u32 domain_index)
+{
+	struct mali_l2_cache_core *l2_cache = NULL;
+
+	if (NULL != resource) {
+
+		MALI_DEBUG_PRINT(3, ("Found L2 cache %s\n", resource->description));
+
+		l2_cache = mali_l2_cache_create(resource, domain_index);
+		if (NULL == l2_cache) {
+			MALI_PRINT_ERROR(("Failed to create L2 cache object\n"));
+			return NULL;
+		}
+	}
+	MALI_DEBUG_PRINT(3, ("Created L2 cache core object\n"));
+
+	return l2_cache;
+}
+
+static _mali_osk_errcode_t mali_parse_config_l2_cache(void)
+{
+	struct mali_l2_cache_core *l2_cache = NULL;
+
+	if (mali_is_mali400()) {
+		_mali_osk_resource_t l2_resource;
+		if (_MALI_OSK_ERR_OK != _mali_osk_resource_find(MALI400_OFFSET_L2_CACHE0, &l2_resource)) {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		l2_cache = mali_create_l2_cache_core(&l2_resource, MALI_DOMAIN_INDEX_L20);
+		if (NULL == l2_cache) {
+			return _MALI_OSK_ERR_FAULT;
+		}
+	} else if (mali_is_mali450()) {
+		/*
+		 * L2 for GP    at 0x10000
+		 * L2 for PP0-3 at 0x01000
+		 * L2 for PP4-7 at 0x11000 (optional)
+		 */
+
+		_mali_osk_resource_t l2_gp_resource;
+		_mali_osk_resource_t l2_pp_grp0_resource;
+		_mali_osk_resource_t l2_pp_grp1_resource;
+
+		/* Make cluster for GP's L2 */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE0, &l2_gp_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for GP\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_gp_resource, MALI_DOMAIN_INDEX_L20);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for GP in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		/* Find corresponding l2 domain */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE1, &l2_pp_grp0_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 0\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_pp_grp0_resource, MALI_DOMAIN_INDEX_L21);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for PP group 0 in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		/* Second PP core group is optional, don't fail if we don't find it */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE2, &l2_pp_grp1_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 1\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_pp_grp1_resource, MALI_DOMAIN_INDEX_L22);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		}
+	} else if (mali_is_mali470()) {
+		_mali_osk_resource_t l2c1_resource;
+
+		/* Make cluster for L2C1 */
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI470_OFFSET_L2_CACHE1, &l2c1_resource)) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-470 L2 cache 1\n"));
+			l2_cache = mali_create_l2_cache_core(&l2c1_resource, MALI_DOMAIN_INDEX_L21);
+			if (NULL == l2_cache) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for L2C1\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static struct mali_group *mali_create_group(struct mali_l2_cache_core *cache,
+		_mali_osk_resource_t *resource_mmu,
+		_mali_osk_resource_t *resource_gp,
+		_mali_osk_resource_t *resource_pp,
+		u32 domain_index)
+{
+	struct mali_mmu_core *mmu;
+	struct mali_group *group;
+
+	MALI_DEBUG_PRINT(3, ("Starting new group for MMU %s\n", resource_mmu->description));
+
+	/* Create the group object */
+	group = mali_group_create(cache, NULL, NULL, domain_index);
+	if (NULL == group) {
+		MALI_PRINT_ERROR(("Failed to create group object for MMU %s\n", resource_mmu->description));
+		return NULL;
+	}
+
+	/* Create the MMU object inside group */
+	mmu = mali_mmu_create(resource_mmu, group, MALI_FALSE);
+	if (NULL == mmu) {
+		MALI_PRINT_ERROR(("Failed to create MMU object\n"));
+		mali_group_delete(group);
+		return NULL;
+	}
+
+	if (NULL != resource_gp) {
+		/* Create the GP core object inside this group */
+		struct mali_gp_core *gp_core = mali_gp_create(resource_gp, group);
+		if (NULL == gp_core) {
+			/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+			MALI_PRINT_ERROR(("Failed to create GP object\n"));
+			mali_group_delete(group);
+			return NULL;
+		}
+	}
+
+	if (NULL != resource_pp) {
+		struct mali_pp_core *pp_core;
+
+		/* Create the PP core object inside this group */
+		pp_core = mali_pp_create(resource_pp, group, MALI_FALSE, mali_get_bcast_id(resource_pp));
+		if (NULL == pp_core) {
+			/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+			MALI_PRINT_ERROR(("Failed to create PP object\n"));
+			mali_group_delete(group);
+			return NULL;
+		}
+	}
+
+	return group;
+}
+
+static _mali_osk_errcode_t mali_create_virtual_group(_mali_osk_resource_t *resource_mmu_pp_bcast,
+		_mali_osk_resource_t *resource_pp_bcast,
+		_mali_osk_resource_t *resource_dlbu,
+		_mali_osk_resource_t *resource_bcast)
+{
+	struct mali_mmu_core *mmu_pp_bcast_core;
+	struct mali_pp_core *pp_bcast_core;
+	struct mali_dlbu_core *dlbu_core;
+	struct mali_bcast_unit *bcast_core;
+	struct mali_group *group;
+
+	MALI_DEBUG_PRINT(2, ("Starting new virtual group for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description));
+
+	/* Create the DLBU core object */
+	dlbu_core = mali_dlbu_create(resource_dlbu);
+	if (NULL == dlbu_core) {
+		MALI_PRINT_ERROR(("Failed to create DLBU object \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the Broadcast unit core */
+	bcast_core = mali_bcast_unit_create(resource_bcast);
+	if (NULL == bcast_core) {
+		MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n"));
+		mali_dlbu_delete(dlbu_core);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the group object */
+#if defined(DEBUG)
+	/* Get a physical PP group to temporarily add to broadcast unit.  IRQ
+	 * verification needs a physical group in the broadcast unit to test
+	 * the broadcast unit interrupt line. */
+	{
+		struct mali_group *phys_group = NULL;
+		int i;
+		for (i = 0; i < mali_group_get_glob_num_groups(); i++) {
+			phys_group = mali_group_get_glob_group(i);
+			if (NULL != mali_group_get_pp_core(phys_group)) break;
+		}
+		MALI_DEBUG_ASSERT(NULL != mali_group_get_pp_core(phys_group));
+
+		/* Add the group temporarily to the broadcast, and update the
+		 * broadcast HW. Since the HW is not updated when removing the
+		 * group the IRQ check will work when the virtual PP is created
+		 * later.
+		 *
+		 * When the virtual group gets populated, the actually used
+		 * groups will be added to the broadcast unit and the HW will
+		 * be updated.
+		 */
+		mali_bcast_add_group(bcast_core, phys_group);
+		mali_bcast_reset(bcast_core);
+		mali_bcast_remove_group(bcast_core, phys_group);
+	}
+#endif /* DEBUG */
+	group = mali_group_create(NULL, dlbu_core, bcast_core, MALI_DOMAIN_INDEX_DUMMY);
+	if (NULL == group) {
+		MALI_PRINT_ERROR(("Failed to create group object for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description));
+		mali_bcast_unit_delete(bcast_core);
+		mali_dlbu_delete(dlbu_core);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the MMU object inside group */
+	mmu_pp_bcast_core = mali_mmu_create(resource_mmu_pp_bcast, group, MALI_TRUE);
+	if (NULL == mmu_pp_bcast_core) {
+		MALI_PRINT_ERROR(("Failed to create MMU PP broadcast object\n"));
+		mali_group_delete(group);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the PP core object inside this group */
+	pp_bcast_core = mali_pp_create(resource_pp_bcast, group, MALI_TRUE, 0);
+	if (NULL == pp_bcast_core) {
+		/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+		MALI_PRINT_ERROR(("Failed to create PP object\n"));
+		mali_group_delete(group);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_groups(void)
+{
+	struct mali_group *group;
+	int cluster_id_gp = 0;
+	int cluster_id_pp_grp0 = 0;
+	int cluster_id_pp_grp1 = 0;
+	int i;
+
+	_mali_osk_resource_t resource_gp;
+	_mali_osk_resource_t resource_gp_mmu;
+	_mali_osk_resource_t resource_pp[8];
+	_mali_osk_resource_t resource_pp_mmu[8];
+	_mali_osk_resource_t resource_pp_mmu_bcast;
+	_mali_osk_resource_t resource_pp_bcast;
+	_mali_osk_resource_t resource_dlbu;
+	_mali_osk_resource_t resource_bcast;
+	_mali_osk_errcode_t resource_gp_found;
+	_mali_osk_errcode_t resource_gp_mmu_found;
+	_mali_osk_errcode_t resource_pp_found[8];
+	_mali_osk_errcode_t resource_pp_mmu_found[8];
+	_mali_osk_errcode_t resource_pp_mmu_bcast_found;
+	_mali_osk_errcode_t resource_pp_bcast_found;
+	_mali_osk_errcode_t resource_dlbu_found;
+	_mali_osk_errcode_t resource_bcast_found;
+
+	if (!(mali_is_mali400() || mali_is_mali450() || mali_is_mali470())) {
+		/* No known HW core */
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (MALI_MAX_JOB_RUNTIME_DEFAULT == mali_max_job_runtime) {
+		/* Group settings are not overridden by module parameters, so use device settings */
+		_mali_osk_device_data data = { 0, };
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+			/* Use device specific settings (if defined) */
+			if (0 != data.max_job_runtime) {
+				mali_max_job_runtime = data.max_job_runtime;
+			}
+		}
+	}
+
+	if (mali_is_mali450()) {
+		/* Mali-450 have separate L2s for GP, and PP core group(s) */
+		cluster_id_pp_grp0 = 1;
+		cluster_id_pp_grp1 = 2;
+	}
+
+	resource_gp_found = _mali_osk_resource_find(MALI_OFFSET_GP, &resource_gp);
+	resource_gp_mmu_found = _mali_osk_resource_find(MALI_OFFSET_GP_MMU, &resource_gp_mmu);
+	resource_pp_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0, &(resource_pp[0]));
+	resource_pp_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1, &(resource_pp[1]));
+	resource_pp_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2, &(resource_pp[2]));
+	resource_pp_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3, &(resource_pp[3]));
+	resource_pp_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4, &(resource_pp[4]));
+	resource_pp_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5, &(resource_pp[5]));
+	resource_pp_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6, &(resource_pp[6]));
+	resource_pp_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7, &(resource_pp[7]));
+	resource_pp_mmu_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0_MMU, &(resource_pp_mmu[0]));
+	resource_pp_mmu_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1_MMU, &(resource_pp_mmu[1]));
+	resource_pp_mmu_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2_MMU, &(resource_pp_mmu[2]));
+	resource_pp_mmu_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3_MMU, &(resource_pp_mmu[3]));
+	resource_pp_mmu_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4_MMU, &(resource_pp_mmu[4]));
+	resource_pp_mmu_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5_MMU, &(resource_pp_mmu[5]));
+	resource_pp_mmu_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6_MMU, &(resource_pp_mmu[6]));
+	resource_pp_mmu_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7_MMU, &(resource_pp_mmu[7]));
+
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		resource_bcast_found = _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast);
+		resource_dlbu_found = _mali_osk_resource_find(MALI_OFFSET_DLBU, &resource_dlbu);
+		resource_pp_mmu_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST_MMU, &resource_pp_mmu_bcast);
+		resource_pp_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST, &resource_pp_bcast);
+
+		if (_MALI_OSK_ERR_OK != resource_bcast_found ||
+		    _MALI_OSK_ERR_OK != resource_dlbu_found ||
+		    _MALI_OSK_ERR_OK != resource_pp_mmu_bcast_found ||
+		    _MALI_OSK_ERR_OK != resource_pp_bcast_found) {
+			/* Missing mandatory core(s) for Mali-450 or Mali-470 */
+			MALI_DEBUG_PRINT(2, ("Missing mandatory resources, Mali-450 needs DLBU, Broadcast unit, virtual PP core and virtual MMU\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK != resource_gp_found ||
+	    _MALI_OSK_ERR_OK != resource_gp_mmu_found ||
+	    _MALI_OSK_ERR_OK != resource_pp_found[0] ||
+	    _MALI_OSK_ERR_OK != resource_pp_mmu_found[0]) {
+		/* Missing mandatory core(s) */
+		MALI_DEBUG_PRINT(2, ("Missing mandatory resource, need at least one GP and one PP, both with a separate MMU\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT(1 <= mali_l2_cache_core_get_glob_num_l2_cores());
+	group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_gp), &resource_gp_mmu, &resource_gp, NULL, MALI_DOMAIN_INDEX_GP);
+	if (NULL == group) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create group for first (and mandatory) PP core */
+	MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= (cluster_id_pp_grp0 + 1)); /* >= 1 on Mali-300 and Mali-400, >= 2 on Mali-450 */
+	group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[0], NULL, &resource_pp[0], MALI_DOMAIN_INDEX_PP0);
+	if (NULL == group) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	mali_inited_pp_cores_group_1++;
+
+	/* Create groups for rest of the cores in the first PP core group */
+	for (i = 1; i < 4; i++) { /* First half of the PP cores belong to first core group */
+		if (mali_inited_pp_cores_group_1 < mali_max_pp_cores_group_1) {
+			if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) {
+				group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i);
+				if (NULL == group) {
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				mali_inited_pp_cores_group_1++;
+			}
+		}
+	}
+
+	/* Create groups for cores in the second PP core group */
+	for (i = 4; i < 8; i++) { /* Second half of the PP cores belong to second core group */
+		if (mali_inited_pp_cores_group_2 < mali_max_pp_cores_group_2) {
+			if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) {
+				MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= 2); /* Only Mali-450 have a second core group */
+				group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp1), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i);
+				if (NULL == group) {
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				mali_inited_pp_cores_group_2++;
+			}
+		}
+	}
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		_mali_osk_errcode_t err = mali_create_virtual_group(&resource_pp_mmu_bcast, &resource_pp_bcast, &resource_dlbu, &resource_bcast);
+		if (_MALI_OSK_ERR_OK != err) {
+			return err;
+		}
+	}
+
+	mali_max_pp_cores_group_1 = mali_inited_pp_cores_group_1;
+	mali_max_pp_cores_group_2 = mali_inited_pp_cores_group_2;
+	MALI_DEBUG_PRINT(2, ("%d+%d PP cores initialized\n", mali_inited_pp_cores_group_1, mali_inited_pp_cores_group_2));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_check_shared_interrupts(void)
+{
+#if !defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (MALI_TRUE == _mali_osk_shared_interrupts()) {
+		MALI_PRINT_ERROR(("Shared interrupts detected, but driver support is not enabled\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* !defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	/* It is OK to compile support for shared interrupts even if Mali is not using it. */
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_pmu(void)
+{
+	_mali_osk_resource_t resource_pmu;
+
+	MALI_DEBUG_ASSERT(0 != global_gpu_base_address);
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_PMU, &resource_pmu)) {
+		struct mali_pmu_core *pmu;
+
+		pmu = mali_pmu_create(&resource_pmu);
+		if (NULL == pmu) {
+			MALI_PRINT_ERROR(("Failed to create PMU\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	/* It's ok if the PMU doesn't exist */
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_memory(void)
+{
+	_mali_osk_device_data data = { 0, };
+	_mali_osk_errcode_t ret;
+
+	/* The priority of setting the value of mali_shared_mem_size,
+	 * mali_dedicated_mem_start and mali_dedicated_mem_size:
+	 * 1. module parameter;
+	 * 2. platform data;
+	 * 3. default value;
+	 **/
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		/* Memory settings are not overridden by module parameters, so use device settings */
+		if (0 == mali_dedicated_mem_start && 0 == mali_dedicated_mem_size) {
+			/* Use device specific settings (if defined) */
+			mali_dedicated_mem_start = data.dedicated_mem_start;
+			mali_dedicated_mem_size = data.dedicated_mem_size;
+		}
+
+		if (MALI_SHARED_MEMORY_DEFAULT_SIZE == mali_shared_mem_size &&
+		    0 != data.shared_mem_size) {
+			mali_shared_mem_size = data.shared_mem_size;
+		}
+	}
+
+	if (0 < mali_dedicated_mem_size && 0 != mali_dedicated_mem_start) {
+		MALI_DEBUG_PRINT(2, ("Mali memory settings (dedicated: 0x%08X@0x%08X)\n",
+				     mali_dedicated_mem_size, mali_dedicated_mem_start));
+
+		/* Dedicated memory */
+		ret = mali_memory_core_resource_dedicated_memory(mali_dedicated_mem_start, mali_dedicated_mem_size);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to register dedicated memory\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	if (0 < mali_shared_mem_size) {
+		MALI_DEBUG_PRINT(2, ("Mali memory settings (shared: 0x%08X)\n", mali_shared_mem_size));
+
+		/* Shared OS memory */
+		ret = mali_memory_core_resource_os_memory(mali_shared_mem_size);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to register shared OS memory\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	if (0 == mali_fb_start && 0 == mali_fb_size) {
+		/* Frame buffer settings are not overridden by module parameters, so use device settings */
+		_mali_osk_device_data data = { 0, };
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+			/* Use device specific settings (if defined) */
+			mali_fb_start = data.fb_start;
+			mali_fb_size = data.fb_size;
+		}
+
+		MALI_DEBUG_PRINT(2, ("Using device defined frame buffer settings (0x%08X@0x%08X)\n",
+				     mali_fb_size, mali_fb_start));
+	} else {
+		MALI_DEBUG_PRINT(2, ("Using module defined frame buffer settings (0x%08X@0x%08X)\n",
+				     mali_fb_size, mali_fb_start));
+	}
+
+	if (0 != mali_fb_size) {
+		/* Register frame buffer */
+		ret = mali_mem_validation_add_range(mali_fb_start, mali_fb_size);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to register frame buffer memory region\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_detect_gpu_class(void)
+{
+	if (_mali_osk_identify_gpu_resource() == 0x450)
+		mali_gpu_class_is_mali450 = MALI_TRUE;
+
+	if (_mali_osk_identify_gpu_resource() == 0x470)
+		mali_gpu_class_is_mali470 = MALI_TRUE;
+}
+
+static _mali_osk_errcode_t mali_init_hw_reset(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	_mali_osk_resource_t resource_bcast;
+
+	/* Ensure broadcast unit is in a good state before we start creating
+	 * groups and cores.
+	 */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast)) {
+		struct mali_bcast_unit *bcast_core;
+
+		bcast_core = mali_bcast_unit_create(&resource_bcast);
+		if (NULL == bcast_core) {
+			MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+		mali_bcast_unit_delete(bcast_core);
+	}
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_initialize_subsystems(void)
+{
+	_mali_osk_errcode_t err;
+
+#ifdef CONFIG_MALI_DT
+	err = _mali_osk_resource_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+#endif
+
+	mali_pp_job_initialize();
+
+	mali_timeline_initialize();
+
+	err = mali_session_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/*Try to init gpu secure mode */
+	_mali_osk_gpu_secure_mode_init();
+
+#if defined(CONFIG_MALI400_PROFILING)
+	err = _mali_osk_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE);
+	if (_MALI_OSK_ERR_OK != err) {
+		/* No biggie if we weren't able to initialize the profiling */
+		MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n"));
+	}
+#endif
+
+	err = mali_memory_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_executor_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_scheduler_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Configure memory early, needed by mali_mmu_initialize. */
+	err = mali_parse_config_memory();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_set_global_gpu_base_address();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Detect GPU class (uses L2 cache count) */
+	mali_detect_gpu_class();
+
+	err = mali_check_shared_interrupts();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the MALI PMU (will not touch HW!) */
+	err = mali_parse_config_pmu();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the power management module */
+	err = mali_pm_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Make sure the entire GPU stays on for the rest of this function */
+	mali_pm_init_begin();
+
+	/* Ensure HW is in a good state before starting to access cores. */
+	err = mali_init_hw_reset();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Detect which Mali GPU we are dealing with */
+	err = mali_parse_product_info();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* The global_product_id is now populated with the correct Mali GPU */
+
+	/* Start configuring the actual Mali hardware. */
+
+	err = mali_mmu_initialize();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		err = mali_dlbu_initialize();
+		if (_MALI_OSK_ERR_OK != err) {
+			mali_pm_init_end();
+			mali_terminate_subsystems();
+			return err;
+		}
+	}
+
+	err = mali_parse_config_l2_cache();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_parse_config_groups();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Move groups into executor */
+	mali_executor_populate();
+
+	/* Need call after all group has assigned a domain */
+	mali_pm_power_cost_setup();
+
+	/* Initialize the GPU timer */
+	err = mali_control_timer_init();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the GPU utilization tracking */
+	err = mali_utilization_init();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+#if defined(CONFIG_MALI_DVFS)
+	err = mali_dvfs_policy_init();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+#endif
+
+	/* Allowing the system to be turned off */
+	mali_pm_init_end();
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+void mali_terminate_subsystems(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	MALI_DEBUG_PRINT(2, ("terminate_subsystems() called\n"));
+
+	mali_utilization_term();
+	mali_control_timer_term();
+
+	mali_executor_depopulate();
+	mali_delete_groups(); /* Delete groups not added to executor */
+	mali_executor_terminate();
+
+	mali_scheduler_terminate();
+	mali_pp_job_terminate();
+	mali_delete_l2_cache_cores();
+	mali_mmu_terminate();
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		mali_dlbu_terminate();
+	}
+
+	mali_pm_terminate();
+
+	if (NULL != pmu) {
+		mali_pmu_delete(pmu);
+	}
+
+#if defined(CONFIG_MALI400_PROFILING)
+	_mali_osk_profiling_term();
+#endif
+
+	_mali_osk_gpu_secure_mode_deinit();
+
+	mali_memory_terminate();
+
+	mali_session_terminate();
+
+	mali_timeline_terminate();
+
+	global_gpu_base_address = 0;
+}
+
+_mali_product_id_t mali_kernel_core_get_product_id(void)
+{
+	return global_product_id;
+}
+
+u32 mali_kernel_core_get_gpu_major_version(void)
+{
+	return global_gpu_major_version;
+}
+
+u32 mali_kernel_core_get_gpu_minor_version(void)
+{
+	return global_gpu_minor_version;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	/* check compatability */
+	if (args->version == _MALI_UK_API_VERSION) {
+		args->compatible = 1;
+	} else {
+		args->compatible = 0;
+	}
+
+	args->version = _MALI_UK_API_VERSION; /* report our version */
+
+	/* success regardless of being compatible or not */
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	/* check compatability */
+	if (args->version == _MALI_UK_API_VERSION) {
+		args->compatible = 1;
+	} else {
+		args->compatible = 0;
+	}
+
+	args->version = _MALI_UK_API_VERSION; /* report our version */
+
+	/* success regardless of being compatible or not */
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args)
+{
+	_mali_osk_errcode_t err;
+	_mali_osk_notification_t *notification;
+	_mali_osk_notification_queue_t *queue;
+	struct mali_session_data *session;
+
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	queue = session->ioctl_queue;
+
+	/* if the queue does not exist we're currently shutting down */
+	if (NULL == queue) {
+		MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. Asking userspace to stop querying\n"));
+		args->type = _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS;
+		return _MALI_OSK_ERR_OK;
+	}
+
+	/* receive a notification, might sleep */
+	err = _mali_osk_notification_queue_receive(queue, &notification);
+	if (_MALI_OSK_ERR_OK != err) {
+		MALI_ERROR(err); /* errcode returned, pass on to caller */
+	}
+
+	/* copy the buffer to the user */
+	args->type = (_mali_uk_notification_type)notification->notification_type;
+	_mali_osk_memcpy(&args->data, notification->result_buffer, notification->result_buffer_size);
+
+	/* finished with the notification */
+	_mali_osk_notification_delete(notification);
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args)
+{
+	_mali_osk_notification_t *notification;
+	_mali_osk_notification_queue_t *queue;
+	struct mali_session_data *session;
+
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	queue = session->ioctl_queue;
+
+	/* if the queue does not exist we're currently shutting down */
+	if (NULL == queue) {
+		MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. Asking userspace to stop querying\n"));
+		return _MALI_OSK_ERR_OK;
+	}
+
+	notification = _mali_osk_notification_create(args->type, 0);
+	if (NULL == notification) {
+		MALI_PRINT_ERROR(("Failed to create notification object\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	_mali_osk_notification_queue_send(queue, notification);
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+_mali_osk_errcode_t _mali_ukk_pending_submit(_mali_uk_pending_submit_s *args)
+{
+	wait_queue_head_t *queue;
+
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	queue = mali_session_get_wait_queue();
+
+	/* check pending big job number, might sleep if larger than MAX allowed number */
+	if (wait_event_interruptible(*queue, MALI_MAX_PENDING_BIG_JOB > mali_scheduler_job_gp_big_job_count())) {
+		return _MALI_OSK_ERR_RESTARTSYSCALL;
+	}
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+
+_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (!session->use_high_priority_job_queue) {
+		session->use_high_priority_job_queue = MALI_TRUE;
+		MALI_DEBUG_PRINT(2, ("Session 0x%08X with pid %d was granted higher priority.\n", session, _mali_osk_get_pid()));
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_open(void **context)
+{
+	u32 i;
+	struct mali_session_data *session;
+
+	/* allocated struct to track this session */
+	session = (struct mali_session_data *)_mali_osk_calloc(1, sizeof(struct mali_session_data));
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_NOMEM);
+
+	MALI_DEBUG_PRINT(3, ("Session starting\n"));
+
+	/* create a response queue for this session */
+	session->ioctl_queue = _mali_osk_notification_queue_init();
+	if (NULL == session->ioctl_queue) {
+		goto err;
+	}
+
+	session->page_directory = mali_mmu_pagedir_alloc();
+	if (NULL == session->page_directory) {
+		goto err_mmu;
+	}
+
+	if (_MALI_OSK_ERR_OK != mali_mmu_pagedir_map(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE)) {
+		MALI_PRINT_ERROR(("Failed to map DLBU page into session\n"));
+		goto err_mmu;
+	}
+
+	if (0 != mali_dlbu_phys_addr) {
+		mali_mmu_pagedir_update(session->page_directory, MALI_DLBU_VIRT_ADDR, mali_dlbu_phys_addr,
+					_MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+	}
+
+	if (_MALI_OSK_ERR_OK != mali_memory_session_begin(session)) {
+		goto err_session;
+	}
+
+	/* Create soft system. */
+	session->soft_job_system = mali_soft_job_system_create(session);
+	if (NULL == session->soft_job_system) {
+		goto err_soft;
+	}
+
+	/* Initialize the dma fence context.*/
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+	session->fence_context = fence_context_alloc(1);
+	_mali_osk_atomic_init(&session->fence_seqno, 0);
+#else
+	MALI_PRINT_ERROR(("The kernel version not support dma fence!\n"));
+	goto err_time_line;
+#endif
+#endif
+
+	/* Create timeline system. */
+	session->timeline_system = mali_timeline_system_create(session);
+	if (NULL == session->timeline_system) {
+		goto err_time_line;
+	}
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_init(&session->number_of_window_jobs, 0);
+#endif
+
+	session->use_high_priority_job_queue = MALI_FALSE;
+
+	/* Initialize list of PP jobs on this session. */
+	_MALI_OSK_INIT_LIST_HEAD(&session->pp_job_list);
+
+	/* Initialize the pp_job_fb_lookup_list array used to quickly lookup jobs from a given frame builder */
+	for (i = 0; i < MALI_PP_JOB_FB_LOOKUP_LIST_SIZE; ++i) {
+		_MALI_OSK_INIT_LIST_HEAD(&session->pp_job_fb_lookup_list[i]);
+	}
+
+	session->pid = _mali_osk_get_pid();
+	session->comm = _mali_osk_get_comm();
+	session->max_mali_mem_allocated_size = 0;
+	for (i = 0; i < MALI_MEM_TYPE_MAX; i ++) {
+		atomic_set(&session->mali_mem_array[i], 0);
+	}
+	atomic_set(&session->mali_mem_allocated_pages, 0);
+	*context = (void *)session;
+
+	/* Add session to the list of all sessions. */
+	mali_session_add(session);
+
+	MALI_DEBUG_PRINT(3, ("Session started\n"));
+	return _MALI_OSK_ERR_OK;
+
+err_time_line:
+	mali_soft_job_system_destroy(session->soft_job_system);
+err_soft:
+	mali_memory_session_end(session);
+err_session:
+	mali_mmu_pagedir_free(session->page_directory);
+err_mmu:
+	_mali_osk_notification_queue_term(session->ioctl_queue);
+err:
+	_mali_osk_free(session);
+	MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+}
+
+#if defined(DEBUG)
+/* parameter used for debug */
+extern u32 num_pm_runtime_resume;
+extern u32 num_pm_updates;
+extern u32 num_pm_updates_up;
+extern u32 num_pm_updates_down;
+#endif
+
+_mali_osk_errcode_t _mali_ukk_close(void **context)
+{
+	struct mali_session_data *session;
+	MALI_CHECK_NON_NULL(context, _MALI_OSK_ERR_INVALID_ARGS);
+	session = (struct mali_session_data *)*context;
+
+	MALI_DEBUG_PRINT(3, ("Session ending\n"));
+
+	MALI_DEBUG_ASSERT_POINTER(session->soft_job_system);
+	MALI_DEBUG_ASSERT_POINTER(session->timeline_system);
+
+	/* Remove session from list of all sessions. */
+	mali_session_remove(session);
+
+	/* This flag is used to prevent queueing of jobs due to activation. */
+	session->is_aborting = MALI_TRUE;
+
+	/* Stop the soft job timer. */
+	mali_timeline_system_stop_timer(session->timeline_system);
+
+	/* Abort queued jobs */
+	mali_scheduler_abort_session(session);
+
+	/* Abort executing jobs */
+	mali_executor_abort_session(session);
+
+	/* Abort the soft job system. */
+	mali_soft_job_system_abort(session->soft_job_system);
+
+	/* Force execution of all pending bottom half processing for GP and PP. */
+	_mali_osk_wq_flush();
+
+	/* The session PP list should now be empty. */
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&session->pp_job_list));
+
+	/* At this point the GP and PP scheduler no longer has any jobs queued or running from this
+	 * session, and all soft jobs in the soft job system has been destroyed. */
+
+	/* Any trackers left in the timeline system are directly or indirectly waiting on external
+	 * sync fences.  Cancel all sync fence waiters to trigger activation of all remaining
+	 * trackers.  This call will sleep until all timelines are empty. */
+	mali_timeline_system_abort(session->timeline_system);
+
+	/* Flush pending work.
+	 * Needed to make sure all bottom half processing related to this
+	 * session has been completed, before we free internal data structures.
+	 */
+	_mali_osk_wq_flush();
+
+	/* Destroy timeline system. */
+	mali_timeline_system_destroy(session->timeline_system);
+	session->timeline_system = NULL;
+
+	/* Destroy soft system. */
+	mali_soft_job_system_destroy(session->soft_job_system);
+	session->soft_job_system = NULL;
+
+	MALI_DEBUG_CODE({
+		/* Check that the pp_job_fb_lookup_list array is empty. */
+		u32 i;
+		for (i = 0; i < MALI_PP_JOB_FB_LOOKUP_LIST_SIZE; ++i)
+		{
+			MALI_DEBUG_ASSERT(_mali_osk_list_empty(&session->pp_job_fb_lookup_list[i]));
+		}
+	});
+
+	/* Free remaining memory allocated to this session */
+	mali_memory_session_end(session);
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_term(&session->number_of_window_jobs);
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+	_mali_osk_profiling_stop_sampling(session->pid);
+#endif
+
+	/* Free session data structures */
+	mali_mmu_pagedir_unmap(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE);
+	mali_mmu_pagedir_free(session->page_directory);
+	_mali_osk_notification_queue_term(session->ioctl_queue);
+	_mali_osk_free(session);
+
+	*context = NULL;
+
+	MALI_DEBUG_PRINT(3, ("Session has ended\n"));
+
+#if defined(DEBUG)
+	MALI_DEBUG_PRINT(3, ("Stats: # runtime resumes: %u\n", num_pm_runtime_resume));
+	MALI_DEBUG_PRINT(3, ("       # PM updates: .... %u (up %u, down %u)\n", num_pm_updates, num_pm_updates_up, num_pm_updates_down));
+
+	num_pm_runtime_resume = 0;
+	num_pm_updates = 0;
+	num_pm_updates_up = 0;
+	num_pm_updates_down = 0;
+#endif
+
+	return _MALI_OSK_ERR_OK;;
+}
+
+#if MALI_STATE_TRACKING
+u32 _mali_kernel_core_dump_state(char *buf, u32 size)
+{
+	int n = 0; /* Number of bytes written to buf */
+
+	n += mali_scheduler_dump_state(buf + n, size - n);
+	n += mali_executor_dump_state(buf + n, size - n);
+
+	return n;
+}
+#endif
diff --git a/utgard/r6p2/common/mali_kernel_core.h b/utgard/r6p2/common/mali_kernel_core.h
new file mode 100644
index 0000000..cf6af32
--- /dev/null
+++ b/utgard/r6p2/common/mali_kernel_core.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_CORE_H__
+#define __MALI_KERNEL_CORE_H__
+
+#include "mali_osk.h"
+
+typedef enum {
+	_MALI_PRODUCT_ID_UNKNOWN,
+	_MALI_PRODUCT_ID_MALI200,
+	_MALI_PRODUCT_ID_MALI300,
+	_MALI_PRODUCT_ID_MALI400,
+	_MALI_PRODUCT_ID_MALI450,
+	_MALI_PRODUCT_ID_MALI470,
+} _mali_product_id_t;
+
+extern mali_bool mali_gpu_class_is_mali450;
+extern mali_bool mali_gpu_class_is_mali470;
+
+_mali_osk_errcode_t mali_initialize_subsystems(void);
+
+void mali_terminate_subsystems(void);
+
+_mali_product_id_t mali_kernel_core_get_product_id(void);
+
+u32 mali_kernel_core_get_gpu_major_version(void);
+
+u32 mali_kernel_core_get_gpu_minor_version(void);
+
+u32 _mali_kernel_core_dump_state(char *buf, u32 size);
+
+MALI_STATIC_INLINE mali_bool mali_is_mali470(void)
+{
+	return mali_gpu_class_is_mali470;
+}
+
+MALI_STATIC_INLINE mali_bool mali_is_mali450(void)
+{
+	return mali_gpu_class_is_mali450;
+}
+
+MALI_STATIC_INLINE mali_bool mali_is_mali400(void)
+{
+	if (mali_gpu_class_is_mali450 || mali_gpu_class_is_mali470)
+		return MALI_FALSE;
+
+	return MALI_TRUE;
+}
+#endif /* __MALI_KERNEL_CORE_H__ */
diff --git a/utgard/r6p2/common/mali_kernel_utilization.c b/utgard/r6p2/common/mali_kernel_utilization.c
new file mode 100755
index 0000000..ca7ebea
--- /dev/null
+++ b/utgard/r6p2/common/mali_kernel_utilization.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_utilization.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_scheduler.h"
+
+#include "mali_executor.h"
+#include "mali_dvfs_policy.h"
+#include "mali_control_timer.h"
+
+/* Thresholds for GP bound detection. */
+#define MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD 240
+#define MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD 250
+
+static _mali_osk_spinlock_irq_t *utilization_data_lock;
+
+static u32 num_running_gp_cores = 0;
+static u32 num_running_pp_cores = 0;
+
+static u64 work_start_time_gpu = 0;
+static u64 work_start_time_gp = 0;
+static u64 work_start_time_pp = 0;
+static u64 accumulated_work_time_gpu = 0;
+static u64 accumulated_work_time_gp = 0;
+static u64 accumulated_work_time_pp = 0;
+
+static u32 last_utilization_gpu = 0 ;
+static u32 last_utilization_gp = 0 ;
+static u32 last_utilization_pp = 0 ;
+
+void (*mali_utilization_callback)(struct mali_gpu_utilization_data *data) = NULL;
+
+/* Define the first timer control timer timeout in milliseconds */
+static u32 mali_control_first_timeout = 100;
+static struct mali_gpu_utilization_data mali_util_data = {0, };
+
+struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer)
+{
+	u64 time_now;
+	u32 leading_zeroes;
+	u32 shift_val;
+	u32 work_normalized_gpu;
+	u32 work_normalized_gp;
+	u32 work_normalized_pp;
+	u32 period_normalized;
+	u32 utilization_gpu;
+	u32 utilization_gp;
+	u32 utilization_pp;
+
+	mali_utilization_data_lock();
+
+	time_now = _mali_osk_time_get_ns();
+
+	*time_period = time_now - *start_time;
+
+	if (accumulated_work_time_gpu == 0 && work_start_time_gpu == 0) {
+		mali_control_timer_pause();
+		/*
+		 * No work done for this period
+		 * - No need to reschedule timer
+		 * - Report zero usage
+		 */
+		last_utilization_gpu = 0;
+		last_utilization_gp = 0;
+		last_utilization_pp = 0;
+
+		mali_util_data.utilization_gpu = last_utilization_gpu;
+		mali_util_data.utilization_gp = last_utilization_gp;
+		mali_util_data.utilization_pp = last_utilization_pp;
+
+		mali_utilization_data_unlock();
+
+		*need_add_timer = MALI_FALSE;
+
+		mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
+
+		MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
+		MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
+		MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));
+
+		return &mali_util_data;
+	}
+
+	/* If we are currently busy, update working period up to now */
+	if (work_start_time_gpu != 0) {
+		accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+		work_start_time_gpu = time_now;
+
+		/* GP and/or PP will also be busy if the GPU is busy at this point */
+
+		if (work_start_time_gp != 0) {
+			accumulated_work_time_gp += (time_now - work_start_time_gp);
+			work_start_time_gp = time_now;
+		}
+
+		if (work_start_time_pp != 0) {
+			accumulated_work_time_pp += (time_now - work_start_time_pp);
+			work_start_time_pp = time_now;
+		}
+	}
+
+	/*
+	 * We have two 64-bit values, a dividend and a divisor.
+	 * To avoid dependencies to a 64-bit divider, we shift down the two values
+	 * equally first.
+	 * We shift the dividend up and possibly the divisor down, making the result X in 256.
+	 */
+
+	/* Shift the 64-bit values down so they fit inside a 32-bit integer */
+	leading_zeroes = _mali_osk_clz((u32)(*time_period >> 32));
+	shift_val = 32 - leading_zeroes;
+	work_normalized_gpu = (u32)(accumulated_work_time_gpu >> shift_val);
+	work_normalized_gp = (u32)(accumulated_work_time_gp >> shift_val);
+	work_normalized_pp = (u32)(accumulated_work_time_pp >> shift_val);
+	period_normalized = (u32)(*time_period >> shift_val);
+
+	/*
+	 * Now, we should report the usage in parts of 256
+	 * this means we must shift up the dividend or down the divisor by 8
+	 * (we could do a combination, but we just use one for simplicity,
+	 * but the end result should be good enough anyway)
+	 */
+	if (period_normalized > 0x00FFFFFF) {
+		/* The divisor is so big that it is safe to shift it down */
+		period_normalized >>= 8;
+	} else {
+		/*
+		 * The divisor is so small that we can shift up the dividend, without loosing any data.
+		 * (dividend is always smaller than the divisor)
+		 */
+		work_normalized_gpu <<= 8;
+		work_normalized_gp <<= 8;
+		work_normalized_pp <<= 8;
+	}
+
+	utilization_gpu = work_normalized_gpu / period_normalized;
+	utilization_gp = work_normalized_gp / period_normalized;
+	utilization_pp = work_normalized_pp / period_normalized;
+
+	last_utilization_gpu = utilization_gpu;
+	last_utilization_gp = utilization_gp;
+	last_utilization_pp = utilization_pp;
+
+	if ((MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD < last_utilization_gp) &&
+	    (MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD > last_utilization_pp)) {
+		mali_executor_hint_enable(MALI_EXECUTOR_HINT_GP_BOUND);
+	} else {
+		mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
+	}
+
+	/* starting a new period */
+	accumulated_work_time_gpu = 0;
+	accumulated_work_time_gp = 0;
+	accumulated_work_time_pp = 0;
+
+	*start_time = time_now;
+
+	mali_util_data.utilization_gp = last_utilization_gp;
+	mali_util_data.utilization_gpu = last_utilization_gpu;
+	mali_util_data.utilization_pp = last_utilization_pp;
+
+	mali_utilization_data_unlock();
+
+	*need_add_timer = MALI_TRUE;
+
+	MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
+	MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
+	MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));
+
+	return &mali_util_data;
+}
+
+_mali_osk_errcode_t mali_utilization_init(void)
+{
+#if USING_GPU_UTILIZATION
+	_mali_osk_device_data data;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		if (NULL != data.utilization_callback) {
+			mali_utilization_callback = data.utilization_callback;
+			MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Utilization handler installed \n"));
+		}
+	}
+#endif /* defined(USING_GPU_UTILIZATION) */
+
+	if (NULL == mali_utilization_callback) {
+		MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: No platform utilization handler installed\n"));
+	}
+
+	utilization_data_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_UTILIZATION);
+	if (NULL == utilization_data_lock) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	num_running_gp_cores = 0;
+	num_running_pp_cores = 0;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_utilization_term(void)
+{
+	if (NULL != utilization_data_lock) {
+		_mali_osk_spinlock_irq_term(utilization_data_lock);
+	}
+}
+
+void mali_utilization_gp_start(void)
+{
+	mali_utilization_data_lock();
+
+	++num_running_gp_cores;
+	if (1 == num_running_gp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* First GP core started, consider GP busy from now and onwards */
+		work_start_time_gp = time_now;
+
+		if (0 == num_running_pp_cores) {
+			mali_bool is_resume = MALI_FALSE;
+			/*
+			 * There are no PP cores running, so this is also the point
+			 * at which we consider the GPU to be busy as well.
+			 */
+			work_start_time_gpu = time_now;
+
+			is_resume  = mali_control_timer_resume(time_now);
+
+			mali_utilization_data_unlock();
+
+			if (is_resume) {
+				/* Do some policy in new period for performance consideration */
+#if defined(CONFIG_MALI_DVFS)
+				/* Clear session->number_of_window_jobs, prepare parameter for dvfs */
+				mali_session_max_window_num();
+				if (0 == last_utilization_gpu) {
+					/*
+					 * for mali_dev_pause is called in set clock,
+					 * so each time we change clock, we will set clock to
+					 * highest step even if under down clock case,
+					 * it is not nessesary, so we only set the clock under
+					 * last time utilization equal 0, we stop the timer then
+					 * start the GPU again case
+					 */
+					mali_dvfs_policy_new_period();
+				}
+#endif
+				/*
+				 * First timeout using short interval for power consideration
+				 * because we give full power in the new period, but if the
+				 * job loading is light, finish in 10ms, the other time all keep
+				 * in high freq it will wast time.
+				 */
+				mali_control_timer_add(mali_control_first_timeout);
+			}
+		} else {
+			mali_utilization_data_unlock();
+		}
+
+	} else {
+		/* Nothing to do */
+		mali_utilization_data_unlock();
+	}
+}
+
+void mali_utilization_pp_start(void)
+{
+	mali_utilization_data_lock();
+
+	++num_running_pp_cores;
+	if (1 == num_running_pp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* First PP core started, consider PP busy from now and onwards */
+		work_start_time_pp = time_now;
+
+		if (0 == num_running_gp_cores) {
+			mali_bool is_resume = MALI_FALSE;
+			/*
+			 * There are no GP cores running, so this is also the point
+			 * at which we consider the GPU to be busy as well.
+			 */
+			work_start_time_gpu = time_now;
+
+			/* Start a new period if stoped */
+			is_resume = mali_control_timer_resume(time_now);
+
+			mali_utilization_data_unlock();
+
+			if (is_resume) {
+#if defined(CONFIG_MALI_DVFS)
+				/* Clear session->number_of_window_jobs, prepare parameter for dvfs */
+				mali_session_max_window_num();
+				if (0 == last_utilization_gpu) {
+					/*
+					 * for mali_dev_pause is called in set clock,
+					 * so each time we change clock, we will set clock to
+					 * highest step even if under down clock case,
+					 * it is not nessesary, so we only set the clock under
+					 * last time utilization equal 0, we stop the timer then
+					 * start the GPU again case
+					 */
+					mali_dvfs_policy_new_period();
+				}
+#endif
+
+				/*
+				 * First timeout using short interval for power consideration
+				 * because we give full power in the new period, but if the
+				 * job loading is light, finish in 10ms, the other time all keep
+				 * in high freq it will wast time.
+				 */
+				mali_control_timer_add(mali_control_first_timeout);
+			}
+		} else {
+			mali_utilization_data_unlock();
+		}
+	} else {
+		/* Nothing to do */
+		mali_utilization_data_unlock();
+	}
+}
+
+void mali_utilization_gp_end(void)
+{
+	mali_utilization_data_lock();
+
+	--num_running_gp_cores;
+	if (0 == num_running_gp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* Last GP core ended, consider GP idle from now and onwards */
+		accumulated_work_time_gp += (time_now - work_start_time_gp);
+		work_start_time_gp = 0;
+
+		if (0 == num_running_pp_cores) {
+			/*
+			 * There are no PP cores running, so this is also the point
+			 * at which we consider the GPU to be idle as well.
+			 */
+			accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+			work_start_time_gpu = 0;
+		}
+	}
+
+	mali_utilization_data_unlock();
+}
+
+void mali_utilization_pp_end(void)
+{
+	mali_utilization_data_lock();
+
+	--num_running_pp_cores;
+	if (0 == num_running_pp_cores) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* Last PP core ended, consider PP idle from now and onwards */
+		accumulated_work_time_pp += (time_now - work_start_time_pp);
+		work_start_time_pp = 0;
+
+		if (0 == num_running_gp_cores) {
+			/*
+			 * There are no GP cores running, so this is also the point
+			 * at which we consider the GPU to be idle as well.
+			 */
+			accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+			work_start_time_gpu = 0;
+		}
+	}
+
+	mali_utilization_data_unlock();
+}
+
+mali_bool mali_utilization_enabled(void)
+{
+#if defined(CONFIG_MALI_DVFS)
+	return mali_dvfs_policy_enabled();
+#else
+	return (NULL != mali_utilization_callback);
+#endif /* defined(CONFIG_MALI_DVFS) */
+}
+
+void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data)
+{
+	MALI_DEBUG_ASSERT_POINTER(mali_utilization_callback);
+
+	mali_utilization_callback(util_data);
+}
+
+void mali_utilization_reset(void)
+{
+	accumulated_work_time_gpu = 0;
+	accumulated_work_time_gp = 0;
+	accumulated_work_time_pp = 0;
+
+	last_utilization_gpu = 0;
+	last_utilization_gp = 0;
+	last_utilization_pp = 0;
+}
+
+void mali_utilization_data_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(utilization_data_lock);
+}
+
+void mali_utilization_data_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(utilization_data_lock);
+}
+
+void mali_utilization_data_assert_locked(void)
+{
+	MALI_DEBUG_ASSERT_LOCK_HELD(utilization_data_lock);
+}
+
+u32 _mali_ukk_utilization_gp_pp(void)
+{
+	return last_utilization_gpu;
+}
+
+u32 _mali_ukk_utilization_gp(void)
+{
+	return last_utilization_gp;
+}
+
+u32 _mali_ukk_utilization_pp(void)
+{
+	return last_utilization_pp;
+}
diff --git a/utgard/r6p2/common/mali_kernel_utilization.h b/utgard/r6p2/common/mali_kernel_utilization.h
new file mode 100755
index 0000000..5206225
--- /dev/null
+++ b/utgard/r6p2/common/mali_kernel_utilization.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_UTILIZATION_H__
+#define __MALI_KERNEL_UTILIZATION_H__
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_osk.h"
+
+/**
+ * Initialize/start the Mali GPU utilization metrics reporting.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_utilization_init(void);
+
+/**
+ * Terminate the Mali GPU utilization metrics reporting
+ */
+void mali_utilization_term(void);
+
+/**
+ * Check if Mali utilization is enabled
+ */
+mali_bool mali_utilization_enabled(void);
+
+/**
+ * Should be called when a job is about to execute a GP job
+ */
+void mali_utilization_gp_start(void);
+
+/**
+ * Should be called when a job has completed executing a GP job
+ */
+void mali_utilization_gp_end(void);
+
+/**
+ * Should be called when a job is about to execute a PP job
+ */
+void mali_utilization_pp_start(void);
+
+/**
+ * Should be called when a job has completed executing a PP job
+ */
+void mali_utilization_pp_end(void);
+
+/**
+ * Should be called to calcution the GPU utilization
+ */
+struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer);
+
+_mali_osk_spinlock_irq_t *mali_utilization_get_lock(void);
+
+void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data);
+
+void mali_utilization_data_lock(void);
+
+void mali_utilization_data_unlock(void);
+
+void mali_utilization_data_assert_locked(void);
+
+void mali_utilization_reset(void);
+
+
+#endif /* __MALI_KERNEL_UTILIZATION_H__ */
diff --git a/utgard/r6p2/common/mali_kernel_vsync.c b/utgard/r6p2/common/mali_kernel_vsync.c
new file mode 100755
index 0000000..3b2b108
--- /dev/null
+++ b/utgard/r6p2/common/mali_kernel_vsync.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+
+#include "mali_osk_profiling.h"
+
+_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args)
+{
+	_mali_uk_vsync_event event = (_mali_uk_vsync_event)args->event;
+	MALI_IGNORE(event); /* event is not used for release code, and that is OK */
+
+	/*
+	 * Manually generate user space events in kernel space.
+	 * This saves user space from calling kernel space twice in this case.
+	 * We just need to remember to add pid and tid manually.
+	 */
+	if (event == _MALI_UK_VSYNC_EVENT_BEGIN_WAIT) {
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC,
+					      _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0);
+	}
+
+	if (event == _MALI_UK_VSYNC_EVENT_END_WAIT) {
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC,
+					      _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0);
+	}
+
+
+	MALI_DEBUG_PRINT(4, ("Received VSYNC event: %d\n", event));
+	MALI_SUCCESS;
+}
+
diff --git a/utgard/r6p2/common/mali_l2_cache.c b/utgard/r6p2/common/mali_l2_cache.c
new file mode 100755
index 0000000..60c3957
--- /dev/null
+++ b/utgard/r6p2/common/mali_l2_cache.c
@@ -0,0 +1,540 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_hw_core.h"
+#include "mali_scheduler.h"
+#include "mali_pm.h"
+#include "mali_pm_domain.h"
+
+/**
+ * Size of the Mali L2 cache registers in bytes
+ */
+#define MALI400_L2_CACHE_REGISTERS_SIZE 0x30
+
+/**
+ * Mali L2 cache register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_l2_cache_register {
+	MALI400_L2_CACHE_REGISTER_SIZE         = 0x0004,
+	MALI400_L2_CACHE_REGISTER_STATUS       = 0x0008,
+	/*unused                               = 0x000C */
+	MALI400_L2_CACHE_REGISTER_COMMAND      = 0x0010,
+	MALI400_L2_CACHE_REGISTER_CLEAR_PAGE   = 0x0014,
+	MALI400_L2_CACHE_REGISTER_MAX_READS    = 0x0018,
+	MALI400_L2_CACHE_REGISTER_ENABLE       = 0x001C,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0 = 0x0020,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0 = 0x0024,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1 = 0x0028,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1 = 0x002C,
+} mali_l2_cache_register;
+
+/**
+ * Mali L2 cache commands
+ * These are the commands that can be sent to the Mali L2 cache unit
+ */
+typedef enum mali_l2_cache_command {
+	MALI400_L2_CACHE_COMMAND_CLEAR_ALL = 0x01,
+} mali_l2_cache_command;
+
+/**
+ * Mali L2 cache commands
+ * These are the commands that can be sent to the Mali L2 cache unit
+ */
+typedef enum mali_l2_cache_enable {
+	MALI400_L2_CACHE_ENABLE_DEFAULT = 0x0, /* Default */
+	MALI400_L2_CACHE_ENABLE_ACCESS = 0x01,
+	MALI400_L2_CACHE_ENABLE_READ_ALLOCATE = 0x02,
+} mali_l2_cache_enable;
+
+/**
+ * Mali L2 cache status bits
+ */
+typedef enum mali_l2_cache_status {
+	MALI400_L2_CACHE_STATUS_COMMAND_BUSY = 0x01,
+	MALI400_L2_CACHE_STATUS_DATA_BUSY    = 0x02,
+} mali_l2_cache_status;
+
+#define MALI400_L2_MAX_READS_NOT_SET -1
+
+static struct mali_l2_cache_core *
+	mali_global_l2s[MALI_MAX_NUMBER_OF_L2_CACHE_CORES] = { NULL, };
+static u32 mali_global_num_l2s = 0;
+
+int mali_l2_max_reads = MALI400_L2_MAX_READS_NOT_SET;
+
+
+/* Local helper functions */
+
+static void mali_l2_cache_reset(struct mali_l2_cache_core *cache);
+
+static _mali_osk_errcode_t mali_l2_cache_send_command(
+	struct mali_l2_cache_core *cache, u32 reg, u32 val);
+
+static void mali_l2_cache_lock(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	_mali_osk_spinlock_irq_lock(cache->lock);
+}
+
+static void mali_l2_cache_unlock(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	_mali_osk_spinlock_irq_unlock(cache->lock);
+}
+
+/* Implementation of the L2 cache interface */
+
+struct mali_l2_cache_core *mali_l2_cache_create(
+	_mali_osk_resource_t *resource, u32 domain_index)
+{
+	struct mali_l2_cache_core *cache = NULL;
+#if defined(DEBUG)
+	u32 cache_size;
+#endif
+
+	MALI_DEBUG_PRINT(4, ("Mali L2 cache: Creating Mali L2 cache: %s\n",
+			     resource->description));
+
+	if (mali_global_num_l2s >= MALI_MAX_NUMBER_OF_L2_CACHE_CORES) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Too many L2 caches\n"));
+		return NULL;
+	}
+
+	cache = _mali_osk_malloc(sizeof(struct mali_l2_cache_core));
+	if (NULL == cache) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Failed to allocate memory for L2 cache core\n"));
+		return NULL;
+	}
+
+	cache->core_id =  mali_global_num_l2s;
+	cache->counter_src0 = MALI_HW_CORE_NO_COUNTER;
+	cache->counter_src1 = MALI_HW_CORE_NO_COUNTER;
+	cache->counter_value0_base = 0;
+	cache->counter_value1_base = 0;
+	cache->pm_domain = NULL;
+	cache->power_is_on = MALI_FALSE;
+	cache->last_invalidated_id = 0;
+
+	if (_MALI_OSK_ERR_OK != mali_hw_core_create(&cache->hw_core,
+			resource, MALI400_L2_CACHE_REGISTERS_SIZE)) {
+		_mali_osk_free(cache);
+		return NULL;
+	}
+
+#if defined(DEBUG)
+	cache_size = mali_hw_core_register_read(&cache->hw_core,
+						MALI400_L2_CACHE_REGISTER_SIZE);
+	MALI_DEBUG_PRINT(2, ("Mali L2 cache: Created %s: % 3uK, %u-way, % 2ubyte cache line, % 3ubit external bus\n",
+			     resource->description,
+			     1 << (((cache_size >> 16) & 0xff) - 10),
+			     1 << ((cache_size >> 8) & 0xff),
+			     1 << (cache_size & 0xff),
+			     1 << ((cache_size >> 24) & 0xff)));
+#endif
+
+	cache->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED,
+			_MALI_OSK_LOCK_ORDER_L2);
+	if (NULL == cache->lock) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Failed to create counter lock for L2 cache core %s\n",
+				  cache->hw_core.description));
+		mali_hw_core_delete(&cache->hw_core);
+		_mali_osk_free(cache);
+		return NULL;
+	}
+
+	/* register with correct power domain */
+	cache->pm_domain = mali_pm_register_l2_cache(
+				   domain_index, cache);
+
+	mali_global_l2s[mali_global_num_l2s] = cache;
+	mali_global_num_l2s++;
+
+	return cache;
+}
+
+void mali_l2_cache_delete(struct mali_l2_cache_core *cache)
+{
+	u32 i;
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		if (mali_global_l2s[i] != cache) {
+			continue;
+		}
+
+		mali_global_l2s[i] = NULL;
+		mali_global_num_l2s--;
+
+		if (i == mali_global_num_l2s) {
+			/* Removed last element, nothing more to do */
+			break;
+		}
+
+		/*
+		 * We removed a l2 cache from the middle of the array,
+		 * so move the last l2 cache to current position
+		 */
+		mali_global_l2s[i] = mali_global_l2s[mali_global_num_l2s];
+		mali_global_l2s[mali_global_num_l2s] = NULL;
+
+		/* All good */
+		break;
+	}
+
+	_mali_osk_spinlock_irq_term(cache->lock);
+	mali_hw_core_delete(&cache->hw_core);
+	_mali_osk_free(cache);
+}
+
+void mali_l2_cache_power_up(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	mali_l2_cache_lock(cache);
+
+	mali_l2_cache_reset(cache);
+
+	if ((1 << MALI_DOMAIN_INDEX_DUMMY) != cache->pm_domain->pmu_mask)
+		MALI_DEBUG_ASSERT(MALI_FALSE == cache->power_is_on);
+	cache->power_is_on = MALI_TRUE;
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_power_down(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	mali_l2_cache_lock(cache);
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == cache->power_is_on);
+
+	/*
+	 * The HW counters will start from zero again when we resume,
+	 * but we should report counters as always increasing.
+	 * Take a copy of the HW values now in order to add this to
+	 * the values we report after being powered up.
+	 *
+	 * The physical power off of the L2 cache might be outside our
+	 * own control (e.g. runtime PM). That is why we must manually
+	 * set set the counter value to zero as well.
+	 */
+
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+		cache->counter_value0_base += mali_hw_core_register_read(
+						      &cache->hw_core,
+						      MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0);
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0, 0);
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		cache->counter_value1_base += mali_hw_core_register_read(
+						      &cache->hw_core,
+						      MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1);
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1, 0);
+	}
+
+
+	cache->power_is_on = MALI_FALSE;
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_core_set_counter_src(
+	struct mali_l2_cache_core *cache, u32 source_id, u32 counter)
+{
+	u32 reg_offset_src;
+	u32 reg_offset_val;
+
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT(source_id >= 0 && source_id <= 1);
+
+	mali_l2_cache_lock(cache);
+
+	if (0 == source_id) {
+		/* start counting from 0 */
+		cache->counter_value0_base = 0;
+		cache->counter_src0 = counter;
+		reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0;
+		reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0;
+	} else {
+		/* start counting from 0 */
+		cache->counter_value1_base = 0;
+		cache->counter_src1 = counter;
+		reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1;
+		reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1;
+	}
+
+	if (cache->power_is_on) {
+		u32 hw_src;
+
+		if (MALI_HW_CORE_NO_COUNTER != counter) {
+			hw_src = counter;
+		} else {
+			hw_src = 0; /* disable value for HW */
+		}
+
+		/* Set counter src */
+		mali_hw_core_register_write(&cache->hw_core,
+					    reg_offset_src, hw_src);
+
+		/* Make sure the HW starts counting from 0 again */
+		mali_hw_core_register_write(&cache->hw_core,
+					    reg_offset_val, 0);
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_core_get_counter_values(
+	struct mali_l2_cache_core *cache,
+	u32 *src0, u32 *value0, u32 *src1, u32 *value1)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT(NULL != src0);
+	MALI_DEBUG_ASSERT(NULL != value0);
+	MALI_DEBUG_ASSERT(NULL != src1);
+	MALI_DEBUG_ASSERT(NULL != value1);
+
+	mali_l2_cache_lock(cache);
+
+	*src0 = cache->counter_src0;
+	*src1 = cache->counter_src1;
+
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+		if (MALI_TRUE == cache->power_is_on) {
+			*value0 = mali_hw_core_register_read(&cache->hw_core,
+							     MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0);
+		} else {
+			*value0 = 0;
+		}
+
+		/* Add base offset value (in case we have been power off) */
+		*value0 += cache->counter_value0_base;
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		if (MALI_TRUE == cache->power_is_on) {
+			*value1 = mali_hw_core_register_read(&cache->hw_core,
+							     MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1);
+		} else {
+			*value1 = 0;
+		}
+
+		/* Add base offset value (in case we have been power off) */
+		*value1 += cache->counter_value1_base;
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index)
+{
+	if (mali_global_num_l2s > index) {
+		return mali_global_l2s[index];
+	}
+
+	return NULL;
+}
+
+u32 mali_l2_cache_core_get_glob_num_l2_cores(void)
+{
+	return mali_global_num_l2s;
+}
+
+void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	if (NULL == cache) {
+		return;
+	}
+
+	mali_l2_cache_lock(cache);
+
+	cache->last_invalidated_id = mali_scheduler_get_new_cache_order();
+	mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND,
+				   MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_invalidate_conditional(
+	struct mali_l2_cache_core *cache, u32 id)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	if (NULL == cache) {
+		return;
+	}
+
+	/*
+	 * If the last cache invalidation was done by a job with a higher id we
+	 * don't have to flush. Since user space will store jobs w/ their
+	 * corresponding memory in sequence (first job #0, then job #1, ...),
+	 * we don't have to flush for job n-1 if job n has already invalidated
+	 * the cache since we know for sure that job n-1's memory was already
+	 * written when job n was started.
+	 */
+
+	mali_l2_cache_lock(cache);
+
+	if (((s32)id) > ((s32)cache->last_invalidated_id)) {
+		/* Set latest invalidated id to current "point in time" */
+		cache->last_invalidated_id =
+			mali_scheduler_get_new_cache_order();
+		mali_l2_cache_send_command(cache,
+					   MALI400_L2_CACHE_REGISTER_COMMAND,
+					   MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_invalidate_all(void)
+{
+	u32 i;
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		struct mali_l2_cache_core *cache = mali_global_l2s[i];
+		_mali_osk_errcode_t ret;
+
+		MALI_DEBUG_ASSERT_POINTER(cache);
+
+		mali_l2_cache_lock(cache);
+
+		if (MALI_TRUE != cache->power_is_on) {
+			mali_l2_cache_unlock(cache);
+			continue;
+		}
+
+		cache->last_invalidated_id =
+			mali_scheduler_get_new_cache_order();
+
+		ret = mali_l2_cache_send_command(cache,
+						 MALI400_L2_CACHE_REGISTER_COMMAND,
+						 MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_PRINT_ERROR(("Failed to invalidate cache\n"));
+		}
+
+		mali_l2_cache_unlock(cache);
+	}
+}
+
+void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages)
+{
+	u32 i;
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		struct mali_l2_cache_core *cache = mali_global_l2s[i];
+		u32 j;
+
+		MALI_DEBUG_ASSERT_POINTER(cache);
+
+		mali_l2_cache_lock(cache);
+
+		if (MALI_TRUE != cache->power_is_on) {
+			mali_l2_cache_unlock(cache);
+			continue;
+		}
+
+		for (j = 0; j < num_pages; j++) {
+			_mali_osk_errcode_t ret;
+
+			ret = mali_l2_cache_send_command(cache,
+							 MALI400_L2_CACHE_REGISTER_CLEAR_PAGE,
+							 pages[j]);
+			if (_MALI_OSK_ERR_OK != ret) {
+				MALI_PRINT_ERROR(("Failed to invalidate cache (page)\n"));
+			}
+		}
+
+		mali_l2_cache_unlock(cache);
+	}
+}
+
+/* -------- local helper functions below -------- */
+
+static void mali_l2_cache_reset(struct mali_l2_cache_core *cache)
+{
+    MALI_DEBUG_ASSERT_POINTER(cache);
+    MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock);
+
+    /* Kasin Added, skip off power domain. */
+    if (cache && cache->pm_domain && cache->pm_domain->power_is_on == MALI_FALSE) {
+        printk("===========%s, %d skip off power domain?\n", __FUNCTION__, __LINE__);
+    }
+
+
+    /* Invalidate cache (just to keep it in a known state at startup) */
+    mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND,
+            MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+
+    /* Enable cache */
+    mali_hw_core_register_write(&cache->hw_core,
+            MALI400_L2_CACHE_REGISTER_ENABLE,
+            (u32)MALI400_L2_CACHE_ENABLE_ACCESS |
+            (u32)MALI400_L2_CACHE_ENABLE_READ_ALLOCATE);
+
+	if (MALI400_L2_MAX_READS_NOT_SET != mali_l2_max_reads) {
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_MAX_READS,
+					    (u32)mali_l2_max_reads);
+	}
+
+	/* Restart any performance counters (if enabled) */
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0,
+					    cache->counter_src0);
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1,
+					    cache->counter_src1);
+	}
+}
+
+static _mali_osk_errcode_t mali_l2_cache_send_command(
+	struct mali_l2_cache_core *cache, u32 reg, u32 val)
+{
+	int i = 0;
+	const int loop_count = 100000;
+
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock);
+
+	/*
+	 * First, wait for L2 cache command handler to go idle.
+	 * (Commands received while processing another command will be ignored)
+	 */
+	for (i = 0; i < loop_count; i++) {
+		if (!(mali_hw_core_register_read(&cache->hw_core,
+						 MALI400_L2_CACHE_REGISTER_STATUS) &
+		      (u32)MALI400_L2_CACHE_STATUS_COMMAND_BUSY)) {
+			break;
+		}
+	}
+
+	if (i == loop_count) {
+		MALI_DEBUG_PRINT(1, ("Mali L2 cache: aborting wait for command interface to go idle\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* then issue the command */
+	mali_hw_core_register_write(&cache->hw_core, reg, val);
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r6p2/common/mali_l2_cache.h b/utgard/r6p2/common/mali_l2_cache.h
new file mode 100755
index 0000000..de92ad6
--- /dev/null
+++ b/utgard/r6p2/common/mali_l2_cache.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_L2_CACHE_H__
+#define __MALI_KERNEL_L2_CACHE_H__
+
+#include "mali_osk.h"
+#include "mali_hw_core.h"
+
+#define MALI_MAX_NUMBER_OF_L2_CACHE_CORES  3
+/* Maximum 1 GP and 4 PP for an L2 cache core (Mali-400 MP4) */
+#define MALI_MAX_NUMBER_OF_GROUPS_PER_L2_CACHE 5
+
+/**
+ * Definition of the L2 cache core struct
+ * Used to track a L2 cache unit in the system.
+ * Contains information about the mapping of the registers
+ */
+struct mali_l2_cache_core {
+	/* Common HW core functionality */
+	struct mali_hw_core hw_core;
+
+	/* Synchronize L2 cache access */
+	_mali_osk_spinlock_irq_t *lock;
+
+	/* Unique core ID */
+	u32 core_id;
+
+	/* The power domain this L2 cache belongs to */
+	struct mali_pm_domain *pm_domain;
+
+	/* MALI_TRUE if power is on for this L2 cache */
+	mali_bool power_is_on;
+
+	/* A "timestamp" to avoid unnecessary flushes */
+	u32 last_invalidated_id;
+
+	/* Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+	u32 counter_src0;
+
+	/* Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+	u32 counter_src1;
+
+	/*
+	 * Performance counter 0 value base/offset
+	 * (allows accumulative reporting even after power off)
+	 */
+	u32 counter_value0_base;
+
+	/*
+	 * Performance counter 0 value base/offset
+	 * (allows accumulative reporting even after power off)
+	 */
+	u32 counter_value1_base;
+
+	/* Used by PM domains to link L2 caches of same domain */
+	_mali_osk_list_t pm_domain_list;
+};
+
+_mali_osk_errcode_t mali_l2_cache_initialize(void);
+void mali_l2_cache_terminate(void);
+
+struct mali_l2_cache_core *mali_l2_cache_create(
+	_mali_osk_resource_t *resource, u32 domain_index);
+void mali_l2_cache_delete(struct mali_l2_cache_core *cache);
+
+MALI_STATIC_INLINE u32 mali_l2_cache_get_id(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->core_id;
+}
+
+MALI_STATIC_INLINE struct mali_pm_domain *mali_l2_cache_get_pm_domain(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->pm_domain;
+}
+
+void mali_l2_cache_power_up(struct mali_l2_cache_core *cache);
+void mali_l2_cache_power_down(struct mali_l2_cache_core *cache);
+
+void mali_l2_cache_core_set_counter_src(
+	struct mali_l2_cache_core *cache, u32 source_id, u32 counter);
+
+MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src0(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->counter_src0;
+}
+
+MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src1(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->counter_src1;
+}
+
+void mali_l2_cache_core_get_counter_values(
+	struct mali_l2_cache_core *cache,
+	u32 *src0, u32 *value0, u32 *src1, u32 *value1);
+
+struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index);
+u32 mali_l2_cache_core_get_glob_num_l2_cores(void);
+
+struct mali_group *mali_l2_cache_get_group(
+	struct mali_l2_cache_core *cache, u32 index);
+
+void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache);
+void mali_l2_cache_invalidate_conditional(
+	struct mali_l2_cache_core *cache, u32 id);
+
+void mali_l2_cache_invalidate_all(void);
+void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages);
+
+#endif /* __MALI_KERNEL_L2_CACHE_H__ */
diff --git a/utgard/r6p2/common/mali_mem_validation.c b/utgard/r6p2/common/mali_mem_validation.c
new file mode 100644
index 0000000..dddfb58
--- /dev/null
+++ b/utgard/r6p2/common/mali_mem_validation.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_mem_validation.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#define MALI_INVALID_MEM_ADDR 0xFFFFFFFF
+
+typedef struct {
+	u32 phys_base;        /**< Mali physical base of the memory, page aligned */
+	u32 size;             /**< size in bytes of the memory, multiple of page size */
+} _mali_mem_validation_t;
+
+static _mali_mem_validation_t mali_mem_validator = { MALI_INVALID_MEM_ADDR, MALI_INVALID_MEM_ADDR };
+
+_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size)
+{
+	/* Check that no other MEM_VALIDATION resources exist */
+	if (MALI_INVALID_MEM_ADDR != mali_mem_validator.phys_base) {
+		MALI_PRINT_ERROR(("Failed to add frame buffer memory; another range is already specified\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Check restrictions on page alignment */
+	if ((0 != (start & (~_MALI_OSK_CPU_PAGE_MASK))) ||
+	    (0 != (size & (~_MALI_OSK_CPU_PAGE_MASK)))) {
+		MALI_PRINT_ERROR(("Failed to add frame buffer memory; incorrect alignment\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	mali_mem_validator.phys_base = start;
+	mali_mem_validator.size = size;
+	MALI_DEBUG_PRINT(2, ("Memory Validator installed for Mali physical address base=0x%08X, size=0x%08X\n",
+			     mali_mem_validator.phys_base, mali_mem_validator.size));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size)
+{
+#if 0
+	if (phys_addr < (phys_addr + size)) { /* Don't allow overflow (or zero size) */
+		if ((0 == (phys_addr & (~_MALI_OSK_CPU_PAGE_MASK))) &&
+		    (0 == (size & (~_MALI_OSK_CPU_PAGE_MASK)))) {
+			if ((phys_addr          >= mali_mem_validator.phys_base) &&
+			    ((phys_addr + (size - 1)) >= mali_mem_validator.phys_base) &&
+			    (phys_addr          <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1))) &&
+			    ((phys_addr + (size - 1)) <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1)))) {
+				MALI_DEBUG_PRINT(3, ("Accepted range 0x%08X + size 0x%08X (= 0x%08X)\n", phys_addr, size, (phys_addr + size - 1)));
+				return _MALI_OSK_ERR_OK;
+			}
+		}
+	}
+
+	MALI_PRINT_ERROR(("MALI PHYSICAL RANGE VALIDATION ERROR: The range supplied was: phys_base=0x%08X, size=0x%08X\n", phys_addr, size));
+
+	return _MALI_OSK_ERR_FAULT;
+#else
+	return _MALI_OSK_ERR_OK;
+#endif
+}
diff --git a/utgard/r6p2/common/mali_mem_validation.h b/utgard/r6p2/common/mali_mem_validation.h
new file mode 100644
index 0000000..f7e6cb1
--- /dev/null
+++ b/utgard/r6p2/common/mali_mem_validation.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2011-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEM_VALIDATION_H__
+#define __MALI_MEM_VALIDATION_H__
+
+#include "mali_osk.h"
+
+_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size);
+_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size);
+
+#endif /* __MALI_MEM_VALIDATION_H__ */
diff --git a/utgard/r6p2/common/mali_mmu.c b/utgard/r6p2/common/mali_mmu.c
new file mode 100755
index 0000000..c08ce1f
--- /dev/null
+++ b/utgard/r6p2/common/mali_mmu.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_ukk.h"
+
+#include "mali_mmu.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "mali_mmu_page_directory.h"
+
+/**
+ * Size of the MMU registers in bytes
+ */
+#define MALI_MMU_REGISTERS_SIZE 0x24
+
+/**
+ * MMU commands
+ * These are the commands that can be sent
+ * to the MMU unit.
+ */
+typedef enum mali_mmu_command {
+	MALI_MMU_COMMAND_ENABLE_PAGING = 0x00, /**< Enable paging (memory translation) */
+	MALI_MMU_COMMAND_DISABLE_PAGING = 0x01, /**< Disable paging (memory translation) */
+	MALI_MMU_COMMAND_ENABLE_STALL = 0x02, /**<  Enable stall on page fault */
+	MALI_MMU_COMMAND_DISABLE_STALL = 0x03, /**< Disable stall on page fault */
+	MALI_MMU_COMMAND_ZAP_CACHE = 0x04, /**< Zap the entire page table cache */
+	MALI_MMU_COMMAND_PAGE_FAULT_DONE = 0x05, /**< Page fault processed */
+	MALI_MMU_COMMAND_HARD_RESET = 0x06 /**< Reset the MMU back to power-on settings */
+} mali_mmu_command;
+
+static void mali_mmu_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_mmu_probe_ack(void *data);
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu);
+
+/* page fault queue flush helper pages
+ * note that the mapping pointers are currently unused outside of the initialization functions */
+static mali_dma_addr mali_page_fault_flush_page_directory = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_page_directory_mapping = NULL;
+static mali_dma_addr mali_page_fault_flush_page_table = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_page_table_mapping = NULL;
+static mali_dma_addr mali_page_fault_flush_data_page = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_data_page_mapping = NULL;
+
+/* an empty page directory (no address valid) which is active on any MMU not currently marked as in use */
+static mali_dma_addr mali_empty_page_directory_phys   = MALI_INVALID_PAGE;
+static mali_io_address mali_empty_page_directory_virt = NULL;
+
+
+_mali_osk_errcode_t mali_mmu_initialize(void)
+{
+	/* allocate the helper pages */
+	mali_empty_page_directory_phys = mali_allocate_empty_page(&mali_empty_page_directory_virt);
+	if (0 == mali_empty_page_directory_phys) {
+		MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate empty page directory.\n"));
+		mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	if (_MALI_OSK_ERR_OK != mali_create_fault_flush_pages(&mali_page_fault_flush_page_directory,
+			&mali_page_fault_flush_page_directory_mapping,
+			&mali_page_fault_flush_page_table,
+			&mali_page_fault_flush_page_table_mapping,
+			&mali_page_fault_flush_data_page,
+			&mali_page_fault_flush_data_page_mapping)) {
+		MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate fault flush pages\n"));
+		mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt);
+		mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+		mali_empty_page_directory_virt = NULL;
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mmu_terminate(void)
+{
+	MALI_DEBUG_PRINT(3, ("Mali MMU: terminating\n"));
+
+	/* Free global helper pages */
+	mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt);
+	mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+	mali_empty_page_directory_virt = NULL;
+
+	/* Free the page fault flush pages */
+	mali_destroy_fault_flush_pages(&mali_page_fault_flush_page_directory,
+				       &mali_page_fault_flush_page_directory_mapping,
+				       &mali_page_fault_flush_page_table,
+				       &mali_page_fault_flush_page_table_mapping,
+				       &mali_page_fault_flush_data_page,
+				       &mali_page_fault_flush_data_page_mapping);
+}
+
+struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual)
+{
+	struct mali_mmu_core *mmu = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(resource);
+
+	MALI_DEBUG_PRINT(2, ("Mali MMU: Creating Mali MMU: %s\n", resource->description));
+
+	mmu = _mali_osk_calloc(1, sizeof(struct mali_mmu_core));
+	if (NULL != mmu) {
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&mmu->hw_core, resource, MALI_MMU_REGISTERS_SIZE)) {
+			if (_MALI_OSK_ERR_OK == mali_group_add_mmu_core(group, mmu)) {
+				if (is_virtual) {
+					/* Skip reset and IRQ setup for virtual MMU */
+					return mmu;
+				}
+
+				if (_MALI_OSK_ERR_OK == mali_mmu_reset(mmu)) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					mmu->irq = _mali_osk_irq_init(resource->irq,
+								      mali_group_upper_half_mmu,
+								      group,
+								      mali_mmu_probe_trigger,
+								      mali_mmu_probe_ack,
+								      mmu,
+								      resource->description);
+					if (NULL != mmu->irq) {
+						return mmu;
+					} else {
+						MALI_PRINT_ERROR(("Mali MMU: Failed to setup interrupt handlers for MMU %s\n", mmu->hw_core.description));
+					}
+				}
+				mali_group_remove_mmu_core(group);
+			} else {
+				MALI_PRINT_ERROR(("Mali MMU: Failed to add core %s to group\n", mmu->hw_core.description));
+			}
+			mali_hw_core_delete(&mmu->hw_core);
+		}
+
+		_mali_osk_free(mmu);
+	} else {
+		MALI_PRINT_ERROR(("Failed to allocate memory for MMU\n"));
+	}
+
+	return NULL;
+}
+
+void mali_mmu_delete(struct mali_mmu_core *mmu)
+{
+	if (NULL != mmu->irq) {
+		_mali_osk_irq_term(mmu->irq);
+	}
+
+	mali_hw_core_delete(&mmu->hw_core);
+	_mali_osk_free(mmu);
+}
+
+static void mali_mmu_enable_paging(struct mali_mmu_core *mmu)
+{
+	int i;
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_PAGING);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS) & MALI_MMU_STATUS_BIT_PAGING_ENABLED) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Enable paging request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+	}
+}
+
+/**
+ * Issues the enable stall command to the MMU and waits for HW to complete the request
+ * @param mmu The MMU to enable paging for
+ * @return MALI_TRUE if HW stall was successfully engaged, otherwise MALI_FALSE (req timed out)
+ */
+static mali_bool mali_mmu_enable_stall(struct mali_mmu_core *mmu)
+{
+	int i;
+	u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+
+	if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+		MALI_DEBUG_PRINT(4, ("MMU stall is implicit when Paging is not enabled.\n"));
+		return MALI_TRUE;
+	}
+
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(3, ("Aborting MMU stall request since it is in pagefault state.\n"));
+		return MALI_FALSE;
+	}
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_STALL);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+		if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+			break;
+		}
+		if ((mmu_status & MALI_MMU_STATUS_BIT_STALL_ACTIVE) && (0 == (mmu_status & MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE))) {
+			break;
+		}
+		if (0 == (mmu_status & (MALI_MMU_STATUS_BIT_PAGING_ENABLED))) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_DEBUG_PRINT(2, ("Enable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+		return MALI_FALSE;
+	}
+
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(2, ("Aborting MMU stall request since it has a pagefault.\n"));
+		return MALI_FALSE;
+	}
+
+	return MALI_TRUE;
+}
+
+/**
+ * Issues the disable stall command to the MMU and waits for HW to complete the request
+ * @param mmu The MMU to enable paging for
+ */
+static void mali_mmu_disable_stall(struct mali_mmu_core *mmu)
+{
+	int i;
+	u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+
+	if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+		MALI_DEBUG_PRINT(3, ("MMU disable skipped since it was not enabled.\n"));
+		return;
+	}
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(2, ("Aborting MMU disable stall request since it is in pagefault state.\n"));
+		return;
+	}
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_DISABLE_STALL);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		u32 status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+		if (0 == (status & MALI_MMU_STATUS_BIT_STALL_ACTIVE)) {
+			break;
+		}
+		if (status &  MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+			break;
+		}
+		if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) MALI_DEBUG_PRINT(1, ("Disable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+}
+
+void mali_mmu_page_fault_done(struct mali_mmu_core *mmu)
+{
+	MALI_DEBUG_PRINT(4, ("Mali MMU: %s: Leaving page fault mode\n", mmu->hw_core.description));
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_PAGE_FAULT_DONE);
+}
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu)
+{
+	int i;
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, 0xCAFEBABE);
+	MALI_DEBUG_ASSERT(0xCAFEB000 == mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR));
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_HARD_RESET);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR) == 0) {
+			break;
+		}
+	}
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Reset request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu)
+{
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT;
+	mali_bool stall_success;
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	stall_success = mali_mmu_enable_stall(mmu);
+	if (!stall_success) {
+		err = _MALI_OSK_ERR_BUSY;
+	}
+
+	MALI_DEBUG_PRINT(3, ("Mali MMU: mali_kernel_mmu_reset: %s\n", mmu->hw_core.description));
+
+	if (_MALI_OSK_ERR_OK == mali_mmu_raw_reset(mmu)) {
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+		/* no session is active, so just activate the empty page directory */
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, mali_empty_page_directory_phys);
+		mali_mmu_enable_paging(mmu);
+		err = _MALI_OSK_ERR_OK;
+	}
+	mali_mmu_disable_stall(mmu);
+
+	return err;
+}
+
+mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success = mali_mmu_enable_stall(mmu);
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+
+	if (MALI_FALSE == stall_success) {
+		/* False means that it is in Pagefault state. Not possible to disable_stall then */
+		return MALI_FALSE;
+	}
+
+	mali_mmu_disable_stall(mmu);
+	return MALI_TRUE;
+}
+
+void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+}
+
+
+void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_ZAP_ONE_LINE, MALI_MMU_PDE_ENTRY(mali_address));
+}
+
+static void mali_mmu_activate_address_space(struct mali_mmu_core *mmu, u32 page_directory)
+{
+	/* The MMU must be in stalled or page fault mode, for this writing to work */
+	MALI_DEBUG_ASSERT(0 != (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)
+				& (MALI_MMU_STATUS_BIT_STALL_ACTIVE | MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE)));
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, page_directory);
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+
+}
+
+void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir)
+{
+	mali_bool stall_success;
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	MALI_DEBUG_PRINT(5, ("Asked to activate page directory 0x%x on MMU %s\n", pagedir, mmu->hw_core.description));
+
+	stall_success = mali_mmu_enable_stall(mmu);
+	MALI_DEBUG_ASSERT(stall_success);
+	MALI_IGNORE(stall_success);
+	mali_mmu_activate_address_space(mmu, pagedir->page_directory);
+	mali_mmu_disable_stall(mmu);
+}
+
+void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success;
+
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+	MALI_DEBUG_PRINT(3, ("Activating the empty page directory on MMU %s\n", mmu->hw_core.description));
+
+	stall_success = mali_mmu_enable_stall(mmu);
+
+	/* This function can only be called when the core is idle, so it could not fail. */
+	MALI_DEBUG_ASSERT(stall_success);
+	MALI_IGNORE(stall_success);
+
+	mali_mmu_activate_address_space(mmu, mali_empty_page_directory_phys);
+	mali_mmu_disable_stall(mmu);
+}
+
+void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success;
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	MALI_DEBUG_PRINT(3, ("Activating the page fault flush page directory on MMU %s\n", mmu->hw_core.description));
+	stall_success = mali_mmu_enable_stall(mmu);
+	/* This function is expect to fail the stalling, since it might be in PageFault mode when it is called */
+	mali_mmu_activate_address_space(mmu, mali_page_fault_flush_page_directory);
+	if (MALI_TRUE == stall_success) mali_mmu_disable_stall(mmu);
+}
+
+/* Is called when we want the mmu to give an interrupt */
+static void mali_mmu_probe_trigger(void *data)
+{
+	struct mali_mmu_core *mmu = (struct mali_mmu_core *)data;
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+}
+
+/* Is called when the irq probe wants the mmu to acknowledge an interrupt from the hw */
+extern int mali_page_fault;
+static _mali_osk_errcode_t mali_mmu_probe_ack(void *data)
+{
+	struct mali_mmu_core *mmu = (struct mali_mmu_core *)data;
+	u32 int_stat;
+
+	int_stat = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS);
+
+	MALI_DEBUG_PRINT(2, ("mali_mmu_probe_irq_acknowledge: intstat 0x%x\n", int_stat));
+	if (int_stat & MALI_MMU_INTERRUPT_PAGE_FAULT) {
+		MALI_DEBUG_PRINT(2, ("Probe: Page fault detect: PASSED\n"));
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_PAGE_FAULT);
+	} else {
+		MALI_DEBUG_PRINT(1, ("Probe: Page fault detect: FAILED\n"));
+		mali_page_fault++;
+	}
+
+	if (int_stat & MALI_MMU_INTERRUPT_READ_BUS_ERROR) {
+		MALI_DEBUG_PRINT(2, ("Probe: Bus read error detect: PASSED\n"));
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+	} else {
+		MALI_DEBUG_PRINT(1, ("Probe: Bus read error detect: FAILED\n"));
+		mali_page_fault++;
+	}
+
+	if ((int_stat & (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) ==
+	    (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) {
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+#if 0
+void mali_mmu_print_state(struct mali_mmu_core *mmu)
+{
+	MALI_DEBUG_PRINT(2, ("MMU: State of %s is 0x%08x\n", mmu->hw_core.description, mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+}
+#endif
diff --git a/utgard/r6p2/common/mali_mmu.h b/utgard/r6p2/common/mali_mmu.h
new file mode 100755
index 0000000..ac31c7a
--- /dev/null
+++ b/utgard/r6p2/common/mali_mmu.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MMU_H__
+#define __MALI_MMU_H__
+
+#include "mali_osk.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_hw_core.h"
+
+/* Forward declaration from mali_group.h */
+struct mali_group;
+
+/**
+ * MMU register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_mmu_register {
+	MALI_MMU_REGISTER_DTE_ADDR = 0x0000, /**< Current Page Directory Pointer */
+	MALI_MMU_REGISTER_STATUS = 0x0004, /**< Status of the MMU */
+	MALI_MMU_REGISTER_COMMAND = 0x0008, /**< Command register, used to control the MMU */
+	MALI_MMU_REGISTER_PAGE_FAULT_ADDR = 0x000C, /**< Logical address of the last page fault */
+	MALI_MMU_REGISTER_ZAP_ONE_LINE = 0x010, /**< Used to invalidate the mapping of a single page from the MMU */
+	MALI_MMU_REGISTER_INT_RAWSTAT = 0x0014, /**< Raw interrupt status, all interrupts visible */
+	MALI_MMU_REGISTER_INT_CLEAR = 0x0018, /**< Indicate to the MMU that the interrupt has been received */
+	MALI_MMU_REGISTER_INT_MASK = 0x001C, /**< Enable/disable types of interrupts */
+	MALI_MMU_REGISTER_INT_STATUS = 0x0020 /**< Interrupt status based on the mask */
+} mali_mmu_register;
+
+/**
+ * MMU interrupt register bits
+ * Each cause of the interrupt is reported
+ * through the (raw) interrupt status registers.
+ * Multiple interrupts can be pending, so multiple bits
+ * can be set at once.
+ */
+typedef enum mali_mmu_interrupt {
+	MALI_MMU_INTERRUPT_PAGE_FAULT = 0x01, /**< A page fault occured */
+	MALI_MMU_INTERRUPT_READ_BUS_ERROR = 0x02 /**< A bus read error occured */
+} mali_mmu_interrupt;
+
+typedef enum mali_mmu_status_bits {
+	MALI_MMU_STATUS_BIT_PAGING_ENABLED      = 1 << 0,
+	MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE   = 1 << 1,
+	MALI_MMU_STATUS_BIT_STALL_ACTIVE        = 1 << 2,
+	MALI_MMU_STATUS_BIT_IDLE                = 1 << 3,
+	MALI_MMU_STATUS_BIT_REPLAY_BUFFER_EMPTY = 1 << 4,
+	MALI_MMU_STATUS_BIT_PAGE_FAULT_IS_WRITE = 1 << 5,
+	MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE    = 1 << 31,
+} mali_mmu_status_bits;
+
+/**
+ * Definition of the MMU struct
+ * Used to track a MMU unit in the system.
+ * Contains information about the mapping of the registers
+ */
+struct mali_mmu_core {
+	struct mali_hw_core hw_core; /**< Common for all HW cores */
+	_mali_osk_irq_t *irq;        /**< IRQ handler */
+};
+
+_mali_osk_errcode_t mali_mmu_initialize(void);
+
+void mali_mmu_terminate(void);
+
+struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual);
+void mali_mmu_delete(struct mali_mmu_core *mmu);
+
+_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu);
+mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu);
+void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu);
+void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address);
+
+void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir);
+void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu);
+void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu);
+
+void mali_mmu_page_fault_done(struct mali_mmu_core *mmu);
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_mmu_get_interrupt_result(struct mali_mmu_core *mmu)
+{
+	u32 rawstat_used = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT);
+	if (0 == rawstat_used) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	}
+
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+
+MALI_STATIC_INLINE u32 mali_mmu_get_int_status(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_rawstat(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT);
+}
+
+MALI_STATIC_INLINE void mali_mmu_mask_all_interrupts(struct mali_mmu_core *mmu)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, 0);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_status(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_page_fault_addr(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_PAGE_FAULT_ADDR);
+}
+
+#endif /* __MALI_MMU_H__ */
diff --git a/utgard/r6p2/common/mali_mmu_page_directory.c b/utgard/r6p2/common/mali_mmu_page_directory.c
new file mode 100755
index 0000000..74d8cc8
--- /dev/null
+++ b/utgard/r6p2/common/mali_mmu_page_directory.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_memory.h"
+#include "mali_l2_cache.h"
+
+static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data);
+
+u32 mali_allocate_empty_page(mali_io_address *virt_addr)
+{
+	_mali_osk_errcode_t err;
+	mali_io_address mapping;
+	mali_dma_addr address;
+
+	if (_MALI_OSK_ERR_OK != mali_mmu_get_table_page(&address, &mapping)) {
+		/* Allocation failed */
+		MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to get table page for empty pgdir\n"));
+		return 0;
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(mapping);
+
+	err = fill_page(mapping, 0);
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_mmu_release_table_page(address, mapping);
+		MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to zero page\n"));
+		return 0;
+	}
+
+	*virt_addr = mapping;
+	return address;
+}
+
+void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr)
+{
+	if (MALI_INVALID_PAGE != address) {
+		mali_mmu_release_table_page(address, virt_addr);
+	}
+}
+
+_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory,
+		mali_io_address *page_directory_mapping,
+		mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+		mali_dma_addr *data_page, mali_io_address *data_page_mapping)
+{
+	_mali_osk_errcode_t err;
+
+	err = mali_mmu_get_table_page(data_page, data_page_mapping);
+	if (_MALI_OSK_ERR_OK == err) {
+		err = mali_mmu_get_table_page(page_table, page_table_mapping);
+		if (_MALI_OSK_ERR_OK == err) {
+			err = mali_mmu_get_table_page(page_directory, page_directory_mapping);
+			if (_MALI_OSK_ERR_OK == err) {
+				fill_page(*data_page_mapping, 0);
+				fill_page(*page_table_mapping, *data_page | MALI_MMU_FLAGS_DEFAULT);
+				fill_page(*page_directory_mapping, *page_table | MALI_MMU_FLAGS_PRESENT);
+				MALI_SUCCESS;
+			}
+			mali_mmu_release_table_page(*page_table, *page_table_mapping);
+			*page_table = MALI_INVALID_PAGE;
+		}
+		mali_mmu_release_table_page(*data_page, *data_page_mapping);
+		*data_page = MALI_INVALID_PAGE;
+	}
+	return err;
+}
+
+void mali_destroy_fault_flush_pages(
+	mali_dma_addr *page_directory, mali_io_address *page_directory_mapping,
+	mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+	mali_dma_addr *data_page, mali_io_address *data_page_mapping)
+{
+	if (MALI_INVALID_PAGE != *page_directory) {
+		mali_mmu_release_table_page(*page_directory, *page_directory_mapping);
+		*page_directory = MALI_INVALID_PAGE;
+		*page_directory_mapping = NULL;
+	}
+
+	if (MALI_INVALID_PAGE != *page_table) {
+		mali_mmu_release_table_page(*page_table, *page_table_mapping);
+		*page_table = MALI_INVALID_PAGE;
+		*page_table_mapping = NULL;
+	}
+
+	if (MALI_INVALID_PAGE != *data_page) {
+		mali_mmu_release_table_page(*data_page, *data_page_mapping);
+		*data_page = MALI_INVALID_PAGE;
+		*data_page_mapping = NULL;
+	}
+}
+
+static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data)
+{
+	int i;
+	MALI_DEBUG_ASSERT_POINTER(mapping);
+
+	for (i = 0; i < MALI_MMU_PAGE_SIZE / 4; i++) {
+		_mali_osk_mem_iowrite32_relaxed(mapping, i * sizeof(u32), data);
+	}
+	_mali_osk_mem_barrier();
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size)
+{
+	const int first_pde = MALI_MMU_PDE_ENTRY(mali_address);
+	const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1);
+	_mali_osk_errcode_t err;
+	mali_io_address pde_mapping;
+	mali_dma_addr pde_phys;
+	int i, page_count;
+	u32 start_address;
+	if (last_pde < first_pde)
+		return _MALI_OSK_ERR_INVALID_ARGS;
+
+	for (i = first_pde; i <= last_pde; i++) {
+		if (0 == (_mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+						 i * sizeof(u32)) & MALI_MMU_FLAGS_PRESENT)) {
+			/* Page table not present */
+			MALI_DEBUG_ASSERT(0 == pagedir->page_entries_usage_count[i]);
+			MALI_DEBUG_ASSERT(NULL == pagedir->page_entries_mapped[i]);
+
+			err = mali_mmu_get_table_page(&pde_phys, &pde_mapping);
+			if (_MALI_OSK_ERR_OK != err) {
+				MALI_PRINT_ERROR(("Failed to allocate page table page.\n"));
+				return err;
+			}
+			pagedir->page_entries_mapped[i] = pde_mapping;
+
+			/* Update PDE, mark as present */
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32),
+							pde_phys | MALI_MMU_FLAGS_PRESENT);
+
+			MALI_DEBUG_ASSERT(0 == pagedir->page_entries_usage_count[i]);
+		}
+
+		if (first_pde == last_pde) {
+			pagedir->page_entries_usage_count[i] += size / MALI_MMU_PAGE_SIZE;
+		} else if (i == first_pde) {
+			start_address = i * MALI_MMU_VIRTUAL_PAGE_SIZE;
+			page_count = (start_address + MALI_MMU_VIRTUAL_PAGE_SIZE - mali_address) / MALI_MMU_PAGE_SIZE;
+			pagedir->page_entries_usage_count[i] += page_count;
+		} else if (i == last_pde) {
+			start_address = i * MALI_MMU_VIRTUAL_PAGE_SIZE;
+			page_count = (mali_address + size - start_address) / MALI_MMU_PAGE_SIZE;
+			pagedir->page_entries_usage_count[i] += page_count;
+		} else {
+			pagedir->page_entries_usage_count[i] = 1024;
+		}
+	}
+	_mali_osk_write_mem_barrier();
+
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE void mali_mmu_zero_pte(mali_io_address page_table, u32 mali_address, u32 size)
+{
+	int i;
+	const int first_pte = MALI_MMU_PTE_ENTRY(mali_address);
+	const int last_pte = MALI_MMU_PTE_ENTRY(mali_address + size - 1);
+
+	for (i = first_pte; i <= last_pte; i++) {
+		_mali_osk_mem_iowrite32_relaxed(page_table, i * sizeof(u32), 0);
+	}
+}
+
+static u32 mali_page_directory_get_phys_address(struct mali_page_directory *pagedir, u32 index)
+{
+	return (_mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+				       index * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK);
+}
+
+
+_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size)
+{
+	const int first_pde = MALI_MMU_PDE_ENTRY(mali_address);
+	const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1);
+	u32 left = size;
+	int i;
+	mali_bool pd_changed = MALI_FALSE;
+	u32 pages_to_invalidate[3]; /* hard-coded to 3: max two pages from the PT level plus max one page from PD level */
+	u32 num_pages_inv = 0;
+	mali_bool invalidate_all = MALI_FALSE; /* safety mechanism in case page_entries_usage_count is unreliable */
+
+	/* For all page directory entries in range. */
+	for (i = first_pde; i <= last_pde; i++) {
+		u32 size_in_pde, offset;
+
+		MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[i]);
+		MALI_DEBUG_ASSERT(0 != pagedir->page_entries_usage_count[i]);
+
+		/* Offset into page table, 0 if mali_address is 4MiB aligned */
+		offset = (mali_address & (MALI_MMU_VIRTUAL_PAGE_SIZE - 1));
+		if (left < MALI_MMU_VIRTUAL_PAGE_SIZE - offset) {
+			size_in_pde = left;
+		} else {
+			size_in_pde = MALI_MMU_VIRTUAL_PAGE_SIZE - offset;
+		}
+
+		pagedir->page_entries_usage_count[i] -= size_in_pde / MALI_MMU_PAGE_SIZE;
+
+		/* If entire page table is unused, free it */
+		if (0 == pagedir->page_entries_usage_count[i]) {
+			u32 page_phys;
+			void *page_virt;
+			MALI_DEBUG_PRINT(4, ("Releasing page table as this is the last reference\n"));
+			/* last reference removed, no need to zero out each PTE  */
+
+			page_phys = MALI_MMU_ENTRY_ADDRESS(_mali_osk_mem_ioread32(pagedir->page_directory_mapped, i * sizeof(u32)));
+			page_virt = pagedir->page_entries_mapped[i];
+			pagedir->page_entries_mapped[i] = NULL;
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0);
+
+			mali_mmu_release_table_page(page_phys, page_virt);
+			pd_changed = MALI_TRUE;
+		} else {
+			MALI_DEBUG_ASSERT(num_pages_inv < 2);
+			if (num_pages_inv < 2) {
+				pages_to_invalidate[num_pages_inv] = mali_page_directory_get_phys_address(pagedir, i);
+				num_pages_inv++;
+			} else {
+				invalidate_all = MALI_TRUE;
+			}
+
+			/* If part of the page table is still in use, zero the relevant PTEs */
+			mali_mmu_zero_pte(pagedir->page_entries_mapped[i], mali_address, size_in_pde);
+		}
+
+		left -= size_in_pde;
+		mali_address += size_in_pde;
+	}
+	_mali_osk_write_mem_barrier();
+
+	/* L2 pages invalidation */
+	if (MALI_TRUE == pd_changed) {
+		MALI_DEBUG_ASSERT(num_pages_inv < 3);
+		if (num_pages_inv < 3) {
+			pages_to_invalidate[num_pages_inv] = pagedir->page_directory;
+			num_pages_inv++;
+		} else {
+			invalidate_all = MALI_TRUE;
+		}
+	}
+
+	if (invalidate_all) {
+		mali_l2_cache_invalidate_all();
+	} else {
+		mali_l2_cache_invalidate_all_pages(pages_to_invalidate, num_pages_inv);
+	}
+
+	MALI_SUCCESS;
+}
+
+struct mali_page_directory *mali_mmu_pagedir_alloc(void)
+{
+	struct mali_page_directory *pagedir;
+	_mali_osk_errcode_t err;
+	mali_dma_addr phys;
+
+	pagedir = _mali_osk_calloc(1, sizeof(struct mali_page_directory));
+	if (NULL == pagedir) {
+		return NULL;
+	}
+
+	err = mali_mmu_get_table_page(&phys, &pagedir->page_directory_mapped);
+	if (_MALI_OSK_ERR_OK != err) {
+		_mali_osk_free(pagedir);
+		return NULL;
+	}
+
+	pagedir->page_directory = (u32)phys;
+
+	/* Zero page directory */
+	fill_page(pagedir->page_directory_mapped, 0);
+
+	return pagedir;
+}
+
+void mali_mmu_pagedir_free(struct mali_page_directory *pagedir)
+{
+	const int num_page_table_entries = sizeof(pagedir->page_entries_mapped) / sizeof(pagedir->page_entries_mapped[0]);
+	int i;
+
+	/* Free referenced page tables and zero PDEs. */
+	for (i = 0; i < num_page_table_entries; i++) {
+		if (pagedir->page_directory_mapped && (_mali_osk_mem_ioread32(
+				pagedir->page_directory_mapped,
+				sizeof(u32)*i) & MALI_MMU_FLAGS_PRESENT)) {
+			mali_dma_addr phys = _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+					     i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK;
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0);
+			mali_mmu_release_table_page(phys, pagedir->page_entries_mapped[i]);
+		}
+	}
+	_mali_osk_write_mem_barrier();
+
+	/* Free the page directory page. */
+	mali_mmu_release_table_page(pagedir->page_directory, pagedir->page_directory_mapped);
+
+	_mali_osk_free(pagedir);
+}
+
+
+void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address,
+			     mali_dma_addr phys_address, u32 size, u32 permission_bits)
+{
+	u32 end_address = mali_address + size;
+	u32 mali_phys = (u32)phys_address;
+
+	/* Map physical pages into MMU page tables */
+	for (; mali_address < end_address; mali_address += MALI_MMU_PAGE_SIZE, mali_phys += MALI_MMU_PAGE_SIZE) {
+		MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)]);
+		_mali_osk_mem_iowrite32_relaxed(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)],
+						MALI_MMU_PTE_ENTRY(mali_address) * sizeof(u32),
+						mali_phys | permission_bits);
+	}
+}
+
+void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr)
+{
+#if defined(DEBUG)
+	u32 pde_index, pte_index;
+	u32 pde, pte;
+
+	pde_index = MALI_MMU_PDE_ENTRY(fault_addr);
+	pte_index = MALI_MMU_PTE_ENTRY(fault_addr);
+
+
+	pde = _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+				     pde_index * sizeof(u32));
+
+
+	if (pde & MALI_MMU_FLAGS_PRESENT) {
+		u32 pte_addr = MALI_MMU_ENTRY_ADDRESS(pde);
+
+		pte = _mali_osk_mem_ioread32(pagedir->page_entries_mapped[pde_index],
+					     pte_index * sizeof(u32));
+
+		MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table present: %08x\n"
+				     "\t\tPTE: %08x, page %08x is %s\n",
+				     fault_addr, pte_addr, pte,
+				     MALI_MMU_ENTRY_ADDRESS(pte),
+				     pte & MALI_MMU_FLAGS_DEFAULT ? "rw" : "not present"));
+	} else {
+		MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table not present: %08x\n",
+				     fault_addr, pde));
+	}
+#else
+	MALI_IGNORE(pagedir);
+	MALI_IGNORE(fault_addr);
+#endif
+}
+
+/* For instrumented */
+struct dump_info {
+	u32 buffer_left;
+	u32 register_writes_size;
+	u32 page_table_dump_size;
+	u32 *buffer;
+};
+
+static _mali_osk_errcode_t writereg(u32 where, u32 what, const char *comment, struct dump_info *info)
+{
+	if (NULL != info) {
+		info->register_writes_size += sizeof(u32) * 2; /* two 32-bit words */
+
+		if (NULL != info->buffer) {
+			/* check that we have enough space */
+			if (info->buffer_left < sizeof(u32) * 2) MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+			*info->buffer = where;
+			info->buffer++;
+
+			*info->buffer = what;
+			info->buffer++;
+
+			info->buffer_left -= sizeof(u32) * 2;
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t mali_mmu_dump_page(mali_io_address page, u32 phys_addr, struct dump_info *info)
+{
+	if (NULL != info) {
+		/* 4096 for the page and 4 bytes for the address */
+		const u32 page_size_in_elements = MALI_MMU_PAGE_SIZE / 4;
+		const u32 page_size_in_bytes = MALI_MMU_PAGE_SIZE;
+		const u32 dump_size_in_bytes = MALI_MMU_PAGE_SIZE + 4;
+
+		info->page_table_dump_size += dump_size_in_bytes;
+
+		if (NULL != info->buffer) {
+			if (info->buffer_left < dump_size_in_bytes) MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+			*info->buffer = phys_addr;
+			info->buffer++;
+
+			_mali_osk_memcpy(info->buffer, page, page_size_in_bytes);
+			info->buffer += page_size_in_elements;
+
+			info->buffer_left -= dump_size_in_bytes;
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t dump_mmu_page_table(struct mali_page_directory *pagedir, struct dump_info *info)
+{
+	MALI_DEBUG_ASSERT_POINTER(pagedir);
+	MALI_DEBUG_ASSERT_POINTER(info);
+
+	if (NULL != pagedir->page_directory_mapped) {
+		int i;
+
+		MALI_CHECK_NO_ERROR(
+			mali_mmu_dump_page(pagedir->page_directory_mapped, pagedir->page_directory, info)
+		);
+
+		for (i = 0; i < 1024; i++) {
+			if (NULL != pagedir->page_entries_mapped[i]) {
+				MALI_CHECK_NO_ERROR(
+					mali_mmu_dump_page(pagedir->page_entries_mapped[i],
+							   _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+									   i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK, info)
+				);
+			}
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t dump_mmu_registers(struct mali_page_directory *pagedir, struct dump_info *info)
+{
+	MALI_CHECK_NO_ERROR(writereg(0x00000000, pagedir->page_directory,
+				     "set the page directory address", info));
+	MALI_CHECK_NO_ERROR(writereg(0x00000008, 4, "zap???", info));
+	MALI_CHECK_NO_ERROR(writereg(0x00000008, 0, "enable paging", info));
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args)
+{
+	struct dump_info info = { 0, 0, 0, NULL };
+	struct mali_session_data *session_data;
+
+	session_data = (struct mali_session_data *)(uintptr_t)(args->ctx);
+	MALI_DEBUG_ASSERT_POINTER(session_data);
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info));
+	MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info));
+	args->size = info.register_writes_size + info.page_table_dump_size;
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args)
+{
+	struct dump_info info = { 0, 0, 0, NULL };
+	struct mali_session_data *session_data;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	session_data = (struct mali_session_data *)(uintptr_t)(args->ctx);
+	MALI_DEBUG_ASSERT_POINTER(session_data);
+
+	info.buffer_left = args->size;
+	info.buffer = (u32 *)(uintptr_t)args->buffer;
+
+	args->register_writes = (uintptr_t)info.buffer;
+	MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info));
+
+	args->page_table_dump = (uintptr_t)info.buffer;
+	MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info));
+
+	args->register_writes_size = info.register_writes_size;
+	args->page_table_dump_size = info.page_table_dump_size;
+
+	MALI_SUCCESS;
+}
diff --git a/utgard/r6p2/common/mali_mmu_page_directory.h b/utgard/r6p2/common/mali_mmu_page_directory.h
new file mode 100755
index 0000000..4fc1058
--- /dev/null
+++ b/utgard/r6p2/common/mali_mmu_page_directory.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MMU_PAGE_DIRECTORY_H__
+#define __MALI_MMU_PAGE_DIRECTORY_H__
+
+#include "mali_osk.h"
+
+/**
+ * Size of an MMU page in bytes
+ */
+#define MALI_MMU_PAGE_SIZE 0x1000
+
+/*
+ * Size of the address space referenced by a page table page
+ */
+#define MALI_MMU_VIRTUAL_PAGE_SIZE 0x400000 /* 4 MiB */
+
+/**
+ * Page directory index from address
+ * Calculates the page directory index from the given address
+ */
+#define MALI_MMU_PDE_ENTRY(address) (((address)>>22) & 0x03FF)
+
+/**
+ * Page table index from address
+ * Calculates the page table index from the given address
+ */
+#define MALI_MMU_PTE_ENTRY(address) (((address)>>12) & 0x03FF)
+
+/**
+ * Extract the memory address from an PDE/PTE entry
+ */
+#define MALI_MMU_ENTRY_ADDRESS(value) ((value) & 0xFFFFFC00)
+
+#define MALI_INVALID_PAGE ((u32)(~0))
+
+/**
+ *
+ */
+typedef enum mali_mmu_entry_flags {
+	MALI_MMU_FLAGS_PRESENT = 0x01,
+	MALI_MMU_FLAGS_READ_PERMISSION = 0x02,
+	MALI_MMU_FLAGS_WRITE_PERMISSION = 0x04,
+	MALI_MMU_FLAGS_OVERRIDE_CACHE  = 0x8,
+	MALI_MMU_FLAGS_WRITE_CACHEABLE  = 0x10,
+	MALI_MMU_FLAGS_WRITE_ALLOCATE  = 0x20,
+	MALI_MMU_FLAGS_WRITE_BUFFERABLE  = 0x40,
+	MALI_MMU_FLAGS_READ_CACHEABLE  = 0x80,
+	MALI_MMU_FLAGS_READ_ALLOCATE  = 0x100,
+	MALI_MMU_FLAGS_MASK = 0x1FF,
+} mali_mmu_entry_flags;
+
+
+#define MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE ( \
+		MALI_MMU_FLAGS_PRESENT | \
+		MALI_MMU_FLAGS_READ_PERMISSION |  \
+		MALI_MMU_FLAGS_WRITE_PERMISSION | \
+		MALI_MMU_FLAGS_OVERRIDE_CACHE | \
+		MALI_MMU_FLAGS_WRITE_CACHEABLE | \
+		MALI_MMU_FLAGS_WRITE_BUFFERABLE | \
+		MALI_MMU_FLAGS_READ_CACHEABLE | \
+		MALI_MMU_FLAGS_READ_ALLOCATE )
+
+#define MALI_MMU_FLAGS_DEFAULT ( \
+				 MALI_MMU_FLAGS_PRESENT | \
+				 MALI_MMU_FLAGS_READ_PERMISSION |  \
+				 MALI_MMU_FLAGS_WRITE_PERMISSION )
+
+
+struct mali_page_directory {
+	u32 page_directory; /**< Physical address of the memory session's page directory */
+	mali_io_address page_directory_mapped; /**< Pointer to the mapped version of the page directory into the kernel's address space */
+
+	mali_io_address page_entries_mapped[1024]; /**< Pointers to the page tables which exists in the page directory mapped into the kernel's address space */
+	u32   page_entries_usage_count[1024]; /**< Tracks usage count of the page table pages, so they can be releases on the last reference */
+};
+
+/* Map Mali virtual address space (i.e. ensure page tables exist for the virtual range)  */
+_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size);
+_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size);
+
+/* Back virtual address space with actual pages. Assumes input is contiguous and 4k aligned. */
+void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address,
+			     mali_dma_addr phys_address, u32 size, u32 permission_bits);
+
+u32 mali_allocate_empty_page(mali_io_address *virtual);
+void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr);
+_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory,
+		mali_io_address *page_directory_mapping,
+		mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+		mali_dma_addr *data_page, mali_io_address *data_page_mapping);
+void mali_destroy_fault_flush_pages(
+	mali_dma_addr *page_directory, mali_io_address *page_directory_mapping,
+	mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+	mali_dma_addr *data_page, mali_io_address *data_page_mapping);
+
+struct mali_page_directory *mali_mmu_pagedir_alloc(void);
+void mali_mmu_pagedir_free(struct mali_page_directory *pagedir);
+
+void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr);
+
+#endif /* __MALI_MMU_PAGE_DIRECTORY_H__ */
diff --git a/utgard/r6p2/common/mali_osk.h b/utgard/r6p2/common/mali_osk.h
new file mode 100755
index 0000000..5c20fc3
--- /dev/null
+++ b/utgard/r6p2/common/mali_osk.h
@@ -0,0 +1,1389 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk.h
+ * Defines the OS abstraction layer for the kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_H__
+#define __MALI_OSK_H__
+
+#include <linux/seq_file.h>
+#include "mali_osk_types.h"
+#include "mali_osk_specific.h"           /* include any per-os specifics */
+#include "mali_osk_locks.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs
+ *
+ * @{
+ */
+
+/** @addtogroup _mali_osk_lock OSK Mutual Exclusion Locks
+ * @{ */
+
+#ifdef DEBUG
+/** @brief Macro for asserting that the current thread holds a given lock
+ */
+#define MALI_DEBUG_ASSERT_LOCK_HELD(l) MALI_DEBUG_ASSERT(_mali_osk_lock_get_owner((_mali_osk_lock_debug_t *)l) == _mali_osk_get_tid());
+
+/** @brief returns a lock's owner (thread id) if debugging is enabled
+ */
+#else
+#define MALI_DEBUG_ASSERT_LOCK_HELD(l) do {} while(0)
+#endif
+
+#define _mali_osk_ctxprintf     seq_printf
+
+/** @} */ /* end group _mali_osk_lock */
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Find the containing structure of another structure
+ *
+ * This is the reverse of the operation 'offsetof'. This means that the
+ * following condition is satisfied:
+ *
+ *   ptr == _MALI_OSK_CONTAINER_OF( &ptr->member, type, member )
+ *
+ * When ptr is of type 'type'.
+ *
+ * Its purpose it to recover a larger structure that has wrapped a smaller one.
+ *
+ * @note no type or memory checking occurs to ensure that a wrapper structure
+ * does in fact exist, and that it is being recovered with respect to the
+ * correct member.
+ *
+ * @param ptr the pointer to the member that is contained within the larger
+ * structure
+ * @param type the type of the structure that contains the member
+ * @param member the name of the member in the structure that ptr points to.
+ * @return a pointer to a \a type object which contains \a member, as pointed
+ * to by \a ptr.
+ */
+#define _MALI_OSK_CONTAINER_OF(ptr, type, member) \
+	((type *)( ((char *)ptr) - offsetof(type,member) ))
+
+/** @addtogroup _mali_osk_wq
+ * @{ */
+
+/** @brief Initialize work queues (for deferred work)
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_wq_init(void);
+
+/** @brief Terminate work queues (for deferred work)
+ */
+void _mali_osk_wq_term(void);
+
+/** @brief Create work in the work queue
+ *
+ * Creates a work object which can be scheduled in the work queue. When
+ * scheduled, \a handler will be called with \a data as the argument.
+ *
+ * Refer to \ref _mali_osk_wq_schedule_work() for details on how work
+ * is scheduled in the queue.
+ *
+ * The returned pointer must be freed with \ref _mali_osk_wq_delete_work()
+ * when no longer needed.
+ */
+_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief A high priority version of \a _mali_osk_wq_create_work()
+ *
+ * Creates a work object which can be scheduled in the high priority work queue.
+ *
+ * This is unfortunately needed to get low latency scheduling of the Mali cores.  Normally we would
+ * schedule the next job in hw_irq or tasklet, but often we can't since we need to synchronously map
+ * and unmap shared memory when a job is connected to external fences (timelines). And this requires
+ * taking a mutex.
+ *
+ * We do signal a lot of other (low priority) work also as part of the job being finished, and if we
+ * don't set this Mali scheduling thread as high priority, we see that the CPU scheduler often runs
+ * random things instead of starting the next GPU job when the GPU is idle.  So setting the gpu
+ * scheduler to high priority does give a visually more responsive system.
+ *
+ * Start the high priority work with: \a _mali_osk_wq_schedule_work_high_pri()
+ */
+_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief Delete a work object
+ *
+ * This will flush the work queue to ensure that the work handler will not
+ * be called after deletion.
+ */
+void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work);
+
+/** @brief Delete a work object
+ *
+ * This will NOT flush the work queue, so only call this if you are sure that the work handler will
+ * not be called after deletion.
+ */
+void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work);
+
+/** @brief Cause a queued, deferred call of the work handler
+ *
+ * _mali_osk_wq_schedule_work provides a mechanism for enqueuing deferred calls
+ * to the work handler. After calling \ref _mali_osk_wq_schedule_work(), the
+ * work handler will be scheduled to run at some point in the future.
+ *
+ * Typically this is called by the IRQ upper-half to defer further processing of
+ * IRQ-related work to the IRQ bottom-half handler. This is necessary for work
+ * that cannot be done in an IRQ context by the IRQ upper-half handler. Timer
+ * callbacks also use this mechanism, because they are treated as though they
+ * operate in an IRQ context. Refer to \ref _mali_osk_timer_t for more
+ * information.
+ *
+ * Code that operates in a kernel-process context (with no IRQ context
+ * restrictions) may also enqueue deferred calls to the IRQ bottom-half. The
+ * advantage over direct calling is that deferred calling allows the caller and
+ * IRQ bottom half to hold the same mutex, with a guarantee that they will not
+ * deadlock just by using this mechanism.
+ *
+ * _mali_osk_wq_schedule_work() places deferred call requests on a queue, to
+ * allow for more than one thread to make a deferred call. Therfore, if it is
+ * called 'K' times, then the IRQ bottom-half will be scheduled 'K' times too.
+ * 'K' is a number that is implementation-specific.
+ *
+ * _mali_osk_wq_schedule_work() is guaranteed to not block on:
+ * - enqueuing a deferred call request.
+ * - the completion of the work handler.
+ *
+ * This is to prevent deadlock. For example, if _mali_osk_wq_schedule_work()
+ * blocked, then it would cause a deadlock when the following two conditions
+ * hold:
+ * - The work handler callback (of type _mali_osk_wq_work_handler_t) locks
+ * a mutex
+ * - And, at the same time, the caller of _mali_osk_wq_schedule_work() also
+ * holds the same mutex
+ *
+ * @note care must be taken to not overflow the queue that
+ * _mali_osk_wq_schedule_work() operates on. Code must be structured to
+ * ensure that the number of requests made to the queue is bounded. Otherwise,
+ * work will be lost.
+ *
+ * The queue that _mali_osk_wq_schedule_work implements is a FIFO of N-writer,
+ * 1-reader type. The writers are the callers of _mali_osk_wq_schedule_work
+ * (all OSK-registered IRQ upper-half handlers in the system, watchdog timers,
+ * callers from a Kernel-process context). The reader is a single thread that
+ * handles all OSK-registered work.
+ *
+ * @param work a pointer to the _mali_osk_wq_work_t object corresponding to the
+ * work to begin processing.
+ */
+void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work);
+
+/** @brief Cause a queued, deferred call of the high priority work handler
+ *
+ * Function is the same as \a _mali_osk_wq_schedule_work() with the only
+ * difference that it runs in a high (real time) priority on the system.
+ *
+ * Should only be used as a substitue for doing the same work in interrupts.
+ *
+ * This is allowed to sleep, but the work should be small since it will block
+ * all other applications.
+*/
+void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work);
+
+/** @brief Flush the work queue
+ *
+ * This will flush the OSK work queue, ensuring all work in the queue has
+ * completed before returning.
+ *
+ * Since this blocks on the completion of work in the work-queue, the
+ * caller of this function \b must \b not hold any mutexes that are taken by
+ * any registered work handler. To do so may cause a deadlock.
+ *
+ */
+void _mali_osk_wq_flush(void);
+
+/** @brief Create work in the delayed work queue
+ *
+ * Creates a work object which can be scheduled in the work queue. When
+ * scheduled, a timer will be start and the \a handler will be called with
+ * \a data as the argument when timer out
+ *
+ * Refer to \ref _mali_osk_wq_delayed_schedule_work() for details on how work
+ * is scheduled in the queue.
+ *
+ * The returned pointer must be freed with \ref _mali_osk_wq_delayed_delete_work_nonflush()
+ * when no longer needed.
+ */
+_mali_osk_wq_delayed_work_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief Delete a work object
+ *
+ * This will NOT flush the work queue, so only call this if you are sure that the work handler will
+ * not be called after deletion.
+ */
+void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Cancel a delayed work without waiting for it to finish
+ *
+ * Note that the \a work callback function may still be running on return from
+ * _mali_osk_wq_delayed_cancel_work_async().
+ *
+ * @param work The delayed work to be cancelled
+ */
+void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Cancel a delayed work and wait for it to finish
+ *
+ * When this function returns, the \a work was either cancelled or it finished running.
+ *
+ * @param work The delayed work to be cancelled
+ */
+void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Put \a work task in global workqueue after delay
+ *
+ * After waiting for a given time this puts a job in the kernel-global
+ * workqueue.
+ *
+ * If \a work was already on a queue, this function will return without doing anything
+ *
+ * @param work job to be done
+ * @param delay number of jiffies to wait or 0 for immediate execution
+ */
+void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay);
+
+/** @} */ /* end group _mali_osk_wq */
+
+
+/** @addtogroup _mali_osk_irq
+ * @{ */
+
+/** @brief Initialize IRQ handling for a resource
+ *
+ * Registers an interrupt handler \a uhandler for the given IRQ number \a irqnum.
+ * \a data will be passed as argument to the handler when an interrupt occurs.
+ *
+ * If \a irqnum is -1, _mali_osk_irq_init will probe for the IRQ number using
+ * the supplied \a trigger_func and \a ack_func. These functions will also
+ * receive \a data as their argument.
+ *
+ * @param irqnum The IRQ number that the resource uses, as seen by the CPU.
+ * The value -1 has a special meaning which indicates the use of probing, and
+ * trigger_func and ack_func must be non-NULL.
+ * @param uhandler The interrupt handler, corresponding to a ISR handler for
+ * the resource
+ * @param int_data resource specific data, which will be passed to uhandler
+ * @param trigger_func Optional: a function to trigger the resource's irq, to
+ * probe for the interrupt. Use NULL if irqnum != -1.
+ * @param ack_func Optional: a function to acknowledge the resource's irq, to
+ * probe for the interrupt. Use NULL if irqnum != -1.
+ * @param probe_data resource-specific data, which will be passed to
+ * (if present) trigger_func and ack_func
+ * @param description textual description of the IRQ resource.
+ * @return on success, a pointer to a _mali_osk_irq_t object, which represents
+ * the IRQ handling on this resource. NULL on failure.
+ */
+_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description);
+
+/** @brief Terminate IRQ handling on a resource.
+ *
+ * This will disable the interrupt from the device, and then waits for any
+ * currently executing IRQ handlers to complete.
+ *
+ * @note If work is deferred to an IRQ bottom-half handler through
+ * \ref _mali_osk_wq_schedule_work(), be sure to flush any remaining work
+ * with \ref _mali_osk_wq_flush() or (implicitly) with \ref _mali_osk_wq_delete_work()
+ *
+ * @param irq a pointer to the _mali_osk_irq_t object corresponding to the
+ * resource whose IRQ handling is to be terminated.
+ */
+void _mali_osk_irq_term(_mali_osk_irq_t *irq);
+
+/** @} */ /* end group _mali_osk_irq */
+
+
+/** @addtogroup _mali_osk_atomic
+ * @{ */
+
+/** @brief Decrement an atomic counter
+ *
+ * @note It is an error to decrement the counter beyond -(1<<23)
+ *
+ * @param atom pointer to an atomic counter */
+void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom);
+
+/** @brief Decrement an atomic counter, return new value
+ *
+ * @param atom pointer to an atomic counter
+ * @return The new value, after decrement */
+u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom);
+
+/** @brief Increment an atomic counter
+ *
+ * @note It is an error to increment the counter beyond (1<<23)-1
+ *
+ * @param atom pointer to an atomic counter */
+void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom);
+
+/** @brief Increment an atomic counter, return new value
+ *
+ * @param atom pointer to an atomic counter */
+u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom);
+
+/** @brief Initialize an atomic counter
+ *
+ * @note the parameter required is a u32, and so signed integers should be
+ * cast to u32.
+ *
+ * @param atom pointer to an atomic counter
+ * @param val the value to initialize the atomic counter.
+ */
+void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val);
+
+/** @brief Read a value from an atomic counter
+ *
+ * This can only be safely used to determine the value of the counter when it
+ * is guaranteed that other threads will not be modifying the counter. This
+ * makes its usefulness limited.
+ *
+ * @param atom pointer to an atomic counter
+ */
+u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom);
+
+/** @brief Terminate an atomic counter
+ *
+ * @param atom pointer to an atomic counter
+ */
+void _mali_osk_atomic_term(_mali_osk_atomic_t *atom);
+
+/** @brief Assign a new val to atomic counter, and return the old atomic counter
+ *
+ * @param atom pointer to an atomic counter
+ * @param val the new value assign to the atomic counter
+ * @return the old value of the atomic counter
+ */
+u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val);
+/** @} */  /* end group _mali_osk_atomic */
+
+
+/** @defgroup _mali_osk_memory OSK Memory Allocation
+ * @{ */
+
+/** @brief Allocate zero-initialized memory.
+ *
+ * Returns a buffer capable of containing at least \a n elements of \a size
+ * bytes each. The buffer is initialized to zero.
+ *
+ * If there is a need for a bigger block of memory (16KB or bigger), then
+ * consider to use _mali_osk_vmalloc() instead, as this function might
+ * map down to a OS function with size limitations.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * @param n Number of elements to allocate
+ * @param size Size of each element
+ * @return On success, the zero-initialized buffer allocated. NULL on failure
+ */
+void *_mali_osk_calloc(u32 n, u32 size);
+
+/** @brief Allocate memory.
+ *
+ * Returns a buffer capable of containing at least \a size bytes. The
+ * contents of the buffer are undefined.
+ *
+ * If there is a need for a bigger block of memory (16KB or bigger), then
+ * consider to use _mali_osk_vmalloc() instead, as this function might
+ * map down to a OS function with size limitations.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * Remember to free memory using _mali_osk_free().
+ * @param size Number of bytes to allocate
+ * @return On success, the buffer allocated. NULL on failure.
+ */
+void *_mali_osk_malloc(u32 size);
+
+/** @brief Free memory.
+ *
+ * Reclaims the buffer pointed to by the parameter \a ptr for the system.
+ * All memory returned from _mali_osk_malloc() and _mali_osk_calloc()
+ * must be freed before the application exits. Otherwise,
+ * a memory leak will occur.
+ *
+ * Memory must be freed once. It is an error to free the same non-NULL pointer
+ * more than once.
+ *
+ * It is legal to free the NULL pointer.
+ *
+ * @param ptr Pointer to buffer to free
+ */
+void _mali_osk_free(void *ptr);
+
+/** @brief Allocate memory.
+ *
+ * Returns a buffer capable of containing at least \a size bytes. The
+ * contents of the buffer are undefined.
+ *
+ * This function is potentially slower than _mali_osk_malloc() and _mali_osk_calloc(),
+ * but do support bigger sizes.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * Remember to free memory using _mali_osk_free().
+ * @param size Number of bytes to allocate
+ * @return On success, the buffer allocated. NULL on failure.
+ */
+void *_mali_osk_valloc(u32 size);
+
+/** @brief Free memory.
+ *
+ * Reclaims the buffer pointed to by the parameter \a ptr for the system.
+ * All memory returned from _mali_osk_valloc() must be freed before the
+ * application exits. Otherwise a memory leak will occur.
+ *
+ * Memory must be freed once. It is an error to free the same non-NULL pointer
+ * more than once.
+ *
+ * It is legal to free the NULL pointer.
+ *
+ * @param ptr Pointer to buffer to free
+ */
+void _mali_osk_vfree(void *ptr);
+
+/** @brief Copies memory.
+ *
+ * Copies the \a len bytes from the buffer pointed by the parameter \a src
+ * directly to the buffer pointed by \a dst.
+ *
+ * It is an error for \a src to overlap \a dst anywhere in \a len bytes.
+ *
+ * @param dst Pointer to the destination array where the content is to be
+ * copied.
+ * @param src Pointer to the source of data to be copied.
+ * @param len Number of bytes to copy.
+ * @return \a dst is always passed through unmodified.
+ */
+void *_mali_osk_memcpy(void *dst, const void *src, u32 len);
+
+/** @brief Fills memory.
+ *
+ * Sets the first \a n bytes of the block of memory pointed to by \a s to
+ * the specified value
+ * @param s Pointer to the block of memory to fill.
+ * @param c Value to be set, passed as u32. Only the 8 Least Significant Bits (LSB)
+ * are used.
+ * @param n Number of bytes to be set to the value.
+ * @return \a s is always passed through unmodified
+ */
+void *_mali_osk_memset(void *s, u32 c, u32 n);
+/** @} */ /* end group _mali_osk_memory */
+
+
+/** @brief Checks the amount of memory allocated
+ *
+ * Checks that not more than \a max_allocated bytes are allocated.
+ *
+ * Some OS bring up an interactive out of memory dialogue when the
+ * system runs out of memory. This can stall non-interactive
+ * apps (e.g. automated test runs). This function can be used to
+ * not trigger the OOM dialogue by keeping allocations
+ * within a certain limit.
+ *
+ * @return MALI_TRUE when \a max_allocated bytes are not in use yet. MALI_FALSE
+ * when at least \a max_allocated bytes are in use.
+ */
+mali_bool _mali_osk_mem_check_allocated(u32 max_allocated);
+
+
+/** @addtogroup _mali_osk_low_level_memory
+ * @{ */
+
+/** @brief Issue a memory barrier
+ *
+ * This defines an arbitrary memory barrier operation, which forces an ordering constraint
+ * on memory read and write operations.
+ */
+void _mali_osk_mem_barrier(void);
+
+/** @brief Issue a write memory barrier
+ *
+ * This defines an write memory barrier operation which forces an ordering constraint
+ * on memory write operations.
+ */
+void _mali_osk_write_mem_barrier(void);
+
+/** @brief Map a physically contiguous region into kernel space
+ *
+ * This is primarily used for mapping in registers from resources, and Mali-MMU
+ * page tables. The mapping is only visable from kernel-space.
+ *
+ * Access has to go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32
+ *
+ * @param phys CPU-physical base address of the memory to map in. This must
+ * be aligned to the system's page size, which is assumed to be 4K.
+ * @param size the number of bytes of physically contiguous address space to
+ * map in
+ * @param description A textual description of the memory being mapped in.
+ * @return On success, a Mali IO address through which the mapped-in
+ * memory/registers can be accessed. NULL on failure.
+ */
+mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description);
+
+/** @brief Unmap a physically contiguous address range from kernel space.
+ *
+ * The address range should be one previously mapped in through
+ * _mali_osk_mem_mapioregion.
+ *
+ * It is a programming error to do (but not limited to) the following:
+ * - attempt an unmap twice
+ * - unmap only part of a range obtained through _mali_osk_mem_mapioregion
+ * - unmap more than the range obtained through  _mali_osk_mem_mapioregion
+ * - unmap an address range that was not successfully mapped using
+ * _mali_osk_mem_mapioregion
+ * - provide a mapping that does not map to phys.
+ *
+ * @param phys CPU-physical base address of the memory that was originally
+ * mapped in. This must be aligned to the system's page size, which is assumed
+ * to be 4K
+ * @param size The number of bytes that were originally mapped in.
+ * @param mapping The Mali IO address through which the mapping is
+ * accessed.
+ */
+void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address mapping);
+
+/** @brief Allocate and Map a physically contiguous region into kernel space
+ *
+ * This is used for allocating physically contiguous regions (such as Mali-MMU
+ * page tables) and mapping them into kernel space. The mapping is only
+ * visible from kernel-space.
+ *
+ * The alignment of the returned memory is guaranteed to be at least
+ * _MALI_OSK_CPU_PAGE_SIZE.
+ *
+ * Access must go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32
+ *
+ * @note This function is primarily to provide support for OSs that are
+ * incapable of separating the tasks 'allocate physically contiguous memory'
+ * and 'map it into kernel space'
+ *
+ * @param[out] phys CPU-physical base address of memory that was allocated.
+ * (*phys) will be guaranteed to be aligned to at least
+ * _MALI_OSK_CPU_PAGE_SIZE on success.
+ *
+ * @param[in] size the number of bytes of physically contiguous memory to
+ * allocate. This must be a multiple of _MALI_OSK_CPU_PAGE_SIZE.
+ *
+ * @return On success, a Mali IO address through which the mapped-in
+ * memory/registers can be accessed. NULL on failure, and (*phys) is unmodified.
+ */
+mali_io_address _mali_osk_mem_allocioregion(u32 *phys, u32 size);
+
+/** @brief Free a physically contiguous address range from kernel space.
+ *
+ * The address range should be one previously mapped in through
+ * _mali_osk_mem_allocioregion.
+ *
+ * It is a programming error to do (but not limited to) the following:
+ * - attempt a free twice on the same ioregion
+ * - free only part of a range obtained through _mali_osk_mem_allocioregion
+ * - free more than the range obtained through  _mali_osk_mem_allocioregion
+ * - free an address range that was not successfully mapped using
+ * _mali_osk_mem_allocioregion
+ * - provide a mapping that does not map to phys.
+ *
+ * @param phys CPU-physical base address of the memory that was originally
+ * mapped in, which was aligned to _MALI_OSK_CPU_PAGE_SIZE.
+ * @param size The number of bytes that were originally mapped in, which was
+ * a multiple of _MALI_OSK_CPU_PAGE_SIZE.
+ * @param mapping The Mali IO address through which the mapping is
+ * accessed.
+ */
+void _mali_osk_mem_freeioregion(u32 phys, u32 size, mali_io_address mapping);
+
+/** @brief Request a region of physically contiguous memory
+ *
+ * This is used to ensure exclusive access to a region of physically contigous
+ * memory.
+ *
+ * It is acceptable to implement this as a stub. However, it is then the job
+ * of the System Integrator to ensure that no other device driver will be using
+ * the physical address ranges used by Mali, while the Mali device driver is
+ * loaded.
+ *
+ * @param phys CPU-physical base address of the memory to request. This must
+ * be aligned to the system's page size, which is assumed to be 4K.
+ * @param size the number of bytes of physically contiguous address space to
+ * request.
+ * @param description A textual description of the memory being requested.
+ * @return _MALI_OSK_ERR_OK on success. Otherwise, a suitable
+ * _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description);
+
+/** @brief Un-request a region of physically contiguous memory
+ *
+ * This is used to release a regious of physically contiguous memory previously
+ * requested through _mali_osk_mem_reqregion, so that other device drivers may
+ * use it. This will be called at time of Mali device driver termination.
+ *
+ * It is a programming error to attempt to:
+ * - unrequest a region twice
+ * - unrequest only part of a range obtained through _mali_osk_mem_reqregion
+ * - unrequest more than the range obtained through  _mali_osk_mem_reqregion
+ * - unrequest an address range that was not successfully requested using
+ * _mali_osk_mem_reqregion
+ *
+ * @param phys CPU-physical base address of the memory to un-request. This must
+ * be aligned to the system's page size, which is assumed to be 4K
+ * @param size the number of bytes of physically contiguous address space to
+ * un-request.
+ */
+void _mali_osk_mem_unreqregion(uintptr_t phys, u32 size);
+
+/** @brief Read from a location currently mapped in through
+ * _mali_osk_mem_mapioregion
+ *
+ * This reads a 32-bit word from a 32-bit aligned location. It is a programming
+ * error to provide unaligned locations, or to read from memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to read from
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @return the 32-bit word from the specified location.
+ */
+u32 _mali_osk_mem_ioread32(volatile mali_io_address mapping, u32 offset);
+
+/** @brief Write to a location currently mapped in through
+ * _mali_osk_mem_mapioregion without memory barriers
+ *
+ * This write a 32-bit word to a 32-bit aligned location without using memory barrier.
+ * It is a programming error to provide unaligned locations, or to write to memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to write to
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @param val the 32-bit word to write.
+ */
+void _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val);
+
+/** @brief Write to a location currently mapped in through
+ * _mali_osk_mem_mapioregion with write memory barrier
+ *
+ * This write a 32-bit word to a 32-bit aligned location. It is a programming
+ * error to provide unaligned locations, or to write to memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to write to
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @param val the 32-bit word to write.
+ */
+void _mali_osk_mem_iowrite32(volatile mali_io_address mapping, u32 offset, u32 val);
+
+/** @brief Flush all CPU caches
+ *
+ * This should only be implemented if flushing of the cache is required for
+ * memory mapped in through _mali_osk_mem_mapregion.
+ */
+void _mali_osk_cache_flushall(void);
+
+/** @brief Flush any caches necessary for the CPU and MALI to have the same view of a range of uncached mapped memory
+ *
+ * This should only be implemented if your OS doesn't do a full cache flush (inner & outer)
+ * after allocating uncached mapped memory.
+ *
+ * Some OS do not perform a full cache flush (including all outer caches) for uncached mapped memory.
+ * They zero the memory through a cached mapping, then flush the inner caches but not the outer caches.
+ * This is required for MALI to have the correct view of the memory.
+ */
+void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size);
+
+/** @brief Safely copy as much data as possible from src to dest
+ *
+ * Do not crash if src or dest isn't available.
+ *
+ * @param dest Destination buffer (limited to user space mapped Mali memory)
+ * @param src Source buffer
+ * @param size Number of bytes to copy
+ * @return Number of bytes actually copied
+ */
+u32 _mali_osk_mem_write_safe(void *dest, const void *src, u32 size);
+
+/** @} */ /* end group _mali_osk_low_level_memory */
+
+
+/** @addtogroup _mali_osk_notification
+ *
+ * User space notification framework
+ *
+ * Communication with user space of asynchronous events is performed through a
+ * synchronous call to the \ref u_k_api.
+ *
+ * Since the events are asynchronous, the events have to be queued until a
+ * synchronous U/K API call can be made by user-space. A U/K API call might also
+ * be received before any event has happened. Therefore the notifications the
+ * different subsystems wants to send to user space has to be queued for later
+ * reception, or a U/K API call has to be blocked until an event has occured.
+ *
+ * Typical uses of notifications are after running of jobs on the hardware or
+ * when changes to the system is detected that needs to be relayed to user
+ * space.
+ *
+ * After an event has occured user space has to be notified using some kind of
+ * message. The notification framework supports sending messages to waiting
+ * threads or queueing of messages until a U/K API call is made.
+ *
+ * The notification queue is a FIFO. There are no restrictions on the numbers
+ * of readers or writers in the queue.
+ *
+ * A message contains what user space needs to identifiy how to handle an
+ * event. This includes a type field and a possible type specific payload.
+ *
+ * A notification to user space is represented by a
+ * \ref _mali_osk_notification_t object. A sender gets hold of such an object
+ * using _mali_osk_notification_create(). The buffer given by the
+ * _mali_osk_notification_t::result_buffer field in the object is used to store
+ * any type specific data. The other fields are internal to the queue system
+ * and should not be touched.
+ *
+ * @{ */
+
+/** @brief Create a notification object
+ *
+ * Returns a notification object which can be added to the queue of
+ * notifications pending for user space transfer.
+ *
+ * The implementation will initialize all members of the
+ * \ref _mali_osk_notification_t object. In particular, the
+ * _mali_osk_notification_t::result_buffer member will be initialized to point
+ * to \a size bytes of storage, and that storage will be suitably aligned for
+ * storage of any structure. That is, the created buffer meets the same
+ * requirements as _mali_osk_malloc().
+ *
+ * The notification object must be deleted when not in use. Use
+ * _mali_osk_notification_delete() for deleting it.
+ *
+ * @note You \b must \b not call _mali_osk_free() on a \ref _mali_osk_notification_t,
+ * object, or on a _mali_osk_notification_t::result_buffer. You must only use
+ * _mali_osk_notification_delete() to free the resources assocaited with a
+ * \ref _mali_osk_notification_t object.
+ *
+ * @param type The notification type
+ * @param size The size of the type specific buffer to send
+ * @return Pointer to a notification object with a suitable buffer, or NULL on error.
+ */
+_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size);
+
+/** @brief Delete a notification object
+ *
+ * This must be called to reclaim the resources of a notification object. This
+ * includes:
+ * - The _mali_osk_notification_t::result_buffer
+ * - The \ref _mali_osk_notification_t itself.
+ *
+ * A notification object \b must \b not be used after it has been deleted by
+ * _mali_osk_notification_delete().
+ *
+ * In addition, the notification object may not be deleted while it is in a
+ * queue. That is, if it has been placed on a queue with
+ * _mali_osk_notification_queue_send(), then it must not be deleted until
+ * it has been received by a call to _mali_osk_notification_queue_receive().
+ * Otherwise, the queue may be corrupted.
+ *
+ * @param object the notification object to delete.
+ */
+void _mali_osk_notification_delete(_mali_osk_notification_t *object);
+
+/** @brief Create a notification queue
+ *
+ * Creates a notification queue which can be used to queue messages for user
+ * delivery and get queued messages from
+ *
+ * The queue is a FIFO, and has no restrictions on the numbers of readers or
+ * writers.
+ *
+ * When the queue is no longer in use, it must be terminated with
+ * \ref _mali_osk_notification_queue_term(). Failure to do so will result in a
+ * memory leak.
+ *
+ * @return Pointer to a new notification queue or NULL on error.
+ */
+_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void);
+
+/** @brief Destroy a notification queue
+ *
+ * Destroys a notification queue and frees associated resources from the queue.
+ *
+ * A notification queue \b must \b not be destroyed in the following cases:
+ * - while there are \ref _mali_osk_notification_t objects in the queue.
+ * - while there are writers currently acting upon the queue. That is, while
+ * a thread is currently calling \ref _mali_osk_notification_queue_send() on
+ * the queue, or while a thread may call
+ * \ref _mali_osk_notification_queue_send() on the queue in the future.
+ * - while there are readers currently waiting upon the queue. That is, while
+ * a thread is currently calling \ref _mali_osk_notification_queue_receive() on
+ * the queue, or while a thread may call
+ * \ref _mali_osk_notification_queue_receive() on the queue in the future.
+ *
+ * Therefore, all \ref _mali_osk_notification_t objects must be flushed and
+ * deleted by the code that makes use of the notification queues, since only
+ * they know the structure of the _mali_osk_notification_t::result_buffer
+ * (even if it may only be a flat sturcture).
+ *
+ * @note Since the queue is a FIFO, the code using notification queues may
+ * create its own 'flush' type of notification, to assist in flushing the
+ * queue.
+ *
+ * Once the queue has been destroyed, it must not be used again.
+ *
+ * @param queue The queue to destroy
+ */
+void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue);
+
+/** @brief Schedule notification for delivery
+ *
+ * When a \ref _mali_osk_notification_t object has been created successfully
+ * and set up, it may be added to the queue of objects waiting for user space
+ * transfer.
+ *
+ * The sending will not block if the queue is full.
+ *
+ * A \ref _mali_osk_notification_t object \b must \b not be put on two different
+ * queues at the same time, or enqueued twice onto a single queue before
+ * reception. However, it is acceptable for it to be requeued \em after reception
+ * from a call to _mali_osk_notification_queue_receive(), even onto the same queue.
+ *
+ * Again, requeuing must also not enqueue onto two different queues at the same
+ * time, or enqueue onto the same queue twice before reception.
+ *
+ * @param queue The notification queue to add this notification to
+ * @param object The entry to add
+ */
+void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object);
+
+/** @brief Receive a notification from a queue
+ *
+ * Receives a single notification from the given queue.
+ *
+ * If no notifciations are ready the thread will sleep until one becomes ready.
+ * Therefore, notifications may not be received into an
+ * IRQ or 'atomic' context (that is, a context where sleeping is disallowed).
+ *
+ * @param queue The queue to receive from
+ * @param result Pointer to storage of a pointer of type
+ * \ref _mali_osk_notification_t*. \a result will be written to such that the
+ * expression \a (*result) will evaluate to a pointer to a valid
+ * \ref _mali_osk_notification_t object, or NULL if none were received.
+ * @return _MALI_OSK_ERR_OK on success. _MALI_OSK_ERR_RESTARTSYSCALL if the sleep was interrupted.
+ */
+_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result);
+
+/** @brief Dequeues a notification from a queue
+ *
+ * Receives a single notification from the given queue.
+ *
+ * If no notifciations are ready the function call will return an error code.
+ *
+ * @param queue The queue to receive from
+ * @param result Pointer to storage of a pointer of type
+ * \ref _mali_osk_notification_t*. \a result will be written to such that the
+ * expression \a (*result) will evaluate to a pointer to a valid
+ * \ref _mali_osk_notification_t object, or NULL if none were received.
+ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if queue was empty.
+ */
+_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result);
+
+/** @} */ /* end group _mali_osk_notification */
+
+
+/** @addtogroup _mali_osk_timer
+ *
+ * Timers use the OS's representation of time, which are 'ticks'. This is to
+ * prevent aliasing problems between the internal timer time, and the time
+ * asked for.
+ *
+ * @{ */
+
+/** @brief Initialize a timer
+ *
+ * Allocates resources for a new timer, and initializes them. This does not
+ * start the timer.
+ *
+ * @return a pointer to the allocated timer object, or NULL on failure.
+ */
+_mali_osk_timer_t *_mali_osk_timer_init(void);
+
+/** @brief Start a timer
+ *
+ * It is an error to start a timer without setting the callback via
+ * _mali_osk_timer_setcallback().
+ *
+ * It is an error to use this to start an already started timer.
+ *
+ * The timer will expire in \a ticks_to_expire ticks, at which point, the
+ * callback function will be invoked with the callback-specific data,
+ * as registered by _mali_osk_timer_setcallback().
+ *
+ * @param tim the timer to start
+ * @param ticks_to_expire the amount of time in ticks for the timer to run
+ * before triggering.
+ */
+void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire);
+
+/** @brief Modify a timer
+ *
+ * Set the relative time at which a timer will expire, and start it if it is
+ * stopped. If \a ticks_to_expire 0 the timer fires immediately.
+ *
+ * It is an error to modify a timer without setting the callback via
+ *  _mali_osk_timer_setcallback().
+ *
+ * The timer will expire at \a ticks_to_expire from the time of the call, at
+ * which point, the callback function will be invoked with the
+ * callback-specific data, as set by _mali_osk_timer_setcallback().
+ *
+ * @param tim the timer to modify, and start if necessary
+ * @param ticks_to_expire the \em absolute time in ticks at which this timer
+ * should trigger.
+ *
+ */
+void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire);
+
+/** @brief Stop a timer, and block on its completion.
+ *
+ * Stop the timer. When the function returns, it is guaranteed that the timer's
+ * callback will not be running on any CPU core.
+ *
+ * Since stoping the timer blocks on compeletion of the callback, the callback
+ * may not obtain any mutexes that the caller holds. Otherwise, a deadlock will
+ * occur.
+ *
+ * @note While the callback itself is guaranteed to not be running, work
+ * enqueued on the work-queue by the timer (with
+ * \ref _mali_osk_wq_schedule_work()) may still run. The timer callback and
+ * work handler must take this into account.
+ *
+ * It is legal to stop an already stopped timer.
+ *
+ * @param tim the timer to stop.
+ *
+ */
+void _mali_osk_timer_del(_mali_osk_timer_t *tim);
+
+/** @brief Stop a timer.
+ *
+ * Stop the timer. When the function returns, the timer's callback may still be
+ * running on any CPU core.
+ *
+ * It is legal to stop an already stopped timer.
+ *
+ * @param tim the timer to stop.
+ */
+void _mali_osk_timer_del_async(_mali_osk_timer_t *tim);
+
+/** @brief Check if timer is pending.
+ *
+ * Check if timer is active.
+ *
+ * @param tim the timer to check
+ * @return MALI_TRUE if time is active, MALI_FALSE if it is not active
+ */
+mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim);
+
+/** @brief Set a timer's callback parameters.
+ *
+ * This must be called at least once before a timer is started/modified.
+ *
+ * After a timer has been stopped or expires, the callback remains set. This
+ * means that restarting the timer will call the same function with the same
+ * parameters on expiry.
+ *
+ * @param tim the timer to set callback on.
+ * @param callback Function to call when timer expires
+ * @param data Function-specific data to supply to the function on expiry.
+ */
+void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data);
+
+/** @brief Terminate a timer, and deallocate resources.
+ *
+ * The timer must first be stopped by calling _mali_osk_timer_del().
+ *
+ * It is a programming error for _mali_osk_timer_term() to be called on:
+ * - timer that is currently running
+ * - a timer that is currently executing its callback.
+ *
+ * @param tim the timer to deallocate.
+ */
+void _mali_osk_timer_term(_mali_osk_timer_t *tim);
+/** @} */ /* end group _mali_osk_timer */
+
+
+/** @defgroup _mali_osk_time OSK Time functions
+ *
+ * \ref _mali_osk_time use the OS's representation of time, which are
+ * 'ticks'. This is to prevent aliasing problems between the internal timer
+ * time, and the time asked for.
+ *
+ * OS tick time is measured as a u32. The time stored in a u32 may either be
+ * an absolute time, or a time delta between two events. Whilst it is valid to
+ * use math opeartors to \em change the tick value represented as a u32, it
+ * is often only meaningful to do such operations on time deltas, rather than
+ * on absolute time. However, it is meaningful to add/subtract time deltas to
+ * absolute times.
+ *
+ * Conversion between tick time and milliseconds (ms) may not be loss-less,
+ * and are \em implementation \em depenedant.
+ *
+ * Code use OS time must take this into account, since:
+ * - a small OS time may (or may not) be rounded
+ * - a large time may (or may not) overflow
+ *
+ * @{ */
+
+/** @brief Return whether ticka occurs after or at the same time as  tickb
+ *
+ * Systems where ticks can wrap must handle that.
+ *
+ * @param ticka ticka
+ * @param tickb tickb
+ * @return MALI_TRUE if ticka represents a time that occurs at or after tickb.
+ */
+mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb);
+
+/** @brief Convert milliseconds to OS 'ticks'
+ *
+ * @param ms time interval in milliseconds
+ * @return the corresponding time interval in OS ticks.
+ */
+unsigned long _mali_osk_time_mstoticks(u32 ms);
+
+/** @brief Convert OS 'ticks' to milliseconds
+ *
+ * @param ticks time interval in OS ticks.
+ * @return the corresponding time interval in milliseconds
+ */
+u32 _mali_osk_time_tickstoms(unsigned long ticks);
+
+
+/** @brief Get the current time in OS 'ticks'.
+ * @return the current time in OS 'ticks'.
+ */
+unsigned long _mali_osk_time_tickcount(void);
+
+/** @brief Cause a microsecond delay
+ *
+ * The delay will have microsecond resolution, and is necessary for correct
+ * operation of the driver. At worst, the delay will be \b at least \a usecs
+ * microseconds, and so may be (significantly) more.
+ *
+ * This function may be implemented as a busy-wait, which is the most sensible
+ * implementation. On OSs where there are situations in which a thread must not
+ * sleep, this is definitely implemented as a busy-wait.
+ *
+ * @param usecs the number of microseconds to wait for.
+ */
+void _mali_osk_time_ubusydelay(u32 usecs);
+
+/** @brief Return time in nano seconds, since any given reference.
+ *
+ * @return Time in nano seconds
+ */
+u64 _mali_osk_time_get_ns(void);
+
+/** @brief Return time in nano seconds, since boot time.
+ *
+ * @return Time in nano seconds
+ */
+u64 _mali_osk_boot_time_get_ns(void);
+
+/** @} */ /* end group _mali_osk_time */
+
+/** @defgroup _mali_osk_math OSK Math
+ * @{ */
+
+/** @brief Count Leading Zeros (Little-endian)
+ *
+ * @note This function must be implemented to support the reference
+ * implementation of _mali_osk_find_first_zero_bit, as defined in
+ * mali_osk_bitops.h.
+ *
+ * @param val 32-bit words to count leading zeros on
+ * @return the number of leading zeros.
+ */
+u32 _mali_osk_clz(u32 val);
+
+/** @brief find last (most-significant) bit set
+ *
+ * @param val 32-bit words to count last bit set on
+ * @return last bit set.
+ */
+u32 _mali_osk_fls(u32 val);
+
+/** @} */ /* end group _mali_osk_math */
+
+/** @addtogroup _mali_osk_wait_queue OSK Wait Queue functionality
+ * @{ */
+
+/** @brief Initialize an empty Wait Queue */
+_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void);
+
+/** @brief Sleep if condition is false
+ *
+ * @param queue the queue to use
+ * @param condition function pointer to a boolean function
+ * @param data data parameter for condition function
+ *
+ * Put thread to sleep if the given \a condition function returns false. When
+ * being asked to wake up again, the condition will be re-checked and the
+ * thread only woken up if the condition is now true.
+ */
+void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data);
+
+/** @brief Sleep if condition is false
+ *
+ * @param queue the queue to use
+ * @param condition function pointer to a boolean function
+ * @param data data parameter for condition function
+ * @param timeout timeout in ms
+ *
+ * Put thread to sleep if the given \a condition function returns false. When
+ * being asked to wake up again, the condition will be re-checked and the
+ * thread only woken up if the condition is now true.  Will return if time
+ * exceeds timeout.
+ */
+void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout);
+
+/** @brief Wake up all threads in wait queue if their respective conditions are
+ * true
+ *
+ * @param queue the queue whose threads should be woken up
+ *
+ * Wake up all threads in wait queue \a queue whose condition is now true.
+ */
+void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue);
+
+/** @brief terminate a wait queue
+ *
+ * @param queue the queue to terminate.
+ */
+void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue);
+/** @} */ /* end group _mali_osk_wait_queue */
+
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Output a device driver debug message.
+ *
+ * The interpretation of \a fmt is the same as the \c format parameter in
+ * _mali_osu_vsnprintf().
+ *
+ * @param fmt a _mali_osu_vsnprintf() style format string
+ * @param ... a variable-number of parameters suitable for \a fmt
+ */
+void _mali_osk_dbgmsg(const char *fmt, ...);
+
+/** @brief Print fmt into buf.
+ *
+ * The interpretation of \a fmt is the same as the \c format parameter in
+ * _mali_osu_vsnprintf().
+ *
+ * @param buf a pointer to the result buffer
+ * @param size the total number of bytes allowed to write to \a buf
+ * @param fmt a _mali_osu_vsnprintf() style format string
+ * @param ... a variable-number of parameters suitable for \a fmt
+ * @return The number of bytes written to \a buf
+ */
+u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...);
+
+/** @brief Abnormal process abort.
+ *
+ * Terminates the caller-process if this function is called.
+ *
+ * This function will be called from Debug assert-macros in mali_kernel_common.h.
+ *
+ * This function will never return - because to continue from a Debug assert
+ * could cause even more problems, and hinder debugging of the initial problem.
+ *
+ * This function is only used in Debug builds, and is not used in Release builds.
+ */
+void _mali_osk_abort(void);
+
+/** @brief Sets breakpoint at point where function is called.
+ *
+ * This function will be called from Debug assert-macros in mali_kernel_common.h,
+ * to assist in debugging. If debugging at this level is not required, then this
+ * function may be implemented as a stub.
+ *
+ * This function is only used in Debug builds, and is not used in Release builds.
+ */
+void _mali_osk_break(void);
+
+/** @brief Return an identificator for calling process.
+ *
+ * @return Identificator for calling process.
+ */
+u32 _mali_osk_get_pid(void);
+
+/** @brief Return an name for calling process.
+ *
+ * @return name for calling process.
+ */
+char *_mali_osk_get_comm(void);
+
+/** @brief Return an identificator for calling thread.
+ *
+ * @return Identificator for calling thread.
+ */
+u32 _mali_osk_get_tid(void);
+
+
+/** @brief Take a reference to the power manager system for the Mali device (synchronously).
+ *
+ * When function returns successfully, Mali is ON.
+ *
+ * @note Call \a _mali_osk_pm_dev_ref_put() to release this reference.
+ */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void);
+
+/** @brief Take a reference to the external power manager system for the Mali device (asynchronously).
+ *
+ * Mali might not yet be on after this function as returned.
+ * Please use \a _mali_osk_pm_dev_barrier() or \a _mali_osk_pm_dev_ref_get_sync()
+ * to wait for Mali to be powered on.
+ *
+ * @note Call \a _mali_osk_pm_dev_ref_dec() to release this reference.
+ */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void);
+
+/** @brief Release the reference to the external power manger system for the Mali device.
+ *
+ * When reference count reach zero, the cores can be off.
+ *
+ * @note This must be used to release references taken with
+ * \a _mali_osk_pm_dev_ref_get_sync() or \a _mali_osk_pm_dev_ref_get_sync().
+ */
+void _mali_osk_pm_dev_ref_put(void);
+
+/** @brief Block until pending PM operations are done
+ */
+void _mali_osk_pm_dev_barrier(void);
+
+/** @} */ /* end group  _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_bitmap OSK Bitmap
+ * @{ */
+
+/** @brief Allocate a unique number from the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @return An unique existence in the bitmap object.
+ */
+u32 _mali_osk_bitmap_alloc(struct _mali_osk_bitmap *bitmap);
+
+/** @brief Free a interger to the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @param obj An number allocated from bitmap object.
+ */
+void _mali_osk_bitmap_free(struct _mali_osk_bitmap *bitmap, u32 obj);
+
+/** @brief Allocate continuous number from the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @return start number of the continuous number block.
+ */
+u32 _mali_osk_bitmap_alloc_range(struct _mali_osk_bitmap *bitmap, int cnt);
+
+/** @brief Free a block of continuous number block to the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @param obj Start number.
+ * @param cnt The size of the continuous number block.
+ */
+void _mali_osk_bitmap_free_range(struct _mali_osk_bitmap *bitmap, u32 obj, int cnt);
+
+/** @brief Available count could be used to allocate in the given bitmap object.
+ *
+ */
+u32 _mali_osk_bitmap_avail(struct _mali_osk_bitmap *bitmap);
+
+/** @brief Initialize an bitmap object..
+ *
+ * @param bitmap An poiter of uninitialized bitmap object.
+ * @param num Size of thei bitmap object and decide the memory size allocated.
+ * @param reserve start number used to allocate.
+ */
+int _mali_osk_bitmap_init(struct _mali_osk_bitmap *bitmap, u32 num, u32 reserve);
+
+/** @brief Free the given bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ */
+void _mali_osk_bitmap_term(struct _mali_osk_bitmap *bitmap);
+/** @} */ /* end group  _mali_osk_bitmap */
+
+/** @} */ /* end group osuapi */
+
+/** @} */ /* end group uddapi */
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Check standard inlines */
+#ifndef MALI_STATIC_INLINE
+#error MALI_STATIC_INLINE not defined on your OS
+#endif
+
+#ifndef MALI_NON_STATIC_INLINE
+#error MALI_NON_STATIC_INLINE not defined on your OS
+#endif
+
+#endif /* __MALI_OSK_H__ */
diff --git a/utgard/r6p2/common/mali_osk_bitops.h b/utgard/r6p2/common/mali_osk_bitops.h
new file mode 100755
index 0000000..61e0a91
--- /dev/null
+++ b/utgard/r6p2/common/mali_osk_bitops.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_bitops.h
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#ifndef __MALI_OSK_BITOPS_H__
+#define __MALI_OSK_BITOPS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+MALI_STATIC_INLINE void _mali_internal_clear_bit(u32 bit, u32 *addr)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	MALI_DEBUG_ASSERT(NULL != addr);
+
+	(*addr) &= ~(1 << bit);
+}
+
+MALI_STATIC_INLINE void _mali_internal_set_bit(u32 bit, u32 *addr)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	MALI_DEBUG_ASSERT(NULL != addr);
+
+	(*addr) |= (1 << bit);
+}
+
+MALI_STATIC_INLINE u32 _mali_internal_test_bit(u32 bit, u32 value)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	return value & (1 << bit);
+}
+
+MALI_STATIC_INLINE int _mali_internal_find_first_zero_bit(u32 value)
+{
+	u32 inverted;
+	u32 negated;
+	u32 isolated;
+	u32 leading_zeros;
+
+	/* Begin with xxx...x0yyy...y, where ys are 1, number of ys is in range  0..31 */
+	inverted = ~value; /* zzz...z1000...0 */
+	/* Using count_trailing_zeros on inverted value -
+	 * See ARM System Developers Guide for details of count_trailing_zeros */
+
+	/* Isolate the zero: it is preceeded by a run of 1s, so add 1 to it */
+	negated = (u32) - inverted ; /* -a == ~a + 1 (mod 2^n) for n-bit numbers */
+	/* negated = xxx...x1000...0 */
+
+	isolated = negated & inverted ; /* xxx...x1000...0 & zzz...z1000...0, zs are ~xs */
+	/* And so the first zero bit is in the same position as the 1 == number of 1s that preceeded it
+	 * Note that the output is zero if value was all 1s */
+
+	leading_zeros = _mali_osk_clz(isolated);
+
+	return 31 - leading_zeros;
+}
+
+
+/** @defgroup _mali_osk_bitops OSK Non-atomic Bit-operations
+ * @{ */
+
+/**
+ * These bit-operations do not work atomically, and so locks must be used if
+ * atomicity is required.
+ *
+ * Reference implementations for Little Endian are provided, and so it should
+ * not normally be necessary to re-implement these. Efficient bit-twiddling
+ * techniques are used where possible, implemented in portable C.
+ *
+ * Note that these reference implementations rely on _mali_osk_clz() being
+ * implemented.
+ */
+
+/** @brief Clear a bit in a sequence of 32-bit words
+ * @param nr bit number to clear, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ */
+MALI_STATIC_INLINE void _mali_osk_clear_nonatomic_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	_mali_internal_clear_bit(nr, addr);
+}
+
+/** @brief Set a bit in a sequence of 32-bit words
+ * @param nr bit number to set, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ */
+MALI_STATIC_INLINE void _mali_osk_set_nonatomic_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	_mali_internal_set_bit(nr, addr);
+}
+
+/** @brief Test a bit in a sequence of 32-bit words
+ * @param nr bit number to test, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ * @return zero if bit was clear, non-zero if set. Do not rely on the return
+ * value being related to the actual word under test.
+ */
+MALI_STATIC_INLINE u32 _mali_osk_test_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	return _mali_internal_test_bit(nr, *addr);
+}
+
+/* Return maxbit if not found */
+/** @brief Find the first zero bit in a sequence of 32-bit words
+ * @param addr starting point for search.
+ * @param maxbit the maximum number of bits to search
+ * @return the number of the first zero bit found, or maxbit if none were found
+ * in the specified range.
+ */
+MALI_STATIC_INLINE u32 _mali_osk_find_first_zero_bit(const u32 *addr, u32 maxbit)
+{
+	u32 total;
+
+	for (total = 0; total < maxbit; total += 32, ++addr) {
+		int result;
+		result = _mali_internal_find_first_zero_bit(*addr);
+
+		/* non-negative signifies the bit was found */
+		if (result >= 0) {
+			total += (u32)result;
+			break;
+		}
+	}
+
+	/* Now check if we reached maxbit or above */
+	if (total >= maxbit) {
+		total = maxbit;
+	}
+
+	return total; /* either the found bit nr, or maxbit if not found */
+}
+/** @} */ /* end group _mali_osk_bitops */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_BITOPS_H__ */
diff --git a/utgard/r6p2/common/mali_osk_list.h b/utgard/r6p2/common/mali_osk_list.h
new file mode 100755
index 0000000..a41f5ab
--- /dev/null
+++ b/utgard/r6p2/common/mali_osk_list.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_list.h
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#ifndef __MALI_OSK_LIST_H__
+#define __MALI_OSK_LIST_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+MALI_STATIC_INLINE void __mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *prev, _mali_osk_list_t *next)
+{
+	next->prev = new_entry;
+	new_entry->next = next;
+	new_entry->prev = prev;
+	prev->next = new_entry;
+}
+
+MALI_STATIC_INLINE void __mali_osk_list_del(_mali_osk_list_t *prev, _mali_osk_list_t *next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists
+ * @{ */
+
+/** Reference implementations of Doubly-linked Circular Lists are provided.
+ * There is often no need to re-implement these.
+ *
+ * @note The implementation may differ subtly from any lists the OS provides.
+ * For this reason, these lists should not be mixed with OS-specific lists
+ * inside the OSK/UKK implementation. */
+
+/** @brief Initialize a list to be a head of an empty list
+ * @param exp the list to initialize. */
+#define _MALI_OSK_INIT_LIST_HEAD(exp) _mali_osk_list_init(exp)
+
+/** @brief Define a list variable, which is uninitialized.
+ * @param exp the name of the variable that the list will be defined as. */
+#define _MALI_OSK_LIST_HEAD(exp) _mali_osk_list_t exp
+
+/** @brief Define a list variable, which is initialized.
+ * @param exp the name of the variable that the list will be defined as. */
+#define _MALI_OSK_LIST_HEAD_STATIC_INIT(exp) _mali_osk_list_t exp = { &exp, &exp }
+
+/** @brief Initialize a list element.
+ *
+ * All list elements must be initialized before use.
+ *
+ * Do not use on any list element that is present in a list without using
+ * _mali_osk_list_del first, otherwise this will break the list.
+ *
+ * @param list the list element to initialize
+ */
+MALI_STATIC_INLINE void _mali_osk_list_init(_mali_osk_list_t *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+/** @brief Insert a single list element after an entry in a list
+ *
+ * As an example, if this is inserted to the head of a list, then this becomes
+ * the first element of the list.
+ *
+ * Do not use to move list elements from one list to another, as it will break
+ * the originating list.
+ *
+ *
+ * @param newlist the list element to insert
+ * @param list the list in which to insert. The new element will be the next
+ * entry in this list
+ */
+MALI_STATIC_INLINE void _mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_add(new_entry, list, list->next);
+}
+
+/** @brief Insert a single list element before an entry in a list
+ *
+ * As an example, if this is inserted to the head of a list, then this becomes
+ * the last element of the list.
+ *
+ * Do not use to move list elements from one list to another, as it will break
+ * the originating list.
+ *
+ * @param newlist the list element to insert
+ * @param list the list in which to insert. The new element will be the previous
+ * entry in this list
+ */
+MALI_STATIC_INLINE void _mali_osk_list_addtail(_mali_osk_list_t *new_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_add(new_entry, list->prev, list);
+}
+
+/** @brief Remove a single element from a list
+ *
+ * The element will no longer be present in the list. The removed list element
+ * will be uninitialized, and so should not be traversed. It must be
+ * initialized before further use.
+ *
+ * @param list the list element to remove.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_del(_mali_osk_list_t *list)
+{
+	__mali_osk_list_del(list->prev, list->next);
+}
+
+/** @brief Remove a single element from a list, and re-initialize it
+ *
+ * The element will no longer be present in the list. The removed list element
+ * will initialized, and so can be used as normal.
+ *
+ * @param list the list element to remove and initialize.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_delinit(_mali_osk_list_t *list)
+{
+	__mali_osk_list_del(list->prev, list->next);
+	_mali_osk_list_init(list);
+}
+
+/** @brief Determine whether a list is empty.
+ *
+ * An empty list is one that contains a single element that points to itself.
+ *
+ * @param list the list to check.
+ * @return non-zero if the list is empty, and zero otherwise.
+ */
+MALI_STATIC_INLINE mali_bool _mali_osk_list_empty(_mali_osk_list_t *list)
+{
+	return list->next == list;
+}
+
+/** @brief Move a list element from one list to another.
+ *
+ * The list element must be initialized.
+ *
+ * As an example, moving a list item to the head of a new list causes this item
+ * to be the first element in the new list.
+ *
+ * @param move the list element to move
+ * @param list the new list into which the element will be inserted, as the next
+ * element in the list.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_move(_mali_osk_list_t *move_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_del(move_entry->prev, move_entry->next);
+	_mali_osk_list_add(move_entry, list);
+}
+
+/** @brief Move an entire list
+ *
+ * The list element must be initialized.
+ *
+ * Allows you to move a list from one list head to another list head
+ *
+ * @param old_list The existing list head
+ * @param new_list The new list head (must be an empty list)
+ */
+MALI_STATIC_INLINE void _mali_osk_list_move_list(_mali_osk_list_t *old_list, _mali_osk_list_t *new_list)
+{
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(new_list));
+	if (!_mali_osk_list_empty(old_list)) {
+		new_list->next = old_list->next;
+		new_list->prev = old_list->prev;
+		new_list->next->prev = new_list;
+		new_list->prev->next = new_list;
+		old_list->next = old_list;
+		old_list->prev = old_list;
+	}
+}
+
+/** @brief Find the containing structure of a list
+ *
+ * When traversing a list, this is used to recover the containing structure,
+ * given that is contains a _mali_osk_list_t member.
+ *
+ * Each list must be of structures of one type, and must link the same members
+ * together, otherwise it will not be possible to correctly recover the
+ * sturctures that the lists link.
+ *
+ * @note no type or memory checking occurs to ensure that a structure does in
+ * fact exist for the list entry, and that it is being recovered with respect
+ * to the correct list member.
+ *
+ * @param ptr the pointer to the _mali_osk_list_t member in this structure
+ * @param type the type of the structure that contains the member
+ * @param member the member of the structure that ptr points to.
+ * @return a pointer to a \a type object which contains the _mali_osk_list_t
+ * \a member, as pointed to by the _mali_osk_list_t \a *ptr.
+ */
+#define _MALI_OSK_LIST_ENTRY(ptr, type, member) \
+	_MALI_OSK_CONTAINER_OF(ptr, type, member)
+
+/** @brief Enumerate a list safely
+ *
+ * With this macro, lists can be enumerated in a 'safe' manner. That is,
+ * entries can be deleted from the list without causing an error during
+ * enumeration. To achieve this, a 'temporary' pointer is required, which must
+ * be provided to the macro.
+ *
+ * Use it like a 'for()', 'while()' or 'do()' construct, and so it must be
+ * followed by a statement or compound-statement which will be executed for
+ * each list entry.
+ *
+ * Upon loop completion, providing that an early out was not taken in the
+ * loop body, then it is guaranteed that ptr->member == list, even if the loop
+ * body never executed.
+ *
+ * @param ptr a pointer to an object of type 'type', which points to the
+ * structure that contains the currently enumerated list entry.
+ * @param tmp a pointer to an object of type 'type', which must not be used
+ * inside the list-execution statement.
+ * @param list a pointer to a _mali_osk_list_t, from which enumeration will
+ * begin
+ * @param type the type of the structure that contains the _mali_osk_list_t
+ * member that is part of the list to be enumerated.
+ * @param member the _mali_osk_list_t member of the structure that is part of
+ * the list to be enumerated.
+ */
+#define _MALI_OSK_LIST_FOREACHENTRY(ptr, tmp, list, type, member)         \
+	for (ptr = _MALI_OSK_LIST_ENTRY((list)->next, type, member),      \
+	     tmp = _MALI_OSK_LIST_ENTRY(ptr->member.next, type, member);  \
+	     &ptr->member != (list);                                      \
+	     ptr = tmp,                                                   \
+	     tmp = _MALI_OSK_LIST_ENTRY(tmp->member.next, type, member))
+
+/** @brief Enumerate a list in reverse order safely
+ *
+ * This macro is identical to @ref _MALI_OSK_LIST_FOREACHENTRY, except that
+ * entries are enumerated in reverse order.
+ *
+ * @param ptr a pointer to an object of type 'type', which points to the
+ * structure that contains the currently enumerated list entry.
+ * @param tmp a pointer to an object of type 'type', which must not be used
+ * inside the list-execution statement.
+ * @param list a pointer to a _mali_osk_list_t, from which enumeration will
+ * begin
+ * @param type the type of the structure that contains the _mali_osk_list_t
+ * member that is part of the list to be enumerated.
+ * @param member the _mali_osk_list_t member of the structure that is part of
+ * the list to be enumerated.
+ */
+#define _MALI_OSK_LIST_FOREACHENTRY_REVERSE(ptr, tmp, list, type, member) \
+	for (ptr = _MALI_OSK_LIST_ENTRY((list)->prev, type, member),      \
+	     tmp = _MALI_OSK_LIST_ENTRY(ptr->member.prev, type, member);  \
+	     &ptr->member != (list);                                      \
+	     ptr = tmp,                                                   \
+	     tmp = _MALI_OSK_LIST_ENTRY(tmp->member.prev, type, member))
+
+/** @} */ /* end group _mali_osk_list */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_LIST_H__ */
diff --git a/utgard/r6p2/common/mali_osk_mali.h b/utgard/r6p2/common/mali_osk_mali.h
new file mode 100755
index 0000000..f242cd4
--- /dev/null
+++ b/utgard/r6p2/common/mali_osk_mali.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_mali.h
+ * Defines the OS abstraction layer which is specific for the Mali kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_MALI_H__
+#define __MALI_OSK_MALI_H__
+
+#include <linux/mali/mali_utgard.h>
+#include <mali_osk.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef CONFIG_MALI_DEVFREQ
+struct mali_device {
+	struct device *dev;
+#ifdef CONFIG_HAVE_CLK
+	struct clk *clock;
+#endif
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulator;
+#endif
+#ifdef CONFIG_PM_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freq;
+	unsigned long current_voltage;
+#ifdef CONFIG_DEVFREQ_THERMAL
+	struct thermal_cooling_device *devfreq_cooling;
+#endif
+#endif
+	struct mali_pm_metrics_data mali_metrics;
+};
+#endif
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Struct with device specific configuration data
+ */
+typedef struct mali_gpu_device_data _mali_osk_device_data;
+
+#ifdef CONFIG_MALI_DT
+/** @brief Initialize those device resources when we use device tree
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_resource_initialize(void);
+#endif
+
+/** @brief Find Mali GPU HW resource
+ *
+ * @param addr Address of Mali GPU resource to find
+ * @param res Storage for resource information if resource is found.
+ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if resource is not found
+ */
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res);
+
+
+/** @brief Find Mali GPU HW base address
+ *
+ * @return 0 if resources are found, otherwise the Mali GPU component with lowest address.
+ */
+uintptr_t _mali_osk_resource_base_address(void);
+
+/** @brief Find the specific GPU resource.
+ *
+ * @return value
+ * 0x400 if Mali 400 specific GPU resource identified
+ * 0x450 if Mali 450 specific GPU resource identified
+ * 0x470 if Mali 470 specific GPU resource identified
+ *
+ */
+u32 _mali_osk_identify_gpu_resource(void);
+
+/** @brief Retrieve the Mali GPU specific data
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data);
+
+/** @brief Find the pmu domain config from device data.
+ *
+ * @param domain_config_array used to store pmu domain config found in device data.
+ * @param array_size is the size of array domain_config_array.
+ */
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size);
+
+/** @brief Get Mali PMU switch delay
+ *
+ *@return pmu switch delay if it is configured
+ */
+u32 _mali_osk_get_pmu_switch_delay(void);
+
+/** @brief Determines if Mali GPU has been configured with shared interrupts.
+ *
+ * @return MALI_TRUE if shared interrupts, MALI_FALSE if not.
+ */
+mali_bool _mali_osk_shared_interrupts(void);
+
+/** @brief Initialize the gpu secure mode.
+ * The gpu secure mode will initially be in a disabled state.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_init(void);
+
+/** @brief Deinitialize the gpu secure mode.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_deinit(void);
+
+/** @brief Reset GPU and enable the gpu secure mode.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_enable(void);
+
+/** @brief Reset GPU and disable the gpu secure mode.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_disable(void);
+
+/** @brief Check if the gpu secure mode has been enabled.
+ * @return MALI_TRUE if enabled, otherwise MALI_FALSE.
+ */
+mali_bool _mali_osk_gpu_secure_mode_is_enabled(void);
+
+/** @brief Check if the gpu secure mode is supported.
+ * @return MALI_TRUE if supported, otherwise MALI_FALSE.
+ */
+mali_bool _mali_osk_gpu_secure_mode_is_supported(void);
+
+
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_MALI_H__ */
diff --git a/utgard/r6p2/common/mali_osk_profiling.h b/utgard/r6p2/common/mali_osk_profiling.h
new file mode 100755
index 0000000..eca6ad4
--- /dev/null
+++ b/utgard/r6p2/common/mali_osk_profiling.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_OSK_PROFILING_H__
+#define __MALI_OSK_PROFILING_H__
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+
+#include "mali_linux_trace.h"
+#include "mali_profiling_events.h"
+#include "mali_profiling_gator_api.h"
+
+#define MALI_PROFILING_MAX_BUFFER_ENTRIES 1048576
+
+#define MALI_PROFILING_NO_HW_COUNTER = ((u32)-1)
+
+/** @defgroup _mali_osk_profiling External profiling connectivity
+ * @{ */
+
+/**
+ * Initialize the profiling module.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start);
+
+/*
+ * Terminate the profiling module.
+ */
+void _mali_osk_profiling_term(void);
+
+/**
+ * Stop the profile sampling operation.
+ */
+void _mali_osk_profiling_stop_sampling(u32 pid);
+
+/**
+ * Start recording profiling data
+ *
+ * The specified limit will determine how large the capture buffer is.
+ * MALI_PROFILING_MAX_BUFFER_ENTRIES determines the maximum size allowed by the device driver.
+ *
+ * @param limit The desired maximum number of events to record on input, the actual maximum on output.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_start(u32 *limit);
+
+/**
+ * Add an profiling event
+ *
+ * @param event_id The event identificator.
+ * @param data0 First data parameter, depending on event_id specified.
+ * @param data1 Second data parameter, depending on event_id specified.
+ * @param data2 Third data parameter, depending on event_id specified.
+ * @param data3 Fourth data parameter, depending on event_id specified.
+ * @param data4 Fifth data parameter, depending on event_id specified.
+ */
+void    _mali_osk_profiling_add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4);
+
+/**
+ * Report a hardware counter event.
+ *
+ * @param counter_id The ID of the counter.
+ * @param value The value of the counter.
+ */
+
+/* Call Linux tracepoint directly */
+#define _mali_osk_profiling_report_hw_counter(counter_id, value) trace_mali_hw_counter(counter_id, value)
+
+/**
+ * Report SW counters
+ *
+ * @param counters array of counter values
+ */
+void _mali_osk_profiling_report_sw_counters(u32 *counters);
+
+void _mali_osk_profiling_record_global_counters(int counter_id, u32 value);
+
+/**
+ * Stop recording profiling data
+ *
+ * @param count Returns the number of recorded events.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_stop(u32 *count);
+
+/**
+ * Retrieves the number of events that can be retrieved
+ *
+ * @return The number of recorded events that can be retrieved.
+ */
+u32 _mali_osk_profiling_get_count(void);
+
+/**
+ * Retrieve an event
+ *
+ * @param index Event index (start with 0 and continue until this function fails to retrieve all events)
+ * @param timestamp The timestamp for the retrieved event will be stored here.
+ * @param event_id The event ID for the retrieved event will be stored here.
+ * @param data The 5 data values for the retrieved event will be stored here.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]);
+
+/**
+ * Clear the recorded buffer.
+ *
+ * This is needed in order to start another recording.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_clear(void);
+
+/**
+ * Checks if a recording of profiling data is in progress
+ *
+ * @return MALI_TRUE if recording of profiling data is in progress, MALI_FALSE if not
+ */
+mali_bool _mali_osk_profiling_is_recording(void);
+
+/**
+ * Checks if profiling data is available for retrival
+ *
+ * @return MALI_TRUE if profiling data is avaiable, MALI_FALSE if not
+ */
+mali_bool _mali_osk_profiling_have_recording(void);
+
+/** @} */ /* end group _mali_osk_profiling */
+
+#else /* defined(CONFIG_MALI400_PROFILING)  && defined(CONFIG_TRACEPOINTS) */
+
+/* Dummy add_event, for when profiling is disabled. */
+
+#define _mali_osk_profiling_add_event(event_id, data0, data1, data2, data3, data4)
+
+#endif /* defined(CONFIG_MALI400_PROFILING)  && defined(CONFIG_TRACEPOINTS) */
+
+#endif /* __MALI_OSK_PROFILING_H__ */
+
+
diff --git a/utgard/r6p2/common/mali_osk_types.h b/utgard/r6p2/common/mali_osk_types.h
new file mode 100755
index 0000000..6e9a133
--- /dev/null
+++ b/utgard/r6p2/common/mali_osk_types.h
@@ -0,0 +1,471 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_types.h
+ * Defines types of the OS abstraction layer for the kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_TYPES_H__
+#define __MALI_OSK_TYPES_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs
+ *
+ * @{
+ */
+
+/** @defgroup _mali_osk_miscellaneous OSK Miscellaneous functions, constants and types
+ * @{ */
+
+/* Define integer types used by OSK. Note: these currently clash with Linux so we only define them if not defined already */
+#ifndef __KERNEL__
+typedef unsigned char      u8;
+typedef signed char        s8;
+typedef unsigned short     u16;
+typedef signed short       s16;
+typedef unsigned int       u32;
+typedef signed int         s32;
+typedef unsigned long long u64;
+#define BITS_PER_LONG (sizeof(long)*8)
+#else
+/* Ensure Linux types u32, etc. are defined */
+#include <linux/types.h>
+#endif
+
+/** @brief Mali Boolean type which uses MALI_TRUE and MALI_FALSE
+  */
+typedef unsigned long mali_bool;
+
+#ifndef MALI_TRUE
+#define MALI_TRUE ((mali_bool)1)
+#endif
+
+#ifndef MALI_FALSE
+#define MALI_FALSE ((mali_bool)0)
+#endif
+
+#define MALI_HW_CORE_NO_COUNTER     ((u32)-1)
+
+
+#define MALI_S32_MAX 0x7fffffff
+
+/**
+ * @brief OSK Error codes
+ *
+ * Each OS may use its own set of error codes, and may require that the
+ * User/Kernel interface take certain error code. This means that the common
+ * error codes need to be sufficiently rich to pass the correct error code
+ * thorugh from the OSK to U/K layer, across all OSs.
+ *
+ * The result is that some error codes will appear redundant on some OSs.
+ * Under all OSs, the OSK layer must translate native OS error codes to
+ * _mali_osk_errcode_t codes. Similarly, the U/K layer must translate from
+ * _mali_osk_errcode_t codes to native OS error codes.
+ */
+typedef enum {
+	_MALI_OSK_ERR_OK = 0, /**< Success. */
+	_MALI_OSK_ERR_FAULT = -1, /**< General non-success */
+	_MALI_OSK_ERR_INVALID_FUNC = -2, /**< Invalid function requested through User/Kernel interface (e.g. bad IOCTL number) */
+	_MALI_OSK_ERR_INVALID_ARGS = -3, /**< Invalid arguments passed through User/Kernel interface */
+	_MALI_OSK_ERR_NOMEM = -4, /**< Insufficient memory */
+	_MALI_OSK_ERR_TIMEOUT = -5, /**< Timeout occurred */
+	_MALI_OSK_ERR_RESTARTSYSCALL = -6, /**< Special: On certain OSs, must report when an interruptable mutex is interrupted. Ignore otherwise. */
+	_MALI_OSK_ERR_ITEM_NOT_FOUND = -7, /**< Table Lookup failed */
+	_MALI_OSK_ERR_BUSY = -8, /**< Device/operation is busy. Try again later */
+	_MALI_OSK_ERR_UNSUPPORTED = -9, /**< Optional part of the interface used, and is unsupported */
+} _mali_osk_errcode_t;
+
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_wq OSK work queues
+ * @{ */
+
+/** @brief Private type for work objects */
+typedef struct _mali_osk_wq_work_s _mali_osk_wq_work_t;
+typedef struct _mali_osk_wq_delayed_work_s _mali_osk_wq_delayed_work_t;
+
+/** @brief Work queue handler function
+ *
+ * This function type is called when the work is scheduled by the work queue,
+ * e.g. as an IRQ bottom-half handler.
+ *
+ * Refer to \ref _mali_osk_wq_schedule_work() for more information on the
+ * work-queue and work handlers.
+ *
+ * @param arg resource-specific data
+ */
+typedef void (*_mali_osk_wq_work_handler_t)(void *arg);
+
+/* @} */ /* end group _mali_osk_wq */
+
+/** @defgroup _mali_osk_irq OSK IRQ handling
+ * @{ */
+
+/** @brief Private type for IRQ handling objects */
+typedef struct _mali_osk_irq_t_struct _mali_osk_irq_t;
+
+/** @brief Optional function to trigger an irq from a resource
+ *
+ * This function is implemented by the common layer to allow probing of a resource's IRQ.
+ * @param arg resource-specific data */
+typedef void (*_mali_osk_irq_trigger_t)(void *arg);
+
+/** @brief Optional function to acknowledge an irq from a resource
+ *
+ * This function is implemented by the common layer to allow probing of a resource's IRQ.
+ * @param arg resource-specific data
+ * @return _MALI_OSK_ERR_OK if the IRQ was successful, or a suitable _mali_osk_errcode_t on failure. */
+typedef _mali_osk_errcode_t (*_mali_osk_irq_ack_t)(void *arg);
+
+/** @brief IRQ 'upper-half' handler callback.
+ *
+ * This function is implemented by the common layer to do the initial handling of a
+ * resource's IRQ. This maps on to the concept of an ISR that does the minimum
+ * work necessary before handing off to an IST.
+ *
+ * The communication of the resource-specific data from the ISR to the IST is
+ * handled by the OSK implementation.
+ *
+ * On most systems, the IRQ upper-half handler executes in IRQ context.
+ * Therefore, the system may have restrictions about what can be done in this
+ * context
+ *
+ * If an IRQ upper-half handler requires more work to be done than can be
+ * acheived in an IRQ context, then it may defer the work with
+ * _mali_osk_wq_schedule_work(). Refer to \ref _mali_osk_wq_create_work() for
+ * more information.
+ *
+ * @param arg resource-specific data
+ * @return _MALI_OSK_ERR_OK if the IRQ was correctly handled, or a suitable
+ * _mali_osk_errcode_t otherwise.
+ */
+typedef _mali_osk_errcode_t (*_mali_osk_irq_uhandler_t)(void *arg);
+
+
+/** @} */ /* end group _mali_osk_irq */
+
+
+/** @defgroup _mali_osk_atomic OSK Atomic counters
+ * @{ */
+
+/** @brief Public type of atomic counters
+ *
+ * This is public for allocation on stack. On systems that support it, this is just a single 32-bit value.
+ * On others, it could be encapsulating an object stored elsewhere.
+ *
+ * Regardless of implementation, the \ref _mali_osk_atomic functions \b must be used
+ * for all accesses to the variable's value, even if atomicity is not required.
+ * Do not access u.val or u.obj directly.
+ */
+typedef struct {
+	union {
+		u32 val;
+		void *obj;
+	} u;
+} _mali_osk_atomic_t;
+/** @} */ /* end group _mali_osk_atomic */
+
+
+/** @defgroup _mali_osk_lock OSK Mutual Exclusion Locks
+ * @{ */
+
+
+/** @brief OSK Mutual Exclusion Lock ordered list
+ *
+ * This lists the various types of locks in the system and is used to check
+ * that locks are taken in the correct order.
+ *
+ * - Holding more than one lock of the same order at the same time is not
+ *   allowed.
+ * - Taking a lock of a lower order than the highest-order lock currently held
+ *   is not allowed.
+ *
+ */
+typedef enum {
+	/*  ||    Locks    ||  */
+	/*  ||   must be   ||  */
+	/* _||_  taken in _||_ */
+	/* \  /    this   \  / */
+	/*  \/    order!   \/  */
+
+	_MALI_OSK_LOCK_ORDER_FIRST = 0,
+
+	_MALI_OSK_LOCK_ORDER_SESSIONS,
+	_MALI_OSK_LOCK_ORDER_MEM_SESSION,
+	_MALI_OSK_LOCK_ORDER_MEM_INFO,
+	_MALI_OSK_LOCK_ORDER_MEM_PT_CACHE,
+	_MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP,
+	_MALI_OSK_LOCK_ORDER_PM_EXECUTION,
+	_MALI_OSK_LOCK_ORDER_EXECUTOR,
+	_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM,
+	_MALI_OSK_LOCK_ORDER_SCHEDULER,
+	_MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED,
+	_MALI_OSK_LOCK_ORDER_PROFILING,
+	_MALI_OSK_LOCK_ORDER_L2,
+	_MALI_OSK_LOCK_ORDER_L2_COMMAND,
+	_MALI_OSK_LOCK_ORDER_UTILIZATION,
+	_MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS,
+	_MALI_OSK_LOCK_ORDER_PM_STATE,
+
+	_MALI_OSK_LOCK_ORDER_LAST,
+} _mali_osk_lock_order_t;
+
+
+/** @brief OSK Mutual Exclusion Lock flags type
+ *
+ * - Any lock can use the order parameter.
+ */
+typedef enum {
+	_MALI_OSK_LOCKFLAG_UNORDERED        = 0x1, /**< Indicate that the order of this lock should not be checked */
+	_MALI_OSK_LOCKFLAG_ORDERED          = 0x2,
+	/** @enum _mali_osk_lock_flags_t
+	 *
+	 * Flags from 0x10000--0x80000000 are RESERVED for User-mode */
+
+} _mali_osk_lock_flags_t;
+
+/** @brief Mutual Exclusion Lock Mode Optimization hint
+ *
+ * The lock mode is used to implement the read/write locking of locks when we call
+ * functions _mali_osk_mutex_rw_init/wait/signal/term/. In this case, the RO mode can
+ * be used to allow multiple concurrent readers, but no writers. The RW mode is used for
+ * writers, and so will wait for all readers to release the lock (if any present).
+ * Further readers and writers will wait until the writer releases the lock.
+ *
+ * The mode is purely an optimization hint: for example, it is permissible for
+ * all locks to behave in RW mode, regardless of that supplied.
+ *
+ * It is an error to attempt to use locks in anything other that RW mode when
+ * call functions _mali_osk_mutex_rw_wait/signal().
+ *
+ */
+typedef enum {
+	_MALI_OSK_LOCKMODE_UNDEF = -1,  /**< Undefined lock mode. For internal use only */
+	_MALI_OSK_LOCKMODE_RW    = 0x0, /**< Read-write mode, default. All readers and writers are mutually-exclusive */
+	_MALI_OSK_LOCKMODE_RO,          /**< Read-only mode, to support multiple concurrent readers, but mutual exclusion in the presence of writers. */
+	/** @enum _mali_osk_lock_mode_t
+	 *
+	 * Lock modes 0x40--0x7F are RESERVED for User-mode */
+} _mali_osk_lock_mode_t;
+
+/** @brief Private types for Mutual Exclusion lock objects */
+typedef struct _mali_osk_lock_debug_s _mali_osk_lock_debug_t;
+typedef struct _mali_osk_spinlock_s _mali_osk_spinlock_t;
+typedef struct _mali_osk_spinlock_irq_s _mali_osk_spinlock_irq_t;
+typedef struct _mali_osk_mutex_s _mali_osk_mutex_t;
+typedef struct _mali_osk_mutex_rw_s _mali_osk_mutex_rw_t;
+
+/** @} */ /* end group _mali_osk_lock */
+
+/** @defgroup _mali_osk_low_level_memory OSK Low-level Memory Operations
+ * @{ */
+
+/**
+ * @brief Private data type for use in IO accesses to/from devices.
+ *
+ * This represents some range that is accessible from the device. Examples
+ * include:
+ * - Device Registers, which could be readable and/or writeable.
+ * - Memory that the device has access to, for storing configuration structures.
+ *
+ * Access to this range must be made through the _mali_osk_mem_ioread32() and
+ * _mali_osk_mem_iowrite32() functions.
+ */
+typedef struct _mali_io_address *mali_io_address;
+
+/** @defgroup _MALI_OSK_CPU_PAGE CPU Physical page size macros.
+ *
+ * The order of the page size is supplied for
+ * ease of use by algorithms that might require it, since it is easier to know
+ * it ahead of time rather than calculating it.
+ *
+ * The Mali Page Mask macro masks off the lower bits of a physical address to
+ * give the start address of the page for that physical address.
+ *
+ * @note The Mali device driver code is designed for systems with 4KB page size.
+ * Changing these macros will not make the entire Mali device driver work with
+ * page sizes other than 4KB.
+ *
+ * @note The CPU Physical Page Size has been assumed to be the same as the Mali
+ * Physical Page Size.
+ *
+ * @{
+ */
+
+/** CPU Page Order, as log to base 2 of the Page size. @see _MALI_OSK_CPU_PAGE_SIZE */
+#define _MALI_OSK_CPU_PAGE_ORDER ((u32)12)
+/** CPU Page Size, in bytes.               */
+#define _MALI_OSK_CPU_PAGE_SIZE (((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER))
+/** CPU Page Mask, which masks off the offset within a page */
+#define _MALI_OSK_CPU_PAGE_MASK (~((((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER)) - ((u32)1)))
+/** @} */ /* end of group _MALI_OSK_CPU_PAGE */
+
+/** @defgroup _MALI_OSK_MALI_PAGE Mali Physical Page size macros
+ *
+ * Mali Physical page size macros. The order of the page size is supplied for
+ * ease of use by algorithms that might require it, since it is easier to know
+ * it ahead of time rather than calculating it.
+ *
+ * The Mali Page Mask macro masks off the lower bits of a physical address to
+ * give the start address of the page for that physical address.
+ *
+ * @note The Mali device driver code is designed for systems with 4KB page size.
+ * Changing these macros will not make the entire Mali device driver work with
+ * page sizes other than 4KB.
+ *
+ * @note The Mali Physical Page Size has been assumed to be the same as the CPU
+ * Physical Page Size.
+ *
+ * @{
+ */
+
+/** Mali Page Order, as log to base 2 of the Page size. @see _MALI_OSK_MALI_PAGE_SIZE */
+#define _MALI_OSK_MALI_PAGE_ORDER PAGE_SHIFT
+/** Mali Page Size, in bytes.               */
+#define _MALI_OSK_MALI_PAGE_SIZE PAGE_SIZE
+/** Mali Page Mask, which masks off the offset within a page */
+#define _MALI_OSK_MALI_PAGE_MASK PAGE_MASK
+/** @} */ /* end of group _MALI_OSK_MALI_PAGE*/
+
+/** @brief flags for mapping a user-accessible memory range
+ *
+ * Where a function with prefix '_mali_osk_mem_mapregion' accepts flags as one
+ * of the function parameters, it will use one of these. These allow per-page
+ * control over mappings. Compare with the mali_memory_allocation_flag type,
+ * which acts over an entire range
+ *
+ * These may be OR'd together with bitwise OR (|), but must be cast back into
+ * the type after OR'ing.
+ */
+typedef enum {
+	_MALI_OSK_MEM_MAPREGION_FLAG_OS_ALLOCATED_PHYSADDR = 0x1, /**< Physical address is OS Allocated */
+} _mali_osk_mem_mapregion_flags_t;
+/** @} */ /* end group _mali_osk_low_level_memory */
+
+/** @defgroup _mali_osk_notification OSK Notification Queues
+ * @{ */
+
+/** @brief Private type for notification queue objects */
+typedef struct _mali_osk_notification_queue_t_struct _mali_osk_notification_queue_t;
+
+/** @brief Public notification data object type */
+typedef struct _mali_osk_notification_t_struct {
+	u32 notification_type;   /**< The notification type */
+	u32 result_buffer_size; /**< Size of the result buffer to copy to user space */
+	void *result_buffer;    /**< Buffer containing any type specific data */
+} _mali_osk_notification_t;
+
+/** @} */ /* end group _mali_osk_notification */
+
+
+/** @defgroup _mali_osk_timer OSK Timer Callbacks
+ * @{ */
+
+/** @brief Function to call when a timer expires
+ *
+ * When a timer expires, this function is called. Note that on many systems,
+ * a timer callback will be executed in IRQ context. Therefore, restrictions
+ * may apply on what can be done inside the timer callback.
+ *
+ * If a timer requires more work to be done than can be acheived in an IRQ
+ * context, then it may defer the work with a work-queue. For example, it may
+ * use \ref _mali_osk_wq_schedule_work() to make use of a bottom-half handler
+ * to carry out the remaining work.
+ *
+ * Stopping the timer with \ref _mali_osk_timer_del() blocks on compeletion of
+ * the callback. Therefore, the callback may not obtain any mutexes also held
+ * by any callers of _mali_osk_timer_del(). Otherwise, a deadlock may occur.
+ *
+ * @param arg Function-specific data */
+typedef void (*_mali_osk_timer_callback_t)(void *arg);
+
+/** @brief Private type for Timer Callback Objects */
+typedef struct _mali_osk_timer_t_struct _mali_osk_timer_t;
+/** @} */ /* end group _mali_osk_timer */
+
+
+/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists
+ * @{ */
+
+/** @brief Public List objects.
+ *
+ * To use, add a _mali_osk_list_t member to the structure that may become part
+ * of a list. When traversing the _mali_osk_list_t objects, use the
+ * _MALI_OSK_CONTAINER_OF() macro to recover the structure from its
+ *_mali_osk_list_t member
+ *
+ * Each structure may have multiple _mali_osk_list_t members, so that the
+ * structure is part of multiple lists. When traversing lists, ensure that the
+ * correct _mali_osk_list_t member is used, because type-checking will be
+ * lost by the compiler.
+ */
+typedef struct _mali_osk_list_s {
+	struct _mali_osk_list_s *next;
+	struct _mali_osk_list_s *prev;
+} _mali_osk_list_t;
+/** @} */ /* end group _mali_osk_list */
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief resource description struct
+ *
+ * Platform independent representation of a Mali HW resource
+ */
+typedef struct _mali_osk_resource {
+	const char *description;        /**< short description of the resource */
+	uintptr_t base;                 /**< Physical base address of the resource, as seen by Mali resources. */
+	const char *irq_name;           /**< Name of irq belong to this resource */
+	u32 irq;                        /**< IRQ number delivered to the CPU, or -1 to tell the driver to probe for it (if possible) */
+} _mali_osk_resource_t;
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_wait_queue OSK Wait Queue functionality
+ * @{ */
+/** @brief Private type for wait queue objects */
+typedef struct _mali_osk_wait_queue_t_struct _mali_osk_wait_queue_t;
+/** @} */ /* end group _mali_osk_wait_queue */
+
+/** @} */ /* end group osuapi */
+
+/** @} */ /* end group uddapi */
+
+/** @brief Mali print ctx type which uses seq_file
+  */
+typedef struct seq_file _mali_osk_print_ctx;
+
+#define _MALI_OSK_BITMAP_INVALIDATE_INDEX -1
+
+typedef struct _mali_osk_bitmap {
+	u32         reserve;
+	u32         last;
+	u32         max;
+	u32         avail;
+	_mali_osk_spinlock_t   *lock;
+	unsigned long          *table;
+} _mali_osk_bitmap_t;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_TYPES_H__ */
diff --git a/utgard/r6p2/common/mali_pm.c b/utgard/r6p2/common/mali_pm.c
new file mode 100755
index 0000000..1ef03a6
--- /dev/null
+++ b/utgard/r6p2/common/mali_pm.c
@@ -0,0 +1,1362 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pm.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_scheduler.h"
+#include "mali_group.h"
+#include "mali_pm_domain.h"
+#include "mali_pmu.h"
+
+#include "mali_executor.h"
+#include "mali_control_timer.h"
+
+#if defined(DEBUG)
+u32 num_pm_runtime_resume = 0;
+u32 num_pm_updates = 0;
+u32 num_pm_updates_up = 0;
+u32 num_pm_updates_down = 0;
+#endif
+
+#define MALI_PM_DOMAIN_DUMMY_MASK (1 << MALI_DOMAIN_INDEX_DUMMY)
+
+/* lock protecting power state (including pm_domains) */
+static _mali_osk_spinlock_irq_t *pm_lock_state = NULL;
+
+/* the wanted domain mask (protected by pm_lock_state) */
+static u32 pd_mask_wanted = 0;
+
+/* used to deferring the actual power changes */
+static _mali_osk_wq_work_t *pm_work = NULL;
+
+/* lock protecting power change execution */
+static _mali_osk_mutex_t *pm_lock_exec = NULL;
+
+/* PMU domains which are actually powered on (protected by pm_lock_exec) */
+static u32 pmu_mask_current = 0;
+
+/*
+ * domains which marked as powered on (protected by pm_lock_exec)
+ * This can be different from pmu_mask_current right after GPU power on
+ * if the PMU domains default to powered up.
+ */
+static u32 pd_mask_current = 0;
+
+static u16 domain_config[MALI_MAX_NUMBER_OF_DOMAINS] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	1 << MALI_DOMAIN_INDEX_DUMMY
+};
+
+/* The relative core power cost */
+#define MALI_GP_COST 3
+#define MALI_PP_COST 6
+#define MALI_L2_COST 1
+
+/*
+ *We have MALI_MAX_NUMBER_OF_PP_PHYSICAL_CORES + 1 rows in this matrix
+ *because we mush store the mask of different pp cores: 0, 1, 2, 3, 4, 5, 6, 7, 8.
+ */
+static int mali_pm_domain_power_cost_result[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1][MALI_MAX_NUMBER_OF_DOMAINS];
+/*
+ * Keep track of runtime PM state, so that we know
+ * how to resume during OS resume.
+ */
+#ifdef CONFIG_PM_RUNTIME
+static mali_bool mali_pm_runtime_active = MALI_FALSE;
+#else
+/* when kernel don't enable PM_RUNTIME, set the flag always true,
+ * for GPU will not power off by runtime */
+static mali_bool mali_pm_runtime_active = MALI_TRUE;
+#endif
+
+static void mali_pm_state_lock(void);
+static void mali_pm_state_unlock(void);
+static _mali_osk_errcode_t mali_pm_create_pm_domains(void);
+static void mali_pm_set_pmu_domain_config(void);
+static u32 mali_pm_get_registered_cores_mask(void);
+static void mali_pm_update_sync_internal(void);
+static mali_bool mali_pm_common_suspend(void);
+static void mali_pm_update_work(void *data);
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask);
+const char *mali_pm_group_stats_to_string(void);
+#endif
+
+_mali_osk_errcode_t mali_pm_initialize(void)
+{
+	_mali_osk_errcode_t err;
+	struct mali_pmu_core *pmu;
+
+	pm_lock_state = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED,
+			_MALI_OSK_LOCK_ORDER_PM_STATE);
+	if (NULL == pm_lock_state) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pm_lock_exec = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED,
+					    _MALI_OSK_LOCK_ORDER_PM_STATE);
+	if (NULL == pm_lock_exec) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pm_work = _mali_osk_wq_create_work(mali_pm_update_work, NULL);
+	if (NULL == pm_work) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pmu = mali_pmu_get_global_pmu_core();
+	if (NULL != pmu) {
+		/*
+		 * We have a Mali PMU, set the correct domain
+		 * configuration (default or custom)
+		 */
+
+		u32 registered_cores_mask;
+
+		mali_pm_set_pmu_domain_config();
+
+		registered_cores_mask = mali_pm_get_registered_cores_mask();
+		mali_pmu_set_registered_cores_mask(pmu, registered_cores_mask);
+
+		MALI_DEBUG_ASSERT(0 == pd_mask_wanted);
+	}
+
+	/* Create all power domains needed (at least one dummy domain) */
+	err = mali_pm_create_pm_domains();
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_pm_terminate();
+		return err;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_pm_terminate(void)
+{
+	if (NULL != pm_work) {
+		_mali_osk_wq_delete_work(pm_work);
+		pm_work = NULL;
+	}
+
+	mali_pm_domain_terminate();
+
+	if (NULL != pm_lock_exec) {
+		_mali_osk_mutex_term(pm_lock_exec);
+		pm_lock_exec = NULL;
+	}
+
+	if (NULL != pm_lock_state) {
+		_mali_osk_spinlock_irq_term(pm_lock_state);
+		pm_lock_state = NULL;
+	}
+}
+
+struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index,
+		struct mali_l2_cache_core *l2_cache)
+{
+	struct mali_pm_domain *domain;
+
+	domain = mali_pm_domain_get_from_mask(domain_config[domain_index]);
+	if (NULL == domain) {
+		MALI_DEBUG_ASSERT(0 == domain_config[domain_index]);
+		domain = mali_pm_domain_get_from_index(
+				 MALI_DOMAIN_INDEX_DUMMY);
+		domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK;
+	} else {
+		MALI_DEBUG_ASSERT(0 != domain_config[domain_index]);
+	}
+
+	MALI_DEBUG_ASSERT(NULL != domain);
+
+	mali_pm_domain_add_l2_cache(domain, l2_cache);
+
+	return domain; /* return the actual domain this was registered in */
+}
+
+struct mali_pm_domain *mali_pm_register_group(u32 domain_index,
+		struct mali_group *group)
+{
+	struct mali_pm_domain *domain;
+
+	domain = mali_pm_domain_get_from_mask(domain_config[domain_index]);
+	if (NULL == domain) {
+		MALI_DEBUG_ASSERT(0 == domain_config[domain_index]);
+		domain = mali_pm_domain_get_from_index(
+				 MALI_DOMAIN_INDEX_DUMMY);
+		domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK;
+	} else {
+		MALI_DEBUG_ASSERT(0 != domain_config[domain_index]);
+	}
+
+	MALI_DEBUG_ASSERT(NULL != domain);
+
+	mali_pm_domain_add_group(domain, group);
+
+	return domain; /* return the actual domain this was registered in */
+}
+
+mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains,
+				  struct mali_group **groups,
+				  u32 num_domains)
+{
+	mali_bool ret = MALI_TRUE; /* Assume all is powered on instantly */
+	u32 i;
+
+	mali_pm_state_lock();
+
+	for (i = 0; i < num_domains; i++) {
+		MALI_DEBUG_ASSERT_POINTER(domains[i]);
+		pd_mask_wanted |= mali_pm_domain_ref_get(domains[i]);
+		if (MALI_FALSE == mali_pm_domain_power_is_on(domains[i])) {
+			/*
+			 * Tell caller that the corresponding group
+			 * was not already powered on.
+			 */
+			ret = MALI_FALSE;
+		} else {
+			/*
+			 * There is a time gap between we power on the domain and
+			 * set the power state of the corresponding groups to be on.
+			 */
+			if (NULL != groups[i] &&
+			    MALI_FALSE == mali_group_power_is_on(groups[i])) {
+				ret = MALI_FALSE;
+			}
+		}
+	}
+
+	MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (get refs)\n", pd_mask_wanted));
+
+	mali_pm_state_unlock();
+
+	return ret;
+}
+
+mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains,
+				  u32 num_domains)
+{
+	u32 mask = 0;
+	mali_bool ret;
+	u32 i;
+
+	mali_pm_state_lock();
+
+	for (i = 0; i < num_domains; i++) {
+		MALI_DEBUG_ASSERT_POINTER(domains[i]);
+		mask |= mali_pm_domain_ref_put(domains[i]);
+	}
+
+	if (0 == mask) {
+		/* return false, all domains should still stay on */
+		ret = MALI_FALSE;
+	} else {
+		/* Assert that we are dealing with a change */
+		MALI_DEBUG_ASSERT((pd_mask_wanted & mask) == mask);
+
+		/* Update our desired domain mask */
+		pd_mask_wanted &= ~mask;
+
+		/* return true; one or more domains can now be powered down */
+		ret = MALI_TRUE;
+	}
+
+	MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (put refs)\n", pd_mask_wanted));
+
+	mali_pm_state_unlock();
+
+	return ret;
+}
+
+void mali_pm_init_begin(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	_mali_osk_pm_dev_ref_get_sync();
+
+	/* Ensure all PMU domains are on */
+	if (NULL != pmu) {
+		mali_pmu_power_up_all(pmu);
+	}
+}
+
+void mali_pm_init_end(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	/* Ensure all PMU domains are off */
+	if (NULL != pmu) {
+		mali_pmu_power_down_all(pmu);
+	}
+
+	_mali_osk_pm_dev_ref_put();
+}
+
+void mali_pm_update_sync(void)
+{
+	mali_pm_exec_lock();
+
+	if (MALI_TRUE == mali_pm_runtime_active) {
+		/*
+		 * Only update if GPU is powered on.
+		 * Deactivation of the last group will result in both a
+		 * deferred runtime PM suspend operation and
+		 * deferred execution of this function.
+		 * mali_pm_runtime_active will be false if runtime PM
+		 * executed first and thus the GPU is now fully powered off.
+		 */
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pm_update_async(void)
+{
+	_mali_osk_wq_schedule_work(pm_work);
+}
+
+void mali_pm_os_suspend(mali_bool os_suspend)
+{
+	int ret;
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: OS suspend\n"));
+
+	/* Suspend execution of all jobs, and go to inactive state */
+	mali_executor_suspend();
+
+	if (os_suspend) {
+		mali_control_timer_suspend(MALI_TRUE);
+	}
+
+	mali_pm_exec_lock();
+
+	ret = mali_pm_common_suspend();
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == ret);
+	MALI_IGNORE(ret);
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pm_os_resume(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: OS resume\n"));
+
+	mali_pm_exec_lock();
+
+#if defined(DEBUG)
+	mali_pm_state_lock();
+
+	/* Assert that things are as we left them in os_suspend(). */
+	MALI_DEBUG_ASSERT(0 == pd_mask_wanted);
+	MALI_DEBUG_ASSERT(0 == pd_mask_current);
+	MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+	mali_pm_state_unlock();
+#endif
+
+	if (MALI_TRUE == mali_pm_runtime_active) {
+		/* Runtime PM was active, so reset PMU */
+		if (NULL != pmu) {
+			mali_pmu_reset(pmu);
+			pmu_mask_current = mali_pmu_get_mask(pmu);
+
+			MALI_DEBUG_PRINT(3, ("Mali PM: OS resume 0x%x \n", pmu_mask_current));
+		}
+
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+
+	/* Start executing jobs again */
+	mali_executor_resume();
+}
+
+mali_bool mali_pm_runtime_suspend(void)
+{
+	mali_bool ret;
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: Runtime suspend\n"));
+
+	mali_pm_exec_lock();
+
+	/*
+	 * Put SW state directly into "off" state, and do not bother to power
+	 * down each power domain, because entire GPU will be powered off
+	 * when we return.
+	 * For runtime PM suspend, in contrast to OS suspend, there is a race
+	 * between this function and the mali_pm_update_sync_internal(), which
+	 * is fine...
+	 */
+	ret = mali_pm_common_suspend();
+	if (MALI_TRUE == ret) {
+		mali_pm_runtime_active = MALI_FALSE;
+	} else {
+		/*
+		 * Process the "power up" instead,
+		 * which could have been "lost"
+		 */
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+
+	return ret;
+}
+
+void mali_pm_runtime_resume(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	mali_pm_exec_lock();
+
+	mali_pm_runtime_active = MALI_TRUE;
+
+#if defined(DEBUG)
+	++num_pm_runtime_resume;
+
+	mali_pm_state_lock();
+
+	/*
+	 * Assert that things are as we left them in runtime_suspend(),
+	 * except for pd_mask_wanted which normally will be the reason we
+	 * got here (job queued => domains wanted)
+	 */
+	MALI_DEBUG_ASSERT(0 == pd_mask_current);
+	MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+	mali_pm_state_unlock();
+#endif
+
+	if (NULL != pmu) {
+		mali_pmu_reset(pmu);
+		pmu_mask_current = mali_pmu_get_mask(pmu);
+		MALI_DEBUG_PRINT(3, ("Mali PM: Runtime resume 0x%x \n", pmu_mask_current));
+	}
+
+	/*
+	 * Normally we are resumed because a job has just been queued.
+	 * pd_mask_wanted should thus be != 0.
+	 * It is however possible for others to take a Mali Runtime PM ref
+	 * without having a job queued.
+	 * We should however always call mali_pm_update_sync_internal(),
+	 * because this will take care of any potential mismatch between
+	 * pmu_mask_current and pd_mask_current.
+	 */
+	mali_pm_update_sync_internal();
+
+	mali_pm_exec_unlock();
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain,
+			      char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tPower domain: id %u\n",
+				mali_pm_domain_get_id(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tMask: 0x%04x\n",
+				mali_pm_domain_get_mask(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tUse count: %u\n",
+				mali_pm_domain_get_use_count(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tCurrent power state: %s\n",
+				(mali_pm_domain_get_mask(domain) & pd_mask_current) ?
+				"On" : "Off");
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tWanted power state: %s\n",
+				(mali_pm_domain_get_mask(domain) & pd_mask_wanted) ?
+				"On" : "Off");
+
+	return n;
+}
+#endif
+
+static void mali_pm_state_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(pm_lock_state);
+}
+
+static void mali_pm_state_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(pm_lock_state);
+}
+
+void mali_pm_exec_lock(void)
+{
+	_mali_osk_mutex_wait(pm_lock_exec);
+}
+
+void mali_pm_exec_unlock(void)
+{
+	_mali_osk_mutex_signal(pm_lock_exec);
+}
+
+static void mali_pm_domain_power_up(u32 power_up_mask,
+				    struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS],
+				    u32 *num_groups_up,
+				    struct mali_l2_cache_core *l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES],
+				    u32 *num_l2_up)
+{
+	u32 domain_bit;
+	u32 notify_mask = power_up_mask;
+
+	MALI_DEBUG_ASSERT(0 != power_up_mask);
+	MALI_DEBUG_ASSERT_POINTER(groups_up);
+	MALI_DEBUG_ASSERT_POINTER(num_groups_up);
+	MALI_DEBUG_ASSERT(0 == *num_groups_up);
+	MALI_DEBUG_ASSERT_POINTER(l2_up);
+	MALI_DEBUG_ASSERT_POINTER(num_l2_up);
+	MALI_DEBUG_ASSERT(0 == *num_l2_up);
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state);
+
+	MALI_DEBUG_PRINT(5,
+			 ("PM update:      Powering up domains: . [%s]\n",
+			  mali_pm_mask_to_string(power_up_mask)));
+
+	pd_mask_current |= power_up_mask;
+
+	domain_bit = _mali_osk_fls(notify_mask);
+	while (0 != domain_bit) {
+		u32 domain_id = domain_bit - 1;
+		struct mali_pm_domain *domain =
+			mali_pm_domain_get_from_index(
+				domain_id);
+		struct mali_l2_cache_core *l2_cache;
+		struct mali_l2_cache_core *l2_cache_tmp;
+		struct mali_group *group;
+		struct mali_group *group_tmp;
+
+		/* Mark domain as powered up */
+		mali_pm_domain_set_power_on(domain, MALI_TRUE);
+
+		/*
+		 * Make a note of the L2 and/or group(s) to notify
+		 * (need to release the PM state lock before doing so)
+		 */
+
+		_MALI_OSK_LIST_FOREACHENTRY(l2_cache,
+					    l2_cache_tmp,
+					    mali_pm_domain_get_l2_cache_list(
+						    domain),
+					    struct mali_l2_cache_core,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_l2_up <
+					  MALI_MAX_NUMBER_OF_L2_CACHE_CORES);
+			l2_up[*num_l2_up] = l2_cache;
+			(*num_l2_up)++;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group,
+					    group_tmp,
+					    mali_pm_domain_get_group_list(domain),
+					    struct mali_group,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_groups_up <
+					  MALI_MAX_NUMBER_OF_GROUPS);
+			groups_up[*num_groups_up] = group;
+
+			(*num_groups_up)++;
+		}
+
+		/* Remove current bit and find next */
+		notify_mask &= ~(1 << (domain_id));
+		domain_bit = _mali_osk_fls(notify_mask);
+	}
+}
+static void mali_pm_domain_power_down(u32 power_down_mask,
+				      struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS],
+				      u32 *num_groups_down,
+				      struct mali_l2_cache_core *l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES],
+				      u32 *num_l2_down)
+{
+	u32 domain_bit;
+	u32 notify_mask = power_down_mask;
+
+	MALI_DEBUG_ASSERT(0 != power_down_mask);
+	MALI_DEBUG_ASSERT_POINTER(groups_down);
+	MALI_DEBUG_ASSERT_POINTER(num_groups_down);
+	MALI_DEBUG_ASSERT(0 == *num_groups_down);
+	MALI_DEBUG_ASSERT_POINTER(l2_down);
+	MALI_DEBUG_ASSERT_POINTER(num_l2_down);
+	MALI_DEBUG_ASSERT(0 == *num_l2_down);
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state);
+
+	MALI_DEBUG_PRINT(5,
+			 ("PM update:      Powering down domains: [%s]\n",
+			  mali_pm_mask_to_string(power_down_mask)));
+
+	pd_mask_current &= ~power_down_mask;
+
+	domain_bit = _mali_osk_fls(notify_mask);
+	while (0 != domain_bit) {
+		u32 domain_id = domain_bit - 1;
+		struct mali_pm_domain *domain =
+			mali_pm_domain_get_from_index(domain_id);
+		struct mali_l2_cache_core *l2_cache;
+		struct mali_l2_cache_core *l2_cache_tmp;
+		struct mali_group *group;
+		struct mali_group *group_tmp;
+
+		/* Mark domain as powered down */
+		mali_pm_domain_set_power_on(domain, MALI_FALSE);
+
+		/*
+		 * Make a note of the L2s and/or groups to notify
+		 * (need to release the PM state lock before doing so)
+		 */
+
+		_MALI_OSK_LIST_FOREACHENTRY(l2_cache,
+					    l2_cache_tmp,
+					    mali_pm_domain_get_l2_cache_list(domain),
+					    struct mali_l2_cache_core,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_l2_down <
+					  MALI_MAX_NUMBER_OF_L2_CACHE_CORES);
+			l2_down[*num_l2_down] = l2_cache;
+			(*num_l2_down)++;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group,
+					    group_tmp,
+					    mali_pm_domain_get_group_list(domain),
+					    struct mali_group,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_groups_down <
+					  MALI_MAX_NUMBER_OF_GROUPS);
+			groups_down[*num_groups_down] = group;
+			(*num_groups_down)++;
+		}
+
+		/* Remove current bit and find next */
+		notify_mask &= ~(1 << (domain_id));
+		domain_bit = _mali_osk_fls(notify_mask);
+	}
+}
+
+/*
+ * Execute pending power domain changes
+ * pm_lock_exec lock must be taken by caller.
+ */
+static void mali_pm_update_sync_internal(void)
+{
+	/*
+	 * This should only be called in non-atomic context
+	 * (normally as deferred work)
+	 *
+	 * Look at the pending power domain changes, and execute these.
+	 * Make sure group and schedulers are notified about changes.
+	 */
+
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	u32 power_down_mask;
+	u32 power_up_mask;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+
+#if defined(DEBUG)
+	++num_pm_updates;
+#endif
+
+	/* Hold PM state lock while we look at (and obey) the wanted state */
+	mali_pm_state_lock();
+
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Wanted domain mask: .. [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_wanted)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	/* Figure out which cores we need to power on */
+	power_up_mask = pd_mask_wanted &
+			(pd_mask_wanted ^ pd_mask_current);
+
+	if (0 != power_up_mask) {
+		u32 power_up_mask_pmu;
+		struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_up = 0;
+		struct mali_l2_cache_core *
+			l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_up = 0;
+		u32 i;
+
+#if defined(DEBUG)
+		++num_pm_updates_up;
+#endif
+
+		/*
+		 * Make sure dummy/global domain is always included when
+		 * powering up, since this is controlled by runtime PM,
+		 * and device power is on at this stage.
+		 */
+		power_up_mask |= MALI_PM_DOMAIN_DUMMY_MASK;
+
+		/* Power up only real PMU domains */
+		power_up_mask_pmu = power_up_mask & ~MALI_PM_DOMAIN_DUMMY_MASK;
+
+		/* But not those that happen to be powered on already */
+		power_up_mask_pmu &= (power_up_mask ^ pmu_mask_current) &
+				     power_up_mask;
+
+		if (0 != power_up_mask_pmu) {
+			MALI_DEBUG_ASSERT(NULL != pmu);
+			pmu_mask_current |= power_up_mask_pmu;
+			mali_pmu_power_up(pmu, power_up_mask_pmu);
+		}
+
+		/*
+		 * Put the domains themselves in power up state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_up(power_up_mask,
+					groups_up, &num_groups_up,
+					l2_up, &num_l2_up);
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/* Notify each L2 cache that we have be powered up */
+		for (i = 0; i < num_l2_up; i++) {
+			mali_l2_cache_power_up(l2_up[i]);
+		}
+
+		/*
+		 * Tell execution module about all the groups we have
+		 * powered up. Groups will be notified as a result of this.
+		 */
+		mali_executor_group_power_up(groups_up, num_groups_up);
+
+		/* Lock state again before checking for power down */
+		mali_pm_state_lock();
+	}
+
+	/* Figure out which cores we need to power off */
+	power_down_mask = pd_mask_current &
+			  (pd_mask_wanted ^ pd_mask_current);
+
+	/*
+	 * Never power down the dummy/global domain here. This is to be done
+	 * from a suspend request (since this domain is only physicall powered
+	 * down at that point)
+	 */
+	power_down_mask &= ~MALI_PM_DOMAIN_DUMMY_MASK;
+
+	if (0 != power_down_mask) {
+		u32 power_down_mask_pmu;
+		struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_down = 0;
+		struct mali_l2_cache_core *
+			l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_down = 0;
+		u32 i;
+
+#if defined(DEBUG)
+		++num_pm_updates_down;
+#endif
+
+		/*
+		 * Put the domains themselves in power down state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_down(power_down_mask,
+					  groups_down, &num_groups_down,
+					  l2_down, &num_l2_down);
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/*
+		 * Tell execution module about all the groups we will be
+		 * powering down. Groups will be notified as a result of this.
+		 */
+		if (0 < num_groups_down) {
+			mali_executor_group_power_down(groups_down, num_groups_down);
+		}
+
+		/* Notify each L2 cache that we will be powering down */
+		for (i = 0; i < num_l2_down; i++) {
+			mali_l2_cache_power_down(l2_down[i]);
+		}
+
+		/*
+		 * Power down only PMU domains which should not stay on
+		 * Some domains might for instance currently be incorrectly
+		 * powered up if default domain power state is all on.
+		 */
+		power_down_mask_pmu = pmu_mask_current & (~pd_mask_current);
+
+		if (0 != power_down_mask_pmu) {
+			MALI_DEBUG_ASSERT(NULL != pmu);
+			pmu_mask_current &= ~power_down_mask_pmu;
+			mali_pmu_power_down(pmu, power_down_mask_pmu);
+
+		}
+	} else {
+		/*
+		 * Power down only PMU domains which should not stay on
+		 * Some domains might for instance currently be incorrectly
+		 * powered up if default domain power state is all on.
+		 */
+		u32 power_down_mask_pmu;
+
+		/* No need for state lock since we'll only update PMU */
+		mali_pm_state_unlock();
+
+		power_down_mask_pmu = pmu_mask_current & (~pd_mask_current);
+
+		if (0 != power_down_mask_pmu) {
+			MALI_DEBUG_ASSERT(NULL != pmu);
+			pmu_mask_current &= ~power_down_mask_pmu;
+			mali_pmu_power_down(pmu, power_down_mask_pmu);
+		}
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM update post: Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update post: Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update post: Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+}
+
+static mali_bool mali_pm_common_suspend(void)
+{
+	mali_pm_state_lock();
+
+	if (0 != pd_mask_wanted) {
+		MALI_DEBUG_PRINT(5, ("PM: Aborting suspend operation\n\n\n"));
+		mali_pm_state_unlock();
+		return MALI_FALSE;
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Wanted domain mask: .. [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_wanted)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	if (0 != pd_mask_current) {
+		/*
+		 * We have still some domains powered on.
+		 * It is for instance very normal that at least the
+		 * dummy/global domain is marked as powered on at this point.
+		 * (because it is physically powered on until this function
+		 * returns)
+		 */
+
+		struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_down = 0;
+		struct mali_l2_cache_core *
+			l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_down = 0;
+		u32 i;
+
+		/*
+		 * Put the domains themselves in power down state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_down(pd_mask_current,
+					  groups_down,
+					  &num_groups_down,
+					  l2_down,
+					  &num_l2_down);
+
+		MALI_DEBUG_ASSERT(0 == pd_mask_current);
+		MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/*
+		 * Tell execution module about all the groups we will be
+		 * powering down. Groups will be notified as a result of this.
+		 */
+		if (0 < num_groups_down) {
+			mali_executor_group_power_down(groups_down, num_groups_down);
+		}
+
+		/* Notify each L2 cache that we will be powering down */
+		for (i = 0; i < num_l2_down; i++) {
+			mali_l2_cache_power_down(l2_down[i]);
+		}
+
+		pmu_mask_current = 0;
+	} else {
+		MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+		MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+		mali_pm_state_unlock();
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Current domain mask:  [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Current PMU mask: ... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Group power stats: .. <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	return MALI_TRUE;
+}
+
+static void mali_pm_update_work(void *data)
+{
+	MALI_IGNORE(data);
+	mali_pm_update_sync();
+}
+
+static _mali_osk_errcode_t mali_pm_create_pm_domains(void)
+{
+	int i;
+
+	/* Create all domains (including dummy domain) */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (0x0 == domain_config[i]) continue;
+
+		if (NULL == mali_pm_domain_create(domain_config[i])) {
+			return _MALI_OSK_ERR_NOMEM;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_pm_set_default_pm_domain_config(void)
+{
+	MALI_DEBUG_ASSERT(0 != _mali_osk_resource_base_address());
+
+	/* GP core */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_GP, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_GP] = 0x01;
+	}
+
+	/* PP0 - PP3 core */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP0, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 2;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 1;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 0;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP1, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 3;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 2;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 1;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP2, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 4;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 2;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 1;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP3, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 5;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 2;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 1;
+		}
+	}
+
+	/* PP4 - PP7 */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP4, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP4] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP5, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP5] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP6, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP6] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP7, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP7] = 0x01 << 3;
+	}
+
+	/* L2gp/L2PP0/L2PP4 */
+	if (mali_is_mali400()) {
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI400_OFFSET_L2_CACHE0, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 1;
+		}
+	} else if (mali_is_mali450()) {
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE0, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 0;
+		}
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE1, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 1;
+		}
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE2, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L22] = 0x01 << 3;
+		}
+	} else if (mali_is_mali470()) {
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI470_OFFSET_L2_CACHE1, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 0;
+		}
+	}
+}
+
+static u32 mali_pm_get_registered_cores_mask(void)
+{
+	int i = 0;
+	u32 mask = 0;
+
+	for (i = 0; i < MALI_DOMAIN_INDEX_DUMMY; i++) {
+		mask |= domain_config[i];
+	}
+
+	return mask;
+}
+
+static void mali_pm_set_pmu_domain_config(void)
+{
+	int i = 0;
+
+	_mali_osk_device_data_pmu_config_get(domain_config, MALI_MAX_NUMBER_OF_DOMAINS - 1);
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) {
+		if (0 != domain_config[i]) {
+			MALI_DEBUG_PRINT(2, ("Using customer pmu config:\n"));
+			break;
+		}
+	}
+
+	if (MALI_MAX_NUMBER_OF_DOMAINS - 1 == i) {
+		MALI_DEBUG_PRINT(2, ("Using hw detect pmu config:\n"));
+		mali_pm_set_default_pm_domain_config();
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) {
+		if (domain_config[i]) {
+			MALI_DEBUG_PRINT(2, ("domain_config[%d] = 0x%x \n", i, domain_config[i]));
+		}
+	}
+	/* Can't override dummy domain mask */
+	domain_config[MALI_DOMAIN_INDEX_DUMMY] =
+		1 << MALI_DOMAIN_INDEX_DUMMY;
+}
+
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask)
+{
+	static char bit_str[MALI_MAX_NUMBER_OF_DOMAINS + 1];
+	int bit;
+	int str_pos = 0;
+
+	/* Must be protected by lock since we use shared string buffer */
+	if (NULL != pm_lock_exec) {
+		MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	}
+
+	for (bit = MALI_MAX_NUMBER_OF_DOMAINS - 1; bit >= 0; bit--) {
+		if (mask & (1 << bit)) {
+			bit_str[str_pos] = 'X';
+		} else {
+			bit_str[str_pos] = '-';
+		}
+		str_pos++;
+	}
+
+	bit_str[MALI_MAX_NUMBER_OF_DOMAINS] = '\0';
+
+	return bit_str;
+}
+
+const char *mali_pm_group_stats_to_string(void)
+{
+	static char bit_str[MALI_MAX_NUMBER_OF_GROUPS + 1];
+	u32 num_groups = mali_group_get_glob_num_groups();
+	u32 i;
+
+	/* Must be protected by lock since we use shared string buffer */
+	if (NULL != pm_lock_exec) {
+		MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	}
+
+	for (i = 0; i < num_groups && i < MALI_MAX_NUMBER_OF_GROUPS; i++) {
+		struct mali_group *group;
+
+		group = mali_group_get_glob_group(i);
+
+		if (MALI_TRUE == mali_group_power_is_on(group)) {
+			bit_str[i] = 'X';
+		} else {
+			bit_str[i] = '-';
+		}
+	}
+
+	bit_str[i] = '\0';
+
+	return bit_str;
+}
+#endif
+
+/*
+ * num_pp is the number of PP cores which will be powered on given this mask
+ * cost is the total power cost of cores which will be powered on given this mask
+ */
+static void mali_pm_stat_from_mask(u32 mask, u32 *num_pp, u32 *cost)
+{
+	u32 i;
+
+	/* loop through all cores */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (!(domain_config[i] & mask)) {
+			continue;
+		}
+
+		switch (i) {
+		case MALI_DOMAIN_INDEX_GP:
+			*cost += MALI_GP_COST;
+
+			break;
+		case MALI_DOMAIN_INDEX_PP0: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP1: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP2: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP3:
+			if (mali_is_mali400()) {
+				if ((domain_config[MALI_DOMAIN_INDEX_L20] & mask)
+				    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+					== domain_config[MALI_DOMAIN_INDEX_L20])) {
+					*num_pp += 1;
+				}
+			} else {
+				if ((domain_config[MALI_DOMAIN_INDEX_L21] & mask)
+				    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+					== domain_config[MALI_DOMAIN_INDEX_L21])) {
+					*num_pp += 1;
+				}
+			}
+
+			*cost += MALI_PP_COST;
+			break;
+		case MALI_DOMAIN_INDEX_PP4: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP5: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP6: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP7:
+			MALI_DEBUG_ASSERT(mali_is_mali450());
+
+			if ((domain_config[MALI_DOMAIN_INDEX_L22] & mask)
+			    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+				== domain_config[MALI_DOMAIN_INDEX_L22])) {
+				*num_pp += 1;
+			}
+
+			*cost += MALI_PP_COST;
+			break;
+		case MALI_DOMAIN_INDEX_L20: /* Fall through */
+		case MALI_DOMAIN_INDEX_L21: /* Fall through */
+		case MALI_DOMAIN_INDEX_L22:
+			*cost += MALI_L2_COST;
+
+			break;
+		}
+	}
+}
+
+void mali_pm_power_cost_setup(void)
+{
+	/*
+	 * Two parallel arrays which store the best domain mask and its cost
+	 * The index is the number of PP cores, E.g. Index 0 is for 1 PP option,
+	 * might have mask 0x2 and with cost of 1, lower cost is better
+	 */
+	u32 best_mask[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 };
+	u32 best_cost[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 };
+	/* Array cores_in_domain is used to store the total pp cores in each pm domain. */
+	u32 cores_in_domain[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	/* Domain_count is used to represent the max domain we have.*/
+	u32 max_domain_mask = 0;
+	u32 max_domain_id = 0;
+	u32 always_on_pp_cores = 0;
+
+	u32 num_pp, cost, mask;
+	u32 i, j , k;
+
+	/* Initialize statistics */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; i++) {
+		best_mask[i] = 0;
+		best_cost[i] = 0xFFFFFFFF; /* lower cost is better */
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1; i++) {
+		for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) {
+			mali_pm_domain_power_cost_result[i][j] = 0;
+		}
+	}
+
+	/* Caculate number of pp cores of a given domain config. */
+	for (i = MALI_DOMAIN_INDEX_PP0; i <= MALI_DOMAIN_INDEX_PP7; i++) {
+		if (0 < domain_config[i]) {
+			/* Get the max domain mask value used to caculate power cost
+			 * and we don't count in always on pp cores. */
+			if (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[i]
+			    && max_domain_mask < domain_config[i]) {
+				max_domain_mask = domain_config[i];
+			}
+
+			if (MALI_PM_DOMAIN_DUMMY_MASK == domain_config[i]) {
+				always_on_pp_cores++;
+			}
+		}
+	}
+	max_domain_id = _mali_osk_fls(max_domain_mask);
+
+	/*
+	 * Try all combinations of power domains and check how many PP cores
+	 * they have and their power cost.
+	 */
+	for (mask = 0; mask < (1 << max_domain_id); mask++) {
+		num_pp = 0;
+		cost = 0;
+
+		mali_pm_stat_from_mask(mask, &num_pp, &cost);
+
+		/* This mask is usable for all MP1 up to num_pp PP cores, check statistics for all */
+		for (i = 0; i < num_pp; i++) {
+			if (best_cost[i] >= cost) {
+				best_cost[i] = cost;
+				best_mask[i] = mask;
+			}
+		}
+	}
+
+	/*
+	 * If we want to enable x pp cores, if x is less than number of always_on pp cores,
+	 * all of pp cores we will enable must be always_on pp cores.
+	 */
+	for (i = 0; i < mali_executor_get_num_cores_total(); i++) {
+		if (i < always_on_pp_cores) {
+			mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1]
+				= i + 1;
+		} else {
+			mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1]
+				= always_on_pp_cores;
+		}
+	}
+
+	/* In this loop, variable i represent for the number of non-always on pp cores we want to enabled. */
+	for (i = 0; i < (mali_executor_get_num_cores_total() - always_on_pp_cores); i++) {
+		if (best_mask[i] == 0) {
+			/* This MP variant is not available */
+			continue;
+		}
+
+		for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) {
+			cores_in_domain[j] = 0;
+		}
+
+		for (j = MALI_DOMAIN_INDEX_PP0; j <= MALI_DOMAIN_INDEX_PP7; j++) {
+			if (0 < domain_config[j]
+			    && (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[i])) {
+				cores_in_domain[_mali_osk_fls(domain_config[j]) - 1]++;
+			}
+		}
+
+		/* In this loop, j represent for the number we have already enabled.*/
+		for (j = 0; j <= i;) {
+			/* j used to visit all of domain to get the number of pp cores remained in it. */
+			for (k = 0; k < max_domain_id; k++) {
+				/* If domain k in best_mask[i] is enabled and this domain has extra pp cores,
+				 * we know we must pick at least one pp core from this domain.
+				 * And then we move to next enabled pm domain. */
+				if ((best_mask[i] & (0x1 << k)) && (0 < cores_in_domain[k])) {
+					cores_in_domain[k]--;
+					mali_pm_domain_power_cost_result[always_on_pp_cores + i + 1][k]++;
+					j++;
+					if (j > i) {
+						break;
+					}
+				}
+			}
+		}
+	}
+}
+
+/*
+ * When we are doing core scaling,
+ * this function is called to return the best mask to
+ * achieve the best pp group power cost.
+ */
+void mali_pm_get_best_power_cost_mask(int num_requested, int *dst)
+{
+	MALI_DEBUG_ASSERT((mali_executor_get_num_cores_total() >= num_requested) && (0 <= num_requested));
+
+	_mali_osk_memcpy(dst, mali_pm_domain_power_cost_result[num_requested], MALI_MAX_NUMBER_OF_DOMAINS * sizeof(int));
+}
+
+u32 mali_pm_get_current_mask(void)
+{
+	return pd_mask_current;
+}
+
+u32 mali_pm_get_wanted_mask(void)
+{
+	return pd_mask_wanted;
+}
diff --git a/utgard/r6p2/common/mali_pm.h b/utgard/r6p2/common/mali_pm.h
new file mode 100755
index 0000000..3c11977
--- /dev/null
+++ b/utgard/r6p2/common/mali_pm.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_H__
+#define __MALI_PM_H__
+
+#include "mali_osk.h"
+#include "mali_pm_domain.h"
+
+#define MALI_DOMAIN_INDEX_GP        0
+#define MALI_DOMAIN_INDEX_PP0       1
+#define MALI_DOMAIN_INDEX_PP1       2
+#define MALI_DOMAIN_INDEX_PP2       3
+#define MALI_DOMAIN_INDEX_PP3       4
+#define MALI_DOMAIN_INDEX_PP4       5
+#define MALI_DOMAIN_INDEX_PP5       6
+#define MALI_DOMAIN_INDEX_PP6       7
+#define MALI_DOMAIN_INDEX_PP7       8
+#define MALI_DOMAIN_INDEX_L20       9
+#define MALI_DOMAIN_INDEX_L21      10
+#define MALI_DOMAIN_INDEX_L22      11
+/*
+ * The dummy domain is used when there is no physical power domain
+ * (e.g. no PMU or always on cores)
+ */
+#define MALI_DOMAIN_INDEX_DUMMY    12
+#define MALI_MAX_NUMBER_OF_DOMAINS 13
+
+/**
+ * Initialize the Mali PM module
+ *
+ * PM module covers Mali PM core, PM domains and Mali PMU
+ */
+_mali_osk_errcode_t mali_pm_initialize(void);
+
+/**
+ * Terminate the Mali PM module
+ */
+void mali_pm_terminate(void);
+
+void mali_pm_exec_lock(void);
+void mali_pm_exec_unlock(void);
+
+
+struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index,
+		struct mali_l2_cache_core *l2_cache);
+struct mali_pm_domain *mali_pm_register_group(u32 domain_index,
+		struct mali_group *group);
+
+mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains,
+				  struct mali_group **groups,
+				  u32 num_domains);
+mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains,
+				  u32 num_domains);
+
+void mali_pm_init_begin(void);
+void mali_pm_init_end(void);
+
+void mali_pm_update_sync(void);
+void mali_pm_update_async(void);
+
+/* Callback functions for system power management */
+void mali_pm_os_suspend(mali_bool os_suspend);
+void mali_pm_os_resume(void);
+
+mali_bool mali_pm_runtime_suspend(void);
+void mali_pm_runtime_resume(void);
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain,
+			      char *buf, u32 size);
+#endif
+
+void mali_pm_power_cost_setup(void);
+
+void mali_pm_get_best_power_cost_mask(int num_requested, int *dst);
+
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask);
+#endif
+
+u32 mali_pm_get_current_mask(void);
+u32 mali_pm_get_wanted_mask(void);
+#endif /* __MALI_PM_H__ */
diff --git a/utgard/r6p2/common/mali_pm_domain.c b/utgard/r6p2/common/mali_pm_domain.c
new file mode 100755
index 0000000..9db8488
--- /dev/null
+++ b/utgard/r6p2/common/mali_pm_domain.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_pm_domain.h"
+#include "mali_pmu.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+
+static struct mali_pm_domain *mali_pm_domains[MALI_MAX_NUMBER_OF_DOMAINS] =
+{ NULL, };
+
+void mali_pm_domain_initialize(void)
+{
+	/* Domains will be initialized/created on demand */
+}
+
+void mali_pm_domain_terminate(void)
+{
+	int i;
+
+	/* Delete all domains that has been created */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		mali_pm_domain_delete(mali_pm_domains[i]);
+		mali_pm_domains[i] = NULL;
+	}
+}
+
+struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask)
+{
+	struct mali_pm_domain *domain = NULL;
+	u32 domain_id = 0;
+
+	domain = mali_pm_domain_get_from_mask(pmu_mask);
+	if (NULL != domain) return domain;
+
+	MALI_DEBUG_PRINT(2,
+			 ("Mali PM domain: Creating Mali PM domain (mask=0x%08X)\n",
+			  pmu_mask));
+
+	domain = (struct mali_pm_domain *)_mali_osk_malloc(
+			 sizeof(struct mali_pm_domain));
+	if (NULL != domain) {
+		domain->power_is_on = MALI_FALSE;
+		domain->pmu_mask = pmu_mask;
+		domain->use_count = 0;
+		_mali_osk_list_init(&domain->group_list);
+		_mali_osk_list_init(&domain->l2_cache_list);
+
+		domain_id = _mali_osk_fls(pmu_mask) - 1;
+		/* Verify the domain_id */
+		MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > domain_id);
+		/* Verify that pmu_mask only one bit is set */
+		MALI_DEBUG_ASSERT((1 << domain_id) == pmu_mask);
+		mali_pm_domains[domain_id] = domain;
+
+		return domain;
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Unable to create PM domain\n"));
+	}
+
+	return NULL;
+}
+
+void mali_pm_domain_delete(struct mali_pm_domain *domain)
+{
+	if (NULL == domain) {
+		return;
+	}
+
+	_mali_osk_list_delinit(&domain->group_list);
+	_mali_osk_list_delinit(&domain->l2_cache_list);
+
+	_mali_osk_free(domain);
+}
+
+void mali_pm_domain_add_group(struct mali_pm_domain *domain,
+			      struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	/*
+	 * Use addtail because virtual group is created last and it needs
+	 * to be at the end of the list (in order to be activated after
+	 * all children.
+	 */
+	_mali_osk_list_addtail(&group->pm_domain_list, &domain->group_list);
+}
+
+void mali_pm_domain_add_l2_cache(struct mali_pm_domain *domain,
+				 struct mali_l2_cache_core *l2_cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT_POINTER(l2_cache);
+	_mali_osk_list_add(&l2_cache->pm_domain_list, &domain->l2_cache_list);
+}
+
+struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask)
+{
+	u32 id = 0;
+
+	if (0 == mask) {
+		return NULL;
+	}
+
+	id = _mali_osk_fls(mask) - 1;
+
+	MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+	/* Verify that pmu_mask only one bit is set */
+	MALI_DEBUG_ASSERT((1 << id) == mask);
+
+	return mali_pm_domains[id];
+}
+
+struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id)
+{
+	MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+
+	return mali_pm_domains[id];
+}
+
+u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+
+	if (0 == domain->use_count) {
+		_mali_osk_pm_dev_ref_get_async();
+	}
+
+	++domain->use_count;
+	MALI_DEBUG_PRINT(4, ("PM domain %p: ref_get, use_count => %u\n", domain, domain->use_count));
+
+	/* Return our mask so caller can check this against wanted mask */
+	return domain->pmu_mask;
+}
+
+u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+
+	--domain->use_count;
+	MALI_DEBUG_PRINT(4, ("PM domain %p: ref_put, use_count => %u\n", domain, domain->use_count));
+
+	if (0 == domain->use_count) {
+		_mali_osk_pm_dev_ref_put();
+	}
+
+	/*
+	 * Return the PMU mask which now could be be powered down
+	 * (the bit for this domain).
+	 * This is the responsibility of the caller (mali_pm)
+	 */
+	return (0 == domain->use_count ? domain->pmu_mask : 0);
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_domain_get_id(struct mali_pm_domain *domain)
+{
+	u32 id = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT(0 != domain->pmu_mask);
+
+	id = _mali_osk_fls(domain->pmu_mask) - 1;
+
+	MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+	/* Verify that pmu_mask only one bit is set */
+	MALI_DEBUG_ASSERT((1 << id) == domain->pmu_mask);
+	/* Verify that we have stored the domain at right id/index */
+	MALI_DEBUG_ASSERT(domain == mali_pm_domains[id]);
+
+	return id;
+}
+#endif
+
+#if defined(DEBUG)
+mali_bool mali_pm_domain_all_unused(void)
+{
+	int i;
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (NULL == mali_pm_domains[i]) {
+			/* Nothing to check */
+			continue;
+		}
+
+		if (MALI_TRUE == mali_pm_domains[i]->power_is_on) {
+			/* Not ready for suspend! */
+			return MALI_FALSE;
+		}
+
+		if (0 != mali_pm_domains[i]->use_count) {
+			/* Not ready for suspend! */
+			return MALI_FALSE;
+		}
+	}
+
+	return MALI_TRUE;
+}
+#endif
diff --git a/utgard/r6p2/common/mali_pm_domain.h b/utgard/r6p2/common/mali_pm_domain.h
new file mode 100755
index 0000000..2ac8c0d
--- /dev/null
+++ b/utgard/r6p2/common/mali_pm_domain.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_DOMAIN_H__
+#define __MALI_PM_DOMAIN_H__
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+#include "mali_l2_cache.h"
+#include "mali_group.h"
+#include "mali_pmu.h"
+
+/* Instances are protected by PM state lock */
+struct mali_pm_domain {
+	mali_bool power_is_on;
+	s32 use_count;
+	u32 pmu_mask;
+
+	/* Zero or more groups can belong to this domain */
+	_mali_osk_list_t group_list;
+
+	/* Zero or more L2 caches can belong to this domain */
+	_mali_osk_list_t l2_cache_list;
+};
+
+
+void mali_pm_domain_initialize(void);
+void mali_pm_domain_terminate(void);
+
+struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask);
+void mali_pm_domain_delete(struct mali_pm_domain *domain);
+
+void mali_pm_domain_add_l2_cache(
+	struct mali_pm_domain *domain,
+	struct mali_l2_cache_core *l2_cache);
+void mali_pm_domain_add_group(struct mali_pm_domain *domain,
+			      struct mali_group *group);
+
+struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask);
+struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id);
+
+/* Ref counting */
+u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain);
+u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain);
+
+MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_group_list(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return &domain->group_list;
+}
+
+MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_l2_cache_list(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return &domain->l2_cache_list;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pm_domain_power_is_on(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->power_is_on;
+}
+
+MALI_STATIC_INLINE void mali_pm_domain_set_power_on(
+	struct mali_pm_domain *domain,
+	mali_bool power_is_on)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	domain->power_is_on = power_is_on;
+}
+
+MALI_STATIC_INLINE u32 mali_pm_domain_get_use_count(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->use_count;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_domain_get_id(struct mali_pm_domain *domain);
+
+MALI_STATIC_INLINE u32 mali_pm_domain_get_mask(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->pmu_mask;
+}
+#endif
+
+#if defined(DEBUG)
+mali_bool mali_pm_domain_all_unused(void);
+#endif
+
+#endif /* __MALI_PM_DOMAIN_H__ */
diff --git a/utgard/r6p2/common/mali_pm_metrics.c b/utgard/r6p2/common/mali_pm_metrics.c
new file mode 100644
index 0000000..981ec81
--- /dev/null
+++ b/utgard/r6p2/common/mali_pm_metrics.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "mali_pm_metrics.h"
+#include "mali_osk_locks.h"
+#include "mali_osk_mali.h"
+#include <linux/ktime.h>
+
+#define MALI_PM_TIME_SHIFT 0
+#define MALI_UTILIZATION_MAX_PERIOD 80000000/* ns = 100ms */
+
+_mali_osk_errcode_t mali_pm_metrics_init(struct mali_device *mdev)
+{
+	int i = 0;
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	mdev->mali_metrics.time_period_start = ktime_get();
+	mdev->mali_metrics.time_period_start_gp = mdev->mali_metrics.time_period_start;
+	mdev->mali_metrics.time_period_start_pp = mdev->mali_metrics.time_period_start;
+
+	mdev->mali_metrics.time_busy = 0;
+	mdev->mali_metrics.time_idle = 0;
+	mdev->mali_metrics.prev_busy = 0;
+	mdev->mali_metrics.prev_idle = 0;
+	mdev->mali_metrics.num_running_gp_cores = 0;
+	mdev->mali_metrics.num_running_pp_cores = 0;
+	mdev->mali_metrics.time_busy_gp = 0;
+	mdev->mali_metrics.time_idle_gp = 0;
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; i++) {
+		mdev->mali_metrics.time_busy_pp[i] = 0;
+		mdev->mali_metrics.time_idle_pp[i] = 0;
+	}
+	mdev->mali_metrics.gpu_active = MALI_FALSE;
+
+	mdev->mali_metrics.lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST);
+	if (NULL == mdev->mali_metrics.lock) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_pm_metrics_term(struct mali_device *mdev)
+{
+	_mali_osk_spinlock_irq_term(mdev->mali_metrics.lock);
+}
+
+/*caller needs to hold mdev->mali_metrics.lock before calling this function*/
+void mali_pm_record_job_status(struct mali_device *mdev)
+{
+	ktime_t now;
+	ktime_t diff;
+	u64 ns_time;
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	now = ktime_get();
+	diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+
+	ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+	mdev->mali_metrics.time_busy += ns_time;
+	mdev->mali_metrics.time_period_start = now;
+}
+
+void mali_pm_record_gpu_idle(mali_bool is_gp)
+{
+	ktime_t now;
+	ktime_t diff;
+	u64 ns_time;
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+	now = ktime_get();
+
+	if (MALI_TRUE == is_gp) {
+		--mdev->mali_metrics.num_running_gp_cores;
+		if (0 == mdev->mali_metrics.num_running_gp_cores) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_gp);
+			ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_busy_gp += ns_time;
+			mdev->mali_metrics.time_period_start_gp = now;
+
+			if (0 == mdev->mali_metrics.num_running_pp_cores) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_busy += ns_time;
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_FALSE;
+			}
+		}
+	} else {
+		--mdev->mali_metrics.num_running_pp_cores;
+		if (0 == mdev->mali_metrics.num_running_pp_cores) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_pp);
+			ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_busy_pp[0] += ns_time;
+			mdev->mali_metrics.time_period_start_pp = now;
+
+			if (0 == mdev->mali_metrics.num_running_gp_cores) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_busy += ns_time;
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_FALSE;
+			}
+		}
+	}
+
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_record_gpu_active(mali_bool is_gp)
+{
+	ktime_t now;
+	ktime_t diff;
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+	now = ktime_get();
+
+	if (MALI_TRUE == is_gp) {
+		mdev->mali_metrics.num_running_gp_cores++;
+		if (1 == mdev->mali_metrics.num_running_gp_cores) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_gp);
+			mdev->mali_metrics.time_idle_gp += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_period_start_gp = now;
+			if (0 == mdev->mali_metrics.num_running_pp_cores) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_FALSE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_TRUE;
+			}
+		} else {
+			MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+		}
+	} else {
+		mdev->mali_metrics.num_running_pp_cores++;
+		if (1 == mdev->mali_metrics.num_running_pp_cores) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_pp);
+			mdev->mali_metrics.time_idle_pp[0] += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_period_start_pp = now;
+			if (0 == mdev->mali_metrics.num_running_gp_cores) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_FALSE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_TRUE;
+			}
+		} else {
+			MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+		}
+	}
+
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+
+/*caller needs to hold mdev->mali_metrics.lock before calling this function*/
+static void mali_pm_get_dvfs_utilisation_calc(struct mali_device *mdev, ktime_t now)
+{
+	ktime_t diff;
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+
+	if (mdev->mali_metrics.gpu_active) {
+		mdev->mali_metrics.time_busy += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+	} else {
+		mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+	}
+}
+
+/* Caller needs to hold mdev->mali_metrics.lock before calling this function. */
+static void mali_pm_reset_dvfs_utilisation_unlocked(struct mali_device *mdev, ktime_t now)
+{
+	/* Store previous value */
+	mdev->mali_metrics.prev_idle = mdev->mali_metrics.time_idle;
+	mdev->mali_metrics.prev_busy = mdev->mali_metrics.time_busy;
+
+	/* Reset current values */
+	mdev->mali_metrics.time_period_start = now;
+	mdev->mali_metrics.time_period_start_gp = now;
+	mdev->mali_metrics.time_period_start_pp = now;
+	mdev->mali_metrics.time_idle = 0;
+	mdev->mali_metrics.time_busy = 0;
+
+	mdev->mali_metrics.time_busy_gp = 0;
+	mdev->mali_metrics.time_idle_gp = 0;
+	mdev->mali_metrics.time_busy_pp[0] = 0;
+	mdev->mali_metrics.time_idle_pp[0] = 0;
+}
+
+void mali_pm_reset_dvfs_utilisation(struct mali_device *mdev)
+{
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+	mali_pm_reset_dvfs_utilisation_unlocked(mdev, ktime_get());
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_get_dvfs_utilisation(struct mali_device *mdev,
+				  unsigned long *total_out, unsigned long *busy_out)
+{
+	ktime_t now = ktime_get();
+	u64 busy = 0;
+	u64 total = 0;
+
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+
+	mali_pm_get_dvfs_utilisation_calc(mdev, now);
+
+	busy = mdev->mali_metrics.time_busy;
+	total = busy + mdev->mali_metrics.time_idle;
+
+	/* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
+	 * 100ms) */
+	if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+		mali_pm_reset_dvfs_utilisation_unlocked(mdev, now);
+	} else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+		total += mdev->mali_metrics.prev_idle +
+			 mdev->mali_metrics.prev_busy;
+		busy += mdev->mali_metrics.prev_busy;
+	}
+
+	*total_out = (unsigned long)total;
+	*busy_out = (unsigned long)busy;
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_metrics_spin_lock(void)
+{
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_metrics_spin_unlock(void)
+{
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
diff --git a/utgard/r6p2/common/mali_pm_metrics.h b/utgard/r6p2/common/mali_pm_metrics.h
new file mode 100644
index 0000000..256f448
--- /dev/null
+++ b/utgard/r6p2/common/mali_pm_metrics.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_METRICS_H__
+#define __MALI_PM_METRICS_H__
+
+#ifdef CONFIG_MALI_DEVFREQ
+#include "mali_osk_locks.h"
+#include "mali_group.h"
+
+struct mali_device;
+
+/**
+ * Metrics data collected for use by the power management framework.
+ */
+struct mali_pm_metrics_data {
+	ktime_t time_period_start;
+	u64 time_busy;
+	u64 time_idle;
+	u64 prev_busy;
+	u64 prev_idle;
+	u32 num_running_gp_cores;
+	u32 num_running_pp_cores;
+	ktime_t time_period_start_gp;
+	u64 time_busy_gp;
+	u64 time_idle_gp;
+	ktime_t time_period_start_pp;
+	u64 time_busy_pp[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	u64 time_idle_pp[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	mali_bool gpu_active;
+	_mali_osk_spinlock_irq_t *lock;
+};
+
+/**
+ * Initialize/start the Mali GPU pm_metrics metrics reporting.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_pm_metrics_init(struct mali_device *mdev);
+
+/**
+ * Terminate the Mali GPU pm_metrics metrics reporting
+ */
+void mali_pm_metrics_term(struct mali_device *mdev);
+
+/**
+ * Should be called when a job is about to execute a GPU job
+ */
+void mali_pm_record_gpu_active(mali_bool is_gp);
+
+/**
+ * Should be called when a job is finished
+ */
+void mali_pm_record_gpu_idle(mali_bool is_gp);
+
+void mali_pm_reset_dvfs_utilisation(struct mali_device *mdev);
+
+void mali_pm_get_dvfs_utilisation(struct mali_device *mdev, unsigned long *total_out, unsigned long *busy_out);
+
+void mali_pm_metrics_spin_lock(void);
+
+void mali_pm_metrics_spin_unlock(void);
+#else
+void mali_pm_record_gpu_idle(mali_bool is_gp) {}
+void mali_pm_record_gpu_active(mali_bool is_gp) {}
+#endif
+#endif /* __MALI_PM_METRICS_H__ */
diff --git a/utgard/r6p2/common/mali_pmu.c b/utgard/r6p2/common/mali_pmu.c
new file mode 100755
index 0000000..bf0d413
--- /dev/null
+++ b/utgard/r6p2/common/mali_pmu.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_pmu.c
+ * Mali driver functions for Mali 400 PMU hardware
+ */
+#include "mali_hw_core.h"
+#include "mali_pmu.h"
+#include "mali_pp.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_pm.h"
+#include "mali_osk_mali.h"
+
+struct mali_pmu_core *mali_global_pmu_core = NULL;
+
+static _mali_osk_errcode_t mali_pmu_wait_for_command_finish(
+	struct mali_pmu_core *pmu);
+
+struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource)
+{
+	struct mali_pmu_core *pmu;
+
+	MALI_DEBUG_ASSERT(NULL == mali_global_pmu_core);
+	MALI_DEBUG_PRINT(2, ("Mali PMU: Creating Mali PMU core\n"));
+
+	pmu = (struct mali_pmu_core *)_mali_osk_malloc(
+		      sizeof(struct mali_pmu_core));
+	if (NULL != pmu) {
+		pmu->registered_cores_mask = 0; /* to be set later */
+
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&pmu->hw_core,
+				resource, PMU_REGISTER_ADDRESS_SPACE_SIZE)) {
+
+			pmu->switch_delay = _mali_osk_get_pmu_switch_delay();
+
+			mali_global_pmu_core = pmu;
+
+			return pmu;
+		}
+		_mali_osk_free(pmu);
+	}
+
+	return NULL;
+}
+
+void mali_pmu_delete(struct mali_pmu_core *pmu)
+{
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu == mali_global_pmu_core);
+
+	MALI_DEBUG_PRINT(2, ("Mali PMU: Deleting Mali PMU core\n"));
+
+	mali_global_pmu_core = NULL;
+
+	mali_hw_core_delete(&pmu->hw_core);
+	_mali_osk_free(pmu);
+}
+
+void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask)
+{
+	pmu->registered_cores_mask = mask;
+}
+
+void mali_pmu_reset(struct mali_pmu_core *pmu)
+{
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	/* Setup the desired defaults */
+	mali_hw_core_register_write_relaxed(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_INT_MASK, 0);
+	mali_hw_core_register_write_relaxed(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_SW_DELAY, pmu->switch_delay);
+}
+
+void mali_pmu_power_up_all(struct mali_pmu_core *pmu)
+{
+	u32 stat;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	mali_pm_exec_lock();
+
+	mali_pmu_reset(pmu);
+
+	/* Now simply power up the domains which are marked as powered down */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	mali_pmu_power_up(pmu, stat);
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pmu_power_down_all(struct mali_pmu_core *pmu)
+{
+	u32 stat;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	mali_pm_exec_lock();
+
+	/* Now simply power down the domains which are marked as powered up */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	mali_pmu_power_down(pmu, (~stat) & pmu->registered_cores_mask);
+
+	mali_pm_exec_unlock();
+}
+
+_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask)
+{
+	u32 stat;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+	MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask);
+	MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core,
+				PMU_REG_ADDR_MGMT_INT_RAWSTAT) &
+				PMU_REG_VAL_IRQ));
+
+	MALI_DEBUG_PRINT(3,
+			 ("PMU power down: ...................... [%s]\n",
+			  mali_pm_mask_to_string(mask)));
+
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+
+	/*
+	 * Assert that we are not powering down domains which are already
+	 * powered down.
+	 */
+	MALI_DEBUG_ASSERT(0 == (stat & mask));
+
+	mask  &= ~(0x1 << MALI_DOMAIN_INDEX_DUMMY);
+
+	if (0 == mask || 0 == ((~stat) & mask)) return _MALI_OSK_ERR_OK;
+
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_POWER_DOWN, mask);
+
+	/*
+	 * Do not wait for interrupt on Mali-300/400 if all domains are
+	 * powered off by our power down command, because the HW will simply
+	 * not generate an interrupt in this case.
+	 */
+	if (mali_is_mali450() || mali_is_mali470() || pmu->registered_cores_mask != (mask | stat)) {
+		err = mali_pmu_wait_for_command_finish(pmu);
+		if (_MALI_OSK_ERR_OK != err) {
+			return err;
+		}
+	} else {
+		mali_hw_core_register_write(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ);
+	}
+
+#if defined(DEBUG)
+	/* Verify power status of domains after power down */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	MALI_DEBUG_ASSERT(mask == (stat & mask));
+#endif
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask)
+{
+	u32 stat;
+	_mali_osk_errcode_t err;
+#if !defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP)
+	u32 current_domain;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+	MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask);
+	MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core,
+				PMU_REG_ADDR_MGMT_INT_RAWSTAT) &
+				PMU_REG_VAL_IRQ));
+
+	MALI_DEBUG_PRINT(3,
+			 ("PMU power up: ........................ [%s]\n",
+			  mali_pm_mask_to_string(mask)));
+
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	stat &= pmu->registered_cores_mask;
+
+	mask  &= ~(0x1 << MALI_DOMAIN_INDEX_DUMMY);
+	if (0 == mask || 0 == (stat & mask)) return _MALI_OSK_ERR_OK;
+
+	/*
+	 * Assert that we are only powering up domains which are currently
+	 * powered down.
+	 */
+	MALI_DEBUG_ASSERT(mask == (stat & mask));
+
+#if defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP)
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_POWER_UP, mask);
+
+	err = mali_pmu_wait_for_command_finish(pmu);
+	if (_MALI_OSK_ERR_OK != err) {
+		return err;
+	}
+#else
+	for (current_domain = 1;
+	     current_domain <= pmu->registered_cores_mask;
+	     current_domain <<= 1) {
+		if (current_domain & mask & stat) {
+			mali_hw_core_register_write(&pmu->hw_core,
+						    PMU_REG_ADDR_MGMT_POWER_UP,
+						    current_domain);
+
+			err = mali_pmu_wait_for_command_finish(pmu);
+			if (_MALI_OSK_ERR_OK != err) {
+				return err;
+			}
+		}
+	}
+#endif
+
+#if defined(DEBUG)
+	/* Verify power status of domains after power up */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	MALI_DEBUG_ASSERT(0 == (stat & mask));
+#endif /* defined(DEBUG) */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_pmu_wait_for_command_finish(
+	struct mali_pmu_core *pmu)
+{
+	u32 rawstat;
+	u32 timeout = MALI_REG_POLL_COUNT_SLOW;
+
+	MALI_DEBUG_ASSERT(pmu);
+
+	/* Wait for the command to complete */
+	do {
+		rawstat = mali_hw_core_register_read(&pmu->hw_core,
+						     PMU_REG_ADDR_MGMT_INT_RAWSTAT);
+		--timeout;
+	} while (0 == (rawstat & PMU_REG_VAL_IRQ) && 0 < timeout);
+
+	MALI_DEBUG_ASSERT(0 < timeout);
+
+	if (0 == timeout) {
+		return _MALI_OSK_ERR_TIMEOUT;
+	}
+
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ);
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r6p2/common/mali_pmu.h b/utgard/r6p2/common/mali_pmu.h
new file mode 100755
index 0000000..36ff58e
--- /dev/null
+++ b/utgard/r6p2/common/mali_pmu.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_platform.h
+ * Platform specific Mali driver functions
+ */
+
+#ifndef __MALI_PMU_H__
+#define __MALI_PMU_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_hw_core.h"
+
+/** @brief MALI inbuilt PMU hardware info and PMU hardware has knowledge of cores power mask
+ */
+struct mali_pmu_core {
+	struct mali_hw_core hw_core;
+	u32 registered_cores_mask;
+	u32 switch_delay;
+};
+
+/** @brief Register layout for hardware PMU
+ */
+typedef enum {
+	PMU_REG_ADDR_MGMT_POWER_UP                  = 0x00,     /*< Power up register */
+	PMU_REG_ADDR_MGMT_POWER_DOWN                = 0x04,     /*< Power down register */
+	PMU_REG_ADDR_MGMT_STATUS                    = 0x08,     /*< Core sleep status register */
+	PMU_REG_ADDR_MGMT_INT_MASK                  = 0x0C,     /*< Interrupt mask register */
+	PMU_REG_ADDR_MGMT_INT_RAWSTAT               = 0x10,     /*< Interrupt raw status register */
+	PMU_REG_ADDR_MGMT_INT_CLEAR                 = 0x18,     /*< Interrupt clear register */
+	PMU_REG_ADDR_MGMT_SW_DELAY                  = 0x1C,     /*< Switch delay register */
+	PMU_REGISTER_ADDRESS_SPACE_SIZE             = 0x28,     /*< Size of register space */
+} pmu_reg_addr_mgmt_addr;
+
+#define PMU_REG_VAL_IRQ 1
+
+extern struct mali_pmu_core *mali_global_pmu_core;
+
+/** @brief Initialisation of MALI PMU
+ *
+ * This is called from entry point of the driver in order to create and intialize the PMU resource
+ *
+ * @param resource it will be a pointer to a PMU resource
+ * @param number_of_pp_cores Number of found PP resources in configuration
+ * @param number_of_l2_caches Number of found L2 cache resources in configuration
+ * @return The created PMU object, or NULL in case of failure.
+ */
+struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource);
+
+/** @brief It deallocates the PMU resource
+ *
+ * This is called on the exit of the driver to terminate the PMU resource
+ *
+ * @param pmu Pointer to PMU core object to delete
+ */
+void mali_pmu_delete(struct mali_pmu_core *pmu);
+
+/** @brief Set registered cores mask
+ *
+ * @param pmu Pointer to PMU core object
+ * @param mask All available/valid domain bits
+ */
+void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask);
+
+/** @brief Retrieves the Mali PMU core object (if any)
+ *
+ * @return The Mali PMU object, or NULL if no PMU exists.
+ */
+MALI_STATIC_INLINE struct mali_pmu_core *mali_pmu_get_global_pmu_core(void)
+{
+	return mali_global_pmu_core;
+}
+
+/** @brief Reset PMU core
+ *
+ * @param pmu Pointer to PMU core object to reset
+ */
+void mali_pmu_reset(struct mali_pmu_core *pmu);
+
+void mali_pmu_power_up_all(struct mali_pmu_core *pmu);
+
+void mali_pmu_power_down_all(struct mali_pmu_core *pmu);
+
+/** @brief Returns a mask of the currently powered up domains
+ *
+ * @param pmu Pointer to PMU core object
+ */
+MALI_STATIC_INLINE u32 mali_pmu_get_mask(struct mali_pmu_core *pmu)
+{
+	u32 stat = mali_hw_core_register_read(&pmu->hw_core, PMU_REG_ADDR_MGMT_STATUS);
+	return ((~stat) & pmu->registered_cores_mask);
+}
+
+/** @brief MALI GPU power down using MALI in-built PMU
+ *
+ * Called to power down the specified cores.
+ *
+ * @param pmu Pointer to PMU core object to power down
+ * @param mask Mask specifying which power domains to power down
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask);
+
+/** @brief MALI GPU power up using MALI in-built PMU
+ *
+ * Called to power up the specified cores.
+ *
+ * @param pmu Pointer to PMU core object to power up
+ * @param mask Mask specifying which power domains to power up
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask);
+
+#endif /* __MALI_PMU_H__ */
diff --git a/utgard/r6p2/common/mali_pp.c b/utgard/r6p2/common/mali_pp.c
new file mode 100755
index 0000000..e1cd470
--- /dev/null
+++ b/utgard/r6p2/common/mali_pp.c
@@ -0,0 +1,508 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pp_job.h"
+#include "mali_pp.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "regs/mali_200_regs.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+
+#include <mali_platform.h>
+
+/* Number of frame registers on Mali-200 */
+#define MALI_PP_MALI200_NUM_FRAME_REGISTERS ((0x04C/4)+1)
+/* Number of frame registers on Mali-300 and later */
+#define MALI_PP_MALI400_NUM_FRAME_REGISTERS ((0x058/4)+1)
+
+static struct mali_pp_core *mali_global_pp_cores[MALI_MAX_NUMBER_OF_PP_CORES] = { NULL };
+static u32 mali_global_num_pp_cores = 0;
+
+/* Interrupt handlers */
+static void mali_pp_irq_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data);
+
+struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id)
+{
+	struct mali_pp_core *core = NULL;
+
+	MALI_DEBUG_PRINT(2, ("Mali PP: Creating Mali PP core: %s\n", resource->description));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Base address of PP core: 0x%x\n", resource->base));
+
+	if (mali_global_num_pp_cores >= MALI_MAX_NUMBER_OF_PP_CORES) {
+		MALI_PRINT_ERROR(("Mali PP: Too many PP core objects created\n"));
+		return NULL;
+	}
+
+	core = _mali_osk_calloc(1, sizeof(struct mali_pp_core));
+	if (NULL != core) {
+		core->core_id = mali_global_num_pp_cores;
+		core->bcast_id = bcast_id;
+
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALI200_REG_SIZEOF_REGISTER_BANK)) {
+			_mali_osk_errcode_t ret;
+
+			if (!is_virtual) {
+				ret = mali_pp_reset(core);
+			} else {
+				ret = _MALI_OSK_ERR_OK;
+			}
+
+			if (_MALI_OSK_ERR_OK == ret) {
+				ret = mali_group_add_pp_core(group, core);
+				if (_MALI_OSK_ERR_OK == ret) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					MALI_DEBUG_ASSERT(!is_virtual || -1 != resource->irq);
+
+					core->irq = _mali_osk_irq_init(resource->irq,
+								       mali_group_upper_half_pp,
+								       group,
+								       mali_pp_irq_probe_trigger,
+								       mali_pp_irq_probe_ack,
+								       core,
+								       resource->description);
+					if (NULL != core->irq) {
+						mali_global_pp_cores[mali_global_num_pp_cores] = core;
+						mali_global_num_pp_cores++;
+
+						return core;
+					} else {
+						MALI_PRINT_ERROR(("Mali PP: Failed to setup interrupt handlers for PP core %s\n", core->hw_core.description));
+					}
+					mali_group_remove_pp_core(group);
+				} else {
+					MALI_PRINT_ERROR(("Mali PP: Failed to add core %s to group\n", core->hw_core.description));
+				}
+			}
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Mali PP: Failed to allocate memory for PP core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_pp_delete(struct mali_pp_core *core)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	_mali_osk_irq_term(core->irq);
+	mali_hw_core_delete(&core->hw_core);
+
+	/* Remove core from global list */
+	for (i = 0; i < mali_global_num_pp_cores; i++) {
+		if (mali_global_pp_cores[i] == core) {
+			mali_global_pp_cores[i] = NULL;
+			mali_global_num_pp_cores--;
+
+			if (i != mali_global_num_pp_cores) {
+				/* We removed a PP core from the middle of the array -- move the last
+				 * PP core to the current position to close the gap */
+				mali_global_pp_cores[i] = mali_global_pp_cores[mali_global_num_pp_cores];
+				mali_global_pp_cores[mali_global_num_pp_cores] = NULL;
+			}
+
+			break;
+		}
+	}
+
+	_mali_osk_free(core);
+}
+
+void mali_pp_stop_bus(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	/* Will only send the stop bus command, and not wait for it to complete */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_STOP_BUS);
+}
+
+_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core)
+{
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Send the stop bus command. */
+	mali_pp_stop_bus(core);
+
+	/* Wait for bus to be stopped */
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		if (mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS) & MALI200_REG_VAL_STATUS_BUS_STOPPED)
+			break;
+	}
+
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Mali PP: Failed to stop bus on %s. Status: 0x%08x\n", core->hw_core.description, mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/* Frame register reset values.
+ * Taken from the Mali400 TRM, 3.6. Pixel processor control register summary */
+static const u32 mali_frame_registers_reset_values[_MALI_PP_MAX_FRAME_REGISTERS] = {
+	0x0, /* Renderer List Address Register */
+	0x0, /* Renderer State Word Base Address Register */
+	0x0, /* Renderer Vertex Base Register */
+	0x2, /* Feature Enable Register */
+	0x0, /* Z Clear Value Register */
+	0x0, /* Stencil Clear Value Register */
+	0x0, /* ABGR Clear Value 0 Register */
+	0x0, /* ABGR Clear Value 1 Register */
+	0x0, /* ABGR Clear Value 2 Register */
+	0x0, /* ABGR Clear Value 3 Register */
+	0x0, /* Bounding Box Left Right Register */
+	0x0, /* Bounding Box Bottom Register */
+	0x0, /* FS Stack Address Register */
+	0x0, /* FS Stack Size and Initial Value Register */
+	0x0, /* Reserved */
+	0x0, /* Reserved */
+	0x0, /* Origin Offset X Register */
+	0x0, /* Origin Offset Y Register */
+	0x75, /* Subpixel Specifier Register */
+	0x0, /* Tiebreak mode Register */
+	0x0, /* Polygon List Format Register */
+	0x0, /* Scaling Register */
+	0x0 /* Tilebuffer configuration Register */
+};
+
+/* WBx register reset values */
+static const u32 mali_wb_registers_reset_values[_MALI_PP_MAX_WB_REGISTERS] = {
+	0x0, /* WBx Source Select Register */
+	0x0, /* WBx Target Address Register */
+	0x0, /* WBx Target Pixel Format Register */
+	0x0, /* WBx Target AA Format Register */
+	0x0, /* WBx Target Layout */
+	0x0, /* WBx Target Scanline Length */
+	0x0, /* WBx Target Flags Register */
+	0x0, /* WBx MRT Enable Register */
+	0x0, /* WBx MRT Offset Register */
+	0x0, /* WBx Global Test Enable Register */
+	0x0, /* WBx Global Test Reference Value Register */
+	0x0  /* WBx Global Test Compare Function Register */
+};
+
+/* Performance Counter 0 Enable Register reset value */
+static const u32 mali_perf_cnt_enable_reset_value = 0;
+
+extern int pp_hardware_reset;
+_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core)
+{
+	/* Bus must be stopped before calling this function */
+	const u32 reset_wait_target_register = MALI200_REG_ADDR_MGMT_PERF_CNT_0_LIMIT;
+	const u32 reset_invalid_value = 0xC0FFE000;
+	const u32 reset_check_value = 0xC01A0000;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+	MALI_DEBUG_PRINT(2, ("Mali PP: Hard reset of core %s\n", core->hw_core.description));
+	pp_hardware_reset ++;
+
+	/* Set register to a bogus value. The register will be used to detect when reset is complete */
+	mali_hw_core_register_write_relaxed(&core->hw_core, reset_wait_target_register, reset_invalid_value);
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE);
+
+	/* Force core to reset */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET);
+
+	/* Wait for reset to be complete */
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value);
+		if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) {
+			break;
+		}
+	}
+
+	if (MALI_REG_POLL_COUNT_FAST == i) {
+		MALI_PRINT_ERROR(("Mali PP: The hard reset loop didn't work, unable to recover\n"));
+	}
+
+	mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, 0x00000000); /* set it back to the default */
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_pp_reset_async(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	MALI_DEBUG_PRINT(4, ("Mali PP: Reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET);
+}
+
+_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core)
+{
+	int i;
+	u32 rawstat = 0;
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		u32 status =  mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS);
+		if (!(status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE)) {
+			rawstat = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT);
+			if (rawstat == MALI400PP_REG_VAL_IRQ_RESET_COMPLETED) {
+				break;
+			}
+		}
+	}
+
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Mali PP: Failed to reset core %s, rawstat: 0x%08x\n",
+				  core->hw_core.description, rawstat));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core)
+{
+	mali_pp_reset_async(core);
+	return mali_pp_reset_wait(core);
+}
+
+void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual)
+{
+	u32 relative_address;
+	u32 start_index;
+	u32 nr_of_regs;
+	u32 *frame_registers = mali_pp_job_get_frame_registers(job);
+	u32 *wb0_registers = mali_pp_job_get_wb0_registers(job);
+	u32 *wb1_registers = mali_pp_job_get_wb1_registers(job);
+	u32 *wb2_registers = mali_pp_job_get_wb2_registers(job);
+	u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, sub_job);
+	u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, sub_job);
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Write frame registers */
+
+	/*
+	 * There are two frame registers which are different for each sub job:
+	 * 1. The Renderer List Address Register (MALI200_REG_ADDR_FRAME)
+	 * 2. The FS Stack Address Register (MALI200_REG_ADDR_STACK)
+	 */
+	mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_FRAME, mali_pp_job_get_addr_frame(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_FRAME / sizeof(u32)]);
+
+	/* For virtual jobs, the stack address shouldn't be broadcast but written individually */
+	if (!mali_pp_job_is_virtual(job) || restart_virtual) {
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_STACK, mali_pp_job_get_addr_stack(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_STACK / sizeof(u32)]);
+	}
+
+	/* Write registers between MALI200_REG_ADDR_FRAME and MALI200_REG_ADDR_STACK */
+	relative_address = MALI200_REG_ADDR_RSW;
+	start_index = MALI200_REG_ADDR_RSW / sizeof(u32);
+	nr_of_regs = (MALI200_REG_ADDR_STACK - MALI200_REG_ADDR_RSW) / sizeof(u32);
+
+	mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core,
+			relative_address, &frame_registers[start_index],
+			nr_of_regs, &mali_frame_registers_reset_values[start_index]);
+
+	/* MALI200_REG_ADDR_STACK_SIZE */
+	relative_address = MALI200_REG_ADDR_STACK_SIZE;
+	start_index = MALI200_REG_ADDR_STACK_SIZE / sizeof(u32);
+
+	mali_hw_core_register_write_relaxed_conditional(&core->hw_core,
+			relative_address, frame_registers[start_index],
+			mali_frame_registers_reset_values[start_index]);
+
+	/* Skip 2 reserved registers */
+
+	/* Write remaining registers */
+	relative_address = MALI200_REG_ADDR_ORIGIN_OFFSET_X;
+	start_index = MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32);
+	nr_of_regs = MALI_PP_MALI400_NUM_FRAME_REGISTERS - MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32);
+
+	mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core,
+			relative_address, &frame_registers[start_index],
+			nr_of_regs, &mali_frame_registers_reset_values[start_index]);
+
+	/* Write WBx registers */
+	if (wb0_registers[0]) { /* M200_WB0_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB0, wb0_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (wb1_registers[0]) { /* M200_WB1_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB1, wb1_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (wb2_registers[0]) { /* M200_WB2_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB2, wb2_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0);
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value);
+	}
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1);
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value);
+	}
+
+#ifdef CONFIG_MALI400_HEATMAPS_ENABLED
+	if (job->uargs.perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERFMON_CONTR, ((job->uargs.tilesx & 0x3FF) << 16) | 1);
+		mali_hw_core_register_write_relaxed(&core->hw_core,  MALI200_REG_ADDR_MGMT_PERFMON_BASE, job->uargs.heatmap_mem & 0xFFFFFFF8);
+	}
+#endif /* CONFIG_MALI400_HEATMAPS_ENABLED */
+
+	MALI_DEBUG_PRINT(3, ("Mali PP: Starting job 0x%08X part %u/%u on PP core %s\n", job, sub_job + 1, mali_pp_job_get_sub_job_count(job), core->hw_core.description));
+
+	/* Adding barrier to make sure all rester writes are finished */
+	_mali_osk_write_mem_barrier();
+
+	/* This is the command that starts the core.
+	 *
+	 * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just
+	 * force core to assert the completion interrupt.
+	 */
+#if !defined(PROFILING_SKIP_PP_JOBS)
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_START_RENDERING);
+#else
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_END_OF_FRAME);
+#endif
+
+	/* Adding barrier to make sure previous rester writes is finished */
+	_mali_osk_write_mem_barrier();
+}
+
+u32 mali_pp_core_get_version(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION);
+}
+
+struct mali_pp_core *mali_pp_get_global_pp_core(u32 index)
+{
+	if (mali_global_num_pp_cores > index) {
+		return mali_global_pp_cores[index];
+	}
+
+	return NULL;
+}
+
+u32 mali_pp_get_glob_num_pp_cores(void)
+{
+	return mali_global_num_pp_cores;
+}
+
+/* ------------- interrupt handling below ------------------ */
+static void mali_pp_irq_probe_trigger(void *data)
+{
+	struct mali_pp_core *core = (struct mali_pp_core *)data;
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_BUS_ERROR);
+	_mali_osk_mem_barrier();
+}
+
+static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data)
+{
+	struct mali_pp_core *core = (struct mali_pp_core *)data;
+	u32 irq_readout;
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS);
+	if (MALI200_REG_VAL_IRQ_BUS_ERROR & irq_readout) {
+		mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_BUS_ERROR);
+		_mali_osk_mem_barrier();
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+#if 0
+static void mali_pp_print_registers(struct mali_pp_core *core)
+{
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_VERSION = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_RAWSTAT = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_MASK = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE)));
+}
+#endif
+
+#if 0
+void mali_pp_print_state(struct mali_pp_core *core)
+{
+	MALI_DEBUG_PRINT(2, ("Mali PP: State: 0x%08x\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+}
+#endif
+
+void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob)
+{
+	u32 val0 = 0;
+	u32 val1 = 0;
+	u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, subjob);
+	u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, subjob);
+#if defined(CONFIG_MALI400_PROFILING)
+	int counter_index = COUNTER_FP_0_C0 + (2 * child->core_id);
+#endif
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+		val0 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE);
+		mali_pp_job_set_perf_counter_value0(job, subjob, val0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(counter_index, val0);
+		_mali_osk_profiling_record_global_counters(counter_index, val0);
+#endif
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+		val1 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE);
+		mali_pp_job_set_perf_counter_value1(job, subjob, val1);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(counter_index + 1, val1);
+		_mali_osk_profiling_record_global_counters(counter_index + 1, val1);
+#endif
+	}
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\tPP #%d: %s\n", core->core_id, core->hw_core.description);
+
+	return n;
+}
+#endif
diff --git a/utgard/r6p2/common/mali_pp.h b/utgard/r6p2/common/mali_pp.h
new file mode 100755
index 0000000..7633ecd
--- /dev/null
+++ b/utgard/r6p2/common/mali_pp.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PP_H__
+#define __MALI_PP_H__
+
+#include "mali_osk.h"
+#include "mali_pp_job.h"
+#include "mali_hw_core.h"
+
+struct mali_group;
+
+#define MALI_MAX_NUMBER_OF_PP_CORES        9
+
+/**
+ * Definition of the PP core struct
+ * Used to track a PP core in the system.
+ */
+struct mali_pp_core {
+	struct mali_hw_core  hw_core;           /**< Common for all HW cores */
+	_mali_osk_irq_t     *irq;               /**< IRQ handler */
+	u32                  core_id;           /**< Unique core ID */
+	u32                  bcast_id;          /**< The "flag" value used by the Mali-450 broadcast and DLBU unit */
+};
+
+_mali_osk_errcode_t mali_pp_initialize(void);
+void mali_pp_terminate(void);
+
+struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id);
+void mali_pp_delete(struct mali_pp_core *core);
+
+void mali_pp_stop_bus(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core);
+void mali_pp_reset_async(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core);
+
+void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual);
+
+u32 mali_pp_core_get_version(struct mali_pp_core *core);
+
+MALI_STATIC_INLINE u32 mali_pp_core_get_id(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return core->core_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_core_get_bcast_id(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return core->bcast_id;
+}
+
+struct mali_pp_core *mali_pp_get_global_pp_core(u32 index);
+u32 mali_pp_get_glob_num_pp_cores(void);
+
+/* Debug */
+u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size);
+
+/**
+ * Put instrumented HW counters from the core(s) to the job object (if enabled)
+ *
+ * parent and child is always the same, except for virtual jobs on Mali-450.
+ * In this case, the counters will be enabled on the virtual core (parent),
+ * but values need to be read from the child cores.
+ *
+ * @param parent The core used to see if the counters was enabled
+ * @param child The core to actually read the values from
+ * @job Job object to update with counter values (if enabled)
+ * @subjob Which subjob the counters are applicable for (core ID for virtual jobs)
+ */
+void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob);
+
+MALI_STATIC_INLINE const char *mali_pp_core_description(struct mali_pp_core *core)
+{
+	return core->hw_core.description;
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_pp_get_interrupt_result(struct mali_pp_core *core)
+{
+	u32 rawstat_used = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT) &
+			   MALI200_REG_VAL_IRQ_MASK_USED;
+	if (0 == rawstat_used) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	} else if (MALI200_REG_VAL_IRQ_END_OF_FRAME == rawstat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS;
+	}
+
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_get_rawstat(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core,
+					  MALI200_REG_ADDR_MGMT_INT_RAWSTAT);
+}
+
+
+MALI_STATIC_INLINE u32 mali_pp_is_active(struct mali_pp_core *core)
+{
+	u32 status = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS);
+	return (status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_pp_mask_all_interrupts(struct mali_pp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE);
+}
+
+MALI_STATIC_INLINE void mali_pp_enable_interrupts(struct mali_pp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+}
+
+MALI_STATIC_INLINE void mali_pp_write_addr_renderer_list(struct mali_pp_core *core,
+		struct mali_pp_job *job, u32 subjob)
+{
+	u32 addr = mali_pp_job_get_addr_frame(job, subjob);
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_FRAME, addr);
+}
+
+
+MALI_STATIC_INLINE void mali_pp_write_addr_stack(struct mali_pp_core *core, struct mali_pp_job *job)
+{
+	u32 addr = mali_pp_job_get_addr_stack(job, core->core_id);
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_STACK, addr);
+}
+
+#endif /* __MALI_PP_H__ */
diff --git a/utgard/r6p2/common/mali_pp_job.c b/utgard/r6p2/common/mali_pp_job.c
new file mode 100755
index 0000000..27349c9
--- /dev/null
+++ b/utgard/r6p2/common/mali_pp_job.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pp.h"
+#include "mali_pp_job.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_executor.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#include "linux/mali_memory_dma_buf.h"
+#endif
+#include "mali_memory_swap_alloc.h"
+#include "mali_scheduler.h"
+
+static u32 pp_counter_src0 = MALI_HW_CORE_NO_COUNTER;   /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+static u32 pp_counter_src1 = MALI_HW_CORE_NO_COUNTER;   /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+static _mali_osk_atomic_t pp_counter_per_sub_job_count; /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */
+static u32 pp_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER };
+static u32 pp_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER };
+
+void mali_pp_job_initialize(void)
+{
+	_mali_osk_atomic_init(&pp_counter_per_sub_job_count, 0);
+}
+
+void mali_pp_job_terminate(void)
+{
+	_mali_osk_atomic_term(&pp_counter_per_sub_job_count);
+}
+
+struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session,
+				       _mali_uk_pp_start_job_s __user *uargs, u32 id)
+{
+	struct mali_pp_job *job;
+	u32 perf_counter_flag;
+
+	job = _mali_osk_calloc(1, sizeof(struct mali_pp_job));
+	if (NULL != job) {
+
+		_mali_osk_list_init(&job->list);
+		_mali_osk_list_init(&job->session_fb_lookup_list);
+
+		if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_pp_start_job_s))) {
+			goto fail;
+		}
+
+		if (job->uargs.num_cores > _MALI_PP_MAX_SUB_JOBS) {
+			MALI_PRINT_ERROR(("Mali PP job: Too many sub jobs specified in job object\n"));
+			goto fail;
+		}
+
+		if (!mali_pp_job_use_no_notification(job)) {
+			job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_FINISHED, sizeof(_mali_uk_pp_job_finished_s));
+			if (NULL == job->finished_notification) goto fail;
+		}
+
+		perf_counter_flag = mali_pp_job_get_perf_counter_flag(job);
+
+		/* case when no counters came from user space
+		 * so pass the debugfs / DS-5 provided global ones to the job object */
+		if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) ||
+		      (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) {
+			u32 sub_job_count = _mali_osk_atomic_read(&pp_counter_per_sub_job_count);
+
+			/* These counters apply for all virtual jobs, and where no per sub job counter is specified */
+			job->uargs.perf_counter_src0 = pp_counter_src0;
+			job->uargs.perf_counter_src1 = pp_counter_src1;
+
+			/* We only copy the per sub job array if it is enabled with at least one counter */
+			if (0 < sub_job_count) {
+				job->perf_counter_per_sub_job_count = sub_job_count;
+				_mali_osk_memcpy(job->perf_counter_per_sub_job_src0, pp_counter_per_sub_job_src0, sizeof(pp_counter_per_sub_job_src0));
+				_mali_osk_memcpy(job->perf_counter_per_sub_job_src1, pp_counter_per_sub_job_src1, sizeof(pp_counter_per_sub_job_src1));
+			}
+		}
+
+		job->session = session;
+		job->id = id;
+
+		job->sub_jobs_num = job->uargs.num_cores ? job->uargs.num_cores : 1;
+		job->pid = _mali_osk_get_pid();
+		job->tid = _mali_osk_get_tid();
+
+		_mali_osk_atomic_init(&job->sub_jobs_completed, 0);
+		_mali_osk_atomic_init(&job->sub_job_errors, 0);
+		job->swap_status = MALI_NO_SWAP_IN;
+		job->user_notification = MALI_FALSE;
+		job->num_pp_cores_in_virtual = 0;
+
+		if (job->uargs.num_memory_cookies > session->allocation_mgr.mali_allocation_num) {
+			MALI_PRINT_ERROR(("Mali PP job: The number of memory cookies is invalid !\n"));
+			goto fail;
+		}
+
+		if (job->uargs.num_memory_cookies > 0) {
+			u32 size;
+			u32 __user *memory_cookies = (u32 __user *)(uintptr_t)job->uargs.memory_cookies;
+
+			size = sizeof(*memory_cookies) * (job->uargs.num_memory_cookies);
+
+			job->memory_cookies = _mali_osk_malloc(size);
+			if (NULL == job->memory_cookies) {
+				MALI_PRINT_ERROR(("Mali PP job: Failed to allocate %d bytes of memory cookies!\n", size));
+				goto fail;
+			}
+
+			if (0 != _mali_osk_copy_from_user(job->memory_cookies, memory_cookies, size)) {
+				MALI_PRINT_ERROR(("Mali PP job: Failed to copy %d bytes of memory cookies from user!\n", size));
+				goto fail;
+			}
+		}
+
+		if (_MALI_OSK_ERR_OK != mali_pp_job_check(job)) {
+			/* Not a valid job. */
+			goto fail;
+		}
+
+		mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_PP, NULL, job);
+		mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence));
+
+		mali_mem_swap_in_pages(job);
+
+		return job;
+	}
+
+fail:
+	if (NULL != job) {
+		mali_pp_job_delete(job);
+	}
+
+	return NULL;
+}
+
+void mali_pp_job_delete(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list));
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->session_fb_lookup_list));
+
+	if (NULL != job->memory_cookies) {
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+		/* Unmap buffers attached to job */
+		mali_dma_buf_unmap_job(job);
+#endif
+		if (MALI_NO_SWAP_IN != job->swap_status) {
+			mali_mem_swap_out_pages(job);
+		}
+
+		_mali_osk_free(job->memory_cookies);
+	}
+
+	if (job->user_notification) {
+		mali_scheduler_return_pp_job_to_user(job,
+						     job->num_pp_cores_in_virtual);
+	}
+
+	if (NULL != job->finished_notification) {
+		_mali_osk_notification_delete(job->finished_notification);
+	}
+
+	_mali_osk_atomic_term(&job->sub_jobs_completed);
+	_mali_osk_atomic_term(&job->sub_job_errors);
+
+	_mali_osk_free(job);
+}
+
+void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list)
+{
+	struct mali_pp_job *iter;
+	struct mali_pp_job *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Find position in list/queue where job should be added. */
+	_MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list,
+					    struct mali_pp_job, list) {
+		/* job should be started after iter if iter is in progress. */
+		if (0 < iter->sub_jobs_started) {
+			break;
+		}
+
+		/*
+		 * job should be started after iter if it has a higher
+		 * job id. A span is used to handle job id wrapping.
+		 */
+		if ((mali_pp_job_get_id(job) -
+		     mali_pp_job_get_id(iter)) <
+		    MALI_SCHEDULER_JOB_ID_SPAN) {
+			break;
+		}
+	}
+
+	_mali_osk_list_add(&job->list, &iter->list);
+}
+
+
+u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job)
+{
+	/* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */
+	if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) {
+		return job->uargs.perf_counter_src0;
+	}
+
+	/* Use per sub job counter if enabled... */
+	if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src0[sub_job]) {
+		return job->perf_counter_per_sub_job_src0[sub_job];
+	}
+
+	/* ...else default to global job counter */
+	return job->uargs.perf_counter_src0;
+}
+
+u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job)
+{
+	/* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */
+	if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) {
+		/* Virtual jobs always use the global job counter */
+		return job->uargs.perf_counter_src1;
+	}
+
+	/* Use per sub job counter if enabled... */
+	if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src1[sub_job]) {
+		return job->perf_counter_per_sub_job_src1[sub_job];
+	}
+
+	/* ...else default to global job counter */
+	return job->uargs.perf_counter_src1;
+}
+
+void mali_pp_job_set_pp_counter_global_src0(u32 counter)
+{
+	pp_counter_src0 = counter;
+}
+
+void mali_pp_job_set_pp_counter_global_src1(u32 counter)
+{
+	pp_counter_src1 = counter;
+}
+
+void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+
+	if (MALI_HW_CORE_NO_COUNTER == pp_counter_per_sub_job_src0[sub_job]) {
+		/* increment count since existing counter was disabled */
+		_mali_osk_atomic_inc(&pp_counter_per_sub_job_count);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == counter) {
+		/* decrement count since new counter is disabled */
+		_mali_osk_atomic_dec(&pp_counter_per_sub_job_count);
+	}
+
+	/* PS: A change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will inc and dec, result will be 0 change */
+
+	pp_counter_per_sub_job_src0[sub_job] = counter;
+}
+
+void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+
+	if (MALI_HW_CORE_NO_COUNTER == pp_counter_per_sub_job_src1[sub_job]) {
+		/* increment count since existing counter was disabled */
+		_mali_osk_atomic_inc(&pp_counter_per_sub_job_count);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == counter) {
+		/* decrement count since new counter is disabled */
+		_mali_osk_atomic_dec(&pp_counter_per_sub_job_count);
+	}
+
+	/* PS: A change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will inc and dec, result will be 0 change */
+
+	pp_counter_per_sub_job_src1[sub_job] = counter;
+}
+
+u32 mali_pp_job_get_pp_counter_global_src0(void)
+{
+	return pp_counter_src0;
+}
+
+u32 mali_pp_job_get_pp_counter_global_src1(void)
+{
+	return pp_counter_src1;
+}
+
+u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+	return pp_counter_per_sub_job_src0[sub_job];
+}
+
+u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+	return pp_counter_per_sub_job_src1[sub_job];
+}
diff --git a/utgard/r6p2/common/mali_pp_job.h b/utgard/r6p2/common/mali_pp_job.h
new file mode 100755
index 0000000..dbb04f2
--- /dev/null
+++ b/utgard/r6p2/common/mali_pp_job.h
@@ -0,0 +1,590 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PP_JOB_H__
+#define __MALI_PP_JOB_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "regs/mali_200_regs.h"
+#include "mali_kernel_core.h"
+#include "mali_dlbu.h"
+#include "mali_timeline.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#include "linux/mali_memory_dma_buf.h"
+#endif
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+#include "linux/mali_dma_fence.h"
+#include <linux/fence.h>
+#endif
+
+typedef enum pp_job_status {
+	MALI_NO_SWAP_IN,
+	MALI_SWAP_IN_FAIL,
+	MALI_SWAP_IN_SUCC,
+} pp_job_status;
+
+/**
+ * This structure represents a PP job, including all sub jobs.
+ *
+ * The PP job object itself is not protected by any single lock,
+ * but relies on other locks instead (scheduler, executor and timeline lock).
+ * Think of the job object as moving between these sub systems through-out
+ * its lifetime. Different part of the PP job struct is used by different
+ * subsystems. Accessor functions ensure that correct lock is taken.
+ * Do NOT access any data members directly from outside this module!
+ */
+struct mali_pp_job {
+	/*
+	 * These members are typically only set at creation,
+	 * and only read later on.
+	 * They do not require any lock protection.
+	 */
+	_mali_uk_pp_start_job_s uargs;                     /**< Arguments from user space */
+	struct mali_session_data *session;                 /**< Session which submitted this job */
+	u32 pid;                                           /**< Process ID of submitting process */
+	u32 tid;                                           /**< Thread ID of submitting thread */
+	u32 id;                                            /**< Identifier for this job in kernel space (sequential numbering) */
+	u32 cache_order;                                   /**< Cache order used for L2 cache flushing (sequential numbering) */
+	struct mali_timeline_tracker tracker;              /**< Timeline tracker for this job */
+	_mali_osk_notification_t *finished_notification;   /**< Notification sent back to userspace on job complete */
+	u32 perf_counter_per_sub_job_count;                /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */
+	u32 perf_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src0 */
+	u32 perf_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src1 */
+	u32 sub_jobs_num;                                  /**< Number of subjobs; set to 1 for Mali-450 if DLBU is used, otherwise equals number of PP cores */
+
+	pp_job_status swap_status;                         /**< Used to track each PP job swap status, if fail, we need to drop them in scheduler part */
+	mali_bool user_notification;                       /**< When we deferred delete PP job, we need to judge if we need to send job finish notification to user space */
+	u32 num_pp_cores_in_virtual;                       /**< How many PP cores we have when job finished */
+
+	/*
+	 * These members are used by both scheduler and executor.
+	 * They are "protected" by atomic operations.
+	 */
+	_mali_osk_atomic_t sub_jobs_completed;                            /**< Number of completed sub-jobs in this superjob */
+	_mali_osk_atomic_t sub_job_errors;                                /**< Bitfield with errors (errors for each single sub-job is or'ed together) */
+
+	/*
+	 * These members are used by scheduler, but only when no one else
+	 * knows about this job object but the working function.
+	 * No lock is thus needed for these.
+	 */
+	u32 *memory_cookies;                               /**< Memory cookies attached to job */
+
+	/*
+	 * These members are used by the scheduler,
+	 * protected by scheduler lock
+	 */
+	_mali_osk_list_t list;                             /**< Used to link jobs together in the scheduler queue */
+	_mali_osk_list_t session_fb_lookup_list;           /**< Used to link jobs together from the same frame builder in the session */
+	u32 sub_jobs_started;                              /**< Total number of sub-jobs started (always started in ascending order) */
+
+	/*
+	 * Set by executor/group on job completion, read by scheduler when
+	 * returning job to user. Hold executor lock when setting,
+	 * no lock needed when reading
+	 */
+	u32 perf_counter_value0[_MALI_PP_MAX_SUB_JOBS];    /**< Value of performance counter 0 (to be returned to user space), one for each sub job */
+	u32 perf_counter_value1[_MALI_PP_MAX_SUB_JOBS];    /**< Value of performance counter 1 (to be returned to user space), one for each sub job */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	struct mali_dma_fence_context dma_fence_context; /**< The mali dma fence context to record dma fence waiters that this job wait for */
+	struct fence *rendered_dma_fence; /**< the new dma fence link to this job */
+#endif
+};
+
+void mali_pp_job_initialize(void);
+void mali_pp_job_terminate(void);
+
+struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session, _mali_uk_pp_start_job_s *uargs, u32 id);
+void mali_pp_job_delete(struct mali_pp_job *job);
+
+u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job);
+u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job);
+
+void mali_pp_job_set_pp_counter_global_src0(u32 counter);
+void mali_pp_job_set_pp_counter_global_src1(u32 counter);
+void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter);
+void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter);
+
+u32 mali_pp_job_get_pp_counter_global_src0(void);
+u32 mali_pp_job_get_pp_counter_global_src1(void);
+u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job);
+u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job);
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->id;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_cache_order(struct mali_pp_job *job,
+		u32 cache_order)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	job->cache_order = cache_order;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_cache_order(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (NULL == job) ? 0 : job->cache_order;
+}
+
+MALI_STATIC_INLINE u64 mali_pp_job_get_user_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.user_job_ptr;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_frame_builder_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_builder_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_flush_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.flush_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_pid(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->pid;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_tid(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->tid;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_frame_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_dlbu_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.dlbu_registers;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_virtual(struct mali_pp_job *job)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (0 == job->uargs.num_cores) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_addr_frame(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (mali_pp_job_is_virtual(job)) {
+		return MALI_DLBU_VIRT_ADDR;
+	} else if (0 == sub_job) {
+		return job->uargs.frame_registers[MALI200_REG_ADDR_FRAME / sizeof(u32)];
+	} else if (sub_job < _MALI_PP_MAX_SUB_JOBS) {
+		return job->uargs.frame_registers_addr_frame[sub_job - 1];
+	}
+
+	return 0;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_addr_stack(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (0 == sub_job) {
+		return job->uargs.frame_registers[MALI200_REG_ADDR_STACK / sizeof(u32)];
+	} else if (sub_job < _MALI_PP_MAX_SUB_JOBS) {
+		return job->uargs.frame_registers_addr_stack[sub_job - 1];
+	}
+
+	return 0;
+}
+
+void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list);
+
+MALI_STATIC_INLINE void mali_pp_job_list_addtail(struct mali_pp_job *job,
+		_mali_osk_list_t *list)
+{
+	_mali_osk_list_addtail(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_list_move(struct mali_pp_job *job,
+		_mali_osk_list_t *list)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list));
+	_mali_osk_list_move(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_list_remove(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->list);
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb0_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb0_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb1_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb1_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb2_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb2_registers;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb0_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb1_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb2_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb0(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb1(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb2(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_all_writeback_unit_disabled(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] ||
+	    job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] ||
+	    job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT]
+	   ) {
+		/* At least one output unit active */
+		return MALI_FALSE;
+	}
+
+	/* All outputs are disabled - we can abort the job */
+	return MALI_TRUE;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_fb_lookup_add(struct mali_pp_job *job)
+{
+	u32 fb_lookup_id;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	fb_lookup_id = MALI_PP_JOB_FB_LOOKUP_LIST_MASK & job->uargs.frame_builder_id;
+
+	MALI_DEBUG_ASSERT(MALI_PP_JOB_FB_LOOKUP_LIST_SIZE > fb_lookup_id);
+
+	_mali_osk_list_addtail(&job->session_fb_lookup_list,
+			       &job->session->pp_job_fb_lookup_list[fb_lookup_id]);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_fb_lookup_remove(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->session_fb_lookup_list);
+}
+
+MALI_STATIC_INLINE struct mali_session_data *mali_pp_job_get_session(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->session;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_has_started_sub_jobs(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return (0 < job->sub_jobs_started) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_has_unstarted_sub_jobs(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return (job->sub_jobs_started < job->sub_jobs_num) ? MALI_TRUE : MALI_FALSE;
+}
+
+/* Function used when we are terminating a session with jobs. Return TRUE if it has a rendering job.
+   Makes sure that no new subjobs are started. */
+MALI_STATIC_INLINE void mali_pp_job_mark_unstarted_failed(struct mali_pp_job *job)
+{
+	u32 jobs_remaining;
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	jobs_remaining = job->sub_jobs_num - job->sub_jobs_started;
+	job->sub_jobs_started += jobs_remaining;
+
+	/* Not the most optimal way, but this is only used in error cases */
+	for (i = 0; i < jobs_remaining; i++) {
+		_mali_osk_atomic_inc(&job->sub_jobs_completed);
+		_mali_osk_atomic_inc(&job->sub_job_errors);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_complete(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->sub_jobs_num ==
+		_mali_osk_atomic_read(&job->sub_jobs_completed)) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_first_unstarted_sub_job(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return job->sub_jobs_started;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_sub_job_count(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->sub_jobs_num;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_unstarted_sub_job_count(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(job->sub_jobs_num >= job->sub_jobs_started);
+	return (job->sub_jobs_num - job->sub_jobs_started);
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_num_memory_cookies(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.num_memory_cookies;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_memory_cookie(
+	struct mali_pp_job *job, u32 index)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(index < job->uargs.num_memory_cookies);
+	MALI_DEBUG_ASSERT_POINTER(job->memory_cookies);
+	return job->memory_cookies[index];
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_needs_dma_buf_mapping(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (0 < job->uargs.num_memory_cookies) {
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_started(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Assert that we are marking the "first unstarted sub job" as started */
+	MALI_DEBUG_ASSERT(job->sub_jobs_started == sub_job);
+
+	job->sub_jobs_started++;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_completed(struct mali_pp_job *job, mali_bool success)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_atomic_inc(&job->sub_jobs_completed);
+	if (MALI_FALSE == success) {
+		_mali_osk_atomic_inc(&job->sub_job_errors);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_was_success(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	if (0 == _mali_osk_atomic_read(&job->sub_job_errors)) {
+		return MALI_TRUE;
+	}
+	return MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_use_no_notification(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.flags & _MALI_PP_JOB_FLAG_NO_NOTIFICATION) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_pilot_job(struct mali_pp_job *job)
+{
+	/*
+	 * A pilot job is currently identified as jobs which
+	 * require no callback notification.
+	 */
+	return mali_pp_job_use_no_notification(job);
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *
+mali_pp_job_get_finished_notification(struct mali_pp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->finished_notification);
+
+	notification = job->finished_notification;
+	job->finished_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_window_surface(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.flags & _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE)
+	       ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_protected_job(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.flags & _MALI_PP_JOB_FLAG_PROTECTED)
+	       ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_flag(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_flag;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value0(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value0[sub_job];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value1(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value1[sub_job];
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value0(struct mali_pp_job *job, u32 sub_job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value0[sub_job] = value;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value1(struct mali_pp_job *job, u32 sub_job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value1[sub_job] = value;
+}
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_pp_job_check(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	if (mali_pp_job_is_virtual(job) && job->sub_jobs_num != 1) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/**
+ * Returns MALI_TRUE if this job has more than two sub jobs and all sub jobs are unstarted.
+ *
+ * @param job Job to check.
+ * @return MALI_TRUE if job has more than two sub jobs and all sub jobs are unstarted, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_large_and_unstarted(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!mali_pp_job_is_virtual(job));
+
+	return (0 == job->sub_jobs_started && 2 < job->sub_jobs_num);
+}
+
+/**
+ * Get PP job's Timeline tracker.
+ *
+ * @param job PP job.
+ * @return Pointer to Timeline tracker for the job.
+ */
+MALI_STATIC_INLINE struct mali_timeline_tracker *mali_pp_job_get_tracker(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return &(job->tracker);
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_timeline_point_ptr(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr;
+}
+
+
+#endif /* __MALI_PP_JOB_H__ */
diff --git a/utgard/r6p2/common/mali_scheduler.c b/utgard/r6p2/common/mali_scheduler.c
new file mode 100755
index 0000000..677d8ee
--- /dev/null
+++ b/utgard/r6p2/common/mali_scheduler.c
@@ -0,0 +1,1548 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_scheduler.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_profiling.h"
+#include "mali_kernel_utilization.h"
+#include "mali_timeline.h"
+#include "mali_gp_job.h"
+#include "mali_pp_job.h"
+#include "mali_executor.h"
+#include "mali_group.h"
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include "mali_pm_metrics.h"
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+#include "mali_dma_fence.h"
+#include <linux/dma-buf.h>
+#endif
+#endif
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+#include <linux/sched.h>
+#include <trace/events/gpu.h>
+#endif
+/*
+ * ---------- static defines/constants ----------
+ */
+
+/*
+ * If dma_buf with map on demand is used, we defer job queue
+ * if in atomic context, since both might sleep.
+ */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#define MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE 1
+#endif
+#endif
+
+
+/*
+ * ---------- global variables (exported due to inline functions) ----------
+ */
+
+/* Lock protecting this module */
+_mali_osk_spinlock_irq_t *mali_scheduler_lock_obj = NULL;
+
+/* Queue of jobs to be executed on the GP group */
+struct mali_scheduler_job_queue job_queue_gp;
+
+/* Queue of PP jobs */
+struct mali_scheduler_job_queue job_queue_pp;
+
+_mali_osk_atomic_t mali_job_id_autonumber;
+_mali_osk_atomic_t mali_job_cache_order_autonumber;
+/*
+ * ---------- static variables ----------
+ */
+
+_mali_osk_wq_work_t *scheduler_wq_pp_job_delete = NULL;
+_mali_osk_spinlock_irq_t *scheduler_pp_job_delete_lock = NULL;
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_deletion_queue);
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+static _mali_osk_wq_work_t *scheduler_wq_pp_job_queue = NULL;
+static _mali_osk_spinlock_irq_t *scheduler_pp_job_queue_lock = NULL;
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_queue_list);
+#endif
+
+/*
+ * ---------- Forward declaration of static functions ----------
+ */
+
+static mali_timeline_point mali_scheduler_submit_gp_job(
+	struct mali_session_data *session, struct mali_gp_job *job);
+static _mali_osk_errcode_t mali_scheduler_submit_pp_job(
+	struct mali_session_data *session, struct mali_pp_job *job, mali_timeline_point *point);
+
+static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job);
+static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job);
+
+static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job,
+		mali_bool success);
+
+static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job);
+void mali_scheduler_do_pp_job_delete(void *arg);
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job);
+static void mali_scheduler_do_pp_job_queue(void *arg);
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+/*
+ * ---------- Actual implementation ----------
+ */
+
+_mali_osk_errcode_t mali_scheduler_initialize(void)
+{
+	_mali_osk_atomic_init(&mali_job_id_autonumber, 0);
+	_mali_osk_atomic_init(&mali_job_cache_order_autonumber, 0);
+
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.normal_pri);
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.high_pri);
+	job_queue_gp.depth = 0;
+	job_queue_gp.big_job_num = 0;
+
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.normal_pri);
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.high_pri);
+	job_queue_pp.depth = 0;
+	job_queue_pp.big_job_num = 0;
+
+	mali_scheduler_lock_obj = _mali_osk_spinlock_irq_init(
+					  _MALI_OSK_LOCKFLAG_ORDERED,
+					  _MALI_OSK_LOCK_ORDER_SCHEDULER);
+	if (NULL == mali_scheduler_lock_obj) {
+		mali_scheduler_terminate();
+	}
+
+	scheduler_wq_pp_job_delete = _mali_osk_wq_create_work(
+					     mali_scheduler_do_pp_job_delete, NULL);
+	if (NULL == scheduler_wq_pp_job_delete) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	scheduler_pp_job_delete_lock = _mali_osk_spinlock_irq_init(
+					       _MALI_OSK_LOCKFLAG_ORDERED,
+					       _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED);
+	if (NULL == scheduler_pp_job_delete_lock) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	scheduler_wq_pp_job_queue = _mali_osk_wq_create_work(
+					    mali_scheduler_do_pp_job_queue, NULL);
+	if (NULL == scheduler_wq_pp_job_queue) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	scheduler_pp_job_queue_lock = _mali_osk_spinlock_irq_init(
+					      _MALI_OSK_LOCKFLAG_ORDERED,
+					      _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED);
+	if (NULL == scheduler_pp_job_queue_lock) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_scheduler_terminate(void)
+{
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	if (NULL != scheduler_pp_job_queue_lock) {
+		_mali_osk_spinlock_irq_term(scheduler_pp_job_queue_lock);
+		scheduler_pp_job_queue_lock = NULL;
+	}
+
+	if (NULL != scheduler_wq_pp_job_queue) {
+		_mali_osk_wq_delete_work(scheduler_wq_pp_job_queue);
+		scheduler_wq_pp_job_queue = NULL;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+	if (NULL != scheduler_pp_job_delete_lock) {
+		_mali_osk_spinlock_irq_term(scheduler_pp_job_delete_lock);
+		scheduler_pp_job_delete_lock = NULL;
+	}
+
+	if (NULL != scheduler_wq_pp_job_delete) {
+		_mali_osk_wq_delete_work(scheduler_wq_pp_job_delete);
+		scheduler_wq_pp_job_delete = NULL;
+	}
+
+	if (NULL != mali_scheduler_lock_obj) {
+		_mali_osk_spinlock_irq_term(mali_scheduler_lock_obj);
+		mali_scheduler_lock_obj = NULL;
+	}
+
+	_mali_osk_atomic_term(&mali_job_cache_order_autonumber);
+	_mali_osk_atomic_term(&mali_job_id_autonumber);
+}
+
+u32 mali_scheduler_job_physical_head_count(mali_bool gpu_mode_is_secure)
+{
+	/*
+	 * Count how many physical sub jobs are present from the head of queue
+	 * until the first virtual job is present.
+	 * Early out when we have reached maximum number of PP cores (8)
+	 */
+	u32 count = 0;
+	struct mali_pp_job *job;
+	struct mali_pp_job *temp;
+
+	/* Check for partially started normal pri jobs */
+	if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+					   struct mali_pp_job, list);
+
+		MALI_DEBUG_ASSERT_POINTER(job);
+
+		if (MALI_TRUE == mali_pp_job_has_started_sub_jobs(job)) {
+			/*
+			 * Remember; virtual jobs can't be queued and started
+			 * at the same time, so this must be a physical job
+			 */
+			if ((MALI_FALSE  == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job))
+			    || (MALI_TRUE  == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job))) {
+
+				count += mali_pp_job_unstarted_sub_job_count(job);
+				if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) {
+					return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+				}
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.high_pri,
+				    struct mali_pp_job, list) {
+		if ((MALI_FALSE == mali_pp_job_is_virtual(job))
+		    && ((MALI_FALSE  == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job))
+			|| (MALI_TRUE  == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job)))) {
+
+			count += mali_pp_job_unstarted_sub_job_count(job);
+			if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) {
+				return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+			}
+		} else {
+			/* Came across a virtual job, so stop counting */
+			return count;
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.normal_pri,
+				    struct mali_pp_job, list) {
+		if ((MALI_FALSE == mali_pp_job_is_virtual(job))
+		    && (MALI_FALSE == mali_pp_job_has_started_sub_jobs(job))
+		    && ((MALI_FALSE  == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job))
+			|| (MALI_TRUE  == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job)))) {
+
+			count += mali_pp_job_unstarted_sub_job_count(job);
+			if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) {
+				return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+			}
+		} else {
+			/* Came across a virtual job, so stop counting */
+			return count;
+		}
+	}
+	return count;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_next(void)
+{
+	struct mali_pp_job *job;
+	struct mali_pp_job *temp;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	/* Check for partially started normal pri jobs */
+	if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+					   struct mali_pp_job, list);
+
+		MALI_DEBUG_ASSERT_POINTER(job);
+
+		if (MALI_TRUE == mali_pp_job_has_started_sub_jobs(job)) {
+			return job;
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.high_pri,
+				    struct mali_pp_job, list) {
+		return job;
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.normal_pri,
+				    struct mali_pp_job, list) {
+		return job;
+	}
+
+	return NULL;
+}
+
+mali_bool mali_scheduler_job_next_is_virtual(void)
+{
+	struct mali_pp_job *job;
+
+	job = mali_scheduler_job_pp_virtual_peek();
+	if (NULL != job) {
+		MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job));
+
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+struct mali_gp_job *mali_scheduler_job_gp_get(void)
+{
+	_mali_osk_list_t *queue;
+	struct mali_gp_job *job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+	MALI_DEBUG_ASSERT(0 < job_queue_gp.depth);
+	MALI_DEBUG_ASSERT(job_queue_gp.big_job_num <= job_queue_gp.depth);
+
+	if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) {
+		queue = &job_queue_gp.high_pri;
+	} else {
+		queue = &job_queue_gp.normal_pri;
+		MALI_DEBUG_ASSERT(!_mali_osk_list_empty(queue));
+	}
+
+	job = _MALI_OSK_LIST_ENTRY(queue->next, struct mali_gp_job, list);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	mali_gp_job_list_remove(job);
+	job_queue_gp.depth--;
+	if (job->big_job) {
+		job_queue_gp.big_job_num --;
+		if (job_queue_gp.big_job_num < MALI_MAX_PENDING_BIG_JOB) {
+			/* wake up process */
+			wait_queue_head_t *queue = mali_session_get_wait_queue();
+			wake_up(queue);
+		}
+	}
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void)
+{
+	struct mali_pp_job *job = NULL;
+	struct mali_pp_job *tmp_job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	/*
+	 * For PP jobs we favour partially started jobs in normal
+	 * priority queue over unstarted jobs in high priority queue
+	 */
+
+	if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+					       struct mali_pp_job, list);
+		MALI_DEBUG_ASSERT(NULL != tmp_job);
+
+		if (MALI_FALSE == mali_pp_job_is_virtual(tmp_job)) {
+			job = tmp_job;
+		}
+	}
+
+	if (NULL == job ||
+	    MALI_FALSE == mali_pp_job_has_started_sub_jobs(job)) {
+		/*
+		 * There isn't a partially started job in normal queue, so
+		 * look in high priority queue.
+		 */
+		if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+			MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+			tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next,
+						       struct mali_pp_job, list);
+			MALI_DEBUG_ASSERT(NULL != tmp_job);
+
+			if (MALI_FALSE == mali_pp_job_is_virtual(tmp_job)) {
+				job = tmp_job;
+			}
+		}
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void)
+{
+	struct mali_pp_job *job = NULL;
+	struct mali_pp_job *tmp_job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+		MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+		tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next,
+					       struct mali_pp_job, list);
+
+		if (MALI_TRUE == mali_pp_job_is_virtual(tmp_job)) {
+			job = tmp_job;
+		}
+	}
+
+	if (NULL == job) {
+		if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+			MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+			tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+						       struct mali_pp_job, list);
+
+			if (MALI_TRUE == mali_pp_job_is_virtual(tmp_job)) {
+				job = tmp_job;
+			}
+		}
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job)
+{
+	struct mali_pp_job *job = mali_scheduler_job_pp_physical_peek();
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == mali_pp_job_is_virtual(job));
+
+	if (NULL != job) {
+		*sub_job = mali_pp_job_get_first_unstarted_sub_job(job);
+
+		mali_pp_job_mark_sub_job_started(job, *sub_job);
+		if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(job)) {
+			/* Remove from queue when last sub job has been retrieved */
+			mali_pp_job_list_remove(job);
+		}
+
+		job_queue_pp.depth--;
+
+		/*
+		 * Job about to start so it is no longer be
+		 * possible to discard WB
+		 */
+		mali_pp_job_fb_lookup_remove(job);
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void)
+{
+	struct mali_pp_job *job = mali_scheduler_job_pp_virtual_peek();
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_pp_job_is_virtual(job));
+
+	if (NULL != job) {
+		MALI_DEBUG_ASSERT(0 ==
+				  mali_pp_job_get_first_unstarted_sub_job(job));
+		MALI_DEBUG_ASSERT(1 ==
+				  mali_pp_job_get_sub_job_count(job));
+
+		mali_pp_job_mark_sub_job_started(job, 0);
+
+		mali_pp_job_list_remove(job);
+
+		job_queue_pp.depth--;
+
+		/*
+		 * Job about to start so it is no longer be
+		 * possible to discard WB
+		 */
+		mali_pp_job_fb_lookup_remove(job);
+	}
+
+	return job;
+}
+
+mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Timeline activation for job %u (0x%08X).\n",
+			     mali_gp_job_get_id(job), job));
+
+	mali_scheduler_lock();
+
+	if (!mali_scheduler_queue_gp_job(job)) {
+		/* Failed to enqueue job, release job (with error) */
+
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_gp_job_get_tracker(job));
+		mali_gp_job_signal_pp_tracker(job, MALI_FALSE);
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_gp_job(job, MALI_FALSE,
+					       MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	mali_scheduler_unlock();
+
+	return MALI_SCHEDULER_MASK_GP;
+}
+
+mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Timeline activation for job %u (0x%08X).\n",
+			     mali_pp_job_get_id(job), job));
+
+	if (MALI_TRUE == mali_timeline_tracker_activation_error(
+		    mali_pp_job_get_tracker(job))) {
+		MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Job %u (0x%08X) activated with error, aborting.\n",
+				     mali_pp_job_get_id(job), job));
+
+		mali_scheduler_lock();
+		mali_pp_job_fb_lookup_remove(job);
+		mali_pp_job_mark_unstarted_failed(job);
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(job));
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	if (mali_pp_job_needs_dma_buf_mapping(job)) {
+		mali_scheduler_deferred_pp_job_queue(job);
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+	mali_scheduler_lock();
+
+	if (!mali_scheduler_queue_pp_job(job)) {
+		/* Failed to enqueue job, release job (with error) */
+		mali_pp_job_fb_lookup_remove(job);
+		mali_pp_job_mark_unstarted_failed(job);
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(job));
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	mali_scheduler_unlock();
+	return MALI_SCHEDULER_MASK_PP;
+}
+
+void mali_scheduler_complete_gp_job(struct mali_gp_job *job,
+				    mali_bool success,
+				    mali_bool user_notification,
+				    mali_bool dequeued)
+{
+	if (user_notification) {
+		mali_scheduler_return_gp_job_to_user(job, success);
+	}
+
+	if (dequeued) {
+		_mali_osk_pm_dev_ref_put();
+
+		if (mali_utilization_enabled()) {
+			mali_utilization_gp_end();
+		}
+		mali_pm_record_gpu_idle(MALI_TRUE);
+	}
+
+	mali_gp_job_delete(job);
+}
+
+void mali_scheduler_complete_pp_job(struct mali_pp_job *job,
+				    u32 num_cores_in_virtual,
+				    mali_bool user_notification,
+				    mali_bool dequeued)
+{
+	job->user_notification = user_notification;
+	job->num_pp_cores_in_virtual = num_cores_in_virtual;
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	if (NULL != job->rendered_dma_fence)
+		mali_dma_fence_signal_and_put(&job->rendered_dma_fence);
+#endif
+
+	if (dequeued) {
+#if defined(CONFIG_MALI_DVFS)
+		if (mali_pp_job_is_window_surface(job)) {
+			struct mali_session_data *session;
+			session = mali_pp_job_get_session(job);
+			mali_session_inc_num_window_jobs(session);
+		}
+#endif
+		_mali_osk_pm_dev_ref_put();
+
+		if (mali_utilization_enabled()) {
+			mali_utilization_pp_end();
+		}
+		mali_pm_record_gpu_idle(MALI_FALSE);
+	}
+
+	/* With ZRAM feature enabled, all pp jobs will be force to use deferred delete. */
+	mali_scheduler_deferred_pp_job_delete(job);
+}
+
+void mali_scheduler_abort_session(struct mali_session_data *session)
+{
+	struct mali_gp_job *gp_job;
+	struct mali_gp_job *gp_tmp;
+	struct mali_pp_job *pp_job;
+	struct mali_pp_job *pp_tmp;
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_gp);
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_pp);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali scheduler: Aborting all queued jobs from session 0x%08X.\n",
+			     session));
+
+	mali_scheduler_lock();
+
+	/* Remove from GP normal priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.normal_pri,
+				    struct mali_gp_job, list) {
+		if (mali_gp_job_get_session(gp_job) == session) {
+			mali_gp_job_list_move(gp_job, &removed_jobs_gp);
+			job_queue_gp.depth--;
+			job_queue_gp.big_job_num -= gp_job->big_job ? 1 : 0;
+		}
+	}
+
+	/* Remove from GP high priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.high_pri,
+				    struct mali_gp_job, list) {
+		if (mali_gp_job_get_session(gp_job) == session) {
+			mali_gp_job_list_move(gp_job, &removed_jobs_gp);
+			job_queue_gp.depth--;
+			job_queue_gp.big_job_num -= gp_job->big_job ? 1 : 0;
+		}
+	}
+
+	/* Remove from PP normal priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp,
+				    &job_queue_pp.normal_pri,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_get_session(pp_job) == session) {
+			mali_pp_job_fb_lookup_remove(pp_job);
+
+			job_queue_pp.depth -=
+				mali_pp_job_unstarted_sub_job_count(
+					pp_job);
+			mali_pp_job_mark_unstarted_failed(pp_job);
+
+			if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(pp_job)) {
+				if (mali_pp_job_is_complete(pp_job)) {
+					mali_pp_job_list_move(pp_job,
+							      &removed_jobs_pp);
+				} else {
+					mali_pp_job_list_remove(pp_job);
+				}
+			}
+		}
+	}
+
+	/* Remove from PP high priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp,
+				    &job_queue_pp.high_pri,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_get_session(pp_job) == session) {
+			mali_pp_job_fb_lookup_remove(pp_job);
+
+			job_queue_pp.depth -=
+				mali_pp_job_unstarted_sub_job_count(
+					pp_job);
+			mali_pp_job_mark_unstarted_failed(pp_job);
+
+			if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(pp_job)) {
+				if (mali_pp_job_is_complete(pp_job)) {
+					mali_pp_job_list_move(pp_job,
+							      &removed_jobs_pp);
+				} else {
+					mali_pp_job_list_remove(pp_job);
+				}
+			}
+		}
+	}
+
+	/*
+	 * Release scheduler lock so we can release trackers
+	 * (which will potentially queue new jobs)
+	 */
+	mali_scheduler_unlock();
+
+	/* Release and complete all (non-running) found GP jobs  */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &removed_jobs_gp,
+				    struct mali_gp_job, list) {
+		mali_timeline_tracker_release(mali_gp_job_get_tracker(gp_job));
+		mali_gp_job_signal_pp_tracker(gp_job, MALI_FALSE);
+		_mali_osk_list_delinit(&gp_job->list);
+		mali_scheduler_complete_gp_job(gp_job,
+					       MALI_FALSE, MALI_FALSE, MALI_TRUE);
+	}
+
+	/* Release and complete non-running PP jobs */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp, &removed_jobs_pp,
+				    struct mali_pp_job, list) {
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(pp_job));
+		_mali_osk_list_delinit(&pp_job->list);
+		mali_scheduler_complete_pp_job(pp_job, 0,
+					       MALI_FALSE, MALI_TRUE);
+	}
+}
+
+_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx,
+		_mali_uk_gp_start_job_s *uargs)
+{
+	struct mali_session_data *session;
+	struct mali_gp_job *job;
+	mali_timeline_point point;
+	u32 __user *point_ptr = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)ctx;
+
+	job = mali_gp_job_create(session, uargs, mali_scheduler_get_new_id(),
+				 NULL);
+	if (NULL == job) {
+		MALI_PRINT_ERROR(("Failed to create GP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_gp_job_get_timeline_point_ptr(job);
+
+	point = mali_scheduler_submit_gp_job(session, job);
+
+	if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+		/*
+		 * Let user space know that something failed
+		 * after the job was started.
+		 */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx,
+		_mali_uk_pp_start_job_s *uargs)
+{
+	_mali_osk_errcode_t ret;
+	struct mali_session_data *session;
+	struct mali_pp_job *job;
+	mali_timeline_point point;
+	u32 __user *point_ptr = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)ctx;
+
+	job = mali_pp_job_create(session, uargs, mali_scheduler_get_new_id());
+	if (NULL == job) {
+		MALI_PRINT_ERROR(("Failed to create PP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(job);
+
+	/* Submit PP job. */
+	ret = mali_scheduler_submit_pp_job(session, job, &point);
+	job = NULL;
+
+	if (_MALI_OSK_ERR_OK == ret) {
+		if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+			/*
+			* Let user space know that something failed
+			* after the jobs were started.
+			*/
+			return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+		}
+	}
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx,
+		_mali_uk_pp_and_gp_start_job_s *uargs)
+{
+	_mali_osk_errcode_t ret;
+	struct mali_session_data *session;
+	_mali_uk_pp_and_gp_start_job_s kargs;
+	struct mali_pp_job *pp_job;
+	struct mali_gp_job *gp_job;
+	u32 __user *point_ptr = NULL;
+	mali_timeline_point point;
+	_mali_uk_pp_start_job_s __user *pp_args;
+	_mali_uk_gp_start_job_s __user *gp_args;
+
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+
+	session = (struct mali_session_data *) ctx;
+
+	if (0 != _mali_osk_copy_from_user(&kargs, uargs,
+					  sizeof(_mali_uk_pp_and_gp_start_job_s))) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	pp_args = (_mali_uk_pp_start_job_s __user *)(uintptr_t)kargs.pp_args;
+	gp_args = (_mali_uk_gp_start_job_s __user *)(uintptr_t)kargs.gp_args;
+
+	pp_job = mali_pp_job_create(session, pp_args,
+				    mali_scheduler_get_new_id());
+	if (NULL == pp_job) {
+		MALI_PRINT_ERROR(("Failed to create PP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	gp_job = mali_gp_job_create(session, gp_args,
+				    mali_scheduler_get_new_id(),
+				    mali_pp_job_get_tracker(pp_job));
+	if (NULL == gp_job) {
+		MALI_PRINT_ERROR(("Failed to create GP job.\n"));
+		mali_pp_job_delete(pp_job);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(pp_job);
+
+	/* Submit GP job. */
+	mali_scheduler_submit_gp_job(session, gp_job);
+	gp_job = NULL;
+
+	/* Submit PP job. */
+	ret = mali_scheduler_submit_pp_job(session, pp_job, &point);
+	pp_job = NULL;
+
+	if (_MALI_OSK_ERR_OK == ret) {
+		if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+			/*
+			* Let user space know that something failed
+			* after the jobs were started.
+			*/
+			return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+		}
+	}
+
+	return ret;
+}
+
+void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args)
+{
+	struct mali_session_data *session;
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+	u32 fb_lookup_id;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	fb_lookup_id = args->fb_id & MALI_PP_JOB_FB_LOOKUP_LIST_MASK;
+
+	mali_scheduler_lock();
+
+	/* Iterate over all jobs for given frame builder_id. */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp,
+				    &session->pp_job_fb_lookup_list[fb_lookup_id],
+				    struct mali_pp_job, session_fb_lookup_list) {
+		MALI_DEBUG_CODE(u32 disable_mask = 0);
+
+		if (mali_pp_job_get_frame_builder_id(job) !=
+		    (u32) args->fb_id) {
+			MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Disable WB mismatching FB.\n"));
+			continue;
+		}
+
+		MALI_DEBUG_CODE(disable_mask |= 0xD << (4 * 3));
+
+		if (mali_pp_job_get_wb0_source_addr(job) == args->wb0_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x1 << (4 * 1));
+			mali_pp_job_disable_wb0(job);
+		}
+
+		if (mali_pp_job_get_wb1_source_addr(job) == args->wb1_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x2 << (4 * 2));
+			mali_pp_job_disable_wb1(job);
+		}
+
+		if (mali_pp_job_get_wb2_source_addr(job) == args->wb2_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x3 << (4 * 3));
+			mali_pp_job_disable_wb2(job);
+		}
+		MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Disable WB: 0x%X.\n",
+				     disable_mask));
+	}
+
+	mali_scheduler_unlock();
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_scheduler_dump_state(char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "GP queues\n");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tQueue depth: %u\n", job_queue_gp.depth);
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tNormal priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_gp.normal_pri) ?
+				"empty" : "not empty");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tHigh priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_gp.high_pri) ?
+				"empty" : "not empty");
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"PP queues\n");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tQueue depth: %u\n", job_queue_pp.depth);
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tNormal priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_pp.normal_pri)
+				? "empty" : "not empty");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tHigh priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_pp.high_pri)
+				? "empty" : "not empty");
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\n");
+
+	return n;
+}
+#endif
+
+/*
+ * ---------- Implementation of static functions ----------
+ */
+
+static mali_timeline_point mali_scheduler_submit_gp_job(
+	struct mali_session_data *session, struct mali_gp_job *job)
+{
+	mali_timeline_point point;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Add job to Timeline system. */
+	point = mali_timeline_system_add_tracker(session->timeline_system,
+			mali_gp_job_get_tracker(job), MALI_TIMELINE_GP);
+
+	return point;
+}
+
+static _mali_osk_errcode_t mali_scheduler_submit_pp_job(
+	struct mali_session_data *session, struct mali_pp_job *job, mali_timeline_point *point)
+
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	struct ww_acquire_ctx ww_actx;
+	u32 i;
+	u32 num_memory_cookies = 0;
+	struct reservation_object **reservation_object_list = NULL;
+	unsigned int num_reservation_object = 0;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	mali_scheduler_lock();
+	/*
+	 * Adding job to the lookup list used to quickly discard
+	 * writeback units of queued jobs.
+	 */
+	mali_pp_job_fb_lookup_add(job);
+	mali_scheduler_unlock();
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+
+	/* Allocate the reservation_object_list to list the dma reservation object of dependent dma buffer */
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+	if (0 < num_memory_cookies) {
+		reservation_object_list = kzalloc(sizeof(struct reservation_object *) * num_memory_cookies, GFP_KERNEL);
+		if (NULL == reservation_object_list) {
+			MALI_PRINT_ERROR(("Failed to alloc the reservation object list.\n"));
+			ret = _MALI_OSK_ERR_NOMEM;
+			goto failed_to_alloc_reservation_object_list;
+		}
+	}
+
+	/* Add the dma reservation object into reservation_object_list*/
+	for (i = 0; i < num_memory_cookies; i++) {
+		mali_mem_backend *mem_backend = NULL;
+		struct reservation_object *tmp_reservation_object = NULL;
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+
+		mem_backend = mali_mem_backend_struct_search(session, mali_addr);
+
+		MALI_DEBUG_ASSERT_POINTER(mem_backend);
+
+		if (NULL == mem_backend) {
+			MALI_PRINT_ERROR(("Failed to find the memory backend for memory cookie[%d].\n", i));
+			goto failed_to_find_mem_backend;
+		}
+
+		if (MALI_MEM_DMA_BUF != mem_backend->type)
+			continue;
+
+		tmp_reservation_object = mem_backend->dma_buf.attachment->buf->resv;
+
+		if (NULL != tmp_reservation_object) {
+			mali_dma_fence_add_reservation_object_list(tmp_reservation_object,
+					reservation_object_list, &num_reservation_object);
+		}
+	}
+
+	/*
+	 * Add the mali dma fence callback to wait for all dependent dma buf,
+	 * and extend the timeline system to support dma fence,
+	 * then create the new internal dma fence to replace all last dma fence for dependent dma buf.
+	 */
+	if (0 < num_reservation_object) {
+		int error;
+		int num_dma_fence_waiter = 0;
+		/* Create one new dma fence.*/
+		job->rendered_dma_fence = mali_dma_fence_new(job->session->fence_context,
+					  _mali_osk_atomic_inc_return(&job->session->fence_seqno));
+
+		if (NULL == job->rendered_dma_fence) {
+			MALI_PRINT_ERROR(("Failed to creat one new dma fence.\n"));
+			ret = _MALI_OSK_ERR_FAULT;
+			goto failed_to_create_dma_fence;
+		}
+
+		/* In order to avoid deadlock, wait/wound mutex lock to lock all dma buffers*/
+
+		error = mali_dma_fence_lock_reservation_object_list(reservation_object_list,
+				num_reservation_object, &ww_actx);
+
+		if (0 != error) {
+			MALI_PRINT_ERROR(("Failed to lock all reservation objects.\n"));
+			ret = _MALI_OSK_ERR_FAULT;
+			goto failed_to_lock_reservation_object_list;
+		}
+
+		mali_dma_fence_context_init(&job->dma_fence_context,
+					    mali_timeline_dma_fence_callback, (void *)job);
+
+		/* Add dma fence waiters and dma fence callback. */
+		for (i = 0; i < num_reservation_object; i++) {
+			ret = mali_dma_fence_context_add_waiters(&job->dma_fence_context, reservation_object_list[i]);
+			if (_MALI_OSK_ERR_OK != ret) {
+				MALI_PRINT_ERROR(("Failed to add waiter into mali dma fence context.\n"));
+				goto failed_to_add_dma_fence_waiter;
+			}
+		}
+
+		for (i = 0; i < num_reservation_object; i++) {
+			reservation_object_add_excl_fence(reservation_object_list[i], job->rendered_dma_fence);
+		}
+
+		num_dma_fence_waiter = job->dma_fence_context.num_dma_fence_waiter;
+
+		/* Add job to Timeline system. */
+		(*point) = mali_timeline_system_add_tracker(session->timeline_system,
+				mali_pp_job_get_tracker(job), MALI_TIMELINE_PP);
+
+		if (0 != num_dma_fence_waiter) {
+			mali_dma_fence_context_dec_count(&job->dma_fence_context);
+		}
+
+		/* Unlock all wait/wound mutex lock. */
+		mali_dma_fence_unlock_reservation_object_list(reservation_object_list,
+				num_reservation_object, &ww_actx);
+	} else {
+		/* Add job to Timeline system. */
+		(*point) = mali_timeline_system_add_tracker(session->timeline_system,
+				mali_pp_job_get_tracker(job), MALI_TIMELINE_PP);
+	}
+
+	kfree(reservation_object_list);
+	return ret;
+#else
+	/* Add job to Timeline system. */
+	(*point) = mali_timeline_system_add_tracker(session->timeline_system,
+			mali_pp_job_get_tracker(job), MALI_TIMELINE_PP);
+#endif
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+failed_to_add_dma_fence_waiter:
+	mali_dma_fence_context_term(&job->dma_fence_context);
+	mali_dma_fence_unlock_reservation_object_list(reservation_object_list,
+			num_reservation_object, &ww_actx);
+failed_to_lock_reservation_object_list:
+	mali_dma_fence_signal_and_put(&job->rendered_dma_fence);
+failed_to_create_dma_fence:
+failed_to_find_mem_backend:
+	if (NULL != reservation_object_list)
+		kfree(reservation_object_list);
+failed_to_alloc_reservation_object_list:
+	mali_pp_job_fb_lookup_remove(job);
+#endif
+	return ret;
+}
+
+static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job)
+{
+	struct mali_session_data *session;
+	_mali_osk_list_t *queue;
+
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_gp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (unlikely(session->is_aborting)) {
+		MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Job %u (0x%08X) queued while session is aborting.\n",
+				     mali_gp_job_get_id(job), job));
+		return MALI_FALSE; /* job not queued */
+	}
+
+	mali_gp_job_set_cache_order(job, mali_scheduler_get_new_cache_order());
+
+	/* Determine which queue the job should be added to. */
+	if (session->use_high_priority_job_queue) {
+		queue = &job_queue_gp.high_pri;
+	} else {
+		queue = &job_queue_gp.normal_pri;
+	}
+
+	job_queue_gp.depth += 1;
+	job_queue_gp.big_job_num += (job->big_job) ? 1 : 0;
+
+	/* Add job to queue (mali_gp_job_queue_add find correct place). */
+	mali_gp_job_list_add(job, queue);
+
+	/*
+	 * We hold a PM reference for every job we hold queued (and running)
+	 * It is important that we take this reference after job has been
+	 * added the the queue so that any runtime resume could schedule this
+	 * job right there and then.
+	 */
+	_mali_osk_pm_dev_ref_get_async();
+
+	if (mali_utilization_enabled()) {
+		/*
+		 * We cheat a little bit by counting the GP as busy from the
+		 * time a GP job is queued. This will be fine because we only
+		 * loose the tiny idle gap between jobs, but we will instead
+		 * get less utilization work to do (less locks taken)
+		 */
+		mali_utilization_gp_start();
+	}
+
+	mali_pm_record_gpu_active(MALI_TRUE);
+
+	/* Add profiling events for job enqueued */
+	_mali_osk_profiling_add_event(
+		MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE,
+		mali_gp_job_get_pid(job),
+		mali_gp_job_get_tid(job),
+		mali_gp_job_get_frame_builder_id(job),
+		mali_gp_job_get_flush_id(job),
+		0);
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+	trace_gpu_job_enqueue(mali_gp_job_get_tid(job),
+			      mali_gp_job_get_id(job), "GP");
+#endif
+
+	MALI_DEBUG_PRINT(3, ("Mali GP scheduler: Job %u (0x%08X) queued\n",
+			     mali_gp_job_get_id(job), job));
+
+	return MALI_TRUE; /* job queued */
+}
+
+static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job)
+{
+	struct mali_session_data *session;
+	_mali_osk_list_t *queue = NULL;
+
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_pp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (unlikely(session->is_aborting)) {
+		MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while session is aborting.\n",
+				     mali_pp_job_get_id(job), job));
+		return MALI_FALSE; /* job not queued */
+	} else if (unlikely(MALI_SWAP_IN_FAIL == job->swap_status)) {
+		MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while swap in failed.\n",
+				     mali_pp_job_get_id(job), job));
+		return MALI_FALSE;
+	}
+
+	mali_pp_job_set_cache_order(job, mali_scheduler_get_new_cache_order());
+
+	if (session->use_high_priority_job_queue) {
+		queue = &job_queue_pp.high_pri;
+	} else {
+		queue = &job_queue_pp.normal_pri;
+	}
+
+	job_queue_pp.depth +=
+		mali_pp_job_get_sub_job_count(job);
+
+	/* Add job to queue (mali_gp_job_queue_add find correct place). */
+	mali_pp_job_list_add(job, queue);
+
+	/*
+	 * We hold a PM reference for every job we hold queued (and running)
+	 * It is important that we take this reference after job has been
+	 * added the the queue so that any runtime resume could schedule this
+	 * job right there and then.
+	 */
+	_mali_osk_pm_dev_ref_get_async();
+
+	if (mali_utilization_enabled()) {
+		/*
+		 * We cheat a little bit by counting the PP as busy from the
+		 * time a PP job is queued. This will be fine because we only
+		 * loose the tiny idle gap between jobs, but we will instead
+		 * get less utilization work to do (less locks taken)
+		 */
+		mali_utilization_pp_start();
+	}
+
+	mali_pm_record_gpu_active(MALI_FALSE);
+
+	/* Add profiling events for job enqueued */
+	_mali_osk_profiling_add_event(
+		MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE,
+		mali_pp_job_get_pid(job),
+		mali_pp_job_get_tid(job),
+		mali_pp_job_get_frame_builder_id(job),
+		mali_pp_job_get_flush_id(job),
+		0);
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+	trace_gpu_job_enqueue(mali_pp_job_get_tid(job),
+			      mali_pp_job_get_id(job), "PP");
+#endif
+
+	MALI_DEBUG_PRINT(3, ("Mali PP scheduler: %s job %u (0x%08X) with %u parts queued.\n",
+			     mali_pp_job_is_virtual(job)
+			     ? "Virtual" : "Physical",
+			     mali_pp_job_get_id(job), job,
+			     mali_pp_job_get_sub_job_count(job)));
+
+	return MALI_TRUE; /* job queued */
+}
+
+static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job,
+		mali_bool success)
+{
+	_mali_uk_gp_job_finished_s *jobres;
+	struct mali_session_data *session;
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_gp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	notification = mali_gp_job_get_finished_notification(job);
+	MALI_DEBUG_ASSERT_POINTER(notification);
+
+	jobres = notification->result_buffer;
+	MALI_DEBUG_ASSERT_POINTER(jobres);
+
+	jobres->pending_big_job_num = mali_scheduler_job_gp_big_job_count();
+
+	jobres->user_job_ptr = mali_gp_job_get_user_id(job);
+	if (MALI_TRUE == success) {
+		jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS;
+	} else {
+		jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR;
+	}
+	jobres->heap_current_addr = mali_gp_job_get_current_heap_addr(job);
+	jobres->perf_counter0 = mali_gp_job_get_perf_counter_value0(job);
+	jobres->perf_counter1 = mali_gp_job_get_perf_counter_value1(job);
+
+	mali_session_send_notification(session, notification);
+}
+
+void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job,
+		u32 num_cores_in_virtual)
+{
+	u32 i;
+	u32 num_counters_to_copy;
+	_mali_uk_pp_job_finished_s *jobres;
+	struct mali_session_data *session;
+	_mali_osk_notification_t *notification;
+
+	if (MALI_TRUE == mali_pp_job_use_no_notification(job)) {
+		return;
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_pp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	notification = mali_pp_job_get_finished_notification(job);
+	MALI_DEBUG_ASSERT_POINTER(notification);
+
+	jobres = notification->result_buffer;
+	MALI_DEBUG_ASSERT_POINTER(jobres);
+
+	jobres->user_job_ptr = mali_pp_job_get_user_id(job);
+	if (MALI_TRUE == mali_pp_job_was_success(job)) {
+		jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS;
+	} else {
+		jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR;
+	}
+
+	if (mali_pp_job_is_virtual(job)) {
+		num_counters_to_copy = num_cores_in_virtual;
+	} else {
+		num_counters_to_copy = mali_pp_job_get_sub_job_count(job);
+	}
+
+	for (i = 0; i < num_counters_to_copy; i++) {
+		jobres->perf_counter0[i] =
+			mali_pp_job_get_perf_counter_value0(job, i);
+		jobres->perf_counter1[i] =
+			mali_pp_job_get_perf_counter_value1(job, i);
+		jobres->perf_counter_src0 =
+			mali_pp_job_get_pp_counter_global_src0();
+		jobres->perf_counter_src1 =
+			mali_pp_job_get_pp_counter_global_src1();
+	}
+
+	mali_session_send_notification(session, notification);
+}
+
+static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock);
+	mali_pp_job_list_addtail(job, &scheduler_pp_job_deletion_queue);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock);
+
+	_mali_osk_wq_schedule_work(scheduler_wq_pp_job_delete);
+}
+
+void mali_scheduler_do_pp_job_delete(void *arg)
+{
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(list);
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+
+	MALI_IGNORE(arg);
+
+	/*
+	 * Quickly "unhook" the jobs pending to be deleted, so we can release
+	 * the lock before we start deleting the job objects
+	 * (without any locks held)
+	 */
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock);
+	_mali_osk_list_move_list(&scheduler_pp_job_deletion_queue, &list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+		_mali_osk_list_delinit(&job->list);
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+		mali_dma_fence_context_term(&job->dma_fence_context);
+#endif
+
+		mali_pp_job_delete(job); /* delete the job object itself */
+	}
+}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+
+static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock);
+	mali_pp_job_list_addtail(job, &scheduler_pp_job_queue_list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock);
+
+	_mali_osk_wq_schedule_work(scheduler_wq_pp_job_queue);
+}
+
+static void mali_scheduler_do_pp_job_queue(void *arg)
+{
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(list);
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_IGNORE(arg);
+
+	/*
+	 * Quickly "unhook" the jobs pending to be queued, so we can release
+	 * the lock before we start queueing the job objects
+	 * (without any locks held)
+	 */
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock);
+	_mali_osk_list_move_list(&scheduler_pp_job_queue_list, &list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock);
+
+	/* First loop through all jobs and do the pre-work (no locks needed) */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_needs_dma_buf_mapping(job)) {
+			/*
+			 * This operation could fail, but we continue anyway,
+			 * because the worst that could happen is that this
+			 * job will fail due to a Mali page fault.
+			 */
+			mali_dma_buf_map_job(job);
+		}
+	}
+
+	mali_scheduler_lock();
+
+	/* Then loop through all jobs again to queue them (lock needed) */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+
+		/* Remove from scheduler_pp_job_queue_list before queueing */
+		mali_pp_job_list_remove(job);
+
+		if (mali_scheduler_queue_pp_job(job)) {
+			/* Job queued successfully */
+			schedule_mask |= MALI_SCHEDULER_MASK_PP;
+		} else {
+			/* Failed to enqueue job, release job (with error) */
+			mali_pp_job_fb_lookup_remove(job);
+			mali_pp_job_mark_unstarted_failed(job);
+
+			/* unlock scheduler in this uncommon case */
+			mali_scheduler_unlock();
+
+			schedule_mask |= mali_timeline_tracker_release(
+						 mali_pp_job_get_tracker(job));
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(job, 0, MALI_TRUE,
+						       MALI_FALSE);
+
+			mali_scheduler_lock();
+		}
+	}
+
+	mali_scheduler_unlock();
+
+	/* Trigger scheduling of jobs */
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+void mali_scheduler_gp_pp_job_queue_print(void)
+{
+	struct mali_gp_job *gp_job = NULL;
+	struct mali_gp_job *tmp_gp_job = NULL;
+	struct mali_pp_job *pp_job = NULL;
+	struct mali_pp_job *tmp_pp_job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj);
+
+	/* dump job queup status */
+	if ((0 == job_queue_gp.depth) && (0 == job_queue_pp.depth)) {
+		MALI_PRINT(("No GP&PP job in the job queue.\n"));
+		return;
+	}
+
+	MALI_PRINT(("Total (%d) GP job in the job queue.\n", job_queue_gp.depth));
+	if (job_queue_gp.depth > 0) {
+		if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(gp_job, tmp_gp_job, &job_queue_gp.high_pri,
+						    struct mali_gp_job, list) {
+				MALI_PRINT(("GP job(%p) id = %d tid = %d pid = %d in the gp job high_pri queue\n", gp_job, gp_job->id, gp_job->tid, gp_job->pid));
+			}
+		}
+
+		if (!_mali_osk_list_empty(&job_queue_gp.normal_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(gp_job, tmp_gp_job, &job_queue_gp.normal_pri,
+						    struct mali_gp_job, list) {
+				MALI_PRINT(("GP job(%p) id = %d tid = %d pid = %d in the gp job normal_pri queue\n", gp_job, gp_job->id, gp_job->tid, gp_job->pid));
+			}
+		}
+	}
+
+	MALI_PRINT(("Total (%d) PP job in the job queue.\n", job_queue_pp.depth));
+	if (job_queue_pp.depth > 0) {
+		if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(pp_job, tmp_pp_job, &job_queue_pp.high_pri,
+						    struct mali_pp_job, list) {
+				if (mali_pp_job_is_virtual(pp_job)) {
+					MALI_PRINT(("PP Virtual job(%p) id = %d tid = %d pid = %d in the pp job high_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				} else {
+					MALI_PRINT(("PP Physical job(%p) id = %d tid = %d pid = %d in the pp job high_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				}
+			}
+		}
+
+		if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(pp_job, tmp_pp_job, &job_queue_pp.normal_pri,
+						    struct mali_pp_job, list) {
+				if (mali_pp_job_is_virtual(pp_job)) {
+					MALI_PRINT(("PP Virtual job(%p) id = %d tid = %d pid = %d in the pp job normal_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				} else {
+					MALI_PRINT(("PP Physical job(%p) id = %d tid = %d pid = %d in the pp job normal_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				}
+			}
+		}
+	}
+
+	/* dump group running job status */
+	mali_executor_running_status_print();
+}
diff --git a/utgard/r6p2/common/mali_scheduler.h b/utgard/r6p2/common/mali_scheduler.h
new file mode 100755
index 0000000..8a7ebc9
--- /dev/null
+++ b/utgard/r6p2/common/mali_scheduler.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SCHEDULER_H__
+#define __MALI_SCHEDULER_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_scheduler_types.h"
+#include "mali_session.h"
+
+struct mali_scheduler_job_queue {
+	_MALI_OSK_LIST_HEAD(normal_pri); /* Queued jobs with normal priority */
+	_MALI_OSK_LIST_HEAD(high_pri);   /* Queued jobs with high priority */
+	u32 depth;                       /* Depth of combined queues. */
+	u32 big_job_num;
+};
+
+extern _mali_osk_spinlock_irq_t *mali_scheduler_lock_obj;
+
+/* Queue of jobs to be executed on the GP group */
+extern struct mali_scheduler_job_queue job_queue_gp;
+
+/* Queue of PP jobs */
+extern struct mali_scheduler_job_queue job_queue_pp;
+
+extern _mali_osk_atomic_t mali_job_id_autonumber;
+extern _mali_osk_atomic_t mali_job_cache_order_autonumber;
+
+#define MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+_mali_osk_errcode_t mali_scheduler_initialize(void);
+void mali_scheduler_terminate(void);
+
+MALI_STATIC_INLINE void mali_scheduler_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_scheduler_lock_obj);
+	MALI_DEBUG_PRINT(5, ("Mali scheduler: scheduler lock taken.\n"));
+}
+
+MALI_STATIC_INLINE void mali_scheduler_unlock(void)
+{
+	MALI_DEBUG_PRINT(5, ("Mali scheduler: Releasing scheduler lock.\n"));
+	_mali_osk_spinlock_irq_unlock(mali_scheduler_lock_obj);
+}
+
+MALI_STATIC_INLINE u32 mali_scheduler_job_gp_count(void)
+{
+	return job_queue_gp.depth;
+}
+MALI_STATIC_INLINE u32 mali_scheduler_job_gp_big_job_count(void)
+{
+	return job_queue_gp.big_job_num;
+}
+
+u32 mali_scheduler_job_physical_head_count(mali_bool gpu_mode_is_secure);
+
+mali_bool mali_scheduler_job_next_is_virtual(void);
+struct mali_pp_job *mali_scheduler_job_pp_next(void);
+
+struct mali_gp_job *mali_scheduler_job_gp_get(void);
+struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void);
+struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void);
+struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job);
+struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void);
+
+MALI_STATIC_INLINE u32 mali_scheduler_get_new_id(void)
+{
+	return _mali_osk_atomic_inc_return(&mali_job_id_autonumber);
+}
+
+MALI_STATIC_INLINE u32 mali_scheduler_get_new_cache_order(void)
+{
+	return _mali_osk_atomic_inc_return(&mali_job_cache_order_autonumber);
+}
+
+/**
+ * @brief Used by the Timeline system to queue a GP job.
+ *
+ * @note @ref mali_executor_schedule_from_mask() should be called if this
+ * function returns non-zero.
+ *
+ * @param job The GP job that is being activated.
+ *
+ * @return A scheduling bitmask that can be used to decide if scheduling is
+ * necessary after this call.
+ */
+mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job);
+
+/**
+ * @brief Used by the Timeline system to queue a PP job.
+ *
+ * @note @ref mali_executor_schedule_from_mask() should be called if this
+ * function returns non-zero.
+ *
+ * @param job The PP job that is being activated.
+ *
+ * @return A scheduling bitmask that can be used to decide if scheduling is
+ * necessary after this call.
+ */
+mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job);
+
+void mali_scheduler_complete_gp_job(struct mali_gp_job *job,
+				    mali_bool success,
+				    mali_bool user_notification,
+				    mali_bool dequeued);
+
+void mali_scheduler_complete_pp_job(struct mali_pp_job *job,
+				    u32 num_cores_in_virtual,
+				    mali_bool user_notification,
+				    mali_bool dequeued);
+
+void mali_scheduler_abort_session(struct mali_session_data *session);
+
+void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job,
+		u32 num_cores_in_virtual);
+
+#if MALI_STATE_TRACKING
+u32 mali_scheduler_dump_state(char *buf, u32 size);
+#endif
+
+void mali_scheduler_gp_pp_job_queue_print(void);
+
+#endif /* __MALI_SCHEDULER_H__ */
diff --git a/utgard/r6p2/common/mali_scheduler_types.h b/utgard/r6p2/common/mali_scheduler_types.h
new file mode 100755
index 0000000..0819c59
--- /dev/null
+++ b/utgard/r6p2/common/mali_scheduler_types.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SCHEDULER_TYPES_H__
+#define __MALI_SCHEDULER_TYPES_H__
+
+#include "mali_osk.h"
+
+#define MALI_SCHEDULER_JOB_ID_SPAN 65535
+
+/**
+ * Bitmask used for defered scheduling of subsystems.
+ */
+typedef u32 mali_scheduler_mask;
+
+#define MALI_SCHEDULER_MASK_GP (1<<0)
+#define MALI_SCHEDULER_MASK_PP (1<<1)
+
+#define MALI_SCHEDULER_MASK_EMPTY 0
+#define MALI_SCHEDULER_MASK_ALL (MALI_SCHEDULER_MASK_GP | MALI_SCHEDULER_MASK_PP)
+
+#endif /* __MALI_SCHEDULER_TYPES_H__ */
diff --git a/utgard/r6p2/common/mali_session.c b/utgard/r6p2/common/mali_session.c
new file mode 100755
index 0000000..9c5a64a
--- /dev/null
+++ b/utgard/r6p2/common/mali_session.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/sched.h>
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_session.h"
+#include "mali_ukk.h"
+#ifdef MALI_MEM_SWAP_TRACKING
+#include "mali_memory_swap_alloc.h"
+#endif
+
+_MALI_OSK_LIST_HEAD(mali_sessions);
+static u32 mali_session_count = 0;
+
+_mali_osk_spinlock_irq_t *mali_sessions_lock = NULL;
+wait_queue_head_t pending_queue;
+
+_mali_osk_errcode_t mali_session_initialize(void)
+{
+	_MALI_OSK_INIT_LIST_HEAD(&mali_sessions);
+	/* init wait queue for big varying job */
+	init_waitqueue_head(&pending_queue);
+
+	mali_sessions_lock = _mali_osk_spinlock_irq_init(
+				     _MALI_OSK_LOCKFLAG_ORDERED,
+				     _MALI_OSK_LOCK_ORDER_SESSIONS);
+	if (NULL == mali_sessions_lock) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_session_terminate(void)
+{
+	if (NULL != mali_sessions_lock) {
+		_mali_osk_spinlock_irq_term(mali_sessions_lock);
+		mali_sessions_lock = NULL;
+	}
+}
+
+void mali_session_add(struct mali_session_data *session)
+{
+	mali_session_lock();
+	_mali_osk_list_add(&session->link, &mali_sessions);
+	mali_session_count++;
+	mali_session_unlock();
+}
+
+void mali_session_remove(struct mali_session_data *session)
+{
+	mali_session_lock();
+	_mali_osk_list_delinit(&session->link);
+	mali_session_count--;
+	mali_session_unlock();
+}
+
+u32 mali_session_get_count(void)
+{
+	return mali_session_count;
+}
+
+wait_queue_head_t *mali_session_get_wait_queue(void)
+{
+	return &pending_queue;
+}
+
+/*
+ * Get the max completed window jobs from all active session,
+ * which will be used in window render frame per sec calculate
+ */
+#if defined(CONFIG_MALI_DVFS)
+u32 mali_session_max_window_num(void)
+{
+	struct mali_session_data *session, *tmp;
+	u32 max_window_num = 0;
+	u32 tmp_number = 0;
+
+	mali_session_lock();
+
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		tmp_number = _mali_osk_atomic_xchg(
+				     &session->number_of_window_jobs, 0);
+		if (max_window_num < tmp_number) {
+			max_window_num = tmp_number;
+		}
+	}
+
+	mali_session_unlock();
+
+	return max_window_num;
+}
+#endif
+void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx)
+{
+	struct mali_session_data *session, *tmp;
+	char task_comm[TASK_COMM_LEN];
+	char task_default[] = "not found";
+	struct task_struct *ttask;
+	u32 mali_mem_usage;
+	u32 total_mali_mem_size;
+#ifdef MALI_MEM_SWAP_TRACKING
+	u32 swap_pool_size;
+	u32 swap_unlock_size;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(print_ctx);
+	mali_session_lock();
+
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		ttask = pid_task(find_vpid(session->pid), PIDTYPE_PID);
+		if (ttask)
+			strncpy(task_comm, ttask->comm, sizeof(ttask->comm));
+		else
+			strncpy(task_comm, task_default, sizeof(task_default));
+#ifdef MALI_MEM_SWAP_TRACKING
+		_mali_osk_ctxprintf(print_ctx, "  %-25s  %-10u %-25s %-10u  %-15u  %-15u  %-10u  %-10u %-10u\n",
+				    session->comm, session->pid, task_comm,
+				    (unsigned int)((atomic_read(&session->mali_mem_allocated_pages)) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)session->max_mali_mem_allocated_size,
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_EXTERNAL])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_UMP])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_DMA_BUF])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_SWAP])) * _MALI_OSK_MALI_PAGE_SIZE)
+				   );
+#else
+		_mali_osk_ctxprintf(print_ctx, "  %-25s  %-10u %-25s %-10u  %-15u  %-15u  %-10u  %-10u\n",
+				    session->comm, session->pid, task_comm,
+				    (unsigned int)((atomic_read(&session->mali_mem_allocated_pages)) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)session->max_mali_mem_allocated_size,
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_EXTERNAL])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_UMP])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_DMA_BUF])) * _MALI_OSK_MALI_PAGE_SIZE)
+				   );
+
+#endif
+	}
+	mali_session_unlock();
+	mali_mem_usage  = _mali_ukk_report_memory_usage();
+	total_mali_mem_size = _mali_ukk_report_total_memory_size();
+	_mali_osk_ctxprintf(print_ctx, "Mali mem usage: %u\nMali mem limit: %u\n", mali_mem_usage, total_mali_mem_size);
+#ifdef MALI_MEM_SWAP_TRACKING
+	mali_mem_swap_tracking(&swap_pool_size, &swap_unlock_size);
+	_mali_osk_ctxprintf(print_ctx, "Mali swap mem pool : %u\nMali swap mem unlock: %u\n", swap_pool_size, swap_unlock_size);
+#endif
+}
diff --git a/utgard/r6p2/common/mali_session.h b/utgard/r6p2/common/mali_session.h
new file mode 100755
index 0000000..9184ff2
--- /dev/null
+++ b/utgard/r6p2/common/mali_session.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SESSION_H__
+#define __MALI_SESSION_H__
+
+#include "mali_mmu_page_directory.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_memory_types.h"
+#include "mali_memory_manager.h"
+
+struct mali_timeline_system;
+struct mali_soft_system;
+
+/* Number of frame builder job lists per session. */
+#define MALI_PP_JOB_FB_LOOKUP_LIST_SIZE 16
+#define MALI_PP_JOB_FB_LOOKUP_LIST_MASK (MALI_PP_JOB_FB_LOOKUP_LIST_SIZE - 1)
+/*Max pending big job allowed in kernel*/
+#define MALI_MAX_PENDING_BIG_JOB (2)
+
+struct mali_session_data {
+	_mali_osk_notification_queue_t *ioctl_queue;
+
+	_mali_osk_mutex_t *memory_lock; /**< Lock protecting the vm manipulation */
+	_mali_osk_mutex_t *cow_lock; /** < Lock protecting the cow memory free manipulation */
+#if 0
+	_mali_osk_list_t memory_head; /**< Track all the memory allocated in this session, for freeing on abnormal termination */
+#endif
+	struct mali_page_directory *page_directory; /**< MMU page directory for this session */
+
+	_MALI_OSK_LIST_HEAD(link); /**< Link for list of all sessions */
+	_MALI_OSK_LIST_HEAD(pp_job_list); /**< List of all PP jobs on this session */
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_t number_of_window_jobs; /**< Record the window jobs completed on this session in a period */
+#endif
+
+	_mali_osk_list_t pp_job_fb_lookup_list[MALI_PP_JOB_FB_LOOKUP_LIST_SIZE]; /**< List of PP job lists per frame builder id.  Used to link jobs from same frame builder. */
+
+	struct mali_soft_job_system *soft_job_system; /**< Soft job system for this session. */
+	struct mali_timeline_system *timeline_system; /**< Timeline system for this session. */
+
+	mali_bool is_aborting; /**< MALI_TRUE if the session is aborting, MALI_FALSE if not. */
+	mali_bool use_high_priority_job_queue; /**< If MALI_TRUE, jobs added from this session will use the high priority job queues. */
+	u32 pid;
+	char *comm;
+	atomic_t mali_mem_array[MALI_MEM_TYPE_MAX]; /**< The array to record mem types' usage for this session. */
+	atomic_t mali_mem_allocated_pages; /** The current allocated mali memory pages, which include mali os memory and mali dedicated memory.*/
+	size_t max_mali_mem_allocated_size; /**< The past max mali memory allocated size, which include mali os memory and mali dedicated memory. */
+	/* Added for new memroy system */
+	struct mali_allocation_manager allocation_mgr;
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	u32 fence_context;      /** <  The execution dma fence context this fence is run on. */
+	_mali_osk_atomic_t fence_seqno; /** < Alinear increasing sequence number for this dma fence context. */
+#endif
+};
+
+_mali_osk_errcode_t mali_session_initialize(void);
+void mali_session_terminate(void);
+
+/* List of all sessions. Actual list head in mali_kernel_core.c */
+extern _mali_osk_list_t mali_sessions;
+/* Lock to protect modification and access to the mali_sessions list */
+extern _mali_osk_spinlock_irq_t *mali_sessions_lock;
+
+MALI_STATIC_INLINE void mali_session_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_sessions_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(mali_sessions_lock);
+}
+
+void mali_session_add(struct mali_session_data *session);
+void mali_session_remove(struct mali_session_data *session);
+u32 mali_session_get_count(void);
+wait_queue_head_t *mali_session_get_wait_queue(void);
+
+#define MALI_SESSION_FOREACH(session, tmp, link) \
+	_MALI_OSK_LIST_FOREACHENTRY(session, tmp, &mali_sessions, struct mali_session_data, link)
+
+MALI_STATIC_INLINE struct mali_page_directory *mali_session_get_page_directory(struct mali_session_data *session)
+{
+	return session->page_directory;
+}
+
+MALI_STATIC_INLINE void mali_session_memory_lock(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_mutex_wait(session->memory_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_memory_unlock(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_mutex_signal(session->memory_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_send_notification(struct mali_session_data *session, _mali_osk_notification_t *object)
+{
+	_mali_osk_notification_queue_send(session->ioctl_queue, object);
+}
+
+#if defined(CONFIG_MALI_DVFS)
+
+MALI_STATIC_INLINE void mali_session_inc_num_window_jobs(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_atomic_inc(&session->number_of_window_jobs);
+}
+
+/*
+ * Get the max completed window jobs from all active session,
+ * which will be used in  window render frame per sec calculate
+ */
+u32 mali_session_max_window_num(void);
+
+#endif
+
+void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx);
+
+#endif /* __MALI_SESSION_H__ */
diff --git a/utgard/r6p2/common/mali_soft_job.c b/utgard/r6p2/common/mali_soft_job.c
new file mode 100755
index 0000000..c76d1fe
--- /dev/null
+++ b/utgard/r6p2/common/mali_soft_job.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_soft_job.h"
+#include "mali_osk.h"
+#include "mali_timeline.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+
+MALI_STATIC_INLINE void mali_soft_job_system_lock(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	_mali_osk_spinlock_irq_lock(system->lock);
+	MALI_DEBUG_PRINT(5, ("Mali Soft Job: soft system %p lock taken\n", system));
+	MALI_DEBUG_ASSERT(0 == system->lock_owner);
+	MALI_DEBUG_CODE(system->lock_owner = _mali_osk_get_tid());
+}
+
+MALI_STATIC_INLINE void mali_soft_job_system_unlock(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_PRINT(5, ("Mali Soft Job: releasing soft system %p lock\n", system));
+	MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner);
+	MALI_DEBUG_CODE(system->lock_owner = 0);
+	_mali_osk_spinlock_irq_unlock(system->lock);
+}
+
+#if defined(DEBUG)
+MALI_STATIC_INLINE void mali_soft_job_system_assert_locked(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner);
+}
+#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system) mali_soft_job_system_assert_locked(system)
+#else
+#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system)
+#endif /* defined(DEBUG) */
+
+struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session)
+{
+	struct mali_soft_job_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	system = (struct mali_soft_job_system *) _mali_osk_calloc(1, sizeof(struct mali_soft_job_system));
+	if (NULL == system) {
+		return NULL;
+	}
+
+	system->session = session;
+
+	system->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_SCHEDULER);
+	if (NULL == system->lock) {
+		mali_soft_job_system_destroy(system);
+		return NULL;
+	}
+	system->lock_owner = 0;
+	system->last_job_id = 0;
+
+	_MALI_OSK_INIT_LIST_HEAD(&(system->jobs_used));
+
+	return system;
+}
+
+void mali_soft_job_system_destroy(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	/* All jobs should be free at this point. */
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&(system->jobs_used)));
+
+	if (NULL != system) {
+		if (NULL != system->lock) {
+			_mali_osk_spinlock_irq_term(system->lock);
+		}
+		_mali_osk_free(system);
+	}
+}
+
+static void mali_soft_job_system_free_job(struct mali_soft_job_system *system, struct mali_soft_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_soft_job_system_lock(job->system);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_INVALID_ID != job->id);
+	MALI_DEBUG_ASSERT(system == job->system);
+
+	_mali_osk_list_del(&(job->system_list));
+
+	mali_soft_job_system_unlock(job->system);
+
+	_mali_osk_free(job);
+}
+
+MALI_STATIC_INLINE struct mali_soft_job *mali_soft_job_system_lookup_job(struct mali_soft_job_system *system, u32 job_id)
+{
+	struct mali_soft_job *job, *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) {
+		if (job->id == job_id)
+			return job;
+	}
+
+	return NULL;
+}
+
+void mali_soft_job_destroy(struct mali_soft_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: destroying soft job %u (0x%08X)\n", job->id, job));
+
+	if (NULL != job) {
+		if (0 < _mali_osk_atomic_dec_return(&job->refcount)) return;
+
+		_mali_osk_atomic_term(&job->refcount);
+
+		if (NULL != job->activated_notification) {
+			_mali_osk_notification_delete(job->activated_notification);
+			job->activated_notification = NULL;
+		}
+
+		mali_soft_job_system_free_job(job->system, job);
+	}
+}
+
+struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job)
+{
+	struct mali_soft_job *job;
+	_mali_osk_notification_t *notification = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT((MALI_SOFT_JOB_TYPE_USER_SIGNALED == type) ||
+			  (MALI_SOFT_JOB_TYPE_SELF_SIGNALED == type));
+
+	notification = _mali_osk_notification_create(_MALI_NOTIFICATION_SOFT_ACTIVATED, sizeof(_mali_uk_soft_job_activated_s));
+	if (unlikely(NULL == notification)) {
+		MALI_PRINT_ERROR(("Mali Soft Job: failed to allocate notification"));
+		return NULL;
+	}
+
+	job = _mali_osk_malloc(sizeof(struct mali_soft_job));
+	if (unlikely(NULL == job)) {
+		MALI_DEBUG_PRINT(2, ("Mali Soft Job: system alloc job failed. \n"));
+		return NULL;
+	}
+
+	mali_soft_job_system_lock(system);
+
+	job->system = system;
+	job->id = system->last_job_id++;
+	job->state = MALI_SOFT_JOB_STATE_ALLOCATED;
+
+	_mali_osk_list_add(&(job->system_list), &(system->jobs_used));
+
+	job->type = type;
+	job->user_job = user_job;
+	job->activated = MALI_FALSE;
+
+	job->activated_notification = notification;
+
+	_mali_osk_atomic_init(&job->refcount, 1);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_ALLOCATED == job->state);
+	MALI_DEBUG_ASSERT(system == job->system);
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_INVALID_ID != job->id);
+
+	mali_soft_job_system_unlock(system);
+
+	return job;
+}
+
+mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence)
+{
+	mali_timeline_point point;
+	struct mali_soft_job_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	system = job->system;
+
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT_POINTER(system->session->timeline_system);
+
+	mali_soft_job_system_lock(system);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_ALLOCATED == job->state);
+	job->state = MALI_SOFT_JOB_STATE_STARTED;
+
+	mali_soft_job_system_unlock(system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: starting soft job %u (0x%08X)\n", job->id, job));
+
+	mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_SOFT, fence, job);
+	point = mali_timeline_system_add_tracker(system->session->timeline_system, &job->tracker, MALI_TIMELINE_SOFT);
+
+	return point;
+}
+
+static mali_bool mali_soft_job_is_activated(void *data)
+{
+	struct mali_soft_job *job;
+
+	job = (struct mali_soft_job *) data;
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	return job->activated;
+}
+
+_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id)
+{
+	struct mali_soft_job *job;
+	struct mali_timeline_system *timeline_system;
+	mali_scheduler_mask schedule_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_soft_job_system_lock(system);
+
+	job = mali_soft_job_system_lookup_job(system, job_id);
+
+	if ((NULL == job) || (MALI_SOFT_JOB_TYPE_USER_SIGNALED != job->type)
+	    || !(MALI_SOFT_JOB_STATE_STARTED == job->state || MALI_SOFT_JOB_STATE_TIMED_OUT == job->state)) {
+		mali_soft_job_system_unlock(system);
+		MALI_PRINT_ERROR(("Mali Soft Job: invalid soft job id %u", job_id));
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	if (MALI_SOFT_JOB_STATE_TIMED_OUT == job->state) {
+		job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+		mali_soft_job_system_unlock(system);
+
+		MALI_DEBUG_ASSERT(MALI_TRUE == job->activated);
+		MALI_DEBUG_PRINT(4, ("Mali Soft Job: soft job %u (0x%08X) was timed out\n", job->id, job));
+		mali_soft_job_destroy(job);
+
+		return _MALI_OSK_ERR_TIMEOUT;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+	job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+	mali_soft_job_system_unlock(system);
+
+	/* Since the job now is in signaled state, timeouts from the timeline system will be
+	 * ignored, and it is not possible to signal this job again. */
+
+	timeline_system = system->session->timeline_system;
+	MALI_DEBUG_ASSERT_POINTER(timeline_system);
+
+	/* Wait until activated. */
+	_mali_osk_wait_queue_wait_event(timeline_system->wait_queue, mali_soft_job_is_activated, (void *) job);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: signaling soft job %u (0x%08X)\n", job->id, job));
+
+	schedule_mask = mali_timeline_tracker_release(&job->tracker);
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+
+	mali_soft_job_destroy(job);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_soft_job_send_activated_notification(struct mali_soft_job *job)
+{
+	if (NULL != job->activated_notification) {
+		_mali_uk_soft_job_activated_s *res = job->activated_notification->result_buffer;
+		res->user_job = job->user_job;
+		mali_session_send_notification(job->system->session, job->activated_notification);
+	}
+	job->activated_notification = NULL;
+}
+
+mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	MALI_DEBUG_ASSERT_POINTER(job->system->session);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline activation for soft job %u (0x%08X).\n", job->id, job));
+
+	mali_soft_job_system_lock(job->system);
+
+	if (unlikely(job->system->session->is_aborting)) {
+		MALI_DEBUG_PRINT(3, ("Mali Soft Job: Soft job %u (0x%08X) activated while session is aborting.\n", job->id, job));
+
+		mali_soft_job_system_unlock(job->system);
+
+		/* Since we are in shutdown, we can ignore the scheduling bitmask. */
+		mali_timeline_tracker_release(&job->tracker);
+		mali_soft_job_destroy(job);
+		return schedule_mask;
+	}
+
+	/* Send activated notification. */
+	mali_soft_job_send_activated_notification(job);
+
+	/* Wake up sleeping signaler. */
+	job->activated = MALI_TRUE;
+
+	/* If job type is self signaled, release tracker, move soft job to free list, and scheduler at once */
+	if (MALI_SOFT_JOB_TYPE_SELF_SIGNALED == job->type) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+		job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+		mali_soft_job_system_unlock(job->system);
+
+		schedule_mask |= mali_timeline_tracker_release(&job->tracker);
+
+		mali_soft_job_destroy(job);
+	} else {
+		_mali_osk_wait_queue_wake_up(job->tracker.system->wait_queue);
+
+		mali_soft_job_system_unlock(job->system);
+	}
+
+	return schedule_mask;
+}
+
+mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	MALI_DEBUG_ASSERT_POINTER(job->system->session);
+	MALI_DEBUG_ASSERT(MALI_TRUE == job->activated);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline timeout for soft job %u (0x%08X).\n", job->id, job));
+
+	mali_soft_job_system_lock(job->system);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED  == job->state ||
+			  MALI_SOFT_JOB_STATE_SIGNALED == job->state);
+
+	if (unlikely(job->system->session->is_aborting)) {
+		/* The session is aborting.  This job will be released and destroyed by @ref
+		 * mali_soft_job_system_abort(). */
+		mali_soft_job_system_unlock(job->system);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	if (MALI_SOFT_JOB_STATE_STARTED != job->state) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED == job->state);
+
+		/* The job is about to be signaled, ignore timeout. */
+		MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeout on soft job %u (0x%08X) in signaled state.\n", job->id, job));
+		mali_soft_job_system_unlock(job->system);
+		return schedule_mask;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+	job->state = MALI_SOFT_JOB_STATE_TIMED_OUT;
+	_mali_osk_atomic_inc(&job->refcount);
+
+	mali_soft_job_system_unlock(job->system);
+
+	schedule_mask = mali_timeline_tracker_release(&job->tracker);
+
+	mali_soft_job_destroy(job);
+
+	return schedule_mask;
+}
+
+void mali_soft_job_system_abort(struct mali_soft_job_system *system)
+{
+	struct mali_soft_job *job, *tmp;
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(jobs);
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting soft job system for session 0x%08X.\n", system->session));
+
+	mali_soft_job_system_lock(system);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED   == job->state ||
+				  MALI_SOFT_JOB_STATE_TIMED_OUT == job->state);
+
+		if (MALI_SOFT_JOB_STATE_STARTED == job->state) {
+			/* If the job has been activated, we have to release the tracker and destroy
+			 * the job.  If not, the tracker will be released and the job destroyed when
+			 * it is activated. */
+			if (MALI_TRUE == job->activated) {
+				MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting unsignaled soft job %u (0x%08X).\n", job->id, job));
+
+				job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+				_mali_osk_list_move(&job->system_list, &jobs);
+			}
+		} else if (MALI_SOFT_JOB_STATE_TIMED_OUT == job->state) {
+			MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting timed out soft job %u (0x%08X).\n", job->id, job));
+
+			/* We need to destroy this soft job. */
+			_mali_osk_list_move(&job->system_list, &jobs);
+		}
+	}
+
+	mali_soft_job_system_unlock(system);
+
+	/* Release and destroy jobs. */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &jobs, struct mali_soft_job, system_list) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED  == job->state ||
+				  MALI_SOFT_JOB_STATE_TIMED_OUT == job->state);
+
+		if (MALI_SOFT_JOB_STATE_SIGNALED == job->state) {
+			mali_timeline_tracker_release(&job->tracker);
+		}
+
+		/* Move job back to used list before destroying. */
+		_mali_osk_list_move(&job->system_list, &system->jobs_used);
+
+		mali_soft_job_destroy(job);
+	}
+}
diff --git a/utgard/r6p2/common/mali_soft_job.h b/utgard/r6p2/common/mali_soft_job.h
new file mode 100755
index 0000000..4dd0589
--- /dev/null
+++ b/utgard/r6p2/common/mali_soft_job.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SOFT_JOB_H__
+#define __MALI_SOFT_JOB_H__
+
+#include "mali_osk.h"
+
+#include "mali_timeline.h"
+
+struct mali_timeline_fence;
+struct mali_session_data;
+struct mali_soft_job;
+struct mali_soft_job_system;
+
+/**
+ * Soft job types.
+ *
+ * Soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED will only complete after activation if either
+ * they are signaled by user-space (@ref mali_soft_job_system_signaled_job) or if they are timed out
+ * by the Timeline system.
+ * Soft jobs of type MALI_SOFT_JOB_TYPE_SELF_SIGNALED will release job resource automatically
+ * in kernel when the job is activated.
+ */
+typedef enum mali_soft_job_type {
+	MALI_SOFT_JOB_TYPE_SELF_SIGNALED,
+	MALI_SOFT_JOB_TYPE_USER_SIGNALED,
+} mali_soft_job_type;
+
+/**
+ * Soft job state.
+ *
+ * mali_soft_job_system_start_job a job will first be allocated.The job's state set to MALI_SOFT_JOB_STATE_ALLOCATED.
+ * Once the job is added to the timeline system, the state changes to MALI_SOFT_JOB_STATE_STARTED.
+ *
+ * For soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED the state is changed to
+ * MALI_SOFT_JOB_STATE_SIGNALED when @ref mali_soft_job_system_signal_job is called and the soft
+ * job's state is MALI_SOFT_JOB_STATE_STARTED or MALI_SOFT_JOB_STATE_TIMED_OUT.
+ *
+ * If a soft job of type MALI_SOFT_JOB_TYPE_USER_SIGNALED is timed out before being signaled, the
+ * state is changed to MALI_SOFT_JOB_STATE_TIMED_OUT.  This can only happen to soft jobs in state
+ * MALI_SOFT_JOB_STATE_STARTED.
+ *
+ */
+typedef enum mali_soft_job_state {
+	MALI_SOFT_JOB_STATE_ALLOCATED,
+	MALI_SOFT_JOB_STATE_STARTED,
+	MALI_SOFT_JOB_STATE_SIGNALED,
+	MALI_SOFT_JOB_STATE_TIMED_OUT,
+} mali_soft_job_state;
+
+#define MALI_SOFT_JOB_INVALID_ID ((u32) -1)
+
+/**
+ * Soft job struct.
+ *
+ * Soft job can be used to represent any kind of CPU work done in kernel-space.
+ */
+typedef struct mali_soft_job {
+	mali_soft_job_type            type;                   /**< Soft job type.  Must be one of MALI_SOFT_JOB_TYPE_*. */
+	u64                           user_job;               /**< Identifier for soft job in user space. */
+	_mali_osk_atomic_t            refcount;               /**< Soft jobs are reference counted to prevent premature deletion. */
+	struct mali_timeline_tracker  tracker;                /**< Timeline tracker for soft job. */
+	mali_bool                     activated;              /**< MALI_TRUE if the job has been activated, MALI_FALSE if not. */
+	_mali_osk_notification_t     *activated_notification; /**< Pre-allocated notification object for ACTIVATED_NOTIFICATION. */
+
+	/* Protected by soft job system lock. */
+	u32                           id;                     /**< Used by user-space to find corresponding soft job in kernel-space. */
+	mali_soft_job_state           state;                  /**< State of soft job, must be one of MALI_SOFT_JOB_STATE_*. */
+	struct mali_soft_job_system  *system;                 /**< The soft job system this job is in. */
+	_mali_osk_list_t              system_list;            /**< List element used by soft job system. */
+} mali_soft_job;
+
+/**
+ * Per-session soft job system.
+ *
+ * The soft job system is used to manage all soft jobs that belongs to a session.
+ */
+typedef struct mali_soft_job_system {
+	struct mali_session_data *session;                    /**< The session this soft job system belongs to. */
+	_MALI_OSK_LIST_HEAD(jobs_used);                       /**< List of all allocated soft jobs. */
+
+	_mali_osk_spinlock_irq_t *lock;                       /**< Lock used to protect soft job system and its soft jobs. */
+	u32 lock_owner;                                       /**< Contains tid of thread that locked the system or 0, if not locked. */
+	u32 last_job_id;                                      /**< Recored the last job id protected by lock. */
+} mali_soft_job_system;
+
+/**
+ * Create a soft job system.
+ *
+ * @param session The session this soft job system will belong to.
+ * @return The new soft job system, or NULL if unsuccessful.
+ */
+struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session);
+
+/**
+ * Destroy a soft job system.
+ *
+ * @note The soft job must not have any started or activated jobs.  Call @ref
+ * mali_soft_job_system_abort first.
+ *
+ * @param system The soft job system we are destroying.
+ */
+void mali_soft_job_system_destroy(struct mali_soft_job_system *system);
+
+/**
+ * Create a soft job.
+ *
+ * @param system Soft job system to create soft job from.
+ * @param type Type of the soft job.
+ * @param user_job Identifier for soft job in user space.
+ * @return New soft job if successful, NULL if not.
+ */
+struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job);
+
+/**
+ * Destroy soft job.
+ *
+ * @param job Soft job to destroy.
+ */
+void mali_soft_job_destroy(struct mali_soft_job *job);
+
+/**
+ * Start a soft job.
+ *
+ * The soft job will be added to the Timeline system which will then activate it after all
+ * dependencies have been resolved.
+ *
+ * Create soft jobs with @ref mali_soft_job_create before starting them.
+ *
+ * @param job Soft job to start.
+ * @param fence Fence representing dependencies for this soft job.
+ * @return Point on soft job timeline.
+ */
+mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence);
+
+/**
+ * Use by user-space to signal that a soft job has completed.
+ *
+ * @note Only valid for soft jobs with type MALI_SOFT_JOB_TYPE_USER_SIGNALED.
+ *
+ * @note The soft job must be in state MALI_SOFT_JOB_STATE_STARTED for the signal to be successful.
+ *
+ * @note If the soft job was signaled successfully, or it received a time out, the soft job will be
+ * destroyed after this call and should no longer be used.
+ *
+ * @note This function will block until the soft job has been activated.
+ *
+ * @param system The soft job system the job was started in.
+ * @param job_id ID of soft job we are signaling.
+ *
+ * @return _MALI_OSK_ERR_ITEM_NOT_FOUND if the soft job ID was invalid, _MALI_OSK_ERR_TIMEOUT if the
+ * soft job was timed out or _MALI_OSK_ERR_OK if we successfully signaled the soft job.
+ */
+_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id);
+
+/**
+ * Used by the Timeline system to activate a soft job.
+ *
+ * @param job The soft job that is being activated.
+ * @return A scheduling bitmask.
+ */
+mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job);
+
+/**
+ * Used by the Timeline system to timeout a soft job.
+ *
+ * A soft job is timed out if it completes or is signaled later than MALI_TIMELINE_TIMEOUT_HZ after
+ * activation.
+ *
+ * @param job The soft job that is being timed out.
+ * @return A scheduling bitmask.
+ */
+mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job);
+
+/**
+ * Used to cleanup activated soft jobs in the soft job system on session abort.
+ *
+ * @param system The soft job system that is being aborted.
+ */
+void mali_soft_job_system_abort(struct mali_soft_job_system *system);
+
+#endif /* __MALI_SOFT_JOB_H__ */
diff --git a/utgard/r6p2/common/mali_spinlock_reentrant.c b/utgard/r6p2/common/mali_spinlock_reentrant.c
new file mode 100755
index 0000000..28e2e17
--- /dev/null
+++ b/utgard/r6p2/common/mali_spinlock_reentrant.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_spinlock_reentrant.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order)
+{
+	struct mali_spinlock_reentrant *spinlock;
+
+	spinlock = _mali_osk_calloc(1, sizeof(struct mali_spinlock_reentrant));
+	if (NULL == spinlock) {
+		return NULL;
+	}
+
+	spinlock->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, lock_order);
+	if (NULL == spinlock->lock) {
+		mali_spinlock_reentrant_term(spinlock);
+		return NULL;
+	}
+
+	return spinlock;
+}
+
+void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT(0 == spinlock->counter && 0 == spinlock->owner);
+
+	if (NULL != spinlock->lock) {
+		_mali_osk_spinlock_irq_term(spinlock->lock);
+	}
+
+	_mali_osk_free(spinlock);
+}
+
+void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	MALI_DEBUG_ASSERT(0 != tid);
+
+	MALI_DEBUG_PRINT(5, ("%s ^\n", __FUNCTION__));
+
+	if (tid != spinlock->owner) {
+		_mali_osk_spinlock_irq_lock(spinlock->lock);
+		MALI_DEBUG_ASSERT(0 == spinlock->owner && 0 == spinlock->counter);
+		spinlock->owner = tid;
+	}
+
+	MALI_DEBUG_PRINT(5, ("%s v\n", __FUNCTION__));
+
+	++spinlock->counter;
+}
+
+void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	MALI_DEBUG_ASSERT(0 != tid && tid == spinlock->owner);
+
+	--spinlock->counter;
+	if (0 == spinlock->counter) {
+		spinlock->owner = 0;
+		MALI_DEBUG_PRINT(5, ("%s release last\n", __FUNCTION__));
+		_mali_osk_spinlock_irq_unlock(spinlock->lock);
+	}
+}
diff --git a/utgard/r6p2/common/mali_spinlock_reentrant.h b/utgard/r6p2/common/mali_spinlock_reentrant.h
new file mode 100755
index 0000000..793875a
--- /dev/null
+++ b/utgard/r6p2/common/mali_spinlock_reentrant.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SPINLOCK_REENTRANT_H__
+#define __MALI_SPINLOCK_REENTRANT_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/**
+ * Reentrant spinlock.
+ */
+struct mali_spinlock_reentrant {
+	_mali_osk_spinlock_irq_t *lock;
+	u32               owner;
+	u32               counter;
+};
+
+/**
+ * Create a new reentrant spinlock.
+ *
+ * @param lock_order Lock order.
+ * @return New reentrant spinlock.
+ */
+struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order);
+
+/**
+ * Terminate reentrant spinlock and free any associated resources.
+ *
+ * @param spinlock Reentrant spinlock to terminate.
+ */
+void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock);
+
+/**
+ * Wait for reentrant spinlock to be signaled.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ */
+void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid);
+
+/**
+ * Signal reentrant spinlock.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ */
+void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid);
+
+/**
+ * Check if thread is holding reentrant spinlock.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ * @return MALI_TRUE if thread is holding spinlock, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_spinlock_reentrant_is_held(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	return (tid == spinlock->owner && 0 < spinlock->counter);
+}
+
+#endif /* __MALI_SPINLOCK_REENTRANT_H__ */
diff --git a/utgard/r6p2/common/mali_timeline.c b/utgard/r6p2/common/mali_timeline.c
new file mode 100755
index 0000000..2091411
--- /dev/null
+++ b/utgard/r6p2/common/mali_timeline.c
@@ -0,0 +1,1685 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timeline.h"
+#include "mali_kernel_common.h"
+#include "mali_scheduler.h"
+#include "mali_soft_job.h"
+#include "mali_timeline_fence_wait.h"
+#include "mali_timeline_sync_fence.h"
+#include "mali_executor.h"
+#include "mali_pp_job.h"
+
+#define MALI_TIMELINE_SYSTEM_LOCKED(system) (mali_spinlock_reentrant_is_held((system)->spinlock, _mali_osk_get_tid()))
+
+/*
+ * Following three elements are used to record how many
+ * gp, physical pp or virtual pp jobs are delayed in the whole
+ * timeline system, we can use these three value to decide
+ * if need to deactivate idle group.
+ */
+_mali_osk_atomic_t gp_tracker_count;
+_mali_osk_atomic_t phy_pp_tracker_count;
+_mali_osk_atomic_t virt_pp_tracker_count;
+
+static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *waiter);
+
+#if defined(CONFIG_SYNC)
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+
+struct mali_deferred_fence_put_entry {
+	struct hlist_node list;
+	struct sync_fence *fence;
+};
+
+static HLIST_HEAD(mali_timeline_sync_fence_to_free_list);
+static DEFINE_SPINLOCK(mali_timeline_sync_fence_to_free_lock);
+
+static void put_sync_fences(struct work_struct *ignore)
+{
+	struct hlist_head list;
+	struct hlist_node *tmp, *pos;
+	unsigned long flags;
+	struct mali_deferred_fence_put_entry *o;
+
+	spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags);
+	hlist_move_list(&mali_timeline_sync_fence_to_free_list, &list);
+	spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags);
+
+	hlist_for_each_entry_safe(o, pos, tmp, &list, list) {
+		sync_fence_put(o->fence);
+		kfree(o);
+	}
+}
+
+static DECLARE_DELAYED_WORK(delayed_sync_fence_put, put_sync_fences);
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */
+
+/* Callback that is called when a sync fence a tracker is waiting on is signaled. */
+static void mali_timeline_sync_fence_callback(struct sync_fence *sync_fence, struct sync_fence_waiter *sync_fence_waiter)
+{
+	struct mali_timeline_system  *system;
+	struct mali_timeline_waiter  *waiter;
+	struct mali_timeline_tracker *tracker;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+	mali_bool is_aborting = MALI_FALSE;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	int fence_status = sync_fence->status;
+#else
+	int fence_status = atomic_read(&sync_fence->status);
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_waiter);
+
+	tracker = _MALI_OSK_CONTAINER_OF(sync_fence_waiter, struct mali_timeline_tracker, sync_fence_waiter);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	system = tracker->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	is_aborting = system->session->is_aborting;
+	if (!is_aborting && (0 > fence_status)) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, fence_status));
+		tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT;
+	}
+
+	waiter = tracker->waiter_sync;
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+
+	tracker->sync_fence = NULL;
+	tracker->fence.sync_fd = -1;
+
+	schedule_mask |= mali_timeline_system_release_waiter(system, waiter);
+
+	/* If aborting, wake up sleepers that are waiting for sync fence callbacks to complete. */
+	if (is_aborting) {
+		_mali_osk_wait_queue_wake_up(system->wait_queue);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	/*
+	 * Older versions of Linux, before 3.5, doesn't support fput() in interrupt
+	 * context. For those older kernels, allocate a list object and put the
+	 * fence object on that and defer the call to sync_fence_put() to a workqueue.
+	 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
+	{
+		struct mali_deferred_fence_put_entry *obj;
+
+		obj = kzalloc(sizeof(struct mali_deferred_fence_put_entry), GFP_ATOMIC);
+		if (obj) {
+			unsigned long flags;
+			mali_bool schedule = MALI_FALSE;
+
+			obj->fence = sync_fence;
+
+			spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags);
+			if (hlist_empty(&mali_timeline_sync_fence_to_free_list))
+				schedule = MALI_TRUE;
+			hlist_add_head(&obj->list, &mali_timeline_sync_fence_to_free_list);
+			spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags);
+
+			if (schedule)
+				schedule_delayed_work(&delayed_sync_fence_put, 0);
+		}
+	}
+#else
+	sync_fence_put(sync_fence);
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */
+
+	if (!is_aborting) {
+		mali_executor_schedule_from_mask(schedule_mask, MALI_TRUE);
+	}
+}
+#endif /* defined(CONFIG_SYNC) */
+
+static mali_scheduler_mask mali_timeline_tracker_time_out(struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_SOFT == tracker->type);
+
+	return mali_soft_job_system_timeout_job((struct mali_soft_job *) tracker->job);
+}
+
+static void mali_timeline_timer_callback(void *data)
+{
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *tracker;
+	struct mali_timeline *timeline;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+
+	timeline = (struct mali_timeline *) data;
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	if (!system->timer_enabled) {
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+		return;
+	}
+
+	tracker = timeline->tracker_tail;
+	timeline->timer_active = MALI_FALSE;
+
+	if (NULL != tracker && MALI_TRUE == tracker->timer_active) {
+		/* This is likely the delayed work that has been schedule out before cancelled. */
+		if (MALI_TIMELINE_TIMEOUT_HZ > (_mali_osk_time_tickcount() - tracker->os_tick_activate)) {
+			mali_spinlock_reentrant_signal(system->spinlock, tid);
+			return;
+		}
+
+		schedule_mask = mali_timeline_tracker_time_out(tracker);
+		tracker->timer_active = MALI_FALSE;
+	} else {
+		MALI_PRINT_ERROR(("Mali Timeline: Soft job timer callback without a waiting tracker.\n"));
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+void mali_timeline_system_stop_timer(struct mali_timeline_system *system)
+{
+	u32 i;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+	system->timer_enabled = MALI_FALSE;
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (NULL != timeline->delayed_work) {
+			_mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work);
+			timeline->timer_active = MALI_FALSE;
+		}
+	}
+}
+
+static void mali_timeline_destroy(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	if (NULL != timeline) {
+		/* Assert that the timeline object has been properly cleaned up before destroying it. */
+		MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next);
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_head);
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+		MALI_DEBUG_ASSERT(NULL == timeline->waiter_head);
+		MALI_DEBUG_ASSERT(NULL == timeline->waiter_tail);
+		MALI_DEBUG_ASSERT(NULL != timeline->system);
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_MAX > timeline->id);
+
+		if (NULL != timeline->delayed_work) {
+			_mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work);
+			_mali_osk_wq_delayed_delete_work_nonflush(timeline->delayed_work);
+		}
+
+#if defined(CONFIG_SYNC)
+		if (NULL != timeline->sync_tl) {
+			sync_timeline_destroy(timeline->sync_tl);
+		}
+#endif /* defined(CONFIG_SYNC) */
+
+#ifndef CONFIG_SYNC
+		_mali_osk_free(timeline);
+#endif
+	}
+}
+
+static struct mali_timeline *mali_timeline_create(struct mali_timeline_system *system, enum mali_timeline_id id)
+{
+	struct mali_timeline *timeline;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(id < MALI_TIMELINE_MAX);
+
+	timeline = (struct mali_timeline *) _mali_osk_calloc(1, sizeof(struct mali_timeline));
+	if (NULL == timeline) {
+		return NULL;
+	}
+
+	/* Initially the timeline is empty. */
+#if defined(MALI_TIMELINE_DEBUG_START_POINT)
+	/* Start the timeline a bit before wrapping when debugging. */
+	timeline->point_next = UINT_MAX - MALI_TIMELINE_MAX_POINT_SPAN - 128;
+#else
+	timeline->point_next = 1;
+#endif
+	timeline->point_oldest = timeline->point_next;
+
+	/* The tracker and waiter lists will initially be empty. */
+
+	timeline->system = system;
+	timeline->id = id;
+
+	timeline->delayed_work = _mali_osk_wq_delayed_create_work(mali_timeline_timer_callback, timeline);
+	if (NULL == timeline->delayed_work) {
+		mali_timeline_destroy(timeline);
+		return NULL;
+	}
+
+	timeline->timer_active = MALI_FALSE;
+
+#if defined(CONFIG_SYNC)
+	{
+		char timeline_name[32];
+
+		switch (id) {
+		case MALI_TIMELINE_GP:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-gp", _mali_osk_get_pid());
+			break;
+		case MALI_TIMELINE_PP:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-pp", _mali_osk_get_pid());
+			break;
+		case MALI_TIMELINE_SOFT:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-soft", _mali_osk_get_pid());
+			break;
+		default:
+			MALI_PRINT_ERROR(("Mali Timeline: Invalid timeline id %d\n", id));
+			mali_timeline_destroy(timeline);
+			return NULL;
+		}
+
+		timeline->destroyed = MALI_FALSE;
+
+		timeline->sync_tl = mali_sync_timeline_create(timeline, timeline_name);
+		if (NULL == timeline->sync_tl) {
+			mali_timeline_destroy(timeline);
+			return NULL;
+		}
+
+		timeline->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM);
+		if (NULL == timeline->spinlock) {
+			mali_timeline_destroy(timeline);
+			return NULL;
+		}
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	return timeline;
+}
+
+static void mali_timeline_insert_tracker(struct mali_timeline *timeline, struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	if (mali_timeline_is_full(timeline)) {
+		/* Don't add tracker if timeline is full. */
+		tracker->point = MALI_TIMELINE_NO_POINT;
+		return;
+	}
+
+	tracker->timeline = timeline;
+	tracker->point    = timeline->point_next;
+
+	/* Find next available point. */
+	timeline->point_next++;
+	if (MALI_TIMELINE_NO_POINT == timeline->point_next) {
+		timeline->point_next++;
+	}
+
+	MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+
+	if (MALI_TIMELINE_TRACKER_GP == tracker->type) {
+		_mali_osk_atomic_inc(&gp_tracker_count);
+	} else if (MALI_TIMELINE_TRACKER_PP == tracker->type) {
+		if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) {
+			_mali_osk_atomic_inc(&virt_pp_tracker_count);
+		} else {
+			_mali_osk_atomic_inc(&phy_pp_tracker_count);
+		}
+	}
+
+	/* Add tracker as new head on timeline's tracker list. */
+	if (NULL == timeline->tracker_head) {
+		/* Tracker list is empty. */
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+
+		timeline->tracker_tail = tracker;
+
+		MALI_DEBUG_ASSERT(NULL == tracker->timeline_next);
+		MALI_DEBUG_ASSERT(NULL == tracker->timeline_prev);
+	} else {
+		MALI_DEBUG_ASSERT(NULL == timeline->tracker_head->timeline_next);
+
+		tracker->timeline_prev = timeline->tracker_head;
+		timeline->tracker_head->timeline_next = tracker;
+
+		MALI_DEBUG_ASSERT(NULL == tracker->timeline_next);
+	}
+	timeline->tracker_head = tracker;
+
+	MALI_DEBUG_ASSERT(NULL == timeline->tracker_head->timeline_next);
+	MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail->timeline_prev);
+}
+
+/* Inserting the waiter object into the given timeline */
+static void mali_timeline_insert_waiter(struct mali_timeline *timeline, struct mali_timeline_waiter *waiter_new)
+{
+	struct mali_timeline_waiter *waiter_prev;
+	struct mali_timeline_waiter *waiter_next;
+
+	/* Waiter time must be between timeline head and tail, and there must
+	 * be less than MALI_TIMELINE_MAX_POINT_SPAN elements between */
+	MALI_DEBUG_ASSERT((waiter_new->point - timeline->point_oldest) < MALI_TIMELINE_MAX_POINT_SPAN);
+	MALI_DEBUG_ASSERT((-waiter_new->point + timeline->point_next) < MALI_TIMELINE_MAX_POINT_SPAN);
+
+	/* Finding out where to put this waiter, in the linked waiter list of the given timeline **/
+	waiter_prev = timeline->waiter_head; /* Insert new after  waiter_prev */
+	waiter_next = NULL;                  /* Insert new before waiter_next */
+
+	/* Iterating backwards from head (newest) to tail (oldest) until we
+	 * find the correct spot to insert the new waiter */
+	while (waiter_prev && mali_timeline_point_after(waiter_prev->point, waiter_new->point)) {
+		waiter_next = waiter_prev;
+		waiter_prev = waiter_prev->timeline_prev;
+	}
+
+	if (NULL == waiter_prev && NULL == waiter_next) {
+		/* list is empty */
+		timeline->waiter_head = waiter_new;
+		timeline->waiter_tail = waiter_new;
+	} else if (NULL == waiter_next) {
+		/* insert at head */
+		waiter_new->timeline_prev = timeline->waiter_head;
+		timeline->waiter_head->timeline_next = waiter_new;
+		timeline->waiter_head = waiter_new;
+	} else if (NULL == waiter_prev) {
+		/* insert at tail */
+		waiter_new->timeline_next = timeline->waiter_tail;
+		timeline->waiter_tail->timeline_prev = waiter_new;
+		timeline->waiter_tail = waiter_new;
+	} else {
+		/* insert between */
+		waiter_new->timeline_next = waiter_next;
+		waiter_new->timeline_prev = waiter_prev;
+		waiter_next->timeline_prev = waiter_new;
+		waiter_prev->timeline_next = waiter_new;
+	}
+}
+
+static void mali_timeline_update_delayed_work(struct mali_timeline *timeline)
+{
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *oldest_tracker;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SOFT == timeline->id);
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Timer is disabled, early out. */
+	if (!system->timer_enabled) return;
+
+	oldest_tracker = timeline->tracker_tail;
+	if (NULL != oldest_tracker && 0 == oldest_tracker->trigger_ref_count) {
+		if (MALI_FALSE == oldest_tracker->timer_active) {
+			if (MALI_TRUE == timeline->timer_active) {
+				_mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work);
+			}
+			_mali_osk_wq_delayed_schedule_work(timeline->delayed_work, MALI_TIMELINE_TIMEOUT_HZ);
+			oldest_tracker->timer_active = MALI_TRUE;
+			timeline->timer_active = MALI_TRUE;
+		}
+	} else if (MALI_TRUE == timeline->timer_active) {
+		_mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work);
+		timeline->timer_active = MALI_FALSE;
+	}
+}
+
+static mali_scheduler_mask mali_timeline_update_oldest_point(struct mali_timeline *timeline)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	MALI_DEBUG_CODE({
+		struct mali_timeline_system *system = timeline->system;
+		MALI_DEBUG_ASSERT_POINTER(system);
+
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+	});
+
+	if (NULL != timeline->tracker_tail) {
+		/* Set oldest point to oldest tracker's point */
+		timeline->point_oldest = timeline->tracker_tail->point;
+	} else {
+		/* No trackers, mark point list as empty */
+		timeline->point_oldest = timeline->point_next;
+	}
+
+	/* Release all waiters no longer on the timeline's point list.
+	 * Releasing a waiter can trigger this function to be called again, so
+	 * we do not store any pointers on stack. */
+	while (NULL != timeline->waiter_tail) {
+		u32 waiter_time_relative;
+		u32 time_head_relative;
+		struct mali_timeline_waiter *waiter = timeline->waiter_tail;
+
+		time_head_relative = timeline->point_next - timeline->point_oldest;
+		waiter_time_relative = waiter->point - timeline->point_oldest;
+
+		if (waiter_time_relative < time_head_relative) {
+			/* This and all following waiters are on the point list, so we are done. */
+			break;
+		}
+
+		/* Remove waiter from timeline's waiter list. */
+		if (NULL != waiter->timeline_next) {
+			waiter->timeline_next->timeline_prev = NULL;
+		} else {
+			/* This was the last waiter */
+			timeline->waiter_head = NULL;
+		}
+		timeline->waiter_tail = waiter->timeline_next;
+
+		/* Release waiter.  This could activate a tracker, if this was
+		 * the last waiter for the tracker. */
+		schedule_mask |= mali_timeline_system_release_waiter(timeline->system, waiter);
+	}
+
+	return schedule_mask;
+}
+
+void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker,
+				mali_timeline_tracker_type type,
+				struct mali_timeline_fence *fence,
+				void *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAX > type);
+
+	/* Zero out all tracker members. */
+	_mali_osk_memset(tracker, 0, sizeof(*tracker));
+
+	tracker->type = type;
+	tracker->job = job;
+	tracker->trigger_ref_count = 1;  /* Prevents any callback from trigging while adding it */
+	tracker->os_tick_create = _mali_osk_time_tickcount();
+	MALI_DEBUG_CODE(tracker->magic = MALI_TIMELINE_TRACKER_MAGIC);
+
+	tracker->activation_error = MALI_TIMELINE_ACTIVATION_ERROR_NONE;
+
+	/* Copy fence. */
+	if (NULL != fence) {
+		_mali_osk_memcpy(&tracker->fence, fence, sizeof(struct mali_timeline_fence));
+	}
+}
+
+mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker)
+{
+	struct mali_timeline *timeline;
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *tracker_next, *tracker_prev;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+
+	/* Upon entry a group lock will be held, but not a scheduler lock. */
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+	/* Tracker should have been triggered */
+	MALI_DEBUG_ASSERT(0 == tracker->trigger_ref_count);
+
+	/* All waiters should have been released at this point */
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_head);
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+
+	MALI_DEBUG_PRINT(3, ("Mali Timeline: releasing tracker for job 0x%08X\n", tracker->job));
+
+	timeline = tracker->timeline;
+	if (NULL == timeline) {
+		/* Tracker was not on a timeline, there is nothing to release. */
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Tracker should still be on timeline */
+	MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+	MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, tracker->point));
+
+	/* Tracker is no longer valid. */
+	MALI_DEBUG_CODE(tracker->magic = 0);
+
+	tracker_next = tracker->timeline_next;
+	tracker_prev = tracker->timeline_prev;
+	tracker->timeline_next = NULL;
+	tracker->timeline_prev = NULL;
+
+	/* Removing tracker from timeline's tracker list */
+	if (NULL == tracker_next) {
+		/* This tracker was the head */
+		timeline->tracker_head = tracker_prev;
+	} else {
+		tracker_next->timeline_prev = tracker_prev;
+	}
+
+	if (NULL == tracker_prev) {
+		/* This tracker was the tail */
+		timeline->tracker_tail = tracker_next;
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+		/* Update the timeline's oldest time and release any waiters */
+		schedule_mask |= mali_timeline_update_oldest_point(timeline);
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+	} else {
+		tracker_prev->timeline_next = tracker_next;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Update delayed work only when it is the soft job timeline */
+	if (MALI_TIMELINE_SOFT == tracker->timeline->id) {
+		mali_timeline_update_delayed_work(tracker->timeline);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return schedule_mask;
+}
+
+void mali_timeline_system_release_waiter_list(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *tail,
+		struct mali_timeline_waiter *head)
+{
+	struct mali_timeline_waiter    *waiter = NULL;
+	struct mali_timeline_waiter    *next = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(head);
+	MALI_DEBUG_ASSERT_POINTER(tail);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	head->tracker_next = system->waiter_empty_list;
+	system->waiter_empty_list = tail;
+
+	waiter = system->waiter_empty_list;
+	while (NULL != waiter) {
+		next = waiter->tracker_next;
+		_mali_osk_free(waiter);
+		waiter = next;
+	}
+	system->waiter_empty_list = NULL;
+}
+
+static mali_scheduler_mask mali_timeline_tracker_activate(struct mali_timeline_tracker *tracker)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	struct mali_timeline_system *system;
+	struct mali_timeline *timeline;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+	system = tracker->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	tracker->os_tick_activate = _mali_osk_time_tickcount();
+
+	if (NULL != tracker->waiter_head) {
+		mali_timeline_system_release_waiter_list(system, tracker->waiter_tail, tracker->waiter_head);
+		tracker->waiter_head = NULL;
+		tracker->waiter_tail = NULL;
+	}
+
+	switch (tracker->type) {
+	case MALI_TIMELINE_TRACKER_GP:
+		schedule_mask = mali_scheduler_activate_gp_job((struct mali_gp_job *) tracker->job);
+
+		_mali_osk_atomic_dec(&gp_tracker_count);
+		break;
+	case MALI_TIMELINE_TRACKER_PP:
+		if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) {
+			_mali_osk_atomic_dec(&virt_pp_tracker_count);
+		} else {
+			_mali_osk_atomic_dec(&phy_pp_tracker_count);
+		}
+		schedule_mask = mali_scheduler_activate_pp_job((struct mali_pp_job *) tracker->job);
+		break;
+	case MALI_TIMELINE_TRACKER_SOFT:
+		timeline = tracker->timeline;
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		schedule_mask |= mali_soft_job_system_activate_job((struct mali_soft_job *) tracker->job);
+
+		/* Start a soft timer to make sure the soft job be released in a limited time */
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+		mali_timeline_update_delayed_work(timeline);
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+		break;
+	case MALI_TIMELINE_TRACKER_WAIT:
+		mali_timeline_fence_wait_activate((struct mali_timeline_fence_wait_tracker *) tracker->job);
+		break;
+	case MALI_TIMELINE_TRACKER_SYNC:
+#if defined(CONFIG_SYNC)
+		mali_timeline_sync_fence_activate((struct mali_timeline_sync_fence_tracker *) tracker->job);
+#else
+		MALI_PRINT_ERROR(("Mali Timeline: sync tracker not supported\n", tracker->type));
+#endif /* defined(CONFIG_SYNC) */
+		break;
+	default:
+		MALI_PRINT_ERROR(("Mali Timeline - Illegal tracker type: %d\n", tracker->type));
+		break;
+	}
+
+	return schedule_mask;
+}
+
+void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker)
+{
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+	tracker->trigger_ref_count++;
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+}
+
+mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error)
+{
+	u32 tid = _mali_osk_get_tid();
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+	tracker->trigger_ref_count--;
+
+	tracker->activation_error |= activation_error;
+
+	if (0 == tracker->trigger_ref_count) {
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+		tracker = NULL;
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return schedule_mask;
+}
+
+void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+	MALI_DEBUG_ASSERT_POINTER(uk_fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		fence->points[i] = uk_fence->points[i];
+	}
+
+	fence->sync_fd = uk_fence->sync_fd;
+}
+
+struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session)
+{
+	u32 i;
+	struct mali_timeline_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: creating timeline system\n"));
+
+	system = (struct mali_timeline_system *) _mali_osk_calloc(1, sizeof(struct mali_timeline_system));
+	if (NULL == system) {
+		return NULL;
+	}
+
+	system->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM);
+	if (NULL == system->spinlock) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		system->timelines[i] = mali_timeline_create(system, (enum mali_timeline_id)i);
+		if (NULL == system->timelines[i]) {
+			mali_timeline_system_destroy(system);
+			return NULL;
+		}
+	}
+
+#if defined(CONFIG_SYNC)
+	system->signaled_sync_tl = mali_sync_timeline_create(NULL, "mali-always-signaled");
+	if (NULL == system->signaled_sync_tl) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	system->waiter_empty_list = NULL;
+	system->session = session;
+	system->timer_enabled = MALI_TRUE;
+
+	system->wait_queue = _mali_osk_wait_queue_init();
+	if (NULL == system->wait_queue) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+
+	return system;
+}
+
+#if defined(CONFIG_SYNC)
+
+/**
+ * Check if there are any trackers left on timeline.
+ *
+ * Used as a wait queue conditional.
+ *
+ * @param data Timeline.
+ * @return MALI_TRUE if there are no trackers on timeline, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_has_no_trackers(void *data)
+{
+	struct mali_timeline *timeline = (struct mali_timeline *) data;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	return mali_timeline_is_empty(timeline);
+}
+
+/**
+ * Cancel sync fence waiters waited upon by trackers on all timelines.
+ *
+ * Will return after all timelines have no trackers left.
+ *
+ * @param system Timeline system.
+ */
+static void mali_timeline_cancel_sync_fence_waiters(struct mali_timeline_system *system)
+{
+	u32 i;
+	u32 tid = _mali_osk_get_tid();
+	struct mali_timeline_tracker *tracker, *tracker_next;
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(tracker_list);
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Cancel sync fence waiters. */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		tracker_next = timeline->tracker_tail;
+		while (NULL != tracker_next) {
+			tracker = tracker_next;
+			tracker_next = tracker->timeline_next;
+
+			if (NULL == tracker->sync_fence) continue;
+
+			MALI_DEBUG_PRINT(3, ("Mali Timeline: Cancelling sync fence wait for tracker 0x%08X.\n", tracker));
+
+			/* Cancel sync fence waiter. */
+			if (0 == sync_fence_cancel_async(tracker->sync_fence, &tracker->sync_fence_waiter)) {
+				/* Callback was not called, move tracker to local list. */
+				_mali_osk_list_add(&tracker->sync_fence_cancel_list, &tracker_list);
+			}
+		}
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	/* Manually call sync fence callback in order to release waiter and trigger activation of tracker. */
+	_MALI_OSK_LIST_FOREACHENTRY(tracker, tracker_next, &tracker_list, struct mali_timeline_tracker, sync_fence_cancel_list) {
+		mali_timeline_sync_fence_callback(tracker->sync_fence, &tracker->sync_fence_waiter);
+	}
+
+	/* Sleep until all sync fence callbacks are done and all timelines are empty. */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		_mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_has_no_trackers, (void *) timeline);
+	}
+}
+
+#endif /* defined(CONFIG_SYNC) */
+
+void mali_timeline_system_abort(struct mali_timeline_system *system)
+{
+	MALI_DEBUG_CODE(u32 tid = _mali_osk_get_tid(););
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali Timeline: Aborting timeline system for session 0x%08X.\n", system->session));
+
+#if defined(CONFIG_SYNC)
+	mali_timeline_cancel_sync_fence_waiters(system);
+#endif /* defined(CONFIG_SYNC) */
+
+	/* Should not be any waiters or trackers left at this point. */
+	MALI_DEBUG_CODE({
+		u32 i;
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+		for (i = 0; i < MALI_TIMELINE_MAX; ++i)
+		{
+			struct mali_timeline *timeline = system->timelines[i];
+			MALI_DEBUG_ASSERT_POINTER(timeline);
+			MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next);
+			MALI_DEBUG_ASSERT(NULL == timeline->tracker_head);
+			MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+			MALI_DEBUG_ASSERT(NULL == timeline->waiter_head);
+			MALI_DEBUG_ASSERT(NULL == timeline->waiter_tail);
+		}
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+	});
+}
+
+void mali_timeline_system_destroy(struct mali_timeline_system *system)
+{
+	u32 i;
+	struct mali_timeline_waiter *waiter, *next;
+#if defined(CONFIG_SYNC)
+	u32 tid = _mali_osk_get_tid();
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: destroying timeline system\n"));
+
+	if (NULL != system) {
+
+		/* There should be no waiters left on this queue. */
+		if (NULL != system->wait_queue) {
+			_mali_osk_wait_queue_term(system->wait_queue);
+			system->wait_queue = NULL;
+		}
+
+		/* Free all waiters in empty list */
+		waiter = system->waiter_empty_list;
+		while (NULL != waiter) {
+			next = waiter->tracker_next;
+			_mali_osk_free(waiter);
+			waiter = next;
+		}
+
+#if defined(CONFIG_SYNC)
+		if (NULL != system->signaled_sync_tl) {
+			sync_timeline_destroy(system->signaled_sync_tl);
+		}
+
+		for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+			if ((NULL != system->timelines[i]) && (NULL != system->timelines[i]->spinlock)) {
+				mali_spinlock_reentrant_wait(system->timelines[i]->spinlock, tid);
+				system->timelines[i]->destroyed = MALI_TRUE;
+				mali_spinlock_reentrant_signal(system->timelines[i]->spinlock, tid);
+			}
+		}
+#endif /* defined(CONFIG_SYNC) */
+
+		for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+			if (NULL != system->timelines[i]) {
+				mali_timeline_destroy(system->timelines[i]);
+			}
+		}
+
+		if (NULL != system->spinlock) {
+			mali_spinlock_reentrant_term(system->spinlock);
+		}
+
+		_mali_osk_free(system);
+	}
+}
+
+/**
+ * Find how many waiters are needed for a given fence.
+ *
+ * @param fence The fence to check.
+ * @return Number of waiters needed for fence.
+ */
+static u32 mali_timeline_fence_num_waiters(struct mali_timeline_fence *fence)
+{
+	u32 i, num_waiters = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		if (MALI_TIMELINE_NO_POINT != fence->points[i]) {
+			++num_waiters;
+		}
+	}
+
+#if defined(CONFIG_SYNC)
+	if (-1 != fence->sync_fd) ++num_waiters;
+#endif /* defined(CONFIG_SYNC) */
+
+	return num_waiters;
+}
+
+static struct mali_timeline_waiter *mali_timeline_system_get_zeroed_waiter(struct mali_timeline_system *system)
+{
+	struct mali_timeline_waiter *waiter;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	waiter = system->waiter_empty_list;
+	if (NULL != waiter) {
+		/* Remove waiter from empty list and zero it */
+		system->waiter_empty_list = waiter->tracker_next;
+		_mali_osk_memset(waiter, 0, sizeof(*waiter));
+	}
+
+	/* Return NULL if list was empty. */
+	return waiter;
+}
+
+static void mali_timeline_system_allocate_waiters(struct mali_timeline_system *system,
+		struct mali_timeline_waiter **tail,
+		struct mali_timeline_waiter **head,
+		int max_num_waiters)
+{
+	u32 i, tid = _mali_osk_get_tid();
+	mali_bool do_alloc;
+	struct mali_timeline_waiter *waiter;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(tail);
+	MALI_DEBUG_ASSERT_POINTER(head);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	*head = *tail = NULL;
+	do_alloc = MALI_FALSE;
+	i = 0;
+	while (i < max_num_waiters) {
+		if (MALI_FALSE == do_alloc) {
+			waiter = mali_timeline_system_get_zeroed_waiter(system);
+			if (NULL == waiter) {
+				do_alloc = MALI_TRUE;
+				mali_spinlock_reentrant_signal(system->spinlock, tid);
+				continue;
+			}
+		} else {
+			waiter = _mali_osk_calloc(1, sizeof(struct mali_timeline_waiter));
+			if (NULL == waiter) break;
+		}
+		++i;
+		if (NULL == *tail) {
+			*tail = waiter;
+			*head = waiter;
+		} else {
+			(*head)->tracker_next = waiter;
+			*head = waiter;
+		}
+	}
+	if (MALI_TRUE == do_alloc) {
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+	}
+}
+
+/**
+ * Create waiters for the given tracker. The tracker is activated when all waiters are release.
+ *
+ * @note Tracker can potentially be activated before this function returns.
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker we will create waiters for.
+ * @param waiter_tail List of pre-allocated waiters.
+ * @param waiter_head List of pre-allocated waiters.
+ */
+static void mali_timeline_system_create_waiters_and_unlock(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		struct mali_timeline_waiter *waiter_tail,
+		struct mali_timeline_waiter *waiter_head)
+{
+	int i;
+	u32 tid = _mali_osk_get_tid();
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+#if defined(CONFIG_SYNC)
+	struct sync_fence *sync_fence = NULL;
+#endif /* defined(CONFIG_SYNC) */
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_head);
+	MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+	MALI_DEBUG_ASSERT(NULL != tracker->job);
+
+	/* Creating waiter object for all the timelines the fence is put on. Inserting this waiter
+	 * into the timelines sorted list of waiters */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		mali_timeline_point point;
+		struct mali_timeline *timeline;
+		struct mali_timeline_waiter *waiter;
+
+		/* Get point on current timeline from tracker's fence. */
+		point = tracker->fence.points[i];
+
+		if (likely(MALI_TIMELINE_NO_POINT == point)) {
+			/* Fence contains no point on this timeline so we don't need a waiter. */
+			continue;
+		}
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (unlikely(!mali_timeline_is_point_valid(timeline, point))) {
+			MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n",
+					  point, timeline->point_oldest, timeline->point_next));
+			continue;
+		}
+
+		if (likely(mali_timeline_is_point_released(timeline, point))) {
+			/* Tracker representing the point has been released so we don't need a
+			 * waiter. */
+			continue;
+		}
+
+		/* The point is on timeline. */
+		MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, point));
+
+		/* Get a new zeroed waiter object. */
+		if (likely(NULL != waiter_tail)) {
+			waiter = waiter_tail;
+			waiter_tail = waiter_tail->tracker_next;
+		} else {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+			continue;
+		}
+
+		/* Yanking the trigger ref count of the tracker. */
+		tracker->trigger_ref_count++;
+
+		waiter->point   = point;
+		waiter->tracker = tracker;
+
+		/* Insert waiter on tracker's singly-linked waiter list. */
+		if (NULL == tracker->waiter_head) {
+			/* list is empty */
+			MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+			tracker->waiter_tail = waiter;
+		} else {
+			tracker->waiter_head->tracker_next = waiter;
+		}
+		tracker->waiter_head = waiter;
+
+		/* Add waiter to timeline. */
+		mali_timeline_insert_waiter(timeline, waiter);
+	}
+#if defined(CONFIG_SYNC)
+	if (-1 != tracker->fence.sync_fd) {
+		int ret;
+		struct mali_timeline_waiter *waiter;
+
+		sync_fence = sync_fence_fdget(tracker->fence.sync_fd);
+		if (unlikely(NULL == sync_fence)) {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", tracker->fence.sync_fd));
+			goto exit;
+		}
+
+		/* Check if we have a zeroed waiter object available. */
+		if (unlikely(NULL == waiter_tail)) {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+			goto exit;
+		}
+
+		/* Start asynchronous wait that will release waiter when the fence is signaled. */
+		sync_fence_waiter_init(&tracker->sync_fence_waiter, mali_timeline_sync_fence_callback);
+		ret = sync_fence_wait_async(sync_fence, &tracker->sync_fence_waiter);
+		if (1 == ret) {
+			/* Fence already signaled, no waiter needed. */
+			tracker->fence.sync_fd = -1;
+			goto exit;
+		} else if (0 != ret) {
+			MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, ret));
+			tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT;
+			goto exit;
+		}
+
+		/* Grab new zeroed waiter object. */
+		waiter = waiter_tail;
+		waiter_tail = waiter_tail->tracker_next;
+
+		/* Increase the trigger ref count of the tracker. */
+		tracker->trigger_ref_count++;
+
+		waiter->point   = MALI_TIMELINE_NO_POINT;
+		waiter->tracker = tracker;
+
+		/* Insert waiter on tracker's singly-linked waiter list. */
+		if (NULL == tracker->waiter_head) {
+			/* list is empty */
+			MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+			tracker->waiter_tail = waiter;
+		} else {
+			tracker->waiter_head->tracker_next = waiter;
+		}
+		tracker->waiter_head = waiter;
+
+		/* Also store waiter in separate field for easy access by sync callback. */
+		tracker->waiter_sync = waiter;
+
+		/* Store the sync fence in tracker so we can retrieve in abort session, if needed. */
+		tracker->sync_fence = sync_fence;
+
+		sync_fence = NULL;
+	}
+#endif /* defined(CONFIG_SYNC)*/
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	if ((NULL != tracker->timeline) && (MALI_TIMELINE_PP == tracker->timeline->id)) {
+
+		struct mali_pp_job *job = (struct mali_pp_job *)tracker->job;
+
+		if (0 < job->dma_fence_context.num_dma_fence_waiter) {
+			struct mali_timeline_waiter *waiter;
+			/* Check if we have a zeroed waiter object available. */
+			if (unlikely(NULL == waiter_tail)) {
+				MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+				goto exit;
+			}
+
+			/* Grab new zeroed waiter object. */
+			waiter = waiter_tail;
+			waiter_tail = waiter_tail->tracker_next;
+
+			/* Increase the trigger ref count of the tracker. */
+			tracker->trigger_ref_count++;
+
+			waiter->point   = MALI_TIMELINE_NO_POINT;
+			waiter->tracker = tracker;
+
+			/* Insert waiter on tracker's singly-linked waiter list. */
+			if (NULL == tracker->waiter_head) {
+				/* list is empty */
+				MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+				tracker->waiter_tail = waiter;
+			} else {
+				tracker->waiter_head->tracker_next = waiter;
+			}
+			tracker->waiter_head = waiter;
+
+			/* Also store waiter in separate field for easy access by sync callback. */
+			tracker->waiter_dma_fence = waiter;
+		}
+	}
+#endif /* defined(CONFIG_MALI_DMA_BUF_FENCE)*/
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE) ||defined(CONFIG_SYNC)
+exit:
+#endif /* defined(CONFIG_MALI_DMA_BUF_FENCE) || defined(CONFIG_SYNC) */
+
+	if (NULL != waiter_tail) {
+		mali_timeline_system_release_waiter_list(system, waiter_tail, waiter_head);
+	}
+
+	/* Release the initial trigger ref count. */
+	tracker->trigger_ref_count--;
+
+	/* If there were no waiters added to this tracker we activate immediately. */
+	if (0 == tracker->trigger_ref_count) {
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+#if defined(CONFIG_SYNC)
+	if (NULL != sync_fence) {
+		sync_fence_put(sync_fence);
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		enum mali_timeline_id timeline_id)
+{
+	int num_waiters = 0;
+	struct mali_timeline_waiter *waiter_tail, *waiter_head;
+	u32 tid = _mali_osk_get_tid();
+
+	mali_timeline_point point = MALI_TIMELINE_NO_POINT;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == system->session->is_aborting);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAX > tracker->type);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: adding tracker for job %p, timeline: %d\n", tracker->job, timeline_id));
+
+	MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+	tracker->system = system;
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	num_waiters = mali_timeline_fence_num_waiters(&tracker->fence);
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	if (MALI_TIMELINE_PP == timeline_id) {
+		struct mali_pp_job *job = (struct mali_pp_job *)tracker->job;
+		if (0 < job->dma_fence_context.num_dma_fence_waiter)
+			num_waiters++;
+	}
+#endif
+
+	/* Allocate waiters. */
+	mali_timeline_system_allocate_waiters(system, &waiter_tail, &waiter_head, num_waiters);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Add tracker to timeline.  This will allocate a point for the tracker on the timeline. If
+	 * timeline ID is MALI_TIMELINE_NONE the tracker will NOT be added to a timeline and the
+	 * point will be MALI_TIMELINE_NO_POINT.
+	 *
+	 * NOTE: the tracker can fail to be added if the timeline is full.  If this happens, the
+	 * point will be MALI_TIMELINE_NO_POINT. */
+	MALI_DEBUG_ASSERT(timeline_id < MALI_TIMELINE_MAX || timeline_id == MALI_TIMELINE_NONE);
+	if (likely(timeline_id < MALI_TIMELINE_MAX)) {
+		struct mali_timeline *timeline = system->timelines[timeline_id];
+		mali_timeline_insert_tracker(timeline, tracker);
+		MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+	}
+
+	point = tracker->point;
+
+	/* Create waiters for tracker based on supplied fence.  Each waiter will increase the
+	 * trigger ref count. */
+	mali_timeline_system_create_waiters_and_unlock(system, tracker, waiter_tail, waiter_head);
+	tracker = NULL;
+
+	/* At this point the tracker object might have been freed so we should no longer
+	 * access it. */
+
+
+	/* The tracker will always be activated after calling add_tracker, even if NO_POINT is
+	 * returned. */
+	return point;
+}
+
+static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *waiter)
+{
+	struct mali_timeline_tracker *tracker;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	tracker = waiter->tracker;
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	/* At this point the waiter has been removed from the timeline's waiter list, but it is
+	 * still on the tracker's waiter list.  All of the tracker's waiters will be released when
+	 * the tracker is activated. */
+
+	waiter->point   = MALI_TIMELINE_NO_POINT;
+	waiter->tracker = NULL;
+
+	tracker->trigger_ref_count--;
+	if (0 == tracker->trigger_ref_count) {
+		/* This was the last waiter; activate tracker */
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+		tracker = NULL;
+	}
+
+	return schedule_mask;
+}
+
+mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system,
+		enum mali_timeline_id timeline_id)
+{
+	mali_timeline_point point;
+	struct mali_timeline *timeline;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	if (MALI_TIMELINE_MAX <= timeline_id) {
+		return MALI_TIMELINE_NO_POINT;
+	}
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	timeline = system->timelines[timeline_id];
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	point = MALI_TIMELINE_NO_POINT;
+	if (timeline->point_oldest != timeline->point_next) {
+		point = timeline->point_next - 1;
+		if (MALI_TIMELINE_NO_POINT == point) point--;
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return point;
+}
+
+void mali_timeline_initialize(void)
+{
+	_mali_osk_atomic_init(&gp_tracker_count, 0);
+	_mali_osk_atomic_init(&phy_pp_tracker_count, 0);
+	_mali_osk_atomic_init(&virt_pp_tracker_count, 0);
+}
+
+void mali_timeline_terminate(void)
+{
+	_mali_osk_atomic_term(&gp_tracker_count);
+	_mali_osk_atomic_term(&phy_pp_tracker_count);
+	_mali_osk_atomic_term(&virt_pp_tracker_count);
+}
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+
+static mali_bool is_waiting_on_timeline(struct mali_timeline_tracker *tracker, enum mali_timeline_id id)
+{
+	struct mali_timeline *timeline;
+	struct mali_timeline_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT_POINTER(tracker->timeline);
+	timeline = tracker->timeline;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline->system);
+	system = timeline->system;
+
+	if (MALI_TIMELINE_MAX > id) {
+		if (MALI_TIMELINE_NO_POINT != tracker->fence.points[id]) {
+			return mali_timeline_is_point_on(system->timelines[id], tracker->fence.points[id]);
+		} else {
+			return MALI_FALSE;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_NONE == id);
+		return MALI_FALSE;
+	}
+}
+
+static const char *timeline_id_to_string(enum mali_timeline_id id)
+{
+	switch (id) {
+	case MALI_TIMELINE_GP:
+		return "GP";
+	case MALI_TIMELINE_PP:
+		return "PP";
+	case MALI_TIMELINE_SOFT:
+		return "SOFT";
+	default:
+		return "NONE";
+	}
+}
+
+static const char *timeline_tracker_type_to_string(enum mali_timeline_tracker_type type)
+{
+	switch (type) {
+	case MALI_TIMELINE_TRACKER_GP:
+		return "GP";
+	case MALI_TIMELINE_TRACKER_PP:
+		return "PP";
+	case MALI_TIMELINE_TRACKER_SOFT:
+		return "SOFT";
+	case MALI_TIMELINE_TRACKER_WAIT:
+		return "WAIT";
+	case MALI_TIMELINE_TRACKER_SYNC:
+		return "SYNC";
+	default:
+		return "INVALID";
+	}
+}
+
+mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker)
+{
+	struct mali_timeline *timeline = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	timeline = tracker->timeline;
+
+	if (0 != tracker->trigger_ref_count) {
+		return MALI_TIMELINE_TS_WAITING;
+	}
+
+	if (timeline && (timeline->tracker_tail == tracker || NULL != tracker->timeline_prev)) {
+		return MALI_TIMELINE_TS_ACTIVE;
+	}
+
+	if (timeline && (MALI_TIMELINE_NO_POINT == tracker->point)) {
+		return MALI_TIMELINE_TS_INIT;
+	}
+
+	return MALI_TIMELINE_TS_FINISH;
+}
+
+void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx)
+{
+	const char *tracker_state = "IWAF";
+	char state_char = 'I';
+	char tracker_type[32] = {0};
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker));
+	_mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type));
+
+#if defined(CONFIG_SYNC)
+	if (0 != tracker->trigger_ref_count) {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)]  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+				    tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job));
+	} else {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c  fd:%d  fence:(0x%08X)  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char,
+				    tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job));
+	}
+#else
+	if (0 != tracker->trigger_ref_count) {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)]  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+				    (unsigned int)(uintptr_t)(tracker->job));
+	} else {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char,
+				    (unsigned int)(uintptr_t)(tracker->job));
+	}
+#endif
+}
+
+void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx)
+{
+	struct mali_timeline_tracker *tracker = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	tracker = timeline->tracker_tail;
+	while (NULL != tracker) {
+		mali_timeline_debug_print_tracker(tracker, print_ctx);
+		tracker = tracker->timeline_next;
+	}
+}
+
+#if !(LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0))
+void mali_timeline_debug_direct_print_tracker(struct mali_timeline_tracker *tracker)
+{
+	const char *tracker_state = "IWAF";
+	char state_char = 'I';
+	char tracker_type[32] = {0};
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker));
+	_mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type));
+
+#if defined(CONFIG_SYNC)
+	if (0 != tracker->trigger_ref_count) {
+		MALI_PRINT(("TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)]  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+			    tracker->fence.sync_fd, tracker->sync_fence, tracker->job));
+	} else {
+		MALI_PRINT(("TL:  %s %u %c  fd:%d  fence:(0x%08X)  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char,
+			    tracker->fence.sync_fd, tracker->sync_fence, tracker->job));
+	}
+#else
+	if (0 != tracker->trigger_ref_count) {
+		MALI_PRINT(("TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)]  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+			    tracker->job));
+	} else {
+		MALI_PRINT(("TL:  %s %u %c  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char,
+			    tracker->job));
+	}
+#endif
+}
+
+void mali_timeline_debug_direct_print_timeline(struct mali_timeline *timeline)
+{
+	struct mali_timeline_tracker *tracker = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	tracker = timeline->tracker_tail;
+	while (NULL != tracker) {
+		mali_timeline_debug_direct_print_tracker(tracker);
+		tracker = tracker->timeline_next;
+	}
+}
+
+#endif
+
+void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx)
+{
+	int i;
+	int num_printed = 0;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Print all timelines */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (NULL == timeline->tracker_head) continue;
+
+		_mali_osk_ctxprintf(print_ctx, "TL: Timeline %s:\n",
+				    timeline_id_to_string((enum mali_timeline_id)i));
+
+		mali_timeline_debug_print_timeline(timeline, print_ctx);
+		num_printed++;
+	}
+
+	if (0 == num_printed) {
+		_mali_osk_ctxprintf(print_ctx, "TL: All timelines empty\n");
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+}
+
+#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+void mali_timeline_dma_fence_callback(void *pp_job_ptr)
+{
+	struct mali_timeline_system  *system;
+	struct mali_timeline_waiter  *waiter;
+	struct mali_timeline_tracker *tracker;
+	struct mali_pp_job *pp_job = (struct mali_pp_job *)pp_job_ptr;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+	mali_bool is_aborting = MALI_FALSE;
+
+	MALI_DEBUG_ASSERT_POINTER(pp_job);
+
+
+	tracker = &pp_job->tracker;
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	system = tracker->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	waiter = tracker->waiter_dma_fence;
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+
+	schedule_mask |= mali_timeline_system_release_waiter(system, waiter);
+
+	is_aborting = system->session->is_aborting;
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	if (!is_aborting) {
+		mali_executor_schedule_from_mask(schedule_mask, MALI_TRUE);
+	}
+}
+#endif
diff --git a/utgard/r6p2/common/mali_timeline.h b/utgard/r6p2/common/mali_timeline.h
new file mode 100755
index 0000000..7696d78
--- /dev/null
+++ b/utgard/r6p2/common/mali_timeline.h
@@ -0,0 +1,548 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMELINE_H__
+#define __MALI_TIMELINE_H__
+
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "mali_spinlock_reentrant.h"
+#include "mali_sync.h"
+#include "mali_scheduler_types.h"
+#include <linux/version.h>
+
+/**
+ * Soft job timeout.
+ *
+ * Soft jobs have to be signaled as complete after activation.  Normally this is done by user space,
+ * but in order to guarantee that every soft job is completed, we also have a timer.
+ */
+#define MALI_TIMELINE_TIMEOUT_HZ ((unsigned long) (HZ * 3 / 2)) /* 1500 ms. */
+
+/**
+ * Timeline type.
+ */
+typedef enum mali_timeline_id {
+	MALI_TIMELINE_GP   = MALI_UK_TIMELINE_GP,   /**< GP job timeline. */
+	MALI_TIMELINE_PP   = MALI_UK_TIMELINE_PP,   /**< PP job timeline. */
+	MALI_TIMELINE_SOFT = MALI_UK_TIMELINE_SOFT, /**< Soft job timeline. */
+	MALI_TIMELINE_MAX  = MALI_UK_TIMELINE_MAX
+} mali_timeline_id;
+
+/**
+ * Used by trackers that should not be added to a timeline (@ref mali_timeline_system_add_tracker).
+ */
+#define MALI_TIMELINE_NONE MALI_TIMELINE_MAX
+
+/**
+ * Tracker type.
+ */
+typedef enum mali_timeline_tracker_type {
+	MALI_TIMELINE_TRACKER_GP   = 0, /**< Tracker used by GP jobs. */
+	MALI_TIMELINE_TRACKER_PP   = 1, /**< Tracker used by PP jobs. */
+	MALI_TIMELINE_TRACKER_SOFT = 2, /**< Tracker used by soft jobs. */
+	MALI_TIMELINE_TRACKER_WAIT = 3, /**< Tracker used for fence wait. */
+	MALI_TIMELINE_TRACKER_SYNC = 4, /**< Tracker used for sync fence. */
+	MALI_TIMELINE_TRACKER_MAX  = 5,
+} mali_timeline_tracker_type;
+
+/**
+ * Tracker activation error.
+ */
+typedef u32 mali_timeline_activation_error;
+#define MALI_TIMELINE_ACTIVATION_ERROR_NONE      0
+#define MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT  (1<<1)
+#define MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT (1<<0)
+
+/**
+ * Type used to represent a point on a timeline.
+ */
+typedef u32 mali_timeline_point;
+
+/**
+ * Used to represent that no point on a timeline.
+ */
+#define MALI_TIMELINE_NO_POINT ((mali_timeline_point) 0)
+
+/**
+ * The maximum span of points on a timeline.  A timeline will be considered full if the difference
+ * between the oldest and newest points is equal or larger to this value.
+ */
+#define MALI_TIMELINE_MAX_POINT_SPAN 65536
+
+/**
+ * Magic value used to assert on validity of trackers.
+ */
+#define MALI_TIMELINE_TRACKER_MAGIC 0xabcdabcd
+
+struct mali_timeline;
+struct mali_timeline_waiter;
+struct mali_timeline_tracker;
+
+/**
+ * Timeline fence.
+ */
+struct mali_timeline_fence {
+	mali_timeline_point points[MALI_TIMELINE_MAX]; /**< For each timeline, a point or MALI_TIMELINE_NO_POINT. */
+	s32                 sync_fd;                   /**< A file descriptor representing a sync fence, or -1. */
+};
+
+/**
+ * Timeline system.
+ *
+ * The Timeline system has a set of timelines associated with a session.
+ */
+struct mali_timeline_system {
+	struct mali_spinlock_reentrant *spinlock;   /**< Spin lock protecting the timeline system */
+	struct mali_timeline           *timelines[MALI_TIMELINE_MAX]; /**< The timelines in this system */
+
+	/* Single-linked list of unused waiter objects.  Uses the tracker_next field in tracker. */
+	struct mali_timeline_waiter    *waiter_empty_list;
+
+	struct mali_session_data       *session;    /**< Session that owns this system. */
+
+	mali_bool                       timer_enabled; /**< Set to MALI_TRUE if soft job timer should be enabled, MALI_FALSE if not. */
+
+	_mali_osk_wait_queue_t         *wait_queue; /**< Wait queue. */
+
+#if defined(CONFIG_SYNC)
+	struct sync_timeline           *signaled_sync_tl; /**< Special sync timeline used to create pre-signaled sync fences */
+#endif /* defined(CONFIG_SYNC) */
+};
+
+/**
+ * Timeline.  Each Timeline system will have MALI_TIMELINE_MAX timelines.
+ */
+struct mali_timeline {
+	mali_timeline_point           point_next;   /**< The next available point. */
+	mali_timeline_point           point_oldest; /**< The oldest point not released. */
+
+	/* Double-linked list of trackers.  Sorted in ascending order by tracker->time_number with
+	 * tail pointing to the tracker with the oldest time. */
+	struct mali_timeline_tracker *tracker_head;
+	struct mali_timeline_tracker *tracker_tail;
+
+	/* Double-linked list of waiters.  Sorted in ascending order by waiter->time_number_wait
+	 * with tail pointing to the waiter with oldest wait time. */
+	struct mali_timeline_waiter  *waiter_head;
+	struct mali_timeline_waiter  *waiter_tail;
+
+	struct mali_timeline_system  *system;       /**< Timeline system this timeline belongs to. */
+	enum mali_timeline_id         id;           /**< Timeline type. */
+
+#if defined(CONFIG_SYNC)
+	struct sync_timeline         *sync_tl;      /**< Sync timeline that corresponds to this timeline. */
+	mali_bool destroyed;
+	struct mali_spinlock_reentrant *spinlock;       /**< Spin lock protecting the timeline system */
+#endif /* defined(CONFIG_SYNC) */
+
+	/* The following fields are used to time out soft job trackers. */
+	_mali_osk_wq_delayed_work_t  *delayed_work;
+	mali_bool                     timer_active;
+};
+
+/**
+ * Timeline waiter.
+ */
+struct mali_timeline_waiter {
+	mali_timeline_point           point;         /**< Point on timeline we are waiting for to be released. */
+	struct mali_timeline_tracker *tracker;       /**< Tracker that is waiting. */
+
+	struct mali_timeline_waiter  *timeline_next; /**< Next waiter on timeline's waiter list. */
+	struct mali_timeline_waiter  *timeline_prev; /**< Previous waiter on timeline's waiter list. */
+
+	struct mali_timeline_waiter  *tracker_next;  /**< Next waiter on tracker's waiter list. */
+};
+
+/**
+ * Timeline tracker.
+ */
+struct mali_timeline_tracker {
+	MALI_DEBUG_CODE(u32            magic); /**< Should always be MALI_TIMELINE_TRACKER_MAGIC for a valid tracker. */
+
+	mali_timeline_point            point; /**< Point on timeline for this tracker */
+
+	struct mali_timeline_tracker  *timeline_next; /**< Next tracker on timeline's tracker list */
+	struct mali_timeline_tracker  *timeline_prev; /**< Previous tracker on timeline's tracker list */
+
+	u32                            trigger_ref_count; /**< When zero tracker will be activated */
+	mali_timeline_activation_error activation_error;  /**< Activation error. */
+	struct mali_timeline_fence     fence;             /**< Fence used to create this tracker */
+
+	/* Single-linked list of waiters.  Sorted in order of insertions with
+	 * tail pointing to first waiter. */
+	struct mali_timeline_waiter   *waiter_head;
+	struct mali_timeline_waiter   *waiter_tail;
+
+#if defined(CONFIG_SYNC)
+	/* These are only used if the tracker is waiting on a sync fence. */
+	struct mali_timeline_waiter   *waiter_sync; /**< A direct pointer to timeline waiter representing sync fence. */
+	struct sync_fence_waiter       sync_fence_waiter; /**< Used to connect sync fence and tracker in sync fence wait callback. */
+	struct sync_fence             *sync_fence;   /**< The sync fence this tracker is waiting on. */
+	_mali_osk_list_t               sync_fence_cancel_list; /**< List node used to cancel sync fence waiters. */
+#endif /* defined(CONFIG_SYNC) */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	struct mali_timeline_waiter   *waiter_dma_fence; /**< A direct pointer to timeline waiter representing dma fence. */
+#endif
+
+	struct mali_timeline_system   *system;       /**< Timeline system. */
+	struct mali_timeline          *timeline;     /**< Timeline, or NULL if not on a timeline. */
+	enum mali_timeline_tracker_type type;        /**< Type of tracker. */
+	void                          *job;          /**< Owner of tracker. */
+
+	/* The following fields are used to time out soft job trackers. */
+	unsigned long                 os_tick_create;
+	unsigned long                 os_tick_activate;
+	mali_bool                     timer_active;
+};
+
+extern _mali_osk_atomic_t gp_tracker_count;
+extern _mali_osk_atomic_t phy_pp_tracker_count;
+extern _mali_osk_atomic_t virt_pp_tracker_count;
+
+/**
+ * What follows is a set of functions to check the state of a timeline and to determine where on a
+ * timeline a given point is.  Most of these checks will translate the timeline so the oldest point
+ * on the timeline is aligned with zero.  Remember that all of these calculation are done on
+ * unsigned integers.
+ *
+ * The following example illustrates the three different states a point can be in.  The timeline has
+ * been translated to put the oldest point at zero:
+ *
+ *
+ *
+ *                               [ point is in forbidden zone ]
+ *                                          64k wide
+ *                                MALI_TIMELINE_MAX_POINT_SPAN
+ *
+ *    [ point is on timeline     )                            ( point is released ]
+ *
+ *    0--------------------------##############################--------------------2^32 - 1
+ *    ^                          ^
+ *    \                          |
+ *     oldest point on timeline  |
+ *                               \
+ *                                next point on timeline
+ */
+
+/**
+ * Compare two timeline points
+ *
+ * Returns true if a is after b, false if a is before or equal to b.
+ *
+ * This funcion ignores MALI_TIMELINE_MAX_POINT_SPAN. Wrapping is supported and
+ * the result will be correct if the points is less then UINT_MAX/2 apart.
+ *
+ * @param a Point on timeline
+ * @param b Point on timeline
+ * @return MALI_TRUE if a is after b
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_point_after(mali_timeline_point a, mali_timeline_point b)
+{
+	return 0 > ((s32)b) - ((s32)a);
+}
+
+/**
+ * Check if a point is on timeline.  A point is on a timeline if it is greater than, or equal to,
+ * the oldest point, and less than the next point.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point is on timeline, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_on(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+	return (point - timeline->point_oldest) < (timeline->point_next - timeline->point_oldest);
+}
+
+/**
+ * Check if a point has been released.  A point is released if it is older than the oldest point on
+ * the timeline, newer than the next point, and also not in the forbidden zone.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point has been release, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_released(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	mali_timeline_point point_normalized;
+	mali_timeline_point next_normalized;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+	point_normalized = point - timeline->point_oldest;
+	next_normalized = timeline->point_next - timeline->point_oldest;
+
+	return point_normalized > (next_normalized + MALI_TIMELINE_MAX_POINT_SPAN);
+}
+
+/**
+ * Check if a point is valid.  A point is valid if is on the timeline or has been released.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point is valid, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_valid(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return mali_timeline_is_point_on(timeline, point) || mali_timeline_is_point_released(timeline, point);
+}
+
+/**
+ * Check if timeline is empty (has no points on it).  A timeline is empty if next == oldest.
+ *
+ * @param timeline Timeline.
+ * @return MALI_TRUE if timeline is empty, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_empty(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return timeline->point_next == timeline->point_oldest;
+}
+
+/**
+ * Check if timeline is full.  A valid timeline cannot span more than 64k points (@ref
+ * MALI_TIMELINE_MAX_POINT_SPAN).
+ *
+ * @param timeline Timeline.
+ * @return MALI_TRUE if timeline is full, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_full(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return MALI_TIMELINE_MAX_POINT_SPAN <= (timeline->point_next - timeline->point_oldest);
+}
+
+/**
+ * Create a new timeline system.
+ *
+ * @param session The session this timeline system will belong to.
+ * @return New timeline system.
+ */
+struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session);
+
+/**
+ * Abort timeline system.
+ *
+ * This will release all pending waiters in the timeline system causing all trackers to be
+ * activated.
+ *
+ * @param system Timeline system to abort all jobs from.
+ */
+void mali_timeline_system_abort(struct mali_timeline_system *system);
+
+/**
+ * Destroy an empty timeline system.
+ *
+ * @note @ref mali_timeline_system_abort() should be called prior to this function.
+ *
+ * @param system Timeline system to destroy.
+ */
+void mali_timeline_system_destroy(struct mali_timeline_system *system);
+
+/**
+ * Stop the soft job timer.
+ *
+ * @param system Timeline system
+ */
+void mali_timeline_system_stop_timer(struct mali_timeline_system *system);
+
+/**
+ * Add a tracker to a timeline system and optionally also on a timeline.
+ *
+ * Once added to the timeline system, the tracker is guaranteed to be activated.  The tracker can be
+ * activated before this function returns.  Thus, it is also possible that the tracker is released
+ * before this function returns, depending on the tracker type.
+ *
+ * @note Tracker must be initialized (@ref mali_timeline_tracker_init) before being added to the
+ * timeline system.
+ *
+ * @param system Timeline system the tracker will be added to.
+ * @param tracker The tracker to be added.
+ * @param timeline_id Id of the timeline the tracker will be added to, or
+ *                    MALI_TIMELINE_NONE if it should not be added on a timeline.
+ * @return Point on timeline identifying this tracker, or MALI_TIMELINE_NO_POINT if not on timeline.
+ */
+mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		enum mali_timeline_id timeline_id);
+
+/**
+ * Get latest point on timeline.
+ *
+ * @param system Timeline system.
+ * @param timeline_id Id of timeline to get latest point from.
+ * @return Latest point on timeline, or MALI_TIMELINE_NO_POINT if the timeline is empty.
+ */
+mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system,
+		enum mali_timeline_id timeline_id);
+
+/**
+ * Initialize tracker.
+ *
+ * Must be called before tracker is added to timeline system (@ref mali_timeline_system_add_tracker).
+ *
+ * @param tracker Tracker to initialize.
+ * @param type Type of tracker.
+ * @param fence Fence used to set up dependencies for tracker.
+ * @param job Pointer to job struct this tracker is associated with.
+ */
+void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker,
+				mali_timeline_tracker_type type,
+				struct mali_timeline_fence *fence,
+				void *job);
+
+/**
+ * Grab trigger ref count on tracker.
+ *
+ * This will prevent tracker from being activated until the trigger ref count reaches zero.
+ *
+ * @note Tracker must have been initialized (@ref mali_timeline_tracker_init).
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker.
+ */
+void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker);
+
+/**
+ * Release trigger ref count on tracker.
+ *
+ * If the trigger ref count reaches zero, the tracker will be activated.
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker.
+ * @param activation_error Error bitmask if activated with error, or MALI_TIMELINE_ACTIVATION_ERROR_NONE if no error.
+ * @return Scheduling bitmask.
+ */
+mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error);
+
+/**
+ * Release a tracker from the timeline system.
+ *
+ * This is used to signal that the job being tracker is finished, either due to normal circumstances
+ * (job complete/abort) or due to a timeout.
+ *
+ * We may need to schedule some subsystems after a tracker has been released and the returned
+ * bitmask will tell us if it is necessary.  If the return value is non-zero, this value needs to be
+ * sent as an input parameter to @ref mali_scheduler_schedule_from_mask() to do the scheduling.
+ *
+ * @note Tracker must have been activated before being released.
+ * @warning Not calling @ref mali_scheduler_schedule_from_mask() after releasing a tracker can lead
+ * to a deadlock.
+ *
+ * @param tracker Tracker being released.
+ * @return Scheduling bitmask.
+ */
+mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker);
+
+MALI_STATIC_INLINE mali_bool mali_timeline_tracker_activation_error(
+	struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	return (MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT &
+		tracker->activation_error) ? MALI_TRUE : MALI_FALSE;
+}
+
+/**
+ * Copy data from a UK fence to a Timeline fence.
+ *
+ * @param fence Timeline fence.
+ * @param uk_fence UK fence.
+ */
+void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence);
+
+void mali_timeline_initialize(void);
+
+void mali_timeline_terminate(void);
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_gp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&gp_tracker_count);
+}
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_physical_pp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&phy_pp_tracker_count);
+}
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_virtual_pp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&virt_pp_tracker_count);
+}
+
+#if defined(DEBUG)
+#define MALI_TIMELINE_DEBUG_FUNCTIONS
+#endif /* DEBUG */
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+
+/**
+ * Tracker state.  Used for debug printing.
+ */
+typedef enum mali_timeline_tracker_state {
+	MALI_TIMELINE_TS_INIT    = 0,
+	MALI_TIMELINE_TS_WAITING = 1,
+	MALI_TIMELINE_TS_ACTIVE  = 2,
+	MALI_TIMELINE_TS_FINISH  = 3,
+} mali_timeline_tracker_state;
+
+/**
+ * Get tracker state.
+ *
+ * @param tracker Tracker to check.
+ * @return State of tracker.
+ */
+mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker);
+
+/**
+ * Print debug information about tracker.
+ *
+ * @param tracker Tracker to print.
+ */
+void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx);
+
+/**
+ * Print debug information about timeline.
+ *
+ * @param timeline Timeline to print.
+ */
+void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx);
+
+#if !(LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0))
+void mali_timeline_debug_direct_print_tracker(struct mali_timeline_tracker *tracker);
+void mali_timeline_debug_direct_print_timeline(struct mali_timeline *timeline);
+#endif
+
+/**
+ * Print debug information about timeline system.
+ *
+ * @param system Timeline system to print.
+ */
+void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx);
+
+#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+/**
+ * The timeline dma fence callback when dma fence signal.
+ *
+ * @param pp_job_ptr The pointer to pp job that link to the signaled dma fence.
+ */
+void mali_timeline_dma_fence_callback(void *pp_job_ptr);
+#endif
+
+#endif /* __MALI_TIMELINE_H__ */
diff --git a/utgard/r6p2/common/mali_timeline_fence_wait.c b/utgard/r6p2/common/mali_timeline_fence_wait.c
new file mode 100755
index 0000000..1f5d421
--- /dev/null
+++ b/utgard/r6p2/common/mali_timeline_fence_wait.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timeline_fence_wait.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_spinlock_reentrant.h"
+
+/**
+ * Allocate a fence waiter tracker.
+ *
+ * @return New fence waiter if successful, NULL if not.
+ */
+static struct mali_timeline_fence_wait_tracker *mali_timeline_fence_wait_tracker_alloc(void)
+{
+	return (struct mali_timeline_fence_wait_tracker *) _mali_osk_calloc(1, sizeof(struct mali_timeline_fence_wait_tracker));
+}
+
+/**
+ * Free fence waiter tracker.
+ *
+ * @param wait Fence wait tracker to free.
+ */
+static void mali_timeline_fence_wait_tracker_free(struct mali_timeline_fence_wait_tracker *wait)
+{
+	MALI_DEBUG_ASSERT_POINTER(wait);
+	_mali_osk_atomic_term(&wait->refcount);
+	_mali_osk_free(wait);
+}
+
+/**
+ * Check if fence wait tracker has been activated.  Used as a wait queue condition.
+ *
+ * @param data Fence waiter.
+ * @return MALI_TRUE if tracker has been activated, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_fence_wait_tracker_is_activated(void *data)
+{
+	struct mali_timeline_fence_wait_tracker *wait;
+
+	wait = (struct mali_timeline_fence_wait_tracker *) data;
+	MALI_DEBUG_ASSERT_POINTER(wait);
+
+	return wait->activated;
+}
+
+/**
+ * Check if fence has been signaled.
+ *
+ * @param system Timeline system.
+ * @param fence Timeline fence.
+ * @return MALI_TRUE if fence is signaled, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_fence_wait_check_status(struct mali_timeline_system *system, struct mali_timeline_fence *fence)
+{
+	int i;
+	u32 tid = _mali_osk_get_tid();
+	mali_bool ret = MALI_TRUE;
+#if defined(CONFIG_SYNC)
+	struct sync_fence *sync_fence = NULL;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline;
+		mali_timeline_point   point;
+
+		point = fence->points[i];
+
+		if (likely(MALI_TIMELINE_NO_POINT == point)) {
+			/* Fence contains no point on this timeline. */
+			continue;
+		}
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (unlikely(!mali_timeline_is_point_valid(timeline, point))) {
+			MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n", point, timeline->point_oldest, timeline->point_next));
+		}
+
+		if (!mali_timeline_is_point_released(timeline, point)) {
+			ret = MALI_FALSE;
+			goto exit;
+		}
+	}
+
+#if defined(CONFIG_SYNC)
+	if (-1 != fence->sync_fd) {
+		sync_fence = sync_fence_fdget(fence->sync_fd);
+		if (likely(NULL != sync_fence)) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+			if (0 == sync_fence->status) {
+#else
+			if (0 == atomic_read(&sync_fence->status)) {
+#endif
+				ret = MALI_FALSE;
+			}
+		} else {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", fence->sync_fd));
+		}
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+exit:
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+#if defined(CONFIG_SYNC)
+	if (NULL != sync_fence) {
+		sync_fence_put(sync_fence);
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	return ret;
+}
+
+mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout)
+{
+	struct mali_timeline_fence_wait_tracker *wait;
+	mali_timeline_point point;
+	mali_bool ret;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: wait on fence\n"));
+
+	if (MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY == timeout) {
+		return mali_timeline_fence_wait_check_status(system, fence);
+	}
+
+	wait = mali_timeline_fence_wait_tracker_alloc();
+	if (unlikely(NULL == wait)) {
+		MALI_PRINT_ERROR(("Mali Timeline: failed to allocate data for fence wait\n"));
+		return MALI_FALSE;
+	}
+
+	wait->activated = MALI_FALSE;
+	wait->system = system;
+
+	/* Initialize refcount to two references.  The reference first will be released by this
+	 * function after the wait is over.  The second reference will be released when the tracker
+	 * is activated. */
+	_mali_osk_atomic_init(&wait->refcount, 2);
+
+	/* Add tracker to timeline system, but not to a timeline. */
+	mali_timeline_tracker_init(&wait->tracker, MALI_TIMELINE_TRACKER_WAIT, fence, wait);
+	point = mali_timeline_system_add_tracker(system, &wait->tracker, MALI_TIMELINE_NONE);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT == point);
+	MALI_IGNORE(point);
+
+	/* Wait for the tracker to be activated or time out. */
+	if (MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER == timeout) {
+		_mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait);
+	} else {
+		_mali_osk_wait_queue_wait_event_timeout(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait, timeout);
+	}
+
+	ret = wait->activated;
+
+	if (0 == _mali_osk_atomic_dec_return(&wait->refcount)) {
+		mali_timeline_fence_wait_tracker_free(wait);
+	}
+
+	return ret;
+}
+
+void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *wait)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(wait);
+	MALI_DEBUG_ASSERT_POINTER(wait->system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for fence wait tracker\n"));
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == wait->activated);
+	wait->activated = MALI_TRUE;
+
+	_mali_osk_wait_queue_wake_up(wait->system->wait_queue);
+
+	/* Nothing can wait on this tracker, so nothing to schedule after release. */
+	schedule_mask = mali_timeline_tracker_release(&wait->tracker);
+	MALI_DEBUG_ASSERT(MALI_SCHEDULER_MASK_EMPTY == schedule_mask);
+	MALI_IGNORE(schedule_mask);
+
+	if (0 == _mali_osk_atomic_dec_return(&wait->refcount)) {
+		mali_timeline_fence_wait_tracker_free(wait);
+	}
+}
diff --git a/utgard/r6p2/common/mali_timeline_fence_wait.h b/utgard/r6p2/common/mali_timeline_fence_wait.h
new file mode 100755
index 0000000..348652b
--- /dev/null
+++ b/utgard/r6p2/common/mali_timeline_fence_wait.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_timeline_fence_wait.h
+ *
+ * This file contains functions used to wait until a Timeline fence is signaled.
+ */
+
+#ifndef __MALI_TIMELINE_FENCE_WAIT_H__
+#define __MALI_TIMELINE_FENCE_WAIT_H__
+
+#include "mali_osk.h"
+#include "mali_timeline.h"
+
+/**
+ * If used as the timeout argument in @ref mali_timeline_fence_wait, a timer is not used and the
+ * function only returns when the fence is signaled.
+ */
+#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER ((u32) -1)
+
+/**
+ * If used as the timeout argument in @ref mali_timeline_fence_wait, the function will return
+ * immediately with the current state of the fence.
+ */
+#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY 0
+
+/**
+ * Fence wait tracker.
+ *
+ * The fence wait tracker is added to the Timeline system with the fence we are waiting on as a
+ * dependency.  We will then perform a blocking wait, possibly with a timeout, until the tracker is
+ * activated, which happens when the fence is signaled.
+ */
+struct mali_timeline_fence_wait_tracker {
+	mali_bool activated;                  /**< MALI_TRUE if the tracker has been activated, MALI_FALSE if not. */
+	_mali_osk_atomic_t refcount;          /**< Reference count. */
+	struct mali_timeline_system *system;  /**< Timeline system. */
+	struct mali_timeline_tracker tracker; /**< Timeline tracker. */
+};
+
+/**
+ * Wait for a fence to be signaled, or timeout is reached.
+ *
+ * @param system Timeline system.
+ * @param fence Fence to wait on.
+ * @param timeout Timeout in ms, or MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER or
+ * MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY.
+ * @return MALI_TRUE if signaled, MALI_FALSE if timed out.
+ */
+mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout);
+
+/**
+ * Used by the Timeline system to activate a fence wait tracker.
+ *
+ * @param fence_wait_tracker Fence waiter tracker.
+ */
+void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *fence_wait_tracker);
+
+#endif /* __MALI_TIMELINE_FENCE_WAIT_H__ */
diff --git a/utgard/r6p2/common/mali_timeline_sync_fence.c b/utgard/r6p2/common/mali_timeline_sync_fence.c
new file mode 100755
index 0000000..a83252b
--- /dev/null
+++ b/utgard/r6p2/common/mali_timeline_sync_fence.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timeline_sync_fence.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_sync.h"
+
+#if defined(CONFIG_SYNC)
+
+/**
+ * Creates a sync fence tracker and a sync fence.  Adds sync fence tracker to Timeline system and
+ * returns sync fence.  The sync fence will be signaled when the sync fence tracker is activated.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return Sync fence that will be signaled when tracker is activated.
+ */
+static struct sync_fence *mali_timeline_sync_fence_create_and_add_tracker(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	struct mali_timeline_sync_fence_tracker *sync_fence_tracker;
+	struct sync_fence                       *sync_fence;
+	struct mali_timeline_fence               fence;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+	/* Allocate sync fence tracker. */
+	sync_fence_tracker = _mali_osk_calloc(1, sizeof(struct mali_timeline_sync_fence_tracker));
+	if (NULL == sync_fence_tracker) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_fence_tracker allocation failed\n"));
+		return NULL;
+	}
+
+	/* Create sync flag. */
+	MALI_DEBUG_ASSERT_POINTER(timeline->sync_tl);
+	sync_fence_tracker->flag = mali_sync_flag_create(timeline->sync_tl, point);
+	if (NULL == sync_fence_tracker->flag) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_flag creation failed\n"));
+		_mali_osk_free(sync_fence_tracker);
+		return NULL;
+	}
+
+	/* Create sync fence from sync flag. */
+	sync_fence = mali_sync_flag_create_fence(sync_fence_tracker->flag);
+	if (NULL == sync_fence) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_fence creation failed\n"));
+		mali_sync_flag_put(sync_fence_tracker->flag);
+		_mali_osk_free(sync_fence_tracker);
+		return NULL;
+	}
+
+	/* Setup fence for tracker. */
+	_mali_osk_memset(&fence, 0, sizeof(struct mali_timeline_fence));
+	fence.sync_fd = -1;
+	fence.points[timeline->id] = point;
+
+	/* Finally, add the tracker to Timeline system. */
+	mali_timeline_tracker_init(&sync_fence_tracker->tracker, MALI_TIMELINE_TRACKER_SYNC, &fence, sync_fence_tracker);
+	point = mali_timeline_system_add_tracker(timeline->system, &sync_fence_tracker->tracker, MALI_TIMELINE_NONE);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT == point);
+
+	return sync_fence;
+}
+
+s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence)
+{
+	u32 i;
+	struct sync_fence *sync_fence_acc = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline;
+		struct sync_fence *sync_fence;
+
+		if (MALI_TIMELINE_NO_POINT == fence->points[i]) continue;
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		sync_fence = mali_timeline_sync_fence_create_and_add_tracker(timeline, fence->points[i]);
+		if (NULL == sync_fence) goto error;
+
+		if (NULL != sync_fence_acc) {
+			/* Merge sync fences. */
+			sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence);
+			if (NULL == sync_fence_acc) goto error;
+		} else {
+			/* This was the first sync fence created. */
+			sync_fence_acc = sync_fence;
+		}
+	}
+
+	if (-1 != fence->sync_fd) {
+		struct sync_fence *sync_fence;
+
+		sync_fence = sync_fence_fdget(fence->sync_fd);
+		if (NULL == sync_fence) goto error;
+
+		if (NULL != sync_fence_acc) {
+			sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence);
+			if (NULL == sync_fence_acc) goto error;
+		} else {
+			sync_fence_acc = sync_fence;
+		}
+	}
+
+	if (NULL == sync_fence_acc) {
+		MALI_DEBUG_ASSERT_POINTER(system->signaled_sync_tl);
+
+		/* There was nothing to wait on, so return an already signaled fence. */
+
+		sync_fence_acc = mali_sync_timeline_create_signaled_fence(system->signaled_sync_tl);
+		if (NULL == sync_fence_acc) goto error;
+	}
+
+	/* Return file descriptor for the accumulated sync fence. */
+	return mali_sync_fence_fd_alloc(sync_fence_acc);
+
+error:
+	if (NULL != sync_fence_acc) {
+		sync_fence_put(sync_fence_acc);
+	}
+
+	return -1;
+}
+
+void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker->flag);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for sync fence tracker\n"));
+
+	/* Signal flag and release reference. */
+	mali_sync_flag_signal(sync_fence_tracker->flag, 0);
+	mali_sync_flag_put(sync_fence_tracker->flag);
+
+	/* Nothing can wait on this tracker, so nothing to schedule after release. */
+	schedule_mask = mali_timeline_tracker_release(&sync_fence_tracker->tracker);
+	MALI_DEBUG_ASSERT(MALI_SCHEDULER_MASK_EMPTY == schedule_mask);
+
+	_mali_osk_free(sync_fence_tracker);
+}
+
+#endif /* defined(CONFIG_SYNC) */
diff --git a/utgard/r6p2/common/mali_timeline_sync_fence.h b/utgard/r6p2/common/mali_timeline_sync_fence.h
new file mode 100755
index 0000000..6662b25
--- /dev/null
+++ b/utgard/r6p2/common/mali_timeline_sync_fence.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_timeline_sync_fence.h
+ *
+ * This file contains code related to creating sync fences from timeline fences.
+ */
+
+#ifndef __MALI_TIMELINE_SYNC_FENCE_H__
+#define __MALI_TIMELINE_SYNC_FENCE_H__
+
+#include "mali_timeline.h"
+
+#if defined(CONFIG_SYNC)
+
+/**
+ * Sync fence tracker.
+ */
+struct mali_timeline_sync_fence_tracker {
+	struct mali_sync_flag        *flag;    /**< Sync flag used to connect tracker and sync fence. */
+	struct mali_timeline_tracker  tracker; /**< Timeline tracker. */
+};
+
+/**
+ * Create a sync fence that will be signaled when @ref fence is signaled.
+ *
+ * @param system Timeline system.
+ * @param fence Fence to create sync fence from.
+ * @return File descriptor for new sync fence, or -1 on error.
+ */
+s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence);
+
+/**
+ * Used by the Timeline system to activate a sync fence tracker.
+ *
+ * @param sync_fence_tracker Sync fence tracker.
+ *
+ */
+void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker);
+
+#endif /* defined(CONFIG_SYNC) */
+
+#endif /* __MALI_TIMELINE_SYNC_FENCE_H__ */
diff --git a/utgard/r6p2/common/mali_ukk.h b/utgard/r6p2/common/mali_ukk.h
new file mode 100755
index 0000000..79829b9
--- /dev/null
+++ b/utgard/r6p2/common/mali_ukk.h
@@ -0,0 +1,551 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_ukk.h
+ * Defines the kernel-side interface of the user-kernel interface
+ */
+
+#ifndef __MALI_UKK_H__
+#define __MALI_UKK_H__
+
+#include "mali_osk.h"
+#include "mali_uk_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs
+ *
+ * - The _mali_uk functions are an abstraction of the interface to the device
+ * driver. On certain OSs, this would be implemented via the IOCTL interface.
+ * On other OSs, it could be via extension of some Device Driver Class, or
+ * direct function call for Bare metal/RTOSs.
+ * - It is important to note that:
+ *   -  The Device Driver has implemented the _mali_ukk set of functions
+ *   -  The Base Driver calls the corresponding set of _mali_uku functions.
+ * - What requires porting is solely the calling mechanism from User-side to
+ * Kernel-side, and propagating back the results.
+ * - Each U/K function is associated with a (group, number) pair from
+ * \ref _mali_uk_functions to make it possible for a common function in the
+ * Base Driver and Device Driver to route User/Kernel calls from/to the
+ * correct _mali_uk function. For example, in an IOCTL system, the IOCTL number
+ * would be formed based on the group and number assigned to the _mali_uk
+ * function, as listed in \ref _mali_uk_functions. On the user-side, each
+ * _mali_uku function would just make an IOCTL with the IOCTL-code being an
+ * encoded form of the (group, number) pair. On the kernel-side, the Device
+ * Driver's IOCTL handler decodes the IOCTL-code back into a (group, number)
+ * pair, and uses this to determine which corresponding _mali_ukk should be
+ * called.
+ *   - Refer to \ref _mali_uk_functions for more information about this
+ * (group, number) pairing.
+ * - In a system where there is no distinction between user and kernel-side,
+ * the U/K interface may be implemented as:@code
+ * MALI_STATIC_INLINE _mali_osk_errcode_t _mali_uku_examplefunction( _mali_uk_examplefunction_s *args )
+ * {
+ *     return mali_ukk_examplefunction( args );
+ * }
+ * @endcode
+ * - Therefore, all U/K calls behave \em as \em though they were direct
+ * function calls (but the \b implementation \em need \em not be a direct
+ * function calls)
+ *
+ * @note Naming the _mali_uk functions the same on both User and Kernel sides
+ * on non-RTOS systems causes debugging issues when setting breakpoints. In
+ * this case, it is not clear which function the breakpoint is put on.
+ * Therefore the _mali_uk functions in user space are prefixed with \c _mali_uku
+ * and in kernel space with \c _mali_ukk. The naming for the argument
+ * structures is unaffected.
+ *
+ * - The _mali_uk functions are synchronous.
+ * - Arguments to the _mali_uk functions are passed in a structure. The only
+ * parameter passed to the _mali_uk functions is a pointer to this structure.
+ * This first member of this structure, ctx, is a pointer to a context returned
+ * by _mali_uku_open(). For example:@code
+ * typedef struct
+ * {
+ *     void *ctx;
+ *     u32 number_of_cores;
+ * } _mali_uk_get_gp_number_of_cores_s;
+ * @endcode
+ *
+ * - Each _mali_uk function has its own argument structure named after the
+ *  function. The argument is distinguished by the _s suffix.
+ * - The argument types are defined by the base driver and user-kernel
+ *  interface.
+ * - All _mali_uk functions return a standard \ref _mali_osk_errcode_t.
+ * - Only arguments of type input or input/output need be initialized before
+ * calling a _mali_uk function.
+ * - Arguments of type output and input/output are only valid when the
+ * _mali_uk function returns \ref _MALI_OSK_ERR_OK.
+ * - The \c ctx member is always invalid after it has been used by a
+ * _mali_uk function, except for the context management functions
+ *
+ *
+ * \b Interface \b restrictions
+ *
+ * The requirements of the interface mean that an implementation of the
+ * User-kernel interface may do no 'real' work. For example, the following are
+ * illegal in the User-kernel implementation:
+ * - Calling functions necessary for operation on all systems,  which would
+ * not otherwise get called on RTOS systems.
+ *     - For example, a  U/K interface that calls multiple _mali_ukk functions
+ * during one particular U/K call. This could not be achieved by the same code
+ * which uses direct function calls for the U/K interface.
+ * -  Writing in values to the args members, when otherwise these members would
+ * not hold a useful value for a direct function call U/K interface.
+ *     - For example, U/K interface implementation that take NULL members in
+ * their arguments structure from the user side, but those members are
+ * replaced with non-NULL values in the kernel-side of the U/K interface
+ * implementation. A scratch area for writing data is one such example. In this
+ * case, a direct function call U/K interface would segfault, because no code
+ * would be present to replace the NULL pointer with a meaningful pointer.
+ *     - Note that we discourage the case where the U/K implementation changes
+ * a NULL argument member to non-NULL, and then the Device Driver code (outside
+ * of the U/K layer) re-checks this member for NULL, and corrects it when
+ * necessary. Whilst such code works even on direct function call U/K
+ * intefaces, it reduces the testing coverage of the Device Driver code. This
+ * is because we have no way of testing the NULL == value path on an OS
+ * implementation.
+ *
+ * A number of allowable examples exist where U/K interfaces do 'real' work:
+ * - The 'pointer switching' technique for \ref _mali_ukk_get_system_info
+ *     - In this case, without the pointer switching on direct function call
+ * U/K interface, the Device Driver code still sees the same thing: a pointer
+ * to which it can write memory. This is because such a system has no
+ * distinction between a user and kernel pointer.
+ * - Writing an OS-specific value into the ukk_private member for
+ * _mali_ukk_mem_mmap().
+ *     - In this case, this value is passed around by Device Driver code, but
+ * its actual value is never checked. Device Driver code simply passes it from
+ * the U/K layer to the OSK layer, where it can be acted upon. In this case,
+ * \em some OS implementations of the U/K (_mali_ukk_mem_mmap()) and OSK
+ * (_mali_osk_mem_mapregion_init()) functions will collaborate on the
+ *  meaning of ukk_private member. On other OSs, it may be unused by both
+ * U/K and OSK layers
+ *     - Therefore, on error inside the U/K interface implementation itself,
+ * it will be as though the _mali_ukk function itself had failed, and cleaned
+ * up after itself.
+ *     - Compare this to a direct function call U/K implementation, where all
+ * error cleanup is handled by the _mali_ukk function itself. The direct
+ * function call U/K interface implementation is automatically atomic.
+ *
+ * The last example highlights a consequence of all U/K interface
+ * implementations: they must be atomic with respect to the Device Driver code.
+ * And therefore, should Device Driver code succeed but the U/K implementation
+ * fail afterwards (but before return to user-space), then the U/K
+ * implementation must cause appropriate cleanup actions to preserve the
+ * atomicity of the interface.
+ *
+ * @{
+ */
+
+
+/** @defgroup _mali_uk_context U/K Context management
+ *
+ * These functions allow for initialisation of the user-kernel interface once per process.
+ *
+ * Generally the context will store the OS specific object to communicate with the kernel device driver and further
+ * state information required by the specific implementation. The context is shareable among all threads in the caller process.
+ *
+ * On IOCTL systems, this is likely to be a file descriptor as a result of opening the kernel device driver.
+ *
+ * On a bare-metal/RTOS system with no distinction between kernel and
+ * user-space, the U/K interface simply calls the _mali_ukk variant of the
+ * function by direct function call. In this case, the context returned is the
+ * mali_session_data from _mali_ukk_open().
+ *
+ * The kernel side implementations of the U/K interface expect the first member of the argument structure to
+ * be the context created by _mali_uku_open(). On some OS implementations, the meaning of this context
+ * will be different between user-side and kernel-side. In which case, the kernel-side will need to replace this context
+ * with the kernel-side equivalent, because user-side will not have access to kernel-side data. The context parameter
+ * in the argument structure therefore has to be of type input/output.
+ *
+ * It should be noted that the caller cannot reuse the \c ctx member of U/K
+ * argument structure after a U/K call, because it may be overwritten. Instead,
+ * the context handle must always be stored  elsewhere, and copied into
+ * the appropriate U/K argument structure for each user-side call to
+ * the U/K interface. This is not usually a problem, since U/K argument
+ * structures are usually placed on the stack.
+ *
+ * @{ */
+
+/** @brief Begin a new Mali Device Driver session
+ *
+ * This is used to obtain a per-process context handle for all future U/K calls.
+ *
+ * @param context pointer to storage to return a (void*)context handle.
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_open(void **context);
+
+/** @brief End a Mali Device Driver session
+ *
+ * This should be called when the process no longer requires use of the Mali Device Driver.
+ *
+ * The context handle must not be used after it has been closed.
+ *
+ * @param context pointer to a stored (void*)context handle.
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_close(void **context);
+
+/** @} */ /* end group _mali_uk_context */
+
+
+/** @addtogroup _mali_uk_core U/K Core
+ *
+ * The core functions provide the following functionality:
+ * - verify that the user and kernel API are compatible
+ * - retrieve information about the cores and memory banks in the system
+ * - wait for the result of jobs started on a core
+ *
+ * @{ */
+
+/** @brief Waits for a job notification.
+ *
+ * Sleeps until notified or a timeout occurs. Returns information about the notification.
+ *
+ * @param args see _mali_uk_wait_for_notification_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args);
+
+/** @brief Post a notification to the notification queue of this application.
+ *
+ * @param args see _mali_uk_post_notification_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args);
+
+/** @brief Verifies if the user and kernel side of this API are compatible.
+ *
+ * This function is obsolete, but kept to allow old, incompatible user space
+ * clients to robustly detect the incompatibility.
+ *
+ * @param args see _mali_uk_get_api_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args);
+
+/** @brief Verifies if the user and kernel side of this API are compatible.
+ *
+ * @param args see _mali_uk_get_api_version_v2_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args);
+
+/** @brief Get the user space settings applicable for calling process.
+ *
+ * @param args see _mali_uk_get_user_settings_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args);
+
+/** @brief Get a user space setting applicable for calling process.
+ *
+ * @param args see _mali_uk_get_user_setting_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args);
+
+/* @brief Grant or deny high priority scheduling for this session.
+ *
+ * @param args see _mali_uk_request_high_priority_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args);
+
+/** @brief Make process sleep if the pending big job in kernel  >= MALI_MAX_PENDING_BIG_JOB
+ *
+ */
+_mali_osk_errcode_t _mali_ukk_pending_submit(_mali_uk_pending_submit_s *args);
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @addtogroup _mali_uk_memory U/K Memory
+ *
+ * The memory functions provide functionality with and without a Mali-MMU present.
+ *
+ * For Mali-MMU based systems, the following functionality is provided:
+ * - Initialize and terminate MALI virtual address space
+ * - Allocate/deallocate physical memory to a MALI virtual address range and map into/unmap from the
+ * current process address space
+ * - Map/unmap external physical memory into the MALI virtual address range
+ *
+ * For Mali-nonMMU based systems:
+ * - Allocate/deallocate MALI memory
+ *
+ * @{ */
+
+/** @brief Map Mali Memory into the current user process
+ *
+ * Maps Mali memory into the current user process in a generic way.
+ *
+ * This function is to be used for Mali-MMU mode. The function is available in both Mali-MMU and Mali-nonMMU modes,
+ * but should not be called by a user process in Mali-nonMMU mode.
+ *
+ * The implementation and operation of _mali_ukk_mem_mmap() is dependant on whether the driver is built for Mali-MMU
+ * or Mali-nonMMU:
+ * - In the nonMMU case, _mali_ukk_mem_mmap() requires a physical address to be specified. For this reason, an OS U/K
+ * implementation should not allow this to be called from user-space. In any case, nonMMU implementations are
+ * inherently insecure, and so the overall impact is minimal. Mali-MMU mode should be used if security is desired.
+ * - In the MMU case, _mali_ukk_mem_mmap() the _mali_uk_mem_mmap_s::phys_addr
+ * member is used for the \em Mali-virtual address desired for the mapping. The
+ * implementation of _mali_ukk_mem_mmap() will allocate both the CPU-virtual
+ * and CPU-physical addresses, and can cope with mapping a contiguous virtual
+ * address range to a sequence of non-contiguous physical pages. In this case,
+ * the CPU-physical addresses are not communicated back to the user-side, as
+ * they are unnecsessary; the \em Mali-virtual address range must be used for
+ * programming Mali structures.
+ *
+ * In the second (MMU) case, _mali_ukk_mem_mmap() handles management of
+ * CPU-virtual and CPU-physical ranges, but the \em caller must manage the
+ * \em Mali-virtual address range from the user-side.
+ *
+ * @note Mali-virtual address ranges are entirely separate between processes.
+ * It is not possible for a process to accidentally corrupt another process'
+ * \em Mali-virtual address space.
+ *
+ * @param args see _mali_uk_mem_mmap_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_mmap(_mali_uk_mem_mmap_s *args);
+
+/** @brief Unmap Mali Memory from the current user process
+ *
+ * Unmaps Mali memory from the current user process in a generic way. This only operates on Mali memory supplied
+ * from _mali_ukk_mem_mmap().
+ *
+ * @param args see _mali_uk_mem_munmap_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_munmap(_mali_uk_mem_munmap_s *args);
+
+/** @brief Determine the buffer size necessary for an MMU page table dump.
+ * @param args see _mali_uk_query_mmu_page_table_dump_size_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args);
+/** @brief Dump MMU Page tables.
+ * @param args see _mali_uk_dump_mmu_page_table_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args);
+
+/** @brief Write user data to specified Mali memory without causing segfaults.
+ * @param args see _mali_uk_mem_write_safe_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args);
+
+/** @} */ /* end group _mali_uk_memory */
+
+
+/** @addtogroup _mali_uk_pp U/K Fragment Processor
+ *
+ * The Fragment Processor (aka PP (Pixel Processor)) functions provide the following functionality:
+ * - retrieving version of the fragment processors
+ * - determine number of fragment processors
+ * - starting a job on a fragment processor
+ *
+ * @{ */
+
+/** @brief Issue a request to start a new job on a Fragment Processor.
+ *
+ * If the request fails args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can
+ * try to start the job again.
+ *
+ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job
+ * which the hardware hasn't actually started processing yet. In this case the new job will be started instead and the
+ * existing one returned, otherwise the new job is started and the status field args->status is set to
+ * _MALI_UK_START_JOB_STARTED.
+ *
+ * Job completion can be awaited with _mali_ukk_wait_for_notification().
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_pp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx, _mali_uk_pp_start_job_s *uargs);
+
+/**
+ * @brief Issue a request to start new jobs on both Vertex Processor and Fragment Processor.
+ *
+ * @note Will call into @ref _mali_ukk_pp_start_job and @ref _mali_ukk_gp_start_job.
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_pp_and_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx, _mali_uk_pp_and_gp_start_job_s *uargs);
+
+/** @brief Returns the number of Fragment Processors in the system
+ *
+ * @param args see _mali_uk_get_pp_number_of_cores_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args);
+
+/** @brief Returns the version that all Fragment Processor cores are compatible with.
+ *
+ * This function may only be called when _mali_ukk_get_pp_number_of_cores() indicated at least one Fragment
+ * Processor core is available.
+ *
+ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args);
+
+/** @brief Disable Write-back unit(s) on specified job
+ *
+ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h"
+ */
+void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args);
+
+
+/** @} */ /* end group _mali_uk_pp */
+
+
+/** @addtogroup _mali_uk_gp U/K Vertex Processor
+ *
+ * The Vertex Processor (aka GP (Geometry Processor)) functions provide the following functionality:
+ * - retrieving version of the Vertex Processors
+ * - determine number of Vertex Processors available
+ * - starting a job on a Vertex Processor
+ *
+ * @{ */
+
+/** @brief Issue a request to start a new job on a Vertex Processor.
+ *
+ * If the request fails args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can
+ * try to start the job again.
+ *
+ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job
+ * which the hardware hasn't actually started processing yet. In this case the new job will be started and the
+ * existing one returned, otherwise the new job is started and the status field args->status is set to
+ * _MALI_UK_START_JOB_STARTED.
+ *
+ * Job completion can be awaited with _mali_ukk_wait_for_notification().
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx, _mali_uk_gp_start_job_s *uargs);
+
+/** @brief Returns the number of Vertex Processors in the system.
+ *
+ * @param args see _mali_uk_get_gp_number_of_cores_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args);
+
+/** @brief Returns the version that all Vertex Processor cores are compatible with.
+ *
+ * This function may only be called when _mali_uk_get_gp_number_of_cores() indicated at least one Vertex
+ * Processor core is available.
+ *
+ * @param args see _mali_uk_get_gp_core_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args);
+
+/** @brief Resume or abort suspended Vertex Processor jobs.
+ *
+ * After receiving notification that a Vertex Processor job was suspended from
+ * _mali_ukk_wait_for_notification() you can use this function to resume or abort the job.
+ *
+ * @param args see _mali_uk_gp_suspend_response_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args);
+
+/** @} */ /* end group _mali_uk_gp */
+
+#if defined(CONFIG_MALI400_PROFILING)
+/** @addtogroup _mali_uk_profiling U/K Timeline profiling module
+ * @{ */
+
+/** @brief Add event to profiling buffer.
+ *
+ * @param args see _mali_uk_profiling_add_event_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args);
+
+/** @brief Get profiling stream fd.
+ *
+ * @param args see _mali_uk_profiling_stream_fd_get_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_stream_fd_get(_mali_uk_profiling_stream_fd_get_s *args);
+
+/** @brief Profiling control set.
+ *
+ * @param args see _mali_uk_profiling_control_set_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_control_set(_mali_uk_profiling_control_set_s *args);
+
+/** @} */ /* end group _mali_uk_profiling */
+#endif
+
+/** @addtogroup _mali_uk_vsync U/K VSYNC reporting module
+ * @{ */
+
+/** @brief Report events related to vsync.
+ *
+ * @note Events should be reported when starting to wait for vsync and when the
+ * waiting is finished. This information can then be used in kernel space to
+ * complement the GPU utilization metric.
+ *
+ * @param args see _mali_uk_vsync_event_report_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args);
+
+/** @} */ /* end group _mali_uk_vsync */
+
+/** @addtogroup _mali_sw_counters_report U/K Software counter reporting
+ * @{ */
+
+/** @brief Report software counters.
+ *
+ * @param args see _mali_uk_sw_counters_report_s in "mali_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args);
+
+/** @} */ /* end group _mali_sw_counters_report */
+
+/** @} */ /* end group u_k_api */
+
+/** @} */ /* end group uddapi */
+
+u32 _mali_ukk_report_memory_usage(void);
+
+u32 _mali_ukk_report_total_memory_size(void);
+
+u32 _mali_ukk_utilization_gp_pp(void);
+
+u32 _mali_ukk_utilization_gp(void);
+
+u32 _mali_ukk_utilization_pp(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UKK_H__ */
diff --git a/utgard/r6p2/common/mali_user_settings_db.c b/utgard/r6p2/common/mali_user_settings_db.c
new file mode 100755
index 0000000..a1a613f
--- /dev/null
+++ b/utgard/r6p2/common/mali_user_settings_db.c
@@ -0,0 +1,147 @@
+/**
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_user_settings_db.h"
+#include "mali_session.h"
+
+static u32 mali_user_settings[_MALI_UK_USER_SETTING_MAX];
+const char *_mali_uk_user_setting_descriptions[] = _MALI_UK_USER_SETTING_DESCRIPTIONS;
+
+static void mali_user_settings_notify(_mali_uk_user_setting_t setting, u32 value)
+{
+	mali_bool done = MALI_FALSE;
+
+	/*
+	 * This function gets a bit complicated because we can't hold the session lock while
+	 * allocating notification objects.
+	 */
+
+	while (!done) {
+		u32 i;
+		u32 num_sessions_alloc;
+		u32 num_sessions_with_lock;
+		u32 used_notification_objects = 0;
+		_mali_osk_notification_t **notobjs;
+
+		/* Pre allocate the number of notifications objects we need right now (might change after lock has been taken) */
+		num_sessions_alloc = mali_session_get_count();
+		if (0 == num_sessions_alloc) {
+			/* No sessions to report to */
+			return;
+		}
+
+		notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc);
+		if (NULL == notobjs) {
+			MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n"));
+			return;
+		}
+
+		for (i = 0; i < num_sessions_alloc; i++) {
+			notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_SETTINGS_CHANGED,
+					sizeof(_mali_uk_settings_changed_s));
+			if (NULL != notobjs[i]) {
+				_mali_uk_settings_changed_s *data;
+				data = notobjs[i]->result_buffer;
+
+				data->setting = setting;
+				data->value = value;
+			} else {
+				MALI_PRINT_ERROR(("Failed to notify user space session about setting change (alloc failure %u)\n", i));
+			}
+		}
+
+		mali_session_lock();
+
+		/* number of sessions will not change while we hold the lock */
+		num_sessions_with_lock = mali_session_get_count();
+
+		if (num_sessions_alloc >= num_sessions_with_lock) {
+			/* We have allocated enough notification objects for all the sessions atm */
+			struct mali_session_data *session, *tmp;
+			MALI_SESSION_FOREACH(session, tmp, link) {
+				MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc);
+				if (NULL != notobjs[used_notification_objects]) {
+					mali_session_send_notification(session, notobjs[used_notification_objects]);
+					notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */
+				}
+				used_notification_objects++;
+			}
+			done = MALI_TRUE;
+		}
+
+		mali_session_unlock();
+
+		/* Delete any remaining/unused notification objects */
+		for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) {
+			if (NULL != notobjs[used_notification_objects]) {
+				_mali_osk_notification_delete(notobjs[used_notification_objects]);
+			}
+		}
+
+		_mali_osk_free(notobjs);
+	}
+}
+
+void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value)
+{
+	mali_bool notify = MALI_FALSE;
+
+	if (setting >= _MALI_UK_USER_SETTING_MAX) {
+		MALI_DEBUG_PRINT_ERROR(("Invalid user setting %ud\n"));
+		return;
+	}
+
+	if (mali_user_settings[setting] != value) {
+		notify = MALI_TRUE;
+	}
+
+	mali_user_settings[setting] = value;
+
+	if (notify) {
+		mali_user_settings_notify(setting, value);
+	}
+}
+
+u32 mali_get_user_setting(_mali_uk_user_setting_t setting)
+{
+	if (setting >= _MALI_UK_USER_SETTING_MAX) {
+		return 0;
+	}
+
+	return mali_user_settings[setting];
+}
+
+_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args)
+{
+	_mali_uk_user_setting_t setting;
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	setting = args->setting;
+
+	if (_MALI_UK_USER_SETTING_MAX > setting) {
+		args->value = mali_user_settings[setting];
+		return _MALI_OSK_ERR_OK;
+	} else {
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+}
+
+_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	_mali_osk_memcpy(args->settings, mali_user_settings, sizeof(mali_user_settings));
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r6p2/common/mali_user_settings_db.h b/utgard/r6p2/common/mali_user_settings_db.h
new file mode 100755
index 0000000..6828dc7
--- /dev/null
+++ b/utgard/r6p2/common/mali_user_settings_db.h
@@ -0,0 +1,39 @@
+/**
+ * Copyright (C) 2012-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_USER_SETTINGS_DB_H__
+#define __MALI_USER_SETTINGS_DB_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+
+/** @brief Set Mali user setting in DB
+ *
+ * Update the DB with a new value for \a setting. If the value is different from theprevious set value running sessions will be notified of the change.
+ *
+ * @param setting the setting to be changed
+ * @param value the new value to set
+ */
+void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value);
+
+/** @brief Get current Mali user setting value from DB
+ *
+ * @param setting the setting to extract
+ * @return the value of the selected setting
+ */
+u32 mali_get_user_setting(_mali_uk_user_setting_t setting);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  /* __MALI_KERNEL_USER_SETTING__ */
diff --git a/utgard/r6p2/include/linux/mali/mali_utgard.h b/utgard/r6p2/include/linux/mali/mali_utgard.h
new file mode 100755
index 0000000..e58ed1f
--- /dev/null
+++ b/utgard/r6p2/include/linux/mali/mali_utgard.h
@@ -0,0 +1,526 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_utgard.h
+ * Defines types and interface exposed by the Mali Utgard device driver
+ */
+
+#ifndef __MALI_UTGARD_H__
+#define __MALI_UTGARD_H__
+
+#include "mali_osk_types.h"
+#ifdef CONFIG_MALI_DEVFREQ
+#include <linux/devfreq.h>
+#include "mali_pm_metrics.h"
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+#endif
+
+#define MALI_GPU_NAME_UTGARD "mali-utgard"
+
+
+#define MALI_OFFSET_GP                    0x00000
+#define MALI_OFFSET_GP_MMU                0x03000
+
+#define MALI_OFFSET_PP0                   0x08000
+#define MALI_OFFSET_PP0_MMU               0x04000
+#define MALI_OFFSET_PP1                   0x0A000
+#define MALI_OFFSET_PP1_MMU               0x05000
+#define MALI_OFFSET_PP2                   0x0C000
+#define MALI_OFFSET_PP2_MMU               0x06000
+#define MALI_OFFSET_PP3                   0x0E000
+#define MALI_OFFSET_PP3_MMU               0x07000
+
+#define MALI_OFFSET_PP4                   0x28000
+#define MALI_OFFSET_PP4_MMU               0x1C000
+#define MALI_OFFSET_PP5                   0x2A000
+#define MALI_OFFSET_PP5_MMU               0x1D000
+#define MALI_OFFSET_PP6                   0x2C000
+#define MALI_OFFSET_PP6_MMU               0x1E000
+#define MALI_OFFSET_PP7                   0x2E000
+#define MALI_OFFSET_PP7_MMU               0x1F000
+
+#define MALI_OFFSET_L2_RESOURCE0          0x01000
+#define MALI_OFFSET_L2_RESOURCE1          0x10000
+#define MALI_OFFSET_L2_RESOURCE2          0x11000
+
+#define MALI400_OFFSET_L2_CACHE0          MALI_OFFSET_L2_RESOURCE0
+#define MALI450_OFFSET_L2_CACHE0          MALI_OFFSET_L2_RESOURCE1
+#define MALI450_OFFSET_L2_CACHE1          MALI_OFFSET_L2_RESOURCE0
+#define MALI450_OFFSET_L2_CACHE2          MALI_OFFSET_L2_RESOURCE2
+#define MALI470_OFFSET_L2_CACHE1          MALI_OFFSET_L2_RESOURCE0
+
+#define MALI_OFFSET_BCAST                 0x13000
+#define MALI_OFFSET_DLBU                  0x14000
+
+#define MALI_OFFSET_PP_BCAST              0x16000
+#define MALI_OFFSET_PP_BCAST_MMU          0x15000
+
+#define MALI_OFFSET_PMU                   0x02000
+#define MALI_OFFSET_DMA                   0x12000
+
+/* Mali-300 */
+
+#define MALI_GPU_RESOURCES_MALI300(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI300_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq)
+
+/* Mali-400 */
+
+#define MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+	/* Mali-450 */
+#define MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI450_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP4, pp3_irq, base_addr + MALI_OFFSET_PP4_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP5, pp4_irq, base_addr + MALI_OFFSET_PP5_MMU, pp4_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP6, pp5_irq, base_addr + MALI_OFFSET_PP6_MMU, pp5_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP6_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP4, pp4_irq, base_addr + MALI_OFFSET_PP4_MMU, pp4_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP5, pp5_irq, base_addr + MALI_OFFSET_PP5_MMU, pp5_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(6, base_addr + MALI_OFFSET_PP6, pp6_irq, base_addr + MALI_OFFSET_PP6_MMU, pp6_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(7, base_addr + MALI_OFFSET_PP7, pp7_irq, base_addr + MALI_OFFSET_PP7_MMU, pp7_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP8_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+	/* Mali - 470 */
+#define MALI_GPU_RESOURCES_MALI470_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCE_L2(addr) \
+	{ \
+		.name = "Mali_L2", \
+			.flags = IORESOURCE_MEM, \
+				 .start = addr, \
+					  .end   = addr + 0x200, \
+	},
+
+#define MALI_GPU_RESOURCE_GP(gp_addr, gp_irq) \
+	{ \
+		.name = "Mali_GP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_addr, \
+					  .end =   gp_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_irq, \
+					  .end   = gp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_GP_WITH_MMU(gp_addr, gp_irq, gp_mmu_addr, gp_mmu_irq) \
+	{ \
+		.name = "Mali_GP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_addr, \
+					  .end =   gp_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_irq, \
+					  .end   = gp_irq, \
+	}, \
+	{ \
+		.name = "Mali_GP_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_mmu_addr, \
+					  .end =   gp_mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_mmu_irq, \
+					  .end =   gp_mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_PP(pp_addr, pp_irq) \
+	{ \
+		.name = "Mali_PP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_PP_WITH_MMU(id, pp_addr, pp_irq, pp_mmu_addr, pp_mmu_irq) \
+	{ \
+		.name = "Mali_PP" #id, \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_mmu_addr, \
+					  .end =   pp_mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_mmu_irq, \
+					  .end =   pp_mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_MMU(mmu_addr, mmu_irq) \
+	{ \
+		.name = "Mali_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = mmu_addr, \
+					  .end =   mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = mmu_irq, \
+					  .end =   mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_PMU(pmu_addr) \
+	{ \
+		.name = "Mali_PMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pmu_addr, \
+					  .end =   pmu_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_DMA(dma_addr) \
+	{ \
+		.name = "Mali_DMA", \
+			.flags = IORESOURCE_MEM, \
+				 .start = dma_addr, \
+					  .end = dma_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_DLBU(dlbu_addr) \
+	{ \
+		.name = "Mali_DLBU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = dlbu_addr, \
+					  .end = dlbu_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_BCAST(bcast_addr) \
+	{ \
+		.name = "Mali_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = bcast_addr, \
+					  .end = bcast_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_PP_BCAST(pp_addr, pp_irq) \
+	{ \
+		.name = "Mali_PP_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP_Broadcast_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_PP_MMU_BCAST(pp_mmu_bcast_addr) \
+	{ \
+		.name = "Mali_PP_MMU_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_mmu_bcast_addr, \
+					  .end = pp_mmu_bcast_addr + 0x100, \
+	},
+
+	struct mali_gpu_utilization_data {
+		unsigned int utilization_gpu; /* Utilization for GP and all PP cores combined, 0 = no utilization, 256 = full utilization */
+		unsigned int utilization_gp;  /* Utilization for GP core only, 0 = no utilization, 256 = full utilization */
+		unsigned int utilization_pp;  /* Utilization for all PP cores combined, 0 = no utilization, 256 = full utilization */
+	};
+
+	struct mali_gpu_clk_item {
+		unsigned int clock; /* unit(MHz) */
+		unsigned int vol;
+	};
+
+	struct mali_gpu_clock {
+		struct mali_gpu_clk_item *item;
+		unsigned int num_of_steps;
+	};
+
+	struct mali_gpu_device_data {
+		/* Shared GPU memory */
+		unsigned long shared_mem_size;
+
+		/*
+		 * Mali PMU switch delay.
+		 * Only needed if the power gates are connected to the PMU in a high fanout
+		 * network. This value is the number of Mali clock cycles it takes to
+		 * enable the power gates and turn on the power mesh.
+		 * This value will have no effect if a daisy chain implementation is used.
+		 */
+		u32 pmu_switch_delay;
+
+		/* Mali Dynamic power domain configuration in sequence from 0-11
+		 *  GP  PP0 PP1  PP2  PP3  PP4  PP5  PP6  PP7, L2$0 L2$1 L2$2
+		 */
+		u16 pmu_domain_config[12];
+
+		/* Dedicated GPU memory range (physical). */
+		unsigned long dedicated_mem_start;
+		unsigned long dedicated_mem_size;
+
+		/* Frame buffer memory to be accessible by Mali GPU (physical) */
+		unsigned long fb_start;
+		unsigned long fb_size;
+
+		/* Max runtime [ms] for jobs */
+		int max_job_runtime;
+
+		/* Report GPU utilization and related control in this interval (specified in ms) */
+		unsigned long control_interval;
+
+		/* Function that will receive periodic GPU utilization numbers */
+		void (*utilization_callback)(struct mali_gpu_utilization_data *data);
+
+		/* Fuction that platform callback for freq setting, needed when CONFIG_MALI_DVFS enabled */
+		int (*set_freq)(int setting_clock_step);
+		/* Function that platfrom report it's clock info which driver can set, needed when CONFIG_MALI_DVFS enabled */
+		void (*get_clock_info)(struct mali_gpu_clock **data);
+		/* Function that get the current clock info, needed when CONFIG_MALI_DVFS enabled */
+		int (*get_freq)(void);
+		/* Function that init the mali gpu secure mode */
+		int (*secure_mode_init)(void);
+		/* Function that deinit the mali gpu secure mode */
+		void (*secure_mode_deinit)(void);
+		/* Function that reset GPU and enable gpu secure mode */
+		int (*gpu_reset_and_secure_mode_enable)(void);
+		/* Function that Reset GPU and disable gpu secure mode */
+		int (*gpu_reset_and_secure_mode_disable)(void);
+		/* ipa related interface customer need register */
+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
+		struct devfreq_cooling_power *gpu_cooling_ops;
+#endif
+	};
+
+	/**
+	 * Pause the scheduling and power state changes of Mali device driver.
+	 * mali_dev_resume() must always be called as soon as possible after this function
+	 * in order to resume normal operation of the Mali driver.
+	 */
+	void mali_dev_pause(void);
+
+	/**
+	 * Resume scheduling and allow power changes in Mali device driver.
+	 * This must always be called after mali_dev_pause().
+	 */
+	void mali_dev_resume(void);
+
+	/** @brief Set the desired number of PP cores to use.
+	 *
+	 * The internal Mali PMU will be used, if present, to physically power off the PP cores.
+	 *
+	 * @param num_cores The number of desired cores
+	 * @return 0 on success, otherwise error. -EINVAL means an invalid number of cores was specified.
+	 */
+	int mali_perf_set_num_pp_cores(unsigned int num_cores);
+
+#endif
diff --git a/utgard/r6p2/include/linux/mali/mali_utgard_ioctl.h b/utgard/r6p2/include/linux/mali/mali_utgard_ioctl.h
new file mode 100755
index 0000000..dfe152b
--- /dev/null
+++ b/utgard/r6p2/include/linux/mali/mali_utgard_ioctl.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library. 
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules 
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting 
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions 
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify 
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so. 
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef __MALI_UTGARD_IOCTL_H__
+#define __MALI_UTGARD_IOCTL_H__
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <linux/fs.h>       /* file system operations */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file mali_kernel_ioctl.h
+ * Interface to the Linux device driver.
+ * This file describes the interface needed to use the Linux device driver.
+ * Its interface is designed to used by the HAL implementation through a thin arch layer.
+ */
+
+/**
+ * ioctl commands
+ */
+
+#define MALI_IOC_BASE           0x82
+#define MALI_IOC_CORE_BASE      (_MALI_UK_CORE_SUBSYSTEM      + MALI_IOC_BASE)
+#define MALI_IOC_MEMORY_BASE    (_MALI_UK_MEMORY_SUBSYSTEM    + MALI_IOC_BASE)
+#define MALI_IOC_PP_BASE        (_MALI_UK_PP_SUBSYSTEM        + MALI_IOC_BASE)
+#define MALI_IOC_GP_BASE        (_MALI_UK_GP_SUBSYSTEM        + MALI_IOC_BASE)
+#define MALI_IOC_PROFILING_BASE (_MALI_UK_PROFILING_SUBSYSTEM + MALI_IOC_BASE)
+#define MALI_IOC_VSYNC_BASE     (_MALI_UK_VSYNC_SUBSYSTEM + MALI_IOC_BASE)
+
+#define MALI_IOC_WAIT_FOR_NOTIFICATION      _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_WAIT_FOR_NOTIFICATION, _mali_uk_wait_for_notification_s)
+#define MALI_IOC_GET_API_VERSION            _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, u32)
+#define MALI_IOC_GET_API_VERSION_V2         _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, _mali_uk_get_api_version_v2_s)
+#define MALI_IOC_POST_NOTIFICATION          _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_POST_NOTIFICATION, _mali_uk_post_notification_s)
+#define MALI_IOC_GET_USER_SETTING           _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTING, _mali_uk_get_user_setting_s)
+#define MALI_IOC_GET_USER_SETTINGS          _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTINGS, _mali_uk_get_user_settings_s)
+#define MALI_IOC_REQUEST_HIGH_PRIORITY      _IOW (MALI_IOC_CORE_BASE, _MALI_UK_REQUEST_HIGH_PRIORITY, _mali_uk_request_high_priority_s)
+#define MALI_IOC_TIMELINE_GET_LATEST_POINT  _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_GET_LATEST_POINT, _mali_uk_timeline_get_latest_point_s)
+#define MALI_IOC_TIMELINE_WAIT              _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_WAIT, _mali_uk_timeline_wait_s)
+#define MALI_IOC_TIMELINE_CREATE_SYNC_FENCE _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_CREATE_SYNC_FENCE, _mali_uk_timeline_create_sync_fence_s)
+#define MALI_IOC_SOFT_JOB_START             _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_START, _mali_uk_soft_job_start_s)
+#define MALI_IOC_SOFT_JOB_SIGNAL            _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_SIGNAL, _mali_uk_soft_job_signal_s)
+#define MALI_IOC_PENDING_SUBMIT             _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_PENDING_SUBMIT, _mali_uk_pending_submit_s)
+
+#define MALI_IOC_MEM_ALLOC                  _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_ALLOC_MEM, _mali_uk_alloc_mem_s)
+#define MALI_IOC_MEM_FREE                   _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_FREE_MEM, _mali_uk_free_mem_s)
+#define MALI_IOC_MEM_BIND                   _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_BIND_MEM, _mali_uk_bind_mem_s)
+#define MALI_IOC_MEM_UNBIND                 _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_UNBIND_MEM, _mali_uk_unbind_mem_s)
+#define MALI_IOC_MEM_COW                    _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_COW_MEM, _mali_uk_cow_mem_s)
+#define MALI_IOC_MEM_COW_MODIFY_RANGE       _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_COW_MODIFY_RANGE, _mali_uk_cow_modify_range_s)
+#define MALI_IOC_MEM_RESIZE                 _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_RESIZE_MEM, _mali_uk_mem_resize_s)
+#define MALI_IOC_MEM_DMA_BUF_GET_SIZE       _IOR(MALI_IOC_MEMORY_BASE, _MALI_UK_DMA_BUF_GET_SIZE, _mali_uk_dma_buf_get_size_s)
+#define MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE _IOR (MALI_IOC_MEMORY_BASE, _MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, _mali_uk_query_mmu_page_table_dump_size_s)
+#define MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE    _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_DUMP_MMU_PAGE_TABLE, _mali_uk_dump_mmu_page_table_s)
+#define MALI_IOC_MEM_WRITE_SAFE             _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_MEM_WRITE_SAFE, _mali_uk_mem_write_safe_s)
+
+#define MALI_IOC_PP_START_JOB               _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_START_JOB, _mali_uk_pp_start_job_s)
+#define MALI_IOC_PP_AND_GP_START_JOB        _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_AND_GP_START_JOB, _mali_uk_pp_and_gp_start_job_s)
+#define MALI_IOC_PP_NUMBER_OF_CORES_GET     _IOR (MALI_IOC_PP_BASE, _MALI_UK_GET_PP_NUMBER_OF_CORES, _mali_uk_get_pp_number_of_cores_s)
+#define MALI_IOC_PP_CORE_VERSION_GET        _IOR (MALI_IOC_PP_BASE, _MALI_UK_GET_PP_CORE_VERSION, _mali_uk_get_pp_core_version_s)
+#define MALI_IOC_PP_DISABLE_WB              _IOW (MALI_IOC_PP_BASE, _MALI_UK_PP_DISABLE_WB, _mali_uk_pp_disable_wb_s)
+
+#define MALI_IOC_GP2_START_JOB              _IOWR(MALI_IOC_GP_BASE, _MALI_UK_GP_START_JOB, _mali_uk_gp_start_job_s)
+#define MALI_IOC_GP2_NUMBER_OF_CORES_GET    _IOR (MALI_IOC_GP_BASE, _MALI_UK_GET_GP_NUMBER_OF_CORES, _mali_uk_get_gp_number_of_cores_s)
+#define MALI_IOC_GP2_CORE_VERSION_GET       _IOR (MALI_IOC_GP_BASE, _MALI_UK_GET_GP_CORE_VERSION, _mali_uk_get_gp_core_version_s)
+#define MALI_IOC_GP2_SUSPEND_RESPONSE       _IOW (MALI_IOC_GP_BASE, _MALI_UK_GP_SUSPEND_RESPONSE,_mali_uk_gp_suspend_response_s)
+
+#define MALI_IOC_PROFILING_ADD_EVENT        _IOWR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_ADD_EVENT, _mali_uk_profiling_add_event_s)
+#define MALI_IOC_PROFILING_REPORT_SW_COUNTERS  _IOW (MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_REPORT_SW_COUNTERS, _mali_uk_sw_counters_report_s)
+#define MALI_IOC_PROFILING_MEMORY_USAGE_GET _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_MEMORY_USAGE_GET, _mali_uk_profiling_memory_usage_get_s)
+#define MALI_IOC_PROFILING_STREAM_FD_GET        _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_STREAM_FD_GET, _mali_uk_profiling_stream_fd_get_s)
+#define MALI_IOC_PROILING_CONTROL_SET   _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_CONTROL_SET, _mali_uk_profiling_control_set_s)
+
+#define MALI_IOC_VSYNC_EVENT_REPORT         _IOW (MALI_IOC_VSYNC_BASE, _MALI_UK_VSYNC_EVENT_REPORT, _mali_uk_vsync_event_report_s)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_IOCTL_H__ */
diff --git a/utgard/r6p2/include/linux/mali/mali_utgard_profiling_events.h b/utgard/r6p2/include/linux/mali/mali_utgard_profiling_events.h
new file mode 100755
index 0000000..f505e91
--- /dev/null
+++ b/utgard/r6p2/include/linux/mali/mali_utgard_profiling_events.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library. 
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules 
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting 
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions 
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify 
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so. 
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef _MALI_UTGARD_PROFILING_EVENTS_H_
+#define _MALI_UTGARD_PROFILING_EVENTS_H_
+
+/*
+ * The event ID is a 32 bit value consisting of different fields
+ * reserved, 4 bits, for future use
+ * event type, 4 bits, cinstr_profiling_event_type_t
+ * event channel, 8 bits, the source of the event.
+ * event data, 16 bit field, data depending on event type
+ */
+
+/**
+ * Specifies what kind of event this is
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_TYPE_SINGLE  = 0 << 24,
+	MALI_PROFILING_EVENT_TYPE_START   = 1 << 24,
+	MALI_PROFILING_EVENT_TYPE_STOP    = 2 << 24,
+	MALI_PROFILING_EVENT_TYPE_SUSPEND = 3 << 24,
+	MALI_PROFILING_EVENT_TYPE_RESUME  = 4 << 24,
+} cinstr_profiling_event_type_t;
+
+
+/**
+ * Secifies the channel/source of the event
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_CHANNEL_SOFTWARE =  0 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_GP0      =  1 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP0      =  5 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP1      =  6 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP2      =  7 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP3      =  8 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP4      =  9 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP5      = 10 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP6      = 11 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP7      = 12 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_GPU      = 21 << 16,
+} cinstr_profiling_event_channel_t;
+
+
+#define MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(num) (((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) + (num)) << 16)
+#define MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(num) (((MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) + (num)) << 16)
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from software channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_NONE                  = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_NEW_FRAME         = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_FLUSH                 = 2,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SWAP_BUFFERS      = 3,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_FB_EVENT              = 4,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE            = 5,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE            = 6,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_READBACK              = 7,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_WRITEBACK             = 8,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_ENTER_API_FUNC        = 10,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC        = 11,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_DISCARD_ATTACHMENTS   = 13,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_TRY_LOCK          = 53,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_LOCK              = 54,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_UNLOCK            = 55,
+	MALI_PROFILING_EVENT_REASON_SINGLE_LOCK_CONTENDED           = 56,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_MALI_FENCE_DUP    = 57,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SET_PP_JOB_FENCE  = 58,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_WAIT_SYNC         = 59,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_FENCE_SYNC = 60,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_NATIVE_FENCE_SYNC = 61,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FENCE_FLUSH       = 62,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FLUSH_SERVER_WAITS = 63,
+} cinstr_profiling_event_reason_single_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel
+ * to inform whether the core is physical or virtual
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL  = 0,
+	MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL   = 1,
+} cinstr_profiling_event_reason_start_stop_hw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel
+ */
+typedef enum {
+	/*MALI_PROFILING_EVENT_REASON_START_STOP_SW_NONE            = 0,*/
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_MALI            = 1,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_CALLBACK_THREAD = 2,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_WORKER_THREAD   = 3,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF     = 4,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF      = 5,
+} cinstr_profiling_event_reason_start_stop_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SUSPEND/RESUME is used from software channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_NONE                     =  0, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PIPELINE_FULL            =  1, /* NOT used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC                    = 26, /* used in some build configurations */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_WAIT           = 27, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_SYNC           = 28, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_FILTER_CLEANUP   = 29, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_TEXTURE          = 30, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_MIPLEVEL       = 31, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_READPIXELS     = 32, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SWAP_IMMEDIATE  = 33, /* NOT used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_QUEUE_BUFFER         = 34, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_DEQUEUE_BUFFER       = 35, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_UMP_LOCK                 = 36, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_GLOBAL_LOCK          = 37, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_SWAP                 = 38, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_MALI_EGL_IMAGE_SYNC_WAIT = 39, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GP_JOB_HANDLING          = 40, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PP_JOB_HANDLING          = 41, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_MERGE     = 42, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_DUP       = 43,
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_FLUSH_SERVER_WAITS   = 44,
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SYNC            = 45, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_JOBS_WAIT             = 46, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOFRAMES_WAIT         = 47, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOJOBS_WAIT           = 48, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_SUBMIT_LIMITER_WAIT      = 49, /* USED */
+} cinstr_profiling_event_reason_suspend_resume_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from a HW channel (GPx+PPx)
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_NONE          = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_INTERRUPT     = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH         = 2,
+} cinstr_profiling_event_reason_single_hw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from the GPU channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_NONE              = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE  = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS      = 2,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS      = 3,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS      = 4,
+} cinstr_profiling_event_reason_single_gpu_t;
+
+/**
+ * These values are applicable for the 3rd data parameter when
+ * the type MALI_PROFILING_EVENT_TYPE_START is used from the software channel
+ * with the MALI_PROFILING_EVENT_REASON_START_STOP_BOTTOM_HALF reason.
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_DATA_CORE_GP0             =  1,
+	MALI_PROFILING_EVENT_DATA_CORE_PP0             =  5,
+	MALI_PROFILING_EVENT_DATA_CORE_PP1             =  6,
+	MALI_PROFILING_EVENT_DATA_CORE_PP2             =  7,
+	MALI_PROFILING_EVENT_DATA_CORE_PP3             =  8,
+	MALI_PROFILING_EVENT_DATA_CORE_PP4             =  9,
+	MALI_PROFILING_EVENT_DATA_CORE_PP5             = 10,
+	MALI_PROFILING_EVENT_DATA_CORE_PP6             = 11,
+	MALI_PROFILING_EVENT_DATA_CORE_PP7             = 12,
+	MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU         = 22, /* GP0 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU         = 26, /* PP0 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP1_MMU         = 27, /* PP1 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP2_MMU         = 28, /* PP2 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP3_MMU         = 29, /* PP3 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP4_MMU         = 30, /* PP4 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP5_MMU         = 31, /* PP5 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP6_MMU         = 32, /* PP6 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP7_MMU         = 33, /* PP7 + 21 */
+
+} cinstr_profiling_event_data_core_t;
+
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0 + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0 + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU + (num))
+
+
+#endif /*_MALI_UTGARD_PROFILING_EVENTS_H_*/
diff --git a/utgard/r6p2/include/linux/mali/mali_utgard_profiling_gator_api.h b/utgard/r6p2/include/linux/mali/mali_utgard_profiling_gator_api.h
new file mode 100755
index 0000000..c82d8b1
--- /dev/null
+++ b/utgard/r6p2/include/linux/mali/mali_utgard_profiling_gator_api.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2013, 2015-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library. 
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules 
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting 
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions 
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify 
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so. 
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef __MALI_UTGARD_PROFILING_GATOR_API_H__
+#define __MALI_UTGARD_PROFILING_GATOR_API_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MALI_PROFILING_API_VERSION 4
+
+#define MAX_NUM_L2_CACHE_CORES 3
+#define MAX_NUM_FP_CORES 8
+#define MAX_NUM_VP_CORES 1
+
+#define _MALI_SPCIAL_COUNTER_DESCRIPTIONS \
+	{                                           \
+		"Filmstrip_cnt0",                 \
+		"Frequency",       \
+		"Voltage",       \
+		"vertex",     \
+		"fragment",         \
+		"Total_alloc_pages",        \
+	};
+
+#define _MALI_MEM_COUTNER_DESCRIPTIONS \
+	{                                           \
+		"untyped_memory",                 \
+		"vertex_index_buffer",       \
+		"texture_buffer",       \
+		"varying_buffer",     \
+		"render_target",         \
+		"pbuffer_buffer",        \
+		"plbu_heap",            \
+		"pointer_array_buffer",             \
+		"slave_tilelist",          \
+		"untyped_gp_cmdlist",     \
+		"polygon_cmdlist",               \
+		"texture_descriptor",               \
+		"render_state_word",               \
+		"shader",               \
+		"stream_buffer",               \
+		"fragment_stack",               \
+		"uniform",               \
+		"untyped_frame_pool",               \
+		"untyped_surface",               \
+	};
+
+/** The list of events supported by the Mali DDK. */
+typedef enum {
+	/* Vertex processor activity */
+	ACTIVITY_VP_0 = 0,
+
+	/* Fragment processor activity */
+	ACTIVITY_FP_0,
+	ACTIVITY_FP_1,
+	ACTIVITY_FP_2,
+	ACTIVITY_FP_3,
+	ACTIVITY_FP_4,
+	ACTIVITY_FP_5,
+	ACTIVITY_FP_6,
+	ACTIVITY_FP_7,
+
+	/* L2 cache counters */
+	COUNTER_L2_0_C0,
+	COUNTER_L2_0_C1,
+	COUNTER_L2_1_C0,
+	COUNTER_L2_1_C1,
+	COUNTER_L2_2_C0,
+	COUNTER_L2_2_C1,
+
+	/* Vertex processor counters */
+	COUNTER_VP_0_C0,
+	COUNTER_VP_0_C1,
+
+	/* Fragment processor counters */
+	COUNTER_FP_0_C0,
+	COUNTER_FP_0_C1,
+	COUNTER_FP_1_C0,
+	COUNTER_FP_1_C1,
+	COUNTER_FP_2_C0,
+	COUNTER_FP_2_C1,
+	COUNTER_FP_3_C0,
+	COUNTER_FP_3_C1,
+	COUNTER_FP_4_C0,
+	COUNTER_FP_4_C1,
+	COUNTER_FP_5_C0,
+	COUNTER_FP_5_C1,
+	COUNTER_FP_6_C0,
+	COUNTER_FP_6_C1,
+	COUNTER_FP_7_C0,
+	COUNTER_FP_7_C1,
+
+	/*
+	 * If more hardware counters are added, the _mali_osk_hw_counter_table
+	 * below should also be updated.
+	 */
+
+	/* EGL software counters */
+	COUNTER_EGL_BLIT_TIME,
+
+	/* GLES software counters */
+	COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+	COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+	COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+	COUNTER_GLES_DRAW_ARRAYS_CALLS,
+	COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+	COUNTER_GLES_DRAW_POINTS,
+	COUNTER_GLES_DRAW_LINES,
+	COUNTER_GLES_DRAW_LINE_LOOP,
+	COUNTER_GLES_DRAW_LINE_STRIP,
+	COUNTER_GLES_DRAW_TRIANGLES,
+	COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+	COUNTER_GLES_DRAW_TRIANGLE_FAN,
+	COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+	COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+	COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+	COUNTER_GLES_UPLOAD_VBO_TIME,
+	COUNTER_GLES_NUM_FLUSHES,
+	COUNTER_GLES_NUM_VSHADERS_GENERATED,
+	COUNTER_GLES_NUM_FSHADERS_GENERATED,
+	COUNTER_GLES_VSHADER_GEN_TIME,
+	COUNTER_GLES_FSHADER_GEN_TIME,
+	COUNTER_GLES_INPUT_TRIANGLES,
+	COUNTER_GLES_VXCACHE_HIT,
+	COUNTER_GLES_VXCACHE_MISS,
+	COUNTER_GLES_VXCACHE_COLLISION,
+	COUNTER_GLES_CULLED_TRIANGLES,
+	COUNTER_GLES_CULLED_LINES,
+	COUNTER_GLES_BACKFACE_TRIANGLES,
+	COUNTER_GLES_GBCLIP_TRIANGLES,
+	COUNTER_GLES_GBCLIP_LINES,
+	COUNTER_GLES_TRIANGLES_DRAWN,
+	COUNTER_GLES_DRAWCALL_TIME,
+	COUNTER_GLES_TRIANGLES_COUNT,
+	COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+	COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+	COUNTER_GLES_FAN_TRIANGLES_COUNT,
+	COUNTER_GLES_LINES_COUNT,
+	COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+	COUNTER_GLES_STRIP_LINES_COUNT,
+	COUNTER_GLES_LOOP_LINES_COUNT,
+
+	/* Special counter */
+
+	/* Framebuffer capture pseudo-counter */
+	COUNTER_FILMSTRIP,
+	COUNTER_FREQUENCY,
+	COUNTER_VOLTAGE,
+	COUNTER_VP_ACTIVITY,
+	COUNTER_FP_ACTIVITY,
+	COUNTER_TOTAL_ALLOC_PAGES,
+
+	/* Memory usage counter */
+	COUNTER_MEM_UNTYPED,
+	COUNTER_MEM_VB_IB,
+	COUNTER_MEM_TEXTURE,
+	COUNTER_MEM_VARYING,
+	COUNTER_MEM_RT,
+	COUNTER_MEM_PBUFFER,
+	/* memory usages for gp command */
+	COUNTER_MEM_PLBU_HEAP,
+	COUNTER_MEM_POINTER_ARRAY,
+	COUNTER_MEM_SLAVE_TILELIST,
+	COUNTER_MEM_UNTYPE_GP_CMDLIST,
+	/* memory usages for polygon list command */
+	COUNTER_MEM_POLYGON_CMDLIST,
+	/* memory usages for pp command */
+	COUNTER_MEM_TD,
+	COUNTER_MEM_RSW,
+	/* other memory usages */
+	COUNTER_MEM_SHADER,
+	COUNTER_MEM_STREAMS,
+	COUNTER_MEM_FRAGMENT_STACK,
+	COUNTER_MEM_UNIFORM,
+	/* Special mem usage, which is used for mem pool allocation */
+	COUNTER_MEM_UNTYPE_MEM_POOL,
+	COUNTER_MEM_UNTYPE_SURFACE,
+
+	NUMBER_OF_EVENTS
+} _mali_osk_counter_id;
+
+#define FIRST_ACTIVITY_EVENT    ACTIVITY_VP_0
+#define LAST_ACTIVITY_EVENT     ACTIVITY_FP_7
+
+#define FIRST_HW_COUNTER        COUNTER_L2_0_C0
+#define LAST_HW_COUNTER         COUNTER_FP_7_C1
+
+#define FIRST_SW_COUNTER        COUNTER_EGL_BLIT_TIME
+#define LAST_SW_COUNTER         COUNTER_GLES_LOOP_LINES_COUNT
+
+#define FIRST_SPECIAL_COUNTER   COUNTER_FILMSTRIP
+#define LAST_SPECIAL_COUNTER    COUNTER_TOTAL_ALLOC_PAGES
+
+#define FIRST_MEM_COUNTER               COUNTER_MEM_UNTYPED
+#define LAST_MEM_COUNTER                COUNTER_MEM_UNTYPE_SURFACE
+
+#define MALI_PROFILING_MEM_COUNTERS_NUM (LAST_MEM_COUNTER - FIRST_MEM_COUNTER + 1)
+#define MALI_PROFILING_SPECIAL_COUNTERS_NUM     (LAST_SPECIAL_COUNTER - FIRST_SPECIAL_COUNTER + 1)
+#define MALI_PROFILING_SW_COUNTERS_NUM  (LAST_SW_COUNTER - FIRST_SW_COUNTER + 1)
+
+/**
+ * Define the stream header type for porfiling stream.
+ */
+#define  STREAM_HEADER_FRAMEBUFFER 0x05         /* The stream packet header type for framebuffer dumping. */
+#define STREAM_HEADER_COUNTER_VALUE  0x09       /* The stream packet header type for hw/sw/memory counter sampling. */
+#define STREAM_HEADER_CORE_ACTIVITY 0x0a                /* The stream packet header type for activity counter sampling. */
+#define STREAM_HEADER_SIZE      5
+
+/**
+ * Define the packet header type of profiling control packet.
+ */
+#define PACKET_HEADER_ERROR            0x80             /* The response packet header type if error. */
+#define PACKET_HEADER_ACK              0x81             /* The response packet header type if OK. */
+#define PACKET_HEADER_COUNTERS_REQUEST 0x82             /* The control packet header type to request counter information from ddk. */
+#define PACKET_HEADER_COUNTERS_ACK         0x83         /* The response packet header type to send out counter information. */
+#define PACKET_HEADER_COUNTERS_ENABLE  0x84             /* The control packet header type to enable counters. */
+#define PACKET_HEADER_START_CAPTURE_VALUE            0x85               /* The control packet header type to start capture values. */
+
+#define PACKET_HEADER_SIZE      5
+
+/**
+ * Structure to pass performance counter data of a Mali core
+ */
+typedef struct _mali_profiling_core_counters {
+	u32 source0;
+	u32 value0;
+	u32 source1;
+	u32 value1;
+} _mali_profiling_core_counters;
+
+/**
+ * Structure to pass performance counter data of Mali L2 cache cores
+ */
+typedef struct _mali_profiling_l2_counter_values {
+	struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+} _mali_profiling_l2_counter_values;
+
+/**
+ * Structure to pass data defining Mali instance in use:
+ *
+ * mali_product_id - Mali product id
+ * mali_version_major - Mali version major number
+ * mali_version_minor - Mali version minor number
+ * num_of_l2_cores - number of L2 cache cores
+ * num_of_fp_cores - number of fragment processor cores
+ * num_of_vp_cores - number of vertex processor cores
+ */
+typedef struct _mali_profiling_mali_version {
+	u32 mali_product_id;
+	u32 mali_version_major;
+	u32 mali_version_minor;
+	u32 num_of_l2_cores;
+	u32 num_of_fp_cores;
+	u32 num_of_vp_cores;
+} _mali_profiling_mali_version;
+
+/**
+ * Structure to define the mali profiling counter struct.
+ */
+typedef struct mali_profiling_counter {
+	char counter_name[40];
+	u32 counter_id;
+	u32 counter_event;
+	u32 prev_counter_value;
+	u32 current_counter_value;
+	u32 key;
+	int enabled;
+} mali_profiling_counter;
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting.
+ * We cannot use the enums in mali_uk_types.h because they are unknown inside gator.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define MEM_COUNTER_ENABLE (5)
+#define ANNOTATE_PROFILING_ENABLE (6)
+
+void _mali_profiling_control(u32 action, u32 value);
+
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values);
+
+int _mali_profiling_set_event(u32 counter_id, s32 event_id);
+
+u32 _mali_profiling_get_api_version(void);
+
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_PROFILING_GATOR_API_H__ */
diff --git a/utgard/r6p2/include/linux/mali/mali_utgard_uk_types.h b/utgard/r6p2/include/linux/mali/mali_utgard_uk_types.h
new file mode 100755
index 0000000..99835a5
--- /dev/null
+++ b/utgard/r6p2/include/linux/mali/mali_utgard_uk_types.h
@@ -0,0 +1,1100 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library. 
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules 
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting 
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions 
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify 
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so. 
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+/**
+ * @file mali_uk_types.h
+ * Defines the types and constants used in the user-kernel interface
+ */
+
+#ifndef __MALI_UTGARD_UK_TYPES_H__
+#define __MALI_UTGARD_UK_TYPES_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Iteration functions depend on these values being consecutive. */
+#define MALI_UK_TIMELINE_GP   0
+#define MALI_UK_TIMELINE_PP   1
+#define MALI_UK_TIMELINE_SOFT 2
+#define MALI_UK_TIMELINE_MAX  3
+
+#define MALI_UK_BIG_VARYING_SIZE  (1024*1024*2)
+
+typedef struct {
+	u32 points[MALI_UK_TIMELINE_MAX];
+	s32 sync_fd;
+} _mali_uk_fence_t;
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs
+ *
+ * @{
+ */
+
+/** @defgroup _mali_uk_core U/K Core
+ * @{ */
+
+/** Definition of subsystem numbers, to assist in creating a unique identifier
+ * for each U/K call.
+ *
+ * @see _mali_uk_functions */
+typedef enum {
+	_MALI_UK_CORE_SUBSYSTEM,      /**< Core Group of U/K calls */
+	_MALI_UK_MEMORY_SUBSYSTEM,    /**< Memory Group of U/K calls */
+	_MALI_UK_PP_SUBSYSTEM,        /**< Fragment Processor Group of U/K calls */
+	_MALI_UK_GP_SUBSYSTEM,        /**< Vertex Processor Group of U/K calls */
+	_MALI_UK_PROFILING_SUBSYSTEM, /**< Profiling Group of U/K calls */
+	_MALI_UK_VSYNC_SUBSYSTEM,     /**< VSYNC Group of U/K calls */
+} _mali_uk_subsystem_t;
+
+/** Within a function group each function has its unique sequence number
+ * to assist in creating a unique identifier for each U/K call.
+ *
+ * An ordered pair of numbers selected from
+ * ( \ref _mali_uk_subsystem_t,\ref  _mali_uk_functions) will uniquely identify the
+ * U/K call across all groups of functions, and all functions. */
+typedef enum {
+	/** Core functions */
+
+	_MALI_UK_OPEN                    = 0, /**< _mali_ukk_open() */
+	_MALI_UK_CLOSE,                       /**< _mali_ukk_close() */
+	_MALI_UK_WAIT_FOR_NOTIFICATION,       /**< _mali_ukk_wait_for_notification() */
+	_MALI_UK_GET_API_VERSION,             /**< _mali_ukk_get_api_version() */
+	_MALI_UK_POST_NOTIFICATION,           /**< _mali_ukk_post_notification() */
+	_MALI_UK_GET_USER_SETTING,            /**< _mali_ukk_get_user_setting() *//**< [out] */
+	_MALI_UK_GET_USER_SETTINGS,           /**< _mali_ukk_get_user_settings() *//**< [out] */
+	_MALI_UK_REQUEST_HIGH_PRIORITY,       /**< _mali_ukk_request_high_priority() */
+	_MALI_UK_TIMELINE_GET_LATEST_POINT,   /**< _mali_ukk_timeline_get_latest_point() */
+	_MALI_UK_TIMELINE_WAIT,               /**< _mali_ukk_timeline_wait() */
+	_MALI_UK_TIMELINE_CREATE_SYNC_FENCE,  /**< _mali_ukk_timeline_create_sync_fence() */
+	_MALI_UK_SOFT_JOB_START,              /**< _mali_ukk_soft_job_start() */
+	_MALI_UK_SOFT_JOB_SIGNAL,             /**< _mali_ukk_soft_job_signal() */
+	_MALI_UK_PENDING_SUBMIT,             /**< _mali_ukk_pending_submit() */
+
+	/** Memory functions */
+
+	_MALI_UK_ALLOC_MEM                = 0,   /**< _mali_ukk_alloc_mem() */
+	_MALI_UK_FREE_MEM,                       /**< _mali_ukk_free_mem() */
+	_MALI_UK_BIND_MEM,                       /**< _mali_ukk_mem_bind() */
+	_MALI_UK_UNBIND_MEM,                     /**< _mali_ukk_mem_unbind() */
+	_MALI_UK_COW_MEM,                        /**< _mali_ukk_mem_cow() */
+	_MALI_UK_COW_MODIFY_RANGE,               /**< _mali_ukk_mem_cow_modify_range() */
+	_MALI_UK_RESIZE_MEM,                     /**<._mali_ukk_mem_resize() */
+	_MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, /**< _mali_ukk_mem_get_mmu_page_table_dump_size() */
+	_MALI_UK_DUMP_MMU_PAGE_TABLE,            /**< _mali_ukk_mem_dump_mmu_page_table() */
+	_MALI_UK_DMA_BUF_GET_SIZE,               /**< _mali_ukk_dma_buf_get_size() */
+	_MALI_UK_MEM_WRITE_SAFE,                 /**< _mali_uku_mem_write_safe() */
+
+	/** Common functions for each core */
+
+	_MALI_UK_START_JOB           = 0,     /**< Start a Fragment/Vertex Processor Job on a core */
+	_MALI_UK_GET_NUMBER_OF_CORES,         /**< Get the number of Fragment/Vertex Processor cores */
+	_MALI_UK_GET_CORE_VERSION,            /**< Get the Fragment/Vertex Processor version compatible with all cores */
+
+	/** Fragment Processor Functions  */
+
+	_MALI_UK_PP_START_JOB            = _MALI_UK_START_JOB,            /**< _mali_ukk_pp_start_job() */
+	_MALI_UK_GET_PP_NUMBER_OF_CORES  = _MALI_UK_GET_NUMBER_OF_CORES,  /**< _mali_ukk_get_pp_number_of_cores() */
+	_MALI_UK_GET_PP_CORE_VERSION     = _MALI_UK_GET_CORE_VERSION,     /**< _mali_ukk_get_pp_core_version() */
+	_MALI_UK_PP_DISABLE_WB,                                           /**< _mali_ukk_pp_job_disable_wb() */
+	_MALI_UK_PP_AND_GP_START_JOB,                                     /**< _mali_ukk_pp_and_gp_start_job() */
+
+	/** Vertex Processor Functions  */
+
+	_MALI_UK_GP_START_JOB            = _MALI_UK_START_JOB,            /**< _mali_ukk_gp_start_job() */
+	_MALI_UK_GET_GP_NUMBER_OF_CORES  = _MALI_UK_GET_NUMBER_OF_CORES,  /**< _mali_ukk_get_gp_number_of_cores() */
+	_MALI_UK_GET_GP_CORE_VERSION     = _MALI_UK_GET_CORE_VERSION,     /**< _mali_ukk_get_gp_core_version() */
+	_MALI_UK_GP_SUSPEND_RESPONSE,                                     /**< _mali_ukk_gp_suspend_response() */
+
+	/** Profiling functions */
+
+	_MALI_UK_PROFILING_ADD_EVENT     = 0, /**< __mali_uku_profiling_add_event() */
+	_MALI_UK_PROFILING_REPORT_SW_COUNTERS,/**< __mali_uku_profiling_report_sw_counters() */
+	_MALI_UK_PROFILING_MEMORY_USAGE_GET,  /**< __mali_uku_profiling_memory_usage_get() */
+	_MALI_UK_PROFILING_STREAM_FD_GET, /** < __mali_uku_profiling_stream_fd_get() */
+	_MALI_UK_PROFILING_CONTROL_SET, /** < __mali_uku_profiling_control_set() */
+
+	/** VSYNC reporting fuctions */
+	_MALI_UK_VSYNC_EVENT_REPORT      = 0, /**< _mali_ukk_vsync_event_report() */
+} _mali_uk_functions;
+
+/** @defgroup _mali_uk_getsysteminfo U/K Get System Info
+ * @{ */
+
+/**
+ * Type definition for the core version number.
+ * Used when returning the version number read from a core
+ *
+ * Its format is that of the 32-bit Version register for a particular core.
+ * Refer to the "Mali200 and MaliGP2 3D Graphics Processor Technical Reference
+ * Manual", ARM DDI 0415C, for more information.
+ */
+typedef u32 _mali_core_version;
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @defgroup _mali_uk_gp U/K Vertex Processor
+ * @{ */
+
+/** @defgroup _mali_uk_gp_suspend_response_s Vertex Processor Suspend Response
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_gp_suspend_response()
+ *
+ * When _mali_wait_for_notification() receives notification that a
+ * Vertex Processor job was suspended, you need to send a response to indicate
+ * what needs to happen with this job. You can either abort or resume the job.
+ *
+ * - set @c code to indicate response code. This is either @c _MALIGP_JOB_ABORT or
+ * @c _MALIGP_JOB_RESUME_WITH_NEW_HEAP to indicate you will provide a new heap
+ * for the job that will resolve the out of memory condition for the job.
+ * - copy the @c cookie value from the @c _mali_uk_gp_job_suspended_s notification;
+ * this is an identifier for the suspended job
+ * - set @c arguments[0] and @c arguments[1] to zero if you abort the job. If
+ * you resume it, @c argument[0] should specify the Mali start address for the new
+ * heap and @c argument[1] the Mali end address of the heap.
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ *
+ */
+typedef enum _maligp_job_suspended_response_code {
+	_MALIGP_JOB_ABORT,                  /**< Abort the Vertex Processor job */
+	_MALIGP_JOB_RESUME_WITH_NEW_HEAP    /**< Resume the Vertex Processor job with a new heap */
+} _maligp_job_suspended_response_code;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 cookie;                     /**< [in] cookie from the _mali_uk_gp_job_suspended_s notification */
+	_maligp_job_suspended_response_code code; /**< [in] abort or resume response code, see \ref _maligp_job_suspended_response_code */
+	u32 arguments[2];               /**< [in] 0 when aborting a job. When resuming a job, the Mali start and end address for a new heap to resume the job with */
+} _mali_uk_gp_suspend_response_s;
+
+/** @} */ /* end group _mali_uk_gp_suspend_response_s */
+
+/** @defgroup _mali_uk_gpstartjob_s Vertex Processor Start Job
+ * @{ */
+
+/** @brief Status indicating the result of the execution of a Vertex or Fragment processor job  */
+typedef enum {
+	_MALI_UK_JOB_STATUS_END_SUCCESS         = 1 << (16 + 0),
+	_MALI_UK_JOB_STATUS_END_OOM             = 1 << (16 + 1),
+	_MALI_UK_JOB_STATUS_END_ABORT           = 1 << (16 + 2),
+	_MALI_UK_JOB_STATUS_END_TIMEOUT_SW      = 1 << (16 + 3),
+	_MALI_UK_JOB_STATUS_END_HANG            = 1 << (16 + 4),
+	_MALI_UK_JOB_STATUS_END_SEG_FAULT       = 1 << (16 + 5),
+	_MALI_UK_JOB_STATUS_END_ILLEGAL_JOB     = 1 << (16 + 6),
+	_MALI_UK_JOB_STATUS_END_UNKNOWN_ERR     = 1 << (16 + 7),
+	_MALI_UK_JOB_STATUS_END_SHUTDOWN        = 1 << (16 + 8),
+	_MALI_UK_JOB_STATUS_END_SYSTEM_UNUSABLE = 1 << (16 + 9)
+} _mali_uk_job_status;
+
+#define MALIGP2_NUM_REGS_FRAME (6)
+
+/** @brief Arguments for _mali_ukk_gp_start_job()
+ *
+ * To start a Vertex Processor job
+ * - associate the request with a reference to a @c mali_gp_job_info by setting
+ * user_job_ptr to the address of the @c mali_gp_job_info of the job.
+ * - set @c priority to the priority of the @c mali_gp_job_info
+ * - specify a timeout for the job by setting @c watchdog_msecs to the number of
+ * milliseconds the job is allowed to run. Specifying a value of 0 selects the
+ * default timeout in use by the device driver.
+ * - copy the frame registers from the @c mali_gp_job_info into @c frame_registers.
+ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero
+ * for a non-instrumented build. For an instrumented build you can use up
+ * to two performance counters. Set the corresponding bit in @c perf_counter_flag
+ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify
+ * the source of what needs to get counted (e.g. number of vertex loader
+ * cache hits). For source id values, see ARM DDI0415A, Table 3-60.
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ *
+ * When @c _mali_ukk_gp_start_job() returns @c _MALI_OSK_ERR_OK, status contains the
+ * result of the request (see \ref _mali_uk_start_job_status). If the job could
+ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be
+ * tried again.
+ *
+ * After the job has started, @c _mali_wait_for_notification() will be notified
+ * that the job finished or got suspended. It may get suspended due to
+ * resource shortage. If it finished (see _mali_ukk_wait_for_notification())
+ * the notification will contain a @c _mali_uk_gp_job_finished_s result. If
+ * it got suspended the notification will contain a @c _mali_uk_gp_job_suspended_s
+ * result.
+ *
+ * The @c _mali_uk_gp_job_finished_s contains the job status (see \ref _mali_uk_job_status),
+ * the number of milliseconds the job took to render, and values of core registers
+ * when the job finished (irq status, performance counters, renderer list
+ * address). A job has finished succesfully when its status is
+ * @c _MALI_UK_JOB_STATUS_FINISHED. If the hardware detected a timeout while rendering
+ * the job, or software detected the job is taking more than watchdog_msecs to
+ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_HANG.
+ * If the hardware detected a bus error while accessing memory associated with the
+ * job, status will indicate @c _MALI_UK_JOB_STATUS_SEG_FAULT.
+ * status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to
+ * stop the job but the job didn't start on the hardware yet, e.g. when the
+ * driver shutdown.
+ *
+ * In case the job got suspended, @c _mali_uk_gp_job_suspended_s contains
+ * the @c user_job_ptr identifier used to start the job with, the @c reason
+ * why the job stalled (see \ref _maligp_job_suspended_reason) and a @c cookie
+ * to identify the core on which the job stalled.  This @c cookie will be needed
+ * when responding to this nofication by means of _mali_ukk_gp_suspend_response().
+ * (see _mali_ukk_gp_suspend_response()). The response is either to abort or
+ * resume the job. If the job got suspended due to an out of memory condition
+ * you may be able to resolve this by providing more memory and resuming the job.
+ *
+ */
+typedef struct {
+	u64 ctx;                          /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job_ptr;                   /**< [in] identifier for the job in user space, a @c mali_gp_job_info* */
+	u32 priority;                       /**< [in] job priority. A lower number means higher priority */
+	u32 frame_registers[MALIGP2_NUM_REGS_FRAME]; /**< [in] core specific registers associated with this job */
+	u32 perf_counter_flag;              /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */
+	u32 perf_counter_src0;              /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */
+	u32 perf_counter_src1;              /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */
+	u32 frame_builder_id;               /**< [in] id of the originating frame builder */
+	u32 flush_id;                       /**< [in] flush id within the originating frame builder */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u64 timeline_point_ptr;            /**< [in,out] pointer to u32: location where point on gp timeline for this job will be written */
+	u32 varying_memsize;            /** < [in] size of varying memory to use deffer bind*/
+	u32 deferred_mem_num;
+	u64 deferred_mem_list;         /** < [in] memory hanlde list of varying buffer to use deffer bind */
+} _mali_uk_gp_start_job_s;
+
+#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE (1<<0) /**< Enable performance counter SRC0 for a job */
+#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE (1<<1) /**< Enable performance counter SRC1 for a job */
+#define _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE (1<<2) /**< Enable per tile (aka heatmap) generation with for a job (using the enabled counter sources) */
+
+/** @} */ /* end group _mali_uk_gpstartjob_s */
+
+typedef struct {
+	u64 user_job_ptr;               /**< [out] identifier for the job in user space */
+	_mali_uk_job_status status;     /**< [out] status of finished job */
+	u32 heap_current_addr;          /**< [out] value of the GP PLB PL heap start address register */
+	u32 perf_counter0;              /**< [out] value of performance counter 0 (see ARM DDI0415A) */
+	u32 perf_counter1;              /**< [out] value of performance counter 1 (see ARM DDI0415A) */
+	u32 pending_big_job_num;
+} _mali_uk_gp_job_finished_s;
+
+typedef struct {
+	u64 user_job_ptr;                    /**< [out] identifier for the job in user space */
+	u32 cookie;                          /**< [out] identifier for the core in kernel space on which the job stalled */
+} _mali_uk_gp_job_suspended_s;
+
+/** @} */ /* end group _mali_uk_gp */
+
+
+/** @defgroup _mali_uk_pp U/K Fragment Processor
+ * @{ */
+
+#define _MALI_PP_MAX_SUB_JOBS 8
+
+#define _MALI_PP_MAX_FRAME_REGISTERS ((0x058/4)+1)
+
+#define _MALI_PP_MAX_WB_REGISTERS ((0x02C/4)+1)
+
+#define _MALI_DLBU_MAX_REGISTERS 4
+
+/** Flag for _mali_uk_pp_start_job_s */
+#define _MALI_PP_JOB_FLAG_NO_NOTIFICATION (1<<0)
+#define _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE (1<<1)
+#define _MALI_PP_JOB_FLAG_PROTECTED (1<<2)
+
+/** @defgroup _mali_uk_ppstartjob_s Fragment Processor Start Job
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_pp_start_job()
+ *
+ * To start a Fragment Processor job
+ * - associate the request with a reference to a mali_pp_job by setting
+ * @c user_job_ptr to the address of the @c mali_pp_job of the job.
+ * - set @c priority to the priority of the mali_pp_job
+ * - specify a timeout for the job by setting @c watchdog_msecs to the number of
+ * milliseconds the job is allowed to run. Specifying a value of 0 selects the
+ * default timeout in use by the device driver.
+ * - copy the frame registers from the @c mali_pp_job into @c frame_registers.
+ * For MALI200 you also need to copy the write back 0,1 and 2 registers.
+ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero
+ * for a non-instrumented build. For an instrumented build you can use up
+ * to two performance counters. Set the corresponding bit in @c perf_counter_flag
+ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify
+ * the source of what needs to get counted (e.g. number of vertex loader
+ * cache hits). For source id values, see ARM DDI0415A, Table 3-60.
+ * - pass in the user-kernel context in @c ctx that was returned from _mali_ukk_open()
+ *
+ * When _mali_ukk_pp_start_job() returns @c _MALI_OSK_ERR_OK, @c status contains the
+ * result of the request (see \ref _mali_uk_start_job_status). If the job could
+ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be
+ * tried again.
+ *
+ * After the job has started, _mali_wait_for_notification() will be notified
+ * when the job finished. The notification will contain a
+ * @c _mali_uk_pp_job_finished_s result. It contains the @c user_job_ptr
+ * identifier used to start the job with, the job @c status (see \ref _mali_uk_job_status),
+ * the number of milliseconds the job took to render, and values of core registers
+ * when the job finished (irq status, performance counters, renderer list
+ * address). A job has finished succesfully when its status is
+ * @c _MALI_UK_JOB_STATUS_FINISHED. If the hardware detected a timeout while rendering
+ * the job, or software detected the job is taking more than @c watchdog_msecs to
+ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_HANG.
+ * If the hardware detected a bus error while accessing memory associated with the
+ * job, status will indicate @c _MALI_UK_JOB_STATUS_SEG_FAULT.
+ * status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to
+ * stop the job but the job didn't start on the hardware yet, e.g. when the
+ * driver shutdown.
+ *
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job_ptr;               /**< [in] identifier for the job in user space */
+	u32 priority;                   /**< [in] job priority. A lower number means higher priority */
+	u32 frame_registers[_MALI_PP_MAX_FRAME_REGISTERS];         /**< [in] core specific registers associated with first sub job, see ARM DDI0415A */
+	u32 frame_registers_addr_frame[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_FRAME registers for sub job 1-7 */
+	u32 frame_registers_addr_stack[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_STACK registers for sub job 1-7 */
+	u32 wb0_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 wb1_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 wb2_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 dlbu_registers[_MALI_DLBU_MAX_REGISTERS]; /**< [in] Dynamic load balancing unit registers */
+	u32 num_cores;                      /**< [in] Number of cores to set up (valid range: 1-8(M450) or 4(M400)) */
+	u32 perf_counter_flag;              /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */
+	u32 perf_counter_src0;              /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */
+	u32 perf_counter_src1;              /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */
+	u32 frame_builder_id;               /**< [in] id of the originating frame builder */
+	u32 flush_id;                       /**< [in] flush id within the originating frame builder */
+	u32 flags;                          /**< [in] See _MALI_PP_JOB_FLAG_* for a list of avaiable flags */
+	u32 tilesx;                         /**< [in] number of tiles in the x direction (needed for heatmap generation */
+	u32 tilesy;                         /**< [in] number of tiles in y direction (needed for reading the heatmap memory) */
+	u32 heatmap_mem;                    /**< [in] memory address to store counter values per tile (aka heatmap) */
+	u32 num_memory_cookies;             /**< [in] number of memory cookies attached to job */
+	u64 memory_cookies;               /**< [in] pointer to array of u32 memory cookies attached to job */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u64 timeline_point_ptr;           /**< [in,out] pointer to location of u32 where point on pp timeline for this job will be written */
+} _mali_uk_pp_start_job_s;
+
+typedef struct {
+	u64 ctx;       /**< [in,out] user-kernel context (trashed on output) */
+	u64 gp_args;   /**< [in,out] GP uk arguments (see _mali_uk_gp_start_job_s) */
+	u64 pp_args;   /**< [in,out] PP uk arguments (see _mali_uk_pp_start_job_s) */
+} _mali_uk_pp_and_gp_start_job_s;
+
+/** @} */ /* end group _mali_uk_ppstartjob_s */
+
+typedef struct {
+	u64 user_job_ptr;                          /**< [out] identifier for the job in user space */
+	_mali_uk_job_status status;                /**< [out] status of finished job */
+	u32 perf_counter0[_MALI_PP_MAX_SUB_JOBS];  /**< [out] value of perfomance counter 0 (see ARM DDI0415A), one for each sub job */
+	u32 perf_counter1[_MALI_PP_MAX_SUB_JOBS];  /**< [out] value of perfomance counter 1 (see ARM DDI0415A), one for each sub job */
+	u32 perf_counter_src0;
+	u32 perf_counter_src1;
+} _mali_uk_pp_job_finished_s;
+
+typedef struct {
+	u32 number_of_enabled_cores;               /**< [out] the new number of enabled cores */
+} _mali_uk_pp_num_cores_changed_s;
+
+
+
+/**
+ * Flags to indicate write-back units
+ */
+typedef enum {
+	_MALI_UK_PP_JOB_WB0 = 1,
+	_MALI_UK_PP_JOB_WB1 = 2,
+	_MALI_UK_PP_JOB_WB2 = 4,
+} _mali_uk_pp_job_wbx_flag;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 fb_id;                      /**< [in] Frame builder ID of job to disable WB units for */
+	u32 wb0_memory;
+	u32 wb1_memory;
+	u32 wb2_memory;
+} _mali_uk_pp_disable_wb_s;
+
+
+/** @} */ /* end group _mali_uk_pp */
+
+/** @defgroup _mali_uk_soft_job U/K Soft Job
+ * @{ */
+
+typedef struct {
+	u64 ctx;                            /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job;                       /**< [in] identifier for the job in user space */
+	u64 job_id_ptr;                     /**< [in,out] pointer to location of u32 where job id will be written */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u32 point;                          /**< [out] point on soft timeline for this job */
+	u32 type;                           /**< [in] type of soft job */
+} _mali_uk_soft_job_start_s;
+
+typedef struct {
+	u64 user_job;                       /**< [out] identifier for the job in user space */
+} _mali_uk_soft_job_activated_s;
+
+typedef struct {
+	u64 ctx;                          /**< [in,out] user-kernel context (trashed on output) */
+	u32 job_id;                         /**< [in] id for soft job */
+} _mali_uk_soft_job_signal_s;
+
+/** @} */ /* end group _mali_uk_soft_job */
+
+typedef struct {
+	u32 counter_id;
+	u32 key;
+	int enable;
+} _mali_uk_annotate_profiling_mem_counter_s;
+
+typedef struct {
+	u32 sampling_rate;
+	int enable;
+} _mali_uk_annotate_profiling_enable_s;
+
+
+/** @addtogroup _mali_uk_core U/K Core
+ * @{ */
+
+/** @defgroup _mali_uk_waitfornotification_s Wait For Notification
+ * @{ */
+
+/** @brief Notification type encodings
+ *
+ * Each Notification type is an ordered pair of (subsystem,id), and is unique.
+ *
+ * The encoding of subsystem,id into a 32-bit word is:
+ * encoding = (( subsystem << _MALI_NOTIFICATION_SUBSYSTEM_SHIFT ) & _MALI_NOTIFICATION_SUBSYSTEM_MASK)
+ *            | (( id <<  _MALI_NOTIFICATION_ID_SHIFT ) & _MALI_NOTIFICATION_ID_MASK)
+ *
+ * @see _mali_uk_wait_for_notification_s
+ */
+typedef enum {
+	/** core notifications */
+
+	_MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x20,
+	_MALI_NOTIFICATION_APPLICATION_QUIT = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x40,
+	_MALI_NOTIFICATION_SETTINGS_CHANGED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x80,
+	_MALI_NOTIFICATION_SOFT_ACTIVATED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x100,
+
+	/** Fragment Processor notifications */
+
+	_MALI_NOTIFICATION_PP_FINISHED = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x10,
+	_MALI_NOTIFICATION_PP_NUM_CORE_CHANGE = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x20,
+
+	/** Vertex Processor notifications */
+
+	_MALI_NOTIFICATION_GP_FINISHED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x10,
+	_MALI_NOTIFICATION_GP_STALLED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x20,
+
+	/** Profiling notifications */
+	_MALI_NOTIFICATION_ANNOTATE_PROFILING_MEM_COUNTER = (_MALI_UK_PROFILING_SUBSYSTEM << 16) | 0x10,
+	_MALI_NOTIFICATION_ANNOTATE_PROFILING_ENABLE = (_MALI_UK_PROFILING_SUBSYSTEM << 16) | 0x20,
+} _mali_uk_notification_type;
+
+/** to assist in splitting up 32-bit notification value in subsystem and id value */
+#define _MALI_NOTIFICATION_SUBSYSTEM_MASK 0xFFFF0000
+#define _MALI_NOTIFICATION_SUBSYSTEM_SHIFT 16
+#define _MALI_NOTIFICATION_ID_MASK 0x0000FFFF
+#define _MALI_NOTIFICATION_ID_SHIFT 0
+
+
+/** @brief Enumeration of possible settings which match mali_setting_t in user space
+ *
+ *
+ */
+typedef enum {
+	_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE = 0,
+	_MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_DEPTHBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_STENCILBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_PER_TILE_COUNTERS_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_COMPOSITOR,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_WINDOW,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_OTHER,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR,
+	_MALI_UK_USER_SETTING_SW_COUNTER_ENABLED,
+	_MALI_UK_USER_SETTING_MAX,
+} _mali_uk_user_setting_t;
+
+/* See mali_user_settings_db.c */
+extern const char *_mali_uk_user_setting_descriptions[];
+#define _MALI_UK_USER_SETTING_DESCRIPTIONS \
+	{                                           \
+		"sw_events_enable",                 \
+		"colorbuffer_capture_enable",       \
+		"depthbuffer_capture_enable",       \
+		"stencilbuffer_capture_enable",     \
+		"per_tile_counters_enable",         \
+		"buffer_capture_compositor",        \
+		"buffer_capture_window",            \
+		"buffer_capture_other",             \
+		"buffer_capture_n_frames",          \
+		"buffer_capture_resize_factor",     \
+		"sw_counters_enable",               \
+	};
+
+/** @brief struct to hold the value to a particular setting as seen in the kernel space
+ */
+typedef struct {
+	_mali_uk_user_setting_t setting;
+	u32 value;
+} _mali_uk_settings_changed_s;
+
+/** @brief Arguments for _mali_ukk_wait_for_notification()
+ *
+ * On successful return from _mali_ukk_wait_for_notification(), the members of
+ * this structure will indicate the reason for notification.
+ *
+ * Specifically, the source of the notification can be identified by the
+ * subsystem and id fields of the mali_uk_notification_type in the code.type
+ * member. The type member is encoded in a way to divide up the types into a
+ * subsystem field, and a per-subsystem ID field. See
+ * _mali_uk_notification_type for more information.
+ *
+ * Interpreting the data union member depends on the notification type:
+ *
+ * - type == _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS
+ *     - The kernel side is shutting down. No further
+ * _mali_uk_wait_for_notification() calls should be made.
+ *     - In this case, the value of the data union member is undefined.
+ *     - This is used to indicate to the user space client that it should close
+ * the connection to the Mali Device Driver.
+ * - type == _MALI_NOTIFICATION_PP_FINISHED
+ *    - The notification data is of type _mali_uk_pp_job_finished_s. It contains the user_job_ptr
+ * identifier used to start the job with, the job status, the number of milliseconds the job took to render,
+ * and values of core registers when the job finished (irq status, performance counters, renderer list
+ * address).
+ *    - A job has finished succesfully when its status member is _MALI_UK_JOB_STATUS_FINISHED.
+ *    - If the hardware detected a timeout while rendering the job, or software detected the job is
+ * taking more than watchdog_msecs (see _mali_ukk_pp_start_job()) to complete, the status member will
+ * indicate _MALI_UK_JOB_STATUS_HANG.
+ *    - If the hardware detected a bus error while accessing memory associated with the job, status will
+ * indicate _MALI_UK_JOB_STATUS_SEG_FAULT.
+ *    - Status will indicate MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to stop the job but the job
+ * didn't start the hardware yet, e.g. when the driver closes.
+ * - type == _MALI_NOTIFICATION_GP_FINISHED
+ *     - The notification data is of type _mali_uk_gp_job_finished_s. The notification is similar to that of
+ * type == _MALI_NOTIFICATION_PP_FINISHED, except that several other GP core register values are returned.
+ * The status values have the same meaning for type == _MALI_NOTIFICATION_PP_FINISHED.
+ * - type == _MALI_NOTIFICATION_GP_STALLED
+ *     - The nofication data is of type _mali_uk_gp_job_suspended_s. It contains the user_job_ptr
+ * identifier used to start the job with, the reason why the job stalled and a cookie to identify the core on
+ * which the job stalled.
+ *     - The reason member of gp_job_suspended is set to _MALIGP_JOB_SUSPENDED_OUT_OF_MEMORY
+ * when the polygon list builder unit has run out of memory.
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_notification_type type; /**< [out] Type of notification available */
+	union {
+		_mali_uk_gp_job_suspended_s gp_job_suspended;/**< [out] Notification data for _MALI_NOTIFICATION_GP_STALLED notification type */
+		_mali_uk_gp_job_finished_s  gp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_GP_FINISHED notification type */
+		_mali_uk_pp_job_finished_s  pp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_PP_FINISHED notification type */
+		_mali_uk_settings_changed_s setting_changed;/**< [out] Notification data for _MALI_NOTIFICAATION_SETTINGS_CHANGED notification type */
+		_mali_uk_soft_job_activated_s soft_job_activated; /**< [out] Notification data for _MALI_NOTIFICATION_SOFT_ACTIVATED notification type */
+		_mali_uk_annotate_profiling_mem_counter_s profiling_mem_counter;
+		_mali_uk_annotate_profiling_enable_s profiling_enable;
+	} data;
+} _mali_uk_wait_for_notification_s;
+
+/** @brief Arguments for _mali_ukk_post_notification()
+ *
+ * Posts the specified notification to the notification queue for this application.
+ * This is used to send a quit message to the callback thread.
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_notification_type type; /**< [in] Type of notification to post */
+} _mali_uk_post_notification_s;
+
+/** @} */ /* end group _mali_uk_waitfornotification_s */
+
+/** @defgroup _mali_uk_getapiversion_s Get API Version
+ * @{ */
+
+/** helpers for Device Driver API version handling */
+
+/** @brief Encode a version ID from a 16-bit input
+ *
+ * @note the input is assumed to be 16 bits. It must not exceed 16 bits. */
+#define _MAKE_VERSION_ID(x) (((x) << 16UL) | (x))
+
+/** @brief Check whether a 32-bit value is likely to be Device Driver API
+ * version ID. */
+#define _IS_VERSION_ID(x) (((x) & 0xFFFF) == (((x) >> 16UL) & 0xFFFF))
+
+/** @brief Decode a 16-bit version number from a 32-bit Device Driver API version
+ * ID */
+#define _GET_VERSION(x) (((x) >> 16UL) & 0xFFFF)
+
+/** @brief Determine whether two 32-bit encoded version IDs match */
+#define _IS_API_MATCH(x, y) (IS_VERSION_ID((x)) && IS_VERSION_ID((y)) && (GET_VERSION((x)) == GET_VERSION((y))))
+
+/**
+ * API version define.
+ * Indicates the version of the kernel API
+ * The version is a 16bit integer incremented on each API change.
+ * The 16bit integer is stored twice in a 32bit integer
+ * For example, for version 1 the value would be 0x00010001
+ */
+#define _MALI_API_VERSION 900
+#define _MALI_UK_API_VERSION _MAKE_VERSION_ID(_MALI_API_VERSION)
+
+/**
+ * The API version is a 16-bit integer stored in both the lower and upper 16-bits
+ * of a 32-bit value. The 16-bit API version value is incremented on each API
+ * change. Version 1 would be 0x00010001. Used in _mali_uk_get_api_version_s.
+ */
+typedef u32 _mali_uk_api_version;
+
+/** @brief Arguments for _mali_uk_get_api_version()
+ *
+ * The user-side interface version must be written into the version member,
+ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of
+ * the kernel-side interface.
+ *
+ * On successful return, the version member will be the API version of the
+ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version
+ * of the API.
+ *
+ * The compatible member must be checked to see if the version of the user-side
+ * interface is compatible with the kernel-side interface, since future versions
+ * of the interface may be backwards compatible.
+ */
+typedef struct {
+	u32 ctx;                        /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_api_version version;   /**< [in,out] API version of user-side interface. */
+	int compatible;                 /**< [out] @c 1 when @version is compatible, @c 0 otherwise */
+} _mali_uk_get_api_version_s;
+
+/** @brief Arguments for _mali_uk_get_api_version_v2()
+ *
+ * The user-side interface version must be written into the version member,
+ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of
+ * the kernel-side interface.
+ *
+ * On successful return, the version member will be the API version of the
+ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version
+ * of the API.
+ *
+ * The compatible member must be checked to see if the version of the user-side
+ * interface is compatible with the kernel-side interface, since future versions
+ * of the interface may be backwards compatible.
+ */
+typedef struct {
+	u64 ctx;                        /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_api_version version;   /**< [in,out] API version of user-side interface. */
+	int compatible;                 /**< [out] @c 1 when @version is compatible, @c 0 otherwise */
+} _mali_uk_get_api_version_v2_s;
+
+/** @} */ /* end group _mali_uk_getapiversion_s */
+
+/** @defgroup _mali_uk_get_user_settings_s Get user space settings */
+
+/** @brief struct to keep the matching values of the user space settings within certain context
+ *
+ * Each member of the settings array corresponds to a matching setting in the user space and its value is the value
+ * of that particular setting.
+ *
+ * All settings are given reference to the context pointed to by the ctx pointer.
+ *
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	u32 settings[_MALI_UK_USER_SETTING_MAX]; /**< [out] The values for all settings */
+} _mali_uk_get_user_settings_s;
+
+/** @brief struct to hold the value of a particular setting from the user space within a given context
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_user_setting_t setting; /**< [in] setting to get */
+	u32 value;                       /**< [out] value of setting */
+} _mali_uk_get_user_setting_s;
+
+/** @brief Arguments for _mali_ukk_request_high_priority() */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+} _mali_uk_request_high_priority_s;
+
+/** @brief Arguments for _mali_ukk_pending_submit() */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+} _mali_uk_pending_submit_s;
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @defgroup _mali_uk_memory U/K Memory
+ * @{ */
+
+#define _MALI_MEMORY_ALLOCATE_RESIZEABLE  (1<<4) /* BUFFER can trim dow/grow*/
+#define _MALI_MEMORY_ALLOCATE_NO_BIND_GPU (1<<5) /*Not map to GPU when allocate, must call bind later*/
+#define _MALI_MEMORY_ALLOCATE_SWAPPABLE   (1<<6) /* Allocate swappale memory. */
+#define _MALI_MEMORY_ALLOCATE_DEFER_BIND (1<<7) /*Not map to GPU when allocate, must call bind later*/
+#define _MALI_MEMORY_ALLOCATE_SECURE (1<<8) /* Allocate secure memory. */
+
+
+typedef struct {
+	u64 ctx;                                          /**< [in,out] user-kernel context (trashed on output) */
+	u32 gpu_vaddr;                                    /**< [in] GPU virtual address */
+	u32 vsize;                                        /**< [in] vitrual size of the allocation */
+	u32 psize;                                        /**< [in] physical size of the allocation */
+	u32 flags;
+	u64 backend_handle;                               /**< [out] backend handle */
+	s32 secure_shared_fd;                           /** < [in] the mem handle for secure mem */
+} _mali_uk_alloc_mem_s;
+
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 gpu_vaddr;                /**< [in] use as handle to free allocation */
+	u32 free_pages_nr;      /** < [out] record the number of free pages */
+} _mali_uk_free_mem_s;
+
+
+#define _MALI_MEMORY_BIND_BACKEND_UMP             (1<<8)
+#define _MALI_MEMORY_BIND_BACKEND_DMA_BUF         (1<<9)
+#define _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY     (1<<10)
+#define _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY (1<<11)
+#define _MALI_MEMORY_BIND_BACKEND_EXT_COW         (1<<12)
+#define _MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION (1<<13)
+
+
+#define _MALI_MEMORY_BIND_BACKEND_MASK (_MALI_MEMORY_BIND_BACKEND_UMP| \
+					_MALI_MEMORY_BIND_BACKEND_DMA_BUF |\
+					_MALI_MEMORY_BIND_BACKEND_MALI_MEMORY |\
+					_MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY |\
+					_MALI_MEMORY_BIND_BACKEND_EXT_COW |\
+					_MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION)
+
+
+#define _MALI_MEMORY_GPU_READ_ALLOCATE            (1<<16)
+
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 vaddr;                                      /**< [in] mali address to map the physical memory to */
+	u32 size;                                       /**< [in] size */
+	u32 flags;                                      /**< [in] see_MALI_MEMORY_BIND_BACKEND_* */
+	u32 padding;                                    /** padding for 32/64 struct alignment */
+	union {
+		struct {
+			u32 secure_id;                  /**< [in] secure id */
+			u32 rights;                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_ump;
+		struct {
+			u32 mem_fd;                     /**< [in] Memory descriptor */
+			u32 rights;                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_dma_buf;
+		struct {
+			u32 phys_addr;                  /**< [in] physical address */
+			u32 rights;                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_ext_memory;
+	} mem_union;
+} _mali_uk_bind_mem_s;
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 flags;                                      /**< [in] see_MALI_MEMORY_BIND_BACKEND_* */
+	u32 vaddr;                                      /**<  [in] identifier for mapped memory object in kernel space  */
+} _mali_uk_unbind_mem_s;
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 target_handle;                              /**< [in] handle of allocation need to do COW */
+	u32 target_offset;                              /**< [in] offset in target allocation to do COW(for support COW  a memory allocated from memory_bank, PAGE_SIZE align)*/
+	u32 target_size;                                /**< [in] size of target allocation to do COW (for support memory bank, PAGE_SIZE align)(in byte) */
+	u32 range_start;                                /**< [in] re allocate range start offset, offset from the start of allocation (PAGE_SIZE align)*/
+	u32 range_size;                                 /**< [in] re allocate size (PAGE_SIZE align)*/
+	u32 vaddr;                                      /**< [in] mali address for the new allocaiton */
+	u32 backend_handle;                             /**< [out] backend handle */
+	u32 flags;
+} _mali_uk_cow_mem_s;
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 range_start;                                /**< [in] re allocate range start offset, offset from the start of allocation */
+	u32 size;                                       /**< [in] re allocate size*/
+	u32 vaddr;                                      /**< [in] mali address for the new allocaiton */
+	s32 change_pages_nr;                            /**< [out] record the page number change for cow operation */
+} _mali_uk_cow_modify_range_s;
+
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 mem_fd;                     /**< [in] Memory descriptor */
+	u32 size;                       /**< [out] size */
+} _mali_uk_dma_buf_get_size_s;
+
+/** Flag for _mali_uk_map_external_mem_s, _mali_uk_attach_ump_mem_s and _mali_uk_attach_dma_buf_s */
+#define _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE (1<<0)
+
+
+typedef struct {
+	u64 ctx;                                /**< [in,out] user-kernel context (trashed on output) */
+	u64 vaddr;                              /* the buffer to do resize*/
+	u32 psize;                              /* wanted physical size of this memory */
+} _mali_uk_mem_resize_s;
+
+/**
+ * @brief Arguments for _mali_uk[uk]_mem_write_safe()
+ */
+typedef struct {
+	u64 ctx;  /**< [in,out] user-kernel context (trashed on output) */
+	u64 src;  /**< [in] Pointer to source data */
+	u64 dest; /**< [in] Destination Mali buffer */
+	u32 size;   /**< [in,out] Number of bytes to write/copy on input, number of bytes actually written/copied on output */
+} _mali_uk_mem_write_safe_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 size;                       /**< [out] size of MMU page table information (registers + page tables) */
+} _mali_uk_query_mmu_page_table_dump_size_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 size;                       /**< [in] size of buffer to receive mmu page table information */
+	u64 buffer;                   /**< [in,out] buffer to receive mmu page table information */
+	u32 register_writes_size;       /**< [out] size of MMU register dump */
+	u64 register_writes;           /**< [out] pointer within buffer where MMU register dump is stored */
+	u32 page_table_dump_size;       /**< [out] size of MMU page table dump */
+	u64 page_table_dump;           /**< [out] pointer within buffer where MMU page table dump is stored */
+} _mali_uk_dump_mmu_page_table_s;
+
+/** @} */ /* end group _mali_uk_memory */
+
+
+/** @addtogroup _mali_uk_pp U/K Fragment Processor
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_get_pp_number_of_cores()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_pp_number_of_cores(), @c number_of_cores
+ * will contain the number of Fragment Processor cores in the system.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 number_of_total_cores;      /**< [out] Total number of Fragment Processor cores in the system */
+	u32 number_of_enabled_cores;    /**< [out] Number of enabled Fragment Processor cores */
+} _mali_uk_get_pp_number_of_cores_s;
+
+/** @brief Arguments for _mali_ukk_get_pp_core_version()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_pp_core_version(), @c version contains
+ * the version that all Fragment Processor cores are compatible with.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_core_version version;     /**< [out] version returned from core, see \ref _mali_core_version  */
+	u32 padding;
+} _mali_uk_get_pp_core_version_s;
+
+/** @} */ /* end group _mali_uk_pp */
+
+
+/** @addtogroup _mali_uk_gp U/K Vertex Processor
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_get_gp_number_of_cores()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_gp_number_of_cores(), @c number_of_cores
+ * will contain the number of Vertex Processor cores in the system.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 number_of_cores;            /**< [out] number of Vertex Processor cores in the system */
+} _mali_uk_get_gp_number_of_cores_s;
+
+/** @brief Arguments for _mali_ukk_get_gp_core_version()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_gp_core_version(), @c version contains
+ * the version that all Vertex Processor cores are compatible with.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_core_version version;     /**< [out] version returned from core, see \ref _mali_core_version */
+} _mali_uk_get_gp_core_version_s;
+
+/** @} */ /* end group _mali_uk_gp */
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 event_id;                   /**< [in] event id to register (see  enum mali_profiling_events for values) */
+	u32 data[5];                    /**< [in] event specific data */
+} _mali_uk_profiling_add_event_s;
+
+typedef struct {
+	u64 ctx;                     /**< [in,out] user-kernel context (trashed on output) */
+	u32 memory_usage;              /**< [out] total memory usage */
+	u32 vaddr;                                      /**< [in] mali address for the cow allocaiton */
+	s32 change_pages_nr;            /**< [out] record the page number change for cow operation */
+} _mali_uk_profiling_memory_usage_get_s;
+
+
+/** @addtogroup _mali_uk_memory U/K Memory
+ * @{ */
+
+/** @brief Arguments to _mali_ukk_mem_mmap()
+ *
+ * Use of the phys_addr member depends on whether the driver is compiled for
+ * Mali-MMU or nonMMU:
+ * - in the nonMMU case, this is the physical address of the memory as seen by
+ * the CPU (which may be a constant offset from that used by Mali)
+ * - in the MMU case, this is the Mali Virtual base address of the memory to
+ * allocate, and the particular physical pages used to back the memory are
+ * entirely determined by _mali_ukk_mem_mmap(). The details of the physical pages
+ * are not reported to user-space for security reasons.
+ *
+ * The cookie member must be stored for use later when freeing the memory by
+ * calling _mali_ukk_mem_munmap(). In the Mali-MMU case, the cookie is secure.
+ *
+ * The ukk_private word must be set to zero when calling from user-space. On
+ * Kernel-side, the  OS implementation of the U/K interface can use it to
+ * communicate data to the OS implementation of the OSK layer. In particular,
+ * _mali_ukk_get_big_block() directly calls _mali_ukk_mem_mmap directly, and
+ * will communicate its own ukk_private word through the ukk_private member
+ * here. The common code itself will not inspect or modify the ukk_private
+ * word, and so it may be safely used for whatever purposes necessary to
+ * integrate Mali Memory handling into the OS.
+ *
+ * The uku_private member is currently reserved for use by the user-side
+ * implementation of the U/K interface. Its value must be zero.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	void *mapping;                  /**< [out] Returns user-space virtual address for the mapping */
+	u32 size;                       /**< [in] Size of the requested mapping */
+	u32 phys_addr;                  /**< [in] Physical address - could be offset, depending on caller+callee convention */
+	mali_bool writeable;
+} _mali_uk_mem_mmap_s;
+
+/** @brief Arguments to _mali_ukk_mem_munmap()
+ *
+ * The cookie and mapping members must be that returned from the same previous
+ * call to _mali_ukk_mem_mmap(). The size member must correspond to cookie
+ * and mapping - that is, it must be the value originally supplied to a call to
+ * _mali_ukk_mem_mmap that returned the values of mapping and cookie.
+ *
+ * An error will be returned if an attempt is made to unmap only part of the
+ * originally obtained range, or to unmap more than was originally obtained.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	void *mapping;                  /**< [in] The mapping returned from mmap call */
+	u32 size;                       /**< [in] The size passed to mmap call */
+} _mali_uk_mem_munmap_s;
+/** @} */ /* end group _mali_uk_memory */
+
+/** @defgroup _mali_uk_vsync U/K VSYNC Wait Reporting Module
+ * @{ */
+
+/** @brief VSYNC events
+ *
+ * These events are reported when DDK starts to wait for vsync and when the
+ * vsync has occured and the DDK can continue on the next frame.
+ */
+typedef enum _mali_uk_vsync_event {
+	_MALI_UK_VSYNC_EVENT_BEGIN_WAIT = 0,
+	_MALI_UK_VSYNC_EVENT_END_WAIT
+} _mali_uk_vsync_event;
+
+/** @brief Arguments to _mali_ukk_vsync_event()
+ *
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_vsync_event event;     /**< [in] VSYNCH event type */
+} _mali_uk_vsync_event_report_s;
+
+/** @} */ /* end group _mali_uk_vsync */
+
+/** @defgroup _mali_uk_sw_counters_report U/K Software Counter Reporting
+ * @{ */
+
+/** @brief Software counter values
+ *
+ * Values recorded for each of the software counters during a single renderpass.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u64 counters;                  /**< [in] The array of u32 counter values */
+	u32 num_counters;              /**< [in] The number of elements in counters array */
+} _mali_uk_sw_counters_report_s;
+
+/** @} */ /* end group _mali_uk_sw_counters_report */
+
+/** @defgroup _mali_uk_timeline U/K Mali Timeline
+ * @{ */
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 timeline;                   /**< [in] timeline id */
+	u32 point;                      /**< [out] latest point on timeline */
+} _mali_uk_timeline_get_latest_point_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_fence_t fence;         /**< [in] fence */
+	u32 timeout;                    /**< [in] timeout (0 for no wait, -1 for blocking) */
+	u32 status;                     /**< [out] status of fence (1 if signaled, 0 if timeout) */
+} _mali_uk_timeline_wait_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_fence_t fence;         /**< [in] mali fence to create linux sync fence from */
+	s32 sync_fd;                    /**< [out] file descriptor for new linux sync fence */
+} _mali_uk_timeline_create_sync_fence_s;
+
+/** @} */ /* end group _mali_uk_timeline */
+
+/** @} */ /* end group u_k_api */
+
+/** @} */ /* end group uddapi */
+
+typedef struct {
+	u64 ctx;                 /**< [in,out] user-kernel context (trashed on output) */
+	s32 stream_fd;   /**< [in] The profiling kernel base stream fd handle */
+} _mali_uk_profiling_stream_fd_get_s;
+
+typedef struct {
+	u64 ctx;        /**< [in,out] user-kernel context (trashed on output) */
+	u64 control_packet_data; /**< [in] the control packet data for control settings */
+	u32 control_packet_size;  /**< [in] The control packet size */
+	u64 response_packet_data; /** < [out] The response packet data */
+	u32 response_packet_size; /** < [in,out] The response packet data */
+} _mali_uk_profiling_control_set_s;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_UK_TYPES_H__ */
diff --git a/utgard/r6p2/linux/license/gpl/mali_kernel_license.h b/utgard/r6p2/linux/license/gpl/mali_kernel_license.h
new file mode 100755
index 0000000..264555d
--- /dev/null
+++ b/utgard/r6p2/linux/license/gpl/mali_kernel_license.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2010, 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_kernel_license.h
+ * Defines for the macro MODULE_LICENSE.
+ */
+
+#ifndef __MALI_KERNEL_LICENSE_H__
+#define __MALI_KERNEL_LICENSE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MALI_KERNEL_LINUX_LICENSE     "GPL"
+#define MALI_LICENSE_IS_GPL 1
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LICENSE_H__ */
diff --git a/utgard/r6p2/linux/mali_devfreq.c b/utgard/r6p2/linux/mali_devfreq.c
new file mode 100644
index 0000000..0b0ba14
--- /dev/null
+++ b/utgard/r6p2/linux/mali_devfreq.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#include <linux/regulator/consumer.h>
+#include <linux/regulator/driver.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#endif /* Linux >= 3.13 */
+
+#include "mali_pm_metrics.h"
+
+static int
+mali_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+	if (IS_ERR_OR_NULL(opp)) {
+		MALI_PRINT_ERROR(("Failed to get opp (%ld)\n", PTR_ERR(opp)));
+		return PTR_ERR(opp);
+	}
+
+	MALI_DEBUG_PRINT(2, ("mali_devfreq_target:set_freq = %lld flags = 0x%x\n", freq, flags));
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (mdev->current_freq == freq) {
+		*target_freq = freq;
+		mali_pm_reset_dvfs_utilisation(mdev);
+		return 0;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (mdev->regulator && mdev->current_voltage != voltage
+	    && mdev->current_freq < freq) {
+		err = regulator_set_voltage(mdev->regulator, voltage, voltage);
+		if (err) {
+			MALI_PRINT_ERROR(("Failed to increase voltage (%d)\n", err));
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(mdev->clock, freq);
+	if (err) {
+		MALI_PRINT_ERROR(("Failed to set clock %lu (target %lu)\n", freq, *target_freq));
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (mdev->regulator && mdev->current_voltage != voltage
+	    && mdev->current_freq > freq) {
+		err = regulator_set_voltage(mdev->regulator, voltage, voltage);
+		if (err) {
+			MALI_PRINT_ERROR(("Failed to decrease voltage (%d)\n", err));
+			return err;
+		}
+	}
+#endif
+
+	*target_freq = freq;
+	mdev->current_voltage = voltage;
+	mdev->current_freq = freq;
+
+	mali_pm_reset_dvfs_utilisation(mdev);
+
+	return err;
+}
+
+static int
+mali_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	*freq = mdev->current_freq;
+
+	MALI_DEBUG_PRINT(2, ("mali_devfreq_cur_freq: freq = %d \n", *freq));
+	return 0;
+}
+
+static int
+mali_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	stat->current_frequency = mdev->current_freq;
+
+	mali_pm_get_dvfs_utilisation(mdev,
+				     &stat->total_time, &stat->busy_time);
+
+	stat->private_data = NULL;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	memcpy(&mdev->devfreq->last_status, stat, sizeof(*stat));
+#endif
+
+	return 0;
+}
+
+/* setup platform specific opp in platform.c*/
+int __weak setup_opps(void)
+{
+	return 0;
+}
+
+/* term platform specific opp in platform.c*/
+int __weak term_opps(struct device *dev)
+{
+	return 0;
+}
+
+static int mali_devfreq_init_freq_table(struct mali_device *mdev,
+					struct devfreq_dev_profile *dp)
+{
+	int err, count;
+	int i = 0;
+	unsigned long freq = 0;
+	struct dev_pm_opp *opp;
+
+	err = setup_opps();
+	if (err)
+		return err;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(mdev->dev);
+	if (count < 0) {
+		rcu_read_unlock();
+		return count;
+	}
+	rcu_read_unlock();
+
+	MALI_DEBUG_PRINT(2, ("mali devfreq table count %d\n", count));
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				       GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	for (i = 0; i < count; i++, freq++) {
+		opp = dev_pm_opp_find_freq_ceil(mdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+		MALI_DEBUG_PRINT(2, ("mali devfreq table array[%d] = %d\n", i, freq));
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		MALI_PRINT_ERROR(("Unable to enumerate all OPPs (%d!=%d)\n",
+				  count, i));
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void mali_devfreq_term_freq_table(struct mali_device *mdev)
+{
+	struct devfreq_dev_profile *dp = mdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+	term_opps(mdev->dev);
+}
+
+static void mali_devfreq_exit(struct device *dev)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	mali_devfreq_term_freq_table(mdev);
+}
+
+int mali_devfreq_init(struct mali_device *mdev)
+{
+#ifdef CONFIG_DEVFREQ_THERMAL
+	struct devfreq_cooling_power *callbacks = NULL;
+	_mali_osk_device_data data;
+#endif
+	struct devfreq_dev_profile *dp;
+	int err;
+
+	MALI_DEBUG_PRINT(2, ("Init Mali devfreq\n"));
+
+	if (!mdev->clock)
+		return -ENODEV;
+
+	mdev->current_freq = clk_get_rate(mdev->clock);
+
+	dp = &mdev->devfreq_profile;
+
+	dp->initial_freq = mdev->current_freq;
+	dp->polling_ms = 100;
+	dp->target = mali_devfreq_target;
+	dp->get_dev_status = mali_devfreq_status;
+	dp->get_cur_freq = mali_devfreq_cur_freq;
+	dp->exit = mali_devfreq_exit;
+
+	if (mali_devfreq_init_freq_table(mdev, dp))
+		return -EFAULT;
+
+	mdev->devfreq = devfreq_add_device(mdev->dev, dp,
+					   "simple_ondemand", NULL);
+	if (IS_ERR(mdev->devfreq)) {
+		mali_devfreq_term_freq_table(mdev);
+		return PTR_ERR(mdev->devfreq);
+	}
+
+	err = devfreq_register_opp_notifier(mdev->dev, mdev->devfreq);
+	if (err) {
+		MALI_PRINT_ERROR(("Failed to register OPP notifier (%d)\n", err));
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	/* Initilization last_status it will be used when first power allocate called */
+	mdev->devfreq->last_status.current_frequency = mdev->current_freq;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		if (NULL != data.gpu_cooling_ops) {
+			callbacks = data.gpu_cooling_ops;
+			MALI_DEBUG_PRINT(2, ("Mali GPU Thermal: Callback handler installed \n"));
+		}
+	}
+
+	if (callbacks) {
+		mdev->devfreq_cooling = of_devfreq_cooling_register_power(
+						mdev->dev->of_node,
+						mdev->devfreq,
+						callbacks);
+		if (IS_ERR_OR_NULL(mdev->devfreq_cooling)) {
+			err = PTR_ERR(mdev->devfreq_cooling);
+			MALI_PRINT_ERROR(("Failed to register cooling device (%d)\n", err));
+			goto cooling_failed;
+		} else {
+			MALI_DEBUG_PRINT(2, ("Mali GPU Thermal Cooling installed \n"));
+		}
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(mdev->dev, mdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	err = devfreq_remove_device(mdev->devfreq);
+	if (err)
+		MALI_PRINT_ERROR(("Failed to terminate devfreq (%d)\n", err));
+	else
+		mdev->devfreq = NULL;
+
+	return err;
+}
+
+void mali_devfreq_term(struct mali_device *mdev)
+{
+	int err;
+
+	MALI_DEBUG_PRINT(2, ("Term Mali devfreq\n"));
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	devfreq_cooling_unregister(mdev->devfreq_cooling);
+#endif
+
+	devfreq_unregister_opp_notifier(mdev->dev, mdev->devfreq);
+
+	err = devfreq_remove_device(mdev->devfreq);
+	if (err)
+		MALI_PRINT_ERROR(("Failed to terminate devfreq (%d)\n", err));
+	else
+		mdev->devfreq = NULL;
+}
diff --git a/utgard/r6p2/linux/mali_devfreq.h b/utgard/r6p2/linux/mali_devfreq.h
new file mode 100644
index 0000000..faf84f2
--- /dev/null
+++ b/utgard/r6p2/linux/mali_devfreq.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef _MALI_DEVFREQ_H_
+#define _MALI_DEVFREQ_H_
+
+int mali_devfreq_init(struct mali_device *mdev);
+
+void mali_devfreq_term(struct mali_device *mdev);
+
+#endif
diff --git a/utgard/r6p2/linux/mali_device_pause_resume.c b/utgard/r6p2/linux/mali_device_pause_resume.c
new file mode 100755
index 0000000..cb2f702
--- /dev/null
+++ b/utgard/r6p2/linux/mali_device_pause_resume.c
@@ -0,0 +1,36 @@
+/**
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_device_pause_resume.c
+ * Implementation of the Mali pause/resume functionality
+ */
+
+#include <linux/module.h>
+#include <linux/mali/mali_utgard.h>
+#include "mali_pm.h"
+
+void mali_dev_pause(void)
+{
+	/*
+	 * Deactive all groups to prevent hardware being touched
+	 * during the period of mali device pausing
+	 */
+	mali_pm_os_suspend(MALI_FALSE);
+}
+
+EXPORT_SYMBOL(mali_dev_pause);
+
+void mali_dev_resume(void)
+{
+	mali_pm_os_resume();
+}
+
+EXPORT_SYMBOL(mali_dev_resume);
diff --git a/utgard/r6p2/linux/mali_dma_fence.c b/utgard/r6p2/linux/mali_dma_fence.c
new file mode 100644
index 0000000..9f9a5b6
--- /dev/null
+++ b/utgard/r6p2/linux/mali_dma_fence.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/version.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+#include "mali_dma_fence.h"
+#include <linux/atomic.h>
+#include <linux/workqueue.h>
+#endif
+
+static DEFINE_SPINLOCK(mali_dma_fence_lock);
+
+static bool mali_dma_fence_enable_signaling(struct fence *fence)
+{
+	MALI_IGNORE(fence);
+	return true;
+}
+
+static const char *mali_dma_fence_get_driver_name(struct fence *fence)
+{
+	MALI_IGNORE(fence);
+	return "mali";
+}
+
+static const char *mali_dma_fence_get_timeline_name(struct fence *fence)
+{
+	MALI_IGNORE(fence);
+	return "mali_dma_fence";
+}
+
+static const struct fence_ops mali_dma_fence_ops = {
+	.get_driver_name = mali_dma_fence_get_driver_name,
+	.get_timeline_name = mali_dma_fence_get_timeline_name,
+	.enable_signaling = mali_dma_fence_enable_signaling,
+	.signaled = NULL,
+	.wait = fence_default_wait,
+	.release = NULL
+};
+
+static void mali_dma_fence_context_cleanup(struct mali_dma_fence_context *dma_fence_context)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+
+	for (i = 0; i < dma_fence_context->num_dma_fence_waiter; i++) {
+		if (dma_fence_context->mali_dma_fence_waiters[i]) {
+			fence_remove_callback(dma_fence_context->mali_dma_fence_waiters[i]->fence,
+					      &dma_fence_context->mali_dma_fence_waiters[i]->base);
+			fence_put(dma_fence_context->mali_dma_fence_waiters[i]->fence);
+			kfree(dma_fence_context->mali_dma_fence_waiters[i]);
+			dma_fence_context->mali_dma_fence_waiters[i] = NULL;
+		}
+	}
+
+	if (NULL != dma_fence_context->mali_dma_fence_waiters)
+		kfree(dma_fence_context->mali_dma_fence_waiters);
+
+	dma_fence_context->mali_dma_fence_waiters = NULL;
+	dma_fence_context->num_dma_fence_waiter = 0;
+}
+
+static void mali_dma_fence_context_work_func(struct work_struct *work_handle)
+{
+	struct mali_dma_fence_context *dma_fence_context;
+
+	MALI_DEBUG_ASSERT_POINTER(work_handle);
+
+	dma_fence_context = container_of(work_handle, struct mali_dma_fence_context, work_handle);
+
+	mali_dma_fence_context_cleanup(dma_fence_context);
+
+	dma_fence_context->cb_func(dma_fence_context->pp_job_ptr);
+}
+
+static void mali_dma_fence_callback(struct fence *fence, struct fence_cb *cb)
+{
+	struct mali_dma_fence_waiter *dma_fence_waiter = NULL;
+	struct mali_dma_fence_context *dma_fence_context = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+	MALI_DEBUG_ASSERT_POINTER(cb);
+
+	MALI_IGNORE(fence);
+
+	dma_fence_waiter = container_of(cb, struct mali_dma_fence_waiter, base);
+	dma_fence_context = dma_fence_waiter->parent;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+
+	if (atomic_dec_and_test(&dma_fence_context->count))
+		schedule_work(&dma_fence_context->work_handle);
+}
+
+static _mali_osk_errcode_t mali_dma_fence_add_callback(struct mali_dma_fence_context *dma_fence_context, struct fence *fence)
+{
+	int ret = 0;
+	struct mali_dma_fence_waiter *dma_fence_waiter;
+	struct mali_dma_fence_waiter **dma_fence_waiters;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	dma_fence_waiters = krealloc(dma_fence_context->mali_dma_fence_waiters,
+				     (dma_fence_context->num_dma_fence_waiter + 1)
+				     * sizeof(struct mali_dma_fence_waiter *),
+				     GFP_KERNEL);
+
+	if (NULL == dma_fence_waiters) {
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to realloc the dma fence waiters.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	dma_fence_context->mali_dma_fence_waiters = dma_fence_waiters;
+
+	dma_fence_waiter = kzalloc(sizeof(struct mali_dma_fence_waiter), GFP_KERNEL);
+
+	if (NULL == dma_fence_waiter) {
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to create mali dma fence waiter.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	fence_get(fence);
+
+	dma_fence_waiter->fence = fence;
+	dma_fence_waiter->parent = dma_fence_context;
+	atomic_inc(&dma_fence_context->count);
+
+	ret = fence_add_callback(fence, &dma_fence_waiter->base,
+				 mali_dma_fence_callback);
+	if (0 > ret) {
+		fence_put(fence);
+		kfree(dma_fence_waiter);
+		atomic_dec(&dma_fence_context->count);
+		if (-ENOENT == ret) {
+			/*-ENOENT if fence has already been signaled, return _MALI_OSK_ERR_OK*/
+			return _MALI_OSK_ERR_OK;
+		}
+		/* Failed to add the fence callback into fence, return _MALI_OSK_ERR_FAULT*/
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into fence.\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	dma_fence_context->mali_dma_fence_waiters[dma_fence_context->num_dma_fence_waiter] = dma_fence_waiter;
+	dma_fence_context->num_dma_fence_waiter++;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+struct fence *mali_dma_fence_new(u32  context, u32 seqno)
+{
+	struct fence *fence = NULL;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+
+	if (NULL == fence) {
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to create dma fence.\n"));
+		return fence;
+	}
+
+	fence_init(fence,
+		   &mali_dma_fence_ops,
+		   &mali_dma_fence_lock,
+		   context, seqno);
+
+	return fence;
+}
+
+void mali_dma_fence_signal_and_put(struct fence **fence)
+{
+	MALI_DEBUG_ASSERT_POINTER(fence);
+	MALI_DEBUG_ASSERT_POINTER(*fence);
+
+	fence_signal(*fence);
+	fence_put(*fence);
+	*fence = NULL;
+}
+
+void mali_dma_fence_context_init(struct mali_dma_fence_context *dma_fence_context,
+				 mali_dma_fence_context_callback_func_t  cb_func,
+				 void *pp_job_ptr)
+{
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+
+	INIT_WORK(&dma_fence_context->work_handle, mali_dma_fence_context_work_func);
+	atomic_set(&dma_fence_context->count, 1);
+	dma_fence_context->num_dma_fence_waiter = 0;
+	dma_fence_context->mali_dma_fence_waiters = NULL;
+	dma_fence_context->cb_func = cb_func;
+	dma_fence_context->pp_job_ptr = pp_job_ptr;
+}
+
+_mali_osk_errcode_t mali_dma_fence_context_add_waiters(struct mali_dma_fence_context *dma_fence_context,
+		struct reservation_object *dma_reservation_object)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+	struct fence *exclusive_fence = NULL;
+	u32 shared_count = 0, i;
+	struct fence **shared_fences = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+	MALI_DEBUG_ASSERT_POINTER(dma_reservation_object);
+
+	/* Get all the shared/exclusive fences in the reservation object of dma buf*/
+	ret = reservation_object_get_fences_rcu(dma_reservation_object, &exclusive_fence,
+						&shared_count, &shared_fences);
+	if (ret < 0) {
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to get  shared or exclusive_fence dma fences from  the reservation object of dma buf.\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (exclusive_fence) {
+		ret = mali_dma_fence_add_callback(dma_fence_context, exclusive_fence);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into exclusive fence.\n"));
+			mali_dma_fence_context_cleanup(dma_fence_context);
+			goto ended;
+		}
+	}
+
+
+	for (i = 0; i < shared_count; i++) {
+		ret = mali_dma_fence_add_callback(dma_fence_context, shared_fences[i]);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into shared fence [%d].\n", i));
+			mali_dma_fence_context_cleanup(dma_fence_context);
+			break;
+		}
+	}
+
+ended:
+
+	if (exclusive_fence)
+		fence_put(exclusive_fence);
+
+	if (shared_fences) {
+		for (i = 0; i < shared_count; i++) {
+			fence_put(shared_fences[i]);
+		}
+		kfree(shared_fences);
+	}
+
+	return ret;
+}
+
+
+void mali_dma_fence_context_term(struct mali_dma_fence_context *dma_fence_context)
+{
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+	atomic_set(&dma_fence_context->count, 0);
+	if (dma_fence_context->work_handle.func) {
+		cancel_work_sync(&dma_fence_context->work_handle);
+	}
+	mali_dma_fence_context_cleanup(dma_fence_context);
+}
+
+void mali_dma_fence_context_dec_count(struct mali_dma_fence_context *dma_fence_context)
+{
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+
+	if (atomic_dec_and_test(&dma_fence_context->count))
+		schedule_work(&dma_fence_context->work_handle);
+}
+
+
+void mali_dma_fence_add_reservation_object_list(struct reservation_object *dma_reservation_object,
+		struct reservation_object **dma_reservation_object_list,
+		u32 *num_dma_reservation_object)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_reservation_object);
+	MALI_DEBUG_ASSERT_POINTER(dma_reservation_object_list);
+	MALI_DEBUG_ASSERT_POINTER(num_dma_reservation_object);
+
+	for (i = 0; i < *num_dma_reservation_object; i++) {
+		if (dma_reservation_object_list[i] == dma_reservation_object)
+			return;
+	}
+
+	dma_reservation_object_list[*num_dma_reservation_object] = dma_reservation_object;
+	(*num_dma_reservation_object)++;
+}
+
+int mali_dma_fence_lock_reservation_object_list(struct reservation_object **dma_reservation_object_list,
+		u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx)
+{
+	u32 i;
+
+	struct reservation_object *reservation_object_to_slow_lock = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_reservation_object_list);
+	MALI_DEBUG_ASSERT_POINTER(ww_actx);
+
+	ww_acquire_init(ww_actx, &reservation_ww_class);
+
+again:
+	for (i = 0; i < num_dma_reservation_object; i++) {
+		int ret;
+
+		if (dma_reservation_object_list[i] == reservation_object_to_slow_lock) {
+			reservation_object_to_slow_lock = NULL;
+			continue;
+		}
+
+		ret = ww_mutex_lock(&dma_reservation_object_list[i]->lock, ww_actx);
+
+		if (ret < 0) {
+			u32  slow_lock_index = i;
+
+			/* unlock all pre locks we have already locked.*/
+			while (i > 0) {
+				i--;
+				ww_mutex_unlock(&dma_reservation_object_list[i]->lock);
+			}
+
+			if (NULL != reservation_object_to_slow_lock)
+				ww_mutex_unlock(&reservation_object_to_slow_lock->lock);
+
+			if (ret == -EDEADLK) {
+				reservation_object_to_slow_lock = dma_reservation_object_list[slow_lock_index];
+				ww_mutex_lock_slow(&reservation_object_to_slow_lock->lock, ww_actx);
+				goto again;
+			}
+			ww_acquire_fini(ww_actx);
+			MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to lock all dma reservation objects.\n", i));
+			return ret;
+		}
+	}
+
+	ww_acquire_done(ww_actx);
+	return 0;
+}
+
+void mali_dma_fence_unlock_reservation_object_list(struct reservation_object **dma_reservation_object_list,
+		u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx)
+{
+	u32 i;
+
+	for (i = 0; i < num_dma_reservation_object; i++)
+		ww_mutex_unlock(&dma_reservation_object_list[i]->lock);
+
+	ww_acquire_fini(ww_actx);
+}
diff --git a/utgard/r6p2/linux/mali_dma_fence.h b/utgard/r6p2/linux/mali_dma_fence.h
new file mode 100644
index 0000000..35534e2
--- /dev/null
+++ b/utgard/r6p2/linux/mali_dma_fence.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_dma_fence.h
+ *
+ * Mali interface for Linux dma buf fence objects.
+ */
+
+#ifndef _MALI_DMA_FENCE_H_
+#define _MALI_DMA_FENCE_H_
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+#include <linux/fence.h>
+#include <linux/reservation.h>
+#endif
+
+struct mali_dma_fence_context;
+
+/* The mali dma fence context callback function */
+typedef void (*mali_dma_fence_context_callback_func_t)(void *pp_job_ptr);
+
+struct mali_dma_fence_waiter {
+	struct fence_cb base;
+	struct mali_dma_fence_context *parent;
+	struct fence *fence;
+};
+
+struct mali_dma_fence_context {
+	struct work_struct work_handle;
+	struct mali_dma_fence_waiter **mali_dma_fence_waiters;
+	u32 num_dma_fence_waiter;
+	atomic_t count;
+	void *pp_job_ptr; /* the mali pp job pointer */;
+	mali_dma_fence_context_callback_func_t cb_func;
+};
+
+/* Create a dma fence
+ * @param context The execution context this fence is run on
+ * @param seqno A linearly increasing sequence number for this context
+ * @return the new dma fence if success, or NULL on failure.
+ */
+struct fence *mali_dma_fence_new(u32  context, u32 seqno);
+
+/* Signal and put dma fence
+ * @param fence The dma fence to signal and put
+ */
+void mali_dma_fence_signal_and_put(struct fence **fence);
+
+/**
+ * Initialize a mali dma fence context for pp job.
+ * @param dma_fence_context The mali dma fence context to initialize.
+ * @param cb_func The dma fence context callback function to call when all dma fence release.
+ * @param pp_job_ptr The pp_job to call function with.
+ */
+void mali_dma_fence_context_init(struct mali_dma_fence_context *dma_fence_context,
+				 mali_dma_fence_context_callback_func_t  cb_func,
+				 void *pp_job_ptr);
+
+/**
+ * Add new mali dma fence waiter into mali dma fence context
+ * @param dma_fence_context The mali dma fence context
+ * @param dma_reservation_object the reservation object to create new mali dma fence waiters
+ * @return _MALI_OSK_ERR_OK if success, or not.
+ */
+_mali_osk_errcode_t mali_dma_fence_context_add_waiters(struct mali_dma_fence_context *dma_fence_context,
+		struct reservation_object *dma_reservation_object);
+
+/**
+ * Release the dma fence context
+ * @param dma_fence_text The mali dma fence context.
+ */
+void mali_dma_fence_context_term(struct mali_dma_fence_context *dma_fence_context);
+
+/**
+ * Decrease the dma fence context atomic count
+ * @param dma_fence_text The mali dma fence context.
+ */
+void mali_dma_fence_context_dec_count(struct mali_dma_fence_context *dma_fence_context);
+
+/**
+ * Get all reservation object
+ * @param dma_reservation_object The reservation object to add into the reservation object list
+ * @param dma_reservation_object_list The reservation object list to store all reservation object
+ * @param num_dma_reservation_object The number of all reservation object
+ */
+void mali_dma_fence_add_reservation_object_list(struct reservation_object *dma_reservation_object,
+		struct reservation_object **dma_reservation_object_list,
+		u32 *num_dma_reservation_object);
+
+/**
+ * Wait/wound mutex lock to lock all reservation object.
+ */
+int mali_dma_fence_lock_reservation_object_list(struct reservation_object **dma_reservation_object_list,
+		u32  num_dma_reservation_object, struct ww_acquire_ctx *ww_actx);
+
+/**
+ * Wait/wound mutex lock to unlock all reservation object.
+ */
+void mali_dma_fence_unlock_reservation_object_list(struct reservation_object **dma_reservation_object_list,
+		u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx);
+#endif
diff --git a/utgard/r6p2/linux/mali_kernel_linux.c b/utgard/r6p2/linux/mali_kernel_linux.c
new file mode 100755
index 0000000..70dab67
--- /dev/null
+++ b/utgard/r6p2/linux/mali_kernel_linux.c
@@ -0,0 +1,1149 @@
+/**
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_kernel_linux.c
+ * Implementation of the Linux device driver entrypoints
+ */
+#include <linux/module.h>   /* kernel module definitions */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/cdev.h>     /* character device definitions */
+#include <linux/mm.h>       /* memory manager definitions */
+#include <linux/mali/mali_utgard_ioctl.h>
+#include <linux/version.h>
+#include <linux/device.h>
+#include "mali_kernel_license.h"
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/bug.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_core.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_ukk.h"
+#include "mali_ukk_wrappers.h"
+#include "mali_kernel_sysfs.h"
+#include "mali_pm.h"
+#include "mali_kernel_license.h"
+#include "mali_memory.h"
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_swap_alloc.h"
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include "mali_profiling_internal.h"
+#endif
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+#include "mali_osk_profiling.h"
+#include "mali_dvfs_policy.h"
+
+static int is_first_resume = 1;
+/*Store the clk and vol for boot/insmod and mali_resume*/
+static struct mali_gpu_clk_item mali_gpu_clk[2];
+#endif
+
+/* Streamline support for the Mali driver */
+#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_MALI400_PROFILING)
+/* Ask Linux to create the tracepoints */
+#define CREATE_TRACE_POINTS
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_hw_counter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counters);
+#endif /* CONFIG_TRACEPOINTS */
+
+#ifdef CONFIG_MALI_DEVFREQ
+#include "mali_devfreq.h"
+#include "mali_osk_mali.h"
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
+#include <linux/pm_opp.h>
+#else
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif /* Linux >= 3.13*/
+#define dev_pm_opp_of_add_table of_init_opp_table
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
+#define dev_pm_opp_of_remove_table of_free_opp_table
+#endif /* Linux >= 3.19 */
+#endif /* Linux >= 4.4.0 */
+#endif
+
+/* from the __malidrv_build_info.c file that is generated during build */
+extern const char *__malidrv_build_info(void);
+extern void mali_post_init(void);
+extern int mali_pdev_dts_init(struct platform_device* mali_gpu_device);
+extern int mpgpu_class_init(void);
+extern void mpgpu_class_exit(void);
+
+int mali_page_fault = 0;
+module_param(mali_page_fault, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_page_fault, "mali_page_fault");
+
+int pp_hardware_reset = 0;
+module_param(pp_hardware_reset, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(pp_hardware_reset, "mali_hardware_reset");
+/* Module parameter to control log level */
+int mali_debug_level = 2;
+module_param(mali_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_debug_level, "Higher number, more dmesg output");
+
+extern int mali_max_job_runtime;
+module_param(mali_max_job_runtime, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_job_runtime, "Maximum allowed job runtime in msecs.\nJobs will be killed after this no matter what");
+
+extern int mali_l2_max_reads;
+module_param(mali_l2_max_reads, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_l2_max_reads, "Maximum reads for Mali L2 cache");
+
+extern unsigned int mali_dedicated_mem_start;
+module_param(mali_dedicated_mem_start, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_dedicated_mem_start, "Physical start address of dedicated Mali GPU memory.");
+
+extern unsigned int mali_dedicated_mem_size;
+module_param(mali_dedicated_mem_size, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_dedicated_mem_size, "Size of dedicated Mali GPU memory.");
+
+extern unsigned int mali_shared_mem_size;
+module_param(mali_shared_mem_size, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_shared_mem_size, "Size of shared Mali GPU memory.");
+
+#if defined(CONFIG_MALI400_PROFILING)
+extern int mali_boot_profiling;
+module_param(mali_boot_profiling, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_boot_profiling, "Start profiling as a part of Mali driver initialization");
+#endif
+
+extern int mali_max_pp_cores_group_1;
+module_param(mali_max_pp_cores_group_1, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_pp_cores_group_1, "Limit the number of PP cores to use from first PP group.");
+
+extern int mali_max_pp_cores_group_2;
+module_param(mali_max_pp_cores_group_2, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_pp_cores_group_2, "Limit the number of PP cores to use from second PP group (Mali-450 only).");
+
+extern unsigned int mali_mem_swap_out_threshold_value;
+module_param(mali_mem_swap_out_threshold_value, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_mem_swap_out_threshold_value, "Threshold value used to limit how much swappable memory cached in Mali driver.");
+
+#if defined(CONFIG_MALI_DVFS)
+/** the max fps the same as display vsync default 60, can set by module insert parameter */
+extern int mali_max_system_fps;
+module_param(mali_max_system_fps, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_system_fps, "Max system fps the same as display VSYNC.");
+
+/** a lower limit on their desired FPS default 58, can set by module insert parameter*/
+extern int mali_desired_fps;
+module_param(mali_desired_fps, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_desired_fps, "A bit lower than max_system_fps which user desired fps");
+#endif
+
+#if MALI_ENABLE_CPU_CYCLES
+#include <linux/cpumask.h>
+#include <linux/timer.h>
+#include <asm/smp.h>
+static struct timer_list mali_init_cpu_clock_timers[8];
+static u32 mali_cpu_clock_last_value[8] = {0,};
+#endif
+
+/* Export symbols from common code: mali_user_settings.c */
+#include "mali_user_settings_db.h"
+EXPORT_SYMBOL(mali_set_user_setting);
+EXPORT_SYMBOL(mali_get_user_setting);
+
+static char mali_dev_name[] = "mali"; /* should be const, but the functions we call requires non-cost */
+
+/* This driver only supports one Mali device, and this variable stores this single platform device */
+struct platform_device *mali_platform_device = NULL;
+
+/* This driver only supports one Mali device, and this variable stores the exposed misc device (/dev/mali) */
+static struct miscdevice mali_miscdevice = { 0, };
+
+static int mali_miscdevice_register(struct platform_device *pdev);
+static void mali_miscdevice_unregister(void);
+
+static int mali_open(struct inode *inode, struct file *filp);
+static int mali_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int mali_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static int mali_probe(struct platform_device *pdev);
+static int mali_remove(struct platform_device *pdev);
+
+static int mali_driver_suspend_scheduler(struct device *dev);
+static int mali_driver_resume_scheduler(struct device *dev);
+
+#ifdef CONFIG_PM_RUNTIME
+static int mali_driver_runtime_suspend(struct device *dev);
+static int mali_driver_runtime_resume(struct device *dev);
+static int mali_driver_runtime_idle(struct device *dev);
+#endif
+
+#if defined(MALI_FAKE_PLATFORM_DEVICE)
+#if defined(CONFIG_MALI_DT)
+extern int mali_platform_device_init(struct platform_device *device);
+extern int mali_platform_device_deinit(struct platform_device *device);
+#else
+extern int mali_platform_device_register(void);
+extern int mali_platform_device_unregister(void);
+#endif
+#endif
+
+/* Linux power management operations provided by the Mali device driver */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29))
+struct pm_ext_ops mali_dev_ext_pm_ops = {
+	.base =
+	{
+		.suspend = mali_driver_suspend_scheduler,
+		.resume = mali_driver_resume_scheduler,
+		.freeze = mali_driver_suspend_scheduler,
+		.thaw =   mali_driver_resume_scheduler,
+	},
+};
+#else
+static const struct dev_pm_ops mali_dev_pm_ops = {
+#ifdef CONFIG_PM_RUNTIME
+	.runtime_suspend = mali_driver_runtime_suspend,
+	.runtime_resume = mali_driver_runtime_resume,
+	.runtime_idle = mali_driver_runtime_idle,
+#endif
+	.suspend = mali_driver_suspend_scheduler,
+	.resume = mali_driver_resume_scheduler,
+	.freeze = mali_driver_suspend_scheduler,
+	.thaw = mali_driver_resume_scheduler,
+	.poweroff = mali_driver_suspend_scheduler,
+};
+#endif
+
+#ifdef CONFIG_MALI_DT
+static struct of_device_id base_dt_ids[] = {
+	{.compatible = "arm,mali-300"},
+	{.compatible = "arm,mali-400"},
+	{.compatible = "arm,mali-450"},
+	{.compatible = "arm,mali-470"},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, base_dt_ids);
+#endif
+
+/* The Mali device driver struct */
+static struct platform_driver mali_platform_driver = {
+	.probe  = mali_probe,
+	.remove = mali_remove,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29))
+	.pm = &mali_dev_ext_pm_ops,
+#endif
+	.driver =
+	{
+		.name   = MALI_GPU_NAME_UTGARD,
+		.owner  = THIS_MODULE,
+		.bus = &platform_bus_type,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29))
+		.pm = &mali_dev_pm_ops,
+#endif
+#ifdef CONFIG_MALI_DT
+		.of_match_table = of_match_ptr(base_dt_ids),
+#endif
+	},
+};
+
+/* Linux misc device operations (/dev/mali) */
+struct file_operations mali_fops = {
+	.owner = THIS_MODULE,
+	.open = mali_open,
+	.release = mali_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl = mali_ioctl,
+#else
+	.ioctl = mali_ioctl,
+#endif
+	.compat_ioctl = mali_ioctl,
+	.mmap = mali_mmap
+};
+
+#if MALI_ENABLE_CPU_CYCLES
+void mali_init_cpu_time_counters(int reset, int enable_divide_by_64)
+{
+	/* The CPU assembly reference used is: ARM Architecture Reference Manual ARMv7-AR C.b */
+	u32 write_value;
+
+	/* See B4.1.116 PMCNTENSET, Performance Monitors Count Enable Set register, VMSA */
+	/* setting p15 c9 c12 1 to 0x8000000f==CPU_CYCLE_ENABLE |EVENT_3_ENABLE|EVENT_2_ENABLE|EVENT_1_ENABLE|EVENT_0_ENABLE */
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f));
+
+
+	/* See B4.1.117 PMCR, Performance Monitors Control Register. Writing to p15, c9, c12, 0 */
+	write_value = 1 << 0; /* Bit 0 set. Enable counters */
+	if (reset) {
+		write_value |= 1 << 1; /* Reset event counters */
+		write_value |= 1 << 2; /* Reset cycle counter  */
+	}
+	if (enable_divide_by_64) {
+		write_value |= 1 << 3; /* Enable the Clock divider by 64 */
+	}
+	write_value |= 1 << 4; /* Export enable. Not needed */
+	asm volatile("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(write_value));
+
+	/* PMOVSR Overflow Flag Status Register - Clear Clock and Event overflows */
+	asm volatile("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x8000000f));
+
+
+	/* See B4.1.124 PMUSERENR - setting p15 c9 c14 to 1" */
+	/* User mode access to the Performance Monitors enabled. */
+	/* Lets User space read cpu clock cycles */
+	asm volatile("mcr p15, 0, %0, c9, c14, 0" :: "r"(1));
+}
+
+/** A timer function that configures the cycle clock counter on current CPU.
+ * The function \a mali_init_cpu_time_counters_on_all_cpus sets up this
+ * function to trigger on all Cpus during module load.
+ */
+static void mali_init_cpu_clock_timer_func(unsigned long data)
+{
+	int reset_counters, enable_divide_clock_counter_by_64;
+	int current_cpu = raw_smp_processor_id();
+	unsigned int sample0;
+	unsigned int sample1;
+
+	MALI_IGNORE(data);
+
+	reset_counters = 1;
+	enable_divide_clock_counter_by_64 = 0;
+	mali_init_cpu_time_counters(reset_counters, enable_divide_clock_counter_by_64);
+
+	sample0 = mali_get_cpu_cyclecount();
+	sample1 = mali_get_cpu_cyclecount();
+
+	MALI_DEBUG_PRINT(3, ("Init Cpu %d cycle counter- First two samples: %08x %08x \n", current_cpu, sample0, sample1));
+}
+
+/** A timer functions for storing current time on all cpus.
+ * Used for checking if the clocks have similar values or if they are drifting.
+ */
+static void mali_print_cpu_clock_timer_func(unsigned long data)
+{
+	int current_cpu = raw_smp_processor_id();
+	unsigned int sample0;
+
+	MALI_IGNORE(data);
+	sample0 = mali_get_cpu_cyclecount();
+	if (current_cpu < 8) {
+		mali_cpu_clock_last_value[current_cpu] = sample0;
+	}
+}
+
+/** Init the performance registers on all CPUs to count clock cycles.
+ * For init \a print_only should be 0.
+ * If \a print_only is 1, it will intead print the current clock value of all CPUs.
+ */
+void mali_init_cpu_time_counters_on_all_cpus(int print_only)
+{
+	int i = 0;
+	int cpu_number;
+	int jiffies_trigger;
+	int jiffies_wait;
+
+	jiffies_wait = msecs_to_jiffies(20);
+	jiffies_trigger = jiffies + jiffies_wait;
+
+	for (i = 0 ; i < 8 ; i++) {
+		init_timer(&mali_init_cpu_clock_timers[i]);
+		if (print_only) mali_init_cpu_clock_timers[i].function = mali_print_cpu_clock_timer_func;
+		else            mali_init_cpu_clock_timers[i].function = mali_init_cpu_clock_timer_func;
+		mali_init_cpu_clock_timers[i].expires = jiffies_trigger ;
+	}
+	cpu_number = cpumask_first(cpu_online_mask);
+	for (i = 0 ; i < 8 ; i++) {
+		int next_cpu;
+		add_timer_on(&mali_init_cpu_clock_timers[i], cpu_number);
+		next_cpu = cpumask_next(cpu_number, cpu_online_mask);
+		if (next_cpu >= nr_cpu_ids) break;
+		cpu_number = next_cpu;
+	}
+
+	while (jiffies_wait) jiffies_wait = schedule_timeout_uninterruptible(jiffies_wait);
+
+	for (i = 0 ; i < 8 ; i++) {
+		del_timer_sync(&mali_init_cpu_clock_timers[i]);
+	}
+
+	if (print_only) {
+		if ((0 == mali_cpu_clock_last_value[2]) && (0 == mali_cpu_clock_last_value[3])) {
+			/* Diff can be printed if we want to check if the clocks are in sync
+			int diff = mali_cpu_clock_last_value[0] - mali_cpu_clock_last_value[1];*/
+			MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1]));
+		} else {
+			MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1], mali_cpu_clock_last_value[2], mali_cpu_clock_last_value[3]));
+		}
+	}
+}
+#endif
+
+int mali_module_init(void)
+{
+	int err = 0;
+
+	MALI_DEBUG_PRINT(2, ("Inserting Mali v%d device driver. \n", _MALI_API_VERSION));
+	//MALI_DEBUG_PRINT(2, ("Compiled: %s, time: %s.\n", __DATE__, __TIME__));
+	MALI_DEBUG_PRINT(2, ("Driver revision: %s\n", SVN_REV_STRING));
+
+#if MALI_ENABLE_CPU_CYCLES
+	mali_init_cpu_time_counters_on_all_cpus(0);
+	MALI_DEBUG_PRINT(2, ("CPU cycle counter setup complete\n"));
+	/* Printing the current cpu counters */
+	mali_init_cpu_time_counters_on_all_cpus(1);
+#endif
+
+	/* Initialize module wide settings */
+#ifdef MALI_FAKE_PLATFORM_DEVICE
+#ifndef CONFIG_MALI_DT
+	MALI_DEBUG_PRINT(2, ("mali_module_init() registering device\n"));
+	err = mali_platform_device_register();
+	if (0 != err) {
+		return err;
+	}
+#endif
+#endif
+
+	MALI_DEBUG_PRINT(2, ("mali_module_init() registering driver\n"));
+
+	err = platform_driver_register(&mali_platform_driver);
+
+	if (0 != err) {
+		MALI_DEBUG_PRINT(2, ("mali_module_init() Failed to register driver (%d)\n", err));
+#ifdef MALI_FAKE_PLATFORM_DEVICE
+#ifndef CONFIG_MALI_DT
+		mali_platform_device_unregister();
+#endif
+#endif
+		mali_platform_device = NULL;
+		return err;
+	}
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+	err = _mali_internal_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE);
+	if (0 != err) {
+		/* No biggie if we wheren't able to initialize the profiling */
+		MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n"));
+	}
+#endif
+
+	/* Tracing the current frequency and voltage from boot/insmod*/
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item(),to record current clk info.*/
+	mali_get_current_gpu_clk_item(&mali_gpu_clk[0]);
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[0].clock,
+				      mali_gpu_clk[0].vol / 1000,
+				      0, 0, 0);
+#endif
+
+	MALI_PRINT(("Mali device driver loaded\n"));
+
+	mpgpu_class_init();
+
+	return 0; /* Success */
+}
+
+void mali_module_exit(void)
+{
+	MALI_DEBUG_PRINT(2, ("Unloading Mali v%d device driver.\n", _MALI_API_VERSION));
+
+	MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering driver\n"));
+
+	platform_driver_unregister(&mali_platform_driver);
+
+#if defined(MALI_FAKE_PLATFORM_DEVICE)
+#ifndef CONFIG_MALI_DT
+	MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering device\n"));
+	mali_platform_device_unregister();
+#endif
+#endif
+
+	/* Tracing the current frequency and voltage from rmmod*/
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      0,
+				      0,
+				      0, 0, 0);
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+	_mali_internal_profiling_term();
+#endif
+	mpgpu_class_exit();
+
+	MALI_PRINT(("Mali device driver unloaded\n"));
+}
+
+#ifdef CONFIG_MALI_DEVFREQ
+struct mali_device *mali_device_alloc(void)
+{
+	return kzalloc(sizeof(struct mali_device), GFP_KERNEL);
+}
+
+void mali_device_free(struct mali_device *mdev)
+{
+	kfree(mdev);
+}
+#endif
+
+static int mali_probe(struct platform_device *pdev)
+{
+	int err;
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev;
+#endif
+
+	MALI_DEBUG_PRINT(2, ("mali_probe(): Called for platform device %s\n", pdev->name));
+
+	if (NULL != mali_platform_device) {
+		/* Already connected to a device, return error */
+		MALI_PRINT_ERROR(("mali_probe(): The Mali driver is already connected with a Mali device."));
+		return -EEXIST;
+	}
+
+	mali_platform_device = pdev;
+
+#ifdef CONFIG_MALI_DT
+	/* If we use DT to initialize our DDK, we have to prepare somethings. */
+	err = mali_platform_device_init(mali_platform_device);
+	if (0 != err) {
+		MALI_PRINT_ERROR(("mali_probe(): Failed to initialize platform device."));
+		mali_platform_device = NULL;
+		return -EFAULT;
+	}
+#endif
+
+#ifdef CONFIG_MALI_DEVFREQ
+	mdev = mali_device_alloc();
+	if (!mdev) {
+		MALI_PRINT_ERROR(("Can't allocate mali device private data\n"));
+		return -ENOMEM;
+	}
+
+	mdev->dev = &pdev->dev;
+	dev_set_drvdata(mdev->dev, mdev);
+
+	/*Initilization clock and regulator*/
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_REGULATOR)
+	mdev->regulator = regulator_get_optional(mdev->dev, "mali");
+	if (IS_ERR_OR_NULL(mdev->regulator)) {
+		MALI_DEBUG_PRINT(2, ("Continuing without Mali regulator control\n"));
+		mdev->regulator = NULL;
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+	if (dev_pm_opp_of_add_table(mdev->dev) < 0)
+		MALI_DEBUG_PRINT(3, ("OPP table not found\n"));
+#endif
+
+	/* Need to name the gpu clock "clk_mali" in the device tree */
+	mdev->clock = clk_get(mdev->dev, "clk_mali");
+	if (IS_ERR_OR_NULL(mdev->clock)) {
+		MALI_DEBUG_PRINT(2, ("Continuing without Mali clock control\n"));
+		mdev->clock = NULL;
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare_enable(mdev->clock);
+		if (err) {
+			MALI_PRINT_ERROR(("Failed to prepare and enable clock (%d)\n", err));
+			goto clock_prepare_failed;
+		}
+	}
+
+	/* initilize pm metrics related */
+	if (mali_pm_metrics_init(mdev) < 0) {
+		MALI_DEBUG_PRINT(2, ("mali pm metrics init failed\n"));
+		goto pm_metrics_init_failed;
+	}
+
+	if (mali_devfreq_init(mdev) < 0) {
+		MALI_DEBUG_PRINT(2, ("mali devfreq init failed\n"));
+		goto devfreq_init_failed;
+	}
+#endif
+
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_wq_init()) {
+		/* Initialize the Mali GPU HW specified by pdev */
+		if (_MALI_OSK_ERR_OK == mali_initialize_subsystems()) {
+			/* Register a misc device (so we are accessible from user space) */
+			err = mali_miscdevice_register(pdev);
+			if (0 == err) {
+				/* Setup sysfs entries */
+				err = mali_sysfs_register(mali_dev_name);
+
+				if (0 == err) {
+					mali_post_init();
+					MALI_DEBUG_PRINT(2, ("mali_probe(): Successfully initialized driver for platform device %s\n", pdev->name));
+
+					return 0;
+				} else {
+					MALI_PRINT_ERROR(("mali_probe(): failed to register sysfs entries"));
+				}
+				mali_miscdevice_unregister();
+			} else {
+				MALI_PRINT_ERROR(("mali_probe(): failed to register Mali misc device."));
+			}
+			mali_terminate_subsystems();
+		} else {
+			MALI_PRINT_ERROR(("mali_probe(): Failed to initialize Mali device driver."));
+		}
+		_mali_osk_wq_term();
+	}
+
+#ifdef CONFIG_MALI_DEVFREQ
+	mali_devfreq_term(mdev);
+devfreq_init_failed:
+	mali_pm_metrics_term(mdev);
+pm_metrics_init_failed:
+	clk_disable_unprepare(mdev->clock);
+clock_prepare_failed:
+	clk_put(mdev->clock);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_PM_OPP)
+	dev_pm_opp_of_remove_table(mdev->dev);
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_REGULATOR)
+	regulator_put(mdev->regulator);
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+	mali_device_free(mdev);
+#endif
+
+#ifdef CONFIG_MALI_DT
+	mali_platform_device_deinit(mali_platform_device);
+#endif
+	mali_platform_device = NULL;
+	return -EFAULT;
+}
+
+static int mali_remove(struct platform_device *pdev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(&pdev->dev);
+#endif
+
+	MALI_DEBUG_PRINT(2, ("mali_remove() called for platform device %s\n", pdev->name));
+	mali_sysfs_unregister();
+	mali_miscdevice_unregister();
+	mali_terminate_subsystems();
+	_mali_osk_wq_term();
+
+#ifdef CONFIG_MALI_DEVFREQ
+	mali_devfreq_term(mdev);
+
+	mali_pm_metrics_term(mdev);
+
+	if (mdev->clock) {
+		clk_disable_unprepare(mdev->clock);
+		clk_put(mdev->clock);
+		mdev->clock = NULL;
+	}
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_PM_OPP)
+	dev_pm_opp_of_remove_table(mdev->dev);
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+                        && defined(CONFIG_REGULATOR)
+	regulator_put(mdev->regulator);
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+	mali_device_free(mdev);
+#endif
+
+#ifdef CONFIG_MALI_DT
+	mali_platform_device_deinit(mali_platform_device);
+#endif
+	mali_platform_device = NULL;
+	return 0;
+}
+
+static int mali_miscdevice_register(struct platform_device *pdev)
+{
+	int err;
+
+	mali_miscdevice.minor = MISC_DYNAMIC_MINOR;
+	mali_miscdevice.name = mali_dev_name;
+	mali_miscdevice.fops = &mali_fops;
+	mali_miscdevice.parent = get_device(&pdev->dev);
+
+	err = misc_register(&mali_miscdevice);
+	if (0 != err) {
+		MALI_PRINT_ERROR(("Failed to register misc device, misc_register() returned %d\n", err));
+	}
+
+	return err;
+}
+
+static void mali_miscdevice_unregister(void)
+{
+	misc_deregister(&mali_miscdevice);
+}
+
+static int mali_driver_suspend_scheduler(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+                (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_suspend_device(mdev->devfreq);
+#endif
+
+	mali_pm_os_suspend(MALI_TRUE);
+	/* Tracing the frequency and voltage after mali is suspended */
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      0,
+				      0,
+				      0, 0, 0);
+	return 0;
+}
+
+static int mali_driver_resume_scheduler(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+	/* Tracing the frequency and voltage after mali is resumed */
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item() once,to record current clk info.*/
+	if (is_first_resume == 1) {
+		mali_get_current_gpu_clk_item(&mali_gpu_clk[1]);
+		is_first_resume = 0;
+	}
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[1].clock,
+				      mali_gpu_clk[1].vol / 1000,
+				      0, 0, 0);
+#endif
+	mali_pm_os_resume();
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+                (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(mdev->devfreq);
+#endif
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_RUNTIME
+static int mali_driver_runtime_suspend(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+	if (MALI_TRUE == mali_pm_runtime_suspend()) {
+		/* Tracing the frequency and voltage after mali is suspended */
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+					      0,
+					      0,
+					      0, 0, 0);
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+                (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+		MALI_DEBUG_PRINT(4, ("devfreq_suspend_device: stop devfreq monitor\n"));
+		devfreq_suspend_device(mdev->devfreq);
+#endif
+
+		return 0;
+	} else {
+		return -EBUSY;
+	}
+}
+
+static int mali_driver_runtime_resume(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+	/* Tracing the frequency and voltage after mali is resumed */
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item() once,to record current clk info.*/
+	if (is_first_resume == 1) {
+		mali_get_current_gpu_clk_item(&mali_gpu_clk[1]);
+		is_first_resume = 0;
+	}
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[1].clock,
+				      mali_gpu_clk[1].vol / 1000,
+				      0, 0, 0);
+#endif
+
+	mali_pm_runtime_resume();
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+                (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	MALI_DEBUG_PRINT(4, ("devfreq_resume_device: start devfreq monitor\n"));
+	devfreq_resume_device(mdev->devfreq);
+#endif
+	return 0;
+}
+
+static int mali_driver_runtime_idle(struct device *dev)
+{
+	/* Nothing to do */
+	return 0;
+}
+#endif
+
+static int mali_open(struct inode *inode, struct file *filp)
+{
+	struct mali_session_data *session_data;
+	_mali_osk_errcode_t err;
+
+	/* input validation */
+	if (mali_miscdevice.minor != iminor(inode)) {
+		MALI_PRINT_ERROR(("mali_open() Minor does not match\n"));
+		return -ENODEV;
+	}
+
+	/* allocated struct to track this session */
+	err = _mali_ukk_open((void **)&session_data);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	/* initialize file pointer */
+	filp->f_pos = 0;
+
+	/* link in our session data */
+	filp->private_data = (void *)session_data;
+
+	filp->f_mapping = mali_mem_swap_get_global_swap_file()->f_mapping;
+
+	return 0;
+}
+
+static int mali_release(struct inode *inode, struct file *filp)
+{
+	_mali_osk_errcode_t err;
+
+	/* input validation */
+	if (mali_miscdevice.minor != iminor(inode)) {
+		MALI_PRINT_ERROR(("mali_release() Minor does not match\n"));
+		return -ENODEV;
+	}
+
+	err = _mali_ukk_close((void **)&filp->private_data);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int map_errcode(_mali_osk_errcode_t err)
+{
+	switch (err) {
+	case _MALI_OSK_ERR_OK :
+		return 0;
+	case _MALI_OSK_ERR_FAULT:
+		return -EFAULT;
+	case _MALI_OSK_ERR_INVALID_FUNC:
+		return -ENOTTY;
+	case _MALI_OSK_ERR_INVALID_ARGS:
+		return -EINVAL;
+	case _MALI_OSK_ERR_NOMEM:
+		return -ENOMEM;
+	case _MALI_OSK_ERR_TIMEOUT:
+		return -ETIMEDOUT;
+	case _MALI_OSK_ERR_RESTARTSYSCALL:
+		return -ERESTARTSYS;
+	case _MALI_OSK_ERR_ITEM_NOT_FOUND:
+		return -ENOENT;
+	default:
+		return -EFAULT;
+	}
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int mali_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	int err;
+	struct mali_session_data *session_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+	/* inode not used */
+	(void)inode;
+#endif
+
+	MALI_DEBUG_PRINT(7, ("Ioctl received 0x%08X 0x%08lX\n", cmd, arg));
+
+	session_data = (struct mali_session_data *)filp->private_data;
+	if (NULL == session_data) {
+		MALI_DEBUG_PRINT(7, ("filp->private_data was NULL\n"));
+		return -ENOTTY;
+	}
+
+	if (NULL == (void *)arg) {
+		MALI_DEBUG_PRINT(7, ("arg was NULL\n"));
+		return -ENOTTY;
+	}
+
+	switch (cmd) {
+	case MALI_IOC_WAIT_FOR_NOTIFICATION:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_wait_for_notification_s), sizeof(u64)));
+		err = wait_for_notification_wrapper(session_data, (_mali_uk_wait_for_notification_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_API_VERSION_V2:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_api_version_v2_s), sizeof(u64)));
+		err = get_api_version_v2_wrapper(session_data, (_mali_uk_get_api_version_v2_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_API_VERSION:
+		err = get_api_version_wrapper(session_data, (_mali_uk_get_api_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_POST_NOTIFICATION:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_post_notification_s), sizeof(u64)));
+		err = post_notification_wrapper(session_data, (_mali_uk_post_notification_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_USER_SETTINGS:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_user_settings_s), sizeof(u64)));
+		err = get_user_settings_wrapper(session_data, (_mali_uk_get_user_settings_s __user *)arg);
+		break;
+
+	case MALI_IOC_REQUEST_HIGH_PRIORITY:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_request_high_priority_s), sizeof(u64)));
+		err = request_high_priority_wrapper(session_data, (_mali_uk_request_high_priority_s __user *)arg);
+		break;
+
+	case MALI_IOC_PENDING_SUBMIT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pending_submit_s), sizeof(u64)));
+		err = pending_submit_wrapper(session_data, (_mali_uk_pending_submit_s __user *)arg);
+		break;
+
+#if defined(CONFIG_MALI400_PROFILING)
+	case MALI_IOC_PROFILING_ADD_EVENT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_add_event_s), sizeof(u64)));
+		err = profiling_add_event_wrapper(session_data, (_mali_uk_profiling_add_event_s __user *)arg);
+		break;
+
+	case MALI_IOC_PROFILING_REPORT_SW_COUNTERS:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_sw_counters_report_s), sizeof(u64)));
+		err = profiling_report_sw_counters_wrapper(session_data, (_mali_uk_sw_counters_report_s __user *)arg);
+		break;
+
+	case MALI_IOC_PROFILING_STREAM_FD_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_stream_fd_get_s), sizeof(u64)));
+		err = profiling_get_stream_fd_wrapper(session_data, (_mali_uk_profiling_stream_fd_get_s __user *)arg);
+		break;
+
+	case MALI_IOC_PROILING_CONTROL_SET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_control_set_s), sizeof(u64)));
+		err = profiling_control_set_wrapper(session_data, (_mali_uk_profiling_control_set_s __user *)arg);
+		break;
+#else
+
+	case MALI_IOC_PROFILING_ADD_EVENT:          /* FALL-THROUGH */
+	case MALI_IOC_PROFILING_REPORT_SW_COUNTERS: /* FALL-THROUGH */
+		MALI_DEBUG_PRINT(2, ("Profiling not supported\n"));
+		err = -ENOTTY;
+		break;
+#endif
+
+	case MALI_IOC_PROFILING_MEMORY_USAGE_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_memory_usage_get_s), sizeof(u64)));
+		err = mem_usage_get_wrapper(session_data, (_mali_uk_profiling_memory_usage_get_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_ALLOC:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_alloc_mem_s), sizeof(u64)));
+		err = mem_alloc_wrapper(session_data, (_mali_uk_alloc_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_FREE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_free_mem_s), sizeof(u64)));
+		err = mem_free_wrapper(session_data, (_mali_uk_free_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_BIND:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_bind_mem_s), sizeof(u64)));
+		err = mem_bind_wrapper(session_data, (_mali_uk_bind_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_UNBIND:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_unbind_mem_s), sizeof(u64)));
+		err = mem_unbind_wrapper(session_data, (_mali_uk_unbind_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_COW:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_cow_mem_s), sizeof(u64)));
+		err = mem_cow_wrapper(session_data, (_mali_uk_cow_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_COW_MODIFY_RANGE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_cow_modify_range_s), sizeof(u64)));
+		err = mem_cow_modify_range_wrapper(session_data, (_mali_uk_cow_modify_range_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_RESIZE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_resize_s), sizeof(u64)));
+		err = mem_resize_mem_wrapper(session_data, (_mali_uk_mem_resize_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_WRITE_SAFE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_write_safe_s), sizeof(u64)));
+		err = mem_write_safe_wrapper(session_data, (_mali_uk_mem_write_safe_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_query_mmu_page_table_dump_size_s), sizeof(u64)));
+		err = mem_query_mmu_page_table_dump_size_wrapper(session_data, (_mali_uk_query_mmu_page_table_dump_size_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dump_mmu_page_table_s), sizeof(u64)));
+		err = mem_dump_mmu_page_table_wrapper(session_data, (_mali_uk_dump_mmu_page_table_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_DMA_BUF_GET_SIZE:
+#ifdef CONFIG_DMA_SHARED_BUFFER
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dma_buf_get_size_s), sizeof(u64)));
+		err = mali_dma_buf_get_size(session_data, (_mali_uk_dma_buf_get_size_s __user *)arg);
+#else
+		MALI_DEBUG_PRINT(2, ("DMA-BUF not supported\n"));
+		err = -ENOTTY;
+#endif
+		break;
+
+	case MALI_IOC_PP_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_start_job_s), sizeof(u64)));
+		err = pp_start_job_wrapper(session_data, (_mali_uk_pp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_AND_GP_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_and_gp_start_job_s), sizeof(u64)));
+		err = pp_and_gp_start_job_wrapper(session_data, (_mali_uk_pp_and_gp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_NUMBER_OF_CORES_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_number_of_cores_s), sizeof(u64)));
+		err = pp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_pp_number_of_cores_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_CORE_VERSION_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_core_version_s), sizeof(u64)));
+		err = pp_get_core_version_wrapper(session_data, (_mali_uk_get_pp_core_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_DISABLE_WB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_disable_wb_s), sizeof(u64)));
+		err = pp_disable_wb_wrapper(session_data, (_mali_uk_pp_disable_wb_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_start_job_s), sizeof(u64)));
+		err = gp_start_job_wrapper(session_data, (_mali_uk_gp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_NUMBER_OF_CORES_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_number_of_cores_s), sizeof(u64)));
+		err = gp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_gp_number_of_cores_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_CORE_VERSION_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_core_version_s), sizeof(u64)));
+		err = gp_get_core_version_wrapper(session_data, (_mali_uk_get_gp_core_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_SUSPEND_RESPONSE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_suspend_response_s), sizeof(u64)));
+		err = gp_suspend_response_wrapper(session_data, (_mali_uk_gp_suspend_response_s __user *)arg);
+		break;
+
+	case MALI_IOC_VSYNC_EVENT_REPORT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_vsync_event_report_s), sizeof(u64)));
+		err = vsync_event_report_wrapper(session_data, (_mali_uk_vsync_event_report_s __user *)arg);
+		break;
+
+	case MALI_IOC_TIMELINE_GET_LATEST_POINT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_get_latest_point_s), sizeof(u64)));
+		err = timeline_get_latest_point_wrapper(session_data, (_mali_uk_timeline_get_latest_point_s __user *)arg);
+		break;
+	case MALI_IOC_TIMELINE_WAIT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_wait_s), sizeof(u64)));
+		err = timeline_wait_wrapper(session_data, (_mali_uk_timeline_wait_s __user *)arg);
+		break;
+	case MALI_IOC_TIMELINE_CREATE_SYNC_FENCE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_create_sync_fence_s), sizeof(u64)));
+		err = timeline_create_sync_fence_wrapper(session_data, (_mali_uk_timeline_create_sync_fence_s __user *)arg);
+		break;
+	case MALI_IOC_SOFT_JOB_START:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_start_s), sizeof(u64)));
+		err = soft_job_start_wrapper(session_data, (_mali_uk_soft_job_start_s __user *)arg);
+		break;
+	case MALI_IOC_SOFT_JOB_SIGNAL:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_signal_s), sizeof(u64)));
+		err = soft_job_signal_wrapper(session_data, (_mali_uk_soft_job_signal_s __user *)arg);
+		break;
+
+	default:
+		MALI_DEBUG_PRINT(2, ("No handler for ioctl 0x%08X 0x%08lX\n", cmd, arg));
+		err = -ENOTTY;
+	};
+
+	return err;
+}
+
+
+module_init(mali_module_init);
+module_exit(mali_module_exit);
+
+MODULE_LICENSE(MALI_KERNEL_LINUX_LICENSE);
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(SVN_REV_STRING);
diff --git a/utgard/r6p2/linux/mali_kernel_linux.h b/utgard/r6p2/linux/mali_kernel_linux.h
new file mode 100755
index 0000000..9b4307e
--- /dev/null
+++ b/utgard/r6p2/linux/mali_kernel_linux.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_LINUX_H__
+#define __MALI_KERNEL_LINUX_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/cdev.h>     /* character device definitions */
+#include <linux/idr.h>
+#include <linux/rbtree.h>
+#include "mali_kernel_license.h"
+#include "mali_osk_types.h"
+#include <linux/version.h>
+
+extern struct platform_device *mali_platform_device;
+
+/* After 3.19.0 kenrel droped CONFIG_PM_RUNTIME define,define by ourself */
+#if defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
+#define CONFIG_PM_RUNTIME 1
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LINUX_H__ */
diff --git a/utgard/r6p2/linux/mali_kernel_sysfs.c b/utgard/r6p2/linux/mali_kernel_sysfs.c
new file mode 100755
index 0000000..9e00977
--- /dev/null
+++ b/utgard/r6p2/linux/mali_kernel_sysfs.c
@@ -0,0 +1,1410 @@
+/**
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+
+/**
+ * @file mali_kernel_sysfs.c
+ * Implementation of some sysfs data exports
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include "mali_kernel_license.h"
+#include "mali_kernel_common.h"
+#include "mali_ukk.h"
+
+#if MALI_LICENSE_IS_GPL
+
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_sysfs.h"
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include <linux/slab.h>
+#include "mali_osk_profiling.h"
+#endif
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_pm.h"
+#include "mali_pmu.h"
+#include "mali_group.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_l2_cache.h"
+#include "mali_hw_core.h"
+#include "mali_kernel_core.h"
+#include "mali_user_settings_db.h"
+#include "mali_profiling_internal.h"
+#include "mali_gp_job.h"
+#include "mali_pp_job.h"
+#include "mali_executor.h"
+
+#define PRIVATE_DATA_COUNTER_MAKE_GP(src) (src)
+#define PRIVATE_DATA_COUNTER_MAKE_PP(src) ((1 << 24) | src)
+#define PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(src, sub_job) ((1 << 24) | (1 << 16) | (sub_job << 8) | src)
+#define PRIVATE_DATA_COUNTER_IS_PP(a) ((((a) >> 24) & 0xFF) ? MALI_TRUE : MALI_FALSE)
+#define PRIVATE_DATA_COUNTER_GET_SRC(a) (a & 0xFF)
+#define PRIVATE_DATA_COUNTER_IS_SUB_JOB(a) ((((a) >> 16) & 0xFF) ? MALI_TRUE : MALI_FALSE)
+#define PRIVATE_DATA_COUNTER_GET_SUB_JOB(a) (((a) >> 8) & 0xFF)
+
+#define POWER_BUFFER_SIZE 3
+
+static struct dentry *mali_debugfs_dir = NULL;
+
+typedef enum {
+	_MALI_DEVICE_SUSPEND,
+	_MALI_DEVICE_RESUME,
+	_MALI_DEVICE_DVFS_PAUSE,
+	_MALI_DEVICE_DVFS_RESUME,
+	_MALI_MAX_EVENTS
+} _mali_device_debug_power_events;
+
+static const char *const mali_power_events[_MALI_MAX_EVENTS] = {
+	[_MALI_DEVICE_SUSPEND] = "suspend",
+	[_MALI_DEVICE_RESUME] = "resume",
+	[_MALI_DEVICE_DVFS_PAUSE] = "dvfs_pause",
+	[_MALI_DEVICE_DVFS_RESUME] = "dvfs_resume",
+};
+
+static mali_bool power_always_on_enabled = MALI_FALSE;
+
+static int open_copy_private_data(struct inode *inode, struct file *filp)
+{
+	filp->private_data = inode->i_private;
+	return 0;
+}
+
+static ssize_t group_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	struct mali_group *group;
+
+	group = (struct mali_group *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	r = snprintf(buffer, 64, "%u\n",
+		     mali_executor_group_is_disabled(group) ? 0 : 1);
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static ssize_t group_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	unsigned long val;
+	struct mali_group *group;
+
+	group = (struct mali_group *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	r = kstrtoul(&buffer[0], 10, &val);
+	if (0 != r) {
+		return -EINVAL;
+	}
+
+	switch (val) {
+	case 1:
+		mali_executor_group_enable(group);
+		break;
+	case 0:
+		mali_executor_group_disable(group);
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static const struct file_operations group_enabled_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read = group_enabled_read,
+	.write = group_enabled_write,
+};
+
+static ssize_t hw_core_base_addr_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	struct mali_hw_core *hw_core;
+
+	hw_core = (struct mali_hw_core *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(hw_core);
+
+	r = snprintf(buffer, 64, "0x%lX\n", hw_core->phys_addr);
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations hw_core_base_addr_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read = hw_core_base_addr_read,
+};
+
+static ssize_t profiling_counter_src_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data);
+	u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data);
+	mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data);
+	u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data);
+	char buf[64];
+	int r;
+	u32 val;
+
+	if (MALI_TRUE == is_pp) {
+		/* PP counter */
+		if (MALI_TRUE == is_sub_job) {
+			/* Get counter for a particular sub job */
+			if (0 == src_id) {
+				val = mali_pp_job_get_pp_counter_sub_job_src0(sub_job);
+			} else {
+				val = mali_pp_job_get_pp_counter_sub_job_src1(sub_job);
+			}
+		} else {
+			/* Get default counter for all PP sub jobs */
+			if (0 == src_id) {
+				val = mali_pp_job_get_pp_counter_global_src0();
+			} else {
+				val = mali_pp_job_get_pp_counter_global_src1();
+			}
+		}
+	} else {
+		/* GP counter */
+		if (0 == src_id) {
+			val = mali_gp_job_get_gp_counter_src0();
+		} else {
+			val = mali_gp_job_get_gp_counter_src1();
+		}
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == val) {
+		r = snprintf(buf, 64, "-1\n");
+	} else {
+		r = snprintf(buf, 64, "%u\n", val);
+	}
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t profiling_counter_src_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data);
+	u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data);
+	mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data);
+	u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data);
+	char buf[64];
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	if (MALI_TRUE == is_pp) {
+		/* PP counter */
+		if (MALI_TRUE == is_sub_job) {
+			/* Set counter for a particular sub job */
+			if (0 == src_id) {
+				mali_pp_job_set_pp_counter_sub_job_src0(sub_job, (u32)val);
+			} else {
+				mali_pp_job_set_pp_counter_sub_job_src1(sub_job, (u32)val);
+			}
+		} else {
+			/* Set default counter for all PP sub jobs */
+			if (0 == src_id) {
+				mali_pp_job_set_pp_counter_global_src0((u32)val);
+			} else {
+				mali_pp_job_set_pp_counter_global_src1((u32)val);
+			}
+		}
+	} else {
+		/* GP counter */
+		if (0 == src_id) {
+			mali_gp_job_set_gp_counter_src0((u32)val);
+		} else {
+			mali_gp_job_set_gp_counter_src1((u32)val);
+		}
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static const struct file_operations profiling_counter_src_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = profiling_counter_src_read,
+	.write = profiling_counter_src_write,
+};
+
+static ssize_t l2_l2x_counter_srcx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	int r;
+	u32 val;
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+
+	if (0 == src_id) {
+		val = mali_l2_cache_core_get_counter_src0(l2_core);
+	} else {
+		val = mali_l2_cache_core_get_counter_src1(l2_core);
+	}
+
+	if (MALI_HW_CORE_NO_COUNTER == val) {
+		r = snprintf(buf, 64, "-1\n");
+	} else {
+		r = snprintf(buf, 64, "%u\n", val);
+	}
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t l2_l2x_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+	char buf[64];
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	mali_l2_cache_core_set_counter_src(l2_core, src_id, (u32)val);
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t l2_all_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	long val;
+	int ret;
+	u32 l2_id;
+	struct mali_l2_cache_core *l2_cache;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	l2_id = 0;
+	l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+	while (NULL != l2_cache) {
+		mali_l2_cache_core_set_counter_src(l2_cache, src_id, (u32)val);
+
+		/* try next L2 */
+		l2_id++;
+		l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t l2_l2x_counter_src0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_src1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 1);
+}
+
+static ssize_t l2_l2x_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 1);
+}
+
+static ssize_t l2_all_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_all_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 1);
+}
+
+static const struct file_operations l2_l2x_counter_src0_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_src0_read,
+	.write = l2_l2x_counter_src0_write,
+};
+
+static const struct file_operations l2_l2x_counter_src1_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_src1_read,
+	.write = l2_l2x_counter_src1_write,
+};
+
+static const struct file_operations l2_all_counter_src0_fops = {
+	.owner = THIS_MODULE,
+	.write = l2_all_counter_src0_write,
+};
+
+static const struct file_operations l2_all_counter_src1_fops = {
+	.owner = THIS_MODULE,
+	.write = l2_all_counter_src1_write,
+};
+
+static ssize_t l2_l2x_counter_valx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	int r;
+	u32 src0 = 0;
+	u32 val0 = 0;
+	u32 src1 = 0;
+	u32 val1 = 0;
+	u32 val = -1;
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+
+	mali_l2_cache_core_get_counter_values(l2_core, &src0, &val0, &src1, &val1);
+
+	if (0 == src_id) {
+		if (MALI_HW_CORE_NO_COUNTER != val0) {
+			val = val0;
+		}
+	} else {
+		if (MALI_HW_CORE_NO_COUNTER != val1) {
+			val = val1;
+		}
+	}
+
+	r = snprintf(buf, 64, "%u\n", val);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t l2_l2x_counter_val0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_val1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 1);
+}
+
+static const struct file_operations l2_l2x_counter_val0_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_val0_read,
+};
+
+static const struct file_operations l2_l2x_counter_val1_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_val1_read,
+};
+
+static ssize_t power_always_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+	char buf[32];
+
+	cnt = min(cnt, sizeof(buf) - 1);
+	if (copy_from_user(buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+	buf[cnt] = '\0';
+
+	ret = kstrtoul(buf, 10, &val);
+	if (0 != ret) {
+		return ret;
+	}
+
+	/* Update setting (not exactly thread safe) */
+	if (1 == val && MALI_FALSE == power_always_on_enabled) {
+		power_always_on_enabled = MALI_TRUE;
+		_mali_osk_pm_dev_ref_get_sync();
+	} else if (0 == val && MALI_TRUE == power_always_on_enabled) {
+		power_always_on_enabled = MALI_FALSE;
+		_mali_osk_pm_dev_ref_put();
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t power_always_on_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	if (MALI_TRUE == power_always_on_enabled) {
+		return simple_read_from_buffer(ubuf, cnt, ppos, "1\n", 2);
+	} else {
+		return simple_read_from_buffer(ubuf, cnt, ppos, "0\n", 2);
+	}
+}
+
+static const struct file_operations power_always_on_fops = {
+	.owner = THIS_MODULE,
+	.read  = power_always_on_read,
+	.write = power_always_on_write,
+};
+
+static ssize_t power_power_events_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_SUSPEND], strlen(mali_power_events[_MALI_DEVICE_SUSPEND]) - 1)) {
+		mali_pm_os_suspend(MALI_TRUE);
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_RESUME], strlen(mali_power_events[_MALI_DEVICE_RESUME]) - 1)) {
+		mali_pm_os_resume();
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_DVFS_PAUSE], strlen(mali_power_events[_MALI_DEVICE_DVFS_PAUSE]) - 1)) {
+		mali_dev_pause();
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_DVFS_RESUME], strlen(mali_power_events[_MALI_DEVICE_DVFS_RESUME]) - 1)) {
+		mali_dev_resume();
+	}
+	*ppos += cnt;
+	return cnt;
+}
+
+static loff_t power_power_events_seek(struct file *file, loff_t offset, int orig)
+{
+	file->f_pos = offset;
+	return 0;
+}
+
+static const struct file_operations power_power_events_fops = {
+	.owner = THIS_MODULE,
+	.write = power_power_events_write,
+	.llseek = power_power_events_seek,
+};
+
+#if MALI_STATE_TRACKING
+static int mali_seq_internal_state_show(struct seq_file *seq_file, void *v)
+{
+	u32 len = 0;
+	u32 size;
+	char *buf;
+
+	size = seq_get_buf(seq_file, &buf);
+
+	if (!size) {
+		return -ENOMEM;
+	}
+
+	/* Create the internal state dump. */
+	len  = snprintf(buf + len, size - len, "Mali device driver %s\n", SVN_REV_STRING);
+	len += snprintf(buf + len, size - len, "License: %s\n\n", MALI_KERNEL_LINUX_LICENSE);
+
+	len += _mali_kernel_core_dump_state(buf + len, size - len);
+
+	seq_commit(seq_file, len);
+
+	return 0;
+}
+
+static int mali_seq_internal_state_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mali_seq_internal_state_show, NULL);
+}
+
+static const struct file_operations mali_seq_internal_state_fops = {
+	.owner = THIS_MODULE,
+	.open = mali_seq_internal_state_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+#endif /* MALI_STATE_TRACKING */
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+static ssize_t profiling_record_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	int r;
+
+	r = snprintf(buf, 64, "%u\n", _mali_internal_profiling_is_recording() ? 1 : 0);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t profiling_record_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	unsigned long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val != 0) {
+		u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* This can be made configurable at a later stage if we need to */
+
+		/* check if we are already recording */
+		if (MALI_TRUE == _mali_internal_profiling_is_recording()) {
+			MALI_DEBUG_PRINT(3, ("Recording of profiling events already in progress\n"));
+			return -EFAULT;
+		}
+
+		/* check if we need to clear out an old recording first */
+		if (MALI_TRUE == _mali_internal_profiling_have_recording()) {
+			if (_MALI_OSK_ERR_OK != _mali_internal_profiling_clear()) {
+				MALI_DEBUG_PRINT(3, ("Failed to clear existing recording of profiling events\n"));
+				return -EFAULT;
+			}
+		}
+
+		/* start recording profiling data */
+		if (_MALI_OSK_ERR_OK != _mali_internal_profiling_start(&limit)) {
+			MALI_DEBUG_PRINT(3, ("Failed to start recording of profiling events\n"));
+			return -EFAULT;
+		}
+
+		MALI_DEBUG_PRINT(3, ("Profiling recording started (max %u events)\n", limit));
+	} else {
+		/* stop recording profiling data */
+		u32 count = 0;
+		if (_MALI_OSK_ERR_OK != _mali_internal_profiling_stop(&count)) {
+			MALI_DEBUG_PRINT(2, ("Failed to stop recording of profiling events\n"));
+			return -EFAULT;
+		}
+
+		MALI_DEBUG_PRINT(2, ("Profiling recording stopped (recorded %u events)\n", count));
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static const struct file_operations profiling_record_fops = {
+	.owner = THIS_MODULE,
+	.read  = profiling_record_read,
+	.write = profiling_record_write,
+};
+
+static void *profiling_events_start(struct seq_file *s, loff_t *pos)
+{
+	loff_t *spos;
+
+	/* check if we have data avaiable */
+	if (MALI_TRUE != _mali_internal_profiling_have_recording()) {
+		return NULL;
+	}
+
+	spos = kmalloc(sizeof(loff_t), GFP_KERNEL);
+	if (NULL == spos) {
+		return NULL;
+	}
+
+	*spos = *pos;
+	return spos;
+}
+
+static void *profiling_events_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	loff_t *spos = v;
+
+	/* check if we have data avaiable */
+	if (MALI_TRUE != _mali_internal_profiling_have_recording()) {
+		return NULL;
+	}
+
+	/* check if the next entry actually is avaiable */
+	if (_mali_internal_profiling_get_count() <= (u32)(*spos + 1)) {
+		return NULL;
+	}
+
+	*pos = ++*spos;
+	return spos;
+}
+
+static void profiling_events_stop(struct seq_file *s, void *v)
+{
+	kfree(v);
+}
+
+static int profiling_events_show(struct seq_file *seq_file, void *v)
+{
+	loff_t *spos = v;
+	u32 index;
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+
+	index = (u32) * spos;
+
+	/* Retrieve all events */
+	if (_MALI_OSK_ERR_OK == _mali_internal_profiling_get_event(index, &timestamp, &event_id, data)) {
+		seq_printf(seq_file, "%llu %u %u %u %u %u %u\n", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]);
+		return 0;
+	}
+
+	return 0;
+}
+
+static int profiling_events_show_human_readable(struct seq_file *seq_file, void *v)
+{
+#define MALI_EVENT_ID_IS_HW(event_id) (((event_id & 0x00FF0000) >= MALI_PROFILING_EVENT_CHANNEL_GP0) && ((event_id & 0x00FF0000) <= MALI_PROFILING_EVENT_CHANNEL_PP7))
+
+	static u64 start_time = 0;
+	loff_t *spos = v;
+	u32 index;
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+
+	index = (u32) * spos;
+
+	/* Retrieve all events */
+	if (_MALI_OSK_ERR_OK == _mali_internal_profiling_get_event(index, &timestamp, &event_id, data)) {
+		seq_printf(seq_file, "%llu %u %u %u %u %u %u # ", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]);
+
+		if (0 == index) {
+			start_time = timestamp;
+		}
+
+		seq_printf(seq_file, "[%06u] ", index);
+
+		switch (event_id & 0x0F000000) {
+		case MALI_PROFILING_EVENT_TYPE_SINGLE:
+			seq_printf(seq_file, "SINGLE | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_START:
+			seq_printf(seq_file, "START | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_STOP:
+			seq_printf(seq_file, "STOP | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_SUSPEND:
+			seq_printf(seq_file, "SUSPEND | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_RESUME:
+			seq_printf(seq_file, "RESUME | ");
+			break;
+		default:
+			seq_printf(seq_file, "0x%01X | ", (event_id & 0x0F000000) >> 24);
+			break;
+		}
+
+		switch (event_id & 0x00FF0000) {
+		case MALI_PROFILING_EVENT_CHANNEL_SOFTWARE:
+			seq_printf(seq_file, "SW | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_GP0:
+			seq_printf(seq_file, "GP0 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP0:
+			seq_printf(seq_file, "PP0 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP1:
+			seq_printf(seq_file, "PP1 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP2:
+			seq_printf(seq_file, "PP2 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP3:
+			seq_printf(seq_file, "PP3 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP4:
+			seq_printf(seq_file, "PP4 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP5:
+			seq_printf(seq_file, "PP5 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP6:
+			seq_printf(seq_file, "PP6 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP7:
+			seq_printf(seq_file, "PP7 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_GPU:
+			seq_printf(seq_file, "GPU | ");
+			break;
+		default:
+			seq_printf(seq_file, "0x%02X | ", (event_id & 0x00FF0000) >> 16);
+			break;
+		}
+
+		if (MALI_EVENT_ID_IS_HW(event_id)) {
+			if (((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_START) || ((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_STOP)) {
+				switch (event_id & 0x0000FFFF) {
+				case MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL:
+					seq_printf(seq_file, "PHYSICAL | ");
+					break;
+				case MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL:
+					seq_printf(seq_file, "VIRTUAL | ");
+					break;
+				default:
+					seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+					break;
+				}
+			} else {
+				seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+			}
+		} else {
+			seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+		}
+
+		seq_printf(seq_file, "T0 + 0x%016llX\n", timestamp - start_time);
+
+		return 0;
+	}
+
+	return 0;
+}
+
+static const struct seq_operations profiling_events_seq_ops = {
+	.start = profiling_events_start,
+	.next  = profiling_events_next,
+	.stop  = profiling_events_stop,
+	.show  = profiling_events_show
+};
+
+static int profiling_events_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &profiling_events_seq_ops);
+}
+
+static const struct file_operations profiling_events_fops = {
+	.owner = THIS_MODULE,
+	.open = profiling_events_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+static const struct seq_operations profiling_events_human_readable_seq_ops = {
+	.start = profiling_events_start,
+	.next  = profiling_events_next,
+	.stop  = profiling_events_stop,
+	.show  = profiling_events_show_human_readable
+};
+
+static int profiling_events_human_readable_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &profiling_events_human_readable_seq_ops);
+}
+
+static const struct file_operations profiling_events_human_readable_fops = {
+	.owner = THIS_MODULE,
+	.open = profiling_events_human_readable_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+static int memory_debugfs_show(struct seq_file *s, void *private_data)
+{
+#ifdef MALI_MEM_SWAP_TRACKING
+	seq_printf(s, "  %-25s  %-10s %-25s %-10s  %-15s  %-15s  %-10s  %-10s %-10s\n"\
+		   "=======================================================================================================================================\n",
+		   "Name (:bytes)", "pid", "pid-name", "mali_mem", "max_mali_mem",
+		   "external_mem", "ump_mem", "dma_mem", "swap_mem");
+#else
+	seq_printf(s, "  %-25s  %-10s %-25s %-10s  %-15s  %-15s  %-10s  %-10s\n"\
+		   "=======================================================================================================================================\n",
+		   "Name (:bytes)", "pid", "pid-name", "mali_mem", "max_mali_mem",
+		   "external_mem", "ump_mem", "dma_mem");
+#endif
+	mali_session_memory_tracking(s);
+	return 0;
+}
+
+static int memory_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, memory_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations memory_usage_fops = {
+	.owner = THIS_MODULE,
+	.open = memory_debugfs_open,
+	.read  = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static ssize_t utilization_gp_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_gp_pp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t utilization_gp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_gp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t utilization_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_pp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+
+static const struct file_operations utilization_gp_pp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_gp_pp_read,
+};
+
+static const struct file_operations utilization_gp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_gp_read,
+};
+
+static const struct file_operations utilization_pp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_pp_read,
+};
+
+static ssize_t user_settings_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+	_mali_uk_user_setting_t setting;
+	char buf[32];
+
+	cnt = min(cnt, sizeof(buf) - 1);
+	if (copy_from_user(buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+	buf[cnt] = '\0';
+
+	ret = kstrtoul(buf, 10, &val);
+	if (0 != ret) {
+		return ret;
+	}
+
+	/* Update setting */
+	setting = (_mali_uk_user_setting_t)(filp->private_data);
+	mali_set_user_setting(setting, val);
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t user_settings_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 value;
+	_mali_uk_user_setting_t setting;
+
+	setting = (_mali_uk_user_setting_t)(filp->private_data);
+	value = mali_get_user_setting(setting);
+
+	r = snprintf(buf, 64, "%u\n", value);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static const struct file_operations user_settings_fops = {
+	.owner = THIS_MODULE,
+	.open = open_copy_private_data,
+	.read = user_settings_read,
+	.write = user_settings_write,
+};
+
+static int mali_sysfs_user_settings_register(void)
+{
+	struct dentry *mali_user_settings_dir = debugfs_create_dir("userspace_settings", mali_debugfs_dir);
+
+	if (mali_user_settings_dir != NULL) {
+		long i;
+		for (i = 0; i < _MALI_UK_USER_SETTING_MAX; i++) {
+			debugfs_create_file(_mali_uk_user_setting_descriptions[i],
+					    0600, mali_user_settings_dir, (void *)i,
+					    &user_settings_fops);
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t pp_num_cores_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int ret;
+	char buffer[32];
+	unsigned long val;
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	ret = kstrtoul(&buffer[0], 10, &val);
+	if (0 != ret) {
+		return -EINVAL;
+	}
+
+	ret = mali_executor_set_perf_level(val, MALI_TRUE); /* override even if core scaling is disabled */
+	if (ret) {
+		return ret;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static ssize_t pp_num_cores_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+
+	r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_enabled());
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations pp_num_cores_enabled_fops = {
+	.owner = THIS_MODULE,
+	.write = pp_num_cores_enabled_write,
+	.read = pp_num_cores_enabled_read,
+	.llseek = default_llseek,
+};
+
+static ssize_t pp_num_cores_total_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+
+	r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_total());
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations pp_num_cores_total_fops = {
+	.owner = THIS_MODULE,
+	.read = pp_num_cores_total_read,
+};
+
+static ssize_t pp_core_scaling_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int ret;
+	char buffer[32];
+	unsigned long val;
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	ret = kstrtoul(&buffer[0], 10, &val);
+	if (0 != ret) {
+		return -EINVAL;
+	}
+
+	switch (val) {
+	case 1:
+		mali_executor_core_scaling_enable();
+		break;
+	case 0:
+		mali_executor_core_scaling_disable();
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static ssize_t pp_core_scaling_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	return simple_read_from_buffer(buf, count, offp, mali_executor_core_scaling_is_enabled() ? "1\n" : "0\n", 2);
+}
+static const struct file_operations pp_core_scaling_enabled_fops = {
+	.owner = THIS_MODULE,
+	.write = pp_core_scaling_enabled_write,
+	.read = pp_core_scaling_enabled_read,
+	.llseek = default_llseek,
+};
+
+static ssize_t version_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r = 0;
+	char buffer[64];
+
+	switch (mali_kernel_core_get_product_id()) {
+	case _MALI_PRODUCT_ID_MALI200:
+		r = snprintf(buffer, 64, "Mali-200\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI300:
+		r = snprintf(buffer, 64, "Mali-300\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI400:
+		r = snprintf(buffer, 64, "Mali-400 MP\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI450:
+		r = snprintf(buffer, 64, "Mali-450 MP\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI470:
+		r = snprintf(buffer, 64, "Mali-470 MP\n");
+		break;
+	case _MALI_PRODUCT_ID_UNKNOWN:
+		return -EINVAL;
+		break;
+	};
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations version_fops = {
+	.owner = THIS_MODULE,
+	.read = version_read,
+};
+
+#if defined(DEBUG)
+static int timeline_debugfs_show(struct seq_file *s, void *private_data)
+{
+	struct mali_session_data *session, *tmp;
+	u32 session_seq = 1;
+
+	seq_printf(s, "timeline system info: \n=================\n\n");
+
+	mali_session_lock();
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		seq_printf(s, "session %d <%p> start:\n", session_seq, session);
+		mali_timeline_debug_print_system(session->timeline_system, s);
+		seq_printf(s, "session %d end\n\n\n", session_seq++);
+	}
+	mali_session_unlock();
+
+	return 0;
+}
+
+static int timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, timeline_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations timeline_dump_fops = {
+	.owner = THIS_MODULE,
+	.open = timeline_debugfs_open,
+	.read  = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release
+};
+#endif
+
+int mali_sysfs_register(const char *mali_dev_name)
+{
+	mali_debugfs_dir = debugfs_create_dir(mali_dev_name, NULL);
+	if (ERR_PTR(-ENODEV) == mali_debugfs_dir) {
+		/* Debugfs not supported. */
+		mali_debugfs_dir = NULL;
+	} else {
+		if (NULL != mali_debugfs_dir) {
+			/* Debugfs directory created successfully; create files now */
+			struct dentry *mali_power_dir;
+			struct dentry *mali_gp_dir;
+			struct dentry *mali_pp_dir;
+			struct dentry *mali_l2_dir;
+			struct dentry *mali_profiling_dir;
+
+			debugfs_create_file("version", 0400, mali_debugfs_dir, NULL, &version_fops);
+
+			mali_power_dir = debugfs_create_dir("power", mali_debugfs_dir);
+			if (mali_power_dir != NULL) {
+				debugfs_create_file("always_on", 0600, mali_power_dir, NULL, &power_always_on_fops);
+				debugfs_create_file("power_events", 0200, mali_power_dir, NULL, &power_power_events_fops);
+			}
+
+			mali_gp_dir = debugfs_create_dir("gp", mali_debugfs_dir);
+			if (mali_gp_dir != NULL) {
+				u32 num_groups;
+				long i;
+
+				num_groups = mali_group_get_glob_num_groups();
+				for (i = 0; i < num_groups; i++) {
+					struct mali_group *group = mali_group_get_glob_group(i);
+
+					struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+					if (NULL != gp_core) {
+						struct dentry *mali_gp_gpx_dir;
+						mali_gp_gpx_dir = debugfs_create_dir("gp0", mali_gp_dir);
+						if (NULL != mali_gp_gpx_dir) {
+							debugfs_create_file("base_addr", 0400, mali_gp_gpx_dir, &gp_core->hw_core, &hw_core_base_addr_fops);
+							debugfs_create_file("enabled", 0600, mali_gp_gpx_dir, group, &group_enabled_fops);
+						}
+						break; /* no need to look for any other GP cores */
+					}
+
+				}
+			}
+
+			mali_pp_dir = debugfs_create_dir("pp", mali_debugfs_dir);
+			if (mali_pp_dir != NULL) {
+				u32 num_groups;
+				long i;
+
+				debugfs_create_file("num_cores_total", 0400, mali_pp_dir, NULL, &pp_num_cores_total_fops);
+				debugfs_create_file("num_cores_enabled", 0600, mali_pp_dir, NULL, &pp_num_cores_enabled_fops);
+				debugfs_create_file("core_scaling_enabled", 0600, mali_pp_dir, NULL, &pp_core_scaling_enabled_fops);
+
+				num_groups = mali_group_get_glob_num_groups();
+				for (i = 0; i < num_groups; i++) {
+					struct mali_group *group = mali_group_get_glob_group(i);
+
+					struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+					if (NULL != pp_core) {
+						char buf[16];
+						struct dentry *mali_pp_ppx_dir;
+						_mali_osk_snprintf(buf, sizeof(buf), "pp%u", mali_pp_core_get_id(pp_core));
+						mali_pp_ppx_dir = debugfs_create_dir(buf, mali_pp_dir);
+						if (NULL != mali_pp_ppx_dir) {
+							debugfs_create_file("base_addr", 0400, mali_pp_ppx_dir, &pp_core->hw_core, &hw_core_base_addr_fops);
+							if (!mali_group_is_virtual(group)) {
+								debugfs_create_file("enabled", 0600, mali_pp_ppx_dir, group, &group_enabled_fops);
+							}
+						}
+					}
+				}
+			}
+
+			mali_l2_dir = debugfs_create_dir("l2", mali_debugfs_dir);
+			if (mali_l2_dir != NULL) {
+				struct dentry *mali_l2_all_dir;
+				u32 l2_id;
+				struct mali_l2_cache_core *l2_cache;
+
+				mali_l2_all_dir = debugfs_create_dir("all", mali_l2_dir);
+				if (mali_l2_all_dir != NULL) {
+					debugfs_create_file("counter_src0", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src0_fops);
+					debugfs_create_file("counter_src1", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src1_fops);
+				}
+
+				l2_id = 0;
+				l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+				while (NULL != l2_cache) {
+					char buf[16];
+					struct dentry *mali_l2_l2x_dir;
+					_mali_osk_snprintf(buf, sizeof(buf), "l2%u", l2_id);
+					mali_l2_l2x_dir = debugfs_create_dir(buf, mali_l2_dir);
+					if (NULL != mali_l2_l2x_dir) {
+						debugfs_create_file("counter_src0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src0_fops);
+						debugfs_create_file("counter_src1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src1_fops);
+						debugfs_create_file("counter_val0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val0_fops);
+						debugfs_create_file("counter_val1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val1_fops);
+						debugfs_create_file("base_addr", 0400, mali_l2_l2x_dir, &l2_cache->hw_core, &hw_core_base_addr_fops);
+					}
+
+					/* try next L2 */
+					l2_id++;
+					l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+				}
+			}
+
+			debugfs_create_file("gpu_memory", 0444, mali_debugfs_dir, NULL, &memory_usage_fops);
+
+			debugfs_create_file("utilization_gp_pp", 0400, mali_debugfs_dir, NULL, &utilization_gp_pp_fops);
+			debugfs_create_file("utilization_gp", 0400, mali_debugfs_dir, NULL, &utilization_gp_fops);
+			debugfs_create_file("utilization_pp", 0400, mali_debugfs_dir, NULL, &utilization_pp_fops);
+
+			mali_profiling_dir = debugfs_create_dir("profiling", mali_debugfs_dir);
+			if (mali_profiling_dir != NULL) {
+				u32 max_sub_jobs;
+				long i;
+				struct dentry *mali_profiling_gp_dir;
+				struct dentry *mali_profiling_pp_dir;
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+				struct dentry *mali_profiling_proc_dir;
+#endif
+				/*
+				 * Create directory where we can set GP HW counters.
+				 */
+				mali_profiling_gp_dir = debugfs_create_dir("gp", mali_profiling_dir);
+				if (mali_profiling_gp_dir != NULL) {
+					debugfs_create_file("counter_src0", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(0), &profiling_counter_src_fops);
+					debugfs_create_file("counter_src1", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(1), &profiling_counter_src_fops);
+				}
+
+				/*
+				 * Create directory where we can set PP HW counters.
+				 * Possible override with specific HW counters for a particular sub job
+				 * (Disable core scaling before using the override!)
+				 */
+				mali_profiling_pp_dir = debugfs_create_dir("pp", mali_profiling_dir);
+				if (mali_profiling_pp_dir != NULL) {
+					debugfs_create_file("counter_src0", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(0), &profiling_counter_src_fops);
+					debugfs_create_file("counter_src1", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(1), &profiling_counter_src_fops);
+				}
+
+				max_sub_jobs = mali_executor_get_num_cores_total();
+				for (i = 0; i < max_sub_jobs; i++) {
+					char buf[16];
+					struct dentry *mali_profiling_pp_x_dir;
+					_mali_osk_snprintf(buf, sizeof(buf), "%u", i);
+					mali_profiling_pp_x_dir = debugfs_create_dir(buf, mali_profiling_pp_dir);
+					if (NULL != mali_profiling_pp_x_dir) {
+						debugfs_create_file("counter_src0",
+								    0600, mali_profiling_pp_x_dir,
+								    (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(0, i),
+								    &profiling_counter_src_fops);
+						debugfs_create_file("counter_src1",
+								    0600, mali_profiling_pp_x_dir,
+								    (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(1, i),
+								    &profiling_counter_src_fops);
+					}
+				}
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+				mali_profiling_proc_dir = debugfs_create_dir("proc", mali_profiling_dir);
+				if (mali_profiling_proc_dir != NULL) {
+					struct dentry *mali_profiling_proc_default_dir = debugfs_create_dir("default", mali_profiling_proc_dir);
+					if (mali_profiling_proc_default_dir != NULL) {
+						debugfs_create_file("enable", 0600, mali_profiling_proc_default_dir, (void *)_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, &user_settings_fops);
+					}
+				}
+				debugfs_create_file("record", 0600, mali_profiling_dir, NULL, &profiling_record_fops);
+				debugfs_create_file("events", 0400, mali_profiling_dir, NULL, &profiling_events_fops);
+				debugfs_create_file("events_human_readable", 0400, mali_profiling_dir, NULL, &profiling_events_human_readable_fops);
+#endif
+			}
+
+#if MALI_STATE_TRACKING
+			debugfs_create_file("state_dump", 0400, mali_debugfs_dir, NULL, &mali_seq_internal_state_fops);
+#endif
+
+#if defined(DEBUG)
+			debugfs_create_file("timeline_dump", 0400, mali_debugfs_dir, NULL, &timeline_dump_fops);
+#endif
+			if (mali_sysfs_user_settings_register()) {
+				/* Failed to create the debugfs entries for the user settings DB. */
+				MALI_DEBUG_PRINT(2, ("Failed to create user setting debugfs files. Ignoring...\n"));
+			}
+		}
+	}
+
+	/* Success! */
+	return 0;
+}
+
+int mali_sysfs_unregister(void)
+{
+	if (NULL != mali_debugfs_dir) {
+		debugfs_remove_recursive(mali_debugfs_dir);
+	}
+	return 0;
+}
+
+#else /* MALI_LICENSE_IS_GPL */
+
+/* Dummy implementations for non-GPL */
+
+int mali_sysfs_register(struct mali_dev *device, dev_t dev, const char *mali_dev_name)
+{
+	return 0;
+}
+
+int mali_sysfs_unregister(void)
+{
+	return 0;
+}
+
+#endif /* MALI_LICENSE_IS_GPL */
diff --git a/utgard/r6p2/linux/mali_kernel_sysfs.h b/utgard/r6p2/linux/mali_kernel_sysfs.h
new file mode 100755
index 0000000..9dad7f2
--- /dev/null
+++ b/utgard/r6p2/linux/mali_kernel_sysfs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2011-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_SYSFS_H__
+#define __MALI_KERNEL_SYSFS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/device.h>
+
+#define MALI_PROC_DIR "driver/mali"
+
+int mali_sysfs_register(const char *mali_dev_name);
+int mali_sysfs_unregister(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LINUX_H__ */
diff --git a/utgard/r6p2/linux/mali_linux_trace.h b/utgard/r6p2/linux/mali_linux_trace.h
new file mode 100755
index 0000000..2c91ddc
--- /dev/null
+++ b/utgard/r6p2/linux/mali_linux_trace.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#if !defined (MALI_LINUX_TRACE_H) || defined (TRACE_HEADER_MULTI_READ)
+#define MALI_LINUX_TRACE_H
+
+#include <linux/types.h>
+
+#include <linux/stringify.h>
+#include <linux/tracepoint.h>
+
+#undef  TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_SYSTEM_STRING __stringfy(TRACE_SYSTEM)
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+/**
+ * Define the tracepoint used to communicate the status of a GPU. Called
+ * when a GPU turns on or turns off.
+ *
+ * @param event_id The type of the event. This parameter is a bitfield
+ *  encoding the type of the event.
+ *
+ * @param d0 First data parameter.
+ * @param d1 Second data parameter.
+ * @param d2 Third data parameter.
+ * @param d3 Fourth data parameter.
+ * @param d4 Fifth data parameter.
+ */
+TRACE_EVENT(mali_timeline_event,
+
+	    TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1,
+		     unsigned int d2, unsigned int d3, unsigned int d4),
+
+	    TP_ARGS(event_id, d0, d1, d2, d3, d4),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, event_id)
+		    __field(unsigned int, d0)
+		    __field(unsigned int, d1)
+		    __field(unsigned int, d2)
+		    __field(unsigned int, d3)
+		    __field(unsigned int, d4)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->event_id = event_id;
+		    __entry->d0 = d0;
+		    __entry->d1 = d1;
+		    __entry->d2 = d2;
+		    __entry->d3 = d3;
+		    __entry->d4 = d4;
+	    ),
+
+	    TP_printk("event=%d", __entry->event_id)
+	   );
+
+/**
+ * Define a tracepoint used to regsiter the value of a hardware counter.
+ * Hardware counters belonging to the vertex or fragment processor are
+ * reported via this tracepoint each frame, whilst L2 cache hardware
+ * counters are reported continuously.
+ *
+ * @param counter_id The counter ID.
+ * @param value The value of the counter.
+ */
+TRACE_EVENT(mali_hw_counter,
+
+	    TP_PROTO(unsigned int counter_id, unsigned int value),
+
+	    TP_ARGS(counter_id, value),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, counter_id)
+		    __field(unsigned int, value)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->counter_id = counter_id;
+	    ),
+
+	    TP_printk("event %d = %d", __entry->counter_id, __entry->value)
+	   );
+
+/**
+ * Define a tracepoint used to send a bundle of software counters.
+ *
+ * @param counters The bundle of counters.
+ */
+TRACE_EVENT(mali_sw_counters,
+
+	    TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters),
+
+	    TP_ARGS(pid, tid, surface_id, counters),
+
+	    TP_STRUCT__entry(
+		    __field(pid_t, pid)
+		    __field(pid_t, tid)
+		    __field(void *, surface_id)
+		    __field(unsigned int *, counters)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->pid = pid;
+		    __entry->tid = tid;
+		    __entry->surface_id = surface_id;
+		    __entry->counters = counters;
+	    ),
+
+	    TP_printk("counters were %s", __entry->counters == NULL ? "NULL" : "not NULL")
+	   );
+
+/**
+ * Define a tracepoint used to gather core activity for systrace
+ * @param pid The process id for which the core activity originates from
+ * @param active If the core is active (1) or not (0)
+ * @param core_type The type of core active, either GP (1) or PP (0)
+ * @param core_id The core id that is active for the core_type
+ * @param frame_builder_id The frame builder id associated with this core activity
+ * @param flush_id The flush id associated with this core activity
+ */
+TRACE_EVENT(mali_core_active,
+
+	    TP_PROTO(pid_t pid, unsigned int active, unsigned int core_type, unsigned int core_id, unsigned int frame_builder_id, unsigned int flush_id),
+
+	    TP_ARGS(pid, active, core_type, core_id, frame_builder_id, flush_id),
+
+	    TP_STRUCT__entry(
+		    __field(pid_t, pid)
+		    __field(unsigned int, active)
+		    __field(unsigned int, core_type)
+		    __field(unsigned int, core_id)
+		    __field(unsigned int, frame_builder_id)
+		    __field(unsigned int, flush_id)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->pid = pid;
+		    __entry->active = active;
+		    __entry->core_type = core_type;
+		    __entry->core_id = core_id;
+		    __entry->frame_builder_id = frame_builder_id;
+		    __entry->flush_id = flush_id;
+	    ),
+
+	    TP_printk("%s|%d|%s%i:%x|%d", __entry->active ? "S" : "F", __entry->pid, __entry->core_type ? "GP" : "PP", __entry->core_id, __entry->flush_id, __entry->frame_builder_id)
+	   );
+
+#endif /* MALI_LINUX_TRACE_H */
+
+/* This part must exist outside the header guard. */
+#include <trace/define_trace.h>
+
diff --git a/utgard/r6p2/linux/mali_memory.c b/utgard/r6p2/linux/mali_memory.c
new file mode 100755
index 0000000..3300885
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory.c
@@ -0,0 +1,531 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/platform_device.h>
+#include <linux/idr.h>
+
+#include "mali_osk.h"
+#include "mali_executor.h"
+
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_util.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_swap_alloc.h"
+#include "mali_memory_defer_bind.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_secure.h"
+#endif
+
+extern unsigned int mali_dedicated_mem_size;
+extern unsigned int mali_shared_mem_size;
+
+#define MALI_VM_NUM_FAULT_PREFETCH (0x8)
+
+static void mali_mem_vma_open(struct vm_area_struct *vma)
+{
+	mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+	MALI_DEBUG_PRINT(4, ("Open called on vma %p\n", vma));
+
+	/* If need to share the allocation, add ref_count here */
+	mali_allocation_ref(alloc);
+	return;
+}
+static void mali_mem_vma_close(struct vm_area_struct *vma)
+{
+	/* If need to share the allocation, unref ref_count here */
+	mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+
+	mali_allocation_unref(&alloc);
+	vma->vm_private_data = NULL;
+}
+
+static int mali_mem_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+	mali_mem_backend *mem_bkend = NULL;
+	int ret;
+	int prefetch_num = MALI_VM_NUM_FAULT_PREFETCH;
+
+	unsigned long address = (unsigned long)vmf->virtual_address;
+	MALI_DEBUG_ASSERT(alloc->backend_handle);
+	MALI_DEBUG_ASSERT((unsigned long)alloc->cpu_mapping.addr <= address);
+
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	if (!(mem_bkend = idr_find(&mali_backend_idr, alloc->backend_handle))) {
+		MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n"));
+		mutex_unlock(&mali_idr_mutex);
+		return VM_FAULT_SIGBUS;
+	}
+	mutex_unlock(&mali_idr_mutex);
+	MALI_DEBUG_ASSERT(mem_bkend->type == alloc->type);
+
+	if ((mem_bkend->type == MALI_MEM_COW && (MALI_MEM_BACKEND_FLAG_SWAP_COWED !=
+			(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) &&
+	    (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE)) {
+		/*check if use page fault to do COW*/
+		MALI_DEBUG_PRINT(4, ("mali_vma_fault: do cow allocate on demand!, address=0x%x\n", address));
+		mutex_lock(&mem_bkend->mutex);
+		ret = mali_mem_cow_allocate_on_demand(mem_bkend,
+						      (address - vma->vm_start) / PAGE_SIZE);
+		mutex_unlock(&mem_bkend->mutex);
+
+		if (ret != _MALI_OSK_ERR_OK) {
+			return VM_FAULT_OOM;
+		}
+		prefetch_num = 1;
+
+		/* handle COW modified range cpu mapping
+		 we zap the mapping in cow_modify_range, it will trigger page fault
+		 when CPU access it, so here we map it to CPU*/
+		mutex_lock(&mem_bkend->mutex);
+		ret = mali_mem_cow_cpu_map_pages_locked(mem_bkend, vma, address, prefetch_num);
+		mutex_unlock(&mem_bkend->mutex);
+
+		if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+			return VM_FAULT_SIGBUS;
+		}
+	} else if ((mem_bkend->type == MALI_MEM_SWAP) ||
+		   (mem_bkend->type == MALI_MEM_COW && (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+		u32 offset_in_bkend = (address - vma->vm_start) / PAGE_SIZE;
+		int ret = _MALI_OSK_ERR_OK;
+
+		mutex_lock(&mem_bkend->mutex);
+		if (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE) {
+			ret = mali_mem_swap_cow_page_on_demand(mem_bkend, offset_in_bkend, &vmf->page);
+		} else {
+			ret = mali_mem_swap_allocate_page_on_demand(mem_bkend, offset_in_bkend, &vmf->page);
+		}
+		mutex_unlock(&mem_bkend->mutex);
+
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(2, ("Mali swap memory page fault process failed, address=0x%x\n", address));
+			return VM_FAULT_OOM;
+		} else {
+			return VM_FAULT_LOCKED;
+		}
+	} else {
+		MALI_PRINT_ERROR(("Mali vma fault! It never happen, indicating some logic errors in caller.\n"));
+		/*NOT support yet or OOM*/
+		return VM_FAULT_OOM;
+	}
+	return VM_FAULT_NOPAGE;
+}
+
+static struct vm_operations_struct mali_kernel_vm_ops = {
+	.open = mali_mem_vma_open,
+	.close = mali_mem_vma_close,
+	.fault = mali_mem_vma_fault,
+};
+
+
+/** @ map mali allocation to CPU address
+*
+* Supported backend types:
+* --MALI_MEM_OS
+* -- need to add COW?
+ *Not supported backend types:
+* -_MALI_MEMORY_BIND_BACKEND_UMP
+* -_MALI_MEMORY_BIND_BACKEND_DMA_BUF
+* -_MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY
+*
+*/
+int mali_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct mali_session_data *session;
+	mali_mem_allocation *mali_alloc = NULL;
+	u32 mali_addr = vma->vm_pgoff << PAGE_SHIFT;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	int ret = -EFAULT;
+
+	session = (struct mali_session_data *)filp->private_data;
+	if (NULL == session) {
+		MALI_PRINT_ERROR(("mmap called without any session data available\n"));
+		return -EFAULT;
+	}
+
+	MALI_DEBUG_PRINT(4, ("MMap() handler: start=0x%08X, phys=0x%08X, size=0x%08X vma->flags 0x%08x\n",
+			     (unsigned int)vma->vm_start, (unsigned int)(vma->vm_pgoff << PAGE_SHIFT),
+			     (unsigned int)(vma->vm_end - vma->vm_start), vma->vm_flags));
+
+	/* Operations used on any memory system */
+	/* do not need to anything in vm open/close now */
+
+	/* find mali allocation structure by vaddress*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+	if (likely(mali_vma_node)) {
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start);
+		if (unlikely(mali_addr != mali_vma_node->vm_node.start)) {
+			/* only allow to use start address for mmap */
+			MALI_DEBUG_PRINT(1, ("mali_addr != mali_vma_node->vm_node.start\n"));
+			return -EFAULT;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(NULL == mali_vma_node);
+		return -EFAULT;
+	}
+
+	mali_alloc->cpu_mapping.addr = (void __user *)vma->vm_start;
+
+	if (mali_alloc->flags & _MALI_MEMORY_ALLOCATE_DEFER_BIND) {
+		MALI_DEBUG_PRINT(1, ("ERROR : trying to access varying memory by CPU!\n"));
+		return -EFAULT;
+	}
+
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	if (!(mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle))) {
+		MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n"));
+		mutex_unlock(&mali_idr_mutex);
+		return -EFAULT;
+	}
+	mutex_unlock(&mali_idr_mutex);
+
+	if (!(MALI_MEM_SWAP == mali_alloc->type ||
+	      (MALI_MEM_COW == mali_alloc->type && (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)))) {
+		/* Set some bits which indicate that, the memory is IO memory, meaning
+		 * that no paging is to be performed and the memory should not be
+		 * included in crash dumps. And that the memory is reserved, meaning
+		 * that it's present and can never be paged out (see also previous
+		 * entry)
+		 */
+		vma->vm_flags |= VM_IO;
+		vma->vm_flags |= VM_DONTCOPY;
+		vma->vm_flags |= VM_PFNMAP;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+		vma->vm_flags |= VM_RESERVED;
+#else
+		vma->vm_flags |= VM_DONTDUMP;
+		vma->vm_flags |= VM_DONTEXPAND;
+#endif
+	} else if (MALI_MEM_SWAP == mali_alloc->type) {
+		vma->vm_pgoff = mem_bkend->start_idx;
+	}
+
+	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	vma->vm_ops = &mali_kernel_vm_ops;
+
+	mali_alloc->cpu_mapping.addr = (void __user *)vma->vm_start;
+
+	/* If it's a copy-on-write mapping, map to read only */
+	if (!(vma->vm_flags & VM_WRITE)) {
+		MALI_DEBUG_PRINT(4, ("mmap allocation with read only !\n"));
+		/* add VM_WRITE for do_page_fault will check this when a write fault */
+		vma->vm_flags |= VM_WRITE | VM_READ;
+		vma->vm_page_prot = PAGE_READONLY;
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE;
+		goto out;
+	}
+
+	if (mem_bkend->type == MALI_MEM_OS) {
+		ret = mali_mem_os_cpu_map(mem_bkend, vma);
+	} else if (mem_bkend->type == MALI_MEM_COW &&
+		   (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+		ret = mali_mem_cow_cpu_map(mem_bkend, vma);
+	} else if (mem_bkend->type == MALI_MEM_BLOCK) {
+		ret = mali_mem_block_cpu_map(mem_bkend, vma);
+	} else if ((mem_bkend->type == MALI_MEM_SWAP) || (mem_bkend->type == MALI_MEM_COW &&
+			(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)))) {
+		/*For swappable memory, CPU page table will be created by page fault handler. */
+		ret = 0;
+	} else if (mem_bkend->type == MALI_MEM_SECURE) {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		ret = mali_mem_secure_cpu_map(mem_bkend, vma);
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory\n"));
+		return -EFAULT;
+#endif
+	} else {
+		/* Not support yet*/
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of backend memory! \n"));
+		return -EFAULT;
+	}
+
+	if (ret != 0) {
+		MALI_DEBUG_PRINT(1, ("ret != 0\n"));
+		return -EFAULT;
+	}
+out:
+	MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == mali_alloc->magic);
+
+	vma->vm_private_data = (void *)mali_alloc;
+	mali_alloc->cpu_mapping.vma = vma;
+
+	mali_allocation_ref(mali_alloc);
+
+	return 0;
+}
+
+_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor)
+{
+	u32 size = descriptor->psize;
+	struct mali_session_data *session = descriptor->session;
+
+	MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == descriptor->magic);
+
+	/* Map dma-buf into this session's page tables */
+
+	if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		size += MALI_MMU_PAGE_SIZE;
+	}
+
+	return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start, size);
+}
+
+_mali_osk_errcode_t mali_mem_mali_map_resize(mali_mem_allocation *descriptor, u32 new_size)
+{
+	u32 old_size = descriptor->psize;
+	struct mali_session_data *session = descriptor->session;
+
+	MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == descriptor->magic);
+
+	if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		new_size  += MALI_MMU_PAGE_SIZE;
+	}
+
+	if (new_size > old_size) {
+		MALI_DEBUG_ASSERT(new_size <= descriptor->mali_vma_node.vm_node.size);
+		return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start + old_size, new_size - old_size);
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags)
+{
+	if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		size += MALI_MMU_PAGE_SIZE;
+	}
+
+	/* Umap and flush L2 */
+	mali_mmu_pagedir_unmap(session->page_directory, vaddr, size);
+	mali_executor_zap_all_active(session);
+}
+
+u32 _mali_ukk_report_memory_usage(void)
+{
+	u32 sum = 0;
+
+	if (MALI_TRUE == mali_memory_have_dedicated_memory()) {
+		sum += mali_mem_block_allocator_stat();
+	}
+
+	sum += mali_mem_os_stat();
+
+	return sum;
+}
+
+u32 _mali_ukk_report_total_memory_size(void)
+{
+	return mali_dedicated_mem_size + mali_shared_mem_size;
+}
+
+
+/**
+ * Per-session memory descriptor mapping table sizes
+ */
+#define MALI_MEM_DESCRIPTORS_INIT 64
+#define MALI_MEM_DESCRIPTORS_MAX 65536
+
+_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session_data)
+{
+	MALI_DEBUG_PRINT(5, ("Memory session begin\n"));
+
+	session_data->memory_lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED,
+				    _MALI_OSK_LOCK_ORDER_MEM_SESSION);
+
+	if (NULL == session_data->memory_lock) {
+		MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	}
+
+	session_data->cow_lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_UNORDERED, 0);
+	if (NULL == session_data->cow_lock) {
+		_mali_osk_mutex_term(session_data->memory_lock);
+		_mali_osk_free(session_data);
+		MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	}
+
+	mali_memory_manager_init(&session_data->allocation_mgr);
+
+	MALI_DEBUG_PRINT(5, ("MMU session begin: success\n"));
+	MALI_SUCCESS;
+}
+
+void mali_memory_session_end(struct mali_session_data *session)
+{
+	MALI_DEBUG_PRINT(3, ("MMU session end\n"));
+
+	if (NULL == session) {
+		MALI_DEBUG_PRINT(1, ("No session data found during session end\n"));
+		return;
+	}
+	/* free allocation */
+	mali_free_session_allocations(session);
+	/* do some check in unint*/
+	mali_memory_manager_uninit(&session->allocation_mgr);
+
+	/* Free the lock */
+	_mali_osk_mutex_term(session->memory_lock);
+	_mali_osk_mutex_term(session->cow_lock);
+	return;
+}
+
+_mali_osk_errcode_t mali_memory_initialize(void)
+{
+	_mali_osk_errcode_t err;
+
+	idr_init(&mali_backend_idr);
+	mutex_init(&mali_idr_mutex);
+
+	err = mali_mem_swap_init();
+	if (err != _MALI_OSK_ERR_OK) {
+		return err;
+	}
+	err = mali_mem_os_init();
+	if (_MALI_OSK_ERR_OK == err) {
+		err = mali_mem_defer_bind_manager_init();
+	}
+
+	return err;
+}
+
+void mali_memory_terminate(void)
+{
+	mali_mem_swap_term();
+	mali_mem_defer_bind_manager_destory();
+	mali_mem_os_term();
+	if (mali_memory_have_dedicated_memory()) {
+		mali_mem_block_allocator_destroy();
+	}
+}
+
+
+struct mali_page_node *_mali_page_node_allocate(mali_page_node_type type)
+{
+	mali_page_node *page_node = NULL;
+
+	page_node = kzalloc(sizeof(mali_page_node), GFP_KERNEL);
+	MALI_DEBUG_ASSERT(NULL != page_node);
+
+	if (page_node) {
+		page_node->type = type;
+		INIT_LIST_HEAD(&page_node->list);
+	}
+
+	return page_node;
+}
+
+void _mali_page_node_ref(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		/* add ref to this page */
+		get_page(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		mali_mem_block_add_ref(node);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		atomic_inc(&node->swap_it->ref_count);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+}
+
+void _mali_page_node_unref(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		/* unref to this page */
+		put_page(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		mali_mem_block_dec_ref(node);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+}
+
+
+void _mali_page_node_add_page(struct mali_page_node *node, struct page *page)
+{
+	MALI_DEBUG_ASSERT(MALI_PAGE_NODE_OS == node->type);
+	node->page = page;
+}
+
+
+void _mali_page_node_add_swap_item(struct mali_page_node *node, struct mali_swap_item *item)
+{
+	MALI_DEBUG_ASSERT(MALI_PAGE_NODE_SWAP == node->type);
+	node->swap_it = item;
+}
+
+void _mali_page_node_add_block_item(struct mali_page_node *node, mali_block_item *item)
+{
+	MALI_DEBUG_ASSERT(MALI_PAGE_NODE_BLOCK == node->type);
+	node->blk_it = item;
+}
+
+
+int _mali_page_node_get_ref_count(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		/* get ref count of this page */
+		return page_count(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		return mali_mem_block_get_ref_count(node);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return atomic_read(&node->swap_it->ref_count);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+	return -1;
+}
+
+
+dma_addr_t _mali_page_node_get_dma_addr(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		return page_private(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		return _mali_blk_item_get_phy_addr(node->blk_it);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return node->swap_it->dma_addr;
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+	return 0;
+}
+
+
+unsigned long _mali_page_node_get_pfn(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		return page_to_pfn(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		/* get phy addr for BLOCK page*/
+		return _mali_blk_item_get_pfn(node->blk_it);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return page_to_pfn(node->swap_it->page);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n"));
+	}
+	return 0;
+}
+
+
diff --git a/utgard/r6p2/linux/mali_memory.h b/utgard/r6p2/linux/mali_memory.h
new file mode 100755
index 0000000..e5e4f66
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_H__
+#define __MALI_MEMORY_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include <linux/list.h>
+#include <linux/mm.h>
+
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+
+_mali_osk_errcode_t mali_memory_initialize(void);
+void mali_memory_terminate(void);
+
+/** @brief Allocate a page table page
+ *
+ * Allocate a page for use as a page directory or page table. The page is
+ * mapped into kernel space.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise an error code
+ * @param table_page GPU pointer to the allocated page
+ * @param mapping CPU pointer to the mapping of the allocated page
+ */
+MALI_STATIC_INLINE _mali_osk_errcode_t
+mali_mmu_get_table_page(mali_dma_addr *table_page, mali_io_address *mapping)
+{
+	return mali_mem_os_get_table_page(table_page, mapping);
+}
+
+/** @brief Release a page table page
+ *
+ * Release a page table page allocated through \a mali_mmu_get_table_page
+ *
+ * @param pa the GPU address of the page to release
+ */
+MALI_STATIC_INLINE void
+mali_mmu_release_table_page(mali_dma_addr phys, void *virt)
+{
+	mali_mem_os_release_table_page(phys, virt);
+}
+
+/** @brief mmap function
+ *
+ * mmap syscalls on the Mali device node will end up here.
+ *
+ * This function allocates Mali memory and maps it on CPU and Mali.
+ */
+int mali_mmap(struct file *filp, struct vm_area_struct *vma);
+
+/** @brief Start a new memory session
+ *
+ * Called when a process opens the Mali device node.
+ *
+ * @param session Pointer to session to initialize
+ */
+_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session);
+
+/** @brief Close a memory session
+ *
+ * Called when a process closes the Mali device node.
+ *
+ * Memory allocated by the session will be freed
+ *
+ * @param session Pointer to the session to terminate
+ */
+void mali_memory_session_end(struct mali_session_data *session);
+
+/** @brief Prepare Mali page tables for mapping
+ *
+ * This function will prepare the Mali page tables for mapping the memory
+ * described by \a descriptor.
+ *
+ * Page tables will be reference counted and allocated, if not yet present.
+ *
+ * @param descriptor Pointer to the memory descriptor to the mapping
+ */
+_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor);
+
+/** @brief Resize Mali page tables for mapping
+ *
+ * This function will Resize the Mali page tables for mapping the memory
+ * described by \a descriptor.
+ *
+ * Page tables will be reference counted and allocated, if not yet present.
+ *
+ * @param descriptor Pointer to the memory descriptor to the mapping
+ * @param new_size The new size of descriptor
+ */
+_mali_osk_errcode_t mali_mem_mali_map_resize(mali_mem_allocation *descriptor, u32 new_size);
+
+/** @brief Free Mali page tables for mapping
+ *
+ * This function will unmap pages from Mali memory and free the page tables
+ * that are now unused.
+ *
+ * The updated pages in the Mali L2 cache will be invalidated, and the MMU TLBs will be zapped if necessary.
+ *
+ * @param descriptor Pointer to the memory descriptor to unmap
+ */
+void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags);
+
+/** @brief Parse resource and prepare the OS memory allocator
+ *
+ * @param size Maximum size to allocate for Mali GPU.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size);
+
+/** @brief Parse resource and prepare the dedicated memory allocator
+ *
+ * @param start Physical start address of dedicated Mali GPU memory.
+ * @param size Size of dedicated Mali GPU memory.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size);
+
+
+struct mali_page_node *_mali_page_node_allocate(mali_page_node_type type);
+
+void _mali_page_node_ref(struct mali_page_node *node);
+void _mali_page_node_unref(struct mali_page_node *node);
+void _mali_page_node_add_page(struct mali_page_node *node, struct page *page);
+
+void _mali_page_node_add_block_item(struct mali_page_node *node, mali_block_item *item);
+
+void _mali_page_node_add_swap_item(struct mali_page_node *node, struct mali_swap_item *item);
+
+int _mali_page_node_get_ref_count(struct mali_page_node *node);
+dma_addr_t _mali_page_node_get_dma_addr(struct mali_page_node *node);
+unsigned long _mali_page_node_get_pfn(struct mali_page_node *node);
+
+#endif /* __MALI_MEMORY_H__ */
diff --git a/utgard/r6p2/linux/mali_memory_block_alloc.c b/utgard/r6p2/linux/mali_memory_block_alloc.c
new file mode 100755
index 0000000..af3dd4d
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_block_alloc.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_memory.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_osk.h"
+#include <linux/mutex.h>
+
+
+static mali_block_allocator *mali_mem_block_gobal_allocator = NULL;
+
+unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item)
+{
+	return (item->phy_addr & ~(MALI_BLOCK_REF_MASK));
+}
+
+
+unsigned long _mali_blk_item_get_pfn(mali_block_item *item)
+{
+	return (item->phy_addr / MALI_BLOCK_SIZE);
+}
+
+
+u32 mali_mem_block_get_ref_count(mali_page_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	return (node->blk_it->phy_addr & MALI_BLOCK_REF_MASK);
+}
+
+
+/* Increase the refence count
+* It not atomic, so it need to get sp_lock before call this function
+*/
+
+u32 mali_mem_block_add_ref(mali_page_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) < MALI_BLOCK_MAX_REF_COUNT);
+	return (node->blk_it->phy_addr++ & MALI_BLOCK_REF_MASK);
+}
+
+/* Decase the refence count
+* It not atomic, so it need to get sp_lock before call this function
+*/
+u32 mali_mem_block_dec_ref(mali_page_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) > 0);
+	return (node->blk_it->phy_addr-- & MALI_BLOCK_REF_MASK);
+}
+
+
+static mali_block_allocator *mali_mem_block_allocator_create(u32 base_address, u32 size)
+{
+	mali_block_allocator *info;
+	u32 usable_size;
+	u32 num_blocks;
+	mali_page_node *m_node;
+	mali_block_item *mali_blk_items = NULL;
+	int i = 0;
+
+	usable_size = size & ~(MALI_BLOCK_SIZE - 1);
+	MALI_DEBUG_PRINT(3, ("Mali block allocator create for region starting at 0x%08X length 0x%08X\n", base_address, size));
+	MALI_DEBUG_PRINT(4, ("%d usable bytes\n", usable_size));
+	num_blocks = usable_size / MALI_BLOCK_SIZE;
+	MALI_DEBUG_PRINT(4, ("which becomes %d blocks\n", num_blocks));
+
+	if (usable_size == 0) {
+		MALI_DEBUG_PRINT(1, ("Memory block of size %d is unusable\n", size));
+		return NULL;
+	}
+
+	info = _mali_osk_calloc(1, sizeof(mali_block_allocator));
+	if (NULL != info) {
+		INIT_LIST_HEAD(&info->free);
+		spin_lock_init(&info->sp_lock);
+		info->total_num = num_blocks;
+		mali_blk_items = _mali_osk_calloc(1, sizeof(mali_block_item) * num_blocks);
+
+		if (mali_blk_items) {
+			info->items = mali_blk_items;
+			/* add blocks(4k size) to free list*/
+			for (i = 0 ; i < num_blocks ; i++) {
+				/* add block information*/
+				mali_blk_items[i].phy_addr = base_address + (i * MALI_BLOCK_SIZE);
+				/* add  to free list */
+				m_node = _mali_page_node_allocate(MALI_PAGE_NODE_BLOCK);
+				if (m_node == NULL)
+					goto fail;
+				_mali_page_node_add_block_item(m_node, &(mali_blk_items[i]));
+				list_add_tail(&m_node->list, &info->free);
+				atomic_add(1, &info->free_num);
+			}
+			return info;
+		}
+	}
+fail:
+	mali_mem_block_allocator_destroy();
+	return NULL;
+}
+
+void mali_mem_block_allocator_destroy(void)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	MALI_DEBUG_ASSERT_POINTER(info);
+	MALI_DEBUG_PRINT(4, ("Memory block destroy !\n"));
+
+	if (NULL == info)
+		return;
+
+	list_for_each_entry_safe(m_page, m_tmp , &info->free, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		list_del(&m_page->list);
+		kfree(m_page);
+	}
+
+	_mali_osk_free(info->items);
+	_mali_osk_free(info);
+}
+
+u32 mali_mem_block_release(mali_mem_backend *mem_bkend)
+{
+	mali_mem_allocation *alloc = mem_bkend->mali_allocation;
+	u32 free_pages_nr = 0;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK);
+
+	/* Unmap the memory from the mali virtual address space. */
+	mali_mem_block_mali_unmap(alloc);
+	mutex_lock(&mem_bkend->mutex);
+	free_pages_nr = mali_mem_block_free(&mem_bkend->block_mem);
+	mutex_unlock(&mem_bkend->mutex);
+	return free_pages_nr;
+}
+
+
+int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	MALI_DEBUG_ASSERT_POINTER(info);
+
+	MALI_DEBUG_PRINT(4, ("BLOCK Mem: Allocate size = 0x%x\n", size));
+	/*do some init */
+	INIT_LIST_HEAD(&block_mem->pfns);
+
+	spin_lock(&info->sp_lock);
+	/*check if have enough space*/
+	if (atomic_read(&info->free_num) > page_count) {
+		list_for_each_entry_safe(m_page, m_tmp , &info->free, list) {
+			if (page_count > 0) {
+				MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+				MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(m_page) == 0);
+				list_move(&m_page->list, &block_mem->pfns);
+				block_mem->count++;
+				atomic_dec(&info->free_num);
+				_mali_page_node_ref(m_page);
+			} else {
+				break;
+			}
+			page_count--;
+		}
+	} else {
+		/* can't allocate from BLOCK memory*/
+		spin_unlock(&info->sp_lock);
+		return -1;
+	}
+
+	spin_unlock(&info->sp_lock);
+	return 0;
+}
+
+u32 mali_mem_block_free(mali_mem_block_mem *block_mem)
+{
+	u32 free_pages_nr = 0;
+
+	free_pages_nr = mali_mem_block_free_list(&block_mem->pfns);
+	MALI_DEBUG_PRINT(4, ("BLOCK Mem free : allocated size = 0x%x, free size = 0x%x\n", block_mem->count * _MALI_OSK_MALI_PAGE_SIZE,
+			     free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+	block_mem->count = 0;
+	MALI_DEBUG_ASSERT(list_empty(&block_mem->pfns));
+
+	return free_pages_nr;
+}
+
+
+u32 mali_mem_block_free_list(struct list_head *list)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	u32 free_pages_nr = 0;
+
+	if (info) {
+		spin_lock(&info->sp_lock);
+		list_for_each_entry_safe(m_page, m_tmp , list, list) {
+			if (1 == _mali_page_node_get_ref_count(m_page)) {
+				free_pages_nr++;
+			}
+			mali_mem_block_free_node(m_page);
+		}
+		spin_unlock(&info->sp_lock);
+	}
+	return free_pages_nr;
+}
+
+/* free the node,*/
+void mali_mem_block_free_node(struct mali_page_node *node)
+{
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+
+	/* only handle BLOCK node */
+	if (node->type == MALI_PAGE_NODE_BLOCK && info) {
+		/*Need to make this atomic?*/
+		if (1 == _mali_page_node_get_ref_count(node)) {
+			/*Move to free list*/
+			_mali_page_node_unref(node);
+			list_move_tail(&node->list, &info->free);
+			atomic_add(1, &info->free_num);
+		} else {
+			_mali_page_node_unref(node);
+			list_del(&node->list);
+			kfree(node);
+		}
+	}
+}
+
+/* unref the node, but not free it */
+_mali_osk_errcode_t mali_mem_block_unref_node(struct mali_page_node *node)
+{
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	mali_page_node *new_node;
+
+	/* only handle BLOCK node */
+	if (node->type == MALI_PAGE_NODE_BLOCK && info) {
+		/*Need to make this atomic?*/
+		if (1 == _mali_page_node_get_ref_count(node)) {
+			/* allocate a  new node, Add to free list, keep the old node*/
+			_mali_page_node_unref(node);
+			new_node = _mali_page_node_allocate(MALI_PAGE_NODE_BLOCK);
+			if (new_node) {
+				memcpy(new_node, node, sizeof(mali_page_node));
+				list_add(&new_node->list, &info->free);
+				atomic_add(1, &info->free_num);
+			} else
+				return _MALI_OSK_ERR_FAULT;
+
+		} else {
+			_mali_page_node_unref(node);
+		}
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+
+int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+	struct mali_page_directory *pagedir = session->page_directory;
+	struct mali_page_node *m_page;
+	dma_addr_t phys;
+	u32 virt = vaddr;
+	u32 prop = props;
+
+	list_for_each_entry(m_page, &block_mem->pfns, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		phys = _mali_page_node_get_dma_addr(m_page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+		/* Verify that the "physical" address is 32-bit and
+		 * usable for Mali, when on a system with bus addresses
+		 * wider than 32-bit. */
+		MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+		mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+		virt += MALI_MMU_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+void mali_mem_block_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+
+int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+	int ret;
+	mali_mem_block_mem *block_mem = &mem_bkend->block_mem;
+	unsigned long addr = vma->vm_start;
+	struct mali_page_node *m_page;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK);
+
+	list_for_each_entry(m_page, &block_mem->pfns, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		ret = vm_insert_pfn(vma, addr, _mali_page_node_get_pfn(m_page));
+
+		if (unlikely(0 != ret)) {
+			return -EFAULT;
+		}
+		addr += _MALI_OSK_MALI_PAGE_SIZE;
+
+	}
+
+	return 0;
+}
+
+
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size)
+{
+	mali_block_allocator *allocator;
+
+	/* Do the low level linux operation first */
+
+	/* Request ownership of the memory */
+	if (_MALI_OSK_ERR_OK != _mali_osk_mem_reqregion(start, size, "Dedicated Mali GPU memory")) {
+		MALI_DEBUG_PRINT(1, ("Failed to request memory region for frame buffer (0x%08X - 0x%08X)\n", start, start + size - 1));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create generic block allocator object to handle it */
+	allocator = mali_mem_block_allocator_create(start, size);
+
+	if (NULL == allocator) {
+		MALI_DEBUG_PRINT(1, ("Memory bank registration failed\n"));
+		_mali_osk_mem_unreqregion(start, size);
+		MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	}
+
+	mali_mem_block_gobal_allocator = (mali_block_allocator *)allocator;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+mali_bool mali_memory_have_dedicated_memory(void)
+{
+	return mali_mem_block_gobal_allocator ? MALI_TRUE : MALI_FALSE;
+}
+
+u32 mali_mem_block_allocator_stat(void)
+{
+	mali_block_allocator *allocator = mali_mem_block_gobal_allocator;
+	MALI_DEBUG_ASSERT_POINTER(allocator);
+
+
+	return (allocator->total_num - atomic_read(&allocator->free_num)) * _MALI_OSK_MALI_PAGE_SIZE;
+}
diff --git a/utgard/r6p2/linux/mali_memory_block_alloc.h b/utgard/r6p2/linux/mali_memory_block_alloc.h
new file mode 100755
index 0000000..3e11ef2
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_block_alloc.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2010, 2013, 2015-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_BLOCK_ALLOCATOR_H__
+#define __MALI_BLOCK_ALLOCATOR_H__
+
+#include "mali_session.h"
+#include "mali_memory.h"
+#include <linux/spinlock.h>
+
+#include "mali_memory_types.h"
+
+#define MALI_BLOCK_SIZE (PAGE_SIZE)  /* 4 kB, manage BLOCK memory as page size */
+#define MALI_BLOCK_REF_MASK (0xFFF)
+#define MALI_BLOCK_MAX_REF_COUNT (0xFFF)
+
+
+
+typedef struct mali_block_allocator {
+	/*
+	* In free list, each node's ref_count is 0,
+	* ref_count added when allocated or referenced in COW
+	*/
+	mali_block_item *items; /* information for each block item*/
+	struct list_head free; /*free list of mali_memory_node*/
+	spinlock_t sp_lock; /*lock for reference count & free list opertion*/
+	u32 total_num; /* Number of total pages*/
+	atomic_t free_num; /*number of free pages*/
+} mali_block_allocator;
+
+unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item);
+unsigned long _mali_blk_item_get_pfn(mali_block_item *item);
+u32 mali_mem_block_get_ref_count(mali_page_node *node);
+u32 mali_mem_block_add_ref(mali_page_node *node);
+u32 mali_mem_block_dec_ref(mali_page_node *node);
+u32 mali_mem_block_release(mali_mem_backend *mem_bkend);
+int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size);
+int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+void mali_mem_block_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size);
+mali_bool mali_memory_have_dedicated_memory(void);
+u32 mali_mem_block_free(mali_mem_block_mem *block_mem);
+u32 mali_mem_block_free_list(struct list_head *list);
+void mali_mem_block_free_node(struct mali_page_node *node);
+void mali_mem_block_allocator_destroy(void);
+_mali_osk_errcode_t mali_mem_block_unref_node(struct mali_page_node *node);
+u32 mali_mem_block_allocator_stat(void);
+
+#endif /* __MALI_BLOCK_ALLOCATOR_H__ */
diff --git a/utgard/r6p2/linux/mali_memory_cow.c b/utgard/r6p2/linux/mali_memory_cow.c
new file mode 100644
index 0000000..54c66b8
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_cow.c
@@ -0,0 +1,776 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#ifdef CONFIG_ARM
+#include <asm/outercache.h>
+#endif
+#include <asm/dma-mapping.h>
+
+#include "mali_memory.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_swap_alloc.h"
+
+/**
+* allocate pages for COW backend and flush cache
+*/
+static struct page *mali_mem_cow_alloc_page(void)
+
+{
+	mali_mem_os_mem os_mem;
+	struct mali_page_node *node;
+	struct page *new_page;
+
+	int ret = 0;
+	/* allocate pages from os mem */
+	ret = mali_mem_os_alloc_pages(&os_mem, _MALI_OSK_MALI_PAGE_SIZE);
+
+	if (ret) {
+		return NULL;
+	}
+
+	MALI_DEBUG_ASSERT(1 == os_mem.count);
+
+	node = _MALI_OSK_CONTAINER_OF(os_mem.pages.next, struct mali_page_node, list);
+	new_page = node->page;
+	node->page = NULL;
+	list_del(&node->list);
+	kfree(node);
+
+	return new_page;
+}
+
+
+static struct list_head *_mali_memory_cow_get_node_list(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size)
+{
+	MALI_DEBUG_ASSERT(MALI_MEM_OS == target_bk->type || MALI_MEM_COW == target_bk->type ||
+			  MALI_MEM_BLOCK == target_bk->type || MALI_MEM_SWAP == target_bk->type);
+
+	if (MALI_MEM_OS == target_bk->type) {
+		MALI_DEBUG_ASSERT(&target_bk->os_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->os_mem.count);
+		return &target_bk->os_mem.pages;
+	} else if (MALI_MEM_COW == target_bk->type) {
+		MALI_DEBUG_ASSERT(&target_bk->cow_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->cow_mem.count);
+		return  &target_bk->cow_mem.pages;
+	} else if (MALI_MEM_BLOCK == target_bk->type) {
+		MALI_DEBUG_ASSERT(&target_bk->block_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->block_mem.count);
+		return  &target_bk->block_mem.pfns;
+	} else if (MALI_MEM_SWAP == target_bk->type) {
+		MALI_DEBUG_ASSERT(&target_bk->swap_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->swap_mem.count);
+		return  &target_bk->swap_mem.pages;
+	}
+
+	return NULL;
+}
+
+/**
+* Do COW for os memory - support do COW for memory from bank memory
+* The range_start/size can be zero, which means it will call cow_modify_range
+* latter.
+* This function allocate new pages for COW backend from os mem for a modified range
+* It will keep the page which not in the modified range and Add ref to it
+*
+* @target_bk - target allocation's backend(the allocation need to do COW)
+* @target_offset - the offset in target allocation to do COW(for support COW  a memory allocated from memory_bank, 4K align)
+* @target_size - size of target allocation to do COW (for support memory bank)
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range
+*/
+_mali_osk_errcode_t mali_memory_cow_os_memory(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size,
+		mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size)
+{
+	mali_mem_cow *cow = &backend->cow_mem;
+	struct mali_page_node *m_page, *m_tmp, *page_node;
+	int target_page = 0;
+	struct page *new_page;
+	struct list_head *pages = NULL;
+
+	pages = _mali_memory_cow_get_node_list(target_bk, target_offset, target_size);
+
+	if (NULL == pages) {
+		MALI_DEBUG_PRINT_ERROR(("No memory page  need to cow ! \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT(0 == cow->count);
+
+	INIT_LIST_HEAD(&cow->pages);
+	mutex_lock(&target_bk->mutex);
+	list_for_each_entry_safe(m_page, m_tmp, pages, list) {
+		/* add page from (target_offset,target_offset+size) to cow backend */
+		if ((target_page >= target_offset / _MALI_OSK_MALI_PAGE_SIZE) &&
+		    (target_page < ((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE))) {
+
+			/* allocate a new page node, alway use OS memory for COW */
+			page_node = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+
+			if (NULL == page_node) {
+				mutex_unlock(&target_bk->mutex);
+				goto error;
+			}
+
+			INIT_LIST_HEAD(&page_node->list);
+
+			/* check if in the modified range*/
+			if ((cow->count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+			    (cow->count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+				/* need to allocate a new page */
+				/* To simplify the case, All COW memory is allocated from os memory ?*/
+				new_page = mali_mem_cow_alloc_page();
+
+				if (NULL == new_page) {
+					kfree(page_node);
+					mutex_unlock(&target_bk->mutex);
+					goto error;
+				}
+
+				_mali_page_node_add_page(page_node, new_page);
+			} else {
+				/*Add Block memory case*/
+				if (m_page->type != MALI_PAGE_NODE_BLOCK) {
+					_mali_page_node_add_page(page_node, m_page->page);
+				} else {
+					page_node->type = MALI_PAGE_NODE_BLOCK;
+					_mali_page_node_add_block_item(page_node, m_page->blk_it);
+				}
+
+				/* add ref to this page */
+				_mali_page_node_ref(m_page);
+			}
+
+			/* add it to COW backend page list */
+			list_add_tail(&page_node->list, &cow->pages);
+			cow->count++;
+		}
+		target_page++;
+	}
+	mutex_unlock(&target_bk->mutex);
+	return _MALI_OSK_ERR_OK;
+error:
+	mali_mem_cow_release(backend, MALI_FALSE);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+_mali_osk_errcode_t mali_memory_cow_swap_memory(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size,
+		mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size)
+{
+	mali_mem_cow *cow = &backend->cow_mem;
+	struct mali_page_node *m_page, *m_tmp, *page_node;
+	int target_page = 0;
+	struct mali_swap_item *swap_item;
+	struct list_head *pages = NULL;
+
+	pages = _mali_memory_cow_get_node_list(target_bk, target_offset, target_size);
+	if (NULL == pages) {
+		MALI_DEBUG_PRINT_ERROR(("No swap memory page need to cow ! \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT(0 == cow->count);
+
+	INIT_LIST_HEAD(&cow->pages);
+	mutex_lock(&target_bk->mutex);
+
+	backend->flags |= MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN;
+
+	list_for_each_entry_safe(m_page, m_tmp, pages, list) {
+		/* add page from (target_offset,target_offset+size) to cow backend */
+		if ((target_page >= target_offset / _MALI_OSK_MALI_PAGE_SIZE) &&
+		    (target_page < ((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE))) {
+
+			/* allocate a new page node, use swap memory for COW memory swap cowed flag. */
+			page_node = _mali_page_node_allocate(MALI_PAGE_NODE_SWAP);
+
+			if (NULL == page_node) {
+				mutex_unlock(&target_bk->mutex);
+				goto error;
+			}
+
+			/* check if in the modified range*/
+			if ((cow->count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+			    (cow->count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+				/* need to allocate a new page */
+				/* To simplify the case, All COW memory is allocated from os memory ?*/
+				swap_item = mali_mem_swap_alloc_swap_item();
+
+				if (NULL == swap_item) {
+					kfree(page_node);
+					mutex_unlock(&target_bk->mutex);
+					goto error;
+				}
+
+				swap_item->idx = mali_mem_swap_idx_alloc();
+
+				if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == swap_item->idx) {
+					MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW.\n"));
+					kfree(page_node);
+					kfree(swap_item);
+					mutex_unlock(&target_bk->mutex);
+					goto error;
+				}
+
+				_mali_page_node_add_swap_item(page_node, swap_item);
+			} else {
+				_mali_page_node_add_swap_item(page_node, m_page->swap_it);
+
+				/* add ref to this page */
+				_mali_page_node_ref(m_page);
+			}
+
+			list_add_tail(&page_node->list, &cow->pages);
+			cow->count++;
+		}
+		target_page++;
+	}
+	mutex_unlock(&target_bk->mutex);
+
+	return _MALI_OSK_ERR_OK;
+error:
+	mali_mem_swap_release(backend, MALI_FALSE);
+	return _MALI_OSK_ERR_FAULT;
+
+}
+
+
+_mali_osk_errcode_t _mali_mem_put_page_node(mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		return mali_mem_os_put_page(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		return mali_mem_block_unref_node(node);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return _mali_mem_swap_put_page_node(node);
+	} else
+		MALI_DEBUG_ASSERT(0);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+/**
+* Modify a range of a exist COW backend
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range(in byte)
+*/
+_mali_osk_errcode_t mali_memory_cow_modify_range(mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size)
+{
+	mali_mem_allocation *alloc = NULL;
+	struct mali_session_data *session;
+	mali_mem_cow *cow = &backend->cow_mem;
+	struct mali_page_node *m_page, *m_tmp;
+	LIST_HEAD(pages);
+	struct page *new_page;
+	u32 count = 0;
+	s32 change_pages_nr = 0;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+
+	if (range_start % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (range_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	alloc = backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == backend->type);
+	MALI_DEBUG_ASSERT(((range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE) <= cow->count);
+
+	mutex_lock(&backend->mutex);
+
+	/* free pages*/
+	list_for_each_entry_safe(m_page, m_tmp, &cow->pages, list) {
+
+		/* check if in the modified range*/
+		if ((count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+		    (count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+			if (MALI_PAGE_NODE_SWAP != m_page->type) {
+				new_page = mali_mem_cow_alloc_page();
+
+				if (NULL == new_page) {
+					goto error;
+				}
+				if (1 != _mali_page_node_get_ref_count(m_page))
+					change_pages_nr++;
+				/* unref old page*/
+				_mali_osk_mutex_wait(session->cow_lock);
+				if (_mali_mem_put_page_node(m_page)) {
+					__free_page(new_page);
+					_mali_osk_mutex_signal(session->cow_lock);
+					goto error;
+				}
+				_mali_osk_mutex_signal(session->cow_lock);
+				/* add new page*/
+				/* always use OS for COW*/
+				m_page->type = MALI_PAGE_NODE_OS;
+				_mali_page_node_add_page(m_page, new_page);
+			} else {
+				struct mali_swap_item *swap_item;
+
+				swap_item = mali_mem_swap_alloc_swap_item();
+
+				if (NULL == swap_item) {
+					goto error;
+				}
+
+				swap_item->idx = mali_mem_swap_idx_alloc();
+
+				if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == swap_item->idx) {
+					MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW modify range.\n"));
+					kfree(swap_item);
+					goto error;
+				}
+
+				if (1 != _mali_page_node_get_ref_count(m_page)) {
+					change_pages_nr++;
+				}
+
+				if (_mali_mem_put_page_node(m_page)) {
+					mali_mem_swap_free_swap_item(swap_item);
+					goto error;
+				}
+
+				_mali_page_node_add_swap_item(m_page, swap_item);
+			}
+		}
+		count++;
+	}
+	cow->change_pages_nr  = change_pages_nr;
+
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == alloc->type);
+
+	/* ZAP cpu mapping(modified range), and do cpu mapping here if need */
+	if (NULL != alloc->cpu_mapping.vma) {
+		MALI_DEBUG_ASSERT(0 != alloc->backend_handle);
+		MALI_DEBUG_ASSERT(NULL != alloc->cpu_mapping.vma);
+		MALI_DEBUG_ASSERT(alloc->cpu_mapping.vma->vm_end - alloc->cpu_mapping.vma->vm_start >= range_size);
+
+		if (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+			zap_vma_ptes(alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size);
+
+			ret = mali_mem_cow_cpu_map_pages_locked(backend, alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start  + range_start, range_size / _MALI_OSK_MALI_PAGE_SIZE);
+
+			if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+				MALI_DEBUG_PRINT(2, ("mali_memory_cow_modify_range: cpu mapping failed !\n"));
+				ret =  _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			/* used to trigger page fault for swappable cowed memory. */
+			alloc->cpu_mapping.vma->vm_flags |= VM_PFNMAP;
+			alloc->cpu_mapping.vma->vm_flags |= VM_MIXEDMAP;
+
+			zap_vma_ptes(alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size);
+			/* delete this flag to let swappble is ummapped regard to stauct page not page frame. */
+			alloc->cpu_mapping.vma->vm_flags &= ~VM_PFNMAP;
+			alloc->cpu_mapping.vma->vm_flags &= ~VM_MIXEDMAP;
+		}
+	}
+
+error:
+	mutex_unlock(&backend->mutex);
+	return ret;
+
+}
+
+
+/**
+* Allocate pages for COW backend
+* @alloc  -allocation for COW allocation
+* @target_bk - target allocation's backend(the allocation need to do COW)
+* @target_offset - the offset in target allocation to do COW(for support COW  a memory allocated from memory_bank, 4K align)
+* @target_size - size of target allocation to do COW (for support memory bank)(in byte)
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range(in byte)
+*/
+_mali_osk_errcode_t mali_memory_do_cow(mali_mem_backend *target_bk,
+				       u32 target_offset,
+				       u32 target_size,
+				       mali_mem_backend *backend,
+				       u32 range_start,
+				       u32 range_size)
+{
+	struct mali_session_data *session = backend->mali_allocation->session;
+
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+
+	/* size & offset must be a multiple of the system page size */
+	if (target_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (range_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (target_offset % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (range_start % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	/* check backend type */
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == backend->type);
+
+	switch (target_bk->type) {
+	case MALI_MEM_OS:
+	case MALI_MEM_BLOCK:
+		return mali_memory_cow_os_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		break;
+	case MALI_MEM_COW:
+		if (backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED) {
+			return mali_memory_cow_swap_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		} else {
+			return mali_memory_cow_os_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		}
+		break;
+	case MALI_MEM_SWAP:
+		return mali_memory_cow_swap_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		break;
+	case MALI_MEM_EXTERNAL:
+		/*NOT support yet*/
+		MALI_DEBUG_PRINT_ERROR(("External physical memory not supported ! \n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	case MALI_MEM_DMA_BUF:
+		/*NOT support yet*/
+		MALI_DEBUG_PRINT_ERROR(("DMA buffer not supported ! \n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	case MALI_MEM_UMP:
+		/*NOT support yet*/
+		MALI_DEBUG_PRINT_ERROR(("UMP buffer not supported ! \n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	default:
+		/*Not support yet*/
+		MALI_DEBUG_PRINT_ERROR(("Invalid memory type not supported ! \n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/**
+* Map COW backend memory to mali
+* Support OS/BLOCK for mali_page_node
+*/
+int mali_mem_cow_mali_map(mali_mem_backend *mem_bkend, u32 range_start, u32 range_size)
+{
+	mali_mem_allocation *cow_alloc;
+	struct mali_page_node *m_page;
+	struct mali_session_data *session;
+	struct mali_page_directory *pagedir;
+	u32 virt, start;
+
+	cow_alloc = mem_bkend->mali_allocation;
+	virt = cow_alloc->mali_vma_node.vm_node.start;
+	start = virt;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+	MALI_DEBUG_ASSERT_POINTER(cow_alloc);
+
+	session = cow_alloc->session;
+	pagedir = session->page_directory;
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+	list_for_each_entry(m_page, &mem_bkend->cow_mem.pages, list) {
+		if ((virt - start >= range_start) && (virt - start < range_start + range_size)) {
+			dma_addr_t phys = _mali_page_node_get_dma_addr(m_page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+			MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+			mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys,
+						MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+		}
+		virt += MALI_MMU_PAGE_SIZE;
+	}
+	return 0;
+}
+
+/**
+* Map COW backend to cpu
+* support OS/BLOCK memory
+*/
+int mali_mem_cow_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+	mali_mem_cow *cow = &mem_bkend->cow_mem;
+	struct mali_page_node *m_page;
+	int ret;
+	unsigned long addr = vma->vm_start;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+
+	list_for_each_entry(m_page, &cow->pages, list) {
+		/* We should use vm_insert_page, but it does a dcache
+		 * flush which makes it way slower than remap_pfn_range or vm_insert_pfn.
+		ret = vm_insert_page(vma, addr, page);
+		*/
+		ret = vm_insert_pfn(vma, addr, _mali_page_node_get_pfn(m_page));
+
+		if (unlikely(0 != ret)) {
+			return ret;
+		}
+		addr += _MALI_OSK_MALI_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+/**
+* Map some pages(COW backend) to CPU vma@vaddr
+*@ mem_bkend - COW backend
+*@ vma
+*@ vaddr -start CPU vaddr mapped to
+*@ num - max number of pages to map to CPU vaddr
+*/
+_mali_osk_errcode_t mali_mem_cow_cpu_map_pages_locked(mali_mem_backend *mem_bkend,
+		struct vm_area_struct *vma,
+		unsigned long vaddr,
+		int num)
+{
+	mali_mem_cow *cow = &mem_bkend->cow_mem;
+	struct mali_page_node *m_page;
+	int ret;
+	int offset;
+	int count ;
+	unsigned long vstart = vma->vm_start;
+	count = 0;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+	MALI_DEBUG_ASSERT(0 == vaddr % _MALI_OSK_MALI_PAGE_SIZE);
+	MALI_DEBUG_ASSERT(0 == vstart % _MALI_OSK_MALI_PAGE_SIZE);
+	offset = (vaddr - vstart) / _MALI_OSK_MALI_PAGE_SIZE;
+
+	list_for_each_entry(m_page, &cow->pages, list) {
+		if ((count >= offset) && (count < offset + num)) {
+			ret = vm_insert_pfn(vma, vaddr, _mali_page_node_get_pfn(m_page));
+
+			if (unlikely(0 != ret)) {
+				if (count == offset) {
+					return _MALI_OSK_ERR_FAULT;
+				} else {
+					/* ret is EBUSY when page isn't in modify range, but now it's OK*/
+					return _MALI_OSK_ERR_OK;
+				}
+			}
+			vaddr += _MALI_OSK_MALI_PAGE_SIZE;
+		}
+		count++;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/**
+* Release COW backend memory
+* free it directly(put_page--unref page), not put into pool
+*/
+u32 mali_mem_cow_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped)
+{
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	u32 free_pages_nr = 0;
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+	alloc = mem_bkend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (MALI_MEM_BACKEND_FLAG_SWAP_COWED & mem_bkend->flags)) {
+		/* Unmap the memory from the mali virtual address space. */
+		if (MALI_TRUE == is_mali_mapped)
+			mali_mem_os_mali_unmap(alloc);
+		/* free cow backend list*/
+		_mali_osk_mutex_wait(session->cow_lock);
+		free_pages_nr = mali_mem_os_free(&mem_bkend->cow_mem.pages, mem_bkend->cow_mem.count, MALI_TRUE);
+		_mali_osk_mutex_signal(session->cow_lock);
+
+		free_pages_nr += mali_mem_block_free_list(&mem_bkend->cow_mem.pages);
+
+		MALI_DEBUG_ASSERT(list_empty(&mem_bkend->cow_mem.pages));
+	} else {
+		free_pages_nr = mali_mem_swap_release(mem_bkend, is_mali_mapped);
+	}
+
+
+	MALI_DEBUG_PRINT(4, ("COW Mem free : allocated size = 0x%x, free size = 0x%x\n", mem_bkend->cow_mem.count * _MALI_OSK_MALI_PAGE_SIZE,
+			     free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+
+	mem_bkend->cow_mem.count = 0;
+	return free_pages_nr;
+}
+
+
+/* Dst node could os node or swap node. */
+void _mali_mem_cow_copy_page(mali_page_node *src_node, mali_page_node *dst_node)
+{
+	void *dst, *src;
+	struct page *dst_page;
+	dma_addr_t dma_addr;
+
+	MALI_DEBUG_ASSERT(src_node != NULL);
+	MALI_DEBUG_ASSERT(dst_node != NULL);
+	MALI_DEBUG_ASSERT(dst_node->type == MALI_PAGE_NODE_OS
+			  || dst_node->type == MALI_PAGE_NODE_SWAP);
+
+	if (dst_node->type == MALI_PAGE_NODE_OS) {
+		dst_page = dst_node->page;
+	} else {
+		dst_page = dst_node->swap_it->page;
+	}
+
+	dma_unmap_page(&mali_platform_device->dev, _mali_page_node_get_dma_addr(dst_node),
+		       _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+	/* map it , and copy the content*/
+	dst = kmap_atomic(dst_page);
+
+	if (src_node->type == MALI_PAGE_NODE_OS ||
+	    src_node->type == MALI_PAGE_NODE_SWAP) {
+		struct page *src_page;
+
+		if (src_node->type == MALI_PAGE_NODE_OS) {
+			src_page = src_node->page;
+		} else {
+			src_page = src_node->swap_it->page;
+		}
+
+		/* Clear and invaliate cache */
+		/* In ARM architecture, speculative read may pull stale data into L1 cache
+		 * for kernel linear mapping page table. DMA_BIDIRECTIONAL could
+		 * invalidate the L1 cache so that following read get the latest data
+		*/
+		dma_unmap_page(&mali_platform_device->dev, _mali_page_node_get_dma_addr(src_node),
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+		src = kmap_atomic(src_page);
+		memcpy(dst, src , _MALI_OSK_MALI_PAGE_SIZE);
+		kunmap_atomic(src);
+		dma_addr = dma_map_page(&mali_platform_device->dev, src_page,
+					0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+		if (src_node->type == MALI_PAGE_NODE_SWAP) {
+			src_node->swap_it->dma_addr = dma_addr;
+		}
+	} else if (src_node->type == MALI_PAGE_NODE_BLOCK) {
+		/*
+		* use ioremap to map src for BLOCK memory
+		*/
+		src = ioremap_nocache(_mali_page_node_get_dma_addr(src_node), _MALI_OSK_MALI_PAGE_SIZE);
+		memcpy(dst, src , _MALI_OSK_MALI_PAGE_SIZE);
+		iounmap(src);
+	}
+	kunmap_atomic(dst);
+	dma_addr = dma_map_page(&mali_platform_device->dev, dst_page,
+				0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+	if (dst_node->type == MALI_PAGE_NODE_SWAP) {
+		dst_node->swap_it->dma_addr = dma_addr;
+	}
+}
+
+
+/*
+* allocate page on demand when CPU access it,
+* THis used in page fault handler
+*/
+_mali_osk_errcode_t mali_mem_cow_allocate_on_demand(mali_mem_backend *mem_bkend, u32 offset_page)
+{
+	struct page *new_page = NULL;
+	struct mali_page_node *new_node = NULL;
+	int i = 0;
+	struct mali_page_node *m_page, *found_node = NULL;
+	struct  mali_session_data *session = NULL;
+	mali_mem_cow *cow = &mem_bkend->cow_mem;
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+	MALI_DEBUG_ASSERT(offset_page < mem_bkend->size / _MALI_OSK_MALI_PAGE_SIZE);
+	MALI_DEBUG_PRINT(4, ("mali_mem_cow_allocate_on_demand !, offset_page =0x%x\n", offset_page));
+
+	/* allocate new page here */
+	new_page = mali_mem_cow_alloc_page();
+	if (!new_page)
+		return _MALI_OSK_ERR_NOMEM;
+
+	new_node = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+	if (!new_node) {
+		__free_page(new_page);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	/* find the page in backend*/
+	list_for_each_entry(m_page, &cow->pages, list) {
+		if (i == offset_page) {
+			found_node = m_page;
+			break;
+		}
+		i++;
+	}
+	MALI_DEBUG_ASSERT(found_node);
+	if (NULL == found_node) {
+		__free_page(new_page);
+		kfree(new_node);
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	_mali_page_node_add_page(new_node, new_page);
+
+	/* Copy the src page's content to new page */
+	_mali_mem_cow_copy_page(found_node, new_node);
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend->mali_allocation);
+	session = mem_bkend->mali_allocation->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	if (1 != _mali_page_node_get_ref_count(found_node)) {
+		atomic_add(1, &session->mali_mem_allocated_pages);
+		if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+			session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+		mem_bkend->cow_mem.change_pages_nr++;
+	}
+
+	_mali_osk_mutex_wait(session->cow_lock);
+	if (_mali_mem_put_page_node(found_node)) {
+		__free_page(new_page);
+		kfree(new_node);
+		_mali_osk_mutex_signal(session->cow_lock);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	_mali_osk_mutex_signal(session->cow_lock);
+
+	list_replace(&found_node->list, &new_node->list);
+
+	kfree(found_node);
+
+	/* map to GPU side*/
+	_mali_osk_mutex_wait(session->memory_lock);
+	mali_mem_cow_mali_map(mem_bkend, offset_page * _MALI_OSK_MALI_PAGE_SIZE, _MALI_OSK_MALI_PAGE_SIZE);
+	_mali_osk_mutex_signal(session->memory_lock);
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r6p2/linux/mali_memory_cow.h b/utgard/r6p2/linux/mali_memory_cow.h
new file mode 100644
index 0000000..9b9e834
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_cow.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_COW_H__
+#define __MALI_MEMORY_COW_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+#include "mali_memory_types.h"
+
+int mali_mem_cow_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+_mali_osk_errcode_t mali_mem_cow_cpu_map_pages_locked(mali_mem_backend *mem_bkend,
+		struct vm_area_struct *vma,
+		unsigned long vaddr,
+		int num);
+
+_mali_osk_errcode_t mali_memory_do_cow(mali_mem_backend *target_bk,
+				       u32 target_offset,
+				       u32 target_size,
+				       mali_mem_backend *backend,
+				       u32 range_start,
+				       u32 range_size);
+
+_mali_osk_errcode_t mali_memory_cow_modify_range(mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size);
+
+_mali_osk_errcode_t mali_memory_cow_os_memory(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size,
+		mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size);
+
+void _mali_mem_cow_copy_page(mali_page_node *src_node, mali_page_node *dst_node);
+
+int mali_mem_cow_mali_map(mali_mem_backend *mem_bkend, u32 range_start, u32 range_size);
+u32 mali_mem_cow_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped);
+_mali_osk_errcode_t mali_mem_cow_allocate_on_demand(mali_mem_backend *mem_bkend, u32 offset_page);
+#endif
+
diff --git a/utgard/r6p2/linux/mali_memory_defer_bind.c b/utgard/r6p2/linux/mali_memory_defer_bind.c
new file mode 100644
index 0000000..a13eea0
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_defer_bind.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#ifdef CONFIG_ARM
+#include <asm/outercache.h>
+#endif
+#include <asm/dma-mapping.h>
+
+#include "mali_memory.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory_defer_bind.h"
+#include "mali_executor.h"
+#include "mali_osk.h"
+#include "mali_scheduler.h"
+#include "mali_gp_job.h"
+
+mali_defer_bind_manager *mali_dmem_man = NULL;
+
+static u32 mali_dmem_get_gp_varying_size(struct mali_gp_job *gp_job)
+{
+	return gp_job->required_varying_memsize / _MALI_OSK_MALI_PAGE_SIZE;
+}
+
+_mali_osk_errcode_t mali_mem_defer_bind_manager_init(void)
+{
+	mali_dmem_man = _mali_osk_calloc(1, sizeof(struct mali_defer_bind_manager));
+	if (!mali_dmem_man)
+		return _MALI_OSK_ERR_NOMEM;
+
+	atomic_set(&mali_dmem_man->num_used_pages, 0);
+	atomic_set(&mali_dmem_man->num_dmem, 0);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+void mali_mem_defer_bind_manager_destory(void)
+{
+	if (mali_dmem_man) {
+		MALI_DEBUG_ASSERT(0 == atomic_read(&mali_dmem_man->num_dmem));
+		kfree(mali_dmem_man);
+	}
+	mali_dmem_man = NULL;
+}
+
+
+/*allocate pages from OS memory*/
+_mali_osk_errcode_t mali_mem_defer_alloc_mem(u32 require, struct mali_session_data *session, mali_defer_mem_block *dblock)
+{
+	int retval = 0;
+	u32 num_pages = require;
+	mali_mem_os_mem os_mem;
+
+	retval = mali_mem_os_alloc_pages(&os_mem, num_pages * _MALI_OSK_MALI_PAGE_SIZE);
+
+	/* add to free pages list */
+	if (0 == retval) {
+		MALI_DEBUG_PRINT(4, ("mali_mem_defer_alloc_mem ,,*** pages allocate = 0x%x \n", num_pages));
+		list_splice(&os_mem.pages, &dblock->free_pages);
+		atomic_add(os_mem.count, &dblock->num_free_pages);
+		atomic_add(os_mem.count, &session->mali_mem_allocated_pages);
+		if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+			session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+		return _MALI_OSK_ERR_OK;
+	} else
+		return _MALI_OSK_ERR_FAULT;
+}
+
+_mali_osk_errcode_t mali_mem_prepare_mem_for_job(struct mali_gp_job *next_gp_job, mali_defer_mem_block *dblock)
+{
+	u32 require_page;
+
+	if (!next_gp_job)
+		return _MALI_OSK_ERR_FAULT;
+
+	require_page = mali_dmem_get_gp_varying_size(next_gp_job);
+
+	MALI_DEBUG_PRINT(4, ("mali_mem_defer_prepare_mem_work, require alloc page 0x%x\n",
+			     require_page));
+	/* allocate more pages from OS */
+	if (_MALI_OSK_ERR_OK != mali_mem_defer_alloc_mem(require_page, next_gp_job->session, dblock)) {
+		MALI_DEBUG_PRINT(1, ("ERROR##mali_mem_defer_prepare_mem_work, allocate page failed!!"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	next_gp_job->bind_flag = MALI_DEFER_BIND_MEMORY_PREPARED;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/* do preparetion for allocation before defer bind */
+_mali_osk_errcode_t mali_mem_defer_bind_allocation_prepare(mali_mem_allocation *alloc, struct list_head *list, u32 *required_varying_memsize)
+{
+	mali_mem_backend *mem_bkend = NULL;
+	struct mali_backend_bind_list *bk_list = _mali_osk_calloc(1, sizeof(struct mali_backend_bind_list));
+	if (NULL == bk_list)
+		return _MALI_OSK_ERR_FAULT;
+
+	INIT_LIST_HEAD(&bk_list->node);
+	/* Get backend memory */
+	mutex_lock(&mali_idr_mutex);
+	if (!(mem_bkend = idr_find(&mali_backend_idr, alloc->backend_handle))) {
+		MALI_DEBUG_PRINT(1, ("Can't find memory backend in defer bind!\n"));
+		mutex_unlock(&mali_idr_mutex);
+		_mali_osk_free(bk_list);
+		return _MALI_OSK_ERR_FAULT;
+	}
+	mutex_unlock(&mali_idr_mutex);
+
+	/* If the mem backend has already been bound, no need to bind again.*/
+	if (mem_bkend->os_mem.count > 0) {
+		_mali_osk_free(bk_list);
+		return _MALI_OSK_ERR_OK;
+	}
+
+	MALI_DEBUG_PRINT(4, ("bind_allocation_prepare:: allocation =%x vaddr=0x%x!\n", alloc, alloc->mali_vma_node.vm_node.start));
+
+	INIT_LIST_HEAD(&mem_bkend->os_mem.pages);
+
+	bk_list->bkend = mem_bkend;
+	bk_list->vaddr = alloc->mali_vma_node.vm_node.start;
+	bk_list->session = alloc->session;
+	bk_list->page_num = mem_bkend->size / _MALI_OSK_MALI_PAGE_SIZE;
+	*required_varying_memsize +=  mem_bkend->size;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+
+	/* add to job to do list */
+	list_add(&bk_list->node, list);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+
+/* bind phyiscal memory to allocation
+This function will be called in IRQ handler*/
+static _mali_osk_errcode_t mali_mem_defer_bind_allocation(struct mali_backend_bind_list *bk_node,
+		struct list_head *pages)
+{
+	struct mali_session_data *session = bk_node->session;
+	mali_mem_backend *mem_bkend = bk_node->bkend;
+	MALI_DEBUG_PRINT(4, ("mali_mem_defer_bind_allocation, bind bkend = %x page num=0x%x vaddr=%x session=%x\n", mem_bkend, bk_node->page_num, bk_node->vaddr, session));
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+	list_splice(pages, &mem_bkend->os_mem.pages);
+	mem_bkend->os_mem.count = bk_node->page_num;
+
+	if (mem_bkend->type == MALI_MEM_OS) {
+		mali_mem_os_mali_map(&mem_bkend->os_mem, session, bk_node->vaddr, 0,
+				     mem_bkend->os_mem.count, MALI_MMU_FLAGS_DEFAULT);
+	}
+	smp_wmb();
+	bk_node->flag = MALI_DEFER_BIND_MEMORY_BINDED;
+	mem_bkend->flags &= ~MALI_MEM_BACKEND_FLAG_NOT_BINDED;
+	mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_BINDED;
+	return _MALI_OSK_ERR_OK;
+}
+
+
+static struct list_head *mali_mem_defer_get_free_page_list(u32 count, struct list_head *pages, mali_defer_mem_block *dblock)
+{
+	int i = 0;
+	struct mali_page_node *m_page, *m_tmp;
+
+	if (atomic_read(&dblock->num_free_pages) < count) {
+		return NULL;
+	} else {
+		list_for_each_entry_safe(m_page, m_tmp, &dblock->free_pages, list) {
+			if (i < count) {
+				list_move_tail(&m_page->list, pages);
+			} else {
+				break;
+			}
+			i++;
+		}
+		MALI_DEBUG_ASSERT(i == count);
+		atomic_sub(count, &dblock->num_free_pages);
+		return pages;
+	}
+}
+
+
+/* called in job start IOCTL to bind physical memory for each allocations
+@ bk_list backend list to do defer bind
+@ pages page list to do this bind
+@ count number of pages
+*/
+_mali_osk_errcode_t mali_mem_defer_bind(struct mali_gp_job *gp,
+					struct mali_defer_mem_block *dmem_block)
+{
+	struct mali_defer_mem *dmem = NULL;
+	struct mali_backend_bind_list *bkn, *bkn_tmp;
+	LIST_HEAD(pages);
+
+	if (gp->required_varying_memsize != (atomic_read(&dmem_block->num_free_pages) * _MALI_OSK_MALI_PAGE_SIZE)) {
+		MALI_DEBUG_PRINT_ERROR(("#BIND:  The memsize of varying buffer not match to the pagesize of the dmem_block!!## \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_PRINT(4, ("#BIND: GP job=%x## \n", gp));
+	dmem = (mali_defer_mem *)_mali_osk_calloc(1, sizeof(struct mali_defer_mem));
+	if (dmem) {
+		INIT_LIST_HEAD(&dmem->node);
+		gp->dmem = dmem;
+	} else {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	atomic_add(1, &mali_dmem_man->num_dmem);
+	/* for each bk_list backend, do bind */
+	list_for_each_entry_safe(bkn, bkn_tmp , &gp->vary_todo, node) {
+		INIT_LIST_HEAD(&pages);
+		if (likely(mali_mem_defer_get_free_page_list(bkn->page_num, &pages, dmem_block))) {
+			list_del(&bkn->node);
+			mali_mem_defer_bind_allocation(bkn, &pages);
+			_mali_osk_free(bkn);
+		} else {
+			/* not enough memory will not happen */
+			MALI_DEBUG_PRINT_ERROR(("#BIND: NOT enough memory when binded !!## \n"));
+			_mali_osk_free(gp->dmem);
+			return _MALI_OSK_ERR_NOMEM;
+		}
+	}
+
+	if (!list_empty(&gp->vary_todo)) {
+		MALI_DEBUG_PRINT_ERROR(("#BIND:  The deferbind backend list isn't empty !!## \n"));
+		_mali_osk_free(gp->dmem);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	dmem->flag = MALI_DEFER_BIND_MEMORY_BINDED;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_defer_dmem_free(struct mali_gp_job *gp)
+{
+	if (gp->dmem) {
+		atomic_dec(&mali_dmem_man->num_dmem);
+		_mali_osk_free(gp->dmem);
+	}
+}
+
diff --git a/utgard/r6p2/linux/mali_memory_defer_bind.h b/utgard/r6p2/linux/mali_memory_defer_bind.h
new file mode 100644
index 0000000..4cf6f16
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_defer_bind.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __MALI_MEMORY_DEFER_BIND_H_
+#define __MALI_MEMORY_DEFER_BIND_H_
+
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_uk_types.h"
+
+struct mali_gp_job;
+
+typedef struct mali_defer_mem {
+	struct list_head node;   /*dlist node in bind manager */
+	u32 flag;
+} mali_defer_mem;
+
+
+typedef struct mali_defer_mem_block {
+	struct list_head free_pages; /* page pool */
+	atomic_t num_free_pages;
+} mali_defer_mem_block;
+
+/* varying memory list need to bind */
+typedef struct mali_backend_bind_list {
+	struct list_head node;
+	struct mali_mem_backend *bkend;
+	u32 vaddr;
+	u32 page_num;
+	struct mali_session_data *session;
+	u32 flag;
+} mali_backend_bind_lists;
+
+
+typedef struct mali_defer_bind_manager {
+	atomic_t num_used_pages;
+	atomic_t num_dmem;
+} mali_defer_bind_manager;
+
+_mali_osk_errcode_t mali_mem_defer_bind_manager_init(void);
+void mali_mem_defer_bind_manager_destory(void);
+_mali_osk_errcode_t mali_mem_defer_bind(struct mali_gp_job *gp, struct mali_defer_mem_block *dmem_block);
+_mali_osk_errcode_t mali_mem_defer_bind_allocation_prepare(mali_mem_allocation *alloc, struct list_head *list,  u32 *required_varying_memsize);
+_mali_osk_errcode_t mali_mem_prepare_mem_for_job(struct mali_gp_job *next_gp_job, mali_defer_mem_block *dblock);
+void mali_mem_defer_dmem_free(struct mali_gp_job *gp);
+
+#endif
diff --git a/utgard/r6p2/linux/mali_memory_dma_buf.c b/utgard/r6p2/linux/mali_memory_dma_buf.c
new file mode 100755
index 0000000..f0d1e07
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_dma_buf.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/fs.h>      /* file system operations */
+#include <asm/uaccess.h>        /* user space access */
+#include <linux/dma-buf.h>
+#include <linux/scatterlist.h>
+#include <linux/rbtree.h>
+#include <linux/platform_device.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_linux.h"
+
+#include "mali_memory.h"
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_virtual.h"
+#include "mali_pp_job.h"
+
+/*
+ * Map DMA buf attachment \a mem into \a session at virtual address \a virt.
+ */
+static int mali_dma_buf_map(mali_mem_backend *mem_backend)
+{
+	mali_mem_allocation *alloc;
+	struct mali_dma_buf_attachment *mem;
+	struct  mali_session_data *session;
+	struct mali_page_directory *pagedir;
+	_mali_osk_errcode_t err;
+	struct scatterlist *sg;
+	u32 virt, flags;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	mem = mem_backend->dma_buf.attachment;
+	MALI_DEBUG_ASSERT_POINTER(mem);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(mem->session == session);
+
+	virt = alloc->mali_vma_node.vm_node.start;
+	flags = alloc->flags;
+
+	mali_session_memory_lock(session);
+	mem->map_ref++;
+
+	MALI_DEBUG_PRINT(5, ("Mali DMA-buf: map attachment %p, new map_ref = %d\n", mem, mem->map_ref));
+
+	if (1 == mem->map_ref) {
+
+		/* First reference taken, so we need to map the dma buf */
+		MALI_DEBUG_ASSERT(!mem->is_mapped);
+
+		mem->sgt = dma_buf_map_attachment(mem->attachment, DMA_BIDIRECTIONAL);
+		if (IS_ERR_OR_NULL(mem->sgt)) {
+			MALI_DEBUG_PRINT_ERROR(("Failed to map dma-buf attachment\n"));
+			mem->map_ref--;
+			mali_session_memory_unlock(session);
+			return -EFAULT;
+		}
+
+		err = mali_mem_mali_map_prepare(alloc);
+		if (_MALI_OSK_ERR_OK != err) {
+			MALI_DEBUG_PRINT(1, ("Mapping of DMA memory failed\n"));
+			mem->map_ref--;
+			mali_session_memory_unlock(session);
+			return -ENOMEM;
+		}
+
+		pagedir = mali_session_get_page_directory(session);
+		MALI_DEBUG_ASSERT_POINTER(pagedir);
+
+		for_each_sg(mem->sgt->sgl, sg, mem->sgt->nents, i) {
+			u32 size = sg_dma_len(sg);
+			dma_addr_t phys = sg_dma_address(sg);
+
+			/* sg must be page aligned. */
+			MALI_DEBUG_ASSERT(0 == size % MALI_MMU_PAGE_SIZE);
+			MALI_DEBUG_ASSERT(0 == (phys & ~(uintptr_t)0xFFFFFFFF));
+
+			mali_mmu_pagedir_update(pagedir, virt, phys, size, MALI_MMU_FLAGS_DEFAULT);
+
+			virt += size;
+		}
+
+		if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+			u32 guard_phys;
+			MALI_DEBUG_PRINT(7, ("Mapping in extra guard page\n"));
+
+			guard_phys = sg_dma_address(mem->sgt->sgl);
+			mali_mmu_pagedir_update(pagedir, virt, guard_phys, MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+		}
+
+		mem->is_mapped = MALI_TRUE;
+		mali_session_memory_unlock(session);
+		/* Wake up any thread waiting for buffer to become mapped */
+		wake_up_all(&mem->wait_queue);
+	} else {
+		MALI_DEBUG_ASSERT(mem->is_mapped);
+		mali_session_memory_unlock(session);
+	}
+
+	return 0;
+}
+
+static void mali_dma_buf_unmap(mali_mem_allocation *alloc, struct mali_dma_buf_attachment *mem)
+{
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(mem);
+	MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	MALI_DEBUG_ASSERT_POINTER(alloc->session);
+
+	mali_session_memory_lock(alloc->session);
+	mem->map_ref--;
+
+	MALI_DEBUG_PRINT(5, ("Mali DMA-buf: unmap attachment %p, new map_ref = %d\n", mem, mem->map_ref));
+
+	if (0 == mem->map_ref) {
+		dma_buf_unmap_attachment(mem->attachment, mem->sgt, DMA_BIDIRECTIONAL);
+		if (MALI_TRUE == mem->is_mapped) {
+			mali_mem_mali_map_free(alloc->session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+					       alloc->flags);
+		}
+		mem->is_mapped = MALI_FALSE;
+	}
+	mali_session_memory_unlock(alloc->session);
+	/* Wake up any thread waiting for buffer to become unmapped */
+	wake_up_all(&mem->wait_queue);
+}
+
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+int mali_dma_buf_map_job(struct mali_pp_job *job)
+{
+	struct mali_dma_buf_attachment *mem;
+	_mali_osk_errcode_t err;
+	int i;
+	int ret = 0;
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	for (i = 0; i < num_memory_cookies; i++) {
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+		MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(NULL != mali_alloc);
+		if (MALI_MEM_DMA_BUF != mali_alloc->type) {
+			continue;
+		}
+
+		/* Get backend memory & Map on CPU */
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+		mem = mem_bkend->dma_buf.attachment;
+
+		MALI_DEBUG_ASSERT_POINTER(mem);
+		MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job));
+
+		err = mali_dma_buf_map(mem_bkend);
+		if (0 != err) {
+			MALI_DEBUG_PRINT_ERROR(("Mali DMA-buf: Failed to map dma-buf for mali address %x\n", mali_addr));
+			ret = -EFAULT;
+			continue;
+		}
+	}
+	return ret;
+}
+
+void mali_dma_buf_unmap_job(struct mali_pp_job *job)
+{
+	struct mali_dma_buf_attachment *mem;
+	int i;
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	for (i = 0; i < num_memory_cookies; i++) {
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+		MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(NULL != mali_alloc);
+		if (MALI_MEM_DMA_BUF != mali_alloc->type) {
+			continue;
+		}
+
+		/* Get backend memory & Map on CPU */
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+		mem = mem_bkend->dma_buf.attachment;
+
+		MALI_DEBUG_ASSERT_POINTER(mem);
+		MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job));
+		mali_dma_buf_unmap(mem_bkend->mali_allocation, mem);
+	}
+}
+#endif /* !CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH */
+
+int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *user_arg)
+{
+	_mali_uk_dma_buf_get_size_s args;
+	int fd;
+	struct dma_buf *buf;
+
+	/* get call arguments from user space. copy_from_user returns how many bytes which where NOT copied */
+	if (0 != copy_from_user(&args, (void __user *)user_arg, sizeof(_mali_uk_dma_buf_get_size_s))) {
+		return -EFAULT;
+	}
+
+	/* Do DMA-BUF stuff */
+	fd = args.mem_fd;
+
+	buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(buf)) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to get dma-buf from fd: %d\n", fd));
+		return PTR_RET(buf);
+	}
+
+	if (0 != put_user(buf->size, &user_arg->size)) {
+		dma_buf_put(buf);
+		return -EFAULT;
+	}
+
+	dma_buf_put(buf);
+
+	return 0;
+}
+
+_mali_osk_errcode_t mali_mem_bind_dma_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		int fd, u32 flags)
+{
+	struct dma_buf *buf;
+	struct mali_dma_buf_attachment *dma_mem;
+	struct  mali_session_data *session = alloc->session;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	/* get dma buffer */
+	buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(buf)) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Currently, mapping of the full buffer are supported. */
+	if (alloc->psize != buf->size) {
+		goto failed_alloc_mem;
+	}
+
+	dma_mem = _mali_osk_calloc(1, sizeof(struct mali_dma_buf_attachment));
+	if (NULL == dma_mem) {
+		goto failed_alloc_mem;
+	}
+
+	dma_mem->buf = buf;
+	dma_mem->session = session;
+	dma_mem->map_ref = 0;
+	init_waitqueue_head(&dma_mem->wait_queue);
+
+	dma_mem->attachment = dma_buf_attach(dma_mem->buf, &mali_platform_device->dev);
+	if (NULL == dma_mem->attachment) {
+		goto failed_dma_attach;
+	}
+
+	mem_backend->dma_buf.attachment = dma_mem;
+
+	alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP;
+	if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+	/* Map memory into session's Mali virtual address space. */
+	if (0 != mali_dma_buf_map(mem_backend)) {
+		goto Failed_dma_map;
+	}
+#endif
+
+	return _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+Failed_dma_map:
+	mali_dma_buf_unmap(alloc, dma_mem);
+#endif
+	/* Wait for buffer to become unmapped */
+	wait_event(dma_mem->wait_queue, !dma_mem->is_mapped);
+	MALI_DEBUG_ASSERT(!dma_mem->is_mapped);
+	dma_buf_detach(dma_mem->buf, dma_mem->attachment);
+failed_dma_attach:
+	_mali_osk_free(dma_mem);
+failed_alloc_mem:
+	dma_buf_put(buf);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_mem_unbind_dma_buf(mali_mem_backend *mem_backend)
+{
+	struct mali_dma_buf_attachment *mem;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_DMA_BUF == mem_backend->type);
+
+	mem = mem_backend->dma_buf.attachment;
+	MALI_DEBUG_ASSERT_POINTER(mem);
+	MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	MALI_DEBUG_PRINT(3, ("Mali DMA-buf: release attachment %p\n", mem));
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+	MALI_DEBUG_ASSERT_POINTER(mem_backend->mali_allocation);
+	/* We mapped implicitly on attach, so we need to unmap on release */
+	mali_dma_buf_unmap(mem_backend->mali_allocation, mem);
+#endif
+	/* Wait for buffer to become unmapped */
+	wait_event(mem->wait_queue, !mem->is_mapped);
+	MALI_DEBUG_ASSERT(!mem->is_mapped);
+
+	dma_buf_detach(mem->buf, mem->attachment);
+	dma_buf_put(mem->buf);
+
+	_mali_osk_free(mem);
+}
diff --git a/utgard/r6p2/linux/mali_memory_dma_buf.h b/utgard/r6p2/linux/mali_memory_dma_buf.h
new file mode 100755
index 0000000..37b6f6b
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_dma_buf.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_DMA_BUF_H__
+#define __MALI_MEMORY_DMA_BUF_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_memory.h"
+
+struct mali_pp_job;
+
+struct mali_dma_buf_attachment;
+struct mali_dma_buf_attachment {
+	struct dma_buf *buf;
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct mali_session_data *session;
+	int map_ref;
+	struct mutex map_lock;
+	mali_bool is_mapped;
+	wait_queue_head_t wait_queue;
+};
+
+int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *arg);
+
+void mali_mem_unbind_dma_buf(mali_mem_backend *mem_backend);
+
+_mali_osk_errcode_t mali_mem_bind_dma_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		int fd, u32 flags);
+
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+int mali_dma_buf_map_job(struct mali_pp_job *job);
+void mali_dma_buf_unmap_job(struct mali_pp_job *job);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MEMORY_DMA_BUF_H__ */
diff --git a/utgard/r6p2/linux/mali_memory_external.c b/utgard/r6p2/linux/mali_memory_external.c
new file mode 100755
index 0000000..c0097f6
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_external.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_memory.h"
+#include "mali_mem_validation.h"
+#include "mali_uk_types.h"
+
+void mali_mem_unbind_ext_buf(mali_mem_backend *mem_backend)
+{
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT(MALI_MEM_EXTERNAL == mem_backend->type);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+_mali_osk_errcode_t mali_mem_bind_ext_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		u32 phys_addr,
+		u32 flag)
+{
+	struct mali_session_data *session;
+	_mali_osk_errcode_t err;
+	u32 virt, phys, size;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	size = alloc->psize;
+	session = (struct mali_session_data *)(uintptr_t)alloc->session;
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+
+	/* check arguments */
+	/* NULL might be a valid Mali address */
+	if (!size) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	/* size must be a multiple of the system page size */
+	if (size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	/* Validate the mali physical range */
+	if (_MALI_OSK_ERR_OK != mali_mem_validation_check(phys_addr, size)) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (flag & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+	mali_session_memory_lock(session);
+
+	virt = alloc->mali_vma_node.vm_node.start;
+	phys = phys_addr;
+
+	err = mali_mem_mali_map_prepare(alloc);
+	if (_MALI_OSK_ERR_OK != err) {
+		mali_session_memory_unlock(session);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	mali_mmu_pagedir_update(session->page_directory, virt, phys, size, MALI_MMU_FLAGS_DEFAULT);
+
+	if (alloc->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		mali_mmu_pagedir_update(session->page_directory, virt + size, phys, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+	}
+	MALI_DEBUG_PRINT(3,
+			 ("Requested to map physical memory 0x%x-0x%x into virtual memory 0x%x\n",
+			  phys_addr, (phys_addr + size - 1),
+			  virt));
+	mali_session_memory_unlock(session);
+
+	MALI_SUCCESS;
+}
+
diff --git a/utgard/r6p2/linux/mali_memory_external.h b/utgard/r6p2/linux/mali_memory_external.h
new file mode 100644
index 0000000..800ac2a
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_external.h
@@ -0,0 +1,29 @@
+
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_EXTERNAL_H__
+#define __MALI_MEMORY_EXTERNAL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+_mali_osk_errcode_t mali_mem_bind_ext_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		u32 phys_addr,
+		u32 flag);
+void mali_mem_unbind_ext_buf(mali_mem_backend *mem_backend);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/utgard/r6p2/linux/mali_memory_manager.c b/utgard/r6p2/linux/mali_memory_manager.c
new file mode 100755
index 0000000..814eb7f
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_manager.c
@@ -0,0 +1,993 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+
+#include <linux/platform_device.h>
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif
+#include <linux/idr.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_secure.h"
+#endif
+#if defined(CONFIG_MALI400_UMP)
+#include "mali_memory_ump.h"
+#endif
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_util.h"
+#include "mali_memory_external.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_ukk.h"
+#include "mali_memory_swap_alloc.h"
+
+/*
+* New memory system interface
+*/
+
+/*inti idr for backend memory */
+struct idr mali_backend_idr;
+struct mutex mali_idr_mutex;
+
+/* init allocation manager */
+int mali_memory_manager_init(struct mali_allocation_manager *mgr)
+{
+	/* init Locks */
+	rwlock_init(&mgr->vm_lock);
+	mutex_init(&mgr->list_mutex);
+
+	/* init link */
+	INIT_LIST_HEAD(&mgr->head);
+
+	/* init RB tree */
+	mgr->allocation_mgr_rb = RB_ROOT;
+	mgr->mali_allocation_num = 0;
+	return 0;
+}
+
+/* Deinit allocation manager
+* Do some check for debug
+*/
+void mali_memory_manager_uninit(struct mali_allocation_manager *mgr)
+{
+	/* check RB tree is empty */
+	MALI_DEBUG_ASSERT(((void *)(mgr->allocation_mgr_rb.rb_node) == (void *)rb_last(&mgr->allocation_mgr_rb)));
+	/* check allocation List */
+	MALI_DEBUG_ASSERT(list_empty(&mgr->head));
+}
+
+/* Prepare memory descriptor */
+static mali_mem_allocation *mali_mem_allocation_struct_create(struct mali_session_data *session)
+{
+	mali_mem_allocation *mali_allocation;
+
+	/* Allocate memory */
+	mali_allocation = (mali_mem_allocation *)kzalloc(sizeof(mali_mem_allocation), GFP_KERNEL);
+	if (NULL == mali_allocation) {
+		MALI_DEBUG_PRINT(1, ("mali_mem_allocation_struct_create: descriptor was NULL\n"));
+		return NULL;
+	}
+
+	MALI_DEBUG_CODE(mali_allocation->magic = MALI_MEM_ALLOCATION_VALID_MAGIC);
+
+	/* do init */
+	mali_allocation->flags = 0;
+	mali_allocation->session = session;
+
+	INIT_LIST_HEAD(&mali_allocation->list);
+	_mali_osk_atomic_init(&mali_allocation->mem_alloc_refcount, 1);
+
+	/**
+	*add to session list
+	*/
+	mutex_lock(&session->allocation_mgr.list_mutex);
+	list_add_tail(&mali_allocation->list, &session->allocation_mgr.head);
+	session->allocation_mgr.mali_allocation_num++;
+	mutex_unlock(&session->allocation_mgr.list_mutex);
+
+	return mali_allocation;
+}
+
+void  mali_mem_allocation_struct_destory(mali_mem_allocation *alloc)
+{
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(alloc->session);
+	mutex_lock(&alloc->session->allocation_mgr.list_mutex);
+	list_del(&alloc->list);
+	alloc->session->allocation_mgr.mali_allocation_num--;
+	mutex_unlock(&alloc->session->allocation_mgr.list_mutex);
+
+	kfree(alloc);
+}
+
+int mali_mem_backend_struct_create(mali_mem_backend **backend, u32 psize)
+{
+	mali_mem_backend *mem_backend = NULL;
+	s32 ret = -ENOSPC;
+	s32 index = -1;
+	*backend = (mali_mem_backend *)kzalloc(sizeof(mali_mem_backend), GFP_KERNEL);
+	if (NULL == *backend) {
+		MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_create: backend descriptor was NULL\n"));
+		return -1;
+	}
+	mem_backend = *backend;
+	mem_backend->size = psize;
+	mutex_init(&mem_backend->mutex);
+	INIT_LIST_HEAD(&mem_backend->list);
+	mem_backend->using_count = 0;
+
+
+	/* link backend with id */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
+again:
+	if (!idr_pre_get(&mali_backend_idr, GFP_KERNEL)) {
+		kfree(mem_backend);
+		return -ENOMEM;
+	}
+	mutex_lock(&mali_idr_mutex);
+	ret = idr_get_new_above(&mali_backend_idr, mem_backend, 1, &index);
+	mutex_unlock(&mali_idr_mutex);
+
+	if (-ENOSPC == ret) {
+		kfree(mem_backend);
+		return -ENOSPC;
+	}
+	if (-EAGAIN == ret)
+		goto again;
+#else
+	mutex_lock(&mali_idr_mutex);
+	ret = idr_alloc(&mali_backend_idr, mem_backend, 1, MALI_S32_MAX, GFP_KERNEL);
+	mutex_unlock(&mali_idr_mutex);
+	index = ret;
+	if (ret < 0) {
+		MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_create: Can't allocate idr for backend! \n"));
+		kfree(mem_backend);
+		return -ENOSPC;
+	}
+#endif
+	return index;
+}
+
+
+static void mali_mem_backend_struct_destory(mali_mem_backend **backend, s32 backend_handle)
+{
+	mali_mem_backend *mem_backend = *backend;
+
+	mutex_lock(&mali_idr_mutex);
+	idr_remove(&mali_backend_idr, backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	kfree(mem_backend);
+	*backend = NULL;
+}
+
+mali_mem_backend *mali_mem_backend_struct_search(struct mali_session_data *session, u32 mali_address)
+{
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_address, 0);
+	if (NULL == mali_vma_node)  {
+		MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_search:vma node was NULL\n"));
+		return NULL;
+	}
+	mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	MALI_DEBUG_ASSERT(NULL != mem_bkend);
+	return mem_bkend;
+}
+
+static _mali_osk_errcode_t mali_mem_resize(struct mali_session_data *session, mali_mem_backend *mem_backend, u32 physical_size)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	int retval = 0;
+	mali_mem_allocation *mali_allocation = NULL;
+	mali_mem_os_mem tmp_os_mem;
+	s32 change_page_count;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_PRINT(4, (" mali_mem_resize_memory called! \n"));
+	MALI_DEBUG_ASSERT(0 == physical_size %  MALI_MMU_PAGE_SIZE);
+
+	mali_allocation = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(mali_allocation);
+
+	MALI_DEBUG_ASSERT(MALI_MEM_FLAG_CAN_RESIZE & mali_allocation->flags);
+	MALI_DEBUG_ASSERT(MALI_MEM_OS == mali_allocation->type);
+
+	mutex_lock(&mem_backend->mutex);
+
+	/* Do resize*/
+	if (physical_size > mem_backend->size) {
+		u32 add_size = physical_size - mem_backend->size;
+
+		MALI_DEBUG_ASSERT(0 == add_size %  MALI_MMU_PAGE_SIZE);
+
+		/* Allocate new pages from os mem */
+		retval = mali_mem_os_alloc_pages(&tmp_os_mem, add_size);
+
+		if (retval) {
+			if (-ENOMEM == retval) {
+				ret = _MALI_OSK_ERR_NOMEM;
+			} else {
+				ret = _MALI_OSK_ERR_FAULT;
+			}
+			MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory allocation failed !\n"));
+			goto failed_alloc_memory;
+		}
+
+		MALI_DEBUG_ASSERT(tmp_os_mem.count == add_size / MALI_MMU_PAGE_SIZE);
+
+		/* Resize the memory of the backend */
+		ret = mali_mem_os_resize_pages(&tmp_os_mem, &mem_backend->os_mem, 0, tmp_os_mem.count);
+
+		if (ret) {
+			MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory	resizing failed !\n"));
+			goto failed_resize_pages;
+		}
+
+		/*Resize cpu mapping */
+		if (NULL != mali_allocation->cpu_mapping.vma) {
+			ret = mali_mem_os_resize_cpu_map_locked(mem_backend, mali_allocation->cpu_mapping.vma, mali_allocation->cpu_mapping.vma->vm_start  + mem_backend->size, add_size);
+			if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+				MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: cpu mapping failed !\n"));
+				goto  failed_cpu_map;
+			}
+		}
+
+		/* Resize mali mapping */
+		_mali_osk_mutex_wait(session->memory_lock);
+		ret = mali_mem_mali_map_resize(mali_allocation, physical_size);
+
+		if (ret) {
+			MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_resize: mali map resize fail !\n"));
+			goto failed_gpu_map;
+		}
+
+		ret = mali_mem_os_mali_map(&mem_backend->os_mem, session, mali_allocation->mali_vma_node.vm_node.start,
+					   mali_allocation->psize / MALI_MMU_PAGE_SIZE, add_size / MALI_MMU_PAGE_SIZE, mali_allocation->mali_mapping.properties);
+		if (ret) {
+			MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: mali mapping failed !\n"));
+			goto failed_gpu_map;
+		}
+
+		_mali_osk_mutex_signal(session->memory_lock);
+	} else {
+		u32 dec_size, page_count;
+		u32 vaddr = 0;
+		INIT_LIST_HEAD(&tmp_os_mem.pages);
+		tmp_os_mem.count = 0;
+
+		dec_size = mem_backend->size - physical_size;
+		MALI_DEBUG_ASSERT(0 == dec_size %  MALI_MMU_PAGE_SIZE);
+
+		page_count = dec_size / MALI_MMU_PAGE_SIZE;
+		vaddr = mali_allocation->mali_vma_node.vm_node.start + physical_size;
+
+		/* Resize the memory of the backend */
+		ret = mali_mem_os_resize_pages(&mem_backend->os_mem, &tmp_os_mem, physical_size / MALI_MMU_PAGE_SIZE, page_count);
+
+		if (ret) {
+			MALI_DEBUG_PRINT(4, ("_mali_ukk_mem_resize: mali map resize failed!\n"));
+			goto failed_resize_pages;
+		}
+
+		/* Resize mali map */
+		_mali_osk_mutex_wait(session->memory_lock);
+		mali_mem_mali_map_free(session, dec_size, vaddr, mali_allocation->flags);
+		_mali_osk_mutex_signal(session->memory_lock);
+
+		/* Zap cpu mapping */
+		if (0 != mali_allocation->cpu_mapping.addr) {
+			MALI_DEBUG_ASSERT(NULL != mali_allocation->cpu_mapping.vma);
+			zap_vma_ptes(mali_allocation->cpu_mapping.vma, mali_allocation->cpu_mapping.vma->vm_start + physical_size, dec_size);
+		}
+
+		/* Free those extra pages */
+		mali_mem_os_free(&tmp_os_mem.pages, tmp_os_mem.count, MALI_FALSE);
+	}
+
+	/* Resize memory allocation and memory backend */
+	change_page_count = (s32)(physical_size - mem_backend->size) / MALI_MMU_PAGE_SIZE;
+	mali_allocation->psize = physical_size;
+	mem_backend->size = physical_size;
+	mutex_unlock(&mem_backend->mutex);
+
+	if (change_page_count > 0) {
+		atomic_add(change_page_count, &session->mali_mem_allocated_pages);
+		if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+			session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+
+	} else {
+		atomic_sub((s32)(-change_page_count), &session->mali_mem_allocated_pages);
+	}
+
+	return _MALI_OSK_ERR_OK;
+
+failed_gpu_map:
+	_mali_osk_mutex_signal(session->memory_lock);
+failed_cpu_map:
+	if (physical_size > mem_backend->size) {
+		mali_mem_os_resize_pages(&mem_backend->os_mem, &tmp_os_mem, mem_backend->size / MALI_MMU_PAGE_SIZE,
+					 (physical_size - mem_backend->size) / MALI_MMU_PAGE_SIZE);
+	} else {
+		mali_mem_os_resize_pages(&tmp_os_mem, &mem_backend->os_mem, 0, tmp_os_mem.count);
+	}
+failed_resize_pages:
+	if (0 != tmp_os_mem.count)
+		mali_mem_os_free(&tmp_os_mem.pages, tmp_os_mem.count, MALI_FALSE);
+failed_alloc_memory:
+
+	mutex_unlock(&mem_backend->mutex);
+	return ret;
+}
+
+
+/* Set GPU MMU properties */
+static void _mali_memory_gpu_map_property_set(u32 *properties, u32 flags)
+{
+	if (_MALI_MEMORY_GPU_READ_ALLOCATE & flags) {
+		*properties = MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE;
+	} else {
+		*properties = MALI_MMU_FLAGS_DEFAULT;
+	}
+}
+
+_mali_osk_errcode_t mali_mem_add_mem_size(struct mali_session_data *session, u32 mali_addr, u32 add_size)
+{
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_allocation *mali_allocation = NULL;
+	u32 new_physical_size;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(0 == add_size %  MALI_MMU_PAGE_SIZE);
+
+	/* Get the memory backend that need to be resize. */
+	mem_backend = mali_mem_backend_struct_search(session, mali_addr);
+
+	if (NULL == mem_backend)  {
+		MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory backend = NULL!\n"));
+		return ret;
+	}
+
+	mali_allocation = mem_backend->mali_allocation;
+
+	MALI_DEBUG_ASSERT_POINTER(mali_allocation);
+
+	new_physical_size = add_size + mem_backend->size;
+
+	if (new_physical_size > (mali_allocation->mali_vma_node.vm_node.size))
+		return ret;
+
+	MALI_DEBUG_ASSERT(new_physical_size != mem_backend->size);
+
+	ret = mali_mem_resize(session, mem_backend, new_physical_size);
+
+	return ret;
+}
+
+/**
+*  function@_mali_ukk_mem_allocate - allocate mali memory
+*/
+_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args)
+{
+	struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	int retval = 0;
+	mali_mem_allocation *mali_allocation = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+
+	MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_allocate, vaddr=0x%x, size =0x%x! \n", args->gpu_vaddr, args->psize));
+
+	/* Check if the address is allocated
+	*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, args->gpu_vaddr, 0);
+
+	if (unlikely(mali_vma_node)) {
+		MALI_DEBUG_PRINT_ERROR(("The mali virtual address has already been used ! \n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+	/**
+	*create mali memory allocation
+	*/
+
+	mali_allocation = mali_mem_allocation_struct_create(session);
+
+	if (mali_allocation == NULL) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_allocate: Failed to create allocation struct! \n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	mali_allocation->psize = args->psize;
+	mali_allocation->vsize = args->vsize;
+
+	/* MALI_MEM_OS if need to support mem resize,
+	 * or MALI_MEM_BLOCK if have dedicated memory,
+	 * or MALI_MEM_OS,
+	 * or MALI_MEM_SWAP.
+	 */
+	if (args->flags & _MALI_MEMORY_ALLOCATE_SWAPPABLE) {
+		mali_allocation->type = MALI_MEM_SWAP;
+	} else if (args->flags & _MALI_MEMORY_ALLOCATE_RESIZEABLE) {
+		mali_allocation->type = MALI_MEM_OS;
+		mali_allocation->flags |= MALI_MEM_FLAG_CAN_RESIZE;
+	} else if (args->flags & _MALI_MEMORY_ALLOCATE_SECURE) {
+		mali_allocation->type = MALI_MEM_SECURE;
+	} else if (MALI_TRUE == mali_memory_have_dedicated_memory()) {
+		mali_allocation->type = MALI_MEM_BLOCK;
+	} else {
+		mali_allocation->type = MALI_MEM_OS;
+	}
+
+	/**
+	*add allocation node to RB tree for index
+	*/
+	mali_allocation->mali_vma_node.vm_node.start = args->gpu_vaddr;
+	mali_allocation->mali_vma_node.vm_node.size = args->vsize;
+
+	mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, args->psize);
+	if (mali_allocation->backend_handle < 0) {
+		ret = _MALI_OSK_ERR_NOMEM;
+		MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0! \n"));
+		goto failed_alloc_backend;
+	}
+
+
+	mem_backend->mali_allocation = mali_allocation;
+	mem_backend->type = mali_allocation->type;
+
+	mali_allocation->mali_mapping.addr = args->gpu_vaddr;
+
+	/* set gpu mmu propery */
+	_mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags);
+	/* do prepare for MALI mapping */
+	if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) {
+		_mali_osk_mutex_wait(session->memory_lock);
+
+		ret = mali_mem_mali_map_prepare(mali_allocation);
+		if (0 != ret) {
+			_mali_osk_mutex_signal(session->memory_lock);
+			goto failed_prepare_map;
+		}
+		_mali_osk_mutex_signal(session->memory_lock);
+	}
+
+	if (mali_allocation->psize == 0) {
+		mem_backend->os_mem.count = 0;
+		INIT_LIST_HEAD(&mem_backend->os_mem.pages);
+		goto done;
+	}
+
+	if (args->flags & _MALI_MEMORY_ALLOCATE_DEFER_BIND) {
+		mali_allocation->flags |= _MALI_MEMORY_ALLOCATE_DEFER_BIND;
+		mem_backend->flags |= MALI_MEM_BACKEND_FLAG_NOT_BINDED;
+		/* init for defer bind backend*/
+		mem_backend->os_mem.count = 0;
+		INIT_LIST_HEAD(&mem_backend->os_mem.pages);
+
+		goto done;
+	}
+
+	if (likely(mali_allocation->psize > 0)) {
+
+		if (MALI_MEM_SECURE == mem_backend->type) {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+			ret = mali_mem_secure_attach_dma_buf(&mem_backend->secure_mem, mem_backend->size, args->secure_shared_fd);
+			if (_MALI_OSK_ERR_OK != ret) {
+				MALI_DEBUG_PRINT(1, ("Failed to attach dma buf for secure memory! \n"));
+				goto failed_alloc_pages;
+			}
+#else
+			ret = _MALI_OSK_ERR_UNSUPPORTED;
+			MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory! \n"));
+			goto failed_alloc_pages;
+#endif
+		} else {
+
+			/**
+			*allocate physical memory
+			*/
+			if (mem_backend->type == MALI_MEM_OS) {
+				retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size);
+			} else if (mem_backend->type == MALI_MEM_BLOCK) {
+				/* try to allocated from BLOCK memory first, then try OS memory if failed.*/
+				if (mali_mem_block_alloc(&mem_backend->block_mem, mem_backend->size)) {
+					retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size);
+					mem_backend->type = MALI_MEM_OS;
+					mali_allocation->type = MALI_MEM_OS;
+				}
+			} else if (MALI_MEM_SWAP == mem_backend->type) {
+				retval = mali_mem_swap_alloc_pages(&mem_backend->swap_mem, mali_allocation->mali_vma_node.vm_node.size, &mem_backend->start_idx);
+			}  else {
+				/* ONLY support mem_os type */
+				MALI_DEBUG_ASSERT(0);
+			}
+
+			if (retval) {
+				ret = _MALI_OSK_ERR_NOMEM;
+				MALI_DEBUG_PRINT(1, (" can't allocate enough pages! \n"));
+				goto failed_alloc_pages;
+			}
+		}
+	}
+
+	/**
+	*map to GPU side
+	*/
+	if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) {
+		_mali_osk_mutex_wait(session->memory_lock);
+		/* Map on Mali */
+
+		if (mem_backend->type == MALI_MEM_OS) {
+			ret = mali_mem_os_mali_map(&mem_backend->os_mem, session, args->gpu_vaddr, 0,
+						   mem_backend->size / MALI_MMU_PAGE_SIZE, mali_allocation->mali_mapping.properties);
+
+		} else if (mem_backend->type == MALI_MEM_BLOCK) {
+			mali_mem_block_mali_map(&mem_backend->block_mem, session, args->gpu_vaddr,
+						mali_allocation->mali_mapping.properties);
+		} else if (mem_backend->type == MALI_MEM_SWAP) {
+			ret = mali_mem_swap_mali_map(&mem_backend->swap_mem, session, args->gpu_vaddr,
+						     mali_allocation->mali_mapping.properties);
+		} else if (mem_backend->type == MALI_MEM_SECURE) {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+			ret = mali_mem_secure_mali_map(&mem_backend->secure_mem, session, args->gpu_vaddr, mali_allocation->mali_mapping.properties);
+#endif
+		} else { /* unsupport type */
+			MALI_DEBUG_ASSERT(0);
+		}
+
+		_mali_osk_mutex_signal(session->memory_lock);
+	}
+done:
+	if (MALI_MEM_OS == mem_backend->type) {
+		atomic_add(mem_backend->os_mem.count, &session->mali_mem_allocated_pages);
+	} else if (MALI_MEM_BLOCK == mem_backend->type) {
+		atomic_add(mem_backend->block_mem.count, &session->mali_mem_allocated_pages);
+	} else if (MALI_MEM_SECURE == mem_backend->type) {
+		atomic_add(mem_backend->secure_mem.count, &session->mali_mem_allocated_pages);
+	} else {
+		MALI_DEBUG_ASSERT(MALI_MEM_SWAP == mem_backend->type);
+		atomic_add(mem_backend->swap_mem.count, &session->mali_mem_allocated_pages);
+		atomic_add(mem_backend->swap_mem.count, &session->mali_mem_array[mem_backend->type]);
+	}
+
+	if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+		session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+	}
+	return _MALI_OSK_ERR_OK;
+
+failed_alloc_pages:
+	mali_mem_mali_map_free(session, mali_allocation->psize, mali_allocation->mali_vma_node.vm_node.start, mali_allocation->flags);
+failed_prepare_map:
+	mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+failed_alloc_backend:
+
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_allocation);
+
+	return ret;
+}
+
+
+_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args)
+{
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	u32 vaddr = args->gpu_vaddr;
+	mali_mem_allocation *mali_alloc = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+
+	/* find mali allocation structure by vaddress*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, vaddr, 0);
+	if (NULL == mali_vma_node) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_free: invalid addr: 0x%x\n", vaddr));
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+	MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+	mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+
+	if (mali_alloc)
+		/* check ref_count */
+		args->free_pages_nr = mali_allocation_unref(&mali_alloc);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/**
+* Function _mali_ukk_mem_bind -- bind a external memory to a new GPU address
+* It will allocate a new mem allocation and bind external memory to it.
+* Supported backend type are:
+* _MALI_MEMORY_BIND_BACKEND_UMP
+* _MALI_MEMORY_BIND_BACKEND_DMA_BUF
+* _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY
+* CPU access is not supported yet
+*/
+_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args)
+{
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_allocation *mali_allocation = NULL;
+	MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_bind, vaddr=0x%x, size =0x%x! \n", args->vaddr, args->size));
+
+	/**
+	* allocate mali allocation.
+	*/
+	mali_allocation = mali_mem_allocation_struct_create(session);
+
+	if (mali_allocation == NULL) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	mali_allocation->psize = args->size;
+	mali_allocation->vsize = args->size;
+	mali_allocation->mali_mapping.addr = args->vaddr;
+
+	/* add allocation node to RB tree for index  */
+	mali_allocation->mali_vma_node.vm_node.start = args->vaddr;
+	mali_allocation->mali_vma_node.vm_node.size = args->size;
+	mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	/* allocate backend*/
+	if (mali_allocation->psize > 0) {
+		mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize);
+		if (mali_allocation->backend_handle < 0) {
+			goto Failed_alloc_backend;
+		}
+
+	} else {
+		goto Failed_alloc_backend;
+	}
+
+	mem_backend->size = mali_allocation->psize;
+	mem_backend->mali_allocation = mali_allocation;
+
+	switch (args->flags & _MALI_MEMORY_BIND_BACKEND_MASK) {
+	case  _MALI_MEMORY_BIND_BACKEND_UMP:
+#if defined(CONFIG_MALI400_UMP)
+		mali_allocation->type = MALI_MEM_UMP;
+		mem_backend->type = MALI_MEM_UMP;
+		ret = mali_mem_bind_ump_buf(mali_allocation, mem_backend,
+					    args->mem_union.bind_ump.secure_id, args->mem_union.bind_ump.flags);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Bind ump buf failed\n"));
+			goto  Failed_bind_backend;
+		}
+#else
+		MALI_DEBUG_PRINT(1, ("UMP not supported\n"));
+		goto Failed_bind_backend;
+#endif
+		break;
+	case  _MALI_MEMORY_BIND_BACKEND_DMA_BUF:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		mali_allocation->type = MALI_MEM_DMA_BUF;
+		mem_backend->type = MALI_MEM_DMA_BUF;
+		ret = mali_mem_bind_dma_buf(mali_allocation, mem_backend,
+					    args->mem_union.bind_dma_buf.mem_fd, args->mem_union.bind_dma_buf.flags);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Bind dma buf failed\n"));
+			goto Failed_bind_backend;
+		}
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported\n"));
+		goto Failed_bind_backend;
+#endif
+		break;
+	case _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY:
+		/* not allowed */
+		MALI_DEBUG_PRINT_ERROR(("Mali internal memory type not supported !\n"));
+		goto Failed_bind_backend;
+		break;
+
+	case _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY:
+		mali_allocation->type = MALI_MEM_EXTERNAL;
+		mem_backend->type = MALI_MEM_EXTERNAL;
+		ret = mali_mem_bind_ext_buf(mali_allocation, mem_backend, args->mem_union.bind_ext_memory.phys_addr,
+					    args->mem_union.bind_ext_memory.flags);
+		if (_MALI_OSK_ERR_OK != ret) {
+			MALI_DEBUG_PRINT(1, ("Bind external buf failed\n"));
+			goto Failed_bind_backend;
+		}
+		break;
+
+	case _MALI_MEMORY_BIND_BACKEND_EXT_COW:
+		/* not allowed */
+		MALI_DEBUG_PRINT_ERROR(("External cow memory  type not supported !\n"));
+		goto Failed_bind_backend;
+		break;
+
+	default:
+		MALI_DEBUG_PRINT_ERROR(("Invalid memory type  not supported !\n"));
+		goto Failed_bind_backend;
+		break;
+	}
+	MALI_DEBUG_ASSERT(0 == mem_backend->size % MALI_MMU_PAGE_SIZE);
+	atomic_add(mem_backend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_backend->type]);
+	return _MALI_OSK_ERR_OK;
+
+Failed_bind_backend:
+	mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+
+Failed_alloc_backend:
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_allocation);
+
+	MALI_DEBUG_PRINT(1, (" _mali_ukk_mem_bind, return ERROR! \n"));
+	return ret;
+}
+
+
+/*
+* Function _mali_ukk_mem_unbind -- unbind a external memory to a new GPU address
+* This function unbind the backend memory and free the allocation
+* no ref_count for this type of memory
+*/
+_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args)
+{
+	/**/
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_allocation *mali_allocation = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+	u32 mali_addr = args->vaddr;
+	MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_unbind, vaddr=0x%x! \n", args->vaddr));
+
+	/* find the allocation by vaddr */
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+	if (likely(mali_vma_node)) {
+		MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start);
+		mali_allocation = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+	} else {
+		MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	if (NULL != mali_allocation)
+		/* check ref_count */
+		mali_allocation_unref(&mali_allocation);
+	return _MALI_OSK_ERR_OK;
+}
+
+/*
+* Function _mali_ukk_mem_cow --  COW for an allocation
+* This function allocate new pages for  a range (range, range+size) of allocation
+*  And Map it(keep use the not in range pages from target allocation ) to an GPU vaddr
+*/
+_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_backend *target_backend = NULL;
+	mali_mem_backend *mem_backend = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_allocation = NULL;
+
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	/* Get the target backend for cow */
+	target_backend = mali_mem_backend_struct_search(session, args->target_handle);
+
+	if (NULL == target_backend || 0 == target_backend->size) {
+		MALI_DEBUG_ASSERT_POINTER(target_backend);
+		MALI_DEBUG_ASSERT(0 != target_backend->size);
+		return ret;
+	}
+
+	/*Cow not support resized mem */
+	MALI_DEBUG_ASSERT(MALI_MEM_FLAG_CAN_RESIZE != (MALI_MEM_FLAG_CAN_RESIZE & target_backend->mali_allocation->flags));
+
+	/* Check if the new mali address is allocated */
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, args->vaddr, 0);
+
+	if (unlikely(mali_vma_node)) {
+		MALI_DEBUG_PRINT_ERROR(("The mali virtual address has already been used ! \n"));
+		return ret;
+	}
+
+	/* create new alloction for COW*/
+	mali_allocation = mali_mem_allocation_struct_create(session);
+	if (mali_allocation == NULL) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_cow: Failed to create allocation struct!\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	mali_allocation->psize = args->target_size;
+	mali_allocation->vsize = args->target_size;
+	mali_allocation->type = MALI_MEM_COW;
+
+	/*add allocation node to RB tree for index*/
+	mali_allocation->mali_vma_node.vm_node.start = args->vaddr;
+	mali_allocation->mali_vma_node.vm_node.size = mali_allocation->vsize;
+	mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	/* create new backend for COW memory */
+	mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize);
+	if (mali_allocation->backend_handle < 0) {
+		ret = _MALI_OSK_ERR_NOMEM;
+		MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0! \n"));
+		goto failed_alloc_backend;
+	}
+	mem_backend->mali_allocation = mali_allocation;
+	mem_backend->type = mali_allocation->type;
+
+	if (target_backend->type == MALI_MEM_SWAP ||
+	    (MALI_MEM_COW == target_backend->type && (MALI_MEM_BACKEND_FLAG_SWAP_COWED & target_backend->flags))) {
+		mem_backend->flags |= MALI_MEM_BACKEND_FLAG_SWAP_COWED;
+		/**
+		 *     CoWed swap backends couldn't be mapped as non-linear vma, because if one
+		 * vma is set with flag VM_NONLINEAR, the vma->vm_private_data will be used by kernel,
+		 * while in mali driver, we use this variable to store the pointer of mali_allocation, so there
+		 * is a conflict.
+		 *     To resolve this problem, we have to do some fake things, we reserved about 64MB
+		 * space from index 0, there isn't really page's index will be set from 0 to (64MB>>PAGE_SHIFT_NUM),
+		 * and all of CoWed swap memory backends' start_idx will be assigned with 0, and these
+		 * backends will be mapped as linear and will add to priority tree of global swap file, while
+		 * these vmas will never be found by using normal page->index, these pages in those vma
+		 * also couldn't be swapped out.
+		 */
+		mem_backend->start_idx = 0;
+	}
+
+	/* Add the target backend's cow count, also allocate new pages for COW backend from os mem
+	*for a modified range and keep the page which not in the modified range and Add ref to it
+	*/
+	MALI_DEBUG_PRINT(3, ("Cow mapping: target_addr: 0x%x;  cow_addr: 0x%x,  size: %u\n", target_backend->mali_allocation->mali_vma_node.vm_node.start,
+			     mali_allocation->mali_vma_node.vm_node.start, mali_allocation->mali_vma_node.vm_node.size));
+
+	ret = mali_memory_do_cow(target_backend, args->target_offset, args->target_size, mem_backend, args->range_start, args->range_size);
+	if (_MALI_OSK_ERR_OK != ret) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_cow: Failed to cow!\n"));
+		goto failed_do_cow;
+	}
+
+	/**
+	*map to GPU side
+	*/
+	mali_allocation->mali_mapping.addr = args->vaddr;
+	/* set gpu mmu propery */
+	_mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags);
+
+	_mali_osk_mutex_wait(session->memory_lock);
+	/* Map on Mali */
+	ret = mali_mem_mali_map_prepare(mali_allocation);
+	if (0 != ret) {
+		MALI_DEBUG_PRINT(1, (" prepare map fail! \n"));
+		goto failed_gpu_map;
+	}
+
+	if (!(mem_backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+		mali_mem_cow_mali_map(mem_backend, 0, mem_backend->size);
+	}
+
+	_mali_osk_mutex_signal(session->memory_lock);
+
+	mutex_lock(&target_backend->mutex);
+	target_backend->flags |= MALI_MEM_BACKEND_FLAG_COWED;
+	mutex_unlock(&target_backend->mutex);
+
+	atomic_add(args->range_size / MALI_MMU_PAGE_SIZE, &session->mali_mem_allocated_pages);
+	if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+		session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+	}
+	return _MALI_OSK_ERR_OK;
+
+failed_gpu_map:
+	_mali_osk_mutex_signal(session->memory_lock);
+	mali_mem_cow_release(mem_backend, MALI_FALSE);
+	mem_backend->cow_mem.count = 0;
+failed_do_cow:
+	mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+failed_alloc_backend:
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_allocation);
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_cow_modify_range(_mali_uk_cow_modify_range_s *args)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_backend *mem_backend = NULL;
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_cow_modify_range called! \n"));
+	/* Get the backend that need to be modified. */
+	mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+
+	if (NULL == mem_backend || 0 == mem_backend->size) {
+		MALI_DEBUG_ASSERT_POINTER(mem_backend);
+		MALI_DEBUG_ASSERT(0 != mem_backend->size);
+		return ret;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_MEM_COW  == mem_backend->type);
+
+	ret =  mali_memory_cow_modify_range(mem_backend, args->range_start, args->size);
+	args->change_pages_nr = mem_backend->cow_mem.change_pages_nr;
+	if (_MALI_OSK_ERR_OK != ret)
+		return  ret;
+	_mali_osk_mutex_wait(session->memory_lock);
+	if (!(mem_backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+		mali_mem_cow_mali_map(mem_backend, args->range_start, args->size);
+	}
+	_mali_osk_mutex_signal(session->memory_lock);
+
+	atomic_add(args->change_pages_nr, &session->mali_mem_allocated_pages);
+	if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+		session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+_mali_osk_errcode_t _mali_ukk_mem_resize(_mali_uk_mem_resize_s *args)
+{
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_PRINT(4, (" mali_mem_resize_memory called! \n"));
+	MALI_DEBUG_ASSERT(0 == args->psize %  MALI_MMU_PAGE_SIZE);
+
+	/* Get the memory backend that need to be resize. */
+	mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+
+	if (NULL == mem_backend)  {
+		MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory backend = NULL!\n"));
+		return ret;
+	}
+
+	MALI_DEBUG_ASSERT(args->psize != mem_backend->size);
+
+	ret = mali_mem_resize(session, mem_backend, args->psize);
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_usage_get(_mali_uk_profiling_memory_usage_get_s *args)
+{
+	args->memory_usage = _mali_ukk_report_memory_usage();
+	if (0 != args->vaddr) {
+		mali_mem_backend *mem_backend = NULL;
+		struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+		/* Get the backend that need to be modified. */
+		mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+		if (NULL == mem_backend) {
+			MALI_DEBUG_ASSERT_POINTER(mem_backend);
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		if (MALI_MEM_COW == mem_backend->type)
+			args->change_pages_nr = mem_backend->cow_mem.change_pages_nr;
+	}
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r6p2/linux/mali_memory_manager.h b/utgard/r6p2/linux/mali_memory_manager.h
new file mode 100644
index 0000000..caccd9e
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_manager.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_MANAGER_H__
+#define __MALI_MEMORY_MANAGER_H__
+
+#include "mali_osk.h"
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_uk_types.h"
+
+struct mali_allocation_manager {
+	rwlock_t vm_lock;
+	struct rb_root allocation_mgr_rb;
+	struct list_head head;
+	struct mutex list_mutex;
+	u32 mali_allocation_num;
+};
+
+extern struct idr mali_backend_idr;
+extern struct mutex mali_idr_mutex;
+
+int mali_memory_manager_init(struct mali_allocation_manager *mgr);
+void mali_memory_manager_uninit(struct mali_allocation_manager *mgr);
+
+void  mali_mem_allocation_struct_destory(mali_mem_allocation *alloc);
+
+_mali_osk_errcode_t mali_mem_add_mem_size(struct mali_session_data *session, u32 mali_addr, u32 add_size);
+mali_mem_backend *mali_mem_backend_struct_search(struct mali_session_data *session, u32 mali_address);
+_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_cow_modify_range(_mali_uk_cow_modify_range_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_usage_get(_mali_uk_profiling_memory_usage_get_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_resize(_mali_uk_mem_resize_s *args);
+
+
+#endif
+
diff --git a/utgard/r6p2/linux/mali_memory_os_alloc.c b/utgard/r6p2/linux/mali_memory_os_alloc.c
new file mode 100755
index 0000000..86c9ff3
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_os_alloc.c
@@ -0,0 +1,827 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/version.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+
+#include "mali_osk.h"
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_kernel_linux.h"
+
+/* Minimum size of allocator page pool */
+#define MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB * 256)
+#define MALI_OS_MEMORY_POOL_TRIM_JIFFIES (10 * CONFIG_HZ) /* Default to 10s */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+static unsigned long mali_dma_attrs;
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+/* Write combine dma_attrs */
+static DEFINE_DMA_ATTRS(dma_attrs_wc);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask);
+#else
+static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask);
+#endif
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc);
+#else
+static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc);
+static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc);
+#endif
+#endif
+static void mali_mem_os_trim_pool(struct work_struct *work);
+
+struct mali_mem_os_allocator mali_mem_os_allocator = {
+	.pool_lock = __SPIN_LOCK_UNLOCKED(pool_lock),
+	.pool_pages = LIST_HEAD_INIT(mali_mem_os_allocator.pool_pages),
+	.pool_count = 0,
+
+	.allocated_pages = ATOMIC_INIT(0),
+	.allocation_limit = 0,
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	.shrinker.shrink = mali_mem_os_shrink,
+#else
+	.shrinker.count_objects = mali_mem_os_shrink_count,
+	.shrinker.scan_objects = mali_mem_os_shrink,
+#endif
+	.shrinker.seeks = DEFAULT_SEEKS,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	.timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool, TIMER_DEFERRABLE),
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)
+	.timed_shrinker = __DEFERRED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool),
+#else
+	.timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool),
+#endif
+};
+
+u32 mali_mem_os_free(struct list_head *os_pages, u32 pages_count, mali_bool cow_flag)
+{
+	LIST_HEAD(pages);
+	struct mali_page_node *m_page, *m_tmp;
+	u32 free_pages_nr = 0;
+
+	if (MALI_TRUE == cow_flag) {
+		list_for_each_entry_safe(m_page, m_tmp, os_pages, list) {
+			/*only handle OS node here */
+			if (m_page->type == MALI_PAGE_NODE_OS) {
+				if (1 == _mali_page_node_get_ref_count(m_page)) {
+					list_move(&m_page->list, &pages);
+					atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+					free_pages_nr ++;
+				} else {
+					_mali_page_node_unref(m_page);
+					m_page->page = NULL;
+					list_del(&m_page->list);
+					kfree(m_page);
+				}
+			}
+		}
+	} else {
+		list_cut_position(&pages, os_pages, os_pages->prev);
+		atomic_sub(pages_count, &mali_mem_os_allocator.allocated_pages);
+		free_pages_nr = pages_count;
+	}
+
+	/* Put pages on pool. */
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+	list_splice(&pages, &mali_mem_os_allocator.pool_pages);
+	mali_mem_os_allocator.pool_count += free_pages_nr;
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+		MALI_DEBUG_PRINT(5, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count));
+		queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES);
+	}
+	return free_pages_nr;
+}
+
+/**
+* put page without put it into page pool
+*/
+_mali_osk_errcode_t mali_mem_os_put_page(struct page *page)
+{
+	MALI_DEBUG_ASSERT_POINTER(page);
+	if (1 == page_count(page)) {
+		atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+		dma_unmap_page(&mali_platform_device->dev, page_private(page),
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+		ClearPagePrivate(page);
+	}
+	put_page(page);
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mem_os_resize_pages(mali_mem_os_mem *mem_from, mali_mem_os_mem *mem_to, u32 start_page, u32 page_count)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	u32 i = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_from);
+	MALI_DEBUG_ASSERT_POINTER(mem_to);
+
+	if (mem_from->count < start_page + page_count) {
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	list_for_each_entry_safe(m_page, m_tmp, &mem_from->pages, list) {
+		if (i >= start_page && i < start_page + page_count) {
+			list_move_tail(&m_page->list, &mem_to->pages);
+			mem_from->count--;
+			mem_to->count++;
+		}
+		i++;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size)
+{
+	struct page *new_page;
+	LIST_HEAD(pages_list);
+	size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE;
+	size_t remaining = page_count;
+	struct mali_page_node *m_page, *m_tmp;
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(os_mem);
+
+	if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) {
+		MALI_DEBUG_PRINT(2, ("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n",
+				     size,
+				     atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE,
+				     mali_mem_os_allocator.allocation_limit));
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&os_mem->pages);
+	os_mem->count = page_count;
+
+	/* Grab pages from pool. */
+	{
+		size_t pool_pages;
+		spin_lock(&mali_mem_os_allocator.pool_lock);
+		pool_pages = min(remaining, mali_mem_os_allocator.pool_count);
+		for (i = pool_pages; i > 0; i--) {
+			BUG_ON(list_empty(&mali_mem_os_allocator.pool_pages));
+			list_move(mali_mem_os_allocator.pool_pages.next, &pages_list);
+		}
+		mali_mem_os_allocator.pool_count -= pool_pages;
+		remaining -= pool_pages;
+		spin_unlock(&mali_mem_os_allocator.pool_lock);
+	}
+
+	/* Process pages from pool. */
+	i = 0;
+	list_for_each_entry_safe(m_page, m_tmp, &pages_list, list) {
+		BUG_ON(NULL == m_page);
+
+		list_move_tail(&m_page->list, &os_mem->pages);
+	}
+
+	/* Allocate new pages, if needed. */
+	for (i = 0; i < remaining; i++) {
+		dma_addr_t dma_addr;
+		gfp_t flags = __GFP_ZERO | __GFP_REPEAT | __GFP_NOWARN | __GFP_COLD;
+		int err;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE)
+		flags |= GFP_HIGHUSER;
+#else
+#ifdef CONFIG_ZONE_DMA32
+		flags |= GFP_DMA32;
+#else
+#ifdef CONFIG_ZONE_DMA
+		flags |= GFP_DMA;
+#else
+		/* arm64 utgard only work on < 4G, but the kernel
+		 * didn't provide method to allocte memory < 4G
+		 */
+		MALI_DEBUG_ASSERT(0);
+#endif
+#endif
+#endif
+
+		new_page = alloc_page(flags);
+
+		if (unlikely(NULL == new_page)) {
+			/* Calculate the number of pages actually allocated, and free them. */
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+			return -ENOMEM;
+		}
+
+		/* Ensure page is flushed from CPU caches. */
+		dma_addr = dma_map_page(&mali_platform_device->dev, new_page,
+					0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+		err = dma_mapping_error(&mali_platform_device->dev, dma_addr);
+		if (unlikely(err)) {
+			MALI_DEBUG_PRINT_ERROR(("OS Mem: Failed to DMA map page %p: %u",
+						new_page, err));
+			__free_page(new_page);
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+			return -EFAULT;
+		}
+
+		/* Store page phys addr */
+		SetPagePrivate(new_page);
+		set_page_private(new_page, dma_addr);
+
+		m_page = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+		if (unlikely(NULL == m_page)) {
+			MALI_PRINT_ERROR(("OS Mem: Can't allocate mali_page node! \n"));
+			dma_unmap_page(&mali_platform_device->dev, page_private(new_page),
+				       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+			ClearPagePrivate(new_page);
+			__free_page(new_page);
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+			return -EFAULT;
+		}
+		m_page->page = new_page;
+
+		list_add_tail(&m_page->list, &os_mem->pages);
+	}
+
+	atomic_add(page_count, &mali_mem_os_allocator.allocated_pages);
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES > mali_mem_os_allocator.pool_count) {
+		MALI_DEBUG_PRINT(4, ("OS Mem: Stopping pool trim timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count));
+		cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker);
+	}
+
+	return 0;
+}
+
+
+_mali_osk_errcode_t mali_mem_os_mali_map(mali_mem_os_mem *os_mem, struct mali_session_data *session, u32 vaddr, u32 start_page, u32 mapping_pgae_num, u32 props)
+{
+	struct mali_page_directory *pagedir = session->page_directory;
+	struct mali_page_node *m_page;
+	u32 virt;
+	u32 prop = props;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(os_mem);
+
+	MALI_DEBUG_ASSERT(start_page <= os_mem->count);
+	MALI_DEBUG_ASSERT((start_page + mapping_pgae_num) <= os_mem->count);
+
+	if ((start_page + mapping_pgae_num) == os_mem->count) {
+
+		virt = vaddr + MALI_MMU_PAGE_SIZE * (start_page + mapping_pgae_num);
+
+		list_for_each_entry_reverse(m_page, &os_mem->pages, list) {
+
+			virt -= MALI_MMU_PAGE_SIZE;
+			if (mapping_pgae_num > 0) {
+				dma_addr_t phys = page_private(m_page->page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+				/* Verify that the "physical" address is 32-bit and
+				* usable for Mali, when on a system with bus addresses
+				* wider than 32-bit. */
+				MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+				mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+			} else {
+				break;
+			}
+			mapping_pgae_num--;
+		}
+
+	} else {
+		u32 i = 0;
+		virt = vaddr;
+		list_for_each_entry(m_page, &os_mem->pages, list) {
+
+			if (i >= start_page) {
+				dma_addr_t phys = page_private(m_page->page);
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+				/* Verify that the "physical" address is 32-bit and
+				* usable for Mali, when on a system with bus addresses
+				* wider than 32-bit. */
+				MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+				mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+			}
+			i++;
+			virt += MALI_MMU_PAGE_SIZE;
+		}
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+
+void mali_mem_os_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+int mali_mem_os_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+	mali_mem_os_mem *os_mem = &mem_bkend->os_mem;
+	struct mali_page_node *m_page;
+	struct page *page;
+	int ret;
+	unsigned long addr = vma->vm_start;
+	MALI_DEBUG_ASSERT(MALI_MEM_OS == mem_bkend->type);
+
+	list_for_each_entry(m_page, &os_mem->pages, list) {
+		/* We should use vm_insert_page, but it does a dcache
+		 * flush which makes it way slower than remap_pfn_range or vm_insert_pfn.
+		ret = vm_insert_page(vma, addr, page);
+		*/
+		page = m_page->page;
+		ret = vm_insert_pfn(vma, addr, page_to_pfn(page));
+
+		if (unlikely(0 != ret)) {
+			return -EFAULT;
+		}
+		addr += _MALI_OSK_MALI_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+_mali_osk_errcode_t mali_mem_os_resize_cpu_map_locked(mali_mem_backend *mem_bkend, struct vm_area_struct *vma, unsigned long start_vaddr, u32 mappig_size)
+{
+	mali_mem_os_mem *os_mem = &mem_bkend->os_mem;
+	struct mali_page_node *m_page;
+	int ret;
+	int offset;
+	int mapping_page_num;
+	int count ;
+
+	unsigned long vstart = vma->vm_start;
+	count = 0;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+	MALI_DEBUG_ASSERT(0 == start_vaddr % _MALI_OSK_MALI_PAGE_SIZE);
+	MALI_DEBUG_ASSERT(0 == vstart % _MALI_OSK_MALI_PAGE_SIZE);
+	offset = (start_vaddr - vstart) / _MALI_OSK_MALI_PAGE_SIZE;
+	MALI_DEBUG_ASSERT(offset <= os_mem->count);
+	mapping_page_num = mappig_size / _MALI_OSK_MALI_PAGE_SIZE;
+	MALI_DEBUG_ASSERT((offset + mapping_page_num) <= os_mem->count);
+
+	if ((offset + mapping_page_num) == os_mem->count) {
+
+		unsigned long vm_end = start_vaddr + mappig_size;
+
+		list_for_each_entry_reverse(m_page, &os_mem->pages, list) {
+
+			vm_end -= _MALI_OSK_MALI_PAGE_SIZE;
+			if (mapping_page_num > 0) {
+				ret = vm_insert_pfn(vma, vm_end, page_to_pfn(m_page->page));
+
+				if (unlikely(0 != ret)) {
+					/*will return -EBUSY If the page has already been mapped into table, but it's OK*/
+					if (-EBUSY == ret) {
+						break;
+					} else {
+						MALI_DEBUG_PRINT(1, ("OS Mem: mali_mem_os_resize_cpu_map_locked failed, ret = %d, offset is %d,page_count is %d\n",
+								     ret,  offset + mapping_page_num, os_mem->count));
+					}
+					return _MALI_OSK_ERR_FAULT;
+				}
+			} else {
+				break;
+			}
+			mapping_page_num--;
+
+		}
+	} else {
+
+		list_for_each_entry(m_page, &os_mem->pages, list) {
+			if (count >= offset) {
+
+				ret = vm_insert_pfn(vma, vstart, page_to_pfn(m_page->page));
+
+				if (unlikely(0 != ret)) {
+					/*will return -EBUSY If the page has already been mapped into table, but it's OK*/
+					if (-EBUSY == ret) {
+						break;
+					} else {
+						MALI_DEBUG_PRINT(1, ("OS Mem: mali_mem_os_resize_cpu_map_locked failed, ret = %d, count is %d, offset is %d,page_count is %d\n",
+								     ret, count, offset, os_mem->count));
+					}
+					return _MALI_OSK_ERR_FAULT;
+				}
+			}
+			count++;
+			vstart += _MALI_OSK_MALI_PAGE_SIZE;
+		}
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+u32 mali_mem_os_release(mali_mem_backend *mem_bkend)
+{
+
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	u32 free_pages_nr = 0;
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	MALI_DEBUG_ASSERT(MALI_MEM_OS == mem_bkend->type);
+
+	alloc = mem_bkend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	/* Unmap the memory from the mali virtual address space. */
+	mali_mem_os_mali_unmap(alloc);
+	mutex_lock(&mem_bkend->mutex);
+	/* Free pages */
+	if (MALI_MEM_BACKEND_FLAG_COWED & mem_bkend->flags) {
+		/* Lock to avoid the free race condition for the cow shared memory page node. */
+		_mali_osk_mutex_wait(session->cow_lock);
+		free_pages_nr = mali_mem_os_free(&mem_bkend->os_mem.pages, mem_bkend->os_mem.count, MALI_TRUE);
+		_mali_osk_mutex_signal(session->cow_lock);
+	} else {
+		free_pages_nr = mali_mem_os_free(&mem_bkend->os_mem.pages, mem_bkend->os_mem.count, MALI_FALSE);
+	}
+	mutex_unlock(&mem_bkend->mutex);
+
+	MALI_DEBUG_PRINT(4, ("OS Mem free : allocated size = 0x%x, free size = 0x%x\n", mem_bkend->os_mem.count * _MALI_OSK_MALI_PAGE_SIZE,
+			     free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+
+	mem_bkend->os_mem.count = 0;
+	return free_pages_nr;
+}
+
+
+#define MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE 128
+static struct {
+	struct {
+		mali_dma_addr phys;
+		mali_io_address mapping;
+	} page[MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE];
+	size_t count;
+	spinlock_t lock;
+} mali_mem_page_table_page_pool = {
+	.count = 0,
+	.lock = __SPIN_LOCK_UNLOCKED(pool_lock),
+};
+
+_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_NOMEM;
+	dma_addr_t tmp_phys;
+
+	spin_lock(&mali_mem_page_table_page_pool.lock);
+	if (0 < mali_mem_page_table_page_pool.count) {
+		u32 i = --mali_mem_page_table_page_pool.count;
+		*phys = mali_mem_page_table_page_pool.page[i].phys;
+		*mapping = mali_mem_page_table_page_pool.page[i].mapping;
+
+		ret = _MALI_OSK_ERR_OK;
+	}
+	spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+	if (_MALI_OSK_ERR_OK != ret) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+		*mapping = dma_alloc_attrs(&mali_platform_device->dev,
+					   _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys,
+					   GFP_KERNEL, mali_dma_attrs);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		*mapping = dma_alloc_attrs(&mali_platform_device->dev,
+					   _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys,
+					   GFP_KERNEL, &dma_attrs_wc);
+#else
+		*mapping = dma_alloc_writecombine(&mali_platform_device->dev,
+						  _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys, GFP_KERNEL);
+#endif
+		if (NULL != *mapping) {
+			ret = _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+			/* Verify that the "physical" address is 32-bit and
+			 * usable for Mali, when on a system with bus addresses
+			 * wider than 32-bit. */
+			MALI_DEBUG_ASSERT(0 == (tmp_phys >> 32));
+#endif
+
+			*phys = (mali_dma_addr)tmp_phys;
+		}
+	}
+
+	return ret;
+}
+
+void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt)
+{
+	spin_lock(&mali_mem_page_table_page_pool.lock);
+	if (MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE > mali_mem_page_table_page_pool.count) {
+		u32 i = mali_mem_page_table_page_pool.count;
+		mali_mem_page_table_page_pool.page[i].phys = phys;
+		mali_mem_page_table_page_pool.page[i].mapping = virt;
+
+		++mali_mem_page_table_page_pool.count;
+
+		spin_unlock(&mali_mem_page_table_page_pool.lock);
+	} else {
+		spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+		dma_free_attrs(&mali_platform_device->dev,
+			       _MALI_OSK_MALI_PAGE_SIZE, virt, phys,
+			       mali_dma_attrs);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		dma_free_attrs(&mali_platform_device->dev,
+			       _MALI_OSK_MALI_PAGE_SIZE, virt, phys,
+			       &dma_attrs_wc);
+#else
+		dma_free_writecombine(&mali_platform_device->dev,
+				      _MALI_OSK_MALI_PAGE_SIZE, virt, phys);
+#endif
+	}
+}
+
+void mali_mem_os_free_page_node(struct mali_page_node *m_page)
+{
+	struct page *page = m_page->page;
+	MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_OS);
+
+	if (1  == page_count(page)) {
+		dma_unmap_page(&mali_platform_device->dev, page_private(page),
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+		ClearPagePrivate(page);
+	}
+	__free_page(page);
+	m_page->page = NULL;
+	list_del(&m_page->list);
+	kfree(m_page);
+}
+
+/* The maximum number of page table pool pages to free in one go. */
+#define MALI_MEM_OS_CHUNK_TO_FREE 64UL
+
+/* Free a certain number of pages from the page table page pool.
+ * The pool lock must be held when calling the function, and the lock will be
+ * released before returning.
+ */
+static void mali_mem_os_page_table_pool_free(size_t nr_to_free)
+{
+	mali_dma_addr phys_arr[MALI_MEM_OS_CHUNK_TO_FREE];
+	void *virt_arr[MALI_MEM_OS_CHUNK_TO_FREE];
+	u32 i;
+
+	MALI_DEBUG_ASSERT(nr_to_free <= MALI_MEM_OS_CHUNK_TO_FREE);
+
+	/* Remove nr_to_free pages from the pool and store them locally on stack. */
+	for (i = 0; i < nr_to_free; i++) {
+		u32 pool_index = mali_mem_page_table_page_pool.count - i - 1;
+
+		phys_arr[i] = mali_mem_page_table_page_pool.page[pool_index].phys;
+		virt_arr[i] = mali_mem_page_table_page_pool.page[pool_index].mapping;
+	}
+
+	mali_mem_page_table_page_pool.count -= nr_to_free;
+
+	spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+	/* After releasing the spinlock: free the pages we removed from the pool. */
+	for (i = 0; i < nr_to_free; i++) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+		dma_free_attrs(&mali_platform_device->dev, _MALI_OSK_MALI_PAGE_SIZE,
+			       virt_arr[i], (dma_addr_t)phys_arr[i], mali_dma_attrs);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		dma_free_attrs(&mali_platform_device->dev, _MALI_OSK_MALI_PAGE_SIZE,
+			       virt_arr[i], (dma_addr_t)phys_arr[i], &dma_attrs_wc);
+#else
+		dma_free_writecombine(&mali_platform_device->dev,
+				      _MALI_OSK_MALI_PAGE_SIZE,
+				      virt_arr[i], (dma_addr_t)phys_arr[i]);
+#endif
+	}
+}
+
+static void mali_mem_os_trim_page_table_page_pool(void)
+{
+	size_t nr_to_free = 0;
+	size_t nr_to_keep;
+
+	/* Keep 2 page table pages for each 1024 pages in the page cache. */
+	nr_to_keep = mali_mem_os_allocator.pool_count / 512;
+	/* And a minimum of eight pages, to accomodate new sessions. */
+	nr_to_keep += 8;
+
+	if (0 == spin_trylock(&mali_mem_page_table_page_pool.lock)) return;
+
+	if (nr_to_keep < mali_mem_page_table_page_pool.count) {
+		nr_to_free = mali_mem_page_table_page_pool.count - nr_to_keep;
+		nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, nr_to_free);
+	}
+
+	/* Pool lock will be released by the callee. */
+	mali_mem_os_page_table_pool_free(nr_to_free);
+}
+
+static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	return mali_mem_os_allocator.pool_count;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask)
+#else
+static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask)
+#endif /* Linux < 2.6.35 */
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc)
+#else
+static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc)
+#endif /* Linux < 3.12.0 */
+#endif /* Linux < 3.0.0 */
+{
+	struct mali_page_node *m_page, *m_tmp;
+	unsigned long flags;
+	struct list_head *le, pages;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+	int nr = nr_to_scan;
+#else
+	int nr = sc->nr_to_scan;
+#endif
+
+	if (0 == nr) {
+		return mali_mem_os_shrink_count(shrinker, sc);
+	}
+
+	if (0 == spin_trylock_irqsave(&mali_mem_os_allocator.pool_lock, flags)) {
+		/* Not able to lock. */
+		return -1;
+	}
+
+	if (0 == mali_mem_os_allocator.pool_count) {
+		/* No pages availble */
+		spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags);
+		return 0;
+	}
+
+	/* Release from general page pool */
+	nr = min((size_t)nr, mali_mem_os_allocator.pool_count);
+	mali_mem_os_allocator.pool_count -= nr;
+	list_for_each(le, &mali_mem_os_allocator.pool_pages) {
+		--nr;
+		if (0 == nr) break;
+	}
+	list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le);
+	spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags);
+
+	list_for_each_entry_safe(m_page, m_tmp, &pages, list) {
+		mali_mem_os_free_page_node(m_page);
+	}
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES > mali_mem_os_allocator.pool_count) {
+		/* Pools are empty, stop timer */
+		MALI_DEBUG_PRINT(5, ("Stopping timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count));
+		cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker);
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	return mali_mem_os_shrink_count(shrinker, sc);
+#else
+	return nr;
+#endif
+}
+
+static void mali_mem_os_trim_pool(struct work_struct *data)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	struct list_head *le;
+	LIST_HEAD(pages);
+	size_t nr_to_free;
+
+	MALI_IGNORE(data);
+
+	MALI_DEBUG_PRINT(3, ("OS Mem: Trimming pool %u\n", mali_mem_os_allocator.pool_count));
+
+	/* Release from general page pool */
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+		size_t count = mali_mem_os_allocator.pool_count - MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES;
+		const size_t min_to_free = min(64, MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES);
+
+		/* Free half the pages on the pool above the static limit. Or 64 pages, 256KB. */
+		nr_to_free = max(count / 2, min_to_free);
+
+		mali_mem_os_allocator.pool_count -= nr_to_free;
+		list_for_each(le, &mali_mem_os_allocator.pool_pages) {
+			--nr_to_free;
+			if (0 == nr_to_free) break;
+		}
+		list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le);
+	}
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	list_for_each_entry_safe(m_page, m_tmp, &pages, list) {
+		mali_mem_os_free_page_node(m_page);
+	}
+
+	/* Release some pages from page table page pool */
+	mali_mem_os_trim_page_table_page_pool();
+
+	if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+		MALI_DEBUG_PRINT(4, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count));
+		queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES);
+	}
+}
+
+_mali_osk_errcode_t mali_mem_os_init(void)
+{
+	mali_mem_os_allocator.wq = alloc_workqueue("mali-mem", WQ_UNBOUND, 1);
+	if (NULL == mali_mem_os_allocator.wq) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	mali_dma_attrs = DMA_ATTR_WRITE_COMBINE;
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &dma_attrs_wc);
+#endif
+
+	register_shrinker(&mali_mem_os_allocator.shrinker);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_os_term(void)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	unregister_shrinker(&mali_mem_os_allocator.shrinker);
+	cancel_delayed_work_sync(&mali_mem_os_allocator.timed_shrinker);
+
+	if (NULL != mali_mem_os_allocator.wq) {
+		destroy_workqueue(mali_mem_os_allocator.wq);
+		mali_mem_os_allocator.wq = NULL;
+	}
+
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+	list_for_each_entry_safe(m_page, m_tmp, &mali_mem_os_allocator.pool_pages, list) {
+		mali_mem_os_free_page_node(m_page);
+
+		--mali_mem_os_allocator.pool_count;
+	}
+	BUG_ON(mali_mem_os_allocator.pool_count);
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	/* Release from page table page pool */
+	do {
+		u32 nr_to_free;
+
+		spin_lock(&mali_mem_page_table_page_pool.lock);
+
+		nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, mali_mem_page_table_page_pool.count);
+
+		/* Pool lock will be released by the callee. */
+		mali_mem_os_page_table_pool_free(nr_to_free);
+	} while (0 != mali_mem_page_table_page_pool.count);
+}
+
+_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size)
+{
+	mali_mem_os_allocator.allocation_limit = size;
+
+	MALI_SUCCESS;
+}
+
+u32 mali_mem_os_stat(void)
+{
+	return atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE;
+}
diff --git a/utgard/r6p2/linux/mali_memory_os_alloc.h b/utgard/r6p2/linux/mali_memory_os_alloc.h
new file mode 100755
index 0000000..b92fffe
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_os_alloc.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_OS_ALLOC_H__
+#define __MALI_MEMORY_OS_ALLOC_H__
+
+#include "mali_osk.h"
+#include "mali_memory_types.h"
+
+
+/** @brief Release Mali OS memory
+ *
+ * The session memory_lock must be held when calling this function.
+ *
+ * @param mem_bkend Pointer to the mali_mem_backend to release
+ */
+u32 mali_mem_os_release(mali_mem_backend *mem_bkend);
+
+_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping);
+
+void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt);
+
+_mali_osk_errcode_t mali_mem_os_init(void);
+
+void mali_mem_os_term(void);
+
+u32 mali_mem_os_stat(void);
+
+void mali_mem_os_free_page_node(struct mali_page_node *m_page);
+
+int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size);
+
+u32 mali_mem_os_free(struct list_head *os_pages, u32 pages_count, mali_bool cow_flag);
+
+_mali_osk_errcode_t mali_mem_os_put_page(struct page *page);
+
+_mali_osk_errcode_t mali_mem_os_resize_pages(mali_mem_os_mem *mem_from, mali_mem_os_mem *mem_to, u32 start_page, u32 page_count);
+
+_mali_osk_errcode_t mali_mem_os_mali_map(mali_mem_os_mem *os_mem, struct mali_session_data *session, u32 vaddr, u32 start_page, u32 mapping_pgae_num, u32 props);
+
+void mali_mem_os_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_os_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+
+_mali_osk_errcode_t mali_mem_os_resize_cpu_map_locked(mali_mem_backend *mem_bkend, struct vm_area_struct *vma, unsigned long start_vaddr, u32 mappig_size);
+
+#endif /* __MALI_MEMORY_OS_ALLOC_H__ */
diff --git a/utgard/r6p2/linux/mali_memory_secure.c b/utgard/r6p2/linux/mali_memory_secure.c
new file mode 100644
index 0000000..7856ae6
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_secure.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_memory.h"
+#include "mali_memory_secure.h"
+#include "mali_osk.h"
+#include <linux/mutex.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-buf.h>
+
+_mali_osk_errcode_t mali_mem_secure_attach_dma_buf(mali_mem_secure *secure_mem, u32 size, int mem_fd)
+{
+	struct dma_buf *buf;
+	MALI_DEBUG_ASSERT_POINTER(secure_mem);
+
+	/* get dma buffer */
+	buf = dma_buf_get(mem_fd);
+	if (IS_ERR_OR_NULL(buf)) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to get dma buf!\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (size != buf->size) {
+		MALI_DEBUG_PRINT_ERROR(("The secure mem size not match to the dma buf size!\n"));
+		goto failed_alloc_mem;
+	}
+
+	secure_mem->buf =  buf;
+	secure_mem->attachment = dma_buf_attach(secure_mem->buf, &mali_platform_device->dev);
+	if (NULL == secure_mem->attachment) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to get dma buf attachment!\n"));
+		goto failed_dma_attach;
+	}
+
+	secure_mem->sgt = dma_buf_map_attachment(secure_mem->attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(secure_mem->sgt)) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to map dma buf attachment\n"));
+		goto  failed_dma_map;
+	}
+
+	secure_mem->count = size / MALI_MMU_PAGE_SIZE;
+
+	return _MALI_OSK_ERR_OK;
+
+failed_dma_map:
+	dma_buf_detach(secure_mem->buf, secure_mem->attachment);
+failed_dma_attach:
+failed_alloc_mem:
+	dma_buf_put(buf);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+_mali_osk_errcode_t mali_mem_secure_mali_map(mali_mem_secure *secure_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+	struct mali_page_directory *pagedir;
+	struct scatterlist *sg;
+	u32 virt = vaddr;
+	u32 prop = props;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(secure_mem);
+	MALI_DEBUG_ASSERT_POINTER(secure_mem->sgt);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	pagedir = session->page_directory;
+
+	for_each_sg(secure_mem->sgt->sgl, sg, secure_mem->sgt->nents, i) {
+		u32 size = sg_dma_len(sg);
+		dma_addr_t phys = sg_dma_address(sg);
+
+		/* sg must be page aligned. */
+		MALI_DEBUG_ASSERT(0 == size % MALI_MMU_PAGE_SIZE);
+		MALI_DEBUG_ASSERT(0 == (phys & ~(uintptr_t)0xFFFFFFFF));
+
+		mali_mmu_pagedir_update(pagedir, virt, phys, size, prop);
+
+		MALI_DEBUG_PRINT(3, ("The secure mem physical address: 0x%x gpu virtual address: 0x%x! \n", phys, virt));
+		virt += size;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_secure_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+
+int mali_mem_secure_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+
+	int ret = 0;
+	struct scatterlist *sg;
+	mali_mem_secure *secure_mem = &mem_bkend->secure_mem;
+	unsigned long addr = vma->vm_start;
+	int i;
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_SECURE);
+
+	for_each_sg(secure_mem->sgt->sgl, sg, secure_mem->sgt->nents, i) {
+		phys_addr_t phys;
+		dma_addr_t dev_addr;
+		u32 size, j;
+		dev_addr = sg_dma_address(sg);
+#if defined(CONFIG_ARM64) ||LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+		phys =  dma_to_phys(&mali_platform_device->dev, dev_addr);
+#else
+		phys = page_to_phys(pfn_to_page(dma_to_pfn(&mali_platform_device->dev, dev_addr)));
+#endif
+		size = sg_dma_len(sg);
+		MALI_DEBUG_ASSERT(0 == size % _MALI_OSK_MALI_PAGE_SIZE);
+
+		for (j = 0; j < size / _MALI_OSK_MALI_PAGE_SIZE; j++) {
+			ret = vm_insert_pfn(vma, addr, PFN_DOWN(phys));
+
+			if (unlikely(0 != ret)) {
+				return -EFAULT;
+			}
+			addr += _MALI_OSK_MALI_PAGE_SIZE;
+			phys += _MALI_OSK_MALI_PAGE_SIZE;
+
+			MALI_DEBUG_PRINT(3, ("The secure mem physical address: 0x%x , cpu virtual address: 0x%x! \n", phys, addr));
+		}
+	}
+	return ret;
+}
+
+u32 mali_mem_secure_release(mali_mem_backend *mem_bkend)
+{
+	struct mali_mem_secure *mem;
+	mali_mem_allocation *alloc = mem_bkend->mali_allocation;
+	u32 free_pages_nr = 0;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_SECURE);
+
+	mem = &mem_bkend->secure_mem;
+	MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	MALI_DEBUG_ASSERT_POINTER(mem->sgt);
+	/* Unmap the memory from the mali virtual address space. */
+	mali_mem_secure_mali_unmap(alloc);
+	mutex_lock(&mem_bkend->mutex);
+	dma_buf_unmap_attachment(mem->attachment, mem->sgt, DMA_BIDIRECTIONAL);
+	dma_buf_detach(mem->buf, mem->attachment);
+	dma_buf_put(mem->buf);
+	mutex_unlock(&mem_bkend->mutex);
+
+	free_pages_nr = mem->count;
+
+	return free_pages_nr;
+}
+
+
diff --git a/utgard/r6p2/linux/mali_memory_secure.h b/utgard/r6p2/linux/mali_memory_secure.h
new file mode 100644
index 0000000..cb85767
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_secure.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2010, 2013, 2015-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_SECURE_H__
+#define __MALI_MEMORY_SECURE_H__
+
+#include "mali_session.h"
+#include "mali_memory.h"
+#include <linux/spinlock.h>
+
+#include "mali_memory_types.h"
+
+_mali_osk_errcode_t mali_mem_secure_attach_dma_buf(mali_mem_secure *secure_mem, u32 size, int mem_fd);
+
+_mali_osk_errcode_t mali_mem_secure_mali_map(mali_mem_secure *secure_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+
+void mali_mem_secure_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_secure_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+
+u32 mali_mem_secure_release(mali_mem_backend *mem_bkend);
+
+#endif /* __MALI_MEMORY_SECURE_H__ */
diff --git a/utgard/r6p2/linux/mali_memory_swap_alloc.c b/utgard/r6p2/linux/mali_memory_swap_alloc.c
new file mode 100644
index 0000000..132bad4
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_swap_alloc.c
@@ -0,0 +1,950 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/idr.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+#include <linux/shmem_fs.h>
+#include <linux/file.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_memory.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_cow.h"
+#include "mali_ukk.h"
+#include "mali_kernel_utilization.h"
+#include "mali_memory_swap_alloc.h"
+
+
+static struct _mali_osk_bitmap idx_mgr;
+static struct file *global_swap_file;
+static struct address_space *global_swap_space;
+static _mali_osk_wq_work_t *mali_mem_swap_out_workq = NULL;
+static u32 mem_backend_swapped_pool_size;
+#ifdef MALI_MEM_SWAP_TRACKING
+static u32 mem_backend_swapped_unlock_size;
+#endif
+/* Lock order: mem_backend_swapped_pool_lock  > each memory backend's mutex lock.
+ * This lock used to protect mem_backend_swapped_pool_size and mem_backend_swapped_pool. */
+static struct mutex mem_backend_swapped_pool_lock;
+static struct list_head mem_backend_swapped_pool;
+
+extern struct mali_mem_os_allocator mali_mem_os_allocator;
+
+#define MALI_SWAP_LOW_MEM_DEFAULT_VALUE (60*1024*1024)
+#define MALI_SWAP_INVALIDATE_MALI_ADDRESS (0)               /* Used to mark the given memory cookie is invalidate. */
+#define MALI_SWAP_GLOBAL_SWAP_FILE_SIZE (0xFFFFFFFF)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX ((MALI_SWAP_GLOBAL_SWAP_FILE_SIZE) >> PAGE_SHIFT)
+#else
+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX ((MALI_SWAP_GLOBAL_SWAP_FILE_SIZE) >> PAGE_CACHE_SHIFT)
+#endif
+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE (1 << 15) /* Reserved for CoW nonlinear swap backend memory, the space size is 128MB. */
+
+unsigned int mali_mem_swap_out_threshold_value = MALI_SWAP_LOW_MEM_DEFAULT_VALUE;
+
+/**
+ * We have two situations to do shrinking things, one is we met low GPU utilization which shows GPU needn't touch too
+ * swappable backends in short time, and the other one is we add new swappable backends, the total pool size exceed
+ * the threshold value of the swapped pool size.
+ */
+typedef enum {
+	MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION = 100,
+	MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS = 257,
+} _mali_mem_swap_pool_shrink_type_t;
+
+static void mali_mem_swap_swapped_bkend_pool_check_for_low_utilization(void *arg);
+
+_mali_osk_errcode_t mali_mem_swap_init(void)
+{
+	gfp_t flags = __GFP_NORETRY | __GFP_NOWARN;
+
+	if (_MALI_OSK_ERR_OK != _mali_osk_bitmap_init(&idx_mgr, MALI_SWAP_GLOBAL_SWAP_FILE_INDEX, MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE)) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	global_swap_file = shmem_file_setup("mali_swap", MALI_SWAP_GLOBAL_SWAP_FILE_SIZE, VM_NORESERVE);
+	if (IS_ERR(global_swap_file)) {
+		_mali_osk_bitmap_term(&idx_mgr);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	global_swap_space = global_swap_file->f_path.dentry->d_inode->i_mapping;
+
+	mali_mem_swap_out_workq = _mali_osk_wq_create_work(mali_mem_swap_swapped_bkend_pool_check_for_low_utilization, NULL);
+	if (NULL == mali_mem_swap_out_workq) {
+		_mali_osk_bitmap_term(&idx_mgr);
+		fput(global_swap_file);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE)
+	flags |= GFP_HIGHUSER;
+#else
+#ifdef CONFIG_ZONE_DMA32
+	flags |= GFP_DMA32;
+#else
+#ifdef CONFIG_ZONE_DMA
+	flags |= GFP_DMA;
+#else
+	/* arm64 utgard only work on < 4G, but the kernel
+	 * didn't provide method to allocte memory < 4G
+	 */
+	MALI_DEBUG_ASSERT(0);
+#endif
+#endif
+#endif
+
+	/* When we use shmem_read_mapping_page to allocate/swap-in, it will
+	 * use these flags to allocate new page if need.*/
+	mapping_set_gfp_mask(global_swap_space, flags);
+
+	mem_backend_swapped_pool_size = 0;
+#ifdef MALI_MEM_SWAP_TRACKING
+	mem_backend_swapped_unlock_size = 0;
+#endif
+	mutex_init(&mem_backend_swapped_pool_lock);
+	INIT_LIST_HEAD(&mem_backend_swapped_pool);
+
+	MALI_DEBUG_PRINT(2, ("Mali SWAP: Swap out threshold vaule is %uM\n", mali_mem_swap_out_threshold_value >> 20));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_swap_term(void)
+{
+	_mali_osk_bitmap_term(&idx_mgr);
+
+	fput(global_swap_file);
+
+	_mali_osk_wq_delete_work(mali_mem_swap_out_workq);
+
+	MALI_DEBUG_ASSERT(list_empty(&mem_backend_swapped_pool));
+	MALI_DEBUG_ASSERT(0 == mem_backend_swapped_pool_size);
+
+	return;
+}
+
+struct file *mali_mem_swap_get_global_swap_file(void)
+{
+	return  global_swap_file;
+}
+
+/* Judge if swappable backend in swapped pool. */
+static mali_bool mali_memory_swap_backend_in_swapped_pool(mali_mem_backend *mem_bkend)
+{
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+
+	return !list_empty(&mem_bkend->list);
+}
+
+void mali_memory_swap_list_backend_delete(mali_mem_backend *mem_bkend)
+{
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+
+	mutex_lock(&mem_backend_swapped_pool_lock);
+	mutex_lock(&mem_bkend->mutex);
+
+	if (MALI_FALSE == mali_memory_swap_backend_in_swapped_pool(mem_bkend)) {
+		mutex_unlock(&mem_bkend->mutex);
+		mutex_unlock(&mem_backend_swapped_pool_lock);
+		return;
+	}
+
+	MALI_DEBUG_ASSERT(!list_empty(&mem_bkend->list));
+
+	list_del_init(&mem_bkend->list);
+
+	mutex_unlock(&mem_bkend->mutex);
+
+	mem_backend_swapped_pool_size -= mem_bkend->size;
+
+	mutex_unlock(&mem_backend_swapped_pool_lock);
+}
+
+static void mali_mem_swap_out_page_node(mali_page_node *page_node)
+{
+	MALI_DEBUG_ASSERT(page_node);
+
+	dma_unmap_page(&mali_platform_device->dev, page_node->swap_it->dma_addr,
+		       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+	set_page_dirty(page_node->swap_it->page);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
+	put_page(page_node->swap_it->page);
+#else
+	page_cache_release(page_node->swap_it->page);
+#endif
+}
+
+void mali_mem_swap_unlock_single_mem_backend(mali_mem_backend *mem_bkend)
+{
+	mali_page_node *m_page;
+
+	MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_bkend->mutex));
+
+	if (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN)) {
+		return;
+	}
+
+	mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN;
+
+	list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+		mali_mem_swap_out_page_node(m_page);
+	}
+
+	return;
+}
+
+static void mali_mem_swap_unlock_partial_locked_mem_backend(mali_mem_backend *mem_bkend, mali_page_node *page_node)
+{
+	mali_page_node *m_page;
+
+	MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_bkend->mutex));
+
+	list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+		if (m_page == page_node) {
+			break;
+		}
+		mali_mem_swap_out_page_node(m_page);
+	}
+}
+
+static void mali_mem_swap_swapped_bkend_pool_shrink(_mali_mem_swap_pool_shrink_type_t shrink_type)
+{
+	mali_mem_backend *bkend, *tmp_bkend;
+	long system_free_size;
+	u32 last_gpu_utilization, gpu_utilization_threshold_value, temp_swap_out_threshold_value;
+
+	MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_backend_swapped_pool_lock));
+
+	if (MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION == shrink_type) {
+		/**
+		 * When we met that system memory is very low and Mali locked swappable memory size is less than
+		 * threshold value, and at the same time, GPU load is very low and don't need high performance,
+		 * at this condition, we can unlock more swap memory backend from swapped backends pool.
+		 */
+		gpu_utilization_threshold_value = MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION;
+		temp_swap_out_threshold_value = (mali_mem_swap_out_threshold_value >> 2);
+	} else {
+		/* When we add swappable memory backends to swapped pool, we need to think that we couldn't
+		* hold too much swappable backends in Mali driver, and also we need considering performance.
+		* So there is a balance for swapping out memory backend, we should follow the following conditions:
+		* 1. Total memory size in global mem backend swapped pool is more than the defined threshold value.
+		* 2. System level free memory size is less than the defined threshold value.
+		* 3. Please note that GPU utilization problem isn't considered in this condition.
+		*/
+		gpu_utilization_threshold_value = MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS;
+		temp_swap_out_threshold_value = mali_mem_swap_out_threshold_value;
+	}
+
+	/* Get system free pages number. */
+	system_free_size = global_page_state(NR_FREE_PAGES) * PAGE_SIZE;
+	last_gpu_utilization = _mali_ukk_utilization_gp_pp();
+
+	if ((last_gpu_utilization < gpu_utilization_threshold_value)
+	    && (system_free_size < mali_mem_swap_out_threshold_value)
+	    && (mem_backend_swapped_pool_size > temp_swap_out_threshold_value)) {
+		list_for_each_entry_safe(bkend, tmp_bkend, &mem_backend_swapped_pool, list) {
+			if (mem_backend_swapped_pool_size <= temp_swap_out_threshold_value) {
+				break;
+			}
+
+			mutex_lock(&bkend->mutex);
+
+			/* check if backend is in use. */
+			if (0 < bkend->using_count) {
+				mutex_unlock(&bkend->mutex);
+				continue;
+			}
+
+			mali_mem_swap_unlock_single_mem_backend(bkend);
+			list_del_init(&bkend->list);
+			mem_backend_swapped_pool_size -= bkend->size;
+#ifdef MALI_MEM_SWAP_TRACKING
+			mem_backend_swapped_unlock_size += bkend->size;
+#endif
+			mutex_unlock(&bkend->mutex);
+		}
+	}
+
+	return;
+}
+
+static void mali_mem_swap_swapped_bkend_pool_check_for_low_utilization(void *arg)
+{
+	MALI_IGNORE(arg);
+
+	mutex_lock(&mem_backend_swapped_pool_lock);
+
+	mali_mem_swap_swapped_bkend_pool_shrink(MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION);
+
+	mutex_unlock(&mem_backend_swapped_pool_lock);
+}
+
+/**
+ * After PP job finished, we add all of swappable memory backend used by this PP
+ * job to the tail of the global swapped pool, and if the total size of swappable memory is more than threshold
+ * value, we also need to shrink the swapped pool start from the head of the list.
+ */
+void mali_memory_swap_list_backend_add(mali_mem_backend *mem_bkend)
+{
+	mutex_lock(&mem_backend_swapped_pool_lock);
+	mutex_lock(&mem_bkend->mutex);
+
+	if (mali_memory_swap_backend_in_swapped_pool(mem_bkend)) {
+		MALI_DEBUG_ASSERT(!list_empty(&mem_bkend->list));
+
+		list_del_init(&mem_bkend->list);
+		list_add_tail(&mem_bkend->list, &mem_backend_swapped_pool);
+		mutex_unlock(&mem_bkend->mutex);
+		mutex_unlock(&mem_backend_swapped_pool_lock);
+		return;
+	}
+
+	list_add_tail(&mem_bkend->list, &mem_backend_swapped_pool);
+
+	mutex_unlock(&mem_bkend->mutex);
+	mem_backend_swapped_pool_size += mem_bkend->size;
+
+	mali_mem_swap_swapped_bkend_pool_shrink(MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS);
+
+	mutex_unlock(&mem_backend_swapped_pool_lock);
+	return;
+}
+
+
+u32 mali_mem_swap_idx_alloc(void)
+{
+	return _mali_osk_bitmap_alloc(&idx_mgr);
+}
+
+void mali_mem_swap_idx_free(u32 idx)
+{
+	_mali_osk_bitmap_free(&idx_mgr, idx);
+}
+
+static u32 mali_mem_swap_idx_range_alloc(u32 count)
+{
+	u32 index;
+
+	index = _mali_osk_bitmap_alloc_range(&idx_mgr, count);
+
+	return index;
+}
+
+static void mali_mem_swap_idx_range_free(u32 idx, int num)
+{
+	_mali_osk_bitmap_free_range(&idx_mgr, idx, num);
+}
+
+struct mali_swap_item *mali_mem_swap_alloc_swap_item(void)
+{
+	mali_swap_item *swap_item;
+
+	swap_item = kzalloc(sizeof(mali_swap_item), GFP_KERNEL);
+
+	if (NULL == swap_item) {
+		return NULL;
+	}
+
+	atomic_set(&swap_item->ref_count, 1);
+	swap_item->page = NULL;
+	atomic_add(1, &mali_mem_os_allocator.allocated_pages);
+
+	return swap_item;
+}
+
+void mali_mem_swap_free_swap_item(mali_swap_item *swap_item)
+{
+	struct inode *file_node;
+	long long start, end;
+
+	/* If this swap item is shared, we just reduce the reference counter. */
+	if (0 == atomic_dec_return(&swap_item->ref_count)) {
+		file_node = global_swap_file->f_path.dentry->d_inode;
+		start = swap_item->idx;
+		start = start << 12;
+		end = start + PAGE_SIZE;
+
+		shmem_truncate_range(file_node, start, (end - 1));
+
+		mali_mem_swap_idx_free(swap_item->idx);
+
+		atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+
+		kfree(swap_item);
+	}
+}
+
+/* Used to allocate new swap item for new memory allocation and cow page for write. */
+struct mali_page_node *_mali_mem_swap_page_node_allocate(void)
+{
+	struct mali_page_node *m_page;
+
+	m_page = _mali_page_node_allocate(MALI_PAGE_NODE_SWAP);
+
+	if (NULL == m_page) {
+		return NULL;
+	}
+
+	m_page->swap_it = mali_mem_swap_alloc_swap_item();
+
+	if (NULL == m_page->swap_it) {
+		kfree(m_page);
+		return NULL;
+	}
+
+	return m_page;
+}
+
+_mali_osk_errcode_t _mali_mem_swap_put_page_node(struct mali_page_node *m_page)
+{
+
+	mali_mem_swap_free_swap_item(m_page->swap_it);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_mem_swap_page_node_free(struct mali_page_node *m_page)
+{
+	_mali_mem_swap_put_page_node(m_page);
+
+	kfree(m_page);
+
+	return;
+}
+
+u32 mali_mem_swap_free(mali_mem_swap *swap_mem)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(swap_mem);
+
+	list_for_each_entry_safe(m_page, m_tmp, &swap_mem->pages, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_SWAP);
+
+		/* free the page node and release the swap item, if the ref count is 1,
+		 * then need also free the swap item. */
+		list_del(&m_page->list);
+		if (1 == _mali_page_node_get_ref_count(m_page)) {
+			free_pages_nr++;
+		}
+
+		_mali_mem_swap_page_node_free(m_page);
+	}
+
+	return free_pages_nr;
+}
+
+static u32 mali_mem_swap_cow_free(mali_mem_cow *cow_mem)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(cow_mem);
+
+	list_for_each_entry_safe(m_page, m_tmp, &cow_mem->pages, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_SWAP);
+
+		/* free the page node and release the swap item, if the ref count is 1,
+		 * then need also free the swap item. */
+		list_del(&m_page->list);
+		if (1 == _mali_page_node_get_ref_count(m_page)) {
+			free_pages_nr++;
+		}
+
+		_mali_mem_swap_page_node_free(m_page);
+	}
+
+	return free_pages_nr;
+}
+
+u32 mali_mem_swap_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped)
+{
+	mali_mem_allocation *alloc;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	alloc = mem_bkend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	if (is_mali_mapped) {
+		mali_mem_swap_mali_unmap(alloc);
+	}
+
+	mali_memory_swap_list_backend_delete(mem_bkend);
+
+	mutex_lock(&mem_bkend->mutex);
+	/* To make sure the given memory backend was unlocked from Mali side,
+	 * and then free this memory block. */
+	mali_mem_swap_unlock_single_mem_backend(mem_bkend);
+	mutex_unlock(&mem_bkend->mutex);
+
+	if (MALI_MEM_SWAP == mem_bkend->type) {
+		free_pages_nr = mali_mem_swap_free(&mem_bkend->swap_mem);
+	} else {
+		free_pages_nr = mali_mem_swap_cow_free(&mem_bkend->cow_mem);
+	}
+
+	return free_pages_nr;
+}
+
+mali_bool mali_mem_swap_in_page_node(struct mali_page_node *page_node)
+{
+	MALI_DEBUG_ASSERT(NULL != page_node);
+
+	page_node->swap_it->page = shmem_read_mapping_page(global_swap_space, page_node->swap_it->idx);
+
+	if (IS_ERR(page_node->swap_it->page)) {
+		MALI_DEBUG_PRINT_ERROR(("SWAP Mem: failed to swap in page with index: %d.\n", page_node->swap_it->idx));
+		return MALI_FALSE;
+	}
+
+	/* Ensure page is flushed from CPU caches. */
+	page_node->swap_it->dma_addr = dma_map_page(&mali_platform_device->dev, page_node->swap_it->page,
+				       0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+	return MALI_TRUE;
+}
+
+int mali_mem_swap_alloc_pages(mali_mem_swap *swap_mem, u32 size, u32 *bkend_idx)
+{
+	size_t page_count = PAGE_ALIGN(size) / PAGE_SIZE;
+	struct mali_page_node *m_page;
+	long system_free_size;
+	u32 i, index;
+	mali_bool ret;
+
+	MALI_DEBUG_ASSERT(NULL != swap_mem);
+	MALI_DEBUG_ASSERT(NULL != bkend_idx);
+	MALI_DEBUG_ASSERT(page_count <= MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE);
+
+	if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) {
+		MALI_DEBUG_PRINT(2, ("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n",
+				     size,
+				     atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE,
+				     mali_mem_os_allocator.allocation_limit));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	INIT_LIST_HEAD(&swap_mem->pages);
+	swap_mem->count = page_count;
+	index = mali_mem_swap_idx_range_alloc(page_count);
+
+	if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == index) {
+		MALI_PRINT_ERROR(("Mali Swap: Failed to allocate continuous index for swappable Mali memory."));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	for (i = 0; i < page_count; i++) {
+		m_page = _mali_mem_swap_page_node_allocate();
+
+		if (NULL == m_page) {
+			MALI_DEBUG_PRINT_ERROR(("SWAP Mem: Failed to allocate mali page node."));
+			swap_mem->count = i;
+
+			mali_mem_swap_free(swap_mem);
+			mali_mem_swap_idx_range_free(index + i, page_count - i);
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		m_page->swap_it->idx = index + i;
+
+		ret = mali_mem_swap_in_page_node(m_page);
+
+		if (MALI_FALSE == ret) {
+			MALI_DEBUG_PRINT_ERROR(("SWAP Mem: Allocate new page from SHMEM file failed."));
+			_mali_mem_swap_page_node_free(m_page);
+			mali_mem_swap_idx_range_free(index + i + 1, page_count - i - 1);
+
+			swap_mem->count = i;
+			mali_mem_swap_free(swap_mem);
+			return _MALI_OSK_ERR_NOMEM;
+		}
+
+		list_add_tail(&m_page->list, &swap_mem->pages);
+	}
+
+	system_free_size = global_page_state(NR_FREE_PAGES) * PAGE_SIZE;
+
+	if ((system_free_size < mali_mem_swap_out_threshold_value)
+	    && (mem_backend_swapped_pool_size > (mali_mem_swap_out_threshold_value >> 2))
+	    && mali_utilization_enabled()) {
+		_mali_osk_wq_schedule_work(mali_mem_swap_out_workq);
+	}
+
+	*bkend_idx = index;
+	return 0;
+}
+
+void mali_mem_swap_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+
+/* Insert these pages from shmem to mali page table*/
+_mali_osk_errcode_t mali_mem_swap_mali_map(mali_mem_swap *swap_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+	struct mali_page_directory *pagedir = session->page_directory;
+	struct mali_page_node *m_page;
+	dma_addr_t phys;
+	u32 virt = vaddr;
+	u32 prop = props;
+
+	list_for_each_entry(m_page, &swap_mem->pages, list) {
+		MALI_DEBUG_ASSERT(NULL != m_page->swap_it->page);
+		phys = m_page->swap_it->dma_addr;
+
+		mali_mmu_pagedir_update(pagedir, virt, phys, MALI_MMU_PAGE_SIZE, prop);
+		virt += MALI_MMU_PAGE_SIZE;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_in_pages(struct mali_pp_job *job)
+{
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	struct mali_page_node *m_page;
+	mali_bool swap_in_success = MALI_TRUE;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	for (i = 0; i < num_memory_cookies; i++) {
+
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+		if (NULL == mali_vma_node) {
+			job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+			swap_in_success = MALI_FALSE;
+			MALI_PRINT_ERROR(("SWAP Mem: failed to find mali_vma_node through Mali address: 0x%08x.\n", mali_addr));
+			continue;
+		}
+
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(NULL != mali_alloc);
+
+		if (MALI_MEM_SWAP != mali_alloc->type &&
+		    MALI_MEM_COW != mali_alloc->type) {
+			continue;
+		}
+
+		/* Get backend memory & Map on GPU */
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+		/* We neednot hold backend's lock here, race safe.*/
+		if ((MALI_MEM_COW == mem_bkend->type) &&
+		    (!(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+			continue;
+		}
+
+		mutex_lock(&mem_bkend->mutex);
+
+		/* When swap_in_success is MALI_FALSE, it means this job has memory backend that could not be swapped in,
+		 * and it will be aborted in mali scheduler, so here, we just mark those memory cookies which
+		 * should not be swapped out when delete job to invalide */
+		if (MALI_FALSE == swap_in_success) {
+			job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+
+		/* Before swap in, checking if this memory backend has been swapped in by the latest flushed jobs. */
+		++mem_bkend->using_count;
+
+		if (1 < mem_bkend->using_count) {
+			MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN != (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags));
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+
+		if (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN != (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags)) {
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+
+
+		list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+			if (MALI_FALSE == mali_mem_swap_in_page_node(m_page)) {
+				/* Don't have enough memory to swap in page, so release pages have already been swapped
+				 * in and then mark this pp job to be fail. */
+				mali_mem_swap_unlock_partial_locked_mem_backend(mem_bkend, m_page);
+				swap_in_success = MALI_FALSE;
+				break;
+			}
+		}
+
+		if (swap_in_success) {
+#ifdef MALI_MEM_SWAP_TRACKING
+			mem_backend_swapped_unlock_size -= mem_bkend->size;
+#endif
+			_mali_osk_mutex_wait(session->memory_lock);
+			mali_mem_swap_mali_map(&mem_bkend->swap_mem, session, mali_alloc->mali_mapping.addr, mali_alloc->mali_mapping.properties);
+			_mali_osk_mutex_signal(session->memory_lock);
+
+			/* Remove the unlock flag from mem backend flags, mark this backend has been swapped in. */
+			mem_bkend->flags &= ~(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN);
+			mutex_unlock(&mem_bkend->mutex);
+		} else {
+			--mem_bkend->using_count;
+			/* Marking that this backend is not swapped in, need not to be processed anymore. */
+			job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+			mutex_unlock(&mem_bkend->mutex);
+		}
+	}
+
+	job->swap_status = swap_in_success ? MALI_SWAP_IN_SUCC : MALI_SWAP_IN_FAIL;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_out_pages(struct mali_pp_job *job)
+{
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+
+	for (i = 0; i < num_memory_cookies; i++) {
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+
+		if (MALI_SWAP_INVALIDATE_MALI_ADDRESS == mali_addr) {
+			continue;
+		}
+
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+
+		if (NULL == mali_vma_node) {
+			MALI_PRINT_ERROR(("SWAP Mem: failed to find mali_vma_node through Mali address: 0x%08x.\n", mali_addr));
+			continue;
+		}
+
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(NULL != mali_alloc);
+
+		if (MALI_MEM_SWAP != mali_alloc->type &&
+		    MALI_MEM_COW != mali_alloc->type) {
+			continue;
+		}
+
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+		/* We neednot hold backend's lock here, race safe.*/
+		if ((MALI_MEM_COW == mem_bkend->type) &&
+		    (!(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+			continue;
+		}
+
+		mutex_lock(&mem_bkend->mutex);
+
+		MALI_DEBUG_ASSERT(0 < mem_bkend->using_count);
+
+		/* Reducing the using_count of mem backend means less pp job are using this memory backend,
+		 * if this count get to zero, it means no pp job is using it now, could put it to swap out list. */
+		--mem_bkend->using_count;
+
+		if (0 < mem_bkend->using_count) {
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+		mutex_unlock(&mem_bkend->mutex);
+
+		mali_memory_swap_list_backend_add(mem_bkend);
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_allocate_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep)
+{
+	struct mali_page_node *m_page, *found_node = NULL;
+	struct page *found_page;
+	mali_mem_swap *swap = NULL;
+	mali_mem_cow *cow = NULL;
+	dma_addr_t dma_addr;
+	u32 i = 0;
+
+	if (MALI_MEM_SWAP == mem_bkend->type) {
+		swap = &mem_bkend->swap_mem;
+		list_for_each_entry(m_page, &swap->pages, list) {
+			if (i == offset) {
+				found_node = m_page;
+				break;
+			}
+			i++;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+		MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (MALI_MEM_BACKEND_FLAG_SWAP_COWED & mem_bkend->flags));
+
+		cow = &mem_bkend->cow_mem;
+		list_for_each_entry(m_page, &cow->pages, list) {
+			if (i == offset) {
+				found_node = m_page;
+				break;
+			}
+			i++;
+		}
+	}
+
+	if (NULL == found_node) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	found_page = shmem_read_mapping_page(global_swap_space, found_node->swap_it->idx);
+
+	if (!IS_ERR(found_page)) {
+		lock_page(found_page);
+		dma_addr = dma_map_page(&mali_platform_device->dev, found_page,
+					0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+		dma_unmap_page(&mali_platform_device->dev, dma_addr,
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+		*pagep = found_page;
+	} else {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_cow_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep)
+{
+	struct mali_page_node *m_page, *found_node = NULL, *new_node = NULL;
+	mali_mem_cow *cow = NULL;
+	u32 i = 0;
+
+	MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+	MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED));
+	MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN == (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags));
+	MALI_DEBUG_ASSERT(!mali_memory_swap_backend_in_swapped_pool(mem_bkend));
+
+	cow = &mem_bkend->cow_mem;
+	list_for_each_entry(m_page, &cow->pages, list) {
+		if (i == offset) {
+			found_node = m_page;
+			break;
+		}
+		i++;
+	}
+
+	if (NULL == found_node) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	new_node = _mali_mem_swap_page_node_allocate();
+
+	if (NULL == new_node) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	new_node->swap_it->idx = mali_mem_swap_idx_alloc();
+
+	if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == new_node->swap_it->idx) {
+		MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW on demand.\n"));
+		kfree(new_node->swap_it);
+		kfree(new_node);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (MALI_FALSE == mali_mem_swap_in_page_node(new_node)) {
+		_mali_mem_swap_page_node_free(new_node);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* swap in found node for copy in kernel. */
+	if (MALI_FALSE == mali_mem_swap_in_page_node(found_node)) {
+		mali_mem_swap_out_page_node(new_node);
+		_mali_mem_swap_page_node_free(new_node);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	_mali_mem_cow_copy_page(found_node, new_node);
+
+	list_replace(&found_node->list, &new_node->list);
+
+	if (1 != _mali_page_node_get_ref_count(found_node)) {
+		atomic_add(1, &mem_bkend->mali_allocation->session->mali_mem_allocated_pages);
+		if (atomic_read(&mem_bkend->mali_allocation->session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > mem_bkend->mali_allocation->session->max_mali_mem_allocated_size) {
+			mem_bkend->mali_allocation->session->max_mali_mem_allocated_size = atomic_read(&mem_bkend->mali_allocation->session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+		mem_bkend->cow_mem.change_pages_nr++;
+	}
+
+	mali_mem_swap_out_page_node(found_node);
+	_mali_mem_swap_page_node_free(found_node);
+
+	/* When swap in the new page node, we have called dma_map_page for this page.\n */
+	dma_unmap_page(&mali_platform_device->dev, new_node->swap_it->dma_addr,
+		       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+	lock_page(new_node->swap_it->page);
+
+	*pagep = new_node->swap_it->page;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+#ifdef MALI_MEM_SWAP_TRACKING
+void mali_mem_swap_tracking(u32 *swap_pool_size, u32 *unlock_size)
+{
+	*swap_pool_size = mem_backend_swapped_pool_size;
+	*unlock_size =  mem_backend_swapped_unlock_size;
+}
+#endif
diff --git a/utgard/r6p2/linux/mali_memory_swap_alloc.h b/utgard/r6p2/linux/mali_memory_swap_alloc.h
new file mode 100644
index 0000000..3624d71
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_swap_alloc.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_SWAP_ALLOC_H__
+#define __MALI_MEMORY_SWAP_ALLOC_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include "mali_memory_types.h"
+#include "mali_pp_job.h"
+
+/**
+ * Initialize memory swapping module.
+ */
+_mali_osk_errcode_t mali_mem_swap_init(void);
+
+void mali_mem_swap_term(void);
+
+/**
+ * Return global share memory file to other modules.
+ */
+struct file *mali_mem_swap_get_global_swap_file(void);
+
+/**
+ * Unlock the given memory backend and pages in it could be swapped out by kernel.
+ */
+void mali_mem_swap_unlock_single_mem_backend(mali_mem_backend *mem_bkend);
+
+/**
+ * Remove the given memory backend from global swap list.
+ */
+void mali_memory_swap_list_backend_delete(mali_mem_backend *mem_bkend);
+
+/**
+ * Add the given memory backend to global swap list.
+ */
+void mali_memory_swap_list_backend_add(mali_mem_backend *mem_bkend);
+
+/**
+ * Allocate 1 index from bitmap used as page index in global swap file.
+ */
+u32 mali_mem_swap_idx_alloc(void);
+
+void mali_mem_swap_idx_free(u32 idx);
+
+/**
+ * Allocate a new swap item without page index.
+ */
+struct mali_swap_item *mali_mem_swap_alloc_swap_item(void);
+
+/**
+ * Free a swap item, truncate the corresponding space in page cache and free index of page.
+ */
+void mali_mem_swap_free_swap_item(mali_swap_item *swap_item);
+
+/**
+ * Allocate a page node with swap item.
+ */
+struct mali_page_node *_mali_mem_swap_page_node_allocate(void);
+
+/**
+ * Reduce the reference count of given page node and if return 0, just free this page node.
+ */
+_mali_osk_errcode_t _mali_mem_swap_put_page_node(struct mali_page_node *m_page);
+
+void _mali_mem_swap_page_node_free(struct mali_page_node *m_page);
+
+/**
+ * Free a swappable memory backend.
+ */
+u32 mali_mem_swap_free(mali_mem_swap *swap_mem);
+
+/**
+ * Ummap and free.
+ */
+u32 mali_mem_swap_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped);
+
+/**
+ * Read in a page from global swap file with the pre-allcated page index.
+ */
+mali_bool mali_mem_swap_in_page_node(struct mali_page_node *page_node);
+
+int mali_mem_swap_alloc_pages(mali_mem_swap *swap_mem, u32 size, u32 *bkend_idx);
+
+_mali_osk_errcode_t mali_mem_swap_mali_map(mali_mem_swap *swap_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+
+void mali_mem_swap_mali_unmap(mali_mem_allocation *alloc);
+
+/**
+ * When pp job created, we need swap in all of memory backend needed by this pp job.
+ */
+int mali_mem_swap_in_pages(struct mali_pp_job *job);
+
+/**
+ * Put all of memory backends used this pp job to the global swap list.
+ */
+int mali_mem_swap_out_pages(struct mali_pp_job *job);
+
+/**
+ * This will be called in page fault to process CPU read&write.
+ */
+int mali_mem_swap_allocate_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep) ;
+
+/**
+ * Used to process cow on demand for swappable memory backend.
+ */
+int mali_mem_swap_cow_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep);
+
+#ifdef MALI_MEM_SWAP_TRACKING
+void mali_mem_swap_tracking(u32 *swap_pool_size, u32 *unlock_size);
+#endif
+#endif /* __MALI_MEMORY_SWAP_ALLOC_H__ */
+
diff --git a/utgard/r6p2/linux/mali_memory_types.h b/utgard/r6p2/linux/mali_memory_types.h
new file mode 100755
index 0000000..60cd9f4
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_types.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_TYPES_H__
+#define __MALI_MEMORY_TYPES_H__
+
+#include <linux/mm.h>
+
+#if defined(CONFIG_MALI400_UMP)
+#include "ump_kernel_interface.h"
+#endif
+
+typedef u32 mali_address_t;
+
+typedef enum mali_mem_type {
+	MALI_MEM_OS,
+	MALI_MEM_EXTERNAL,
+	MALI_MEM_SWAP,
+	MALI_MEM_DMA_BUF,
+	MALI_MEM_UMP,
+	MALI_MEM_BLOCK,
+	MALI_MEM_COW,
+	MALI_MEM_SECURE,
+	MALI_MEM_TYPE_MAX,
+} mali_mem_type;
+
+typedef struct mali_block_item {
+	/* for block type, the block_phy is alway page size align
+	* so use low 12bit used for ref_cout.
+	*/
+	unsigned long phy_addr;
+} mali_block_item;
+
+/**
+ * idx is used to locate the given page in the address space of swap file.
+ * ref_count is used to mark how many memory backends are using this item.
+ */
+typedef struct mali_swap_item {
+	u32 idx;
+	atomic_t ref_count;
+	struct page *page;
+	dma_addr_t dma_addr;
+} mali_swap_item;
+
+typedef enum mali_page_node_type {
+	MALI_PAGE_NODE_OS,
+	MALI_PAGE_NODE_BLOCK,
+	MALI_PAGE_NODE_SWAP,
+} mali_page_node_type;
+
+typedef struct mali_page_node {
+	struct list_head list;
+	union {
+		struct page *page;
+		mali_block_item *blk_it; /*pointer to block item*/
+		mali_swap_item *swap_it;
+	};
+
+	u32 type;
+} mali_page_node;
+
+typedef struct mali_mem_os_mem {
+	struct list_head pages;
+	u32 count;
+} mali_mem_os_mem;
+
+typedef struct mali_mem_dma_buf {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+	struct mali_dma_buf_attachment *attachment;
+#endif
+} mali_mem_dma_buf;
+
+typedef struct mali_mem_external {
+	dma_addr_t phys;
+	u32 size;
+} mali_mem_external;
+
+typedef struct mali_mem_ump {
+#if defined(CONFIG_MALI400_UMP)
+	ump_dd_handle handle;
+#endif
+} mali_mem_ump;
+
+typedef struct block_allocator_allocation {
+	/* The list will be released in reverse order */
+	struct block_info *last_allocated;
+	u32 mapping_length;
+	struct block_allocator *info;
+} block_allocator_allocation;
+
+typedef struct mali_mem_block_mem {
+	struct list_head pfns;
+	u32 count;
+} mali_mem_block_mem;
+
+typedef struct mali_mem_virt_mali_mapping {
+	mali_address_t addr; /* Virtual Mali address */
+	u32 properties;      /* MMU Permissions + cache, must match MMU HW */
+} mali_mem_virt_mali_mapping;
+
+typedef struct mali_mem_virt_cpu_mapping {
+	void __user *addr;
+	struct vm_area_struct *vma;
+} mali_mem_virt_cpu_mapping;
+
+#define MALI_MEM_ALLOCATION_VALID_MAGIC 0xdeda110c
+#define MALI_MEM_ALLOCATION_FREED_MAGIC 0x10101010
+
+typedef struct mali_mm_node {
+	/* MALI GPU vaddr start, use u32 for mmu only support 32bit address*/
+	uint32_t start; /* GPU vaddr */
+	uint32_t size;  /* GPU allocation virtual size */
+	unsigned allocated : 1;
+} mali_mm_node;
+
+typedef struct mali_vma_node {
+	struct mali_mm_node vm_node;
+	struct rb_node vm_rb;
+} mali_vma_node;
+
+
+typedef struct mali_mem_allocation {
+	MALI_DEBUG_CODE(u32 magic);
+	mali_mem_type type;                /**< Type of memory */
+	u32 flags;                         /**< Flags for this allocation */
+
+	struct mali_session_data *session; /**< Pointer to session that owns the allocation */
+
+	mali_mem_virt_cpu_mapping cpu_mapping; /**< CPU mapping */
+	mali_mem_virt_mali_mapping mali_mapping; /**< Mali mapping */
+
+	/* add for new memory system */
+	struct mali_vma_node mali_vma_node;
+	u32 vsize; /* virtual size*/
+	u32 psize; /* physical backend memory size*/
+	struct list_head list;
+	s32 backend_handle; /* idr for mem_backend */
+	_mali_osk_atomic_t mem_alloc_refcount;
+} mali_mem_allocation;
+
+struct mali_mem_os_allocator {
+	spinlock_t pool_lock;
+	struct list_head pool_pages;
+	size_t pool_count;
+
+	atomic_t allocated_pages;
+	size_t allocation_limit;
+
+	struct shrinker shrinker;
+	struct delayed_work timed_shrinker;
+	struct workqueue_struct *wq;
+};
+
+/* COW backend memory type */
+typedef struct mali_mem_cow {
+	struct list_head pages;  /**< all pages for this cow backend allocation,
+                                                                including new allocated pages for modified range*/
+	u32 count;               /**< number of pages */
+	s32 change_pages_nr;
+} mali_mem_cow;
+
+typedef struct mali_mem_swap {
+	struct list_head pages;
+	u32 count;
+} mali_mem_swap;
+
+typedef struct mali_mem_secure {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+	struct dma_buf *buf;
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+#endif
+	u32 count;
+} mali_mem_secure;
+
+#define MALI_MEM_BACKEND_FLAG_COWED                   (0x1)  /* COW has happen on this backend */
+#define MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE        (0x2)  /* This is an COW backend, mapped as not allowed cpu to write */
+#define MALI_MEM_BACKEND_FLAG_SWAP_COWED              (0x4)  /* Mark the given backend is cowed from swappable memory. */
+/* Mark this backend is not swapped_in in MALI driver, and before using it,
+ * we should swap it in and set up corresponding page table. */
+#define MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN            (0x8)
+#define MALI_MEM_BACKEND_FLAG_NOT_BINDED              (0x1 << 5) /* this backend it not back with physical memory, used for defer bind */
+#define MALI_MEM_BACKEND_FLAG_BINDED              (0x1 << 6) /* this backend it back with physical memory, used for defer bind */
+
+typedef struct mali_mem_backend {
+	mali_mem_type type;                /**< Type of backend memory */
+	u32 flags;                         /**< Flags for this allocation */
+	u32 size;
+	/* Union selected by type. */
+	union {
+		mali_mem_os_mem os_mem;       /**< MALI_MEM_OS */
+		mali_mem_external ext_mem;    /**< MALI_MEM_EXTERNAL */
+		mali_mem_dma_buf dma_buf;     /**< MALI_MEM_DMA_BUF */
+		mali_mem_ump ump_mem;         /**< MALI_MEM_UMP */
+		mali_mem_block_mem block_mem; /**< MALI_MEM_BLOCK */
+		mali_mem_cow cow_mem;
+		mali_mem_swap swap_mem;
+		mali_mem_secure secure_mem;
+	};
+	mali_mem_allocation *mali_allocation;
+	struct mutex mutex;
+	mali_mem_type cow_type;
+
+	struct list_head list;           /**< Used to link swappable memory backend to the global swappable list */
+	int using_count;                 /**< Mark how many PP jobs are using this memory backend */
+	u32 start_idx;                   /**< If the correspondign vma of this backend is linear, this value will be used to set vma->vm_pgoff */
+} mali_mem_backend;
+
+#define MALI_MEM_FLAG_MALI_GUARD_PAGE (_MALI_MAP_EXTERNAL_MAP_GUARD_PAGE)
+#define MALI_MEM_FLAG_DONT_CPU_MAP    (1 << 1)
+#define MALI_MEM_FLAG_CAN_RESIZE  (_MALI_MEMORY_ALLOCATE_RESIZEABLE)
+#endif /* __MALI_MEMORY_TYPES__ */
diff --git a/utgard/r6p2/linux/mali_memory_ump.c b/utgard/r6p2/linux/mali_memory_ump.c
new file mode 100755
index 0000000..72bef08
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_ump.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory.h"
+#include "ump_kernel_interface.h"
+
+static int mali_mem_ump_map(mali_mem_backend *mem_backend)
+{
+	ump_dd_handle ump_mem;
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	u32 nr_blocks;
+	u32 i;
+	ump_dd_physical_block *ump_blocks;
+	struct mali_page_directory *pagedir;
+	u32 offset = 0;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	ump_mem = mem_backend->ump_mem.handle;
+	MALI_DEBUG_ASSERT(UMP_DD_HANDLE_INVALID != ump_mem);
+
+	nr_blocks = ump_dd_phys_block_count_get(ump_mem);
+	if (nr_blocks == 0) {
+		MALI_DEBUG_PRINT(1, ("No block count\n"));
+		return -EINVAL;
+	}
+
+	ump_blocks = _mali_osk_malloc(sizeof(*ump_blocks) * nr_blocks);
+	if (NULL == ump_blocks) {
+		return -ENOMEM;
+	}
+
+	if (UMP_DD_INVALID == ump_dd_phys_blocks_get(ump_mem, ump_blocks, nr_blocks)) {
+		_mali_osk_free(ump_blocks);
+		return -EFAULT;
+	}
+
+	pagedir = session->page_directory;
+
+	mali_session_memory_lock(session);
+
+	err = mali_mem_mali_map_prepare(alloc);
+	if (_MALI_OSK_ERR_OK != err) {
+		MALI_DEBUG_PRINT(1, ("Mapping of UMP memory failed\n"));
+
+		_mali_osk_free(ump_blocks);
+		mali_session_memory_unlock(session);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_blocks; ++i) {
+		u32 virt = alloc->mali_vma_node.vm_node.start + offset;
+
+		MALI_DEBUG_PRINT(7, ("Mapping in 0x%08x size %d\n", ump_blocks[i].addr , ump_blocks[i].size));
+
+		mali_mmu_pagedir_update(pagedir, virt, ump_blocks[i].addr,
+					ump_blocks[i].size, MALI_MMU_FLAGS_DEFAULT);
+
+		offset += ump_blocks[i].size;
+	}
+
+	if (alloc->flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		u32 virt = alloc->mali_vma_node.vm_node.start + offset;
+
+		/* Map in an extra virtual guard page at the end of the VMA */
+		MALI_DEBUG_PRINT(6, ("Mapping in extra guard page\n"));
+
+		mali_mmu_pagedir_update(pagedir, virt, ump_blocks[0].addr, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+
+		offset += _MALI_OSK_MALI_PAGE_SIZE;
+	}
+	mali_session_memory_unlock(session);
+	_mali_osk_free(ump_blocks);
+	return 0;
+}
+
+static void mali_mem_ump_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+int mali_mem_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32  secure_id, u32 flags)
+{
+	ump_dd_handle ump_mem;
+	int ret;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+
+	MALI_DEBUG_PRINT(3,
+			 ("Requested to map ump memory with secure id %d into virtual memory 0x%08X, size 0x%08X\n",
+			  secure_id, alloc->mali_vma_node.vm_node.start, alloc->mali_vma_node.vm_node.size));
+
+	ump_mem = ump_dd_handle_create_from_secure_id(secure_id);
+	if (UMP_DD_HANDLE_INVALID == ump_mem) MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP;
+	if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+	mem_backend->ump_mem.handle = ump_mem;
+
+	ret = mali_mem_ump_map(mem_backend);
+	if (0 != ret) {
+		ump_dd_reference_release(ump_mem);
+		return _MALI_OSK_ERR_FAULT;
+	}
+	MALI_DEBUG_PRINT(3, ("Returning from UMP bind\n"));
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_unbind_ump_buf(mali_mem_backend *mem_backend)
+{
+	ump_dd_handle ump_mem;
+	mali_mem_allocation *alloc;
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+	ump_mem = mem_backend->ump_mem.handle;
+	MALI_DEBUG_ASSERT(UMP_DD_HANDLE_INVALID != ump_mem);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	mali_mem_ump_unmap(alloc);
+	ump_dd_reference_release(ump_mem);
+}
+
diff --git a/utgard/r6p2/linux/mali_memory_ump.h b/utgard/r6p2/linux/mali_memory_ump.h
new file mode 100644
index 0000000..23f59ae
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_ump.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_UMP_BUF_H__
+#define __MALI_MEMORY_UMP_BUF_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_memory.h"
+
+int mali_mem_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32  secure_id, u32 flags);
+void mali_mem_unbind_ump_buf(mali_mem_backend *mem_backend);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MEMORY_DMA_BUF_H__ */
diff --git a/utgard/r6p2/linux/mali_memory_util.c b/utgard/r6p2/linux/mali_memory_util.c
new file mode 100755
index 0000000..4fbb757
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_util.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_secure.h"
+#endif
+#if defined(CONFIG_MALI400_UMP)
+#include "mali_memory_ump.h"
+#endif
+#include "mali_memory_external.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_swap_alloc.h"
+
+
+
+/**
+*function @_mali_free_allocation_mem - free a memory allocation
+*/
+static u32 _mali_free_allocation_mem(mali_mem_allocation *mali_alloc)
+{
+	mali_mem_backend *mem_bkend = NULL;
+	u32 free_pages_nr = 0;
+
+	struct mali_session_data *session = mali_alloc->session;
+	MALI_DEBUG_PRINT(4, (" _mali_free_allocation_mem, psize =0x%x! \n", mali_alloc->psize));
+	if (0 == mali_alloc->psize)
+		goto out;
+
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+	switch (mem_bkend->type) {
+	case MALI_MEM_OS:
+		free_pages_nr = mali_mem_os_release(mem_bkend);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		break;
+	case MALI_MEM_UMP:
+#if defined(CONFIG_MALI400_UMP)
+		mali_mem_unbind_ump_buf(mem_bkend);
+		atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+#else
+		MALI_DEBUG_PRINT(1, ("UMP not supported\n"));
+#endif
+		break;
+	case MALI_MEM_DMA_BUF:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		mali_mem_unbind_dma_buf(mem_bkend);
+		atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported\n"));
+#endif
+		break;
+	case MALI_MEM_EXTERNAL:
+		mali_mem_unbind_ext_buf(mem_bkend);
+		atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+		break;
+
+	case MALI_MEM_BLOCK:
+		free_pages_nr = mali_mem_block_release(mem_bkend);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		break;
+
+	case MALI_MEM_COW:
+		if (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED) {
+			free_pages_nr = mali_mem_swap_release(mem_bkend, MALI_TRUE);
+		} else {
+			free_pages_nr = mali_mem_cow_release(mem_bkend, MALI_TRUE);
+		}
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		break;
+	case MALI_MEM_SWAP:
+		free_pages_nr = mali_mem_swap_release(mem_bkend, MALI_TRUE);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		atomic_sub(free_pages_nr, &session->mali_mem_array[mem_bkend->type]);
+		break;
+	case MALI_MEM_SECURE:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		free_pages_nr = mali_mem_secure_release(mem_bkend);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory\n"));
+#endif
+		break;
+	default:
+		MALI_DEBUG_PRINT(1, ("mem type %d is not in the mali_mem_type enum.\n", mem_bkend->type));
+		break;
+	}
+
+	/*Remove backend memory idex */
+	mutex_lock(&mali_idr_mutex);
+	idr_remove(&mali_backend_idr, mali_alloc->backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	kfree(mem_bkend);
+out:
+	/* remove memory allocation  */
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_alloc->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_alloc);
+	return free_pages_nr;
+}
+
+/**
+*  ref_count for allocation
+*/
+u32 mali_allocation_unref(struct mali_mem_allocation **alloc)
+{
+	u32 free_pages_nr = 0;
+	mali_mem_allocation *mali_alloc = *alloc;
+	*alloc = NULL;
+	if (0 == _mali_osk_atomic_dec_return(&mali_alloc->mem_alloc_refcount)) {
+		free_pages_nr = _mali_free_allocation_mem(mali_alloc);
+	}
+	return free_pages_nr;
+}
+
+void mali_allocation_ref(struct mali_mem_allocation *alloc)
+{
+	_mali_osk_atomic_inc(&alloc->mem_alloc_refcount);
+}
+
+void mali_free_session_allocations(struct mali_session_data *session)
+{
+	struct mali_mem_allocation *entry, *next;
+
+	MALI_DEBUG_PRINT(4, (" mali_free_session_allocations! \n"));
+
+	list_for_each_entry_safe(entry, next, &session->allocation_mgr.head, list) {
+		mali_allocation_unref(&entry);
+	}
+}
diff --git a/utgard/r6p2/linux/mali_memory_util.h b/utgard/r6p2/linux/mali_memory_util.h
new file mode 100644
index 0000000..0a23b77
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_util.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+
+#ifndef __MALI_MEMORY_UTIL_H__
+#define __MALI_MEMORY_UTIL_H__
+u32 mali_allocation_unref(struct mali_mem_allocation **alloc);
+
+void mali_allocation_ref(struct mali_mem_allocation *alloc);
+
+void mali_free_session_allocations(struct mali_session_data *session);
+
+#endif
diff --git a/utgard/r6p2/linux/mali_memory_virtual.c b/utgard/r6p2/linux/mali_memory_virtual.c
new file mode 100644
index 0000000..a217e43
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_virtual.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+
+
+/**
+*internal helper to link node into the rb-tree
+*/
+static inline void _mali_vma_offset_add_rb(struct mali_allocation_manager *mgr,
+		struct mali_vma_node *node)
+{
+	struct rb_node **iter = &mgr->allocation_mgr_rb.rb_node;
+	struct rb_node *parent = NULL;
+	struct mali_vma_node *iter_node;
+
+	while (likely(*iter)) {
+		parent = *iter;
+		iter_node = rb_entry(*iter, struct mali_vma_node, vm_rb);
+
+		if (node->vm_node.start < iter_node->vm_node.start)
+			iter = &(*iter)->rb_left;
+		else if (node->vm_node.start > iter_node->vm_node.start)
+			iter = &(*iter)->rb_right;
+		else
+			MALI_DEBUG_ASSERT(0);
+	}
+
+	rb_link_node(&node->vm_rb, parent, iter);
+	rb_insert_color(&node->vm_rb, &mgr->allocation_mgr_rb);
+}
+
+/**
+ * mali_vma_offset_add() - Add offset node to RB Tree
+ */
+int mali_vma_offset_add(struct mali_allocation_manager *mgr,
+			struct mali_vma_node *node)
+{
+	int ret = 0;
+	write_lock(&mgr->vm_lock);
+
+	if (node->vm_node.allocated) {
+		goto out;
+	}
+
+	_mali_vma_offset_add_rb(mgr, node);
+	/* set to allocated */
+	node->vm_node.allocated = 1;
+
+out:
+	write_unlock(&mgr->vm_lock);
+	return ret;
+}
+
+/**
+ * mali_vma_offset_remove() - Remove offset node from RB tree
+ */
+void mali_vma_offset_remove(struct mali_allocation_manager *mgr,
+			    struct mali_vma_node *node)
+{
+	write_lock(&mgr->vm_lock);
+
+	if (node->vm_node.allocated) {
+		rb_erase(&node->vm_rb, &mgr->allocation_mgr_rb);
+		memset(&node->vm_node, 0, sizeof(node->vm_node));
+	}
+	write_unlock(&mgr->vm_lock);
+}
+
+/**
+* mali_vma_offset_search - Search the node in RB tree
+*/
+struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr,
+		unsigned long start, unsigned long pages)
+{
+	struct mali_vma_node *node, *best;
+	struct rb_node *iter;
+	unsigned long offset;
+	read_lock(&mgr->vm_lock);
+
+	iter = mgr->allocation_mgr_rb.rb_node;
+	best = NULL;
+
+	while (likely(iter)) {
+		node = rb_entry(iter, struct mali_vma_node, vm_rb);
+		offset = node->vm_node.start;
+		if (start >= offset) {
+			iter = iter->rb_right;
+			best = node;
+			if (start == offset)
+				break;
+		} else {
+			iter = iter->rb_left;
+		}
+	}
+
+	if (best) {
+		offset = best->vm_node.start + best->vm_node.size;
+		if (offset <= start + pages)
+			best = NULL;
+	}
+	read_unlock(&mgr->vm_lock);
+
+	return best;
+}
+
diff --git a/utgard/r6p2/linux/mali_memory_virtual.h b/utgard/r6p2/linux/mali_memory_virtual.h
new file mode 100644
index 0000000..2a70cbb
--- /dev/null
+++ b/utgard/r6p2/linux/mali_memory_virtual.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __MALI_GPU_VMEM_H__
+#define __MALI_GPU_VMEM_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_manager.h"
+
+
+
+int mali_vma_offset_add(struct mali_allocation_manager *mgr,
+			struct mali_vma_node *node);
+
+void mali_vma_offset_remove(struct mali_allocation_manager *mgr,
+			    struct mali_vma_node *node);
+
+struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr,
+		unsigned long start,    unsigned long pages);
+
+#endif
diff --git a/utgard/r6p2/linux/mali_osk_atomics.c b/utgard/r6p2/linux/mali_osk_atomics.c
new file mode 100755
index 0000000..03f4421
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_atomics.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_atomics.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <asm/atomic.h>
+#include "mali_kernel_common.h"
+
+void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom)
+{
+	atomic_dec((atomic_t *)&atom->u.val);
+}
+
+u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom)
+{
+	return atomic_dec_return((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom)
+{
+	atomic_inc((atomic_t *)&atom->u.val);
+}
+
+u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom)
+{
+	return atomic_inc_return((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val)
+{
+	MALI_DEBUG_ASSERT_POINTER(atom);
+	atomic_set((atomic_t *)&atom->u.val, val);
+}
+
+u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom)
+{
+	return atomic_read((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_term(_mali_osk_atomic_t *atom)
+{
+	MALI_IGNORE(atom);
+}
+
+u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val)
+{
+	return atomic_xchg((atomic_t *)&atom->u.val, val);
+}
diff --git a/utgard/r6p2/linux/mali_osk_bitmap.c b/utgard/r6p2/linux/mali_osk_bitmap.c
new file mode 100644
index 0000000..67cc7e4
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_bitmap.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2010, 2013-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_bitmap.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/bitmap.h>
+#include <linux/vmalloc.h>
+#include "common/mali_kernel_common.h"
+#include "mali_osk_types.h"
+#include "mali_osk.h"
+
+u32 _mali_osk_bitmap_alloc(struct _mali_osk_bitmap *bitmap)
+{
+	u32 obj;
+
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	_mali_osk_spinlock_lock(bitmap->lock);
+
+	obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->reserve);
+
+	if (obj < bitmap->max) {
+		set_bit(obj, bitmap->table);
+	} else {
+		obj = -1;
+	}
+
+	if (obj != -1)
+		--bitmap->avail;
+	_mali_osk_spinlock_unlock(bitmap->lock);
+
+	return obj;
+}
+
+void _mali_osk_bitmap_free(struct _mali_osk_bitmap *bitmap, u32 obj)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	_mali_osk_bitmap_free_range(bitmap, obj, 1);
+}
+
+u32 _mali_osk_bitmap_alloc_range(struct _mali_osk_bitmap *bitmap, int cnt)
+{
+	u32 obj;
+
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	if (0 >= cnt) {
+		return -1;
+	}
+
+	if (1 == cnt) {
+		return _mali_osk_bitmap_alloc(bitmap);
+	}
+
+	_mali_osk_spinlock_lock(bitmap->lock);
+	obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max,
+					 bitmap->last, cnt, 0);
+
+	if (obj >= bitmap->max) {
+		obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max,
+						 bitmap->reserve, cnt, 0);
+	}
+
+	if (obj < bitmap->max) {
+		bitmap_set(bitmap->table, obj, cnt);
+
+		bitmap->last = (obj + cnt);
+		if (bitmap->last >= bitmap->max) {
+			bitmap->last = bitmap->reserve;
+		}
+	} else {
+		obj = -1;
+	}
+
+	if (obj != -1) {
+		bitmap->avail -= cnt;
+	}
+
+	_mali_osk_spinlock_unlock(bitmap->lock);
+
+	return obj;
+}
+
+u32 _mali_osk_bitmap_avail(struct _mali_osk_bitmap *bitmap)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	return bitmap->avail;
+}
+
+void _mali_osk_bitmap_free_range(struct _mali_osk_bitmap *bitmap, u32 obj, int cnt)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	_mali_osk_spinlock_lock(bitmap->lock);
+	bitmap_clear(bitmap->table, obj, cnt);
+	bitmap->last = min(bitmap->last, obj);
+
+	bitmap->avail += cnt;
+	_mali_osk_spinlock_unlock(bitmap->lock);
+}
+
+int _mali_osk_bitmap_init(struct _mali_osk_bitmap *bitmap, u32 num, u32 reserve)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+	MALI_DEBUG_ASSERT(reserve <= num);
+
+	bitmap->reserve = reserve;
+	bitmap->last = reserve;
+	bitmap->max  = num;
+	bitmap->avail = num - reserve;
+	bitmap->lock = _mali_osk_spinlock_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST);
+	if (!bitmap->lock) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) *
+				sizeof(long), GFP_KERNEL);
+	if (!bitmap->table) {
+		_mali_osk_spinlock_term(bitmap->lock);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_bitmap_term(struct _mali_osk_bitmap *bitmap)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	if (NULL != bitmap->lock) {
+		_mali_osk_spinlock_term(bitmap->lock);
+	}
+
+	if (NULL != bitmap->table) {
+		kfree(bitmap->table);
+	}
+}
+
diff --git a/utgard/r6p2/linux/mali_osk_irq.c b/utgard/r6p2/linux/mali_osk_irq.c
new file mode 100755
index 0000000..960ed81
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_irq.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_irq.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/types.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29))
+#include <mach/cpu.h>
+#endif
+#include <linux/slab.h>	/* For memory allocation */
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+typedef struct _mali_osk_irq_t_struct {
+	u32 irqnum;
+	void *data;
+	_mali_osk_irq_uhandler_t uhandler;
+} mali_osk_irq_object_t;
+
+typedef irqreturn_t (*irq_handler_func_t)(int, void *, struct pt_regs *);
+static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id);   /* , struct pt_regs *regs*/
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+u32 get_irqnum(struct _mali_osk_irq_t_struct* irq)
+{
+	if (irq)
+		return irq->irqnum;
+	else
+		return 0;
+}
+#endif
+
+#if defined(DEBUG)
+
+struct test_interrupt_data {
+	_mali_osk_irq_ack_t ack_func;
+	void *probe_data;
+	mali_bool interrupt_received;
+	wait_queue_head_t wq;
+};
+
+static irqreturn_t test_interrupt_upper_half(int port_name, void *dev_id)
+{
+	irqreturn_t ret = IRQ_NONE;
+	struct test_interrupt_data *data = (struct test_interrupt_data *)dev_id;
+
+	if (_MALI_OSK_ERR_OK == data->ack_func(data->probe_data)) {
+		data->interrupt_received = MALI_TRUE;
+		wake_up(&data->wq);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static _mali_osk_errcode_t test_interrupt(u32 irqnum,
+		_mali_osk_irq_trigger_t trigger_func,
+		_mali_osk_irq_ack_t ack_func,
+		void *probe_data,
+		const char *description)
+{
+	unsigned long irq_flags = 0;
+	struct test_interrupt_data data = {
+		.ack_func = ack_func,
+		.probe_data = probe_data,
+		.interrupt_received = MALI_FALSE,
+	};
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	irq_flags |= IRQF_SHARED;
+#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	if (0 != request_irq(irqnum, test_interrupt_upper_half, irq_flags, description, &data)) {
+		MALI_DEBUG_PRINT(2, ("Unable to install test IRQ handler for core '%s'\n", description));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	init_waitqueue_head(&data.wq);
+
+	trigger_func(probe_data);
+	wait_event_timeout(data.wq, data.interrupt_received, 100);
+
+	free_irq(irqnum, &data);
+
+	if (data.interrupt_received) {
+		MALI_DEBUG_PRINT(3, ("%s: Interrupt test OK\n", description));
+		return _MALI_OSK_ERR_OK;
+	} else {
+		MALI_PRINT_ERROR(("%s: Failed interrupt test on %u\n", description, irqnum));
+		return _MALI_OSK_ERR_FAULT;
+	}
+}
+
+#endif /* defined(DEBUG) */
+
+_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description)
+{
+	mali_osk_irq_object_t *irq_object;
+	unsigned long irq_flags = 0;
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	irq_flags |= IRQF_SHARED;
+#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	irq_object = kmalloc(sizeof(mali_osk_irq_object_t), GFP_KERNEL);
+	if (NULL == irq_object) {
+		return NULL;
+	}
+
+	if (-1 == irqnum) {
+		/* Probe for IRQ */
+		if ((NULL != trigger_func) && (NULL != ack_func)) {
+			unsigned long probe_count = 3;
+			_mali_osk_errcode_t err;
+			int irq;
+
+			MALI_DEBUG_PRINT(2, ("Probing for irq\n"));
+
+			do {
+				unsigned long mask;
+
+				mask = probe_irq_on();
+				trigger_func(probe_data);
+
+				_mali_osk_time_ubusydelay(5);
+
+				irq = probe_irq_off(mask);
+				err = ack_func(probe_data);
+			} while (irq < 0 && (err == _MALI_OSK_ERR_OK) && probe_count--);
+
+			if (irq < 0 || (_MALI_OSK_ERR_OK != err)) irqnum = -1;
+			else irqnum = irq;
+		} else irqnum = -1; /* no probe functions, fault */
+
+		if (-1 != irqnum) {
+			/* found an irq */
+			MALI_DEBUG_PRINT(2, ("Found irq %d\n", irqnum));
+		} else {
+			MALI_DEBUG_PRINT(2, ("Probe for irq failed\n"));
+		}
+	}
+
+	irq_object->irqnum = irqnum;
+	irq_object->uhandler = uhandler;
+	irq_object->data = int_data;
+
+	if (-1 == irqnum) {
+		MALI_DEBUG_PRINT(2, ("No IRQ for core '%s' found during probe\n", description));
+		kfree(irq_object);
+		return NULL;
+	}
+
+#if defined(DEBUG)
+	/* Verify that the configured interrupt settings are working */
+	if (_MALI_OSK_ERR_OK != test_interrupt(irqnum, trigger_func, ack_func, probe_data, description)) {
+		MALI_DEBUG_PRINT(2, ("Test of IRQ(%d) handler for core '%s' failed\n", irqnum, description));
+		kfree(irq_object);
+		return NULL;
+	}
+#endif
+
+	if (0 != request_irq(irqnum, irq_handler_upper_half, irq_flags, description, irq_object)) {
+		MALI_DEBUG_PRINT(2, ("Unable to install IRQ handler for core '%s'\n", description));
+		kfree(irq_object);
+		return NULL;
+	}
+
+	return irq_object;
+}
+
+void _mali_osk_irq_term(_mali_osk_irq_t *irq)
+{
+	mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)irq;
+	free_irq(irq_object->irqnum, irq_object);
+	kfree(irq_object);
+}
+
+
+/** This function is called directly in interrupt context from the OS just after
+ * the CPU get the hw-irq from mali, or other devices on the same IRQ-channel.
+ * It is registered one of these function for each mali core. When an interrupt
+ * arrives this function will be called equal times as registered mali cores.
+ * That means that we only check one mali core in one function call, and the
+ * core we check for each turn is given by the \a dev_id variable.
+ * If we detect an pending interrupt on the given core, we mask the interrupt
+ * out by settging the core's IRQ_MASK register to zero.
+ * Then we schedule the mali_core_irq_handler_bottom_half to run as high priority
+ * work queue job.
+ */
+static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id)   /* , struct pt_regs *regs*/
+{
+	irqreturn_t ret = IRQ_NONE;
+	mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)dev_id;
+
+	if (_MALI_OSK_ERR_OK == irq_object->uhandler(irq_object->data)) {
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
diff --git a/utgard/r6p2/linux/mali_osk_locks.c b/utgard/r6p2/linux/mali_osk_locks.c
new file mode 100755
index 0000000..4fa9398
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_locks.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_locks.c
+ * Implemenation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk_locks.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+
+#ifdef DEBUG
+#ifdef LOCK_ORDER_CHECKING
+static DEFINE_SPINLOCK(lock_tracking_lock);
+static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid);
+static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid);
+static const char *const lock_order_to_string(_mali_osk_lock_order_t order);
+#endif /* LOCK_ORDER_CHECKING */
+
+void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+{
+	checker->orig_flags = flags;
+	checker->owner = 0;
+
+#ifdef LOCK_ORDER_CHECKING
+	checker->order = order;
+	checker->next = NULL;
+#endif
+}
+
+void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker)
+{
+	checker->owner = _mali_osk_get_tid();
+
+#ifdef LOCK_ORDER_CHECKING
+	if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) {
+		if (!add_lock_to_log_and_check(checker, _mali_osk_get_tid())) {
+			printk(KERN_ERR "%d: ERROR lock %p taken while holding a lock of a higher order.\n",
+			       _mali_osk_get_tid(), checker);
+			dump_stack();
+		}
+	}
+#endif
+}
+
+void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker)
+{
+
+#ifdef LOCK_ORDER_CHECKING
+	if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) {
+		remove_lock_from_log(checker, _mali_osk_get_tid());
+	}
+#endif
+	checker->owner = 0;
+}
+
+
+#ifdef LOCK_ORDER_CHECKING
+/* Lock order checking
+ * -------------------
+ *
+ * To assure that lock ordering scheme defined by _mali_osk_lock_order_t is strictly adhered to, the
+ * following function will, together with a linked list and some extra members in _mali_osk_lock_debug_s,
+ * make sure that a lock that is taken has a higher order than the current highest-order lock a
+ * thread holds.
+ *
+ * This is done in the following manner:
+ * - A linked list keeps track of locks held by a thread.
+ * - A `next' pointer is added to each lock. This is used to chain the locks together.
+ * - When taking a lock, the `add_lock_to_log_and_check' makes sure that taking
+ *   the given lock is legal. It will follow the linked list  to find the last
+ *   lock taken by this thread. If the last lock's order was lower than the
+ *   lock that is to be taken, it appends the new lock to the list and returns
+ *   true, if not, it return false. This return value is assert()'ed on in
+ *   _mali_osk_lock_wait().
+ */
+
+static struct _mali_osk_lock_debug_s *lock_lookup_list;
+
+static void dump_lock_tracking_list(void)
+{
+	struct _mali_osk_lock_debug_s *l;
+	u32 n = 1;
+
+	/* print list for debugging purposes */
+	l = lock_lookup_list;
+
+	while (NULL != l) {
+		printk(" [lock: %p, tid_owner: %d, order: %d] ->", l, l->owner, l->order);
+		l = l->next;
+		MALI_DEBUG_ASSERT(n++ < 100);
+	}
+	printk(" NULL\n");
+}
+
+static int tracking_list_length(void)
+{
+	struct _mali_osk_lock_debug_s *l;
+	u32 n = 0;
+	l = lock_lookup_list;
+
+	while (NULL != l) {
+		l = l->next;
+		n++;
+		MALI_DEBUG_ASSERT(n < 100);
+	}
+	return n;
+}
+
+static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid)
+{
+	mali_bool ret = MALI_FALSE;
+	_mali_osk_lock_order_t highest_order_for_tid = _MALI_OSK_LOCK_ORDER_FIRST;
+	struct _mali_osk_lock_debug_s *highest_order_lock = (struct _mali_osk_lock_debug_s *)0xbeefbabe;
+	struct _mali_osk_lock_debug_s *l;
+	unsigned long local_lock_flag;
+	u32 len;
+
+	spin_lock_irqsave(&lock_tracking_lock, local_lock_flag);
+	len = tracking_list_length();
+
+	l  = lock_lookup_list;
+	if (NULL == l) { /* This is the first lock taken by this thread -- record and return true */
+		lock_lookup_list = lock;
+		spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+		return MALI_TRUE;
+	} else {
+		/* Traverse the locks taken and find the lock of the highest order.
+		 * Since several threads may hold locks, each lock's owner must be
+		 * checked so that locks not owned by this thread can be ignored. */
+		for (;;) {
+			MALI_DEBUG_ASSERT_POINTER(l);
+			if (tid == l->owner && l->order >= highest_order_for_tid) {
+				highest_order_for_tid = l->order;
+				highest_order_lock = l;
+			}
+
+			if (NULL != l->next) {
+				l = l->next;
+			} else {
+				break;
+			}
+		}
+
+		l->next = lock;
+		l->next = NULL;
+	}
+
+	/* We have now found the highest order lock currently held by this thread and can see if it is
+	 * legal to take the requested lock. */
+	ret = highest_order_for_tid < lock->order;
+
+	if (!ret) {
+		printk(KERN_ERR "Took lock of order %d (%s) while holding lock of order %d (%s)\n",
+		       lock->order, lock_order_to_string(lock->order),
+		       highest_order_for_tid, lock_order_to_string(highest_order_for_tid));
+		dump_lock_tracking_list();
+	}
+
+	if (len + 1 != tracking_list_length()) {
+		printk(KERN_ERR "************ lock: %p\n", lock);
+		printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length());
+		dump_lock_tracking_list();
+		MALI_DEBUG_ASSERT_POINTER(NULL);
+	}
+
+	spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+	return ret;
+}
+
+static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid)
+{
+	struct _mali_osk_lock_debug_s *curr;
+	struct _mali_osk_lock_debug_s *prev = NULL;
+	unsigned long local_lock_flag;
+	u32 len;
+	u32 n = 0;
+
+	spin_lock_irqsave(&lock_tracking_lock, local_lock_flag);
+	len = tracking_list_length();
+	curr = lock_lookup_list;
+
+	if (NULL == curr) {
+		printk(KERN_ERR "Error: Lock tracking list was empty on call to remove_lock_from_log\n");
+		dump_lock_tracking_list();
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(curr);
+
+
+	while (lock != curr) {
+		prev = curr;
+
+		MALI_DEBUG_ASSERT_POINTER(curr);
+		curr = curr->next;
+		MALI_DEBUG_ASSERT(n++ < 100);
+	}
+
+	if (NULL == prev) {
+		lock_lookup_list = curr->next;
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(curr);
+		MALI_DEBUG_ASSERT_POINTER(prev);
+		prev->next = curr->next;
+	}
+
+	lock->next = NULL;
+
+	if (len - 1 != tracking_list_length()) {
+		printk(KERN_ERR "************ lock: %p\n", lock);
+		printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length());
+		dump_lock_tracking_list();
+		MALI_DEBUG_ASSERT_POINTER(NULL);
+	}
+
+	spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+}
+
+static const char *const lock_order_to_string(_mali_osk_lock_order_t order)
+{
+	switch (order) {
+	case _MALI_OSK_LOCK_ORDER_SESSIONS:
+		return "_MALI_OSK_LOCK_ORDER_SESSIONS";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_SESSION:
+		return "_MALI_OSK_LOCK_ORDER_MEM_SESSION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_INFO:
+		return "_MALI_OSK_LOCK_ORDER_MEM_INFO";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_PT_CACHE:
+		return "_MALI_OSK_LOCK_ORDER_MEM_PT_CACHE";
+		break;
+	case _MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP:
+		return "_MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PM_EXECUTION:
+		return "_MALI_OSK_LOCK_ORDER_PM_EXECUTION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_EXECUTOR:
+		return "_MALI_OSK_LOCK_ORDER_EXECUTOR";
+		break;
+	case _MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM:
+		return "_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SCHEDULER:
+		return "_MALI_OSK_LOCK_ORDER_SCHEDULER";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED:
+		return "_MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED";
+		break;
+	case _MALI_OSK_LOCK_ORDER_DMA_COMMAND:
+		return "_MALI_OSK_LOCK_ORDER_DMA_COMMAND";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PROFILING:
+		return "_MALI_OSK_LOCK_ORDER_PROFILING";
+		break;
+	case _MALI_OSK_LOCK_ORDER_L2:
+		return "_MALI_OSK_LOCK_ORDER_L2";
+		break;
+	case _MALI_OSK_LOCK_ORDER_L2_COMMAND:
+		return "_MALI_OSK_LOCK_ORDER_L2_COMMAND";
+		break;
+	case _MALI_OSK_LOCK_ORDER_UTILIZATION:
+		return "_MALI_OSK_LOCK_ORDER_UTILIZATION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS:
+		return "_MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PM_STATE:
+		return "_MALI_OSK_LOCK_ORDER_PM_STATE";
+		break;
+	default:
+		return "<UNKNOWN_LOCK_ORDER>";
+	}
+}
+#endif /* LOCK_ORDER_CHECKING */
+#endif /* DEBUG */
diff --git a/utgard/r6p2/linux/mali_osk_locks.h b/utgard/r6p2/linux/mali_osk_locks.h
new file mode 100755
index 0000000..a05586f
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_locks.h
@@ -0,0 +1,326 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_locks.h
+ * Defines OS abstraction of lock and mutex
+ */
+#ifndef _MALI_OSK_LOCKS_H
+#define _MALI_OSK_LOCKS_H
+
+#include <linux/spinlock.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+
+#include <linux/slab.h>
+
+#include "mali_osk_types.h"
+
+#ifdef _cplusplus
+extern "C" {
+#endif
+
+	/* When DEBUG is enabled, this struct will be used to track owner, mode and order checking */
+#ifdef DEBUG
+	struct _mali_osk_lock_debug_s {
+		u32 owner;
+		_mali_osk_lock_flags_t orig_flags;
+		_mali_osk_lock_order_t order;
+		struct _mali_osk_lock_debug_s *next;
+	};
+#endif
+
+	/* Anstraction of spinlock_t */
+	struct _mali_osk_spinlock_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+		spinlock_t spinlock;
+	};
+
+	/* Abstration of spinlock_t and lock flag which is used to store register's state before locking */
+	struct _mali_osk_spinlock_irq_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+
+		spinlock_t spinlock;
+		unsigned long flags;
+	};
+
+	/* Abstraction of rw_semaphore in OS */
+	struct _mali_osk_mutex_rw_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+		_mali_osk_lock_mode_t mode;
+#endif
+
+		struct rw_semaphore rw_sema;
+	};
+
+	/* Mutex and mutex_interruptible functions share the same osk mutex struct */
+	struct _mali_osk_mutex_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+		struct mutex mutex;
+	};
+
+#ifdef DEBUG
+	/** @brief _mali_osk_locks_debug_init/add/remove() functions are declared when DEBUG is enabled and
+	 * defined in file mali_osk_locks.c. When LOCK_ORDER_CHECKING is enabled, calling these functions when we
+	 * init/lock/unlock a lock/mutex, we could track lock order of a given tid. */
+	void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order);
+	void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker);
+	void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker);
+
+	/** @brief This function can return a given lock's owner when DEBUG     is enabled. */
+	static inline u32 _mali_osk_lock_get_owner(struct _mali_osk_lock_debug_s *lock)
+	{
+		return lock->owner;
+	}
+#else
+#define _mali_osk_locks_debug_init(x, y, z) do {} while (0)
+#define _mali_osk_locks_debug_add(x) do {} while (0)
+#define _mali_osk_locks_debug_remove(x) do {} while (0)
+#endif
+
+	/** @brief Before use _mali_osk_spin_lock, init function should be used to allocate memory and initial spinlock*/
+	static inline _mali_osk_spinlock_t *_mali_osk_spinlock_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_spinlock_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_spinlock_t), GFP_KERNEL);
+		if (NULL == lock) {
+			return NULL;
+		}
+		spin_lock_init(&lock->spinlock);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief Lock a spinlock */
+	static inline void  _mali_osk_spinlock_lock(_mali_osk_spinlock_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		spin_lock(&lock->spinlock);
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock a spinlock */
+	static inline void _mali_osk_spinlock_unlock(_mali_osk_spinlock_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		spin_unlock(&lock->spinlock);
+	}
+
+	/** @brief Free a memory block which the argument lock pointed to and its type must be
+	 * _mali_osk_spinlock_t *. */
+	static inline void _mali_osk_spinlock_term(_mali_osk_spinlock_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Before _mali_osk_spinlock_irq_lock/unlock/term() is called, init function should be
+	 * called to initial spinlock and flags in struct _mali_osk_spinlock_irq_t. */
+	static inline _mali_osk_spinlock_irq_t *_mali_osk_spinlock_irq_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_spinlock_irq_t *lock = NULL;
+		lock = kmalloc(sizeof(_mali_osk_spinlock_irq_t), GFP_KERNEL);
+
+		if (NULL == lock) {
+			return NULL;
+		}
+
+		lock->flags = 0;
+		spin_lock_init(&lock->spinlock);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief Lock spinlock and save the register's state */
+	static inline void _mali_osk_spinlock_irq_lock(_mali_osk_spinlock_irq_t *lock)
+	{
+		unsigned long tmp_flags;
+
+		BUG_ON(NULL == lock);
+		spin_lock_irqsave(&lock->spinlock, tmp_flags);
+		lock->flags = tmp_flags;
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock spinlock with saved register's state */
+	static inline void _mali_osk_spinlock_irq_unlock(_mali_osk_spinlock_irq_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		spin_unlock_irqrestore(&lock->spinlock, lock->flags);
+	}
+
+	/** @brief Destroy a given memory block which lock pointed to, and the lock type must be
+	 * _mali_osk_spinlock_irq_t *. */
+	static inline void _mali_osk_spinlock_irq_term(_mali_osk_spinlock_irq_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Before _mali_osk_mutex_rw_wait/signal/term() is called, we should call
+	 * _mali_osk_mutex_rw_init() to kmalloc a memory block and initial part of elements in it. */
+	static inline _mali_osk_mutex_rw_t *_mali_osk_mutex_rw_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_mutex_rw_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_mutex_rw_t), GFP_KERNEL);
+
+		if (NULL == lock) {
+			return NULL;
+		}
+
+		init_rwsem(&lock->rw_sema);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief When call _mali_osk_mutex_rw_wait/signal() functions, the second argument mode
+	 * should be assigned with value _MALI_OSK_LOCKMODE_RO or _MALI_OSK_LOCKMODE_RW */
+	static inline void _mali_osk_mutex_rw_wait(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode)
+	{
+		BUG_ON(NULL == lock);
+		BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode));
+
+		if (mode == _MALI_OSK_LOCKMODE_RO) {
+			down_read(&lock->rw_sema);
+		} else {
+			down_write(&lock->rw_sema);
+		}
+
+#ifdef DEBUG
+		if (mode == _MALI_OSK_LOCKMODE_RW) {
+			lock->mode = mode;
+		} else { /* mode == _MALI_OSK_LOCKMODE_RO */
+			lock->mode = mode;
+		}
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+#endif
+	}
+
+	/** @brief Up lock->rw_sema with up_read/write() accordinf argument mode's value. */
+	static inline void  _mali_osk_mutex_rw_signal(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode)
+	{
+		BUG_ON(NULL == lock);
+		BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode));
+#ifdef DEBUG
+		/* make sure the thread releasing the lock actually was the owner */
+		if (mode == _MALI_OSK_LOCKMODE_RW) {
+			_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+			/* This lock now has no owner */
+			lock->checker.owner = 0;
+		}
+#endif
+
+		if (mode == _MALI_OSK_LOCKMODE_RO) {
+			up_read(&lock->rw_sema);
+		} else {
+			up_write(&lock->rw_sema);
+		}
+	}
+
+	/** @brief Free a given memory block which lock pointed to and its type must be
+	 * _mali_sok_mutex_rw_t *. */
+	static inline void _mali_osk_mutex_rw_term(_mali_osk_mutex_rw_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Mutex & mutex_interruptible share the same init and term function, because they have the
+	 * same osk mutex struct, and the difference between them is which locking function they use */
+	static inline _mali_osk_mutex_t *_mali_osk_mutex_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_mutex_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_mutex_t), GFP_KERNEL);
+
+		if (NULL == lock) {
+			return NULL;
+		}
+		mutex_init(&lock->mutex);
+
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief  Lock the lock->mutex with mutex_lock_interruptible function */
+	static inline _mali_osk_errcode_t _mali_osk_mutex_wait_interruptible(_mali_osk_mutex_t *lock)
+	{
+		_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+		BUG_ON(NULL == lock);
+
+		if (mutex_lock_interruptible(&lock->mutex)) {
+			printk(KERN_WARNING "Mali: Can not lock mutex\n");
+			err = _MALI_OSK_ERR_RESTARTSYSCALL;
+		}
+
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+		return err;
+	}
+
+	/** @brief Unlock the lock->mutex which is locked with mutex_lock_interruptible() function. */
+	static inline void _mali_osk_mutex_signal_interruptible(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		mutex_unlock(&lock->mutex);
+	}
+
+	/** @brief Lock the lock->mutex just with mutex_lock() function which could not be interruptted. */
+	static inline void _mali_osk_mutex_wait(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		mutex_lock(&lock->mutex);
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock the lock->mutex which is locked with mutex_lock() function. */
+	static inline void _mali_osk_mutex_signal(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(NULL == lock);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		mutex_unlock(&lock->mutex);
+	}
+
+	/** @brief Free a given memory block which lock point. */
+	static inline void _mali_osk_mutex_term(_mali_osk_mutex_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(NULL == lock);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+#ifdef _cplusplus
+}
+#endif
+
+#endif
diff --git a/utgard/r6p2/linux/mali_osk_low_level_mem.c b/utgard/r6p2/linux/mali_osk_low_level_mem.c
new file mode 100755
index 0000000..bc713a1
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_low_level_mem.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_low_level_mem.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <asm/io.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+
+void _mali_osk_mem_barrier(void)
+{
+	mb();
+}
+
+void _mali_osk_write_mem_barrier(void)
+{
+	wmb();
+}
+
+mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description)
+{
+	return (mali_io_address)ioremap_nocache(phys, size);
+}
+
+void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address virt)
+{
+	iounmap((void *)virt);
+}
+
+_mali_osk_errcode_t inline _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description)
+{
+#if MALI_LICENSE_IS_GPL
+	return _MALI_OSK_ERR_OK; /* GPL driver gets the mem region for the resources registered automatically */
+#else
+	return ((NULL == request_mem_region(phys, size, description)) ? _MALI_OSK_ERR_NOMEM : _MALI_OSK_ERR_OK);
+#endif
+}
+
+void inline _mali_osk_mem_unreqregion(uintptr_t phys, u32 size)
+{
+#if !MALI_LICENSE_IS_GPL
+	release_mem_region(phys, size);
+#endif
+}
+
+void inline _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val)
+{
+	__raw_writel(cpu_to_le32(val), ((u8 *)addr) + offset);
+}
+
+u32 inline _mali_osk_mem_ioread32(volatile mali_io_address addr, u32 offset)
+{
+	return ioread32(((u8 *)addr) + offset);
+}
+
+void inline _mali_osk_mem_iowrite32(volatile mali_io_address addr, u32 offset, u32 val)
+{
+	iowrite32(val, ((u8 *)addr) + offset);
+}
+
+void _mali_osk_cache_flushall(void)
+{
+	/** @note Cached memory is not currently supported in this implementation */
+}
+
+void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size)
+{
+	_mali_osk_write_mem_barrier();
+}
+
+u32 _mali_osk_mem_write_safe(void __user *dest, const void __user *src, u32 size)
+{
+#define MALI_MEM_SAFE_COPY_BLOCK_SIZE 4096
+	u32 retval = 0;
+	void *temp_buf;
+
+	temp_buf = kmalloc(MALI_MEM_SAFE_COPY_BLOCK_SIZE, GFP_KERNEL);
+	if (NULL != temp_buf) {
+		u32 bytes_left_to_copy = size;
+		u32 i;
+		for (i = 0; i < size; i += MALI_MEM_SAFE_COPY_BLOCK_SIZE) {
+			u32 size_to_copy;
+			u32 size_copied;
+			u32 bytes_left;
+
+			if (bytes_left_to_copy > MALI_MEM_SAFE_COPY_BLOCK_SIZE) {
+				size_to_copy = MALI_MEM_SAFE_COPY_BLOCK_SIZE;
+			} else {
+				size_to_copy = bytes_left_to_copy;
+			}
+
+			bytes_left = copy_from_user(temp_buf, ((char *)src) + i, size_to_copy);
+			size_copied = size_to_copy - bytes_left;
+
+			bytes_left = copy_to_user(((char *)dest) + i, temp_buf, size_copied);
+			size_copied -= bytes_left;
+
+			bytes_left_to_copy -= size_copied;
+			retval += size_copied;
+
+			if (size_copied != size_to_copy) {
+				break; /* Early out, we was not able to copy this entire block */
+			}
+		}
+
+		kfree(temp_buf);
+	}
+
+	return retval;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args)
+{
+	void __user *src;
+	void __user *dst;
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (NULL == session) {
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	src = (void __user *)(uintptr_t)args->src;
+	dst = (void __user *)(uintptr_t)args->dest;
+
+	/* Return number of bytes actually copied */
+	args->size = _mali_osk_mem_write_safe(dst, src, args->size);
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r6p2/linux/mali_osk_mali.c b/utgard/r6p2/linux/mali_osk_mali.c
new file mode 100755
index 0000000..272d196
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_mali.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_mali.c
+ * Implementation of the OS abstraction layer which is specific for the Mali kernel device driver
+ */
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <linux/platform_device.h>
+#include <linux/mali/mali_utgard.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h" /* MALI_xxx macros */
+#include "mali_osk.h"           /* kernel side OS functions */
+#include "mali_kernel_linux.h"
+
+static mali_bool mali_secure_mode_enabled = MALI_FALSE;
+static mali_bool mali_secure_mode_supported = MALI_FALSE;
+
+/* Function that init the mali gpu secure mode */
+void (*mali_secure_mode_deinit)(void) = NULL;
+/* Function that reset GPU and enable the mali gpu secure mode */
+int (*mali_gpu_reset_and_secure_mode_enable)(void) = NULL;
+/* Function that reset GPU and disable the mali gpu secure mode */
+int (*mali_gpu_reset_and_secure_mode_disable)(void) = NULL;
+
+#ifdef CONFIG_MALI_DT
+
+#define MALI_OSK_INVALID_RESOURCE_ADDRESS 0xFFFFFFFF
+
+/**
+ * Define the max number of resource we could have.
+ */
+#define MALI_OSK_MAX_RESOURCE_NUMBER 27
+
+/**
+ * Define the max number of resource with interrupts, and they are
+ * the first 20 elements in array mali_osk_resource_bank.
+ */
+#define MALI_OSK_RESOURCE_WITH_IRQ_NUMBER 20
+
+/**
+ * pp core start and end location in mali_osk_resource_bank array.
+ */
+#define MALI_OSK_RESOURCE_PP_LOCATION_START 2
+#define MALI_OSK_RESOURCE_PP_LOCATION_END 17
+
+/**
+ * L2 cache start and end location in mali_osk_resource_bank array.
+ */
+#define MALI_OSK_RESOURCE_L2_LOCATION_START 20
+#define MALI_OSK_RESOURCE_l2_LOCATION_END 22
+
+/**
+ * DMA unit location.
+ */
+#define MALI_OSK_RESOURCE_DMA_LOCATION 26
+
+static _mali_osk_resource_t mali_osk_resource_bank[MALI_OSK_MAX_RESOURCE_NUMBER] = {
+	{.description = "Mali_GP", .base = MALI_OFFSET_GP, .irq_name = "IRQGP",},
+	{.description = "Mali_GP_MMU", .base = MALI_OFFSET_GP_MMU, .irq_name = "IRQGPMMU",},
+	{.description = "Mali_PP0", .base = MALI_OFFSET_PP0, .irq_name = "IRQPP0",},
+	{.description = "Mali_PP0_MMU", .base = MALI_OFFSET_PP0_MMU, .irq_name = "IRQPPMMU0",},
+	{.description = "Mali_PP1", .base = MALI_OFFSET_PP1, .irq_name = "IRQPP1",},
+	{.description = "Mali_PP1_MMU", .base = MALI_OFFSET_PP1_MMU, .irq_name = "IRQPPMMU1",},
+	{.description = "Mali_PP2", .base = MALI_OFFSET_PP2, .irq_name = "IRQPP2",},
+	{.description = "Mali_PP2_MMU", .base = MALI_OFFSET_PP2_MMU, .irq_name = "IRQPPMMU2",},
+	{.description = "Mali_PP3", .base = MALI_OFFSET_PP3, .irq_name = "IRQPP3",},
+	{.description = "Mali_PP3_MMU", .base = MALI_OFFSET_PP3_MMU, .irq_name = "IRQPPMMU3",},
+	{.description = "Mali_PP4", .base = MALI_OFFSET_PP4, .irq_name = "IRQPP4",},
+	{.description = "Mali_PP4_MMU", .base = MALI_OFFSET_PP4_MMU, .irq_name = "IRQPPMMU4",},
+	{.description = "Mali_PP5", .base = MALI_OFFSET_PP5, .irq_name = "IRQPP5",},
+	{.description = "Mali_PP5_MMU", .base = MALI_OFFSET_PP5_MMU, .irq_name = "IRQPPMMU5",},
+	{.description = "Mali_PP6", .base = MALI_OFFSET_PP6, .irq_name = "IRQPP6",},
+	{.description = "Mali_PP6_MMU", .base = MALI_OFFSET_PP6_MMU, .irq_name = "IRQPPMMU6",},
+	{.description = "Mali_PP7", .base = MALI_OFFSET_PP7, .irq_name = "IRQPP7",},
+	{.description = "Mali_PP7_MMU", .base = MALI_OFFSET_PP7_MMU, .irq_name = "IRQPPMMU",},
+	{.description = "Mali_PP_Broadcast", .base = MALI_OFFSET_PP_BCAST, .irq_name = "IRQPP",},
+	{.description = "Mali_PMU", .base = MALI_OFFSET_PMU, .irq_name = "IRQPMU",},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE0,},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE1,},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE2,},
+	{.description = "Mali_PP_MMU_Broadcast", .base = MALI_OFFSET_PP_BCAST_MMU,},
+	{.description = "Mali_Broadcast", .base = MALI_OFFSET_BCAST,},
+	{.description = "Mali_DLBU", .base = MALI_OFFSET_DLBU,},
+	{.description = "Mali_DMA", .base = MALI_OFFSET_DMA,},
+};
+
+static int _mali_osk_get_compatible_name(const char **out_string)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+
+	MALI_DEBUG_ASSERT(NULL != node);
+
+	return of_property_read_string(node, "compatible", out_string);
+}
+
+_mali_osk_errcode_t _mali_osk_resource_initialize(void)
+{
+	mali_bool mali_is_450 = MALI_FALSE, mali_is_470 = MALI_FALSE;
+	int i, pp_core_num = 0, l2_core_num = 0;
+	struct resource *res;
+	const char *compatible_name = NULL;
+
+	if (0 == _mali_osk_get_compatible_name(&compatible_name)) {
+		if (0 == strncmp(compatible_name, "arm,mali-450", strlen("arm,mali-450"))) {
+			mali_is_450 = MALI_TRUE;
+			MALI_DEBUG_PRINT(2, ("mali-450 device tree detected."));
+		} else if (0 == strncmp(compatible_name, "arm,mali-470", strlen("arm,mali-470"))) {
+			mali_is_470 = MALI_TRUE;
+			MALI_DEBUG_PRINT(2, ("mali-470 device tree detected."));
+		}
+	}
+
+	for (i = 0; i < MALI_OSK_RESOURCE_WITH_IRQ_NUMBER; i++) {
+		res = platform_get_resource_byname(mali_platform_device, IORESOURCE_IRQ, mali_osk_resource_bank[i].irq_name);
+		if (res) {
+			mali_osk_resource_bank[i].irq = res->start;
+		} else {
+			mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+		}
+	}
+
+	for (i = MALI_OSK_RESOURCE_PP_LOCATION_START; i <= MALI_OSK_RESOURCE_PP_LOCATION_END; i++) {
+		if (MALI_OSK_INVALID_RESOURCE_ADDRESS != mali_osk_resource_bank[i].base) {
+			pp_core_num++;
+		}
+	}
+
+	/* We have to divide by 2, because we caculate twice for only one pp(pp_core and pp_mmu_core). */
+	if (0 != pp_core_num % 2) {
+		MALI_DEBUG_PRINT(2, ("The value of pp core number isn't normal."));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pp_core_num /= 2;
+
+	/**
+	 * we can caculate the number of l2 cache core according the number of pp core number
+	 * and device type(mali400/mali450/mali470).
+	 */
+	l2_core_num = 1;
+	if (mali_is_450) {
+		if (pp_core_num > 4) {
+			l2_core_num = 3;
+		} else if (pp_core_num <= 4) {
+			l2_core_num = 2;
+		}
+	}
+
+	for (i = MALI_OSK_RESOURCE_l2_LOCATION_END; i > MALI_OSK_RESOURCE_L2_LOCATION_START + l2_core_num - 1; i--) {
+		mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+	}
+
+	/* If device is not mali-450 type, we have to remove related resource from resource bank. */
+	if (!(mali_is_450 || mali_is_470)) {
+		for (i = MALI_OSK_RESOURCE_l2_LOCATION_END + 1; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) {
+			mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+		}
+	}
+
+	if (mali_is_470)
+		mali_osk_resource_bank[MALI_OSK_RESOURCE_DMA_LOCATION].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res)
+{
+	int i;
+
+	if (NULL == mali_platform_device) {
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	/* Traverse all of resources in resources bank to find the matching one. */
+	for (i = 0; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) {
+		if (mali_osk_resource_bank[i].base == addr) {
+			if (NULL != res) {
+				res->base = addr + _mali_osk_resource_base_address();
+				res->description = mali_osk_resource_bank[i].description;
+				res->irq = mali_osk_resource_bank[i].irq;
+			}
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+uintptr_t _mali_osk_resource_base_address(void)
+{
+	struct resource *reg_res = NULL;
+	uintptr_t ret = 0;
+
+	reg_res = platform_get_resource(mali_platform_device, IORESOURCE_MEM, 0);
+
+	if (NULL != reg_res) {
+		ret = reg_res->start;
+	}
+
+	return ret;
+}
+
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	MALI_DEBUG_PRINT(2, ("Get pmu config from device tree configuration.\n"));
+
+	MALI_DEBUG_ASSERT(NULL != node);
+
+	if (!of_get_property(node, "pmu_domain_config", &length)) {
+		return;
+	}
+
+	if (array_size != length / sizeof(u32)) {
+		MALI_PRINT_ERROR(("Wrong pmu domain config in device tree."));
+		return;
+	}
+
+	of_property_for_each_u32(node, "pmu_domain_config", prop, p, u) {
+		domain_config_array[i] = (u16)u;
+		i++;
+	}
+
+	return;
+}
+
+u32 _mali_osk_get_pmu_switch_delay(void)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+	u32 switch_delay;
+
+	MALI_DEBUG_ASSERT(NULL != node);
+
+	if (0 == of_property_read_u32(node, "pmu_switch_delay", &switch_delay)) {
+		return switch_delay;
+	} else {
+		MALI_DEBUG_PRINT(2, ("Couldn't find pmu_switch_delay in device tree configuration.\n"));
+	}
+
+	return 0;
+}
+
+#else /* CONFIG_MALI_DT */
+
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res)
+{
+	int i;
+	uintptr_t phys_addr;
+
+	if (NULL == mali_platform_device) {
+		/* Not connected to a device */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	phys_addr = addr + _mali_osk_resource_base_address();
+	for (i = 0; i < mali_platform_device->num_resources; i++) {
+		if (IORESOURCE_MEM == resource_type(&(mali_platform_device->resource[i])) &&
+		    mali_platform_device->resource[i].start == phys_addr) {
+			if (NULL != res) {
+				res->base = phys_addr;
+				res->description = mali_platform_device->resource[i].name;
+
+				/* Any (optional) IRQ resource belonging to this resource will follow */
+				if ((i + 1) < mali_platform_device->num_resources &&
+				    IORESOURCE_IRQ == resource_type(&(mali_platform_device->resource[i + 1]))) {
+					res->irq = mali_platform_device->resource[i + 1].start;
+				} else {
+					res->irq = -1;
+				}
+			}
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+uintptr_t _mali_osk_resource_base_address(void)
+{
+	uintptr_t lowest_addr = (uintptr_t)(0 - 1);
+	uintptr_t ret = 0;
+
+	if (NULL != mali_platform_device) {
+		int i;
+		for (i = 0; i < mali_platform_device->num_resources; i++) {
+			if (mali_platform_device->resource[i].flags & IORESOURCE_MEM &&
+			    mali_platform_device->resource[i].start < lowest_addr) {
+				lowest_addr = mali_platform_device->resource[i].start;
+				ret = lowest_addr;
+			}
+		}
+	}
+
+	return ret;
+}
+
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size)
+{
+	_mali_osk_device_data data = { 0, };
+
+	MALI_DEBUG_PRINT(2, ("Get pmu config from platform device data.\n"));
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		/* Copy the custom customer power domain config */
+		_mali_osk_memcpy(domain_config_array, data.pmu_domain_config, sizeof(data.pmu_domain_config));
+	}
+
+	return;
+}
+
+u32 _mali_osk_get_pmu_switch_delay(void)
+{
+	_mali_osk_errcode_t err;
+	_mali_osk_device_data data = { 0, };
+
+	err = _mali_osk_device_data_get(&data);
+
+	if (_MALI_OSK_ERR_OK == err) {
+		return data.pmu_switch_delay;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_MALI_DT */
+
+_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(data);
+
+	if (NULL != mali_platform_device) {
+		struct mali_gpu_device_data *os_data = NULL;
+
+		os_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data;
+		if (NULL != os_data) {
+			/* Copy data from OS dependant struct to Mali neutral struct (identical!) */
+			BUILD_BUG_ON(sizeof(*os_data) != sizeof(*data));
+			_mali_osk_memcpy(data, os_data, sizeof(*os_data));
+
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+u32 _mali_osk_identify_gpu_resource(void)
+{
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_L2_RESOURCE1, NULL))
+		/* Mali 450 */
+		return 0x450;
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_DLBU, NULL))
+		/* Mali 470 */
+		return 0x470;
+
+	/* Mali 400 */
+	return 0x400;
+}
+
+mali_bool _mali_osk_shared_interrupts(void)
+{
+	u32 irqs[128];
+	u32 i, j, irq, num_irqs_found = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	MALI_DEBUG_ASSERT(128 >= mali_platform_device->num_resources);
+
+	for (i = 0; i < mali_platform_device->num_resources; i++) {
+		if (IORESOURCE_IRQ & mali_platform_device->resource[i].flags) {
+			irq = mali_platform_device->resource[i].start;
+
+			for (j = 0; j < num_irqs_found; ++j) {
+				if (irq == irqs[j]) {
+					return MALI_TRUE;
+				}
+			}
+
+			irqs[num_irqs_found++] = irq;
+		}
+	}
+
+	return MALI_FALSE;
+}
+
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_init(void)
+{
+	_mali_osk_device_data data = { 0, };
+
+	if (_MALI_OSK_ERR_OK ==  _mali_osk_device_data_get(&data)) {
+		if ((NULL != data.secure_mode_init) && (NULL != data.secure_mode_deinit)
+		    && (NULL != data.gpu_reset_and_secure_mode_enable) && (NULL != data.gpu_reset_and_secure_mode_disable)) {
+			int err = data.secure_mode_init();
+			if (err) {
+				MALI_DEBUG_PRINT(1, ("Failed to init gpu secure mode.\n"));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			mali_secure_mode_deinit = data.secure_mode_deinit;
+			mali_gpu_reset_and_secure_mode_enable = data.gpu_reset_and_secure_mode_enable;
+			mali_gpu_reset_and_secure_mode_disable = data.gpu_reset_and_secure_mode_disable;
+
+			mali_secure_mode_supported = MALI_TRUE;
+			mali_secure_mode_enabled = MALI_FALSE;
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+	MALI_DEBUG_PRINT(3, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+
+}
+
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_deinit(void)
+{
+	if (NULL !=  mali_secure_mode_deinit) {
+		mali_secure_mode_deinit();
+		mali_secure_mode_enabled = MALI_FALSE;
+		mali_secure_mode_supported = MALI_FALSE;
+		return _MALI_OSK_ERR_OK;
+	}
+	MALI_DEBUG_PRINT(3, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+
+}
+
+
+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_enable(void)
+{
+	/* the mali executor lock must be held before enter this function. */
+
+	MALI_DEBUG_ASSERT(MALI_FALSE == mali_secure_mode_enabled);
+
+	if (NULL !=  mali_gpu_reset_and_secure_mode_enable) {
+		if (mali_gpu_reset_and_secure_mode_enable()) {
+			MALI_DEBUG_PRINT(1, ("Failed to reset GPU or enable gpu secure mode.\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+		mali_secure_mode_enabled = MALI_TRUE;
+		return _MALI_OSK_ERR_OK;
+	}
+	MALI_DEBUG_PRINT(1, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+}
+
+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_disable(void)
+{
+	/* the mali executor lock must be held before enter this function. */
+
+	MALI_DEBUG_ASSERT(MALI_TRUE == mali_secure_mode_enabled);
+
+	if (NULL != mali_gpu_reset_and_secure_mode_disable) {
+		if (mali_gpu_reset_and_secure_mode_disable()) {
+			MALI_DEBUG_PRINT(1, ("Failed to reset GPU or disable gpu secure mode.\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+		mali_secure_mode_enabled = MALI_FALSE;
+
+		return _MALI_OSK_ERR_OK;
+
+	}
+	MALI_DEBUG_PRINT(1, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+
+}
+
+mali_bool _mali_osk_gpu_secure_mode_is_enabled(void)
+{
+	return mali_secure_mode_enabled;
+}
+
+mali_bool _mali_osk_gpu_secure_mode_is_supported(void)
+{
+	return mali_secure_mode_supported;
+}
+
+
diff --git a/utgard/r6p2/linux/mali_osk_math.c b/utgard/r6p2/linux/mali_osk_math.c
new file mode 100755
index 0000000..3f06723
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_math.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_math.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/bitops.h>
+
+u32 _mali_osk_clz(u32 input)
+{
+	return 32 - fls(input);
+}
+
+u32 _mali_osk_fls(u32 input)
+{
+	return fls(input);
+}
diff --git a/utgard/r6p2/linux/mali_osk_memory.c b/utgard/r6p2/linux/mali_osk_memory.c
new file mode 100755
index 0000000..ad5494d
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_memory.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_memory.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+void inline *_mali_osk_calloc(u32 n, u32 size)
+{
+	return kcalloc(n, size, GFP_KERNEL);
+}
+
+void inline *_mali_osk_malloc(u32 size)
+{
+	return kmalloc(size, GFP_KERNEL);
+}
+
+void inline _mali_osk_free(void *ptr)
+{
+	kfree(ptr);
+}
+
+void inline *_mali_osk_valloc(u32 size)
+{
+	return vmalloc(size);
+}
+
+void inline _mali_osk_vfree(void *ptr)
+{
+	vfree(ptr);
+}
+
+void inline *_mali_osk_memcpy(void *dst, const void *src, u32  len)
+{
+	return memcpy(dst, src, len);
+}
+
+void inline *_mali_osk_memset(void *s, u32 c, u32 n)
+{
+	return memset(s, c, n);
+}
+
+mali_bool _mali_osk_mem_check_allocated(u32 max_allocated)
+{
+	/* No need to prevent an out-of-memory dialogue appearing on Linux,
+	 * so we always return MALI_TRUE.
+	 */
+	return MALI_TRUE;
+}
diff --git a/utgard/r6p2/linux/mali_osk_misc.c b/utgard/r6p2/linux/mali_osk_misc.c
new file mode 100755
index 0000000..7dda283
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_misc.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_misc.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include "mali_osk.h"
+
+#if !defined(CONFIG_MALI_QUIET)
+void _mali_osk_dbgmsg(const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	vprintk(fmt, args);
+	va_end(args);
+}
+#endif /* !defined(CONFIG_MALI_QUIET) */
+
+u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...)
+{
+	int res;
+	va_list args;
+	va_start(args, fmt);
+
+	res = vscnprintf(buf, (size_t)size, fmt, args);
+
+	va_end(args);
+	return res;
+}
+
+void _mali_osk_abort(void)
+{
+	/* make a simple fault by dereferencing a NULL pointer */
+	dump_stack();
+	*(int *)0 = 0;
+}
+
+void _mali_osk_break(void)
+{
+	_mali_osk_abort();
+}
+
+u32 _mali_osk_get_pid(void)
+{
+	/* Thread group ID is the process ID on Linux */
+	return (u32)current->tgid;
+}
+
+char *_mali_osk_get_comm(void)
+{
+	return (char *)current->comm;
+}
+
+
+u32 _mali_osk_get_tid(void)
+{
+	/* pid is actually identifying the thread on Linux */
+	u32 tid = current->pid;
+
+	/* If the pid is 0 the core was idle.  Instead of returning 0 we return a special number
+	 * identifying which core we are on. */
+	if (0 == tid) {
+		tid = -(1 + raw_smp_processor_id());
+	}
+
+	return tid;
+}
diff --git a/utgard/r6p2/linux/mali_osk_notification.c b/utgard/r6p2/linux/mali_osk_notification.c
new file mode 100755
index 0000000..5867841
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_notification.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_notification.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+/**
+ * Declaration of the notification queue object type
+ * Contains a linked list of notification pending delivery to user space.
+ * It also contains a wait queue of exclusive waiters blocked in the ioctl
+ * When a new notification is posted a single thread is resumed.
+ */
+struct _mali_osk_notification_queue_t_struct {
+	spinlock_t mutex; /**< Mutex protecting the list */
+	wait_queue_head_t receive_queue; /**< Threads waiting for new entries to the queue */
+	struct list_head head; /**< List of notifications waiting to be picked up */
+};
+
+typedef struct _mali_osk_notification_wrapper_t_struct {
+	struct list_head list;           /**< Internal linked list variable */
+	_mali_osk_notification_t data;   /**< Notification data */
+} _mali_osk_notification_wrapper_t;
+
+_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void)
+{
+	_mali_osk_notification_queue_t         *result;
+
+	result = (_mali_osk_notification_queue_t *)kmalloc(sizeof(_mali_osk_notification_queue_t), GFP_KERNEL);
+	if (NULL == result) return NULL;
+
+	spin_lock_init(&result->mutex);
+	init_waitqueue_head(&result->receive_queue);
+	INIT_LIST_HEAD(&result->head);
+
+	return result;
+}
+
+_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size)
+{
+	/* OPT Recycling of notification objects */
+	_mali_osk_notification_wrapper_t *notification;
+
+	notification = (_mali_osk_notification_wrapper_t *)kmalloc(sizeof(_mali_osk_notification_wrapper_t) + size,
+			GFP_KERNEL | __GFP_HIGH | __GFP_REPEAT);
+	if (NULL == notification) {
+		MALI_DEBUG_PRINT(1, ("Failed to create a notification object\n"));
+		return NULL;
+	}
+
+	/* Init the list */
+	INIT_LIST_HEAD(&notification->list);
+
+	if (0 != size) {
+		notification->data.result_buffer = ((u8 *)notification) + sizeof(_mali_osk_notification_wrapper_t);
+	} else {
+		notification->data.result_buffer = NULL;
+	}
+
+	/* set up the non-allocating fields */
+	notification->data.notification_type = type;
+	notification->data.result_buffer_size = size;
+
+	/* all ok */
+	return &(notification->data);
+}
+
+void _mali_osk_notification_delete(_mali_osk_notification_t *object)
+{
+	_mali_osk_notification_wrapper_t *notification;
+	MALI_DEBUG_ASSERT_POINTER(object);
+
+	notification = container_of(object, _mali_osk_notification_wrapper_t, data);
+
+	/* Free the container */
+	kfree(notification);
+}
+
+void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue)
+{
+	_mali_osk_notification_t *result;
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	while (_MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, &result)) {
+		_mali_osk_notification_delete(result);
+	}
+
+	/* not much to do, just free the memory */
+	kfree(queue);
+}
+void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object)
+{
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	unsigned long irq_flags;
+#endif
+
+	_mali_osk_notification_wrapper_t *notification;
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_ASSERT_POINTER(object);
+
+	notification = container_of(object, _mali_osk_notification_wrapper_t, data);
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_lock_irqsave(&queue->mutex, irq_flags);
+#else
+	spin_lock(&queue->mutex);
+#endif
+
+	list_add_tail(&notification->list, &queue->head);
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_unlock_irqrestore(&queue->mutex, irq_flags);
+#else
+	spin_unlock(&queue->mutex);
+#endif
+
+	/* and wake up one possible exclusive waiter */
+	wake_up(&queue->receive_queue);
+}
+
+_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result)
+{
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	unsigned long irq_flags;
+#endif
+
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	_mali_osk_notification_wrapper_t *wrapper_object;
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_lock_irqsave(&queue->mutex, irq_flags);
+#else
+	spin_lock(&queue->mutex);
+#endif
+
+	if (!list_empty(&queue->head)) {
+		wrapper_object = list_entry(queue->head.next, _mali_osk_notification_wrapper_t, list);
+		*result = &(wrapper_object->data);
+		list_del_init(&wrapper_object->list);
+		ret = _MALI_OSK_ERR_OK;
+	}
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_unlock_irqrestore(&queue->mutex, irq_flags);
+#else
+	spin_unlock(&queue->mutex);
+#endif
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result)
+{
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_ASSERT_POINTER(result);
+
+	/* default result */
+	*result = NULL;
+
+	if (wait_event_interruptible(queue->receive_queue,
+				     _MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, result))) {
+		return _MALI_OSK_ERR_RESTARTSYSCALL;
+	}
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
diff --git a/utgard/r6p2/linux/mali_osk_pm.c b/utgard/r6p2/linux/mali_osk_pm.c
new file mode 100755
index 0000000..6a579ad
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_pm.c
@@ -0,0 +1,83 @@
+/**
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_pm.c
+ * Implementation of the callback functions from common power management
+ */
+
+#include <linux/sched.h>
+
+#include "mali_kernel_linux.h"
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_PM_RUNTIME */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/* Can NOT run in atomic context */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	int err;
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	err = pm_runtime_get_sync(&(mali_platform_device->dev));
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+#endif
+	if (0 > err) {
+		MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get_sync() returned error code %d\n", err));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+	return _MALI_OSK_ERR_OK;
+}
+
+/* Can run in atomic context */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	int err;
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	err = pm_runtime_get(&(mali_platform_device->dev));
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+#endif
+	if (0 > err && -EINPROGRESS != err) {
+		MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get() returned error code %d\n", err));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/* Can run in atomic context */
+void _mali_osk_pm_dev_ref_put(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+	pm_runtime_put_autosuspend(&(mali_platform_device->dev));
+#else
+	pm_runtime_put(&(mali_platform_device->dev));
+#endif
+#endif
+}
+
+void _mali_osk_pm_dev_barrier(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	pm_runtime_barrier(&(mali_platform_device->dev));
+#endif
+}
diff --git a/utgard/r6p2/linux/mali_osk_profiling.c b/utgard/r6p2/linux/mali_osk_profiling.c
new file mode 100755
index 0000000..35d1abc
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_profiling.c
@@ -0,0 +1,1282 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/hrtimer.h>
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/sched.h>
+
+#include <mali_profiling_gator_api.h>
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_osk_profiling.h"
+#include "mali_linux_trace.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_l2_cache.h"
+#include "mali_user_settings_db.h"
+#include "mali_executor.h"
+#include "mali_memory_manager.h"
+
+#define MALI_PROFILING_STREAM_DATA_DEFAULT_SIZE 100
+#define MALI_PROFILING_STREAM_HOLD_TIME 1000000         /*1 ms */
+
+#define MALI_PROFILING_STREAM_BUFFER_SIZE       (1 << 12)
+#define MALI_PROFILING_STREAM_BUFFER_NUM        100
+
+/**
+ * Define the mali profiling stream struct.
+ */
+typedef struct mali_profiling_stream {
+	u8 data[MALI_PROFILING_STREAM_BUFFER_SIZE];
+	u32 used_size;
+	struct list_head list;
+} mali_profiling_stream;
+
+typedef struct mali_profiling_stream_list {
+	spinlock_t spin_lock;
+	struct list_head free_list;
+	struct list_head queue_list;
+} mali_profiling_stream_list;
+
+static const char mali_name[] = "4xx";
+static const char utgard_setup_version[] = "ANNOTATE_SETUP 1\n";
+
+static u32 profiling_sample_rate = 0;
+static u32 first_sw_counter_index = 0;
+
+static mali_bool l2_cache_counter_if_enabled = MALI_FALSE;
+static u32 num_counters_enabled = 0;
+static u32 mem_counters_enabled = 0;
+
+static _mali_osk_atomic_t stream_fd_if_used;
+
+static wait_queue_head_t stream_fd_wait_queue;
+static mali_profiling_counter *global_mali_profiling_counters = NULL;
+static u32 num_global_mali_profiling_counters = 0;
+
+static mali_profiling_stream_list *global_mali_stream_list = NULL;
+static mali_profiling_stream *mali_counter_stream = NULL;
+static mali_profiling_stream *mali_core_activity_stream = NULL;
+static u64 mali_core_activity_stream_dequeue_time = 0;
+static spinlock_t mali_activity_lock;
+static u32 mali_activity_cores_num =  0;
+static struct hrtimer profiling_sampling_timer;
+
+const char *_mali_mem_counter_descriptions[] = _MALI_MEM_COUTNER_DESCRIPTIONS;
+const char *_mali_special_counter_descriptions[] = _MALI_SPCIAL_COUNTER_DESCRIPTIONS;
+
+static u32 current_profiling_pid = 0;
+
+static void _mali_profiling_stream_list_destory(mali_profiling_stream_list *profiling_stream_list)
+{
+	mali_profiling_stream *profiling_stream, *tmp_profiling_stream;
+	MALI_DEBUG_ASSERT_POINTER(profiling_stream_list);
+
+	list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &profiling_stream_list->free_list, list) {
+		list_del(&profiling_stream->list);
+		kfree(profiling_stream);
+	}
+
+	list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &profiling_stream_list->queue_list, list) {
+		list_del(&profiling_stream->list);
+		kfree(profiling_stream);
+	}
+
+	kfree(profiling_stream_list);
+}
+
+static void _mali_profiling_global_stream_list_free(void)
+{
+	mali_profiling_stream *profiling_stream, *tmp_profiling_stream;
+	unsigned long irq_flags;
+
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+	list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &global_mali_stream_list->queue_list, list) {
+		profiling_stream->used_size = 0;
+		list_move(&profiling_stream->list, &global_mali_stream_list->free_list);
+	}
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+}
+
+static _mali_osk_errcode_t _mali_profiling_global_stream_list_dequeue(struct list_head *stream_list, mali_profiling_stream **new_mali_profiling_stream)
+{
+	unsigned long irq_flags;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	MALI_DEBUG_ASSERT_POINTER(stream_list);
+
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+
+	if (!list_empty(stream_list)) {
+		*new_mali_profiling_stream = list_entry(stream_list->next, mali_profiling_stream, list);
+		list_del_init(&(*new_mali_profiling_stream)->list);
+	} else {
+		ret = _MALI_OSK_ERR_NOMEM;
+	}
+
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+
+	return ret;
+}
+
+static void _mali_profiling_global_stream_list_queue(struct list_head *stream_list, mali_profiling_stream *current_mali_profiling_stream)
+{
+	unsigned long irq_flags;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	MALI_DEBUG_ASSERT_POINTER(stream_list);
+
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+	list_add_tail(&current_mali_profiling_stream->list, stream_list);
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+}
+
+static mali_bool _mali_profiling_global_stream_queue_list_if_empty(void)
+{
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	return list_empty(&global_mali_stream_list->queue_list);
+}
+
+static u32 _mali_profiling_global_stream_queue_list_next_size(void)
+{
+	unsigned long irq_flags;
+	u32 size = 0;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+	if (!list_empty(&global_mali_stream_list->queue_list)) {
+		mali_profiling_stream *next_mali_profiling_stream =
+			list_entry(global_mali_stream_list->queue_list.next, mali_profiling_stream, list);
+		size = next_mali_profiling_stream->used_size;
+	}
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+	return size;
+}
+
+/* The mali profiling stream file operations functions. */
+static ssize_t _mali_profiling_stream_read(
+	struct file *filp,
+	char __user *buffer,
+	size_t      size,
+	loff_t      *f_pos);
+
+static unsigned int  _mali_profiling_stream_poll(struct file *filp, poll_table *wait);
+
+static int  _mali_profiling_stream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations mali_profiling_stream_fops = {
+	.release = _mali_profiling_stream_release,
+	.read    = _mali_profiling_stream_read,
+	.poll    = _mali_profiling_stream_poll,
+};
+
+static ssize_t _mali_profiling_stream_read(
+	struct file *filp,
+	char __user *buffer,
+	size_t      size,
+	loff_t      *f_pos)
+{
+	u32 copy_len = 0;
+	mali_profiling_stream *current_mali_profiling_stream;
+	u32 used_size;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+	while (!_mali_profiling_global_stream_queue_list_if_empty()) {
+		used_size = _mali_profiling_global_stream_queue_list_next_size();
+		if (used_size <= ((u32)size - copy_len)) {
+			current_mali_profiling_stream = NULL;
+			_mali_profiling_global_stream_list_dequeue(&global_mali_stream_list->queue_list,
+					&current_mali_profiling_stream);
+			MALI_DEBUG_ASSERT_POINTER(current_mali_profiling_stream);
+			if (copy_to_user(&buffer[copy_len], current_mali_profiling_stream->data, current_mali_profiling_stream->used_size)) {
+				current_mali_profiling_stream->used_size = 0;
+				_mali_profiling_global_stream_list_queue(&global_mali_stream_list->free_list, current_mali_profiling_stream);
+				return -EFAULT;
+			}
+			copy_len += current_mali_profiling_stream->used_size;
+			current_mali_profiling_stream->used_size = 0;
+			_mali_profiling_global_stream_list_queue(&global_mali_stream_list->free_list, current_mali_profiling_stream);
+		} else {
+			break;
+		}
+	}
+	return (ssize_t)copy_len;
+}
+
+static unsigned int  _mali_profiling_stream_poll(struct file *filp, poll_table *wait)
+{
+	poll_wait(filp, &stream_fd_wait_queue, wait);
+	if (!_mali_profiling_global_stream_queue_list_if_empty())
+		return POLLIN;
+	return 0;
+}
+
+static int  _mali_profiling_stream_release(struct inode *inode, struct file *filp)
+{
+	_mali_osk_atomic_init(&stream_fd_if_used, 0);
+	return 0;
+}
+
+/* The funs for control packet and stream data.*/
+static void _mali_profiling_set_packet_size(unsigned char *const buf, const u32 size)
+{
+	u32 i;
+
+	for (i = 0; i < sizeof(size); ++i)
+		buf[i] = (size >> 8 * i) & 0xFF;
+}
+
+static u32 _mali_profiling_get_packet_size(unsigned char *const buf)
+{
+	u32 i;
+	u32 size = 0;
+	for (i = 0; i < sizeof(size); ++i)
+		size |= (u32)buf[i] << 8 * i;
+	return size;
+}
+
+static u32 _mali_profiling_read_packet_int(unsigned char *const buf, u32 *const pos, u32 const packet_size)
+{
+	u64 int_value = 0;
+	u8 shift = 0;
+	u8 byte_value = ~0;
+
+	while ((byte_value & 0x80) != 0) {
+		if ((*pos) >= packet_size)
+			return -1;
+		byte_value = buf[*pos];
+		*pos += 1;
+		int_value |= (u32)(byte_value & 0x7f) << shift;
+		shift += 7;
+	}
+
+	if (shift < 8 * sizeof(int_value) && (byte_value & 0x40) != 0) {
+		int_value |= -(1 << shift);
+	}
+
+	return int_value;
+}
+
+static u32 _mali_profiling_pack_int(u8 *const buf, u32 const buf_size, u32 const pos, s32 value)
+{
+	u32 add_bytes = 0;
+	int more = 1;
+	while (more) {
+		/* low order 7 bits of val */
+		char byte_value = value & 0x7f;
+		value >>= 7;
+
+		if ((value == 0 && (byte_value & 0x40) == 0) || (value == -1 && (byte_value & 0x40) != 0)) {
+			more = 0;
+		} else {
+			byte_value |= 0x80;
+		}
+
+		if ((pos + add_bytes) >= buf_size)
+			return 0;
+		buf[pos + add_bytes] = byte_value;
+		add_bytes++;
+	}
+
+	return add_bytes;
+}
+
+static int _mali_profiling_pack_long(uint8_t *const buf, u32 const buf_size, u32 const pos, s64 val)
+{
+	int add_bytes = 0;
+	int more = 1;
+	while (more) {
+		/* low order 7 bits of x */
+		char byte_value = val & 0x7f;
+		val >>= 7;
+
+		if ((val == 0 && (byte_value & 0x40) == 0) || (val == -1 && (byte_value & 0x40) != 0)) {
+			more = 0;
+		} else {
+			byte_value |= 0x80;
+		}
+
+		MALI_DEBUG_ASSERT((pos + add_bytes) < buf_size);
+		buf[pos + add_bytes] = byte_value;
+		add_bytes++;
+	}
+
+	return add_bytes;
+}
+
+static void _mali_profiling_stream_add_counter(mali_profiling_stream *profiling_stream, s64 current_time, u32 key, u32 counter_value)
+{
+	u32 add_size = STREAM_HEADER_SIZE;
+	MALI_DEBUG_ASSERT_POINTER(profiling_stream);
+	MALI_DEBUG_ASSERT((profiling_stream->used_size) < MALI_PROFILING_STREAM_BUFFER_SIZE);
+
+	profiling_stream->data[profiling_stream->used_size] = STREAM_HEADER_COUNTER_VALUE;
+
+	add_size += _mali_profiling_pack_long(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					      profiling_stream->used_size + add_size, current_time);
+	add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					     profiling_stream->used_size + add_size, (s32)0);
+	add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					     profiling_stream->used_size + add_size, (s32)key);
+	add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					     profiling_stream->used_size + add_size, (s32)counter_value);
+
+	_mali_profiling_set_packet_size(profiling_stream->data + profiling_stream->used_size + 1,
+					add_size - STREAM_HEADER_SIZE);
+
+	profiling_stream->used_size += add_size;
+}
+
+/* The callback function for sampling timer.*/
+static enum hrtimer_restart  _mali_profiling_sampling_counters(struct hrtimer *timer)
+{
+	u32 counter_index;
+	s64 current_time;
+	MALI_DEBUG_ASSERT_POINTER(global_mali_profiling_counters);
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+	MALI_DEBUG_ASSERT(NULL == mali_counter_stream);
+	if (_MALI_OSK_ERR_OK == _mali_profiling_global_stream_list_dequeue(
+		    &global_mali_stream_list->free_list, &mali_counter_stream)) {
+
+		MALI_DEBUG_ASSERT_POINTER(mali_counter_stream);
+		MALI_DEBUG_ASSERT(0 == mali_counter_stream->used_size);
+
+		/* Capture l2 cache counter values if enabled */
+		if (MALI_TRUE == l2_cache_counter_if_enabled) {
+			int i, j = 0;
+			_mali_profiling_l2_counter_values l2_counters_values;
+			_mali_profiling_get_l2_counters(&l2_counters_values);
+
+			for (i  = COUNTER_L2_0_C0; i <= COUNTER_L2_2_C1; i++) {
+				if (0 == (j % 2))
+					_mali_osk_profiling_record_global_counters(i, l2_counters_values.cores[j / 2].value0);
+				else
+					_mali_osk_profiling_record_global_counters(i, l2_counters_values.cores[j / 2].value1);
+				j++;
+			}
+		}
+
+		current_time = (s64)_mali_osk_boot_time_get_ns();
+
+		/* Add all enabled counter values into stream */
+		for (counter_index = 0; counter_index < num_global_mali_profiling_counters; counter_index++) {
+			/* No need to sample these couners here. */
+			if (global_mali_profiling_counters[counter_index].enabled) {
+				if ((global_mali_profiling_counters[counter_index].counter_id >= FIRST_MEM_COUNTER &&
+				     global_mali_profiling_counters[counter_index].counter_id <= LAST_MEM_COUNTER)
+				    || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_VP_ACTIVITY)
+				    || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_FP_ACTIVITY)
+				    || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_FILMSTRIP)) {
+
+					continue;
+				}
+
+				if (global_mali_profiling_counters[counter_index].counter_id >= COUNTER_L2_0_C0 &&
+				    global_mali_profiling_counters[counter_index].counter_id <= COUNTER_L2_2_C1) {
+
+					u32 prev_val = global_mali_profiling_counters[counter_index].prev_counter_value;
+
+					_mali_profiling_stream_add_counter(mali_counter_stream, current_time, global_mali_profiling_counters[counter_index].key,
+									   global_mali_profiling_counters[counter_index].current_counter_value - prev_val);
+
+					prev_val = global_mali_profiling_counters[counter_index].current_counter_value;
+
+					global_mali_profiling_counters[counter_index].prev_counter_value = prev_val;
+				} else {
+
+					if (global_mali_profiling_counters[counter_index].counter_id == COUNTER_TOTAL_ALLOC_PAGES) {
+						u32 total_alloc_mem = _mali_ukk_report_memory_usage();
+						global_mali_profiling_counters[counter_index].current_counter_value = total_alloc_mem / _MALI_OSK_MALI_PAGE_SIZE;
+					}
+					_mali_profiling_stream_add_counter(mali_counter_stream, current_time, global_mali_profiling_counters[counter_index].key,
+									   global_mali_profiling_counters[counter_index].current_counter_value);
+					if (global_mali_profiling_counters[counter_index].counter_id < FIRST_SPECIAL_COUNTER)
+						global_mali_profiling_counters[counter_index].current_counter_value = 0;
+				}
+			}
+		}
+		_mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_counter_stream);
+		mali_counter_stream = NULL;
+	} else {
+		MALI_DEBUG_PRINT(1, ("Not enough mali profiling stream buffer!\n"));
+	}
+
+	wake_up_interruptible(&stream_fd_wait_queue);
+
+	/*Enable the sampling timer again*/
+	if (0 != num_counters_enabled && 0 != profiling_sample_rate) {
+		hrtimer_forward_now(&profiling_sampling_timer, ns_to_ktime(profiling_sample_rate));
+		return HRTIMER_RESTART;
+	}
+	return HRTIMER_NORESTART;
+}
+
+static void _mali_profiling_sampling_core_activity_switch(int counter_id, int core, u32 activity, u32 pid)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&mali_activity_lock, irq_flags);
+	if (activity == 0)
+		mali_activity_cores_num--;
+	else
+		mali_activity_cores_num++;
+	spin_unlock_irqrestore(&mali_activity_lock, irq_flags);
+
+	if (NULL != global_mali_profiling_counters) {
+		int i ;
+		for (i = 0; i < num_global_mali_profiling_counters; i++) {
+			if (counter_id == global_mali_profiling_counters[i].counter_id && global_mali_profiling_counters[i].enabled) {
+				u64 current_time = _mali_osk_boot_time_get_ns();
+				u32 add_size = STREAM_HEADER_SIZE;
+
+				if (NULL != mali_core_activity_stream) {
+					if ((mali_core_activity_stream_dequeue_time +  MALI_PROFILING_STREAM_HOLD_TIME < current_time) ||
+					    (MALI_PROFILING_STREAM_DATA_DEFAULT_SIZE > MALI_PROFILING_STREAM_BUFFER_SIZE
+					     - mali_core_activity_stream->used_size)) {
+						_mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_core_activity_stream);
+						mali_core_activity_stream = NULL;
+						wake_up_interruptible(&stream_fd_wait_queue);
+					}
+				}
+
+				if (NULL == mali_core_activity_stream) {
+					if (_MALI_OSK_ERR_OK == _mali_profiling_global_stream_list_dequeue(
+						    &global_mali_stream_list->free_list, &mali_core_activity_stream)) {
+						mali_core_activity_stream_dequeue_time = current_time;
+					} else {
+						MALI_DEBUG_PRINT(1, ("Not enough mali profiling stream buffer!\n"));
+						wake_up_interruptible(&stream_fd_wait_queue);
+						break;
+					}
+
+				}
+
+				mali_core_activity_stream->data[mali_core_activity_stream->used_size] = STREAM_HEADER_CORE_ACTIVITY;
+
+				add_size += _mali_profiling_pack_long(mali_core_activity_stream->data,
+								      MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, (s64)current_time);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, core);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, (s32)global_mali_profiling_counters[i].key);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, activity);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, pid);
+
+				_mali_profiling_set_packet_size(mali_core_activity_stream->data + mali_core_activity_stream->used_size + 1,
+								add_size - STREAM_HEADER_SIZE);
+
+				mali_core_activity_stream->used_size += add_size;
+
+				if (0 == mali_activity_cores_num) {
+					_mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_core_activity_stream);
+					mali_core_activity_stream = NULL;
+					wake_up_interruptible(&stream_fd_wait_queue);
+				}
+
+				break;
+			}
+		}
+	}
+}
+
+static mali_bool _mali_profiling_global_counters_init(void)
+{
+	int core_id, counter_index, counter_number, counter_id;
+	u32 num_l2_cache_cores;
+	u32 num_pp_cores;
+	u32 num_gp_cores = 1;
+
+	MALI_DEBUG_ASSERT(NULL == global_mali_profiling_counters);
+	num_pp_cores = mali_pp_get_glob_num_pp_cores();
+	num_l2_cache_cores =    mali_l2_cache_core_get_glob_num_l2_cores();
+
+	num_global_mali_profiling_counters = 3 * (num_gp_cores + num_pp_cores) + 2 * num_l2_cache_cores
+					     + MALI_PROFILING_SW_COUNTERS_NUM
+					     + MALI_PROFILING_SPECIAL_COUNTERS_NUM
+					     + MALI_PROFILING_MEM_COUNTERS_NUM;
+	global_mali_profiling_counters = _mali_osk_calloc(num_global_mali_profiling_counters, sizeof(mali_profiling_counter));
+
+	if (NULL == global_mali_profiling_counters)
+		return MALI_FALSE;
+
+	counter_index = 0;
+	/*Vertex processor counters */
+	for (core_id = 0; core_id < num_gp_cores; core_id ++) {
+		global_mali_profiling_counters[counter_index].counter_id = ACTIVITY_VP_0 + core_id;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_VP_%d_active", mali_name, core_id);
+
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			counter_index++;
+			global_mali_profiling_counters[counter_index].counter_id = COUNTER_VP_0_C0 + (2 * core_id) + counter_number;
+			_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+					   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_VP_%d_cnt%d", mali_name, core_id, counter_number);
+		}
+	}
+
+	/* Fragment processors' counters */
+	for (core_id = 0; core_id < num_pp_cores; core_id++) {
+		counter_index++;
+		global_mali_profiling_counters[counter_index].counter_id = ACTIVITY_FP_0 + core_id;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_FP_%d_active", mali_name, core_id);
+
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			counter_index++;
+			global_mali_profiling_counters[counter_index].counter_id = COUNTER_FP_0_C0 + (2 * core_id) + counter_number;
+			_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+					   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_FP_%d_cnt%d", mali_name, core_id, counter_number);
+		}
+	}
+
+	/* L2 Cache counters */
+	for (core_id = 0; core_id < num_l2_cache_cores; core_id++) {
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			counter_index++;
+			global_mali_profiling_counters[counter_index].counter_id = COUNTER_L2_0_C0 + (2 * core_id) + counter_number;
+			_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+					   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_L2_%d_cnt%d", mali_name, core_id, counter_number);
+		}
+	}
+
+	/* Now set up the software counter entries */
+	for (counter_id = FIRST_SW_COUNTER; counter_id <= LAST_SW_COUNTER; counter_id++) {
+		counter_index++;
+
+		if (0 == first_sw_counter_index)
+			first_sw_counter_index = counter_index;
+
+		global_mali_profiling_counters[counter_index].counter_id = counter_id;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_SW_%d", mali_name, counter_id - FIRST_SW_COUNTER);
+	}
+
+	/* Now set up the special counter entries */
+	for (counter_id = FIRST_SPECIAL_COUNTER; counter_id <= LAST_SPECIAL_COUNTER; counter_id++) {
+
+		counter_index++;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_%s",
+				   mali_name, _mali_special_counter_descriptions[counter_id - FIRST_SPECIAL_COUNTER]);
+
+		global_mali_profiling_counters[counter_index].counter_id = counter_id;
+	}
+
+	/* Now set up the mem counter entries*/
+	for (counter_id = FIRST_MEM_COUNTER; counter_id <= LAST_MEM_COUNTER; counter_id++) {
+
+		counter_index++;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_%s",
+				   mali_name, _mali_mem_counter_descriptions[counter_id - FIRST_MEM_COUNTER]);
+
+		global_mali_profiling_counters[counter_index].counter_id = counter_id;
+	}
+
+	MALI_DEBUG_ASSERT((counter_index + 1) == num_global_mali_profiling_counters);
+
+	return MALI_TRUE;
+}
+
+void _mali_profiling_notification_mem_counter(struct mali_session_data *session, u32 counter_id, u32 key, int enable)
+{
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (NULL != session) {
+		_mali_osk_notification_t *notification;
+		_mali_osk_notification_queue_t *queue;
+
+		queue = session->ioctl_queue;
+		MALI_DEBUG_ASSERT(NULL != queue);
+
+		notification = _mali_osk_notification_create(_MALI_NOTIFICATION_ANNOTATE_PROFILING_MEM_COUNTER,
+				sizeof(_mali_uk_annotate_profiling_mem_counter_s));
+
+		if (NULL != notification) {
+			_mali_uk_annotate_profiling_mem_counter_s *data = notification->result_buffer;
+			data->counter_id = counter_id;
+			data->key = key;
+			data->enable = enable;
+
+			_mali_osk_notification_queue_send(queue, notification);
+		} else {
+			MALI_PRINT_ERROR(("Failed to create notification object!\n"));
+		}
+	} else {
+		MALI_PRINT_ERROR(("Failed to find the right session!\n"));
+	}
+}
+
+void _mali_profiling_notification_enable(struct mali_session_data *session, u32 sampling_rate, int enable)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (NULL != session) {
+		_mali_osk_notification_t *notification;
+		_mali_osk_notification_queue_t *queue;
+
+		queue = session->ioctl_queue;
+		MALI_DEBUG_ASSERT(NULL != queue);
+
+		notification = _mali_osk_notification_create(_MALI_NOTIFICATION_ANNOTATE_PROFILING_ENABLE,
+				sizeof(_mali_uk_annotate_profiling_enable_s));
+
+		if (NULL != notification) {
+			_mali_uk_annotate_profiling_enable_s *data = notification->result_buffer;
+			data->sampling_rate = sampling_rate;
+			data->enable = enable;
+
+			_mali_osk_notification_queue_send(queue, notification);
+		} else {
+			MALI_PRINT_ERROR(("Failed to create notification object!\n"));
+		}
+	} else {
+		MALI_PRINT_ERROR(("Failed to find the right session!\n"));
+	}
+}
+
+
+_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start)
+{
+	int i;
+	mali_profiling_stream *new_mali_profiling_stream = NULL;
+	mali_profiling_stream_list *new_mali_profiling_stream_list = NULL;
+	if (MALI_TRUE == auto_start) {
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE);
+	}
+
+	/*Init the global_mali_stream_list*/
+	MALI_DEBUG_ASSERT(NULL == global_mali_stream_list);
+	new_mali_profiling_stream_list = (mali_profiling_stream_list *)kmalloc(sizeof(mali_profiling_stream_list), GFP_KERNEL);
+
+	if (NULL == new_mali_profiling_stream_list) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	spin_lock_init(&new_mali_profiling_stream_list->spin_lock);
+	INIT_LIST_HEAD(&new_mali_profiling_stream_list->free_list);
+	INIT_LIST_HEAD(&new_mali_profiling_stream_list->queue_list);
+
+	spin_lock_init(&mali_activity_lock);
+	mali_activity_cores_num =  0;
+
+	for (i = 0; i < MALI_PROFILING_STREAM_BUFFER_NUM; i++) {
+		new_mali_profiling_stream = (mali_profiling_stream *)kmalloc(sizeof(mali_profiling_stream), GFP_KERNEL);
+		if (NULL == new_mali_profiling_stream) {
+			_mali_profiling_stream_list_destory(new_mali_profiling_stream_list);
+			return _MALI_OSK_ERR_NOMEM;
+		}
+
+		INIT_LIST_HEAD(&new_mali_profiling_stream->list);
+		new_mali_profiling_stream->used_size = 0;
+		list_add_tail(&new_mali_profiling_stream->list, &new_mali_profiling_stream_list->free_list);
+
+	}
+
+	_mali_osk_atomic_init(&stream_fd_if_used, 0);
+	init_waitqueue_head(&stream_fd_wait_queue);
+
+	hrtimer_init(&profiling_sampling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+	profiling_sampling_timer.function = _mali_profiling_sampling_counters;
+
+	global_mali_stream_list = new_mali_profiling_stream_list;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_profiling_term(void)
+{
+	if (0 != profiling_sample_rate) {
+		hrtimer_cancel(&profiling_sampling_timer);
+		profiling_sample_rate = 0;
+	}
+	_mali_osk_atomic_term(&stream_fd_if_used);
+
+	if (NULL != global_mali_profiling_counters) {
+		_mali_osk_free(global_mali_profiling_counters);
+		global_mali_profiling_counters = NULL;
+		num_global_mali_profiling_counters = 0;
+	}
+
+	if (NULL != global_mali_stream_list) {
+		_mali_profiling_stream_list_destory(global_mali_stream_list);
+		global_mali_stream_list = NULL;
+	}
+
+}
+
+void _mali_osk_profiling_stop_sampling(u32 pid)
+{
+	if (pid == current_profiling_pid) {
+
+		int i;
+		/* Reset all counter states when closing connection.*/
+		for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+			_mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, MALI_HW_CORE_NO_COUNTER);
+			global_mali_profiling_counters[i].enabled = 0;
+			global_mali_profiling_counters[i].prev_counter_value = 0;
+			global_mali_profiling_counters[i].current_counter_value = 0;
+		}
+		l2_cache_counter_if_enabled = MALI_FALSE;
+		num_counters_enabled = 0;
+		mem_counters_enabled = 0;
+		_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 0);
+		_mali_profiling_control(SW_COUNTER_ENABLE, 0);
+		/* Delete sampling timer when closing connection. */
+		if (0 != profiling_sample_rate) {
+			hrtimer_cancel(&profiling_sampling_timer);
+			profiling_sample_rate = 0;
+		}
+		current_profiling_pid = 0;
+	}
+}
+
+void    _mali_osk_profiling_add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4)
+{
+	/*Record the freq & volt to global_mali_profiling_counters here. */
+	if (0 != profiling_sample_rate) {
+		u32 channel;
+		u32 state;
+		channel = (event_id >> 16) & 0xFF;
+		state = ((event_id >> 24) & 0xF) << 24;
+
+		switch (state) {
+		case MALI_PROFILING_EVENT_TYPE_SINGLE:
+			if ((MALI_PROFILING_EVENT_CHANNEL_GPU >> 16) == channel) {
+				u32 reason = (event_id & 0xFFFF);
+				if (MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE == reason) {
+					_mali_osk_profiling_record_global_counters(COUNTER_FREQUENCY, data0);
+					_mali_osk_profiling_record_global_counters(COUNTER_VOLTAGE, data1);
+				}
+			}
+			break;
+		case MALI_PROFILING_EVENT_TYPE_START:
+			if ((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) == channel) {
+				_mali_profiling_sampling_core_activity_switch(COUNTER_VP_ACTIVITY, 0, 1, data1);
+			} else if (channel >= (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) &&
+				   (MALI_PROFILING_EVENT_CHANNEL_PP7 >> 16) >= channel) {
+				u32 core_id = channel - (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16);
+				_mali_profiling_sampling_core_activity_switch(COUNTER_FP_ACTIVITY, core_id, 1, data1);
+			}
+			break;
+		case MALI_PROFILING_EVENT_TYPE_STOP:
+			if ((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) == channel) {
+				_mali_profiling_sampling_core_activity_switch(COUNTER_VP_ACTIVITY, 0, 0, 0);
+			} else if (channel >= (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) &&
+				   (MALI_PROFILING_EVENT_CHANNEL_PP7 >> 16) >= channel) {
+				u32 core_id = channel - (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16);
+				_mali_profiling_sampling_core_activity_switch(COUNTER_FP_ACTIVITY, core_id, 0, 0);
+			}
+			break;
+		default:
+			break;
+		}
+	}
+	trace_mali_timeline_event(event_id, data0, data1, data2, data3, data4);
+}
+
+void _mali_osk_profiling_report_sw_counters(u32 *counters)
+{
+	trace_mali_sw_counters(_mali_osk_get_pid(), _mali_osk_get_tid(), NULL, counters);
+}
+
+void _mali_osk_profiling_record_global_counters(int counter_id, u32 value)
+{
+	if (NULL != global_mali_profiling_counters) {
+		int i ;
+		for (i = 0; i < num_global_mali_profiling_counters; i++) {
+			if (counter_id == global_mali_profiling_counters[i].counter_id && global_mali_profiling_counters[i].enabled) {
+				global_mali_profiling_counters[i].current_counter_value = value;
+				break;
+			}
+		}
+	}
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args)
+{
+	/* Always add process and thread identificator in the first two data elements for events from user space */
+	_mali_osk_profiling_add_event(args->event_id, _mali_osk_get_pid(), _mali_osk_get_tid(), args->data[2], args->data[3], args->data[4]);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args)
+{
+	u32 *counters = (u32 *)(uintptr_t)args->counters;
+
+	_mali_osk_profiling_report_sw_counters(counters);
+
+	if (NULL != global_mali_profiling_counters) {
+		int i;
+		for (i = 0; i < MALI_PROFILING_SW_COUNTERS_NUM; i ++) {
+			if (global_mali_profiling_counters[first_sw_counter_index + i].enabled) {
+				global_mali_profiling_counters[first_sw_counter_index + i].current_counter_value = *(counters + i);
+			}
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_stream_fd_get(_mali_uk_profiling_stream_fd_get_s *args)
+{
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (1 == _mali_osk_atomic_inc_return(&stream_fd_if_used)) {
+
+		s32 fd = anon_inode_getfd("[mali_profiling_stream]", &mali_profiling_stream_fops,
+					  session,
+					  O_RDONLY | O_CLOEXEC);
+
+		args->stream_fd = fd;
+		if (0 > fd) {
+			_mali_osk_atomic_dec(&stream_fd_if_used);
+			return _MALI_OSK_ERR_FAULT;
+		}
+		args->stream_fd = fd;
+	} else {
+		_mali_osk_atomic_dec(&stream_fd_if_used);
+		args->stream_fd = -1;
+		return _MALI_OSK_ERR_BUSY;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_control_set(_mali_uk_profiling_control_set_s *args)
+{
+	u32 control_packet_size;
+	u32 output_buffer_size;
+
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (NULL == global_mali_profiling_counters && MALI_FALSE == _mali_profiling_global_counters_init()) {
+		MALI_PRINT_ERROR(("Failed to create global_mali_profiling_counters.\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	control_packet_size = args->control_packet_size;
+	output_buffer_size = args->response_packet_size;
+
+	if (0 != control_packet_size) {
+		u8 control_type;
+		u8 *control_packet_data;
+		u8 *response_packet_data;
+		u32 version_length = sizeof(utgard_setup_version) - 1;
+
+		control_packet_data = (u8 *)(uintptr_t)args->control_packet_data;
+		MALI_DEBUG_ASSERT_POINTER(control_packet_data);
+		response_packet_data = (u8 *)(uintptr_t)args->response_packet_data;
+		MALI_DEBUG_ASSERT_POINTER(response_packet_data);
+
+		/*Decide if need to ignore Utgard setup version.*/
+		if (control_packet_size >= version_length) {
+			if (0 == memcmp(control_packet_data, utgard_setup_version, version_length)) {
+				if (control_packet_size == version_length) {
+					args->response_packet_size = 0;
+					return _MALI_OSK_ERR_OK;
+				} else {
+					control_packet_data += version_length;
+					control_packet_size -= version_length;
+				}
+			}
+		}
+
+		current_profiling_pid = _mali_osk_get_pid();
+
+		control_type = control_packet_data[0];
+		switch (control_type) {
+		case PACKET_HEADER_COUNTERS_REQUEST: {
+			int i;
+
+			if (PACKET_HEADER_SIZE > control_packet_size ||
+			    control_packet_size !=  _mali_profiling_get_packet_size(control_packet_data + 1)) {
+				MALI_PRINT_ERROR(("Wrong control packet  size, type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			/* Send supported counters */
+			if (PACKET_HEADER_SIZE > output_buffer_size)
+				return _MALI_OSK_ERR_FAULT;
+
+			*response_packet_data = PACKET_HEADER_COUNTERS_ACK;
+			args->response_packet_size = PACKET_HEADER_SIZE;
+
+			for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+				u32 name_size = strlen(global_mali_profiling_counters[i].counter_name);
+
+				if ((args->response_packet_size + name_size + 1) > output_buffer_size) {
+					MALI_PRINT_ERROR(("Response packet data is too large..\n"));
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				memcpy(response_packet_data + args->response_packet_size,
+				       global_mali_profiling_counters[i].counter_name, name_size + 1);
+
+				args->response_packet_size += (name_size + 1);
+
+				if (global_mali_profiling_counters[i].counter_id == COUNTER_VP_ACTIVITY) {
+					args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+								      output_buffer_size, args->response_packet_size, (s32)1);
+				} else if (global_mali_profiling_counters[i].counter_id == COUNTER_FP_ACTIVITY) {
+					args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+								      output_buffer_size, args->response_packet_size, (s32)mali_pp_get_glob_num_pp_cores());
+				} else {
+					args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+								      output_buffer_size, args->response_packet_size, (s32) - 1);
+				}
+			}
+
+			_mali_profiling_set_packet_size(response_packet_data + 1, args->response_packet_size);
+			break;
+		}
+
+		case PACKET_HEADER_COUNTERS_ENABLE: {
+			int i;
+			u32 request_pos = PACKET_HEADER_SIZE;
+			mali_bool sw_counter_if_enabled = MALI_FALSE;
+
+			if (PACKET_HEADER_SIZE > control_packet_size ||
+			    control_packet_size !=  _mali_profiling_get_packet_size(control_packet_data + 1)) {
+				MALI_PRINT_ERROR(("Wrong control packet  size , type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			/* Init all counter states before enable requested counters.*/
+			for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+				_mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, MALI_HW_CORE_NO_COUNTER);
+				global_mali_profiling_counters[i].enabled = 0;
+				global_mali_profiling_counters[i].prev_counter_value = 0;
+				global_mali_profiling_counters[i].current_counter_value = 0;
+
+				if (global_mali_profiling_counters[i].counter_id >= FIRST_MEM_COUNTER &&
+				    global_mali_profiling_counters[i].counter_id <= LAST_MEM_COUNTER) {
+					_mali_profiling_notification_mem_counter(session, global_mali_profiling_counters[i].counter_id, 0, 0);
+				}
+			}
+
+			l2_cache_counter_if_enabled = MALI_FALSE;
+			num_counters_enabled = 0;
+			mem_counters_enabled = 0;
+			_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 0);
+			_mali_profiling_control(SW_COUNTER_ENABLE, 0);
+			_mali_profiling_notification_enable(session, 0, 0);
+
+			/* Enable requested counters */
+			while (request_pos < control_packet_size) {
+				u32 begin = request_pos;
+				u32 event;
+				u32 key;
+
+				/* Check the counter name which should be ended with null */
+				while (request_pos < control_packet_size && control_packet_data[request_pos] != '\0') {
+					++request_pos;
+				}
+
+				if (request_pos >= control_packet_size)
+					return _MALI_OSK_ERR_FAULT;
+
+				++request_pos;
+				event = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+				key = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+
+				for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+					u32 name_size = strlen((char *)(control_packet_data + begin));
+
+					if (strncmp(global_mali_profiling_counters[i].counter_name, (char *)(control_packet_data + begin), name_size) == 0) {
+						if (!sw_counter_if_enabled && (FIRST_SW_COUNTER <= global_mali_profiling_counters[i].counter_id
+									       && global_mali_profiling_counters[i].counter_id <= LAST_SW_COUNTER)) {
+							sw_counter_if_enabled = MALI_TRUE;
+							_mali_profiling_control(SW_COUNTER_ENABLE, 1);
+						}
+
+						if (COUNTER_FILMSTRIP == global_mali_profiling_counters[i].counter_id) {
+							_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 1);
+							_mali_profiling_control(FBDUMP_CONTROL_RATE, event & 0xff);
+							_mali_profiling_control(FBDUMP_CONTROL_RESIZE_FACTOR, (event >> 8) & 0xff);
+						}
+
+						if (global_mali_profiling_counters[i].counter_id >= FIRST_MEM_COUNTER &&
+						    global_mali_profiling_counters[i].counter_id <= LAST_MEM_COUNTER) {
+							_mali_profiling_notification_mem_counter(session, global_mali_profiling_counters[i].counter_id,
+									key, 1);
+							mem_counters_enabled++;
+						}
+
+						global_mali_profiling_counters[i].counter_event = event;
+						global_mali_profiling_counters[i].key = key;
+						global_mali_profiling_counters[i].enabled = 1;
+
+						_mali_profiling_set_event(global_mali_profiling_counters[i].counter_id,
+									  global_mali_profiling_counters[i].counter_event);
+						num_counters_enabled++;
+						break;
+					}
+				}
+
+				if (i == num_global_mali_profiling_counters) {
+					MALI_PRINT_ERROR(("Counter name does not match for type %u.\n", control_type));
+					return _MALI_OSK_ERR_FAULT;
+				}
+			}
+
+			if (PACKET_HEADER_SIZE <= output_buffer_size) {
+				*response_packet_data = PACKET_HEADER_ACK;
+				_mali_profiling_set_packet_size(response_packet_data + 1, PACKET_HEADER_SIZE);
+				args->response_packet_size = PACKET_HEADER_SIZE;
+			} else {
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			break;
+		}
+
+		case PACKET_HEADER_START_CAPTURE_VALUE: {
+			u32 live_rate;
+			u32 request_pos = PACKET_HEADER_SIZE;
+
+			if (PACKET_HEADER_SIZE > control_packet_size ||
+			    control_packet_size !=  _mali_profiling_get_packet_size(control_packet_data + 1)) {
+				MALI_PRINT_ERROR(("Wrong control packet  size , type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			/* Read samping rate in nanoseconds and live rate, start capture.*/
+			profiling_sample_rate =  _mali_profiling_read_packet_int(control_packet_data,
+						 &request_pos, control_packet_size);
+
+			live_rate = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+
+			if (PACKET_HEADER_SIZE <= output_buffer_size) {
+				*response_packet_data = PACKET_HEADER_ACK;
+				_mali_profiling_set_packet_size(response_packet_data + 1, PACKET_HEADER_SIZE);
+				args->response_packet_size = PACKET_HEADER_SIZE;
+			} else {
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			if (0 != num_counters_enabled && 0 != profiling_sample_rate) {
+				_mali_profiling_global_stream_list_free();
+				if (mem_counters_enabled > 0) {
+					_mali_profiling_notification_enable(session, profiling_sample_rate, 1);
+				}
+				hrtimer_start(&profiling_sampling_timer,
+					      ktime_set(profiling_sample_rate / 1000000000, profiling_sample_rate % 1000000000),
+					      HRTIMER_MODE_REL_PINNED);
+			}
+
+			break;
+		}
+		default:
+			MALI_PRINT_ERROR(("Unsupported  profiling packet header type %u.\n", control_type));
+			args->response_packet_size  = 0;
+			return _MALI_OSK_ERR_FAULT;
+		}
+	} else {
+		_mali_osk_profiling_stop_sampling(current_profiling_pid);
+		_mali_profiling_notification_enable(session, 0, 0);
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+/**
+ * Called by gator.ko to set HW counters
+ *
+ * @param counter_id The counter ID.
+ * @param event_id Event ID that the counter should count (HW counter value from TRM).
+ *
+ * @return 1 on success, 0 on failure.
+ */
+int _mali_profiling_set_event(u32 counter_id, s32 event_id)
+{
+	if (COUNTER_VP_0_C0 == counter_id) {
+		mali_gp_job_set_gp_counter_src0(event_id);
+	} else if (COUNTER_VP_0_C1 == counter_id) {
+		mali_gp_job_set_gp_counter_src1(event_id);
+	} else if (COUNTER_FP_0_C0 <= counter_id && COUNTER_FP_7_C1 >= counter_id) {
+		/*
+		 * Two compatibility notes for this function:
+		 *
+		 * 1) Previously the DDK allowed per core counters.
+		 *
+		 *    This did not make much sense on Mali-450 with the "virtual PP core" concept,
+		 *    so this option was removed, and only the same pair of HW counters was allowed on all cores,
+		 *    beginning with r3p2 release.
+		 *
+		 *    Starting with r4p0, it is now possible to set different HW counters for the different sub jobs.
+		 *    This should be almost the same, since sub job 0 is designed to run on core 0,
+		 *    sub job 1 on core 1, and so on.
+		 *
+		 *    The scheduling of PP sub jobs is not predictable, and this often led to situations where core 0 ran 2
+		 *    sub jobs, while for instance core 1 ran zero. Having the counters set per sub job would thus increase
+		 *    the predictability of the returned data (as you would be guaranteed data for all the selected HW counters).
+		 *
+		 *    PS: Core scaling needs to be disabled in order to use this reliably (goes for both solutions).
+		 *
+		 *    The framework/#defines with Gator still indicates that the counter is for a particular core,
+		 *    but this is internally used as a sub job ID instead (no translation needed).
+		 *
+		 *  2) Global/default vs per sub job counters
+		 *
+		 *     Releases before r3p2 had only per PP core counters.
+		 *     r3p2 releases had only one set of default/global counters which applied to all PP cores
+		 *     Starting with r4p0, we have both a set of default/global counters,
+		 *     and individual counters per sub job (equal to per core).
+		 *
+		 *     To keep compatibility with Gator/DS-5/streamline, the following scheme is used:
+		 *
+		 *     r3p2 release; only counters set for core 0 is handled,
+		 *     this is applied as the default/global set of counters, and will thus affect all cores.
+		 *
+		 *     r4p0 release; counters set for core 0 is applied as both the global/default set of counters,
+		 *     and counters for sub job 0.
+		 *     Counters set for core 1-7 is only applied for the corresponding sub job.
+		 *
+		 *     This should allow the DS-5/Streamline GUI to have a simple mode where it only allows setting the
+		 *     values for core 0, and thus this will be applied to all PP sub jobs/cores.
+		 *     Advanced mode will also be supported, where individual pairs of HW counters can be selected.
+		 *
+		 *     The GUI will (until it is updated) still refer to cores instead of sub jobs, but this is probably
+		 *     something we can live with!
+		 *
+		 *     Mali-450 note: Each job is not divided into a deterministic number of sub jobs, as the HW DLBU
+		 *     automatically distributes the load between whatever number of cores is available at this particular time.
+		 *     A normal PP job on Mali-450 is thus considered a single (virtual) job, and it will thus only be possible
+		 *     to use a single pair of HW counters (even if the job ran on multiple PP cores).
+		 *     In other words, only the global/default pair of PP HW counters will be used for normal Mali-450 jobs.
+		 */
+		u32 sub_job = (counter_id - COUNTER_FP_0_C0) >> 1;
+		u32 counter_src = (counter_id - COUNTER_FP_0_C0) & 1;
+		if (0 == counter_src) {
+			mali_pp_job_set_pp_counter_sub_job_src0(sub_job, event_id);
+			if (0 == sub_job) {
+				mali_pp_job_set_pp_counter_global_src0(event_id);
+			}
+		} else {
+			mali_pp_job_set_pp_counter_sub_job_src1(sub_job, event_id);
+			if (0 == sub_job) {
+				mali_pp_job_set_pp_counter_global_src1(event_id);
+			}
+		}
+	} else if (COUNTER_L2_0_C0 <= counter_id && COUNTER_L2_2_C1 >= counter_id) {
+		u32 core_id = (counter_id - COUNTER_L2_0_C0) >> 1;
+		struct mali_l2_cache_core *l2_cache_core = mali_l2_cache_core_get_glob_l2_core(core_id);
+
+		if (NULL != l2_cache_core) {
+			u32 counter_src = (counter_id - COUNTER_L2_0_C0) & 1;
+			mali_l2_cache_core_set_counter_src(l2_cache_core,
+							   counter_src, event_id);
+			l2_cache_counter_if_enabled = MALI_TRUE;
+		}
+	} else {
+		return 0; /* Failure, unknown event */
+	}
+
+	return 1; /* success */
+}
+
+/**
+ * Called by gator.ko to retrieve the L2 cache counter values for all L2 cache cores.
+ * The L2 cache counters are unique in that they are polled by gator, rather than being
+ * transmitted via the tracepoint mechanism.
+ *
+ * @param values Pointer to a _mali_profiling_l2_counter_values structure where
+ *               the counter sources and values will be output
+ * @return 0 if all went well; otherwise, return the mask with the bits set for the powered off cores
+ */
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values)
+{
+	u32 l2_cores_num = mali_l2_cache_core_get_glob_num_l2_cores();
+	u32 i;
+
+	MALI_DEBUG_ASSERT(l2_cores_num <= 3);
+
+	for (i = 0; i < l2_cores_num; i++) {
+		struct mali_l2_cache_core *l2_cache = mali_l2_cache_core_get_glob_l2_core(i);
+
+		if (NULL == l2_cache) {
+			continue;
+		}
+
+		mali_l2_cache_core_get_counter_values(l2_cache,
+						      &values->cores[i].source0,
+						      &values->cores[i].value0,
+						      &values->cores[i].source1,
+						      &values->cores[i].value1);
+	}
+
+	return 0;
+}
+
+/**
+ * Called by gator to control the production of profiling information at runtime.
+ */
+void _mali_profiling_control(u32 action, u32 value)
+{
+	switch (action) {
+	case FBDUMP_CONTROL_ENABLE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED, (value == 0 ? MALI_FALSE : MALI_TRUE));
+		break;
+	case FBDUMP_CONTROL_RATE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES, value);
+		break;
+	case SW_COUNTER_ENABLE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_COUNTER_ENABLED, value);
+		break;
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR, value);
+		break;
+	default:
+		break;  /* Ignore unimplemented actions */
+	}
+}
+
+/**
+ * Called by gator to get mali api version.
+ */
+u32 _mali_profiling_get_api_version(void)
+{
+	return MALI_PROFILING_API_VERSION;
+}
+
+/**
+* Called by gator to get the data about Mali instance in use:
+* product id, version, number of cores
+*/
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values)
+{
+	values->mali_product_id = (u32)mali_kernel_core_get_product_id();
+	values->mali_version_major = mali_kernel_core_get_gpu_major_version();
+	values->mali_version_minor = mali_kernel_core_get_gpu_minor_version();
+	values->num_of_l2_cores = mali_l2_cache_core_get_glob_num_l2_cores();
+	values->num_of_fp_cores = mali_executor_get_num_cores_total();
+	values->num_of_vp_cores = 1;
+}
+
+
+EXPORT_SYMBOL(_mali_profiling_set_event);
+EXPORT_SYMBOL(_mali_profiling_get_l2_counters);
+EXPORT_SYMBOL(_mali_profiling_control);
+EXPORT_SYMBOL(_mali_profiling_get_api_version);
+EXPORT_SYMBOL(_mali_profiling_get_mali_version);
diff --git a/utgard/r6p2/linux/mali_osk_specific.h b/utgard/r6p2/linux/mali_osk_specific.h
new file mode 100755
index 0000000..adcca29
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_specific.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_specific.h
+ * Defines per-OS Kernel level specifics, such as unusual workarounds for
+ * certain OSs.
+ */
+
+#ifndef __MALI_OSK_SPECIFIC_H__
+#define __MALI_OSK_SPECIFIC_H__
+
+#include <asm/uaccess.h>
+#include <linux/platform_device.h>
+#include <linux/gfp.h>
+#include <linux/hardirq.h>
+
+
+#include "mali_osk_types.h"
+#include "mali_kernel_linux.h"
+
+#define MALI_STATIC_INLINE static inline
+#define MALI_NON_STATIC_INLINE inline
+
+typedef struct dma_pool *mali_dma_pool;
+
+typedef u32 mali_dma_addr;
+
+#if MALI_ENABLE_CPU_CYCLES
+/* Reads out the clock cycle performance counter of the current cpu.
+   It is useful for cost-free (2 cycle) measuring of the time spent
+   in a code path. Sample before and after, the diff number of cycles.
+   When the CPU is idle it will not increase this clock counter.
+   It means that the counter is accurate if only spin-locks are used,
+   but mutexes may lead to too low values since the cpu might "idle"
+   waiting for the mutex to become available.
+   The clock source is configured on the CPU during mali module load,
+   but will not give useful output after a CPU has been power cycled.
+   It is therefore important to configure the system to not turn of
+   the cpu cores when using this functionallity.*/
+static inline unsigned int mali_get_cpu_cyclecount(void)
+{
+	unsigned int value;
+	/* Reading the CCNT Register - CPU clock counter */
+	asm volatile("MRC p15, 0, %0, c9, c13, 0\t\n": "=r"(value));
+	return value;
+}
+
+void mali_init_cpu_time_counters(int reset, int enable_divide_by_64);
+#endif
+
+
+MALI_STATIC_INLINE u32 _mali_osk_copy_from_user(void *to, void *from, u32 n)
+{
+	return (u32)copy_from_user(to, from, (unsigned long)n);
+}
+
+MALI_STATIC_INLINE mali_bool _mali_osk_in_atomic(void)
+{
+	return in_atomic();
+}
+
+#define _mali_osk_put_user(x, ptr) put_user(x, ptr)
+
+#endif /* __MALI_OSK_SPECIFIC_H__ */
diff --git a/utgard/r6p2/linux/mali_osk_time.c b/utgard/r6p2/linux/mali_osk_time.c
new file mode 100755
index 0000000..76876b6
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_time.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_time.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <asm/delay.h>
+
+mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb)
+{
+	return time_after_eq(ticka, tickb) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+unsigned long _mali_osk_time_mstoticks(u32 ms)
+{
+	return msecs_to_jiffies(ms);
+}
+
+u32 _mali_osk_time_tickstoms(unsigned long ticks)
+{
+	return jiffies_to_msecs(ticks);
+}
+
+unsigned long _mali_osk_time_tickcount(void)
+{
+	return jiffies;
+}
+
+void _mali_osk_time_ubusydelay(u32 usecs)
+{
+	udelay(usecs);
+}
+
+u64 _mali_osk_time_get_ns(void)
+{
+	struct timespec tsval;
+	getnstimeofday(&tsval);
+	return (u64)timespec_to_ns(&tsval);
+}
+
+u64 _mali_osk_boot_time_get_ns(void)
+{
+	struct timespec tsval;
+	get_monotonic_boottime(&tsval);
+	return (u64)timespec_to_ns(&tsval);
+}
diff --git a/utgard/r6p2/linux/mali_osk_timers.c b/utgard/r6p2/linux/mali_osk_timers.c
new file mode 100755
index 0000000..8ada2da
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_timers.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_timers.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct _mali_osk_timer_t_struct {
+	struct timer_list timer;
+};
+
+typedef void (*timer_timeout_function_t)(unsigned long);
+
+_mali_osk_timer_t *_mali_osk_timer_init(void)
+{
+	_mali_osk_timer_t *t = (_mali_osk_timer_t *)kmalloc(sizeof(_mali_osk_timer_t), GFP_KERNEL);
+	if (NULL != t) init_timer(&t->timer);
+	return t;
+}
+
+void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	tim->timer.expires = jiffies + ticks_to_expire;
+	add_timer(&(tim->timer));
+}
+
+void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	mod_timer(&(tim->timer), jiffies + ticks_to_expire);
+}
+
+void _mali_osk_timer_del(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	del_timer_sync(&(tim->timer));
+}
+
+void _mali_osk_timer_del_async(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	del_timer(&(tim->timer));
+}
+
+mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	return 1 == timer_pending(&(tim->timer));
+}
+
+void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	tim->timer.data = (unsigned long)data;
+	tim->timer.function = (timer_timeout_function_t)callback;
+}
+
+void _mali_osk_timer_term(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	kfree(tim);
+}
diff --git a/utgard/r6p2/linux/mali_osk_wait_queue.c b/utgard/r6p2/linux/mali_osk_wait_queue.c
new file mode 100755
index 0000000..caa3abe
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_wait_queue.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_wait_queue.c
+ * Implemenation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct _mali_osk_wait_queue_t_struct {
+	wait_queue_head_t wait_queue;
+};
+
+_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void)
+{
+	_mali_osk_wait_queue_t *ret = NULL;
+
+	ret = kmalloc(sizeof(_mali_osk_wait_queue_t), GFP_KERNEL);
+
+	if (NULL == ret) {
+		return ret;
+	}
+
+	init_waitqueue_head(&ret->wait_queue);
+	MALI_DEBUG_ASSERT(!waitqueue_active(&ret->wait_queue));
+
+	return ret;
+}
+
+void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue));
+	wait_event(queue->wait_queue, condition(data));
+}
+
+void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue));
+	wait_event_timeout(queue->wait_queue, condition(data), _mali_osk_time_mstoticks(timeout));
+}
+
+void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	/* if queue is empty, don't attempt to wake up its elements */
+	if (!waitqueue_active(&queue->wait_queue)) return;
+
+	MALI_DEBUG_PRINT(6, ("Waking up elements in wait queue %p ....\n", queue));
+
+	wake_up_all(&queue->wait_queue);
+
+	MALI_DEBUG_PRINT(6, ("... elements in wait queue %p woken up\n", queue));
+}
+
+void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue)
+{
+	/* Parameter validation  */
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	/* Linux requires no explicit termination of wait queues */
+	kfree(queue);
+}
diff --git a/utgard/r6p2/linux/mali_osk_wq.c b/utgard/r6p2/linux/mali_osk_wq.c
new file mode 100755
index 0000000..06afa04
--- /dev/null
+++ b/utgard/r6p2/linux/mali_osk_wq.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_wq.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/slab.h> /* For memory allocation */
+#include <linux/workqueue.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_license.h"
+#include "mali_kernel_linux.h"
+
+typedef struct _mali_osk_wq_work_s {
+	_mali_osk_wq_work_handler_t handler;
+	void *data;
+	mali_bool high_pri;
+	struct work_struct work_handle;
+} mali_osk_wq_work_object_t;
+
+typedef struct _mali_osk_wq_delayed_work_s {
+	_mali_osk_wq_work_handler_t handler;
+	void *data;
+	struct delayed_work work;
+} mali_osk_wq_delayed_work_object_t;
+
+#if MALI_LICENSE_IS_GPL
+static struct workqueue_struct *mali_wq_normal = NULL;
+static struct workqueue_struct *mali_wq_high = NULL;
+#endif
+
+static void _mali_osk_wq_work_func(struct work_struct *work);
+
+_mali_osk_errcode_t _mali_osk_wq_init(void)
+{
+#if MALI_LICENSE_IS_GPL
+	MALI_DEBUG_ASSERT(NULL == mali_wq_normal);
+	MALI_DEBUG_ASSERT(NULL == mali_wq_high);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+	mali_wq_normal = alloc_workqueue("mali", WQ_UNBOUND, 0);
+	mali_wq_high = alloc_workqueue("mali_high_pri", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+	mali_wq_normal = create_workqueue("mali");
+	mali_wq_high = create_workqueue("mali_high_pri");
+#endif
+	if (NULL == mali_wq_normal || NULL == mali_wq_high) {
+		MALI_PRINT_ERROR(("Unable to create Mali workqueues\n"));
+
+		if (mali_wq_normal) destroy_workqueue(mali_wq_normal);
+		if (mali_wq_high)   destroy_workqueue(mali_wq_high);
+
+		mali_wq_normal = NULL;
+		mali_wq_high   = NULL;
+
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* MALI_LICENSE_IS_GPL */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_wq_flush(void)
+{
+#if MALI_LICENSE_IS_GPL
+	flush_workqueue(mali_wq_high);
+	flush_workqueue(mali_wq_normal);
+#else
+	flush_scheduled_work();
+#endif
+}
+
+void _mali_osk_wq_term(void)
+{
+#if MALI_LICENSE_IS_GPL
+	MALI_DEBUG_ASSERT(NULL != mali_wq_normal);
+	MALI_DEBUG_ASSERT(NULL != mali_wq_high);
+
+	flush_workqueue(mali_wq_normal);
+	destroy_workqueue(mali_wq_normal);
+
+	flush_workqueue(mali_wq_high);
+	destroy_workqueue(mali_wq_high);
+
+	mali_wq_normal = NULL;
+	mali_wq_high   = NULL;
+#else
+	flush_scheduled_work();
+#endif
+}
+
+_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL);
+
+	if (NULL == work) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+	work->high_pri = MALI_FALSE;
+
+	INIT_WORK(&work->work_handle, _mali_osk_wq_work_func);
+
+	return work;
+}
+
+_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL);
+
+	if (NULL == work) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+	work->high_pri = MALI_TRUE;
+
+	INIT_WORK(&work->work_handle, _mali_osk_wq_work_func);
+
+	return work;
+}
+
+void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+	_mali_osk_wq_flush();
+	kfree(work_object);
+}
+
+void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+	kfree(work_object);
+}
+
+void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+#if MALI_LICENSE_IS_GPL
+	queue_work(mali_wq_normal, &work_object->work_handle);
+#else
+	schedule_work(&work_object->work_handle);
+#endif
+}
+
+void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+#if MALI_LICENSE_IS_GPL
+	queue_work(mali_wq_high, &work_object->work_handle);
+#else
+	schedule_work(&work_object->work_handle);
+#endif
+}
+
+static void _mali_osk_wq_work_func(struct work_struct *work)
+{
+	mali_osk_wq_work_object_t *work_object;
+
+	work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_work_object_t, work_handle);
+
+#if MALI_LICENSE_IS_GPL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+	/* We want highest Dynamic priority of the thread so that the Jobs depending
+	** on this thread could be scheduled in time. Without this, this thread might
+	** sometimes need to wait for some threads in user mode to finish its round-robin
+	** time, causing *bubble* in the Mali pipeline. Thanks to the new implementation
+	** of high-priority workqueue in new kernel, this only happens in older kernel.
+	*/
+	if (MALI_TRUE == work_object->high_pri) {
+		set_user_nice(current, -19);
+	}
+#endif
+#endif /* MALI_LICENSE_IS_GPL */
+
+	work_object->handler(work_object->data);
+}
+
+static void _mali_osk_wq_delayed_work_func(struct work_struct *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object;
+
+	work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_delayed_work_object_t, work.work);
+	work_object->handler(work_object->data);
+}
+
+mali_osk_wq_delayed_work_object_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_delayed_work_object_t *work = kmalloc(sizeof(mali_osk_wq_delayed_work_object_t), GFP_KERNEL);
+
+	if (NULL == work) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+
+	INIT_DELAYED_WORK(&work->work, _mali_osk_wq_delayed_work_func);
+
+	return work;
+}
+
+void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+	kfree(work_object);
+}
+
+void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+	cancel_delayed_work(&work_object->work);
+}
+
+void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+	cancel_delayed_work_sync(&work_object->work);
+}
+
+void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+
+#if MALI_LICENSE_IS_GPL
+	queue_delayed_work(mali_wq_normal, &work_object->work, delay);
+#else
+	schedule_delayed_work(&work_object->work, delay);
+#endif
+
+}
diff --git a/utgard/r6p2/linux/mali_pmu_power_up_down.c b/utgard/r6p2/linux/mali_pmu_power_up_down.c
new file mode 100755
index 0000000..6a6c9f8
--- /dev/null
+++ b/utgard/r6p2/linux/mali_pmu_power_up_down.c
@@ -0,0 +1,27 @@
+/**
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_pmu_power_up_down.c
+ */
+
+#include <linux/module.h>
+#include "mali_executor.h"
+
+int mali_perf_set_num_pp_cores(unsigned int num_cores)
+{
+#ifndef CONFIG_MALI_DVFS
+	return mali_executor_set_perf_level(num_cores, MALI_TRUE);
+#else
+	return mali_executor_set_perf_level(num_cores, MALI_FALSE);
+#endif
+}
+
+EXPORT_SYMBOL(mali_perf_set_num_pp_cores);
diff --git a/utgard/r6p2/linux/mali_profiling_events.h b/utgard/r6p2/linux/mali_profiling_events.h
new file mode 100755
index 0000000..5e51095
--- /dev/null
+++ b/utgard/r6p2/linux/mali_profiling_events.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_EVENTS_H__
+#define __MALI_PROFILING_EVENTS_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_profiling_events.h>
+
+#endif /* __MALI_PROFILING_EVENTS_H__ */
diff --git a/utgard/r6p2/linux/mali_profiling_gator_api.h b/utgard/r6p2/linux/mali_profiling_gator_api.h
new file mode 100755
index 0000000..e371971
--- /dev/null
+++ b/utgard/r6p2/linux/mali_profiling_gator_api.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012-2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_GATOR_API_H__
+#define __MALI_PROFILING_GATOR_API_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_profiling_gator_api.h>
+
+#endif /* __MALI_PROFILING_GATOR_API_H__ */
diff --git a/utgard/r6p2/linux/mali_profiling_internal.c b/utgard/r6p2/linux/mali_profiling_internal.c
new file mode 100755
index 0000000..918caa0
--- /dev/null
+++ b/utgard/r6p2/linux/mali_profiling_internal.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_ukk.h"
+#include "mali_timestamp.h"
+#include "mali_osk_profiling.h"
+#include "mali_user_settings_db.h"
+#include "mali_profiling_internal.h"
+
+typedef struct mali_profiling_entry {
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+} mali_profiling_entry;
+
+typedef enum mali_profiling_state {
+	MALI_PROFILING_STATE_UNINITIALIZED,
+	MALI_PROFILING_STATE_IDLE,
+	MALI_PROFILING_STATE_RUNNING,
+	MALI_PROFILING_STATE_RETURN,
+} mali_profiling_state;
+
+static _mali_osk_mutex_t *lock = NULL;
+static mali_profiling_state prof_state = MALI_PROFILING_STATE_UNINITIALIZED;
+static mali_profiling_entry *profile_entries = NULL;
+static _mali_osk_atomic_t profile_insert_index;
+static u32 profile_mask = 0;
+
+static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4);
+
+void probe_mali_timeline_event(void *data, TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, unsigned
+			       int d2, unsigned int d3, unsigned int d4))
+{
+	add_event(event_id, d0, d1, d2, d3, d4);
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_init(mali_bool auto_start)
+{
+	profile_entries = NULL;
+	profile_mask = 0;
+	_mali_osk_atomic_init(&profile_insert_index, 0);
+
+	lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_PROFILING);
+	if (NULL == lock) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	prof_state = MALI_PROFILING_STATE_IDLE;
+
+	if (MALI_TRUE == auto_start) {
+		u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* Use maximum buffer size */
+
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE);
+		if (_MALI_OSK_ERR_OK != _mali_internal_profiling_start(&limit)) {
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_internal_profiling_term(void)
+{
+	u32 count;
+
+	/* Ensure profiling is stopped */
+	_mali_internal_profiling_stop(&count);
+
+	prof_state = MALI_PROFILING_STATE_UNINITIALIZED;
+
+	if (NULL != profile_entries) {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	if (NULL != lock) {
+		_mali_osk_mutex_term(lock);
+		lock = NULL;
+	}
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_start(u32 *limit)
+{
+	_mali_osk_errcode_t ret;
+	mali_profiling_entry *new_profile_entries;
+
+	_mali_osk_mutex_wait(lock);
+
+	if (MALI_PROFILING_STATE_RUNNING == prof_state) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_BUSY;
+	}
+
+	new_profile_entries = _mali_osk_valloc(*limit * sizeof(mali_profiling_entry));
+
+	if (NULL == new_profile_entries) {
+		_mali_osk_mutex_signal(lock);
+		_mali_osk_vfree(new_profile_entries);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	if (MALI_PROFILING_MAX_BUFFER_ENTRIES < *limit) {
+		*limit = MALI_PROFILING_MAX_BUFFER_ENTRIES;
+	}
+
+	profile_mask = 1;
+	while (profile_mask <= *limit) {
+		profile_mask <<= 1;
+	}
+	profile_mask >>= 1;
+
+	*limit = profile_mask;
+
+	profile_mask--; /* turns the power of two into a mask of one less */
+
+	if (MALI_PROFILING_STATE_IDLE != prof_state) {
+		_mali_osk_mutex_signal(lock);
+		_mali_osk_vfree(new_profile_entries);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	profile_entries = new_profile_entries;
+
+	ret = _mali_timestamp_reset();
+
+	if (_MALI_OSK_ERR_OK == ret) {
+		prof_state = MALI_PROFILING_STATE_RUNNING;
+	} else {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	register_trace_mali_timeline_event(probe_mali_timeline_event, NULL);
+
+	_mali_osk_mutex_signal(lock);
+	return ret;
+}
+
+static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4)
+{
+	u32 cur_index = (_mali_osk_atomic_inc_return(&profile_insert_index) - 1) & profile_mask;
+
+	profile_entries[cur_index].timestamp = _mali_timestamp_get();
+	profile_entries[cur_index].event_id = event_id;
+	profile_entries[cur_index].data[0] = data0;
+	profile_entries[cur_index].data[1] = data1;
+	profile_entries[cur_index].data[2] = data2;
+	profile_entries[cur_index].data[3] = data3;
+	profile_entries[cur_index].data[4] = data4;
+
+	/* If event is "leave API function", add current memory usage to the event
+	 * as data point 4.  This is used in timeline profiling to indicate how
+	 * much memory was used when leaving a function. */
+	if (event_id == (MALI_PROFILING_EVENT_TYPE_SINGLE | MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC)) {
+		profile_entries[cur_index].data[4] = _mali_ukk_report_memory_usage();
+	}
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_stop(u32 *count)
+{
+	_mali_osk_mutex_wait(lock);
+
+	if (MALI_PROFILING_STATE_RUNNING != prof_state) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	/* go into return state (user to retreive events), no more events will be added after this */
+	prof_state = MALI_PROFILING_STATE_RETURN;
+
+	unregister_trace_mali_timeline_event(probe_mali_timeline_event, NULL);
+
+	_mali_osk_mutex_signal(lock);
+
+	tracepoint_synchronize_unregister();
+
+	*count = _mali_osk_atomic_read(&profile_insert_index);
+	if (*count > profile_mask) *count = profile_mask;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+u32 _mali_internal_profiling_get_count(void)
+{
+	u32 retval = 0;
+
+	_mali_osk_mutex_wait(lock);
+	if (MALI_PROFILING_STATE_RETURN == prof_state) {
+		retval = _mali_osk_atomic_read(&profile_insert_index);
+		if (retval > profile_mask) retval = profile_mask;
+	}
+	_mali_osk_mutex_signal(lock);
+
+	return retval;
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5])
+{
+	u32 raw_index = _mali_osk_atomic_read(&profile_insert_index);
+
+	_mali_osk_mutex_wait(lock);
+
+	if (index < profile_mask) {
+		if ((raw_index & ~profile_mask) != 0) {
+			index += raw_index;
+			index &= profile_mask;
+		}
+
+		if (prof_state != MALI_PROFILING_STATE_RETURN) {
+			_mali_osk_mutex_signal(lock);
+			return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+		}
+
+		if (index >= raw_index) {
+			_mali_osk_mutex_signal(lock);
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		*timestamp = profile_entries[index].timestamp;
+		*event_id = profile_entries[index].event_id;
+		data[0] = profile_entries[index].data[0];
+		data[1] = profile_entries[index].data[1];
+		data[2] = profile_entries[index].data[2];
+		data[3] = profile_entries[index].data[3];
+		data[4] = profile_entries[index].data[4];
+	} else {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	_mali_osk_mutex_signal(lock);
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_clear(void)
+{
+	_mali_osk_mutex_wait(lock);
+
+	if (MALI_PROFILING_STATE_RETURN != prof_state) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	prof_state = MALI_PROFILING_STATE_IDLE;
+	profile_mask = 0;
+	_mali_osk_atomic_init(&profile_insert_index, 0);
+
+	if (NULL != profile_entries) {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	_mali_osk_mutex_signal(lock);
+	return _MALI_OSK_ERR_OK;
+}
+
+mali_bool _mali_internal_profiling_is_recording(void)
+{
+	return prof_state == MALI_PROFILING_STATE_RUNNING ? MALI_TRUE : MALI_FALSE;
+}
+
+mali_bool _mali_internal_profiling_have_recording(void)
+{
+	return prof_state == MALI_PROFILING_STATE_RETURN ? MALI_TRUE : MALI_FALSE;
+}
diff --git a/utgard/r6p2/linux/mali_profiling_internal.h b/utgard/r6p2/linux/mali_profiling_internal.h
new file mode 100755
index 0000000..6e05ffd
--- /dev/null
+++ b/utgard/r6p2/linux/mali_profiling_internal.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_INTERNAL_H__
+#define __MALI_PROFILING_INTERNAL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_osk.h"
+
+int _mali_internal_profiling_init(mali_bool auto_start);
+void _mali_internal_profiling_term(void);
+
+mali_bool _mali_internal_profiling_is_recording(void);
+mali_bool _mali_internal_profiling_have_recording(void);
+_mali_osk_errcode_t _mali_internal_profiling_clear(void);
+_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]);
+u32 _mali_internal_profiling_get_count(void);
+int _mali_internal_profiling_stop(u32 *count);
+int _mali_internal_profiling_start(u32 *limit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_PROFILING_INTERNAL_H__ */
diff --git a/utgard/r6p2/linux/mali_sync.c b/utgard/r6p2/linux/mali_sync.c
new file mode 100755
index 0000000..b199b32
--- /dev/null
+++ b/utgard/r6p2/linux/mali_sync.c
@@ -0,0 +1,457 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_sync.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_timeline.h"
+#include "mali_executor.h"
+
+#include <linux/file.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <asm-generic/fcntl.h>
+
+struct mali_sync_pt {
+	struct sync_pt         sync_pt;
+	struct mali_sync_flag *flag;
+	struct sync_timeline *sync_tl;  /**< Sync timeline this pt is connected to. */
+};
+
+/**
+ * The sync flag is used to connect sync fences to the Mali Timeline system.  Sync fences can be
+ * created from a sync flag, and when the flag is signaled, the sync fences will also be signaled.
+ */
+struct mali_sync_flag {
+	struct sync_timeline *sync_tl;  /**< Sync timeline this flag is connected to. */
+	u32                   point;    /**< Point on timeline. */
+	int                   status;   /**< 0 if unsignaled, 1 if signaled without error or negative if signaled with error. */
+	struct kref           refcount; /**< Reference count. */
+};
+
+/**
+ * Mali sync timeline is used to connect mali timeline to sync_timeline.
+ * When fence timeout can print more detailed mali timeline system info.
+ */
+struct mali_sync_timeline_container {
+	struct sync_timeline sync_timeline;
+	struct mali_timeline *timeline;
+};
+
+MALI_STATIC_INLINE struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, sync_pt);
+}
+
+MALI_STATIC_INLINE struct mali_sync_timeline_container *to_mali_sync_tl_container(struct sync_timeline *sync_tl)
+{
+	return container_of(sync_tl, struct mali_sync_timeline_container, sync_timeline);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt, *new_mpt;
+	struct sync_pt *new_pt;
+
+	if (NULL == pt) {
+		dump_stack();
+		return NULL;
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(pt);
+	mpt = to_mali_sync_pt(pt);
+
+	new_pt = sync_pt_create(mpt->sync_tl, sizeof(struct mali_sync_pt));
+	if (NULL == new_pt) return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+
+	mali_sync_flag_get(mpt->flag);
+	new_mpt->flag = mpt->flag;
+	new_mpt->sync_tl = mpt->sync_tl;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(pt);
+	mpt = to_mali_sync_pt(pt);
+
+	MALI_DEBUG_ASSERT_POINTER(mpt->flag);
+
+	return mpt->flag->status;
+}
+
+static int timeline_compare(struct sync_pt *pta, struct sync_pt *ptb)
+{
+	struct mali_sync_pt *mpta;
+	struct mali_sync_pt *mptb;
+	u32 a, b;
+
+	MALI_DEBUG_ASSERT_POINTER(pta);
+	MALI_DEBUG_ASSERT_POINTER(ptb);
+	mpta = to_mali_sync_pt(pta);
+	mptb = to_mali_sync_pt(ptb);
+
+	MALI_DEBUG_ASSERT_POINTER(mpta->flag);
+	MALI_DEBUG_ASSERT_POINTER(mptb->flag);
+
+	a = mpta->flag->point;
+	b = mptb->flag->point;
+
+	if (a == b) return 0;
+
+	return ((b - a) < (a - b) ? -1 : 1);
+}
+
+static void timeline_free_pt(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(pt);
+	mpt = to_mali_sync_pt(pt);
+
+	mali_sync_flag_put(mpt->flag);
+}
+
+static void timeline_release(struct sync_timeline *sync_timeline)
+{
+	struct mali_sync_timeline_container *mali_sync_tl = NULL;
+	struct mali_timeline *mali_tl = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_timeline);
+
+	mali_sync_tl = to_mali_sync_tl_container(sync_timeline);
+	MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+	mali_tl = mali_sync_tl->timeline;
+
+	/* always signaled timeline didn't have mali container */
+	if (mali_tl) {
+		if (NULL != mali_tl->spinlock) {
+			mali_spinlock_reentrant_term(mali_tl->spinlock);
+		}
+		_mali_osk_free(mali_tl);
+	}
+
+	module_put(THIS_MODULE);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+static void timeline_print_pt(struct seq_file *s, struct sync_pt *sync_pt)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(s);
+	MALI_DEBUG_ASSERT_POINTER(sync_pt);
+
+	mpt = to_mali_sync_pt(sync_pt);
+
+	/* It is possible this sync point is just under construct,
+	 * make sure the flag is valid before accessing it
+	*/
+	if (mpt->flag) {
+		seq_printf(s, "%u", mpt->flag->point);
+	} else {
+		seq_printf(s, "uninitialized");
+	}
+}
+
+static void timeline_print_obj(struct seq_file *s, struct sync_timeline *sync_tl)
+{
+	struct mali_sync_timeline_container *mali_sync_tl = NULL;
+	struct mali_timeline *mali_tl = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_tl);
+
+	mali_sync_tl = to_mali_sync_tl_container(sync_tl);
+	MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+	mali_tl = mali_sync_tl->timeline;
+
+	if (NULL != mali_tl) {
+		seq_printf(s, "oldest (%u) ", mali_tl->point_oldest);
+		seq_printf(s, "next (%u)", mali_tl->point_next);
+		seq_printf(s, "\n");
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+		{
+			u32 tid = _mali_osk_get_tid();
+			struct mali_timeline_system *system = mali_tl->system;
+
+			mali_spinlock_reentrant_wait(mali_tl->spinlock, tid);
+			if (!mali_tl->destroyed) {
+				mali_spinlock_reentrant_wait(system->spinlock, tid);
+				mali_timeline_debug_print_timeline(mali_tl, s);
+				mali_spinlock_reentrant_signal(system->spinlock, tid);
+			}
+			mali_spinlock_reentrant_signal(mali_tl->spinlock, tid);
+
+			/* dump job queue status and group running status */
+			mali_executor_status_dump();
+		}
+#endif
+	}
+}
+#else
+static void timeline_pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(str);
+	MALI_DEBUG_ASSERT_POINTER(pt);
+
+	mpt = to_mali_sync_pt(pt);
+
+	/* It is possible this sync point is just under construct,
+	 * make sure the flag is valid before accessing it
+	*/
+	if (mpt->flag) {
+		_mali_osk_snprintf(str, size, "%u", mpt->flag->point);
+	} else {
+		_mali_osk_snprintf(str, size, "uninitialized");
+	}
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str, int size)
+{
+	struct mali_sync_timeline_container *mali_sync_tl = NULL;
+	struct mali_timeline *mali_tl = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	mali_sync_tl = to_mali_sync_tl_container(timeline);
+	MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+	mali_tl = mali_sync_tl->timeline;
+
+	if (NULL != mali_tl) {
+		_mali_osk_snprintf(str, size, "oldest (%u) ", mali_tl->point_oldest);
+		_mali_osk_snprintf(str, size, "next (%u)", mali_tl->point_next);
+		_mali_osk_snprintf(str, size, "\n");
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+		{
+			u32 tid = _mali_osk_get_tid();
+			struct mali_timeline_system *system = mali_tl->system;
+
+			mali_spinlock_reentrant_wait(mali_tl->spinlock, tid);
+			if (!mali_tl->destroyed) {
+				mali_spinlock_reentrant_wait(system->spinlock, tid);
+				mali_timeline_debug_direct_print_timeline(mali_tl);
+				mali_spinlock_reentrant_signal(system->spinlock, tid);
+			}
+			mali_spinlock_reentrant_signal(mali_tl->spinlock, tid);
+
+			/* dump job queue status and group running status */
+			mali_executor_status_dump();
+		}
+#endif
+	}
+}
+#endif
+
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name    = "Mali",
+	.dup            = timeline_dup,
+	.has_signaled   = timeline_has_signaled,
+	.compare        = timeline_compare,
+	.free_pt        = timeline_free_pt,
+	.release_obj    = timeline_release,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	.print_pt       = timeline_print_pt,
+	.print_obj      = timeline_print_obj,
+#else
+	.pt_value_str = timeline_pt_value_str,
+	.timeline_value_str = timeline_value_str,
+#endif
+};
+
+struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name)
+{
+	struct sync_timeline *sync_tl;
+	struct mali_sync_timeline_container *mali_sync_tl;
+
+	sync_tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline_container), name);
+	if (NULL == sync_tl) return NULL;
+
+	mali_sync_tl = to_mali_sync_tl_container(sync_tl);
+	mali_sync_tl->timeline = timeline;
+
+	/* Grab a reference on the module to ensure the callbacks are present
+	 * as long some timeline exists. The reference is released when the
+	 * timeline is freed.
+	 * Since this function is called from a ioctl on an open file we know
+	 * we already have a reference, so using __module_get is safe. */
+	__module_get(THIS_MODULE);
+
+	return sync_tl;
+}
+
+s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence)
+{
+	s32 fd = -1;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
+	fd = get_unused_fd();
+#else
+	fd = get_unused_fd_flags(O_CLOEXEC);
+#endif
+
+	if (fd < 0) {
+		sync_fence_put(sync_fence);
+		return -1;
+	}
+	sync_fence_install(sync_fence, fd);
+
+	return fd;
+}
+
+struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2)
+{
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence1);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence1);
+
+	sync_fence = sync_fence_merge("mali_merge_fence", sync_fence1, sync_fence2);
+	sync_fence_put(sync_fence1);
+	sync_fence_put(sync_fence2);
+
+	return sync_fence;
+}
+
+struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl)
+{
+	struct mali_sync_flag *flag;
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_tl);
+
+	flag = mali_sync_flag_create(sync_tl, 0);
+	if (NULL == flag) return NULL;
+
+	sync_fence = mali_sync_flag_create_fence(flag);
+
+	mali_sync_flag_signal(flag, 0);
+	mali_sync_flag_put(flag);
+
+	return sync_fence;
+}
+
+struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, mali_timeline_point point)
+{
+	struct mali_sync_flag *flag;
+
+	if (NULL == sync_tl) return NULL;
+
+	flag = _mali_osk_calloc(1, sizeof(*flag));
+	if (NULL == flag) return NULL;
+
+	flag->sync_tl = sync_tl;
+	flag->point = point;
+
+	flag->status = 0;
+	kref_init(&flag->refcount);
+
+	return flag;
+}
+
+void mali_sync_flag_get(struct mali_sync_flag *flag)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	kref_get(&flag->refcount);
+}
+
+/**
+ * Free sync flag.
+ *
+ * @param ref kref object embedded in sync flag that should be freed.
+ */
+static void mali_sync_flag_free(struct kref *ref)
+{
+	struct mali_sync_flag *flag;
+
+	MALI_DEBUG_ASSERT_POINTER(ref);
+	flag = container_of(ref, struct mali_sync_flag, refcount);
+
+	_mali_osk_free(flag);
+}
+
+void mali_sync_flag_put(struct mali_sync_flag *flag)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	kref_put(&flag->refcount, mali_sync_flag_free);
+}
+
+void mali_sync_flag_signal(struct mali_sync_flag *flag, int error)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+
+	MALI_DEBUG_ASSERT(0 == flag->status);
+	flag->status = (0 > error) ? error : 1;
+
+	_mali_osk_write_mem_barrier();
+
+	sync_timeline_signal(flag->sync_tl);
+}
+
+/**
+ * Create a sync point attached to given sync flag.
+ *
+ * @note Sync points must be triggered in *exactly* the same order as they are created.
+ *
+ * @param flag Sync flag.
+ * @return New sync point if successful, NULL if not.
+ */
+static struct sync_pt *mali_sync_flag_create_pt(struct mali_sync_flag *flag)
+{
+	struct sync_pt *pt;
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	MALI_DEBUG_ASSERT_POINTER(flag->sync_tl);
+
+	pt = sync_pt_create(flag->sync_tl, sizeof(struct mali_sync_pt));
+	if (NULL == pt) return NULL;
+
+	mali_sync_flag_get(flag);
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->flag = flag;
+	mpt->sync_tl = flag->sync_tl;
+
+	return pt;
+}
+
+struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag)
+{
+	struct sync_pt    *sync_pt;
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	MALI_DEBUG_ASSERT_POINTER(flag->sync_tl);
+
+	sync_pt = mali_sync_flag_create_pt(flag);
+	if (NULL == sync_pt) return NULL;
+
+	sync_fence = sync_fence_create("mali_flag_fence", sync_pt);
+	if (NULL == sync_fence) {
+		sync_pt_free(sync_pt);
+		return NULL;
+	}
+
+	return sync_fence;
+}
diff --git a/utgard/r6p2/linux/mali_sync.h b/utgard/r6p2/linux/mali_sync.h
new file mode 100755
index 0000000..79efb8e
--- /dev/null
+++ b/utgard/r6p2/linux/mali_sync.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_sync.h
+ *
+ * Mali interface for Linux sync objects.
+ */
+
+#ifndef _MALI_SYNC_H_
+#define _MALI_SYNC_H_
+
+#if defined(CONFIG_SYNC)
+
+#include <linux/seq_file.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
+#include <linux/sync.h>
+#else
+#include <sync.h>
+#endif
+
+
+#include "mali_osk.h"
+
+struct mali_sync_flag;
+struct mali_timeline;
+
+/**
+ * Create a sync timeline.
+ *
+ * @param name Name of the sync timeline.
+ * @return The new sync timeline if successful, NULL if not.
+ */
+struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name);
+
+/**
+ * Creates a file descriptor representing the sync fence.  Will release sync fence if allocation of
+ * file descriptor fails.
+ *
+ * @param sync_fence Sync fence.
+ * @return File descriptor representing sync fence if successful, or -1 if not.
+ */
+s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence);
+
+/**
+ * Merges two sync fences.  Both input sync fences will be released.
+ *
+ * @param sync_fence1 First sync fence.
+ * @param sync_fence2 Second sync fence.
+ * @return New sync fence that is the result of the merger if successful, or NULL if not.
+ */
+struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2);
+
+/**
+ * Create a sync fence that is already signaled.
+ *
+ * @param tl Sync timeline.
+ * @return New signaled sync fence if successful, NULL if not.
+ */
+struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl);
+
+/**
+ * Create a sync flag.
+ *
+ * @param sync_tl Sync timeline.
+ * @param point Point on Mali timeline.
+ * @return New sync flag if successful, NULL if not.
+ */
+struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, u32 point);
+
+/**
+ * Grab sync flag reference.
+ *
+ * @param flag Sync flag.
+ */
+void mali_sync_flag_get(struct mali_sync_flag *flag);
+
+/**
+ * Release sync flag reference.  If this was the last reference, the sync flag will be freed.
+ *
+ * @param flag Sync flag.
+ */
+void mali_sync_flag_put(struct mali_sync_flag *flag);
+
+/**
+ * Signal sync flag.  All sync fences created from this flag will be signaled.
+ *
+ * @param flag Sync flag to signal.
+ * @param error Negative error code, or 0 if no error.
+ */
+void mali_sync_flag_signal(struct mali_sync_flag *flag, int error);
+
+/**
+ * Create a sync fence attached to given sync flag.
+ *
+ * @param flag Sync flag.
+ * @return New sync fence if successful, NULL if not.
+ */
+struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag);
+
+#endif /* defined(CONFIG_SYNC) */
+
+#endif /* _MALI_SYNC_H_ */
diff --git a/utgard/r6p2/linux/mali_uk_types.h b/utgard/r6p2/linux/mali_uk_types.h
new file mode 100755
index 0000000..00ba28b
--- /dev/null
+++ b/utgard/r6p2/linux/mali_uk_types.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_UK_TYPES_H__
+#define __MALI_UK_TYPES_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_uk_types.h>
+
+#endif /* __MALI_UK_TYPES_H__ */
diff --git a/utgard/r6p2/linux/mali_ukk_core.c b/utgard/r6p2/linux/mali_ukk_core.c
new file mode 100755
index 0000000..a6c43b7
--- /dev/null
+++ b/utgard/r6p2/linux/mali_ukk_core.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/slab.h>     /* memort allocation functions */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs)
+{
+	_mali_uk_get_api_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != get_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_api_version(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+	if (0 != put_user(kargs.compatible, &uargs->compatible)) return -EFAULT;
+
+	return 0;
+}
+
+int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs)
+{
+	_mali_uk_get_api_version_v2_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != get_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_api_version_v2(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+	if (0 != put_user(kargs.compatible, &uargs->compatible)) return -EFAULT;
+
+	return 0;
+}
+
+int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs)
+{
+	_mali_uk_wait_for_notification_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_wait_for_notification(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (_MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS != kargs.type) {
+		kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+		if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_wait_for_notification_s))) return -EFAULT;
+	} else {
+		if (0 != put_user(kargs.type, &uargs->type)) return -EFAULT;
+	}
+
+	return 0;
+}
+
+int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs)
+{
+	_mali_uk_post_notification_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	if (0 != get_user(kargs.type, &uargs->type)) {
+		return -EFAULT;
+	}
+
+	err = _mali_ukk_post_notification(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs)
+{
+	_mali_uk_get_user_settings_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_user_settings(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = 0; /* prevent kernel address to be returned to user space */
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_user_settings_s))) return -EFAULT;
+
+	return 0;
+}
+
+int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs)
+{
+	_mali_uk_request_high_priority_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_request_high_priority(&kargs);
+
+	kargs.ctx = 0;
+
+	return map_errcode(err);
+}
+
+int pending_submit_wrapper(struct mali_session_data *session_data, _mali_uk_pending_submit_s __user *uargs)
+{
+	_mali_uk_pending_submit_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_pending_submit(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
diff --git a/utgard/r6p2/linux/mali_ukk_gp.c b/utgard/r6p2/linux/mali_ukk_gp.c
new file mode 100755
index 0000000..d7c5d2f
--- /dev/null
+++ b/utgard/r6p2/linux/mali_ukk_gp.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_gp_start_job(session_data, uargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs)
+{
+	_mali_uk_get_gp_core_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err =  _mali_ukk_get_gp_core_version(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	/* no known transactions to roll-back */
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	return 0;
+}
+
+int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs)
+{
+	_mali_uk_gp_suspend_response_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_gp_suspend_response_s))) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_gp_suspend_response(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.cookie, &uargs->cookie)) return -EFAULT;
+
+	/* no known transactions to roll-back */
+	return 0;
+}
+
+int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs)
+{
+	_mali_uk_get_gp_number_of_cores_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_gp_number_of_cores(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	/* no known transactions to roll-back */
+
+	if (0 != put_user(kargs.number_of_cores, &uargs->number_of_cores)) return -EFAULT;
+
+	return 0;
+}
diff --git a/utgard/r6p2/linux/mali_ukk_mem.c b/utgard/r6p2/linux/mali_ukk_mem.c
new file mode 100755
index 0000000..3dfe345
--- /dev/null
+++ b/utgard/r6p2/linux/mali_ukk_mem.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs)
+{
+	_mali_uk_alloc_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_alloc_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_allocate(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.backend_handle, &uargs->backend_handle)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs)
+{
+	_mali_uk_free_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_free_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_free(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.free_pages_nr, &uargs->free_pages_nr)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs)
+{
+	_mali_uk_bind_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_bind_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_bind(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs)
+{
+	_mali_uk_unbind_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_unbind_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_unbind(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+
+int mem_cow_wrapper(struct mali_session_data *session_data, _mali_uk_cow_mem_s __user *uargs)
+{
+	_mali_uk_cow_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_cow_mem_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_cow(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.backend_handle, &uargs->backend_handle)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int mem_cow_modify_range_wrapper(struct mali_session_data *session_data, _mali_uk_cow_modify_range_s __user *uargs)
+{
+	_mali_uk_cow_modify_range_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_cow_modify_range_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_cow_modify_range(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.change_pages_nr, &uargs->change_pages_nr)) {
+		return -EFAULT;
+	}
+	return 0;
+}
+
+
+int mem_resize_mem_wrapper(struct mali_session_data *session_data, _mali_uk_mem_resize_s __user *uargs)
+{
+	_mali_uk_mem_resize_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_resize_s))) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_resize(&kargs);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs)
+{
+	_mali_uk_mem_write_safe_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_write_safe_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	/* Check if we can access the buffers */
+	if (!access_ok(VERIFY_WRITE, kargs.dest, kargs.size)
+	    || !access_ok(VERIFY_READ, kargs.src, kargs.size)) {
+		return -EINVAL;
+	}
+
+	/* Check if size wraps */
+	if ((kargs.size + kargs.dest) <= kargs.dest
+	    || (kargs.size + kargs.src) <= kargs.src) {
+		return -EINVAL;
+	}
+
+	err = _mali_ukk_mem_write_safe(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != put_user(kargs.size, &uargs->size)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+
+
+int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs)
+{
+	_mali_uk_query_mmu_page_table_dump_size_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_query_mmu_page_table_dump_size(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.size, &uargs->size)) return -EFAULT;
+
+	return 0;
+}
+
+int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs)
+{
+	_mali_uk_dump_mmu_page_table_s kargs;
+	_mali_osk_errcode_t err;
+	void __user *user_buffer;
+	void *buffer = NULL;
+	int rc = -EFAULT;
+
+	/* validate input */
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	/* the session_data pointer was validated by caller */
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_dump_mmu_page_table_s)))
+		goto err_exit;
+
+	user_buffer = (void __user *)(uintptr_t)kargs.buffer;
+	if (!access_ok(VERIFY_WRITE, user_buffer, kargs.size))
+		goto err_exit;
+
+	/* allocate temporary buffer (kernel side) to store mmu page table info */
+	if (kargs.size <= 0)
+		return -EINVAL;
+	/* Allow at most 8MiB buffers, this is more than enough to dump a fully
+	 * populated page table. */
+	if (kargs.size > SZ_8M)
+		return -EINVAL;
+
+	buffer = (void *)(uintptr_t)_mali_osk_valloc(kargs.size);
+	if (NULL == buffer) {
+		rc = -ENOMEM;
+		goto err_exit;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	kargs.buffer = (uintptr_t)buffer;
+	err = _mali_ukk_dump_mmu_page_table(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		rc = map_errcode(err);
+		goto err_exit;
+	}
+
+	/* copy mmu page table info back to user space and update pointers */
+	if (0 != copy_to_user(user_buffer, buffer, kargs.size))
+		goto err_exit;
+
+	kargs.register_writes = kargs.register_writes -
+				(uintptr_t)buffer + (uintptr_t)user_buffer;
+	kargs.page_table_dump = kargs.page_table_dump -
+				(uintptr_t)buffer + (uintptr_t)user_buffer;
+
+	if (0 != copy_to_user(uargs, &kargs, sizeof(kargs)))
+		goto err_exit;
+
+	rc = 0;
+
+err_exit:
+	if (buffer) _mali_osk_vfree(buffer);
+	return rc;
+}
+
+int mem_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+	_mali_uk_profiling_memory_usage_get_s kargs;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_memory_usage_get_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_mem_usage_get(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_memory_usage_get_s))) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
diff --git a/utgard/r6p2/linux/mali_ukk_pp.c b/utgard/r6p2/linux/mali_ukk_pp.c
new file mode 100755
index 0000000..2c59bc7
--- /dev/null
+++ b/utgard/r6p2/linux/mali_ukk_pp.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_pp_start_job(session_data, uargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the jobs were started successfully, 0 is returned.  If there was an error, but the
+	 * jobs were started, we return -ENOENT.  For anything else returned, the jobs were not
+	 * started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_pp_and_gp_start_job(session_data, uargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	return 0;
+}
+
+int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs)
+{
+	_mali_uk_get_pp_number_of_cores_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_get_pp_number_of_cores(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_pp_number_of_cores_s))) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs)
+{
+	_mali_uk_get_pp_core_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_pp_core_version(&kargs);
+	if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+	if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+
+	return 0;
+}
+
+int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs)
+{
+	_mali_uk_pp_disable_wb_s kargs;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_pp_disable_wb_s))) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	_mali_ukk_pp_job_disable_wb(&kargs);
+
+	return 0;
+}
diff --git a/utgard/r6p2/linux/mali_ukk_profiling.c b/utgard/r6p2/linux/mali_ukk_profiling.c
new file mode 100755
index 0000000..35f916d
--- /dev/null
+++ b/utgard/r6p2/linux/mali_ukk_profiling.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+#include <linux/slab.h>
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs)
+{
+	_mali_uk_profiling_add_event_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_add_event_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_profiling_add_event(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs)
+{
+	_mali_uk_sw_counters_report_s kargs;
+	_mali_osk_errcode_t err;
+	u32 *counter_buffer;
+	u32 __user *counters;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_sw_counters_report_s))) {
+		return -EFAULT;
+	}
+
+	/* make sure that kargs.num_counters is [at least somewhat] sane */
+	if (kargs.num_counters > 10000) {
+		MALI_DEBUG_PRINT(1, ("User space attempted to allocate too many counters.\n"));
+		return -EINVAL;
+	}
+
+	counter_buffer = (u32 *)kmalloc(sizeof(u32) * kargs.num_counters, GFP_KERNEL);
+	if (NULL == counter_buffer) {
+		return -ENOMEM;
+	}
+
+	counters = (u32 *)(uintptr_t)kargs.counters;
+
+	if (0 != copy_from_user(counter_buffer, counters, sizeof(u32) * kargs.num_counters)) {
+		kfree(counter_buffer);
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	kargs.counters = (uintptr_t)counter_buffer;
+
+	err = _mali_ukk_sw_counters_report(&kargs);
+
+	kfree(counter_buffer);
+
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int profiling_get_stream_fd_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_stream_fd_get_s __user *uargs)
+{
+	_mali_uk_profiling_stream_fd_get_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_stream_fd_get_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_profiling_stream_fd_get(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_stream_fd_get_s))) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int profiling_control_set_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_control_set_s __user *uargs)
+{
+	_mali_uk_profiling_control_set_s kargs;
+	_mali_osk_errcode_t err;
+	u8 *kernel_control_data = NULL;
+	u8 *kernel_response_data = NULL;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != get_user(kargs.control_packet_size, &uargs->control_packet_size)) return -EFAULT;
+	if (0 != get_user(kargs.response_packet_size, &uargs->response_packet_size)) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+
+
+	/* Sanity check about the size */
+	if (kargs.control_packet_size > PAGE_SIZE || kargs.response_packet_size > PAGE_SIZE)
+		return -EINVAL;
+
+	if (0 !=  kargs.control_packet_size) {
+
+		if (0 == kargs.response_packet_size)
+			return -EINVAL;
+
+		kernel_control_data = _mali_osk_calloc(1, kargs.control_packet_size);
+		if (NULL == kernel_control_data) {
+			return -ENOMEM;
+		}
+
+		kernel_response_data = _mali_osk_calloc(1, kargs.response_packet_size);
+		if (NULL == kernel_response_data) {
+			_mali_osk_free(kernel_control_data);
+			return -ENOMEM;
+		}
+
+		kargs.control_packet_data = (uintptr_t)kernel_control_data;
+		kargs.response_packet_data = (uintptr_t)kernel_response_data;
+
+		if (0 != copy_from_user((void *)(uintptr_t)kernel_control_data, (void *)(uintptr_t)uargs->control_packet_data, kargs.control_packet_size)) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return -EFAULT;
+		}
+
+		err = _mali_ukk_profiling_control_set(&kargs);
+		if (_MALI_OSK_ERR_OK != err) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return map_errcode(err);
+		}
+
+		if (0 != kargs.response_packet_size && 0 != copy_to_user(((void *)(uintptr_t)uargs->response_packet_data), ((void *)(uintptr_t)kargs.response_packet_data), kargs.response_packet_size)) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return -EFAULT;
+		}
+
+		if (0 != put_user(kargs.response_packet_size, &uargs->response_packet_size)) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return -EFAULT;
+		}
+
+		_mali_osk_free(kernel_control_data);
+		_mali_osk_free(kernel_response_data);
+	} else {
+
+		err = _mali_ukk_profiling_control_set(&kargs);
+		if (_MALI_OSK_ERR_OK != err) {
+			return map_errcode(err);
+		}
+
+	}
+	return 0;
+}
diff --git a/utgard/r6p2/linux/mali_ukk_soft_job.c b/utgard/r6p2/linux/mali_ukk_soft_job.c
new file mode 100755
index 0000000..beeb753
--- /dev/null
+++ b/utgard/r6p2/linux/mali_ukk_soft_job.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+#include "mali_soft_job.h"
+#include "mali_timeline.h"
+
+int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs)
+{
+	_mali_uk_soft_job_start_s kargs;
+	u32 type, point;
+	u64 user_job;
+	struct mali_timeline_fence fence;
+	struct mali_soft_job *job = NULL;
+	u32 __user *job_id_ptr = NULL;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session, -EINVAL);
+
+	MALI_DEBUG_ASSERT_POINTER(session->soft_job_system);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(kargs))) {
+		return -EFAULT;
+	}
+
+	type = kargs.type;
+	user_job = kargs.user_job;
+	job_id_ptr = (u32 __user *)(uintptr_t)kargs.job_id_ptr;
+
+	mali_timeline_fence_copy_uk_fence(&fence, &kargs.fence);
+
+	if ((MALI_SOFT_JOB_TYPE_USER_SIGNALED != type) && (MALI_SOFT_JOB_TYPE_SELF_SIGNALED != type)) {
+		MALI_DEBUG_PRINT_ERROR(("Invalid soft job type specified\n"));
+		return -EINVAL;
+	}
+
+	/* Create soft job. */
+	job = mali_soft_job_create(session->soft_job_system, (enum mali_soft_job_type)type, user_job);
+	if (unlikely(NULL == job)) {
+		return map_errcode(_MALI_OSK_ERR_NOMEM);
+	}
+
+	/* Write job id back to user space. */
+	if (0 != put_user(job->id, job_id_ptr)) {
+		MALI_PRINT_ERROR(("Mali Soft Job: failed to put job id"));
+		mali_soft_job_destroy(job);
+		return map_errcode(_MALI_OSK_ERR_NOMEM);
+	}
+
+	/* Start soft job. */
+	point = mali_soft_job_start(job, &fence);
+
+	if (0 != put_user(point, &uargs->point)) {
+		/* Let user space know that something failed after the job was started. */
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs)
+{
+	u32 job_id;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != get_user(job_id, &uargs->job_id)) return -EFAULT;
+
+	err = mali_soft_job_system_signal_job(session->soft_job_system, job_id);
+
+	return map_errcode(err);
+}
diff --git a/utgard/r6p2/linux/mali_ukk_timeline.c b/utgard/r6p2/linux/mali_ukk_timeline.c
new file mode 100755
index 0000000..f32d8b0
--- /dev/null
+++ b/utgard/r6p2/linux/mali_ukk_timeline.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+#include "mali_timeline.h"
+#include "mali_timeline_fence_wait.h"
+#include "mali_timeline_sync_fence.h"
+
+int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs)
+{
+	u32 val;
+	mali_timeline_id timeline;
+	mali_timeline_point point;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != get_user(val, &uargs->timeline)) return -EFAULT;
+
+	if (MALI_UK_TIMELINE_MAX <= val) {
+		return -EINVAL;
+	}
+
+	timeline = (mali_timeline_id)val;
+
+	point = mali_timeline_system_get_latest_point(session->timeline_system, timeline);
+
+	if (0 != put_user(point, &uargs->point)) return -EFAULT;
+
+	return 0;
+}
+
+int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs)
+{
+	u32 timeout, status;
+	mali_bool ret;
+	_mali_uk_fence_t uk_fence;
+	struct mali_timeline_fence fence;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t))) return -EFAULT;
+	if (0 != get_user(timeout, &uargs->timeout)) return -EFAULT;
+
+	mali_timeline_fence_copy_uk_fence(&fence, &uk_fence);
+
+	ret = mali_timeline_fence_wait(session->timeline_system, &fence, timeout);
+	status = (MALI_TRUE == ret ? 1 : 0);
+
+	if (0 != put_user(status, &uargs->status)) return -EFAULT;
+
+	return 0;
+}
+
+int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs)
+{
+	s32 sync_fd = -1;
+	_mali_uk_fence_t uk_fence;
+	struct mali_timeline_fence fence;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (0 != copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t))) return -EFAULT;
+	mali_timeline_fence_copy_uk_fence(&fence, &uk_fence);
+
+#if defined(CONFIG_SYNC)
+	sync_fd = mali_timeline_sync_fence_create(session->timeline_system, &fence);
+#else
+	sync_fd = -1;
+#endif /* defined(CONFIG_SYNC) */
+
+	if (0 != put_user(sync_fd, &uargs->sync_fd)) return -EFAULT;
+
+	return 0;
+}
diff --git a/utgard/r6p2/linux/mali_ukk_vsync.c b/utgard/r6p2/linux/mali_ukk_vsync.c
new file mode 100755
index 0000000..5b54fb9
--- /dev/null
+++ b/utgard/r6p2/linux/mali_ukk_vsync.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+
+int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs)
+{
+	_mali_uk_vsync_event_report_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_vsync_event_report_s))) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_vsync_event_report(&kargs);
+	if (_MALI_OSK_ERR_OK != err) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
diff --git a/utgard/r6p2/linux/mali_ukk_wrappers.h b/utgard/r6p2/linux/mali_ukk_wrappers.h
new file mode 100755
index 0000000..7e59346
--- /dev/null
+++ b/utgard/r6p2/linux/mali_ukk_wrappers.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_ukk_wrappers.h
+ * Defines the wrapper functions for each user-kernel function
+ */
+
+#ifndef __MALI_UKK_WRAPPERS_H__
+#define __MALI_UKK_WRAPPERS_H__
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs);
+int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs);
+int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs);
+int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs);
+int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs);
+int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs);
+int pending_submit_wrapper(struct mali_session_data *session_data, _mali_uk_pending_submit_s __user *uargs);
+
+int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs);
+int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs);
+int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs);
+int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs);
+int mem_cow_wrapper(struct mali_session_data *session_data, _mali_uk_cow_mem_s __user *uargs);
+int mem_cow_modify_range_wrapper(struct mali_session_data *session_data, _mali_uk_cow_modify_range_s __user *uargs);
+int mem_resize_mem_wrapper(struct mali_session_data *session_data, _mali_uk_mem_resize_s __user *uargs);
+int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs);
+int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs);
+int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs);
+int mem_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs);
+
+int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs);
+int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs);
+int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs);
+int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs);
+int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs);
+int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs);
+int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs);
+int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs);
+int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs);
+int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs);
+int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs);
+int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs);
+int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs);
+int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs);
+
+int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs);
+int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs);
+int profiling_get_stream_fd_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_stream_fd_get_s __user *uargs);
+int profiling_control_set_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_control_set_s __user *uargs);
+
+int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs);
+
+
+int map_errcode(_mali_osk_errcode_t err);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UKK_WRAPPERS_H__ */
diff --git a/utgard/r6p2/readme.txt b/utgard/r6p2/readme.txt
new file mode 100755
index 0000000..6785ac9
--- /dev/null
+++ b/utgard/r6p2/readme.txt
@@ -0,0 +1,28 @@
+Building the Mali Device Driver for Linux
+-----------------------------------------
+
+Build the Mali Device Driver for Linux by running the following make command:
+
+KDIR=<kdir_path> USING_UMP=<ump_option> BUILD=<build_option> make
+
+where
+    kdir_path: Path to your Linux Kernel directory
+    ump_option: 1 = Enable UMP support(*)
+                0 = disable UMP support
+    build_option: debug = debug build of driver
+                  release = release build of driver
+
+(*)  For newer Linux Kernels, the Module.symvers file for the UMP device driver
+     must be available. The UMP_SYMVERS_FILE variable in the Makefile should
+     point to this file. This file is generated when the UMP driver is built.
+
+The result will be a mali.ko file, which can be loaded into the Linux kernel
+by using the insmod command.
+
+Use of UMP is not recommended. The dma-buf API in the Linux kernel has
+replaced UMP. The Mali Device Driver will be built with dma-buf support if the
+kernel config includes enabled dma-buf.
+
+The kernel needs to be provided with a platform_device struct for the Mali GPU
+device. See the mali_utgard.h header file for how to set up the Mali GPU
+resources.
diff --git a/utgard/r6p2/regs/mali_200_regs.h b/utgard/r6p2/regs/mali_200_regs.h
new file mode 100755
index 0000000..c904ad2
--- /dev/null
+++ b/utgard/r6p2/regs/mali_200_regs.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2010, 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _MALI200_REGS_H_
+#define _MALI200_REGS_H_
+
+/**
+ *  Enum for management register addresses.
+ */
+enum mali200_mgmt_reg {
+	MALI200_REG_ADDR_MGMT_VERSION                              = 0x1000,
+	MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR               = 0x1004,
+	MALI200_REG_ADDR_MGMT_STATUS                               = 0x1008,
+	MALI200_REG_ADDR_MGMT_CTRL_MGMT                            = 0x100c,
+
+	MALI200_REG_ADDR_MGMT_INT_RAWSTAT                          = 0x1020,
+	MALI200_REG_ADDR_MGMT_INT_CLEAR                            = 0x1024,
+	MALI200_REG_ADDR_MGMT_INT_MASK                             = 0x1028,
+	MALI200_REG_ADDR_MGMT_INT_STATUS                           = 0x102c,
+
+	MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS                     = 0x1050,
+
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE                    = 0x1080,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC                       = 0x1084,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_LIMIT                     = 0x1088,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE                     = 0x108c,
+
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE                    = 0x10a0,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC                       = 0x10a4,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE                     = 0x10ac,
+
+	MALI200_REG_ADDR_MGMT_PERFMON_CONTR                        = 0x10b0,
+	MALI200_REG_ADDR_MGMT_PERFMON_BASE                         = 0x10b4,
+
+	MALI200_REG_SIZEOF_REGISTER_BANK                           = 0x10f0
+
+};
+
+#define MALI200_REG_VAL_PERF_CNT_ENABLE 1
+
+enum mali200_mgmt_ctrl_mgmt {
+	MALI200_REG_VAL_CTRL_MGMT_STOP_BUS         = (1 << 0),
+	MALI200_REG_VAL_CTRL_MGMT_FLUSH_CACHES     = (1 << 3),
+	MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET      = (1 << 5),
+	MALI200_REG_VAL_CTRL_MGMT_START_RENDERING  = (1 << 6),
+	MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET     = (1 << 7), /* Only valid for Mali-300 and later */
+};
+
+enum mali200_mgmt_irq {
+	MALI200_REG_VAL_IRQ_END_OF_FRAME          = (1 << 0),
+	MALI200_REG_VAL_IRQ_END_OF_TILE           = (1 << 1),
+	MALI200_REG_VAL_IRQ_HANG                  = (1 << 2),
+	MALI200_REG_VAL_IRQ_FORCE_HANG            = (1 << 3),
+	MALI200_REG_VAL_IRQ_BUS_ERROR             = (1 << 4),
+	MALI200_REG_VAL_IRQ_BUS_STOP              = (1 << 5),
+	MALI200_REG_VAL_IRQ_CNT_0_LIMIT           = (1 << 6),
+	MALI200_REG_VAL_IRQ_CNT_1_LIMIT           = (1 << 7),
+	MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR  = (1 << 8),
+	MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND = (1 << 9),
+	MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW  = (1 << 10),
+	MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW   = (1 << 11),
+	MALI400PP_REG_VAL_IRQ_RESET_COMPLETED       = (1 << 12),
+};
+
+#define MALI200_REG_VAL_IRQ_MASK_ALL  ((enum mali200_mgmt_irq) (\
+				       MALI200_REG_VAL_IRQ_END_OF_FRAME                           |\
+				       MALI200_REG_VAL_IRQ_END_OF_TILE                            |\
+				       MALI200_REG_VAL_IRQ_HANG                                   |\
+				       MALI200_REG_VAL_IRQ_FORCE_HANG                             |\
+				       MALI200_REG_VAL_IRQ_BUS_ERROR                              |\
+				       MALI200_REG_VAL_IRQ_BUS_STOP                               |\
+				       MALI200_REG_VAL_IRQ_CNT_0_LIMIT                            |\
+				       MALI200_REG_VAL_IRQ_CNT_1_LIMIT                            |\
+				       MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR                   |\
+				       MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND                  |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW                   |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW                    |\
+				       MALI400PP_REG_VAL_IRQ_RESET_COMPLETED))
+
+#define MALI200_REG_VAL_IRQ_MASK_USED ((enum mali200_mgmt_irq) (\
+				       MALI200_REG_VAL_IRQ_END_OF_FRAME                           |\
+				       MALI200_REG_VAL_IRQ_FORCE_HANG                             |\
+				       MALI200_REG_VAL_IRQ_BUS_ERROR                              |\
+				       MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR                   |\
+				       MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND                  |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW                   |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW))
+
+#define MALI200_REG_VAL_IRQ_MASK_NONE ((enum mali200_mgmt_irq)(0))
+
+enum mali200_mgmt_status {
+	MALI200_REG_VAL_STATUS_RENDERING_ACTIVE     = (1 << 0),
+	MALI200_REG_VAL_STATUS_BUS_STOPPED          = (1 << 4),
+};
+
+enum mali200_render_unit {
+	MALI200_REG_ADDR_FRAME = 0x0000,
+	MALI200_REG_ADDR_RSW   = 0x0004,
+	MALI200_REG_ADDR_STACK = 0x0030,
+	MALI200_REG_ADDR_STACK_SIZE = 0x0034,
+	MALI200_REG_ADDR_ORIGIN_OFFSET_X  = 0x0040
+};
+
+enum mali200_wb_unit {
+	MALI200_REG_ADDR_WB0 = 0x0100,
+	MALI200_REG_ADDR_WB1 = 0x0200,
+	MALI200_REG_ADDR_WB2 = 0x0300
+};
+
+enum mali200_wb_unit_regs {
+	MALI200_REG_ADDR_WB_SOURCE_SELECT = 0x0000,
+	MALI200_REG_ADDR_WB_SOURCE_ADDR   = 0x0004,
+};
+
+/* This should be in the top 16 bit of the version register of Mali PP */
+#define MALI200_PP_PRODUCT_ID 0xC807
+#define MALI300_PP_PRODUCT_ID 0xCE07
+#define MALI400_PP_PRODUCT_ID 0xCD07
+#define MALI450_PP_PRODUCT_ID 0xCF07
+#define MALI470_PP_PRODUCT_ID 0xCF08
+
+
+
+#endif /* _MALI200_REGS_H_ */
diff --git a/utgard/r6p2/regs/mali_gp_regs.h b/utgard/r6p2/regs/mali_gp_regs.h
new file mode 100755
index 0000000..435953d
--- /dev/null
+++ b/utgard/r6p2/regs/mali_gp_regs.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2010, 2012-2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _MALIGP2_CONROL_REGS_H_
+#define _MALIGP2_CONROL_REGS_H_
+
+/**
+ * These are the different geometry processor control registers.
+ * Their usage is to control and monitor the operation of the
+ * Vertex Shader and the Polygon List Builder in the geometry processor.
+ * Addresses are in 32-bit word relative sizes.
+ * @see [P0081] "Geometry Processor Data Structures" for details
+ */
+
+typedef enum {
+	MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR           = 0x00,
+	MALIGP2_REG_ADDR_MGMT_VSCL_END_ADDR             = 0x04,
+	MALIGP2_REG_ADDR_MGMT_PLBUCL_START_ADDR         = 0x08,
+	MALIGP2_REG_ADDR_MGMT_PLBUCL_END_ADDR           = 0x0c,
+	MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR     = 0x10,
+	MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR       = 0x14,
+	MALIGP2_REG_ADDR_MGMT_CMD                       = 0x20,
+	MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT               = 0x24,
+	MALIGP2_REG_ADDR_MGMT_INT_CLEAR                 = 0x28,
+	MALIGP2_REG_ADDR_MGMT_INT_MASK                  = 0x2C,
+	MALIGP2_REG_ADDR_MGMT_INT_STAT                  = 0x30,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE         = 0x3C,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE         = 0x40,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC            = 0x44,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC            = 0x48,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE          = 0x4C,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE          = 0x50,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_LIMIT          = 0x54,
+	MALIGP2_REG_ADDR_MGMT_STATUS                    = 0x68,
+	MALIGP2_REG_ADDR_MGMT_VERSION                   = 0x6C,
+	MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR_READ      = 0x80,
+	MALIGP2_REG_ADDR_MGMT_PLBCL_START_ADDR_READ     = 0x84,
+	MALIGP2_CONTR_AXI_BUS_ERROR_STAT                = 0x94,
+	MALIGP2_REGISTER_ADDRESS_SPACE_SIZE             = 0x98,
+} maligp_reg_addr_mgmt_addr;
+
+#define MALIGP2_REG_VAL_PERF_CNT_ENABLE 1
+
+/**
+ * Commands to geometry processor.
+ *  @see MALIGP2_CTRL_REG_CMD
+ */
+typedef enum {
+	MALIGP2_REG_VAL_CMD_START_VS                    = (1 << 0),
+	MALIGP2_REG_VAL_CMD_START_PLBU                  = (1 << 1),
+	MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC   = (1 << 4),
+	MALIGP2_REG_VAL_CMD_RESET                               = (1 << 5),
+	MALIGP2_REG_VAL_CMD_FORCE_HANG                  = (1 << 6),
+	MALIGP2_REG_VAL_CMD_STOP_BUS                    = (1 << 9),
+	MALI400GP_REG_VAL_CMD_SOFT_RESET                = (1 << 10), /* only valid for Mali-300 and later */
+} mgp_contr_reg_val_cmd;
+
+
+/**  @defgroup MALIGP2_IRQ
+ * Interrupt status of geometry processor.
+ *  @see MALIGP2_CTRL_REG_INT_RAWSTAT, MALIGP2_REG_ADDR_MGMT_INT_CLEAR,
+ *       MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_ADDR_MGMT_INT_STAT
+ * @{
+ */
+#define MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      (1 << 0)
+#define MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    (1 << 1)
+#define MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     (1 << 2)
+#define MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ          (1 << 3)
+#define MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ        (1 << 4)
+#define MALIGP2_REG_VAL_IRQ_HANG                (1 << 5)
+#define MALIGP2_REG_VAL_IRQ_FORCE_HANG          (1 << 6)
+#define MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT    (1 << 7)
+#define MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT    (1 << 8)
+#define MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     (1 << 9)
+#define MALIGP2_REG_VAL_IRQ_SYNC_ERROR          (1 << 10)
+#define MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       (1 << 11)
+#define MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED     (1 << 12)
+#define MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      (1 << 13)
+#define MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     (1 << 14)
+#define MALI400GP_REG_VAL_IRQ_RESET_COMPLETED     (1 << 19)
+#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW (1 << 20)
+#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  (1 << 21)
+#define MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS  (1 << 22)
+
+/* Mask defining all IRQs in Mali GP */
+#define MALIGP2_REG_VAL_IRQ_MASK_ALL \
+	(\
+	 MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     | \
+	 MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ          | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ        | \
+	 MALIGP2_REG_VAL_IRQ_HANG                | \
+	 MALIGP2_REG_VAL_IRQ_FORCE_HANG          | \
+	 MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT    | \
+	 MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT    | \
+	 MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     | \
+	 MALIGP2_REG_VAL_IRQ_SYNC_ERROR          | \
+	 MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       | \
+	 MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED     | \
+	 MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      | \
+	 MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     | \
+	 MALI400GP_REG_VAL_IRQ_RESET_COMPLETED     | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  | \
+	 MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+/* Mask defining the IRQs in Mali GP which we use */
+#define MALIGP2_REG_VAL_IRQ_MASK_USED \
+	(\
+	 MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     | \
+	 MALIGP2_REG_VAL_IRQ_FORCE_HANG          | \
+	 MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     | \
+	 MALIGP2_REG_VAL_IRQ_SYNC_ERROR          | \
+	 MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       | \
+	 MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      | \
+	 MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  | \
+	 MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+/* Mask defining non IRQs on MaliGP2*/
+#define MALIGP2_REG_VAL_IRQ_MASK_NONE 0
+
+/** }@ defgroup MALIGP2_IRQ*/
+
+/** @defgroup MALIGP2_STATUS
+ * The different Status values to the geometry processor.
+ *  @see MALIGP2_CTRL_REG_STATUS
+ * @{
+ */
+#define MALIGP2_REG_VAL_STATUS_VS_ACTIVE         0x0002
+#define MALIGP2_REG_VAL_STATUS_BUS_STOPPED       0x0004
+#define MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE       0x0008
+#define MALIGP2_REG_VAL_STATUS_BUS_ERROR         0x0040
+#define MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR   0x0100
+/** }@ defgroup MALIGP2_STATUS*/
+
+#define MALIGP2_REG_VAL_STATUS_MASK_ACTIVE (\
+		MALIGP2_REG_VAL_STATUS_VS_ACTIVE|\
+		MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE)
+
+
+#define MALIGP2_REG_VAL_STATUS_MASK_ERROR (\
+		MALIGP2_REG_VAL_STATUS_BUS_ERROR |\
+		MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR )
+
+/* This should be in the top 16 bit of the version register of gp.*/
+#define MALI200_GP_PRODUCT_ID 0xA07
+#define MALI300_GP_PRODUCT_ID 0xC07
+#define MALI400_GP_PRODUCT_ID 0xB07
+#define MALI450_GP_PRODUCT_ID 0xD07
+
+/**
+ * The different sources for instrumented on the geometry processor.
+ *  @see MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC
+ */
+
+enum MALIGP2_cont_reg_perf_cnt_src {
+	MALIGP2_REG_VAL_PERF_CNT1_SRC_NUMBER_OF_VERTICES_PROCESSED = 0x0a,
+};
+
+#endif
diff --git a/utgard/r6p2/timestamp-arm11-cc/mali_timestamp.c b/utgard/r6p2/timestamp-arm11-cc/mali_timestamp.c
new file mode 100755
index 0000000..f7f7baf
--- /dev/null
+++ b/utgard/r6p2/timestamp-arm11-cc/mali_timestamp.c
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2010-2011, 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timestamp.h"
+
+/* This file is intentionally left empty, as all functions are inlined in mali_profiling_sampler.h */
diff --git a/utgard/r6p2/timestamp-arm11-cc/mali_timestamp.h b/utgard/r6p2/timestamp-arm11-cc/mali_timestamp.h
new file mode 100755
index 0000000..757f643
--- /dev/null
+++ b/utgard/r6p2/timestamp-arm11-cc/mali_timestamp.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMESTAMP_H__
+#define __MALI_TIMESTAMP_H__
+
+#include "mali_osk.h"
+
+MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void)
+{
+	/*
+	 * reset counters and overflow flags
+	 */
+
+	u32 mask = (1 << 0) | /* enable all three counters */
+		   (0 << 1) | /* reset both Count Registers to 0x0 */
+		   (1 << 2) | /* reset the Cycle Counter Register to 0x0 */
+		   (0 << 3) | /* 1 = Cycle Counter Register counts every 64th processor clock cycle */
+		   (0 << 4) | /* Count Register 0 interrupt enable */
+		   (0 << 5) | /* Count Register 1 interrupt enable */
+		   (0 << 6) | /* Cycle Counter interrupt enable */
+		   (0 << 8) | /* Count Register 0 overflow flag (clear or write, flag on read) */
+		   (0 << 9) | /* Count Register 1 overflow flag (clear or write, flag on read) */
+		   (1 << 10); /* Cycle Counter Register overflow flag (clear or write, flag on read) */
+
+	__asm__ __volatile__("MCR    p15, 0, %0, c15, c12, 0" : : "r"(mask));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE u64 _mali_timestamp_get(void)
+{
+	u32 result;
+
+	/* this is for the clock cycles */
+	__asm__ __volatile__("MRC    p15, 0, %0, c15, c12, 1" : "=r"(result));
+
+	return (u64)result;
+}
+
+#endif /* __MALI_TIMESTAMP_H__ */
diff --git a/utgard/r6p2/timestamp-default/mali_timestamp.c b/utgard/r6p2/timestamp-default/mali_timestamp.c
new file mode 100755
index 0000000..f7f7baf
--- /dev/null
+++ b/utgard/r6p2/timestamp-default/mali_timestamp.c
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2010-2011, 2013, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timestamp.h"
+
+/* This file is intentionally left empty, as all functions are inlined in mali_profiling_sampler.h */
diff --git a/utgard/r6p2/timestamp-default/mali_timestamp.h b/utgard/r6p2/timestamp-default/mali_timestamp.h
new file mode 100755
index 0000000..21700fe
--- /dev/null
+++ b/utgard/r6p2/timestamp-default/mali_timestamp.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ * 
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ * 
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMESTAMP_H__
+#define __MALI_TIMESTAMP_H__
+
+#include "mali_osk.h"
+
+MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void)
+{
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE u64 _mali_timestamp_get(void)
+{
+	return _mali_osk_boot_time_get_ns();
+}
+
+#endif /* __MALI_TIMESTAMP_H__ */
diff --git a/utgard/r7p0/.version b/utgard/r7p0/.version
new file mode 100644
index 0000000..4175ceb
--- /dev/null
+++ b/utgard/r7p0/.version
@@ -0,0 +1 @@
+r7p0-00rel0
diff --git a/utgard/r7p0/Kbuild b/utgard/r7p0/Kbuild
new file mode 100644
index 0000000..49b4ea3
--- /dev/null
+++ b/utgard/r7p0/Kbuild
@@ -0,0 +1,258 @@
+#
+# Copyright (C) 2010-2011 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+# This file is called by the Linux build system.
+
+# set up defaults if not defined by the user
+include $(src)/platform/Kbuild.amlogic
+
+TIMESTAMP ?= default
+OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB ?= 16
+USING_GPU_UTILIZATION ?= 0
+PROFILING_SKIP_PP_JOBS ?= 0
+PROFILING_SKIP_PP_AND_GP_JOBS ?= 0
+MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP ?= 0
+MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED ?= 0
+MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS ?= 0
+MALI_UPPER_HALF_SCHEDULING ?= 1
+MALI_ENABLE_CPU_CYCLES ?= 0
+
+# For customer releases the Linux Device Drivers will be provided as ARM proprietary and GPL releases:
+# The ARM proprietary product will only include the license/proprietary directory
+# The GPL product will only include the license/gpl directory
+ifeq ($(wildcard $(src)/linux/license/gpl/*),)
+    ccflags-y += -I$(src)/linux/license/proprietary
+    ifeq ($(CONFIG_MALI400_PROFILING),y)
+        $(error Profiling is incompatible with non-GPL license)
+    endif
+    ifeq ($(CONFIG_PM_RUNTIME),y)
+        $(error Runtime PM is incompatible with non-GPL license)
+    endif
+    ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
+        $(error DMA-BUF is incompatible with non-GPL license)
+    endif
+    $(error Linux Device integration is incompatible with non-GPL license)
+else
+    ccflags-y += -I$(src)/linux/license/gpl
+endif
+
+ifeq ($(USING_GPU_UTILIZATION), 1)
+    ifeq ($(USING_DVFS), 1)
+        $(error USING_GPU_UTILIZATION conflict with USING_DVFS you can read the Integration Guide to choose which one do you need)
+    endif
+endif
+
+ifeq ($(MALI_PLATFORM_FILES),)
+ifeq ($(CONFIG_ARCH_EXYNOS4),y)
+EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1
+export MALI_PLATFORM=exynos4
+export MALI_PLATFORM_FILES_BUILDIN = $(notdir $(wildcard $(src)/platform/$(MALI_PLATFORM)/*.c))
+export MALI_PLATFORM_FILES_ADD_PREFIX = $(addprefix platform/$(MALI_PLATFORM)/,$(MALI_PLATFORM_FILES_BUILDIN))
+endif
+endif
+
+mali-y += \
+	linux/mali_osk_atomics.o \
+	linux/mali_osk_irq.o \
+	linux/mali_osk_wq.o \
+	linux/mali_osk_locks.o \
+	linux/mali_osk_wait_queue.o \
+	linux/mali_osk_low_level_mem.o \
+	linux/mali_osk_math.o \
+	linux/mali_osk_memory.o \
+	linux/mali_osk_misc.o \
+	linux/mali_osk_mali.o \
+	linux/mali_osk_notification.o \
+	linux/mali_osk_time.o \
+	linux/mali_osk_timers.o \
+	linux/mali_osk_bitmap.o
+
+mali-y += linux/mali_memory.o linux/mali_memory_os_alloc.o
+mali-y += linux/mali_memory_external.o
+mali-y += linux/mali_memory_block_alloc.o
+mali-y += linux/mali_memory_swap_alloc.o
+
+mali-y += \
+	linux/mali_memory_manager.o \
+	linux/mali_memory_virtual.o \
+	linux/mali_memory_util.o \
+	linux/mali_memory_cow.o \
+	linux/mali_memory_defer_bind.o
+
+mali-y += \
+	linux/mali_ukk_mem.o \
+	linux/mali_ukk_gp.o \
+	linux/mali_ukk_pp.o \
+	linux/mali_ukk_core.o \
+	linux/mali_ukk_soft_job.o \
+	linux/mali_ukk_timeline.o
+
+mali-$(CONFIG_MALI_DEVFREQ) += \
+	linux/mali_devfreq.o \
+	common/mali_pm_metrics.o
+
+# Source files which always are included in a build
+mali-y += \
+	common/mali_kernel_core.o \
+	linux/mali_kernel_linux.o \
+	common/mali_session.o \
+	linux/mali_device_pause_resume.o \
+	common/mali_kernel_vsync.o \
+	linux/mali_ukk_vsync.o \
+	linux/mali_kernel_sysfs.o \
+	common/mali_mmu.o \
+	common/mali_mmu_page_directory.o \
+	common/mali_mem_validation.o \
+	common/mali_hw_core.o \
+	common/mali_gp.o \
+	common/mali_pp.o \
+	common/mali_pp_job.o \
+	common/mali_gp_job.o \
+	common/mali_soft_job.o \
+	common/mali_scheduler.o \
+	common/mali_executor.o \
+	common/mali_group.o \
+	common/mali_dlbu.o \
+	common/mali_broadcast.o \
+	common/mali_pm.o \
+	common/mali_pmu.o \
+	common/mali_user_settings_db.o \
+	common/mali_kernel_utilization.o \
+	common/mali_control_timer.o \
+	common/mali_l2_cache.o \
+	common/mali_timeline.o \
+	common/mali_timeline_fence_wait.o \
+	common/mali_timeline_sync_fence.o \
+	common/mali_spinlock_reentrant.o \
+	common/mali_pm_domain.o \
+	linux/mali_osk_pm.o \
+	linux/mali_pmu_power_up_down.o \
+	__malidrv_build_info.o
+
+ifneq ($(wildcard $(src)/linux/mali_slp_global_lock.c),)
+	mali-y += linux/mali_slp_global_lock.o
+endif
+
+ifneq ($(MALI_PLATFORM_FILES),)
+	mali-y += $(MALI_PLATFORM_FILES:.c=.o)
+endif
+
+ifneq ($(MALI_PLATFORM_FILES_ADD_PREFIX),)
+	mali-y += $(MALI_PLATFORM_FILES_ADD_PREFIX:.c=.o)
+endif
+
+mali-$(CONFIG_MALI400_PROFILING) += linux/mali_ukk_profiling.o
+mali-$(CONFIG_MALI400_PROFILING) += linux/mali_osk_profiling.o
+
+mali-$(CONFIG_MALI400_INTERNAL_PROFILING) += linux/mali_profiling_internal.o timestamp-$(TIMESTAMP)/mali_timestamp.o
+ccflags-$(CONFIG_MALI400_INTERNAL_PROFILING) += -I$(src)/timestamp-$(TIMESTAMP)
+
+mali-$(CONFIG_DMA_SHARED_BUFFER) += linux/mali_memory_dma_buf.o
+mali-$(CONFIG_DMA_SHARED_BUFFER) += linux/mali_memory_secure.o
+mali-$(CONFIG_SYNC) += linux/mali_sync.o
+mali-$(CONFIG_MALI_DMA_BUF_FENCE) += linux/mali_dma_fence.o
+ccflags-$(CONFIG_SYNC) += -Idrivers/staging/android
+
+mali-$(CONFIG_MALI400_UMP) += linux/mali_memory_ump.o
+
+mali-$(CONFIG_MALI_DVFS) += common/mali_dvfs_policy.o
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI400) := mali.o
+
+ccflags-y += $(EXTRA_DEFINES)
+
+# Set up our defines, which will be passed to gcc
+ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP)
+ccflags-y += -DMALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED=$(MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED)
+ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS)
+ifdef CONFIG_MALI400_DEBUG
+ccflags-y += -DMALI_STATE_TRACKING=1
+else
+ccflags-y += -DMALI_STATE_TRACKING=0
+endif
+ccflags-y += -DMALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB)
+ccflags-y += -DUSING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION)
+ccflags-y += -DMALI_ENABLE_CPU_CYCLES=$(MALI_ENABLE_CPU_CYCLES)
+
+ifeq ($(MALI_UPPER_HALF_SCHEDULING),1)
+	ccflags-y += -DMALI_UPPER_HALF_SCHEDULING
+endif
+
+#build-in include path is different
+ifeq ($(MALI_PLATFORM_FILES),)
+ccflags-$(CONFIG_MALI400_UMP) += -I$(src)/../ump/include/
+else
+ccflags-$(CONFIG_MALI400_UMP) += -I$(src)/../../ump/include/ump
+endif
+ccflags-$(CONFIG_MALI400_DEBUG) += -DDEBUG
+
+# Use our defines when compiling
+ccflags-y += -I$(src) -I$(src)/include -I$(src)/common -I$(src)/linux -I$(src)/platform
+
+# Get subversion revision number, fall back to only ${MALI_RELEASE_NAME} if no svn info is available
+MALI_RELEASE_NAME=$(shell cat $(src)/.version 2> /dev/null)
+
+SVN_INFO = (cd $(src); svn info 2>/dev/null)
+
+ifneq ($(shell $(SVN_INFO) 2>/dev/null),)
+# SVN detected
+SVN_REV := $(shell $(SVN_INFO) | grep '^Revision: '| sed -e 's/^Revision: //' 2>/dev/null)
+DRIVER_REV := $(MALI_RELEASE_NAME)-r$(SVN_REV)
+CHANGE_DATE := $(shell $(SVN_INFO) | grep '^Last Changed Date: ' | cut -d: -f2- | cut -b2-)
+CHANGED_REVISION := $(shell $(SVN_INFO) | grep '^Last Changed Rev: ' | cut -d: -f2- | cut -b2-)
+REPO_URL := $(shell $(SVN_INFO) | grep '^URL: ' | cut -d: -f2- | cut -b2-)
+
+else # SVN
+GIT_REV := $(shell cd $(src); git describe --always 2>/dev/null)
+ifneq ($(GIT_REV),)
+# Git detected
+DRIVER_REV := $(MALI_RELEASE_NAME)-$(GIT_REV)
+CHANGE_DATE := $(shell cd $(src); git log -1 --format="%ci")
+CHANGED_REVISION := $(GIT_REV)
+REPO_URL := $(shell cd $(src); git describe --all --always 2>/dev/null)
+
+else # Git
+# No Git or SVN detected
+DRIVER_REV := $(MALI_RELEASE_NAME)
+CHANGE_DATE := $(MALI_RELEASE_NAME)
+CHANGED_REVISION := $(MALI_RELEASE_NAME)
+endif
+endif
+
+ccflags-y += -DSVN_REV_STRING=\"$(DRIVER_REV)\"
+
+VERSION_STRINGS :=
+VERSION_STRINGS += API_VERSION=$(shell cd $(src); grep "\#define _MALI_API_VERSION" $(FILES_PREFIX)include/linux/mali/mali_utgard_uk_types.h | cut -d' ' -f 3 )
+VERSION_STRINGS += REPO_URL=$(REPO_URL)
+VERSION_STRINGS += REVISION=$(DRIVER_REV)
+VERSION_STRINGS += CHANGED_REVISION=$(CHANGED_REVISION)
+VERSION_STRINGS += CHANGE_DATE=$(CHANGE_DATE)
+VERSION_STRINGS += BUILD_DATE=$(shell date)
+ifdef CONFIG_MALI400_DEBUG
+VERSION_STRINGS += BUILD=debug
+else
+VERSION_STRINGS += BUILD=release
+endif
+VERSION_STRINGS += TARGET_PLATFORM=$(TARGET_PLATFORM)
+VERSION_STRINGS += MALI_PLATFORM=$(MALI_PLATFORM)
+VERSION_STRINGS += KDIR=$(KDIR)
+VERSION_STRINGS += OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB)
+VERSION_STRINGS += USING_UMP=$(CONFIG_MALI400_UMP)
+VERSION_STRINGS += USING_PROFILING=$(CONFIG_MALI400_PROFILING)
+VERSION_STRINGS += USING_INTERNAL_PROFILING=$(CONFIG_MALI400_INTERNAL_PROFILING)
+VERSION_STRINGS += USING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION)
+VERSION_STRINGS += USING_DVFS=$(CONFIG_MALI_DVFS)
+VERSION_STRINGS += USING_DMA_BUF_FENCE = $(CONFIG_MALI_DMA_BUF_FENCE)
+VERSION_STRINGS += MALI_UPPER_HALF_SCHEDULING=$(MALI_UPPER_HALF_SCHEDULING)
+
+# Create file with Mali driver configuration
+$(src)/__malidrv_build_info.c:
+	@echo 'const char *__malidrv_build_info(void) { return "malidrv: $(VERSION_STRINGS)";}' > $(src)/__malidrv_build_info.c
diff --git a/utgard/r7p0/Kconfig b/utgard/r7p0/Kconfig
new file mode 100644
index 0000000..fbf08de
--- /dev/null
+++ b/utgard/r7p0/Kconfig
@@ -0,0 +1,136 @@
+menu "Mali GPU OpenGL device driver"
+config MALI400
+	tristate "Mali-300/400/450 support"
+	depends on ARM || ARM64
+	default m
+	select DMA_SHARED_BUFFER
+	---help---
+	  This enables support for the ARM Mali-300, Mali-400, and Mali-450
+	  GPUs.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called mali.
+
+config MALI470
+	bool "Enable Mali-470 support"
+	depends on MALI400
+	---help---
+	  This enables support for Mali-470 specific features.
+
+config MALI400_DEBUG
+	bool "Enable debug in Mali driver"
+	depends on MALI400
+	default n
+	---help---
+	  This enabled extra debug checks and messages in the Mali driver.
+
+config MALI400_PROFILING_EXTRA_SUPPORT
+	bool "Select other items in kernel to support PROFILING."
+	depends on MALI400_PROFILING
+	select PROFILING
+	select FTRACE
+	select PERF_EVENTS
+	select ENABLE_DEFAULT_TRACERS
+	select DEBUG_MUTEXES
+	select HIGH_RES_TIMERS
+	select HW_PERF_EVENTS
+	select CPU_FREQ
+	select MALI400_DEBUG
+
+config MALI400_PROFILING
+	bool "Enable Mali profiling"
+	depends on MALI400
+	select TRACEPOINTS
+	default n
+	---help---
+	  This enables gator profiling of Mali GPU events.
+
+config MALI400_INTERNAL_PROFILING
+	bool "Enable internal Mali profiling API"
+	depends on MALI400_PROFILING
+	default n
+	---help---
+	  This enables the internal legacy Mali profiling API.
+
+config MALI400_UMP
+	bool "Enable UMP support"
+	depends on MALI400
+	---help---
+	  This enables support for the UMP memory sharing API in the Mali driver.
+
+config MALI_DVFS
+	bool "Enable Mali dynamically frequency change"
+	depends on MALI400 && !MALI_DEVFREQ
+	default n
+	---help---
+	  This enables support for dynamic change frequency of Mali with the goal of lowering power consumption.
+
+config MALI_DMA_BUF_MAP_ON_ATTACH
+	bool "Map dma-buf attachments on attach"
+	depends on MALI400 && DMA_SHARED_BUFFER
+	default y
+	---help---
+	  This makes the Mali driver map dma-buf attachments after doing
+	  attach. If this is not set the dma-buf attachments will be mapped for
+	  every time the GPU need to access the buffer.
+
+	  Mapping for each access can cause lower performance.
+
+config MALI_SHARED_INTERRUPTS
+	bool "Support for shared interrupts"
+	depends on MALI400
+	default n
+	---help---
+	  Adds functionality required to properly support shared interrupts.  Without this support,
+	  the device driver will fail during insmod if it detects shared interrupts.  This also
+	  works when the GPU is not using shared interrupts, but might have a slight performance
+	  impact.
+
+if ARCH_MESON6
+config	MESON6_GPU_EXTRA
+	bool "M6 fix"
+	depends on MALI400
+	default y
+	select MALI_SHARED_INTERRUPTS
+endif
+
+config MALI_PMU_PARALLEL_POWER_UP
+	bool "Power up Mali PMU domains in parallel"
+	depends on MALI400
+	default n
+	---help---
+	  This makes the Mali driver power up all PMU power domains in parallel, instead of
+	  powering up domains one by one, with a slight delay in between. Powering on all power
+	  domains at the same time may cause peak currents higher than what some systems can handle.
+	  These systems must not enable this option.
+
+config MALI_DT
+	bool "Using device tree to initialize module"
+	depends on MALI400 && OF
+	default n
+	---help---
+	  This enable the Mali driver to choose the device tree path to get platform resoures
+	  and disable the old config method. Mali driver could run on the platform which the
+	  device tree is enabled in kernel and corresponding hardware description is implemented
+	  properly in device DTS file.
+
+config MALI_DEVFREQ
+	bool "Using devfreq to tuning frequency"
+	depends on MALI400 && PM_DEVFREQ
+	default n
+	---help---
+	Support devfreq for Mali.
+
+	Using the devfreq framework and, by default, the simpleondemand
+	governor, the frequency of Mali will be dynamically selected from the
+	available OPPs.
+
+config MALI_QUIET
+	bool "Make Mali driver very quiet"
+	depends on MALI400 && !MALI400_DEBUG
+	default n
+	---help---
+	  This forces the Mali driver to never print any messages.
+
+	  If unsure, say N.
+endmenu
diff --git a/utgard/r7p0/Makefile b/utgard/r7p0/Makefile
new file mode 100644
index 0000000..1eda0ab
--- /dev/null
+++ b/utgard/r7p0/Makefile
@@ -0,0 +1,207 @@
+#
+# Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+
+USE_UMPV2=0
+USING_PROFILING ?= 1
+USING_INTERNAL_PROFILING ?= 0
+USING_DVFS ?= 1
+USING_DMA_BUF_FENCE ?= 0
+MALI_HEATMAPS_ENABLED ?= 0
+MALI_DMA_BUF_MAP_ON_ATTACH ?= 1
+MALI_PMU_PARALLEL_POWER_UP ?= 0
+USING_DT ?= 0
+MALI_MEM_SWAP_TRACKING ?= 0
+USING_DEVFREQ ?= 0
+
+# The Makefile sets up "arch" based on the CONFIG, creates the version info
+# string and the __malidrv_build_info.c file, and then call the Linux build
+# system to actually build the driver. After that point the Kbuild file takes
+# over.
+
+# set up defaults if not defined by the user
+ARCH ?= arm
+
+OSKOS=linux
+FILES_PREFIX=
+
+check_cc2 = \
+	$(shell if $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; \
+	then \
+		echo "$(2)"; \
+	else \
+		echo "$(3)"; \
+	fi ;)
+
+# This conditional makefile exports the global definition ARM_INTERNAL_BUILD. Customer releases will not include arm_internal.mak
+-include ../../../arm_internal.mak
+
+# Give warning of old config parameters are used
+ifneq ($(CONFIG),)
+$(warning "You have specified the CONFIG variable which is no longer in used. Use TARGET_PLATFORM instead.")
+endif
+
+ifneq ($(CPU),)
+$(warning "You have specified the CPU variable which is no longer in used. Use TARGET_PLATFORM instead.")
+endif
+
+# Include the mapping between TARGET_PLATFORM and KDIR + MALI_PLATFORM
+-include MALI_CONFIGURATION
+export KDIR ?= $(KDIR-$(TARGET_PLATFORM))
+export MALI_PLATFORM ?= $(MALI_PLATFORM-$(TARGET_PLATFORM))
+
+ifneq ($(TARGET_PLATFORM),)
+ifeq ($(MALI_PLATFORM),)
+$(error "Invalid TARGET_PLATFORM: $(TARGET_PLATFORM)")
+endif
+endif
+
+# validate lookup result
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(TARGET_PLATFORM))
+endif
+
+ifeq ($(USING_GPU_UTILIZATION), 1)
+    ifeq ($(USING_DVFS), 1)
+        $(error USING_GPU_UTILIZATION conflict with USING_DVFS you can read the Integration Guide to choose which one do you need)
+    endif
+endif
+
+ifeq ($(USING_UMP),1)
+export CONFIG_MALI400_UMP=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_UMP=1
+ifeq ($(USE_UMPV2),1)
+UMP_SYMVERS_FILE ?= ../umpv2/Module.symvers
+else
+UMP_SYMVERS_FILE ?= ../ump/Module.symvers
+endif
+KBUILD_EXTRA_SYMBOLS = $(realpath $(UMP_SYMVERS_FILE))
+$(warning $(KBUILD_EXTRA_SYMBOLS))
+endif
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+include $(KDIR)/.config
+
+ifeq ($(ARCH), arm)
+# when compiling for ARM we're cross compiling
+export CROSS_COMPILE ?= $(call check_cc2, arm-linux-gnueabi-gcc, arm-linux-gnueabi-, arm-none-linux-gnueabi-)
+endif
+
+# report detected/selected settings
+ifdef ARM_INTERNAL_BUILD
+$(warning TARGET_PLATFORM $(TARGET_PLATFORM))
+$(warning KDIR $(KDIR))
+$(warning MALI_PLATFORM $(MALI_PLATFORM))
+endif
+
+# Set up build config
+export CONFIG_MALI400=m
+export CONFIG_MALI450=y
+export CONFIG_MALI470=y
+
+export EXTRA_DEFINES += -DCONFIG_MALI400=1
+export EXTRA_DEFINES += -DCONFIG_MALI450=1
+export EXTRA_DEFINES += -DCONFIG_MALI470=1
+
+ifneq ($(MALI_PLATFORM),)
+export EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1
+export MALI_PLATFORM_FILES = $(wildcard platform/$(MALI_PLATFORM)/*.c)
+endif
+
+ifeq ($(USING_PROFILING),1)
+ifeq ($(CONFIG_TRACEPOINTS),)
+$(warning CONFIG_TRACEPOINTS required for profiling)
+else
+export CONFIG_MALI400_PROFILING=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_PROFILING=1
+ifeq ($(USING_INTERNAL_PROFILING),1)
+export CONFIG_MALI400_INTERNAL_PROFILING=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_INTERNAL_PROFILING=1
+endif
+ifeq ($(MALI_HEATMAPS_ENABLED),1)
+export MALI_HEATMAPS_ENABLED=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_HEATMAPS_ENABLED
+endif
+endif
+endif
+
+ifeq ($(MALI_DMA_BUF_MAP_ON_ATTACH),1)
+export CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DMA_BUF_MAP_ON_ATTACH
+endif
+
+ifeq ($(MALI_SHARED_INTERRUPTS),1)
+export CONFIG_MALI_SHARED_INTERRUPTS=y
+export EXTRA_DEFINES += -DCONFIG_MALI_SHARED_INTERRUPTS
+endif
+
+ifeq ($(USING_DVFS),1)
+	xxxyyyy
+export CONFIG_MALI_DVFS=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DVFS
+endif
+
+ifeq ($(USING_DMA_BUF_FENCE),1)
+export CONFIG_MALI_DMA_BUF_FENCE=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DMA_BUF_FENCE
+endif
+
+ifeq ($(MALI_PMU_PARALLEL_POWER_UP),1)
+export CONFIG_MALI_PMU_PARALLEL_POWER_UP=y
+export EXTRA_DEFINES += -DCONFIG_MALI_PMU_PARALLEL_POWER_UP
+endif
+
+ifdef CONFIG_OF
+ifeq ($(USING_DT),1)
+export CONFIG_MALI_DT=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DT
+endif
+endif
+
+ifeq ($(USING_DEVFREQ), 1)
+ifdef CONFIG_PM_DEVFREQ
+export CONFIG_MALI_DEVFREQ=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DEVFREQ=1
+else
+$(warning "You want to support DEVFREQ but kernel didn't support DEVFREQ.")
+endif
+endif
+
+ifneq ($(BUILD),release)
+# Debug
+export CONFIG_MALI400_DEBUG=y
+else
+# Release
+ifeq ($(MALI_QUIET),1)
+export CONFIG_MALI_QUIET=y
+export EXTRA_DEFINES += -DCONFIG_MALI_QUIET
+endif
+endif
+
+ifeq ($(MALI_SKIP_JOBS),1)
+EXTRA_DEFINES += -DPROFILING_SKIP_PP_JOBS=1 -DPROFILING_SKIP_GP_JOBS=1
+endif
+
+ifeq ($(MALI_MEM_SWAP_TRACKING),1)
+EXTRA_DEFINES += -DMALI_MEM_SWAP_TRACKING=1
+endif
+
+all: $(UMP_SYMVERS_FILE)
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) modules
+	@rm $(FILES_PREFIX)__malidrv_build_info.c $(FILES_PREFIX)__malidrv_build_info.o
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+kernelrelease:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) kernelrelease
+
+export CONFIG KBUILD_EXTRA_SYMBOLS
diff --git a/utgard/r7p0/__malidrv_build_info.c b/utgard/r7p0/__malidrv_build_info.c
new file mode 100644
index 0000000..8e04468
--- /dev/null
+++ b/utgard/r7p0/__malidrv_build_info.c
@@ -0,0 +1 @@
+const char *__malidrv_build_info(void) { return "malidrv:  API_VERSION=900 REPO_URL= REVISION=r7p0-00rel0 CHANGED_REVISION=r7p0-00rel0 CHANGE_DATE=r7p0-00rel0 BUILD_DATE=Thu Dec 21 13:30:30 PST 2017 BUILD=release TARGET_PLATFORM=meson_bu MALI_PLATFORM= KDIR=/var/tmp/timyao/biggie/amlogic-sdk/gpu/../kernel OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=16 USING_UMP= USING_PROFILING= USING_INTERNAL_PROFILING= USING_GPU_UTILIZATION=1 USING_DVFS= USING_DMA_BUF_FENCE =  MALI_UPPER_HALF_SCHEDULING=1";}
diff --git a/utgard/r7p0/clean.sh b/utgard/r7p0/clean.sh
new file mode 100644
index 0000000..130fd73
--- /dev/null
+++ b/utgard/r7p0/clean.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# rm *.o, *.cmd, *~
+find . -name "*.o" -o -name "*.cmd" -o -name "*~" | xargs rm -rf
+rm	Module.symvers
+rm	mali.ko
+rm	mali.mod.c
+rm	modules.order
diff --git a/utgard/r7p0/common/mali_broadcast.c b/utgard/r7p0/common/mali_broadcast.c
new file mode 100644
index 0000000..5b6d1ec
--- /dev/null
+++ b/utgard/r7p0/common/mali_broadcast.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_broadcast.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+#define MALI_BROADCAST_REGISTER_SIZE      0x1000
+#define MALI_BROADCAST_REG_BROADCAST_MASK    0x0
+#define MALI_BROADCAST_REG_INTERRUPT_MASK    0x4
+
+struct mali_bcast_unit {
+	struct mali_hw_core hw_core;
+	u32 current_mask;
+};
+
+struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource)
+{
+	struct mali_bcast_unit *bcast_unit = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(resource);
+	MALI_DEBUG_PRINT(2, ("Broadcast: Creating Mali Broadcast unit: %s\n",
+			     resource->description));
+
+	bcast_unit = _mali_osk_malloc(sizeof(struct mali_bcast_unit));
+	if (bcast_unit == NULL) {
+		MALI_PRINT_ERROR(("Broadcast: Failed to allocate memory for Broadcast unit\n"));
+		return NULL;
+	}
+
+	if (_MALI_OSK_ERR_OK == mali_hw_core_create(&bcast_unit->hw_core,
+			resource, MALI_BROADCAST_REGISTER_SIZE)) {
+		bcast_unit->current_mask = 0;
+		mali_bcast_reset(bcast_unit);
+
+		return bcast_unit;
+	} else {
+		MALI_PRINT_ERROR(("Broadcast: Failed map broadcast unit\n"));
+	}
+
+	_mali_osk_free(bcast_unit);
+
+	return NULL;
+}
+
+void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	mali_hw_core_delete(&bcast_unit->hw_core);
+	_mali_osk_free(bcast_unit);
+}
+
+/* Call this function to add the @group's id into bcast mask
+ * Note: redundant calling this function with same @group
+ * doesn't make any difference as calling it once
+ */
+void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit,
+			  struct mali_group *group)
+{
+	u32 bcast_id;
+	u32 broadcast_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group));
+
+	broadcast_mask = bcast_unit->current_mask;
+
+	broadcast_mask |= (bcast_id); /* add PP core to broadcast */
+	broadcast_mask |= (bcast_id << 16); /* add MMU to broadcast */
+
+	/* store mask so we can restore on reset */
+	bcast_unit->current_mask = broadcast_mask;
+}
+
+/* Call this function to remove @group's id from bcast mask
+ * Note: redundant calling this function with same @group
+ * doesn't make any difference as calling it once
+ */
+void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit,
+			     struct mali_group *group)
+{
+	u32 bcast_id;
+	u32 broadcast_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group));
+
+	broadcast_mask = bcast_unit->current_mask;
+
+	broadcast_mask &= ~((bcast_id << 16) | bcast_id);
+
+	/* store mask so we can restore on reset */
+	bcast_unit->current_mask = broadcast_mask;
+}
+
+void mali_bcast_reset(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+
+	MALI_DEBUG_PRINT(4,
+			 ("Broadcast: setting mask 0x%08X + 0x%08X (reset)\n",
+			  bcast_unit->current_mask,
+			  bcast_unit->current_mask & 0xFF));
+
+	/* set broadcast mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_BROADCAST_MASK,
+				    bcast_unit->current_mask);
+
+	/* set IRQ override mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_INTERRUPT_MASK,
+				    bcast_unit->current_mask & 0xFF);
+}
+
+void mali_bcast_disable(struct mali_bcast_unit *bcast_unit)
+{
+	MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+
+	MALI_DEBUG_PRINT(4, ("Broadcast: setting mask 0x0 + 0x0 (disable)\n"));
+
+	/* set broadcast mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_BROADCAST_MASK,
+				    0x0);
+
+	/* set IRQ override mask */
+	mali_hw_core_register_write(&bcast_unit->hw_core,
+				    MALI_BROADCAST_REG_INTERRUPT_MASK,
+				    0x0);
+}
diff --git a/utgard/r7p0/common/mali_broadcast.h b/utgard/r7p0/common/mali_broadcast.h
new file mode 100644
index 0000000..1516243
--- /dev/null
+++ b/utgard/r7p0/common/mali_broadcast.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_BROADCAST_H__
+#define __MALI_BROADCAST_H__
+
+/*
+ *  Interface for the broadcast unit on Mali-450.
+ *
+ * - Represents up to 8 × (MMU + PP) pairs.
+ * - Supports dynamically changing which (MMU + PP) pairs receive the broadcast by
+ *   setting a mask.
+ */
+
+#include "mali_hw_core.h"
+#include "mali_group.h"
+
+struct mali_bcast_unit;
+
+struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource);
+void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit);
+
+/* Add a group to the list of (MMU + PP) pairs broadcasts go out to. */
+void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group);
+
+/* Remove a group to the list of (MMU + PP) pairs broadcasts go out to. */
+void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group);
+
+/* Re-set cached mask. This needs to be called after having been suspended. */
+void mali_bcast_reset(struct mali_bcast_unit *bcast_unit);
+
+/**
+ * Disable broadcast unit
+ *
+ * mali_bcast_enable must be called to re-enable the unit. Cores may not be
+ * added or removed when the unit is disabled.
+ */
+void mali_bcast_disable(struct mali_bcast_unit *bcast_unit);
+
+/**
+ * Re-enable broadcast unit
+ *
+ * This resets the masks to include the cores present when mali_bcast_disable was called.
+ */
+MALI_STATIC_INLINE void mali_bcast_enable(struct mali_bcast_unit *bcast_unit)
+{
+	mali_bcast_reset(bcast_unit);
+}
+
+#endif /* __MALI_BROADCAST_H__ */
diff --git a/utgard/r7p0/common/mali_control_timer.c b/utgard/r7p0/common/mali_control_timer.c
new file mode 100644
index 0000000..a1f79fe
--- /dev/null
+++ b/utgard/r7p0/common/mali_control_timer.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2010-2012, 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_utilization.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_dvfs_policy.h"
+#include "mali_control_timer.h"
+
+static u64 period_start_time;
+
+static _mali_osk_timer_t *mali_control_timer;
+static mali_bool timer_running = MALI_FALSE;
+
+static u32 mali_control_timeout = 1000;
+
+void mali_control_timer_add(u32 timeout)
+{
+	_mali_osk_timer_add(mali_control_timer, _mali_osk_time_mstoticks(timeout));
+}
+
+static void mali_control_timer_callback(void *arg)
+{
+	if (mali_utilization_enabled()) {
+		struct mali_gpu_utilization_data *util_data = NULL;
+		u64 time_period = 0;
+		mali_bool need_add_timer = MALI_TRUE;
+
+		/* Calculate gpu utilization */
+		util_data = mali_utilization_calculate(&period_start_time, &time_period, &need_add_timer);
+
+		if (util_data) {
+#if defined(CONFIG_MALI_DVFS)
+			mali_dvfs_policy_realize(util_data, time_period);
+#else
+			mali_utilization_platform_realize(util_data);
+#endif
+
+			if (need_add_timer == MALI_TRUE) {
+				mali_control_timer_add(mali_control_timeout);
+			}
+		}
+	}
+}
+
+/* Init a timer (for now it is used for GPU utilization and dvfs) */
+_mali_osk_errcode_t mali_control_timer_init(void)
+{
+	_mali_osk_device_data data;
+
+	if (_mali_osk_device_data_get(&data) == _MALI_OSK_ERR_OK) {
+		/* Use device specific settings (if defined) */
+		if (data.control_interval != 0) {
+			mali_control_timeout = data.control_interval;
+			MALI_DEBUG_PRINT(2, ("Mali GPU Timer: %u\n", mali_control_timeout));
+		}
+	}
+
+	mali_control_timer = _mali_osk_timer_init();
+	if (mali_control_timer == NULL) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+	_mali_osk_timer_setcallback(mali_control_timer, mali_control_timer_callback, NULL);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_control_timer_term(void)
+{
+	if (mali_control_timer != NULL) {
+		_mali_osk_timer_del(mali_control_timer);
+		timer_running = MALI_FALSE;
+		_mali_osk_timer_term(mali_control_timer);
+		mali_control_timer = NULL;
+	}
+}
+
+mali_bool mali_control_timer_resume(u64 time_now)
+{
+	mali_utilization_data_assert_locked();
+
+	if (timer_running != MALI_TRUE) {
+		timer_running = MALI_TRUE;
+
+		period_start_time = time_now;
+
+		mali_utilization_reset();
+
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+void mali_control_timer_pause(void)
+{
+	mali_utilization_data_assert_locked();
+	if (timer_running == MALI_TRUE) {
+		timer_running = MALI_FALSE;
+	}
+}
+
+void mali_control_timer_suspend(mali_bool suspend)
+{
+	mali_utilization_data_lock();
+
+	if (timer_running == MALI_TRUE) {
+		timer_running = MALI_FALSE;
+
+		mali_utilization_data_unlock();
+
+		if (suspend == MALI_TRUE) {
+			_mali_osk_timer_del(mali_control_timer);
+			mali_utilization_reset();
+		}
+	} else {
+		mali_utilization_data_unlock();
+	}
+}
diff --git a/utgard/r7p0/common/mali_control_timer.h b/utgard/r7p0/common/mali_control_timer.h
new file mode 100644
index 0000000..258af6d
--- /dev/null
+++ b/utgard/r7p0/common/mali_control_timer.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2010-2012, 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_CONTROL_TIMER_H__
+#define __MALI_CONTROL_TIMER_H__
+
+#include "mali_osk.h"
+
+_mali_osk_errcode_t mali_control_timer_init(void);
+
+void mali_control_timer_term(void);
+
+mali_bool mali_control_timer_resume(u64 time_now);
+
+void mali_control_timer_suspend(mali_bool suspend);
+void mali_control_timer_pause(void);
+
+void mali_control_timer_add(u32 timeout);
+
+#endif /* __MALI_CONTROL_TIMER_H__ */
+
diff --git a/utgard/r7p0/common/mali_dlbu.c b/utgard/r7p0/common/mali_dlbu.c
new file mode 100644
index 0000000..01804b6
--- /dev/null
+++ b/utgard/r7p0/common/mali_dlbu.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_dlbu.h"
+#include "mali_memory.h"
+#include "mali_pp.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "mali_hw_core.h"
+
+/**
+ * Size of DLBU registers in bytes
+ */
+#define MALI_DLBU_SIZE 0x400
+
+mali_dma_addr mali_dlbu_phys_addr;
+static mali_io_address mali_dlbu_cpu_addr;
+
+/**
+ * DLBU register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_dlbu_register {
+	MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR = 0x0000, /**< Master tile list physical base address;
+							     31:12 Physical address to the page used for the DLBU
+							     0 DLBU enable - set this bit to 1 enables the AXI bus
+							     between PPs and L2s, setting to 0 disables the router and
+							     no further transactions are sent to DLBU */
+	MALI_DLBU_REGISTER_MASTER_TLLIST_VADDR     = 0x0004, /**< Master tile list virtual base address;
+							     31:12 Virtual address to the page used for the DLBU */
+	MALI_DLBU_REGISTER_TLLIST_VBASEADDR     = 0x0008, /**< Tile list virtual base address;
+							     31:12 Virtual address to the tile list. This address is used when
+							     calculating the call address sent to PP.*/
+	MALI_DLBU_REGISTER_FB_DIM                 = 0x000C, /**< Framebuffer dimension;
+							     23:16 Number of tiles in Y direction-1
+							     7:0 Number of tiles in X direction-1 */
+	MALI_DLBU_REGISTER_TLLIST_CONF       = 0x0010, /**< Tile list configuration;
+							     29:28 select the size of each allocated block: 0=128 bytes, 1=256, 2=512, 3=1024
+							     21:16 2^n number of tiles to be binned to one tile list in Y direction
+							     5:0 2^n number of tiles to be binned to one tile list in X direction */
+	MALI_DLBU_REGISTER_START_TILE_POS         = 0x0014, /**< Start tile positions;
+							     31:24 start position in Y direction for group 1
+							     23:16 start position in X direction for group 1
+							     15:8 start position in Y direction for group 0
+							     7:0 start position in X direction for group 0 */
+	MALI_DLBU_REGISTER_PP_ENABLE_MASK         = 0x0018, /**< PP enable mask;
+							     7 enable PP7 for load balancing
+							     6 enable PP6 for load balancing
+							     5 enable PP5 for load balancing
+							     4 enable PP4 for load balancing
+							     3 enable PP3 for load balancing
+							     2 enable PP2 for load balancing
+							     1 enable PP1 for load balancing
+							     0 enable PP0 for load balancing */
+} mali_dlbu_register;
+
+typedef enum {
+	PP0ENABLE = 0,
+	PP1ENABLE,
+	PP2ENABLE,
+	PP3ENABLE,
+	PP4ENABLE,
+	PP5ENABLE,
+	PP6ENABLE,
+	PP7ENABLE
+} mali_dlbu_pp_enable;
+
+struct mali_dlbu_core {
+	struct mali_hw_core     hw_core;           /**< Common for all HW cores */
+	u32                     pp_cores_mask;     /**< This is a mask for the PP cores whose operation will be controlled by LBU
+						      see MALI_DLBU_REGISTER_PP_ENABLE_MASK register */
+};
+
+_mali_osk_errcode_t mali_dlbu_initialize(void)
+{
+	MALI_DEBUG_PRINT(2, ("Mali DLBU: Initializing\n"));
+
+	if (_MALI_OSK_ERR_OK ==
+	    mali_mmu_get_table_page(&mali_dlbu_phys_addr,
+				    &mali_dlbu_cpu_addr)) {
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_dlbu_terminate(void)
+{
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: terminating\n"));
+
+	if (0 != mali_dlbu_phys_addr && 0 != mali_dlbu_cpu_addr) {
+		mali_mmu_release_table_page(mali_dlbu_phys_addr,
+					    mali_dlbu_cpu_addr);
+		mali_dlbu_phys_addr = 0;
+		mali_dlbu_cpu_addr = 0;
+	}
+}
+
+struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource)
+{
+	struct mali_dlbu_core *core = NULL;
+
+	MALI_DEBUG_PRINT(2, ("Mali DLBU: Creating Mali dynamic load balancing unit: %s\n", resource->description));
+
+	core = _mali_osk_malloc(sizeof(struct mali_dlbu_core));
+	if (core != NULL) {
+		if (mali_hw_core_create(&core->hw_core, resource, MALI_DLBU_SIZE) == _MALI_OSK_ERR_OK) {
+			core->pp_cores_mask = 0;
+			if (mali_dlbu_reset(core) == _MALI_OSK_ERR_OK) {
+				return core;
+			}
+			MALI_PRINT_ERROR(("Failed to reset DLBU %s\n", core->hw_core.description));
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Mali DLBU: Failed to allocate memory for DLBU core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_dlbu_delete(struct mali_dlbu_core *dlbu)
+{
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	mali_hw_core_delete(&dlbu->hw_core);
+	_mali_osk_free(dlbu);
+}
+
+_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu)
+{
+	u32 dlbu_registers[7];
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT;
+
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+
+	MALI_DEBUG_PRINT(4, ("Mali DLBU: mali_dlbu_reset: %s\n", dlbu->hw_core.description));
+
+	dlbu_registers[0] = mali_dlbu_phys_addr | 1; /* bit 0 enables the whole core */
+	dlbu_registers[1] = MALI_DLBU_VIRT_ADDR;
+	dlbu_registers[2] = 0;
+	dlbu_registers[3] = 0;
+	dlbu_registers[4] = 0;
+	dlbu_registers[5] = 0;
+	dlbu_registers[6] = dlbu->pp_cores_mask;
+
+	/* write reset values to core registers */
+	mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR, dlbu_registers, 7);
+
+	err = _MALI_OSK_ERR_OK;
+
+	return err;
+}
+
+void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu)
+{
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+
+	mali_hw_core_register_write(&dlbu->hw_core, MALI_DLBU_REGISTER_PP_ENABLE_MASK, dlbu->pp_cores_mask);
+}
+
+void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group)
+{
+	struct mali_pp_core *pp_core;
+	u32 bcast_id;
+
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	pp_core = mali_group_get_pp_core(group);
+	bcast_id = mali_pp_core_get_bcast_id(pp_core);
+
+	dlbu->pp_cores_mask |= bcast_id;
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: Adding core[%d] New mask= 0x%02x\n", bcast_id, dlbu->pp_cores_mask));
+}
+
+/* Remove a group from the DLBU */
+void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group)
+{
+	struct mali_pp_core *pp_core;
+	u32 bcast_id;
+
+	MALI_DEBUG_ASSERT_POINTER(dlbu);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	pp_core = mali_group_get_pp_core(group);
+	bcast_id = mali_pp_core_get_bcast_id(pp_core);
+
+	dlbu->pp_cores_mask &= ~bcast_id;
+	MALI_DEBUG_PRINT(3, ("Mali DLBU: Removing core[%d] New mask= 0x%02x\n", bcast_id, dlbu->pp_cores_mask));
+}
+
+/* Configure the DLBU for \a job. This needs to be done before the job is started on the groups in the DLBU. */
+void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job)
+{
+	u32 *registers;
+
+	MALI_DEBUG_ASSERT(job);
+	registers = mali_pp_job_get_dlbu_registers(job);
+	MALI_DEBUG_PRINT(4, ("Mali DLBU: Starting job\n"));
+
+	/* Writing 4 registers:
+	 * DLBU registers except the first two (written once at DLBU initialisation / reset) and the PP_ENABLE_MASK register */
+	mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_TLLIST_VBASEADDR, registers, 4);
+
+}
diff --git a/utgard/r7p0/common/mali_dlbu.h b/utgard/r7p0/common/mali_dlbu.h
new file mode 100644
index 0000000..720c105
--- /dev/null
+++ b/utgard/r7p0/common/mali_dlbu.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_DLBU_H__
+#define __MALI_DLBU_H__
+
+#define MALI_DLBU_VIRT_ADDR 0xFFF00000 /* master tile virtual address fixed at this value and mapped into every session */
+
+#include "mali_osk.h"
+
+struct mali_pp_job;
+struct mali_group;
+struct mali_dlbu_core;
+
+extern mali_dma_addr mali_dlbu_phys_addr;
+
+_mali_osk_errcode_t mali_dlbu_initialize(void);
+void mali_dlbu_terminate(void);
+
+struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource);
+void mali_dlbu_delete(struct mali_dlbu_core *dlbu);
+
+_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu);
+
+void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group);
+void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group);
+
+/** @brief Called to update HW after DLBU state changed
+ *
+ * This function must be called after \a mali_dlbu_add_group or \a
+ * mali_dlbu_remove_group to write the updated mask to hardware, unless the
+ * same is accomplished by calling \a mali_dlbu_reset.
+ */
+void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu);
+
+void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job);
+
+#endif /* __MALI_DLBU_H__ */
diff --git a/utgard/r7p0/common/mali_dvfs_policy.c b/utgard/r7p0/common/mali_dvfs_policy.c
new file mode 100644
index 0000000..712bf11
--- /dev/null
+++ b/utgard/r7p0/common/mali_dvfs_policy.c
@@ -0,0 +1,311 @@
+/*
+ * Copyright (C) 2010-2012, 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include "mali_scheduler.h"
+#include "mali_dvfs_policy.h"
+#include "mali_osk_mali.h"
+#include "mali_osk_profiling.h"
+
+#define CLOCK_TUNING_TIME_DEBUG 0
+
+#define MAX_PERFORMANCE_VALUE 256
+#define MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(percent) ((int) ((percent)*(MAX_PERFORMANCE_VALUE)/100.0 + 0.5))
+
+/** The max fps the same as display vsync default 60, can set by module insert parameter */
+int mali_max_system_fps = 60;
+/** A lower limit on their desired FPS default 58, can set by module insert parameter */
+int mali_desired_fps = 58;
+
+static int mali_fps_step1;
+static int mali_fps_step2;
+
+static int clock_step = -1;
+static int cur_clk_step = -1;
+static struct mali_gpu_clock *gpu_clk;
+
+/*Function prototype */
+static int (*mali_gpu_set_freq)(int);
+static int (*mali_gpu_get_freq)(void);
+
+static mali_bool mali_dvfs_enabled = MALI_FALSE;
+
+#define NUMBER_OF_NANOSECONDS_PER_SECOND  1000000000ULL
+static u32 calculate_window_render_fps(u64 time_period)
+{
+	u32 max_window_number;
+	u64 tmp;
+	u64 max = time_period;
+	u32 leading_zeroes;
+	u32 shift_val;
+	u32 time_period_shift;
+	u32 max_window_number_shift;
+	u32 ret_val;
+
+	max_window_number = mali_session_max_window_num();
+
+	/* To avoid float division, extend the dividend to ns unit */
+	tmp = (u64)max_window_number * NUMBER_OF_NANOSECONDS_PER_SECOND;
+	if (tmp > time_period) {
+		max = tmp;
+	}
+
+	/*
+	 * We may have 64-bit values, a dividend or a divisor or both
+	 * To avoid dependencies to a 64-bit divider, we shift down the two values
+	 * equally first.
+	 */
+	leading_zeroes = _mali_osk_clz((u32)(max >> 32));
+	shift_val = 32 - leading_zeroes;
+
+	time_period_shift = (u32)(time_period >> shift_val);
+	max_window_number_shift = (u32)(tmp >> shift_val);
+
+	ret_val = max_window_number_shift / time_period_shift;
+
+	return ret_val;
+}
+
+static bool mali_pickup_closest_avail_clock(int target_clock_mhz, mali_bool pick_clock_up)
+{
+	int i = 0;
+	bool clock_changed = false;
+
+	/* Round up the closest available frequency step for target_clock_hz */
+	for (i = 0; i < gpu_clk->num_of_steps; i++) {
+		/* Find the first item > target_clock_hz */
+		if (((int)(gpu_clk->item[i].clock) - target_clock_mhz) > 0) {
+			break;
+		}
+	}
+
+	/* If the target clock greater than the maximum clock just pick the maximum one*/
+	if (i == gpu_clk->num_of_steps) {
+		i = gpu_clk->num_of_steps - 1;
+	} else {
+		if ((!pick_clock_up) && (i > 0)) {
+			i = i - 1;
+		}
+	}
+
+	clock_step = i;
+	if (cur_clk_step != clock_step) {
+		clock_changed = true;
+	}
+
+	return clock_changed;
+}
+
+void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period)
+{
+	int under_perform_boundary_value = 0;
+	int over_perform_boundary_value = 0;
+	int current_fps = 0;
+	int current_gpu_util = 0;
+	bool clock_changed = false;
+#if CLOCK_TUNING_TIME_DEBUG
+	struct timeval start;
+	struct timeval stop;
+	unsigned int elapse_time;
+
+	do_gettimeofday(&start);
+#endif
+	u32 window_render_fps;
+
+	if (gpu_clk == NULL) {
+		MALI_DEBUG_PRINT(2, ("Enable DVFS but patform doesn't Support freq change.\n"));
+		return;
+	}
+
+	window_render_fps = calculate_window_render_fps(time_period);
+
+	current_fps = window_render_fps;
+	current_gpu_util = data->utilization_gpu;
+
+	/* Get the specific under_perform_boundary_value and over_perform_boundary_value */
+	if ((mali_desired_fps <= current_fps) && (current_fps < mali_max_system_fps)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(90);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70);
+	} else if ((mali_fps_step1 <= current_fps) && (current_fps < mali_desired_fps)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35);
+	} else if ((mali_fps_step2 <= current_fps) && (current_fps < mali_fps_step1)) {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(50);
+	} else {
+		under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55);
+		over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35);
+	}
+
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: gpu util = %d\n", current_gpu_util));
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: under_perform = %d,  over_perform = %d\n", under_perform_boundary_value, over_perform_boundary_value));
+	MALI_DEBUG_PRINT(5, ("Using ARM power policy: render fps = %d,  pressure render fps = %d\n", current_fps, window_render_fps));
+
+	/* Get current clock value */
+	cur_clk_step = mali_gpu_get_freq();
+
+	/* Consider offscreen */
+	if (current_fps == 0) {
+		/* GP or PP under perform, need to give full power */
+		if (current_gpu_util > over_perform_boundary_value) {
+			if (cur_clk_step != gpu_clk->num_of_steps - 1) {
+				clock_changed = true;
+				clock_step = gpu_clk->num_of_steps - 1;
+			}
+		}
+
+		/* If GPU is idle, use lowest power */
+		if (current_gpu_util == 0) {
+			if (cur_clk_step != 0) {
+				clock_changed = true;
+				clock_step = 0;
+			}
+		}
+
+		goto real_setting;
+	}
+
+	/* 2. Calculate target clock if the GPU clock can be tuned */
+	if (-1 != cur_clk_step) {
+		int target_clk_mhz = -1;
+		mali_bool pick_clock_up = MALI_TRUE;
+
+		if (current_gpu_util > under_perform_boundary_value) {
+			/* when under perform, need to consider the fps part */
+			target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util * mali_desired_fps / under_perform_boundary_value / current_fps;
+			pick_clock_up = MALI_TRUE;
+		} else if (current_gpu_util < over_perform_boundary_value) {
+			/* when over perform, did't need to consider fps, system didn't want to reach desired fps */
+			target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util / under_perform_boundary_value;
+			pick_clock_up = MALI_FALSE;
+		}
+
+		if (-1 != target_clk_mhz) {
+			clock_changed = mali_pickup_closest_avail_clock(target_clk_mhz, pick_clock_up);
+		}
+	}
+
+real_setting:
+	if (clock_changed) {
+		mali_gpu_set_freq(clock_step);
+
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+					      gpu_clk->item[clock_step].clock,
+					      gpu_clk->item[clock_step].vol / 1000,
+					      0, 0, 0);
+	}
+
+#if CLOCK_TUNING_TIME_DEBUG
+	do_gettimeofday(&stop);
+
+	elapse_time = timeval_to_ns(&stop) - timeval_to_ns(&start);
+	MALI_DEBUG_PRINT(2, ("Using ARM power policy:  eclapse time = %d\n", elapse_time));
+#endif
+}
+
+_mali_osk_errcode_t mali_dvfs_policy_init(void)
+{
+	_mali_osk_device_data data;
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+	if (_mali_osk_device_data_get(&data) == _MALI_OSK_ERR_OK) {
+		if ((NULL != data.get_clock_info) && (NULL != data.set_freq) && (data.get_freq != NULL)) {
+			MALI_DEBUG_PRINT(2, ("Mali DVFS init: using arm dvfs policy\n"));
+
+
+			mali_fps_step1 = mali_max_system_fps / 3;
+			mali_fps_step2 = mali_max_system_fps / 5;
+
+			data.get_clock_info(&gpu_clk);
+
+			if (gpu_clk != NULL) {
+#ifdef DEBUG
+				int i;
+
+				for (i = 0; i < gpu_clk->num_of_steps; i++) {
+					MALI_DEBUG_PRINT(5, ("mali gpu clock info: step%d clock(%d)Hz,vol(%d)\n",
+							     i, gpu_clk->item[i].clock, gpu_clk->item[i].vol));
+				}
+#endif
+			} else {
+				MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform didn't define enough info for ddk to do DVFS\n"));
+			}
+
+			mali_gpu_get_freq = data.get_freq;
+			mali_gpu_set_freq = data.set_freq;
+
+			if ((gpu_clk != NULL) && (gpu_clk->num_of_steps > 0)
+			    && (NULL != mali_gpu_get_freq) && (mali_gpu_set_freq != NULL)) {
+				mali_dvfs_enabled = MALI_TRUE;
+			}
+		} else {
+			MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform function callback incomplete, need check mali_gpu_device_data in platform .\n"));
+		}
+	} else {
+		err = _MALI_OSK_ERR_FAULT;
+		MALI_DEBUG_PRINT(2, ("Mali DVFS init: get platform data error .\n"));
+	}
+
+	return err;
+}
+
+/*
+ * Always give full power when start a new period,
+ * if mali dvfs enabled, for performance consideration
+ */
+void mali_dvfs_policy_new_period(void)
+{
+	/* Always give full power when start a new period */
+	unsigned int cur_clk_step = 0;
+
+	cur_clk_step = mali_gpu_get_freq();
+
+	if (cur_clk_step != (gpu_clk->num_of_steps - 1)) {
+		mali_gpu_set_freq(gpu_clk->num_of_steps - 1);
+
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, gpu_clk->item[gpu_clk->num_of_steps - 1].clock,
+					      gpu_clk->item[gpu_clk->num_of_steps - 1].vol / 1000, 0, 0, 0);
+	}
+}
+
+mali_bool mali_dvfs_policy_enabled(void)
+{
+	return mali_dvfs_enabled;
+}
+
+#if defined(CONFIG_MALI400_PROFILING)
+void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item)
+{
+	if (mali_platform_device != NULL) {
+
+		struct mali_gpu_device_data *device_data = NULL;
+
+		device_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data;
+
+		if ((NULL != device_data->get_clock_info) && (device_data->get_freq != NULL)) {
+
+			int cur_clk_step = device_data->get_freq();
+			struct mali_gpu_clock *mali_gpu_clk = NULL;
+
+			device_data->get_clock_info(&mali_gpu_clk);
+			clk_item->clock = mali_gpu_clk->item[cur_clk_step].clock;
+			clk_item->vol = mali_gpu_clk->item[cur_clk_step].vol;
+		} else {
+			MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: platform function callback incomplete, need check mali_gpu_device_data in platform .\n"));
+		}
+	}
+}
+#endif
+
diff --git a/utgard/r7p0/common/mali_dvfs_policy.h b/utgard/r7p0/common/mali_dvfs_policy.h
new file mode 100644
index 0000000..3d3193a
--- /dev/null
+++ b/utgard/r7p0/common/mali_dvfs_policy.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2010-2012, 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_DVFS_POLICY_H__
+#define __MALI_DVFS_POLICY_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period);
+
+_mali_osk_errcode_t mali_dvfs_policy_init(void);
+
+void mali_dvfs_policy_new_period(void);
+
+mali_bool mali_dvfs_policy_enabled(void);
+
+#if defined(CONFIG_MALI400_PROFILING)
+void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif/* __MALI_DVFS_POLICY_H__ */
diff --git a/utgard/r7p0/common/mali_executor.c b/utgard/r7p0/common/mali_executor.c
new file mode 100644
index 0000000..d8bf822
--- /dev/null
+++ b/utgard/r7p0/common/mali_executor.c
@@ -0,0 +1,2704 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_executor.h"
+#include "mali_scheduler.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_pp.h"
+#include "mali_pp_job.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+#include "mali_timeline.h"
+#include "mali_osk_profiling.h"
+#include "mali_session.h"
+#include "mali_osk_mali.h"
+
+/*
+ * If dma_buf with map on demand is used, we defer job deletion and job queue
+ * if in atomic context, since both might sleep.
+ */
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_DELETE 1
+#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_QUEUE 1
+#endif /* !defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) */
+
+/*
+ * ---------- static type definitions (structs, enums, etc) ----------
+ */
+
+enum mali_executor_state_t {
+	EXEC_STATE_NOT_PRESENT, /* Virtual group on Mali-300/400 (do not use) */
+	EXEC_STATE_DISABLED,    /* Disabled by core scaling (do not use) */
+	EXEC_STATE_EMPTY,       /* No child groups for virtual group (do not use) */
+	EXEC_STATE_INACTIVE,    /* Can be used, but must be activate first */
+	EXEC_STATE_IDLE,        /* Active and ready to be used */
+	EXEC_STATE_WORKING,     /* Executing a job */
+};
+
+/*
+ * ---------- global variables (exported due to inline functions) ----------
+ */
+
+/* Lock for this module (protecting all HW access except L2 caches) */
+_mali_osk_spinlock_irq_t *mali_executor_lock_obj;
+
+mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX];
+
+/*
+ * ---------- static variables ----------
+ */
+
+/* Used to defer job scheduling */
+static _mali_osk_wq_work_t *executor_wq_high_pri;
+
+/* Store version from GP and PP (user space wants to know this) */
+static u32 pp_version;
+static u32 gp_version;
+
+/* List of physical PP groups which are disabled by some external source */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_disabled);
+static u32 group_list_disabled_count;
+
+/* List of groups which can be used, but activate first */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_inactive);
+static u32 group_list_inactive_count;
+
+/* List of groups which are active and ready to be used */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_idle);
+static u32 group_list_idle_count;
+
+/* List of groups which are executing a job */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_working);
+static u32 group_list_working_count;
+
+/* Virtual group (if any) */
+static struct mali_group *virtual_group;
+
+/* Virtual group state is tracked with a state variable instead of 4 lists */
+static enum mali_executor_state_t virtual_group_state = EXEC_STATE_NOT_PRESENT;
+
+/* GP group */
+static struct mali_group *gp_group;
+
+/* GP group state is tracked with a state variable instead of 4 lists */
+static enum mali_executor_state_t gp_group_state = EXEC_STATE_NOT_PRESENT;
+
+static u32 gp_returned_cookie;
+
+/* Total number of physical PP cores present */
+static u32 num_physical_pp_cores_total;
+
+/* Number of physical cores which are enabled */
+static u32 num_physical_pp_cores_enabled;
+
+/* Enable or disable core scaling */
+static mali_bool core_scaling_enabled = MALI_TRUE;
+
+/* Variables to allow safe pausing of the scheduler */
+static _mali_osk_wait_queue_t *executor_working_wait_queue;
+static u32 pause_count;
+
+/* PP cores haven't been enabled because of some pp cores haven't been disabled. */
+static int core_scaling_delay_up_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+
+/* Variables used to implement notify pp core changes to userspace when core scaling
+ * is finished in mali_executor_complete_group() function. */
+static _mali_osk_wq_work_t *executor_wq_notify_core_change;
+static _mali_osk_wait_queue_t *executor_notify_core_change_wait_queue;
+
+/*
+ * ---------- Forward declaration of static functions ----------
+ */
+static mali_bool mali_executor_is_suspended(void *data);
+static mali_bool mali_executor_is_working(void);
+static void mali_executor_disable_empty_virtual(void);
+static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group);
+static mali_bool mali_executor_has_virtual_group(void);
+static mali_bool mali_executor_virtual_group_is_usable(void);
+static void mali_executor_schedule(void);
+static void mali_executor_wq_schedule(void *arg);
+static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job);
+static void mali_executor_complete_group(struct mali_group *group,
+		mali_bool success,
+		struct mali_gp_job **gp_job_done,
+		struct mali_pp_job **pp_job_done);
+static void mali_executor_change_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *old_list,
+		u32 *old_count,
+		_mali_osk_list_t *new_list,
+		u32 *new_count);
+static mali_bool mali_executor_group_is_in_state(struct mali_group *group,
+		enum mali_executor_state_t state);
+
+static void mali_executor_group_enable_internal(struct mali_group *group);
+static void mali_executor_group_disable_internal(struct mali_group *group);
+static void mali_executor_core_scale(unsigned int target_core_nr);
+static void mali_executor_core_scale_in_group_complete(struct mali_group *group);
+static void mali_executor_notify_core_change(u32 num_cores);
+static void mali_executor_wq_notify_core_change(void *arg);
+static void mali_executor_change_group_status_disabled(struct mali_group *group);
+static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group);
+static void mali_executor_set_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *new_list,
+		u32 *new_count);
+
+/*
+ * ---------- Actual implementation ----------
+ */
+
+_mali_osk_errcode_t mali_executor_initialize(void)
+{
+	mali_executor_lock_obj = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_EXECUTOR);
+	if (mali_executor_lock_obj == NULL) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_wq_high_pri = _mali_osk_wq_create_work_high_pri(mali_executor_wq_schedule, NULL);
+	if (executor_wq_high_pri == NULL) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_working_wait_queue = _mali_osk_wait_queue_init();
+	if (executor_working_wait_queue == NULL) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_wq_notify_core_change = _mali_osk_wq_create_work(mali_executor_wq_notify_core_change, NULL);
+	if (executor_wq_notify_core_change == NULL) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	executor_notify_core_change_wait_queue = _mali_osk_wait_queue_init();
+	if (executor_notify_core_change_wait_queue == NULL) {
+		mali_executor_terminate();
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_executor_terminate(void)
+{
+	if (executor_notify_core_change_wait_queue != NULL) {
+		_mali_osk_wait_queue_term(executor_notify_core_change_wait_queue);
+		executor_notify_core_change_wait_queue = NULL;
+	}
+
+	if (executor_wq_notify_core_change != NULL) {
+		_mali_osk_wq_delete_work(executor_wq_notify_core_change);
+		executor_wq_notify_core_change = NULL;
+	}
+
+	if (executor_working_wait_queue != NULL) {
+		_mali_osk_wait_queue_term(executor_working_wait_queue);
+		executor_working_wait_queue = NULL;
+	}
+
+	if (executor_wq_high_pri != NULL) {
+		_mali_osk_wq_delete_work(executor_wq_high_pri);
+		executor_wq_high_pri = NULL;
+	}
+
+	if (mali_executor_lock_obj != NULL) {
+		_mali_osk_spinlock_irq_term(mali_executor_lock_obj);
+		mali_executor_lock_obj = NULL;
+	}
+}
+
+void mali_executor_populate(void)
+{
+	u32 num_groups;
+	u32 i;
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	/* Do we have a virtual group? */
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (mali_group_is_virtual(group)) {
+			virtual_group = group;
+			virtual_group_state = EXEC_STATE_INACTIVE;
+			break;
+		}
+	}
+
+	/* Find all the available physical GP and PP cores */
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (group != NULL) {
+			struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+			struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+
+			if (!mali_group_is_virtual(group)) {
+				if (pp_core != NULL) {
+					if (pp_version == 0) {
+						/* Retrieve PP version from the first available PP core */
+						pp_version = mali_pp_core_get_version(pp_core);
+					}
+
+					if (virtual_group != NULL) {
+						mali_executor_lock();
+						mali_group_add_group(virtual_group, group);
+						mali_executor_unlock();
+					} else {
+						_mali_osk_list_add(&group->executor_list, &group_list_inactive);
+						group_list_inactive_count++;
+					}
+
+					num_physical_pp_cores_total++;
+				} else {
+					MALI_DEBUG_ASSERT_POINTER(gp_core);
+
+					if (gp_version == 0) {
+						/* Retrieve GP version */
+						gp_version = mali_gp_core_get_version(gp_core);
+					}
+
+					gp_group = group;
+					gp_group_state = EXEC_STATE_INACTIVE;
+				}
+
+			}
+		}
+	}
+
+	num_physical_pp_cores_enabled = num_physical_pp_cores_total;
+}
+
+void mali_executor_depopulate(void)
+{
+	struct mali_group *group;
+	struct mali_group *temp;
+
+	MALI_DEBUG_ASSERT(gp_group_state != EXEC_STATE_WORKING);
+
+	if (gp_group != NULL) {
+		mali_group_delete(gp_group);
+		gp_group = NULL;
+	}
+
+	MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_WORKING);
+
+	if (virtual_group != NULL) {
+		mali_group_delete(virtual_group);
+		virtual_group = NULL;
+	}
+
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&group_list_working));
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+		mali_group_delete(group);
+	}
+}
+
+void mali_executor_suspend(void)
+{
+	mali_executor_lock();
+
+	/* Increment the pause_count so that no more jobs will be scheduled */
+	pause_count++;
+
+	mali_executor_unlock();
+
+	_mali_osk_wait_queue_wait_event(executor_working_wait_queue,
+					mali_executor_is_suspended, NULL);
+
+	/*
+	 * mali_executor_complete_XX() leaves jobs in idle state.
+	 * deactivate option is used when we are going to power down
+	 * the entire GPU (OS suspend) and want a consistent SW vs HW
+	 * state.
+	 */
+	mali_executor_lock();
+
+	mali_executor_deactivate_list_idle(MALI_TRUE);
+
+	/*
+	 * The following steps are used to deactive all of activated
+	 * (MALI_GROUP_STATE_ACTIVE) and activating (MALI_GROUP
+	 * _STAET_ACTIVATION_PENDING) groups, to make sure the variable
+	 * pd_mask_wanted is equal with 0. */
+	if (mali_group_get_state(gp_group) != MALI_GROUP_STATE_INACTIVE) {
+		gp_group_state = EXEC_STATE_INACTIVE;
+		mali_group_deactivate(gp_group);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		if (MALI_GROUP_STATE_INACTIVE
+		    != mali_group_get_state(virtual_group)) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+			mali_group_deactivate(virtual_group);
+		}
+	}
+
+	if (group_list_inactive_count > 0) {
+		struct mali_group *group;
+		struct mali_group *temp;
+
+		_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+					    &group_list_inactive,
+					    struct mali_group, executor_list) {
+			if (MALI_GROUP_STATE_ACTIVATION_PENDING
+			    == mali_group_get_state(group)) {
+				mali_group_deactivate(group);
+			}
+
+			/*
+			 * On mali-450 platform, we may have physical group in the group inactive
+			 * list, and its state is MALI_GROUP_STATE_ACTIVATION_PENDING, so we only
+			 * deactivate it is not enough, we still also need add it back to virtual group.
+			 * And now, virtual group must be in INACTIVE state, so it's safe to add
+			 * physical group to virtual group at this point.
+			 */
+			if (virtual_group != NULL) {
+				_mali_osk_list_delinit(&group->executor_list);
+				group_list_inactive_count--;
+
+				mali_group_add_group(virtual_group, group);
+			}
+		}
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_resume(void)
+{
+	mali_executor_lock();
+
+	/* Decrement pause_count to allow scheduling again (if it reaches 0) */
+	pause_count--;
+	if (pause_count == 0) {
+		mali_executor_schedule();
+	}
+
+	mali_executor_unlock();
+}
+
+u32 mali_executor_get_num_cores_total(void)
+{
+	return num_physical_pp_cores_total;
+}
+
+u32 mali_executor_get_num_cores_enabled(void)
+{
+	return num_physical_pp_cores_enabled;
+}
+
+struct mali_pp_core *mali_executor_get_virtual_pp(void)
+{
+	MALI_DEBUG_ASSERT_POINTER(virtual_group);
+	MALI_DEBUG_ASSERT_POINTER(virtual_group->pp_core);
+	return virtual_group->pp_core;
+}
+
+struct mali_group *mali_executor_get_virtual_group(void)
+{
+	return virtual_group;
+}
+
+void mali_executor_zap_all_active(struct mali_session_data *session)
+{
+	struct mali_group *group;
+	struct mali_group *temp;
+	mali_bool ret;
+
+	mali_executor_lock();
+
+	/*
+	 * This function is a bit complicated because
+	 * mali_group_zap_session() can fail. This only happens because the
+	 * group is in an unhandled page fault status.
+	 * We need to make sure this page fault is handled before we return,
+	 * so that we know every single outstanding MMU transactions have
+	 * completed. This will allow caller to safely remove physical pages
+	 * when we have returned.
+	 */
+
+	MALI_DEBUG_ASSERT(gp_group != NULL);
+	ret = mali_group_zap_session(gp_group, session);
+	if (ret == MALI_FALSE) {
+		struct mali_gp_job *gp_job = NULL;
+
+		mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL);
+
+		MALI_DEBUG_ASSERT_POINTER(gp_job);
+
+		/* GP job completed, make sure it is freed */
+		mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		ret = mali_group_zap_session(virtual_group, session);
+		if (ret == MALI_FALSE) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job);
+
+			if (pp_job != NULL) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_FALSE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working,
+				    struct mali_group, executor_list) {
+		ret = mali_group_zap_session(group, session);
+		if (ret == MALI_FALSE) {
+			ret = mali_group_zap_session(group, session);
+			if (ret == MALI_FALSE) {
+				struct mali_pp_job *pp_job = NULL;
+
+				mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job);
+
+				if (pp_job != NULL) {
+					/* PP job completed, free it */
+					mali_scheduler_complete_pp_job(pp_job,
+								       0, MALI_FALSE,
+								       MALI_TRUE);
+				}
+			}
+		}
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule)
+{
+	if (mask != MALI_SCHEDULER_MASK_EMPTY) {
+		if (deferred_schedule == MALI_TRUE) {
+			_mali_osk_wq_schedule_work_high_pri(executor_wq_high_pri);
+		} else {
+			/* Schedule from this thread*/
+			mali_executor_lock();
+			mali_executor_schedule();
+			mali_executor_unlock();
+		}
+	}
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+	mali_bool time_out = MALI_FALSE;
+
+	MALI_DEBUG_PRINT(4, ("Executor: GP interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+
+	if (mali_group_has_timed_out(group)) {
+		int_result = MALI_INTERRUPT_RESULT_ERROR;
+		time_out = MALI_TRUE;
+		MALI_PRINT(("Executor GP: Job %d Timeout on %s\n",
+			    mali_gp_job_get_id(group->gp_running_job),
+			    mali_group_core_description(group)));
+	} else {
+		int_result = mali_group_get_interrupt_result_gp(group);
+		if (int_result == MALI_INTERRUPT_RESULT_NONE) {
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (int_result == MALI_INTERRUPT_RESULT_NONE) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#else
+	MALI_DEBUG_ASSERT(int_result != MALI_INTERRUPT_RESULT_NONE);
+#endif
+
+	mali_group_mask_all_interrupts_gp(group);
+
+	if (int_result == MALI_INTERRUPT_RESULT_SUCCESS_VS) {
+		if (mali_group_gp_is_active(group)) {
+			/* Only VS completed so far, while PLBU is still active */
+
+			/* Enable all but the current interrupt */
+			mali_group_enable_interrupts_gp(group, int_result);
+
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_OK;
+		}
+	} else if (int_result == MALI_INTERRUPT_RESULT_SUCCESS_PLBU) {
+		if (mali_group_gp_is_active(group)) {
+			/* Only PLBU completed so far, while VS is still active */
+
+			/* Enable all but the current interrupt */
+			mali_group_enable_interrupts_gp(group, int_result);
+
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_OK;
+		}
+	} else if (int_result == MALI_INTERRUPT_RESULT_OOM) {
+		struct mali_gp_job *job = mali_group_get_running_gp_job(group);
+
+		/* PLBU out of mem */
+		MALI_DEBUG_PRINT(3, ("Executor: PLBU needs more heap memory\n"));
+
+#if defined(CONFIG_MALI400_PROFILING)
+		/* Give group a chance to generate a SUSPEND event */
+		mali_group_oom(group);
+#endif
+
+		/*
+		 * no need to hold interrupt raised while
+		 * waiting for more memory.
+		 */
+		mali_executor_send_gp_oom_to_user(job);
+
+		mali_executor_unlock();
+
+		return _MALI_OSK_ERR_OK;
+	}
+
+	/* We should now have a real interrupt to handle */
+
+	MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n",
+			     mali_group_core_description(group),
+			     (int_result == MALI_INTERRUPT_RESULT_ERROR) ?
+			     "ERROR" : "success"));
+
+	if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) {
+		/* Don't bother to do processing of errors in upper half */
+		mali_executor_unlock();
+
+		if (time_out == MALI_FALSE) {
+			mali_group_schedule_bottom_half_gp(group);
+		}
+	} else {
+		struct mali_gp_job *job;
+		mali_bool success;
+
+		if (time_out == MALI_TRUE) {
+			mali_group_dump_status(group);
+		}
+
+		success = (int_result != MALI_INTERRUPT_RESULT_ERROR) ?
+			  MALI_TRUE : MALI_FALSE;
+
+		mali_executor_complete_group(group, success, &job, NULL);
+
+		mali_executor_unlock();
+
+		/* GP jobs always fully complete */
+		MALI_DEBUG_ASSERT(job != NULL);
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_gp_job(job, success,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+	mali_bool time_out = MALI_FALSE;
+
+	MALI_DEBUG_PRINT(4, ("Executor: PP interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (in_upper_half) {
+		if (mali_group_is_in_virtual(group)) {
+			/* Child groups should never handle PP interrupts */
+			MALI_DEBUG_ASSERT(!mali_group_has_timed_out(group));
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+	MALI_DEBUG_ASSERT(!mali_group_is_in_virtual(group));
+
+	if (mali_group_has_timed_out(group)) {
+		int_result = MALI_INTERRUPT_RESULT_ERROR;
+		time_out = MALI_TRUE;
+		MALI_PRINT(("Executor PP: Job %d Timeout on %s\n",
+			    mali_pp_job_get_id(group->pp_running_job),
+			    mali_group_core_description(group)));
+	} else {
+		int_result = mali_group_get_interrupt_result_pp(group);
+		if (int_result == MALI_INTERRUPT_RESULT_NONE) {
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (int_result == MALI_INTERRUPT_RESULT_NONE) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	} else if (int_result == MALI_INTERRUPT_RESULT_SUCCESS) {
+		if (mali_group_is_virtual(group) && mali_group_pp_is_active(group)) {
+			/* Some child groups are still working, so nothing to do right now */
+			mali_executor_unlock();
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+#else
+	MALI_DEBUG_ASSERT(int_result != MALI_INTERRUPT_RESULT_NONE);
+#endif
+
+	/* We should now have a real interrupt to handle */
+
+	MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n",
+			     mali_group_core_description(group),
+			     (int_result == MALI_INTERRUPT_RESULT_ERROR) ?
+			     "ERROR" : "success"));
+
+	if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) {
+		/* Don't bother to do processing of errors in upper half */
+		mali_group_mask_all_interrupts_pp(group);
+		mali_executor_unlock();
+
+		if (time_out == MALI_FALSE) {
+			mali_group_schedule_bottom_half_pp(group);
+		}
+	} else {
+		struct mali_pp_job *job = NULL;
+		mali_bool success;
+
+		if (time_out == MALI_TRUE) {
+			mali_group_dump_status(group);
+		}
+
+		success = (int_result == MALI_INTERRUPT_RESULT_SUCCESS) ?
+			  MALI_TRUE : MALI_FALSE;
+
+		mali_executor_complete_group(group, success, NULL, &job);
+
+		mali_executor_unlock();
+
+		if (job != NULL) {
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(job,
+						       num_physical_pp_cores_total,
+						       MALI_TRUE, MALI_TRUE);
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group,
+		mali_bool in_upper_half)
+{
+	enum mali_interrupt_result int_result;
+
+	MALI_DEBUG_PRINT(4, ("Executor: MMU interrupt from %s in %s half\n",
+			     mali_group_core_description(group),
+			     in_upper_half ? "upper" : "bottom"));
+
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_is_working(group));
+
+	int_result = mali_group_get_interrupt_result_mmu(group);
+	if (int_result == MALI_INTERRUPT_RESULT_NONE) {
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (int_result == MALI_INTERRUPT_RESULT_NONE) {
+		/* No interrupts signalled, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#else
+	MALI_DEBUG_ASSERT(int_result == MALI_INTERRUPT_RESULT_ERROR);
+#endif
+
+	/* We should now have a real interrupt to handle */
+
+	if (in_upper_half) {
+		/* Don't bother to do processing of errors in upper half */
+
+		struct mali_group *parent = group->parent_group;
+
+		mali_mmu_mask_all_interrupts(group->mmu);
+
+		mali_executor_unlock();
+
+		if (parent == NULL) {
+			mali_group_schedule_bottom_half_mmu(group);
+		} else {
+			mali_group_schedule_bottom_half_mmu(parent);
+		}
+
+	} else {
+		struct mali_gp_job *gp_job = NULL;
+		struct mali_pp_job *pp_job = NULL;
+
+#ifdef DEBUG
+
+		u32 fault_address = mali_mmu_get_page_fault_addr(group->mmu);
+		u32 status = mali_mmu_get_status(group->mmu);
+
+		MALI_DEBUG_PRINT(2, ("Executor: Mali page fault detected at 0x%x from bus id %d of type %s on %s\n",
+				     (void *)(uintptr_t)fault_address,
+				     (status >> 6) & 0x1F,
+				     (status & 32) ? "write" : "read",
+				     group->mmu->hw_core.description));
+		MALI_DEBUG_PRINT(3, ("Executor: MMU rawstat = 0x%08X, MMU status = 0x%08X\n",
+				     mali_mmu_get_rawstat(group->mmu), status));
+		mali_mmu_pagedir_diag(mali_session_get_page_directory(group->session), fault_address);
+#endif
+
+		mali_executor_complete_group(group, MALI_FALSE, &gp_job, &pp_job);
+
+		mali_executor_unlock();
+
+		if (gp_job != NULL) {
+			MALI_DEBUG_ASSERT(pp_job == NULL);
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+						       MALI_TRUE, MALI_TRUE);
+		} else if (pp_job != NULL) {
+			MALI_DEBUG_ASSERT(gp_job == NULL);
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(pp_job,
+						       num_physical_pp_cores_total,
+						       MALI_TRUE, MALI_TRUE);
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups)
+{
+	u32 i;
+	mali_bool child_groups_activated = MALI_FALSE;
+	mali_bool do_schedule = MALI_FALSE;
+#if defined(DEBUG)
+	u32 num_activated = 0;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(groups);
+	MALI_DEBUG_ASSERT(num_groups > 0);
+
+	mali_executor_lock();
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups\n", num_groups));
+
+	for (i = 0; i < num_groups; i++) {
+		MALI_DEBUG_PRINT(3, ("Executor: powering up group %s\n",
+				     mali_group_core_description(groups[i])));
+
+		mali_group_power_up(groups[i]);
+
+		if ((MALI_GROUP_STATE_ACTIVATION_PENDING != mali_group_get_state(groups[i]) ||
+		     (mali_executor_group_is_in_state(groups[i], EXEC_STATE_INACTIVE) != MALI_TRUE))) {
+			/* nothing more to do for this group */
+			continue;
+		}
+
+		MALI_DEBUG_PRINT(3, ("Executor: activating group %s\n",
+				     mali_group_core_description(groups[i])));
+
+#if defined(DEBUG)
+		num_activated++;
+#endif
+
+		if (mali_group_is_in_virtual(groups[i])) {
+			/*
+			 * At least one child group of virtual group is powered on.
+			 */
+			child_groups_activated = MALI_TRUE;
+		} else if (mali_group_is_virtual(groups[i]) == MALI_FALSE) {
+			/* Set gp and pp not in virtual to active. */
+			mali_group_set_active(groups[i]);
+		}
+
+		/* Move group from inactive to idle list */
+		if (groups[i] == gp_group) {
+			MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE ==
+					  gp_group_state);
+			gp_group_state = EXEC_STATE_IDLE;
+		} else if (MALI_FALSE == mali_group_is_in_virtual(groups[i])
+			   && MALI_FALSE == mali_group_is_virtual(groups[i])) {
+			MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_group_is_in_state(groups[i],
+					  EXEC_STATE_INACTIVE));
+
+			mali_executor_change_state_pp_physical(groups[i],
+							       &group_list_inactive,
+							       &group_list_inactive_count,
+							       &group_list_idle,
+							       &group_list_idle_count);
+		}
+
+		do_schedule = MALI_TRUE;
+	}
+
+	if (mali_executor_has_virtual_group() &&
+	    MALI_TRUE == child_groups_activated &&
+	    MALI_GROUP_STATE_ACTIVATION_PENDING ==
+	    mali_group_get_state(virtual_group)) {
+		/*
+		 * Try to active virtual group while it may be not sucessful every time,
+		 * because there is one situation that not all of child groups are powered on
+		 * in one time and virtual group is in activation pending state.
+		 */
+		if (mali_group_set_active(virtual_group)) {
+			/* Move group from inactive to idle */
+			MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE ==
+					  virtual_group_state);
+			virtual_group_state = EXEC_STATE_IDLE;
+
+			MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u  physical activated, 1 virtual activated.\n", num_groups, num_activated));
+		} else {
+			MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated));
+		}
+	} else {
+		MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated));
+	}
+
+	if (do_schedule == MALI_TRUE) {
+		/* Trigger a schedule */
+		mali_executor_schedule();
+	}
+
+	mali_executor_unlock();
+}
+
+void mali_executor_group_power_down(struct mali_group *groups[],
+				    u32 num_groups)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(groups);
+	MALI_DEBUG_ASSERT(num_groups > 0);
+
+	mali_executor_lock();
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups\n", num_groups));
+
+	for (i = 0; i < num_groups; i++) {
+		/* Groups must be either disabled or inactive. while for virtual group,
+		 * it maybe in empty state, because when we meet pm_runtime_suspend,
+		 * virtual group could be powered off, and before we acquire mali_executor_lock,
+		 * we must release mali_pm_state_lock, if there is a new physical job was queued,
+		 * all of physical groups in virtual group could be pulled out, so we only can
+		 * powered down an empty virtual group. Those physical groups will be powered
+		 * up in following pm_runtime_resume callback function.
+		 */
+		MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(groups[i],
+				  EXEC_STATE_DISABLED) ||
+				  mali_executor_group_is_in_state(groups[i],
+						  EXEC_STATE_INACTIVE) ||
+				  mali_executor_group_is_in_state(groups[i],
+						  EXEC_STATE_EMPTY));
+
+		MALI_DEBUG_PRINT(3, ("Executor: powering down group %s\n",
+				     mali_group_core_description(groups[i])));
+
+		mali_group_power_down(groups[i]);
+	}
+
+	MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups completed\n", num_groups));
+
+	mali_executor_unlock();
+}
+
+void mali_executor_abort_session(struct mali_session_data *session)
+{
+	struct mali_group *group;
+	struct mali_group *tmp_group;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(session->is_aborting);
+
+	MALI_DEBUG_PRINT(3,
+			 ("Executor: Aborting all jobs from session 0x%08X.\n",
+			  session));
+
+	mali_executor_lock();
+
+	if (mali_group_get_session(gp_group) == session) {
+		if (gp_group_state == EXEC_STATE_WORKING) {
+			struct mali_gp_job *gp_job = NULL;
+
+			mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL);
+
+			MALI_DEBUG_ASSERT_POINTER(gp_job);
+
+			/* GP job completed, make sure it is freed */
+			mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+						       MALI_FALSE, MALI_TRUE);
+		} else {
+			/* Same session, but not working, so just clear it */
+			mali_group_clear_session(gp_group);
+		}
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		if (EXEC_STATE_WORKING == virtual_group_state
+		    && mali_group_get_session(virtual_group) == session) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job);
+
+			if (pp_job != NULL) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_FALSE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_working,
+				    struct mali_group, executor_list) {
+		if (mali_group_get_session(group) == session) {
+			struct mali_pp_job *pp_job = NULL;
+
+			mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job);
+
+			if (pp_job != NULL) {
+				/* PP job completed, make sure it is freed */
+				mali_scheduler_complete_pp_job(pp_job, 0,
+							       MALI_FALSE, MALI_TRUE);
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_idle, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_inactive, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_disabled, struct mali_group, executor_list) {
+		mali_group_clear_session(group);
+	}
+
+	mali_executor_unlock();
+}
+
+
+void mali_executor_core_scaling_enable(void)
+{
+	/* PS: Core scaling is by default enabled */
+	core_scaling_enabled = MALI_TRUE;
+}
+
+void mali_executor_core_scaling_disable(void)
+{
+	core_scaling_enabled = MALI_FALSE;
+}
+
+mali_bool mali_executor_core_scaling_is_enabled(void)
+{
+	return core_scaling_enabled;
+}
+
+void mali_executor_group_enable(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+
+	if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group))
+	    && (mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) {
+		mali_executor_group_enable_internal(group);
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+}
+
+/*
+ * If a physical group is inactive or idle, we should disable it immediately,
+ * if group is in virtual, and virtual group is idle, disable given physical group in it.
+ */
+void mali_executor_group_disable(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+
+	if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group))
+	    && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) {
+		mali_executor_group_disable_internal(group);
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+}
+
+mali_bool mali_executor_group_is_disabled(struct mali_group *group)
+{
+	/* NB: This function is not optimized for time critical usage */
+
+	mali_bool ret;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	mali_executor_lock();
+	ret = mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED);
+	mali_executor_unlock();
+
+	return ret;
+}
+
+int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override)
+{
+	if (target_core_nr == num_physical_pp_cores_enabled) return 0;
+	if (MALI_FALSE == core_scaling_enabled && MALI_FALSE == override) return -EPERM;
+	if (target_core_nr > num_physical_pp_cores_total) return -EINVAL;
+	if (target_core_nr == 0) return -EINVAL;
+
+	mali_executor_core_scale(target_core_nr);
+
+	_mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+
+	return 0;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_executor_dump_state(char *buf, u32 size)
+{
+	int n = 0;
+	struct mali_group *group;
+	struct mali_group *temp;
+
+	mali_executor_lock();
+
+	switch (gp_group_state) {
+	case EXEC_STATE_INACTIVE:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state INACTIVE\n");
+		break;
+	case EXEC_STATE_IDLE:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state IDLE\n");
+		break;
+	case EXEC_STATE_WORKING:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in state WORKING\n");
+		break;
+	default:
+		n += _mali_osk_snprintf(buf + n, size - n,
+					"GP group is in unknown/illegal state %u\n",
+					gp_group_state);
+		break;
+	}
+
+	n += mali_group_dump_state(gp_group, buf + n, size - n);
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in WORKING state (count = %u):\n",
+				group_list_working_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in IDLE state (count = %u):\n",
+				group_list_idle_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in INACTIVE state (count = %u):\n",
+				group_list_inactive_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"Physical PP groups in DISABLED state (count = %u):\n",
+				group_list_disabled_count);
+
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+		n += mali_group_dump_state(group, buf + n, size - n);
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		switch (virtual_group_state) {
+		case EXEC_STATE_EMPTY:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state EMPTY\n");
+			break;
+		case EXEC_STATE_INACTIVE:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state INACTIVE\n");
+			break;
+		case EXEC_STATE_IDLE:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state IDLE\n");
+			break;
+		case EXEC_STATE_WORKING:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in state WORKING\n");
+			break;
+		default:
+			n += _mali_osk_snprintf(buf + n, size - n,
+						"Virtual PP group is in unknown/illegal state %u\n",
+						virtual_group_state);
+			break;
+		}
+
+		n += mali_group_dump_state(virtual_group, buf + n, size - n);
+	}
+
+	mali_executor_unlock();
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\n");
+
+	return n;
+}
+#endif
+
+_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->number_of_total_cores = num_physical_pp_cores_total;
+	args->number_of_enabled_cores = num_physical_pp_cores_enabled;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->version = pp_version;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->number_of_cores = 1;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+	args->version = gp_version;
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args)
+{
+	struct mali_session_data *session;
+	struct mali_gp_job *job;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (args->code == _MALIGP_JOB_RESUME_WITH_NEW_HEAP) {
+		_mali_osk_notification_t *new_notification = NULL;
+
+		new_notification = _mali_osk_notification_create(
+					   _MALI_NOTIFICATION_GP_STALLED,
+					   sizeof(_mali_uk_gp_job_suspended_s));
+
+		if (new_notification != NULL) {
+			MALI_DEBUG_PRINT(3, ("Executor: Resuming job %u with new heap; 0x%08X - 0x%08X\n",
+					     args->cookie, args->arguments[0], args->arguments[1]));
+
+			mali_executor_lock();
+
+			/* Resume the job in question if it is still running */
+			job = mali_group_get_running_gp_job(gp_group);
+			if (NULL != job &&
+			    args->cookie == mali_gp_job_get_id(job) &&
+			    session == mali_gp_job_get_session(job)) {
+				/*
+				 * Correct job is running, resume with new heap
+				 */
+
+				mali_gp_job_set_oom_notification(job,
+								 new_notification);
+
+				/* This will also re-enable interrupts */
+				mali_group_resume_gp_with_new_heap(gp_group,
+								   args->cookie,
+								   args->arguments[0],
+								   args->arguments[1]);
+
+				mali_executor_unlock();
+				return _MALI_OSK_ERR_OK;
+			} else {
+				MALI_DEBUG_PRINT(2, ("Executor: Unable to resume  gp job becasue gp time out or any other unexpected reason!\n"));
+
+				_mali_osk_notification_delete(new_notification);
+
+				mali_executor_unlock();
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_PRINT_ERROR(("Executor: Failed to allocate notification object. Will abort GP job.\n"));
+		}
+	} else {
+		MALI_DEBUG_PRINT(2, ("Executor: Aborting job %u, no new heap provided\n", args->cookie));
+	}
+
+	mali_executor_lock();
+
+	/* Abort the job in question if it is still running */
+	job = mali_group_get_running_gp_job(gp_group);
+	if (NULL != job &&
+	    args->cookie == mali_gp_job_get_id(job) &&
+	    session == mali_gp_job_get_session(job)) {
+		/* Correct job is still running */
+		struct mali_gp_job *job_done = NULL;
+
+		mali_executor_complete_group(gp_group, MALI_FALSE, &job_done, NULL);
+
+		/* The same job should have completed */
+		MALI_DEBUG_ASSERT(job_done == job);
+
+		/* GP job completed, make sure it is freed */
+		mali_scheduler_complete_gp_job(job_done, MALI_FALSE,
+					       MALI_TRUE, MALI_TRUE);
+	}
+
+	mali_executor_unlock();
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+/*
+ * ---------- Implementation of static functions ----------
+ */
+
+void mali_executor_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_executor_lock_obj);
+	MALI_DEBUG_PRINT(5, ("Executor: lock taken\n"));
+}
+
+void mali_executor_unlock(void)
+{
+	MALI_DEBUG_PRINT(5, ("Executor: Releasing lock\n"));
+	_mali_osk_spinlock_irq_unlock(mali_executor_lock_obj);
+}
+
+static mali_bool mali_executor_is_suspended(void *data)
+{
+	mali_bool ret;
+
+	/* This callback does not use the data pointer. */
+	MALI_IGNORE(data);
+
+	mali_executor_lock();
+
+	ret = pause_count > 0 && !mali_executor_is_working();
+
+	mali_executor_unlock();
+
+	return ret;
+}
+
+static mali_bool mali_executor_is_working(void)
+{
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	return (0 != group_list_working_count ||
+		EXEC_STATE_WORKING == gp_group_state ||
+		EXEC_STATE_WORKING == virtual_group_state);
+}
+
+static void mali_executor_disable_empty_virtual(void)
+{
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_EMPTY);
+	MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_WORKING);
+
+	if (mali_group_is_empty(virtual_group)) {
+		virtual_group_state = EXEC_STATE_EMPTY;
+	}
+}
+
+static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group)
+{
+	mali_bool trigger_pm_update = MALI_FALSE;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	/* Only rejoining after job has completed (still active) */
+	MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE ==
+			  mali_group_get_state(group));
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_executor_has_virtual_group() == MALI_TRUE);
+	MALI_DEBUG_ASSERT(mali_group_is_virtual(group) == MALI_FALSE);
+
+	/* Make sure group and virtual group have same status */
+
+	if (mali_group_get_state(virtual_group) == MALI_GROUP_STATE_INACTIVE) {
+		if (mali_group_deactivate(group)) {
+			trigger_pm_update = MALI_TRUE;
+		}
+
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+		}
+	} else if (MALI_GROUP_STATE_ACTIVATION_PENDING ==
+		   mali_group_get_state(virtual_group)) {
+		/*
+		 * Activation is pending for virtual group, leave
+		 * this child group as active.
+		 */
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE ==
+				  mali_group_get_state(virtual_group));
+
+		if (virtual_group_state == EXEC_STATE_EMPTY) {
+			virtual_group_state = EXEC_STATE_IDLE;
+		}
+	}
+
+	/* Remove group from idle list */
+	MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group,
+			  EXEC_STATE_IDLE));
+	_mali_osk_list_delinit(&group->executor_list);
+	group_list_idle_count--;
+
+	/*
+	 * And finally rejoin the virtual group
+	 * group will start working on same job as virtual_group,
+	 * if virtual_group is working on a job
+	 */
+	mali_group_add_group(virtual_group, group);
+
+	return trigger_pm_update;
+}
+
+static mali_bool mali_executor_has_virtual_group(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	return (virtual_group != NULL) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+}
+
+static mali_bool mali_executor_virtual_group_is_usable(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return ((EXEC_STATE_INACTIVE == virtual_group_state ||
+		EXEC_STATE_IDLE == virtual_group_state) && (virtual_group->state != MALI_GROUP_STATE_ACTIVATION_PENDING)) ?
+	       MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+}
+
+static mali_bool mali_executor_tackle_gp_bound(void)
+{
+	struct mali_pp_job *job;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	job = mali_scheduler_job_pp_physical_peek();
+
+	if (NULL != job && MALI_TRUE == mali_is_mali400()) {
+		if (0 < group_list_working_count &&
+		    mali_pp_job_is_large_and_unstarted(job)) {
+			return MALI_TRUE;
+		}
+	}
+
+	return MALI_FALSE;
+}
+
+static mali_bool mali_executor_schedule_is_early_out(mali_bool *gpu_secure_mode_is_needed)
+{
+	struct mali_pp_job *next_pp_job_to_start = NULL;
+	struct mali_group *group;
+	struct mali_group *tmp_group;
+	struct mali_pp_job *physical_pp_job_working = NULL;
+	struct mali_pp_job *virtual_pp_job_working = NULL;
+	mali_bool gpu_working_in_protected_mode = MALI_FALSE;
+	mali_bool gpu_working_in_non_protected_mode = MALI_FALSE;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	*gpu_secure_mode_is_needed = MALI_FALSE;
+
+	/* Check if the gpu secure mode is supported, exit if not.*/
+	if (_mali_osk_gpu_secure_mode_is_supported() == MALI_FALSE) {
+		return MALI_FALSE;
+	}
+
+	/* Check if need to set gpu secure mode for the next pp job,
+	 * get the next pp job that will be scheduled  if exist.
+	 */
+	next_pp_job_to_start = mali_scheduler_job_pp_next();
+
+	/* Check current pp physical/virtual running job is protected job or not if exist.*/
+	_MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_working,
+				    struct mali_group, executor_list) {
+		physical_pp_job_working = group->pp_running_job;
+		break;
+	}
+
+	if (virtual_group_state == EXEC_STATE_WORKING) {
+		virtual_pp_job_working = virtual_group->pp_running_job;
+	}
+
+	if (physical_pp_job_working != NULL) {
+		if (mali_pp_job_is_protected_job(physical_pp_job_working) == MALI_TRUE) {
+			gpu_working_in_protected_mode = MALI_TRUE;
+		} else {
+			gpu_working_in_non_protected_mode = MALI_TRUE;
+		}
+	} else if (virtual_pp_job_working != NULL) {
+		if (mali_pp_job_is_protected_job(virtual_pp_job_working) == MALI_TRUE) {
+			gpu_working_in_protected_mode = MALI_TRUE;
+		} else {
+			gpu_working_in_non_protected_mode = MALI_TRUE;
+		}
+	} else if (gp_group_state == EXEC_STATE_WORKING) {
+		gpu_working_in_non_protected_mode = MALI_TRUE;
+	}
+
+	/* If the next pp job is the protected pp job.*/
+	if ((NULL != next_pp_job_to_start) && MALI_TRUE == mali_pp_job_is_protected_job(next_pp_job_to_start)) {
+		/* if gp is working or any non-protected pp job is working now, unable to schedule protected pp job. */
+		if (gpu_working_in_non_protected_mode == MALI_TRUE)
+			return MALI_TRUE;
+
+		*gpu_secure_mode_is_needed = MALI_TRUE;
+		return MALI_FALSE;
+
+	}
+
+	if (gpu_working_in_protected_mode == MALI_TRUE) {
+		/* Unable to schedule non-protected pp job/gp job if exist protected pp running jobs*/
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+/*
+ * This is where jobs are actually started.
+ */
+static void mali_executor_schedule(void)
+{
+	u32 i;
+	u32 num_physical_needed = 0;
+	u32 num_physical_to_process = 0;
+	mali_bool trigger_pm_update = MALI_FALSE;
+	mali_bool deactivate_idle_group = MALI_TRUE;
+	mali_bool gpu_secure_mode_is_needed = MALI_FALSE;
+	mali_bool is_gpu_secure_mode = MALI_FALSE;
+	/* Physical groups + jobs to start in this function */
+	struct mali_group *groups_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	struct mali_pp_job *jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	u32 sub_jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	int num_jobs_to_start = 0;
+
+	/* Virtual job to start in this function */
+	struct mali_pp_job *virtual_job_to_start = NULL;
+
+	/* GP job to start in this function */
+	struct mali_gp_job *gp_job_to_start = NULL;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (pause_count > 0) {
+		/* Execution is suspended, don't schedule any jobs. */
+		return;
+	}
+
+	/* Lock needed in order to safely handle the job queues */
+	mali_scheduler_lock();
+
+	/* 1. Check the schedule if need to early out. */
+	if (mali_executor_schedule_is_early_out(&gpu_secure_mode_is_needed) == MALI_TRUE) {
+		mali_scheduler_unlock();
+		return;
+	}
+
+	/* 2. Activate gp firstly if have gp job queued. */
+	if ((gp_group_state == EXEC_STATE_INACTIVE)
+	    && (mali_scheduler_job_gp_count() > 0)
+	    && (gpu_secure_mode_is_needed == MALI_FALSE)) {
+
+		enum mali_group_state state =
+			mali_group_activate(gp_group);
+		if (state == MALI_GROUP_STATE_ACTIVE) {
+			/* Set GP group state to idle */
+			gp_group_state = EXEC_STATE_IDLE;
+		} else {
+			trigger_pm_update = MALI_TRUE;
+		}
+	}
+
+	/* 3. Prepare as many physical groups as needed/possible */
+
+	num_physical_needed = mali_scheduler_job_physical_head_count(gpu_secure_mode_is_needed);
+
+	/* On mali-450 platform, we don't need to enter in this block frequently. */
+	if (num_physical_needed > 0) {
+
+		if (num_physical_needed <= group_list_idle_count) {
+			/* We have enough groups on idle list already */
+			num_physical_to_process = num_physical_needed;
+			num_physical_needed = 0;
+		} else {
+			/* We need to get a hold of some more groups */
+			num_physical_to_process = group_list_idle_count;
+			num_physical_needed -= group_list_idle_count;
+		}
+
+		if (num_physical_needed > 0) {
+
+			/* 3.1. Activate groups which are inactive */
+
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive,
+						    struct mali_group, executor_list) {
+				enum mali_group_state state =
+					mali_group_activate(group);
+				if (state == MALI_GROUP_STATE_ACTIVE) {
+					/* Move from inactive to idle */
+					mali_executor_change_state_pp_physical(group,
+									       &group_list_inactive,
+									       &group_list_inactive_count,
+									       &group_list_idle,
+									       &group_list_idle_count);
+					num_physical_to_process++;
+				} else {
+					trigger_pm_update = MALI_TRUE;
+				}
+
+				num_physical_needed--;
+				if (num_physical_needed == 0) {
+					/* We have activated all the groups we need */
+					break;
+				}
+			}
+		}
+
+		if (mali_executor_virtual_group_is_usable()) {
+
+			/*
+			 * 3.2. And finally, steal and activate groups
+			 * from virtual group if we need even more
+			 */
+			while (num_physical_needed > 0) {
+				struct mali_group *group;
+
+				group = mali_group_acquire_group(virtual_group);
+				if (group != NULL) {
+					enum mali_group_state state;
+
+					mali_executor_disable_empty_virtual();
+
+					state = mali_group_activate(group);
+					if (state == MALI_GROUP_STATE_ACTIVE) {
+						/* Group is ready, add to idle list */
+						_mali_osk_list_add(
+							&group->executor_list,
+							&group_list_idle);
+						group_list_idle_count++;
+						num_physical_to_process++;
+					} else {
+						/*
+						 * Group is not ready yet,
+						 * add to inactive list
+						 */
+						_mali_osk_list_add(
+							&group->executor_list,
+							&group_list_inactive);
+						group_list_inactive_count++;
+
+						trigger_pm_update = MALI_TRUE;
+					}
+					num_physical_needed--;
+				} else {
+					/*
+					 * We could not get enough groups
+					 * from the virtual group.
+					 */
+					break;
+				}
+			}
+		}
+
+		/* 3.3. Assign physical jobs to groups */
+
+		if (num_physical_to_process > 0) {
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle,
+						    struct mali_group, executor_list) {
+				struct mali_pp_job *job = NULL;
+				u32 sub_job = MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+
+				MALI_DEBUG_ASSERT(num_jobs_to_start <
+						  MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS);
+
+				MALI_DEBUG_ASSERT(0 <
+						  mali_scheduler_job_physical_head_count(gpu_secure_mode_is_needed));
+
+				/* If the next pp job is non-protected, check if gp bound now. */
+				if ((gpu_secure_mode_is_needed == MALI_FALSE)
+				    && (mali_executor_hint_is_enabled(MALI_EXECUTOR_HINT_GP_BOUND))
+				    && (mali_executor_tackle_gp_bound() == MALI_TRUE)) {
+					/*
+					* We're gp bound,
+					* don't start this right now.
+					*/
+					deactivate_idle_group = MALI_FALSE;
+					num_physical_to_process = 0;
+					break;
+				}
+
+				job = mali_scheduler_job_pp_physical_get(
+					      &sub_job);
+
+				if (gpu_secure_mode_is_needed == MALI_FALSE) {
+					MALI_DEBUG_ASSERT(mali_pp_job_is_protected_job(job) == MALI_FALSE);
+				} else {
+					MALI_DEBUG_ASSERT(mali_pp_job_is_protected_job(job) == MALI_TRUE);
+				}
+
+				MALI_DEBUG_ASSERT_POINTER(job);
+				MALI_DEBUG_ASSERT(sub_job <= MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS);
+
+				/* Put job + group on list of jobs to start later on */
+
+				groups_to_start[num_jobs_to_start] = group;
+				jobs_to_start[num_jobs_to_start] = job;
+				sub_jobs_to_start[num_jobs_to_start] = sub_job;
+				num_jobs_to_start++;
+
+				/* Move group from idle to working */
+				mali_executor_change_state_pp_physical(group,
+								       &group_list_idle,
+								       &group_list_idle_count,
+								       &group_list_working,
+								       &group_list_working_count);
+
+				num_physical_to_process--;
+				if (num_physical_to_process == 0) {
+					/* Got all we needed */
+					break;
+				}
+			}
+		}
+	}
+
+	/* 4. Deactivate idle pp group , must put deactive here before active vitual group
+	 *    for cover case first only has physical job in normal queue but group inactive,
+	 *    so delay the job start go to active group, when group activated,
+	 *    call scheduler again, but now if we get high queue virtual job,
+	 *    we will do nothing in schedule cause executor schedule stop
+	 */
+
+	if (MALI_TRUE == mali_executor_deactivate_list_idle(deactivate_idle_group
+			&& (!mali_timeline_has_physical_pp_job()))) {
+		trigger_pm_update = MALI_TRUE;
+	}
+
+	/* 5. Activate virtual group, if needed */
+	if (EXEC_STATE_INACTIVE == virtual_group_state &&
+	    MALI_TRUE ==  mali_scheduler_job_next_is_virtual()) {
+		struct mali_pp_job *virtual_job = mali_scheduler_job_pp_virtual_peek();
+
+		if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == mali_pp_job_is_protected_job(virtual_job))
+		    || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == mali_pp_job_is_protected_job(virtual_job))) {
+			enum mali_group_state state =
+				mali_group_activate(virtual_group);
+			if (state == MALI_GROUP_STATE_ACTIVE) {
+				/* Set virtual group state to idle */
+				virtual_group_state = EXEC_STATE_IDLE;
+			} else {
+				trigger_pm_update = MALI_TRUE;
+			}
+		}
+	}
+
+	/* 6. To power up group asap,  trigger pm update only when no need to swith the gpu mode. */
+
+	is_gpu_secure_mode = _mali_osk_gpu_secure_mode_is_enabled();
+
+	if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == is_gpu_secure_mode)
+	    || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == is_gpu_secure_mode)) {
+		if (trigger_pm_update == MALI_TRUE) {
+			trigger_pm_update = MALI_FALSE;
+			mali_pm_update_async();
+		}
+	}
+
+	/* 7. Assign jobs to idle virtual group (or deactivate if no job) */
+
+	if (virtual_group_state == EXEC_STATE_IDLE) {
+		if (mali_scheduler_job_next_is_virtual() == MALI_TRUE) {
+			struct mali_pp_job *virtual_job = mali_scheduler_job_pp_virtual_peek();
+
+			if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == mali_pp_job_is_protected_job(virtual_job))
+			    || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == mali_pp_job_is_protected_job(virtual_job))) {
+				virtual_job_to_start =
+					mali_scheduler_job_pp_virtual_get();
+				virtual_group_state = EXEC_STATE_WORKING;
+			}
+		} else if (!mali_timeline_has_virtual_pp_job()) {
+			virtual_group_state = EXEC_STATE_INACTIVE;
+
+			if (mali_group_deactivate(virtual_group)) {
+				trigger_pm_update = MALI_TRUE;
+			}
+		}
+	}
+
+	/* 8. Assign job to idle GP group (or deactivate if no job) */
+
+	if (EXEC_STATE_IDLE == gp_group_state && MALI_FALSE == gpu_secure_mode_is_needed) {
+		if (mali_scheduler_job_gp_count() > 0) {
+			gp_job_to_start = mali_scheduler_job_gp_get();
+			gp_group_state = EXEC_STATE_WORKING;
+		} else if (!mali_timeline_has_gp_job()) {
+			gp_group_state = EXEC_STATE_INACTIVE;
+			if (mali_group_deactivate(gp_group)) {
+				trigger_pm_update = MALI_TRUE;
+			}
+		}
+	}
+
+	/* 9. We no longer need the schedule/queue lock */
+
+	mali_scheduler_unlock();
+
+	/* 10. start jobs */
+	if (virtual_job_to_start != NULL) {
+		MALI_DEBUG_ASSERT(!mali_group_pp_is_active(virtual_group));
+		mali_group_start_pp_job(virtual_group,
+					virtual_job_to_start, 0, is_gpu_secure_mode);
+	}
+
+	for (i = 0; i < num_jobs_to_start; i++) {
+		MALI_DEBUG_ASSERT(!mali_group_pp_is_active(
+					  groups_to_start[i]));
+		mali_group_start_pp_job(groups_to_start[i],
+					jobs_to_start[i],
+					sub_jobs_to_start[i], is_gpu_secure_mode);
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(gp_group);
+
+	if (gp_job_to_start != NULL) {
+		MALI_DEBUG_ASSERT(!mali_group_gp_is_active(gp_group));
+		mali_group_start_gp_job(gp_group, gp_job_to_start, is_gpu_secure_mode);
+	}
+
+	/* 11. Trigger any pending PM updates */
+	if (trigger_pm_update == MALI_TRUE) {
+		mali_pm_update_async();
+	}
+}
+
+/* Handler for deferred schedule requests */
+static void mali_executor_wq_schedule(void *arg)
+{
+	MALI_IGNORE(arg);
+	mali_executor_lock();
+	mali_executor_schedule();
+	mali_executor_unlock();
+}
+
+static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job)
+{
+	_mali_uk_gp_job_suspended_s *jobres;
+	_mali_osk_notification_t *notification;
+
+	notification = mali_gp_job_get_oom_notification(job);
+
+	/*
+	 * Remember the id we send to user space, so we have something to
+	 * verify when we get a response
+	 */
+	gp_returned_cookie = mali_gp_job_get_id(job);
+
+	jobres = (_mali_uk_gp_job_suspended_s *)notification->result_buffer;
+	jobres->user_job_ptr = mali_gp_job_get_user_id(job);
+	jobres->cookie = gp_returned_cookie;
+
+	mali_session_send_notification(mali_gp_job_get_session(job),
+				       notification);
+}
+static struct mali_gp_job *mali_executor_complete_gp(struct mali_group *group,
+		mali_bool success)
+{
+	struct mali_gp_job *job;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* Extracts the needed HW status from core and reset */
+	job = mali_group_complete_gp(group, success);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Core is now ready to go into idle list */
+	gp_group_state = EXEC_STATE_IDLE;
+
+	/* This will potentially queue more GP and PP jobs */
+	mali_timeline_tracker_release(&job->tracker);
+
+	/* Signal PP job */
+	mali_gp_job_signal_pp_tracker(job, success);
+
+	return job;
+}
+
+static struct mali_pp_job *mali_executor_complete_pp(struct mali_group *group,
+		mali_bool success)
+{
+	struct mali_pp_job *job;
+	u32 sub_job;
+	mali_bool job_is_done;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* Extracts the needed HW status from core and reset */
+	job = mali_group_complete_pp(group, success, &sub_job);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Core is now ready to go into idle list */
+	if (mali_group_is_virtual(group)) {
+		virtual_group_state = EXEC_STATE_IDLE;
+	} else {
+		/* Move from working to idle state */
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_working,
+						       &group_list_working_count,
+						       &group_list_idle,
+						       &group_list_idle_count);
+	}
+
+	/* It is the executor module which owns the jobs themselves by now */
+	mali_pp_job_mark_sub_job_completed(job, success);
+	job_is_done = mali_pp_job_is_complete(job);
+
+	if (job_is_done) {
+		/* This will potentially queue more GP and PP jobs */
+		mali_timeline_tracker_release(&job->tracker);
+	}
+
+	return job;
+}
+
+static void mali_executor_complete_group(struct mali_group *group,
+		mali_bool success,
+		struct mali_gp_job **gp_job_done,
+		struct mali_pp_job **pp_job_done)
+{
+	struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+	struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+	struct mali_gp_job *gp_job = NULL;
+	struct mali_pp_job *pp_job = NULL;
+	mali_bool pp_job_is_done = MALI_TRUE;
+
+	if (gp_core != NULL) {
+		gp_job = mali_executor_complete_gp(group, success);
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(pp_core);
+		MALI_IGNORE(pp_core);
+		pp_job = mali_executor_complete_pp(group, success);
+
+		pp_job_is_done = mali_pp_job_is_complete(pp_job);
+	}
+
+	if (pause_count > 0) {
+		/* Execution has been suspended */
+
+		if (!mali_executor_is_working()) {
+			/* Last job completed, wake up sleepers */
+			_mali_osk_wait_queue_wake_up(
+				executor_working_wait_queue);
+		}
+	} else if (mali_group_disable_requested(group) == MALI_TRUE) {
+		mali_executor_core_scale_in_group_complete(group);
+
+		mali_executor_schedule();
+	} else {
+		/* try to schedule new jobs */
+		mali_executor_schedule();
+	}
+
+	if (gp_job != NULL) {
+		MALI_DEBUG_ASSERT_POINTER(gp_job_done);
+		*gp_job_done = gp_job;
+	} else if (pp_job_is_done) {
+		MALI_DEBUG_ASSERT_POINTER(pp_job);
+		MALI_DEBUG_ASSERT_POINTER(pp_job_done);
+		*pp_job_done = pp_job;
+	}
+}
+
+static void mali_executor_change_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *old_list,
+		u32 *old_count,
+		_mali_osk_list_t *new_list,
+		u32 *new_count)
+{
+	/*
+	 * It's a bit more complicated to change the state for the physical PP
+	 * groups since their state is determined by the list they are on.
+	 */
+#if defined(DEBUG)
+	mali_bool found = MALI_FALSE;
+	struct mali_group *group_iter;
+	struct mali_group *temp;
+	u32 old_counted = 0;
+	u32 new_counted = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(old_list);
+	MALI_DEBUG_ASSERT_POINTER(old_count);
+	MALI_DEBUG_ASSERT_POINTER(new_list);
+	MALI_DEBUG_ASSERT_POINTER(new_count);
+
+	/*
+	 * Verify that group is present on old list,
+	 * and that the count is correct
+	 */
+
+	_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, old_list,
+				    struct mali_group, executor_list) {
+		old_counted++;
+		if (group == group_iter) {
+			found = MALI_TRUE;
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, new_list,
+				    struct mali_group, executor_list) {
+		new_counted++;
+	}
+
+	if (found == MALI_FALSE) {
+		if (old_list == &group_list_idle) {
+			MALI_DEBUG_PRINT(1, (" old Group list is idle,"));
+		} else if (old_list == &group_list_inactive) {
+			MALI_DEBUG_PRINT(1, (" old Group list is inactive,"));
+		} else if (old_list == &group_list_working) {
+			MALI_DEBUG_PRINT(1, (" old Group list is working,"));
+		} else if (old_list == &group_list_disabled) {
+			MALI_DEBUG_PRINT(1, (" old Group list is disable,"));
+		}
+
+		if (mali_executor_group_is_in_state(group, EXEC_STATE_WORKING) == MALI_TRUE) {
+			MALI_DEBUG_PRINT(1, (" group in working\n"));
+		} else if (mali_executor_group_is_in_state(group, EXEC_STATE_INACTIVE) == MALI_TRUE) {
+			MALI_DEBUG_PRINT(1, (" group in inactive\n"));
+		} else if (mali_executor_group_is_in_state(group, EXEC_STATE_IDLE) == MALI_TRUE) {
+			MALI_DEBUG_PRINT(1, (" group in idle\n"));
+		} else if (mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED) == MALI_TRUE) {
+			MALI_DEBUG_PRINT(1, (" but group in disabled\n"));
+		}
+	}
+
+	MALI_DEBUG_ASSERT(found == MALI_TRUE);
+	MALI_DEBUG_ASSERT(0 < (*old_count));
+	MALI_DEBUG_ASSERT((*old_count) == old_counted);
+	MALI_DEBUG_ASSERT((*new_count) == new_counted);
+#endif
+
+	_mali_osk_list_move(&group->executor_list, new_list);
+	(*old_count)--;
+	(*new_count)++;
+}
+
+static void mali_executor_set_state_pp_physical(struct mali_group *group,
+		_mali_osk_list_t *new_list,
+		u32 *new_count)
+{
+	_mali_osk_list_add(&group->executor_list, new_list);
+	(*new_count)++;
+}
+
+static mali_bool mali_executor_group_is_in_state(struct mali_group *group,
+		enum mali_executor_state_t state)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (gp_group == group) {
+		if (gp_group_state == state) {
+			return MALI_TRUE;
+		}
+	} else if (virtual_group == group || mali_group_is_in_virtual(group)) {
+		if (virtual_group_state == state) {
+			return MALI_TRUE;
+		}
+	} else {
+		/* Physical PP group */
+		struct mali_group *group_iter;
+		struct mali_group *temp;
+		_mali_osk_list_t *list;
+
+		if (state == EXEC_STATE_DISABLED) {
+			list = &group_list_disabled;
+		} else if (state == EXEC_STATE_INACTIVE) {
+			list = &group_list_inactive;
+		} else if (state == EXEC_STATE_IDLE) {
+			list = &group_list_idle;
+		} else {
+			MALI_DEBUG_ASSERT(state == EXEC_STATE_WORKING);
+			list = &group_list_working;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, list,
+					    struct mali_group, executor_list) {
+			if (group_iter == group) {
+				return MALI_TRUE;
+			}
+		}
+	}
+
+	/* group not in correct state */
+	return MALI_FALSE;
+}
+
+static void mali_executor_group_enable_internal(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED));
+
+	/* Put into inactive state (== "lowest" enabled state) */
+	if (group == gp_group) {
+		MALI_DEBUG_ASSERT(gp_group_state == EXEC_STATE_DISABLED);
+		gp_group_state = EXEC_STATE_INACTIVE;
+	} else {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_disabled,
+						       &group_list_disabled_count,
+						       &group_list_inactive,
+						       &group_list_inactive_count);
+
+		++num_physical_pp_cores_enabled;
+		MALI_DEBUG_PRINT(4, ("Enabling group id %d\n", group->pp_core->core_id));
+	}
+
+	if (mali_group_activate(group) == MALI_GROUP_STATE_ACTIVE) {
+		MALI_DEBUG_ASSERT(mali_group_power_is_on(group) == MALI_TRUE);
+
+		/* Move from inactive to idle */
+		if (group == gp_group) {
+			gp_group_state = EXEC_STATE_IDLE;
+		} else {
+			mali_executor_change_state_pp_physical(group,
+							       &group_list_inactive,
+							       &group_list_inactive_count,
+							       &group_list_idle,
+							       &group_list_idle_count);
+
+			if (mali_executor_has_virtual_group()) {
+				if (mali_executor_physical_rejoin_virtual(group)) {
+					mali_pm_update_async();
+				}
+			}
+		}
+	} else {
+		mali_pm_update_async();
+	}
+}
+
+static void mali_executor_group_disable_internal(struct mali_group *group)
+{
+	mali_bool working;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED));
+
+	working = mali_executor_group_is_in_state(group, EXEC_STATE_WORKING);
+	if (working == MALI_TRUE) {
+		/** Group to be disabled once it completes current work,
+		 * when virtual group completes, also check child groups for this flag */
+		mali_group_set_disable_request(group, MALI_TRUE);
+		return;
+	}
+
+	/* Put into disabled state */
+	if (group == gp_group) {
+		/* GP group */
+		MALI_DEBUG_ASSERT(gp_group_state != EXEC_STATE_WORKING);
+		gp_group_state = EXEC_STATE_DISABLED;
+	} else {
+		if (mali_group_is_in_virtual(group)) {
+			/* A child group of virtual group. move the specific group from virtual group */
+			MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_WORKING);
+
+			mali_executor_set_state_pp_physical(group,
+							    &group_list_disabled,
+							    &group_list_disabled_count);
+
+			mali_group_remove_group(virtual_group, group);
+			mali_executor_disable_empty_virtual();
+		} else {
+			mali_executor_change_group_status_disabled(group);
+		}
+
+		--num_physical_pp_cores_enabled;
+		MALI_DEBUG_PRINT(4, ("Disabling group id %d\n", group->pp_core->core_id));
+	}
+
+	if (group->state != MALI_GROUP_STATE_INACTIVE) {
+		if (mali_group_deactivate(group) == MALI_TRUE) {
+			mali_pm_update_async();
+		}
+	}
+}
+
+static void mali_executor_notify_core_change(u32 num_cores)
+{
+	mali_bool done = MALI_FALSE;
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		return;
+	}
+
+	/*
+	 * This function gets a bit complicated because we can't hold the session lock while
+	 * allocating notification objects.
+	 */
+	while (!done) {
+		u32 i;
+		u32 num_sessions_alloc;
+		u32 num_sessions_with_lock;
+		u32 used_notification_objects = 0;
+		_mali_osk_notification_t **notobjs;
+
+		/* Pre allocate the number of notifications objects we need right now (might change after lock has been taken) */
+		num_sessions_alloc = mali_session_get_count();
+		if (num_sessions_alloc == 0) {
+			/* No sessions to report to */
+			return;
+		}
+
+		notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc);
+		if (notobjs == NULL) {
+			MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n"));
+			/* there is probably no point in trying again, system must be really low on memory and probably unusable now anyway */
+			return;
+		}
+
+		for (i = 0; i < num_sessions_alloc; i++) {
+			notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_NUM_CORE_CHANGE, sizeof(_mali_uk_pp_num_cores_changed_s));
+			if (notobjs[i] != NULL) {
+				_mali_uk_pp_num_cores_changed_s *data = notobjs[i]->result_buffer;
+
+				data->number_of_enabled_cores = num_cores;
+			} else {
+				MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure %u)\n", i));
+			}
+		}
+
+		mali_session_lock();
+
+		/* number of sessions will not change while we hold the lock */
+		num_sessions_with_lock = mali_session_get_count();
+
+		if (num_sessions_alloc >= num_sessions_with_lock) {
+			/* We have allocated enough notification objects for all the sessions atm */
+			struct mali_session_data *session, *tmp;
+
+			MALI_SESSION_FOREACH(session, tmp, link) {
+				MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc);
+				if (notobjs[used_notification_objects] != NULL) {
+					mali_session_send_notification(session, notobjs[used_notification_objects]);
+					notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */
+				}
+				used_notification_objects++;
+			}
+			done = MALI_TRUE;
+		}
+
+		mali_session_unlock();
+
+		/* Delete any remaining/unused notification objects */
+		for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) {
+			if (notobjs[used_notification_objects] != NULL) {
+				_mali_osk_notification_delete(notobjs[used_notification_objects]);
+			}
+		}
+
+		_mali_osk_free(notobjs);
+	}
+}
+
+static mali_bool mali_executor_core_scaling_is_done(void *data)
+{
+	u32 i;
+	u32 num_groups;
+	mali_bool ret = MALI_TRUE;
+
+	MALI_IGNORE(data);
+
+	mali_executor_lock();
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (group != NULL) {
+			if (MALI_TRUE == group->disable_requested && NULL != mali_group_get_pp_core(group)) {
+				ret = MALI_FALSE;
+				break;
+			}
+		}
+	}
+	mali_executor_unlock();
+
+	return ret;
+}
+
+static void mali_executor_wq_notify_core_change(void *arg)
+{
+	MALI_IGNORE(arg);
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		return;
+	}
+
+	_mali_osk_wait_queue_wait_event(executor_notify_core_change_wait_queue,
+					mali_executor_core_scaling_is_done, NULL);
+
+	mali_executor_notify_core_change(num_physical_pp_cores_enabled);
+}
+
+/**
+ * Clear all disable request from the _last_ core scaling behavior.
+ */
+static void mali_executor_core_scaling_reset(void)
+{
+	u32 i;
+	u32 num_groups;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	num_groups = mali_group_get_glob_num_groups();
+
+	for (i = 0; i < num_groups; i++) {
+		struct mali_group *group = mali_group_get_glob_group(i);
+
+		if (group != NULL) {
+			group->disable_requested = MALI_FALSE;
+		}
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		core_scaling_delay_up_mask[i] = 0;
+	}
+}
+
+static void mali_executor_core_scale(unsigned int target_core_nr)
+{
+	int current_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	int target_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	int i;
+
+	MALI_DEBUG_ASSERT(target_core_nr > 0);
+	MALI_DEBUG_ASSERT(num_physical_pp_cores_total >= target_core_nr);
+
+	mali_executor_lock();
+
+	if (target_core_nr < num_physical_pp_cores_enabled) {
+		MALI_DEBUG_PRINT(2, ("Requesting %d cores: disabling %d cores\n", target_core_nr, num_physical_pp_cores_enabled - target_core_nr));
+	} else {
+		MALI_DEBUG_PRINT(2, ("Requesting %d cores: enabling %d cores\n", target_core_nr, target_core_nr - num_physical_pp_cores_enabled));
+	}
+
+	/* When a new core scaling request is comming,  we should remove the un-doing
+	 * part of the last core scaling request.  It's safe because we have only one
+	 * lock(executor lock) protection. */
+	mali_executor_core_scaling_reset();
+
+	mali_pm_get_best_power_cost_mask(num_physical_pp_cores_enabled, current_core_scaling_mask);
+	mali_pm_get_best_power_cost_mask(target_core_nr, target_core_scaling_mask);
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		target_core_scaling_mask[i] = target_core_scaling_mask[i] - current_core_scaling_mask[i];
+		MALI_DEBUG_PRINT(5, ("target_core_scaling_mask[%d] = %d\n", i, target_core_scaling_mask[i]));
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (target_core_scaling_mask[i] < 0) {
+			struct mali_pm_domain *domain;
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((domain != NULL) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(group) && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))
+					    && (!mali_group_is_virtual(group))) {
+						mali_executor_group_disable_internal(group);
+						target_core_scaling_mask[i]++;
+						if ((target_core_scaling_mask[i] == 0)) {
+							break;
+						}
+
+					}
+				}
+			}
+		}
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		/**
+		 * Target_core_scaling_mask[i] is bigger than 0,
+		 * means we need to enable some pp cores in
+		 * this domain whose domain index is i.
+		 */
+		if (target_core_scaling_mask[i] > 0) {
+			struct mali_pm_domain *domain;
+
+			if (num_physical_pp_cores_enabled >= target_core_nr) {
+				break;
+			}
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((domain != NULL) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(group) && mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)
+					    && (!mali_group_is_virtual(group))) {
+						mali_executor_group_enable_internal(group);
+						target_core_scaling_mask[i]--;
+
+						if ((target_core_scaling_mask[i] == 0) || num_physical_pp_cores_enabled == target_core_nr) {
+							break;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	/**
+	 * Here, we may still have some pp cores not been enabled because of some
+	 * pp cores need to be disabled are still in working state.
+	 */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (target_core_scaling_mask[i] > 0) {
+			core_scaling_delay_up_mask[i] = target_core_scaling_mask[i];
+		}
+	}
+
+	mali_executor_schedule();
+	mali_executor_unlock();
+}
+
+static void mali_executor_core_scale_in_group_complete(struct mali_group *group)
+{
+	int num_pp_cores_disabled = 0;
+	int num_pp_cores_to_enable = 0;
+	int i;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(mali_group_disable_requested(group) == MALI_TRUE);
+
+	/* Disable child group of virtual group */
+	if (mali_group_is_virtual(group)) {
+		struct mali_group *child;
+		struct mali_group *temp;
+
+		_MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+			if (mali_group_disable_requested(child) == MALI_TRUE) {
+				mali_group_set_disable_request(child, MALI_FALSE);
+				mali_executor_group_disable_internal(child);
+				num_pp_cores_disabled++;
+			}
+		}
+		mali_group_set_disable_request(group, MALI_FALSE);
+	} else {
+		mali_executor_group_disable_internal(group);
+		mali_group_set_disable_request(group, MALI_FALSE);
+		if (mali_group_get_pp_core(group) != NULL) {
+			num_pp_cores_disabled++;
+		}
+	}
+
+	num_pp_cores_to_enable = num_pp_cores_disabled;
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (core_scaling_delay_up_mask[i] > 0) {
+			struct mali_pm_domain *domain;
+
+			if (num_pp_cores_to_enable == 0) {
+				break;
+			}
+
+			domain = mali_pm_domain_get_from_index(i);
+
+			/* Domain is valid and has pp cores */
+			if ((domain != NULL) && !(_mali_osk_list_empty(&domain->group_list))) {
+				struct mali_group *disabled_group;
+				struct mali_group *temp;
+
+				_MALI_OSK_LIST_FOREACHENTRY(disabled_group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+					if (NULL != mali_group_get_pp_core(disabled_group) && mali_executor_group_is_in_state(disabled_group, EXEC_STATE_DISABLED)) {
+						mali_executor_group_enable_internal(disabled_group);
+						core_scaling_delay_up_mask[i]--;
+						num_pp_cores_to_enable--;
+
+						if ((0 == core_scaling_delay_up_mask[i]) || 0 == num_pp_cores_to_enable) {
+							break;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	_mali_osk_wait_queue_wake_up(executor_notify_core_change_wait_queue);
+}
+
+static void mali_executor_change_group_status_disabled(struct mali_group *group)
+{
+	/* Physical PP group */
+	mali_bool idle;
+
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	idle = mali_executor_group_is_in_state(group, EXEC_STATE_IDLE);
+	if (idle == MALI_TRUE) {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_idle,
+						       &group_list_idle_count,
+						       &group_list_disabled,
+						       &group_list_disabled_count);
+	} else {
+		mali_executor_change_state_pp_physical(group,
+						       &group_list_inactive,
+						       &group_list_inactive_count,
+						       &group_list_disabled,
+						       &group_list_disabled_count);
+	}
+}
+
+static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group)
+{
+	mali_bool trigger_pm_update = MALI_FALSE;
+
+	if (group_list_idle_count > 0) {
+		if (mali_executor_has_virtual_group()) {
+
+			/* Rejoin virtual group on Mali-450 */
+
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+						    &group_list_idle,
+						    struct mali_group, executor_list) {
+				if (mali_executor_physical_rejoin_virtual(
+					    group)) {
+					trigger_pm_update = MALI_TRUE;
+				}
+			}
+		} else if (deactivate_idle_group) {
+			struct mali_group *group;
+			struct mali_group *temp;
+
+			/* Deactivate group on Mali-300/400 */
+
+			_MALI_OSK_LIST_FOREACHENTRY(group, temp,
+						    &group_list_idle,
+						    struct mali_group, executor_list) {
+				if (mali_group_deactivate(group)) {
+					trigger_pm_update = MALI_TRUE;
+				}
+
+				/* Move from idle to inactive */
+				mali_executor_change_state_pp_physical(group,
+								       &group_list_idle,
+								       &group_list_idle_count,
+								       &group_list_inactive,
+								       &group_list_inactive_count);
+			}
+		}
+	}
+
+	return trigger_pm_update;
+}
+
+void mali_executor_running_status_print(void)
+{
+	struct mali_group *group = NULL;
+	struct mali_group *temp = NULL;
+
+	MALI_PRINT(("GP running job: %p\n", gp_group->gp_running_job));
+	if ((gp_group->gp_core) && (gp_group->is_working)) {
+		mali_group_dump_status(gp_group);
+	}
+	MALI_PRINT(("Physical PP groups in WORKING state (count = %u):\n", group_list_working_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) {
+		MALI_PRINT(("PP running job: %p, subjob %d\n", group->pp_running_job, group->pp_running_sub_job));
+		mali_group_dump_status(group);
+	}
+	MALI_PRINT(("Physical PP groups in INACTIVE state (count = %u):\n", group_list_inactive_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+		MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+		MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+	}
+	MALI_PRINT(("Physical PP groups in IDLE state (count = %u):\n", group_list_idle_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+		MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+		MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+	}
+	MALI_PRINT(("Physical PP groups in DISABLED state (count = %u):\n", group_list_disabled_count));
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+		MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+		MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+	}
+
+	if (mali_executor_has_virtual_group()) {
+		MALI_PRINT(("Virtual group running job: %p\n", virtual_group->pp_running_job));
+		MALI_PRINT(("Virtual group status: %d\n", virtual_group_state));
+		MALI_PRINT(("Virtual group->status: %d\n", virtual_group->state));
+		MALI_PRINT(("\tSW power: %s\n", virtual_group->power_is_on ? "On" : "Off"));
+		_MALI_OSK_LIST_FOREACHENTRY(group, temp, &virtual_group->group_list,
+					    struct mali_group, group_list) {
+			int i = 0;
+
+			MALI_PRINT(("\tchild group(%s) running job: %p\n", group->pp_core->hw_core.description, group->pp_running_job));
+			MALI_PRINT(("\tchild group(%s)->status: %d\n", group->pp_core->hw_core.description, group->state));
+			MALI_PRINT(("\tchild group(%s) SW power: %s\n", group->pp_core->hw_core.description, group->power_is_on ? "On" : "Off"));
+#if MALI_STATE_TRACKING
+			if (group->pm_domain) {
+				MALI_PRINT(("\tPower domain: id %u\n", mali_pm_domain_get_id(group->pm_domain)));
+				MALI_PRINT(("\tMask:0x%04x\n", mali_pm_domain_get_mask(group->pm_domain)));
+				MALI_PRINT(("\tUse-count:%u\n", mali_pm_domain_get_use_count(group->pm_domain)));
+				MALI_PRINT(("\tCurrent power status:%s\n", (mali_pm_domain_get_mask(group->pm_domain) & mali_pm_get_current_mask()) ? "On" : "Off"));
+				MALI_PRINT(("\tWanted  power status:%s\n", (mali_pm_domain_get_mask(group->pm_domain) & mali_pm_get_wanted_mask()) ? "On" : "Off"));
+			}
+#endif
+
+			for (i = 0; i < 2; i++) {
+				if (group->l2_cache_core[i] != NULL) {
+					struct mali_pm_domain *domain;
+
+					domain = mali_l2_cache_get_pm_domain(group->l2_cache_core[i]);
+					MALI_PRINT(("\t L2(index %d) group SW power: %s\n", i, group->l2_cache_core[i]->power_is_on ? "On" : "Off"));
+#if MALI_STATE_TRACKING
+					if (domain) {
+						MALI_PRINT(("\tL2 Power domain: id %u\n", mali_pm_domain_get_id(domain)));
+						MALI_PRINT(("\tL2 Mask:0x%04x\n", mali_pm_domain_get_mask(domain)));
+						MALI_PRINT(("\tL2 Use-count:%u\n", mali_pm_domain_get_use_count(domain)));
+						MALI_PRINT(("\tL2 Current power status:%s\n", (mali_pm_domain_get_mask(domain) & mali_pm_get_current_mask()) ? "On" : "Off"));
+						MALI_PRINT(("\tL2 Wanted  power status:%s\n", (mali_pm_domain_get_mask(domain) & mali_pm_get_wanted_mask()) ? "On" : "Off"));
+					}
+#endif
+				}
+			}
+		}
+		if (virtual_group_state == EXEC_STATE_WORKING) {
+			mali_group_dump_status(virtual_group);
+		}
+	}
+}
+
+void mali_executor_status_dump(void)
+{
+	mali_executor_lock();
+	mali_scheduler_lock();
+
+	/* print schedule queue status */
+	mali_scheduler_gp_pp_job_queue_print();
+
+	mali_scheduler_unlock();
+	mali_executor_unlock();
+}
diff --git a/utgard/r7p0/common/mali_executor.h b/utgard/r7p0/common/mali_executor.h
new file mode 100644
index 0000000..988693b
--- /dev/null
+++ b/utgard/r7p0/common/mali_executor.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2012, 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_EXECUTOR_H__
+#define __MALI_EXECUTOR_H__
+
+#include "mali_osk.h"
+#include "mali_scheduler_types.h"
+#include "mali_kernel_common.h"
+
+typedef enum {
+	MALI_EXECUTOR_HINT_GP_BOUND = 0
+#define MALI_EXECUTOR_HINT_MAX        1
+} mali_executor_hint;
+
+extern mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX];
+
+/* forward declare struct instead of using include */
+struct mali_session_data;
+struct mali_group;
+struct mali_pp_core;
+
+extern _mali_osk_spinlock_irq_t *mali_executor_lock_obj;
+
+#define MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj);
+
+_mali_osk_errcode_t mali_executor_initialize(void);
+void mali_executor_terminate(void);
+
+void mali_executor_populate(void);
+void mali_executor_depopulate(void);
+
+void mali_executor_suspend(void);
+void mali_executor_resume(void);
+
+u32 mali_executor_get_num_cores_total(void);
+u32 mali_executor_get_num_cores_enabled(void);
+struct mali_pp_core *mali_executor_get_virtual_pp(void);
+struct mali_group *mali_executor_get_virtual_group(void);
+
+void mali_executor_zap_all_active(struct mali_session_data *session);
+
+/**
+ * Schedule GP and PP according to bitmask.
+ *
+ * @param mask A scheduling bitmask.
+ * @param deferred_schedule MALI_TRUE if schedule should be deferred, MALI_FALSE if not.
+ */
+void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule);
+
+_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group, mali_bool in_upper_half);
+_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group, mali_bool in_upper_half);
+_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group, mali_bool in_upper_half);
+void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups);
+void mali_executor_group_power_down(struct mali_group *groups[], u32 num_groups);
+
+void mali_executor_abort_session(struct mali_session_data *session);
+
+void mali_executor_core_scaling_enable(void);
+void mali_executor_core_scaling_disable(void);
+mali_bool mali_executor_core_scaling_is_enabled(void);
+
+void mali_executor_group_enable(struct mali_group *group);
+void mali_executor_group_disable(struct mali_group *group);
+mali_bool mali_executor_group_is_disabled(struct mali_group *group);
+
+int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override);
+
+#if MALI_STATE_TRACKING
+u32 mali_executor_dump_state(char *buf, u32 size);
+#endif
+
+MALI_STATIC_INLINE void mali_executor_hint_enable(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	mali_executor_hints[hint] = MALI_TRUE;
+}
+
+MALI_STATIC_INLINE void mali_executor_hint_disable(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	mali_executor_hints[hint] = MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_executor_hint_is_enabled(mali_executor_hint hint)
+{
+	MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+	return mali_executor_hints[hint];
+}
+
+void mali_executor_running_status_print(void);
+void mali_executor_status_dump(void);
+void mali_executor_lock(void);
+void mali_executor_unlock(void);
+#endif /* __MALI_EXECUTOR_H__ */
diff --git a/utgard/r7p0/common/mali_gp.c b/utgard/r7p0/common/mali_gp.c
new file mode 100644
index 0000000..2a1eef2
--- /dev/null
+++ b/utgard/r7p0/common/mali_gp.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_gp.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "regs/mali_gp_regs.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+
+#include <mali_platform.h>
+
+static struct mali_gp_core *mali_global_gp_core;
+
+/* Interrupt handlers */
+static void mali_gp_irq_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data);
+
+struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group)
+{
+	struct mali_gp_core *core = NULL;
+
+	MALI_DEBUG_ASSERT(mali_global_gp_core == NULL);
+	MALI_DEBUG_PRINT(2, ("Mali GP: Creating Mali GP core: %s\n", resource->description));
+
+	core = _mali_osk_malloc(sizeof(struct mali_gp_core));
+	if (core != NULL) {
+		if (mali_hw_core_create(&core->hw_core, resource, MALIGP2_REGISTER_ADDRESS_SPACE_SIZE) == _MALI_OSK_ERR_OK) {
+			_mali_osk_errcode_t ret;
+
+			ret = mali_gp_reset(core);
+
+			if (ret == _MALI_OSK_ERR_OK) {
+				ret = mali_group_add_gp_core(group, core);
+				if (ret == _MALI_OSK_ERR_OK) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					core->irq = _mali_osk_irq_init(resource->irq,
+								       mali_group_upper_half_gp,
+								       group,
+								       mali_gp_irq_probe_trigger,
+								       mali_gp_irq_probe_ack,
+								       core,
+								       resource->description);
+					if (core->irq != NULL) {
+						MALI_DEBUG_PRINT(4, ("Mali GP: set global gp core from 0x%08X to 0x%08X\n", mali_global_gp_core, core));
+						mali_global_gp_core = core;
+
+						return core;
+					} else {
+						MALI_PRINT_ERROR(("Mali GP: Failed to setup interrupt handlers for GP core %s\n", core->hw_core.description));
+					}
+					mali_group_remove_gp_core(group);
+				} else {
+					MALI_PRINT_ERROR(("Mali GP: Failed to add core %s to group\n", core->hw_core.description));
+				}
+			}
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Failed to allocate memory for GP core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_gp_delete(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	_mali_osk_irq_term(core->irq);
+	mali_hw_core_delete(&core->hw_core);
+	mali_global_gp_core = NULL;
+	_mali_osk_free(core);
+}
+
+void mali_gp_stop_bus(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_STOP_BUS);
+}
+
+_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core)
+{
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Send the stop bus command. */
+	mali_gp_stop_bus(core);
+
+	/* Wait for bus to be stopped */
+	for (i = 0; i < MALI_REG_POLL_COUNT_SLOW; i++) {
+		if (mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS) & MALIGP2_REG_VAL_STATUS_BUS_STOPPED) {
+			break;
+		}
+	}
+
+	if (i == MALI_REG_POLL_COUNT_SLOW) {
+		MALI_PRINT_ERROR(("Mali GP: Failed to stop bus on %s\n", core->hw_core.description));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_gp_hard_reset(struct mali_gp_core *core)
+{
+	const u32 reset_wait_target_register = MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_LIMIT;
+	const u32 reset_invalid_value = 0xC0FFE000;
+	const u32 reset_check_value = 0xC01A0000;
+	const u32 reset_default_value = 0;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+	MALI_DEBUG_PRINT(4, ("Mali GP: Hard reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_invalid_value);
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_RESET);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value);
+		if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) {
+			break;
+		}
+	}
+
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Mali GP: The hard reset loop didn't work, unable to recover\n"));
+	}
+
+	mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_default_value); /* set it back to the default */
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+
+}
+
+void mali_gp_reset_async(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	MALI_DEBUG_PRINT(4, ("Mali GP: Reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALI400GP_REG_VAL_IRQ_RESET_COMPLETED);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALI400GP_REG_VAL_CMD_SOFT_RESET);
+
+}
+
+_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core)
+{
+	int i;
+	u32 rawstat = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		rawstat = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+		if (rawstat & MALI400GP_REG_VAL_IRQ_RESET_COMPLETED) {
+			break;
+		}
+	}
+
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Mali GP: Failed to reset core %s, rawstat: 0x%08x\n",
+				  core->hw_core.description, rawstat));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core)
+{
+	mali_gp_reset_async(core);
+	return mali_gp_reset_wait(core);
+}
+
+void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job)
+{
+	u32 startcmd = 0;
+	u32 *frame_registers = mali_gp_job_get_frame_registers(job);
+	u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job);
+	u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job);
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	if (mali_gp_job_has_vs_job(job)) {
+		startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_VS;
+	}
+
+	if (mali_gp_job_has_plbu_job(job)) {
+		startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_PLBU;
+	}
+
+	MALI_DEBUG_ASSERT(startcmd != 0);
+
+	mali_hw_core_register_write_array_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR, frame_registers, MALIGP2_NUM_REGS_FRAME);
+
+	if (counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0);
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE);
+	}
+	if (counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1);
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE);
+	}
+
+	MALI_DEBUG_PRINT(3, ("Mali GP: Starting job (0x%08x) on core %s with command 0x%08X\n", job, core->hw_core.description, startcmd));
+
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC);
+
+	/* Barrier to make sure the previous register write is finished */
+	_mali_osk_write_mem_barrier();
+
+	/* This is the command that starts the core.
+	 *
+	 * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just
+	 * force core to assert the completion interrupt.
+	 */
+#if !defined(PROFILING_SKIP_GP_JOBS)
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, startcmd);
+#else
+	{
+		u32 bits = 0;
+
+		if (mali_gp_job_has_vs_job(job))
+			bits = MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST;
+		if (mali_gp_job_has_plbu_job(job))
+			bits |= MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST;
+
+		mali_hw_core_register_write_relaxed(&core->hw_core,
+						    MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, bits);
+	}
+#endif
+
+	/* Barrier to make sure the previous register write is finished */
+	_mali_osk_write_mem_barrier();
+}
+
+void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr)
+{
+	u32 irq_readout;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+
+	if (irq_readout & MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM | MALIGP2_REG_VAL_IRQ_HANG));
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED); /* re-enable interrupts */
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR, start_addr);
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR, end_addr);
+
+		MALI_DEBUG_PRINT(3, ("Mali GP: Resuming job\n"));
+
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC);
+		_mali_osk_write_mem_barrier();
+	}
+	/*
+	 * else: core has been reset between PLBU_OUT_OF_MEM interrupt and this new heap response.
+	 * A timeout or a page fault on Mali-200 PP core can cause this behaviour.
+	 */
+}
+
+u32 mali_gp_core_get_version(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VERSION);
+}
+
+struct mali_gp_core *mali_gp_get_global_gp_core(void)
+{
+	return mali_global_gp_core;
+}
+
+/* ------------- interrupt handling below ------------------ */
+static void mali_gp_irq_probe_trigger(void *data)
+{
+	struct mali_gp_core *core = (struct mali_gp_core *)data;
+
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR);
+	_mali_osk_mem_barrier();
+}
+
+static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data)
+{
+	struct mali_gp_core *core = (struct mali_gp_core *)data;
+	u32 irq_readout;
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT);
+	if (MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR & irq_readout) {
+		mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR);
+		_mali_osk_mem_barrier();
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+/* ------ local helper functions below --------- */
+#if MALI_STATE_TRACKING
+u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\tGP: %s\n", core->hw_core.description);
+
+	return n;
+}
+#endif
+
+void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job)
+{
+	u32 val0 = 0;
+	u32 val1 = 0;
+	u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job);
+	u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job);
+
+	if (counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+		val0 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE);
+		mali_gp_job_set_perf_counter_value0(job, val0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C0, val0);
+		_mali_osk_profiling_record_global_counters(COUNTER_VP_0_C0, val0);
+#endif
+
+	}
+
+	if (counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		val1 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE);
+		mali_gp_job_set_perf_counter_value1(job, val1);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C1, val1);
+		_mali_osk_profiling_record_global_counters(COUNTER_VP_0_C1, val1);
+#endif
+	}
+}
diff --git a/utgard/r7p0/common/mali_gp.h b/utgard/r7p0/common/mali_gp.h
new file mode 100644
index 0000000..fe73906
--- /dev/null
+++ b/utgard/r7p0/common/mali_gp.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GP_H__
+#define __MALI_GP_H__
+
+#include "mali_osk.h"
+#include "mali_gp_job.h"
+#include "mali_hw_core.h"
+#include "regs/mali_gp_regs.h"
+
+struct mali_group;
+
+/**
+ * Definition of the GP core struct
+ * Used to track a GP core in the system.
+ */
+struct mali_gp_core {
+	struct mali_hw_core  hw_core;           /**< Common for all HW cores */
+	_mali_osk_irq_t     *irq;               /**< IRQ handler */
+};
+
+_mali_osk_errcode_t mali_gp_initialize(void);
+void mali_gp_terminate(void);
+
+struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group);
+void mali_gp_delete(struct mali_gp_core *core);
+
+void mali_gp_stop_bus(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core);
+void mali_gp_reset_async(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core);
+void mali_gp_hard_reset(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core);
+
+void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job);
+void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr);
+
+u32 mali_gp_core_get_version(struct mali_gp_core *core);
+
+struct mali_gp_core *mali_gp_get_global_gp_core(void);
+
+#if MALI_STATE_TRACKING
+u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size);
+#endif
+
+void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job);
+
+MALI_STATIC_INLINE const char *mali_gp_core_description(struct mali_gp_core *core)
+{
+	return core->hw_core.description;
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_gp_get_interrupt_result(struct mali_gp_core *core)
+{
+	u32 stat_used = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT) &
+			MALIGP2_REG_VAL_IRQ_MASK_USED;
+
+	if (stat_used == 0) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	} else if ((MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST |
+		    MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST) == stat_used) {
+		return MALI_INTERRUPT_RESULT_SUCCESS;
+	} else if (stat_used == MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST) {
+		return MALI_INTERRUPT_RESULT_SUCCESS_VS;
+	} else if (stat_used == MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST) {
+		return MALI_INTERRUPT_RESULT_SUCCESS_PLBU;
+	} else if (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM & stat_used) {
+		return MALI_INTERRUPT_RESULT_OOM;
+	}
+
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_get_rawstat(struct mali_gp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core,
+					  MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+}
+
+MALI_STATIC_INLINE u32 mali_gp_is_active(struct mali_gp_core *core)
+{
+	u32 status = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS);
+
+	return (status & MALIGP2_REG_VAL_STATUS_MASK_ACTIVE) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_gp_mask_all_interrupts(struct mali_gp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_NONE);
+}
+
+MALI_STATIC_INLINE void mali_gp_enable_interrupts(struct mali_gp_core *core, enum mali_interrupt_result exceptions)
+{
+	/* Enable all interrupts, except those specified in exceptions */
+	u32 value;
+
+	if (exceptions == MALI_INTERRUPT_RESULT_SUCCESS_VS) {
+		/* Enable all used except VS complete */
+		value = MALIGP2_REG_VAL_IRQ_MASK_USED &
+			~MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST;
+	} else {
+		MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_SUCCESS_PLBU ==
+				  exceptions);
+		/* Enable all used except PLBU complete */
+		value = MALIGP2_REG_VAL_IRQ_MASK_USED &
+			~MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST;
+	}
+
+	mali_hw_core_register_write(&core->hw_core,
+				    MALIGP2_REG_ADDR_MGMT_INT_MASK,
+				    value);
+}
+
+MALI_STATIC_INLINE u32 mali_gp_read_plbu_alloc_start_addr(struct mali_gp_core *core)
+{
+	return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR);
+}
+
+#endif /* __MALI_GP_H__ */
diff --git a/utgard/r7p0/common/mali_gp_job.c b/utgard/r7p0/common/mali_gp_job.c
new file mode 100644
index 0000000..89db3fa
--- /dev/null
+++ b/utgard/r7p0/common/mali_gp_job.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_gp_job.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_defer_bind.h"
+
+static u32 gp_counter_src0 = MALI_HW_CORE_NO_COUNTER;      /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+static u32 gp_counter_src1 = MALI_HW_CORE_NO_COUNTER;           /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+static void _mali_gp_del_varying_allocations(struct mali_gp_job *job);
+
+
+static int _mali_gp_add_varying_allocations(struct mali_session_data *session,
+		struct mali_gp_job *job,
+		u32 *alloc,
+		u32 num)
+{
+	int i = 0;
+	struct mali_gp_allocation_node *alloc_node;
+	mali_mem_allocation *mali_alloc = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+
+	for (i = 0 ; i < num ; i++) {
+		MALI_DEBUG_ASSERT(alloc[i]);
+		alloc_node = _mali_osk_calloc(1, sizeof(struct mali_gp_allocation_node));
+		if (alloc_node) {
+			INIT_LIST_HEAD(&alloc_node->node);
+			/* find mali allocation structure by vaddress*/
+			mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, alloc[i], 0);
+
+			if (likely(mali_vma_node)) {
+				mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+				MALI_DEBUG_ASSERT(alloc[i] == mali_vma_node->vm_node.start);
+			} else {
+				MALI_DEBUG_PRINT(1, ("ERROE!_mali_gp_add_varying_allocations,can't find allocation %d by address =0x%x, num=%d\n", i, alloc[i], num));
+				_mali_osk_free(alloc_node);
+				goto fail;
+			}
+			alloc_node->alloc = mali_alloc;
+			/* add to gp job varying alloc list*/
+			list_move(&alloc_node->node, &job->varying_alloc);
+		} else
+			goto fail;
+	}
+
+	return 0;
+fail:
+	MALI_DEBUG_PRINT(1, ("ERROE!_mali_gp_add_varying_allocations,failed to alloc memory!\n"));
+	_mali_gp_del_varying_allocations(job);
+	return -1;
+}
+
+
+static void _mali_gp_del_varying_allocations(struct mali_gp_job *job)
+{
+	struct mali_gp_allocation_node *alloc_node, *tmp_node;
+
+	list_for_each_entry_safe(alloc_node, tmp_node, &job->varying_alloc, node) {
+		list_del(&alloc_node->node);
+		kfree(alloc_node);
+	}
+	INIT_LIST_HEAD(&job->varying_alloc);
+}
+
+struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker)
+{
+	struct mali_gp_job *job;
+	u32 perf_counter_flag;
+	u32 __user *memory_list = NULL;
+	struct mali_gp_allocation_node *alloc_node, *tmp_node;
+
+	job = _mali_osk_calloc(1, sizeof(struct mali_gp_job));
+	if (job != NULL) {
+		job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_FINISHED, sizeof(_mali_uk_gp_job_finished_s));
+		if (job->finished_notification == NULL) {
+			goto fail3;
+		}
+
+		job->oom_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_STALLED, sizeof(_mali_uk_gp_job_suspended_s));
+		if (job->oom_notification == NULL) {
+			goto fail2;
+		}
+
+		if (_mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_gp_start_job_s)) != 0) {
+			goto fail1;
+		}
+
+		perf_counter_flag = mali_gp_job_get_perf_counter_flag(job);
+
+		/* case when no counters came from user space
+		 * so pass the debugfs / DS-5 provided global ones to the job object */
+		if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) ||
+		      (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) {
+			mali_gp_job_set_perf_counter_src0(job, mali_gp_job_get_gp_counter_src0());
+			mali_gp_job_set_perf_counter_src1(job, mali_gp_job_get_gp_counter_src1());
+		}
+
+		_mali_osk_list_init(&job->list);
+		job->session = session;
+		job->id = id;
+		job->heap_current_addr = job->uargs.frame_registers[4];
+		job->perf_counter_value0 = 0;
+		job->perf_counter_value1 = 0;
+		job->pid = _mali_osk_get_pid();
+		job->tid = _mali_osk_get_tid();
+
+
+		INIT_LIST_HEAD(&job->varying_alloc);
+		INIT_LIST_HEAD(&job->vary_todo);
+		job->dmem = NULL;
+
+		if (job->uargs.deferred_mem_num > session->allocation_mgr.mali_allocation_num) {
+			MALI_PRINT_ERROR(("Mali GP job: The number of  varying buffer to defer bind  is invalid !\n"));
+			goto fail1;
+		}
+
+		/* add varying allocation list*/
+		if (job->uargs.deferred_mem_num > 0) {
+			/* copy varying list from user space*/
+			job->varying_list = _mali_osk_calloc(1, sizeof(u32) * job->uargs.deferred_mem_num);
+			if (!job->varying_list) {
+				MALI_PRINT_ERROR(("Mali GP job: allocate varying_list failed varying_alloc_num = %d !\n", job->uargs.deferred_mem_num));
+				goto fail1;
+			}
+
+			memory_list = (u32 __user *)(uintptr_t)uargs->deferred_mem_list;
+
+			if (_mali_osk_copy_from_user(job->varying_list, memory_list, sizeof(u32) * job->uargs.deferred_mem_num) != 0) {
+				MALI_PRINT_ERROR(("Mali GP job: Failed to copy varying list from user space!\n"));
+				goto fail;
+			}
+
+			if (unlikely(_mali_gp_add_varying_allocations(session, job, job->varying_list,
+					job->uargs.deferred_mem_num))) {
+				MALI_PRINT_ERROR(("Mali GP job: _mali_gp_add_varying_allocations failed!\n"));
+				goto fail;
+			}
+
+			/* do preparetion for each allocation */
+			list_for_each_entry_safe(alloc_node, tmp_node, &job->varying_alloc, node) {
+				if (unlikely(mali_mem_defer_bind_allocation_prepare(alloc_node->alloc, &job->vary_todo, &job->required_varying_memsize) != _MALI_OSK_ERR_OK)) {
+					MALI_PRINT_ERROR(("Mali GP job: mali_mem_defer_bind_allocation_prepare failed!\n"));
+					goto fail;
+				}
+			}
+
+			_mali_gp_del_varying_allocations(job);
+
+			/* bind varying here, to avoid memory latency issue. */
+			{
+				struct mali_defer_mem_block dmem_block;
+
+				INIT_LIST_HEAD(&dmem_block.free_pages);
+				atomic_set(&dmem_block.num_free_pages, 0);
+
+				if (mali_mem_prepare_mem_for_job(job, &dmem_block)) {
+					MALI_PRINT_ERROR(("Mali GP job: mali_mem_prepare_mem_for_job failed!\n"));
+					goto fail;
+				}
+				if (mali_mem_defer_bind(job, &dmem_block) != _MALI_OSK_ERR_OK) {
+					MALI_PRINT_ERROR(("gp job create, mali_mem_defer_bind failed! GP %x fail!", job));
+					goto fail;
+				}
+			}
+
+			if (job->uargs.varying_memsize > MALI_UK_BIG_VARYING_SIZE) {
+				job->big_job = 1;
+			}
+		}
+		job->pp_tracker = pp_tracker;
+		if (job->pp_tracker != NULL) {
+			/* Take a reference on PP job's tracker that will be released when the GP
+			   job is done. */
+			mali_timeline_system_tracker_get(session->timeline_system, pp_tracker);
+		}
+
+		mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_GP, NULL, job);
+		mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence));
+
+		return job;
+	} else {
+		MALI_PRINT_ERROR(("Mali GP job: _mali_osk_calloc failed!\n"));
+		return NULL;
+	}
+
+
+fail:
+	_mali_osk_free(job->varying_list);
+	/* Handle allocate fail here, free all varying node */
+	{
+		struct mali_backend_bind_list *bkn, *bkn_tmp;
+
+		list_for_each_entry_safe(bkn, bkn_tmp, &job->vary_todo, node) {
+			list_del(&bkn->node);
+			_mali_osk_free(bkn);
+		}
+	}
+fail1:
+	_mali_osk_notification_delete(job->oom_notification);
+fail2:
+	_mali_osk_notification_delete(job->finished_notification);
+fail3:
+	_mali_osk_free(job);
+	return NULL;
+}
+
+void mali_gp_job_delete(struct mali_gp_job *job)
+{
+	struct mali_backend_bind_list *bkn, *bkn_tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(job->pp_tracker == NULL);
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list));
+	_mali_osk_free(job->varying_list);
+
+	/* Handle allocate fail here, free all varying node */
+	list_for_each_entry_safe(bkn, bkn_tmp, &job->vary_todo, node) {
+		list_del(&bkn->node);
+		_mali_osk_free(bkn);
+	}
+
+	mali_mem_defer_dmem_free(job);
+
+	/* de-allocate the pre-allocated oom notifications */
+	if (job->oom_notification != NULL) {
+		_mali_osk_notification_delete(job->oom_notification);
+		job->oom_notification = NULL;
+	}
+	if (job->finished_notification != NULL) {
+		_mali_osk_notification_delete(job->finished_notification);
+		job->finished_notification = NULL;
+	}
+
+	_mali_osk_free(job);
+}
+
+void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list)
+{
+	struct mali_gp_job *iter;
+	struct mali_gp_job *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Find position in list/queue where job should be added. */
+	_MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list,
+					    struct mali_gp_job, list) {
+
+		/* A span is used to handle job ID wrapping. */
+		bool job_is_after = (mali_gp_job_get_id(job) -
+				     mali_gp_job_get_id(iter)) <
+				    MALI_SCHEDULER_JOB_ID_SPAN;
+
+		if (job_is_after) {
+			break;
+		}
+	}
+
+	_mali_osk_list_add(&job->list, &iter->list);
+}
+
+u32 mali_gp_job_get_gp_counter_src0(void)
+{
+	return gp_counter_src0;
+}
+
+void mali_gp_job_set_gp_counter_src0(u32 counter)
+{
+	gp_counter_src0 = counter;
+}
+
+u32 mali_gp_job_get_gp_counter_src1(void)
+{
+	return gp_counter_src1;
+}
+
+void mali_gp_job_set_gp_counter_src1(u32 counter)
+{
+	gp_counter_src1 = counter;
+}
+
+mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (job->pp_tracker != NULL) {
+		schedule_mask |= mali_timeline_system_tracker_put(job->session->timeline_system, job->pp_tracker, MALI_FALSE == success);
+		job->pp_tracker = NULL;
+	}
+
+	return schedule_mask;
+}
diff --git a/utgard/r7p0/common/mali_gp_job.h b/utgard/r7p0/common/mali_gp_job.h
new file mode 100644
index 0000000..c59b6ad
--- /dev/null
+++ b/utgard/r7p0/common/mali_gp_job.h
@@ -0,0 +1,324 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GP_JOB_H__
+#define __MALI_GP_JOB_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_session.h"
+#include "mali_timeline.h"
+#include "mali_scheduler_types.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+#include "mali_timeline.h"
+
+struct mali_defer_mem;
+/**
+ * This structure represents a GP job
+ *
+ * The GP job object itself is not protected by any single lock,
+ * but relies on other locks instead (scheduler, executor and timeline lock).
+ * Think of the job object as moving between these sub systems through-out
+ * its lifetime. Different part of the GP job struct is used by different
+ * subsystems. Accessor functions ensure that correct lock is taken.
+ * Do NOT access any data members directly from outside this module!
+ */
+struct mali_gp_job {
+	/*
+	 * These members are typically only set at creation,
+	 * and only read later on.
+	 * They do not require any lock protection.
+	 */
+	_mali_uk_gp_start_job_s uargs;                     /**< Arguments from user space */
+	struct mali_session_data *session;                 /**< Session which submitted this job */
+	u32 pid;                                           /**< Process ID of submitting process */
+	u32 tid;                                           /**< Thread ID of submitting thread */
+	u32 id;                                            /**< Identifier for this job in kernel space (sequential numbering) */
+	u32 cache_order;                                   /**< Cache order used for L2 cache flushing (sequential numbering) */
+	struct mali_timeline_tracker tracker;              /**< Timeline tracker for this job */
+	struct mali_timeline_tracker *pp_tracker;          /**< Pointer to Timeline tracker for PP job that depends on this job. */
+	_mali_osk_notification_t *finished_notification;   /**< Notification sent back to userspace on job complete */
+
+	/*
+	 * These members are used by the scheduler,
+	 * protected by scheduler lock
+	 */
+	_mali_osk_list_t list;                             /**< Used to link jobs together in the scheduler queue */
+
+	/*
+	 * These members are used by the executor and/or group,
+	 * protected by executor lock
+	 */
+	_mali_osk_notification_t *oom_notification;        /**< Notification sent back to userspace on OOM */
+
+	/*
+	 * Set by executor/group on job completion, read by scheduler when
+	 * returning job to user. Hold executor lock when setting,
+	 * no lock needed when reading
+	 */
+	u32 heap_current_addr;                             /**< Holds the current HEAP address when the job has completed */
+	u32 perf_counter_value0;                           /**< Value of performance counter 0 (to be returned to user space) */
+	u32 perf_counter_value1;                           /**< Value of performance counter 1 (to be returned to user space) */
+	struct mali_defer_mem *dmem;                                          /** < used for defer bind to store dmem info */
+	struct list_head varying_alloc;                    /**< hold the list of varying allocations */
+	u32 bind_flag;                                     /** < flag for deferbind*/
+	u32 *varying_list;                                 /**< varying memory list need to to defer bind*/
+	struct list_head vary_todo;                        /**< list of backend list need to do defer bind*/
+	u32 required_varying_memsize;                      /** < size of varying memory to reallocate*/
+	u32 big_job;                                       /** < if the gp job have large varying output and may take long time*/
+};
+
+#define MALI_DEFER_BIND_MEMORY_PREPARED (0x1 << 0)
+#define MALI_DEFER_BIND_MEMORY_BINDED (0x1 << 2)
+
+struct mali_gp_allocation_node {
+	struct list_head node;
+	mali_mem_allocation *alloc;
+};
+
+struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker);
+void mali_gp_job_delete(struct mali_gp_job *job);
+
+u32 mali_gp_job_get_gp_counter_src0(void);
+void mali_gp_job_set_gp_counter_src0(u32 counter);
+u32 mali_gp_job_get_gp_counter_src1(void);
+void mali_gp_job_set_gp_counter_src1(u32 counter);
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job == NULL) ? 0 : job->id;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_cache_order(struct mali_gp_job *job,
+		u32 cache_order)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	job->cache_order = cache_order;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_cache_order(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job == NULL) ? 0 : job->cache_order;
+}
+
+MALI_STATIC_INLINE u64 mali_gp_job_get_user_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.user_job_ptr;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_frame_builder_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_builder_id;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_flush_id(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.flush_id;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_pid(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->pid;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_tid(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->tid;
+}
+
+MALI_STATIC_INLINE u32 *mali_gp_job_get_frame_registers(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_registers;
+}
+
+MALI_STATIC_INLINE struct mali_session_data *mali_gp_job_get_session(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->session;
+}
+
+MALI_STATIC_INLINE mali_bool mali_gp_job_has_vs_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.frame_registers[0] != job->uargs.frame_registers[1]) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_gp_job_has_plbu_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.frame_registers[2] != job->uargs.frame_registers[3]) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_current_heap_addr(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->heap_current_addr;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_current_heap_addr(struct mali_gp_job *job, u32 heap_addr)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->heap_current_addr = heap_addr;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_flag(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_flag;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src0(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_src0;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src1(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_src1;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value0(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value0;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value1(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value1;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src0(struct mali_gp_job *job, u32 src)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.perf_counter_src0 = src;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src1(struct mali_gp_job *job, u32 src)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.perf_counter_src1 = src;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value0(struct mali_gp_job *job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value0 = value;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value1(struct mali_gp_job *job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value1 = value;
+}
+
+void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list);
+
+MALI_STATIC_INLINE void mali_gp_job_list_move(struct mali_gp_job *job,
+		_mali_osk_list_t *list)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list));
+	_mali_osk_list_move(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_gp_job_list_remove(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->list);
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *
+mali_gp_job_get_finished_notification(struct mali_gp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->finished_notification);
+
+	notification = job->finished_notification;
+	job->finished_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *mali_gp_job_get_oom_notification(
+	struct mali_gp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job->oom_notification);
+
+	notification = job->oom_notification;
+	job->oom_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_oom_notification(
+	struct mali_gp_job *job,
+	_mali_osk_notification_t *notification)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	MALI_DEBUG_ASSERT(job->oom_notification == NULL);
+	job->oom_notification = notification;
+}
+
+MALI_STATIC_INLINE struct mali_timeline_tracker *mali_gp_job_get_tracker(
+	struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return &(job->tracker);
+}
+
+
+MALI_STATIC_INLINE u32 *mali_gp_job_get_timeline_point_ptr(
+	struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr;
+}
+
+
+/**
+ * Release reference on tracker for PP job that depends on this GP job.
+ *
+ * @note If GP job has a reference on tracker, this function MUST be called before the GP job is
+ * deleted.
+ *
+ * @param job GP job that is done.
+ * @param success MALI_TRUE if job completed successfully, MALI_FALSE if not.
+ * @return A scheduling bitmask indicating whether scheduling needs to be done.
+ */
+mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success);
+
+#endif /* __MALI_GP_JOB_H__ */
diff --git a/utgard/r7p0/common/mali_group.c b/utgard/r7p0/common/mali_group.c
new file mode 100644
index 0000000..26cbce4
--- /dev/null
+++ b/utgard/r7p0/common/mali_group.c
@@ -0,0 +1,1953 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/version.h>
+#if (KERNEL_VERSION(3, 14, 29) > LINUX_VERSION_CODE)
+#include <mach/cpu.h>
+#endif
+#include "mali_kernel_common.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_mmu.h"
+#include "mali_dlbu.h"
+#include "mali_broadcast.h"
+#include "mali_scheduler.h"
+#include "mali_osk_profiling.h"
+#include "mali_osk_mali.h"
+#include "mali_pm_domain.h"
+#include "mali_pm.h"
+#include "mali_executor.h"
+#include <mali_platform.h>
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+#include <linux/sched.h>
+#include <trace/events/gpu.h>
+#endif
+
+#define MALI_MAX_NUM_DOMAIN_REFS (MALI_MAX_NUMBER_OF_GROUPS * 2)
+
+#if defined(CONFIG_MALI400_PROFILING)
+static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num);
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+static struct mali_group *mali_global_groups[MALI_MAX_NUMBER_OF_GROUPS] = { NULL, };
+static u32 mali_global_num_groups;
+
+/* SW timer for job execution */
+int mali_max_job_runtime = MALI_MAX_JOB_RUNTIME_DEFAULT;
+
+/* local helper functions */
+static void mali_group_bottom_half_mmu(void *data);
+static void mali_group_bottom_half_gp(void *data);
+static void mali_group_bottom_half_pp(void *data);
+static void mali_group_timeout(void *data);
+static void mali_group_reset_pp(struct mali_group *group);
+static void mali_group_reset_mmu(struct mali_group *group);
+
+static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session, mali_bool is_reload);
+static void mali_group_recovery_reset(struct mali_group *group);
+
+struct mali_group *mali_group_create(struct mali_l2_cache_core *core,
+	struct mali_dlbu_core *dlbu,
+	struct mali_bcast_unit *bcast,
+	u32 domain_index)
+{
+    struct mali_group *group = NULL;
+
+    if (mali_global_num_groups >= MALI_MAX_NUMBER_OF_GROUPS) {
+	MALI_PRINT_ERROR(("Mali group: Too many group objects created\n"));
+	return NULL;
+    }
+
+    group = _mali_osk_calloc(1, sizeof(struct mali_group));
+    if (group != NULL) {
+	group->timeout_timer = _mali_osk_timer_init();
+	if (group->timeout_timer != NULL) {
+	    _mali_osk_timer_setcallback(group->timeout_timer, mali_group_timeout, (void *)group);
+
+	    group->l2_cache_core[0] = core;
+	    _mali_osk_list_init(&group->group_list);
+	    _mali_osk_list_init(&group->executor_list);
+	    _mali_osk_list_init(&group->pm_domain_list);
+	    group->bcast_core = bcast;
+	    group->dlbu_core = dlbu;
+
+	    /* register this object as a part of the correct power domain */
+	    if ((NULL != core) || (NULL != dlbu) || (bcast != NULL))
+		group->pm_domain = mali_pm_register_group(domain_index, group);
+
+	    mali_global_groups[mali_global_num_groups] = group;
+	    mali_global_num_groups++;
+
+	    return group;
+	}
+	_mali_osk_free(group);
+    }
+
+    return NULL;
+}
+
+void mali_group_delete(struct mali_group *group)
+{
+    u32 i;
+
+    MALI_DEBUG_PRINT(4, ("Deleting group %s\n",
+		mali_group_core_description(group)));
+
+    MALI_DEBUG_ASSERT(group->parent_group == NULL);
+    MALI_DEBUG_ASSERT((MALI_GROUP_STATE_INACTIVE == group->state) || ((group->state == MALI_GROUP_STATE_ACTIVATION_PENDING)));
+
+    /* Delete the resources that this group owns */
+    if (group->gp_core != NULL) {
+	mali_gp_delete(group->gp_core);
+    }
+
+    if (group->pp_core != NULL) {
+	mali_pp_delete(group->pp_core);
+    }
+
+    if (group->mmu != NULL) {
+	mali_mmu_delete(group->mmu);
+    }
+
+    if (mali_group_is_virtual(group)) {
+	/* Remove all groups from virtual group */
+	struct mali_group *child;
+	struct mali_group *temp;
+
+	_MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+	    child->parent_group = NULL;
+	    mali_group_delete(child);
+	}
+
+	mali_dlbu_delete(group->dlbu_core);
+
+	if (group->bcast_core != NULL) {
+	    mali_bcast_unit_delete(group->bcast_core);
+	}
+    }
+
+    for (i = 0; i < mali_global_num_groups; i++) {
+	if (mali_global_groups[i] == group) {
+	    mali_global_groups[i] = NULL;
+	    mali_global_num_groups--;
+
+	    if (i != mali_global_num_groups) {
+		/* We removed a group from the middle of the array -- move the last
+		 * group to the current position to close the gap */
+		mali_global_groups[i] = mali_global_groups[mali_global_num_groups];
+		mali_global_groups[mali_global_num_groups] = NULL;
+	    }
+
+	    break;
+	}
+    }
+
+    if (group->timeout_timer != NULL) {
+	_mali_osk_timer_del(group->timeout_timer);
+	_mali_osk_timer_term(group->timeout_timer);
+    }
+
+    if (group->bottom_half_work_mmu != NULL) {
+	_mali_osk_wq_delete_work(group->bottom_half_work_mmu);
+    }
+
+    if (group->bottom_half_work_gp != NULL) {
+	_mali_osk_wq_delete_work(group->bottom_half_work_gp);
+    }
+
+    if (group->bottom_half_work_pp != NULL) {
+	_mali_osk_wq_delete_work(group->bottom_half_work_pp);
+    }
+
+    _mali_osk_free(group);
+}
+
+_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group, struct mali_mmu_core *mmu_core)
+{
+    /* This group object now owns the MMU core object */
+    group->mmu = mmu_core;
+    group->bottom_half_work_mmu = _mali_osk_wq_create_work(mali_group_bottom_half_mmu, group);
+    if (group->bottom_half_work_mmu == NULL) {
+	return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_mmu_core(struct mali_group *group)
+{
+    /* This group object no longer owns the MMU core object */
+    group->mmu = NULL;
+    if (group->bottom_half_work_mmu != NULL) {
+	_mali_osk_wq_delete_work(group->bottom_half_work_mmu);
+    }
+}
+
+_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group, struct mali_gp_core *gp_core)
+{
+    /* This group object now owns the GP core object */
+    group->gp_core = gp_core;
+    group->bottom_half_work_gp = _mali_osk_wq_create_work(mali_group_bottom_half_gp, group);
+    if (group->bottom_half_work_gp == NULL) {
+	return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_gp_core(struct mali_group *group)
+{
+    /* This group object no longer owns the GP core object */
+    group->gp_core = NULL;
+    if (group->bottom_half_work_gp != NULL) {
+	_mali_osk_wq_delete_work(group->bottom_half_work_gp);
+    }
+}
+
+_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group, struct mali_pp_core *pp_core)
+{
+    /* This group object now owns the PP core object */
+    group->pp_core = pp_core;
+    group->bottom_half_work_pp = _mali_osk_wq_create_work(mali_group_bottom_half_pp, group);
+    if (group->bottom_half_work_pp == NULL) {
+	return _MALI_OSK_ERR_FAULT;
+    }
+    return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_pp_core(struct mali_group *group)
+{
+    /* This group object no longer owns the PP core object */
+    group->pp_core = NULL;
+    if (group->bottom_half_work_pp != NULL) {
+	_mali_osk_wq_delete_work(group->bottom_half_work_pp);
+    }
+}
+
+enum mali_group_state mali_group_activate(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(4, ("Group: Activating group %s\n",
+		mali_group_core_description(group)));
+
+    if (group->state == MALI_GROUP_STATE_INACTIVE) {
+	/* Group is inactive, get PM refs in order to power up */
+
+	/*
+	 * We'll take a maximum of 2 power domain references pr group,
+	 * one for the group itself, and one for it's L2 cache.
+	 */
+	struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS];
+	struct mali_group *groups[MALI_MAX_NUM_DOMAIN_REFS];
+	u32 num_domains = 0;
+	mali_bool all_groups_on;
+
+	/* Deal with child groups first */
+	if (mali_group_is_virtual(group)) {
+	    /*
+	     * The virtual group might have 0, 1 or 2 L2s in
+	     * its l2_cache_core array, but we ignore these and
+	     * let the child groups take the needed L2 cache ref
+	     * on behalf of the virtual group.
+	     * In other words; The L2 refs are taken in pair with
+	     * the physical group which the L2 is attached to.
+	     */
+	    struct mali_group *child;
+	    struct mali_group *temp;
+
+	    /*
+	     * Child group is inactive, get PM
+	     * refs in order to power up.
+	     */
+	    _MALI_OSK_LIST_FOREACHENTRY(child, temp,
+		    &group->group_list,
+		    struct mali_group, group_list) {
+		MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE
+			== child->state);
+
+		child->state = MALI_GROUP_STATE_ACTIVATION_PENDING;
+
+		MALI_DEBUG_ASSERT_POINTER(
+			child->pm_domain);
+		domains[num_domains] = child->pm_domain;
+		groups[num_domains] = child;
+		num_domains++;
+
+		/*
+		 * Take L2 domain ref for child group.
+		 */
+		MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS
+			> num_domains);
+		domains[num_domains] = mali_l2_cache_get_pm_domain(
+			child->l2_cache_core[0]);
+		groups[num_domains] = NULL;
+		MALI_DEBUG_ASSERT(NULL ==
+			child->l2_cache_core[1]);
+		num_domains++;
+	    }
+	} else {
+	    /* Take L2 domain ref for physical groups. */
+	    MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+		    num_domains);
+
+	    domains[num_domains] = mali_l2_cache_get_pm_domain(
+		    group->l2_cache_core[0]);
+	    groups[num_domains] = NULL;
+	    MALI_DEBUG_ASSERT(group->l2_cache_core[1] == NULL);
+	    num_domains++;
+	}
+
+	/* Do the group itself last (it's dependencies first) */
+
+	group->state = MALI_GROUP_STATE_ACTIVATION_PENDING;
+
+	MALI_DEBUG_ASSERT_POINTER(group->pm_domain);
+	domains[num_domains] = group->pm_domain;
+	groups[num_domains] = group;
+	num_domains++;
+
+	all_groups_on = mali_pm_get_domain_refs(domains, groups,
+		num_domains);
+
+	/*
+	 * Complete activation for group, include
+	 * virtual group or physical group.
+	 */
+	if (all_groups_on == MALI_TRUE) {
+
+	    mali_group_set_active(group);
+	}
+    } else if (group->state == MALI_GROUP_STATE_ACTIVE) {
+	/* Already active */
+	MALI_DEBUG_ASSERT(group->power_is_on == MALI_TRUE);
+    } else {
+	/*
+	 * Activation already pending, group->power_is_on could
+	 * be both true or false. We need to wait for power up
+	 * notification anyway.
+	 */
+	MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING
+		== group->state);
+    }
+
+    MALI_DEBUG_PRINT(4, ("Group: group %s activation result: %s\n",
+		mali_group_core_description(group),
+		MALI_GROUP_STATE_ACTIVE == group->state ?
+		"ACTIVE" : "PENDING"));
+
+    return group->state;
+}
+
+mali_bool mali_group_set_active(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(group->state == MALI_GROUP_STATE_ACTIVATION_PENDING);
+    MALI_DEBUG_ASSERT(group->power_is_on == MALI_TRUE);
+
+    MALI_DEBUG_PRINT(4, ("Group: Activation completed for %s\n",
+		mali_group_core_description(group)));
+
+    if (mali_group_is_virtual(group)) {
+	struct mali_group *child;
+	struct mali_group *temp;
+
+	_MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list,
+		struct mali_group, group_list) {
+	    if (child->power_is_on != MALI_TRUE) {
+		return MALI_FALSE;
+	    }
+
+	    child->state = MALI_GROUP_STATE_ACTIVE;
+	}
+
+	mali_group_reset(group);
+    }
+
+    /* Go to ACTIVE state */
+    group->state = MALI_GROUP_STATE_ACTIVE;
+
+    return MALI_TRUE;
+}
+
+mali_bool mali_group_deactivate(struct mali_group *group)
+{
+    struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS];
+    u32 num_domains = 0;
+    mali_bool power_down = MALI_FALSE;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(group->state != MALI_GROUP_STATE_INACTIVE);
+
+    MALI_DEBUG_PRINT(3, ("Group: Deactivating group %s\n",
+		mali_group_core_description(group)));
+
+    group->state = MALI_GROUP_STATE_INACTIVE;
+
+    MALI_DEBUG_ASSERT_POINTER(group->pm_domain);
+    domains[num_domains] = group->pm_domain;
+    num_domains++;
+
+    if (mali_group_is_virtual(group)) {
+	/* Release refs for all child groups */
+	struct mali_group *child;
+	struct mali_group *temp;
+
+	_MALI_OSK_LIST_FOREACHENTRY(child, temp,
+		&group->group_list,
+		struct mali_group, group_list) {
+	    child->state = MALI_GROUP_STATE_INACTIVE;
+
+	    MALI_DEBUG_ASSERT_POINTER(child->pm_domain);
+	    domains[num_domains] = child->pm_domain;
+	    num_domains++;
+
+	    /* Release L2 cache domain for child groups */
+	    MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+		    num_domains);
+	    domains[num_domains] = mali_l2_cache_get_pm_domain(
+		    child->l2_cache_core[0]);
+	    MALI_DEBUG_ASSERT(child->l2_cache_core[1] == NULL);
+	    num_domains++;
+	}
+
+	/*
+	 * Must do mali_group_power_down() steps right here for
+	 * virtual group, because virtual group itself is likely to
+	 * stay powered on, however child groups are now very likely
+	 * to be powered off (and thus lose their state).
+	 */
+
+	mali_group_clear_session(group);
+	/*
+	 * Disable the broadcast unit (clear it's mask).
+	 * This is needed in case the GPU isn't actually
+	 * powered down at this point and groups are
+	 * removed from an inactive virtual group.
+	 * If not, then the broadcast unit will intercept
+	 * their interrupts!
+	 */
+	mali_bcast_disable(group->bcast_core);
+    } else {
+	/* Release L2 cache domain for physical groups */
+	MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+		num_domains);
+	domains[num_domains] = mali_l2_cache_get_pm_domain(
+		group->l2_cache_core[0]);
+	MALI_DEBUG_ASSERT(group->l2_cache_core[1] == NULL);
+	num_domains++;
+    }
+
+    power_down = mali_pm_put_domain_refs(domains, num_domains);
+
+    return power_down;
+}
+
+void mali_group_power_up(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Power up for %s\n",
+		mali_group_core_description(group)));
+
+    group->power_is_on = MALI_TRUE;
+
+    if (MALI_FALSE == mali_group_is_virtual(group)
+	    && MALI_FALSE == mali_group_is_in_virtual(group)) {
+	mali_group_reset(group);
+    }
+
+    /*
+     * When we just acquire only one physical group form virt group,
+     * we should remove the bcast&dlbu mask from virt group and
+     * reset bcast and dlbu core, although part of pp cores in virt
+     * group maybe not be powered on.
+     */
+    if (mali_group_is_virtual(group) == MALI_TRUE) {
+	mali_bcast_reset(group->bcast_core);
+	mali_dlbu_update_mask(group->dlbu_core);
+    }
+}
+
+void mali_group_power_down(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT(group->power_is_on == MALI_TRUE);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Power down for %s\n",
+		mali_group_core_description(group)));
+
+    group->power_is_on = MALI_FALSE;
+
+    if (mali_group_is_virtual(group)) {
+	/*
+	 * What we do for physical jobs in this function should
+	 * already have been done in mali_group_deactivate()
+	 * for virtual group.
+	 */
+	MALI_DEBUG_ASSERT(group->session == NULL);
+    } else {
+	mali_group_clear_session(group);
+    }
+}
+
+MALI_DEBUG_CODE(static void mali_group_print_virtual(struct mali_group *vgroup)
+	{
+	u32 i;
+	struct mali_group *group;
+	struct mali_group *temp;
+
+	MALI_DEBUG_PRINT(4, ("Virtual group %s (%p)\n",
+		mali_group_core_description(vgroup),
+		vgroup));
+	MALI_DEBUG_PRINT(4, ("l2_cache_core[0] = %p, ref = %d\n", vgroup->l2_cache_core[0], vgroup->l2_cache_core_ref_count[0]));
+	MALI_DEBUG_PRINT(4, ("l2_cache_core[1] = %p, ref = %d\n", vgroup->l2_cache_core[1], vgroup->l2_cache_core_ref_count[1]));
+
+	i = 0;
+	_MALI_OSK_LIST_FOREACHENTRY(group, temp, &vgroup->group_list, struct mali_group, group_list) {
+	MALI_DEBUG_PRINT(4, ("[%d] %s (%p), l2_cache_core[0] = %p\n",
+		i, mali_group_core_description(group),
+		group, group->l2_cache_core[0]));
+	i++;
+	}
+	})
+
+static void mali_group_dump_core_status(struct mali_group *group)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT(NULL != group->gp_core || (NULL != group->pp_core && !mali_group_is_virtual(group)));
+
+	if (group->gp_core != NULL) {
+		MALI_PRINT(("Dump Group %s\n", group->gp_core->hw_core.description));
+
+		for (i = 0; i < 0xA8; i += 0x10) {
+			MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->gp_core->hw_core, i),
+				    mali_hw_core_register_read(&group->gp_core->hw_core, i + 4),
+				    mali_hw_core_register_read(&group->gp_core->hw_core, i + 8),
+				    mali_hw_core_register_read(&group->gp_core->hw_core, i + 12)));
+		}
+
+
+	} else {
+		MALI_PRINT(("Dump Group %s\n", group->pp_core->hw_core.description));
+
+		for (i = 0; i < 0x5c; i += 0x10) {
+			MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->pp_core->hw_core, i),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 4),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 8),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 12)));
+		}
+
+		/* Ignore some minor registers */
+		for (i = 0x1000; i < 0x1068; i += 0x10) {
+			MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->pp_core->hw_core, i),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 4),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 8),
+				    mali_hw_core_register_read(&group->pp_core->hw_core, i + 12)));
+		}
+	}
+
+	MALI_PRINT(("Dump Group MMU\n"));
+	for (i = 0; i < 0x24; i += 0x10) {
+		MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->mmu->hw_core, i),
+			    mali_hw_core_register_read(&group->mmu->hw_core, i + 4),
+			    mali_hw_core_register_read(&group->mmu->hw_core, i + 8),
+			    mali_hw_core_register_read(&group->mmu->hw_core, i + 12)));
+	}
+}
+
+
+/**
+ * @Dump group status
+ */
+void mali_group_dump_status(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	if (mali_group_is_virtual(group)) {
+		struct mali_group *group_c;
+		struct mali_group *temp;
+
+		_MALI_OSK_LIST_FOREACHENTRY(group_c, temp, &group->group_list, struct mali_group, group_list) {
+			mali_group_dump_core_status(group_c);
+		}
+	} else {
+		mali_group_dump_core_status(group);
+	}
+}
+
+/**
+ * @brief Add child group to virtual group parent
+ */
+void mali_group_add_group(struct mali_group *parent, struct mali_group *child)
+{
+    mali_bool found;
+    u32 i;
+
+    MALI_DEBUG_PRINT(3, ("Adding group %s to virtual group %s\n",
+		mali_group_core_description(child),
+		mali_group_core_description(parent)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+    MALI_DEBUG_ASSERT(!mali_group_is_virtual(child));
+    MALI_DEBUG_ASSERT(child->parent_group == NULL);
+
+    _mali_osk_list_addtail(&child->group_list, &parent->group_list);
+
+    child->parent_group = parent;
+
+    MALI_DEBUG_ASSERT_POINTER(child->l2_cache_core[0]);
+
+    MALI_DEBUG_PRINT(4, ("parent->l2_cache_core: [0] = %p, [1] = %p\n", parent->l2_cache_core[0], parent->l2_cache_core[1]));
+    MALI_DEBUG_PRINT(4, ("child->l2_cache_core: [0] = %p, [1] = %p\n", child->l2_cache_core[0], child->l2_cache_core[1]));
+
+    /* Keep track of the L2 cache cores of child groups */
+    found = MALI_FALSE;
+    for (i = 0; i < 2; i++) {
+	if (parent->l2_cache_core[i] == child->l2_cache_core[0]) {
+	    MALI_DEBUG_ASSERT(parent->l2_cache_core_ref_count[i] > 0);
+	    parent->l2_cache_core_ref_count[i]++;
+	    found = MALI_TRUE;
+	}
+    }
+
+    if (!found) {
+	/* First time we see this L2 cache, add it to our list */
+	i = (parent->l2_cache_core[0] == NULL) ? 0 : 1;
+
+	MALI_DEBUG_PRINT(4, ("First time we see l2_cache %p. Adding to [%d] = %p\n", child->l2_cache_core[0], i, parent->l2_cache_core[i]));
+
+	MALI_DEBUG_ASSERT(parent->l2_cache_core[i] == NULL);
+
+	parent->l2_cache_core[i] = child->l2_cache_core[0];
+	parent->l2_cache_core_ref_count[i]++;
+    }
+
+    /* Update Broadcast Unit and DLBU */
+    mali_bcast_add_group(parent->bcast_core, child);
+    mali_dlbu_add_group(parent->dlbu_core, child);
+
+    if (parent->power_is_on == MALI_TRUE) {
+	mali_bcast_reset(parent->bcast_core);
+	mali_dlbu_update_mask(parent->dlbu_core);
+    }
+
+    if (child->power_is_on == MALI_TRUE) {
+	if (parent->session == NULL) {
+	    if (child->session != NULL) {
+		/*
+		 * Parent has no session, so clear
+		 * child session as well.
+		 */
+		mali_mmu_activate_empty_page_directory(child->mmu);
+	    }
+	} else {
+	    if (parent->session == child->session) {
+		/* We already have same session as parent,
+		 * so a simple zap should be enough.
+		 */
+		mali_mmu_zap_tlb(child->mmu);
+	    } else {
+		/*
+		 * Parent has a different session, so we must
+		 * switch to that sessions page table
+		 */
+		mali_mmu_activate_page_directory(child->mmu, mali_session_get_page_directory(parent->session));
+	    }
+
+	    /* It is the parent which keeps the session from now on */
+	    child->session = NULL;
+	}
+    } else {
+	/* should have been cleared when child was powered down */
+	MALI_DEBUG_ASSERT(child->session == NULL);
+    }
+
+    /* Start job on child when parent is active */
+    if (parent->pp_running_job != NULL) {
+	struct mali_pp_job *job = parent->pp_running_job;
+
+	MALI_DEBUG_PRINT(3, ("Group %x joining running job %d on virtual group %x\n",
+		    child, mali_pp_job_get_id(job), parent));
+
+	/* Only allowed to add active child to an active parent */
+	MALI_DEBUG_ASSERT(parent->state == MALI_GROUP_STATE_ACTIVE);
+	MALI_DEBUG_ASSERT(child->state == MALI_GROUP_STATE_ACTIVE);
+
+	mali_pp_job_start(child->pp_core, job, mali_pp_core_get_id(child->pp_core), MALI_TRUE);
+
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+		MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+		mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+		MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+		MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+		mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+	trace_gpu_sched_switch(
+		mali_pp_core_description(group->pp_core),
+		sched_clock(), mali_pp_job_get_tid(job),
+		0, mali_pp_job_get_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+	trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+		mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+    }
+
+    MALI_DEBUG_CODE(mali_group_print_virtual(parent);)
+}
+
+/**
+ * @brief Remove child group from virtual group parent
+ */
+void mali_group_remove_group(struct mali_group *parent, struct mali_group *child)
+{
+    u32 i;
+
+    MALI_DEBUG_PRINT(3, ("Removing group %s from virtual group %s\n",
+		mali_group_core_description(child),
+		mali_group_core_description(parent)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+    MALI_DEBUG_ASSERT(!mali_group_is_virtual(child));
+    MALI_DEBUG_ASSERT(parent == child->parent_group);
+
+    /* Update Broadcast Unit and DLBU */
+    mali_bcast_remove_group(parent->bcast_core, child);
+    mali_dlbu_remove_group(parent->dlbu_core, child);
+
+    if (parent->power_is_on == MALI_TRUE) {
+	mali_bcast_reset(parent->bcast_core);
+	mali_dlbu_update_mask(parent->dlbu_core);
+    }
+
+    child->session = parent->session;
+    child->parent_group = NULL;
+
+    _mali_osk_list_delinit(&child->group_list);
+    if (_mali_osk_list_empty(&parent->group_list)) {
+	parent->session = NULL;
+    }
+
+    /* Keep track of the L2 cache cores of child groups */
+    i = (child->l2_cache_core[0] == parent->l2_cache_core[0]) ? 0 : 1;
+
+    MALI_DEBUG_ASSERT(child->l2_cache_core[0] == parent->l2_cache_core[i]);
+
+    parent->l2_cache_core_ref_count[i]--;
+    if (parent->l2_cache_core_ref_count[i] == 0) {
+	parent->l2_cache_core[i] = NULL;
+    }
+
+    MALI_DEBUG_CODE(mali_group_print_virtual(parent));
+}
+
+struct mali_group *mali_group_acquire_group(struct mali_group *parent)
+{
+    struct mali_group *child = NULL;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+
+    if (!_mali_osk_list_empty(&parent->group_list)) {
+	child = _MALI_OSK_LIST_ENTRY(parent->group_list.prev, struct mali_group, group_list);
+	mali_group_remove_group(parent, child);
+    }
+
+    if (child != NULL) {
+	if (MALI_GROUP_STATE_ACTIVE != parent->state
+		&& MALI_TRUE == child->power_is_on) {
+	    mali_group_reset(child);
+	}
+    }
+
+    return child;
+}
+
+void mali_group_reset(struct mali_group *group)
+{
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT(group->gp_running_job == NULL);
+    MALI_DEBUG_ASSERT(group->pp_running_job == NULL);
+    MALI_DEBUG_ASSERT(group->session == NULL);
+
+    MALI_DEBUG_PRINT(3, ("Group: reset of %s\n",
+		mali_group_core_description(group)));
+
+    if (group->dlbu_core != NULL) {
+	mali_dlbu_reset(group->dlbu_core);
+    }
+
+    if (group->bcast_core != NULL) {
+	mali_bcast_reset(group->bcast_core);
+    }
+
+    MALI_DEBUG_ASSERT(group->mmu != NULL);
+    mali_group_reset_mmu(group);
+
+    if (group->gp_core != NULL) {
+	MALI_DEBUG_ASSERT(group->pp_core == NULL);
+	mali_gp_reset(group->gp_core);
+    } else {
+	MALI_DEBUG_ASSERT(group->pp_core != NULL);
+	mali_group_reset_pp(group);
+    }
+}
+
+void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job, mali_bool gpu_secure_mode_pre_enabled)
+{
+    struct mali_session_data *session;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Starting GP job 0x%08X on group %s\n",
+		job,
+		mali_group_core_description(group)));
+
+    session = mali_gp_job_get_session(job);
+
+    MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]);
+    mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_gp_job_get_cache_order(job));
+
+	/* Reset GPU and disable gpu secure mode if needed. */
+	if (_mali_osk_gpu_secure_mode_is_enabled() == MALI_TRUE) {
+		struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+		_mali_osk_gpu_reset_and_secure_mode_disable();
+		/* Need to disable the pmu interrupt mask register */
+		if (pmu != NULL) {
+			mali_pmu_reset(pmu);
+		}
+	}
+
+	/* Reload mmu page table if needed */
+	if (gpu_secure_mode_pre_enabled == MALI_TRUE) {
+		mali_group_reset(group);
+		mali_group_activate_page_directory(group, session, MALI_TRUE);
+	} else {
+		mali_group_activate_page_directory(group, session, MALI_FALSE);
+	}
+
+    mali_gp_job_start(group->gp_core, job);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+	    MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0) |
+	    MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+	    mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job), 0, 0, 0);
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+	    MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+	    mali_gp_job_get_pid(job), mali_gp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+    trace_mali_core_active(mali_gp_job_get_pid(job), 1 /* active */, 1 /* GP */,  0 /* core */,
+	    mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+    if ((mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0]) != MALI_HW_CORE_NO_COUNTER) &&
+	    (mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]) != MALI_HW_CORE_NO_COUNTER)) {
+	mali_group_report_l2_cache_counters_per_core(group, 0);
+    }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+    trace_gpu_sched_switch(mali_gp_core_description(group->gp_core),
+	    sched_clock(), mali_gp_job_get_tid(job),
+	    0, mali_gp_job_get_id(job));
+#endif
+
+    group->gp_running_job = job;
+    group->is_working = MALI_TRUE;
+
+    /* Setup SW timer and record start time */
+    group->start_time = _mali_osk_time_tickcount();
+    _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime));
+
+    MALI_DEBUG_PRINT(4, ("Group: Started GP job 0x%08X on group %s at %u\n",
+		job,
+		mali_group_core_description(group),
+		group->start_time));
+}
+
+/* Used to set all the registers except frame renderer list address and fragment shader stack address
+ * It means the caller must set these two registers properly before calling this function
+ */
+void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job, mali_bool gpu_secure_mode_pre_enabled)
+{
+    struct mali_session_data *session;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_PRINT(3, ("Group: Starting PP job 0x%08X part %u/%u on group %s\n",
+		job, sub_job + 1,
+		mali_pp_job_get_sub_job_count(job),
+		mali_group_core_description(group)));
+
+    session = mali_pp_job_get_session(job);
+
+    if (group->l2_cache_core[0] != NULL) {
+	mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_pp_job_get_cache_order(job));
+    }
+
+    if (group->l2_cache_core[1] != NULL) {
+	mali_l2_cache_invalidate_conditional(group->l2_cache_core[1], mali_pp_job_get_cache_order(job));
+    }
+
+	/* Reset GPU and change gpu secure mode if needed. */
+	if (MALI_TRUE == mali_pp_job_is_protected_job(job) && MALI_FALSE == _mali_osk_gpu_secure_mode_is_enabled()) {
+		struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+		_mali_osk_gpu_reset_and_secure_mode_enable();
+		/* Need to disable the pmu interrupt mask register */
+		if (pmu != NULL) {
+			mali_pmu_reset(pmu);
+		}
+	} else if (MALI_FALSE == mali_pp_job_is_protected_job(job) && MALI_TRUE == _mali_osk_gpu_secure_mode_is_enabled()) {
+		struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+		_mali_osk_gpu_reset_and_secure_mode_disable();
+		/* Need to disable the pmu interrupt mask register */
+		if (pmu != NULL) {
+			mali_pmu_reset(pmu);
+		}
+	}
+
+	/* Reload the mmu page table if needed */
+	if ((MALI_TRUE == mali_pp_job_is_protected_job(job) && MALI_FALSE == gpu_secure_mode_pre_enabled)
+	    || (MALI_FALSE == mali_pp_job_is_protected_job(job) && MALI_TRUE == gpu_secure_mode_pre_enabled)) {
+		mali_group_reset(group);
+		mali_group_activate_page_directory(group, session, MALI_TRUE);
+	} else {
+		mali_group_activate_page_directory(group, session, MALI_FALSE);
+	}
+
+    if (mali_group_is_virtual(group)) {
+	struct mali_group *child;
+	struct mali_group *temp;
+	u32 core_num = 0;
+
+	MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job));
+
+	/* Configure DLBU for the job */
+	mali_dlbu_config_job(group->dlbu_core, job);
+
+	/* Write stack address for each child group */
+	_MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+	    mali_pp_write_addr_stack(child->pp_core, job);
+	    core_num++;
+	}
+
+	mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE);
+    } else {
+	mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE);
+    }
+
+    /* if the group is virtual, loop through physical groups which belong to this group
+     * and call profiling events for its cores as virtual */
+    if (mali_group_is_virtual(group) == MALI_TRUE) {
+	struct mali_group *child;
+	struct mali_group *temp;
+
+	_MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+	    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+		    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+		    MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+		    mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+	    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+		    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+		    MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+		    mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+	    trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+		    mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+	}
+
+#if defined(CONFIG_MALI400_PROFILING)
+	if (group->l2_cache_core_ref_count[0] != 0) {
+	    if ((mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0]) != MALI_HW_CORE_NO_COUNTER) &&
+		    (mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]) != MALI_HW_CORE_NO_COUNTER)) {
+		mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+	    }
+	}
+	if (group->l2_cache_core_ref_count[1] != 0) {
+	    if ((mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1]) != MALI_HW_CORE_NO_COUNTER) &&
+		    (mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]) != MALI_HW_CORE_NO_COUNTER)) {
+		mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1]));
+	    }
+	}
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+    } else { /* group is physical - call profiling events for physical cores */
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+		MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+		mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+		MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+		MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL,
+		mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+	trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core),
+		mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+	if ((mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0]) != MALI_HW_CORE_NO_COUNTER) &&
+		(mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]) != MALI_HW_CORE_NO_COUNTER)) {
+	    mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+	}
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+    }
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+    trace_gpu_sched_switch(mali_pp_core_description(group->pp_core),
+	    sched_clock(), mali_pp_job_get_tid(job),
+	    0, mali_pp_job_get_id(job));
+#endif
+
+    group->pp_running_job = job;
+    group->pp_running_sub_job = sub_job;
+    group->is_working = MALI_TRUE;
+
+    /* Setup SW timer and record start time */
+    group->start_time = _mali_osk_time_tickcount();
+    _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime));
+
+    MALI_DEBUG_PRINT(4, ("Group: Started PP job 0x%08X part %u/%u on group %s at %u\n",
+		job, sub_job + 1,
+		mali_pp_job_get_sub_job_count(job),
+		mali_group_core_description(group),
+		group->start_time));
+
+}
+
+void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr)
+{
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]);
+    mali_l2_cache_invalidate(group->l2_cache_core[0]);
+
+    mali_mmu_zap_tlb_without_stall(group->mmu);
+
+    mali_gp_resume_with_new_heap(group->gp_core, start_addr, end_addr);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME |
+	    MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+	    0, 0, 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+    trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 1 /* active */, 1 /* GP */,  0 /* core */,
+	    mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job));
+#endif
+}
+
+static void mali_group_reset_mmu(struct mali_group *group)
+{
+    struct mali_group *child;
+    struct mali_group *temp;
+    _mali_osk_errcode_t err;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    if (!mali_group_is_virtual(group)) {
+	/* This is a physical group or an idle virtual group -- simply wait for
+	 * the reset to complete. */
+	err = mali_mmu_reset(group->mmu);
+	MALI_DEBUG_ASSERT(err == _MALI_OSK_ERR_OK);
+    } else { /* virtual group */
+	/* Loop through all members of this virtual group and wait
+	 * until they are done resetting.
+	 */
+	_MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+	    err = mali_mmu_reset(child->mmu);
+	    MALI_DEBUG_ASSERT(err == _MALI_OSK_ERR_OK);
+	}
+    }
+}
+
+static void mali_group_reset_pp(struct mali_group *group)
+{
+    struct mali_group *child;
+    struct mali_group *temp;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    mali_pp_reset_async(group->pp_core);
+
+    if (!mali_group_is_virtual(group) || NULL == group->pp_running_job) {
+	/* This is a physical group or an idle virtual group -- simply wait for
+	 * the reset to complete. */
+	mali_pp_reset_wait(group->pp_core);
+    } else {
+	/* Loop through all members of this virtual group and wait until they
+	 * are done resetting.
+	 */
+	_MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+	    mali_pp_reset_wait(child->pp_core);
+	}
+    }
+}
+
+struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job)
+{
+    struct mali_pp_job *pp_job_to_return;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_running_job);
+    MALI_DEBUG_ASSERT_POINTER(sub_job);
+    MALI_DEBUG_ASSERT(group->is_working == MALI_TRUE);
+
+    /* Stop/clear the timeout timer. */
+    _mali_osk_timer_del_async(group->timeout_timer);
+
+    if (group->pp_running_job != NULL) {
+
+	/* Deal with HW counters and profiling */
+
+	if (mali_group_is_virtual(group) == MALI_TRUE) {
+	    struct mali_group *child;
+	    struct mali_group *temp;
+
+	    /* update performance counters from each physical pp core within this virtual group */
+	    _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+		mali_pp_update_performance_counters(group->pp_core, child->pp_core, group->pp_running_job, mali_pp_core_get_id(child->pp_core));
+	    }
+
+#if defined(CONFIG_MALI400_PROFILING)
+	    /* send profiling data per physical core */
+	    _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+			MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+			MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+			mali_pp_job_get_perf_counter_value0(group->pp_running_job, mali_pp_core_get_id(child->pp_core)),
+			mali_pp_job_get_perf_counter_value1(group->pp_running_job, mali_pp_core_get_id(child->pp_core)),
+			mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8),
+			0, 0);
+
+		trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job),
+			0 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+			mali_pp_job_get_frame_builder_id(group->pp_running_job),
+			mali_pp_job_get_flush_id(group->pp_running_job));
+	    }
+	    if (group->l2_cache_core_ref_count[0] != 0) {
+		if ((mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0]) != MALI_HW_CORE_NO_COUNTER) &&
+			(mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]) != MALI_HW_CORE_NO_COUNTER)) {
+		    mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+		}
+	    }
+	    if (group->l2_cache_core_ref_count[1] != 0) {
+		if ((mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1]) != MALI_HW_CORE_NO_COUNTER) &&
+			(mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]) != MALI_HW_CORE_NO_COUNTER)) {
+		    mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1]));
+		}
+	    }
+
+#endif
+	} else {
+	    /* update performance counters for a physical group's pp core */
+	    mali_pp_update_performance_counters(group->pp_core, group->pp_core, group->pp_running_job, group->pp_running_sub_job);
+
+#if defined(CONFIG_MALI400_PROFILING)
+	    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+		    MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+		    MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL,
+		    mali_pp_job_get_perf_counter_value0(group->pp_running_job, group->pp_running_sub_job),
+		    mali_pp_job_get_perf_counter_value1(group->pp_running_job, group->pp_running_sub_job),
+		    mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8),
+		    0, 0);
+
+	    trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job),
+		    0 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core),
+		    mali_pp_job_get_frame_builder_id(group->pp_running_job),
+		    mali_pp_job_get_flush_id(group->pp_running_job));
+
+	    if ((mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0]) != MALI_HW_CORE_NO_COUNTER) &&
+		    (mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]) != MALI_HW_CORE_NO_COUNTER)) {
+		mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+	    }
+#endif
+	}
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+	trace_gpu_sched_switch(
+		mali_gp_core_description(group->gp_core),
+		sched_clock(), 0, 0, 0);
+#endif
+
+    }
+
+    if (success) {
+	/* Only do soft reset for successful jobs, a full recovery
+	 * reset will be done for failed jobs. */
+	mali_pp_reset_async(group->pp_core);
+    }
+
+    pp_job_to_return = group->pp_running_job;
+    group->pp_running_job = NULL;
+    group->is_working = MALI_FALSE;
+    *sub_job = group->pp_running_sub_job;
+
+    if (!success) {
+	MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n"));
+	mali_group_recovery_reset(group);
+    } else if (mali_pp_reset_wait(group->pp_core) != _MALI_OSK_ERR_OK) {
+	MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n"));
+	mali_group_recovery_reset(group);
+    }
+
+    return pp_job_to_return;
+}
+
+struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success)
+{
+    struct mali_gp_job *gp_job_to_return;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_running_job);
+    MALI_DEBUG_ASSERT(group->is_working == MALI_TRUE);
+
+    /* Stop/clear the timeout timer. */
+    _mali_osk_timer_del_async(group->timeout_timer);
+
+    if (group->gp_running_job != NULL) {
+	mali_gp_update_performance_counters(group->gp_core, group->gp_running_job);
+
+#if defined(CONFIG_MALI400_PROFILING)
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+		mali_gp_job_get_perf_counter_value0(group->gp_running_job),
+		mali_gp_job_get_perf_counter_value1(group->gp_running_job),
+		mali_gp_job_get_perf_counter_src0(group->gp_running_job) | (mali_gp_job_get_perf_counter_src1(group->gp_running_job) << 8),
+		0, 0);
+
+	if ((mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0]) != MALI_HW_CORE_NO_COUNTER) &&
+		(mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]) != MALI_HW_CORE_NO_COUNTER))
+	    mali_group_report_l2_cache_counters_per_core(group, 0);
+#endif
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+	trace_gpu_sched_switch(
+		mali_pp_core_description(group->pp_core),
+		sched_clock(), 0, 0, 0);
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+	trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 0 /* active */, 1 /* GP */,  0 /* core */,
+		mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job));
+#endif
+
+	mali_gp_job_set_current_heap_addr(group->gp_running_job,
+		mali_gp_read_plbu_alloc_start_addr(group->gp_core));
+    }
+
+    if (success) {
+	/* Only do soft reset for successful jobs, a full recovery
+	 * reset will be done for failed jobs. */
+	mali_gp_reset_async(group->gp_core);
+    }
+
+    gp_job_to_return = group->gp_running_job;
+    group->gp_running_job = NULL;
+    group->is_working = MALI_FALSE;
+
+    if (!success) {
+	MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n"));
+	mali_group_recovery_reset(group);
+    } else if (mali_gp_reset_wait(group->gp_core) != _MALI_OSK_ERR_OK) {
+	MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n"));
+	mali_group_recovery_reset(group);
+    }
+
+    return gp_job_to_return;
+}
+
+struct mali_group *mali_group_get_glob_group(u32 index)
+{
+    if (mali_global_num_groups > index) {
+	return mali_global_groups[index];
+    }
+
+    return NULL;
+}
+
+u32 mali_group_get_glob_num_groups(void)
+{
+    return mali_global_num_groups;
+}
+
+static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session, mali_bool is_reload)
+{
+    MALI_DEBUG_PRINT(5, ("Mali group: Activating page directory 0x%08X from session 0x%08X on group %s\n",
+		mali_session_get_page_directory(session), session,
+		mali_group_core_description(group)));
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (group->session != session || MALI_TRUE == is_reload) {
+		/* Different session than last time, so we need to do some work */
+		MALI_DEBUG_PRINT(5, ("Mali group: Activate session: %08x previous: %08x on group %s\n",
+				     session, group->session,
+				     mali_group_core_description(group)));
+		mali_mmu_activate_page_directory(group->mmu, mali_session_get_page_directory(session));
+		group->session = session;
+	} else {
+		/* Same session as last time, so no work required */
+		MALI_DEBUG_PRINT(4, ("Mali group: Activate existing session 0x%08X on group %s\n",
+				     session->page_directory,
+				     mali_group_core_description(group)));
+		mali_mmu_zap_tlb_without_stall(group->mmu);
+	}
+}
+
+static void mali_group_recovery_reset(struct mali_group *group)
+{
+    _mali_osk_errcode_t err;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    /* Stop cores, bus stop */
+    if (group->pp_core != NULL) {
+	mali_pp_stop_bus(group->pp_core);
+    } else {
+	mali_gp_stop_bus(group->gp_core);
+    }
+
+    /* Flush MMU and clear page fault (if any) */
+    mali_mmu_activate_fault_flush_page_directory(group->mmu);
+    mali_mmu_page_fault_done(group->mmu);
+
+    /* Wait for cores to stop bus, then do a hard reset on them */
+    if (group->pp_core != NULL) {
+	if (mali_group_is_virtual(group)) {
+	    struct mali_group *child, *temp;
+
+	    /* Disable the broadcast unit while we do reset directly on the member cores. */
+	    mali_bcast_disable(group->bcast_core);
+
+	    _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+		mali_pp_stop_bus_wait(child->pp_core);
+		mali_pp_hard_reset(child->pp_core);
+	    }
+
+	    mali_bcast_enable(group->bcast_core);
+	} else {
+	    mali_pp_stop_bus_wait(group->pp_core);
+	    mali_pp_hard_reset(group->pp_core);
+	}
+    } else {
+	mali_gp_stop_bus_wait(group->gp_core);
+	mali_gp_hard_reset(group->gp_core);
+    }
+
+    /* Reset MMU */
+    err = mali_mmu_reset(group->mmu);
+    MALI_DEBUG_ASSERT(err == _MALI_OSK_ERR_OK);
+    MALI_IGNORE(err);
+
+    group->session = NULL;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size)
+{
+    int n = 0;
+    int i;
+    struct mali_group *child;
+    struct mali_group *temp;
+
+    if (mali_group_is_virtual(group)) {
+	n += _mali_osk_snprintf(buf + n, size - n,
+		"Virtual PP Group: %p\n", group);
+    } else if (mali_group_is_in_virtual(group)) {
+	n += _mali_osk_snprintf(buf + n, size - n,
+		"Child PP Group: %p\n", group);
+    } else if (group->pp_core != NULL) {
+	n += _mali_osk_snprintf(buf + n, size - n,
+		"Physical PP Group: %p\n", group);
+    } else {
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	n += _mali_osk_snprintf(buf + n, size - n,
+		"GP Group: %p\n", group);
+    }
+
+    switch (group->state) {
+	case MALI_GROUP_STATE_INACTIVE:
+	    n += _mali_osk_snprintf(buf + n, size - n,
+		    "\tstate: INACTIVE\n");
+	    break;
+	case MALI_GROUP_STATE_ACTIVATION_PENDING:
+	    n += _mali_osk_snprintf(buf + n, size - n,
+		    "\tstate: ACTIVATION_PENDING\n");
+	    break;
+	case MALI_GROUP_STATE_ACTIVE:
+	    n += _mali_osk_snprintf(buf + n, size - n,
+		    "\tstate: MALI_GROUP_STATE_ACTIVE\n");
+	    break;
+	default:
+	    n += _mali_osk_snprintf(buf + n, size - n,
+		    "\tstate: UNKNOWN (%d)\n", group->state);
+	    MALI_DEBUG_ASSERT(0);
+	    break;
+    }
+
+    n += _mali_osk_snprintf(buf + n, size - n,
+	    "\tSW power: %s\n",
+	    group->power_is_on ? "On" : "Off");
+
+    n += mali_pm_dump_state_domain(group->pm_domain, buf + n, size - n);
+
+    for (i = 0; i < 2; i++) {
+	if (group->l2_cache_core[i] != NULL) {
+	    struct mali_pm_domain *domain;
+
+	    domain = mali_l2_cache_get_pm_domain(
+		    group->l2_cache_core[i]);
+	    n += mali_pm_dump_state_domain(domain,
+		    buf + n, size - n);
+	}
+    }
+
+    if (group->gp_core) {
+	n += mali_gp_dump_state(group->gp_core, buf + n, size - n);
+	n += _mali_osk_snprintf(buf + n, size - n,
+		"\tGP running job: %p\n", group->gp_running_job);
+    }
+
+    if (group->pp_core) {
+	n += mali_pp_dump_state(group->pp_core, buf + n, size - n);
+	n += _mali_osk_snprintf(buf + n, size - n,
+		"\tPP running job: %p, subjob %d\n",
+		group->pp_running_job,
+		group->pp_running_sub_job);
+    }
+
+    _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list,
+	    struct mali_group, group_list) {
+	n += mali_group_dump_state(child, buf + n, size - n);
+    }
+
+    return n;
+}
+#endif
+
+/* Kasin added. */
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+#include <platform/meson_m400/mali_fix.h>
+#define INT_MALI_PP2_MMU (6+32)
+struct _mali_osk_irq_t_struct;
+u32 get_irqnum(struct _mali_osk_irq_t_struct *irq);
+#endif
+
+_mali_osk_errcode_t mali_group_upper_half_mmu(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_mmu_core *mmu = group->mmu;
+#endif
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (group->power_is_on == MALI_FALSE)
+	MALI_SUCCESS;
+    if (get_irqnum(mmu->irq) == INT_MALI_PP2_MMU)
+    {
+	if (group == NULL || group->pp_core == NULL)
+	    MALI_SUCCESS;
+	if (group->pp_core->core_id == 0) {
+	    if (malifix_get_mmu_int_process_state(0) == MMU_INT_HIT)
+		malifix_set_mmu_int_process_state(0, MMU_INT_TOP);
+	    else
+		MALI_SUCCESS;
+        } else if (group->pp_core->core_id == 1) {
+	    if (malifix_get_mmu_int_process_state(1) == MMU_INT_HIT)
+		malifix_set_mmu_int_process_state(1, MMU_INT_TOP);
+	    else
+		MALI_SUCCESS;
+	} else
+	    MALI_SUCCESS;
+    }
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+    if (group->gp_core != NULL) {
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+		0, 0, /* No pid and tid for interrupt handler */
+		MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+		mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+		0, 0, /* No pid and tid for interrupt handler */
+		MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+		    mali_pp_core_get_id(group->pp_core)),
+		mali_mmu_get_rawstat(group->mmu), 0);
+    }
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+
+    ret = mali_executor_interrupt_mmu(group, MALI_TRUE);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) {
+		/* group complete and on job shedule on it, it already power off */
+		if (group->gp_core != NULL) {
+			_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+						      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+						      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+						      0, 0, /* No pid and tid for interrupt handler */
+						      MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+						      0xFFFFFFFF, 0);
+		} else {
+			_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+						      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+						      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+						      0, 0, /* No pid and tid for interrupt handler */
+						      MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+							      mali_pp_core_get_id(group->pp_core)),
+						      0xFFFFFFFF, 0);
+		}
+
+		mali_executor_unlock();
+		return ret;
+	}
+#endif
+
+    if (group->gp_core != NULL) {
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+		0, 0, /* No pid and tid for interrupt handler */
+		MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+		mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+		0, 0, /* No pid and tid for interrupt handler */
+		MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+		    mali_pp_core_get_id(group->pp_core)),
+		mali_mmu_get_rawstat(group->mmu), 0);
+    }
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+
+    return ret;
+}
+
+static void mali_group_bottom_half_mmu(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_mmu_core *mmu = group->mmu;
+#endif
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    if (group->gp_core != NULL) {
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+		0, _mali_osk_get_tid(), /* pid and tid */
+		MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+		mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+		0, _mali_osk_get_tid(), /* pid and tid */
+		MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+		    mali_pp_core_get_id(group->pp_core)),
+		mali_mmu_get_rawstat(group->mmu), 0);
+    }
+
+    mali_executor_interrupt_mmu(group, MALI_FALSE);
+
+    if (group->gp_core != NULL) {
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+		0, _mali_osk_get_tid(), /* pid and tid */
+		MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+		mali_mmu_get_rawstat(group->mmu), 0);
+    } else {
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+		0, _mali_osk_get_tid(), /* pid and tid */
+		MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+		    mali_pp_core_get_id(group->pp_core)),
+		mali_mmu_get_rawstat(group->mmu), 0);
+    }
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (get_irqnum(mmu->irq) == INT_MALI_PP2_MMU)
+    {
+	if (group->pp_core->core_id == 0) {
+	    if (malifix_get_mmu_int_process_state(0) == MMU_INT_TOP)
+		malifix_set_mmu_int_process_state(0, MMU_INT_NONE);
+        } else if (group->pp_core->core_id == 1) {
+	    if (malifix_get_mmu_int_process_state(1) == MMU_INT_TOP)
+		malifix_set_mmu_int_process_state(1, MMU_INT_NONE);
+	}
+    }
+#endif
+}
+
+_mali_osk_errcode_t mali_group_upper_half_gp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+	    MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+	    MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+	    0, 0, /* No pid and tid for interrupt handler */
+	    MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+	    mali_gp_get_rawstat(group->gp_core), 0);
+
+    MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n",
+		mali_gp_get_rawstat(group->gp_core),
+		mali_group_core_description(group)));
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+    ret = mali_executor_interrupt_gp(group, MALI_TRUE);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) {
+		/* group complete and on job shedule on it, it already power off */
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+					      0, 0, /* No pid and tid for interrupt handler */
+					      MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+					      0xFFFFFFFF, 0);
+		mali_executor_unlock();
+		return ret;
+	}
+#endif
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+	    MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+	    MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+	    0, 0, /* No pid and tid for interrupt handler */
+	    MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+	    mali_gp_get_rawstat(group->gp_core), 0);
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+    return ret;
+}
+
+static void mali_group_bottom_half_gp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+	    MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+	    MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+	    0, _mali_osk_get_tid(), /* pid and tid */
+	    MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+	    mali_gp_get_rawstat(group->gp_core), 0);
+
+    mali_executor_interrupt_gp(group, MALI_FALSE);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+	    MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+	    MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+	    0, _mali_osk_get_tid(), /* pid and tid */
+	    MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+	    mali_gp_get_rawstat(group->gp_core), 0);
+}
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+int PP0_int_cnt;
+int mali_PP0_int_cnt(void)
+{
+    return PP0_int_cnt;
+}
+EXPORT_SYMBOL(mali_PP0_int_cnt);
+
+int PP1_int_cnt;
+int mali_PP1_int_cnt(void)
+{
+    return PP1_int_cnt;
+}
+EXPORT_SYMBOL(mali_PP1_int_cnt);
+#endif
+
+_mali_osk_errcode_t mali_group_upper_half_pp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    struct mali_pp_core *core = group->pp_core;
+#endif
+    _mali_osk_errcode_t ret;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group)) {
+		/* Not working, so nothing to do */
+		mali_executor_unlock();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+	    MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+	    MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+	    0, 0, /* No pid and tid for interrupt handler */
+	    MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+		mali_pp_core_get_id(group->pp_core)),
+	    mali_pp_get_rawstat(group->pp_core), 0);
+
+    MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n",
+		mali_pp_get_rawstat(group->pp_core),
+		mali_group_core_description(group)));
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+
+    ret = mali_executor_interrupt_pp(group, MALI_TRUE);
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+    if (core->core_id == 0)
+	PP0_int_cnt++;
+    else if (core->core_id == 1)
+	PP1_int_cnt++;
+#endif
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_lock();
+	if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) {
+		/* group complete and on job shedule on it, it already power off */
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+					      0, 0, /* No pid and tid for interrupt handler */
+					      MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+						      mali_pp_core_get_id(group->pp_core)),
+					      0xFFFFFFFF, 0);
+		mali_executor_unlock();
+		return ret;
+	}
+#endif
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+	    MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+	    MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+	    0, 0, /* No pid and tid for interrupt handler */
+	    MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+		mali_pp_core_get_id(group->pp_core)),
+	    mali_pp_get_rawstat(group->pp_core), 0);
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	mali_executor_unlock();
+#endif
+#endif
+    return ret;
+}
+
+static void mali_group_bottom_half_pp(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+    MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+	    MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+	    MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+	    0, _mali_osk_get_tid(), /* pid and tid */
+	    MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+		mali_pp_core_get_id(group->pp_core)),
+	    mali_pp_get_rawstat(group->pp_core), 0);
+
+    mali_executor_interrupt_pp(group, MALI_FALSE);
+
+    _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+	    MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+	    MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+	    0, _mali_osk_get_tid(), /* pid and tid */
+	    MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+		mali_pp_core_get_id(group->pp_core)),
+	    mali_pp_get_rawstat(group->pp_core), 0);
+}
+
+static void mali_group_timeout(void *data)
+{
+    struct mali_group *group = (struct mali_group *)data;
+
+    MALI_DEBUG_ASSERT_POINTER(group);
+
+    MALI_DEBUG_PRINT(2, ("Group: timeout handler for %s at %u\n",
+		mali_group_core_description(group),
+		_mali_osk_time_tickcount()));
+
+    if (mali_core_timeout < 65533)
+	mali_core_timeout++;
+    if (group->gp_core != NULL) {
+	mali_group_schedule_bottom_half_gp(group);
+    } else {
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	mali_group_schedule_bottom_half_pp(group);
+    }
+}
+
+mali_bool mali_group_zap_session(struct mali_group *group,
+	struct mali_session_data *session)
+{
+    MALI_DEBUG_ASSERT_POINTER(group);
+    MALI_DEBUG_ASSERT_POINTER(session);
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    if (group->session != session) {
+	/* not running from this session */
+	return MALI_TRUE; /* success */
+    }
+
+    if (group->is_working) {
+	/* The Zap also does the stall and disable_stall */
+	mali_bool zap_success = mali_mmu_zap_tlb(group->mmu);
+	return zap_success;
+    } else {
+	/* Just remove the session instead of zapping */
+	mali_group_clear_session(group);
+	return MALI_TRUE; /* success */
+    }
+}
+
+#if defined(CONFIG_MALI400_PROFILING)
+static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num)
+{
+    u32 source0 = 0;
+    u32 value0 = 0;
+    u32 source1 = 0;
+    u32 value1 = 0;
+    u32 profiling_channel = 0;
+
+    MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+    switch (core_num) {
+	case 0:
+	    profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_EVENT_CHANNEL_GPU |
+		MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS;
+	    break;
+	case 1:
+	    profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_EVENT_CHANNEL_GPU |
+		MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS;
+	    break;
+	case 2:
+	    profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_EVENT_CHANNEL_GPU |
+		MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS;
+	    break;
+	default:
+	    profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_EVENT_CHANNEL_GPU |
+		MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS;
+	    break;
+    }
+
+    if (core_num == 0) {
+	mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+    }
+    if (core_num == 1) {
+	if (mali_l2_cache_get_id(group->l2_cache_core[0]) == 1) {
+	    mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+	} else if (mali_l2_cache_get_id(group->l2_cache_core[1]) == 1) {
+	    mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1);
+	}
+    }
+    if (core_num == 2) {
+	if (mali_l2_cache_get_id(group->l2_cache_core[0]) == 2) {
+	    mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+	} else if (mali_l2_cache_get_id(group->l2_cache_core[1]) == 2) {
+	    mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1);
+	}
+    }
+
+    _mali_osk_profiling_add_event(profiling_channel, source1 << 8 | source0, value0, value1, 0, 0);
+}
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
diff --git a/utgard/r7p0/common/mali_group.h b/utgard/r7p0/common/mali_group.h
new file mode 100644
index 0000000..60d78b8
--- /dev/null
+++ b/utgard/r7p0/common/mali_group.h
@@ -0,0 +1,461 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_GROUP_H__
+#define __MALI_GROUP_H__
+
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_mmu.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_session.h"
+#include "mali_osk_profiling.h"
+
+/**
+ * @brief Default max runtime [ms] for a core job - used by timeout timers
+ */
+#define MALI_MAX_JOB_RUNTIME_DEFAULT 5000
+
+extern int mali_max_job_runtime;
+
+#define MALI_MAX_NUMBER_OF_GROUPS 10
+#define MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS 8
+
+enum mali_group_state {
+	MALI_GROUP_STATE_INACTIVE,
+	MALI_GROUP_STATE_ACTIVATION_PENDING,
+	MALI_GROUP_STATE_ACTIVE,
+};
+
+/**
+ * The structure represents a render group
+ * A render group is defined by all the cores that share the same Mali MMU
+ */
+
+struct mali_group {
+	struct mali_mmu_core        *mmu;
+	struct mali_session_data    *session;
+
+	enum mali_group_state        state;
+	mali_bool                    power_is_on;
+
+	mali_bool                    is_working;
+	unsigned long                start_time; /* in ticks */
+
+	struct mali_gp_core         *gp_core;
+	struct mali_gp_job          *gp_running_job;
+
+	struct mali_pp_core         *pp_core;
+	struct mali_pp_job          *pp_running_job;
+	u32                         pp_running_sub_job;
+
+	struct mali_pm_domain       *pm_domain;
+
+	struct mali_l2_cache_core   *l2_cache_core[2];
+	u32                         l2_cache_core_ref_count[2];
+
+	/* Parent virtual group (if any) */
+	struct mali_group           *parent_group;
+
+	struct mali_dlbu_core       *dlbu_core;
+	struct mali_bcast_unit      *bcast_core;
+
+	/* Used for working groups which needs to be disabled */
+	mali_bool                    disable_requested;
+
+	/* Used by group to link child groups (for virtual group) */
+	_mali_osk_list_t            group_list;
+
+	/* Used by executor module in order to link groups of same state */
+	_mali_osk_list_t            executor_list;
+
+	/* Used by PM domains to link groups of same domain */
+	_mali_osk_list_t             pm_domain_list;
+
+	_mali_osk_wq_work_t         *bottom_half_work_mmu;
+	_mali_osk_wq_work_t         *bottom_half_work_gp;
+	_mali_osk_wq_work_t         *bottom_half_work_pp;
+
+	_mali_osk_timer_t           *timeout_timer;
+};
+
+/** @brief Create a new Mali group object
+ *
+ * @return A pointer to a new group object
+ */
+struct mali_group *mali_group_create(struct mali_l2_cache_core *core,
+				     struct mali_dlbu_core *dlbu,
+				     struct mali_bcast_unit *bcast,
+				     u32 domain_index);
+
+void mali_group_dump_status(struct mali_group *group);
+
+void mali_group_delete(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group,
+		struct mali_mmu_core *mmu_core);
+void mali_group_remove_mmu_core(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group,
+		struct mali_gp_core *gp_core);
+void mali_group_remove_gp_core(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group,
+		struct mali_pp_core *pp_core);
+void mali_group_remove_pp_core(struct mali_group *group);
+
+MALI_STATIC_INLINE const char *mali_group_core_description(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	if (group->pp_core != NULL) {
+		return mali_pp_core_description(group->pp_core);
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+		return mali_gp_core_description(group->gp_core);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_is_virtual(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	return (group->dlbu_core != NULL);
+#else
+	return MALI_FALSE;
+#endif
+}
+
+/** @brief Check if a group is a part of a virtual group or not
+ */
+MALI_STATIC_INLINE mali_bool mali_group_is_in_virtual(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	return (group->parent_group != NULL) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif
+}
+
+/** @brief Reset group
+ *
+ * This function will reset the entire group,
+ * including all the cores present in the group.
+ *
+ * @param group Pointer to the group to reset
+ */
+void mali_group_reset(struct mali_group *group);
+
+MALI_STATIC_INLINE struct mali_session_data *mali_group_get_session(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	return group->session;
+}
+
+MALI_STATIC_INLINE void mali_group_clear_session(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	if (group->session != NULL) {
+		mali_mmu_activate_empty_page_directory(group->mmu);
+		group->session = NULL;
+	}
+}
+
+enum mali_group_state mali_group_activate(struct mali_group *group);
+
+/*
+ * Change state from ACTIVATION_PENDING to ACTIVE
+ * For virtual group, all childs need to be ACTIVE first
+ */
+mali_bool mali_group_set_active(struct mali_group *group);
+
+/*
+ * @return MALI_TRUE means one or more domains can now be powered off,
+ * and caller should call either mali_pm_update_async() or
+ * mali_pm_update_sync() in order to do so.
+ */
+mali_bool mali_group_deactivate(struct mali_group *group);
+
+MALI_STATIC_INLINE enum mali_group_state mali_group_get_state(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->state;
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_power_is_on(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->power_is_on;
+}
+
+void mali_group_power_up(struct mali_group *group);
+void mali_group_power_down(struct mali_group *group);
+
+MALI_STATIC_INLINE void mali_group_set_disable_request(
+	struct mali_group *group, mali_bool disable)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	group->disable_requested = disable;
+
+	/**
+	 * When one of child group's disable_requeset is set TRUE, then
+	 * the disable_request of parent group should also be set to TRUE.
+	 * While, the disable_request of parent group should only be set to FALSE
+	 * only when all of its child group's disable_request are set to FALSE.
+	 */
+	if (NULL != group->parent_group && MALI_TRUE == disable) {
+		group->parent_group->disable_requested = disable;
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_disable_requested(
+	struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->disable_requested;
+}
+
+/** @brief Virtual groups */
+void mali_group_add_group(struct mali_group *parent, struct mali_group *child);
+struct mali_group *mali_group_acquire_group(struct mali_group *parent);
+void mali_group_remove_group(struct mali_group *parent, struct mali_group *child);
+
+/** @brief Checks if the group is working.
+ */
+MALI_STATIC_INLINE mali_bool mali_group_is_working(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	if (mali_group_is_in_virtual(group)) {
+		struct mali_group *tmp_group = mali_executor_get_virtual_group();
+
+		return tmp_group->is_working;
+	}
+	return group->is_working;
+}
+
+MALI_STATIC_INLINE struct mali_gp_job *mali_group_get_running_gp_job(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return group->gp_running_job;
+}
+
+/** @brief Zap MMU TLB on all groups
+ *
+ * Zap TLB on group if \a session is active.
+ */
+mali_bool mali_group_zap_session(struct mali_group *group,
+				 struct mali_session_data *session);
+
+/** @brief Get pointer to GP core object
+ */
+MALI_STATIC_INLINE struct mali_gp_core *mali_group_get_gp_core(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->gp_core;
+}
+
+/** @brief Get pointer to PP core object
+ */
+MALI_STATIC_INLINE struct mali_pp_core *mali_group_get_pp_core(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	return group->pp_core;
+}
+
+/** @brief Start GP job
+ */
+void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job, mali_bool gpu_secure_mode_pre_enabled);
+
+void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job, mali_bool gpu_secure_mode_pre_enabled);
+
+/** @brief Start virtual group Job on a virtual group
+*/
+void mali_group_start_job_on_virtual(struct mali_group *group, struct mali_pp_job *job, u32 first_subjob, u32 last_subjob);
+
+
+/** @brief Start a subjob from a particular on a specific PP group
+*/
+void mali_group_start_job_on_group(struct mali_group *group, struct mali_pp_job *job, u32 subjob);
+
+
+/** @brief remove all the unused groups in tmp_unused group  list, so that the group is in consistent status.
+ */
+void mali_group_non_dlbu_job_done_virtual(struct mali_group *group);
+
+
+/** @brief Resume GP job that suspended waiting for more heap memory
+ */
+void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr);
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_get_interrupt_result(group->gp_core);
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_get_interrupt_result(group->pp_core);
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_mmu(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->mmu);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_mmu_get_interrupt_result(group->mmu);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_gp_is_active(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_is_active(group->gp_core);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_pp_is_active(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_is_active(group->pp_core);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_has_timed_out(struct mali_group *group)
+{
+	unsigned long time_cost;
+	struct mali_group *tmp_group = group;
+
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+	/* if the group is in virtual need to use virtual_group's start time */
+	if (mali_group_is_in_virtual(group)) {
+		tmp_group = mali_executor_get_virtual_group();
+	}
+
+	time_cost = _mali_osk_time_tickcount() - tmp_group->start_time;
+	if (_mali_osk_time_mstoticks(mali_max_job_runtime) <= time_cost) {
+		/*
+		 * current tick is at or after timeout end time,
+		 * so this is a valid timeout
+		 */
+		return MALI_TRUE;
+	} else {
+		/*
+		 * Not a valid timeout. A HW interrupt probably beat
+		 * us to it, and the timer wasn't properly deleted
+		 * (async deletion used due to atomic context).
+		 */
+		return MALI_FALSE;
+	}
+}
+
+MALI_STATIC_INLINE void mali_group_mask_all_interrupts_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_gp_mask_all_interrupts(group->gp_core);
+}
+
+MALI_STATIC_INLINE void mali_group_mask_all_interrupts_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return mali_pp_mask_all_interrupts(group->pp_core);
+}
+
+MALI_STATIC_INLINE void mali_group_enable_interrupts_gp(
+	struct mali_group *group,
+	enum mali_interrupt_result exceptions)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	mali_gp_enable_interrupts(group->gp_core, exceptions);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_gp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_gp);
+}
+
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_pp(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_pp);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_mmu(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT_POINTER(group->mmu);
+	_mali_osk_wq_schedule_work(group->bottom_half_work_mmu);
+}
+
+struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job);
+
+struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success);
+
+#if defined(CONFIG_MALI400_PROFILING)
+MALI_STATIC_INLINE void mali_group_oom(struct mali_group *group)
+{
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND |
+				      MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+				      0, 0, 0, 0, 0);
+}
+#endif
+
+struct mali_group *mali_group_get_glob_group(u32 index);
+u32 mali_group_get_glob_num_groups(void);
+
+u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size);
+
+
+_mali_osk_errcode_t mali_group_upper_half_mmu(void *data);
+_mali_osk_errcode_t mali_group_upper_half_gp(void *data);
+_mali_osk_errcode_t mali_group_upper_half_pp(void *data);
+
+MALI_STATIC_INLINE mali_bool mali_group_is_empty(struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(group);
+	MALI_DEBUG_ASSERT(mali_group_is_virtual(group));
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	return _mali_osk_list_empty(&group->group_list);
+}
+
+#endif /* __MALI_GROUP_H__ */
diff --git a/utgard/r7p0/common/mali_hw_core.c b/utgard/r7p0/common/mali_hw_core.c
new file mode 100644
index 0000000..3ee81c1
--- /dev/null
+++ b/utgard/r7p0/common/mali_hw_core.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_hw_core.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_osk_mali.h"
+
+_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size)
+{
+	core->phys_addr = resource->base;
+	core->phys_offset = resource->base - _mali_osk_resource_base_address();
+	core->description = resource->description;
+	core->size = reg_size;
+
+	MALI_DEBUG_ASSERT(core->phys_offset < core->phys_addr);
+
+	if (_mali_osk_mem_reqregion(core->phys_addr, core->size, core->description) == _MALI_OSK_ERR_OK) {
+		core->mapped_registers = _mali_osk_mem_mapioregion(core->phys_addr, core->size, core->description);
+		if (core->mapped_registers != NULL) {
+			return _MALI_OSK_ERR_OK;
+		} else {
+			MALI_PRINT_ERROR(("Failed to map memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr));
+		}
+		_mali_osk_mem_unreqregion(core->phys_addr, core->size);
+	} else {
+		MALI_PRINT_ERROR(("Failed to request memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr));
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_hw_core_delete(struct mali_hw_core *core)
+{
+	if (core->mapped_registers != NULL) {
+		_mali_osk_mem_unmapioregion(core->phys_addr, core->size, core->mapped_registers);
+		core->mapped_registers = NULL;
+	}
+	_mali_osk_mem_unreqregion(core->phys_addr, core->size);
+}
diff --git a/utgard/r7p0/common/mali_hw_core.h b/utgard/r7p0/common/mali_hw_core.h
new file mode 100644
index 0000000..fc0ace6
--- /dev/null
+++ b/utgard/r7p0/common/mali_hw_core.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_HW_CORE_H__
+#define __MALI_HW_CORE_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/**
+ * The common parts for all Mali HW cores (GP, PP, MMU, L2 and PMU)
+ * This struct is embedded inside all core specific structs.
+ */
+struct mali_hw_core {
+	uintptr_t phys_addr;              /**< Physical address of the registers */
+	u32 phys_offset;                  /**< Offset from start of Mali to registers */
+	u32 size;                         /**< Size of registers */
+	mali_io_address mapped_registers; /**< Virtual mapping of the registers */
+	const char *description;          /**< Name of unit (as specified in device configuration) */
+};
+
+#define MALI_REG_POLL_COUNT_FAST 1000000
+#define MALI_REG_POLL_COUNT_SLOW 1000000
+
+/*
+ * GP and PP core translate their int_stat/rawstat into one of these
+ */
+enum mali_interrupt_result {
+	MALI_INTERRUPT_RESULT_NONE,
+	MALI_INTERRUPT_RESULT_SUCCESS,
+	MALI_INTERRUPT_RESULT_SUCCESS_VS,
+	MALI_INTERRUPT_RESULT_SUCCESS_PLBU,
+	MALI_INTERRUPT_RESULT_OOM,
+	MALI_INTERRUPT_RESULT_ERROR
+};
+
+_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size);
+void mali_hw_core_delete(struct mali_hw_core *core);
+
+MALI_STATIC_INLINE u32 mali_hw_core_register_read(struct mali_hw_core *core, u32 relative_address)
+{
+	u32 read_val;
+
+	read_val = _mali_osk_mem_ioread32(core->mapped_registers, relative_address);
+	MALI_DEBUG_PRINT(6, ("register_read for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, read_val));
+	return read_val;
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed(struct mali_hw_core *core, u32 relative_address, u32 new_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	_mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val);
+}
+
+/* Conditionally write a register.
+ * The register will only be written if the new value is different from the old_value.
+ * If the new value is different, the old value will also be updated */
+MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 new_val, const u32 old_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	if (old_val != new_val) {
+		_mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val);
+	}
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write(struct mali_hw_core *core, u32 relative_address, u32 new_val)
+{
+	MALI_DEBUG_PRINT(6, ("register_write for core %s, relative addr=0x%04X, val=0x%08X\n",
+			     core->description, relative_address, new_val));
+	_mali_osk_mem_iowrite32(core->mapped_registers, relative_address, new_val);
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs)
+{
+	u32 i;
+
+	MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n",
+			     core->description, relative_address, nr_of_regs));
+
+	/* Do not use burst writes against the registers */
+	for (i = 0; i < nr_of_regs; i++) {
+		mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]);
+	}
+}
+
+/* Conditionally write a set of registers.
+ * The register will only be written if the new value is different from the old_value.
+ * If the new value is different, the old value will also be updated */
+MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs, const u32 *old_array)
+{
+	u32 i;
+
+	MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n",
+			     core->description, relative_address, nr_of_regs));
+
+	/* Do not use burst writes against the registers */
+	for (i = 0; i < nr_of_regs; i++) {
+		if (old_array[i] != write_array[i]) {
+			mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]);
+		}
+	}
+}
+
+#endif /* __MALI_HW_CORE_H__ */
diff --git a/utgard/r7p0/common/mali_kernel_common.h b/utgard/r7p0/common/mali_kernel_common.h
new file mode 100644
index 0000000..76ebe1a
--- /dev/null
+++ b/utgard/r7p0/common/mali_kernel_common.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_COMMON_H__
+#define __MALI_KERNEL_COMMON_H__
+
+#include "mali_osk.h"
+
+/* Make sure debug is defined when it should be */
+#ifndef DEBUG
+#if defined(_DEBUG)
+#define DEBUG
+#endif
+#endif
+
+/* The file include several useful macros for error checking, debugging and printing.
+ * - MALI_PRINTF(...)           Do not use this function: Will be included in Release builds.
+ * - MALI_DEBUG_PRINT(nr, (X) ) Prints the second argument if nr<=MALI_DEBUG_LEVEL.
+ * - MALI_DEBUG_ERROR( (X) )    Prints an errortext, a source trace, and the given error message.
+ * - MALI_DEBUG_ASSERT(exp,(X)) If the asserted expr is false, the program will exit.
+ * - MALI_DEBUG_ASSERT_POINTER(pointer)  Triggers if the pointer is a zero pointer.
+ * - MALI_DEBUG_CODE( X )       The code inside the macro is only compiled in Debug builds.
+ *
+ * The (X) means that you must add an extra parenthesis around the argumentlist.
+ *
+ * The  printf function: MALI_PRINTF(...) is routed to _mali_osk_debugmsg
+ *
+ * Suggested range for the DEBUG-LEVEL is [1:6] where
+ * [1:2] Is messages with highest priority, indicate possible errors.
+ * [3:4] Is messages with medium priority, output important variables.
+ * [5:6] Is messages with low priority, used during extensive debugging.
+ */
+
+/**
+*  Fundamental error macro. Reports an error code. This is abstracted to allow us to
+*  easily switch to a different error reporting method if we want, and also to allow
+*  us to search for error returns easily.
+*
+*  Note no closing semicolon - this is supplied in typical usage:
+*
+*  MALI_ERROR(MALI_ERROR_OUT_OF_MEMORY);
+*/
+#define MALI_ERROR(error_code) return (error_code)
+
+/**
+ *  Basic error macro, to indicate success.
+ *  Note no closing semicolon - this is supplied in typical usage:
+ *
+ *  MALI_SUCCESS;
+ */
+#define MALI_SUCCESS MALI_ERROR(_MALI_OSK_ERR_OK)
+
+/**
+ *  Basic error macro. This checks whether the given condition is true, and if not returns
+ *  from this function with the supplied error code. This is a macro so that we can override it
+ *  for stress testing.
+ *
+ *  Note that this uses the do-while-0 wrapping to ensure that we don't get problems with dangling
+ *  else clauses. Note also no closing semicolon - this is supplied in typical usage:
+ *
+ *  MALI_CHECK((p!=NULL), ERROR_NO_OBJECT);
+ */
+#define MALI_CHECK(condition, error_code) do { if (!(condition)) MALI_ERROR(error_code); } while(0)
+
+/**
+ *  Error propagation macro. If the expression given is anything other than
+ *  _MALI_OSK_NO_ERROR, then the value is returned from the enclosing function
+ *  as an error code. This effectively acts as a guard clause, and propagates
+ *  error values up the call stack. This uses a temporary value to ensure that
+ *  the error expression is not evaluated twice.
+ *  If the counter for forcing a failure has been set using _mali_force_error,
+ *  this error will be returned without evaluating the expression in
+ *  MALI_CHECK_NO_ERROR
+ */
+#define MALI_CHECK_NO_ERROR(expression) \
+	do { _mali_osk_errcode_t _check_no_error_result = (expression); \
+		if (_check_no_error_result != _MALI_OSK_ERR_OK) \
+			MALI_ERROR(_check_no_error_result); \
+	} while (0)
+
+/**
+ *  Pointer check macro. Checks non-null pointer.
+ */
+#define MALI_CHECK_NON_NULL(pointer, error_code) MALI_CHECK(((pointer) != NULL), (error_code))
+
+/**
+ *  Error macro with goto. This checks whether the given condition is true, and if not jumps
+ *  to the specified label using a goto. The label must therefore be local to the function in
+ *  which this macro appears. This is most usually used to execute some clean-up code before
+ *  exiting with a call to ERROR.
+ *
+ *  Like the other macros, this is a macro to allow us to override the condition if we wish,
+ *  e.g. to force an error during stress testing.
+ */
+#define MALI_CHECK_GOTO(condition, label) do { if (!(condition)) goto label; } while(0)
+
+/**
+ *  Explicitly ignore a parameter passed into a function, to suppress compiler warnings.
+ *  Should only be used with parameter names.
+ */
+#define MALI_IGNORE(x) x = x
+
+#if defined(CONFIG_MALI_QUIET)
+#define MALI_PRINTF(args)
+#else
+#define MALI_PRINTF(args) _mali_osk_dbgmsg args;
+#endif
+
+#define MALI_PRINT_ERROR(args) do { \
+		MALI_PRINTF(("Mali: ERR: %s\n", __FILE__)); \
+		MALI_PRINTF(("           %s()%4d\n           ", __func__, __LINE__)) ; \
+		MALI_PRINTF(args); \
+		MALI_PRINTF(("\n")); \
+	} while (0)
+
+#define MALI_PRINT(args) do { \
+		MALI_PRINTF(("Mali: ")); \
+		MALI_PRINTF(args); \
+	} while (0)
+
+#ifdef DEBUG
+#ifndef mali_debug_level
+extern int mali_debug_level;
+#endif
+
+#define MALI_DEBUG_CODE(code) code
+#define MALI_DEBUG_PRINT(level, args)  do { \
+		if ((level) <=  mali_debug_level)\
+		{MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); } \
+	} while (0)
+
+#define MALI_DEBUG_PRINT_ERROR(args) MALI_PRINT_ERROR(args)
+
+#define MALI_DEBUG_PRINT_IF(level, condition, args)  \
+	if ((condition) && ((level) <=  mali_debug_level))\
+	{MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); }
+
+#define MALI_DEBUG_PRINT_ELSE(level, args)\
+	else if ((level) <=  mali_debug_level)\
+	{ MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); }
+
+/**
+ * @note these variants of DEBUG ASSERTS will cause a debugger breakpoint
+ * to be entered (see _mali_osk_break() ). An alternative would be to call
+ * _mali_osk_abort(), on OSs that support it.
+ */
+#define MALI_DEBUG_PRINT_ASSERT(condition, args) do  {if (!(condition)) { MALI_PRINT_ERROR(args); _mali_osk_break(); } } while(0)
+#define MALI_DEBUG_ASSERT_POINTER(pointer) do  {if ((pointer) == NULL) {MALI_PRINT_ERROR(("NULL pointer " #pointer)); _mali_osk_break(); } } while(0)
+#define MALI_DEBUG_ASSERT(condition) do  {if (!(condition)) {MALI_PRINT_ERROR(("ASSERT failed: " #condition)); _mali_osk_break(); } } while(0)
+
+#else /* DEBUG */
+
+#define MALI_DEBUG_CODE(code)
+#define MALI_DEBUG_PRINT(string, args) do {} while (0)
+#define MALI_DEBUG_PRINT_ERROR(args) do {} while (0)
+#define MALI_DEBUG_PRINT_IF(level, condition, args) do {} while (0)
+#define MALI_DEBUG_PRINT_ELSE(level, condition, args) do {} while (0)
+#define MALI_DEBUG_PRINT_ASSERT(condition, args) do {} while (0)
+#define MALI_DEBUG_ASSERT_POINTER(pointer) do {} while (0)
+#define MALI_DEBUG_ASSERT(condition) do {} while (0)
+
+#endif /* DEBUG */
+
+/**
+ * variables from user space cannot be dereferenced from kernel space; tagging them
+ * with __user allows the GCC compiler to generate a warning. Other compilers may
+ * not support this so we define it here as an empty macro if the compiler doesn't
+ * define it.
+ */
+#ifndef __user
+#define __user
+#endif
+
+#endif /* __MALI_KERNEL_COMMON_H__ */
diff --git a/utgard/r7p0/common/mali_kernel_core.c b/utgard/r7p0/common/mali_kernel_core.c
new file mode 100644
index 0000000..e4d3558
--- /dev/null
+++ b/utgard/r7p0/common/mali_kernel_core.c
@@ -0,0 +1,1346 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_ukk.h"
+#include "mali_kernel_core.h"
+#include "mali_memory.h"
+#include "mali_mem_validation.h"
+#include "mali_mmu.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_dlbu.h"
+#include "mali_broadcast.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_executor.h"
+#include "mali_pp_job.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+#include "mali_pmu.h"
+#include "mali_scheduler.h"
+#include "mali_kernel_utilization.h"
+#include "mali_l2_cache.h"
+#include "mali_timeline.h"
+#include "mali_soft_job.h"
+#include "mali_pm_domain.h"
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include "mali_profiling_internal.h"
+#endif
+#include "mali_control_timer.h"
+#include "mali_dvfs_policy.h"
+#include <linux/sched.h>
+#include <linux/atomic.h>
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+#include <linux/fence.h>
+#endif
+
+#define MALI_SHARED_MEMORY_DEFAULT_SIZE 0xffffffff
+
+/* Mali GPU memory. Real values come from module parameter or from device specific data */
+unsigned int mali_dedicated_mem_start;
+unsigned int mali_dedicated_mem_size;
+
+/* Default shared memory size is set to 4G. */
+unsigned int mali_shared_mem_size = MALI_SHARED_MEMORY_DEFAULT_SIZE;
+
+/* Frame buffer memory to be accessible by Mali GPU */
+int mali_fb_start;
+int mali_fb_size;
+
+/* Mali max job runtime */
+extern int mali_max_job_runtime;
+
+/** Start profiling from module load? */
+int mali_boot_profiling;
+
+/** Limits for the number of PP cores behind each L2 cache. */
+int mali_max_pp_cores_group_1 = 0xFF;
+int mali_max_pp_cores_group_2 = 0xFF;
+
+int mali_inited_pp_cores_group_1;
+int mali_inited_pp_cores_group_2;
+
+static _mali_product_id_t global_product_id = _MALI_PRODUCT_ID_UNKNOWN;
+static uintptr_t global_gpu_base_address;
+static u32 global_gpu_major_version;
+static u32 global_gpu_minor_version;
+
+mali_bool mali_gpu_class_is_mali450 = MALI_FALSE;
+mali_bool mali_gpu_class_is_mali470 = MALI_FALSE;
+
+static _mali_osk_errcode_t mali_set_global_gpu_base_address(void)
+{
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+	global_gpu_base_address = _mali_osk_resource_base_address();
+	if (global_gpu_base_address == 0) {
+		err = _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	return err;
+}
+
+static u32 mali_get_bcast_id(_mali_osk_resource_t *resource_pp)
+{
+	switch (resource_pp->base - global_gpu_base_address) {
+	case 0x08000:
+	case 0x20000: /* fall-through for aliased mapping */
+		return 0x01;
+	case 0x0A000:
+	case 0x22000: /* fall-through for aliased mapping */
+		return 0x02;
+	case 0x0C000:
+	case 0x24000: /* fall-through for aliased mapping */
+		return 0x04;
+	case 0x0E000:
+	case 0x26000: /* fall-through for aliased mapping */
+		return 0x08;
+	case 0x28000:
+		return 0x10;
+	case 0x2A000:
+		return 0x20;
+	case 0x2C000:
+		return 0x40;
+	case 0x2E000:
+		return 0x80;
+	default:
+		return 0;
+	}
+}
+
+static _mali_osk_errcode_t mali_parse_product_info(void)
+{
+	_mali_osk_resource_t first_pp_resource;
+
+	/* Find the first PP core resource (again) */
+	if (_mali_osk_resource_find(MALI_OFFSET_PP0, &first_pp_resource) == _MALI_OSK_ERR_OK) {
+		/* Create a dummy PP object for this core so that we can read the version register */
+		struct mali_group *group = mali_group_create(NULL, NULL, NULL, MALI_DOMAIN_INDEX_PP0);
+
+		if (group != NULL) {
+			struct mali_pp_core *pp_core = mali_pp_create(&first_pp_resource, group, MALI_FALSE, mali_get_bcast_id(&first_pp_resource));
+
+			if (pp_core != NULL) {
+				u32 pp_version;
+
+				pp_version = mali_pp_core_get_version(pp_core);
+
+				mali_group_delete(group);
+
+				global_gpu_major_version = (pp_version >> 8) & 0xFF;
+				global_gpu_minor_version = pp_version & 0xFF;
+
+				switch (pp_version >> 16) {
+				case MALI200_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI200;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-200 r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					MALI_PRINT_ERROR(("Mali-200 is not supported by this driver.\n"));
+					_mali_osk_abort();
+					break;
+				case MALI300_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI300;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-300 r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				case MALI400_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI400;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-400 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				case MALI450_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI450;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-450 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				case MALI470_PP_PRODUCT_ID:
+					global_product_id = _MALI_PRODUCT_ID_MALI470;
+					MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-470 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+					break;
+				default:
+					MALI_DEBUG_PRINT(2, ("Found unknown Mali GPU (r%up%u)\n", global_gpu_major_version, global_gpu_minor_version));
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				return _MALI_OSK_ERR_OK;
+			} else {
+				MALI_PRINT_ERROR(("Failed to create initial PP object\n"));
+			}
+		} else {
+			MALI_PRINT_ERROR(("Failed to create initial group object\n"));
+		}
+	} else {
+		MALI_PRINT_ERROR(("First PP core not specified in config file\n"));
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+static void mali_delete_groups(void)
+{
+	struct mali_group *group;
+
+	group = mali_group_get_glob_group(0);
+	while (group != NULL) {
+		mali_group_delete(group);
+		group = mali_group_get_glob_group(0);
+	}
+
+	MALI_DEBUG_ASSERT(mali_group_get_glob_num_groups() == 0);
+}
+
+static void mali_delete_l2_cache_cores(void)
+{
+	struct mali_l2_cache_core *l2;
+
+	l2 = mali_l2_cache_core_get_glob_l2_core(0);
+	while (l2 != NULL) {
+		mali_l2_cache_delete(l2);
+		l2 = mali_l2_cache_core_get_glob_l2_core(0);
+	}
+
+	MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() == 0);
+}
+
+static struct mali_l2_cache_core *mali_create_l2_cache_core(_mali_osk_resource_t *resource, u32 domain_index)
+{
+	struct mali_l2_cache_core *l2_cache = NULL;
+
+	if (resource != NULL) {
+
+		MALI_DEBUG_PRINT(3, ("Found L2 cache %s\n", resource->description));
+
+		l2_cache = mali_l2_cache_create(resource, domain_index);
+		if (l2_cache == NULL) {
+			MALI_PRINT_ERROR(("Failed to create L2 cache object\n"));
+			return NULL;
+		}
+	}
+	MALI_DEBUG_PRINT(3, ("Created L2 cache core object\n"));
+
+	return l2_cache;
+}
+
+static _mali_osk_errcode_t mali_parse_config_l2_cache(void)
+{
+	struct mali_l2_cache_core *l2_cache = NULL;
+
+	if (mali_is_mali400()) {
+		_mali_osk_resource_t l2_resource;
+
+		if (_mali_osk_resource_find(MALI400_OFFSET_L2_CACHE0, &l2_resource) != _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		l2_cache = mali_create_l2_cache_core(&l2_resource, MALI_DOMAIN_INDEX_L20);
+		if (l2_cache == NULL) {
+			return _MALI_OSK_ERR_FAULT;
+		}
+	} else if (mali_is_mali450()) {
+		/*
+		 * L2 for GP    at 0x10000
+		 * L2 for PP0-3 at 0x01000
+		 * L2 for PP4-7 at 0x11000 (optional)
+		 */
+
+		_mali_osk_resource_t l2_gp_resource;
+		_mali_osk_resource_t l2_pp_grp0_resource;
+		_mali_osk_resource_t l2_pp_grp1_resource;
+
+		/* Make cluster for GP's L2 */
+		if (_mali_osk_resource_find(MALI450_OFFSET_L2_CACHE0, &l2_gp_resource) == _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for GP\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_gp_resource, MALI_DOMAIN_INDEX_L20);
+			if (l2_cache == NULL) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for GP in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		/* Find corresponding l2 domain */
+		if (_mali_osk_resource_find(MALI450_OFFSET_L2_CACHE1, &l2_pp_grp0_resource) == _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 0\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_pp_grp0_resource, MALI_DOMAIN_INDEX_L21);
+			if (l2_cache == NULL) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for PP group 0 in config file\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		/* Second PP core group is optional, don't fail if we don't find it */
+		if (_mali_osk_resource_find(MALI450_OFFSET_L2_CACHE2, &l2_pp_grp1_resource) == _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 1\n"));
+			l2_cache = mali_create_l2_cache_core(&l2_pp_grp1_resource, MALI_DOMAIN_INDEX_L22);
+			if (l2_cache == NULL) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		}
+	} else if (mali_is_mali470()) {
+		_mali_osk_resource_t l2c1_resource;
+
+		/* Make cluster for L2C1 */
+		if (_mali_osk_resource_find(MALI470_OFFSET_L2_CACHE1, &l2c1_resource) == _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(3, ("Creating Mali-470 L2 cache 1\n"));
+			l2_cache = mali_create_l2_cache_core(&l2c1_resource, MALI_DOMAIN_INDEX_L21);
+			if (l2_cache == NULL) {
+				return _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for L2C1\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static struct mali_group *mali_create_group(struct mali_l2_cache_core *cache,
+		_mali_osk_resource_t *resource_mmu,
+		_mali_osk_resource_t *resource_gp,
+		_mali_osk_resource_t *resource_pp,
+		u32 domain_index)
+{
+	struct mali_mmu_core *mmu;
+	struct mali_group *group;
+
+	MALI_DEBUG_PRINT(3, ("Starting new group for MMU %s\n", resource_mmu->description));
+
+	/* Create the group object */
+	group = mali_group_create(cache, NULL, NULL, domain_index);
+	if (group == NULL) {
+		MALI_PRINT_ERROR(("Failed to create group object for MMU %s\n", resource_mmu->description));
+		return NULL;
+	}
+
+	/* Create the MMU object inside group */
+	mmu = mali_mmu_create(resource_mmu, group, MALI_FALSE);
+	if (mmu == NULL) {
+		MALI_PRINT_ERROR(("Failed to create MMU object\n"));
+		mali_group_delete(group);
+		return NULL;
+	}
+
+	if (resource_gp != NULL) {
+		/* Create the GP core object inside this group */
+		struct mali_gp_core *gp_core = mali_gp_create(resource_gp, group);
+
+		if (gp_core == NULL) {
+			/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+			MALI_PRINT_ERROR(("Failed to create GP object\n"));
+			mali_group_delete(group);
+			return NULL;
+		}
+	}
+
+	if (resource_pp != NULL) {
+		struct mali_pp_core *pp_core;
+
+		/* Create the PP core object inside this group */
+		pp_core = mali_pp_create(resource_pp, group, MALI_FALSE, mali_get_bcast_id(resource_pp));
+		if (pp_core == NULL) {
+			/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+			MALI_PRINT_ERROR(("Failed to create PP object\n"));
+			mali_group_delete(group);
+			return NULL;
+		}
+	}
+
+	return group;
+}
+
+static _mali_osk_errcode_t mali_create_virtual_group(_mali_osk_resource_t *resource_mmu_pp_bcast,
+		_mali_osk_resource_t *resource_pp_bcast,
+		_mali_osk_resource_t *resource_dlbu,
+		_mali_osk_resource_t *resource_bcast)
+{
+	struct mali_mmu_core *mmu_pp_bcast_core;
+	struct mali_pp_core *pp_bcast_core;
+	struct mali_dlbu_core *dlbu_core;
+	struct mali_bcast_unit *bcast_core;
+	struct mali_group *group;
+
+	MALI_DEBUG_PRINT(2, ("Starting new virtual group for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description));
+
+	/* Create the DLBU core object */
+	dlbu_core = mali_dlbu_create(resource_dlbu);
+	if (dlbu_core == NULL) {
+		MALI_PRINT_ERROR(("Failed to create DLBU object\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the Broadcast unit core */
+	bcast_core = mali_bcast_unit_create(resource_bcast);
+	if (bcast_core == NULL) {
+		MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n"));
+		mali_dlbu_delete(dlbu_core);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the group object */
+#if defined(DEBUG)
+	/* Get a physical PP group to temporarily add to broadcast unit.  IRQ
+	 * verification needs a physical group in the broadcast unit to test
+	 * the broadcast unit interrupt line. */
+	{
+		struct mali_group *phys_group = NULL;
+		int i;
+
+		for (i = 0; i < mali_group_get_glob_num_groups(); i++) {
+			phys_group = mali_group_get_glob_group(i);
+			if (mali_group_get_pp_core(phys_group) != NULL) break;
+		}
+		MALI_DEBUG_ASSERT(mali_group_get_pp_core(phys_group) != NULL);
+
+		/* Add the group temporarily to the broadcast, and update the
+		 * broadcast HW. Since the HW is not updated when removing the
+		 * group the IRQ check will work when the virtual PP is created
+		 * later.
+		 *
+		 * When the virtual group gets populated, the actually used
+		 * groups will be added to the broadcast unit and the HW will
+		 * be updated.
+		 */
+		mali_bcast_add_group(bcast_core, phys_group);
+		mali_bcast_reset(bcast_core);
+		mali_bcast_remove_group(bcast_core, phys_group);
+	}
+#endif /* DEBUG */
+	group = mali_group_create(NULL, dlbu_core, bcast_core, MALI_DOMAIN_INDEX_DUMMY);
+	if (group == NULL) {
+		MALI_PRINT_ERROR(("Failed to create group object for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description));
+		mali_bcast_unit_delete(bcast_core);
+		mali_dlbu_delete(dlbu_core);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the MMU object inside group */
+	mmu_pp_bcast_core = mali_mmu_create(resource_mmu_pp_bcast, group, MALI_TRUE);
+	if (mmu_pp_bcast_core == NULL) {
+		MALI_PRINT_ERROR(("Failed to create MMU PP broadcast object\n"));
+		mali_group_delete(group);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create the PP core object inside this group */
+	pp_bcast_core = mali_pp_create(resource_pp_bcast, group, MALI_TRUE, 0);
+	if (pp_bcast_core == NULL) {
+		/* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+		MALI_PRINT_ERROR(("Failed to create PP object\n"));
+		mali_group_delete(group);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_groups(void)
+{
+	struct mali_group *group;
+	int cluster_id_gp = 0;
+	int cluster_id_pp_grp0 = 0;
+	int cluster_id_pp_grp1 = 0;
+	int i;
+
+	_mali_osk_resource_t resource_gp;
+	_mali_osk_resource_t resource_gp_mmu;
+	_mali_osk_resource_t resource_pp[8];
+	_mali_osk_resource_t resource_pp_mmu[8];
+	_mali_osk_resource_t resource_pp_mmu_bcast;
+	_mali_osk_resource_t resource_pp_bcast;
+	_mali_osk_resource_t resource_dlbu;
+	_mali_osk_resource_t resource_bcast;
+	_mali_osk_errcode_t resource_gp_found;
+	_mali_osk_errcode_t resource_gp_mmu_found;
+	_mali_osk_errcode_t resource_pp_found[8];
+	_mali_osk_errcode_t resource_pp_mmu_found[8];
+	_mali_osk_errcode_t resource_pp_mmu_bcast_found;
+	_mali_osk_errcode_t resource_pp_bcast_found;
+	_mali_osk_errcode_t resource_dlbu_found;
+	_mali_osk_errcode_t resource_bcast_found;
+
+	if (!(mali_is_mali400() || mali_is_mali450() || mali_is_mali470())) {
+		/* No known HW core */
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (mali_max_job_runtime == MALI_MAX_JOB_RUNTIME_DEFAULT) {
+		/* Group settings are not overridden by module parameters, so use device settings */
+		_mali_osk_device_data data = { 0, };
+
+		if (_mali_osk_device_data_get(&data) == _MALI_OSK_ERR_OK) {
+			/* Use device specific settings (if defined) */
+			if (data.max_job_runtime != 0) {
+				mali_max_job_runtime = data.max_job_runtime;
+			}
+		}
+	}
+
+	if (mali_is_mali450()) {
+		/* Mali-450 have separate L2s for GP, and PP core group(s) */
+		cluster_id_pp_grp0 = 1;
+		cluster_id_pp_grp1 = 2;
+	}
+
+	resource_gp_found = _mali_osk_resource_find(MALI_OFFSET_GP, &resource_gp);
+	resource_gp_mmu_found = _mali_osk_resource_find(MALI_OFFSET_GP_MMU, &resource_gp_mmu);
+	resource_pp_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0, &(resource_pp[0]));
+	resource_pp_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1, &(resource_pp[1]));
+	resource_pp_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2, &(resource_pp[2]));
+	resource_pp_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3, &(resource_pp[3]));
+	resource_pp_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4, &(resource_pp[4]));
+	resource_pp_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5, &(resource_pp[5]));
+	resource_pp_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6, &(resource_pp[6]));
+	resource_pp_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7, &(resource_pp[7]));
+	resource_pp_mmu_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0_MMU, &(resource_pp_mmu[0]));
+	resource_pp_mmu_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1_MMU, &(resource_pp_mmu[1]));
+	resource_pp_mmu_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2_MMU, &(resource_pp_mmu[2]));
+	resource_pp_mmu_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3_MMU, &(resource_pp_mmu[3]));
+	resource_pp_mmu_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4_MMU, &(resource_pp_mmu[4]));
+	resource_pp_mmu_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5_MMU, &(resource_pp_mmu[5]));
+	resource_pp_mmu_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6_MMU, &(resource_pp_mmu[6]));
+	resource_pp_mmu_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7_MMU, &(resource_pp_mmu[7]));
+
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		resource_bcast_found = _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast);
+		resource_dlbu_found = _mali_osk_resource_find(MALI_OFFSET_DLBU, &resource_dlbu);
+		resource_pp_mmu_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST_MMU, &resource_pp_mmu_bcast);
+		resource_pp_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST, &resource_pp_bcast);
+
+		if (_MALI_OSK_ERR_OK != resource_bcast_found ||
+		    _MALI_OSK_ERR_OK != resource_dlbu_found ||
+		    _MALI_OSK_ERR_OK != resource_pp_mmu_bcast_found ||
+		    _MALI_OSK_ERR_OK != resource_pp_bcast_found) {
+			/* Missing mandatory core(s) for Mali-450 or Mali-470 */
+			MALI_DEBUG_PRINT(2, ("Missing mandatory resources, Mali-450 needs DLBU, Broadcast unit, virtual PP core and virtual MMU\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK != resource_gp_found ||
+	    _MALI_OSK_ERR_OK != resource_gp_mmu_found ||
+	    _MALI_OSK_ERR_OK != resource_pp_found[0] ||
+	    _MALI_OSK_ERR_OK != resource_pp_mmu_found[0]) {
+		/* Missing mandatory core(s) */
+		MALI_DEBUG_PRINT(2, ("Missing mandatory resource, need at least one GP and one PP, both with a separate MMU\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= 1);
+	group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_gp), &resource_gp_mmu, &resource_gp, NULL, MALI_DOMAIN_INDEX_GP);
+	if (group == NULL) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create group for first (and mandatory) PP core */
+	MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= (cluster_id_pp_grp0 + 1)); /* >= 1 on Mali-300 and Mali-400, >= 2 on Mali-450 */
+	group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[0], NULL, &resource_pp[0], MALI_DOMAIN_INDEX_PP0);
+	if (group == NULL) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	mali_inited_pp_cores_group_1++;
+
+	/* Create groups for rest of the cores in the first PP core group */
+	for (i = 1; i < 4; i++) { /* First half of the PP cores belong to first core group */
+		if (mali_inited_pp_cores_group_1 < mali_max_pp_cores_group_1) {
+			if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) {
+				group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i);
+				if (group == NULL) {
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				mali_inited_pp_cores_group_1++;
+			}
+		}
+	}
+
+	/* Create groups for cores in the second PP core group */
+	for (i = 4; i < 8; i++) { /* Second half of the PP cores belong to second core group */
+		if (mali_inited_pp_cores_group_2 < mali_max_pp_cores_group_2) {
+			if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) {
+				MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= 2); /* Only Mali-450 have a second core group */
+				group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp1), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i);
+				if (group == NULL) {
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				mali_inited_pp_cores_group_2++;
+			}
+		}
+	}
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		_mali_osk_errcode_t err = mali_create_virtual_group(&resource_pp_mmu_bcast, &resource_pp_bcast, &resource_dlbu, &resource_bcast);
+
+		if (err != _MALI_OSK_ERR_OK) {
+			return err;
+		}
+	}
+
+	mali_max_pp_cores_group_1 = mali_inited_pp_cores_group_1;
+	mali_max_pp_cores_group_2 = mali_inited_pp_cores_group_2;
+	MALI_DEBUG_PRINT(2, ("%d+%d PP cores initialized\n", mali_inited_pp_cores_group_1, mali_inited_pp_cores_group_2));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_check_shared_interrupts(void)
+{
+#if !defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	if (_mali_osk_shared_interrupts() == MALI_TRUE) {
+		MALI_PRINT_ERROR(("Shared interrupts detected, but driver support is not enabled\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* !defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	/* It is OK to compile support for shared interrupts even if Mali is not using it. */
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_pmu(void)
+{
+	_mali_osk_resource_t resource_pmu;
+
+	MALI_DEBUG_ASSERT(global_gpu_base_address != 0);
+
+	if (_mali_osk_resource_find(MALI_OFFSET_PMU, &resource_pmu) == _MALI_OSK_ERR_OK) {
+		struct mali_pmu_core *pmu;
+
+		pmu = mali_pmu_create(&resource_pmu);
+		if (pmu == NULL) {
+			MALI_PRINT_ERROR(("Failed to create PMU\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	/* It's ok if the PMU doesn't exist */
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_memory(void)
+{
+	_mali_osk_device_data data = { 0, };
+	_mali_osk_errcode_t ret;
+
+	/* The priority of setting the value of mali_shared_mem_size,
+	 * mali_dedicated_mem_start and mali_dedicated_mem_size:
+	 * 1. module parameter;
+	 * 2. platform data;
+	 * 3. default value;
+	 **/
+	if (_mali_osk_device_data_get(&data) == _MALI_OSK_ERR_OK) {
+		/* Memory settings are not overridden by module parameters, so use device settings */
+		if (0 == mali_dedicated_mem_start && 0 == mali_dedicated_mem_size) {
+			/* Use device specific settings (if defined) */
+			mali_dedicated_mem_start = data.dedicated_mem_start;
+			mali_dedicated_mem_size = data.dedicated_mem_size;
+		}
+
+		if (MALI_SHARED_MEMORY_DEFAULT_SIZE == mali_shared_mem_size &&
+		    0 != data.shared_mem_size) {
+			mali_shared_mem_size = data.shared_mem_size;
+		}
+	}
+
+	if (0 < mali_dedicated_mem_size && 0 != mali_dedicated_mem_start) {
+		MALI_DEBUG_PRINT(2, ("Mali memory settings (dedicated: 0x%08X@0x%08X)\n",
+				     mali_dedicated_mem_size, mali_dedicated_mem_start));
+
+		/* Dedicated memory */
+		ret = mali_memory_core_resource_dedicated_memory(mali_dedicated_mem_start, mali_dedicated_mem_size);
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_PRINT_ERROR(("Failed to register dedicated memory\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	if (mali_shared_mem_size > 0) {
+		MALI_DEBUG_PRINT(2, ("Mali memory settings (shared: 0x%08X)\n", mali_shared_mem_size));
+
+		/* Shared OS memory */
+		ret = mali_memory_core_resource_os_memory(mali_shared_mem_size);
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_PRINT_ERROR(("Failed to register shared OS memory\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	if (0 == mali_fb_start && 0 == mali_fb_size) {
+		/* Frame buffer settings are not overridden by module parameters, so use device settings */
+		_mali_osk_device_data data = { 0, };
+
+		if (_mali_osk_device_data_get(&data) == _MALI_OSK_ERR_OK) {
+			/* Use device specific settings (if defined) */
+			mali_fb_start = data.fb_start;
+			mali_fb_size = data.fb_size;
+		}
+
+		MALI_DEBUG_PRINT(2, ("Using device defined frame buffer settings (0x%08X@0x%08X)\n",
+				     mali_fb_size, mali_fb_start));
+	} else {
+		MALI_DEBUG_PRINT(2, ("Using module defined frame buffer settings (0x%08X@0x%08X)\n",
+				     mali_fb_size, mali_fb_start));
+	}
+
+	if (mali_fb_size != 0) {
+		/* Register frame buffer */
+		ret = mali_mem_validation_add_range(mali_fb_start, mali_fb_size);
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_PRINT_ERROR(("Failed to register frame buffer memory region\n"));
+			mali_memory_terminate();
+			return ret;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_detect_gpu_class(void)
+{
+	if (_mali_osk_identify_gpu_resource() == 0x450)
+		mali_gpu_class_is_mali450 = MALI_TRUE;
+
+	if (_mali_osk_identify_gpu_resource() == 0x470)
+		mali_gpu_class_is_mali470 = MALI_TRUE;
+}
+
+static _mali_osk_errcode_t mali_init_hw_reset(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	_mali_osk_resource_t resource_bcast;
+
+	/* Ensure broadcast unit is in a good state before we start creating
+	 * groups and cores.
+	 */
+	if (_mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast) == _MALI_OSK_ERR_OK) {
+		struct mali_bcast_unit *bcast_core;
+
+		bcast_core = mali_bcast_unit_create(&resource_bcast);
+		if (bcast_core == NULL) {
+			MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+		mali_bcast_unit_delete(bcast_core);
+	}
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_initialize_subsystems(void)
+{
+	_mali_osk_errcode_t err;
+
+#ifdef CONFIG_MALI_DT
+	err = _mali_osk_resource_initialize();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_terminate_subsystems();
+		return err;
+	}
+#endif
+
+	mali_pp_job_initialize();
+
+	mali_timeline_initialize();
+
+	err = mali_session_initialize();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/*Try to init gpu secure mode */
+	_mali_osk_gpu_secure_mode_init();
+
+#if defined(CONFIG_MALI400_PROFILING)
+	err = _mali_osk_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE);
+	if (err != _MALI_OSK_ERR_OK) {
+		/* No biggie if we weren't able to initialize the profiling */
+		MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n"));
+	}
+#endif
+
+	err = mali_memory_initialize();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_executor_initialize();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_scheduler_initialize();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Configure memory early, needed by mali_mmu_initialize. */
+	err = mali_parse_config_memory();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_set_global_gpu_base_address();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Detect GPU class (uses L2 cache count) */
+	mali_detect_gpu_class();
+
+	err = mali_check_shared_interrupts();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the MALI PMU (will not touch HW!) */
+	err = mali_parse_config_pmu();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the power management module */
+	err = mali_pm_initialize();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Make sure the entire GPU stays on for the rest of this function */
+	mali_pm_init_begin();
+
+	/* Ensure HW is in a good state before starting to access cores. */
+	err = mali_init_hw_reset();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Detect which Mali GPU we are dealing with */
+	err = mali_parse_product_info();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* The global_product_id is now populated with the correct Mali GPU */
+
+	/* Start configuring the actual Mali hardware. */
+
+	err = mali_mmu_initialize();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		err = mali_dlbu_initialize();
+		if (err != _MALI_OSK_ERR_OK) {
+			mali_pm_init_end();
+			mali_terminate_subsystems();
+			return err;
+		}
+	}
+
+	err = mali_parse_config_l2_cache();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	err = mali_parse_config_groups();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Move groups into executor */
+	mali_executor_populate();
+
+	/* Need call after all group has assigned a domain */
+	mali_pm_power_cost_setup();
+
+	/* Initialize the GPU timer */
+	err = mali_control_timer_init();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+	/* Initialize the GPU utilization tracking */
+	err = mali_utilization_init();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+
+#if defined(CONFIG_MALI_DVFS)
+	err = mali_dvfs_policy_init();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_pm_init_end();
+		mali_terminate_subsystems();
+		return err;
+	}
+#endif
+
+	/* Allowing the system to be turned off */
+	mali_pm_init_end();
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+void mali_terminate_subsystems(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	MALI_DEBUG_PRINT(2, ("terminate_subsystems() called\n"));
+
+	mali_utilization_term();
+	mali_control_timer_term();
+
+	mali_executor_depopulate();
+	mali_delete_groups(); /* Delete groups not added to executor */
+	mali_executor_terminate();
+
+	mali_scheduler_terminate();
+	mali_pp_job_terminate();
+	mali_delete_l2_cache_cores();
+	mali_mmu_terminate();
+
+	if (mali_is_mali450() || mali_is_mali470()) {
+		mali_dlbu_terminate();
+	}
+
+	mali_pm_terminate();
+
+	if (pmu != NULL) {
+		mali_pmu_delete(pmu);
+	}
+
+#if defined(CONFIG_MALI400_PROFILING)
+	_mali_osk_profiling_term();
+#endif
+
+	_mali_osk_gpu_secure_mode_deinit();
+
+	mali_memory_terminate();
+
+	mali_session_terminate();
+
+	mali_timeline_terminate();
+
+	global_gpu_base_address = 0;
+}
+
+_mali_product_id_t mali_kernel_core_get_product_id(void)
+{
+	return global_product_id;
+}
+
+u32 mali_kernel_core_get_gpu_major_version(void)
+{
+	return global_gpu_major_version;
+}
+
+u32 mali_kernel_core_get_gpu_minor_version(void)
+{
+	return global_gpu_minor_version;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	/* check compatability */
+	if (args->version == _MALI_UK_API_VERSION) {
+		args->compatible = 1;
+	} else {
+		args->compatible = 0;
+	}
+
+	args->version = _MALI_UK_API_VERSION; /* report our version */
+
+	/* success regardless of being compatible or not */
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	/* check compatability */
+	if (args->version == _MALI_UK_API_VERSION) {
+		args->compatible = 1;
+	} else {
+		args->compatible = 0;
+	}
+
+	args->version = _MALI_UK_API_VERSION; /* report our version */
+
+	/* success regardless of being compatible or not */
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args)
+{
+	_mali_osk_errcode_t err;
+	_mali_osk_notification_t *notification;
+	_mali_osk_notification_queue_t *queue;
+	struct mali_session_data *session;
+
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	queue = session->ioctl_queue;
+
+	/* if the queue does not exist we're currently shutting down */
+	if (queue == NULL) {
+		MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. Asking userspace to stop querying\n"));
+		args->type = _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS;
+		return _MALI_OSK_ERR_OK;
+	}
+
+	/* receive a notification, might sleep */
+	err = _mali_osk_notification_queue_receive(queue, &notification);
+	if (err != _MALI_OSK_ERR_OK) {
+		MALI_ERROR(err); /* errcode returned, pass on to caller */
+	}
+
+	/* copy the buffer to the user */
+	args->type = (_mali_uk_notification_type)notification->notification_type;
+	_mali_osk_memcpy(&args->data, notification->result_buffer, notification->result_buffer_size);
+
+	/* finished with the notification */
+	_mali_osk_notification_delete(notification);
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args)
+{
+	_mali_osk_notification_t *notification;
+	_mali_osk_notification_queue_t *queue;
+	struct mali_session_data *session;
+
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	queue = session->ioctl_queue;
+
+	/* if the queue does not exist we're currently shutting down */
+	if (queue == NULL) {
+		MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. Asking userspace to stop querying\n"));
+		return _MALI_OSK_ERR_OK;
+	}
+
+	notification = _mali_osk_notification_create(args->type, 0);
+	if (notification == NULL) {
+		MALI_PRINT_ERROR(("Failed to create notification object\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	_mali_osk_notification_queue_send(queue, notification);
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+_mali_osk_errcode_t _mali_ukk_pending_submit(_mali_uk_pending_submit_s *args)
+{
+	wait_queue_head_t *queue;
+
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	queue = mali_session_get_wait_queue();
+
+	/* check pending big job number, might sleep if larger than MAX allowed number */
+	if (wait_event_interruptible(*queue, MALI_MAX_PENDING_BIG_JOB > mali_scheduler_job_gp_big_job_count())) {
+		return _MALI_OSK_ERR_RESTARTSYSCALL;
+	}
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+
+_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (!session->use_high_priority_job_queue) {
+		session->use_high_priority_job_queue = MALI_TRUE;
+		MALI_DEBUG_PRINT(2, ("Session 0x%08X with pid %d was granted higher priority.\n", session, _mali_osk_get_pid()));
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_open(void **context)
+{
+	u32 i;
+	struct mali_session_data *session;
+
+	/* allocated struct to track this session */
+	session = (struct mali_session_data *)_mali_osk_calloc(1, sizeof(struct mali_session_data));
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_NOMEM);
+
+	MALI_DEBUG_PRINT(3, ("Session starting\n"));
+
+	/* create a response queue for this session */
+	session->ioctl_queue = _mali_osk_notification_queue_init();
+	if (session->ioctl_queue == NULL) {
+		goto err;
+	}
+
+	/*create a wait queue for this session */
+	session->wait_queue = _mali_osk_wait_queue_init();
+	if (session->wait_queue == NULL) {
+		goto err_wait_queue;
+	}
+
+	session->page_directory = mali_mmu_pagedir_alloc();
+	if (session->page_directory == NULL) {
+		goto err_mmu;
+	}
+
+	if (mali_mmu_pagedir_map(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE) != _MALI_OSK_ERR_OK) {
+		MALI_PRINT_ERROR(("Failed to map DLBU page into session\n"));
+		goto err_mmu;
+	}
+
+	if (mali_dlbu_phys_addr != 0) {
+		mali_mmu_pagedir_update(session->page_directory, MALI_DLBU_VIRT_ADDR, mali_dlbu_phys_addr,
+					_MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+	}
+
+	if (mali_memory_session_begin(session) != _MALI_OSK_ERR_OK) {
+		goto err_session;
+	}
+
+	/* Create soft system. */
+	session->soft_job_system = mali_soft_job_system_create(session);
+	if (session->soft_job_system == NULL) {
+		goto err_soft;
+	}
+
+	/* Initialize the dma fence context.*/
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+	session->fence_context = fence_context_alloc(1);
+	_mali_osk_atomic_init(&session->fence_seqno, 0);
+#else
+	MALI_PRINT_ERROR(("The kernel version not support dma fence!\n"));
+	goto err_time_line;
+#endif
+#endif
+
+	/* Create timeline system. */
+	session->timeline_system = mali_timeline_system_create(session);
+	if (session->timeline_system == NULL) {
+		goto err_time_line;
+	}
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_init(&session->number_of_window_jobs, 0);
+#endif
+
+	_mali_osk_atomic_init(&session->number_of_pp_jobs, 0);
+
+	session->use_high_priority_job_queue = MALI_FALSE;
+
+	/* Initialize list of PP jobs on this session. */
+	_MALI_OSK_INIT_LIST_HEAD(&session->pp_job_list);
+
+	/* Initialize the pp_job_fb_lookup_list array used to quickly lookup jobs from a given frame builder */
+	for (i = 0; i < MALI_PP_JOB_FB_LOOKUP_LIST_SIZE; ++i) {
+		_MALI_OSK_INIT_LIST_HEAD(&session->pp_job_fb_lookup_list[i]);
+	}
+
+	session->pid = _mali_osk_get_pid();
+	session->comm = _mali_osk_get_comm();
+	session->max_mali_mem_allocated_size = 0;
+	for (i = 0; i < MALI_MEM_TYPE_MAX; i++) {
+		atomic_set(&session->mali_mem_array[i], 0);
+	}
+	atomic_set(&session->mali_mem_allocated_pages, 0);
+	*context = (void *)session;
+
+	/* Add session to the list of all sessions. */
+	mali_session_add(session);
+
+	MALI_DEBUG_PRINT(3, ("Session started\n"));
+	return _MALI_OSK_ERR_OK;
+
+err_time_line:
+	mali_soft_job_system_destroy(session->soft_job_system);
+err_soft:
+	mali_memory_session_end(session);
+err_session:
+	mali_mmu_pagedir_free(session->page_directory);
+err_mmu:
+	_mali_osk_wait_queue_term(session->wait_queue);
+err_wait_queue:
+	_mali_osk_notification_queue_term(session->ioctl_queue);
+err:
+	_mali_osk_free(session);
+	MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+}
+
+#if defined(DEBUG)
+/* parameter used for debug */
+extern u32 num_pm_runtime_resume;
+extern u32 num_pm_updates;
+extern u32 num_pm_updates_up;
+extern u32 num_pm_updates_down;
+#endif
+
+_mali_osk_errcode_t _mali_ukk_close(void **context)
+{
+	struct mali_session_data *session;
+
+	MALI_CHECK_NON_NULL(context, _MALI_OSK_ERR_INVALID_ARGS);
+	session = (struct mali_session_data *)*context;
+
+	MALI_DEBUG_PRINT(3, ("Session ending\n"));
+
+	MALI_DEBUG_ASSERT_POINTER(session->soft_job_system);
+	MALI_DEBUG_ASSERT_POINTER(session->timeline_system);
+
+	/* Remove session from list of all sessions. */
+	mali_session_remove(session);
+
+	/* This flag is used to prevent queueing of jobs due to activation. */
+	session->is_aborting = MALI_TRUE;
+
+	/* Stop the soft job timer. */
+	mali_timeline_system_stop_timer(session->timeline_system);
+
+	/* Abort queued jobs */
+	mali_scheduler_abort_session(session);
+
+	/* Abort executing jobs */
+	mali_executor_abort_session(session);
+
+	/* Abort the soft job system. */
+	mali_soft_job_system_abort(session->soft_job_system);
+
+	/* Force execution of all pending bottom half processing for GP and PP. */
+	_mali_osk_wq_flush();
+
+	/* The session PP list should now be empty. */
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&session->pp_job_list));
+
+	/* At this point the GP and PP scheduler no longer has any jobs queued or running from this
+	 * session, and all soft jobs in the soft job system has been destroyed. */
+
+	/* Any trackers left in the timeline system are directly or indirectly waiting on external
+	 * sync fences.  Cancel all sync fence waiters to trigger activation of all remaining
+	 * trackers.  This call will sleep until all timelines are empty. */
+	mali_timeline_system_abort(session->timeline_system);
+
+	/* Flush pending work.
+	 * Needed to make sure all bottom half processing related to this
+	 * session has been completed, before we free internal data structures.
+	 */
+	_mali_osk_wq_flush();
+
+	/* Destroy timeline system. */
+	mali_timeline_system_destroy(session->timeline_system);
+	session->timeline_system = NULL;
+
+	/* Destroy soft system. */
+	mali_soft_job_system_destroy(session->soft_job_system);
+	session->soft_job_system = NULL;
+
+	/*Wait for the session job lists become empty.*/
+	_mali_osk_wait_queue_wait_event(session->wait_queue, mali_session_pp_job_is_empty, (void *) session);
+
+	/* Free remaining memory allocated to this session */
+	mali_memory_session_end(session);
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_term(&session->number_of_window_jobs);
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+	_mali_osk_profiling_stop_sampling(session->pid);
+#endif
+
+	/* Free session data structures */
+	mali_mmu_pagedir_unmap(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE);
+	mali_mmu_pagedir_free(session->page_directory);
+	_mali_osk_wait_queue_term(session->wait_queue);
+	_mali_osk_notification_queue_term(session->ioctl_queue);
+	_mali_osk_free(session);
+
+	*context = NULL;
+
+	MALI_DEBUG_PRINT(3, ("Session has ended\n"));
+
+#if defined(DEBUG)
+	MALI_DEBUG_PRINT(3, ("Stats: # runtime resumes: %u\n", num_pm_runtime_resume));
+	MALI_DEBUG_PRINT(3, ("       # PM updates: .... %u (up %u, down %u)\n", num_pm_updates, num_pm_updates_up, num_pm_updates_down));
+
+	num_pm_runtime_resume = 0;
+	num_pm_updates = 0;
+	num_pm_updates_up = 0;
+	num_pm_updates_down = 0;
+#endif
+
+	return _MALI_OSK_ERR_OK;
+}
+
+#if MALI_STATE_TRACKING
+u32 _mali_kernel_core_dump_state(char *buf, u32 size)
+{
+	int n = 0; /* Number of bytes written to buf */
+
+	n += mali_scheduler_dump_state(buf + n, size - n);
+	n += mali_executor_dump_state(buf + n, size - n);
+
+	return n;
+}
+#endif
diff --git a/utgard/r7p0/common/mali_kernel_core.h b/utgard/r7p0/common/mali_kernel_core.h
new file mode 100644
index 0000000..7fdf9a7
--- /dev/null
+++ b/utgard/r7p0/common/mali_kernel_core.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_CORE_H__
+#define __MALI_KERNEL_CORE_H__
+
+#include "mali_osk.h"
+
+typedef enum {
+	_MALI_PRODUCT_ID_UNKNOWN,
+	_MALI_PRODUCT_ID_MALI200,
+	_MALI_PRODUCT_ID_MALI300,
+	_MALI_PRODUCT_ID_MALI400,
+	_MALI_PRODUCT_ID_MALI450,
+	_MALI_PRODUCT_ID_MALI470,
+} _mali_product_id_t;
+
+extern mali_bool mali_gpu_class_is_mali450;
+extern mali_bool mali_gpu_class_is_mali470;
+
+_mali_osk_errcode_t mali_initialize_subsystems(void);
+
+void mali_terminate_subsystems(void);
+
+_mali_product_id_t mali_kernel_core_get_product_id(void);
+
+u32 mali_kernel_core_get_gpu_major_version(void);
+
+u32 mali_kernel_core_get_gpu_minor_version(void);
+
+u32 _mali_kernel_core_dump_state(char *buf, u32 size);
+
+MALI_STATIC_INLINE mali_bool mali_is_mali470(void)
+{
+	return mali_gpu_class_is_mali470;
+}
+
+MALI_STATIC_INLINE mali_bool mali_is_mali450(void)
+{
+	return mali_gpu_class_is_mali450;
+}
+
+MALI_STATIC_INLINE mali_bool mali_is_mali400(void)
+{
+	if (mali_gpu_class_is_mali450 || mali_gpu_class_is_mali470)
+		return MALI_FALSE;
+
+	return MALI_TRUE;
+}
+#endif /* __MALI_KERNEL_CORE_H__ */
diff --git a/utgard/r7p0/common/mali_kernel_utilization.c b/utgard/r7p0/common/mali_kernel_utilization.c
new file mode 100644
index 0000000..2ec054f
--- /dev/null
+++ b/utgard/r7p0/common/mali_kernel_utilization.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_utilization.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_scheduler.h"
+
+#include "mali_executor.h"
+#include "mali_dvfs_policy.h"
+#include "mali_control_timer.h"
+
+/* Thresholds for GP bound detection. */
+#define MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD 240
+#define MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD 250
+
+static _mali_osk_spinlock_irq_t *utilization_data_lock;
+
+static u32 num_running_gp_cores;
+static u32 num_running_pp_cores;
+
+static u64 work_start_time_gpu;
+static u64 work_start_time_gp;
+static u64 work_start_time_pp;
+static u64 accumulated_work_time_gpu;
+static u64 accumulated_work_time_gp;
+static u64 accumulated_work_time_pp;
+
+static u32 last_utilization_gpu;
+static u32 last_utilization_gp;
+static u32 last_utilization_pp;
+
+void (*mali_utilization_callback)(struct mali_gpu_utilization_data *data) = NULL;
+
+/* Define the first timer control timer timeout in milliseconds */
+static u32 mali_control_first_timeout = 100;
+static struct mali_gpu_utilization_data mali_util_data = {0, };
+
+struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer)
+{
+	u64 time_now;
+	u32 leading_zeroes;
+	u32 shift_val;
+	u32 work_normalized_gpu;
+	u32 work_normalized_gp;
+	u32 work_normalized_pp;
+	u32 period_normalized;
+	u32 utilization_gpu;
+	u32 utilization_gp;
+	u32 utilization_pp;
+
+	mali_utilization_data_lock();
+
+	time_now = _mali_osk_time_get_ns();
+
+	*time_period = time_now - *start_time;
+
+	if (accumulated_work_time_gpu == 0 && work_start_time_gpu == 0) {
+		mali_control_timer_pause();
+		/*
+		 * No work done for this period
+		 * - No need to reschedule timer
+		 * - Report zero usage
+		 */
+		last_utilization_gpu = 0;
+		last_utilization_gp = 0;
+		last_utilization_pp = 0;
+
+		mali_util_data.utilization_gpu = last_utilization_gpu;
+		mali_util_data.utilization_gp = last_utilization_gp;
+		mali_util_data.utilization_pp = last_utilization_pp;
+
+		mali_utilization_data_unlock();
+
+		*need_add_timer = MALI_FALSE;
+
+		mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
+
+		MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d\n", last_utilization_gpu));
+		MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d\n", last_utilization_gp));
+		MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d\n", last_utilization_pp));
+
+		return &mali_util_data;
+	}
+
+	/* If we are currently busy, update working period up to now */
+	if (work_start_time_gpu != 0) {
+		accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+		work_start_time_gpu = time_now;
+
+		/* GP and/or PP will also be busy if the GPU is busy at this point */
+
+		if (work_start_time_gp != 0) {
+			accumulated_work_time_gp += (time_now - work_start_time_gp);
+			work_start_time_gp = time_now;
+		}
+
+		if (work_start_time_pp != 0) {
+			accumulated_work_time_pp += (time_now - work_start_time_pp);
+			work_start_time_pp = time_now;
+		}
+	}
+
+	/*
+	 * We have two 64-bit values, a dividend and a divisor.
+	 * To avoid dependencies to a 64-bit divider, we shift down the two values
+	 * equally first.
+	 * We shift the dividend up and possibly the divisor down, making the result X in 256.
+	 */
+
+	/* Shift the 64-bit values down so they fit inside a 32-bit integer */
+	leading_zeroes = _mali_osk_clz((u32)(*time_period >> 32));
+	shift_val = 32 - leading_zeroes;
+	work_normalized_gpu = (u32)(accumulated_work_time_gpu >> shift_val);
+	work_normalized_gp = (u32)(accumulated_work_time_gp >> shift_val);
+	work_normalized_pp = (u32)(accumulated_work_time_pp >> shift_val);
+	period_normalized = (u32)(*time_period >> shift_val);
+
+	/*
+	 * Now, we should report the usage in parts of 256
+	 * this means we must shift up the dividend or down the divisor by 8
+	 * (we could do a combination, but we just use one for simplicity,
+	 * but the end result should be good enough anyway)
+	 */
+	if (period_normalized > 0x00FFFFFF) {
+		/* The divisor is so big that it is safe to shift it down */
+		period_normalized >>= 8;
+	} else {
+		/*
+		 * The divisor is so small that we can shift up the dividend, without loosing any data.
+		 * (dividend is always smaller than the divisor)
+		 */
+		work_normalized_gpu <<= 8;
+		work_normalized_gp <<= 8;
+		work_normalized_pp <<= 8;
+	}
+
+	utilization_gpu = work_normalized_gpu / period_normalized;
+	utilization_gp = work_normalized_gp / period_normalized;
+	utilization_pp = work_normalized_pp / period_normalized;
+
+	last_utilization_gpu = utilization_gpu;
+	last_utilization_gp = utilization_gp;
+	last_utilization_pp = utilization_pp;
+
+	if ((last_utilization_gp > MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD) &&
+	    (last_utilization_pp < MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD)) {
+		mali_executor_hint_enable(MALI_EXECUTOR_HINT_GP_BOUND);
+	} else {
+		mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
+	}
+
+	/* starting a new period */
+	accumulated_work_time_gpu = 0;
+	accumulated_work_time_gp = 0;
+	accumulated_work_time_pp = 0;
+
+	*start_time = time_now;
+
+	mali_util_data.utilization_gp = last_utilization_gp;
+	mali_util_data.utilization_gpu = last_utilization_gpu;
+	mali_util_data.utilization_pp = last_utilization_pp;
+
+	mali_utilization_data_unlock();
+
+	*need_add_timer = MALI_TRUE;
+
+	MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d\n", last_utilization_gpu));
+	MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d\n", last_utilization_gp));
+	MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d\n", last_utilization_pp));
+
+	return &mali_util_data;
+}
+
+_mali_osk_errcode_t mali_utilization_init(void)
+{
+#if USING_GPU_UTILIZATION
+	_mali_osk_device_data data;
+
+	if (_mali_osk_device_data_get(&data) == _MALI_OSK_ERR_OK) {
+		if (data.utilization_callback != NULL) {
+			mali_utilization_callback = data.utilization_callback;
+			MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Utilization handler installed\n"));
+		}
+	}
+#endif /* defined(USING_GPU_UTILIZATION) */
+
+	if (mali_utilization_callback == NULL) {
+		MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: No platform utilization handler installed\n"));
+	}
+
+	utilization_data_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_UTILIZATION);
+	if (utilization_data_lock == NULL) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	num_running_gp_cores = 0;
+	num_running_pp_cores = 0;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_utilization_term(void)
+{
+	if (utilization_data_lock != NULL) {
+		_mali_osk_spinlock_irq_term(utilization_data_lock);
+	}
+}
+
+void mali_utilization_gp_start(void)
+{
+	mali_utilization_data_lock();
+
+	++num_running_gp_cores;
+	if (num_running_gp_cores == 1) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* First GP core started, consider GP busy from now and onwards */
+		work_start_time_gp = time_now;
+
+		if (num_running_pp_cores == 0) {
+			mali_bool is_resume = MALI_FALSE;
+			/*
+			 * There are no PP cores running, so this is also the point
+			 * at which we consider the GPU to be busy as well.
+			 */
+			work_start_time_gpu = time_now;
+
+			is_resume  = mali_control_timer_resume(time_now);
+
+			mali_utilization_data_unlock();
+
+			if (is_resume) {
+				/* Do some policy in new period for performance consideration */
+#if defined(CONFIG_MALI_DVFS)
+				/* Clear session->number_of_window_jobs, prepare parameter for dvfs */
+				mali_session_max_window_num();
+				if (last_utilization_gpu == 0) {
+					/*
+					 * for mali_dev_pause is called in set clock,
+					 * so each time we change clock, we will set clock to
+					 * highest step even if under down clock case,
+					 * it is not nessesary, so we only set the clock under
+					 * last time utilization equal 0, we stop the timer then
+					 * start the GPU again case
+					 */
+					mali_dvfs_policy_new_period();
+				}
+#endif
+				/*
+				 * First timeout using short interval for power consideration
+				 * because we give full power in the new period, but if the
+				 * job loading is light, finish in 10ms, the other time all keep
+				 * in high freq it will wast time.
+				 */
+				mali_control_timer_add(mali_control_first_timeout);
+			}
+		} else {
+			mali_utilization_data_unlock();
+		}
+
+	} else {
+		/* Nothing to do */
+		mali_utilization_data_unlock();
+	}
+}
+
+void mali_utilization_pp_start(void)
+{
+	mali_utilization_data_lock();
+
+	++num_running_pp_cores;
+	if (num_running_pp_cores == 1) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* First PP core started, consider PP busy from now and onwards */
+		work_start_time_pp = time_now;
+
+		if (num_running_gp_cores == 0) {
+			mali_bool is_resume = MALI_FALSE;
+			/*
+			 * There are no GP cores running, so this is also the point
+			 * at which we consider the GPU to be busy as well.
+			 */
+			work_start_time_gpu = time_now;
+
+			/* Start a new period if stoped */
+			is_resume = mali_control_timer_resume(time_now);
+
+			mali_utilization_data_unlock();
+
+			if (is_resume) {
+#if defined(CONFIG_MALI_DVFS)
+				/* Clear session->number_of_window_jobs, prepare parameter for dvfs */
+				mali_session_max_window_num();
+				if (last_utilization_gpu == 0) {
+					/*
+					 * for mali_dev_pause is called in set clock,
+					 * so each time we change clock, we will set clock to
+					 * highest step even if under down clock case,
+					 * it is not nessesary, so we only set the clock under
+					 * last time utilization equal 0, we stop the timer then
+					 * start the GPU again case
+					 */
+					mali_dvfs_policy_new_period();
+				}
+#endif
+
+				/*
+				 * First timeout using short interval for power consideration
+				 * because we give full power in the new period, but if the
+				 * job loading is light, finish in 10ms, the other time all keep
+				 * in high freq it will wast time.
+				 */
+				mali_control_timer_add(mali_control_first_timeout);
+			}
+		} else {
+			mali_utilization_data_unlock();
+		}
+	} else {
+		/* Nothing to do */
+		mali_utilization_data_unlock();
+	}
+}
+
+void mali_utilization_gp_end(void)
+{
+	mali_utilization_data_lock();
+
+	--num_running_gp_cores;
+	if (num_running_gp_cores == 0) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* Last GP core ended, consider GP idle from now and onwards */
+		accumulated_work_time_gp += (time_now - work_start_time_gp);
+		work_start_time_gp = 0;
+
+		if (num_running_pp_cores == 0) {
+			/*
+			 * There are no PP cores running, so this is also the point
+			 * at which we consider the GPU to be idle as well.
+			 */
+			accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+			work_start_time_gpu = 0;
+		}
+	}
+
+	mali_utilization_data_unlock();
+}
+
+void mali_utilization_pp_end(void)
+{
+	mali_utilization_data_lock();
+
+	--num_running_pp_cores;
+	if (num_running_pp_cores == 0) {
+		u64 time_now = _mali_osk_time_get_ns();
+
+		/* Last PP core ended, consider PP idle from now and onwards */
+		accumulated_work_time_pp += (time_now - work_start_time_pp);
+		work_start_time_pp = 0;
+
+		if (num_running_gp_cores == 0) {
+			/*
+			 * There are no GP cores running, so this is also the point
+			 * at which we consider the GPU to be idle as well.
+			 */
+			accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+			work_start_time_gpu = 0;
+		}
+	}
+
+	mali_utilization_data_unlock();
+}
+
+mali_bool mali_utilization_enabled(void)
+{
+#if defined(CONFIG_MALI_DVFS)
+	return mali_dvfs_policy_enabled();
+#else
+	return (mali_utilization_callback != NULL);
+#endif /* defined(CONFIG_MALI_DVFS) */
+}
+
+void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data)
+{
+	MALI_DEBUG_ASSERT_POINTER(mali_utilization_callback);
+
+	mali_utilization_callback(util_data);
+}
+
+void mali_utilization_reset(void)
+{
+	accumulated_work_time_gpu = 0;
+	accumulated_work_time_gp = 0;
+	accumulated_work_time_pp = 0;
+
+	last_utilization_gpu = 0;
+	last_utilization_gp = 0;
+	last_utilization_pp = 0;
+}
+
+void mali_utilization_data_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(utilization_data_lock);
+}
+
+void mali_utilization_data_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(utilization_data_lock);
+}
+
+void mali_utilization_data_assert_locked(void)
+{
+	MALI_DEBUG_ASSERT_LOCK_HELD(utilization_data_lock);
+}
+
+u32 _mali_ukk_utilization_gp_pp(void)
+{
+	return last_utilization_gpu;
+}
+
+u32 _mali_ukk_utilization_gp(void)
+{
+	return last_utilization_gp;
+}
+
+u32 _mali_ukk_utilization_pp(void)
+{
+	return last_utilization_pp;
+}
diff --git a/utgard/r7p0/common/mali_kernel_utilization.h b/utgard/r7p0/common/mali_kernel_utilization.h
new file mode 100644
index 0000000..0581a3b
--- /dev/null
+++ b/utgard/r7p0/common/mali_kernel_utilization.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_UTILIZATION_H__
+#define __MALI_KERNEL_UTILIZATION_H__
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_osk.h"
+
+/**
+ * Initialize/start the Mali GPU utilization metrics reporting.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_utilization_init(void);
+
+/**
+ * Terminate the Mali GPU utilization metrics reporting
+ */
+void mali_utilization_term(void);
+
+/**
+ * Check if Mali utilization is enabled
+ */
+mali_bool mali_utilization_enabled(void);
+
+/**
+ * Should be called when a job is about to execute a GP job
+ */
+void mali_utilization_gp_start(void);
+
+/**
+ * Should be called when a job has completed executing a GP job
+ */
+void mali_utilization_gp_end(void);
+
+/**
+ * Should be called when a job is about to execute a PP job
+ */
+void mali_utilization_pp_start(void);
+
+/**
+ * Should be called when a job has completed executing a PP job
+ */
+void mali_utilization_pp_end(void);
+
+/**
+ * Should be called to calcution the GPU utilization
+ */
+struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer);
+
+_mali_osk_spinlock_irq_t *mali_utilization_get_lock(void);
+
+void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data);
+
+void mali_utilization_data_lock(void);
+
+void mali_utilization_data_unlock(void);
+
+void mali_utilization_data_assert_locked(void);
+
+void mali_utilization_reset(void);
+
+
+#endif /* __MALI_KERNEL_UTILIZATION_H__ */
diff --git a/utgard/r7p0/common/mali_kernel_vsync.c b/utgard/r7p0/common/mali_kernel_vsync.c
new file mode 100644
index 0000000..8e44c4e
--- /dev/null
+++ b/utgard/r7p0/common/mali_kernel_vsync.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+
+#include "mali_osk_profiling.h"
+
+_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args)
+{
+	_mali_uk_vsync_event event = (_mali_uk_vsync_event)args->event;
+
+	MALI_IGNORE(event); /* event is not used for release code, and that is OK */
+
+	/*
+	 * Manually generate user space events in kernel space.
+	 * This saves user space from calling kernel space twice in this case.
+	 * We just need to remember to add pid and tid manually.
+	 */
+	if (event == _MALI_UK_VSYNC_EVENT_BEGIN_WAIT) {
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC,
+					      _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0);
+	}
+
+	if (event == _MALI_UK_VSYNC_EVENT_END_WAIT) {
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME |
+					      MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+					      MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC,
+					      _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0);
+	}
+
+
+	MALI_DEBUG_PRINT(4, ("Received VSYNC event: %d\n", event));
+	MALI_SUCCESS;
+}
+
diff --git a/utgard/r7p0/common/mali_l2_cache.c b/utgard/r7p0/common/mali_l2_cache.c
new file mode 100644
index 0000000..3ba9b1a
--- /dev/null
+++ b/utgard/r7p0/common/mali_l2_cache.c
@@ -0,0 +1,543 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_hw_core.h"
+#include "mali_scheduler.h"
+#include "mali_pm.h"
+#include "mali_pm_domain.h"
+
+/**
+ * Size of the Mali L2 cache registers in bytes
+ */
+#define MALI400_L2_CACHE_REGISTERS_SIZE 0x30
+
+/**
+ * Mali L2 cache register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_l2_cache_register {
+	MALI400_L2_CACHE_REGISTER_SIZE         = 0x0004,
+	MALI400_L2_CACHE_REGISTER_STATUS       = 0x0008,
+	/*unused                               = 0x000C */
+	MALI400_L2_CACHE_REGISTER_COMMAND      = 0x0010,
+	MALI400_L2_CACHE_REGISTER_CLEAR_PAGE   = 0x0014,
+	MALI400_L2_CACHE_REGISTER_MAX_READS    = 0x0018,
+	MALI400_L2_CACHE_REGISTER_ENABLE       = 0x001C,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0 = 0x0020,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0 = 0x0024,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1 = 0x0028,
+	MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1 = 0x002C,
+} mali_l2_cache_register;
+
+/**
+ * Mali L2 cache commands
+ * These are the commands that can be sent to the Mali L2 cache unit
+ */
+typedef enum mali_l2_cache_command {
+	MALI400_L2_CACHE_COMMAND_CLEAR_ALL = 0x01,
+} mali_l2_cache_command;
+
+/**
+ * Mali L2 cache commands
+ * These are the commands that can be sent to the Mali L2 cache unit
+ */
+typedef enum mali_l2_cache_enable {
+	MALI400_L2_CACHE_ENABLE_DEFAULT = 0x0, /* Default */
+	MALI400_L2_CACHE_ENABLE_ACCESS = 0x01,
+	MALI400_L2_CACHE_ENABLE_READ_ALLOCATE = 0x02,
+} mali_l2_cache_enable;
+
+/**
+ * Mali L2 cache status bits
+ */
+typedef enum mali_l2_cache_status {
+	MALI400_L2_CACHE_STATUS_COMMAND_BUSY = 0x01,
+	MALI400_L2_CACHE_STATUS_DATA_BUSY    = 0x02,
+} mali_l2_cache_status;
+
+#define MALI400_L2_MAX_READS_NOT_SET -1
+
+static struct mali_l2_cache_core *
+	mali_global_l2s[MALI_MAX_NUMBER_OF_L2_CACHE_CORES] = { NULL, };
+static u32 mali_global_num_l2s;
+
+int mali_l2_max_reads = MALI400_L2_MAX_READS_NOT_SET;
+
+
+/* Local helper functions */
+
+static void mali_l2_cache_reset(struct mali_l2_cache_core *cache);
+
+static _mali_osk_errcode_t mali_l2_cache_send_command(
+	struct mali_l2_cache_core *cache, u32 reg, u32 val);
+
+static void mali_l2_cache_lock(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	_mali_osk_spinlock_irq_lock(cache->lock);
+}
+
+static void mali_l2_cache_unlock(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	_mali_osk_spinlock_irq_unlock(cache->lock);
+}
+
+/* Implementation of the L2 cache interface */
+
+struct mali_l2_cache_core *mali_l2_cache_create(
+	_mali_osk_resource_t *resource, u32 domain_index)
+{
+	struct mali_l2_cache_core *cache = NULL;
+#if defined(DEBUG)
+	u32 cache_size;
+#endif
+
+	MALI_DEBUG_PRINT(4, ("Mali L2 cache: Creating Mali L2 cache: %s\n",
+			     resource->description));
+
+	if (mali_global_num_l2s >= MALI_MAX_NUMBER_OF_L2_CACHE_CORES) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Too many L2 caches\n"));
+		return NULL;
+	}
+
+	cache = _mali_osk_malloc(sizeof(struct mali_l2_cache_core));
+	if (cache == NULL) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Failed to allocate memory for L2 cache core\n"));
+		return NULL;
+	}
+
+	cache->core_id =  mali_global_num_l2s;
+	cache->counter_src0 = MALI_HW_CORE_NO_COUNTER;
+	cache->counter_src1 = MALI_HW_CORE_NO_COUNTER;
+	cache->counter_value0_base = 0;
+	cache->counter_value1_base = 0;
+	cache->pm_domain = NULL;
+	cache->power_is_on = MALI_FALSE;
+	cache->last_invalidated_id = 0;
+
+	if (_MALI_OSK_ERR_OK != mali_hw_core_create(&cache->hw_core,
+			resource, MALI400_L2_CACHE_REGISTERS_SIZE)) {
+		_mali_osk_free(cache);
+		return NULL;
+	}
+
+#if defined(DEBUG)
+	cache_size = mali_hw_core_register_read(&cache->hw_core,
+						MALI400_L2_CACHE_REGISTER_SIZE);
+	MALI_DEBUG_PRINT(2, ("Mali L2 cache: Created %s: % 3uK, %u-way, % 2ubyte cache line, % 3ubit external bus\n",
+			     resource->description,
+			     1 << (((cache_size >> 16) & 0xff) - 10),
+			     1 << ((cache_size >> 8) & 0xff),
+			     1 << (cache_size & 0xff),
+			     1 << ((cache_size >> 24) & 0xff)));
+#endif
+
+	cache->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED,
+			_MALI_OSK_LOCK_ORDER_L2);
+	if (cache->lock == NULL) {
+		MALI_PRINT_ERROR(("Mali L2 cache: Failed to create counter lock for L2 cache core %s\n",
+				  cache->hw_core.description));
+		mali_hw_core_delete(&cache->hw_core);
+		_mali_osk_free(cache);
+		return NULL;
+	}
+
+	/* register with correct power domain */
+	cache->pm_domain = mali_pm_register_l2_cache(
+				   domain_index, cache);
+
+	mali_global_l2s[mali_global_num_l2s] = cache;
+	mali_global_num_l2s++;
+
+	return cache;
+}
+
+void mali_l2_cache_delete(struct mali_l2_cache_core *cache)
+{
+	u32 i;
+
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		if (mali_global_l2s[i] != cache) {
+			continue;
+		}
+
+		mali_global_l2s[i] = NULL;
+		mali_global_num_l2s--;
+
+		if (i == mali_global_num_l2s) {
+			/* Removed last element, nothing more to do */
+			break;
+		}
+
+		/*
+		 * We removed a l2 cache from the middle of the array,
+		 * so move the last l2 cache to current position
+		 */
+		mali_global_l2s[i] = mali_global_l2s[mali_global_num_l2s];
+		mali_global_l2s[mali_global_num_l2s] = NULL;
+
+		/* All good */
+		break;
+	}
+
+	_mali_osk_spinlock_irq_term(cache->lock);
+	mali_hw_core_delete(&cache->hw_core);
+	_mali_osk_free(cache);
+}
+
+void mali_l2_cache_power_up(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	mali_l2_cache_lock(cache);
+
+	mali_l2_cache_reset(cache);
+
+	if ((1 << MALI_DOMAIN_INDEX_DUMMY) != cache->pm_domain->pmu_mask)
+		MALI_DEBUG_ASSERT(cache->power_is_on == MALI_FALSE);
+	cache->power_is_on = MALI_TRUE;
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_power_down(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	mali_l2_cache_lock(cache);
+
+	MALI_DEBUG_ASSERT(cache->power_is_on == MALI_TRUE);
+
+	/*
+	 * The HW counters will start from zero again when we resume,
+	 * but we should report counters as always increasing.
+	 * Take a copy of the HW values now in order to add this to
+	 * the values we report after being powered up.
+	 *
+	 * The physical power off of the L2 cache might be outside our
+	 * own control (e.g. runtime PM). That is why we must manually
+	 * set set the counter value to zero as well.
+	 */
+
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+		cache->counter_value0_base += mali_hw_core_register_read(
+						      &cache->hw_core,
+						      MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0);
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0, 0);
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		cache->counter_value1_base += mali_hw_core_register_read(
+						      &cache->hw_core,
+						      MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1);
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1, 0);
+	}
+
+
+	cache->power_is_on = MALI_FALSE;
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_core_set_counter_src(
+	struct mali_l2_cache_core *cache, u32 source_id, u32 counter)
+{
+	u32 reg_offset_src;
+	u32 reg_offset_val;
+
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT(source_id >= 0 && source_id <= 1);
+
+	mali_l2_cache_lock(cache);
+
+	if (source_id == 0) {
+		/* start counting from 0 */
+		cache->counter_value0_base = 0;
+		cache->counter_src0 = counter;
+		reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0;
+		reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0;
+	} else {
+		/* start counting from 0 */
+		cache->counter_value1_base = 0;
+		cache->counter_src1 = counter;
+		reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1;
+		reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1;
+	}
+
+	if (cache->power_is_on) {
+		u32 hw_src;
+
+		if (counter != MALI_HW_CORE_NO_COUNTER) {
+			hw_src = counter;
+		} else {
+			hw_src = 0; /* disable value for HW */
+		}
+
+		/* Set counter src */
+		mali_hw_core_register_write(&cache->hw_core,
+					    reg_offset_src, hw_src);
+
+		/* Make sure the HW starts counting from 0 again */
+		mali_hw_core_register_write(&cache->hw_core,
+					    reg_offset_val, 0);
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_core_get_counter_values(
+	struct mali_l2_cache_core *cache,
+	u32 *src0, u32 *value0, u32 *src1, u32 *value1)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT(src0 != NULL);
+	MALI_DEBUG_ASSERT(value0 != NULL);
+	MALI_DEBUG_ASSERT(src1 != NULL);
+	MALI_DEBUG_ASSERT(value1 != NULL);
+
+	mali_l2_cache_lock(cache);
+
+	*src0 = cache->counter_src0;
+	*src1 = cache->counter_src1;
+
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+		if (cache->power_is_on == MALI_TRUE) {
+			*value0 = mali_hw_core_register_read(&cache->hw_core,
+							     MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0);
+		} else {
+			*value0 = 0;
+		}
+
+		/* Add base offset value (in case we have been power off) */
+		*value0 += cache->counter_value0_base;
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		if (cache->power_is_on == MALI_TRUE) {
+			*value1 = mali_hw_core_register_read(&cache->hw_core,
+							     MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1);
+		} else {
+			*value1 = 0;
+		}
+
+		/* Add base offset value (in case we have been power off) */
+		*value1 += cache->counter_value1_base;
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index)
+{
+	if (mali_global_num_l2s > index) {
+		return mali_global_l2s[index];
+	}
+
+	return NULL;
+}
+
+u32 mali_l2_cache_core_get_glob_num_l2_cores(void)
+{
+	return mali_global_num_l2s;
+}
+
+void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	if (cache == NULL) {
+		return;
+	}
+
+	mali_l2_cache_lock(cache);
+
+	cache->last_invalidated_id = mali_scheduler_get_new_cache_order();
+	mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND,
+				   MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_invalidate_conditional(
+	struct mali_l2_cache_core *cache, u32 id)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+
+	if (cache == NULL) {
+		return;
+	}
+
+	/*
+	 * If the last cache invalidation was done by a job with a higher id we
+	 * don't have to flush. Since user space will store jobs w/ their
+	 * corresponding memory in sequence (first job #0, then job #1, ...),
+	 * we don't have to flush for job n-1 if job n has already invalidated
+	 * the cache since we know for sure that job n-1's memory was already
+	 * written when job n was started.
+	 */
+
+	mali_l2_cache_lock(cache);
+
+	if (((s32)id) > ((s32)cache->last_invalidated_id)) {
+		/* Set latest invalidated id to current "point in time" */
+		cache->last_invalidated_id =
+			mali_scheduler_get_new_cache_order();
+		mali_l2_cache_send_command(cache,
+					   MALI400_L2_CACHE_REGISTER_COMMAND,
+					   MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+	}
+
+	mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_invalidate_all(void)
+{
+	u32 i;
+
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		struct mali_l2_cache_core *cache = mali_global_l2s[i];
+		_mali_osk_errcode_t ret;
+
+		MALI_DEBUG_ASSERT_POINTER(cache);
+
+		mali_l2_cache_lock(cache);
+
+		if (cache->power_is_on != MALI_TRUE) {
+			mali_l2_cache_unlock(cache);
+			continue;
+		}
+
+		cache->last_invalidated_id =
+			mali_scheduler_get_new_cache_order();
+
+		ret = mali_l2_cache_send_command(cache,
+						 MALI400_L2_CACHE_REGISTER_COMMAND,
+						 MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_PRINT_ERROR(("Failed to invalidate cache\n"));
+		}
+
+		mali_l2_cache_unlock(cache);
+	}
+}
+
+void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages)
+{
+	u32 i;
+
+	for (i = 0; i < mali_global_num_l2s; i++) {
+		struct mali_l2_cache_core *cache = mali_global_l2s[i];
+		u32 j;
+
+		MALI_DEBUG_ASSERT_POINTER(cache);
+
+		mali_l2_cache_lock(cache);
+
+		if (cache->power_is_on != MALI_TRUE) {
+			mali_l2_cache_unlock(cache);
+			continue;
+		}
+
+		for (j = 0; j < num_pages; j++) {
+			_mali_osk_errcode_t ret;
+
+			ret = mali_l2_cache_send_command(cache,
+							 MALI400_L2_CACHE_REGISTER_CLEAR_PAGE,
+							 pages[j]);
+			if (ret != _MALI_OSK_ERR_OK) {
+				MALI_PRINT_ERROR(("Failed to invalidate cache (page)\n"));
+			}
+		}
+
+		mali_l2_cache_unlock(cache);
+	}
+}
+
+/* -------- local helper functions below -------- */
+
+static void mali_l2_cache_reset(struct mali_l2_cache_core *cache)
+{
+    MALI_DEBUG_ASSERT_POINTER(cache);
+    MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock);
+
+    /* Kasin Added, skip off power domain. */
+    if (cache && cache->pm_domain && cache->pm_domain->power_is_on == MALI_FALSE) {
+	printk("===========%s, %d skip off power domain?\n", __func__, __LINE__);
+    }
+
+
+    /* Invalidate cache (just to keep it in a known state at startup) */
+    mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND,
+	    MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+
+    /* Enable cache */
+    mali_hw_core_register_write(&cache->hw_core,
+	    MALI400_L2_CACHE_REGISTER_ENABLE,
+	    (u32)MALI400_L2_CACHE_ENABLE_ACCESS |
+	    (u32)MALI400_L2_CACHE_ENABLE_READ_ALLOCATE);
+
+	if (mali_l2_max_reads != MALI400_L2_MAX_READS_NOT_SET) {
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_MAX_READS,
+					    (u32)mali_l2_max_reads);
+	}
+
+	/* Restart any performance counters (if enabled) */
+	if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0,
+					    cache->counter_src0);
+	}
+
+	if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		mali_hw_core_register_write(&cache->hw_core,
+					    MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1,
+					    cache->counter_src1);
+	}
+}
+
+static _mali_osk_errcode_t mali_l2_cache_send_command(
+	struct mali_l2_cache_core *cache, u32 reg, u32 val)
+{
+	int i = 0;
+	const int loop_count = 100000;
+
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock);
+
+	/*
+	 * First, wait for L2 cache command handler to go idle.
+	 * (Commands received while processing another command will be ignored)
+	 */
+	for (i = 0; i < loop_count; i++) {
+		if (!(mali_hw_core_register_read(&cache->hw_core,
+						 MALI400_L2_CACHE_REGISTER_STATUS) &
+		      (u32)MALI400_L2_CACHE_STATUS_COMMAND_BUSY)) {
+			break;
+		}
+	}
+
+	if (i == loop_count) {
+		MALI_DEBUG_PRINT(1, ("Mali L2 cache: aborting wait for command interface to go idle\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* then issue the command */
+	mali_hw_core_register_write(&cache->hw_core, reg, val);
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r7p0/common/mali_l2_cache.h b/utgard/r7p0/common/mali_l2_cache.h
new file mode 100644
index 0000000..42ea40f
--- /dev/null
+++ b/utgard/r7p0/common/mali_l2_cache.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_L2_CACHE_H__
+#define __MALI_KERNEL_L2_CACHE_H__
+
+#include "mali_osk.h"
+#include "mali_hw_core.h"
+
+#define MALI_MAX_NUMBER_OF_L2_CACHE_CORES  3
+/* Maximum 1 GP and 4 PP for an L2 cache core (Mali-400 MP4) */
+#define MALI_MAX_NUMBER_OF_GROUPS_PER_L2_CACHE 5
+
+/**
+ * Definition of the L2 cache core struct
+ * Used to track a L2 cache unit in the system.
+ * Contains information about the mapping of the registers
+ */
+struct mali_l2_cache_core {
+	/* Common HW core functionality */
+	struct mali_hw_core hw_core;
+
+	/* Synchronize L2 cache access */
+	_mali_osk_spinlock_irq_t *lock;
+
+	/* Unique core ID */
+	u32 core_id;
+
+	/* The power domain this L2 cache belongs to */
+	struct mali_pm_domain *pm_domain;
+
+	/* MALI_TRUE if power is on for this L2 cache */
+	mali_bool power_is_on;
+
+	/* A "timestamp" to avoid unnecessary flushes */
+	u32 last_invalidated_id;
+
+	/* Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+	u32 counter_src0;
+
+	/* Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+	u32 counter_src1;
+
+	/*
+	 * Performance counter 0 value base/offset
+	 * (allows accumulative reporting even after power off)
+	 */
+	u32 counter_value0_base;
+
+	/*
+	 * Performance counter 0 value base/offset
+	 * (allows accumulative reporting even after power off)
+	 */
+	u32 counter_value1_base;
+
+	/* Used by PM domains to link L2 caches of same domain */
+	_mali_osk_list_t pm_domain_list;
+};
+
+_mali_osk_errcode_t mali_l2_cache_initialize(void);
+void mali_l2_cache_terminate(void);
+
+struct mali_l2_cache_core *mali_l2_cache_create(
+	_mali_osk_resource_t *resource, u32 domain_index);
+void mali_l2_cache_delete(struct mali_l2_cache_core *cache);
+
+MALI_STATIC_INLINE u32 mali_l2_cache_get_id(struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->core_id;
+}
+
+MALI_STATIC_INLINE struct mali_pm_domain *mali_l2_cache_get_pm_domain(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->pm_domain;
+}
+
+void mali_l2_cache_power_up(struct mali_l2_cache_core *cache);
+void mali_l2_cache_power_down(struct mali_l2_cache_core *cache);
+
+void mali_l2_cache_core_set_counter_src(
+	struct mali_l2_cache_core *cache, u32 source_id, u32 counter);
+
+MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src0(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->counter_src0;
+}
+
+MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src1(
+	struct mali_l2_cache_core *cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(cache);
+	return cache->counter_src1;
+}
+
+void mali_l2_cache_core_get_counter_values(
+	struct mali_l2_cache_core *cache,
+	u32 *src0, u32 *value0, u32 *src1, u32 *value1);
+
+struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index);
+u32 mali_l2_cache_core_get_glob_num_l2_cores(void);
+
+struct mali_group *mali_l2_cache_get_group(
+	struct mali_l2_cache_core *cache, u32 index);
+
+void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache);
+void mali_l2_cache_invalidate_conditional(
+	struct mali_l2_cache_core *cache, u32 id);
+
+void mali_l2_cache_invalidate_all(void);
+void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages);
+
+#endif /* __MALI_KERNEL_L2_CACHE_H__ */
diff --git a/utgard/r7p0/common/mali_mem_validation.c b/utgard/r7p0/common/mali_mem_validation.c
new file mode 100644
index 0000000..34c2090
--- /dev/null
+++ b/utgard/r7p0/common/mali_mem_validation.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_mem_validation.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#define MALI_INVALID_MEM_ADDR 0xFFFFFFFF
+
+typedef struct {
+	u32 phys_base;        /**< Mali physical base of the memory, page aligned */
+	u32 size;             /**< size in bytes of the memory, multiple of page size */
+} _mali_mem_validation_t;
+
+static _mali_mem_validation_t mali_mem_validator = { MALI_INVALID_MEM_ADDR, MALI_INVALID_MEM_ADDR };
+
+_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size)
+{
+	/* Check that no other MEM_VALIDATION resources exist */
+	if (mali_mem_validator.phys_base != MALI_INVALID_MEM_ADDR) {
+		MALI_PRINT_ERROR(("Failed to add frame buffer memory; another range is already specified\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Check restrictions on page alignment */
+	if ((0 != (start & (~_MALI_OSK_CPU_PAGE_MASK))) ||
+	    (0 != (size & (~_MALI_OSK_CPU_PAGE_MASK)))) {
+		MALI_PRINT_ERROR(("Failed to add frame buffer memory; incorrect alignment\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	mali_mem_validator.phys_base = start;
+	mali_mem_validator.size = size;
+	MALI_DEBUG_PRINT(2, ("Memory Validator installed for Mali physical address base=0x%08X, size=0x%08X\n",
+			     mali_mem_validator.phys_base, mali_mem_validator.size));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size)
+{
+#if 0
+	if (phys_addr < (phys_addr + size)) { /* Don't allow overflow (or zero size) */
+		if ((0 == (phys_addr & (~_MALI_OSK_CPU_PAGE_MASK))) &&
+		    (0 == (size & (~_MALI_OSK_CPU_PAGE_MASK)))) {
+			if ((phys_addr          >= mali_mem_validator.phys_base) &&
+			    ((phys_addr + (size - 1)) >= mali_mem_validator.phys_base) &&
+			    (phys_addr          <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1))) &&
+			    ((phys_addr + (size - 1)) <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1)))) {
+				MALI_DEBUG_PRINT(3, ("Accepted range 0x%08X + size 0x%08X (= 0x%08X)\n", phys_addr, size, (phys_addr + size - 1)));
+				return _MALI_OSK_ERR_OK;
+			}
+		}
+	}
+
+	MALI_PRINT_ERROR(("MALI PHYSICAL RANGE VALIDATION ERROR: The range supplied was: phys_base=0x%08X, size=0x%08X\n", phys_addr, size));
+
+	return _MALI_OSK_ERR_FAULT;
+#else
+	return _MALI_OSK_ERR_OK;
+#endif
+}
diff --git a/utgard/r7p0/common/mali_mem_validation.h b/utgard/r7p0/common/mali_mem_validation.h
new file mode 100644
index 0000000..73d600b
--- /dev/null
+++ b/utgard/r7p0/common/mali_mem_validation.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2011-2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEM_VALIDATION_H__
+#define __MALI_MEM_VALIDATION_H__
+
+#include "mali_osk.h"
+
+_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size);
+_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size);
+
+#endif /* __MALI_MEM_VALIDATION_H__ */
diff --git a/utgard/r7p0/common/mali_mmu.c b/utgard/r7p0/common/mali_mmu.c
new file mode 100644
index 0000000..9b34773
--- /dev/null
+++ b/utgard/r7p0/common/mali_mmu.c
@@ -0,0 +1,441 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_ukk.h"
+
+#include "mali_mmu.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "mali_mmu_page_directory.h"
+
+/**
+ * Size of the MMU registers in bytes
+ */
+#define MALI_MMU_REGISTERS_SIZE 0x24
+
+/**
+ * MMU commands
+ * These are the commands that can be sent
+ * to the MMU unit.
+ */
+typedef enum mali_mmu_command {
+	MALI_MMU_COMMAND_ENABLE_PAGING = 0x00, /**< Enable paging (memory translation) */
+	MALI_MMU_COMMAND_DISABLE_PAGING = 0x01, /**< Disable paging (memory translation) */
+	MALI_MMU_COMMAND_ENABLE_STALL = 0x02, /**<  Enable stall on page fault */
+	MALI_MMU_COMMAND_DISABLE_STALL = 0x03, /**< Disable stall on page fault */
+	MALI_MMU_COMMAND_ZAP_CACHE = 0x04, /**< Zap the entire page table cache */
+	MALI_MMU_COMMAND_PAGE_FAULT_DONE = 0x05, /**< Page fault processed */
+	MALI_MMU_COMMAND_HARD_RESET = 0x06 /**< Reset the MMU back to power-on settings */
+} mali_mmu_command;
+
+static void mali_mmu_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_mmu_probe_ack(void *data);
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu);
+
+/* page fault queue flush helper pages
+ * note that the mapping pointers are currently unused outside of the initialization functions */
+static mali_dma_addr mali_page_fault_flush_page_directory = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_page_directory_mapping;
+static mali_dma_addr mali_page_fault_flush_page_table = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_page_table_mapping;
+static mali_dma_addr mali_page_fault_flush_data_page = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_data_page_mapping;
+
+/* an empty page directory (no address valid) which is active on any MMU not currently marked as in use */
+static mali_dma_addr mali_empty_page_directory_phys   = MALI_INVALID_PAGE;
+static mali_io_address mali_empty_page_directory_virt;
+
+
+_mali_osk_errcode_t mali_mmu_initialize(void)
+{
+	/* allocate the helper pages */
+	mali_empty_page_directory_phys = mali_allocate_empty_page(&mali_empty_page_directory_virt);
+	if (mali_empty_page_directory_phys == 0) {
+		MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate empty page directory.\n"));
+		mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	if (_MALI_OSK_ERR_OK != mali_create_fault_flush_pages(&mali_page_fault_flush_page_directory,
+			&mali_page_fault_flush_page_directory_mapping,
+			&mali_page_fault_flush_page_table,
+			&mali_page_fault_flush_page_table_mapping,
+			&mali_page_fault_flush_data_page,
+			&mali_page_fault_flush_data_page_mapping)) {
+		MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate fault flush pages\n"));
+		mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt);
+		mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+		mali_empty_page_directory_virt = NULL;
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mmu_terminate(void)
+{
+	MALI_DEBUG_PRINT(3, ("Mali MMU: terminating\n"));
+
+	/* Free global helper pages */
+	mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt);
+	mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+	mali_empty_page_directory_virt = NULL;
+
+	/* Free the page fault flush pages */
+	mali_destroy_fault_flush_pages(&mali_page_fault_flush_page_directory,
+				       &mali_page_fault_flush_page_directory_mapping,
+				       &mali_page_fault_flush_page_table,
+				       &mali_page_fault_flush_page_table_mapping,
+				       &mali_page_fault_flush_data_page,
+				       &mali_page_fault_flush_data_page_mapping);
+}
+
+struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual)
+{
+	struct mali_mmu_core *mmu = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(resource);
+
+	MALI_DEBUG_PRINT(2, ("Mali MMU: Creating Mali MMU: %s\n", resource->description));
+
+	mmu = _mali_osk_calloc(1, sizeof(struct mali_mmu_core));
+	if (mmu != NULL) {
+		if (mali_hw_core_create(&mmu->hw_core, resource, MALI_MMU_REGISTERS_SIZE) == _MALI_OSK_ERR_OK) {
+			if (mali_group_add_mmu_core(group, mmu) == _MALI_OSK_ERR_OK) {
+				if (is_virtual) {
+					/* Skip reset and IRQ setup for virtual MMU */
+					return mmu;
+				}
+
+				if (mali_mmu_reset(mmu) == _MALI_OSK_ERR_OK) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					mmu->irq = _mali_osk_irq_init(resource->irq,
+								      mali_group_upper_half_mmu,
+								      group,
+								      mali_mmu_probe_trigger,
+								      mali_mmu_probe_ack,
+								      mmu,
+								      resource->description);
+					if (mmu->irq != NULL) {
+						return mmu;
+					} else {
+						MALI_PRINT_ERROR(("Mali MMU: Failed to setup interrupt handlers for MMU %s\n", mmu->hw_core.description));
+					}
+				}
+				mali_group_remove_mmu_core(group);
+			} else {
+				MALI_PRINT_ERROR(("Mali MMU: Failed to add core %s to group\n", mmu->hw_core.description));
+			}
+			mali_hw_core_delete(&mmu->hw_core);
+		}
+
+		_mali_osk_free(mmu);
+	} else {
+		MALI_PRINT_ERROR(("Failed to allocate memory for MMU\n"));
+	}
+
+	return NULL;
+}
+
+void mali_mmu_delete(struct mali_mmu_core *mmu)
+{
+	if (mmu->irq != NULL) {
+		_mali_osk_irq_term(mmu->irq);
+	}
+
+	mali_hw_core_delete(&mmu->hw_core);
+	_mali_osk_free(mmu);
+}
+
+static void mali_mmu_enable_paging(struct mali_mmu_core *mmu)
+{
+	int i;
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_PAGING);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS) & MALI_MMU_STATUS_BIT_PAGING_ENABLED) {
+			break;
+		}
+	}
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Enable paging request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+	}
+}
+
+/**
+ * Issues the enable stall command to the MMU and waits for HW to complete the request
+ * @param mmu The MMU to enable paging for
+ * @return MALI_TRUE if HW stall was successfully engaged, otherwise MALI_FALSE (req timed out)
+ */
+static mali_bool mali_mmu_enable_stall(struct mali_mmu_core *mmu)
+{
+	int i;
+	u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+
+	if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+		MALI_DEBUG_PRINT(4, ("MMU stall is implicit when Paging is not enabled.\n"));
+		return MALI_TRUE;
+	}
+
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(3, ("Aborting MMU stall request since it is in pagefault state.\n"));
+		return MALI_FALSE;
+	}
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_STALL);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+		if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+			break;
+		}
+		if ((mmu_status & MALI_MMU_STATUS_BIT_STALL_ACTIVE) && (0 == (mmu_status & MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE))) {
+			break;
+		}
+		if (0 == (mmu_status & (MALI_MMU_STATUS_BIT_PAGING_ENABLED))) {
+			break;
+		}
+	}
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_DEBUG_PRINT(2, ("Enable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+		return MALI_FALSE;
+	}
+
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(2, ("Aborting MMU stall request since it has a pagefault.\n"));
+		return MALI_FALSE;
+	}
+
+	return MALI_TRUE;
+}
+
+/**
+ * Issues the disable stall command to the MMU and waits for HW to complete the request
+ * @param mmu The MMU to enable paging for
+ */
+static void mali_mmu_disable_stall(struct mali_mmu_core *mmu)
+{
+	int i;
+	u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+
+	if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+		MALI_DEBUG_PRINT(3, ("MMU disable skipped since it was not enabled.\n"));
+		return;
+	}
+	if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+		MALI_DEBUG_PRINT(2, ("Aborting MMU disable stall request since it is in pagefault state.\n"));
+		return;
+	}
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_DISABLE_STALL);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		u32 status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+
+		if (0 == (status & MALI_MMU_STATUS_BIT_STALL_ACTIVE)) {
+			break;
+		}
+		if (status &  MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+			break;
+		}
+		if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+			break;
+		}
+	}
+	if (i == MALI_REG_POLL_COUNT_FAST) MALI_DEBUG_PRINT(1, ("Disable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+}
+
+void mali_mmu_page_fault_done(struct mali_mmu_core *mmu)
+{
+	MALI_DEBUG_PRINT(4, ("Mali MMU: %s: Leaving page fault mode\n", mmu->hw_core.description));
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_PAGE_FAULT_DONE);
+}
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu)
+{
+	int i;
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, 0xCAFEBABE);
+	MALI_DEBUG_ASSERT(mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR) == 0xCAFEB000);
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_HARD_RESET);
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+		if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR) == 0) {
+			break;
+		}
+	}
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Reset request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu)
+{
+	_mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT;
+	mali_bool stall_success;
+
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	stall_success = mali_mmu_enable_stall(mmu);
+	if (!stall_success) {
+		err = _MALI_OSK_ERR_BUSY;
+	}
+
+	MALI_DEBUG_PRINT(3, ("Mali MMU: mali_kernel_mmu_reset: %s\n", mmu->hw_core.description));
+
+	if (mali_mmu_raw_reset(mmu) == _MALI_OSK_ERR_OK) {
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+		/* no session is active, so just activate the empty page directory */
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, mali_empty_page_directory_phys);
+		mali_mmu_enable_paging(mmu);
+		err = _MALI_OSK_ERR_OK;
+	}
+	mali_mmu_disable_stall(mmu);
+
+	return err;
+}
+
+mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success = mali_mmu_enable_stall(mmu);
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+
+	if (stall_success == MALI_FALSE) {
+		/* False means that it is in Pagefault state. Not possible to disable_stall then */
+		return MALI_FALSE;
+	}
+
+	mali_mmu_disable_stall(mmu);
+	return MALI_TRUE;
+}
+
+void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+}
+
+
+void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_ZAP_ONE_LINE, MALI_MMU_PDE_ENTRY(mali_address));
+}
+
+static void mali_mmu_activate_address_space(struct mali_mmu_core *mmu, u32 page_directory)
+{
+	/* The MMU must be in stalled or page fault mode, for this writing to work */
+	MALI_DEBUG_ASSERT(0 != (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)
+				& (MALI_MMU_STATUS_BIT_STALL_ACTIVE | MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE)));
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, page_directory);
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+
+}
+
+void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir)
+{
+	mali_bool stall_success;
+
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	MALI_DEBUG_PRINT(5, ("Asked to activate page directory 0x%x on MMU %s\n", pagedir, mmu->hw_core.description));
+
+	stall_success = mali_mmu_enable_stall(mmu);
+	MALI_DEBUG_ASSERT(stall_success);
+	MALI_IGNORE(stall_success);
+	mali_mmu_activate_address_space(mmu, pagedir->page_directory);
+	mali_mmu_disable_stall(mmu);
+}
+
+void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success;
+
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+	MALI_DEBUG_PRINT(3, ("Activating the empty page directory on MMU %s\n", mmu->hw_core.description));
+
+	stall_success = mali_mmu_enable_stall(mmu);
+
+	/* This function can only be called when the core is idle, so it could not fail. */
+	MALI_DEBUG_ASSERT(stall_success);
+	MALI_IGNORE(stall_success);
+
+	mali_mmu_activate_address_space(mmu, mali_empty_page_directory_phys);
+	mali_mmu_disable_stall(mmu);
+}
+
+void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu)
+{
+	mali_bool stall_success;
+
+	MALI_DEBUG_ASSERT_POINTER(mmu);
+
+	MALI_DEBUG_PRINT(3, ("Activating the page fault flush page directory on MMU %s\n", mmu->hw_core.description));
+	stall_success = mali_mmu_enable_stall(mmu);
+	/* This function is expect to fail the stalling, since it might be in PageFault mode when it is called */
+	mali_mmu_activate_address_space(mmu, mali_page_fault_flush_page_directory);
+	if (stall_success == MALI_TRUE) mali_mmu_disable_stall(mmu);
+}
+
+/* Is called when we want the mmu to give an interrupt */
+static void mali_mmu_probe_trigger(void *data)
+{
+	struct mali_mmu_core *mmu = (struct mali_mmu_core *)data;
+
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+}
+
+/* Is called when the irq probe wants the mmu to acknowledge an interrupt from the hw */
+extern int mali_page_fault;
+static _mali_osk_errcode_t mali_mmu_probe_ack(void *data)
+{
+	struct mali_mmu_core *mmu = (struct mali_mmu_core *)data;
+	u32 int_stat;
+
+	int_stat = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS);
+
+	MALI_DEBUG_PRINT(2, ("mali_mmu_probe_irq_acknowledge: intstat 0x%x\n", int_stat));
+	if (int_stat & MALI_MMU_INTERRUPT_PAGE_FAULT) {
+		MALI_DEBUG_PRINT(2, ("Probe: Page fault detect: PASSED\n"));
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_PAGE_FAULT);
+	} else {
+		MALI_DEBUG_PRINT(1, ("Probe: Page fault detect: FAILED\n"));
+		mali_page_fault++;
+	}
+
+	if (int_stat & MALI_MMU_INTERRUPT_READ_BUS_ERROR) {
+		MALI_DEBUG_PRINT(2, ("Probe: Bus read error detect: PASSED\n"));
+		mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+	} else {
+		MALI_DEBUG_PRINT(1, ("Probe: Bus read error detect: FAILED\n"));
+		mali_page_fault++;
+	}
+
+	if ((int_stat & (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) ==
+	    (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) {
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+#if 0
+void mali_mmu_print_state(struct mali_mmu_core *mmu)
+{
+	MALI_DEBUG_PRINT(2, ("MMU: State of %s is 0x%08x\n", mmu->hw_core.description, mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+}
+#endif
diff --git a/utgard/r7p0/common/mali_mmu.h b/utgard/r7p0/common/mali_mmu.h
new file mode 100644
index 0000000..7c2d9a2
--- /dev/null
+++ b/utgard/r7p0/common/mali_mmu.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MMU_H__
+#define __MALI_MMU_H__
+
+#include "mali_osk.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_hw_core.h"
+
+/* Forward declaration from mali_group.h */
+struct mali_group;
+
+/**
+ * MMU register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_mmu_register {
+	MALI_MMU_REGISTER_DTE_ADDR = 0x0000, /**< Current Page Directory Pointer */
+	MALI_MMU_REGISTER_STATUS = 0x0004, /**< Status of the MMU */
+	MALI_MMU_REGISTER_COMMAND = 0x0008, /**< Command register, used to control the MMU */
+	MALI_MMU_REGISTER_PAGE_FAULT_ADDR = 0x000C, /**< Logical address of the last page fault */
+	MALI_MMU_REGISTER_ZAP_ONE_LINE = 0x010, /**< Used to invalidate the mapping of a single page from the MMU */
+	MALI_MMU_REGISTER_INT_RAWSTAT = 0x0014, /**< Raw interrupt status, all interrupts visible */
+	MALI_MMU_REGISTER_INT_CLEAR = 0x0018, /**< Indicate to the MMU that the interrupt has been received */
+	MALI_MMU_REGISTER_INT_MASK = 0x001C, /**< Enable/disable types of interrupts */
+	MALI_MMU_REGISTER_INT_STATUS = 0x0020 /**< Interrupt status based on the mask */
+} mali_mmu_register;
+
+/**
+ * MMU interrupt register bits
+ * Each cause of the interrupt is reported
+ * through the (raw) interrupt status registers.
+ * Multiple interrupts can be pending, so multiple bits
+ * can be set at once.
+ */
+typedef enum mali_mmu_interrupt {
+	MALI_MMU_INTERRUPT_PAGE_FAULT = 0x01, /**< A page fault occured */
+	MALI_MMU_INTERRUPT_READ_BUS_ERROR = 0x02 /**< A bus read error occured */
+} mali_mmu_interrupt;
+
+typedef enum mali_mmu_status_bits {
+	MALI_MMU_STATUS_BIT_PAGING_ENABLED      = 1 << 0,
+	MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE   = 1 << 1,
+	MALI_MMU_STATUS_BIT_STALL_ACTIVE        = 1 << 2,
+	MALI_MMU_STATUS_BIT_IDLE                = 1 << 3,
+	MALI_MMU_STATUS_BIT_REPLAY_BUFFER_EMPTY = 1 << 4,
+	MALI_MMU_STATUS_BIT_PAGE_FAULT_IS_WRITE = 1 << 5,
+	MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE    = 1 << 31,
+} mali_mmu_status_bits;
+
+/**
+ * Definition of the MMU struct
+ * Used to track a MMU unit in the system.
+ * Contains information about the mapping of the registers
+ */
+struct mali_mmu_core {
+	struct mali_hw_core hw_core; /**< Common for all HW cores */
+	_mali_osk_irq_t *irq;        /**< IRQ handler */
+};
+
+_mali_osk_errcode_t mali_mmu_initialize(void);
+
+void mali_mmu_terminate(void);
+
+struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual);
+void mali_mmu_delete(struct mali_mmu_core *mmu);
+
+_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu);
+mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu);
+void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu);
+void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address);
+
+void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir);
+void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu);
+void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu);
+
+void mali_mmu_page_fault_done(struct mali_mmu_core *mmu);
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_mmu_get_interrupt_result(struct mali_mmu_core *mmu)
+{
+	u32 rawstat_used = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT);
+
+	if (rawstat_used == 0) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	}
+
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+
+MALI_STATIC_INLINE u32 mali_mmu_get_int_status(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_rawstat(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT);
+}
+
+MALI_STATIC_INLINE void mali_mmu_mask_all_interrupts(struct mali_mmu_core *mmu)
+{
+	mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, 0);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_status(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_page_fault_addr(struct mali_mmu_core *mmu)
+{
+	return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_PAGE_FAULT_ADDR);
+}
+
+#endif /* __MALI_MMU_H__ */
diff --git a/utgard/r7p0/common/mali_mmu_page_directory.c b/utgard/r7p0/common/mali_mmu_page_directory.c
new file mode 100644
index 0000000..774c233
--- /dev/null
+++ b/utgard/r7p0/common/mali_mmu_page_directory.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_memory.h"
+#include "mali_l2_cache.h"
+
+static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data);
+
+u32 mali_allocate_empty_page(mali_io_address *virt_addr)
+{
+	_mali_osk_errcode_t err;
+	mali_io_address mapping;
+	mali_dma_addr address;
+
+	if (mali_mmu_get_table_page(&address, &mapping) != _MALI_OSK_ERR_OK) {
+		/* Allocation failed */
+		MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to get table page for empty pgdir\n"));
+		return 0;
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(mapping);
+
+	err = fill_page(mapping, 0);
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_mmu_release_table_page(address, mapping);
+		MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to zero page\n"));
+		return 0;
+	}
+
+	*virt_addr = mapping;
+	return address;
+}
+
+void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr)
+{
+	if (address != MALI_INVALID_PAGE) {
+		mali_mmu_release_table_page(address, virt_addr);
+	}
+}
+
+_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory,
+		mali_io_address *page_directory_mapping,
+		mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+		mali_dma_addr *data_page, mali_io_address *data_page_mapping)
+{
+	_mali_osk_errcode_t err;
+
+	err = mali_mmu_get_table_page(data_page, data_page_mapping);
+	if (err == _MALI_OSK_ERR_OK) {
+		err = mali_mmu_get_table_page(page_table, page_table_mapping);
+		if (err == _MALI_OSK_ERR_OK) {
+			err = mali_mmu_get_table_page(page_directory, page_directory_mapping);
+			if (err == _MALI_OSK_ERR_OK) {
+				fill_page(*data_page_mapping, 0);
+				fill_page(*page_table_mapping, *data_page | MALI_MMU_FLAGS_DEFAULT);
+				fill_page(*page_directory_mapping, *page_table | MALI_MMU_FLAGS_PRESENT);
+				MALI_SUCCESS;
+			}
+			mali_mmu_release_table_page(*page_table, *page_table_mapping);
+			*page_table = MALI_INVALID_PAGE;
+		}
+		mali_mmu_release_table_page(*data_page, *data_page_mapping);
+		*data_page = MALI_INVALID_PAGE;
+	}
+	return err;
+}
+
+void mali_destroy_fault_flush_pages(
+	mali_dma_addr *page_directory, mali_io_address *page_directory_mapping,
+	mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+	mali_dma_addr *data_page, mali_io_address *data_page_mapping)
+{
+	if (*page_directory != MALI_INVALID_PAGE) {
+		mali_mmu_release_table_page(*page_directory, *page_directory_mapping);
+		*page_directory = MALI_INVALID_PAGE;
+		*page_directory_mapping = NULL;
+	}
+
+	if (*page_table != MALI_INVALID_PAGE) {
+		mali_mmu_release_table_page(*page_table, *page_table_mapping);
+		*page_table = MALI_INVALID_PAGE;
+		*page_table_mapping = NULL;
+	}
+
+	if (*data_page != MALI_INVALID_PAGE) {
+		mali_mmu_release_table_page(*data_page, *data_page_mapping);
+		*data_page = MALI_INVALID_PAGE;
+		*data_page_mapping = NULL;
+	}
+}
+
+static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data)
+{
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(mapping);
+
+	for (i = 0; i < MALI_MMU_PAGE_SIZE / 4; i++) {
+		_mali_osk_mem_iowrite32_relaxed(mapping, i * sizeof(u32), data);
+	}
+	_mali_osk_mem_barrier();
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size)
+{
+	const int first_pde = MALI_MMU_PDE_ENTRY(mali_address);
+	const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1);
+	_mali_osk_errcode_t err;
+	mali_io_address pde_mapping;
+	mali_dma_addr pde_phys;
+	int i, page_count;
+	u32 start_address;
+
+	if (last_pde < first_pde)
+		return _MALI_OSK_ERR_INVALID_ARGS;
+
+	for (i = first_pde; i <= last_pde; i++) {
+		if (0 == (_mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+						 i * sizeof(u32)) & MALI_MMU_FLAGS_PRESENT)) {
+			/* Page table not present */
+			MALI_DEBUG_ASSERT(pagedir->page_entries_usage_count[i] == 0);
+			MALI_DEBUG_ASSERT(pagedir->page_entries_mapped[i] == NULL);
+
+			err = mali_mmu_get_table_page(&pde_phys, &pde_mapping);
+			if (err != _MALI_OSK_ERR_OK) {
+				MALI_PRINT_ERROR(("Failed to allocate page table page.\n"));
+				return err;
+			}
+			pagedir->page_entries_mapped[i] = pde_mapping;
+
+			/* Update PDE, mark as present */
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32),
+							pde_phys | MALI_MMU_FLAGS_PRESENT);
+
+			MALI_DEBUG_ASSERT(pagedir->page_entries_usage_count[i] == 0);
+		}
+
+		if (first_pde == last_pde) {
+			pagedir->page_entries_usage_count[i] += size / MALI_MMU_PAGE_SIZE;
+		} else if (i == first_pde) {
+			start_address = i * MALI_MMU_VIRTUAL_PAGE_SIZE;
+			page_count = (start_address + MALI_MMU_VIRTUAL_PAGE_SIZE - mali_address) / MALI_MMU_PAGE_SIZE;
+			pagedir->page_entries_usage_count[i] += page_count;
+		} else if (i == last_pde) {
+			start_address = i * MALI_MMU_VIRTUAL_PAGE_SIZE;
+			page_count = (mali_address + size - start_address) / MALI_MMU_PAGE_SIZE;
+			pagedir->page_entries_usage_count[i] += page_count;
+		} else {
+			pagedir->page_entries_usage_count[i] = 1024;
+		}
+	}
+	_mali_osk_write_mem_barrier();
+
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE void mali_mmu_zero_pte(mali_io_address page_table, u32 mali_address, u32 size)
+{
+	int i;
+	const int first_pte = MALI_MMU_PTE_ENTRY(mali_address);
+	const int last_pte = MALI_MMU_PTE_ENTRY(mali_address + size - 1);
+
+	for (i = first_pte; i <= last_pte; i++) {
+		_mali_osk_mem_iowrite32_relaxed(page_table, i * sizeof(u32), 0);
+	}
+}
+
+static u32 mali_page_directory_get_phys_address(struct mali_page_directory *pagedir, u32 index)
+{
+	return (_mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+				       index * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK);
+}
+
+
+_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size)
+{
+	const int first_pde = MALI_MMU_PDE_ENTRY(mali_address);
+	const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1);
+	u32 left = size;
+	int i;
+	mali_bool pd_changed = MALI_FALSE;
+	u32 pages_to_invalidate[3]; /* hard-coded to 3: max two pages from the PT level plus max one page from PD level */
+	u32 num_pages_inv = 0;
+	mali_bool invalidate_all = MALI_FALSE; /* safety mechanism in case page_entries_usage_count is unreliable */
+
+	/* For all page directory entries in range. */
+	for (i = first_pde; i <= last_pde; i++) {
+		u32 size_in_pde, offset;
+
+		MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[i]);
+		MALI_DEBUG_ASSERT(pagedir->page_entries_usage_count[i] != 0);
+
+		/* Offset into page table, 0 if mali_address is 4MiB aligned */
+		offset = (mali_address & (MALI_MMU_VIRTUAL_PAGE_SIZE - 1));
+		if (left < MALI_MMU_VIRTUAL_PAGE_SIZE - offset) {
+			size_in_pde = left;
+		} else {
+			size_in_pde = MALI_MMU_VIRTUAL_PAGE_SIZE - offset;
+		}
+
+		pagedir->page_entries_usage_count[i] -= size_in_pde / MALI_MMU_PAGE_SIZE;
+
+		/* If entire page table is unused, free it */
+		if (pagedir->page_entries_usage_count[i] == 0) {
+			u32 page_phys;
+			void *page_virt;
+
+			MALI_DEBUG_PRINT(4, ("Releasing page table as this is the last reference\n"));
+			/* last reference removed, no need to zero out each PTE  */
+
+			page_phys = MALI_MMU_ENTRY_ADDRESS(_mali_osk_mem_ioread32(pagedir->page_directory_mapped, i * sizeof(u32)));
+			page_virt = pagedir->page_entries_mapped[i];
+			pagedir->page_entries_mapped[i] = NULL;
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0);
+
+			mali_mmu_release_table_page(page_phys, page_virt);
+			pd_changed = MALI_TRUE;
+		} else {
+			MALI_DEBUG_ASSERT(num_pages_inv < 2);
+			if (num_pages_inv < 2) {
+				pages_to_invalidate[num_pages_inv] = mali_page_directory_get_phys_address(pagedir, i);
+				num_pages_inv++;
+			} else {
+				invalidate_all = MALI_TRUE;
+			}
+
+			/* If part of the page table is still in use, zero the relevant PTEs */
+			mali_mmu_zero_pte(pagedir->page_entries_mapped[i], mali_address, size_in_pde);
+		}
+
+		left -= size_in_pde;
+		mali_address += size_in_pde;
+	}
+	_mali_osk_write_mem_barrier();
+
+	/* L2 pages invalidation */
+	if (pd_changed == MALI_TRUE) {
+		MALI_DEBUG_ASSERT(num_pages_inv < 3);
+		if (num_pages_inv < 3) {
+			pages_to_invalidate[num_pages_inv] = pagedir->page_directory;
+			num_pages_inv++;
+		} else {
+			invalidate_all = MALI_TRUE;
+		}
+	}
+
+	if (invalidate_all) {
+		mali_l2_cache_invalidate_all();
+	} else {
+		mali_l2_cache_invalidate_all_pages(pages_to_invalidate, num_pages_inv);
+	}
+
+	MALI_SUCCESS;
+}
+
+struct mali_page_directory *mali_mmu_pagedir_alloc(void)
+{
+	struct mali_page_directory *pagedir;
+	_mali_osk_errcode_t err;
+	mali_dma_addr phys;
+
+	pagedir = _mali_osk_calloc(1, sizeof(struct mali_page_directory));
+	if (pagedir == NULL) {
+		return NULL;
+	}
+
+	err = mali_mmu_get_table_page(&phys, &pagedir->page_directory_mapped);
+	if (err != _MALI_OSK_ERR_OK) {
+		_mali_osk_free(pagedir);
+		return NULL;
+	}
+
+	pagedir->page_directory = (u32)phys;
+
+	/* Zero page directory */
+	fill_page(pagedir->page_directory_mapped, 0);
+
+	return pagedir;
+}
+
+void mali_mmu_pagedir_free(struct mali_page_directory *pagedir)
+{
+	const int num_page_table_entries = ARRAY_SIZE(pagedir->page_entries_mapped);
+	int i;
+
+	/* Free referenced page tables and zero PDEs. */
+	for (i = 0; i < num_page_table_entries; i++) {
+		if (pagedir->page_directory_mapped && (_mali_osk_mem_ioread32(
+				pagedir->page_directory_mapped,
+				sizeof(u32)*i) & MALI_MMU_FLAGS_PRESENT)) {
+			mali_dma_addr phys = _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+					     i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK;
+			_mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0);
+			mali_mmu_release_table_page(phys, pagedir->page_entries_mapped[i]);
+		}
+	}
+	_mali_osk_write_mem_barrier();
+
+	/* Free the page directory page. */
+	mali_mmu_release_table_page(pagedir->page_directory, pagedir->page_directory_mapped);
+
+	_mali_osk_free(pagedir);
+}
+
+
+void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address,
+			     mali_dma_addr phys_address, u32 size, u32 permission_bits)
+{
+	u32 end_address = mali_address + size;
+	u32 mali_phys = (u32)phys_address;
+
+	/* Map physical pages into MMU page tables */
+	for (; mali_address < end_address; mali_address += MALI_MMU_PAGE_SIZE, mali_phys += MALI_MMU_PAGE_SIZE) {
+		MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)]);
+		_mali_osk_mem_iowrite32_relaxed(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)],
+						MALI_MMU_PTE_ENTRY(mali_address) * sizeof(u32),
+						mali_phys | permission_bits);
+	}
+}
+
+void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr)
+{
+#if defined(DEBUG)
+	u32 pde_index, pte_index;
+	u32 pde, pte;
+
+	pde_index = MALI_MMU_PDE_ENTRY(fault_addr);
+	pte_index = MALI_MMU_PTE_ENTRY(fault_addr);
+
+
+	pde = _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+				     pde_index * sizeof(u32));
+
+
+	if (pde & MALI_MMU_FLAGS_PRESENT) {
+		u32 pte_addr = MALI_MMU_ENTRY_ADDRESS(pde);
+
+		pte = _mali_osk_mem_ioread32(pagedir->page_entries_mapped[pde_index],
+					     pte_index * sizeof(u32));
+
+		MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table present: %08x\n"
+				     "\t\tPTE: %08x, page %08x is %s\n",
+				     fault_addr, pte_addr, pte,
+				     MALI_MMU_ENTRY_ADDRESS(pte),
+				     pte & MALI_MMU_FLAGS_DEFAULT ? "rw" : "not present"));
+	} else {
+		MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table not present: %08x\n",
+				     fault_addr, pde));
+	}
+#else
+	MALI_IGNORE(pagedir);
+	MALI_IGNORE(fault_addr);
+#endif
+}
+
+/* For instrumented */
+struct dump_info {
+	u32 buffer_left;
+	u32 register_writes_size;
+	u32 page_table_dump_size;
+	u32 *buffer;
+};
+
+static _mali_osk_errcode_t writereg(u32 where, u32 what, const char *comment, struct dump_info *info)
+{
+	if (info != NULL) {
+		info->register_writes_size += sizeof(u32) * 2; /* two 32-bit words */
+
+		if (info->buffer != NULL) {
+			/* check that we have enough space */
+			if (info->buffer_left < sizeof(u32) * 2) MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+			*info->buffer = where;
+			info->buffer++;
+
+			*info->buffer = what;
+			info->buffer++;
+
+			info->buffer_left -= sizeof(u32) * 2;
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t mali_mmu_dump_page(mali_io_address page, u32 phys_addr, struct dump_info *info)
+{
+	if (info != NULL) {
+		/* 4096 for the page and 4 bytes for the address */
+		const u32 page_size_in_elements = MALI_MMU_PAGE_SIZE / 4;
+		const u32 page_size_in_bytes = MALI_MMU_PAGE_SIZE;
+		const u32 dump_size_in_bytes = MALI_MMU_PAGE_SIZE + 4;
+
+		info->page_table_dump_size += dump_size_in_bytes;
+
+		if (info->buffer != NULL) {
+			if (info->buffer_left < dump_size_in_bytes) MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+			*info->buffer = phys_addr;
+			info->buffer++;
+
+			_mali_osk_memcpy(info->buffer, page, page_size_in_bytes);
+			info->buffer += page_size_in_elements;
+
+			info->buffer_left -= dump_size_in_bytes;
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t dump_mmu_page_table(struct mali_page_directory *pagedir, struct dump_info *info)
+{
+	MALI_DEBUG_ASSERT_POINTER(pagedir);
+	MALI_DEBUG_ASSERT_POINTER(info);
+
+	if (pagedir->page_directory_mapped != NULL) {
+		int i;
+
+		MALI_CHECK_NO_ERROR(
+			mali_mmu_dump_page(pagedir->page_directory_mapped, pagedir->page_directory, info)
+		);
+
+		for (i = 0; i < 1024; i++) {
+			if (pagedir->page_entries_mapped[i] != NULL) {
+				MALI_CHECK_NO_ERROR(
+					mali_mmu_dump_page(pagedir->page_entries_mapped[i],
+							   _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+									   i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK, info)
+				);
+			}
+		}
+	}
+
+	MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t dump_mmu_registers(struct mali_page_directory *pagedir, struct dump_info *info)
+{
+	MALI_CHECK_NO_ERROR(writereg(0x00000000, pagedir->page_directory,
+				     "set the page directory address", info));
+	MALI_CHECK_NO_ERROR(writereg(0x00000008, 4, "zap???", info));
+	MALI_CHECK_NO_ERROR(writereg(0x00000008, 0, "enable paging", info));
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args)
+{
+	struct dump_info info = { 0, 0, 0, NULL };
+	struct mali_session_data *session_data;
+
+	session_data = (struct mali_session_data *)(uintptr_t)(args->ctx);
+	MALI_DEBUG_ASSERT_POINTER(session_data);
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info));
+	MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info));
+	args->size = info.register_writes_size + info.page_table_dump_size;
+	MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args)
+{
+	struct dump_info info = { 0, 0, 0, NULL };
+	struct mali_session_data *session_data;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	session_data = (struct mali_session_data *)(uintptr_t)(args->ctx);
+	MALI_DEBUG_ASSERT_POINTER(session_data);
+
+	info.buffer_left = args->size;
+	info.buffer = (u32 *)(uintptr_t)args->buffer;
+
+	args->register_writes = (uintptr_t)info.buffer;
+	MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info));
+
+	args->page_table_dump = (uintptr_t)info.buffer;
+	MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info));
+
+	args->register_writes_size = info.register_writes_size;
+	args->page_table_dump_size = info.page_table_dump_size;
+
+	MALI_SUCCESS;
+}
diff --git a/utgard/r7p0/common/mali_mmu_page_directory.h b/utgard/r7p0/common/mali_mmu_page_directory.h
new file mode 100644
index 0000000..9c88087
--- /dev/null
+++ b/utgard/r7p0/common/mali_mmu_page_directory.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MMU_PAGE_DIRECTORY_H__
+#define __MALI_MMU_PAGE_DIRECTORY_H__
+
+#include "mali_osk.h"
+
+/**
+ * Size of an MMU page in bytes
+ */
+#define MALI_MMU_PAGE_SIZE 0x1000
+
+/*
+ * Size of the address space referenced by a page table page
+ */
+#define MALI_MMU_VIRTUAL_PAGE_SIZE 0x400000 /* 4 MiB */
+
+/**
+ * Page directory index from address
+ * Calculates the page directory index from the given address
+ */
+#define MALI_MMU_PDE_ENTRY(address) (((address)>>22) & 0x03FF)
+
+/**
+ * Page table index from address
+ * Calculates the page table index from the given address
+ */
+#define MALI_MMU_PTE_ENTRY(address) (((address)>>12) & 0x03FF)
+
+/**
+ * Extract the memory address from an PDE/PTE entry
+ */
+#define MALI_MMU_ENTRY_ADDRESS(value) ((value) & 0xFFFFFC00)
+
+#define MALI_INVALID_PAGE ((u32)(~0))
+
+/**
+ *
+ */
+typedef enum mali_mmu_entry_flags {
+	MALI_MMU_FLAGS_PRESENT = 0x01,
+	MALI_MMU_FLAGS_READ_PERMISSION = 0x02,
+	MALI_MMU_FLAGS_WRITE_PERMISSION = 0x04,
+	MALI_MMU_FLAGS_OVERRIDE_CACHE  = 0x8,
+	MALI_MMU_FLAGS_WRITE_CACHEABLE  = 0x10,
+	MALI_MMU_FLAGS_WRITE_ALLOCATE  = 0x20,
+	MALI_MMU_FLAGS_WRITE_BUFFERABLE  = 0x40,
+	MALI_MMU_FLAGS_READ_CACHEABLE  = 0x80,
+	MALI_MMU_FLAGS_READ_ALLOCATE  = 0x100,
+	MALI_MMU_FLAGS_MASK = 0x1FF,
+} mali_mmu_entry_flags;
+
+
+#define MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE ( \
+		MALI_MMU_FLAGS_PRESENT | \
+		MALI_MMU_FLAGS_READ_PERMISSION |  \
+		MALI_MMU_FLAGS_WRITE_PERMISSION | \
+		MALI_MMU_FLAGS_OVERRIDE_CACHE | \
+		MALI_MMU_FLAGS_WRITE_CACHEABLE | \
+		MALI_MMU_FLAGS_WRITE_BUFFERABLE | \
+		MALI_MMU_FLAGS_READ_CACHEABLE | \
+		MALI_MMU_FLAGS_READ_ALLOCATE)
+
+#define MALI_MMU_FLAGS_DEFAULT ( \
+				 MALI_MMU_FLAGS_PRESENT | \
+				 MALI_MMU_FLAGS_READ_PERMISSION |  \
+				 MALI_MMU_FLAGS_WRITE_PERMISSION)
+
+
+struct mali_page_directory {
+	u32 page_directory; /**< Physical address of the memory session's page directory */
+	mali_io_address page_directory_mapped; /**< Pointer to the mapped version of the page directory into the kernel's address space */
+
+	mali_io_address page_entries_mapped[1024]; /**< Pointers to the page tables which exists in the page directory mapped into the kernel's address space */
+	u32   page_entries_usage_count[1024]; /**< Tracks usage count of the page table pages, so they can be releases on the last reference */
+};
+
+/* Map Mali virtual address space (i.e. ensure page tables exist for the virtual range)  */
+_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size);
+_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size);
+
+/* Back virtual address space with actual pages. Assumes input is contiguous and 4k aligned. */
+void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address,
+			     mali_dma_addr phys_address, u32 size, u32 permission_bits);
+
+u32 mali_allocate_empty_page(mali_io_address *virtual);
+void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr);
+_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory,
+		mali_io_address *page_directory_mapping,
+		mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+		mali_dma_addr *data_page, mali_io_address *data_page_mapping);
+void mali_destroy_fault_flush_pages(
+	mali_dma_addr *page_directory, mali_io_address *page_directory_mapping,
+	mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+	mali_dma_addr *data_page, mali_io_address *data_page_mapping);
+
+struct mali_page_directory *mali_mmu_pagedir_alloc(void);
+void mali_mmu_pagedir_free(struct mali_page_directory *pagedir);
+
+void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr);
+
+#endif /* __MALI_MMU_PAGE_DIRECTORY_H__ */
diff --git a/utgard/r7p0/common/mali_osk.h b/utgard/r7p0/common/mali_osk.h
new file mode 100644
index 0000000..d096efe
--- /dev/null
+++ b/utgard/r7p0/common/mali_osk.h
@@ -0,0 +1,1389 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk.h
+ * Defines the OS abstraction layer for the kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_H__
+#define __MALI_OSK_H__
+
+#include <linux/seq_file.h>
+#include "mali_osk_types.h"
+#include "mali_osk_specific.h"           /* include any per-os specifics */
+#include "mali_osk_locks.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs
+ *
+ * @{
+ */
+
+/** @addtogroup _mali_osk_lock OSK Mutual Exclusion Locks
+ * @{ */
+
+#ifdef DEBUG
+/** @brief Macro for asserting that the current thread holds a given lock
+ */
+#define MALI_DEBUG_ASSERT_LOCK_HELD(l) MALI_DEBUG_ASSERT(_mali_osk_lock_get_owner((_mali_osk_lock_debug_t *)l) == _mali_osk_get_tid());
+
+/** @brief returns a lock's owner (thread id) if debugging is enabled
+ */
+#else
+#define MALI_DEBUG_ASSERT_LOCK_HELD(l) do {} while (0)
+#endif
+
+#define _mali_osk_ctxprintf     seq_printf
+
+/** @} */ /* end group _mali_osk_lock */
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Find the containing structure of another structure
+ *
+ * This is the reverse of the operation 'offsetof'. This means that the
+ * following condition is satisfied:
+ *
+ *   ptr == _MALI_OSK_CONTAINER_OF( &ptr->member, type, member )
+ *
+ * When ptr is of type 'type'.
+ *
+ * Its purpose it to recover a larger structure that has wrapped a smaller one.
+ *
+ * @note no type or memory checking occurs to ensure that a wrapper structure
+ * does in fact exist, and that it is being recovered with respect to the
+ * correct member.
+ *
+ * @param ptr the pointer to the member that is contained within the larger
+ * structure
+ * @param type the type of the structure that contains the member
+ * @param member the name of the member in the structure that ptr points to.
+ * @return a pointer to a \a type object which contains \a member, as pointed
+ * to by \a ptr.
+ */
+#define _MALI_OSK_CONTAINER_OF(ptr, type, member) \
+	((type *)(((char *)ptr) - offsetof(type, member)))
+
+/** @addtogroup _mali_osk_wq
+ * @{ */
+
+/** @brief Initialize work queues (for deferred work)
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_wq_init(void);
+
+/** @brief Terminate work queues (for deferred work)
+ */
+void _mali_osk_wq_term(void);
+
+/** @brief Create work in the work queue
+ *
+ * Creates a work object which can be scheduled in the work queue. When
+ * scheduled, \a handler will be called with \a data as the argument.
+ *
+ * Refer to \ref _mali_osk_wq_schedule_work() for details on how work
+ * is scheduled in the queue.
+ *
+ * The returned pointer must be freed with \ref _mali_osk_wq_delete_work()
+ * when no longer needed.
+ */
+_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief A high priority version of \a _mali_osk_wq_create_work()
+ *
+ * Creates a work object which can be scheduled in the high priority work queue.
+ *
+ * This is unfortunately needed to get low latency scheduling of the Mali cores.  Normally we would
+ * schedule the next job in hw_irq or tasklet, but often we can't since we need to synchronously map
+ * and unmap shared memory when a job is connected to external fences (timelines). And this requires
+ * taking a mutex.
+ *
+ * We do signal a lot of other (low priority) work also as part of the job being finished, and if we
+ * don't set this Mali scheduling thread as high priority, we see that the CPU scheduler often runs
+ * random things instead of starting the next GPU job when the GPU is idle.  So setting the gpu
+ * scheduler to high priority does give a visually more responsive system.
+ *
+ * Start the high priority work with: \a _mali_osk_wq_schedule_work_high_pri()
+ */
+_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief Delete a work object
+ *
+ * This will flush the work queue to ensure that the work handler will not
+ * be called after deletion.
+ */
+void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work);
+
+/** @brief Delete a work object
+ *
+ * This will NOT flush the work queue, so only call this if you are sure that the work handler will
+ * not be called after deletion.
+ */
+void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work);
+
+/** @brief Cause a queued, deferred call of the work handler
+ *
+ * _mali_osk_wq_schedule_work provides a mechanism for enqueuing deferred calls
+ * to the work handler. After calling \ref _mali_osk_wq_schedule_work(), the
+ * work handler will be scheduled to run at some point in the future.
+ *
+ * Typically this is called by the IRQ upper-half to defer further processing of
+ * IRQ-related work to the IRQ bottom-half handler. This is necessary for work
+ * that cannot be done in an IRQ context by the IRQ upper-half handler. Timer
+ * callbacks also use this mechanism, because they are treated as though they
+ * operate in an IRQ context. Refer to \ref _mali_osk_timer_t for more
+ * information.
+ *
+ * Code that operates in a kernel-process context (with no IRQ context
+ * restrictions) may also enqueue deferred calls to the IRQ bottom-half. The
+ * advantage over direct calling is that deferred calling allows the caller and
+ * IRQ bottom half to hold the same mutex, with a guarantee that they will not
+ * deadlock just by using this mechanism.
+ *
+ * _mali_osk_wq_schedule_work() places deferred call requests on a queue, to
+ * allow for more than one thread to make a deferred call. Therfore, if it is
+ * called 'K' times, then the IRQ bottom-half will be scheduled 'K' times too.
+ * 'K' is a number that is implementation-specific.
+ *
+ * _mali_osk_wq_schedule_work() is guaranteed to not block on:
+ * - enqueuing a deferred call request.
+ * - the completion of the work handler.
+ *
+ * This is to prevent deadlock. For example, if _mali_osk_wq_schedule_work()
+ * blocked, then it would cause a deadlock when the following two conditions
+ * hold:
+ * - The work handler callback (of type _mali_osk_wq_work_handler_t) locks
+ * a mutex
+ * - And, at the same time, the caller of _mali_osk_wq_schedule_work() also
+ * holds the same mutex
+ *
+ * @note care must be taken to not overflow the queue that
+ * _mali_osk_wq_schedule_work() operates on. Code must be structured to
+ * ensure that the number of requests made to the queue is bounded. Otherwise,
+ * work will be lost.
+ *
+ * The queue that _mali_osk_wq_schedule_work implements is a FIFO of N-writer,
+ * 1-reader type. The writers are the callers of _mali_osk_wq_schedule_work
+ * (all OSK-registered IRQ upper-half handlers in the system, watchdog timers,
+ * callers from a Kernel-process context). The reader is a single thread that
+ * handles all OSK-registered work.
+ *
+ * @param work a pointer to the _mali_osk_wq_work_t object corresponding to the
+ * work to begin processing.
+ */
+void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work);
+
+/** @brief Cause a queued, deferred call of the high priority work handler
+ *
+ * Function is the same as \a _mali_osk_wq_schedule_work() with the only
+ * difference that it runs in a high (real time) priority on the system.
+ *
+ * Should only be used as a substitue for doing the same work in interrupts.
+ *
+ * This is allowed to sleep, but the work should be small since it will block
+ * all other applications.
+*/
+void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work);
+
+/** @brief Flush the work queue
+ *
+ * This will flush the OSK work queue, ensuring all work in the queue has
+ * completed before returning.
+ *
+ * Since this blocks on the completion of work in the work-queue, the
+ * caller of this function \b must \b not hold any mutexes that are taken by
+ * any registered work handler. To do so may cause a deadlock.
+ *
+ */
+void _mali_osk_wq_flush(void);
+
+/** @brief Create work in the delayed work queue
+ *
+ * Creates a work object which can be scheduled in the work queue. When
+ * scheduled, a timer will be start and the \a handler will be called with
+ * \a data as the argument when timer out
+ *
+ * Refer to \ref _mali_osk_wq_delayed_schedule_work() for details on how work
+ * is scheduled in the queue.
+ *
+ * The returned pointer must be freed with \ref _mali_osk_wq_delayed_delete_work_nonflush()
+ * when no longer needed.
+ */
+_mali_osk_wq_delayed_work_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief Delete a work object
+ *
+ * This will NOT flush the work queue, so only call this if you are sure that the work handler will
+ * not be called after deletion.
+ */
+void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Cancel a delayed work without waiting for it to finish
+ *
+ * Note that the \a work callback function may still be running on return from
+ * _mali_osk_wq_delayed_cancel_work_async().
+ *
+ * @param work The delayed work to be cancelled
+ */
+void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Cancel a delayed work and wait for it to finish
+ *
+ * When this function returns, the \a work was either cancelled or it finished running.
+ *
+ * @param work The delayed work to be cancelled
+ */
+void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Put \a work task in global workqueue after delay
+ *
+ * After waiting for a given time this puts a job in the kernel-global
+ * workqueue.
+ *
+ * If \a work was already on a queue, this function will return without doing anything
+ *
+ * @param work job to be done
+ * @param delay number of jiffies to wait or 0 for immediate execution
+ */
+void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay);
+
+/** @} */ /* end group _mali_osk_wq */
+
+
+/** @addtogroup _mali_osk_irq
+ * @{ */
+
+/** @brief Initialize IRQ handling for a resource
+ *
+ * Registers an interrupt handler \a uhandler for the given IRQ number \a irqnum.
+ * \a data will be passed as argument to the handler when an interrupt occurs.
+ *
+ * If \a irqnum is -1, _mali_osk_irq_init will probe for the IRQ number using
+ * the supplied \a trigger_func and \a ack_func. These functions will also
+ * receive \a data as their argument.
+ *
+ * @param irqnum The IRQ number that the resource uses, as seen by the CPU.
+ * The value -1 has a special meaning which indicates the use of probing, and
+ * trigger_func and ack_func must be non-NULL.
+ * @param uhandler The interrupt handler, corresponding to a ISR handler for
+ * the resource
+ * @param int_data resource specific data, which will be passed to uhandler
+ * @param trigger_func Optional: a function to trigger the resource's irq, to
+ * probe for the interrupt. Use NULL if irqnum != -1.
+ * @param ack_func Optional: a function to acknowledge the resource's irq, to
+ * probe for the interrupt. Use NULL if irqnum != -1.
+ * @param probe_data resource-specific data, which will be passed to
+ * (if present) trigger_func and ack_func
+ * @param description textual description of the IRQ resource.
+ * @return on success, a pointer to a _mali_osk_irq_t object, which represents
+ * the IRQ handling on this resource. NULL on failure.
+ */
+_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description);
+
+/** @brief Terminate IRQ handling on a resource.
+ *
+ * This will disable the interrupt from the device, and then waits for any
+ * currently executing IRQ handlers to complete.
+ *
+ * @note If work is deferred to an IRQ bottom-half handler through
+ * \ref _mali_osk_wq_schedule_work(), be sure to flush any remaining work
+ * with \ref _mali_osk_wq_flush() or (implicitly) with \ref _mali_osk_wq_delete_work()
+ *
+ * @param irq a pointer to the _mali_osk_irq_t object corresponding to the
+ * resource whose IRQ handling is to be terminated.
+ */
+void _mali_osk_irq_term(_mali_osk_irq_t *irq);
+
+/** @} */ /* end group _mali_osk_irq */
+
+
+/** @addtogroup _mali_osk_atomic
+ * @{ */
+
+/** @brief Decrement an atomic counter
+ *
+ * @note It is an error to decrement the counter beyond -(1<<23)
+ *
+ * @param atom pointer to an atomic counter */
+void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom);
+
+/** @brief Decrement an atomic counter, return new value
+ *
+ * @param atom pointer to an atomic counter
+ * @return The new value, after decrement */
+u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom);
+
+/** @brief Increment an atomic counter
+ *
+ * @note It is an error to increment the counter beyond (1<<23)-1
+ *
+ * @param atom pointer to an atomic counter */
+void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom);
+
+/** @brief Increment an atomic counter, return new value
+ *
+ * @param atom pointer to an atomic counter */
+u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom);
+
+/** @brief Initialize an atomic counter
+ *
+ * @note the parameter required is a u32, and so signed integers should be
+ * cast to u32.
+ *
+ * @param atom pointer to an atomic counter
+ * @param val the value to initialize the atomic counter.
+ */
+void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val);
+
+/** @brief Read a value from an atomic counter
+ *
+ * This can only be safely used to determine the value of the counter when it
+ * is guaranteed that other threads will not be modifying the counter. This
+ * makes its usefulness limited.
+ *
+ * @param atom pointer to an atomic counter
+ */
+u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom);
+
+/** @brief Terminate an atomic counter
+ *
+ * @param atom pointer to an atomic counter
+ */
+void _mali_osk_atomic_term(_mali_osk_atomic_t *atom);
+
+/** @brief Assign a new val to atomic counter, and return the old atomic counter
+ *
+ * @param atom pointer to an atomic counter
+ * @param val the new value assign to the atomic counter
+ * @return the old value of the atomic counter
+ */
+u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val);
+/** @} */  /* end group _mali_osk_atomic */
+
+
+/** @defgroup _mali_osk_memory OSK Memory Allocation
+ * @{ */
+
+/** @brief Allocate zero-initialized memory.
+ *
+ * Returns a buffer capable of containing at least \a n elements of \a size
+ * bytes each. The buffer is initialized to zero.
+ *
+ * If there is a need for a bigger block of memory (16KB or bigger), then
+ * consider to use _mali_osk_vmalloc() instead, as this function might
+ * map down to a OS function with size limitations.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * @param n Number of elements to allocate
+ * @param size Size of each element
+ * @return On success, the zero-initialized buffer allocated. NULL on failure
+ */
+void *_mali_osk_calloc(u32 n, u32 size);
+
+/** @brief Allocate memory.
+ *
+ * Returns a buffer capable of containing at least \a size bytes. The
+ * contents of the buffer are undefined.
+ *
+ * If there is a need for a bigger block of memory (16KB or bigger), then
+ * consider to use _mali_osk_vmalloc() instead, as this function might
+ * map down to a OS function with size limitations.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * Remember to free memory using _mali_osk_free().
+ * @param size Number of bytes to allocate
+ * @return On success, the buffer allocated. NULL on failure.
+ */
+void *_mali_osk_malloc(u32 size);
+
+/** @brief Free memory.
+ *
+ * Reclaims the buffer pointed to by the parameter \a ptr for the system.
+ * All memory returned from _mali_osk_malloc() and _mali_osk_calloc()
+ * must be freed before the application exits. Otherwise,
+ * a memory leak will occur.
+ *
+ * Memory must be freed once. It is an error to free the same non-NULL pointer
+ * more than once.
+ *
+ * It is legal to free the NULL pointer.
+ *
+ * @param ptr Pointer to buffer to free
+ */
+void _mali_osk_free(void *ptr);
+
+/** @brief Allocate memory.
+ *
+ * Returns a buffer capable of containing at least \a size bytes. The
+ * contents of the buffer are undefined.
+ *
+ * This function is potentially slower than _mali_osk_malloc() and _mali_osk_calloc(),
+ * but do support bigger sizes.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * Remember to free memory using _mali_osk_free().
+ * @param size Number of bytes to allocate
+ * @return On success, the buffer allocated. NULL on failure.
+ */
+void *_mali_osk_valloc(u32 size);
+
+/** @brief Free memory.
+ *
+ * Reclaims the buffer pointed to by the parameter \a ptr for the system.
+ * All memory returned from _mali_osk_valloc() must be freed before the
+ * application exits. Otherwise a memory leak will occur.
+ *
+ * Memory must be freed once. It is an error to free the same non-NULL pointer
+ * more than once.
+ *
+ * It is legal to free the NULL pointer.
+ *
+ * @param ptr Pointer to buffer to free
+ */
+void _mali_osk_vfree(void *ptr);
+
+/** @brief Copies memory.
+ *
+ * Copies the \a len bytes from the buffer pointed by the parameter \a src
+ * directly to the buffer pointed by \a dst.
+ *
+ * It is an error for \a src to overlap \a dst anywhere in \a len bytes.
+ *
+ * @param dst Pointer to the destination array where the content is to be
+ * copied.
+ * @param src Pointer to the source of data to be copied.
+ * @param len Number of bytes to copy.
+ * @return \a dst is always passed through unmodified.
+ */
+void *_mali_osk_memcpy(void *dst, const void *src, u32 len);
+
+/** @brief Fills memory.
+ *
+ * Sets the first \a n bytes of the block of memory pointed to by \a s to
+ * the specified value
+ * @param s Pointer to the block of memory to fill.
+ * @param c Value to be set, passed as u32. Only the 8 Least Significant Bits (LSB)
+ * are used.
+ * @param n Number of bytes to be set to the value.
+ * @return \a s is always passed through unmodified
+ */
+void *_mali_osk_memset(void *s, u32 c, u32 n);
+/** @} */ /* end group _mali_osk_memory */
+
+
+/** @brief Checks the amount of memory allocated
+ *
+ * Checks that not more than \a max_allocated bytes are allocated.
+ *
+ * Some OS bring up an interactive out of memory dialogue when the
+ * system runs out of memory. This can stall non-interactive
+ * apps (e.g. automated test runs). This function can be used to
+ * not trigger the OOM dialogue by keeping allocations
+ * within a certain limit.
+ *
+ * @return MALI_TRUE when \a max_allocated bytes are not in use yet. MALI_FALSE
+ * when at least \a max_allocated bytes are in use.
+ */
+mali_bool _mali_osk_mem_check_allocated(u32 max_allocated);
+
+
+/** @addtogroup _mali_osk_low_level_memory
+ * @{ */
+
+/** @brief Issue a memory barrier
+ *
+ * This defines an arbitrary memory barrier operation, which forces an ordering constraint
+ * on memory read and write operations.
+ */
+void _mali_osk_mem_barrier(void);
+
+/** @brief Issue a write memory barrier
+ *
+ * This defines an write memory barrier operation which forces an ordering constraint
+ * on memory write operations.
+ */
+void _mali_osk_write_mem_barrier(void);
+
+/** @brief Map a physically contiguous region into kernel space
+ *
+ * This is primarily used for mapping in registers from resources, and Mali-MMU
+ * page tables. The mapping is only visable from kernel-space.
+ *
+ * Access has to go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32
+ *
+ * @param phys CPU-physical base address of the memory to map in. This must
+ * be aligned to the system's page size, which is assumed to be 4K.
+ * @param size the number of bytes of physically contiguous address space to
+ * map in
+ * @param description A textual description of the memory being mapped in.
+ * @return On success, a Mali IO address through which the mapped-in
+ * memory/registers can be accessed. NULL on failure.
+ */
+mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description);
+
+/** @brief Unmap a physically contiguous address range from kernel space.
+ *
+ * The address range should be one previously mapped in through
+ * _mali_osk_mem_mapioregion.
+ *
+ * It is a programming error to do (but not limited to) the following:
+ * - attempt an unmap twice
+ * - unmap only part of a range obtained through _mali_osk_mem_mapioregion
+ * - unmap more than the range obtained through  _mali_osk_mem_mapioregion
+ * - unmap an address range that was not successfully mapped using
+ * _mali_osk_mem_mapioregion
+ * - provide a mapping that does not map to phys.
+ *
+ * @param phys CPU-physical base address of the memory that was originally
+ * mapped in. This must be aligned to the system's page size, which is assumed
+ * to be 4K
+ * @param size The number of bytes that were originally mapped in.
+ * @param mapping The Mali IO address through which the mapping is
+ * accessed.
+ */
+void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address mapping);
+
+/** @brief Allocate and Map a physically contiguous region into kernel space
+ *
+ * This is used for allocating physically contiguous regions (such as Mali-MMU
+ * page tables) and mapping them into kernel space. The mapping is only
+ * visible from kernel-space.
+ *
+ * The alignment of the returned memory is guaranteed to be at least
+ * _MALI_OSK_CPU_PAGE_SIZE.
+ *
+ * Access must go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32
+ *
+ * @note This function is primarily to provide support for OSs that are
+ * incapable of separating the tasks 'allocate physically contiguous memory'
+ * and 'map it into kernel space'
+ *
+ * @param[out] phys CPU-physical base address of memory that was allocated.
+ * (*phys) will be guaranteed to be aligned to at least
+ * _MALI_OSK_CPU_PAGE_SIZE on success.
+ *
+ * @param[in] size the number of bytes of physically contiguous memory to
+ * allocate. This must be a multiple of _MALI_OSK_CPU_PAGE_SIZE.
+ *
+ * @return On success, a Mali IO address through which the mapped-in
+ * memory/registers can be accessed. NULL on failure, and (*phys) is unmodified.
+ */
+mali_io_address _mali_osk_mem_allocioregion(u32 *phys, u32 size);
+
+/** @brief Free a physically contiguous address range from kernel space.
+ *
+ * The address range should be one previously mapped in through
+ * _mali_osk_mem_allocioregion.
+ *
+ * It is a programming error to do (but not limited to) the following:
+ * - attempt a free twice on the same ioregion
+ * - free only part of a range obtained through _mali_osk_mem_allocioregion
+ * - free more than the range obtained through  _mali_osk_mem_allocioregion
+ * - free an address range that was not successfully mapped using
+ * _mali_osk_mem_allocioregion
+ * - provide a mapping that does not map to phys.
+ *
+ * @param phys CPU-physical base address of the memory that was originally
+ * mapped in, which was aligned to _MALI_OSK_CPU_PAGE_SIZE.
+ * @param size The number of bytes that were originally mapped in, which was
+ * a multiple of _MALI_OSK_CPU_PAGE_SIZE.
+ * @param mapping The Mali IO address through which the mapping is
+ * accessed.
+ */
+void _mali_osk_mem_freeioregion(u32 phys, u32 size, mali_io_address mapping);
+
+/** @brief Request a region of physically contiguous memory
+ *
+ * This is used to ensure exclusive access to a region of physically contigous
+ * memory.
+ *
+ * It is acceptable to implement this as a stub. However, it is then the job
+ * of the System Integrator to ensure that no other device driver will be using
+ * the physical address ranges used by Mali, while the Mali device driver is
+ * loaded.
+ *
+ * @param phys CPU-physical base address of the memory to request. This must
+ * be aligned to the system's page size, which is assumed to be 4K.
+ * @param size the number of bytes of physically contiguous address space to
+ * request.
+ * @param description A textual description of the memory being requested.
+ * @return _MALI_OSK_ERR_OK on success. Otherwise, a suitable
+ * _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description);
+
+/** @brief Un-request a region of physically contiguous memory
+ *
+ * This is used to release a regious of physically contiguous memory previously
+ * requested through _mali_osk_mem_reqregion, so that other device drivers may
+ * use it. This will be called at time of Mali device driver termination.
+ *
+ * It is a programming error to attempt to:
+ * - unrequest a region twice
+ * - unrequest only part of a range obtained through _mali_osk_mem_reqregion
+ * - unrequest more than the range obtained through  _mali_osk_mem_reqregion
+ * - unrequest an address range that was not successfully requested using
+ * _mali_osk_mem_reqregion
+ *
+ * @param phys CPU-physical base address of the memory to un-request. This must
+ * be aligned to the system's page size, which is assumed to be 4K
+ * @param size the number of bytes of physically contiguous address space to
+ * un-request.
+ */
+void _mali_osk_mem_unreqregion(uintptr_t phys, u32 size);
+
+/** @brief Read from a location currently mapped in through
+ * _mali_osk_mem_mapioregion
+ *
+ * This reads a 32-bit word from a 32-bit aligned location. It is a programming
+ * error to provide unaligned locations, or to read from memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to read from
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @return the 32-bit word from the specified location.
+ */
+u32 _mali_osk_mem_ioread32(volatile mali_io_address mapping, u32 offset);
+
+/** @brief Write to a location currently mapped in through
+ * _mali_osk_mem_mapioregion without memory barriers
+ *
+ * This write a 32-bit word to a 32-bit aligned location without using memory barrier.
+ * It is a programming error to provide unaligned locations, or to write to memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to write to
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @param val the 32-bit word to write.
+ */
+void _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val);
+
+/** @brief Write to a location currently mapped in through
+ * _mali_osk_mem_mapioregion with write memory barrier
+ *
+ * This write a 32-bit word to a 32-bit aligned location. It is a programming
+ * error to provide unaligned locations, or to write to memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to write to
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @param val the 32-bit word to write.
+ */
+void _mali_osk_mem_iowrite32(volatile mali_io_address mapping, u32 offset, u32 val);
+
+/** @brief Flush all CPU caches
+ *
+ * This should only be implemented if flushing of the cache is required for
+ * memory mapped in through _mali_osk_mem_mapregion.
+ */
+void _mali_osk_cache_flushall(void);
+
+/** @brief Flush any caches necessary for the CPU and MALI to have the same view of a range of uncached mapped memory
+ *
+ * This should only be implemented if your OS doesn't do a full cache flush (inner & outer)
+ * after allocating uncached mapped memory.
+ *
+ * Some OS do not perform a full cache flush (including all outer caches) for uncached mapped memory.
+ * They zero the memory through a cached mapping, then flush the inner caches but not the outer caches.
+ * This is required for MALI to have the correct view of the memory.
+ */
+void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size);
+
+/** @brief Safely copy as much data as possible from src to dest
+ *
+ * Do not crash if src or dest isn't available.
+ *
+ * @param dest Destination buffer (limited to user space mapped Mali memory)
+ * @param src Source buffer
+ * @param size Number of bytes to copy
+ * @return Number of bytes actually copied
+ */
+u32 _mali_osk_mem_write_safe(void *dest, const void *src, u32 size);
+
+/** @} */ /* end group _mali_osk_low_level_memory */
+
+
+/** @addtogroup _mali_osk_notification
+ *
+ * User space notification framework
+ *
+ * Communication with user space of asynchronous events is performed through a
+ * synchronous call to the \ref u_k_api.
+ *
+ * Since the events are asynchronous, the events have to be queued until a
+ * synchronous U/K API call can be made by user-space. A U/K API call might also
+ * be received before any event has happened. Therefore the notifications the
+ * different subsystems wants to send to user space has to be queued for later
+ * reception, or a U/K API call has to be blocked until an event has occured.
+ *
+ * Typical uses of notifications are after running of jobs on the hardware or
+ * when changes to the system is detected that needs to be relayed to user
+ * space.
+ *
+ * After an event has occured user space has to be notified using some kind of
+ * message. The notification framework supports sending messages to waiting
+ * threads or queueing of messages until a U/K API call is made.
+ *
+ * The notification queue is a FIFO. There are no restrictions on the numbers
+ * of readers or writers in the queue.
+ *
+ * A message contains what user space needs to identifiy how to handle an
+ * event. This includes a type field and a possible type specific payload.
+ *
+ * A notification to user space is represented by a
+ * \ref _mali_osk_notification_t object. A sender gets hold of such an object
+ * using _mali_osk_notification_create(). The buffer given by the
+ * _mali_osk_notification_t::result_buffer field in the object is used to store
+ * any type specific data. The other fields are internal to the queue system
+ * and should not be touched.
+ *
+ * @{ */
+
+/** @brief Create a notification object
+ *
+ * Returns a notification object which can be added to the queue of
+ * notifications pending for user space transfer.
+ *
+ * The implementation will initialize all members of the
+ * \ref _mali_osk_notification_t object. In particular, the
+ * _mali_osk_notification_t::result_buffer member will be initialized to point
+ * to \a size bytes of storage, and that storage will be suitably aligned for
+ * storage of any structure. That is, the created buffer meets the same
+ * requirements as _mali_osk_malloc().
+ *
+ * The notification object must be deleted when not in use. Use
+ * _mali_osk_notification_delete() for deleting it.
+ *
+ * @note You \b must \b not call _mali_osk_free() on a \ref _mali_osk_notification_t,
+ * object, or on a _mali_osk_notification_t::result_buffer. You must only use
+ * _mali_osk_notification_delete() to free the resources assocaited with a
+ * \ref _mali_osk_notification_t object.
+ *
+ * @param type The notification type
+ * @param size The size of the type specific buffer to send
+ * @return Pointer to a notification object with a suitable buffer, or NULL on error.
+ */
+_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size);
+
+/** @brief Delete a notification object
+ *
+ * This must be called to reclaim the resources of a notification object. This
+ * includes:
+ * - The _mali_osk_notification_t::result_buffer
+ * - The \ref _mali_osk_notification_t itself.
+ *
+ * A notification object \b must \b not be used after it has been deleted by
+ * _mali_osk_notification_delete().
+ *
+ * In addition, the notification object may not be deleted while it is in a
+ * queue. That is, if it has been placed on a queue with
+ * _mali_osk_notification_queue_send(), then it must not be deleted until
+ * it has been received by a call to _mali_osk_notification_queue_receive().
+ * Otherwise, the queue may be corrupted.
+ *
+ * @param object the notification object to delete.
+ */
+void _mali_osk_notification_delete(_mali_osk_notification_t *object);
+
+/** @brief Create a notification queue
+ *
+ * Creates a notification queue which can be used to queue messages for user
+ * delivery and get queued messages from
+ *
+ * The queue is a FIFO, and has no restrictions on the numbers of readers or
+ * writers.
+ *
+ * When the queue is no longer in use, it must be terminated with
+ * \ref _mali_osk_notification_queue_term(). Failure to do so will result in a
+ * memory leak.
+ *
+ * @return Pointer to a new notification queue or NULL on error.
+ */
+_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void);
+
+/** @brief Destroy a notification queue
+ *
+ * Destroys a notification queue and frees associated resources from the queue.
+ *
+ * A notification queue \b must \b not be destroyed in the following cases:
+ * - while there are \ref _mali_osk_notification_t objects in the queue.
+ * - while there are writers currently acting upon the queue. That is, while
+ * a thread is currently calling \ref _mali_osk_notification_queue_send() on
+ * the queue, or while a thread may call
+ * \ref _mali_osk_notification_queue_send() on the queue in the future.
+ * - while there are readers currently waiting upon the queue. That is, while
+ * a thread is currently calling \ref _mali_osk_notification_queue_receive() on
+ * the queue, or while a thread may call
+ * \ref _mali_osk_notification_queue_receive() on the queue in the future.
+ *
+ * Therefore, all \ref _mali_osk_notification_t objects must be flushed and
+ * deleted by the code that makes use of the notification queues, since only
+ * they know the structure of the _mali_osk_notification_t::result_buffer
+ * (even if it may only be a flat sturcture).
+ *
+ * @note Since the queue is a FIFO, the code using notification queues may
+ * create its own 'flush' type of notification, to assist in flushing the
+ * queue.
+ *
+ * Once the queue has been destroyed, it must not be used again.
+ *
+ * @param queue The queue to destroy
+ */
+void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue);
+
+/** @brief Schedule notification for delivery
+ *
+ * When a \ref _mali_osk_notification_t object has been created successfully
+ * and set up, it may be added to the queue of objects waiting for user space
+ * transfer.
+ *
+ * The sending will not block if the queue is full.
+ *
+ * A \ref _mali_osk_notification_t object \b must \b not be put on two different
+ * queues at the same time, or enqueued twice onto a single queue before
+ * reception. However, it is acceptable for it to be requeued \em after reception
+ * from a call to _mali_osk_notification_queue_receive(), even onto the same queue.
+ *
+ * Again, requeuing must also not enqueue onto two different queues at the same
+ * time, or enqueue onto the same queue twice before reception.
+ *
+ * @param queue The notification queue to add this notification to
+ * @param object The entry to add
+ */
+void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object);
+
+/** @brief Receive a notification from a queue
+ *
+ * Receives a single notification from the given queue.
+ *
+ * If no notifciations are ready the thread will sleep until one becomes ready.
+ * Therefore, notifications may not be received into an
+ * IRQ or 'atomic' context (that is, a context where sleeping is disallowed).
+ *
+ * @param queue The queue to receive from
+ * @param result Pointer to storage of a pointer of type
+ * \ref _mali_osk_notification_t*. \a result will be written to such that the
+ * expression \a (*result) will evaluate to a pointer to a valid
+ * \ref _mali_osk_notification_t object, or NULL if none were received.
+ * @return _MALI_OSK_ERR_OK on success. _MALI_OSK_ERR_RESTARTSYSCALL if the sleep was interrupted.
+ */
+_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result);
+
+/** @brief Dequeues a notification from a queue
+ *
+ * Receives a single notification from the given queue.
+ *
+ * If no notifciations are ready the function call will return an error code.
+ *
+ * @param queue The queue to receive from
+ * @param result Pointer to storage of a pointer of type
+ * \ref _mali_osk_notification_t*. \a result will be written to such that the
+ * expression \a (*result) will evaluate to a pointer to a valid
+ * \ref _mali_osk_notification_t object, or NULL if none were received.
+ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if queue was empty.
+ */
+_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result);
+
+/** @} */ /* end group _mali_osk_notification */
+
+
+/** @addtogroup _mali_osk_timer
+ *
+ * Timers use the OS's representation of time, which are 'ticks'. This is to
+ * prevent aliasing problems between the internal timer time, and the time
+ * asked for.
+ *
+ * @{ */
+
+/** @brief Initialize a timer
+ *
+ * Allocates resources for a new timer, and initializes them. This does not
+ * start the timer.
+ *
+ * @return a pointer to the allocated timer object, or NULL on failure.
+ */
+_mali_osk_timer_t *_mali_osk_timer_init(void);
+
+/** @brief Start a timer
+ *
+ * It is an error to start a timer without setting the callback via
+ * _mali_osk_timer_setcallback().
+ *
+ * It is an error to use this to start an already started timer.
+ *
+ * The timer will expire in \a ticks_to_expire ticks, at which point, the
+ * callback function will be invoked with the callback-specific data,
+ * as registered by _mali_osk_timer_setcallback().
+ *
+ * @param tim the timer to start
+ * @param ticks_to_expire the amount of time in ticks for the timer to run
+ * before triggering.
+ */
+void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire);
+
+/** @brief Modify a timer
+ *
+ * Set the relative time at which a timer will expire, and start it if it is
+ * stopped. If \a ticks_to_expire 0 the timer fires immediately.
+ *
+ * It is an error to modify a timer without setting the callback via
+ *  _mali_osk_timer_setcallback().
+ *
+ * The timer will expire at \a ticks_to_expire from the time of the call, at
+ * which point, the callback function will be invoked with the
+ * callback-specific data, as set by _mali_osk_timer_setcallback().
+ *
+ * @param tim the timer to modify, and start if necessary
+ * @param ticks_to_expire the \em absolute time in ticks at which this timer
+ * should trigger.
+ *
+ */
+void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire);
+
+/** @brief Stop a timer, and block on its completion.
+ *
+ * Stop the timer. When the function returns, it is guaranteed that the timer's
+ * callback will not be running on any CPU core.
+ *
+ * Since stoping the timer blocks on compeletion of the callback, the callback
+ * may not obtain any mutexes that the caller holds. Otherwise, a deadlock will
+ * occur.
+ *
+ * @note While the callback itself is guaranteed to not be running, work
+ * enqueued on the work-queue by the timer (with
+ * \ref _mali_osk_wq_schedule_work()) may still run. The timer callback and
+ * work handler must take this into account.
+ *
+ * It is legal to stop an already stopped timer.
+ *
+ * @param tim the timer to stop.
+ *
+ */
+void _mali_osk_timer_del(_mali_osk_timer_t *tim);
+
+/** @brief Stop a timer.
+ *
+ * Stop the timer. When the function returns, the timer's callback may still be
+ * running on any CPU core.
+ *
+ * It is legal to stop an already stopped timer.
+ *
+ * @param tim the timer to stop.
+ */
+void _mali_osk_timer_del_async(_mali_osk_timer_t *tim);
+
+/** @brief Check if timer is pending.
+ *
+ * Check if timer is active.
+ *
+ * @param tim the timer to check
+ * @return MALI_TRUE if time is active, MALI_FALSE if it is not active
+ */
+mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim);
+
+/** @brief Set a timer's callback parameters.
+ *
+ * This must be called at least once before a timer is started/modified.
+ *
+ * After a timer has been stopped or expires, the callback remains set. This
+ * means that restarting the timer will call the same function with the same
+ * parameters on expiry.
+ *
+ * @param tim the timer to set callback on.
+ * @param callback Function to call when timer expires
+ * @param data Function-specific data to supply to the function on expiry.
+ */
+void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data);
+
+/** @brief Terminate a timer, and deallocate resources.
+ *
+ * The timer must first be stopped by calling _mali_osk_timer_del().
+ *
+ * It is a programming error for _mali_osk_timer_term() to be called on:
+ * - timer that is currently running
+ * - a timer that is currently executing its callback.
+ *
+ * @param tim the timer to deallocate.
+ */
+void _mali_osk_timer_term(_mali_osk_timer_t *tim);
+/** @} */ /* end group _mali_osk_timer */
+
+
+/** @defgroup _mali_osk_time OSK Time functions
+ *
+ * \ref _mali_osk_time use the OS's representation of time, which are
+ * 'ticks'. This is to prevent aliasing problems between the internal timer
+ * time, and the time asked for.
+ *
+ * OS tick time is measured as a u32. The time stored in a u32 may either be
+ * an absolute time, or a time delta between two events. Whilst it is valid to
+ * use math opeartors to \em change the tick value represented as a u32, it
+ * is often only meaningful to do such operations on time deltas, rather than
+ * on absolute time. However, it is meaningful to add/subtract time deltas to
+ * absolute times.
+ *
+ * Conversion between tick time and milliseconds (ms) may not be loss-less,
+ * and are \em implementation \em depenedant.
+ *
+ * Code use OS time must take this into account, since:
+ * - a small OS time may (or may not) be rounded
+ * - a large time may (or may not) overflow
+ *
+ * @{ */
+
+/** @brief Return whether ticka occurs after or at the same time as  tickb
+ *
+ * Systems where ticks can wrap must handle that.
+ *
+ * @param ticka ticka
+ * @param tickb tickb
+ * @return MALI_TRUE if ticka represents a time that occurs at or after tickb.
+ */
+mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb);
+
+/** @brief Convert milliseconds to OS 'ticks'
+ *
+ * @param ms time interval in milliseconds
+ * @return the corresponding time interval in OS ticks.
+ */
+unsigned long _mali_osk_time_mstoticks(u32 ms);
+
+/** @brief Convert OS 'ticks' to milliseconds
+ *
+ * @param ticks time interval in OS ticks.
+ * @return the corresponding time interval in milliseconds
+ */
+u32 _mali_osk_time_tickstoms(unsigned long ticks);
+
+
+/** @brief Get the current time in OS 'ticks'.
+ * @return the current time in OS 'ticks'.
+ */
+unsigned long _mali_osk_time_tickcount(void);
+
+/** @brief Cause a microsecond delay
+ *
+ * The delay will have microsecond resolution, and is necessary for correct
+ * operation of the driver. At worst, the delay will be \b at least \a usecs
+ * microseconds, and so may be (significantly) more.
+ *
+ * This function may be implemented as a busy-wait, which is the most sensible
+ * implementation. On OSs where there are situations in which a thread must not
+ * sleep, this is definitely implemented as a busy-wait.
+ *
+ * @param usecs the number of microseconds to wait for.
+ */
+void _mali_osk_time_ubusydelay(u32 usecs);
+
+/** @brief Return time in nano seconds, since any given reference.
+ *
+ * @return Time in nano seconds
+ */
+u64 _mali_osk_time_get_ns(void);
+
+/** @brief Return time in nano seconds, since boot time.
+ *
+ * @return Time in nano seconds
+ */
+u64 _mali_osk_boot_time_get_ns(void);
+
+/** @} */ /* end group _mali_osk_time */
+
+/** @defgroup _mali_osk_math OSK Math
+ * @{ */
+
+/** @brief Count Leading Zeros (Little-endian)
+ *
+ * @note This function must be implemented to support the reference
+ * implementation of _mali_osk_find_first_zero_bit, as defined in
+ * mali_osk_bitops.h.
+ *
+ * @param val 32-bit words to count leading zeros on
+ * @return the number of leading zeros.
+ */
+u32 _mali_osk_clz(u32 val);
+
+/** @brief find last (most-significant) bit set
+ *
+ * @param val 32-bit words to count last bit set on
+ * @return last bit set.
+ */
+u32 _mali_osk_fls(u32 val);
+
+/** @} */ /* end group _mali_osk_math */
+
+/** @addtogroup _mali_osk_wait_queue OSK Wait Queue functionality
+ * @{ */
+
+/** @brief Initialize an empty Wait Queue */
+_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void);
+
+/** @brief Sleep if condition is false
+ *
+ * @param queue the queue to use
+ * @param condition function pointer to a boolean function
+ * @param data data parameter for condition function
+ *
+ * Put thread to sleep if the given \a condition function returns false. When
+ * being asked to wake up again, the condition will be re-checked and the
+ * thread only woken up if the condition is now true.
+ */
+void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data);
+
+/** @brief Sleep if condition is false
+ *
+ * @param queue the queue to use
+ * @param condition function pointer to a boolean function
+ * @param data data parameter for condition function
+ * @param timeout timeout in ms
+ *
+ * Put thread to sleep if the given \a condition function returns false. When
+ * being asked to wake up again, the condition will be re-checked and the
+ * thread only woken up if the condition is now true.  Will return if time
+ * exceeds timeout.
+ */
+void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout);
+
+/** @brief Wake up all threads in wait queue if their respective conditions are
+ * true
+ *
+ * @param queue the queue whose threads should be woken up
+ *
+ * Wake up all threads in wait queue \a queue whose condition is now true.
+ */
+void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue);
+
+/** @brief terminate a wait queue
+ *
+ * @param queue the queue to terminate.
+ */
+void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue);
+/** @} */ /* end group _mali_osk_wait_queue */
+
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Output a device driver debug message.
+ *
+ * The interpretation of \a fmt is the same as the \c format parameter in
+ * _mali_osu_vsnprintf().
+ *
+ * @param fmt a _mali_osu_vsnprintf() style format string
+ * @param ... a variable-number of parameters suitable for \a fmt
+ */
+void _mali_osk_dbgmsg(const char *fmt, ...);
+
+/** @brief Print fmt into buf.
+ *
+ * The interpretation of \a fmt is the same as the \c format parameter in
+ * _mali_osu_vsnprintf().
+ *
+ * @param buf a pointer to the result buffer
+ * @param size the total number of bytes allowed to write to \a buf
+ * @param fmt a _mali_osu_vsnprintf() style format string
+ * @param ... a variable-number of parameters suitable for \a fmt
+ * @return The number of bytes written to \a buf
+ */
+u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...);
+
+/** @brief Abnormal process abort.
+ *
+ * Terminates the caller-process if this function is called.
+ *
+ * This function will be called from Debug assert-macros in mali_kernel_common.h.
+ *
+ * This function will never return - because to continue from a Debug assert
+ * could cause even more problems, and hinder debugging of the initial problem.
+ *
+ * This function is only used in Debug builds, and is not used in Release builds.
+ */
+void _mali_osk_abort(void);
+
+/** @brief Sets breakpoint at point where function is called.
+ *
+ * This function will be called from Debug assert-macros in mali_kernel_common.h,
+ * to assist in debugging. If debugging at this level is not required, then this
+ * function may be implemented as a stub.
+ *
+ * This function is only used in Debug builds, and is not used in Release builds.
+ */
+void _mali_osk_break(void);
+
+/** @brief Return an identificator for calling process.
+ *
+ * @return Identificator for calling process.
+ */
+u32 _mali_osk_get_pid(void);
+
+/** @brief Return an name for calling process.
+ *
+ * @return name for calling process.
+ */
+char *_mali_osk_get_comm(void);
+
+/** @brief Return an identificator for calling thread.
+ *
+ * @return Identificator for calling thread.
+ */
+u32 _mali_osk_get_tid(void);
+
+
+/** @brief Take a reference to the power manager system for the Mali device (synchronously).
+ *
+ * When function returns successfully, Mali is ON.
+ *
+ * @note Call \a _mali_osk_pm_dev_ref_put() to release this reference.
+ */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void);
+
+/** @brief Take a reference to the external power manager system for the Mali device (asynchronously).
+ *
+ * Mali might not yet be on after this function as returned.
+ * Please use \a _mali_osk_pm_dev_barrier() or \a _mali_osk_pm_dev_ref_get_sync()
+ * to wait for Mali to be powered on.
+ *
+ * @note Call \a _mali_osk_pm_dev_ref_dec() to release this reference.
+ */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void);
+
+/** @brief Release the reference to the external power manger system for the Mali device.
+ *
+ * When reference count reach zero, the cores can be off.
+ *
+ * @note This must be used to release references taken with
+ * \a _mali_osk_pm_dev_ref_get_sync() or \a _mali_osk_pm_dev_ref_get_sync().
+ */
+void _mali_osk_pm_dev_ref_put(void);
+
+/** @brief Block until pending PM operations are done
+ */
+void _mali_osk_pm_dev_barrier(void);
+
+/** @} */ /* end group  _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_bitmap OSK Bitmap
+ * @{ */
+
+/** @brief Allocate a unique number from the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @return An unique existence in the bitmap object.
+ */
+u32 _mali_osk_bitmap_alloc(struct _mali_osk_bitmap *bitmap);
+
+/** @brief Free a interger to the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @param obj An number allocated from bitmap object.
+ */
+void _mali_osk_bitmap_free(struct _mali_osk_bitmap *bitmap, u32 obj);
+
+/** @brief Allocate continuous number from the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @return start number of the continuous number block.
+ */
+u32 _mali_osk_bitmap_alloc_range(struct _mali_osk_bitmap *bitmap, int cnt);
+
+/** @brief Free a block of continuous number block to the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @param obj Start number.
+ * @param cnt The size of the continuous number block.
+ */
+void _mali_osk_bitmap_free_range(struct _mali_osk_bitmap *bitmap, u32 obj, int cnt);
+
+/** @brief Available count could be used to allocate in the given bitmap object.
+ *
+ */
+u32 _mali_osk_bitmap_avail(struct _mali_osk_bitmap *bitmap);
+
+/** @brief Initialize an bitmap object..
+ *
+ * @param bitmap An poiter of uninitialized bitmap object.
+ * @param num Size of thei bitmap object and decide the memory size allocated.
+ * @param reserve start number used to allocate.
+ */
+int _mali_osk_bitmap_init(struct _mali_osk_bitmap *bitmap, u32 num, u32 reserve);
+
+/** @brief Free the given bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ */
+void _mali_osk_bitmap_term(struct _mali_osk_bitmap *bitmap);
+/** @} */ /* end group  _mali_osk_bitmap */
+
+/** @} */ /* end group osuapi */
+
+/** @} */ /* end group uddapi */
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Check standard inlines */
+#ifndef MALI_STATIC_INLINE
+#error MALI_STATIC_INLINE not defined on your OS
+#endif
+
+#ifndef MALI_NON_STATIC_INLINE
+#error MALI_NON_STATIC_INLINE not defined on your OS
+#endif
+
+#endif /* __MALI_OSK_H__ */
diff --git a/utgard/r7p0/common/mali_osk_bitops.h b/utgard/r7p0/common/mali_osk_bitops.h
new file mode 100644
index 0000000..99a4dec
--- /dev/null
+++ b/utgard/r7p0/common/mali_osk_bitops.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_bitops.h
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#ifndef __MALI_OSK_BITOPS_H__
+#define __MALI_OSK_BITOPS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+MALI_STATIC_INLINE void _mali_internal_clear_bit(u32 bit, u32 *addr)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	MALI_DEBUG_ASSERT(addr != NULL);
+
+	(*addr) &= ~(1 << bit);
+}
+
+MALI_STATIC_INLINE void _mali_internal_set_bit(u32 bit, u32 *addr)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	MALI_DEBUG_ASSERT(addr != NULL);
+
+	(*addr) |= (1 << bit);
+}
+
+MALI_STATIC_INLINE u32 _mali_internal_test_bit(u32 bit, u32 value)
+{
+	MALI_DEBUG_ASSERT(bit < 32);
+	return value & (1 << bit);
+}
+
+MALI_STATIC_INLINE int _mali_internal_find_first_zero_bit(u32 value)
+{
+	u32 inverted;
+	u32 negated;
+	u32 isolated;
+	u32 leading_zeros;
+
+	/* Begin with xxx...x0yyy...y, where ys are 1, number of ys is in range  0..31 */
+	inverted = ~value; /* zzz...z1000...0 */
+	/* Using count_trailing_zeros on inverted value -
+	 * See ARM System Developers Guide for details of count_trailing_zeros */
+
+	/* Isolate the zero: it is preceeded by a run of 1s, so add 1 to it */
+	negated = (u32) -inverted ; /* -a == ~a + 1 (mod 2^n) for n-bit numbers */
+	/* negated = xxx...x1000...0 */
+
+	isolated = negated & inverted ; /* xxx...x1000...0 & zzz...z1000...0, zs are ~xs */
+	/* And so the first zero bit is in the same position as the 1 == number of 1s that preceeded it
+	 * Note that the output is zero if value was all 1s */
+
+	leading_zeros = _mali_osk_clz(isolated);
+
+	return 31 - leading_zeros;
+}
+
+
+/** @defgroup _mali_osk_bitops OSK Non-atomic Bit-operations
+ * @{ */
+
+/**
+ * These bit-operations do not work atomically, and so locks must be used if
+ * atomicity is required.
+ *
+ * Reference implementations for Little Endian are provided, and so it should
+ * not normally be necessary to re-implement these. Efficient bit-twiddling
+ * techniques are used where possible, implemented in portable C.
+ *
+ * Note that these reference implementations rely on _mali_osk_clz() being
+ * implemented.
+ */
+
+/** @brief Clear a bit in a sequence of 32-bit words
+ * @param nr bit number to clear, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ */
+MALI_STATIC_INLINE void _mali_osk_clear_nonatomic_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	_mali_internal_clear_bit(nr, addr);
+}
+
+/** @brief Set a bit in a sequence of 32-bit words
+ * @param nr bit number to set, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ */
+MALI_STATIC_INLINE void _mali_osk_set_nonatomic_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	_mali_internal_set_bit(nr, addr);
+}
+
+/** @brief Test a bit in a sequence of 32-bit words
+ * @param nr bit number to test, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ * @return zero if bit was clear, non-zero if set. Do not rely on the return
+ * value being related to the actual word under test.
+ */
+MALI_STATIC_INLINE u32 _mali_osk_test_bit(u32 nr, u32 *addr)
+{
+	addr += nr >> 5; /* find the correct word */
+	nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+	return _mali_internal_test_bit(nr, *addr);
+}
+
+/* Return maxbit if not found */
+/** @brief Find the first zero bit in a sequence of 32-bit words
+ * @param addr starting point for search.
+ * @param maxbit the maximum number of bits to search
+ * @return the number of the first zero bit found, or maxbit if none were found
+ * in the specified range.
+ */
+MALI_STATIC_INLINE u32 _mali_osk_find_first_zero_bit(const u32 *addr, u32 maxbit)
+{
+	u32 total;
+
+	for (total = 0; total < maxbit; total += 32, ++addr) {
+		int result;
+
+		result = _mali_internal_find_first_zero_bit(*addr);
+
+		/* non-negative signifies the bit was found */
+		if (result >= 0) {
+			total += (u32)result;
+			break;
+		}
+	}
+
+	/* Now check if we reached maxbit or above */
+	if (total >= maxbit) {
+		total = maxbit;
+	}
+
+	return total; /* either the found bit nr, or maxbit if not found */
+}
+/** @} */ /* end group _mali_osk_bitops */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_BITOPS_H__ */
diff --git a/utgard/r7p0/common/mali_osk_list.h b/utgard/r7p0/common/mali_osk_list.h
new file mode 100644
index 0000000..8cdd009
--- /dev/null
+++ b/utgard/r7p0/common/mali_osk_list.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_list.h
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#ifndef __MALI_OSK_LIST_H__
+#define __MALI_OSK_LIST_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+MALI_STATIC_INLINE void __mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *prev, _mali_osk_list_t *next)
+{
+	next->prev = new_entry;
+	new_entry->next = next;
+	new_entry->prev = prev;
+	prev->next = new_entry;
+}
+
+MALI_STATIC_INLINE void __mali_osk_list_del(_mali_osk_list_t *prev, _mali_osk_list_t *next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists
+ * @{ */
+
+/** Reference implementations of Doubly-linked Circular Lists are provided.
+ * There is often no need to re-implement these.
+ *
+ * @note The implementation may differ subtly from any lists the OS provides.
+ * For this reason, these lists should not be mixed with OS-specific lists
+ * inside the OSK/UKK implementation. */
+
+/** @brief Initialize a list to be a head of an empty list
+ * @param exp the list to initialize. */
+#define _MALI_OSK_INIT_LIST_HEAD(exp) _mali_osk_list_init(exp)
+
+/** @brief Define a list variable, which is uninitialized.
+ * @param exp the name of the variable that the list will be defined as. */
+#define _MALI_OSK_LIST_HEAD(exp) _mali_osk_list_t exp
+
+/** @brief Define a list variable, which is initialized.
+ * @param exp the name of the variable that the list will be defined as. */
+#define _MALI_OSK_LIST_HEAD_STATIC_INIT(exp) _mali_osk_list_t exp = { &exp, &exp }
+
+/** @brief Initialize a list element.
+ *
+ * All list elements must be initialized before use.
+ *
+ * Do not use on any list element that is present in a list without using
+ * _mali_osk_list_del first, otherwise this will break the list.
+ *
+ * @param list the list element to initialize
+ */
+MALI_STATIC_INLINE void _mali_osk_list_init(_mali_osk_list_t *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+/** @brief Insert a single list element after an entry in a list
+ *
+ * As an example, if this is inserted to the head of a list, then this becomes
+ * the first element of the list.
+ *
+ * Do not use to move list elements from one list to another, as it will break
+ * the originating list.
+ *
+ *
+ * @param newlist the list element to insert
+ * @param list the list in which to insert. The new element will be the next
+ * entry in this list
+ */
+MALI_STATIC_INLINE void _mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_add(new_entry, list, list->next);
+}
+
+/** @brief Insert a single list element before an entry in a list
+ *
+ * As an example, if this is inserted to the head of a list, then this becomes
+ * the last element of the list.
+ *
+ * Do not use to move list elements from one list to another, as it will break
+ * the originating list.
+ *
+ * @param newlist the list element to insert
+ * @param list the list in which to insert. The new element will be the previous
+ * entry in this list
+ */
+MALI_STATIC_INLINE void _mali_osk_list_addtail(_mali_osk_list_t *new_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_add(new_entry, list->prev, list);
+}
+
+/** @brief Remove a single element from a list
+ *
+ * The element will no longer be present in the list. The removed list element
+ * will be uninitialized, and so should not be traversed. It must be
+ * initialized before further use.
+ *
+ * @param list the list element to remove.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_del(_mali_osk_list_t *list)
+{
+	__mali_osk_list_del(list->prev, list->next);
+}
+
+/** @brief Remove a single element from a list, and re-initialize it
+ *
+ * The element will no longer be present in the list. The removed list element
+ * will initialized, and so can be used as normal.
+ *
+ * @param list the list element to remove and initialize.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_delinit(_mali_osk_list_t *list)
+{
+	__mali_osk_list_del(list->prev, list->next);
+	_mali_osk_list_init(list);
+}
+
+/** @brief Determine whether a list is empty.
+ *
+ * An empty list is one that contains a single element that points to itself.
+ *
+ * @param list the list to check.
+ * @return non-zero if the list is empty, and zero otherwise.
+ */
+MALI_STATIC_INLINE mali_bool _mali_osk_list_empty(_mali_osk_list_t *list)
+{
+	return list->next == list;
+}
+
+/** @brief Move a list element from one list to another.
+ *
+ * The list element must be initialized.
+ *
+ * As an example, moving a list item to the head of a new list causes this item
+ * to be the first element in the new list.
+ *
+ * @param move the list element to move
+ * @param list the new list into which the element will be inserted, as the next
+ * element in the list.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_move(_mali_osk_list_t *move_entry, _mali_osk_list_t *list)
+{
+	__mali_osk_list_del(move_entry->prev, move_entry->next);
+	_mali_osk_list_add(move_entry, list);
+}
+
+/** @brief Move an entire list
+ *
+ * The list element must be initialized.
+ *
+ * Allows you to move a list from one list head to another list head
+ *
+ * @param old_list The existing list head
+ * @param new_list The new list head (must be an empty list)
+ */
+MALI_STATIC_INLINE void _mali_osk_list_move_list(_mali_osk_list_t *old_list, _mali_osk_list_t *new_list)
+{
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(new_list));
+	if (!_mali_osk_list_empty(old_list)) {
+		new_list->next = old_list->next;
+		new_list->prev = old_list->prev;
+		new_list->next->prev = new_list;
+		new_list->prev->next = new_list;
+		old_list->next = old_list;
+		old_list->prev = old_list;
+	}
+}
+
+/** @brief Find the containing structure of a list
+ *
+ * When traversing a list, this is used to recover the containing structure,
+ * given that is contains a _mali_osk_list_t member.
+ *
+ * Each list must be of structures of one type, and must link the same members
+ * together, otherwise it will not be possible to correctly recover the
+ * sturctures that the lists link.
+ *
+ * @note no type or memory checking occurs to ensure that a structure does in
+ * fact exist for the list entry, and that it is being recovered with respect
+ * to the correct list member.
+ *
+ * @param ptr the pointer to the _mali_osk_list_t member in this structure
+ * @param type the type of the structure that contains the member
+ * @param member the member of the structure that ptr points to.
+ * @return a pointer to a \a type object which contains the _mali_osk_list_t
+ * \a member, as pointed to by the _mali_osk_list_t \a *ptr.
+ */
+#define _MALI_OSK_LIST_ENTRY(ptr, type, member) \
+	_MALI_OSK_CONTAINER_OF(ptr, type, member)
+
+/** @brief Enumerate a list safely
+ *
+ * With this macro, lists can be enumerated in a 'safe' manner. That is,
+ * entries can be deleted from the list without causing an error during
+ * enumeration. To achieve this, a 'temporary' pointer is required, which must
+ * be provided to the macro.
+ *
+ * Use it like a 'for()', 'while()' or 'do()' construct, and so it must be
+ * followed by a statement or compound-statement which will be executed for
+ * each list entry.
+ *
+ * Upon loop completion, providing that an early out was not taken in the
+ * loop body, then it is guaranteed that ptr->member == list, even if the loop
+ * body never executed.
+ *
+ * @param ptr a pointer to an object of type 'type', which points to the
+ * structure that contains the currently enumerated list entry.
+ * @param tmp a pointer to an object of type 'type', which must not be used
+ * inside the list-execution statement.
+ * @param list a pointer to a _mali_osk_list_t, from which enumeration will
+ * begin
+ * @param type the type of the structure that contains the _mali_osk_list_t
+ * member that is part of the list to be enumerated.
+ * @param member the _mali_osk_list_t member of the structure that is part of
+ * the list to be enumerated.
+ */
+#define _MALI_OSK_LIST_FOREACHENTRY(ptr, tmp, list, type, member)         \
+	for (ptr = _MALI_OSK_LIST_ENTRY((list)->next, type, member),      \
+	     tmp = _MALI_OSK_LIST_ENTRY(ptr->member.next, type, member);  \
+	     &ptr->member != (list);                                      \
+	     ptr = tmp,                                                   \
+	     tmp = _MALI_OSK_LIST_ENTRY(tmp->member.next, type, member))
+
+/** @brief Enumerate a list in reverse order safely
+ *
+ * This macro is identical to @ref _MALI_OSK_LIST_FOREACHENTRY, except that
+ * entries are enumerated in reverse order.
+ *
+ * @param ptr a pointer to an object of type 'type', which points to the
+ * structure that contains the currently enumerated list entry.
+ * @param tmp a pointer to an object of type 'type', which must not be used
+ * inside the list-execution statement.
+ * @param list a pointer to a _mali_osk_list_t, from which enumeration will
+ * begin
+ * @param type the type of the structure that contains the _mali_osk_list_t
+ * member that is part of the list to be enumerated.
+ * @param member the _mali_osk_list_t member of the structure that is part of
+ * the list to be enumerated.
+ */
+#define _MALI_OSK_LIST_FOREACHENTRY_REVERSE(ptr, tmp, list, type, member) \
+	for (ptr = _MALI_OSK_LIST_ENTRY((list)->prev, type, member),      \
+	     tmp = _MALI_OSK_LIST_ENTRY(ptr->member.prev, type, member);  \
+	     &ptr->member != (list);                                      \
+	     ptr = tmp,                                                   \
+	     tmp = _MALI_OSK_LIST_ENTRY(tmp->member.prev, type, member))
+
+/** @} */ /* end group _mali_osk_list */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_LIST_H__ */
diff --git a/utgard/r7p0/common/mali_osk_mali.h b/utgard/r7p0/common/mali_osk_mali.h
new file mode 100644
index 0000000..d6c8442
--- /dev/null
+++ b/utgard/r7p0/common/mali_osk_mali.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_mali.h
+ * Defines the OS abstraction layer which is specific for the Mali kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_MALI_H__
+#define __MALI_OSK_MALI_H__
+
+#include <linux/mali/mali_utgard.h>
+#include <mali_osk.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef CONFIG_MALI_DEVFREQ
+struct mali_device {
+	struct device *dev;
+#ifdef CONFIG_HAVE_CLK
+	struct clk *clock;
+#endif
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulator;
+#endif
+#ifdef CONFIG_PM_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freq;
+	unsigned long current_voltage;
+#ifdef CONFIG_DEVFREQ_THERMAL
+	struct thermal_cooling_device *devfreq_cooling;
+#endif
+#endif
+	struct mali_pm_metrics_data mali_metrics;
+};
+#endif
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Struct with device specific configuration data
+ */
+typedef struct mali_gpu_device_data _mali_osk_device_data;
+
+#ifdef CONFIG_MALI_DT
+/** @brief Initialize those device resources when we use device tree
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_resource_initialize(void);
+#endif
+
+/** @brief Find Mali GPU HW resource
+ *
+ * @param addr Address of Mali GPU resource to find
+ * @param res Storage for resource information if resource is found.
+ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if resource is not found
+ */
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res);
+
+
+/** @brief Find Mali GPU HW base address
+ *
+ * @return 0 if resources are found, otherwise the Mali GPU component with lowest address.
+ */
+uintptr_t _mali_osk_resource_base_address(void);
+
+/** @brief Find the specific GPU resource.
+ *
+ * @return value
+ * 0x400 if Mali 400 specific GPU resource identified
+ * 0x450 if Mali 450 specific GPU resource identified
+ * 0x470 if Mali 470 specific GPU resource identified
+ *
+ */
+u32 _mali_osk_identify_gpu_resource(void);
+
+/** @brief Retrieve the Mali GPU specific data
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data);
+
+/** @brief Find the pmu domain config from device data.
+ *
+ * @param domain_config_array used to store pmu domain config found in device data.
+ * @param array_size is the size of array domain_config_array.
+ */
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size);
+
+/** @brief Get Mali PMU switch delay
+ *
+ *@return pmu switch delay if it is configured
+ */
+u32 _mali_osk_get_pmu_switch_delay(void);
+
+/** @brief Determines if Mali GPU has been configured with shared interrupts.
+ *
+ * @return MALI_TRUE if shared interrupts, MALI_FALSE if not.
+ */
+mali_bool _mali_osk_shared_interrupts(void);
+
+/** @brief Initialize the gpu secure mode.
+ * The gpu secure mode will initially be in a disabled state.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_init(void);
+
+/** @brief Deinitialize the gpu secure mode.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_deinit(void);
+
+/** @brief Reset GPU and enable the gpu secure mode.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_enable(void);
+
+/** @brief Reset GPU and disable the gpu secure mode.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_disable(void);
+
+/** @brief Check if the gpu secure mode has been enabled.
+ * @return MALI_TRUE if enabled, otherwise MALI_FALSE.
+ */
+mali_bool _mali_osk_gpu_secure_mode_is_enabled(void);
+
+/** @brief Check if the gpu secure mode is supported.
+ * @return MALI_TRUE if supported, otherwise MALI_FALSE.
+ */
+mali_bool _mali_osk_gpu_secure_mode_is_supported(void);
+
+
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_MALI_H__ */
diff --git a/utgard/r7p0/common/mali_osk_profiling.h b/utgard/r7p0/common/mali_osk_profiling.h
new file mode 100644
index 0000000..0f66756
--- /dev/null
+++ b/utgard/r7p0/common/mali_osk_profiling.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_OSK_PROFILING_H__
+#define __MALI_OSK_PROFILING_H__
+
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_TRACEPOINTS)
+
+#include "mali_linux_trace.h"
+#include "mali_profiling_events.h"
+#include "mali_profiling_gator_api.h"
+
+#define MALI_PROFILING_MAX_BUFFER_ENTRIES 1048576
+
+#define MALI_PROFILING_NO_HW_COUNTER = ((u32)-1)
+
+/** @defgroup _mali_osk_profiling External profiling connectivity
+ * @{ */
+
+/**
+ * Initialize the profiling module.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start);
+
+/*
+ * Terminate the profiling module.
+ */
+void _mali_osk_profiling_term(void);
+
+/**
+ * Stop the profile sampling operation.
+ */
+void _mali_osk_profiling_stop_sampling(u32 pid);
+
+/**
+ * Start recording profiling data
+ *
+ * The specified limit will determine how large the capture buffer is.
+ * MALI_PROFILING_MAX_BUFFER_ENTRIES determines the maximum size allowed by the device driver.
+ *
+ * @param limit The desired maximum number of events to record on input, the actual maximum on output.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_start(u32 *limit);
+
+/**
+ * Add an profiling event
+ *
+ * @param event_id The event identificator.
+ * @param data0 First data parameter, depending on event_id specified.
+ * @param data1 Second data parameter, depending on event_id specified.
+ * @param data2 Third data parameter, depending on event_id specified.
+ * @param data3 Fourth data parameter, depending on event_id specified.
+ * @param data4 Fifth data parameter, depending on event_id specified.
+ */
+void    _mali_osk_profiling_add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4);
+
+/**
+ * Report a hardware counter event.
+ *
+ * @param counter_id The ID of the counter.
+ * @param value The value of the counter.
+ */
+
+/* Call Linux tracepoint directly */
+#define _mali_osk_profiling_report_hw_counter(counter_id, value) trace_mali_hw_counter(counter_id, value)
+
+/**
+ * Report SW counters
+ *
+ * @param counters array of counter values
+ */
+void _mali_osk_profiling_report_sw_counters(u32 *counters);
+
+void _mali_osk_profiling_record_global_counters(int counter_id, u32 value);
+
+/**
+ * Stop recording profiling data
+ *
+ * @param count Returns the number of recorded events.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_stop(u32 *count);
+
+/**
+ * Retrieves the number of events that can be retrieved
+ *
+ * @return The number of recorded events that can be retrieved.
+ */
+u32 _mali_osk_profiling_get_count(void);
+
+/**
+ * Retrieve an event
+ *
+ * @param index Event index (start with 0 and continue until this function fails to retrieve all events)
+ * @param timestamp The timestamp for the retrieved event will be stored here.
+ * @param event_id The event ID for the retrieved event will be stored here.
+ * @param data The 5 data values for the retrieved event will be stored here.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]);
+
+/**
+ * Clear the recorded buffer.
+ *
+ * This is needed in order to start another recording.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_clear(void);
+
+/**
+ * Checks if a recording of profiling data is in progress
+ *
+ * @return MALI_TRUE if recording of profiling data is in progress, MALI_FALSE if not
+ */
+mali_bool _mali_osk_profiling_is_recording(void);
+
+/**
+ * Checks if profiling data is available for retrival
+ *
+ * @return MALI_TRUE if profiling data is avaiable, MALI_FALSE if not
+ */
+mali_bool _mali_osk_profiling_have_recording(void);
+
+/** @} */ /* end group _mali_osk_profiling */
+
+#else /* defined(CONFIG_MALI400_PROFILING)  && defined(CONFIG_TRACEPOINTS) */
+
+/* Dummy add_event, for when profiling is disabled. */
+
+#define _mali_osk_profiling_add_event(event_id, data0, data1, data2, data3, data4)
+
+#endif /* defined(CONFIG_MALI400_PROFILING)  && defined(CONFIG_TRACEPOINTS) */
+
+#endif /* __MALI_OSK_PROFILING_H__ */
+
+
diff --git a/utgard/r7p0/common/mali_osk_types.h b/utgard/r7p0/common/mali_osk_types.h
new file mode 100644
index 0000000..ae0c6f8
--- /dev/null
+++ b/utgard/r7p0/common/mali_osk_types.h
@@ -0,0 +1,471 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_types.h
+ * Defines types of the OS abstraction layer for the kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_TYPES_H__
+#define __MALI_OSK_TYPES_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs
+ *
+ * @{
+ */
+
+/** @defgroup _mali_osk_miscellaneous OSK Miscellaneous functions, constants and types
+ * @{ */
+
+/* Define integer types used by OSK. Note: these currently clash with Linux so we only define them if not defined already */
+#ifndef __KERNEL__
+typedef unsigned char      u8;
+typedef signed char        s8;
+typedef unsigned short     u16;
+typedef signed short       s16;
+typedef unsigned int       u32;
+typedef signed int         s32;
+typedef unsigned long long u64;
+#define BITS_PER_LONG (sizeof(long)*8)
+#else
+/* Ensure Linux types u32, etc. are defined */
+#include <linux/types.h>
+#endif
+
+/** @brief Mali Boolean type which uses MALI_TRUE and MALI_FALSE
+  */
+typedef unsigned long mali_bool;
+
+#ifndef MALI_TRUE
+#define MALI_TRUE ((mali_bool)1)
+#endif
+
+#ifndef MALI_FALSE
+#define MALI_FALSE ((mali_bool)0)
+#endif
+
+#define MALI_HW_CORE_NO_COUNTER     ((u32)-1)
+
+
+#define MALI_S32_MAX 0x7fffffff
+
+/**
+ * @brief OSK Error codes
+ *
+ * Each OS may use its own set of error codes, and may require that the
+ * User/Kernel interface take certain error code. This means that the common
+ * error codes need to be sufficiently rich to pass the correct error code
+ * thorugh from the OSK to U/K layer, across all OSs.
+ *
+ * The result is that some error codes will appear redundant on some OSs.
+ * Under all OSs, the OSK layer must translate native OS error codes to
+ * _mali_osk_errcode_t codes. Similarly, the U/K layer must translate from
+ * _mali_osk_errcode_t codes to native OS error codes.
+ */
+typedef enum {
+	_MALI_OSK_ERR_OK = 0, /**< Success. */
+	_MALI_OSK_ERR_FAULT = -1, /**< General non-success */
+	_MALI_OSK_ERR_INVALID_FUNC = -2, /**< Invalid function requested through User/Kernel interface (e.g. bad IOCTL number) */
+	_MALI_OSK_ERR_INVALID_ARGS = -3, /**< Invalid arguments passed through User/Kernel interface */
+	_MALI_OSK_ERR_NOMEM = -4, /**< Insufficient memory */
+	_MALI_OSK_ERR_TIMEOUT = -5, /**< Timeout occurred */
+	_MALI_OSK_ERR_RESTARTSYSCALL = -6, /**< Special: On certain OSs, must report when an interruptable mutex is interrupted. Ignore otherwise. */
+	_MALI_OSK_ERR_ITEM_NOT_FOUND = -7, /**< Table Lookup failed */
+	_MALI_OSK_ERR_BUSY = -8, /**< Device/operation is busy. Try again later */
+	_MALI_OSK_ERR_UNSUPPORTED = -9, /**< Optional part of the interface used, and is unsupported */
+} _mali_osk_errcode_t;
+
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_wq OSK work queues
+ * @{ */
+
+/** @brief Private type for work objects */
+typedef struct _mali_osk_wq_work_s _mali_osk_wq_work_t;
+typedef struct _mali_osk_wq_delayed_work_s _mali_osk_wq_delayed_work_t;
+
+/** @brief Work queue handler function
+ *
+ * This function type is called when the work is scheduled by the work queue,
+ * e.g. as an IRQ bottom-half handler.
+ *
+ * Refer to \ref _mali_osk_wq_schedule_work() for more information on the
+ * work-queue and work handlers.
+ *
+ * @param arg resource-specific data
+ */
+typedef void (*_mali_osk_wq_work_handler_t)(void *arg);
+
+/* @} */ /* end group _mali_osk_wq */
+
+/** @defgroup _mali_osk_irq OSK IRQ handling
+ * @{ */
+
+/** @brief Private type for IRQ handling objects */
+typedef struct _mali_osk_irq_t_struct _mali_osk_irq_t;
+
+/** @brief Optional function to trigger an irq from a resource
+ *
+ * This function is implemented by the common layer to allow probing of a resource's IRQ.
+ * @param arg resource-specific data */
+typedef void (*_mali_osk_irq_trigger_t)(void *arg);
+
+/** @brief Optional function to acknowledge an irq from a resource
+ *
+ * This function is implemented by the common layer to allow probing of a resource's IRQ.
+ * @param arg resource-specific data
+ * @return _MALI_OSK_ERR_OK if the IRQ was successful, or a suitable _mali_osk_errcode_t on failure. */
+typedef _mali_osk_errcode_t (*_mali_osk_irq_ack_t)(void *arg);
+
+/** @brief IRQ 'upper-half' handler callback.
+ *
+ * This function is implemented by the common layer to do the initial handling of a
+ * resource's IRQ. This maps on to the concept of an ISR that does the minimum
+ * work necessary before handing off to an IST.
+ *
+ * The communication of the resource-specific data from the ISR to the IST is
+ * handled by the OSK implementation.
+ *
+ * On most systems, the IRQ upper-half handler executes in IRQ context.
+ * Therefore, the system may have restrictions about what can be done in this
+ * context
+ *
+ * If an IRQ upper-half handler requires more work to be done than can be
+ * acheived in an IRQ context, then it may defer the work with
+ * _mali_osk_wq_schedule_work(). Refer to \ref _mali_osk_wq_create_work() for
+ * more information.
+ *
+ * @param arg resource-specific data
+ * @return _MALI_OSK_ERR_OK if the IRQ was correctly handled, or a suitable
+ * _mali_osk_errcode_t otherwise.
+ */
+typedef _mali_osk_errcode_t (*_mali_osk_irq_uhandler_t)(void *arg);
+
+
+/** @} */ /* end group _mali_osk_irq */
+
+
+/** @defgroup _mali_osk_atomic OSK Atomic counters
+ * @{ */
+
+/** @brief Public type of atomic counters
+ *
+ * This is public for allocation on stack. On systems that support it, this is just a single 32-bit value.
+ * On others, it could be encapsulating an object stored elsewhere.
+ *
+ * Regardless of implementation, the \ref _mali_osk_atomic functions \b must be used
+ * for all accesses to the variable's value, even if atomicity is not required.
+ * Do not access u.val or u.obj directly.
+ */
+typedef struct {
+	union {
+		u32 val;
+		void *obj;
+	} u;
+} _mali_osk_atomic_t;
+/** @} */ /* end group _mali_osk_atomic */
+
+
+/** @defgroup _mali_osk_lock OSK Mutual Exclusion Locks
+ * @{ */
+
+
+/** @brief OSK Mutual Exclusion Lock ordered list
+ *
+ * This lists the various types of locks in the system and is used to check
+ * that locks are taken in the correct order.
+ *
+ * - Holding more than one lock of the same order at the same time is not
+ *   allowed.
+ * - Taking a lock of a lower order than the highest-order lock currently held
+ *   is not allowed.
+ *
+ */
+typedef enum {
+	/*  ||    Locks    ||  */
+	/*  ||   must be   ||  */
+	/* _||_  taken in _||_ */
+	/* \  /    this   \  / */
+	/*  \/    order!   \/  */
+
+	_MALI_OSK_LOCK_ORDER_FIRST = 0,
+
+	_MALI_OSK_LOCK_ORDER_SESSIONS,
+	_MALI_OSK_LOCK_ORDER_MEM_SESSION,
+	_MALI_OSK_LOCK_ORDER_MEM_INFO,
+	_MALI_OSK_LOCK_ORDER_MEM_PT_CACHE,
+	_MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP,
+	_MALI_OSK_LOCK_ORDER_PM_EXECUTION,
+	_MALI_OSK_LOCK_ORDER_EXECUTOR,
+	_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM,
+	_MALI_OSK_LOCK_ORDER_SCHEDULER,
+	_MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED,
+	_MALI_OSK_LOCK_ORDER_PROFILING,
+	_MALI_OSK_LOCK_ORDER_L2,
+	_MALI_OSK_LOCK_ORDER_L2_COMMAND,
+	_MALI_OSK_LOCK_ORDER_UTILIZATION,
+	_MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS,
+	_MALI_OSK_LOCK_ORDER_PM_STATE,
+
+	_MALI_OSK_LOCK_ORDER_LAST,
+} _mali_osk_lock_order_t;
+
+
+/** @brief OSK Mutual Exclusion Lock flags type
+ *
+ * - Any lock can use the order parameter.
+ */
+typedef enum {
+	_MALI_OSK_LOCKFLAG_UNORDERED        = 0x1, /**< Indicate that the order of this lock should not be checked */
+	_MALI_OSK_LOCKFLAG_ORDERED          = 0x2,
+	/** @enum _mali_osk_lock_flags_t
+	 *
+	 * Flags from 0x10000--0x80000000 are RESERVED for User-mode */
+
+} _mali_osk_lock_flags_t;
+
+/** @brief Mutual Exclusion Lock Mode Optimization hint
+ *
+ * The lock mode is used to implement the read/write locking of locks when we call
+ * functions _mali_osk_mutex_rw_init/wait/signal/term/. In this case, the RO mode can
+ * be used to allow multiple concurrent readers, but no writers. The RW mode is used for
+ * writers, and so will wait for all readers to release the lock (if any present).
+ * Further readers and writers will wait until the writer releases the lock.
+ *
+ * The mode is purely an optimization hint: for example, it is permissible for
+ * all locks to behave in RW mode, regardless of that supplied.
+ *
+ * It is an error to attempt to use locks in anything other that RW mode when
+ * call functions _mali_osk_mutex_rw_wait/signal().
+ *
+ */
+typedef enum {
+	_MALI_OSK_LOCKMODE_UNDEF = -1,  /**< Undefined lock mode. For internal use only */
+	_MALI_OSK_LOCKMODE_RW    = 0x0, /**< Read-write mode, default. All readers and writers are mutually-exclusive */
+	_MALI_OSK_LOCKMODE_RO,          /**< Read-only mode, to support multiple concurrent readers, but mutual exclusion in the presence of writers. */
+	/** @enum _mali_osk_lock_mode_t
+	 *
+	 * Lock modes 0x40--0x7F are RESERVED for User-mode */
+} _mali_osk_lock_mode_t;
+
+/** @brief Private types for Mutual Exclusion lock objects */
+typedef struct _mali_osk_lock_debug_s _mali_osk_lock_debug_t;
+typedef struct _mali_osk_spinlock_s _mali_osk_spinlock_t;
+typedef struct _mali_osk_spinlock_irq_s _mali_osk_spinlock_irq_t;
+typedef struct _mali_osk_mutex_s _mali_osk_mutex_t;
+typedef struct _mali_osk_mutex_rw_s _mali_osk_mutex_rw_t;
+
+/** @} */ /* end group _mali_osk_lock */
+
+/** @defgroup _mali_osk_low_level_memory OSK Low-level Memory Operations
+ * @{ */
+
+/**
+ * @brief Private data type for use in IO accesses to/from devices.
+ *
+ * This represents some range that is accessible from the device. Examples
+ * include:
+ * - Device Registers, which could be readable and/or writeable.
+ * - Memory that the device has access to, for storing configuration structures.
+ *
+ * Access to this range must be made through the _mali_osk_mem_ioread32() and
+ * _mali_osk_mem_iowrite32() functions.
+ */
+typedef struct _mali_io_address *mali_io_address;
+
+/** @defgroup _MALI_OSK_CPU_PAGE CPU Physical page size macros.
+ *
+ * The order of the page size is supplied for
+ * ease of use by algorithms that might require it, since it is easier to know
+ * it ahead of time rather than calculating it.
+ *
+ * The Mali Page Mask macro masks off the lower bits of a physical address to
+ * give the start address of the page for that physical address.
+ *
+ * @note The Mali device driver code is designed for systems with 4KB page size.
+ * Changing these macros will not make the entire Mali device driver work with
+ * page sizes other than 4KB.
+ *
+ * @note The CPU Physical Page Size has been assumed to be the same as the Mali
+ * Physical Page Size.
+ *
+ * @{
+ */
+
+/** CPU Page Order, as log to base 2 of the Page size. @see _MALI_OSK_CPU_PAGE_SIZE */
+#define _MALI_OSK_CPU_PAGE_ORDER ((u32)12)
+/** CPU Page Size, in bytes.               */
+#define _MALI_OSK_CPU_PAGE_SIZE (((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER))
+/** CPU Page Mask, which masks off the offset within a page */
+#define _MALI_OSK_CPU_PAGE_MASK (~((((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER)) - ((u32)1)))
+/** @} */ /* end of group _MALI_OSK_CPU_PAGE */
+
+/** @defgroup _MALI_OSK_MALI_PAGE Mali Physical Page size macros
+ *
+ * Mali Physical page size macros. The order of the page size is supplied for
+ * ease of use by algorithms that might require it, since it is easier to know
+ * it ahead of time rather than calculating it.
+ *
+ * The Mali Page Mask macro masks off the lower bits of a physical address to
+ * give the start address of the page for that physical address.
+ *
+ * @note The Mali device driver code is designed for systems with 4KB page size.
+ * Changing these macros will not make the entire Mali device driver work with
+ * page sizes other than 4KB.
+ *
+ * @note The Mali Physical Page Size has been assumed to be the same as the CPU
+ * Physical Page Size.
+ *
+ * @{
+ */
+
+/** Mali Page Order, as log to base 2 of the Page size. @see _MALI_OSK_MALI_PAGE_SIZE */
+#define _MALI_OSK_MALI_PAGE_ORDER PAGE_SHIFT
+/** Mali Page Size, in bytes.               */
+#define _MALI_OSK_MALI_PAGE_SIZE PAGE_SIZE
+/** Mali Page Mask, which masks off the offset within a page */
+#define _MALI_OSK_MALI_PAGE_MASK PAGE_MASK
+/** @} */ /* end of group _MALI_OSK_MALI_PAGE*/
+
+/** @brief flags for mapping a user-accessible memory range
+ *
+ * Where a function with prefix '_mali_osk_mem_mapregion' accepts flags as one
+ * of the function parameters, it will use one of these. These allow per-page
+ * control over mappings. Compare with the mali_memory_allocation_flag type,
+ * which acts over an entire range
+ *
+ * These may be OR'd together with bitwise OR (|), but must be cast back into
+ * the type after OR'ing.
+ */
+typedef enum {
+	_MALI_OSK_MEM_MAPREGION_FLAG_OS_ALLOCATED_PHYSADDR = 0x1, /**< Physical address is OS Allocated */
+} _mali_osk_mem_mapregion_flags_t;
+/** @} */ /* end group _mali_osk_low_level_memory */
+
+/** @defgroup _mali_osk_notification OSK Notification Queues
+ * @{ */
+
+/** @brief Private type for notification queue objects */
+typedef struct _mali_osk_notification_queue_t_struct _mali_osk_notification_queue_t;
+
+/** @brief Public notification data object type */
+typedef struct _mali_osk_notification_t_struct {
+	u32 notification_type;   /**< The notification type */
+	u32 result_buffer_size; /**< Size of the result buffer to copy to user space */
+	void *result_buffer;    /**< Buffer containing any type specific data */
+} _mali_osk_notification_t;
+
+/** @} */ /* end group _mali_osk_notification */
+
+
+/** @defgroup _mali_osk_timer OSK Timer Callbacks
+ * @{ */
+
+/** @brief Function to call when a timer expires
+ *
+ * When a timer expires, this function is called. Note that on many systems,
+ * a timer callback will be executed in IRQ context. Therefore, restrictions
+ * may apply on what can be done inside the timer callback.
+ *
+ * If a timer requires more work to be done than can be acheived in an IRQ
+ * context, then it may defer the work with a work-queue. For example, it may
+ * use \ref _mali_osk_wq_schedule_work() to make use of a bottom-half handler
+ * to carry out the remaining work.
+ *
+ * Stopping the timer with \ref _mali_osk_timer_del() blocks on compeletion of
+ * the callback. Therefore, the callback may not obtain any mutexes also held
+ * by any callers of _mali_osk_timer_del(). Otherwise, a deadlock may occur.
+ *
+ * @param arg Function-specific data */
+typedef void (*_mali_osk_timer_callback_t)(void *arg);
+
+/** @brief Private type for Timer Callback Objects */
+typedef struct _mali_osk_timer_t_struct _mali_osk_timer_t;
+/** @} */ /* end group _mali_osk_timer */
+
+
+/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists
+ * @{ */
+
+/** @brief Public List objects.
+ *
+ * To use, add a _mali_osk_list_t member to the structure that may become part
+ * of a list. When traversing the _mali_osk_list_t objects, use the
+ * _MALI_OSK_CONTAINER_OF() macro to recover the structure from its
+ *_mali_osk_list_t member
+ *
+ * Each structure may have multiple _mali_osk_list_t members, so that the
+ * structure is part of multiple lists. When traversing lists, ensure that the
+ * correct _mali_osk_list_t member is used, because type-checking will be
+ * lost by the compiler.
+ */
+typedef struct _mali_osk_list_s {
+	struct _mali_osk_list_s *next;
+	struct _mali_osk_list_s *prev;
+} _mali_osk_list_t;
+/** @} */ /* end group _mali_osk_list */
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief resource description struct
+ *
+ * Platform independent representation of a Mali HW resource
+ */
+typedef struct _mali_osk_resource {
+	const char *description;        /**< short description of the resource */
+	uintptr_t base;                 /**< Physical base address of the resource, as seen by Mali resources. */
+	const char *irq_name;           /**< Name of irq belong to this resource */
+	u32 irq;                        /**< IRQ number delivered to the CPU, or -1 to tell the driver to probe for it (if possible) */
+} _mali_osk_resource_t;
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_wait_queue OSK Wait Queue functionality
+ * @{ */
+/** @brief Private type for wait queue objects */
+typedef struct _mali_osk_wait_queue_t_struct _mali_osk_wait_queue_t;
+/** @} */ /* end group _mali_osk_wait_queue */
+
+/** @} */ /* end group osuapi */
+
+/** @} */ /* end group uddapi */
+
+/** @brief Mali print ctx type which uses seq_file
+  */
+typedef struct seq_file _mali_osk_print_ctx;
+
+#define _MALI_OSK_BITMAP_INVALIDATE_INDEX -1
+
+typedef struct _mali_osk_bitmap {
+	u32         reserve;
+	u32         last;
+	u32         max;
+	u32         avail;
+	_mali_osk_spinlock_t   *lock;
+	unsigned long          *table;
+} _mali_osk_bitmap_t;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_TYPES_H__ */
diff --git a/utgard/r7p0/common/mali_pm.c b/utgard/r7p0/common/mali_pm.c
new file mode 100644
index 0000000..501030f
--- /dev/null
+++ b/utgard/r7p0/common/mali_pm.c
@@ -0,0 +1,1362 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pm.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_scheduler.h"
+#include "mali_group.h"
+#include "mali_pm_domain.h"
+#include "mali_pmu.h"
+
+#include "mali_executor.h"
+#include "mali_control_timer.h"
+
+#if defined(DEBUG)
+u32 num_pm_runtime_resume;
+u32 num_pm_updates;
+u32 num_pm_updates_up;
+u32 num_pm_updates_down;
+#endif
+
+#define MALI_PM_DOMAIN_DUMMY_MASK (1 << MALI_DOMAIN_INDEX_DUMMY)
+
+/* lock protecting power state (including pm_domains) */
+static _mali_osk_spinlock_irq_t *pm_lock_state;
+
+/* the wanted domain mask (protected by pm_lock_state) */
+static u32 pd_mask_wanted;
+
+/* used to deferring the actual power changes */
+static _mali_osk_wq_work_t *pm_work;
+
+/* lock protecting power change execution */
+static _mali_osk_mutex_t *pm_lock_exec;
+
+/* PMU domains which are actually powered on (protected by pm_lock_exec) */
+static u32 pmu_mask_current;
+
+/*
+ * domains which marked as powered on (protected by pm_lock_exec)
+ * This can be different from pmu_mask_current right after GPU power on
+ * if the PMU domains default to powered up.
+ */
+static u32 pd_mask_current;
+
+static u16 domain_config[MALI_MAX_NUMBER_OF_DOMAINS] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	1 << MALI_DOMAIN_INDEX_DUMMY
+};
+
+/* The relative core power cost */
+#define MALI_GP_COST 3
+#define MALI_PP_COST 6
+#define MALI_L2_COST 1
+
+/*
+ *We have MALI_MAX_NUMBER_OF_PP_PHYSICAL_CORES + 1 rows in this matrix
+ *because we mush store the mask of different pp cores: 0, 1, 2, 3, 4, 5, 6, 7, 8.
+ */
+static int mali_pm_domain_power_cost_result[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1][MALI_MAX_NUMBER_OF_DOMAINS];
+/*
+ * Keep track of runtime PM state, so that we know
+ * how to resume during OS resume.
+ */
+#ifdef CONFIG_PM_RUNTIME
+static mali_bool mali_pm_runtime_active = MALI_FALSE;
+#else
+/* when kernel don't enable PM_RUNTIME, set the flag always true,
+ * for GPU will not power off by runtime */
+static mali_bool mali_pm_runtime_active = MALI_TRUE;
+#endif
+
+static void mali_pm_state_lock(void);
+static void mali_pm_state_unlock(void);
+static _mali_osk_errcode_t mali_pm_create_pm_domains(void);
+static void mali_pm_set_pmu_domain_config(void);
+static u32 mali_pm_get_registered_cores_mask(void);
+static void mali_pm_update_sync_internal(void);
+static mali_bool mali_pm_common_suspend(void);
+static void mali_pm_update_work(void *data);
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask);
+const char *mali_pm_group_stats_to_string(void);
+#endif
+
+_mali_osk_errcode_t mali_pm_initialize(void)
+{
+	_mali_osk_errcode_t err;
+	struct mali_pmu_core *pmu;
+
+	pm_lock_state = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED,
+			_MALI_OSK_LOCK_ORDER_PM_STATE);
+	if (pm_lock_state == NULL) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pm_lock_exec = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED,
+					    _MALI_OSK_LOCK_ORDER_PM_STATE);
+	if (pm_lock_exec == NULL) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pm_work = _mali_osk_wq_create_work(mali_pm_update_work, NULL);
+	if (pm_work == NULL) {
+		mali_pm_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pmu = mali_pmu_get_global_pmu_core();
+	if (pmu != NULL) {
+		/*
+		 * We have a Mali PMU, set the correct domain
+		 * configuration (default or custom)
+		 */
+
+		u32 registered_cores_mask;
+
+		mali_pm_set_pmu_domain_config();
+
+		registered_cores_mask = mali_pm_get_registered_cores_mask();
+		mali_pmu_set_registered_cores_mask(pmu, registered_cores_mask);
+
+		MALI_DEBUG_ASSERT(pd_mask_wanted == 0);
+	}
+
+	/* Create all power domains needed (at least one dummy domain) */
+	err = mali_pm_create_pm_domains();
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_pm_terminate();
+		return err;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_pm_terminate(void)
+{
+	if (pm_work != NULL) {
+		_mali_osk_wq_delete_work(pm_work);
+		pm_work = NULL;
+	}
+
+	mali_pm_domain_terminate();
+
+	if (pm_lock_exec != NULL) {
+		_mali_osk_mutex_term(pm_lock_exec);
+		pm_lock_exec = NULL;
+	}
+
+	if (pm_lock_state != NULL) {
+		_mali_osk_spinlock_irq_term(pm_lock_state);
+		pm_lock_state = NULL;
+	}
+}
+
+struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index,
+		struct mali_l2_cache_core *l2_cache)
+{
+	struct mali_pm_domain *domain;
+
+	domain = mali_pm_domain_get_from_mask(domain_config[domain_index]);
+	if (domain == NULL) {
+		MALI_DEBUG_ASSERT(domain_config[domain_index] == 0);
+		domain = mali_pm_domain_get_from_index(
+				 MALI_DOMAIN_INDEX_DUMMY);
+		domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK;
+	} else {
+		MALI_DEBUG_ASSERT(domain_config[domain_index] != 0);
+	}
+
+	MALI_DEBUG_ASSERT(domain != NULL);
+
+	mali_pm_domain_add_l2_cache(domain, l2_cache);
+
+	return domain; /* return the actual domain this was registered in */
+}
+
+struct mali_pm_domain *mali_pm_register_group(u32 domain_index,
+		struct mali_group *group)
+{
+	struct mali_pm_domain *domain;
+
+	domain = mali_pm_domain_get_from_mask(domain_config[domain_index]);
+	if (domain == NULL) {
+		MALI_DEBUG_ASSERT(domain_config[domain_index] == 0);
+		domain = mali_pm_domain_get_from_index(
+				 MALI_DOMAIN_INDEX_DUMMY);
+		domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK;
+	} else {
+		MALI_DEBUG_ASSERT(domain_config[domain_index] != 0);
+	}
+
+	MALI_DEBUG_ASSERT(domain != NULL);
+
+	mali_pm_domain_add_group(domain, group);
+
+	return domain; /* return the actual domain this was registered in */
+}
+
+mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains,
+				  struct mali_group **groups,
+				  u32 num_domains)
+{
+	mali_bool ret = MALI_TRUE; /* Assume all is powered on instantly */
+	u32 i;
+
+	mali_pm_state_lock();
+
+	for (i = 0; i < num_domains; i++) {
+		MALI_DEBUG_ASSERT_POINTER(domains[i]);
+		pd_mask_wanted |= mali_pm_domain_ref_get(domains[i]);
+		if (mali_pm_domain_power_is_on(domains[i]) == MALI_FALSE) {
+			/*
+			 * Tell caller that the corresponding group
+			 * was not already powered on.
+			 */
+			ret = MALI_FALSE;
+		} else {
+			/*
+			 * There is a time gap between we power on the domain and
+			 * set the power state of the corresponding groups to be on.
+			 */
+			if (NULL != groups[i] &&
+			    MALI_FALSE == mali_group_power_is_on(groups[i])) {
+				ret = MALI_FALSE;
+			}
+		}
+	}
+
+	MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (get refs)\n", pd_mask_wanted));
+
+	mali_pm_state_unlock();
+
+	return ret;
+}
+
+mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains,
+				  u32 num_domains)
+{
+	u32 mask = 0;
+	mali_bool ret;
+	u32 i;
+
+	mali_pm_state_lock();
+
+	for (i = 0; i < num_domains; i++) {
+		MALI_DEBUG_ASSERT_POINTER(domains[i]);
+		mask |= mali_pm_domain_ref_put(domains[i]);
+	}
+
+	if (mask == 0) {
+		/* return false, all domains should still stay on */
+		ret = MALI_FALSE;
+	} else {
+		/* Assert that we are dealing with a change */
+		MALI_DEBUG_ASSERT((pd_mask_wanted & mask) == mask);
+
+		/* Update our desired domain mask */
+		pd_mask_wanted &= ~mask;
+
+		/* return true; one or more domains can now be powered down */
+		ret = MALI_TRUE;
+	}
+
+	MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (put refs)\n", pd_mask_wanted));
+
+	mali_pm_state_unlock();
+
+	return ret;
+}
+
+void mali_pm_init_begin(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	_mali_osk_pm_dev_ref_get_sync();
+
+	/* Ensure all PMU domains are on */
+	if (pmu != NULL) {
+		mali_pmu_power_up_all(pmu);
+	}
+}
+
+void mali_pm_init_end(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	/* Ensure all PMU domains are off */
+	if (pmu != NULL) {
+		mali_pmu_power_down_all(pmu);
+	}
+
+	_mali_osk_pm_dev_ref_put();
+}
+
+void mali_pm_update_sync(void)
+{
+	mali_pm_exec_lock();
+
+	if (mali_pm_runtime_active == MALI_TRUE) {
+		/*
+		 * Only update if GPU is powered on.
+		 * Deactivation of the last group will result in both a
+		 * deferred runtime PM suspend operation and
+		 * deferred execution of this function.
+		 * mali_pm_runtime_active will be false if runtime PM
+		 * executed first and thus the GPU is now fully powered off.
+		 */
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pm_update_async(void)
+{
+	_mali_osk_wq_schedule_work(pm_work);
+}
+
+void mali_pm_os_suspend(mali_bool os_suspend)
+{
+	int ret;
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: OS suspend\n"));
+
+	/* Suspend execution of all jobs, and go to inactive state */
+	mali_executor_suspend();
+
+	if (os_suspend) {
+		mali_control_timer_suspend(MALI_TRUE);
+	}
+
+	mali_pm_exec_lock();
+
+	ret = mali_pm_common_suspend();
+
+	MALI_DEBUG_ASSERT(ret == MALI_TRUE);
+	MALI_IGNORE(ret);
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pm_os_resume(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: OS resume\n"));
+
+	mali_pm_exec_lock();
+
+#if defined(DEBUG)
+	mali_pm_state_lock();
+
+	/* Assert that things are as we left them in os_suspend(). */
+	MALI_DEBUG_ASSERT(pd_mask_wanted == 0);
+	MALI_DEBUG_ASSERT(pd_mask_current == 0);
+	MALI_DEBUG_ASSERT(pmu_mask_current == 0);
+
+	MALI_DEBUG_ASSERT(mali_pm_domain_all_unused() == MALI_TRUE);
+
+	mali_pm_state_unlock();
+#endif
+
+	if (mali_pm_runtime_active == MALI_TRUE) {
+		/* Runtime PM was active, so reset PMU */
+		if (pmu != NULL) {
+			mali_pmu_reset(pmu);
+			pmu_mask_current = mali_pmu_get_mask(pmu);
+
+			MALI_DEBUG_PRINT(3, ("Mali PM: OS resume 0x%x\n", pmu_mask_current));
+		}
+
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+
+	/* Start executing jobs again */
+	mali_executor_resume();
+}
+
+mali_bool mali_pm_runtime_suspend(void)
+{
+	mali_bool ret;
+
+	MALI_DEBUG_PRINT(3, ("Mali PM: Runtime suspend\n"));
+
+	mali_pm_exec_lock();
+
+	/*
+	 * Put SW state directly into "off" state, and do not bother to power
+	 * down each power domain, because entire GPU will be powered off
+	 * when we return.
+	 * For runtime PM suspend, in contrast to OS suspend, there is a race
+	 * between this function and the mali_pm_update_sync_internal(), which
+	 * is fine...
+	 */
+	ret = mali_pm_common_suspend();
+	if (ret == MALI_TRUE) {
+		mali_pm_runtime_active = MALI_FALSE;
+	} else {
+		/*
+		 * Process the "power up" instead,
+		 * which could have been "lost"
+		 */
+		mali_pm_update_sync_internal();
+	}
+
+	mali_pm_exec_unlock();
+
+	return ret;
+}
+
+void mali_pm_runtime_resume(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	mali_pm_exec_lock();
+
+	mali_pm_runtime_active = MALI_TRUE;
+
+#if defined(DEBUG)
+	++num_pm_runtime_resume;
+
+	mali_pm_state_lock();
+
+	/*
+	 * Assert that things are as we left them in runtime_suspend(),
+	 * except for pd_mask_wanted which normally will be the reason we
+	 * got here (job queued => domains wanted)
+	 */
+	MALI_DEBUG_ASSERT(pd_mask_current == 0);
+	MALI_DEBUG_ASSERT(pmu_mask_current == 0);
+
+	mali_pm_state_unlock();
+#endif
+
+	if (pmu != NULL) {
+		mali_pmu_reset(pmu);
+		pmu_mask_current = mali_pmu_get_mask(pmu);
+		MALI_DEBUG_PRINT(3, ("Mali PM: Runtime resume 0x%x\n", pmu_mask_current));
+	}
+
+	/*
+	 * Normally we are resumed because a job has just been queued.
+	 * pd_mask_wanted should thus be != 0.
+	 * It is however possible for others to take a Mali Runtime PM ref
+	 * without having a job queued.
+	 * We should however always call mali_pm_update_sync_internal(),
+	 * because this will take care of any potential mismatch between
+	 * pmu_mask_current and pd_mask_current.
+	 */
+	mali_pm_update_sync_internal();
+
+	mali_pm_exec_unlock();
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain,
+			      char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tPower domain: id %u\n",
+				mali_pm_domain_get_id(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tMask: 0x%04x\n",
+				mali_pm_domain_get_mask(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tUse count: %u\n",
+				mali_pm_domain_get_use_count(domain));
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tCurrent power state: %s\n",
+				(mali_pm_domain_get_mask(domain) & pd_mask_current) ?
+				"On" : "Off");
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\t\tWanted power state: %s\n",
+				(mali_pm_domain_get_mask(domain) & pd_mask_wanted) ?
+				"On" : "Off");
+
+	return n;
+}
+#endif
+
+static void mali_pm_state_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(pm_lock_state);
+}
+
+static void mali_pm_state_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(pm_lock_state);
+}
+
+void mali_pm_exec_lock(void)
+{
+	_mali_osk_mutex_wait(pm_lock_exec);
+}
+
+void mali_pm_exec_unlock(void)
+{
+	_mali_osk_mutex_signal(pm_lock_exec);
+}
+
+static void mali_pm_domain_power_up(u32 power_up_mask,
+				    struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS],
+				    u32 *num_groups_up,
+				    struct mali_l2_cache_core *l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES],
+				    u32 *num_l2_up)
+{
+	u32 domain_bit;
+	u32 notify_mask = power_up_mask;
+
+	MALI_DEBUG_ASSERT(power_up_mask != 0);
+	MALI_DEBUG_ASSERT_POINTER(groups_up);
+	MALI_DEBUG_ASSERT_POINTER(num_groups_up);
+	MALI_DEBUG_ASSERT(*num_groups_up == 0);
+	MALI_DEBUG_ASSERT_POINTER(l2_up);
+	MALI_DEBUG_ASSERT_POINTER(num_l2_up);
+	MALI_DEBUG_ASSERT(*num_l2_up == 0);
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state);
+
+	MALI_DEBUG_PRINT(5,
+			 ("PM update:      Powering up domains: . [%s]\n",
+			  mali_pm_mask_to_string(power_up_mask)));
+
+	pd_mask_current |= power_up_mask;
+
+	domain_bit = _mali_osk_fls(notify_mask);
+	while (domain_bit != 0) {
+		u32 domain_id = domain_bit - 1;
+		struct mali_pm_domain *domain =
+			mali_pm_domain_get_from_index(
+				domain_id);
+		struct mali_l2_cache_core *l2_cache;
+		struct mali_l2_cache_core *l2_cache_tmp;
+		struct mali_group *group;
+		struct mali_group *group_tmp;
+
+		/* Mark domain as powered up */
+		mali_pm_domain_set_power_on(domain, MALI_TRUE);
+
+		/*
+		 * Make a note of the L2 and/or group(s) to notify
+		 * (need to release the PM state lock before doing so)
+		 */
+
+		_MALI_OSK_LIST_FOREACHENTRY(l2_cache,
+					    l2_cache_tmp,
+					    mali_pm_domain_get_l2_cache_list(
+						    domain),
+					    struct mali_l2_cache_core,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_l2_up <
+					  MALI_MAX_NUMBER_OF_L2_CACHE_CORES);
+			l2_up[*num_l2_up] = l2_cache;
+			(*num_l2_up)++;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group,
+					    group_tmp,
+					    mali_pm_domain_get_group_list(domain),
+					    struct mali_group,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_groups_up <
+					  MALI_MAX_NUMBER_OF_GROUPS);
+			groups_up[*num_groups_up] = group;
+
+			(*num_groups_up)++;
+		}
+
+		/* Remove current bit and find next */
+		notify_mask &= ~(1 << (domain_id));
+		domain_bit = _mali_osk_fls(notify_mask);
+	}
+}
+static void mali_pm_domain_power_down(u32 power_down_mask,
+				      struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS],
+				      u32 *num_groups_down,
+				      struct mali_l2_cache_core *l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES],
+				      u32 *num_l2_down)
+{
+	u32 domain_bit;
+	u32 notify_mask = power_down_mask;
+
+	MALI_DEBUG_ASSERT(power_down_mask != 0);
+	MALI_DEBUG_ASSERT_POINTER(groups_down);
+	MALI_DEBUG_ASSERT_POINTER(num_groups_down);
+	MALI_DEBUG_ASSERT(*num_groups_down == 0);
+	MALI_DEBUG_ASSERT_POINTER(l2_down);
+	MALI_DEBUG_ASSERT_POINTER(num_l2_down);
+	MALI_DEBUG_ASSERT(*num_l2_down == 0);
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state);
+
+	MALI_DEBUG_PRINT(5,
+			 ("PM update:      Powering down domains: [%s]\n",
+			  mali_pm_mask_to_string(power_down_mask)));
+
+	pd_mask_current &= ~power_down_mask;
+
+	domain_bit = _mali_osk_fls(notify_mask);
+	while (domain_bit != 0) {
+		u32 domain_id = domain_bit - 1;
+		struct mali_pm_domain *domain =
+			mali_pm_domain_get_from_index(domain_id);
+		struct mali_l2_cache_core *l2_cache;
+		struct mali_l2_cache_core *l2_cache_tmp;
+		struct mali_group *group;
+		struct mali_group *group_tmp;
+
+		/* Mark domain as powered down */
+		mali_pm_domain_set_power_on(domain, MALI_FALSE);
+
+		/*
+		 * Make a note of the L2s and/or groups to notify
+		 * (need to release the PM state lock before doing so)
+		 */
+
+		_MALI_OSK_LIST_FOREACHENTRY(l2_cache,
+					    l2_cache_tmp,
+					    mali_pm_domain_get_l2_cache_list(domain),
+					    struct mali_l2_cache_core,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_l2_down <
+					  MALI_MAX_NUMBER_OF_L2_CACHE_CORES);
+			l2_down[*num_l2_down] = l2_cache;
+			(*num_l2_down)++;
+		}
+
+		_MALI_OSK_LIST_FOREACHENTRY(group,
+					    group_tmp,
+					    mali_pm_domain_get_group_list(domain),
+					    struct mali_group,
+					    pm_domain_list) {
+			MALI_DEBUG_ASSERT(*num_groups_down <
+					  MALI_MAX_NUMBER_OF_GROUPS);
+			groups_down[*num_groups_down] = group;
+			(*num_groups_down)++;
+		}
+
+		/* Remove current bit and find next */
+		notify_mask &= ~(1 << (domain_id));
+		domain_bit = _mali_osk_fls(notify_mask);
+	}
+}
+
+/*
+ * Execute pending power domain changes
+ * pm_lock_exec lock must be taken by caller.
+ */
+static void mali_pm_update_sync_internal(void)
+{
+	/*
+	 * This should only be called in non-atomic context
+	 * (normally as deferred work)
+	 *
+	 * Look at the pending power domain changes, and execute these.
+	 * Make sure group and schedulers are notified about changes.
+	 */
+
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+	u32 power_down_mask;
+	u32 power_up_mask;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+
+#if defined(DEBUG)
+	++num_pm_updates;
+#endif
+
+	/* Hold PM state lock while we look at (and obey) the wanted state */
+	mali_pm_state_lock();
+
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Wanted domain mask: .. [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_wanted)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update pre:  Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	/* Figure out which cores we need to power on */
+	power_up_mask = pd_mask_wanted &
+			(pd_mask_wanted ^ pd_mask_current);
+
+	if (power_up_mask != 0) {
+		u32 power_up_mask_pmu;
+		struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_up = 0;
+		struct mali_l2_cache_core *
+			l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_up = 0;
+		u32 i;
+
+#if defined(DEBUG)
+		++num_pm_updates_up;
+#endif
+
+		/*
+		 * Make sure dummy/global domain is always included when
+		 * powering up, since this is controlled by runtime PM,
+		 * and device power is on at this stage.
+		 */
+		power_up_mask |= MALI_PM_DOMAIN_DUMMY_MASK;
+
+		/* Power up only real PMU domains */
+		power_up_mask_pmu = power_up_mask & ~MALI_PM_DOMAIN_DUMMY_MASK;
+
+		/* But not those that happen to be powered on already */
+		power_up_mask_pmu &= (power_up_mask ^ pmu_mask_current) &
+				     power_up_mask;
+
+		if (power_up_mask_pmu != 0) {
+			MALI_DEBUG_ASSERT(pmu != NULL);
+			pmu_mask_current |= power_up_mask_pmu;
+			mali_pmu_power_up(pmu, power_up_mask_pmu);
+		}
+
+		/*
+		 * Put the domains themselves in power up state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_up(power_up_mask,
+					groups_up, &num_groups_up,
+					l2_up, &num_l2_up);
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/* Notify each L2 cache that we have be powered up */
+		for (i = 0; i < num_l2_up; i++) {
+			mali_l2_cache_power_up(l2_up[i]);
+		}
+
+		/*
+		 * Tell execution module about all the groups we have
+		 * powered up. Groups will be notified as a result of this.
+		 */
+		mali_executor_group_power_up(groups_up, num_groups_up);
+
+		/* Lock state again before checking for power down */
+		mali_pm_state_lock();
+	}
+
+	/* Figure out which cores we need to power off */
+	power_down_mask = pd_mask_current &
+			  (pd_mask_wanted ^ pd_mask_current);
+
+	/*
+	 * Never power down the dummy/global domain here. This is to be done
+	 * from a suspend request (since this domain is only physicall powered
+	 * down at that point)
+	 */
+	power_down_mask &= ~MALI_PM_DOMAIN_DUMMY_MASK;
+
+	if (power_down_mask != 0) {
+		u32 power_down_mask_pmu;
+		struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_down = 0;
+		struct mali_l2_cache_core *
+			l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_down = 0;
+		u32 i;
+
+#if defined(DEBUG)
+		++num_pm_updates_down;
+#endif
+
+		/*
+		 * Put the domains themselves in power down state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_down(power_down_mask,
+					  groups_down, &num_groups_down,
+					  l2_down, &num_l2_down);
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/*
+		 * Tell execution module about all the groups we will be
+		 * powering down. Groups will be notified as a result of this.
+		 */
+		if (num_groups_down > 0) {
+			mali_executor_group_power_down(groups_down, num_groups_down);
+		}
+
+		/* Notify each L2 cache that we will be powering down */
+		for (i = 0; i < num_l2_down; i++) {
+			mali_l2_cache_power_down(l2_down[i]);
+		}
+
+		/*
+		 * Power down only PMU domains which should not stay on
+		 * Some domains might for instance currently be incorrectly
+		 * powered up if default domain power state is all on.
+		 */
+		power_down_mask_pmu = pmu_mask_current & (~pd_mask_current);
+
+		if (power_down_mask_pmu != 0) {
+			MALI_DEBUG_ASSERT(pmu != NULL);
+			pmu_mask_current &= ~power_down_mask_pmu;
+			mali_pmu_power_down(pmu, power_down_mask_pmu);
+
+		}
+	} else {
+		/*
+		 * Power down only PMU domains which should not stay on
+		 * Some domains might for instance currently be incorrectly
+		 * powered up if default domain power state is all on.
+		 */
+		u32 power_down_mask_pmu;
+
+		/* No need for state lock since we'll only update PMU */
+		mali_pm_state_unlock();
+
+		power_down_mask_pmu = pmu_mask_current & (~pd_mask_current);
+
+		if (power_down_mask_pmu != 0) {
+			MALI_DEBUG_ASSERT(pmu != NULL);
+			pmu_mask_current &= ~power_down_mask_pmu;
+			mali_pmu_power_down(pmu, power_down_mask_pmu);
+		}
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM update post: Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update post: Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM update post: Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+}
+
+static mali_bool mali_pm_common_suspend(void)
+{
+	mali_pm_state_lock();
+
+	if (pd_mask_wanted != 0) {
+		MALI_DEBUG_PRINT(5, ("PM: Aborting suspend operation\n\n\n"));
+		mali_pm_state_unlock();
+		return MALI_FALSE;
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Wanted domain mask: .. [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_wanted)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Current domain mask: . [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Current PMU mask: .... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend pre: Group power stats: ... <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	if (pd_mask_current != 0) {
+		/*
+		 * We have still some domains powered on.
+		 * It is for instance very normal that at least the
+		 * dummy/global domain is marked as powered on at this point.
+		 * (because it is physically powered on until this function
+		 * returns)
+		 */
+
+		struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS];
+		u32 num_groups_down = 0;
+		struct mali_l2_cache_core *
+			l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+		u32 num_l2_down = 0;
+		u32 i;
+
+		/*
+		 * Put the domains themselves in power down state.
+		 * We get the groups and L2s to notify in return.
+		 */
+		mali_pm_domain_power_down(pd_mask_current,
+					  groups_down,
+					  &num_groups_down,
+					  l2_down,
+					  &num_l2_down);
+
+		MALI_DEBUG_ASSERT(pd_mask_current == 0);
+		MALI_DEBUG_ASSERT(mali_pm_domain_all_unused() == MALI_TRUE);
+
+		/* Need to unlock PM state lock before notifying L2 + groups */
+		mali_pm_state_unlock();
+
+		/*
+		 * Tell execution module about all the groups we will be
+		 * powering down. Groups will be notified as a result of this.
+		 */
+		if (num_groups_down > 0) {
+			mali_executor_group_power_down(groups_down, num_groups_down);
+		}
+
+		/* Notify each L2 cache that we will be powering down */
+		for (i = 0; i < num_l2_down; i++) {
+			mali_l2_cache_power_down(l2_down[i]);
+		}
+
+		pmu_mask_current = 0;
+	} else {
+		MALI_DEBUG_ASSERT(pmu_mask_current == 0);
+
+		MALI_DEBUG_ASSERT(mali_pm_domain_all_unused() == MALI_TRUE);
+
+		mali_pm_state_unlock();
+	}
+
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Current domain mask:  [%s]\n",
+			     mali_pm_mask_to_string(pd_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Current PMU mask: ... [%s]\n",
+			     mali_pm_mask_to_string(pmu_mask_current)));
+	MALI_DEBUG_PRINT(5, ("PM suspend post: Group power stats: .. <%s>\n",
+			     mali_pm_group_stats_to_string()));
+
+	return MALI_TRUE;
+}
+
+static void mali_pm_update_work(void *data)
+{
+	MALI_IGNORE(data);
+	mali_pm_update_sync();
+}
+
+static _mali_osk_errcode_t mali_pm_create_pm_domains(void)
+{
+	int i;
+
+	/* Create all domains (including dummy domain) */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (domain_config[i] == 0x0) continue;
+
+		if (mali_pm_domain_create(domain_config[i]) == NULL) {
+			return _MALI_OSK_ERR_NOMEM;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_pm_set_default_pm_domain_config(void)
+{
+	MALI_DEBUG_ASSERT(_mali_osk_resource_base_address() != 0);
+
+	/* GP core */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_GP, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_GP] = 0x01;
+	}
+
+	/* PP0 - PP3 core */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP0, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 2;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 1;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 0;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP1, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 3;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 2;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 1;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP2, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 4;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 2;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 1;
+		}
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP3, NULL)) {
+		if (mali_is_mali400()) {
+			domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 5;
+		} else if (mali_is_mali450()) {
+			domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 2;
+		} else if (mali_is_mali470()) {
+			domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 1;
+		}
+	}
+
+	/* PP4 - PP7 */
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP4, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP4] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP5, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP5] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP6, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP6] = 0x01 << 3;
+	}
+
+	if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+		    MALI_OFFSET_PP7, NULL)) {
+		domain_config[MALI_DOMAIN_INDEX_PP7] = 0x01 << 3;
+	}
+
+	/* L2gp/L2PP0/L2PP4 */
+	if (mali_is_mali400()) {
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI400_OFFSET_L2_CACHE0, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 1;
+		}
+	} else if (mali_is_mali450()) {
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE0, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 0;
+		}
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE1, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 1;
+		}
+
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI450_OFFSET_L2_CACHE2, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L22] = 0x01 << 3;
+		}
+	} else if (mali_is_mali470()) {
+		if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+			    MALI470_OFFSET_L2_CACHE1, NULL)) {
+			domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 0;
+		}
+	}
+}
+
+static u32 mali_pm_get_registered_cores_mask(void)
+{
+	int i = 0;
+	u32 mask = 0;
+
+	for (i = 0; i < MALI_DOMAIN_INDEX_DUMMY; i++) {
+		mask |= domain_config[i];
+	}
+
+	return mask;
+}
+
+static void mali_pm_set_pmu_domain_config(void)
+{
+	int i = 0;
+
+	_mali_osk_device_data_pmu_config_get(domain_config, MALI_MAX_NUMBER_OF_DOMAINS - 1);
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) {
+		if (domain_config[i] != 0) {
+			MALI_DEBUG_PRINT(2, ("Using customer pmu config:\n"));
+			break;
+		}
+	}
+
+	if (MALI_MAX_NUMBER_OF_DOMAINS - 1 == i) {
+		MALI_DEBUG_PRINT(2, ("Using hw detect pmu config:\n"));
+		mali_pm_set_default_pm_domain_config();
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) {
+		if (domain_config[i]) {
+			MALI_DEBUG_PRINT(2, ("domain_config[%d] = 0x%x\n", i, domain_config[i]));
+		}
+	}
+	/* Can't override dummy domain mask */
+	domain_config[MALI_DOMAIN_INDEX_DUMMY] =
+		1 << MALI_DOMAIN_INDEX_DUMMY;
+}
+
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask)
+{
+	static char bit_str[MALI_MAX_NUMBER_OF_DOMAINS + 1];
+	int bit;
+	int str_pos = 0;
+
+	/* Must be protected by lock since we use shared string buffer */
+	if (pm_lock_exec != NULL) {
+		MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	}
+
+	for (bit = MALI_MAX_NUMBER_OF_DOMAINS - 1; bit >= 0; bit--) {
+		if (mask & (1 << bit)) {
+			bit_str[str_pos] = 'X';
+		} else {
+			bit_str[str_pos] = '-';
+		}
+		str_pos++;
+	}
+
+	bit_str[MALI_MAX_NUMBER_OF_DOMAINS] = '\0';
+
+	return bit_str;
+}
+
+const char *mali_pm_group_stats_to_string(void)
+{
+	static char bit_str[MALI_MAX_NUMBER_OF_GROUPS + 1];
+	u32 num_groups = mali_group_get_glob_num_groups();
+	u32 i;
+
+	/* Must be protected by lock since we use shared string buffer */
+	if (pm_lock_exec != NULL) {
+		MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+	}
+
+	for (i = 0; i < num_groups && i < MALI_MAX_NUMBER_OF_GROUPS; i++) {
+		struct mali_group *group;
+
+		group = mali_group_get_glob_group(i);
+
+		if (mali_group_power_is_on(group) == MALI_TRUE) {
+			bit_str[i] = 'X';
+		} else {
+			bit_str[i] = '-';
+		}
+	}
+
+	bit_str[i] = '\0';
+
+	return bit_str;
+}
+#endif
+
+/*
+ * num_pp is the number of PP cores which will be powered on given this mask
+ * cost is the total power cost of cores which will be powered on given this mask
+ */
+static void mali_pm_stat_from_mask(u32 mask, u32 *num_pp, u32 *cost)
+{
+	u32 i;
+
+	/* loop through all cores */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (!(domain_config[i] & mask)) {
+			continue;
+		}
+
+		switch (i) {
+		case MALI_DOMAIN_INDEX_GP:
+			*cost += MALI_GP_COST;
+
+			break;
+		case MALI_DOMAIN_INDEX_PP0: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP1: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP2: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP3:
+			if (mali_is_mali400()) {
+				if ((domain_config[MALI_DOMAIN_INDEX_L20] & mask)
+				    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+					== domain_config[MALI_DOMAIN_INDEX_L20])) {
+					*num_pp += 1;
+				}
+			} else {
+				if ((domain_config[MALI_DOMAIN_INDEX_L21] & mask)
+				    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+					== domain_config[MALI_DOMAIN_INDEX_L21])) {
+					*num_pp += 1;
+				}
+			}
+
+			*cost += MALI_PP_COST;
+			break;
+		case MALI_DOMAIN_INDEX_PP4: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP5: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP6: /* Fall through */
+		case MALI_DOMAIN_INDEX_PP7:
+			MALI_DEBUG_ASSERT(mali_is_mali450());
+
+			if ((domain_config[MALI_DOMAIN_INDEX_L22] & mask)
+			    || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+				== domain_config[MALI_DOMAIN_INDEX_L22])) {
+				*num_pp += 1;
+			}
+
+			*cost += MALI_PP_COST;
+			break;
+		case MALI_DOMAIN_INDEX_L20: /* Fall through */
+		case MALI_DOMAIN_INDEX_L21: /* Fall through */
+		case MALI_DOMAIN_INDEX_L22:
+			*cost += MALI_L2_COST;
+
+			break;
+		}
+	}
+}
+
+void mali_pm_power_cost_setup(void)
+{
+	/*
+	 * Two parallel arrays which store the best domain mask and its cost
+	 * The index is the number of PP cores, E.g. Index 0 is for 1 PP option,
+	 * might have mask 0x2 and with cost of 1, lower cost is better
+	 */
+	u32 best_mask[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 };
+	u32 best_cost[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 };
+	/* Array cores_in_domain is used to store the total pp cores in each pm domain. */
+	u32 cores_in_domain[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	/* Domain_count is used to represent the max domain we have.*/
+	u32 max_domain_mask = 0;
+	u32 max_domain_id = 0;
+	u32 always_on_pp_cores = 0;
+
+	u32 num_pp, cost, mask;
+	u32 i, j, k;
+
+	/* Initialize statistics */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; i++) {
+		best_mask[i] = 0;
+		best_cost[i] = 0xFFFFFFFF; /* lower cost is better */
+	}
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1; i++) {
+		for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) {
+			mali_pm_domain_power_cost_result[i][j] = 0;
+		}
+	}
+
+	/* Caculate number of pp cores of a given domain config. */
+	for (i = MALI_DOMAIN_INDEX_PP0; i <= MALI_DOMAIN_INDEX_PP7; i++) {
+		if (domain_config[i] > 0) {
+			/* Get the max domain mask value used to caculate power cost
+			 * and we don't count in always on pp cores. */
+			if (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[i]
+			    && max_domain_mask < domain_config[i]) {
+				max_domain_mask = domain_config[i];
+			}
+
+			if (domain_config[i] == MALI_PM_DOMAIN_DUMMY_MASK) {
+				always_on_pp_cores++;
+			}
+		}
+	}
+	max_domain_id = _mali_osk_fls(max_domain_mask);
+
+	/*
+	 * Try all combinations of power domains and check how many PP cores
+	 * they have and their power cost.
+	 */
+	for (mask = 0; mask < (1 << max_domain_id); mask++) {
+		num_pp = 0;
+		cost = 0;
+
+		mali_pm_stat_from_mask(mask, &num_pp, &cost);
+
+		/* This mask is usable for all MP1 up to num_pp PP cores, check statistics for all */
+		for (i = 0; i < num_pp; i++) {
+			if (best_cost[i] >= cost) {
+				best_cost[i] = cost;
+				best_mask[i] = mask;
+			}
+		}
+	}
+
+	/*
+	 * If we want to enable x pp cores, if x is less than number of always_on pp cores,
+	 * all of pp cores we will enable must be always_on pp cores.
+	 */
+	for (i = 0; i < mali_executor_get_num_cores_total(); i++) {
+		if (i < always_on_pp_cores) {
+			mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1]
+				= i + 1;
+		} else {
+			mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1]
+				= always_on_pp_cores;
+		}
+	}
+
+	/* In this loop, variable i represent for the number of non-always on pp cores we want to enabled. */
+	for (i = 0; i < (mali_executor_get_num_cores_total() - always_on_pp_cores); i++) {
+		if (best_mask[i] == 0) {
+			/* This MP variant is not available */
+			continue;
+		}
+
+		for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) {
+			cores_in_domain[j] = 0;
+		}
+
+		for (j = MALI_DOMAIN_INDEX_PP0; j <= MALI_DOMAIN_INDEX_PP7; j++) {
+			if (0 < domain_config[j]
+			    && (domain_config[i] != MALI_PM_DOMAIN_DUMMY_MASK)) {
+				cores_in_domain[_mali_osk_fls(domain_config[j]) - 1]++;
+			}
+		}
+
+		/* In this loop, j represent for the number we have already enabled.*/
+		for (j = 0; j <= i;) {
+			/* j used to visit all of domain to get the number of pp cores remained in it. */
+			for (k = 0; k < max_domain_id; k++) {
+				/* If domain k in best_mask[i] is enabled and this domain has extra pp cores,
+				 * we know we must pick at least one pp core from this domain.
+				 * And then we move to next enabled pm domain. */
+				if ((best_mask[i] & (0x1 << k)) && (cores_in_domain[k] > 0)) {
+					cores_in_domain[k]--;
+					mali_pm_domain_power_cost_result[always_on_pp_cores + i + 1][k]++;
+					j++;
+					if (j > i) {
+						break;
+					}
+				}
+			}
+		}
+	}
+}
+
+/*
+ * When we are doing core scaling,
+ * this function is called to return the best mask to
+ * achieve the best pp group power cost.
+ */
+void mali_pm_get_best_power_cost_mask(int num_requested, int *dst)
+{
+	MALI_DEBUG_ASSERT((mali_executor_get_num_cores_total() >= num_requested) && (num_requested >= 0));
+
+	_mali_osk_memcpy(dst, mali_pm_domain_power_cost_result[num_requested], MALI_MAX_NUMBER_OF_DOMAINS * sizeof(int));
+}
+
+u32 mali_pm_get_current_mask(void)
+{
+	return pd_mask_current;
+}
+
+u32 mali_pm_get_wanted_mask(void)
+{
+	return pd_mask_wanted;
+}
diff --git a/utgard/r7p0/common/mali_pm.h b/utgard/r7p0/common/mali_pm.h
new file mode 100644
index 0000000..3854d44
--- /dev/null
+++ b/utgard/r7p0/common/mali_pm.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_H__
+#define __MALI_PM_H__
+
+#include "mali_osk.h"
+#include "mali_pm_domain.h"
+
+#define MALI_DOMAIN_INDEX_GP        0
+#define MALI_DOMAIN_INDEX_PP0       1
+#define MALI_DOMAIN_INDEX_PP1       2
+#define MALI_DOMAIN_INDEX_PP2       3
+#define MALI_DOMAIN_INDEX_PP3       4
+#define MALI_DOMAIN_INDEX_PP4       5
+#define MALI_DOMAIN_INDEX_PP5       6
+#define MALI_DOMAIN_INDEX_PP6       7
+#define MALI_DOMAIN_INDEX_PP7       8
+#define MALI_DOMAIN_INDEX_L20       9
+#define MALI_DOMAIN_INDEX_L21      10
+#define MALI_DOMAIN_INDEX_L22      11
+/*
+ * The dummy domain is used when there is no physical power domain
+ * (e.g. no PMU or always on cores)
+ */
+#define MALI_DOMAIN_INDEX_DUMMY    12
+#define MALI_MAX_NUMBER_OF_DOMAINS 13
+
+/**
+ * Initialize the Mali PM module
+ *
+ * PM module covers Mali PM core, PM domains and Mali PMU
+ */
+_mali_osk_errcode_t mali_pm_initialize(void);
+
+/**
+ * Terminate the Mali PM module
+ */
+void mali_pm_terminate(void);
+
+void mali_pm_exec_lock(void);
+void mali_pm_exec_unlock(void);
+
+
+struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index,
+		struct mali_l2_cache_core *l2_cache);
+struct mali_pm_domain *mali_pm_register_group(u32 domain_index,
+		struct mali_group *group);
+
+mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains,
+				  struct mali_group **groups,
+				  u32 num_domains);
+mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains,
+				  u32 num_domains);
+
+void mali_pm_init_begin(void);
+void mali_pm_init_end(void);
+
+void mali_pm_update_sync(void);
+void mali_pm_update_async(void);
+
+/* Callback functions for system power management */
+void mali_pm_os_suspend(mali_bool os_suspend);
+void mali_pm_os_resume(void);
+
+mali_bool mali_pm_runtime_suspend(void);
+void mali_pm_runtime_resume(void);
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain,
+			      char *buf, u32 size);
+#endif
+
+void mali_pm_power_cost_setup(void);
+
+void mali_pm_get_best_power_cost_mask(int num_requested, int *dst);
+
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask);
+#endif
+
+u32 mali_pm_get_current_mask(void);
+u32 mali_pm_get_wanted_mask(void);
+#endif /* __MALI_PM_H__ */
diff --git a/utgard/r7p0/common/mali_pm_domain.c b/utgard/r7p0/common/mali_pm_domain.c
new file mode 100644
index 0000000..04dc298
--- /dev/null
+++ b/utgard/r7p0/common/mali_pm_domain.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_pm_domain.h"
+#include "mali_pmu.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+
+static struct mali_pm_domain *mali_pm_domains[MALI_MAX_NUMBER_OF_DOMAINS] = {
+NULL, };
+
+void mali_pm_domain_initialize(void)
+{
+	/* Domains will be initialized/created on demand */
+}
+
+void mali_pm_domain_terminate(void)
+{
+	int i;
+
+	/* Delete all domains that has been created */
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		mali_pm_domain_delete(mali_pm_domains[i]);
+		mali_pm_domains[i] = NULL;
+	}
+}
+
+struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask)
+{
+	struct mali_pm_domain *domain = NULL;
+	u32 domain_id = 0;
+
+	domain = mali_pm_domain_get_from_mask(pmu_mask);
+	if (domain != NULL) return domain;
+
+	MALI_DEBUG_PRINT(2,
+			 ("Mali PM domain: Creating Mali PM domain (mask=0x%08X)\n",
+			  pmu_mask));
+
+	domain = (struct mali_pm_domain *)_mali_osk_malloc(
+			 sizeof(struct mali_pm_domain));
+	if (domain != NULL) {
+		domain->power_is_on = MALI_FALSE;
+		domain->pmu_mask = pmu_mask;
+		domain->use_count = 0;
+		_mali_osk_list_init(&domain->group_list);
+		_mali_osk_list_init(&domain->l2_cache_list);
+
+		domain_id = _mali_osk_fls(pmu_mask) - 1;
+		/* Verify the domain_id */
+		MALI_DEBUG_ASSERT(domain_id < MALI_MAX_NUMBER_OF_DOMAINS);
+		/* Verify that pmu_mask only one bit is set */
+		MALI_DEBUG_ASSERT((1 << domain_id) == pmu_mask);
+		mali_pm_domains[domain_id] = domain;
+
+		return domain;
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Unable to create PM domain\n"));
+	}
+
+	return NULL;
+}
+
+void mali_pm_domain_delete(struct mali_pm_domain *domain)
+{
+	if (domain == NULL) {
+		return;
+	}
+
+	_mali_osk_list_delinit(&domain->group_list);
+	_mali_osk_list_delinit(&domain->l2_cache_list);
+
+	_mali_osk_free(domain);
+}
+
+void mali_pm_domain_add_group(struct mali_pm_domain *domain,
+			      struct mali_group *group)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	/*
+	 * Use addtail because virtual group is created last and it needs
+	 * to be at the end of the list (in order to be activated after
+	 * all children.
+	 */
+	_mali_osk_list_addtail(&group->pm_domain_list, &domain->group_list);
+}
+
+void mali_pm_domain_add_l2_cache(struct mali_pm_domain *domain,
+				 struct mali_l2_cache_core *l2_cache)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT_POINTER(l2_cache);
+	_mali_osk_list_add(&l2_cache->pm_domain_list, &domain->l2_cache_list);
+}
+
+struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask)
+{
+	u32 id = 0;
+
+	if (mask == 0) {
+		return NULL;
+	}
+
+	id = _mali_osk_fls(mask) - 1;
+
+	MALI_DEBUG_ASSERT(id < MALI_MAX_NUMBER_OF_DOMAINS);
+	/* Verify that pmu_mask only one bit is set */
+	MALI_DEBUG_ASSERT((1 << id) == mask);
+
+	return mali_pm_domains[id];
+}
+
+struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id)
+{
+	MALI_DEBUG_ASSERT(id < MALI_MAX_NUMBER_OF_DOMAINS);
+
+	return mali_pm_domains[id];
+}
+
+u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+
+	if (domain->use_count == 0) {
+		_mali_osk_pm_dev_ref_get_async();
+	}
+
+	++domain->use_count;
+	MALI_DEBUG_PRINT(4, ("PM domain %p: ref_get, use_count => %u\n", domain, domain->use_count));
+
+	/* Return our mask so caller can check this against wanted mask */
+	return domain->pmu_mask;
+}
+
+u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+
+	--domain->use_count;
+	MALI_DEBUG_PRINT(4, ("PM domain %p: ref_put, use_count => %u\n", domain, domain->use_count));
+
+	if (domain->use_count == 0) {
+		_mali_osk_pm_dev_ref_put();
+	}
+
+	/*
+	 * Return the PMU mask which now could be be powered down
+	 * (the bit for this domain).
+	 * This is the responsibility of the caller (mali_pm)
+	 */
+	return (0 == domain->use_count ? domain->pmu_mask : 0);
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_domain_get_id(struct mali_pm_domain *domain)
+{
+	u32 id = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	MALI_DEBUG_ASSERT(domain->pmu_mask != 0);
+
+	id = _mali_osk_fls(domain->pmu_mask) - 1;
+
+	MALI_DEBUG_ASSERT(id < MALI_MAX_NUMBER_OF_DOMAINS);
+	/* Verify that pmu_mask only one bit is set */
+	MALI_DEBUG_ASSERT((1 << id) == domain->pmu_mask);
+	/* Verify that we have stored the domain at right id/index */
+	MALI_DEBUG_ASSERT(domain == mali_pm_domains[id]);
+
+	return id;
+}
+#endif
+
+#if defined(DEBUG)
+mali_bool mali_pm_domain_all_unused(void)
+{
+	int i;
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		if (mali_pm_domains[i] == NULL) {
+			/* Nothing to check */
+			continue;
+		}
+
+		if (mali_pm_domains[i]->power_is_on == MALI_TRUE) {
+			/* Not ready for suspend! */
+			return MALI_FALSE;
+		}
+
+		if (mali_pm_domains[i]->use_count != 0) {
+			/* Not ready for suspend! */
+			return MALI_FALSE;
+		}
+	}
+
+	return MALI_TRUE;
+}
+#endif
diff --git a/utgard/r7p0/common/mali_pm_domain.h b/utgard/r7p0/common/mali_pm_domain.h
new file mode 100644
index 0000000..c0ac2f7
--- /dev/null
+++ b/utgard/r7p0/common/mali_pm_domain.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_DOMAIN_H__
+#define __MALI_PM_DOMAIN_H__
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+#include "mali_l2_cache.h"
+#include "mali_group.h"
+#include "mali_pmu.h"
+
+/* Instances are protected by PM state lock */
+struct mali_pm_domain {
+	mali_bool power_is_on;
+	s32 use_count;
+	u32 pmu_mask;
+
+	/* Zero or more groups can belong to this domain */
+	_mali_osk_list_t group_list;
+
+	/* Zero or more L2 caches can belong to this domain */
+	_mali_osk_list_t l2_cache_list;
+};
+
+
+void mali_pm_domain_initialize(void);
+void mali_pm_domain_terminate(void);
+
+struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask);
+void mali_pm_domain_delete(struct mali_pm_domain *domain);
+
+void mali_pm_domain_add_l2_cache(
+	struct mali_pm_domain *domain,
+	struct mali_l2_cache_core *l2_cache);
+void mali_pm_domain_add_group(struct mali_pm_domain *domain,
+			      struct mali_group *group);
+
+struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask);
+struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id);
+
+/* Ref counting */
+u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain);
+u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain);
+
+MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_group_list(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return &domain->group_list;
+}
+
+MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_l2_cache_list(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return &domain->l2_cache_list;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pm_domain_power_is_on(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->power_is_on;
+}
+
+MALI_STATIC_INLINE void mali_pm_domain_set_power_on(
+	struct mali_pm_domain *domain,
+	mali_bool power_is_on)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	domain->power_is_on = power_is_on;
+}
+
+MALI_STATIC_INLINE u32 mali_pm_domain_get_use_count(
+	struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->use_count;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_domain_get_id(struct mali_pm_domain *domain);
+
+MALI_STATIC_INLINE u32 mali_pm_domain_get_mask(struct mali_pm_domain *domain)
+{
+	MALI_DEBUG_ASSERT_POINTER(domain);
+	return domain->pmu_mask;
+}
+#endif
+
+#if defined(DEBUG)
+mali_bool mali_pm_domain_all_unused(void);
+#endif
+
+#endif /* __MALI_PM_DOMAIN_H__ */
diff --git a/utgard/r7p0/common/mali_pm_metrics.c b/utgard/r7p0/common/mali_pm_metrics.c
new file mode 100644
index 0000000..b150038
--- /dev/null
+++ b/utgard/r7p0/common/mali_pm_metrics.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "mali_pm_metrics.h"
+#include "mali_osk_locks.h"
+#include "mali_osk_mali.h"
+#include <linux/ktime.h>
+
+#define MALI_PM_TIME_SHIFT 0
+#define MALI_UTILIZATION_MAX_PERIOD 80000000/* ns = 100ms */
+
+_mali_osk_errcode_t mali_pm_metrics_init(struct mali_device *mdev)
+{
+	int i = 0;
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	mdev->mali_metrics.time_period_start = ktime_get();
+	mdev->mali_metrics.time_period_start_gp = mdev->mali_metrics.time_period_start;
+	mdev->mali_metrics.time_period_start_pp = mdev->mali_metrics.time_period_start;
+
+	mdev->mali_metrics.time_busy = 0;
+	mdev->mali_metrics.time_idle = 0;
+	mdev->mali_metrics.prev_busy = 0;
+	mdev->mali_metrics.prev_idle = 0;
+	mdev->mali_metrics.num_running_gp_cores = 0;
+	mdev->mali_metrics.num_running_pp_cores = 0;
+	mdev->mali_metrics.time_busy_gp = 0;
+	mdev->mali_metrics.time_idle_gp = 0;
+
+	for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; i++) {
+		mdev->mali_metrics.time_busy_pp[i] = 0;
+		mdev->mali_metrics.time_idle_pp[i] = 0;
+	}
+	mdev->mali_metrics.gpu_active = MALI_FALSE;
+
+	mdev->mali_metrics.lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST);
+	if (mdev->mali_metrics.lock == NULL) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_pm_metrics_term(struct mali_device *mdev)
+{
+	_mali_osk_spinlock_irq_term(mdev->mali_metrics.lock);
+}
+
+/*caller needs to hold mdev->mali_metrics.lock before calling this function*/
+void mali_pm_record_job_status(struct mali_device *mdev)
+{
+	ktime_t now;
+	ktime_t diff;
+	u64 ns_time;
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	now = ktime_get();
+	diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+
+	ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+	mdev->mali_metrics.time_busy += ns_time;
+	mdev->mali_metrics.time_period_start = now;
+}
+
+void mali_pm_record_gpu_idle(mali_bool is_gp)
+{
+	ktime_t now;
+	ktime_t diff;
+	u64 ns_time;
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+	now = ktime_get();
+
+	if (is_gp == MALI_TRUE) {
+		--mdev->mali_metrics.num_running_gp_cores;
+		if (mdev->mali_metrics.num_running_gp_cores == 0) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_gp);
+			ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_busy_gp += ns_time;
+			mdev->mali_metrics.time_period_start_gp = now;
+
+			if (mdev->mali_metrics.num_running_pp_cores == 0) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_busy += ns_time;
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_FALSE;
+			}
+		}
+	} else {
+		--mdev->mali_metrics.num_running_pp_cores;
+		if (mdev->mali_metrics.num_running_pp_cores == 0) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_pp);
+			ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_busy_pp[0] += ns_time;
+			mdev->mali_metrics.time_period_start_pp = now;
+
+			if (mdev->mali_metrics.num_running_gp_cores == 0) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_busy += ns_time;
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_FALSE;
+			}
+		}
+	}
+
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_record_gpu_active(mali_bool is_gp)
+{
+	ktime_t now;
+	ktime_t diff;
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+	now = ktime_get();
+
+	if (is_gp == MALI_TRUE) {
+		mdev->mali_metrics.num_running_gp_cores++;
+		if (mdev->mali_metrics.num_running_gp_cores == 1) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_gp);
+			mdev->mali_metrics.time_idle_gp += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_period_start_gp = now;
+			if (mdev->mali_metrics.num_running_pp_cores == 0) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_FALSE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_TRUE;
+			}
+		} else {
+			MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+		}
+	} else {
+		mdev->mali_metrics.num_running_pp_cores++;
+		if (mdev->mali_metrics.num_running_pp_cores == 1) {
+			diff = ktime_sub(now, mdev->mali_metrics.time_period_start_pp);
+			mdev->mali_metrics.time_idle_pp[0] += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+			mdev->mali_metrics.time_period_start_pp = now;
+			if (mdev->mali_metrics.num_running_gp_cores == 0) {
+				MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_FALSE);
+				diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+				mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+				mdev->mali_metrics.time_period_start = now;
+				mdev->mali_metrics.gpu_active = MALI_TRUE;
+			}
+		} else {
+			MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE);
+		}
+	}
+
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+
+/*caller needs to hold mdev->mali_metrics.lock before calling this function*/
+static void mali_pm_get_dvfs_utilisation_calc(struct mali_device *mdev, ktime_t now)
+{
+	ktime_t diff;
+
+	MALI_DEBUG_ASSERT(mdev != NULL);
+
+	diff = ktime_sub(now, mdev->mali_metrics.time_period_start);
+
+	if (mdev->mali_metrics.gpu_active) {
+		mdev->mali_metrics.time_busy += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+	} else {
+		mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT);
+	}
+}
+
+/* Caller needs to hold mdev->mali_metrics.lock before calling this function. */
+static void mali_pm_reset_dvfs_utilisation_unlocked(struct mali_device *mdev, ktime_t now)
+{
+	/* Store previous value */
+	mdev->mali_metrics.prev_idle = mdev->mali_metrics.time_idle;
+	mdev->mali_metrics.prev_busy = mdev->mali_metrics.time_busy;
+
+	/* Reset current values */
+	mdev->mali_metrics.time_period_start = now;
+	mdev->mali_metrics.time_period_start_gp = now;
+	mdev->mali_metrics.time_period_start_pp = now;
+	mdev->mali_metrics.time_idle = 0;
+	mdev->mali_metrics.time_busy = 0;
+
+	mdev->mali_metrics.time_busy_gp = 0;
+	mdev->mali_metrics.time_idle_gp = 0;
+	mdev->mali_metrics.time_busy_pp[0] = 0;
+	mdev->mali_metrics.time_idle_pp[0] = 0;
+}
+
+void mali_pm_reset_dvfs_utilisation(struct mali_device *mdev)
+{
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+	mali_pm_reset_dvfs_utilisation_unlocked(mdev, ktime_get());
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_get_dvfs_utilisation(struct mali_device *mdev,
+				  unsigned long *total_out, unsigned long *busy_out)
+{
+	ktime_t now = ktime_get();
+	u64 busy = 0;
+	u64 total = 0;
+
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+
+	mali_pm_get_dvfs_utilisation_calc(mdev, now);
+
+	busy = mdev->mali_metrics.time_busy;
+	total = busy + mdev->mali_metrics.time_idle;
+
+	/* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
+	 * 100ms) */
+	if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+		mali_pm_reset_dvfs_utilisation_unlocked(mdev, now);
+	} else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+		total += mdev->mali_metrics.prev_idle +
+			 mdev->mali_metrics.prev_busy;
+		busy += mdev->mali_metrics.prev_busy;
+	}
+
+	*total_out = (unsigned long)total;
+	*busy_out = (unsigned long)busy;
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_metrics_spin_lock(void)
+{
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+
+	_mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock);
+}
+
+void mali_pm_metrics_spin_unlock(void)
+{
+	struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev);
+
+	_mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock);
+}
diff --git a/utgard/r7p0/common/mali_pm_metrics.h b/utgard/r7p0/common/mali_pm_metrics.h
new file mode 100644
index 0000000..faee781
--- /dev/null
+++ b/utgard/r7p0/common/mali_pm_metrics.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_METRICS_H__
+#define __MALI_PM_METRICS_H__
+
+#ifdef CONFIG_MALI_DEVFREQ
+#include "mali_osk_locks.h"
+#include "mali_group.h"
+
+struct mali_device;
+
+/**
+ * Metrics data collected for use by the power management framework.
+ */
+struct mali_pm_metrics_data {
+	ktime_t time_period_start;
+	u64 time_busy;
+	u64 time_idle;
+	u64 prev_busy;
+	u64 prev_idle;
+	u32 num_running_gp_cores;
+	u32 num_running_pp_cores;
+	ktime_t time_period_start_gp;
+	u64 time_busy_gp;
+	u64 time_idle_gp;
+	ktime_t time_period_start_pp;
+	u64 time_busy_pp[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	u64 time_idle_pp[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+	mali_bool gpu_active;
+	_mali_osk_spinlock_irq_t *lock;
+};
+
+/**
+ * Initialize/start the Mali GPU pm_metrics metrics reporting.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_pm_metrics_init(struct mali_device *mdev);
+
+/**
+ * Terminate the Mali GPU pm_metrics metrics reporting
+ */
+void mali_pm_metrics_term(struct mali_device *mdev);
+
+/**
+ * Should be called when a job is about to execute a GPU job
+ */
+void mali_pm_record_gpu_active(mali_bool is_gp);
+
+/**
+ * Should be called when a job is finished
+ */
+void mali_pm_record_gpu_idle(mali_bool is_gp);
+
+void mali_pm_reset_dvfs_utilisation(struct mali_device *mdev);
+
+void mali_pm_get_dvfs_utilisation(struct mali_device *mdev, unsigned long *total_out, unsigned long *busy_out);
+
+void mali_pm_metrics_spin_lock(void);
+
+void mali_pm_metrics_spin_unlock(void);
+#else
+void mali_pm_record_gpu_idle(mali_bool is_gp) {}
+void mali_pm_record_gpu_active(mali_bool is_gp) {}
+#endif
+#endif /* __MALI_PM_METRICS_H__ */
diff --git a/utgard/r7p0/common/mali_pmu.c b/utgard/r7p0/common/mali_pmu.c
new file mode 100644
index 0000000..3beab99
--- /dev/null
+++ b/utgard/r7p0/common/mali_pmu.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_pmu.c
+ * Mali driver functions for Mali 400 PMU hardware
+ */
+#include "mali_hw_core.h"
+#include "mali_pmu.h"
+#include "mali_pp.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_pm.h"
+#include "mali_osk_mali.h"
+
+struct mali_pmu_core *mali_global_pmu_core;
+
+static _mali_osk_errcode_t mali_pmu_wait_for_command_finish(
+	struct mali_pmu_core *pmu);
+
+struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource)
+{
+	struct mali_pmu_core *pmu;
+
+	MALI_DEBUG_ASSERT(mali_global_pmu_core == NULL);
+	MALI_DEBUG_PRINT(2, ("Mali PMU: Creating Mali PMU core\n"));
+
+	pmu = (struct mali_pmu_core *)_mali_osk_malloc(
+		      sizeof(struct mali_pmu_core));
+	if (pmu != NULL) {
+		pmu->registered_cores_mask = 0; /* to be set later */
+
+		if (_MALI_OSK_ERR_OK == mali_hw_core_create(&pmu->hw_core,
+				resource, PMU_REGISTER_ADDRESS_SPACE_SIZE)) {
+
+			pmu->switch_delay = _mali_osk_get_pmu_switch_delay();
+
+			mali_global_pmu_core = pmu;
+
+			return pmu;
+		}
+		_mali_osk_free(pmu);
+	}
+
+	return NULL;
+}
+
+void mali_pmu_delete(struct mali_pmu_core *pmu)
+{
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu == mali_global_pmu_core);
+
+	MALI_DEBUG_PRINT(2, ("Mali PMU: Deleting Mali PMU core\n"));
+
+	mali_global_pmu_core = NULL;
+
+	mali_hw_core_delete(&pmu->hw_core);
+	_mali_osk_free(pmu);
+}
+
+void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask)
+{
+	pmu->registered_cores_mask = mask;
+}
+
+void mali_pmu_reset(struct mali_pmu_core *pmu)
+{
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	/* Setup the desired defaults */
+	mali_hw_core_register_write_relaxed(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_INT_MASK, 0);
+	mali_hw_core_register_write_relaxed(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_SW_DELAY, pmu->switch_delay);
+}
+
+void mali_pmu_power_up_all(struct mali_pmu_core *pmu)
+{
+	u32 stat;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	mali_pm_exec_lock();
+
+	mali_pmu_reset(pmu);
+
+	/* Now simply power up the domains which are marked as powered down */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	mali_pmu_power_up(pmu, stat);
+
+	mali_pm_exec_unlock();
+}
+
+void mali_pmu_power_down_all(struct mali_pmu_core *pmu)
+{
+	u32 stat;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+	mali_pm_exec_lock();
+
+	/* Now simply power down the domains which are marked as powered up */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	mali_pmu_power_down(pmu, (~stat) & pmu->registered_cores_mask);
+
+	mali_pm_exec_unlock();
+}
+
+_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask)
+{
+	u32 stat;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+	MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask);
+	MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core,
+				PMU_REG_ADDR_MGMT_INT_RAWSTAT) &
+				PMU_REG_VAL_IRQ));
+
+	MALI_DEBUG_PRINT(3,
+			 ("PMU power down: ...................... [%s]\n",
+			  mali_pm_mask_to_string(mask)));
+
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+
+	/*
+	 * Assert that we are not powering down domains which are already
+	 * powered down.
+	 */
+	MALI_DEBUG_ASSERT(0 == (stat & mask));
+
+	mask  &= ~(0x1 << MALI_DOMAIN_INDEX_DUMMY);
+
+	if (0 == mask || 0 == ((~stat) & mask)) return _MALI_OSK_ERR_OK;
+
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_POWER_DOWN, mask);
+
+	/*
+	 * Do not wait for interrupt on Mali-300/400 if all domains are
+	 * powered off by our power down command, because the HW will simply
+	 * not generate an interrupt in this case.
+	 */
+	if (mali_is_mali450() || mali_is_mali470() || pmu->registered_cores_mask != (mask | stat)) {
+		err = mali_pmu_wait_for_command_finish(pmu);
+		if (err != _MALI_OSK_ERR_OK) {
+			return err;
+		}
+	} else {
+		mali_hw_core_register_write(&pmu->hw_core,
+					    PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ);
+	}
+
+#if defined(DEBUG)
+	/* Verify power status of domains after power down */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	MALI_DEBUG_ASSERT(mask == (stat & mask));
+#endif
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask)
+{
+	u32 stat;
+	_mali_osk_errcode_t err;
+#if !defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP)
+	u32 current_domain;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(pmu);
+	MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+	MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask);
+	MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core,
+				PMU_REG_ADDR_MGMT_INT_RAWSTAT) &
+				PMU_REG_VAL_IRQ));
+
+	MALI_DEBUG_PRINT(3,
+			 ("PMU power up: ........................ [%s]\n",
+			  mali_pm_mask_to_string(mask)));
+
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	stat &= pmu->registered_cores_mask;
+
+	mask  &= ~(0x1 << MALI_DOMAIN_INDEX_DUMMY);
+	if (0 == mask || 0 == (stat & mask)) return _MALI_OSK_ERR_OK;
+
+	/*
+	 * Assert that we are only powering up domains which are currently
+	 * powered down.
+	 */
+	MALI_DEBUG_ASSERT(mask == (stat & mask));
+
+#if defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP)
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_POWER_UP, mask);
+
+	err = mali_pmu_wait_for_command_finish(pmu);
+	if (err != _MALI_OSK_ERR_OK) {
+		return err;
+	}
+#else
+	for (current_domain = 1;
+	     current_domain <= pmu->registered_cores_mask;
+	     current_domain <<= 1) {
+		if (current_domain & mask & stat) {
+			mali_hw_core_register_write(&pmu->hw_core,
+						    PMU_REG_ADDR_MGMT_POWER_UP,
+						    current_domain);
+
+			err = mali_pmu_wait_for_command_finish(pmu);
+			if (err != _MALI_OSK_ERR_OK) {
+				return err;
+			}
+		}
+	}
+#endif
+
+#if defined(DEBUG)
+	/* Verify power status of domains after power up */
+	stat = mali_hw_core_register_read(&pmu->hw_core,
+					  PMU_REG_ADDR_MGMT_STATUS);
+	MALI_DEBUG_ASSERT(0 == (stat & mask));
+#endif /* defined(DEBUG) */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_pmu_wait_for_command_finish(
+	struct mali_pmu_core *pmu)
+{
+	u32 rawstat;
+	u32 timeout = MALI_REG_POLL_COUNT_SLOW;
+
+	MALI_DEBUG_ASSERT(pmu);
+
+	/* Wait for the command to complete */
+	do {
+		rawstat = mali_hw_core_register_read(&pmu->hw_core,
+						     PMU_REG_ADDR_MGMT_INT_RAWSTAT);
+		--timeout;
+	} while (0 == (rawstat & PMU_REG_VAL_IRQ) && 0 < timeout);
+
+	MALI_DEBUG_ASSERT(timeout > 0);
+
+	if (timeout == 0) {
+		return _MALI_OSK_ERR_TIMEOUT;
+	}
+
+	mali_hw_core_register_write(&pmu->hw_core,
+				    PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ);
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r7p0/common/mali_pmu.h b/utgard/r7p0/common/mali_pmu.h
new file mode 100644
index 0000000..d100cf6
--- /dev/null
+++ b/utgard/r7p0/common/mali_pmu.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_platform.h
+ * Platform specific Mali driver functions
+ */
+
+#ifndef __MALI_PMU_H__
+#define __MALI_PMU_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_hw_core.h"
+
+/** @brief MALI inbuilt PMU hardware info and PMU hardware has knowledge of cores power mask
+ */
+struct mali_pmu_core {
+	struct mali_hw_core hw_core;
+	u32 registered_cores_mask;
+	u32 switch_delay;
+};
+
+/** @brief Register layout for hardware PMU
+ */
+typedef enum {
+	PMU_REG_ADDR_MGMT_POWER_UP                  = 0x00,     /*< Power up register */
+	PMU_REG_ADDR_MGMT_POWER_DOWN                = 0x04,     /*< Power down register */
+	PMU_REG_ADDR_MGMT_STATUS                    = 0x08,     /*< Core sleep status register */
+	PMU_REG_ADDR_MGMT_INT_MASK                  = 0x0C,     /*< Interrupt mask register */
+	PMU_REG_ADDR_MGMT_INT_RAWSTAT               = 0x10,     /*< Interrupt raw status register */
+	PMU_REG_ADDR_MGMT_INT_CLEAR                 = 0x18,     /*< Interrupt clear register */
+	PMU_REG_ADDR_MGMT_SW_DELAY                  = 0x1C,     /*< Switch delay register */
+	PMU_REGISTER_ADDRESS_SPACE_SIZE             = 0x28,     /*< Size of register space */
+} pmu_reg_addr_mgmt_addr;
+
+#define PMU_REG_VAL_IRQ 1
+
+extern struct mali_pmu_core *mali_global_pmu_core;
+
+/** @brief Initialisation of MALI PMU
+ *
+ * This is called from entry point of the driver in order to create and intialize the PMU resource
+ *
+ * @param resource it will be a pointer to a PMU resource
+ * @param number_of_pp_cores Number of found PP resources in configuration
+ * @param number_of_l2_caches Number of found L2 cache resources in configuration
+ * @return The created PMU object, or NULL in case of failure.
+ */
+struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource);
+
+/** @brief It deallocates the PMU resource
+ *
+ * This is called on the exit of the driver to terminate the PMU resource
+ *
+ * @param pmu Pointer to PMU core object to delete
+ */
+void mali_pmu_delete(struct mali_pmu_core *pmu);
+
+/** @brief Set registered cores mask
+ *
+ * @param pmu Pointer to PMU core object
+ * @param mask All available/valid domain bits
+ */
+void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask);
+
+/** @brief Retrieves the Mali PMU core object (if any)
+ *
+ * @return The Mali PMU object, or NULL if no PMU exists.
+ */
+MALI_STATIC_INLINE struct mali_pmu_core *mali_pmu_get_global_pmu_core(void)
+{
+	return mali_global_pmu_core;
+}
+
+/** @brief Reset PMU core
+ *
+ * @param pmu Pointer to PMU core object to reset
+ */
+void mali_pmu_reset(struct mali_pmu_core *pmu);
+
+void mali_pmu_power_up_all(struct mali_pmu_core *pmu);
+
+void mali_pmu_power_down_all(struct mali_pmu_core *pmu);
+
+/** @brief Returns a mask of the currently powered up domains
+ *
+ * @param pmu Pointer to PMU core object
+ */
+MALI_STATIC_INLINE u32 mali_pmu_get_mask(struct mali_pmu_core *pmu)
+{
+	u32 stat = mali_hw_core_register_read(&pmu->hw_core, PMU_REG_ADDR_MGMT_STATUS);
+
+	return ((~stat) & pmu->registered_cores_mask);
+}
+
+/** @brief MALI GPU power down using MALI in-built PMU
+ *
+ * Called to power down the specified cores.
+ *
+ * @param pmu Pointer to PMU core object to power down
+ * @param mask Mask specifying which power domains to power down
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask);
+
+/** @brief MALI GPU power up using MALI in-built PMU
+ *
+ * Called to power up the specified cores.
+ *
+ * @param pmu Pointer to PMU core object to power up
+ * @param mask Mask specifying which power domains to power up
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask);
+
+#endif /* __MALI_PMU_H__ */
diff --git a/utgard/r7p0/common/mali_pp.c b/utgard/r7p0/common/mali_pp.c
new file mode 100644
index 0000000..a227272
--- /dev/null
+++ b/utgard/r7p0/common/mali_pp.c
@@ -0,0 +1,510 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pp_job.h"
+#include "mali_pp.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "regs/mali_200_regs.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+
+#include <mali_platform.h>
+
+/* Number of frame registers on Mali-200 */
+#define MALI_PP_MALI200_NUM_FRAME_REGISTERS ((0x04C/4)+1)
+/* Number of frame registers on Mali-300 and later */
+#define MALI_PP_MALI400_NUM_FRAME_REGISTERS ((0x058/4)+1)
+
+static struct mali_pp_core *mali_global_pp_cores[MALI_MAX_NUMBER_OF_PP_CORES] = { NULL };
+static u32 mali_global_num_pp_cores;
+
+/* Interrupt handlers */
+static void mali_pp_irq_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data);
+
+struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id)
+{
+	struct mali_pp_core *core = NULL;
+
+	MALI_DEBUG_PRINT(2, ("Mali PP: Creating Mali PP core: %s\n", resource->description));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Base address of PP core: 0x%x\n", resource->base));
+
+	if (mali_global_num_pp_cores >= MALI_MAX_NUMBER_OF_PP_CORES) {
+		MALI_PRINT_ERROR(("Mali PP: Too many PP core objects created\n"));
+		return NULL;
+	}
+
+	core = _mali_osk_calloc(1, sizeof(struct mali_pp_core));
+	if (core != NULL) {
+		core->core_id = mali_global_num_pp_cores;
+		core->bcast_id = bcast_id;
+
+		if (mali_hw_core_create(&core->hw_core, resource, MALI200_REG_SIZEOF_REGISTER_BANK) == _MALI_OSK_ERR_OK) {
+			_mali_osk_errcode_t ret;
+
+			if (!is_virtual) {
+				ret = mali_pp_reset(core);
+			} else {
+				ret = _MALI_OSK_ERR_OK;
+			}
+
+			if (ret == _MALI_OSK_ERR_OK) {
+				ret = mali_group_add_pp_core(group, core);
+				if (ret == _MALI_OSK_ERR_OK) {
+					/* Setup IRQ handlers (which will do IRQ probing if needed) */
+					MALI_DEBUG_ASSERT(!is_virtual || -1 != resource->irq);
+
+					core->irq = _mali_osk_irq_init(resource->irq,
+								       mali_group_upper_half_pp,
+								       group,
+								       mali_pp_irq_probe_trigger,
+								       mali_pp_irq_probe_ack,
+								       core,
+								       resource->description);
+					if (core->irq != NULL) {
+						mali_global_pp_cores[mali_global_num_pp_cores] = core;
+						mali_global_num_pp_cores++;
+
+						return core;
+					} else {
+						MALI_PRINT_ERROR(("Mali PP: Failed to setup interrupt handlers for PP core %s\n", core->hw_core.description));
+					}
+					mali_group_remove_pp_core(group);
+				} else {
+					MALI_PRINT_ERROR(("Mali PP: Failed to add core %s to group\n", core->hw_core.description));
+				}
+			}
+			mali_hw_core_delete(&core->hw_core);
+		}
+
+		_mali_osk_free(core);
+	} else {
+		MALI_PRINT_ERROR(("Mali PP: Failed to allocate memory for PP core\n"));
+	}
+
+	return NULL;
+}
+
+void mali_pp_delete(struct mali_pp_core *core)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	_mali_osk_irq_term(core->irq);
+	mali_hw_core_delete(&core->hw_core);
+
+	/* Remove core from global list */
+	for (i = 0; i < mali_global_num_pp_cores; i++) {
+		if (mali_global_pp_cores[i] == core) {
+			mali_global_pp_cores[i] = NULL;
+			mali_global_num_pp_cores--;
+
+			if (i != mali_global_num_pp_cores) {
+				/* We removed a PP core from the middle of the array -- move the last
+				 * PP core to the current position to close the gap */
+				mali_global_pp_cores[i] = mali_global_pp_cores[mali_global_num_pp_cores];
+				mali_global_pp_cores[mali_global_num_pp_cores] = NULL;
+			}
+
+			break;
+		}
+	}
+
+	_mali_osk_free(core);
+}
+
+void mali_pp_stop_bus(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	/* Will only send the stop bus command, and not wait for it to complete */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_STOP_BUS);
+}
+
+_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core)
+{
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Send the stop bus command. */
+	mali_pp_stop_bus(core);
+
+	/* Wait for bus to be stopped */
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		if (mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS) & MALI200_REG_VAL_STATUS_BUS_STOPPED)
+			break;
+	}
+
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Mali PP: Failed to stop bus on %s. Status: 0x%08x\n", core->hw_core.description, mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+		if (mali_gp_reset_fail < 65533)
+			mali_gp_reset_fail++;
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/* Frame register reset values.
+ * Taken from the Mali400 TRM, 3.6. Pixel processor control register summary */
+static const u32 mali_frame_registers_reset_values[_MALI_PP_MAX_FRAME_REGISTERS] = {
+	0x0, /* Renderer List Address Register */
+	0x0, /* Renderer State Word Base Address Register */
+	0x0, /* Renderer Vertex Base Register */
+	0x2, /* Feature Enable Register */
+	0x0, /* Z Clear Value Register */
+	0x0, /* Stencil Clear Value Register */
+	0x0, /* ABGR Clear Value 0 Register */
+	0x0, /* ABGR Clear Value 1 Register */
+	0x0, /* ABGR Clear Value 2 Register */
+	0x0, /* ABGR Clear Value 3 Register */
+	0x0, /* Bounding Box Left Right Register */
+	0x0, /* Bounding Box Bottom Register */
+	0x0, /* FS Stack Address Register */
+	0x0, /* FS Stack Size and Initial Value Register */
+	0x0, /* Reserved */
+	0x0, /* Reserved */
+	0x0, /* Origin Offset X Register */
+	0x0, /* Origin Offset Y Register */
+	0x75, /* Subpixel Specifier Register */
+	0x0, /* Tiebreak mode Register */
+	0x0, /* Polygon List Format Register */
+	0x0, /* Scaling Register */
+	0x0 /* Tilebuffer configuration Register */
+};
+
+/* WBx register reset values */
+static const u32 mali_wb_registers_reset_values[_MALI_PP_MAX_WB_REGISTERS] = {
+	0x0, /* WBx Source Select Register */
+	0x0, /* WBx Target Address Register */
+	0x0, /* WBx Target Pixel Format Register */
+	0x0, /* WBx Target AA Format Register */
+	0x0, /* WBx Target Layout */
+	0x0, /* WBx Target Scanline Length */
+	0x0, /* WBx Target Flags Register */
+	0x0, /* WBx MRT Enable Register */
+	0x0, /* WBx MRT Offset Register */
+	0x0, /* WBx Global Test Enable Register */
+	0x0, /* WBx Global Test Reference Value Register */
+	0x0  /* WBx Global Test Compare Function Register */
+};
+
+/* Performance Counter 0 Enable Register reset value */
+static const u32 mali_perf_cnt_enable_reset_value;
+
+extern int pp_hardware_reset;
+_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core)
+{
+	/* Bus must be stopped before calling this function */
+	const u32 reset_wait_target_register = MALI200_REG_ADDR_MGMT_PERF_CNT_0_LIMIT;
+	const u32 reset_invalid_value = 0xC0FFE000;
+	const u32 reset_check_value = 0xC01A0000;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+	MALI_DEBUG_PRINT(2, ("Mali PP: Hard reset of core %s\n", core->hw_core.description));
+	pp_hardware_reset++;
+
+	/* Set register to a bogus value. The register will be used to detect when reset is complete */
+	mali_hw_core_register_write_relaxed(&core->hw_core, reset_wait_target_register, reset_invalid_value);
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE);
+
+	/* Force core to reset */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET);
+
+	/* Wait for reset to be complete */
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value);
+		if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) {
+			break;
+		}
+	}
+
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Mali PP: The hard reset loop didn't work, unable to recover\n"));
+	}
+
+	mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, 0x00000000); /* set it back to the default */
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_pp_reset_async(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	MALI_DEBUG_PRINT(4, ("Mali PP: Reset of core %s\n", core->hw_core.description));
+
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET);
+}
+
+_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core)
+{
+	int i;
+	u32 rawstat = 0;
+
+	for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+		u32 status =  mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS);
+
+		if (!(status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE)) {
+			rawstat = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT);
+			if (rawstat == MALI400PP_REG_VAL_IRQ_RESET_COMPLETED) {
+				break;
+			}
+		}
+	}
+
+	if (i == MALI_REG_POLL_COUNT_FAST) {
+		MALI_PRINT_ERROR(("Mali PP: Failed to reset core %s, rawstat: 0x%08x\n",
+				  core->hw_core.description, rawstat));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Re-enable interrupts */
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core)
+{
+	mali_pp_reset_async(core);
+	return mali_pp_reset_wait(core);
+}
+
+void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual)
+{
+	u32 relative_address;
+	u32 start_index;
+	u32 nr_of_regs;
+	u32 *frame_registers = mali_pp_job_get_frame_registers(job);
+	u32 *wb0_registers = mali_pp_job_get_wb0_registers(job);
+	u32 *wb1_registers = mali_pp_job_get_wb1_registers(job);
+	u32 *wb2_registers = mali_pp_job_get_wb2_registers(job);
+	u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, sub_job);
+	u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, sub_job);
+
+	MALI_DEBUG_ASSERT_POINTER(core);
+
+	/* Write frame registers */
+
+	/*
+	 * There are two frame registers which are different for each sub job:
+	 * 1. The Renderer List Address Register (MALI200_REG_ADDR_FRAME)
+	 * 2. The FS Stack Address Register (MALI200_REG_ADDR_STACK)
+	 */
+	mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_FRAME, mali_pp_job_get_addr_frame(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_FRAME / sizeof(u32)]);
+
+	/* For virtual jobs, the stack address shouldn't be broadcast but written individually */
+	if (!mali_pp_job_is_virtual(job) || restart_virtual) {
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_STACK, mali_pp_job_get_addr_stack(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_STACK / sizeof(u32)]);
+	}
+
+	/* Write registers between MALI200_REG_ADDR_FRAME and MALI200_REG_ADDR_STACK */
+	relative_address = MALI200_REG_ADDR_RSW;
+	start_index = MALI200_REG_ADDR_RSW / sizeof(u32);
+	nr_of_regs = (MALI200_REG_ADDR_STACK - MALI200_REG_ADDR_RSW) / sizeof(u32);
+
+	mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core,
+			relative_address, &frame_registers[start_index],
+			nr_of_regs, &mali_frame_registers_reset_values[start_index]);
+
+	/* MALI200_REG_ADDR_STACK_SIZE */
+	relative_address = MALI200_REG_ADDR_STACK_SIZE;
+	start_index = MALI200_REG_ADDR_STACK_SIZE / sizeof(u32);
+
+	mali_hw_core_register_write_relaxed_conditional(&core->hw_core,
+			relative_address, frame_registers[start_index],
+			mali_frame_registers_reset_values[start_index]);
+
+	/* Skip 2 reserved registers */
+
+	/* Write remaining registers */
+	relative_address = MALI200_REG_ADDR_ORIGIN_OFFSET_X;
+	start_index = MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32);
+	nr_of_regs = MALI_PP_MALI400_NUM_FRAME_REGISTERS - MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32);
+
+	mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core,
+			relative_address, &frame_registers[start_index],
+			nr_of_regs, &mali_frame_registers_reset_values[start_index]);
+
+	/* Write WBx registers */
+	if (wb0_registers[0]) { /* M200_WB0_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB0, wb0_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (wb1_registers[0]) { /* M200_WB1_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB1, wb1_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (wb2_registers[0]) { /* M200_WB2_REG_SOURCE_SELECT register */
+		mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB2, wb2_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+	}
+
+	if (counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0);
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value);
+	}
+	if (counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1);
+		mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value);
+	}
+
+#ifdef CONFIG_MALI400_HEATMAPS_ENABLED
+	if (job->uargs.perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE) {
+		mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERFMON_CONTR, ((job->uargs.tilesx & 0x3FF) << 16) | 1);
+		mali_hw_core_register_write_relaxed(&core->hw_core,  MALI200_REG_ADDR_MGMT_PERFMON_BASE, job->uargs.heatmap_mem & 0xFFFFFFF8);
+	}
+#endif /* CONFIG_MALI400_HEATMAPS_ENABLED */
+
+	MALI_DEBUG_PRINT(3, ("Mali PP: Starting job 0x%08X part %u/%u on PP core %s\n", job, sub_job + 1, mali_pp_job_get_sub_job_count(job), core->hw_core.description));
+
+	/* Adding barrier to make sure all rester writes are finished */
+	_mali_osk_write_mem_barrier();
+
+	/* This is the command that starts the core.
+	 *
+	 * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just
+	 * force core to assert the completion interrupt.
+	 */
+#if !defined(PROFILING_SKIP_PP_JOBS)
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_START_RENDERING);
+#else
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_END_OF_FRAME);
+#endif
+
+	/* Adding barrier to make sure previous rester writes is finished */
+	_mali_osk_write_mem_barrier();
+}
+
+u32 mali_pp_core_get_version(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION);
+}
+
+struct mali_pp_core *mali_pp_get_global_pp_core(u32 index)
+{
+	if (mali_global_num_pp_cores > index) {
+		return mali_global_pp_cores[index];
+	}
+
+	return NULL;
+}
+
+u32 mali_pp_get_glob_num_pp_cores(void)
+{
+	return mali_global_num_pp_cores;
+}
+
+/* ------------- interrupt handling below ------------------ */
+static void mali_pp_irq_probe_trigger(void *data)
+{
+	struct mali_pp_core *core = (struct mali_pp_core *)data;
+
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_BUS_ERROR);
+	_mali_osk_mem_barrier();
+}
+
+static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data)
+{
+	struct mali_pp_core *core = (struct mali_pp_core *)data;
+	u32 irq_readout;
+
+	irq_readout = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS);
+	if (MALI200_REG_VAL_IRQ_BUS_ERROR & irq_readout) {
+		mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_BUS_ERROR);
+		_mali_osk_mem_barrier();
+		return _MALI_OSK_ERR_OK;
+	}
+
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+#if 0
+static void mali_pp_print_registers(struct mali_pp_core *core)
+{
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_VERSION = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_RAWSTAT = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_MASK = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC)));
+	MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE)));
+}
+#endif
+
+#if 0
+void mali_pp_print_state(struct mali_pp_core *core)
+{
+	MALI_DEBUG_PRINT(2, ("Mali PP: State: 0x%08x\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+}
+#endif
+
+void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob)
+{
+	u32 val0 = 0;
+	u32 val1 = 0;
+	u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, subjob);
+	u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, subjob);
+#if defined(CONFIG_MALI400_PROFILING)
+	int counter_index = COUNTER_FP_0_C0 + (2 * child->core_id);
+#endif
+
+	if (counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+		val0 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE);
+		mali_pp_job_set_perf_counter_value0(job, subjob, val0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(counter_index, val0);
+		_mali_osk_profiling_record_global_counters(counter_index, val0);
+#endif
+	}
+
+	if (counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+		val1 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE);
+		mali_pp_job_set_perf_counter_value1(job, subjob, val1);
+
+#if defined(CONFIG_MALI400_PROFILING)
+		_mali_osk_profiling_report_hw_counter(counter_index + 1, val1);
+		_mali_osk_profiling_record_global_counters(counter_index + 1, val1);
+#endif
+	}
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\tPP #%d: %s\n", core->core_id, core->hw_core.description);
+
+	return n;
+}
+#endif
diff --git a/utgard/r7p0/common/mali_pp.h b/utgard/r7p0/common/mali_pp.h
new file mode 100644
index 0000000..99a4bed
--- /dev/null
+++ b/utgard/r7p0/common/mali_pp.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PP_H__
+#define __MALI_PP_H__
+
+#include "mali_osk.h"
+#include "mali_pp_job.h"
+#include "mali_hw_core.h"
+
+struct mali_group;
+
+#define MALI_MAX_NUMBER_OF_PP_CORES        9
+
+/**
+ * Definition of the PP core struct
+ * Used to track a PP core in the system.
+ */
+struct mali_pp_core {
+	struct mali_hw_core  hw_core;           /**< Common for all HW cores */
+	_mali_osk_irq_t     *irq;               /**< IRQ handler */
+	u32                  core_id;           /**< Unique core ID */
+	u32                  bcast_id;          /**< The "flag" value used by the Mali-450 broadcast and DLBU unit */
+};
+
+_mali_osk_errcode_t mali_pp_initialize(void);
+void mali_pp_terminate(void);
+
+struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id);
+void mali_pp_delete(struct mali_pp_core *core);
+
+void mali_pp_stop_bus(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core);
+void mali_pp_reset_async(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core);
+
+void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual);
+
+u32 mali_pp_core_get_version(struct mali_pp_core *core);
+
+MALI_STATIC_INLINE u32 mali_pp_core_get_id(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return core->core_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_core_get_bcast_id(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return core->bcast_id;
+}
+
+struct mali_pp_core *mali_pp_get_global_pp_core(u32 index);
+u32 mali_pp_get_glob_num_pp_cores(void);
+
+/* Debug */
+u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size);
+
+/**
+ * Put instrumented HW counters from the core(s) to the job object (if enabled)
+ *
+ * parent and child is always the same, except for virtual jobs on Mali-450.
+ * In this case, the counters will be enabled on the virtual core (parent),
+ * but values need to be read from the child cores.
+ *
+ * @param parent The core used to see if the counters was enabled
+ * @param child The core to actually read the values from
+ * @job Job object to update with counter values (if enabled)
+ * @subjob Which subjob the counters are applicable for (core ID for virtual jobs)
+ */
+void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob);
+
+MALI_STATIC_INLINE const char *mali_pp_core_description(struct mali_pp_core *core)
+{
+	return core->hw_core.description;
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_pp_get_interrupt_result(struct mali_pp_core *core)
+{
+	u32 rawstat_used = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT) &
+			   MALI200_REG_VAL_IRQ_MASK_USED;
+	if (rawstat_used == 0) {
+		return MALI_INTERRUPT_RESULT_NONE;
+	} else if (rawstat_used == MALI200_REG_VAL_IRQ_END_OF_FRAME) {
+		return MALI_INTERRUPT_RESULT_SUCCESS;
+	}
+
+	return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_get_rawstat(struct mali_pp_core *core)
+{
+	MALI_DEBUG_ASSERT_POINTER(core);
+	return mali_hw_core_register_read(&core->hw_core,
+					  MALI200_REG_ADDR_MGMT_INT_RAWSTAT);
+}
+
+
+MALI_STATIC_INLINE u32 mali_pp_is_active(struct mali_pp_core *core)
+{
+	u32 status = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS);
+
+	return (status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_pp_mask_all_interrupts(struct mali_pp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE);
+}
+
+MALI_STATIC_INLINE void mali_pp_enable_interrupts(struct mali_pp_core *core)
+{
+	mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+}
+
+MALI_STATIC_INLINE void mali_pp_write_addr_renderer_list(struct mali_pp_core *core,
+		struct mali_pp_job *job, u32 subjob)
+{
+	u32 addr = mali_pp_job_get_addr_frame(job, subjob);
+
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_FRAME, addr);
+}
+
+
+MALI_STATIC_INLINE void mali_pp_write_addr_stack(struct mali_pp_core *core, struct mali_pp_job *job)
+{
+	u32 addr = mali_pp_job_get_addr_stack(job, core->core_id);
+
+	mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_STACK, addr);
+}
+
+#endif /* __MALI_PP_H__ */
diff --git a/utgard/r7p0/common/mali_pp_job.c b/utgard/r7p0/common/mali_pp_job.c
new file mode 100644
index 0000000..a31454a
--- /dev/null
+++ b/utgard/r7p0/common/mali_pp_job.c
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_pp.h"
+#include "mali_pp_job.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_executor.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#include "linux/mali_memory_dma_buf.h"
+#endif
+#include "mali_memory_swap_alloc.h"
+#include "mali_scheduler.h"
+
+static u32 pp_counter_src0 = MALI_HW_CORE_NO_COUNTER;   /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+static u32 pp_counter_src1 = MALI_HW_CORE_NO_COUNTER;   /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+static _mali_osk_atomic_t pp_counter_per_sub_job_count; /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */
+static u32 pp_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER };
+static u32 pp_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER };
+
+void mali_pp_job_initialize(void)
+{
+	_mali_osk_atomic_init(&pp_counter_per_sub_job_count, 0);
+}
+
+void mali_pp_job_terminate(void)
+{
+	_mali_osk_atomic_term(&pp_counter_per_sub_job_count);
+}
+
+struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session,
+				       _mali_uk_pp_start_job_s __user *uargs, u32 id)
+{
+	struct mali_pp_job *job;
+	u32 perf_counter_flag;
+
+	job = _mali_osk_calloc(1, sizeof(struct mali_pp_job));
+	if (job != NULL) {
+
+		_mali_osk_list_init(&job->list);
+		_mali_osk_list_init(&job->session_fb_lookup_list);
+		_mali_osk_atomic_inc(&session->number_of_pp_jobs);
+
+		if (_mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_pp_start_job_s)) != 0) {
+			goto fail;
+		}
+
+		if (job->uargs.num_cores > _MALI_PP_MAX_SUB_JOBS) {
+			MALI_PRINT_ERROR(("Mali PP job: Too many sub jobs specified in job object\n"));
+			goto fail;
+		}
+
+		if (!mali_pp_job_use_no_notification(job)) {
+			job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_FINISHED, sizeof(_mali_uk_pp_job_finished_s));
+			if (job->finished_notification == NULL) goto fail;
+		}
+
+		perf_counter_flag = mali_pp_job_get_perf_counter_flag(job);
+
+		/* case when no counters came from user space
+		 * so pass the debugfs / DS-5 provided global ones to the job object */
+		if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) ||
+		      (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) {
+			u32 sub_job_count = _mali_osk_atomic_read(&pp_counter_per_sub_job_count);
+
+			/* These counters apply for all virtual jobs, and where no per sub job counter is specified */
+			job->uargs.perf_counter_src0 = pp_counter_src0;
+			job->uargs.perf_counter_src1 = pp_counter_src1;
+
+			/* We only copy the per sub job array if it is enabled with at least one counter */
+			if (sub_job_count > 0) {
+				job->perf_counter_per_sub_job_count = sub_job_count;
+				_mali_osk_memcpy(job->perf_counter_per_sub_job_src0, pp_counter_per_sub_job_src0, sizeof(pp_counter_per_sub_job_src0));
+				_mali_osk_memcpy(job->perf_counter_per_sub_job_src1, pp_counter_per_sub_job_src1, sizeof(pp_counter_per_sub_job_src1));
+			}
+		}
+
+		job->session = session;
+		job->id = id;
+
+		job->sub_jobs_num = job->uargs.num_cores ? job->uargs.num_cores : 1;
+		job->pid = _mali_osk_get_pid();
+		job->tid = _mali_osk_get_tid();
+
+		_mali_osk_atomic_init(&job->sub_jobs_completed, 0);
+		_mali_osk_atomic_init(&job->sub_job_errors, 0);
+		job->swap_status = MALI_NO_SWAP_IN;
+		job->user_notification = MALI_FALSE;
+		job->num_pp_cores_in_virtual = 0;
+
+		if (job->uargs.num_memory_cookies > session->allocation_mgr.mali_allocation_num) {
+			MALI_PRINT_ERROR(("Mali PP job: The number of memory cookies is invalid !\n"));
+			goto fail;
+		}
+
+		if (job->uargs.num_memory_cookies > 0) {
+			u32 size;
+			u32 __user *memory_cookies = (u32 __user *)(uintptr_t)job->uargs.memory_cookies;
+
+			size = sizeof(*memory_cookies) * (job->uargs.num_memory_cookies);
+
+			job->memory_cookies = _mali_osk_malloc(size);
+			if (job->memory_cookies == NULL) {
+				MALI_PRINT_ERROR(("Mali PP job: Failed to allocate %d bytes of memory cookies!\n", size));
+				goto fail;
+			}
+
+			if (_mali_osk_copy_from_user(job->memory_cookies, memory_cookies, size) != 0) {
+				MALI_PRINT_ERROR(("Mali PP job: Failed to copy %d bytes of memory cookies from user!\n", size));
+				goto fail;
+			}
+		}
+
+		if (mali_pp_job_check(job) != _MALI_OSK_ERR_OK) {
+			/* Not a valid job. */
+			goto fail;
+		}
+
+		mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_PP, NULL, job);
+		mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence));
+
+		mali_mem_swap_in_pages(job);
+
+		return job;
+	}
+
+fail:
+	if (job != NULL) {
+		mali_pp_job_delete(job);
+	}
+
+	return NULL;
+}
+
+void mali_pp_job_delete(struct mali_pp_job *job)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list));
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->session_fb_lookup_list));
+
+	session = mali_pp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (job->memory_cookies != NULL) {
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+		/* Unmap buffers attached to job */
+		mali_dma_buf_unmap_job(job);
+#endif
+		if (job->swap_status != MALI_NO_SWAP_IN) {
+			mali_mem_swap_out_pages(job);
+		}
+
+		_mali_osk_free(job->memory_cookies);
+	}
+
+	if (job->user_notification) {
+		mali_scheduler_return_pp_job_to_user(job,
+						     job->num_pp_cores_in_virtual);
+	}
+
+	if (job->finished_notification != NULL) {
+		_mali_osk_notification_delete(job->finished_notification);
+	}
+
+	_mali_osk_atomic_term(&job->sub_jobs_completed);
+	_mali_osk_atomic_term(&job->sub_job_errors);
+	_mali_osk_atomic_dec(&session->number_of_pp_jobs);
+	_mali_osk_free(job);
+
+	_mali_osk_wait_queue_wake_up(session->wait_queue);
+}
+
+void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list)
+{
+	struct mali_pp_job *iter;
+	struct mali_pp_job *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Find position in list/queue where job should be added. */
+	_MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list,
+					    struct mali_pp_job, list) {
+		/* job should be started after iter if iter is in progress. */
+		if (iter->sub_jobs_started > 0) {
+			break;
+		}
+
+		/*
+		 * job should be started after iter if it has a higher
+		 * job id. A span is used to handle job id wrapping.
+		 */
+		if ((mali_pp_job_get_id(job) -
+		     mali_pp_job_get_id(iter)) <
+		    MALI_SCHEDULER_JOB_ID_SPAN) {
+			break;
+		}
+	}
+
+	_mali_osk_list_add(&job->list, &iter->list);
+}
+
+
+u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job)
+{
+	/* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */
+	if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) {
+		return job->uargs.perf_counter_src0;
+	}
+
+	/* Use per sub job counter if enabled... */
+	if (job->perf_counter_per_sub_job_src0[sub_job] != MALI_HW_CORE_NO_COUNTER) {
+		return job->perf_counter_per_sub_job_src0[sub_job];
+	}
+
+	/* ...else default to global job counter */
+	return job->uargs.perf_counter_src0;
+}
+
+u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job)
+{
+	/* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */
+	if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) {
+		/* Virtual jobs always use the global job counter */
+		return job->uargs.perf_counter_src1;
+	}
+
+	/* Use per sub job counter if enabled... */
+	if (job->perf_counter_per_sub_job_src1[sub_job] != MALI_HW_CORE_NO_COUNTER) {
+		return job->perf_counter_per_sub_job_src1[sub_job];
+	}
+
+	/* ...else default to global job counter */
+	return job->uargs.perf_counter_src1;
+}
+
+void mali_pp_job_set_pp_counter_global_src0(u32 counter)
+{
+	pp_counter_src0 = counter;
+}
+
+void mali_pp_job_set_pp_counter_global_src1(u32 counter)
+{
+	pp_counter_src1 = counter;
+}
+
+void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+
+	if (pp_counter_per_sub_job_src0[sub_job] == MALI_HW_CORE_NO_COUNTER) {
+		/* increment count since existing counter was disabled */
+		_mali_osk_atomic_inc(&pp_counter_per_sub_job_count);
+	}
+
+	if (counter == MALI_HW_CORE_NO_COUNTER) {
+		/* decrement count since new counter is disabled */
+		_mali_osk_atomic_dec(&pp_counter_per_sub_job_count);
+	}
+
+	/* PS: A change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will inc and dec, result will be 0 change */
+
+	pp_counter_per_sub_job_src0[sub_job] = counter;
+}
+
+void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+
+	if (pp_counter_per_sub_job_src1[sub_job] == MALI_HW_CORE_NO_COUNTER) {
+		/* increment count since existing counter was disabled */
+		_mali_osk_atomic_inc(&pp_counter_per_sub_job_count);
+	}
+
+	if (counter == MALI_HW_CORE_NO_COUNTER) {
+		/* decrement count since new counter is disabled */
+		_mali_osk_atomic_dec(&pp_counter_per_sub_job_count);
+	}
+
+	/* PS: A change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will inc and dec, result will be 0 change */
+
+	pp_counter_per_sub_job_src1[sub_job] = counter;
+}
+
+u32 mali_pp_job_get_pp_counter_global_src0(void)
+{
+	return pp_counter_src0;
+}
+
+u32 mali_pp_job_get_pp_counter_global_src1(void)
+{
+	return pp_counter_src1;
+}
+
+u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+	return pp_counter_per_sub_job_src0[sub_job];
+}
+
+u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job)
+{
+	MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+	return pp_counter_per_sub_job_src1[sub_job];
+}
diff --git a/utgard/r7p0/common/mali_pp_job.h b/utgard/r7p0/common/mali_pp_job.h
new file mode 100644
index 0000000..630087a
--- /dev/null
+++ b/utgard/r7p0/common/mali_pp_job.h
@@ -0,0 +1,591 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PP_JOB_H__
+#define __MALI_PP_JOB_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "regs/mali_200_regs.h"
+#include "mali_kernel_core.h"
+#include "mali_dlbu.h"
+#include "mali_timeline.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#include "linux/mali_memory_dma_buf.h"
+#endif
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+#include "linux/mali_dma_fence.h"
+#include <linux/fence.h>
+#endif
+
+typedef enum pp_job_status {
+	MALI_NO_SWAP_IN,
+	MALI_SWAP_IN_FAIL,
+	MALI_SWAP_IN_SUCC,
+} pp_job_status;
+
+/**
+ * This structure represents a PP job, including all sub jobs.
+ *
+ * The PP job object itself is not protected by any single lock,
+ * but relies on other locks instead (scheduler, executor and timeline lock).
+ * Think of the job object as moving between these sub systems through-out
+ * its lifetime. Different part of the PP job struct is used by different
+ * subsystems. Accessor functions ensure that correct lock is taken.
+ * Do NOT access any data members directly from outside this module!
+ */
+struct mali_pp_job {
+	/*
+	 * These members are typically only set at creation,
+	 * and only read later on.
+	 * They do not require any lock protection.
+	 */
+	_mali_uk_pp_start_job_s uargs;                     /**< Arguments from user space */
+	struct mali_session_data *session;                 /**< Session which submitted this job */
+	u32 pid;                                           /**< Process ID of submitting process */
+	u32 tid;                                           /**< Thread ID of submitting thread */
+	u32 id;                                            /**< Identifier for this job in kernel space (sequential numbering) */
+	u32 cache_order;                                   /**< Cache order used for L2 cache flushing (sequential numbering) */
+	struct mali_timeline_tracker tracker;              /**< Timeline tracker for this job */
+	_mali_osk_notification_t *finished_notification;   /**< Notification sent back to userspace on job complete */
+	u32 perf_counter_per_sub_job_count;                /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */
+	u32 perf_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src0 */
+	u32 perf_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src1 */
+	u32 sub_jobs_num;                                  /**< Number of subjobs; set to 1 for Mali-450 if DLBU is used, otherwise equals number of PP cores */
+
+	pp_job_status swap_status;                         /**< Used to track each PP job swap status, if fail, we need to drop them in scheduler part */
+	mali_bool user_notification;                       /**< When we deferred delete PP job, we need to judge if we need to send job finish notification to user space */
+	u32 num_pp_cores_in_virtual;                       /**< How many PP cores we have when job finished */
+
+	/*
+	 * These members are used by both scheduler and executor.
+	 * They are "protected" by atomic operations.
+	 */
+	_mali_osk_atomic_t sub_jobs_completed;                            /**< Number of completed sub-jobs in this superjob */
+	_mali_osk_atomic_t sub_job_errors;                                /**< Bitfield with errors (errors for each single sub-job is or'ed together) */
+
+	/*
+	 * These members are used by scheduler, but only when no one else
+	 * knows about this job object but the working function.
+	 * No lock is thus needed for these.
+	 */
+	u32 *memory_cookies;                               /**< Memory cookies attached to job */
+
+	/*
+	 * These members are used by the scheduler,
+	 * protected by scheduler lock
+	 */
+	_mali_osk_list_t list;                             /**< Used to link jobs together in the scheduler queue */
+	_mali_osk_list_t session_fb_lookup_list;           /**< Used to link jobs together from the same frame builder in the session */
+
+	u32 sub_jobs_started;                              /**< Total number of sub-jobs started (always started in ascending order) */
+
+	/*
+	 * Set by executor/group on job completion, read by scheduler when
+	 * returning job to user. Hold executor lock when setting,
+	 * no lock needed when reading
+	 */
+	u32 perf_counter_value0[_MALI_PP_MAX_SUB_JOBS];    /**< Value of performance counter 0 (to be returned to user space), one for each sub job */
+	u32 perf_counter_value1[_MALI_PP_MAX_SUB_JOBS];    /**< Value of performance counter 1 (to be returned to user space), one for each sub job */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	struct mali_dma_fence_context dma_fence_context; /**< The mali dma fence context to record dma fence waiters that this job wait for */
+	struct fence *rendered_dma_fence; /**< the new dma fence link to this job */
+#endif
+};
+
+void mali_pp_job_initialize(void);
+void mali_pp_job_terminate(void);
+
+struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session, _mali_uk_pp_start_job_s *uargs, u32 id);
+void mali_pp_job_delete(struct mali_pp_job *job);
+
+u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job);
+u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job);
+
+void mali_pp_job_set_pp_counter_global_src0(u32 counter);
+void mali_pp_job_set_pp_counter_global_src1(u32 counter);
+void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter);
+void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter);
+
+u32 mali_pp_job_get_pp_counter_global_src0(void);
+u32 mali_pp_job_get_pp_counter_global_src1(void);
+u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job);
+u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job);
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job == NULL) ? 0 : job->id;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_cache_order(struct mali_pp_job *job,
+		u32 cache_order)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	job->cache_order = cache_order;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_cache_order(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job == NULL) ? 0 : job->cache_order;
+}
+
+MALI_STATIC_INLINE u64 mali_pp_job_get_user_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.user_job_ptr;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_frame_builder_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_builder_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_flush_id(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.flush_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_pid(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->pid;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_tid(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->tid;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_frame_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.frame_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_dlbu_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.dlbu_registers;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_virtual(struct mali_pp_job *job)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.num_cores == 0) ? MALI_TRUE : MALI_FALSE;
+#else
+	return MALI_FALSE;
+#endif
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_addr_frame(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (mali_pp_job_is_virtual(job)) {
+		return MALI_DLBU_VIRT_ADDR;
+	} else if (sub_job == 0) {
+		return job->uargs.frame_registers[MALI200_REG_ADDR_FRAME / sizeof(u32)];
+	} else if (sub_job < _MALI_PP_MAX_SUB_JOBS) {
+		return job->uargs.frame_registers_addr_frame[sub_job - 1];
+	}
+
+	return 0;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_addr_stack(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (sub_job == 0) {
+		return job->uargs.frame_registers[MALI200_REG_ADDR_STACK / sizeof(u32)];
+	} else if (sub_job < _MALI_PP_MAX_SUB_JOBS) {
+		return job->uargs.frame_registers_addr_stack[sub_job - 1];
+	}
+
+	return 0;
+}
+
+void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list);
+
+MALI_STATIC_INLINE void mali_pp_job_list_addtail(struct mali_pp_job *job,
+		_mali_osk_list_t *list)
+{
+	_mali_osk_list_addtail(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_list_move(struct mali_pp_job *job,
+		_mali_osk_list_t *list)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list));
+	_mali_osk_list_move(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_list_remove(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->list);
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb0_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb0_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb1_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb1_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb2_registers(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb2_registers;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb0_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb1_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb2_source_addr(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb0(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb1(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb2(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_all_writeback_unit_disabled(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] ||
+	    job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] ||
+	    job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT]
+	   ) {
+		/* At least one output unit active */
+		return MALI_FALSE;
+	}
+
+	/* All outputs are disabled - we can abort the job */
+	return MALI_TRUE;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_fb_lookup_add(struct mali_pp_job *job)
+{
+	u32 fb_lookup_id;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	fb_lookup_id = MALI_PP_JOB_FB_LOOKUP_LIST_MASK & job->uargs.frame_builder_id;
+
+	MALI_DEBUG_ASSERT(fb_lookup_id < MALI_PP_JOB_FB_LOOKUP_LIST_SIZE);
+
+	_mali_osk_list_addtail(&job->session_fb_lookup_list,
+			       &job->session->pp_job_fb_lookup_list[fb_lookup_id]);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_fb_lookup_remove(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	_mali_osk_list_delinit(&job->session_fb_lookup_list);
+}
+
+MALI_STATIC_INLINE struct mali_session_data *mali_pp_job_get_session(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->session;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_has_started_sub_jobs(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return (job->sub_jobs_started > 0) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_has_unstarted_sub_jobs(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return (job->sub_jobs_started < job->sub_jobs_num) ? MALI_TRUE : MALI_FALSE;
+}
+
+/* Function used when we are terminating a session with jobs. Return TRUE if it has a rendering job.
+   Makes sure that no new subjobs are started. */
+MALI_STATIC_INLINE void mali_pp_job_mark_unstarted_failed(struct mali_pp_job *job)
+{
+	u32 jobs_remaining;
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	jobs_remaining = job->sub_jobs_num - job->sub_jobs_started;
+	job->sub_jobs_started += jobs_remaining;
+
+	/* Not the most optimal way, but this is only used in error cases */
+	for (i = 0; i < jobs_remaining; i++) {
+		_mali_osk_atomic_inc(&job->sub_jobs_completed);
+		_mali_osk_atomic_inc(&job->sub_job_errors);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_complete(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->sub_jobs_num ==
+		_mali_osk_atomic_read(&job->sub_jobs_completed)) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_first_unstarted_sub_job(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	return job->sub_jobs_started;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_sub_job_count(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->sub_jobs_num;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_unstarted_sub_job_count(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(job->sub_jobs_num >= job->sub_jobs_started);
+	return (job->sub_jobs_num - job->sub_jobs_started);
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_num_memory_cookies(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.num_memory_cookies;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_memory_cookie(
+	struct mali_pp_job *job, u32 index)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT(index < job->uargs.num_memory_cookies);
+	MALI_DEBUG_ASSERT_POINTER(job->memory_cookies);
+	return job->memory_cookies[index];
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_needs_dma_buf_mapping(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	if (job->uargs.num_memory_cookies > 0) {
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_started(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+	/* Assert that we are marking the "first unstarted sub job" as started */
+	MALI_DEBUG_ASSERT(job->sub_jobs_started == sub_job);
+
+	job->sub_jobs_started++;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_completed(struct mali_pp_job *job, mali_bool success)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_atomic_inc(&job->sub_jobs_completed);
+	if (success == MALI_FALSE) {
+		_mali_osk_atomic_inc(&job->sub_job_errors);
+	}
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_was_success(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	if (_mali_osk_atomic_read(&job->sub_job_errors) == 0) {
+		return MALI_TRUE;
+	}
+	return MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_use_no_notification(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.flags & _MALI_PP_JOB_FLAG_NO_NOTIFICATION) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_pilot_job(struct mali_pp_job *job)
+{
+	/*
+	 * A pilot job is currently identified as jobs which
+	 * require no callback notification.
+	 */
+	return mali_pp_job_use_no_notification(job);
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *
+mali_pp_job_get_finished_notification(struct mali_pp_job *job)
+{
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->finished_notification);
+
+	notification = job->finished_notification;
+	job->finished_notification = NULL;
+
+	return notification;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_window_surface(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.flags & _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE)
+	       ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_protected_job(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (job->uargs.flags & _MALI_PP_JOB_FLAG_PROTECTED)
+	       ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_flag(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->uargs.perf_counter_flag;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value0(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value0[sub_job];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value1(struct mali_pp_job *job, u32 sub_job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return job->perf_counter_value1[sub_job];
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value0(struct mali_pp_job *job, u32 sub_job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value0[sub_job] = value;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value1(struct mali_pp_job *job, u32 sub_job, u32 value)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+	job->perf_counter_value1[sub_job] = value;
+}
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_pp_job_check(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	if (mali_pp_job_is_virtual(job) && job->sub_jobs_num != 1) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/**
+ * Returns MALI_TRUE if this job has more than two sub jobs and all sub jobs are unstarted.
+ *
+ * @param job Job to check.
+ * @return MALI_TRUE if job has more than two sub jobs and all sub jobs are unstarted, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_large_and_unstarted(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT(!mali_pp_job_is_virtual(job));
+
+	return (0 == job->sub_jobs_started && 2 < job->sub_jobs_num);
+}
+
+/**
+ * Get PP job's Timeline tracker.
+ *
+ * @param job PP job.
+ * @return Pointer to Timeline tracker for the job.
+ */
+MALI_STATIC_INLINE struct mali_timeline_tracker *mali_pp_job_get_tracker(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return &(job->tracker);
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_timeline_point_ptr(
+	struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr;
+}
+
+
+#endif /* __MALI_PP_JOB_H__ */
diff --git a/utgard/r7p0/common/mali_scheduler.c b/utgard/r7p0/common/mali_scheduler.c
new file mode 100644
index 0000000..68afc59
--- /dev/null
+++ b/utgard/r7p0/common/mali_scheduler.c
@@ -0,0 +1,1551 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_scheduler.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_profiling.h"
+#include "mali_kernel_utilization.h"
+#include "mali_timeline.h"
+#include "mali_gp_job.h"
+#include "mali_pp_job.h"
+#include "mali_executor.h"
+#include "mali_group.h"
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include "mali_pm_metrics.h"
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+#include "mali_dma_fence.h"
+#include <linux/dma-buf.h>
+#endif
+#endif
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+#include <linux/sched.h>
+#include <trace/events/gpu.h>
+#endif
+/*
+ * ---------- static defines/constants ----------
+ */
+
+/*
+ * If dma_buf with map on demand is used, we defer job queue
+ * if in atomic context, since both might sleep.
+ */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#define MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE 1
+#endif
+#endif
+
+
+/*
+ * ---------- global variables (exported due to inline functions) ----------
+ */
+
+/* Lock protecting this module */
+_mali_osk_spinlock_irq_t *mali_scheduler_lock_obj;
+
+/* Queue of jobs to be executed on the GP group */
+struct mali_scheduler_job_queue job_queue_gp;
+
+/* Queue of PP jobs */
+struct mali_scheduler_job_queue job_queue_pp;
+
+_mali_osk_atomic_t mali_job_id_autonumber;
+_mali_osk_atomic_t mali_job_cache_order_autonumber;
+/*
+ * ---------- static variables ----------
+ */
+
+_mali_osk_wq_work_t *scheduler_wq_pp_job_delete;
+_mali_osk_spinlock_irq_t *scheduler_pp_job_delete_lock;
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_deletion_queue);
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+static _mali_osk_wq_work_t *scheduler_wq_pp_job_queue;
+static _mali_osk_spinlock_irq_t *scheduler_pp_job_queue_lock;
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_queue_list);
+#endif
+
+/*
+ * ---------- Forward declaration of static functions ----------
+ */
+
+static mali_timeline_point mali_scheduler_submit_gp_job(
+	struct mali_session_data *session, struct mali_gp_job *job);
+static _mali_osk_errcode_t mali_scheduler_submit_pp_job(
+	struct mali_session_data *session, struct mali_pp_job *job, mali_timeline_point *point);
+
+static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job);
+static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job);
+
+static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job,
+		mali_bool success);
+
+static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job);
+void mali_scheduler_do_pp_job_delete(void *arg);
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job);
+static void mali_scheduler_do_pp_job_queue(void *arg);
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+/*
+ * ---------- Actual implementation ----------
+ */
+
+_mali_osk_errcode_t mali_scheduler_initialize(void)
+{
+	_mali_osk_atomic_init(&mali_job_id_autonumber, 0);
+	_mali_osk_atomic_init(&mali_job_cache_order_autonumber, 0);
+
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.normal_pri);
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.high_pri);
+	job_queue_gp.depth = 0;
+	job_queue_gp.big_job_num = 0;
+
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.normal_pri);
+	_MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.high_pri);
+	job_queue_pp.depth = 0;
+	job_queue_pp.big_job_num = 0;
+
+	mali_scheduler_lock_obj = _mali_osk_spinlock_irq_init(
+					  _MALI_OSK_LOCKFLAG_ORDERED,
+					  _MALI_OSK_LOCK_ORDER_SCHEDULER);
+	if (mali_scheduler_lock_obj == NULL) {
+		mali_scheduler_terminate();
+	}
+
+	scheduler_wq_pp_job_delete = _mali_osk_wq_create_work(
+					     mali_scheduler_do_pp_job_delete, NULL);
+	if (scheduler_wq_pp_job_delete == NULL) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	scheduler_pp_job_delete_lock = _mali_osk_spinlock_irq_init(
+					       _MALI_OSK_LOCKFLAG_ORDERED,
+					       _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED);
+	if (scheduler_pp_job_delete_lock == NULL) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	scheduler_wq_pp_job_queue = _mali_osk_wq_create_work(
+					    mali_scheduler_do_pp_job_queue, NULL);
+	if (scheduler_wq_pp_job_queue == NULL) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	scheduler_pp_job_queue_lock = _mali_osk_spinlock_irq_init(
+					      _MALI_OSK_LOCKFLAG_ORDERED,
+					      _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED);
+	if (scheduler_pp_job_queue_lock == NULL) {
+		mali_scheduler_terminate();
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_scheduler_terminate(void)
+{
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	if (scheduler_pp_job_queue_lock != NULL) {
+		_mali_osk_spinlock_irq_term(scheduler_pp_job_queue_lock);
+		scheduler_pp_job_queue_lock = NULL;
+	}
+
+	if (scheduler_wq_pp_job_queue != NULL) {
+		_mali_osk_wq_delete_work(scheduler_wq_pp_job_queue);
+		scheduler_wq_pp_job_queue = NULL;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+	if (scheduler_pp_job_delete_lock != NULL) {
+		_mali_osk_spinlock_irq_term(scheduler_pp_job_delete_lock);
+		scheduler_pp_job_delete_lock = NULL;
+	}
+
+	if (scheduler_wq_pp_job_delete != NULL) {
+		_mali_osk_wq_delete_work(scheduler_wq_pp_job_delete);
+		scheduler_wq_pp_job_delete = NULL;
+	}
+
+	if (mali_scheduler_lock_obj != NULL) {
+		_mali_osk_spinlock_irq_term(mali_scheduler_lock_obj);
+		mali_scheduler_lock_obj = NULL;
+	}
+
+	_mali_osk_atomic_term(&mali_job_cache_order_autonumber);
+	_mali_osk_atomic_term(&mali_job_id_autonumber);
+}
+
+u32 mali_scheduler_job_physical_head_count(mali_bool gpu_mode_is_secure)
+{
+	/*
+	 * Count how many physical sub jobs are present from the head of queue
+	 * until the first virtual job is present.
+	 * Early out when we have reached maximum number of PP cores (8)
+	 */
+	u32 count = 0;
+	struct mali_pp_job *job;
+	struct mali_pp_job *temp;
+
+	/* Check for partially started normal pri jobs */
+	if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+		MALI_DEBUG_ASSERT(job_queue_pp.depth > 0);
+
+		job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+					   struct mali_pp_job, list);
+
+		MALI_DEBUG_ASSERT_POINTER(job);
+
+		if (mali_pp_job_has_started_sub_jobs(job) == MALI_TRUE) {
+			/*
+			 * Remember; virtual jobs can't be queued and started
+			 * at the same time, so this must be a physical job
+			 */
+			if ((MALI_FALSE  == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job))
+			    || (MALI_TRUE  == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job))) {
+
+				count += mali_pp_job_unstarted_sub_job_count(job);
+				if (count >= MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS) {
+					return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+				}
+			}
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.high_pri,
+				    struct mali_pp_job, list) {
+		if ((mali_pp_job_is_virtual(job) == MALI_FALSE)
+		    && ((MALI_FALSE  == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job))
+			|| (MALI_TRUE  == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job)))) {
+
+			count += mali_pp_job_unstarted_sub_job_count(job);
+			if (count >= MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS) {
+				return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+			}
+		} else {
+			/* Came across a virtual job, so stop counting */
+			return count;
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.normal_pri,
+				    struct mali_pp_job, list) {
+		if ((mali_pp_job_is_virtual(job) == MALI_FALSE)
+		    && (mali_pp_job_has_started_sub_jobs(job) == MALI_FALSE)
+		    && ((MALI_FALSE  == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job))
+			|| (MALI_TRUE  == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job)))) {
+
+			count += mali_pp_job_unstarted_sub_job_count(job);
+			if (count >= MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS) {
+				return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+			}
+		} else {
+			/* Came across a virtual job, so stop counting */
+			return count;
+		}
+	}
+	return count;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_next(void)
+{
+	struct mali_pp_job *job;
+	struct mali_pp_job *temp;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	/* Check for partially started normal pri jobs */
+	if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+		MALI_DEBUG_ASSERT(job_queue_pp.depth > 0);
+
+		job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+					   struct mali_pp_job, list);
+
+		MALI_DEBUG_ASSERT_POINTER(job);
+
+		if (mali_pp_job_has_started_sub_jobs(job) == MALI_TRUE) {
+			return job;
+		}
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.high_pri,
+				    struct mali_pp_job, list) {
+		return job;
+	}
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.normal_pri,
+				    struct mali_pp_job, list) {
+		return job;
+	}
+
+	return NULL;
+}
+
+mali_bool mali_scheduler_job_next_is_virtual(void)
+{
+	struct mali_pp_job *job;
+
+	job = mali_scheduler_job_pp_virtual_peek();
+	if (job != NULL) {
+		MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job));
+
+		return MALI_TRUE;
+	}
+
+	return MALI_FALSE;
+}
+
+struct mali_gp_job *mali_scheduler_job_gp_get(void)
+{
+	_mali_osk_list_t *queue;
+	struct mali_gp_job *job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+	MALI_DEBUG_ASSERT(job_queue_gp.depth > 0);
+	MALI_DEBUG_ASSERT(job_queue_gp.big_job_num <= job_queue_gp.depth);
+
+	if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) {
+		queue = &job_queue_gp.high_pri;
+	} else {
+		queue = &job_queue_gp.normal_pri;
+		MALI_DEBUG_ASSERT(!_mali_osk_list_empty(queue));
+	}
+
+	job = _MALI_OSK_LIST_ENTRY(queue->next, struct mali_gp_job, list);
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	mali_gp_job_list_remove(job);
+	job_queue_gp.depth--;
+	if (job->big_job) {
+		job_queue_gp.big_job_num--;
+		if (job_queue_gp.big_job_num < MALI_MAX_PENDING_BIG_JOB) {
+			/* wake up process */
+			wait_queue_head_t *queue = mali_session_get_wait_queue();
+
+			wake_up(queue);
+		}
+	}
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void)
+{
+	struct mali_pp_job *job = NULL;
+	struct mali_pp_job *tmp_job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	/*
+	 * For PP jobs we favour partially started jobs in normal
+	 * priority queue over unstarted jobs in high priority queue
+	 */
+
+	if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+		MALI_DEBUG_ASSERT(job_queue_pp.depth > 0);
+
+		tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+					       struct mali_pp_job, list);
+		MALI_DEBUG_ASSERT(tmp_job != NULL);
+
+		if (mali_pp_job_is_virtual(tmp_job) == MALI_FALSE) {
+			job = tmp_job;
+		}
+	}
+
+	if (NULL == job ||
+	    MALI_FALSE == mali_pp_job_has_started_sub_jobs(job)) {
+		/*
+		 * There isn't a partially started job in normal queue, so
+		 * look in high priority queue.
+		 */
+		if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+			MALI_DEBUG_ASSERT(job_queue_pp.depth > 0);
+
+			tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next,
+						       struct mali_pp_job, list);
+			MALI_DEBUG_ASSERT(tmp_job != NULL);
+
+			if (mali_pp_job_is_virtual(tmp_job) == MALI_FALSE) {
+				job = tmp_job;
+			}
+		}
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void)
+{
+	struct mali_pp_job *job = NULL;
+	struct mali_pp_job *tmp_job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+	if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+		MALI_DEBUG_ASSERT(job_queue_pp.depth > 0);
+
+		tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next,
+					       struct mali_pp_job, list);
+
+		if (mali_pp_job_is_virtual(tmp_job) == MALI_TRUE) {
+			job = tmp_job;
+		}
+	}
+
+	if (job == NULL) {
+		if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+			MALI_DEBUG_ASSERT(job_queue_pp.depth > 0);
+
+			tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+						       struct mali_pp_job, list);
+
+			if (mali_pp_job_is_virtual(tmp_job) == MALI_TRUE) {
+				job = tmp_job;
+			}
+		}
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job)
+{
+	struct mali_pp_job *job = mali_scheduler_job_pp_physical_peek();
+
+	MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job) == MALI_FALSE);
+
+	if (job != NULL) {
+		*sub_job = mali_pp_job_get_first_unstarted_sub_job(job);
+
+		mali_pp_job_mark_sub_job_started(job, *sub_job);
+		if (mali_pp_job_has_unstarted_sub_jobs(job) == MALI_FALSE) {
+			/* Remove from queue when last sub job has been retrieved */
+			mali_pp_job_list_remove(job);
+		}
+
+		job_queue_pp.depth--;
+
+		/*
+		 * Job about to start so it is no longer be
+		 * possible to discard WB
+		 */
+		mali_pp_job_fb_lookup_remove(job);
+	}
+
+	return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void)
+{
+	struct mali_pp_job *job = mali_scheduler_job_pp_virtual_peek();
+
+	MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job) == MALI_TRUE);
+
+	if (job != NULL) {
+		MALI_DEBUG_ASSERT(0 ==
+				  mali_pp_job_get_first_unstarted_sub_job(job));
+		MALI_DEBUG_ASSERT(1 ==
+				  mali_pp_job_get_sub_job_count(job));
+
+		mali_pp_job_mark_sub_job_started(job, 0);
+
+		mali_pp_job_list_remove(job);
+
+		job_queue_pp.depth--;
+
+		/*
+		 * Job about to start so it is no longer be
+		 * possible to discard WB
+		 */
+		mali_pp_job_fb_lookup_remove(job);
+	}
+
+	return job;
+}
+
+mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Timeline activation for job %u (0x%08X).\n",
+			     mali_gp_job_get_id(job), job));
+
+	mali_scheduler_lock();
+
+	if (!mali_scheduler_queue_gp_job(job)) {
+		/* Failed to enqueue job, release job (with error) */
+
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_gp_job_get_tracker(job));
+		mali_gp_job_signal_pp_tracker(job, MALI_FALSE);
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_gp_job(job, MALI_FALSE,
+					       MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	mali_scheduler_unlock();
+
+	return MALI_SCHEDULER_MASK_GP;
+}
+
+mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Timeline activation for job %u (0x%08X).\n",
+			     mali_pp_job_get_id(job), job));
+
+	if (MALI_TRUE == mali_timeline_tracker_activation_error(
+		    mali_pp_job_get_tracker(job))) {
+		MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Job %u (0x%08X) activated with error, aborting.\n",
+				     mali_pp_job_get_id(job), job));
+
+		mali_scheduler_lock();
+		mali_pp_job_fb_lookup_remove(job);
+		mali_pp_job_mark_unstarted_failed(job);
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(job));
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+	if (mali_pp_job_needs_dma_buf_mapping(job)) {
+		mali_scheduler_deferred_pp_job_queue(job);
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+	mali_scheduler_lock();
+
+	if (!mali_scheduler_queue_pp_job(job)) {
+		/* Failed to enqueue job, release job (with error) */
+		mali_pp_job_fb_lookup_remove(job);
+		mali_pp_job_mark_unstarted_failed(job);
+		mali_scheduler_unlock();
+
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(job));
+
+		/* This will notify user space and close the job object */
+		mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	mali_scheduler_unlock();
+	return MALI_SCHEDULER_MASK_PP;
+}
+
+void mali_scheduler_complete_gp_job(struct mali_gp_job *job,
+				    mali_bool success,
+				    mali_bool user_notification,
+				    mali_bool dequeued)
+{
+	if (user_notification) {
+		mali_scheduler_return_gp_job_to_user(job, success);
+	}
+
+	if (dequeued) {
+		_mali_osk_pm_dev_ref_put();
+
+		if (mali_utilization_enabled()) {
+			mali_utilization_gp_end();
+		}
+		mali_pm_record_gpu_idle(MALI_TRUE);
+	}
+
+	mali_gp_job_delete(job);
+}
+
+void mali_scheduler_complete_pp_job(struct mali_pp_job *job,
+				    u32 num_cores_in_virtual,
+				    mali_bool user_notification,
+				    mali_bool dequeued)
+{
+	job->user_notification = user_notification;
+	job->num_pp_cores_in_virtual = num_cores_in_virtual;
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	if (job->rendered_dma_fence != NULL)
+		mali_dma_fence_signal_and_put(&job->rendered_dma_fence);
+#endif
+
+	if (dequeued) {
+#if defined(CONFIG_MALI_DVFS)
+		if (mali_pp_job_is_window_surface(job)) {
+			struct mali_session_data *session;
+
+			session = mali_pp_job_get_session(job);
+			mali_session_inc_num_window_jobs(session);
+		}
+#endif
+		_mali_osk_pm_dev_ref_put();
+
+		if (mali_utilization_enabled()) {
+			mali_utilization_pp_end();
+		}
+		mali_pm_record_gpu_idle(MALI_FALSE);
+	}
+
+	/* With ZRAM feature enabled, all pp jobs will be force to use deferred delete. */
+	mali_scheduler_deferred_pp_job_delete(job);
+}
+
+void mali_scheduler_abort_session(struct mali_session_data *session)
+{
+	struct mali_gp_job *gp_job;
+	struct mali_gp_job *gp_tmp;
+	struct mali_pp_job *pp_job;
+	struct mali_pp_job *pp_tmp;
+
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_gp);
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_pp);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali scheduler: Aborting all queued jobs from session 0x%08X.\n",
+			     session));
+
+	mali_scheduler_lock();
+
+	/* Remove from GP normal priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.normal_pri,
+				    struct mali_gp_job, list) {
+		if (mali_gp_job_get_session(gp_job) == session) {
+			mali_gp_job_list_move(gp_job, &removed_jobs_gp);
+			job_queue_gp.depth--;
+			job_queue_gp.big_job_num -= gp_job->big_job ? 1 : 0;
+		}
+	}
+
+	/* Remove from GP high priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.high_pri,
+				    struct mali_gp_job, list) {
+		if (mali_gp_job_get_session(gp_job) == session) {
+			mali_gp_job_list_move(gp_job, &removed_jobs_gp);
+			job_queue_gp.depth--;
+			job_queue_gp.big_job_num -= gp_job->big_job ? 1 : 0;
+		}
+	}
+
+	/* Remove from PP normal priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp,
+				    &job_queue_pp.normal_pri,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_get_session(pp_job) == session) {
+			mali_pp_job_fb_lookup_remove(pp_job);
+
+			job_queue_pp.depth -=
+				mali_pp_job_unstarted_sub_job_count(
+					pp_job);
+			mali_pp_job_mark_unstarted_failed(pp_job);
+
+			if (mali_pp_job_has_unstarted_sub_jobs(pp_job) == MALI_FALSE) {
+				if (mali_pp_job_is_complete(pp_job)) {
+					mali_pp_job_list_move(pp_job,
+							      &removed_jobs_pp);
+				} else {
+					mali_pp_job_list_remove(pp_job);
+				}
+			}
+		}
+	}
+
+	/* Remove from PP high priority queue */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp,
+				    &job_queue_pp.high_pri,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_get_session(pp_job) == session) {
+			mali_pp_job_fb_lookup_remove(pp_job);
+
+			job_queue_pp.depth -=
+				mali_pp_job_unstarted_sub_job_count(
+					pp_job);
+			mali_pp_job_mark_unstarted_failed(pp_job);
+
+			if (mali_pp_job_has_unstarted_sub_jobs(pp_job) == MALI_FALSE) {
+				if (mali_pp_job_is_complete(pp_job)) {
+					mali_pp_job_list_move(pp_job,
+							      &removed_jobs_pp);
+				} else {
+					mali_pp_job_list_remove(pp_job);
+				}
+			}
+		}
+	}
+
+	/*
+	 * Release scheduler lock so we can release trackers
+	 * (which will potentially queue new jobs)
+	 */
+	mali_scheduler_unlock();
+
+	/* Release and complete all (non-running) found GP jobs  */
+	_MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &removed_jobs_gp,
+				    struct mali_gp_job, list) {
+		mali_timeline_tracker_release(mali_gp_job_get_tracker(gp_job));
+		mali_gp_job_signal_pp_tracker(gp_job, MALI_FALSE);
+		_mali_osk_list_delinit(&gp_job->list);
+		mali_scheduler_complete_gp_job(gp_job,
+					       MALI_FALSE, MALI_FALSE, MALI_TRUE);
+	}
+
+	/* Release and complete non-running PP jobs */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp, &removed_jobs_pp,
+				    struct mali_pp_job, list) {
+		mali_timeline_tracker_release(mali_pp_job_get_tracker(pp_job));
+		_mali_osk_list_delinit(&pp_job->list);
+		mali_scheduler_complete_pp_job(pp_job, 0,
+					       MALI_FALSE, MALI_TRUE);
+	}
+}
+
+_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx,
+		_mali_uk_gp_start_job_s *uargs)
+{
+	struct mali_session_data *session;
+	struct mali_gp_job *job;
+	mali_timeline_point point;
+	u32 __user *point_ptr = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)ctx;
+
+	job = mali_gp_job_create(session, uargs, mali_scheduler_get_new_id(),
+				 NULL);
+	if (job == NULL) {
+		MALI_PRINT_ERROR(("Failed to create GP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_gp_job_get_timeline_point_ptr(job);
+
+	point = mali_scheduler_submit_gp_job(session, job);
+
+	if (_mali_osk_put_user(((u32) point), point_ptr) != 0) {
+		/*
+		 * Let user space know that something failed
+		 * after the job was started.
+		 */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx,
+		_mali_uk_pp_start_job_s *uargs)
+{
+	_mali_osk_errcode_t ret;
+	struct mali_session_data *session;
+	struct mali_pp_job *job;
+	mali_timeline_point point;
+	u32 __user *point_ptr = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)ctx;
+
+	job = mali_pp_job_create(session, uargs, mali_scheduler_get_new_id());
+	if (job == NULL) {
+		MALI_PRINT_ERROR(("Failed to create PP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(job);
+
+	/* Submit PP job. */
+	ret = mali_scheduler_submit_pp_job(session, job, &point);
+	job = NULL;
+
+	if (ret == _MALI_OSK_ERR_OK) {
+		if (_mali_osk_put_user(((u32) point), point_ptr) != 0) {
+			/*
+			* Let user space know that something failed
+			* after the jobs were started.
+			*/
+			return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+		}
+	}
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx,
+		_mali_uk_pp_and_gp_start_job_s *uargs)
+{
+	_mali_osk_errcode_t ret;
+	struct mali_session_data *session;
+	_mali_uk_pp_and_gp_start_job_s kargs;
+	struct mali_pp_job *pp_job;
+	struct mali_gp_job *gp_job;
+	u32 __user *point_ptr = NULL;
+	mali_timeline_point point;
+	_mali_uk_pp_start_job_s __user *pp_args;
+	_mali_uk_gp_start_job_s __user *gp_args;
+
+	MALI_DEBUG_ASSERT_POINTER(ctx);
+	MALI_DEBUG_ASSERT_POINTER(uargs);
+
+	session = (struct mali_session_data *) ctx;
+
+	if (0 != _mali_osk_copy_from_user(&kargs, uargs,
+					  sizeof(_mali_uk_pp_and_gp_start_job_s))) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	pp_args = (_mali_uk_pp_start_job_s __user *)(uintptr_t)kargs.pp_args;
+	gp_args = (_mali_uk_gp_start_job_s __user *)(uintptr_t)kargs.gp_args;
+
+	pp_job = mali_pp_job_create(session, pp_args,
+				    mali_scheduler_get_new_id());
+	if (pp_job == NULL) {
+		MALI_PRINT_ERROR(("Failed to create PP job.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	gp_job = mali_gp_job_create(session, gp_args,
+				    mali_scheduler_get_new_id(),
+				    mali_pp_job_get_tracker(pp_job));
+	if (gp_job == NULL) {
+		MALI_PRINT_ERROR(("Failed to create GP job.\n"));
+		mali_pp_job_delete(pp_job);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(pp_job);
+
+	/* Submit GP job. */
+	mali_scheduler_submit_gp_job(session, gp_job);
+	gp_job = NULL;
+
+	/* Submit PP job. */
+	ret = mali_scheduler_submit_pp_job(session, pp_job, &point);
+	pp_job = NULL;
+
+	if (ret == _MALI_OSK_ERR_OK) {
+		if (_mali_osk_put_user(((u32) point), point_ptr) != 0) {
+			/*
+			* Let user space know that something failed
+			* after the jobs were started.
+			*/
+			return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+		}
+	}
+
+	return ret;
+}
+
+void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args)
+{
+	struct mali_session_data *session;
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+	u32 fb_lookup_id;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+	MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	fb_lookup_id = args->fb_id & MALI_PP_JOB_FB_LOOKUP_LIST_MASK;
+
+	mali_scheduler_lock();
+
+	/* Iterate over all jobs for given frame builder_id. */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp,
+				    &session->pp_job_fb_lookup_list[fb_lookup_id],
+				    struct mali_pp_job, session_fb_lookup_list) {
+		MALI_DEBUG_CODE(u32 disable_mask = 0);
+
+		if (mali_pp_job_get_frame_builder_id(job) !=
+		    (u32) args->fb_id) {
+			MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Disable WB mismatching FB.\n"));
+			continue;
+		}
+
+		MALI_DEBUG_CODE(disable_mask |= 0xD << (4 * 3));
+
+		if (mali_pp_job_get_wb0_source_addr(job) == args->wb0_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x1 << (4 * 1));
+			mali_pp_job_disable_wb0(job);
+		}
+
+		if (mali_pp_job_get_wb1_source_addr(job) == args->wb1_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x2 << (4 * 2));
+			mali_pp_job_disable_wb1(job);
+		}
+
+		if (mali_pp_job_get_wb2_source_addr(job) == args->wb2_memory) {
+			MALI_DEBUG_CODE(disable_mask |= 0x3 << (4 * 3));
+			mali_pp_job_disable_wb2(job);
+		}
+		MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Disable WB: 0x%X.\n",
+				     disable_mask));
+	}
+
+	mali_scheduler_unlock();
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_scheduler_dump_state(char *buf, u32 size)
+{
+	int n = 0;
+
+	n += _mali_osk_snprintf(buf + n, size - n, "GP queues\n");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tQueue depth: %u\n", job_queue_gp.depth);
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tNormal priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_gp.normal_pri) ?
+				"empty" : "not empty");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tHigh priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_gp.high_pri) ?
+				"empty" : "not empty");
+
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"PP queues\n");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tQueue depth: %u\n", job_queue_pp.depth);
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tNormal priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_pp.normal_pri)
+				? "empty" : "not empty");
+	n += _mali_osk_snprintf(buf + n, size - n,
+				"\tHigh priority queue is %s\n",
+				_mali_osk_list_empty(&job_queue_pp.high_pri)
+				? "empty" : "not empty");
+
+	n += _mali_osk_snprintf(buf + n, size - n, "\n");
+
+	return n;
+}
+#endif
+
+/*
+ * ---------- Implementation of static functions ----------
+ */
+
+static mali_timeline_point mali_scheduler_submit_gp_job(
+	struct mali_session_data *session, struct mali_gp_job *job)
+{
+	mali_timeline_point point;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	/* Add job to Timeline system. */
+	point = mali_timeline_system_add_tracker(session->timeline_system,
+			mali_gp_job_get_tracker(job), MALI_TIMELINE_GP);
+
+	return point;
+}
+
+static _mali_osk_errcode_t mali_scheduler_submit_pp_job(
+	struct mali_session_data *session, struct mali_pp_job *job, mali_timeline_point *point)
+
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	struct ww_acquire_ctx ww_actx;
+	u32 i;
+	u32 num_memory_cookies = 0;
+	struct reservation_object **reservation_object_list = NULL;
+	unsigned int num_reservation_object = 0;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	mali_scheduler_lock();
+	/*
+	 * Adding job to the lookup list used to quickly discard
+	 * writeback units of queued jobs.
+	 */
+	mali_pp_job_fb_lookup_add(job);
+	mali_scheduler_unlock();
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+
+	/* Allocate the reservation_object_list to list the dma reservation object of dependent dma buffer */
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+	if (num_memory_cookies > 0) {
+		reservation_object_list = kcalloc(num_memory_cookies, sizeof(struct reservation_object *), GFP_KERNEL);
+		if (reservation_object_list == NULL) {
+			MALI_PRINT_ERROR(("Failed to alloc the reservation object list.\n"));
+			ret = _MALI_OSK_ERR_NOMEM;
+			goto failed_to_alloc_reservation_object_list;
+		}
+	}
+
+	/* Add the dma reservation object into reservation_object_list*/
+	for (i = 0; i < num_memory_cookies; i++) {
+		mali_mem_backend *mem_backend = NULL;
+		struct reservation_object *tmp_reservation_object = NULL;
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+
+		mem_backend = mali_mem_backend_struct_search(session, mali_addr);
+
+		MALI_DEBUG_ASSERT_POINTER(mem_backend);
+
+		if (mem_backend == NULL) {
+			MALI_PRINT_ERROR(("Failed to find the memory backend for memory cookie[%d].\n", i));
+			goto failed_to_find_mem_backend;
+		}
+
+		if (mem_backend->type != MALI_MEM_DMA_BUF)
+			continue;
+
+		tmp_reservation_object = mem_backend->dma_buf.attachment->buf->resv;
+
+		if (tmp_reservation_object != NULL) {
+			mali_dma_fence_add_reservation_object_list(tmp_reservation_object,
+					reservation_object_list, &num_reservation_object);
+		}
+	}
+
+	/*
+	 * Add the mali dma fence callback to wait for all dependent dma buf,
+	 * and extend the timeline system to support dma fence,
+	 * then create the new internal dma fence to replace all last dma fence for dependent dma buf.
+	 */
+	if (num_reservation_object > 0) {
+		int error;
+		int num_dma_fence_waiter = 0;
+		/* Create one new dma fence.*/
+		job->rendered_dma_fence = mali_dma_fence_new(job->session->fence_context,
+					  _mali_osk_atomic_inc_return(&job->session->fence_seqno));
+
+		if (job->rendered_dma_fence == NULL) {
+			MALI_PRINT_ERROR(("Failed to creat one new dma fence.\n"));
+			ret = _MALI_OSK_ERR_FAULT;
+			goto failed_to_create_dma_fence;
+		}
+
+		/* In order to avoid deadlock, wait/wound mutex lock to lock all dma buffers*/
+
+		error = mali_dma_fence_lock_reservation_object_list(reservation_object_list,
+				num_reservation_object, &ww_actx);
+
+		if (error != 0) {
+			MALI_PRINT_ERROR(("Failed to lock all reservation objects.\n"));
+			ret = _MALI_OSK_ERR_FAULT;
+			goto failed_to_lock_reservation_object_list;
+		}
+
+		mali_dma_fence_context_init(&job->dma_fence_context,
+					    mali_timeline_dma_fence_callback, (void *)job);
+
+		/* Add dma fence waiters and dma fence callback. */
+		for (i = 0; i < num_reservation_object; i++) {
+			ret = mali_dma_fence_context_add_waiters(&job->dma_fence_context, reservation_object_list[i]);
+			if (ret != _MALI_OSK_ERR_OK) {
+				MALI_PRINT_ERROR(("Failed to add waiter into mali dma fence context.\n"));
+				goto failed_to_add_dma_fence_waiter;
+			}
+		}
+
+		for (i = 0; i < num_reservation_object; i++) {
+			reservation_object_add_excl_fence(reservation_object_list[i], job->rendered_dma_fence);
+		}
+
+		num_dma_fence_waiter = job->dma_fence_context.num_dma_fence_waiter;
+
+		/* Add job to Timeline system. */
+		(*point) = mali_timeline_system_add_tracker(session->timeline_system,
+				mali_pp_job_get_tracker(job), MALI_TIMELINE_PP);
+
+		if (num_dma_fence_waiter != 0) {
+			mali_dma_fence_context_dec_count(&job->dma_fence_context);
+		}
+
+		/* Unlock all wait/wound mutex lock. */
+		mali_dma_fence_unlock_reservation_object_list(reservation_object_list,
+				num_reservation_object, &ww_actx);
+	} else {
+		/* Add job to Timeline system. */
+		(*point) = mali_timeline_system_add_tracker(session->timeline_system,
+				mali_pp_job_get_tracker(job), MALI_TIMELINE_PP);
+	}
+
+	kfree(reservation_object_list);
+	return ret;
+#else
+	/* Add job to Timeline system. */
+	(*point) = mali_timeline_system_add_tracker(session->timeline_system,
+			mali_pp_job_get_tracker(job), MALI_TIMELINE_PP);
+#endif
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+failed_to_add_dma_fence_waiter:
+	mali_dma_fence_context_term(&job->dma_fence_context);
+	mali_dma_fence_unlock_reservation_object_list(reservation_object_list,
+			num_reservation_object, &ww_actx);
+failed_to_lock_reservation_object_list:
+	mali_dma_fence_signal_and_put(&job->rendered_dma_fence);
+failed_to_create_dma_fence:
+failed_to_find_mem_backend:
+	if (reservation_object_list != NULL)
+		kfree(reservation_object_list);
+failed_to_alloc_reservation_object_list:
+	mali_pp_job_fb_lookup_remove(job);
+#endif
+	return ret;
+}
+
+static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job)
+{
+	struct mali_session_data *session;
+	_mali_osk_list_t *queue;
+
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_gp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (unlikely(session->is_aborting)) {
+		MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Job %u (0x%08X) queued while session is aborting.\n",
+				     mali_gp_job_get_id(job), job));
+		return MALI_FALSE; /* job not queued */
+	}
+
+	mali_gp_job_set_cache_order(job, mali_scheduler_get_new_cache_order());
+
+	/* Determine which queue the job should be added to. */
+	if (session->use_high_priority_job_queue) {
+		queue = &job_queue_gp.high_pri;
+	} else {
+		queue = &job_queue_gp.normal_pri;
+	}
+
+	job_queue_gp.depth += 1;
+	job_queue_gp.big_job_num += (job->big_job) ? 1 : 0;
+
+	/* Add job to queue (mali_gp_job_queue_add find correct place). */
+	mali_gp_job_list_add(job, queue);
+
+	/*
+	 * We hold a PM reference for every job we hold queued (and running)
+	 * It is important that we take this reference after job has been
+	 * added the the queue so that any runtime resume could schedule this
+	 * job right there and then.
+	 */
+	_mali_osk_pm_dev_ref_get_async();
+
+	if (mali_utilization_enabled()) {
+		/*
+		 * We cheat a little bit by counting the GP as busy from the
+		 * time a GP job is queued. This will be fine because we only
+		 * loose the tiny idle gap between jobs, but we will instead
+		 * get less utilization work to do (less locks taken)
+		 */
+		mali_utilization_gp_start();
+	}
+
+	mali_pm_record_gpu_active(MALI_TRUE);
+
+	/* Add profiling events for job enqueued */
+	_mali_osk_profiling_add_event(
+		MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE,
+		mali_gp_job_get_pid(job),
+		mali_gp_job_get_tid(job),
+		mali_gp_job_get_frame_builder_id(job),
+		mali_gp_job_get_flush_id(job),
+		0);
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+	trace_gpu_job_enqueue(mali_gp_job_get_tid(job),
+			      mali_gp_job_get_id(job), "GP");
+#endif
+
+	MALI_DEBUG_PRINT(3, ("Mali GP scheduler: Job %u (0x%08X) queued\n",
+			     mali_gp_job_get_id(job), job));
+
+	return MALI_TRUE; /* job queued */
+}
+
+static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job)
+{
+	struct mali_session_data *session;
+	_mali_osk_list_t *queue = NULL;
+
+	MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_pp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (unlikely(session->is_aborting)) {
+		MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while session is aborting.\n",
+				     mali_pp_job_get_id(job), job));
+		return MALI_FALSE; /* job not queued */
+	} else if (unlikely(job->swap_status == MALI_SWAP_IN_FAIL)) {
+		MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while swap in failed.\n",
+				     mali_pp_job_get_id(job), job));
+		return MALI_FALSE;
+	}
+
+	mali_pp_job_set_cache_order(job, mali_scheduler_get_new_cache_order());
+
+	if (session->use_high_priority_job_queue) {
+		queue = &job_queue_pp.high_pri;
+	} else {
+		queue = &job_queue_pp.normal_pri;
+	}
+
+	job_queue_pp.depth +=
+		mali_pp_job_get_sub_job_count(job);
+
+	/* Add job to queue (mali_gp_job_queue_add find correct place). */
+	mali_pp_job_list_add(job, queue);
+
+	/*
+	 * We hold a PM reference for every job we hold queued (and running)
+	 * It is important that we take this reference after job has been
+	 * added the the queue so that any runtime resume could schedule this
+	 * job right there and then.
+	 */
+	_mali_osk_pm_dev_ref_get_async();
+
+	if (mali_utilization_enabled()) {
+		/*
+		 * We cheat a little bit by counting the PP as busy from the
+		 * time a PP job is queued. This will be fine because we only
+		 * loose the tiny idle gap between jobs, but we will instead
+		 * get less utilization work to do (less locks taken)
+		 */
+		mali_utilization_pp_start();
+	}
+
+	mali_pm_record_gpu_active(MALI_FALSE);
+
+	/* Add profiling events for job enqueued */
+	_mali_osk_profiling_add_event(
+		MALI_PROFILING_EVENT_TYPE_SINGLE |
+		MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+		MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE,
+		mali_pp_job_get_pid(job),
+		mali_pp_job_get_tid(job),
+		mali_pp_job_get_frame_builder_id(job),
+		mali_pp_job_get_flush_id(job),
+		0);
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+	trace_gpu_job_enqueue(mali_pp_job_get_tid(job),
+			      mali_pp_job_get_id(job), "PP");
+#endif
+
+	MALI_DEBUG_PRINT(3, ("Mali PP scheduler: %s job %u (0x%08X) with %u parts queued.\n",
+			     mali_pp_job_is_virtual(job)
+			     ? "Virtual" : "Physical",
+			     mali_pp_job_get_id(job), job,
+			     mali_pp_job_get_sub_job_count(job)));
+
+	return MALI_TRUE; /* job queued */
+}
+
+static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job,
+		mali_bool success)
+{
+	_mali_uk_gp_job_finished_s *jobres;
+	struct mali_session_data *session;
+	_mali_osk_notification_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_gp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	notification = mali_gp_job_get_finished_notification(job);
+	MALI_DEBUG_ASSERT_POINTER(notification);
+
+	jobres = notification->result_buffer;
+	MALI_DEBUG_ASSERT_POINTER(jobres);
+
+	jobres->pending_big_job_num = mali_scheduler_job_gp_big_job_count();
+
+	jobres->user_job_ptr = mali_gp_job_get_user_id(job);
+	if (success == MALI_TRUE) {
+		jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS;
+	} else {
+		jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR;
+	}
+	jobres->heap_current_addr = mali_gp_job_get_current_heap_addr(job);
+	jobres->perf_counter0 = mali_gp_job_get_perf_counter_value0(job);
+	jobres->perf_counter1 = mali_gp_job_get_perf_counter_value1(job);
+
+	mali_session_send_notification(session, notification);
+}
+
+void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job,
+		u32 num_cores_in_virtual)
+{
+	u32 i;
+	u32 num_counters_to_copy;
+	_mali_uk_pp_job_finished_s *jobres;
+	struct mali_session_data *session;
+	_mali_osk_notification_t *notification;
+
+	if (mali_pp_job_use_no_notification(job) == MALI_TRUE) {
+		return;
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	session = mali_pp_job_get_session(job);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	notification = mali_pp_job_get_finished_notification(job);
+	MALI_DEBUG_ASSERT_POINTER(notification);
+
+	jobres = notification->result_buffer;
+	MALI_DEBUG_ASSERT_POINTER(jobres);
+
+	jobres->user_job_ptr = mali_pp_job_get_user_id(job);
+	if (mali_pp_job_was_success(job) == MALI_TRUE) {
+		jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS;
+	} else {
+		jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR;
+	}
+
+	if (mali_pp_job_is_virtual(job)) {
+		num_counters_to_copy = num_cores_in_virtual;
+	} else {
+		num_counters_to_copy = mali_pp_job_get_sub_job_count(job);
+	}
+
+	for (i = 0; i < num_counters_to_copy; i++) {
+		jobres->perf_counter0[i] =
+			mali_pp_job_get_perf_counter_value0(job, i);
+		jobres->perf_counter1[i] =
+			mali_pp_job_get_perf_counter_value1(job, i);
+		jobres->perf_counter_src0 =
+			mali_pp_job_get_pp_counter_global_src0();
+		jobres->perf_counter_src1 =
+			mali_pp_job_get_pp_counter_global_src1();
+	}
+
+	mali_session_send_notification(session, notification);
+}
+
+static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock);
+	mali_pp_job_list_addtail(job, &scheduler_pp_job_deletion_queue);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock);
+
+	_mali_osk_wq_schedule_work(scheduler_wq_pp_job_delete);
+}
+
+void mali_scheduler_do_pp_job_delete(void *arg)
+{
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(list);
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+
+	MALI_IGNORE(arg);
+
+	/*
+	 * Quickly "unhook" the jobs pending to be deleted, so we can release
+	 * the lock before we start deleting the job objects
+	 * (without any locks held)
+	 */
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock);
+	_mali_osk_list_move_list(&scheduler_pp_job_deletion_queue, &list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+		_mali_osk_list_delinit(&job->list);
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+		mali_dma_fence_context_term(&job->dma_fence_context);
+#endif
+
+		mali_pp_job_delete(job); /* delete the job object itself */
+	}
+}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+
+static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock);
+	mali_pp_job_list_addtail(job, &scheduler_pp_job_queue_list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock);
+
+	_mali_osk_wq_schedule_work(scheduler_wq_pp_job_queue);
+}
+
+static void mali_scheduler_do_pp_job_queue(void *arg)
+{
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(list);
+	struct mali_pp_job *job;
+	struct mali_pp_job *tmp;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_IGNORE(arg);
+
+	/*
+	 * Quickly "unhook" the jobs pending to be queued, so we can release
+	 * the lock before we start queueing the job objects
+	 * (without any locks held)
+	 */
+	_mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock);
+	_mali_osk_list_move_list(&scheduler_pp_job_queue_list, &list);
+	_mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock);
+
+	/* First loop through all jobs and do the pre-work (no locks needed) */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+		if (mali_pp_job_needs_dma_buf_mapping(job)) {
+			/*
+			 * This operation could fail, but we continue anyway,
+			 * because the worst that could happen is that this
+			 * job will fail due to a Mali page fault.
+			 */
+			mali_dma_buf_map_job(job);
+		}
+	}
+
+	mali_scheduler_lock();
+
+	/* Then loop through all jobs again to queue them (lock needed) */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+				    struct mali_pp_job, list) {
+
+		/* Remove from scheduler_pp_job_queue_list before queueing */
+		mali_pp_job_list_remove(job);
+
+		if (mali_scheduler_queue_pp_job(job)) {
+			/* Job queued successfully */
+			schedule_mask |= MALI_SCHEDULER_MASK_PP;
+		} else {
+			/* Failed to enqueue job, release job (with error) */
+			mali_pp_job_fb_lookup_remove(job);
+			mali_pp_job_mark_unstarted_failed(job);
+
+			/* unlock scheduler in this uncommon case */
+			mali_scheduler_unlock();
+
+			schedule_mask |= mali_timeline_tracker_release(
+						 mali_pp_job_get_tracker(job));
+
+			/* Notify user space and close the job object */
+			mali_scheduler_complete_pp_job(job, 0, MALI_TRUE,
+						       MALI_FALSE);
+
+			mali_scheduler_lock();
+		}
+	}
+
+	mali_scheduler_unlock();
+
+	/* Trigger scheduling of jobs */
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+void mali_scheduler_gp_pp_job_queue_print(void)
+{
+	struct mali_gp_job *gp_job = NULL;
+	struct mali_gp_job *tmp_gp_job = NULL;
+	struct mali_pp_job *pp_job = NULL;
+	struct mali_pp_job *tmp_pp_job = NULL;
+
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+	MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj);
+
+	/* dump job queup status */
+	if ((0 == job_queue_gp.depth) && (job_queue_pp.depth == 0)) {
+		MALI_PRINT(("No GP&PP job in the job queue.\n"));
+		return;
+	}
+
+	MALI_PRINT(("Total (%d) GP job in the job queue.\n", job_queue_gp.depth));
+	if (job_queue_gp.depth > 0) {
+		if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(gp_job, tmp_gp_job, &job_queue_gp.high_pri,
+						    struct mali_gp_job, list) {
+				MALI_PRINT(("GP job(%p) id = %d tid = %d pid = %d in the gp job high_pri queue\n", gp_job, gp_job->id, gp_job->tid, gp_job->pid));
+			}
+		}
+
+		if (!_mali_osk_list_empty(&job_queue_gp.normal_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(gp_job, tmp_gp_job, &job_queue_gp.normal_pri,
+						    struct mali_gp_job, list) {
+				MALI_PRINT(("GP job(%p) id = %d tid = %d pid = %d in the gp job normal_pri queue\n", gp_job, gp_job->id, gp_job->tid, gp_job->pid));
+			}
+		}
+	}
+
+	MALI_PRINT(("Total (%d) PP job in the job queue.\n", job_queue_pp.depth));
+	if (job_queue_pp.depth > 0) {
+		if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(pp_job, tmp_pp_job, &job_queue_pp.high_pri,
+						    struct mali_pp_job, list) {
+				if (mali_pp_job_is_virtual(pp_job)) {
+					MALI_PRINT(("PP Virtual job(%p) id = %d tid = %d pid = %d in the pp job high_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				} else {
+					MALI_PRINT(("PP Physical job(%p) id = %d tid = %d pid = %d in the pp job high_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				}
+			}
+		}
+
+		if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+			_MALI_OSK_LIST_FOREACHENTRY(pp_job, tmp_pp_job, &job_queue_pp.normal_pri,
+						    struct mali_pp_job, list) {
+				if (mali_pp_job_is_virtual(pp_job)) {
+					MALI_PRINT(("PP Virtual job(%p) id = %d tid = %d pid = %d in the pp job normal_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				} else {
+					MALI_PRINT(("PP Physical job(%p) id = %d tid = %d pid = %d in the pp job normal_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+				}
+			}
+		}
+	}
+
+	/* dump group running job status */
+	mali_executor_running_status_print();
+}
diff --git a/utgard/r7p0/common/mali_scheduler.h b/utgard/r7p0/common/mali_scheduler.h
new file mode 100644
index 0000000..2fa22ee
--- /dev/null
+++ b/utgard/r7p0/common/mali_scheduler.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SCHEDULER_H__
+#define __MALI_SCHEDULER_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_scheduler_types.h"
+#include "mali_session.h"
+
+struct mali_scheduler_job_queue {
+	_MALI_OSK_LIST_HEAD(normal_pri); /* Queued jobs with normal priority */
+	_MALI_OSK_LIST_HEAD(high_pri);   /* Queued jobs with high priority */
+	u32 depth;                       /* Depth of combined queues. */
+	u32 big_job_num;
+};
+
+extern _mali_osk_spinlock_irq_t *mali_scheduler_lock_obj;
+
+/* Queue of jobs to be executed on the GP group */
+extern struct mali_scheduler_job_queue job_queue_gp;
+
+/* Queue of PP jobs */
+extern struct mali_scheduler_job_queue job_queue_pp;
+
+extern _mali_osk_atomic_t mali_job_id_autonumber;
+extern _mali_osk_atomic_t mali_job_cache_order_autonumber;
+
+#define MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+_mali_osk_errcode_t mali_scheduler_initialize(void);
+void mali_scheduler_terminate(void);
+
+MALI_STATIC_INLINE void mali_scheduler_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_scheduler_lock_obj);
+	MALI_DEBUG_PRINT(5, ("Mali scheduler: scheduler lock taken.\n"));
+}
+
+MALI_STATIC_INLINE void mali_scheduler_unlock(void)
+{
+	MALI_DEBUG_PRINT(5, ("Mali scheduler: Releasing scheduler lock.\n"));
+	_mali_osk_spinlock_irq_unlock(mali_scheduler_lock_obj);
+}
+
+MALI_STATIC_INLINE u32 mali_scheduler_job_gp_count(void)
+{
+	return job_queue_gp.depth;
+}
+MALI_STATIC_INLINE u32 mali_scheduler_job_gp_big_job_count(void)
+{
+	return job_queue_gp.big_job_num;
+}
+
+u32 mali_scheduler_job_physical_head_count(mali_bool gpu_mode_is_secure);
+
+mali_bool mali_scheduler_job_next_is_virtual(void);
+struct mali_pp_job *mali_scheduler_job_pp_next(void);
+
+struct mali_gp_job *mali_scheduler_job_gp_get(void);
+struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void);
+struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void);
+struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job);
+struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void);
+
+MALI_STATIC_INLINE u32 mali_scheduler_get_new_id(void)
+{
+	return _mali_osk_atomic_inc_return(&mali_job_id_autonumber);
+}
+
+MALI_STATIC_INLINE u32 mali_scheduler_get_new_cache_order(void)
+{
+	return _mali_osk_atomic_inc_return(&mali_job_cache_order_autonumber);
+}
+
+/**
+ * @brief Used by the Timeline system to queue a GP job.
+ *
+ * @note @ref mali_executor_schedule_from_mask() should be called if this
+ * function returns non-zero.
+ *
+ * @param job The GP job that is being activated.
+ *
+ * @return A scheduling bitmask that can be used to decide if scheduling is
+ * necessary after this call.
+ */
+mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job);
+
+/**
+ * @brief Used by the Timeline system to queue a PP job.
+ *
+ * @note @ref mali_executor_schedule_from_mask() should be called if this
+ * function returns non-zero.
+ *
+ * @param job The PP job that is being activated.
+ *
+ * @return A scheduling bitmask that can be used to decide if scheduling is
+ * necessary after this call.
+ */
+mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job);
+
+void mali_scheduler_complete_gp_job(struct mali_gp_job *job,
+				    mali_bool success,
+				    mali_bool user_notification,
+				    mali_bool dequeued);
+
+void mali_scheduler_complete_pp_job(struct mali_pp_job *job,
+				    u32 num_cores_in_virtual,
+				    mali_bool user_notification,
+				    mali_bool dequeued);
+
+void mali_scheduler_abort_session(struct mali_session_data *session);
+
+void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job,
+		u32 num_cores_in_virtual);
+
+#if MALI_STATE_TRACKING
+u32 mali_scheduler_dump_state(char *buf, u32 size);
+#endif
+
+void mali_scheduler_gp_pp_job_queue_print(void);
+
+#endif /* __MALI_SCHEDULER_H__ */
diff --git a/utgard/r7p0/common/mali_scheduler_types.h b/utgard/r7p0/common/mali_scheduler_types.h
new file mode 100644
index 0000000..db5aaf1
--- /dev/null
+++ b/utgard/r7p0/common/mali_scheduler_types.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SCHEDULER_TYPES_H__
+#define __MALI_SCHEDULER_TYPES_H__
+
+#include "mali_osk.h"
+
+#define MALI_SCHEDULER_JOB_ID_SPAN 65535
+
+/**
+ * Bitmask used for defered scheduling of subsystems.
+ */
+typedef u32 mali_scheduler_mask;
+
+#define MALI_SCHEDULER_MASK_GP (1<<0)
+#define MALI_SCHEDULER_MASK_PP (1<<1)
+
+#define MALI_SCHEDULER_MASK_EMPTY 0
+#define MALI_SCHEDULER_MASK_ALL (MALI_SCHEDULER_MASK_GP | MALI_SCHEDULER_MASK_PP)
+
+#endif /* __MALI_SCHEDULER_TYPES_H__ */
diff --git a/utgard/r7p0/common/mali_session.c b/utgard/r7p0/common/mali_session.c
new file mode 100755
index 0000000..11e4cf4
--- /dev/null
+++ b/utgard/r7p0/common/mali_session.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/sched.h>
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_session.h"
+#include "mali_ukk.h"
+#ifdef MALI_MEM_SWAP_TRACKING
+#include "mali_memory_swap_alloc.h"
+#endif
+
+_MALI_OSK_LIST_HEAD(mali_sessions);
+static u32 mali_session_count;
+
+_mali_osk_spinlock_irq_t *mali_sessions_lock;
+wait_queue_head_t pending_queue;
+
+_mali_osk_errcode_t mali_session_initialize(void)
+{
+	_MALI_OSK_INIT_LIST_HEAD(&mali_sessions);
+	/* init wait queue for big varying job */
+	init_waitqueue_head(&pending_queue);
+
+	mali_sessions_lock = _mali_osk_spinlock_irq_init(
+				     _MALI_OSK_LOCKFLAG_ORDERED,
+				     _MALI_OSK_LOCK_ORDER_SESSIONS);
+	if (mali_sessions_lock == NULL) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_session_terminate(void)
+{
+	if (mali_sessions_lock != NULL) {
+		_mali_osk_spinlock_irq_term(mali_sessions_lock);
+		mali_sessions_lock = NULL;
+	}
+}
+
+void mali_session_add(struct mali_session_data *session)
+{
+	mali_session_lock();
+	_mali_osk_list_add(&session->link, &mali_sessions);
+	mali_session_count++;
+	mali_session_unlock();
+}
+
+void mali_session_remove(struct mali_session_data *session)
+{
+	mali_session_lock();
+	_mali_osk_list_delinit(&session->link);
+	mali_session_count--;
+	mali_session_unlock();
+}
+
+u32 mali_session_get_count(void)
+{
+	return mali_session_count;
+}
+
+mali_bool mali_session_pp_job_is_empty(void *data)
+{
+	struct mali_session_data *session = (struct mali_session_data *)data;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (_mali_osk_atomic_read(&session->number_of_pp_jobs) == 0) {
+		return MALI_TRUE;
+	}
+	return MALI_FALSE;
+}
+
+wait_queue_head_t *mali_session_get_wait_queue(void)
+{
+	return &pending_queue;
+}
+
+/*
+ * Get the max completed window jobs from all active session,
+ * which will be used in window render frame per sec calculate
+ */
+#if defined(CONFIG_MALI_DVFS)
+u32 mali_session_max_window_num(void)
+{
+	struct mali_session_data *session, *tmp;
+	u32 max_window_num = 0;
+	u32 tmp_number = 0;
+
+	mali_session_lock();
+
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		tmp_number = _mali_osk_atomic_xchg(
+				     &session->number_of_window_jobs, 0);
+		if (max_window_num < tmp_number) {
+			max_window_num = tmp_number;
+		}
+	}
+
+	mali_session_unlock();
+
+	return max_window_num;
+}
+#endif
+void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx)
+{
+	struct mali_session_data *session, *tmp;
+	char task_comm[TASK_COMM_LEN];
+	char task_default[] = "not found";
+	struct task_struct *ttask;
+	u32 mali_mem_usage;
+	u32 total_mali_mem_size;
+#ifdef MALI_MEM_SWAP_TRACKING
+	u32 swap_pool_size;
+	u32 swap_unlock_size;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(print_ctx);
+	mali_session_lock();
+
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		ttask = pid_task(find_vpid(session->pid), PIDTYPE_PID);
+		if (ttask)
+			strncpy(task_comm, ttask->comm, sizeof(ttask->comm));
+		else
+			strncpy(task_comm, task_default, sizeof(task_default));
+#ifdef MALI_MEM_SWAP_TRACKING
+		_mali_osk_ctxprintf(print_ctx, "  %-25s  %-10u %-25s %-10u  %-15u  %-15u  %-10u  %-10u %-10u\n",
+				    session->comm, session->pid, task_comm,
+				    (unsigned int)((atomic_read(&session->mali_mem_allocated_pages)) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)session->max_mali_mem_allocated_size,
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_EXTERNAL])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_UMP])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_DMA_BUF])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_SWAP])) * _MALI_OSK_MALI_PAGE_SIZE)
+				   );
+#else
+		_mali_osk_ctxprintf(print_ctx, "  %-25s  %-10u %-25s %-10u  %-15u  %-15u  %-10u  %-10u\n",
+				    session->comm, session->pid, task_comm,
+				    (unsigned int)((atomic_read(&session->mali_mem_allocated_pages)) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)session->max_mali_mem_allocated_size,
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_EXTERNAL])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_UMP])) * _MALI_OSK_MALI_PAGE_SIZE),
+				    (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_DMA_BUF])) * _MALI_OSK_MALI_PAGE_SIZE)
+				   );
+
+#endif
+	}
+	mali_session_unlock();
+	mali_mem_usage  = _mali_ukk_report_memory_usage();
+	total_mali_mem_size = _mali_ukk_report_total_memory_size();
+	_mali_osk_ctxprintf(print_ctx, "Mali mem usage: %u\nMali mem limit: %u\n", mali_mem_usage, total_mali_mem_size);
+#ifdef MALI_MEM_SWAP_TRACKING
+	mali_mem_swap_tracking(&swap_pool_size, &swap_unlock_size);
+	_mali_osk_ctxprintf(print_ctx, "Mali swap mem pool : %u\nMali swap mem unlock: %u\n", swap_pool_size, swap_unlock_size);
+#endif
+}
diff --git a/utgard/r7p0/common/mali_session.h b/utgard/r7p0/common/mali_session.h
new file mode 100644
index 0000000..88abc71
--- /dev/null
+++ b/utgard/r7p0/common/mali_session.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SESSION_H__
+#define __MALI_SESSION_H__
+
+#include "mali_mmu_page_directory.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_memory_types.h"
+#include "mali_memory_manager.h"
+
+struct mali_timeline_system;
+struct mali_soft_system;
+
+/* Number of frame builder job lists per session. */
+#define MALI_PP_JOB_FB_LOOKUP_LIST_SIZE 16
+#define MALI_PP_JOB_FB_LOOKUP_LIST_MASK (MALI_PP_JOB_FB_LOOKUP_LIST_SIZE - 1)
+/*Max pending big job allowed in kernel*/
+#define MALI_MAX_PENDING_BIG_JOB (2)
+
+struct mali_session_data {
+	_mali_osk_notification_queue_t *ioctl_queue;
+
+	_mali_osk_wait_queue_t *wait_queue; /**The wait queue to wait for the number of pp job become 0.*/
+
+	_mali_osk_mutex_t *memory_lock; /**< Lock protecting the vm manipulation */
+	_mali_osk_mutex_t *cow_lock; /** < Lock protecting the cow memory free manipulation */
+#if 0
+	_mali_osk_list_t memory_head; /**< Track all the memory allocated in this session, for freeing on abnormal termination */
+#endif
+	struct mali_page_directory *page_directory; /**< MMU page directory for this session */
+
+	_MALI_OSK_LIST_HEAD(link); /**< Link for list of all sessions */
+	_MALI_OSK_LIST_HEAD(pp_job_list); /**< List of all PP jobs on this session */
+
+#if defined(CONFIG_MALI_DVFS)
+	_mali_osk_atomic_t number_of_window_jobs; /**< Record the window jobs completed on this session in a period */
+#endif
+	_mali_osk_atomic_t number_of_pp_jobs; /** < Record the pp jobs on this session */
+
+	_mali_osk_list_t pp_job_fb_lookup_list[MALI_PP_JOB_FB_LOOKUP_LIST_SIZE]; /**< List of PP job lists per frame builder id.  Used to link jobs from same frame builder. */
+	struct mali_soft_job_system *soft_job_system; /**< Soft job system for this session. */
+	struct mali_timeline_system *timeline_system; /**< Timeline system for this session. */
+
+	mali_bool is_aborting; /**< MALI_TRUE if the session is aborting, MALI_FALSE if not. */
+	mali_bool use_high_priority_job_queue; /**< If MALI_TRUE, jobs added from this session will use the high priority job queues. */
+	u32 pid;
+	char *comm;
+	atomic_t mali_mem_array[MALI_MEM_TYPE_MAX]; /**< The array to record mem types' usage for this session. */
+	atomic_t mali_mem_allocated_pages; /** The current allocated mali memory pages, which include mali os memory and mali dedicated memory.*/
+	size_t max_mali_mem_allocated_size; /**< The past max mali memory allocated size, which include mali os memory and mali dedicated memory. */
+	/* Added for new memroy system */
+	struct mali_allocation_manager allocation_mgr;
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	u32 fence_context;      /** <  The execution dma fence context this fence is run on. */
+	_mali_osk_atomic_t fence_seqno; /** < Alinear increasing sequence number for this dma fence context. */
+#endif
+};
+
+_mali_osk_errcode_t mali_session_initialize(void);
+void mali_session_terminate(void);
+
+/* List of all sessions. Actual list head in mali_kernel_core.c */
+extern _mali_osk_list_t mali_sessions;
+/* Lock to protect modification and access to the mali_sessions list */
+extern _mali_osk_spinlock_irq_t *mali_sessions_lock;
+
+MALI_STATIC_INLINE void mali_session_lock(void)
+{
+	_mali_osk_spinlock_irq_lock(mali_sessions_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_unlock(void)
+{
+	_mali_osk_spinlock_irq_unlock(mali_sessions_lock);
+}
+
+void mali_session_add(struct mali_session_data *session);
+void mali_session_remove(struct mali_session_data *session);
+u32 mali_session_get_count(void);
+mali_bool mali_session_pp_job_is_empty(void *data);
+wait_queue_head_t *mali_session_get_wait_queue(void);
+
+#define MALI_SESSION_FOREACH(session, tmp, link) \
+	_MALI_OSK_LIST_FOREACHENTRY(session, tmp, &mali_sessions, struct mali_session_data, link)
+
+MALI_STATIC_INLINE struct mali_page_directory *mali_session_get_page_directory(struct mali_session_data *session)
+{
+	return session->page_directory;
+}
+
+MALI_STATIC_INLINE void mali_session_memory_lock(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_mutex_wait(session->memory_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_memory_unlock(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_mutex_signal(session->memory_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_send_notification(struct mali_session_data *session, _mali_osk_notification_t *object)
+{
+	_mali_osk_notification_queue_send(session->ioctl_queue, object);
+}
+
+#if defined(CONFIG_MALI_DVFS)
+
+MALI_STATIC_INLINE void mali_session_inc_num_window_jobs(struct mali_session_data *session)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+	_mali_osk_atomic_inc(&session->number_of_window_jobs);
+}
+
+/*
+ * Get the max completed window jobs from all active session,
+ * which will be used in  window render frame per sec calculate
+ */
+u32 mali_session_max_window_num(void);
+
+#endif
+
+void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx);
+
+#endif /* __MALI_SESSION_H__ */
diff --git a/utgard/r7p0/common/mali_soft_job.c b/utgard/r7p0/common/mali_soft_job.c
new file mode 100644
index 0000000..adf0da6
--- /dev/null
+++ b/utgard/r7p0/common/mali_soft_job.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_soft_job.h"
+#include "mali_osk.h"
+#include "mali_timeline.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+
+MALI_STATIC_INLINE void mali_soft_job_system_lock(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	_mali_osk_spinlock_irq_lock(system->lock);
+	MALI_DEBUG_PRINT(5, ("Mali Soft Job: soft system %p lock taken\n", system));
+	MALI_DEBUG_ASSERT(system->lock_owner == 0);
+	MALI_DEBUG_CODE(system->lock_owner = _mali_osk_get_tid());
+}
+
+MALI_STATIC_INLINE void mali_soft_job_system_unlock(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_PRINT(5, ("Mali Soft Job: releasing soft system %p lock\n", system));
+	MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner);
+	MALI_DEBUG_CODE(system->lock_owner = 0);
+	_mali_osk_spinlock_irq_unlock(system->lock);
+}
+
+#if defined(DEBUG)
+MALI_STATIC_INLINE void mali_soft_job_system_assert_locked(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner);
+}
+#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system) mali_soft_job_system_assert_locked(system)
+#else
+#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system)
+#endif /* defined(DEBUG) */
+
+struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session)
+{
+	struct mali_soft_job_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	system = (struct mali_soft_job_system *) _mali_osk_calloc(1, sizeof(struct mali_soft_job_system));
+	if (system == NULL) {
+		return NULL;
+	}
+
+	system->session = session;
+
+	system->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_SCHEDULER);
+	if (system->lock == NULL) {
+		mali_soft_job_system_destroy(system);
+		return NULL;
+	}
+	system->lock_owner = 0;
+	system->last_job_id = 0;
+
+	_MALI_OSK_INIT_LIST_HEAD(&(system->jobs_used));
+
+	return system;
+}
+
+void mali_soft_job_system_destroy(struct mali_soft_job_system *system)
+{
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	/* All jobs should be free at this point. */
+	MALI_DEBUG_ASSERT(_mali_osk_list_empty(&(system->jobs_used)));
+
+	if (system != NULL) {
+		if (system->lock != NULL) {
+			_mali_osk_spinlock_irq_term(system->lock);
+		}
+		_mali_osk_free(system);
+	}
+}
+
+static void mali_soft_job_system_free_job(struct mali_soft_job_system *system, struct mali_soft_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_soft_job_system_lock(job->system);
+
+	MALI_DEBUG_ASSERT(job->id != MALI_SOFT_JOB_INVALID_ID);
+	MALI_DEBUG_ASSERT(system == job->system);
+
+	_mali_osk_list_del(&(job->system_list));
+
+	mali_soft_job_system_unlock(job->system);
+
+	_mali_osk_free(job);
+}
+
+MALI_STATIC_INLINE struct mali_soft_job *mali_soft_job_system_lookup_job(struct mali_soft_job_system *system, u32 job_id)
+{
+	struct mali_soft_job *job, *tmp;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) {
+		if (job->id == job_id)
+			return job;
+	}
+
+	return NULL;
+}
+
+void mali_soft_job_destroy(struct mali_soft_job *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: destroying soft job %u (0x%08X)\n", job->id, job));
+
+	if (job != NULL) {
+		if (_mali_osk_atomic_dec_return(&job->refcount) > 0) return;
+
+		_mali_osk_atomic_term(&job->refcount);
+
+		if (job->activated_notification != NULL) {
+			_mali_osk_notification_delete(job->activated_notification);
+			job->activated_notification = NULL;
+		}
+
+		mali_soft_job_system_free_job(job->system, job);
+	}
+}
+
+struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job)
+{
+	struct mali_soft_job *job;
+	_mali_osk_notification_t *notification = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT((type == MALI_SOFT_JOB_TYPE_USER_SIGNALED) ||
+			  (type == MALI_SOFT_JOB_TYPE_SELF_SIGNALED));
+
+	notification = _mali_osk_notification_create(_MALI_NOTIFICATION_SOFT_ACTIVATED, sizeof(_mali_uk_soft_job_activated_s));
+	if (unlikely(notification == NULL)) {
+		MALI_PRINT_ERROR(("Mali Soft Job: failed to allocate notification"));
+		return NULL;
+	}
+
+	job = _mali_osk_malloc(sizeof(struct mali_soft_job));
+	if (unlikely(job == NULL)) {
+		MALI_DEBUG_PRINT(2, ("Mali Soft Job: system alloc job failed.\n"));
+		return NULL;
+	}
+
+	mali_soft_job_system_lock(system);
+
+	job->system = system;
+	job->id = system->last_job_id++;
+	job->state = MALI_SOFT_JOB_STATE_ALLOCATED;
+
+	_mali_osk_list_add(&(job->system_list), &(system->jobs_used));
+
+	job->type = type;
+	job->user_job = user_job;
+	job->activated = MALI_FALSE;
+
+	job->activated_notification = notification;
+
+	_mali_osk_atomic_init(&job->refcount, 1);
+
+	MALI_DEBUG_ASSERT(job->state == MALI_SOFT_JOB_STATE_ALLOCATED);
+	MALI_DEBUG_ASSERT(system == job->system);
+	MALI_DEBUG_ASSERT(job->id != MALI_SOFT_JOB_INVALID_ID);
+
+	mali_soft_job_system_unlock(system);
+
+	return job;
+}
+
+mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence)
+{
+	mali_timeline_point point;
+	struct mali_soft_job_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	system = job->system;
+
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT_POINTER(system->session->timeline_system);
+
+	mali_soft_job_system_lock(system);
+
+	MALI_DEBUG_ASSERT(job->state == MALI_SOFT_JOB_STATE_ALLOCATED);
+	job->state = MALI_SOFT_JOB_STATE_STARTED;
+
+	mali_soft_job_system_unlock(system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: starting soft job %u (0x%08X)\n", job->id, job));
+
+	mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_SOFT, fence, job);
+	point = mali_timeline_system_add_tracker(system->session->timeline_system, &job->tracker, MALI_TIMELINE_SOFT);
+
+	return point;
+}
+
+static mali_bool mali_soft_job_is_activated(void *data)
+{
+	struct mali_soft_job *job;
+
+	job = (struct mali_soft_job *) data;
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	return job->activated;
+}
+
+_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id)
+{
+	struct mali_soft_job *job;
+	struct mali_timeline_system *timeline_system;
+	mali_scheduler_mask schedule_mask;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_soft_job_system_lock(system);
+
+	job = mali_soft_job_system_lookup_job(system, job_id);
+
+	if ((NULL == job) || (job->type != MALI_SOFT_JOB_TYPE_USER_SIGNALED)
+	    || !(MALI_SOFT_JOB_STATE_STARTED == job->state || MALI_SOFT_JOB_STATE_TIMED_OUT == job->state)) {
+		mali_soft_job_system_unlock(system);
+		MALI_PRINT_ERROR(("Mali Soft Job: invalid soft job id %u", job_id));
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	if (job->state == MALI_SOFT_JOB_STATE_TIMED_OUT) {
+		job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+		mali_soft_job_system_unlock(system);
+
+		MALI_DEBUG_ASSERT(job->activated == MALI_TRUE);
+		MALI_DEBUG_PRINT(4, ("Mali Soft Job: soft job %u (0x%08X) was timed out\n", job->id, job));
+		mali_soft_job_destroy(job);
+
+		return _MALI_OSK_ERR_TIMEOUT;
+	}
+
+	MALI_DEBUG_ASSERT(job->state == MALI_SOFT_JOB_STATE_STARTED);
+
+	job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+	mali_soft_job_system_unlock(system);
+
+	/* Since the job now is in signaled state, timeouts from the timeline system will be
+	 * ignored, and it is not possible to signal this job again. */
+
+	timeline_system = system->session->timeline_system;
+	MALI_DEBUG_ASSERT_POINTER(timeline_system);
+
+	/* Wait until activated. */
+	_mali_osk_wait_queue_wait_event(timeline_system->wait_queue, mali_soft_job_is_activated, (void *) job);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: signaling soft job %u (0x%08X)\n", job->id, job));
+
+	schedule_mask = mali_timeline_tracker_release(&job->tracker);
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+
+	mali_soft_job_destroy(job);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+static void mali_soft_job_send_activated_notification(struct mali_soft_job *job)
+{
+	if (job->activated_notification != NULL) {
+		_mali_uk_soft_job_activated_s *res = job->activated_notification->result_buffer;
+
+		res->user_job = job->user_job;
+		mali_session_send_notification(job->system->session, job->activated_notification);
+	}
+	job->activated_notification = NULL;
+}
+
+mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	MALI_DEBUG_ASSERT_POINTER(job->system->session);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline activation for soft job %u (0x%08X).\n", job->id, job));
+
+	mali_soft_job_system_lock(job->system);
+
+	if (unlikely(job->system->session->is_aborting)) {
+		MALI_DEBUG_PRINT(3, ("Mali Soft Job: Soft job %u (0x%08X) activated while session is aborting.\n", job->id, job));
+
+		mali_soft_job_system_unlock(job->system);
+
+		/* Since we are in shutdown, we can ignore the scheduling bitmask. */
+		mali_timeline_tracker_release(&job->tracker);
+		mali_soft_job_destroy(job);
+		return schedule_mask;
+	}
+
+	/* Send activated notification. */
+	mali_soft_job_send_activated_notification(job);
+
+	/* Wake up sleeping signaler. */
+	job->activated = MALI_TRUE;
+
+	/* If job type is self signaled, release tracker, move soft job to free list, and scheduler at once */
+	if (job->type == MALI_SOFT_JOB_TYPE_SELF_SIGNALED) {
+		MALI_DEBUG_ASSERT(job->state == MALI_SOFT_JOB_STATE_STARTED);
+
+		job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+		mali_soft_job_system_unlock(job->system);
+
+		schedule_mask |= mali_timeline_tracker_release(&job->tracker);
+
+		mali_soft_job_destroy(job);
+	} else {
+		_mali_osk_wait_queue_wake_up(job->tracker.system->wait_queue);
+
+		mali_soft_job_system_unlock(job->system);
+	}
+
+	return schedule_mask;
+}
+
+mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+	MALI_DEBUG_ASSERT_POINTER(job->system);
+	MALI_DEBUG_ASSERT_POINTER(job->system->session);
+	MALI_DEBUG_ASSERT(job->activated == MALI_TRUE);
+
+	MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline timeout for soft job %u (0x%08X).\n", job->id, job));
+
+	mali_soft_job_system_lock(job->system);
+
+	MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED  == job->state ||
+			  MALI_SOFT_JOB_STATE_SIGNALED == job->state);
+
+	if (unlikely(job->system->session->is_aborting)) {
+		/* The session is aborting.  This job will be released and destroyed by @ref
+		 * mali_soft_job_system_abort(). */
+		mali_soft_job_system_unlock(job->system);
+
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	if (job->state != MALI_SOFT_JOB_STATE_STARTED) {
+		MALI_DEBUG_ASSERT(job->state == MALI_SOFT_JOB_STATE_SIGNALED);
+
+		/* The job is about to be signaled, ignore timeout. */
+		MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeout on soft job %u (0x%08X) in signaled state.\n", job->id, job));
+		mali_soft_job_system_unlock(job->system);
+		return schedule_mask;
+	}
+
+	MALI_DEBUG_ASSERT(job->state == MALI_SOFT_JOB_STATE_STARTED);
+
+	job->state = MALI_SOFT_JOB_STATE_TIMED_OUT;
+	_mali_osk_atomic_inc(&job->refcount);
+
+	mali_soft_job_system_unlock(job->system);
+
+	schedule_mask = mali_timeline_tracker_release(&job->tracker);
+
+	mali_soft_job_destroy(job);
+
+	return schedule_mask;
+}
+
+void mali_soft_job_system_abort(struct mali_soft_job_system *system)
+{
+	struct mali_soft_job *job, *tmp;
+
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(jobs);
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting soft job system for session 0x%08X.\n", system->session));
+
+	mali_soft_job_system_lock(system);
+
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED   == job->state ||
+				  MALI_SOFT_JOB_STATE_TIMED_OUT == job->state);
+
+		if (job->state == MALI_SOFT_JOB_STATE_STARTED) {
+			/* If the job has been activated, we have to release the tracker and destroy
+			 * the job.  If not, the tracker will be released and the job destroyed when
+			 * it is activated. */
+			if (job->activated == MALI_TRUE) {
+				MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting unsignaled soft job %u (0x%08X).\n", job->id, job));
+
+				job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+				_mali_osk_list_move(&job->system_list, &jobs);
+			}
+		} else if (job->state == MALI_SOFT_JOB_STATE_TIMED_OUT) {
+			MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting timed out soft job %u (0x%08X).\n", job->id, job));
+
+			/* We need to destroy this soft job. */
+			_mali_osk_list_move(&job->system_list, &jobs);
+		}
+	}
+
+	mali_soft_job_system_unlock(system);
+
+	/* Release and destroy jobs. */
+	_MALI_OSK_LIST_FOREACHENTRY(job, tmp, &jobs, struct mali_soft_job, system_list) {
+		MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED  == job->state ||
+				  MALI_SOFT_JOB_STATE_TIMED_OUT == job->state);
+
+		if (job->state == MALI_SOFT_JOB_STATE_SIGNALED) {
+			mali_timeline_tracker_release(&job->tracker);
+		}
+
+		/* Move job back to used list before destroying. */
+		_mali_osk_list_move(&job->system_list, &system->jobs_used);
+
+		mali_soft_job_destroy(job);
+	}
+}
diff --git a/utgard/r7p0/common/mali_soft_job.h b/utgard/r7p0/common/mali_soft_job.h
new file mode 100644
index 0000000..42e4ae4
--- /dev/null
+++ b/utgard/r7p0/common/mali_soft_job.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SOFT_JOB_H__
+#define __MALI_SOFT_JOB_H__
+
+#include "mali_osk.h"
+
+#include "mali_timeline.h"
+
+struct mali_timeline_fence;
+struct mali_session_data;
+struct mali_soft_job;
+struct mali_soft_job_system;
+
+/**
+ * Soft job types.
+ *
+ * Soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED will only complete after activation if either
+ * they are signaled by user-space (@ref mali_soft_job_system_signaled_job) or if they are timed out
+ * by the Timeline system.
+ * Soft jobs of type MALI_SOFT_JOB_TYPE_SELF_SIGNALED will release job resource automatically
+ * in kernel when the job is activated.
+ */
+typedef enum mali_soft_job_type {
+	MALI_SOFT_JOB_TYPE_SELF_SIGNALED,
+	MALI_SOFT_JOB_TYPE_USER_SIGNALED,
+} mali_soft_job_type;
+
+/**
+ * Soft job state.
+ *
+ * mali_soft_job_system_start_job a job will first be allocated.The job's state set to MALI_SOFT_JOB_STATE_ALLOCATED.
+ * Once the job is added to the timeline system, the state changes to MALI_SOFT_JOB_STATE_STARTED.
+ *
+ * For soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED the state is changed to
+ * MALI_SOFT_JOB_STATE_SIGNALED when @ref mali_soft_job_system_signal_job is called and the soft
+ * job's state is MALI_SOFT_JOB_STATE_STARTED or MALI_SOFT_JOB_STATE_TIMED_OUT.
+ *
+ * If a soft job of type MALI_SOFT_JOB_TYPE_USER_SIGNALED is timed out before being signaled, the
+ * state is changed to MALI_SOFT_JOB_STATE_TIMED_OUT.  This can only happen to soft jobs in state
+ * MALI_SOFT_JOB_STATE_STARTED.
+ *
+ */
+typedef enum mali_soft_job_state {
+	MALI_SOFT_JOB_STATE_ALLOCATED,
+	MALI_SOFT_JOB_STATE_STARTED,
+	MALI_SOFT_JOB_STATE_SIGNALED,
+	MALI_SOFT_JOB_STATE_TIMED_OUT,
+} mali_soft_job_state;
+
+#define MALI_SOFT_JOB_INVALID_ID ((u32) -1)
+
+/**
+ * Soft job struct.
+ *
+ * Soft job can be used to represent any kind of CPU work done in kernel-space.
+ */
+typedef struct mali_soft_job {
+	mali_soft_job_type            type;                   /**< Soft job type.  Must be one of MALI_SOFT_JOB_TYPE_*. */
+	u64                           user_job;               /**< Identifier for soft job in user space. */
+	_mali_osk_atomic_t            refcount;               /**< Soft jobs are reference counted to prevent premature deletion. */
+	struct mali_timeline_tracker  tracker;                /**< Timeline tracker for soft job. */
+	mali_bool                     activated;              /**< MALI_TRUE if the job has been activated, MALI_FALSE if not. */
+	_mali_osk_notification_t     *activated_notification; /**< Pre-allocated notification object for ACTIVATED_NOTIFICATION. */
+
+	/* Protected by soft job system lock. */
+	u32                           id;                     /**< Used by user-space to find corresponding soft job in kernel-space. */
+	mali_soft_job_state           state;                  /**< State of soft job, must be one of MALI_SOFT_JOB_STATE_*. */
+	struct mali_soft_job_system  *system;                 /**< The soft job system this job is in. */
+	_mali_osk_list_t              system_list;            /**< List element used by soft job system. */
+} mali_soft_job;
+
+/**
+ * Per-session soft job system.
+ *
+ * The soft job system is used to manage all soft jobs that belongs to a session.
+ */
+typedef struct mali_soft_job_system {
+	struct mali_session_data *session;                    /**< The session this soft job system belongs to. */
+
+	_MALI_OSK_LIST_HEAD(jobs_used);                       /**< List of all allocated soft jobs. */
+
+	_mali_osk_spinlock_irq_t *lock;                       /**< Lock used to protect soft job system and its soft jobs. */
+	u32 lock_owner;                                       /**< Contains tid of thread that locked the system or 0, if not locked. */
+	u32 last_job_id;                                      /**< Recored the last job id protected by lock. */
+} mali_soft_job_system;
+
+/**
+ * Create a soft job system.
+ *
+ * @param session The session this soft job system will belong to.
+ * @return The new soft job system, or NULL if unsuccessful.
+ */
+struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session);
+
+/**
+ * Destroy a soft job system.
+ *
+ * @note The soft job must not have any started or activated jobs.  Call @ref
+ * mali_soft_job_system_abort first.
+ *
+ * @param system The soft job system we are destroying.
+ */
+void mali_soft_job_system_destroy(struct mali_soft_job_system *system);
+
+/**
+ * Create a soft job.
+ *
+ * @param system Soft job system to create soft job from.
+ * @param type Type of the soft job.
+ * @param user_job Identifier for soft job in user space.
+ * @return New soft job if successful, NULL if not.
+ */
+struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job);
+
+/**
+ * Destroy soft job.
+ *
+ * @param job Soft job to destroy.
+ */
+void mali_soft_job_destroy(struct mali_soft_job *job);
+
+/**
+ * Start a soft job.
+ *
+ * The soft job will be added to the Timeline system which will then activate it after all
+ * dependencies have been resolved.
+ *
+ * Create soft jobs with @ref mali_soft_job_create before starting them.
+ *
+ * @param job Soft job to start.
+ * @param fence Fence representing dependencies for this soft job.
+ * @return Point on soft job timeline.
+ */
+mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence);
+
+/**
+ * Use by user-space to signal that a soft job has completed.
+ *
+ * @note Only valid for soft jobs with type MALI_SOFT_JOB_TYPE_USER_SIGNALED.
+ *
+ * @note The soft job must be in state MALI_SOFT_JOB_STATE_STARTED for the signal to be successful.
+ *
+ * @note If the soft job was signaled successfully, or it received a time out, the soft job will be
+ * destroyed after this call and should no longer be used.
+ *
+ * @note This function will block until the soft job has been activated.
+ *
+ * @param system The soft job system the job was started in.
+ * @param job_id ID of soft job we are signaling.
+ *
+ * @return _MALI_OSK_ERR_ITEM_NOT_FOUND if the soft job ID was invalid, _MALI_OSK_ERR_TIMEOUT if the
+ * soft job was timed out or _MALI_OSK_ERR_OK if we successfully signaled the soft job.
+ */
+_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id);
+
+/**
+ * Used by the Timeline system to activate a soft job.
+ *
+ * @param job The soft job that is being activated.
+ * @return A scheduling bitmask.
+ */
+mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job);
+
+/**
+ * Used by the Timeline system to timeout a soft job.
+ *
+ * A soft job is timed out if it completes or is signaled later than MALI_TIMELINE_TIMEOUT_HZ after
+ * activation.
+ *
+ * @param job The soft job that is being timed out.
+ * @return A scheduling bitmask.
+ */
+mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job);
+
+/**
+ * Used to cleanup activated soft jobs in the soft job system on session abort.
+ *
+ * @param system The soft job system that is being aborted.
+ */
+void mali_soft_job_system_abort(struct mali_soft_job_system *system);
+
+#endif /* __MALI_SOFT_JOB_H__ */
diff --git a/utgard/r7p0/common/mali_spinlock_reentrant.c b/utgard/r7p0/common/mali_spinlock_reentrant.c
new file mode 100644
index 0000000..7b3b253
--- /dev/null
+++ b/utgard/r7p0/common/mali_spinlock_reentrant.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_spinlock_reentrant.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order)
+{
+	struct mali_spinlock_reentrant *spinlock;
+
+	spinlock = _mali_osk_calloc(1, sizeof(struct mali_spinlock_reentrant));
+	if (spinlock == NULL) {
+		return NULL;
+	}
+
+	spinlock->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, lock_order);
+	if (spinlock->lock == NULL) {
+		mali_spinlock_reentrant_term(spinlock);
+		return NULL;
+	}
+
+	return spinlock;
+}
+
+void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT(0 == spinlock->counter && 0 == spinlock->owner);
+
+	if (spinlock->lock != NULL) {
+		_mali_osk_spinlock_irq_term(spinlock->lock);
+	}
+
+	_mali_osk_free(spinlock);
+}
+
+void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	MALI_DEBUG_ASSERT(tid != 0);
+
+	MALI_DEBUG_PRINT(5, ("%s ^\n", __func__));
+
+	if (tid != spinlock->owner) {
+		_mali_osk_spinlock_irq_lock(spinlock->lock);
+		MALI_DEBUG_ASSERT(0 == spinlock->owner && 0 == spinlock->counter);
+		spinlock->owner = tid;
+	}
+
+	MALI_DEBUG_PRINT(5, ("%s v\n", __func__));
+
+	++spinlock->counter;
+}
+
+void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock);
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	MALI_DEBUG_ASSERT(0 != tid && tid == spinlock->owner);
+
+	--spinlock->counter;
+	if (spinlock->counter == 0) {
+		spinlock->owner = 0;
+		MALI_DEBUG_PRINT(5, ("%s release last\n", __func__));
+		_mali_osk_spinlock_irq_unlock(spinlock->lock);
+	}
+}
diff --git a/utgard/r7p0/common/mali_spinlock_reentrant.h b/utgard/r7p0/common/mali_spinlock_reentrant.h
new file mode 100644
index 0000000..4a412e4
--- /dev/null
+++ b/utgard/r7p0/common/mali_spinlock_reentrant.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_SPINLOCK_REENTRANT_H__
+#define __MALI_SPINLOCK_REENTRANT_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/**
+ * Reentrant spinlock.
+ */
+struct mali_spinlock_reentrant {
+	_mali_osk_spinlock_irq_t *lock;
+	u32               owner;
+	u32               counter;
+};
+
+/**
+ * Create a new reentrant spinlock.
+ *
+ * @param lock_order Lock order.
+ * @return New reentrant spinlock.
+ */
+struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order);
+
+/**
+ * Terminate reentrant spinlock and free any associated resources.
+ *
+ * @param spinlock Reentrant spinlock to terminate.
+ */
+void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock);
+
+/**
+ * Wait for reentrant spinlock to be signaled.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ */
+void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid);
+
+/**
+ * Signal reentrant spinlock.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ */
+void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid);
+
+/**
+ * Check if thread is holding reentrant spinlock.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ * @return MALI_TRUE if thread is holding spinlock, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_spinlock_reentrant_is_held(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+	MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+	return (tid == spinlock->owner && 0 < spinlock->counter);
+}
+
+#endif /* __MALI_SPINLOCK_REENTRANT_H__ */
diff --git a/utgard/r7p0/common/mali_timeline.c b/utgard/r7p0/common/mali_timeline.c
new file mode 100644
index 0000000..2feae1a
--- /dev/null
+++ b/utgard/r7p0/common/mali_timeline.c
@@ -0,0 +1,1768 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timeline.h"
+#include "mali_kernel_common.h"
+#include "mali_scheduler.h"
+#include "mali_soft_job.h"
+#include "mali_timeline_fence_wait.h"
+#include "mali_timeline_sync_fence.h"
+#include "mali_executor.h"
+#include "mali_pp_job.h"
+
+#define MALI_TIMELINE_SYSTEM_LOCKED(system) (mali_spinlock_reentrant_is_held((system)->spinlock, _mali_osk_get_tid()))
+
+/*
+ * Following three elements are used to record how many
+ * gp, physical pp or virtual pp jobs are delayed in the whole
+ * timeline system, we can use these three value to decide
+ * if need to deactivate idle group.
+ */
+_mali_osk_atomic_t gp_tracker_count;
+_mali_osk_atomic_t phy_pp_tracker_count;
+_mali_osk_atomic_t virt_pp_tracker_count;
+
+static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *waiter);
+
+#if defined(CONFIG_SYNC)
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+
+struct mali_deferred_fence_put_entry {
+	struct hlist_node list;
+	struct sync_fence *fence;
+};
+
+static HLIST_HEAD(mali_timeline_sync_fence_to_free_list);
+static DEFINE_SPINLOCK(mali_timeline_sync_fence_to_free_lock);
+
+static void put_sync_fences(struct work_struct *ignore)
+{
+	struct hlist_head list;
+	struct hlist_node *tmp, *pos;
+	unsigned long flags;
+	struct mali_deferred_fence_put_entry *o;
+
+	spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags);
+	hlist_move_list(&mali_timeline_sync_fence_to_free_list, &list);
+	spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags);
+
+	hlist_for_each_entry_safe(o, pos, tmp, &list, list) {
+		sync_fence_put(o->fence);
+		kfree(o);
+	}
+}
+
+static DECLARE_DELAYED_WORK(delayed_sync_fence_put, put_sync_fences);
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */
+
+/* Callback that is called when a sync fence a tracker is waiting on is signaled. */
+static void mali_timeline_sync_fence_callback(struct sync_fence *sync_fence, struct sync_fence_waiter *sync_fence_waiter)
+{
+	struct mali_timeline_system  *system;
+	struct mali_timeline_waiter  *waiter;
+	struct mali_timeline_tracker *tracker;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+	mali_bool is_aborting = MALI_FALSE;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	int fence_status = sync_fence->status;
+#else
+	int fence_status = atomic_read(&sync_fence->status);
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_waiter);
+
+	tracker = _MALI_OSK_CONTAINER_OF(sync_fence_waiter, struct mali_timeline_tracker, sync_fence_waiter);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	system = tracker->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	is_aborting = system->session->is_aborting;
+	if (!is_aborting && (fence_status < 0)) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, fence_status));
+		tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT;
+	}
+
+	waiter = tracker->waiter_sync;
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+
+	tracker->sync_fence = NULL;
+	tracker->fence.sync_fd = -1;
+
+	schedule_mask |= mali_timeline_system_release_waiter(system, waiter);
+
+	/* If aborting, wake up sleepers that are waiting for sync fence callbacks to complete. */
+	if (is_aborting) {
+		_mali_osk_wait_queue_wake_up(system->wait_queue);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	/*
+	 * Older versions of Linux, before 3.5, doesn't support fput() in interrupt
+	 * context. For those older kernels, allocate a list object and put the
+	 * fence object on that and defer the call to sync_fence_put() to a workqueue.
+	 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+	{
+		struct mali_deferred_fence_put_entry *obj;
+
+		obj = kzalloc(sizeof(struct mali_deferred_fence_put_entry), GFP_ATOMIC);
+		if (obj) {
+			unsigned long flags;
+			mali_bool schedule = MALI_FALSE;
+
+			obj->fence = sync_fence;
+
+			spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags);
+			if (hlist_empty(&mali_timeline_sync_fence_to_free_list))
+				schedule = MALI_TRUE;
+			hlist_add_head(&obj->list, &mali_timeline_sync_fence_to_free_list);
+			spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags);
+
+			if (schedule)
+				schedule_delayed_work(&delayed_sync_fence_put, 0);
+		}
+	}
+#else
+	sync_fence_put(sync_fence);
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */
+
+	if (!is_aborting) {
+		mali_executor_schedule_from_mask(schedule_mask, MALI_TRUE);
+	}
+}
+#endif /* defined(CONFIG_SYNC) */
+
+static mali_scheduler_mask mali_timeline_tracker_time_out(struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(tracker->type == MALI_TIMELINE_TRACKER_SOFT);
+
+	return mali_soft_job_system_timeout_job((struct mali_soft_job *) tracker->job);
+}
+
+static void mali_timeline_timer_callback(void *data)
+{
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *tracker;
+	struct mali_timeline *timeline;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+
+	timeline = (struct mali_timeline *) data;
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	if (!system->timer_enabled) {
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+		return;
+	}
+
+	tracker = timeline->tracker_tail;
+	timeline->timer_active = MALI_FALSE;
+
+	if (NULL != tracker && MALI_TRUE == tracker->timer_active) {
+		/* This is likely the delayed work that has been schedule out before cancelled. */
+		if (MALI_TIMELINE_TIMEOUT_HZ > (_mali_osk_time_tickcount() - tracker->os_tick_activate)) {
+			mali_spinlock_reentrant_signal(system->spinlock, tid);
+			return;
+		}
+
+		schedule_mask = mali_timeline_tracker_time_out(tracker);
+		tracker->timer_active = MALI_FALSE;
+	} else {
+		MALI_PRINT_ERROR(("Mali Timeline: Soft job timer callback without a waiting tracker.\n"));
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+void mali_timeline_system_stop_timer(struct mali_timeline_system *system)
+{
+	u32 i;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+	system->timer_enabled = MALI_FALSE;
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (timeline->delayed_work != NULL) {
+			_mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work);
+			timeline->timer_active = MALI_FALSE;
+		}
+	}
+}
+
+static void mali_timeline_destroy(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	if (timeline != NULL) {
+		/* Assert that the timeline object has been properly cleaned up before destroying it. */
+		MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next);
+		MALI_DEBUG_ASSERT(timeline->tracker_head == NULL);
+		MALI_DEBUG_ASSERT(timeline->tracker_tail == NULL);
+		MALI_DEBUG_ASSERT(timeline->waiter_head == NULL);
+		MALI_DEBUG_ASSERT(timeline->waiter_tail == NULL);
+		MALI_DEBUG_ASSERT(timeline->system != NULL);
+		MALI_DEBUG_ASSERT(timeline->id < MALI_TIMELINE_MAX);
+
+		if (timeline->delayed_work != NULL) {
+			_mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work);
+			_mali_osk_wq_delayed_delete_work_nonflush(timeline->delayed_work);
+		}
+
+#if defined(CONFIG_SYNC)
+		if (timeline->sync_tl != NULL) {
+			sync_timeline_destroy(timeline->sync_tl);
+		}
+#endif /* defined(CONFIG_SYNC) */
+
+#ifndef CONFIG_SYNC
+		_mali_osk_free(timeline);
+#endif
+	}
+}
+
+static struct mali_timeline *mali_timeline_create(struct mali_timeline_system *system, enum mali_timeline_id id)
+{
+	struct mali_timeline *timeline;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(id < MALI_TIMELINE_MAX);
+
+	timeline = (struct mali_timeline *) _mali_osk_calloc(1, sizeof(struct mali_timeline));
+	if (timeline == NULL) {
+		return NULL;
+	}
+
+	/* Initially the timeline is empty. */
+#if defined(MALI_TIMELINE_DEBUG_START_POINT)
+	/* Start the timeline a bit before wrapping when debugging. */
+	timeline->point_next = UINT_MAX - MALI_TIMELINE_MAX_POINT_SPAN - 128;
+#else
+	timeline->point_next = 1;
+#endif
+	timeline->point_oldest = timeline->point_next;
+
+	/* The tracker and waiter lists will initially be empty. */
+
+	timeline->system = system;
+	timeline->id = id;
+
+	timeline->delayed_work = _mali_osk_wq_delayed_create_work(mali_timeline_timer_callback, timeline);
+	if (timeline->delayed_work == NULL) {
+		mali_timeline_destroy(timeline);
+		return NULL;
+	}
+
+	timeline->timer_active = MALI_FALSE;
+
+#if defined(CONFIG_SYNC)
+	{
+		char timeline_name[32];
+
+		switch (id) {
+		case MALI_TIMELINE_GP:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-gp", _mali_osk_get_pid());
+			break;
+		case MALI_TIMELINE_PP:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-pp", _mali_osk_get_pid());
+			break;
+		case MALI_TIMELINE_SOFT:
+			_mali_osk_snprintf(timeline_name, 32, "mali-%u-soft", _mali_osk_get_pid());
+			break;
+		default:
+			MALI_PRINT_ERROR(("Mali Timeline: Invalid timeline id %d\n", id));
+			mali_timeline_destroy(timeline);
+			return NULL;
+		}
+
+		timeline->destroyed = MALI_FALSE;
+
+		timeline->sync_tl = mali_sync_timeline_create(timeline, timeline_name);
+		if (timeline->sync_tl == NULL) {
+			mali_timeline_destroy(timeline);
+			return NULL;
+		}
+
+		timeline->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM);
+		if (timeline->spinlock == NULL) {
+			mali_timeline_destroy(timeline);
+			return NULL;
+		}
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	return timeline;
+}
+
+static void mali_timeline_insert_tracker(struct mali_timeline *timeline, struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	if (mali_timeline_is_full(timeline)) {
+		/* Don't add tracker if timeline is full. */
+		tracker->point = MALI_TIMELINE_NO_POINT;
+		return;
+	}
+
+	tracker->timeline = timeline;
+	tracker->point    = timeline->point_next;
+
+	/* Find next available point. */
+	timeline->point_next++;
+	if (timeline->point_next == MALI_TIMELINE_NO_POINT) {
+		timeline->point_next++;
+	}
+
+	MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+
+	if (tracker->type == MALI_TIMELINE_TRACKER_GP) {
+		_mali_osk_atomic_inc(&gp_tracker_count);
+	} else if (tracker->type == MALI_TIMELINE_TRACKER_PP) {
+		if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) {
+			_mali_osk_atomic_inc(&virt_pp_tracker_count);
+		} else {
+			_mali_osk_atomic_inc(&phy_pp_tracker_count);
+		}
+	}
+
+	/* Add tracker as new head on timeline's tracker list. */
+	if (timeline->tracker_head == NULL) {
+		/* Tracker list is empty. */
+		MALI_DEBUG_ASSERT(timeline->tracker_tail == NULL);
+
+		timeline->tracker_tail = tracker;
+
+		MALI_DEBUG_ASSERT(tracker->timeline_next == NULL);
+		MALI_DEBUG_ASSERT(tracker->timeline_prev == NULL);
+	} else {
+		MALI_DEBUG_ASSERT(timeline->tracker_head->timeline_next == NULL);
+
+		tracker->timeline_prev = timeline->tracker_head;
+		timeline->tracker_head->timeline_next = tracker;
+
+		MALI_DEBUG_ASSERT(tracker->timeline_next == NULL);
+	}
+	timeline->tracker_head = tracker;
+
+	MALI_DEBUG_ASSERT(timeline->tracker_head->timeline_next == NULL);
+	MALI_DEBUG_ASSERT(timeline->tracker_tail->timeline_prev == NULL);
+}
+
+/* Inserting the waiter object into the given timeline */
+static void mali_timeline_insert_waiter(struct mali_timeline *timeline, struct mali_timeline_waiter *waiter_new)
+{
+	struct mali_timeline_waiter *waiter_prev;
+	struct mali_timeline_waiter *waiter_next;
+
+	/* Waiter time must be between timeline head and tail, and there must
+	 * be less than MALI_TIMELINE_MAX_POINT_SPAN elements between */
+	MALI_DEBUG_ASSERT((waiter_new->point - timeline->point_oldest) < MALI_TIMELINE_MAX_POINT_SPAN);
+	MALI_DEBUG_ASSERT((-waiter_new->point + timeline->point_next) < MALI_TIMELINE_MAX_POINT_SPAN);
+
+	/* Finding out where to put this waiter, in the linked waiter list of the given timeline **/
+	waiter_prev = timeline->waiter_head; /* Insert new after  waiter_prev */
+	waiter_next = NULL;                  /* Insert new before waiter_next */
+
+	/* Iterating backwards from head (newest) to tail (oldest) until we
+	 * find the correct spot to insert the new waiter */
+	while (waiter_prev && mali_timeline_point_after(waiter_prev->point, waiter_new->point)) {
+		waiter_next = waiter_prev;
+		waiter_prev = waiter_prev->timeline_prev;
+	}
+
+	if (NULL == waiter_prev && NULL == waiter_next) {
+		/* list is empty */
+		timeline->waiter_head = waiter_new;
+		timeline->waiter_tail = waiter_new;
+	} else if (waiter_next == NULL) {
+		/* insert at head */
+		waiter_new->timeline_prev = timeline->waiter_head;
+		timeline->waiter_head->timeline_next = waiter_new;
+		timeline->waiter_head = waiter_new;
+	} else if (waiter_prev == NULL) {
+		/* insert at tail */
+		waiter_new->timeline_next = timeline->waiter_tail;
+		timeline->waiter_tail->timeline_prev = waiter_new;
+		timeline->waiter_tail = waiter_new;
+	} else {
+		/* insert between */
+		waiter_new->timeline_next = waiter_next;
+		waiter_new->timeline_prev = waiter_prev;
+		waiter_next->timeline_prev = waiter_new;
+		waiter_prev->timeline_next = waiter_new;
+	}
+}
+
+static void mali_timeline_update_delayed_work(struct mali_timeline *timeline)
+{
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *oldest_tracker;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(timeline->id == MALI_TIMELINE_SOFT);
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Timer is disabled, early out. */
+	if (!system->timer_enabled) return;
+
+	oldest_tracker = timeline->tracker_tail;
+	if (NULL != oldest_tracker && 0 == oldest_tracker->trigger_ref_count) {
+		if (oldest_tracker->timer_active == MALI_FALSE) {
+			if (timeline->timer_active == MALI_TRUE) {
+				_mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work);
+			}
+			_mali_osk_wq_delayed_schedule_work(timeline->delayed_work, MALI_TIMELINE_TIMEOUT_HZ);
+			oldest_tracker->timer_active = MALI_TRUE;
+			timeline->timer_active = MALI_TRUE;
+		}
+	} else if (timeline->timer_active == MALI_TRUE) {
+		_mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work);
+		timeline->timer_active = MALI_FALSE;
+	}
+}
+
+static mali_scheduler_mask mali_timeline_update_oldest_point(struct mali_timeline *timeline)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	MALI_DEBUG_CODE({
+		struct mali_timeline_system *system = timeline->system;
+
+		MALI_DEBUG_ASSERT_POINTER(system);
+
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+	});
+
+	if (timeline->tracker_tail != NULL) {
+		/* Set oldest point to oldest tracker's point */
+		timeline->point_oldest = timeline->tracker_tail->point;
+	} else {
+		/* No trackers, mark point list as empty */
+		timeline->point_oldest = timeline->point_next;
+	}
+
+	/* Release all waiters no longer on the timeline's point list.
+	 * Releasing a waiter can trigger this function to be called again, so
+	 * we do not store any pointers on stack. */
+	while (timeline->waiter_tail != NULL) {
+		u32 waiter_time_relative;
+		u32 time_head_relative;
+		struct mali_timeline_waiter *waiter = timeline->waiter_tail;
+
+		time_head_relative = timeline->point_next - timeline->point_oldest;
+		waiter_time_relative = waiter->point - timeline->point_oldest;
+
+		if (waiter_time_relative < time_head_relative) {
+			/* This and all following waiters are on the point list, so we are done. */
+			break;
+		}
+
+		/* Remove waiter from timeline's waiter list. */
+		if (waiter->timeline_next != NULL) {
+			waiter->timeline_next->timeline_prev = NULL;
+		} else {
+			/* This was the last waiter */
+			timeline->waiter_head = NULL;
+		}
+		timeline->waiter_tail = waiter->timeline_next;
+
+		/* Release waiter.  This could activate a tracker, if this was
+		 * the last waiter for the tracker. */
+		schedule_mask |= mali_timeline_system_release_waiter(timeline->system, waiter);
+	}
+
+	return schedule_mask;
+}
+
+void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker,
+				mali_timeline_tracker_type type,
+				struct mali_timeline_fence *fence,
+				void *job)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	MALI_DEBUG_ASSERT(type < MALI_TIMELINE_TRACKER_MAX);
+
+	/* Zero out all tracker members. */
+	_mali_osk_memset(tracker, 0, sizeof(*tracker));
+
+	tracker->type = type;
+	tracker->job = job;
+	tracker->trigger_ref_count = 1;  /* Prevents any callback from trigging while adding it */
+	tracker->os_tick_create = _mali_osk_time_tickcount();
+	MALI_DEBUG_CODE(tracker->magic = MALI_TIMELINE_TRACKER_MAGIC);
+
+	tracker->activation_error = MALI_TIMELINE_ACTIVATION_ERROR_NONE;
+
+	/* Copy fence. */
+	if (fence != NULL) {
+		_mali_osk_memcpy(&tracker->fence, fence, sizeof(struct mali_timeline_fence));
+	}
+}
+
+mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker)
+{
+	struct mali_timeline *timeline;
+	struct mali_timeline_system *system;
+	struct mali_timeline_tracker *tracker_next, *tracker_prev;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+
+	/* Upon entry a group lock will be held, but not a scheduler lock. */
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(tracker->magic == MALI_TIMELINE_TRACKER_MAGIC);
+
+	/* Tracker should have been triggered */
+	MALI_DEBUG_ASSERT(tracker->trigger_ref_count == 0);
+
+	/* All waiters should have been released at this point */
+	MALI_DEBUG_ASSERT(tracker->waiter_head == NULL);
+	MALI_DEBUG_ASSERT(tracker->waiter_tail == NULL);
+
+	MALI_DEBUG_PRINT(3, ("Mali Timeline: releasing tracker for job 0x%08X\n", tracker->job));
+
+	timeline = tracker->timeline;
+	if (timeline == NULL) {
+		/* Tracker was not on a timeline, there is nothing to release. */
+		return MALI_SCHEDULER_MASK_EMPTY;
+	}
+
+	system = timeline->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Tracker should still be on timeline */
+	MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+	MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, tracker->point));
+
+	/* Tracker is no longer valid. */
+	MALI_DEBUG_CODE(tracker->magic = 0);
+
+	tracker_next = tracker->timeline_next;
+	tracker_prev = tracker->timeline_prev;
+	tracker->timeline_next = NULL;
+	tracker->timeline_prev = NULL;
+
+	/* Removing tracker from timeline's tracker list */
+	if (tracker_next == NULL) {
+		/* This tracker was the head */
+		timeline->tracker_head = tracker_prev;
+	} else {
+		tracker_next->timeline_prev = tracker_prev;
+	}
+
+	if (tracker_prev == NULL) {
+		/* This tracker was the tail */
+		timeline->tracker_tail = tracker_next;
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+		/* Update the timeline's oldest time and release any waiters */
+		schedule_mask |= mali_timeline_update_oldest_point(timeline);
+		MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+	} else {
+		tracker_prev->timeline_next = tracker_next;
+	}
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Update delayed work only when it is the soft job timeline */
+	if (tracker->timeline->id == MALI_TIMELINE_SOFT) {
+		mali_timeline_update_delayed_work(tracker->timeline);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return schedule_mask;
+}
+
+void mali_timeline_system_release_waiter_list(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *tail,
+		struct mali_timeline_waiter *head)
+{
+	struct mali_timeline_waiter    *waiter = NULL;
+	struct mali_timeline_waiter    *next = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(head);
+	MALI_DEBUG_ASSERT_POINTER(tail);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	head->tracker_next = system->waiter_empty_list;
+	system->waiter_empty_list = tail;
+
+	waiter = system->waiter_empty_list;
+	while (waiter != NULL) {
+		next = waiter->tracker_next;
+		_mali_osk_free(waiter);
+		waiter = next;
+	}
+	system->waiter_empty_list = NULL;
+}
+
+static mali_scheduler_mask mali_timeline_tracker_activate(struct mali_timeline_tracker *tracker)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	struct mali_timeline_system *system;
+	struct mali_timeline *timeline;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT(tracker->magic == MALI_TIMELINE_TRACKER_MAGIC);
+
+	system = tracker->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	tracker->os_tick_activate = _mali_osk_time_tickcount();
+
+	if (tracker->waiter_head != NULL) {
+		mali_timeline_system_release_waiter_list(system, tracker->waiter_tail, tracker->waiter_head);
+		tracker->waiter_head = NULL;
+		tracker->waiter_tail = NULL;
+	}
+
+	switch (tracker->type) {
+	case MALI_TIMELINE_TRACKER_GP:
+		schedule_mask = mali_scheduler_activate_gp_job((struct mali_gp_job *) tracker->job);
+
+		_mali_osk_atomic_dec(&gp_tracker_count);
+		break;
+	case MALI_TIMELINE_TRACKER_PP:
+		if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) {
+			_mali_osk_atomic_dec(&virt_pp_tracker_count);
+		} else {
+			_mali_osk_atomic_dec(&phy_pp_tracker_count);
+		}
+		schedule_mask = mali_scheduler_activate_pp_job((struct mali_pp_job *) tracker->job);
+		break;
+	case MALI_TIMELINE_TRACKER_SOFT:
+		timeline = tracker->timeline;
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		schedule_mask |= mali_soft_job_system_activate_job((struct mali_soft_job *) tracker->job);
+
+		/* Start a soft timer to make sure the soft job be released in a limited time */
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+		mali_timeline_update_delayed_work(timeline);
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+		break;
+	case MALI_TIMELINE_TRACKER_WAIT:
+		mali_timeline_fence_wait_activate((struct mali_timeline_fence_wait_tracker *) tracker->job);
+		break;
+	case MALI_TIMELINE_TRACKER_SYNC:
+#if defined(CONFIG_SYNC)
+		mali_timeline_sync_fence_activate((struct mali_timeline_sync_fence_tracker *) tracker->job);
+#else
+		MALI_PRINT_ERROR(("Mali Timeline: sync tracker not supported\n", tracker->type));
+#endif /* defined(CONFIG_SYNC) */
+		break;
+	default:
+		MALI_PRINT_ERROR(("Mali Timeline - Illegal tracker type: %d\n", tracker->type));
+		break;
+	}
+
+	return schedule_mask;
+}
+
+void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker)
+{
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	MALI_DEBUG_ASSERT(tracker->trigger_ref_count > 0);
+	tracker->trigger_ref_count++;
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+}
+
+mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error)
+{
+	u32 tid = _mali_osk_get_tid();
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	MALI_DEBUG_ASSERT(tracker->trigger_ref_count > 0);
+	tracker->trigger_ref_count--;
+
+	tracker->activation_error |= activation_error;
+
+	if (tracker->trigger_ref_count == 0) {
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+		tracker = NULL;
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return schedule_mask;
+}
+
+void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+	MALI_DEBUG_ASSERT_POINTER(uk_fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		fence->points[i] = uk_fence->points[i];
+	}
+
+	fence->sync_fd = uk_fence->sync_fd;
+}
+
+struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session)
+{
+	u32 i;
+	struct mali_timeline_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: creating timeline system\n"));
+
+	system = (struct mali_timeline_system *) _mali_osk_calloc(1, sizeof(struct mali_timeline_system));
+	if (system == NULL) {
+		return NULL;
+	}
+
+	system->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM);
+	if (system->spinlock == NULL) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		system->timelines[i] = mali_timeline_create(system, (enum mali_timeline_id)i);
+		if (system->timelines[i] == NULL) {
+			mali_timeline_system_destroy(system);
+			return NULL;
+		}
+	}
+
+#if defined(CONFIG_SYNC)
+	system->signaled_sync_tl = mali_sync_timeline_create(NULL, "mali-always-signaled");
+	if (system->signaled_sync_tl == NULL) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	system->waiter_empty_list = NULL;
+	system->session = session;
+	system->timer_enabled = MALI_TRUE;
+
+	system->wait_queue = _mali_osk_wait_queue_init();
+	if (system->wait_queue == NULL) {
+		mali_timeline_system_destroy(system);
+		return NULL;
+	}
+
+	return system;
+}
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE) || defined(CONFIG_SYNC)
+/**
+ * Check if there are any trackers left on timeline.
+ *
+ * Used as a wait queue conditional.
+ *
+ * @param data Timeline.
+ * @return MALI_TRUE if there are no trackers on timeline, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_has_no_trackers(void *data)
+{
+	struct mali_timeline *timeline = (struct mali_timeline *) data;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	return mali_timeline_is_empty(timeline);
+}
+#if defined(CONFIG_SYNC)
+/**
+ * Cancel sync fence waiters waited upon by trackers on all timelines.
+ *
+ * Will return after all timelines have no trackers left.
+ *
+ * @param system Timeline system.
+ */
+static void mali_timeline_cancel_sync_fence_waiters(struct mali_timeline_system *system)
+{
+	u32 i;
+	u32 tid = _mali_osk_get_tid();
+	struct mali_timeline_tracker *tracker, *tracker_next;
+
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(tracker_list);
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Cancel sync fence waiters. */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		tracker_next = timeline->tracker_tail;
+		while (tracker_next != NULL) {
+			tracker = tracker_next;
+			tracker_next = tracker->timeline_next;
+
+			if (tracker->sync_fence == NULL) continue;
+
+			MALI_DEBUG_PRINT(3, ("Mali Timeline: Cancelling sync fence wait for tracker 0x%08X.\n", tracker));
+
+			/* Cancel sync fence waiter. */
+			if (sync_fence_cancel_async(tracker->sync_fence, &tracker->sync_fence_waiter) == 0) {
+				/* Callback was not called, move tracker to local list. */
+				_mali_osk_list_add(&tracker->sync_fence_cancel_list, &tracker_list);
+			}
+		}
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	/* Manually call sync fence callback in order to release waiter and trigger activation of tracker. */
+	_MALI_OSK_LIST_FOREACHENTRY(tracker, tracker_next, &tracker_list, struct mali_timeline_tracker, sync_fence_cancel_list) {
+		mali_timeline_sync_fence_callback(tracker->sync_fence, &tracker->sync_fence_waiter);
+	}
+
+	/* Sleep until all sync fence callbacks are done and all timelines are empty. */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		_mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_has_no_trackers, (void *) timeline);
+	}
+}
+
+#endif /* defined(CONFIG_SYNC) */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+static void mali_timeline_cancel_dma_fence_waiters(struct mali_timeline_system *system)
+{
+	u32 i, j;
+	u32 tid = _mali_osk_get_tid();
+	struct mali_pp_job *pp_job = NULL;
+	struct mali_pp_job *next_pp_job = NULL;
+	struct mali_timeline *timeline = NULL;
+	struct mali_timeline_tracker *tracker, *tracker_next;
+
+	_MALI_OSK_LIST_HEAD_STATIC_INIT(pp_job_list);
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Cancel dma fence waiters. */
+	timeline = system->timelines[MALI_TIMELINE_PP];
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	tracker_next = timeline->tracker_tail;
+	while (tracker_next != NULL) {
+		mali_bool fence_is_signaled = MALI_TRUE;
+
+		tracker = tracker_next;
+		tracker_next = tracker->timeline_next;
+
+		if (tracker->waiter_dma_fence == NULL) continue;
+		pp_job = (struct mali_pp_job *)tracker->job;
+		MALI_DEBUG_ASSERT_POINTER(pp_job);
+		MALI_DEBUG_PRINT(3, ("Mali Timeline: Cancelling dma fence waiter for tracker 0x%08X.\n", tracker));
+
+		for (j = 0; j < pp_job->dma_fence_context.num_dma_fence_waiter; j++) {
+			if (pp_job->dma_fence_context.mali_dma_fence_waiters[j]) {
+				/* Cancel a previously callback from the fence.
+				* This function returns true if the callback is successfully removed,
+				* or false if the fence has already been signaled.
+				*/
+				bool ret = fence_remove_callback(pp_job->dma_fence_context.mali_dma_fence_waiters[j]->fence,
+								 &pp_job->dma_fence_context.mali_dma_fence_waiters[j]->base);
+				if (ret) {
+					fence_is_signaled = MALI_FALSE;
+				}
+			}
+		}
+
+		/* Callbacks were not called, move pp job to local list. */
+		if (fence_is_signaled == MALI_FALSE)
+			_mali_osk_list_add(&pp_job->list, &pp_job_list);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	/* Manually call dma fence callback in order to release waiter and trigger activation of tracker. */
+	_MALI_OSK_LIST_FOREACHENTRY(pp_job, next_pp_job, &pp_job_list, struct mali_pp_job, list) {
+		mali_timeline_dma_fence_callback((void *)pp_job);
+	}
+
+	/* Sleep until all dma fence callbacks are done and all timelines are empty. */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+		_mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_has_no_trackers, (void *) timeline);
+	}
+}
+#endif
+#endif
+void mali_timeline_system_abort(struct mali_timeline_system *system)
+{
+	MALI_DEBUG_CODE(u32 tid = _mali_osk_get_tid(););
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+	MALI_DEBUG_PRINT(3, ("Mali Timeline: Aborting timeline system for session 0x%08X.\n", system->session));
+
+#if defined(CONFIG_SYNC)
+	mali_timeline_cancel_sync_fence_waiters(system);
+#endif /* defined(CONFIG_SYNC) */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	mali_timeline_cancel_dma_fence_waiters(system);
+#endif
+
+	/* Should not be any waiters or trackers left at this point. */
+	MALI_DEBUG_CODE({
+		u32 i;
+
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+		for (i = 0; i < MALI_TIMELINE_MAX; ++i)
+		{
+			struct mali_timeline *timeline = system->timelines[i];
+
+			MALI_DEBUG_ASSERT_POINTER(timeline);
+			MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next);
+			MALI_DEBUG_ASSERT(timeline->tracker_head == NULL);
+			MALI_DEBUG_ASSERT(timeline->tracker_tail == NULL);
+			MALI_DEBUG_ASSERT(timeline->waiter_head == NULL);
+			MALI_DEBUG_ASSERT(timeline->waiter_tail == NULL);
+		}
+		mali_spinlock_reentrant_signal(system->spinlock, tid);
+	});
+}
+
+void mali_timeline_system_destroy(struct mali_timeline_system *system)
+{
+	u32 i;
+	struct mali_timeline_waiter *waiter, *next;
+#if defined(CONFIG_SYNC)
+	u32 tid = _mali_osk_get_tid();
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: destroying timeline system\n"));
+
+	if (system != NULL) {
+
+		/* There should be no waiters left on this queue. */
+		if (system->wait_queue != NULL) {
+			_mali_osk_wait_queue_term(system->wait_queue);
+			system->wait_queue = NULL;
+		}
+
+		/* Free all waiters in empty list */
+		waiter = system->waiter_empty_list;
+		while (waiter != NULL) {
+			next = waiter->tracker_next;
+			_mali_osk_free(waiter);
+			waiter = next;
+		}
+
+#if defined(CONFIG_SYNC)
+		if (system->signaled_sync_tl != NULL) {
+			sync_timeline_destroy(system->signaled_sync_tl);
+		}
+
+		for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+			if ((NULL != system->timelines[i]) && (system->timelines[i]->spinlock != NULL)) {
+				mali_spinlock_reentrant_wait(system->timelines[i]->spinlock, tid);
+				system->timelines[i]->destroyed = MALI_TRUE;
+				mali_spinlock_reentrant_signal(system->timelines[i]->spinlock, tid);
+			}
+		}
+#endif /* defined(CONFIG_SYNC) */
+
+		for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+			if (system->timelines[i] != NULL) {
+				mali_timeline_destroy(system->timelines[i]);
+			}
+		}
+
+		if (system->spinlock != NULL) {
+			mali_spinlock_reentrant_term(system->spinlock);
+		}
+
+		_mali_osk_free(system);
+	}
+}
+
+/**
+ * Find how many waiters are needed for a given fence.
+ *
+ * @param fence The fence to check.
+ * @return Number of waiters needed for fence.
+ */
+static u32 mali_timeline_fence_num_waiters(struct mali_timeline_fence *fence)
+{
+	u32 i, num_waiters = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		if (fence->points[i] != MALI_TIMELINE_NO_POINT) {
+			++num_waiters;
+		}
+	}
+
+#if defined(CONFIG_SYNC)
+	if (-1 != fence->sync_fd) ++num_waiters;
+#endif /* defined(CONFIG_SYNC) */
+
+	return num_waiters;
+}
+
+static struct mali_timeline_waiter *mali_timeline_system_get_zeroed_waiter(struct mali_timeline_system *system)
+{
+	struct mali_timeline_waiter *waiter;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	waiter = system->waiter_empty_list;
+	if (waiter != NULL) {
+		/* Remove waiter from empty list and zero it */
+		system->waiter_empty_list = waiter->tracker_next;
+		_mali_osk_memset(waiter, 0, sizeof(*waiter));
+	}
+
+	/* Return NULL if list was empty. */
+	return waiter;
+}
+
+static void mali_timeline_system_allocate_waiters(struct mali_timeline_system *system,
+		struct mali_timeline_waiter **tail,
+		struct mali_timeline_waiter **head,
+		int max_num_waiters)
+{
+	u32 i, tid = _mali_osk_get_tid();
+	mali_bool do_alloc;
+	struct mali_timeline_waiter *waiter;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(tail);
+	MALI_DEBUG_ASSERT_POINTER(head);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	*head = *tail = NULL;
+	do_alloc = MALI_FALSE;
+	i = 0;
+	while (i < max_num_waiters) {
+		if (do_alloc == MALI_FALSE) {
+			waiter = mali_timeline_system_get_zeroed_waiter(system);
+			if (waiter == NULL) {
+				do_alloc = MALI_TRUE;
+				mali_spinlock_reentrant_signal(system->spinlock, tid);
+				continue;
+			}
+		} else {
+			waiter = _mali_osk_calloc(1, sizeof(struct mali_timeline_waiter));
+			if (waiter == NULL) break;
+		}
+		++i;
+		if (*tail == NULL) {
+			*tail = waiter;
+			*head = waiter;
+		} else {
+			(*head)->tracker_next = waiter;
+			*head = waiter;
+		}
+	}
+	if (do_alloc == MALI_TRUE) {
+		mali_spinlock_reentrant_wait(system->spinlock, tid);
+	}
+}
+
+/**
+ * Create waiters for the given tracker. The tracker is activated when all waiters are release.
+ *
+ * @note Tracker can potentially be activated before this function returns.
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker we will create waiters for.
+ * @param waiter_tail List of pre-allocated waiters.
+ * @param waiter_head List of pre-allocated waiters.
+ */
+static void mali_timeline_system_create_waiters_and_unlock(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		struct mali_timeline_waiter *waiter_tail,
+		struct mali_timeline_waiter *waiter_head)
+{
+	int i;
+	u32 tid = _mali_osk_get_tid();
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+#if defined(CONFIG_SYNC)
+	struct sync_fence *sync_fence = NULL;
+#endif /* defined(CONFIG_SYNC) */
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	MALI_DEBUG_ASSERT(tracker->waiter_head == NULL);
+	MALI_DEBUG_ASSERT(tracker->waiter_tail == NULL);
+	MALI_DEBUG_ASSERT(tracker->job != NULL);
+
+	/* Creating waiter object for all the timelines the fence is put on. Inserting this waiter
+	 * into the timelines sorted list of waiters */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		mali_timeline_point point;
+		struct mali_timeline *timeline;
+		struct mali_timeline_waiter *waiter;
+
+		/* Get point on current timeline from tracker's fence. */
+		point = tracker->fence.points[i];
+
+		if (likely(point == MALI_TIMELINE_NO_POINT)) {
+			/* Fence contains no point on this timeline so we don't need a waiter. */
+			continue;
+		}
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (unlikely(!mali_timeline_is_point_valid(timeline, point))) {
+			MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n",
+					  point, timeline->point_oldest, timeline->point_next));
+			continue;
+		}
+
+		if (likely(mali_timeline_is_point_released(timeline, point))) {
+			/* Tracker representing the point has been released so we don't need a
+			 * waiter. */
+			continue;
+		}
+
+		/* The point is on timeline. */
+		MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, point));
+
+		/* Get a new zeroed waiter object. */
+		if (likely(waiter_tail != NULL)) {
+			waiter = waiter_tail;
+			waiter_tail = waiter_tail->tracker_next;
+		} else {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+			continue;
+		}
+
+		/* Yanking the trigger ref count of the tracker. */
+		tracker->trigger_ref_count++;
+
+		waiter->point   = point;
+		waiter->tracker = tracker;
+
+		/* Insert waiter on tracker's singly-linked waiter list. */
+		if (tracker->waiter_head == NULL) {
+			/* list is empty */
+			MALI_DEBUG_ASSERT(tracker->waiter_tail == NULL);
+			tracker->waiter_tail = waiter;
+		} else {
+			tracker->waiter_head->tracker_next = waiter;
+		}
+		tracker->waiter_head = waiter;
+
+		/* Add waiter to timeline. */
+		mali_timeline_insert_waiter(timeline, waiter);
+	}
+#if defined(CONFIG_SYNC)
+	if (-1 != tracker->fence.sync_fd) {
+		int ret;
+		struct mali_timeline_waiter *waiter;
+
+		sync_fence = sync_fence_fdget(tracker->fence.sync_fd);
+		if (unlikely(sync_fence == NULL)) {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", tracker->fence.sync_fd));
+			goto exit;
+		}
+
+		/* Check if we have a zeroed waiter object available. */
+		if (unlikely(waiter_tail == NULL)) {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+			goto exit;
+		}
+
+		/* Start asynchronous wait that will release waiter when the fence is signaled. */
+		sync_fence_waiter_init(&tracker->sync_fence_waiter, mali_timeline_sync_fence_callback);
+		ret = sync_fence_wait_async(sync_fence, &tracker->sync_fence_waiter);
+		if (ret == 1) {
+			/* Fence already signaled, no waiter needed. */
+			tracker->fence.sync_fd = -1;
+			goto exit;
+		} else if (ret != 0) {
+			MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, ret));
+			tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT;
+			goto exit;
+		}
+
+		/* Grab new zeroed waiter object. */
+		waiter = waiter_tail;
+		waiter_tail = waiter_tail->tracker_next;
+
+		/* Increase the trigger ref count of the tracker. */
+		tracker->trigger_ref_count++;
+
+		waiter->point   = MALI_TIMELINE_NO_POINT;
+		waiter->tracker = tracker;
+
+		/* Insert waiter on tracker's singly-linked waiter list. */
+		if (tracker->waiter_head == NULL) {
+			/* list is empty */
+			MALI_DEBUG_ASSERT(tracker->waiter_tail == NULL);
+			tracker->waiter_tail = waiter;
+		} else {
+			tracker->waiter_head->tracker_next = waiter;
+		}
+		tracker->waiter_head = waiter;
+
+		/* Also store waiter in separate field for easy access by sync callback. */
+		tracker->waiter_sync = waiter;
+
+		/* Store the sync fence in tracker so we can retrieve in abort session, if needed. */
+		tracker->sync_fence = sync_fence;
+
+		sync_fence = NULL;
+	}
+#endif /* defined(CONFIG_SYNC)*/
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	if ((NULL != tracker->timeline) && (tracker->timeline->id == MALI_TIMELINE_PP)) {
+
+		struct mali_pp_job *job = (struct mali_pp_job *)tracker->job;
+
+		if (job->dma_fence_context.num_dma_fence_waiter > 0) {
+			struct mali_timeline_waiter *waiter;
+			/* Check if we have a zeroed waiter object available. */
+			if (unlikely(waiter_tail == NULL)) {
+				MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+				goto exit;
+			}
+
+			/* Grab new zeroed waiter object. */
+			waiter = waiter_tail;
+			waiter_tail = waiter_tail->tracker_next;
+
+			/* Increase the trigger ref count of the tracker. */
+			tracker->trigger_ref_count++;
+
+			waiter->point   = MALI_TIMELINE_NO_POINT;
+			waiter->tracker = tracker;
+
+			/* Insert waiter on tracker's singly-linked waiter list. */
+			if (tracker->waiter_head == NULL) {
+				/* list is empty */
+				MALI_DEBUG_ASSERT(tracker->waiter_tail == NULL);
+				tracker->waiter_tail = waiter;
+			} else {
+				tracker->waiter_head->tracker_next = waiter;
+			}
+			tracker->waiter_head = waiter;
+
+			/* Also store waiter in separate field for easy access by sync callback. */
+			tracker->waiter_dma_fence = waiter;
+		}
+	}
+#endif /* defined(CONFIG_MALI_DMA_BUF_FENCE)*/
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE) || defined(CONFIG_SYNC)
+exit:
+#endif /* defined(CONFIG_MALI_DMA_BUF_FENCE) || defined(CONFIG_SYNC) */
+
+	if (waiter_tail != NULL) {
+		mali_timeline_system_release_waiter_list(system, waiter_tail, waiter_head);
+	}
+
+	/* Release the initial trigger ref count. */
+	tracker->trigger_ref_count--;
+
+	/* If there were no waiters added to this tracker we activate immediately. */
+	if (tracker->trigger_ref_count == 0) {
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+#if defined(CONFIG_SYNC)
+	if (sync_fence != NULL) {
+		sync_fence_put(sync_fence);
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		enum mali_timeline_id timeline_id)
+{
+	int num_waiters = 0;
+	struct mali_timeline_waiter *waiter_tail, *waiter_head;
+	u32 tid = _mali_osk_get_tid();
+
+	mali_timeline_point point = MALI_TIMELINE_NO_POINT;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT(system->session->is_aborting == MALI_FALSE);
+	MALI_DEBUG_ASSERT(tracker->type < MALI_TIMELINE_TRACKER_MAX);
+	MALI_DEBUG_ASSERT(tracker->magic == MALI_TIMELINE_TRACKER_MAGIC);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: adding tracker for job %p, timeline: %d\n", tracker->job, timeline_id));
+
+	MALI_DEBUG_ASSERT(tracker->trigger_ref_count > 0);
+	tracker->system = system;
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	num_waiters = mali_timeline_fence_num_waiters(&tracker->fence);
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	if (timeline_id == MALI_TIMELINE_PP) {
+		struct mali_pp_job *job = (struct mali_pp_job *)tracker->job;
+
+		if (job->dma_fence_context.num_dma_fence_waiter > 0)
+			num_waiters++;
+	}
+#endif
+
+	/* Allocate waiters. */
+	mali_timeline_system_allocate_waiters(system, &waiter_tail, &waiter_head, num_waiters);
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	/* Add tracker to timeline.  This will allocate a point for the tracker on the timeline. If
+	 * timeline ID is MALI_TIMELINE_NONE the tracker will NOT be added to a timeline and the
+	 * point will be MALI_TIMELINE_NO_POINT.
+	 *
+	 * NOTE: the tracker can fail to be added if the timeline is full.  If this happens, the
+	 * point will be MALI_TIMELINE_NO_POINT. */
+	MALI_DEBUG_ASSERT(timeline_id < MALI_TIMELINE_MAX || timeline_id == MALI_TIMELINE_NONE);
+	if (likely(timeline_id < MALI_TIMELINE_MAX)) {
+		struct mali_timeline *timeline = system->timelines[timeline_id];
+
+		mali_timeline_insert_tracker(timeline, tracker);
+		MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+	}
+
+	point = tracker->point;
+
+	/* Create waiters for tracker based on supplied fence.  Each waiter will increase the
+	 * trigger ref count. */
+	mali_timeline_system_create_waiters_and_unlock(system, tracker, waiter_tail, waiter_head);
+	tracker = NULL;
+
+	/* At this point the tracker object might have been freed so we should no longer
+	 * access it. */
+
+
+	/* The tracker will always be activated after calling add_tracker, even if NO_POINT is
+	 * returned. */
+	return point;
+}
+
+static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system,
+		struct mali_timeline_waiter *waiter)
+{
+	struct mali_timeline_tracker *tracker;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+
+	MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+	tracker = waiter->tracker;
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	/* At this point the waiter has been removed from the timeline's waiter list, but it is
+	 * still on the tracker's waiter list.  All of the tracker's waiters will be released when
+	 * the tracker is activated. */
+
+	waiter->point   = MALI_TIMELINE_NO_POINT;
+	waiter->tracker = NULL;
+
+	tracker->trigger_ref_count--;
+	if (tracker->trigger_ref_count == 0) {
+		/* This was the last waiter; activate tracker */
+		schedule_mask |= mali_timeline_tracker_activate(tracker);
+		tracker = NULL;
+	}
+
+	return schedule_mask;
+}
+
+mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system,
+		enum mali_timeline_id timeline_id)
+{
+	mali_timeline_point point;
+	struct mali_timeline *timeline;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	if (timeline_id >= MALI_TIMELINE_MAX) {
+		return MALI_TIMELINE_NO_POINT;
+	}
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	timeline = system->timelines[timeline_id];
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	point = MALI_TIMELINE_NO_POINT;
+	if (timeline->point_oldest != timeline->point_next) {
+		point = timeline->point_next - 1;
+		if (point == MALI_TIMELINE_NO_POINT) point--;
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	return point;
+}
+
+void mali_timeline_initialize(void)
+{
+	_mali_osk_atomic_init(&gp_tracker_count, 0);
+	_mali_osk_atomic_init(&phy_pp_tracker_count, 0);
+	_mali_osk_atomic_init(&virt_pp_tracker_count, 0);
+}
+
+void mali_timeline_terminate(void)
+{
+	_mali_osk_atomic_term(&gp_tracker_count);
+	_mali_osk_atomic_term(&phy_pp_tracker_count);
+	_mali_osk_atomic_term(&virt_pp_tracker_count);
+}
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+
+static mali_bool is_waiting_on_timeline(struct mali_timeline_tracker *tracker, enum mali_timeline_id id)
+{
+	struct mali_timeline *timeline;
+	struct mali_timeline_system *system;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	MALI_DEBUG_ASSERT_POINTER(tracker->timeline);
+	timeline = tracker->timeline;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline->system);
+	system = timeline->system;
+
+	if (id < MALI_TIMELINE_MAX) {
+		if (tracker->fence.points[id] != MALI_TIMELINE_NO_POINT) {
+			return mali_timeline_is_point_on(system->timelines[id], tracker->fence.points[id]);
+		} else {
+			return MALI_FALSE;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(id == MALI_TIMELINE_NONE);
+		return MALI_FALSE;
+	}
+}
+
+static const char *timeline_id_to_string(enum mali_timeline_id id)
+{
+	switch (id) {
+	case MALI_TIMELINE_GP:
+		return "GP";
+	case MALI_TIMELINE_PP:
+		return "PP";
+	case MALI_TIMELINE_SOFT:
+		return "SOFT";
+	default:
+		return "NONE";
+	}
+}
+
+static const char *timeline_tracker_type_to_string(enum mali_timeline_tracker_type type)
+{
+	switch (type) {
+	case MALI_TIMELINE_TRACKER_GP:
+		return "GP";
+	case MALI_TIMELINE_TRACKER_PP:
+		return "PP";
+	case MALI_TIMELINE_TRACKER_SOFT:
+		return "SOFT";
+	case MALI_TIMELINE_TRACKER_WAIT:
+		return "WAIT";
+	case MALI_TIMELINE_TRACKER_SYNC:
+		return "SYNC";
+	default:
+		return "INVALID";
+	}
+}
+
+mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker)
+{
+	struct mali_timeline *timeline = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	timeline = tracker->timeline;
+
+	if (tracker->trigger_ref_count != 0) {
+		return MALI_TIMELINE_TS_WAITING;
+	}
+
+	if (timeline && (timeline->tracker_tail == tracker || NULL != tracker->timeline_prev)) {
+		return MALI_TIMELINE_TS_ACTIVE;
+	}
+
+	if (timeline && (tracker->point == MALI_TIMELINE_NO_POINT)) {
+		return MALI_TIMELINE_TS_INIT;
+	}
+
+	return MALI_TIMELINE_TS_FINISH;
+}
+
+void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx)
+{
+	const char *tracker_state = "IWAF";
+	char state_char = 'I';
+	char tracker_type[32] = {0};
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker));
+	_mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type));
+
+#if defined(CONFIG_SYNC)
+	if (tracker->trigger_ref_count != 0) {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)]  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+				    tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job));
+	} else {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c  fd:%d  fence:(0x%08X)  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char,
+				    tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job));
+	}
+#else
+	if (tracker->trigger_ref_count != 0) {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)]  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+				    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+				    (unsigned int)(uintptr_t)(tracker->job));
+	} else {
+		_mali_osk_ctxprintf(print_ctx, "TL:  %s %u %c  job:(0x%08X)\n",
+				    tracker_type, tracker->point, state_char,
+				    (unsigned int)(uintptr_t)(tracker->job));
+	}
+#endif
+}
+
+void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx)
+{
+	struct mali_timeline_tracker *tracker = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	tracker = timeline->tracker_tail;
+	while (tracker != NULL) {
+		mali_timeline_debug_print_tracker(tracker, print_ctx);
+		tracker = tracker->timeline_next;
+	}
+}
+
+#if !(KERNEL_VERSION(3, 17, 0) > LINUX_VERSION_CODE)
+void mali_timeline_debug_direct_print_tracker(struct mali_timeline_tracker *tracker)
+{
+	const char *tracker_state = "IWAF";
+	char state_char = 'I';
+	char tracker_type[32] = {0};
+
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker));
+	_mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type));
+
+#if defined(CONFIG_SYNC)
+	if (tracker->trigger_ref_count != 0) {
+		MALI_PRINT(("TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)]  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+			    tracker->fence.sync_fd, tracker->sync_fence, tracker->job));
+	} else {
+		MALI_PRINT(("TL:  %s %u %c  fd:%d  fence:(0x%08X)  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char,
+			    tracker->fence.sync_fd, tracker->sync_fence, tracker->job));
+	}
+#else
+	if (tracker->trigger_ref_count != 0) {
+		MALI_PRINT(("TL:  %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)]  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+			    is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+			    tracker->job));
+	} else {
+		MALI_PRINT(("TL:  %s %u %c  job:(0x%08X)\n",
+			    tracker_type, tracker->point, state_char,
+			    tracker->job));
+	}
+#endif
+}
+
+void mali_timeline_debug_direct_print_timeline(struct mali_timeline *timeline)
+{
+	struct mali_timeline_tracker *tracker = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	tracker = timeline->tracker_tail;
+	while (tracker != NULL) {
+		mali_timeline_debug_direct_print_tracker(tracker);
+		tracker = tracker->timeline_next;
+	}
+}
+
+#endif
+
+void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx)
+{
+	int i;
+	int num_printed = 0;
+	u32 tid = _mali_osk_get_tid();
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	/* Print all timelines */
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline = system->timelines[i];
+
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (timeline->tracker_head == NULL) continue;
+
+		_mali_osk_ctxprintf(print_ctx, "TL: Timeline %s:\n",
+				    timeline_id_to_string((enum mali_timeline_id)i));
+
+		mali_timeline_debug_print_timeline(timeline, print_ctx);
+		num_printed++;
+	}
+
+	if (num_printed == 0) {
+		_mali_osk_ctxprintf(print_ctx, "TL: All timelines empty\n");
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+}
+
+#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+void mali_timeline_dma_fence_callback(void *pp_job_ptr)
+{
+	struct mali_timeline_system  *system;
+	struct mali_timeline_waiter  *waiter;
+	struct mali_timeline_tracker *tracker;
+	struct mali_pp_job *pp_job = (struct mali_pp_job *)pp_job_ptr;
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+	u32 tid = _mali_osk_get_tid();
+	mali_bool is_aborting = MALI_FALSE;
+
+	MALI_DEBUG_ASSERT_POINTER(pp_job);
+
+	tracker = &pp_job->tracker;
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+
+	system = tracker->system;
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(system->session);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	waiter = tracker->waiter_dma_fence;
+	MALI_DEBUG_ASSERT_POINTER(waiter);
+
+	schedule_mask |= mali_timeline_system_release_waiter(system, waiter);
+
+	is_aborting = system->session->is_aborting;
+
+	/* If aborting, wake up sleepers that are waiting for dma fence callbacks to complete. */
+	if (is_aborting) {
+		_mali_osk_wait_queue_wake_up(system->wait_queue);
+	}
+
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+	if (!is_aborting) {
+		mali_executor_schedule_from_mask(schedule_mask, MALI_TRUE);
+	}
+}
+#endif
diff --git a/utgard/r7p0/common/mali_timeline.h b/utgard/r7p0/common/mali_timeline.h
new file mode 100644
index 0000000..126e134
--- /dev/null
+++ b/utgard/r7p0/common/mali_timeline.h
@@ -0,0 +1,548 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMELINE_H__
+#define __MALI_TIMELINE_H__
+
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "mali_spinlock_reentrant.h"
+#include "mali_sync.h"
+#include "mali_scheduler_types.h"
+#include <linux/version.h>
+
+/**
+ * Soft job timeout.
+ *
+ * Soft jobs have to be signaled as complete after activation.  Normally this is done by user space,
+ * but in order to guarantee that every soft job is completed, we also have a timer.
+ */
+#define MALI_TIMELINE_TIMEOUT_HZ ((unsigned long) (HZ * 3 / 2)) /* 1500 ms. */
+
+/**
+ * Timeline type.
+ */
+typedef enum mali_timeline_id {
+	MALI_TIMELINE_GP   = MALI_UK_TIMELINE_GP,   /**< GP job timeline. */
+	MALI_TIMELINE_PP   = MALI_UK_TIMELINE_PP,   /**< PP job timeline. */
+	MALI_TIMELINE_SOFT = MALI_UK_TIMELINE_SOFT, /**< Soft job timeline. */
+	MALI_TIMELINE_MAX  = MALI_UK_TIMELINE_MAX
+} mali_timeline_id;
+
+/**
+ * Used by trackers that should not be added to a timeline (@ref mali_timeline_system_add_tracker).
+ */
+#define MALI_TIMELINE_NONE MALI_TIMELINE_MAX
+
+/**
+ * Tracker type.
+ */
+typedef enum mali_timeline_tracker_type {
+	MALI_TIMELINE_TRACKER_GP   = 0, /**< Tracker used by GP jobs. */
+	MALI_TIMELINE_TRACKER_PP   = 1, /**< Tracker used by PP jobs. */
+	MALI_TIMELINE_TRACKER_SOFT = 2, /**< Tracker used by soft jobs. */
+	MALI_TIMELINE_TRACKER_WAIT = 3, /**< Tracker used for fence wait. */
+	MALI_TIMELINE_TRACKER_SYNC = 4, /**< Tracker used for sync fence. */
+	MALI_TIMELINE_TRACKER_MAX  = 5,
+} mali_timeline_tracker_type;
+
+/**
+ * Tracker activation error.
+ */
+typedef u32 mali_timeline_activation_error;
+#define MALI_TIMELINE_ACTIVATION_ERROR_NONE      0
+#define MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT  (1<<1)
+#define MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT (1<<0)
+
+/**
+ * Type used to represent a point on a timeline.
+ */
+typedef u32 mali_timeline_point;
+
+/**
+ * Used to represent that no point on a timeline.
+ */
+#define MALI_TIMELINE_NO_POINT ((mali_timeline_point) 0)
+
+/**
+ * The maximum span of points on a timeline.  A timeline will be considered full if the difference
+ * between the oldest and newest points is equal or larger to this value.
+ */
+#define MALI_TIMELINE_MAX_POINT_SPAN 65536
+
+/**
+ * Magic value used to assert on validity of trackers.
+ */
+#define MALI_TIMELINE_TRACKER_MAGIC 0xabcdabcd
+
+struct mali_timeline;
+struct mali_timeline_waiter;
+struct mali_timeline_tracker;
+
+/**
+ * Timeline fence.
+ */
+struct mali_timeline_fence {
+	mali_timeline_point points[MALI_TIMELINE_MAX]; /**< For each timeline, a point or MALI_TIMELINE_NO_POINT. */
+	s32                 sync_fd;                   /**< A file descriptor representing a sync fence, or -1. */
+};
+
+/**
+ * Timeline system.
+ *
+ * The Timeline system has a set of timelines associated with a session.
+ */
+struct mali_timeline_system {
+	struct mali_spinlock_reentrant *spinlock;   /**< Spin lock protecting the timeline system */
+	struct mali_timeline           *timelines[MALI_TIMELINE_MAX]; /**< The timelines in this system */
+
+	/* Single-linked list of unused waiter objects.  Uses the tracker_next field in tracker. */
+	struct mali_timeline_waiter    *waiter_empty_list;
+
+	struct mali_session_data       *session;    /**< Session that owns this system. */
+
+	mali_bool                       timer_enabled; /**< Set to MALI_TRUE if soft job timer should be enabled, MALI_FALSE if not. */
+
+	_mali_osk_wait_queue_t         *wait_queue; /**< Wait queue. */
+
+#if defined(CONFIG_SYNC)
+	struct sync_timeline           *signaled_sync_tl; /**< Special sync timeline used to create pre-signaled sync fences */
+#endif /* defined(CONFIG_SYNC) */
+};
+
+/**
+ * Timeline.  Each Timeline system will have MALI_TIMELINE_MAX timelines.
+ */
+struct mali_timeline {
+	mali_timeline_point           point_next;   /**< The next available point. */
+	mali_timeline_point           point_oldest; /**< The oldest point not released. */
+
+	/* Double-linked list of trackers.  Sorted in ascending order by tracker->time_number with
+	 * tail pointing to the tracker with the oldest time. */
+	struct mali_timeline_tracker *tracker_head;
+	struct mali_timeline_tracker *tracker_tail;
+
+	/* Double-linked list of waiters.  Sorted in ascending order by waiter->time_number_wait
+	 * with tail pointing to the waiter with oldest wait time. */
+	struct mali_timeline_waiter  *waiter_head;
+	struct mali_timeline_waiter  *waiter_tail;
+
+	struct mali_timeline_system  *system;       /**< Timeline system this timeline belongs to. */
+	enum mali_timeline_id         id;           /**< Timeline type. */
+
+#if defined(CONFIG_SYNC)
+	struct sync_timeline         *sync_tl;      /**< Sync timeline that corresponds to this timeline. */
+	mali_bool destroyed;
+	struct mali_spinlock_reentrant *spinlock;       /**< Spin lock protecting the timeline system */
+#endif /* defined(CONFIG_SYNC) */
+
+	/* The following fields are used to time out soft job trackers. */
+	_mali_osk_wq_delayed_work_t  *delayed_work;
+	mali_bool                     timer_active;
+};
+
+/**
+ * Timeline waiter.
+ */
+struct mali_timeline_waiter {
+	mali_timeline_point           point;         /**< Point on timeline we are waiting for to be released. */
+	struct mali_timeline_tracker *tracker;       /**< Tracker that is waiting. */
+
+	struct mali_timeline_waiter  *timeline_next; /**< Next waiter on timeline's waiter list. */
+	struct mali_timeline_waiter  *timeline_prev; /**< Previous waiter on timeline's waiter list. */
+
+	struct mali_timeline_waiter  *tracker_next;  /**< Next waiter on tracker's waiter list. */
+};
+
+/**
+ * Timeline tracker.
+ */
+struct mali_timeline_tracker {
+	MALI_DEBUG_CODE(u32            magic); /**< Should always be MALI_TIMELINE_TRACKER_MAGIC for a valid tracker. */
+
+	mali_timeline_point            point; /**< Point on timeline for this tracker */
+
+	struct mali_timeline_tracker  *timeline_next; /**< Next tracker on timeline's tracker list */
+	struct mali_timeline_tracker  *timeline_prev; /**< Previous tracker on timeline's tracker list */
+
+	u32                            trigger_ref_count; /**< When zero tracker will be activated */
+	mali_timeline_activation_error activation_error;  /**< Activation error. */
+	struct mali_timeline_fence     fence;             /**< Fence used to create this tracker */
+
+	/* Single-linked list of waiters.  Sorted in order of insertions with
+	 * tail pointing to first waiter. */
+	struct mali_timeline_waiter   *waiter_head;
+	struct mali_timeline_waiter   *waiter_tail;
+
+#if defined(CONFIG_SYNC)
+	/* These are only used if the tracker is waiting on a sync fence. */
+	struct mali_timeline_waiter   *waiter_sync; /**< A direct pointer to timeline waiter representing sync fence. */
+	struct sync_fence_waiter       sync_fence_waiter; /**< Used to connect sync fence and tracker in sync fence wait callback. */
+	struct sync_fence             *sync_fence;   /**< The sync fence this tracker is waiting on. */
+	_mali_osk_list_t               sync_fence_cancel_list; /**< List node used to cancel sync fence waiters. */
+#endif /* defined(CONFIG_SYNC) */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+	struct mali_timeline_waiter   *waiter_dma_fence; /**< A direct pointer to timeline waiter representing dma fence. */
+#endif
+
+	struct mali_timeline_system   *system;       /**< Timeline system. */
+	struct mali_timeline          *timeline;     /**< Timeline, or NULL if not on a timeline. */
+	enum mali_timeline_tracker_type type;        /**< Type of tracker. */
+	void                          *job;          /**< Owner of tracker. */
+
+	/* The following fields are used to time out soft job trackers. */
+	unsigned long                 os_tick_create;
+	unsigned long                 os_tick_activate;
+	mali_bool                     timer_active;
+};
+
+extern _mali_osk_atomic_t gp_tracker_count;
+extern _mali_osk_atomic_t phy_pp_tracker_count;
+extern _mali_osk_atomic_t virt_pp_tracker_count;
+
+/**
+ * What follows is a set of functions to check the state of a timeline and to determine where on a
+ * timeline a given point is.  Most of these checks will translate the timeline so the oldest point
+ * on the timeline is aligned with zero.  Remember that all of these calculation are done on
+ * unsigned integers.
+ *
+ * The following example illustrates the three different states a point can be in.  The timeline has
+ * been translated to put the oldest point at zero:
+ *
+ *
+ *
+ *                               [ point is in forbidden zone ]
+ *                                          64k wide
+ *                                MALI_TIMELINE_MAX_POINT_SPAN
+ *
+ *    [ point is on timeline     )                            ( point is released ]
+ *
+ *    0--------------------------##############################--------------------2^32 - 1
+ *    ^                          ^
+ *    \                          |
+ *     oldest point on timeline  |
+ *                               \
+ *                                next point on timeline
+ */
+
+/**
+ * Compare two timeline points
+ *
+ * Returns true if a is after b, false if a is before or equal to b.
+ *
+ * This funcion ignores MALI_TIMELINE_MAX_POINT_SPAN. Wrapping is supported and
+ * the result will be correct if the points is less then UINT_MAX/2 apart.
+ *
+ * @param a Point on timeline
+ * @param b Point on timeline
+ * @return MALI_TRUE if a is after b
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_point_after(mali_timeline_point a, mali_timeline_point b)
+{
+	return 0 > ((s32)b) - ((s32)a);
+}
+
+/**
+ * Check if a point is on timeline.  A point is on a timeline if it is greater than, or equal to,
+ * the oldest point, and less than the next point.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point is on timeline, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_on(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(point != MALI_TIMELINE_NO_POINT);
+
+	return (point - timeline->point_oldest) < (timeline->point_next - timeline->point_oldest);
+}
+
+/**
+ * Check if a point has been released.  A point is released if it is older than the oldest point on
+ * the timeline, newer than the next point, and also not in the forbidden zone.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point has been release, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_released(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	mali_timeline_point point_normalized;
+	mali_timeline_point next_normalized;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(point != MALI_TIMELINE_NO_POINT);
+
+	point_normalized = point - timeline->point_oldest;
+	next_normalized = timeline->point_next - timeline->point_oldest;
+
+	return point_normalized > (next_normalized + MALI_TIMELINE_MAX_POINT_SPAN);
+}
+
+/**
+ * Check if a point is valid.  A point is valid if is on the timeline or has been released.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point is valid, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_valid(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return mali_timeline_is_point_on(timeline, point) || mali_timeline_is_point_released(timeline, point);
+}
+
+/**
+ * Check if timeline is empty (has no points on it).  A timeline is empty if next == oldest.
+ *
+ * @param timeline Timeline.
+ * @return MALI_TRUE if timeline is empty, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_empty(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return timeline->point_next == timeline->point_oldest;
+}
+
+/**
+ * Check if timeline is full.  A valid timeline cannot span more than 64k points (@ref
+ * MALI_TIMELINE_MAX_POINT_SPAN).
+ *
+ * @param timeline Timeline.
+ * @return MALI_TRUE if timeline is full, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_full(struct mali_timeline *timeline)
+{
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	return MALI_TIMELINE_MAX_POINT_SPAN <= (timeline->point_next - timeline->point_oldest);
+}
+
+/**
+ * Create a new timeline system.
+ *
+ * @param session The session this timeline system will belong to.
+ * @return New timeline system.
+ */
+struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session);
+
+/**
+ * Abort timeline system.
+ *
+ * This will release all pending waiters in the timeline system causing all trackers to be
+ * activated.
+ *
+ * @param system Timeline system to abort all jobs from.
+ */
+void mali_timeline_system_abort(struct mali_timeline_system *system);
+
+/**
+ * Destroy an empty timeline system.
+ *
+ * @note @ref mali_timeline_system_abort() should be called prior to this function.
+ *
+ * @param system Timeline system to destroy.
+ */
+void mali_timeline_system_destroy(struct mali_timeline_system *system);
+
+/**
+ * Stop the soft job timer.
+ *
+ * @param system Timeline system
+ */
+void mali_timeline_system_stop_timer(struct mali_timeline_system *system);
+
+/**
+ * Add a tracker to a timeline system and optionally also on a timeline.
+ *
+ * Once added to the timeline system, the tracker is guaranteed to be activated.  The tracker can be
+ * activated before this function returns.  Thus, it is also possible that the tracker is released
+ * before this function returns, depending on the tracker type.
+ *
+ * @note Tracker must be initialized (@ref mali_timeline_tracker_init) before being added to the
+ * timeline system.
+ *
+ * @param system Timeline system the tracker will be added to.
+ * @param tracker The tracker to be added.
+ * @param timeline_id Id of the timeline the tracker will be added to, or
+ *                    MALI_TIMELINE_NONE if it should not be added on a timeline.
+ * @return Point on timeline identifying this tracker, or MALI_TIMELINE_NO_POINT if not on timeline.
+ */
+mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system,
+		struct mali_timeline_tracker *tracker,
+		enum mali_timeline_id timeline_id);
+
+/**
+ * Get latest point on timeline.
+ *
+ * @param system Timeline system.
+ * @param timeline_id Id of timeline to get latest point from.
+ * @return Latest point on timeline, or MALI_TIMELINE_NO_POINT if the timeline is empty.
+ */
+mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system,
+		enum mali_timeline_id timeline_id);
+
+/**
+ * Initialize tracker.
+ *
+ * Must be called before tracker is added to timeline system (@ref mali_timeline_system_add_tracker).
+ *
+ * @param tracker Tracker to initialize.
+ * @param type Type of tracker.
+ * @param fence Fence used to set up dependencies for tracker.
+ * @param job Pointer to job struct this tracker is associated with.
+ */
+void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker,
+				mali_timeline_tracker_type type,
+				struct mali_timeline_fence *fence,
+				void *job);
+
+/**
+ * Grab trigger ref count on tracker.
+ *
+ * This will prevent tracker from being activated until the trigger ref count reaches zero.
+ *
+ * @note Tracker must have been initialized (@ref mali_timeline_tracker_init).
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker.
+ */
+void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker);
+
+/**
+ * Release trigger ref count on tracker.
+ *
+ * If the trigger ref count reaches zero, the tracker will be activated.
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker.
+ * @param activation_error Error bitmask if activated with error, or MALI_TIMELINE_ACTIVATION_ERROR_NONE if no error.
+ * @return Scheduling bitmask.
+ */
+mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error);
+
+/**
+ * Release a tracker from the timeline system.
+ *
+ * This is used to signal that the job being tracker is finished, either due to normal circumstances
+ * (job complete/abort) or due to a timeout.
+ *
+ * We may need to schedule some subsystems after a tracker has been released and the returned
+ * bitmask will tell us if it is necessary.  If the return value is non-zero, this value needs to be
+ * sent as an input parameter to @ref mali_scheduler_schedule_from_mask() to do the scheduling.
+ *
+ * @note Tracker must have been activated before being released.
+ * @warning Not calling @ref mali_scheduler_schedule_from_mask() after releasing a tracker can lead
+ * to a deadlock.
+ *
+ * @param tracker Tracker being released.
+ * @return Scheduling bitmask.
+ */
+mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker);
+
+MALI_STATIC_INLINE mali_bool mali_timeline_tracker_activation_error(
+	struct mali_timeline_tracker *tracker)
+{
+	MALI_DEBUG_ASSERT_POINTER(tracker);
+	return (MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT &
+		tracker->activation_error) ? MALI_TRUE : MALI_FALSE;
+}
+
+/**
+ * Copy data from a UK fence to a Timeline fence.
+ *
+ * @param fence Timeline fence.
+ * @param uk_fence UK fence.
+ */
+void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence);
+
+void mali_timeline_initialize(void);
+
+void mali_timeline_terminate(void);
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_gp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&gp_tracker_count);
+}
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_physical_pp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&phy_pp_tracker_count);
+}
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_virtual_pp_job(void)
+{
+	return 0 < _mali_osk_atomic_read(&virt_pp_tracker_count);
+}
+
+#if defined(DEBUG)
+#define MALI_TIMELINE_DEBUG_FUNCTIONS
+#endif /* DEBUG */
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+
+/**
+ * Tracker state.  Used for debug printing.
+ */
+typedef enum mali_timeline_tracker_state {
+	MALI_TIMELINE_TS_INIT    = 0,
+	MALI_TIMELINE_TS_WAITING = 1,
+	MALI_TIMELINE_TS_ACTIVE  = 2,
+	MALI_TIMELINE_TS_FINISH  = 3,
+} mali_timeline_tracker_state;
+
+/**
+ * Get tracker state.
+ *
+ * @param tracker Tracker to check.
+ * @return State of tracker.
+ */
+mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker);
+
+/**
+ * Print debug information about tracker.
+ *
+ * @param tracker Tracker to print.
+ */
+void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx);
+
+/**
+ * Print debug information about timeline.
+ *
+ * @param timeline Timeline to print.
+ */
+void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx);
+
+#if !(KERNEL_VERSION(3, 17, 0) > LINUX_VERSION_CODE)
+void mali_timeline_debug_direct_print_tracker(struct mali_timeline_tracker *tracker);
+void mali_timeline_debug_direct_print_timeline(struct mali_timeline *timeline);
+#endif
+
+/**
+ * Print debug information about timeline system.
+ *
+ * @param system Timeline system to print.
+ */
+void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx);
+
+#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */
+
+#if defined(CONFIG_MALI_DMA_BUF_FENCE)
+/**
+ * The timeline dma fence callback when dma fence signal.
+ *
+ * @param pp_job_ptr The pointer to pp job that link to the signaled dma fence.
+ */
+void mali_timeline_dma_fence_callback(void *pp_job_ptr);
+#endif
+
+#endif /* __MALI_TIMELINE_H__ */
diff --git a/utgard/r7p0/common/mali_timeline_fence_wait.c b/utgard/r7p0/common/mali_timeline_fence_wait.c
new file mode 100644
index 0000000..4aaea07
--- /dev/null
+++ b/utgard/r7p0/common/mali_timeline_fence_wait.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timeline_fence_wait.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_spinlock_reentrant.h"
+
+/**
+ * Allocate a fence waiter tracker.
+ *
+ * @return New fence waiter if successful, NULL if not.
+ */
+static struct mali_timeline_fence_wait_tracker *mali_timeline_fence_wait_tracker_alloc(void)
+{
+	return (struct mali_timeline_fence_wait_tracker *) _mali_osk_calloc(1, sizeof(struct mali_timeline_fence_wait_tracker));
+}
+
+/**
+ * Free fence waiter tracker.
+ *
+ * @param wait Fence wait tracker to free.
+ */
+static void mali_timeline_fence_wait_tracker_free(struct mali_timeline_fence_wait_tracker *wait)
+{
+	MALI_DEBUG_ASSERT_POINTER(wait);
+	_mali_osk_atomic_term(&wait->refcount);
+	_mali_osk_free(wait);
+}
+
+/**
+ * Check if fence wait tracker has been activated.  Used as a wait queue condition.
+ *
+ * @param data Fence waiter.
+ * @return MALI_TRUE if tracker has been activated, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_fence_wait_tracker_is_activated(void *data)
+{
+	struct mali_timeline_fence_wait_tracker *wait;
+
+	wait = (struct mali_timeline_fence_wait_tracker *) data;
+	MALI_DEBUG_ASSERT_POINTER(wait);
+
+	return wait->activated;
+}
+
+/**
+ * Check if fence has been signaled.
+ *
+ * @param system Timeline system.
+ * @param fence Timeline fence.
+ * @return MALI_TRUE if fence is signaled, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_fence_wait_check_status(struct mali_timeline_system *system, struct mali_timeline_fence *fence)
+{
+	int i;
+	u32 tid = _mali_osk_get_tid();
+	mali_bool ret = MALI_TRUE;
+#if defined(CONFIG_SYNC)
+	struct sync_fence *sync_fence = NULL;
+#endif
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline;
+		mali_timeline_point   point;
+
+		point = fence->points[i];
+
+		if (likely(point == MALI_TIMELINE_NO_POINT)) {
+			/* Fence contains no point on this timeline. */
+			continue;
+		}
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		if (unlikely(!mali_timeline_is_point_valid(timeline, point))) {
+			MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n", point, timeline->point_oldest, timeline->point_next));
+		}
+
+		if (!mali_timeline_is_point_released(timeline, point)) {
+			ret = MALI_FALSE;
+			goto exit;
+		}
+	}
+
+#if defined(CONFIG_SYNC)
+	if (-1 != fence->sync_fd) {
+		sync_fence = sync_fence_fdget(fence->sync_fd);
+		if (likely(sync_fence != NULL)) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+			if (sync_fence->status == 0) {
+#else
+			if (atomic_read(&sync_fence->status) == 0) {
+#endif
+				ret = MALI_FALSE;
+			}
+		} else {
+			MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", fence->sync_fd));
+		}
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+exit:
+	mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+#if defined(CONFIG_SYNC)
+	if (sync_fence != NULL) {
+		sync_fence_put(sync_fence);
+	}
+#endif /* defined(CONFIG_SYNC) */
+
+	return ret;
+}
+
+mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout)
+{
+	struct mali_timeline_fence_wait_tracker *wait;
+	mali_timeline_point point;
+	mali_bool ret;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: wait on fence\n"));
+
+	if (timeout == MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY) {
+		return mali_timeline_fence_wait_check_status(system, fence);
+	}
+
+	wait = mali_timeline_fence_wait_tracker_alloc();
+	if (unlikely(wait == NULL)) {
+		MALI_PRINT_ERROR(("Mali Timeline: failed to allocate data for fence wait\n"));
+		return MALI_FALSE;
+	}
+
+	wait->activated = MALI_FALSE;
+	wait->system = system;
+
+	/* Initialize refcount to two references.  The reference first will be released by this
+	 * function after the wait is over.  The second reference will be released when the tracker
+	 * is activated. */
+	_mali_osk_atomic_init(&wait->refcount, 2);
+
+	/* Add tracker to timeline system, but not to a timeline. */
+	mali_timeline_tracker_init(&wait->tracker, MALI_TIMELINE_TRACKER_WAIT, fence, wait);
+	point = mali_timeline_system_add_tracker(system, &wait->tracker, MALI_TIMELINE_NONE);
+	MALI_DEBUG_ASSERT(point == MALI_TIMELINE_NO_POINT);
+	MALI_IGNORE(point);
+
+	/* Wait for the tracker to be activated or time out. */
+	if (timeout == MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER) {
+		_mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait);
+	} else {
+		_mali_osk_wait_queue_wait_event_timeout(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait, timeout);
+	}
+
+	ret = wait->activated;
+
+	if (_mali_osk_atomic_dec_return(&wait->refcount) == 0) {
+		mali_timeline_fence_wait_tracker_free(wait);
+	}
+
+	return ret;
+}
+
+void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *wait)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(wait);
+	MALI_DEBUG_ASSERT_POINTER(wait->system);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for fence wait tracker\n"));
+
+	MALI_DEBUG_ASSERT(wait->activated == MALI_FALSE);
+	wait->activated = MALI_TRUE;
+
+	_mali_osk_wait_queue_wake_up(wait->system->wait_queue);
+
+	/* Nothing can wait on this tracker, so nothing to schedule after release. */
+	schedule_mask = mali_timeline_tracker_release(&wait->tracker);
+	MALI_DEBUG_ASSERT(schedule_mask == MALI_SCHEDULER_MASK_EMPTY);
+	MALI_IGNORE(schedule_mask);
+
+	if (_mali_osk_atomic_dec_return(&wait->refcount) == 0) {
+		mali_timeline_fence_wait_tracker_free(wait);
+	}
+}
diff --git a/utgard/r7p0/common/mali_timeline_fence_wait.h b/utgard/r7p0/common/mali_timeline_fence_wait.h
new file mode 100644
index 0000000..d56cfcb
--- /dev/null
+++ b/utgard/r7p0/common/mali_timeline_fence_wait.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_timeline_fence_wait.h
+ *
+ * This file contains functions used to wait until a Timeline fence is signaled.
+ */
+
+#ifndef __MALI_TIMELINE_FENCE_WAIT_H__
+#define __MALI_TIMELINE_FENCE_WAIT_H__
+
+#include "mali_osk.h"
+#include "mali_timeline.h"
+
+/**
+ * If used as the timeout argument in @ref mali_timeline_fence_wait, a timer is not used and the
+ * function only returns when the fence is signaled.
+ */
+#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER ((u32) -1)
+
+/**
+ * If used as the timeout argument in @ref mali_timeline_fence_wait, the function will return
+ * immediately with the current state of the fence.
+ */
+#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY 0
+
+/**
+ * Fence wait tracker.
+ *
+ * The fence wait tracker is added to the Timeline system with the fence we are waiting on as a
+ * dependency.  We will then perform a blocking wait, possibly with a timeout, until the tracker is
+ * activated, which happens when the fence is signaled.
+ */
+struct mali_timeline_fence_wait_tracker {
+	mali_bool activated;                  /**< MALI_TRUE if the tracker has been activated, MALI_FALSE if not. */
+	_mali_osk_atomic_t refcount;          /**< Reference count. */
+	struct mali_timeline_system *system;  /**< Timeline system. */
+	struct mali_timeline_tracker tracker; /**< Timeline tracker. */
+};
+
+/**
+ * Wait for a fence to be signaled, or timeout is reached.
+ *
+ * @param system Timeline system.
+ * @param fence Fence to wait on.
+ * @param timeout Timeout in ms, or MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER or
+ * MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY.
+ * @return MALI_TRUE if signaled, MALI_FALSE if timed out.
+ */
+mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout);
+
+/**
+ * Used by the Timeline system to activate a fence wait tracker.
+ *
+ * @param fence_wait_tracker Fence waiter tracker.
+ */
+void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *fence_wait_tracker);
+
+#endif /* __MALI_TIMELINE_FENCE_WAIT_H__ */
diff --git a/utgard/r7p0/common/mali_timeline_sync_fence.c b/utgard/r7p0/common/mali_timeline_sync_fence.c
new file mode 100644
index 0000000..0bca5c1
--- /dev/null
+++ b/utgard/r7p0/common/mali_timeline_sync_fence.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timeline_sync_fence.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_sync.h"
+
+#if defined(CONFIG_SYNC)
+
+/**
+ * Creates a sync fence tracker and a sync fence.  Adds sync fence tracker to Timeline system and
+ * returns sync fence.  The sync fence will be signaled when the sync fence tracker is activated.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return Sync fence that will be signaled when tracker is activated.
+ */
+static struct sync_fence *mali_timeline_sync_fence_create_and_add_tracker(struct mali_timeline *timeline, mali_timeline_point point)
+{
+	struct mali_timeline_sync_fence_tracker *sync_fence_tracker;
+	struct sync_fence                       *sync_fence;
+	struct mali_timeline_fence               fence;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+	MALI_DEBUG_ASSERT(point != MALI_TIMELINE_NO_POINT);
+
+	/* Allocate sync fence tracker. */
+	sync_fence_tracker = _mali_osk_calloc(1, sizeof(struct mali_timeline_sync_fence_tracker));
+	if (sync_fence_tracker == NULL) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_fence_tracker allocation failed\n"));
+		return NULL;
+	}
+
+	/* Create sync flag. */
+	MALI_DEBUG_ASSERT_POINTER(timeline->sync_tl);
+	sync_fence_tracker->flag = mali_sync_flag_create(timeline->sync_tl, point);
+	if (sync_fence_tracker->flag == NULL) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_flag creation failed\n"));
+		_mali_osk_free(sync_fence_tracker);
+		return NULL;
+	}
+
+	/* Create sync fence from sync flag. */
+	sync_fence = mali_sync_flag_create_fence(sync_fence_tracker->flag);
+	if (sync_fence == NULL) {
+		MALI_PRINT_ERROR(("Mali Timeline: sync_fence creation failed\n"));
+		mali_sync_flag_put(sync_fence_tracker->flag);
+		_mali_osk_free(sync_fence_tracker);
+		return NULL;
+	}
+
+	/* Setup fence for tracker. */
+	_mali_osk_memset(&fence, 0, sizeof(struct mali_timeline_fence));
+	fence.sync_fd = -1;
+	fence.points[timeline->id] = point;
+
+	/* Finally, add the tracker to Timeline system. */
+	mali_timeline_tracker_init(&sync_fence_tracker->tracker, MALI_TIMELINE_TRACKER_SYNC, &fence, sync_fence_tracker);
+	point = mali_timeline_system_add_tracker(timeline->system, &sync_fence_tracker->tracker, MALI_TIMELINE_NONE);
+	MALI_DEBUG_ASSERT(point == MALI_TIMELINE_NO_POINT);
+
+	return sync_fence;
+}
+
+s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence)
+{
+	u32 i;
+	struct sync_fence *sync_fence_acc = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(system);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+		struct mali_timeline *timeline;
+		struct sync_fence *sync_fence;
+
+		if (fence->points[i] == MALI_TIMELINE_NO_POINT) continue;
+
+		timeline = system->timelines[i];
+		MALI_DEBUG_ASSERT_POINTER(timeline);
+
+		sync_fence = mali_timeline_sync_fence_create_and_add_tracker(timeline, fence->points[i]);
+		if (sync_fence == NULL) goto error;
+
+		if (sync_fence_acc != NULL) {
+			/* Merge sync fences. */
+			sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence);
+			if (sync_fence_acc == NULL) goto error;
+		} else {
+			/* This was the first sync fence created. */
+			sync_fence_acc = sync_fence;
+		}
+	}
+
+	if (-1 != fence->sync_fd) {
+		struct sync_fence *sync_fence;
+
+		sync_fence = sync_fence_fdget(fence->sync_fd);
+		if (sync_fence == NULL) goto error;
+
+		if (sync_fence_acc != NULL) {
+			sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence);
+			if (sync_fence_acc == NULL) goto error;
+		} else {
+			sync_fence_acc = sync_fence;
+		}
+	}
+
+	if (sync_fence_acc == NULL) {
+		MALI_DEBUG_ASSERT_POINTER(system->signaled_sync_tl);
+
+		/* There was nothing to wait on, so return an already signaled fence. */
+
+		sync_fence_acc = mali_sync_timeline_create_signaled_fence(system->signaled_sync_tl);
+		if (sync_fence_acc == NULL) goto error;
+	}
+
+	/* Return file descriptor for the accumulated sync fence. */
+	return mali_sync_fence_fd_alloc(sync_fence_acc);
+
+error:
+	if (sync_fence_acc != NULL) {
+		sync_fence_put(sync_fence_acc);
+	}
+
+	return -1;
+}
+
+void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker)
+{
+	mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker->flag);
+
+	MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for sync fence tracker\n"));
+
+	/* Signal flag and release reference. */
+	mali_sync_flag_signal(sync_fence_tracker->flag, 0);
+	mali_sync_flag_put(sync_fence_tracker->flag);
+
+	/* Nothing can wait on this tracker, so nothing to schedule after release. */
+	schedule_mask = mali_timeline_tracker_release(&sync_fence_tracker->tracker);
+	MALI_DEBUG_ASSERT(schedule_mask == MALI_SCHEDULER_MASK_EMPTY);
+
+	_mali_osk_free(sync_fence_tracker);
+}
+
+#endif /* defined(CONFIG_SYNC) */
diff --git a/utgard/r7p0/common/mali_timeline_sync_fence.h b/utgard/r7p0/common/mali_timeline_sync_fence.h
new file mode 100644
index 0000000..93aa120
--- /dev/null
+++ b/utgard/r7p0/common/mali_timeline_sync_fence.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_timeline_sync_fence.h
+ *
+ * This file contains code related to creating sync fences from timeline fences.
+ */
+
+#ifndef __MALI_TIMELINE_SYNC_FENCE_H__
+#define __MALI_TIMELINE_SYNC_FENCE_H__
+
+#include "mali_timeline.h"
+
+#if defined(CONFIG_SYNC)
+
+/**
+ * Sync fence tracker.
+ */
+struct mali_timeline_sync_fence_tracker {
+	struct mali_sync_flag        *flag;    /**< Sync flag used to connect tracker and sync fence. */
+	struct mali_timeline_tracker  tracker; /**< Timeline tracker. */
+};
+
+/**
+ * Create a sync fence that will be signaled when @ref fence is signaled.
+ *
+ * @param system Timeline system.
+ * @param fence Fence to create sync fence from.
+ * @return File descriptor for new sync fence, or -1 on error.
+ */
+s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence);
+
+/**
+ * Used by the Timeline system to activate a sync fence tracker.
+ *
+ * @param sync_fence_tracker Sync fence tracker.
+ *
+ */
+void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker);
+
+#endif /* defined(CONFIG_SYNC) */
+
+#endif /* __MALI_TIMELINE_SYNC_FENCE_H__ */
diff --git a/utgard/r7p0/common/mali_ukk.h b/utgard/r7p0/common/mali_ukk.h
new file mode 100644
index 0000000..f29d45d
--- /dev/null
+++ b/utgard/r7p0/common/mali_ukk.h
@@ -0,0 +1,551 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_ukk.h
+ * Defines the kernel-side interface of the user-kernel interface
+ */
+
+#ifndef __MALI_UKK_H__
+#define __MALI_UKK_H__
+
+#include "mali_osk.h"
+#include "mali_uk_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs
+ *
+ * - The _mali_uk functions are an abstraction of the interface to the device
+ * driver. On certain OSs, this would be implemented via the IOCTL interface.
+ * On other OSs, it could be via extension of some Device Driver Class, or
+ * direct function call for Bare metal/RTOSs.
+ * - It is important to note that:
+ *   -  The Device Driver has implemented the _mali_ukk set of functions
+ *   -  The Base Driver calls the corresponding set of _mali_uku functions.
+ * - What requires porting is solely the calling mechanism from User-side to
+ * Kernel-side, and propagating back the results.
+ * - Each U/K function is associated with a (group, number) pair from
+ * \ref _mali_uk_functions to make it possible for a common function in the
+ * Base Driver and Device Driver to route User/Kernel calls from/to the
+ * correct _mali_uk function. For example, in an IOCTL system, the IOCTL number
+ * would be formed based on the group and number assigned to the _mali_uk
+ * function, as listed in \ref _mali_uk_functions. On the user-side, each
+ * _mali_uku function would just make an IOCTL with the IOCTL-code being an
+ * encoded form of the (group, number) pair. On the kernel-side, the Device
+ * Driver's IOCTL handler decodes the IOCTL-code back into a (group, number)
+ * pair, and uses this to determine which corresponding _mali_ukk should be
+ * called.
+ *   - Refer to \ref _mali_uk_functions for more information about this
+ * (group, number) pairing.
+ * - In a system where there is no distinction between user and kernel-side,
+ * the U/K interface may be implemented as:@code
+ * MALI_STATIC_INLINE _mali_osk_errcode_t _mali_uku_examplefunction( _mali_uk_examplefunction_s *args )
+ * {
+ *     return mali_ukk_examplefunction( args );
+ * }
+ * @endcode
+ * - Therefore, all U/K calls behave \em as \em though they were direct
+ * function calls (but the \b implementation \em need \em not be a direct
+ * function calls)
+ *
+ * @note Naming the _mali_uk functions the same on both User and Kernel sides
+ * on non-RTOS systems causes debugging issues when setting breakpoints. In
+ * this case, it is not clear which function the breakpoint is put on.
+ * Therefore the _mali_uk functions in user space are prefixed with \c _mali_uku
+ * and in kernel space with \c _mali_ukk. The naming for the argument
+ * structures is unaffected.
+ *
+ * - The _mali_uk functions are synchronous.
+ * - Arguments to the _mali_uk functions are passed in a structure. The only
+ * parameter passed to the _mali_uk functions is a pointer to this structure.
+ * This first member of this structure, ctx, is a pointer to a context returned
+ * by _mali_uku_open(). For example:@code
+ * typedef struct
+ * {
+ *     void *ctx;
+ *     u32 number_of_cores;
+ * } _mali_uk_get_gp_number_of_cores_s;
+ * @endcode
+ *
+ * - Each _mali_uk function has its own argument structure named after the
+ *  function. The argument is distinguished by the _s suffix.
+ * - The argument types are defined by the base driver and user-kernel
+ *  interface.
+ * - All _mali_uk functions return a standard \ref _mali_osk_errcode_t.
+ * - Only arguments of type input or input/output need be initialized before
+ * calling a _mali_uk function.
+ * - Arguments of type output and input/output are only valid when the
+ * _mali_uk function returns \ref _MALI_OSK_ERR_OK.
+ * - The \c ctx member is always invalid after it has been used by a
+ * _mali_uk function, except for the context management functions
+ *
+ *
+ * \b Interface \b restrictions
+ *
+ * The requirements of the interface mean that an implementation of the
+ * User-kernel interface may do no 'real' work. For example, the following are
+ * illegal in the User-kernel implementation:
+ * - Calling functions necessary for operation on all systems,  which would
+ * not otherwise get called on RTOS systems.
+ *     - For example, a  U/K interface that calls multiple _mali_ukk functions
+ * during one particular U/K call. This could not be achieved by the same code
+ * which uses direct function calls for the U/K interface.
+ * -  Writing in values to the args members, when otherwise these members would
+ * not hold a useful value for a direct function call U/K interface.
+ *     - For example, U/K interface implementation that take NULL members in
+ * their arguments structure from the user side, but those members are
+ * replaced with non-NULL values in the kernel-side of the U/K interface
+ * implementation. A scratch area for writing data is one such example. In this
+ * case, a direct function call U/K interface would segfault, because no code
+ * would be present to replace the NULL pointer with a meaningful pointer.
+ *     - Note that we discourage the case where the U/K implementation changes
+ * a NULL argument member to non-NULL, and then the Device Driver code (outside
+ * of the U/K layer) re-checks this member for NULL, and corrects it when
+ * necessary. Whilst such code works even on direct function call U/K
+ * intefaces, it reduces the testing coverage of the Device Driver code. This
+ * is because we have no way of testing the NULL == value path on an OS
+ * implementation.
+ *
+ * A number of allowable examples exist where U/K interfaces do 'real' work:
+ * - The 'pointer switching' technique for \ref _mali_ukk_get_system_info
+ *     - In this case, without the pointer switching on direct function call
+ * U/K interface, the Device Driver code still sees the same thing: a pointer
+ * to which it can write memory. This is because such a system has no
+ * distinction between a user and kernel pointer.
+ * - Writing an OS-specific value into the ukk_private member for
+ * _mali_ukk_mem_mmap().
+ *     - In this case, this value is passed around by Device Driver code, but
+ * its actual value is never checked. Device Driver code simply passes it from
+ * the U/K layer to the OSK layer, where it can be acted upon. In this case,
+ * \em some OS implementations of the U/K (_mali_ukk_mem_mmap()) and OSK
+ * (_mali_osk_mem_mapregion_init()) functions will collaborate on the
+ *  meaning of ukk_private member. On other OSs, it may be unused by both
+ * U/K and OSK layers
+ *     - Therefore, on error inside the U/K interface implementation itself,
+ * it will be as though the _mali_ukk function itself had failed, and cleaned
+ * up after itself.
+ *     - Compare this to a direct function call U/K implementation, where all
+ * error cleanup is handled by the _mali_ukk function itself. The direct
+ * function call U/K interface implementation is automatically atomic.
+ *
+ * The last example highlights a consequence of all U/K interface
+ * implementations: they must be atomic with respect to the Device Driver code.
+ * And therefore, should Device Driver code succeed but the U/K implementation
+ * fail afterwards (but before return to user-space), then the U/K
+ * implementation must cause appropriate cleanup actions to preserve the
+ * atomicity of the interface.
+ *
+ * @{
+ */
+
+
+/** @defgroup _mali_uk_context U/K Context management
+ *
+ * These functions allow for initialisation of the user-kernel interface once per process.
+ *
+ * Generally the context will store the OS specific object to communicate with the kernel device driver and further
+ * state information required by the specific implementation. The context is shareable among all threads in the caller process.
+ *
+ * On IOCTL systems, this is likely to be a file descriptor as a result of opening the kernel device driver.
+ *
+ * On a bare-metal/RTOS system with no distinction between kernel and
+ * user-space, the U/K interface simply calls the _mali_ukk variant of the
+ * function by direct function call. In this case, the context returned is the
+ * mali_session_data from _mali_ukk_open().
+ *
+ * The kernel side implementations of the U/K interface expect the first member of the argument structure to
+ * be the context created by _mali_uku_open(). On some OS implementations, the meaning of this context
+ * will be different between user-side and kernel-side. In which case, the kernel-side will need to replace this context
+ * with the kernel-side equivalent, because user-side will not have access to kernel-side data. The context parameter
+ * in the argument structure therefore has to be of type input/output.
+ *
+ * It should be noted that the caller cannot reuse the \c ctx member of U/K
+ * argument structure after a U/K call, because it may be overwritten. Instead,
+ * the context handle must always be stored  elsewhere, and copied into
+ * the appropriate U/K argument structure for each user-side call to
+ * the U/K interface. This is not usually a problem, since U/K argument
+ * structures are usually placed on the stack.
+ *
+ * @{ */
+
+/** @brief Begin a new Mali Device Driver session
+ *
+ * This is used to obtain a per-process context handle for all future U/K calls.
+ *
+ * @param context pointer to storage to return a (void*)context handle.
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_open(void **context);
+
+/** @brief End a Mali Device Driver session
+ *
+ * This should be called when the process no longer requires use of the Mali Device Driver.
+ *
+ * The context handle must not be used after it has been closed.
+ *
+ * @param context pointer to a stored (void*)context handle.
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_close(void **context);
+
+/** @} */ /* end group _mali_uk_context */
+
+
+/** @addtogroup _mali_uk_core U/K Core
+ *
+ * The core functions provide the following functionality:
+ * - verify that the user and kernel API are compatible
+ * - retrieve information about the cores and memory banks in the system
+ * - wait for the result of jobs started on a core
+ *
+ * @{ */
+
+/** @brief Waits for a job notification.
+ *
+ * Sleeps until notified or a timeout occurs. Returns information about the notification.
+ *
+ * @param args see _mali_uk_wait_for_notification_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args);
+
+/** @brief Post a notification to the notification queue of this application.
+ *
+ * @param args see _mali_uk_post_notification_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args);
+
+/** @brief Verifies if the user and kernel side of this API are compatible.
+ *
+ * This function is obsolete, but kept to allow old, incompatible user space
+ * clients to robustly detect the incompatibility.
+ *
+ * @param args see _mali_uk_get_api_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args);
+
+/** @brief Verifies if the user and kernel side of this API are compatible.
+ *
+ * @param args see _mali_uk_get_api_version_v2_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args);
+
+/** @brief Get the user space settings applicable for calling process.
+ *
+ * @param args see _mali_uk_get_user_settings_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args);
+
+/** @brief Get a user space setting applicable for calling process.
+ *
+ * @param args see _mali_uk_get_user_setting_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args);
+
+/* @brief Grant or deny high priority scheduling for this session.
+ *
+ * @param args see _mali_uk_request_high_priority_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args);
+
+/** @brief Make process sleep if the pending big job in kernel  >= MALI_MAX_PENDING_BIG_JOB
+ *
+ */
+_mali_osk_errcode_t _mali_ukk_pending_submit(_mali_uk_pending_submit_s *args);
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @addtogroup _mali_uk_memory U/K Memory
+ *
+ * The memory functions provide functionality with and without a Mali-MMU present.
+ *
+ * For Mali-MMU based systems, the following functionality is provided:
+ * - Initialize and terminate MALI virtual address space
+ * - Allocate/deallocate physical memory to a MALI virtual address range and map into/unmap from the
+ * current process address space
+ * - Map/unmap external physical memory into the MALI virtual address range
+ *
+ * For Mali-nonMMU based systems:
+ * - Allocate/deallocate MALI memory
+ *
+ * @{ */
+
+/** @brief Map Mali Memory into the current user process
+ *
+ * Maps Mali memory into the current user process in a generic way.
+ *
+ * This function is to be used for Mali-MMU mode. The function is available in both Mali-MMU and Mali-nonMMU modes,
+ * but should not be called by a user process in Mali-nonMMU mode.
+ *
+ * The implementation and operation of _mali_ukk_mem_mmap() is dependant on whether the driver is built for Mali-MMU
+ * or Mali-nonMMU:
+ * - In the nonMMU case, _mali_ukk_mem_mmap() requires a physical address to be specified. For this reason, an OS U/K
+ * implementation should not allow this to be called from user-space. In any case, nonMMU implementations are
+ * inherently insecure, and so the overall impact is minimal. Mali-MMU mode should be used if security is desired.
+ * - In the MMU case, _mali_ukk_mem_mmap() the _mali_uk_mem_mmap_s::phys_addr
+ * member is used for the \em Mali-virtual address desired for the mapping. The
+ * implementation of _mali_ukk_mem_mmap() will allocate both the CPU-virtual
+ * and CPU-physical addresses, and can cope with mapping a contiguous virtual
+ * address range to a sequence of non-contiguous physical pages. In this case,
+ * the CPU-physical addresses are not communicated back to the user-side, as
+ * they are unnecsessary; the \em Mali-virtual address range must be used for
+ * programming Mali structures.
+ *
+ * In the second (MMU) case, _mali_ukk_mem_mmap() handles management of
+ * CPU-virtual and CPU-physical ranges, but the \em caller must manage the
+ * \em Mali-virtual address range from the user-side.
+ *
+ * @note Mali-virtual address ranges are entirely separate between processes.
+ * It is not possible for a process to accidentally corrupt another process'
+ * \em Mali-virtual address space.
+ *
+ * @param args see _mali_uk_mem_mmap_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_mmap(_mali_uk_mem_mmap_s *args);
+
+/** @brief Unmap Mali Memory from the current user process
+ *
+ * Unmaps Mali memory from the current user process in a generic way. This only operates on Mali memory supplied
+ * from _mali_ukk_mem_mmap().
+ *
+ * @param args see _mali_uk_mem_munmap_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_munmap(_mali_uk_mem_munmap_s *args);
+
+/** @brief Determine the buffer size necessary for an MMU page table dump.
+ * @param args see _mali_uk_query_mmu_page_table_dump_size_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args);
+/** @brief Dump MMU Page tables.
+ * @param args see _mali_uk_dump_mmu_page_table_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args);
+
+/** @brief Write user data to specified Mali memory without causing segfaults.
+ * @param args see _mali_uk_mem_write_safe_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args);
+
+/** @} */ /* end group _mali_uk_memory */
+
+
+/** @addtogroup _mali_uk_pp U/K Fragment Processor
+ *
+ * The Fragment Processor (aka PP (Pixel Processor)) functions provide the following functionality:
+ * - retrieving version of the fragment processors
+ * - determine number of fragment processors
+ * - starting a job on a fragment processor
+ *
+ * @{ */
+
+/** @brief Issue a request to start a new job on a Fragment Processor.
+ *
+ * If the request fails args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can
+ * try to start the job again.
+ *
+ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job
+ * which the hardware hasn't actually started processing yet. In this case the new job will be started instead and the
+ * existing one returned, otherwise the new job is started and the status field args->status is set to
+ * _MALI_UK_START_JOB_STARTED.
+ *
+ * Job completion can be awaited with _mali_ukk_wait_for_notification().
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_pp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx, _mali_uk_pp_start_job_s *uargs);
+
+/**
+ * @brief Issue a request to start new jobs on both Vertex Processor and Fragment Processor.
+ *
+ * @note Will call into @ref _mali_ukk_pp_start_job and @ref _mali_ukk_gp_start_job.
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_pp_and_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx, _mali_uk_pp_and_gp_start_job_s *uargs);
+
+/** @brief Returns the number of Fragment Processors in the system
+ *
+ * @param args see _mali_uk_get_pp_number_of_cores_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args);
+
+/** @brief Returns the version that all Fragment Processor cores are compatible with.
+ *
+ * This function may only be called when _mali_ukk_get_pp_number_of_cores() indicated at least one Fragment
+ * Processor core is available.
+ *
+ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args);
+
+/** @brief Disable Write-back unit(s) on specified job
+ *
+ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h"
+ */
+void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args);
+
+
+/** @} */ /* end group _mali_uk_pp */
+
+
+/** @addtogroup _mali_uk_gp U/K Vertex Processor
+ *
+ * The Vertex Processor (aka GP (Geometry Processor)) functions provide the following functionality:
+ * - retrieving version of the Vertex Processors
+ * - determine number of Vertex Processors available
+ * - starting a job on a Vertex Processor
+ *
+ * @{ */
+
+/** @brief Issue a request to start a new job on a Vertex Processor.
+ *
+ * If the request fails args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can
+ * try to start the job again.
+ *
+ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job
+ * which the hardware hasn't actually started processing yet. In this case the new job will be started and the
+ * existing one returned, otherwise the new job is started and the status field args->status is set to
+ * _MALI_UK_START_JOB_STARTED.
+ *
+ * Job completion can be awaited with _mali_ukk_wait_for_notification().
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx, _mali_uk_gp_start_job_s *uargs);
+
+/** @brief Returns the number of Vertex Processors in the system.
+ *
+ * @param args see _mali_uk_get_gp_number_of_cores_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args);
+
+/** @brief Returns the version that all Vertex Processor cores are compatible with.
+ *
+ * This function may only be called when _mali_uk_get_gp_number_of_cores() indicated at least one Vertex
+ * Processor core is available.
+ *
+ * @param args see _mali_uk_get_gp_core_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args);
+
+/** @brief Resume or abort suspended Vertex Processor jobs.
+ *
+ * After receiving notification that a Vertex Processor job was suspended from
+ * _mali_ukk_wait_for_notification() you can use this function to resume or abort the job.
+ *
+ * @param args see _mali_uk_gp_suspend_response_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args);
+
+/** @} */ /* end group _mali_uk_gp */
+
+#if defined(CONFIG_MALI400_PROFILING)
+/** @addtogroup _mali_uk_profiling U/K Timeline profiling module
+ * @{ */
+
+/** @brief Add event to profiling buffer.
+ *
+ * @param args see _mali_uk_profiling_add_event_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args);
+
+/** @brief Get profiling stream fd.
+ *
+ * @param args see _mali_uk_profiling_stream_fd_get_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_stream_fd_get(_mali_uk_profiling_stream_fd_get_s *args);
+
+/** @brief Profiling control set.
+ *
+ * @param args see _mali_uk_profiling_control_set_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_control_set(_mali_uk_profiling_control_set_s *args);
+
+/** @} */ /* end group _mali_uk_profiling */
+#endif
+
+/** @addtogroup _mali_uk_vsync U/K VSYNC reporting module
+ * @{ */
+
+/** @brief Report events related to vsync.
+ *
+ * @note Events should be reported when starting to wait for vsync and when the
+ * waiting is finished. This information can then be used in kernel space to
+ * complement the GPU utilization metric.
+ *
+ * @param args see _mali_uk_vsync_event_report_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args);
+
+/** @} */ /* end group _mali_uk_vsync */
+
+/** @addtogroup _mali_sw_counters_report U/K Software counter reporting
+ * @{ */
+
+/** @brief Report software counters.
+ *
+ * @param args see _mali_uk_sw_counters_report_s in "mali_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args);
+
+/** @} */ /* end group _mali_sw_counters_report */
+
+/** @} */ /* end group u_k_api */
+
+/** @} */ /* end group uddapi */
+
+u32 _mali_ukk_report_memory_usage(void);
+
+u32 _mali_ukk_report_total_memory_size(void);
+
+u32 _mali_ukk_utilization_gp_pp(void);
+
+u32 _mali_ukk_utilization_gp(void);
+
+u32 _mali_ukk_utilization_pp(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UKK_H__ */
diff --git a/utgard/r7p0/common/mali_user_settings_db.c b/utgard/r7p0/common/mali_user_settings_db.c
new file mode 100644
index 0000000..5cc6f35
--- /dev/null
+++ b/utgard/r7p0/common/mali_user_settings_db.c
@@ -0,0 +1,150 @@
+/**
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_user_settings_db.h"
+#include "mali_session.h"
+
+static u32 mali_user_settings[_MALI_UK_USER_SETTING_MAX];
+const char *_mali_uk_user_setting_descriptions[] = _MALI_UK_USER_SETTING_DESCRIPTIONS;
+
+static void mali_user_settings_notify(_mali_uk_user_setting_t setting, u32 value)
+{
+	mali_bool done = MALI_FALSE;
+
+	/*
+	 * This function gets a bit complicated because we can't hold the session lock while
+	 * allocating notification objects.
+	 */
+
+	while (!done) {
+		u32 i;
+		u32 num_sessions_alloc;
+		u32 num_sessions_with_lock;
+		u32 used_notification_objects = 0;
+		_mali_osk_notification_t **notobjs;
+
+		/* Pre allocate the number of notifications objects we need right now (might change after lock has been taken) */
+		num_sessions_alloc = mali_session_get_count();
+		if (num_sessions_alloc == 0) {
+			/* No sessions to report to */
+			return;
+		}
+
+		notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc);
+		if (notobjs == NULL) {
+			MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n"));
+			return;
+		}
+
+		for (i = 0; i < num_sessions_alloc; i++) {
+			notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_SETTINGS_CHANGED,
+					sizeof(_mali_uk_settings_changed_s));
+			if (notobjs[i] != NULL) {
+				_mali_uk_settings_changed_s *data;
+
+				data = notobjs[i]->result_buffer;
+
+				data->setting = setting;
+				data->value = value;
+			} else {
+				MALI_PRINT_ERROR(("Failed to notify user space session about setting change (alloc failure %u)\n", i));
+			}
+		}
+
+		mali_session_lock();
+
+		/* number of sessions will not change while we hold the lock */
+		num_sessions_with_lock = mali_session_get_count();
+
+		if (num_sessions_alloc >= num_sessions_with_lock) {
+			/* We have allocated enough notification objects for all the sessions atm */
+			struct mali_session_data *session, *tmp;
+
+			MALI_SESSION_FOREACH(session, tmp, link) {
+				MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc);
+				if (notobjs[used_notification_objects] != NULL) {
+					mali_session_send_notification(session, notobjs[used_notification_objects]);
+					notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */
+				}
+				used_notification_objects++;
+			}
+			done = MALI_TRUE;
+		}
+
+		mali_session_unlock();
+
+		/* Delete any remaining/unused notification objects */
+		for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) {
+			if (notobjs[used_notification_objects] != NULL) {
+				_mali_osk_notification_delete(notobjs[used_notification_objects]);
+			}
+		}
+
+		_mali_osk_free(notobjs);
+	}
+}
+
+void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value)
+{
+	mali_bool notify = MALI_FALSE;
+
+	if (setting >= _MALI_UK_USER_SETTING_MAX) {
+		MALI_DEBUG_PRINT_ERROR(("Invalid user setting %ud\n"));
+		return;
+	}
+
+	if (mali_user_settings[setting] != value) {
+		notify = MALI_TRUE;
+	}
+
+	mali_user_settings[setting] = value;
+
+	if (notify) {
+		mali_user_settings_notify(setting, value);
+	}
+}
+
+u32 mali_get_user_setting(_mali_uk_user_setting_t setting)
+{
+	if (setting >= _MALI_UK_USER_SETTING_MAX) {
+		return 0;
+	}
+
+	return mali_user_settings[setting];
+}
+
+_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args)
+{
+	_mali_uk_user_setting_t setting;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	setting = args->setting;
+
+	if (setting < _MALI_UK_USER_SETTING_MAX) {
+		args->value = mali_user_settings[setting];
+		return _MALI_OSK_ERR_OK;
+	} else {
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+}
+
+_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args)
+{
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	_mali_osk_memcpy(args->settings, mali_user_settings, sizeof(mali_user_settings));
+
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r7p0/common/mali_user_settings_db.h b/utgard/r7p0/common/mali_user_settings_db.h
new file mode 100644
index 0000000..7f89c27
--- /dev/null
+++ b/utgard/r7p0/common/mali_user_settings_db.h
@@ -0,0 +1,39 @@
+/**
+ * Copyright (C) 2012-2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_USER_SETTINGS_DB_H__
+#define __MALI_USER_SETTINGS_DB_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+
+/** @brief Set Mali user setting in DB
+ *
+ * Update the DB with a new value for \a setting. If the value is different from theprevious set value running sessions will be notified of the change.
+ *
+ * @param setting the setting to be changed
+ * @param value the new value to set
+ */
+void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value);
+
+/** @brief Get current Mali user setting value from DB
+ *
+ * @param setting the setting to extract
+ * @return the value of the selected setting
+ */
+u32 mali_get_user_setting(_mali_uk_user_setting_t setting);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  /* __MALI_KERNEL_USER_SETTING__ */
diff --git a/utgard/r7p0/include/linux/mali/mali_utgard.h b/utgard/r7p0/include/linux/mali/mali_utgard.h
new file mode 100644
index 0000000..b5b15e3
--- /dev/null
+++ b/utgard/r7p0/include/linux/mali/mali_utgard.h
@@ -0,0 +1,526 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_utgard.h
+ * Defines types and interface exposed by the Mali Utgard device driver
+ */
+
+#ifndef __MALI_UTGARD_H__
+#define __MALI_UTGARD_H__
+
+#include "mali_osk_types.h"
+#ifdef CONFIG_MALI_DEVFREQ
+#include <linux/devfreq.h>
+#include "mali_pm_metrics.h"
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+#endif
+
+#define MALI_GPU_NAME_UTGARD "mali-utgard"
+
+
+#define MALI_OFFSET_GP                    0x00000
+#define MALI_OFFSET_GP_MMU                0x03000
+
+#define MALI_OFFSET_PP0                   0x08000
+#define MALI_OFFSET_PP0_MMU               0x04000
+#define MALI_OFFSET_PP1                   0x0A000
+#define MALI_OFFSET_PP1_MMU               0x05000
+#define MALI_OFFSET_PP2                   0x0C000
+#define MALI_OFFSET_PP2_MMU               0x06000
+#define MALI_OFFSET_PP3                   0x0E000
+#define MALI_OFFSET_PP3_MMU               0x07000
+
+#define MALI_OFFSET_PP4                   0x28000
+#define MALI_OFFSET_PP4_MMU               0x1C000
+#define MALI_OFFSET_PP5                   0x2A000
+#define MALI_OFFSET_PP5_MMU               0x1D000
+#define MALI_OFFSET_PP6                   0x2C000
+#define MALI_OFFSET_PP6_MMU               0x1E000
+#define MALI_OFFSET_PP7                   0x2E000
+#define MALI_OFFSET_PP7_MMU               0x1F000
+
+#define MALI_OFFSET_L2_RESOURCE0          0x01000
+#define MALI_OFFSET_L2_RESOURCE1          0x10000
+#define MALI_OFFSET_L2_RESOURCE2          0x11000
+
+#define MALI400_OFFSET_L2_CACHE0          MALI_OFFSET_L2_RESOURCE0
+#define MALI450_OFFSET_L2_CACHE0          MALI_OFFSET_L2_RESOURCE1
+#define MALI450_OFFSET_L2_CACHE1          MALI_OFFSET_L2_RESOURCE0
+#define MALI450_OFFSET_L2_CACHE2          MALI_OFFSET_L2_RESOURCE2
+#define MALI470_OFFSET_L2_CACHE1          MALI_OFFSET_L2_RESOURCE0
+
+#define MALI_OFFSET_BCAST                 0x13000
+#define MALI_OFFSET_DLBU                  0x14000
+
+#define MALI_OFFSET_PP_BCAST              0x16000
+#define MALI_OFFSET_PP_BCAST_MMU          0x15000
+
+#define MALI_OFFSET_PMU                   0x02000
+#define MALI_OFFSET_DMA                   0x12000
+
+/* Mali-300 */
+
+#define MALI_GPU_RESOURCES_MALI300(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI300_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq)
+
+/* Mali-400 */
+
+#define MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+	/* Mali-450 */
+#define MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI450_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP4, pp3_irq, base_addr + MALI_OFFSET_PP4_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP5, pp4_irq, base_addr + MALI_OFFSET_PP5_MMU, pp4_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP6, pp5_irq, base_addr + MALI_OFFSET_PP6_MMU, pp5_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP6_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP4, pp4_irq, base_addr + MALI_OFFSET_PP4_MMU, pp4_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP5, pp5_irq, base_addr + MALI_OFFSET_PP5_MMU, pp5_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(6, base_addr + MALI_OFFSET_PP6, pp6_irq, base_addr + MALI_OFFSET_PP6_MMU, pp6_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(7, base_addr + MALI_OFFSET_PP7, pp7_irq, base_addr + MALI_OFFSET_PP7_MMU, pp7_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+	MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP8_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+	/* Mali - 470 */
+#define MALI_GPU_RESOURCES_MALI470_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+	MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+	MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+	MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+	MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+	MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCES_MALI470_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+	MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCE_L2(addr) \
+	{ \
+		.name = "Mali_L2", \
+			.flags = IORESOURCE_MEM, \
+				 .start = addr, \
+					  .end   = addr + 0x200, \
+	},
+
+#define MALI_GPU_RESOURCE_GP(gp_addr, gp_irq) \
+	{ \
+		.name = "Mali_GP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_addr, \
+					  .end =   gp_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_irq, \
+					  .end   = gp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_GP_WITH_MMU(gp_addr, gp_irq, gp_mmu_addr, gp_mmu_irq) \
+	{ \
+		.name = "Mali_GP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_addr, \
+					  .end =   gp_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_irq, \
+					  .end   = gp_irq, \
+	}, \
+	{ \
+		.name = "Mali_GP_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = gp_mmu_addr, \
+					  .end =   gp_mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_GP_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = gp_mmu_irq, \
+					  .end =   gp_mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_PP(pp_addr, pp_irq) \
+	{ \
+		.name = "Mali_PP", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_PP_WITH_MMU(id, pp_addr, pp_irq, pp_mmu_addr, pp_mmu_irq) \
+	{ \
+		.name = "Mali_PP" #id, \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_mmu_addr, \
+					  .end =   pp_mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_PP" #id "_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_mmu_irq, \
+					  .end =   pp_mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_MMU(mmu_addr, mmu_irq) \
+	{ \
+		.name = "Mali_MMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = mmu_addr, \
+					  .end =   mmu_addr + 0x100, \
+	}, \
+	{ \
+		.name = "Mali_MMU_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = mmu_irq, \
+					  .end =   mmu_irq, \
+	},
+
+#define MALI_GPU_RESOURCE_PMU(pmu_addr) \
+	{ \
+		.name = "Mali_PMU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pmu_addr, \
+					  .end =   pmu_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_DMA(dma_addr) \
+	{ \
+		.name = "Mali_DMA", \
+			.flags = IORESOURCE_MEM, \
+				 .start = dma_addr, \
+					  .end = dma_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_DLBU(dlbu_addr) \
+	{ \
+		.name = "Mali_DLBU", \
+			.flags = IORESOURCE_MEM, \
+				 .start = dlbu_addr, \
+					  .end = dlbu_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_BCAST(bcast_addr) \
+	{ \
+		.name = "Mali_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = bcast_addr, \
+					  .end = bcast_addr + 0x100, \
+	},
+
+#define MALI_GPU_RESOURCE_PP_BCAST(pp_addr, pp_irq) \
+	{ \
+		.name = "Mali_PP_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_addr, \
+					  .end =   pp_addr + 0x1100, \
+	}, \
+	{ \
+		.name = "Mali_PP_Broadcast_IRQ", \
+			.flags = IORESOURCE_IRQ, \
+				 .start = pp_irq, \
+					  .end =   pp_irq, \
+	}, \
+
+#define MALI_GPU_RESOURCE_PP_MMU_BCAST(pp_mmu_bcast_addr) \
+	{ \
+		.name = "Mali_PP_MMU_Broadcast", \
+			.flags = IORESOURCE_MEM, \
+				 .start = pp_mmu_bcast_addr, \
+					  .end = pp_mmu_bcast_addr + 0x100, \
+	},
+
+	struct mali_gpu_utilization_data {
+		unsigned int utilization_gpu; /* Utilization for GP and all PP cores combined, 0 = no utilization, 256 = full utilization */
+		unsigned int utilization_gp;  /* Utilization for GP core only, 0 = no utilization, 256 = full utilization */
+		unsigned int utilization_pp;  /* Utilization for all PP cores combined, 0 = no utilization, 256 = full utilization */
+	};
+
+	struct mali_gpu_clk_item {
+		unsigned int clock; /* unit(MHz) */
+		unsigned int vol;
+	};
+
+	struct mali_gpu_clock {
+		struct mali_gpu_clk_item *item;
+		unsigned int num_of_steps;
+	};
+
+	struct mali_gpu_device_data {
+		/* Shared GPU memory */
+		unsigned long shared_mem_size;
+
+		/*
+		 * Mali PMU switch delay.
+		 * Only needed if the power gates are connected to the PMU in a high fanout
+		 * network. This value is the number of Mali clock cycles it takes to
+		 * enable the power gates and turn on the power mesh.
+		 * This value will have no effect if a daisy chain implementation is used.
+		 */
+		u32 pmu_switch_delay;
+
+		/* Mali Dynamic power domain configuration in sequence from 0-11
+		 *  GP  PP0 PP1  PP2  PP3  PP4  PP5  PP6  PP7, L2$0 L2$1 L2$2
+		 */
+		u16 pmu_domain_config[12];
+
+		/* Dedicated GPU memory range (physical). */
+		unsigned long dedicated_mem_start;
+		unsigned long dedicated_mem_size;
+
+		/* Frame buffer memory to be accessible by Mali GPU (physical) */
+		unsigned long fb_start;
+		unsigned long fb_size;
+
+		/* Max runtime [ms] for jobs */
+		int max_job_runtime;
+
+		/* Report GPU utilization and related control in this interval (specified in ms) */
+		unsigned long control_interval;
+
+		/* Function that will receive periodic GPU utilization numbers */
+		void (*utilization_callback)(struct mali_gpu_utilization_data *data);
+
+		/* Fuction that platform callback for freq setting, needed when CONFIG_MALI_DVFS enabled */
+		int (*set_freq)(int setting_clock_step);
+		/* Function that platfrom report it's clock info which driver can set, needed when CONFIG_MALI_DVFS enabled */
+		void (*get_clock_info)(struct mali_gpu_clock **data);
+		/* Function that get the current clock info, needed when CONFIG_MALI_DVFS enabled */
+		int (*get_freq)(void);
+		/* Function that init the mali gpu secure mode */
+		int (*secure_mode_init)(void);
+		/* Function that deinit the mali gpu secure mode */
+		void (*secure_mode_deinit)(void);
+		/* Function that reset GPU and enable gpu secure mode */
+		int (*gpu_reset_and_secure_mode_enable)(void);
+		/* Function that Reset GPU and disable gpu secure mode */
+		int (*gpu_reset_and_secure_mode_disable)(void);
+		/* ipa related interface customer need register */
+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
+		struct devfreq_cooling_power *gpu_cooling_ops;
+#endif
+	};
+
+	/**
+	 * Pause the scheduling and power state changes of Mali device driver.
+	 * mali_dev_resume() must always be called as soon as possible after this function
+	 * in order to resume normal operation of the Mali driver.
+	 */
+	void mali_dev_pause(void);
+
+	/**
+	 * Resume scheduling and allow power changes in Mali device driver.
+	 * This must always be called after mali_dev_pause().
+	 */
+	void mali_dev_resume(void);
+
+	/** @brief Set the desired number of PP cores to use.
+	 *
+	 * The internal Mali PMU will be used, if present, to physically power off the PP cores.
+	 *
+	 * @param num_cores The number of desired cores
+	 * @return 0 on success, otherwise error. -EINVAL means an invalid number of cores was specified.
+	 */
+	int mali_perf_set_num_pp_cores(unsigned int num_cores);
+
+#endif
diff --git a/utgard/r7p0/include/linux/mali/mali_utgard_ioctl.h b/utgard/r7p0/include/linux/mali/mali_utgard_ioctl.h
new file mode 100644
index 0000000..d09aa65
--- /dev/null
+++ b/utgard/r7p0/include/linux/mali/mali_utgard_ioctl.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library.
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so.
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef __MALI_UTGARD_IOCTL_H__
+#define __MALI_UTGARD_IOCTL_H__
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <linux/fs.h>       /* file system operations */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file mali_kernel_ioctl.h
+ * Interface to the Linux device driver.
+ * This file describes the interface needed to use the Linux device driver.
+ * Its interface is designed to used by the HAL implementation through a thin arch layer.
+ */
+
+/**
+ * ioctl commands
+ */
+
+#define MALI_IOC_BASE           0x82
+#define MALI_IOC_CORE_BASE      (_MALI_UK_CORE_SUBSYSTEM      + MALI_IOC_BASE)
+#define MALI_IOC_MEMORY_BASE    (_MALI_UK_MEMORY_SUBSYSTEM    + MALI_IOC_BASE)
+#define MALI_IOC_PP_BASE        (_MALI_UK_PP_SUBSYSTEM        + MALI_IOC_BASE)
+#define MALI_IOC_GP_BASE        (_MALI_UK_GP_SUBSYSTEM        + MALI_IOC_BASE)
+#define MALI_IOC_PROFILING_BASE (_MALI_UK_PROFILING_SUBSYSTEM + MALI_IOC_BASE)
+#define MALI_IOC_VSYNC_BASE     (_MALI_UK_VSYNC_SUBSYSTEM + MALI_IOC_BASE)
+
+#define MALI_IOC_WAIT_FOR_NOTIFICATION      _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_WAIT_FOR_NOTIFICATION, _mali_uk_wait_for_notification_s)
+#define MALI_IOC_GET_API_VERSION            _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, u32)
+#define MALI_IOC_GET_API_VERSION_V2         _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, _mali_uk_get_api_version_v2_s)
+#define MALI_IOC_POST_NOTIFICATION          _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_POST_NOTIFICATION, _mali_uk_post_notification_s)
+#define MALI_IOC_GET_USER_SETTING           _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTING, _mali_uk_get_user_setting_s)
+#define MALI_IOC_GET_USER_SETTINGS          _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTINGS, _mali_uk_get_user_settings_s)
+#define MALI_IOC_REQUEST_HIGH_PRIORITY      _IOW(MALI_IOC_CORE_BASE, _MALI_UK_REQUEST_HIGH_PRIORITY, _mali_uk_request_high_priority_s)
+#define MALI_IOC_TIMELINE_GET_LATEST_POINT  _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_GET_LATEST_POINT, _mali_uk_timeline_get_latest_point_s)
+#define MALI_IOC_TIMELINE_WAIT              _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_WAIT, _mali_uk_timeline_wait_s)
+#define MALI_IOC_TIMELINE_CREATE_SYNC_FENCE _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_CREATE_SYNC_FENCE, _mali_uk_timeline_create_sync_fence_s)
+#define MALI_IOC_SOFT_JOB_START             _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_START, _mali_uk_soft_job_start_s)
+#define MALI_IOC_SOFT_JOB_SIGNAL            _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_SIGNAL, _mali_uk_soft_job_signal_s)
+#define MALI_IOC_PENDING_SUBMIT             _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_PENDING_SUBMIT, _mali_uk_pending_submit_s)
+
+#define MALI_IOC_MEM_ALLOC                  _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_ALLOC_MEM, _mali_uk_alloc_mem_s)
+#define MALI_IOC_MEM_FREE                   _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_FREE_MEM, _mali_uk_free_mem_s)
+#define MALI_IOC_MEM_BIND                   _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_BIND_MEM, _mali_uk_bind_mem_s)
+#define MALI_IOC_MEM_UNBIND                 _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_UNBIND_MEM, _mali_uk_unbind_mem_s)
+#define MALI_IOC_MEM_COW                    _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_COW_MEM, _mali_uk_cow_mem_s)
+#define MALI_IOC_MEM_COW_MODIFY_RANGE       _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_COW_MODIFY_RANGE, _mali_uk_cow_modify_range_s)
+#define MALI_IOC_MEM_RESIZE                 _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_RESIZE_MEM, _mali_uk_mem_resize_s)
+#define MALI_IOC_MEM_DMA_BUF_GET_SIZE       _IOR(MALI_IOC_MEMORY_BASE, _MALI_UK_DMA_BUF_GET_SIZE, _mali_uk_dma_buf_get_size_s)
+#define MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE _IOR(MALI_IOC_MEMORY_BASE, _MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, _mali_uk_query_mmu_page_table_dump_size_s)
+#define MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE    _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_DUMP_MMU_PAGE_TABLE, _mali_uk_dump_mmu_page_table_s)
+#define MALI_IOC_MEM_WRITE_SAFE             _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_MEM_WRITE_SAFE, _mali_uk_mem_write_safe_s)
+
+#define MALI_IOC_PP_START_JOB               _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_START_JOB, _mali_uk_pp_start_job_s)
+#define MALI_IOC_PP_AND_GP_START_JOB        _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_AND_GP_START_JOB, _mali_uk_pp_and_gp_start_job_s)
+#define MALI_IOC_PP_NUMBER_OF_CORES_GET     _IOR(MALI_IOC_PP_BASE, _MALI_UK_GET_PP_NUMBER_OF_CORES, _mali_uk_get_pp_number_of_cores_s)
+#define MALI_IOC_PP_CORE_VERSION_GET        _IOR(MALI_IOC_PP_BASE, _MALI_UK_GET_PP_CORE_VERSION, _mali_uk_get_pp_core_version_s)
+#define MALI_IOC_PP_DISABLE_WB              _IOW(MALI_IOC_PP_BASE, _MALI_UK_PP_DISABLE_WB, _mali_uk_pp_disable_wb_s)
+
+#define MALI_IOC_GP2_START_JOB              _IOWR(MALI_IOC_GP_BASE, _MALI_UK_GP_START_JOB, _mali_uk_gp_start_job_s)
+#define MALI_IOC_GP2_NUMBER_OF_CORES_GET    _IOR(MALI_IOC_GP_BASE, _MALI_UK_GET_GP_NUMBER_OF_CORES, _mali_uk_get_gp_number_of_cores_s)
+#define MALI_IOC_GP2_CORE_VERSION_GET       _IOR(MALI_IOC_GP_BASE, _MALI_UK_GET_GP_CORE_VERSION, _mali_uk_get_gp_core_version_s)
+#define MALI_IOC_GP2_SUSPEND_RESPONSE       _IOW (MALI_IOC_GP_BASE, _MALI_UK_GP_SUSPEND_RESPONSE, _mali_uk_gp_suspend_response_s)
+
+#define MALI_IOC_PROFILING_ADD_EVENT        _IOWR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_ADD_EVENT, _mali_uk_profiling_add_event_s)
+#define MALI_IOC_PROFILING_REPORT_SW_COUNTERS  _IOW(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_REPORT_SW_COUNTERS, _mali_uk_sw_counters_report_s)
+#define MALI_IOC_PROFILING_MEMORY_USAGE_GET _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_MEMORY_USAGE_GET, _mali_uk_profiling_memory_usage_get_s)
+#define MALI_IOC_PROFILING_STREAM_FD_GET        _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_STREAM_FD_GET, _mali_uk_profiling_stream_fd_get_s)
+#define MALI_IOC_PROILING_CONTROL_SET   _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_CONTROL_SET, _mali_uk_profiling_control_set_s)
+
+#define MALI_IOC_VSYNC_EVENT_REPORT         _IOW(MALI_IOC_VSYNC_BASE, _MALI_UK_VSYNC_EVENT_REPORT, _mali_uk_vsync_event_report_s)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_IOCTL_H__ */
diff --git a/utgard/r7p0/include/linux/mali/mali_utgard_profiling_events.h b/utgard/r7p0/include/linux/mali/mali_utgard_profiling_events.h
new file mode 100644
index 0000000..0bc57ca
--- /dev/null
+++ b/utgard/r7p0/include/linux/mali/mali_utgard_profiling_events.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library.
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so.
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef _MALI_UTGARD_PROFILING_EVENTS_H_
+#define _MALI_UTGARD_PROFILING_EVENTS_H_
+
+/*
+ * The event ID is a 32 bit value consisting of different fields
+ * reserved, 4 bits, for future use
+ * event type, 4 bits, cinstr_profiling_event_type_t
+ * event channel, 8 bits, the source of the event.
+ * event data, 16 bit field, data depending on event type
+ */
+
+/**
+ * Specifies what kind of event this is
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_TYPE_SINGLE  = 0 << 24,
+	MALI_PROFILING_EVENT_TYPE_START   = 1 << 24,
+	MALI_PROFILING_EVENT_TYPE_STOP    = 2 << 24,
+	MALI_PROFILING_EVENT_TYPE_SUSPEND = 3 << 24,
+	MALI_PROFILING_EVENT_TYPE_RESUME  = 4 << 24,
+} cinstr_profiling_event_type_t;
+
+
+/**
+ * Secifies the channel/source of the event
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_CHANNEL_SOFTWARE =  0 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_GP0      =  1 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP0      =  5 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP1      =  6 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP2      =  7 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP3      =  8 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP4      =  9 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP5      = 10 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP6      = 11 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_PP7      = 12 << 16,
+	MALI_PROFILING_EVENT_CHANNEL_GPU      = 21 << 16,
+} cinstr_profiling_event_channel_t;
+
+
+#define MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(num) (((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) + (num)) << 16)
+#define MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(num) (((MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) + (num)) << 16)
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from software channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_NONE                  = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_NEW_FRAME         = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_FLUSH                 = 2,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SWAP_BUFFERS      = 3,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_FB_EVENT              = 4,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE            = 5,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE            = 6,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_READBACK              = 7,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_WRITEBACK             = 8,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_ENTER_API_FUNC        = 10,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC        = 11,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_DISCARD_ATTACHMENTS   = 13,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_TRY_LOCK          = 53,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_LOCK              = 54,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_UNLOCK            = 55,
+	MALI_PROFILING_EVENT_REASON_SINGLE_LOCK_CONTENDED           = 56,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_MALI_FENCE_DUP    = 57,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SET_PP_JOB_FENCE  = 58,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_WAIT_SYNC         = 59,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_FENCE_SYNC = 60,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_NATIVE_FENCE_SYNC = 61,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FENCE_FLUSH       = 62,
+	MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FLUSH_SERVER_WAITS = 63,
+} cinstr_profiling_event_reason_single_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel
+ * to inform whether the core is physical or virtual
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL  = 0,
+	MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL   = 1,
+} cinstr_profiling_event_reason_start_stop_hw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel
+ */
+typedef enum {
+	/*MALI_PROFILING_EVENT_REASON_START_STOP_SW_NONE            = 0,*/
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_MALI            = 1,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_CALLBACK_THREAD = 2,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_WORKER_THREAD   = 3,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF     = 4,
+	MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF      = 5,
+} cinstr_profiling_event_reason_start_stop_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SUSPEND/RESUME is used from software channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_NONE                     =  0, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PIPELINE_FULL            =  1, /* NOT used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC                    = 26, /* used in some build configurations */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_WAIT           = 27, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_SYNC           = 28, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_FILTER_CLEANUP   = 29, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_TEXTURE          = 30, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_MIPLEVEL       = 31, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_READPIXELS     = 32, /* used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SWAP_IMMEDIATE  = 33, /* NOT used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_QUEUE_BUFFER         = 34, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_DEQUEUE_BUFFER       = 35, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_UMP_LOCK                 = 36, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_GLOBAL_LOCK          = 37, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_SWAP                 = 38, /* Not currently used */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_MALI_EGL_IMAGE_SYNC_WAIT = 39, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GP_JOB_HANDLING          = 40, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PP_JOB_HANDLING          = 41, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_MERGE     = 42, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_DUP       = 43,
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_FLUSH_SERVER_WAITS   = 44,
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SYNC            = 45, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_JOBS_WAIT             = 46, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOFRAMES_WAIT         = 47, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOJOBS_WAIT           = 48, /* USED */
+	MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_SUBMIT_LIMITER_WAIT      = 49, /* USED */
+} cinstr_profiling_event_reason_suspend_resume_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from a HW channel (GPx+PPx)
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_NONE          = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_INTERRUPT     = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH         = 2,
+} cinstr_profiling_event_reason_single_hw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from the GPU channel
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_NONE              = 0,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE  = 1,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS      = 2,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS      = 3,
+	MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS      = 4,
+} cinstr_profiling_event_reason_single_gpu_t;
+
+/**
+ * These values are applicable for the 3rd data parameter when
+ * the type MALI_PROFILING_EVENT_TYPE_START is used from the software channel
+ * with the MALI_PROFILING_EVENT_REASON_START_STOP_BOTTOM_HALF reason.
+ */
+typedef enum {
+	MALI_PROFILING_EVENT_DATA_CORE_GP0             =  1,
+	MALI_PROFILING_EVENT_DATA_CORE_PP0             =  5,
+	MALI_PROFILING_EVENT_DATA_CORE_PP1             =  6,
+	MALI_PROFILING_EVENT_DATA_CORE_PP2             =  7,
+	MALI_PROFILING_EVENT_DATA_CORE_PP3             =  8,
+	MALI_PROFILING_EVENT_DATA_CORE_PP4             =  9,
+	MALI_PROFILING_EVENT_DATA_CORE_PP5             = 10,
+	MALI_PROFILING_EVENT_DATA_CORE_PP6             = 11,
+	MALI_PROFILING_EVENT_DATA_CORE_PP7             = 12,
+	MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU         = 22, /* GP0 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU         = 26, /* PP0 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP1_MMU         = 27, /* PP1 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP2_MMU         = 28, /* PP2 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP3_MMU         = 29, /* PP3 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP4_MMU         = 30, /* PP4 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP5_MMU         = 31, /* PP5 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP6_MMU         = 32, /* PP6 + 21 */
+	MALI_PROFILING_EVENT_DATA_CORE_PP7_MMU         = 33, /* PP7 + 21 */
+
+} cinstr_profiling_event_data_core_t;
+
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0 + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0 + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU + (num))
+
+
+#endif /*_MALI_UTGARD_PROFILING_EVENTS_H_*/
diff --git a/utgard/r7p0/include/linux/mali/mali_utgard_profiling_gator_api.h b/utgard/r7p0/include/linux/mali/mali_utgard_profiling_gator_api.h
new file mode 100644
index 0000000..e24fa2d
--- /dev/null
+++ b/utgard/r7p0/include/linux/mali/mali_utgard_profiling_gator_api.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2013, 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library.
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so.
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef __MALI_UTGARD_PROFILING_GATOR_API_H__
+#define __MALI_UTGARD_PROFILING_GATOR_API_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MALI_PROFILING_API_VERSION 4
+
+#define MAX_NUM_L2_CACHE_CORES 3
+#define MAX_NUM_FP_CORES 8
+#define MAX_NUM_VP_CORES 1
+
+#define _MALI_SPCIAL_COUNTER_DESCRIPTIONS \
+	{                                           \
+		"Filmstrip_cnt0",                 \
+		"Frequency",       \
+		"Voltage",       \
+		"vertex",     \
+		"fragment",         \
+		"Total_alloc_pages",        \
+	};
+
+#define _MALI_MEM_COUTNER_DESCRIPTIONS \
+	{                                           \
+		"untyped_memory",                 \
+		"vertex_index_buffer",       \
+		"texture_buffer",       \
+		"varying_buffer",     \
+		"render_target",         \
+		"pbuffer_buffer",        \
+		"plbu_heap",            \
+		"pointer_array_buffer",             \
+		"slave_tilelist",          \
+		"untyped_gp_cmdlist",     \
+		"polygon_cmdlist",               \
+		"texture_descriptor",               \
+		"render_state_word",               \
+		"shader",               \
+		"stream_buffer",               \
+		"fragment_stack",               \
+		"uniform",               \
+		"untyped_frame_pool",               \
+		"untyped_surface",               \
+	};
+
+/** The list of events supported by the Mali DDK. */
+typedef enum {
+	/* Vertex processor activity */
+	ACTIVITY_VP_0 = 0,
+
+	/* Fragment processor activity */
+	ACTIVITY_FP_0,
+	ACTIVITY_FP_1,
+	ACTIVITY_FP_2,
+	ACTIVITY_FP_3,
+	ACTIVITY_FP_4,
+	ACTIVITY_FP_5,
+	ACTIVITY_FP_6,
+	ACTIVITY_FP_7,
+
+	/* L2 cache counters */
+	COUNTER_L2_0_C0,
+	COUNTER_L2_0_C1,
+	COUNTER_L2_1_C0,
+	COUNTER_L2_1_C1,
+	COUNTER_L2_2_C0,
+	COUNTER_L2_2_C1,
+
+	/* Vertex processor counters */
+	COUNTER_VP_0_C0,
+	COUNTER_VP_0_C1,
+
+	/* Fragment processor counters */
+	COUNTER_FP_0_C0,
+	COUNTER_FP_0_C1,
+	COUNTER_FP_1_C0,
+	COUNTER_FP_1_C1,
+	COUNTER_FP_2_C0,
+	COUNTER_FP_2_C1,
+	COUNTER_FP_3_C0,
+	COUNTER_FP_3_C1,
+	COUNTER_FP_4_C0,
+	COUNTER_FP_4_C1,
+	COUNTER_FP_5_C0,
+	COUNTER_FP_5_C1,
+	COUNTER_FP_6_C0,
+	COUNTER_FP_6_C1,
+	COUNTER_FP_7_C0,
+	COUNTER_FP_7_C1,
+
+	/*
+	 * If more hardware counters are added, the _mali_osk_hw_counter_table
+	 * below should also be updated.
+	 */
+
+	/* EGL software counters */
+	COUNTER_EGL_BLIT_TIME,
+
+	/* GLES software counters */
+	COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+	COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+	COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+	COUNTER_GLES_DRAW_ARRAYS_CALLS,
+	COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+	COUNTER_GLES_DRAW_POINTS,
+	COUNTER_GLES_DRAW_LINES,
+	COUNTER_GLES_DRAW_LINE_LOOP,
+	COUNTER_GLES_DRAW_LINE_STRIP,
+	COUNTER_GLES_DRAW_TRIANGLES,
+	COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+	COUNTER_GLES_DRAW_TRIANGLE_FAN,
+	COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+	COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+	COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+	COUNTER_GLES_UPLOAD_VBO_TIME,
+	COUNTER_GLES_NUM_FLUSHES,
+	COUNTER_GLES_NUM_VSHADERS_GENERATED,
+	COUNTER_GLES_NUM_FSHADERS_GENERATED,
+	COUNTER_GLES_VSHADER_GEN_TIME,
+	COUNTER_GLES_FSHADER_GEN_TIME,
+	COUNTER_GLES_INPUT_TRIANGLES,
+	COUNTER_GLES_VXCACHE_HIT,
+	COUNTER_GLES_VXCACHE_MISS,
+	COUNTER_GLES_VXCACHE_COLLISION,
+	COUNTER_GLES_CULLED_TRIANGLES,
+	COUNTER_GLES_CULLED_LINES,
+	COUNTER_GLES_BACKFACE_TRIANGLES,
+	COUNTER_GLES_GBCLIP_TRIANGLES,
+	COUNTER_GLES_GBCLIP_LINES,
+	COUNTER_GLES_TRIANGLES_DRAWN,
+	COUNTER_GLES_DRAWCALL_TIME,
+	COUNTER_GLES_TRIANGLES_COUNT,
+	COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+	COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+	COUNTER_GLES_FAN_TRIANGLES_COUNT,
+	COUNTER_GLES_LINES_COUNT,
+	COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+	COUNTER_GLES_STRIP_LINES_COUNT,
+	COUNTER_GLES_LOOP_LINES_COUNT,
+
+	/* Special counter */
+
+	/* Framebuffer capture pseudo-counter */
+	COUNTER_FILMSTRIP,
+	COUNTER_FREQUENCY,
+	COUNTER_VOLTAGE,
+	COUNTER_VP_ACTIVITY,
+	COUNTER_FP_ACTIVITY,
+	COUNTER_TOTAL_ALLOC_PAGES,
+
+	/* Memory usage counter */
+	COUNTER_MEM_UNTYPED,
+	COUNTER_MEM_VB_IB,
+	COUNTER_MEM_TEXTURE,
+	COUNTER_MEM_VARYING,
+	COUNTER_MEM_RT,
+	COUNTER_MEM_PBUFFER,
+	/* memory usages for gp command */
+	COUNTER_MEM_PLBU_HEAP,
+	COUNTER_MEM_POINTER_ARRAY,
+	COUNTER_MEM_SLAVE_TILELIST,
+	COUNTER_MEM_UNTYPE_GP_CMDLIST,
+	/* memory usages for polygon list command */
+	COUNTER_MEM_POLYGON_CMDLIST,
+	/* memory usages for pp command */
+	COUNTER_MEM_TD,
+	COUNTER_MEM_RSW,
+	/* other memory usages */
+	COUNTER_MEM_SHADER,
+	COUNTER_MEM_STREAMS,
+	COUNTER_MEM_FRAGMENT_STACK,
+	COUNTER_MEM_UNIFORM,
+	/* Special mem usage, which is used for mem pool allocation */
+	COUNTER_MEM_UNTYPE_MEM_POOL,
+	COUNTER_MEM_UNTYPE_SURFACE,
+
+	NUMBER_OF_EVENTS
+} _mali_osk_counter_id;
+
+#define FIRST_ACTIVITY_EVENT    ACTIVITY_VP_0
+#define LAST_ACTIVITY_EVENT     ACTIVITY_FP_7
+
+#define FIRST_HW_COUNTER        COUNTER_L2_0_C0
+#define LAST_HW_COUNTER         COUNTER_FP_7_C1
+
+#define FIRST_SW_COUNTER        COUNTER_EGL_BLIT_TIME
+#define LAST_SW_COUNTER         COUNTER_GLES_LOOP_LINES_COUNT
+
+#define FIRST_SPECIAL_COUNTER   COUNTER_FILMSTRIP
+#define LAST_SPECIAL_COUNTER    COUNTER_TOTAL_ALLOC_PAGES
+
+#define FIRST_MEM_COUNTER               COUNTER_MEM_UNTYPED
+#define LAST_MEM_COUNTER                COUNTER_MEM_UNTYPE_SURFACE
+
+#define MALI_PROFILING_MEM_COUNTERS_NUM (LAST_MEM_COUNTER - FIRST_MEM_COUNTER + 1)
+#define MALI_PROFILING_SPECIAL_COUNTERS_NUM     (LAST_SPECIAL_COUNTER - FIRST_SPECIAL_COUNTER + 1)
+#define MALI_PROFILING_SW_COUNTERS_NUM  (LAST_SW_COUNTER - FIRST_SW_COUNTER + 1)
+
+/**
+ * Define the stream header type for porfiling stream.
+ */
+#define  STREAM_HEADER_FRAMEBUFFER 0x05         /* The stream packet header type for framebuffer dumping. */
+#define STREAM_HEADER_COUNTER_VALUE  0x09       /* The stream packet header type for hw/sw/memory counter sampling. */
+#define STREAM_HEADER_CORE_ACTIVITY 0x0a                /* The stream packet header type for activity counter sampling. */
+#define STREAM_HEADER_SIZE      5
+
+/**
+ * Define the packet header type of profiling control packet.
+ */
+#define PACKET_HEADER_ERROR            0x80             /* The response packet header type if error. */
+#define PACKET_HEADER_ACK              0x81             /* The response packet header type if OK. */
+#define PACKET_HEADER_COUNTERS_REQUEST 0x82             /* The control packet header type to request counter information from ddk. */
+#define PACKET_HEADER_COUNTERS_ACK         0x83         /* The response packet header type to send out counter information. */
+#define PACKET_HEADER_COUNTERS_ENABLE  0x84             /* The control packet header type to enable counters. */
+#define PACKET_HEADER_START_CAPTURE_VALUE            0x85               /* The control packet header type to start capture values. */
+
+#define PACKET_HEADER_SIZE      5
+
+/**
+ * Structure to pass performance counter data of a Mali core
+ */
+typedef struct _mali_profiling_core_counters {
+	u32 source0;
+	u32 value0;
+	u32 source1;
+	u32 value1;
+} _mali_profiling_core_counters;
+
+/**
+ * Structure to pass performance counter data of Mali L2 cache cores
+ */
+typedef struct _mali_profiling_l2_counter_values {
+	struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+} _mali_profiling_l2_counter_values;
+
+/**
+ * Structure to pass data defining Mali instance in use:
+ *
+ * mali_product_id - Mali product id
+ * mali_version_major - Mali version major number
+ * mali_version_minor - Mali version minor number
+ * num_of_l2_cores - number of L2 cache cores
+ * num_of_fp_cores - number of fragment processor cores
+ * num_of_vp_cores - number of vertex processor cores
+ */
+typedef struct _mali_profiling_mali_version {
+	u32 mali_product_id;
+	u32 mali_version_major;
+	u32 mali_version_minor;
+	u32 num_of_l2_cores;
+	u32 num_of_fp_cores;
+	u32 num_of_vp_cores;
+} _mali_profiling_mali_version;
+
+/**
+ * Structure to define the mali profiling counter struct.
+ */
+typedef struct mali_profiling_counter {
+	char counter_name[40];
+	u32 counter_id;
+	u32 counter_event;
+	u32 prev_counter_value;
+	u32 current_counter_value;
+	u32 key;
+	int enabled;
+} mali_profiling_counter;
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting.
+ * We cannot use the enums in mali_uk_types.h because they are unknown inside gator.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define MEM_COUNTER_ENABLE (5)
+#define ANNOTATE_PROFILING_ENABLE (6)
+
+void _mali_profiling_control(u32 action, u32 value);
+
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values);
+
+int _mali_profiling_set_event(u32 counter_id, s32 event_id);
+
+u32 _mali_profiling_get_api_version(void);
+
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_PROFILING_GATOR_API_H__ */
diff --git a/utgard/r7p0/include/linux/mali/mali_utgard_uk_types.h b/utgard/r7p0/include/linux/mali/mali_utgard_uk_types.h
new file mode 100644
index 0000000..fc817a6
--- /dev/null
+++ b/utgard/r7p0/include/linux/mali/mali_utgard_uk_types.h
@@ -0,0 +1,1100 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library.
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so.
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+/**
+ * @file mali_uk_types.h
+ * Defines the types and constants used in the user-kernel interface
+ */
+
+#ifndef __MALI_UTGARD_UK_TYPES_H__
+#define __MALI_UTGARD_UK_TYPES_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Iteration functions depend on these values being consecutive. */
+#define MALI_UK_TIMELINE_GP   0
+#define MALI_UK_TIMELINE_PP   1
+#define MALI_UK_TIMELINE_SOFT 2
+#define MALI_UK_TIMELINE_MAX  3
+
+#define MALI_UK_BIG_VARYING_SIZE  (1024*1024*2)
+
+typedef struct {
+	u32 points[MALI_UK_TIMELINE_MAX];
+	s32 sync_fd;
+} _mali_uk_fence_t;
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs
+ *
+ * @{
+ */
+
+/** @defgroup _mali_uk_core U/K Core
+ * @{ */
+
+/** Definition of subsystem numbers, to assist in creating a unique identifier
+ * for each U/K call.
+ *
+ * @see _mali_uk_functions */
+typedef enum {
+	_MALI_UK_CORE_SUBSYSTEM,      /**< Core Group of U/K calls */
+	_MALI_UK_MEMORY_SUBSYSTEM,    /**< Memory Group of U/K calls */
+	_MALI_UK_PP_SUBSYSTEM,        /**< Fragment Processor Group of U/K calls */
+	_MALI_UK_GP_SUBSYSTEM,        /**< Vertex Processor Group of U/K calls */
+	_MALI_UK_PROFILING_SUBSYSTEM, /**< Profiling Group of U/K calls */
+	_MALI_UK_VSYNC_SUBSYSTEM,     /**< VSYNC Group of U/K calls */
+} _mali_uk_subsystem_t;
+
+/** Within a function group each function has its unique sequence number
+ * to assist in creating a unique identifier for each U/K call.
+ *
+ * An ordered pair of numbers selected from
+ * ( \ref _mali_uk_subsystem_t,\ref  _mali_uk_functions) will uniquely identify the
+ * U/K call across all groups of functions, and all functions. */
+typedef enum {
+	/** Core functions */
+
+	_MALI_UK_OPEN                    = 0, /**< _mali_ukk_open() */
+	_MALI_UK_CLOSE,                       /**< _mali_ukk_close() */
+	_MALI_UK_WAIT_FOR_NOTIFICATION,       /**< _mali_ukk_wait_for_notification() */
+	_MALI_UK_GET_API_VERSION,             /**< _mali_ukk_get_api_version() */
+	_MALI_UK_POST_NOTIFICATION,           /**< _mali_ukk_post_notification() */
+	_MALI_UK_GET_USER_SETTING,            /**< _mali_ukk_get_user_setting() *//**< [out] */
+	_MALI_UK_GET_USER_SETTINGS,           /**< _mali_ukk_get_user_settings() *//**< [out] */
+	_MALI_UK_REQUEST_HIGH_PRIORITY,       /**< _mali_ukk_request_high_priority() */
+	_MALI_UK_TIMELINE_GET_LATEST_POINT,   /**< _mali_ukk_timeline_get_latest_point() */
+	_MALI_UK_TIMELINE_WAIT,               /**< _mali_ukk_timeline_wait() */
+	_MALI_UK_TIMELINE_CREATE_SYNC_FENCE,  /**< _mali_ukk_timeline_create_sync_fence() */
+	_MALI_UK_SOFT_JOB_START,              /**< _mali_ukk_soft_job_start() */
+	_MALI_UK_SOFT_JOB_SIGNAL,             /**< _mali_ukk_soft_job_signal() */
+	_MALI_UK_PENDING_SUBMIT,             /**< _mali_ukk_pending_submit() */
+
+	/** Memory functions */
+
+	_MALI_UK_ALLOC_MEM                = 0,   /**< _mali_ukk_alloc_mem() */
+	_MALI_UK_FREE_MEM,                       /**< _mali_ukk_free_mem() */
+	_MALI_UK_BIND_MEM,                       /**< _mali_ukk_mem_bind() */
+	_MALI_UK_UNBIND_MEM,                     /**< _mali_ukk_mem_unbind() */
+	_MALI_UK_COW_MEM,                        /**< _mali_ukk_mem_cow() */
+	_MALI_UK_COW_MODIFY_RANGE,               /**< _mali_ukk_mem_cow_modify_range() */
+	_MALI_UK_RESIZE_MEM,                     /**<._mali_ukk_mem_resize() */
+	_MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, /**< _mali_ukk_mem_get_mmu_page_table_dump_size() */
+	_MALI_UK_DUMP_MMU_PAGE_TABLE,            /**< _mali_ukk_mem_dump_mmu_page_table() */
+	_MALI_UK_DMA_BUF_GET_SIZE,               /**< _mali_ukk_dma_buf_get_size() */
+	_MALI_UK_MEM_WRITE_SAFE,                 /**< _mali_uku_mem_write_safe() */
+
+	/** Common functions for each core */
+
+	_MALI_UK_START_JOB           = 0,     /**< Start a Fragment/Vertex Processor Job on a core */
+	_MALI_UK_GET_NUMBER_OF_CORES,         /**< Get the number of Fragment/Vertex Processor cores */
+	_MALI_UK_GET_CORE_VERSION,            /**< Get the Fragment/Vertex Processor version compatible with all cores */
+
+	/** Fragment Processor Functions  */
+
+	_MALI_UK_PP_START_JOB            = _MALI_UK_START_JOB,            /**< _mali_ukk_pp_start_job() */
+	_MALI_UK_GET_PP_NUMBER_OF_CORES  = _MALI_UK_GET_NUMBER_OF_CORES,  /**< _mali_ukk_get_pp_number_of_cores() */
+	_MALI_UK_GET_PP_CORE_VERSION     = _MALI_UK_GET_CORE_VERSION,     /**< _mali_ukk_get_pp_core_version() */
+	_MALI_UK_PP_DISABLE_WB,                                           /**< _mali_ukk_pp_job_disable_wb() */
+	_MALI_UK_PP_AND_GP_START_JOB,                                     /**< _mali_ukk_pp_and_gp_start_job() */
+
+	/** Vertex Processor Functions  */
+
+	_MALI_UK_GP_START_JOB            = _MALI_UK_START_JOB,            /**< _mali_ukk_gp_start_job() */
+	_MALI_UK_GET_GP_NUMBER_OF_CORES  = _MALI_UK_GET_NUMBER_OF_CORES,  /**< _mali_ukk_get_gp_number_of_cores() */
+	_MALI_UK_GET_GP_CORE_VERSION     = _MALI_UK_GET_CORE_VERSION,     /**< _mali_ukk_get_gp_core_version() */
+	_MALI_UK_GP_SUSPEND_RESPONSE,                                     /**< _mali_ukk_gp_suspend_response() */
+
+	/** Profiling functions */
+
+	_MALI_UK_PROFILING_ADD_EVENT     = 0, /**< __mali_uku_profiling_add_event() */
+	_MALI_UK_PROFILING_REPORT_SW_COUNTERS,/**< __mali_uku_profiling_report_sw_counters() */
+	_MALI_UK_PROFILING_MEMORY_USAGE_GET,  /**< __mali_uku_profiling_memory_usage_get() */
+	_MALI_UK_PROFILING_STREAM_FD_GET, /** < __mali_uku_profiling_stream_fd_get() */
+	_MALI_UK_PROFILING_CONTROL_SET, /** < __mali_uku_profiling_control_set() */
+
+	/** VSYNC reporting fuctions */
+	_MALI_UK_VSYNC_EVENT_REPORT      = 0, /**< _mali_ukk_vsync_event_report() */
+} _mali_uk_functions;
+
+/** @defgroup _mali_uk_getsysteminfo U/K Get System Info
+ * @{ */
+
+/**
+ * Type definition for the core version number.
+ * Used when returning the version number read from a core
+ *
+ * Its format is that of the 32-bit Version register for a particular core.
+ * Refer to the "Mali200 and MaliGP2 3D Graphics Processor Technical Reference
+ * Manual", ARM DDI 0415C, for more information.
+ */
+typedef u32 _mali_core_version;
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @defgroup _mali_uk_gp U/K Vertex Processor
+ * @{ */
+
+/** @defgroup _mali_uk_gp_suspend_response_s Vertex Processor Suspend Response
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_gp_suspend_response()
+ *
+ * When _mali_wait_for_notification() receives notification that a
+ * Vertex Processor job was suspended, you need to send a response to indicate
+ * what needs to happen with this job. You can either abort or resume the job.
+ *
+ * - set @c code to indicate response code. This is either @c _MALIGP_JOB_ABORT or
+ * @c _MALIGP_JOB_RESUME_WITH_NEW_HEAP to indicate you will provide a new heap
+ * for the job that will resolve the out of memory condition for the job.
+ * - copy the @c cookie value from the @c _mali_uk_gp_job_suspended_s notification;
+ * this is an identifier for the suspended job
+ * - set @c arguments[0] and @c arguments[1] to zero if you abort the job. If
+ * you resume it, @c argument[0] should specify the Mali start address for the new
+ * heap and @c argument[1] the Mali end address of the heap.
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ *
+ */
+typedef enum _maligp_job_suspended_response_code {
+	_MALIGP_JOB_ABORT,                  /**< Abort the Vertex Processor job */
+	_MALIGP_JOB_RESUME_WITH_NEW_HEAP    /**< Resume the Vertex Processor job with a new heap */
+} _maligp_job_suspended_response_code;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 cookie;                     /**< [in] cookie from the _mali_uk_gp_job_suspended_s notification */
+	_maligp_job_suspended_response_code code; /**< [in] abort or resume response code, see \ref _maligp_job_suspended_response_code */
+	u32 arguments[2];               /**< [in] 0 when aborting a job. When resuming a job, the Mali start and end address for a new heap to resume the job with */
+} _mali_uk_gp_suspend_response_s;
+
+/** @} */ /* end group _mali_uk_gp_suspend_response_s */
+
+/** @defgroup _mali_uk_gpstartjob_s Vertex Processor Start Job
+ * @{ */
+
+/** @brief Status indicating the result of the execution of a Vertex or Fragment processor job  */
+typedef enum {
+	_MALI_UK_JOB_STATUS_END_SUCCESS         = 1 << (16 + 0),
+	_MALI_UK_JOB_STATUS_END_OOM             = 1 << (16 + 1),
+	_MALI_UK_JOB_STATUS_END_ABORT           = 1 << (16 + 2),
+	_MALI_UK_JOB_STATUS_END_TIMEOUT_SW      = 1 << (16 + 3),
+	_MALI_UK_JOB_STATUS_END_HANG            = 1 << (16 + 4),
+	_MALI_UK_JOB_STATUS_END_SEG_FAULT       = 1 << (16 + 5),
+	_MALI_UK_JOB_STATUS_END_ILLEGAL_JOB     = 1 << (16 + 6),
+	_MALI_UK_JOB_STATUS_END_UNKNOWN_ERR     = 1 << (16 + 7),
+	_MALI_UK_JOB_STATUS_END_SHUTDOWN        = 1 << (16 + 8),
+	_MALI_UK_JOB_STATUS_END_SYSTEM_UNUSABLE = 1 << (16 + 9)
+} _mali_uk_job_status;
+
+#define MALIGP2_NUM_REGS_FRAME (6)
+
+/** @brief Arguments for _mali_ukk_gp_start_job()
+ *
+ * To start a Vertex Processor job
+ * - associate the request with a reference to a @c mali_gp_job_info by setting
+ * user_job_ptr to the address of the @c mali_gp_job_info of the job.
+ * - set @c priority to the priority of the @c mali_gp_job_info
+ * - specify a timeout for the job by setting @c watchdog_msecs to the number of
+ * milliseconds the job is allowed to run. Specifying a value of 0 selects the
+ * default timeout in use by the device driver.
+ * - copy the frame registers from the @c mali_gp_job_info into @c frame_registers.
+ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero
+ * for a non-instrumented build. For an instrumented build you can use up
+ * to two performance counters. Set the corresponding bit in @c perf_counter_flag
+ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify
+ * the source of what needs to get counted (e.g. number of vertex loader
+ * cache hits). For source id values, see ARM DDI0415A, Table 3-60.
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ *
+ * When @c _mali_ukk_gp_start_job() returns @c _MALI_OSK_ERR_OK, status contains the
+ * result of the request (see \ref _mali_uk_start_job_status). If the job could
+ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be
+ * tried again.
+ *
+ * After the job has started, @c _mali_wait_for_notification() will be notified
+ * that the job finished or got suspended. It may get suspended due to
+ * resource shortage. If it finished (see _mali_ukk_wait_for_notification())
+ * the notification will contain a @c _mali_uk_gp_job_finished_s result. If
+ * it got suspended the notification will contain a @c _mali_uk_gp_job_suspended_s
+ * result.
+ *
+ * The @c _mali_uk_gp_job_finished_s contains the job status (see \ref _mali_uk_job_status),
+ * the number of milliseconds the job took to render, and values of core registers
+ * when the job finished (irq status, performance counters, renderer list
+ * address). A job has finished succesfully when its status is
+ * @c _MALI_UK_JOB_STATUS_FINISHED. If the hardware detected a timeout while rendering
+ * the job, or software detected the job is taking more than watchdog_msecs to
+ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_HANG.
+ * If the hardware detected a bus error while accessing memory associated with the
+ * job, status will indicate @c _MALI_UK_JOB_STATUS_SEG_FAULT.
+ * status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to
+ * stop the job but the job didn't start on the hardware yet, e.g. when the
+ * driver shutdown.
+ *
+ * In case the job got suspended, @c _mali_uk_gp_job_suspended_s contains
+ * the @c user_job_ptr identifier used to start the job with, the @c reason
+ * why the job stalled (see \ref _maligp_job_suspended_reason) and a @c cookie
+ * to identify the core on which the job stalled.  This @c cookie will be needed
+ * when responding to this nofication by means of _mali_ukk_gp_suspend_response().
+ * (see _mali_ukk_gp_suspend_response()). The response is either to abort or
+ * resume the job. If the job got suspended due to an out of memory condition
+ * you may be able to resolve this by providing more memory and resuming the job.
+ *
+ */
+typedef struct {
+	u64 ctx;                          /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job_ptr;                   /**< [in] identifier for the job in user space, a @c mali_gp_job_info* */
+	u32 priority;                       /**< [in] job priority. A lower number means higher priority */
+	u32 frame_registers[MALIGP2_NUM_REGS_FRAME]; /**< [in] core specific registers associated with this job */
+	u32 perf_counter_flag;              /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */
+	u32 perf_counter_src0;              /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */
+	u32 perf_counter_src1;              /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */
+	u32 frame_builder_id;               /**< [in] id of the originating frame builder */
+	u32 flush_id;                       /**< [in] flush id within the originating frame builder */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u64 timeline_point_ptr;            /**< [in,out] pointer to u32: location where point on gp timeline for this job will be written */
+	u32 varying_memsize;            /** < [in] size of varying memory to use deffer bind*/
+	u32 deferred_mem_num;
+	u64 deferred_mem_list;         /** < [in] memory hanlde list of varying buffer to use deffer bind */
+} _mali_uk_gp_start_job_s;
+
+#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE (1<<0) /**< Enable performance counter SRC0 for a job */
+#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE (1<<1) /**< Enable performance counter SRC1 for a job */
+#define _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE (1<<2) /**< Enable per tile (aka heatmap) generation with for a job (using the enabled counter sources) */
+
+/** @} */ /* end group _mali_uk_gpstartjob_s */
+
+typedef struct {
+	u64 user_job_ptr;               /**< [out] identifier for the job in user space */
+	_mali_uk_job_status status;     /**< [out] status of finished job */
+	u32 heap_current_addr;          /**< [out] value of the GP PLB PL heap start address register */
+	u32 perf_counter0;              /**< [out] value of performance counter 0 (see ARM DDI0415A) */
+	u32 perf_counter1;              /**< [out] value of performance counter 1 (see ARM DDI0415A) */
+	u32 pending_big_job_num;
+} _mali_uk_gp_job_finished_s;
+
+typedef struct {
+	u64 user_job_ptr;                    /**< [out] identifier for the job in user space */
+	u32 cookie;                          /**< [out] identifier for the core in kernel space on which the job stalled */
+} _mali_uk_gp_job_suspended_s;
+
+/** @} */ /* end group _mali_uk_gp */
+
+
+/** @defgroup _mali_uk_pp U/K Fragment Processor
+ * @{ */
+
+#define _MALI_PP_MAX_SUB_JOBS 8
+
+#define _MALI_PP_MAX_FRAME_REGISTERS ((0x058/4)+1)
+
+#define _MALI_PP_MAX_WB_REGISTERS ((0x02C/4)+1)
+
+#define _MALI_DLBU_MAX_REGISTERS 4
+
+/** Flag for _mali_uk_pp_start_job_s */
+#define _MALI_PP_JOB_FLAG_NO_NOTIFICATION (1<<0)
+#define _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE (1<<1)
+#define _MALI_PP_JOB_FLAG_PROTECTED (1<<2)
+
+/** @defgroup _mali_uk_ppstartjob_s Fragment Processor Start Job
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_pp_start_job()
+ *
+ * To start a Fragment Processor job
+ * - associate the request with a reference to a mali_pp_job by setting
+ * @c user_job_ptr to the address of the @c mali_pp_job of the job.
+ * - set @c priority to the priority of the mali_pp_job
+ * - specify a timeout for the job by setting @c watchdog_msecs to the number of
+ * milliseconds the job is allowed to run. Specifying a value of 0 selects the
+ * default timeout in use by the device driver.
+ * - copy the frame registers from the @c mali_pp_job into @c frame_registers.
+ * For MALI200 you also need to copy the write back 0,1 and 2 registers.
+ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero
+ * for a non-instrumented build. For an instrumented build you can use up
+ * to two performance counters. Set the corresponding bit in @c perf_counter_flag
+ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify
+ * the source of what needs to get counted (e.g. number of vertex loader
+ * cache hits). For source id values, see ARM DDI0415A, Table 3-60.
+ * - pass in the user-kernel context in @c ctx that was returned from _mali_ukk_open()
+ *
+ * When _mali_ukk_pp_start_job() returns @c _MALI_OSK_ERR_OK, @c status contains the
+ * result of the request (see \ref _mali_uk_start_job_status). If the job could
+ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be
+ * tried again.
+ *
+ * After the job has started, _mali_wait_for_notification() will be notified
+ * when the job finished. The notification will contain a
+ * @c _mali_uk_pp_job_finished_s result. It contains the @c user_job_ptr
+ * identifier used to start the job with, the job @c status (see \ref _mali_uk_job_status),
+ * the number of milliseconds the job took to render, and values of core registers
+ * when the job finished (irq status, performance counters, renderer list
+ * address). A job has finished succesfully when its status is
+ * @c _MALI_UK_JOB_STATUS_FINISHED. If the hardware detected a timeout while rendering
+ * the job, or software detected the job is taking more than @c watchdog_msecs to
+ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_HANG.
+ * If the hardware detected a bus error while accessing memory associated with the
+ * job, status will indicate @c _MALI_UK_JOB_STATUS_SEG_FAULT.
+ * status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to
+ * stop the job but the job didn't start on the hardware yet, e.g. when the
+ * driver shutdown.
+ *
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job_ptr;               /**< [in] identifier for the job in user space */
+	u32 priority;                   /**< [in] job priority. A lower number means higher priority */
+	u32 frame_registers[_MALI_PP_MAX_FRAME_REGISTERS];         /**< [in] core specific registers associated with first sub job, see ARM DDI0415A */
+	u32 frame_registers_addr_frame[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_FRAME registers for sub job 1-7 */
+	u32 frame_registers_addr_stack[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_STACK registers for sub job 1-7 */
+	u32 wb0_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 wb1_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 wb2_registers[_MALI_PP_MAX_WB_REGISTERS];
+	u32 dlbu_registers[_MALI_DLBU_MAX_REGISTERS]; /**< [in] Dynamic load balancing unit registers */
+	u32 num_cores;                      /**< [in] Number of cores to set up (valid range: 1-8(M450) or 4(M400)) */
+	u32 perf_counter_flag;              /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */
+	u32 perf_counter_src0;              /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */
+	u32 perf_counter_src1;              /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */
+	u32 frame_builder_id;               /**< [in] id of the originating frame builder */
+	u32 flush_id;                       /**< [in] flush id within the originating frame builder */
+	u32 flags;                          /**< [in] See _MALI_PP_JOB_FLAG_* for a list of avaiable flags */
+	u32 tilesx;                         /**< [in] number of tiles in the x direction (needed for heatmap generation */
+	u32 tilesy;                         /**< [in] number of tiles in y direction (needed for reading the heatmap memory) */
+	u32 heatmap_mem;                    /**< [in] memory address to store counter values per tile (aka heatmap) */
+	u32 num_memory_cookies;             /**< [in] number of memory cookies attached to job */
+	u64 memory_cookies;               /**< [in] pointer to array of u32 memory cookies attached to job */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u64 timeline_point_ptr;           /**< [in,out] pointer to location of u32 where point on pp timeline for this job will be written */
+} _mali_uk_pp_start_job_s;
+
+typedef struct {
+	u64 ctx;       /**< [in,out] user-kernel context (trashed on output) */
+	u64 gp_args;   /**< [in,out] GP uk arguments (see _mali_uk_gp_start_job_s) */
+	u64 pp_args;   /**< [in,out] PP uk arguments (see _mali_uk_pp_start_job_s) */
+} _mali_uk_pp_and_gp_start_job_s;
+
+/** @} */ /* end group _mali_uk_ppstartjob_s */
+
+typedef struct {
+	u64 user_job_ptr;                          /**< [out] identifier for the job in user space */
+	_mali_uk_job_status status;                /**< [out] status of finished job */
+	u32 perf_counter0[_MALI_PP_MAX_SUB_JOBS];  /**< [out] value of perfomance counter 0 (see ARM DDI0415A), one for each sub job */
+	u32 perf_counter1[_MALI_PP_MAX_SUB_JOBS];  /**< [out] value of perfomance counter 1 (see ARM DDI0415A), one for each sub job */
+	u32 perf_counter_src0;
+	u32 perf_counter_src1;
+} _mali_uk_pp_job_finished_s;
+
+typedef struct {
+	u32 number_of_enabled_cores;               /**< [out] the new number of enabled cores */
+} _mali_uk_pp_num_cores_changed_s;
+
+
+
+/**
+ * Flags to indicate write-back units
+ */
+typedef enum {
+	_MALI_UK_PP_JOB_WB0 = 1,
+	_MALI_UK_PP_JOB_WB1 = 2,
+	_MALI_UK_PP_JOB_WB2 = 4,
+} _mali_uk_pp_job_wbx_flag;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 fb_id;                      /**< [in] Frame builder ID of job to disable WB units for */
+	u32 wb0_memory;
+	u32 wb1_memory;
+	u32 wb2_memory;
+} _mali_uk_pp_disable_wb_s;
+
+
+/** @} */ /* end group _mali_uk_pp */
+
+/** @defgroup _mali_uk_soft_job U/K Soft Job
+ * @{ */
+
+typedef struct {
+	u64 ctx;                            /**< [in,out] user-kernel context (trashed on output) */
+	u64 user_job;                       /**< [in] identifier for the job in user space */
+	u64 job_id_ptr;                     /**< [in,out] pointer to location of u32 where job id will be written */
+	_mali_uk_fence_t fence;             /**< [in] fence this job must wait on */
+	u32 point;                          /**< [out] point on soft timeline for this job */
+	u32 type;                           /**< [in] type of soft job */
+} _mali_uk_soft_job_start_s;
+
+typedef struct {
+	u64 user_job;                       /**< [out] identifier for the job in user space */
+} _mali_uk_soft_job_activated_s;
+
+typedef struct {
+	u64 ctx;                          /**< [in,out] user-kernel context (trashed on output) */
+	u32 job_id;                         /**< [in] id for soft job */
+} _mali_uk_soft_job_signal_s;
+
+/** @} */ /* end group _mali_uk_soft_job */
+
+typedef struct {
+	u32 counter_id;
+	u32 key;
+	int enable;
+} _mali_uk_annotate_profiling_mem_counter_s;
+
+typedef struct {
+	u32 sampling_rate;
+	int enable;
+} _mali_uk_annotate_profiling_enable_s;
+
+
+/** @addtogroup _mali_uk_core U/K Core
+ * @{ */
+
+/** @defgroup _mali_uk_waitfornotification_s Wait For Notification
+ * @{ */
+
+/** @brief Notification type encodings
+ *
+ * Each Notification type is an ordered pair of (subsystem,id), and is unique.
+ *
+ * The encoding of subsystem,id into a 32-bit word is:
+ * encoding = (( subsystem << _MALI_NOTIFICATION_SUBSYSTEM_SHIFT ) & _MALI_NOTIFICATION_SUBSYSTEM_MASK)
+ *            | (( id <<  _MALI_NOTIFICATION_ID_SHIFT ) & _MALI_NOTIFICATION_ID_MASK)
+ *
+ * @see _mali_uk_wait_for_notification_s
+ */
+typedef enum {
+	/** core notifications */
+
+	_MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x20,
+	_MALI_NOTIFICATION_APPLICATION_QUIT = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x40,
+	_MALI_NOTIFICATION_SETTINGS_CHANGED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x80,
+	_MALI_NOTIFICATION_SOFT_ACTIVATED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x100,
+
+	/** Fragment Processor notifications */
+
+	_MALI_NOTIFICATION_PP_FINISHED = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x10,
+	_MALI_NOTIFICATION_PP_NUM_CORE_CHANGE = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x20,
+
+	/** Vertex Processor notifications */
+
+	_MALI_NOTIFICATION_GP_FINISHED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x10,
+	_MALI_NOTIFICATION_GP_STALLED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x20,
+
+	/** Profiling notifications */
+	_MALI_NOTIFICATION_ANNOTATE_PROFILING_MEM_COUNTER = (_MALI_UK_PROFILING_SUBSYSTEM << 16) | 0x10,
+	_MALI_NOTIFICATION_ANNOTATE_PROFILING_ENABLE = (_MALI_UK_PROFILING_SUBSYSTEM << 16) | 0x20,
+} _mali_uk_notification_type;
+
+/** to assist in splitting up 32-bit notification value in subsystem and id value */
+#define _MALI_NOTIFICATION_SUBSYSTEM_MASK 0xFFFF0000
+#define _MALI_NOTIFICATION_SUBSYSTEM_SHIFT 16
+#define _MALI_NOTIFICATION_ID_MASK 0x0000FFFF
+#define _MALI_NOTIFICATION_ID_SHIFT 0
+
+
+/** @brief Enumeration of possible settings which match mali_setting_t in user space
+ *
+ *
+ */
+typedef enum {
+	_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE = 0,
+	_MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_DEPTHBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_STENCILBUFFER_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_PER_TILE_COUNTERS_CAPTURE_ENABLED,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_COMPOSITOR,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_WINDOW,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_OTHER,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES,
+	_MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR,
+	_MALI_UK_USER_SETTING_SW_COUNTER_ENABLED,
+	_MALI_UK_USER_SETTING_MAX,
+} _mali_uk_user_setting_t;
+
+/* See mali_user_settings_db.c */
+extern const char *_mali_uk_user_setting_descriptions[];
+#define _MALI_UK_USER_SETTING_DESCRIPTIONS \
+	{                                           \
+		"sw_events_enable",                 \
+		"colorbuffer_capture_enable",       \
+		"depthbuffer_capture_enable",       \
+		"stencilbuffer_capture_enable",     \
+		"per_tile_counters_enable",         \
+		"buffer_capture_compositor",        \
+		"buffer_capture_window",            \
+		"buffer_capture_other",             \
+		"buffer_capture_n_frames",          \
+		"buffer_capture_resize_factor",     \
+		"sw_counters_enable",               \
+	};
+
+/** @brief struct to hold the value to a particular setting as seen in the kernel space
+ */
+typedef struct {
+	_mali_uk_user_setting_t setting;
+	u32 value;
+} _mali_uk_settings_changed_s;
+
+/** @brief Arguments for _mali_ukk_wait_for_notification()
+ *
+ * On successful return from _mali_ukk_wait_for_notification(), the members of
+ * this structure will indicate the reason for notification.
+ *
+ * Specifically, the source of the notification can be identified by the
+ * subsystem and id fields of the mali_uk_notification_type in the code.type
+ * member. The type member is encoded in a way to divide up the types into a
+ * subsystem field, and a per-subsystem ID field. See
+ * _mali_uk_notification_type for more information.
+ *
+ * Interpreting the data union member depends on the notification type:
+ *
+ * - type == _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS
+ *     - The kernel side is shutting down. No further
+ * _mali_uk_wait_for_notification() calls should be made.
+ *     - In this case, the value of the data union member is undefined.
+ *     - This is used to indicate to the user space client that it should close
+ * the connection to the Mali Device Driver.
+ * - type == _MALI_NOTIFICATION_PP_FINISHED
+ *    - The notification data is of type _mali_uk_pp_job_finished_s. It contains the user_job_ptr
+ * identifier used to start the job with, the job status, the number of milliseconds the job took to render,
+ * and values of core registers when the job finished (irq status, performance counters, renderer list
+ * address).
+ *    - A job has finished succesfully when its status member is _MALI_UK_JOB_STATUS_FINISHED.
+ *    - If the hardware detected a timeout while rendering the job, or software detected the job is
+ * taking more than watchdog_msecs (see _mali_ukk_pp_start_job()) to complete, the status member will
+ * indicate _MALI_UK_JOB_STATUS_HANG.
+ *    - If the hardware detected a bus error while accessing memory associated with the job, status will
+ * indicate _MALI_UK_JOB_STATUS_SEG_FAULT.
+ *    - Status will indicate MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to stop the job but the job
+ * didn't start the hardware yet, e.g. when the driver closes.
+ * - type == _MALI_NOTIFICATION_GP_FINISHED
+ *     - The notification data is of type _mali_uk_gp_job_finished_s. The notification is similar to that of
+ * type == _MALI_NOTIFICATION_PP_FINISHED, except that several other GP core register values are returned.
+ * The status values have the same meaning for type == _MALI_NOTIFICATION_PP_FINISHED.
+ * - type == _MALI_NOTIFICATION_GP_STALLED
+ *     - The nofication data is of type _mali_uk_gp_job_suspended_s. It contains the user_job_ptr
+ * identifier used to start the job with, the reason why the job stalled and a cookie to identify the core on
+ * which the job stalled.
+ *     - The reason member of gp_job_suspended is set to _MALIGP_JOB_SUSPENDED_OUT_OF_MEMORY
+ * when the polygon list builder unit has run out of memory.
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_notification_type type; /**< [out] Type of notification available */
+	union {
+		_mali_uk_gp_job_suspended_s gp_job_suspended;/**< [out] Notification data for _MALI_NOTIFICATION_GP_STALLED notification type */
+		_mali_uk_gp_job_finished_s  gp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_GP_FINISHED notification type */
+		_mali_uk_pp_job_finished_s  pp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_PP_FINISHED notification type */
+		_mali_uk_settings_changed_s setting_changed;/**< [out] Notification data for _MALI_NOTIFICAATION_SETTINGS_CHANGED notification type */
+		_mali_uk_soft_job_activated_s soft_job_activated; /**< [out] Notification data for _MALI_NOTIFICATION_SOFT_ACTIVATED notification type */
+		_mali_uk_annotate_profiling_mem_counter_s profiling_mem_counter;
+		_mali_uk_annotate_profiling_enable_s profiling_enable;
+	} data;
+} _mali_uk_wait_for_notification_s;
+
+/** @brief Arguments for _mali_ukk_post_notification()
+ *
+ * Posts the specified notification to the notification queue for this application.
+ * This is used to send a quit message to the callback thread.
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_notification_type type; /**< [in] Type of notification to post */
+} _mali_uk_post_notification_s;
+
+/** @} */ /* end group _mali_uk_waitfornotification_s */
+
+/** @defgroup _mali_uk_getapiversion_s Get API Version
+ * @{ */
+
+/** helpers for Device Driver API version handling */
+
+/** @brief Encode a version ID from a 16-bit input
+ *
+ * @note the input is assumed to be 16 bits. It must not exceed 16 bits. */
+#define _MAKE_VERSION_ID(x) (((x) << 16UL) | (x))
+
+/** @brief Check whether a 32-bit value is likely to be Device Driver API
+ * version ID. */
+#define _IS_VERSION_ID(x) (((x) & 0xFFFF) == (((x) >> 16UL) & 0xFFFF))
+
+/** @brief Decode a 16-bit version number from a 32-bit Device Driver API version
+ * ID */
+#define _GET_VERSION(x) (((x) >> 16UL) & 0xFFFF)
+
+/** @brief Determine whether two 32-bit encoded version IDs match */
+#define _IS_API_MATCH(x, y) (IS_VERSION_ID((x)) && IS_VERSION_ID((y)) && (GET_VERSION((x)) == GET_VERSION((y))))
+
+/**
+ * API version define.
+ * Indicates the version of the kernel API
+ * The version is a 16bit integer incremented on each API change.
+ * The 16bit integer is stored twice in a 32bit integer
+ * For example, for version 1 the value would be 0x00010001
+ */
+#define _MALI_API_VERSION 900
+#define _MALI_UK_API_VERSION _MAKE_VERSION_ID(_MALI_API_VERSION)
+
+/**
+ * The API version is a 16-bit integer stored in both the lower and upper 16-bits
+ * of a 32-bit value. The 16-bit API version value is incremented on each API
+ * change. Version 1 would be 0x00010001. Used in _mali_uk_get_api_version_s.
+ */
+typedef u32 _mali_uk_api_version;
+
+/** @brief Arguments for _mali_uk_get_api_version()
+ *
+ * The user-side interface version must be written into the version member,
+ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of
+ * the kernel-side interface.
+ *
+ * On successful return, the version member will be the API version of the
+ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version
+ * of the API.
+ *
+ * The compatible member must be checked to see if the version of the user-side
+ * interface is compatible with the kernel-side interface, since future versions
+ * of the interface may be backwards compatible.
+ */
+typedef struct {
+	u32 ctx;                        /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_api_version version;   /**< [in,out] API version of user-side interface. */
+	int compatible;                 /**< [out] @c 1 when @version is compatible, @c 0 otherwise */
+} _mali_uk_get_api_version_s;
+
+/** @brief Arguments for _mali_uk_get_api_version_v2()
+ *
+ * The user-side interface version must be written into the version member,
+ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of
+ * the kernel-side interface.
+ *
+ * On successful return, the version member will be the API version of the
+ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version
+ * of the API.
+ *
+ * The compatible member must be checked to see if the version of the user-side
+ * interface is compatible with the kernel-side interface, since future versions
+ * of the interface may be backwards compatible.
+ */
+typedef struct {
+	u64 ctx;                        /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_api_version version;   /**< [in,out] API version of user-side interface. */
+	int compatible;                 /**< [out] @c 1 when @version is compatible, @c 0 otherwise */
+} _mali_uk_get_api_version_v2_s;
+
+/** @} */ /* end group _mali_uk_getapiversion_s */
+
+/** @defgroup _mali_uk_get_user_settings_s Get user space settings */
+
+/** @brief struct to keep the matching values of the user space settings within certain context
+ *
+ * Each member of the settings array corresponds to a matching setting in the user space and its value is the value
+ * of that particular setting.
+ *
+ * All settings are given reference to the context pointed to by the ctx pointer.
+ *
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	u32 settings[_MALI_UK_USER_SETTING_MAX]; /**< [out] The values for all settings */
+} _mali_uk_get_user_settings_s;
+
+/** @brief struct to hold the value of a particular setting from the user space within a given context
+ */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_user_setting_t setting; /**< [in] setting to get */
+	u32 value;                       /**< [out] value of setting */
+} _mali_uk_get_user_setting_s;
+
+/** @brief Arguments for _mali_ukk_request_high_priority() */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+} _mali_uk_request_high_priority_s;
+
+/** @brief Arguments for _mali_ukk_pending_submit() */
+typedef struct {
+	u64 ctx;                       /**< [in,out] user-kernel context (trashed on output) */
+} _mali_uk_pending_submit_s;
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @defgroup _mali_uk_memory U/K Memory
+ * @{ */
+
+#define _MALI_MEMORY_ALLOCATE_RESIZEABLE  (1<<4) /* BUFFER can trim dow/grow*/
+#define _MALI_MEMORY_ALLOCATE_NO_BIND_GPU (1<<5) /*Not map to GPU when allocate, must call bind later*/
+#define _MALI_MEMORY_ALLOCATE_SWAPPABLE   (1<<6) /* Allocate swappale memory. */
+#define _MALI_MEMORY_ALLOCATE_DEFER_BIND (1<<7) /*Not map to GPU when allocate, must call bind later*/
+#define _MALI_MEMORY_ALLOCATE_SECURE (1<<8) /* Allocate secure memory. */
+
+
+typedef struct {
+	u64 ctx;                                          /**< [in,out] user-kernel context (trashed on output) */
+	u32 gpu_vaddr;                                    /**< [in] GPU virtual address */
+	u32 vsize;                                        /**< [in] vitrual size of the allocation */
+	u32 psize;                                        /**< [in] physical size of the allocation */
+	u32 flags;
+	u64 backend_handle;                               /**< [out] backend handle */
+	s32 secure_shared_fd;                           /** < [in] the mem handle for secure mem */
+} _mali_uk_alloc_mem_s;
+
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 gpu_vaddr;                /**< [in] use as handle to free allocation */
+	u32 free_pages_nr;      /** < [out] record the number of free pages */
+} _mali_uk_free_mem_s;
+
+
+#define _MALI_MEMORY_BIND_BACKEND_UMP             (1<<8)
+#define _MALI_MEMORY_BIND_BACKEND_DMA_BUF         (1<<9)
+#define _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY     (1<<10)
+#define _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY (1<<11)
+#define _MALI_MEMORY_BIND_BACKEND_EXT_COW         (1<<12)
+#define _MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION (1<<13)
+
+
+#define _MALI_MEMORY_BIND_BACKEND_MASK (_MALI_MEMORY_BIND_BACKEND_UMP| \
+					_MALI_MEMORY_BIND_BACKEND_DMA_BUF |\
+					_MALI_MEMORY_BIND_BACKEND_MALI_MEMORY |\
+					_MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY |\
+					_MALI_MEMORY_BIND_BACKEND_EXT_COW |\
+					_MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION)
+
+
+#define _MALI_MEMORY_GPU_READ_ALLOCATE            (1<<16)
+
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 vaddr;                                      /**< [in] mali address to map the physical memory to */
+	u32 size;                                       /**< [in] size */
+	u32 flags;                                      /**< [in] see_MALI_MEMORY_BIND_BACKEND_* */
+	u32 padding;                                    /** padding for 32/64 struct alignment */
+	union {
+		struct {
+			u32 secure_id;                  /**< [in] secure id */
+			u32 rights;                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_ump;
+		struct {
+			u32 mem_fd;                     /**< [in] Memory descriptor */
+			u32 rights;                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_dma_buf;
+		struct {
+			u32 phys_addr;                  /**< [in] physical address */
+			u32 rights;                     /**< [in] rights necessary for accessing memory */
+			u32 flags;                      /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+		} bind_ext_memory;
+	} mem_union;
+} _mali_uk_bind_mem_s;
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 flags;                                      /**< [in] see_MALI_MEMORY_BIND_BACKEND_* */
+	u32 vaddr;                                      /**<  [in] identifier for mapped memory object in kernel space  */
+} _mali_uk_unbind_mem_s;
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 target_handle;                              /**< [in] handle of allocation need to do COW */
+	u32 target_offset;                              /**< [in] offset in target allocation to do COW(for support COW  a memory allocated from memory_bank, PAGE_SIZE align)*/
+	u32 target_size;                                /**< [in] size of target allocation to do COW (for support memory bank, PAGE_SIZE align)(in byte) */
+	u32 range_start;                                /**< [in] re allocate range start offset, offset from the start of allocation (PAGE_SIZE align)*/
+	u32 range_size;                                 /**< [in] re allocate size (PAGE_SIZE align)*/
+	u32 vaddr;                                      /**< [in] mali address for the new allocaiton */
+	u32 backend_handle;                             /**< [out] backend handle */
+	u32 flags;
+} _mali_uk_cow_mem_s;
+
+typedef struct {
+	u64 ctx;                                        /**< [in,out] user-kernel context (trashed on output) */
+	u32 range_start;                                /**< [in] re allocate range start offset, offset from the start of allocation */
+	u32 size;                                       /**< [in] re allocate size*/
+	u32 vaddr;                                      /**< [in] mali address for the new allocaiton */
+	s32 change_pages_nr;                            /**< [out] record the page number change for cow operation */
+} _mali_uk_cow_modify_range_s;
+
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 mem_fd;                     /**< [in] Memory descriptor */
+	u32 size;                       /**< [out] size */
+} _mali_uk_dma_buf_get_size_s;
+
+/** Flag for _mali_uk_map_external_mem_s, _mali_uk_attach_ump_mem_s and _mali_uk_attach_dma_buf_s */
+#define _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE (1<<0)
+
+
+typedef struct {
+	u64 ctx;                                /**< [in,out] user-kernel context (trashed on output) */
+	u64 vaddr;                              /* the buffer to do resize*/
+	u32 psize;                              /* wanted physical size of this memory */
+} _mali_uk_mem_resize_s;
+
+/**
+ * @brief Arguments for _mali_uk[uk]_mem_write_safe()
+ */
+typedef struct {
+	u64 ctx;  /**< [in,out] user-kernel context (trashed on output) */
+	u64 src;  /**< [in] Pointer to source data */
+	u64 dest; /**< [in] Destination Mali buffer */
+	u32 size;   /**< [in,out] Number of bytes to write/copy on input, number of bytes actually written/copied on output */
+} _mali_uk_mem_write_safe_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 size;                       /**< [out] size of MMU page table information (registers + page tables) */
+} _mali_uk_query_mmu_page_table_dump_size_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 size;                       /**< [in] size of buffer to receive mmu page table information */
+	u64 buffer;                   /**< [in,out] buffer to receive mmu page table information */
+	u32 register_writes_size;       /**< [out] size of MMU register dump */
+	u64 register_writes;           /**< [out] pointer within buffer where MMU register dump is stored */
+	u32 page_table_dump_size;       /**< [out] size of MMU page table dump */
+	u64 page_table_dump;           /**< [out] pointer within buffer where MMU page table dump is stored */
+} _mali_uk_dump_mmu_page_table_s;
+
+/** @} */ /* end group _mali_uk_memory */
+
+
+/** @addtogroup _mali_uk_pp U/K Fragment Processor
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_get_pp_number_of_cores()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_pp_number_of_cores(), @c number_of_cores
+ * will contain the number of Fragment Processor cores in the system.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 number_of_total_cores;      /**< [out] Total number of Fragment Processor cores in the system */
+	u32 number_of_enabled_cores;    /**< [out] Number of enabled Fragment Processor cores */
+} _mali_uk_get_pp_number_of_cores_s;
+
+/** @brief Arguments for _mali_ukk_get_pp_core_version()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_pp_core_version(), @c version contains
+ * the version that all Fragment Processor cores are compatible with.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_core_version version;     /**< [out] version returned from core, see \ref _mali_core_version  */
+	u32 padding;
+} _mali_uk_get_pp_core_version_s;
+
+/** @} */ /* end group _mali_uk_pp */
+
+
+/** @addtogroup _mali_uk_gp U/K Vertex Processor
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_get_gp_number_of_cores()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_gp_number_of_cores(), @c number_of_cores
+ * will contain the number of Vertex Processor cores in the system.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 number_of_cores;            /**< [out] number of Vertex Processor cores in the system */
+} _mali_uk_get_gp_number_of_cores_s;
+
+/** @brief Arguments for _mali_ukk_get_gp_core_version()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_gp_core_version(), @c version contains
+ * the version that all Vertex Processor cores are compatible with.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_core_version version;     /**< [out] version returned from core, see \ref _mali_core_version */
+} _mali_uk_get_gp_core_version_s;
+
+/** @} */ /* end group _mali_uk_gp */
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 event_id;                   /**< [in] event id to register (see  enum mali_profiling_events for values) */
+	u32 data[5];                    /**< [in] event specific data */
+} _mali_uk_profiling_add_event_s;
+
+typedef struct {
+	u64 ctx;                     /**< [in,out] user-kernel context (trashed on output) */
+	u32 memory_usage;              /**< [out] total memory usage */
+	u32 vaddr;                                      /**< [in] mali address for the cow allocaiton */
+	s32 change_pages_nr;            /**< [out] record the page number change for cow operation */
+} _mali_uk_profiling_memory_usage_get_s;
+
+
+/** @addtogroup _mali_uk_memory U/K Memory
+ * @{ */
+
+/** @brief Arguments to _mali_ukk_mem_mmap()
+ *
+ * Use of the phys_addr member depends on whether the driver is compiled for
+ * Mali-MMU or nonMMU:
+ * - in the nonMMU case, this is the physical address of the memory as seen by
+ * the CPU (which may be a constant offset from that used by Mali)
+ * - in the MMU case, this is the Mali Virtual base address of the memory to
+ * allocate, and the particular physical pages used to back the memory are
+ * entirely determined by _mali_ukk_mem_mmap(). The details of the physical pages
+ * are not reported to user-space for security reasons.
+ *
+ * The cookie member must be stored for use later when freeing the memory by
+ * calling _mali_ukk_mem_munmap(). In the Mali-MMU case, the cookie is secure.
+ *
+ * The ukk_private word must be set to zero when calling from user-space. On
+ * Kernel-side, the  OS implementation of the U/K interface can use it to
+ * communicate data to the OS implementation of the OSK layer. In particular,
+ * _mali_ukk_get_big_block() directly calls _mali_ukk_mem_mmap directly, and
+ * will communicate its own ukk_private word through the ukk_private member
+ * here. The common code itself will not inspect or modify the ukk_private
+ * word, and so it may be safely used for whatever purposes necessary to
+ * integrate Mali Memory handling into the OS.
+ *
+ * The uku_private member is currently reserved for use by the user-side
+ * implementation of the U/K interface. Its value must be zero.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	void *mapping;                  /**< [out] Returns user-space virtual address for the mapping */
+	u32 size;                       /**< [in] Size of the requested mapping */
+	u32 phys_addr;                  /**< [in] Physical address - could be offset, depending on caller+callee convention */
+	mali_bool writeable;
+} _mali_uk_mem_mmap_s;
+
+/** @brief Arguments to _mali_ukk_mem_munmap()
+ *
+ * The cookie and mapping members must be that returned from the same previous
+ * call to _mali_ukk_mem_mmap(). The size member must correspond to cookie
+ * and mapping - that is, it must be the value originally supplied to a call to
+ * _mali_ukk_mem_mmap that returned the values of mapping and cookie.
+ *
+ * An error will be returned if an attempt is made to unmap only part of the
+ * originally obtained range, or to unmap more than was originally obtained.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	void *mapping;                  /**< [in] The mapping returned from mmap call */
+	u32 size;                       /**< [in] The size passed to mmap call */
+} _mali_uk_mem_munmap_s;
+/** @} */ /* end group _mali_uk_memory */
+
+/** @defgroup _mali_uk_vsync U/K VSYNC Wait Reporting Module
+ * @{ */
+
+/** @brief VSYNC events
+ *
+ * These events are reported when DDK starts to wait for vsync and when the
+ * vsync has occured and the DDK can continue on the next frame.
+ */
+typedef enum _mali_uk_vsync_event {
+	_MALI_UK_VSYNC_EVENT_BEGIN_WAIT = 0,
+	_MALI_UK_VSYNC_EVENT_END_WAIT
+} _mali_uk_vsync_event;
+
+/** @brief Arguments to _mali_ukk_vsync_event()
+ *
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_vsync_event event;     /**< [in] VSYNCH event type */
+} _mali_uk_vsync_event_report_s;
+
+/** @} */ /* end group _mali_uk_vsync */
+
+/** @defgroup _mali_uk_sw_counters_report U/K Software Counter Reporting
+ * @{ */
+
+/** @brief Software counter values
+ *
+ * Values recorded for each of the software counters during a single renderpass.
+ */
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u64 counters;                  /**< [in] The array of u32 counter values */
+	u32 num_counters;              /**< [in] The number of elements in counters array */
+} _mali_uk_sw_counters_report_s;
+
+/** @} */ /* end group _mali_uk_sw_counters_report */
+
+/** @defgroup _mali_uk_timeline U/K Mali Timeline
+ * @{ */
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	u32 timeline;                   /**< [in] timeline id */
+	u32 point;                      /**< [out] latest point on timeline */
+} _mali_uk_timeline_get_latest_point_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_fence_t fence;         /**< [in] fence */
+	u32 timeout;                    /**< [in] timeout (0 for no wait, -1 for blocking) */
+	u32 status;                     /**< [out] status of fence (1 if signaled, 0 if timeout) */
+} _mali_uk_timeline_wait_s;
+
+typedef struct {
+	u64 ctx;                      /**< [in,out] user-kernel context (trashed on output) */
+	_mali_uk_fence_t fence;         /**< [in] mali fence to create linux sync fence from */
+	s32 sync_fd;                    /**< [out] file descriptor for new linux sync fence */
+} _mali_uk_timeline_create_sync_fence_s;
+
+/** @} */ /* end group _mali_uk_timeline */
+
+/** @} */ /* end group u_k_api */
+
+/** @} */ /* end group uddapi */
+
+typedef struct {
+	u64 ctx;                 /**< [in,out] user-kernel context (trashed on output) */
+	s32 stream_fd;   /**< [in] The profiling kernel base stream fd handle */
+} _mali_uk_profiling_stream_fd_get_s;
+
+typedef struct {
+	u64 ctx;        /**< [in,out] user-kernel context (trashed on output) */
+	u64 control_packet_data; /**< [in] the control packet data for control settings */
+	u32 control_packet_size;  /**< [in] The control packet size */
+	u64 response_packet_data; /** < [out] The response packet data */
+	u32 response_packet_size; /** < [in,out] The response packet data */
+} _mali_uk_profiling_control_set_s;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_UK_TYPES_H__ */
diff --git a/utgard/r7p0/linux/license/gpl/mali_kernel_license.h b/utgard/r7p0/linux/license/gpl/mali_kernel_license.h
new file mode 100644
index 0000000..2e00603
--- /dev/null
+++ b/utgard/r7p0/linux/license/gpl/mali_kernel_license.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2010, 2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_kernel_license.h
+ * Defines for the macro MODULE_LICENSE.
+ */
+
+#ifndef __MALI_KERNEL_LICENSE_H__
+#define __MALI_KERNEL_LICENSE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MALI_KERNEL_LINUX_LICENSE     "GPL"
+#define MALI_LICENSE_IS_GPL 1
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LICENSE_H__ */
diff --git a/utgard/r7p0/linux/mali_devfreq.c b/utgard/r7p0/linux/mali_devfreq.c
new file mode 100644
index 0000000..2d65e50
--- /dev/null
+++ b/utgard/r7p0/linux/mali_devfreq.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#include <linux/regulator/consumer.h>
+#include <linux/regulator/driver.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#endif /* Linux >= 3.13 */
+
+#include "mali_pm_metrics.h"
+
+static int
+mali_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+	if (IS_ERR_OR_NULL(opp)) {
+		MALI_PRINT_ERROR(("Failed to get opp (%ld)\n", PTR_ERR(opp)));
+		return PTR_ERR(opp);
+	}
+
+	MALI_DEBUG_PRINT(2, ("mali_devfreq_target:set_freq = %lld flags = 0x%x\n", freq, flags));
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (mdev->current_freq == freq) {
+		*target_freq = freq;
+		mali_pm_reset_dvfs_utilisation(mdev);
+		return 0;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (mdev->regulator && mdev->current_voltage != voltage
+	    && mdev->current_freq < freq) {
+		err = regulator_set_voltage(mdev->regulator, voltage, voltage);
+		if (err) {
+			MALI_PRINT_ERROR(("Failed to increase voltage (%d)\n", err));
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(mdev->clock, freq);
+	if (err) {
+		MALI_PRINT_ERROR(("Failed to set clock %lu (target %lu)\n", freq, *target_freq));
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (mdev->regulator && mdev->current_voltage != voltage
+	    && mdev->current_freq > freq) {
+		err = regulator_set_voltage(mdev->regulator, voltage, voltage);
+		if (err) {
+			MALI_PRINT_ERROR(("Failed to decrease voltage (%d)\n", err));
+			return err;
+		}
+	}
+#endif
+
+	*target_freq = freq;
+	mdev->current_voltage = voltage;
+	mdev->current_freq = freq;
+
+	mali_pm_reset_dvfs_utilisation(mdev);
+
+	return err;
+}
+
+static int
+mali_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	*freq = mdev->current_freq;
+
+	MALI_DEBUG_PRINT(2, ("mali_devfreq_cur_freq: freq = %d\n", *freq));
+	return 0;
+}
+
+static int
+mali_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	stat->current_frequency = mdev->current_freq;
+
+	mali_pm_get_dvfs_utilisation(mdev,
+				     &stat->total_time, &stat->busy_time);
+
+	stat->private_data = NULL;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	memcpy(&mdev->devfreq->last_status, stat, sizeof(*stat));
+#endif
+
+	return 0;
+}
+
+/* setup platform specific opp in platform.c*/
+int __weak setup_opps(void)
+{
+	return 0;
+}
+
+/* term platform specific opp in platform.c*/
+int __weak term_opps(struct device *dev)
+{
+	return 0;
+}
+
+static int mali_devfreq_init_freq_table(struct mali_device *mdev,
+					struct devfreq_dev_profile *dp)
+{
+	int err, count;
+	int i = 0;
+	unsigned long freq = 0;
+	struct dev_pm_opp *opp;
+
+	err = setup_opps();
+	if (err)
+		return err;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(mdev->dev);
+	if (count < 0) {
+		rcu_read_unlock();
+		return count;
+	}
+	rcu_read_unlock();
+
+	MALI_DEBUG_PRINT(2, ("mali devfreq table count %d\n", count));
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				       GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	for (i = 0; i < count; i++, freq++) {
+		opp = dev_pm_opp_find_freq_ceil(mdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+		MALI_DEBUG_PRINT(2, ("mali devfreq table array[%d] = %d\n", i, freq));
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		MALI_PRINT_ERROR(("Unable to enumerate all OPPs (%d!=%d)\n",
+				  count, i));
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void mali_devfreq_term_freq_table(struct mali_device *mdev)
+{
+	struct devfreq_dev_profile *dp = mdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+	term_opps(mdev->dev);
+}
+
+static void mali_devfreq_exit(struct device *dev)
+{
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	mali_devfreq_term_freq_table(mdev);
+}
+
+int mali_devfreq_init(struct mali_device *mdev)
+{
+#ifdef CONFIG_DEVFREQ_THERMAL
+	struct devfreq_cooling_power *callbacks = NULL;
+	_mali_osk_device_data data;
+#endif
+	struct devfreq_dev_profile *dp;
+	int err;
+
+	MALI_DEBUG_PRINT(2, ("Init Mali devfreq\n"));
+
+	if (!mdev->clock)
+		return -ENODEV;
+
+	mdev->current_freq = clk_get_rate(mdev->clock);
+
+	dp = &mdev->devfreq_profile;
+
+	dp->initial_freq = mdev->current_freq;
+	dp->polling_ms = 100;
+	dp->target = mali_devfreq_target;
+	dp->get_dev_status = mali_devfreq_status;
+	dp->get_cur_freq = mali_devfreq_cur_freq;
+	dp->exit = mali_devfreq_exit;
+
+	if (mali_devfreq_init_freq_table(mdev, dp))
+		return -EFAULT;
+
+	mdev->devfreq = devfreq_add_device(mdev->dev, dp,
+					   "simple_ondemand", NULL);
+	if (IS_ERR(mdev->devfreq)) {
+		mali_devfreq_term_freq_table(mdev);
+		return PTR_ERR(mdev->devfreq);
+	}
+
+	err = devfreq_register_opp_notifier(mdev->dev, mdev->devfreq);
+	if (err) {
+		MALI_PRINT_ERROR(("Failed to register OPP notifier (%d)\n", err));
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	/* Initilization last_status it will be used when first power allocate called */
+	mdev->devfreq->last_status.current_frequency = mdev->current_freq;
+
+	if (_mali_osk_device_data_get(&data) == _MALI_OSK_ERR_OK) {
+		if (data.gpu_cooling_ops != NULL) {
+			callbacks = data.gpu_cooling_ops;
+			MALI_DEBUG_PRINT(2, ("Mali GPU Thermal: Callback handler installed\n"));
+		}
+	}
+
+	if (callbacks) {
+		mdev->devfreq_cooling = of_devfreq_cooling_register_power(
+						mdev->dev->of_node,
+						mdev->devfreq,
+						callbacks);
+		if (IS_ERR_OR_NULL(mdev->devfreq_cooling)) {
+			err = PTR_ERR(mdev->devfreq_cooling);
+			MALI_PRINT_ERROR(("Failed to register cooling device (%d)\n", err));
+			goto cooling_failed;
+		} else {
+			MALI_DEBUG_PRINT(2, ("Mali GPU Thermal Cooling installed\n"));
+		}
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(mdev->dev, mdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	err = devfreq_remove_device(mdev->devfreq);
+	if (err)
+		MALI_PRINT_ERROR(("Failed to terminate devfreq (%d)\n", err));
+	else
+		mdev->devfreq = NULL;
+
+	return err;
+}
+
+void mali_devfreq_term(struct mali_device *mdev)
+{
+	int err;
+
+	MALI_DEBUG_PRINT(2, ("Term Mali devfreq\n"));
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	devfreq_cooling_unregister(mdev->devfreq_cooling);
+#endif
+
+	devfreq_unregister_opp_notifier(mdev->dev, mdev->devfreq);
+
+	err = devfreq_remove_device(mdev->devfreq);
+	if (err)
+		MALI_PRINT_ERROR(("Failed to terminate devfreq (%d)\n", err));
+	else
+		mdev->devfreq = NULL;
+}
diff --git a/utgard/r7p0/linux/mali_devfreq.h b/utgard/r7p0/linux/mali_devfreq.h
new file mode 100644
index 0000000..b071b37
--- /dev/null
+++ b/utgard/r7p0/linux/mali_devfreq.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef _MALI_DEVFREQ_H_
+#define _MALI_DEVFREQ_H_
+
+int mali_devfreq_init(struct mali_device *mdev);
+
+void mali_devfreq_term(struct mali_device *mdev);
+
+#endif
diff --git a/utgard/r7p0/linux/mali_device_pause_resume.c b/utgard/r7p0/linux/mali_device_pause_resume.c
new file mode 100644
index 0000000..2fcd00d
--- /dev/null
+++ b/utgard/r7p0/linux/mali_device_pause_resume.c
@@ -0,0 +1,36 @@
+/**
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_device_pause_resume.c
+ * Implementation of the Mali pause/resume functionality
+ */
+
+#include <linux/module.h>
+#include <linux/mali/mali_utgard.h>
+#include "mali_pm.h"
+
+void mali_dev_pause(void)
+{
+	/*
+	 * Deactive all groups to prevent hardware being touched
+	 * during the period of mali device pausing
+	 */
+	mali_pm_os_suspend(MALI_FALSE);
+}
+
+EXPORT_SYMBOL(mali_dev_pause);
+
+void mali_dev_resume(void)
+{
+	mali_pm_os_resume();
+}
+
+EXPORT_SYMBOL(mali_dev_resume);
diff --git a/utgard/r7p0/linux/mali_dma_fence.c b/utgard/r7p0/linux/mali_dma_fence.c
new file mode 100644
index 0000000..0b46475
--- /dev/null
+++ b/utgard/r7p0/linux/mali_dma_fence.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/version.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+#include "mali_dma_fence.h"
+#include <linux/atomic.h>
+#include <linux/workqueue.h>
+#endif
+
+static DEFINE_SPINLOCK(mali_dma_fence_lock);
+
+static bool mali_dma_fence_enable_signaling(struct fence *fence)
+{
+	MALI_IGNORE(fence);
+	return true;
+}
+
+static const char *mali_dma_fence_get_driver_name(struct fence *fence)
+{
+	MALI_IGNORE(fence);
+	return "mali";
+}
+
+static const char *mali_dma_fence_get_timeline_name(struct fence *fence)
+{
+	MALI_IGNORE(fence);
+	return "mali_dma_fence";
+}
+
+static const struct fence_ops mali_dma_fence_ops = {
+	.get_driver_name = mali_dma_fence_get_driver_name,
+	.get_timeline_name = mali_dma_fence_get_timeline_name,
+	.enable_signaling = mali_dma_fence_enable_signaling,
+	.signaled = NULL,
+	.wait = fence_default_wait,
+	.release = NULL
+};
+
+static void mali_dma_fence_context_cleanup(struct mali_dma_fence_context *dma_fence_context)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+
+	for (i = 0; i < dma_fence_context->num_dma_fence_waiter; i++) {
+		if (dma_fence_context->mali_dma_fence_waiters[i]) {
+			fence_remove_callback(dma_fence_context->mali_dma_fence_waiters[i]->fence,
+					      &dma_fence_context->mali_dma_fence_waiters[i]->base);
+			fence_put(dma_fence_context->mali_dma_fence_waiters[i]->fence);
+			kfree(dma_fence_context->mali_dma_fence_waiters[i]);
+			dma_fence_context->mali_dma_fence_waiters[i] = NULL;
+		}
+	}
+
+	if (dma_fence_context->mali_dma_fence_waiters != NULL)
+		kfree(dma_fence_context->mali_dma_fence_waiters);
+
+	dma_fence_context->mali_dma_fence_waiters = NULL;
+	dma_fence_context->num_dma_fence_waiter = 0;
+}
+
+static void mali_dma_fence_context_work_func(struct work_struct *work_handle)
+{
+	struct mali_dma_fence_context *dma_fence_context;
+
+	MALI_DEBUG_ASSERT_POINTER(work_handle);
+
+	dma_fence_context = container_of(work_handle, struct mali_dma_fence_context, work_handle);
+
+	dma_fence_context->cb_func(dma_fence_context->pp_job_ptr);
+}
+
+static void mali_dma_fence_callback(struct fence *fence, struct fence_cb *cb)
+{
+	struct mali_dma_fence_waiter *dma_fence_waiter = NULL;
+	struct mali_dma_fence_context *dma_fence_context = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(fence);
+	MALI_DEBUG_ASSERT_POINTER(cb);
+
+	MALI_IGNORE(fence);
+
+	dma_fence_waiter = container_of(cb, struct mali_dma_fence_waiter, base);
+	dma_fence_context = dma_fence_waiter->parent;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+
+	if (atomic_dec_and_test(&dma_fence_context->count))
+		schedule_work(&dma_fence_context->work_handle);
+}
+
+static _mali_osk_errcode_t mali_dma_fence_add_callback(struct mali_dma_fence_context *dma_fence_context, struct fence *fence)
+{
+	int ret = 0;
+	struct mali_dma_fence_waiter *dma_fence_waiter;
+	struct mali_dma_fence_waiter **dma_fence_waiters;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+	MALI_DEBUG_ASSERT_POINTER(fence);
+
+	dma_fence_waiters = krealloc(dma_fence_context->mali_dma_fence_waiters,
+				     (dma_fence_context->num_dma_fence_waiter + 1)
+				     * sizeof(struct mali_dma_fence_waiter *),
+				     GFP_KERNEL);
+
+	if (dma_fence_waiters == NULL) {
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to realloc the dma fence waiters.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	dma_fence_context->mali_dma_fence_waiters = dma_fence_waiters;
+
+	dma_fence_waiter = kzalloc(sizeof(struct mali_dma_fence_waiter), GFP_KERNEL);
+
+	if (dma_fence_waiter == NULL) {
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to create mali dma fence waiter.\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	fence_get(fence);
+
+	dma_fence_waiter->fence = fence;
+	dma_fence_waiter->parent = dma_fence_context;
+	atomic_inc(&dma_fence_context->count);
+
+	ret = fence_add_callback(fence, &dma_fence_waiter->base,
+				 mali_dma_fence_callback);
+	if (ret < 0) {
+		fence_put(fence);
+		kfree(dma_fence_waiter);
+		atomic_dec(&dma_fence_context->count);
+		if (-ENOENT == ret) {
+			/*-ENOENT if fence has already been signaled, return _MALI_OSK_ERR_OK*/
+			return _MALI_OSK_ERR_OK;
+		}
+		/* Failed to add the fence callback into fence, return _MALI_OSK_ERR_FAULT*/
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into fence.\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	dma_fence_context->mali_dma_fence_waiters[dma_fence_context->num_dma_fence_waiter] = dma_fence_waiter;
+	dma_fence_context->num_dma_fence_waiter++;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+struct fence *mali_dma_fence_new(u32  context, u32 seqno)
+{
+	struct fence *fence = NULL;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+
+	if (fence == NULL) {
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to create dma fence.\n"));
+		return fence;
+	}
+
+	fence_init(fence,
+		   &mali_dma_fence_ops,
+		   &mali_dma_fence_lock,
+		   context, seqno);
+
+	return fence;
+}
+
+void mali_dma_fence_signal_and_put(struct fence **fence)
+{
+	MALI_DEBUG_ASSERT_POINTER(fence);
+	MALI_DEBUG_ASSERT_POINTER(*fence);
+
+	fence_signal(*fence);
+	fence_put(*fence);
+	*fence = NULL;
+}
+
+void mali_dma_fence_context_init(struct mali_dma_fence_context *dma_fence_context,
+				 mali_dma_fence_context_callback_func_t  cb_func,
+				 void *pp_job_ptr)
+{
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+
+	INIT_WORK(&dma_fence_context->work_handle, mali_dma_fence_context_work_func);
+	atomic_set(&dma_fence_context->count, 1);
+	dma_fence_context->num_dma_fence_waiter = 0;
+	dma_fence_context->mali_dma_fence_waiters = NULL;
+	dma_fence_context->cb_func = cb_func;
+	dma_fence_context->pp_job_ptr = pp_job_ptr;
+}
+
+_mali_osk_errcode_t mali_dma_fence_context_add_waiters(struct mali_dma_fence_context *dma_fence_context,
+		struct reservation_object *dma_reservation_object)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+	struct fence *exclusive_fence = NULL;
+	u32 shared_count = 0, i;
+	struct fence **shared_fences = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+	MALI_DEBUG_ASSERT_POINTER(dma_reservation_object);
+
+	/* Get all the shared/exclusive fences in the reservation object of dma buf*/
+	ret = reservation_object_get_fences_rcu(dma_reservation_object, &exclusive_fence,
+						&shared_count, &shared_fences);
+	if (ret < 0) {
+		MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to get  shared or exclusive_fence dma fences from  the reservation object of dma buf.\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (exclusive_fence) {
+		ret = mali_dma_fence_add_callback(dma_fence_context, exclusive_fence);
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into exclusive fence.\n"));
+			mali_dma_fence_context_cleanup(dma_fence_context);
+			goto ended;
+		}
+	}
+
+
+	for (i = 0; i < shared_count; i++) {
+		ret = mali_dma_fence_add_callback(dma_fence_context, shared_fences[i]);
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into shared fence [%d].\n", i));
+			mali_dma_fence_context_cleanup(dma_fence_context);
+			break;
+		}
+	}
+
+ended:
+
+	if (exclusive_fence)
+		fence_put(exclusive_fence);
+
+	if (shared_fences) {
+		for (i = 0; i < shared_count; i++) {
+			fence_put(shared_fences[i]);
+		}
+		kfree(shared_fences);
+	}
+
+	return ret;
+}
+
+
+void mali_dma_fence_context_term(struct mali_dma_fence_context *dma_fence_context)
+{
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+	atomic_set(&dma_fence_context->count, 0);
+	if (dma_fence_context->work_handle.func) {
+		cancel_work_sync(&dma_fence_context->work_handle);
+	}
+	mali_dma_fence_context_cleanup(dma_fence_context);
+}
+
+void mali_dma_fence_context_dec_count(struct mali_dma_fence_context *dma_fence_context)
+{
+	MALI_DEBUG_ASSERT_POINTER(dma_fence_context);
+
+	if (atomic_dec_and_test(&dma_fence_context->count))
+		schedule_work(&dma_fence_context->work_handle);
+}
+
+
+void mali_dma_fence_add_reservation_object_list(struct reservation_object *dma_reservation_object,
+		struct reservation_object **dma_reservation_object_list,
+		u32 *num_dma_reservation_object)
+{
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_reservation_object);
+	MALI_DEBUG_ASSERT_POINTER(dma_reservation_object_list);
+	MALI_DEBUG_ASSERT_POINTER(num_dma_reservation_object);
+
+	for (i = 0; i < *num_dma_reservation_object; i++) {
+		if (dma_reservation_object_list[i] == dma_reservation_object)
+			return;
+	}
+
+	dma_reservation_object_list[*num_dma_reservation_object] = dma_reservation_object;
+	(*num_dma_reservation_object)++;
+}
+
+int mali_dma_fence_lock_reservation_object_list(struct reservation_object **dma_reservation_object_list,
+		u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx)
+{
+	u32 i;
+
+	struct reservation_object *reservation_object_to_slow_lock = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(dma_reservation_object_list);
+	MALI_DEBUG_ASSERT_POINTER(ww_actx);
+
+	ww_acquire_init(ww_actx, &reservation_ww_class);
+
+again:
+	for (i = 0; i < num_dma_reservation_object; i++) {
+		int ret;
+
+		if (dma_reservation_object_list[i] == reservation_object_to_slow_lock) {
+			reservation_object_to_slow_lock = NULL;
+			continue;
+		}
+
+		ret = ww_mutex_lock(&dma_reservation_object_list[i]->lock, ww_actx);
+
+		if (ret < 0) {
+			u32  slow_lock_index = i;
+
+			/* unlock all pre locks we have already locked.*/
+			while (i > 0) {
+				i--;
+				ww_mutex_unlock(&dma_reservation_object_list[i]->lock);
+			}
+
+			if (reservation_object_to_slow_lock != NULL)
+				ww_mutex_unlock(&reservation_object_to_slow_lock->lock);
+
+			if (ret == -EDEADLK) {
+				reservation_object_to_slow_lock = dma_reservation_object_list[slow_lock_index];
+				ww_mutex_lock_slow(&reservation_object_to_slow_lock->lock, ww_actx);
+				goto again;
+			}
+			ww_acquire_fini(ww_actx);
+			MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to lock all dma reservation objects.\n", i));
+			return ret;
+		}
+	}
+
+	ww_acquire_done(ww_actx);
+	return 0;
+}
+
+void mali_dma_fence_unlock_reservation_object_list(struct reservation_object **dma_reservation_object_list,
+		u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx)
+{
+	u32 i;
+
+	for (i = 0; i < num_dma_reservation_object; i++)
+		ww_mutex_unlock(&dma_reservation_object_list[i]->lock);
+
+	ww_acquire_fini(ww_actx);
+}
diff --git a/utgard/r7p0/linux/mali_dma_fence.h b/utgard/r7p0/linux/mali_dma_fence.h
new file mode 100644
index 0000000..ec2e4a7
--- /dev/null
+++ b/utgard/r7p0/linux/mali_dma_fence.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_dma_fence.h
+ *
+ * Mali interface for Linux dma buf fence objects.
+ */
+
+#ifndef _MALI_DMA_FENCE_H_
+#define _MALI_DMA_FENCE_H_
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+#include <linux/fence.h>
+#include <linux/reservation.h>
+#endif
+
+struct mali_dma_fence_context;
+
+/* The mali dma fence context callback function */
+typedef void (*mali_dma_fence_context_callback_func_t)(void *pp_job_ptr);
+
+struct mali_dma_fence_waiter {
+	struct fence_cb base;
+	struct mali_dma_fence_context *parent;
+	struct fence *fence;
+};
+
+struct mali_dma_fence_context {
+	struct work_struct work_handle;
+	struct mali_dma_fence_waiter **mali_dma_fence_waiters;
+	u32 num_dma_fence_waiter;
+	atomic_t count;
+	void *pp_job_ptr; /* the mali pp job pointer */;
+	mali_dma_fence_context_callback_func_t cb_func;
+};
+
+/* Create a dma fence
+ * @param context The execution context this fence is run on
+ * @param seqno A linearly increasing sequence number for this context
+ * @return the new dma fence if success, or NULL on failure.
+ */
+struct fence *mali_dma_fence_new(u32  context, u32 seqno);
+
+/* Signal and put dma fence
+ * @param fence The dma fence to signal and put
+ */
+void mali_dma_fence_signal_and_put(struct fence **fence);
+
+/**
+ * Initialize a mali dma fence context for pp job.
+ * @param dma_fence_context The mali dma fence context to initialize.
+ * @param cb_func The dma fence context callback function to call when all dma fence release.
+ * @param pp_job_ptr The pp_job to call function with.
+ */
+void mali_dma_fence_context_init(struct mali_dma_fence_context *dma_fence_context,
+				 mali_dma_fence_context_callback_func_t  cb_func,
+				 void *pp_job_ptr);
+
+/**
+ * Add new mali dma fence waiter into mali dma fence context
+ * @param dma_fence_context The mali dma fence context
+ * @param dma_reservation_object the reservation object to create new mali dma fence waiters
+ * @return _MALI_OSK_ERR_OK if success, or not.
+ */
+_mali_osk_errcode_t mali_dma_fence_context_add_waiters(struct mali_dma_fence_context *dma_fence_context,
+		struct reservation_object *dma_reservation_object);
+
+/**
+ * Release the dma fence context
+ * @param dma_fence_text The mali dma fence context.
+ */
+void mali_dma_fence_context_term(struct mali_dma_fence_context *dma_fence_context);
+
+/**
+ * Decrease the dma fence context atomic count
+ * @param dma_fence_text The mali dma fence context.
+ */
+void mali_dma_fence_context_dec_count(struct mali_dma_fence_context *dma_fence_context);
+
+/**
+ * Get all reservation object
+ * @param dma_reservation_object The reservation object to add into the reservation object list
+ * @param dma_reservation_object_list The reservation object list to store all reservation object
+ * @param num_dma_reservation_object The number of all reservation object
+ */
+void mali_dma_fence_add_reservation_object_list(struct reservation_object *dma_reservation_object,
+		struct reservation_object **dma_reservation_object_list,
+		u32 *num_dma_reservation_object);
+
+/**
+ * Wait/wound mutex lock to lock all reservation object.
+ */
+int mali_dma_fence_lock_reservation_object_list(struct reservation_object **dma_reservation_object_list,
+		u32  num_dma_reservation_object, struct ww_acquire_ctx *ww_actx);
+
+/**
+ * Wait/wound mutex lock to unlock all reservation object.
+ */
+void mali_dma_fence_unlock_reservation_object_list(struct reservation_object **dma_reservation_object_list,
+		u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx);
+#endif
diff --git a/utgard/r7p0/linux/mali_kernel_linux.c b/utgard/r7p0/linux/mali_kernel_linux.c
new file mode 100755
index 0000000..c5b9f67
--- /dev/null
+++ b/utgard/r7p0/linux/mali_kernel_linux.c
@@ -0,0 +1,1154 @@
+/**
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_kernel_linux.c
+ * Implementation of the Linux device driver entrypoints
+ */
+#include <linux/module.h>   /* kernel module definitions */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/cdev.h>     /* character device definitions */
+#include <linux/mm.h>       /* memory manager definitions */
+#include <linux/mali/mali_utgard_ioctl.h>
+#include <linux/version.h>
+#include <linux/device.h>
+#include "mali_kernel_license.h"
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/bug.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_core.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_ukk.h"
+#include "mali_ukk_wrappers.h"
+#include "mali_kernel_sysfs.h"
+#include "mali_pm.h"
+#include "mali_kernel_license.h"
+#include "mali_memory.h"
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_swap_alloc.h"
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include "mali_profiling_internal.h"
+#endif
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+#include "mali_osk_profiling.h"
+#include "mali_dvfs_policy.h"
+
+static int is_first_resume = 1;
+/*Store the clk and vol for boot/insmod and mali_resume*/
+static struct mali_gpu_clk_item mali_gpu_clk[2];
+#endif
+
+/* Streamline support for the Mali driver */
+#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_MALI400_PROFILING)
+/* Ask Linux to create the tracepoints */
+#define CREATE_TRACE_POINTS
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_hw_counter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counters);
+#endif /* CONFIG_TRACEPOINTS */
+
+#ifdef CONFIG_MALI_DEVFREQ
+#include "mali_devfreq.h"
+#include "mali_osk_mali.h"
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
+#include <linux/pm_opp.h>
+#else
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif /* Linux >= 3.13*/
+#define dev_pm_opp_of_add_table of_init_opp_table
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
+#define dev_pm_opp_of_remove_table of_free_opp_table
+#endif /* Linux >= 3.19 */
+#endif /* Linux >= 4.4.0 */
+#endif
+
+/* from the __malidrv_build_info.c file that is generated during build */
+extern const char *__malidrv_build_info(void);
+extern void mali_post_init(void);
+extern int mali_pdev_dts_init(struct platform_device *mali_gpu_device);
+extern int mpgpu_class_init(void);
+extern void mpgpu_class_exit(void);
+
+int mali_page_fault;
+module_param(mali_page_fault, int, 0664); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_page_fault, "mali_page_fault");
+
+int pp_hardware_reset;
+module_param(pp_hardware_reset, int, 0664); /* rw-rw-r-- */
+MODULE_PARM_DESC(pp_hardware_reset, "mali_hardware_reset");
+/* Module parameter to control log level */
+int mali_debug_level = 2;
+module_param(mali_debug_level, int, 0664); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_debug_level, "Higher number, more dmesg output");
+
+extern int mali_max_job_runtime;
+module_param(mali_max_job_runtime, int, 0664);
+MODULE_PARM_DESC(mali_max_job_runtime, "Maximum allowed job runtime in msecs.\nJobs will be killed after this no matter what");
+
+extern int mali_l2_max_reads;
+module_param(mali_l2_max_reads, int, 0444);
+MODULE_PARM_DESC(mali_l2_max_reads, "Maximum reads for Mali L2 cache");
+
+extern unsigned int mali_dedicated_mem_start;
+module_param(mali_dedicated_mem_start, uint, 0444);
+MODULE_PARM_DESC(mali_dedicated_mem_start, "Physical start address of dedicated Mali GPU memory.");
+
+extern unsigned int mali_dedicated_mem_size;
+module_param(mali_dedicated_mem_size, uint, 0444);
+MODULE_PARM_DESC(mali_dedicated_mem_size, "Size of dedicated Mali GPU memory.");
+
+extern unsigned int mali_shared_mem_size;
+module_param(mali_shared_mem_size, uint, 0444);
+MODULE_PARM_DESC(mali_shared_mem_size, "Size of shared Mali GPU memory.");
+
+#if defined(CONFIG_MALI400_PROFILING)
+extern int mali_boot_profiling;
+module_param(mali_boot_profiling, int, 0444);
+MODULE_PARM_DESC(mali_boot_profiling, "Start profiling as a part of Mali driver initialization");
+#endif
+
+extern int mali_max_pp_cores_group_1;
+module_param(mali_max_pp_cores_group_1, int, 0444);
+MODULE_PARM_DESC(mali_max_pp_cores_group_1, "Limit the number of PP cores to use from first PP group.");
+
+extern int mali_max_pp_cores_group_2;
+module_param(mali_max_pp_cores_group_2, int, 0444);
+MODULE_PARM_DESC(mali_max_pp_cores_group_2, "Limit the number of PP cores to use from second PP group (Mali-450 only).");
+
+extern unsigned int mali_mem_swap_out_threshold_value;
+module_param(mali_mem_swap_out_threshold_value, uint, 0444);
+MODULE_PARM_DESC(mali_mem_swap_out_threshold_value, "Threshold value used to limit how much swappable memory cached in Mali driver.");
+
+#if defined(CONFIG_MALI_DVFS)
+/** the max fps the same as display vsync default 60, can set by module insert parameter */
+extern int mali_max_system_fps;
+module_param(mali_max_system_fps, int, 0664);
+MODULE_PARM_DESC(mali_max_system_fps, "Max system fps the same as display VSYNC.");
+
+/** a lower limit on their desired FPS default 58, can set by module insert parameter*/
+extern int mali_desired_fps;
+module_param(mali_desired_fps, int, 0664);
+MODULE_PARM_DESC(mali_desired_fps, "A bit lower than max_system_fps which user desired fps");
+#endif
+
+#if MALI_ENABLE_CPU_CYCLES
+#include <linux/cpumask.h>
+#include <linux/timer.h>
+#include <asm/smp.h>
+static struct timer_list mali_init_cpu_clock_timers[8];
+static u32 mali_cpu_clock_last_value[8] = {0,};
+#endif
+
+/* Export symbols from common code: mali_user_settings.c */
+#include "mali_user_settings_db.h"
+EXPORT_SYMBOL(mali_set_user_setting);
+EXPORT_SYMBOL(mali_get_user_setting);
+
+static char mali_dev_name[] = "mali"; /* should be const, but the functions we call requires non-cost */
+
+/* This driver only supports one Mali device, and this variable stores this single platform device */
+struct platform_device *mali_platform_device;
+
+/* This driver only supports one Mali device, and this variable stores the exposed misc device (/dev/mali) */
+static struct miscdevice mali_miscdevice = { 0, };
+
+static int mali_miscdevice_register(struct platform_device *pdev);
+static void mali_miscdevice_unregister(void);
+
+static int mali_open(struct inode *inode, struct file *filp);
+static int mali_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int mali_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static int mali_probe(struct platform_device *pdev);
+static int mali_remove(struct platform_device *pdev);
+
+static int mali_driver_suspend_scheduler(struct device *dev);
+static int mali_driver_resume_scheduler(struct device *dev);
+
+#ifdef CONFIG_PM_RUNTIME
+static int mali_driver_runtime_suspend(struct device *dev);
+static int mali_driver_runtime_resume(struct device *dev);
+static int mali_driver_runtime_idle(struct device *dev);
+#endif
+
+#if defined(MALI_FAKE_PLATFORM_DEVICE)
+#if defined(CONFIG_MALI_DT)
+extern int mali_platform_device_init(struct platform_device *device);
+extern int mali_platform_device_deinit(struct platform_device *device);
+#else
+extern int mali_platform_device_register(void);
+extern int mali_platform_device_unregister(void);
+#endif
+#endif
+
+/* Linux power management operations provided by the Mali device driver */
+#if (KERNEL_VERSION(2, 6, 29) > LINUX_VERSION_CODE)
+struct pm_ext_ops mali_dev_ext_pm_ops = {
+	.base = {
+
+		.suspend = mali_driver_suspend_scheduler,
+		.resume = mali_driver_resume_scheduler,
+		.freeze = mali_driver_suspend_scheduler,
+		.thaw =   mali_driver_resume_scheduler,
+	},
+};
+#else
+static const struct dev_pm_ops mali_dev_pm_ops = {
+#ifdef CONFIG_PM_RUNTIME
+	.runtime_suspend = mali_driver_runtime_suspend,
+	.runtime_resume = mali_driver_runtime_resume,
+	.runtime_idle = mali_driver_runtime_idle,
+#endif
+	.suspend = mali_driver_suspend_scheduler,
+	.resume = mali_driver_resume_scheduler,
+	.freeze = mali_driver_suspend_scheduler,
+	.thaw = mali_driver_resume_scheduler,
+	.poweroff = mali_driver_suspend_scheduler,
+};
+#endif
+
+#ifdef CONFIG_MALI_DT
+static struct of_device_id base_dt_ids[] = {
+	{.compatible = "arm,mali-300"},
+	{.compatible = "arm,mali-400"},
+	{.compatible = "arm,mali-450"},
+	{.compatible = "arm,mali-470"},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, base_dt_ids);
+#endif
+
+/* The Mali device driver struct */
+static struct platform_driver mali_platform_driver = {
+	.probe  = mali_probe,
+	.remove = mali_remove,
+#if (KERNEL_VERSION(2, 6, 29) > LINUX_VERSION_CODE)
+	.pm = &mali_dev_ext_pm_ops,
+#endif
+	.driver = {
+
+		.name   = MALI_GPU_NAME_UTGARD,
+		.owner  = THIS_MODULE,
+		.bus = &platform_bus_type,
+#if (KERNEL_VERSION(2, 6, 29) <= LINUX_VERSION_CODE)
+		.pm = &mali_dev_pm_ops,
+#endif
+#ifdef CONFIG_MALI_DT
+		.of_match_table = of_match_ptr(base_dt_ids),
+#endif
+	},
+};
+
+/* Linux misc device operations (/dev/mali) */
+struct file_operations mali_fops = {
+	.owner = THIS_MODULE,
+	.open = mali_open,
+	.release = mali_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+	.unlocked_ioctl = mali_ioctl,
+#else
+	.ioctl = mali_ioctl,
+#endif
+	.compat_ioctl = mali_ioctl,
+	.mmap = mali_mmap
+};
+
+#if MALI_ENABLE_CPU_CYCLES
+void mali_init_cpu_time_counters(int reset, int enable_divide_by_64)
+{
+	/* The CPU assembly reference used is: ARM Architecture Reference Manual ARMv7-AR C.b */
+	u32 write_value;
+
+	/* See B4.1.116 PMCNTENSET, Performance Monitors Count Enable Set register, VMSA */
+	/* setting p15 c9 c12 1 to 0x8000000f==CPU_CYCLE_ENABLE |EVENT_3_ENABLE|EVENT_2_ENABLE|EVENT_1_ENABLE|EVENT_0_ENABLE */
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f));
+
+
+	/* See B4.1.117 PMCR, Performance Monitors Control Register. Writing to p15, c9, c12, 0 */
+	write_value = 1 << 0; /* Bit 0 set. Enable counters */
+	if (reset) {
+		write_value |= 1 << 1; /* Reset event counters */
+		write_value |= 1 << 2; /* Reset cycle counter  */
+	}
+	if (enable_divide_by_64) {
+		write_value |= 1 << 3; /* Enable the Clock divider by 64 */
+	}
+	write_value |= 1 << 4; /* Export enable. Not needed */
+	asm volatile("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(write_value));
+
+	/* PMOVSR Overflow Flag Status Register - Clear Clock and Event overflows */
+	asm volatile("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x8000000f));
+
+
+	/* See B4.1.124 PMUSERENR - setting p15 c9 c14 to 1" */
+	/* User mode access to the Performance Monitors enabled. */
+	/* Lets User space read cpu clock cycles */
+	asm volatile("mcr p15, 0, %0, c9, c14, 0" :: "r"(1));
+}
+
+/** A timer function that configures the cycle clock counter on current CPU.
+ * The function \a mali_init_cpu_time_counters_on_all_cpus sets up this
+ * function to trigger on all Cpus during module load.
+ */
+static void mali_init_cpu_clock_timer_func(unsigned long data)
+{
+	int reset_counters, enable_divide_clock_counter_by_64;
+	int current_cpu = raw_smp_processor_id();
+	unsigned int sample0;
+	unsigned int sample1;
+
+	MALI_IGNORE(data);
+
+	reset_counters = 1;
+	enable_divide_clock_counter_by_64 = 0;
+	mali_init_cpu_time_counters(reset_counters, enable_divide_clock_counter_by_64);
+
+	sample0 = mali_get_cpu_cyclecount();
+	sample1 = mali_get_cpu_cyclecount();
+
+	MALI_DEBUG_PRINT(3, ("Init Cpu %d cycle counter- First two samples: %08x %08x\n", current_cpu, sample0, sample1));
+}
+
+/** A timer functions for storing current time on all cpus.
+ * Used for checking if the clocks have similar values or if they are drifting.
+ */
+static void mali_print_cpu_clock_timer_func(unsigned long data)
+{
+	int current_cpu = raw_smp_processor_id();
+	unsigned int sample0;
+
+	MALI_IGNORE(data);
+	sample0 = mali_get_cpu_cyclecount();
+	if (current_cpu < 8) {
+		mali_cpu_clock_last_value[current_cpu] = sample0;
+	}
+}
+
+/** Init the performance registers on all CPUs to count clock cycles.
+ * For init \a print_only should be 0.
+ * If \a print_only is 1, it will intead print the current clock value of all CPUs.
+ */
+void mali_init_cpu_time_counters_on_all_cpus(int print_only)
+{
+	int i = 0;
+	int cpu_number;
+	int jiffies_trigger;
+	int jiffies_wait;
+
+	jiffies_wait = msecs_to_jiffies(20);
+	jiffies_trigger = jiffies + jiffies_wait;
+
+	for (i = 0 ; i < 8 ; i++) {
+		init_timer(&mali_init_cpu_clock_timers[i]);
+		if (print_only) mali_init_cpu_clock_timers[i].function = mali_print_cpu_clock_timer_func;
+		else            mali_init_cpu_clock_timers[i].function = mali_init_cpu_clock_timer_func;
+		mali_init_cpu_clock_timers[i].expires = jiffies_trigger;
+	}
+	cpu_number = cpumask_first(cpu_online_mask);
+	for (i = 0 ; i < 8 ; i++) {
+		int next_cpu;
+
+		add_timer_on(&mali_init_cpu_clock_timers[i], cpu_number);
+		next_cpu = cpumask_next(cpu_number, cpu_online_mask);
+		if (next_cpu >= nr_cpu_ids) break;
+		cpu_number = next_cpu;
+	}
+
+	while (jiffies_wait) jiffies_wait = schedule_timeout_uninterruptible(jiffies_wait);
+
+	for (i = 0 ; i < 8 ; i++) {
+		del_timer_sync(&mali_init_cpu_clock_timers[i]);
+	}
+
+	if (print_only) {
+		if ((0 == mali_cpu_clock_last_value[2]) && (mali_cpu_clock_last_value[3] == 0)) {
+			/* Diff can be printed if we want to check if the clocks are in sync
+			int diff = mali_cpu_clock_last_value[0] - mali_cpu_clock_last_value[1];*/
+			MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1]));
+		} else {
+			MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1], mali_cpu_clock_last_value[2], mali_cpu_clock_last_value[3]));
+		}
+	}
+}
+#endif
+
+int mali_module_init(void)
+{
+	int err = 0;
+
+	MALI_DEBUG_PRINT(2, ("Inserting Mali v%d device driver.\n", _MALI_API_VERSION));
+	//MALI_DEBUG_PRINT(2, ("Compiled: %s, time: %s.\n", __DATE__, __TIME__));
+	MALI_DEBUG_PRINT(2, ("Driver revision: %s\n", SVN_REV_STRING));
+
+#if MALI_ENABLE_CPU_CYCLES
+	mali_init_cpu_time_counters_on_all_cpus(0);
+	MALI_DEBUG_PRINT(2, ("CPU cycle counter setup complete\n"));
+	/* Printing the current cpu counters */
+	mali_init_cpu_time_counters_on_all_cpus(1);
+#endif
+
+	/* Initialize module wide settings */
+#ifdef MALI_FAKE_PLATFORM_DEVICE
+#ifndef CONFIG_MALI_DT
+	MALI_DEBUG_PRINT(2, ("mali_module_init() registering device\n"));
+	err = mali_platform_device_register();
+	if (err != 0) {
+		return err;
+	}
+#endif
+#endif
+
+	MALI_DEBUG_PRINT(2, ("mali_module_init() registering driver\n"));
+
+	err = platform_driver_register(&mali_platform_driver);
+
+	if (err != 0) {
+		MALI_DEBUG_PRINT(2, ("mali_module_init() Failed to register driver (%d)\n", err));
+#ifdef MALI_FAKE_PLATFORM_DEVICE
+#ifndef CONFIG_MALI_DT
+		mali_platform_device_unregister();
+#endif
+#endif
+		mali_platform_device = NULL;
+		return err;
+	}
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+	err = _mali_internal_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE);
+	if (err != 0) {
+		/* No biggie if we wheren't able to initialize the profiling */
+		MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n"));
+	}
+#endif
+
+	/* Tracing the current frequency and voltage from boot/insmod*/
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item(),to record current clk info.*/
+	mali_get_current_gpu_clk_item(&mali_gpu_clk[0]);
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[0].clock,
+				      mali_gpu_clk[0].vol / 1000,
+				      0, 0, 0);
+#endif
+
+	MALI_PRINT(("Mali device driver loaded\n"));
+
+	mpgpu_class_init();
+
+	return 0; /* Success */
+}
+
+void mali_module_exit(void)
+{
+	MALI_DEBUG_PRINT(2, ("Unloading Mali v%d device driver.\n", _MALI_API_VERSION));
+
+	MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering driver\n"));
+
+	platform_driver_unregister(&mali_platform_driver);
+
+#if defined(MALI_FAKE_PLATFORM_DEVICE)
+#ifndef CONFIG_MALI_DT
+	MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering device\n"));
+	mali_platform_device_unregister();
+#endif
+#endif
+
+	/* Tracing the current frequency and voltage from rmmod*/
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      0,
+				      0,
+				      0, 0, 0);
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+	_mali_internal_profiling_term();
+#endif
+	mpgpu_class_exit();
+
+	MALI_PRINT(("Mali device driver unloaded\n"));
+}
+
+#ifdef CONFIG_MALI_DEVFREQ
+struct mali_device *mali_device_alloc(void)
+{
+	return kzalloc(sizeof(struct mali_device), GFP_KERNEL);
+}
+
+void mali_device_free(struct mali_device *mdev)
+{
+	kfree(mdev);
+}
+#endif
+
+static int mali_probe(struct platform_device *pdev)
+{
+	int err;
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev;
+#endif
+
+	MALI_DEBUG_PRINT(2, ("mali_probe(): Called for platform device %s\n", pdev->name));
+
+	if (mali_platform_device != NULL) {
+		/* Already connected to a device, return error */
+		MALI_PRINT_ERROR(("mali_probe(): The Mali driver is already connected with a Mali device."));
+		return -EEXIST;
+	}
+
+	mali_platform_device = pdev;
+
+#ifdef CONFIG_MALI_DT
+	/* If we use DT to initialize our DDK, we have to prepare somethings. */
+	err = mali_platform_device_init(mali_platform_device);
+	if (err != 0) {
+		MALI_PRINT_ERROR(("mali_probe(): Failed to initialize platform device."));
+		mali_platform_device = NULL;
+		return -EFAULT;
+	}
+#endif
+
+#ifdef CONFIG_MALI_DEVFREQ
+	mdev = mali_device_alloc();
+	if (!mdev) {
+		MALI_PRINT_ERROR(("Can't allocate mali device private data\n"));
+		return -ENOMEM;
+	}
+
+	mdev->dev = &pdev->dev;
+	dev_set_drvdata(mdev->dev, mdev);
+
+	/*Initilization clock and regulator*/
+#if (KERNEL_VERSION(3, 12, 0) <= LINUX_VERSION_CODE) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	mdev->regulator = regulator_get_optional(mdev->dev, "mali");
+	if (IS_ERR_OR_NULL(mdev->regulator)) {
+		MALI_DEBUG_PRINT(2, ("Continuing without Mali regulator control\n"));
+		mdev->regulator = NULL;
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+#if (KERNEL_VERSION(3, 7, 0) <= LINUX_VERSION_CODE) && defined(CONFIG_OF) \
+			&& defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+	if (dev_pm_opp_of_add_table(mdev->dev) < 0)
+		MALI_DEBUG_PRINT(3, ("OPP table not found\n"));
+#endif
+
+	/* Need to name the gpu clock "clk_mali" in the device tree */
+	mdev->clock = clk_get(mdev->dev, "clk_mali");
+	if (IS_ERR_OR_NULL(mdev->clock)) {
+		MALI_DEBUG_PRINT(2, ("Continuing without Mali clock control\n"));
+		mdev->clock = NULL;
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare_enable(mdev->clock);
+		if (err) {
+			MALI_PRINT_ERROR(("Failed to prepare and enable clock (%d)\n", err));
+			goto clock_prepare_failed;
+		}
+	}
+
+	/* initilize pm metrics related */
+	if (mali_pm_metrics_init(mdev) < 0) {
+		MALI_DEBUG_PRINT(2, ("mali pm metrics init failed\n"));
+		goto pm_metrics_init_failed;
+	}
+
+	if (mali_devfreq_init(mdev) < 0) {
+		MALI_DEBUG_PRINT(2, ("mali devfreq init failed\n"));
+		goto devfreq_init_failed;
+	}
+#endif
+
+
+	if (_mali_osk_wq_init() == _MALI_OSK_ERR_OK) {
+		/* Initialize the Mali GPU HW specified by pdev */
+		if (mali_initialize_subsystems() == _MALI_OSK_ERR_OK) {
+			/* Register a misc device (so we are accessible from user space) */
+			err = mali_miscdevice_register(pdev);
+			if (err == 0) {
+				/* Setup sysfs entries */
+				err = mali_sysfs_register(mali_dev_name);
+
+				if (err == 0) {
+					mali_post_init();
+					MALI_DEBUG_PRINT(2, ("mali_probe(): Successfully initialized driver for platform device %s\n", pdev->name));
+
+					return 0;
+				} else {
+					MALI_PRINT_ERROR(("mali_probe(): failed to register sysfs entries"));
+				}
+				mali_miscdevice_unregister();
+			} else {
+				MALI_PRINT_ERROR(("mali_probe(): failed to register Mali misc device."));
+			}
+			mali_terminate_subsystems();
+		} else {
+			MALI_PRINT_ERROR(("mali_probe(): Failed to initialize Mali device driver."));
+		}
+		_mali_osk_wq_term();
+	}
+
+#ifdef CONFIG_MALI_DEVFREQ
+	mali_devfreq_term(mdev);
+devfreq_init_failed:
+	mali_pm_metrics_term(mdev);
+pm_metrics_init_failed:
+	clk_disable_unprepare(mdev->clock);
+clock_prepare_failed:
+	clk_put(mdev->clock);
+#if (KERNEL_VERSION(3, 19, 0) <= LINUX_VERSION_CODE) && defined(CONFIG_OF) \
+			&& defined(CONFIG_PM_OPP)
+	dev_pm_opp_of_remove_table(mdev->dev);
+#endif
+
+#if (KERNEL_VERSION(3, 12, 0) <= LINUX_VERSION_CODE) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	regulator_put(mdev->regulator);
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+	mali_device_free(mdev);
+#endif
+
+#ifdef CONFIG_MALI_DT
+	mali_platform_device_deinit(mali_platform_device);
+#endif
+	mali_platform_device = NULL;
+	return -EFAULT;
+}
+
+static int mali_remove(struct platform_device *pdev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(&pdev->dev);
+#endif
+
+	MALI_DEBUG_PRINT(2, ("mali_remove() called for platform device %s\n", pdev->name));
+	mali_sysfs_unregister();
+	mali_miscdevice_unregister();
+	mali_terminate_subsystems();
+	_mali_osk_wq_term();
+
+#ifdef CONFIG_MALI_DEVFREQ
+	mali_devfreq_term(mdev);
+
+	mali_pm_metrics_term(mdev);
+
+	if (mdev->clock) {
+		clk_disable_unprepare(mdev->clock);
+		clk_put(mdev->clock);
+		mdev->clock = NULL;
+	}
+#if (KERNEL_VERSION(3, 19, 0) <= LINUX_VERSION_CODE) && defined(CONFIG_OF) \
+			&& defined(CONFIG_PM_OPP)
+	dev_pm_opp_of_remove_table(mdev->dev);
+#endif
+
+#if (KERNEL_VERSION(3, 12, 0) <= LINUX_VERSION_CODE) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	regulator_put(mdev->regulator);
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+	mali_device_free(mdev);
+#endif
+
+#ifdef CONFIG_MALI_DT
+	mali_platform_device_deinit(mali_platform_device);
+#endif
+	mali_platform_device = NULL;
+	return 0;
+}
+
+static int mali_miscdevice_register(struct platform_device *pdev)
+{
+	int err;
+
+	mali_miscdevice.minor = MISC_DYNAMIC_MINOR;
+	mali_miscdevice.name = mali_dev_name;
+	mali_miscdevice.fops = &mali_fops;
+	mali_miscdevice.parent = get_device(&pdev->dev);
+
+	err = misc_register(&mali_miscdevice);
+	if (err != 0) {
+		MALI_PRINT_ERROR(("Failed to register misc device, misc_register() returned %d\n", err));
+	}
+
+	return err;
+}
+
+static void mali_miscdevice_unregister(void)
+{
+	misc_deregister(&mali_miscdevice);
+}
+
+static int mali_driver_suspend_scheduler(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(KERNEL_VERSION(3, 8, 0) <= LINUX_VERSION_CODE)
+	devfreq_suspend_device(mdev->devfreq);
+#endif
+
+	mali_pm_os_suspend(MALI_TRUE);
+	/* Tracing the frequency and voltage after mali is suspended */
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      0,
+				      0,
+				      0, 0, 0);
+	return 0;
+}
+
+static int mali_driver_resume_scheduler(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+	/* Tracing the frequency and voltage after mali is resumed */
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item() once,to record current clk info.*/
+	if (is_first_resume == 1) {
+		mali_get_current_gpu_clk_item(&mali_gpu_clk[1]);
+		is_first_resume = 0;
+	}
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[1].clock,
+				      mali_gpu_clk[1].vol / 1000,
+				      0, 0, 0);
+#endif
+	mali_pm_os_resume();
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(KERNEL_VERSION(3, 8, 0) <= LINUX_VERSION_CODE)
+	devfreq_resume_device(mdev->devfreq);
+#endif
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_RUNTIME
+static int mali_driver_runtime_suspend(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+	if (mali_pm_runtime_suspend() == MALI_TRUE) {
+		/* Tracing the frequency and voltage after mali is suspended */
+		_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+					      MALI_PROFILING_EVENT_CHANNEL_GPU |
+					      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+					      0,
+					      0,
+					      0, 0, 0);
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(KERNEL_VERSION(3, 8, 0) <= LINUX_VERSION_CODE)
+		MALI_DEBUG_PRINT(4, ("devfreq_suspend_device: stop devfreq monitor\n"));
+		devfreq_suspend_device(mdev->devfreq);
+#endif
+
+		return 0;
+	} else {
+		return -EBUSY;
+	}
+}
+
+static int mali_driver_runtime_resume(struct device *dev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	struct mali_device *mdev = dev_get_drvdata(dev);
+
+	if (!mdev)
+		return -ENODEV;
+#endif
+
+	/* Tracing the frequency and voltage after mali is resumed */
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+	/* Just call mali_get_current_gpu_clk_item() once,to record current clk info.*/
+	if (is_first_resume == 1) {
+		mali_get_current_gpu_clk_item(&mali_gpu_clk[1]);
+		is_first_resume = 0;
+	}
+	_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+				      MALI_PROFILING_EVENT_CHANNEL_GPU |
+				      MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+				      mali_gpu_clk[1].clock,
+				      mali_gpu_clk[1].vol / 1000,
+				      0, 0, 0);
+#endif
+
+	mali_pm_runtime_resume();
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(KERNEL_VERSION(3, 8, 0) <= LINUX_VERSION_CODE)
+	MALI_DEBUG_PRINT(4, ("devfreq_resume_device: start devfreq monitor\n"));
+	devfreq_resume_device(mdev->devfreq);
+#endif
+	return 0;
+}
+
+static int mali_driver_runtime_idle(struct device *dev)
+{
+	/* Nothing to do */
+	return 0;
+}
+#endif
+
+static int mali_open(struct inode *inode, struct file *filp)
+{
+	struct mali_session_data *session_data;
+	_mali_osk_errcode_t err;
+
+	/* input validation */
+	if (mali_miscdevice.minor != iminor(inode)) {
+		MALI_PRINT_ERROR(("mali_open() Minor does not match\n"));
+		return -ENODEV;
+	}
+
+	/* allocated struct to track this session */
+	err = _mali_ukk_open((void **)&session_data);
+	if (err != _MALI_OSK_ERR_OK) return map_errcode(err);
+
+	/* initialize file pointer */
+	filp->f_pos = 0;
+
+	/* link in our session data */
+	filp->private_data = (void *)session_data;
+
+	filp->f_mapping = mali_mem_swap_get_global_swap_file()->f_mapping;
+
+	return 0;
+}
+
+static int mali_release(struct inode *inode, struct file *filp)
+{
+	_mali_osk_errcode_t err;
+
+	/* input validation */
+	if (mali_miscdevice.minor != iminor(inode)) {
+		MALI_PRINT_ERROR(("mali_release() Minor does not match\n"));
+		return -ENODEV;
+	}
+
+	err = _mali_ukk_close((void **)&filp->private_data);
+	if (err != _MALI_OSK_ERR_OK) return map_errcode(err);
+
+	return 0;
+}
+
+int map_errcode(_mali_osk_errcode_t err)
+{
+	switch (err) {
+	case _MALI_OSK_ERR_OK:
+		return 0;
+	case _MALI_OSK_ERR_FAULT:
+		return -EFAULT;
+	case _MALI_OSK_ERR_INVALID_FUNC:
+		return -ENOTTY;
+	case _MALI_OSK_ERR_INVALID_ARGS:
+		return -EINVAL;
+	case _MALI_OSK_ERR_NOMEM:
+		return -ENOMEM;
+	case _MALI_OSK_ERR_TIMEOUT:
+		return -ETIMEDOUT;
+	case _MALI_OSK_ERR_RESTARTSYSCALL:
+		return -ERESTARTSYS;
+	case _MALI_OSK_ERR_ITEM_NOT_FOUND:
+		return -ENOENT;
+	default:
+		return -EFAULT;
+	}
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int mali_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+	int err;
+	struct mali_session_data *session_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+	/* inode not used */
+	(void)inode;
+#endif
+
+	MALI_DEBUG_PRINT(7, ("Ioctl received 0x%08X 0x%08lX\n", cmd, arg));
+
+	session_data = (struct mali_session_data *)filp->private_data;
+	if (session_data == NULL) {
+		MALI_DEBUG_PRINT(7, ("filp->private_data was NULL\n"));
+		return -ENOTTY;
+	}
+
+	if (NULL == (void *)arg) {
+		MALI_DEBUG_PRINT(7, ("arg was NULL\n"));
+		return -ENOTTY;
+	}
+
+	switch (cmd) {
+	case MALI_IOC_WAIT_FOR_NOTIFICATION:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_wait_for_notification_s), sizeof(u64)));
+		err = wait_for_notification_wrapper(session_data, (_mali_uk_wait_for_notification_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_API_VERSION_V2:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_api_version_v2_s), sizeof(u64)));
+		err = get_api_version_v2_wrapper(session_data, (_mali_uk_get_api_version_v2_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_API_VERSION:
+		err = get_api_version_wrapper(session_data, (_mali_uk_get_api_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_POST_NOTIFICATION:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_post_notification_s), sizeof(u64)));
+		err = post_notification_wrapper(session_data, (_mali_uk_post_notification_s __user *)arg);
+		break;
+
+	case MALI_IOC_GET_USER_SETTINGS:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_user_settings_s), sizeof(u64)));
+		err = get_user_settings_wrapper(session_data, (_mali_uk_get_user_settings_s __user *)arg);
+		break;
+
+	case MALI_IOC_REQUEST_HIGH_PRIORITY:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_request_high_priority_s), sizeof(u64)));
+		err = request_high_priority_wrapper(session_data, (_mali_uk_request_high_priority_s __user *)arg);
+		break;
+
+	case MALI_IOC_PENDING_SUBMIT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pending_submit_s), sizeof(u64)));
+		err = pending_submit_wrapper(session_data, (_mali_uk_pending_submit_s __user *)arg);
+		break;
+
+#if defined(CONFIG_MALI400_PROFILING)
+	case MALI_IOC_PROFILING_ADD_EVENT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_add_event_s), sizeof(u64)));
+		err = profiling_add_event_wrapper(session_data, (_mali_uk_profiling_add_event_s __user *)arg);
+		break;
+
+	case MALI_IOC_PROFILING_REPORT_SW_COUNTERS:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_sw_counters_report_s), sizeof(u64)));
+		err = profiling_report_sw_counters_wrapper(session_data, (_mali_uk_sw_counters_report_s __user *)arg);
+		break;
+
+	case MALI_IOC_PROFILING_STREAM_FD_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_stream_fd_get_s), sizeof(u64)));
+		err = profiling_get_stream_fd_wrapper(session_data, (_mali_uk_profiling_stream_fd_get_s __user *)arg);
+		break;
+
+	case MALI_IOC_PROILING_CONTROL_SET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_control_set_s), sizeof(u64)));
+		err = profiling_control_set_wrapper(session_data, (_mali_uk_profiling_control_set_s __user *)arg);
+		break;
+#else
+
+	case MALI_IOC_PROFILING_ADD_EVENT:          /* FALL-THROUGH */
+	case MALI_IOC_PROFILING_REPORT_SW_COUNTERS: /* FALL-THROUGH */
+		MALI_DEBUG_PRINT(2, ("Profiling not supported\n"));
+		err = -ENOTTY;
+		break;
+#endif
+
+	case MALI_IOC_PROFILING_MEMORY_USAGE_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_memory_usage_get_s), sizeof(u64)));
+		err = mem_usage_get_wrapper(session_data, (_mali_uk_profiling_memory_usage_get_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_ALLOC:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_alloc_mem_s), sizeof(u64)));
+		err = mem_alloc_wrapper(session_data, (_mali_uk_alloc_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_FREE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_free_mem_s), sizeof(u64)));
+		err = mem_free_wrapper(session_data, (_mali_uk_free_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_BIND:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_bind_mem_s), sizeof(u64)));
+		err = mem_bind_wrapper(session_data, (_mali_uk_bind_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_UNBIND:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_unbind_mem_s), sizeof(u64)));
+		err = mem_unbind_wrapper(session_data, (_mali_uk_unbind_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_COW:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_cow_mem_s), sizeof(u64)));
+		err = mem_cow_wrapper(session_data, (_mali_uk_cow_mem_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_COW_MODIFY_RANGE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_cow_modify_range_s), sizeof(u64)));
+		err = mem_cow_modify_range_wrapper(session_data, (_mali_uk_cow_modify_range_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_RESIZE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_resize_s), sizeof(u64)));
+		err = mem_resize_mem_wrapper(session_data, (_mali_uk_mem_resize_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_WRITE_SAFE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_write_safe_s), sizeof(u64)));
+		err = mem_write_safe_wrapper(session_data, (_mali_uk_mem_write_safe_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_query_mmu_page_table_dump_size_s), sizeof(u64)));
+		err = mem_query_mmu_page_table_dump_size_wrapper(session_data, (_mali_uk_query_mmu_page_table_dump_size_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dump_mmu_page_table_s), sizeof(u64)));
+		err = mem_dump_mmu_page_table_wrapper(session_data, (_mali_uk_dump_mmu_page_table_s __user *)arg);
+		break;
+
+	case MALI_IOC_MEM_DMA_BUF_GET_SIZE:
+#ifdef CONFIG_DMA_SHARED_BUFFER
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dma_buf_get_size_s), sizeof(u64)));
+		err = mali_dma_buf_get_size(session_data, (_mali_uk_dma_buf_get_size_s __user *)arg);
+#else
+		MALI_DEBUG_PRINT(2, ("DMA-BUF not supported\n"));
+		err = -ENOTTY;
+#endif
+		break;
+
+	case MALI_IOC_PP_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_start_job_s), sizeof(u64)));
+		err = pp_start_job_wrapper(session_data, (_mali_uk_pp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_AND_GP_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_and_gp_start_job_s), sizeof(u64)));
+		err = pp_and_gp_start_job_wrapper(session_data, (_mali_uk_pp_and_gp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_NUMBER_OF_CORES_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_number_of_cores_s), sizeof(u64)));
+		err = pp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_pp_number_of_cores_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_CORE_VERSION_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_core_version_s), sizeof(u64)));
+		err = pp_get_core_version_wrapper(session_data, (_mali_uk_get_pp_core_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_PP_DISABLE_WB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_disable_wb_s), sizeof(u64)));
+		err = pp_disable_wb_wrapper(session_data, (_mali_uk_pp_disable_wb_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_START_JOB:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_start_job_s), sizeof(u64)));
+		err = gp_start_job_wrapper(session_data, (_mali_uk_gp_start_job_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_NUMBER_OF_CORES_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_number_of_cores_s), sizeof(u64)));
+		err = gp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_gp_number_of_cores_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_CORE_VERSION_GET:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_core_version_s), sizeof(u64)));
+		err = gp_get_core_version_wrapper(session_data, (_mali_uk_get_gp_core_version_s __user *)arg);
+		break;
+
+	case MALI_IOC_GP2_SUSPEND_RESPONSE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_suspend_response_s), sizeof(u64)));
+		err = gp_suspend_response_wrapper(session_data, (_mali_uk_gp_suspend_response_s __user *)arg);
+		break;
+
+	case MALI_IOC_VSYNC_EVENT_REPORT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_vsync_event_report_s), sizeof(u64)));
+		err = vsync_event_report_wrapper(session_data, (_mali_uk_vsync_event_report_s __user *)arg);
+		break;
+
+	case MALI_IOC_TIMELINE_GET_LATEST_POINT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_get_latest_point_s), sizeof(u64)));
+		err = timeline_get_latest_point_wrapper(session_data, (_mali_uk_timeline_get_latest_point_s __user *)arg);
+		break;
+	case MALI_IOC_TIMELINE_WAIT:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_wait_s), sizeof(u64)));
+		err = timeline_wait_wrapper(session_data, (_mali_uk_timeline_wait_s __user *)arg);
+		break;
+	case MALI_IOC_TIMELINE_CREATE_SYNC_FENCE:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_create_sync_fence_s), sizeof(u64)));
+		err = timeline_create_sync_fence_wrapper(session_data, (_mali_uk_timeline_create_sync_fence_s __user *)arg);
+		break;
+	case MALI_IOC_SOFT_JOB_START:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_start_s), sizeof(u64)));
+		err = soft_job_start_wrapper(session_data, (_mali_uk_soft_job_start_s __user *)arg);
+		break;
+	case MALI_IOC_SOFT_JOB_SIGNAL:
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_signal_s), sizeof(u64)));
+		err = soft_job_signal_wrapper(session_data, (_mali_uk_soft_job_signal_s __user *)arg);
+		break;
+
+	default:
+		MALI_DEBUG_PRINT(2, ("No handler for ioctl 0x%08X 0x%08lX\n", cmd, arg));
+		err = -ENOTTY;
+	};
+
+	return err;
+}
+
+
+module_init(mali_module_init);
+module_exit(mali_module_exit);
+
+MODULE_LICENSE(MALI_KERNEL_LINUX_LICENSE);
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(SVN_REV_STRING);
diff --git a/utgard/r7p0/linux/mali_kernel_linux.h b/utgard/r7p0/linux/mali_kernel_linux.h
new file mode 100644
index 0000000..4d9deee
--- /dev/null
+++ b/utgard/r7p0/linux/mali_kernel_linux.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_LINUX_H__
+#define __MALI_KERNEL_LINUX_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/cdev.h>     /* character device definitions */
+#include <linux/idr.h>
+#include <linux/rbtree.h>
+#include "mali_kernel_license.h"
+#include "mali_osk_types.h"
+#include <linux/version.h>
+
+extern struct platform_device *mali_platform_device;
+
+/* After 3.19.0 kenrel droped CONFIG_PM_RUNTIME define,define by ourself */
+#if defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
+#define CONFIG_PM_RUNTIME 1
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LINUX_H__ */
diff --git a/utgard/r7p0/linux/mali_kernel_sysfs.c b/utgard/r7p0/linux/mali_kernel_sysfs.c
new file mode 100644
index 0000000..47da865
--- /dev/null
+++ b/utgard/r7p0/linux/mali_kernel_sysfs.c
@@ -0,0 +1,1419 @@
+/**
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+
+/**
+ * @file mali_kernel_sysfs.c
+ * Implementation of some sysfs data exports
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include "mali_kernel_license.h"
+#include "mali_kernel_common.h"
+#include "mali_ukk.h"
+
+#if MALI_LICENSE_IS_GPL
+
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_sysfs.h"
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include <linux/slab.h>
+#include "mali_osk_profiling.h"
+#endif
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_pm.h"
+#include "mali_pmu.h"
+#include "mali_group.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_l2_cache.h"
+#include "mali_hw_core.h"
+#include "mali_kernel_core.h"
+#include "mali_user_settings_db.h"
+#include "mali_profiling_internal.h"
+#include "mali_gp_job.h"
+#include "mali_pp_job.h"
+#include "mali_executor.h"
+
+#define PRIVATE_DATA_COUNTER_MAKE_GP(src) (src)
+#define PRIVATE_DATA_COUNTER_MAKE_PP(src) ((1 << 24) | src)
+#define PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(src, sub_job) ((1 << 24) | (1 << 16) | (sub_job << 8) | src)
+#define PRIVATE_DATA_COUNTER_IS_PP(a) ((((a) >> 24) & 0xFF) ? MALI_TRUE : MALI_FALSE)
+#define PRIVATE_DATA_COUNTER_GET_SRC(a) (a & 0xFF)
+#define PRIVATE_DATA_COUNTER_IS_SUB_JOB(a) ((((a) >> 16) & 0xFF) ? MALI_TRUE : MALI_FALSE)
+#define PRIVATE_DATA_COUNTER_GET_SUB_JOB(a) (((a) >> 8) & 0xFF)
+
+#define POWER_BUFFER_SIZE 3
+
+static struct dentry *mali_debugfs_dir;
+
+typedef enum {
+	_MALI_DEVICE_SUSPEND,
+	_MALI_DEVICE_RESUME,
+	_MALI_DEVICE_DVFS_PAUSE,
+	_MALI_DEVICE_DVFS_RESUME,
+	_MALI_MAX_EVENTS
+} _mali_device_debug_power_events;
+
+static const char *const mali_power_events[_MALI_MAX_EVENTS] = {
+	[_MALI_DEVICE_SUSPEND] = "suspend",
+	[_MALI_DEVICE_RESUME] = "resume",
+	[_MALI_DEVICE_DVFS_PAUSE] = "dvfs_pause",
+	[_MALI_DEVICE_DVFS_RESUME] = "dvfs_resume",
+};
+
+static mali_bool power_always_on_enabled = MALI_FALSE;
+
+static int open_copy_private_data(struct inode *inode, struct file *filp)
+{
+	filp->private_data = inode->i_private;
+	return 0;
+}
+
+static ssize_t group_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	struct mali_group *group;
+
+	group = (struct mali_group *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	r = snprintf(buffer, 64, "%u\n",
+		     mali_executor_group_is_disabled(group) ? 0 : 1);
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static ssize_t group_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	unsigned long val;
+	struct mali_group *group;
+
+	group = (struct mali_group *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(group);
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	r = kstrtoul(&buffer[0], 10, &val);
+	if (r != 0) {
+		return -EINVAL;
+	}
+
+	switch (val) {
+	case 1:
+		mali_executor_group_enable(group);
+		break;
+	case 0:
+		mali_executor_group_disable(group);
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static const struct file_operations group_enabled_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read = group_enabled_read,
+	.write = group_enabled_write,
+};
+
+static ssize_t hw_core_base_addr_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+	struct mali_hw_core *hw_core;
+
+	hw_core = (struct mali_hw_core *)filp->private_data;
+	MALI_DEBUG_ASSERT_POINTER(hw_core);
+
+	r = snprintf(buffer, 64, "0x%lX\n", hw_core->phys_addr);
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations hw_core_base_addr_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read = hw_core_base_addr_read,
+};
+
+static ssize_t profiling_counter_src_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data);
+	u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data);
+	mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data);
+	u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data);
+	char buf[64];
+	int r;
+	u32 val;
+
+	if (is_pp == MALI_TRUE) {
+		/* PP counter */
+		if (is_sub_job == MALI_TRUE) {
+			/* Get counter for a particular sub job */
+			if (src_id == 0) {
+				val = mali_pp_job_get_pp_counter_sub_job_src0(sub_job);
+			} else {
+				val = mali_pp_job_get_pp_counter_sub_job_src1(sub_job);
+			}
+		} else {
+			/* Get default counter for all PP sub jobs */
+			if (src_id == 0) {
+				val = mali_pp_job_get_pp_counter_global_src0();
+			} else {
+				val = mali_pp_job_get_pp_counter_global_src1();
+			}
+		}
+	} else {
+		/* GP counter */
+		if (src_id == 0) {
+			val = mali_gp_job_get_gp_counter_src0();
+		} else {
+			val = mali_gp_job_get_gp_counter_src1();
+		}
+	}
+
+	if (val == MALI_HW_CORE_NO_COUNTER) {
+		r = snprintf(buf, 64, "-1\n");
+	} else {
+		r = snprintf(buf, 64, "%u\n", val);
+	}
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t profiling_counter_src_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data);
+	u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data);
+	mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data);
+	u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data);
+	char buf[64];
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	if (is_pp == MALI_TRUE) {
+		/* PP counter */
+		if (is_sub_job == MALI_TRUE) {
+			/* Set counter for a particular sub job */
+			if (src_id == 0) {
+				mali_pp_job_set_pp_counter_sub_job_src0(sub_job, (u32)val);
+			} else {
+				mali_pp_job_set_pp_counter_sub_job_src1(sub_job, (u32)val);
+			}
+		} else {
+			/* Set default counter for all PP sub jobs */
+			if (src_id == 0) {
+				mali_pp_job_set_pp_counter_global_src0((u32)val);
+			} else {
+				mali_pp_job_set_pp_counter_global_src1((u32)val);
+			}
+		}
+	} else {
+		/* GP counter */
+		if (src_id == 0) {
+			mali_gp_job_set_gp_counter_src0((u32)val);
+		} else {
+			mali_gp_job_set_gp_counter_src1((u32)val);
+		}
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static const struct file_operations profiling_counter_src_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = profiling_counter_src_read,
+	.write = profiling_counter_src_write,
+};
+
+static ssize_t l2_l2x_counter_srcx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	int r;
+	u32 val;
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+
+	if (src_id == 0) {
+		val = mali_l2_cache_core_get_counter_src0(l2_core);
+	} else {
+		val = mali_l2_cache_core_get_counter_src1(l2_core);
+	}
+
+	if (val == MALI_HW_CORE_NO_COUNTER) {
+		r = snprintf(buf, 64, "-1\n");
+	} else {
+		r = snprintf(buf, 64, "%u\n", val);
+	}
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t l2_l2x_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+	char buf[64];
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	mali_l2_cache_core_set_counter_src(l2_core, src_id, (u32)val);
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t l2_all_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	long val;
+	int ret;
+	u32 l2_id;
+	struct mali_l2_cache_core *l2_cache;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val < 0) {
+		/* any negative input will disable counter */
+		val = MALI_HW_CORE_NO_COUNTER;
+	}
+
+	l2_id = 0;
+	l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+	while (l2_cache != NULL) {
+		mali_l2_cache_core_set_counter_src(l2_cache, src_id, (u32)val);
+
+		/* try next L2 */
+		l2_id++;
+		l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t l2_l2x_counter_src0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_src1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 1);
+}
+
+static ssize_t l2_l2x_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 1);
+}
+
+static ssize_t l2_all_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_all_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 1);
+}
+
+static const struct file_operations l2_l2x_counter_src0_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_src0_read,
+	.write = l2_l2x_counter_src0_write,
+};
+
+static const struct file_operations l2_l2x_counter_src1_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_src1_read,
+	.write = l2_l2x_counter_src1_write,
+};
+
+static const struct file_operations l2_all_counter_src0_fops = {
+	.owner = THIS_MODULE,
+	.write = l2_all_counter_src0_write,
+};
+
+static const struct file_operations l2_all_counter_src1_fops = {
+	.owner = THIS_MODULE,
+	.write = l2_all_counter_src1_write,
+};
+
+static ssize_t l2_l2x_counter_valx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+	char buf[64];
+	int r;
+	u32 src0 = 0;
+	u32 val0 = 0;
+	u32 src1 = 0;
+	u32 val1 = 0;
+	u32 val = -1;
+	struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+
+	mali_l2_cache_core_get_counter_values(l2_core, &src0, &val0, &src1, &val1);
+
+	if (src_id == 0) {
+		if (val0 != MALI_HW_CORE_NO_COUNTER) {
+			val = val0;
+		}
+	} else {
+		if (val1 != MALI_HW_CORE_NO_COUNTER) {
+			val = val1;
+		}
+	}
+
+	r = snprintf(buf, 64, "%u\n", val);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t l2_l2x_counter_val0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_val1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 1);
+}
+
+static const struct file_operations l2_l2x_counter_val0_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_val0_read,
+};
+
+static const struct file_operations l2_l2x_counter_val1_fops = {
+	.owner = THIS_MODULE,
+	.open  = open_copy_private_data,
+	.read  = l2_l2x_counter_val1_read,
+};
+
+static ssize_t power_always_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+	char buf[32];
+
+	cnt = min(cnt, sizeof(buf) - 1);
+	if (copy_from_user(buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+	buf[cnt] = '\0';
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret != 0) {
+		return ret;
+	}
+
+	/* Update setting (not exactly thread safe) */
+	if (1 == val && MALI_FALSE == power_always_on_enabled) {
+		power_always_on_enabled = MALI_TRUE;
+		_mali_osk_pm_dev_ref_get_sync();
+	} else if (0 == val && MALI_TRUE == power_always_on_enabled) {
+		power_always_on_enabled = MALI_FALSE;
+		_mali_osk_pm_dev_ref_put();
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t power_always_on_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	if (power_always_on_enabled == MALI_TRUE) {
+		return simple_read_from_buffer(ubuf, cnt, ppos, "1\n", 2);
+	} else {
+		return simple_read_from_buffer(ubuf, cnt, ppos, "0\n", 2);
+	}
+}
+
+static const struct file_operations power_always_on_fops = {
+	.owner = THIS_MODULE,
+	.read  = power_always_on_read,
+	.write = power_always_on_write,
+};
+
+static ssize_t power_power_events_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_SUSPEND], strlen(mali_power_events[_MALI_DEVICE_SUSPEND]) - 1)) {
+		mali_pm_os_suspend(MALI_TRUE);
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_RESUME], strlen(mali_power_events[_MALI_DEVICE_RESUME]) - 1)) {
+		mali_pm_os_resume();
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_DVFS_PAUSE], strlen(mali_power_events[_MALI_DEVICE_DVFS_PAUSE]) - 1)) {
+		mali_dev_pause();
+	} else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_DVFS_RESUME], strlen(mali_power_events[_MALI_DEVICE_DVFS_RESUME]) - 1)) {
+		mali_dev_resume();
+	}
+	*ppos += cnt;
+	return cnt;
+}
+
+static loff_t power_power_events_seek(struct file *file, loff_t offset, int orig)
+{
+	file->f_pos = offset;
+	return 0;
+}
+
+static const struct file_operations power_power_events_fops = {
+	.owner = THIS_MODULE,
+	.write = power_power_events_write,
+	.llseek = power_power_events_seek,
+};
+
+#if MALI_STATE_TRACKING
+static int mali_seq_internal_state_show(struct seq_file *seq_file, void *v)
+{
+	u32 len = 0;
+	u32 size;
+	char *buf;
+
+	size = seq_get_buf(seq_file, &buf);
+
+	if (!size) {
+		return -ENOMEM;
+	}
+
+	/* Create the internal state dump. */
+	len  = snprintf(buf + len, size - len, "Mali device driver %s\n", SVN_REV_STRING);
+	len += snprintf(buf + len, size - len, "License: %s\n\n", MALI_KERNEL_LINUX_LICENSE);
+
+	len += _mali_kernel_core_dump_state(buf + len, size - len);
+
+	seq_commit(seq_file, len);
+
+	return 0;
+}
+
+static int mali_seq_internal_state_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mali_seq_internal_state_show, NULL);
+}
+
+static const struct file_operations mali_seq_internal_state_fops = {
+	.owner = THIS_MODULE,
+	.open = mali_seq_internal_state_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+#endif /* MALI_STATE_TRACKING */
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+static ssize_t profiling_record_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	int r;
+
+	r = snprintf(buf, 64, "%u\n", _mali_internal_profiling_is_recording() ? 1 : 0);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t profiling_record_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	unsigned long val;
+	int ret;
+
+	if (cnt >= sizeof(buf)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+
+	buf[cnt] = 0;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret < 0) {
+		return ret;
+	}
+
+	if (val != 0) {
+		u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* This can be made configurable at a later stage if we need to */
+
+		/* check if we are already recording */
+		if (_mali_internal_profiling_is_recording() == MALI_TRUE) {
+			MALI_DEBUG_PRINT(3, ("Recording of profiling events already in progress\n"));
+			return -EFAULT;
+		}
+
+		/* check if we need to clear out an old recording first */
+		if (_mali_internal_profiling_have_recording() == MALI_TRUE) {
+			if (_mali_internal_profiling_clear() != _MALI_OSK_ERR_OK) {
+				MALI_DEBUG_PRINT(3, ("Failed to clear existing recording of profiling events\n"));
+				return -EFAULT;
+			}
+		}
+
+		/* start recording profiling data */
+		if (_mali_internal_profiling_start(&limit) != _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(3, ("Failed to start recording of profiling events\n"));
+			return -EFAULT;
+		}
+
+		MALI_DEBUG_PRINT(3, ("Profiling recording started (max %u events)\n", limit));
+	} else {
+		/* stop recording profiling data */
+		u32 count = 0;
+
+		if (_mali_internal_profiling_stop(&count) != _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(2, ("Failed to stop recording of profiling events\n"));
+			return -EFAULT;
+		}
+
+		MALI_DEBUG_PRINT(2, ("Profiling recording stopped (recorded %u events)\n", count));
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static const struct file_operations profiling_record_fops = {
+	.owner = THIS_MODULE,
+	.read  = profiling_record_read,
+	.write = profiling_record_write,
+};
+
+static void *profiling_events_start(struct seq_file *s, loff_t *pos)
+{
+	loff_t *spos;
+
+	/* check if we have data avaiable */
+	if (_mali_internal_profiling_have_recording() != MALI_TRUE) {
+		return NULL;
+	}
+
+	spos = kmalloc(sizeof(loff_t), GFP_KERNEL);
+	if (spos == NULL) {
+		return NULL;
+	}
+
+	*spos = *pos;
+	return spos;
+}
+
+static void *profiling_events_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	loff_t *spos = v;
+
+	/* check if we have data avaiable */
+	if (_mali_internal_profiling_have_recording() != MALI_TRUE) {
+		return NULL;
+	}
+
+	/* check if the next entry actually is avaiable */
+	if (_mali_internal_profiling_get_count() <= (u32)(*spos + 1)) {
+		return NULL;
+	}
+
+	*pos = ++*spos;
+	return spos;
+}
+
+static void profiling_events_stop(struct seq_file *s, void *v)
+{
+	kfree(v);
+}
+
+static int profiling_events_show(struct seq_file *seq_file, void *v)
+{
+	loff_t *spos = v;
+	u32 index;
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+
+	index = (u32) *spos;
+
+	/* Retrieve all events */
+	if (_mali_internal_profiling_get_event(index, &timestamp, &event_id, data) == _MALI_OSK_ERR_OK) {
+		seq_printf(seq_file, "%llu %u %u %u %u %u %u\n", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]);
+		return 0;
+	}
+
+	return 0;
+}
+
+static int profiling_events_show_human_readable(struct seq_file *seq_file, void *v)
+{
+#define MALI_EVENT_ID_IS_HW(event_id) (((event_id & 0x00FF0000) >= MALI_PROFILING_EVENT_CHANNEL_GP0) && ((event_id & 0x00FF0000) <= MALI_PROFILING_EVENT_CHANNEL_PP7))
+
+	static u64 start_time;
+	loff_t *spos = v;
+	u32 index;
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+
+	index = (u32) *spos;
+
+	/* Retrieve all events */
+	if (_mali_internal_profiling_get_event(index, &timestamp, &event_id, data) == _MALI_OSK_ERR_OK) {
+		seq_printf(seq_file, "%llu %u %u %u %u %u %u # ", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]);
+
+		if (index == 0) {
+			start_time = timestamp;
+		}
+
+		seq_printf(seq_file, "[%06u] ", index);
+
+		switch (event_id & 0x0F000000) {
+		case MALI_PROFILING_EVENT_TYPE_SINGLE:
+			seq_puts(seq_file, "SINGLE | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_START:
+			seq_puts(seq_file, "START | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_STOP:
+			seq_puts(seq_file, "STOP | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_SUSPEND:
+			seq_puts(seq_file, "SUSPEND | ");
+			break;
+		case MALI_PROFILING_EVENT_TYPE_RESUME:
+			seq_puts(seq_file, "RESUME | ");
+			break;
+		default:
+			seq_printf(seq_file, "0x%01X | ", (event_id & 0x0F000000) >> 24);
+			break;
+		}
+
+		switch (event_id & 0x00FF0000) {
+		case MALI_PROFILING_EVENT_CHANNEL_SOFTWARE:
+			seq_puts(seq_file, "SW | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_GP0:
+			seq_puts(seq_file, "GP0 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP0:
+			seq_puts(seq_file, "PP0 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP1:
+			seq_puts(seq_file, "PP1 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP2:
+			seq_puts(seq_file, "PP2 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP3:
+			seq_puts(seq_file, "PP3 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP4:
+			seq_puts(seq_file, "PP4 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP5:
+			seq_puts(seq_file, "PP5 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP6:
+			seq_puts(seq_file, "PP6 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_PP7:
+			seq_puts(seq_file, "PP7 | ");
+			break;
+		case MALI_PROFILING_EVENT_CHANNEL_GPU:
+			seq_puts(seq_file, "GPU | ");
+			break;
+		default:
+			seq_printf(seq_file, "0x%02X | ", (event_id & 0x00FF0000) >> 16);
+			break;
+		}
+
+		if (MALI_EVENT_ID_IS_HW(event_id)) {
+			if (((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_START) || ((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_STOP)) {
+				switch (event_id & 0x0000FFFF) {
+				case MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL:
+					seq_puts(seq_file, "PHYSICAL | ");
+					break;
+				case MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL:
+					seq_puts(seq_file, "VIRTUAL | ");
+					break;
+				default:
+					seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+					break;
+				}
+			} else {
+				seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+			}
+		} else {
+			seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+		}
+
+		seq_printf(seq_file, "T0 + 0x%016llX\n", timestamp - start_time);
+
+		return 0;
+	}
+
+	return 0;
+}
+
+static const struct seq_operations profiling_events_seq_ops = {
+	.start = profiling_events_start,
+	.next  = profiling_events_next,
+	.stop  = profiling_events_stop,
+	.show  = profiling_events_show
+};
+
+static int profiling_events_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &profiling_events_seq_ops);
+}
+
+static const struct file_operations profiling_events_fops = {
+	.owner = THIS_MODULE,
+	.open = profiling_events_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+static const struct seq_operations profiling_events_human_readable_seq_ops = {
+	.start = profiling_events_start,
+	.next  = profiling_events_next,
+	.stop  = profiling_events_stop,
+	.show  = profiling_events_show_human_readable
+};
+
+static int profiling_events_human_readable_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &profiling_events_human_readable_seq_ops);
+}
+
+static const struct file_operations profiling_events_human_readable_fops = {
+	.owner = THIS_MODULE,
+	.open = profiling_events_human_readable_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+static int memory_debugfs_show(struct seq_file *s, void *private_data)
+{
+#ifdef MALI_MEM_SWAP_TRACKING
+	seq_printf(s, "  %-25s  %-10s %-25s %-10s  %-15s  %-15s  %-10s  %-10s %-10s\n"\
+		   "=======================================================================================================================================\n",
+		   "Name (:bytes)", "pid", "pid-name", "mali_mem", "max_mali_mem",
+		   "external_mem", "ump_mem", "dma_mem", "swap_mem");
+#else
+	seq_printf(s, "  %-25s  %-10s %-25s %-10s  %-15s  %-15s  %-10s  %-10s\n"\
+		   "=======================================================================================================================================\n",
+		   "Name (:bytes)", "pid", "pid-name", "mali_mem", "max_mali_mem",
+		   "external_mem", "ump_mem", "dma_mem");
+#endif
+	mali_session_memory_tracking(s);
+	return 0;
+}
+
+static int memory_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, memory_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations memory_usage_fops = {
+	.owner = THIS_MODULE,
+	.open = memory_debugfs_open,
+	.read  = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static ssize_t utilization_gp_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_gp_pp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t utilization_gp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_gp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t utilization_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 uval = _mali_ukk_utilization_pp();
+
+	r = snprintf(buf, 64, "%u\n", uval);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+
+static const struct file_operations utilization_gp_pp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_gp_pp_read,
+};
+
+static const struct file_operations utilization_gp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_gp_read,
+};
+
+static const struct file_operations utilization_pp_fops = {
+	.owner = THIS_MODULE,
+	.read = utilization_pp_read,
+};
+
+static ssize_t user_settings_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+	_mali_uk_user_setting_t setting;
+	char buf[32];
+
+	cnt = min(cnt, sizeof(buf) - 1);
+	if (copy_from_user(buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+	buf[cnt] = '\0';
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret != 0) {
+		return ret;
+	}
+
+	/* Update setting */
+	setting = (_mali_uk_user_setting_t)(filp->private_data);
+	mali_set_user_setting(setting, val);
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static ssize_t user_settings_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	size_t r;
+	u32 value;
+	_mali_uk_user_setting_t setting;
+
+	setting = (_mali_uk_user_setting_t)(filp->private_data);
+	value = mali_get_user_setting(setting);
+
+	r = snprintf(buf, 64, "%u\n", value);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static const struct file_operations user_settings_fops = {
+	.owner = THIS_MODULE,
+	.open = open_copy_private_data,
+	.read = user_settings_read,
+	.write = user_settings_write,
+};
+
+static int mali_sysfs_user_settings_register(void)
+{
+	struct dentry *mali_user_settings_dir = debugfs_create_dir("userspace_settings", mali_debugfs_dir);
+
+	if (mali_user_settings_dir != NULL) {
+		long i;
+
+		for (i = 0; i < _MALI_UK_USER_SETTING_MAX; i++) {
+			debugfs_create_file(_mali_uk_user_setting_descriptions[i],
+					    0600, mali_user_settings_dir, (void *)i,
+					    &user_settings_fops);
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t pp_num_cores_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int ret;
+	char buffer[32];
+	unsigned long val;
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	ret = kstrtoul(&buffer[0], 10, &val);
+	if (ret != 0) {
+		return -EINVAL;
+	}
+
+	ret = mali_executor_set_perf_level(val, MALI_TRUE); /* override even if core scaling is disabled */
+	if (ret) {
+		return ret;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static ssize_t pp_num_cores_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+
+	r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_enabled());
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations pp_num_cores_enabled_fops = {
+	.owner = THIS_MODULE,
+	.write = pp_num_cores_enabled_write,
+	.read = pp_num_cores_enabled_read,
+	.llseek = default_llseek,
+};
+
+static ssize_t pp_num_cores_total_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r;
+	char buffer[64];
+
+	r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_total());
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations pp_num_cores_total_fops = {
+	.owner = THIS_MODULE,
+	.read = pp_num_cores_total_read,
+};
+
+static ssize_t pp_core_scaling_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+	int ret;
+	char buffer[32];
+	unsigned long val;
+
+	if (count >= sizeof(buffer)) {
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(&buffer[0], buf, count)) {
+		return -EFAULT;
+	}
+	buffer[count] = '\0';
+
+	ret = kstrtoul(&buffer[0], 10, &val);
+	if (ret != 0) {
+		return -EINVAL;
+	}
+
+	switch (val) {
+	case 1:
+		mali_executor_core_scaling_enable();
+		break;
+	case 0:
+		mali_executor_core_scaling_disable();
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
+	*offp += count;
+	return count;
+}
+
+static ssize_t pp_core_scaling_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	return simple_read_from_buffer(buf, count, offp, mali_executor_core_scaling_is_enabled() ? "1\n" : "0\n", 2);
+}
+static const struct file_operations pp_core_scaling_enabled_fops = {
+	.owner = THIS_MODULE,
+	.write = pp_core_scaling_enabled_write,
+	.read = pp_core_scaling_enabled_read,
+	.llseek = default_llseek,
+};
+
+static ssize_t version_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+	int r = 0;
+	char buffer[64];
+
+	switch (mali_kernel_core_get_product_id()) {
+	case _MALI_PRODUCT_ID_MALI200:
+		r = snprintf(buffer, 64, "Mali-200\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI300:
+		r = snprintf(buffer, 64, "Mali-300\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI400:
+		r = snprintf(buffer, 64, "Mali-400 MP\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI450:
+		r = snprintf(buffer, 64, "Mali-450 MP\n");
+		break;
+	case _MALI_PRODUCT_ID_MALI470:
+		r = snprintf(buffer, 64, "Mali-470 MP\n");
+		break;
+	case _MALI_PRODUCT_ID_UNKNOWN:
+		return -EINVAL;
+		break;
+	};
+
+	return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations version_fops = {
+	.owner = THIS_MODULE,
+	.read = version_read,
+};
+
+#if defined(DEBUG)
+static int timeline_debugfs_show(struct seq_file *s, void *private_data)
+{
+	struct mali_session_data *session, *tmp;
+	u32 session_seq = 1;
+
+	seq_puts(s, "timeline system info:\n=================\n\n");
+
+	mali_session_lock();
+	MALI_SESSION_FOREACH(session, tmp, link) {
+		seq_printf(s, "session %d <%p> start:\n", session_seq, session);
+		mali_timeline_debug_print_system(session->timeline_system, s);
+		seq_printf(s, "session %d end\n\n\n", session_seq++);
+	}
+	mali_session_unlock();
+
+	return 0;
+}
+
+static int timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, timeline_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations timeline_dump_fops = {
+	.owner = THIS_MODULE,
+	.open = timeline_debugfs_open,
+	.read  = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release
+};
+#endif
+
+int mali_sysfs_register(const char *mali_dev_name)
+{
+	mali_debugfs_dir = debugfs_create_dir(mali_dev_name, NULL);
+	if (ERR_PTR(-ENODEV) == mali_debugfs_dir) {
+		/* Debugfs not supported. */
+		mali_debugfs_dir = NULL;
+	} else {
+		if (mali_debugfs_dir != NULL) {
+			/* Debugfs directory created successfully; create files now */
+			struct dentry *mali_power_dir;
+			struct dentry *mali_gp_dir;
+			struct dentry *mali_pp_dir;
+			struct dentry *mali_l2_dir;
+			struct dentry *mali_profiling_dir;
+
+			debugfs_create_file("version", 0400, mali_debugfs_dir, NULL, &version_fops);
+
+			mali_power_dir = debugfs_create_dir("power", mali_debugfs_dir);
+			if (mali_power_dir != NULL) {
+				debugfs_create_file("always_on", 0600, mali_power_dir, NULL, &power_always_on_fops);
+				debugfs_create_file("power_events", 0200, mali_power_dir, NULL, &power_power_events_fops);
+			}
+
+			mali_gp_dir = debugfs_create_dir("gp", mali_debugfs_dir);
+			if (mali_gp_dir != NULL) {
+				u32 num_groups;
+				long i;
+
+				num_groups = mali_group_get_glob_num_groups();
+				for (i = 0; i < num_groups; i++) {
+					struct mali_group *group = mali_group_get_glob_group(i);
+
+					struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+
+					if (gp_core != NULL) {
+						struct dentry *mali_gp_gpx_dir;
+
+						mali_gp_gpx_dir = debugfs_create_dir("gp0", mali_gp_dir);
+						if (mali_gp_gpx_dir != NULL) {
+							debugfs_create_file("base_addr", 0400, mali_gp_gpx_dir, &gp_core->hw_core, &hw_core_base_addr_fops);
+							debugfs_create_file("enabled", 0600, mali_gp_gpx_dir, group, &group_enabled_fops);
+						}
+						break; /* no need to look for any other GP cores */
+					}
+
+				}
+			}
+
+			mali_pp_dir = debugfs_create_dir("pp", mali_debugfs_dir);
+			if (mali_pp_dir != NULL) {
+				u32 num_groups;
+				long i;
+
+				debugfs_create_file("num_cores_total", 0400, mali_pp_dir, NULL, &pp_num_cores_total_fops);
+				debugfs_create_file("num_cores_enabled", 0600, mali_pp_dir, NULL, &pp_num_cores_enabled_fops);
+				debugfs_create_file("core_scaling_enabled", 0600, mali_pp_dir, NULL, &pp_core_scaling_enabled_fops);
+
+				num_groups = mali_group_get_glob_num_groups();
+				for (i = 0; i < num_groups; i++) {
+					struct mali_group *group = mali_group_get_glob_group(i);
+
+					struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+
+					if (pp_core != NULL) {
+						char buf[16];
+						struct dentry *mali_pp_ppx_dir;
+
+						_mali_osk_snprintf(buf, sizeof(buf), "pp%u", mali_pp_core_get_id(pp_core));
+						mali_pp_ppx_dir = debugfs_create_dir(buf, mali_pp_dir);
+						if (mali_pp_ppx_dir != NULL) {
+							debugfs_create_file("base_addr", 0400, mali_pp_ppx_dir, &pp_core->hw_core, &hw_core_base_addr_fops);
+							if (!mali_group_is_virtual(group)) {
+								debugfs_create_file("enabled", 0600, mali_pp_ppx_dir, group, &group_enabled_fops);
+							}
+						}
+					}
+				}
+			}
+
+			mali_l2_dir = debugfs_create_dir("l2", mali_debugfs_dir);
+			if (mali_l2_dir != NULL) {
+				struct dentry *mali_l2_all_dir;
+				u32 l2_id;
+				struct mali_l2_cache_core *l2_cache;
+
+				mali_l2_all_dir = debugfs_create_dir("all", mali_l2_dir);
+				if (mali_l2_all_dir != NULL) {
+					debugfs_create_file("counter_src0", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src0_fops);
+					debugfs_create_file("counter_src1", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src1_fops);
+				}
+
+				l2_id = 0;
+				l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+				while (l2_cache != NULL) {
+					char buf[16];
+					struct dentry *mali_l2_l2x_dir;
+
+					_mali_osk_snprintf(buf, sizeof(buf), "l2%u", l2_id);
+					mali_l2_l2x_dir = debugfs_create_dir(buf, mali_l2_dir);
+					if (mali_l2_l2x_dir != NULL) {
+						debugfs_create_file("counter_src0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src0_fops);
+						debugfs_create_file("counter_src1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src1_fops);
+						debugfs_create_file("counter_val0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val0_fops);
+						debugfs_create_file("counter_val1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val1_fops);
+						debugfs_create_file("base_addr", 0400, mali_l2_l2x_dir, &l2_cache->hw_core, &hw_core_base_addr_fops);
+					}
+
+					/* try next L2 */
+					l2_id++;
+					l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+				}
+			}
+
+			debugfs_create_file("gpu_memory", 0444, mali_debugfs_dir, NULL, &memory_usage_fops);
+
+			debugfs_create_file("utilization_gp_pp", 0400, mali_debugfs_dir, NULL, &utilization_gp_pp_fops);
+			debugfs_create_file("utilization_gp", 0400, mali_debugfs_dir, NULL, &utilization_gp_fops);
+			debugfs_create_file("utilization_pp", 0400, mali_debugfs_dir, NULL, &utilization_pp_fops);
+
+			mali_profiling_dir = debugfs_create_dir("profiling", mali_debugfs_dir);
+			if (mali_profiling_dir != NULL) {
+				u32 max_sub_jobs;
+				long i;
+				struct dentry *mali_profiling_gp_dir;
+				struct dentry *mali_profiling_pp_dir;
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+				struct dentry *mali_profiling_proc_dir;
+#endif
+				/*
+				 * Create directory where we can set GP HW counters.
+				 */
+				mali_profiling_gp_dir = debugfs_create_dir("gp", mali_profiling_dir);
+				if (mali_profiling_gp_dir != NULL) {
+					debugfs_create_file("counter_src0", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(0), &profiling_counter_src_fops);
+					debugfs_create_file("counter_src1", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(1), &profiling_counter_src_fops);
+				}
+
+				/*
+				 * Create directory where we can set PP HW counters.
+				 * Possible override with specific HW counters for a particular sub job
+				 * (Disable core scaling before using the override!)
+				 */
+				mali_profiling_pp_dir = debugfs_create_dir("pp", mali_profiling_dir);
+				if (mali_profiling_pp_dir != NULL) {
+					debugfs_create_file("counter_src0", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(0), &profiling_counter_src_fops);
+					debugfs_create_file("counter_src1", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(1), &profiling_counter_src_fops);
+				}
+
+				max_sub_jobs = mali_executor_get_num_cores_total();
+				for (i = 0; i < max_sub_jobs; i++) {
+					char buf[16];
+					struct dentry *mali_profiling_pp_x_dir;
+
+					_mali_osk_snprintf(buf, sizeof(buf), "%u", i);
+					mali_profiling_pp_x_dir = debugfs_create_dir(buf, mali_profiling_pp_dir);
+					if (mali_profiling_pp_x_dir != NULL) {
+						debugfs_create_file("counter_src0",
+								    0600, mali_profiling_pp_x_dir,
+								    (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(0, i),
+								    &profiling_counter_src_fops);
+						debugfs_create_file("counter_src1",
+								    0600, mali_profiling_pp_x_dir,
+								    (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(1, i),
+								    &profiling_counter_src_fops);
+					}
+				}
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+				mali_profiling_proc_dir = debugfs_create_dir("proc", mali_profiling_dir);
+				if (mali_profiling_proc_dir != NULL) {
+					struct dentry *mali_profiling_proc_default_dir = debugfs_create_dir("default", mali_profiling_proc_dir);
+
+					if (mali_profiling_proc_default_dir != NULL) {
+						debugfs_create_file("enable", 0600, mali_profiling_proc_default_dir, (void *)_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, &user_settings_fops);
+					}
+				}
+				debugfs_create_file("record", 0600, mali_profiling_dir, NULL, &profiling_record_fops);
+				debugfs_create_file("events", 0400, mali_profiling_dir, NULL, &profiling_events_fops);
+				debugfs_create_file("events_human_readable", 0400, mali_profiling_dir, NULL, &profiling_events_human_readable_fops);
+#endif
+			}
+
+#if MALI_STATE_TRACKING
+			debugfs_create_file("state_dump", 0400, mali_debugfs_dir, NULL, &mali_seq_internal_state_fops);
+#endif
+
+#if defined(DEBUG)
+			debugfs_create_file("timeline_dump", 0400, mali_debugfs_dir, NULL, &timeline_dump_fops);
+#endif
+			if (mali_sysfs_user_settings_register()) {
+				/* Failed to create the debugfs entries for the user settings DB. */
+				MALI_DEBUG_PRINT(2, ("Failed to create user setting debugfs files. Ignoring...\n"));
+			}
+		}
+	}
+
+	/* Success! */
+	return 0;
+}
+
+int mali_sysfs_unregister(void)
+{
+	if (mali_debugfs_dir != NULL) {
+		debugfs_remove_recursive(mali_debugfs_dir);
+	}
+	return 0;
+}
+
+#else /* MALI_LICENSE_IS_GPL */
+
+/* Dummy implementations for non-GPL */
+
+int mali_sysfs_register(struct mali_dev *device, dev_t dev, const char *mali_dev_name)
+{
+	return 0;
+}
+
+int mali_sysfs_unregister(void)
+{
+	return 0;
+}
+
+#endif /* MALI_LICENSE_IS_GPL */
diff --git a/utgard/r7p0/linux/mali_kernel_sysfs.h b/utgard/r7p0/linux/mali_kernel_sysfs.h
new file mode 100644
index 0000000..9d98e3a
--- /dev/null
+++ b/utgard/r7p0/linux/mali_kernel_sysfs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2011-2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_SYSFS_H__
+#define __MALI_KERNEL_SYSFS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/device.h>
+
+#define MALI_PROC_DIR "driver/mali"
+
+int mali_sysfs_register(const char *mali_dev_name);
+int mali_sysfs_unregister(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LINUX_H__ */
diff --git a/utgard/r7p0/linux/mali_linux_trace.h b/utgard/r7p0/linux/mali_linux_trace.h
new file mode 100644
index 0000000..70ab5b8
--- /dev/null
+++ b/utgard/r7p0/linux/mali_linux_trace.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#if !defined(MALI_LINUX_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define MALI_LINUX_TRACE_H
+
+#include <linux/types.h>
+
+#include <linux/stringify.h>
+#include <linux/tracepoint.h>
+
+#undef  TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_SYSTEM_STRING __stringfy(TRACE_SYSTEM)
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+/**
+ * Define the tracepoint used to communicate the status of a GPU. Called
+ * when a GPU turns on or turns off.
+ *
+ * @param event_id The type of the event. This parameter is a bitfield
+ *  encoding the type of the event.
+ *
+ * @param d0 First data parameter.
+ * @param d1 Second data parameter.
+ * @param d2 Third data parameter.
+ * @param d3 Fourth data parameter.
+ * @param d4 Fifth data parameter.
+ */
+TRACE_EVENT(mali_timeline_event,
+
+	    TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1,
+		     unsigned int d2, unsigned int d3, unsigned int d4),
+
+	    TP_ARGS(event_id, d0, d1, d2, d3, d4),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, event_id)
+		    __field(unsigned int, d0)
+		    __field(unsigned int, d1)
+		    __field(unsigned int, d2)
+		    __field(unsigned int, d3)
+		    __field(unsigned int, d4)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->event_id = event_id;
+		    __entry->d0 = d0;
+		    __entry->d1 = d1;
+		    __entry->d2 = d2;
+		    __entry->d3 = d3;
+		    __entry->d4 = d4;
+	    ),
+
+	    TP_printk("event=%d", __entry->event_id)
+	   );
+
+/**
+ * Define a tracepoint used to regsiter the value of a hardware counter.
+ * Hardware counters belonging to the vertex or fragment processor are
+ * reported via this tracepoint each frame, whilst L2 cache hardware
+ * counters are reported continuously.
+ *
+ * @param counter_id The counter ID.
+ * @param value The value of the counter.
+ */
+TRACE_EVENT(mali_hw_counter,
+
+	    TP_PROTO(unsigned int counter_id, unsigned int value),
+
+	    TP_ARGS(counter_id, value),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, counter_id)
+		    __field(unsigned int, value)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->counter_id = counter_id;
+	    ),
+
+	    TP_printk("event %d = %d", __entry->counter_id, __entry->value)
+	   );
+
+/**
+ * Define a tracepoint used to send a bundle of software counters.
+ *
+ * @param counters The bundle of counters.
+ */
+TRACE_EVENT(mali_sw_counters,
+
+	    TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters),
+
+	    TP_ARGS(pid, tid, surface_id, counters),
+
+	    TP_STRUCT__entry(
+		    __field(pid_t, pid)
+		    __field(pid_t, tid)
+		    __field(void *, surface_id)
+		    __field(unsigned int *, counters)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->pid = pid;
+		    __entry->tid = tid;
+		    __entry->surface_id = surface_id;
+		    __entry->counters = counters;
+	    ),
+
+	    TP_printk("counters were %s", __entry->counters == NULL ? "NULL" : "not NULL")
+	   );
+
+/**
+ * Define a tracepoint used to gather core activity for systrace
+ * @param pid The process id for which the core activity originates from
+ * @param active If the core is active (1) or not (0)
+ * @param core_type The type of core active, either GP (1) or PP (0)
+ * @param core_id The core id that is active for the core_type
+ * @param frame_builder_id The frame builder id associated with this core activity
+ * @param flush_id The flush id associated with this core activity
+ */
+TRACE_EVENT(mali_core_active,
+
+	    TP_PROTO(pid_t pid, unsigned int active, unsigned int core_type, unsigned int core_id, unsigned int frame_builder_id, unsigned int flush_id),
+
+	    TP_ARGS(pid, active, core_type, core_id, frame_builder_id, flush_id),
+
+	    TP_STRUCT__entry(
+		    __field(pid_t, pid)
+		    __field(unsigned int, active)
+		    __field(unsigned int, core_type)
+		    __field(unsigned int, core_id)
+		    __field(unsigned int, frame_builder_id)
+		    __field(unsigned int, flush_id)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->pid = pid;
+		    __entry->active = active;
+		    __entry->core_type = core_type;
+		    __entry->core_id = core_id;
+		    __entry->frame_builder_id = frame_builder_id;
+		    __entry->flush_id = flush_id;
+	    ),
+
+	    TP_printk("%s|%d|%s%i:%x|%d", __entry->active ? "S" : "F", __entry->pid, __entry->core_type ? "GP" : "PP", __entry->core_id, __entry->flush_id, __entry->frame_builder_id)
+	   );
+
+#endif /* MALI_LINUX_TRACE_H */
+
+/* This part must exist outside the header guard. */
+#include <trace/define_trace.h>
+
diff --git a/utgard/r7p0/linux/mali_memory.c b/utgard/r7p0/linux/mali_memory.c
new file mode 100644
index 0000000..8590675
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory.c
@@ -0,0 +1,532 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/platform_device.h>
+#include <linux/idr.h>
+
+#include "mali_osk.h"
+#include "mali_executor.h"
+
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_util.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_swap_alloc.h"
+#include "mali_memory_defer_bind.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_secure.h"
+#endif
+
+extern unsigned int mali_dedicated_mem_size;
+extern unsigned int mali_shared_mem_size;
+
+#define MALI_VM_NUM_FAULT_PREFETCH (0x8)
+
+static void mali_mem_vma_open(struct vm_area_struct *vma)
+{
+	mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+
+	MALI_DEBUG_PRINT(4, ("Open called on vma %p\n", vma));
+
+	/* If need to share the allocation, add ref_count here */
+	mali_allocation_ref(alloc);
+	return;
+}
+static void mali_mem_vma_close(struct vm_area_struct *vma)
+{
+	/* If need to share the allocation, unref ref_count here */
+	mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+
+	mali_allocation_unref(&alloc);
+	vma->vm_private_data = NULL;
+}
+
+static int mali_mem_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+	mali_mem_backend *mem_bkend = NULL;
+	int ret;
+	int prefetch_num = MALI_VM_NUM_FAULT_PREFETCH;
+
+	unsigned long address = (unsigned long)vmf->virtual_address;
+
+	MALI_DEBUG_ASSERT(alloc->backend_handle);
+	MALI_DEBUG_ASSERT((unsigned long)alloc->cpu_mapping.addr <= address);
+
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	if (!(mem_bkend = idr_find(&mali_backend_idr, alloc->backend_handle))) {
+		MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n"));
+		mutex_unlock(&mali_idr_mutex);
+		return VM_FAULT_SIGBUS;
+	}
+	mutex_unlock(&mali_idr_mutex);
+	MALI_DEBUG_ASSERT(mem_bkend->type == alloc->type);
+
+	if ((mem_bkend->type == MALI_MEM_COW && (MALI_MEM_BACKEND_FLAG_SWAP_COWED !=
+			(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) &&
+	    (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE)) {
+		/*check if use page fault to do COW*/
+		MALI_DEBUG_PRINT(4, ("mali_vma_fault: do cow allocate on demand!, address=0x%x\n", address));
+		mutex_lock(&mem_bkend->mutex);
+		ret = mali_mem_cow_allocate_on_demand(mem_bkend,
+						      (address - vma->vm_start) / PAGE_SIZE);
+		mutex_unlock(&mem_bkend->mutex);
+
+		if (ret != _MALI_OSK_ERR_OK) {
+			return VM_FAULT_OOM;
+		}
+		prefetch_num = 1;
+
+		/* handle COW modified range cpu mapping
+		 we zap the mapping in cow_modify_range, it will trigger page fault
+		 when CPU access it, so here we map it to CPU*/
+		mutex_lock(&mem_bkend->mutex);
+		ret = mali_mem_cow_cpu_map_pages_locked(mem_bkend, vma, address, prefetch_num);
+		mutex_unlock(&mem_bkend->mutex);
+
+		if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+			return VM_FAULT_SIGBUS;
+		}
+	} else if ((mem_bkend->type == MALI_MEM_SWAP) ||
+		   (mem_bkend->type == MALI_MEM_COW && (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+		u32 offset_in_bkend = (address - vma->vm_start) / PAGE_SIZE;
+		int ret = _MALI_OSK_ERR_OK;
+
+		mutex_lock(&mem_bkend->mutex);
+		if (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE) {
+			ret = mali_mem_swap_cow_page_on_demand(mem_bkend, offset_in_bkend, &vmf->page);
+		} else {
+			ret = mali_mem_swap_allocate_page_on_demand(mem_bkend, offset_in_bkend, &vmf->page);
+		}
+		mutex_unlock(&mem_bkend->mutex);
+
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(2, ("Mali swap memory page fault process failed, address=0x%x\n", address));
+			return VM_FAULT_OOM;
+		} else {
+			return VM_FAULT_LOCKED;
+		}
+	} else {
+		MALI_PRINT_ERROR(("Mali vma fault! It never happen, indicating some logic errors in caller.\n"));
+		/*NOT support yet or OOM*/
+		return VM_FAULT_OOM;
+	}
+	return VM_FAULT_NOPAGE;
+}
+
+static struct vm_operations_struct mali_kernel_vm_ops = {
+	.open = mali_mem_vma_open,
+	.close = mali_mem_vma_close,
+	.fault = mali_mem_vma_fault,
+};
+
+
+/** @ map mali allocation to CPU address
+*
+* Supported backend types:
+* --MALI_MEM_OS
+* -- need to add COW?
+ *Not supported backend types:
+* -_MALI_MEMORY_BIND_BACKEND_UMP
+* -_MALI_MEMORY_BIND_BACKEND_DMA_BUF
+* -_MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY
+*
+*/
+int mali_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct mali_session_data *session;
+	mali_mem_allocation *mali_alloc = NULL;
+	u32 mali_addr = vma->vm_pgoff << PAGE_SHIFT;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	int ret = -EFAULT;
+
+	session = (struct mali_session_data *)filp->private_data;
+	if (session == NULL) {
+		MALI_PRINT_ERROR(("mmap called without any session data available\n"));
+		return -EFAULT;
+	}
+
+	MALI_DEBUG_PRINT(4, ("MMap() handler: start=0x%08X, phys=0x%08X, size=0x%08X vma->flags 0x%08x\n",
+			     (unsigned int)vma->vm_start, (unsigned int)(vma->vm_pgoff << PAGE_SHIFT),
+			     (unsigned int)(vma->vm_end - vma->vm_start), vma->vm_flags));
+
+	/* Operations used on any memory system */
+	/* do not need to anything in vm open/close now */
+
+	/* find mali allocation structure by vaddress*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+	if (likely(mali_vma_node)) {
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start);
+		if (unlikely(mali_addr != mali_vma_node->vm_node.start)) {
+			/* only allow to use start address for mmap */
+			MALI_DEBUG_PRINT(1, ("mali_addr != mali_vma_node->vm_node.start\n"));
+			return -EFAULT;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(mali_vma_node == NULL);
+		return -EFAULT;
+	}
+
+	mali_alloc->cpu_mapping.addr = (void __user *)vma->vm_start;
+
+	if (mali_alloc->flags & _MALI_MEMORY_ALLOCATE_DEFER_BIND) {
+		MALI_DEBUG_PRINT(1, ("ERROR : trying to access varying memory by CPU!\n"));
+		return -EFAULT;
+	}
+
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	if (!(mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle))) {
+		MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n"));
+		mutex_unlock(&mali_idr_mutex);
+		return -EFAULT;
+	}
+	mutex_unlock(&mali_idr_mutex);
+
+	if (!(MALI_MEM_SWAP == mali_alloc->type ||
+	      (MALI_MEM_COW == mali_alloc->type && (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)))) {
+		/* Set some bits which indicate that, the memory is IO memory, meaning
+		 * that no paging is to be performed and the memory should not be
+		 * included in crash dumps. And that the memory is reserved, meaning
+		 * that it's present and can never be paged out (see also previous
+		 * entry)
+		 */
+		vma->vm_flags |= VM_IO;
+		vma->vm_flags |= VM_DONTCOPY;
+		vma->vm_flags |= VM_PFNMAP;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+		vma->vm_flags |= VM_RESERVED;
+#else
+		vma->vm_flags |= VM_DONTDUMP;
+		vma->vm_flags |= VM_DONTEXPAND;
+#endif
+	} else if (mali_alloc->type == MALI_MEM_SWAP) {
+		vma->vm_pgoff = mem_bkend->start_idx;
+	}
+
+	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	vma->vm_ops = &mali_kernel_vm_ops;
+
+	mali_alloc->cpu_mapping.addr = (void __user *)vma->vm_start;
+
+	/* If it's a copy-on-write mapping, map to read only */
+	if (!(vma->vm_flags & VM_WRITE)) {
+		MALI_DEBUG_PRINT(4, ("mmap allocation with read only !\n"));
+		/* add VM_WRITE for do_page_fault will check this when a write fault */
+		vma->vm_flags |= VM_WRITE | VM_READ;
+		vma->vm_page_prot = PAGE_READONLY;
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE;
+		goto out;
+	}
+
+	if (mem_bkend->type == MALI_MEM_OS) {
+		ret = mali_mem_os_cpu_map(mem_bkend, vma);
+	} else if (mem_bkend->type == MALI_MEM_COW &&
+		   (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+		ret = mali_mem_cow_cpu_map(mem_bkend, vma);
+	} else if (mem_bkend->type == MALI_MEM_BLOCK) {
+		ret = mali_mem_block_cpu_map(mem_bkend, vma);
+	} else if ((mem_bkend->type == MALI_MEM_SWAP) || (mem_bkend->type == MALI_MEM_COW &&
+			(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)))) {
+		/*For swappable memory, CPU page table will be created by page fault handler. */
+		ret = 0;
+	} else if (mem_bkend->type == MALI_MEM_SECURE) {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		ret = mali_mem_secure_cpu_map(mem_bkend, vma);
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory\n"));
+		return -EFAULT;
+#endif
+	} else {
+		/* Not support yet*/
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of backend memory!\n"));
+		return -EFAULT;
+	}
+
+	if (ret != 0) {
+		MALI_DEBUG_PRINT(1, ("ret != 0\n"));
+		return -EFAULT;
+	}
+out:
+	MALI_DEBUG_ASSERT(mali_alloc->magic == MALI_MEM_ALLOCATION_VALID_MAGIC);
+
+	vma->vm_private_data = (void *)mali_alloc;
+	mali_alloc->cpu_mapping.vma = vma;
+
+	mali_allocation_ref(mali_alloc);
+
+	return 0;
+}
+
+_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor)
+{
+	u32 size = descriptor->psize;
+	struct mali_session_data *session = descriptor->session;
+
+	MALI_DEBUG_ASSERT(descriptor->magic == MALI_MEM_ALLOCATION_VALID_MAGIC);
+
+	/* Map dma-buf into this session's page tables */
+
+	if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		size += MALI_MMU_PAGE_SIZE;
+	}
+
+	return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start, size);
+}
+
+_mali_osk_errcode_t mali_mem_mali_map_resize(mali_mem_allocation *descriptor, u32 new_size)
+{
+	u32 old_size = descriptor->psize;
+	struct mali_session_data *session = descriptor->session;
+
+	MALI_DEBUG_ASSERT(descriptor->magic == MALI_MEM_ALLOCATION_VALID_MAGIC);
+
+	if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		new_size  += MALI_MMU_PAGE_SIZE;
+	}
+
+	if (new_size > old_size) {
+		MALI_DEBUG_ASSERT(new_size <= descriptor->mali_vma_node.vm_node.size);
+		return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start + old_size, new_size - old_size);
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags)
+{
+	if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		size += MALI_MMU_PAGE_SIZE;
+	}
+
+	/* Umap and flush L2 */
+	mali_mmu_pagedir_unmap(session->page_directory, vaddr, size);
+	mali_executor_zap_all_active(session);
+}
+
+u32 _mali_ukk_report_memory_usage(void)
+{
+	u32 sum = 0;
+
+	if (mali_memory_have_dedicated_memory() == MALI_TRUE) {
+		sum += mali_mem_block_allocator_stat();
+	}
+
+	sum += mali_mem_os_stat();
+
+	return sum;
+}
+
+u32 _mali_ukk_report_total_memory_size(void)
+{
+	return mali_dedicated_mem_size + mali_shared_mem_size;
+}
+
+
+/**
+ * Per-session memory descriptor mapping table sizes
+ */
+#define MALI_MEM_DESCRIPTORS_INIT 64
+#define MALI_MEM_DESCRIPTORS_MAX 65536
+
+_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session_data)
+{
+	MALI_DEBUG_PRINT(5, ("Memory session begin\n"));
+
+	session_data->memory_lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED,
+				    _MALI_OSK_LOCK_ORDER_MEM_SESSION);
+
+	if (session_data->memory_lock == NULL) {
+		MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	}
+
+	session_data->cow_lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_UNORDERED, 0);
+	if (session_data->cow_lock == NULL) {
+		_mali_osk_mutex_term(session_data->memory_lock);
+		MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	}
+
+	mali_memory_manager_init(&session_data->allocation_mgr);
+
+	MALI_DEBUG_PRINT(5, ("MMU session begin: success\n"));
+	MALI_SUCCESS;
+}
+
+void mali_memory_session_end(struct mali_session_data *session)
+{
+	MALI_DEBUG_PRINT(3, ("MMU session end\n"));
+
+	if (session == NULL) {
+		MALI_DEBUG_PRINT(1, ("No session data found during session end\n"));
+		return;
+	}
+	/* free allocation */
+	mali_free_session_allocations(session);
+	/* do some check in unint*/
+	mali_memory_manager_uninit(&session->allocation_mgr);
+
+	/* Free the lock */
+	_mali_osk_mutex_term(session->memory_lock);
+	_mali_osk_mutex_term(session->cow_lock);
+	return;
+}
+
+_mali_osk_errcode_t mali_memory_initialize(void)
+{
+	_mali_osk_errcode_t err;
+
+	idr_init(&mali_backend_idr);
+	mutex_init(&mali_idr_mutex);
+
+	err = mali_mem_swap_init();
+	if (err != _MALI_OSK_ERR_OK) {
+		return err;
+	}
+	err = mali_mem_os_init();
+	if (err == _MALI_OSK_ERR_OK) {
+		err = mali_mem_defer_bind_manager_init();
+	}
+
+	return err;
+}
+
+void mali_memory_terminate(void)
+{
+	mali_mem_swap_term();
+	mali_mem_defer_bind_manager_destory();
+	mali_mem_os_term();
+	if (mali_memory_have_dedicated_memory()) {
+		mali_mem_block_allocator_destroy();
+	}
+}
+
+
+struct mali_page_node *_mali_page_node_allocate(mali_page_node_type type)
+{
+	mali_page_node *page_node = NULL;
+
+	page_node = kzalloc(sizeof(mali_page_node), GFP_KERNEL);
+	MALI_DEBUG_ASSERT(page_node != NULL);
+
+	if (page_node) {
+		page_node->type = type;
+		INIT_LIST_HEAD(&page_node->list);
+	}
+
+	return page_node;
+}
+
+void _mali_page_node_ref(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		/* add ref to this page */
+		get_page(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		mali_mem_block_add_ref(node);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		atomic_inc(&node->swap_it->ref_count);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node!\n"));
+	}
+}
+
+void _mali_page_node_unref(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		/* unref to this page */
+		put_page(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		mali_mem_block_dec_ref(node);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node!\n"));
+	}
+}
+
+
+void _mali_page_node_add_page(struct mali_page_node *node, struct page *page)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_OS);
+	node->page = page;
+}
+
+
+void _mali_page_node_add_swap_item(struct mali_page_node *node, struct mali_swap_item *item)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_SWAP);
+	node->swap_it = item;
+}
+
+void _mali_page_node_add_block_item(struct mali_page_node *node, mali_block_item *item)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	node->blk_it = item;
+}
+
+
+int _mali_page_node_get_ref_count(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		/* get ref count of this page */
+		return page_count(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		return mali_mem_block_get_ref_count(node);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return atomic_read(&node->swap_it->ref_count);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node!\n"));
+	}
+	return -1;
+}
+
+
+dma_addr_t _mali_page_node_get_dma_addr(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		return page_private(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		return _mali_blk_item_get_phy_addr(node->blk_it);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return node->swap_it->dma_addr;
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node!\n"));
+	}
+	return 0;
+}
+
+
+unsigned long _mali_page_node_get_pfn(struct mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		return page_to_pfn(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		/* get phy addr for BLOCK page*/
+		return _mali_blk_item_get_pfn(node->blk_it);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return page_to_pfn(node->swap_it->page);
+	} else {
+		MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node!\n"));
+	}
+	return 0;
+}
+
+
diff --git a/utgard/r7p0/linux/mali_memory.h b/utgard/r7p0/linux/mali_memory.h
new file mode 100644
index 0000000..675ca8e
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_H__
+#define __MALI_MEMORY_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include <linux/list.h>
+#include <linux/mm.h>
+
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+
+_mali_osk_errcode_t mali_memory_initialize(void);
+void mali_memory_terminate(void);
+
+/** @brief Allocate a page table page
+ *
+ * Allocate a page for use as a page directory or page table. The page is
+ * mapped into kernel space.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise an error code
+ * @param table_page GPU pointer to the allocated page
+ * @param mapping CPU pointer to the mapping of the allocated page
+ */
+MALI_STATIC_INLINE _mali_osk_errcode_t
+mali_mmu_get_table_page(mali_dma_addr *table_page, mali_io_address *mapping)
+{
+	return mali_mem_os_get_table_page(table_page, mapping);
+}
+
+/** @brief Release a page table page
+ *
+ * Release a page table page allocated through \a mali_mmu_get_table_page
+ *
+ * @param pa the GPU address of the page to release
+ */
+MALI_STATIC_INLINE void
+mali_mmu_release_table_page(mali_dma_addr phys, void *virt)
+{
+	mali_mem_os_release_table_page(phys, virt);
+}
+
+/** @brief mmap function
+ *
+ * mmap syscalls on the Mali device node will end up here.
+ *
+ * This function allocates Mali memory and maps it on CPU and Mali.
+ */
+int mali_mmap(struct file *filp, struct vm_area_struct *vma);
+
+/** @brief Start a new memory session
+ *
+ * Called when a process opens the Mali device node.
+ *
+ * @param session Pointer to session to initialize
+ */
+_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session);
+
+/** @brief Close a memory session
+ *
+ * Called when a process closes the Mali device node.
+ *
+ * Memory allocated by the session will be freed
+ *
+ * @param session Pointer to the session to terminate
+ */
+void mali_memory_session_end(struct mali_session_data *session);
+
+/** @brief Prepare Mali page tables for mapping
+ *
+ * This function will prepare the Mali page tables for mapping the memory
+ * described by \a descriptor.
+ *
+ * Page tables will be reference counted and allocated, if not yet present.
+ *
+ * @param descriptor Pointer to the memory descriptor to the mapping
+ */
+_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor);
+
+/** @brief Resize Mali page tables for mapping
+ *
+ * This function will Resize the Mali page tables for mapping the memory
+ * described by \a descriptor.
+ *
+ * Page tables will be reference counted and allocated, if not yet present.
+ *
+ * @param descriptor Pointer to the memory descriptor to the mapping
+ * @param new_size The new size of descriptor
+ */
+_mali_osk_errcode_t mali_mem_mali_map_resize(mali_mem_allocation *descriptor, u32 new_size);
+
+/** @brief Free Mali page tables for mapping
+ *
+ * This function will unmap pages from Mali memory and free the page tables
+ * that are now unused.
+ *
+ * The updated pages in the Mali L2 cache will be invalidated, and the MMU TLBs will be zapped if necessary.
+ *
+ * @param descriptor Pointer to the memory descriptor to unmap
+ */
+void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags);
+
+/** @brief Parse resource and prepare the OS memory allocator
+ *
+ * @param size Maximum size to allocate for Mali GPU.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size);
+
+/** @brief Parse resource and prepare the dedicated memory allocator
+ *
+ * @param start Physical start address of dedicated Mali GPU memory.
+ * @param size Size of dedicated Mali GPU memory.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size);
+
+
+struct mali_page_node *_mali_page_node_allocate(mali_page_node_type type);
+
+void _mali_page_node_ref(struct mali_page_node *node);
+void _mali_page_node_unref(struct mali_page_node *node);
+void _mali_page_node_add_page(struct mali_page_node *node, struct page *page);
+
+void _mali_page_node_add_block_item(struct mali_page_node *node, mali_block_item *item);
+
+void _mali_page_node_add_swap_item(struct mali_page_node *node, struct mali_swap_item *item);
+
+int _mali_page_node_get_ref_count(struct mali_page_node *node);
+dma_addr_t _mali_page_node_get_dma_addr(struct mali_page_node *node);
+unsigned long _mali_page_node_get_pfn(struct mali_page_node *node);
+
+#endif /* __MALI_MEMORY_H__ */
diff --git a/utgard/r7p0/linux/mali_memory_block_alloc.c b/utgard/r7p0/linux/mali_memory_block_alloc.c
new file mode 100644
index 0000000..9dd87ae
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_block_alloc.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_memory.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_osk.h"
+#include <linux/mutex.h>
+
+
+static mali_block_allocator *mali_mem_block_gobal_allocator;
+
+unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item)
+{
+	return (item->phy_addr & ~(MALI_BLOCK_REF_MASK));
+}
+
+
+unsigned long _mali_blk_item_get_pfn(mali_block_item *item)
+{
+	return (item->phy_addr / MALI_BLOCK_SIZE);
+}
+
+
+u32 mali_mem_block_get_ref_count(mali_page_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	return (node->blk_it->phy_addr & MALI_BLOCK_REF_MASK);
+}
+
+
+/* Increase the refence count
+* It not atomic, so it need to get sp_lock before call this function
+*/
+
+u32 mali_mem_block_add_ref(mali_page_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) < MALI_BLOCK_MAX_REF_COUNT);
+	return (node->blk_it->phy_addr++ & MALI_BLOCK_REF_MASK);
+}
+
+/* Decase the refence count
+* It not atomic, so it need to get sp_lock before call this function
+*/
+u32 mali_mem_block_dec_ref(mali_page_node *node)
+{
+	MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+	MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) > 0);
+	return (node->blk_it->phy_addr-- & MALI_BLOCK_REF_MASK);
+}
+
+
+static mali_block_allocator *mali_mem_block_allocator_create(u32 base_address, u32 size)
+{
+	mali_block_allocator *info;
+	u32 usable_size;
+	u32 num_blocks;
+	mali_page_node *m_node;
+	mali_block_item *mali_blk_items = NULL;
+	int i = 0;
+
+	usable_size = size & ~(MALI_BLOCK_SIZE - 1);
+	MALI_DEBUG_PRINT(3, ("Mali block allocator create for region starting at 0x%08X length 0x%08X\n", base_address, size));
+	MALI_DEBUG_PRINT(4, ("%d usable bytes\n", usable_size));
+	num_blocks = usable_size / MALI_BLOCK_SIZE;
+	MALI_DEBUG_PRINT(4, ("which becomes %d blocks\n", num_blocks));
+
+	if (usable_size == 0) {
+		MALI_DEBUG_PRINT(1, ("Memory block of size %d is unusable\n", size));
+		return NULL;
+	}
+
+	info = _mali_osk_calloc(1, sizeof(mali_block_allocator));
+	if (info != NULL) {
+		INIT_LIST_HEAD(&info->free);
+		spin_lock_init(&info->sp_lock);
+		info->total_num = num_blocks;
+		mali_blk_items = _mali_osk_calloc(1, sizeof(mali_block_item) * num_blocks);
+
+		if (mali_blk_items) {
+			info->items = mali_blk_items;
+			/* add blocks(4k size) to free list*/
+			for (i = 0 ; i < num_blocks ; i++) {
+				/* add block information*/
+				mali_blk_items[i].phy_addr = base_address + (i * MALI_BLOCK_SIZE);
+				/* add  to free list */
+				m_node = _mali_page_node_allocate(MALI_PAGE_NODE_BLOCK);
+				if (m_node == NULL)
+					goto fail;
+				_mali_page_node_add_block_item(m_node, &(mali_blk_items[i]));
+				list_add_tail(&m_node->list, &info->free);
+				atomic_add(1, &info->free_num);
+			}
+			return info;
+		}
+	}
+fail:
+	mali_mem_block_allocator_destroy();
+	return NULL;
+}
+
+void mali_mem_block_allocator_destroy(void)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+
+	MALI_DEBUG_ASSERT_POINTER(info);
+	MALI_DEBUG_PRINT(4, ("Memory block destroy !\n"));
+
+	if (info == NULL)
+		return;
+
+	list_for_each_entry_safe(m_page, m_tmp, &info->free, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		list_del(&m_page->list);
+		kfree(m_page);
+	}
+
+	_mali_osk_free(info->items);
+	_mali_osk_free(info);
+}
+
+u32 mali_mem_block_release(mali_mem_backend *mem_bkend)
+{
+	mali_mem_allocation *alloc = mem_bkend->mali_allocation;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK);
+
+	/* Unmap the memory from the mali virtual address space. */
+	mali_mem_block_mali_unmap(alloc);
+	mutex_lock(&mem_bkend->mutex);
+	free_pages_nr = mali_mem_block_free(&mem_bkend->block_mem);
+	mutex_unlock(&mem_bkend->mutex);
+	return free_pages_nr;
+}
+
+
+int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+
+	MALI_DEBUG_ASSERT_POINTER(info);
+
+	MALI_DEBUG_PRINT(4, ("BLOCK Mem: Allocate size = 0x%x\n", size));
+	/*do some init */
+	INIT_LIST_HEAD(&block_mem->pfns);
+
+	spin_lock(&info->sp_lock);
+	/*check if have enough space*/
+	if (atomic_read(&info->free_num) > page_count) {
+		list_for_each_entry_safe(m_page, m_tmp, &info->free, list) {
+			if (page_count > 0) {
+				MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+				MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(m_page) == 0);
+				list_move(&m_page->list, &block_mem->pfns);
+				block_mem->count++;
+				atomic_dec(&info->free_num);
+				_mali_page_node_ref(m_page);
+			} else {
+				break;
+			}
+			page_count--;
+		}
+	} else {
+		/* can't allocate from BLOCK memory*/
+		spin_unlock(&info->sp_lock);
+		return -1;
+	}
+
+	spin_unlock(&info->sp_lock);
+	return 0;
+}
+
+u32 mali_mem_block_free(mali_mem_block_mem *block_mem)
+{
+	u32 free_pages_nr = 0;
+
+	free_pages_nr = mali_mem_block_free_list(&block_mem->pfns);
+	MALI_DEBUG_PRINT(4, ("BLOCK Mem free : allocated size = 0x%x, free size = 0x%x\n", block_mem->count * _MALI_OSK_MALI_PAGE_SIZE,
+			     free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+	block_mem->count = 0;
+	MALI_DEBUG_ASSERT(list_empty(&block_mem->pfns));
+
+	return free_pages_nr;
+}
+
+
+u32 mali_mem_block_free_list(struct list_head *list)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	u32 free_pages_nr = 0;
+
+	if (info) {
+		spin_lock(&info->sp_lock);
+		list_for_each_entry_safe(m_page, m_tmp, list, list) {
+			if (_mali_page_node_get_ref_count(m_page) == 1) {
+				free_pages_nr++;
+			}
+			mali_mem_block_free_node(m_page);
+		}
+		spin_unlock(&info->sp_lock);
+	}
+	return free_pages_nr;
+}
+
+/* free the node,*/
+void mali_mem_block_free_node(struct mali_page_node *node)
+{
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+
+	/* only handle BLOCK node */
+	if (node->type == MALI_PAGE_NODE_BLOCK && info) {
+		/*Need to make this atomic?*/
+		if (_mali_page_node_get_ref_count(node) == 1) {
+			/*Move to free list*/
+			_mali_page_node_unref(node);
+			list_move_tail(&node->list, &info->free);
+			atomic_add(1, &info->free_num);
+		} else {
+			_mali_page_node_unref(node);
+			list_del(&node->list);
+			kfree(node);
+		}
+	}
+}
+
+/* unref the node, but not free it */
+_mali_osk_errcode_t mali_mem_block_unref_node(struct mali_page_node *node)
+{
+	mali_block_allocator *info = mali_mem_block_gobal_allocator;
+	mali_page_node *new_node;
+
+	/* only handle BLOCK node */
+	if (node->type == MALI_PAGE_NODE_BLOCK && info) {
+		/*Need to make this atomic?*/
+		if (_mali_page_node_get_ref_count(node) == 1) {
+			/* allocate a  new node, Add to free list, keep the old node*/
+			_mali_page_node_unref(node);
+			new_node = _mali_page_node_allocate(MALI_PAGE_NODE_BLOCK);
+			if (new_node) {
+				memcpy(new_node, node, sizeof(mali_page_node));
+				list_add(&new_node->list, &info->free);
+				atomic_add(1, &info->free_num);
+			} else
+				return _MALI_OSK_ERR_FAULT;
+
+		} else {
+			_mali_page_node_unref(node);
+		}
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+
+int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+	struct mali_page_directory *pagedir = session->page_directory;
+	struct mali_page_node *m_page;
+	dma_addr_t phys;
+	u32 virt = vaddr;
+	u32 prop = props;
+
+	list_for_each_entry(m_page, &block_mem->pfns, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		phys = _mali_page_node_get_dma_addr(m_page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+		/* Verify that the "physical" address is 32-bit and
+		 * usable for Mali, when on a system with bus addresses
+		 * wider than 32-bit. */
+		MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+		mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+		virt += MALI_MMU_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+void mali_mem_block_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+
+int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+	int ret;
+	mali_mem_block_mem *block_mem = &mem_bkend->block_mem;
+	unsigned long addr = vma->vm_start;
+	struct mali_page_node *m_page;
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK);
+
+	list_for_each_entry(m_page, &block_mem->pfns, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+		ret = vm_insert_pfn(vma, addr, _mali_page_node_get_pfn(m_page));
+
+		if (unlikely(ret != 0)) {
+			return -EFAULT;
+		}
+		addr += _MALI_OSK_MALI_PAGE_SIZE;
+
+	}
+
+	return 0;
+}
+
+
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size)
+{
+	mali_block_allocator *allocator;
+
+	/* Do the low level linux operation first */
+
+	/* Request ownership of the memory */
+	if (_MALI_OSK_ERR_OK != _mali_osk_mem_reqregion(start, size, "Dedicated Mali GPU memory")) {
+		MALI_DEBUG_PRINT(1, ("Failed to request memory region for frame buffer (0x%08X - 0x%08X)\n", start, start + size - 1));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Create generic block allocator object to handle it */
+	allocator = mali_mem_block_allocator_create(start, size);
+
+	if (allocator == NULL) {
+		MALI_DEBUG_PRINT(1, ("Memory bank registration failed\n"));
+		_mali_osk_mem_unreqregion(start, size);
+		MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	}
+
+	mali_mem_block_gobal_allocator = (mali_block_allocator *)allocator;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+mali_bool mali_memory_have_dedicated_memory(void)
+{
+	return mali_mem_block_gobal_allocator ? MALI_TRUE : MALI_FALSE;
+}
+
+u32 mali_mem_block_allocator_stat(void)
+{
+	mali_block_allocator *allocator = mali_mem_block_gobal_allocator;
+
+	MALI_DEBUG_ASSERT_POINTER(allocator);
+
+
+	return (allocator->total_num - atomic_read(&allocator->free_num)) * _MALI_OSK_MALI_PAGE_SIZE;
+}
diff --git a/utgard/r7p0/linux/mali_memory_block_alloc.h b/utgard/r7p0/linux/mali_memory_block_alloc.h
new file mode 100644
index 0000000..684fd16
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_block_alloc.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2010, 2013, 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_BLOCK_ALLOCATOR_H__
+#define __MALI_BLOCK_ALLOCATOR_H__
+
+#include "mali_session.h"
+#include "mali_memory.h"
+#include <linux/spinlock.h>
+
+#include "mali_memory_types.h"
+
+#define MALI_BLOCK_SIZE (PAGE_SIZE)  /* 4 kB, manage BLOCK memory as page size */
+#define MALI_BLOCK_REF_MASK (0xFFF)
+#define MALI_BLOCK_MAX_REF_COUNT (0xFFF)
+
+
+
+typedef struct mali_block_allocator {
+	/*
+	* In free list, each node's ref_count is 0,
+	* ref_count added when allocated or referenced in COW
+	*/
+	mali_block_item *items; /* information for each block item*/
+	struct list_head free; /*free list of mali_memory_node*/
+	spinlock_t sp_lock; /*lock for reference count & free list opertion*/
+	u32 total_num; /* Number of total pages*/
+	atomic_t free_num; /*number of free pages*/
+} mali_block_allocator;
+
+unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item);
+unsigned long _mali_blk_item_get_pfn(mali_block_item *item);
+u32 mali_mem_block_get_ref_count(mali_page_node *node);
+u32 mali_mem_block_add_ref(mali_page_node *node);
+u32 mali_mem_block_dec_ref(mali_page_node *node);
+u32 mali_mem_block_release(mali_mem_backend *mem_bkend);
+int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size);
+int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+void mali_mem_block_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size);
+mali_bool mali_memory_have_dedicated_memory(void);
+u32 mali_mem_block_free(mali_mem_block_mem *block_mem);
+u32 mali_mem_block_free_list(struct list_head *list);
+void mali_mem_block_free_node(struct mali_page_node *node);
+void mali_mem_block_allocator_destroy(void);
+_mali_osk_errcode_t mali_mem_block_unref_node(struct mali_page_node *node);
+u32 mali_mem_block_allocator_stat(void);
+
+#endif /* __MALI_BLOCK_ALLOCATOR_H__ */
diff --git a/utgard/r7p0/linux/mali_memory_cow.c b/utgard/r7p0/linux/mali_memory_cow.c
new file mode 100644
index 0000000..153248f
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_cow.c
@@ -0,0 +1,780 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#ifdef CONFIG_ARM
+#include <asm/outercache.h>
+#endif
+#include <asm/dma-mapping.h>
+
+#include "mali_memory.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_swap_alloc.h"
+
+/**
+* allocate pages for COW backend and flush cache
+*/
+static struct page *mali_mem_cow_alloc_page(void)
+
+{
+	mali_mem_os_mem os_mem;
+	struct mali_page_node *node;
+	struct page *new_page;
+
+	int ret = 0;
+	/* allocate pages from os mem */
+	ret = mali_mem_os_alloc_pages(&os_mem, _MALI_OSK_MALI_PAGE_SIZE);
+
+	if (ret) {
+		return NULL;
+	}
+
+	MALI_DEBUG_ASSERT(os_mem.count == 1);
+
+	node = _MALI_OSK_CONTAINER_OF(os_mem.pages.next, struct mali_page_node, list);
+	new_page = node->page;
+	node->page = NULL;
+	list_del(&node->list);
+	kfree(node);
+
+	return new_page;
+}
+
+
+static struct list_head *_mali_memory_cow_get_node_list(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size)
+{
+	MALI_DEBUG_ASSERT(MALI_MEM_OS == target_bk->type || MALI_MEM_COW == target_bk->type ||
+			  MALI_MEM_BLOCK == target_bk->type || MALI_MEM_SWAP == target_bk->type);
+
+	if (target_bk->type == MALI_MEM_OS) {
+		MALI_DEBUG_ASSERT(&target_bk->os_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->os_mem.count);
+		return &target_bk->os_mem.pages;
+	} else if (target_bk->type == MALI_MEM_COW) {
+		MALI_DEBUG_ASSERT(&target_bk->cow_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->cow_mem.count);
+		return  &target_bk->cow_mem.pages;
+	} else if (target_bk->type == MALI_MEM_BLOCK) {
+		MALI_DEBUG_ASSERT(&target_bk->block_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->block_mem.count);
+		return  &target_bk->block_mem.pfns;
+	} else if (target_bk->type == MALI_MEM_SWAP) {
+		MALI_DEBUG_ASSERT(&target_bk->swap_mem);
+		MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->swap_mem.count);
+		return  &target_bk->swap_mem.pages;
+	}
+
+	return NULL;
+}
+
+/**
+* Do COW for os memory - support do COW for memory from bank memory
+* The range_start/size can be zero, which means it will call cow_modify_range
+* latter.
+* This function allocate new pages for COW backend from os mem for a modified range
+* It will keep the page which not in the modified range and Add ref to it
+*
+* @target_bk - target allocation's backend(the allocation need to do COW)
+* @target_offset - the offset in target allocation to do COW(for support COW  a memory allocated from memory_bank, 4K align)
+* @target_size - size of target allocation to do COW (for support memory bank)
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range
+*/
+_mali_osk_errcode_t mali_memory_cow_os_memory(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size,
+		mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size)
+{
+	mali_mem_cow *cow = &backend->cow_mem;
+	struct mali_page_node *m_page, *m_tmp, *page_node;
+	int target_page = 0;
+	struct page *new_page;
+	struct list_head *pages = NULL;
+
+	pages = _mali_memory_cow_get_node_list(target_bk, target_offset, target_size);
+
+	if (pages == NULL) {
+		MALI_DEBUG_PRINT_ERROR(("No memory page  need to cow !\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT(cow->count == 0);
+
+	INIT_LIST_HEAD(&cow->pages);
+	mutex_lock(&target_bk->mutex);
+	list_for_each_entry_safe(m_page, m_tmp, pages, list) {
+		/* add page from (target_offset,target_offset+size) to cow backend */
+		if ((target_page >= target_offset / _MALI_OSK_MALI_PAGE_SIZE) &&
+		    (target_page < ((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE))) {
+
+			/* allocate a new page node, alway use OS memory for COW */
+			page_node = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+
+			if (page_node == NULL) {
+				mutex_unlock(&target_bk->mutex);
+				goto error;
+			}
+
+			INIT_LIST_HEAD(&page_node->list);
+
+			/* check if in the modified range*/
+			if ((cow->count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+			    (cow->count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+				/* need to allocate a new page */
+				/* To simplify the case, All COW memory is allocated from os memory ?*/
+				new_page = mali_mem_cow_alloc_page();
+
+				if (new_page == NULL) {
+					kfree(page_node);
+					mutex_unlock(&target_bk->mutex);
+					goto error;
+				}
+
+				_mali_page_node_add_page(page_node, new_page);
+			} else {
+				/*Add Block memory case*/
+				if (m_page->type != MALI_PAGE_NODE_BLOCK) {
+					_mali_page_node_add_page(page_node, m_page->page);
+				} else {
+					page_node->type = MALI_PAGE_NODE_BLOCK;
+					_mali_page_node_add_block_item(page_node, m_page->blk_it);
+				}
+
+				/* add ref to this page */
+				_mali_page_node_ref(m_page);
+			}
+
+			/* add it to COW backend page list */
+			list_add_tail(&page_node->list, &cow->pages);
+			cow->count++;
+		}
+		target_page++;
+	}
+	mutex_unlock(&target_bk->mutex);
+	return _MALI_OSK_ERR_OK;
+error:
+	mali_mem_cow_release(backend, MALI_FALSE);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+_mali_osk_errcode_t mali_memory_cow_swap_memory(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size,
+		mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size)
+{
+	mali_mem_cow *cow = &backend->cow_mem;
+	struct mali_page_node *m_page, *m_tmp, *page_node;
+	int target_page = 0;
+	struct mali_swap_item *swap_item;
+	struct list_head *pages = NULL;
+
+	pages = _mali_memory_cow_get_node_list(target_bk, target_offset, target_size);
+	if (pages == NULL) {
+		MALI_DEBUG_PRINT_ERROR(("No swap memory page need to cow !\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_ASSERT(cow->count == 0);
+
+	INIT_LIST_HEAD(&cow->pages);
+	mutex_lock(&target_bk->mutex);
+
+	backend->flags |= MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN;
+
+	list_for_each_entry_safe(m_page, m_tmp, pages, list) {
+		/* add page from (target_offset,target_offset+size) to cow backend */
+		if ((target_page >= target_offset / _MALI_OSK_MALI_PAGE_SIZE) &&
+		    (target_page < ((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE))) {
+
+			/* allocate a new page node, use swap memory for COW memory swap cowed flag. */
+			page_node = _mali_page_node_allocate(MALI_PAGE_NODE_SWAP);
+
+			if (page_node == NULL) {
+				mutex_unlock(&target_bk->mutex);
+				goto error;
+			}
+
+			/* check if in the modified range*/
+			if ((cow->count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+			    (cow->count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+				/* need to allocate a new page */
+				/* To simplify the case, All COW memory is allocated from os memory ?*/
+				swap_item = mali_mem_swap_alloc_swap_item();
+
+				if (swap_item == NULL) {
+					kfree(page_node);
+					mutex_unlock(&target_bk->mutex);
+					goto error;
+				}
+
+				swap_item->idx = mali_mem_swap_idx_alloc();
+
+				if (swap_item->idx == _MALI_OSK_BITMAP_INVALIDATE_INDEX) {
+					MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW.\n"));
+					kfree(page_node);
+					kfree(swap_item);
+					mutex_unlock(&target_bk->mutex);
+					goto error;
+				}
+
+				_mali_page_node_add_swap_item(page_node, swap_item);
+			} else {
+				_mali_page_node_add_swap_item(page_node, m_page->swap_it);
+
+				/* add ref to this page */
+				_mali_page_node_ref(m_page);
+			}
+
+			list_add_tail(&page_node->list, &cow->pages);
+			cow->count++;
+		}
+		target_page++;
+	}
+	mutex_unlock(&target_bk->mutex);
+
+	return _MALI_OSK_ERR_OK;
+error:
+	mali_mem_swap_release(backend, MALI_FALSE);
+	return _MALI_OSK_ERR_FAULT;
+
+}
+
+
+_mali_osk_errcode_t _mali_mem_put_page_node(mali_page_node *node)
+{
+	if (node->type == MALI_PAGE_NODE_OS) {
+		return mali_mem_os_put_page(node->page);
+	} else if (node->type == MALI_PAGE_NODE_BLOCK) {
+		return mali_mem_block_unref_node(node);
+	} else if (node->type == MALI_PAGE_NODE_SWAP) {
+		return _mali_mem_swap_put_page_node(node);
+	} else
+		MALI_DEBUG_ASSERT(0);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+
+/**
+* Modify a range of a exist COW backend
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range(in byte)
+*/
+_mali_osk_errcode_t mali_memory_cow_modify_range(mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size)
+{
+	mali_mem_allocation *alloc = NULL;
+	struct mali_session_data *session;
+	mali_mem_cow *cow = &backend->cow_mem;
+	struct mali_page_node *m_page, *m_tmp;
+	LIST_HEAD(pages);
+	struct page *new_page;
+	u32 count = 0;
+	s32 change_pages_nr = 0;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+
+	if (range_start % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (range_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	alloc = backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	MALI_DEBUG_ASSERT(backend->type == MALI_MEM_COW);
+	MALI_DEBUG_ASSERT(((range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE) <= cow->count);
+
+	mutex_lock(&backend->mutex);
+
+	/* free pages*/
+	list_for_each_entry_safe(m_page, m_tmp, &cow->pages, list) {
+
+		/* check if in the modified range*/
+		if ((count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+		    (count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+			if (m_page->type != MALI_PAGE_NODE_SWAP) {
+				new_page = mali_mem_cow_alloc_page();
+
+				if (new_page == NULL) {
+					goto error;
+				}
+				if (_mali_page_node_get_ref_count(m_page) != 1)
+					change_pages_nr++;
+				/* unref old page*/
+				_mali_osk_mutex_wait(session->cow_lock);
+				if (_mali_mem_put_page_node(m_page)) {
+					__free_page(new_page);
+					_mali_osk_mutex_signal(session->cow_lock);
+					goto error;
+				}
+				_mali_osk_mutex_signal(session->cow_lock);
+				/* add new page*/
+				/* always use OS for COW*/
+				m_page->type = MALI_PAGE_NODE_OS;
+				_mali_page_node_add_page(m_page, new_page);
+			} else {
+				struct mali_swap_item *swap_item;
+
+				swap_item = mali_mem_swap_alloc_swap_item();
+
+				if (swap_item == NULL) {
+					goto error;
+				}
+
+				swap_item->idx = mali_mem_swap_idx_alloc();
+
+				if (swap_item->idx == _MALI_OSK_BITMAP_INVALIDATE_INDEX) {
+					MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW modify range.\n"));
+					kfree(swap_item);
+					goto error;
+				}
+
+				if (_mali_page_node_get_ref_count(m_page) != 1) {
+					change_pages_nr++;
+				}
+
+				if (_mali_mem_put_page_node(m_page)) {
+					mali_mem_swap_free_swap_item(swap_item);
+					goto error;
+				}
+
+				_mali_page_node_add_swap_item(m_page, swap_item);
+			}
+		}
+		count++;
+	}
+	cow->change_pages_nr  = change_pages_nr;
+
+	MALI_DEBUG_ASSERT(alloc->type == MALI_MEM_COW);
+
+	/* ZAP cpu mapping(modified range), and do cpu mapping here if need */
+	if (alloc->cpu_mapping.vma != NULL) {
+		MALI_DEBUG_ASSERT(alloc->backend_handle != 0);
+		MALI_DEBUG_ASSERT(alloc->cpu_mapping.vma != NULL);
+		MALI_DEBUG_ASSERT(alloc->cpu_mapping.vma->vm_end - alloc->cpu_mapping.vma->vm_start >= range_size);
+
+		if (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+			zap_vma_ptes(alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size);
+
+			ret = mali_mem_cow_cpu_map_pages_locked(backend, alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start  + range_start, range_size / _MALI_OSK_MALI_PAGE_SIZE);
+
+			if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+				MALI_DEBUG_PRINT(2, ("mali_memory_cow_modify_range: cpu mapping failed !\n"));
+				ret =  _MALI_OSK_ERR_FAULT;
+			}
+		} else {
+			/* used to trigger page fault for swappable cowed memory. */
+			alloc->cpu_mapping.vma->vm_flags |= VM_PFNMAP;
+			alloc->cpu_mapping.vma->vm_flags |= VM_MIXEDMAP;
+
+			zap_vma_ptes(alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size);
+			/* delete this flag to let swappble is ummapped regard to stauct page not page frame. */
+			alloc->cpu_mapping.vma->vm_flags &= ~VM_PFNMAP;
+			alloc->cpu_mapping.vma->vm_flags &= ~VM_MIXEDMAP;
+		}
+	}
+
+error:
+	mutex_unlock(&backend->mutex);
+	return ret;
+
+}
+
+
+/**
+* Allocate pages for COW backend
+* @alloc  -allocation for COW allocation
+* @target_bk - target allocation's backend(the allocation need to do COW)
+* @target_offset - the offset in target allocation to do COW(for support COW  a memory allocated from memory_bank, 4K align)
+* @target_size - size of target allocation to do COW (for support memory bank)(in byte)
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range(in byte)
+*/
+_mali_osk_errcode_t mali_memory_do_cow(mali_mem_backend *target_bk,
+				       u32 target_offset,
+				       u32 target_size,
+				       mali_mem_backend *backend,
+				       u32 range_start,
+				       u32 range_size)
+{
+	struct mali_session_data *session = backend->mali_allocation->session;
+
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+
+	/* size & offset must be a multiple of the system page size */
+	if (target_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (range_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (target_offset % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+	if (range_start % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	/* check backend type */
+	MALI_DEBUG_ASSERT(backend->type == MALI_MEM_COW);
+
+	switch (target_bk->type) {
+	case MALI_MEM_OS:
+	case MALI_MEM_BLOCK:
+		return mali_memory_cow_os_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		break;
+	case MALI_MEM_COW:
+		if (backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED) {
+			return mali_memory_cow_swap_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		} else {
+			return mali_memory_cow_os_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		}
+		break;
+	case MALI_MEM_SWAP:
+		return mali_memory_cow_swap_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+		break;
+	case MALI_MEM_EXTERNAL:
+		/*NOT support yet*/
+		MALI_DEBUG_PRINT_ERROR(("External physical memory not supported !\n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	case MALI_MEM_DMA_BUF:
+		/*NOT support yet*/
+		MALI_DEBUG_PRINT_ERROR(("DMA buffer not supported !\n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	case MALI_MEM_UMP:
+		/*NOT support yet*/
+		MALI_DEBUG_PRINT_ERROR(("UMP buffer not supported !\n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	default:
+		/*Not support yet*/
+		MALI_DEBUG_PRINT_ERROR(("Invalid memory type not supported !\n"));
+		return _MALI_OSK_ERR_UNSUPPORTED;
+		break;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/**
+* Map COW backend memory to mali
+* Support OS/BLOCK for mali_page_node
+*/
+int mali_mem_cow_mali_map(mali_mem_backend *mem_bkend, u32 range_start, u32 range_size)
+{
+	mali_mem_allocation *cow_alloc;
+	struct mali_page_node *m_page;
+	struct mali_session_data *session;
+	struct mali_page_directory *pagedir;
+	u32 virt, start;
+
+	cow_alloc = mem_bkend->mali_allocation;
+	virt = cow_alloc->mali_vma_node.vm_node.start;
+	start = virt;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+	MALI_DEBUG_ASSERT_POINTER(cow_alloc);
+
+	session = cow_alloc->session;
+	pagedir = session->page_directory;
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+	list_for_each_entry(m_page, &mem_bkend->cow_mem.pages, list) {
+		if ((virt - start >= range_start) && (virt - start < range_start + range_size)) {
+			dma_addr_t phys = _mali_page_node_get_dma_addr(m_page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+			MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+			mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys,
+						MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+		}
+		virt += MALI_MMU_PAGE_SIZE;
+	}
+	return 0;
+}
+
+/**
+* Map COW backend to cpu
+* support OS/BLOCK memory
+*/
+int mali_mem_cow_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+	mali_mem_cow *cow = &mem_bkend->cow_mem;
+	struct mali_page_node *m_page;
+	int ret;
+	unsigned long addr = vma->vm_start;
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+
+	list_for_each_entry(m_page, &cow->pages, list) {
+		/* We should use vm_insert_page, but it does a dcache
+		 * flush which makes it way slower than remap_pfn_range or vm_insert_pfn.
+		ret = vm_insert_page(vma, addr, page);
+		*/
+		ret = vm_insert_pfn(vma, addr, _mali_page_node_get_pfn(m_page));
+
+		if (unlikely(ret != 0)) {
+			return ret;
+		}
+		addr += _MALI_OSK_MALI_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+/**
+* Map some pages(COW backend) to CPU vma@vaddr
+*@ mem_bkend - COW backend
+*@ vma
+*@ vaddr -start CPU vaddr mapped to
+*@ num - max number of pages to map to CPU vaddr
+*/
+_mali_osk_errcode_t mali_mem_cow_cpu_map_pages_locked(mali_mem_backend *mem_bkend,
+		struct vm_area_struct *vma,
+		unsigned long vaddr,
+		int num)
+{
+	mali_mem_cow *cow = &mem_bkend->cow_mem;
+	struct mali_page_node *m_page;
+	int ret;
+	int offset;
+	int count;
+	unsigned long vstart = vma->vm_start;
+
+	count = 0;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+	MALI_DEBUG_ASSERT(0 == vaddr % _MALI_OSK_MALI_PAGE_SIZE);
+	MALI_DEBUG_ASSERT(0 == vstart % _MALI_OSK_MALI_PAGE_SIZE);
+	offset = (vaddr - vstart) / _MALI_OSK_MALI_PAGE_SIZE;
+
+	list_for_each_entry(m_page, &cow->pages, list) {
+		if ((count >= offset) && (count < offset + num)) {
+			ret = vm_insert_pfn(vma, vaddr, _mali_page_node_get_pfn(m_page));
+
+			if (unlikely(ret != 0)) {
+				if (count == offset) {
+					return _MALI_OSK_ERR_FAULT;
+				} else {
+					/* ret is EBUSY when page isn't in modify range, but now it's OK*/
+					return _MALI_OSK_ERR_OK;
+				}
+			}
+			vaddr += _MALI_OSK_MALI_PAGE_SIZE;
+		}
+		count++;
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+/**
+* Release COW backend memory
+* free it directly(put_page--unref page), not put into pool
+*/
+u32 mali_mem_cow_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped)
+{
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+	alloc = mem_bkend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (MALI_MEM_BACKEND_FLAG_SWAP_COWED & mem_bkend->flags)) {
+		/* Unmap the memory from the mali virtual address space. */
+		if (is_mali_mapped == MALI_TRUE)
+			mali_mem_os_mali_unmap(alloc);
+		/* free cow backend list*/
+		_mali_osk_mutex_wait(session->cow_lock);
+		free_pages_nr = mali_mem_os_free(&mem_bkend->cow_mem.pages, mem_bkend->cow_mem.count, MALI_TRUE);
+		_mali_osk_mutex_signal(session->cow_lock);
+
+		free_pages_nr += mali_mem_block_free_list(&mem_bkend->cow_mem.pages);
+
+		MALI_DEBUG_ASSERT(list_empty(&mem_bkend->cow_mem.pages));
+	} else {
+		free_pages_nr = mali_mem_swap_release(mem_bkend, is_mali_mapped);
+	}
+
+
+	MALI_DEBUG_PRINT(4, ("COW Mem free : allocated size = 0x%x, free size = 0x%x\n", mem_bkend->cow_mem.count * _MALI_OSK_MALI_PAGE_SIZE,
+			     free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+
+	mem_bkend->cow_mem.count = 0;
+	return free_pages_nr;
+}
+
+
+/* Dst node could os node or swap node. */
+void _mali_mem_cow_copy_page(mali_page_node *src_node, mali_page_node *dst_node)
+{
+	void *dst, *src;
+	struct page *dst_page;
+	dma_addr_t dma_addr;
+
+	MALI_DEBUG_ASSERT(src_node != NULL);
+	MALI_DEBUG_ASSERT(dst_node != NULL);
+	MALI_DEBUG_ASSERT(dst_node->type == MALI_PAGE_NODE_OS
+			  || dst_node->type == MALI_PAGE_NODE_SWAP);
+
+	if (dst_node->type == MALI_PAGE_NODE_OS) {
+		dst_page = dst_node->page;
+	} else {
+		dst_page = dst_node->swap_it->page;
+	}
+
+	dma_unmap_page(&mali_platform_device->dev, _mali_page_node_get_dma_addr(dst_node),
+		       _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+	/* map it , and copy the content*/
+	dst = kmap_atomic(dst_page);
+
+	if (src_node->type == MALI_PAGE_NODE_OS ||
+	    src_node->type == MALI_PAGE_NODE_SWAP) {
+		struct page *src_page;
+
+		if (src_node->type == MALI_PAGE_NODE_OS) {
+			src_page = src_node->page;
+		} else {
+			src_page = src_node->swap_it->page;
+		}
+
+		/* Clear and invaliate cache */
+		/* In ARM architecture, speculative read may pull stale data into L1 cache
+		 * for kernel linear mapping page table. DMA_BIDIRECTIONAL could
+		 * invalidate the L1 cache so that following read get the latest data
+		*/
+		dma_unmap_page(&mali_platform_device->dev, _mali_page_node_get_dma_addr(src_node),
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+		src = kmap_atomic(src_page);
+		memcpy(dst, src, _MALI_OSK_MALI_PAGE_SIZE);
+		kunmap_atomic(src);
+		dma_addr = dma_map_page(&mali_platform_device->dev, src_page,
+					0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+		if (src_node->type == MALI_PAGE_NODE_SWAP) {
+			src_node->swap_it->dma_addr = dma_addr;
+		}
+	} else if (src_node->type == MALI_PAGE_NODE_BLOCK) {
+		/*
+		* use ioremap to map src for BLOCK memory
+		*/
+		src = ioremap_nocache(_mali_page_node_get_dma_addr(src_node), _MALI_OSK_MALI_PAGE_SIZE);
+		memcpy(dst, src, _MALI_OSK_MALI_PAGE_SIZE);
+		iounmap(src);
+	}
+	kunmap_atomic(dst);
+	dma_addr = dma_map_page(&mali_platform_device->dev, dst_page,
+				0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+	if (dst_node->type == MALI_PAGE_NODE_SWAP) {
+		dst_node->swap_it->dma_addr = dma_addr;
+	}
+}
+
+
+/*
+* allocate page on demand when CPU access it,
+* THis used in page fault handler
+*/
+_mali_osk_errcode_t mali_mem_cow_allocate_on_demand(mali_mem_backend *mem_bkend, u32 offset_page)
+{
+	struct page *new_page = NULL;
+	struct mali_page_node *new_node = NULL;
+	int i = 0;
+	struct mali_page_node *m_page, *found_node = NULL;
+	struct  mali_session_data *session = NULL;
+	mali_mem_cow *cow = &mem_bkend->cow_mem;
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+	MALI_DEBUG_ASSERT(offset_page < mem_bkend->size / _MALI_OSK_MALI_PAGE_SIZE);
+	MALI_DEBUG_PRINT(4, ("mali_mem_cow_allocate_on_demand !, offset_page =0x%x\n", offset_page));
+
+	/* allocate new page here */
+	new_page = mali_mem_cow_alloc_page();
+	if (!new_page)
+		return _MALI_OSK_ERR_NOMEM;
+
+	new_node = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+	if (!new_node) {
+		__free_page(new_page);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	/* find the page in backend*/
+	list_for_each_entry(m_page, &cow->pages, list) {
+		if (i == offset_page) {
+			found_node = m_page;
+			break;
+		}
+		i++;
+	}
+	MALI_DEBUG_ASSERT(found_node);
+	if (found_node == NULL) {
+		__free_page(new_page);
+		kfree(new_node);
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	_mali_page_node_add_page(new_node, new_page);
+
+	/* Copy the src page's content to new page */
+	_mali_mem_cow_copy_page(found_node, new_node);
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend->mali_allocation);
+	session = mem_bkend->mali_allocation->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	if (_mali_page_node_get_ref_count(found_node) != 1) {
+		atomic_add(1, &session->mali_mem_allocated_pages);
+		if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+			session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+		mem_bkend->cow_mem.change_pages_nr++;
+	}
+
+	_mali_osk_mutex_wait(session->cow_lock);
+	if (_mali_mem_put_page_node(found_node)) {
+		__free_page(new_page);
+		kfree(new_node);
+		_mali_osk_mutex_signal(session->cow_lock);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	_mali_osk_mutex_signal(session->cow_lock);
+
+	list_replace(&found_node->list, &new_node->list);
+
+	kfree(found_node);
+
+	/* map to GPU side*/
+	_mali_osk_mutex_wait(session->memory_lock);
+	mali_mem_cow_mali_map(mem_bkend, offset_page * _MALI_OSK_MALI_PAGE_SIZE, _MALI_OSK_MALI_PAGE_SIZE);
+	_mali_osk_mutex_signal(session->memory_lock);
+	return _MALI_OSK_ERR_OK;
+}
\ No newline at end of file
diff --git a/utgard/r7p0/linux/mali_memory_cow.h b/utgard/r7p0/linux/mali_memory_cow.h
new file mode 100644
index 0000000..b32ee75
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_cow.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_COW_H__
+#define __MALI_MEMORY_COW_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+#include "mali_memory_types.h"
+
+int mali_mem_cow_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+_mali_osk_errcode_t mali_mem_cow_cpu_map_pages_locked(mali_mem_backend *mem_bkend,
+		struct vm_area_struct *vma,
+		unsigned long vaddr,
+		int num);
+
+_mali_osk_errcode_t mali_memory_do_cow(mali_mem_backend *target_bk,
+				       u32 target_offset,
+				       u32 target_size,
+				       mali_mem_backend *backend,
+				       u32 range_start,
+				       u32 range_size);
+
+_mali_osk_errcode_t mali_memory_cow_modify_range(mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size);
+
+_mali_osk_errcode_t mali_memory_cow_os_memory(mali_mem_backend *target_bk,
+		u32 target_offset,
+		u32 target_size,
+		mali_mem_backend *backend,
+		u32 range_start,
+		u32 range_size);
+
+void _mali_mem_cow_copy_page(mali_page_node *src_node, mali_page_node *dst_node);
+
+int mali_mem_cow_mali_map(mali_mem_backend *mem_bkend, u32 range_start, u32 range_size);
+u32 mali_mem_cow_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped);
+_mali_osk_errcode_t mali_mem_cow_allocate_on_demand(mali_mem_backend *mem_bkend, u32 offset_page);
+#endif
+
diff --git a/utgard/r7p0/linux/mali_memory_defer_bind.c b/utgard/r7p0/linux/mali_memory_defer_bind.c
new file mode 100644
index 0000000..5ad1ef6
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_defer_bind.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#ifdef CONFIG_ARM
+#include <asm/outercache.h>
+#endif
+#include <asm/dma-mapping.h>
+
+#include "mali_memory.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory_defer_bind.h"
+#include "mali_executor.h"
+#include "mali_osk.h"
+#include "mali_scheduler.h"
+#include "mali_gp_job.h"
+
+mali_defer_bind_manager *mali_dmem_man;
+
+static u32 mali_dmem_get_gp_varying_size(struct mali_gp_job *gp_job)
+{
+	return gp_job->required_varying_memsize / _MALI_OSK_MALI_PAGE_SIZE;
+}
+
+_mali_osk_errcode_t mali_mem_defer_bind_manager_init(void)
+{
+	mali_dmem_man = _mali_osk_calloc(1, sizeof(struct mali_defer_bind_manager));
+	if (!mali_dmem_man)
+		return _MALI_OSK_ERR_NOMEM;
+
+	atomic_set(&mali_dmem_man->num_used_pages, 0);
+	atomic_set(&mali_dmem_man->num_dmem, 0);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+void mali_mem_defer_bind_manager_destory(void)
+{
+	if (mali_dmem_man) {
+		MALI_DEBUG_ASSERT(atomic_read(&mali_dmem_man->num_dmem) == 0);
+		kfree(mali_dmem_man);
+	}
+	mali_dmem_man = NULL;
+}
+
+
+/*allocate pages from OS memory*/
+_mali_osk_errcode_t mali_mem_defer_alloc_mem(u32 require, struct mali_session_data *session, mali_defer_mem_block *dblock)
+{
+	int retval = 0;
+	u32 num_pages = require;
+	mali_mem_os_mem os_mem;
+
+	retval = mali_mem_os_alloc_pages(&os_mem, num_pages * _MALI_OSK_MALI_PAGE_SIZE);
+
+	/* add to free pages list */
+	if (retval == 0) {
+		MALI_DEBUG_PRINT(4, ("mali_mem_defer_alloc_mem ,,*** pages allocate = 0x%x\n", num_pages));
+		list_splice(&os_mem.pages, &dblock->free_pages);
+		atomic_add(os_mem.count, &dblock->num_free_pages);
+		atomic_add(os_mem.count, &session->mali_mem_allocated_pages);
+		if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+			session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+		return _MALI_OSK_ERR_OK;
+	} else
+		return _MALI_OSK_ERR_FAULT;
+}
+
+_mali_osk_errcode_t mali_mem_prepare_mem_for_job(struct mali_gp_job *next_gp_job, mali_defer_mem_block *dblock)
+{
+	u32 require_page;
+
+	if (!next_gp_job)
+		return _MALI_OSK_ERR_FAULT;
+
+	require_page = mali_dmem_get_gp_varying_size(next_gp_job);
+
+	MALI_DEBUG_PRINT(4, ("mali_mem_defer_prepare_mem_work, require alloc page 0x%x\n",
+			     require_page));
+	/* allocate more pages from OS */
+	if (mali_mem_defer_alloc_mem(require_page, next_gp_job->session, dblock) != _MALI_OSK_ERR_OK) {
+		MALI_DEBUG_PRINT(1, ("ERROR##mali_mem_defer_prepare_mem_work, allocate page failed!!"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	next_gp_job->bind_flag = MALI_DEFER_BIND_MEMORY_PREPARED;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/* do preparetion for allocation before defer bind */
+_mali_osk_errcode_t mali_mem_defer_bind_allocation_prepare(mali_mem_allocation *alloc, struct list_head *list, u32 *required_varying_memsize)
+{
+	mali_mem_backend *mem_bkend = NULL;
+	struct mali_backend_bind_list *bk_list = _mali_osk_calloc(1, sizeof(struct mali_backend_bind_list));
+
+	if (bk_list == NULL)
+		return _MALI_OSK_ERR_FAULT;
+
+	INIT_LIST_HEAD(&bk_list->node);
+	/* Get backend memory */
+	mutex_lock(&mali_idr_mutex);
+	if (!(mem_bkend = idr_find(&mali_backend_idr, alloc->backend_handle))) {
+		MALI_DEBUG_PRINT(1, ("Can't find memory backend in defer bind!\n"));
+		mutex_unlock(&mali_idr_mutex);
+		_mali_osk_free(bk_list);
+		return _MALI_OSK_ERR_FAULT;
+	}
+	mutex_unlock(&mali_idr_mutex);
+
+	/* If the mem backend has already been bound, no need to bind again.*/
+	if (mem_bkend->os_mem.count > 0) {
+		_mali_osk_free(bk_list);
+		return _MALI_OSK_ERR_OK;
+	}
+
+	MALI_DEBUG_PRINT(4, ("bind_allocation_prepare:: allocation =%x vaddr=0x%x!\n", alloc, alloc->mali_vma_node.vm_node.start));
+
+	INIT_LIST_HEAD(&mem_bkend->os_mem.pages);
+
+	bk_list->bkend = mem_bkend;
+	bk_list->vaddr = alloc->mali_vma_node.vm_node.start;
+	bk_list->session = alloc->session;
+	bk_list->page_num = mem_bkend->size / _MALI_OSK_MALI_PAGE_SIZE;
+	*required_varying_memsize +=  mem_bkend->size;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+
+	/* add to job to do list */
+	list_add(&bk_list->node, list);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+
+/* bind phyiscal memory to allocation
+This function will be called in IRQ handler*/
+static _mali_osk_errcode_t mali_mem_defer_bind_allocation(struct mali_backend_bind_list *bk_node,
+		struct list_head *pages)
+{
+	struct mali_session_data *session = bk_node->session;
+	mali_mem_backend *mem_bkend = bk_node->bkend;
+
+	MALI_DEBUG_PRINT(4, ("mali_mem_defer_bind_allocation, bind bkend = %x page num=0x%x vaddr=%x session=%x\n", mem_bkend, bk_node->page_num, bk_node->vaddr, session));
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+	list_splice(pages, &mem_bkend->os_mem.pages);
+	mem_bkend->os_mem.count = bk_node->page_num;
+
+	if (mem_bkend->type == MALI_MEM_OS) {
+		mali_mem_os_mali_map(&mem_bkend->os_mem, session, bk_node->vaddr, 0,
+				     mem_bkend->os_mem.count, MALI_MMU_FLAGS_DEFAULT);
+	}
+	smp_wmb();
+	bk_node->flag = MALI_DEFER_BIND_MEMORY_BINDED;
+	mem_bkend->flags &= ~MALI_MEM_BACKEND_FLAG_NOT_BINDED;
+	mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_BINDED;
+	return _MALI_OSK_ERR_OK;
+}
+
+
+static struct list_head *mali_mem_defer_get_free_page_list(u32 count, struct list_head *pages, mali_defer_mem_block *dblock)
+{
+	int i = 0;
+	struct mali_page_node *m_page, *m_tmp;
+
+	if (atomic_read(&dblock->num_free_pages) < count) {
+		return NULL;
+	} else {
+		list_for_each_entry_safe(m_page, m_tmp, &dblock->free_pages, list) {
+			if (i < count) {
+				list_move_tail(&m_page->list, pages);
+			} else {
+				break;
+			}
+			i++;
+		}
+		MALI_DEBUG_ASSERT(i == count);
+		atomic_sub(count, &dblock->num_free_pages);
+		return pages;
+	}
+}
+
+
+/* called in job start IOCTL to bind physical memory for each allocations
+@ bk_list backend list to do defer bind
+@ pages page list to do this bind
+@ count number of pages
+*/
+_mali_osk_errcode_t mali_mem_defer_bind(struct mali_gp_job *gp,
+					struct mali_defer_mem_block *dmem_block)
+{
+	struct mali_defer_mem *dmem = NULL;
+	struct mali_backend_bind_list *bkn, *bkn_tmp;
+	LIST_HEAD(pages);
+
+	if (gp->required_varying_memsize != (atomic_read(&dmem_block->num_free_pages) * _MALI_OSK_MALI_PAGE_SIZE)) {
+		MALI_DEBUG_PRINT_ERROR(("#BIND:  The memsize of varying buffer not match to the pagesize of the dmem_block!!##\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	MALI_DEBUG_PRINT(4, ("#BIND: GP job=%x##\n", gp));
+	dmem = (mali_defer_mem *)_mali_osk_calloc(1, sizeof(struct mali_defer_mem));
+	if (dmem) {
+		INIT_LIST_HEAD(&dmem->node);
+		gp->dmem = dmem;
+	} else {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	atomic_add(1, &mali_dmem_man->num_dmem);
+	/* for each bk_list backend, do bind */
+	list_for_each_entry_safe(bkn, bkn_tmp, &gp->vary_todo, node) {
+		INIT_LIST_HEAD(&pages);
+		if (likely(mali_mem_defer_get_free_page_list(bkn->page_num, &pages, dmem_block))) {
+			list_del(&bkn->node);
+			mali_mem_defer_bind_allocation(bkn, &pages);
+			_mali_osk_free(bkn);
+		} else {
+			/* not enough memory will not happen */
+			MALI_DEBUG_PRINT_ERROR(("#BIND: NOT enough memory when binded !!##\n"));
+			_mali_osk_free(gp->dmem);
+			return _MALI_OSK_ERR_NOMEM;
+		}
+	}
+
+	if (!list_empty(&gp->vary_todo)) {
+		MALI_DEBUG_PRINT_ERROR(("#BIND:  The deferbind backend list isn't empty !!##\n"));
+		_mali_osk_free(gp->dmem);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	dmem->flag = MALI_DEFER_BIND_MEMORY_BINDED;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_defer_dmem_free(struct mali_gp_job *gp)
+{
+	if (gp->dmem) {
+		atomic_dec(&mali_dmem_man->num_dmem);
+		_mali_osk_free(gp->dmem);
+	}
+}
\ No newline at end of file
diff --git a/utgard/r7p0/linux/mali_memory_defer_bind.h b/utgard/r7p0/linux/mali_memory_defer_bind.h
new file mode 100644
index 0000000..58353e0
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_defer_bind.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __MALI_MEMORY_DEFER_BIND_H_
+#define __MALI_MEMORY_DEFER_BIND_H_
+
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_uk_types.h"
+
+struct mali_gp_job;
+
+typedef struct mali_defer_mem {
+	struct list_head node;   /*dlist node in bind manager */
+	u32 flag;
+} mali_defer_mem;
+
+
+typedef struct mali_defer_mem_block {
+	struct list_head free_pages; /* page pool */
+	atomic_t num_free_pages;
+} mali_defer_mem_block;
+
+/* varying memory list need to bind */
+typedef struct mali_backend_bind_list {
+	struct list_head node;
+	struct mali_mem_backend *bkend;
+	u32 vaddr;
+	u32 page_num;
+	struct mali_session_data *session;
+	u32 flag;
+} mali_backend_bind_lists;
+
+
+typedef struct mali_defer_bind_manager {
+	atomic_t num_used_pages;
+	atomic_t num_dmem;
+} mali_defer_bind_manager;
+
+_mali_osk_errcode_t mali_mem_defer_bind_manager_init(void);
+void mali_mem_defer_bind_manager_destory(void);
+_mali_osk_errcode_t mali_mem_defer_bind(struct mali_gp_job *gp, struct mali_defer_mem_block *dmem_block);
+_mali_osk_errcode_t mali_mem_defer_bind_allocation_prepare(mali_mem_allocation *alloc, struct list_head *list,  u32 *required_varying_memsize);
+_mali_osk_errcode_t mali_mem_prepare_mem_for_job(struct mali_gp_job *next_gp_job, mali_defer_mem_block *dblock);
+void mali_mem_defer_dmem_free(struct mali_gp_job *gp);
+
+#endif
diff --git a/utgard/r7p0/linux/mali_memory_dma_buf.c b/utgard/r7p0/linux/mali_memory_dma_buf.c
new file mode 100644
index 0000000..8895f20
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_dma_buf.c
@@ -0,0 +1,373 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/fs.h>      /* file system operations */
+#include <asm/uaccess.h>        /* user space access */
+#include <linux/dma-buf.h>
+#include <linux/scatterlist.h>
+#include <linux/rbtree.h>
+#include <linux/platform_device.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_linux.h"
+
+#include "mali_memory.h"
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_virtual.h"
+#include "mali_pp_job.h"
+
+/*
+ * Map DMA buf attachment \a mem into \a session at virtual address \a virt.
+ */
+static int mali_dma_buf_map(mali_mem_backend *mem_backend)
+{
+	mali_mem_allocation *alloc;
+	struct mali_dma_buf_attachment *mem;
+	struct  mali_session_data *session;
+	struct mali_page_directory *pagedir;
+	_mali_osk_errcode_t err;
+	struct scatterlist *sg;
+	u32 virt, flags;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	mem = mem_backend->dma_buf.attachment;
+	MALI_DEBUG_ASSERT_POINTER(mem);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(mem->session == session);
+
+	virt = alloc->mali_vma_node.vm_node.start;
+	flags = alloc->flags;
+
+	mali_session_memory_lock(session);
+	mem->map_ref++;
+
+	MALI_DEBUG_PRINT(5, ("Mali DMA-buf: map attachment %p, new map_ref = %d\n", mem, mem->map_ref));
+
+	if (mem->map_ref == 1) {
+
+		/* First reference taken, so we need to map the dma buf */
+		MALI_DEBUG_ASSERT(!mem->is_mapped);
+
+		mem->sgt = dma_buf_map_attachment(mem->attachment, DMA_BIDIRECTIONAL);
+		if (IS_ERR_OR_NULL(mem->sgt)) {
+			MALI_DEBUG_PRINT_ERROR(("Failed to map dma-buf attachment\n"));
+			mem->map_ref--;
+			mali_session_memory_unlock(session);
+			return -EFAULT;
+		}
+
+		err = mali_mem_mali_map_prepare(alloc);
+		if (err != _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(1, ("Mapping of DMA memory failed\n"));
+			mem->map_ref--;
+			mali_session_memory_unlock(session);
+			return -ENOMEM;
+		}
+
+		pagedir = mali_session_get_page_directory(session);
+		MALI_DEBUG_ASSERT_POINTER(pagedir);
+
+		for_each_sg(mem->sgt->sgl, sg, mem->sgt->nents, i) {
+			u32 size = sg_dma_len(sg);
+			dma_addr_t phys = sg_dma_address(sg);
+
+			/* sg must be page aligned. */
+			MALI_DEBUG_ASSERT(0 == size % MALI_MMU_PAGE_SIZE);
+			MALI_DEBUG_ASSERT(0 == (phys & ~(uintptr_t)0xFFFFFFFF));
+
+			mali_mmu_pagedir_update(pagedir, virt, phys, size, MALI_MMU_FLAGS_DEFAULT);
+
+			virt += size;
+		}
+
+		if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+			u32 guard_phys;
+
+			MALI_DEBUG_PRINT(7, ("Mapping in extra guard page\n"));
+
+			guard_phys = sg_dma_address(mem->sgt->sgl);
+			mali_mmu_pagedir_update(pagedir, virt, guard_phys, MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+		}
+
+		mem->is_mapped = MALI_TRUE;
+		mali_session_memory_unlock(session);
+		/* Wake up any thread waiting for buffer to become mapped */
+		wake_up_all(&mem->wait_queue);
+	} else {
+		MALI_DEBUG_ASSERT(mem->is_mapped);
+		mali_session_memory_unlock(session);
+	}
+
+	return 0;
+}
+
+static void mali_dma_buf_unmap(mali_mem_allocation *alloc, struct mali_dma_buf_attachment *mem)
+{
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(mem);
+	MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	MALI_DEBUG_ASSERT_POINTER(alloc->session);
+
+	mali_session_memory_lock(alloc->session);
+	mem->map_ref--;
+
+	MALI_DEBUG_PRINT(5, ("Mali DMA-buf: unmap attachment %p, new map_ref = %d\n", mem, mem->map_ref));
+
+	if (mem->map_ref == 0) {
+		dma_buf_unmap_attachment(mem->attachment, mem->sgt, DMA_BIDIRECTIONAL);
+		if (mem->is_mapped == MALI_TRUE) {
+			mali_mem_mali_map_free(alloc->session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+					       alloc->flags);
+		}
+		mem->is_mapped = MALI_FALSE;
+	}
+	mali_session_memory_unlock(alloc->session);
+	/* Wake up any thread waiting for buffer to become unmapped */
+	wake_up_all(&mem->wait_queue);
+}
+
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+int mali_dma_buf_map_job(struct mali_pp_job *job)
+{
+	struct mali_dma_buf_attachment *mem;
+	_mali_osk_errcode_t err;
+	int i;
+	int ret = 0;
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	for (i = 0; i < num_memory_cookies; i++) {
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+		MALI_DEBUG_ASSERT(mali_vma_node != NULL);
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(mali_alloc != NULL);
+		if (mali_alloc->type != MALI_MEM_DMA_BUF) {
+			continue;
+		}
+
+		/* Get backend memory & Map on CPU */
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(mem_bkend != NULL);
+
+		mem = mem_bkend->dma_buf.attachment;
+
+		MALI_DEBUG_ASSERT_POINTER(mem);
+		MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job));
+
+		err = mali_dma_buf_map(mem_bkend);
+		if (err != 0) {
+			MALI_DEBUG_PRINT_ERROR(("Mali DMA-buf: Failed to map dma-buf for mali address %x\n", mali_addr));
+			ret = -EFAULT;
+			continue;
+		}
+	}
+	return ret;
+}
+
+void mali_dma_buf_unmap_job(struct mali_pp_job *job)
+{
+	struct mali_dma_buf_attachment *mem;
+	int i;
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	for (i = 0; i < num_memory_cookies; i++) {
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+		MALI_DEBUG_ASSERT(mali_vma_node != NULL);
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(mali_alloc != NULL);
+		if (mali_alloc->type != MALI_MEM_DMA_BUF) {
+			continue;
+		}
+
+		/* Get backend memory & Map on CPU */
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(mem_bkend != NULL);
+
+		mem = mem_bkend->dma_buf.attachment;
+
+		MALI_DEBUG_ASSERT_POINTER(mem);
+		MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job));
+		mali_dma_buf_unmap(mem_bkend->mali_allocation, mem);
+	}
+}
+#endif /* !CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH */
+
+int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *user_arg)
+{
+	_mali_uk_dma_buf_get_size_s args;
+	int fd;
+	struct dma_buf *buf;
+
+	/* get call arguments from user space. copy_from_user returns how many bytes which where NOT copied */
+	if (copy_from_user(&args, (void __user *)user_arg, sizeof(_mali_uk_dma_buf_get_size_s)) != 0) {
+		return -EFAULT;
+	}
+
+	/* Do DMA-BUF stuff */
+	fd = args.mem_fd;
+
+	buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(buf)) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to get dma-buf from fd: %d\n", fd));
+		return PTR_RET(buf);
+	}
+
+	if (put_user(buf->size, &user_arg->size) != 0) {
+		dma_buf_put(buf);
+		return -EFAULT;
+	}
+
+	dma_buf_put(buf);
+
+	return 0;
+}
+
+_mali_osk_errcode_t mali_mem_bind_dma_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		int fd, u32 flags)
+{
+	struct dma_buf *buf;
+	struct mali_dma_buf_attachment *dma_mem;
+	struct  mali_session_data *session = alloc->session;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	/* get dma buffer */
+	buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(buf)) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* Currently, mapping of the full buffer are supported. */
+	if (alloc->psize != buf->size) {
+		goto failed_alloc_mem;
+	}
+
+	dma_mem = _mali_osk_calloc(1, sizeof(struct mali_dma_buf_attachment));
+	if (dma_mem == NULL) {
+		goto failed_alloc_mem;
+	}
+
+	dma_mem->buf = buf;
+	dma_mem->session = session;
+	dma_mem->map_ref = 0;
+	init_waitqueue_head(&dma_mem->wait_queue);
+
+	dma_mem->attachment = dma_buf_attach(dma_mem->buf, &mali_platform_device->dev);
+	if (dma_mem->attachment == NULL) {
+		goto failed_dma_attach;
+	}
+
+	mem_backend->dma_buf.attachment = dma_mem;
+
+	alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP;
+	if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+	/* Map memory into session's Mali virtual address space. */
+	if (mali_dma_buf_map(mem_backend) != 0) {
+		goto Failed_dma_map;
+	}
+#endif
+
+	return _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+Failed_dma_map:
+	mali_dma_buf_unmap(alloc, dma_mem);
+#endif
+	/* Wait for buffer to become unmapped */
+	wait_event(dma_mem->wait_queue, !dma_mem->is_mapped);
+	MALI_DEBUG_ASSERT(!dma_mem->is_mapped);
+	dma_buf_detach(dma_mem->buf, dma_mem->attachment);
+failed_dma_attach:
+	_mali_osk_free(dma_mem);
+failed_alloc_mem:
+	dma_buf_put(buf);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_mem_unbind_dma_buf(mali_mem_backend *mem_backend)
+{
+	struct mali_dma_buf_attachment *mem;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(mem_backend->type == MALI_MEM_DMA_BUF);
+
+	mem = mem_backend->dma_buf.attachment;
+	MALI_DEBUG_ASSERT_POINTER(mem);
+	MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	MALI_DEBUG_PRINT(3, ("Mali DMA-buf: release attachment %p\n", mem));
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+	MALI_DEBUG_ASSERT_POINTER(mem_backend->mali_allocation);
+	/* We mapped implicitly on attach, so we need to unmap on release */
+	mali_dma_buf_unmap(mem_backend->mali_allocation, mem);
+#endif
+	/* Wait for buffer to become unmapped */
+	wait_event(mem->wait_queue, !mem->is_mapped);
+	MALI_DEBUG_ASSERT(!mem->is_mapped);
+
+	dma_buf_detach(mem->buf, mem->attachment);
+	dma_buf_put(mem->buf);
+
+	_mali_osk_free(mem);
+}
diff --git a/utgard/r7p0/linux/mali_memory_dma_buf.h b/utgard/r7p0/linux/mali_memory_dma_buf.h
new file mode 100644
index 0000000..063421b
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_dma_buf.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_DMA_BUF_H__
+#define __MALI_MEMORY_DMA_BUF_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_memory.h"
+
+struct mali_pp_job;
+
+struct mali_dma_buf_attachment;
+struct mali_dma_buf_attachment {
+	struct dma_buf *buf;
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+	struct mali_session_data *session;
+	int map_ref;
+	struct mutex map_lock;
+	mali_bool is_mapped;
+	wait_queue_head_t wait_queue;
+};
+
+int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *arg);
+
+void mali_mem_unbind_dma_buf(mali_mem_backend *mem_backend);
+
+_mali_osk_errcode_t mali_mem_bind_dma_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		int fd, u32 flags);
+
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+int mali_dma_buf_map_job(struct mali_pp_job *job);
+void mali_dma_buf_unmap_job(struct mali_pp_job *job);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MEMORY_DMA_BUF_H__ */
diff --git a/utgard/r7p0/linux/mali_memory_external.c b/utgard/r7p0/linux/mali_memory_external.c
new file mode 100644
index 0000000..c89cc25
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_external.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_memory.h"
+#include "mali_mem_validation.h"
+#include "mali_uk_types.h"
+
+void mali_mem_unbind_ext_buf(mali_mem_backend *mem_backend)
+{
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT(mem_backend->type == MALI_MEM_EXTERNAL);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+_mali_osk_errcode_t mali_mem_bind_ext_buf(mali_mem_allocation *alloc,
+		mali_mem_backend *mem_backend,
+		u32 phys_addr,
+		u32 flag)
+{
+	struct mali_session_data *session;
+	_mali_osk_errcode_t err;
+	u32 virt, phys, size;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	size = alloc->psize;
+	session = (struct mali_session_data *)(uintptr_t)alloc->session;
+	MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+
+	/* check arguments */
+	/* NULL might be a valid Mali address */
+	if (!size) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	/* size must be a multiple of the system page size */
+	if (size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+	/* Validate the mali physical range */
+	if (mali_mem_validation_check(phys_addr, size) != _MALI_OSK_ERR_OK) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (flag & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+	mali_session_memory_lock(session);
+
+	virt = alloc->mali_vma_node.vm_node.start;
+	phys = phys_addr;
+
+	err = mali_mem_mali_map_prepare(alloc);
+	if (err != _MALI_OSK_ERR_OK) {
+		mali_session_memory_unlock(session);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	mali_mmu_pagedir_update(session->page_directory, virt, phys, size, MALI_MMU_FLAGS_DEFAULT);
+
+	if (alloc->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+		mali_mmu_pagedir_update(session->page_directory, virt + size, phys, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+	}
+	MALI_DEBUG_PRINT(3,
+			 ("Requested to map physical memory 0x%x-0x%x into virtual memory 0x%x\n",
+			  phys_addr, (phys_addr + size - 1),
+			  virt));
+	mali_session_memory_unlock(session);
+
+	MALI_SUCCESS;
+}
+
diff --git a/utgard/r7p0/linux/mali_memory_external.h b/utgard/r7p0/linux/mali_memory_external.h
new file mode 100644
index 0000000..9f04920
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_external.h
@@ -0,0 +1,29 @@
+
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_EXTERNAL_H__
+#define __MALI_MEMORY_EXTERNAL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+_mali_osk_errcode_t mali_mem_bind_ext_buf(mali_mem_allocation * alloc,
+		mali_mem_backend * mem_backend,
+		u32 phys_addr,
+		u32 flag);
+void mali_mem_unbind_ext_buf(mali_mem_backend *mem_backend);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/utgard/r7p0/linux/mali_memory_manager.c b/utgard/r7p0/linux/mali_memory_manager.c
new file mode 100644
index 0000000..65af52a
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_manager.c
@@ -0,0 +1,1011 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+
+#include <linux/platform_device.h>
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif
+#include <linux/idr.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_secure.h"
+#endif
+#if defined(CONFIG_MALI400_UMP)
+#include "mali_memory_ump.h"
+#endif
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_util.h"
+#include "mali_memory_external.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_ukk.h"
+#include "mali_memory_swap_alloc.h"
+
+#if (LINUX_VERSION_CODE == KERNEL_VERSION(3, 14, 0)) || (LINUX_VERSION_CODE == KERNEL_VERSION(4, 9, 0))
+#define AML_MALI_DEBUG 1
+#endif
+
+/*
+* New memory system interface
+*/
+
+/*inti idr for backend memory */
+struct idr mali_backend_idr;
+struct mutex mali_idr_mutex;
+
+extern void show_mem(unsigned int flags);
+
+/* init allocation manager */
+int mali_memory_manager_init(struct mali_allocation_manager *mgr)
+{
+	/* init Locks */
+	rwlock_init(&mgr->vm_lock);
+	mutex_init(&mgr->list_mutex);
+
+	/* init link */
+	INIT_LIST_HEAD(&mgr->head);
+
+	/* init RB tree */
+	mgr->allocation_mgr_rb = RB_ROOT;
+	mgr->mali_allocation_num = 0;
+	return 0;
+}
+
+/* Deinit allocation manager
+* Do some check for debug
+*/
+void mali_memory_manager_uninit(struct mali_allocation_manager *mgr)
+{
+	/* check RB tree is empty */
+	MALI_DEBUG_ASSERT(((void *)(mgr->allocation_mgr_rb.rb_node) == (void *)rb_last(&mgr->allocation_mgr_rb)));
+	/* check allocation List */
+	MALI_DEBUG_ASSERT(list_empty(&mgr->head));
+}
+
+/* Prepare memory descriptor */
+static mali_mem_allocation *mali_mem_allocation_struct_create(struct mali_session_data *session)
+{
+	mali_mem_allocation *mali_allocation;
+
+	/* Allocate memory */
+	mali_allocation = (mali_mem_allocation *)kzalloc(sizeof(mali_mem_allocation), GFP_KERNEL);
+	if (NULL == mali_allocation) {
+#ifdef AML_MALI_DEBUG
+		MALI_PRINT_ERROR(("mali_mem_allocation_struct_create: descriptor was NULL\n"));
+		show_mem(SHOW_MEM_FILTER_NODES);
+#endif
+
+		return NULL;
+	}
+
+	MALI_DEBUG_CODE(mali_allocation->magic = MALI_MEM_ALLOCATION_VALID_MAGIC);
+
+	/* do init */
+	mali_allocation->flags = 0;
+	mali_allocation->session = session;
+
+	INIT_LIST_HEAD(&mali_allocation->list);
+	_mali_osk_atomic_init(&mali_allocation->mem_alloc_refcount, 1);
+
+	/**
+	*add to session list
+	*/
+	mutex_lock(&session->allocation_mgr.list_mutex);
+	list_add_tail(&mali_allocation->list, &session->allocation_mgr.head);
+	session->allocation_mgr.mali_allocation_num++;
+	mutex_unlock(&session->allocation_mgr.list_mutex);
+
+	return mali_allocation;
+}
+
+void  mali_mem_allocation_struct_destory(mali_mem_allocation *alloc)
+{
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(alloc->session);
+	mutex_lock(&alloc->session->allocation_mgr.list_mutex);
+	list_del(&alloc->list);
+	alloc->session->allocation_mgr.mali_allocation_num--;
+	mutex_unlock(&alloc->session->allocation_mgr.list_mutex);
+
+	kfree(alloc);
+}
+
+int mali_mem_backend_struct_create(mali_mem_backend **backend, u32 psize)
+{
+	mali_mem_backend *mem_backend = NULL;
+	s32 ret = -ENOSPC;
+	s32 index = -1;
+	*backend = (mali_mem_backend *)kzalloc(sizeof(mali_mem_backend), GFP_KERNEL);
+	if (NULL == *backend) {
+#ifdef AML_MALI_DEBUG
+		MALI_PRINT_ERROR(("mali_mem_backend_struct_create: backend descriptor was NULL\n"));
+		show_mem(SHOW_MEM_FILTER_NODES);
+#endif
+		return -1;
+	}
+	mem_backend = *backend;
+	mem_backend->size = psize;
+	mutex_init(&mem_backend->mutex);
+	INIT_LIST_HEAD(&mem_backend->list);
+	mem_backend->using_count = 0;
+
+
+	/* link backend with id */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
+again:
+	if (!idr_pre_get(&mali_backend_idr, GFP_KERNEL)) {
+		kfree(mem_backend);
+		return -ENOMEM;
+	}
+	mutex_lock(&mali_idr_mutex);
+	ret = idr_get_new_above(&mali_backend_idr, mem_backend, 1, &index);
+	mutex_unlock(&mali_idr_mutex);
+
+	if (-ENOSPC == ret) {
+		kfree(mem_backend);
+		return -ENOSPC;
+	}
+	if (-EAGAIN == ret)
+		goto again;
+#else
+	mutex_lock(&mali_idr_mutex);
+	ret = idr_alloc(&mali_backend_idr, mem_backend, 1, MALI_S32_MAX, GFP_KERNEL);
+	mutex_unlock(&mali_idr_mutex);
+	index = ret;
+	if (ret < 0) {
+		MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_create: Can't allocate idr for backend!\n"));
+		kfree(mem_backend);
+		return -ENOSPC;
+	}
+#endif
+	return index;
+}
+
+
+static void mali_mem_backend_struct_destory(mali_mem_backend **backend, s32 backend_handle)
+{
+	mali_mem_backend *mem_backend = *backend;
+
+	mutex_lock(&mali_idr_mutex);
+	idr_remove(&mali_backend_idr, backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	kfree(mem_backend);
+	*backend = NULL;
+}
+
+mali_mem_backend *mali_mem_backend_struct_search(struct mali_session_data *session, u32 mali_address)
+{
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_address, 0);
+	if (mali_vma_node == NULL)  {
+		MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_search:vma node was NULL\n"));
+		return NULL;
+	}
+	mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	MALI_DEBUG_ASSERT(mem_bkend != NULL);
+	return mem_bkend;
+}
+
+static _mali_osk_errcode_t mali_mem_resize(struct mali_session_data *session, mali_mem_backend *mem_backend, u32 physical_size)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	int retval = 0;
+	mali_mem_allocation *mali_allocation = NULL;
+	mali_mem_os_mem tmp_os_mem;
+	s32 change_page_count;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_PRINT(4, (" mali_mem_resize_memory called!\n"));
+	MALI_DEBUG_ASSERT(0 == physical_size %  MALI_MMU_PAGE_SIZE);
+
+	mali_allocation = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(mali_allocation);
+
+	MALI_DEBUG_ASSERT(MALI_MEM_FLAG_CAN_RESIZE & mali_allocation->flags);
+	MALI_DEBUG_ASSERT(mali_allocation->type == MALI_MEM_OS);
+
+	mutex_lock(&mem_backend->mutex);
+
+	/* Do resize*/
+	if (physical_size > mem_backend->size) {
+		u32 add_size = physical_size - mem_backend->size;
+
+		MALI_DEBUG_ASSERT(0 == add_size %  MALI_MMU_PAGE_SIZE);
+
+		/* Allocate new pages from os mem */
+		retval = mali_mem_os_alloc_pages(&tmp_os_mem, add_size);
+
+		if (retval) {
+			if (-ENOMEM == retval) {
+				ret = _MALI_OSK_ERR_NOMEM;
+			} else {
+				ret = _MALI_OSK_ERR_FAULT;
+			}
+			MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory allocation failed !\n"));
+			goto failed_alloc_memory;
+		}
+
+		MALI_DEBUG_ASSERT(tmp_os_mem.count == add_size / MALI_MMU_PAGE_SIZE);
+
+		/* Resize the memory of the backend */
+		ret = mali_mem_os_resize_pages(&tmp_os_mem, &mem_backend->os_mem, 0, tmp_os_mem.count);
+
+		if (ret) {
+			MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory	resizing failed !\n"));
+			goto failed_resize_pages;
+		}
+
+		/*Resize cpu mapping */
+		if (mali_allocation->cpu_mapping.vma != NULL) {
+			ret = mali_mem_os_resize_cpu_map_locked(mem_backend, mali_allocation->cpu_mapping.vma, mali_allocation->cpu_mapping.vma->vm_start  + mem_backend->size, add_size);
+			if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+				MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: cpu mapping failed !\n"));
+				goto  failed_cpu_map;
+			}
+		}
+
+		/* Resize mali mapping */
+		_mali_osk_mutex_wait(session->memory_lock);
+		ret = mali_mem_mali_map_resize(mali_allocation, physical_size);
+
+		if (ret) {
+			MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_resize: mali map resize fail !\n"));
+			goto failed_gpu_map;
+		}
+
+		ret = mali_mem_os_mali_map(&mem_backend->os_mem, session, mali_allocation->mali_vma_node.vm_node.start,
+					   mali_allocation->psize / MALI_MMU_PAGE_SIZE, add_size / MALI_MMU_PAGE_SIZE, mali_allocation->mali_mapping.properties);
+		if (ret) {
+			MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: mali mapping failed !\n"));
+			goto failed_gpu_map;
+		}
+
+		_mali_osk_mutex_signal(session->memory_lock);
+	} else {
+		u32 dec_size, page_count;
+		u32 vaddr = 0;
+
+		INIT_LIST_HEAD(&tmp_os_mem.pages);
+		tmp_os_mem.count = 0;
+
+		dec_size = mem_backend->size - physical_size;
+		MALI_DEBUG_ASSERT(0 == dec_size %  MALI_MMU_PAGE_SIZE);
+
+		page_count = dec_size / MALI_MMU_PAGE_SIZE;
+		vaddr = mali_allocation->mali_vma_node.vm_node.start + physical_size;
+
+		/* Resize the memory of the backend */
+		ret = mali_mem_os_resize_pages(&mem_backend->os_mem, &tmp_os_mem, physical_size / MALI_MMU_PAGE_SIZE, page_count);
+
+		if (ret) {
+			MALI_DEBUG_PRINT(4, ("_mali_ukk_mem_resize: mali map resize failed!\n"));
+			goto failed_resize_pages;
+		}
+
+		/* Resize mali map */
+		_mali_osk_mutex_wait(session->memory_lock);
+		mali_mem_mali_map_free(session, dec_size, vaddr, mali_allocation->flags);
+		_mali_osk_mutex_signal(session->memory_lock);
+
+		/* Zap cpu mapping */
+		if (mali_allocation->cpu_mapping.addr != 0) {
+			MALI_DEBUG_ASSERT(mali_allocation->cpu_mapping.vma != NULL);
+			zap_vma_ptes(mali_allocation->cpu_mapping.vma, mali_allocation->cpu_mapping.vma->vm_start + physical_size, dec_size);
+		}
+
+		/* Free those extra pages */
+		mali_mem_os_free(&tmp_os_mem.pages, tmp_os_mem.count, MALI_FALSE);
+	}
+
+	/* Resize memory allocation and memory backend */
+	change_page_count = (s32)(physical_size - mem_backend->size) / MALI_MMU_PAGE_SIZE;
+	mali_allocation->psize = physical_size;
+	mem_backend->size = physical_size;
+	mutex_unlock(&mem_backend->mutex);
+
+	if (change_page_count > 0) {
+		atomic_add(change_page_count, &session->mali_mem_allocated_pages);
+		if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+			session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+
+	} else {
+		atomic_sub((s32)(-change_page_count), &session->mali_mem_allocated_pages);
+	}
+
+	return _MALI_OSK_ERR_OK;
+
+failed_gpu_map:
+	_mali_osk_mutex_signal(session->memory_lock);
+failed_cpu_map:
+	if (physical_size > mem_backend->size) {
+		mali_mem_os_resize_pages(&mem_backend->os_mem, &tmp_os_mem, mem_backend->size / MALI_MMU_PAGE_SIZE,
+					 (physical_size - mem_backend->size) / MALI_MMU_PAGE_SIZE);
+	} else {
+		mali_mem_os_resize_pages(&tmp_os_mem, &mem_backend->os_mem, 0, tmp_os_mem.count);
+	}
+failed_resize_pages:
+	if (tmp_os_mem.count != 0)
+		mali_mem_os_free(&tmp_os_mem.pages, tmp_os_mem.count, MALI_FALSE);
+failed_alloc_memory:
+
+	mutex_unlock(&mem_backend->mutex);
+	return ret;
+}
+
+
+/* Set GPU MMU properties */
+static void _mali_memory_gpu_map_property_set(u32 *properties, u32 flags)
+{
+	if (_MALI_MEMORY_GPU_READ_ALLOCATE & flags) {
+		*properties = MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE;
+	} else {
+		*properties = MALI_MMU_FLAGS_DEFAULT;
+	}
+}
+
+_mali_osk_errcode_t mali_mem_add_mem_size(struct mali_session_data *session, u32 mali_addr, u32 add_size)
+{
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_allocation *mali_allocation = NULL;
+	u32 new_physical_size;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT(0 == add_size %  MALI_MMU_PAGE_SIZE);
+
+	/* Get the memory backend that need to be resize. */
+	mem_backend = mali_mem_backend_struct_search(session, mali_addr);
+
+	if (mem_backend == NULL)  {
+		MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory backend = NULL!\n"));
+		return ret;
+	}
+
+	mali_allocation = mem_backend->mali_allocation;
+
+	MALI_DEBUG_ASSERT_POINTER(mali_allocation);
+
+	new_physical_size = add_size + mem_backend->size;
+
+	if (new_physical_size > (mali_allocation->mali_vma_node.vm_node.size))
+		return ret;
+
+	MALI_DEBUG_ASSERT(new_physical_size != mem_backend->size);
+
+	ret = mali_mem_resize(session, mem_backend, new_physical_size);
+
+	return ret;
+}
+
+/**
+*  function@_mali_ukk_mem_allocate - allocate mali memory
+*/
+_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args)
+{
+	struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	int retval = 0;
+	mali_mem_allocation *mali_allocation = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+
+	MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_allocate, vaddr=0x%x, size =0x%x!\n", args->gpu_vaddr, args->psize));
+
+	/* Check if the address is allocated
+	*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, args->gpu_vaddr, 0);
+
+	if (unlikely(mali_vma_node)) {
+		MALI_DEBUG_PRINT_ERROR(("The mali virtual address has already been used !\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+	/**
+	*create mali memory allocation
+	*/
+
+	mali_allocation = mali_mem_allocation_struct_create(session);
+
+	if (mali_allocation == NULL) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_allocate: Failed to create allocation struct!\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	mali_allocation->psize = args->psize;
+	mali_allocation->vsize = args->vsize;
+
+	/* MALI_MEM_OS if need to support mem resize,
+	 * or MALI_MEM_BLOCK if have dedicated memory,
+	 * or MALI_MEM_OS,
+	 * or MALI_MEM_SWAP.
+	 */
+	if (args->flags & _MALI_MEMORY_ALLOCATE_SWAPPABLE) {
+		mali_allocation->type = MALI_MEM_SWAP;
+	} else if (args->flags & _MALI_MEMORY_ALLOCATE_RESIZEABLE) {
+		mali_allocation->type = MALI_MEM_OS;
+		mali_allocation->flags |= MALI_MEM_FLAG_CAN_RESIZE;
+	} else if (args->flags & _MALI_MEMORY_ALLOCATE_SECURE) {
+		mali_allocation->type = MALI_MEM_SECURE;
+	} else if (mali_memory_have_dedicated_memory() == MALI_TRUE) {
+		mali_allocation->type = MALI_MEM_BLOCK;
+	} else {
+		mali_allocation->type = MALI_MEM_OS;
+	}
+
+	/**
+	*add allocation node to RB tree for index
+	*/
+	mali_allocation->mali_vma_node.vm_node.start = args->gpu_vaddr;
+	mali_allocation->mali_vma_node.vm_node.size = args->vsize;
+
+	mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, args->psize);
+	if (mali_allocation->backend_handle < 0) {
+		ret = _MALI_OSK_ERR_NOMEM;
+		MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0!\n"));
+		goto failed_alloc_backend;
+	}
+
+
+	mem_backend->mali_allocation = mali_allocation;
+	mem_backend->type = mali_allocation->type;
+
+	mali_allocation->mali_mapping.addr = args->gpu_vaddr;
+
+	/* set gpu mmu propery */
+	_mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags);
+	/* do prepare for MALI mapping */
+	if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) {
+		_mali_osk_mutex_wait(session->memory_lock);
+
+		ret = mali_mem_mali_map_prepare(mali_allocation);
+		if (ret != 0) {
+			_mali_osk_mutex_signal(session->memory_lock);
+			goto failed_prepare_map;
+		}
+		_mali_osk_mutex_signal(session->memory_lock);
+	}
+
+	if (mali_allocation->psize == 0) {
+		mem_backend->os_mem.count = 0;
+		INIT_LIST_HEAD(&mem_backend->os_mem.pages);
+		goto done;
+	}
+
+	if (args->flags & _MALI_MEMORY_ALLOCATE_DEFER_BIND) {
+		mali_allocation->flags |= _MALI_MEMORY_ALLOCATE_DEFER_BIND;
+		mem_backend->flags |= MALI_MEM_BACKEND_FLAG_NOT_BINDED;
+		/* init for defer bind backend*/
+		mem_backend->os_mem.count = 0;
+		INIT_LIST_HEAD(&mem_backend->os_mem.pages);
+
+		goto done;
+	}
+
+	if (likely(mali_allocation->psize > 0)) {
+
+		if (mem_backend->type == MALI_MEM_SECURE) {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+			ret = mali_mem_secure_attach_dma_buf(&mem_backend->secure_mem, mem_backend->size, args->secure_shared_fd);
+			if (ret != _MALI_OSK_ERR_OK) {
+				MALI_DEBUG_PRINT(1, ("Failed to attach dma buf for secure memory!\n"));
+				goto failed_alloc_pages;
+			}
+#else
+			ret = _MALI_OSK_ERR_UNSUPPORTED;
+			MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory!\n"));
+			goto failed_alloc_pages;
+#endif
+		} else {
+
+			/**
+			*allocate physical memory
+			*/
+			if (mem_backend->type == MALI_MEM_OS) {
+				retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size);
+			} else if (mem_backend->type == MALI_MEM_BLOCK) {
+				/* try to allocated from BLOCK memory first, then try OS memory if failed.*/
+				if (mali_mem_block_alloc(&mem_backend->block_mem, mem_backend->size)) {
+					retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size);
+					mem_backend->type = MALI_MEM_OS;
+					mali_allocation->type = MALI_MEM_OS;
+				}
+			} else if (mem_backend->type == MALI_MEM_SWAP) {
+				retval = mali_mem_swap_alloc_pages(&mem_backend->swap_mem, mali_allocation->mali_vma_node.vm_node.size, &mem_backend->start_idx);
+			}  else {
+				/* ONLY support mem_os type */
+				MALI_DEBUG_ASSERT(0);
+			}
+
+			if (retval) {
+				ret = _MALI_OSK_ERR_NOMEM;
+				MALI_DEBUG_PRINT(1, (" can't allocate enough pages!\n"));
+				goto failed_alloc_pages;
+			}
+		}
+	}
+
+	/**
+	*map to GPU side
+	*/
+	if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) {
+		_mali_osk_mutex_wait(session->memory_lock);
+		/* Map on Mali */
+
+		if (mem_backend->type == MALI_MEM_OS) {
+			ret = mali_mem_os_mali_map(&mem_backend->os_mem, session, args->gpu_vaddr, 0,
+						   mem_backend->size / MALI_MMU_PAGE_SIZE, mali_allocation->mali_mapping.properties);
+
+		} else if (mem_backend->type == MALI_MEM_BLOCK) {
+			mali_mem_block_mali_map(&mem_backend->block_mem, session, args->gpu_vaddr,
+						mali_allocation->mali_mapping.properties);
+		} else if (mem_backend->type == MALI_MEM_SWAP) {
+			ret = mali_mem_swap_mali_map(&mem_backend->swap_mem, session, args->gpu_vaddr,
+						     mali_allocation->mali_mapping.properties);
+		} else if (mem_backend->type == MALI_MEM_SECURE) {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+			ret = mali_mem_secure_mali_map(&mem_backend->secure_mem, session, args->gpu_vaddr, mali_allocation->mali_mapping.properties);
+#endif
+		} else { /* unsupport type */
+			MALI_DEBUG_ASSERT(0);
+		}
+
+		_mali_osk_mutex_signal(session->memory_lock);
+	}
+done:
+	if (mem_backend->type == MALI_MEM_OS) {
+		atomic_add(mem_backend->os_mem.count, &session->mali_mem_allocated_pages);
+	} else if (mem_backend->type == MALI_MEM_BLOCK) {
+		atomic_add(mem_backend->block_mem.count, &session->mali_mem_allocated_pages);
+	} else if (mem_backend->type == MALI_MEM_SECURE) {
+		atomic_add(mem_backend->secure_mem.count, &session->mali_mem_allocated_pages);
+	} else {
+		MALI_DEBUG_ASSERT(mem_backend->type == MALI_MEM_SWAP);
+		atomic_add(mem_backend->swap_mem.count, &session->mali_mem_allocated_pages);
+		atomic_add(mem_backend->swap_mem.count, &session->mali_mem_array[mem_backend->type]);
+	}
+
+	if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+		session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+	}
+	return _MALI_OSK_ERR_OK;
+
+failed_alloc_pages:
+	mali_mem_mali_map_free(session, mali_allocation->psize, mali_allocation->mali_vma_node.vm_node.start, mali_allocation->flags);
+failed_prepare_map:
+	mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+failed_alloc_backend:
+
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_allocation);
+
+	return ret;
+}
+
+
+_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args)
+{
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	u32 vaddr = args->gpu_vaddr;
+	mali_mem_allocation *mali_alloc = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+
+	/* find mali allocation structure by vaddress*/
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, vaddr, 0);
+	if (mali_vma_node == NULL) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_free: invalid addr: 0x%x\n", vaddr));
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+	MALI_DEBUG_ASSERT(mali_vma_node != NULL);
+	mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+
+	if (mali_alloc)
+		/* check ref_count */
+		args->free_pages_nr = mali_allocation_unref(&mali_alloc);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/**
+* Function _mali_ukk_mem_bind -- bind a external memory to a new GPU address
+* It will allocate a new mem allocation and bind external memory to it.
+* Supported backend type are:
+* _MALI_MEMORY_BIND_BACKEND_UMP
+* _MALI_MEMORY_BIND_BACKEND_DMA_BUF
+* _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY
+* CPU access is not supported yet
+*/
+_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args)
+{
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_allocation *mali_allocation = NULL;
+
+	MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_bind, vaddr=0x%x, size =0x%x!\n", args->vaddr, args->size));
+
+	/**
+	* allocate mali allocation.
+	*/
+	mali_allocation = mali_mem_allocation_struct_create(session);
+
+	if (mali_allocation == NULL) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	mali_allocation->psize = args->size;
+	mali_allocation->vsize = args->size;
+	mali_allocation->mali_mapping.addr = args->vaddr;
+
+	/* add allocation node to RB tree for index  */
+	mali_allocation->mali_vma_node.vm_node.start = args->vaddr;
+	mali_allocation->mali_vma_node.vm_node.size = args->size;
+	mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	/* allocate backend*/
+	if (mali_allocation->psize > 0) {
+		mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize);
+		if (mali_allocation->backend_handle < 0) {
+			goto Failed_alloc_backend;
+		}
+
+	} else {
+		goto Failed_alloc_backend;
+	}
+
+	mem_backend->size = mali_allocation->psize;
+	mem_backend->mali_allocation = mali_allocation;
+
+	switch (args->flags & _MALI_MEMORY_BIND_BACKEND_MASK) {
+	case  _MALI_MEMORY_BIND_BACKEND_UMP:
+#if defined(CONFIG_MALI400_UMP)
+		mali_allocation->type = MALI_MEM_UMP;
+		mem_backend->type = MALI_MEM_UMP;
+		ret = mali_mem_bind_ump_buf(mali_allocation, mem_backend,
+					    args->mem_union.bind_ump.secure_id, args->mem_union.bind_ump.flags);
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(1, ("Bind ump buf failed\n"));
+			goto  Failed_bind_backend;
+		}
+#else
+		MALI_DEBUG_PRINT(1, ("UMP not supported\n"));
+		goto Failed_bind_backend;
+#endif
+		break;
+	case  _MALI_MEMORY_BIND_BACKEND_DMA_BUF:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		mali_allocation->type = MALI_MEM_DMA_BUF;
+		mem_backend->type = MALI_MEM_DMA_BUF;
+		ret = mali_mem_bind_dma_buf(mali_allocation, mem_backend,
+					    args->mem_union.bind_dma_buf.mem_fd, args->mem_union.bind_dma_buf.flags);
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(1, ("Bind dma buf failed\n"));
+			goto Failed_bind_backend;
+		}
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported\n"));
+		goto Failed_bind_backend;
+#endif
+		break;
+	case _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY:
+		/* not allowed */
+		MALI_DEBUG_PRINT_ERROR(("Mali internal memory type not supported !\n"));
+		goto Failed_bind_backend;
+		break;
+
+	case _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY:
+		mali_allocation->type = MALI_MEM_EXTERNAL;
+		mem_backend->type = MALI_MEM_EXTERNAL;
+		ret = mali_mem_bind_ext_buf(mali_allocation, mem_backend, args->mem_union.bind_ext_memory.phys_addr,
+					    args->mem_union.bind_ext_memory.flags);
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(1, ("Bind external buf failed\n"));
+			goto Failed_bind_backend;
+		}
+		break;
+
+	case _MALI_MEMORY_BIND_BACKEND_EXT_COW:
+		/* not allowed */
+		MALI_DEBUG_PRINT_ERROR(("External cow memory  type not supported !\n"));
+		goto Failed_bind_backend;
+		break;
+
+	default:
+		MALI_DEBUG_PRINT_ERROR(("Invalid memory type  not supported !\n"));
+		goto Failed_bind_backend;
+		break;
+	}
+	MALI_DEBUG_ASSERT(0 == mem_backend->size % MALI_MMU_PAGE_SIZE);
+	atomic_add(mem_backend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_backend->type]);
+	return _MALI_OSK_ERR_OK;
+
+Failed_bind_backend:
+	mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+
+Failed_alloc_backend:
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_allocation);
+
+	MALI_DEBUG_PRINT(1, (" _mali_ukk_mem_bind, return ERROR!\n"));
+	return ret;
+}
+
+
+/*
+* Function _mali_ukk_mem_unbind -- unbind a external memory to a new GPU address
+* This function unbind the backend memory and free the allocation
+* no ref_count for this type of memory
+*/
+_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args)
+{
+	/**/
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	mali_mem_allocation *mali_allocation = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+	u32 mali_addr = args->vaddr;
+
+	MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_unbind, vaddr=0x%x!\n", args->vaddr));
+
+	/* find the allocation by vaddr */
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+	if (likely(mali_vma_node)) {
+		MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start);
+		mali_allocation = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+	} else {
+		MALI_DEBUG_ASSERT(mali_vma_node != NULL);
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	if (mali_allocation != NULL)
+		/* check ref_count */
+		mali_allocation_unref(&mali_allocation);
+	return _MALI_OSK_ERR_OK;
+}
+
+/*
+* Function _mali_ukk_mem_cow --  COW for an allocation
+* This function allocate new pages for  a range (range, range+size) of allocation
+*  And Map it(keep use the not in range pages from target allocation ) to an GPU vaddr
+*/
+_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_backend *target_backend = NULL;
+	mali_mem_backend *mem_backend = NULL;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_allocation = NULL;
+
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+	/* Get the target backend for cow */
+	target_backend = mali_mem_backend_struct_search(session, args->target_handle);
+
+	if (NULL == target_backend || 0 == target_backend->size) {
+		MALI_DEBUG_ASSERT_POINTER(target_backend);
+		MALI_DEBUG_ASSERT(target_backend->size != 0);
+		return ret;
+	}
+
+	/*Cow not support resized mem */
+	MALI_DEBUG_ASSERT(MALI_MEM_FLAG_CAN_RESIZE != (MALI_MEM_FLAG_CAN_RESIZE & target_backend->mali_allocation->flags));
+
+	/* Check if the new mali address is allocated */
+	mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, args->vaddr, 0);
+
+	if (unlikely(mali_vma_node)) {
+		MALI_DEBUG_PRINT_ERROR(("The mali virtual address has already been used !\n"));
+		return ret;
+	}
+
+	/* create new alloction for COW*/
+	mali_allocation = mali_mem_allocation_struct_create(session);
+	if (mali_allocation == NULL) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_cow: Failed to create allocation struct!\n"));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	mali_allocation->psize = args->target_size;
+	mali_allocation->vsize = args->target_size;
+	mali_allocation->type = MALI_MEM_COW;
+
+	/*add allocation node to RB tree for index*/
+	mali_allocation->mali_vma_node.vm_node.start = args->vaddr;
+	mali_allocation->mali_vma_node.vm_node.size = mali_allocation->vsize;
+	mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	/* create new backend for COW memory */
+	mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize);
+	if (mali_allocation->backend_handle < 0) {
+		ret = _MALI_OSK_ERR_NOMEM;
+		MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0!\n"));
+		goto failed_alloc_backend;
+	}
+	mem_backend->mali_allocation = mali_allocation;
+	mem_backend->type = mali_allocation->type;
+
+	if (target_backend->type == MALI_MEM_SWAP ||
+	    (MALI_MEM_COW == target_backend->type && (MALI_MEM_BACKEND_FLAG_SWAP_COWED & target_backend->flags))) {
+		mem_backend->flags |= MALI_MEM_BACKEND_FLAG_SWAP_COWED;
+		/**
+		 *     CoWed swap backends couldn't be mapped as non-linear vma, because if one
+		 * vma is set with flag VM_NONLINEAR, the vma->vm_private_data will be used by kernel,
+		 * while in mali driver, we use this variable to store the pointer of mali_allocation, so there
+		 * is a conflict.
+		 *     To resolve this problem, we have to do some fake things, we reserved about 64MB
+		 * space from index 0, there isn't really page's index will be set from 0 to (64MB>>PAGE_SHIFT_NUM),
+		 * and all of CoWed swap memory backends' start_idx will be assigned with 0, and these
+		 * backends will be mapped as linear and will add to priority tree of global swap file, while
+		 * these vmas will never be found by using normal page->index, these pages in those vma
+		 * also couldn't be swapped out.
+		 */
+		mem_backend->start_idx = 0;
+	}
+
+	/* Add the target backend's cow count, also allocate new pages for COW backend from os mem
+	*for a modified range and keep the page which not in the modified range and Add ref to it
+	*/
+	MALI_DEBUG_PRINT(3, ("Cow mapping: target_addr: 0x%x;  cow_addr: 0x%x,  size: %u\n", target_backend->mali_allocation->mali_vma_node.vm_node.start,
+			     mali_allocation->mali_vma_node.vm_node.start, mali_allocation->mali_vma_node.vm_node.size));
+
+	ret = mali_memory_do_cow(target_backend, args->target_offset, args->target_size, mem_backend, args->range_start, args->range_size);
+	if (ret != _MALI_OSK_ERR_OK) {
+		MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_cow: Failed to cow!\n"));
+		goto failed_do_cow;
+	}
+
+	/**
+	*map to GPU side
+	*/
+	mali_allocation->mali_mapping.addr = args->vaddr;
+	/* set gpu mmu propery */
+	_mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags);
+
+	_mali_osk_mutex_wait(session->memory_lock);
+	/* Map on Mali */
+	ret = mali_mem_mali_map_prepare(mali_allocation);
+	if (ret != 0) {
+		MALI_DEBUG_PRINT(1, (" prepare map fail!\n"));
+		goto failed_gpu_map;
+	}
+
+	if (!(mem_backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+		mali_mem_cow_mali_map(mem_backend, 0, mem_backend->size);
+	}
+
+	_mali_osk_mutex_signal(session->memory_lock);
+
+	mutex_lock(&target_backend->mutex);
+	target_backend->flags |= MALI_MEM_BACKEND_FLAG_COWED;
+	mutex_unlock(&target_backend->mutex);
+
+	atomic_add(args->range_size / MALI_MMU_PAGE_SIZE, &session->mali_mem_allocated_pages);
+	if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+		session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+	}
+	return _MALI_OSK_ERR_OK;
+
+failed_gpu_map:
+	_mali_osk_mutex_signal(session->memory_lock);
+	mali_mem_cow_release(mem_backend, MALI_FALSE);
+	mem_backend->cow_mem.count = 0;
+failed_do_cow:
+	mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+failed_alloc_backend:
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_allocation);
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_cow_modify_range(_mali_uk_cow_modify_range_s *args)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+	mali_mem_backend *mem_backend = NULL;
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_cow_modify_range called!\n"));
+	/* Get the backend that need to be modified. */
+	mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+
+	if (NULL == mem_backend || 0 == mem_backend->size) {
+		MALI_DEBUG_ASSERT_POINTER(mem_backend);
+		MALI_DEBUG_ASSERT(mem_backend->size != 0);
+		return ret;
+	}
+
+	MALI_DEBUG_ASSERT(mem_backend->type == MALI_MEM_COW);
+
+	ret =  mali_memory_cow_modify_range(mem_backend, args->range_start, args->size);
+	args->change_pages_nr = mem_backend->cow_mem.change_pages_nr;
+	if (ret != _MALI_OSK_ERR_OK)
+		return  ret;
+	_mali_osk_mutex_wait(session->memory_lock);
+	if (!(mem_backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+		mali_mem_cow_mali_map(mem_backend, args->range_start, args->size);
+	}
+	_mali_osk_mutex_signal(session->memory_lock);
+
+	atomic_add(args->change_pages_nr, &session->mali_mem_allocated_pages);
+	if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+		session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+_mali_osk_errcode_t _mali_ukk_mem_resize(_mali_uk_mem_resize_s *args)
+{
+	mali_mem_backend *mem_backend = NULL;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_PRINT(4, (" mali_mem_resize_memory called!\n"));
+	MALI_DEBUG_ASSERT(0 == args->psize %  MALI_MMU_PAGE_SIZE);
+
+	/* Get the memory backend that need to be resize. */
+	mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+
+	if (mem_backend == NULL)  {
+		MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory backend = NULL!\n"));
+		return ret;
+	}
+
+	MALI_DEBUG_ASSERT(args->psize != mem_backend->size);
+
+	ret = mali_mem_resize(session, mem_backend, args->psize);
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_usage_get(_mali_uk_profiling_memory_usage_get_s *args)
+{
+	args->memory_usage = _mali_ukk_report_memory_usage();
+	if (args->vaddr != 0) {
+		mali_mem_backend *mem_backend = NULL;
+		struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+		/* Get the backend that need to be modified. */
+		mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+		if (mem_backend == NULL) {
+			MALI_DEBUG_ASSERT_POINTER(mem_backend);
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		if (mem_backend->type == MALI_MEM_COW)
+			args->change_pages_nr = mem_backend->cow_mem.change_pages_nr;
+	}
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r7p0/linux/mali_memory_manager.h b/utgard/r7p0/linux/mali_memory_manager.h
new file mode 100644
index 0000000..caccd9e
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_manager.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_MANAGER_H__
+#define __MALI_MEMORY_MANAGER_H__
+
+#include "mali_osk.h"
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_uk_types.h"
+
+struct mali_allocation_manager {
+	rwlock_t vm_lock;
+	struct rb_root allocation_mgr_rb;
+	struct list_head head;
+	struct mutex list_mutex;
+	u32 mali_allocation_num;
+};
+
+extern struct idr mali_backend_idr;
+extern struct mutex mali_idr_mutex;
+
+int mali_memory_manager_init(struct mali_allocation_manager *mgr);
+void mali_memory_manager_uninit(struct mali_allocation_manager *mgr);
+
+void  mali_mem_allocation_struct_destory(mali_mem_allocation *alloc);
+
+_mali_osk_errcode_t mali_mem_add_mem_size(struct mali_session_data *session, u32 mali_addr, u32 add_size);
+mali_mem_backend *mali_mem_backend_struct_search(struct mali_session_data *session, u32 mali_address);
+_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_cow_modify_range(_mali_uk_cow_modify_range_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_usage_get(_mali_uk_profiling_memory_usage_get_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_resize(_mali_uk_mem_resize_s *args);
+
+
+#endif
+
diff --git a/utgard/r7p0/linux/mali_memory_os_alloc.c b/utgard/r7p0/linux/mali_memory_os_alloc.c
new file mode 100644
index 0000000..e789a03
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_os_alloc.c
@@ -0,0 +1,855 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/version.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+
+#include "mali_osk.h"
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_kernel_linux.h"
+
+/* Minimum size of allocator page pool */
+#define MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB * 256)
+#define MALI_OS_MEMORY_POOL_TRIM_JIFFIES (10 * CONFIG_HZ) /* Default to 10s */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+static unsigned long mali_dma_attrs;
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+/* Write combine dma_attrs */
+static DEFINE_DMA_ATTRS(dma_attrs_wc);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask);
+#else
+static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask);
+#endif
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc);
+#else
+static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc);
+static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc);
+#endif
+#endif
+static void mali_mem_os_trim_pool(struct work_struct *work);
+extern void show_mem(unsigned int flags);
+
+struct mali_mem_os_allocator mali_mem_os_allocator = {
+	.pool_lock = __SPIN_LOCK_UNLOCKED(pool_lock),
+	.pool_pages = LIST_HEAD_INIT(mali_mem_os_allocator.pool_pages),
+	.pool_count = 0,
+
+	.allocated_pages = ATOMIC_INIT(0),
+	.allocation_limit = 0,
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	.shrinker.shrink = mali_mem_os_shrink,
+#else
+	.shrinker.count_objects = mali_mem_os_shrink_count,
+	.shrinker.scan_objects = mali_mem_os_shrink,
+#endif
+	.shrinker.seeks = DEFAULT_SEEKS,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	.timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool, TIMER_DEFERRABLE),
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)
+	.timed_shrinker = __DEFERRED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool),
+#else
+	.timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool),
+#endif
+};
+
+u32 mali_mem_os_free(struct list_head *os_pages, u32 pages_count, mali_bool cow_flag)
+{
+	LIST_HEAD(pages);
+	struct mali_page_node *m_page, *m_tmp;
+	u32 free_pages_nr = 0;
+
+	if (cow_flag == MALI_TRUE) {
+		list_for_each_entry_safe(m_page, m_tmp, os_pages, list) {
+			/*only handle OS node here */
+			if (m_page->type == MALI_PAGE_NODE_OS) {
+				if (_mali_page_node_get_ref_count(m_page) == 1) {
+					list_move(&m_page->list, &pages);
+					atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+					free_pages_nr++;
+				} else {
+					_mali_page_node_unref(m_page);
+					m_page->page = NULL;
+					list_del(&m_page->list);
+					kfree(m_page);
+				}
+			}
+		}
+	} else {
+		list_cut_position(&pages, os_pages, os_pages->prev);
+		atomic_sub(pages_count, &mali_mem_os_allocator.allocated_pages);
+		free_pages_nr = pages_count;
+	}
+
+	/* Put pages on pool. */
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+	list_splice(&pages, &mali_mem_os_allocator.pool_pages);
+	mali_mem_os_allocator.pool_count += free_pages_nr;
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	if (mali_mem_os_allocator.pool_count > MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES) {
+		MALI_DEBUG_PRINT(5, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count));
+		queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES);
+	}
+	return free_pages_nr;
+}
+
+/**
+* put page without put it into page pool
+*/
+_mali_osk_errcode_t mali_mem_os_put_page(struct page *page)
+{
+	MALI_DEBUG_ASSERT_POINTER(page);
+	if (page_count(page) == 1) {
+		atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+		dma_unmap_page(&mali_platform_device->dev, page_private(page),
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+		ClearPagePrivate(page);
+	}
+	put_page(page);
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mem_os_resize_pages(mali_mem_os_mem *mem_from, mali_mem_os_mem *mem_to, u32 start_page, u32 page_count)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	u32 i = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_from);
+	MALI_DEBUG_ASSERT_POINTER(mem_to);
+
+	if (mem_from->count < start_page + page_count) {
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	list_for_each_entry_safe(m_page, m_tmp, &mem_from->pages, list) {
+		if (i >= start_page && i < start_page + page_count) {
+			list_move_tail(&m_page->list, &mem_to->pages);
+			mem_from->count--;
+			mem_to->count++;
+		}
+		i++;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+
+int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size)
+{
+	struct page *new_page;
+	LIST_HEAD(pages_list);
+	size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE;
+	size_t remaining = page_count;
+	struct mali_page_node *m_page, *m_tmp;
+	u32 i;
+
+	MALI_DEBUG_ASSERT_POINTER(os_mem);
+
+	if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) {
+		MALI_DEBUG_PRINT(2, ("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n",
+				     size,
+				     atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE,
+				     mali_mem_os_allocator.allocation_limit));
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&os_mem->pages);
+	os_mem->count = page_count;
+
+	/* Grab pages from pool. */
+	{
+		size_t pool_pages;
+
+		spin_lock(&mali_mem_os_allocator.pool_lock);
+		pool_pages = min(remaining, mali_mem_os_allocator.pool_count);
+		for (i = pool_pages; i > 0; i--) {
+			BUG_ON(list_empty(&mali_mem_os_allocator.pool_pages));
+			list_move(mali_mem_os_allocator.pool_pages.next, &pages_list);
+		}
+		mali_mem_os_allocator.pool_count -= pool_pages;
+		remaining -= pool_pages;
+		spin_unlock(&mali_mem_os_allocator.pool_lock);
+	}
+
+	/* Process pages from pool. */
+	i = 0;
+	list_for_each_entry_safe(m_page, m_tmp, &pages_list, list) {
+		BUG_ON(m_page == NULL);
+
+		list_move_tail(&m_page->list, &os_mem->pages);
+	}
+
+	/* Allocate new pages, if needed. */
+	for (i = 0; i < remaining; i++) {
+		dma_addr_t dma_addr;
+		gfp_t flags = __GFP_ZERO | __GFP_REPEAT | __GFP_NOWARN | __GFP_COLD;
+		int err;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE)
+		flags |= GFP_HIGHUSER;
+#else
+#ifdef CONFIG_ZONE_DMA32
+		flags |= GFP_DMA32;
+#else
+#ifdef CONFIG_ZONE_DMA
+		flags |= GFP_DMA;
+#else
+		/* arm64 utgard only work on < 4G, but the kernel
+		 * didn't provide method to allocte memory < 4G
+		 */
+		MALI_DEBUG_ASSERT(0);
+#endif
+#endif
+#endif
+
+		new_page = alloc_page(flags);
+
+		if (new_page == NULL) {
+			MALI_PRINT_ERROR(("alloc_page() return NULL! try again!\n"));
+			new_page = alloc_page(flags | GFP_KERNEL);
+		}
+
+		if (unlikely(new_page == NULL)) {
+			/* Calculate the number of pages actually allocated, and free them. */
+			MALI_PRINT_ERROR(("alloc_page() return NULL at last\n"));
+#ifdef AML_MALI_DEBUG
+			show_mem(SHOW_MEM_FILTER_NODES);
+#endif
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+			return -ENOMEM;
+		}
+
+		/* Ensure page is flushed from CPU caches. */
+		dma_addr = dma_map_page(&mali_platform_device->dev, new_page,
+					0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+		err = dma_mapping_error(&mali_platform_device->dev, dma_addr);
+		if (unlikely(err)) {
+			MALI_DEBUG_PRINT_ERROR(("OS Mem: Failed to DMA map page %p: %u",
+						new_page, err));
+			__free_page(new_page);
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+			return -EFAULT;
+		}
+
+		/* Store page phys addr */
+		SetPagePrivate(new_page);
+		set_page_private(new_page, dma_addr);
+
+		m_page = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+		if (unlikely(m_page == NULL)) {
+#ifdef AML_MALI_DEBUG
+			show_mem(SHOW_MEM_FILTER_NODES);
+#endif
+			MALI_PRINT_ERROR(("OS Mem: Can't allocate mali_page node!\n"));
+			dma_unmap_page(&mali_platform_device->dev, page_private(new_page),
+				       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+			ClearPagePrivate(new_page);
+			__free_page(new_page);
+			os_mem->count = (page_count - remaining) + i;
+			atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+			mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+			return -EFAULT;
+		}
+		m_page->page = new_page;
+
+		list_add_tail(&m_page->list, &os_mem->pages);
+	}
+
+	atomic_add(page_count, &mali_mem_os_allocator.allocated_pages);
+
+	if (mali_mem_os_allocator.pool_count < MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES) {
+		MALI_DEBUG_PRINT(4, ("OS Mem: Stopping pool trim timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count));
+		cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker);
+	}
+
+	return 0;
+}
+
+
+_mali_osk_errcode_t mali_mem_os_mali_map(mali_mem_os_mem *os_mem, struct mali_session_data *session, u32 vaddr, u32 start_page, u32 mapping_pgae_num, u32 props)
+{
+	struct mali_page_directory *pagedir = session->page_directory;
+	struct mali_page_node *m_page;
+	u32 virt;
+	u32 prop = props;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+	MALI_DEBUG_ASSERT_POINTER(os_mem);
+
+	MALI_DEBUG_ASSERT(start_page <= os_mem->count);
+	MALI_DEBUG_ASSERT((start_page + mapping_pgae_num) <= os_mem->count);
+
+	if ((start_page + mapping_pgae_num) == os_mem->count) {
+
+		virt = vaddr + MALI_MMU_PAGE_SIZE * (start_page + mapping_pgae_num);
+
+		list_for_each_entry_reverse(m_page, &os_mem->pages, list) {
+
+			virt -= MALI_MMU_PAGE_SIZE;
+			if (mapping_pgae_num > 0) {
+				dma_addr_t phys = page_private(m_page->page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+				/* Verify that the "physical" address is 32-bit and
+				* usable for Mali, when on a system with bus addresses
+				* wider than 32-bit. */
+				MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+				mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+			} else {
+				break;
+			}
+			mapping_pgae_num--;
+		}
+
+	} else {
+		u32 i = 0;
+
+		virt = vaddr;
+		list_for_each_entry(m_page, &os_mem->pages, list) {
+
+			if (i >= start_page) {
+				dma_addr_t phys = page_private(m_page->page);
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+				/* Verify that the "physical" address is 32-bit and
+				* usable for Mali, when on a system with bus addresses
+				* wider than 32-bit. */
+				MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+				mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+			}
+			i++;
+			virt += MALI_MMU_PAGE_SIZE;
+		}
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+
+void mali_mem_os_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+int mali_mem_os_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+	mali_mem_os_mem *os_mem = &mem_bkend->os_mem;
+	struct mali_page_node *m_page;
+	struct page *page;
+	int ret;
+	unsigned long addr = vma->vm_start;
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+
+	list_for_each_entry(m_page, &os_mem->pages, list) {
+		/* We should use vm_insert_page, but it does a dcache
+		 * flush which makes it way slower than remap_pfn_range or vm_insert_pfn.
+		ret = vm_insert_page(vma, addr, page);
+		*/
+		page = m_page->page;
+		ret = vm_insert_pfn(vma, addr, page_to_pfn(page));
+
+		if (unlikely(ret != 0)) {
+			return -EFAULT;
+		}
+		addr += _MALI_OSK_MALI_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+_mali_osk_errcode_t mali_mem_os_resize_cpu_map_locked(mali_mem_backend *mem_bkend, struct vm_area_struct *vma, unsigned long start_vaddr, u32 mappig_size)
+{
+	mali_mem_os_mem *os_mem = &mem_bkend->os_mem;
+	struct mali_page_node *m_page;
+	int ret;
+	int offset;
+	int mapping_page_num;
+	int count;
+
+	unsigned long vstart = vma->vm_start;
+
+	count = 0;
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+	MALI_DEBUG_ASSERT(0 == start_vaddr % _MALI_OSK_MALI_PAGE_SIZE);
+	MALI_DEBUG_ASSERT(0 == vstart % _MALI_OSK_MALI_PAGE_SIZE);
+	offset = (start_vaddr - vstart) / _MALI_OSK_MALI_PAGE_SIZE;
+	MALI_DEBUG_ASSERT(offset <= os_mem->count);
+	mapping_page_num = mappig_size / _MALI_OSK_MALI_PAGE_SIZE;
+	MALI_DEBUG_ASSERT((offset + mapping_page_num) <= os_mem->count);
+
+	if ((offset + mapping_page_num) == os_mem->count) {
+
+		unsigned long vm_end = start_vaddr + mappig_size;
+
+		list_for_each_entry_reverse(m_page, &os_mem->pages, list) {
+
+			vm_end -= _MALI_OSK_MALI_PAGE_SIZE;
+			if (mapping_page_num > 0) {
+				ret = vm_insert_pfn(vma, vm_end, page_to_pfn(m_page->page));
+
+				if (unlikely(ret != 0)) {
+					/*will return -EBUSY If the page has already been mapped into table, but it's OK*/
+					if (-EBUSY == ret) {
+						break;
+					} else {
+						MALI_DEBUG_PRINT(1, ("OS Mem: mali_mem_os_resize_cpu_map_locked failed, ret = %d, offset is %d,page_count is %d\n",
+								     ret,  offset + mapping_page_num, os_mem->count));
+					}
+					return _MALI_OSK_ERR_FAULT;
+				}
+			} else {
+				break;
+			}
+			mapping_page_num--;
+
+		}
+	} else {
+
+		list_for_each_entry(m_page, &os_mem->pages, list) {
+			if (count >= offset) {
+
+				ret = vm_insert_pfn(vma, vstart, page_to_pfn(m_page->page));
+
+				if (unlikely(ret != 0)) {
+					/*will return -EBUSY If the page has already been mapped into table, but it's OK*/
+					if (-EBUSY == ret) {
+						break;
+					} else {
+						MALI_DEBUG_PRINT(1, ("OS Mem: mali_mem_os_resize_cpu_map_locked failed, ret = %d, count is %d, offset is %d,page_count is %d\n",
+								     ret, count, offset, os_mem->count));
+					}
+					return _MALI_OSK_ERR_FAULT;
+				}
+			}
+			count++;
+			vstart += _MALI_OSK_MALI_PAGE_SIZE;
+		}
+	}
+	return _MALI_OSK_ERR_OK;
+}
+
+u32 mali_mem_os_release(mali_mem_backend *mem_bkend)
+{
+
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+
+	alloc = mem_bkend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	/* Unmap the memory from the mali virtual address space. */
+	mali_mem_os_mali_unmap(alloc);
+	mutex_lock(&mem_bkend->mutex);
+	/* Free pages */
+	if (MALI_MEM_BACKEND_FLAG_COWED & mem_bkend->flags) {
+		/* Lock to avoid the free race condition for the cow shared memory page node. */
+		_mali_osk_mutex_wait(session->cow_lock);
+		free_pages_nr = mali_mem_os_free(&mem_bkend->os_mem.pages, mem_bkend->os_mem.count, MALI_TRUE);
+		_mali_osk_mutex_signal(session->cow_lock);
+	} else {
+		free_pages_nr = mali_mem_os_free(&mem_bkend->os_mem.pages, mem_bkend->os_mem.count, MALI_FALSE);
+	}
+	mutex_unlock(&mem_bkend->mutex);
+
+	MALI_DEBUG_PRINT(4, ("OS Mem free : allocated size = 0x%x, free size = 0x%x\n", mem_bkend->os_mem.count * _MALI_OSK_MALI_PAGE_SIZE,
+			     free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+
+	mem_bkend->os_mem.count = 0;
+	return free_pages_nr;
+}
+
+
+#define MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE 128
+static struct {
+	struct {
+		mali_dma_addr phys;
+		mali_io_address mapping;
+	} page[MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE];
+	size_t count;
+	spinlock_t lock;
+} mali_mem_page_table_page_pool = {
+	.count = 0,
+	.lock = __SPIN_LOCK_UNLOCKED(pool_lock),
+};
+
+_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping)
+{
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_NOMEM;
+	dma_addr_t tmp_phys;
+
+	spin_lock(&mali_mem_page_table_page_pool.lock);
+	if (mali_mem_page_table_page_pool.count > 0) {
+		u32 i = --mali_mem_page_table_page_pool.count;
+		*phys = mali_mem_page_table_page_pool.page[i].phys;
+		*mapping = mali_mem_page_table_page_pool.page[i].mapping;
+
+		ret = _MALI_OSK_ERR_OK;
+	}
+	spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+	if (ret != _MALI_OSK_ERR_OK) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+		*mapping = dma_alloc_attrs(&mali_platform_device->dev,
+					   _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys,
+					   GFP_KERNEL, mali_dma_attrs);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		*mapping = dma_alloc_attrs(&mali_platform_device->dev,
+					   _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys,
+					   GFP_KERNEL, &dma_attrs_wc);
+#else
+		*mapping = dma_alloc_writecombine(&mali_platform_device->dev,
+						  _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys, GFP_KERNEL);
+#endif
+		if (*mapping != NULL) {
+			ret = _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+			/* Verify that the "physical" address is 32-bit and
+			 * usable for Mali, when on a system with bus addresses
+			 * wider than 32-bit. */
+			MALI_DEBUG_ASSERT(0 == (tmp_phys >> 32));
+#endif
+
+			*phys = (mali_dma_addr)tmp_phys;
+		}
+	}
+		if (ret != _MALI_OSK_ERR_OK) {
+			MALI_PRINT_ERROR(("dma_alloc_attrs() return NULL\n"));
+#ifdef AML_MALI_DEBUG
+			show_mem(SHOW_MEM_FILTER_NODES);
+#endif
+		}
+
+	return ret;
+}
+
+void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt)
+{
+	spin_lock(&mali_mem_page_table_page_pool.lock);
+	if (mali_mem_page_table_page_pool.count < MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE) {
+		u32 i = mali_mem_page_table_page_pool.count;
+
+		mali_mem_page_table_page_pool.page[i].phys = phys;
+		mali_mem_page_table_page_pool.page[i].mapping = virt;
+
+		++mali_mem_page_table_page_pool.count;
+
+		spin_unlock(&mali_mem_page_table_page_pool.lock);
+	} else {
+		spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+		dma_free_attrs(&mali_platform_device->dev,
+			       _MALI_OSK_MALI_PAGE_SIZE, virt, phys,
+			       mali_dma_attrs);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		dma_free_attrs(&mali_platform_device->dev,
+			       _MALI_OSK_MALI_PAGE_SIZE, virt, phys,
+			       &dma_attrs_wc);
+#else
+		dma_free_writecombine(&mali_platform_device->dev,
+				      _MALI_OSK_MALI_PAGE_SIZE, virt, phys);
+#endif
+	}
+}
+
+void mali_mem_os_free_page_node(struct mali_page_node *m_page)
+{
+	struct page *page = m_page->page;
+
+	MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_OS);
+
+	if (page_count(page) == 1) {
+		dma_unmap_page(&mali_platform_device->dev, page_private(page),
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+		ClearPagePrivate(page);
+	}
+	__free_page(page);
+	m_page->page = NULL;
+	list_del(&m_page->list);
+	kfree(m_page);
+}
+
+/* The maximum number of page table pool pages to free in one go. */
+#define MALI_MEM_OS_CHUNK_TO_FREE 64UL
+
+/* Free a certain number of pages from the page table page pool.
+ * The pool lock must be held when calling the function, and the lock will be
+ * released before returning.
+ */
+static void mali_mem_os_page_table_pool_free(size_t nr_to_free)
+{
+	mali_dma_addr phys_arr[MALI_MEM_OS_CHUNK_TO_FREE];
+	void *virt_arr[MALI_MEM_OS_CHUNK_TO_FREE];
+	u32 i;
+
+	MALI_DEBUG_ASSERT(nr_to_free <= MALI_MEM_OS_CHUNK_TO_FREE);
+
+	/* Remove nr_to_free pages from the pool and store them locally on stack. */
+	for (i = 0; i < nr_to_free; i++) {
+		u32 pool_index = mali_mem_page_table_page_pool.count - i - 1;
+
+		phys_arr[i] = mali_mem_page_table_page_pool.page[pool_index].phys;
+		virt_arr[i] = mali_mem_page_table_page_pool.page[pool_index].mapping;
+	}
+
+	mali_mem_page_table_page_pool.count -= nr_to_free;
+
+	spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+	/* After releasing the spinlock: free the pages we removed from the pool. */
+	for (i = 0; i < nr_to_free; i++) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+		dma_free_attrs(&mali_platform_device->dev, _MALI_OSK_MALI_PAGE_SIZE,
+			       virt_arr[i], (dma_addr_t)phys_arr[i], mali_dma_attrs);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+		dma_free_attrs(&mali_platform_device->dev, _MALI_OSK_MALI_PAGE_SIZE,
+			       virt_arr[i], (dma_addr_t)phys_arr[i], &dma_attrs_wc);
+#else
+		dma_free_writecombine(&mali_platform_device->dev,
+				      _MALI_OSK_MALI_PAGE_SIZE,
+				      virt_arr[i], (dma_addr_t)phys_arr[i]);
+#endif
+	}
+}
+
+static void mali_mem_os_trim_page_table_page_pool(void)
+{
+	size_t nr_to_free = 0;
+	size_t nr_to_keep;
+
+	/* Keep 2 page table pages for each 1024 pages in the page cache. */
+	nr_to_keep = mali_mem_os_allocator.pool_count / 512;
+	/* And a minimum of eight pages, to accomodate new sessions. */
+	nr_to_keep += 8;
+
+	if (spin_trylock(&mali_mem_page_table_page_pool.lock) == 0) return;
+
+	if (nr_to_keep < mali_mem_page_table_page_pool.count) {
+		nr_to_free = mali_mem_page_table_page_pool.count - nr_to_keep;
+		nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, nr_to_free);
+	}
+
+	/* Pool lock will be released by the callee. */
+	mali_mem_os_page_table_pool_free(nr_to_free);
+}
+
+static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	return mali_mem_os_allocator.pool_count;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask)
+#else
+static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask)
+#endif /* Linux < 2.6.35 */
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc)
+#else
+static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc)
+#endif /* Linux < 3.12.0 */
+#endif /* Linux < 3.0.0 */
+{
+	struct mali_page_node *m_page, *m_tmp;
+	unsigned long flags;
+	struct list_head *le, pages;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+	int nr = nr_to_scan;
+#else
+	int nr = sc->nr_to_scan;
+#endif
+
+	if (nr == 0) {
+		return mali_mem_os_shrink_count(shrinker, sc);
+	}
+
+	if (spin_trylock_irqsave(&mali_mem_os_allocator.pool_lock, flags) == 0) {
+		/* Not able to lock. */
+		return -1;
+	}
+
+	if (mali_mem_os_allocator.pool_count == 0) {
+		/* No pages availble */
+		spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags);
+		return 0;
+	}
+
+	/* Release from general page pool */
+	nr = min((size_t)nr, mali_mem_os_allocator.pool_count);
+	mali_mem_os_allocator.pool_count -= nr;
+	list_for_each(le, &mali_mem_os_allocator.pool_pages) {
+		--nr;
+		if (nr == 0) break;
+	}
+	list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le);
+	spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags);
+
+	list_for_each_entry_safe(m_page, m_tmp, &pages, list) {
+		mali_mem_os_free_page_node(m_page);
+	}
+
+	if (mali_mem_os_allocator.pool_count < MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES) {
+		/* Pools are empty, stop timer */
+		MALI_DEBUG_PRINT(5, ("Stopping timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count));
+		cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker);
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	return mali_mem_os_shrink_count(shrinker, sc);
+#else
+	return nr;
+#endif
+}
+
+static void mali_mem_os_trim_pool(struct work_struct *data)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	struct list_head *le;
+	LIST_HEAD(pages);
+	size_t nr_to_free;
+
+	MALI_IGNORE(data);
+
+	MALI_DEBUG_PRINT(3, ("OS Mem: Trimming pool %u\n", mali_mem_os_allocator.pool_count));
+
+	/* Release from general page pool */
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+	if (mali_mem_os_allocator.pool_count > MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES) {
+		size_t count = mali_mem_os_allocator.pool_count - MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES;
+		const size_t min_to_free = min(64, MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES);
+
+		/* Free half the pages on the pool above the static limit. Or 64 pages, 256KB. */
+		nr_to_free = max(count / 2, min_to_free);
+
+		mali_mem_os_allocator.pool_count -= nr_to_free;
+		list_for_each(le, &mali_mem_os_allocator.pool_pages) {
+			--nr_to_free;
+			if (nr_to_free == 0) break;
+		}
+		list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le);
+	}
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	list_for_each_entry_safe(m_page, m_tmp, &pages, list) {
+		mali_mem_os_free_page_node(m_page);
+	}
+
+	/* Release some pages from page table page pool */
+	mali_mem_os_trim_page_table_page_pool();
+
+	if (mali_mem_os_allocator.pool_count > MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES) {
+		MALI_DEBUG_PRINT(4, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count));
+		queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES);
+	}
+}
+
+_mali_osk_errcode_t mali_mem_os_init(void)
+{
+	mali_mem_os_allocator.wq = alloc_workqueue("mali-mem", WQ_UNBOUND, 1);
+	if (mali_mem_os_allocator.wq == NULL) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	mali_dma_attrs = DMA_ATTR_WRITE_COMBINE;
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &dma_attrs_wc);
+#endif
+
+	register_shrinker(&mali_mem_os_allocator.shrinker);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_os_term(void)
+{
+	struct mali_page_node *m_page, *m_tmp;
+
+	unregister_shrinker(&mali_mem_os_allocator.shrinker);
+	cancel_delayed_work_sync(&mali_mem_os_allocator.timed_shrinker);
+
+	if (mali_mem_os_allocator.wq != NULL) {
+		destroy_workqueue(mali_mem_os_allocator.wq);
+		mali_mem_os_allocator.wq = NULL;
+	}
+
+	spin_lock(&mali_mem_os_allocator.pool_lock);
+	list_for_each_entry_safe(m_page, m_tmp, &mali_mem_os_allocator.pool_pages, list) {
+		mali_mem_os_free_page_node(m_page);
+
+		--mali_mem_os_allocator.pool_count;
+	}
+	BUG_ON(mali_mem_os_allocator.pool_count);
+	spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+	/* Release from page table page pool */
+	do {
+		u32 nr_to_free;
+
+		spin_lock(&mali_mem_page_table_page_pool.lock);
+
+		nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, mali_mem_page_table_page_pool.count);
+
+		/* Pool lock will be released by the callee. */
+		mali_mem_os_page_table_pool_free(nr_to_free);
+	} while (mali_mem_page_table_page_pool.count != 0);
+}
+
+_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size)
+{
+	mali_mem_os_allocator.allocation_limit = size;
+
+	MALI_SUCCESS;
+}
+
+u32 mali_mem_os_stat(void)
+{
+	return atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE;
+}
diff --git a/utgard/r7p0/linux/mali_memory_os_alloc.h b/utgard/r7p0/linux/mali_memory_os_alloc.h
new file mode 100644
index 0000000..58efa25
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_os_alloc.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_OS_ALLOC_H__
+#define __MALI_MEMORY_OS_ALLOC_H__
+
+#include "mali_osk.h"
+#include "mali_memory_types.h"
+
+
+/** @brief Release Mali OS memory
+ *
+ * The session memory_lock must be held when calling this function.
+ *
+ * @param mem_bkend Pointer to the mali_mem_backend to release
+ */
+u32 mali_mem_os_release(mali_mem_backend *mem_bkend);
+
+_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping);
+
+void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt);
+
+_mali_osk_errcode_t mali_mem_os_init(void);
+
+void mali_mem_os_term(void);
+
+u32 mali_mem_os_stat(void);
+
+void mali_mem_os_free_page_node(struct mali_page_node *m_page);
+
+int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size);
+
+u32 mali_mem_os_free(struct list_head *os_pages, u32 pages_count, mali_bool cow_flag);
+
+_mali_osk_errcode_t mali_mem_os_put_page(struct page *page);
+
+_mali_osk_errcode_t mali_mem_os_resize_pages(mali_mem_os_mem *mem_from, mali_mem_os_mem *mem_to, u32 start_page, u32 page_count);
+
+_mali_osk_errcode_t mali_mem_os_mali_map(mali_mem_os_mem *os_mem, struct mali_session_data *session, u32 vaddr, u32 start_page, u32 mapping_pgae_num, u32 props);
+
+void mali_mem_os_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_os_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+
+_mali_osk_errcode_t mali_mem_os_resize_cpu_map_locked(mali_mem_backend *mem_bkend, struct vm_area_struct *vma, unsigned long start_vaddr, u32 mappig_size);
+
+#endif /* __MALI_MEMORY_OS_ALLOC_H__ */
diff --git a/utgard/r7p0/linux/mali_memory_secure.c b/utgard/r7p0/linux/mali_memory_secure.c
new file mode 100644
index 0000000..0ee7a5a
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_secure.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_memory.h"
+#include "mali_memory_secure.h"
+#include "mali_osk.h"
+#include <linux/mutex.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-buf.h>
+
+_mali_osk_errcode_t mali_mem_secure_attach_dma_buf(mali_mem_secure *secure_mem, u32 size, int mem_fd)
+{
+	struct dma_buf *buf;
+
+	MALI_DEBUG_ASSERT_POINTER(secure_mem);
+
+	/* get dma buffer */
+	buf = dma_buf_get(mem_fd);
+	if (IS_ERR_OR_NULL(buf)) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to get dma buf!\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (size != buf->size) {
+		MALI_DEBUG_PRINT_ERROR(("The secure mem size not match to the dma buf size!\n"));
+		goto failed_alloc_mem;
+	}
+
+	secure_mem->buf =  buf;
+	secure_mem->attachment = dma_buf_attach(secure_mem->buf, &mali_platform_device->dev);
+	if (secure_mem->attachment == NULL) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to get dma buf attachment!\n"));
+		goto failed_dma_attach;
+	}
+
+	secure_mem->sgt = dma_buf_map_attachment(secure_mem->attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(secure_mem->sgt)) {
+		MALI_DEBUG_PRINT_ERROR(("Failed to map dma buf attachment\n"));
+		goto  failed_dma_map;
+	}
+
+	secure_mem->count = size / MALI_MMU_PAGE_SIZE;
+
+	return _MALI_OSK_ERR_OK;
+
+failed_dma_map:
+	dma_buf_detach(secure_mem->buf, secure_mem->attachment);
+failed_dma_attach:
+failed_alloc_mem:
+	dma_buf_put(buf);
+	return _MALI_OSK_ERR_FAULT;
+}
+
+_mali_osk_errcode_t mali_mem_secure_mali_map(mali_mem_secure *secure_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+	struct mali_page_directory *pagedir;
+	struct scatterlist *sg;
+	u32 virt = vaddr;
+	u32 prop = props;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(secure_mem);
+	MALI_DEBUG_ASSERT_POINTER(secure_mem->sgt);
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	pagedir = session->page_directory;
+
+	for_each_sg(secure_mem->sgt->sgl, sg, secure_mem->sgt->nents, i) {
+		u32 size = sg_dma_len(sg);
+		dma_addr_t phys = sg_dma_address(sg);
+
+		/* sg must be page aligned. */
+		MALI_DEBUG_ASSERT(0 == size % MALI_MMU_PAGE_SIZE);
+		MALI_DEBUG_ASSERT(0 == (phys & ~(uintptr_t)0xFFFFFFFF));
+
+		mali_mmu_pagedir_update(pagedir, virt, phys, size, prop);
+
+		MALI_DEBUG_PRINT(3, ("The secure mem physical address: 0x%x gpu virtual address: 0x%x!\n", phys, virt));
+		virt += size;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_secure_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+
+int mali_mem_secure_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+
+	int ret = 0;
+	struct scatterlist *sg;
+	mali_mem_secure *secure_mem = &mem_bkend->secure_mem;
+	unsigned long addr = vma->vm_start;
+	int i;
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_SECURE);
+
+	for_each_sg(secure_mem->sgt->sgl, sg, secure_mem->sgt->nents, i) {
+		phys_addr_t phys;
+		dma_addr_t dev_addr;
+		u32 size, j;
+
+		dev_addr = sg_dma_address(sg);
+#if defined(CONFIG_ARM64) || LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+		phys =  dma_to_phys(&mali_platform_device->dev, dev_addr);
+#else
+		phys = page_to_phys(pfn_to_page(dma_to_pfn(&mali_platform_device->dev, dev_addr)));
+#endif
+		size = sg_dma_len(sg);
+		MALI_DEBUG_ASSERT(0 == size % _MALI_OSK_MALI_PAGE_SIZE);
+
+		for (j = 0; j < size / _MALI_OSK_MALI_PAGE_SIZE; j++) {
+			ret = vm_insert_pfn(vma, addr, PFN_DOWN(phys));
+
+			if (unlikely(ret != 0)) {
+				return -EFAULT;
+			}
+			addr += _MALI_OSK_MALI_PAGE_SIZE;
+			phys += _MALI_OSK_MALI_PAGE_SIZE;
+
+			MALI_DEBUG_PRINT(3, ("The secure mem physical address: 0x%x , cpu virtual address: 0x%x!\n", phys, addr));
+		}
+	}
+	return ret;
+}
+
+u32 mali_mem_secure_release(mali_mem_backend *mem_bkend)
+{
+	struct mali_mem_secure *mem;
+	mali_mem_allocation *alloc = mem_bkend->mali_allocation;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_SECURE);
+
+	mem = &mem_bkend->secure_mem;
+	MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+	MALI_DEBUG_ASSERT_POINTER(mem->buf);
+	MALI_DEBUG_ASSERT_POINTER(mem->sgt);
+	/* Unmap the memory from the mali virtual address space. */
+	mali_mem_secure_mali_unmap(alloc);
+	mutex_lock(&mem_bkend->mutex);
+	dma_buf_unmap_attachment(mem->attachment, mem->sgt, DMA_BIDIRECTIONAL);
+	dma_buf_detach(mem->buf, mem->attachment);
+	dma_buf_put(mem->buf);
+	mutex_unlock(&mem_bkend->mutex);
+
+	free_pages_nr = mem->count;
+
+	return free_pages_nr;
+}
+
+
diff --git a/utgard/r7p0/linux/mali_memory_secure.h b/utgard/r7p0/linux/mali_memory_secure.h
new file mode 100644
index 0000000..41b4e9c
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_secure.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2010, 2013, 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_SECURE_H__
+#define __MALI_MEMORY_SECURE_H__
+
+#include "mali_session.h"
+#include "mali_memory.h"
+#include <linux/spinlock.h>
+
+#include "mali_memory_types.h"
+
+_mali_osk_errcode_t mali_mem_secure_attach_dma_buf(mali_mem_secure *secure_mem, u32 size, int mem_fd);
+
+_mali_osk_errcode_t mali_mem_secure_mali_map(mali_mem_secure *secure_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+
+void mali_mem_secure_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_secure_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+
+u32 mali_mem_secure_release(mali_mem_backend *mem_bkend);
+
+#endif /* __MALI_MEMORY_SECURE_H__ */
diff --git a/utgard/r7p0/linux/mali_memory_swap_alloc.c b/utgard/r7p0/linux/mali_memory_swap_alloc.c
new file mode 100644
index 0000000..7f5be00
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_swap_alloc.c
@@ -0,0 +1,950 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/idr.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+#include <linux/shmem_fs.h>
+#include <linux/file.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_memory.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_cow.h"
+#include "mali_ukk.h"
+#include "mali_kernel_utilization.h"
+#include "mali_memory_swap_alloc.h"
+
+
+static struct _mali_osk_bitmap idx_mgr;
+static struct file *global_swap_file;
+static struct address_space *global_swap_space;
+static _mali_osk_wq_work_t *mali_mem_swap_out_workq;
+static u32 mem_backend_swapped_pool_size;
+#ifdef MALI_MEM_SWAP_TRACKING
+static u32 mem_backend_swapped_unlock_size;
+#endif
+/* Lock order: mem_backend_swapped_pool_lock  > each memory backend's mutex lock.
+ * This lock used to protect mem_backend_swapped_pool_size and mem_backend_swapped_pool. */
+static struct mutex mem_backend_swapped_pool_lock;
+static struct list_head mem_backend_swapped_pool;
+
+extern struct mali_mem_os_allocator mali_mem_os_allocator;
+
+#define MALI_SWAP_LOW_MEM_DEFAULT_VALUE (60*1024*1024)
+#define MALI_SWAP_INVALIDATE_MALI_ADDRESS (0)               /* Used to mark the given memory cookie is invalidate. */
+#define MALI_SWAP_GLOBAL_SWAP_FILE_SIZE (0xFFFFFFFF)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX ((MALI_SWAP_GLOBAL_SWAP_FILE_SIZE) >> PAGE_SHIFT)
+#else
+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX ((MALI_SWAP_GLOBAL_SWAP_FILE_SIZE) >> PAGE_CACHE_SHIFT)
+#endif
+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE (1 << 15) /* Reserved for CoW nonlinear swap backend memory, the space size is 128MB. */
+
+unsigned int mali_mem_swap_out_threshold_value = MALI_SWAP_LOW_MEM_DEFAULT_VALUE;
+
+/**
+ * We have two situations to do shrinking things, one is we met low GPU utilization which shows GPU needn't touch too
+ * swappable backends in short time, and the other one is we add new swappable backends, the total pool size exceed
+ * the threshold value of the swapped pool size.
+ */
+typedef enum {
+	MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION = 100,
+	MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS = 257,
+} _mali_mem_swap_pool_shrink_type_t;
+
+static void mali_mem_swap_swapped_bkend_pool_check_for_low_utilization(void *arg);
+
+_mali_osk_errcode_t mali_mem_swap_init(void)
+{
+	gfp_t flags = __GFP_NORETRY | __GFP_NOWARN;
+
+	if (_mali_osk_bitmap_init(&idx_mgr, MALI_SWAP_GLOBAL_SWAP_FILE_INDEX, MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE) != _MALI_OSK_ERR_OK) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	global_swap_file = shmem_file_setup("mali_swap", MALI_SWAP_GLOBAL_SWAP_FILE_SIZE, VM_NORESERVE);
+	if (IS_ERR(global_swap_file)) {
+		_mali_osk_bitmap_term(&idx_mgr);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	global_swap_space = global_swap_file->f_path.dentry->d_inode->i_mapping;
+
+	mali_mem_swap_out_workq = _mali_osk_wq_create_work(mali_mem_swap_swapped_bkend_pool_check_for_low_utilization, NULL);
+	if (mali_mem_swap_out_workq == NULL) {
+		_mali_osk_bitmap_term(&idx_mgr);
+		fput(global_swap_file);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE)
+	flags |= GFP_HIGHUSER;
+#else
+#ifdef CONFIG_ZONE_DMA32
+	flags |= GFP_DMA32;
+#else
+#ifdef CONFIG_ZONE_DMA
+	flags |= GFP_DMA;
+#else
+	/* arm64 utgard only work on < 4G, but the kernel
+	 * didn't provide method to allocte memory < 4G
+	 */
+	MALI_DEBUG_ASSERT(0);
+#endif
+#endif
+#endif
+
+	/* When we use shmem_read_mapping_page to allocate/swap-in, it will
+	 * use these flags to allocate new page if need.*/
+	mapping_set_gfp_mask(global_swap_space, flags);
+
+	mem_backend_swapped_pool_size = 0;
+#ifdef MALI_MEM_SWAP_TRACKING
+	mem_backend_swapped_unlock_size = 0;
+#endif
+	mutex_init(&mem_backend_swapped_pool_lock);
+	INIT_LIST_HEAD(&mem_backend_swapped_pool);
+
+	MALI_DEBUG_PRINT(2, ("Mali SWAP: Swap out threshold vaule is %uM\n", mali_mem_swap_out_threshold_value >> 20));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_swap_term(void)
+{
+	_mali_osk_bitmap_term(&idx_mgr);
+
+	fput(global_swap_file);
+
+	_mali_osk_wq_delete_work(mali_mem_swap_out_workq);
+
+	MALI_DEBUG_ASSERT(list_empty(&mem_backend_swapped_pool));
+	MALI_DEBUG_ASSERT(mem_backend_swapped_pool_size == 0);
+
+	return;
+}
+
+struct file *mali_mem_swap_get_global_swap_file(void)
+{
+	return  global_swap_file;
+}
+
+/* Judge if swappable backend in swapped pool. */
+static mali_bool mali_memory_swap_backend_in_swapped_pool(mali_mem_backend *mem_bkend)
+{
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+
+	return !list_empty(&mem_bkend->list);
+}
+
+void mali_memory_swap_list_backend_delete(mali_mem_backend *mem_bkend)
+{
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+
+	mutex_lock(&mem_backend_swapped_pool_lock);
+	mutex_lock(&mem_bkend->mutex);
+
+	if (mali_memory_swap_backend_in_swapped_pool(mem_bkend) == MALI_FALSE) {
+		mutex_unlock(&mem_bkend->mutex);
+		mutex_unlock(&mem_backend_swapped_pool_lock);
+		return;
+	}
+
+	MALI_DEBUG_ASSERT(!list_empty(&mem_bkend->list));
+
+	list_del_init(&mem_bkend->list);
+
+	mutex_unlock(&mem_bkend->mutex);
+
+	mem_backend_swapped_pool_size -= mem_bkend->size;
+
+	mutex_unlock(&mem_backend_swapped_pool_lock);
+}
+
+static void mali_mem_swap_out_page_node(mali_page_node *page_node)
+{
+	MALI_DEBUG_ASSERT(page_node);
+
+	dma_unmap_page(&mali_platform_device->dev, page_node->swap_it->dma_addr,
+		       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+	set_page_dirty(page_node->swap_it->page);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
+	put_page(page_node->swap_it->page);
+#else
+	page_cache_release(page_node->swap_it->page);
+#endif
+}
+
+void mali_mem_swap_unlock_single_mem_backend(mali_mem_backend *mem_bkend)
+{
+	mali_page_node *m_page;
+
+	MALI_DEBUG_ASSERT(mutex_is_locked(&mem_bkend->mutex) == 1);
+
+	if (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN)) {
+		return;
+	}
+
+	mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN;
+
+	list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+		mali_mem_swap_out_page_node(m_page);
+	}
+
+	return;
+}
+
+static void mali_mem_swap_unlock_partial_locked_mem_backend(mali_mem_backend *mem_bkend, mali_page_node *page_node)
+{
+	mali_page_node *m_page;
+
+	MALI_DEBUG_ASSERT(mutex_is_locked(&mem_bkend->mutex) == 1);
+
+	list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+		if (m_page == page_node) {
+			break;
+		}
+		mali_mem_swap_out_page_node(m_page);
+	}
+}
+
+static void mali_mem_swap_swapped_bkend_pool_shrink(_mali_mem_swap_pool_shrink_type_t shrink_type)
+{
+	mali_mem_backend *bkend, *tmp_bkend;
+	long system_free_size;
+	u32 last_gpu_utilization, gpu_utilization_threshold_value, temp_swap_out_threshold_value;
+
+	MALI_DEBUG_ASSERT(mutex_is_locked(&mem_backend_swapped_pool_lock) == 1);
+
+	if (shrink_type == MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION) {
+		/**
+		 * When we met that system memory is very low and Mali locked swappable memory size is less than
+		 * threshold value, and at the same time, GPU load is very low and don't need high performance,
+		 * at this condition, we can unlock more swap memory backend from swapped backends pool.
+		 */
+		gpu_utilization_threshold_value = MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION;
+		temp_swap_out_threshold_value = (mali_mem_swap_out_threshold_value >> 2);
+	} else {
+		/* When we add swappable memory backends to swapped pool, we need to think that we couldn't
+		* hold too much swappable backends in Mali driver, and also we need considering performance.
+		* So there is a balance for swapping out memory backend, we should follow the following conditions:
+		* 1. Total memory size in global mem backend swapped pool is more than the defined threshold value.
+		* 2. System level free memory size is less than the defined threshold value.
+		* 3. Please note that GPU utilization problem isn't considered in this condition.
+		*/
+		gpu_utilization_threshold_value = MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS;
+		temp_swap_out_threshold_value = mali_mem_swap_out_threshold_value;
+	}
+
+	/* Get system free pages number. */
+	system_free_size = global_page_state(NR_FREE_PAGES) * PAGE_SIZE;
+	last_gpu_utilization = _mali_ukk_utilization_gp_pp();
+
+	if ((last_gpu_utilization < gpu_utilization_threshold_value)
+	    && (system_free_size < mali_mem_swap_out_threshold_value)
+	    && (mem_backend_swapped_pool_size > temp_swap_out_threshold_value)) {
+		list_for_each_entry_safe(bkend, tmp_bkend, &mem_backend_swapped_pool, list) {
+			if (mem_backend_swapped_pool_size <= temp_swap_out_threshold_value) {
+				break;
+			}
+
+			mutex_lock(&bkend->mutex);
+
+			/* check if backend is in use. */
+			if (bkend->using_count > 0) {
+				mutex_unlock(&bkend->mutex);
+				continue;
+			}
+
+			mali_mem_swap_unlock_single_mem_backend(bkend);
+			list_del_init(&bkend->list);
+			mem_backend_swapped_pool_size -= bkend->size;
+#ifdef MALI_MEM_SWAP_TRACKING
+			mem_backend_swapped_unlock_size += bkend->size;
+#endif
+			mutex_unlock(&bkend->mutex);
+		}
+	}
+
+	return;
+}
+
+static void mali_mem_swap_swapped_bkend_pool_check_for_low_utilization(void *arg)
+{
+	MALI_IGNORE(arg);
+
+	mutex_lock(&mem_backend_swapped_pool_lock);
+
+	mali_mem_swap_swapped_bkend_pool_shrink(MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION);
+
+	mutex_unlock(&mem_backend_swapped_pool_lock);
+}
+
+/**
+ * After PP job finished, we add all of swappable memory backend used by this PP
+ * job to the tail of the global swapped pool, and if the total size of swappable memory is more than threshold
+ * value, we also need to shrink the swapped pool start from the head of the list.
+ */
+void mali_memory_swap_list_backend_add(mali_mem_backend *mem_bkend)
+{
+	mutex_lock(&mem_backend_swapped_pool_lock);
+	mutex_lock(&mem_bkend->mutex);
+
+	if (mali_memory_swap_backend_in_swapped_pool(mem_bkend)) {
+		MALI_DEBUG_ASSERT(!list_empty(&mem_bkend->list));
+
+		list_del_init(&mem_bkend->list);
+		list_add_tail(&mem_bkend->list, &mem_backend_swapped_pool);
+		mutex_unlock(&mem_bkend->mutex);
+		mutex_unlock(&mem_backend_swapped_pool_lock);
+		return;
+	}
+
+	list_add_tail(&mem_bkend->list, &mem_backend_swapped_pool);
+
+	mutex_unlock(&mem_bkend->mutex);
+	mem_backend_swapped_pool_size += mem_bkend->size;
+
+	mali_mem_swap_swapped_bkend_pool_shrink(MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS);
+
+	mutex_unlock(&mem_backend_swapped_pool_lock);
+	return;
+}
+
+
+u32 mali_mem_swap_idx_alloc(void)
+{
+	return _mali_osk_bitmap_alloc(&idx_mgr);
+}
+
+void mali_mem_swap_idx_free(u32 idx)
+{
+	_mali_osk_bitmap_free(&idx_mgr, idx);
+}
+
+static u32 mali_mem_swap_idx_range_alloc(u32 count)
+{
+	u32 index;
+
+	index = _mali_osk_bitmap_alloc_range(&idx_mgr, count);
+
+	return index;
+}
+
+static void mali_mem_swap_idx_range_free(u32 idx, int num)
+{
+	_mali_osk_bitmap_free_range(&idx_mgr, idx, num);
+}
+
+struct mali_swap_item *mali_mem_swap_alloc_swap_item(void)
+{
+	mali_swap_item *swap_item;
+
+	swap_item = kzalloc(sizeof(mali_swap_item), GFP_KERNEL);
+
+	if (swap_item == NULL) {
+		return NULL;
+	}
+
+	atomic_set(&swap_item->ref_count, 1);
+	swap_item->page = NULL;
+	atomic_add(1, &mali_mem_os_allocator.allocated_pages);
+
+	return swap_item;
+}
+
+void mali_mem_swap_free_swap_item(mali_swap_item *swap_item)
+{
+	struct inode *file_node;
+	long long start, end;
+
+	/* If this swap item is shared, we just reduce the reference counter. */
+	if (atomic_dec_return(&swap_item->ref_count) == 0) {
+		file_node = global_swap_file->f_path.dentry->d_inode;
+		start = swap_item->idx;
+		start = start << 12;
+		end = start + PAGE_SIZE;
+
+		shmem_truncate_range(file_node, start, (end - 1));
+
+		mali_mem_swap_idx_free(swap_item->idx);
+
+		atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+
+		kfree(swap_item);
+	}
+}
+
+/* Used to allocate new swap item for new memory allocation and cow page for write. */
+struct mali_page_node *_mali_mem_swap_page_node_allocate(void)
+{
+	struct mali_page_node *m_page;
+
+	m_page = _mali_page_node_allocate(MALI_PAGE_NODE_SWAP);
+
+	if (m_page == NULL) {
+		return NULL;
+	}
+
+	m_page->swap_it = mali_mem_swap_alloc_swap_item();
+
+	if (m_page->swap_it == NULL) {
+		kfree(m_page);
+		return NULL;
+	}
+
+	return m_page;
+}
+
+_mali_osk_errcode_t _mali_mem_swap_put_page_node(struct mali_page_node *m_page)
+{
+
+	mali_mem_swap_free_swap_item(m_page->swap_it);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_mem_swap_page_node_free(struct mali_page_node *m_page)
+{
+	_mali_mem_swap_put_page_node(m_page);
+
+	kfree(m_page);
+
+	return;
+}
+
+u32 mali_mem_swap_free(mali_mem_swap *swap_mem)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(swap_mem);
+
+	list_for_each_entry_safe(m_page, m_tmp, &swap_mem->pages, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_SWAP);
+
+		/* free the page node and release the swap item, if the ref count is 1,
+		 * then need also free the swap item. */
+		list_del(&m_page->list);
+		if (_mali_page_node_get_ref_count(m_page) == 1) {
+			free_pages_nr++;
+		}
+
+		_mali_mem_swap_page_node_free(m_page);
+	}
+
+	return free_pages_nr;
+}
+
+static u32 mali_mem_swap_cow_free(mali_mem_cow *cow_mem)
+{
+	struct mali_page_node *m_page, *m_tmp;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(cow_mem);
+
+	list_for_each_entry_safe(m_page, m_tmp, &cow_mem->pages, list) {
+		MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_SWAP);
+
+		/* free the page node and release the swap item, if the ref count is 1,
+		 * then need also free the swap item. */
+		list_del(&m_page->list);
+		if (_mali_page_node_get_ref_count(m_page) == 1) {
+			free_pages_nr++;
+		}
+
+		_mali_mem_swap_page_node_free(m_page);
+	}
+
+	return free_pages_nr;
+}
+
+u32 mali_mem_swap_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped)
+{
+	mali_mem_allocation *alloc;
+	u32 free_pages_nr = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+	alloc = mem_bkend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	if (is_mali_mapped) {
+		mali_mem_swap_mali_unmap(alloc);
+	}
+
+	mali_memory_swap_list_backend_delete(mem_bkend);
+
+	mutex_lock(&mem_bkend->mutex);
+	/* To make sure the given memory backend was unlocked from Mali side,
+	 * and then free this memory block. */
+	mali_mem_swap_unlock_single_mem_backend(mem_bkend);
+	mutex_unlock(&mem_bkend->mutex);
+
+	if (mem_bkend->type == MALI_MEM_SWAP) {
+		free_pages_nr = mali_mem_swap_free(&mem_bkend->swap_mem);
+	} else {
+		free_pages_nr = mali_mem_swap_cow_free(&mem_bkend->cow_mem);
+	}
+
+	return free_pages_nr;
+}
+
+mali_bool mali_mem_swap_in_page_node(struct mali_page_node *page_node)
+{
+	MALI_DEBUG_ASSERT(page_node != NULL);
+
+	page_node->swap_it->page = shmem_read_mapping_page(global_swap_space, page_node->swap_it->idx);
+
+	if (IS_ERR(page_node->swap_it->page)) {
+		MALI_DEBUG_PRINT_ERROR(("SWAP Mem: failed to swap in page with index: %d.\n", page_node->swap_it->idx));
+		return MALI_FALSE;
+	}
+
+	/* Ensure page is flushed from CPU caches. */
+	page_node->swap_it->dma_addr = dma_map_page(&mali_platform_device->dev, page_node->swap_it->page,
+				       0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+	return MALI_TRUE;
+}
+
+int mali_mem_swap_alloc_pages(mali_mem_swap *swap_mem, u32 size, u32 *bkend_idx)
+{
+	size_t page_count = PAGE_ALIGN(size) / PAGE_SIZE;
+	struct mali_page_node *m_page;
+	long system_free_size;
+	u32 i, index;
+	mali_bool ret;
+
+	MALI_DEBUG_ASSERT(swap_mem != NULL);
+	MALI_DEBUG_ASSERT(bkend_idx != NULL);
+	MALI_DEBUG_ASSERT(page_count <= MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE);
+
+	if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) {
+		MALI_DEBUG_PRINT(2, ("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n",
+				     size,
+				     atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE,
+				     mali_mem_os_allocator.allocation_limit));
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	INIT_LIST_HEAD(&swap_mem->pages);
+	swap_mem->count = page_count;
+	index = mali_mem_swap_idx_range_alloc(page_count);
+
+	if (index == _MALI_OSK_BITMAP_INVALIDATE_INDEX) {
+		MALI_PRINT_ERROR(("Mali Swap: Failed to allocate continuous index for swappable Mali memory."));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	for (i = 0; i < page_count; i++) {
+		m_page = _mali_mem_swap_page_node_allocate();
+
+		if (m_page == NULL) {
+			MALI_DEBUG_PRINT_ERROR(("SWAP Mem: Failed to allocate mali page node."));
+			swap_mem->count = i;
+
+			mali_mem_swap_free(swap_mem);
+			mali_mem_swap_idx_range_free(index + i, page_count - i);
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		m_page->swap_it->idx = index + i;
+
+		ret = mali_mem_swap_in_page_node(m_page);
+
+		if (ret == MALI_FALSE) {
+			MALI_DEBUG_PRINT_ERROR(("SWAP Mem: Allocate new page from SHMEM file failed."));
+			_mali_mem_swap_page_node_free(m_page);
+			mali_mem_swap_idx_range_free(index + i + 1, page_count - i - 1);
+
+			swap_mem->count = i;
+			mali_mem_swap_free(swap_mem);
+			return _MALI_OSK_ERR_NOMEM;
+		}
+
+		list_add_tail(&m_page->list, &swap_mem->pages);
+	}
+
+	system_free_size = global_page_state(NR_FREE_PAGES) * PAGE_SIZE;
+
+	if ((system_free_size < mali_mem_swap_out_threshold_value)
+	    && (mem_backend_swapped_pool_size > (mali_mem_swap_out_threshold_value >> 2))
+	    && mali_utilization_enabled()) {
+		_mali_osk_wq_schedule_work(mali_mem_swap_out_workq);
+	}
+
+	*bkend_idx = index;
+	return 0;
+}
+
+void mali_mem_swap_mali_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+
+/* Insert these pages from shmem to mali page table*/
+_mali_osk_errcode_t mali_mem_swap_mali_map(mali_mem_swap *swap_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+	struct mali_page_directory *pagedir = session->page_directory;
+	struct mali_page_node *m_page;
+	dma_addr_t phys;
+	u32 virt = vaddr;
+	u32 prop = props;
+
+	list_for_each_entry(m_page, &swap_mem->pages, list) {
+		MALI_DEBUG_ASSERT(m_page->swap_it->page != NULL);
+		phys = m_page->swap_it->dma_addr;
+
+		mali_mmu_pagedir_update(pagedir, virt, phys, MALI_MMU_PAGE_SIZE, prop);
+		virt += MALI_MMU_PAGE_SIZE;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_in_pages(struct mali_pp_job *job)
+{
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	struct mali_page_node *m_page;
+	mali_bool swap_in_success = MALI_TRUE;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	for (i = 0; i < num_memory_cookies; i++) {
+
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+		if (mali_vma_node == NULL) {
+			job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+			swap_in_success = MALI_FALSE;
+			MALI_PRINT_ERROR(("SWAP Mem: failed to find mali_vma_node through Mali address: 0x%08x.\n", mali_addr));
+			continue;
+		}
+
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(mali_alloc != NULL);
+
+		if (MALI_MEM_SWAP != mali_alloc->type &&
+		    MALI_MEM_COW != mali_alloc->type) {
+			continue;
+		}
+
+		/* Get backend memory & Map on GPU */
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(mem_bkend != NULL);
+
+		/* We neednot hold backend's lock here, race safe.*/
+		if ((mem_bkend->type == MALI_MEM_COW) &&
+		    (!(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+			continue;
+		}
+
+		mutex_lock(&mem_bkend->mutex);
+
+		/* When swap_in_success is MALI_FALSE, it means this job has memory backend that could not be swapped in,
+		 * and it will be aborted in mali scheduler, so here, we just mark those memory cookies which
+		 * should not be swapped out when delete job to invalide */
+		if (swap_in_success == MALI_FALSE) {
+			job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+
+		/* Before swap in, checking if this memory backend has been swapped in by the latest flushed jobs. */
+		++mem_bkend->using_count;
+
+		if (mem_bkend->using_count > 1) {
+			MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN != (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags));
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+
+		if (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN != (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags)) {
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+
+
+		list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+			if (mali_mem_swap_in_page_node(m_page) == MALI_FALSE) {
+				/* Don't have enough memory to swap in page, so release pages have already been swapped
+				 * in and then mark this pp job to be fail. */
+				mali_mem_swap_unlock_partial_locked_mem_backend(mem_bkend, m_page);
+				swap_in_success = MALI_FALSE;
+				break;
+			}
+		}
+
+		if (swap_in_success) {
+#ifdef MALI_MEM_SWAP_TRACKING
+			mem_backend_swapped_unlock_size -= mem_bkend->size;
+#endif
+			_mali_osk_mutex_wait(session->memory_lock);
+			mali_mem_swap_mali_map(&mem_bkend->swap_mem, session, mali_alloc->mali_mapping.addr, mali_alloc->mali_mapping.properties);
+			_mali_osk_mutex_signal(session->memory_lock);
+
+			/* Remove the unlock flag from mem backend flags, mark this backend has been swapped in. */
+			mem_bkend->flags &= ~(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN);
+			mutex_unlock(&mem_bkend->mutex);
+		} else {
+			--mem_bkend->using_count;
+			/* Marking that this backend is not swapped in, need not to be processed anymore. */
+			job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+			mutex_unlock(&mem_bkend->mutex);
+		}
+	}
+
+	job->swap_status = swap_in_success ? MALI_SWAP_IN_SUCC : MALI_SWAP_IN_FAIL;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_out_pages(struct mali_pp_job *job)
+{
+	u32 num_memory_cookies;
+	struct mali_session_data *session;
+	struct mali_vma_node *mali_vma_node = NULL;
+	mali_mem_allocation *mali_alloc = NULL;
+	mali_mem_backend *mem_bkend = NULL;
+	int i;
+
+	MALI_DEBUG_ASSERT_POINTER(job);
+
+	num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+	session = mali_pp_job_get_session(job);
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+
+	for (i = 0; i < num_memory_cookies; i++) {
+		u32 mali_addr  = mali_pp_job_get_memory_cookie(job, i);
+
+		if (mali_addr == MALI_SWAP_INVALIDATE_MALI_ADDRESS) {
+			continue;
+		}
+
+		mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+
+		if (mali_vma_node == NULL) {
+			MALI_PRINT_ERROR(("SWAP Mem: failed to find mali_vma_node through Mali address: 0x%08x.\n", mali_addr));
+			continue;
+		}
+
+		mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+		MALI_DEBUG_ASSERT(mali_alloc != NULL);
+
+		if (MALI_MEM_SWAP != mali_alloc->type &&
+		    MALI_MEM_COW != mali_alloc->type) {
+			continue;
+		}
+
+		mutex_lock(&mali_idr_mutex);
+		mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+		mutex_unlock(&mali_idr_mutex);
+		MALI_DEBUG_ASSERT(mem_bkend != NULL);
+
+		/* We neednot hold backend's lock here, race safe.*/
+		if ((mem_bkend->type == MALI_MEM_COW) &&
+		    (!(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+			continue;
+		}
+
+		mutex_lock(&mem_bkend->mutex);
+
+		MALI_DEBUG_ASSERT(mem_bkend->using_count > 0);
+
+		/* Reducing the using_count of mem backend means less pp job are using this memory backend,
+		 * if this count get to zero, it means no pp job is using it now, could put it to swap out list. */
+		--mem_bkend->using_count;
+
+		if (mem_bkend->using_count > 0) {
+			mutex_unlock(&mem_bkend->mutex);
+			continue;
+		}
+		mutex_unlock(&mem_bkend->mutex);
+
+		mali_memory_swap_list_backend_add(mem_bkend);
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_allocate_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep)
+{
+	struct mali_page_node *m_page, *found_node = NULL;
+	struct page *found_page;
+	mali_mem_swap *swap = NULL;
+	mali_mem_cow *cow = NULL;
+	dma_addr_t dma_addr;
+	u32 i = 0;
+
+	if (mem_bkend->type == MALI_MEM_SWAP) {
+		swap = &mem_bkend->swap_mem;
+		list_for_each_entry(m_page, &swap->pages, list) {
+			if (i == offset) {
+				found_node = m_page;
+				break;
+			}
+			i++;
+		}
+	} else {
+		MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+		MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (MALI_MEM_BACKEND_FLAG_SWAP_COWED & mem_bkend->flags));
+
+		cow = &mem_bkend->cow_mem;
+		list_for_each_entry(m_page, &cow->pages, list) {
+			if (i == offset) {
+				found_node = m_page;
+				break;
+			}
+			i++;
+		}
+	}
+
+	if (found_node == NULL) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	found_page = shmem_read_mapping_page(global_swap_space, found_node->swap_it->idx);
+
+	if (!IS_ERR(found_page)) {
+		lock_page(found_page);
+		dma_addr = dma_map_page(&mali_platform_device->dev, found_page,
+					0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+		dma_unmap_page(&mali_platform_device->dev, dma_addr,
+			       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+		*pagep = found_page;
+	} else {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_cow_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep)
+{
+	struct mali_page_node *m_page, *found_node = NULL, *new_node = NULL;
+	mali_mem_cow *cow = NULL;
+	u32 i = 0;
+
+	MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+	MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED));
+	MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN == (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags));
+	MALI_DEBUG_ASSERT(!mali_memory_swap_backend_in_swapped_pool(mem_bkend));
+
+	cow = &mem_bkend->cow_mem;
+	list_for_each_entry(m_page, &cow->pages, list) {
+		if (i == offset) {
+			found_node = m_page;
+			break;
+		}
+		i++;
+	}
+
+	if (found_node == NULL) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	new_node = _mali_mem_swap_page_node_allocate();
+
+	if (new_node == NULL) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	new_node->swap_it->idx = mali_mem_swap_idx_alloc();
+
+	if (new_node->swap_it->idx == _MALI_OSK_BITMAP_INVALIDATE_INDEX) {
+		MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW on demand.\n"));
+		kfree(new_node->swap_it);
+		kfree(new_node);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	if (mali_mem_swap_in_page_node(new_node) == MALI_FALSE) {
+		_mali_mem_swap_page_node_free(new_node);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	/* swap in found node for copy in kernel. */
+	if (mali_mem_swap_in_page_node(found_node) == MALI_FALSE) {
+		mali_mem_swap_out_page_node(new_node);
+		_mali_mem_swap_page_node_free(new_node);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	_mali_mem_cow_copy_page(found_node, new_node);
+
+	list_replace(&found_node->list, &new_node->list);
+
+	if (_mali_page_node_get_ref_count(found_node) != 1) {
+		atomic_add(1, &mem_bkend->mali_allocation->session->mali_mem_allocated_pages);
+		if (atomic_read(&mem_bkend->mali_allocation->session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > mem_bkend->mali_allocation->session->max_mali_mem_allocated_size) {
+			mem_bkend->mali_allocation->session->max_mali_mem_allocated_size = atomic_read(&mem_bkend->mali_allocation->session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+		}
+		mem_bkend->cow_mem.change_pages_nr++;
+	}
+
+	mali_mem_swap_out_page_node(found_node);
+	_mali_mem_swap_page_node_free(found_node);
+
+	/* When swap in the new page node, we have called dma_map_page for this page.\n */
+	dma_unmap_page(&mali_platform_device->dev, new_node->swap_it->dma_addr,
+		       _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+	lock_page(new_node->swap_it->page);
+
+	*pagep = new_node->swap_it->page;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+#ifdef MALI_MEM_SWAP_TRACKING
+void mali_mem_swap_tracking(u32 *swap_pool_size, u32 *unlock_size)
+{
+	*swap_pool_size = mem_backend_swapped_pool_size;
+	*unlock_size =  mem_backend_swapped_unlock_size;
+}
+#endif
diff --git a/utgard/r7p0/linux/mali_memory_swap_alloc.h b/utgard/r7p0/linux/mali_memory_swap_alloc.h
new file mode 100644
index 0000000..8d27ca4
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_swap_alloc.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_SWAP_ALLOC_H__
+#define __MALI_MEMORY_SWAP_ALLOC_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include "mali_memory_types.h"
+#include "mali_pp_job.h"
+
+/**
+ * Initialize memory swapping module.
+ */
+_mali_osk_errcode_t mali_mem_swap_init(void);
+
+void mali_mem_swap_term(void);
+
+/**
+ * Return global share memory file to other modules.
+ */
+struct file *mali_mem_swap_get_global_swap_file(void);
+
+/**
+ * Unlock the given memory backend and pages in it could be swapped out by kernel.
+ */
+void mali_mem_swap_unlock_single_mem_backend(mali_mem_backend *mem_bkend);
+
+/**
+ * Remove the given memory backend from global swap list.
+ */
+void mali_memory_swap_list_backend_delete(mali_mem_backend *mem_bkend);
+
+/**
+ * Add the given memory backend to global swap list.
+ */
+void mali_memory_swap_list_backend_add(mali_mem_backend *mem_bkend);
+
+/**
+ * Allocate 1 index from bitmap used as page index in global swap file.
+ */
+u32 mali_mem_swap_idx_alloc(void);
+
+void mali_mem_swap_idx_free(u32 idx);
+
+/**
+ * Allocate a new swap item without page index.
+ */
+struct mali_swap_item *mali_mem_swap_alloc_swap_item(void);
+
+/**
+ * Free a swap item, truncate the corresponding space in page cache and free index of page.
+ */
+void mali_mem_swap_free_swap_item(mali_swap_item *swap_item);
+
+/**
+ * Allocate a page node with swap item.
+ */
+struct mali_page_node *_mali_mem_swap_page_node_allocate(void);
+
+/**
+ * Reduce the reference count of given page node and if return 0, just free this page node.
+ */
+_mali_osk_errcode_t _mali_mem_swap_put_page_node(struct mali_page_node *m_page);
+
+void _mali_mem_swap_page_node_free(struct mali_page_node *m_page);
+
+/**
+ * Free a swappable memory backend.
+ */
+u32 mali_mem_swap_free(mali_mem_swap *swap_mem);
+
+/**
+ * Ummap and free.
+ */
+u32 mali_mem_swap_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped);
+
+/**
+ * Read in a page from global swap file with the pre-allcated page index.
+ */
+mali_bool mali_mem_swap_in_page_node(struct mali_page_node *page_node);
+
+int mali_mem_swap_alloc_pages(mali_mem_swap *swap_mem, u32 size, u32 *bkend_idx);
+
+_mali_osk_errcode_t mali_mem_swap_mali_map(mali_mem_swap *swap_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+
+void mali_mem_swap_mali_unmap(mali_mem_allocation *alloc);
+
+/**
+ * When pp job created, we need swap in all of memory backend needed by this pp job.
+ */
+int mali_mem_swap_in_pages(struct mali_pp_job *job);
+
+/**
+ * Put all of memory backends used this pp job to the global swap list.
+ */
+int mali_mem_swap_out_pages(struct mali_pp_job *job);
+
+/**
+ * This will be called in page fault to process CPU read&write.
+ */
+int mali_mem_swap_allocate_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep);
+
+/**
+ * Used to process cow on demand for swappable memory backend.
+ */
+int mali_mem_swap_cow_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep);
+
+#ifdef MALI_MEM_SWAP_TRACKING
+void mali_mem_swap_tracking(u32 *swap_pool_size, u32 *unlock_size);
+#endif
+#endif /* __MALI_MEMORY_SWAP_ALLOC_H__ */
+
diff --git a/utgard/r7p0/linux/mali_memory_types.h b/utgard/r7p0/linux/mali_memory_types.h
new file mode 100644
index 0000000..ec565c6
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_types.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_TYPES_H__
+#define __MALI_MEMORY_TYPES_H__
+
+#include <linux/mm.h>
+
+#if defined(CONFIG_MALI400_UMP)
+#include "ump_kernel_interface.h"
+#endif
+
+typedef u32 mali_address_t;
+
+typedef enum mali_mem_type {
+	MALI_MEM_OS,
+	MALI_MEM_EXTERNAL,
+	MALI_MEM_SWAP,
+	MALI_MEM_DMA_BUF,
+	MALI_MEM_UMP,
+	MALI_MEM_BLOCK,
+	MALI_MEM_COW,
+	MALI_MEM_SECURE,
+	MALI_MEM_TYPE_MAX,
+} mali_mem_type;
+
+typedef struct mali_block_item {
+	/* for block type, the block_phy is alway page size align
+	* so use low 12bit used for ref_cout.
+	*/
+	unsigned long phy_addr;
+} mali_block_item;
+
+/**
+ * idx is used to locate the given page in the address space of swap file.
+ * ref_count is used to mark how many memory backends are using this item.
+ */
+typedef struct mali_swap_item {
+	u32 idx;
+	atomic_t ref_count;
+	struct page *page;
+	dma_addr_t dma_addr;
+} mali_swap_item;
+
+typedef enum mali_page_node_type {
+	MALI_PAGE_NODE_OS,
+	MALI_PAGE_NODE_BLOCK,
+	MALI_PAGE_NODE_SWAP,
+} mali_page_node_type;
+
+typedef struct mali_page_node {
+	struct list_head list;
+	union {
+		struct page *page;
+		mali_block_item *blk_it; /*pointer to block item*/
+		mali_swap_item *swap_it;
+	};
+
+	u32 type;
+} mali_page_node;
+
+typedef struct mali_mem_os_mem {
+	struct list_head pages;
+	u32 count;
+} mali_mem_os_mem;
+
+typedef struct mali_mem_dma_buf {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+	struct mali_dma_buf_attachment *attachment;
+#endif
+} mali_mem_dma_buf;
+
+typedef struct mali_mem_external {
+	dma_addr_t phys;
+	u32 size;
+} mali_mem_external;
+
+typedef struct mali_mem_ump {
+#if defined(CONFIG_MALI400_UMP)
+	ump_dd_handle handle;
+#endif
+} mali_mem_ump;
+
+typedef struct block_allocator_allocation {
+	/* The list will be released in reverse order */
+	struct block_info *last_allocated;
+	u32 mapping_length;
+	struct block_allocator *info;
+} block_allocator_allocation;
+
+typedef struct mali_mem_block_mem {
+	struct list_head pfns;
+	u32 count;
+} mali_mem_block_mem;
+
+typedef struct mali_mem_virt_mali_mapping {
+	mali_address_t addr; /* Virtual Mali address */
+	u32 properties;      /* MMU Permissions + cache, must match MMU HW */
+} mali_mem_virt_mali_mapping;
+
+typedef struct mali_mem_virt_cpu_mapping {
+	void __user *addr;
+	struct vm_area_struct *vma;
+} mali_mem_virt_cpu_mapping;
+
+#define MALI_MEM_ALLOCATION_VALID_MAGIC 0xdeda110c
+#define MALI_MEM_ALLOCATION_FREED_MAGIC 0x10101010
+
+typedef struct mali_mm_node {
+	/* MALI GPU vaddr start, use u32 for mmu only support 32bit address*/
+	uint32_t start; /* GPU vaddr */
+	uint32_t size;  /* GPU allocation virtual size */
+	unsigned allocated : 1;
+} mali_mm_node;
+
+typedef struct mali_vma_node {
+	struct mali_mm_node vm_node;
+	struct rb_node vm_rb;
+} mali_vma_node;
+
+
+typedef struct mali_mem_allocation {
+	MALI_DEBUG_CODE(u32 magic);
+	mali_mem_type type;                /**< Type of memory */
+	u32 flags;                         /**< Flags for this allocation */
+
+	struct mali_session_data *session; /**< Pointer to session that owns the allocation */
+
+	mali_mem_virt_cpu_mapping cpu_mapping; /**< CPU mapping */
+	mali_mem_virt_mali_mapping mali_mapping; /**< Mali mapping */
+
+	/* add for new memory system */
+	struct mali_vma_node mali_vma_node;
+	u32 vsize; /* virtual size*/
+	u32 psize; /* physical backend memory size*/
+	struct list_head list;
+	s32 backend_handle; /* idr for mem_backend */
+	_mali_osk_atomic_t mem_alloc_refcount;
+} mali_mem_allocation;
+
+struct mali_mem_os_allocator {
+	spinlock_t pool_lock;
+	struct list_head pool_pages;
+	size_t pool_count;
+
+	atomic_t allocated_pages;
+	size_t allocation_limit;
+
+	struct shrinker shrinker;
+	struct delayed_work timed_shrinker;
+	struct workqueue_struct *wq;
+};
+
+/* COW backend memory type */
+typedef struct mali_mem_cow {
+	struct list_head pages;  /**< all pages for this cow backend allocation,
+								including new allocated pages for modified range*/
+	u32 count;               /**< number of pages */
+	s32 change_pages_nr;
+} mali_mem_cow;
+
+typedef struct mali_mem_swap {
+	struct list_head pages;
+	u32 count;
+} mali_mem_swap;
+
+typedef struct mali_mem_secure {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+	struct dma_buf *buf;
+	struct dma_buf_attachment *attachment;
+	struct sg_table *sgt;
+#endif
+	u32 count;
+} mali_mem_secure;
+
+#define MALI_MEM_BACKEND_FLAG_COWED                   (0x1)  /* COW has happen on this backend */
+#define MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE        (0x2)  /* This is an COW backend, mapped as not allowed cpu to write */
+#define MALI_MEM_BACKEND_FLAG_SWAP_COWED              (0x4)  /* Mark the given backend is cowed from swappable memory. */
+/* Mark this backend is not swapped_in in MALI driver, and before using it,
+ * we should swap it in and set up corresponding page table. */
+#define MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN            (0x8)
+#define MALI_MEM_BACKEND_FLAG_NOT_BINDED              (0x1 << 5) /* this backend it not back with physical memory, used for defer bind */
+#define MALI_MEM_BACKEND_FLAG_BINDED              (0x1 << 6) /* this backend it back with physical memory, used for defer bind */
+
+typedef struct mali_mem_backend {
+	mali_mem_type type;                /**< Type of backend memory */
+	u32 flags;                         /**< Flags for this allocation */
+	u32 size;
+	/* Union selected by type. */
+	union {
+		mali_mem_os_mem os_mem;       /**< MALI_MEM_OS */
+		mali_mem_external ext_mem;    /**< MALI_MEM_EXTERNAL */
+		mali_mem_dma_buf dma_buf;     /**< MALI_MEM_DMA_BUF */
+		mali_mem_ump ump_mem;         /**< MALI_MEM_UMP */
+		mali_mem_block_mem block_mem; /**< MALI_MEM_BLOCK */
+		mali_mem_cow cow_mem;
+		mali_mem_swap swap_mem;
+		mali_mem_secure secure_mem;
+	};
+	mali_mem_allocation *mali_allocation;
+	struct mutex mutex;
+	mali_mem_type cow_type;
+
+	struct list_head list;           /**< Used to link swappable memory backend to the global swappable list */
+	int using_count;                 /**< Mark how many PP jobs are using this memory backend */
+	u32 start_idx;                   /**< If the correspondign vma of this backend is linear, this value will be used to set vma->vm_pgoff */
+} mali_mem_backend;
+
+#define MALI_MEM_FLAG_MALI_GUARD_PAGE (_MALI_MAP_EXTERNAL_MAP_GUARD_PAGE)
+#define MALI_MEM_FLAG_DONT_CPU_MAP    (1 << 1)
+#define MALI_MEM_FLAG_CAN_RESIZE  (_MALI_MEMORY_ALLOCATE_RESIZEABLE)
+#endif /* __MALI_MEMORY_TYPES__ */
diff --git a/utgard/r7p0/linux/mali_memory_ump.c b/utgard/r7p0/linux/mali_memory_ump.c
new file mode 100644
index 0000000..c548c30
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_ump.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory.h"
+#include "ump_kernel_interface.h"
+
+static int mali_mem_ump_map(mali_mem_backend *mem_backend)
+{
+	ump_dd_handle ump_mem;
+	mali_mem_allocation *alloc;
+	struct mali_session_data *session;
+	u32 nr_blocks;
+	u32 i;
+	ump_dd_physical_block *ump_blocks;
+	struct mali_page_directory *pagedir;
+	u32 offset = 0;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(mem_backend->type == MALI_MEM_UMP);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	ump_mem = mem_backend->ump_mem.handle;
+	MALI_DEBUG_ASSERT(ump_mem != UMP_DD_HANDLE_INVALID);
+
+	nr_blocks = ump_dd_phys_block_count_get(ump_mem);
+	if (nr_blocks == 0) {
+		MALI_DEBUG_PRINT(1, ("No block count\n"));
+		return -EINVAL;
+	}
+
+	ump_blocks = _mali_osk_malloc(sizeof(*ump_blocks) * nr_blocks);
+	if (ump_blocks == NULL) {
+		return -ENOMEM;
+	}
+
+	if (ump_dd_phys_blocks_get(ump_mem, ump_blocks, nr_blocks) == UMP_DD_INVALID) {
+		_mali_osk_free(ump_blocks);
+		return -EFAULT;
+	}
+
+	pagedir = session->page_directory;
+
+	mali_session_memory_lock(session);
+
+	err = mali_mem_mali_map_prepare(alloc);
+	if (err != _MALI_OSK_ERR_OK) {
+		MALI_DEBUG_PRINT(1, ("Mapping of UMP memory failed\n"));
+
+		_mali_osk_free(ump_blocks);
+		mali_session_memory_unlock(session);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_blocks; ++i) {
+		u32 virt = alloc->mali_vma_node.vm_node.start + offset;
+
+		MALI_DEBUG_PRINT(7, ("Mapping in 0x%08x size %d\n", ump_blocks[i].addr, ump_blocks[i].size));
+
+		mali_mmu_pagedir_update(pagedir, virt, ump_blocks[i].addr,
+					ump_blocks[i].size, MALI_MMU_FLAGS_DEFAULT);
+
+		offset += ump_blocks[i].size;
+	}
+
+	if (alloc->flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		u32 virt = alloc->mali_vma_node.vm_node.start + offset;
+
+		/* Map in an extra virtual guard page at the end of the VMA */
+		MALI_DEBUG_PRINT(6, ("Mapping in extra guard page\n"));
+
+		mali_mmu_pagedir_update(pagedir, virt, ump_blocks[0].addr, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+
+		offset += _MALI_OSK_MALI_PAGE_SIZE;
+	}
+	mali_session_memory_unlock(session);
+	_mali_osk_free(ump_blocks);
+	return 0;
+}
+
+static void mali_mem_ump_unmap(mali_mem_allocation *alloc)
+{
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	session = alloc->session;
+	MALI_DEBUG_ASSERT_POINTER(session);
+	mali_session_memory_lock(session);
+	mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+			       alloc->flags);
+	mali_session_memory_unlock(session);
+}
+
+int mali_mem_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32  secure_id, u32 flags)
+{
+	ump_dd_handle ump_mem;
+	int ret;
+
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(mem_backend->type == MALI_MEM_UMP);
+
+	MALI_DEBUG_PRINT(3,
+			 ("Requested to map ump memory with secure id %d into virtual memory 0x%08X, size 0x%08X\n",
+			  secure_id, alloc->mali_vma_node.vm_node.start, alloc->mali_vma_node.vm_node.size));
+
+	ump_mem = ump_dd_handle_create_from_secure_id(secure_id);
+	if (ump_mem == UMP_DD_HANDLE_INVALID) MALI_ERROR(_MALI_OSK_ERR_FAULT);
+	alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP;
+	if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+		alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+	}
+
+	mem_backend->ump_mem.handle = ump_mem;
+
+	ret = mali_mem_ump_map(mem_backend);
+	if (ret != 0) {
+		ump_dd_reference_release(ump_mem);
+		return _MALI_OSK_ERR_FAULT;
+	}
+	MALI_DEBUG_PRINT(3, ("Returning from UMP bind\n"));
+	return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_unbind_ump_buf(mali_mem_backend *mem_backend)
+{
+	ump_dd_handle ump_mem;
+	mali_mem_allocation *alloc;
+
+	MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_ASSERT(mem_backend->type == MALI_MEM_UMP);
+	ump_mem = mem_backend->ump_mem.handle;
+	MALI_DEBUG_ASSERT(ump_mem != UMP_DD_HANDLE_INVALID);
+
+	alloc = mem_backend->mali_allocation;
+	MALI_DEBUG_ASSERT_POINTER(alloc);
+	mali_mem_ump_unmap(alloc);
+	ump_dd_reference_release(ump_mem);
+}
+
diff --git a/utgard/r7p0/linux/mali_memory_ump.h b/utgard/r7p0/linux/mali_memory_ump.h
new file mode 100644
index 0000000..23f59ae
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_ump.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_UMP_BUF_H__
+#define __MALI_MEMORY_UMP_BUF_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_memory.h"
+
+int mali_mem_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32  secure_id, u32 flags);
+void mali_mem_unbind_ump_buf(mali_mem_backend *mem_backend);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MEMORY_DMA_BUF_H__ */
diff --git a/utgard/r7p0/linux/mali_memory_util.c b/utgard/r7p0/linux/mali_memory_util.c
new file mode 100644
index 0000000..dd5a9da
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_util.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_secure.h"
+#endif
+#if defined(CONFIG_MALI400_UMP)
+#include "mali_memory_ump.h"
+#endif
+#include "mali_memory_external.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_swap_alloc.h"
+
+
+
+/**
+*function @_mali_free_allocation_mem - free a memory allocation
+*/
+static u32 _mali_free_allocation_mem(mali_mem_allocation *mali_alloc)
+{
+	mali_mem_backend *mem_bkend = NULL;
+	u32 free_pages_nr = 0;
+
+	struct mali_session_data *session = mali_alloc->session;
+
+	MALI_DEBUG_PRINT(4, (" _mali_free_allocation_mem, psize =0x%x!\n", mali_alloc->psize));
+	if (mali_alloc->psize == 0)
+		goto out;
+
+	/* Get backend memory & Map on CPU */
+	mutex_lock(&mali_idr_mutex);
+	mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	MALI_DEBUG_ASSERT(mem_bkend != NULL);
+
+	switch (mem_bkend->type) {
+	case MALI_MEM_OS:
+		free_pages_nr = mali_mem_os_release(mem_bkend);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		break;
+	case MALI_MEM_UMP:
+#if defined(CONFIG_MALI400_UMP)
+		mali_mem_unbind_ump_buf(mem_bkend);
+		atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+#else
+		MALI_DEBUG_PRINT(1, ("UMP not supported\n"));
+#endif
+		break;
+	case MALI_MEM_DMA_BUF:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		mali_mem_unbind_dma_buf(mem_bkend);
+		atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported\n"));
+#endif
+		break;
+	case MALI_MEM_EXTERNAL:
+		mali_mem_unbind_ext_buf(mem_bkend);
+		atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+		break;
+
+	case MALI_MEM_BLOCK:
+		free_pages_nr = mali_mem_block_release(mem_bkend);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		break;
+
+	case MALI_MEM_COW:
+		if (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED) {
+			free_pages_nr = mali_mem_swap_release(mem_bkend, MALI_TRUE);
+		} else {
+			free_pages_nr = mali_mem_cow_release(mem_bkend, MALI_TRUE);
+		}
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		break;
+	case MALI_MEM_SWAP:
+		free_pages_nr = mali_mem_swap_release(mem_bkend, MALI_TRUE);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+		atomic_sub(free_pages_nr, &session->mali_mem_array[mem_bkend->type]);
+		break;
+	case MALI_MEM_SECURE:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		free_pages_nr = mali_mem_secure_release(mem_bkend);
+		atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+#else
+		MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory\n"));
+#endif
+		break;
+	default:
+		MALI_DEBUG_PRINT(1, ("mem type %d is not in the mali_mem_type enum.\n", mem_bkend->type));
+		break;
+	}
+
+	/*Remove backend memory idex */
+	mutex_lock(&mali_idr_mutex);
+	idr_remove(&mali_backend_idr, mali_alloc->backend_handle);
+	mutex_unlock(&mali_idr_mutex);
+	kfree(mem_bkend);
+out:
+	/* remove memory allocation  */
+	mali_vma_offset_remove(&session->allocation_mgr, &mali_alloc->mali_vma_node);
+	mali_mem_allocation_struct_destory(mali_alloc);
+	return free_pages_nr;
+}
+
+/**
+*  ref_count for allocation
+*/
+u32 mali_allocation_unref(struct mali_mem_allocation **alloc)
+{
+	u32 free_pages_nr = 0;
+	mali_mem_allocation *mali_alloc = *alloc;
+	*alloc = NULL;
+	if (_mali_osk_atomic_dec_return(&mali_alloc->mem_alloc_refcount) == 0) {
+		free_pages_nr = _mali_free_allocation_mem(mali_alloc);
+	}
+	return free_pages_nr;
+}
+
+void mali_allocation_ref(struct mali_mem_allocation *alloc)
+{
+	_mali_osk_atomic_inc(&alloc->mem_alloc_refcount);
+}
+
+void mali_free_session_allocations(struct mali_session_data *session)
+{
+	struct mali_mem_allocation *entry, *next;
+
+	MALI_DEBUG_PRINT(4, (" mali_free_session_allocations!\n"));
+
+	list_for_each_entry_safe(entry, next, &session->allocation_mgr.head, list) {
+		mali_allocation_unref(&entry);
+	}
+}
diff --git a/utgard/r7p0/linux/mali_memory_util.h b/utgard/r7p0/linux/mali_memory_util.h
new file mode 100644
index 0000000..0a23b77
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_util.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+
+#ifndef __MALI_MEMORY_UTIL_H__
+#define __MALI_MEMORY_UTIL_H__
+u32 mali_allocation_unref(struct mali_mem_allocation **alloc);
+
+void mali_allocation_ref(struct mali_mem_allocation *alloc);
+
+void mali_free_session_allocations(struct mali_session_data *session);
+
+#endif
diff --git a/utgard/r7p0/linux/mali_memory_virtual.c b/utgard/r7p0/linux/mali_memory_virtual.c
new file mode 100644
index 0000000..7e81f67
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_virtual.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+
+
+/**
+*internal helper to link node into the rb-tree
+*/
+static inline void _mali_vma_offset_add_rb(struct mali_allocation_manager *mgr,
+		struct mali_vma_node *node)
+{
+	struct rb_node **iter = &mgr->allocation_mgr_rb.rb_node;
+	struct rb_node *parent = NULL;
+	struct mali_vma_node *iter_node;
+
+	while (likely(*iter)) {
+		parent = *iter;
+		iter_node = rb_entry(*iter, struct mali_vma_node, vm_rb);
+
+		if (node->vm_node.start < iter_node->vm_node.start)
+			iter = &(*iter)->rb_left;
+		else if (node->vm_node.start > iter_node->vm_node.start)
+			iter = &(*iter)->rb_right;
+		else
+			MALI_DEBUG_ASSERT(0);
+	}
+
+	rb_link_node(&node->vm_rb, parent, iter);
+	rb_insert_color(&node->vm_rb, &mgr->allocation_mgr_rb);
+}
+
+/**
+ * mali_vma_offset_add() - Add offset node to RB Tree
+ */
+int mali_vma_offset_add(struct mali_allocation_manager *mgr,
+			struct mali_vma_node *node)
+{
+	int ret = 0;
+
+	write_lock(&mgr->vm_lock);
+
+	if (node->vm_node.allocated) {
+		goto out;
+	}
+
+	_mali_vma_offset_add_rb(mgr, node);
+	/* set to allocated */
+	node->vm_node.allocated = 1;
+
+out:
+	write_unlock(&mgr->vm_lock);
+	return ret;
+}
+
+/**
+ * mali_vma_offset_remove() - Remove offset node from RB tree
+ */
+void mali_vma_offset_remove(struct mali_allocation_manager *mgr,
+			    struct mali_vma_node *node)
+{
+	write_lock(&mgr->vm_lock);
+
+	if (node->vm_node.allocated) {
+		rb_erase(&node->vm_rb, &mgr->allocation_mgr_rb);
+		memset(&node->vm_node, 0, sizeof(node->vm_node));
+	}
+	write_unlock(&mgr->vm_lock);
+}
+
+/**
+* mali_vma_offset_search - Search the node in RB tree
+*/
+struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr,
+		unsigned long start, unsigned long pages)
+{
+	struct mali_vma_node *node, *best;
+	struct rb_node *iter;
+	unsigned long offset;
+
+	read_lock(&mgr->vm_lock);
+
+	iter = mgr->allocation_mgr_rb.rb_node;
+	best = NULL;
+
+	while (likely(iter)) {
+		node = rb_entry(iter, struct mali_vma_node, vm_rb);
+		offset = node->vm_node.start;
+		if (start >= offset) {
+			iter = iter->rb_right;
+			best = node;
+			if (start == offset)
+				break;
+		} else {
+			iter = iter->rb_left;
+		}
+	}
+
+	if (best) {
+		offset = best->vm_node.start + best->vm_node.size;
+		if (offset <= start + pages)
+			best = NULL;
+	}
+	read_unlock(&mgr->vm_lock);
+
+	return best;
+}
+
diff --git a/utgard/r7p0/linux/mali_memory_virtual.h b/utgard/r7p0/linux/mali_memory_virtual.h
new file mode 100644
index 0000000..2a70cbb
--- /dev/null
+++ b/utgard/r7p0/linux/mali_memory_virtual.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __MALI_GPU_VMEM_H__
+#define __MALI_GPU_VMEM_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_manager.h"
+
+
+
+int mali_vma_offset_add(struct mali_allocation_manager *mgr,
+			struct mali_vma_node *node);
+
+void mali_vma_offset_remove(struct mali_allocation_manager *mgr,
+			    struct mali_vma_node *node);
+
+struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr,
+		unsigned long start,    unsigned long pages);
+
+#endif
diff --git a/utgard/r7p0/linux/mali_osk_atomics.c b/utgard/r7p0/linux/mali_osk_atomics.c
new file mode 100644
index 0000000..f3856e7
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_atomics.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_atomics.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <asm/atomic.h>
+#include "mali_kernel_common.h"
+
+void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom)
+{
+	atomic_dec((atomic_t *)&atom->u.val);
+}
+
+u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom)
+{
+	return atomic_dec_return((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom)
+{
+	atomic_inc((atomic_t *)&atom->u.val);
+}
+
+u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom)
+{
+	return atomic_inc_return((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val)
+{
+	MALI_DEBUG_ASSERT_POINTER(atom);
+	atomic_set((atomic_t *)&atom->u.val, val);
+}
+
+u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom)
+{
+	return atomic_read((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_term(_mali_osk_atomic_t *atom)
+{
+	MALI_IGNORE(atom);
+}
+
+u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val)
+{
+	return atomic_xchg((atomic_t *)&atom->u.val, val);
+}
diff --git a/utgard/r7p0/linux/mali_osk_bitmap.c b/utgard/r7p0/linux/mali_osk_bitmap.c
new file mode 100644
index 0000000..243b9a7
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_bitmap.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2010, 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_bitmap.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/bitmap.h>
+#include <linux/vmalloc.h>
+#include "common/mali_kernel_common.h"
+#include "mali_osk_types.h"
+#include "mali_osk.h"
+
+u32 _mali_osk_bitmap_alloc(struct _mali_osk_bitmap *bitmap)
+{
+	u32 obj;
+
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	_mali_osk_spinlock_lock(bitmap->lock);
+
+	obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->reserve);
+
+	if (obj < bitmap->max) {
+		set_bit(obj, bitmap->table);
+	} else {
+		obj = -1;
+	}
+
+	if (obj != -1)
+		--bitmap->avail;
+	_mali_osk_spinlock_unlock(bitmap->lock);
+
+	return obj;
+}
+
+void _mali_osk_bitmap_free(struct _mali_osk_bitmap *bitmap, u32 obj)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	_mali_osk_bitmap_free_range(bitmap, obj, 1);
+}
+
+u32 _mali_osk_bitmap_alloc_range(struct _mali_osk_bitmap *bitmap, int cnt)
+{
+	u32 obj;
+
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	if (cnt <= 0) {
+		return -1;
+	}
+
+	if (cnt == 1) {
+		return _mali_osk_bitmap_alloc(bitmap);
+	}
+
+	_mali_osk_spinlock_lock(bitmap->lock);
+	obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max,
+					 bitmap->last, cnt, 0);
+
+	if (obj >= bitmap->max) {
+		obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max,
+						 bitmap->reserve, cnt, 0);
+	}
+
+	if (obj < bitmap->max) {
+		bitmap_set(bitmap->table, obj, cnt);
+
+		bitmap->last = (obj + cnt);
+		if (bitmap->last >= bitmap->max) {
+			bitmap->last = bitmap->reserve;
+		}
+	} else {
+		obj = -1;
+	}
+
+	if (obj != -1) {
+		bitmap->avail -= cnt;
+	}
+
+	_mali_osk_spinlock_unlock(bitmap->lock);
+
+	return obj;
+}
+
+u32 _mali_osk_bitmap_avail(struct _mali_osk_bitmap *bitmap)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	return bitmap->avail;
+}
+
+void _mali_osk_bitmap_free_range(struct _mali_osk_bitmap *bitmap, u32 obj, int cnt)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	_mali_osk_spinlock_lock(bitmap->lock);
+	bitmap_clear(bitmap->table, obj, cnt);
+	bitmap->last = min(bitmap->last, obj);
+
+	bitmap->avail += cnt;
+	_mali_osk_spinlock_unlock(bitmap->lock);
+}
+
+int _mali_osk_bitmap_init(struct _mali_osk_bitmap *bitmap, u32 num, u32 reserve)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+	MALI_DEBUG_ASSERT(reserve <= num);
+
+	bitmap->reserve = reserve;
+	bitmap->last = reserve;
+	bitmap->max  = num;
+	bitmap->avail = num - reserve;
+	bitmap->lock = _mali_osk_spinlock_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST);
+	if (!bitmap->lock) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+	bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) *
+				sizeof(long), GFP_KERNEL);
+	if (!bitmap->table) {
+		_mali_osk_spinlock_term(bitmap->lock);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_bitmap_term(struct _mali_osk_bitmap *bitmap)
+{
+	MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+	if (bitmap->lock != NULL) {
+		_mali_osk_spinlock_term(bitmap->lock);
+	}
+
+	if (bitmap->table != NULL) {
+		kfree(bitmap->table);
+	}
+}
+
diff --git a/utgard/r7p0/linux/mali_osk_irq.c b/utgard/r7p0/linux/mali_osk_irq.c
new file mode 100644
index 0000000..267c047
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_irq.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_irq.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/types.h>
+#include <linux/version.h>
+#if (KERNEL_VERSION(3, 14, 29) > LINUX_VERSION_CODE)
+#include <mach/cpu.h>
+#endif
+#include <linux/slab.h>	/* For memory allocation */
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+typedef struct _mali_osk_irq_t_struct {
+	u32 irqnum;
+	void *data;
+	_mali_osk_irq_uhandler_t uhandler;
+} mali_osk_irq_object_t;
+
+typedef irqreturn_t (*irq_handler_func_t)(int, void *, struct pt_regs *);
+static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id);   /* , struct pt_regs *regs*/
+
+#if MESON_CPU_TYPE == MESON_CPU_TYPE_MESON6
+u32 get_irqnum(struct _mali_osk_irq_t_struct *irq)
+{
+	if (irq)
+		return irq->irqnum;
+	else
+		return 0;
+}
+#endif
+
+#if defined(DEBUG)
+
+struct test_interrupt_data {
+	_mali_osk_irq_ack_t ack_func;
+	void *probe_data;
+	mali_bool interrupt_received;
+	wait_queue_head_t wq;
+};
+
+static irqreturn_t test_interrupt_upper_half(int port_name, void *dev_id)
+{
+	irqreturn_t ret = IRQ_NONE;
+	struct test_interrupt_data *data = (struct test_interrupt_data *)dev_id;
+
+	if (data->ack_func(data->probe_data) == _MALI_OSK_ERR_OK) {
+		data->interrupt_received = MALI_TRUE;
+		wake_up(&data->wq);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static _mali_osk_errcode_t test_interrupt(u32 irqnum,
+		_mali_osk_irq_trigger_t trigger_func,
+		_mali_osk_irq_ack_t ack_func,
+		void *probe_data,
+		const char *description)
+{
+	unsigned long irq_flags = 0;
+	struct test_interrupt_data data = {
+		.ack_func = ack_func,
+		.probe_data = probe_data,
+		.interrupt_received = MALI_FALSE,
+	};
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	irq_flags |= IRQF_SHARED;
+#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	if (request_irq(irqnum, test_interrupt_upper_half, irq_flags, description, &data) != 0) {
+		MALI_DEBUG_PRINT(2, ("Unable to install test IRQ handler for core '%s'\n", description));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	init_waitqueue_head(&data.wq);
+
+	trigger_func(probe_data);
+	wait_event_timeout(data.wq, data.interrupt_received, 100);
+
+	free_irq(irqnum, &data);
+
+	if (data.interrupt_received) {
+		MALI_DEBUG_PRINT(3, ("%s: Interrupt test OK\n", description));
+		return _MALI_OSK_ERR_OK;
+	} else {
+		MALI_PRINT_ERROR(("%s: Failed interrupt test on %u\n", description, irqnum));
+		return _MALI_OSK_ERR_FAULT;
+	}
+}
+
+#endif /* defined(DEBUG) */
+
+_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description)
+{
+	mali_osk_irq_object_t *irq_object;
+	unsigned long irq_flags = 0;
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+	irq_flags |= IRQF_SHARED;
+#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+	irq_object = kmalloc(sizeof(mali_osk_irq_object_t), GFP_KERNEL);
+	if (irq_object == NULL) {
+		return NULL;
+	}
+
+	if (-1 == irqnum) {
+		/* Probe for IRQ */
+		if ((NULL != trigger_func) && (ack_func != NULL)) {
+			unsigned long probe_count = 3;
+			_mali_osk_errcode_t err;
+			int irq;
+
+			MALI_DEBUG_PRINT(2, ("Probing for irq\n"));
+
+			do {
+				unsigned long mask;
+
+				mask = probe_irq_on();
+				trigger_func(probe_data);
+
+				_mali_osk_time_ubusydelay(5);
+
+				irq = probe_irq_off(mask);
+				err = ack_func(probe_data);
+			} while (irq < 0 && (err == _MALI_OSK_ERR_OK) && probe_count--);
+
+			if (irq < 0 || (err != _MALI_OSK_ERR_OK)) irqnum = -1;
+			else irqnum = irq;
+		} else irqnum = -1; /* no probe functions, fault */
+
+		if (-1 != irqnum) {
+			/* found an irq */
+			MALI_DEBUG_PRINT(2, ("Found irq %d\n", irqnum));
+		} else {
+			MALI_DEBUG_PRINT(2, ("Probe for irq failed\n"));
+		}
+	}
+
+	irq_object->irqnum = irqnum;
+	irq_object->uhandler = uhandler;
+	irq_object->data = int_data;
+
+	if (-1 == irqnum) {
+		MALI_DEBUG_PRINT(2, ("No IRQ for core '%s' found during probe\n", description));
+		kfree(irq_object);
+		return NULL;
+	}
+
+#if defined(DEBUG)
+	/* Verify that the configured interrupt settings are working */
+	if (test_interrupt(irqnum, trigger_func, ack_func, probe_data, description) != _MALI_OSK_ERR_OK) {
+		MALI_DEBUG_PRINT(2, ("Test of IRQ(%d) handler for core '%s' failed\n", irqnum, description));
+		kfree(irq_object);
+		return NULL;
+	}
+#endif
+
+	if (request_irq(irqnum, irq_handler_upper_half, irq_flags, description, irq_object) != 0) {
+		MALI_DEBUG_PRINT(2, ("Unable to install IRQ handler for core '%s'\n", description));
+		kfree(irq_object);
+		return NULL;
+	}
+
+	return irq_object;
+}
+
+void _mali_osk_irq_term(_mali_osk_irq_t *irq)
+{
+	mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)irq;
+
+	free_irq(irq_object->irqnum, irq_object);
+	kfree(irq_object);
+}
+
+
+/** This function is called directly in interrupt context from the OS just after
+ * the CPU get the hw-irq from mali, or other devices on the same IRQ-channel.
+ * It is registered one of these function for each mali core. When an interrupt
+ * arrives this function will be called equal times as registered mali cores.
+ * That means that we only check one mali core in one function call, and the
+ * core we check for each turn is given by the \a dev_id variable.
+ * If we detect an pending interrupt on the given core, we mask the interrupt
+ * out by settging the core's IRQ_MASK register to zero.
+ * Then we schedule the mali_core_irq_handler_bottom_half to run as high priority
+ * work queue job.
+ */
+static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id)   /* , struct pt_regs *regs*/
+{
+	irqreturn_t ret = IRQ_NONE;
+	mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)dev_id;
+
+	if (irq_object->uhandler(irq_object->data) == _MALI_OSK_ERR_OK) {
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
diff --git a/utgard/r7p0/linux/mali_osk_locks.c b/utgard/r7p0/linux/mali_osk_locks.c
new file mode 100644
index 0000000..a41c45b
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_locks.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_locks.c
+ * Implemenation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk_locks.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+
+#ifdef DEBUG
+#ifdef LOCK_ORDER_CHECKING
+static DEFINE_SPINLOCK(lock_tracking_lock);
+static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid);
+static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid);
+static const char *const lock_order_to_string(_mali_osk_lock_order_t order);
+#endif /* LOCK_ORDER_CHECKING */
+
+void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+{
+	checker->orig_flags = flags;
+	checker->owner = 0;
+
+#ifdef LOCK_ORDER_CHECKING
+	checker->order = order;
+	checker->next = NULL;
+#endif
+}
+
+void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker)
+{
+	checker->owner = _mali_osk_get_tid();
+
+#ifdef LOCK_ORDER_CHECKING
+	if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) {
+		if (!add_lock_to_log_and_check(checker, _mali_osk_get_tid())) {
+			printk(KERN_ERR "%d: ERROR lock %p taken while holding a lock of a higher order.\n",
+			       _mali_osk_get_tid(), checker);
+			dump_stack();
+		}
+	}
+#endif
+}
+
+void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker)
+{
+
+#ifdef LOCK_ORDER_CHECKING
+	if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) {
+		remove_lock_from_log(checker, _mali_osk_get_tid());
+	}
+#endif
+	checker->owner = 0;
+}
+
+
+#ifdef LOCK_ORDER_CHECKING
+/* Lock order checking
+ * -------------------
+ *
+ * To assure that lock ordering scheme defined by _mali_osk_lock_order_t is strictly adhered to, the
+ * following function will, together with a linked list and some extra members in _mali_osk_lock_debug_s,
+ * make sure that a lock that is taken has a higher order than the current highest-order lock a
+ * thread holds.
+ *
+ * This is done in the following manner:
+ * - A linked list keeps track of locks held by a thread.
+ * - A `next' pointer is added to each lock. This is used to chain the locks together.
+ * - When taking a lock, the `add_lock_to_log_and_check' makes sure that taking
+ *   the given lock is legal. It will follow the linked list  to find the last
+ *   lock taken by this thread. If the last lock's order was lower than the
+ *   lock that is to be taken, it appends the new lock to the list and returns
+ *   true, if not, it return false. This return value is assert()'ed on in
+ *   _mali_osk_lock_wait().
+ */
+
+static struct _mali_osk_lock_debug_s *lock_lookup_list;
+
+static void dump_lock_tracking_list(void)
+{
+	struct _mali_osk_lock_debug_s *l;
+	u32 n = 1;
+
+	/* print list for debugging purposes */
+	l = lock_lookup_list;
+
+	while (l != NULL) {
+		printk(" [lock: %p, tid_owner: %d, order: %d] ->", l, l->owner, l->order);
+		l = l->next;
+		MALI_DEBUG_ASSERT(n++ < 100);
+	}
+	printk(" NULL\n");
+}
+
+static int tracking_list_length(void)
+{
+	struct _mali_osk_lock_debug_s *l;
+	u32 n = 0;
+
+	l = lock_lookup_list;
+
+	while (l != NULL) {
+		l = l->next;
+		n++;
+		MALI_DEBUG_ASSERT(n < 100);
+	}
+	return n;
+}
+
+static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid)
+{
+	mali_bool ret = MALI_FALSE;
+	_mali_osk_lock_order_t highest_order_for_tid = _MALI_OSK_LOCK_ORDER_FIRST;
+	struct _mali_osk_lock_debug_s *highest_order_lock = (struct _mali_osk_lock_debug_s *)0xbeefbabe;
+	struct _mali_osk_lock_debug_s *l;
+	unsigned long local_lock_flag;
+	u32 len;
+
+	spin_lock_irqsave(&lock_tracking_lock, local_lock_flag);
+	len = tracking_list_length();
+
+	l  = lock_lookup_list;
+	if (l == NULL) { /* This is the first lock taken by this thread -- record and return true */
+		lock_lookup_list = lock;
+		spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+		return MALI_TRUE;
+	} else {
+		/* Traverse the locks taken and find the lock of the highest order.
+		 * Since several threads may hold locks, each lock's owner must be
+		 * checked so that locks not owned by this thread can be ignored. */
+		for (;;) {
+			MALI_DEBUG_ASSERT_POINTER(l);
+			if (tid == l->owner && l->order >= highest_order_for_tid) {
+				highest_order_for_tid = l->order;
+				highest_order_lock = l;
+			}
+
+			if (l->next != NULL) {
+				l = l->next;
+			} else {
+				break;
+			}
+		}
+
+		l->next = lock;
+		l->next = NULL;
+	}
+
+	/* We have now found the highest order lock currently held by this thread and can see if it is
+	 * legal to take the requested lock. */
+	ret = highest_order_for_tid < lock->order;
+
+	if (!ret) {
+		printk(KERN_ERR "Took lock of order %d (%s) while holding lock of order %d (%s)\n",
+		       lock->order, lock_order_to_string(lock->order),
+		       highest_order_for_tid, lock_order_to_string(highest_order_for_tid));
+		dump_lock_tracking_list();
+	}
+
+	if (len + 1 != tracking_list_length()) {
+		printk(KERN_ERR "************ lock: %p\n", lock);
+		printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length());
+		dump_lock_tracking_list();
+		MALI_DEBUG_ASSERT_POINTER(NULL);
+	}
+
+	spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+	return ret;
+}
+
+static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid)
+{
+	struct _mali_osk_lock_debug_s *curr;
+	struct _mali_osk_lock_debug_s *prev = NULL;
+	unsigned long local_lock_flag;
+	u32 len;
+	u32 n = 0;
+
+	spin_lock_irqsave(&lock_tracking_lock, local_lock_flag);
+	len = tracking_list_length();
+	curr = lock_lookup_list;
+
+	if (curr == NULL) {
+		printk(KERN_ERR "Error: Lock tracking list was empty on call to remove_lock_from_log\n");
+		dump_lock_tracking_list();
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(curr);
+
+
+	while (lock != curr) {
+		prev = curr;
+
+		MALI_DEBUG_ASSERT_POINTER(curr);
+		curr = curr->next;
+		MALI_DEBUG_ASSERT(n++ < 100);
+	}
+
+	if (prev == NULL) {
+		lock_lookup_list = curr->next;
+	} else {
+		MALI_DEBUG_ASSERT_POINTER(curr);
+		MALI_DEBUG_ASSERT_POINTER(prev);
+		prev->next = curr->next;
+	}
+
+	lock->next = NULL;
+
+	if (len - 1 != tracking_list_length()) {
+		printk(KERN_ERR "************ lock: %p\n", lock);
+		printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length());
+		dump_lock_tracking_list();
+		MALI_DEBUG_ASSERT_POINTER(NULL);
+	}
+
+	spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+}
+
+static const char *const lock_order_to_string(_mali_osk_lock_order_t order)
+{
+	switch (order) {
+	case _MALI_OSK_LOCK_ORDER_SESSIONS:
+		return "_MALI_OSK_LOCK_ORDER_SESSIONS";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_SESSION:
+		return "_MALI_OSK_LOCK_ORDER_MEM_SESSION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_INFO:
+		return "_MALI_OSK_LOCK_ORDER_MEM_INFO";
+		break;
+	case _MALI_OSK_LOCK_ORDER_MEM_PT_CACHE:
+		return "_MALI_OSK_LOCK_ORDER_MEM_PT_CACHE";
+		break;
+	case _MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP:
+		return "_MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PM_EXECUTION:
+		return "_MALI_OSK_LOCK_ORDER_PM_EXECUTION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_EXECUTOR:
+		return "_MALI_OSK_LOCK_ORDER_EXECUTOR";
+		break;
+	case _MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM:
+		return "_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SCHEDULER:
+		return "_MALI_OSK_LOCK_ORDER_SCHEDULER";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED:
+		return "_MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED";
+		break;
+	case _MALI_OSK_LOCK_ORDER_DMA_COMMAND:
+		return "_MALI_OSK_LOCK_ORDER_DMA_COMMAND";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PROFILING:
+		return "_MALI_OSK_LOCK_ORDER_PROFILING";
+		break;
+	case _MALI_OSK_LOCK_ORDER_L2:
+		return "_MALI_OSK_LOCK_ORDER_L2";
+		break;
+	case _MALI_OSK_LOCK_ORDER_L2_COMMAND:
+		return "_MALI_OSK_LOCK_ORDER_L2_COMMAND";
+		break;
+	case _MALI_OSK_LOCK_ORDER_UTILIZATION:
+		return "_MALI_OSK_LOCK_ORDER_UTILIZATION";
+		break;
+	case _MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS:
+		return "_MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS";
+		break;
+	case _MALI_OSK_LOCK_ORDER_PM_STATE:
+		return "_MALI_OSK_LOCK_ORDER_PM_STATE";
+		break;
+	default:
+		return "<UNKNOWN_LOCK_ORDER>";
+	}
+}
+#endif /* LOCK_ORDER_CHECKING */
+#endif /* DEBUG */
diff --git a/utgard/r7p0/linux/mali_osk_locks.h b/utgard/r7p0/linux/mali_osk_locks.h
new file mode 100644
index 0000000..263d8a4
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_locks.h
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_locks.h
+ * Defines OS abstraction of lock and mutex
+ */
+#ifndef _MALI_OSK_LOCKS_H
+#define _MALI_OSK_LOCKS_H
+
+#include <linux/spinlock.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+
+#include <linux/slab.h>
+
+#include "mali_osk_types.h"
+
+#ifdef _cplusplus
+extern "C" {
+#endif
+
+	/* When DEBUG is enabled, this struct will be used to track owner, mode and order checking */
+#ifdef DEBUG
+	struct _mali_osk_lock_debug_s {
+		u32 owner;
+		_mali_osk_lock_flags_t orig_flags;
+		_mali_osk_lock_order_t order;
+		struct _mali_osk_lock_debug_s *next;
+	};
+#endif
+
+	/* Anstraction of spinlock_t */
+	struct _mali_osk_spinlock_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+		spinlock_t spinlock;
+	};
+
+	/* Abstration of spinlock_t and lock flag which is used to store register's state before locking */
+	struct _mali_osk_spinlock_irq_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+
+		spinlock_t spinlock;
+		unsigned long flags;
+	};
+
+	/* Abstraction of rw_semaphore in OS */
+	struct _mali_osk_mutex_rw_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+		_mali_osk_lock_mode_t mode;
+#endif
+
+		struct rw_semaphore rw_sema;
+	};
+
+	/* Mutex and mutex_interruptible functions share the same osk mutex struct */
+	struct _mali_osk_mutex_s {
+#ifdef DEBUG
+		struct _mali_osk_lock_debug_s checker;
+#endif
+		struct mutex mutex;
+	};
+
+#ifdef DEBUG
+	/** @brief _mali_osk_locks_debug_init/add/remove() functions are declared when DEBUG is enabled and
+	 * defined in file mali_osk_locks.c. When LOCK_ORDER_CHECKING is enabled, calling these functions when we
+	 * init/lock/unlock a lock/mutex, we could track lock order of a given tid. */
+	void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order);
+	void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker);
+	void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker);
+
+	/** @brief This function can return a given lock's owner when DEBUG     is enabled. */
+	static inline u32 _mali_osk_lock_get_owner(struct _mali_osk_lock_debug_s *lock)
+	{
+		return lock->owner;
+	}
+#else
+#define _mali_osk_locks_debug_init(x, y, z) do {} while (0)
+#define _mali_osk_locks_debug_add(x) do {} while (0)
+#define _mali_osk_locks_debug_remove(x) do {} while (0)
+#endif
+
+	/** @brief Before use _mali_osk_spin_lock, init function should be used to allocate memory and initial spinlock*/
+	static inline _mali_osk_spinlock_t *_mali_osk_spinlock_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_spinlock_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_spinlock_t), GFP_KERNEL);
+		if (lock == NULL) {
+			return NULL;
+		}
+		spin_lock_init(&lock->spinlock);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief Lock a spinlock */
+	static inline void  _mali_osk_spinlock_lock(_mali_osk_spinlock_t *lock)
+	{
+		BUG_ON(lock == NULL);
+		spin_lock(&lock->spinlock);
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock a spinlock */
+	static inline void _mali_osk_spinlock_unlock(_mali_osk_spinlock_t *lock)
+	{
+		BUG_ON(lock == NULL);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		spin_unlock(&lock->spinlock);
+	}
+
+	/** @brief Free a memory block which the argument lock pointed to and its type must be
+	 * _mali_osk_spinlock_t *. */
+	static inline void _mali_osk_spinlock_term(_mali_osk_spinlock_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(lock == NULL);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Before _mali_osk_spinlock_irq_lock/unlock/term() is called, init function should be
+	 * called to initial spinlock and flags in struct _mali_osk_spinlock_irq_t. */
+	static inline _mali_osk_spinlock_irq_t *_mali_osk_spinlock_irq_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_spinlock_irq_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_spinlock_irq_t), GFP_KERNEL);
+
+		if (lock == NULL) {
+			return NULL;
+		}
+
+		lock->flags = 0;
+		spin_lock_init(&lock->spinlock);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief Lock spinlock and save the register's state */
+	static inline void _mali_osk_spinlock_irq_lock(_mali_osk_spinlock_irq_t *lock)
+	{
+		unsigned long tmp_flags;
+
+		BUG_ON(lock == NULL);
+		spin_lock_irqsave(&lock->spinlock, tmp_flags);
+		lock->flags = tmp_flags;
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock spinlock with saved register's state */
+	static inline void _mali_osk_spinlock_irq_unlock(_mali_osk_spinlock_irq_t *lock)
+	{
+		BUG_ON(lock == NULL);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		spin_unlock_irqrestore(&lock->spinlock, lock->flags);
+	}
+
+	/** @brief Destroy a given memory block which lock pointed to, and the lock type must be
+	 * _mali_osk_spinlock_irq_t *. */
+	static inline void _mali_osk_spinlock_irq_term(_mali_osk_spinlock_irq_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(lock == NULL);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Before _mali_osk_mutex_rw_wait/signal/term() is called, we should call
+	 * _mali_osk_mutex_rw_init() to kmalloc a memory block and initial part of elements in it. */
+	static inline _mali_osk_mutex_rw_t *_mali_osk_mutex_rw_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_mutex_rw_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_mutex_rw_t), GFP_KERNEL);
+
+		if (lock == NULL) {
+			return NULL;
+		}
+
+		init_rwsem(&lock->rw_sema);
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief When call _mali_osk_mutex_rw_wait/signal() functions, the second argument mode
+	 * should be assigned with value _MALI_OSK_LOCKMODE_RO or _MALI_OSK_LOCKMODE_RW */
+	static inline void _mali_osk_mutex_rw_wait(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode)
+	{
+		BUG_ON(lock == NULL);
+		BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode));
+
+		if (mode == _MALI_OSK_LOCKMODE_RO) {
+			down_read(&lock->rw_sema);
+		} else {
+			down_write(&lock->rw_sema);
+		}
+
+#ifdef DEBUG
+		if (mode == _MALI_OSK_LOCKMODE_RW) {
+			lock->mode = mode;
+		} else { /* mode == _MALI_OSK_LOCKMODE_RO */
+			lock->mode = mode;
+		}
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+#endif
+	}
+
+	/** @brief Up lock->rw_sema with up_read/write() accordinf argument mode's value. */
+	static inline void  _mali_osk_mutex_rw_signal(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode)
+	{
+		BUG_ON(lock == NULL);
+		BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode));
+#ifdef DEBUG
+		/* make sure the thread releasing the lock actually was the owner */
+		if (mode == _MALI_OSK_LOCKMODE_RW) {
+			_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+			/* This lock now has no owner */
+			lock->checker.owner = 0;
+		}
+#endif
+
+		if (mode == _MALI_OSK_LOCKMODE_RO) {
+			up_read(&lock->rw_sema);
+		} else {
+			up_write(&lock->rw_sema);
+		}
+	}
+
+	/** @brief Free a given memory block which lock pointed to and its type must be
+	 * _mali_sok_mutex_rw_t *. */
+	static inline void _mali_osk_mutex_rw_term(_mali_osk_mutex_rw_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(lock == NULL);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+	/** @brief Mutex & mutex_interruptible share the same init and term function, because they have the
+	 * same osk mutex struct, and the difference between them is which locking function they use */
+	static inline _mali_osk_mutex_t *_mali_osk_mutex_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+	{
+		_mali_osk_mutex_t *lock = NULL;
+
+		lock = kmalloc(sizeof(_mali_osk_mutex_t), GFP_KERNEL);
+
+		if (lock == NULL) {
+			return NULL;
+		}
+		mutex_init(&lock->mutex);
+
+		_mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+		return lock;
+	}
+
+	/** @brief  Lock the lock->mutex with mutex_lock_interruptible function */
+	static inline _mali_osk_errcode_t _mali_osk_mutex_wait_interruptible(_mali_osk_mutex_t *lock)
+	{
+		_mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+		BUG_ON(lock == NULL);
+
+		if (mutex_lock_interruptible(&lock->mutex)) {
+			printk(KERN_WARNING "Mali: Can not lock mutex\n");
+			err = _MALI_OSK_ERR_RESTARTSYSCALL;
+		}
+
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+		return err;
+	}
+
+	/** @brief Unlock the lock->mutex which is locked with mutex_lock_interruptible() function. */
+	static inline void _mali_osk_mutex_signal_interruptible(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(lock == NULL);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		mutex_unlock(&lock->mutex);
+	}
+
+	/** @brief Lock the lock->mutex just with mutex_lock() function which could not be interruptted. */
+	static inline void _mali_osk_mutex_wait(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(lock == NULL);
+		mutex_lock(&lock->mutex);
+		_mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+	}
+
+	/** @brief Unlock the lock->mutex which is locked with mutex_lock() function. */
+	static inline void _mali_osk_mutex_signal(_mali_osk_mutex_t *lock)
+	{
+		BUG_ON(lock == NULL);
+		_mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+		mutex_unlock(&lock->mutex);
+	}
+
+	/** @brief Free a given memory block which lock point. */
+	static inline void _mali_osk_mutex_term(_mali_osk_mutex_t *lock)
+	{
+		/* Parameter validation  */
+		BUG_ON(lock == NULL);
+
+		/* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+		kfree(lock);
+	}
+
+#ifdef _cplusplus
+}
+#endif
+
+#endif
diff --git a/utgard/r7p0/linux/mali_osk_low_level_mem.c b/utgard/r7p0/linux/mali_osk_low_level_mem.c
new file mode 100644
index 0000000..45a467c
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_low_level_mem.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_low_level_mem.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <asm/io.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+
+void _mali_osk_mem_barrier(void)
+{
+	mb();
+}
+
+void _mali_osk_write_mem_barrier(void)
+{
+	wmb();
+}
+
+mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description)
+{
+	return (mali_io_address)ioremap_nocache(phys, size);
+}
+
+void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address virt)
+{
+	iounmap((void *)virt);
+}
+
+_mali_osk_errcode_t inline _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description)
+{
+#if MALI_LICENSE_IS_GPL
+	return _MALI_OSK_ERR_OK; /* GPL driver gets the mem region for the resources registered automatically */
+#else
+	return ((request_mem_region(phys, size, description) == NULL) ? _MALI_OSK_ERR_NOMEM : _MALI_OSK_ERR_OK);
+#endif
+}
+
+void inline _mali_osk_mem_unreqregion(uintptr_t phys, u32 size)
+{
+#if !MALI_LICENSE_IS_GPL
+	release_mem_region(phys, size);
+#endif
+}
+
+void inline _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val)
+{
+	__raw_writel(cpu_to_le32(val), ((u8 *)addr) + offset);
+}
+
+u32 inline _mali_osk_mem_ioread32(volatile mali_io_address addr, u32 offset)
+{
+	return ioread32(((u8 *)addr) + offset);
+}
+
+void inline _mali_osk_mem_iowrite32(volatile mali_io_address addr, u32 offset, u32 val)
+{
+	iowrite32(val, ((u8 *)addr) + offset);
+}
+
+void _mali_osk_cache_flushall(void)
+{
+	/** @note Cached memory is not currently supported in this implementation */
+}
+
+void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size)
+{
+	_mali_osk_write_mem_barrier();
+}
+
+u32 _mali_osk_mem_write_safe(void __user *dest, const void __user *src, u32 size)
+{
+#define MALI_MEM_SAFE_COPY_BLOCK_SIZE 4096
+	u32 retval = 0;
+	void *temp_buf;
+
+	temp_buf = kmalloc(MALI_MEM_SAFE_COPY_BLOCK_SIZE, GFP_KERNEL);
+	if (temp_buf != NULL) {
+		u32 bytes_left_to_copy = size;
+		u32 i;
+
+		for (i = 0; i < size; i += MALI_MEM_SAFE_COPY_BLOCK_SIZE) {
+			u32 size_to_copy;
+			u32 size_copied;
+			u32 bytes_left;
+
+			if (bytes_left_to_copy > MALI_MEM_SAFE_COPY_BLOCK_SIZE) {
+				size_to_copy = MALI_MEM_SAFE_COPY_BLOCK_SIZE;
+			} else {
+				size_to_copy = bytes_left_to_copy;
+			}
+
+			bytes_left = copy_from_user(temp_buf, ((char *)src) + i, size_to_copy);
+			size_copied = size_to_copy - bytes_left;
+
+			bytes_left = copy_to_user(((char *)dest) + i, temp_buf, size_copied);
+			size_copied -= bytes_left;
+
+			bytes_left_to_copy -= size_copied;
+			retval += size_copied;
+
+			if (size_copied != size_to_copy) {
+				break; /* Early out, we was not able to copy this entire block */
+			}
+		}
+
+		kfree(temp_buf);
+	}
+
+	return retval;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args)
+{
+	void __user *src;
+	void __user *dst;
+	struct mali_session_data *session;
+
+	MALI_DEBUG_ASSERT_POINTER(args);
+
+	session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	if (session == NULL) {
+		return _MALI_OSK_ERR_INVALID_ARGS;
+	}
+
+	src = (void __user *)(uintptr_t)args->src;
+	dst = (void __user *)(uintptr_t)args->dest;
+
+	/* Return number of bytes actually copied */
+	args->size = _mali_osk_mem_write_safe(dst, src, args->size);
+	return _MALI_OSK_ERR_OK;
+}
diff --git a/utgard/r7p0/linux/mali_osk_mali.c b/utgard/r7p0/linux/mali_osk_mali.c
new file mode 100644
index 0000000..129e688
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_mali.c
@@ -0,0 +1,493 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_mali.c
+ * Implementation of the OS abstraction layer which is specific for the Mali kernel device driver
+ */
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <linux/platform_device.h>
+#include <linux/mali/mali_utgard.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h" /* MALI_xxx macros */
+#include "mali_osk.h"           /* kernel side OS functions */
+#include "mali_kernel_linux.h"
+
+static mali_bool mali_secure_mode_enabled = MALI_FALSE;
+static mali_bool mali_secure_mode_supported = MALI_FALSE;
+
+/* Function that init the mali gpu secure mode */
+void (*mali_secure_mode_deinit)(void) = NULL;
+/* Function that reset GPU and enable the mali gpu secure mode */
+int (*mali_gpu_reset_and_secure_mode_enable)(void) = NULL;
+/* Function that reset GPU and disable the mali gpu secure mode */
+int (*mali_gpu_reset_and_secure_mode_disable)(void) = NULL;
+
+#ifdef CONFIG_MALI_DT
+
+#define MALI_OSK_INVALID_RESOURCE_ADDRESS 0xFFFFFFFF
+
+/**
+ * Define the max number of resource we could have.
+ */
+#define MALI_OSK_MAX_RESOURCE_NUMBER 27
+
+/**
+ * Define the max number of resource with interrupts, and they are
+ * the first 20 elements in array mali_osk_resource_bank.
+ */
+#define MALI_OSK_RESOURCE_WITH_IRQ_NUMBER 20
+
+/**
+ * pp core start and end location in mali_osk_resource_bank array.
+ */
+#define MALI_OSK_RESOURCE_PP_LOCATION_START 2
+#define MALI_OSK_RESOURCE_PP_LOCATION_END 17
+
+/**
+ * L2 cache start and end location in mali_osk_resource_bank array.
+ */
+#define MALI_OSK_RESOURCE_L2_LOCATION_START 20
+#define MALI_OSK_RESOURCE_l2_LOCATION_END 22
+
+/**
+ * DMA unit location.
+ */
+#define MALI_OSK_RESOURCE_DMA_LOCATION 26
+
+static _mali_osk_resource_t mali_osk_resource_bank[MALI_OSK_MAX_RESOURCE_NUMBER] = {
+	{.description = "Mali_GP", .base = MALI_OFFSET_GP, .irq_name = "IRQGP",},
+	{.description = "Mali_GP_MMU", .base = MALI_OFFSET_GP_MMU, .irq_name = "IRQGPMMU",},
+	{.description = "Mali_PP0", .base = MALI_OFFSET_PP0, .irq_name = "IRQPP0",},
+	{.description = "Mali_PP0_MMU", .base = MALI_OFFSET_PP0_MMU, .irq_name = "IRQPPMMU0",},
+	{.description = "Mali_PP1", .base = MALI_OFFSET_PP1, .irq_name = "IRQPP1",},
+	{.description = "Mali_PP1_MMU", .base = MALI_OFFSET_PP1_MMU, .irq_name = "IRQPPMMU1",},
+	{.description = "Mali_PP2", .base = MALI_OFFSET_PP2, .irq_name = "IRQPP2",},
+	{.description = "Mali_PP2_MMU", .base = MALI_OFFSET_PP2_MMU, .irq_name = "IRQPPMMU2",},
+	{.description = "Mali_PP3", .base = MALI_OFFSET_PP3, .irq_name = "IRQPP3",},
+	{.description = "Mali_PP3_MMU", .base = MALI_OFFSET_PP3_MMU, .irq_name = "IRQPPMMU3",},
+	{.description = "Mali_PP4", .base = MALI_OFFSET_PP4, .irq_name = "IRQPP4",},
+	{.description = "Mali_PP4_MMU", .base = MALI_OFFSET_PP4_MMU, .irq_name = "IRQPPMMU4",},
+	{.description = "Mali_PP5", .base = MALI_OFFSET_PP5, .irq_name = "IRQPP5",},
+	{.description = "Mali_PP5_MMU", .base = MALI_OFFSET_PP5_MMU, .irq_name = "IRQPPMMU5",},
+	{.description = "Mali_PP6", .base = MALI_OFFSET_PP6, .irq_name = "IRQPP6",},
+	{.description = "Mali_PP6_MMU", .base = MALI_OFFSET_PP6_MMU, .irq_name = "IRQPPMMU6",},
+	{.description = "Mali_PP7", .base = MALI_OFFSET_PP7, .irq_name = "IRQPP7",},
+	{.description = "Mali_PP7_MMU", .base = MALI_OFFSET_PP7_MMU, .irq_name = "IRQPPMMU",},
+	{.description = "Mali_PP_Broadcast", .base = MALI_OFFSET_PP_BCAST, .irq_name = "IRQPP",},
+	{.description = "Mali_PMU", .base = MALI_OFFSET_PMU, .irq_name = "IRQPMU",},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE0,},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE1,},
+	{.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE2,},
+	{.description = "Mali_PP_MMU_Broadcast", .base = MALI_OFFSET_PP_BCAST_MMU,},
+	{.description = "Mali_Broadcast", .base = MALI_OFFSET_BCAST,},
+	{.description = "Mali_DLBU", .base = MALI_OFFSET_DLBU,},
+	{.description = "Mali_DMA", .base = MALI_OFFSET_DMA,},
+};
+
+static int _mali_osk_get_compatible_name(const char **out_string)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+
+	MALI_DEBUG_ASSERT(node != NULL);
+
+	return of_property_read_string(node, "compatible", out_string);
+}
+
+_mali_osk_errcode_t _mali_osk_resource_initialize(void)
+{
+	mali_bool mali_is_450 = MALI_FALSE, mali_is_470 = MALI_FALSE;
+	int i, pp_core_num = 0, l2_core_num = 0;
+	struct resource *res;
+	const char *compatible_name = NULL;
+
+	if (_mali_osk_get_compatible_name(&compatible_name) == 0) {
+		if (0 == strncmp(compatible_name, "arm,mali-450", strlen("arm,mali-450"))) {
+			mali_is_450 = MALI_TRUE;
+			MALI_DEBUG_PRINT(2, ("mali-450 device tree detected."));
+		} else if (0 == strncmp(compatible_name, "arm,mali-470", strlen("arm,mali-470"))) {
+			mali_is_470 = MALI_TRUE;
+			MALI_DEBUG_PRINT(2, ("mali-470 device tree detected."));
+		}
+	}
+
+	for (i = 0; i < MALI_OSK_RESOURCE_WITH_IRQ_NUMBER; i++) {
+		res = platform_get_resource_byname(mali_platform_device, IORESOURCE_IRQ, mali_osk_resource_bank[i].irq_name);
+		if (res) {
+			mali_osk_resource_bank[i].irq = res->start;
+		} else {
+			mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+		}
+	}
+
+	for (i = MALI_OSK_RESOURCE_PP_LOCATION_START; i <= MALI_OSK_RESOURCE_PP_LOCATION_END; i++) {
+		if (mali_osk_resource_bank[i].base != MALI_OSK_INVALID_RESOURCE_ADDRESS) {
+			pp_core_num++;
+		}
+	}
+
+	/* We have to divide by 2, because we caculate twice for only one pp(pp_core and pp_mmu_core). */
+	if (0 != pp_core_num % 2) {
+		MALI_DEBUG_PRINT(2, ("The value of pp core number isn't normal."));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	pp_core_num /= 2;
+
+	/**
+	 * we can caculate the number of l2 cache core according the number of pp core number
+	 * and device type(mali400/mali450/mali470).
+	 */
+	l2_core_num = 1;
+	if (mali_is_450) {
+		if (pp_core_num > 4) {
+			l2_core_num = 3;
+		} else if (pp_core_num <= 4) {
+			l2_core_num = 2;
+		}
+	}
+
+	for (i = MALI_OSK_RESOURCE_l2_LOCATION_END; i > MALI_OSK_RESOURCE_L2_LOCATION_START + l2_core_num - 1; i--) {
+		mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+	}
+
+	/* If device is not mali-450 type, we have to remove related resource from resource bank. */
+	if (!(mali_is_450 || mali_is_470)) {
+		for (i = MALI_OSK_RESOURCE_l2_LOCATION_END + 1; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) {
+			mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+		}
+	}
+
+	if (mali_is_470)
+		mali_osk_resource_bank[MALI_OSK_RESOURCE_DMA_LOCATION].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res)
+{
+	int i;
+
+	if (mali_platform_device == NULL) {
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	/* Traverse all of resources in resources bank to find the matching one. */
+	for (i = 0; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) {
+		if (mali_osk_resource_bank[i].base == addr) {
+			if (res != NULL) {
+				res->base = addr + _mali_osk_resource_base_address();
+				res->description = mali_osk_resource_bank[i].description;
+				res->irq = mali_osk_resource_bank[i].irq;
+			}
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+uintptr_t _mali_osk_resource_base_address(void)
+{
+	struct resource *reg_res = NULL;
+	uintptr_t ret = 0;
+
+	reg_res = platform_get_resource(mali_platform_device, IORESOURCE_MEM, 0);
+
+	if (reg_res != NULL) {
+		ret = reg_res->start;
+	}
+
+	return ret;
+}
+
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	MALI_DEBUG_PRINT(2, ("Get pmu config from device tree configuration.\n"));
+
+	MALI_DEBUG_ASSERT(node != NULL);
+
+	if (!of_get_property(node, "pmu_domain_config", &length)) {
+		return;
+	}
+
+	if (array_size != length / sizeof(u32)) {
+		MALI_PRINT_ERROR(("Wrong pmu domain config in device tree."));
+		return;
+	}
+
+	of_property_for_each_u32(node, "pmu_domain_config", prop, p, u) {
+		domain_config_array[i] = (u16)u;
+		i++;
+	}
+
+	return;
+}
+
+u32 _mali_osk_get_pmu_switch_delay(void)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+	u32 switch_delay;
+
+	MALI_DEBUG_ASSERT(node != NULL);
+
+	if (0 == of_property_read_u32(node, "pmu_switch_delay", &switch_delay)) {
+		return switch_delay;
+	} else {
+		MALI_DEBUG_PRINT(2, ("Couldn't find pmu_switch_delay in device tree configuration.\n"));
+	}
+
+	return 0;
+}
+
+#else /* CONFIG_MALI_DT */
+
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res)
+{
+	int i;
+	uintptr_t phys_addr;
+
+	if (mali_platform_device == NULL) {
+		/* Not connected to a device */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	phys_addr = addr + _mali_osk_resource_base_address();
+	for (i = 0; i < mali_platform_device->num_resources; i++) {
+		if (IORESOURCE_MEM == resource_type(&(mali_platform_device->resource[i])) &&
+		    mali_platform_device->resource[i].start == phys_addr) {
+			if (res != NULL) {
+				res->base = phys_addr;
+				res->description = mali_platform_device->resource[i].name;
+
+				/* Any (optional) IRQ resource belonging to this resource will follow */
+				if ((i + 1) < mali_platform_device->num_resources &&
+				    IORESOURCE_IRQ == resource_type(&(mali_platform_device->resource[i + 1]))) {
+					res->irq = mali_platform_device->resource[i + 1].start;
+				} else {
+					res->irq = -1;
+				}
+			}
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+uintptr_t _mali_osk_resource_base_address(void)
+{
+	uintptr_t lowest_addr = (uintptr_t)(0 - 1);
+	uintptr_t ret = 0;
+
+	if (mali_platform_device != NULL) {
+		int i;
+
+		for (i = 0; i < mali_platform_device->num_resources; i++) {
+			if (mali_platform_device->resource[i].flags & IORESOURCE_MEM &&
+			    mali_platform_device->resource[i].start < lowest_addr) {
+				lowest_addr = mali_platform_device->resource[i].start;
+				ret = lowest_addr;
+			}
+		}
+	}
+
+	return ret;
+}
+
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size)
+{
+	_mali_osk_device_data data = { 0, };
+
+	MALI_DEBUG_PRINT(2, ("Get pmu config from platform device data.\n"));
+	if (_mali_osk_device_data_get(&data) == _MALI_OSK_ERR_OK) {
+		/* Copy the custom customer power domain config */
+		_mali_osk_memcpy(domain_config_array, data.pmu_domain_config, sizeof(data.pmu_domain_config));
+	}
+
+	return;
+}
+
+u32 _mali_osk_get_pmu_switch_delay(void)
+{
+	_mali_osk_errcode_t err;
+	_mali_osk_device_data data = { 0, };
+
+	err = _mali_osk_device_data_get(&data);
+
+	if (err == _MALI_OSK_ERR_OK) {
+		return data.pmu_switch_delay;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_MALI_DT */
+
+_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(data);
+
+	if (mali_platform_device != NULL) {
+		struct mali_gpu_device_data *os_data = NULL;
+
+		os_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data;
+		if (os_data != NULL) {
+			/* Copy data from OS dependant struct to Mali neutral struct (identical!) */
+			BUILD_BUG_ON(sizeof(*os_data) != sizeof(*data));
+			_mali_osk_memcpy(data, os_data, sizeof(*os_data));
+
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+u32 _mali_osk_identify_gpu_resource(void)
+{
+	if (_mali_osk_resource_find(MALI_OFFSET_L2_RESOURCE1, NULL) == _MALI_OSK_ERR_OK)
+		/* Mali 450 */
+		return 0x450;
+
+	if (_mali_osk_resource_find(MALI_OFFSET_DLBU, NULL) == _MALI_OSK_ERR_OK)
+		/* Mali 470 */
+		return 0x470;
+
+	/* Mali 400 */
+	return 0x400;
+}
+
+mali_bool _mali_osk_shared_interrupts(void)
+{
+	u32 irqs[128];
+	u32 i, j, irq, num_irqs_found = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	MALI_DEBUG_ASSERT(mali_platform_device->num_resources <= 128);
+
+	for (i = 0; i < mali_platform_device->num_resources; i++) {
+		if (IORESOURCE_IRQ & mali_platform_device->resource[i].flags) {
+			irq = mali_platform_device->resource[i].start;
+
+			for (j = 0; j < num_irqs_found; ++j) {
+				if (irq == irqs[j]) {
+					return MALI_TRUE;
+				}
+			}
+
+			irqs[num_irqs_found++] = irq;
+		}
+	}
+
+	return MALI_FALSE;
+}
+
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_init(void)
+{
+	_mali_osk_device_data data = { 0, };
+
+	if (_mali_osk_device_data_get(&data) == _MALI_OSK_ERR_OK) {
+		if ((NULL != data.secure_mode_init) && (data.secure_mode_deinit != NULL)
+		    && (NULL != data.gpu_reset_and_secure_mode_enable) && (data.gpu_reset_and_secure_mode_disable != NULL)) {
+			int err = data.secure_mode_init();
+
+			if (err) {
+				MALI_DEBUG_PRINT(1, ("Failed to init gpu secure mode.\n"));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			mali_secure_mode_deinit = data.secure_mode_deinit;
+			mali_gpu_reset_and_secure_mode_enable = data.gpu_reset_and_secure_mode_enable;
+			mali_gpu_reset_and_secure_mode_disable = data.gpu_reset_and_secure_mode_disable;
+
+			mali_secure_mode_supported = MALI_TRUE;
+			mali_secure_mode_enabled = MALI_FALSE;
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+	MALI_DEBUG_PRINT(3, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+
+}
+
+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_deinit(void)
+{
+	if (mali_secure_mode_deinit != NULL) {
+		mali_secure_mode_deinit();
+		mali_secure_mode_enabled = MALI_FALSE;
+		mali_secure_mode_supported = MALI_FALSE;
+		return _MALI_OSK_ERR_OK;
+	}
+	MALI_DEBUG_PRINT(3, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+
+}
+
+
+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_enable(void)
+{
+	/* the mali executor lock must be held before enter this function. */
+
+	MALI_DEBUG_ASSERT(mali_secure_mode_enabled == MALI_FALSE);
+
+	if (mali_gpu_reset_and_secure_mode_enable != NULL) {
+		if (mali_gpu_reset_and_secure_mode_enable()) {
+			MALI_DEBUG_PRINT(1, ("Failed to reset GPU or enable gpu secure mode.\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+		mali_secure_mode_enabled = MALI_TRUE;
+		return _MALI_OSK_ERR_OK;
+	}
+	MALI_DEBUG_PRINT(1, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+}
+
+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_disable(void)
+{
+	/* the mali executor lock must be held before enter this function. */
+
+	MALI_DEBUG_ASSERT(mali_secure_mode_enabled == MALI_TRUE);
+
+	if (mali_gpu_reset_and_secure_mode_disable != NULL) {
+		if (mali_gpu_reset_and_secure_mode_disable()) {
+			MALI_DEBUG_PRINT(1, ("Failed to reset GPU or disable gpu secure mode.\n"));
+			return _MALI_OSK_ERR_FAULT;
+		}
+		mali_secure_mode_enabled = MALI_FALSE;
+
+		return _MALI_OSK_ERR_OK;
+
+	}
+	MALI_DEBUG_PRINT(1, ("GPU secure mode not supported.\n"));
+	return _MALI_OSK_ERR_UNSUPPORTED;
+
+}
+
+mali_bool _mali_osk_gpu_secure_mode_is_enabled(void)
+{
+	return mali_secure_mode_enabled;
+}
+
+mali_bool _mali_osk_gpu_secure_mode_is_supported(void)
+{
+	return mali_secure_mode_supported;
+}
+
+
diff --git a/utgard/r7p0/linux/mali_osk_math.c b/utgard/r7p0/linux/mali_osk_math.c
new file mode 100644
index 0000000..e3c5dc9
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_math.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_math.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/bitops.h>
+
+u32 _mali_osk_clz(u32 input)
+{
+	return 32 - fls(input);
+}
+
+u32 _mali_osk_fls(u32 input)
+{
+	return fls(input);
+}
diff --git a/utgard/r7p0/linux/mali_osk_memory.c b/utgard/r7p0/linux/mali_osk_memory.c
new file mode 100644
index 0000000..41c7504
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_memory.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_memory.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+void inline * _mali_osk_calloc(u32 n, u32 size)
+{
+	return kcalloc(n, size, GFP_KERNEL);
+}
+
+void inline * _mali_osk_malloc(u32 size)
+{
+	return kmalloc(size, GFP_KERNEL);
+}
+
+void inline _mali_osk_free(void *ptr)
+{
+	kfree(ptr);
+}
+
+void inline * _mali_osk_valloc(u32 size)
+{
+	return vmalloc(size);
+}
+
+void inline _mali_osk_vfree(void *ptr)
+{
+	vfree(ptr);
+}
+
+void inline * _mali_osk_memcpy(void *dst, const void *src, u32  len)
+{
+	return memcpy(dst, src, len);
+}
+
+void inline * _mali_osk_memset(void *s, u32 c, u32 n)
+{
+	return memset(s, c, n);
+}
+
+mali_bool _mali_osk_mem_check_allocated(u32 max_allocated)
+{
+	/* No need to prevent an out-of-memory dialogue appearing on Linux,
+	 * so we always return MALI_TRUE.
+	 */
+	return MALI_TRUE;
+}
diff --git a/utgard/r7p0/linux/mali_osk_misc.c b/utgard/r7p0/linux/mali_osk_misc.c
new file mode 100644
index 0000000..1dc150f
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_misc.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_misc.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include "mali_osk.h"
+
+#if !defined(CONFIG_MALI_QUIET)
+void _mali_osk_dbgmsg(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vprintk(fmt, args);
+	va_end(args);
+}
+#endif /* !defined(CONFIG_MALI_QUIET) */
+
+u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...)
+{
+	int res;
+	va_list args;
+
+	va_start(args, fmt);
+
+	res = vscnprintf(buf, (size_t)size, fmt, args);
+
+	va_end(args);
+	return res;
+}
+
+void _mali_osk_abort(void)
+{
+	/* make a simple fault by dereferencing a NULL pointer */
+	dump_stack();
+	*(int *)0 = 0;
+}
+
+void _mali_osk_break(void)
+{
+	_mali_osk_abort();
+}
+
+u32 _mali_osk_get_pid(void)
+{
+	/* Thread group ID is the process ID on Linux */
+	return (u32)current->tgid;
+}
+
+char *_mali_osk_get_comm(void)
+{
+	return (char *)current->comm;
+}
+
+
+u32 _mali_osk_get_tid(void)
+{
+	/* pid is actually identifying the thread on Linux */
+	u32 tid = current->pid;
+
+	/* If the pid is 0 the core was idle.  Instead of returning 0 we return a special number
+	 * identifying which core we are on. */
+	if (tid == 0) {
+		tid = -(1 + raw_smp_processor_id());
+	}
+
+	return tid;
+}
diff --git a/utgard/r7p0/linux/mali_osk_notification.c b/utgard/r7p0/linux/mali_osk_notification.c
new file mode 100644
index 0000000..cc92cb7
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_notification.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_notification.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+/**
+ * Declaration of the notification queue object type
+ * Contains a linked list of notification pending delivery to user space.
+ * It also contains a wait queue of exclusive waiters blocked in the ioctl
+ * When a new notification is posted a single thread is resumed.
+ */
+struct _mali_osk_notification_queue_t_struct {
+	spinlock_t mutex; /**< Mutex protecting the list */
+	wait_queue_head_t receive_queue; /**< Threads waiting for new entries to the queue */
+	struct list_head head; /**< List of notifications waiting to be picked up */
+};
+
+typedef struct _mali_osk_notification_wrapper_t_struct {
+	struct list_head list;           /**< Internal linked list variable */
+	_mali_osk_notification_t data;   /**< Notification data */
+} _mali_osk_notification_wrapper_t;
+
+_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void)
+{
+	_mali_osk_notification_queue_t         *result;
+
+	result = (_mali_osk_notification_queue_t *)kmalloc(sizeof(_mali_osk_notification_queue_t), GFP_KERNEL);
+	if (result == NULL) return NULL;
+
+	spin_lock_init(&result->mutex);
+	init_waitqueue_head(&result->receive_queue);
+	INIT_LIST_HEAD(&result->head);
+
+	return result;
+}
+
+_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size)
+{
+	/* OPT Recycling of notification objects */
+	_mali_osk_notification_wrapper_t *notification;
+
+	notification = (_mali_osk_notification_wrapper_t *)kmalloc(sizeof(_mali_osk_notification_wrapper_t) + size,
+			GFP_KERNEL | __GFP_HIGH | __GFP_REPEAT);
+	if (notification == NULL) {
+		MALI_DEBUG_PRINT(1, ("Failed to create a notification object\n"));
+		return NULL;
+	}
+
+	/* Init the list */
+	INIT_LIST_HEAD(&notification->list);
+
+	if (size != 0) {
+		notification->data.result_buffer = ((u8 *)notification) + sizeof(_mali_osk_notification_wrapper_t);
+	} else {
+		notification->data.result_buffer = NULL;
+	}
+
+	/* set up the non-allocating fields */
+	notification->data.notification_type = type;
+	notification->data.result_buffer_size = size;
+
+	/* all ok */
+	return &(notification->data);
+}
+
+void _mali_osk_notification_delete(_mali_osk_notification_t *object)
+{
+	_mali_osk_notification_wrapper_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(object);
+
+	notification = container_of(object, _mali_osk_notification_wrapper_t, data);
+
+	/* Free the container */
+	kfree(notification);
+}
+
+void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue)
+{
+	_mali_osk_notification_t *result;
+
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	while (_mali_osk_notification_queue_dequeue(queue, &result) == _MALI_OSK_ERR_OK) {
+		_mali_osk_notification_delete(result);
+	}
+
+	/* not much to do, just free the memory */
+	kfree(queue);
+}
+void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object)
+{
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	unsigned long irq_flags;
+#endif
+
+	_mali_osk_notification_wrapper_t *notification;
+
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_ASSERT_POINTER(object);
+
+	notification = container_of(object, _mali_osk_notification_wrapper_t, data);
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_lock_irqsave(&queue->mutex, irq_flags);
+#else
+	spin_lock(&queue->mutex);
+#endif
+
+	list_add_tail(&notification->list, &queue->head);
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_unlock_irqrestore(&queue->mutex, irq_flags);
+#else
+	spin_unlock(&queue->mutex);
+#endif
+
+	/* and wake up one possible exclusive waiter */
+	wake_up(&queue->receive_queue);
+}
+
+_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result)
+{
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	unsigned long irq_flags;
+#endif
+
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	_mali_osk_notification_wrapper_t *wrapper_object;
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_lock_irqsave(&queue->mutex, irq_flags);
+#else
+	spin_lock(&queue->mutex);
+#endif
+
+	if (!list_empty(&queue->head)) {
+		wrapper_object = list_entry(queue->head.next, _mali_osk_notification_wrapper_t, list);
+		*result = &(wrapper_object->data);
+		list_del_init(&wrapper_object->list);
+		ret = _MALI_OSK_ERR_OK;
+	}
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+	spin_unlock_irqrestore(&queue->mutex, irq_flags);
+#else
+	spin_unlock(&queue->mutex);
+#endif
+
+	return ret;
+}
+
+_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result)
+{
+	/* check input */
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_ASSERT_POINTER(result);
+
+	/* default result */
+	*result = NULL;
+
+	if (wait_event_interruptible(queue->receive_queue,
+				     _MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, result))) {
+		return _MALI_OSK_ERR_RESTARTSYSCALL;
+	}
+
+	return _MALI_OSK_ERR_OK; /* all ok */
+}
diff --git a/utgard/r7p0/linux/mali_osk_pm.c b/utgard/r7p0/linux/mali_osk_pm.c
new file mode 100644
index 0000000..e2d9d7c
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_pm.c
@@ -0,0 +1,85 @@
+/**
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_pm.c
+ * Implementation of the callback functions from common power management
+ */
+
+#include <linux/sched.h>
+
+#include "mali_kernel_linux.h"
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_PM_RUNTIME */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/* Can NOT run in atomic context */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	int err;
+
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	err = pm_runtime_get_sync(&(mali_platform_device->dev));
+#if (KERNEL_VERSION(2, 6, 37) <= LINUX_VERSION_CODE)
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+#endif
+	if (err < 0) {
+		MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get_sync() returned error code %d\n", err));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+	return _MALI_OSK_ERR_OK;
+}
+
+/* Can run in atomic context */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	int err;
+
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+	err = pm_runtime_get(&(mali_platform_device->dev));
+#if (KERNEL_VERSION(2, 6, 37) <= LINUX_VERSION_CODE)
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+#endif
+	if (0 > err && -EINPROGRESS != err) {
+		MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get() returned error code %d\n", err));
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif
+	return _MALI_OSK_ERR_OK;
+}
+
+
+/* Can run in atomic context */
+void _mali_osk_pm_dev_ref_put(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+#if (KERNEL_VERSION(2, 6, 37) <= LINUX_VERSION_CODE)
+	pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+	pm_runtime_put_autosuspend(&(mali_platform_device->dev));
+#else
+	pm_runtime_put(&(mali_platform_device->dev));
+#endif
+#endif
+}
+
+void _mali_osk_pm_dev_barrier(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+	pm_runtime_barrier(&(mali_platform_device->dev));
+#endif
+}
diff --git a/utgard/r7p0/linux/mali_osk_profiling.c b/utgard/r7p0/linux/mali_osk_profiling.c
new file mode 100644
index 0000000..b9fca0c
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_profiling.c
@@ -0,0 +1,1310 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/hrtimer.h>
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/sched.h>
+
+#include <mali_profiling_gator_api.h>
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_osk_profiling.h"
+#include "mali_linux_trace.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_l2_cache.h"
+#include "mali_user_settings_db.h"
+#include "mali_executor.h"
+#include "mali_memory_manager.h"
+
+#define MALI_PROFILING_STREAM_DATA_DEFAULT_SIZE 100
+#define MALI_PROFILING_STREAM_HOLD_TIME 1000000         /*1 ms */
+
+#define MALI_PROFILING_STREAM_BUFFER_SIZE       (1 << 12)
+#define MALI_PROFILING_STREAM_BUFFER_NUM        100
+
+/**
+ * Define the mali profiling stream struct.
+ */
+typedef struct mali_profiling_stream {
+	u8 data[MALI_PROFILING_STREAM_BUFFER_SIZE];
+	u32 used_size;
+	struct list_head list;
+} mali_profiling_stream;
+
+typedef struct mali_profiling_stream_list {
+	spinlock_t spin_lock;
+	struct list_head free_list;
+	struct list_head queue_list;
+} mali_profiling_stream_list;
+
+static const char mali_name[] = "4xx";
+static const char utgard_setup_version[] = "ANNOTATE_SETUP 1\n";
+
+static u32 profiling_sample_rate;
+static u32 first_sw_counter_index;
+
+static mali_bool l2_cache_counter_if_enabled = MALI_FALSE;
+static u32 num_counters_enabled;
+static u32 mem_counters_enabled;
+
+static _mali_osk_atomic_t stream_fd_if_used;
+
+static wait_queue_head_t stream_fd_wait_queue;
+static mali_profiling_counter *global_mali_profiling_counters;
+static u32 num_global_mali_profiling_counters;
+
+static mali_profiling_stream_list *global_mali_stream_list;
+static mali_profiling_stream *mali_counter_stream;
+static mali_profiling_stream *mali_core_activity_stream;
+static u64 mali_core_activity_stream_dequeue_time;
+static spinlock_t mali_activity_lock;
+static u32 mali_activity_cores_num;
+static struct hrtimer profiling_sampling_timer;
+
+const char *_mali_mem_counter_descriptions[] = _MALI_MEM_COUTNER_DESCRIPTIONS;
+const char *_mali_special_counter_descriptions[] = _MALI_SPCIAL_COUNTER_DESCRIPTIONS;
+
+static u32 current_profiling_pid;
+
+static void _mali_profiling_stream_list_destory(mali_profiling_stream_list *profiling_stream_list)
+{
+	mali_profiling_stream *profiling_stream, *tmp_profiling_stream;
+
+	MALI_DEBUG_ASSERT_POINTER(profiling_stream_list);
+
+	list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &profiling_stream_list->free_list, list) {
+		list_del(&profiling_stream->list);
+		kfree(profiling_stream);
+	}
+
+	list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &profiling_stream_list->queue_list, list) {
+		list_del(&profiling_stream->list);
+		kfree(profiling_stream);
+	}
+
+	kfree(profiling_stream_list);
+}
+
+static void _mali_profiling_global_stream_list_free(void)
+{
+	mali_profiling_stream *profiling_stream, *tmp_profiling_stream;
+	unsigned long irq_flags;
+
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+	list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &global_mali_stream_list->queue_list, list) {
+		profiling_stream->used_size = 0;
+		list_move(&profiling_stream->list, &global_mali_stream_list->free_list);
+	}
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+}
+
+static _mali_osk_errcode_t _mali_profiling_global_stream_list_dequeue(struct list_head *stream_list, mali_profiling_stream **new_mali_profiling_stream)
+{
+	unsigned long irq_flags;
+	_mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	MALI_DEBUG_ASSERT_POINTER(stream_list);
+
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+
+	if (!list_empty(stream_list)) {
+		*new_mali_profiling_stream = list_entry(stream_list->next, mali_profiling_stream, list);
+		list_del_init(&(*new_mali_profiling_stream)->list);
+	} else {
+		ret = _MALI_OSK_ERR_NOMEM;
+	}
+
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+
+	return ret;
+}
+
+static void _mali_profiling_global_stream_list_queue(struct list_head *stream_list, mali_profiling_stream *current_mali_profiling_stream)
+{
+	unsigned long irq_flags;
+
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	MALI_DEBUG_ASSERT_POINTER(stream_list);
+
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+	list_add_tail(&current_mali_profiling_stream->list, stream_list);
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+}
+
+static mali_bool _mali_profiling_global_stream_queue_list_if_empty(void)
+{
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+	return list_empty(&global_mali_stream_list->queue_list);
+}
+
+static u32 _mali_profiling_global_stream_queue_list_next_size(void)
+{
+	unsigned long irq_flags;
+	u32 size = 0;
+
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+	spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+	if (!list_empty(&global_mali_stream_list->queue_list)) {
+		mali_profiling_stream *next_mali_profiling_stream =
+			list_entry(global_mali_stream_list->queue_list.next, mali_profiling_stream, list);
+		size = next_mali_profiling_stream->used_size;
+	}
+	spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+	return size;
+}
+
+/* The mali profiling stream file operations functions. */
+static ssize_t _mali_profiling_stream_read(
+	struct file *filp,
+	char __user *buffer,
+	size_t      size,
+	loff_t      *f_pos);
+
+static unsigned int  _mali_profiling_stream_poll(struct file *filp, poll_table *wait);
+
+static int  _mali_profiling_stream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations mali_profiling_stream_fops = {
+	.release = _mali_profiling_stream_release,
+	.read    = _mali_profiling_stream_read,
+	.poll    = _mali_profiling_stream_poll,
+};
+
+static ssize_t _mali_profiling_stream_read(
+	struct file *filp,
+	char __user *buffer,
+	size_t      size,
+	loff_t      *f_pos)
+{
+	u32 copy_len = 0;
+	mali_profiling_stream *current_mali_profiling_stream;
+	u32 used_size;
+
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+	while (!_mali_profiling_global_stream_queue_list_if_empty()) {
+		used_size = _mali_profiling_global_stream_queue_list_next_size();
+		if (used_size <= ((u32)size - copy_len)) {
+			current_mali_profiling_stream = NULL;
+			_mali_profiling_global_stream_list_dequeue(&global_mali_stream_list->queue_list,
+					&current_mali_profiling_stream);
+			MALI_DEBUG_ASSERT_POINTER(current_mali_profiling_stream);
+			if (copy_to_user(&buffer[copy_len], current_mali_profiling_stream->data, current_mali_profiling_stream->used_size)) {
+				current_mali_profiling_stream->used_size = 0;
+				_mali_profiling_global_stream_list_queue(&global_mali_stream_list->free_list, current_mali_profiling_stream);
+				return -EFAULT;
+			}
+			copy_len += current_mali_profiling_stream->used_size;
+			current_mali_profiling_stream->used_size = 0;
+			_mali_profiling_global_stream_list_queue(&global_mali_stream_list->free_list, current_mali_profiling_stream);
+		} else {
+			break;
+		}
+	}
+	return (ssize_t)copy_len;
+}
+
+static unsigned int  _mali_profiling_stream_poll(struct file *filp, poll_table *wait)
+{
+	poll_wait(filp, &stream_fd_wait_queue, wait);
+	if (!_mali_profiling_global_stream_queue_list_if_empty())
+		return POLLIN;
+	return 0;
+}
+
+static int  _mali_profiling_stream_release(struct inode *inode, struct file *filp)
+{
+	_mali_osk_atomic_init(&stream_fd_if_used, 0);
+	return 0;
+}
+
+/* The funs for control packet and stream data.*/
+static void _mali_profiling_set_packet_size(unsigned char *const buf, const u32 size)
+{
+	u32 i;
+
+	for (i = 0; i < sizeof(size); ++i)
+		buf[i] = (size >> 8 * i) & 0xFF;
+}
+
+static u32 _mali_profiling_get_packet_size(unsigned char *const buf)
+{
+	u32 i;
+	u32 size = 0;
+
+	for (i = 0; i < sizeof(size); ++i)
+		size |= (u32)buf[i] << 8 * i;
+	return size;
+}
+
+static u32 _mali_profiling_read_packet_int(unsigned char *const buf, u32 *const pos, u32 const packet_size)
+{
+	u64 int_value = 0;
+	u8 shift = 0;
+	u8 byte_value = ~0;
+
+	while ((byte_value & 0x80) != 0) {
+		if ((*pos) >= packet_size)
+			return -1;
+		byte_value = buf[*pos];
+		*pos += 1;
+		int_value |= (u32)(byte_value & 0x7f) << shift;
+		shift += 7;
+	}
+
+	if (shift < 8 * sizeof(int_value) && (byte_value & 0x40) != 0) {
+		int_value |= -(1 << shift);
+	}
+
+	return int_value;
+}
+
+static u32 _mali_profiling_pack_int(u8 *const buf, u32 const buf_size, u32 const pos, s32 value)
+{
+	u32 add_bytes = 0;
+	int more = 1;
+
+	while (more) {
+		/* low order 7 bits of val */
+		char byte_value = value & 0x7f;
+
+		value >>= 7;
+
+		if ((value == 0 && (byte_value & 0x40) == 0) || (value == -1 && (byte_value & 0x40) != 0)) {
+			more = 0;
+		} else {
+			byte_value |= 0x80;
+		}
+
+		if ((pos + add_bytes) >= buf_size)
+			return 0;
+		buf[pos + add_bytes] = byte_value;
+		add_bytes++;
+	}
+
+	return add_bytes;
+}
+
+static int _mali_profiling_pack_long(uint8_t *const buf, u32 const buf_size, u32 const pos, s64 val)
+{
+	int add_bytes = 0;
+	int more = 1;
+
+	while (more) {
+		/* low order 7 bits of x */
+		char byte_value = val & 0x7f;
+
+		val >>= 7;
+
+		if ((val == 0 && (byte_value & 0x40) == 0) || (val == -1 && (byte_value & 0x40) != 0)) {
+			more = 0;
+		} else {
+			byte_value |= 0x80;
+		}
+
+		MALI_DEBUG_ASSERT((pos + add_bytes) < buf_size);
+		buf[pos + add_bytes] = byte_value;
+		add_bytes++;
+	}
+
+	return add_bytes;
+}
+
+static void _mali_profiling_stream_add_counter(mali_profiling_stream *profiling_stream, s64 current_time, u32 key, u32 counter_value)
+{
+	u32 add_size = STREAM_HEADER_SIZE;
+
+	MALI_DEBUG_ASSERT_POINTER(profiling_stream);
+	MALI_DEBUG_ASSERT((profiling_stream->used_size) < MALI_PROFILING_STREAM_BUFFER_SIZE);
+
+	profiling_stream->data[profiling_stream->used_size] = STREAM_HEADER_COUNTER_VALUE;
+
+	add_size += _mali_profiling_pack_long(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					      profiling_stream->used_size + add_size, current_time);
+	add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					     profiling_stream->used_size + add_size, (s32)0);
+	add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					     profiling_stream->used_size + add_size, (s32)key);
+	add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+					     profiling_stream->used_size + add_size, (s32)counter_value);
+
+	_mali_profiling_set_packet_size(profiling_stream->data + profiling_stream->used_size + 1,
+					add_size - STREAM_HEADER_SIZE);
+
+	profiling_stream->used_size += add_size;
+}
+
+/* The callback function for sampling timer.*/
+static enum hrtimer_restart  _mali_profiling_sampling_counters(struct hrtimer *timer)
+{
+	u32 counter_index;
+	s64 current_time;
+
+	MALI_DEBUG_ASSERT_POINTER(global_mali_profiling_counters);
+	MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+	MALI_DEBUG_ASSERT(mali_counter_stream == NULL);
+	if (_MALI_OSK_ERR_OK == _mali_profiling_global_stream_list_dequeue(
+		    &global_mali_stream_list->free_list, &mali_counter_stream)) {
+
+		MALI_DEBUG_ASSERT_POINTER(mali_counter_stream);
+		MALI_DEBUG_ASSERT(mali_counter_stream->used_size == 0);
+
+		/* Capture l2 cache counter values if enabled */
+		if (l2_cache_counter_if_enabled == MALI_TRUE) {
+			int i, j = 0;
+			_mali_profiling_l2_counter_values l2_counters_values;
+
+			_mali_profiling_get_l2_counters(&l2_counters_values);
+
+			for (i  = COUNTER_L2_0_C0; i <= COUNTER_L2_2_C1; i++) {
+				if (0 == (j % 2))
+					_mali_osk_profiling_record_global_counters(i, l2_counters_values.cores[j / 2].value0);
+				else
+					_mali_osk_profiling_record_global_counters(i, l2_counters_values.cores[j / 2].value1);
+				j++;
+			}
+		}
+
+		current_time = (s64)_mali_osk_boot_time_get_ns();
+
+		/* Add all enabled counter values into stream */
+		for (counter_index = 0; counter_index < num_global_mali_profiling_counters; counter_index++) {
+			/* No need to sample these couners here. */
+			if (global_mali_profiling_counters[counter_index].enabled) {
+				if ((global_mali_profiling_counters[counter_index].counter_id >= FIRST_MEM_COUNTER &&
+				     global_mali_profiling_counters[counter_index].counter_id <= LAST_MEM_COUNTER)
+				    || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_VP_ACTIVITY)
+				    || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_FP_ACTIVITY)
+				    || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_FILMSTRIP)) {
+
+					continue;
+				}
+
+				if (global_mali_profiling_counters[counter_index].counter_id >= COUNTER_L2_0_C0 &&
+				    global_mali_profiling_counters[counter_index].counter_id <= COUNTER_L2_2_C1) {
+
+					u32 prev_val = global_mali_profiling_counters[counter_index].prev_counter_value;
+
+					_mali_profiling_stream_add_counter(mali_counter_stream, current_time, global_mali_profiling_counters[counter_index].key,
+									   global_mali_profiling_counters[counter_index].current_counter_value - prev_val);
+
+					prev_val = global_mali_profiling_counters[counter_index].current_counter_value;
+
+					global_mali_profiling_counters[counter_index].prev_counter_value = prev_val;
+				} else {
+
+					if (global_mali_profiling_counters[counter_index].counter_id == COUNTER_TOTAL_ALLOC_PAGES) {
+						u32 total_alloc_mem = _mali_ukk_report_memory_usage();
+
+						global_mali_profiling_counters[counter_index].current_counter_value = total_alloc_mem / _MALI_OSK_MALI_PAGE_SIZE;
+					}
+					_mali_profiling_stream_add_counter(mali_counter_stream, current_time, global_mali_profiling_counters[counter_index].key,
+									   global_mali_profiling_counters[counter_index].current_counter_value);
+					if (global_mali_profiling_counters[counter_index].counter_id < FIRST_SPECIAL_COUNTER)
+						global_mali_profiling_counters[counter_index].current_counter_value = 0;
+				}
+			}
+		}
+		_mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_counter_stream);
+		mali_counter_stream = NULL;
+	} else {
+		MALI_DEBUG_PRINT(1, ("Not enough mali profiling stream buffer!\n"));
+	}
+
+	wake_up_interruptible(&stream_fd_wait_queue);
+
+	/*Enable the sampling timer again*/
+	if (0 != num_counters_enabled && 0 != profiling_sample_rate) {
+		hrtimer_forward_now(&profiling_sampling_timer, ns_to_ktime(profiling_sample_rate));
+		return HRTIMER_RESTART;
+	}
+	return HRTIMER_NORESTART;
+}
+
+static void _mali_profiling_sampling_core_activity_switch(int counter_id, int core, u32 activity, u32 pid)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&mali_activity_lock, irq_flags);
+	if (activity == 0)
+		mali_activity_cores_num--;
+	else
+		mali_activity_cores_num++;
+	spin_unlock_irqrestore(&mali_activity_lock, irq_flags);
+
+	if (global_mali_profiling_counters != NULL) {
+		int i;
+
+		for (i = 0; i < num_global_mali_profiling_counters; i++) {
+			if (counter_id == global_mali_profiling_counters[i].counter_id && global_mali_profiling_counters[i].enabled) {
+				u64 current_time = _mali_osk_boot_time_get_ns();
+				u32 add_size = STREAM_HEADER_SIZE;
+
+				if (mali_core_activity_stream != NULL) {
+					if ((mali_core_activity_stream_dequeue_time +  MALI_PROFILING_STREAM_HOLD_TIME < current_time) ||
+					    (MALI_PROFILING_STREAM_DATA_DEFAULT_SIZE > MALI_PROFILING_STREAM_BUFFER_SIZE
+					     - mali_core_activity_stream->used_size)) {
+						_mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_core_activity_stream);
+						mali_core_activity_stream = NULL;
+						wake_up_interruptible(&stream_fd_wait_queue);
+					}
+				}
+
+				if (mali_core_activity_stream == NULL) {
+					if (_MALI_OSK_ERR_OK == _mali_profiling_global_stream_list_dequeue(
+						    &global_mali_stream_list->free_list, &mali_core_activity_stream)) {
+						mali_core_activity_stream_dequeue_time = current_time;
+					} else {
+						MALI_DEBUG_PRINT(1, ("Not enough mali profiling stream buffer!\n"));
+						wake_up_interruptible(&stream_fd_wait_queue);
+						break;
+					}
+
+				}
+
+				mali_core_activity_stream->data[mali_core_activity_stream->used_size] = STREAM_HEADER_CORE_ACTIVITY;
+
+				add_size += _mali_profiling_pack_long(mali_core_activity_stream->data,
+								      MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, (s64)current_time);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, core);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, (s32)global_mali_profiling_counters[i].key);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, activity);
+				add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+								     MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, pid);
+
+				_mali_profiling_set_packet_size(mali_core_activity_stream->data + mali_core_activity_stream->used_size + 1,
+								add_size - STREAM_HEADER_SIZE);
+
+				mali_core_activity_stream->used_size += add_size;
+
+				if (mali_activity_cores_num == 0) {
+					_mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_core_activity_stream);
+					mali_core_activity_stream = NULL;
+					wake_up_interruptible(&stream_fd_wait_queue);
+				}
+
+				break;
+			}
+		}
+	}
+}
+
+static mali_bool _mali_profiling_global_counters_init(void)
+{
+	int core_id, counter_index, counter_number, counter_id;
+	u32 num_l2_cache_cores;
+	u32 num_pp_cores;
+	u32 num_gp_cores = 1;
+
+	MALI_DEBUG_ASSERT(global_mali_profiling_counters == NULL);
+	num_pp_cores = mali_pp_get_glob_num_pp_cores();
+	num_l2_cache_cores =    mali_l2_cache_core_get_glob_num_l2_cores();
+
+	num_global_mali_profiling_counters = 3 * (num_gp_cores + num_pp_cores) + 2 * num_l2_cache_cores
+					     + MALI_PROFILING_SW_COUNTERS_NUM
+					     + MALI_PROFILING_SPECIAL_COUNTERS_NUM
+					     + MALI_PROFILING_MEM_COUNTERS_NUM;
+	global_mali_profiling_counters = _mali_osk_calloc(num_global_mali_profiling_counters, sizeof(mali_profiling_counter));
+
+	if (global_mali_profiling_counters == NULL)
+		return MALI_FALSE;
+
+	counter_index = 0;
+	/*Vertex processor counters */
+	for (core_id = 0; core_id < num_gp_cores; core_id++) {
+		global_mali_profiling_counters[counter_index].counter_id = ACTIVITY_VP_0 + core_id;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_VP_%d_active", mali_name, core_id);
+
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			counter_index++;
+			global_mali_profiling_counters[counter_index].counter_id = COUNTER_VP_0_C0 + (2 * core_id) + counter_number;
+			_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+					   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_VP_%d_cnt%d", mali_name, core_id, counter_number);
+		}
+	}
+
+	/* Fragment processors' counters */
+	for (core_id = 0; core_id < num_pp_cores; core_id++) {
+		counter_index++;
+		global_mali_profiling_counters[counter_index].counter_id = ACTIVITY_FP_0 + core_id;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_FP_%d_active", mali_name, core_id);
+
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			counter_index++;
+			global_mali_profiling_counters[counter_index].counter_id = COUNTER_FP_0_C0 + (2 * core_id) + counter_number;
+			_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+					   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_FP_%d_cnt%d", mali_name, core_id, counter_number);
+		}
+	}
+
+	/* L2 Cache counters */
+	for (core_id = 0; core_id < num_l2_cache_cores; core_id++) {
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			counter_index++;
+			global_mali_profiling_counters[counter_index].counter_id = COUNTER_L2_0_C0 + (2 * core_id) + counter_number;
+			_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+					   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_L2_%d_cnt%d", mali_name, core_id, counter_number);
+		}
+	}
+
+	/* Now set up the software counter entries */
+	for (counter_id = FIRST_SW_COUNTER; counter_id <= LAST_SW_COUNTER; counter_id++) {
+		counter_index++;
+
+		if (first_sw_counter_index == 0)
+			first_sw_counter_index = counter_index;
+
+		global_mali_profiling_counters[counter_index].counter_id = counter_id;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_SW_%d", mali_name, counter_id - FIRST_SW_COUNTER);
+	}
+
+	/* Now set up the special counter entries */
+	for (counter_id = FIRST_SPECIAL_COUNTER; counter_id <= LAST_SPECIAL_COUNTER; counter_id++) {
+
+		counter_index++;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_%s",
+				   mali_name, _mali_special_counter_descriptions[counter_id - FIRST_SPECIAL_COUNTER]);
+
+		global_mali_profiling_counters[counter_index].counter_id = counter_id;
+	}
+
+	/* Now set up the mem counter entries*/
+	for (counter_id = FIRST_MEM_COUNTER; counter_id <= LAST_MEM_COUNTER; counter_id++) {
+
+		counter_index++;
+		_mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+				   sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_%s",
+				   mali_name, _mali_mem_counter_descriptions[counter_id - FIRST_MEM_COUNTER]);
+
+		global_mali_profiling_counters[counter_index].counter_id = counter_id;
+	}
+
+	MALI_DEBUG_ASSERT((counter_index + 1) == num_global_mali_profiling_counters);
+
+	return MALI_TRUE;
+}
+
+void _mali_profiling_notification_mem_counter(struct mali_session_data *session, u32 counter_id, u32 key, int enable)
+{
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (session != NULL) {
+		_mali_osk_notification_t *notification;
+		_mali_osk_notification_queue_t *queue;
+
+		queue = session->ioctl_queue;
+		MALI_DEBUG_ASSERT(queue != NULL);
+
+		notification = _mali_osk_notification_create(_MALI_NOTIFICATION_ANNOTATE_PROFILING_MEM_COUNTER,
+				sizeof(_mali_uk_annotate_profiling_mem_counter_s));
+
+		if (notification != NULL) {
+			_mali_uk_annotate_profiling_mem_counter_s *data = notification->result_buffer;
+
+			data->counter_id = counter_id;
+			data->key = key;
+			data->enable = enable;
+
+			_mali_osk_notification_queue_send(queue, notification);
+		} else {
+			MALI_PRINT_ERROR(("Failed to create notification object!\n"));
+		}
+	} else {
+		MALI_PRINT_ERROR(("Failed to find the right session!\n"));
+	}
+}
+
+void _mali_profiling_notification_enable(struct mali_session_data *session, u32 sampling_rate, int enable)
+{
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (session != NULL) {
+		_mali_osk_notification_t *notification;
+		_mali_osk_notification_queue_t *queue;
+
+		queue = session->ioctl_queue;
+		MALI_DEBUG_ASSERT(queue != NULL);
+
+		notification = _mali_osk_notification_create(_MALI_NOTIFICATION_ANNOTATE_PROFILING_ENABLE,
+				sizeof(_mali_uk_annotate_profiling_enable_s));
+
+		if (notification != NULL) {
+			_mali_uk_annotate_profiling_enable_s *data = notification->result_buffer;
+
+			data->sampling_rate = sampling_rate;
+			data->enable = enable;
+
+			_mali_osk_notification_queue_send(queue, notification);
+		} else {
+			MALI_PRINT_ERROR(("Failed to create notification object!\n"));
+		}
+	} else {
+		MALI_PRINT_ERROR(("Failed to find the right session!\n"));
+	}
+}
+
+
+_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start)
+{
+	int i;
+	mali_profiling_stream *new_mali_profiling_stream = NULL;
+	mali_profiling_stream_list *new_mali_profiling_stream_list = NULL;
+
+	if (auto_start == MALI_TRUE) {
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE);
+	}
+
+	/*Init the global_mali_stream_list*/
+	MALI_DEBUG_ASSERT(global_mali_stream_list == NULL);
+	new_mali_profiling_stream_list = (mali_profiling_stream_list *)kmalloc(sizeof(mali_profiling_stream_list), GFP_KERNEL);
+
+	if (new_mali_profiling_stream_list == NULL) {
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	spin_lock_init(&new_mali_profiling_stream_list->spin_lock);
+	INIT_LIST_HEAD(&new_mali_profiling_stream_list->free_list);
+	INIT_LIST_HEAD(&new_mali_profiling_stream_list->queue_list);
+
+	spin_lock_init(&mali_activity_lock);
+	mali_activity_cores_num =  0;
+
+	for (i = 0; i < MALI_PROFILING_STREAM_BUFFER_NUM; i++) {
+		new_mali_profiling_stream = (mali_profiling_stream *)kmalloc(sizeof(mali_profiling_stream), GFP_KERNEL);
+		if (new_mali_profiling_stream == NULL) {
+			_mali_profiling_stream_list_destory(new_mali_profiling_stream_list);
+			return _MALI_OSK_ERR_NOMEM;
+		}
+
+		INIT_LIST_HEAD(&new_mali_profiling_stream->list);
+		new_mali_profiling_stream->used_size = 0;
+		list_add_tail(&new_mali_profiling_stream->list, &new_mali_profiling_stream_list->free_list);
+
+	}
+
+	_mali_osk_atomic_init(&stream_fd_if_used, 0);
+	init_waitqueue_head(&stream_fd_wait_queue);
+
+	hrtimer_init(&profiling_sampling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+	profiling_sampling_timer.function = _mali_profiling_sampling_counters;
+
+	global_mali_stream_list = new_mali_profiling_stream_list;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_profiling_term(void)
+{
+	if (profiling_sample_rate != 0) {
+		hrtimer_cancel(&profiling_sampling_timer);
+		profiling_sample_rate = 0;
+	}
+	_mali_osk_atomic_term(&stream_fd_if_used);
+
+	if (global_mali_profiling_counters != NULL) {
+		_mali_osk_free(global_mali_profiling_counters);
+		global_mali_profiling_counters = NULL;
+		num_global_mali_profiling_counters = 0;
+	}
+
+	if (global_mali_stream_list != NULL) {
+		_mali_profiling_stream_list_destory(global_mali_stream_list);
+		global_mali_stream_list = NULL;
+	}
+
+}
+
+void _mali_osk_profiling_stop_sampling(u32 pid)
+{
+	if (pid == current_profiling_pid) {
+
+		int i;
+		/* Reset all counter states when closing connection.*/
+		for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+			_mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, MALI_HW_CORE_NO_COUNTER);
+			global_mali_profiling_counters[i].enabled = 0;
+			global_mali_profiling_counters[i].prev_counter_value = 0;
+			global_mali_profiling_counters[i].current_counter_value = 0;
+		}
+		l2_cache_counter_if_enabled = MALI_FALSE;
+		num_counters_enabled = 0;
+		mem_counters_enabled = 0;
+		_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 0);
+		_mali_profiling_control(SW_COUNTER_ENABLE, 0);
+		/* Delete sampling timer when closing connection. */
+		if (profiling_sample_rate != 0) {
+			hrtimer_cancel(&profiling_sampling_timer);
+			profiling_sample_rate = 0;
+		}
+		current_profiling_pid = 0;
+	}
+}
+
+void    _mali_osk_profiling_add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4)
+{
+	/*Record the freq & volt to global_mali_profiling_counters here. */
+	if (profiling_sample_rate != 0) {
+		u32 channel;
+		u32 state;
+
+		channel = (event_id >> 16) & 0xFF;
+		state = ((event_id >> 24) & 0xF) << 24;
+
+		switch (state) {
+		case MALI_PROFILING_EVENT_TYPE_SINGLE:
+			if ((MALI_PROFILING_EVENT_CHANNEL_GPU >> 16) == channel) {
+				u32 reason = (event_id & 0xFFFF);
+
+				if (reason == MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE) {
+					_mali_osk_profiling_record_global_counters(COUNTER_FREQUENCY, data0);
+					_mali_osk_profiling_record_global_counters(COUNTER_VOLTAGE, data1);
+				}
+			}
+			break;
+		case MALI_PROFILING_EVENT_TYPE_START:
+			if ((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) == channel) {
+				_mali_profiling_sampling_core_activity_switch(COUNTER_VP_ACTIVITY, 0, 1, data1);
+			} else if (channel >= (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) &&
+				   (MALI_PROFILING_EVENT_CHANNEL_PP7 >> 16) >= channel) {
+				u32 core_id = channel - (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16);
+
+				_mali_profiling_sampling_core_activity_switch(COUNTER_FP_ACTIVITY, core_id, 1, data1);
+			}
+			break;
+		case MALI_PROFILING_EVENT_TYPE_STOP:
+			if ((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) == channel) {
+				_mali_profiling_sampling_core_activity_switch(COUNTER_VP_ACTIVITY, 0, 0, 0);
+			} else if (channel >= (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) &&
+				   (MALI_PROFILING_EVENT_CHANNEL_PP7 >> 16) >= channel) {
+				u32 core_id = channel - (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16);
+
+				_mali_profiling_sampling_core_activity_switch(COUNTER_FP_ACTIVITY, core_id, 0, 0);
+			}
+			break;
+		default:
+			break;
+		}
+	}
+	trace_mali_timeline_event(event_id, data0, data1, data2, data3, data4);
+}
+
+void _mali_osk_profiling_report_sw_counters(u32 *counters)
+{
+	trace_mali_sw_counters(_mali_osk_get_pid(), _mali_osk_get_tid(), NULL, counters);
+}
+
+void _mali_osk_profiling_record_global_counters(int counter_id, u32 value)
+{
+	if (global_mali_profiling_counters != NULL) {
+		int i;
+
+		for (i = 0; i < num_global_mali_profiling_counters; i++) {
+			if (counter_id == global_mali_profiling_counters[i].counter_id && global_mali_profiling_counters[i].enabled) {
+				global_mali_profiling_counters[i].current_counter_value = value;
+				break;
+			}
+		}
+	}
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args)
+{
+	/* Always add process and thread identificator in the first two data elements for events from user space */
+	_mali_osk_profiling_add_event(args->event_id, _mali_osk_get_pid(), _mali_osk_get_tid(), args->data[2], args->data[3], args->data[4]);
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args)
+{
+	u32 *counters = (u32 *)(uintptr_t)args->counters;
+
+	_mali_osk_profiling_report_sw_counters(counters);
+
+	if (global_mali_profiling_counters != NULL) {
+		int i;
+
+		for (i = 0; i < MALI_PROFILING_SW_COUNTERS_NUM; i++) {
+			if (global_mali_profiling_counters[first_sw_counter_index + i].enabled) {
+				global_mali_profiling_counters[first_sw_counter_index + i].current_counter_value = *(counters + i);
+			}
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_stream_fd_get(_mali_uk_profiling_stream_fd_get_s *args)
+{
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (_mali_osk_atomic_inc_return(&stream_fd_if_used) == 1) {
+
+		s32 fd = anon_inode_getfd("[mali_profiling_stream]", &mali_profiling_stream_fops,
+					  session,
+					  O_RDONLY | O_CLOEXEC);
+
+		args->stream_fd = fd;
+		if (fd < 0) {
+			_mali_osk_atomic_dec(&stream_fd_if_used);
+			return _MALI_OSK_ERR_FAULT;
+		}
+		args->stream_fd = fd;
+	} else {
+		_mali_osk_atomic_dec(&stream_fd_if_used);
+		args->stream_fd = -1;
+		return _MALI_OSK_ERR_BUSY;
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_control_set(_mali_uk_profiling_control_set_s *args)
+{
+	u32 control_packet_size;
+	u32 output_buffer_size;
+
+	struct  mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (NULL == global_mali_profiling_counters && MALI_FALSE == _mali_profiling_global_counters_init()) {
+		MALI_PRINT_ERROR(("Failed to create global_mali_profiling_counters.\n"));
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	control_packet_size = args->control_packet_size;
+	output_buffer_size = args->response_packet_size;
+
+	if (control_packet_size != 0) {
+		u8 control_type;
+		u8 *control_packet_data;
+		u8 *response_packet_data;
+		u32 version_length = sizeof(utgard_setup_version) - 1;
+
+		control_packet_data = (u8 *)(uintptr_t)args->control_packet_data;
+		MALI_DEBUG_ASSERT_POINTER(control_packet_data);
+		response_packet_data = (u8 *)(uintptr_t)args->response_packet_data;
+		MALI_DEBUG_ASSERT_POINTER(response_packet_data);
+
+		/*Decide if need to ignore Utgard setup version.*/
+		if (control_packet_size >= version_length) {
+			if (memcmp(control_packet_data, utgard_setup_version, version_length) == 0) {
+				if (control_packet_size == version_length) {
+					args->response_packet_size = 0;
+					return _MALI_OSK_ERR_OK;
+				} else {
+					control_packet_data += version_length;
+					control_packet_size -= version_length;
+				}
+			}
+		}
+
+		current_profiling_pid = _mali_osk_get_pid();
+
+		control_type = control_packet_data[0];
+		switch (control_type) {
+		case PACKET_HEADER_COUNTERS_REQUEST: {
+			int i;
+
+			if (PACKET_HEADER_SIZE > control_packet_size ||
+			    control_packet_size !=  _mali_profiling_get_packet_size(control_packet_data + 1)) {
+				MALI_PRINT_ERROR(("Wrong control packet  size, type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			/* Send supported counters */
+			if (output_buffer_size < PACKET_HEADER_SIZE)
+				return _MALI_OSK_ERR_FAULT;
+
+			*response_packet_data = PACKET_HEADER_COUNTERS_ACK;
+			args->response_packet_size = PACKET_HEADER_SIZE;
+
+			for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+				u32 name_size = strlen(global_mali_profiling_counters[i].counter_name);
+
+				if ((args->response_packet_size + name_size + 1) > output_buffer_size) {
+					MALI_PRINT_ERROR(("Response packet data is too large..\n"));
+					return _MALI_OSK_ERR_FAULT;
+				}
+
+				memcpy(response_packet_data + args->response_packet_size,
+				       global_mali_profiling_counters[i].counter_name, name_size + 1);
+
+				args->response_packet_size += (name_size + 1);
+
+				if (global_mali_profiling_counters[i].counter_id == COUNTER_VP_ACTIVITY) {
+					args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+								      output_buffer_size, args->response_packet_size, (s32)1);
+				} else if (global_mali_profiling_counters[i].counter_id == COUNTER_FP_ACTIVITY) {
+					args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+								      output_buffer_size, args->response_packet_size, (s32)mali_pp_get_glob_num_pp_cores());
+				} else {
+					args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+								      output_buffer_size, args->response_packet_size, (s32) -1);
+				}
+			}
+
+			_mali_profiling_set_packet_size(response_packet_data + 1, args->response_packet_size);
+			break;
+		}
+
+		case PACKET_HEADER_COUNTERS_ENABLE: {
+			int i;
+			u32 request_pos = PACKET_HEADER_SIZE;
+			mali_bool sw_counter_if_enabled = MALI_FALSE;
+
+			if (PACKET_HEADER_SIZE > control_packet_size ||
+			    control_packet_size !=  _mali_profiling_get_packet_size(control_packet_data + 1)) {
+				MALI_PRINT_ERROR(("Wrong control packet  size , type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			/* Init all counter states before enable requested counters.*/
+			for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+				_mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, MALI_HW_CORE_NO_COUNTER);
+				global_mali_profiling_counters[i].enabled = 0;
+				global_mali_profiling_counters[i].prev_counter_value = 0;
+				global_mali_profiling_counters[i].current_counter_value = 0;
+
+				if (global_mali_profiling_counters[i].counter_id >= FIRST_MEM_COUNTER &&
+				    global_mali_profiling_counters[i].counter_id <= LAST_MEM_COUNTER) {
+					_mali_profiling_notification_mem_counter(session, global_mali_profiling_counters[i].counter_id, 0, 0);
+				}
+			}
+
+			l2_cache_counter_if_enabled = MALI_FALSE;
+			num_counters_enabled = 0;
+			mem_counters_enabled = 0;
+			_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 0);
+			_mali_profiling_control(SW_COUNTER_ENABLE, 0);
+			_mali_profiling_notification_enable(session, 0, 0);
+
+			/* Enable requested counters */
+			while (request_pos < control_packet_size) {
+				u32 begin = request_pos;
+				u32 event;
+				u32 key;
+
+				/* Check the counter name which should be ended with null */
+				while (request_pos < control_packet_size && control_packet_data[request_pos] != '\0') {
+					++request_pos;
+				}
+
+				if (request_pos >= control_packet_size)
+					return _MALI_OSK_ERR_FAULT;
+
+				++request_pos;
+				event = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+				key = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+
+				for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+					u32 name_size = strlen((char *)(control_packet_data + begin));
+
+					if (strncmp(global_mali_profiling_counters[i].counter_name, (char *)(control_packet_data + begin), name_size) == 0) {
+						if (!sw_counter_if_enabled && (FIRST_SW_COUNTER <= global_mali_profiling_counters[i].counter_id
+									       && global_mali_profiling_counters[i].counter_id <= LAST_SW_COUNTER)) {
+							sw_counter_if_enabled = MALI_TRUE;
+							_mali_profiling_control(SW_COUNTER_ENABLE, 1);
+						}
+
+						if (global_mali_profiling_counters[i].counter_id == COUNTER_FILMSTRIP) {
+							_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 1);
+							_mali_profiling_control(FBDUMP_CONTROL_RATE, event & 0xff);
+							_mali_profiling_control(FBDUMP_CONTROL_RESIZE_FACTOR, (event >> 8) & 0xff);
+						}
+
+						if (global_mali_profiling_counters[i].counter_id >= FIRST_MEM_COUNTER &&
+						    global_mali_profiling_counters[i].counter_id <= LAST_MEM_COUNTER) {
+							_mali_profiling_notification_mem_counter(session, global_mali_profiling_counters[i].counter_id,
+									key, 1);
+							mem_counters_enabled++;
+						}
+
+						global_mali_profiling_counters[i].counter_event = event;
+						global_mali_profiling_counters[i].key = key;
+						global_mali_profiling_counters[i].enabled = 1;
+
+						_mali_profiling_set_event(global_mali_profiling_counters[i].counter_id,
+									  global_mali_profiling_counters[i].counter_event);
+						num_counters_enabled++;
+						break;
+					}
+				}
+
+				if (i == num_global_mali_profiling_counters) {
+					MALI_PRINT_ERROR(("Counter name does not match for type %u.\n", control_type));
+					return _MALI_OSK_ERR_FAULT;
+				}
+			}
+
+			if (output_buffer_size >= PACKET_HEADER_SIZE) {
+				*response_packet_data = PACKET_HEADER_ACK;
+				_mali_profiling_set_packet_size(response_packet_data + 1, PACKET_HEADER_SIZE);
+				args->response_packet_size = PACKET_HEADER_SIZE;
+			} else {
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			break;
+		}
+
+		case PACKET_HEADER_START_CAPTURE_VALUE: {
+			u32 live_rate;
+			u32 request_pos = PACKET_HEADER_SIZE;
+
+			if (PACKET_HEADER_SIZE > control_packet_size ||
+			    control_packet_size !=  _mali_profiling_get_packet_size(control_packet_data + 1)) {
+				MALI_PRINT_ERROR(("Wrong control packet  size , type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			/* Read samping rate in nanoseconds and live rate, start capture.*/
+			profiling_sample_rate =  _mali_profiling_read_packet_int(control_packet_data,
+						 &request_pos, control_packet_size);
+
+			live_rate = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+
+			if (output_buffer_size >= PACKET_HEADER_SIZE) {
+				*response_packet_data = PACKET_HEADER_ACK;
+				_mali_profiling_set_packet_size(response_packet_data + 1, PACKET_HEADER_SIZE);
+				args->response_packet_size = PACKET_HEADER_SIZE;
+			} else {
+				return _MALI_OSK_ERR_FAULT;
+			}
+
+			if (0 != num_counters_enabled && 0 != profiling_sample_rate) {
+				_mali_profiling_global_stream_list_free();
+				if (mem_counters_enabled > 0) {
+					_mali_profiling_notification_enable(session, profiling_sample_rate, 1);
+				}
+				hrtimer_start(&profiling_sampling_timer,
+					      ktime_set(profiling_sample_rate / 1000000000, profiling_sample_rate % 1000000000),
+					      HRTIMER_MODE_REL_PINNED);
+			}
+
+			break;
+		}
+		default:
+			MALI_PRINT_ERROR(("Unsupported  profiling packet header type %u.\n", control_type));
+			args->response_packet_size  = 0;
+			return _MALI_OSK_ERR_FAULT;
+		}
+	} else {
+		_mali_osk_profiling_stop_sampling(current_profiling_pid);
+		_mali_profiling_notification_enable(session, 0, 0);
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+/**
+ * Called by gator.ko to set HW counters
+ *
+ * @param counter_id The counter ID.
+ * @param event_id Event ID that the counter should count (HW counter value from TRM).
+ *
+ * @return 1 on success, 0 on failure.
+ */
+int _mali_profiling_set_event(u32 counter_id, s32 event_id)
+{
+	if (counter_id == COUNTER_VP_0_C0) {
+		mali_gp_job_set_gp_counter_src0(event_id);
+	} else if (counter_id == COUNTER_VP_0_C1) {
+		mali_gp_job_set_gp_counter_src1(event_id);
+	} else if (COUNTER_FP_0_C0 <= counter_id && COUNTER_FP_7_C1 >= counter_id) {
+		/*
+		 * Two compatibility notes for this function:
+		 *
+		 * 1) Previously the DDK allowed per core counters.
+		 *
+		 *    This did not make much sense on Mali-450 with the "virtual PP core" concept,
+		 *    so this option was removed, and only the same pair of HW counters was allowed on all cores,
+		 *    beginning with r3p2 release.
+		 *
+		 *    Starting with r4p0, it is now possible to set different HW counters for the different sub jobs.
+		 *    This should be almost the same, since sub job 0 is designed to run on core 0,
+		 *    sub job 1 on core 1, and so on.
+		 *
+		 *    The scheduling of PP sub jobs is not predictable, and this often led to situations where core 0 ran 2
+		 *    sub jobs, while for instance core 1 ran zero. Having the counters set per sub job would thus increase
+		 *    the predictability of the returned data (as you would be guaranteed data for all the selected HW counters).
+		 *
+		 *    PS: Core scaling needs to be disabled in order to use this reliably (goes for both solutions).
+		 *
+		 *    The framework/#defines with Gator still indicates that the counter is for a particular core,
+		 *    but this is internally used as a sub job ID instead (no translation needed).
+		 *
+		 *  2) Global/default vs per sub job counters
+		 *
+		 *     Releases before r3p2 had only per PP core counters.
+		 *     r3p2 releases had only one set of default/global counters which applied to all PP cores
+		 *     Starting with r4p0, we have both a set of default/global counters,
+		 *     and individual counters per sub job (equal to per core).
+		 *
+		 *     To keep compatibility with Gator/DS-5/streamline, the following scheme is used:
+		 *
+		 *     r3p2 release; only counters set for core 0 is handled,
+		 *     this is applied as the default/global set of counters, and will thus affect all cores.
+		 *
+		 *     r4p0 release; counters set for core 0 is applied as both the global/default set of counters,
+		 *     and counters for sub job 0.
+		 *     Counters set for core 1-7 is only applied for the corresponding sub job.
+		 *
+		 *     This should allow the DS-5/Streamline GUI to have a simple mode where it only allows setting the
+		 *     values for core 0, and thus this will be applied to all PP sub jobs/cores.
+		 *     Advanced mode will also be supported, where individual pairs of HW counters can be selected.
+		 *
+		 *     The GUI will (until it is updated) still refer to cores instead of sub jobs, but this is probably
+		 *     something we can live with!
+		 *
+		 *     Mali-450 note: Each job is not divided into a deterministic number of sub jobs, as the HW DLBU
+		 *     automatically distributes the load between whatever number of cores is available at this particular time.
+		 *     A normal PP job on Mali-450 is thus considered a single (virtual) job, and it will thus only be possible
+		 *     to use a single pair of HW counters (even if the job ran on multiple PP cores).
+		 *     In other words, only the global/default pair of PP HW counters will be used for normal Mali-450 jobs.
+		 */
+		u32 sub_job = (counter_id - COUNTER_FP_0_C0) >> 1;
+		u32 counter_src = (counter_id - COUNTER_FP_0_C0) & 1;
+
+		if (counter_src == 0) {
+			mali_pp_job_set_pp_counter_sub_job_src0(sub_job, event_id);
+			if (sub_job == 0) {
+				mali_pp_job_set_pp_counter_global_src0(event_id);
+			}
+		} else {
+			mali_pp_job_set_pp_counter_sub_job_src1(sub_job, event_id);
+			if (sub_job == 0) {
+				mali_pp_job_set_pp_counter_global_src1(event_id);
+			}
+		}
+	} else if (COUNTER_L2_0_C0 <= counter_id && COUNTER_L2_2_C1 >= counter_id) {
+		u32 core_id = (counter_id - COUNTER_L2_0_C0) >> 1;
+		struct mali_l2_cache_core *l2_cache_core = mali_l2_cache_core_get_glob_l2_core(core_id);
+
+		if (l2_cache_core != NULL) {
+			u32 counter_src = (counter_id - COUNTER_L2_0_C0) & 1;
+
+			mali_l2_cache_core_set_counter_src(l2_cache_core,
+							   counter_src, event_id);
+			l2_cache_counter_if_enabled = MALI_TRUE;
+		}
+	} else {
+		return 0; /* Failure, unknown event */
+	}
+
+	return 1; /* success */
+}
+
+/**
+ * Called by gator.ko to retrieve the L2 cache counter values for all L2 cache cores.
+ * The L2 cache counters are unique in that they are polled by gator, rather than being
+ * transmitted via the tracepoint mechanism.
+ *
+ * @param values Pointer to a _mali_profiling_l2_counter_values structure where
+ *               the counter sources and values will be output
+ * @return 0 if all went well; otherwise, return the mask with the bits set for the powered off cores
+ */
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values)
+{
+	u32 l2_cores_num = mali_l2_cache_core_get_glob_num_l2_cores();
+	u32 i;
+
+	MALI_DEBUG_ASSERT(l2_cores_num <= 3);
+
+	for (i = 0; i < l2_cores_num; i++) {
+		struct mali_l2_cache_core *l2_cache = mali_l2_cache_core_get_glob_l2_core(i);
+
+		if (l2_cache == NULL) {
+			continue;
+		}
+
+		mali_l2_cache_core_get_counter_values(l2_cache,
+						      &values->cores[i].source0,
+						      &values->cores[i].value0,
+						      &values->cores[i].source1,
+						      &values->cores[i].value1);
+	}
+
+	return 0;
+}
+
+/**
+ * Called by gator to control the production of profiling information at runtime.
+ */
+void _mali_profiling_control(u32 action, u32 value)
+{
+	switch (action) {
+	case FBDUMP_CONTROL_ENABLE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED, (value == 0 ? MALI_FALSE : MALI_TRUE));
+		break;
+	case FBDUMP_CONTROL_RATE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES, value);
+		break;
+	case SW_COUNTER_ENABLE:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_COUNTER_ENABLED, value);
+		break;
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR, value);
+		break;
+	default:
+		break;  /* Ignore unimplemented actions */
+	}
+}
+
+/**
+ * Called by gator to get mali api version.
+ */
+u32 _mali_profiling_get_api_version(void)
+{
+	return MALI_PROFILING_API_VERSION;
+}
+
+/**
+* Called by gator to get the data about Mali instance in use:
+* product id, version, number of cores
+*/
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values)
+{
+	values->mali_product_id = (u32)mali_kernel_core_get_product_id();
+	values->mali_version_major = mali_kernel_core_get_gpu_major_version();
+	values->mali_version_minor = mali_kernel_core_get_gpu_minor_version();
+	values->num_of_l2_cores = mali_l2_cache_core_get_glob_num_l2_cores();
+	values->num_of_fp_cores = mali_executor_get_num_cores_total();
+	values->num_of_vp_cores = 1;
+}
+
+
+EXPORT_SYMBOL(_mali_profiling_set_event);
+EXPORT_SYMBOL(_mali_profiling_get_l2_counters);
+EXPORT_SYMBOL(_mali_profiling_control);
+EXPORT_SYMBOL(_mali_profiling_get_api_version);
+EXPORT_SYMBOL(_mali_profiling_get_mali_version);
diff --git a/utgard/r7p0/linux/mali_osk_specific.h b/utgard/r7p0/linux/mali_osk_specific.h
new file mode 100644
index 0000000..372a8ea
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_specific.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_specific.h
+ * Defines per-OS Kernel level specifics, such as unusual workarounds for
+ * certain OSs.
+ */
+
+#ifndef __MALI_OSK_SPECIFIC_H__
+#define __MALI_OSK_SPECIFIC_H__
+
+#include <asm/uaccess.h>
+#include <linux/platform_device.h>
+#include <linux/gfp.h>
+#include <linux/hardirq.h>
+
+
+#include "mali_osk_types.h"
+#include "mali_kernel_linux.h"
+
+#define MALI_STATIC_INLINE static inline
+#define MALI_NON_STATIC_INLINE inline
+
+typedef struct dma_pool *mali_dma_pool;
+
+typedef u32 mali_dma_addr;
+
+#if MALI_ENABLE_CPU_CYCLES
+/* Reads out the clock cycle performance counter of the current cpu.
+   It is useful for cost-free (2 cycle) measuring of the time spent
+   in a code path. Sample before and after, the diff number of cycles.
+   When the CPU is idle it will not increase this clock counter.
+   It means that the counter is accurate if only spin-locks are used,
+   but mutexes may lead to too low values since the cpu might "idle"
+   waiting for the mutex to become available.
+   The clock source is configured on the CPU during mali module load,
+   but will not give useful output after a CPU has been power cycled.
+   It is therefore important to configure the system to not turn of
+   the cpu cores when using this functionallity.*/
+static inline unsigned int mali_get_cpu_cyclecount(void)
+{
+	unsigned int value;
+	/* Reading the CCNT Register - CPU clock counter */
+	asm volatile("MRC p15, 0, %0, c9, c13, 0\t\n" : "=r"(value));
+	return value;
+}
+
+void mali_init_cpu_time_counters(int reset, int enable_divide_by_64);
+#endif
+
+
+MALI_STATIC_INLINE u32 _mali_osk_copy_from_user(void *to, void *from, u32 n)
+{
+	return (u32)copy_from_user(to, from, (unsigned long)n);
+}
+
+MALI_STATIC_INLINE mali_bool _mali_osk_in_atomic(void)
+{
+	return in_atomic();
+}
+
+#define _mali_osk_put_user(x, ptr) put_user(x, ptr)
+
+#endif /* __MALI_OSK_SPECIFIC_H__ */
diff --git a/utgard/r7p0/linux/mali_osk_time.c b/utgard/r7p0/linux/mali_osk_time.c
new file mode 100644
index 0000000..2ce10fe
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_time.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2010, 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_time.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <asm/delay.h>
+
+mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb)
+{
+	return time_after_eq(ticka, tickb) ?
+	       MALI_TRUE : MALI_FALSE;
+}
+
+unsigned long _mali_osk_time_mstoticks(u32 ms)
+{
+	return msecs_to_jiffies(ms);
+}
+
+u32 _mali_osk_time_tickstoms(unsigned long ticks)
+{
+	return jiffies_to_msecs(ticks);
+}
+
+unsigned long _mali_osk_time_tickcount(void)
+{
+	return jiffies;
+}
+
+void _mali_osk_time_ubusydelay(u32 usecs)
+{
+	udelay(usecs);
+}
+
+u64 _mali_osk_time_get_ns(void)
+{
+	struct timespec tsval;
+
+	getnstimeofday(&tsval);
+	return (u64)timespec_to_ns(&tsval);
+}
+
+u64 _mali_osk_boot_time_get_ns(void)
+{
+	struct timespec tsval;
+
+	get_monotonic_boottime(&tsval);
+	return (u64)timespec_to_ns(&tsval);
+}
diff --git a/utgard/r7p0/linux/mali_osk_timers.c b/utgard/r7p0/linux/mali_osk_timers.c
new file mode 100644
index 0000000..40ee6a8
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_timers.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_timers.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct _mali_osk_timer_t_struct {
+	struct timer_list timer;
+};
+
+typedef void (*timer_timeout_function_t)(unsigned long);
+
+_mali_osk_timer_t *_mali_osk_timer_init(void)
+{
+	_mali_osk_timer_t *t = (_mali_osk_timer_t *)kmalloc(sizeof(_mali_osk_timer_t), GFP_KERNEL);
+
+	if (t != NULL) init_timer(&t->timer);
+	return t;
+}
+
+void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	tim->timer.expires = jiffies + ticks_to_expire;
+	add_timer(&(tim->timer));
+}
+
+void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	mod_timer(&(tim->timer), jiffies + ticks_to_expire);
+}
+
+void _mali_osk_timer_del(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	del_timer_sync(&(tim->timer));
+}
+
+void _mali_osk_timer_del_async(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	del_timer(&(tim->timer));
+}
+
+mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	return 1 == timer_pending(&(tim->timer));
+}
+
+void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	tim->timer.data = (unsigned long)data;
+	tim->timer.function = (timer_timeout_function_t)callback;
+}
+
+void _mali_osk_timer_term(_mali_osk_timer_t *tim)
+{
+	MALI_DEBUG_ASSERT_POINTER(tim);
+	kfree(tim);
+}
diff --git a/utgard/r7p0/linux/mali_osk_wait_queue.c b/utgard/r7p0/linux/mali_osk_wait_queue.c
new file mode 100644
index 0000000..9c89daa
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_wait_queue.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_wait_queue.c
+ * Implemenation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct _mali_osk_wait_queue_t_struct {
+	wait_queue_head_t wait_queue;
+};
+
+_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void)
+{
+	_mali_osk_wait_queue_t *ret = NULL;
+
+	ret = kmalloc(sizeof(_mali_osk_wait_queue_t), GFP_KERNEL);
+
+	if (ret == NULL) {
+		return ret;
+	}
+
+	init_waitqueue_head(&ret->wait_queue);
+	MALI_DEBUG_ASSERT(!waitqueue_active(&ret->wait_queue));
+
+	return ret;
+}
+
+void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue));
+	wait_event(queue->wait_queue, condition(data));
+}
+
+void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+	MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue));
+	wait_event_timeout(queue->wait_queue, condition(data), _mali_osk_time_mstoticks(timeout));
+}
+
+void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue)
+{
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	/* if queue is empty, don't attempt to wake up its elements */
+	if (!waitqueue_active(&queue->wait_queue)) return;
+
+	MALI_DEBUG_PRINT(6, ("Waking up elements in wait queue %p ....\n", queue));
+
+	wake_up_all(&queue->wait_queue);
+
+	MALI_DEBUG_PRINT(6, ("... elements in wait queue %p woken up\n", queue));
+}
+
+void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue)
+{
+	/* Parameter validation  */
+	MALI_DEBUG_ASSERT_POINTER(queue);
+
+	/* Linux requires no explicit termination of wait queues */
+	kfree(queue);
+}
diff --git a/utgard/r7p0/linux/mali_osk_wq.c b/utgard/r7p0/linux/mali_osk_wq.c
new file mode 100644
index 0000000..3ad92c6
--- /dev/null
+++ b/utgard/r7p0/linux/mali_osk_wq.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright (C) 2010-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_wq.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/slab.h> /* For memory allocation */
+#include <linux/workqueue.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_license.h"
+#include "mali_kernel_linux.h"
+
+typedef struct _mali_osk_wq_work_s {
+	_mali_osk_wq_work_handler_t handler;
+	void *data;
+	mali_bool high_pri;
+	struct work_struct work_handle;
+} mali_osk_wq_work_object_t;
+
+typedef struct _mali_osk_wq_delayed_work_s {
+	_mali_osk_wq_work_handler_t handler;
+	void *data;
+	struct delayed_work work;
+} mali_osk_wq_delayed_work_object_t;
+
+#if MALI_LICENSE_IS_GPL
+static struct workqueue_struct *mali_wq_normal;
+static struct workqueue_struct *mali_wq_high;
+#endif
+
+static void _mali_osk_wq_work_func(struct work_struct *work);
+
+_mali_osk_errcode_t _mali_osk_wq_init(void)
+{
+#if MALI_LICENSE_IS_GPL
+	MALI_DEBUG_ASSERT(mali_wq_normal == NULL);
+	MALI_DEBUG_ASSERT(mali_wq_high == NULL);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+	mali_wq_normal = alloc_workqueue("mali", WQ_UNBOUND, 0);
+	mali_wq_high = alloc_workqueue("mali_high_pri", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+	mali_wq_normal = create_workqueue("mali");
+	mali_wq_high = create_workqueue("mali_high_pri");
+#endif
+	if (NULL == mali_wq_normal || NULL == mali_wq_high) {
+		MALI_PRINT_ERROR(("Unable to create Mali workqueues\n"));
+
+		if (mali_wq_normal) destroy_workqueue(mali_wq_normal);
+		if (mali_wq_high)   destroy_workqueue(mali_wq_high);
+
+		mali_wq_normal = NULL;
+		mali_wq_high   = NULL;
+
+		return _MALI_OSK_ERR_FAULT;
+	}
+#endif /* MALI_LICENSE_IS_GPL */
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_wq_flush(void)
+{
+#if MALI_LICENSE_IS_GPL
+	flush_workqueue(mali_wq_high);
+	flush_workqueue(mali_wq_normal);
+#else
+	flush_scheduled_work();
+#endif
+}
+
+void _mali_osk_wq_term(void)
+{
+#if MALI_LICENSE_IS_GPL
+	MALI_DEBUG_ASSERT(mali_wq_normal != NULL);
+	MALI_DEBUG_ASSERT(mali_wq_high != NULL);
+
+	flush_workqueue(mali_wq_normal);
+	destroy_workqueue(mali_wq_normal);
+
+	flush_workqueue(mali_wq_high);
+	destroy_workqueue(mali_wq_high);
+
+	mali_wq_normal = NULL;
+	mali_wq_high   = NULL;
+#else
+	flush_scheduled_work();
+#endif
+}
+
+_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL);
+
+	if (work == NULL) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+	work->high_pri = MALI_FALSE;
+
+	INIT_WORK(&work->work_handle, _mali_osk_wq_work_func);
+
+	return work;
+}
+
+_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL);
+
+	if (work == NULL) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+	work->high_pri = MALI_TRUE;
+
+	INIT_WORK(&work->work_handle, _mali_osk_wq_work_func);
+
+	return work;
+}
+
+void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+
+	_mali_osk_wq_flush();
+	kfree(work_object);
+}
+
+void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+
+	kfree(work_object);
+}
+
+void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+#if MALI_LICENSE_IS_GPL
+	queue_work(mali_wq_normal, &work_object->work_handle);
+#else
+	schedule_work(&work_object->work_handle);
+#endif
+}
+
+void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work)
+{
+	mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+#if MALI_LICENSE_IS_GPL
+	queue_work(mali_wq_high, &work_object->work_handle);
+#else
+	schedule_work(&work_object->work_handle);
+#endif
+}
+
+static void _mali_osk_wq_work_func(struct work_struct *work)
+{
+	mali_osk_wq_work_object_t *work_object;
+
+	work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_work_object_t, work_handle);
+
+#if MALI_LICENSE_IS_GPL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+	/* We want highest Dynamic priority of the thread so that the Jobs depending
+	** on this thread could be scheduled in time. Without this, this thread might
+	** sometimes need to wait for some threads in user mode to finish its round-robin
+	** time, causing *bubble* in the Mali pipeline. Thanks to the new implementation
+	** of high-priority workqueue in new kernel, this only happens in older kernel.
+	*/
+	if (work_object->high_pri == MALI_TRUE) {
+		set_user_nice(current, -19);
+	}
+#endif
+#endif /* MALI_LICENSE_IS_GPL */
+
+	work_object->handler(work_object->data);
+}
+
+static void _mali_osk_wq_delayed_work_func(struct work_struct *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object;
+
+	work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_delayed_work_object_t, work.work);
+	work_object->handler(work_object->data);
+}
+
+mali_osk_wq_delayed_work_object_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data)
+{
+	mali_osk_wq_delayed_work_object_t *work = kmalloc(sizeof(mali_osk_wq_delayed_work_object_t), GFP_KERNEL);
+
+	if (work == NULL) return NULL;
+
+	work->handler = handler;
+	work->data = data;
+
+	INIT_DELAYED_WORK(&work->work, _mali_osk_wq_delayed_work_func);
+
+	return work;
+}
+
+void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+
+	kfree(work_object);
+}
+
+void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+
+	cancel_delayed_work(&work_object->work);
+}
+
+void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+
+	cancel_delayed_work_sync(&work_object->work);
+}
+
+void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay)
+{
+	mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+
+#if MALI_LICENSE_IS_GPL
+	queue_delayed_work(mali_wq_normal, &work_object->work, delay);
+#else
+	schedule_delayed_work(&work_object->work, delay);
+#endif
+
+}
diff --git a/utgard/r7p0/linux/mali_pmu_power_up_down.c b/utgard/r7p0/linux/mali_pmu_power_up_down.c
new file mode 100644
index 0000000..527f4de
--- /dev/null
+++ b/utgard/r7p0/linux/mali_pmu_power_up_down.c
@@ -0,0 +1,27 @@
+/**
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_pmu_power_up_down.c
+ */
+
+#include <linux/module.h>
+#include "mali_executor.h"
+
+int mali_perf_set_num_pp_cores(unsigned int num_cores)
+{
+#ifndef CONFIG_MALI_DVFS
+	return mali_executor_set_perf_level(num_cores, MALI_TRUE);
+#else
+	return mali_executor_set_perf_level(num_cores, MALI_FALSE);
+#endif
+}
+
+EXPORT_SYMBOL(mali_perf_set_num_pp_cores);
diff --git a/utgard/r7p0/linux/mali_profiling_events.h b/utgard/r7p0/linux/mali_profiling_events.h
new file mode 100644
index 0000000..262d53f
--- /dev/null
+++ b/utgard/r7p0/linux/mali_profiling_events.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_EVENTS_H__
+#define __MALI_PROFILING_EVENTS_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_profiling_events.h>
+
+#endif /* __MALI_PROFILING_EVENTS_H__ */
diff --git a/utgard/r7p0/linux/mali_profiling_gator_api.h b/utgard/r7p0/linux/mali_profiling_gator_api.h
new file mode 100644
index 0000000..8ce4e25
--- /dev/null
+++ b/utgard/r7p0/linux/mali_profiling_gator_api.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012-2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_GATOR_API_H__
+#define __MALI_PROFILING_GATOR_API_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_profiling_gator_api.h>
+
+#endif /* __MALI_PROFILING_GATOR_API_H__ */
diff --git a/utgard/r7p0/linux/mali_profiling_internal.c b/utgard/r7p0/linux/mali_profiling_internal.c
new file mode 100644
index 0000000..a63779c
--- /dev/null
+++ b/utgard/r7p0/linux/mali_profiling_internal.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_ukk.h"
+#include "mali_timestamp.h"
+#include "mali_osk_profiling.h"
+#include "mali_user_settings_db.h"
+#include "mali_profiling_internal.h"
+
+typedef struct mali_profiling_entry {
+	u64 timestamp;
+	u32 event_id;
+	u32 data[5];
+} mali_profiling_entry;
+
+typedef enum mali_profiling_state {
+	MALI_PROFILING_STATE_UNINITIALIZED,
+	MALI_PROFILING_STATE_IDLE,
+	MALI_PROFILING_STATE_RUNNING,
+	MALI_PROFILING_STATE_RETURN,
+} mali_profiling_state;
+
+static _mali_osk_mutex_t *lock;
+static mali_profiling_state prof_state = MALI_PROFILING_STATE_UNINITIALIZED;
+static mali_profiling_entry *profile_entries;
+static _mali_osk_atomic_t profile_insert_index;
+static u32 profile_mask;
+
+static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4);
+
+void probe_mali_timeline_event(void *data, TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, unsigned
+			       int d2, unsigned int d3, unsigned int d4))
+{
+	add_event(event_id, d0, d1, d2, d3, d4);
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_init(mali_bool auto_start)
+{
+	profile_entries = NULL;
+	profile_mask = 0;
+	_mali_osk_atomic_init(&profile_insert_index, 0);
+
+	lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_PROFILING);
+	if (lock == NULL) {
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	prof_state = MALI_PROFILING_STATE_IDLE;
+
+	if (auto_start == MALI_TRUE) {
+		u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* Use maximum buffer size */
+
+		mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE);
+		if (_mali_internal_profiling_start(&limit) != _MALI_OSK_ERR_OK) {
+			return _MALI_OSK_ERR_FAULT;
+		}
+	}
+
+	return _MALI_OSK_ERR_OK;
+}
+
+void _mali_internal_profiling_term(void)
+{
+	u32 count;
+
+	/* Ensure profiling is stopped */
+	_mali_internal_profiling_stop(&count);
+
+	prof_state = MALI_PROFILING_STATE_UNINITIALIZED;
+
+	if (profile_entries != NULL) {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	if (lock != NULL) {
+		_mali_osk_mutex_term(lock);
+		lock = NULL;
+	}
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_start(u32 *limit)
+{
+	_mali_osk_errcode_t ret;
+	mali_profiling_entry *new_profile_entries;
+
+	_mali_osk_mutex_wait(lock);
+
+	if (prof_state == MALI_PROFILING_STATE_RUNNING) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_BUSY;
+	}
+
+	new_profile_entries = _mali_osk_valloc(*limit * sizeof(mali_profiling_entry));
+
+	if (new_profile_entries == NULL) {
+		_mali_osk_mutex_signal(lock);
+		_mali_osk_vfree(new_profile_entries);
+		return _MALI_OSK_ERR_NOMEM;
+	}
+
+	if (*limit > MALI_PROFILING_MAX_BUFFER_ENTRIES) {
+		*limit = MALI_PROFILING_MAX_BUFFER_ENTRIES;
+	}
+
+	profile_mask = 1;
+	while (profile_mask <= *limit) {
+		profile_mask <<= 1;
+	}
+	profile_mask >>= 1;
+
+	*limit = profile_mask;
+
+	profile_mask--; /* turns the power of two into a mask of one less */
+
+	if (prof_state != MALI_PROFILING_STATE_IDLE) {
+		_mali_osk_mutex_signal(lock);
+		_mali_osk_vfree(new_profile_entries);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	profile_entries = new_profile_entries;
+
+	ret = _mali_timestamp_reset();
+
+	if (ret == _MALI_OSK_ERR_OK) {
+		prof_state = MALI_PROFILING_STATE_RUNNING;
+	} else {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	register_trace_mali_timeline_event(probe_mali_timeline_event, NULL);
+
+	_mali_osk_mutex_signal(lock);
+	return ret;
+}
+
+static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4)
+{
+	u32 cur_index = (_mali_osk_atomic_inc_return(&profile_insert_index) - 1) & profile_mask;
+
+	profile_entries[cur_index].timestamp = _mali_timestamp_get();
+	profile_entries[cur_index].event_id = event_id;
+	profile_entries[cur_index].data[0] = data0;
+	profile_entries[cur_index].data[1] = data1;
+	profile_entries[cur_index].data[2] = data2;
+	profile_entries[cur_index].data[3] = data3;
+	profile_entries[cur_index].data[4] = data4;
+
+	/* If event is "leave API function", add current memory usage to the event
+	 * as data point 4.  This is used in timeline profiling to indicate how
+	 * much memory was used when leaving a function. */
+	if (event_id == (MALI_PROFILING_EVENT_TYPE_SINGLE | MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC)) {
+		profile_entries[cur_index].data[4] = _mali_ukk_report_memory_usage();
+	}
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_stop(u32 *count)
+{
+	_mali_osk_mutex_wait(lock);
+
+	if (prof_state != MALI_PROFILING_STATE_RUNNING) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	/* go into return state (user to retreive events), no more events will be added after this */
+	prof_state = MALI_PROFILING_STATE_RETURN;
+
+	unregister_trace_mali_timeline_event(probe_mali_timeline_event, NULL);
+
+	_mali_osk_mutex_signal(lock);
+
+	tracepoint_synchronize_unregister();
+
+	*count = _mali_osk_atomic_read(&profile_insert_index);
+	if (*count > profile_mask) *count = profile_mask;
+
+	return _MALI_OSK_ERR_OK;
+}
+
+u32 _mali_internal_profiling_get_count(void)
+{
+	u32 retval = 0;
+
+	_mali_osk_mutex_wait(lock);
+	if (prof_state == MALI_PROFILING_STATE_RETURN) {
+		retval = _mali_osk_atomic_read(&profile_insert_index);
+		if (retval > profile_mask) retval = profile_mask;
+	}
+	_mali_osk_mutex_signal(lock);
+
+	return retval;
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5])
+{
+	u32 raw_index = _mali_osk_atomic_read(&profile_insert_index);
+
+	_mali_osk_mutex_wait(lock);
+
+	if (index < profile_mask) {
+		if ((raw_index & ~profile_mask) != 0) {
+			index += raw_index;
+			index &= profile_mask;
+		}
+
+		if (prof_state != MALI_PROFILING_STATE_RETURN) {
+			_mali_osk_mutex_signal(lock);
+			return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+		}
+
+		if (index >= raw_index) {
+			_mali_osk_mutex_signal(lock);
+			return _MALI_OSK_ERR_FAULT;
+		}
+
+		*timestamp = profile_entries[index].timestamp;
+		*event_id = profile_entries[index].event_id;
+		data[0] = profile_entries[index].data[0];
+		data[1] = profile_entries[index].data[1];
+		data[2] = profile_entries[index].data[2];
+		data[3] = profile_entries[index].data[3];
+		data[4] = profile_entries[index].data[4];
+	} else {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_FAULT;
+	}
+
+	_mali_osk_mutex_signal(lock);
+	return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_clear(void)
+{
+	_mali_osk_mutex_wait(lock);
+
+	if (prof_state != MALI_PROFILING_STATE_RETURN) {
+		_mali_osk_mutex_signal(lock);
+		return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+	}
+
+	prof_state = MALI_PROFILING_STATE_IDLE;
+	profile_mask = 0;
+	_mali_osk_atomic_init(&profile_insert_index, 0);
+
+	if (profile_entries != NULL) {
+		_mali_osk_vfree(profile_entries);
+		profile_entries = NULL;
+	}
+
+	_mali_osk_mutex_signal(lock);
+	return _MALI_OSK_ERR_OK;
+}
+
+mali_bool _mali_internal_profiling_is_recording(void)
+{
+	return prof_state == MALI_PROFILING_STATE_RUNNING ? MALI_TRUE : MALI_FALSE;
+}
+
+mali_bool _mali_internal_profiling_have_recording(void)
+{
+	return prof_state == MALI_PROFILING_STATE_RETURN ? MALI_TRUE : MALI_FALSE;
+}
diff --git a/utgard/r7p0/linux/mali_profiling_internal.h b/utgard/r7p0/linux/mali_profiling_internal.h
new file mode 100644
index 0000000..d240969
--- /dev/null
+++ b/utgard/r7p0/linux/mali_profiling_internal.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_INTERNAL_H__
+#define __MALI_PROFILING_INTERNAL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_osk.h"
+
+int _mali_internal_profiling_init(mali_bool auto_start);
+void _mali_internal_profiling_term(void);
+
+mali_bool _mali_internal_profiling_is_recording(void);
+mali_bool _mali_internal_profiling_have_recording(void);
+_mali_osk_errcode_t _mali_internal_profiling_clear(void);
+_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]);
+u32 _mali_internal_profiling_get_count(void);
+int _mali_internal_profiling_stop(u32 *count);
+int _mali_internal_profiling_start(u32 *limit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_PROFILING_INTERNAL_H__ */
diff --git a/utgard/r7p0/linux/mali_sync.c b/utgard/r7p0/linux/mali_sync.c
new file mode 100644
index 0000000..69ba31f
--- /dev/null
+++ b/utgard/r7p0/linux/mali_sync.c
@@ -0,0 +1,457 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_sync.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_timeline.h"
+#include "mali_executor.h"
+
+#include <linux/file.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <asm-generic/fcntl.h>
+
+struct mali_sync_pt {
+	struct sync_pt         sync_pt;
+	struct mali_sync_flag *flag;
+	struct sync_timeline *sync_tl;  /**< Sync timeline this pt is connected to. */
+};
+
+/**
+ * The sync flag is used to connect sync fences to the Mali Timeline system.  Sync fences can be
+ * created from a sync flag, and when the flag is signaled, the sync fences will also be signaled.
+ */
+struct mali_sync_flag {
+	struct sync_timeline *sync_tl;  /**< Sync timeline this flag is connected to. */
+	u32                   point;    /**< Point on timeline. */
+	int                   status;   /**< 0 if unsignaled, 1 if signaled without error or negative if signaled with error. */
+	struct kref           refcount; /**< Reference count. */
+};
+
+/**
+ * Mali sync timeline is used to connect mali timeline to sync_timeline.
+ * When fence timeout can print more detailed mali timeline system info.
+ */
+struct mali_sync_timeline_container {
+	struct sync_timeline sync_timeline;
+	struct mali_timeline *timeline;
+};
+
+MALI_STATIC_INLINE struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, sync_pt);
+}
+
+MALI_STATIC_INLINE struct mali_sync_timeline_container *to_mali_sync_tl_container(struct sync_timeline *sync_tl)
+{
+	return container_of(sync_tl, struct mali_sync_timeline_container, sync_timeline);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt, *new_mpt;
+	struct sync_pt *new_pt;
+
+	if (pt == NULL) {
+		dump_stack();
+		return NULL;
+	}
+
+	MALI_DEBUG_ASSERT_POINTER(pt);
+	mpt = to_mali_sync_pt(pt);
+
+	new_pt = sync_pt_create(mpt->sync_tl, sizeof(struct mali_sync_pt));
+	if (new_pt == NULL) return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+
+	mali_sync_flag_get(mpt->flag);
+	new_mpt->flag = mpt->flag;
+	new_mpt->sync_tl = mpt->sync_tl;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(pt);
+	mpt = to_mali_sync_pt(pt);
+
+	MALI_DEBUG_ASSERT_POINTER(mpt->flag);
+
+	return mpt->flag->status;
+}
+
+static int timeline_compare(struct sync_pt *pta, struct sync_pt *ptb)
+{
+	struct mali_sync_pt *mpta;
+	struct mali_sync_pt *mptb;
+	u32 a, b;
+
+	MALI_DEBUG_ASSERT_POINTER(pta);
+	MALI_DEBUG_ASSERT_POINTER(ptb);
+	mpta = to_mali_sync_pt(pta);
+	mptb = to_mali_sync_pt(ptb);
+
+	MALI_DEBUG_ASSERT_POINTER(mpta->flag);
+	MALI_DEBUG_ASSERT_POINTER(mptb->flag);
+
+	a = mpta->flag->point;
+	b = mptb->flag->point;
+
+	if (a == b) return 0;
+
+	return ((b - a) < (a - b) ? -1 : 1);
+}
+
+static void timeline_free_pt(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(pt);
+	mpt = to_mali_sync_pt(pt);
+
+	mali_sync_flag_put(mpt->flag);
+}
+
+static void timeline_release(struct sync_timeline *sync_timeline)
+{
+	struct mali_sync_timeline_container *mali_sync_tl = NULL;
+	struct mali_timeline *mali_tl = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_timeline);
+
+	mali_sync_tl = to_mali_sync_tl_container(sync_timeline);
+	MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+	mali_tl = mali_sync_tl->timeline;
+
+	/* always signaled timeline didn't have mali container */
+	if (mali_tl) {
+		if (mali_tl->spinlock != NULL) {
+			mali_spinlock_reentrant_term(mali_tl->spinlock);
+		}
+		_mali_osk_free(mali_tl);
+	}
+
+	module_put(THIS_MODULE);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+static void timeline_print_pt(struct seq_file *s, struct sync_pt *sync_pt)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(s);
+	MALI_DEBUG_ASSERT_POINTER(sync_pt);
+
+	mpt = to_mali_sync_pt(sync_pt);
+
+	/* It is possible this sync point is just under construct,
+	 * make sure the flag is valid before accessing it
+	*/
+	if (mpt->flag) {
+		seq_printf(s, "%u", mpt->flag->point);
+	} else {
+		seq_puts(s, "uninitialized");
+	}
+}
+
+static void timeline_print_obj(struct seq_file *s, struct sync_timeline *sync_tl)
+{
+	struct mali_sync_timeline_container *mali_sync_tl = NULL;
+	struct mali_timeline *mali_tl = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_tl);
+
+	mali_sync_tl = to_mali_sync_tl_container(sync_tl);
+	MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+	mali_tl = mali_sync_tl->timeline;
+
+	if (mali_tl != NULL) {
+		seq_printf(s, "oldest (%u) ", mali_tl->point_oldest);
+		seq_printf(s, "next (%u)", mali_tl->point_next);
+		seq_puts(s, "\n");
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+		{
+			u32 tid = _mali_osk_get_tid();
+			struct mali_timeline_system *system = mali_tl->system;
+
+			mali_spinlock_reentrant_wait(mali_tl->spinlock, tid);
+			if (!mali_tl->destroyed) {
+				mali_spinlock_reentrant_wait(system->spinlock, tid);
+				mali_timeline_debug_print_timeline(mali_tl, s);
+				mali_spinlock_reentrant_signal(system->spinlock, tid);
+			}
+			mali_spinlock_reentrant_signal(mali_tl->spinlock, tid);
+
+			/* dump job queue status and group running status */
+			mali_executor_status_dump();
+		}
+#endif
+	}
+}
+#else
+static void timeline_pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(str);
+	MALI_DEBUG_ASSERT_POINTER(pt);
+
+	mpt = to_mali_sync_pt(pt);
+
+	/* It is possible this sync point is just under construct,
+	 * make sure the flag is valid before accessing it
+	*/
+	if (mpt->flag) {
+		_mali_osk_snprintf(str, size, "%u", mpt->flag->point);
+	} else {
+		_mali_osk_snprintf(str, size, "uninitialized");
+	}
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str, int size)
+{
+	struct mali_sync_timeline_container *mali_sync_tl = NULL;
+	struct mali_timeline *mali_tl = NULL;
+
+	MALI_DEBUG_ASSERT_POINTER(timeline);
+
+	mali_sync_tl = to_mali_sync_tl_container(timeline);
+	MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+	mali_tl = mali_sync_tl->timeline;
+
+	if (mali_tl != NULL) {
+		_mali_osk_snprintf(str, size, "oldest (%u) ", mali_tl->point_oldest);
+		_mali_osk_snprintf(str, size, "next (%u)", mali_tl->point_next);
+		_mali_osk_snprintf(str, size, "\n");
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+		{
+			u32 tid = _mali_osk_get_tid();
+			struct mali_timeline_system *system = mali_tl->system;
+
+			mali_spinlock_reentrant_wait(mali_tl->spinlock, tid);
+			if (!mali_tl->destroyed) {
+				mali_spinlock_reentrant_wait(system->spinlock, tid);
+				mali_timeline_debug_direct_print_timeline(mali_tl);
+				mali_spinlock_reentrant_signal(system->spinlock, tid);
+			}
+			mali_spinlock_reentrant_signal(mali_tl->spinlock, tid);
+
+			/* dump job queue status and group running status */
+			mali_executor_status_dump();
+		}
+#endif
+	}
+}
+#endif
+
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name    = "Mali",
+	.dup            = timeline_dup,
+	.has_signaled   = timeline_has_signaled,
+	.compare        = timeline_compare,
+	.free_pt        = timeline_free_pt,
+	.release_obj    = timeline_release,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	.print_pt       = timeline_print_pt,
+	.print_obj      = timeline_print_obj,
+#else
+	.pt_value_str = timeline_pt_value_str,
+	.timeline_value_str = timeline_value_str,
+#endif
+};
+
+struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name)
+{
+	struct sync_timeline *sync_tl;
+	struct mali_sync_timeline_container *mali_sync_tl;
+
+	sync_tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline_container), name);
+	if (sync_tl == NULL) return NULL;
+
+	mali_sync_tl = to_mali_sync_tl_container(sync_tl);
+	mali_sync_tl->timeline = timeline;
+
+	/* Grab a reference on the module to ensure the callbacks are present
+	 * as long some timeline exists. The reference is released when the
+	 * timeline is freed.
+	 * Since this function is called from a ioctl on an open file we know
+	 * we already have a reference, so using __module_get is safe. */
+	__module_get(THIS_MODULE);
+
+	return sync_tl;
+}
+
+s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence)
+{
+	s32 fd = -1;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
+	fd = get_unused_fd();
+#else
+	fd = get_unused_fd_flags(O_CLOEXEC);
+#endif
+
+	if (fd < 0) {
+		sync_fence_put(sync_fence);
+		return -1;
+	}
+	sync_fence_install(sync_fence, fd);
+
+	return fd;
+}
+
+struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2)
+{
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_fence1);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence1);
+
+	sync_fence = sync_fence_merge("mali_merge_fence", sync_fence1, sync_fence2);
+	sync_fence_put(sync_fence1);
+	sync_fence_put(sync_fence2);
+
+	return sync_fence;
+}
+
+struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl)
+{
+	struct mali_sync_flag *flag;
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(sync_tl);
+
+	flag = mali_sync_flag_create(sync_tl, 0);
+	if (flag == NULL) return NULL;
+
+	sync_fence = mali_sync_flag_create_fence(flag);
+
+	mali_sync_flag_signal(flag, 0);
+	mali_sync_flag_put(flag);
+
+	return sync_fence;
+}
+
+struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, mali_timeline_point point)
+{
+	struct mali_sync_flag *flag;
+
+	if (sync_tl == NULL) return NULL;
+
+	flag = _mali_osk_calloc(1, sizeof(*flag));
+	if (flag == NULL) return NULL;
+
+	flag->sync_tl = sync_tl;
+	flag->point = point;
+
+	flag->status = 0;
+	kref_init(&flag->refcount);
+
+	return flag;
+}
+
+void mali_sync_flag_get(struct mali_sync_flag *flag)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	kref_get(&flag->refcount);
+}
+
+/**
+ * Free sync flag.
+ *
+ * @param ref kref object embedded in sync flag that should be freed.
+ */
+static void mali_sync_flag_free(struct kref *ref)
+{
+	struct mali_sync_flag *flag;
+
+	MALI_DEBUG_ASSERT_POINTER(ref);
+	flag = container_of(ref, struct mali_sync_flag, refcount);
+
+	_mali_osk_free(flag);
+}
+
+void mali_sync_flag_put(struct mali_sync_flag *flag)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	kref_put(&flag->refcount, mali_sync_flag_free);
+}
+
+void mali_sync_flag_signal(struct mali_sync_flag *flag, int error)
+{
+	MALI_DEBUG_ASSERT_POINTER(flag);
+
+	MALI_DEBUG_ASSERT(flag->status == 0);
+	flag->status = (error < 0) ? error : 1;
+
+	_mali_osk_write_mem_barrier();
+
+	sync_timeline_signal(flag->sync_tl);
+}
+
+/**
+ * Create a sync point attached to given sync flag.
+ *
+ * @note Sync points must be triggered in *exactly* the same order as they are created.
+ *
+ * @param flag Sync flag.
+ * @return New sync point if successful, NULL if not.
+ */
+static struct sync_pt *mali_sync_flag_create_pt(struct mali_sync_flag *flag)
+{
+	struct sync_pt *pt;
+	struct mali_sync_pt *mpt;
+
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	MALI_DEBUG_ASSERT_POINTER(flag->sync_tl);
+
+	pt = sync_pt_create(flag->sync_tl, sizeof(struct mali_sync_pt));
+	if (pt == NULL) return NULL;
+
+	mali_sync_flag_get(flag);
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->flag = flag;
+	mpt->sync_tl = flag->sync_tl;
+
+	return pt;
+}
+
+struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag)
+{
+	struct sync_pt    *sync_pt;
+	struct sync_fence *sync_fence;
+
+	MALI_DEBUG_ASSERT_POINTER(flag);
+	MALI_DEBUG_ASSERT_POINTER(flag->sync_tl);
+
+	sync_pt = mali_sync_flag_create_pt(flag);
+	if (sync_pt == NULL) return NULL;
+
+	sync_fence = sync_fence_create("mali_flag_fence", sync_pt);
+	if (sync_fence == NULL) {
+		sync_pt_free(sync_pt);
+		return NULL;
+	}
+
+	return sync_fence;
+}
diff --git a/utgard/r7p0/linux/mali_sync.h b/utgard/r7p0/linux/mali_sync.h
new file mode 100644
index 0000000..b683c85
--- /dev/null
+++ b/utgard/r7p0/linux/mali_sync.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_sync.h
+ *
+ * Mali interface for Linux sync objects.
+ */
+
+#ifndef _MALI_SYNC_H_
+#define _MALI_SYNC_H_
+
+#if defined(CONFIG_SYNC)
+
+#include <linux/seq_file.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
+#include <linux/sync.h>
+#else
+#include <sync.h>
+#endif
+
+
+#include "mali_osk.h"
+
+struct mali_sync_flag;
+struct mali_timeline;
+
+/**
+ * Create a sync timeline.
+ *
+ * @param name Name of the sync timeline.
+ * @return The new sync timeline if successful, NULL if not.
+ */
+struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name);
+
+/**
+ * Creates a file descriptor representing the sync fence.  Will release sync fence if allocation of
+ * file descriptor fails.
+ *
+ * @param sync_fence Sync fence.
+ * @return File descriptor representing sync fence if successful, or -1 if not.
+ */
+s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence);
+
+/**
+ * Merges two sync fences.  Both input sync fences will be released.
+ *
+ * @param sync_fence1 First sync fence.
+ * @param sync_fence2 Second sync fence.
+ * @return New sync fence that is the result of the merger if successful, or NULL if not.
+ */
+struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2);
+
+/**
+ * Create a sync fence that is already signaled.
+ *
+ * @param tl Sync timeline.
+ * @return New signaled sync fence if successful, NULL if not.
+ */
+struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl);
+
+/**
+ * Create a sync flag.
+ *
+ * @param sync_tl Sync timeline.
+ * @param point Point on Mali timeline.
+ * @return New sync flag if successful, NULL if not.
+ */
+struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, u32 point);
+
+/**
+ * Grab sync flag reference.
+ *
+ * @param flag Sync flag.
+ */
+void mali_sync_flag_get(struct mali_sync_flag *flag);
+
+/**
+ * Release sync flag reference.  If this was the last reference, the sync flag will be freed.
+ *
+ * @param flag Sync flag.
+ */
+void mali_sync_flag_put(struct mali_sync_flag *flag);
+
+/**
+ * Signal sync flag.  All sync fences created from this flag will be signaled.
+ *
+ * @param flag Sync flag to signal.
+ * @param error Negative error code, or 0 if no error.
+ */
+void mali_sync_flag_signal(struct mali_sync_flag *flag, int error);
+
+/**
+ * Create a sync fence attached to given sync flag.
+ *
+ * @param flag Sync flag.
+ * @return New sync fence if successful, NULL if not.
+ */
+struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag);
+
+#endif /* defined(CONFIG_SYNC) */
+
+#endif /* _MALI_SYNC_H_ */
diff --git a/utgard/r7p0/linux/mali_uk_types.h b/utgard/r7p0/linux/mali_uk_types.h
new file mode 100644
index 0000000..76c5203
--- /dev/null
+++ b/utgard/r7p0/linux/mali_uk_types.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_UK_TYPES_H__
+#define __MALI_UK_TYPES_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_uk_types.h>
+
+#endif /* __MALI_UK_TYPES_H__ */
diff --git a/utgard/r7p0/linux/mali_ukk_core.c b/utgard/r7p0/linux/mali_ukk_core.c
new file mode 100644
index 0000000..1cc0180
--- /dev/null
+++ b/utgard/r7p0/linux/mali_ukk_core.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <linux/slab.h>     /* memort allocation functions */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs)
+{
+	_mali_uk_get_api_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (get_user(kargs.version, &uargs->version) != 0) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_api_version(&kargs);
+	if (err != _MALI_OSK_ERR_OK) return map_errcode(err);
+
+	if (put_user(kargs.version, &uargs->version) != 0) return -EFAULT;
+	if (put_user(kargs.compatible, &uargs->compatible) != 0) return -EFAULT;
+
+	return 0;
+}
+
+int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs)
+{
+	_mali_uk_get_api_version_v2_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (get_user(kargs.version, &uargs->version) != 0) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_api_version_v2(&kargs);
+	if (err != _MALI_OSK_ERR_OK) return map_errcode(err);
+
+	if (put_user(kargs.version, &uargs->version) != 0) return -EFAULT;
+	if (put_user(kargs.compatible, &uargs->compatible) != 0) return -EFAULT;
+
+	return 0;
+}
+
+int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs)
+{
+	_mali_uk_wait_for_notification_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_wait_for_notification(&kargs);
+	if (err != _MALI_OSK_ERR_OK) return map_errcode(err);
+
+	if (kargs.type != _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS) {
+		kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+		if (copy_to_user(uargs, &kargs, sizeof(_mali_uk_wait_for_notification_s)) != 0) return -EFAULT;
+	} else {
+		if (put_user(kargs.type, &uargs->type) != 0) return -EFAULT;
+	}
+
+	return 0;
+}
+
+int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs)
+{
+	_mali_uk_post_notification_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	if (get_user(kargs.type, &uargs->type) != 0) {
+		return -EFAULT;
+	}
+
+	err = _mali_ukk_post_notification(&kargs);
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs)
+{
+	_mali_uk_get_user_settings_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_user_settings(&kargs);
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = 0; /* prevent kernel address to be returned to user space */
+	if (copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_user_settings_s)) != 0) return -EFAULT;
+
+	return 0;
+}
+
+int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs)
+{
+	_mali_uk_request_high_priority_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_request_high_priority(&kargs);
+
+	kargs.ctx = 0;
+
+	return map_errcode(err);
+}
+
+int pending_submit_wrapper(struct mali_session_data *session_data, _mali_uk_pending_submit_s __user *uargs)
+{
+	_mali_uk_pending_submit_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_pending_submit(&kargs);
+	if (err != _MALI_OSK_ERR_OK) return map_errcode(err);
+
+	return 0;
+}
diff --git a/utgard/r7p0/linux/mali_ukk_gp.c b/utgard/r7p0/linux/mali_ukk_gp.c
new file mode 100644
index 0000000..ef7fb98
--- /dev/null
+++ b/utgard/r7p0/linux/mali_ukk_gp.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_gp_start_job(session_data, uargs);
+	if (err != _MALI_OSK_ERR_OK) return map_errcode(err);
+
+	return 0;
+}
+
+int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs)
+{
+	_mali_uk_get_gp_core_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err =  _mali_ukk_get_gp_core_version(&kargs);
+	if (err != _MALI_OSK_ERR_OK) return map_errcode(err);
+
+	/* no known transactions to roll-back */
+
+	if (put_user(kargs.version, &uargs->version) != 0) return -EFAULT;
+
+	return 0;
+}
+
+int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs)
+{
+	_mali_uk_gp_suspend_response_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_gp_suspend_response_s)) != 0) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_gp_suspend_response(&kargs);
+	if (err != _MALI_OSK_ERR_OK) return map_errcode(err);
+
+	if (put_user(kargs.cookie, &uargs->cookie) != 0) return -EFAULT;
+
+	/* no known transactions to roll-back */
+	return 0;
+}
+
+int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs)
+{
+	_mali_uk_get_gp_number_of_cores_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_gp_number_of_cores(&kargs);
+	if (err != _MALI_OSK_ERR_OK) return map_errcode(err);
+
+	/* no known transactions to roll-back */
+
+	if (put_user(kargs.number_of_cores, &uargs->number_of_cores) != 0) return -EFAULT;
+
+	return 0;
+}
diff --git a/utgard/r7p0/linux/mali_ukk_mem.c b/utgard/r7p0/linux/mali_ukk_mem.c
new file mode 100644
index 0000000..5e8559d
--- /dev/null
+++ b/utgard/r7p0/linux/mali_ukk_mem.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs)
+{
+	_mali_uk_alloc_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_alloc_mem_s)) != 0) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_allocate(&kargs);
+
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	if (put_user(kargs.backend_handle, &uargs->backend_handle) != 0) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs)
+{
+	_mali_uk_free_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_free_mem_s)) != 0) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_free(&kargs);
+
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	if (put_user(kargs.free_pages_nr, &uargs->free_pages_nr) != 0) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs)
+{
+	_mali_uk_bind_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_bind_mem_s)) != 0) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_bind(&kargs);
+
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs)
+{
+	_mali_uk_unbind_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_unbind_mem_s)) != 0) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_unbind(&kargs);
+
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+
+int mem_cow_wrapper(struct mali_session_data *session_data, _mali_uk_cow_mem_s __user *uargs)
+{
+	_mali_uk_cow_mem_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_cow_mem_s)) != 0) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_cow(&kargs);
+
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	if (put_user(kargs.backend_handle, &uargs->backend_handle) != 0) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int mem_cow_modify_range_wrapper(struct mali_session_data *session_data, _mali_uk_cow_modify_range_s __user *uargs)
+{
+	_mali_uk_cow_modify_range_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_cow_modify_range_s)) != 0) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_cow_modify_range(&kargs);
+
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	if (put_user(kargs.change_pages_nr, &uargs->change_pages_nr) != 0) {
+		return -EFAULT;
+	}
+	return 0;
+}
+
+
+int mem_resize_mem_wrapper(struct mali_session_data *session_data, _mali_uk_mem_resize_s __user *uargs)
+{
+	_mali_uk_mem_resize_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_resize_s)) != 0) {
+		return -EFAULT;
+	}
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_mem_resize(&kargs);
+
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs)
+{
+	_mali_uk_mem_write_safe_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_write_safe_s)) != 0) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	/* Check if we can access the buffers */
+	if (!access_ok(VERIFY_WRITE, kargs.dest, kargs.size)
+	    || !access_ok(VERIFY_READ, kargs.src, kargs.size)) {
+		return -EINVAL;
+	}
+
+	/* Check if size wraps */
+	if ((kargs.size + kargs.dest) <= kargs.dest
+	    || (kargs.size + kargs.src) <= kargs.src) {
+		return -EINVAL;
+	}
+
+	err = _mali_ukk_mem_write_safe(&kargs);
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	if (put_user(kargs.size, &uargs->size) != 0) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+
+
+int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs)
+{
+	_mali_uk_query_mmu_page_table_dump_size_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_query_mmu_page_table_dump_size(&kargs);
+	if (err != _MALI_OSK_ERR_OK) return map_errcode(err);
+
+	if (put_user(kargs.size, &uargs->size) != 0) return -EFAULT;
+
+	return 0;
+}
+
+int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs)
+{
+	_mali_uk_dump_mmu_page_table_s kargs;
+	_mali_osk_errcode_t err;
+	void __user *user_buffer;
+	void *buffer = NULL;
+	int rc = -EFAULT;
+
+	/* validate input */
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	/* the session_data pointer was validated by caller */
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_dump_mmu_page_table_s)) != 0)
+		goto err_exit;
+
+	user_buffer = (void __user *)(uintptr_t)kargs.buffer;
+	if (!access_ok(VERIFY_WRITE, user_buffer, kargs.size))
+		goto err_exit;
+
+	/* allocate temporary buffer (kernel side) to store mmu page table info */
+	if (kargs.size <= 0)
+		return -EINVAL;
+	/* Allow at most 8MiB buffers, this is more than enough to dump a fully
+	 * populated page table. */
+	if (kargs.size > SZ_8M)
+		return -EINVAL;
+
+	buffer = (void *)(uintptr_t)_mali_osk_valloc(kargs.size);
+	if (buffer == NULL) {
+		rc = -ENOMEM;
+		goto err_exit;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	kargs.buffer = (uintptr_t)buffer;
+	err = _mali_ukk_dump_mmu_page_table(&kargs);
+	if (err != _MALI_OSK_ERR_OK) {
+		rc = map_errcode(err);
+		goto err_exit;
+	}
+
+	/* copy mmu page table info back to user space and update pointers */
+	if (copy_to_user(user_buffer, buffer, kargs.size) != 0)
+		goto err_exit;
+
+	kargs.register_writes = kargs.register_writes -
+				(uintptr_t)buffer + (uintptr_t)user_buffer;
+	kargs.page_table_dump = kargs.page_table_dump -
+				(uintptr_t)buffer + (uintptr_t)user_buffer;
+
+	if (copy_to_user(uargs, &kargs, sizeof(kargs)) != 0)
+		goto err_exit;
+
+	rc = 0;
+
+err_exit:
+	if (buffer) _mali_osk_vfree(buffer);
+	return rc;
+}
+
+int mem_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+	_mali_uk_profiling_memory_usage_get_s kargs;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_memory_usage_get_s)) != 0) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_mem_usage_get(&kargs);
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+	if (copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_memory_usage_get_s)) != 0) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
diff --git a/utgard/r7p0/linux/mali_ukk_pp.c b/utgard/r7p0/linux/mali_ukk_pp.c
new file mode 100644
index 0000000..2f9b03a
--- /dev/null
+++ b/utgard/r7p0/linux/mali_ukk_pp.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2010, 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_pp_start_job(session_data, uargs);
+	if (err != _MALI_OSK_ERR_OK) return map_errcode(err);
+
+	return 0;
+}
+
+int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs)
+{
+	_mali_osk_errcode_t err;
+
+	/* If the jobs were started successfully, 0 is returned.  If there was an error, but the
+	 * jobs were started, we return -ENOENT.  For anything else returned, the jobs were not
+	 * started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	err = _mali_ukk_pp_and_gp_start_job(session_data, uargs);
+	if (err != _MALI_OSK_ERR_OK) return map_errcode(err);
+
+	return 0;
+}
+
+int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs)
+{
+	_mali_uk_get_pp_number_of_cores_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+
+	err = _mali_ukk_get_pp_number_of_cores(&kargs);
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */
+	if (copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_pp_number_of_cores_s)) != 0) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs)
+{
+	_mali_uk_get_pp_core_version_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_get_pp_core_version(&kargs);
+	if (err != _MALI_OSK_ERR_OK) return map_errcode(err);
+
+	if (put_user(kargs.version, &uargs->version) != 0) return -EFAULT;
+
+	return 0;
+}
+
+int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs)
+{
+	_mali_uk_pp_disable_wb_s kargs;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_pp_disable_wb_s)) != 0) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+	_mali_ukk_pp_job_disable_wb(&kargs);
+
+	return 0;
+}
diff --git a/utgard/r7p0/linux/mali_ukk_profiling.c b/utgard/r7p0/linux/mali_ukk_profiling.c
new file mode 100644
index 0000000..fa1b400
--- /dev/null
+++ b/utgard/r7p0/linux/mali_ukk_profiling.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+#include <linux/slab.h>
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs)
+{
+	_mali_uk_profiling_add_event_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_add_event_s)) != 0) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_profiling_add_event(&kargs);
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs)
+{
+	_mali_uk_sw_counters_report_s kargs;
+	_mali_osk_errcode_t err;
+	u32 *counter_buffer;
+	u32 __user *counters;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_sw_counters_report_s)) != 0) {
+		return -EFAULT;
+	}
+
+	/* make sure that kargs.num_counters is [at least somewhat] sane */
+	if (kargs.num_counters > 10000) {
+		MALI_DEBUG_PRINT(1, ("User space attempted to allocate too many counters.\n"));
+		return -EINVAL;
+	}
+
+	counter_buffer = kmalloc_array(kargs.num_counters, sizeof(u32), GFP_KERNEL);
+	if (counter_buffer == NULL) {
+		return -ENOMEM;
+	}
+
+	counters = (u32 *)(uintptr_t)kargs.counters;
+
+	if (copy_from_user(counter_buffer, counters, sizeof(u32) * kargs.num_counters) != 0) {
+		kfree(counter_buffer);
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	kargs.counters = (uintptr_t)counter_buffer;
+
+	err = _mali_ukk_sw_counters_report(&kargs);
+
+	kfree(counter_buffer);
+
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
+int profiling_get_stream_fd_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_stream_fd_get_s __user *uargs)
+{
+	_mali_uk_profiling_stream_fd_get_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_stream_fd_get_s)) != 0) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_profiling_stream_fd_get(&kargs);
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	if (copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_stream_fd_get_s)) != 0) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int profiling_control_set_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_control_set_s __user *uargs)
+{
+	_mali_uk_profiling_control_set_s kargs;
+	_mali_osk_errcode_t err;
+	u8 *kernel_control_data = NULL;
+	u8 *kernel_response_data = NULL;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (get_user(kargs.control_packet_size, &uargs->control_packet_size) != 0) return -EFAULT;
+	if (get_user(kargs.response_packet_size, &uargs->response_packet_size) != 0) return -EFAULT;
+
+	kargs.ctx = (uintptr_t)session_data;
+
+
+	/* Sanity check about the size */
+	if (kargs.control_packet_size > PAGE_SIZE || kargs.response_packet_size > PAGE_SIZE)
+		return -EINVAL;
+
+	if (kargs.control_packet_size != 0) {
+
+		if (kargs.response_packet_size == 0)
+			return -EINVAL;
+
+		kernel_control_data = _mali_osk_calloc(1, kargs.control_packet_size);
+		if (kernel_control_data == NULL) {
+			return -ENOMEM;
+		}
+
+		kernel_response_data = _mali_osk_calloc(1, kargs.response_packet_size);
+		if (kernel_response_data == NULL) {
+			_mali_osk_free(kernel_control_data);
+			return -ENOMEM;
+		}
+
+		kargs.control_packet_data = (uintptr_t)kernel_control_data;
+		kargs.response_packet_data = (uintptr_t)kernel_response_data;
+
+		if (copy_from_user((void *)(uintptr_t)kernel_control_data, (void *)(uintptr_t)uargs->control_packet_data, kargs.control_packet_size) != 0) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return -EFAULT;
+		}
+
+		err = _mali_ukk_profiling_control_set(&kargs);
+		if (err != _MALI_OSK_ERR_OK) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return map_errcode(err);
+		}
+
+		if (0 != kargs.response_packet_size && 0 != copy_to_user(((void *)(uintptr_t)uargs->response_packet_data), ((void *)(uintptr_t)kargs.response_packet_data), kargs.response_packet_size)) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return -EFAULT;
+		}
+
+		if (put_user(kargs.response_packet_size, &uargs->response_packet_size) != 0) {
+			_mali_osk_free(kernel_control_data);
+			_mali_osk_free(kernel_response_data);
+			return -EFAULT;
+		}
+
+		_mali_osk_free(kernel_control_data);
+		_mali_osk_free(kernel_response_data);
+	} else {
+
+		err = _mali_ukk_profiling_control_set(&kargs);
+		if (err != _MALI_OSK_ERR_OK) {
+			return map_errcode(err);
+		}
+
+	}
+	return 0;
+}
diff --git a/utgard/r7p0/linux/mali_ukk_soft_job.c b/utgard/r7p0/linux/mali_ukk_soft_job.c
new file mode 100644
index 0000000..4197068
--- /dev/null
+++ b/utgard/r7p0/linux/mali_ukk_soft_job.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+#include "mali_soft_job.h"
+#include "mali_timeline.h"
+
+int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs)
+{
+	_mali_uk_soft_job_start_s kargs;
+	u32 type, point;
+	u64 user_job;
+	struct mali_timeline_fence fence;
+	struct mali_soft_job *job = NULL;
+	u32 __user *job_id_ptr = NULL;
+
+	/* If the job was started successfully, 0 is returned.  If there was an error, but the job
+	 * was started, we return -ENOENT.  For anything else returned, the job was not started. */
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+	MALI_CHECK_NON_NULL(session, -EINVAL);
+
+	MALI_DEBUG_ASSERT_POINTER(session->soft_job_system);
+
+	if (copy_from_user(&kargs, uargs, sizeof(kargs)) != 0) {
+		return -EFAULT;
+	}
+
+	type = kargs.type;
+	user_job = kargs.user_job;
+	job_id_ptr = (u32 __user *)(uintptr_t)kargs.job_id_ptr;
+
+	mali_timeline_fence_copy_uk_fence(&fence, &kargs.fence);
+
+	if ((MALI_SOFT_JOB_TYPE_USER_SIGNALED != type) && (type != MALI_SOFT_JOB_TYPE_SELF_SIGNALED)) {
+		MALI_DEBUG_PRINT_ERROR(("Invalid soft job type specified\n"));
+		return -EINVAL;
+	}
+
+	/* Create soft job. */
+	job = mali_soft_job_create(session->soft_job_system, (enum mali_soft_job_type)type, user_job);
+	if (unlikely(job == NULL)) {
+		return map_errcode(_MALI_OSK_ERR_NOMEM);
+	}
+
+	/* Write job id back to user space. */
+	if (put_user(job->id, job_id_ptr) != 0) {
+		MALI_PRINT_ERROR(("Mali Soft Job: failed to put job id"));
+		mali_soft_job_destroy(job);
+		return map_errcode(_MALI_OSK_ERR_NOMEM);
+	}
+
+	/* Start soft job. */
+	point = mali_soft_job_start(job, &fence);
+
+	if (put_user(point, &uargs->point) != 0) {
+		/* Let user space know that something failed after the job was started. */
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs)
+{
+	u32 job_id;
+	_mali_osk_errcode_t err;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (get_user(job_id, &uargs->job_id) != 0) return -EFAULT;
+
+	err = mali_soft_job_system_signal_job(session->soft_job_system, job_id);
+
+	return map_errcode(err);
+}
diff --git a/utgard/r7p0/linux/mali_ukk_timeline.c b/utgard/r7p0/linux/mali_ukk_timeline.c
new file mode 100644
index 0000000..3680108
--- /dev/null
+++ b/utgard/r7p0/linux/mali_ukk_timeline.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+#include "mali_timeline.h"
+#include "mali_timeline_fence_wait.h"
+#include "mali_timeline_sync_fence.h"
+
+int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs)
+{
+	u32 val;
+	mali_timeline_id timeline;
+	mali_timeline_point point;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (get_user(val, &uargs->timeline) != 0) return -EFAULT;
+
+	if (val >= MALI_UK_TIMELINE_MAX) {
+		return -EINVAL;
+	}
+
+	timeline = (mali_timeline_id)val;
+
+	point = mali_timeline_system_get_latest_point(session->timeline_system, timeline);
+
+	if (put_user(point, &uargs->point) != 0) return -EFAULT;
+
+	return 0;
+}
+
+int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs)
+{
+	u32 timeout, status;
+	mali_bool ret;
+	_mali_uk_fence_t uk_fence;
+	struct mali_timeline_fence fence;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t)) != 0) return -EFAULT;
+	if (get_user(timeout, &uargs->timeout) != 0) return -EFAULT;
+
+	mali_timeline_fence_copy_uk_fence(&fence, &uk_fence);
+
+	ret = mali_timeline_fence_wait(session->timeline_system, &fence, timeout);
+	status = (MALI_TRUE == ret ? 1 : 0);
+
+	if (put_user(status, &uargs->status) != 0) return -EFAULT;
+
+	return 0;
+}
+
+int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs)
+{
+	s32 sync_fd = -1;
+	_mali_uk_fence_t uk_fence;
+	struct mali_timeline_fence fence;
+
+	MALI_DEBUG_ASSERT_POINTER(session);
+
+	if (copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t)) != 0) return -EFAULT;
+	mali_timeline_fence_copy_uk_fence(&fence, &uk_fence);
+
+#if defined(CONFIG_SYNC)
+	sync_fd = mali_timeline_sync_fence_create(session->timeline_system, &fence);
+#else
+	sync_fd = -1;
+#endif /* defined(CONFIG_SYNC) */
+
+	if (put_user(sync_fd, &uargs->sync_fd) != 0) return -EFAULT;
+
+	return 0;
+}
diff --git a/utgard/r7p0/linux/mali_ukk_vsync.c b/utgard/r7p0/linux/mali_ukk_vsync.c
new file mode 100644
index 0000000..7708a87
--- /dev/null
+++ b/utgard/r7p0/linux/mali_ukk_vsync.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2011-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/fs.h>       /* file system operations */
+#include <asm/uaccess.h>    /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+
+int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs)
+{
+	_mali_uk_vsync_event_report_s kargs;
+	_mali_osk_errcode_t err;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (copy_from_user(&kargs, uargs, sizeof(_mali_uk_vsync_event_report_s)) != 0) {
+		return -EFAULT;
+	}
+
+	kargs.ctx = (uintptr_t)session_data;
+	err = _mali_ukk_vsync_event_report(&kargs);
+	if (err != _MALI_OSK_ERR_OK) {
+		return map_errcode(err);
+	}
+
+	return 0;
+}
+
diff --git a/utgard/r7p0/linux/mali_ukk_wrappers.h b/utgard/r7p0/linux/mali_ukk_wrappers.h
new file mode 100644
index 0000000..f511391
--- /dev/null
+++ b/utgard/r7p0/linux/mali_ukk_wrappers.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/**
+ * @file mali_ukk_wrappers.h
+ * Defines the wrapper functions for each user-kernel function
+ */
+
+#ifndef __MALI_UKK_WRAPPERS_H__
+#define __MALI_UKK_WRAPPERS_H__
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs);
+int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs);
+int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs);
+int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs);
+int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs);
+int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs);
+int pending_submit_wrapper(struct mali_session_data *session_data, _mali_uk_pending_submit_s __user *uargs);
+
+int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs);
+int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs);
+int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs);
+int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs);
+int mem_cow_wrapper(struct mali_session_data *session_data, _mali_uk_cow_mem_s __user *uargs);
+int mem_cow_modify_range_wrapper(struct mali_session_data *session_data, _mali_uk_cow_modify_range_s __user *uargs);
+int mem_resize_mem_wrapper(struct mali_session_data *session_data, _mali_uk_mem_resize_s __user *uargs);
+int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs);
+int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs);
+int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs);
+int mem_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs);
+
+int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs);
+int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs);
+int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs);
+int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs);
+int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs);
+int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs);
+int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs);
+int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs);
+int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs);
+int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs);
+int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs);
+int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs);
+int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs);
+int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs);
+
+int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs);
+int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs);
+int profiling_get_stream_fd_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_stream_fd_get_s __user *uargs);
+int profiling_control_set_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_control_set_s __user *uargs);
+
+int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs);
+
+
+int map_errcode(_mali_osk_errcode_t err);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UKK_WRAPPERS_H__ */
diff --git a/utgard/r7p0/mali.mod.c b/utgard/r7p0/mali.mod.c
new file mode 100644
index 0000000..d472395
--- /dev/null
+++ b/utgard/r7p0/mali.mod.c
@@ -0,0 +1,31 @@
+#include <linux/module.h>
+#include <linux/vermagic.h>
+#include <linux/compiler.h>
+
+MODULE_INFO(vermagic, VERMAGIC_STRING);
+
+__visible struct module __this_module
+__attribute__((section(".gnu.linkonce.this_module"))) = {
+	.name = KBUILD_MODNAME,
+	.init = init_module,
+#ifdef CONFIG_MODULE_UNLOAD
+	.exit = cleanup_module,
+#endif
+	.arch = MODULE_ARCH_INIT,
+};
+
+static const char __module_depends[]
+__used
+__attribute__((section(".modinfo"))) =
+"depends=";
+
+MODULE_ALIAS("of:N*T*Carm,mali-300");
+MODULE_ALIAS("of:N*T*Carm,mali-300C*");
+MODULE_ALIAS("of:N*T*Carm,mali-400");
+MODULE_ALIAS("of:N*T*Carm,mali-400C*");
+MODULE_ALIAS("of:N*T*Carm,mali-450");
+MODULE_ALIAS("of:N*T*Carm,mali-450C*");
+MODULE_ALIAS("of:N*T*Carm,mali-470");
+MODULE_ALIAS("of:N*T*Carm,mali-470C*");
+
+MODULE_INFO(srcversion, "BD5A179FBA85CF5C3CD357C");
diff --git a/utgard/r7p0/readme.txt b/utgard/r7p0/readme.txt
new file mode 100644
index 0000000..6785ac9
--- /dev/null
+++ b/utgard/r7p0/readme.txt
@@ -0,0 +1,28 @@
+Building the Mali Device Driver for Linux
+-----------------------------------------
+
+Build the Mali Device Driver for Linux by running the following make command:
+
+KDIR=<kdir_path> USING_UMP=<ump_option> BUILD=<build_option> make
+
+where
+    kdir_path: Path to your Linux Kernel directory
+    ump_option: 1 = Enable UMP support(*)
+                0 = disable UMP support
+    build_option: debug = debug build of driver
+                  release = release build of driver
+
+(*)  For newer Linux Kernels, the Module.symvers file for the UMP device driver
+     must be available. The UMP_SYMVERS_FILE variable in the Makefile should
+     point to this file. This file is generated when the UMP driver is built.
+
+The result will be a mali.ko file, which can be loaded into the Linux kernel
+by using the insmod command.
+
+Use of UMP is not recommended. The dma-buf API in the Linux kernel has
+replaced UMP. The Mali Device Driver will be built with dma-buf support if the
+kernel config includes enabled dma-buf.
+
+The kernel needs to be provided with a platform_device struct for the Mali GPU
+device. See the mali_utgard.h header file for how to set up the Mali GPU
+resources.
diff --git a/utgard/r7p0/regs/mali_200_regs.h b/utgard/r7p0/regs/mali_200_regs.h
new file mode 100644
index 0000000..cd2056c
--- /dev/null
+++ b/utgard/r7p0/regs/mali_200_regs.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2010, 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _MALI200_REGS_H_
+#define _MALI200_REGS_H_
+
+/**
+ *  Enum for management register addresses.
+ */
+enum mali200_mgmt_reg {
+	MALI200_REG_ADDR_MGMT_VERSION                              = 0x1000,
+	MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR               = 0x1004,
+	MALI200_REG_ADDR_MGMT_STATUS                               = 0x1008,
+	MALI200_REG_ADDR_MGMT_CTRL_MGMT                            = 0x100c,
+
+	MALI200_REG_ADDR_MGMT_INT_RAWSTAT                          = 0x1020,
+	MALI200_REG_ADDR_MGMT_INT_CLEAR                            = 0x1024,
+	MALI200_REG_ADDR_MGMT_INT_MASK                             = 0x1028,
+	MALI200_REG_ADDR_MGMT_INT_STATUS                           = 0x102c,
+
+	MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS                     = 0x1050,
+
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE                    = 0x1080,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC                       = 0x1084,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_LIMIT                     = 0x1088,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE                     = 0x108c,
+
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE                    = 0x10a0,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC                       = 0x10a4,
+	MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE                     = 0x10ac,
+
+	MALI200_REG_ADDR_MGMT_PERFMON_CONTR                        = 0x10b0,
+	MALI200_REG_ADDR_MGMT_PERFMON_BASE                         = 0x10b4,
+
+	MALI200_REG_SIZEOF_REGISTER_BANK                           = 0x10f0
+
+};
+
+#define MALI200_REG_VAL_PERF_CNT_ENABLE 1
+
+enum mali200_mgmt_ctrl_mgmt {
+	MALI200_REG_VAL_CTRL_MGMT_STOP_BUS         = (1 << 0),
+	MALI200_REG_VAL_CTRL_MGMT_FLUSH_CACHES     = (1 << 3),
+	MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET      = (1 << 5),
+	MALI200_REG_VAL_CTRL_MGMT_START_RENDERING  = (1 << 6),
+	MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET     = (1 << 7), /* Only valid for Mali-300 and later */
+};
+
+enum mali200_mgmt_irq {
+	MALI200_REG_VAL_IRQ_END_OF_FRAME          = (1 << 0),
+	MALI200_REG_VAL_IRQ_END_OF_TILE           = (1 << 1),
+	MALI200_REG_VAL_IRQ_HANG                  = (1 << 2),
+	MALI200_REG_VAL_IRQ_FORCE_HANG            = (1 << 3),
+	MALI200_REG_VAL_IRQ_BUS_ERROR             = (1 << 4),
+	MALI200_REG_VAL_IRQ_BUS_STOP              = (1 << 5),
+	MALI200_REG_VAL_IRQ_CNT_0_LIMIT           = (1 << 6),
+	MALI200_REG_VAL_IRQ_CNT_1_LIMIT           = (1 << 7),
+	MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR  = (1 << 8),
+	MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND = (1 << 9),
+	MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW  = (1 << 10),
+	MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW   = (1 << 11),
+	MALI400PP_REG_VAL_IRQ_RESET_COMPLETED       = (1 << 12),
+};
+
+#define MALI200_REG_VAL_IRQ_MASK_ALL  ((enum mali200_mgmt_irq) (\
+				       MALI200_REG_VAL_IRQ_END_OF_FRAME                           |\
+				       MALI200_REG_VAL_IRQ_END_OF_TILE                            |\
+				       MALI200_REG_VAL_IRQ_HANG                                   |\
+				       MALI200_REG_VAL_IRQ_FORCE_HANG                             |\
+				       MALI200_REG_VAL_IRQ_BUS_ERROR                              |\
+				       MALI200_REG_VAL_IRQ_BUS_STOP                               |\
+				       MALI200_REG_VAL_IRQ_CNT_0_LIMIT                            |\
+				       MALI200_REG_VAL_IRQ_CNT_1_LIMIT                            |\
+				       MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR                   |\
+				       MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND                  |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW                   |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW                    |\
+				       MALI400PP_REG_VAL_IRQ_RESET_COMPLETED))
+
+#define MALI200_REG_VAL_IRQ_MASK_USED ((enum mali200_mgmt_irq) (\
+				       MALI200_REG_VAL_IRQ_END_OF_FRAME                           |\
+				       MALI200_REG_VAL_IRQ_FORCE_HANG                             |\
+				       MALI200_REG_VAL_IRQ_BUS_ERROR                              |\
+				       MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR                   |\
+				       MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND                  |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW                   |\
+				       MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW))
+
+#define MALI200_REG_VAL_IRQ_MASK_NONE ((enum mali200_mgmt_irq)(0))
+
+enum mali200_mgmt_status {
+	MALI200_REG_VAL_STATUS_RENDERING_ACTIVE     = (1 << 0),
+	MALI200_REG_VAL_STATUS_BUS_STOPPED          = (1 << 4),
+};
+
+enum mali200_render_unit {
+	MALI200_REG_ADDR_FRAME = 0x0000,
+	MALI200_REG_ADDR_RSW   = 0x0004,
+	MALI200_REG_ADDR_STACK = 0x0030,
+	MALI200_REG_ADDR_STACK_SIZE = 0x0034,
+	MALI200_REG_ADDR_ORIGIN_OFFSET_X  = 0x0040
+};
+
+enum mali200_wb_unit {
+	MALI200_REG_ADDR_WB0 = 0x0100,
+	MALI200_REG_ADDR_WB1 = 0x0200,
+	MALI200_REG_ADDR_WB2 = 0x0300
+};
+
+enum mali200_wb_unit_regs {
+	MALI200_REG_ADDR_WB_SOURCE_SELECT = 0x0000,
+	MALI200_REG_ADDR_WB_SOURCE_ADDR   = 0x0004,
+};
+
+/* This should be in the top 16 bit of the version register of Mali PP */
+#define MALI200_PP_PRODUCT_ID 0xC807
+#define MALI300_PP_PRODUCT_ID 0xCE07
+#define MALI400_PP_PRODUCT_ID 0xCD07
+#define MALI450_PP_PRODUCT_ID 0xCF07
+#define MALI470_PP_PRODUCT_ID 0xCF08
+
+
+
+#endif /* _MALI200_REGS_H_ */
diff --git a/utgard/r7p0/regs/mali_gp_regs.h b/utgard/r7p0/regs/mali_gp_regs.h
new file mode 100644
index 0000000..6983e4e
--- /dev/null
+++ b/utgard/r7p0/regs/mali_gp_regs.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2010, 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _MALIGP2_CONROL_REGS_H_
+#define _MALIGP2_CONROL_REGS_H_
+
+/**
+ * These are the different geometry processor control registers.
+ * Their usage is to control and monitor the operation of the
+ * Vertex Shader and the Polygon List Builder in the geometry processor.
+ * Addresses are in 32-bit word relative sizes.
+ * @see [P0081] "Geometry Processor Data Structures" for details
+ */
+
+typedef enum {
+	MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR           = 0x00,
+	MALIGP2_REG_ADDR_MGMT_VSCL_END_ADDR             = 0x04,
+	MALIGP2_REG_ADDR_MGMT_PLBUCL_START_ADDR         = 0x08,
+	MALIGP2_REG_ADDR_MGMT_PLBUCL_END_ADDR           = 0x0c,
+	MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR     = 0x10,
+	MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR       = 0x14,
+	MALIGP2_REG_ADDR_MGMT_CMD                       = 0x20,
+	MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT               = 0x24,
+	MALIGP2_REG_ADDR_MGMT_INT_CLEAR                 = 0x28,
+	MALIGP2_REG_ADDR_MGMT_INT_MASK                  = 0x2C,
+	MALIGP2_REG_ADDR_MGMT_INT_STAT                  = 0x30,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE         = 0x3C,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE         = 0x40,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC            = 0x44,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC            = 0x48,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE          = 0x4C,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE          = 0x50,
+	MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_LIMIT          = 0x54,
+	MALIGP2_REG_ADDR_MGMT_STATUS                    = 0x68,
+	MALIGP2_REG_ADDR_MGMT_VERSION                   = 0x6C,
+	MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR_READ      = 0x80,
+	MALIGP2_REG_ADDR_MGMT_PLBCL_START_ADDR_READ     = 0x84,
+	MALIGP2_CONTR_AXI_BUS_ERROR_STAT                = 0x94,
+	MALIGP2_REGISTER_ADDRESS_SPACE_SIZE             = 0x98,
+} maligp_reg_addr_mgmt_addr;
+
+#define MALIGP2_REG_VAL_PERF_CNT_ENABLE 1
+
+/**
+ * Commands to geometry processor.
+ *  @see MALIGP2_CTRL_REG_CMD
+ */
+typedef enum {
+	MALIGP2_REG_VAL_CMD_START_VS                    = (1 << 0),
+	MALIGP2_REG_VAL_CMD_START_PLBU                  = (1 << 1),
+	MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC   = (1 << 4),
+	MALIGP2_REG_VAL_CMD_RESET                               = (1 << 5),
+	MALIGP2_REG_VAL_CMD_FORCE_HANG                  = (1 << 6),
+	MALIGP2_REG_VAL_CMD_STOP_BUS                    = (1 << 9),
+	MALI400GP_REG_VAL_CMD_SOFT_RESET                = (1 << 10), /* only valid for Mali-300 and later */
+} mgp_contr_reg_val_cmd;
+
+
+/**  @defgroup MALIGP2_IRQ
+ * Interrupt status of geometry processor.
+ *  @see MALIGP2_CTRL_REG_INT_RAWSTAT, MALIGP2_REG_ADDR_MGMT_INT_CLEAR,
+ *       MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_ADDR_MGMT_INT_STAT
+ * @{
+ */
+#define MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      (1 << 0)
+#define MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    (1 << 1)
+#define MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     (1 << 2)
+#define MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ          (1 << 3)
+#define MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ        (1 << 4)
+#define MALIGP2_REG_VAL_IRQ_HANG                (1 << 5)
+#define MALIGP2_REG_VAL_IRQ_FORCE_HANG          (1 << 6)
+#define MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT    (1 << 7)
+#define MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT    (1 << 8)
+#define MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     (1 << 9)
+#define MALIGP2_REG_VAL_IRQ_SYNC_ERROR          (1 << 10)
+#define MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       (1 << 11)
+#define MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED     (1 << 12)
+#define MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      (1 << 13)
+#define MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     (1 << 14)
+#define MALI400GP_REG_VAL_IRQ_RESET_COMPLETED     (1 << 19)
+#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW (1 << 20)
+#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  (1 << 21)
+#define MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS  (1 << 22)
+
+/* Mask defining all IRQs in Mali GP */
+#define MALIGP2_REG_VAL_IRQ_MASK_ALL \
+	(\
+	 MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     | \
+	 MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ          | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ        | \
+	 MALIGP2_REG_VAL_IRQ_HANG                | \
+	 MALIGP2_REG_VAL_IRQ_FORCE_HANG          | \
+	 MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT    | \
+	 MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT    | \
+	 MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     | \
+	 MALIGP2_REG_VAL_IRQ_SYNC_ERROR          | \
+	 MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       | \
+	 MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED     | \
+	 MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      | \
+	 MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     | \
+	 MALI400GP_REG_VAL_IRQ_RESET_COMPLETED     | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  | \
+	 MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+/* Mask defining the IRQs in Mali GP which we use */
+#define MALIGP2_REG_VAL_IRQ_MASK_USED \
+	(\
+	 MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST      | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST    | \
+	 MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM     | \
+	 MALIGP2_REG_VAL_IRQ_FORCE_HANG          | \
+	 MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR     | \
+	 MALIGP2_REG_VAL_IRQ_SYNC_ERROR          | \
+	 MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR       | \
+	 MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD      | \
+	 MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD     | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \
+	 MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW  | \
+	 MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+/* Mask defining non IRQs on MaliGP2*/
+#define MALIGP2_REG_VAL_IRQ_MASK_NONE 0
+
+/** }@ defgroup MALIGP2_IRQ*/
+
+/** @defgroup MALIGP2_STATUS
+ * The different Status values to the geometry processor.
+ *  @see MALIGP2_CTRL_REG_STATUS
+ * @{
+ */
+#define MALIGP2_REG_VAL_STATUS_VS_ACTIVE         0x0002
+#define MALIGP2_REG_VAL_STATUS_BUS_STOPPED       0x0004
+#define MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE       0x0008
+#define MALIGP2_REG_VAL_STATUS_BUS_ERROR         0x0040
+#define MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR   0x0100
+/** }@ defgroup MALIGP2_STATUS*/
+
+#define MALIGP2_REG_VAL_STATUS_MASK_ACTIVE (\
+		MALIGP2_REG_VAL_STATUS_VS_ACTIVE|\
+		MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE)
+
+
+#define MALIGP2_REG_VAL_STATUS_MASK_ERROR (\
+		MALIGP2_REG_VAL_STATUS_BUS_ERROR |\
+		MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR)
+
+/* This should be in the top 16 bit of the version register of gp.*/
+#define MALI200_GP_PRODUCT_ID 0xA07
+#define MALI300_GP_PRODUCT_ID 0xC07
+#define MALI400_GP_PRODUCT_ID 0xB07
+#define MALI450_GP_PRODUCT_ID 0xD07
+
+/**
+ * The different sources for instrumented on the geometry processor.
+ *  @see MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC
+ */
+
+enum MALIGP2_cont_reg_perf_cnt_src {
+	MALIGP2_REG_VAL_PERF_CNT1_SRC_NUMBER_OF_VERTICES_PROCESSED = 0x0a,
+};
+
+#endif
diff --git a/utgard/r7p0/timestamp-arm11-cc/mali_timestamp.c b/utgard/r7p0/timestamp-arm11-cc/mali_timestamp.c
new file mode 100644
index 0000000..2e2e2b6
--- /dev/null
+++ b/utgard/r7p0/timestamp-arm11-cc/mali_timestamp.c
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2010-2011, 2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timestamp.h"
+
+/* This file is intentionally left empty, as all functions are inlined in mali_profiling_sampler.h */
diff --git a/utgard/r7p0/timestamp-arm11-cc/mali_timestamp.h b/utgard/r7p0/timestamp-arm11-cc/mali_timestamp.h
new file mode 100644
index 0000000..c9d705d
--- /dev/null
+++ b/utgard/r7p0/timestamp-arm11-cc/mali_timestamp.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMESTAMP_H__
+#define __MALI_TIMESTAMP_H__
+
+#include "mali_osk.h"
+
+MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void)
+{
+	/*
+	 * reset counters and overflow flags
+	 */
+
+	u32 mask = (1 << 0) | /* enable all three counters */
+		   (0 << 1) | /* reset both Count Registers to 0x0 */
+		   (1 << 2) | /* reset the Cycle Counter Register to 0x0 */
+		   (0 << 3) | /* 1 = Cycle Counter Register counts every 64th processor clock cycle */
+		   (0 << 4) | /* Count Register 0 interrupt enable */
+		   (0 << 5) | /* Count Register 1 interrupt enable */
+		   (0 << 6) | /* Cycle Counter interrupt enable */
+		   (0 << 8) | /* Count Register 0 overflow flag (clear or write, flag on read) */
+		   (0 << 9) | /* Count Register 1 overflow flag (clear or write, flag on read) */
+		   (1 << 10); /* Cycle Counter Register overflow flag (clear or write, flag on read) */
+
+	__asm__ __volatile__("MCR    p15, 0, %0, c15, c12, 0" : : "r"(mask));
+
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE u64 _mali_timestamp_get(void)
+{
+	u32 result;
+
+	/* this is for the clock cycles */
+	__asm__ __volatile__("MRC    p15, 0, %0, c15, c12, 1" : "=r"(result));
+
+	return (u64)result;
+}
+
+#endif /* __MALI_TIMESTAMP_H__ */
diff --git a/utgard/r7p0/timestamp-default/mali_timestamp.c b/utgard/r7p0/timestamp-default/mali_timestamp.c
new file mode 100644
index 0000000..2e2e2b6
--- /dev/null
+++ b/utgard/r7p0/timestamp-default/mali_timestamp.c
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2010-2011, 2013, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "mali_timestamp.h"
+
+/* This file is intentionally left empty, as all functions are inlined in mali_profiling_sampler.h */
diff --git a/utgard/r7p0/timestamp-default/mali_timestamp.h b/utgard/r7p0/timestamp-default/mali_timestamp.h
new file mode 100644
index 0000000..b0fcdd4
--- /dev/null
+++ b/utgard/r7p0/timestamp-default/mali_timestamp.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMESTAMP_H__
+#define __MALI_TIMESTAMP_H__
+
+#include "mali_osk.h"
+
+MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void)
+{
+	return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE u64 _mali_timestamp_get(void)
+{
+	return _mali_osk_boot_time_get_ns();
+}
+
+#endif /* __MALI_TIMESTAMP_H__ */
diff --git a/utgard/r8p0 b/utgard/r8p0
new file mode 120000
index 0000000..bc2c859
--- /dev/null
+++ b/utgard/r8p0
@@ -0,0 +1 @@
+../mali
\ No newline at end of file
